Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull misc filesystem and quota fixes from Jan Kara:
 "Some smaller udf, ext2, quota & reiserfs fixes"

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  ext2: Unmap metadata when zeroing blocks
  udf: don't bother with full-page write optimisations in adinicb case
  reiserfs: Unlock superblock before calling reiserfs_quota_on_mount()
  udf: Remove useless check in udf_adinicb_write_begin()
  quota: fill in Q_XGETQSTAT inode information for inactive quotas
  ext2: Check return value from ext2_get_group_desc()
diff --git a/.mailmap b/.mailmap
index 2a91c14..1dab0a1 100644
--- a/.mailmap
+++ b/.mailmap
@@ -69,6 +69,7 @@
 James Bottomley <jejb@titanic.il.steeleye.com>
 James E Wilson <wilson@specifix.com>
 James Ketrenos <jketreno@io.(none)>
+Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
 <javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
 Jean Tourrilhes <jt@hpl.hp.com>
 Jeff Garzik <jgarzik@pretzel.yyz.us>
@@ -88,6 +89,7 @@
 Kenneth W Chen <kenneth.w.chen@intel.com>
 Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
 Koushik <raghavendra.koushik@neterion.com>
+Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
 Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
 Leonid I Ananiev <leonid.i.ananiev@intel.com>
@@ -158,6 +160,8 @@
 Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
 Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
 Viresh Kumar <vireshk@kernel.org> <viresh.kumar2@arm.com>
+Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
+Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
 Takashi YOSHII <takashi.yoshii.zj@renesas.com>
 Yusuke Goda <goda.yusuke@renesas.com>
 Gustavo Padovan <gustavo@las.ic.unicamp.br>
diff --git a/CREDITS b/CREDITS
index 2a3fbcd..936f05a 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1944,6 +1944,11 @@
 E: kraxel@suse.de
 D: video4linux, bttv, vesafb, some scsi, misc fixes
 
+N: Hans J. Koch
+D: USERSPACE I/O, MAX6650
+D: Hans passed away in June 2016, and will be greatly missed.
+W: https://lwn.net/Articles/691000/
+
 N: Harald Koenig
 E: koenig@tat.physik.uni-tuebingen.de
 D: XFree86 (S3), DCF77, some kernel hacks and fixes
@@ -3518,6 +3523,10 @@
 S: Northborough, MA 01532
 S: USA
 
+N: Doug Thompson
+E: dougthompson@xmission.com
+D: EDAC
+
 N: Tommy Thorn
 E: Tommy.Thorn@irisa.fr
 W: http://www.irisa.fr/prive/thorn/index.html
@@ -3654,6 +3663,10 @@
 S: 97078 Wuerzburg
 S: Germany
 
+N: Jason Uhlenkott
+E: juhlenko@akamai.com
+D: I3000 EDAC driver
+
 N: Greg Ungerer
 E: gerg@snapgear.com
 D: uClinux kernel hacker
@@ -3691,7 +3704,7 @@
 N: Geert Uytterhoeven
 E: geert@linux-m68k.org
 W: http://users.telenet.be/geertu/
-P: 1024/862678A6 C51D 361C 0BD1 4C90 B275  C553 6EEA 11BA 8626 78A6
+P: 4096R/4804B4BC3F55EEFB 750D 82B0 A781 5431 5E25  925B 4804 B4BC 3F55 EEFB
 D: m68k/Amiga and PPC/CHRP Longtrail coordinator
 D: Frame buffer device and XF68_FBDev maintainer
 D: m68k IDE maintainer
diff --git a/Documentation/ABI/stable/sysfs-devices b/Documentation/ABI/stable/sysfs-devices
index 43f78b88d..df449d7 100644
--- a/Documentation/ABI/stable/sysfs-devices
+++ b/Documentation/ABI/stable/sysfs-devices
@@ -1,7 +1,7 @@
 # Note: This documents additional properties of any device beyond what
 # is documented in Documentation/sysfs-rules.txt
 
-What:		/sys/devices/*/of_path
+What:		/sys/devices/*/of_node
 Date:		February 2015
 Contact:	Device Tree mailing list <devicetree@vger.kernel.org>
 Description:
diff --git a/Documentation/ABI/testing/sysfs-class-led b/Documentation/ABI/testing/sysfs-class-led
index 3646ec8..86ace28 100644
--- a/Documentation/ABI/testing/sysfs-class-led
+++ b/Documentation/ABI/testing/sysfs-class-led
@@ -24,7 +24,8 @@
 		of led events.
 		You can change triggers in a similar manner to the way an IO
 		scheduler is chosen. Trigger specific parameters can appear in
-		/sys/class/leds/<led> once a given trigger is selected.
+		/sys/class/leds/<led> once a given trigger is selected. For
+		their documentation see sysfs-class-led-trigger-*.
 
 What:		/sys/class/leds/<led>/inverted
 Date:		January 2011
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-oneshot b/Documentation/ABI/testing/sysfs-class-led-trigger-oneshot
new file mode 100644
index 0000000..378a3a4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-oneshot
@@ -0,0 +1,36 @@
+What:		/sys/class/leds/<led>/delay_on
+Date:		Jun 2012
+KernelVersion:	3.6
+Contact:	linux-leds@vger.kernel.org
+Description:
+		Specifies for how many milliseconds the LED has to stay at
+		LED_FULL brightness after it has been armed.
+		Defaults to 100 ms.
+
+What:		/sys/class/leds/<led>/delay_off
+Date:		Jun 2012
+KernelVersion:	3.6
+Contact:	linux-leds@vger.kernel.org
+Description:
+		Specifies for how many milliseconds the LED has to stay at
+		LED_OFF brightness after it has been armed.
+		Defaults to 100 ms.
+
+What:		/sys/class/leds/<led>/invert
+Date:		Jun 2012
+KernelVersion:	3.6
+Contact:	linux-leds@vger.kernel.org
+Description:
+		Reverse the blink logic. If set to 0 (default) blink on for
+		delay_on ms, then blink off for delay_off ms, leaving the LED
+		normally off. If set to 1, blink off for delay_off ms, then
+		blink on for delay_on ms, leaving the LED normally on.
+		Setting this value also immediately changes the LED state.
+
+What:		/sys/class/leds/<led>/shot
+Date:		Jun 2012
+KernelVersion:	3.6
+Contact:	linux-leds@vger.kernel.org
+Description:
+		Write any non-empty string to signal an events, this starts a
+		blink sequence if not already running.
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-usbport b/Documentation/ABI/testing/sysfs-class-led-trigger-usbport
new file mode 100644
index 0000000..f440e69
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-usbport
@@ -0,0 +1,12 @@
+What:		/sys/class/leds/<led>/ports/<port>
+Date:		September 2016
+KernelVersion:	4.9
+Contact:	linux-leds@vger.kernel.org
+		linux-usb@vger.kernel.org
+Description:
+		Every dir entry represents a single USB port that can be
+		selected for the USB port trigger. Selecting ports makes trigger
+		observing them for any connected devices and lighting on LED if
+		there are any.
+		Echoing "1" value selects USB port. Echoing "0" unselects it.
+		Current state can be also read.
diff --git a/Documentation/ABI/testing/sysfs-class-mic.txt b/Documentation/ABI/testing/sysfs-class-mic.txt
index d45eed2..6ef6826 100644
--- a/Documentation/ABI/testing/sysfs-class-mic.txt
+++ b/Documentation/ABI/testing/sysfs-class-mic.txt
@@ -153,7 +153,7 @@
 
 What:		/sys/class/mic/mic(x)/heartbeat_enable
 Date:		March 2015
-KernelVersion:	3.20
+KernelVersion:	4.4
 Contact:	Ashutosh Dixit <ashutosh.dixit@intel.com>
 Description:
 		The MIC drivers detect and inform user space about card crashes
diff --git a/Documentation/ABI/testing/sysfs-i2c-bmp085 b/Documentation/ABI/testing/sysfs-i2c-bmp085
deleted file mode 100644
index 585962a..0000000
--- a/Documentation/ABI/testing/sysfs-i2c-bmp085
+++ /dev/null
@@ -1,31 +0,0 @@
-What:		/sys/bus/i2c/devices/<busnum>-<devaddr>/pressure0_input
-Date:		June 2010
-Contact:	Christoph Mair <christoph.mair@gmail.com>
-Description:	Start a pressure measurement and read the result. Values
-		represent the ambient air pressure in pascal (0.01 millibar).
-
-		Reading: returns the current air pressure.
-
-
-What:		/sys/bus/i2c/devices/<busnum>-<devaddr>/temp0_input
-Date:		June 2010
-Contact:	Christoph Mair <christoph.mair@gmail.com>
-Description:	Measure the ambient temperature. The returned value represents
-		the ambient temperature in units of 0.1 degree celsius.
-
-		Reading: returns the current temperature.
-
-
-What:		/sys/bus/i2c/devices/<busnum>-<devaddr>/oversampling
-Date:		June 2010
-Contact:	Christoph Mair <christoph.mair@gmail.com>
-Description:	Tell the bmp085 to use more samples to calculate a pressure
-		value. When writing to this file the chip will use 2^x samples
-		to calculate the next pressure value with x being the value
-		written. Using this feature will decrease RMS noise and
-		increase the measurement time.
-
-		Reading: returns the current oversampling setting.
-
-		Writing: sets a new oversampling setting.
-		Accepted values: 0..3.
diff --git a/Documentation/ABI/testing/sysfs-kernel-irq b/Documentation/ABI/testing/sysfs-kernel-irq
new file mode 100644
index 0000000..eb074b1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-irq
@@ -0,0 +1,53 @@
+What:		/sys/kernel/irq
+Date:		September 2016
+KernelVersion:	4.9
+Contact:	Craig Gallek <kraig@google.com>
+Description:	Directory containing information about the system's IRQs.
+		Specifically, data from the associated struct irq_desc.
+		The information here is similar to that in /proc/interrupts
+		but in a more machine-friendly format.  This directory contains
+		one subdirectory for each Linux IRQ number.
+
+What:		/sys/kernel/irq/<irq>/actions
+Date:		September 2016
+KernelVersion:	4.9
+Contact:	Craig Gallek <kraig@google.com>
+Description:	The IRQ action chain.  A comma-separated list of zero or more
+		device names associated with this interrupt.
+
+What:		/sys/kernel/irq/<irq>/chip_name
+Date:		September 2016
+KernelVersion:	4.9
+Contact:	Craig Gallek <kraig@google.com>
+Description:	Human-readable chip name supplied by the associated device
+		driver.
+
+What:		/sys/kernel/irq/<irq>/hwirq
+Date:		September 2016
+KernelVersion:	4.9
+Contact:	Craig Gallek <kraig@google.com>
+Description:	When interrupt translation domains are used, this file contains
+		the underlying hardware IRQ number used for this Linux IRQ.
+
+What:		/sys/kernel/irq/<irq>/name
+Date:		September 2016
+KernelVersion:	4.9
+Contact:	Craig Gallek <kraig@google.com>
+Description:	Human-readable flow handler name as defined by the irq chip
+		driver.
+
+What:		/sys/kernel/irq/<irq>/per_cpu_count
+Date:		September 2016
+KernelVersion:	4.9
+Contact:	Craig Gallek <kraig@google.com>
+Description:	The number of times the interrupt has fired since boot.  This
+		is a comma-separated list of counters; one per CPU in CPU id
+		order.  NOTE: This file consistently shows counters for all
+		CPU ids.  This differs from the behavior of /proc/interrupts
+		which only shows counters for online CPUs.
+
+What:		/sys/kernel/irq/<irq>/type
+Date:		September 2016
+KernelVersion:	4.9
+Contact:	Craig Gallek <kraig@google.com>
+Description:	The type of the interrupt.  Either the string 'level' or 'edge'.
diff --git a/Documentation/Changes b/Documentation/Changes
index ec97b77..22797a1 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -1,8 +1,13 @@
+.. _changes:
+
+Minimal requerements to compile the Kernel
+++++++++++++++++++++++++++++++++++++++++++
+
 Intro
 =====
 
 This document is designed to provide a list of the minimum levels of
-software necessary to run the 3.0 kernels.
+software necessary to run the 4.x kernels.
 
 This document is originally based on my "Changes" file for 2.0.x kernels
 and therefore owes credit to the same people as that file (Jared Mauch,
@@ -10,9 +15,9 @@
 'net).
 
 Current Minimal Requirements
-============================
+****************************
 
-Upgrade to at *least* these software revisions before thinking you've
+Upgrade to at **least** these software revisions before thinking you've
 encountered a bug!  If you're unsure what version you're currently
 running, the suggested command should tell you.
 
@@ -21,34 +26,40 @@
 systems; obviously, if you don't have any ISDN hardware, for example,
 you probably needn't concern yourself with isdn4k-utils.
 
-o  GNU C                  3.2                     # gcc --version
-o  GNU make               3.80                    # make --version
-o  binutils               2.12                    # ld -v
-o  util-linux             2.10o                   # fdformat --version
-o  module-init-tools      0.9.10                  # depmod -V
-o  e2fsprogs              1.41.4                  # e2fsck -V
-o  jfsutils               1.1.3                   # fsck.jfs -V
-o  reiserfsprogs          3.6.3                   # reiserfsck -V
-o  xfsprogs               2.6.0                   # xfs_db -V
-o  squashfs-tools         4.0                     # mksquashfs -version
-o  btrfs-progs            0.18                    # btrfsck
-o  pcmciautils            004                     # pccardctl -V
-o  quota-tools            3.09                    # quota -V
-o  PPP                    2.4.0                   # pppd --version
-o  isdn4k-utils           3.1pre1                 # isdnctrl 2>&1|grep version
-o  nfs-utils              1.0.5                   # showmount --version
-o  procps                 3.2.0                   # ps --version
-o  oprofile               0.9                     # oprofiled --version
-o  udev                   081                     # udevd --version
-o  grub                   0.93                    # grub --version || grub-install --version
-o  mcelog                 0.6                     # mcelog --version
-o  iptables               1.4.2                   # iptables -V
-o  openssl & libcrypto    1.0.0                   # openssl version
-o  bc                     1.06.95                 # bc --version
+====================== ===============  ========================================
+        Program        Minimal version       Command to check the version
+====================== ===============  ========================================
+GNU C                  3.2              gcc --version
+GNU make               3.80             make --version
+binutils               2.12             ld -v
+util-linux             2.10o            fdformat --version
+module-init-tools      0.9.10           depmod -V
+e2fsprogs              1.41.4           e2fsck -V
+jfsutils               1.1.3            fsck.jfs -V
+reiserfsprogs          3.6.3            reiserfsck -V
+xfsprogs               2.6.0            xfs_db -V
+squashfs-tools         4.0              mksquashfs -version
+btrfs-progs            0.18             btrfsck
+pcmciautils            004              pccardctl -V
+quota-tools            3.09             quota -V
+PPP                    2.4.0            pppd --version
+isdn4k-utils           3.1pre1          isdnctrl 2>&1|grep version
+nfs-utils              1.0.5            showmount --version
+procps                 3.2.0            ps --version
+oprofile               0.9              oprofiled --version
+udev                   081              udevd --version
+grub                   0.93             grub --version || grub-install --version
+mcelog                 0.6              mcelog --version
+iptables               1.4.2            iptables -V
+openssl & libcrypto    1.0.0            openssl version
+bc                     1.06.95          bc --version
+Sphinx\ [#f1]_	       1.2		sphinx-build --version
+====================== ===============  ========================================
 
+.. [#f1] Sphinx is needed only to build the Kernel documentation
 
 Kernel compilation
-==================
+******************
 
 GCC
 ---
@@ -64,16 +75,16 @@
 Binutils
 --------
 
-Linux on IA-32 has recently switched from using as86 to using gas for
-assembling the 16-bit boot code, removing the need for as86 to compile
+Linux on IA-32 has recently switched from using ``as86`` to using ``gas`` for
+assembling the 16-bit boot code, removing the need for ``as86`` to compile
 your kernel.  This change does, however, mean that you need a recent
 release of binutils.
 
 Perl
 ----
 
-You will need perl 5 and the following modules: Getopt::Long, Getopt::Std,
-File::Basename, and File::Find to build the kernel.
+You will need perl 5 and the following modules: ``Getopt::Long``,
+``Getopt::Std``, ``File::Basename``, and ``File::Find`` to build the kernel.
 
 BC
 --
@@ -93,7 +104,7 @@
 
 
 System utilities
-================
+****************
 
 Architectural changes
 ---------------------
@@ -115,7 +126,7 @@
 Util-linux
 ----------
 
-New versions of util-linux provide *fdisk support for larger disks,
+New versions of util-linux provide ``fdisk`` support for larger disks,
 support new options to mount, recognize more supported partition
 types, have a fdformat which works with 2.4 kernels, and similar goodies.
 You'll probably want to upgrade.
@@ -125,54 +136,57 @@
 
 If the unthinkable happens and your kernel oopses, you may need the
 ksymoops tool to decode it, but in most cases you don't.
-It is generally preferred to build the kernel with CONFIG_KALLSYMS so
+It is generally preferred to build the kernel with ``CONFIG_KALLSYMS`` so
 that it produces readable dumps that can be used as-is (this also
 produces better output than ksymoops).  If for some reason your kernel
-is not build with CONFIG_KALLSYMS and you have no way to rebuild and
+is not build with ``CONFIG_KALLSYMS`` and you have no way to rebuild and
 reproduce the Oops with that option, then you can still decode that Oops
 with ksymoops.
 
 Module-Init-Tools
 -----------------
 
-A new module loader is now in the kernel that requires module-init-tools
+A new module loader is now in the kernel that requires ``module-init-tools``
 to use.  It is backward compatible with the 2.4.x series kernels.
 
 Mkinitrd
 --------
 
-These changes to the /lib/modules file tree layout also require that
+These changes to the ``/lib/modules`` file tree layout also require that
 mkinitrd be upgraded.
 
 E2fsprogs
 ---------
 
-The latest version of e2fsprogs fixes several bugs in fsck and
+The latest version of ``e2fsprogs`` fixes several bugs in fsck and
 debugfs.  Obviously, it's a good idea to upgrade.
 
 JFSutils
 --------
 
-The jfsutils package contains the utilities for the file system.
+The ``jfsutils`` package contains the utilities for the file system.
 The following utilities are available:
-o fsck.jfs - initiate replay of the transaction log, and check
+
+- ``fsck.jfs`` - initiate replay of the transaction log, and check
   and repair a JFS formatted partition.
-o mkfs.jfs - create a JFS formatted partition.
-o other file system utilities are also available in this package.
+
+- ``mkfs.jfs`` - create a JFS formatted partition.
+
+- other file system utilities are also available in this package.
 
 Reiserfsprogs
 -------------
 
 The reiserfsprogs package should be used for reiserfs-3.6.x
 (Linux kernels 2.4.x). It is a combined package and contains working
-versions of mkreiserfs, resize_reiserfs, debugreiserfs and
-reiserfsck. These utils work on both i386 and alpha platforms.
+versions of ``mkreiserfs``, ``resize_reiserfs``, ``debugreiserfs`` and
+``reiserfsck``. These utils work on both i386 and alpha platforms.
 
 Xfsprogs
 --------
 
-The latest version of xfsprogs contains mkfs.xfs, xfs_db, and the
-xfs_repair utilities, among others, for the XFS filesystem.  It is
+The latest version of ``xfsprogs`` contains ``mkfs.xfs``, ``xfs_db``, and the
+``xfs_repair`` utilities, among others, for the XFS filesystem.  It is
 architecture independent and any version from 2.0.0 onward should
 work correctly with this version of the XFS kernel code (2.6.0 or
 later is recommended, due to some significant improvements).
@@ -180,7 +194,7 @@
 PCMCIAutils
 -----------
 
-PCMCIAutils replaces pcmcia-cs. It properly sets up
+PCMCIAutils replaces ``pcmcia-cs``. It properly sets up
 PCMCIA sockets at system startup and loads the appropriate modules
 for 16-bit PCMCIA devices if the kernel is modularized and the hotplug
 subsystem is used.
@@ -198,19 +212,20 @@
 
 A driver has been added to allow updating of Intel IA32 microcode,
 accessible as a normal (misc) character device.  If you are not using
-udev you may need to:
+udev you may need to::
 
-mkdir /dev/cpu
-mknod /dev/cpu/microcode c 10 184
-chmod 0644 /dev/cpu/microcode
+  mkdir /dev/cpu
+  mknod /dev/cpu/microcode c 10 184
+  chmod 0644 /dev/cpu/microcode
 
 as root before you can use this.  You'll probably also want to
 get the user-space microcode_ctl utility to use with this.
 
 udev
 ----
-udev is a userspace application for populating /dev dynamically with
-only entries for devices actually present.  udev replaces the basic
+
+``udev`` is a userspace application for populating ``/dev`` dynamically with
+only entries for devices actually present. ``udev`` replaces the basic
 functionality of devfs, while allowing persistent device naming for
 devices.
 
@@ -218,10 +233,10 @@
 ----
 
 Needs libfuse 2.4.0 or later.  Absolute minimum is 2.3.0 but mount
-options 'direct_io' and 'kernel_cache' won't work.
+options ``direct_io`` and ``kernel_cache`` won't work.
 
 Networking
-==========
+**********
 
 General changes
 ---------------
@@ -243,9 +258,9 @@
 upgrade pppd to at least 2.4.0.
 
 If you are not using udev, you must have the device file /dev/ppp
-which can be made by:
+which can be made by::
 
-mknod /dev/ppp c 108 0
+  mknod /dev/ppp c 108 0
 
 as root.
 
@@ -260,22 +275,22 @@
 
 In ancient (2.4 and earlier) kernels, the nfs server needed to know
 about any client that expected to be able to access files via NFS.  This
-information would be given to the kernel by "mountd" when the client
-mounted the filesystem, or by "exportfs" at system startup.  exportfs
-would take information about active clients from /var/lib/nfs/rmtab.
+information would be given to the kernel by ``mountd`` when the client
+mounted the filesystem, or by ``exportfs`` at system startup.  exportfs
+would take information about active clients from ``/var/lib/nfs/rmtab``.
 
 This approach is quite fragile as it depends on rmtab being correct
 which is not always easy, particularly when trying to implement
-fail-over.  Even when the system is working well, rmtab suffers from
+fail-over.  Even when the system is working well, ``rmtab`` suffers from
 getting lots of old entries that never get removed.
 
 With modern kernels we have the option of having the kernel tell mountd
 when it gets a request from an unknown host, and mountd can give
 appropriate export information to the kernel.  This removes the
-dependency on rmtab and means that the kernel only needs to know about
+dependency on ``rmtab`` and means that the kernel only needs to know about
 currently active clients.
 
-To enable this new functionality, you need to:
+To enable this new functionality, you need to::
 
   mount -t nfsd nfsd /proc/fs/nfsd
 
@@ -287,8 +302,32 @@
 ------
 
 On x86 kernels the mcelog utility is needed to process and log machine check
-events when CONFIG_X86_MCE is enabled. Machine check events are errors reported
-by the CPU. Processing them is strongly encouraged.
+events when ``CONFIG_X86_MCE`` is enabled. Machine check events are errors
+reported by the CPU. Processing them is strongly encouraged.
+
+Kernel documentation
+********************
+
+Sphinx
+------
+
+The ReST markups currently used by the Documentation/ files are meant to be
+built with ``Sphinx`` version 1.2 or upper. If you're desiring to build
+PDF outputs, it is recommended to use version 1.4.6.
+
+.. note::
+
+  Please notice that, for PDF and LaTeX output, you'll also need ``XeLaTeX``
+  version 3.14159265. Depending on the distribution, you may also need
+  to install a series of ``texlive`` packages that provide the minimal
+  set of functionalities required for ``XeLaTex`` to work.
+
+Other tools
+-----------
+
+In order to produce documentation from DocBook, you'll also need ``xmlto``.
+Please notice, however, that we're currently migrating all documents to use
+``Sphinx``.
 
 Getting updated software
 ========================
@@ -298,114 +337,149 @@
 
 gcc
 ---
-o  <ftp://ftp.gnu.org/gnu/gcc/>
+
+- <ftp://ftp.gnu.org/gnu/gcc/>
 
 Make
 ----
-o  <ftp://ftp.gnu.org/gnu/make/>
+
+- <ftp://ftp.gnu.org/gnu/make/>
 
 Binutils
 --------
-o  <ftp://ftp.kernel.org/pub/linux/devel/binutils/>
+
+- <ftp://ftp.kernel.org/pub/linux/devel/binutils/>
 
 OpenSSL
 -------
-o  <https://www.openssl.org/>
+
+- <https://www.openssl.org/>
 
 System utilities
 ****************
 
 Util-linux
 ----------
-o  <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>
+
+- <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>
 
 Ksymoops
 --------
-o  <ftp://ftp.kernel.org/pub/linux/utils/kernel/ksymoops/v2.4/>
+
+- <ftp://ftp.kernel.org/pub/linux/utils/kernel/ksymoops/v2.4/>
 
 Module-Init-Tools
 -----------------
-o  <ftp://ftp.kernel.org/pub/linux/kernel/people/rusty/modules/>
+
+- <ftp://ftp.kernel.org/pub/linux/kernel/people/rusty/modules/>
 
 Mkinitrd
 --------
-o  <https://code.launchpad.net/initrd-tools/main>
+
+- <https://code.launchpad.net/initrd-tools/main>
 
 E2fsprogs
 ---------
-o  <http://prdownloads.sourceforge.net/e2fsprogs/e2fsprogs-1.29.tar.gz>
+
+- <http://prdownloads.sourceforge.net/e2fsprogs/e2fsprogs-1.29.tar.gz>
 
 JFSutils
 --------
-o  <http://jfs.sourceforge.net/>
+
+- <http://jfs.sourceforge.net/>
 
 Reiserfsprogs
 -------------
-o  <http://www.kernel.org/pub/linux/utils/fs/reiserfs/>
+
+- <http://www.kernel.org/pub/linux/utils/fs/reiserfs/>
 
 Xfsprogs
 --------
-o  <ftp://oss.sgi.com/projects/xfs/>
+
+- <ftp://oss.sgi.com/projects/xfs/>
 
 Pcmciautils
 -----------
-o  <ftp://ftp.kernel.org/pub/linux/utils/kernel/pcmcia/>
+
+- <ftp://ftp.kernel.org/pub/linux/utils/kernel/pcmcia/>
 
 Quota-tools
-----------
-o  <http://sourceforge.net/projects/linuxquota/>
+-----------
+
+- <http://sourceforge.net/projects/linuxquota/>
 
 DocBook Stylesheets
 -------------------
-o  <http://sourceforge.net/projects/docbook/files/docbook-dsssl/>
+
+- <http://sourceforge.net/projects/docbook/files/docbook-dsssl/>
 
 XMLTO XSLT Frontend
 -------------------
-o  <http://cyberelk.net/tim/xmlto/>
+
+- <http://cyberelk.net/tim/xmlto/>
 
 Intel P6 microcode
 ------------------
-o  <https://downloadcenter.intel.com/>
+
+- <https://downloadcenter.intel.com/>
 
 udev
 ----
-o <http://www.freedesktop.org/software/systemd/man/udev.html>
+
+- <http://www.freedesktop.org/software/systemd/man/udev.html>
 
 FUSE
 ----
-o <http://sourceforge.net/projects/fuse>
+
+- <http://sourceforge.net/projects/fuse>
 
 mcelog
 ------
-o <http://www.mcelog.org/>
+
+- <http://www.mcelog.org/>
 
 Networking
 **********
 
 PPP
 ---
-o  <ftp://ftp.samba.org/pub/ppp/>
+
+- <ftp://ftp.samba.org/pub/ppp/>
 
 Isdn4k-utils
 ------------
-o  <ftp://ftp.isdn4linux.de/pub/isdn4linux/utils/>
+
+- <ftp://ftp.isdn4linux.de/pub/isdn4linux/utils/>
 
 NFS-utils
 ---------
-o  <http://sourceforge.net/project/showfiles.php?group_id=14>
+
+- <http://sourceforge.net/project/showfiles.php?group_id=14>
 
 Iptables
 --------
-o  <http://www.iptables.org/downloads.html>
+
+- <http://www.iptables.org/downloads.html>
 
 Ip-route2
 ---------
-o  <https://www.kernel.org/pub/linux/utils/net/iproute2/>
+
+- <https://www.kernel.org/pub/linux/utils/net/iproute2/>
 
 OProfile
 --------
-o  <http://oprofile.sf.net/download/>
+
+- <http://oprofile.sf.net/download/>
 
 NFS-Utils
 ---------
-o  <http://nfs.sourceforge.net/>
+
+- <http://nfs.sourceforge.net/>
+
+Kernel documentation
+********************
+
+Sphinx
+------
+
+- <http://www.sphinx-doc.org/>
diff --git a/Documentation/CodeOfConflict b/Documentation/CodeOfConflict
index 1684d0b..49a8ecc 100644
--- a/Documentation/CodeOfConflict
+++ b/Documentation/CodeOfConflict
@@ -19,7 +19,7 @@
 will work to resolve the issue to the best of their ability.  For more
 information on who is on the Technical Advisory Board and what their
 role is, please see:
-	http://www.linuxfoundation.org/programs/advisory-councils/tab
+	http://www.linuxfoundation.org/projects/linux/tab
 
 As a reviewer of code, please strive to keep things civil and focused on
 the technical issues involved.  We are all humans, and frustrations can
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index a096836..9c61c03 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -1,8 +1,10 @@
+.. _codingstyle:
 
-		Linux kernel coding style
+Linux kernel coding style
+=========================
 
 This is a short document describing the preferred coding style for the
-linux kernel.  Coding style is very personal, and I won't _force_ my
+linux kernel.  Coding style is very personal, and I won't **force** my
 views on anybody, but this is what goes for anything that I have to be
 able to maintain, and I'd prefer it for most other things too.  Please
 at least consider the points made here.
@@ -13,7 +15,8 @@
 Anyway, here goes:
 
 
-		Chapter 1: Indentation
+1) Indentation
+--------------
 
 Tabs are 8 characters, and thus indentations are also 8 characters.
 There are heretic movements that try to make indentations 4 (or even 2!)
@@ -36,8 +39,10 @@
 Heed that warning.
 
 The preferred way to ease multiple indentation levels in a switch statement is
-to align the "switch" and its subordinate "case" labels in the same column
-instead of "double-indenting" the "case" labels.  E.g.:
+to align the ``switch`` and its subordinate ``case`` labels in the same column
+instead of ``double-indenting`` the ``case`` labels.  E.g.:
+
+.. code-block:: c
 
 	switch (suffix) {
 	case 'G':
@@ -59,6 +64,8 @@
 Don't put multiple statements on a single line unless you have
 something to hide:
 
+.. code-block:: c
+
 	if (condition) do_this;
 	  do_something_everytime;
 
@@ -71,7 +78,8 @@
 Get a decent editor and don't leave whitespace at the end of lines.
 
 
-		Chapter 2: Breaking long lines and strings
+2) Breaking long lines and strings
+----------------------------------
 
 Coding style is all about readability and maintainability using commonly
 available tools.
@@ -87,7 +95,8 @@
 printk messages, because that breaks the ability to grep for them.
 
 
-		Chapter 3: Placing Braces and Spaces
+3) Placing Braces and Spaces
+----------------------------
 
 The other issue that always comes up in C styling is the placement of
 braces.  Unlike the indent size, there are few technical reasons to
@@ -95,6 +104,8 @@
 shown to us by the prophets Kernighan and Ritchie, is to put the opening
 brace last on the line, and put the closing brace first, thusly:
 
+.. code-block:: c
+
 	if (x is true) {
 		we do y
 	}
@@ -102,6 +113,8 @@
 This applies to all non-function statement blocks (if, switch, for,
 while, do).  E.g.:
 
+.. code-block:: c
+
 	switch (action) {
 	case KOBJ_ADD:
 		return "add";
@@ -116,6 +129,8 @@
 However, there is one special case, namely functions: they have the
 opening brace at the beginning of the next line, thus:
 
+.. code-block:: c
+
 	int function(int x)
 	{
 		body of function
@@ -123,20 +138,24 @@
 
 Heretic people all over the world have claimed that this inconsistency
 is ...  well ...  inconsistent, but all right-thinking people know that
-(a) K&R are _right_ and (b) K&R are right.  Besides, functions are
+(a) K&R are **right** and (b) K&R are right.  Besides, functions are
 special anyway (you can't nest them in C).
 
-Note that the closing brace is empty on a line of its own, _except_ in
+Note that the closing brace is empty on a line of its own, **except** in
 the cases where it is followed by a continuation of the same statement,
-ie a "while" in a do-statement or an "else" in an if-statement, like
+ie a ``while`` in a do-statement or an ``else`` in an if-statement, like
 this:
 
+.. code-block:: c
+
 	do {
 		body of do-loop
 	} while (condition);
 
 and
 
+.. code-block:: c
+
 	if (x == y) {
 		..
 	} else if (x > y) {
@@ -155,11 +174,15 @@
 
 Do not unnecessarily use braces where a single statement will do.
 
+.. code-block:: c
+
 	if (condition)
 		action();
 
 and
 
+.. code-block:: none
+
 	if (condition)
 		do_this();
 	else
@@ -168,6 +191,8 @@
 This does not apply if only one branch of a conditional statement is a single
 statement; in the latter case use braces in both branches:
 
+.. code-block:: c
+
 	if (condition) {
 		do_this();
 		do_that();
@@ -175,57 +200,67 @@
 		otherwise();
 	}
 
-		3.1:  Spaces
+3.1) Spaces
+***********
 
 Linux kernel style for use of spaces depends (mostly) on
 function-versus-keyword usage.  Use a space after (most) keywords.  The
 notable exceptions are sizeof, typeof, alignof, and __attribute__, which look
 somewhat like functions (and are usually used with parentheses in Linux,
-although they are not required in the language, as in: "sizeof info" after
-"struct fileinfo info;" is declared).
+although they are not required in the language, as in: ``sizeof info`` after
+``struct fileinfo info;`` is declared).
 
-So use a space after these keywords:
+So use a space after these keywords::
 
 	if, switch, case, for, do, while
 
 but not with sizeof, typeof, alignof, or __attribute__.  E.g.,
 
+.. code-block:: c
+
+
 	s = sizeof(struct file);
 
 Do not add spaces around (inside) parenthesized expressions.  This example is
-*bad*:
+**bad**:
+
+.. code-block:: c
+
 
 	s = sizeof( struct file );
 
 When declaring pointer data or a function that returns a pointer type, the
-preferred use of '*' is adjacent to the data name or function name and not
+preferred use of ``*`` is adjacent to the data name or function name and not
 adjacent to the type name.  Examples:
 
+.. code-block:: c
+
+
 	char *linux_banner;
 	unsigned long long memparse(char *ptr, char **retptr);
 	char *match_strdup(substring_t *s);
 
 Use one space around (on each side of) most binary and ternary operators,
-such as any of these:
+such as any of these::
 
 	=  +  -  <  >  *  /  %  |  &  ^  <=  >=  ==  !=  ?  :
 
-but no space after unary operators:
+but no space after unary operators::
 
 	&  *  +  -  ~  !  sizeof  typeof  alignof  __attribute__  defined
 
-no space before the postfix increment & decrement unary operators:
+no space before the postfix increment & decrement unary operators::
 
 	++  --
 
-no space after the prefix increment & decrement unary operators:
+no space after the prefix increment & decrement unary operators::
 
 	++  --
 
-and no space around the '.' and "->" structure member operators.
+and no space around the ``.`` and ``->`` structure member operators.
 
 Do not leave trailing whitespace at the ends of lines.  Some editors with
-"smart" indentation will insert whitespace at the beginning of new lines as
+``smart`` indentation will insert whitespace at the beginning of new lines as
 appropriate, so you can start typing the next line of code right away.
 However, some such editors do not remove the whitespace if you end up not
 putting a line of code there, such as if you leave a blank line.  As a result,
@@ -237,22 +272,23 @@
 context lines.
 
 
-		Chapter 4: Naming
+4) Naming
+---------
 
 C is a Spartan language, and so should your naming be.  Unlike Modula-2
 and Pascal programmers, C programmers do not use cute names like
 ThisVariableIsATemporaryCounter.  A C programmer would call that
-variable "tmp", which is much easier to write, and not the least more
+variable ``tmp``, which is much easier to write, and not the least more
 difficult to understand.
 
 HOWEVER, while mixed-case names are frowned upon, descriptive names for
-global variables are a must.  To call a global function "foo" is a
+global variables are a must.  To call a global function ``foo`` is a
 shooting offense.
 
-GLOBAL variables (to be used only if you _really_ need them) need to
+GLOBAL variables (to be used only if you **really** need them) need to
 have descriptive names, as do global functions.  If you have a function
 that counts the number of active users, you should call that
-"count_active_users()" or similar, you should _not_ call it "cntusr()".
+``count_active_users()`` or similar, you should **not** call it ``cntusr()``.
 
 Encoding the type of a function into the name (so-called Hungarian
 notation) is brain damaged - the compiler knows the types anyway and can
@@ -260,9 +296,9 @@
 makes buggy programs.
 
 LOCAL variable names should be short, and to the point.  If you have
-some random integer loop counter, it should probably be called "i".
-Calling it "loop_counter" is non-productive, if there is no chance of it
-being mis-understood.  Similarly, "tmp" can be just about any type of
+some random integer loop counter, it should probably be called ``i``.
+Calling it ``loop_counter`` is non-productive, if there is no chance of it
+being mis-understood.  Similarly, ``tmp`` can be just about any type of
 variable that is used to hold a temporary value.
 
 If you are afraid to mix up your local variable names, you have another
@@ -270,59 +306,69 @@
 See chapter 6 (Functions).
 
 
-		Chapter 5: Typedefs
+5) Typedefs
+-----------
 
-Please don't use things like "vps_t".
-It's a _mistake_ to use typedef for structures and pointers. When you see a
+Please don't use things like ``vps_t``.
+It's a **mistake** to use typedef for structures and pointers. When you see a
+
+.. code-block:: c
+
 
 	vps_t a;
 
 in the source, what does it mean?
 In contrast, if it says
 
+.. code-block:: c
+
 	struct virtual_container *a;
 
-you can actually tell what "a" is.
+you can actually tell what ``a`` is.
 
-Lots of people think that typedefs "help readability". Not so. They are
+Lots of people think that typedefs ``help readability``. Not so. They are
 useful only for:
 
- (a) totally opaque objects (where the typedef is actively used to _hide_
+ (a) totally opaque objects (where the typedef is actively used to **hide**
      what the object is).
 
-     Example: "pte_t" etc. opaque objects that you can only access using
+     Example: ``pte_t`` etc. opaque objects that you can only access using
      the proper accessor functions.
 
-     NOTE! Opaqueness and "accessor functions" are not good in themselves.
-     The reason we have them for things like pte_t etc. is that there
-     really is absolutely _zero_ portably accessible information there.
+     .. note::
 
- (b) Clear integer types, where the abstraction _helps_ avoid confusion
-     whether it is "int" or "long".
+       Opaqueness and ``accessor functions`` are not good in themselves.
+       The reason we have them for things like pte_t etc. is that there
+       really is absolutely **zero** portably accessible information there.
+
+ (b) Clear integer types, where the abstraction **helps** avoid confusion
+     whether it is ``int`` or ``long``.
 
      u8/u16/u32 are perfectly fine typedefs, although they fit into
      category (d) better than here.
 
-     NOTE! Again - there needs to be a _reason_ for this. If something is
-     "unsigned long", then there's no reason to do
+     .. note::
+
+       Again - there needs to be a **reason** for this. If something is
+       ``unsigned long``, then there's no reason to do
 
 	typedef unsigned long myflags_t;
 
      but if there is a clear reason for why it under certain circumstances
-     might be an "unsigned int" and under other configurations might be
-     "unsigned long", then by all means go ahead and use a typedef.
+     might be an ``unsigned int`` and under other configurations might be
+     ``unsigned long``, then by all means go ahead and use a typedef.
 
- (c) when you use sparse to literally create a _new_ type for
+ (c) when you use sparse to literally create a **new** type for
      type-checking.
 
  (d) New types which are identical to standard C99 types, in certain
      exceptional circumstances.
 
      Although it would only take a short amount of time for the eyes and
-     brain to become accustomed to the standard types like 'uint32_t',
+     brain to become accustomed to the standard types like ``uint32_t``,
      some people object to their use anyway.
 
-     Therefore, the Linux-specific 'u8/u16/u32/u64' types and their
+     Therefore, the Linux-specific ``u8/u16/u32/u64`` types and their
      signed equivalents which are identical to standard types are
      permitted -- although they are not mandatory in new code of your
      own.
@@ -333,7 +379,7 @@
  (e) Types safe for use in userspace.
 
      In certain structures which are visible to userspace, we cannot
-     require C99 types and cannot use the 'u32' form above. Thus, we
+     require C99 types and cannot use the ``u32`` form above. Thus, we
      use __u32 and similar types in all structures which are shared
      with userspace.
 
@@ -341,10 +387,11 @@
 EVER use a typedef unless you can clearly match one of those rules.
 
 In general, a pointer, or a struct that has elements that can reasonably
-be directly accessed should _never_ be a typedef.
+be directly accessed should **never** be a typedef.
 
 
-		Chapter 6: Functions
+6) Functions
+------------
 
 Functions should be short and sweet, and do just one thing.  They should
 fit on one or two screenfuls of text (the ISO/ANSI screen size is 80x24,
@@ -372,8 +419,10 @@
 to understand what you did 2 weeks from now.
 
 In source files, separate functions with one blank line.  If the function is
-exported, the EXPORT* macro for it should follow immediately after the closing
-function brace line.  E.g.:
+exported, the **EXPORT** macro for it should follow immediately after the
+closing function brace line.  E.g.:
+
+.. code-block:: c
 
 	int system_is_up(void)
 	{
@@ -386,7 +435,8 @@
 because it is a simple way to add valuable information for the reader.
 
 
-		Chapter 7: Centralized exiting of functions
+7) Centralized exiting of functions
+-----------------------------------
 
 Albeit deprecated by some people, the equivalent of the goto statement is
 used frequently by compilers in form of the unconditional jump instruction.
@@ -396,18 +446,21 @@
 cleanup needed then just return directly.
 
 Choose label names which say what the goto does or why the goto exists.  An
-example of a good name could be "out_buffer:" if the goto frees "buffer".  Avoid
-using GW-BASIC names like "err1:" and "err2:".  Also don't name them after the
-goto location like "err_kmalloc_failed:"
+example of a good name could be ``out_free_buffer:`` if the goto frees ``buffer``.
+Avoid using GW-BASIC names like ``err1:`` and ``err2:``, as you would have to
+renumber them if you ever add or remove exit paths, and they make correctness
+difficult to verify anyway.
 
 The rationale for using gotos is:
 
 - unconditional statements are easier to understand and follow
 - nesting is reduced
 - errors by not updating individual exit points when making
-    modifications are prevented
+  modifications are prevented
 - saves the compiler work to optimize redundant code away ;)
 
+.. code-block:: c
+
 	int fun(int a)
 	{
 		int result = 0;
@@ -425,27 +478,41 @@
 			goto out_buffer;
 		}
 		...
-	out_buffer:
+	out_free_buffer:
 		kfree(buffer);
 		return result;
 	}
 
-A common type of bug to be aware of is "one err bugs" which look like this:
+A common type of bug to be aware of is ``one err bugs`` which look like this:
+
+.. code-block:: c
 
 	err:
 		kfree(foo->bar);
 		kfree(foo);
 		return ret;
 
-The bug in this code is that on some exit paths "foo" is NULL.  Normally the
-fix for this is to split it up into two error labels "err_bar:" and "err_foo:".
+The bug in this code is that on some exit paths ``foo`` is NULL.  Normally the
+fix for this is to split it up into two error labels ``err_free_bar:`` and
+``err_free_foo:``:
+
+.. code-block:: c
+
+	 err_free_bar:
+		kfree(foo->bar);
+	 err_free_foo:
+		kfree(foo);
+		return ret;
+
+Ideally you should simulate errors to test all exit paths.
 
 
-		Chapter 8: Commenting
+8) Commenting
+-------------
 
 Comments are good, but there is also a danger of over-commenting.  NEVER
 try to explain HOW your code works in a comment: it's much better to
-write the code so that the _working_ is obvious, and it's a waste of
+write the code so that the **working** is obvious, and it's a waste of
 time to explain badly written code.
 
 Generally, you want your comments to tell WHAT your code does, not HOW.
@@ -461,11 +528,10 @@
 See the files Documentation/kernel-documentation.rst and scripts/kernel-doc
 for details.
 
-Linux style for comments is the C89 "/* ... */" style.
-Don't use C99-style "// ..." comments.
-
 The preferred style for long (multi-line) comments is:
 
+.. code-block:: c
+
 	/*
 	 * This is the preferred style for multi-line
 	 * comments in the Linux kernel source code.
@@ -478,6 +544,8 @@
 For files in net/ and drivers/net/ the preferred style for long (multi-line)
 comments is a little different.
 
+.. code-block:: c
+
 	/* The preferred comment style for files in net/ and drivers/net
 	 * looks like this.
 	 *
@@ -491,10 +559,11 @@
 item, explaining its use.
 
 
-		Chapter 9: You've made a mess of it
+9) You've made a mess of it
+---------------------------
 
 That's OK, we all do.  You've probably been told by your long-time Unix
-user helper that "GNU emacs" automatically formats the C sources for
+user helper that ``GNU emacs`` automatically formats the C sources for
 you, and you've noticed that yes, it does do that, but the defaults it
 uses are less than desirable (in fact, they are worse than random
 typing - an infinite number of monkeys typing into GNU emacs would never
@@ -503,63 +572,66 @@
 So, you can either get rid of GNU emacs, or change it to use saner
 values.  To do the latter, you can stick the following in your .emacs file:
 
-(defun c-lineup-arglist-tabs-only (ignored)
-  "Line up argument lists by tabs, not spaces"
-  (let* ((anchor (c-langelem-pos c-syntactic-element))
-         (column (c-langelem-2nd-pos c-syntactic-element))
-         (offset (- (1+ column) anchor))
-         (steps (floor offset c-basic-offset)))
-    (* (max steps 1)
-       c-basic-offset)))
+.. code-block:: none
 
-(add-hook 'c-mode-common-hook
-          (lambda ()
-            ;; Add kernel style
-            (c-add-style
-             "linux-tabs-only"
-             '("linux" (c-offsets-alist
-                        (arglist-cont-nonempty
-                         c-lineup-gcc-asm-reg
-                         c-lineup-arglist-tabs-only))))))
+  (defun c-lineup-arglist-tabs-only (ignored)
+    "Line up argument lists by tabs, not spaces"
+    (let* ((anchor (c-langelem-pos c-syntactic-element))
+           (column (c-langelem-2nd-pos c-syntactic-element))
+           (offset (- (1+ column) anchor))
+           (steps (floor offset c-basic-offset)))
+      (* (max steps 1)
+         c-basic-offset)))
 
-(add-hook 'c-mode-hook
-          (lambda ()
-            (let ((filename (buffer-file-name)))
-              ;; Enable kernel mode for the appropriate files
-              (when (and filename
-                         (string-match (expand-file-name "~/src/linux-trees")
-                                       filename))
-                (setq indent-tabs-mode t)
-                (setq show-trailing-whitespace t)
-                (c-set-style "linux-tabs-only")))))
+  (add-hook 'c-mode-common-hook
+            (lambda ()
+              ;; Add kernel style
+              (c-add-style
+               "linux-tabs-only"
+               '("linux" (c-offsets-alist
+                          (arglist-cont-nonempty
+                           c-lineup-gcc-asm-reg
+                           c-lineup-arglist-tabs-only))))))
+
+  (add-hook 'c-mode-hook
+            (lambda ()
+              (let ((filename (buffer-file-name)))
+                ;; Enable kernel mode for the appropriate files
+                (when (and filename
+                           (string-match (expand-file-name "~/src/linux-trees")
+                                         filename))
+                  (setq indent-tabs-mode t)
+                  (setq show-trailing-whitespace t)
+                  (c-set-style "linux-tabs-only")))))
 
 This will make emacs go better with the kernel coding style for C
-files below ~/src/linux-trees.
+files below ``~/src/linux-trees``.
 
 But even if you fail in getting emacs to do sane formatting, not
-everything is lost: use "indent".
+everything is lost: use ``indent``.
 
 Now, again, GNU indent has the same brain-dead settings that GNU emacs
 has, which is why you need to give it a few command line options.
 However, that's not too bad, because even the makers of GNU indent
 recognize the authority of K&R (the GNU people aren't evil, they are
 just severely misguided in this matter), so you just give indent the
-options "-kr -i8" (stands for "K&R, 8 character indents"), or use
-"scripts/Lindent", which indents in the latest style.
+options ``-kr -i8`` (stands for ``K&R, 8 character indents``), or use
+``scripts/Lindent``, which indents in the latest style.
 
-"indent" has a lot of options, and especially when it comes to comment
+``indent`` has a lot of options, and especially when it comes to comment
 re-formatting you may want to take a look at the man page.  But
-remember: "indent" is not a fix for bad programming.
+remember: ``indent`` is not a fix for bad programming.
 
 
-		Chapter 10: Kconfig configuration files
+10) Kconfig configuration files
+-------------------------------
 
 For all of the Kconfig* configuration files throughout the source tree,
-the indentation is somewhat different.  Lines under a "config" definition
+the indentation is somewhat different.  Lines under a ``config`` definition
 are indented with one tab, while help text is indented an additional two
-spaces.  Example:
+spaces.  Example::
 
-config AUDIT
+  config AUDIT
 	bool "Auditing support"
 	depends on NET
 	help
@@ -569,9 +641,9 @@
 	  auditing without CONFIG_AUDITSYSCALL.
 
 Seriously dangerous features (such as write support for certain
-filesystems) should advertise this prominently in their prompt string:
+filesystems) should advertise this prominently in their prompt string::
 
-config ADFS_FS_RW
+  config ADFS_FS_RW
 	bool "ADFS write support (DANGEROUS)"
 	depends on ADFS_FS
 	...
@@ -580,41 +652,45 @@
 Documentation/kbuild/kconfig-language.txt.
 
 
-		Chapter 11: Data structures
+11) Data structures
+-------------------
 
 Data structures that have visibility outside the single-threaded
 environment they are created and destroyed in should always have
 reference counts.  In the kernel, garbage collection doesn't exist (and
 outside the kernel garbage collection is slow and inefficient), which
-means that you absolutely _have_ to reference count all your uses.
+means that you absolutely **have** to reference count all your uses.
 
 Reference counting means that you can avoid locking, and allows multiple
 users to have access to the data structure in parallel - and not having
 to worry about the structure suddenly going away from under them just
 because they slept or did something else for a while.
 
-Note that locking is _not_ a replacement for reference counting.
+Note that locking is **not** a replacement for reference counting.
 Locking is used to keep data structures coherent, while reference
 counting is a memory management technique.  Usually both are needed, and
 they are not to be confused with each other.
 
 Many data structures can indeed have two levels of reference counting,
-when there are users of different "classes".  The subclass count counts
+when there are users of different ``classes``.  The subclass count counts
 the number of subclass users, and decrements the global count just once
 when the subclass count goes to zero.
 
-Examples of this kind of "multi-level-reference-counting" can be found in
-memory management ("struct mm_struct": mm_users and mm_count), and in
-filesystem code ("struct super_block": s_count and s_active).
+Examples of this kind of ``multi-level-reference-counting`` can be found in
+memory management (``struct mm_struct``: mm_users and mm_count), and in
+filesystem code (``struct super_block``: s_count and s_active).
 
 Remember: if another thread can find your data structure, and you don't
 have a reference count on it, you almost certainly have a bug.
 
 
-		Chapter 12: Macros, Enums and RTL
+12) Macros, Enums and RTL
+-------------------------
 
 Names of macros defining constants and labels in enums are capitalized.
 
+.. code-block:: c
+
 	#define CONSTANT 0x12345
 
 Enums are preferred when defining several related constants.
@@ -626,7 +702,9 @@
 
 Macros with multiple statements should be enclosed in a do - while block:
 
-	#define macrofun(a, b, c) 			\
+.. code-block:: c
+
+	#define macrofun(a, b, c)			\
 		do {					\
 			if (a == 5)			\
 				do_this(b, c);		\
@@ -636,17 +714,21 @@
 
 1) macros that affect control flow:
 
+.. code-block:: c
+
 	#define FOO(x)					\
 		do {					\
 			if (blah(x) < 0)		\
 				return -EBUGGERED;	\
 		} while (0)
 
-is a _very_ bad idea.  It looks like a function call but exits the "calling"
+is a **very** bad idea.  It looks like a function call but exits the ``calling``
 function; don't break the internal parsers of those who will read the code.
 
 2) macros that depend on having a local variable with a magic name:
 
+.. code-block:: c
+
 	#define FOO(val) bar(index, val)
 
 might look like a good thing, but it's confusing as hell when one reads the
@@ -659,18 +741,22 @@
 must enclose the expression in parentheses. Beware of similar issues with
 macros using parameters.
 
+.. code-block:: c
+
 	#define CONSTANT 0x4000
 	#define CONSTEXP (CONSTANT | 3)
 
 5) namespace collisions when defining local variables in macros resembling
 functions:
 
-#define FOO(x)				\
-({					\
-	typeof(x) ret;			\
-	ret = calc_ret(x);		\
-	(ret);				\
-})
+.. code-block:: c
+
+	#define FOO(x)				\
+	({					\
+		typeof(x) ret;			\
+		ret = calc_ret(x);		\
+		(ret);				\
+	})
 
 ret is a common name for a local variable - __foo_ret is less likely
 to collide with an existing variable.
@@ -679,11 +765,12 @@
 covers RTL which is used frequently with assembly language in the kernel.
 
 
-		Chapter 13: Printing kernel messages
+13) Printing kernel messages
+----------------------------
 
 Kernel developers like to be seen as literate. Do mind the spelling
 of kernel messages to make a good impression. Do not use crippled
-words like "dont"; use "do not" or "don't" instead.  Make the messages
+words like ``dont``; use ``do not`` or ``don't`` instead.  Make the messages
 concise, clear, and unambiguous.
 
 Kernel messages do not have to be terminated with a period.
@@ -713,7 +800,8 @@
 used.
 
 
-		Chapter 14: Allocating memory
+14) Allocating memory
+---------------------
 
 The kernel provides the following general purpose memory allocators:
 kmalloc(), kzalloc(), kmalloc_array(), kcalloc(), vmalloc(), and
@@ -722,6 +810,8 @@
 
 The preferred form for passing a size of a struct is the following:
 
+.. code-block:: c
+
 	p = kmalloc(sizeof(*p), ...);
 
 The alternative form where struct name is spelled out hurts readability and
@@ -734,20 +824,25 @@
 
 The preferred form for allocating an array is the following:
 
+.. code-block:: c
+
 	p = kmalloc_array(n, sizeof(...), ...);
 
 The preferred form for allocating a zeroed array is the following:
 
+.. code-block:: c
+
 	p = kcalloc(n, sizeof(...), ...);
 
 Both forms check for overflow on the allocation size n * sizeof(...),
 and return NULL if that occurred.
 
 
-		Chapter 15: The inline disease
+15) The inline disease
+----------------------
 
 There appears to be a common misperception that gcc has a magic "make me
-faster" speedup option called "inline". While the use of inlines can be
+faster" speedup option called ``inline``. While the use of inlines can be
 appropriate (for example as a means of replacing macros, see Chapter 12), it
 very often is not. Abundant use of the inline keyword leads to a much bigger
 kernel, which in turn slows the system as a whole down, due to a bigger
@@ -771,26 +866,27 @@
 something it would have done anyway.
 
 
-		Chapter 16: Function return values and names
+16) Function return values and names
+------------------------------------
 
 Functions can return values of many different kinds, and one of the
 most common is a value indicating whether the function succeeded or
 failed.  Such a value can be represented as an error-code integer
-(-Exxx = failure, 0 = success) or a "succeeded" boolean (0 = failure,
+(-Exxx = failure, 0 = success) or a ``succeeded`` boolean (0 = failure,
 non-zero = success).
 
 Mixing up these two sorts of representations is a fertile source of
 difficult-to-find bugs.  If the C language included a strong distinction
 between integers and booleans then the compiler would find these mistakes
 for us... but it doesn't.  To help prevent such bugs, always follow this
-convention:
+convention::
 
 	If the name of a function is an action or an imperative command,
 	the function should return an error-code integer.  If the name
 	is a predicate, the function should return a "succeeded" boolean.
 
-For example, "add work" is a command, and the add_work() function returns 0
-for success or -EBUSY for failure.  In the same way, "PCI device present" is
+For example, ``add work`` is a command, and the add_work() function returns 0
+for success or -EBUSY for failure.  In the same way, ``PCI device present`` is
 a predicate, and the pci_dev_present() function returns 1 if it succeeds in
 finding a matching device or 0 if it doesn't.
 
@@ -805,17 +901,22 @@
 NULL or the ERR_PTR mechanism to report failure.
 
 
-		Chapter 17:  Don't re-invent the kernel macros
+17) Don't re-invent the kernel macros
+-------------------------------------
 
 The header file include/linux/kernel.h contains a number of macros that
 you should use, rather than explicitly coding some variant of them yourself.
 For example, if you need to calculate the length of an array, take advantage
 of the macro
 
+.. code-block:: c
+
 	#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 
 Similarly, if you need to calculate the size of some structure member, use
 
+.. code-block:: c
+
 	#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
 
 There are also min() and max() macros that do strict type checking if you
@@ -823,16 +924,21 @@
 defined that you shouldn't reproduce in your code.
 
 
-		Chapter 18:  Editor modelines and other cruft
+18) Editor modelines and other cruft
+------------------------------------
 
 Some editors can interpret configuration information embedded in source files,
 indicated with special markers.  For example, emacs interprets lines marked
 like this:
 
+.. code-block:: c
+
 	-*- mode: c -*-
 
 Or like this:
 
+.. code-block:: c
+
 	/*
 	Local Variables:
 	compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c"
@@ -841,6 +947,8 @@
 
 Vim interprets markers that look like this:
 
+.. code-block:: c
+
 	/* vim:set sw=8 noet */
 
 Do not include any of these in source files.  People have their own personal
@@ -850,7 +958,8 @@
 work correctly.
 
 
-		Chapter 19:  Inline assembly
+19) Inline assembly
+-------------------
 
 In architecture-specific code, you may need to use inline assembly to interface
 with CPU or platform functionality.  Don't hesitate to do so when necessary.
@@ -863,7 +972,7 @@
 
 Large, non-trivial assembly functions should go in .S files, with corresponding
 C prototypes defined in C header files.  The C prototypes for assembly
-functions should use "asmlinkage".
+functions should use ``asmlinkage``.
 
 You may need to mark your asm statement as volatile, to prevent GCC from
 removing it if GCC doesn't notice any side effects.  You don't always need to
@@ -874,12 +983,15 @@
 string, and end each string except the last with \n\t to properly indent the
 next instruction in the assembly output:
 
+.. code-block:: c
+
 	asm ("magic %reg1, #42\n\t"
 	     "more_magic %reg2, %reg3"
 	     : /* outputs */ : /* inputs */ : /* clobbers */);
 
 
-		Chapter 20: Conditional Compilation
+20) Conditional Compilation
+---------------------------
 
 Wherever possible, don't use preprocessor conditionals (#if, #ifdef) in .c
 files; doing so makes code harder to read and logic harder to follow.  Instead,
@@ -903,6 +1015,8 @@
 Within code, where possible, use the IS_ENABLED macro to convert a Kconfig
 symbol into a C boolean expression, and use it in a normal C conditional:
 
+.. code-block:: c
+
 	if (IS_ENABLED(CONFIG_SOMETHING)) {
 		...
 	}
@@ -918,12 +1032,15 @@
 place a comment after the #endif on the same line, noting the conditional
 expression used.  For instance:
 
+.. code-block:: c
+
 	#ifdef CONFIG_SOMETHING
 	...
 	#endif /* CONFIG_SOMETHING */
 
 
-		Appendix I: References
+Appendix I) References
+----------------------
 
 The C Programming Language, Second Edition
 by Brian W. Kernighan and Dennis M. Ritchie.
@@ -943,4 +1060,3 @@
 
 Kernel CodingStyle, by greg@kroah.com at OLS 2002:
 http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
-
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index 781024e..979228b 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -699,7 +699,7 @@
 		dma_addr_t mapping;
 
 		mapping = dma_map_single(cp->dev, buffer, len, DMA_FROM_DEVICE);
-		if (dma_mapping_error(cp->dev, dma_handle)) {
+		if (dma_mapping_error(cp->dev, mapping)) {
 			/*
 			 * reduce current DMA mapping usage,
 			 * delay and try again later or
@@ -931,10 +931,8 @@
 
 1) Struct scatterlist requirements.
 
-   Don't invent the architecture specific struct scatterlist; just use
-   <asm-generic/scatterlist.h>. You need to enable
-   CONFIG_NEED_SG_DMA_LENGTH if the architecture supports IOMMUs
-   (including software IOMMU).
+   You need to enable CONFIG_NEED_SG_DMA_LENGTH if the architecture
+   supports IOMMUs (including software IOMMU).
 
 2) ARCH_DMA_MINALIGN
 
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
index 64460a8..736f591 100644
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@@ -6,7 +6,7 @@
 # To add a new book the only step required is to add the book to the
 # list of DOCBOOKS.
 
-DOCBOOKS := z8530book.xml device-drivers.xml \
+DOCBOOKS := z8530book.xml  \
 	    kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
 	    writing_usb_driver.xml networking.xml \
 	    kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
@@ -22,8 +22,14 @@
 # Skip DocBook build if the user explicitly requested no DOCBOOKS.
 .DEFAULT:
 	@echo "  SKIP    DocBook $@ target (DOCBOOKS=\"\" specified)."
-
 else
+ifneq ($(SPHINXDIRS),)
+
+# Skip DocBook build if the user explicitly requested a sphinx dir
+.DEFAULT:
+	@echo "  SKIP    DocBook $@ target (SPHINXDIRS specified)."
+else
+
 
 ###
 # The build process is as follows (targets):
@@ -66,6 +72,7 @@
 
 # no-op for the DocBook toolchain
 epubdocs:
+latexdocs:
 
 ###
 #External programs used
@@ -221,6 +228,7 @@
 	   echo "</programlisting>")  > $@
 
 endif # DOCBOOKS=""
+endif # SPHINDIR=...
 
 ###
 # Help targets as used by the top-level makefile
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
deleted file mode 100644
index 9c10030e..0000000
--- a/Documentation/DocBook/device-drivers.tmpl
+++ /dev/null
@@ -1,521 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
-	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
-
-<book id="LinuxDriversAPI">
- <bookinfo>
-  <title>Linux Device Drivers</title>
-
-  <legalnotice>
-   <para>
-     This documentation is free software; you can redistribute
-     it and/or modify it under the terms of the GNU General Public
-     License as published by the Free Software Foundation; either
-     version 2 of the License, or (at your option) any later
-     version.
-   </para>
-
-   <para>
-     This program is distributed in the hope that it will be
-     useful, but WITHOUT ANY WARRANTY; without even the implied
-     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-     See the GNU General Public License for more details.
-   </para>
-
-   <para>
-     You should have received a copy of the GNU General Public
-     License along with this program; if not, write to the Free
-     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
-     MA 02111-1307 USA
-   </para>
-
-   <para>
-     For more details see the file COPYING in the source
-     distribution of Linux.
-   </para>
-  </legalnotice>
- </bookinfo>
-
-<toc></toc>
-
-  <chapter id="Basics">
-     <title>Driver Basics</title>
-     <sect1><title>Driver Entry and Exit points</title>
-!Iinclude/linux/init.h
-     </sect1>
-
-     <sect1><title>Atomic and pointer manipulation</title>
-!Iarch/x86/include/asm/atomic.h
-     </sect1>
-
-     <sect1><title>Delaying, scheduling, and timer routines</title>
-!Iinclude/linux/sched.h
-!Ekernel/sched/core.c
-!Ikernel/sched/cpupri.c
-!Ikernel/sched/fair.c
-!Iinclude/linux/completion.h
-!Ekernel/time/timer.c
-     </sect1>
-     <sect1><title>Wait queues and Wake events</title>
-!Iinclude/linux/wait.h
-!Ekernel/sched/wait.c
-     </sect1>
-     <sect1><title>High-resolution timers</title>
-!Iinclude/linux/ktime.h
-!Iinclude/linux/hrtimer.h
-!Ekernel/time/hrtimer.c
-     </sect1>
-     <sect1><title>Workqueues and Kevents</title>
-!Iinclude/linux/workqueue.h
-!Ekernel/workqueue.c
-     </sect1>
-     <sect1><title>Internal Functions</title>
-!Ikernel/exit.c
-!Ikernel/signal.c
-!Iinclude/linux/kthread.h
-!Ekernel/kthread.c
-     </sect1>
-
-     <sect1><title>Kernel objects manipulation</title>
-<!--
-X!Iinclude/linux/kobject.h
--->
-!Elib/kobject.c
-     </sect1>
-
-     <sect1><title>Kernel utility functions</title>
-!Iinclude/linux/kernel.h
-!Ekernel/printk/printk.c
-!Ekernel/panic.c
-!Ekernel/sys.c
-!Ekernel/rcu/srcu.c
-!Ekernel/rcu/tree.c
-!Ekernel/rcu/tree_plugin.h
-!Ekernel/rcu/update.c
-     </sect1>
-
-     <sect1><title>Device Resource Management</title>
-!Edrivers/base/devres.c
-     </sect1>
-
-  </chapter>
-
-  <chapter id="devdrivers">
-     <title>Device drivers infrastructure</title>
-     <sect1><title>The Basic Device Driver-Model Structures </title>
-!Iinclude/linux/device.h
-     </sect1>
-     <sect1><title>Device Drivers Base</title>
-!Idrivers/base/init.c
-!Edrivers/base/driver.c
-!Edrivers/base/core.c
-!Edrivers/base/syscore.c
-!Edrivers/base/class.c
-!Idrivers/base/node.c
-!Edrivers/base/firmware_class.c
-!Edrivers/base/transport_class.c
-<!-- Cannot be included, because
-     attribute_container_add_class_device_adapter
- and attribute_container_classdev_to_container
-     exceed allowed 44 characters maximum
-X!Edrivers/base/attribute_container.c
--->
-!Edrivers/base/dd.c
-<!--
-X!Edrivers/base/interface.c
--->
-!Iinclude/linux/platform_device.h
-!Edrivers/base/platform.c
-!Edrivers/base/bus.c
-     </sect1>
-     <sect1>
-       <title>Buffer Sharing and Synchronization</title>
-       <para>
-         The dma-buf subsystem provides the framework for sharing buffers
-         for hardware (DMA) access across multiple device drivers and
-         subsystems, and for synchronizing asynchronous hardware access.
-       </para>
-       <para>
-         This is used, for example, by drm "prime" multi-GPU support, but
-         is of course not limited to GPU use cases.
-       </para>
-       <para>
-         The three main components of this are: (1) dma-buf, representing
-         a sg_table and exposed to userspace as a file descriptor to allow
-         passing between devices, (2) fence, which provides a mechanism
-         to signal when one device as finished access, and (3) reservation,
-         which manages the shared or exclusive fence(s) associated with
-         the buffer.
-       </para>
-       <sect2><title>dma-buf</title>
-!Edrivers/dma-buf/dma-buf.c
-!Iinclude/linux/dma-buf.h
-       </sect2>
-       <sect2><title>reservation</title>
-!Pdrivers/dma-buf/reservation.c Reservation Object Overview
-!Edrivers/dma-buf/reservation.c
-!Iinclude/linux/reservation.h
-       </sect2>
-       <sect2><title>fence</title>
-!Edrivers/dma-buf/fence.c
-!Iinclude/linux/fence.h
-!Edrivers/dma-buf/seqno-fence.c
-!Iinclude/linux/seqno-fence.h
-!Edrivers/dma-buf/fence-array.c
-!Iinclude/linux/fence-array.h
-!Edrivers/dma-buf/reservation.c
-!Iinclude/linux/reservation.h
-!Edrivers/dma-buf/sync_file.c
-!Iinclude/linux/sync_file.h
-       </sect2>
-     </sect1>
-     <sect1><title>Device Drivers DMA Management</title>
-!Edrivers/base/dma-coherent.c
-!Edrivers/base/dma-mapping.c
-     </sect1>
-     <sect1><title>Device Drivers Power Management</title>
-!Edrivers/base/power/main.c
-     </sect1>
-     <sect1><title>Device Drivers ACPI Support</title>
-<!-- Internal functions only
-X!Edrivers/acpi/sleep/main.c
-X!Edrivers/acpi/sleep/wakeup.c
-X!Edrivers/acpi/motherboard.c
-X!Edrivers/acpi/bus.c
--->
-!Edrivers/acpi/scan.c
-!Idrivers/acpi/scan.c
-<!-- No correct structured comments
-X!Edrivers/acpi/pci_bind.c
--->
-     </sect1>
-     <sect1><title>Device drivers PnP support</title>
-!Idrivers/pnp/core.c
-<!-- No correct structured comments
-X!Edrivers/pnp/system.c
- -->
-!Edrivers/pnp/card.c
-!Idrivers/pnp/driver.c
-!Edrivers/pnp/manager.c
-!Edrivers/pnp/support.c
-     </sect1>
-     <sect1><title>Userspace IO devices</title>
-!Edrivers/uio/uio.c
-!Iinclude/linux/uio_driver.h
-     </sect1>
-  </chapter>
-
-  <chapter id="parportdev">
-     <title>Parallel Port Devices</title>
-!Iinclude/linux/parport.h
-!Edrivers/parport/ieee1284.c
-!Edrivers/parport/share.c
-!Idrivers/parport/daisy.c
-  </chapter>
-
-  <chapter id="message_devices">
-	<title>Message-based devices</title>
-     <sect1><title>Fusion message devices</title>
-!Edrivers/message/fusion/mptbase.c
-!Idrivers/message/fusion/mptbase.c
-!Edrivers/message/fusion/mptscsih.c
-!Idrivers/message/fusion/mptscsih.c
-!Idrivers/message/fusion/mptctl.c
-!Idrivers/message/fusion/mptspi.c
-!Idrivers/message/fusion/mptfc.c
-!Idrivers/message/fusion/mptlan.c
-     </sect1>
-  </chapter>
-
-  <chapter id="snddev">
-     <title>Sound Devices</title>
-!Iinclude/sound/core.h
-!Esound/sound_core.c
-!Iinclude/sound/pcm.h
-!Esound/core/pcm.c
-!Esound/core/device.c
-!Esound/core/info.c
-!Esound/core/rawmidi.c
-!Esound/core/sound.c
-!Esound/core/memory.c
-!Esound/core/pcm_memory.c
-!Esound/core/init.c
-!Esound/core/isadma.c
-!Esound/core/control.c
-!Esound/core/pcm_lib.c
-!Esound/core/hwdep.c
-!Esound/core/pcm_native.c
-!Esound/core/memalloc.c
-<!-- FIXME: Removed for now since no structured comments in source
-X!Isound/sound_firmware.c
--->
-  </chapter>
-
-
-  <chapter id="uart16x50">
-     <title>16x50 UART Driver</title>
-!Edrivers/tty/serial/serial_core.c
-!Edrivers/tty/serial/8250/8250_core.c
-  </chapter>
-
-  <chapter id="fbdev">
-     <title>Frame Buffer Library</title>
-
-     <para>
-       The frame buffer drivers depend heavily on four data structures.
-       These structures are declared in include/linux/fb.h.  They are
-       fb_info, fb_var_screeninfo, fb_fix_screeninfo and fb_monospecs.
-       The last three can be made available to and from userland.
-     </para>
-
-     <para>
-       fb_info defines the current state of a particular video card.
-       Inside fb_info, there exists a fb_ops structure which is a
-       collection of needed functions to make fbdev and fbcon work.
-       fb_info is only visible to the kernel.
-     </para>
-
-     <para>
-       fb_var_screeninfo is used to describe the features of a video card
-       that are user defined.  With fb_var_screeninfo, things such as
-       depth and the resolution may be defined.
-     </para>
-
-     <para>
-       The next structure is fb_fix_screeninfo. This defines the
-       properties of a card that are created when a mode is set and can't
-       be changed otherwise.  A good example of this is the start of the
-       frame buffer memory.  This "locks" the address of the frame buffer
-       memory, so that it cannot be changed or moved.
-     </para>
-
-     <para>
-       The last structure is fb_monospecs. In the old API, there was
-       little importance for fb_monospecs. This allowed for forbidden things
-       such as setting a mode of 800x600 on a fix frequency monitor. With
-       the new API, fb_monospecs prevents such things, and if used
-       correctly, can prevent a monitor from being cooked.  fb_monospecs
-       will not be useful until kernels 2.5.x.
-     </para>
-
-     <sect1><title>Frame Buffer Memory</title>
-!Edrivers/video/fbdev/core/fbmem.c
-     </sect1>
-<!--
-     <sect1><title>Frame Buffer Console</title>
-X!Edrivers/video/console/fbcon.c
-     </sect1>
--->
-     <sect1><title>Frame Buffer Colormap</title>
-!Edrivers/video/fbdev/core/fbcmap.c
-     </sect1>
-<!-- FIXME:
-  drivers/video/fbgen.c has no docs, which stuffs up the sgml.  Comment
-  out until somebody adds docs.  KAO
-     <sect1><title>Frame Buffer Generic Functions</title>
-X!Idrivers/video/fbgen.c
-     </sect1>
-KAO -->
-     <sect1><title>Frame Buffer Video Mode Database</title>
-!Idrivers/video/fbdev/core/modedb.c
-!Edrivers/video/fbdev/core/modedb.c
-     </sect1>
-     <sect1><title>Frame Buffer Macintosh Video Mode Database</title>
-!Edrivers/video/fbdev/macmodes.c
-     </sect1>
-     <sect1><title>Frame Buffer Fonts</title>
-        <para>
-           Refer to the file lib/fonts/fonts.c for more information.
-        </para>
-<!-- FIXME: Removed for now since no structured comments in source
-X!Ilib/fonts/fonts.c
--->
-     </sect1>
-  </chapter>
-
-  <chapter id="input_subsystem">
-     <title>Input Subsystem</title>
-     <sect1><title>Input core</title>
-!Iinclude/linux/input.h
-!Edrivers/input/input.c
-!Edrivers/input/ff-core.c
-!Edrivers/input/ff-memless.c
-     </sect1>
-     <sect1><title>Multitouch Library</title>
-!Iinclude/linux/input/mt.h
-!Edrivers/input/input-mt.c
-     </sect1>
-     <sect1><title>Polled input devices</title>
-!Iinclude/linux/input-polldev.h
-!Edrivers/input/input-polldev.c
-     </sect1>
-     <sect1><title>Matrix keyboards/keypads</title>
-!Iinclude/linux/input/matrix_keypad.h
-     </sect1>
-     <sect1><title>Sparse keymap support</title>
-!Iinclude/linux/input/sparse-keymap.h
-!Edrivers/input/sparse-keymap.c
-     </sect1>
-  </chapter>
-
-  <chapter id="spi">
-      <title>Serial Peripheral Interface (SPI)</title>
-  <para>
-	SPI is the "Serial Peripheral Interface", widely used with
-	embedded systems because it is a simple and efficient
-	interface:  basically a multiplexed shift register.
-	Its three signal wires hold a clock (SCK, often in the range
-	of 1-20 MHz), a "Master Out, Slave In" (MOSI) data line, and
-	a "Master In, Slave Out" (MISO) data line.
-	SPI is a full duplex protocol; for each bit shifted out the
-	MOSI line (one per clock) another is shifted in on the MISO line.
-	Those bits are assembled into words of various sizes on the
-	way to and from system memory.
-	An additional chipselect line is usually active-low (nCS);
-	four signals are normally used for each peripheral, plus
-	sometimes an interrupt.
-  </para>
-  <para>
-	The SPI bus facilities listed here provide a generalized
-	interface to declare SPI busses and devices, manage them
-	according to the standard Linux driver model, and perform
-	input/output operations.
-	At this time, only "master" side interfaces are supported,
-	where Linux talks to SPI peripherals and does not implement
-	such a peripheral itself.
-	(Interfaces to support implementing SPI slaves would
-	necessarily look different.)
-  </para>
-  <para>
-	The programming interface is structured around two kinds of driver,
-	and two kinds of device.
-	A "Controller Driver" abstracts the controller hardware, which may
-	be as simple as a set of GPIO pins or as complex as a pair of FIFOs
-	connected to dual DMA engines on the other side of the SPI shift
-	register (maximizing throughput).  Such drivers bridge between
-	whatever bus they sit on (often the platform bus) and SPI, and
-	expose the SPI side of their device as a
-	<structname>struct spi_master</structname>.
-	SPI devices are children of that master, represented as a
-	<structname>struct spi_device</structname> and manufactured from
-	<structname>struct spi_board_info</structname> descriptors which
-	are usually provided by board-specific initialization code.
-	A <structname>struct spi_driver</structname> is called a
-	"Protocol Driver", and is bound to a spi_device using normal
-	driver model calls.
-  </para>
-  <para>
-	The I/O model is a set of queued messages.  Protocol drivers
-	submit one or more <structname>struct spi_message</structname>
-	objects, which are processed and completed asynchronously.
-	(There are synchronous wrappers, however.)  Messages are
-	built from one or more <structname>struct spi_transfer</structname>
-	objects, each of which wraps a full duplex SPI transfer.
-	A variety of protocol tweaking options are needed, because
-	different chips adopt very different policies for how they
-	use the bits transferred with SPI.
-  </para>
-!Iinclude/linux/spi/spi.h
-!Fdrivers/spi/spi.c spi_register_board_info
-!Edrivers/spi/spi.c
-  </chapter>
-
-  <chapter id="i2c">
-     <title>I<superscript>2</superscript>C and SMBus Subsystem</title>
-
-     <para>
-	I<superscript>2</superscript>C (or without fancy typography, "I2C")
-	is an acronym for the "Inter-IC" bus, a simple bus protocol which is
-	widely used where low data rate communications suffice.
-	Since it's also a licensed trademark, some vendors use another
-	name (such as "Two-Wire Interface", TWI) for the same bus.
-	I2C only needs two signals (SCL for clock, SDA for data), conserving
-	board real estate and minimizing signal quality issues.
-	Most I2C devices use seven bit addresses, and bus speeds of up
-	to 400 kHz; there's a high speed extension (3.4 MHz) that's not yet
-	found wide use.
-	I2C is a multi-master bus; open drain signaling is used to
-	arbitrate between masters, as well as to handshake and to
-	synchronize clocks from slower clients.
-     </para>
-
-     <para>
-	The Linux I2C programming interfaces support only the master
-	side of bus interactions, not the slave side.
-	The programming interface is structured around two kinds of driver,
-	and two kinds of device.
-	An I2C "Adapter Driver" abstracts the controller hardware; it binds
-	to a physical device (perhaps a PCI device or platform_device) and
-	exposes a <structname>struct i2c_adapter</structname> representing
-	each I2C bus segment it manages.
-	On each I2C bus segment will be I2C devices represented by a
-	<structname>struct i2c_client</structname>.  Those devices will
-	be bound to a <structname>struct i2c_driver</structname>,
-	which should follow the standard Linux driver model.
-	(At this writing, a legacy model is more widely used.)
-	There are functions to perform various I2C protocol operations; at
-	this writing all such functions are usable only from task context.
-     </para>
-
-     <para>
-	The System Management Bus (SMBus) is a sibling protocol.  Most SMBus
-	systems are also I2C conformant.  The electrical constraints are
-	tighter for SMBus, and it standardizes particular protocol messages
-	and idioms.  Controllers that support I2C can also support most
-	SMBus operations, but SMBus controllers don't support all the protocol
-	options that an I2C controller will.
-	There are functions to perform various SMBus protocol operations,
-	either using I2C primitives or by issuing SMBus commands to
-	i2c_adapter devices which don't support those I2C operations.
-     </para>
-
-!Iinclude/linux/i2c.h
-!Fdrivers/i2c/i2c-boardinfo.c i2c_register_board_info
-!Edrivers/i2c/i2c-core.c
-  </chapter>
-
-  <chapter id="hsi">
-     <title>High Speed Synchronous Serial Interface (HSI)</title>
-
-     <para>
-	High Speed Synchronous Serial Interface (HSI) is a
-	serial interface mainly used for connecting application
-	engines (APE) with cellular modem engines (CMT) in cellular
-	handsets.
-
-	HSI provides multiplexing for up to 16 logical channels,
-	low-latency and full duplex communication.
-     </para>
-
-!Iinclude/linux/hsi/hsi.h
-!Edrivers/hsi/hsi_core.c
-  </chapter>
-
-  <chapter id="pwm">
-    <title>Pulse-Width Modulation (PWM)</title>
-    <para>
-      Pulse-width modulation is a modulation technique primarily used to
-      control power supplied to electrical devices.
-    </para>
-    <para>
-      The PWM framework provides an abstraction for providers and consumers
-      of PWM signals. A controller that provides one or more PWM signals is
-      registered as <structname>struct pwm_chip</structname>. Providers are
-      expected to embed this structure in a driver-specific structure. This
-      structure contains fields that describe a particular chip.
-    </para>
-    <para>
-      A chip exposes one or more PWM signal sources, each of which exposed
-      as a <structname>struct pwm_device</structname>. Operations can be
-      performed on PWM devices to control the period, duty cycle, polarity
-      and active state of the signal.
-    </para>
-    <para>
-      Note that PWM devices are exclusive resources: they can always only be
-      used by one consumer at a time.
-    </para>
-!Iinclude/linux/pwm.h
-!Edrivers/pwm/core.c
-  </chapter>
-
-</book>
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
index 1f345da..5f04234 100644
--- a/Documentation/HOWTO
+++ b/Documentation/HOWTO
@@ -1,5 +1,5 @@
 HOWTO do Linux kernel development
----------------------------------
+=================================
 
 This is the be-all, end-all document on this topic.  It contains
 instructions on how to become a Linux kernel developer and how to learn
@@ -28,6 +28,7 @@
 you plan to do low-level development for that architecture.  Though they
 are not a good substitute for a solid C education and/or years of
 experience, the following books are good for, if anything, reference:
+
  - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
  - "Practical C Programming" by Steve Oualline [O'Reilly]
  - "C:  A Reference Manual" by Harbison and Steele [Prentice Hall]
@@ -64,7 +65,8 @@
 their statements on legal matters.
 
 For common questions and answers about the GPL, please see:
-	http://www.gnu.org/licenses/gpl-faq.html
+
+	https://www.gnu.org/licenses/gpl-faq.html
 
 
 Documentation
@@ -82,96 +84,118 @@
 
 Here is a list of files that are in the kernel source tree that are
 required reading:
+
   README
     This file gives a short background on the Linux kernel and describes
     what is necessary to do to configure and build the kernel.  People
     who are new to the kernel should start here.
 
-  Documentation/Changes
+  :ref:`Documentation/Changes <changes>`
     This file gives a list of the minimum levels of various software
     packages that are necessary to build and run the kernel
     successfully.
 
-  Documentation/CodingStyle
+  :ref:`Documentation/CodingStyle <codingstyle>`
     This describes the Linux kernel coding style, and some of the
     rationale behind it. All new code is expected to follow the
     guidelines in this document. Most maintainers will only accept
     patches if these rules are followed, and many people will only
     review code if it is in the proper style.
 
-  Documentation/SubmittingPatches
-  Documentation/SubmittingDrivers
+  :ref:`Documentation/SubmittingPatches <submittingpatches>` and :ref:`Documentation/SubmittingDrivers <submittingdrivers>`
     These files describe in explicit detail how to successfully create
     and send a patch, including (but not limited to):
+
        - Email contents
        - Email format
        - Who to send it to
+
     Following these rules will not guarantee success (as all patches are
     subject to scrutiny for content and style), but not following them
     will almost always prevent it.
 
     Other excellent descriptions of how to create patches properly are:
+
 	"The Perfect Patch"
-		http://www.ozlabs.org/~akpm/stuff/tpp.txt
+		https://www.ozlabs.org/~akpm/stuff/tpp.txt
+
 	"Linux kernel patch submission format"
 		http://linux.yyz.us/patch-format.html
 
-  Documentation/stable_api_nonsense.txt
+  :ref:`Documentation/stable_api_nonsense.txt <stable_api_nonsense>`
     This file describes the rationale behind the conscious decision to
     not have a stable API within the kernel, including things like:
+
       - Subsystem shim-layers (for compatibility?)
       - Driver portability between Operating Systems.
       - Mitigating rapid change within the kernel source tree (or
 	preventing rapid change)
+
     This document is crucial for understanding the Linux development
     philosophy and is very important for people moving to Linux from
     development on other Operating Systems.
 
-  Documentation/SecurityBugs
+  :ref:`Documentation/SecurityBugs <securitybugs>`
     If you feel you have found a security problem in the Linux kernel,
     please follow the steps in this document to help notify the kernel
     developers, and help solve the issue.
 
-  Documentation/ManagementStyle
+  :ref:`Documentation/ManagementStyle <managementstyle>`
     This document describes how Linux kernel maintainers operate and the
     shared ethos behind their methodologies.  This is important reading
     for anyone new to kernel development (or anyone simply curious about
     it), as it resolves a lot of common misconceptions and confusion
     about the unique behavior of kernel maintainers.
 
-  Documentation/stable_kernel_rules.txt
+  :ref:`Documentation/stable_kernel_rules.txt <stable_kernel_rules>`
     This file describes the rules on how the stable kernel releases
     happen, and what to do if you want to get a change into one of these
     releases.
 
-  Documentation/kernel-docs.txt
+  :ref:`Documentation/kernel-docs.txt <kernel_docs>`
     A list of external documentation that pertains to kernel
     development.  Please consult this list if you do not find what you
     are looking for within the in-kernel documentation.
 
-  Documentation/applying-patches.txt
+  :ref:`Documentation/applying-patches.txt <applying_patches>`
     A good introduction describing exactly what a patch is and how to
     apply it to the different development branches of the kernel.
 
 The kernel also has a large number of documents that can be
-automatically generated from the source code itself.  This includes a
+automatically generated from the source code itself or from
+ReStructuredText markups (ReST), like this one. This includes a
 full description of the in-kernel API, and rules on how to handle
-locking properly.  The documents will be created in the
-Documentation/DocBook/ directory and can be generated as PDF,
-Postscript, HTML, and man pages by running:
+locking properly.
+
+All such documents can be generated as PDF or HTML by running::
+
 	make pdfdocs
-	make psdocs
 	make htmldocs
-	make mandocs
+
 respectively from the main kernel source directory.
 
+The documents that uses ReST markup will be generated at Documentation/output.
+They can also be generated on LaTeX and ePub formats with::
+
+	make latexdocs
+	make epubdocs
+
+Currently, there are some documents written on DocBook that are in
+the process of conversion to ReST. Such documents will be created in the
+Documentation/DocBook/ directory and can be generated also as
+Postscript or man pages by running::
+
+	make psdocs
+	make mandocs
 
 Becoming A Kernel Developer
 ---------------------------
 
 If you do not know anything about Linux kernel development, you should
 look at the Linux KernelNewbies project:
-	http://kernelnewbies.org
+
+	https://kernelnewbies.org
+
 It consists of a helpful mailing list where you can ask almost any type
 of basic kernel development question (make sure to search the archives
 first, before asking something that has already been answered in the
@@ -187,7 +211,9 @@
 If you do not know where you want to start, but you want to look for
 some task to start doing to join into the kernel development community,
 go to the Linux Kernel Janitor's project:
-	http://kernelnewbies.org/KernelJanitors
+
+	https://kernelnewbies.org/KernelJanitors
+
 It is a great place to start.  It describes a list of relatively simple
 problems that need to be cleaned up and fixed within the Linux kernel
 source tree.  Working with the developers in charge of this project, you
@@ -199,7 +225,8 @@
 tree, but need some help getting it in the proper form, the
 kernel-mentors project was created to help you out with this.  It is a
 mailing list, and can be found at:
-	http://selenic.com/mailman/listinfo/kernel-mentors
+
+	https://selenic.com/mailman/listinfo/kernel-mentors
 
 Before making any actual modifications to the Linux kernel code, it is
 imperative to understand how the code in question works.  For this
@@ -209,6 +236,7 @@
 Cross-Reference project, which is able to present source code in a
 self-referential, indexed webpage format. An excellent up-to-date
 repository of the kernel code may be found at:
+
 	http://lxr.free-electrons.com/
 
 
@@ -218,6 +246,7 @@
 Linux kernel development process currently consists of a few different
 main kernel "branches" and lots of different subsystem-specific kernel
 branches.  These different branches are:
+
   - main 4.x kernel tree
   - 4.x.y -stable kernel tree
   - 4.x -git kernel patches
@@ -227,14 +256,15 @@
 4.x kernel tree
 -----------------
 4.x kernels are maintained by Linus Torvalds, and can be found on
-kernel.org in the pub/linux/kernel/v4.x/ directory.  Its development
+https://kernel.org in the pub/linux/kernel/v4.x/ directory.  Its development
 process is as follows:
+
   - As soon as a new kernel is released a two weeks window is open,
     during this period of time maintainers can submit big diffs to
     Linus, usually the patches that have already been included in the
     -next kernel for a few weeks.  The preferred way to submit big changes
     is using git (the kernel's source management tool, more information
-    can be found at http://git-scm.com/) but plain patches are also just
+    can be found at https://git-scm.com/) but plain patches are also just
     fine.
   - After two weeks a -rc1 kernel is released it is now possible to push
     only patches that do not include new features that could affect the
@@ -253,9 +283,10 @@
 
 It is worth mentioning what Andrew Morton wrote on the linux-kernel
 mailing list about kernel releases:
-	"Nobody knows when a kernel will be released, because it's
+
+	*"Nobody knows when a kernel will be released, because it's
 	released according to perceived bug status, not according to a
-	preconceived timeline."
+	preconceived timeline."*
 
 4.x.y -stable kernel tree
 -------------------------
@@ -301,7 +332,7 @@
 Most of these repositories are git trees, but there are also other SCMs
 in use, or patch queues being published as quilt series.  Addresses of
 these subsystem repositories are listed in the MAINTAINERS file.  Many
-of them can be browsed at http://git.kernel.org/.
+of them can be browsed at https://git.kernel.org/.
 
 Before a proposed patch is committed to such a subsystem tree, it is
 subject to review which primarily happens on mailing lists (see the
@@ -310,7 +341,7 @@
 interface which shows patch postings, any comments on a patch or
 revisions to it, and maintainers can mark patches as under review,
 accepted, or rejected.  Most of these patchwork sites are listed at
-http://patchwork.kernel.org/.
+https://patchwork.kernel.org/.
 
 4.x -next kernel tree for integration tests
 -------------------------------------------
@@ -318,7 +349,8 @@
 tree, they need to be integration-tested.  For this purpose, a special
 testing repository exists into which virtually all subsystem trees are
 pulled on an almost daily basis:
-	http://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
+
+	https://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
 
 This way, the -next kernel gives a summary outlook onto what will be
 expected to go into the mainline kernel at the next merge period.
@@ -328,10 +360,11 @@
 Bug Reporting
 -------------
 
-bugzilla.kernel.org is where the Linux kernel developers track kernel
+https://bugzilla.kernel.org is where the Linux kernel developers track kernel
 bugs.  Users are encouraged to report all bugs that they find in this
 tool.  For details on how to use the kernel bugzilla, please see:
-	http://bugzilla.kernel.org/page.cgi?id=faq.html
+
+	https://bugzilla.kernel.org/page.cgi?id=faq.html
 
 The file REPORTING-BUGS in the main kernel source directory has a good
 template for how to report a possible kernel bug, and details what kind
@@ -349,13 +382,14 @@
 bugs is one of the best ways to get merits among other developers, because
 not many people like wasting time fixing other people's bugs.
 
-To work in the already reported bug reports, go to http://bugzilla.kernel.org.
+To work in the already reported bug reports, go to https://bugzilla.kernel.org.
 If you want to be advised of the future bug reports, you can subscribe to the
 bugme-new mailing list (only new bug reports are mailed here) or to the
 bugme-janitor mailing list (every change in the bugzilla is mailed here)
 
-	http://lists.linux-foundation.org/mailman/listinfo/bugme-new
-	http://lists.linux-foundation.org/mailman/listinfo/bugme-janitors
+	https://lists.linux-foundation.org/mailman/listinfo/bugme-new
+
+	https://lists.linux-foundation.org/mailman/listinfo/bugme-janitors
 
 
 
@@ -365,10 +399,14 @@
 As some of the above documents describe, the majority of the core kernel
 developers participate on the Linux Kernel Mailing list.  Details on how
 to subscribe and unsubscribe from the list can be found at:
+
 	http://vger.kernel.org/vger-lists.html#linux-kernel
+
 There are archives of the mailing list on the web in many different
 places.  Use a search engine to find these archives.  For example:
+
 	http://dir.gmane.org/gmane.linux.kernel
+
 It is highly recommended that you search the archives about the topic
 you want to bring up, before you post it to the list. A lot of things
 already discussed in detail are only recorded at the mailing list
@@ -381,11 +419,13 @@
 
 Many of the lists are hosted on kernel.org. Information on them can be
 found at:
+
 	http://vger.kernel.org/vger-lists.html
 
 Please remember to follow good behavioral habits when using the lists.
 Though a bit cheesy, the following URL has some simple guidelines for
 interacting with the list (or any list):
+
 	http://www.albion.com/netiquette/
 
 If multiple people respond to your mail, the CC: list of recipients may
@@ -400,13 +440,14 @@
 writing at the top of the mail.
 
 If you add patches to your mail, make sure they are plain readable text
-as stated in Documentation/SubmittingPatches. Kernel developers don't
-want to deal with attachments or compressed patches; they may want
-to comment on individual lines of your patch, which works only that way.
-Make sure you use a mail program that does not mangle spaces and tab
-characters. A good first test is to send the mail to yourself and try
-to apply your own patch by yourself. If that doesn't work, get your
-mail program fixed or change it until it works.
+as stated in Documentation/SubmittingPatches.
+Kernel developers don't want to deal with
+attachments or compressed patches; they may want to comment on
+individual lines of your patch, which works only that way. Make sure you
+use a mail program that does not mangle spaces and tab characters. A
+good first test is to send the mail to yourself and try to apply your
+own patch by yourself. If that doesn't work, get your mail program fixed
+or change it until it works.
 
 Above all, please remember to show respect to other subscribers.
 
@@ -418,6 +459,7 @@
 there is.  When you submit a patch for acceptance, it will be reviewed
 on its technical merits and those alone.  So, what should you be
 expecting?
+
   - criticism
   - comments
   - requests for change
@@ -432,6 +474,7 @@
 again, sometimes things get lost in the huge volume.
 
 What should you not do?
+
   - expect your patch to be accepted without question
   - become defensive
   - ignore comments
@@ -445,8 +488,8 @@
 toward a solution that is right.
 
 It is normal that the answers to your first patch might simply be a list
-of a dozen things you should correct.  This does _not_ imply that your
-patch will not be accepted, and it is _not_ meant against you
+of a dozen things you should correct.  This does **not** imply that your
+patch will not be accepted, and it is **not** meant against you
 personally.  Simply correct all issues raised against your patch and
 resend it.
 
@@ -457,7 +500,9 @@
 The kernel community works differently than most traditional corporate
 development environments.  Here are a list of things that you can try to
 do to avoid problems:
+
   Good things to say regarding your proposed changes:
+
     - "This solves multiple problems."
     - "This deletes 2000 lines of code."
     - "Here is a patch that explains what I am trying to describe."
@@ -466,6 +511,7 @@
     - "This increases performance on typical machines..."
 
   Bad things you should avoid saying:
+
     - "We did it this way in AIX/ptx/Solaris, so therefore it must be
       good..."
     - "I've being doing this for 20 years, so..."
@@ -527,17 +573,18 @@
    and simplify (or simply re-order) patches before submitting them.
 
 Here is an analogy from kernel developer Al Viro:
-	"Think of a teacher grading homework from a math student.  The
+
+	*"Think of a teacher grading homework from a math student.  The
 	teacher does not want to see the student's trials and errors
 	before they came up with the solution. They want to see the
 	cleanest, most elegant answer.  A good student knows this, and
 	would never submit her intermediate work before the final
-	solution."
+	solution.*
 
-	The same is true of kernel development. The maintainers and
+	*The same is true of kernel development. The maintainers and
 	reviewers do not want to see the thought process behind the
 	solution to the problem one is solving. They want to see a
-	simple and elegant solution."
+	simple and elegant solution."*
 
 It may be challenging to keep the balance between presenting an elegant
 solution and working together with the community and discussing your
@@ -565,6 +612,7 @@
 the text in your email.  This information will become the ChangeLog
 information for the patch, and will be preserved for everyone to see for
 all time.  It should describe the patch completely, containing:
+
   - why the change is necessary
   - the overall design approach in the patch
   - implementation details
@@ -572,12 +620,11 @@
 
 For more details on what this should all look like, please see the
 ChangeLog section of the document:
+
   "The Perfect Patch"
       http://www.ozlabs.org/~akpm/stuff/tpp.txt
 
 
-
-
 All of these things are sometimes very hard to do. It can take years to
 perfect these practices (if at all). It's a continuous process of
 improvement that requires a lot of patience and determination. But
@@ -588,8 +635,9 @@
 
 
 ----------
+
 Thanks to Paolo Ciarrocchi who allowed the "Development Process"
-(http://lwn.net/Articles/94386/) section
+(https://lwn.net/Articles/94386/) section
 to be based on text he had written, and to Randy Dunlap and Gerrit
 Huizenga for some of the list of things you should and should not say.
 Also thanks to Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
diff --git a/Documentation/Makefile.sphinx b/Documentation/Makefile.sphinx
index 857f1e27..92deea3 100644
--- a/Documentation/Makefile.sphinx
+++ b/Documentation/Makefile.sphinx
@@ -5,6 +5,9 @@
 # You can set these variables from the command line.
 SPHINXBUILD   = sphinx-build
 SPHINXOPTS    =
+SPHINXDIRS    = .
+_SPHINXDIRS   = $(patsubst $(srctree)/Documentation/%/conf.py,%,$(wildcard $(srctree)/Documentation/*/conf.py))
+SPHINX_CONF   = conf.py
 PAPER         =
 BUILDDIR      = $(obj)/output
 
@@ -25,38 +28,62 @@
 
 else # HAVE_SPHINX
 
-# User-friendly check for rst2pdf
-HAVE_RST2PDF := $(shell if python -c "import rst2pdf" >/dev/null 2>&1; then echo 1; else echo 0; fi)
+# User-friendly check for pdflatex
+HAVE_PDFLATEX := $(shell if which xelatex >/dev/null 2>&1; then echo 1; else echo 0; fi)
 
 # Internal variables.
 PAPEROPT_a4     = -D latex_paper_size=a4
 PAPEROPT_letter = -D latex_paper_size=letter
 KERNELDOC       = $(srctree)/scripts/kernel-doc
 KERNELDOC_CONF  = -D kerneldoc_srctree=$(srctree) -D kerneldoc_bin=$(KERNELDOC)
-ALLSPHINXOPTS   = -D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) -d $(BUILDDIR)/.doctrees $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) -c $(srctree)/$(src) $(SPHINXOPTS) $(srctree)/$(src)
+ALLSPHINXOPTS   =  $(KERNELDOC_CONF) $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)
 # the i18n builder cannot share the environment and doctrees with the others
 I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 
-quiet_cmd_sphinx = SPHINX  $@
-      cmd_sphinx = BUILDDIR=$(BUILDDIR) $(SPHINXBUILD) -b $2 $(ALLSPHINXOPTS) $(BUILDDIR)/$2
+# commands; the 'cmd' from scripts/Kbuild.include is not *loopable*
+loop_cmd = $(echo-cmd) $(cmd_$(1))
+
+# $2 sphinx builder e.g. "html"
+# $3 name of the build subfolder / e.g. "media", used as:
+#    * dest folder relative to $(BUILDDIR) and
+#    * cache folder relative to $(BUILDDIR)/.doctrees
+# $4 dest subfolder e.g. "man" for man pages at media/man
+# $5 reST source folder relative to $(srctree)/$(src),
+#    e.g. "media" for the linux-tv book-set at ./Documentation/media
+
+quiet_cmd_sphinx = SPHINX  $@ --> file://$(abspath $(BUILDDIR)/$3/$4);
+      cmd_sphinx = $(MAKE) BUILDDIR=$(abspath $(BUILDDIR)) $(build)=Documentation/media all;\
+	BUILDDIR=$(abspath $(BUILDDIR)) SPHINX_CONF=$(abspath $(srctree)/$(src)/$5/$(SPHINX_CONF)) \
+	$(SPHINXBUILD) \
+	-b $2 \
+	-c $(abspath $(srctree)/$(src)) \
+	-d $(abspath $(BUILDDIR)/.doctrees/$3) \
+	-D version=$(KERNELVERSION) -D release=$(KERNELRELEASE) \
+	$(ALLSPHINXOPTS) \
+	$(abspath $(srctree)/$(src)/$5) \
+	$(abspath $(BUILDDIR)/$3/$4);
 
 htmldocs:
-	$(MAKE) BUILDDIR=$(BUILDDIR) -f $(srctree)/Documentation/media/Makefile $@
-	$(call cmd,sphinx,html)
+	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var)))
 
-pdfdocs:
-ifeq ($(HAVE_RST2PDF),0)
-	$(warning The Python 'rst2pdf' module was not found. Make sure you have the module installed to produce PDF output.)
+latexdocs:
+ifeq ($(HAVE_PDFLATEX),0)
+	$(warning The 'xelatex' command was not found. Make sure you have it installed and in PATH to produce PDF output.)
 	@echo "  SKIP    Sphinx $@ target."
-else # HAVE_RST2PDF
-	$(call cmd,sphinx,pdf)
-endif # HAVE_RST2PDF
+else # HAVE_PDFLATEX
+	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,latex,$(var),latex,$(var)))
+endif # HAVE_PDFLATEX
+
+pdfdocs: latexdocs
+ifneq ($(HAVE_PDFLATEX),0)
+	$(foreach var,$(SPHINXDIRS), $(MAKE) PDFLATEX=xelatex LATEXOPTS="-interaction=nonstopmode" -C $(BUILDDIR)/$(var)/latex)
+endif # HAVE_PDFLATEX
 
 epubdocs:
-	$(call cmd,sphinx,epub)
+	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,epub,$(var),epub,$(var)))
 
 xmldocs:
-	$(call cmd,sphinx,xml)
+	@$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,xml,$(var),xml,$(var)))
 
 # no-ops for the Sphinx toolchain
 sgmldocs:
@@ -72,7 +99,14 @@
 dochelp:
 	@echo  ' Linux kernel internal documentation in different formats (Sphinx):'
 	@echo  '  htmldocs        - HTML'
+	@echo  '  latexdocs       - LaTeX'
 	@echo  '  pdfdocs         - PDF'
 	@echo  '  epubdocs        - EPUB'
 	@echo  '  xmldocs         - XML'
 	@echo  '  cleandocs       - clean all generated files'
+	@echo
+	@echo  '  make SPHINXDIRS="s1 s2" [target] Generate only docs of folder s1, s2'
+	@echo  '  valid values for SPHINXDIRS are: $(_SPHINXDIRS)'
+	@echo
+	@echo  '  make SPHINX_CONF={conf-file} [target] use *additional* sphinx-build'
+	@echo  '  configuration. This is e.g. useful to build with nit-picking config.'
diff --git a/Documentation/ManagementStyle b/Documentation/ManagementStyle
index a211ee8..dea2e66c 100644
--- a/Documentation/ManagementStyle
+++ b/Documentation/ManagementStyle
@@ -1,10 +1,12 @@
+.. _managementstyle:
 
-                Linux kernel management style
+Linux kernel management style
+=============================
 
 This is a short document describing the preferred (or made up, depending
 on who you ask) management style for the linux kernel.  It's meant to
 mirror the CodingStyle document to some degree, and mainly written to
-avoid answering (*) the same (or similar) questions over and over again. 
+avoid answering [#f1]_  the same (or similar) questions over and over again.
 
 Management style is very personal and much harder to quantify than
 simple coding style rules, so this document may or may not have anything
@@ -14,50 +16,52 @@
 Btw, when talking about "kernel manager", it's all about the technical
 lead persons, not the people who do traditional management inside
 companies.  If you sign purchase orders or you have any clue about the
-budget of your group, you're almost certainly not a kernel manager. 
-These suggestions may or may not apply to you. 
+budget of your group, you're almost certainly not a kernel manager.
+These suggestions may or may not apply to you.
 
 First off, I'd suggest buying "Seven Habits of Highly Effective
-People", and NOT read it.  Burn it, it's a great symbolic gesture. 
+People", and NOT read it.  Burn it, it's a great symbolic gesture.
 
-(*) This document does so not so much by answering the question, but by
-making it painfully obvious to the questioner that we don't have a clue
-to what the answer is. 
+.. [#f1] This document does so not so much by answering the question, but by
+  making it painfully obvious to the questioner that we don't have a clue
+  to what the answer is.
 
 Anyway, here goes:
 
+.. _decisions:
 
-		Chapter 1: Decisions
+1) Decisions
+------------
 
 Everybody thinks managers make decisions, and that decision-making is
 important.  The bigger and more painful the decision, the bigger the
 manager must be to make it.  That's very deep and obvious, but it's not
-actually true. 
+actually true.
 
-The name of the game is to _avoid_ having to make a decision.  In
+The name of the game is to **avoid** having to make a decision.  In
 particular, if somebody tells you "choose (a) or (b), we really need you
 to decide on this", you're in trouble as a manager.  The people you
 manage had better know the details better than you, so if they come to
 you for a technical decision, you're screwed.  You're clearly not
-competent to make that decision for them. 
+competent to make that decision for them.
 
 (Corollary:if the people you manage don't know the details better than
-you, you're also screwed, although for a totally different reason. 
-Namely that you are in the wrong job, and that _they_ should be managing
-your brilliance instead). 
+you, you're also screwed, although for a totally different reason.
+Namely that you are in the wrong job, and that **they** should be managing
+your brilliance instead).
 
-So the name of the game is to _avoid_ decisions, at least the big and
+So the name of the game is to **avoid** decisions, at least the big and
 painful ones.  Making small and non-consequential decisions is fine, and
 makes you look like you know what you're doing, so what a kernel manager
 needs to do is to turn the big and painful ones into small things where
-nobody really cares. 
+nobody really cares.
 
 It helps to realize that the key difference between a big decision and a
 small one is whether you can fix your decision afterwards.  Any decision
 can be made small by just always making sure that if you were wrong (and
-you _will_ be wrong), you can always undo the damage later by
+you **will** be wrong), you can always undo the damage later by
 backtracking.  Suddenly, you get to be doubly managerial for making
-_two_ inconsequential decisions - the wrong one _and_ the right one. 
+**two** inconsequential decisions - the wrong one **and** the right one.
 
 And people will even see that as true leadership (*cough* bullshit
 *cough*).
@@ -65,10 +69,10 @@
 Thus the key to avoiding big decisions becomes to just avoiding to do
 things that can't be undone.  Don't get ushered into a corner from which
 you cannot escape.  A cornered rat may be dangerous - a cornered manager
-is just pitiful. 
+is just pitiful.
 
 It turns out that since nobody would be stupid enough to ever really let
-a kernel manager have huge fiscal responsibility _anyway_, it's usually
+a kernel manager have huge fiscal responsibility **anyway**, it's usually
 fairly easy to backtrack.  Since you're not going to be able to waste
 huge amounts of money that you might not be able to repay, the only
 thing you can backtrack on is a technical decision, and there
@@ -76,113 +80,118 @@
 incompetent nincompoop, say you're sorry, and undo all the worthless
 work you had people work on for the last year.  Suddenly the decision
 you made a year ago wasn't a big decision after all, since it could be
-easily undone. 
+easily undone.
 
 It turns out that some people have trouble with this approach, for two
 reasons:
+
  - admitting you were an idiot is harder than it looks.  We all like to
    maintain appearances, and coming out in public to say that you were
-   wrong is sometimes very hard indeed. 
+   wrong is sometimes very hard indeed.
  - having somebody tell you that what you worked on for the last year
    wasn't worthwhile after all can be hard on the poor lowly engineers
-   too, and while the actual _work_ was easy enough to undo by just
+   too, and while the actual **work** was easy enough to undo by just
    deleting it, you may have irrevocably lost the trust of that
    engineer.  And remember: "irrevocable" was what we tried to avoid in
    the first place, and your decision ended up being a big one after
-   all. 
+   all.
 
 Happily, both of these reasons can be mitigated effectively by just
 admitting up-front that you don't have a friggin' clue, and telling
 people ahead of the fact that your decision is purely preliminary, and
 might be the wrong thing.  You should always reserve the right to change
-your mind, and make people very _aware_ of that.  And it's much easier
-to admit that you are stupid when you haven't _yet_ done the really
+your mind, and make people very **aware** of that.  And it's much easier
+to admit that you are stupid when you haven't **yet** done the really
 stupid thing.
 
 Then, when it really does turn out to be stupid, people just roll their
-eyes and say "Oops, he did it again".  
+eyes and say "Oops, he did it again".
 
 This preemptive admission of incompetence might also make the people who
 actually do the work also think twice about whether it's worth doing or
-not.  After all, if _they_ aren't certain whether it's a good idea, you
+not.  After all, if **they** aren't certain whether it's a good idea, you
 sure as hell shouldn't encourage them by promising them that what they
 work on will be included.  Make them at least think twice before they
-embark on a big endeavor. 
+embark on a big endeavor.
 
 Remember: they'd better know more about the details than you do, and
 they usually already think they have the answer to everything.  The best
 thing you can do as a manager is not to instill confidence, but rather a
-healthy dose of critical thinking on what they do. 
+healthy dose of critical thinking on what they do.
 
 Btw, another way to avoid a decision is to plaintively just whine "can't
 we just do both?" and look pitiful.  Trust me, it works.  If it's not
 clear which approach is better, they'll eventually figure it out.  The
 answer may end up being that both teams get so frustrated by the
-situation that they just give up. 
+situation that they just give up.
 
 That may sound like a failure, but it's usually a sign that there was
 something wrong with both projects, and the reason the people involved
 couldn't decide was that they were both wrong.  You end up coming up
 smelling like roses, and you avoided yet another decision that you could
-have screwed up on. 
+have screwed up on.
 
 
-		Chapter 2: People
+2) People
+---------
 
 Most people are idiots, and being a manager means you'll have to deal
-with it, and perhaps more importantly, that _they_ have to deal with
-_you_. 
+with it, and perhaps more importantly, that **they** have to deal with
+**you**.
 
 It turns out that while it's easy to undo technical mistakes, it's not
 as easy to undo personality disorders.  You just have to live with
-theirs - and yours. 
+theirs - and yours.
 
 However, in order to prepare yourself as a kernel manager, it's best to
 remember not to burn any bridges, bomb any innocent villagers, or
 alienate too many kernel developers. It turns out that alienating people
 is fairly easy, and un-alienating them is hard. Thus "alienating"
 immediately falls under the heading of "not reversible", and becomes a
-no-no according to Chapter 1.
+no-no according to :ref:`decisions`.
 
 There's just a few simple rules here:
+
  (1) don't call people d*ckheads (at least not in public)
  (2) learn how to apologize when you forgot rule (1)
 
 The problem with #1 is that it's very easy to do, since you can say
-"you're a d*ckhead" in millions of different ways (*), sometimes without
+"you're a d*ckhead" in millions of different ways [#f2]_, sometimes without
 even realizing it, and almost always with a white-hot conviction that
-you are right. 
+you are right.
 
 And the more convinced you are that you are right (and let's face it,
-you can call just about _anybody_ a d*ckhead, and you often _will_ be
-right), the harder it ends up being to apologize afterwards. 
+you can call just about **anybody** a d*ckhead, and you often **will** be
+right), the harder it ends up being to apologize afterwards.
 
 To solve this problem, you really only have two options:
+
  - get really good at apologies
  - spread the "love" out so evenly that nobody really ends up feeling
    like they get unfairly targeted.  Make it inventive enough, and they
-   might even be amused. 
+   might even be amused.
 
 The option of being unfailingly polite really doesn't exist. Nobody will
 trust somebody who is so clearly hiding his true character.
 
-(*) Paul Simon sang "Fifty Ways to Leave Your Lover", because quite
-frankly, "A Million Ways to Tell a Developer He Is a D*ckhead" doesn't
-scan nearly as well.  But I'm sure he thought about it. 
+.. [#f2] Paul Simon sang "Fifty Ways to Leave Your Lover", because quite
+  frankly, "A Million Ways to Tell a Developer He Is a D*ckhead" doesn't
+  scan nearly as well.  But I'm sure he thought about it.
 
 
-		Chapter 3: People II - the Good Kind
+3) People II - the Good Kind
+----------------------------
 
 While it turns out that most people are idiots, the corollary to that is
 sadly that you are one too, and that while we can all bask in the secure
 knowledge that we're better than the average person (let's face it,
 nobody ever believes that they're average or below-average), we should
 also admit that we're not the sharpest knife around, and there will be
-other people that are less of an idiot than you are. 
+other people that are less of an idiot than you are.
 
-Some people react badly to smart people.  Others take advantage of them. 
+Some people react badly to smart people.  Others take advantage of them.
 
-Make sure that you, as a kernel maintainer, are in the second group. 
+Make sure that you, as a kernel maintainer, are in the second group.
 Suck up to them, because they are the people who will make your job
 easier. In particular, they'll be able to make your decisions for you,
 which is what the game is all about.
@@ -191,7 +200,7 @@
 management responsibilities largely become ones of saying "Sounds like a
 good idea - go wild", or "That sounds good, but what about xxx?".  The
 second version in particular is a great way to either learn something
-new about "xxx" or seem _extra_ managerial by pointing out something the
+new about "xxx" or seem **extra** managerial by pointing out something the
 smarter person hadn't thought about.  In either case, you win.
 
 One thing to look out for is to realize that greatness in one area does
@@ -199,47 +208,49 @@
 specific directions, but let's face it, they might be good at what they
 do, and suck at everything else.  The good news is that people tend to
 naturally gravitate back to what they are good at, so it's not like you
-are doing something irreversible when you _do_ prod them in some
+are doing something irreversible when you **do** prod them in some
 direction, just don't push too hard.
 
 
-		Chapter 4: Placing blame
+4) Placing blame
+----------------
 
 Things will go wrong, and people want somebody to blame. Tag, you're it.
 
 It's not actually that hard to accept the blame, especially if people
-kind of realize that it wasn't _all_ your fault.  Which brings us to the
+kind of realize that it wasn't **all** your fault.  Which brings us to the
 best way of taking the blame: do it for another guy. You'll feel good
 for taking the fall, he'll feel good about not getting blamed, and the
 guy who lost his whole 36GB porn-collection because of your incompetence
 will grudgingly admit that you at least didn't try to weasel out of it.
 
 Then make the developer who really screwed up (if you can find him) know
-_in_private_ that he screwed up.  Not just so he can avoid it in the
+**in_private** that he screwed up.  Not just so he can avoid it in the
 future, but so that he knows he owes you one.  And, perhaps even more
 importantly, he's also likely the person who can fix it.  Because, let's
-face it, it sure ain't you. 
+face it, it sure ain't you.
 
-Taking the blame is also why you get to be manager in the first place. 
+Taking the blame is also why you get to be manager in the first place.
 It's part of what makes people trust you, and allow you the potential
 glory, because you're the one who gets to say "I screwed up".  And if
 you've followed the previous rules, you'll be pretty good at saying that
-by now. 
+by now.
 
 
-		Chapter 5: Things to avoid
+5) Things to avoid
+------------------
 
 There's one thing people hate even more than being called "d*ckhead",
 and that is being called a "d*ckhead" in a sanctimonious voice.  The
 first you can apologize for, the second one you won't really get the
 chance.  They likely will no longer be listening even if you otherwise
-do a good job. 
+do a good job.
 
 We all think we're better than anybody else, which means that when
-somebody else puts on airs, it _really_ rubs us the wrong way.  You may
+somebody else puts on airs, it **really** rubs us the wrong way.  You may
 be morally and intellectually superior to everybody around you, but
-don't try to make it too obvious unless you really _intend_ to irritate
-somebody (*). 
+don't try to make it too obvious unless you really **intend** to irritate
+somebody [#f3]_.
 
 Similarly, don't be too polite or subtle about things. Politeness easily
 ends up going overboard and hiding the problem, and as they say, "On the
@@ -251,15 +262,16 @@
 overboard to the point of being ridiculous can drive a point home
 without making it painful to the recipient, who just thinks you're being
 silly.  It can thus help get through the personal mental block we all
-have about criticism. 
+have about criticism.
 
-(*) Hint: internet newsgroups that are not directly related to your work
-are great ways to take out your frustrations at other people. Write
-insulting posts with a sneer just to get into a good flame every once in
-a while, and you'll feel cleansed. Just don't crap too close to home.
+.. [#f3] Hint: internet newsgroups that are not directly related to your work
+  are great ways to take out your frustrations at other people. Write
+  insulting posts with a sneer just to get into a good flame every once in
+  a while, and you'll feel cleansed. Just don't crap too close to home.
 
 
-		Chapter 6: Why me?
+6) Why me?
+----------
 
 Since your main responsibility seems to be to take the blame for other
 peoples mistakes, and make it painfully obvious to everybody else that
@@ -268,9 +280,9 @@
 
 First off, while you may or may not get screaming teenage girls (or
 boys, let's not be judgmental or sexist here) knocking on your dressing
-room door, you _will_ get an immense feeling of personal accomplishment
+room door, you **will** get an immense feeling of personal accomplishment
 for being "in charge".  Never mind the fact that you're really leading
 by trying to keep up with everybody else and running after them as fast
-as you can.  Everybody will still think you're the person in charge. 
+as you can.  Everybody will still think you're the person in charge.
 
 It's a great job if you can hack it.
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index c55df29..cd9c9f6 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -94,14 +94,11 @@
 min_vecs argument set to this limit, and the PCI core will return -ENOSPC
 if it can't meet the minimum number of vectors.
 
-The flags argument should normally be set to 0, but can be used to pass the
-PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support
-MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in
-case the device does not support legacy interrupt lines.
-
-By default this function will spread the interrupts around the available
-CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY
-flag.
+The flags argument is used to specify which type of interrupt can be used
+by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX).
+A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for
+any possible kind of interrupt.  If the PCI_IRQ_AFFINITY flag is set,
+pci_alloc_irq_vectors() will spread the interrupts around the available CPUs.
 
 To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
 vectors, use the following function:
@@ -131,7 +128,7 @@
 capped to the supported limit, so there is no need to query the number of
 vectors supported beforehand:
 
-	nvec = pci_alloc_irq_vectors(pdev, 1, nvec, 0);
+	nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES)
 	if (nvec < 0)
 		goto out_err;
 
@@ -140,7 +137,7 @@
 number to pci_alloc_irq_vectors() function as both 'min_vecs' and
 'max_vecs' parameters:
 
-	ret = pci_alloc_irq_vectors(pdev, nvec, nvec, 0);
+	ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES);
 	if (ret < 0)
 		goto out_err;
 
@@ -148,15 +145,14 @@
 the single MSI mode for a device.  It could be done by passing two 1s as
 'min_vecs' and 'max_vecs':
 
-	ret = pci_alloc_irq_vectors(pdev, 1, 1, 0);
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
 	if (ret < 0)
 		goto out_err;
 
 Some devices might not support using legacy line interrupts, in which case
-the PCI_IRQ_NOLEGACY flag can be used to fail the request if the platform
-can't provide MSI or MSI-X interrupts:
+the driver can specify that only MSI or MSI-X is acceptable:
 
-	nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_NOLEGACY);
+	nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX);
 	if (nvec < 0)
 		goto out_err;
 
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
index 123881f..77f49dc 100644
--- a/Documentation/PCI/pci.txt
+++ b/Documentation/PCI/pci.txt
@@ -124,7 +124,6 @@
 
 The ID table is an array of struct pci_device_id entries ending with an
 all-zero entry.  Definitions with static const are generally preferred.
-Use of the deprecated macro DEFINE_PCI_DEVICE_TABLE should be avoided.
 
 Each entry consists of:
 
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index ece410f..a4d3838 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2493,6 +2493,28 @@
 variant of <tt>call_rcu()</tt> that might one day be created for
 energy-efficiency purposes.
 
+<p>
+That said, there are limits.
+RCU requires that the <tt>rcu_head</tt> structure be aligned to a
+two-byte boundary, and passing a misaligned <tt>rcu_head</tt>
+structure to one of the <tt>call_rcu()</tt> family of functions
+will result in a splat.
+It is therefore necessary to exercise caution when packing
+structures containing fields of type <tt>rcu_head</tt>.
+Why not a four-byte or even eight-byte alignment requirement?
+Because the m68k architecture provides only two-byte alignment,
+and thus acts as alignment's least common denominator.
+
+<p>
+The reason for reserving the bottom bit of pointers to
+<tt>rcu_head</tt> structures is to leave the door open to
+&ldquo;lazy&rdquo; callbacks whose invocations can safely be deferred.
+Deferring invocation could potentially have energy-efficiency
+benefits, but only if the rate of non-lazy callbacks decreases
+significantly for some important workload.
+In the meantime, reserving the bottom bit keeps this option open
+in case it one day becomes useful.
+
 <h3><a name="Performance, Scalability, Response Time, and Reliability">
 Performance, Scalability, Response Time, and Reliability</a></h3>
 
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 118e7c1..278f6a9 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -10,21 +10,6 @@
 command (perhaps grepping for "torture").  The test is started
 when the module is loaded, and stops when the module is unloaded.
 
-CONFIG_RCU_TORTURE_TEST_RUNNABLE
-
-It is also possible to specify CONFIG_RCU_TORTURE_TEST=y, which will
-result in the tests being loaded into the base kernel.  In this case,
-the CONFIG_RCU_TORTURE_TEST_RUNNABLE config option is used to specify
-whether the RCU torture tests are to be started immediately during
-boot or whether the /proc/sys/kernel/rcutorture_runnable file is used
-to enable them.  This /proc file can be used to repeatedly pause and
-restart the tests, regardless of the initial state specified by the
-CONFIG_RCU_TORTURE_TEST_RUNNABLE config option.
-
-You will normally -not- want to start the RCU torture tests during boot
-(and thus the default is CONFIG_RCU_TORTURE_TEST_RUNNABLE=n), but doing
-this can sometimes be useful in finding boot-time bugs.
-
 
 MODULE PARAMETERS
 
diff --git a/Documentation/SecurityBugs b/Documentation/SecurityBugs
index a660d49..342d769 100644
--- a/Documentation/SecurityBugs
+++ b/Documentation/SecurityBugs
@@ -1,9 +1,15 @@
+.. _securitybugs:
+
+Security bugs
+=============
+
 Linux kernel developers take security very seriously.  As such, we'd
 like to know when a security bug is found so that it can be fixed and
 disclosed as quickly as possible.  Please report security bugs to the
 Linux kernel security team.
 
 1) Contact
+----------
 
 The Linux kernel security team can be contacted by email at
 <security@kernel.org>.  This is a private list of security officers
@@ -18,6 +24,7 @@
 consent from the reporter unless it has already been made public.
 
 2) Disclosure
+-------------
 
 The goal of the Linux kernel security team is to work with the
 bug submitter to bug resolution as well as disclosure.  We prefer
@@ -33,6 +40,7 @@
 disclosure date to be on the order of 7 days.
 
 3) Non-disclosure agreements
+----------------------------
 
 The Linux kernel security team is not a formal body and therefore unable
 to enter any non-disclosure agreements.
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index 2b7e32d..894289b 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist
@@ -1,109 +1,120 @@
+.. _submitchecklist:
+
 Linux Kernel patch submission checklist
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Here are some basic things that developers should do if they want to see their
 kernel patch submissions accepted more quickly.
 
 These are all above and beyond the documentation that is provided in
-Documentation/SubmittingPatches and elsewhere regarding submitting Linux
-kernel patches.
+:ref:`Documentation/SubmittingPatches <submittingpatches>`
+and elsewhere regarding submitting Linux kernel patches.
 
 
-1: If you use a facility then #include the file that defines/declares
+1) If you use a facility then #include the file that defines/declares
    that facility.  Don't depend on other header files pulling in ones
    that you use.
 
-2: Builds cleanly with applicable or modified CONFIG options =y, =m, and
-   =n.  No gcc warnings/errors, no linker warnings/errors.
+2) Builds cleanly:
 
-2b: Passes allnoconfig, allmodconfig
+  a) with applicable or modified ``CONFIG`` options ``=y``, ``=m``, and
+     ``=n``.  No ``gcc`` warnings/errors, no linker warnings/errors.
 
-2c: Builds successfully when using O=builddir
+  b) Passes ``allnoconfig``, ``allmodconfig``
 
-3: Builds on multiple CPU architectures by using local cross-compile tools
+  c) Builds successfully when using ``O=builddir``
+
+3) Builds on multiple CPU architectures by using local cross-compile tools
    or some other build farm.
 
-4: ppc64 is a good architecture for cross-compilation checking because it
-   tends to use `unsigned long' for 64-bit quantities.
+4) ppc64 is a good architecture for cross-compilation checking because it
+   tends to use ``unsigned long`` for 64-bit quantities.
 
-5: Check your patch for general style as detailed in
-   Documentation/CodingStyle.  Check for trivial violations with the
-   patch style checker prior to submission (scripts/checkpatch.pl).
+5) Check your patch for general style as detailed in
+   :ref:`Documentation/CodingStyle <codingstyle>`.
+   Check for trivial violations with the patch style checker prior to
+   submission (``scripts/checkpatch.pl``).
    You should be able to justify all violations that remain in
    your patch.
 
-6: Any new or modified CONFIG options don't muck up the config menu.
+6) Any new or modified ``CONFIG`` options don't muck up the config menu.
 
-7: All new Kconfig options have help text.
+7) All new ``Kconfig`` options have help text.
 
-8: Has been carefully reviewed with respect to relevant Kconfig
+8) Has been carefully reviewed with respect to relevant ``Kconfig``
    combinations.  This is very hard to get right with testing -- brainpower
    pays off here.
 
-9: Check cleanly with sparse.
+9) Check cleanly with sparse.
 
-10: Use 'make checkstack' and 'make namespacecheck' and fix any problems
-    that they find.  Note: checkstack does not point out problems explicitly,
-    but any one function that uses more than 512 bytes on the stack is a
-    candidate for change.
+10) Use ``make checkstack`` and ``make namespacecheck`` and fix any problems
+    that they find.
 
-11: Include kernel-doc to document global kernel APIs.  (Not required for
-    static functions, but OK there also.) Use 'make htmldocs' or 'make
-    mandocs' to check the kernel-doc and fix any issues.
+    .. note::
 
-12: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
-    CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
-    CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP, CONFIG_PROVE_RCU
-    and CONFIG_DEBUG_OBJECTS_RCU_HEAD all simultaneously enabled.
+       ``checkstack`` does not point out problems explicitly,
+       but any one function that uses more than 512 bytes on the stack is a
+       candidate for change.
 
-13: Has been build- and runtime tested with and without CONFIG_SMP and
-    CONFIG_PREEMPT.
+11) Include :ref:`kernel-doc <kernel_doc>` to document global  kernel APIs.
+    (Not required for static functions, but OK there also.) Use
+    ``make htmldocs`` or ``make pdfdocs`` to check the
+    :ref:`kernel-doc <kernel_doc>` and fix any issues.
 
-14: If the patch affects IO/Disk, etc: has been tested with and without
-    CONFIG_LBDAF.
+12) Has been tested with ``CONFIG_PREEMPT``, ``CONFIG_DEBUG_PREEMPT``,
+    ``CONFIG_DEBUG_SLAB``, ``CONFIG_DEBUG_PAGEALLOC``, ``CONFIG_DEBUG_MUTEXES``,
+    ``CONFIG_DEBUG_SPINLOCK``, ``CONFIG_DEBUG_ATOMIC_SLEEP``,
+    ``CONFIG_PROVE_RCU`` and ``CONFIG_DEBUG_OBJECTS_RCU_HEAD`` all
+    simultaneously enabled.
 
-15: All codepaths have been exercised with all lockdep features enabled.
+13) Has been build- and runtime tested with and without ``CONFIG_SMP`` and
+    ``CONFIG_PREEMPT.``
 
-16: All new /proc entries are documented under Documentation/
+14) If the patch affects IO/Disk, etc: has been tested with and without
+    ``CONFIG_LBDAF.``
 
-17: All new kernel boot parameters are documented in
-    Documentation/kernel-parameters.txt.
+15) All codepaths have been exercised with all lockdep features enabled.
 
-18: All new module parameters are documented with MODULE_PARM_DESC()
+16) All new ``/proc`` entries are documented under ``Documentation/``
 
-19: All new userspace interfaces are documented in Documentation/ABI/.
-    See Documentation/ABI/README for more information.
+17) All new kernel boot parameters are documented in
+    ``Documentation/kernel-parameters.txt``.
+
+18) All new module parameters are documented with ``MODULE_PARM_DESC()``
+
+19) All new userspace interfaces are documented in ``Documentation/ABI/``.
+    See ``Documentation/ABI/README`` for more information.
     Patches that change userspace interfaces should be CCed to
     linux-api@vger.kernel.org.
 
-20: Check that it all passes `make headers_check'.
+20) Check that it all passes ``make headers_check``.
 
-21: Has been checked with injection of at least slab and page-allocation
-    failures.  See Documentation/fault-injection/.
+21) Has been checked with injection of at least slab and page-allocation
+    failures.  See ``Documentation/fault-injection/``.
 
     If the new code is substantial, addition of subsystem-specific fault
     injection might be appropriate.
 
-22: Newly-added code has been compiled with `gcc -W' (use "make
-    EXTRA_CFLAGS=-W").  This will generate lots of noise, but is good for
-    finding bugs like "warning: comparison between signed and unsigned".
+22) Newly-added code has been compiled with ``gcc -W`` (use
+    ``make EXTRA_CFLAGS=-W``).  This will generate lots of noise, but is good
+    for finding bugs like "warning: comparison between signed and unsigned".
 
-23: Tested after it has been merged into the -mm patchset to make sure
+23) Tested after it has been merged into the -mm patchset to make sure
     that it still works with all of the other queued patches and various
     changes in the VM, VFS, and other subsystems.
 
-24: All memory barriers {e.g., barrier(), rmb(), wmb()} need a comment in the
-    source code that explains the logic of what they are doing and why.
+24) All memory barriers {e.g., ``barrier()``, ``rmb()``, ``wmb()``} need a
+    comment in the source code that explains the logic of what they are doing
+    and why.
 
-25: If any ioctl's are added by the patch, then also update
-    Documentation/ioctl/ioctl-number.txt.
+25) If any ioctl's are added by the patch, then also update
+    ``Documentation/ioctl/ioctl-number.txt``.
 
-26: If your modified source code depends on or uses any of the kernel
-    APIs or features that are related to the following kconfig symbols,
-    then test multiple builds with the related kconfig symbols disabled
-    and/or =m (if that option is available) [not all of these at the
+26) If your modified source code depends on or uses any of the kernel
+    APIs or features that are related to the following ``Kconfig`` symbols,
+    then test multiple builds with the related ``Kconfig`` symbols disabled
+    and/or ``=m`` (if that option is available) [not all of these at the
     same time, just various/random combinations of them]:
 
-    CONFIG_SMP, CONFIG_SYSFS, CONFIG_PROC_FS, CONFIG_INPUT, CONFIG_PCI,
-    CONFIG_BLOCK, CONFIG_PM, CONFIG_MAGIC_SYSRQ,
-    CONFIG_NET, CONFIG_INET=n (but latter with CONFIG_NET=y)
+    ``CONFIG_SMP``, ``CONFIG_SYSFS``, ``CONFIG_PROC_FS``, ``CONFIG_INPUT``, ``CONFIG_PCI``, ``CONFIG_BLOCK``, ``CONFIG_PM``, ``CONFIG_MAGIC_SYSRQ``,
+    ``CONFIG_NET``, ``CONFIG_INET=n`` (but latter with ``CONFIG_NET=y``).
diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers
index 31d3726..252b77a 100644
--- a/Documentation/SubmittingDrivers
+++ b/Documentation/SubmittingDrivers
@@ -1,5 +1,7 @@
+.. _submittingdrivers:
+
 Submitting Drivers For The Linux Kernel
----------------------------------------
+=======================================
 
 This document is intended to explain how to submit device drivers to the
 various kernel trees. Note that if you are interested in video card drivers
@@ -38,42 +40,48 @@
 	maintainer does not respond or you cannot find the appropriate
 	maintainer then please contact Willy Tarreau <w@1wt.eu>.
 
-Linux 2.6:
+Linux 2.6 and upper:
 	The same rules apply as 2.4 except that you should follow linux-kernel
-	to track changes in API's. The final contact point for Linux 2.6
+	to track changes in API's. The final contact point for Linux 2.6+
 	submissions is Andrew Morton.
 
 What Criteria Determine Acceptance
 ----------------------------------
 
-Licensing:	The code must be released to us under the
+Licensing:
+		The code must be released to us under the
 		GNU General Public License. We don't insist on any kind
 		of exclusive GPL licensing, and if you wish the driver
 		to be useful to other communities such as BSD you may well
 		wish to release under multiple licenses.
 		See accepted licenses at include/linux/module.h
 
-Copyright:	The copyright owner must agree to use of GPL.
+Copyright:
+		The copyright owner must agree to use of GPL.
 		It's best if the submitter and copyright owner
 		are the same person/entity. If not, the name of
 		the person/entity authorizing use of GPL should be
 		listed in case it's necessary to verify the will of
 		the copyright owner.
 
-Interfaces:	If your driver uses existing interfaces and behaves like
+Interfaces:
+		If your driver uses existing interfaces and behaves like
 		other drivers in the same class it will be much more likely
 		to be accepted than if it invents gratuitous new ones.
 		If you need to implement a common API over Linux and NT
 		drivers do it in userspace.
 
-Code:		Please use the Linux style of code formatting as documented
-		in Documentation/CodingStyle. If you have sections of code
+Code:
+		Please use the Linux style of code formatting as documented
+		in :ref:`Documentation/CodingStyle <codingStyle>`.
+		If you have sections of code
 		that need to be in other formats, for example because they
 		are shared with a windows driver kit and you want to
 		maintain them just once separate them out nicely and note
 		this fact.
 
-Portability:	Pointers are not always 32bits, not all computers are little
+Portability:
+		Pointers are not always 32bits, not all computers are little
 		endian, people do not all have floating point and you
 		shouldn't use inline x86 assembler in your driver without
 		careful thought. Pure x86 drivers generally are not popular.
@@ -81,12 +89,14 @@
 		but it is easy to make sure the code can easily be made
 		portable.
 
-Clarity:	It helps if anyone can see how to fix the driver. It helps
+Clarity:
+		It helps if anyone can see how to fix the driver. It helps
 		you because you get patches not bug reports. If you submit a
 		driver that intentionally obfuscates how the hardware works
 		it will go in the bitbucket.
 
-PM support:	Since Linux is used on many portable and desktop systems, your
+PM support:
+		Since Linux is used on many portable and desktop systems, your
 		driver is likely to be used on such a system and therefore it
 		should support basic power management by implementing, if
 		necessary, the .suspend and .resume methods used during the
@@ -101,7 +111,8 @@
 		complete overview of the power management issues related to
 		drivers see Documentation/power/devices.txt .
 
-Control:	In general if there is active maintenance of a driver by
+Control:
+		In general if there is active maintenance of a driver by
 		the author then patches will be redirected to them unless
 		they are totally obvious and without need of checking.
 		If you want to be the contact and update point for the
@@ -111,13 +122,15 @@
 What Criteria Do Not Determine Acceptance
 -----------------------------------------
 
-Vendor:		Being the hardware vendor and maintaining the driver is
+Vendor:
+		Being the hardware vendor and maintaining the driver is
 		often a good thing. If there is a stable working driver from
 		other people already in the tree don't expect 'we are the
 		vendor' to get your driver chosen. Ideally work with the
 		existing driver author to build a single perfect driver.
 
-Author:		It doesn't matter if a large Linux company wrote the driver,
+Author:
+		It doesn't matter if a large Linux company wrote the driver,
 		or you did. Nobody has any special access to the kernel
 		tree. Anyone who tells you otherwise isn't telling the
 		whole story.
@@ -127,8 +140,10 @@
 ---------
 
 Linux kernel master tree:
-	ftp.??.kernel.org:/pub/linux/kernel/...
-	?? == your country code, such as "us", "uk", "fr", etc.
+	ftp.\ *country_code*\ .kernel.org:/pub/linux/kernel/...
+
+	where *country_code* == your country code, such as
+	**us**, **uk**, **fr**, etc.
 
 	http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git
 
@@ -141,14 +156,19 @@
 
 LWN.net:
 	Weekly summary of kernel development activity - http://lwn.net/
+
 	2.6 API changes:
+
 		http://lwn.net/Articles/2.6-kernel-api/
+
 	Porting drivers from prior kernels to 2.6:
+
 		http://lwn.net/Articles/driver-porting/
 
 KernelNewbies:
 	Documentation and assistance for new kernel programmers
-	http://kernelnewbies.org/
+
+		http://kernelnewbies.org/
 
 Linux USB project:
 	http://www.linux-usb.org/
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 8c79f1d..36f1ded 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -1,9 +1,7 @@
+.. _submittingpatches:
 
-	How to Get Your Change Into the Linux Kernel
-		or
-	Care And Operation Of Your Linus Torvalds
-
-
+How to Get Your Change Into the Linux Kernel or Care And Operation Of Your Linus Torvalds
+=========================================================================================
 
 For a person or company who wishes to submit a change to the Linux
 kernel, the process can sometimes be daunting if you're not familiar
@@ -12,57 +10,59 @@
 
 This document contains a large number of suggestions in a relatively terse
 format.  For detailed information on how the kernel development process
-works, see Documentation/development-process.  Also, read
-Documentation/SubmitChecklist for a list of items to check before
+works, see :ref:`Documentation/development-process <development_process_main>`.
+Also, read :ref:`Documentation/SubmitChecklist <submitchecklist>`
+for a list of items to check before
 submitting code.  If you are submitting a driver, also read
-Documentation/SubmittingDrivers; for device tree binding patches, read
+:ref:`Documentation/SubmittingDrivers <submittingdrivers>`;
+for device tree binding patches, read
 Documentation/devicetree/bindings/submitting-patches.txt.
 
-Many of these steps describe the default behavior of the git version
-control system; if you use git to prepare your patches, you'll find much
+Many of these steps describe the default behavior of the ``git`` version
+control system; if you use ``git`` to prepare your patches, you'll find much
 of the mechanical work done for you, though you'll still need to prepare
-and document a sensible set of patches.  In general, use of git will make
+and document a sensible set of patches.  In general, use of ``git`` will make
 your life as a kernel developer easier.
 
---------------------------------------------
-SECTION 1 - CREATING AND SENDING YOUR CHANGE
---------------------------------------------
+Creating and Sending your Change
+********************************
 
 
 0) Obtain a current source tree
 -------------------------------
 
 If you do not have a repository with the current kernel source handy, use
-git to obtain one.  You'll want to start with the mainline repository,
-which can be grabbed with:
+``git`` to obtain one.  You'll want to start with the mainline repository,
+which can be grabbed with::
 
-  git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
+  git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 
 Note, however, that you may not want to develop against the mainline tree
 directly.  Most subsystem maintainers run their own trees and want to see
-patches prepared against those trees.  See the "T:" entry for the subsystem
+patches prepared against those trees.  See the **T:** entry for the subsystem
 in the MAINTAINERS file to find that tree, or simply ask the maintainer if
 the tree is not listed there.
 
 It is still possible to download kernel releases via tarballs (as described
 in the next section), but that is the hard way to do kernel development.
 
-1) "diff -up"
-------------
+1) ``diff -up``
+---------------
 
-If you must generate your patches by hand, use "diff -up" or "diff -uprN"
+If you must generate your patches by hand, use ``diff -up`` or ``diff -uprN``
 to create patches.  Git generates patches in this form by default; if
-you're using git, you can skip this section entirely.
+you're using ``git``, you can skip this section entirely.
 
 All changes to the Linux kernel occur in the form of patches, as
-generated by diff(1).  When creating your patch, make sure to create it
-in "unified diff" format, as supplied by the '-u' argument to diff(1).
-Also, please use the '-p' argument which shows which C function each
-change is in - that makes the resultant diff a lot easier to read.
+generated by :manpage:`diff(1)`.  When creating your patch, make sure to
+create it in "unified diff" format, as supplied by the ``-u`` argument
+to :manpage:`diff(1)`.
+Also, please use the ``-p`` argument which shows which C function each
+change is in - that makes the resultant ``diff`` a lot easier to read.
 Patches should be based in the root kernel source directory,
 not in any lower subdirectory.
 
-To create a patch for a single file, it is often sufficient to do:
+To create a patch for a single file, it is often sufficient to do::
 
 	SRCTREE= linux
 	MYFILE=  drivers/net/mydriver.c
@@ -74,8 +74,8 @@
 	diff -up $SRCTREE/$MYFILE{.orig,} > /tmp/patch
 
 To create a patch for multiple files, you should unpack a "vanilla",
-or unmodified kernel source tree, and generate a diff against your
-own source tree.  For example:
+or unmodified kernel source tree, and generate a ``diff`` against your
+own source tree.  For example::
 
 	MYSRC= /devel/linux
 
@@ -84,27 +84,27 @@
 	diff -uprN -X linux-3.19-vanilla/Documentation/dontdiff \
 		linux-3.19-vanilla $MYSRC > /tmp/patch
 
-"dontdiff" is a list of files which are generated by the kernel during
-the build process, and should be ignored in any diff(1)-generated
+``dontdiff`` is a list of files which are generated by the kernel during
+the build process, and should be ignored in any :manpage:`diff(1)`-generated
 patch.
 
 Make sure your patch does not include any extra files which do not
 belong in a patch submission.  Make sure to review your patch -after-
-generating it with diff(1), to ensure accuracy.
+generating it with :manpage:`diff(1)`, to ensure accuracy.
 
 If your changes produce a lot of deltas, you need to split them into
-individual patches which modify things in logical stages; see section
-#3.  This will facilitate review by other kernel developers,
+individual patches which modify things in logical stages; see
+:ref:`split_changes`.  This will facilitate review by other kernel developers,
 very important if you want your patch accepted.
 
-If you're using git, "git rebase -i" can help you with this process.  If
-you're not using git, quilt <http://savannah.nongnu.org/projects/quilt>
+If you're using ``git``, ``git rebase -i`` can help you with this process.  If
+you're not using ``git``, ``quilt`` <http://savannah.nongnu.org/projects/quilt>
 is another popular alternative.
 
+.. _describe_changes:
 
-
-2) Describe your changes.
--------------------------
+2) Describe your changes
+------------------------
 
 Describe your problem.  Whether your patch is a one-line bug fix or
 5000 lines of a new feature, there must be an underlying problem that
@@ -137,11 +137,11 @@
 
 The maintainer will thank you if you write your patch description in a
 form which can be easily pulled into Linux's source code management
-system, git, as a "commit log".  See #15, below.
+system, ``git``, as a "commit log".  See :ref:`explicit_in_reply_to`.
 
 Solve only one problem per patch.  If your description starts to get
 long, that's a sign that you probably need to split up your patch.
-See #3, next.
+See :ref:`split_changes`.
 
 When you submit or resubmit a patch or patch series, include the
 complete patch description and justification for it.  Don't just
@@ -160,7 +160,7 @@
 If the patch fixes a logged bug entry, refer to that bug entry by
 number and URL.  If the patch follows from a mailing list discussion,
 give a URL to the mailing list archive; use the https://lkml.kernel.org/
-redirector with a Message-Id, to ensure that the links cannot become
+redirector with a ``Message-Id``, to ensure that the links cannot become
 stale.
 
 However, try to make your explanation understandable without external
@@ -171,7 +171,7 @@
 If you want to refer to a specific commit, don't just refer to the
 SHA-1 ID of the commit. Please also include the oneline summary of
 the commit, to make it easier for reviewers to know what it is about.
-Example:
+Example::
 
 	Commit e21d2170f36602ae2708 ("video: remove unnecessary
 	platform_set_drvdata()") removed the unnecessary
@@ -185,23 +185,25 @@
 change five years from now.
 
 If your patch fixes a bug in a specific commit, e.g. you found an issue using
-git-bisect, please use the 'Fixes:' tag with the first 12 characters of the
-SHA-1 ID, and the one line summary.  For example:
+``git bisect``, please use the 'Fixes:' tag with the first 12 characters of
+the SHA-1 ID, and the one line summary.  For example::
 
 	Fixes: e21d2170f366 ("video: remove unnecessary platform_set_drvdata()")
 
-The following git-config settings can be used to add a pretty format for
-outputting the above style in the git log or git show commands
+The following ``git config`` settings can be used to add a pretty format for
+outputting the above style in the ``git log`` or ``git show`` commands::
 
 	[core]
 		abbrev = 12
 	[pretty]
 		fixes = Fixes: %h (\"%s\")
 
-3) Separate your changes.
--------------------------
+.. _split_changes:
 
-Separate each _logical change_ into a separate patch.
+3) Separate your changes
+------------------------
+
+Separate each **logical change** into a separate patch.
 
 For example, if your changes include both bug fixes and performance
 enhancements for a single driver, separate those changes into two
@@ -217,12 +219,12 @@
 on its own merits.
 
 If one patch depends on another patch in order for a change to be
-complete, that is OK.  Simply note "this patch depends on patch X"
+complete, that is OK.  Simply note **"this patch depends on patch X"**
 in your patch description.
 
 When dividing your change into a series of patches, take special care to
 ensure that the kernel builds and runs properly after each patch in the
-series.  Developers using "git bisect" to track down a problem can end up
+series.  Developers using ``git bisect`` to track down a problem can end up
 splitting your patch series at any point; they will not thank you if you
 introduce bugs in the middle.
 
@@ -231,11 +233,13 @@
 
 
 
-4) Style-check your changes.
-----------------------------
+4) Style-check your changes
+---------------------------
 
 Check your patch for basic style violations, details of which can be
-found in Documentation/CodingStyle.  Failure to do so simply wastes
+found in
+:ref:`Documentation/CodingStyle <codingstyle>`.
+Failure to do so simply wastes
 the reviewers time and will get your patch rejected, probably
 without even being read.
 
@@ -260,8 +264,8 @@
 patch.
 
 
-5) Select the recipients for your patch.
-----------------------------------------
+5) Select the recipients for your patch
+---------------------------------------
 
 You should always copy the appropriate subsystem maintainer(s) on any patch
 to code that they maintain; look through the MAINTAINERS file and the
@@ -295,13 +299,14 @@
 obviously, the patch should not be sent to any public lists.
 
 Patches that fix a severe bug in a released kernel should be directed
-toward the stable maintainers by putting a line like this:
+toward the stable maintainers by putting a line like this::
 
   Cc: stable@vger.kernel.org
 
 into the sign-off area of your patch (note, NOT an email recipient).  You
-should also read Documentation/stable_kernel_rules.txt in addition to this
-file.
+should also read
+:ref:`Documentation/stable_kernel_rules.txt <stable_kernel_rules>`
+in addition to this file.
 
 Note, however, that some subsystem maintainers want to come to their own
 conclusions on which patches should go to the stable trees.  The networking
@@ -312,28 +317,30 @@
 maintainer (as listed in the MAINTAINERS file) a man-pages patch, or at
 least a notification of the change, so that some information makes its way
 into the manual pages.  User-space API changes should also be copied to
-linux-api@vger.kernel.org. 
+linux-api@vger.kernel.org.
 
 For small patches you may want to CC the Trivial Patch Monkey
 trivial@kernel.org which collects "trivial" patches. Have a look
 into the MAINTAINERS file for its current manager.
+
 Trivial patches must qualify for one of the following rules:
- Spelling fixes in documentation
- Spelling fixes for errors which could break grep(1)
- Warning fixes (cluttering with useless warnings is bad)
- Compilation fixes (only if they are actually correct)
- Runtime fixes (only if they actually fix things)
- Removing use of deprecated functions/macros
- Contact detail and documentation fixes
- Non-portable code replaced by portable code (even in arch-specific,
- since people copy, as long as it's trivial)
- Any fix by the author/maintainer of the file (ie. patch monkey
- in re-transmission mode)
+
+- Spelling fixes in documentation
+- Spelling fixes for errors which could break :manpage:`grep(1)`
+- Warning fixes (cluttering with useless warnings is bad)
+- Compilation fixes (only if they are actually correct)
+- Runtime fixes (only if they actually fix things)
+- Removing use of deprecated functions/macros
+- Contact detail and documentation fixes
+- Non-portable code replaced by portable code (even in arch-specific,
+  since people copy, as long as it's trivial)
+- Any fix by the author/maintainer of the file (ie. patch monkey
+  in re-transmission mode)
 
 
 
-6) No MIME, no links, no compression, no attachments.  Just plain text.
------------------------------------------------------------------------
+6) No MIME, no links, no compression, no attachments.  Just plain text
+----------------------------------------------------------------------
 
 Linus and other kernel developers need to be able to read and comment
 on the changes you are submitting.  It is important for a kernel
@@ -341,8 +348,11 @@
 tools, so that they may comment on specific portions of your code.
 
 For this reason, all patches should be submitted by e-mail "inline".
-WARNING:  Be wary of your editor's word-wrap corrupting your patch,
-if you choose to cut-n-paste your patch.
+
+.. warning::
+
+  Be wary of your editor's word-wrap corrupting your patch,
+  if you choose to cut-n-paste your patch.
 
 Do not attach the patch as a MIME attachment, compressed or not.
 Many popular e-mail applications will not always transmit a MIME
@@ -353,11 +363,12 @@
 Exception:  If your mailer is mangling patches then someone may ask
 you to re-send them using MIME.
 
-See Documentation/email-clients.txt for hints about configuring
-your e-mail client so that it sends your patches untouched.
+See :ref:`Documentation/email-clients.txt <email_clients>`
+for hints about configuring your e-mail client so that it sends your patches
+untouched.
 
-7) E-mail size.
----------------
+7) E-mail size
+--------------
 
 Large changes are not appropriate for mailing lists, and some
 maintainers.  If your patch, uncompressed, exceeds 300 kB in size,
@@ -366,8 +377,8 @@
 that if your patch exceeds 300 kB, it almost certainly needs to be broken up
 anyway.
 
-8) Respond to review comments.
-------------------------------
+8) Respond to review comments
+-----------------------------
 
 Your patch will almost certainly get comments from reviewers on ways in
 which the patch can be improved.  You must respond to those comments;
@@ -382,8 +393,8 @@
 politely and address the problems they have pointed out.
 
 
-9) Don't get discouraged - or impatient.
-----------------------------------------
+9) Don't get discouraged - or impatient
+---------------------------------------
 
 After you have submitted your change, be patient and wait.  Reviewers are
 busy people and may not get to your patch right away.
@@ -419,9 +430,10 @@
 pass it on as an open-source patch.  The rules are pretty simple: if you
 can certify the below:
 
-        Developer's Certificate of Origin 1.1
+Developer's Certificate of Origin 1.1
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-        By making a contribution to this project, I certify that:
+By making a contribution to this project, I certify that:
 
         (a) The contribution was created in whole or in part by me and I
             have the right to submit it under the open source license
@@ -445,7 +457,7 @@
             maintained indefinitely and may be redistributed consistent with
             this project or the open source license(s) involved.
 
-then you just add a line saying
+then you just add a line saying::
 
 	Signed-off-by: Random J Developer <random@developer.example.org>
 
@@ -466,7 +478,7 @@
 the nature of your changes. While there is nothing mandatory about this, it
 seems like prepending the description with your mail and/or name, all
 enclosed in square brackets, is noticeable enough to make it obvious that
-you are responsible for last-minute changes. Example :
+you are responsible for last-minute changes. Example::
 
 	Signed-off-by: Random J Developer <random@developer.example.org>
 	[lucky@maintainer.example.org: struct foo moved from foo.c to foo.h]
@@ -481,15 +493,15 @@
 Special note to back-porters: It seems to be a common and useful practice
 to insert an indication of the origin of a patch at the top of the commit
 message (just after the subject line) to facilitate tracking. For instance,
-here's what we see in a 3.x-stable release:
+here's what we see in a 3.x-stable release::
 
-Date:   Tue Oct 7 07:26:38 2014 -0400
+  Date:   Tue Oct 7 07:26:38 2014 -0400
 
     libata: Un-break ATA blacklist
 
     commit 1c40279960bcd7d52dbdf1d466b20d24b99176c8 upstream.
 
-And here's what might appear in an older kernel once a patch is backported:
+And here's what might appear in an older kernel once a patch is backported::
 
     Date:   Tue May 13 22:12:27 2008 +0200
 
@@ -529,7 +541,7 @@
 list archives.
 
 If a person has had the opportunity to comment on a patch, but has not
-provided such comments, you may optionally add a "Cc:" tag to the patch.
+provided such comments, you may optionally add a ``Cc:`` tag to the patch.
 This is the only tag which might be added without an explicit action by the
 person it names - but it should indicate that this person was copied on the
 patch.  This tag documents that potentially interested parties
@@ -552,11 +564,12 @@
 Reviewed-by:, instead, indicates that the patch has been reviewed and found
 acceptable according to the Reviewer's Statement:
 
-	Reviewer's statement of oversight
+Reviewer's statement of oversight
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-	By offering my Reviewed-by: tag, I state that:
+By offering my Reviewed-by: tag, I state that:
 
- 	 (a) I have carried out a technical review of this patch to
+	 (a) I have carried out a technical review of this patch to
 	     evaluate its appropriateness and readiness for inclusion into
 	     the mainline kernel.
 
@@ -594,24 +607,25 @@
 is used to make it easy to determine where a bug originated, which can help
 review a bug fix. This tag also assists the stable kernel team in determining
 which stable kernel versions should receive your fix. This is the preferred
-method for indicating a bug fixed by the patch. See #2 above for more details.
+method for indicating a bug fixed by the patch. See :ref:`describe_changes`
+for more details.
 
 
 14) The canonical patch format
 ------------------------------
 
 This section describes how the patch itself should be formatted.  Note
-that, if you have your patches stored in a git repository, proper patch
-formatting can be had with "git format-patch".  The tools cannot create
+that, if you have your patches stored in a ``git`` repository, proper patch
+formatting can be had with ``git format-patch``.  The tools cannot create
 the necessary text, though, so read the instructions below anyway.
 
-The canonical patch subject line is:
+The canonical patch subject line is::
 
     Subject: [PATCH 001/123] subsystem: summary phrase
 
 The canonical patch message body contains the following:
 
-  - A "from" line specifying the patch author (only needed if the person
+  - A ``from`` line specifying the patch author (only needed if the person
     sending the patch is not the author).
 
   - An empty line.
@@ -619,46 +633,46 @@
   - The body of the explanation, line wrapped at 75 columns, which will
     be copied to the permanent changelog to describe this patch.
 
-  - The "Signed-off-by:" lines, described above, which will
+  - The ``Signed-off-by:`` lines, described above, which will
     also go in the changelog.
 
-  - A marker line containing simply "---".
+  - A marker line containing simply ``---``.
 
   - Any additional comments not suitable for the changelog.
 
-  - The actual patch (diff output).
+  - The actual patch (``diff`` output).
 
 The Subject line format makes it very easy to sort the emails
 alphabetically by subject line - pretty much any email reader will
 support that - since because the sequence number is zero-padded,
 the numerical and alphabetic sort is the same.
 
-The "subsystem" in the email's Subject should identify which
+The ``subsystem`` in the email's Subject should identify which
 area or subsystem of the kernel is being patched.
 
-The "summary phrase" in the email's Subject should concisely
-describe the patch which that email contains.  The "summary
-phrase" should not be a filename.  Do not use the same "summary
-phrase" for every patch in a whole patch series (where a "patch
-series" is an ordered sequence of multiple, related patches).
+The ``summary phrase`` in the email's Subject should concisely
+describe the patch which that email contains.  The ``summary
+phrase`` should not be a filename.  Do not use the same ``summary
+phrase`` for every patch in a whole patch series (where a ``patch
+series`` is an ordered sequence of multiple, related patches).
 
-Bear in mind that the "summary phrase" of your email becomes a
+Bear in mind that the ``summary phrase`` of your email becomes a
 globally-unique identifier for that patch.  It propagates all the way
-into the git changelog.  The "summary phrase" may later be used in
+into the ``git`` changelog.  The ``summary phrase`` may later be used in
 developer discussions which refer to the patch.  People will want to
-google for the "summary phrase" to read discussion regarding that
+google for the ``summary phrase`` to read discussion regarding that
 patch.  It will also be the only thing that people may quickly see
 when, two or three months later, they are going through perhaps
-thousands of patches using tools such as "gitk" or "git log
---oneline".
+thousands of patches using tools such as ``gitk`` or ``git log
+--oneline``.
 
-For these reasons, the "summary" must be no more than 70-75
+For these reasons, the ``summary`` must be no more than 70-75
 characters, and it must describe both what the patch changes, as well
 as why the patch might be necessary.  It is challenging to be both
 succinct and descriptive, but that is what a well-written summary
 should do.
 
-The "summary phrase" may be prefixed by tags enclosed in square
+The ``summary phrase`` may be prefixed by tags enclosed in square
 brackets: "Subject: [PATCH <tag>...] <summary phrase>".  The tags are
 not considered part of the summary phrase, but describe how the patch
 should be treated.  Common tags might include a version descriptor if
@@ -670,19 +684,19 @@
 applied and that they have reviewed or applied all of the patches in
 the patch series.
 
-A couple of example Subjects:
+A couple of example Subjects::
 
     Subject: [PATCH 2/5] ext2: improve scalability of bitmap searching
     Subject: [PATCH v2 01/27] x86: fix eflags tracking
 
-The "from" line must be the very first line in the message body,
+The ``from`` line must be the very first line in the message body,
 and has the form:
 
         From: Original Author <author@example.com>
 
-The "from" line specifies who will be credited as the author of the
-patch in the permanent changelog.  If the "from" line is missing,
-then the "From:" line from the email header will be used to determine
+The ``from`` line specifies who will be credited as the author of the
+patch in the permanent changelog.  If the ``from`` line is missing,
+then the ``From:`` line from the email header will be used to determine
 the patch author in the changelog.
 
 The explanation body will be committed to the permanent source
@@ -694,35 +708,37 @@
 looking for the applicable patch.  If a patch fixes a compile failure,
 it may not be necessary to include _all_ of the compile failures; just
 enough that it is likely that someone searching for the patch can find
-it.  As in the "summary phrase", it is important to be both succinct as
+it.  As in the ``summary phrase``, it is important to be both succinct as
 well as descriptive.
 
-The "---" marker line serves the essential purpose of marking for patch
+The ``---`` marker line serves the essential purpose of marking for patch
 handling tools where the changelog message ends.
 
-One good use for the additional comments after the "---" marker is for
-a diffstat, to show what files have changed, and the number of
-inserted and deleted lines per file.  A diffstat is especially useful
+One good use for the additional comments after the ``---`` marker is for
+a ``diffstat``, to show what files have changed, and the number of
+inserted and deleted lines per file.  A ``diffstat`` is especially useful
 on bigger patches.  Other comments relevant only to the moment or the
 maintainer, not suitable for the permanent changelog, should also go
-here.  A good example of such comments might be "patch changelogs"
+here.  A good example of such comments might be ``patch changelogs``
 which describe what has changed between the v1 and v2 version of the
 patch.
 
-If you are going to include a diffstat after the "---" marker, please
-use diffstat options "-p 1 -w 70" so that filenames are listed from
+If you are going to include a ``diffstat`` after the ``---`` marker, please
+use ``diffstat`` options ``-p 1 -w 70`` so that filenames are listed from
 the top of the kernel source tree and don't use too much horizontal
-space (easily fit in 80 columns, maybe with some indentation).  (git
+space (easily fit in 80 columns, maybe with some indentation).  (``git``
 generates appropriate diffstats by default.)
 
 See more details on the proper patch format in the following
 references.
 
+.. _explicit_in_reply_to:
+
 15) Explicit In-Reply-To headers
 --------------------------------
 
 It can be helpful to manually add In-Reply-To: headers to a patch
-(e.g., when using "git send-email") to associate the patch with
+(e.g., when using ``git send-email``) to associate the patch with
 previous relevant discussion, e.g. to link a bug fix to the email with
 the bug report.  However, for a multi-patch series, it is generally
 best to avoid using In-Reply-To: to link to older versions of the
@@ -732,12 +748,12 @@
 the cover email text) to link to an earlier version of the patch series.
 
 
-16) Sending "git pull" requests
--------------------------------
+16) Sending ``git pull`` requests
+---------------------------------
 
 If you have a series of patches, it may be most convenient to have the
 maintainer pull them directly into the subsystem repository with a
-"git pull" operation.  Note, however, that pulling patches from a developer
+``git pull`` operation.  Note, however, that pulling patches from a developer
 requires a higher degree of trust than taking patches from a mailing list.
 As a result, many subsystem maintainers are reluctant to take pull
 requests, especially from new, unknown developers.  If in doubt you can use
@@ -746,7 +762,7 @@
 
 A pull request should have [GIT] or [PULL] in the subject line.  The
 request itself should include the repository name and the branch of
-interest on a single line; it should look something like:
+interest on a single line; it should look something like::
 
   Please pull from
 
@@ -755,10 +771,10 @@
   to get these changes:
 
 A pull request should also include an overall message saying what will be
-included in the request, a "git shortlog" listing of the patches
-themselves, and a diffstat showing the overall effect of the patch series.
+included in the request, a ``git shortlog`` listing of the patches
+themselves, and a ``diffstat`` showing the overall effect of the patch series.
 The easiest way to get all this information together is, of course, to let
-git do it for you with the "git request-pull" command.
+``git`` do it for you with the ``git request-pull`` command.
 
 Some maintainers (including Linus) want to see pull requests from signed
 commits; that increases their confidence that the request actually came
@@ -770,8 +786,8 @@
 new developers, but there is no way around it.  Attending conferences can
 be a good way to find developers who can sign your key.
 
-Once you have prepared a patch series in git that you wish to have somebody
-pull, create a signed tag with "git tag -s".  This will create a new tag
+Once you have prepared a patch series in ``git`` that you wish to have somebody
+pull, create a signed tag with ``git tag -s``.  This will create a new tag
 identifying the last commit in the series and containing a signature
 created with your private key.  You will also have the opportunity to add a
 changelog-style message to the tag; this is an ideal place to describe the
@@ -782,14 +798,13 @@
 public tree.
 
 When generating your pull request, use the signed tag as the target.  A
-command like this will do the trick:
+command like this will do the trick::
 
   git request-pull master git://my.public.tree/linux.git my-signed-tag
 
 
-----------------------
-SECTION 2 - REFERENCES
-----------------------
+REFERENCES
+**********
 
 Andrew Morton, "The perfect patch" (tpp).
   <http://www.ozlabs.org/~akpm/stuff/tpp.txt>
@@ -799,23 +814,28 @@
 
 Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
   <http://www.kroah.com/log/linux/maintainer.html>
+
   <http://www.kroah.com/log/linux/maintainer-02.html>
+
   <http://www.kroah.com/log/linux/maintainer-03.html>
+
   <http://www.kroah.com/log/linux/maintainer-04.html>
+
   <http://www.kroah.com/log/linux/maintainer-05.html>
+
   <http://www.kroah.com/log/linux/maintainer-06.html>
 
 NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
   <https://lkml.org/lkml/2005/7/11/336>
 
 Kernel Documentation/CodingStyle:
-  <Documentation/CodingStyle>
+  :ref:`Documentation/CodingStyle <codingstyle>`
 
 Linus Torvalds's mail on the canonical patch format:
   <http://lkml.org/lkml/2005/4/7/183>
 
 Andi Kleen, "On submitting kernel patches"
   Some strategies to get difficult or controversial changes in.
+
   http://halobates.de/on-submitting-patches.pdf
 
---
diff --git a/Documentation/acpi/acpi-lid.txt b/Documentation/acpi/acpi-lid.txt
new file mode 100644
index 0000000..effe7af
--- /dev/null
+++ b/Documentation/acpi/acpi-lid.txt
@@ -0,0 +1,96 @@
+Special Usage Model of the ACPI Control Method Lid Device
+
+Copyright (C) 2016, Intel Corporation
+Author: Lv Zheng <lv.zheng@intel.com>
+
+
+Abstract:
+
+Platforms containing lids convey lid state (open/close) to OSPMs using a
+control method lid device. To implement this, the AML tables issue
+Notify(lid_device, 0x80) to notify the OSPMs whenever the lid state has
+changed. The _LID control method for the lid device must be implemented to
+report the "current" state of the lid as either "opened" or "closed".
+
+For most platforms, both the _LID method and the lid notifications are
+reliable. However, there are exceptions. In order to work with these
+exceptional buggy platforms, special restrictions and expections should be
+taken into account. This document describes the restrictions and the
+expections of the Linux ACPI lid device driver.
+
+
+1. Restrictions of the returning value of the _LID control method
+
+The _LID control method is described to return the "current" lid state.
+However the word of "current" has ambiguity, some buggy AML tables return
+the lid state upon the last lid notification instead of returning the lid
+state upon the last _LID evaluation. There won't be difference when the
+_LID control method is evaluated during the runtime, the problem is its
+initial returning value. When the AML tables implement this control method
+with cached value, the initial returning value is likely not reliable.
+There are platforms always retun "closed" as initial lid state.
+
+2. Restrictions of the lid state change notifications
+
+There are buggy AML tables never notifying when the lid device state is
+changed to "opened". Thus the "opened" notification is not guaranteed. But
+it is guaranteed that the AML tables always notify "closed" when the lid
+state is changed to "closed". The "closed" notification is normally used to
+trigger some system power saving operations on Windows. Since it is fully
+tested, it is reliable from all AML tables.
+
+3. Expections for the userspace users of the ACPI lid device driver
+
+The ACPI button driver exports the lid state to the userspace via the
+following file:
+  /proc/acpi/button/lid/LID0/state
+This file actually calls the _LID control method described above. And given
+the previous explanation, it is not reliable enough on some platforms. So
+it is advised for the userspace program to not to solely rely on this file
+to determine the actual lid state.
+
+The ACPI button driver emits the following input event to the userspace:
+  SW_LID
+The ACPI lid device driver is implemented to try to deliver the platform
+triggered events to the userspace. However, given the fact that the buggy
+firmware cannot make sure "opened"/"closed" events are paired, the ACPI
+button driver uses the following 3 modes in order not to trigger issues.
+
+If the userspace hasn't been prepared to ignore the unreliable "opened"
+events and the unreliable initial state notification, Linux users can use
+the following kernel parameters to handle the possible issues:
+A. button.lid_init_state=method:
+   When this option is specified, the ACPI button driver reports the
+   initial lid state using the returning value of the _LID control method
+   and whether the "opened"/"closed" events are paired fully relies on the
+   firmware implementation.
+   This option can be used to fix some platforms where the returning value
+   of the _LID control method is reliable but the initial lid state
+   notification is missing.
+   This option is the default behavior during the period the userspace
+   isn't ready to handle the buggy AML tables.
+B. button.lid_init_state=open:
+   When this option is specified, the ACPI button driver always reports the
+   initial lid state as "opened" and whether the "opened"/"closed" events
+   are paired fully relies on the firmware implementation.
+   This may fix some platforms where the returning value of the _LID
+   control method is not reliable and the initial lid state notification is
+   missing.
+
+If the userspace has been prepared to ignore the unreliable "opened" events
+and the unreliable initial state notification, Linux users should always
+use the following kernel parameter:
+C. button.lid_init_state=ignore:
+   When this option is specified, the ACPI button driver never reports the
+   initial lid state and there is a compensation mechanism implemented to
+   ensure that the reliable "closed" notifications can always be delievered
+   to the userspace by always pairing "closed" input events with complement
+   "opened" input events. But there is still no guarantee that the "opened"
+   notifications can be delivered to the userspace when the lid is actually
+   opens given that some AML tables do not send "opened" notifications
+   reliably.
+   In this mode, if everything is correctly implemented by the platform
+   firmware, the old userspace programs should still work. Otherwise, the
+   new userspace programs are required to work with the ACPI button driver.
+   This option will be the default behavior after the userspace is ready to
+   handle the buggy AML tables.
diff --git a/Documentation/acpi/gpio-properties.txt b/Documentation/acpi/gpio-properties.txt
index f35dad11..5aafe0b3 100644
--- a/Documentation/acpi/gpio-properties.txt
+++ b/Documentation/acpi/gpio-properties.txt
@@ -28,8 +28,8 @@
           ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
           Package ()
 	  {
-              Package () {"reset-gpio", Package() {^BTH, 1, 1, 0 }},
-              Package () {"shutdown-gpio", Package() {^BTH, 0, 0, 0 }},
+              Package () {"reset-gpios", Package() {^BTH, 1, 1, 0 }},
+              Package () {"shutdown-gpios", Package() {^BTH, 0, 0, 0 }},
           }
       })
   }
@@ -48,7 +48,7 @@
 active low or high, the "active_low" argument can be used here.  Setting
 it to 1 marks the GPIO as active low.
 
-In our Bluetooth example the "reset-gpio" refers to the second GpioIo()
+In our Bluetooth example the "reset-gpios" refers to the second GpioIo()
 resource, second pin in that resource with the GPIO number of 31.
 
 ACPI GPIO Mappings Provided by Drivers
@@ -83,8 +83,8 @@
 static const struct acpi_gpio_params shutdown_gpio = { 0, 0, false };
 
 static const struct acpi_gpio_mapping bluetooth_acpi_gpios[] = {
-  { "reset-gpio", &reset_gpio, 1 },
-  { "shutdown-gpio", &shutdown_gpio, 1 },
+  { "reset-gpios", &reset_gpio, 1 },
+  { "shutdown-gpios", &shutdown_gpio, 1 },
   { },
 };
 
diff --git a/Documentation/applying-patches.txt b/Documentation/applying-patches.txt
index 77df55b..02ce492 100644
--- a/Documentation/applying-patches.txt
+++ b/Documentation/applying-patches.txt
@@ -1,9 +1,13 @@
+.. _applying_patches:
 
-	Applying Patches To The Linux Kernel
-	------------------------------------
+Applying Patches To The Linux Kernel
+++++++++++++++++++++++++++++++++++++
 
-	Original by: Jesper Juhl, August 2005
-	Last update: 2006-01-05
+Original by:
+	Jesper Juhl, August 2005
+
+Last update:
+	2016-09-14
 
 
 A frequently asked question on the Linux Kernel Mailing List is how to apply
@@ -17,10 +21,12 @@
 
 
 What is a patch?
----
- A patch is a small text document containing a delta of changes between two
-different versions of a source tree. Patches are created with the `diff'
+================
+
+A patch is a small text document containing a delta of changes between two
+different versions of a source tree. Patches are created with the ``diff``
 program.
+
 To correctly apply a patch you need to know what base it was generated from
 and what new version the patch will change the source tree into. These
 should both be present in the patch file metadata or be possible to deduce
@@ -28,8 +34,9 @@
 
 
 How do I apply or revert a patch?
----
- You apply a patch with the `patch' program. The patch program reads a diff
+=================================
+
+You apply a patch with the ``patch`` program. The patch program reads a diff
 (or patch) file and makes the changes to the source tree described in it.
 
 Patches for the Linux kernel are generated relative to the parent directory
@@ -38,26 +45,33 @@
 This means that paths to files inside the patch file contain the name of the
 kernel source directories it was generated against (or some other directory
 names like "a/" and "b/").
+
 Since this is unlikely to match the name of the kernel source dir on your
 local machine (but is often useful info to see what version an otherwise
 unlabeled patch was generated against) you should change into your kernel
 source directory and then strip the first element of the path from filenames
-in the patch file when applying it (the -p1 argument to `patch' does this).
+in the patch file when applying it (the ``-p1`` argument to ``patch`` does
+this).
 
 To revert a previously applied patch, use the -R argument to patch.
-So, if you applied a patch like this:
+So, if you applied a patch like this::
+
 	patch -p1 < ../patch-x.y.z
 
-You can revert (undo) it like this:
+You can revert (undo) it like this::
+
 	patch -R -p1 < ../patch-x.y.z
 
 
-How do I feed a patch/diff file to `patch'?
----
- This (as usual with Linux and other UNIX like operating systems) can be
+How do I feed a patch/diff file to ``patch``?
+=============================================
+
+This (as usual with Linux and other UNIX like operating systems) can be
 done in several different ways.
+
 In all the examples below I feed the file (in uncompressed form) to patch
-via stdin using the following syntax:
+via stdin using the following syntax::
+
 	patch -p1 < path/to/patch-x.y.z
 
 If you just want to be able to follow the examples below and don't want to
@@ -65,35 +79,40 @@
 section here.
 
 Patch can also get the name of the file to use via the -i argument, like
-this:
+this::
+
 	patch -p1 -i path/to/patch-x.y.z
 
-If your patch file is compressed with gzip or bzip2 and you don't want to
+If your patch file is compressed with gzip or xz and you don't want to
 uncompress it before applying it, then you can feed it to patch like this
-instead:
-	zcat path/to/patch-x.y.z.gz | patch -p1
-	bzcat path/to/patch-x.y.z.bz2 | patch -p1
+instead::
+
+	xzcat path/to/patch-x.y.z.xz | patch -p1
+	bzcat path/to/patch-x.y.z.gz | patch -p1
 
 If you wish to uncompress the patch file by hand first before applying it
 (what I assume you've done in the examples below), then you simply run
-gunzip or bunzip2 on the file -- like this:
+gunzip or xz on the file -- like this::
+
 	gunzip patch-x.y.z.gz
-	bunzip2 patch-x.y.z.bz2
+	xz -d patch-x.y.z.xz
 
 Which will leave you with a plain text patch-x.y.z file that you can feed to
-patch via stdin or the -i argument, as you prefer.
+patch via stdin or the ``-i`` argument, as you prefer.
 
-A few other nice arguments for patch are -s which causes patch to be silent
+A few other nice arguments for patch are ``-s`` which causes patch to be silent
 except for errors which is nice to prevent errors from scrolling out of the
-screen too fast, and --dry-run which causes patch to just print a listing of
-what would happen, but doesn't actually make any changes. Finally --verbose
+screen too fast, and ``--dry-run`` which causes patch to just print a listing of
+what would happen, but doesn't actually make any changes. Finally ``--verbose``
 tells patch to print more information about the work being done.
 
 
 Common errors when patching
----
- When patch applies a patch file it attempts to verify the sanity of the
+===========================
+
+When patch applies a patch file it attempts to verify the sanity of the
 file in different ways.
+
 Checking that the file looks like a valid patch file and checking the code
 around the bits being modified matches the context provided in the patch are
 just two of the basic sanity checks patch does.
@@ -111,13 +130,13 @@
 usually adjust the line numbers and apply the patch.
 
 Whenever patch applies a patch that it had to modify a bit to make it fit
-it'll tell you about it by saying the patch applied with 'fuzz'.
+it'll tell you about it by saying the patch applied with **fuzz**.
 You should be wary of such changes since even though patch probably got it
 right it doesn't /always/ get it right, and the result will sometimes be
 wrong.
 
 When patch encounters a change that it can't fix up with fuzz it rejects it
-outright and leaves a file with a .rej extension (a reject file). You can
+outright and leaves a file with a ``.rej`` extension (a reject file). You can
 read this file to see exactly what change couldn't be applied, so you can
 go fix it up by hand if you wish.
 
@@ -132,43 +151,47 @@
 
 Let's look a bit more at some of the messages patch can produce.
 
-If patch stops and presents a "File to patch:" prompt, then patch could not
+If patch stops and presents a ``File to patch:`` prompt, then patch could not
 find a file to be patched. Most likely you forgot to specify -p1 or you are
 in the wrong directory. Less often, you'll find patches that need to be
-applied with -p0 instead of -p1 (reading the patch file should reveal if
+applied with ``-p0`` instead of ``-p1`` (reading the patch file should reveal if
 this is the case -- if so, then this is an error by the person who created
 the patch but is not fatal).
 
-If you get "Hunk #2 succeeded at 1887 with fuzz 2 (offset 7 lines)." or a
+If you get ``Hunk #2 succeeded at 1887 with fuzz 2 (offset 7 lines).`` or a
 message similar to that, then it means that patch had to adjust the location
 of the change (in this example it needed to move 7 lines from where it
 expected to make the change to make it fit).
+
 The resulting file may or may not be OK, depending on the reason the file
 was different than expected.
+
 This often happens if you try to apply a patch that was generated against a
 different kernel version than the one you are trying to patch.
 
-If you get a message like "Hunk #3 FAILED at 2387.", then it means that the
+If you get a message like ``Hunk #3 FAILED at 2387.``, then it means that the
 patch could not be applied correctly and the patch program was unable to
-fuzz its way through. This will generate a .rej file with the change that
-caused the patch to fail and also a .orig file showing you the original
+fuzz its way through. This will generate a ``.rej`` file with the change that
+caused the patch to fail and also a ``.orig`` file showing you the original
 content that couldn't be changed.
 
-If you get "Reversed (or previously applied) patch detected!  Assume -R? [n]"
+If you get ``Reversed (or previously applied) patch detected!  Assume -R? [n]``
 then patch detected that the change contained in the patch seems to have
 already been made.
+
 If you actually did apply this patch previously and you just re-applied it
 in error, then just say [n]o and abort this patch. If you applied this patch
 previously and actually intended to revert it, but forgot to specify -R,
-then you can say [y]es here to make patch revert it for you.
+then you can say [**y**]es here to make patch revert it for you.
+
 This can also happen if the creator of the patch reversed the source and
 destination directories when creating the patch, and in that case reverting
 the patch will in fact apply it.
 
-A message similar to "patch: **** unexpected end of file in patch" or "patch
-unexpectedly ends in middle of line" means that patch could make no sense of
-the file you fed to it. Either your download is broken, you tried to feed
-patch a compressed patch file without uncompressing it first, or the patch
+A message similar to ``patch: **** unexpected end of file in patch`` or
+``patch unexpectedly ends in middle of line`` means that patch could make no
+sense of the file you fed to it. Either your download is broken, you tried to
+feed patch a compressed patch file without uncompressing it first, or the patch
 file that you are using has been mangled by a mail client or mail transfer
 agent along the way somewhere, e.g., by splitting a long line into two lines.
 Often these warnings can easily be fixed by joining (concatenating) the
@@ -182,28 +205,32 @@
 wish to apply.
 
 
-Are there any alternatives to `patch'?
----
- Yes there are alternatives.
+Are there any alternatives to ``patch``?
+========================================
 
- You can use the `interdiff' program (http://cyberelk.net/tim/patchutils/) to
+
+Yes there are alternatives.
+
+You can use the ``interdiff`` program (http://cyberelk.net/tim/patchutils/) to
 generate a patch representing the differences between two patches and then
 apply the result.
-This will let you move from something like 2.6.12.2 to 2.6.12.3 in a single
+
+This will let you move from something like 4.7.2 to 4.7.3 in a single
 step. The -z flag to interdiff will even let you feed it patches in gzip or
 bzip2 compressed form directly without the use of zcat or bzcat or manual
 decompression.
 
-Here's how you'd go from 2.6.12.2 to 2.6.12.3 in a single step:
-	interdiff -z ../patch-2.6.12.2.bz2 ../patch-2.6.12.3.gz | patch -p1
+Here's how you'd go from 4.7.2 to 4.7.3 in a single step::
+
+	interdiff -z ../patch-4.7.2.gz ../patch-4.7.3.gz | patch -p1
 
 Although interdiff may save you a step or two you are generally advised to
 do the additional steps since interdiff can get things wrong in some cases.
 
- Another alternative is `ketchup', which is a python script for automatic
+Another alternative is ``ketchup``, which is a python script for automatic
 downloading and applying of patches (http://www.selenic.com/ketchup/).
 
- Other nice tools are diffstat, which shows a summary of changes made by a
+Other nice tools are diffstat, which shows a summary of changes made by a
 patch; lsdiff, which displays a short listing of affected files in a patch
 file, along with (optionally) the line numbers of the start of each patch;
 and grepdiff, which displays a list of the files modified by a patch where
@@ -211,99 +238,103 @@
 
 
 Where can I download the patches?
----
- The patches are available at http://kernel.org/
+=================================
+
+The patches are available at http://kernel.org/
 Most recent patches are linked from the front page, but they also have
 specific homes.
 
-The 2.6.x.y (-stable) and 2.6.x patches live at
- ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
+The 4.x.y (-stable) and 4.x patches live at
+
+	ftp://ftp.kernel.org/pub/linux/kernel/v4.x/
 
 The -rc patches live at
- ftp://ftp.kernel.org/pub/linux/kernel/v2.6/testing/
 
-The -git patches live at
- ftp://ftp.kernel.org/pub/linux/kernel/v2.6/snapshots/
+	ftp://ftp.kernel.org/pub/linux/kernel/v4.x/testing/
 
-The -mm kernels live at
- ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/
-
-In place of ftp.kernel.org you can use ftp.cc.kernel.org, where cc is a
+In place of ``ftp.kernel.org`` you can use ``ftp.cc.kernel.org``, where cc is a
 country code. This way you'll be downloading from a mirror site that's most
 likely geographically closer to you, resulting in faster downloads for you,
 less bandwidth used globally and less load on the main kernel.org servers --
 these are good things, so do use mirrors when possible.
 
 
-The 2.6.x kernels
----
- These are the base stable releases released by Linus. The highest numbered
+The 4.x kernels
+===============
+
+These are the base stable releases released by Linus. The highest numbered
 release is the most recent.
 
 If regressions or other serious flaws are found, then a -stable fix patch
-will be released (see below) on top of this base. Once a new 2.6.x base
+will be released (see below) on top of this base. Once a new 4.x base
 kernel is released, a patch is made available that is a delta between the
-previous 2.6.x kernel and the new one.
+previous 4.x kernel and the new one.
 
-To apply a patch moving from 2.6.11 to 2.6.12, you'd do the following (note
-that such patches do *NOT* apply on top of 2.6.x.y kernels but on top of the
-base 2.6.x kernel -- if you need to move from 2.6.x.y to 2.6.x+1 you need to
-first revert the 2.6.x.y patch).
+To apply a patch moving from 4.6 to 4.7, you'd do the following (note
+that such patches do **NOT** apply on top of 4.x.y kernels but on top of the
+base 4.x kernel -- if you need to move from 4.x.y to 4.x+1 you need to
+first revert the 4.x.y patch).
 
-Here are some examples:
+Here are some examples::
 
-# moving from 2.6.11 to 2.6.12
-$ cd ~/linux-2.6.11			# change to kernel source dir
-$ patch -p1 < ../patch-2.6.12		# apply the 2.6.12 patch
-$ cd ..
-$ mv linux-2.6.11 linux-2.6.12		# rename source dir
+	# moving from 4.6 to 4.7
 
-# moving from 2.6.11.1 to 2.6.12
-$ cd ~/linux-2.6.11.1			# change to kernel source dir
-$ patch -p1 -R < ../patch-2.6.11.1	# revert the 2.6.11.1 patch
-					# source dir is now 2.6.11
-$ patch -p1 < ../patch-2.6.12		# apply new 2.6.12 patch
-$ cd ..
-$ mv linux-2.6.11.1 linux-2.6.12		# rename source dir
+	$ cd ~/linux-4.6		# change to kernel source dir
+	$ patch -p1 < ../patch-4.7	# apply the 4.7 patch
+	$ cd ..
+	$ mv linux-4.6 linux-4.7	# rename source dir
+
+	# moving from 4.6.1 to 4.7
+
+	$ cd ~/linux-4.6.1		# change to kernel source dir
+	$ patch -p1 -R < ../patch-4.6.1	# revert the 4.6.1 patch
+					# source dir is now 4.6
+	$ patch -p1 < ../patch-4.7	# apply new 4.7 patch
+	$ cd ..
+	$ mv linux-4.6.1 linux-4.7	# rename source dir
 
 
-The 2.6.x.y kernels
----
- Kernels with 4-digit versions are -stable kernels. They contain small(ish)
+The 4.x.y kernels
+=================
+
+Kernels with 3-digit versions are -stable kernels. They contain small(ish)
 critical fixes for security problems or significant regressions discovered
-in a given 2.6.x kernel.
+in a given 4.x kernel.
 
 This is the recommended branch for users who want the most recent stable
 kernel and are not interested in helping test development/experimental
 versions.
 
-If no 2.6.x.y kernel is available, then the highest numbered 2.6.x kernel is
+If no 4.x.y kernel is available, then the highest numbered 4.x kernel is
 the current stable kernel.
 
- note: the -stable team usually do make incremental patches available as well
+.. note::
+
+ The -stable team usually do make incremental patches available as well
  as patches against the latest mainline release, but I only cover the
  non-incremental ones below. The incremental ones can be found at
- ftp://ftp.kernel.org/pub/linux/kernel/v2.6/incr/
+ ftp://ftp.kernel.org/pub/linux/kernel/v4.x/incr/
 
-These patches are not incremental, meaning that for example the 2.6.12.3
-patch does not apply on top of the 2.6.12.2 kernel source, but rather on top
-of the base 2.6.12 kernel source .
-So, in order to apply the 2.6.12.3 patch to your existing 2.6.12.2 kernel
-source you have to first back out the 2.6.12.2 patch (so you are left with a
-base 2.6.12 kernel source) and then apply the new 2.6.12.3 patch.
+These patches are not incremental, meaning that for example the 4.7.3
+patch does not apply on top of the 4.7.2 kernel source, but rather on top
+of the base 4.7 kernel source.
 
-Here's a small example:
+So, in order to apply the 4.7.3 patch to your existing 4.7.2 kernel
+source you have to first back out the 4.7.2 patch (so you are left with a
+base 4.7 kernel source) and then apply the new 4.7.3 patch.
 
-$ cd ~/linux-2.6.12.2			# change into the kernel source dir
-$ patch -p1 -R < ../patch-2.6.12.2	# revert the 2.6.12.2 patch
-$ patch -p1 < ../patch-2.6.12.3		# apply the new 2.6.12.3 patch
-$ cd ..
-$ mv linux-2.6.12.2 linux-2.6.12.3	# rename the kernel source dir
+Here's a small example::
 
+	$ cd ~/linux-4.7.2		# change to the kernel source dir
+	$ patch -p1 -R < ../patch-4.7.2	# revert the 4.7.2 patch
+	$ patch -p1 < ../patch-4.7.3	# apply the new 4.7.3 patch
+	$ cd ..
+	$ mv linux-4.7.2 linux-4.7.3	# rename the kernel source dir
 
 The -rc kernels
----
- These are release-candidate kernels. These are development kernels released
+===============
+
+These are release-candidate kernels. These are development kernels released
 by Linus whenever he deems the current git (the kernel's source management
 tool) tree to be in a reasonably sane state adequate for testing.
 
@@ -317,39 +348,44 @@
 development kernels but do not want to run some of the really experimental
 stuff (such people should see the sections about -git and -mm kernels below).
 
-The -rc patches are not incremental, they apply to a base 2.6.x kernel, just
-like the 2.6.x.y patches described above. The kernel version before the -rcN
+The -rc patches are not incremental, they apply to a base 4.x kernel, just
+like the 4.x.y patches described above. The kernel version before the -rcN
 suffix denotes the version of the kernel that this -rc kernel will eventually
 turn into.
-So, 2.6.13-rc5 means that this is the fifth release candidate for the 2.6.13
-kernel and the patch should be applied on top of the 2.6.12 kernel source.
 
-Here are 3 examples of how to apply these patches:
+So, 4.8-rc5 means that this is the fifth release candidate for the 4.8
+kernel and the patch should be applied on top of the 4.7 kernel source.
 
-# first an example of moving from 2.6.12 to 2.6.13-rc3
-$ cd ~/linux-2.6.12			# change into the 2.6.12 source dir
-$ patch -p1 < ../patch-2.6.13-rc3	# apply the 2.6.13-rc3 patch
-$ cd ..
-$ mv linux-2.6.12 linux-2.6.13-rc3	# rename the source dir
+Here are 3 examples of how to apply these patches::
 
-# now let's move from 2.6.13-rc3 to 2.6.13-rc5
-$ cd ~/linux-2.6.13-rc3			# change into the 2.6.13-rc3 dir
-$ patch -p1 -R < ../patch-2.6.13-rc3	# revert the 2.6.13-rc3 patch
-$ patch -p1 < ../patch-2.6.13-rc5	# apply the new 2.6.13-rc5 patch
-$ cd ..
-$ mv linux-2.6.13-rc3 linux-2.6.13-rc5	# rename the source dir
+	# first an example of moving from 4.7 to 4.8-rc3
 
-# finally let's try and move from 2.6.12.3 to 2.6.13-rc5
-$ cd ~/linux-2.6.12.3			# change to the kernel source dir
-$ patch -p1 -R < ../patch-2.6.12.3	# revert the 2.6.12.3 patch
-$ patch -p1 < ../patch-2.6.13-rc5	# apply new 2.6.13-rc5 patch
-$ cd ..
-$ mv linux-2.6.12.3 linux-2.6.13-rc5	# rename the kernel source dir
+	$ cd ~/linux-4.7			# change to the 4.7 source dir
+	$ patch -p1 < ../patch-4.8-rc3		# apply the 4.8-rc3 patch
+	$ cd ..
+	$ mv linux-4.7 linux-4.8-rc3		# rename the source dir
+
+	# now let's move from 4.8-rc3 to 4.8-rc5
+
+	$ cd ~/linux-4.8-rc3			# change to the 4.8-rc3 dir
+	$ patch -p1 -R < ../patch-4.8-rc3	# revert the 4.8-rc3 patch
+	$ patch -p1 < ../patch-4.8-rc5		# apply the new 4.8-rc5 patch
+	$ cd ..
+	$ mv linux-4.8-rc3 linux-4.8-rc5	# rename the source dir
+
+	# finally let's try and move from 4.7.3 to 4.8-rc5
+
+	$ cd ~/linux-4.7.3			# change to the kernel source dir
+	$ patch -p1 -R < ../patch-4.7.3		# revert the 4.7.3 patch
+	$ patch -p1 < ../patch-4.8-rc5		# apply new 4.8-rc5 patch
+	$ cd ..
+	$ mv linux-4.7.3 linux-4.8-rc5		# rename the kernel source dir
 
 
 The -git kernels
----
- These are daily snapshots of Linus' kernel tree (managed in a git
+================
+
+These are daily snapshots of Linus' kernel tree (managed in a git
 repository, hence the name).
 
 These patches are usually released daily and represent the current state of
@@ -357,91 +393,66 @@
 generated automatically without even a cursory glance to see if they are
 sane.
 
--git patches are not incremental and apply either to a base 2.6.x kernel or
-a base 2.6.x-rc kernel -- you can see which from their name.
-A patch named 2.6.12-git1 applies to the 2.6.12 kernel source and a patch
-named 2.6.13-rc3-git2 applies to the source of the 2.6.13-rc3 kernel.
+-git patches are not incremental and apply either to a base 4.x kernel or
+a base 4.x-rc kernel -- you can see which from their name.
+A patch named 4.7-git1 applies to the 4.7 kernel source and a patch
+named 4.8-rc3-git2 applies to the source of the 4.8-rc3 kernel.
 
-Here are some examples of how to apply these patches:
+Here are some examples of how to apply these patches::
 
-# moving from 2.6.12 to 2.6.12-git1
-$ cd ~/linux-2.6.12			# change to the kernel source dir
-$ patch -p1 < ../patch-2.6.12-git1	# apply the 2.6.12-git1 patch
-$ cd ..
-$ mv linux-2.6.12 linux-2.6.12-git1	# rename the kernel source dir
+	# moving from 4.7 to 4.7-git1
 
-# moving from 2.6.12-git1 to 2.6.13-rc2-git3
-$ cd ~/linux-2.6.12-git1		# change to the kernel source dir
-$ patch -p1 -R < ../patch-2.6.12-git1	# revert the 2.6.12-git1 patch
-					# we now have a 2.6.12 kernel
-$ patch -p1 < ../patch-2.6.13-rc2	# apply the 2.6.13-rc2 patch
-					# the kernel is now 2.6.13-rc2
-$ patch -p1 < ../patch-2.6.13-rc2-git3	# apply the 2.6.13-rc2-git3 patch
-					# the kernel is now 2.6.13-rc2-git3
-$ cd ..
-$ mv linux-2.6.12-git1 linux-2.6.13-rc2-git3	# rename source dir
+	$ cd ~/linux-4.7			# change to the kernel source dir
+	$ patch -p1 < ../patch-4.7-git1		# apply the 4.7-git1 patch
+	$ cd ..
+	$ mv linux-4.7 linux-4.7-git1		# rename the kernel source dir
+
+	# moving from 4.7-git1 to 4.8-rc2-git3
+
+	$ cd ~/linux-4.7-git1			# change to the kernel source dir
+	$ patch -p1 -R < ../patch-4.7-git1	# revert the 4.7-git1 patch
+						# we now have a 4.7 kernel
+	$ patch -p1 < ../patch-4.8-rc2		# apply the 4.8-rc2 patch
+						# the kernel is now 4.8-rc2
+	$ patch -p1 < ../patch-4.8-rc2-git3	# apply the 4.8-rc2-git3 patch
+						# the kernel is now 4.8-rc2-git3
+	$ cd ..
+	$ mv linux-4.7-git1 linux-4.8-rc2-git3	# rename source dir
 
 
-The -mm kernels
----
- These are experimental kernels released by Andrew Morton.
+The -mm patches and the linux-next tree
+=======================================
 
-The -mm tree serves as a sort of proving ground for new features and other
-experimental patches.
-Once a patch has proved its worth in -mm for a while Andrew pushes it on to
-Linus for inclusion in mainline.
+The -mm patches are experimental patches released by Andrew Morton.
 
-Although it's encouraged that patches flow to Linus via the -mm tree, this
-is not always enforced.
-Subsystem maintainers (or individuals) sometimes push their patches directly
-to Linus, even though (or after) they have been merged and tested in -mm (or
-sometimes even without prior testing in -mm).
+In the past, -mm tree were used to also test subsystem patches, but this
+function is now done via the
+:ref:`linux-next <https://www.kernel.org/doc/man-pages/linux-next.html>`
+tree. The Subsystem maintainers push their patches first to linux-next,
+and, during the merge window, sends them directly to Linus.
 
-You should generally strive to get your patches into mainline via -mm to
-ensure maximum testing.
+The -mm patches serve as a sort of proving ground for new features and other
+experimental patches that aren't merged via a subsystem tree.
+Once such patches has proved its worth in -mm for a while Andrew pushes
+it on to Linus for inclusion in mainline.
 
-This branch is in constant flux and contains many experimental features, a
+The linux-next tree is daily updated, and includes the -mm patches.
+Both are in constant flux and contains many experimental features, a
 lot of debugging patches not appropriate for mainline etc., and is the most
 experimental of the branches described in this document.
 
-These kernels are not appropriate for use on systems that are supposed to be
+These patches are not appropriate for use on systems that are supposed to be
 stable and they are more risky to run than any of the other branches (make
 sure you have up-to-date backups -- that goes for any experimental kernel but
-even more so for -mm kernels).
+even more so for -mm patches or using a Kernel from the linux-next tree).
 
-These kernels in addition to all the other experimental patches they contain
-usually also contain any changes in the mainline -git kernels available at
-the time of release.
+Testing of -mm patches and linux-next is greatly appreciated since the whole
+point of those are to weed out regressions, crashes, data corruption bugs,
+build breakage (and any other bug in general) before changes are merged into
+the more stable mainline Linus tree.
 
-Testing of -mm kernels is greatly appreciated since the whole point of the
-tree is to weed out regressions, crashes, data corruption bugs, build
-breakage (and any other bug in general) before changes are merged into the
-more stable mainline Linus tree.
-But testers of -mm should be aware that breakage in this tree is more common
-than in any other tree.
-
-The -mm kernels are not released on a fixed schedule, but usually a few -mm
-kernels are released in between each -rc kernel (1 to 3 is common).
-The -mm kernels apply to either a base 2.6.x kernel (when no -rc kernels
-have been released yet) or to a Linus -rc kernel.
-
-Here are some examples of applying the -mm patches:
-
-# moving from 2.6.12 to 2.6.12-mm1
-$ cd ~/linux-2.6.12			# change to the 2.6.12 source dir
-$ patch -p1 < ../2.6.12-mm1		# apply the 2.6.12-mm1 patch
-$ cd ..
-$ mv linux-2.6.12 linux-2.6.12-mm1	# rename the source appropriately
-
-# moving from 2.6.12-mm1 to 2.6.13-rc3-mm3
-$ cd ~/linux-2.6.12-mm1
-$ patch -p1 -R < ../2.6.12-mm1		# revert the 2.6.12-mm1 patch
-					# we now have a 2.6.12 source
-$ patch -p1 < ../patch-2.6.13-rc3	# apply the 2.6.13-rc3 patch
-					# we now have a 2.6.13-rc3 source
-$ patch -p1 < ../2.6.13-rc3-mm3		# apply the 2.6.13-rc3-mm3 patch
-$ cd ..
-$ mv linux-2.6.12-mm1 linux-2.6.13-rc3-mm3	# rename the source dir
+But testers of -mm and linux-next should be aware that breakages are
+more common than in any other tree.
 
 
 This concludes this list of explanations of the various kernel trees.
diff --git a/Documentation/arm/CCN.txt b/Documentation/arm/CCN.txt
index ffca443..15cdb7b 100644
--- a/Documentation/arm/CCN.txt
+++ b/Documentation/arm/CCN.txt
@@ -18,13 +18,17 @@
 directory provides configuration templates for all documented
 events, that can be used with perf tool. For example "xp_valid_flit"
 is an equivalent of "type=0x8,event=0x4". Other parameters must be
-explicitly specified. For events originating from device, "node"
-defines its index. All crosspoint events require "xp" (index),
-"port" (device port number) and "vc" (virtual channel ID) and
-"dir" (direction). Watchpoints (special "event" value 0xfe) also
-require comparator values ("cmp_l" and "cmp_h") and "mask", being
-index of the comparator mask.
+explicitly specified.
 
+For events originating from device, "node" defines its index.
+
+Crosspoint PMU events require "xp" (index), "bus" (bus number)
+and "vc" (virtual channel ID).
+
+Crosspoint watchpoint-based events (special "event" value 0xfe)
+require "xp" and "vc" as as above plus "port" (device port index),
+"dir" (transmit/receive direction), comparator values ("cmp_l"
+and "cmp_h") and "mask", being index of the comparator mask.
 Masks are defined separately from the event description
 (due to limited number of the config values) in the "cmp_mask"
 directory, with first 8 configurable by user and additional
diff --git a/Documentation/arm/sunxi/README b/Documentation/arm/sunxi/README
index e5a115f..c7a0554 100644
--- a/Documentation/arm/sunxi/README
+++ b/Documentation/arm/sunxi/README
@@ -73,4 +73,13 @@
     * Octa ARM Cortex-A7 based SoCs
       - Allwinner A83T
         + Datasheet
-          http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf
+          https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_Datasheet_v1.3_20150510.pdf
+        + User Manual
+          https://github.com/allwinner-zh/documents/raw/master/A83T/A83T_User_Manual_v1.5.1_20150513.pdf
+
+    * Quad ARM Cortex-A53 based SoCs
+      - Allwinner A64
+        + Datasheet
+          http://dl.linux-sunxi.org/A64/A64_Datasheet_V1.1.pdf
+        + User Manual
+          http://dl.linux-sunxi.org/A64/Allwinner%20A64%20User%20Manual%20v1.0.pdf
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index 4da60b4..405da11 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -53,6 +53,7 @@
 | ARM            | Cortex-A57      | #832075         | ARM64_ERRATUM_832075    |
 | ARM            | Cortex-A57      | #852523         | N/A                     |
 | ARM            | Cortex-A57      | #834220         | ARM64_ERRATUM_834220    |
+| ARM            | Cortex-A72      | #853709         | N/A                     |
 | ARM            | MMU-500         | #841119,#826419 | N/A                     |
 |                |                 |                 |                         |
 | Cavium         | ThunderX ITS    | #22375, #24313  | CAVIUM_ERRATUM_22375    |
@@ -60,3 +61,5 @@
 | Cavium         | ThunderX GICv3  | #23154          | CAVIUM_ERRATUM_23154    |
 | Cavium         | ThunderX Core   | #27456          | CAVIUM_ERRATUM_27456    |
 | Cavium         | ThunderX SMMUv2 | #27704          | N/A		       |
+|                |                 |                 |                         |
+| Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585     |
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index d515d58..2a39040 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -14,6 +14,12 @@
 This file allows to turn off the disk entropy contribution. Default
 value of this file is '1'(on).
 
+dax (RO)
+--------
+This file indicates whether the device supports Direct Access (DAX),
+used by CPU-addressable storage to bypass the pagecache.  It shows '1'
+if true, '0' if not.
+
 discard_granularity (RO)
 -----------------------
 This shows the size of internal allocation of the device in bytes, if
@@ -46,6 +52,12 @@
 -------------------
 This is the hardware sector size of the device, in bytes.
 
+io_poll (RW)
+------------
+When read, this file shows the total number of block IO polls and how
+many returned success.  Writing '0' to this file will disable polling
+for this device.  Writing any non-zero value will enable this feature.
+
 iostats (RW)
 -------------
 This file is used to control (on/off) the iostats accounting of the
@@ -151,5 +163,11 @@
 setting from "write back" to "write through", since that will also
 eliminate cache flushes issued by the kernel.
 
+write_same_max_bytes (RO)
+-------------------------
+This is the number of bytes the device can write in a single write-same
+command.  A value of '0' means write-same is not supported by this
+device.
+
 
 Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/clk.txt b/Documentation/clk.txt
index 5c4bc4d..22f026a 100644
--- a/Documentation/clk.txt
+++ b/Documentation/clk.txt
@@ -31,24 +31,25 @@
 hardware-specific bits for the hypothetical "foo" hardware.
 
 Tying the two halves of this interface together is struct clk_hw, which
-is defined in struct clk_foo and pointed to within struct clk.  This
+is defined in struct clk_foo and pointed to within struct clk_core.  This
 allows for easy navigation between the two discrete halves of the common
 clock interface.
 
 	Part 2 - common data structures and api
 
-Below is the common struct clk definition from
-include/linux/clk-private.h, modified for brevity:
+Below is the common struct clk_core definition from
+drivers/clk/clk.c, modified for brevity:
 
-	struct clk {
+	struct clk_core {
 		const char		*name;
 		const struct clk_ops	*ops;
 		struct clk_hw		*hw;
-		char			**parent_names;
-		struct clk		**parents;
-		struct clk		*parent;
-		struct hlist_head	children;
-		struct hlist_node	child_node;
+		struct module		*owner;
+		struct clk_core		*parent;
+		const char		**parent_names;
+		struct clk_core		**parents;
+		u8			num_parents;
+		u8			new_parent_index;
 		...
 	};
 
@@ -56,16 +57,19 @@
 api itself defines several driver-facing functions which operate on
 struct clk.  That api is documented in include/linux/clk.h.
 
-Platforms and devices utilizing the common struct clk use the struct
-clk_ops pointer in struct clk to perform the hardware-specific parts of
-the operations defined in clk.h:
+Platforms and devices utilizing the common struct clk_core use the struct
+clk_ops pointer in struct clk_core to perform the hardware-specific parts of
+the operations defined in clk-provider.h:
 
 	struct clk_ops {
 		int		(*prepare)(struct clk_hw *hw);
 		void		(*unprepare)(struct clk_hw *hw);
+		int		(*is_prepared)(struct clk_hw *hw);
+		void		(*unprepare_unused)(struct clk_hw *hw);
 		int		(*enable)(struct clk_hw *hw);
 		void		(*disable)(struct clk_hw *hw);
 		int		(*is_enabled)(struct clk_hw *hw);
+		void		(*disable_unused)(struct clk_hw *hw);
 		unsigned long	(*recalc_rate)(struct clk_hw *hw,
 						unsigned long parent_rate);
 		long		(*round_rate)(struct clk_hw *hw,
@@ -84,6 +88,8 @@
 					    u8 index);
 		unsigned long	(*recalc_accuracy)(struct clk_hw *hw,
 						unsigned long parent_accuracy);
+		int		(*get_phase)(struct clk_hw *hw);
+		int		(*set_phase)(struct clk_hw *hw, int degrees);
 		void		(*init)(struct clk_hw *hw);
 		int		(*debug_init)(struct clk_hw *hw,
 					      struct dentry *dentry);
@@ -91,7 +97,7 @@
 
 	Part 3 - hardware clk implementations
 
-The strength of the common struct clk comes from its .ops and .hw pointers
+The strength of the common struct clk_core comes from its .ops and .hw pointers
 which abstract the details of struct clk from the hardware-specific bits, and
 vice versa.  To illustrate consider the simple gateable clk implementation in
 drivers/clk/clk-gate.c:
@@ -107,7 +113,7 @@
 knowledge about which register and bit controls this clk's gating.
 Nothing about clock topology or accounting, such as enable_count or
 notifier_count, is needed here.  That is all handled by the common
-framework code and struct clk.
+framework code and struct clk_core.
 
 Let's walk through enabling this clk from driver code:
 
@@ -139,22 +145,18 @@
 
 Note that to_clk_gate is defined as:
 
-#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, clk)
+#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw)
 
 This pattern of abstraction is used for every clock hardware
 representation.
 
 	Part 4 - supporting your own clk hardware
 
-When implementing support for a new type of clock it only necessary to
+When implementing support for a new type of clock it is only necessary to
 include the following header:
 
 #include <linux/clk-provider.h>
 
-include/linux/clk.h is included within that header and clk-private.h
-must never be included from the code which implements the operations for
-a clock.  More on that below in Part 5.
-
 To construct a clk hardware structure for your platform you must define
 the following:
 
diff --git a/Documentation/coccinelle.txt b/Documentation/coccinelle.txt
deleted file mode 100644
index 01fb1da..0000000
--- a/Documentation/coccinelle.txt
+++ /dev/null
@@ -1,470 +0,0 @@
-Copyright 2010 Nicolas Palix <npalix@diku.dk>
-Copyright 2010 Julia Lawall <julia@diku.dk>
-Copyright 2010 Gilles Muller <Gilles.Muller@lip6.fr>
-
-
- Getting Coccinelle
-~~~~~~~~~~~~~~~~~~~~
-
-The semantic patches included in the kernel use features and options
-which are provided by Coccinelle version 1.0.0-rc11 and above.
-Using earlier versions will fail as the option names used by
-the Coccinelle files and coccicheck have been updated.
-
-Coccinelle is available through the package manager
-of many distributions, e.g. :
-
- - Debian
- - Fedora
- - Ubuntu
- - OpenSUSE
- - Arch Linux
- - NetBSD
- - FreeBSD
-
-
-You can get the latest version released from the Coccinelle homepage at
-http://coccinelle.lip6.fr/
-
-Information and tips about Coccinelle are also provided on the wiki
-pages at http://cocci.ekstranet.diku.dk/wiki/doku.php
-
-Once you have it, run the following command:
-
-     	./configure
-        make
-
-as a regular user, and install it with
-
-        sudo make install
-
- Supplemental documentation
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-For supplemental documentation refer to the wiki:
-
-https://bottest.wiki.kernel.org/coccicheck
-
-The wiki documentation always refers to the linux-next version of the script.
-
- Using Coccinelle on the Linux kernel
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-A Coccinelle-specific target is defined in the top level
-Makefile. This target is named 'coccicheck' and calls the 'coccicheck'
-front-end in the 'scripts' directory.
-
-Four basic modes are defined: patch, report, context, and org. The mode to
-use is specified by setting the MODE variable with 'MODE=<mode>'.
-
-'patch' proposes a fix, when possible.
-
-'report' generates a list in the following format:
-  file:line:column-column: message
-
-'context' highlights lines of interest and their context in a
-diff-like style.Lines of interest are indicated with '-'.
-
-'org' generates a report in the Org mode format of Emacs.
-
-Note that not all semantic patches implement all modes. For easy use
-of Coccinelle, the default mode is "report".
-
-Two other modes provide some common combinations of these modes.
-
-'chain' tries the previous modes in the order above until one succeeds.
-
-'rep+ctxt' runs successively the report mode and the context mode.
-	   It should be used with the C option (described later)
-	   which checks the code on a file basis.
-
-Examples:
-	To make a report for every semantic patch, run the following command:
-
-		make coccicheck MODE=report
-
-	To produce patches, run:
-
-		make coccicheck MODE=patch
-
-
-The coccicheck target applies every semantic patch available in the
-sub-directories of 'scripts/coccinelle' to the entire Linux kernel.
-
-For each semantic patch, a commit message is proposed.  It gives a
-description of the problem being checked by the semantic patch, and
-includes a reference to Coccinelle.
-
-As any static code analyzer, Coccinelle produces false
-positives. Thus, reports must be carefully checked, and patches
-reviewed.
-
-To enable verbose messages set the V= variable, for example:
-
-   make coccicheck MODE=report V=1
-
- Coccinelle parallelization
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-By default, coccicheck tries to run as parallel as possible. To change
-the parallelism, set the J= variable. For example, to run across 4 CPUs:
-
-   make coccicheck MODE=report J=4
-
-As of Coccinelle 1.0.2 Coccinelle uses Ocaml parmap for parallelization,
-if support for this is detected you will benefit from parmap parallelization.
-
-When parmap is enabled coccicheck will enable dynamic load balancing by using
-'--chunksize 1' argument, this ensures we keep feeding threads with work
-one by one, so that we avoid the situation where most work gets done by only
-a few threads. With dynamic load balancing, if a thread finishes early we keep
-feeding it more work.
-
-When parmap is enabled, if an error occurs in Coccinelle, this error
-value is propagated back, the return value of the 'make coccicheck'
-captures this return value.
-
- Using Coccinelle with a single semantic patch
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The optional make variable COCCI can be used to check a single
-semantic patch. In that case, the variable must be initialized with
-the name of the semantic patch to apply.
-
-For instance:
-
-	make coccicheck COCCI=<my_SP.cocci> MODE=patch
-or
-	make coccicheck COCCI=<my_SP.cocci> MODE=report
-
-
- Controlling Which Files are Processed by Coccinelle
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-By default the entire kernel source tree is checked.
-
-To apply Coccinelle to a specific directory, M= can be used.
-For example, to check drivers/net/wireless/ one may write:
-
-    make coccicheck M=drivers/net/wireless/
-
-To apply Coccinelle on a file basis, instead of a directory basis, the
-following command may be used:
-
-    make C=1 CHECK="scripts/coccicheck"
-
-To check only newly edited code, use the value 2 for the C flag, i.e.
-
-    make C=2 CHECK="scripts/coccicheck"
-
-In these modes, which works on a file basis, there is no information
-about semantic patches displayed, and no commit message proposed.
-
-This runs every semantic patch in scripts/coccinelle by default. The
-COCCI variable may additionally be used to only apply a single
-semantic patch as shown in the previous section.
-
-The "report" mode is the default. You can select another one with the
-MODE variable explained above.
-
- Debugging Coccinelle SmPL patches
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Using coccicheck is best as it provides in the spatch command line
-include options matching the options used when we compile the kernel.
-You can learn what these options are by using V=1, you could then
-manually run Coccinelle with debug options added.
-
-Alternatively you can debug running Coccinelle against SmPL patches
-by asking for stderr to be redirected to stderr, by default stderr
-is redirected to /dev/null, if you'd like to capture stderr you
-can specify the DEBUG_FILE="file.txt" option to coccicheck. For
-instance:
-
-    rm -f cocci.err
-    make coccicheck COCCI=scripts/coccinelle/free/kfree.cocci MODE=report DEBUG_FILE=cocci.err
-    cat cocci.err
-
-You can use SPFLAGS to add debugging flags, for instance you may want to
-add both --profile --show-trying to SPFLAGS when debugging. For instance
-you may want to use:
-
-    rm -f err.log
-    export COCCI=scripts/coccinelle/misc/irqf_oneshot.cocci
-    make coccicheck DEBUG_FILE="err.log" MODE=report SPFLAGS="--profile --show-trying" M=./drivers/mfd/arizona-irq.c
-
-err.log will now have the profiling information, while stdout will
-provide some progress information as Coccinelle moves forward with
-work.
-
-DEBUG_FILE support is only supported when using coccinelle >= 1.2.
-
- .cocciconfig support
-~~~~~~~~~~~~~~~~~~~~~~
-
-Coccinelle supports reading .cocciconfig for default Coccinelle options that
-should be used every time spatch is spawned, the order of precedence for
-variables for .cocciconfig is as follows:
-
-  o Your current user's home directory is processed first
-  o Your directory from which spatch is called is processed next
-  o The directory provided with the --dir option is processed last, if used
-
-Since coccicheck runs through make, it naturally runs from the kernel
-proper dir, as such the second rule above would be implied for picking up a
-.cocciconfig when using 'make coccicheck'.
-
-'make coccicheck' also supports using M= targets.If you do not supply
-any M= target, it is assumed you want to target the entire kernel.
-The kernel coccicheck script has:
-
-    if [ "$KBUILD_EXTMOD" = "" ] ; then
-        OPTIONS="--dir $srctree $COCCIINCLUDE"
-    else
-        OPTIONS="--dir $KBUILD_EXTMOD $COCCIINCLUDE"
-    fi
-
-KBUILD_EXTMOD is set when an explicit target with M= is used. For both cases
-the spatch --dir argument is used, as such third rule applies when whether M=
-is used or not, and when M= is used the target directory can have its own
-.cocciconfig file. When M= is not passed as an argument to coccicheck the
-target directory is the same as the directory from where spatch was called.
-
-If not using the kernel's coccicheck target, keep the above precedence
-order logic of .cocciconfig reading. If using the kernel's coccicheck target,
-override any of the kernel's .coccicheck's settings using SPFLAGS.
-
-We help Coccinelle when used against Linux with a set of sensible defaults
-options for Linux with our own Linux .cocciconfig. This hints to coccinelle
-git can be used for 'git grep' queries over coccigrep. A timeout of 200
-seconds should suffice for now.
-
-The options picked up by coccinelle when reading a .cocciconfig do not appear
-as arguments to spatch processes running on your system, to confirm what
-options will be used by Coccinelle run:
-
-      spatch --print-options-only
-
-You can override with your own preferred index option by using SPFLAGS. Take
-note that when there are conflicting options Coccinelle takes precedence for
-the last options passed. Using .cocciconfig is possible to use idutils, however
-given the order of precedence followed by Coccinelle, since the kernel now
-carries its own .cocciconfig, you will need to use SPFLAGS to use idutils if
-desired. See below section "Additional flags" for more details on how to use
-idutils.
-
- Additional flags
-~~~~~~~~~~~~~~~~~~
-
-Additional flags can be passed to spatch through the SPFLAGS
-variable. This works as Coccinelle respects the last flags
-given to it when options are in conflict.
-
-    make SPFLAGS=--use-glimpse coccicheck
-
-Coccinelle supports idutils as well but requires coccinelle >= 1.0.6.
-When no ID file is specified coccinelle assumes your ID database file
-is in the file .id-utils.index on the top level of the kernel, coccinelle
-carries a script scripts/idutils_index.sh which creates the database with
-
-    mkid -i C --output .id-utils.index
-
-If you have another database filename you can also just symlink with this
-name.
-
-    make SPFLAGS=--use-idutils coccicheck
-
-Alternatively you can specify the database filename explicitly, for
-instance:
-
-    make SPFLAGS="--use-idutils /full-path/to/ID" coccicheck
-
-See spatch --help to learn more about spatch options.
-
-Note that the '--use-glimpse' and '--use-idutils' options
-require external tools for indexing the code. None of them is
-thus active by default. However, by indexing the code with
-one of these tools, and according to the cocci file used,
-spatch could proceed the entire code base more quickly.
-
- SmPL patch specific options
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-SmPL patches can have their own requirements for options passed
-to Coccinelle. SmPL patch specific options can be provided by
-providing them at the top of the SmPL patch, for instance:
-
-// Options: --no-includes --include-headers
-
- SmPL patch Coccinelle requirements
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-As Coccinelle features get added some more advanced SmPL patches
-may require newer versions of Coccinelle. If an SmPL patch requires
-at least a version of Coccinelle, this can be specified as follows,
-as an example if requiring at least Coccinelle >= 1.0.5:
-
-// Requires: 1.0.5
-
- Proposing new semantic patches
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-New semantic patches can be proposed and submitted by kernel
-developers. For sake of clarity, they should be organized in the
-sub-directories of 'scripts/coccinelle/'.
-
-
- Detailed description of the 'report' mode
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-'report' generates a list in the following format:
-  file:line:column-column: message
-
-Example:
-
-Running
-
-	make coccicheck MODE=report COCCI=scripts/coccinelle/api/err_cast.cocci
-
-will execute the following part of the SmPL script.
-
-<smpl>
-@r depends on !context && !patch && (org || report)@
-expression x;
-position p;
-@@
-
- ERR_PTR@p(PTR_ERR(x))
-
-@script:python depends on report@
-p << r.p;
-x << r.x;
-@@
-
-msg="ERR_CAST can be used with %s" % (x)
-coccilib.report.print_report(p[0], msg)
-</smpl>
-
-This SmPL excerpt generates entries on the standard output, as
-illustrated below:
-
-/home/user/linux/crypto/ctr.c:188:9-16: ERR_CAST can be used with alg
-/home/user/linux/crypto/authenc.c:619:9-16: ERR_CAST can be used with auth
-/home/user/linux/crypto/xts.c:227:9-16: ERR_CAST can be used with alg
-
-
- Detailed description of the 'patch' mode
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-When the 'patch' mode is available, it proposes a fix for each problem
-identified.
-
-Example:
-
-Running
-	make coccicheck MODE=patch COCCI=scripts/coccinelle/api/err_cast.cocci
-
-will execute the following part of the SmPL script.
-
-<smpl>
-@ depends on !context && patch && !org && !report @
-expression x;
-@@
-
-- ERR_PTR(PTR_ERR(x))
-+ ERR_CAST(x)
-</smpl>
-
-This SmPL excerpt generates patch hunks on the standard output, as
-illustrated below:
-
-diff -u -p a/crypto/ctr.c b/crypto/ctr.c
---- a/crypto/ctr.c 2010-05-26 10:49:38.000000000 +0200
-+++ b/crypto/ctr.c 2010-06-03 23:44:49.000000000 +0200
-@@ -185,7 +185,7 @@ static struct crypto_instance *crypto_ct
- 	alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_CIPHER,
- 				  CRYPTO_ALG_TYPE_MASK);
- 	if (IS_ERR(alg))
--		return ERR_PTR(PTR_ERR(alg));
-+		return ERR_CAST(alg);
- 
- 	/* Block size must be >= 4 bytes. */
- 	err = -EINVAL;
-
- Detailed description of the 'context' mode
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-'context' highlights lines of interest and their context
-in a diff-like style.
-
-NOTE: The diff-like output generated is NOT an applicable patch. The
-      intent of the 'context' mode is to highlight the important lines
-      (annotated with minus, '-') and gives some surrounding context
-      lines around. This output can be used with the diff mode of
-      Emacs to review the code.
-
-Example:
-
-Running
-	make coccicheck MODE=context COCCI=scripts/coccinelle/api/err_cast.cocci
-
-will execute the following part of the SmPL script.
-
-<smpl>
-@ depends on context && !patch && !org && !report@
-expression x;
-@@
-
-* ERR_PTR(PTR_ERR(x))
-</smpl>
-
-This SmPL excerpt generates diff hunks on the standard output, as
-illustrated below:
-
-diff -u -p /home/user/linux/crypto/ctr.c /tmp/nothing
---- /home/user/linux/crypto/ctr.c	2010-05-26 10:49:38.000000000 +0200
-+++ /tmp/nothing
-@@ -185,7 +185,6 @@ static struct crypto_instance *crypto_ct
- 	alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_CIPHER,
- 				  CRYPTO_ALG_TYPE_MASK);
- 	if (IS_ERR(alg))
--		return ERR_PTR(PTR_ERR(alg));
- 
- 	/* Block size must be >= 4 bytes. */
- 	err = -EINVAL;
-
- Detailed description of the 'org' mode
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-'org' generates a report in the Org mode format of Emacs.
-
-Example:
-
-Running
-	make coccicheck MODE=org COCCI=scripts/coccinelle/api/err_cast.cocci
-
-will execute the following part of the SmPL script.
-
-<smpl>
-@r depends on !context && !patch && (org || report)@
-expression x;
-position p;
-@@
-
- ERR_PTR@p(PTR_ERR(x))
-
-@script:python depends on org@
-p << r.p;
-x << r.x;
-@@
-
-msg="ERR_CAST can be used with %s" % (x)
-msg_safe=msg.replace("[","@(").replace("]",")")
-coccilib.org.print_todo(p[0], msg_safe)
-</smpl>
-
-This SmPL excerpt generates Org entries on the standard output, as
-illustrated below:
-
-* TODO [[view:/home/user/linux/crypto/ctr.c::face=ovl-face1::linb=188::colb=9::cole=16][ERR_CAST can be used with alg]]
-* TODO [[view:/home/user/linux/crypto/authenc.c::face=ovl-face1::linb=619::colb=9::cole=16][ERR_CAST can be used with auth]]
-* TODO [[view:/home/user/linux/crypto/xts.c::face=ovl-face1::linb=227::colb=9::cole=16][ERR_CAST can be used with alg]]
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 96b7aa6..0cc8765 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -14,11 +14,17 @@
 
 import sys
 import os
+import sphinx
+
+# Get Sphinx version
+major, minor, patch = map(int, sphinx.__version__.split("."))
+
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 sys.path.insert(0, os.path.abspath('sphinx'))
+from load_config import loadConfig
 
 # -- General configuration ------------------------------------------------
 
@@ -28,14 +34,13 @@
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = ['kernel-doc', 'rstFlatTable', 'kernel_include']
+extensions = ['kernel-doc', 'rstFlatTable', 'kernel_include', 'cdomain']
 
-# Gracefully handle missing rst2pdf.
-try:
-    import rst2pdf
-    extensions += ['rst2pdf.pdfbuilder']
-except ImportError:
-    pass
+# The name of the math extension changed on Sphinx 1.4
+if minor > 3:
+    extensions.append("sphinx.ext.imgmath")
+else:
+    extensions.append("sphinx.ext.pngmath")
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -131,7 +136,7 @@
 todo_include_todos = False
 
 primary_domain = 'C'
-highlight_language = 'C'
+highlight_language = 'guess'
 
 # -- Options for HTML output ----------------------------------------------
 
@@ -252,23 +257,90 @@
 
 latex_elements = {
 # The paper size ('letterpaper' or 'a4paper').
-#'papersize': 'letterpaper',
+'papersize': 'a4paper',
 
 # The font size ('10pt', '11pt' or '12pt').
-#'pointsize': '10pt',
-
-# Additional stuff for the LaTeX preamble.
-#'preamble': '',
+'pointsize': '8pt',
 
 # Latex figure (float) alignment
 #'figure_align': 'htbp',
+
+# Don't mangle with UTF-8 chars
+'inputenc': '',
+'utf8extra': '',
+
+# Additional stuff for the LaTeX preamble.
+    'preamble': '''
+	% Adjust margins
+	\\usepackage[margin=0.5in, top=1in, bottom=1in]{geometry}
+
+        % Allow generate some pages in landscape
+        \\usepackage{lscape}
+
+        % Put notes in color and let them be inside a table
+	\\definecolor{NoteColor}{RGB}{204,255,255}
+	\\definecolor{WarningColor}{RGB}{255,204,204}
+	\\definecolor{AttentionColor}{RGB}{255,255,204}
+	\\definecolor{OtherColor}{RGB}{204,204,204}
+        \\newlength{\\mynoticelength}
+        \\makeatletter\\newenvironment{coloredbox}[1]{%
+	   \\setlength{\\fboxrule}{1pt}
+	   \\setlength{\\fboxsep}{7pt}
+	   \\setlength{\\mynoticelength}{\\linewidth}
+	   \\addtolength{\\mynoticelength}{-2\\fboxsep}
+	   \\addtolength{\\mynoticelength}{-2\\fboxrule}
+           \\begin{lrbox}{\\@tempboxa}\\begin{minipage}{\\mynoticelength}}{\\end{minipage}\\end{lrbox}%
+	   \\ifthenelse%
+	      {\\equal{\\py@noticetype}{note}}%
+	      {\\colorbox{NoteColor}{\\usebox{\\@tempboxa}}}%
+	      {%
+	         \\ifthenelse%
+	         {\\equal{\\py@noticetype}{warning}}%
+	         {\\colorbox{WarningColor}{\\usebox{\\@tempboxa}}}%
+		 {%
+	            \\ifthenelse%
+	            {\\equal{\\py@noticetype}{attention}}%
+	            {\\colorbox{AttentionColor}{\\usebox{\\@tempboxa}}}%
+	            {\\colorbox{OtherColor}{\\usebox{\\@tempboxa}}}%
+		 }%
+	      }%
+        }\\makeatother
+
+        \\makeatletter
+        \\renewenvironment{notice}[2]{%
+          \\def\\py@noticetype{#1}
+          \\begin{coloredbox}{#1}
+          \\bf\\it
+          \\par\\strong{#2}
+          \\csname py@noticestart@#1\\endcsname
+        }
+	{
+          \\csname py@noticeend@\\py@noticetype\\endcsname
+          \\end{coloredbox}
+        }
+	\\makeatother
+
+	% Use some font with UTF-8 support with XeLaTeX
+        \\usepackage{fontspec}
+        \\setsansfont{DejaVu Serif}
+        \\setromanfont{DejaVu Sans}
+        \\setmonofont{DejaVu Sans Mono}
+
+	% To allow adjusting table sizes
+	\\usepackage{adjustbox}
+
+     '''
 }
 
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'TheLinuxKernel.tex', 'The Linux Kernel Documentation',
+    ('kernel-documentation', 'kernel-documentation.tex', 'The Linux Kernel Documentation',
+     'The kernel development community', 'manual'),
+    ('development-process/index', 'development-process.tex', 'Linux Kernel Development Documentation',
+     'The kernel development community', 'manual'),
+    ('gpu/index', 'gpu.tex', 'Linux GPU Driver Developer\'s Guide',
      'The kernel development community', 'manual'),
 ]
 
@@ -419,3 +491,9 @@
 # line arguments.
 kerneldoc_bin = '../scripts/kernel-doc'
 kerneldoc_srctree = '..'
+
+# ------------------------------------------------------------------------------
+# Since loadConfig overwrites settings from the global namespace, it has to be
+# the last statement in the conf.py file
+# ------------------------------------------------------------------------------
+loadConfig(globals())
diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt
index fc64749..8d9773f 100644
--- a/Documentation/cpu-freq/cpufreq-stats.txt
+++ b/Documentation/cpu-freq/cpufreq-stats.txt
@@ -103,7 +103,7 @@
 	Power management options (ACPI, APM)  --->
 		CPU Frequency scaling  --->
 			[*] CPU Frequency scaling
-			<*>   CPU frequency translation statistics 
+			[*]   CPU frequency translation statistics
 			[*]     CPU frequency translation statistics details
 
 
diff --git a/Documentation/dev-tools/coccinelle.rst b/Documentation/dev-tools/coccinelle.rst
new file mode 100644
index 0000000..4a64b4c
--- /dev/null
+++ b/Documentation/dev-tools/coccinelle.rst
@@ -0,0 +1,491 @@
+.. Copyright 2010 Nicolas Palix <npalix@diku.dk>
+.. Copyright 2010 Julia Lawall <julia@diku.dk>
+.. Copyright 2010 Gilles Muller <Gilles.Muller@lip6.fr>
+
+.. highlight:: none
+
+Coccinelle
+==========
+
+Coccinelle is a tool for pattern matching and text transformation that has
+many uses in kernel development, including the application of complex,
+tree-wide patches and detection of problematic programming patterns.
+
+Getting Coccinelle
+-------------------
+
+The semantic patches included in the kernel use features and options
+which are provided by Coccinelle version 1.0.0-rc11 and above.
+Using earlier versions will fail as the option names used by
+the Coccinelle files and coccicheck have been updated.
+
+Coccinelle is available through the package manager
+of many distributions, e.g. :
+
+ - Debian
+ - Fedora
+ - Ubuntu
+ - OpenSUSE
+ - Arch Linux
+ - NetBSD
+ - FreeBSD
+
+You can get the latest version released from the Coccinelle homepage at
+http://coccinelle.lip6.fr/
+
+Information and tips about Coccinelle are also provided on the wiki
+pages at http://cocci.ekstranet.diku.dk/wiki/doku.php
+
+Once you have it, run the following command::
+
+     	./configure
+        make
+
+as a regular user, and install it with::
+
+        sudo make install
+
+Supplemental documentation
+---------------------------
+
+For supplemental documentation refer to the wiki:
+
+https://bottest.wiki.kernel.org/coccicheck
+
+The wiki documentation always refers to the linux-next version of the script.
+
+Using Coccinelle on the Linux kernel
+------------------------------------
+
+A Coccinelle-specific target is defined in the top level
+Makefile. This target is named ``coccicheck`` and calls the ``coccicheck``
+front-end in the ``scripts`` directory.
+
+Four basic modes are defined: ``patch``, ``report``, ``context``, and
+``org``. The mode to use is specified by setting the MODE variable with
+``MODE=<mode>``.
+
+- ``patch`` proposes a fix, when possible.
+
+- ``report`` generates a list in the following format:
+  file:line:column-column: message
+
+- ``context`` highlights lines of interest and their context in a
+  diff-like style.Lines of interest are indicated with ``-``.
+
+- ``org`` generates a report in the Org mode format of Emacs.
+
+Note that not all semantic patches implement all modes. For easy use
+of Coccinelle, the default mode is "report".
+
+Two other modes provide some common combinations of these modes.
+
+- ``chain`` tries the previous modes in the order above until one succeeds.
+
+- ``rep+ctxt`` runs successively the report mode and the context mode.
+  It should be used with the C option (described later)
+  which checks the code on a file basis.
+
+Examples
+~~~~~~~~
+
+To make a report for every semantic patch, run the following command::
+
+		make coccicheck MODE=report
+
+To produce patches, run::
+
+		make coccicheck MODE=patch
+
+
+The coccicheck target applies every semantic patch available in the
+sub-directories of ``scripts/coccinelle`` to the entire Linux kernel.
+
+For each semantic patch, a commit message is proposed.  It gives a
+description of the problem being checked by the semantic patch, and
+includes a reference to Coccinelle.
+
+As any static code analyzer, Coccinelle produces false
+positives. Thus, reports must be carefully checked, and patches
+reviewed.
+
+To enable verbose messages set the V= variable, for example::
+
+   make coccicheck MODE=report V=1
+
+Coccinelle parallelization
+---------------------------
+
+By default, coccicheck tries to run as parallel as possible. To change
+the parallelism, set the J= variable. For example, to run across 4 CPUs::
+
+   make coccicheck MODE=report J=4
+
+As of Coccinelle 1.0.2 Coccinelle uses Ocaml parmap for parallelization,
+if support for this is detected you will benefit from parmap parallelization.
+
+When parmap is enabled coccicheck will enable dynamic load balancing by using
+``--chunksize 1`` argument, this ensures we keep feeding threads with work
+one by one, so that we avoid the situation where most work gets done by only
+a few threads. With dynamic load balancing, if a thread finishes early we keep
+feeding it more work.
+
+When parmap is enabled, if an error occurs in Coccinelle, this error
+value is propagated back, the return value of the ``make coccicheck``
+captures this return value.
+
+Using Coccinelle with a single semantic patch
+---------------------------------------------
+
+The optional make variable COCCI can be used to check a single
+semantic patch. In that case, the variable must be initialized with
+the name of the semantic patch to apply.
+
+For instance::
+
+	make coccicheck COCCI=<my_SP.cocci> MODE=patch
+
+or::
+
+	make coccicheck COCCI=<my_SP.cocci> MODE=report
+
+
+Controlling Which Files are Processed by Coccinelle
+---------------------------------------------------
+
+By default the entire kernel source tree is checked.
+
+To apply Coccinelle to a specific directory, ``M=`` can be used.
+For example, to check drivers/net/wireless/ one may write::
+
+    make coccicheck M=drivers/net/wireless/
+
+To apply Coccinelle on a file basis, instead of a directory basis, the
+following command may be used::
+
+    make C=1 CHECK="scripts/coccicheck"
+
+To check only newly edited code, use the value 2 for the C flag, i.e.::
+
+    make C=2 CHECK="scripts/coccicheck"
+
+In these modes, which works on a file basis, there is no information
+about semantic patches displayed, and no commit message proposed.
+
+This runs every semantic patch in scripts/coccinelle by default. The
+COCCI variable may additionally be used to only apply a single
+semantic patch as shown in the previous section.
+
+The "report" mode is the default. You can select another one with the
+MODE variable explained above.
+
+Debugging Coccinelle SmPL patches
+---------------------------------
+
+Using coccicheck is best as it provides in the spatch command line
+include options matching the options used when we compile the kernel.
+You can learn what these options are by using V=1, you could then
+manually run Coccinelle with debug options added.
+
+Alternatively you can debug running Coccinelle against SmPL patches
+by asking for stderr to be redirected to stderr, by default stderr
+is redirected to /dev/null, if you'd like to capture stderr you
+can specify the ``DEBUG_FILE="file.txt"`` option to coccicheck. For
+instance::
+
+    rm -f cocci.err
+    make coccicheck COCCI=scripts/coccinelle/free/kfree.cocci MODE=report DEBUG_FILE=cocci.err
+    cat cocci.err
+
+You can use SPFLAGS to add debugging flags, for instance you may want to
+add both --profile --show-trying to SPFLAGS when debugging. For instance
+you may want to use::
+
+    rm -f err.log
+    export COCCI=scripts/coccinelle/misc/irqf_oneshot.cocci
+    make coccicheck DEBUG_FILE="err.log" MODE=report SPFLAGS="--profile --show-trying" M=./drivers/mfd/arizona-irq.c
+
+err.log will now have the profiling information, while stdout will
+provide some progress information as Coccinelle moves forward with
+work.
+
+DEBUG_FILE support is only supported when using coccinelle >= 1.2.
+
+.cocciconfig support
+--------------------
+
+Coccinelle supports reading .cocciconfig for default Coccinelle options that
+should be used every time spatch is spawned, the order of precedence for
+variables for .cocciconfig is as follows:
+
+- Your current user's home directory is processed first
+- Your directory from which spatch is called is processed next
+- The directory provided with the --dir option is processed last, if used
+
+Since coccicheck runs through make, it naturally runs from the kernel
+proper dir, as such the second rule above would be implied for picking up a
+.cocciconfig when using ``make coccicheck``.
+
+``make coccicheck`` also supports using M= targets.If you do not supply
+any M= target, it is assumed you want to target the entire kernel.
+The kernel coccicheck script has::
+
+    if [ "$KBUILD_EXTMOD" = "" ] ; then
+        OPTIONS="--dir $srctree $COCCIINCLUDE"
+    else
+        OPTIONS="--dir $KBUILD_EXTMOD $COCCIINCLUDE"
+    fi
+
+KBUILD_EXTMOD is set when an explicit target with M= is used. For both cases
+the spatch --dir argument is used, as such third rule applies when whether M=
+is used or not, and when M= is used the target directory can have its own
+.cocciconfig file. When M= is not passed as an argument to coccicheck the
+target directory is the same as the directory from where spatch was called.
+
+If not using the kernel's coccicheck target, keep the above precedence
+order logic of .cocciconfig reading. If using the kernel's coccicheck target,
+override any of the kernel's .coccicheck's settings using SPFLAGS.
+
+We help Coccinelle when used against Linux with a set of sensible defaults
+options for Linux with our own Linux .cocciconfig. This hints to coccinelle
+git can be used for ``git grep`` queries over coccigrep. A timeout of 200
+seconds should suffice for now.
+
+The options picked up by coccinelle when reading a .cocciconfig do not appear
+as arguments to spatch processes running on your system, to confirm what
+options will be used by Coccinelle run::
+
+      spatch --print-options-only
+
+You can override with your own preferred index option by using SPFLAGS. Take
+note that when there are conflicting options Coccinelle takes precedence for
+the last options passed. Using .cocciconfig is possible to use idutils, however
+given the order of precedence followed by Coccinelle, since the kernel now
+carries its own .cocciconfig, you will need to use SPFLAGS to use idutils if
+desired. See below section "Additional flags" for more details on how to use
+idutils.
+
+Additional flags
+----------------
+
+Additional flags can be passed to spatch through the SPFLAGS
+variable. This works as Coccinelle respects the last flags
+given to it when options are in conflict. ::
+
+    make SPFLAGS=--use-glimpse coccicheck
+
+Coccinelle supports idutils as well but requires coccinelle >= 1.0.6.
+When no ID file is specified coccinelle assumes your ID database file
+is in the file .id-utils.index on the top level of the kernel, coccinelle
+carries a script scripts/idutils_index.sh which creates the database with::
+
+    mkid -i C --output .id-utils.index
+
+If you have another database filename you can also just symlink with this
+name. ::
+
+    make SPFLAGS=--use-idutils coccicheck
+
+Alternatively you can specify the database filename explicitly, for
+instance::
+
+    make SPFLAGS="--use-idutils /full-path/to/ID" coccicheck
+
+See ``spatch --help`` to learn more about spatch options.
+
+Note that the ``--use-glimpse`` and ``--use-idutils`` options
+require external tools for indexing the code. None of them is
+thus active by default. However, by indexing the code with
+one of these tools, and according to the cocci file used,
+spatch could proceed the entire code base more quickly.
+
+SmPL patch specific options
+---------------------------
+
+SmPL patches can have their own requirements for options passed
+to Coccinelle. SmPL patch specific options can be provided by
+providing them at the top of the SmPL patch, for instance::
+
+	// Options: --no-includes --include-headers
+
+SmPL patch Coccinelle requirements
+----------------------------------
+
+As Coccinelle features get added some more advanced SmPL patches
+may require newer versions of Coccinelle. If an SmPL patch requires
+at least a version of Coccinelle, this can be specified as follows,
+as an example if requiring at least Coccinelle >= 1.0.5::
+
+	// Requires: 1.0.5
+
+Proposing new semantic patches
+-------------------------------
+
+New semantic patches can be proposed and submitted by kernel
+developers. For sake of clarity, they should be organized in the
+sub-directories of ``scripts/coccinelle/``.
+
+
+Detailed description of the ``report`` mode
+-------------------------------------------
+
+``report`` generates a list in the following format::
+
+  file:line:column-column: message
+
+Example
+~~~~~~~
+
+Running::
+
+	make coccicheck MODE=report COCCI=scripts/coccinelle/api/err_cast.cocci
+
+will execute the following part of the SmPL script::
+
+   <smpl>
+   @r depends on !context && !patch && (org || report)@
+   expression x;
+   position p;
+   @@
+
+     ERR_PTR@p(PTR_ERR(x))
+
+   @script:python depends on report@
+   p << r.p;
+   x << r.x;
+   @@
+
+   msg="ERR_CAST can be used with %s" % (x)
+   coccilib.report.print_report(p[0], msg)
+   </smpl>
+
+This SmPL excerpt generates entries on the standard output, as
+illustrated below::
+
+    /home/user/linux/crypto/ctr.c:188:9-16: ERR_CAST can be used with alg
+    /home/user/linux/crypto/authenc.c:619:9-16: ERR_CAST can be used with auth
+    /home/user/linux/crypto/xts.c:227:9-16: ERR_CAST can be used with alg
+
+
+Detailed description of the ``patch`` mode
+------------------------------------------
+
+When the ``patch`` mode is available, it proposes a fix for each problem
+identified.
+
+Example
+~~~~~~~
+
+Running::
+
+	make coccicheck MODE=patch COCCI=scripts/coccinelle/api/err_cast.cocci
+
+will execute the following part of the SmPL script::
+
+    <smpl>
+    @ depends on !context && patch && !org && !report @
+    expression x;
+    @@
+
+    - ERR_PTR(PTR_ERR(x))
+    + ERR_CAST(x)
+    </smpl>
+
+This SmPL excerpt generates patch hunks on the standard output, as
+illustrated below::
+
+    diff -u -p a/crypto/ctr.c b/crypto/ctr.c
+    --- a/crypto/ctr.c 2010-05-26 10:49:38.000000000 +0200
+    +++ b/crypto/ctr.c 2010-06-03 23:44:49.000000000 +0200
+    @@ -185,7 +185,7 @@ static struct crypto_instance *crypto_ct
+ 	alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_CIPHER,
+ 				  CRYPTO_ALG_TYPE_MASK);
+ 	if (IS_ERR(alg))
+    -		return ERR_PTR(PTR_ERR(alg));
+    +		return ERR_CAST(alg);
+
+ 	/* Block size must be >= 4 bytes. */
+ 	err = -EINVAL;
+
+Detailed description of the ``context`` mode
+--------------------------------------------
+
+``context`` highlights lines of interest and their context
+in a diff-like style.
+
+      **NOTE**: The diff-like output generated is NOT an applicable patch. The
+      intent of the ``context`` mode is to highlight the important lines
+      (annotated with minus, ``-``) and gives some surrounding context
+      lines around. This output can be used with the diff mode of
+      Emacs to review the code.
+
+Example
+~~~~~~~
+
+Running::
+
+	make coccicheck MODE=context COCCI=scripts/coccinelle/api/err_cast.cocci
+
+will execute the following part of the SmPL script::
+
+    <smpl>
+    @ depends on context && !patch && !org && !report@
+    expression x;
+    @@
+
+    * ERR_PTR(PTR_ERR(x))
+    </smpl>
+
+This SmPL excerpt generates diff hunks on the standard output, as
+illustrated below::
+
+    diff -u -p /home/user/linux/crypto/ctr.c /tmp/nothing
+    --- /home/user/linux/crypto/ctr.c	2010-05-26 10:49:38.000000000 +0200
+    +++ /tmp/nothing
+    @@ -185,7 +185,6 @@ static struct crypto_instance *crypto_ct
+ 	alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_CIPHER,
+ 				  CRYPTO_ALG_TYPE_MASK);
+ 	if (IS_ERR(alg))
+    -		return ERR_PTR(PTR_ERR(alg));
+
+ 	/* Block size must be >= 4 bytes. */
+ 	err = -EINVAL;
+
+Detailed description of the ``org`` mode
+----------------------------------------
+
+``org`` generates a report in the Org mode format of Emacs.
+
+Example
+~~~~~~~
+
+Running::
+
+	make coccicheck MODE=org COCCI=scripts/coccinelle/api/err_cast.cocci
+
+will execute the following part of the SmPL script::
+
+    <smpl>
+    @r depends on !context && !patch && (org || report)@
+    expression x;
+    position p;
+    @@
+
+      ERR_PTR@p(PTR_ERR(x))
+
+    @script:python depends on org@
+    p << r.p;
+    x << r.x;
+    @@
+
+    msg="ERR_CAST can be used with %s" % (x)
+    msg_safe=msg.replace("[","@(").replace("]",")")
+    coccilib.org.print_todo(p[0], msg_safe)
+    </smpl>
+
+This SmPL excerpt generates Org entries on the standard output, as
+illustrated below::
+
+    * TODO [[view:/home/user/linux/crypto/ctr.c::face=ovl-face1::linb=188::colb=9::cole=16][ERR_CAST can be used with alg]]
+    * TODO [[view:/home/user/linux/crypto/authenc.c::face=ovl-face1::linb=619::colb=9::cole=16][ERR_CAST can be used with auth]]
+    * TODO [[view:/home/user/linux/crypto/xts.c::face=ovl-face1::linb=227::colb=9::cole=16][ERR_CAST can be used with alg]]
diff --git a/Documentation/dev-tools/gcov.rst b/Documentation/dev-tools/gcov.rst
new file mode 100644
index 0000000..19eedfe
--- /dev/null
+++ b/Documentation/dev-tools/gcov.rst
@@ -0,0 +1,256 @@
+Using gcov with the Linux kernel
+================================
+
+gcov profiling kernel support enables the use of GCC's coverage testing
+tool gcov_ with the Linux kernel. Coverage data of a running kernel
+is exported in gcov-compatible format via the "gcov" debugfs directory.
+To get coverage data for a specific file, change to the kernel build
+directory and use gcov with the ``-o`` option as follows (requires root)::
+
+    # cd /tmp/linux-out
+    # gcov -o /sys/kernel/debug/gcov/tmp/linux-out/kernel spinlock.c
+
+This will create source code files annotated with execution counts
+in the current directory. In addition, graphical gcov front-ends such
+as lcov_ can be used to automate the process of collecting data
+for the entire kernel and provide coverage overviews in HTML format.
+
+Possible uses:
+
+* debugging (has this line been reached at all?)
+* test improvement (how do I change my test to cover these lines?)
+* minimizing kernel configurations (do I need this option if the
+  associated code is never run?)
+
+.. _gcov: http://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+.. _lcov: http://ltp.sourceforge.net/coverage/lcov.php
+
+
+Preparation
+-----------
+
+Configure the kernel with::
+
+        CONFIG_DEBUG_FS=y
+        CONFIG_GCOV_KERNEL=y
+
+select the gcc's gcov format, default is autodetect based on gcc version::
+
+        CONFIG_GCOV_FORMAT_AUTODETECT=y
+
+and to get coverage data for the entire kernel::
+
+        CONFIG_GCOV_PROFILE_ALL=y
+
+Note that kernels compiled with profiling flags will be significantly
+larger and run slower. Also CONFIG_GCOV_PROFILE_ALL may not be supported
+on all architectures.
+
+Profiling data will only become accessible once debugfs has been
+mounted::
+
+        mount -t debugfs none /sys/kernel/debug
+
+
+Customization
+-------------
+
+To enable profiling for specific files or directories, add a line
+similar to the following to the respective kernel Makefile:
+
+- For a single file (e.g. main.o)::
+
+	GCOV_PROFILE_main.o := y
+
+- For all files in one directory::
+
+	GCOV_PROFILE := y
+
+To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL
+is specified, use::
+
+	GCOV_PROFILE_main.o := n
+
+and::
+
+	GCOV_PROFILE := n
+
+Only files which are linked to the main kernel image or are compiled as
+kernel modules are supported by this mechanism.
+
+
+Files
+-----
+
+The gcov kernel support creates the following files in debugfs:
+
+``/sys/kernel/debug/gcov``
+	Parent directory for all gcov-related files.
+
+``/sys/kernel/debug/gcov/reset``
+	Global reset file: resets all coverage data to zero when
+        written to.
+
+``/sys/kernel/debug/gcov/path/to/compile/dir/file.gcda``
+	The actual gcov data file as understood by the gcov
+        tool. Resets file coverage data to zero when written to.
+
+``/sys/kernel/debug/gcov/path/to/compile/dir/file.gcno``
+	Symbolic link to a static data file required by the gcov
+        tool. This file is generated by gcc when compiling with
+        option ``-ftest-coverage``.
+
+
+Modules
+-------
+
+Kernel modules may contain cleanup code which is only run during
+module unload time. The gcov mechanism provides a means to collect
+coverage data for such code by keeping a copy of the data associated
+with the unloaded module. This data remains available through debugfs.
+Once the module is loaded again, the associated coverage counters are
+initialized with the data from its previous instantiation.
+
+This behavior can be deactivated by specifying the gcov_persist kernel
+parameter::
+
+        gcov_persist=0
+
+At run-time, a user can also choose to discard data for an unloaded
+module by writing to its data file or the global reset file.
+
+
+Separated build and test machines
+---------------------------------
+
+The gcov kernel profiling infrastructure is designed to work out-of-the
+box for setups where kernels are built and run on the same machine. In
+cases where the kernel runs on a separate machine, special preparations
+must be made, depending on where the gcov tool is used:
+
+a) gcov is run on the TEST machine
+
+    The gcov tool version on the test machine must be compatible with the
+    gcc version used for kernel build. Also the following files need to be
+    copied from build to test machine:
+
+    from the source tree:
+      - all C source files + headers
+
+    from the build tree:
+      - all C source files + headers
+      - all .gcda and .gcno files
+      - all links to directories
+
+    It is important to note that these files need to be placed into the
+    exact same file system location on the test machine as on the build
+    machine. If any of the path components is symbolic link, the actual
+    directory needs to be used instead (due to make's CURDIR handling).
+
+b) gcov is run on the BUILD machine
+
+    The following files need to be copied after each test case from test
+    to build machine:
+
+    from the gcov directory in sysfs:
+      - all .gcda files
+      - all links to .gcno files
+
+    These files can be copied to any location on the build machine. gcov
+    must then be called with the -o option pointing to that directory.
+
+    Example directory setup on the build machine::
+
+      /tmp/linux:    kernel source tree
+      /tmp/out:      kernel build directory as specified by make O=
+      /tmp/coverage: location of the files copied from the test machine
+
+      [user@build] cd /tmp/out
+      [user@build] gcov -o /tmp/coverage/tmp/out/init main.c
+
+
+Troubleshooting
+---------------
+
+Problem
+    Compilation aborts during linker step.
+
+Cause
+    Profiling flags are specified for source files which are not
+    linked to the main kernel or which are linked by a custom
+    linker procedure.
+
+Solution
+    Exclude affected source files from profiling by specifying
+    ``GCOV_PROFILE := n`` or ``GCOV_PROFILE_basename.o := n`` in the
+    corresponding Makefile.
+
+Problem
+    Files copied from sysfs appear empty or incomplete.
+
+Cause
+    Due to the way seq_file works, some tools such as cp or tar
+    may not correctly copy files from sysfs.
+
+Solution
+    Use ``cat``' to read ``.gcda`` files and ``cp -d`` to copy links.
+    Alternatively use the mechanism shown in Appendix B.
+
+
+Appendix A: gather_on_build.sh
+------------------------------
+
+Sample script to gather coverage meta files on the build machine
+(see 6a)::
+
+    #!/bin/bash
+
+    KSRC=$1
+    KOBJ=$2
+    DEST=$3
+
+    if [ -z "$KSRC" ] || [ -z "$KOBJ" ] || [ -z "$DEST" ]; then
+      echo "Usage: $0 <ksrc directory> <kobj directory> <output.tar.gz>" >&2
+      exit 1
+    fi
+
+    KSRC=$(cd $KSRC; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
+    KOBJ=$(cd $KOBJ; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
+
+    find $KSRC $KOBJ \( -name '*.gcno' -o -name '*.[ch]' -o -type l \) -a \
+                     -perm /u+r,g+r | tar cfz $DEST -P -T -
+
+    if [ $? -eq 0 ] ; then
+      echo "$DEST successfully created, copy to test system and unpack with:"
+      echo "  tar xfz $DEST -P"
+    else
+      echo "Could not create file $DEST"
+    fi
+
+
+Appendix B: gather_on_test.sh
+-----------------------------
+
+Sample script to gather coverage data files on the test machine
+(see 6b)::
+
+    #!/bin/bash -e
+
+    DEST=$1
+    GCDA=/sys/kernel/debug/gcov
+
+    if [ -z "$DEST" ] ; then
+      echo "Usage: $0 <output.tar.gz>" >&2
+      exit 1
+    fi
+
+    TEMPDIR=$(mktemp -d)
+    echo Collecting data..
+    find $GCDA -type d -exec mkdir -p $TEMPDIR/\{\} \;
+    find $GCDA -name '*.gcda' -exec sh -c 'cat < $0 > '$TEMPDIR'/$0' {} \;
+    find $GCDA -name '*.gcno' -exec sh -c 'cp -d $0 '$TEMPDIR'/$0' {} \;
+    tar czf $DEST -C $TEMPDIR sys
+    rm -rf $TEMPDIR
+
+    echo "$DEST successfully created, copy to build system and unpack with:"
+    echo "  tar xfz $DEST"
diff --git a/Documentation/gdb-kernel-debugging.txt b/Documentation/dev-tools/gdb-kernel-debugging.rst
similarity index 72%
rename from Documentation/gdb-kernel-debugging.txt
rename to Documentation/dev-tools/gdb-kernel-debugging.rst
index 7050ce8..5e93c9b 100644
--- a/Documentation/gdb-kernel-debugging.txt
+++ b/Documentation/dev-tools/gdb-kernel-debugging.rst
@@ -1,3 +1,5 @@
+.. highlight:: none
+
 Debugging kernel and modules via gdb
 ====================================
 
@@ -13,54 +15,58 @@
 Requirements
 ------------
 
- o gdb 7.2+ (recommended: 7.4+) with python support enabled (typically true
-   for distributions)
+- gdb 7.2+ (recommended: 7.4+) with python support enabled (typically true
+  for distributions)
 
 
 Setup
 -----
 
- o Create a virtual Linux machine for QEMU/KVM (see www.linux-kvm.org and
-   www.qemu.org for more details). For cross-development,
-   http://landley.net/aboriginal/bin keeps a pool of machine images and
-   toolchains that can be helpful to start from.
+- Create a virtual Linux machine for QEMU/KVM (see www.linux-kvm.org and
+  www.qemu.org for more details). For cross-development,
+  http://landley.net/aboriginal/bin keeps a pool of machine images and
+  toolchains that can be helpful to start from.
 
- o Build the kernel with CONFIG_GDB_SCRIPTS enabled, but leave
-   CONFIG_DEBUG_INFO_REDUCED off. If your architecture supports
-   CONFIG_FRAME_POINTER, keep it enabled.
+- Build the kernel with CONFIG_GDB_SCRIPTS enabled, but leave
+  CONFIG_DEBUG_INFO_REDUCED off. If your architecture supports
+  CONFIG_FRAME_POINTER, keep it enabled.
 
- o Install that kernel on the guest.
+- Install that kernel on the guest.
+  Alternatively, QEMU allows to boot the kernel directly using -kernel,
+  -append, -initrd command line switches. This is generally only useful if
+  you do not depend on modules. See QEMU documentation for more details on
+  this mode.
 
-   Alternatively, QEMU allows to boot the kernel directly using -kernel,
-   -append, -initrd command line switches. This is generally only useful if
-   you do not depend on modules. See QEMU documentation for more details on
-   this mode.
+- Enable the gdb stub of QEMU/KVM, either
 
- o Enable the gdb stub of QEMU/KVM, either
     - at VM startup time by appending "-s" to the QEMU command line
-   or
+
+  or
+
     - during runtime by issuing "gdbserver" from the QEMU monitor
       console
 
- o cd /path/to/linux-build
+- cd /path/to/linux-build
 
- o Start gdb: gdb vmlinux
+- Start gdb: gdb vmlinux
 
-   Note: Some distros may restrict auto-loading of gdb scripts to known safe
-   directories. In case gdb reports to refuse loading vmlinux-gdb.py, add
+  Note: Some distros may restrict auto-loading of gdb scripts to known safe
+  directories. In case gdb reports to refuse loading vmlinux-gdb.py, add::
 
     add-auto-load-safe-path /path/to/linux-build
 
-   to ~/.gdbinit. See gdb help for more details.
+  to ~/.gdbinit. See gdb help for more details.
 
- o Attach to the booted guest:
+- Attach to the booted guest::
+
     (gdb) target remote :1234
 
 
 Examples of using the Linux-provided gdb helpers
 ------------------------------------------------
 
- o Load module (and main kernel) symbols:
+- Load module (and main kernel) symbols::
+
     (gdb) lx-symbols
     loading vmlinux
     scanning for modules in /home/user/linux/build
@@ -72,17 +78,20 @@
     ...
     loading @0xffffffffa0000000: /home/user/linux/build/drivers/ata/ata_generic.ko
 
- o Set a breakpoint on some not yet loaded module function, e.g.:
+- Set a breakpoint on some not yet loaded module function, e.g.::
+
     (gdb) b btrfs_init_sysfs
     Function "btrfs_init_sysfs" not defined.
     Make breakpoint pending on future shared library load? (y or [n]) y
     Breakpoint 1 (btrfs_init_sysfs) pending.
 
- o Continue the target
+- Continue the target::
+
     (gdb) c
 
- o Load the module on the target and watch the symbols being loaded as well as
-   the breakpoint hit:
+- Load the module on the target and watch the symbols being loaded as well as
+  the breakpoint hit::
+
     loading @0xffffffffa0034000: /home/user/linux/build/lib/libcrc32c.ko
     loading @0xffffffffa0050000: /home/user/linux/build/lib/lzo/lzo_compress.ko
     loading @0xffffffffa006e000: /home/user/linux/build/lib/zlib_deflate/zlib_deflate.ko
@@ -91,7 +100,8 @@
     Breakpoint 1, btrfs_init_sysfs () at /home/user/linux/fs/btrfs/sysfs.c:36
     36              btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
 
- o Dump the log buffer of the target kernel:
+- Dump the log buffer of the target kernel::
+
     (gdb) lx-dmesg
     [     0.000000] Initializing cgroup subsys cpuset
     [     0.000000] Initializing cgroup subsys cpu
@@ -102,19 +112,22 @@
     [     0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
     ....
 
- o Examine fields of the current task struct:
+- Examine fields of the current task struct::
+
     (gdb) p $lx_current().pid
     $1 = 4998
     (gdb) p $lx_current().comm
     $2 = "modprobe\000\000\000\000\000\000\000"
 
- o Make use of the per-cpu function for the current or a specified CPU:
+- Make use of the per-cpu function for the current or a specified CPU::
+
     (gdb) p $lx_per_cpu("runqueues").nr_running
     $3 = 1
     (gdb) p $lx_per_cpu("runqueues", 2).nr_running
     $4 = 0
 
- o Dig into hrtimers using the container_of helper:
+- Dig into hrtimers using the container_of helper::
+
     (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next
     (gdb) p *$container_of($next, "struct hrtimer", "node")
     $5 = {
@@ -144,7 +157,7 @@
 ------------------------------
 
 The number of commands and convenience functions may evolve over the time,
-this is just a snapshot of the initial version:
+this is just a snapshot of the initial version::
 
  (gdb) apropos lx
  function lx_current -- Return current task
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
new file mode 100644
index 0000000..f7a18f2
--- /dev/null
+++ b/Documentation/dev-tools/kasan.rst
@@ -0,0 +1,173 @@
+The Kernel Address Sanitizer (KASAN)
+====================================
+
+Overview
+--------
+
+KernelAddressSANitizer (KASAN) is a dynamic memory error detector. It provides
+a fast and comprehensive solution for finding use-after-free and out-of-bounds
+bugs.
+
+KASAN uses compile-time instrumentation for checking every memory access,
+therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
+required for detection of out-of-bounds accesses to stack or global variables.
+
+Currently KASAN is supported only for the x86_64 and arm64 architectures.
+
+Usage
+-----
+
+To enable KASAN configure kernel with::
+
+	  CONFIG_KASAN = y
+
+and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline and
+inline are compiler instrumentation types. The former produces smaller binary
+the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
+version 5.0 or later.
+
+KASAN works with both SLUB and SLAB memory allocators.
+For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
+
+To disable instrumentation for specific files or directories, add a line
+similar to the following to the respective kernel Makefile:
+
+- For a single file (e.g. main.o)::
+
+    KASAN_SANITIZE_main.o := n
+
+- For all files in one directory::
+
+    KASAN_SANITIZE := n
+
+Error reports
+~~~~~~~~~~~~~
+
+A typical out of bounds access report looks like this::
+
+    ==================================================================
+    BUG: AddressSanitizer: out of bounds access in kmalloc_oob_right+0x65/0x75 [test_kasan] at addr ffff8800693bc5d3
+    Write of size 1 by task modprobe/1689
+    =============================================================================
+    BUG kmalloc-128 (Not tainted): kasan error
+    -----------------------------------------------------------------------------
+
+    Disabling lock debugging due to kernel taint
+    INFO: Allocated in kmalloc_oob_right+0x3d/0x75 [test_kasan] age=0 cpu=0 pid=1689
+     __slab_alloc+0x4b4/0x4f0
+     kmem_cache_alloc_trace+0x10b/0x190
+     kmalloc_oob_right+0x3d/0x75 [test_kasan]
+     init_module+0x9/0x47 [test_kasan]
+     do_one_initcall+0x99/0x200
+     load_module+0x2cb3/0x3b20
+     SyS_finit_module+0x76/0x80
+     system_call_fastpath+0x12/0x17
+    INFO: Slab 0xffffea0001a4ef00 objects=17 used=7 fp=0xffff8800693bd728 flags=0x100000000004080
+    INFO: Object 0xffff8800693bc558 @offset=1368 fp=0xffff8800693bc720
+
+    Bytes b4 ffff8800693bc548: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a  ........ZZZZZZZZ
+    Object ffff8800693bc558: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
+    Object ffff8800693bc568: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
+    Object ffff8800693bc578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
+    Object ffff8800693bc588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
+    Object ffff8800693bc598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
+    Object ffff8800693bc5a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
+    Object ffff8800693bc5b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
+    Object ffff8800693bc5c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5  kkkkkkkkkkkkkkk.
+    Redzone ffff8800693bc5d8: cc cc cc cc cc cc cc cc                          ........
+    Padding ffff8800693bc718: 5a 5a 5a 5a 5a 5a 5a 5a                          ZZZZZZZZ
+    CPU: 0 PID: 1689 Comm: modprobe Tainted: G    B          3.18.0-rc1-mm1+ #98
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
+     ffff8800693bc000 0000000000000000 ffff8800693bc558 ffff88006923bb78
+     ffffffff81cc68ae 00000000000000f3 ffff88006d407600 ffff88006923bba8
+     ffffffff811fd848 ffff88006d407600 ffffea0001a4ef00 ffff8800693bc558
+    Call Trace:
+     [<ffffffff81cc68ae>] dump_stack+0x46/0x58
+     [<ffffffff811fd848>] print_trailer+0xf8/0x160
+     [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
+     [<ffffffff811ff0f5>] object_err+0x35/0x40
+     [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
+     [<ffffffff8120b9fa>] kasan_report_error+0x38a/0x3f0
+     [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
+     [<ffffffff8120b344>] ? kasan_unpoison_shadow+0x14/0x40
+     [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
+     [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
+     [<ffffffff8120a995>] __asan_store1+0x75/0xb0
+     [<ffffffffa0002601>] ? kmem_cache_oob+0x1d/0xc3 [test_kasan]
+     [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
+     [<ffffffffa0002065>] kmalloc_oob_right+0x65/0x75 [test_kasan]
+     [<ffffffffa00026b0>] init_module+0x9/0x47 [test_kasan]
+     [<ffffffff810002d9>] do_one_initcall+0x99/0x200
+     [<ffffffff811e4e5c>] ? __vunmap+0xec/0x160
+     [<ffffffff81114f63>] load_module+0x2cb3/0x3b20
+     [<ffffffff8110fd70>] ? m_show+0x240/0x240
+     [<ffffffff81115f06>] SyS_finit_module+0x76/0x80
+     [<ffffffff81cd3129>] system_call_fastpath+0x12/0x17
+    Memory state around the buggy address:
+     ffff8800693bc300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+     ffff8800693bc380: fc fc 00 00 00 00 00 00 00 00 00 00 00 00 00 fc
+     ffff8800693bc400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+     ffff8800693bc480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+     ffff8800693bc500: fc fc fc fc fc fc fc fc fc fc fc 00 00 00 00 00
+    >ffff8800693bc580: 00 00 00 00 00 00 00 00 00 00 03 fc fc fc fc fc
+                                                 ^
+     ffff8800693bc600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+     ffff8800693bc680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+     ffff8800693bc700: fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb fb
+     ffff8800693bc780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+     ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+    ==================================================================
+
+The header of the report discribe what kind of bug happened and what kind of
+access caused it. It's followed by the description of the accessed slub object
+(see 'SLUB Debug output' section in Documentation/vm/slub.txt for details) and
+the description of the accessed memory page.
+
+In the last section the report shows memory state around the accessed address.
+Reading this part requires some understanding of how KASAN works.
+
+The state of each 8 aligned bytes of memory is encoded in one shadow byte.
+Those 8 bytes can be accessible, partially accessible, freed or be a redzone.
+We use the following encoding for each shadow byte: 0 means that all 8 bytes
+of the corresponding memory region are accessible; number N (1 <= N <= 7) means
+that the first N bytes are accessible, and other (8 - N) bytes are not;
+any negative value indicates that the entire 8-byte word is inaccessible.
+We use different negative values to distinguish between different kinds of
+inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h).
+
+In the report above the arrows point to the shadow byte 03, which means that
+the accessed address is partially accessible.
+
+
+Implementation details
+----------------------
+
+From a high level, our approach to memory error detection is similar to that
+of kmemcheck: use shadow memory to record whether each byte of memory is safe
+to access, and use compile-time instrumentation to check shadow memory on each
+memory access.
+
+AddressSanitizer dedicates 1/8 of kernel memory to its shadow memory
+(e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and
+offset to translate a memory address to its corresponding shadow address.
+
+Here is the function which translates an address to its corresponding shadow
+address::
+
+    static inline void *kasan_mem_to_shadow(const void *addr)
+    {
+	return ((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
+		+ KASAN_SHADOW_OFFSET;
+    }
+
+where ``KASAN_SHADOW_SCALE_SHIFT = 3``.
+
+Compile-time instrumentation used for checking memory accesses. Compiler inserts
+function calls (__asan_load*(addr), __asan_store*(addr)) before each memory
+access of size 1, 2, 4, 8 or 16. These functions check whether memory access is
+valid or not by checking corresponding shadow memory.
+
+GCC 5.0 has possibility to perform inline instrumentation. Instead of making
+function calls GCC directly inserts the code to check the shadow memory.
+This option significantly enlarges kernel but it gives x1.1-x2 performance
+boost over outline instrumented kernel.
diff --git a/Documentation/kcov.txt b/Documentation/dev-tools/kcov.rst
similarity index 77%
rename from Documentation/kcov.txt
rename to Documentation/dev-tools/kcov.rst
index 779ff4a..aca0e27 100644
--- a/Documentation/kcov.txt
+++ b/Documentation/dev-tools/kcov.rst
@@ -12,38 +12,38 @@
 and instrumentation of some inherently non-deterministic parts of kernel is
 disbled (e.g. scheduler, locking).
 
-Usage:
-======
+Usage
+-----
 
-Configure kernel with:
+Configure the kernel with::
 
         CONFIG_KCOV=y
 
 CONFIG_KCOV requires gcc built on revision 231296 or later.
-Profiling data will only become accessible once debugfs has been mounted:
+Profiling data will only become accessible once debugfs has been mounted::
 
         mount -t debugfs none /sys/kernel/debug
 
-The following program demonstrates kcov usage from within a test program:
+The following program demonstrates kcov usage from within a test program::
 
-#include <stdio.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <unistd.h>
-#include <fcntl.h>
+    #include <stdio.h>
+    #include <stddef.h>
+    #include <stdint.h>
+    #include <stdlib.h>
+    #include <sys/types.h>
+    #include <sys/stat.h>
+    #include <sys/ioctl.h>
+    #include <sys/mman.h>
+    #include <unistd.h>
+    #include <fcntl.h>
 
-#define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
-#define KCOV_ENABLE			_IO('c', 100)
-#define KCOV_DISABLE			_IO('c', 101)
-#define COVER_SIZE			(64<<10)
+    #define KCOV_INIT_TRACE			_IOR('c', 1, unsigned long)
+    #define KCOV_ENABLE			_IO('c', 100)
+    #define KCOV_DISABLE			_IO('c', 101)
+    #define COVER_SIZE			(64<<10)
 
-int main(int argc, char **argv)
-{
+    int main(int argc, char **argv)
+    {
 	int fd;
 	unsigned long *cover, n, i;
 
@@ -83,24 +83,24 @@
 	if (close(fd))
 		perror("close"), exit(1);
 	return 0;
-}
+    }
 
-After piping through addr2line output of the program looks as follows:
+After piping through addr2line output of the program looks as follows::
 
-SyS_read
-fs/read_write.c:562
-__fdget_pos
-fs/file.c:774
-__fget_light
-fs/file.c:746
-__fget_light
-fs/file.c:750
-__fget_light
-fs/file.c:760
-__fdget_pos
-fs/file.c:784
-SyS_read
-fs/read_write.c:562
+    SyS_read
+    fs/read_write.c:562
+    __fdget_pos
+    fs/file.c:774
+    __fget_light
+    fs/file.c:746
+    __fget_light
+    fs/file.c:750
+    __fget_light
+    fs/file.c:760
+    __fdget_pos
+    fs/file.c:784
+    SyS_read
+    fs/read_write.c:562
 
 If a program needs to collect coverage from several threads (independently),
 it needs to open /sys/kernel/debug/kcov in each thread separately.
diff --git a/Documentation/dev-tools/kmemcheck.rst b/Documentation/dev-tools/kmemcheck.rst
new file mode 100644
index 0000000..7f3d198
--- /dev/null
+++ b/Documentation/dev-tools/kmemcheck.rst
@@ -0,0 +1,733 @@
+Getting started with kmemcheck
+==============================
+
+Vegard Nossum <vegardno@ifi.uio.no>
+
+
+Introduction
+------------
+
+kmemcheck is a debugging feature for the Linux Kernel. More specifically, it
+is a dynamic checker that detects and warns about some uses of uninitialized
+memory.
+
+Userspace programmers might be familiar with Valgrind's memcheck. The main
+difference between memcheck and kmemcheck is that memcheck works for userspace
+programs only, and kmemcheck works for the kernel only. The implementations
+are of course vastly different. Because of this, kmemcheck is not as accurate
+as memcheck, but it turns out to be good enough in practice to discover real
+programmer errors that the compiler is not able to find through static
+analysis.
+
+Enabling kmemcheck on a kernel will probably slow it down to the extent that
+the machine will not be usable for normal workloads such as e.g. an
+interactive desktop. kmemcheck will also cause the kernel to use about twice
+as much memory as normal. For this reason, kmemcheck is strictly a debugging
+feature.
+
+
+Downloading
+-----------
+
+As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel.
+
+
+Configuring and compiling
+-------------------------
+
+kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of
+configuration variables must have specific settings in order for the kmemcheck
+menu to even appear in "menuconfig". These are:
+
+- ``CONFIG_CC_OPTIMIZE_FOR_SIZE=n``
+	This option is located under "General setup" / "Optimize for size".
+
+	Without this, gcc will use certain optimizations that usually lead to
+	false positive warnings from kmemcheck. An example of this is a 16-bit
+	field in a struct, where gcc may load 32 bits, then discard the upper
+	16 bits. kmemcheck sees only the 32-bit load, and may trigger a
+	warning for the upper 16 bits (if they're uninitialized).
+
+- ``CONFIG_SLAB=y`` or ``CONFIG_SLUB=y``
+	This option is located under "General setup" / "Choose SLAB
+	allocator".
+
+- ``CONFIG_FUNCTION_TRACER=n``
+	This option is located under "Kernel hacking" / "Tracers" / "Kernel
+	Function Tracer"
+
+	When function tracing is compiled in, gcc emits a call to another
+	function at the beginning of every function. This means that when the
+	page fault handler is called, the ftrace framework will be called
+	before kmemcheck has had a chance to handle the fault. If ftrace then
+	modifies memory that was tracked by kmemcheck, the result is an
+	endless recursive page fault.
+
+- ``CONFIG_DEBUG_PAGEALLOC=n``
+	This option is located under "Kernel hacking" / "Memory Debugging"
+	/ "Debug page memory allocations".
+
+In addition, I highly recommend turning on ``CONFIG_DEBUG_INFO=y``. This is also
+located under "Kernel hacking". With this, you will be able to get line number
+information from the kmemcheck warnings, which is extremely valuable in
+debugging a problem. This option is not mandatory, however, because it slows
+down the compilation process and produces a much bigger kernel image.
+
+Now the kmemcheck menu should be visible (under "Kernel hacking" / "Memory
+Debugging" / "kmemcheck: trap use of uninitialized memory"). Here follows
+a description of the kmemcheck configuration variables:
+
+- ``CONFIG_KMEMCHECK``
+	This must be enabled in order to use kmemcheck at all...
+
+- ``CONFIG_KMEMCHECK_``[``DISABLED`` | ``ENABLED`` | ``ONESHOT``]``_BY_DEFAULT``
+	This option controls the status of kmemcheck at boot-time. "Enabled"
+	will enable kmemcheck right from the start, "disabled" will boot the
+	kernel as normal (but with the kmemcheck code compiled in, so it can
+	be enabled at run-time after the kernel has booted), and "one-shot" is
+	a special mode which will turn kmemcheck off automatically after
+	detecting the first use of uninitialized memory.
+
+	If you are using kmemcheck to actively debug a problem, then you
+	probably want to choose "enabled" here.
+
+	The one-shot mode is mostly useful in automated test setups because it
+	can prevent floods of warnings and increase the chances of the machine
+	surviving in case something is really wrong. In other cases, the one-
+	shot mode could actually be counter-productive because it would turn
+	itself off at the very first error -- in the case of a false positive
+	too -- and this would come in the way of debugging the specific
+	problem you were interested in.
+
+	If you would like to use your kernel as normal, but with a chance to
+	enable kmemcheck in case of some problem, it might be a good idea to
+	choose "disabled" here. When kmemcheck is disabled, most of the run-
+	time overhead is not incurred, and the kernel will be almost as fast
+	as normal.
+
+- ``CONFIG_KMEMCHECK_QUEUE_SIZE``
+	Select the maximum number of error reports to store in an internal
+	(fixed-size) buffer. Since errors can occur virtually anywhere and in
+	any context, we need a temporary storage area which is guaranteed not
+	to generate any other page faults when accessed. The queue will be
+	emptied as soon as a tasklet may be scheduled. If the queue is full,
+	new error reports will be lost.
+
+	The default value of 64 is probably fine. If some code produces more
+	than 64 errors within an irqs-off section, then the code is likely to
+	produce many, many more, too, and these additional reports seldom give
+	any more information (the first report is usually the most valuable
+	anyway).
+
+	This number might have to be adjusted if you are not using serial
+	console or similar to capture the kernel log. If you are using the
+	"dmesg" command to save the log, then getting a lot of kmemcheck
+	warnings might overflow the kernel log itself, and the earlier reports
+	will get lost in that way instead. Try setting this to 10 or so on
+	such a setup.
+
+- ``CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT``
+	Select the number of shadow bytes to save along with each entry of the
+	error-report queue. These bytes indicate what parts of an allocation
+	are initialized, uninitialized, etc. and will be displayed when an
+	error is detected to help the debugging of a particular problem.
+
+	The number entered here is actually the logarithm of the number of
+	bytes that will be saved. So if you pick for example 5 here, kmemcheck
+	will save 2^5 = 32 bytes.
+
+	The default value should be fine for debugging most problems. It also
+	fits nicely within 80 columns.
+
+- ``CONFIG_KMEMCHECK_PARTIAL_OK``
+	This option (when enabled) works around certain GCC optimizations that
+	produce 32-bit reads from 16-bit variables where the upper 16 bits are
+	thrown away afterwards.
+
+	The default value (enabled) is recommended. This may of course hide
+	some real errors, but disabling it would probably produce a lot of
+	false positives.
+
+- ``CONFIG_KMEMCHECK_BITOPS_OK``
+	This option silences warnings that would be generated for bit-field
+	accesses where not all the bits are initialized at the same time. This
+	may also hide some real bugs.
+
+	This option is probably obsolete, or it should be replaced with
+	the kmemcheck-/bitfield-annotations for the code in question. The
+	default value is therefore fine.
+
+Now compile the kernel as usual.
+
+
+How to use
+----------
+
+Booting
+~~~~~~~
+
+First some information about the command-line options. There is only one
+option specific to kmemcheck, and this is called "kmemcheck". It can be used
+to override the default mode as chosen by the ``CONFIG_KMEMCHECK_*_BY_DEFAULT``
+option. Its possible settings are:
+
+- ``kmemcheck=0`` (disabled)
+- ``kmemcheck=1`` (enabled)
+- ``kmemcheck=2`` (one-shot mode)
+
+If SLUB debugging has been enabled in the kernel, it may take precedence over
+kmemcheck in such a way that the slab caches which are under SLUB debugging
+will not be tracked by kmemcheck. In order to ensure that this doesn't happen
+(even though it shouldn't by default), use SLUB's boot option ``slub_debug``,
+like this: ``slub_debug=-``
+
+In fact, this option may also be used for fine-grained control over SLUB vs.
+kmemcheck. For example, if the command line includes
+``kmemcheck=1 slub_debug=,dentry``, then SLUB debugging will be used only
+for the "dentry" slab cache, and with kmemcheck tracking all the other
+caches. This is advanced usage, however, and is not generally recommended.
+
+
+Run-time enable/disable
+~~~~~~~~~~~~~~~~~~~~~~~
+
+When the kernel has booted, it is possible to enable or disable kmemcheck at
+run-time. WARNING: This feature is still experimental and may cause false
+positive warnings to appear. Therefore, try not to use this. If you find that
+it doesn't work properly (e.g. you see an unreasonable amount of warnings), I
+will be happy to take bug reports.
+
+Use the file ``/proc/sys/kernel/kmemcheck`` for this purpose, e.g.::
+
+	$ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck
+
+The numbers are the same as for the ``kmemcheck=`` command-line option.
+
+
+Debugging
+~~~~~~~~~
+
+A typical report will look something like this::
+
+    WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
+    80000000000000000000000000000000000000000088ffff0000000000000000
+     i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
+             ^
+
+    Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A
+    RIP: 0010:[<ffffffff8104ede8>]  [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
+    RSP: 0018:ffff88003cdf7d98  EFLAGS: 00210002
+    RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
+    RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84
+    RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000
+    R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e
+    R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8
+    FS:  0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000
+    CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
+    CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0
+    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+    DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400
+     [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
+     [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
+     [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
+     [<ffffffff8100c7b5>] int_signal+0x12/0x17
+     [<ffffffffffffffff>] 0xffffffffffffffff
+
+The single most valuable information in this report is the RIP (or EIP on 32-
+bit) value. This will help us pinpoint exactly which instruction that caused
+the warning.
+
+If your kernel was compiled with ``CONFIG_DEBUG_INFO=y``, then all we have to do
+is give this address to the addr2line program, like this::
+
+	$ addr2line -e vmlinux -i ffffffff8104ede8
+	arch/x86/include/asm/string_64.h:12
+	include/asm-generic/siginfo.h:287
+	kernel/signal.c:380
+	kernel/signal.c:410
+
+The "``-e vmlinux``" tells addr2line which file to look in. **IMPORTANT:**
+This must be the vmlinux of the kernel that produced the warning in the
+first place! If not, the line number information will almost certainly be
+wrong.
+
+The "``-i``" tells addr2line to also print the line numbers of inlined
+functions.  In this case, the flag was very important, because otherwise,
+it would only have printed the first line, which is just a call to
+``memcpy()``, which could be called from a thousand places in the kernel, and
+is therefore not very useful.  These inlined functions would not show up in
+the stack trace above, simply because the kernel doesn't load the extra
+debugging information. This technique can of course be used with ordinary
+kernel oopses as well.
+
+In this case, it's the caller of ``memcpy()`` that is interesting, and it can be
+found in ``include/asm-generic/siginfo.h``, line 287::
+
+    281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
+    282 {
+    283         if (from->si_code < 0)
+    284                 memcpy(to, from, sizeof(*to));
+    285         else
+    286                 /* _sigchld is currently the largest know union member */
+    287                 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
+    288 }
+
+Since this was a read (kmemcheck usually warns about reads only, though it can
+warn about writes to unallocated or freed memory as well), it was probably the
+"from" argument which contained some uninitialized bytes. Following the chain
+of calls, we move upwards to see where "from" was allocated or initialized,
+``kernel/signal.c``, line 380::
+
+    359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+    360 {
+    ...
+    367         list_for_each_entry(q, &list->list, list) {
+    368                 if (q->info.si_signo == sig) {
+    369                         if (first)
+    370                                 goto still_pending;
+    371                         first = q;
+    ...
+    377         if (first) {
+    378 still_pending:
+    379                 list_del_init(&first->list);
+    380                 copy_siginfo(info, &first->info);
+    381                 __sigqueue_free(first);
+    ...
+    392         }
+    393 }
+
+Here, it is ``&first->info`` that is being passed on to ``copy_siginfo()``. The
+variable ``first`` was found on a list -- passed in as the second argument to
+``collect_signal()``. We  continue our journey through the stack, to figure out
+where the item on "list" was allocated or initialized. We move to line 410::
+
+    395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
+    396                         siginfo_t *info)
+    397 {
+    ...
+    410                 collect_signal(sig, pending, info);
+    ...
+    414 }
+
+Now we need to follow the ``pending`` pointer, since that is being passed on to
+``collect_signal()`` as ``list``. At this point, we've run out of lines from the
+"addr2line" output. Not to worry, we just paste the next addresses from the
+kmemcheck stack dump, i.e.::
+
+     [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
+     [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
+     [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
+     [<ffffffff8100c7b5>] int_signal+0x12/0x17
+
+	$ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \
+		ffffffff8100b87d ffffffff8100c7b5
+	kernel/signal.c:446
+	kernel/signal.c:1806
+	arch/x86/kernel/signal.c:805
+	arch/x86/kernel/signal.c:871
+	arch/x86/kernel/entry_64.S:694
+
+Remember that since these addresses were found on the stack and not as the
+RIP value, they actually point to the _next_ instruction (they are return
+addresses). This becomes obvious when we look at the code for line 446::
+
+    422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
+    423 {
+    ...
+    431                 signr = __dequeue_signal(&tsk->signal->shared_pending,
+    432						 mask, info);
+    433			/*
+    434			 * itimer signal ?
+    435			 *
+    436			 * itimers are process shared and we restart periodic
+    437			 * itimers in the signal delivery path to prevent DoS
+    438			 * attacks in the high resolution timer case. This is
+    439			 * compliant with the old way of self restarting
+    440			 * itimers, as the SIGALRM is a legacy signal and only
+    441			 * queued once. Changing the restart behaviour to
+    442			 * restart the timer in the signal dequeue path is
+    443			 * reducing the timer noise on heavy loaded !highres
+    444			 * systems too.
+    445			 */
+    446			if (unlikely(signr == SIGALRM)) {
+    ...
+    489 }
+
+So instead of looking at 446, we should be looking at 431, which is the line
+that executes just before 446. Here we see that what we are looking for is
+``&tsk->signal->shared_pending``.
+
+Our next task is now to figure out which function that puts items on this
+``shared_pending`` list. A crude, but efficient tool, is ``git grep``::
+
+	$ git grep -n 'shared_pending' kernel/
+	...
+	kernel/signal.c:828:	pending = group ? &t->signal->shared_pending : &t->pending;
+	kernel/signal.c:1339:	pending = group ? &t->signal->shared_pending : &t->pending;
+	...
+
+There were more results, but none of them were related to list operations,
+and these were the only assignments. We inspect the line numbers more closely
+and find that this is indeed where items are being added to the list::
+
+    816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
+    817				int group)
+    818 {
+    ...
+    828		pending = group ? &t->signal->shared_pending : &t->pending;
+    ...
+    851		q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
+    852						     (is_si_special(info) ||
+    853						      info->si_code >= 0)));
+    854		if (q) {
+    855			list_add_tail(&q->list, &pending->list);
+    ...
+    890 }
+
+and::
+
+    1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
+    1310 {
+    ....
+    1339	 pending = group ? &t->signal->shared_pending : &t->pending;
+    1340	 list_add_tail(&q->list, &pending->list);
+    ....
+    1347 }
+
+In the first case, the list element we are looking for, ``q``, is being
+returned from the function ``__sigqueue_alloc()``, which looks like an
+allocation function.  Let's take a look at it::
+
+    187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
+    188						 int override_rlimit)
+    189 {
+    190		struct sigqueue *q = NULL;
+    191		struct user_struct *user;
+    192
+    193		/*
+    194		 * We won't get problems with the target's UID changing under us
+    195		 * because changing it requires RCU be used, and if t != current, the
+    196		 * caller must be holding the RCU readlock (by way of a spinlock) and
+    197		 * we use RCU protection here
+    198		 */
+    199		user = get_uid(__task_cred(t)->user);
+    200		atomic_inc(&user->sigpending);
+    201		if (override_rlimit ||
+    202		    atomic_read(&user->sigpending) <=
+    203				t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
+    204			q = kmem_cache_alloc(sigqueue_cachep, flags);
+    205		if (unlikely(q == NULL)) {
+    206			atomic_dec(&user->sigpending);
+    207			free_uid(user);
+    208		} else {
+    209			INIT_LIST_HEAD(&q->list);
+    210			q->flags = 0;
+    211			q->user = user;
+    212		}
+    213
+    214		return q;
+    215 }
+
+We see that this function initializes ``q->list``, ``q->flags``, and
+``q->user``. It seems that now is the time to look at the definition of
+``struct sigqueue``, e.g.::
+
+    14 struct sigqueue {
+    15	       struct list_head list;
+    16	       int flags;
+    17	       siginfo_t info;
+    18	       struct user_struct *user;
+    19 };
+
+And, you might remember, it was a ``memcpy()`` on ``&first->info`` that
+caused the warning, so this makes perfect sense. It also seems reasonable
+to assume that it is the caller of ``__sigqueue_alloc()`` that has the
+responsibility of filling out (initializing) this member.
+
+But just which fields of the struct were uninitialized? Let's look at
+kmemcheck's report again::
+
+    WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
+    80000000000000000000000000000000000000000088ffff0000000000000000
+     i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
+	     ^
+
+These first two lines are the memory dump of the memory object itself, and
+the shadow bytemap, respectively. The memory object itself is in this case
+``&first->info``. Just beware that the start of this dump is NOT the start
+of the object itself! The position of the caret (^) corresponds with the
+address of the read (ffff88003e4a2024).
+
+The shadow bytemap dump legend is as follows:
+
+- i: initialized
+- u: uninitialized
+- a: unallocated (memory has been allocated by the slab layer, but has not
+  yet been handed off to anybody)
+- f: freed (memory has been allocated by the slab layer, but has been freed
+  by the previous owner)
+
+In order to figure out where (relative to the start of the object) the
+uninitialized memory was located, we have to look at the disassembly. For
+that, we'll need the RIP address again::
+
+    RIP: 0010:[<ffffffff8104ede8>]  [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
+
+	$ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8:
+	ffffffff8104edc8:	mov    %r8,0x8(%r8)
+	ffffffff8104edcc:	test   %r10d,%r10d
+	ffffffff8104edcf:	js     ffffffff8104ee88 <__dequeue_signal+0x168>
+	ffffffff8104edd5:	mov    %rax,%rdx
+	ffffffff8104edd8:	mov    $0xc,%ecx
+	ffffffff8104eddd:	mov    %r13,%rdi
+	ffffffff8104ede0:	mov    $0x30,%eax
+	ffffffff8104ede5:	mov    %rdx,%rsi
+	ffffffff8104ede8:	rep movsl %ds:(%rsi),%es:(%rdi)
+	ffffffff8104edea:	test   $0x2,%al
+	ffffffff8104edec:	je     ffffffff8104edf0 <__dequeue_signal+0xd0>
+	ffffffff8104edee:	movsw  %ds:(%rsi),%es:(%rdi)
+	ffffffff8104edf0:	test   $0x1,%al
+	ffffffff8104edf2:	je     ffffffff8104edf5 <__dequeue_signal+0xd5>
+	ffffffff8104edf4:	movsb  %ds:(%rsi),%es:(%rdi)
+	ffffffff8104edf5:	mov    %r8,%rdi
+	ffffffff8104edf8:	callq  ffffffff8104de60 <__sigqueue_free>
+
+As expected, it's the "``rep movsl``" instruction from the ``memcpy()``
+that causes the warning. We know about ``REP MOVSL`` that it uses the register
+``RCX`` to count the number of remaining iterations. By taking a look at the
+register dump again (from the kmemcheck report), we can figure out how many
+bytes were left to copy::
+
+    RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
+
+By looking at the disassembly, we also see that ``%ecx`` is being loaded
+with the value ``$0xc`` just before (ffffffff8104edd8), so we are very
+lucky. Keep in mind that this is the number of iterations, not bytes. And
+since this is a "long" operation, we need to multiply by 4 to get the
+number of bytes. So this means that the uninitialized value was encountered
+at 4 * (0xc - 0x9) = 12 bytes from the start of the object.
+
+We can now try to figure out which field of the "``struct siginfo``" that
+was not initialized. This is the beginning of the struct::
+
+    40 typedef struct siginfo {
+    41	       int si_signo;
+    42	       int si_errno;
+    43	       int si_code;
+    44
+    45	       union {
+    ..
+    92	       } _sifields;
+    93 } siginfo_t;
+
+On 64-bit, the int is 4 bytes long, so it must the union member that has
+not been initialized. We can verify this using gdb::
+
+	$ gdb vmlinux
+	...
+	(gdb) p &((struct siginfo *) 0)->_sifields
+	$1 = (union {...} *) 0x10
+
+Actually, it seems that the union member is located at offset 0x10 -- which
+means that gcc has inserted 4 bytes of padding between the members ``si_code``
+and ``_sifields``. We can now get a fuller picture of the memory dump::
+
+		 _----------------------------=> si_code
+		/	 _--------------------=> (padding)
+	       |	/	 _------------=> _sifields(._kill._pid)
+	       |       |	/	 _----=> _sifields(._kill._uid)
+	       |       |       |	/
+	-------|-------|-------|-------|
+	80000000000000000000000000000000000000000088ffff0000000000000000
+	 i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
+
+This allows us to realize another important fact: ``si_code`` contains the
+value 0x80. Remember that x86 is little endian, so the first 4 bytes
+"80000000" are really the number 0x00000080. With a bit of research, we
+find that this is actually the constant ``SI_KERNEL`` defined in
+``include/asm-generic/siginfo.h``::
+
+    144 #define SI_KERNEL	0x80		/* sent by the kernel from somewhere	 */
+
+This macro is used in exactly one place in the x86 kernel: In ``send_signal()``
+in ``kernel/signal.c``::
+
+    816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
+    817				int group)
+    818 {
+    ...
+    828		pending = group ? &t->signal->shared_pending : &t->pending;
+    ...
+    851		q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
+    852						     (is_si_special(info) ||
+    853						      info->si_code >= 0)));
+    854		if (q) {
+    855			list_add_tail(&q->list, &pending->list);
+    856			switch ((unsigned long) info) {
+    ...
+    865			case (unsigned long) SEND_SIG_PRIV:
+    866				q->info.si_signo = sig;
+    867				q->info.si_errno = 0;
+    868				q->info.si_code = SI_KERNEL;
+    869				q->info.si_pid = 0;
+    870				q->info.si_uid = 0;
+    871				break;
+    ...
+    890 }
+
+Not only does this match with the ``.si_code`` member, it also matches the place
+we found earlier when looking for where siginfo_t objects are enqueued on the
+``shared_pending`` list.
+
+So to sum up: It seems that it is the padding introduced by the compiler
+between two struct fields that is uninitialized, and this gets reported when
+we do a ``memcpy()`` on the struct. This means that we have identified a false
+positive warning.
+
+Normally, kmemcheck will not report uninitialized accesses in ``memcpy()`` calls
+when both the source and destination addresses are tracked. (Instead, we copy
+the shadow bytemap as well). In this case, the destination address clearly
+was not tracked. We can dig a little deeper into the stack trace from above::
+
+	arch/x86/kernel/signal.c:805
+	arch/x86/kernel/signal.c:871
+	arch/x86/kernel/entry_64.S:694
+
+And we clearly see that the destination siginfo object is located on the
+stack::
+
+    782 static void do_signal(struct pt_regs *regs)
+    783 {
+    784		struct k_sigaction ka;
+    785		siginfo_t info;
+    ...
+    804		signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+    ...
+    854 }
+
+And this ``&info`` is what eventually gets passed to ``copy_siginfo()`` as the
+destination argument.
+
+Now, even though we didn't find an actual error here, the example is still a
+good one, because it shows how one would go about to find out what the report
+was all about.
+
+
+Annotating false positives
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+There are a few different ways to make annotations in the source code that
+will keep kmemcheck from checking and reporting certain allocations. Here
+they are:
+
+- ``__GFP_NOTRACK_FALSE_POSITIVE``
+	This flag can be passed to ``kmalloc()`` or ``kmem_cache_alloc()``
+	(therefore also to other functions that end up calling one of
+	these) to indicate that the allocation should not be tracked
+	because it would lead to a false positive report. This is a "big
+	hammer" way of silencing kmemcheck; after all, even if the false
+	positive pertains to particular field in a struct, for example, we
+	will now lose the ability to find (real) errors in other parts of
+	the same struct.
+
+	Example::
+
+	    /* No warnings will ever trigger on accessing any part of x */
+	    x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE);
+
+- ``kmemcheck_bitfield_begin(name)``/``kmemcheck_bitfield_end(name)`` and
+	``kmemcheck_annotate_bitfield(ptr, name)``
+	The first two of these three macros can be used inside struct
+	definitions to signal, respectively, the beginning and end of a
+	bitfield. Additionally, this will assign the bitfield a name, which
+	is given as an argument to the macros.
+
+	Having used these markers, one can later use
+	kmemcheck_annotate_bitfield() at the point of allocation, to indicate
+	which parts of the allocation is part of a bitfield.
+
+	Example::
+
+	    struct foo {
+		int x;
+
+		kmemcheck_bitfield_begin(flags);
+		int flag_a:1;
+		int flag_b:1;
+		kmemcheck_bitfield_end(flags);
+
+		int y;
+	    };
+
+	    struct foo *x = kmalloc(sizeof *x);
+
+	    /* No warnings will trigger on accessing the bitfield of x */
+	    kmemcheck_annotate_bitfield(x, flags);
+
+	Note that ``kmemcheck_annotate_bitfield()`` can be used even before the
+	return value of ``kmalloc()`` is checked -- in other words, passing NULL
+	as the first argument is legal (and will do nothing).
+
+
+Reporting errors
+----------------
+
+As we have seen, kmemcheck will produce false positive reports. Therefore, it
+is not very wise to blindly post kmemcheck warnings to mailing lists and
+maintainers. Instead, I encourage maintainers and developers to find errors
+in their own code. If you get a warning, you can try to work around it, try
+to figure out if it's a real error or not, or simply ignore it. Most
+developers know their own code and will quickly and efficiently determine the
+root cause of a kmemcheck report. This is therefore also the most efficient
+way to work with kmemcheck.
+
+That said, we (the kmemcheck maintainers) will always be on the lookout for
+false positives that we can annotate and silence. So whatever you find,
+please drop us a note privately! Kernel configs and steps to reproduce (if
+available) are of course a great help too.
+
+Happy hacking!
+
+
+Technical description
+---------------------
+
+kmemcheck works by marking memory pages non-present. This means that whenever
+somebody attempts to access the page, a page fault is generated. The page
+fault handler notices that the page was in fact only hidden, and so it calls
+on the kmemcheck code to make further investigations.
+
+When the investigations are completed, kmemcheck "shows" the page by marking
+it present (as it would be under normal circumstances). This way, the
+interrupted code can continue as usual.
+
+But after the instruction has been executed, we should hide the page again, so
+that we can catch the next access too! Now kmemcheck makes use of a debugging
+feature of the processor, namely single-stepping. When the processor has
+finished the one instruction that generated the memory access, a debug
+exception is raised. From here, we simply hide the page again and continue
+execution, this time with the single-stepping feature turned off.
+
+kmemcheck requires some assistance from the memory allocator in order to work.
+The memory allocator needs to
+
+  1. Tell kmemcheck about newly allocated pages and pages that are about to
+     be freed. This allows kmemcheck to set up and tear down the shadow memory
+     for the pages in question. The shadow memory stores the status of each
+     byte in the allocation proper, e.g. whether it is initialized or
+     uninitialized.
+
+  2. Tell kmemcheck which parts of memory should be marked uninitialized.
+     There are actually a few more states, such as "not yet allocated" and
+     "recently freed".
+
+If a slab cache is set up using the SLAB_NOTRACK flag, it will never return
+memory that can take page faults because of kmemcheck.
+
+If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still
+request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags.
+This does not prevent the page faults from occurring, however, but marks the
+object in question as being initialized so that no warnings will ever be
+produced for this object.
+
+Currently, the SLAB and SLUB allocators are supported by kmemcheck.
diff --git a/Documentation/kmemleak.txt b/Documentation/dev-tools/kmemleak.rst
similarity index 73%
rename from Documentation/kmemleak.txt
rename to Documentation/dev-tools/kmemleak.rst
index 18e24ab..1788722 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/dev-tools/kmemleak.rst
@@ -1,15 +1,12 @@
 Kernel Memory Leak Detector
 ===========================
 
-Introduction
-------------
-
 Kmemleak provides a way of detecting possible kernel memory leaks in a
 way similar to a tracing garbage collector
 (https://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
 with the difference that the orphan objects are not freed but only
 reported via /sys/kernel/debug/kmemleak. A similar method is used by the
-Valgrind tool (memcheck --leak-check) to detect the memory leaks in
+Valgrind tool (``memcheck --leak-check``) to detect the memory leaks in
 user-space applications.
 Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze, ppc, mips, s390, metag and tile.
 
@@ -19,20 +16,20 @@
 CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
 thread scans the memory every 10 minutes (by default) and prints the
 number of new unreferenced objects found. To display the details of all
-the possible memory leaks:
+the possible memory leaks::
 
   # mount -t debugfs nodev /sys/kernel/debug/
   # cat /sys/kernel/debug/kmemleak
 
-To trigger an intermediate memory scan:
+To trigger an intermediate memory scan::
 
   # echo scan > /sys/kernel/debug/kmemleak
 
-To clear the list of all current possible memory leaks:
+To clear the list of all current possible memory leaks::
 
   # echo clear > /sys/kernel/debug/kmemleak
 
-New leaks will then come up upon reading /sys/kernel/debug/kmemleak
+New leaks will then come up upon reading ``/sys/kernel/debug/kmemleak``
 again.
 
 Note that the orphan objects are listed in the order they were allocated
@@ -40,22 +37,31 @@
 objects to be reported as orphan.
 
 Memory scanning parameters can be modified at run-time by writing to the
-/sys/kernel/debug/kmemleak file. The following parameters are supported:
+``/sys/kernel/debug/kmemleak`` file. The following parameters are supported:
 
-  off		- disable kmemleak (irreversible)
-  stack=on	- enable the task stacks scanning (default)
-  stack=off	- disable the tasks stacks scanning
-  scan=on	- start the automatic memory scanning thread (default)
-  scan=off	- stop the automatic memory scanning thread
-  scan=<secs>	- set the automatic memory scanning period in seconds
-		  (default 600, 0 to stop the automatic scanning)
-  scan		- trigger a memory scan
-  clear		- clear list of current memory leak suspects, done by
-		  marking all current reported unreferenced objects grey,
-		  or free all kmemleak objects if kmemleak has been disabled.
-  dump=<addr>	- dump information about the object found at <addr>
+- off
+    disable kmemleak (irreversible)
+- stack=on
+    enable the task stacks scanning (default)
+- stack=off
+    disable the tasks stacks scanning
+- scan=on
+    start the automatic memory scanning thread (default)
+- scan=off
+    stop the automatic memory scanning thread
+- scan=<secs>
+    set the automatic memory scanning period in seconds
+    (default 600, 0 to stop the automatic scanning)
+- scan
+    trigger a memory scan
+- clear
+    clear list of current memory leak suspects, done by
+    marking all current reported unreferenced objects grey,
+    or free all kmemleak objects if kmemleak has been disabled.
+- dump=<addr>
+    dump information about the object found at <addr>
 
-Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
+Kmemleak can also be disabled at boot-time by passing ``kmemleak=off`` on
 the kernel command line.
 
 Memory may be allocated or freed before kmemleak is initialised and
@@ -63,13 +69,14 @@
 is configured via the CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE option.
 
 If CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF are enabled, the kmemleak is
-disabled by default. Passing "kmemleak=on" on the kernel command
+disabled by default. Passing ``kmemleak=on`` on the kernel command
 line enables the function. 
 
 Basic Algorithm
 ---------------
 
-The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and
+The memory allocations via :c:func:`kmalloc`, :c:func:`vmalloc`,
+:c:func:`kmem_cache_alloc` and
 friends are traced and the pointers, together with additional
 information like size and stack trace, are stored in a rbtree.
 The corresponding freeing function calls are tracked and the pointers
@@ -113,13 +120,13 @@
 you can find new unreferenced objects; this should help with testing
 specific sections of code.
 
-To test a critical section on demand with a clean kmemleak do:
+To test a critical section on demand with a clean kmemleak do::
 
   # echo clear > /sys/kernel/debug/kmemleak
   ... test your kernel or modules ...
   # echo scan > /sys/kernel/debug/kmemleak
 
-Then as usual to get your report with:
+Then as usual to get your report with::
 
   # cat /sys/kernel/debug/kmemleak
 
@@ -131,7 +138,7 @@
 won't be freed when kmemleak is disabled, and those objects may occupy
 a large part of physical memory.
 
-In this situation, you may reclaim memory with:
+In this situation, you may reclaim memory with::
 
   # echo clear > /sys/kernel/debug/kmemleak
 
@@ -140,20 +147,20 @@
 
 See the include/linux/kmemleak.h header for the functions prototype.
 
-kmemleak_init		 - initialize kmemleak
-kmemleak_alloc		 - notify of a memory block allocation
-kmemleak_alloc_percpu	 - notify of a percpu memory block allocation
-kmemleak_free		 - notify of a memory block freeing
-kmemleak_free_part	 - notify of a partial memory block freeing
-kmemleak_free_percpu	 - notify of a percpu memory block freeing
-kmemleak_update_trace	 - update object allocation stack trace
-kmemleak_not_leak	 - mark an object as not a leak
-kmemleak_ignore		 - do not scan or report an object as leak
-kmemleak_scan_area	 - add scan areas inside a memory block
-kmemleak_no_scan	 - do not scan a memory block
-kmemleak_erase		 - erase an old value in a pointer variable
-kmemleak_alloc_recursive - as kmemleak_alloc but checks the recursiveness
-kmemleak_free_recursive	 - as kmemleak_free but checks the recursiveness
+- ``kmemleak_init``		 - initialize kmemleak
+- ``kmemleak_alloc``		 - notify of a memory block allocation
+- ``kmemleak_alloc_percpu``	 - notify of a percpu memory block allocation
+- ``kmemleak_free``		 - notify of a memory block freeing
+- ``kmemleak_free_part``	 - notify of a partial memory block freeing
+- ``kmemleak_free_percpu``	 - notify of a percpu memory block freeing
+- ``kmemleak_update_trace``	 - update object allocation stack trace
+- ``kmemleak_not_leak``	 - mark an object as not a leak
+- ``kmemleak_ignore``		 - do not scan or report an object as leak
+- ``kmemleak_scan_area``	 - add scan areas inside a memory block
+- ``kmemleak_no_scan``	 - do not scan a memory block
+- ``kmemleak_erase``		 - erase an old value in a pointer variable
+- ``kmemleak_alloc_recursive`` - as kmemleak_alloc but checks the recursiveness
+- ``kmemleak_free_recursive``	 - as kmemleak_free but checks the recursiveness
 
 Dealing with false positives/negatives
 --------------------------------------
diff --git a/Documentation/sparse.txt b/Documentation/dev-tools/sparse.rst
similarity index 81%
rename from Documentation/sparse.txt
rename to Documentation/dev-tools/sparse.rst
index eceab13..8c250e8 100644
--- a/Documentation/sparse.txt
+++ b/Documentation/dev-tools/sparse.rst
@@ -1,11 +1,20 @@
-Copyright 2004 Linus Torvalds
-Copyright 2004 Pavel Machek <pavel@ucw.cz>
-Copyright 2006 Bob Copeland <me@bobcopeland.com>
+.. Copyright 2004 Linus Torvalds
+.. Copyright 2004 Pavel Machek <pavel@ucw.cz>
+.. Copyright 2006 Bob Copeland <me@bobcopeland.com>
+
+Sparse
+======
+
+Sparse is a semantic checker for C programs; it can be used to find a
+number of potential problems with kernel code.  See
+https://lwn.net/Articles/689907/ for an overview of sparse; this document
+contains some kernel-specific sparse information.
+
 
 Using sparse for typechecking
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-----------------------------
 
-"__bitwise" is a type attribute, so you have to do something like this:
+"__bitwise" is a type attribute, so you have to do something like this::
 
         typedef int __bitwise pm_request_t;
 
@@ -20,13 +29,13 @@
 the enum values are all the same type, now "enum pm_request" will be that
 type too.
 
-And with gcc, all the __bitwise/__force stuff goes away, and it all ends
-up looking just like integers to gcc.
+And with gcc, all the "__bitwise"/"__force stuff" goes away, and it all
+ends up looking just like integers to gcc.
 
 Quite frankly, you don't need the enum there. The above all really just
 boils down to one special "int __bitwise" type.
 
-So the simpler way is to just do
+So the simpler way is to just do::
 
         typedef int __bitwise pm_request_t;
 
@@ -50,7 +59,7 @@
 don't want to drown in noise unless we'd explicitly asked for it.
 
 Using sparse for lock checking
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+------------------------------
 
 The following macros are undefined for gcc and defined during a sparse
 run to use the "context" tracking feature of sparse, applied to
@@ -69,22 +78,22 @@
 sparse would otherwise report a context imbalance.
 
 Getting sparse
-~~~~~~~~~~~~~~
+--------------
 
 You can get latest released versions from the Sparse homepage at
 https://sparse.wiki.kernel.org/index.php/Main_Page
 
 Alternatively, you can get snapshots of the latest development version
-of sparse using git to clone..
+of sparse using git to clone::
 
         git://git.kernel.org/pub/scm/devel/sparse/sparse.git
 
-DaveJ has hourly generated tarballs of the git tree available at..
+DaveJ has hourly generated tarballs of the git tree available at::
 
         http://www.codemonkey.org.uk/projects/git-snapshots/sparse/
 
 
-Once you have it, just do
+Once you have it, just do::
 
         make
         make install
@@ -92,7 +101,7 @@
 as a regular user, and it will install sparse in your ~/bin directory.
 
 Using sparse
-~~~~~~~~~~~~
+------------
 
 Do a kernel make with "make C=1" to run sparse on all the C files that get
 recompiled, or use "make C=2" to run sparse on the files whether they need to
@@ -101,7 +110,7 @@
 
 The optional make variable CF can be used to pass arguments to sparse.  The
 build system passes -Wbitwise to sparse automatically.  To perform endianness
-checks, you may define __CHECK_ENDIAN__:
+checks, you may define __CHECK_ENDIAN__::
 
         make C=2 CF="-D__CHECK_ENDIAN__"
 
diff --git a/Documentation/dev-tools/tools.rst b/Documentation/dev-tools/tools.rst
new file mode 100644
index 0000000..824ae8e
--- /dev/null
+++ b/Documentation/dev-tools/tools.rst
@@ -0,0 +1,25 @@
+================================
+Development tools for the kernel
+================================
+
+This document is a collection of documents about development tools that can
+be used to work on the kernel.  For now, the documents have been pulled
+together without any significant effot to integrate them into a coherent
+whole; patches welcome!
+
+.. class:: toc-title
+
+	   Table of contents
+
+.. toctree::
+   :maxdepth: 2
+
+   coccinelle
+   sparse
+   kcov
+   gcov
+   kasan
+   ubsan
+   kmemleak
+   kmemcheck
+   gdb-kernel-debugging
diff --git a/Documentation/ubsan.txt b/Documentation/dev-tools/ubsan.rst
similarity index 77%
rename from Documentation/ubsan.txt
rename to Documentation/dev-tools/ubsan.rst
index f58215e..655e6b6 100644
--- a/Documentation/ubsan.txt
+++ b/Documentation/dev-tools/ubsan.rst
@@ -1,7 +1,5 @@
-Undefined Behavior Sanitizer - UBSAN
-
-Overview
---------
+The Undefined Behavior Sanitizer - UBSAN
+========================================
 
 UBSAN is a runtime undefined behaviour checker.
 
@@ -10,11 +8,13 @@
 that may cause UB. If check fails (i.e. UB detected) __ubsan_handle_*
 function called to print error message.
 
-GCC has that feature since 4.9.x [1] (see -fsanitize=undefined option and
-its suboptions). GCC 5.x has more checkers implemented [2].
+GCC has that feature since 4.9.x [1_] (see ``-fsanitize=undefined`` option and
+its suboptions). GCC 5.x has more checkers implemented [2_].
 
 Report example
----------------
+--------------
+
+::
 
 	 ================================================================================
 	 UBSAN: Undefined behaviour in ../include/linux/bitops.h:110:33
@@ -47,29 +47,33 @@
 Usage
 -----
 
-To enable UBSAN configure kernel with:
+To enable UBSAN configure kernel with::
 
 	CONFIG_UBSAN=y
 
-and to check the entire kernel:
+and to check the entire kernel::
 
         CONFIG_UBSAN_SANITIZE_ALL=y
 
 To enable instrumentation for specific files or directories, add a line
 similar to the following to the respective kernel Makefile:
 
-        For a single file (e.g. main.o):
-                UBSAN_SANITIZE_main.o := y
+- For a single file (e.g. main.o)::
 
-        For all files in one directory:
-                UBSAN_SANITIZE := y
+    UBSAN_SANITIZE_main.o := y
+
+- For all files in one directory::
+
+    UBSAN_SANITIZE := y
 
 To exclude files from being instrumented even if
-CONFIG_UBSAN_SANITIZE_ALL=y, use:
+``CONFIG_UBSAN_SANITIZE_ALL=y``, use::
 
-                UBSAN_SANITIZE_main.o := n
-        and:
-                UBSAN_SANITIZE := n
+  UBSAN_SANITIZE_main.o := n
+
+and::
+
+  UBSAN_SANITIZE := n
 
 Detection of unaligned accesses controlled through the separate option -
 CONFIG_UBSAN_ALIGNMENT. It's off by default on architectures that support
@@ -80,5 +84,5 @@
 References
 ----------
 
-[1] - https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Debugging-Options.html
-[2] - https://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html
+.. _1: https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/Debugging-Options.html
+.. _2: https://gcc.gnu.org/onlinedocs/gcc/Debugging-Options.html
diff --git a/Documentation/development-process/1.Intro b/Documentation/development-process/1.Intro.rst
similarity index 86%
rename from Documentation/development-process/1.Intro
rename to Documentation/development-process/1.Intro.rst
index 9b61448..22642b3 100644
--- a/Documentation/development-process/1.Intro
+++ b/Documentation/development-process/1.Intro.rst
@@ -1,16 +1,8 @@
-1: A GUIDE TO THE KERNEL DEVELOPMENT PROCESS
+Introdution
+===========
 
-The purpose of this document is to help developers (and their managers)
-work with the development community with a minimum of frustration.  It is
-an attempt to document how this community works in a way which is
-accessible to those who are not intimately familiar with Linux kernel
-development (or, indeed, free software development in general).  While
-there is some technical material here, this is very much a process-oriented
-discussion which does not require a deep knowledge of kernel programming to
-understand.
-
-
-1.1: EXECUTIVE SUMMARY
+Executive summary
+-----------------
 
 The rest of this section covers the scope of the kernel development process
 and the kinds of frustrations that developers and their employers can
@@ -20,41 +12,41 @@
 influence the direction of kernel development.  Code contributed to the
 Linux kernel must be made available under a GPL-compatible license.
 
-Section 2 introduces the development process, the kernel release cycle, and
-the mechanics of the merge window.  The various phases in the patch
-development, review, and merging cycle are covered.  There is some
+:ref:`development_process` introduces the development process, the kernel
+release cycle, and the mechanics of the merge window.  The various phases in
+the patch development, review, and merging cycle are covered.  There is some
 discussion of tools and mailing lists.  Developers wanting to get started
 with kernel development are encouraged to track down and fix bugs as an
 initial exercise.
 
-Section 3 covers early-stage project planning, with an emphasis on
-involving the development community as soon as possible.
+:ref:`development_early_stage` covers early-stage project planning, with an
+emphasis on involving the development community as soon as possible.
 
-Section 4 is about the coding process; several pitfalls which have been
-encountered by other developers are discussed.  Some requirements for
+:ref:`development_coding` is about the coding process; several pitfalls which
+have been encountered by other developers are discussed.  Some requirements for
 patches are covered, and there is an introduction to some of the tools
 which can help to ensure that kernel patches are correct.
 
-Section 5 talks about the process of posting patches for review.  To be
-taken seriously by the development community, patches must be properly
-formatted and described, and they must be sent to the right place.
+:ref:`development_posting` talks about the process of posting patches for
+review. To be taken seriously by the development community, patches must be
+properly formatted and described, and they must be sent to the right place.
 Following the advice in this section should help to ensure the best
 possible reception for your work.
 
-Section 6 covers what happens after posting patches; the job is far from
-done at that point.  Working with reviewers is a crucial part of the
-development process; this section offers a number of tips on how to avoid
-problems at this important stage.  Developers are cautioned against
+:ref:`development_followthrough` covers what happens after posting patches; the
+job is far from done at that point.  Working with reviewers is a crucial part
+of the development process; this section offers a number of tips on how to
+avoid problems at this important stage.  Developers are cautioned against
 assuming that the job is done when a patch is merged into the mainline.
 
-Section 7 introduces a couple of "advanced" topics: managing patches with
-git and reviewing patches posted by others.
+:ref:`development_advancedtopics` introduces a couple of "advanced" topics:
+managing patches with git and reviewing patches posted by others.
 
-Section 8 concludes the document with pointers to sources for more
-information on kernel development.
+:ref:`development_conclusion` concludes the document with pointers to sources
+for more information on kernel development.
 
-
-1.2: WHAT THIS DOCUMENT IS ABOUT
+What this document is about
+---------------------------
 
 The Linux kernel, at over 8 million lines of code and well over 1000
 contributors to each release, is one of the largest and most active free
@@ -108,8 +100,8 @@
 better; the following text should help you - or those who work for you -
 join our community.
 
-
-1.3: CREDITS
+Credits
+-------
 
 This document was written by Jonathan Corbet, corbet@lwn.net.  It has been
 improved by comments from Johannes Berg, James Berry, Alex Chiang, Roland
@@ -120,8 +112,8 @@
 This work was supported by the Linux Foundation; thanks especially to
 Amanda McPherson, who saw the value of this effort and made it all happen.
 
-
-1.4: THE IMPORTANCE OF GETTING CODE INTO THE MAINLINE
+The importance of getting code into the mainline
+------------------------------------------------
 
 Some companies and developers occasionally wonder why they should bother
 learning how to work with the kernel community and get their code into the
@@ -233,8 +225,8 @@
 point, vendors whose code is in the mainline and well maintained will be
 much better positioned to get the new product ready for market quickly.
 
-
-1.5: LICENSING
+Licensing
+---------
 
 Code is contributed to the Linux kernel under a number of licenses, but all
 code must be compatible with version 2 of the GNU General Public License
diff --git a/Documentation/development-process/2.Process b/Documentation/development-process/2.Process.rst
similarity index 96%
rename from Documentation/development-process/2.Process
rename to Documentation/development-process/2.Process.rst
index c24e156..ce5561b 100644
--- a/Documentation/development-process/2.Process
+++ b/Documentation/development-process/2.Process.rst
@@ -1,4 +1,7 @@
-2: HOW THE DEVELOPMENT PROCESS WORKS
+.. _development_process:
+
+How the development process works
+=================================
 
 Linux kernel development in the early 1990's was a pretty loose affair,
 with relatively small numbers of users and developers involved.  With a
@@ -7,19 +10,21 @@
 processes to keep development happening smoothly.  A solid understanding of
 how the process works is required in order to be an effective part of it.
 
-
-2.1: THE BIG PICTURE
+The big picture
+---------------
 
 The kernel developers use a loosely time-based release process, with a new
 major kernel release happening every two or three months.  The recent
 release history looks like this:
 
+	======  =================
 	2.6.38	March 14, 2011
 	2.6.37	January 4, 2011
 	2.6.36	October 20, 2010
 	2.6.35	August 1, 2010
 	2.6.34	May 15, 2010
 	2.6.33	February 24, 2010
+	======  =================
 
 Every 2.6.x release is a major kernel release with new features, internal
 API changes, and more.  A typical 2.6 release can contain nearly 10,000
@@ -68,6 +73,7 @@
 As an example, here is how the 2.6.38 development cycle went (all dates in
 2011):
 
+	==============  ===============================
 	January 4	2.6.37 stable release
 	January 18	2.6.38-rc1, merge window closes
 	January 21	2.6.38-rc2
@@ -78,6 +84,7 @@
 	March 1		2.6.38-rc7
 	March 7		2.6.38-rc8
 	March 14	2.6.38 stable release
+	==============  ===============================
 
 How do the developers decide when to close the development cycle and create
 the stable release?  The most significant metric used is the list of
@@ -105,11 +112,13 @@
 a little more than one development cycle past their initial release.  So,
 for example, the 2.6.36 kernel's history looked like:
 
+	==============  ===============================
 	October 10	2.6.36 stable release
 	November 22	2.6.36.1
 	December 9	2.6.36.2
 	January 7	2.6.36.3
 	February 17	2.6.36.4
+	==============  ===============================
 
 2.6.36.4 was the final stable update for the 2.6.36 release.
 
@@ -117,9 +126,11 @@
 for a longer period.  As of this writing, the current long term kernels
 and their maintainers are:
 
+	======  ======================  ===========================
 	2.6.27	Willy Tarreau		(Deep-frozen stable kernel)
 	2.6.32	Greg Kroah-Hartman
 	2.6.35	Andi Kleen		(Embedded flag kernel)
+	======  ======================  ===========================
 
 The selection of a kernel for long-term support is purely a matter of a
 maintainer having the need and the time to maintain that release.  There
@@ -127,7 +138,8 @@
 release.
 
 
-2.2: THE LIFECYCLE OF A PATCH
+The lifecycle of a patch
+------------------------
 
 Patches do not go directly from the developer's keyboard into the mainline
 kernel.  There is, instead, a somewhat involved (if somewhat informal)
@@ -195,8 +207,8 @@
 step.  This approach invariably leads to frustration for everybody
 involved.
 
-
-2.3: HOW PATCHES GET INTO THE KERNEL
+How patches get into the Kernel
+-------------------------------
 
 There is exactly one person who can merge patches into the mainline kernel
 repository: Linus Torvalds.  But, of the over 9,500 patches which went
@@ -242,7 +254,8 @@
 normally the right way to go.
 
 
-2.4: NEXT TREES
+Next trees
+----------
 
 The chain of subsystem trees guides the flow of patches into the kernel,
 but it also raises an interesting question: what if somebody wants to look
@@ -294,7 +307,8 @@
 their way into linux-next some time before the merge window opens.
 
 
-2.4.1: STAGING TREES
+Staging trees
+-------------
 
 The kernel source tree contains the drivers/staging/ directory, where
 many sub-directories for drivers or filesystems that are on their way to
@@ -322,7 +336,8 @@
 a proper mainline driver.
 
 
-2.5: TOOLS
+Tools
+-----
 
 As can be seen from the above text, the kernel development process depends
 heavily on the ability to herd collections of patches in various
@@ -368,7 +383,8 @@
 quilt is the best tool for the job.
 
 
-2.6: MAILING LISTS
+Mailing lists
+-------------
 
 A great deal of Linux kernel development work is done by way of mailing
 lists.  It is hard to be a fully-functioning member of the community
@@ -436,7 +452,8 @@
 in the MAINTAINERS file packaged with the kernel source.
 
 
-2.7: GETTING STARTED WITH KERNEL DEVELOPMENT
+Getting started with Kernel development
+---------------------------------------
 
 Questions about how to get started with the kernel development process are
 common - from both individuals and companies.  Equally common are missteps
@@ -463,6 +480,8 @@
 
 Andrew Morton gives this advice for aspiring kernel developers
 
+::
+
 	The #1 project for all kernel beginners should surely be "make sure
 	that the kernel runs perfectly at all times on all machines which
 	you can lay your hands on".  Usually the way to do this is to work
diff --git a/Documentation/development-process/3.Early-stage b/Documentation/development-process/3.Early-stage.rst
similarity index 97%
rename from Documentation/development-process/3.Early-stage
rename to Documentation/development-process/3.Early-stage.rst
index f87ba7b..af2c0af 100644
--- a/Documentation/development-process/3.Early-stage
+++ b/Documentation/development-process/3.Early-stage.rst
@@ -1,4 +1,7 @@
-3: EARLY-STAGE PLANNING
+.. _development_early_stage:
+
+Early-stage planning
+====================
 
 When contemplating a Linux kernel development project, it can be tempting
 to jump right in and start coding.  As with any significant project,
@@ -7,7 +10,8 @@
 communication can save far more time later on.
 
 
-3.1: SPECIFYING THE PROBLEM
+Specifying the problem
+----------------------
 
 Like any engineering project, a successful kernel enhancement starts with a
 clear description of the problem to be solved.  In some cases, this step is
@@ -64,7 +68,8 @@
 Only then does it make sense to start considering possible solutions.
 
 
-3.2: EARLY DISCUSSION
+Early discussion
+----------------
 
 When planning a kernel development project, it makes great sense to hold
 discussions with the community before launching into implementation.  Early
@@ -117,7 +122,8 @@
 avoided with some early discussion with the kernel developers.
 
 
-3.3: WHO DO YOU TALK TO?
+Who do you talk to?
+-------------------
 
 When developers decide to take their plans public, the next question will
 be: where do we start?  The answer is to find the right mailing list(s) and
@@ -141,6 +147,8 @@
 The task of finding the right maintainer is sometimes challenging enough
 that the kernel developers have added a script to ease the process:
 
+::
+
 	.../scripts/get_maintainer.pl
 
 This script will return the current maintainer(s) for a given file or
@@ -155,7 +163,8 @@
 track down a maintainer for a specific piece of code.
 
 
-3.4: WHEN TO POST?
+When to post?
+-------------
 
 If possible, posting your plans during the early stages can only be
 helpful.  Describe the problem being solved and any plans that have been
@@ -179,7 +188,8 @@
 community informed as you go.
 
 
-3.5: GETTING OFFICIAL BUY-IN
+Getting official buy-in
+-----------------------
 
 If your work is being done in a corporate environment - as most Linux
 kernel work is - you must, obviously, have permission from suitably
diff --git a/Documentation/development-process/4.Coding b/Documentation/development-process/4.Coding.rst
similarity index 97%
rename from Documentation/development-process/4.Coding
rename to Documentation/development-process/4.Coding.rst
index 9a3ee77..9d5cef9 100644
--- a/Documentation/development-process/4.Coding
+++ b/Documentation/development-process/4.Coding.rst
@@ -1,4 +1,7 @@
-4: GETTING THE CODE RIGHT
+.. _development_coding:
+
+Getting the code right
+======================
 
 While there is much to be said for a solid and community-oriented design
 process, the proof of any kernel development project is in the resulting
@@ -12,9 +15,11 @@
 quest.
 
 
-4.1: PITFALLS
+Pitfalls
+---------
 
-* Coding style
+Coding style
+************
 
 The kernel has long had a standard coding style, described in
 Documentation/CodingStyle.  For much of that time, the policies described
@@ -54,7 +59,8 @@
 80-column limit, for example), just do it.
 
 
-* Abstraction layers
+Abstraction layers
+******************
 
 Computer Science professors teach students to make extensive use of
 abstraction layers in the name of flexibility and information hiding.
@@ -87,7 +93,8 @@
 replicating the same code throughout the kernel.
 
 
-* #ifdef and preprocessor use in general
+#ifdef and preprocessor use in general
+**************************************
 
 The C preprocessor seems to present a powerful temptation to some C
 programmers, who see it as a way to efficiently encode a great deal of
@@ -113,7 +120,8 @@
 the compiler to perform type checking on the arguments and return value.
 
 
-* Inline functions
+Inline functions
+****************
 
 Inline functions present a hazard of their own, though.  Programmers can
 become enamored of the perceived efficiency inherent in avoiding a function
@@ -137,7 +145,8 @@
 irrelevant.
 
 
-* Locking
+Locking
+*******
 
 In May, 2006, the "Devicescape" networking stack was, with great
 fanfare, released under the GPL and made available for inclusion in the
@@ -151,7 +160,7 @@
 corporate doors.  But one large problem in particular was that it was not
 designed to work on multiprocessor systems.  Before this networking stack
 (now called mac80211) could be merged, a locking scheme needed to be
-retrofitted onto it.  
+retrofitted onto it.
 
 Once upon a time, Linux kernel code could be developed without thinking
 about the concurrency issues presented by multiprocessor systems.  Now,
@@ -169,7 +178,8 @@
 attention to concurrency will have a difficult path into the mainline.
 
 
-* Regressions
+Regressions
+***********
 
 One final hazard worth mentioning is this: it can be tempting to make a
 change (which may bring big improvements) which causes something to break
@@ -185,6 +195,8 @@
 breaks?  The best answer to this question was expressed by Linus in July,
 2007:
 
+::
+
 	So we don't fix bugs by introducing new problems.  That way lies
 	madness, and nobody ever knows if you actually make any real
 	progress at all. Is it two steps forwards, one step back, or one
@@ -201,8 +213,8 @@
 user-space interfaces is always required.
 
 
-
-4.2: CODE CHECKING TOOLS
+Code checking tools
+-------------------
 
 For now, at least, the writing of error-free code remains an ideal that few
 of us can reach.  What we can hope to do, though, is to catch and fix as
@@ -250,7 +262,7 @@
 There are quite a few other debugging options, some of which will be
 discussed below.  Some of them have a significant performance impact and
 should not be used all of the time.  But some time spent learning the
-available options will likely be paid back many times over in short order. 
+available options will likely be paid back many times over in short order.
 
 One of the heavier debugging tools is the locking checker, or "lockdep."
 This tool will track the acquisition and release of every lock (spinlock or
@@ -263,7 +275,7 @@
 developers and users) in a deployed system; lockdep allows them to be found
 in an automated manner ahead of time.  Code with any sort of non-trivial
 locking should be run with lockdep enabled before being submitted for
-inclusion. 
+inclusion.
 
 As a diligent kernel programmer, you will, beyond doubt, check the return
 status of any operation (such as a memory allocation) which can fail.  The
@@ -300,7 +312,7 @@
 Other kinds of portability errors are best found by compiling your code for
 other architectures.  If you do not happen to have an S/390 system or a
 Blackfin development board handy, you can still perform the compilation
-step.  A large set of cross compilers for x86 systems can be found at 
+step.  A large set of cross compilers for x86 systems can be found at
 
 	http://www.kernel.org/pub/tools/crosstool/
 
@@ -308,7 +320,8 @@
 embarrassment later.
 
 
-4.3: DOCUMENTATION
+Documentation
+-------------
 
 Documentation has often been more the exception than the rule with kernel
 development.  Even so, adequate documentation will help to ease the merging
@@ -364,7 +377,8 @@
 "cleanup" needs a comment saying why it is done the way it is.  And so on.
 
 
-4.4: INTERNAL API CHANGES
+Internal API changes
+--------------------
 
 The binary interface provided by the kernel to user space cannot be broken
 except under the most severe circumstances.  The kernel's internal
diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting.rst
similarity index 97%
rename from Documentation/development-process/5.Posting
rename to Documentation/development-process/5.Posting.rst
index 8a48c9b..b511ddf 100644
--- a/Documentation/development-process/5.Posting
+++ b/Documentation/development-process/5.Posting.rst
@@ -1,4 +1,7 @@
-5: POSTING PATCHES
+.. _development_posting:
+
+Posting patches
+===============
 
 Sooner or later, the time comes when your work is ready to be presented to
 the community for review and, eventually, inclusion into the mainline
@@ -11,7 +14,8 @@
 directory.
 
 
-5.1: WHEN TO POST
+When to post
+------------
 
 There is a constant temptation to avoid posting patches before they are
 completely "ready."  For simple patches, that is not a problem.  If the
@@ -27,7 +31,8 @@
 with the idea that they can help you drive the work in the right direction.
 
 
-5.2: BEFORE CREATING PATCHES
+Before creating patches
+-----------------------
 
 There are a number of things which should be done before you consider
 sending patches to the development community.  These include:
@@ -52,7 +57,8 @@
 always pays back the effort in short order.
 
 
-5.3: PATCH PREPARATION
+Patch preparation
+-----------------
 
 The preparation of patches for posting can be a surprising amount of work,
 but, once again, attempting to save time here is not generally advisable
@@ -122,7 +128,8 @@
 done.  When done properly, though, it is time well spent.
 
 
-5.4: PATCH FORMATTING AND CHANGELOGS
+Patch formatting and changelogs
+-------------------------------
 
 So now you have a perfect series of patches for posting, but the work is
 not done quite yet.  Each patch needs to be formatted into a message which
@@ -140,6 +147,8 @@
    subsystem name first, followed by the purpose of the patch.  For
    example:
 
+   ::
+
 	gpio: fix build on CONFIG_GPIO_SYSFS=n
 
  - A blank line followed by a detailed description of the contents of the
@@ -192,6 +201,8 @@
 detail in the SubmittingPatches document; what follows here is a brief
 summary.  Each of these lines has the format:
 
+::
+
 	tag: Full Name <email address>  optional-other-stuff
 
 The tags in common use are:
@@ -225,7 +236,8 @@
 for addition without the explicit permission of the person named.
 
 
-5.5: SENDING THE PATCH
+Sending the patch
+-----------------
 
 Before you mail your patches, there are a couple of other things you should
 take care of:
@@ -287,6 +299,8 @@
 Patches need good subject lines.  The canonical format for a patch line is
 something like:
 
+::
+
 	[PATCH nn/mm] subsys: one-line description of the patch
 
 where "nn" is the ordinal number of the patch, "mm" is the total number of
diff --git a/Documentation/development-process/6.Followthrough b/Documentation/development-process/6.Followthrough.rst
similarity index 97%
rename from Documentation/development-process/6.Followthrough
rename to Documentation/development-process/6.Followthrough.rst
index 41d324a..a173cd5 100644
--- a/Documentation/development-process/6.Followthrough
+++ b/Documentation/development-process/6.Followthrough.rst
@@ -1,4 +1,7 @@
-6: FOLLOWTHROUGH
+.. _development_followthrough:
+
+Followthrough
+=============
 
 At this point, you have followed the guidelines given so far and, with the
 addition of your own engineering skills, have posted a perfect series of
@@ -16,7 +19,8 @@
 prevent the inclusion of your patches into the mainline.
 
 
-6.1: WORKING WITH REVIEWERS
+Working with reviewers
+----------------------
 
 A patch of any significance will result in a number of comments from other
 developers as they review the code.  Working with reviewers can be, for
@@ -97,7 +101,8 @@
 in mind, of course, that he may not agree with you either.
 
 
-6.2: WHAT HAPPENS NEXT
+What happens next
+-----------------
 
 If a patch is considered to be a good thing to add to the kernel, and once
 most of the review issues have been resolved, the next step is usually
@@ -177,7 +182,8 @@
 afterward.
 
 
-6.3: OTHER THINGS THAT CAN HAPPEN
+Other things that can happen
+-----------------------------
 
 One day, you may open your mail client and see that somebody has mailed you
 a patch to your code.  That is one of the advantages of having your code
diff --git a/Documentation/development-process/7.AdvancedTopics b/Documentation/development-process/7.AdvancedTopics.rst
similarity index 97%
rename from Documentation/development-process/7.AdvancedTopics
rename to Documentation/development-process/7.AdvancedTopics.rst
index 26dc3fa..81d61c5 100644
--- a/Documentation/development-process/7.AdvancedTopics
+++ b/Documentation/development-process/7.AdvancedTopics.rst
@@ -1,11 +1,15 @@
-7: ADVANCED TOPICS
+.. _development_advancedtopics:
+
+Advanced topics
+===============
 
 At this point, hopefully, you have a handle on how the development process
 works.  There is still more to learn, however!  This section will cover a
 number of topics which can be helpful for developers wanting to become a
 regular part of the Linux kernel development process.
 
-7.1: MANAGING PATCHES WITH GIT
+Managing patches with git
+-------------------------
 
 The use of distributed version control for the kernel began in early 2002,
 when Linus first started playing with the proprietary BitKeeper
@@ -114,6 +118,8 @@
 thing happening; putting up a git tree with unreviewed or off-topic patches
 can affect your ability to get trees pulled in the future.  Quoting Linus:
 
+::
+
 	You can send me patches, but for me to pull a git patch from you, I
 	need to know that you know what you're doing, and I need to be able
 	to trust things *without* then having to go and check every
@@ -141,7 +147,8 @@
 sure that you have remembered to push those changes to the public server.
 
 
-7.2: REVIEWING PATCHES
+Reviewing patches
+-----------------
 
 Some readers will certainly object to putting this section with "advanced
 topics" on the grounds that even beginning kernel developers should be
diff --git a/Documentation/development-process/8.Conclusion b/Documentation/development-process/8.Conclusion.rst
similarity index 96%
rename from Documentation/development-process/8.Conclusion
rename to Documentation/development-process/8.Conclusion.rst
index caef690..23ec7cb 100644
--- a/Documentation/development-process/8.Conclusion
+++ b/Documentation/development-process/8.Conclusion.rst
@@ -1,4 +1,7 @@
-8: FOR MORE INFORMATION
+.. _development_conclusion:
+
+For more information
+====================
 
 There are numerous sources of information on Linux kernel development and
 related topics.  First among those will always be the Documentation
@@ -47,7 +50,8 @@
 	http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
 
 
-9: CONCLUSION
+Conclusion
+==========
 
 Congratulations to anybody who has made it through this long-winded
 document.  Hopefully it has provided a helpful understanding of how the
diff --git a/Documentation/development-process/conf.py b/Documentation/development-process/conf.py
new file mode 100644
index 0000000..4b4a12d
--- /dev/null
+++ b/Documentation/development-process/conf.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = 'Linux Kernel Development Documentation'
+
+tags.add("subproject")
+
+latex_documents = [
+    ('index', 'development-process.tex', 'Linux Kernel Development Documentation',
+     'The kernel development community', 'manual'),
+]
diff --git a/Documentation/development-process/development-process.rst b/Documentation/development-process/development-process.rst
new file mode 100644
index 0000000..bd1399f
--- /dev/null
+++ b/Documentation/development-process/development-process.rst
@@ -0,0 +1,29 @@
+.. _development_process_main:
+
+A guide to the Kernel Development Process
+=========================================
+
+Contents:
+
+.. toctree::
+   :numbered:
+   :maxdepth: 2
+
+   1.Intro
+   2.Process
+   3.Early-stage
+   4.Coding
+   5.Posting
+   6.Followthrough
+   7.AdvancedTopics
+   8.Conclusion
+
+The purpose of this document is to help developers (and their managers)
+work with the development community with a minimum of frustration.  It is
+an attempt to document how this community works in a way which is
+accessible to those who are not intimately familiar with Linux kernel
+development (or, indeed, free software development in general).  While
+there is some technical material here, this is very much a process-oriented
+discussion which does not require a deep knowledge of kernel programming to
+understand.
+
diff --git a/Documentation/development-process/index.rst b/Documentation/development-process/index.rst
new file mode 100644
index 0000000..c37475d
--- /dev/null
+++ b/Documentation/development-process/index.rst
@@ -0,0 +1,9 @@
+Linux Kernel Development Documentation
+======================================
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   development-process
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
index b545856..4a1714f 100644
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-eccmgr.txt
@@ -90,6 +90,47 @@
 - interrupts      : Should be single bit error interrupt, then double bit error
 	interrupt, in this order.
 
+NAND FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-nand-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent NAND node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+DMA FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-dma-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent DMA node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+USB FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-usb-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent USB node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+QSPI FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-qspi-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent QSPI node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order.
+
+SDMMC FIFO ECC
+Required Properties:
+- compatible      : Should be "altr,socfpga-sdmmc-ecc"
+- reg             : Address and size for ECC block registers.
+- altr,ecc-parent : phandle to parent SD/MMC node.
+- interrupts      : Should be single bit error interrupt, then double bit error
+	interrupt, in this order for port A, and then single bit error interrupt,
+	then double bit error interrupt in this order for port B.
+
 Example:
 
 	eccmgr: eccmgr@ffd06000 {
@@ -132,4 +173,61 @@
 			interrupts = <5 IRQ_TYPE_LEVEL_HIGH>,
 				     <37 IRQ_TYPE_LEVEL_HIGH>;
 		};
+
+		nand-buf-ecc@ff8c2000 {
+			compatible = "altr,socfpga-nand-ecc";
+			reg = <0xff8c2000 0x400>;
+			altr,ecc-parent = <&nand>;
+			interrupts = <11 IRQ_TYPE_LEVEL_HIGH>,
+				     <43 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		nand-rd-ecc@ff8c2400 {
+			compatible = "altr,socfpga-nand-ecc";
+			reg = <0xff8c2400 0x400>;
+			altr,ecc-parent = <&nand>;
+			interrupts = <13 IRQ_TYPE_LEVEL_HIGH>,
+				     <45 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		nand-wr-ecc@ff8c2800 {
+			compatible = "altr,socfpga-nand-ecc";
+			reg = <0xff8c2800 0x400>;
+			altr,ecc-parent = <&nand>;
+			interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+				     <44 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		dma-ecc@ff8c8000 {
+			compatible = "altr,socfpga-dma-ecc";
+			reg = <0xff8c8000 0x400>;
+			altr,ecc-parent = <&pdma>;
+			interrupts = <10 IRQ_TYPE_LEVEL_HIGH>,
+				     <42 IRQ_TYPE_LEVEL_HIGH>;
+
+		usb0-ecc@ff8c8800 {
+			compatible = "altr,socfpga-usb-ecc";
+			reg = <0xff8c8800 0x400>;
+			altr,ecc-parent = <&usb0>;
+			interrupts = <2 IRQ_TYPE_LEVEL_HIGH>,
+				     <34 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		qspi-ecc@ff8c8400 {
+			compatible = "altr,socfpga-qspi-ecc";
+			reg = <0xff8c8400 0x400>;
+			altr,ecc-parent = <&qspi>;
+			interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+				     <46 IRQ_TYPE_LEVEL_HIGH>;
+		};
+
+		sdmmc-ecc@ff8c2c00 {
+			compatible = "altr,socfpga-sdmmc-ecc";
+			reg = <0xff8c2c00 0x400>;
+			altr,ecc-parent = <&mmc>;
+			interrupts = <15 IRQ_TYPE_LEVEL_HIGH>,
+				     <47 IRQ_TYPE_LEVEL_HIGH>,
+				     <16 IRQ_TYPE_LEVEL_HIGH>,
+				     <48 IRQ_TYPE_LEVEL_HIGH>;
+		};
 	};
diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
index e774128..ef5fbe9 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -25,6 +25,12 @@
 - always-on : a boolean property. If present, the timer is powered through an
   always-on power domain, therefore it never loses context.
 
+- fsl,erratum-a008585 : A boolean property. Indicates the presence of
+  QorIQ erratum A-008585, which says that reading the counter is
+  unreliable unless the same value is returned by back-to-back reads.
+  This also affects writes to the tval register, due to the implicit
+  counter read.
+
 ** Optional properties:
 
 - arm,cpu-registers-not-fw-configured : Firmware does not initialize
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
index 936166f..cb0054a 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apmixedsys.txt
@@ -5,7 +5,8 @@
 
 Required Properties:
 
-- compatible: Should be:
+- compatible: Should be one of:
+	- "mediatek,mt2701-apmixedsys"
 	- "mediatek,mt8135-apmixedsys"
 	- "mediatek,mt8173-apmixedsys"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt
new file mode 100644
index 0000000..4137196
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,bdpsys.txt
@@ -0,0 +1,22 @@
+Mediatek bdpsys controller
+============================
+
+The Mediatek bdpsys controller provides various clocks to the system.
+
+Required Properties:
+
+- compatible: Should be:
+	- "mediatek,mt2701-bdpsys", "syscon"
+- #clock-cells: Must be 1
+
+The bdpsys controller uses the common clk binding from
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+The available clocks are defined in dt-bindings/clock/mt*-clk.h.
+
+Example:
+
+bdpsys: clock-controller@1c000000 {
+	compatible = "mediatek,mt2701-bdpsys", "syscon";
+	reg = <0 0x1c000000 0 0x1000>;
+	#clock-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,ethsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,ethsys.txt
new file mode 100644
index 0000000..768f3a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,ethsys.txt
@@ -0,0 +1,22 @@
+Mediatek ethsys controller
+============================
+
+The Mediatek ethsys controller provides various clocks to the system.
+
+Required Properties:
+
+- compatible: Should be:
+	- "mediatek,mt2701-ethsys", "syscon"
+- #clock-cells: Must be 1
+
+The ethsys controller uses the common clk binding from
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+The available clocks are defined in dt-bindings/clock/mt*-clk.h.
+
+Example:
+
+ethsys: clock-controller@1b000000 {
+	compatible = "mediatek,mt2701-ethsys", "syscon";
+	reg = <0 0x1b000000 0 0x1000>;
+	#clock-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt
new file mode 100644
index 0000000..beed7b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,hifsys.txt
@@ -0,0 +1,24 @@
+Mediatek hifsys controller
+============================
+
+The Mediatek hifsys controller provides various clocks and reset
+outputs to the system.
+
+Required Properties:
+
+- compatible: Should be:
+	- "mediatek,mt2701-hifsys", "syscon"
+- #clock-cells: Must be 1
+
+The hifsys controller uses the common clk binding from
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+The available clocks are defined in dt-bindings/clock/mt*-clk.h.
+
+Example:
+
+hifsys: clock-controller@1a000000 {
+	compatible = "mediatek,mt2701-hifsys", "syscon";
+	reg = <0 0x1a000000 0 0x1000>;
+	#clock-cells = <1>;
+	#reset-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
index b1f2ce1..f6a9166 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,imgsys.txt
@@ -5,7 +5,8 @@
 
 Required Properties:
 
-- compatible: Should be:
+- compatible: Should be one of:
+	- "mediatek,mt2701-imgsys", "syscon"
 	- "mediatek,mt8173-imgsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
index aaf8d14..1620ec2 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,infracfg.txt
@@ -6,7 +6,8 @@
 
 Required Properties:
 
-- compatible: Should be:
+- compatible: Should be one of:
+	- "mediatek,mt2701-infracfg", "syscon"
 	- "mediatek,mt8135-infracfg", "syscon"
 	- "mediatek,mt8173-infracfg", "syscon"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
index 4385946e..67dd2e4 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mmsys.txt
@@ -5,7 +5,8 @@
 
 Required Properties:
 
-- compatible: Should be:
+- compatible: Should be one of:
+	- "mediatek,mt2701-mmsys", "syscon"
 	- "mediatek,mt8173-mmsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
index 2f6ff86..e494366 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
@@ -6,7 +6,8 @@
 
 Required Properties:
 
-- compatible: Should be:
+- compatible: Should be one of:
+	- "mediatek,mt2701-pericfg", "syscon"
 	- "mediatek,mt8135-pericfg", "syscon"
 	- "mediatek,mt8173-pericfg", "syscon"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
index f9e9179..9f2fe78 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,topckgen.txt
@@ -5,7 +5,8 @@
 
 Required Properties:
 
-- compatible: Should be:
+- compatible: Should be one of:
+	- "mediatek,mt2701-topckgen"
 	- "mediatek,mt8135-topckgen"
 	- "mediatek,mt8173-topckgen"
 - #clock-cells: Must be 1
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
index 1faacf1..2440f73 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,vdecsys.txt
@@ -5,7 +5,8 @@
 
 Required Properties:
 
-- compatible: Should be:
+- compatible: Should be one of:
+	- "mediatek,mt2701-vdecsys", "syscon"
 	- "mediatek,mt8173-vdecsys", "syscon"
 - #clock-cells: Must be 1
 
diff --git a/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt b/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt
new file mode 100644
index 0000000..a55d31b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/amlogic,gxbb-aoclkc.txt
@@ -0,0 +1,45 @@
+* Amlogic GXBB AO Clock and Reset Unit
+
+The Amlogic GXBB AO clock controller generates and supplies clock to various
+controllers within the Always-On part of the SoC.
+
+Required Properties:
+
+- compatible: should be "amlogic,gxbb-aoclkc"
+- reg: physical base address of the clock controller and length of memory
+       mapped region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. All available clocks are defined as
+preprocessor macros in the dt-bindings/clock/gxbb-aoclkc.h header and can be
+used in device tree sources.
+
+- #reset-cells: should be 1.
+
+Each reset is assigned an identifier and client nodes can use this identifier
+to specify the reset which they consume. All available resets are defined as
+preprocessor macros in the dt-bindings/reset/gxbb-aoclkc.h header and can be
+used in device tree sources.
+
+Example: AO Clock controller node:
+
+	clkc_AO: clock-controller@040 {
+		compatible = "amlogic,gxbb-aoclkc";
+		reg = <0x0 0x040 0x0 0x4>;
+		#clock-cells = <1>;
+		#reset-cells = <1>;
+	};
+
+Example: UART controller node that consumes the clock and reset generated
+  by the clock controller:
+
+	uart_AO: serial@4c0 {
+		compatible = "amlogic,meson-uart";
+		reg = <0x4c0 0x14>;
+		interrupts = <0 90 1>;
+		clocks = <&clkc_AO CLKID_AO_UART1>;
+		resets = <&clkc_AO RESET_AO_UART1>;
+		status = "disabled";
+	};
diff --git a/Documentation/devicetree/bindings/clock/arm-syscon-icst.txt b/Documentation/devicetree/bindings/clock/arm-syscon-icst.txt
index 8b7177c..2746811 100644
--- a/Documentation/devicetree/bindings/clock/arm-syscon-icst.txt
+++ b/Documentation/devicetree/bindings/clock/arm-syscon-icst.txt
@@ -5,20 +5,50 @@
 reference designs by adding special control registers that manage such
 oscillators to their system controllers.
 
-The ARM system controller contains logic to serialize and initialize
+The various ARM system controllers contain logic to serialize and initialize
 an ICST clock request after a write to the 32 bit register at an offset
 into the system controller. Furthermore, to even be able to alter one of
 these frequencies, the system controller must first be unlocked by
 writing a special token to another offset in the system controller.
 
+Some ARM hardware contain special versions of the serial interface that only
+connects the low 8 bits of the VDW (missing one bit), hardwires RDW to
+different values and sometimes also hardwire the output divider. They
+therefore have special compatible strings as per this table (the OD value is
+the value on the pins, not the resulting output divider):
+
+Hardware variant:        RDW     OD          VDW
+
+Integrator/AP            22      1           Bit 8 0, rest variable
+integratorap-cm
+
+Integrator/AP            46      3           Bit 8 0, rest variable
+integratorap-sys
+
+Integrator/AP            22 or   1           17 or (33 or 25 MHz)
+integratorap-pci         14      1           14
+
+Integrator/CP            22      variable    Bit 8 0, rest variable
+integratorcp-cm-core
+
+Integrator/CP            22      variable    Bit 8 0, rest variable
+integratorcp-cm-mem
+
 The ICST oscillator must be provided inside a system controller node.
 
 Required properties:
+- compatible: must be one of
+  "arm,syscon-icst525"
+  "arm,syscon-icst307"
+  "arm,syscon-icst525-integratorap-cm"
+  "arm,syscon-icst525-integratorap-sys"
+  "arm,syscon-icst525-integratorap-pci"
+  "arm,syscon-icst525-integratorcp-cm-core"
+  "arm,syscon-icst525-integratorcp-cm-mem"
 - lock-offset: the offset address into the system controller where the
   unlocking register is located
 - vco-offset: the offset address into the system controller where the
   ICST control register is located (even 32 bit address)
-- compatible: must be one of "arm,syscon-icst525" or "arm,syscon-icst307"
 - #clock-cells: must be <0>
 - clocks: parent clock, since the ICST needs a parent clock to derive its
   frequency from, this attribute is compulsory.
diff --git a/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt b/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt
new file mode 100644
index 0000000..1e3370b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/armada3700-periph-clock.txt
@@ -0,0 +1,70 @@
+* Peripheral Clock bindings for Marvell Armada 37xx SoCs
+
+Marvell Armada 37xx SoCs provide peripheral clocks which are
+used as clock source for the peripheral of the SoC.
+
+There are two different blocks associated to north bridge and south
+bridge.
+
+The peripheral clock consumer should specify the desired clock by
+having the clock ID in its "clocks" phandle cell.
+
+The following is a list of provided IDs for Armada 370 North bridge clocks:
+ID	Clock name	Description
+-----------------------------------
+0	mmc		MMC controller
+1	sata_host	Sata Host
+2	sec_at		Security AT
+3	sac_dap		Security DAP
+4	tsecm		Security Engine
+5	setm_tmx	Serial Embedded Trace Module
+6	avs		Adaptive Voltage Scaling
+7	sqf		SPI
+8	pwm		PWM
+9	i2c_2		I2C 2
+10	i2c_1		I2C 1
+11	ddr_phy		DDR PHY
+12	ddr_fclk	DDR F clock
+13	trace		Trace
+14	counter		Counter
+15	eip97		EIP 97
+16	cpu		CPU
+
+The following is a list of provided IDs for Armada 370 South bridge clocks:
+ID	Clock name	Description
+-----------------------------------
+0	gbe-50		50 MHz parent clock for Gigabit Ethernet
+1	gbe-core	parent clock for Gigabit Ethernet core
+2	gbe-125		125 MHz parent clock for Gigabit Ethernet
+3	gbe1-50		50 MHz clock for Gigabit Ethernet port 1
+4	gbe0-50		50 MHz clock for Gigabit Ethernet port 0
+5	gbe1-125	125 MHz clock for Gigabit Ethernet port 1
+6	gbe0-125	125 MHz clock for Gigabit Ethernet port 0
+7	gbe1-core	Gigabit Ethernet core port 1
+8	gbe0-core	Gigabit Ethernet core port 0
+9	gbe-bm		Gigabit Ethernet Buffer Manager
+10	sdio		SDIO
+11	usb32-sub2-sys	USB 2 clock
+12	usb32-ss-sys	USB 3 clock
+
+Required properties:
+
+- compatible : shall be "marvell,armada-3700-periph-clock-nb" for the
+  north bridge block, or
+  "marvell,armada-3700-periph-clock-sb" for the south bridge block
+- reg : must be the register address of North/South Bridge Clock register
+- #clock-cells : from common clock binding; shall be set to 1
+
+- clocks : list of the parent clock phandle in the following order:
+  TBG-A P, TBG-B P, TBG-A S, TBG-B S and finally the xtal clock.
+
+
+Example:
+
+nb_perih_clk: nb-periph-clk@13000{
+	compatible = "marvell,armada-3700-periph-clock-nb";
+	reg = <0x13000 0x1000>;
+	clocks = <&tbg 0>, <&tbg 1>, <&tbg 2>,
+	<&tbg 3>, <&xtalclk>;
+	#clock-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/clock/armada3700-tbg-clock.txt b/Documentation/devicetree/bindings/clock/armada3700-tbg-clock.txt
new file mode 100644
index 0000000..0ba1d83
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/armada3700-tbg-clock.txt
@@ -0,0 +1,27 @@
+* Time Base Generator Clock bindings for Marvell Armada 37xx SoCs
+
+Marvell Armada 37xx SoCs provde Time Base Generator clocks which are
+used as parent clocks for the peripheral clocks.
+
+The TBG clock consumer should specify the desired clock by having the
+clock ID in its "clocks" phandle cell.
+
+The following is a list of provided IDs and clock names on Armada 3700:
+ 0 = TBG A P
+ 1 = TBG B P
+ 2 = TBG A S
+ 3 = TBG B S
+
+Required properties:
+- compatible : shall be "marvell,armada-3700-tbg-clock"
+- reg : must be the register address of North Bridge PLL register
+- #clock-cells : from common clock binding; shall be set to 1
+
+Example:
+
+tbg: tbg@13200 {
+	compatible = "marvell,armada-3700-tbg-clock";
+	reg = <0x13200 0x1000>;
+	clocks = <&xtalclk>;
+	#clock-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/clock/armada3700-xtal-clock.txt b/Documentation/devicetree/bindings/clock/armada3700-xtal-clock.txt
new file mode 100644
index 0000000..a88f1f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/armada3700-xtal-clock.txt
@@ -0,0 +1,28 @@
+* Xtal Clock bindings for Marvell Armada 37xx SoCs
+
+Marvell Armada 37xx SoCs allow to determine the xtal clock frequencies by
+reading the gpio latch register.
+
+This node must be a subnode of the node exposing the register address
+of the GPIO block where the gpio latch is located.
+
+Required properties:
+- compatible : shall be one of the following:
+	"marvell,armada-3700-xtal-clock"
+- #clock-cells : from common clock binding; shall be set to 0
+
+Optional properties:
+- clock-output-names : from common clock binding; allows overwrite default clock
+	output names ("xtal")
+
+Example:
+gpio1: gpio@13800 {
+	compatible = "marvell,armada-3700-gpio", "syscon", "simple-mfd";
+	reg = <0x13800 0x1000>;
+
+	xtalclk: xtal-clk {
+		compatible = "marvell,armada-3700-xtal-clock";
+		clock-output-names = "xtal";
+		#clock-cells = <0>;
+	};
+};
diff --git a/Documentation/devicetree/bindings/clock/at91-clock.txt b/Documentation/devicetree/bindings/clock/at91-clock.txt
index 181bc8a..5f3ad65d 100644
--- a/Documentation/devicetree/bindings/clock/at91-clock.txt
+++ b/Documentation/devicetree/bindings/clock/at91-clock.txt
@@ -6,7 +6,8 @@
 
 Required properties:
 - compatible : shall be one of the following:
-	"atmel,at91sam9x5-sckc":
+	"atmel,at91sam9x5-sckc" or
+	"atmel,sama5d4-sckc":
 		at91 SCKC (Slow Clock Controller)
 		This node contains the slow clock definitions.
 
diff --git a/Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.txt b/Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.txt
new file mode 100644
index 0000000..2ebb107
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,bcm53573-ilp.txt
@@ -0,0 +1,36 @@
+Broadcom BCM53573 ILP clock
+===========================
+
+This binding uses the common clock binding:
+    Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+This binding is used for ILP clock (sometimes referred as "slow clock")
+on Broadcom BCM53573 devices using Cortex-A7 CPU.
+
+ILP's rate has to be calculated on runtime and it depends on ALP clock
+which has to be referenced.
+
+This clock is part of PMU (Power Management Unit), a Broadcom's device
+handing power-related aspects. Its node must be sub-node of the PMU
+device.
+
+Required properties:
+- compatible: "brcm,bcm53573-ilp"
+- clocks: has to reference an ALP clock
+- #clock-cells: should be <0>
+- clock-output-names: from common clock bindings, should contain clock
+		      name
+
+Example:
+
+pmu@18012000 {
+	compatible = "simple-mfd", "syscon";
+	reg = <0x18012000 0x00001000>;
+
+	ilp {
+		compatible = "brcm,bcm53573-ilp";
+		clocks = <&alp>;
+		#clock-cells = <0>;
+		clock-output-names = "ilp";
+	};
+};
diff --git a/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt b/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt
index 180e883..0c3d601 100644
--- a/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt
+++ b/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt
@@ -10,6 +10,8 @@
   - "samsung,exynos4210-audss-clock" - controller compatible with all Exynos4 SoCs.
   - "samsung,exynos5250-audss-clock" - controller compatible with Exynos5250
     SoCs.
+  - "samsung,exynos5410-audss-clock" - controller compatible with Exynos5410
+    SoCs.
   - "samsung,exynos5420-audss-clock" - controller compatible with Exynos5420
     SoCs.
 - reg: physical base address and length of the controller's register set.
@@ -91,5 +93,5 @@
 		<&clock_audss EXYNOS_MOUT_AUDSS>,
 		<&clock_audss EXYNOS_MOUT_I2S>;
 	clock-names = "iis", "i2s_opclk0", "i2s_opclk1",
-	"mout_audss", "mout_i2s";
+		      "mout_audss", "mout_i2s";
 };
diff --git a/Documentation/devicetree/bindings/clock/exynos5410-clock.txt b/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
index aeab635..4527de3 100644
--- a/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
+++ b/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
@@ -12,24 +12,29 @@
 
 - #clock-cells: should be 1.
 
+- clocks: should contain an entry specifying the root clock from external
+  oscillator supplied through XXTI or XusbXTI pin.  This clock should be
+  defined using standard clock bindings with "fin_pll" clock-output-name.
+  That clock is being passed internally to the 9 PLLs.
+
 All available clocks are defined as preprocessor macros in
 dt-bindings/clock/exynos5410.h header and can be used in device
 tree sources.
 
-External clock:
-
-There is clock that is generated outside the SoC. It
-is expected that it is defined using standard clock bindings
-with following clock-output-name:
-
- - "fin_pll" - PLL input clock from XXTI
-
 Example 1: An example of a clock controller node is listed below.
 
+	fin_pll: xxti {
+		compatible = "fixed-clock";
+		clock-frequency = <24000000>;
+		clock-output-names = "fin_pll";
+		#clock-cells = <0>;
+	};
+
 	clock: clock-controller@0x10010000 {
 		compatible = "samsung,exynos5410-clock";
 		reg = <0x10010000 0x30000>;
 		#clock-cells = <1>;
+		clocks = <&fin_pll>;
 	};
 
 Example 2: UART controller node that consumes the clock generated by the clock
diff --git a/Documentation/devicetree/bindings/clock/maxim,max77686.txt b/Documentation/devicetree/bindings/clock/maxim,max77686.txt
index 9c40739..8398a3a 100644
--- a/Documentation/devicetree/bindings/clock/maxim,max77686.txt
+++ b/Documentation/devicetree/bindings/clock/maxim,max77686.txt
@@ -1,10 +1,24 @@
-Binding for Maxim MAX77686 32k clock generator block
+Binding for Maxim MAX77686/MAX77802/MAX77620 32k clock generator block
 
-This is a part of device tree bindings of MAX77686 multi-function device.
-More information can be found in bindings/mfd/max77686.txt file.
+This is a part of device tree bindings of MAX77686/MAX77802/MAX77620
+multi-function device. More information can be found in MFD DT binding
+doc as follows:
+	bindings/mfd/max77686.txt for MAX77686 and
+	bindings/mfd/max77802.txt for MAX77802 and
+	bindings/mfd/max77620.txt for MAX77620.
 
 The MAX77686 contains three 32.768khz clock outputs that can be controlled
-(gated/ungated) over I2C.
+(gated/ungated) over I2C. Clocks are defined as preprocessor macros in
+dt-bindings/clock/maxim,max77686.h.
+
+
+The MAX77802 contains two 32.768khz clock outputs that can be controlled
+(gated/ungated) over I2C. Clocks are defined as preprocessor macros in
+dt-bindings/clock/maxim,max77802.h.
+
+The MAX77686 contains one 32.768khz clock outputs that can be controlled
+(gated/ungated) over I2C. Clocks are defined as preprocessor macros in
+dt-bindings/clock/maxim,max77620.h.
 
 Following properties should be presend in main device node of the MFD chip.
 
@@ -17,30 +31,84 @@
 
 Each clock is assigned an identifier and client nodes can use this identifier
 to specify the clock which they consume. Following indices are allowed:
-    - 0: 32khz_ap clock,
-    - 1: 32khz_cp clock,
-    - 2: 32khz_pmic clock.
+    - 0: 32khz_ap clock (max77686, max77802), 32khz_out0 (max77620)
+    - 1: 32khz_cp clock (max77686, max77802),
+    - 2: 32khz_pmic clock (max77686).
 
-Clocks are defined as preprocessor macros in dt-bindings/clock/maxim,max77686.h
-header and can be used in device tree sources.
+Clocks are defined as preprocessor macros in above dt-binding header for
+respective chips.
 
-Example: Node of the MFD chip
+Example:
 
-	max77686: max77686@09 {
-		compatible = "maxim,max77686";
-		interrupt-parent = <&wakeup_eint>;
-		interrupts = <26 0>;
-		reg = <0x09>;
-		#clock-cells = <1>;
+1. With MAX77686:
 
-		/* ... */
-	};
+#include <dt-bindings/clock/maxim,max77686.h>
+/* ... */
 
-Example: Clock consumer node
+	Node of the MFD chip
+		max77686: max77686@09 {
+			compatible = "maxim,max77686";
+			interrupt-parent = <&wakeup_eint>;
+			interrupts = <26 0>;
+			reg = <0x09>;
+			#clock-cells = <1>;
 
-	foo@0 {
-		compatible = "bar,foo";
-		/* ... */
-		clock-names = "my-clock";
-		clocks = <&max77686 MAX77686_CLK_PMIC>;
-	};
+			/* ... */
+		};
+
+	Clock consumer node
+
+		foo@0 {
+			compatible = "bar,foo";
+			/* ... */
+			clock-names = "my-clock";
+			clocks = <&max77686 MAX77686_CLK_PMIC>;
+		};
+
+2. With MAX77802:
+
+#include <dt-bindings/clock/maxim,max77802.h>
+/* ... */
+
+	Node of the MFD chip
+		max77802: max77802@09 {
+			compatible = "maxim,max77802";
+			interrupt-parent = <&wakeup_eint>;
+			interrupts = <26 0>;
+			reg = <0x09>;
+			#clock-cells = <1>;
+
+			/* ... */
+		};
+
+	Clock consumer node
+
+		foo@0 {
+			compatible = "bar,foo";
+			/* ... */
+			clock-names = "my-clock";
+			clocks = <&max77802 MAX77802_CLK_32K_AP>;
+		};
+
+
+3. With MAX77620:
+
+#include <dt-bindings/clock/maxim,max77620.h>
+/* ... */
+
+	Node of the MFD chip
+		max77620: max77620@3c {
+			compatible = "maxim,max77620";
+			reg = <0x3c>;
+			#clock-cells = <1>;
+			/* ... */
+		};
+
+	Clock consumer node
+
+		foo@0 {
+			compatible = "bar,foo";
+			/* ... */
+			clock-names = "my-clock";
+			clocks = <&max77620 MAX77620_CLK_32K_OUT0>;
+		};
diff --git a/Documentation/devicetree/bindings/clock/maxim,max77802.txt b/Documentation/devicetree/bindings/clock/maxim,max77802.txt
deleted file mode 100644
index c6dc783..0000000
--- a/Documentation/devicetree/bindings/clock/maxim,max77802.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Binding for Maxim MAX77802 32k clock generator block
-
-This is a part of device tree bindings of MAX77802 multi-function device.
-More information can be found in bindings/mfd/max77802.txt file.
-
-The MAX77802 contains two 32.768khz clock outputs that can be controlled
-(gated/ungated) over I2C.
-
-Following properties should be present in main device node of the MFD chip.
-
-Required properties:
-- #clock-cells: From common clock binding; shall be set to 1.
-
-Optional properties:
-- clock-output-names: From common clock binding.
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. Following indices are allowed:
-     - 0: 32khz_ap clock,
-     - 1: 32khz_cp clock.
-
-Clocks are defined as preprocessor macros in dt-bindings/clock/maxim,max77802.h
-header and can be used in device tree sources.
-
-Example: Node of the MFD chip
-
-	max77802: max77802@09 {
-		compatible = "maxim,max77802";
-		interrupt-parent = <&wakeup_eint>;
-		interrupts = <26 0>;
-		reg = <0x09>;
-		#clock-cells = <1>;
-
-		/* ... */
-	};
-
-Example: Clock consumer node
-
-	foo@0 {
-		compatible = "bar,foo";
-		/* ... */
-		clock-names = "my-clock";
-		clocks = <&max77802 MAX77802_CLK_32K_AP>;
-	};
diff --git a/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
index 660e649..cb8542d 100644
--- a/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
+++ b/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
@@ -86,6 +86,8 @@
 7	pex3		PCIe 3
 8	pex0		PCIe 0
 9	usb3h0		USB3 Host 0
+10	usb3h1		USB3 Host 1
+15	sata0		SATA 0
 17	sdio		SDIO
 22	xor0		XOR 0
 28	xor1		XOR 1
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
index 9a60fde..869a2f0 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
@@ -15,6 +15,7 @@
 			"qcom,gcc-msm8974pro"
 			"qcom,gcc-msm8974pro-ac"
 			"qcom,gcc-msm8996"
+			"qcom,gcc-mdm9615"
 
 - reg : shall contain base register location and length
 - #clock-cells : shall contain 1
diff --git a/Documentation/devicetree/bindings/clock/qcom,lcc.txt b/Documentation/devicetree/bindings/clock/qcom,lcc.txt
index dd755be..a3c78aa 100644
--- a/Documentation/devicetree/bindings/clock/qcom,lcc.txt
+++ b/Documentation/devicetree/bindings/clock/qcom,lcc.txt
@@ -7,6 +7,7 @@
 			"qcom,lcc-msm8960"
 			"qcom,lcc-apq8064"
 			"qcom,lcc-ipq8064"
+			"qcom,lcc-mdm9615"
 
 - reg : shall contain base register location and length
 - #clock-cells : shall contain 1
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-divmux.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-divmux.txt
deleted file mode 100644
index 6247652..0000000
--- a/Documentation/devicetree/bindings/clock/st/st,clkgen-divmux.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-Binding for a ST divider and multiplexer clock driver.
-
-This binding uses the common clock binding[1].
-Base address is located to the parent node. See clock binding[2]
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/clock/st/st,clkgen.txt
-
-Required properties:
-
-- compatible : shall be:
-	"st,clkgena-divmux-c65-hs",	"st,clkgena-divmux"
-	"st,clkgena-divmux-c65-ls",	"st,clkgena-divmux"
-	"st,clkgena-divmux-c32-odf0",	"st,clkgena-divmux"
-	"st,clkgena-divmux-c32-odf1",	"st,clkgena-divmux"
-	"st,clkgena-divmux-c32-odf2",	"st,clkgena-divmux"
-	"st,clkgena-divmux-c32-odf3",	"st,clkgena-divmux"
-
-- #clock-cells : From common clock binding; shall be set to 1.
-
-- clocks : From common clock binding
-
-- clock-output-names : From common clock binding.
-
-Example:
-
-	clockgen-a@fd345000 {
-		reg = <0xfd345000 0xb50>;
-
-		clk_m_a1_div1: clk-m-a1-div1 {
-			#clock-cells = <1>;
-			compatible = "st,clkgena-divmux-c32-odf1",
-				     "st,clkgena-divmux";
-
-			clocks = <&clk_m_a1_osc_prediv>,
-				 <&clk_m_a1_pll0 1>, /* PLL0 PHI1 */
-				 <&clk_m_a1_pll1 1>; /* PLL1 PHI1 */
-
-			clock-output-names = "clk-m-rx-icn-ts",
-					     "clk-m-rx-icn-vdp-0",
-					     "", /* unused */
-					     "clk-m-prv-t1-bus",
-					     "clk-m-icn-reg-12",
-					     "clk-m-icn-reg-10",
-					     "", /* unused */
-					     "clk-m-icn-st231";
-		};
-	};
-
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-mux.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-mux.txt
index f1fa91c..9a46cb1 100644
--- a/Documentation/devicetree/bindings/clock/st/st,clkgen-mux.txt
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen-mux.txt
@@ -10,14 +10,7 @@
 Required properties:
 
 - compatible : shall be:
-	"st,stih416-clkgenc-vcc-hd",	"st,clkgen-mux"
-	"st,stih416-clkgenf-vcc-fvdp",	"st,clkgen-mux"
-	"st,stih416-clkgenf-vcc-hva", 	"st,clkgen-mux"
-	"st,stih416-clkgenf-vcc-hd",	"st,clkgen-mux"
-	"st,stih416-clkgenf-vcc-sd",	"st,clkgen-mux"
-	"st,stih415-clkgen-a9-mux",	"st,clkgen-mux"
-	"st,stih416-clkgen-a9-mux",	"st,clkgen-mux"
-	"st,stih407-clkgen-a9-mux",	"st,clkgen-mux"
+	"st,stih407-clkgen-a9-mux"
 
 - #clock-cells : from common clock binding; shall be set to 0.
 
@@ -27,10 +20,13 @@
 
 Example:
 
-	clk_m_hva: clk-m-hva@fd690868 {
+	clk_m_a9: clk-m-a9@92b0000 {
 		#clock-cells = <0>;
-		compatible = "st,stih416-clkgenf-vcc-hva", "st,clkgen-mux";
-		reg = <0xfd690868 4>;
+		compatible = "st,stih407-clkgen-a9-mux";
+		reg = <0x92b0000 0x10000>;
 
-		clocks = <&clockgen_f 1>, <&clk_m_a1_div0 3>;
+		clocks = <&clockgen_a9_pll 0>,
+			 <&clockgen_a9_pll 0>,
+			 <&clk_s_c0_flexgen 13>,
+			 <&clk_m_a9_ext2f_div2>;
 	};
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt
index 844b3a0..f207053 100644
--- a/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt
@@ -9,24 +9,10 @@
 Required properties:
 
 - compatible : shall be:
-	"st,clkgena-prediv-c65",	"st,clkgena-prediv"
-	"st,clkgena-prediv-c32",	"st,clkgena-prediv"
-
-	"st,clkgena-plls-c65"
-	"st,plls-c32-a1x-0",		"st,clkgen-plls-c32"
-	"st,plls-c32-a1x-1",		"st,clkgen-plls-c32"
-	"st,stih415-plls-c32-a9",	"st,clkgen-plls-c32"
-	"st,stih415-plls-c32-ddr",	"st,clkgen-plls-c32"
-	"st,stih416-plls-c32-a9",	"st,clkgen-plls-c32"
-	"st,stih416-plls-c32-ddr",	"st,clkgen-plls-c32"
-	"st,stih407-plls-c32-a0",	"st,clkgen-plls-c32"
-	"st,stih407-plls-c32-a9",	"st,clkgen-plls-c32"
-	"sst,plls-c32-cx_0",		"st,clkgen-plls-c32"
-	"sst,plls-c32-cx_1",		"st,clkgen-plls-c32"
-	"st,stih418-plls-c28-a9",	"st,clkgen-plls-c32"
-
-	"st,stih415-gpu-pll-c32",	"st,clkgengpu-pll-c32"
-	"st,stih416-gpu-pll-c32",	"st,clkgengpu-pll-c32"
+	"st,clkgen-pll0"
+	"st,clkgen-pll1"
+	"st,stih407-clkgen-plla9"
+	"st,stih418-clkgen-plla9"
 
 - #clock-cells : From common clock binding; shall be set to 1.
 
@@ -36,17 +22,16 @@
 
 Example:
 
-	clockgen-a@fee62000 {
-		reg = <0xfee62000 0xb48>;
+	clockgen-a9@92b0000 {
+		compatible = "st,clkgen-c32";
+		reg = <0x92b0000 0xffff>;
 
-		clk_s_a0_pll: clk-s-a0-pll {
+		clockgen_a9_pll: clockgen-a9-pll {
 			#clock-cells = <1>;
-			compatible = "st,clkgena-plls-c65";
+			compatible = "st,stih407-clkgen-plla9";
 
 			clocks = <&clk_sysin>;
 
-			clock-output-names = "clk-s-a0-pll0-hs",
-					     "clk-s-a0-pll0-ls",
-					     "clk-s-a0-pll1";
+			clock-output-names = "clockgen-a9-pll-odf";
 		};
 	};
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-prediv.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-prediv.txt
deleted file mode 100644
index 604766c..0000000
--- a/Documentation/devicetree/bindings/clock/st/st,clkgen-prediv.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Binding for a ST pre-divider clock driver.
-
-This binding uses the common clock binding[1].
-Base address is located to the parent node. See clock binding[2]
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/clock/st/st,clkgen.txt
-
-Required properties:
-
-- compatible : shall be:
-	"st,clkgena-prediv-c65",	"st,clkgena-prediv"
-	"st,clkgena-prediv-c32",	"st,clkgena-prediv"
-
-- #clock-cells : From common clock binding; shall be set to 0.
-
-- clocks : From common clock binding
-
-- clock-output-names : From common clock binding.
-
-Example:
-
-	clockgen-a@fd345000 {
-		reg = <0xfd345000 0xb50>;
-
-		clk_m_a2_osc_prediv: clk-m-a2-osc-prediv {
-			#clock-cells = <0>;
-			compatible = "st,clkgena-prediv-c32",
-				     "st,clkgena-prediv";
-
-			clocks = <&clk_sysin>;
-
-			clock-output-names = "clk-m-a2-osc-prediv";
-		};
-	};
-
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-vcc.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-vcc.txt
deleted file mode 100644
index 109b3ed..0000000
--- a/Documentation/devicetree/bindings/clock/st/st,clkgen-vcc.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-Binding for a type of STMicroelectronics clock crossbar (VCC).
-
-The crossbar can take up to 4 input clocks and control up to 16
-output clocks. Not all inputs or outputs have to be in use in a
-particular instantiation. Each output can be individually enabled,
-select any of the input clocks and apply a divide (by 1,2,4 or 8) to
-that selected clock.
-
-This binding uses the common clock binding[1].
-
-[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-
-Required properties:
-
-- compatible : shall be:
-	"st,stih416-clkgenc",		"st,vcc"
-	"st,stih416-clkgenf",		"st,vcc"
-
-- #clock-cells : from common clock binding; shall be set to 1.
-
-- reg : A Base address and length of the register set.
-
-- clocks : from common clock binding
-
-- clock-output-names : From common clock binding. The block has 16
-                       clock outputs but not all of them in a specific instance
-                       have to be used in the SoC. If a clock name is left as
-                       an empty string then no clock will be created for the
-                       output associated with that string index. If fewer than
-                       16 strings are provided then no clocks will be created
-                       for the remaining outputs.
-
-Example:
-
-	clockgen_c_vcc: clockgen-c-vcc@0xfe8308ac {
-		#clock-cells = <1>;
-		compatible = "st,stih416-clkgenc", "st,clkgen-vcc";
-		reg = <0xfe8308ac 12>;
-
-		clocks = <&clk_s_vcc_hd>,
-			 <&clockgen_c 1>,
-			 <&clk_s_tmds_fromphy>,
-			 <&clockgen_c 2>;
-
-		clock-output-names  = "clk-s-pix-hdmi",
-				      "clk-s-pix-dvo",
-				      "clk-s-out-dvo",
-				      "clk-s-pix-hd",
-				      "clk-s-hddac",
-				      "clk-s-denc",
-				      "clk-s-sddac",
-				      "clk-s-pix-main",
-				      "clk-s-pix-aux",
-				      "clk-s-stfe-frc-0",
-				      "clk-s-ref-mcru",
-				      "clk-s-slave-mcru",
-				      "clk-s-tmds-hdmi",
-				      "clk-s-hdmi-reject-pll",
-				      "clk-s-thsens";
-	};
-
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen.txt
index b18bf86..c35390f 100644
--- a/Documentation/devicetree/bindings/clock/st/st,clkgen.txt
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen.txt
@@ -13,14 +13,6 @@
 			...
 		};
 
-		prediv_node {
-			...
-		};
-
-		divmux_node {
-			...
-		};
-
 		quadfs_node {
 			...
 		};
@@ -29,10 +21,6 @@
 			...
 		};
 
-		vcc_node {
-			...
-		};
-
 		flexgen_node {
 			...
 		};
@@ -43,11 +31,8 @@
 Each subnode should use the binding described in [2]..[7]
 
 [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
-[2] Documentation/devicetree/bindings/clock/st,clkgen-divmux.txt
 [3] Documentation/devicetree/bindings/clock/st,clkgen-mux.txt
 [4] Documentation/devicetree/bindings/clock/st,clkgen-pll.txt
-[5] Documentation/devicetree/bindings/clock/st,clkgen-prediv.txt
-[6] Documentation/devicetree/bindings/clock/st,vcc.txt
 [7] Documentation/devicetree/bindings/clock/st,quadfs.txt
 [8] Documentation/devicetree/bindings/clock/st,flexgen.txt
 
@@ -57,44 +42,27 @@
 
 Example:
 
-	clockgen-a@fee62000 {
-
-		reg = <0xfee62000 0xb48>;
+	clockgen-a@090ff000 {
+		compatible = "st,clkgen-c32";
+		reg = <0x90ff000 0x1000>;
 
 		clk_s_a0_pll: clk-s-a0-pll {
 			#clock-cells = <1>;
-			compatible = "st,clkgena-plls-c65";
-
-			clocks = <&clk-sysin>;
-
-			clock-output-names = "clk-s-a0-pll0-hs",
-					     "clk-s-a0-pll0-ls",
-					     "clk-s-a0-pll1";
-		};
-
-		clk_s_a0_osc_prediv: clk-s-a0-osc-prediv {
-			#clock-cells = <0>;
-			compatible = "st,clkgena-prediv-c65",
-				     "st,clkgena-prediv";
+			compatible = "st,clkgen-pll0";
 
 			clocks = <&clk_sysin>;
 
-			clock-output-names = "clk-s-a0-osc-prediv";
+			clock-output-names = "clk-s-a0-pll-ofd-0";
 		};
 
-		clk_s_a0_hs: clk-s-a0-hs {
+		clk_s_a0_flexgen: clk-s-a0-flexgen {
+			compatible = "st,flexgen";
+
 			#clock-cells = <1>;
-			compatible = "st,clkgena-divmux-c65-hs",
-				     "st,clkgena-divmux";
 
-			clocks = <&clk-s_a0_osc_prediv>,
-				 <&clk-s_a0_pll 0>, /* pll0 hs */
-				 <&clk-s_a0_pll 2>; /* pll1 */
+			clocks = <&clk_s_a0_pll 0>,
+				 <&clk_sysin>;
 
-			clock-output-names = "clk-s-fdma-0",
-					     "clk-s-fdma-1",
-					     ""; /* clk-s-jit-sense */
-					     /* fourth output unused */
+			clock-output-names = "clk-ic-lmi0";
 		};
 	};
-
diff --git a/Documentation/devicetree/bindings/clock/st/st,flexgen.txt b/Documentation/devicetree/bindings/clock/st/st,flexgen.txt
index b7ee5c7..7ff77fc 100644
--- a/Documentation/devicetree/bindings/clock/st/st,flexgen.txt
+++ b/Documentation/devicetree/bindings/clock/st/st,flexgen.txt
@@ -60,6 +60,10 @@
 Required properties:
 - compatible : shall be:
   "st,flexgen"
+  "st,flexgen-audio", "st,flexgen" (enable clock propagation on parent for
+  audio use case)
+  "st,flexgen-video", "st,flexgen" (enable clock propagation on parent
+					and activate synchronous mode)
 
 - #clock-cells : from common clock binding; shall be set to 1 (multiple clock
   outputs).
diff --git a/Documentation/devicetree/bindings/clock/st/st,quadfs.txt b/Documentation/devicetree/bindings/clock/st/st,quadfs.txt
index cedeb9c..d93d493 100644
--- a/Documentation/devicetree/bindings/clock/st/st,quadfs.txt
+++ b/Documentation/devicetree/bindings/clock/st/st,quadfs.txt
@@ -11,12 +11,8 @@
 
 Required properties:
 - compatible : shall be:
-  "st,stih416-quadfs216",	"st,quadfs"
-  "st,stih416-quadfs432",	"st,quadfs"
-  "st,stih416-quadfs660-E",	"st,quadfs"
-  "st,stih416-quadfs660-F",	"st,quadfs"
-  "st,stih407-quadfs660-C",	"st,quadfs"
-  "st,stih407-quadfs660-D",	"st,quadfs"
+  "st,quadfs"
+  "st,quadfs-pll"
 
 
 - #clock-cells : from common clock binding; shall be set to 1.
@@ -35,14 +31,15 @@
 
 Example:
 
-	clockgen_e: clockgen-e@fd3208bc {
-                #clock-cells = <1>;
-                compatible = "st,stih416-quadfs660-E", "st,quadfs";
-                reg = <0xfd3208bc 0xB0>;
+	clk_s_c0_quadfs: clk-s-c0-quadfs@9103000 {
+		#clock-cells = <1>;
+		compatible = "st,quadfs-pll";
+		reg = <0x9103000 0x1000>;
 
-                clocks = <&clk_sysin>;
-                clock-output-names = "clk-m-pix-mdtp-0",
-				     "clk-m-pix-mdtp-1",
-				     "clk-m-pix-mdtp-2",
-				     "clk-m-mpelpc";
-        };
+		clocks = <&clk_sysin>;
+
+		clock-output-names = "clk-s-c0-fs0-ch0",
+				     "clk-s-c0-fs0-ch1",
+				     "clk-s-c0-fs0-ch2",
+				     "clk-s-c0-fs0-ch3";
+	};
diff --git a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
index cb91507..3868458 100644
--- a/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi-ccu.txt
@@ -2,7 +2,10 @@
 ------------------------------------
 
 Required properties :
-- compatible: must contain one of the following compatible:
+- compatible: must contain one of the following compatibles:
+		- "allwinner,sun6i-a31-ccu"
+		- "allwinner,sun8i-a23-ccu"
+		- "allwinner,sun8i-a33-ccu"
 		- "allwinner,sun8i-h3-ccu"
 
 - reg: Must contain the registers base address and length
diff --git a/Documentation/devicetree/bindings/clock/uniphier-clock.txt b/Documentation/devicetree/bindings/clock/uniphier-clock.txt
new file mode 100644
index 0000000..c7179d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/uniphier-clock.txt
@@ -0,0 +1,134 @@
+UniPhier clock controller
+
+
+System clock
+------------
+
+Required properties:
+- compatible: should be one of the following:
+    "socionext,uniphier-sld3-clock" - for sLD3 SoC.
+    "socionext,uniphier-ld4-clock"  - for LD4 SoC.
+    "socionext,uniphier-pro4-clock" - for Pro4 SoC.
+    "socionext,uniphier-sld8-clock" - for sLD8 SoC.
+    "socionext,uniphier-pro5-clock" - for Pro5 SoC.
+    "socionext,uniphier-pxs2-clock" - for PXs2/LD6b SoC.
+    "socionext,uniphier-ld11-clock" - for LD11 SoC.
+    "socionext,uniphier-ld20-clock" - for LD20 SoC.
+- #clock-cells: should be 1.
+
+Example:
+
+	sysctrl@61840000 {
+		compatible = "socionext,uniphier-sysctrl",
+			     "simple-mfd", "syscon";
+		reg = <0x61840000 0x4000>;
+
+		clock {
+			compatible = "socionext,uniphier-ld20-clock";
+			#clock-cells = <1>;
+		};
+
+		other nodes ...
+	};
+
+Provided clocks:
+
+ 8: ST DMAC
+12: GIO (Giga bit stream I/O)
+14: USB3 ch0 host
+15: USB3 ch1 host
+16: USB3 ch0 PHY0
+17: USB3 ch0 PHY1
+20: USB3 ch1 PHY0
+21: USB3 ch1 PHY1
+
+
+Media I/O (MIO) clock
+---------------------
+
+Required properties:
+- compatible: should be one of the following:
+    "socionext,uniphier-sld3-mio-clock" - for sLD3 SoC.
+    "socionext,uniphier-ld4-mio-clock"  - for LD4 SoC.
+    "socionext,uniphier-pro4-mio-clock" - for Pro4 SoC.
+    "socionext,uniphier-sld8-mio-clock" - for sLD8 SoC.
+    "socionext,uniphier-pro5-mio-clock" - for Pro5 SoC.
+    "socionext,uniphier-pxs2-mio-clock" - for PXs2/LD6b SoC.
+    "socionext,uniphier-ld11-mio-clock" - for LD11 SoC.
+    "socionext,uniphier-ld20-mio-clock" - for LD20 SoC.
+- #clock-cells: should be 1.
+
+Example:
+
+	mioctrl@59810000 {
+		compatible = "socionext,uniphier-mioctrl",
+			     "simple-mfd", "syscon";
+		reg = <0x59810000 0x800>;
+
+		clock {
+			compatible = "socionext,uniphier-ld20-mio-clock";
+			#clock-cells = <1>;
+		};
+
+		other nodes ...
+	};
+
+Provided clocks:
+
+ 0: SD ch0 host
+ 1: eMMC host
+ 2: SD ch1 host
+ 7: MIO DMAC
+ 8: USB2 ch0 host
+ 9: USB2 ch1 host
+10: USB2 ch2 host
+11: USB2 ch3 host
+12: USB2 ch0 PHY
+13: USB2 ch1 PHY
+14: USB2 ch2 PHY
+15: USB2 ch3 PHY
+
+
+Peripheral clock
+----------------
+
+Required properties:
+- compatible: should be one of the following:
+    "socionext,uniphier-sld3-peri-clock" - for sLD3 SoC.
+    "socionext,uniphier-ld4-peri-clock"  - for LD4 SoC.
+    "socionext,uniphier-pro4-peri-clock" - for Pro4 SoC.
+    "socionext,uniphier-sld8-peri-clock" - for sLD8 SoC.
+    "socionext,uniphier-pro5-peri-clock" - for Pro5 SoC.
+    "socionext,uniphier-pxs2-peri-clock" - for PXs2/LD6b SoC.
+    "socionext,uniphier-ld11-peri-clock" - for LD11 SoC.
+    "socionext,uniphier-ld20-peri-clock" - for LD20 SoC.
+- #clock-cells: should be 1.
+
+Example:
+
+	perictrl@59820000 {
+		compatible = "socionext,uniphier-perictrl",
+			     "simple-mfd", "syscon";
+		reg = <0x59820000 0x200>;
+
+		clock {
+			compatible = "socionext,uniphier-ld20-peri-clock";
+			#clock-cells = <1>;
+		};
+
+		other nodes ...
+	};
+
+Provided clocks:
+
+ 0: UART ch0
+ 1: UART ch1
+ 2: UART ch2
+ 3: UART ch3
+ 4: I2C ch0
+ 5: I2C ch1
+ 6: I2C ch2
+ 7: I2C ch3
+ 8: I2C ch4
+ 9: I2C ch5
+10: I2C ch6
diff --git a/Documentation/devicetree/bindings/clock/xgene.txt b/Documentation/devicetree/bindings/clock/xgene.txt
index 82f9638..8233e77 100644
--- a/Documentation/devicetree/bindings/clock/xgene.txt
+++ b/Documentation/devicetree/bindings/clock/xgene.txt
@@ -8,6 +8,7 @@
 - compatible : shall be one of the following:
 	"apm,xgene-socpll-clock" - for a X-Gene SoC PLL clock
 	"apm,xgene-pcppll-clock" - for a X-Gene PCP PLL clock
+	"apm,xgene-pmd-clock" - for a X-Gene PMD clock
 	"apm,xgene-device-clock" - for a X-Gene device clock
 	"apm,xgene-socpll-v2-clock" - for a X-Gene SoC PLL v2 clock
 	"apm,xgene-pcppll-v2-clock" - for a X-Gene PCP PLL v2 clock
@@ -22,6 +23,15 @@
 Optional properties for PLL clocks:
 - clock-names : shall be the name of the PLL. If missing, use the device name.
 
+Required properties for PMD clocks:
+- reg : shall be the physical register address for the pmd clock.
+- clocks : shall be the input parent clock phandle for the clock.
+- #clock-cells : shall be set to 1.
+- clock-output-names : shall be the name of the clock referenced by derive
+  clock.
+Optional properties for PLL clocks:
+- clock-names : shall be the name of the clock. If missing, use the device name.
+
 Required properties for device clocks:
 - reg : shall be a list of address and length pairs describing the CSR
          reset and/or the divider. Either may be omitted, but at least
@@ -59,6 +69,14 @@
 		type = <0>;
 	};
 
+	pmd0clk: pmd0clk@7e200200 {
+		compatible = "apm,xgene-pmd-clock";
+		#clock-cells = <1>;
+		clocks = <&pmdpll 0>;
+		reg = <0x0 0x7e200200 0x0 0x10>;
+		clock-output-names = "pmd0clk";
+	};
+
 	socpll: socpll@17000120 {
 		compatible = "apm,xgene-socpll-clock";
 		#clock-cells = <1>;
diff --git a/Documentation/devicetree/bindings/clock/zx296718-clk.txt b/Documentation/devicetree/bindings/clock/zx296718-clk.txt
new file mode 100644
index 0000000..8c18b7b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/zx296718-clk.txt
@@ -0,0 +1,35 @@
+Device Tree Clock bindings for ZTE zx296718
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be one of the following:
+	"zte,zx296718-topcrm":
+		zx296718 top clock selection, divider and gating
+
+	"zte,zx296718-lsp0crm" and
+	"zte,zx296718-lsp1crm":
+		zx296718 device level clock selection and gating
+
+- reg: Address and length of the register set
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/zx296718-clock.h
+for the full list of zx296718 clock IDs.
+
+
+topclk: topcrm@1461000 {
+        compatible = "zte,zx296718-topcrm-clk";
+        reg = <0x01461000 0x1000>;
+        #clock-cells = <1>;
+};
+
+usbphy0:usb-phy0 {
+	compatible = "zte,zx296718-usb-phy";
+	#phy-cells = <0>;
+	clocks = <&topclk USB20_PHY_CLK>;
+	clock-names = "phyclk";
+	status = "okay";
+};
diff --git a/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt
new file mode 100644
index 0000000..f223313
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/rockchip-dfi.txt
@@ -0,0 +1,19 @@
+
+* Rockchip rk3399 DFI device
+
+Required properties:
+- compatible: Must be "rockchip,rk3399-dfi".
+- reg: physical base address of each DFI and length of memory mapped region
+- rockchip,pmu: phandle to the syscon managing the "pmu general register files"
+- clocks: phandles for clock specified in "clock-names" property
+- clock-names : the name of clock used by the DFI, must be "pclk_ddr_mon";
+
+Example:
+	dfi: dfi@0xff630000 {
+		compatible = "rockchip,rk3399-dfi";
+		reg = <0x00 0xff630000 0x00 0x4000>;
+		rockchip,pmu = <&pmugrf>;
+		clocks = <&cru PCLK_DDR_MON>;
+		clock-names = "pclk_ddr_mon";
+		status = "disabled";
+	};
diff --git a/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
new file mode 100644
index 0000000..7a9e860
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/rk3399_dmc.txt
@@ -0,0 +1,209 @@
+* Rockchip rk3399 DMC(Dynamic Memory Controller) device
+
+Required properties:
+- compatible:		 Must be "rockchip,rk3399-dmc".
+- devfreq-events:	 Node to get DDR loading, Refer to
+			 Documentation/devicetree/bindings/devfreq/
+			 rockchip-dfi.txt
+- interrupts:		 The interrupt number to the CPU. The interrupt
+			 specifier format depends on the interrupt controller.
+			 It should be DCF interrupts, when DDR dvfs finish,
+			 it will happen.
+- clocks:		 Phandles for clock specified in "clock-names" property
+- clock-names :		 The name of clock used by the DFI, must be
+			 "pclk_ddr_mon";
+- operating-points-v2:	 Refer to Documentation/devicetree/bindings/power/opp.txt
+			 for details.
+- center-supply:	 DMC supply node.
+- status:		 Marks the node enabled/disabled.
+
+Following properties are ddr timing:
+
+- rockchip,dram_speed_bin :	  Value reference include/dt-bindings/clock/ddr.h,
+				  it select ddr3 cl-trp-trcd type, default value
+				  "DDR3_DEFAULT".it must selected according to
+				  "Speed Bin" in ddr3 datasheet, DO NOT use
+				  smaller "Speed Bin" than ddr3 exactly is.
+
+- rockchip,pd_idle :		  Config the PD_IDLE value, defined the power-down
+				  idle period, memories are places into power-down
+				  mode if bus is idle for PD_IDLE DFI clocks.
+
+- rockchip,sr_idle :		  Configure the SR_IDLE value, defined the
+				  selfrefresh idle period, memories are places
+				  into self-refresh mode if bus is idle for
+				  SR_IDLE*1024 DFI clocks (DFI clocks freq is
+				  half of dram's clocks), defaule value is "0".
+
+- rockchip,sr_mc_gate_idle :	  Defined the self-refresh with memory and
+				  controller clock gating idle period, memories
+				  are places into self-refresh mode and memory
+				  controller clock arg gating if bus is idle for
+				  sr_mc_gate_idle*1024 DFI clocks.
+
+- rockchip,srpd_lite_idle :	  Defined the self-refresh power down idle
+				  period, memories are places into self-refresh
+				  power down mode if bus is idle for
+				  srpd_lite_idle*1024 DFI clocks. This parameter
+				  is for LPDDR4 only.
+
+- rockchip,standby_idle :	  Defined the standby idle period, memories are
+				  places into self-refresh than controller, pi,
+				  phy and dram clock will gating if bus is idle
+				  for standby_idle * DFI clocks.
+
+- rockchip,dram_dll_disb_freq :  It's defined the DDR3 dll bypass frequency in
+				  MHz, when ddr freq less than DRAM_DLL_DISB_FREQ,
+				  ddr3 dll will bypssed note: if dll was bypassed,
+				  the odt also stop working.
+
+- rockchip,phy_dll_disb_freq :	  Defined the PHY dll bypass frequency in
+				  MHz (Mega Hz), when ddr freq less than
+				  DRAM_DLL_DISB_FREQ, phy dll will bypssed.
+				  note: phy dll and phy odt are independent.
+
+- rockchip,ddr3_odt_disb_freq :  When dram type is DDR3, this parameter defined
+				  the odt disable frequency in MHz (Mega Hz),
+				  when ddr frequency less then ddr3_odt_disb_freq,
+				  the odt on dram side and controller side are
+				  both disabled.
+
+- rockchip,ddr3_drv :		  When dram type is DDR3, this parameter define
+				  the dram side driver stength in ohm, default
+				  value is DDR3_DS_40ohm.
+
+- rockchip,ddr3_odt :		  When dram type is DDR3, this parameter define
+				  the dram side ODT stength in ohm, default value
+				  is DDR3_ODT_120ohm.
+
+- rockchip,phy_ddr3_ca_drv :	  When dram type is DDR3, this parameter define
+				  the phy side CA line(incluing command line,
+				  address line and clock line) driver strength.
+				  Default value is PHY_DRV_ODT_40.
+
+- rockchip,phy_ddr3_dq_drv :	  When dram type is DDR3, this parameter define
+				  the phy side DQ line(incluing DQS/DQ/DM line)
+				  driver strength. default value is PHY_DRV_ODT_40.
+
+- rockchip,phy_ddr3_odt : 	  When dram type is DDR3, this parameter define the
+				  phy side odt strength, default value is
+				  PHY_DRV_ODT_240.
+
+- rockchip,lpddr3_odt_disb_freq : When dram type is LPDDR3, this parameter defined
+				  then odt disable frequency in MHz (Mega Hz),
+				  when ddr frequency less then ddr3_odt_disb_freq,
+				  the odt on dram side and controller side are
+				  both disabled.
+
+- rockchip,lpddr3_drv :	  When dram type is LPDDR3, this parameter define
+				  the dram side driver stength in ohm, default
+				  value is LP3_DS_34ohm.
+
+- rockchip,lpddr3_odt :	  When dram type is LPDDR3, this parameter define
+				  the dram side ODT stength in ohm, default value
+				  is LP3_ODT_240ohm.
+
+- rockchip,phy_lpddr3_ca_drv :	  When dram type is LPDDR3, this parameter define
+				  the phy side CA line(incluing command line,
+				  address line and clock line) driver strength.
+				  default value is PHY_DRV_ODT_40.
+
+- rockchip,phy_lpddr3_dq_drv :	  When dram type is LPDDR3, this parameter define
+				  the phy side DQ line(incluing DQS/DQ/DM line)
+				  driver strength. default value is
+				  PHY_DRV_ODT_40.
+
+- rockchip,phy_lpddr3_odt : 	  When dram type is LPDDR3, this parameter define
+				  the phy side odt strength, default value is
+				  PHY_DRV_ODT_240.
+
+- rockchip,lpddr4_odt_disb_freq : When dram type is LPDDR4, this parameter
+				  defined the odt disable frequency in
+				  MHz (Mega Hz), when ddr frequency less then
+				  ddr3_odt_disb_freq, the odt on dram side and
+				  controller side are both disabled.
+
+- rockchip,lpddr4_drv :	  When dram type is LPDDR4, this parameter define
+				  the dram side driver stength in ohm, default
+				  value is LP4_PDDS_60ohm.
+
+- rockchip,lpddr4_dq_odt : 	  When dram type is LPDDR4, this parameter define
+				  the dram side ODT on dqs/dq line stength in ohm,
+				  default value is LP4_DQ_ODT_40ohm.
+
+- rockchip,lpddr4_ca_odt :	  When dram type is LPDDR4, this parameter define
+				  the dram side ODT on ca line stength in ohm,
+				  default value is LP4_CA_ODT_40ohm.
+
+- rockchip,phy_lpddr4_ca_drv :	  When dram type is LPDDR4, this parameter define
+				  the phy side  CA line(incluing command address
+				  line) driver strength. default value is
+				  PHY_DRV_ODT_40.
+
+- rockchip,phy_lpddr4_ck_cs_drv : When dram type is LPDDR4, this parameter define
+				  the phy side clock line and cs line driver
+				  strength. default value is PHY_DRV_ODT_80.
+
+- rockchip,phy_lpddr4_dq_drv :	  When dram type is LPDDR4, this parameter define
+				  the phy side DQ line(incluing DQS/DQ/DM line)
+				  driver strength. default value is PHY_DRV_ODT_80.
+
+- rockchip,phy_lpddr4_odt :	  When dram type is LPDDR4, this parameter define
+				  the phy side odt strength, default value is
+				  PHY_DRV_ODT_60.
+
+Example:
+	dmc_opp_table: dmc_opp_table {
+		compatible = "operating-points-v2";
+
+		opp00 {
+			opp-hz = /bits/ 64 <300000000>;
+			opp-microvolt = <900000>;
+		};
+		opp01 {
+			opp-hz = /bits/ 64 <666000000>;
+			opp-microvolt = <900000>;
+		};
+	};
+
+	dmc: dmc {
+		compatible = "rockchip,rk3399-dmc";
+		devfreq-events = <&dfi>;
+		interrupts = <GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&cru SCLK_DDRCLK>;
+		clock-names = "dmc_clk";
+		operating-points-v2 = <&dmc_opp_table>;
+		center-supply = <&ppvar_centerlogic>;
+		upthreshold = <15>;
+		downdifferential = <10>;
+		rockchip,ddr3_speed_bin = <21>;
+		rockchip,pd_idle = <0x40>;
+		rockchip,sr_idle = <0x2>;
+		rockchip,sr_mc_gate_idle = <0x3>;
+		rockchip,srpd_lite_idle	= <0x4>;
+		rockchip,standby_idle = <0x2000>;
+		rockchip,dram_dll_dis_freq = <300>;
+		rockchip,phy_dll_dis_freq = <125>;
+		rockchip,auto_pd_dis_freq = <666>;
+		rockchip,ddr3_odt_dis_freq = <333>;
+		rockchip,ddr3_drv = <DDR3_DS_40ohm>;
+		rockchip,ddr3_odt = <DDR3_ODT_120ohm>;
+		rockchip,phy_ddr3_ca_drv = <PHY_DRV_ODT_40>;
+		rockchip,phy_ddr3_dq_drv = <PHY_DRV_ODT_40>;
+		rockchip,phy_ddr3_odt = <PHY_DRV_ODT_240>;
+		rockchip,lpddr3_odt_dis_freq = <333>;
+		rockchip,lpddr3_drv = <LP3_DS_34ohm>;
+		rockchip,lpddr3_odt = <LP3_ODT_240ohm>;
+		rockchip,phy_lpddr3_ca_drv = <PHY_DRV_ODT_40>;
+		rockchip,phy_lpddr3_dq_drv = <PHY_DRV_ODT_40>;
+		rockchip,phy_lpddr3_odt = <PHY_DRV_ODT_240>;
+		rockchip,lpddr4_odt_dis_freq = <333>;
+		rockchip,lpddr4_drv = <LP4_PDDS_60ohm>;
+		rockchip,lpddr4_dq_odt = <LP4_DQ_ODT_40ohm>;
+		rockchip,lpddr4_ca_odt = <LP4_CA_ODT_40ohm>;
+		rockchip,phy_lpddr4_ca_drv = <PHY_DRV_ODT_40>;
+		rockchip,phy_lpddr4_ck_cs_drv = <PHY_DRV_ODT_80>;
+		rockchip,phy_lpddr4_dq_drv = <PHY_DRV_ODT_80>;
+		rockchip,phy_lpddr4_odt = <PHY_DRV_ODT_60>;
+		status = "disabled";
+	};
diff --git a/Documentation/devicetree/bindings/extcon/qcom,pm8941-misc.txt b/Documentation/devicetree/bindings/extcon/qcom,pm8941-misc.txt
new file mode 100644
index 0000000..35383adb
--- /dev/null
+++ b/Documentation/devicetree/bindings/extcon/qcom,pm8941-misc.txt
@@ -0,0 +1,41 @@
+Qualcomm's PM8941 USB ID Extcon device
+
+Some Qualcomm PMICs have a "misc" module that can be used to detect when
+the USB ID pin has been pulled low or high.
+
+PROPERTIES
+
+- compatible:
+    Usage: required
+    Value type: <string>
+    Definition: Should contain "qcom,pm8941-misc";
+
+- reg:
+    Usage: required
+    Value type: <u32>
+    Definition: Should contain the offset to the misc address space
+
+- interrupts:
+    Usage: required
+    Value type: <prop-encoded-array>
+    Definition: Should contain the usb id interrupt
+
+- interrupt-names:
+    Usage: required
+    Value type: <stringlist>
+    Definition: Should contain the string "usb_id" for the usb id interrupt
+
+Example:
+
+	pmic {
+		usb_id: misc@900 {
+			compatible = "qcom,pm8941-misc";
+			reg = <0x900>;
+			interrupts = <0x0 0x9 0 IRQ_TYPE_EDGE_BOTH>;
+			interrupt-names = "usb_id";
+		};
+	}
+
+	usb-controller {
+		extcon = <&usb_id>;
+	};
diff --git a/Documentation/devicetree/bindings/hwmon/ltc4151.txt b/Documentation/devicetree/bindings/hwmon/ltc4151.txt
new file mode 100644
index 0000000..d008a5e
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ltc4151.txt
@@ -0,0 +1,18 @@
+LTC4151 High Voltage I2C Current and Voltage Monitor
+
+Required properties:
+- compatible: Must be "lltc,ltc4151"
+- reg: I2C address
+
+Optional properties:
+- shunt-resistor-micro-ohms
+	Shunt resistor value in micro-Ohms
+	Defaults to <1000> if unset.
+
+Example:
+
+ltc4151@6e {
+	compatible = "lltc,ltc4151";
+	reg = <0x6e>;
+	shunt-resistor-micro-ohms = <1500>;
+};
diff --git a/Documentation/devicetree/bindings/hwmon/max6650.txt b/Documentation/devicetree/bindings/hwmon/max6650.txt
new file mode 100644
index 0000000..f6bd87d
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/max6650.txt
@@ -0,0 +1,28 @@
+Bindings for MAX6651 and MAX6650 I2C fan controllers
+
+Reference:
+[1]	https://datasheets.maximintegrated.com/en/ds/MAX6650-MAX6651.pdf
+
+Required properties:
+- compatible : One of "maxim,max6650" or "maxim,max6651"
+- reg        : I2C address, one of 0x1b, 0x1f, 0x4b, 0x48.
+
+Optional properties, default is to retain the chip's current setting:
+- maxim,fan-microvolt : The supply voltage of the fan, either 5000000 uV or
+			12000000 uV.
+- maxim,fan-prescale  : Pre-scaling value, as per datasheet [1]. Lower values
+			allow more fine-grained control of slower fans.
+			Valid: 1, 2, 4, 8, 16.
+- maxim,fan-target-rpm: Initial requested fan rotation speed. If specified, the
+			driver selects closed-loop mode and the requested speed.
+			This ensures the fan is already running before userspace
+			takes over.
+
+Example:
+	fan-max6650: max6650@1b {
+		reg = <0x1b>;
+		compatible = "maxim,max6650";
+		maxim,fan-microvolt = <12000000>;
+		maxim,fan-prescale = <4>;
+		maxim,fan-target-rpm = <1200>;
+	};
diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt
index bf99e2f..205593f 100644
--- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt
+++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt
@@ -16,6 +16,11 @@
 - vref-supply: The regulator supply ADC reference voltage.
 - #io-channel-cells: Should be 1, see ../iio-bindings.txt
 
+Optional properties:
+- resets: Must contain an entry for each entry in reset-names if need support
+	  this option. See ../reset/reset.txt for details.
+- reset-names: Must include the name "saradc-apb".
+
 Example:
 	saradc: saradc@2006c000 {
 		compatible = "rockchip,saradc";
@@ -23,6 +28,8 @@
 		interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
 		clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
 		clock-names = "saradc", "apb_pclk";
+		resets = <&cru SRST_SARADC>;
+		reset-names = "saradc-apb";
 		#io-channel-cells = <1>;
 		vref-supply = <&vcc18>;
 	};
diff --git a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt
index 1112e0d..820fee4 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt
+++ b/Documentation/devicetree/bindings/input/touchscreen/silead_gsl1680.txt
@@ -13,6 +13,7 @@
 - touchscreen-size-y	  : See touchscreen.txt
 
 Optional properties:
+- firmware-name		  : File basename (string) for board specific firmware
 - touchscreen-inverted-x  : See touchscreen.txt
 - touchscreen-inverted-y  : See touchscreen.txt
 - touchscreen-swapped-x-y : See touchscreen.txt
diff --git a/Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt b/Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt
new file mode 100644
index 0000000..ee2ad36
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/jcore,aic.txt
@@ -0,0 +1,26 @@
+J-Core Advanced Interrupt Controller
+
+Required properties:
+
+- compatible: Should be "jcore,aic1" for the (obsolete) first-generation aic
+  with 8 interrupt lines with programmable priorities, or "jcore,aic2" for
+  the "aic2" core with 64 interrupts.
+
+- reg: Memory region(s) for configuration. For SMP, there should be one
+  region per cpu, indexed by the sequential, zero-based hardware cpu
+  number.
+
+- interrupt-controller: Identifies the node as an interrupt controller
+
+- #interrupt-cells: Specifies the number of cells needed to encode an
+  interrupt source. The value shall be 1.
+
+
+Example:
+
+aic: interrupt-controller@200 {
+	compatible = "jcore,aic2";
+	reg = < 0x200 0x30 0x500 0x30 >;
+	interrupt-controller;
+	#interrupt-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt
new file mode 100644
index 0000000..86a7b4c
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-8k-pic.txt
@@ -0,0 +1,25 @@
+Marvell Armada 7K/8K PIC Interrupt controller
+---------------------------------------------
+
+This is the Device Tree binding for the PIC, a secondary interrupt
+controller available on the Marvell Armada 7K/8K ARM64 SoCs, and
+typically connected to the GIC as the primary interrupt controller.
+
+Required properties:
+- compatible: should be "marvell,armada-8k-pic"
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: the number of cells to define interrupts on this
+  controller. Should be 1
+- reg: the register area for the PIC interrupt controller
+- interrupts: the interrupt to the primary interrupt controller,
+  typically the GIC
+
+Example:
+
+	pic: interrupt-controller@3f0100 {
+		compatible = "marvell,armada-8k-pic";
+		reg = <0x3f0100 0x10>;
+		#interrupt-cells = <1>;
+		interrupt-controller;
+		interrupts = <GIC_PPI 15 IRQ_TYPE_LEVEL_HIGH>;
+	};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt
index 8af0a8e..3f6442c 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,odmi-controller.txt
@@ -31,7 +31,7 @@
 Example:
 
 	odmi: odmi@300000 {
-		compatible = "marvell,ap806-odm-controller",
+		compatible = "marvell,ap806-odmi-controller",
 			     "marvell,odmi-controller";
 		interrupt-controller;
 		msi-controller;
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
new file mode 100644
index 0000000..6e7703d
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.txt
@@ -0,0 +1,20 @@
+STM32 External Interrupt Controller
+
+Required properties:
+
+- compatible: Should be "st,stm32-exti"
+- reg: Specifies base physical address and size of the registers
+- interrupt-controller: Indentifies the node as an interrupt controller
+- #interrupt-cells: Specifies the number of cells to encode an interrupt
+  specifier, shall be 2
+- interrupts: interrupts references to primary interrupt controller
+
+Example:
+
+exti: interrupt-controller@40013c00 {
+	compatible = "st,stm32-exti";
+	interrupt-controller;
+	#interrupt-cells = <2>;
+	reg = <0x40013C00 0x400>;
+	interrupts = <1>, <2>, <3>, <6>, <7>, <8>, <9>, <10>, <23>, <40>, <41>, <42>, <62>, <76>;
+};
diff --git a/Documentation/devicetree/bindings/leds/common.txt b/Documentation/devicetree/bindings/leds/common.txt
index 93ef6e6..696be57 100644
--- a/Documentation/devicetree/bindings/leds/common.txt
+++ b/Documentation/devicetree/bindings/leds/common.txt
@@ -19,6 +19,13 @@
 	  a device, i.e. no other LED class device can be assigned the same
 	  label.
 
+- default-state : The initial state of the LED. Valid values are "on", "off",
+  and "keep". If the LED is already on or off and the default-state property is
+  set the to same value, then no glitch should be produced where the LED
+  momentarily turns off (or on). The "keep" setting will keep the LED at
+  whatever its current state is, without producing a glitch.  The default is
+  off if this property is not present.
+
 - linux,default-trigger :  This parameter, if present, is a
     string defining the trigger assigned to the LED.  Current triggers are:
      "backlight" - LED will act as a back-light, controlled by the framebuffer
diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt
index 3f48c1e..ccebce5 100644
--- a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt
+++ b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt
@@ -49,7 +49,7 @@
     - active-low : Boolean, makes LED active low.
       Default : false
     - default-state : see
-      Documentation/devicetree/bindings/leds/leds-gpio.txt
+      Documentation/devicetree/bindings/leds/common.txt
     - linux,default-trigger : see
       Documentation/devicetree/bindings/leds/common.txt
 
diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
index b22a55b..da5708e 100644
--- a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
+++ b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt
@@ -28,7 +28,7 @@
   - active-low : Boolean, makes LED active low.
     Default : false
   - default-state : see
-    Documentation/devicetree/bindings/leds/leds-gpio.txt
+    Documentation/devicetree/bindings/leds/common.txt
   - linux,default-trigger : see
     Documentation/devicetree/bindings/leds/common.txt
 
diff --git a/Documentation/devicetree/bindings/leds/leds-gpio.txt b/Documentation/devicetree/bindings/leds/leds-gpio.txt
index 5b1b43a..76535ca 100644
--- a/Documentation/devicetree/bindings/leds/leds-gpio.txt
+++ b/Documentation/devicetree/bindings/leds/leds-gpio.txt
@@ -14,13 +14,8 @@
   see Documentation/devicetree/bindings/leds/common.txt
 - linux,default-trigger :  (optional)
   see Documentation/devicetree/bindings/leds/common.txt
-- default-state:  (optional) The initial state of the LED.  Valid
-  values are "on", "off", and "keep".  If the LED is already on or off
-  and the default-state property is set the to same value, then no
-  glitch should be produced where the LED momentarily turns off (or
-  on).  The "keep" setting will keep the LED at whatever its current
-  state is, without producing a glitch.  The default is off if this
-  property is not present.
+- default-state:  (optional) The initial state of the LED.
+  see Documentation/devicetree/bindings/leds/common.txt
 - retain-state-suspended: (optional) The suspend state can be retained.Such
   as charge-led gpio.
 - panic-indicator : (optional)
diff --git a/Documentation/devicetree/bindings/leds/leds-is31fl319x.txt b/Documentation/devicetree/bindings/leds/leds-is31fl319x.txt
new file mode 100644
index 0000000..fc26034
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-is31fl319x.txt
@@ -0,0 +1,59 @@
+LEDs connected to is31fl319x LED controller chip
+
+Required properties:
+- compatible : Should be any of
+	"issi,is31fl3190"
+	"issi,is31fl3191"
+	"issi,is31fl3193"
+	"issi,is31fl3196"
+	"issi,is31fl3199"
+	"si-en,sn3199".
+- #address-cells: Must be 1.
+- #size-cells: Must be 0.
+- reg: 0x64, 0x65, 0x66, or 0x67.
+
+Optional properties:
+- audio-gain-db : audio gain selection for external analog modulation input.
+	Valid values: 0 - 21, step by 3 (rounded down)
+	Default: 0
+
+Each led is represented as a sub-node of the issi,is31fl319x device.
+There can be less leds subnodes than the chip can support but not more.
+
+Required led sub-node properties:
+- reg : number of LED line
+	Valid values: 1 - number of leds supported by the chip variant.
+
+Optional led sub-node properties:
+- label : see Documentation/devicetree/bindings/leds/common.txt.
+- linux,default-trigger :
+	see Documentation/devicetree/bindings/leds/common.txt.
+- led-max-microamp : (optional)
+	Valid values: 5000 - 40000, step by 5000 (rounded down)
+	Default: 20000 (20 mA)
+	Note: a driver will take the lowest of all led limits since the
+	chip has a single global setting. The lowest value will be chosen
+	due to the PWM specificity, where lower brightness is achieved
+	by reducing the dury-cycle of pulses and not the current, which
+	will always have its peak value equal to led-max-microamp.
+
+Examples:
+
+fancy_leds: leds@65 {
+	compatible = "issi,is31fl3196";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	reg = <0x65>;
+
+	red_aux: led@1 {
+		label = "red:aux";
+		reg = <1>;
+		led-max-microamp = <10000>;
+	};
+
+	green_power: led@5 {
+		label = "green:power";
+		reg = <5>;
+		linux,default-trigger = "default-on";
+	};
+};
diff --git a/Documentation/devicetree/bindings/leds/leds-pm8058.txt b/Documentation/devicetree/bindings/leds/leds-pm8058.txt
new file mode 100644
index 0000000..89584c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-pm8058.txt
@@ -0,0 +1,67 @@
+Qualcomm PM8058 LED driver
+
+The Qualcomm PM8058 is a multi-functional device which contains
+an LED driver block for up to six LEDs: three normal LEDs, two
+"flash" LEDs and one "keypad backlight" LED. The names are
+quoted because sometimes these LED drivers are used for wildly
+different things than flash or keypad backlight: their names
+are more of a suggestion than a hard-wired usecase.
+
+Hardware-wise the different LEDs support slightly different
+output currents. The "flash" LEDs do not need to charge nor
+do they support external triggers. They are just powerful LED
+drivers.
+
+The LEDs appear as children to the PM8058 device, with the
+proper compatible string. For the PM8058 bindings see:
+mfd/qcom-pm8xxx.txt.
+
+Each LED is represented as a sub-node of the syscon device. Each
+node's name represents the name of the corresponding LED.
+
+LED sub-node properties:
+
+Required properties:
+- compatible: one of
+  "qcom,pm8058-led" (for the normal LEDs at 0x131, 0x132 and 0x133)
+  "qcom,pm8058-keypad-led" (for the "keypad" LED at 0x48)
+  "qcom,pm8058-flash-led" (for the "flash" LEDs at 0x49 and 0xFB)
+
+Optional properties:
+- label: see Documentation/devicetree/bindings/leds/common.txt
+- default-state: see Documentation/devicetree/bindings/leds/common.txt
+- linux,default-trigger: see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+
+qcom,ssbi@500000 {
+	pmicintc: pmic@0 {
+		compatible = "qcom,pm8058";
+		led@48 {
+			compatible = "qcom,pm8058-keypad-led";
+			reg = <0x48>;
+			label = "pm8050:white:keypad";
+			default-state = "off";
+		};
+		led@131 {
+			compatible = "qcom,pm8058-led";
+			reg = <0x131>;
+			label = "pm8058:red";
+			default-state = "off";
+		};
+		led@132 {
+			compatible = "qcom,pm8058-led";
+			reg = <0x132>;
+			label = "pm8058:yellow";
+			default-state = "off";
+			linux,default-trigger = "mmc0";
+		};
+		led@133 {
+			compatible = "qcom,pm8058-led";
+			reg = <0x133>;
+			label = "pm8058:green";
+			default-state = "on";
+			linux,default-trigger = "heartbeat";
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/leds/register-bit-led.txt b/Documentation/devicetree/bindings/leds/register-bit-led.txt
index 379cefdc..59b5636 100644
--- a/Documentation/devicetree/bindings/leds/register-bit-led.txt
+++ b/Documentation/devicetree/bindings/leds/register-bit-led.txt
@@ -23,13 +23,8 @@
   see Documentation/devicetree/bindings/leds/common.txt
 - linux,default-trigger : (optional)
   see Documentation/devicetree/bindings/leds/common.txt
-- default-state: (optional) The initial state of the LED. Valid
-  values are "on", "off", and "keep". If the LED is already on or off
-  and the default-state property is set the to same value, then no
-  glitch should be produced where the LED momentarily turns off (or
-  on). The "keep" setting will keep the LED at whatever its current
-  state is, without producing a glitch.  The default is off if this
-  property is not present.
+- default-state: (optional) The initial state of the LED
+  see Documentation/devicetree/bindings/leds/common.txt
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mem-ctrlr.txt b/Documentation/devicetree/bindings/memory-controllers/fsl/ddr.txt
similarity index 85%
rename from Documentation/devicetree/bindings/powerpc/fsl/mem-ctrlr.txt
rename to Documentation/devicetree/bindings/memory-controllers/fsl/ddr.txt
index f87856f..dde6d83 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/mem-ctrlr.txt
+++ b/Documentation/devicetree/bindings/memory-controllers/fsl/ddr.txt
@@ -7,6 +7,8 @@
 		  "fsl,qoriq-memory-controller".
 - reg		: Address and size of DDR controller registers
 - interrupts	: Error interrupt of DDR controller
+- little-endian	: Specifies little-endian access to registers
+		  If omitted, big-endian will be used.
 
 Example 1:
 
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
index 88faa91..3cd4c43 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-st.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
@@ -10,7 +10,7 @@
 			subsystem (mmcss) inside the FlashSS (available in STiH407 SoC
 			family).
 
-- clock-names:		Should be "mmc".
+- clock-names:		Should be "mmc" and "icn".  (NB: The latter is not compulsory)
 			See: Documentation/devicetree/bindings/resource-names.txt
 - clocks:		Phandle to the clock.
 			See: Documentation/devicetree/bindings/clock/clock-bindings.txt
diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
index e41b2d5..f591ab7 100644
--- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
+++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
@@ -47,6 +47,9 @@
 	    Valid values are between 0 to 7, that maps to
 	    273, 589, 899, 1222, 1480, 1806, 2147, 2464 ps
 	    Default value is 2, which corresponds to 899 ps
+- rxlos-gpios: Input gpio from SFP+ module to indicate availability of
+	       incoming signal.
+
 
 Example:
 	menetclk: menetclk {
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
index 30d4875..fb40891 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
+++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
@@ -6,9 +6,13 @@
 - reg: addresses and length of the register sets for the device, must be 6
   pairs of register addresses and lengths
 - interrupts: interrupts for the devices, must be two interrupts
+- #address-cells: must be 1, see dsa/dsa.txt
+- #size-cells: must be 0, see dsa/dsa.txt
+
+Deprecated binding required properties:
+
 - dsa,mii-bus: phandle to the MDIO bus controller, see dsa/dsa.txt
 - dsa,ethernet: phandle to the CPU network interface controller, see dsa/dsa.txt
-- #size-cells: must be 0
 - #address-cells: must be 2, see dsa/dsa.txt
 
 Subnodes:
@@ -48,6 +52,45 @@
 	ethernet_switch@0 {
 		compatible = "brcm,bcm7445-switch-v4.0";
 		#size-cells = <0>;
+		#address-cells = <1>;
+		reg = <0x0 0x40000
+			0x40000 0x110
+			0x40340 0x30
+			0x40380 0x30
+			0x40400 0x34
+			0x40600 0x208>;
+		reg-names = "core", "reg", intrl2_0", "intrl2_1",
+			    "fcb, "acb";
+		interrupts = <0 0x18 0
+				0 0x19 0>;
+		brcm,num-gphy = <1>;
+		brcm,num-rgmii-ports = <2>;
+		brcm,fcb-pause-override;
+		brcm,acb-packets-inflight;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				label = "gphy";
+				reg = <0>;
+			};
+		};
+	};
+};
+
+Example using the old DSA DeviceTree binding:
+
+switch_top@f0b00000 {
+	compatible = "simple-bus";
+	#size-cells = <1>;
+	#address-cells = <1>;
+	ranges = <0 0xf0b00000 0x40804>;
+
+	ethernet_switch@0 {
+		compatible = "brcm,bcm7445-switch-v4.0";
+		#size-cells = <0>;
 		#address-cells = <2>;
 		reg = <0x0 0x40000
 			0x40000 0x110
diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
new file mode 100644
index 0000000..9c67ee4
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt
@@ -0,0 +1,89 @@
+* Qualcomm Atheros QCA8xxx switch family
+
+Required properties:
+
+- compatible: should be "qca,qca8337"
+- #size-cells: must be 0
+- #address-cells: must be 1
+
+Subnodes:
+
+The integrated switch subnode should be specified according to the binding
+described in dsa/dsa.txt. As the QCA8K switches do not have a N:N mapping of
+port and PHY id, each subnode describing a port needs to have a valid phandle
+referencing the internal PHY connected to it. The CPU port of this switch is
+always port 0.
+
+Example:
+
+
+	&mdio0 {
+		phy_port1: phy@0 {
+			reg = <0>;
+		};
+
+		phy_port2: phy@1 {
+			reg = <1>;
+		};
+
+		phy_port3: phy@2 {
+			reg = <2>;
+		};
+
+		phy_port4: phy@3 {
+			reg = <3>;
+		};
+
+		phy_port5: phy@4 {
+			reg = <4>;
+		};
+
+		switch0@0 {
+			compatible = "qca,qca8337";
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			reg = <0>;
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				port@0 {
+					reg = <0>;
+					label = "cpu";
+					ethernet = <&gmac1>;
+					phy-mode = "rgmii";
+				};
+
+				port@1 {
+					reg = <1>;
+					label = "lan1";
+					phy-handle = <&phy_port1>;
+				};
+
+				port@2 {
+					reg = <2>;
+					label = "lan2";
+					phy-handle = <&phy_port2>;
+				};
+
+				port@3 {
+					reg = <3>;
+					label = "lan3";
+					phy-handle = <&phy_port3>;
+				};
+
+				port@4 {
+					reg = <4>;
+					label = "lan4";
+					phy-handle = <&phy_port4>;
+				};
+
+				port@5 {
+					reg = <5>;
+					label = "wan";
+					phy-handle = <&phy_port5>;
+				};
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index 5d88f37..e1d7681 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -11,8 +11,8 @@
   the maximum frame size (there's contradiction in ePAPR).
 - phy-mode: string, operation mode of the PHY interface; supported values are
   "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id",
-  "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto
-  standard property;
+  "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii", "trgmii"; this is now a
+  de-facto standard property;
 - phy-connection-type: the same as "phy-mode" property but described in ePAPR;
 - phy-handle: phandle, specifies a reference to a node representing a PHY
   device; this property is described in ePAPR and so preferred;
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
index b5a42df..1506e94 100644
--- a/Documentation/devicetree/bindings/net/macb.txt
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -21,6 +21,7 @@
 - clock-names: Tuple listing input clock names.
 	Required elements: 'pclk', 'hclk'
 	Optional elements: 'tx_clk'
+	Optional elements: 'rx_clk' applies to cdns,zynqmp-gem
 - clocks: Phandles to input clocks.
 
 Optional properties for PHY child node:
diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt
index 32eaaca..f095257 100644
--- a/Documentation/devicetree/bindings/net/mediatek-net.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-net.txt
@@ -24,14 +24,17 @@
 Optional properties:
 - interrupt-parent: Should be the phandle for the interrupt controller
   that services interrupts for this device
-
+- mediatek,hwlro: the capability if the hardware supports LRO functions
 
 * Ethernet MAC node
 
 Required properties:
 - compatible: Should be "mediatek,eth-mac"
 - reg: The number of the MAC
-- phy-handle: see ethernet.txt file in the same directory.
+- phy-handle: see ethernet.txt file in the same directory and
+	the phy-mode "trgmii" required being provided when reg
+	is equal to 0 and the MAC uses fixed-link to connect
+	with internal switch such as MT7530.
 
 Example:
 
@@ -51,6 +54,7 @@
 	reset-names = "eth";
 	mediatek,ethsys = <&ethsys>;
 	mediatek,pctl = <&syscfg_pctl_a>;
+	mediatek,hwlro;
 	#address-cells = <1>;
 	#size-cells = <0>;
 
diff --git a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt
new file mode 100644
index 0000000..99c7eb0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt
@@ -0,0 +1,58 @@
+* Microsemi - vsc8531 Giga bit ethernet phy
+
+Required properties:
+- compatible	: Should contain phy id as "ethernet-phy-idAAAA.BBBB"
+		  The PHY device uses the binding described in
+		  Documentation/devicetree/bindings/net/phy.txt
+
+Optional properties:
+- vsc8531,vddmac	: The vddmac in mV.
+- vsc8531,edge-slowdown	: % the edge should be slowed down relative to
+			  the fastest possible edge time. Native sign
+			  need not enter.
+			  Edge rate sets the drive strength of the MAC
+			  interface output signals.  Changing the drive
+			  strength will affect the edge rate of the output
+			  signal.  The goal of this setting is to help
+			  reduce electrical emission (EMI) by being able
+			  to reprogram drive strength and in effect slow
+			  down the edge rate if desired.  Table 1 shows the
+			  impact to the edge rate per VDDMAC supply for each
+			  drive strength setting.
+			  Ref: Table:1 - Edge rate change below.
+
+Note: see dt-bindings/net/mscc-phy-vsc8531.h for applicable values
+
+Table: 1 - Edge rate change
+----------------------------------------------------------------|
+| 		Edge Rate Change (VDDMAC)			|
+|								|
+| 3300 mV	2500 mV		1800 mV		1500 mV		|
+|---------------------------------------------------------------|
+| Default	Deafult		Default		Default		|
+| (Fastest)			(recommended)	(recommended)	|
+|---------------------------------------------------------------|
+| -2%		-3%		-5%		-6%		|
+|---------------------------------------------------------------|
+| -4%		-6%		-9%		-14%		|
+|---------------------------------------------------------------|
+| -7%		-10%		-16%		-21%		|
+|(recommended)	(recommended)					|
+|---------------------------------------------------------------|
+| -10%		-14%		-23%		-29%		|
+|---------------------------------------------------------------|
+| -17%		-23%		-35%		-42%		|
+|---------------------------------------------------------------|
+| -29%		-37%		-52%		-58%		|
+|---------------------------------------------------------------|
+| -53%		-63%		-76%		-77%		|
+| (slowest)							|
+|---------------------------------------------------------------|
+
+Example:
+
+        vsc8531_0: ethernet-phy@0 {
+                compatible = "ethernet-phy-id0007.0570";
+                vsc8531,vddmac		= <3300>;
+                vsc8531,edge-slowdown	= <21>;
+        };
diff --git a/Documentation/devicetree/bindings/net/qcom-emac.txt b/Documentation/devicetree/bindings/net/qcom-emac.txt
new file mode 100644
index 0000000..346e6c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qcom-emac.txt
@@ -0,0 +1,111 @@
+Qualcomm Technologies EMAC Gigabit Ethernet Controller
+
+This network controller consists of two devices: a MAC and an SGMII
+internal PHY.  Each device is represented by a device tree node.  A phandle
+connects the MAC node to its corresponding internal phy node.  Another
+phandle points to the external PHY node.
+
+Required properties:
+
+MAC node:
+- compatible : Should be "qcom,fsm9900-emac".
+- reg : Offset and length of the register regions for the device
+- interrupts : Interrupt number used by this controller
+- mac-address : The 6-byte MAC address. If present, it is the default
+	MAC address.
+- internal-phy : phandle to the internal PHY node
+- phy-handle : phandle the the external PHY node
+
+Internal PHY node:
+- compatible : Should be "qcom,fsm9900-emac-sgmii" or "qcom,qdf2432-emac-sgmii".
+- reg : Offset and length of the register region(s) for the device
+- interrupts : Interrupt number used by this controller
+
+The external phy child node:
+- reg : The phy address
+
+Example:
+
+FSM9900:
+
+soc {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	emac0: ethernet@feb20000 {
+		compatible = "qcom,fsm9900-emac";
+		reg = <0xfeb20000 0x10000>,
+		      <0xfeb36000 0x1000>;
+		interrupts = <76>;
+
+		clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>,
+			<&gcc 6>, <&gcc 7>;
+		clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk",
+			"mdio_clk", "tx_clk", "rx_clk", "sys_clk";
+
+		internal-phy = <&emac_sgmii>;
+
+		phy-handle = <&phy0>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
+		phy0: ethernet-phy@0 {
+			reg = <0>;
+		};
+
+		pinctrl-names = "default";
+		pinctrl-0 = <&mdio_pins_a>;
+	};
+
+	emac_sgmii: ethernet@feb38000 {
+		compatible = "qcom,fsm9900-emac-sgmii";
+		reg = <0xfeb38000 0x1000>;
+		interrupts = <80>;
+	};
+
+	tlmm: pinctrl@fd510000 {
+		compatible = "qcom,fsm9900-pinctrl";
+
+		mdio_pins_a: mdio {
+			state {
+				pins = "gpio123", "gpio124";
+				function = "mdio";
+			};
+		};
+	};
+
+
+QDF2432:
+
+soc {
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	emac0: ethernet@38800000 {
+		compatible = "qcom,fsm9900-emac";
+		reg = <0x0 0x38800000 0x0 0x10000>,
+		      <0x0 0x38816000 0x0 0x1000>;
+		interrupts = <0 256 4>;
+
+		clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>,
+			 <&gcc 6>, <&gcc 7>;
+		clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk",
+			"mdio_clk", "tx_clk", "rx_clk", "sys_clk";
+
+		internal-phy = <&emac_sgmii>;
+
+		phy-handle = <&phy0>;
+
+		#address-cells = <1>;
+		#size-cells = <0>;
+		phy0: ethernet-phy@4 {
+			reg = <4>;
+		};
+	};
+
+	emac_sgmii: ethernet@410400 {
+		compatible = "qcom,qdf2432-emac-sgmii";
+		reg = <0x0 0x00410400 0x0 0xc00>, /* Base address */
+		      <0x0 0x00410000 0x0 0x400>; /* Per-lane digital */
+		interrupts = <0 254 1>;
+	};
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
index cccd945..95383c5 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
@@ -3,8 +3,12 @@
 The device node has following properties.
 
 Required properties:
- - compatible: Can be one of "rockchip,rk3228-gmac", "rockchip,rk3288-gmac",
-                             "rockchip,rk3368-gmac"
+ - compatible: should be "rockchip,<name>-gamc"
+   "rockchip,rk3228-gmac": found on RK322x SoCs
+   "rockchip,rk3288-gmac": found on RK3288 SoCs
+   "rockchip,rk3366-gmac": found on RK3366 SoCs
+   "rockchip,rk3368-gmac": found on RK3368 SoCs
+   "rockchip,rk3399-gmac": found on RK3399 SoCs
  - reg: addresses and length of the register sets for the device.
  - interrupts: Should contain the GMAC interrupts.
  - interrupt-names: Should contain the interrupt names "macirq".
diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt
index 2f6ec85..0115c85 100644
--- a/Documentation/devicetree/bindings/net/sh_eth.txt
+++ b/Documentation/devicetree/bindings/net/sh_eth.txt
@@ -5,6 +5,8 @@
 
 Required properties:
 - compatible: "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC.
+	      "renesas,ether-r8a7743"  if the device is a part of R8A7743 SoC.
+	      "renesas,ether-r8a7745"  if the device is a part of R8A7745 SoC.
 	      "renesas,ether-r8a7778"  if the device is a part of R8A7778 SoC.
 	      "renesas,ether-r8a7779"  if the device is a part of R8A7779 SoC.
 	      "renesas,ether-r8a7790"  if the device is a part of R8A7790 SoC.
diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt
index 3fed3c1..16c3a950 100644
--- a/Documentation/devicetree/bindings/net/smsc911x.txt
+++ b/Documentation/devicetree/bindings/net/smsc911x.txt
@@ -3,9 +3,11 @@
 Required properties:
 - compatible : Should be "smsc,lan<model>", "smsc,lan9115"
 - reg : Address and length of the io space for SMSC LAN
-- interrupts : Should contain SMSC LAN interrupt line
-- interrupt-parent : Should be the phandle for the interrupt controller
-  that services interrupts for this device
+- interrupts : one or two interrupt specifiers
+  - The first interrupt is the SMSC LAN interrupt line
+  - The second interrupt (if present) is the PME (power
+    management event) interrupt that is able to wake up the host
+     system with a 50ms pulse on network activity
 - phy-mode : See ethernet.txt file in the same directory
 
 Optional properties:
@@ -21,6 +23,10 @@
   external PHY
 - smsc,save-mac-address : Indicates that mac address needs to be saved
   before resetting the controller
+- reset-gpios : a GPIO line connected to the RESET (active low) signal
+  of the device. On many systems this is wired high so the device goes
+  out of reset at power-on, but if it is under program control, this
+  optional GPIO can wake up in response to it.
 
 Examples:
 
@@ -29,7 +35,8 @@
 	reg = <0xf4000000 0x2000000>;
 	phy-mode = "mii";
 	interrupt-parent = <&gpio1>;
-	interrupts = <31>;
+	interrupts = <31>, <32>;
+	reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>;
 	reg-io-width = <4>;
 	smsc,irq-push-pull;
 };
diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.txt b/Documentation/devicetree/bindings/net/stm32-dwmac.txt
new file mode 100644
index 0000000..c35afb7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/stm32-dwmac.txt
@@ -0,0 +1,32 @@
+STMicroelectronics STM32 / MCU DWMAC glue layer controller
+
+This file documents platform glue layer for stmmac.
+Please see stmmac.txt for the other unchanged properties.
+
+The device node has following properties.
+
+Required properties:
+- compatible:  Should be "st,stm32-dwmac" to select glue, and
+	       "snps,dwmac-3.50a" to select IP version.
+- clocks: Must contain a phandle for each entry in clock-names.
+- clock-names: Should be "stmmaceth" for the host clock.
+	       Should be "mac-clk-tx" for the MAC TX clock.
+	       Should be "mac-clk-rx" for the MAC RX clock.
+- st,syscon : Should be phandle/offset pair. The phandle to the syscon node which
+	      encompases the glue register, and the offset of the control register.
+Example:
+
+	ethernet@40028000 {
+		compatible = "st,stm32-dwmac", "snps,dwmac-3.50a";
+		status = "disabled";
+		reg = <0x40028000 0x8000>;
+		reg-names = "stmmaceth";
+		interrupts = <0 61 0>, <0 62 0>;
+		interrupt-names = "macirq", "eth_wake_irq";
+		clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx";
+		clocks = <&rcc 0 25>, <&rcc 0 26>, <&rcc 0 27>;
+		st,syscon = <&syscfg 0x4>;
+		snps,pbl = <8>;
+		snps,mixed-burst;
+		dma-ranges;
+	};
diff --git a/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt
new file mode 100644
index 0000000..038dda4
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt
@@ -0,0 +1,35 @@
+XILINX GMIITORGMII Converter Driver Device Tree Bindings
+--------------------------------------------------------
+
+The Gigabit Media Independent Interface (GMII) to Reduced Gigabit Media
+Independent Interface (RGMII) core provides the RGMII between RGMII-compliant
+Ethernet physical media devices (PHY) and the Gigabit Ethernet controller.
+This core can be used in all three modes of operation(10/100/1000 Mb/s).
+The Management Data Input/Output (MDIO) interface is used to configure the
+Speed of operation. This core can switch dynamically between the three
+Different speed modes by configuring the conveter register through mdio write.
+
+This converter sits between the ethernet MAC and the external phy.
+MAC <==> GMII2RGMII <==> RGMII_PHY
+
+For more details about mdio please refer phy.txt file in the same directory.
+
+Required properties:
+- compatible	: Should be "xlnx,gmii-to-rgmii-1.0"
+- reg		: The ID number for the phy, usually a small integer
+- phy-handle	: Should point to the external phy device.
+		  See ethernet.txt file in the same directory.
+
+Example:
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		phy: ethernet-phy@0 {
+			......
+		};
+		gmiitorgmii: gmiitorgmii@8 {
+			compatible = "xlnx,gmii-to-rgmii-1.0";
+			reg = <8>;
+			phy-handle = <&phy>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt b/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt
index 8f86ab3..94aeeea 100644
--- a/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt
+++ b/Documentation/devicetree/bindings/nvmem/rockchip-efuse.txt
@@ -1,11 +1,20 @@
 = Rockchip eFuse device tree bindings =
 
 Required properties:
-- compatible: Should be "rockchip,rockchip-efuse"
+- compatible: Should be one of the following.
+  - "rockchip,rk3066a-efuse" - for RK3066a SoCs.
+  - "rockchip,rk3188-efuse" - for RK3188 SoCs.
+  - "rockchip,rk3288-efuse" - for RK3288 SoCs.
+  - "rockchip,rk3399-efuse" - for RK3399 SoCs.
 - reg: Should contain the registers location and exact eFuse size
 - clocks: Should be the clock id of eFuse
 - clock-names: Should be "pclk_efuse"
 
+Deprecated properties:
+- compatible: "rockchip,rockchip-efuse"
+  Old efuse compatible value compatible to rk3066a, rk3188 and rk3288
+  efuses
+
 = Data cells =
 Are child nodes of eFuse, bindings of which as described in
 bindings/nvmem/nvmem.txt
@@ -13,7 +22,7 @@
 Example:
 
 	efuse: efuse@ffb40000 {
-		compatible = "rockchip,rockchip-efuse";
+		compatible = "rockchip,rk3288-efuse";
 		reg = <0xffb40000 0x20>;
 		#address-cells = <1>;
 		#size-cells = <1>;
diff --git a/Documentation/devicetree/bindings/phy/bcm-ns-usb3-phy.txt b/Documentation/devicetree/bindings/phy/bcm-ns-usb3-phy.txt
new file mode 100644
index 0000000..09aeba9
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/bcm-ns-usb3-phy.txt
@@ -0,0 +1,23 @@
+Driver for Broadcom Northstar USB 3.0 PHY
+
+Required properties:
+
+- compatible: one of: "brcm,ns-ax-usb3-phy", "brcm,ns-bx-usb3-phy".
+- reg: register mappings for DMP (Device Management Plugin) and ChipCommon B
+       MMI.
+- reg-names: "dmp" and "ccb-mii"
+
+Initialization of USB 3.0 PHY depends on Northstar version. There are currently
+three known series: Ax, Bx and Cx.
+Known A0: BCM4707 rev 0
+Known B0: BCM4707 rev 4, BCM53573 rev 2
+Known B1: BCM4707 rev 6
+Known C0: BCM47094 rev 0
+
+Example:
+	usb3-phy {
+		compatible = "brcm,ns-ax-usb3-phy";
+		reg = <0x18105000 0x1000>, <0x18003000 0x1000>;
+		reg-names = "dmp", "ccb-mii";
+		#phy-cells = <0>;
+	};
diff --git a/Documentation/devicetree/bindings/phy/mxs-usb-phy.txt b/Documentation/devicetree/bindings/phy/mxs-usb-phy.txt
index 379b84a..1d25b04 100644
--- a/Documentation/devicetree/bindings/phy/mxs-usb-phy.txt
+++ b/Documentation/devicetree/bindings/phy/mxs-usb-phy.txt
@@ -12,6 +12,16 @@
 - interrupts: Should contain phy interrupt
 - fsl,anatop: phandle for anatop register, it is only for imx6 SoC series
 
+Optional properties:
+- fsl,tx-cal-45-dn-ohms: Integer [30-55]. Resistance (in ohms) of switchable
+  high-speed trimming resistor connected in parallel with the 45 ohm resistor
+  that terminates the DN output signal. Default: 45
+- fsl,tx-cal-45-dp-ohms: Integer [30-55]. Resistance (in ohms) of switchable
+  high-speed trimming resistor connected in parallel with the 45 ohm resistor
+  that terminates the DP output signal. Default: 45
+- fsl,tx-d-cal: Integer [79-119]. Current trimming value (as a percentage) of
+  the 17.78mA TX reference current. Default: 100
+
 Example:
 usbphy1: usbphy@020c9000 {
 	compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy";
diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt
new file mode 100644
index 0000000..3c29c77
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-rockchip-inno-usb2.txt
@@ -0,0 +1,64 @@
+ROCKCHIP USB2.0 PHY WITH INNO IP BLOCK
+
+Required properties (phy (parent) node):
+ - compatible : should be one of the listed compatibles:
+	* "rockchip,rk3366-usb2phy"
+	* "rockchip,rk3399-usb2phy"
+ - reg : the address offset of grf for usb-phy configuration.
+ - #clock-cells : should be 0.
+ - clock-output-names : specify the 480m output clock name.
+
+Optional properties:
+ - clocks : phandle + phy specifier pair, for the input clock of phy.
+ - clock-names : input clock name of phy, must be "phyclk".
+
+Required nodes : a sub-node is required for each port the phy provides.
+		 The sub-node name is used to identify host or otg port,
+		 and shall be the following entries:
+	* "otg-port" : the name of otg port.
+	* "host-port" : the name of host port.
+
+Required properties (port (child) node):
+ - #phy-cells : must be 0. See ./phy-bindings.txt for details.
+ - interrupts : specify an interrupt for each entry in interrupt-names.
+ - interrupt-names : a list which shall be the following entries:
+	* "otg-id" : for the otg id interrupt.
+	* "otg-bvalid" : for the otg vbus interrupt.
+	* "linestate" : for the host/otg linestate interrupt.
+
+Optional properties:
+ - phy-supply : phandle to a regulator that provides power to VBUS.
+		See ./phy-bindings.txt for details.
+
+Example:
+
+grf: syscon@ff770000 {
+	compatible = "rockchip,rk3366-grf", "syscon", "simple-mfd";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+...
+
+	u2phy: usb2-phy@700 {
+		compatible = "rockchip,rk3366-usb2phy";
+		reg = <0x700 0x2c>;
+		#clock-cells = <0>;
+		clock-output-names = "sclk_otgphy0_480m";
+
+		u2phy_otg: otg-port {
+			#phy-cells = <0>;
+			interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "otg-id", "otg-bvalid", "linestate";
+			status = "okay";
+		};
+
+		u2phy_host: host-port {
+			#phy-cells = <0>;
+			interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "linestate";
+			status = "okay";
+		};
+	};
+};
diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt b/Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt
new file mode 100644
index 0000000..6ea867e
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-rockchip-typec.txt
@@ -0,0 +1,101 @@
+* ROCKCHIP type-c PHY
+---------------------
+
+Required properties:
+ - compatible : must be "rockchip,rk3399-typec-phy"
+ - reg: Address and length of the usb phy control register set
+ - rockchip,grf : phandle to the syscon managing the "general
+   register files"
+ - clocks : phandle + clock specifier for the phy clocks
+ - clock-names : string, clock name, must be "tcpdcore", "tcpdphy-ref";
+ - assigned-clocks: main clock, should be <&cru SCLK_UPHY0_TCPDCORE> or
+		    <&cru SCLK_UPHY1_TCPDCORE>;
+ - assigned-clock-rates : the phy core clk frequency, shall be: 50000000
+ - resets : a list of phandle + reset specifier pairs
+ - reset-names : string reset name, must be:
+		 "uphy", "uphy-pipe", "uphy-tcphy"
+ - extcon : extcon specifier for the Power Delivery
+
+Note, there are 2 type-c phys for RK3399, and they are almost identical, except
+these registers(description below), every register node contains 3 sections:
+offset, enable bit, write mask bit.
+ - rockchip,typec-conn-dir : the register of type-c connector direction,
+   for type-c phy0, it must be <0xe580 0 16>;
+   for type-c phy1, it must be <0xe58c 0 16>;
+ - rockchip,usb3tousb2-en : the register of type-c force usb3 to usb2 enable
+   control.
+   for type-c phy0, it must be <0xe580 3 19>;
+   for type-c phy1, it must be <0xe58c 3 19>;
+ - rockchip,external-psm : the register of type-c phy external psm clock
+   selection.
+   for type-c phy0, it must be <0xe588 14 30>;
+   for type-c phy1, it must be <0xe594 14 30>;
+ - rockchip,pipe-status : the register of type-c phy pipe status.
+   for type-c phy0, it must be <0xe5c0 0 0>;
+   for type-c phy1, it must be <0xe5c0 16 16>;
+
+Required nodes : a sub-node is required for each port the phy provides.
+		 The sub-node name is used to identify dp or usb3 port,
+		 and shall be the following entries:
+	* "dp-port" : the name of DP port.
+	* "usb3-port" : the name of USB3 port.
+
+Required properties (port (child) node):
+- #phy-cells : must be 0, See ./phy-bindings.txt for details.
+
+Example:
+	tcphy0: phy@ff7c0000 {
+		compatible = "rockchip,rk3399-typec-phy";
+		reg = <0x0 0xff7c0000 0x0 0x40000>;
+		rockchip,grf = <&grf>;
+		extcon = <&fusb0>;
+		clocks = <&cru SCLK_UPHY0_TCPDCORE>,
+			 <&cru SCLK_UPHY0_TCPDPHY_REF>;
+		clock-names = "tcpdcore", "tcpdphy-ref";
+		assigned-clocks = <&cru SCLK_UPHY0_TCPDCORE>;
+		assigned-clock-rates = <50000000>;
+		resets = <&cru SRST_UPHY0>,
+			 <&cru SRST_UPHY0_PIPE_L00>,
+			 <&cru SRST_P_UPHY0_TCPHY>;
+		reset-names = "uphy", "uphy-pipe", "uphy-tcphy";
+		rockchip,typec-conn-dir = <0xe580 0 16>;
+		rockchip,usb3tousb2-en = <0xe580 3 19>;
+		rockchip,external-psm = <0xe588 14 30>;
+		rockchip,pipe-status = <0xe5c0 0 0>;
+
+		tcphy0_dp: dp-port {
+			#phy-cells = <0>;
+		};
+
+		tcphy0_usb3: usb3-port {
+			#phy-cells = <0>;
+		};
+	};
+
+	tcphy1: phy@ff800000 {
+		compatible = "rockchip,rk3399-typec-phy";
+		reg = <0x0 0xff800000 0x0 0x40000>;
+		rockchip,grf = <&grf>;
+		extcon = <&fusb1>;
+		clocks = <&cru SCLK_UPHY1_TCPDCORE>,
+			 <&cru SCLK_UPHY1_TCPDPHY_REF>;
+		clock-names = "tcpdcore", "tcpdphy-ref";
+		assigned-clocks = <&cru SCLK_UPHY1_TCPDCORE>;
+		assigned-clock-rates = <50000000>;
+		resets = <&cru SRST_UPHY1>,
+			 <&cru SRST_UPHY1_PIPE_L00>,
+			 <&cru SRST_P_UPHY1_TCPHY>;
+		reset-names = "uphy", "uphy-pipe", "uphy-tcphy";
+		rockchip,typec-conn-dir = <0xe58c 0 16>;
+		rockchip,usb3tousb2-en = <0xe58c 3 19>;
+		rockchip,external-psm = <0xe594 14 30>;
+		rockchip,pipe-status = <0xe5c0 16 16>;
+
+		tcphy1_dp: dp-port {
+			#phy-cells = <0>;
+		};
+
+		tcphy1_usb3: usb3-port {
+			#phy-cells = <0>;
+		};
+	};
diff --git a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
index 2281d6c..ace9cce 100644
--- a/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
+++ b/Documentation/devicetree/bindings/phy/rcar-gen3-phy-usb2.txt
@@ -6,6 +6,8 @@
 Required properties:
 - compatible: "renesas,usb2-phy-r8a7795" if the device is a part of an R8A7795
 	      SoC.
+	      "renesas,usb2-phy-r8a7796" if the device is a part of an R8A7796
+	      SoC.
 	      "renesas,rcar-gen3-usb2-phy" for a generic R-Car Gen3 compatible device.
 
 	      When compatible with the generic version, nodes must list the
@@ -30,11 +32,11 @@
 		compatible = "renesas,usb2-phy-r8a7795", "renesas,rcar-gen3-usb2-phy";
 		reg = <0 0xee080200 0 0x700>;
 		interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&mstp7_clks R8A7795_CLK_EHCI0>;
+		clocks = <&cpg CPG_MOD 703>;
 	};
 
 	usb-phy@ee0a0200 {
 		compatible = "renesas,usb2-phy-r8a7795", "renesas,rcar-gen3-usb2-phy";
 		reg = <0 0xee0a0200 0 0x700>;
-		clocks = <&mstp7_clks R8A7795_CLK_EHCI0>;
+		clocks = <&cpg CPG_MOD 702>;
 	};
diff --git a/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
new file mode 100644
index 0000000..0f6222a
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/rockchip-pcie-phy.txt
@@ -0,0 +1,31 @@
+Rockchip PCIE PHY
+-----------------------
+
+Required properties:
+ - compatible: rockchip,rk3399-pcie-phy
+ - #phy-cells: must be 0
+ - clocks: Must contain an entry in clock-names.
+	See ../clocks/clock-bindings.txt for details.
+ - clock-names: Must be "refclk"
+ - resets: Must contain an entry in reset-names.
+	See ../reset/reset.txt for details.
+ - reset-names: Must be "phy"
+
+Example:
+
+grf: syscon@ff770000 {
+	compatible = "rockchip,rk3399-grf", "syscon", "simple-mfd";
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	...
+
+	pcie_phy: pcie-phy {
+		compatible = "rockchip,rk3399-pcie-phy";
+		#phy-cells = <0>;
+		clocks = <&cru SCLK_PCIEPHY_REF>;
+		clock-names = "refclk";
+		resets = <&cru SRST_PCIEPHY>;
+		reset-names = "phy";
+	};
+};
diff --git a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt
index cc6be96..57dc388 100644
--- a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt
+++ b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt
@@ -27,6 +27,9 @@
 - clocks : phandle + clock specifier for the phy clocks
 - clock-names: string, clock name, must be "phyclk"
 - #clock-cells: for users of the phy-pll, should be 0
+- reset-names: Only allow the following entries:
+ - phy-reset
+- resets: Must contain an entry for each entry in reset-names.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
index 95736d7..287150d 100644
--- a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
+++ b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
@@ -10,6 +10,7 @@
   * allwinner,sun8i-a23-usb-phy
   * allwinner,sun8i-a33-usb-phy
   * allwinner,sun8i-h3-usb-phy
+  * allwinner,sun50i-a64-usb-phy
 - reg : a list of offset + length pairs
 - reg-names :
   * "phy_ctrl"
diff --git a/Documentation/devicetree/bindings/phy/ti-phy.txt b/Documentation/devicetree/bindings/phy/ti-phy.txt
index a3b3945..cd13e615 100644
--- a/Documentation/devicetree/bindings/phy/ti-phy.txt
+++ b/Documentation/devicetree/bindings/phy/ti-phy.txt
@@ -31,6 +31,8 @@
 
 Required properties:
  - compatible: Should be "ti,omap-usb2"
+	       Should be "ti,dra7x-usb2" for the 1st instance of USB2 PHY on
+	       DRA7x
 	       Should be "ti,dra7x-usb2-phy2" for the 2nd instance of USB2 PHY
 	       in DRA7x
  - reg : Address and length of the register set for the device.
diff --git a/Documentation/devicetree/bindings/regulator/ltc3676.txt b/Documentation/devicetree/bindings/regulator/ltc3676.txt
new file mode 100644
index 0000000..d4eb366
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/ltc3676.txt
@@ -0,0 +1,94 @@
+Linear Technology LTC3676 8-output regulators
+
+Required properties:
+- compatible: "lltc,ltc3676"
+- reg: I2C slave address
+
+Required child node:
+- regulators: Contains eight regulator child nodes sw1, sw2, sw3, sw4,
+  ldo1, ldo2, ldo3, and ldo4, specifying the initialization data as
+  documented in Documentation/devicetree/bindings/regulator/regulator.txt.
+
+Each regulator is defined using the standard binding for regulators. The
+nodes for sw1, sw2, sw3, sw4, ldo1, ldo2 and ldo4 additionally need to specify
+the resistor values of their external feedback voltage dividers:
+
+Required properties (not on ldo3):
+- lltc,fb-voltage-divider: An array of two integers containing the resistor
+  values R1 and R2 of the feedback voltage divider in ohms.
+
+Regulators sw1, sw2, sw3, sw4 can regulate the feedback reference from:
+412.5mV to 800mV in 12.5 mV steps. The output voltage thus ranges between
+0.4125 * (1 + R1/R2) V and 0.8 * (1 + R1/R2) V.
+
+Regulators ldo1, ldo2, and ldo4 have a fixed 0.725 V reference and thus output
+0.725 * (1 + R1/R2) V. The ldo3 regulator is fixed to 1.8 V.  The ldo1 standby
+regulator can not be disabled and thus should have the regulator-always-on
+property set.
+
+Example:
+
+	ltc3676: pmic@3c {
+		compatible = "lltc,ltc3676";
+		reg = <0x3c>;
+
+		regulators {
+			sw1_reg: sw1 {
+				regulator-min-microvolt = <674400>;
+				regulator-max-microvolt = <1308000>;
+				lltc,fb-voltage-divider = <127000 200000>;
+				regulator-ramp-delay = <7000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			sw2_reg: sw2 {
+				regulator-min-microvolt = <1033310>;
+				regulator-max-microvolt = <200400>;
+				lltc,fb-voltage-divider = <301000 200000>;
+				regulator-ramp-delay = <7000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			sw3_reg: sw3 {
+				regulator-min-microvolt = <674400>;
+				regulator-max-microvolt = <130800>;
+				lltc,fb-voltage-divider = <127000 200000>;
+				regulator-ramp-delay = <7000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			sw4_reg: sw4 {
+				regulator-min-microvolt = <868310>;
+				regulator-max-microvolt = <168400>;
+				lltc,fb-voltage-divider = <221000 200000>;
+				regulator-ramp-delay = <7000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			ldo2_reg: ldo2 {
+				regulator-min-microvolt = <2490375>;
+				regulator-max-microvolt = <2490375>;
+				lltc,fb-voltage-divider = <487000 200000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+
+			ldo3_reg: ldo3 {
+				regulator-min-microvolt = <1800000>;
+				regulator-max-microvolt = <1800000>;
+				regulator-boot-on;
+			};
+
+			ldo4_reg: ldo4 {
+				regulator-min-microvolt = <3023250>;
+				regulator-max-microvolt = <3023250>;
+				lltc,fb-voltage-divider = <634000 200000>;
+				regulator-boot-on;
+				regulator-always-on;
+			};
+		};
+	};
diff --git a/Documentation/devicetree/bindings/regulator/pv88080.txt b/Documentation/devicetree/bindings/regulator/pv88080.txt
index 38a6142..e6e4b9c 100644
--- a/Documentation/devicetree/bindings/regulator/pv88080.txt
+++ b/Documentation/devicetree/bindings/regulator/pv88080.txt
@@ -1,22 +1,28 @@
 * Powerventure Semiconductor PV88080 Voltage Regulator
 
 Required properties:
-- compatible: "pvs,pv88080".
-- reg: I2C slave address, usually 0x49.
+- compatible: Must be one of the following, depending on the
+  silicon version:
+	- "pvs,pv88080" (DEPRECATED)
+
+	- "pvs,pv88080-aa" for PV88080 AA or AB silicon
+	- "pvs,pv88080-ba" for PV88080 BA or BB silicon
+  NOTE: The use of the compatibles with no silicon version is deprecated.
+- reg: I2C slave address, usually 0x49
 - interrupts: the interrupt outputs of the controller
 - regulators: A node that houses a sub-node for each regulator within the
   device. Each sub-node is identified using the node's name, with valid
   values listed below. The content of each sub-node is defined by the
   standard binding for regulators; see regulator.txt.
-  BUCK1, BUCK2, and BUCK3.
+  BUCK1, BUCK2, BUCK3 and HVBUCK.
 
 Optional properties:
 - Any optional property defined in regulator.txt
 
-Example
+Example:
 
 	pmic: pv88080@49 {
-		compatible = "pvs,pv88080";
+		compatible = "pvs,pv88080-ba";
 		reg = <0x49>;
 		interrupt-parent = <&gpio>;
 		interrupts = <24 24>;
@@ -45,5 +51,12 @@
 				regulator-min-microamp 	= <1496000>;
 				regulator-max-microamp 	= <4189000>;
 			};
+
+			HVBUCK {
+				regulator-name = "hvbuck";
+				regulator-min-microvolt = <   5000>;
+				regulator-max-microvolt = <1275000>;
+ 			};
 		};
 	};
+
diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt
index ecfc593..6ab5aef 100644
--- a/Documentation/devicetree/bindings/regulator/regulator.txt
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -13,7 +13,7 @@
 - regulator-allow-bypass: allow the regulator to go into bypass mode
 - regulator-allow-set-load: allow the regulator performance level to be configured
 - <name>-supply: phandle to the parent supply/regulator node
-- regulator-ramp-delay: ramp delay for regulator(in uV/uS)
+- regulator-ramp-delay: ramp delay for regulator(in uV/us)
   For hardware which supports disabling ramp rate, it should be explicitly
   initialised to zero (regulator-ramp-delay = <0>) for disabling ramp delay.
 - regulator-enable-ramp-delay: The time taken, in microseconds, for the supply
diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt
index f5561ac..f86bb06 100644
--- a/Documentation/devicetree/bindings/serial/8250.txt
+++ b/Documentation/devicetree/bindings/serial/8250.txt
@@ -42,9 +42,8 @@
 - auto-flow-control: one way to enable automatic flow control support. The
   driver is allowed to detect support for the capability even without this
   property.
-- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD
-  line respectively. It will use specified GPIO instead of the peripheral
-  function pin for the UART feature. If unsure, don't specify this property.
+- tx-threshold: Specify the TX FIFO low water indication for parts with
+  programmable TX FIFO thresholds.
 
 Note:
 * fsl,ns16550:
@@ -66,19 +65,3 @@
 		interrupts = <10>;
 		reg-shift = <2>;
 	};
-
-Example for OMAP UART using GPIO-based modem control signals:
-
-	uart4: serial@49042000 {
-		compatible = "ti,omap3-uart";
-		reg = <0x49042000 0x400>;
-		interrupts = <80>;
-		ti,hwmods = "uart4";
-		clock-frequency = <48000000>;
-		cts-gpios = <&gpio3 5 GPIO_ACTIVE_LOW>;
-		rts-gpios = <&gpio3 6 GPIO_ACTIVE_LOW>;
-		dtr-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>;
-		dsr-gpios = <&gpio1 13 GPIO_ACTIVE_LOW>;
-		dcd-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>;
-		rng-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
-	};
diff --git a/Documentation/devicetree/bindings/serial/st,stm32-usart.txt b/Documentation/devicetree/bindings/serial/st,stm32-usart.txt
new file mode 100644
index 0000000..85ec5f2
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/st,stm32-usart.txt
@@ -0,0 +1,46 @@
+* STMicroelectronics STM32 USART
+
+Required properties:
+- compatible: Can be either "st,stm32-usart", "st,stm32-uart",
+"st,stm32f7-usart" or "st,stm32f7-uart" depending on whether
+the device supports synchronous mode and is compatible with
+stm32(f4) or stm32f7.
+- reg: The address and length of the peripheral registers space
+- interrupts: The interrupt line of the USART instance
+- clocks: The input clock of the USART instance
+
+Optional properties:
+- pinctrl: The reference on the pins configuration
+- st,hw-flow-ctrl: bool flag to enable hardware flow control.
+- dmas: phandle(s) to DMA controller node(s). Refer to stm32-dma.txt
+- dma-names: "rx" and/or "tx"
+
+Examples:
+usart4: serial@40004c00 {
+	compatible = "st,stm32-uart";
+	reg = <0x40004c00 0x400>;
+	interrupts = <52>;
+	clocks = <&clk_pclk1>;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usart4>;
+};
+
+usart2: serial@40004400 {
+	compatible = "st,stm32-usart", "st,stm32-uart";
+	reg = <0x40004400 0x400>;
+	interrupts = <38>;
+	clocks = <&clk_pclk1>;
+	st,hw-flow-ctrl;
+	pinctrl-names = "default";
+	pinctrl-0 = <&pinctrl_usart2 &pinctrl_usart2_rtscts>;
+};
+
+usart1: serial@40011000 {
+	compatible = "st,stm32-usart", "st,stm32-uart";
+	reg = <0x40011000 0x400>;
+	interrupts = <37>;
+	clocks = <&rcc 0 164>;
+	dmas = <&dma2 2 4 0x414 0x0>,
+	       <&dma2 7 4 0x414 0x0>;
+	dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/sound/omap-mcpdm.txt b/Documentation/devicetree/bindings/sound/omap-mcpdm.txt
index 6f6c2f8..0741dff 100644
--- a/Documentation/devicetree/bindings/sound/omap-mcpdm.txt
+++ b/Documentation/devicetree/bindings/sound/omap-mcpdm.txt
@@ -8,8 +8,6 @@
 - interrupts: Interrupt number for McPDM
 - interrupt-parent: The parent interrupt controller
 - ti,hwmods: Name of the hwmod associated to the McPDM
-- clocks:  phandle for the pdmclk provider, likely <&twl6040>
-- clock-names: Must be "pdmclk"
 
 Example:
 
@@ -21,11 +19,3 @@
 	interrupt-parent = <&gic>;
 	ti,hwmods = "mcpdm";
 };
-
-In board DTS file the pdmclk needs to be added:
-
-&mcpdm {
-	clocks = <&twl6040>;
-	clock-names = "pdmclk";
-	status = "okay";
-};
diff --git a/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt b/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt
new file mode 100644
index 0000000..ad7ac80
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/brcm,spi-bcm-qspi.txt
@@ -0,0 +1,233 @@
+Broadcom SPI controller
+
+The Broadcom SPI controller is a SPI master found on various SOCs, including
+BRCMSTB (BCM7XXX), Cygnus, NSP and NS2. The Broadcom Master SPI hw IP consits
+of :
+ MSPI : SPI master controller can read and write to a SPI slave device
+ BSPI : Broadcom SPI in combination with the MSPI hw IP provides acceleration
+	for flash reads and be configured to do single, double, quad lane
+	io with 3-byte and 4-byte addressing support.
+
+ Supported Broadcom SoCs have one instance of MSPI+BSPI controller IP.
+ MSPI master can be used wihout BSPI. BRCMSTB SoCs have an additional instance
+ of a MSPI master without the BSPI to use with non flash slave devices that
+ use SPI protocol.
+
+Required properties:
+
+- #address-cells:
+    Must be <1>, as required by generic SPI binding.
+
+- #size-cells:
+    Must be <0>, also as required by generic SPI binding.
+
+- compatible:
+    Must be one of :
+    "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-qspi" : MSPI+BSPI on BRCMSTB SoCs
+    "brcm,spi-bcm-qspi", "brcm,spi-brcmstb-mspi" : Second Instance of MSPI
+						   BRCMSTB  SoCs
+    "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi"     : MSPI+BSPI on Cygnus, NSP
+    "brcm,spi-bcm-qspi", "brcm,spi-ns2-qspi"     : NS2 SoCs
+
+- reg:
+    Define the bases and ranges of the associated I/O address spaces.
+    The required range is MSPI controller registers.
+
+- reg-names:
+    First name does not matter, but must be reserved for the MSPI controller
+    register range as mentioned in 'reg' above, and will typically contain
+    - "bspi_regs": BSPI register range, not required with compatible
+		   "spi-brcmstb-mspi"
+    - "mspi_regs": MSPI register range is required for compatible strings
+    - "intr_regs", "intr_status_reg" : Interrupt and status register for
+      NSP, NS2, Cygnus SoC
+
+- interrupts
+    The interrupts used by the MSPI and/or BSPI controller.
+
+- interrupt-names:
+    Names of interrupts associated with MSPI
+    - "mspi_halted" :
+    - "mspi_done": Indicates that the requested SPI operation is complete.
+    - "spi_lr_fullness_reached" : Linear read BSPI pipe full
+    - "spi_lr_session_aborted"  : Linear read BSPI pipe aborted
+    - "spi_lr_impatient" : Linear read BSPI requested when pipe empty
+    - "spi_lr_session_done" : Linear read BSPI session done
+
+- clocks:
+    A phandle to the reference clock for this block.
+
+Optional properties:
+
+
+- native-endian
+    Defined when using BE SoC and device uses BE register read/write
+
+Recommended optional m25p80 properties:
+- spi-rx-bus-width: Definition as per
+                    Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Examples:
+
+BRCMSTB SoC Example:
+
+  SPI Master (MSPI+BSPI) for SPI-NOR access:
+
+    spi@f03e3400 {
+		#address-cells = <0x1>;
+		#size-cells = <0x0>;
+		compatible = "brcm,spi-brcmstb-qspi", "brcm,spi-brcmstb-qspi";
+		reg = <0xf03e0920 0x4 0xf03e3400 0x188 0xf03e3200 0x50>;
+		reg-names = "cs_reg", "mspi", "bspi";
+		interrupts = <0x6 0x5 0x4 0x3 0x2 0x1 0x0>;
+		interrupt-parent = <0x1c>;
+		interrupt-names = "mspi_halted",
+				  "mspi_done",
+				  "spi_lr_overread",
+				  "spi_lr_session_done",
+				  "spi_lr_impatient",
+				  "spi_lr_session_aborted",
+				  "spi_lr_fullness_reached";
+
+		clocks = <&hif_spi>;
+		clock-names = "sw_spi";
+
+		m25p80@0 {
+			#size-cells = <0x2>;
+			#address-cells = <0x2>;
+			compatible = "m25p80";
+			reg = <0x0>;
+			spi-max-frequency = <0x2625a00>;
+			spi-cpol;
+			spi-cpha;
+			m25p,fast-read;
+
+			flash0.bolt@0 {
+				reg = <0x0 0x0 0x0 0x100000>;
+			};
+
+			flash0.macadr@100000 {
+				reg = <0x0 0x100000 0x0 0x10000>;
+			};
+
+			flash0.nvram@110000 {
+				reg = <0x0 0x110000 0x0 0x10000>;
+			};
+
+			flash0.kernel@120000 {
+				reg = <0x0 0x120000 0x0 0x400000>;
+			};
+
+			flash0.devtree@520000 {
+				reg = <0x0 0x520000 0x0 0x10000>;
+			};
+
+			flash0.splash@530000 {
+				reg = <0x0 0x530000 0x0 0x80000>;
+			};
+
+			flash0@0 {
+				reg = <0x0 0x0 0x0 0x4000000>;
+			};
+		};
+	};
+
+
+    MSPI master for any SPI device :
+
+	spi@f0416000 {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&upg_fixed>;
+		compatible = "brcm,spi-brcmstb-qspi", "brcm,spi-brcmstb-mspi";
+		reg = <0xf0416000 0x180>;
+		reg-names = "mspi";
+		interrupts = <0x14>;
+		interrupt-parent = <&irq0_aon_intc>;
+		interrupt-names = "mspi_done";
+	};
+
+iProc SoC Example:
+
+    qspi: spi@18027200 {
+	compatible = "brcm,spi-bcm-qspi", "brcm,spi-nsp-qspi";
+	reg = <0x18027200 0x184>,
+	      <0x18027000 0x124>,
+	      <0x1811c408 0x004>,
+	      <0x180273a0 0x01c>;
+	reg-names = "mspi_regs", "bspi_regs", "intr_regs", "intr_status_reg";
+	interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>,
+		     <GIC_SPI 78 IRQ_TYPE_LEVEL_HIGH>;
+	interrupt-names =
+		     "spi_lr_fullness_reached",
+		     "spi_lr_session_aborted",
+		     "spi_lr_impatient",
+		     "spi_lr_session_done",
+		     "mspi_done",
+		     "mspi_halted";
+	clocks = <&iprocmed>;
+	clock-names = "iprocmed";
+	num-cs = <2>;
+	#address-cells = <1>;
+	#size-cells = <0>;
+    };
+
+
+ NS2 SoC Example:
+
+	       qspi: spi@66470200 {
+		       compatible = "brcm,spi-bcm-qspi", "brcm,spi-ns2-qspi";
+		       reg = <0x66470200 0x184>,
+			     <0x66470000 0x124>,
+			     <0x67017408 0x004>,
+			     <0x664703a0 0x01c>;
+		       reg-names = "mspi", "bspi", "intr_regs",
+			"intr_status_reg";
+		       interrupts = <GIC_SPI 419 IRQ_TYPE_LEVEL_HIGH>;
+		       interrupt-names = "spi_l1_intr";
+			clocks = <&iprocmed>;
+			clock-names = "iprocmed";
+			num-cs = <2>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+	       };
+
+
+ m25p80 node for NSP, NS2
+
+	 &qspi {
+		      flash: m25p80@0 {
+		      #address-cells = <1>;
+		      #size-cells = <1>;
+		      compatible = "m25p80";
+		      reg = <0x0>;
+		      spi-max-frequency = <12500000>;
+		      m25p,fast-read;
+		      spi-cpol;
+		      spi-cpha;
+
+		      partition@0 {
+				  label = "boot";
+				  reg = <0x00000000 0x000a0000>;
+		      };
+
+		      partition@a0000 {
+				  label = "env";
+				  reg = <0x000a0000 0x00060000>;
+		      };
+
+		      partition@100000 {
+				  label = "system";
+				  reg = <0x00100000 0x00600000>;
+		      };
+
+		      partition@700000 {
+				  label = "rootfs";
+				  reg = <0x00700000 0x01900000>;
+		      };
+	};
diff --git a/Documentation/devicetree/bindings/spi/jcore,spi.txt b/Documentation/devicetree/bindings/spi/jcore,spi.txt
new file mode 100644
index 0000000..93936d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/jcore,spi.txt
@@ -0,0 +1,34 @@
+J-Core SPI master
+
+Required properties:
+
+- compatible: Must be "jcore,spi2".
+
+- reg: Memory region for registers.
+
+- #address-cells: Must be 1.
+
+- #size-cells: Must be 0.
+
+Optional properties:
+
+- clocks: If a phandle named "ref_clk" is present, SPI clock speed
+  programming is relative to the frequency of the indicated clock.
+  Necessary only if the input clock rate is something other than a
+  fixed 50 MHz.
+
+- clock-names: Clock names, one for each phandle in clocks.
+
+See spi-bus.txt for additional properties not specific to this device.
+
+Example:
+
+spi@40 {
+	compatible = "jcore,spi2";
+	#address-cells = <1>;
+	#size-cells = <0>;
+	reg = <0x40 0x8>;
+	spi-max-frequency = <25000000>;
+	clocks = <&bus_clk>;
+	clock-names = "ref_clk";
+}
diff --git a/Documentation/devicetree/bindings/spi/spi-meson.txt b/Documentation/devicetree/bindings/spi/spi-meson.txt
index bb52a86..dc6d031 100644
--- a/Documentation/devicetree/bindings/spi/spi-meson.txt
+++ b/Documentation/devicetree/bindings/spi/spi-meson.txt
@@ -7,7 +7,7 @@
 receive buffer.
 
 Required properties:
- - compatible: should be "amlogic,meson6-spifc"
+ - compatible: should be "amlogic,meson6-spifc" or "amlogic,meson-gxbb-spifc"
  - reg: physical base address and length of the controller registers
  - clocks: phandle of the input clock for the baud rate generator
  - #address-cells: should be 1
diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
index 41b817f..88b6ea1 100644
--- a/Documentation/devicetree/bindings/thermal/thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/thermal.txt
@@ -62,7 +62,7 @@
 Required properties:
 - #cooling-cells:	Used to provide cooling device specific information
   Type: unsigned	while referring to it. Must be at least 2, in order
-  Size: one cell      	to specify minimum and maximum cooling state used
+  Size: one cell	to specify minimum and maximum cooling state used
 			in the reference. The first cell is the minimum
 			cooling state requested and the second cell is
 			the maximum cooling state requested in the reference.
@@ -119,7 +119,7 @@
 Optional property:
 - contribution:		The cooling contribution to the thermal zone of the
   Type: unsigned	referred cooling device at the referred trip point.
-  Size: one cell      	The contribution is a ratio of the sum
+  Size: one cell	The contribution is a ratio of the sum
 			of all cooling contributions within a thermal zone.
 
 Note: Using the THERMAL_NO_LIMIT (-1UL) constant in the cooling-device phandle
@@ -145,7 +145,7 @@
   Size: one cell
 
 - thermal-sensors:	A list of thermal sensor phandles and sensor specifier
-  Type: list of 	used while monitoring the thermal zone.
+  Type: list of		used while monitoring the thermal zone.
   phandles + sensor
   specifier
 
@@ -473,7 +473,7 @@
 				  <&adc>;	/* pcb north */
 
 		/* hotspot = 100 * bandgap - 120 * adc + 484 */
-		coefficients = 		<100	-120	484>;
+		coefficients =		<100	-120	484>;
 
 		trips {
 			...
@@ -502,7 +502,7 @@
         thermal-sensors =  <&adc>;
 
 		/* hotspot = 1 * adc + 6000 */
-	coefficients = 		<1	6000>;
+	coefficients =		<1	6000>;
 
 (d) - Board thermal
 
diff --git a/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt b/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt
index da2d510..e207c11 100644
--- a/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt
+++ b/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt
@@ -2,7 +2,9 @@
 
 Required properties:
 
-- compatible : Must be "moxa,moxart-timer"
+- compatible : Must be one of:
+ 	- "moxa,moxart-timer"
+ 	- "aspeed,ast2400-timer"
 - reg : Should contain registers location and length
 - interrupts : Should contain the timer interrupt number
 - clocks : Should contain phandle for the clock that drives the counter
diff --git a/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt b/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt
index 3ca89cd..d191612 100644
--- a/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt
+++ b/Documentation/devicetree/bindings/timer/oxsemi,rps-timer.txt
@@ -2,7 +2,7 @@
 ================================================
 
 Required properties:
-- compatible: Should be "oxsemi,ox810se-rps-timer"
+- compatible: Should be "oxsemi,ox810se-rps-timer" or "oxsemi,ox820-rps-timer"
 - reg : Specifies base physical address and size of the registers.
 - interrupts : The interrupts of the two timers
 - clocks : The phandle of the timer clock source
diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
index 341dc67..0e03344 100644
--- a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
+++ b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
@@ -81,6 +81,8 @@
 - fsl,usbmisc: phandler of non-core register device, with one
   argument that indicate usb controller index
 - disable-over-current: disable over current detect
+- over-current-active-high: over current signal polarity is high active,
+  typically over current signal polarity is low active.
 - external-vbus-divider: enables off-chip resistor divider for Vbus
 
 Example:
diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt
index 20a68bf..7d16ebf 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.txt
+++ b/Documentation/devicetree/bindings/usb/dwc2.txt
@@ -26,7 +26,10 @@
 - g-use-dma: enable dma usage in gadget driver.
 - g-rx-fifo-size: size of rx fifo size in gadget mode.
 - g-np-tx-fifo-size: size of non-periodic tx fifo size in gadget mode.
-- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0) in gadget mode.
+
+Deprecated properties:
+- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0)
+  in gadget mode.
 
 Example:
 
diff --git a/Documentation/devicetree/bindings/usb/dwc3-cavium.txt b/Documentation/devicetree/bindings/usb/dwc3-cavium.txt
new file mode 100644
index 0000000..710b782
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/dwc3-cavium.txt
@@ -0,0 +1,28 @@
+Cavium SuperSpeed DWC3 USB SoC controller
+
+Required properties:
+- compatible:	Should contain "cavium,octeon-7130-usb-uctl"
+
+Required child node:
+A child node must exist to represent the core DWC3 IP block. The name of
+the node is not important. The content of the node is defined in dwc3.txt.
+
+Example device node:
+
+		    uctl@1180069000000 {
+			    compatible = "cavium,octeon-7130-usb-uctl";
+			    reg = <0x00011800 0x69000000 0x00000000 0x00000100>;
+			    ranges;
+			    #address-cells = <0x00000002>;
+			    #size-cells = <0x00000002>;
+			    refclk-frequency = <0x05f5e100>;
+			    refclk-type-ss = "dlmc_ref_clk0";
+			    refclk-type-hs = "dlmc_ref_clk0";
+			    power = <0x00000002 0x00000002 0x00000001>;
+			    xhci@1690000000000 {
+				    compatible = "cavium,octeon-7130-xhci", "synopsys,dwc3";
+				    reg = <0x00016900 0x00000000 0x00000010 0x00000000>;
+				    interrupt-parent = <0x00000010>;
+				    interrupts = <0x00000009 0x00000004>;
+			    };
+		    };
diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt
index 7d7ce08..e3e6983 100644
--- a/Documentation/devicetree/bindings/usb/dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/dwc3.txt
@@ -13,7 +13,8 @@
    in the array is expected to be a handle to the USB2/HS PHY and
    the second element is expected to be a handle to the USB3/SS PHY
  - phys: from the *Generic PHY* bindings
- - phy-names: from the *Generic PHY* bindings
+ - phy-names: from the *Generic PHY* bindings; supported names are "usb2-phy"
+	or "usb3-phy".
  - snps,usb3_lpm_capable: determines if platform is USB3 LPM capable
  - snps,disable_scramble_quirk: true when SW should disable data scrambling.
 	Only really useful for FPGA builds.
@@ -39,6 +40,11 @@
 			disabling the suspend signal to the PHY.
  - snps,dis_rxdet_inp3_quirk: when set core will disable receiver detection
 			in PHY P3 power state.
+ - snps,dis-u2-freeclk-exists-quirk: when set, clear the u2_freeclk_exists
+			in GUSB2PHYCFG, specify that USB2 PHY doesn't provide
+			a free-running PHY clock.
+ - snps,dis-del-phy-power-chg-quirk: when set core will change PHY power
+			from P0 to P1/P2/P3 without delay.
  - snps,is-utmi-l1-suspend: true when DWC3 asserts output signal
 			utmi_l1_suspend_n, false when asserts utmi_sleep_n
  - snps,hird-threshold: HIRD threshold
diff --git a/Documentation/devicetree/bindings/usb/generic.txt b/Documentation/devicetree/bindings/usb/generic.txt
index bba8257..bfadeb1 100644
--- a/Documentation/devicetree/bindings/usb/generic.txt
+++ b/Documentation/devicetree/bindings/usb/generic.txt
@@ -11,6 +11,11 @@
 			"peripheral" and "otg". In case this attribute isn't
 			passed via DT, USB DRD controllers should default to
 			OTG.
+ - phy_type: tells USB controllers that we want to configure the core to support
+			a UTMI+ PHY with an 8- or 16-bit interface if UTMI+ is
+			selected. Valid arguments are "utmi" and "utmi_wide".
+			In case this isn't passed via DT, USB controllers should
+			default to HW capability.
  - otg-rev: tells usb driver the release number of the OTG and EH supplement
 			with which the device and its descriptors are compliant,
 			in binary-coded decimal (i.e. 2.0 is 0200H). This
@@ -34,6 +39,7 @@
 	usb-phy = <&usb2_phy>, <&usb3,phy>;
 	maximum-speed = "super-speed";
 	dr_mode = "otg";
+	phy_type = "utmi_wide";
 	otg-rev = <0x0200>;
 	adp-disable;
 };
diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
index b604056..9e18e00 100644
--- a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
+++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
@@ -9,6 +9,7 @@
 	- "renesas,usbhs-r8a7793" for r8a7793 (R-Car M2-N) compatible device
 	- "renesas,usbhs-r8a7794" for r8a7794 (R-Car E2) compatible device
 	- "renesas,usbhs-r8a7795" for r8a7795 (R-Car H3) compatible device
+	- "renesas,usbhs-r8a7796" for r8a7796 (R-Car M3-W) compatible device
 	- "renesas,rcar-gen2-usbhs" for R-Car Gen2 compatible device
 	- "renesas,rcar-gen3-usbhs" for R-Car Gen3 compatible device
 
diff --git a/Documentation/devicetree/bindings/usb/rockchip,dwc3.txt b/Documentation/devicetree/bindings/usb/rockchip,dwc3.txt
new file mode 100644
index 0000000..0536a93
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/rockchip,dwc3.txt
@@ -0,0 +1,59 @@
+Rockchip SuperSpeed DWC3 USB SoC controller
+
+Required properties:
+- compatible:	should contain "rockchip,rk3399-dwc3" for rk3399 SoC
+- clocks:	A list of phandle + clock-specifier pairs for the
+		clocks listed in clock-names
+- clock-names:	Should contain the following:
+  "ref_clk"	Controller reference clk, have to be 24 MHz
+  "suspend_clk"	Controller suspend clk, have to be 24 MHz or 32 KHz
+  "bus_clk"	Master/Core clock, have to be >= 62.5 MHz for SS
+		operation and >= 30MHz for HS operation
+  "grf_clk"	Controller grf clk
+
+Required child node:
+A child node must exist to represent the core DWC3 IP block. The name of
+the node is not important. The content of the node is defined in dwc3.txt.
+
+Phy documentation is provided in the following places:
+Documentation/devicetree/bindings/phy/rockchip,dwc3-usb-phy.txt
+
+Example device nodes:
+
+	usbdrd3_0: usb@fe800000 {
+		compatible = "rockchip,rk3399-dwc3";
+		clocks = <&cru SCLK_USB3OTG0_REF>, <&cru SCLK_USB3OTG0_SUSPEND>,
+			 <&cru ACLK_USB3OTG0>, <&cru ACLK_USB3_GRF>;
+		clock-names = "ref_clk", "suspend_clk",
+			      "bus_clk", "grf_clk";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+		status = "disabled";
+		usbdrd_dwc3_0: dwc3@fe800000 {
+			compatible = "snps,dwc3";
+			reg = <0x0 0xfe800000 0x0 0x100000>;
+			interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
+			dr_mode = "otg";
+			status = "disabled";
+		};
+	};
+
+	usbdrd3_1: usb@fe900000 {
+		compatible = "rockchip,rk3399-dwc3";
+		clocks = <&cru SCLK_USB3OTG1_REF>, <&cru SCLK_USB3OTG1_SUSPEND>,
+			 <&cru ACLK_USB3OTG1>, <&cru ACLK_USB3_GRF>;
+		clock-names = "ref_clk", "suspend_clk",
+			      "bus_clk", "grf_clk";
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+		status = "disabled";
+		usbdrd_dwc3_1: dwc3@fe900000 {
+			compatible = "snps,dwc3";
+			reg = <0x0 0xfe900000 0x0 0x100000>;
+			interrupts = <GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>;
+			dr_mode = "otg";
+			status = "disabled";
+		};
+	};
diff --git a/Documentation/devicetree/bindings/usb/usb4604.txt b/Documentation/devicetree/bindings/usb/usb4604.txt
new file mode 100644
index 0000000..82506d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usb4604.txt
@@ -0,0 +1,19 @@
+SMSC USB4604 High-Speed Hub Controller
+
+Required properties:
+- compatible: Should be "smsc,usb4604"
+
+Optional properties:
+- reg: Specifies the i2c slave address, it is required and should be 0x2d
+       if I2C is used.
+- reset-gpios: Should specify GPIO for reset.
+- initial-mode: Should specify initial mode.
+                (1 for HUB mode, 2 for STANDBY mode)
+
+Examples:
+	usb-hub@2d {
+		compatible = "smsc,usb4604";
+		reg = <0x2d>;
+		reset-gpios = <&gpx3 5 1>;
+		initial-mode = <1>;
+	};
diff --git a/Documentation/devicetree/bindings/usb/usbmisc-imx.txt b/Documentation/devicetree/bindings/usb/usbmisc-imx.txt
index 3539d4e..f1e27fa 100644
--- a/Documentation/devicetree/bindings/usb/usbmisc-imx.txt
+++ b/Documentation/devicetree/bindings/usb/usbmisc-imx.txt
@@ -6,6 +6,7 @@
 	"fsl,imx6q-usbmisc" for imx6q
 	"fsl,vf610-usbmisc" for Vybrid vf610
 	"fsl,imx6sx-usbmisc" for imx6sx
+	"fsl,imx7d-usbmisc" for imx7d
 - reg: Should contain registers location and length
 
 Examples:
diff --git a/Documentation/docutils.conf b/Documentation/docutils.conf
new file mode 100644
index 0000000..2830772
--- /dev/null
+++ b/Documentation/docutils.conf
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 mode: conf-colon -*-
+#
+# docutils configuration file
+# http://docutils.sourceforge.net/docs/user/config.html
+
+[general]
+halt_level: severe
\ No newline at end of file
diff --git a/Documentation/driver-api/basics.rst b/Documentation/driver-api/basics.rst
new file mode 100644
index 0000000..935b9b8
--- /dev/null
+++ b/Documentation/driver-api/basics.rst
@@ -0,0 +1,120 @@
+Driver Basics
+=============
+
+Driver Entry and Exit points
+----------------------------
+
+.. kernel-doc:: include/linux/init.h
+   :internal:
+
+Atomic and pointer manipulation
+-------------------------------
+
+.. kernel-doc:: arch/x86/include/asm/atomic.h
+   :internal:
+
+Delaying, scheduling, and timer routines
+----------------------------------------
+
+.. kernel-doc:: include/linux/sched.h
+   :internal:
+
+.. kernel-doc:: kernel/sched/core.c
+   :export:
+
+.. kernel-doc:: kernel/sched/cpupri.c
+   :internal:
+
+.. kernel-doc:: kernel/sched/fair.c
+   :internal:
+
+.. kernel-doc:: include/linux/completion.h
+   :internal:
+
+.. kernel-doc:: kernel/time/timer.c
+   :export:
+
+Wait queues and Wake events
+---------------------------
+
+.. kernel-doc:: include/linux/wait.h
+   :internal:
+
+.. kernel-doc:: kernel/sched/wait.c
+   :export:
+
+High-resolution timers
+----------------------
+
+.. kernel-doc:: include/linux/ktime.h
+   :internal:
+
+.. kernel-doc:: include/linux/hrtimer.h
+   :internal:
+
+.. kernel-doc:: kernel/time/hrtimer.c
+   :export:
+
+Workqueues and Kevents
+----------------------
+
+.. kernel-doc:: include/linux/workqueue.h
+   :internal:
+
+.. kernel-doc:: kernel/workqueue.c
+   :export:
+
+Internal Functions
+------------------
+
+.. kernel-doc:: kernel/exit.c
+   :internal:
+
+.. kernel-doc:: kernel/signal.c
+   :internal:
+
+.. kernel-doc:: include/linux/kthread.h
+   :internal:
+
+.. kernel-doc:: kernel/kthread.c
+   :export:
+
+Kernel objects manipulation
+---------------------------
+
+.. kernel-doc:: lib/kobject.c
+   :export:
+
+Kernel utility functions
+------------------------
+
+.. kernel-doc:: include/linux/kernel.h
+   :internal:
+
+.. kernel-doc:: kernel/printk/printk.c
+   :export:
+
+.. kernel-doc:: kernel/panic.c
+   :export:
+
+.. kernel-doc:: kernel/sys.c
+   :export:
+
+.. kernel-doc:: kernel/rcu/srcu.c
+   :export:
+
+.. kernel-doc:: kernel/rcu/tree.c
+   :export:
+
+.. kernel-doc:: kernel/rcu/tree_plugin.h
+   :export:
+
+.. kernel-doc:: kernel/rcu/update.c
+   :export:
+
+Device Resource Management
+--------------------------
+
+.. kernel-doc:: drivers/base/devres.c
+   :export:
+
diff --git a/Documentation/driver-api/frame-buffer.rst b/Documentation/driver-api/frame-buffer.rst
new file mode 100644
index 0000000..9dd3060
--- /dev/null
+++ b/Documentation/driver-api/frame-buffer.rst
@@ -0,0 +1,62 @@
+Frame Buffer Library
+====================
+
+The frame buffer drivers depend heavily on four data structures. These
+structures are declared in include/linux/fb.h. They are fb_info,
+fb_var_screeninfo, fb_fix_screeninfo and fb_monospecs. The last
+three can be made available to and from userland.
+
+fb_info defines the current state of a particular video card. Inside
+fb_info, there exists a fb_ops structure which is a collection of
+needed functions to make fbdev and fbcon work. fb_info is only visible
+to the kernel.
+
+fb_var_screeninfo is used to describe the features of a video card
+that are user defined. With fb_var_screeninfo, things such as depth
+and the resolution may be defined.
+
+The next structure is fb_fix_screeninfo. This defines the properties
+of a card that are created when a mode is set and can't be changed
+otherwise. A good example of this is the start of the frame buffer
+memory. This "locks" the address of the frame buffer memory, so that it
+cannot be changed or moved.
+
+The last structure is fb_monospecs. In the old API, there was little
+importance for fb_monospecs. This allowed for forbidden things such as
+setting a mode of 800x600 on a fix frequency monitor. With the new API,
+fb_monospecs prevents such things, and if used correctly, can prevent a
+monitor from being cooked. fb_monospecs will not be useful until
+kernels 2.5.x.
+
+Frame Buffer Memory
+-------------------
+
+.. kernel-doc:: drivers/video/fbdev/core/fbmem.c
+   :export:
+
+Frame Buffer Colormap
+---------------------
+
+.. kernel-doc:: drivers/video/fbdev/core/fbcmap.c
+   :export:
+
+Frame Buffer Video Mode Database
+--------------------------------
+
+.. kernel-doc:: drivers/video/fbdev/core/modedb.c
+   :internal:
+
+.. kernel-doc:: drivers/video/fbdev/core/modedb.c
+   :export:
+
+Frame Buffer Macintosh Video Mode Database
+------------------------------------------
+
+.. kernel-doc:: drivers/video/fbdev/macmodes.c
+   :export:
+
+Frame Buffer Fonts
+------------------
+
+Refer to the file lib/fonts/fonts.c for more information.
+
diff --git a/Documentation/driver-api/hsi.rst b/Documentation/driver-api/hsi.rst
new file mode 100644
index 0000000..f9cec02
--- /dev/null
+++ b/Documentation/driver-api/hsi.rst
@@ -0,0 +1,88 @@
+High Speed Synchronous Serial Interface (HSI)
+=============================================
+
+Introduction
+---------------
+
+High Speed Syncronous Interface (HSI) is a fullduplex, low latency protocol,
+that is optimized for die-level interconnect between an Application Processor
+and a Baseband chipset. It has been specified by the MIPI alliance in 2003 and
+implemented by multiple vendors since then.
+
+The HSI interface supports full duplex communication over multiple channels
+(typically 8) and is capable of reaching speeds up to 200 Mbit/s.
+
+The serial protocol uses two signals, DATA and FLAG as combined data and clock
+signals and an additional READY signal for flow control. An additional WAKE
+signal can be used to wakeup the chips from standby modes. The signals are
+commonly prefixed by AC for signals going from the application die to the
+cellular die and CA for signals going the other way around.
+
+::
+
+    +------------+                                 +---------------+
+    |  Cellular  |                                 |  Application  |
+    |    Die     |                                 |      Die      |
+    |            | - - - - - - CAWAKE - - - - - - >|               |
+    |           T|------------ CADATA ------------>|R              |
+    |           X|------------ CAFLAG ------------>|X              |
+    |            |<----------- ACREADY ------------|               |
+    |            |                                 |               |
+    |            |                                 |               |
+    |            |< - - - - -  ACWAKE - - - - - - -|               |
+    |           R|<----------- ACDATA -------------|T              |
+    |           X|<----------- ACFLAG -------------|X              |
+    |            |------------ CAREADY ----------->|               |
+    |            |                                 |               |
+    |            |                                 |               |
+    +------------+                                 +---------------+
+
+HSI Subsystem in Linux
+-------------------------
+
+In the Linux kernel the hsi subsystem is supposed to be used for HSI devices.
+The hsi subsystem contains drivers for hsi controllers including support for
+multi-port controllers and provides a generic API for using the HSI ports.
+
+It also contains HSI client drivers, which make use of the generic API to
+implement a protocol used on the HSI interface. These client drivers can
+use an arbitrary number of channels.
+
+hsi-char Device
+------------------
+
+Each port automatically registers a generic client driver called hsi_char,
+which provides a charecter device for userspace representing the HSI port.
+It can be used to communicate via HSI from userspace. Userspace may
+configure the hsi_char device using the following ioctl commands:
+
+HSC_RESET
+ flush the HSI port
+
+HSC_SET_PM
+ enable or disable the client.
+
+HSC_SEND_BREAK
+ send break
+
+HSC_SET_RX
+ set RX configuration
+
+HSC_GET_RX
+ get RX configuration
+
+HSC_SET_TX
+ set TX configuration
+
+HSC_GET_TX
+ get TX configuration
+
+The kernel HSI API
+------------------
+
+.. kernel-doc:: include/linux/hsi/hsi.h
+   :internal:
+
+.. kernel-doc:: drivers/hsi/hsi_core.c
+   :export:
+
diff --git a/Documentation/driver-api/i2c.rst b/Documentation/driver-api/i2c.rst
new file mode 100644
index 0000000..f3939f7
--- /dev/null
+++ b/Documentation/driver-api/i2c.rst
@@ -0,0 +1,46 @@
+I\ :sup:`2`\ C and SMBus Subsystem
+==================================
+
+I\ :sup:`2`\ C (or without fancy typography, "I2C") is an acronym for
+the "Inter-IC" bus, a simple bus protocol which is widely used where low
+data rate communications suffice. Since it's also a licensed trademark,
+some vendors use another name (such as "Two-Wire Interface", TWI) for
+the same bus. I2C only needs two signals (SCL for clock, SDA for data),
+conserving board real estate and minimizing signal quality issues. Most
+I2C devices use seven bit addresses, and bus speeds of up to 400 kHz;
+there's a high speed extension (3.4 MHz) that's not yet found wide use.
+I2C is a multi-master bus; open drain signaling is used to arbitrate
+between masters, as well as to handshake and to synchronize clocks from
+slower clients.
+
+The Linux I2C programming interfaces support only the master side of bus
+interactions, not the slave side. The programming interface is
+structured around two kinds of driver, and two kinds of device. An I2C
+"Adapter Driver" abstracts the controller hardware; it binds to a
+physical device (perhaps a PCI device or platform_device) and exposes a
+:c:type:`struct i2c_adapter <i2c_adapter>` representing each
+I2C bus segment it manages. On each I2C bus segment will be I2C devices
+represented by a :c:type:`struct i2c_client <i2c_client>`.
+Those devices will be bound to a :c:type:`struct i2c_driver
+<i2c_driver>`, which should follow the standard Linux driver
+model. (At this writing, a legacy model is more widely used.) There are
+functions to perform various I2C protocol operations; at this writing
+all such functions are usable only from task context.
+
+The System Management Bus (SMBus) is a sibling protocol. Most SMBus
+systems are also I2C conformant. The electrical constraints are tighter
+for SMBus, and it standardizes particular protocol messages and idioms.
+Controllers that support I2C can also support most SMBus operations, but
+SMBus controllers don't support all the protocol options that an I2C
+controller will. There are functions to perform various SMBus protocol
+operations, either using I2C primitives or by issuing SMBus commands to
+i2c_adapter devices which don't support those I2C operations.
+
+.. kernel-doc:: include/linux/i2c.h
+   :internal:
+
+.. kernel-doc:: drivers/i2c/i2c-boardinfo.c
+   :functions: i2c_register_board_info
+
+.. kernel-doc:: drivers/i2c/i2c-core.c
+   :export:
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst
new file mode 100644
index 0000000..8e259c5d
--- /dev/null
+++ b/Documentation/driver-api/index.rst
@@ -0,0 +1,26 @@
+========================================
+The Linux driver implementer's API guide
+========================================
+
+The kernel offers a wide variety of interfaces to support the development
+of device drivers.  This document is an only somewhat organized collection
+of some of those interfaces — it will hopefully get better over time!  The
+available subsections can be seen below.
+
+.. class:: toc-title
+
+	   Table of contents
+
+.. toctree::
+   :maxdepth: 2
+
+   basics
+   infrastructure
+   message-based
+   sound
+   frame-buffer
+   input
+   spi
+   i2c
+   hsi
+   miscellaneous
diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst
new file mode 100644
index 0000000..5d50d67
--- /dev/null
+++ b/Documentation/driver-api/infrastructure.rst
@@ -0,0 +1,169 @@
+Device drivers infrastructure
+=============================
+
+The Basic Device Driver-Model Structures
+----------------------------------------
+
+.. kernel-doc:: include/linux/device.h
+   :internal:
+
+Device Drivers Base
+-------------------
+
+.. kernel-doc:: drivers/base/init.c
+   :internal:
+
+.. kernel-doc:: drivers/base/driver.c
+   :export:
+
+.. kernel-doc:: drivers/base/core.c
+   :export:
+
+.. kernel-doc:: drivers/base/syscore.c
+   :export:
+
+.. kernel-doc:: drivers/base/class.c
+   :export:
+
+.. kernel-doc:: drivers/base/node.c
+   :internal:
+
+.. kernel-doc:: drivers/base/firmware_class.c
+   :export:
+
+.. kernel-doc:: drivers/base/transport_class.c
+   :export:
+
+.. kernel-doc:: drivers/base/dd.c
+   :export:
+
+.. kernel-doc:: include/linux/platform_device.h
+   :internal:
+
+.. kernel-doc:: drivers/base/platform.c
+   :export:
+
+.. kernel-doc:: drivers/base/bus.c
+   :export:
+
+Buffer Sharing and Synchronization
+----------------------------------
+
+The dma-buf subsystem provides the framework for sharing buffers for
+hardware (DMA) access across multiple device drivers and subsystems, and
+for synchronizing asynchronous hardware access.
+
+This is used, for example, by drm "prime" multi-GPU support, but is of
+course not limited to GPU use cases.
+
+The three main components of this are: (1) dma-buf, representing a
+sg_table and exposed to userspace as a file descriptor to allow passing
+between devices, (2) fence, which provides a mechanism to signal when
+one device as finished access, and (3) reservation, which manages the
+shared or exclusive fence(s) associated with the buffer.
+
+dma-buf
+~~~~~~~
+
+.. kernel-doc:: drivers/dma-buf/dma-buf.c
+   :export:
+
+.. kernel-doc:: include/linux/dma-buf.h
+   :internal:
+
+reservation
+~~~~~~~~~~~
+
+.. kernel-doc:: drivers/dma-buf/reservation.c
+   :doc: Reservation Object Overview
+
+.. kernel-doc:: drivers/dma-buf/reservation.c
+   :export:
+
+.. kernel-doc:: include/linux/reservation.h
+   :internal:
+
+fence
+~~~~~
+
+.. kernel-doc:: drivers/dma-buf/fence.c
+   :export:
+
+.. kernel-doc:: include/linux/fence.h
+   :internal:
+
+.. kernel-doc:: drivers/dma-buf/seqno-fence.c
+   :export:
+
+.. kernel-doc:: include/linux/seqno-fence.h
+   :internal:
+
+.. kernel-doc:: drivers/dma-buf/fence-array.c
+   :export:
+
+.. kernel-doc:: include/linux/fence-array.h
+   :internal:
+
+.. kernel-doc:: drivers/dma-buf/reservation.c
+   :export:
+
+.. kernel-doc:: include/linux/reservation.h
+   :internal:
+
+.. kernel-doc:: drivers/dma-buf/sync_file.c
+   :export:
+
+.. kernel-doc:: include/linux/sync_file.h
+   :internal:
+
+Device Drivers DMA Management
+-----------------------------
+
+.. kernel-doc:: drivers/base/dma-coherent.c
+   :export:
+
+.. kernel-doc:: drivers/base/dma-mapping.c
+   :export:
+
+Device Drivers Power Management
+-------------------------------
+
+.. kernel-doc:: drivers/base/power/main.c
+   :export:
+
+Device Drivers ACPI Support
+---------------------------
+
+.. kernel-doc:: drivers/acpi/scan.c
+   :export:
+
+.. kernel-doc:: drivers/acpi/scan.c
+   :internal:
+
+Device drivers PnP support
+--------------------------
+
+.. kernel-doc:: drivers/pnp/core.c
+   :internal:
+
+.. kernel-doc:: drivers/pnp/card.c
+   :export:
+
+.. kernel-doc:: drivers/pnp/driver.c
+   :internal:
+
+.. kernel-doc:: drivers/pnp/manager.c
+   :export:
+
+.. kernel-doc:: drivers/pnp/support.c
+   :export:
+
+Userspace IO devices
+--------------------
+
+.. kernel-doc:: drivers/uio/uio.c
+   :export:
+
+.. kernel-doc:: include/linux/uio_driver.h
+   :internal:
+
diff --git a/Documentation/driver-api/input.rst b/Documentation/driver-api/input.rst
new file mode 100644
index 0000000..d05bf58
--- /dev/null
+++ b/Documentation/driver-api/input.rst
@@ -0,0 +1,51 @@
+Input Subsystem
+===============
+
+Input core
+----------
+
+.. kernel-doc:: include/linux/input.h
+   :internal:
+
+.. kernel-doc:: drivers/input/input.c
+   :export:
+
+.. kernel-doc:: drivers/input/ff-core.c
+   :export:
+
+.. kernel-doc:: drivers/input/ff-memless.c
+   :export:
+
+Multitouch Library
+------------------
+
+.. kernel-doc:: include/linux/input/mt.h
+   :internal:
+
+.. kernel-doc:: drivers/input/input-mt.c
+   :export:
+
+Polled input devices
+--------------------
+
+.. kernel-doc:: include/linux/input-polldev.h
+   :internal:
+
+.. kernel-doc:: drivers/input/input-polldev.c
+   :export:
+
+Matrix keyboards/keypads
+------------------------
+
+.. kernel-doc:: include/linux/input/matrix_keypad.h
+   :internal:
+
+Sparse keymap support
+---------------------
+
+.. kernel-doc:: include/linux/input/sparse-keymap.h
+   :internal:
+
+.. kernel-doc:: drivers/input/sparse-keymap.c
+   :export:
+
diff --git a/Documentation/driver-api/message-based.rst b/Documentation/driver-api/message-based.rst
new file mode 100644
index 0000000..18ff94e
--- /dev/null
+++ b/Documentation/driver-api/message-based.rst
@@ -0,0 +1,12 @@
+Message-based devices
+=====================
+
+Fusion message devices
+----------------------
+
+.. kernel-doc:: drivers/message/fusion/mptbase.c
+   :export:
+
+.. kernel-doc:: drivers/message/fusion/mptscsih.c
+   :export:
+
diff --git a/Documentation/driver-api/miscellaneous.rst b/Documentation/driver-api/miscellaneous.rst
new file mode 100644
index 0000000..8da7d11
--- /dev/null
+++ b/Documentation/driver-api/miscellaneous.rst
@@ -0,0 +1,50 @@
+Parallel Port Devices
+=====================
+
+.. kernel-doc:: include/linux/parport.h
+   :internal:
+
+.. kernel-doc:: drivers/parport/ieee1284.c
+   :export:
+
+.. kernel-doc:: drivers/parport/share.c
+   :export:
+
+.. kernel-doc:: drivers/parport/daisy.c
+   :internal:
+
+16x50 UART Driver
+=================
+
+.. kernel-doc:: drivers/tty/serial/serial_core.c
+   :export:
+
+.. kernel-doc:: drivers/tty/serial/8250/8250_core.c
+   :export:
+
+Pulse-Width Modulation (PWM)
+============================
+
+Pulse-width modulation is a modulation technique primarily used to
+control power supplied to electrical devices.
+
+The PWM framework provides an abstraction for providers and consumers of
+PWM signals. A controller that provides one or more PWM signals is
+registered as :c:type:`struct pwm_chip <pwm_chip>`. Providers
+are expected to embed this structure in a driver-specific structure.
+This structure contains fields that describe a particular chip.
+
+A chip exposes one or more PWM signal sources, each of which exposed as
+a :c:type:`struct pwm_device <pwm_device>`. Operations can be
+performed on PWM devices to control the period, duty cycle, polarity and
+active state of the signal.
+
+Note that PWM devices are exclusive resources: they can always only be
+used by one consumer at a time.
+
+.. kernel-doc:: include/linux/pwm.h
+   :internal:
+
+.. kernel-doc:: drivers/pwm/core.c
+   :export:
+   
diff --git a/Documentation/driver-api/sound.rst b/Documentation/driver-api/sound.rst
new file mode 100644
index 0000000..afef6ea
--- /dev/null
+++ b/Documentation/driver-api/sound.rst
@@ -0,0 +1,54 @@
+Sound Devices
+=============
+
+.. kernel-doc:: include/sound/core.h
+   :internal:
+
+.. kernel-doc:: sound/sound_core.c
+   :export:
+
+.. kernel-doc:: include/sound/pcm.h
+   :internal:
+
+.. kernel-doc:: sound/core/pcm.c
+   :export:
+
+.. kernel-doc:: sound/core/device.c
+   :export:
+
+.. kernel-doc:: sound/core/info.c
+   :export:
+
+.. kernel-doc:: sound/core/rawmidi.c
+   :export:
+
+.. kernel-doc:: sound/core/sound.c
+   :export:
+
+.. kernel-doc:: sound/core/memory.c
+   :export:
+
+.. kernel-doc:: sound/core/pcm_memory.c
+   :export:
+
+.. kernel-doc:: sound/core/init.c
+   :export:
+
+.. kernel-doc:: sound/core/isadma.c
+   :export:
+
+.. kernel-doc:: sound/core/control.c
+   :export:
+
+.. kernel-doc:: sound/core/pcm_lib.c
+   :export:
+
+.. kernel-doc:: sound/core/hwdep.c
+   :export:
+
+.. kernel-doc:: sound/core/pcm_native.c
+   :export:
+
+.. kernel-doc:: sound/core/memalloc.c
+   :export:
+
diff --git a/Documentation/driver-api/spi.rst b/Documentation/driver-api/spi.rst
new file mode 100644
index 0000000..f64cb66
--- /dev/null
+++ b/Documentation/driver-api/spi.rst
@@ -0,0 +1,53 @@
+Serial Peripheral Interface (SPI)
+=================================
+
+SPI is the "Serial Peripheral Interface", widely used with embedded
+systems because it is a simple and efficient interface: basically a
+multiplexed shift register. Its three signal wires hold a clock (SCK,
+often in the range of 1-20 MHz), a "Master Out, Slave In" (MOSI) data
+line, and a "Master In, Slave Out" (MISO) data line. SPI is a full
+duplex protocol; for each bit shifted out the MOSI line (one per clock)
+another is shifted in on the MISO line. Those bits are assembled into
+words of various sizes on the way to and from system memory. An
+additional chipselect line is usually active-low (nCS); four signals are
+normally used for each peripheral, plus sometimes an interrupt.
+
+The SPI bus facilities listed here provide a generalized interface to
+declare SPI busses and devices, manage them according to the standard
+Linux driver model, and perform input/output operations. At this time,
+only "master" side interfaces are supported, where Linux talks to SPI
+peripherals and does not implement such a peripheral itself. (Interfaces
+to support implementing SPI slaves would necessarily look different.)
+
+The programming interface is structured around two kinds of driver, and
+two kinds of device. A "Controller Driver" abstracts the controller
+hardware, which may be as simple as a set of GPIO pins or as complex as
+a pair of FIFOs connected to dual DMA engines on the other side of the
+SPI shift register (maximizing throughput). Such drivers bridge between
+whatever bus they sit on (often the platform bus) and SPI, and expose
+the SPI side of their device as a :c:type:`struct spi_master
+<spi_master>`. SPI devices are children of that master,
+represented as a :c:type:`struct spi_device <spi_device>` and
+manufactured from :c:type:`struct spi_board_info
+<spi_board_info>` descriptors which are usually provided by
+board-specific initialization code. A :c:type:`struct spi_driver
+<spi_driver>` is called a "Protocol Driver", and is bound to a
+spi_device using normal driver model calls.
+
+The I/O model is a set of queued messages. Protocol drivers submit one
+or more :c:type:`struct spi_message <spi_message>` objects,
+which are processed and completed asynchronously. (There are synchronous
+wrappers, however.) Messages are built from one or more
+:c:type:`struct spi_transfer <spi_transfer>` objects, each of
+which wraps a full duplex SPI transfer. A variety of protocol tweaking
+options are needed, because different chips adopt very different
+policies for how they use the bits transferred with SPI.
+
+.. kernel-doc:: include/linux/spi/spi.h
+   :internal:
+
+.. kernel-doc:: drivers/spi/spi.c
+   :functions: spi_register_board_info
+
+.. kernel-doc:: drivers/spi/spi.c
+   :export:
diff --git a/Documentation/driver-model/device.txt b/Documentation/driver-model/device.txt
index 1e70220..2403eb8 100644
--- a/Documentation/driver-model/device.txt
+++ b/Documentation/driver-model/device.txt
@@ -50,7 +50,7 @@
 Please see Documentation/filesystems/sysfs.txt for more information
 on how sysfs works.
 
-As explained in Documentation/kobject.txt, device attributes must be be
+As explained in Documentation/kobject.txt, device attributes must be
 created before the KOBJ_ADD uevent is generated. The only way to realize
 that is by defining an attribute group.
 
diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt
index 2d485de..ac892b3 100644
--- a/Documentation/email-clients.txt
+++ b/Documentation/email-clients.txt
@@ -1,23 +1,27 @@
+.. _email_clients:
+
 Email clients info for Linux
-======================================================================
+============================
 
 Git
-----------------------------------------------------------------------
-These days most developers use `git send-email` instead of regular
-email clients.  The man page for this is quite good.  On the receiving
-end, maintainers use `git am` to apply the patches.
+---
 
-If you are new to git then send your first patch to yourself.  Save it
-as raw text including all the headers.  Run `git am raw_email.txt` and
-then review the changelog with `git log`.  When that works then send
+These days most developers use ``git send-email`` instead of regular
+email clients.  The man page for this is quite good.  On the receiving
+end, maintainers use ``git am`` to apply the patches.
+
+If you are new to ``git`` then send your first patch to yourself.  Save it
+as raw text including all the headers.  Run ``git am raw_email.txt`` and
+then review the changelog with ``git log``.  When that works then send
 the patch to the appropriate mailing list(s).
 
 General Preferences
-----------------------------------------------------------------------
+-------------------
+
 Patches for the Linux kernel are submitted via email, preferably as
 inline text in the body of the email.  Some maintainers accept
 attachments, but then the attachments should have content-type
-"text/plain".  However, attachments are generally frowned upon because
+``text/plain``.  However, attachments are generally frowned upon because
 it makes quoting portions of the patch more difficult in the patch
 review process.
 
@@ -25,7 +29,7 @@
 patch text untouched.  For example, they should not modify or delete tabs
 or spaces, even at the beginning or end of lines.
 
-Don't send patches with "format=flowed".  This can cause unexpected
+Don't send patches with ``format=flowed``.  This can cause unexpected
 and unwanted line breaks.
 
 Don't let your email client do automatic word wrapping for you.
@@ -54,57 +58,63 @@
 
 
 Some email client (MUA) hints
-----------------------------------------------------------------------
+-----------------------------
+
 Here are some specific MUA configuration hints for editing and sending
 patches for the Linux kernel.  These are not meant to be complete
 software package configuration summaries.
 
-Legend:
-TUI = text-based user interface
-GUI = graphical user interface
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Legend:
+
+- TUI = text-based user interface
+- GUI = graphical user interface
+
 Alpine (TUI)
+************
 
 Config options:
-In the "Sending Preferences" section:
 
-- "Do Not Send Flowed Text" must be enabled
-- "Strip Whitespace Before Sending" must be disabled
+In the :menuselection:`Sending Preferences` section:
+
+- :menuselection:`Do Not Send Flowed Text` must be ``enabled``
+- :menuselection:`Strip Whitespace Before Sending` must be ``disabled``
 
 When composing the message, the cursor should be placed where the patch
-should appear, and then pressing CTRL-R let you specify the patch file
+should appear, and then pressing :kbd:`CTRL-R` let you specify the patch file
 to insert into the message.
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Claws Mail (GUI)
+****************
 
 Works. Some people use this successfully for patches.
 
-To insert a patch use Message->Insert File (CTRL+i) or an external editor.
+To insert a patch use :menuselection:`Message-->Insert` File (:kbd:`CTRL-I`)
+or an external editor.
 
 If the inserted patch has to be edited in the Claws composition window
-"Auto wrapping" in Configuration->Preferences->Compose->Wrapping should be
+"Auto wrapping" in
+:menuselection:`Configuration-->Preferences-->Compose-->Wrapping` should be
 disabled.
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Evolution (GUI)
+***************
 
 Some people use this successfully for patches.
 
 When composing mail select: Preformat
-  from Format->Paragraph Style->Preformatted (Ctrl-7)
+  from :menuselection:`Format-->Paragraph Style-->Preformatted` (:kbd:`CTRL-7`)
   or the toolbar
 
 Then use:
-  Insert->Text File... (Alt-n x)
+:menuselection:`Insert-->Text File...` (:kbd:`ALT-N x`)
 to insert the patch.
 
-You can also "diff -Nru old.c new.c | xclip", select Preformat, then
-paste with the middle button.
+You can also ``diff -Nru old.c new.c | xclip``, select
+:menuselection:`Preformat`, then paste with the middle button.
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Kmail (GUI)
+***********
 
 Some people use Kmail successfully for patches.
 
@@ -120,11 +130,12 @@
 wrapping.
 
 At the bottom of your email, put the commonly-used patch delimiter before
-inserting your patch:  three hyphens (---).
+inserting your patch:  three hyphens (``---``).
 
-Then from the "Message" menu item, select insert file and choose your patch.
+Then from the :menuselection:`Message` menu item, select insert file and
+choose your patch.
 As an added bonus you can customise the message creation toolbar menu
-and put the "insert file" icon there.
+and put the :menuselection:`insert file` icon there.
 
 Make the composer window wide enough so that no lines wrap. As of
 KMail 1.13.5 (KDE 4.5.4), KMail will apply word wrapping when sending
@@ -139,86 +150,96 @@
 
 If you absolutely must send patches as attachments instead of inlining
 them as text, right click on the attachment and select properties, and
-highlight "Suggest automatic display" to make the attachment inlined to
-make it more viewable.
+highlight :menuselection:`Suggest automatic display` to make the attachment
+inlined to make it more viewable.
 
 When saving patches that are sent as inlined text, select the email that
 contains the patch from the message list pane, right click and select
-"save as".  You can use the whole email unmodified as a patch if it was
-properly composed.  There is no option currently to save the email when you
-are actually viewing it in its own window -- there has been a request filed
-at kmail's bugzilla and hopefully this will be addressed.  Emails are saved
-as read-write for user only so you will have to chmod them to make them
+:menuselection:`save as`.  You can use the whole email unmodified as a patch
+if it was properly composed.  There is no option currently to save the email
+when you are actually viewing it in its own window -- there has been a request
+filed at kmail's bugzilla and hopefully this will be addressed.  Emails are
+saved as read-write for user only so you will have to chmod them to make them
 group and world readable if you copy them elsewhere.
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Lotus Notes (GUI)
+*****************
 
 Run away from it.
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Mutt (TUI)
+**********
 
-Plenty of Linux developers use mutt, so it must work pretty well.
+Plenty of Linux developers use ``mutt``, so it must work pretty well.
 
 Mutt doesn't come with an editor, so whatever editor you use should be
 used in a way that there are no automatic linebreaks.  Most editors have
-an "insert file" option that inserts the contents of a file unaltered.
+an :menuselection:`insert file` option that inserts the contents of a file
+unaltered.
 
-To use 'vim' with mutt:
+To use ``vim`` with mutt::
+
   set editor="vi"
 
-  If using xclip, type the command
+If using xclip, type the command::
+
   :set paste
-  before middle button or shift-insert or use
+
+before middle button or shift-insert or use::
+
   :r filename
 
 if you want to include the patch inline.
-(a)ttach works fine without "set paste".
+(a)ttach works fine without ``set paste``.
 
-You can also generate patches with 'git format-patch' and then use Mutt
-to send them:
+You can also generate patches with ``git format-patch`` and then use Mutt
+to send them::
+
     $ mutt -H 0001-some-bug-fix.patch
 
 Config options:
+
 It should work with default settings.
-However, it's a good idea to set the "send_charset" to:
+However, it's a good idea to set the ``send_charset`` to::
+
   set send_charset="us-ascii:utf-8"
 
 Mutt is highly customizable. Here is a minimum configuration to start
-using Mutt to send patches through Gmail:
+using Mutt to send patches through Gmail::
 
-# .muttrc
-# ================  IMAP ====================
-set imap_user = 'yourusername@gmail.com'
-set imap_pass = 'yourpassword'
-set spoolfile = imaps://imap.gmail.com/INBOX
-set folder = imaps://imap.gmail.com/
-set record="imaps://imap.gmail.com/[Gmail]/Sent Mail"
-set postponed="imaps://imap.gmail.com/[Gmail]/Drafts"
-set mbox="imaps://imap.gmail.com/[Gmail]/All Mail"
+  # .muttrc
+  # ================  IMAP ====================
+  set imap_user = 'yourusername@gmail.com'
+  set imap_pass = 'yourpassword'
+  set spoolfile = imaps://imap.gmail.com/INBOX
+  set folder = imaps://imap.gmail.com/
+  set record="imaps://imap.gmail.com/[Gmail]/Sent Mail"
+  set postponed="imaps://imap.gmail.com/[Gmail]/Drafts"
+  set mbox="imaps://imap.gmail.com/[Gmail]/All Mail"
 
-# ================  SMTP  ====================
-set smtp_url = "smtp://username@smtp.gmail.com:587/"
-set smtp_pass = $imap_pass
-set ssl_force_tls = yes # Require encrypted connection
+  # ================  SMTP  ====================
+  set smtp_url = "smtp://username@smtp.gmail.com:587/"
+  set smtp_pass = $imap_pass
+  set ssl_force_tls = yes # Require encrypted connection
 
-# ================  Composition  ====================
-set editor = `echo \$EDITOR`
-set edit_headers = yes  # See the headers when editing
-set charset = UTF-8     # value of $LANG; also fallback for send_charset
-# Sender, email address, and sign-off line must match
-unset use_domain        # because joe@localhost is just embarrassing
-set realname = "YOUR NAME"
-set from = "username@gmail.com"
-set use_from = yes
+  # ================  Composition  ====================
+  set editor = `echo \$EDITOR`
+  set edit_headers = yes  # See the headers when editing
+  set charset = UTF-8     # value of $LANG; also fallback for send_charset
+  # Sender, email address, and sign-off line must match
+  unset use_domain        # because joe@localhost is just embarrassing
+  set realname = "YOUR NAME"
+  set from = "username@gmail.com"
+  set use_from = yes
 
 The Mutt docs have lots more information:
+
     http://dev.mutt.org/trac/wiki/UseCases/Gmail
+
     http://dev.mutt.org/doc/manual.html
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Pine (TUI)
+**********
 
 Pine has had some whitespace truncation issues in the past, but these
 should all be fixed now.
@@ -226,12 +247,13 @@
 Use alpine (pine's successor) if you can.
 
 Config options:
-- quell-flowed-text is needed for recent versions
-- the "no-strip-whitespace-before-send" option is needed
+
+- ``quell-flowed-text`` is needed for recent versions
+- the ``no-strip-whitespace-before-send`` option is needed
 
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Sylpheed (GUI)
+**************
 
 - Works well for inlining text (or using attachments).
 - Allows use of an external editor.
@@ -241,50 +263,50 @@
 - Adding addresses to address book doesn't understand the display name
   properly.
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Thunderbird (GUI)
+*****************
 
 Thunderbird is an Outlook clone that likes to mangle text, but there are ways
 to coerce it into behaving.
 
 - Allow use of an external editor:
   The easiest thing to do with Thunderbird and patches is to use an
-  "external editor" extension and then just use your favorite $EDITOR
+  "external editor" extension and then just use your favorite ``$EDITOR``
   for reading/merging patches into the body text.  To do this, download
   and install the extension, then add a button for it using
-  View->Toolbars->Customize... and finally just click on it when in the
-  Compose dialog.
+  :menuselection:`View-->Toolbars-->Customize...` and finally just click on it
+  when in the :menuselection:`Compose` dialog.
 
   Please note that "external editor" requires that your editor must not
   fork, or in other words, the editor must not return before closing.
   You may have to pass additional flags or change the settings of your
   editor. Most notably if you are using gvim then you must pass the -f
-  option to gvim by putting "/usr/bin/gvim -f" (if the binary is in
-  /usr/bin) to the text editor field in "external editor" settings. If you
-  are using some other editor then please read its manual to find out how
-  to do this.
+  option to gvim by putting ``/usr/bin/gvim -f`` (if the binary is in
+  ``/usr/bin``) to the text editor field in :menuselection:`external editor`
+  settings. If you are using some other editor then please read its manual
+  to find out how to do this.
 
 To beat some sense out of the internal editor, do this:
 
-- Edit your Thunderbird config settings so that it won't use format=flowed.
-  Go to "edit->preferences->advanced->config editor" to bring up the
-  thunderbird's registry editor.
+- Edit your Thunderbird config settings so that it won't use ``format=flowed``.
+  Go to :menuselection:`edit-->preferences-->advanced-->config editor` to bring up
+  the thunderbird's registry editor.
 
-- Set "mailnews.send_plaintext_flowed" to "false"
+- Set ``mailnews.send_plaintext_flowed`` to ``false``
 
-- Set "mailnews.wraplength" from "72" to "0"
+- Set ``mailnews.wraplength`` from ``72`` to ``0``
 
-- "View" > "Message Body As" > "Plain Text"
+- :menuselection:`View-->Message Body As-->Plain Text`
 
-- "View" > "Character Encoding" > "Unicode (UTF-8)"
+- :menuselection:`View-->Character Encoding-->Unicode (UTF-8)`
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 TkRat (GUI)
+***********
 
 Works.  Use "Insert file..." or external editor.
 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Gmail (Web GUI)
+***************
 
 Does not work for sending patches.
 
@@ -295,5 +317,3 @@
 
 Another problem is that Gmail will base64-encode any message that has a
 non-ASCII character. That includes things like European names.
-
-                                ###
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
index 0c16a22..23d18b8 100644
--- a/Documentation/filesystems/dax.txt
+++ b/Documentation/filesystems/dax.txt
@@ -123,9 +123,12 @@
 mapped caches such as ARM, MIPS and SPARC.
 
 Calling get_user_pages() on a range of user memory that has been mmaped
-from a DAX file will fail as there are no 'struct page' to describe
-those pages.  This problem is being worked on.  That means that O_DIRECT
-reads/writes to those memory ranges from a non-DAX file will fail (note
-that O_DIRECT reads/writes _of a DAX file_ do work, it is the memory
-that is being accessed that is key here).  Other things that will not
-work include RDMA, sendfile() and splice().
+from a DAX file will fail when there are no 'struct page' to describe
+those pages.  This problem has been addressed in some device drivers
+by adding optional struct page support for pages under the control of
+the driver (see CONFIG_NVDIMM_PFN in drivers/nvdimm for an example of
+how to do this). In the non struct page cases O_DIRECT reads/writes to
+those memory ranges from a non-DAX file will fail (note that O_DIRECT
+reads/writes _of a DAX file_ do work, it is the memory that is being
+accessed that is key here).  Other things that will not work in the
+non struct page case include RDMA, sendfile() and splice().
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
index d6259c7..bcbf971 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -183,12 +183,10 @@
 moves it over to the old name.  The new file may be on a different
 filesystem, so both st_dev and st_ino of the file may change.
 
-Any open files referring to this inode will access the old data and
-metadata.  Similarly any file locks obtained before copy_up will not
-apply to the copied up file.
+Any open files referring to this inode will access the old data.
 
-On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and
-fsetxattr(2) will fail with EROFS.
+Any file locks (and leases) obtained before copy_up will not apply
+to the copied up file.
 
 If a file with multiple hard links is copied up, then this will
 "break" the link.  Changes will not be propagated to other names
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 68080ad..fcc1ac0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -145,7 +145,7 @@
 		symbol the task is blocked in - or "0" if not blocked.
  pagemap	Page table
  stack		Report full stack trace, enable via CONFIG_STACKTRACE
- smaps		a extension based on maps, showing the memory consumption of
+ smaps		an extension based on maps, showing the memory consumption of
 		each mapping and flags associated with it
  numa_maps	an extension based on maps, showing the memory locality and
 		binding policy as well as mem usage (in pages) of each mapping.
diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt
deleted file mode 100644
index 7b72778..0000000
--- a/Documentation/gcov.txt
+++ /dev/null
@@ -1,257 +0,0 @@
-Using gcov with the Linux kernel
-================================
-
-1. Introduction
-2. Preparation
-3. Customization
-4. Files
-5. Modules
-6. Separated build and test machines
-7. Troubleshooting
-Appendix A: sample script: gather_on_build.sh
-Appendix B: sample script: gather_on_test.sh
-
-
-1. Introduction
-===============
-
-gcov profiling kernel support enables the use of GCC's coverage testing
-tool gcov [1] with the Linux kernel. Coverage data of a running kernel
-is exported in gcov-compatible format via the "gcov" debugfs directory.
-To get coverage data for a specific file, change to the kernel build
-directory and use gcov with the -o option as follows (requires root):
-
-# cd /tmp/linux-out
-# gcov -o /sys/kernel/debug/gcov/tmp/linux-out/kernel spinlock.c
-
-This will create source code files annotated with execution counts
-in the current directory. In addition, graphical gcov front-ends such
-as lcov [2] can be used to automate the process of collecting data
-for the entire kernel and provide coverage overviews in HTML format.
-
-Possible uses:
-
-* debugging (has this line been reached at all?)
-* test improvement (how do I change my test to cover these lines?)
-* minimizing kernel configurations (do I need this option if the
-  associated code is never run?)
-
---
-
-[1] http://gcc.gnu.org/onlinedocs/gcc/Gcov.html
-[2] http://ltp.sourceforge.net/coverage/lcov.php
-
-
-2. Preparation
-==============
-
-Configure the kernel with:
-
-        CONFIG_DEBUG_FS=y
-        CONFIG_GCOV_KERNEL=y
-
-select the gcc's gcov format, default is autodetect based on gcc version:
-
-        CONFIG_GCOV_FORMAT_AUTODETECT=y
-
-and to get coverage data for the entire kernel:
-
-        CONFIG_GCOV_PROFILE_ALL=y
-
-Note that kernels compiled with profiling flags will be significantly
-larger and run slower. Also CONFIG_GCOV_PROFILE_ALL may not be supported
-on all architectures.
-
-Profiling data will only become accessible once debugfs has been
-mounted:
-
-        mount -t debugfs none /sys/kernel/debug
-
-
-3. Customization
-================
-
-To enable profiling for specific files or directories, add a line
-similar to the following to the respective kernel Makefile:
-
-        For a single file (e.g. main.o):
-                GCOV_PROFILE_main.o := y
-
-        For all files in one directory:
-                GCOV_PROFILE := y
-
-To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL
-is specified, use:
-
-                GCOV_PROFILE_main.o := n
-        and:
-                GCOV_PROFILE := n
-
-Only files which are linked to the main kernel image or are compiled as
-kernel modules are supported by this mechanism.
-
-
-4. Files
-========
-
-The gcov kernel support creates the following files in debugfs:
-
-        /sys/kernel/debug/gcov
-                Parent directory for all gcov-related files.
-
-        /sys/kernel/debug/gcov/reset
-                Global reset file: resets all coverage data to zero when
-                written to.
-
-        /sys/kernel/debug/gcov/path/to/compile/dir/file.gcda
-                The actual gcov data file as understood by the gcov
-                tool. Resets file coverage data to zero when written to.
-
-        /sys/kernel/debug/gcov/path/to/compile/dir/file.gcno
-                Symbolic link to a static data file required by the gcov
-                tool. This file is generated by gcc when compiling with
-                option -ftest-coverage.
-
-
-5. Modules
-==========
-
-Kernel modules may contain cleanup code which is only run during
-module unload time. The gcov mechanism provides a means to collect
-coverage data for such code by keeping a copy of the data associated
-with the unloaded module. This data remains available through debugfs.
-Once the module is loaded again, the associated coverage counters are
-initialized with the data from its previous instantiation.
-
-This behavior can be deactivated by specifying the gcov_persist kernel
-parameter:
-
-        gcov_persist=0
-
-At run-time, a user can also choose to discard data for an unloaded
-module by writing to its data file or the global reset file.
-
-
-6. Separated build and test machines
-====================================
-
-The gcov kernel profiling infrastructure is designed to work out-of-the
-box for setups where kernels are built and run on the same machine. In
-cases where the kernel runs on a separate machine, special preparations
-must be made, depending on where the gcov tool is used:
-
-a) gcov is run on the TEST machine
-
-The gcov tool version on the test machine must be compatible with the
-gcc version used for kernel build. Also the following files need to be
-copied from build to test machine:
-
-from the source tree:
-  - all C source files + headers
-
-from the build tree:
-  - all C source files + headers
-  - all .gcda and .gcno files
-  - all links to directories
-
-It is important to note that these files need to be placed into the
-exact same file system location on the test machine as on the build
-machine. If any of the path components is symbolic link, the actual
-directory needs to be used instead (due to make's CURDIR handling).
-
-b) gcov is run on the BUILD machine
-
-The following files need to be copied after each test case from test
-to build machine:
-
-from the gcov directory in sysfs:
-  - all .gcda files
-  - all links to .gcno files
-
-These files can be copied to any location on the build machine. gcov
-must then be called with the -o option pointing to that directory.
-
-Example directory setup on the build machine:
-
-  /tmp/linux:    kernel source tree
-  /tmp/out:      kernel build directory as specified by make O=
-  /tmp/coverage: location of the files copied from the test machine
-
-  [user@build] cd /tmp/out
-  [user@build] gcov -o /tmp/coverage/tmp/out/init main.c
-
-
-7. Troubleshooting
-==================
-
-Problem:  Compilation aborts during linker step.
-Cause:    Profiling flags are specified for source files which are not
-          linked to the main kernel or which are linked by a custom
-          linker procedure.
-Solution: Exclude affected source files from profiling by specifying
-          GCOV_PROFILE := n or GCOV_PROFILE_basename.o := n in the
-          corresponding Makefile.
-
-Problem:  Files copied from sysfs appear empty or incomplete.
-Cause:    Due to the way seq_file works, some tools such as cp or tar
-          may not correctly copy files from sysfs.
-Solution: Use 'cat' to read .gcda files and 'cp -d' to copy links.
-          Alternatively use the mechanism shown in Appendix B.
-
-
-Appendix A: gather_on_build.sh
-==============================
-
-Sample script to gather coverage meta files on the build machine
-(see 6a):
-#!/bin/bash
-
-KSRC=$1
-KOBJ=$2
-DEST=$3
-
-if [ -z "$KSRC" ] || [ -z "$KOBJ" ] || [ -z "$DEST" ]; then
-  echo "Usage: $0 <ksrc directory> <kobj directory> <output.tar.gz>" >&2
-  exit 1
-fi
-
-KSRC=$(cd $KSRC; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
-KOBJ=$(cd $KOBJ; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
-
-find $KSRC $KOBJ \( -name '*.gcno' -o -name '*.[ch]' -o -type l \) -a \
-                 -perm /u+r,g+r | tar cfz $DEST -P -T -
-
-if [ $? -eq 0 ] ; then
-  echo "$DEST successfully created, copy to test system and unpack with:"
-  echo "  tar xfz $DEST -P"
-else
-  echo "Could not create file $DEST"
-fi
-
-
-Appendix B: gather_on_test.sh
-=============================
-
-Sample script to gather coverage data files on the test machine
-(see 6b):
-
-#!/bin/bash -e
-
-DEST=$1
-GCDA=/sys/kernel/debug/gcov
-
-if [ -z "$DEST" ] ; then
-  echo "Usage: $0 <output.tar.gz>" >&2
-  exit 1
-fi
-
-TEMPDIR=$(mktemp -d)
-echo Collecting data..
-find $GCDA -type d -exec mkdir -p $TEMPDIR/\{\} \;
-find $GCDA -name '*.gcda' -exec sh -c 'cat < $0 > '$TEMPDIR'/$0' {} \;
-find $GCDA -name '*.gcno' -exec sh -c 'cp -d $0 '$TEMPDIR'/$0' {} \;
-tar czf $DEST -C $TEMPDIR sys
-rm -rf $TEMPDIR
-
-echo "$DEST successfully created, copy to build system and unpack with:"
-echo "  tar xfz $DEST"
diff --git a/Documentation/gpu/conf.py b/Documentation/gpu/conf.py
new file mode 100644
index 0000000..6314d17
--- /dev/null
+++ b/Documentation/gpu/conf.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = "Linux GPU Driver Developer's Guide"
+
+tags.add("subproject")
diff --git a/Documentation/gpu/index.rst b/Documentation/gpu/index.rst
index fcac0fa..5ff3d2b 100644
--- a/Documentation/gpu/index.rst
+++ b/Documentation/gpu/index.rst
@@ -12,3 +12,10 @@
    drm-uapi
    i915
    vga-switcheroo
+
+.. only::  subproject
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/hsi.txt b/Documentation/hsi.txt
deleted file mode 100644
index 6ac6cd5..0000000
--- a/Documentation/hsi.txt
+++ /dev/null
@@ -1,75 +0,0 @@
-HSI - High-speed Synchronous Serial Interface
-
-1. Introduction
-~~~~~~~~~~~~~~~
-
-High Speed Syncronous Interface (HSI) is a fullduplex, low latency protocol,
-that is optimized for die-level interconnect between an Application Processor
-and a Baseband chipset. It has been specified by the MIPI alliance in 2003 and
-implemented by multiple vendors since then.
-
-The HSI interface supports full duplex communication over multiple channels
-(typically 8) and is capable of reaching speeds up to 200 Mbit/s.
-
-The serial protocol uses two signals, DATA and FLAG as combined data and clock
-signals and an additional READY signal for flow control. An additional WAKE
-signal can be used to wakeup the chips from standby modes. The signals are
-commonly prefixed by AC for signals going from the application die to the
-cellular die and CA for signals going the other way around.
-
-+------------+                                 +---------------+
-|  Cellular  |                                 |  Application  |
-|    Die     |                                 |      Die      |
-|            | - - - - - - CAWAKE - - - - - - >|               |
-|           T|------------ CADATA ------------>|R              |
-|           X|------------ CAFLAG ------------>|X              |
-|            |<----------- ACREADY ------------|               |
-|            |                                 |               |
-|            |                                 |               |
-|            |< - - - - -  ACWAKE - - - - - - -|               |
-|           R|<----------- ACDATA -------------|T              |
-|           X|<----------- ACFLAG -------------|X              |
-|            |------------ CAREADY ----------->|               |
-|            |                                 |               |
-|            |                                 |               |
-+------------+                                 +---------------+
-
-2. HSI Subsystem in Linux
-~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In the Linux kernel the hsi subsystem is supposed to be used for HSI devices.
-The hsi subsystem contains drivers for hsi controllers including support for
-multi-port controllers and provides a generic API for using the HSI ports.
-
-It also contains HSI client drivers, which make use of the generic API to
-implement a protocol used on the HSI interface. These client drivers can
-use an arbitrary number of channels.
-
-3. hsi-char Device
-~~~~~~~~~~~~~~~~~~
-
-Each port automatically registers a generic client driver called hsi_char,
-which provides a charecter device for userspace representing the HSI port.
-It can be used to communicate via HSI from userspace. Userspace may
-configure the hsi_char device using the following ioctl commands:
-
-* HSC_RESET:
- - flush the HSI port
-
-* HSC_SET_PM
- - enable or disable the client.
-
-* HSC_SEND_BREAK
- - send break
-
-* HSC_SET_RX
- - set RX configuration
-
-* HSC_GET_RX
- - get RX configuration
-
-* HSC_SET_TX
- - set TX configuration
-
-* HSC_GET_TX
- - get TX configuration
diff --git a/Documentation/hwmon/adt7470 b/Documentation/hwmon/adt7470
index 8ce4aa0..fe68e18 100644
--- a/Documentation/hwmon/adt7470
+++ b/Documentation/hwmon/adt7470
@@ -65,6 +65,23 @@
 temperature sensor associated with the PWM control exceeds
 pwm#_auto_point2_temp.
 
+The driver also allows control of the PWM frequency:
+
+* pwm1_freq
+
+The PWM frequency is rounded to the nearest one of:
+
+* 11.0 Hz
+* 14.7 Hz
+* 22.1 Hz
+* 29.4 Hz
+* 35.3 Hz
+* 44.1 Hz
+* 58.8 Hz
+* 88.2 Hz
+* 1.4 kHz
+* 22.5 kHz
+
 Notes
 -----
 
diff --git a/Documentation/hwmon/ftsteutates b/Documentation/hwmon/ftsteutates
index 2a1bf69..8c10a91 100644
--- a/Documentation/hwmon/ftsteutates
+++ b/Documentation/hwmon/ftsteutates
@@ -19,5 +19,5 @@
 implemented in this driver.
 
 Specification of the chip can be found here:
-ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf
-ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf
+ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf
+ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf
diff --git a/Documentation/hwmon/hwmon-kernel-api.txt b/Documentation/hwmon/hwmon-kernel-api.txt
index 2ecdbfc..ef9d749 100644
--- a/Documentation/hwmon/hwmon-kernel-api.txt
+++ b/Documentation/hwmon/hwmon-kernel-api.txt
@@ -34,6 +34,19 @@
 				       const char *name, void *drvdata,
 				       const struct attribute_group **groups);
 
+struct device *
+hwmon_device_register_with_info(struct device *dev,
+				const char *name, void *drvdata,
+				const struct hwmon_chip_info *info,
+				const struct attribute_group **groups);
+
+struct device *
+devm_hwmon_device_register_with_info(struct device *dev,
+				     const char *name,
+				     void *drvdata,
+				     const struct hwmon_chip_info *info,
+				     const struct attribute_group **groups);
+
 void hwmon_device_unregister(struct device *dev);
 void devm_hwmon_device_unregister(struct device *dev);
 
@@ -60,15 +73,229 @@
 hwmon_device_register_with_groups. However, it is device managed, meaning the
 hwmon device does not have to be removed explicitly by the removal function.
 
+hwmon_device_register_with_info is the most comprehensive and preferred means
+to register a hardware monitoring device. It creates the standard sysfs
+attributes in the hardware monitoring core, letting the driver focus on reading
+from and writing to the chip instead of having to bother with sysfs attributes.
+Its parameters are described in more detail below.
+
+devm_hwmon_device_register_with_info is similar to
+hwmon_device_register_with_info. However, it is device managed, meaning the
+hwmon device does not have to be removed explicitly by the removal function.
+
 hwmon_device_unregister deregisters a registered hardware monitoring device.
 The parameter of this function is the pointer to the registered hardware
 monitoring device structure. This function must be called from the driver
 remove function if the hardware monitoring device was registered with
-hwmon_device_register or with hwmon_device_register_with_groups.
+hwmon_device_register, hwmon_device_register_with_groups, or
+hwmon_device_register_with_info.
 
 devm_hwmon_device_unregister does not normally have to be called. It is only
 needed for error handling, and only needed if the driver probe fails after
-the call to devm_hwmon_device_register_with_groups.
+the call to devm_hwmon_device_register_with_groups and if the automatic
+(device managed) removal would be too late.
+
+Using devm_hwmon_device_register_with_info()
+--------------------------------------------
+
+hwmon_device_register_with_info() registers a hardware monitoring device.
+The parameters to this function are
+
+struct device *dev	Pointer to parent device
+const char *name	Device name
+void *drvdata		Driver private data
+const struct hwmon_chip_info *info
+			Pointer to chip description.
+const struct attribute_group **groups
+			Null-terminated list of additional sysfs attribute
+			groups.
+
+This function returns a pointer to the created hardware monitoring device
+on success and a negative error code for failure.
+
+The hwmon_chip_info structure looks as follows.
+
+struct hwmon_chip_info {
+	const struct hwmon_ops *ops;
+	const struct hwmon_channel_info **info;
+};
+
+It contains the following fields:
+
+* ops:	Pointer to device operations.
+* info: NULL-terminated list of device channel descriptors.
+
+The list of hwmon operations is defined as:
+
+struct hwmon_ops {
+	umode_t (*is_visible)(const void *, enum hwmon_sensor_types type,
+			      u32 attr, int);
+	int (*read)(struct device *, enum hwmon_sensor_types type,
+		    u32 attr, int, long *);
+	int (*write)(struct device *, enum hwmon_sensor_types type,
+		     u32 attr, int, long);
+};
+
+It defines the following operations.
+
+* is_visible: Pointer to a function to return the file mode for each supported
+  attribute. This function is mandatory.
+
+* read: Pointer to a function for reading a value from the chip. This function
+  is optional, but must be provided if any readable attributes exist.
+
+* write: Pointer to a function for writing a value to the chip. This function is
+  optional, but must be provided if any writeable attributes exist.
+
+Each sensor channel is described with struct hwmon_channel_info, which is
+defined as follows.
+
+struct hwmon_channel_info {
+	enum hwmon_sensor_types type;
+	u32 *config;
+};
+
+It contains following fields:
+
+* type: The hardware monitoring sensor type.
+  Supported sensor types are
+  * hwmon_chip		A virtual sensor type, used to describe attributes
+			which apply to the entire chip.
+  * hwmon_temp		Temperature sensor
+  * hwmon_in		Voltage sensor
+  * hwmon_curr		Current sensor
+  * hwmon_power		Power sensor
+  * hwmon_energy	Energy sensor
+  * hwmon_humidity	Humidity sensor
+  * hwmon_fan		Fan speed sensor
+  * hwmon_pwm		PWM control
+
+* config: Pointer to a 0-terminated list of configuration values for each
+  sensor of the given type. Each value is a combination of bit values
+  describing the attributes supposed by a single sensor.
+
+As an example, here is the complete description file for a LM75 compatible
+sensor chip. The chip has a single temperature sensor. The driver wants to
+register with the thermal subsystem (HWMON_C_REGISTER_TZ), and it supports
+the update_interval attribute (HWMON_C_UPDATE_INTERVAL). The chip supports
+reading the temperature (HWMON_T_INPUT), it has a maximum temperature
+register (HWMON_T_MAX) as well as a maximum temperature hysteresis register
+(HWMON_T_MAX_HYST).
+
+static const u32 lm75_chip_config[] = {
+	HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL,
+	0
+};
+
+static const struct hwmon_channel_info lm75_chip = {
+	.type = hwmon_chip,
+	.config = lm75_chip_config,
+};
+
+static const u32 lm75_temp_config[] = {
+	HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MAX_HYST,
+	0
+};
+
+static const struct hwmon_channel_info lm75_temp = {
+	.type = hwmon_temp,
+	.config = lm75_temp_config,
+};
+
+static const struct hwmon_channel_info *lm75_info[] = {
+	&lm75_chip,
+	&lm75_temp,
+	NULL
+};
+
+static const struct hwmon_ops lm75_hwmon_ops = {
+	.is_visible = lm75_is_visible,
+	.read = lm75_read,
+	.write = lm75_write,
+};
+
+static const struct hwmon_chip_info lm75_chip_info = {
+	.ops = &lm75_hwmon_ops,
+	.info = lm75_info,
+};
+
+A complete list of bit values indicating individual attribute support
+is defined in include/linux/hwmon.h. Definition prefixes are as follows.
+
+HWMON_C_xxxx	Chip attributes, for use with hwmon_chip.
+HWMON_T_xxxx	Temperature attributes, for use with hwmon_temp.
+HWMON_I_xxxx	Voltage attributes, for use with hwmon_in.
+HWMON_C_xxxx	Current attributes, for use with hwmon_curr.
+		Notice the prefix overlap with chip attributes.
+HWMON_P_xxxx	Power attributes, for use with hwmon_power.
+HWMON_E_xxxx	Energy attributes, for use with hwmon_energy.
+HWMON_H_xxxx	Humidity attributes, for use with hwmon_humidity.
+HWMON_F_xxxx	Fan speed attributes, for use with hwmon_fan.
+HWMON_PWM_xxxx	PWM control attributes, for use with hwmon_pwm.
+
+Driver callback functions
+-------------------------
+
+Each driver provides is_visible, read, and write functions. Parameters
+and return values for those functions are as follows.
+
+umode_t is_visible_func(const void *data, enum hwmon_sensor_types type,
+			u32 attr, int channel)
+
+Parameters:
+	data:	Pointer to device private data structure.
+	type:	The sensor type.
+	attr:	Attribute identifier associated with a specific attribute.
+		For example, the attribute value for HWMON_T_INPUT would be
+		hwmon_temp_input. For complete mappings of bit fields to
+		attribute values please see include/linux/hwmon.h.
+	channel:The sensor channel number.
+
+Return value:
+	The file mode for this attribute. Typically, this will be 0 (the
+	attribute will not be created), S_IRUGO, or 'S_IRUGO | S_IWUSR'.
+
+int read_func(struct device *dev, enum hwmon_sensor_types type,
+	      u32 attr, int channel, long *val)
+
+Parameters:
+	dev:	Pointer to the hardware monitoring device.
+	type:	The sensor type.
+	attr:	Attribute identifier associated with a specific attribute.
+		For example, the attribute value for HWMON_T_INPUT would be
+		hwmon_temp_input. For complete mappings please see
+		include/linux/hwmon.h.
+	channel:The sensor channel number.
+	val:	Pointer to attribute value.
+
+Return value:
+	0 on success, a negative error number otherwise.
+
+int write_func(struct device *dev, enum hwmon_sensor_types type,
+	       u32 attr, int channel, long val)
+
+Parameters:
+	dev:	Pointer to the hardware monitoring device.
+	type:	The sensor type.
+	attr:	Attribute identifier associated with a specific attribute.
+		For example, the attribute value for HWMON_T_INPUT would be
+		hwmon_temp_input. For complete mappings please see
+		include/linux/hwmon.h.
+	channel:The sensor channel number.
+	val:	The value to write to the chip.
+
+Return value:
+	0 on success, a negative error number otherwise.
+
+
+Driver-provided sysfs attributes
+--------------------------------
+
+If the hardware monitoring device is registered with
+hwmon_device_register_with_info or devm_hwmon_device_register_with_info,
+it is most likely not necessary to provide sysfs attributes. Only non-standard
+sysfs attributes need to be provided when one of those registration functions
+is used.
 
 The header file linux/hwmon-sysfs.h provides a number of useful macros to
 declare and use hardware monitoring sysfs attributes.
diff --git a/Documentation/hwmon/max6650 b/Documentation/hwmon/max6650
index 58d9644..dff1d29 100644
--- a/Documentation/hwmon/max6650
+++ b/Documentation/hwmon/max6650
@@ -34,6 +34,7 @@
 fan4_input	ro	"
 fan1_target	rw	desired fan speed in RPM (closed loop mode only)
 pwm1_enable	rw	regulator mode, 0=full on, 1=open loop, 2=closed loop
+			3=off
 pwm1		rw	relative speed (0-255), 255=max. speed.
 			Used in open loop mode only.
 fan1_div	rw	sets the speed range the inputs can handle. Legal
diff --git a/Documentation/hwmon/ucd9000 b/Documentation/hwmon/ucd9000
index 805e33e..262e713 100644
--- a/Documentation/hwmon/ucd9000
+++ b/Documentation/hwmon/ucd9000
@@ -2,12 +2,13 @@
 =====================
 
 Supported chips:
-  * TI UCD90120, UCD90124, UCD9090, and UCD90910
-    Prefixes: 'ucd90120', 'ucd90124', 'ucd9090', 'ucd90910'
+  * TI UCD90120, UCD90124, UCD90160, UCD9090, and UCD90910
+    Prefixes: 'ucd90120', 'ucd90124', 'ucd90160', 'ucd9090', 'ucd90910'
     Addresses scanned: -
     Datasheets:
 	http://focus.ti.com/lit/ds/symlink/ucd90120.pdf
 	http://focus.ti.com/lit/ds/symlink/ucd90124.pdf
+	http://focus.ti.com/lit/ds/symlink/ucd90160.pdf
 	http://focus.ti.com/lit/ds/symlink/ucd9090.pdf
 	http://focus.ti.com/lit/ds/symlink/ucd90910.pdf
 
@@ -32,6 +33,13 @@
 functionality. Using these pins, the UCD90124 offers support for fan control,
 margining, and general-purpose PWM functions.
 
+The UCD90160 is a 16-rail PMBus/I2C addressable power-supply sequencer and
+monitor. The device integrates a 12-bit ADC for monitoring up to 16 power-supply
+voltage inputs. Twenty-six GPIO pins can be used for power supply enables,
+power-on reset signals, external interrupts, cascading, or other system
+functions. Twelve of these pins offer PWM functionality. Using these pins, the
+UCD90160 offers support for margining, and general-purpose PWM functions.
+
 The UCD9090 is a 10-rail PMBus/I2C addressable power-supply sequencer and
 monitor. The device integrates a 12-bit ADC for monitoring up to 10 power-supply
 voltage inputs. Twenty-three GPIO pins can be used for power supply enables,
diff --git a/Documentation/hwmon/xgene-hwmon b/Documentation/hwmon/xgene-hwmon
new file mode 100644
index 0000000..6ec50ed
--- /dev/null
+++ b/Documentation/hwmon/xgene-hwmon
@@ -0,0 +1,30 @@
+Kernel driver xgene-hwmon
+========================
+
+Supported chips:
+ * APM X-Gene SoC
+
+Description
+-----------
+
+This driver adds hardware temperature and power reading support for
+APM X-Gene SoC using the mailbox communication interface.
+For device tree, it is the standard DT mailbox.
+For ACPI, it is the PCC mailbox.
+
+The following sensors are supported
+
+  * Temperature
+    - SoC on-die temperature in milli-degree C
+    - Alarm when high/over temperature occurs
+  * Power
+    - CPU power in uW
+    - IO power in uW
+
+sysfs-Interface
+---------------
+
+temp0_input - SoC on-die temperature (milli-degree C)
+temp0_critical_alarm - An 1 would indicates on-die temperature exceeded threshold
+power0_input - CPU power in (uW)
+power1_input - IO power in (uW)
diff --git a/Documentation/i2c/slave-interface b/Documentation/i2c/slave-interface
index 80807ad..7e2a228 100644
--- a/Documentation/i2c/slave-interface
+++ b/Documentation/i2c/slave-interface
@@ -145,6 +145,11 @@
 
 * Catch the slave interrupts and send appropriate i2c_slave_events to the backend.
 
+Note that most hardware supports being master _and_ slave on the same bus. So,
+if you extend a bus driver, please make sure that the driver supports that as
+well. In almost all cases, slave support does not need to disable the master
+functionality.
+
 Check the i2c-rcar driver as an example.
 
 
diff --git a/Documentation/iio/iio_configfs.txt b/Documentation/iio/iio_configfs.txt
index f0add35..4e5f101 100644
--- a/Documentation/iio/iio_configfs.txt
+++ b/Documentation/iio/iio_configfs.txt
@@ -82,8 +82,8 @@
 
 e.g:
 
-$ mkdir /config/triggers/hrtimer/instance1
-$ rmdir /config/triggers/hrtimer/instance1
+$ mkdir /config/iio/triggers/hrtimer/instance1
+$ rmdir /config/iio/triggers/hrtimer/instance1
 
 Each trigger can have one or more attributes specific to the trigger type.
 
diff --git a/Documentation/index.rst b/Documentation/index.rst
index e0fc729..d9ccb94 100644
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@@ -6,22 +6,19 @@
 Welcome to The Linux Kernel's documentation!
 ============================================
 
-Nothing for you to see here *yet*. Please move along.
-
 Contents:
 
 .. toctree::
    :maxdepth: 2
 
    kernel-documentation
-   media/media_uapi
-   media/media_kapi
-   media/dvb-drivers/index
-   media/v4l-drivers/index
+   development-process/index
+   dev-tools/tools
+   driver-api/index
+   media/index
    gpu/index
 
 Indices and tables
 ==================
 
 * :ref:`genindex`
-* :ref:`search`
diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt
index 45bcafe..77570d1 100644
--- a/Documentation/infiniband/sysfs.txt
+++ b/Documentation/infiniband/sysfs.txt
@@ -89,6 +89,36 @@
    nctxts - number of allowed contexts (PSM2)
    chip_reset - diagnostic (root only)
    boardversion - board version
+
+   sdma<N>/ - one directory per sdma engine (0 - 15)
+	sdma<N>/cpu_list - read-write, list of cpus for user-process to sdma
+			   engine assignment.
+	sdma<N>/vl - read-only, vl the sdma engine maps to.
+
+	The new interface will give the user control on the affinity settings
+	for the hfi1 device.
+	As an example, to set an sdma engine irq affinity and thread affinity
+	of a user processes to use the sdma engine, which is "near" in terms
+	of NUMA configuration, or physical cpu location, the user will do:
+
+	echo "3" > /proc/irq/<N>/smp_affinity_list
+	echo "4-7" > /sys/devices/.../sdma3/cpu_list
+	cat /sys/devices/.../sdma3/vl
+	0
+	echo "8" > /proc/irq/<M>/smp_affinity_list
+	echo "9-12" > /sys/devices/.../sdma4/cpu_list
+	cat /sys/devices/.../sdma4/vl
+	1
+
+	to make sure that when a process runs on cpus 4,5,6, or 7,
+	and uses vl=0, then sdma engine 3 is selected by the driver,
+	and also the interrupt of the sdma engine 3 is steered to cpu 3.
+	Similarly, when a process runs on cpus 9,10,11, or 12 and sets vl=1,
+	then engine 4 will be selected and the irq of the sdma engine 4 is
+	steered to cpu 8.
+	This assumes that in the above N is the irq number of "sdma3",
+	and M is irq number of "sdma4" in the /proc/interrupts file.
+
    ports/1/
           CCMgtA/
                cc_settings_bin - CCA tables used by PSM2
diff --git a/Documentation/ioctl/botching-up-ioctls.txt b/Documentation/ioctl/botching-up-ioctls.txt
index cc30b14..36138c6 100644
--- a/Documentation/ioctl/botching-up-ioctls.txt
+++ b/Documentation/ioctl/botching-up-ioctls.txt
@@ -34,15 +34,18 @@
    64-bit platforms do. So we always need padding to the natural size to get
    this right.
 
- * Pad the entire struct to a multiple of 64-bits - the structure size will
-   otherwise differ on 32-bit versus 64-bit. Having a different structure size
-   hurts when passing arrays of structures to the kernel, or if the kernel
-   checks the structure size, which e.g. the drm core does.
+ * Pad the entire struct to a multiple of 64-bits if the structure contains
+   64-bit types - the structure size will otherwise differ on 32-bit versus
+   64-bit. Having a different structure size hurts when passing arrays of
+   structures to the kernel, or if the kernel checks the structure size, which
+   e.g. the drm core does.
 
  * Pointers are __u64, cast from/to a uintprt_t on the userspace side and
    from/to a void __user * in the kernel. Try really hard not to delay this
    conversion or worse, fiddle the raw __u64 through your code since that
-   diminishes the checking tools like sparse can provide.
+   diminishes the checking tools like sparse can provide. The macro
+   u64_to_user_ptr can be used in the kernel to avoid warnings about integers
+   and pointres of different sizes.
 
 
 Basics
diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt
deleted file mode 100644
index 7dd95b3..0000000
--- a/Documentation/kasan.txt
+++ /dev/null
@@ -1,171 +0,0 @@
-KernelAddressSanitizer (KASAN)
-==============================
-
-0. Overview
-===========
-
-KernelAddressSANitizer (KASAN) is a dynamic memory error detector. It provides
-a fast and comprehensive solution for finding use-after-free and out-of-bounds
-bugs.
-
-KASAN uses compile-time instrumentation for checking every memory access,
-therefore you will need a GCC version 4.9.2 or later. GCC 5.0 or later is
-required for detection of out-of-bounds accesses to stack or global variables.
-
-Currently KASAN is supported only for x86_64 architecture.
-
-1. Usage
-========
-
-To enable KASAN configure kernel with:
-
-	  CONFIG_KASAN = y
-
-and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline and
-inline are compiler instrumentation types. The former produces smaller binary
-the latter is 1.1 - 2 times faster. Inline instrumentation requires a GCC
-version 5.0 or later.
-
-KASAN works with both SLUB and SLAB memory allocators.
-For better bug detection and nicer reporting, enable CONFIG_STACKTRACE.
-
-To disable instrumentation for specific files or directories, add a line
-similar to the following to the respective kernel Makefile:
-
-        For a single file (e.g. main.o):
-                KASAN_SANITIZE_main.o := n
-
-        For all files in one directory:
-                KASAN_SANITIZE := n
-
-1.1 Error reports
-=================
-
-A typical out of bounds access report looks like this:
-
-==================================================================
-BUG: AddressSanitizer: out of bounds access in kmalloc_oob_right+0x65/0x75 [test_kasan] at addr ffff8800693bc5d3
-Write of size 1 by task modprobe/1689
-=============================================================================
-BUG kmalloc-128 (Not tainted): kasan error
------------------------------------------------------------------------------
-
-Disabling lock debugging due to kernel taint
-INFO: Allocated in kmalloc_oob_right+0x3d/0x75 [test_kasan] age=0 cpu=0 pid=1689
- __slab_alloc+0x4b4/0x4f0
- kmem_cache_alloc_trace+0x10b/0x190
- kmalloc_oob_right+0x3d/0x75 [test_kasan]
- init_module+0x9/0x47 [test_kasan]
- do_one_initcall+0x99/0x200
- load_module+0x2cb3/0x3b20
- SyS_finit_module+0x76/0x80
- system_call_fastpath+0x12/0x17
-INFO: Slab 0xffffea0001a4ef00 objects=17 used=7 fp=0xffff8800693bd728 flags=0x100000000004080
-INFO: Object 0xffff8800693bc558 @offset=1368 fp=0xffff8800693bc720
-
-Bytes b4 ffff8800693bc548: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a  ........ZZZZZZZZ
-Object ffff8800693bc558: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
-Object ffff8800693bc568: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
-Object ffff8800693bc578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
-Object ffff8800693bc588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
-Object ffff8800693bc598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
-Object ffff8800693bc5a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
-Object ffff8800693bc5b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b  kkkkkkkkkkkkkkkk
-Object ffff8800693bc5c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5  kkkkkkkkkkkkkkk.
-Redzone ffff8800693bc5d8: cc cc cc cc cc cc cc cc                          ........
-Padding ffff8800693bc718: 5a 5a 5a 5a 5a 5a 5a 5a                          ZZZZZZZZ
-CPU: 0 PID: 1689 Comm: modprobe Tainted: G    B          3.18.0-rc1-mm1+ #98
-Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
- ffff8800693bc000 0000000000000000 ffff8800693bc558 ffff88006923bb78
- ffffffff81cc68ae 00000000000000f3 ffff88006d407600 ffff88006923bba8
- ffffffff811fd848 ffff88006d407600 ffffea0001a4ef00 ffff8800693bc558
-Call Trace:
- [<ffffffff81cc68ae>] dump_stack+0x46/0x58
- [<ffffffff811fd848>] print_trailer+0xf8/0x160
- [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
- [<ffffffff811ff0f5>] object_err+0x35/0x40
- [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
- [<ffffffff8120b9fa>] kasan_report_error+0x38a/0x3f0
- [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
- [<ffffffff8120b344>] ? kasan_unpoison_shadow+0x14/0x40
- [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
- [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
- [<ffffffff8120a995>] __asan_store1+0x75/0xb0
- [<ffffffffa0002601>] ? kmem_cache_oob+0x1d/0xc3 [test_kasan]
- [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
- [<ffffffffa0002065>] kmalloc_oob_right+0x65/0x75 [test_kasan]
- [<ffffffffa00026b0>] init_module+0x9/0x47 [test_kasan]
- [<ffffffff810002d9>] do_one_initcall+0x99/0x200
- [<ffffffff811e4e5c>] ? __vunmap+0xec/0x160
- [<ffffffff81114f63>] load_module+0x2cb3/0x3b20
- [<ffffffff8110fd70>] ? m_show+0x240/0x240
- [<ffffffff81115f06>] SyS_finit_module+0x76/0x80
- [<ffffffff81cd3129>] system_call_fastpath+0x12/0x17
-Memory state around the buggy address:
- ffff8800693bc300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc380: fc fc 00 00 00 00 00 00 00 00 00 00 00 00 00 fc
- ffff8800693bc400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc500: fc fc fc fc fc fc fc fc fc fc fc 00 00 00 00 00
->ffff8800693bc580: 00 00 00 00 00 00 00 00 00 00 03 fc fc fc fc fc
-                                                 ^
- ffff8800693bc600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
- ffff8800693bc700: fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb fb
- ffff8800693bc780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
- ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
-==================================================================
-
-The header of the report discribe what kind of bug happened and what kind of
-access caused it. It's followed by the description of the accessed slub object
-(see 'SLUB Debug output' section in Documentation/vm/slub.txt for details) and
-the description of the accessed memory page.
-
-In the last section the report shows memory state around the accessed address.
-Reading this part requires some understanding of how KASAN works.
-
-The state of each 8 aligned bytes of memory is encoded in one shadow byte.
-Those 8 bytes can be accessible, partially accessible, freed or be a redzone.
-We use the following encoding for each shadow byte: 0 means that all 8 bytes
-of the corresponding memory region are accessible; number N (1 <= N <= 7) means
-that the first N bytes are accessible, and other (8 - N) bytes are not;
-any negative value indicates that the entire 8-byte word is inaccessible.
-We use different negative values to distinguish between different kinds of
-inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h).
-
-In the report above the arrows point to the shadow byte 03, which means that
-the accessed address is partially accessible.
-
-
-2. Implementation details
-=========================
-
-From a high level, our approach to memory error detection is similar to that
-of kmemcheck: use shadow memory to record whether each byte of memory is safe
-to access, and use compile-time instrumentation to check shadow memory on each
-memory access.
-
-AddressSanitizer dedicates 1/8 of kernel memory to its shadow memory
-(e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and
-offset to translate a memory address to its corresponding shadow address.
-
-Here is the function which translates an address to its corresponding shadow
-address:
-
-static inline void *kasan_mem_to_shadow(const void *addr)
-{
-	return ((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
-		+ KASAN_SHADOW_OFFSET;
-}
-
-where KASAN_SHADOW_SCALE_SHIFT = 3.
-
-Compile-time instrumentation used for checking memory accesses. Compiler inserts
-function calls (__asan_load*(addr), __asan_store*(addr)) before each memory
-access of size 1, 2, 4, 8 or 16. These functions check whether memory access is
-valid or not by checking corresponding shadow memory.
-
-GCC 5.0 has possibility to perform inline instrumentation. Instead of making
-function calls GCC directly inserts the code to check the shadow memory.
-This option significantly enlarges kernel but it gives x1.1-x2 performance
-boost over outline instrumented kernel.
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index db10185..069fcb3 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -274,7 +274,44 @@
 
 This is similar to the simple config entry above, but it also gives a
 hint to front ends, that all suboptions should be displayed as a
-separate list of options.
+separate list of options. To make sure all the suboptions will really
+show up under the menuconfig entry and not outside of it, every item
+from the <config options> list must depend on the menuconfig symbol.
+In practice, this is achieved by using one of the next two constructs:
+
+(1):
+menuconfig M
+if M
+    config C1
+    config C2
+endif
+
+(2):
+menuconfig M
+config C1
+    depends on M
+config C2
+    depends on M
+
+In the following examples (3) and (4), C1 and C2 still have the M
+dependency, but will not appear under menuconfig M anymore, because
+of C0, which doesn't depend on M:
+
+(3):
+menuconfig M
+    config C0
+if M
+    config C1
+    config C2
+endif
+
+(4):
+menuconfig M
+config C0
+config C1
+    depends on M
+config C2
+    depends on M
 
 choices:
 
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index 88ff63d..b0eb27b 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -393,6 +393,15 @@
 * We generally don' have to bring up a SMP kernel just to capture the
   dump. Hence generally it is useful either to build a UP dump-capture
   kernel or specify maxcpus=1 option while loading dump-capture kernel.
+  Note, though maxcpus always works, you had better replace it with
+  nr_cpus to save memory if supported by the current ARCH, such as x86.
+
+* You should enable multi-cpu support in dump-capture kernel if you intend
+  to use multi-thread programs with it, such as parallel dump feature of
+  makedumpfile. Otherwise, the multi-thread program may have a great
+  performance degradation. To enable multi-cpu support, you should bring up an
+  SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X]
+  options while loading it.
 
 * For s390x there are two kdump modes: If a ELF header is specified with
   the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt
index 1dafc52..05a7857 100644
--- a/Documentation/kernel-docs.txt
+++ b/Documentation/kernel-docs.txt
@@ -1,731 +1,652 @@
+.. _kernel_docs:
 
-    Index of Documentation for People Interested in Writing and/or
-
-                   Understanding the Linux Kernel.
+Index of Documentation for People Interested in Writing and/or Understanding the Linux Kernel
+=============================================================================================
 
           Juan-Mariano de Goyeneche <jmseyas@dit.upm.es>
 
-/*
- * The latest version of this document may be found at:
- *   http://www.dit.upm.es/~jmseyas/linux/kernel/hackers-docs.html
- */
+The need for a document like this one became apparent in the
+linux-kernel mailing list as the same questions, asking for pointers
+to information, appeared again and again.
 
-   The need for a document like this one became apparent in the
-   linux-kernel mailing list as the same questions, asking for pointers
-   to information, appeared again and again.
-   
-   Fortunately, as more and more people get to GNU/Linux, more and more
-   get interested in the Kernel. But reading the sources is not always
-   enough. It is easy to understand the code, but miss the concepts, the
-   philosophy and design decisions behind this code.
-   
-   Unfortunately, not many documents are available for beginners to
-   start. And, even if they exist, there was no "well-known" place which
-   kept track of them. These lines try to cover this lack. All documents
-   available on line known by the author are listed, while some reference
-   books are also mentioned.
-   
-   PLEASE, if you know any paper not listed here or write a new document,
-   send me an e-mail, and I'll include a reference to it here. Any
-   corrections, ideas or comments are also welcomed.
-   
-   The papers that follow are listed in no particular order. All are
-   cataloged with the following fields: the document's "Title", the
-   "Author"/s, the "URL" where they can be found, some "Keywords" helpful
-   when searching for specific topics, and a brief "Description" of the
-   Document.
-   
-   Enjoy!
-   
-     ON-LINE DOCS:
-       
-     * Title: "Linux Device Drivers, Third Edition"
-       Author: Jonathan Corbet, Alessandro Rubini, Greg Kroah-Hartman
-       URL: http://lwn.net/Kernel/LDD3/
-       Description: A 600-page book covering the (2.6.10) driver
-       programming API and kernel hacking in general.  Available under the
-       Creative Commons Attribution-ShareAlike 2.0 license.
+Fortunately, as more and more people get to GNU/Linux, more and more
+get interested in the Kernel. But reading the sources is not always
+enough. It is easy to understand the code, but miss the concepts, the
+philosophy and design decisions behind this code.
 
-     * Title: "The Linux Kernel"
-       Author: David A. Rusling.
-       URL: http://www.tldp.org/LDP/tlk/tlk.html
-       Keywords: everything!, book.
-       Description: On line, 200 pages book describing most aspects of
-       the Linux Kernel. Probably, the first reference for beginners.
-       Lots of illustrations explaining data structures use and
-       relationships in the purest Richard W. Stevens' style. Contents:
-       "1.-Hardware Basics, 2.-Software Basics, 3.-Memory Management,
-       4.-Processes, 5.-Interprocess Communication Mechanisms, 6.-PCI,
-       7.-Interrupts and Interrupt Handling, 8.-Device Drivers, 9.-The
-       File system, 10.-Networks, 11.-Kernel Mechanisms, 12.-Modules,
-       13.-The Linux Kernel Sources, A.-Linux Data Structures, B.-The
-       Alpha AXP Processor, C.-Useful Web and FTP Sites, D.-The GNU
-       General Public License, Glossary". In short: a must have.
+Unfortunately, not many documents are available for beginners to
+start. And, even if they exist, there was no "well-known" place which
+kept track of them. These lines try to cover this lack. All documents
+available on line known by the author are listed, while some reference
+books are also mentioned.
 
-     * Title: "Linux Device Drivers, 2nd Edition"
-       Author: Alessandro Rubini and Jonathan Corbet.
-       URL: http://www.xml.com/ldd/chapter/book/index.html
-       Keywords: device drivers, modules, debugging, memory, hardware,
-       interrupt handling, char drivers, block drivers, kmod, mmap, DMA,
-       buses.
-       Description: O'Reilly's popular book, now also on-line under the
-       GNU Free Documentation License.
-       Notes: You can also buy it in paper-form from O'Reilly. See below
-       under BOOKS (Not on-line).
+PLEASE, if you know any paper not listed here or write a new document,
+send me an e-mail, and I'll include a reference to it here. Any
+corrections, ideas or comments are also welcomed.
 
-     * Title: "Conceptual Architecture of the Linux Kernel"
-       Author: Ivan T. Bowman.
-       URL: http://plg.uwaterloo.ca/
-       Keywords: conceptual software architecture, extracted design,
-       reverse engineering, system structure.
-       Description: Conceptual software architecture of the Linux kernel,
-       automatically extracted from the source code. Very detailed. Good
-       figures. Gives good overall kernel understanding.
+The papers that follow are listed in no particular order. All are
+cataloged with the following fields: the document's "Title", the
+"Author"/s, the "URL" where they can be found, some "Keywords" helpful
+when searching for specific topics, and a brief "Description" of the
+Document.
 
-     * Title: "Concrete Architecture of the Linux Kernel"
-       Author: Ivan T. Bowman, Saheem Siddiqi, and Meyer C. Tanuan.
-       URL: http://plg.uwaterloo.ca/
-       Keywords: concrete architecture, extracted design, reverse
-       engineering, system structure, dependencies.
-       Description: Concrete architecture of the Linux kernel,
-       automatically extracted from the source code. Very detailed. Good
-       figures. Gives good overall kernel understanding. This papers
-       focus on lower details than its predecessor (files, variables...).
+Enjoy!
 
-     * Title: "Linux as a Case Study: Its Extracted Software
-       Architecture"
-       Author: Ivan T. Bowman, Richard C. Holt and Neil V. Brewster.
-       URL: http://plg.uwaterloo.ca/
-       Keywords: software architecture, architecture recovery,
-       redocumentation.
-       Description: Paper appeared at ICSE'99, Los Angeles, May 16-22,
-       1999. A mixture of the previous two documents from the same
-       author.
+.. note::
 
-     * Title: "Overview of the Virtual File System"
-       Author: Richard Gooch.
-       URL: http://www.mjmwired.net/kernel/Documentation/filesystems/vfs.txt
-       Keywords: VFS, File System, mounting filesystems, opening files,
-       dentries, dcache.
-       Description: Brief introduction to the Linux Virtual File System.
-       What is it, how it works, operations taken when opening a file or
-       mounting a file system and description of important data
-       structures explaining the purpose of each of their entries.
+   The documents on each section of this document are ordered by its
+   published date, from the newest to the oldest.
 
-     * Title: "The Linux RAID-1, 4, 5 Code"
-       Author: Ingo Molnar, Gadi Oxman and Miguel de Icaza.
-       URL: http://www.linuxjournal.com/article.php?sid=2391
-       Keywords: RAID, MD driver.
-       Description: Linux Journal Kernel Korner article. Here is its
-       abstract: "A description of the implementation of the RAID-1,
-       RAID-4 and RAID-5 personalities of the MD device driver in the
-       Linux kernel, providing users with high performance and reliable,
-       secondary-storage capability using software".
+Docs at the Linux Kernel tree
+-----------------------------
 
-     * Title: "Dynamic Kernels: Modularized Device Drivers"
-       Author: Alessandro Rubini.
-       URL: http://www.linuxjournal.com/article.php?sid=1219
-       Keywords: device driver, module, loading/unloading modules,
-       allocating resources.
-       Description: Linux Journal Kernel Korner article. Here is its
-       abstract: "This is the first of a series of four articles
-       co-authored by Alessandro Rubini and Georg Zezchwitz which present
-       a practical approach to writing Linux device drivers as kernel
-       loadable modules. This installment presents an introduction to the
-       topic, preparing the reader to understand next month's
-       installment".
+The DocBook books should be built with ``make {htmldocs | psdocs | pdfdocs}``.
+The Sphinx books should be built with ``make {htmldocs | pdfdocs | epubdocs}``.
 
-     * Title: "Dynamic Kernels: Discovery"
-       Author: Alessandro Rubini.
-       URL: http://www.linuxjournal.com/article.php?sid=1220
-       Keywords: character driver, init_module, clean_up module,
-       autodetection, mayor number, minor number, file operations,
-       open(), close().
-       Description: Linux Journal Kernel Korner article. Here is its
-       abstract: "This article, the second of four, introduces part of
-       the actual code to create custom module implementing a character
-       device driver. It describes the code for module initialization and
-       cleanup, as well as the open() and close() system calls".
+    * Name: **linux/Documentation**
 
-     * Title: "The Devil's in the Details"
-       Author: Georg v. Zezschwitz and Alessandro Rubini.
-       URL: http://www.linuxjournal.com/article.php?sid=1221
-       Keywords: read(), write(), select(), ioctl(), blocking/non
-       blocking mode, interrupt handler.
-       Description: Linux Journal Kernel Korner article. Here is its
-       abstract: "This article, the third of four on writing character
-       device drivers, introduces concepts of reading, writing, and using
-       ioctl-calls".
+      :Author: Many.
+      :Location: Documentation/
+      :Keywords: text files, Sphinx, DocBook.
+      :Description: Documentation that comes with the kernel sources,
+        inside the Documentation directory. Some pages from this document
+        (including this document itself) have been moved there, and might
+        be more up to date than the web version.
 
-     * Title: "Dissecting Interrupts and Browsing DMA"
-       Author: Alessandro Rubini and Georg v. Zezschwitz.
-       URL: http://www.linuxjournal.com/article.php?sid=1222
-       Keywords: interrupts, irqs, DMA, bottom halves, task queues.
-       Description: Linux Journal Kernel Korner article. Here is its
-       abstract: "This is the fourth in a series of articles about
-       writing character device drivers as loadable kernel modules. This
-       month, we further investigate the field of interrupt handling.
-       Though it is conceptually simple, practical limitations and
-       constraints make this an ``interesting'' part of device driver
-       writing, and several different facilities have been provided for
-       different situations. We also investigate the complex topic of
-       DMA".
+    * Title: **The Kernel Hacking HOWTO**
 
-     * Title: "Device Drivers Concluded"
-       Author: Georg v. Zezschwitz.
-       URL: http://www.linuxjournal.com/article.php?sid=1287
-       Keywords: address spaces, pages, pagination, page management,
-       demand loading, swapping, memory protection, memory mapping, mmap,
-       virtual memory areas (VMAs), vremap, PCI.
-       Description: Finally, the above turned out into a five articles
-       series. This latest one's introduction reads: "This is the last of
-       five articles about character device drivers. In this final
-       section, Georg deals with memory mapping devices, beginning with
-       an overall description of the Linux memory management concepts".
+      :Author: Various Talented People, and Rusty.
+      :Location: Documentation/DocBook/kernel-hacking.tmpl
+      :Keywords: HOWTO, kernel contexts, deadlock, locking, modules,
+        symbols, return conventions.
+      :Description: From the Introduction: "Please understand that I
+        never wanted to write this document, being grossly underqualified,
+        but I always wanted to read it, and this was the only way. I
+        simply explain some best practices, and give reading entry-points
+        into the kernel sources. I avoid implementation details: that's
+        what the code is for, and I ignore whole tracts of useful
+        routines. This document assumes familiarity with C, and an
+        understanding of what the kernel is, and how it is used. It was
+        originally written for the 2.3 kernels, but nearly all of it
+        applies to 2.2 too; 2.0 is slightly different".
 
-     * Title: "Network Buffers And Memory Management"
-       Author: Alan Cox.
-       URL: http://www.linuxjournal.com/article.php?sid=1312
-       Keywords: sk_buffs, network devices, protocol/link layer
-       variables, network devices flags, transmit, receive,
-       configuration, multicast.
-       Description: Linux Journal Kernel Korner. Here is the abstract:
-       "Writing a network device driver for Linux is fundamentally
-       simple---most of the complexity (other than talking to the
-       hardware) involves managing network packets in memory".
-       
-     * Title: "Linux Kernel Hackers' Guide"
-       Author: Michael K. Johnson.
-       URL: http://www.tldp.org/LDP/khg/HyperNews/get/khg.html
-       Keywords: device drivers, files, VFS, kernel interface, character vs
-       block devices, hardware interrupts, scsi, DMA, access to user memory,
-       memory allocation, timers.
-       Description: A guide designed to help you get up to speed on the
-       concepts that are not intuitevly obvious, and to document the internal
-       structures of Linux.
-       
-     * Title: "The Venus kernel interface"
-       Author: Peter J. Braam.
-       URL:
-       http://www.coda.cs.cmu.edu/doc/html/kernel-venus-protocol.html
-       Keywords: coda, filesystem, venus, cache manager.
-       Description: "This document describes the communication between
-       Venus and kernel level file system code needed for the operation
-       of the Coda filesystem. This version document is meant to describe
-       the current interface (version 1.0) as well as improvements we
-       envisage".
+    * Title: **Linux Kernel Locking HOWTO**
 
-     * Title: "Programming PCI-Devices under Linux"
-       Author: Claus Schroeter.
-       URL:
-       ftp://ftp.llp.fu-berlin.de/pub/linux/LINUX-LAB/whitepapers/pcip.ps.gz
-       Keywords: PCI, device, busmastering.
-       Description: 6 pages tutorial on PCI programming under Linux.
-       Gives the basic concepts on the architecture of the PCI subsystem,
-       as long as basic functions and macros to read/write the devices
-       and perform busmastering.
+      :Author: Various Talented People, and Rusty.
+      :Location: Documentation/DocBook/kernel-locking.tmpl
+      :Keywords: locks, locking, spinlock, semaphore, atomic, race
+        condition, bottom halves, tasklets, softirqs.
+      :Description: The title says it all: document describing the
+        locking system in the Linux Kernel either in uniprocessor or SMP
+        systems.
+      :Notes: "It was originally written for the later (>2.3.47) 2.3
+        kernels, but most of it applies to 2.2 too; 2.0 is slightly
+        different". Freely redistributable under the conditions of the GNU
+        General Public License.
 
-     * Title: "Writing Character Device Driver for Linux"
-       Author: R. Baruch and C. Schroeter.
-       URL:
-       ftp://ftp.llp.fu-berlin.de/pub/linux/LINUX-LAB/whitepapers/drivers.ps.gz
-       Keywords: character device drivers, I/O, signals, DMA, accessing
-       ports in user space, kernel environment.
-       Description: 68 pages paper on writing character drivers. A little
-       bit old (1.993, 1.994) although still useful.
+On-line docs
+------------
 
-     * Title: "Design and Implementation of the Second Extended
-       Filesystem"
-       Author: Rémy Card, Theodore Ts'o, Stephen Tweedie.
-       URL: http://web.mit.edu/tytso/www/linux/ext2intro.html
-       Keywords: ext2, linux fs history, inode, directory, link, devices,
-       VFS, physical structure, performance, benchmarks, ext2fs library,
-       ext2fs tools, e2fsck.
-       Description: Paper written by three of the top ext2 hackers.
-       Covers Linux filesystems history, ext2 motivation, ext2 features,
-       design, physical structure on disk, performance, benchmarks,
-       e2fsck's passes description... A must read!
-       Notes: This paper was first published in the Proceedings of the
-       First Dutch International Symposium on Linux, ISBN 90-367-0385-9.
+    * Title: **Linux Kernel Mailing List Glossary**
 
-     * Title: "Analysis of the Ext2fs structure"
-       Author: Louis-Dominique Dubeau.
-       URL: http://teaching.csse.uwa.edu.au/units/CITS2002/fs-ext2/
-       Keywords: ext2, filesystem, ext2fs.
-       Description: Description of ext2's blocks, directories, inodes,
-       bitmaps, invariants...
+      :Author: various
+      :URL: http://kernelnewbies.org/glossary/
+      :Date: rolling version
+      :Keywords: glossary, terms, linux-kernel.
+      :Description: From the introduction: "This glossary is intended as
+        a brief description of some of the acronyms and terms you may hear
+        during discussion of the Linux kernel".
 
-     * Title: "Journaling the Linux ext2fs Filesystem"
-       Author: Stephen C. Tweedie.
-       URL:
-       ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/journal-design.ps.gz
-       Keywords: ext3, journaling.
-       Description: Excellent 8-pages paper explaining the journaling
-       capabilities added to ext2 by the author, showing different
-       problems faced and the alternatives chosen.
+    * Title: **Tracing the Way of Data in a TCP Connection through the Linux Kernel**
 
-     * Title: "Kernel API changes from 2.0 to 2.2"
-       Author: Richard Gooch.
-       URL: http://www.safe-mbox.com/~rgooch/linux/docs/porting-to-2.2.html
-       Keywords: 2.2, changes.
-       Description: Kernel functions/structures/variables which changed
-       from 2.0.x to 2.2.x.
+      :Author: Richard Sailer
+      :URL: https://archive.org/details/linux_kernel_data_flow_short_paper
+      :Date: 2016
+      :Keywords: Linux Kernel Networking, TCP, tracing, ftrace
+      :Description: A seminar paper explaining ftrace and how to use it for
+        understanding linux kernel internals,
+        illustrated at tracing the way of a TCP packet through the kernel.
+      :Abstract: *This short paper outlines the usage of ftrace a tracing framework
+        as a tool to understand a running Linux system.
+        Having obtained a trace-log a kernel hacker can read and understand
+        source code more determined and with context.
+        In a detailed example this approach is demonstrated in tracing
+        and the way of data in a TCP Connection through the kernel.
+        Finally this trace-log is used as base for more a exact conceptual
+        exploration and description of the Linux TCP/IP implementation.*
 
-     * Title: "Kernel API changes from 2.2 to 2.4"
-       Author: Richard Gooch.
-       URL: http://www.safe-mbox.com/~rgooch/linux/docs/porting-to-2.4.html
-       Keywords: 2.4, changes.
-       Description: Kernel functions/structures/variables which changed
-       from 2.2.x to 2.4.x.
-       
-     * Title: "Linux Kernel Module Programming Guide"
-       Author: Ori Pomerantz.
-       URL: http://tldp.org/LDP/lkmpg/2.6/html/index.html
-       Keywords: modules, GPL book, /proc, ioctls, system calls,
-       interrupt handlers .
-       Description: Very nice 92 pages GPL book on the topic of modules
-       programming. Lots of examples.
-       
-     * Title: "I/O Event Handling Under Linux"
-       Author: Richard Gooch.
-       Keywords: IO, I/O, select(2), poll(2), FDs, aio_read(2), readiness
-       event queues.
-       Description: From the Introduction: "I/O Event handling is about
-       how your Operating System allows you to manage a large number of
-       open files (file descriptors in UNIX/POSIX, or FDs) in your
-       application. You want the OS to notify you when FDs become active
-       (have data ready to be read or are ready for writing). Ideally you
-       want a mechanism that is scalable. This means a large number of
-       inactive FDs cost very little in memory and CPU time to manage".
-       
-     * Title: "The Kernel Hacking HOWTO"
-       Author: Various Talented People, and Rusty.
-       Location: in kernel tree, Documentation/DocBook/kernel-hacking.tmpl
-       (must be built as "make {htmldocs | psdocs | pdfdocs})
-       Keywords: HOWTO, kernel contexts, deadlock, locking, modules,
-       symbols, return conventions.
-       Description: From the Introduction: "Please understand that I
-       never wanted to write this document, being grossly underqualified,
-       but I always wanted to read it, and this was the only way. I
-       simply explain some best practices, and give reading entry-points
-       into the kernel sources. I avoid implementation details: that's
-       what the code is for, and I ignore whole tracts of useful
-       routines. This document assumes familiarity with C, and an
-       understanding of what the kernel is, and how it is used. It was
-       originally written for the 2.3 kernels, but nearly all of it
-       applies to 2.2 too; 2.0 is slightly different".
-       
-     * Title: "Writing an ALSA Driver"
-       Author: Takashi Iwai <tiwai@suse.de>
-       URL: http://www.alsa-project.org/~iwai/writing-an-alsa-driver/index.html
-       Keywords: ALSA, sound, soundcard, driver, lowlevel, hardware.
-       Description: Advanced Linux Sound Architecture for developers,
-       both at kernel and user-level sides. ALSA is the Linux kernel
-       sound architecture in the 2.6 kernel version.
-       
-     * Title: "Programming Guide for Linux USB Device Drivers"
-       Author: Detlef Fliegl.
-       URL: http://usb.in.tum.de/usbdoc/
-       Keywords: USB, universal serial bus.
-       Description: A must-read. From the Preface: "This document should
-       give detailed information about the current state of the USB
-       subsystem and its API for USB device drivers. The first section
-       will deal with the basics of USB devices. You will learn about
-       different types of devices and their properties. Going into detail
-       you will see how USB devices communicate on the bus. The second
-       section gives an overview of the Linux USB subsystem [2] and the
-       device driver framework. Then the API and its data structures will
-       be explained step by step. The last section of this document
-       contains a reference of all API calls and their return codes".
-       Notes: Beware: the main page states: "This document may not be
-       published, printed or used in excerpts without explicit permission
-       of the author". Fortunately, it may still be read...
+    * Title: **On submitting kernel Patches**
 
-     * Title: "Linux Kernel Mailing List Glossary"
-       Author: various
-       URL: http://kernelnewbies.org/glossary/
-       Keywords: glossary, terms, linux-kernel.
-       Description: From the introduction: "This glossary is intended as
-       a brief description of some of the acronyms and terms you may hear
-       during discussion of the Linux kernel".
-       
-     * Title: "Linux Kernel Locking HOWTO"
-       Author: Various Talented People, and Rusty.
-       Location: in kernel tree, Documentation/DocBook/kernel-locking.tmpl
-       (must be built as "make {htmldocs | psdocs | pdfdocs})
-       Keywords: locks, locking, spinlock, semaphore, atomic, race
-       condition, bottom halves, tasklets, softirqs.
-       Description: The title says it all: document describing the
-       locking system in the Linux Kernel either in uniprocessor or SMP
-       systems.
-       Notes: "It was originally written for the later (>2.3.47) 2.3
-       kernels, but most of it applies to 2.2 too; 2.0 is slightly
-       different". Freely redistributable under the conditions of the GNU
-       General Public License.
+      :Author: Andi Kleen
+      :URL: http://halobates.de/on-submitting-kernel-patches.pdf
+      :Date: 2008
+      :Keywords: patches, review process, types of submissions, basic rules, case studies
+      :Description: This paper gives several experience values on what types of patches
+        there are and how likley they get merged.
+      :Abstract:
+        [...]. This paper examines some common problems for
+        submitting larger changes and some strategies to avoid problems.
 
-     * Title: "Global spinlock list and usage"
-       Author: Rick Lindsley.
-       URL: http://lse.sourceforge.net/lockhier/global-spin-lock
-       Keywords: spinlock.
-       Description: This is an attempt to document both the existence and
-       usage of the spinlocks in the Linux 2.4.5 kernel. Comprehensive
-       list of spinlocks showing when they are used, which functions
-       access them, how each lock is acquired, under what conditions it
-       is held, whether interrupts can occur or not while it is held...
+    * Title: **Overview of the Virtual File System**
 
-     * Title: "Porting Linux 2.0 Drivers To Linux 2.2: Changes and New
-       Features "
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/1999-05/gear_01.html
-       Keywords: ports, porting.
-       Description: Article from Linux Magazine on porting from 2.0 to
-       2.2 kernels.
+      :Author: Richard Gooch.
+      :URL: http://www.mjmwired.net/kernel/Documentation/filesystems/vfs.txt
+      :Date: 2007
+      :Keywords: VFS, File System, mounting filesystems, opening files,
+        dentries, dcache.
+      :Description: Brief introduction to the Linux Virtual File System.
+        What is it, how it works, operations taken when opening a file or
+        mounting a file system and description of important data
+        structures explaining the purpose of each of their entries.
 
-     * Title: "Porting Device Drivers To Linux 2.2: part II"
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/id/238 
-       Keywords: ports, porting.
-       Description: Second part on porting from 2.0 to 2.2 kernels.
+    * Title: **Linux Device Drivers, Third Edition**
 
-     * Title: "How To Make Sure Your Driver Will Work On The Power
-       Macintosh"
-       Author: Paul Mackerras.
-       URL: http://www.linux-mag.com/id/261
-       Keywords: Mac, Power Macintosh, porting, drivers, compatibility.
-       Description: The title says it all.
+      :Author: Jonathan Corbet, Alessandro Rubini, Greg Kroah-Hartman
+      :URL: http://lwn.net/Kernel/LDD3/
+      :Date: 2005
+      :Description: A 600-page book covering the (2.6.10) driver
+        programming API and kernel hacking in general.  Available under the
+        Creative Commons Attribution-ShareAlike 2.0 license.
+      :note: You can also :ref:`purchase a copy from O'Reilly or elsewhere  <ldd3_published>`.
 
-     * Title: "An Introduction to SCSI Drivers"
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/id/284
-       Keywords: SCSI, device, driver.
-       Description: The title says it all.
+    * Title: **Writing an ALSA Driver**
 
-     * Title: "Advanced SCSI Drivers And Other Tales"
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/id/307
-       Keywords: SCSI, device, driver, advanced.
-       Description: The title says it all.
+      :Author: Takashi Iwai <tiwai@suse.de>
+      :URL: http://www.alsa-project.org/~iwai/writing-an-alsa-driver/index.html
+      :Date: 2005
+      :Keywords: ALSA, sound, soundcard, driver, lowlevel, hardware.
+      :Description: Advanced Linux Sound Architecture for developers,
+        both at kernel and user-level sides. ALSA is the Linux kernel
+        sound architecture in the 2.6 kernel version.
 
-     * Title: "Writing Linux Mouse Drivers"
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/id/330
-       Keywords: mouse, driver, gpm.
-       Description: The title says it all.
+    * Title: **Linux PCMCIA Programmer's Guide**
 
-     * Title: "More on Mouse Drivers"
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/id/356
-       Keywords: mouse, driver, gpm, races, asynchronous I/O.
-       Description: The title still says it all.
+      :Author: David Hinds.
+      :URL: http://pcmcia-cs.sourceforge.net/ftp/doc/PCMCIA-PROG.html
+      :Date: 2003
+      :Keywords: PCMCIA.
+      :Description: "This document describes how to write kernel device
+        drivers for the Linux PCMCIA Card Services interface. It also
+        describes how to write user-mode utilities for communicating with
+        Card Services.
 
-     * Title: "Writing Video4linux Radio Driver"
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/id/381
-       Keywords: video4linux, driver, radio, radio devices.
-       Description: The title says it all.
+    * Title: **Linux Kernel Module Programming Guide**
 
-     * Title: "Video4linux Drivers, Part 1: Video-Capture Device"
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/id/406
-       Keywords: video4linux, driver, video capture, capture devices,
-       camera driver.
-       Description: The title says it all.
+      :Author: Ori Pomerantz.
+      :URL: http://tldp.org/LDP/lkmpg/2.6/html/index.html
+      :Date: 2001
+      :Keywords: modules, GPL book, /proc, ioctls, system calls,
+        interrupt handlers .
+      :Description: Very nice 92 pages GPL book on the topic of modules
+        programming. Lots of examples.
 
-     * Title: "Video4linux Drivers, Part 2: Video-capture Devices"
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/id/429
-       Keywords: video4linux, driver, video capture, capture devices,
-       camera driver, control, query capabilities, capability, facility.
-       Description: The title says it all.
+    * Title: **Global spinlock list and usage**
 
-     * Title: "PCI Management in Linux 2.2"
-       Author: Alan Cox.
-       URL: http://www.linux-mag.com/id/452
-       Keywords: PCI, bus, bus-mastering.
-       Description: The title says it all.
+      :Author: Rick Lindsley.
+      :URL: http://lse.sourceforge.net/lockhier/global-spin-lock
+      :Date: 2001
+      :Keywords: spinlock.
+      :Description: This is an attempt to document both the existence and
+        usage of the spinlocks in the Linux 2.4.5 kernel. Comprehensive
+        list of spinlocks showing when they are used, which functions
+        access them, how each lock is acquired, under what conditions it
+        is held, whether interrupts can occur or not while it is held...
 
-     * Title: "Linux 2.4 Kernel Internals"
-       Author: Tigran Aivazian and Christoph Hellwig.
-       URL: http://www.moses.uklinux.net/patches/lki.html
-       Keywords: Linux, kernel, booting, SMB boot, VFS, page cache.
-       Description: A little book used for a short training course.
-       Covers building the kernel image, booting (including SMP bootup),
-       process management, VFS and more.
+    * Title: **A Linux vm README**
 
-     * Title: "Linux IP Networking. A Guide to the Implementation and
-       Modification of the Linux Protocol Stack."
-       Author: Glenn Herrin.
-       URL: http://www.cs.unh.edu/cnrg/gherrin
-       Keywords: network, networking, protocol, IP, UDP, TCP, connection,
-       socket, receiving, transmitting, forwarding, routing, packets,
-       modules, /proc, sk_buff, FIB, tags.
-       Description: Excellent paper devoted to the Linux IP Networking,
-       explaining anything from the kernel's to the user space
-       configuration tools' code. Very good to get a general overview of
-       the kernel networking implementation and understand all steps
-       packets follow from the time they are received at the network
-       device till they are delivered to applications. The studied kernel
-       code is from 2.2.14 version. Provides code for a working packet
-       dropper example.
-       
-     * Title: "Get those boards talking under Linux."
-       Author: Alex Ivchenko.
-       URL: http://www.edn.com/article/CA46968.html
-       Keywords: data-acquisition boards, drivers, modules, interrupts,
-       memory allocation.
-       Description: Article written for people wishing to make their data
-       acquisition boards work on their GNU/Linux machines. Gives a basic
-       overview on writing drivers, from the naming of functions to
-       interrupt handling.
-       Notes: Two-parts article. Part II is at
-       URL: http://www.edn.com/article/CA46998.html
-       
-     * Title: "Linux PCMCIA Programmer's Guide"
-       Author: David Hinds.
-       URL: http://pcmcia-cs.sourceforge.net/ftp/doc/PCMCIA-PROG.html
-       Keywords: PCMCIA.
-       Description: "This document describes how to write kernel device
-       drivers for the Linux PCMCIA Card Services interface. It also
-       describes how to write user-mode utilities for communicating with
-       Card Services.
+      :Author: Kanoj Sarcar.
+      :URL: http://kos.enix.org/pub/linux-vmm.html
+      :Date: 2001
+      :Keywords: virtual memory, mm, pgd, vma, page, page flags, page
+        cache, swap cache, kswapd.
+      :Description: Telegraphic, short descriptions and definitions
+        relating the Linux virtual memory implementation.
 
-     * Title: "The Linux Kernel NFSD Implementation"
-       Author: Neil Brown.
-       URL:
-       http://www.cse.unsw.edu.au/~neilb/oss/linux-commentary/nfsd.html
-       Keywords: knfsd, nfsd, NFS, RPC, lockd, mountd, statd.
-       Description: The title says it all.
-       Notes: Covers knfsd's version 1.4.7 (patch against 2.2.7 kernel).
-       
-     * Title: "A Linux vm README"
-       Author: Kanoj Sarcar.
-       URL: http://kos.enix.org/pub/linux-vmm.html
-       Keywords: virtual memory, mm, pgd, vma, page, page flags, page
-       cache, swap cache, kswapd.
-       Description: Telegraphic, short descriptions and definitions
-       relating the Linux virtual memory implementation.
-       
-     * Title: "(nearly) Complete Linux Loadable Kernel Modules. The
-       definitive guide for hackers, virus coders and system
-       administrators."
-       Author: pragmatic/THC.
-       URL: http://packetstormsecurity.org/docs/hack/LKM_HACKING.html
-       Keywords: syscalls, intercept, hide, abuse, symbol table.
-       Description: Interesting paper on how to abuse the Linux kernel in
-       order to intercept and modify syscalls, make
-       files/directories/processes invisible, become root, hijack ttys,
-       write kernel modules based virus... and solutions for admins to
-       avoid all those abuses.
-       Notes: For 2.0.x kernels. Gives guidances to port it to 2.2.x
-       kernels.
-       
-     BOOKS: (Not on-line)
-   
-     * Title: "Linux Device Drivers"
-       Author: Alessandro Rubini.
-       Publisher: O'Reilly & Associates.
-       Date: 1998.
-       Pages: 439.
-       ISBN: 1-56592-292-1
-       
-     * Title: "Linux Device Drivers, 2nd Edition"
-       Author: Alessandro Rubini and Jonathan Corbet.
-       Publisher: O'Reilly & Associates.
-       Date: 2001.
-       Pages: 586.
-       ISBN: 0-59600-008-1
-       Notes: Further information in
-       http://www.oreilly.com/catalog/linuxdrive2/
+    * Title: **Video4linux Drivers, Part 1: Video-Capture Device**
 
-     * Title: "Linux Device Drivers, 3rd Edition"
-       Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman
-       Publisher: O'Reilly & Associates.
-       Date: 2005.
-       Pages: 636.
-       ISBN: 0-596-00590-3
-       Notes: Further information in
-       http://www.oreilly.com/catalog/linuxdrive3/
-       PDF format, URL: http://lwn.net/Kernel/LDD3/
+      :Author: Alan Cox.
+      :URL: http://www.linux-mag.com/id/406
+      :Date: 2000
+      :Keywords: video4linux, driver, video capture, capture devices,
+        camera driver.
+      :Description: The title says it all.
 
-     * Title: "Linux Kernel Internals"
-       Author: Michael Beck.
-       Publisher: Addison-Wesley.
-       Date: 1997.
-       ISBN: 0-201-33143-8 (second edition)
-       
-     * Title: "The Design of the UNIX Operating System"
-       Author: Maurice J. Bach.
-       Publisher: Prentice Hall.
-       Date: 1986.
-       Pages: 471.
-       ISBN: 0-13-201757-1
-       
-     * Title: "The Design and Implementation of the 4.3 BSD UNIX
-       Operating System"
-       Author: Samuel J. Leffler, Marshall Kirk McKusick, Michael J.
-       Karels, John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1989 (reprinted with corrections on October, 1990).
-       ISBN: 0-201-06196-1
-       
-     * Title: "The Design and Implementation of the 4.4 BSD UNIX
-       Operating System"
-       Author: Marshall Kirk McKusick, Keith Bostic, Michael J. Karels,
-       John S. Quarterman.
-       Publisher: Addison-Wesley.
-       Date: 1996.
-       ISBN: 0-201-54979-4
-       
-     * Title: "Programmation Linux 2.0 API systeme et fonctionnement du
-       noyau"
-       Author: Remy Card, Eric Dumas, Franck Mevel.
-       Publisher: Eyrolles.
-       Date: 1997.
-       Pages: 520.
-       ISBN: 2-212-08932-5
-       Notes: French.
+    * Title: **Video4linux Drivers, Part 2: Video-capture Devices**
 
-     * Title: "Unix internals -- the new frontiers"
-       Author: Uresh Vahalia.
-       Publisher: Prentice Hall.
-       Date: 1996.
-       Pages: 600.
-       ISBN: 0-13-101908-2
+      :Author: Alan Cox.
+      :URL: http://www.linux-mag.com/id/429
+      :Date: 2000
+      :Keywords: video4linux, driver, video capture, capture devices,
+        camera driver, control, query capabilities, capability, facility.
+      :Description: The title says it all.
 
-     * Title: "Programming for the real world - POSIX.4"
-       Author: Bill O. Gallmeister.
-       Publisher: O'Reilly & Associates, Inc..
-       Date: 1995.
-       Pages: ???.
-       ISBN: I-56592-074-0
-       Notes: Though not being directly about Linux, Linux aims to be
-       POSIX. Good reference.
+    * Title: **Linux IP Networking. A Guide to the Implementation and Modification of the Linux Protocol Stack.**
 
-     * Title:  "UNIX  Systems  for  Modern Architectures: Symmetric
-       Multiprocessing and Caching for Kernel Programmers"
-       Author: Curt Schimmel.
-       Publisher: Addison Wesley.
-       Date: June, 1994.
-       Pages: 432.
-       ISBN: 0-201-63338-8
+      :Author: Glenn Herrin.
+      :URL: http://www.cs.unh.edu/cnrg/gherrin
+      :Date: 2000
+      :Keywords: network, networking, protocol, IP, UDP, TCP, connection,
+        socket, receiving, transmitting, forwarding, routing, packets,
+        modules, /proc, sk_buff, FIB, tags.
+      :Description: Excellent paper devoted to the Linux IP Networking,
+        explaining anything from the kernel's to the user space
+        configuration tools' code. Very good to get a general overview of
+        the kernel networking implementation and understand all steps
+        packets follow from the time they are received at the network
+        device till they are delivered to applications. The studied kernel
+        code is from 2.2.14 version. Provides code for a working packet
+        dropper example.
 
-     * Title: "Linux Kernel Development, 3rd Edition"
-       Author: Robert Love
-       Publisher: Addison-Wesley.
-       Date: July, 2010
-       Pages: 440
-       ISBN: 978-0672329463
+    * Title: **How To Make Sure Your Driver Will Work On The Power Macintosh**
 
-     MISCELLANEOUS:
+      :Author: Paul Mackerras.
+      :URL: http://www.linux-mag.com/id/261
+      :Date: 1999
+      :Keywords: Mac, Power Macintosh, porting, drivers, compatibility.
+      :Description: The title says it all.
 
-     * Name: linux/Documentation
-       Author: Many.
-       URL: Just look inside your kernel sources.
-       Keywords: anything, DocBook.
-       Description: Documentation that comes with the kernel sources,
-       inside the Documentation directory. Some pages from this document
-       (including this document itself) have been moved there, and might
-       be more up to date than the web version.
+    * Title: **An Introduction to SCSI Drivers**
 
-     * Name: "Linux Kernel Source Reference"
-       Author: Thomas Graichen.
-       URL: http://marc.info/?l=linux-kernel&m=96446640102205&w=4
-       Keywords: CVS, web, cvsweb, browsing source code.
-       Description: Web interface to a CVS server with the kernel
-       sources. "Here you can have a look at any file of the Linux kernel
-       sources of any version starting from 1.0 up to the (daily updated)
-       current version available. Also you can check the differences
-       between two versions of a file".
+      :Author: Alan Cox.
+      :URL: http://www.linux-mag.com/id/284
+      :Date: 1999
+      :Keywords: SCSI, device, driver.
+      :Description: The title says it all.
 
-     * Name: "Cross-Referencing Linux"
-       URL: http://lxr.free-electrons.com/
-       Keywords: Browsing source code.
-       Description: Another web-based Linux kernel source code browser.
-       Lots of cross references to variables and functions. You can see
-       where they are defined and where they are used.
+    * Title: **Advanced SCSI Drivers And Other Tales**
 
-     * Name: "Linux Weekly News"
-       URL: http://lwn.net
-       Keywords: latest kernel news.
-       Description: The title says it all. There's a fixed kernel section
-       summarizing developers' work, bug fixes, new features and versions
-       produced during the week. Published every Thursday.
+      :Author: Alan Cox.
+      :URL: http://www.linux-mag.com/id/307
+      :Date: 1999
+      :Keywords: SCSI, device, driver, advanced.
+      :Description: The title says it all.
 
-     * Name: "Kernel Traffic"
-       URL: http://kt.earth.li/kernel-traffic/index.html
-       Keywords: linux-kernel mailing list, weekly kernel news.
-       Description: Weekly newsletter covering the most relevant
-       discussions of the linux-kernel mailing list.
+    * Title: **Writing Linux Mouse Drivers**
 
-     * Name: "CuTTiNG.eDGe.LiNuX"
-       URL: http://edge.kernelnotes.org
-       Keywords: changelist.
-       Description: Site which provides the changelist for every kernel
-       release. What's new, what's better, what's changed. Myrdraal reads
-       the patches and describes them. Pointers to the patches are there,
-       too.
+      :Author: Alan Cox.
+      :URL: http://www.linux-mag.com/id/330
+      :Date: 1999
+      :Keywords: mouse, driver, gpm.
+      :Description: The title says it all.
 
-     * Name: "New linux-kernel Mailing List FAQ"
-       URL: http://www.tux.org/lkml/
-       Keywords: linux-kernel mailing list FAQ.
-       Description: linux-kernel is a mailing list for developers to
-       communicate. This FAQ builds on the previous linux-kernel mailing
-       list FAQ maintained by Frohwalt Egerer, who no longer maintains
-       it. Read it to see how to join the mailing list. Dozens of
-       interesting questions regarding the list, Linux, developers (who
-       is ...?), terms (what is...?) are answered here too. Just read it.
+    * Title: **More on Mouse Drivers**
 
-     * Name: "Linux Virtual File System"
-       Author: Peter J. Braam.
-       URL: http://www.coda.cs.cmu.edu/doc/talks/linuxvfs/
-       Keywords: slides, VFS, inode, superblock, dentry, dcache.
-       Description: Set of slides, presumably from a presentation on the
-       Linux VFS layer. Covers version 2.1.x, with dentries and the
-       dcache.
+      :Author: Alan Cox.
+      :URL: http://www.linux-mag.com/id/356
+      :Date: 1999
+      :Keywords: mouse, driver, gpm, races, asynchronous I/O.
+      :Description: The title still says it all.
 
-     * Name: "Gary's Encyclopedia - The Linux Kernel"
-       Author: Gary (I suppose...).
-       URL: http://slencyclopedia.berlios.de/index.html
-       Keywords: linux, community, everything!
-       Description: Gary's Encyclopedia exists to allow the rapid finding
-       of documentation and other information of interest to GNU/Linux
-       users. It has about 4000 links to external pages in 150 major
-       categories. This link is for kernel-specific links, documents,
-       sites...  This list is now hosted by developer.Berlios.de,
-       but seems not to have been updated since sometime in 1999.
+    * Title: **Writing Video4linux Radio Driver**
 
-     * Name: "The home page of Linux-MM"
-       Author: The Linux-MM team.
-       URL: http://linux-mm.org/
-       Keywords: memory management, Linux-MM, mm patches, TODO, docs,
-       mailing list.
-       Description: Site devoted to Linux Memory Management development.
-       Memory related patches, HOWTOs, links, mm developers... Don't miss
-       it if you are interested in memory management development!
+      :Author: Alan Cox.
+      :URL: http://www.linux-mag.com/id/381
+      :Date: 1999
+      :Keywords: video4linux, driver, radio, radio devices.
+      :Description: The title says it all.
 
-     * Name: "Kernel Newbies IRC Channel and Website"
-       URL: http://www.kernelnewbies.org
-       Keywords: IRC, newbies, channel, asking doubts.
-       Description: #kernelnewbies on irc.oftc.net.
-       #kernelnewbies is an IRC network dedicated to the 'newbie'
-       kernel hacker. The audience mostly consists of people who are
-       learning about the kernel, working on kernel projects or
-       professional kernel hackers that want to help less seasoned kernel
-       people.
-       #kernelnewbies is on the OFTC IRC Network.
-       Try irc.oftc.net as your server and then /join #kernelnewbies.
-       The kernelnewbies website also hosts articles, documents, FAQs...
-       
-     * Name: "linux-kernel mailing list archives and search engines"
-       URL: http://vger.kernel.org/vger-lists.html
-       URL: http://www.uwsg.indiana.edu/hypermail/linux/kernel/index.html
-       URL: http://marc.theaimsgroup.com/?l=linux-kernel
-       URL: http://groups.google.com/group/mlist.linux.kernel
-       URL: http://www.cs.helsinki.fi/linux/linux-kernel/
-       URL: http://www.lib.uaa.alaska.edu/linux-kernel/
-       Keywords: linux-kernel, archives, search.
-       Description: Some of the linux-kernel mailing list archivers. If
-       you have a better/another one, please let me know.
-     _________________________________________________________________
-   
-   Document last updated on Sat 2005-NOV-19
+    * Title: **I/O Event Handling Under Linux**
+
+      :Author: Richard Gooch.
+      :URL: http://web.mit.edu/~yandros/doc/io-events.html
+      :Date: 1999
+      :Keywords: IO, I/O, select(2), poll(2), FDs, aio_read(2), readiness
+        event queues.
+      :Description: From the Introduction: "I/O Event handling is about
+        how your Operating System allows you to manage a large number of
+        open files (file descriptors in UNIX/POSIX, or FDs) in your
+        application. You want the OS to notify you when FDs become active
+        (have data ready to be read or are ready for writing). Ideally you
+        want a mechanism that is scalable. This means a large number of
+        inactive FDs cost very little in memory and CPU time to manage".
+
+    * Title: **(nearly) Complete Linux Loadable Kernel Modules. The definitive guide for hackers, virus coders and system administrators.**
+
+      :Author: pragmatic/THC.
+      :URL: http://packetstormsecurity.org/docs/hack/LKM_HACKING.html
+      :Date: 1999
+      :Keywords: syscalls, intercept, hide, abuse, symbol table.
+      :Description: Interesting paper on how to abuse the Linux kernel in
+        order to intercept and modify syscalls, make
+        files/directories/processes invisible, become root, hijack ttys,
+        write kernel modules based virus... and solutions for admins to
+        avoid all those abuses.
+      :Notes: For 2.0.x kernels. Gives guidances to port it to 2.2.x
+        kernels.
+
+    * Name: **Linux Virtual File System**
+
+      :Author: Peter J. Braam.
+      :URL: http://www.coda.cs.cmu.edu/doc/talks/linuxvfs/
+      :Date: 1998
+      :Keywords: slides, VFS, inode, superblock, dentry, dcache.
+      :Description: Set of slides, presumably from a presentation on the
+        Linux VFS layer. Covers version 2.1.x, with dentries and the
+        dcache.
+
+    * Title: **The Venus kernel interface**
+
+      :Author: Peter J. Braam.
+      :URL: http://www.coda.cs.cmu.edu/doc/html/kernel-venus-protocol.html
+      :Date: 1998
+      :Keywords: coda, filesystem, venus, cache manager.
+      :Description: "This document describes the communication between
+        Venus and kernel level file system code needed for the operation
+        of the Coda filesystem. This version document is meant to describe
+        the current interface (version 1.0) as well as improvements we
+        envisage".
+
+    * Title: **Design and Implementation of the Second Extended Filesystem**
+
+      :Author: Rémy Card, Theodore Ts'o, Stephen Tweedie.
+      :URL: http://web.mit.edu/tytso/www/linux/ext2intro.html
+      :Date: 1998
+      :Keywords: ext2, linux fs history, inode, directory, link, devices,
+        VFS, physical structure, performance, benchmarks, ext2fs library,
+        ext2fs tools, e2fsck.
+      :Description: Paper written by three of the top ext2 hackers.
+        Covers Linux filesystems history, ext2 motivation, ext2 features,
+        design, physical structure on disk, performance, benchmarks,
+        e2fsck's passes description... A must read!
+      :Notes: This paper was first published in the Proceedings of the
+        First Dutch International Symposium on Linux, ISBN 90-367-0385-9.
+
+    * Title: **The Linux RAID-1, 4, 5 Code**
+
+      :Author: Ingo Molnar, Gadi Oxman and Miguel de Icaza.
+      :URL: http://www.linuxjournal.com/article.php?sid=2391
+      :Date: 1997
+      :Keywords: RAID, MD driver.
+      :Description: Linux Journal Kernel Korner article. Here is its
+      :Abstract: *A description of the implementation of the RAID-1,
+        RAID-4 and RAID-5 personalities of the MD device driver in the
+        Linux kernel, providing users with high performance and reliable,
+        secondary-storage capability using software*.
+
+    * Title: **Linux Kernel Hackers' Guide**
+
+      :Author: Michael K. Johnson.
+      :URL: http://www.tldp.org/LDP/khg/HyperNews/get/khg.html
+      :Date: 1997
+      :Keywords: device drivers, files, VFS, kernel interface, character vs
+        block devices, hardware interrupts, scsi, DMA, access to user memory,
+        memory allocation, timers.
+      :Description: A guide designed to help you get up to speed on the
+        concepts that are not intuitevly obvious, and to document the internal
+        structures of Linux.
+
+    * Title: **Dynamic Kernels: Modularized Device Drivers**
+
+      :Author: Alessandro Rubini.
+      :URL: http://www.linuxjournal.com/article.php?sid=1219
+      :Date: 1996
+      :Keywords: device driver, module, loading/unloading modules,
+        allocating resources.
+      :Description: Linux Journal Kernel Korner article. Here is its
+      :Abstract: *This is the first of a series of four articles
+        co-authored by Alessandro Rubini and Georg Zezchwitz which present
+        a practical approach to writing Linux device drivers as kernel
+        loadable modules. This installment presents an introduction to the
+        topic, preparing the reader to understand next month's
+        installment*.
+
+    * Title: **Dynamic Kernels: Discovery**
+
+      :Author: Alessandro Rubini.
+      :URL: http://www.linuxjournal.com/article.php?sid=1220
+      :Date: 1996
+      :Keywords: character driver, init_module, clean_up module,
+        autodetection, mayor number, minor number, file operations,
+        open(), close().
+      :Description: Linux Journal Kernel Korner article. Here is its
+      :Abstract: *This article, the second of four, introduces part of
+        the actual code to create custom module implementing a character
+        device driver. It describes the code for module initialization and
+        cleanup, as well as the open() and close() system calls*.
+
+    * Title: **The Devil's in the Details**
+
+      :Author: Georg v. Zezschwitz and Alessandro Rubini.
+      :URL: http://www.linuxjournal.com/article.php?sid=1221
+      :Date: 1996
+      :Keywords: read(), write(), select(), ioctl(), blocking/non
+        blocking mode, interrupt handler.
+      :Description: Linux Journal Kernel Korner article. Here is its
+      :Abstract: *This article, the third of four on writing character
+        device drivers, introduces concepts of reading, writing, and using
+        ioctl-calls*.
+
+    * Title: **Dissecting Interrupts and Browsing DMA**
+
+      :Author: Alessandro Rubini and Georg v. Zezschwitz.
+      :URL: http://www.linuxjournal.com/article.php?sid=1222
+      :Date: 1996
+      :Keywords: interrupts, irqs, DMA, bottom halves, task queues.
+      :Description: Linux Journal Kernel Korner article. Here is its
+      :Abstract: *This is the fourth in a series of articles about
+        writing character device drivers as loadable kernel modules. This
+        month, we further investigate the field of interrupt handling.
+        Though it is conceptually simple, practical limitations and
+        constraints make this an ''interesting'' part of device driver
+        writing, and several different facilities have been provided for
+        different situations. We also investigate the complex topic of
+        DMA*.
+
+    * Title: **Device Drivers Concluded**
+
+      :Author: Georg v. Zezschwitz.
+      :URL: http://www.linuxjournal.com/article.php?sid=1287
+      :Date: 1996
+      :Keywords: address spaces, pages, pagination, page management,
+        demand loading, swapping, memory protection, memory mapping, mmap,
+        virtual memory areas (VMAs), vremap, PCI.
+      :Description: Finally, the above turned out into a five articles
+        series. This latest one's introduction reads: "This is the last of
+        five articles about character device drivers. In this final
+        section, Georg deals with memory mapping devices, beginning with
+        an overall description of the Linux memory management concepts".
+
+    * Title: **Network Buffers And Memory Management**
+
+      :Author: Alan Cox.
+      :URL: http://www.linuxjournal.com/article.php?sid=1312
+      :Date: 1996
+      :Keywords: sk_buffs, network devices, protocol/link layer
+        variables, network devices flags, transmit, receive,
+        configuration, multicast.
+      :Description: Linux Journal Kernel Korner.
+      :Abstract: *Writing a network device driver for Linux is fundamentally
+        simple---most of the complexity (other than talking to the
+        hardware) involves managing network packets in memory*.
+
+    * Title: **Analysis of the Ext2fs structure**
+
+      :Author: Louis-Dominique Dubeau.
+      :URL: http://teaching.csse.uwa.edu.au/units/CITS2002/fs-ext2/
+      :Date: 1994
+      :Keywords: ext2, filesystem, ext2fs.
+      :Description: Description of ext2's blocks, directories, inodes,
+        bitmaps, invariants...
+
+Published books
+---------------
+
+    * Title: **Linux Treiber entwickeln**
+
+      :Author: Jürgen Quade, Eva-Katharina Kunst
+      :Publisher: dpunkt.verlag
+      :Date: Oct 2015 (4th edition)
+      :Pages: 688
+      :ISBN: 978-3-86490-288-8
+      :Note: German. The third edition from 2011 is
+         much cheaper and still quite up-to-date.
+
+    * Title: **Linux Kernel Networking: Implementation and Theory**
+
+      :Author: Rami Rosen
+      :Publisher: Apress
+      :Date: December 22, 2013
+      :Pages: 648
+      :ISBN: 978-1430261964
+
+    * Title: **Embedded Linux Primer: A practical Real-World Approach, 2nd Edition**
+
+      :Author: Christopher Hallinan
+      :Publisher: Pearson
+      :Date: November, 2010
+      :Pages: 656
+      :ISBN: 978-0137017836
+
+    * Title: **Linux Kernel Development, 3rd Edition**
+
+      :Author: Robert Love
+      :Publisher: Addison-Wesley
+      :Date: July, 2010
+      :Pages: 440
+      :ISBN: 978-0672329463
+
+    * Title: **Essential Linux Device Drivers**
+
+      :Author: Sreekrishnan Venkateswaran
+      :Published: Prentice Hall
+      :Date: April, 2008
+      :Pages: 744
+      :ISBN: 978-0132396554
+
+.. _ldd3_published:
+
+    * Title: **Linux Device Drivers, 3rd Edition**
+
+      :Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman
+      :Publisher: O'Reilly & Associates
+      :Date: 2005
+      :Pages: 636
+      :ISBN: 0-596-00590-3
+      :Notes: Further information in
+        http://www.oreilly.com/catalog/linuxdrive3/
+        PDF format, URL: http://lwn.net/Kernel/LDD3/
+
+    * Title: **Linux Kernel Internals**
+
+      :Author: Michael Beck
+      :Publisher: Addison-Wesley
+      :Date: 1997
+      :ISBN: 0-201-33143-8 (second edition)
+
+    * Title: **Programmation Linux 2.0 API systeme et fonctionnement du noyau**
+
+      :Author: Remy Card, Eric Dumas, Franck Mevel
+      :Publisher: Eyrolles
+      :Date: 1997
+      :Pages: 520
+      :ISBN: 2-212-08932-5
+      :Notes: French
+
+    * Title: **The Design and Implementation of the 4.4 BSD UNIX Operating System**
+
+      :Author: Marshall Kirk McKusick, Keith Bostic, Michael J. Karels,
+        John S. Quarterman
+      :Publisher: Addison-Wesley
+      :Date: 1996
+      :ISBN: 0-201-54979-4
+
+    * Title: **Unix internals -- the new frontiers**
+
+      :Author: Uresh Vahalia
+      :Publisher: Prentice Hall
+      :Date: 1996
+      :Pages: 600
+      :ISBN: 0-13-101908-2
+
+    * Title: **Programming for the real world - POSIX.4**
+
+      :Author: Bill O. Gallmeister
+      :Publisher: O'Reilly & Associates, Inc
+      :Date: 1995
+      :Pages: 552
+      :ISBN: I-56592-074-0
+      :Notes: Though not being directly about Linux, Linux aims to be
+        POSIX. Good reference.
+
+    * Title:  **UNIX  Systems  for  Modern Architectures: Symmetric Multiprocessing and Caching for Kernel Programmers**
+
+      :Author: Curt Schimmel
+      :Publisher: Addison Wesley
+      :Date: June, 1994
+      :Pages: 432
+      :ISBN: 0-201-63338-8
+
+    * Title: **The Design and Implementation of the 4.3 BSD UNIX Operating System**
+
+      :Author: Samuel J. Leffler, Marshall Kirk McKusick, Michael J
+        Karels, John S. Quarterman
+      :Publisher: Addison-Wesley
+      :Date: 1989 (reprinted with corrections on October, 1990)
+      :ISBN: 0-201-06196-1
+
+    * Title: **The Design of the UNIX Operating System**
+
+      :Author: Maurice J. Bach
+      :Publisher: Prentice Hall
+      :Date: 1986
+      :Pages: 471
+      :ISBN: 0-13-201757-1
+
+Miscellaneous
+-------------
+
+    * Name: **Cross-Referencing Linux**
+
+      :URL: http://lxr.free-electrons.com/
+      :Keywords: Browsing source code.
+      :Description: Another web-based Linux kernel source code browser.
+        Lots of cross references to variables and functions. You can see
+        where they are defined and where they are used.
+
+    * Name: **Linux Weekly News**
+
+      :URL: http://lwn.net
+      :Keywords: latest kernel news.
+      :Description: The title says it all. There's a fixed kernel section
+        summarizing developers' work, bug fixes, new features and versions
+        produced during the week. Published every Thursday.
+
+    * Name: **The home page of Linux-MM**
+
+      :Author: The Linux-MM team.
+      :URL: http://linux-mm.org/
+      :Keywords: memory management, Linux-MM, mm patches, TODO, docs,
+        mailing list.
+      :Description: Site devoted to Linux Memory Management development.
+        Memory related patches, HOWTOs, links, mm developers... Don't miss
+        it if you are interested in memory management development!
+
+    * Name: **Kernel Newbies IRC Channel and Website**
+
+      :URL: http://www.kernelnewbies.org
+      :Keywords: IRC, newbies, channel, asking doubts.
+      :Description: #kernelnewbies on irc.oftc.net.
+        #kernelnewbies is an IRC network dedicated to the 'newbie'
+        kernel hacker. The audience mostly consists of people who are
+        learning about the kernel, working on kernel projects or
+        professional kernel hackers that want to help less seasoned kernel
+        people.
+        #kernelnewbies is on the OFTC IRC Network.
+        Try irc.oftc.net as your server and then /join #kernelnewbies.
+        The kernelnewbies website also hosts articles, documents, FAQs...
+
+    * Name: **linux-kernel mailing list archives and search engines**
+
+      :URL: http://vger.kernel.org/vger-lists.html
+      :URL: http://www.uwsg.indiana.edu/hypermail/linux/kernel/index.html
+      :URL: http://groups.google.com/group/mlist.linux.kernel
+      :Keywords: linux-kernel, archives, search.
+      :Description: Some of the linux-kernel mailing list archivers. If
+        you have a better/another one, please let me know.
+
+-------
+
+Document last updated on Tue 2016-Sep-20
+
+This document is based on:
+ http://www.dit.upm.es/~jmseyas/linux/kernel/hackers-docs.html
diff --git a/Documentation/kernel-documentation.rst b/Documentation/kernel-documentation.rst
index c4eb504..10cc7dd 100644
--- a/Documentation/kernel-documentation.rst
+++ b/Documentation/kernel-documentation.rst
@@ -107,6 +107,35 @@
   the order as encountered."), having the higher levels the same overall makes
   it easier to follow the documents.
 
+
+the C domain
+------------
+
+The `Sphinx C Domain`_ (name c) is suited for documentation of C API. E.g. a
+function prototype:
+
+.. code-block:: rst
+
+    .. c:function:: int ioctl( int fd, int request )
+
+The C domain of the kernel-doc has some additional features. E.g. you can
+*rename* the reference name of a function with a common name like ``open`` or
+``ioctl``:
+
+.. code-block:: rst
+
+     .. c:function:: int ioctl( int fd, int request )
+        :name: VIDIOC_LOG_STATUS
+
+The func-name (e.g. ioctl) remains in the output but the ref-name changed from
+``ioctl`` to ``VIDIOC_LOG_STATUS``. The index entry for this function is also
+changed to ``VIDIOC_LOG_STATUS`` and the function can now referenced by:
+
+.. code-block:: rst
+
+     :c:func:`VIDIOC_LOG_STATUS`
+
+
 list tables
 -----------
 
@@ -265,6 +294,8 @@
 ``scripts/kernel-doc`` script to extract the documentation comments from the
 source.
 
+.. _kernel_doc:
+
 Writing kernel-doc comments
 ===========================
 
@@ -366,8 +397,6 @@
 Cross-referencing from reStructuredText
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. highlight:: none
-
 To cross-reference the functions and types defined in the kernel-doc comments
 from reStructuredText documents, please use the `Sphinx C Domain`_
 references. For example::
@@ -390,8 +419,6 @@
 Function documentation
 ----------------------
 
-.. highlight:: c
-
 The general format of a function and function-like macro kernel-doc comment is::
 
   /**
@@ -572,8 +599,6 @@
 Converting DocBook to Sphinx
 ----------------------------
 
-.. highlight:: none
-
 Over time, we expect all of the documents under ``Documentation/DocBook`` to be
 converted to Sphinx and reStructuredText. For most DocBook XML documents, a good
 enough solution is to use the simple ``Documentation/sphinx/tmplcvt`` script,
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 46c030a..fcf05a0 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -698,6 +698,15 @@
 			loops can be debugged more effectively on production
 			systems.
 
+	clocksource.arm_arch_timer.fsl-a008585=
+			[ARM64]
+			Format: <bool>
+			Enable/disable the workaround of Freescale/NXP
+			erratum A-008585.  This can be useful for KVM
+			guests, if the guest device tree doesn't show the
+			erratum.  If unspecified, the workaround is
+			enabled based on the device tree.
+
 	clearcpuid=BITNUM [X86]
 			Disable CPUID feature X for the kernel. See
 			arch/x86/include/asm/cpufeatures.h for the valid bit
@@ -1045,11 +1054,12 @@
 			determined by the stdout-path property in device
 			tree's chosen node.
 
-		cdns,<addr>
-			Start an early, polled-mode console on a cadence serial
-			port at the specified address. The cadence serial port
-			must already be setup and configured. Options are not
-			yet supported.
+		cdns,<addr>[,options]
+			Start an early, polled-mode console on a Cadence
+			(xuartps) serial port at the specified address. Only
+			supported option is baud rate. If baud rate is not
+			specified, the serial port must already be setup and
+			configured.
 
 		uart[8250],io,<addr>[,options]
 		uart[8250],mmio,<addr>[,options]
@@ -1688,7 +1698,7 @@
 
 	intel_idle.max_cstate=	[KNL,HW,ACPI,X86]
 			0	disables intel_idle and fall back on acpi_idle.
-			1 to 6	specify maximum depth of C-state.
+			1 to 9	specify maximum depth of C-state.
 
 	intel_pstate=  [X86]
 		       disable
@@ -2161,10 +2171,13 @@
 			than or equal to this physical address is ignored.
 
 	maxcpus=	[SMP] Maximum number of processors that	an SMP kernel
-			should make use of.  maxcpus=n : n >= 0 limits the
-			kernel to using 'n' processors.  n=0 is a special case,
-			it is equivalent to "nosmp", which also disables
-			the IO APIC.
+			will bring up during bootup.  maxcpus=n : n >= 0 limits
+			the kernel to bring up 'n' processors. Surely after
+			bootup you can bring up the other plugged cpu by executing
+			"echo 1 > /sys/devices/system/cpu/cpuX/online". So maxcpus
+			only takes effect during system bootup.
+			While n=0 is a special case, it is equivalent to "nosmp",
+			which also disables the IO APIC.
 
 	max_loop=	[LOOP] The number of loop block devices that get
 	(loop.max_loop)	unconditionally pre-created at init time. The default
@@ -2571,8 +2584,6 @@
 
 	nodelayacct	[KNL] Disable per-task delay accounting
 
-	nodisconnect	[HW,SCSI,M68K] Disables SCSI disconnects.
-
 	nodsp		[SH] Disable hardware DSP at boot time.
 
 	noefi		Disable EFI runtime services support.
@@ -2773,9 +2784,12 @@
 
 	nr_cpus=	[SMP] Maximum number of processors that	an SMP kernel
 			could support.  nr_cpus=n : n >= 1 limits the kernel to
-			supporting 'n' processors. Later in runtime you can not
-			use hotplug cpu feature to put more cpu back to online.
-			just like you compile the kernel NR_CPUS=n
+			support 'n' processors. It could be larger than the
+			number of already plugged CPU during bootup, later in
+			runtime you can physically add extra cpu until it reaches
+			n. So during boot up some boot time memory for per-cpu
+			variables need be pre-allocated for later physical cpu
+			hot plugging.
 
 	nr_uarts=	[SERIAL] maximum number of UARTs to be registered.
 
@@ -3032,6 +3046,10 @@
 				PAGE_SIZE is used as alignment.
 				PCI-PCI bridge can be specified, if resource
 				windows need to be expanded.
+				To specify the alignment for several
+				instances of a device, the PCI vendor,
+				device, subvendor, and subdevice may be
+				specified, e.g., 4096@pci:8086:9c22:103c:198f
 		ecrc=		Enable/disable PCIe ECRC (transaction layer
 				end-to-end CRC checking).
 				bios: Use BIOS/firmware settings. This is the
@@ -4234,6 +4252,8 @@
 				u = IGNORE_UAS (don't bind to the uas driver);
 				w = NO_WP_DETECT (don't test whether the
 					medium is write-protected).
+				y = ALWAYS_SYNC (issue a SYNCHRONIZE_CACHE
+					even if the device claims no cache)
 			Example: quirks=0419:aaf5:rl,0421:0433:rc
 
 	user_debug=	[KNL,ARM]
diff --git a/Documentation/kmemcheck.txt b/Documentation/kmemcheck.txt
deleted file mode 100644
index 80aae85..0000000
--- a/Documentation/kmemcheck.txt
+++ /dev/null
@@ -1,754 +0,0 @@
-GETTING STARTED WITH KMEMCHECK
-==============================
-
-Vegard Nossum <vegardno@ifi.uio.no>
-
-
-Contents
-========
-0. Introduction
-1. Downloading
-2. Configuring and compiling
-3. How to use
-3.1. Booting
-3.2. Run-time enable/disable
-3.3. Debugging
-3.4. Annotating false positives
-4. Reporting errors
-5. Technical description
-
-
-0. Introduction
-===============
-
-kmemcheck is a debugging feature for the Linux Kernel. More specifically, it
-is a dynamic checker that detects and warns about some uses of uninitialized
-memory.
-
-Userspace programmers might be familiar with Valgrind's memcheck. The main
-difference between memcheck and kmemcheck is that memcheck works for userspace
-programs only, and kmemcheck works for the kernel only. The implementations
-are of course vastly different. Because of this, kmemcheck is not as accurate
-as memcheck, but it turns out to be good enough in practice to discover real
-programmer errors that the compiler is not able to find through static
-analysis.
-
-Enabling kmemcheck on a kernel will probably slow it down to the extent that
-the machine will not be usable for normal workloads such as e.g. an
-interactive desktop. kmemcheck will also cause the kernel to use about twice
-as much memory as normal. For this reason, kmemcheck is strictly a debugging
-feature.
-
-
-1. Downloading
-==============
-
-As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel.
-
-
-2. Configuring and compiling
-============================
-
-kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of
-configuration variables must have specific settings in order for the kmemcheck
-menu to even appear in "menuconfig". These are:
-
-  o CONFIG_CC_OPTIMIZE_FOR_SIZE=n
-
-	This option is located under "General setup" / "Optimize for size".
-
-	Without this, gcc will use certain optimizations that usually lead to
-	false positive warnings from kmemcheck. An example of this is a 16-bit
-	field in a struct, where gcc may load 32 bits, then discard the upper
-	16 bits. kmemcheck sees only the 32-bit load, and may trigger a
-	warning for the upper 16 bits (if they're uninitialized).
-
-  o CONFIG_SLAB=y or CONFIG_SLUB=y
-
-	This option is located under "General setup" / "Choose SLAB
-	allocator".
-
-  o CONFIG_FUNCTION_TRACER=n
-
-	This option is located under "Kernel hacking" / "Tracers" / "Kernel
-	Function Tracer"
-
-	When function tracing is compiled in, gcc emits a call to another
-	function at the beginning of every function. This means that when the
-	page fault handler is called, the ftrace framework will be called
-	before kmemcheck has had a chance to handle the fault. If ftrace then
-	modifies memory that was tracked by kmemcheck, the result is an
-	endless recursive page fault.
-
-  o CONFIG_DEBUG_PAGEALLOC=n
-
-	This option is located under "Kernel hacking" / "Memory Debugging"
-	 / "Debug page memory allocations".
-
-In addition, I highly recommend turning on CONFIG_DEBUG_INFO=y. This is also
-located under "Kernel hacking". With this, you will be able to get line number
-information from the kmemcheck warnings, which is extremely valuable in
-debugging a problem. This option is not mandatory, however, because it slows
-down the compilation process and produces a much bigger kernel image.
-
-Now the kmemcheck menu should be visible (under "Kernel hacking" / "Memory
-Debugging" / "kmemcheck: trap use of uninitialized memory"). Here follows
-a description of the kmemcheck configuration variables:
-
-  o CONFIG_KMEMCHECK
-
-	This must be enabled in order to use kmemcheck at all...
-
-  o CONFIG_KMEMCHECK_[DISABLED | ENABLED | ONESHOT]_BY_DEFAULT
-
-	This option controls the status of kmemcheck at boot-time. "Enabled"
-	will enable kmemcheck right from the start, "disabled" will boot the
-	kernel as normal (but with the kmemcheck code compiled in, so it can
-	be enabled at run-time after the kernel has booted), and "one-shot" is
-	a special mode which will turn kmemcheck off automatically after
-	detecting the first use of uninitialized memory.
-
-	If you are using kmemcheck to actively debug a problem, then you
-	probably want to choose "enabled" here.
-
-	The one-shot mode is mostly useful in automated test setups because it
-	can prevent floods of warnings and increase the chances of the machine
-	surviving in case something is really wrong. In other cases, the one-
-	shot mode could actually be counter-productive because it would turn
-	itself off at the very first error -- in the case of a false positive
-	too -- and this would come in the way of debugging the specific
-	problem you were interested in.
-
-	If you would like to use your kernel as normal, but with a chance to
-	enable kmemcheck in case of some problem, it might be a good idea to
-	choose "disabled" here. When kmemcheck is disabled, most of the run-
-	time overhead is not incurred, and the kernel will be almost as fast
-	as normal.
-
-  o CONFIG_KMEMCHECK_QUEUE_SIZE
-
-	Select the maximum number of error reports to store in an internal
-	(fixed-size) buffer. Since errors can occur virtually anywhere and in
-	any context, we need a temporary storage area which is guaranteed not
-	to generate any other page faults when accessed. The queue will be
-	emptied as soon as a tasklet may be scheduled. If the queue is full,
-	new error reports will be lost.
-
-	The default value of 64 is probably fine. If some code produces more
-	than 64 errors within an irqs-off section, then the code is likely to
-	produce many, many more, too, and these additional reports seldom give
-	any more information (the first report is usually the most valuable
-	anyway).
-
-	This number might have to be adjusted if you are not using serial
-	console or similar to capture the kernel log. If you are using the
-	"dmesg" command to save the log, then getting a lot of kmemcheck
-	warnings might overflow the kernel log itself, and the earlier reports
-	will get lost in that way instead. Try setting this to 10 or so on
-	such a setup.
-
-  o CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT
-
-	Select the number of shadow bytes to save along with each entry of the
-	error-report queue. These bytes indicate what parts of an allocation
-	are initialized, uninitialized, etc. and will be displayed when an
-	error is detected to help the debugging of a particular problem.
-
-	The number entered here is actually the logarithm of the number of
-	bytes that will be saved. So if you pick for example 5 here, kmemcheck
-	will save 2^5 = 32 bytes.
-
-	The default value should be fine for debugging most problems. It also
-	fits nicely within 80 columns.
-
-  o CONFIG_KMEMCHECK_PARTIAL_OK
-
-	This option (when enabled) works around certain GCC optimizations that
-	produce 32-bit reads from 16-bit variables where the upper 16 bits are
-	thrown away afterwards.
-
-	The default value (enabled) is recommended. This may of course hide
-	some real errors, but disabling it would probably produce a lot of
-	false positives.
-
-  o CONFIG_KMEMCHECK_BITOPS_OK
-
-	This option silences warnings that would be generated for bit-field
-	accesses where not all the bits are initialized at the same time. This
-	may also hide some real bugs.
-
-	This option is probably obsolete, or it should be replaced with
-	the kmemcheck-/bitfield-annotations for the code in question. The
-	default value is therefore fine.
-
-Now compile the kernel as usual.
-
-
-3. How to use
-=============
-
-3.1. Booting
-============
-
-First some information about the command-line options. There is only one
-option specific to kmemcheck, and this is called "kmemcheck". It can be used
-to override the default mode as chosen by the CONFIG_KMEMCHECK_*_BY_DEFAULT
-option. Its possible settings are:
-
-  o kmemcheck=0 (disabled)
-  o kmemcheck=1 (enabled)
-  o kmemcheck=2 (one-shot mode)
-
-If SLUB debugging has been enabled in the kernel, it may take precedence over
-kmemcheck in such a way that the slab caches which are under SLUB debugging
-will not be tracked by kmemcheck. In order to ensure that this doesn't happen
-(even though it shouldn't by default), use SLUB's boot option "slub_debug",
-like this: slub_debug=-
-
-In fact, this option may also be used for fine-grained control over SLUB vs.
-kmemcheck. For example, if the command line includes "kmemcheck=1
-slub_debug=,dentry", then SLUB debugging will be used only for the "dentry"
-slab cache, and with kmemcheck tracking all the other caches. This is advanced
-usage, however, and is not generally recommended.
-
-
-3.2. Run-time enable/disable
-============================
-
-When the kernel has booted, it is possible to enable or disable kmemcheck at
-run-time. WARNING: This feature is still experimental and may cause false
-positive warnings to appear. Therefore, try not to use this. If you find that
-it doesn't work properly (e.g. you see an unreasonable amount of warnings), I
-will be happy to take bug reports.
-
-Use the file /proc/sys/kernel/kmemcheck for this purpose, e.g.:
-
-	$ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck
-
-The numbers are the same as for the kmemcheck= command-line option.
-
-
-3.3. Debugging
-==============
-
-A typical report will look something like this:
-
-WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
-80000000000000000000000000000000000000000088ffff0000000000000000
- i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
-         ^
-
-Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A
-RIP: 0010:[<ffffffff8104ede8>]  [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
-RSP: 0018:ffff88003cdf7d98  EFLAGS: 00210002
-RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
-RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84
-RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000
-R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e
-R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8
-FS:  0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000
-CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
-CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0
-DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
-DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400
- [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
- [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
- [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
- [<ffffffff8100c7b5>] int_signal+0x12/0x17
- [<ffffffffffffffff>] 0xffffffffffffffff
-
-The single most valuable information in this report is the RIP (or EIP on 32-
-bit) value. This will help us pinpoint exactly which instruction that caused
-the warning.
-
-If your kernel was compiled with CONFIG_DEBUG_INFO=y, then all we have to do
-is give this address to the addr2line program, like this:
-
-	$ addr2line -e vmlinux -i ffffffff8104ede8
-	arch/x86/include/asm/string_64.h:12
-	include/asm-generic/siginfo.h:287
-	kernel/signal.c:380
-	kernel/signal.c:410
-
-The "-e vmlinux" tells addr2line which file to look in. IMPORTANT: This must
-be the vmlinux of the kernel that produced the warning in the first place! If
-not, the line number information will almost certainly be wrong.
-
-The "-i" tells addr2line to also print the line numbers of inlined functions.
-In this case, the flag was very important, because otherwise, it would only
-have printed the first line, which is just a call to memcpy(), which could be
-called from a thousand places in the kernel, and is therefore not very useful.
-These inlined functions would not show up in the stack trace above, simply
-because the kernel doesn't load the extra debugging information. This
-technique can of course be used with ordinary kernel oopses as well.
-
-In this case, it's the caller of memcpy() that is interesting, and it can be
-found in include/asm-generic/siginfo.h, line 287:
-
-281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
-282 {
-283         if (from->si_code < 0)
-284                 memcpy(to, from, sizeof(*to));
-285         else
-286                 /* _sigchld is currently the largest know union member */
-287                 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
-288 }
-
-Since this was a read (kmemcheck usually warns about reads only, though it can
-warn about writes to unallocated or freed memory as well), it was probably the
-"from" argument which contained some uninitialized bytes. Following the chain
-of calls, we move upwards to see where "from" was allocated or initialized,
-kernel/signal.c, line 380:
-
-359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
-360 {
-...
-367         list_for_each_entry(q, &list->list, list) {
-368                 if (q->info.si_signo == sig) {
-369                         if (first)
-370                                 goto still_pending;
-371                         first = q;
-...
-377         if (first) {
-378 still_pending:
-379                 list_del_init(&first->list);
-380                 copy_siginfo(info, &first->info);
-381                 __sigqueue_free(first);
-...
-392         }
-393 }
-
-Here, it is &first->info that is being passed on to copy_siginfo(). The
-variable "first" was found on a list -- passed in as the second argument to
-collect_signal(). We  continue our journey through the stack, to figure out
-where the item on "list" was allocated or initialized. We move to line 410:
-
-395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
-396                         siginfo_t *info)
-397 {
-...
-410                 collect_signal(sig, pending, info);
-...
-414 }
-
-Now we need to follow the "pending" pointer, since that is being passed on to
-collect_signal() as "list". At this point, we've run out of lines from the
-"addr2line" output. Not to worry, we just paste the next addresses from the
-kmemcheck stack dump, i.e.:
-
- [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
- [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
- [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
- [<ffffffff8100c7b5>] int_signal+0x12/0x17
-
-	$ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \
-		ffffffff8100b87d ffffffff8100c7b5
-	kernel/signal.c:446
-	kernel/signal.c:1806
-	arch/x86/kernel/signal.c:805
-	arch/x86/kernel/signal.c:871
-	arch/x86/kernel/entry_64.S:694
-
-Remember that since these addresses were found on the stack and not as the
-RIP value, they actually point to the _next_ instruction (they are return
-addresses). This becomes obvious when we look at the code for line 446:
-
-422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
-423 {
-...
-431                 signr = __dequeue_signal(&tsk->signal->shared_pending,
-432                                          mask, info);
-433                 /*
-434                  * itimer signal ?
-435                  *
-436                  * itimers are process shared and we restart periodic
-437                  * itimers in the signal delivery path to prevent DoS
-438                  * attacks in the high resolution timer case. This is
-439                  * compliant with the old way of self restarting
-440                  * itimers, as the SIGALRM is a legacy signal and only
-441                  * queued once. Changing the restart behaviour to
-442                  * restart the timer in the signal dequeue path is
-443                  * reducing the timer noise on heavy loaded !highres
-444                  * systems too.
-445                  */
-446                 if (unlikely(signr == SIGALRM)) {
-...
-489 }
-
-So instead of looking at 446, we should be looking at 431, which is the line
-that executes just before 446. Here we see that what we are looking for is
-&tsk->signal->shared_pending.
-
-Our next task is now to figure out which function that puts items on this
-"shared_pending" list. A crude, but efficient tool, is git grep:
-
-	$ git grep -n 'shared_pending' kernel/
-	...
-	kernel/signal.c:828:    pending = group ? &t->signal->shared_pending : &t->pending;
-	kernel/signal.c:1339:   pending = group ? &t->signal->shared_pending : &t->pending;
-	...
-
-There were more results, but none of them were related to list operations,
-and these were the only assignments. We inspect the line numbers more closely
-and find that this is indeed where items are being added to the list:
-
-816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
-817                         int group)
-818 {
-...
-828         pending = group ? &t->signal->shared_pending : &t->pending;
-...
-851         q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
-852                                              (is_si_special(info) ||
-853                                               info->si_code >= 0)));
-854         if (q) {
-855                 list_add_tail(&q->list, &pending->list);
-...
-890 }
-
-and:
-
-1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
-1310 {
-....
-1339         pending = group ? &t->signal->shared_pending : &t->pending;
-1340         list_add_tail(&q->list, &pending->list);
-....
-1347 }
-
-In the first case, the list element we are looking for, "q", is being returned
-from the function __sigqueue_alloc(), which looks like an allocation function.
-Let's take a look at it:
-
-187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
-188                                          int override_rlimit)
-189 {
-190         struct sigqueue *q = NULL;
-191         struct user_struct *user;
-192 
-193         /*
-194          * We won't get problems with the target's UID changing under us
-195          * because changing it requires RCU be used, and if t != current, the
-196          * caller must be holding the RCU readlock (by way of a spinlock) and
-197          * we use RCU protection here
-198          */
-199         user = get_uid(__task_cred(t)->user);
-200         atomic_inc(&user->sigpending);
-201         if (override_rlimit ||
-202             atomic_read(&user->sigpending) <=
-203                         t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
-204                 q = kmem_cache_alloc(sigqueue_cachep, flags);
-205         if (unlikely(q == NULL)) {
-206                 atomic_dec(&user->sigpending);
-207                 free_uid(user);
-208         } else {
-209                 INIT_LIST_HEAD(&q->list);
-210                 q->flags = 0;
-211                 q->user = user;
-212         }
-213 
-214         return q;
-215 }
-
-We see that this function initializes q->list, q->flags, and q->user. It seems
-that now is the time to look at the definition of "struct sigqueue", e.g.:
-
-14 struct sigqueue {
-15         struct list_head list;
-16         int flags;
-17         siginfo_t info;
-18         struct user_struct *user;
-19 };
-
-And, you might remember, it was a memcpy() on &first->info that caused the
-warning, so this makes perfect sense. It also seems reasonable to assume that
-it is the caller of __sigqueue_alloc() that has the responsibility of filling
-out (initializing) this member.
-
-But just which fields of the struct were uninitialized? Let's look at
-kmemcheck's report again:
-
-WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
-80000000000000000000000000000000000000000088ffff0000000000000000
- i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
-         ^
-
-These first two lines are the memory dump of the memory object itself, and the
-shadow bytemap, respectively. The memory object itself is in this case
-&first->info. Just beware that the start of this dump is NOT the start of the
-object itself! The position of the caret (^) corresponds with the address of
-the read (ffff88003e4a2024).
-
-The shadow bytemap dump legend is as follows:
-
-  i - initialized
-  u - uninitialized
-  a - unallocated (memory has been allocated by the slab layer, but has not
-      yet been handed off to anybody)
-  f - freed (memory has been allocated by the slab layer, but has been freed
-      by the previous owner)
-
-In order to figure out where (relative to the start of the object) the
-uninitialized memory was located, we have to look at the disassembly. For
-that, we'll need the RIP address again:
-
-RIP: 0010:[<ffffffff8104ede8>]  [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
-
-	$ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8:
-	ffffffff8104edc8:       mov    %r8,0x8(%r8)
-	ffffffff8104edcc:       test   %r10d,%r10d
-	ffffffff8104edcf:       js     ffffffff8104ee88 <__dequeue_signal+0x168>
-	ffffffff8104edd5:       mov    %rax,%rdx
-	ffffffff8104edd8:       mov    $0xc,%ecx
-	ffffffff8104eddd:       mov    %r13,%rdi
-	ffffffff8104ede0:       mov    $0x30,%eax
-	ffffffff8104ede5:       mov    %rdx,%rsi
-	ffffffff8104ede8:       rep movsl %ds:(%rsi),%es:(%rdi)
-	ffffffff8104edea:       test   $0x2,%al
-	ffffffff8104edec:       je     ffffffff8104edf0 <__dequeue_signal+0xd0>
-	ffffffff8104edee:       movsw  %ds:(%rsi),%es:(%rdi)
-	ffffffff8104edf0:       test   $0x1,%al
-	ffffffff8104edf2:       je     ffffffff8104edf5 <__dequeue_signal+0xd5>
-	ffffffff8104edf4:       movsb  %ds:(%rsi),%es:(%rdi)
-	ffffffff8104edf5:       mov    %r8,%rdi
-	ffffffff8104edf8:       callq  ffffffff8104de60 <__sigqueue_free>
-
-As expected, it's the "rep movsl" instruction from the memcpy() that causes
-the warning. We know about REP MOVSL that it uses the register RCX to count
-the number of remaining iterations. By taking a look at the register dump
-again (from the kmemcheck report), we can figure out how many bytes were left
-to copy:
-
-RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
-
-By looking at the disassembly, we also see that %ecx is being loaded with the
-value $0xc just before (ffffffff8104edd8), so we are very lucky. Keep in mind
-that this is the number of iterations, not bytes. And since this is a "long"
-operation, we need to multiply by 4 to get the number of bytes. So this means
-that the uninitialized value was encountered at 4 * (0xc - 0x9) = 12 bytes
-from the start of the object.
-
-We can now try to figure out which field of the "struct siginfo" that was not
-initialized. This is the beginning of the struct:
-
-40 typedef struct siginfo {
-41         int si_signo;
-42         int si_errno;
-43         int si_code;
-44                 
-45         union {
-..
-92         } _sifields;
-93 } siginfo_t;
-
-On 64-bit, the int is 4 bytes long, so it must the union member that has
-not been initialized. We can verify this using gdb:
-
-	$ gdb vmlinux
-	...
-	(gdb) p &((struct siginfo *) 0)->_sifields
-	$1 = (union {...} *) 0x10
-
-Actually, it seems that the union member is located at offset 0x10 -- which
-means that gcc has inserted 4 bytes of padding between the members si_code
-and _sifields. We can now get a fuller picture of the memory dump:
-
-         _----------------------------=> si_code
-        /        _--------------------=> (padding)
-       |        /        _------------=> _sifields(._kill._pid)
-       |       |        /        _----=> _sifields(._kill._uid)
-       |       |       |        / 
--------|-------|-------|-------|
-80000000000000000000000000000000000000000088ffff0000000000000000
- i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
-
-This allows us to realize another important fact: si_code contains the value
-0x80. Remember that x86 is little endian, so the first 4 bytes "80000000" are
-really the number 0x00000080. With a bit of research, we find that this is
-actually the constant SI_KERNEL defined in include/asm-generic/siginfo.h:
-
-144 #define SI_KERNEL       0x80            /* sent by the kernel from somewhere     */
-
-This macro is used in exactly one place in the x86 kernel: In send_signal()
-in kernel/signal.c:
-
-816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
-817                         int group)
-818 {
-...
-828         pending = group ? &t->signal->shared_pending : &t->pending;
-...
-851         q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
-852                                              (is_si_special(info) ||
-853                                               info->si_code >= 0)));
-854         if (q) {
-855                 list_add_tail(&q->list, &pending->list);
-856                 switch ((unsigned long) info) {
-...
-865                 case (unsigned long) SEND_SIG_PRIV:
-866                         q->info.si_signo = sig;
-867                         q->info.si_errno = 0;
-868                         q->info.si_code = SI_KERNEL;
-869                         q->info.si_pid = 0;
-870                         q->info.si_uid = 0;
-871                         break;
-...
-890 }
-
-Not only does this match with the .si_code member, it also matches the place
-we found earlier when looking for where siginfo_t objects are enqueued on the
-"shared_pending" list.
-
-So to sum up: It seems that it is the padding introduced by the compiler
-between two struct fields that is uninitialized, and this gets reported when
-we do a memcpy() on the struct. This means that we have identified a false
-positive warning.
-
-Normally, kmemcheck will not report uninitialized accesses in memcpy() calls
-when both the source and destination addresses are tracked. (Instead, we copy
-the shadow bytemap as well). In this case, the destination address clearly
-was not tracked. We can dig a little deeper into the stack trace from above:
-
-	arch/x86/kernel/signal.c:805
-	arch/x86/kernel/signal.c:871
-	arch/x86/kernel/entry_64.S:694
-
-And we clearly see that the destination siginfo object is located on the
-stack:
-
-782 static void do_signal(struct pt_regs *regs)
-783 {
-784         struct k_sigaction ka;
-785         siginfo_t info;
-...
-804         signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-...
-854 }
-
-And this &info is what eventually gets passed to copy_siginfo() as the
-destination argument.
-
-Now, even though we didn't find an actual error here, the example is still a
-good one, because it shows how one would go about to find out what the report
-was all about.
-
-
-3.4. Annotating false positives
-===============================
-
-There are a few different ways to make annotations in the source code that
-will keep kmemcheck from checking and reporting certain allocations. Here
-they are:
-
-  o __GFP_NOTRACK_FALSE_POSITIVE
-
-	This flag can be passed to kmalloc() or kmem_cache_alloc() (therefore
-	also to other functions that end up calling one of these) to indicate
-	that the allocation should not be tracked because it would lead to
-	a false positive report. This is a "big hammer" way of silencing
-	kmemcheck; after all, even if the false positive pertains to 
-	particular field in a struct, for example, we will now lose the
-	ability to find (real) errors in other parts of the same struct.
-
-	Example:
-
-	    /* No warnings will ever trigger on accessing any part of x */
-	    x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE);
-
-  o kmemcheck_bitfield_begin(name)/kmemcheck_bitfield_end(name) and
-	kmemcheck_annotate_bitfield(ptr, name)
-
-	The first two of these three macros can be used inside struct
-	definitions to signal, respectively, the beginning and end of a
-	bitfield. Additionally, this will assign the bitfield a name, which
-	is given as an argument to the macros.
-
-	Having used these markers, one can later use
-	kmemcheck_annotate_bitfield() at the point of allocation, to indicate
-	which parts of the allocation is part of a bitfield.
-
-	Example:
-
-	    struct foo {
-		int x;
-
-		kmemcheck_bitfield_begin(flags);
-		int flag_a:1;
-		int flag_b:1;
-		kmemcheck_bitfield_end(flags);
-
-		int y;
-	    };
-
-	    struct foo *x = kmalloc(sizeof *x);
-
-	    /* No warnings will trigger on accessing the bitfield of x */
-	    kmemcheck_annotate_bitfield(x, flags);
-
-	Note that kmemcheck_annotate_bitfield() can be used even before the
-	return value of kmalloc() is checked -- in other words, passing NULL
-	as the first argument is legal (and will do nothing).
-
-
-4. Reporting errors
-===================
-
-As we have seen, kmemcheck will produce false positive reports. Therefore, it
-is not very wise to blindly post kmemcheck warnings to mailing lists and
-maintainers. Instead, I encourage maintainers and developers to find errors
-in their own code. If you get a warning, you can try to work around it, try
-to figure out if it's a real error or not, or simply ignore it. Most
-developers know their own code and will quickly and efficiently determine the
-root cause of a kmemcheck report. This is therefore also the most efficient
-way to work with kmemcheck.
-
-That said, we (the kmemcheck maintainers) will always be on the lookout for
-false positives that we can annotate and silence. So whatever you find,
-please drop us a note privately! Kernel configs and steps to reproduce (if
-available) are of course a great help too.
-
-Happy hacking!
-
-
-5. Technical description
-========================
-
-kmemcheck works by marking memory pages non-present. This means that whenever
-somebody attempts to access the page, a page fault is generated. The page
-fault handler notices that the page was in fact only hidden, and so it calls
-on the kmemcheck code to make further investigations.
-
-When the investigations are completed, kmemcheck "shows" the page by marking
-it present (as it would be under normal circumstances). This way, the
-interrupted code can continue as usual.
-
-But after the instruction has been executed, we should hide the page again, so
-that we can catch the next access too! Now kmemcheck makes use of a debugging
-feature of the processor, namely single-stepping. When the processor has
-finished the one instruction that generated the memory access, a debug
-exception is raised. From here, we simply hide the page again and continue
-execution, this time with the single-stepping feature turned off.
-
-kmemcheck requires some assistance from the memory allocator in order to work.
-The memory allocator needs to
-
-  1. Tell kmemcheck about newly allocated pages and pages that are about to
-     be freed. This allows kmemcheck to set up and tear down the shadow memory
-     for the pages in question. The shadow memory stores the status of each
-     byte in the allocation proper, e.g. whether it is initialized or
-     uninitialized.
-
-  2. Tell kmemcheck which parts of memory should be marked uninitialized.
-     There are actually a few more states, such as "not yet allocated" and
-     "recently freed".
-
-If a slab cache is set up using the SLAB_NOTRACK flag, it will never return
-memory that can take page faults because of kmemcheck.
-
-If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still
-request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags.
-This does not prevent the page faults from occurring, however, but marks the
-object in question as being initialized so that no warnings will ever be
-produced for this object.
-
-Currently, the SLAB and SLUB allocators are supported by kmemcheck.
diff --git a/Documentation/ko_KR/memory-barriers.txt b/Documentation/ko_KR/memory-barriers.txt
new file mode 100644
index 0000000..34d3d38
--- /dev/null
+++ b/Documentation/ko_KR/memory-barriers.txt
@@ -0,0 +1,3135 @@
+NOTE:
+This is a version of Documentation/memory-barriers.txt translated into Korean.
+This document is maintained by SeongJae Park <sj38.park@gmail.com>.
+If you find any difference between this document and the original file or
+a problem with the translation, please contact the maintainer of this file.
+
+Please also note that the purpose of this file is to be easier to
+read for non English (read: Korean) speakers and is not intended as
+a fork.  So if you have any comments or updates for this file please
+update the original English file first.  The English version is
+definitive, and readers should look there if they have any doubt.
+
+===================================
+이 문서는
+Documentation/memory-barriers.txt
+의 한글 번역입니다.
+
+역자: 박성재 <sj38.park@gmail.com>
+===================================
+
+
+			 =========================
+			 리눅스 커널 메모리 배리어
+			 =========================
+
+저자: David Howells <dhowells@redhat.com>
+      Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+      Will Deacon <will.deacon@arm.com>
+      Peter Zijlstra <peterz@infradead.org>
+
+========
+면책조항
+========
+
+이 문서는 명세서가 아닙니다; 이 문서는 완벽하지 않은데, 간결성을 위해 의도된
+부분도 있고, 의도하진 않았지만 사람에 의해 쓰였다보니 불완전한 부분도 있습니다.
+이 문서는 리눅스에서 제공하는 다양한 메모리 배리어들을 사용하기 위한
+안내서입니다만, 뭔가 이상하다 싶으면 (그런게 많을 겁니다) 질문을 부탁드립니다.
+
+다시 말하지만, 이 문서는 리눅스가 하드웨어에 기대하는 사항에 대한 명세서가
+아닙니다.
+
+이 문서의 목적은 두가지입니다:
+
+ (1) 어떤 특정 배리어에 대해 기대할 수 있는 최소한의 기능을 명세하기 위해서,
+     그리고
+
+ (2) 사용 가능한 배리어들에 대해 어떻게 사용해야 하는지에 대한 안내를 제공하기
+     위해서.
+
+어떤 아키텍쳐는 특정한 배리어들에 대해서는 여기서 이야기하는 최소한의
+요구사항들보다 많은 기능을 제공할 수도 있습니다만, 여기서 이야기하는
+요구사항들을 충족하지 않는 아키텍쳐가 있다면 그 아키텍쳐가 잘못된 것이란 점을
+알아두시기 바랍니다.
+
+또한, 특정 아키텍쳐에서 일부 배리어는 해당 아키텍쳐의 특수한 동작 방식으로 인해
+해당 배리어의 명시적 사용이 불필요해서 no-op 이 될수도 있음을 알아두시기
+바랍니다.
+
+역자: 본 번역 역시 완벽하지 않은데, 이 역시 부분적으로는 의도된 것이기도
+합니다.  여타 기술 문서들이 그렇듯 완벽한 이해를 위해서는 번역문과 원문을 함께
+읽으시되 번역문을 하나의 가이드로 활용하시길 추천드리며, 발견되는 오역 등에
+대해서는 언제든 의견을 부탁드립니다.  과한 번역으로 인한 오해를 최소화하기 위해
+애매한 부분이 있을 경우에는 어색함이 있더라도 원래의 용어를 차용합니다.
+
+
+=====
+목차:
+=====
+
+ (*) 추상 메모리 액세스 모델.
+
+     - 디바이스 오퍼레이션.
+     - 보장사항.
+
+ (*) 메모리 배리어란 무엇인가?
+
+     - 메모리 배리어의 종류.
+     - 메모리 배리어에 대해 가정해선 안될 것.
+     - 데이터 의존성 배리어.
+     - 컨트롤 의존성.
+     - SMP 배리어 짝맞추기.
+     - 메모리 배리어 시퀀스의 예.
+     - 읽기 메모리 배리어 vs 로드 예측.
+     - 이행성
+
+ (*) 명시적 커널 배리어.
+
+     - 컴파일러 배리어.
+     - CPU 메모리 배리어.
+     - MMIO 쓰기 배리어.
+
+ (*) 암묵적 커널 메모리 배리어.
+
+     - 락 Acquisition 함수.
+     - 인터럽트 비활성화 함수.
+     - 슬립과 웨이크업 함수.
+     - 그외의 함수들.
+
+ (*) CPU 간 ACQUIRING 배리어의 효과.
+
+     - Acquire vs 메모리 액세스.
+     - Acquire vs I/O 액세스.
+
+ (*) 메모리 배리어가 필요한 곳
+
+     - 프로세서간 상호 작용.
+     - 어토믹 오퍼레이션.
+     - 디바이스 액세스.
+     - 인터럽트.
+
+ (*) 커널 I/O 배리어의 효과.
+
+ (*) 가정되는 가장 완화된 실행 순서 모델.
+
+ (*) CPU 캐시의 영향.
+
+     - 캐시 일관성.
+     - 캐시 일관성 vs DMA.
+     - 캐시 일관성 vs MMIO.
+
+ (*) CPU 들이 저지르는 일들.
+
+     - 그리고, Alpha 가 있다.
+     - 가상 머신 게스트.
+
+ (*) 사용 예.
+
+     - 순환식 버퍼.
+
+ (*) 참고 문헌.
+
+
+=======================
+추상 메모리 액세스 모델
+=======================
+
+다음과 같이 추상화된 시스템 모델을 생각해 봅시다:
+
+		            :                :
+		            :                :
+		            :                :
+		+-------+   :   +--------+   :   +-------+
+		|       |   :   |        |   :   |       |
+		|       |   :   |        |   :   |       |
+		| CPU 1 |<----->| Memory |<----->| CPU 2 |
+		|       |   :   |        |   :   |       |
+		|       |   :   |        |   :   |       |
+		+-------+   :   +--------+   :   +-------+
+		    ^       :       ^        :       ^
+		    |       :       |        :       |
+		    |       :       |        :       |
+		    |       :       v        :       |
+		    |       :   +--------+   :       |
+		    |       :   |        |   :       |
+		    |       :   |        |   :       |
+		    +---------->| Device |<----------+
+		            :   |        |   :
+		            :   |        |   :
+		            :   +--------+   :
+		            :                :
+
+프로그램은 여러 메모리 액세스 오퍼레이션을 발생시키고, 각각의 CPU 는 그런
+프로그램들을 실행합니다.  추상화된 CPU 모델에서 메모리 오퍼레이션들의 순서는
+매우 완화되어 있고, CPU 는 프로그램이 인과관계를 어기지 않는 상태로 관리된다고
+보일 수만 있다면 메모리 오퍼레이션을 자신이 원하는 어떤 순서대로든 재배치해
+동작시킬 수 있습니다.  비슷하게, 컴파일러 또한 프로그램의 정상적 동작을 해치지
+않는 한도 내에서는 어떤 순서로든 자신이 원하는 대로 인스트럭션을 재배치 할 수
+있습니다.
+
+따라서 위의 다이어그램에서 한 CPU가 동작시키는 메모리 오퍼레이션이 만들어내는
+변화는 해당 오퍼레이션이 CPU 와 시스템의 다른 부분들 사이의 인터페이스(점선)를
+지나가면서 시스템의 나머지 부분들에 인지됩니다.
+
+
+예를 들어, 다음의 일련의 이벤트들을 생각해 봅시다:
+
+	CPU 1		CPU 2
+	===============	===============
+	{ A == 1; B == 2 }
+	A = 3;		x = B;
+	B = 4;		y = A;
+
+다이어그램의 가운데에 위치한 메모리 시스템에 보여지게 되는 액세스들은 다음의 총
+24개의 조합으로 재구성될 수 있습니다:
+
+	STORE A=3,	STORE B=4,	y=LOAD A->3,	x=LOAD B->4
+	STORE A=3,	STORE B=4,	x=LOAD B->4,	y=LOAD A->3
+	STORE A=3,	y=LOAD A->3,	STORE B=4,	x=LOAD B->4
+	STORE A=3,	y=LOAD A->3,	x=LOAD B->2,	STORE B=4
+	STORE A=3,	x=LOAD B->2,	STORE B=4,	y=LOAD A->3
+	STORE A=3,	x=LOAD B->2,	y=LOAD A->3,	STORE B=4
+	STORE B=4,	STORE A=3,	y=LOAD A->3,	x=LOAD B->4
+	STORE B=4, ...
+	...
+
+따라서 다음의 네가지 조합의 값들이 나올 수 있습니다:
+
+	x == 2, y == 1
+	x == 2, y == 3
+	x == 4, y == 1
+	x == 4, y == 3
+
+
+한발 더 나아가서, 한 CPU 가 메모리 시스템에 반영한 스토어 오퍼레이션들의 결과는
+다른 CPU 에서의 로드 오퍼레이션을 통해 인지되는데, 이 때 스토어가 반영된 순서와
+다른 순서로 인지될 수도 있습니다.
+
+
+예로, 아래의 일련의 이벤트들을 생각해 봅시다:
+
+	CPU 1		CPU 2
+	===============	===============
+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
+	B = 4;		Q = P;
+	P = &B		D = *Q;
+
+D 로 읽혀지는 값은 CPU 2 에서 P 로부터 읽혀진 주소값에 의존적이기 때문에 여기엔
+분명한 데이터 의존성이 있습니다.  하지만 이 이벤트들의 실행 결과로는 아래의
+결과들이 모두 나타날 수 있습니다:
+
+	(Q == &A) and (D == 1)
+	(Q == &B) and (D == 2)
+	(Q == &B) and (D == 4)
+
+CPU 2 는 *Q 의 로드를 요청하기 전에 P 를 Q 에 넣기 때문에 D 에 C 를 집어넣는
+일은 없음을 알아두세요.
+
+
+디바이스 오퍼레이션
+-------------------
+
+일부 디바이스는 자신의 컨트롤 인터페이스를 메모리의 특정 영역으로 매핑해서
+제공하는데(Memory mapped I/O), 해당 컨트롤 레지스터에 접근하는 순서는 매우
+중요합니다.  예를 들어, 어드레스 포트 레지스터 (A) 와 데이터 포트 레지스터 (D)
+를 통해 접근되는 내부 레지스터 집합을 갖는 이더넷 카드를 생각해 봅시다.  내부의
+5번 레지스터를 읽기 위해 다음의 코드가 사용될 수 있습니다:
+
+	*A = 5;
+	x = *D;
+
+하지만, 이건 다음의 두 조합 중 하나로 만들어질 수 있습니다:
+
+	STORE *A = 5, x = LOAD *D
+	x = LOAD *D, STORE *A = 5
+
+두번째 조합은 데이터를 읽어온 _후에_ 주소를 설정하므로, 오동작을 일으킬 겁니다.
+
+
+보장사항
+--------
+
+CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니다:
+
+ (*) 어떤 CPU 든, 의존성이 존재하는 메모리 액세스들은 해당 CPU 자신에게
+     있어서는 순서대로 메모리 시스템에 수행 요청됩니다. 즉, 다음에 대해서:
+
+	Q = READ_ONCE(P); smp_read_barrier_depends(); D = READ_ONCE(*Q);
+
+     CPU 는 다음과 같은 메모리 오퍼레이션 시퀀스를 수행 요청합니다:
+
+	Q = LOAD P, D = LOAD *Q
+
+     그리고 그 시퀀스 내에서의 순서는 항상 지켜집니다.  대부분의 시스템에서
+     smp_read_barrier_depends() 는 아무일도 안하지만 DEC Alpha 에서는
+     명시적으로 사용되어야 합니다.  보통의 경우에는 smp_read_barrier_depends()
+     를 직접 사용하는 대신 rcu_dereference() 같은 것들을 사용해야 함을
+     알아두세요.
+
+ (*) 특정 CPU 내에서 겹치는 영역의 메모리에 행해지는 로드와 스토어 들은 해당
+     CPU 안에서는 순서가 바뀌지 않은 것으로 보여집니다.  즉, 다음에 대해서:
+
+	a = READ_ONCE(*X); WRITE_ONCE(*X, b);
+
+     CPU 는 다음의 메모리 오퍼레이션 시퀀스만을 메모리에 요청할 겁니다:
+
+	a = LOAD *X, STORE *X = b
+
+     그리고 다음에 대해서는:
+
+	WRITE_ONCE(*X, c); d = READ_ONCE(*X);
+
+     CPU 는 다음의 수행 요청만을 만들어 냅니다:
+
+	STORE *X = c, d = LOAD *X
+
+     (로드 오퍼레이션과 스토어 오퍼레이션이 겹치는 메모리 영역에 대해
+     수행된다면 해당 오퍼레이션들은 겹친다고 표현됩니다).
+
+그리고 _반드시_ 또는 _절대로_ 가정하거나 가정하지 말아야 하는 것들이 있습니다:
+
+ (*) 컴파일러가 READ_ONCE() 나 WRITE_ONCE() 로 보호되지 않은 메모리 액세스를
+     당신이 원하는 대로 할 것이라는 가정은 _절대로_ 해선 안됩니다.  그것들이
+     없다면, 컴파일러는 컴파일러 배리어 섹션에서 다루게 될, 모든 "창의적인"
+     변경들을 만들어낼 권한을 갖게 됩니다.
+
+ (*) 개별적인 로드와 스토어들이 주어진 순서대로 요청될 것이라는 가정은 _절대로_
+     하지 말아야 합니다.  이 말은 곧:
+
+	X = *A; Y = *B; *D = Z;
+
+     는 다음의 것들 중 어느 것으로든 만들어질 수 있다는 의미입니다:
+
+	X = LOAD *A,  Y = LOAD *B,  STORE *D = Z
+	X = LOAD *A,  STORE *D = Z, Y = LOAD *B
+	Y = LOAD *B,  X = LOAD *A,  STORE *D = Z
+	Y = LOAD *B,  STORE *D = Z, X = LOAD *A
+	STORE *D = Z, X = LOAD *A,  Y = LOAD *B
+	STORE *D = Z, Y = LOAD *B,  X = LOAD *A
+
+ (*) 겹치는 메모리 액세스들은 합쳐지거나 버려질 수 있음을 _반드시_ 가정해야
+     합니다.  다음의 코드는:
+
+	X = *A; Y = *(A + 4);
+
+     다음의 것들 중 뭐든 될 수 있습니다:
+
+	X = LOAD *A; Y = LOAD *(A + 4);
+	Y = LOAD *(A + 4); X = LOAD *A;
+	{X, Y} = LOAD {*A, *(A + 4) };
+
+     그리고:
+
+	*A = X; *(A + 4) = Y;
+
+     는 다음 중 뭐든 될 수 있습니다:
+
+	STORE *A = X; STORE *(A + 4) = Y;
+	STORE *(A + 4) = Y; STORE *A = X;
+	STORE {*A, *(A + 4) } = {X, Y};
+
+그리고 보장사항에 반대되는 것들(anti-guarantees)이 있습니다:
+
+ (*) 이 보장사항들은 bitfield 에는 적용되지 않는데, 컴파일러들은 bitfield 를
+     수정하는 코드를 생성할 때 원자성 없는(non-atomic) 읽고-수정하고-쓰는
+     인스트럭션들의 조합을 만드는 경우가 많기 때문입니다.  병렬 알고리즘의
+     동기화에 bitfield 를 사용하려 하지 마십시오.
+
+ (*) bitfield 들이 여러 락으로 보호되는 경우라 하더라도, 하나의 bitfield 의
+     모든 필드들은 하나의 락으로 보호되어야 합니다.  만약 한 bitfield 의 두
+     필드가 서로 다른 락으로 보호된다면, 컴파일러의 원자성 없는
+     읽고-수정하고-쓰는 인스트럭션 조합은 한 필드에의 업데이트가 근처의
+     필드에도 영향을 끼치게 할 수 있습니다.
+
+ (*) 이 보장사항들은 적절하게 정렬되고 크기가 잡힌 스칼라 변수들에 대해서만
+     적용됩니다.  "적절하게 크기가 잡힌" 이라함은 현재로써는 "char", "short",
+     "int" 그리고 "long" 과 같은 크기의 변수들을 의미합니다.  "적절하게 정렬된"
+     은 자연스런 정렬을 의미하는데, 따라서 "char" 에 대해서는 아무 제약이 없고,
+     "short" 에 대해서는 2바이트 정렬을, "int" 에는 4바이트 정렬을, 그리고
+     "long" 에 대해서는 32-bit 시스템인지 64-bit 시스템인지에 따라 4바이트 또는
+     8바이트 정렬을 의미합니다.  이 보장사항들은 C11 표준에서 소개되었으므로,
+     C11 전의 오래된 컴파일러(예를 들어, gcc 4.6) 를 사용할 때엔 주의하시기
+     바랍니다.  표준에 이 보장사항들은 "memory location" 을 정의하는 3.14
+     섹션에 다음과 같이 설명되어 있습니다:
+     (역자: 인용문이므로 번역하지 않습니다)
+
+	memory location
+		either an object of scalar type, or a maximal sequence
+		of adjacent bit-fields all having nonzero width
+
+		NOTE 1: Two threads of execution can update and access
+		separate memory locations without interfering with
+		each other.
+
+		NOTE 2: A bit-field and an adjacent non-bit-field member
+		are in separate memory locations. The same applies
+		to two bit-fields, if one is declared inside a nested
+		structure declaration and the other is not, or if the two
+		are separated by a zero-length bit-field declaration,
+		or if they are separated by a non-bit-field member
+		declaration. It is not safe to concurrently update two
+		bit-fields in the same structure if all members declared
+		between them are also bit-fields, no matter what the
+		sizes of those intervening bit-fields happen to be.
+
+
+=========================
+메모리 배리어란 무엇인가?
+=========================
+
+앞에서 봤듯이, 상호간 의존성이 없는 메모리 오퍼레이션들은 실제로는 무작위적
+순서로 수행될 수 있으며, 이는 CPU 와 CPU 간의 상호작용이나 I/O 에 문제가 될 수
+있습니다.  따라서 컴파일러와 CPU 가 순서를 바꾸는데 제약을 걸 수 있도록 개입할
+수 있는 어떤 방법이 필요합니다.
+
+메모리 배리어는 그런 개입 수단입니다.  메모리 배리어는 배리어를 사이에 둔 앞과
+뒤 양측의 메모리 오퍼레이션들 간에 부분적 순서가 존재하도록 하는 효과를 줍니다.
+
+시스템의 CPU 들과 여러 디바이스들은 성능을 올리기 위해 명령어 재배치, 실행
+유예, 메모리 오퍼레이션들의 조합, 예측적 로드(speculative load), 브랜치
+예측(speculative branch prediction), 다양한 종류의 캐싱(caching) 등의 다양한
+트릭을 사용할 수 있기 때문에 이런 강제력은 중요합니다.  메모리 배리어들은 이런
+트릭들을 무효로 하거나 억제하는 목적으로 사용되어져서 코드가 여러 CPU 와
+디바이스들 간의 상호작용을 정상적으로 제어할 수 있게 해줍니다.
+
+
+메모리 배리어의 종류
+--------------------
+
+메모리 배리어는 네개의 기본 타입으로 분류됩니다:
+
+ (1) 쓰기 (또는 스토어) 메모리 배리어.
+
+     쓰기 메모리 배리어는 시스템의 다른 컴포넌트들에 해당 배리어보다 앞서
+     명시된 모든 STORE 오퍼레이션들이 해당 배리어 뒤에 명시된 모든 STORE
+     오퍼레이션들보다 먼저 수행된 것으로 보일 것을 보장합니다.
+
+     쓰기 배리어는 스토어 오퍼레이션들에 대한 부분적 순서 세우기입니다; 로드
+     오퍼레이션들에 대해서는 어떤 영향도 끼치지 않습니다.
+
+     CPU 는 시간의 흐름에 따라 메모리 시스템에 일련의 스토어 오퍼레이션들을
+     하나씩 요청해 집어넣습니다.  쓰기 배리어 앞의 모든 스토어 오퍼레이션들은
+     쓰기 배리어 뒤의 모든 스토어 오퍼레이션들보다 _앞서_ 수행될 겁니다.
+
+     [!] 쓰기 배리어들은 읽기 또는 데이터 의존성 배리어와 함께 짝을 맞춰
+     사용되어야만 함을 알아두세요; "SMP 배리어 짝맞추기" 서브섹션을 참고하세요.
+
+
+ (2) 데이터 의존성 배리어.
+
+     데이터 의존성 배리어는 읽기 배리어의 보다 완화된 형태입니다.  두개의 로드
+     오퍼레이션이 있고 두번째 것이 첫번째 것의 결과에 의존하고 있을 때(예:
+     두번째 로드가 참조할 주소를 첫번째 로드가 읽는 경우), 두번째 로드가 읽어올
+     데이터는 첫번째 로드에 의해 그 주소가 얻어지기 전에 업데이트 되어 있음을
+     보장하기 위해서 데이터 의존성 배리어가 필요할 수 있습니다.
+
+     데이터 의존성 배리어는 상호 의존적인 로드 오퍼레이션들 사이의 부분적 순서
+     세우기입니다; 스토어 오퍼레이션들이나 독립적인 로드들, 또는 중복되는
+     로드들에 대해서는 어떤 영향도 끼치지 않습니다.
+
+     (1) 에서 언급했듯이, 시스템의 CPU 들은 메모리 시스템에 일련의 스토어
+     오퍼레이션들을 던져 넣고 있으며, 거기에 관심이 있는 다른 CPU 는 그
+     오퍼레이션들을 메모리 시스템이 실행한 결과를 인지할 수 있습니다.  이처럼
+     다른 CPU 의 스토어 오퍼레이션의 결과에 관심을 두고 있는 CPU 가 수행 요청한
+     데이터 의존성 배리어는, 배리어 앞의 어떤 로드 오퍼레이션이 다른 CPU 에서
+     던져 넣은 스토어 오퍼레이션과 같은 영역을 향했다면, 그런 스토어
+     오퍼레이션들이 만들어내는 결과가 데이터 의존성 배리어 뒤의 로드
+     오퍼레이션들에게는 보일 것을 보장합니다.
+
+     이 순서 세우기 제약에 대한 그림을 보기 위해선 "메모리 배리어 시퀀스의 예"
+     서브섹션을 참고하시기 바랍니다.
+
+     [!] 첫번째 로드는 반드시 _데이터_ 의존성을 가져야지 컨트롤 의존성을 가져야
+     하는게 아님을 알아두십시오.  만약 두번째 로드를 위한 주소가 첫번째 로드에
+     의존적이지만 그 의존성은 조건적이지 그 주소 자체를 가져오는게 아니라면,
+     그것은 _컨트롤_ 의존성이고, 이 경우에는 읽기 배리어나 그보다 강력한
+     무언가가 필요합니다.  더 자세한 내용을 위해서는 "컨트롤 의존성" 서브섹션을
+     참고하시기 바랍니다.
+
+     [!] 데이터 의존성 배리어는 보통 쓰기 배리어들과 함께 짝을 맞춰 사용되어야
+     합니다; "SMP 배리어 짝맞추기" 서브섹션을 참고하세요.
+
+
+ (3) 읽기 (또는 로드) 메모리 배리어.
+
+     읽기 배리어는 데이터 의존성 배리어 기능의 보장사항에 더해서 배리어보다
+     앞서 명시된 모든 LOAD 오퍼레이션들이 배리어 뒤에 명시되는 모든 LOAD
+     오퍼레이션들보다 먼저 행해진 것으로 시스템의 다른 컴포넌트들에 보여질 것을
+     보장합니다.
+
+     읽기 배리어는 로드 오퍼레이션에 행해지는 부분적 순서 세우기입니다; 스토어
+     오퍼레이션에 대해서는 어떤 영향도 끼치지 않습니다.
+
+     읽기 메모리 배리어는 데이터 의존성 배리어를 내장하므로 데이터 의존성
+     배리어를 대신할 수 있습니다.
+
+     [!] 읽기 배리어는 일반적으로 쓰기 배리어들과 함께 짝을 맞춰 사용되어야
+     합니다; "SMP 배리어 짝맞추기" 서브섹션을 참고하세요.
+
+
+ (4) 범용 메모리 배리어.
+
+     범용(general) 메모리 배리어는 배리어보다 앞서 명시된 모든 LOAD 와 STORE
+     오퍼레이션들이 배리어 뒤에 명시된 모든 LOAD 와 STORE 오퍼레이션들보다
+     먼저 수행된 것으로 시스템의 나머지 컴포넌트들에 보이게 됨을 보장합니다.
+
+     범용 메모리 배리어는 로드와 스토어 모두에 대한 부분적 순서 세우기입니다.
+
+     범용 메모리 배리어는 읽기 메모리 배리어, 쓰기 메모리 배리어 모두를
+     내장하므로, 두 배리어를 모두 대신할 수 있습니다.
+
+
+그리고 두개의 명시적이지 않은 타입이 있습니다:
+
+ (5) ACQUIRE 오퍼레이션.
+
+     이 타입의 오퍼레이션은 단방향의 투과성 배리어처럼 동작합니다.  ACQUIRE
+     오퍼레이션 뒤의 모든 메모리 오퍼레이션들이 ACQUIRE 오퍼레이션 후에
+     일어난 것으로 시스템의 나머지 컴포넌트들에 보이게 될 것이 보장됩니다.
+     LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_acquire() 오퍼레이션도
+     ACQUIRE 오퍼레이션에 포함됩니다.  smp_cond_acquire() 오퍼레이션은 컨트롤
+     의존성과 smp_rmb() 를 사용해서 ACQUIRE 의 의미적 요구사항(semantic)을
+     충족시킵니다.
+
+     ACQUIRE 오퍼레이션 앞의 메모리 오퍼레이션들은 ACQUIRE 오퍼레이션 완료 후에
+     수행된 것처럼 보일 수 있습니다.
+
+     ACQUIRE 오퍼레이션은 거의 항상 RELEASE 오퍼레이션과 짝을 지어 사용되어야
+     합니다.
+
+
+ (6) RELEASE 오퍼레이션.
+
+     이 타입의 오퍼레이션들도 단방향 투과성 배리어처럼 동작합니다.  RELEASE
+     오퍼레이션 앞의 모든 메모리 오퍼레이션들은 RELEASE 오퍼레이션 전에 완료된
+     것으로 시스템의 다른 컴포넌트들에 보여질 것이 보장됩니다.  UNLOCK 류의
+     오퍼레이션들과 smp_store_release() 오퍼레이션도 RELEASE 오퍼레이션의
+     일종입니다.
+
+     RELEASE 오퍼레이션 뒤의 메모리 오퍼레이션들은 RELEASE 오퍼레이션이
+     완료되기 전에 행해진 것처럼 보일 수 있습니다.
+
+     ACQUIRE 와 RELEASE 오퍼레이션의 사용은 일반적으로 다른 메모리 배리어의
+     필요성을 없앱니다 (하지만 "MMIO 쓰기 배리어" 서브섹션에서 설명되는 예외를
+     알아두세요).  또한, RELEASE+ACQUIRE 조합은 범용 메모리 배리어처럼 동작할
+     것을 보장하지 -않습니다-.  하지만, 어떤 변수에 대한 RELEASE 오퍼레이션을
+     앞서는 메모리 액세스들의 수행 결과는 이 RELEASE 오퍼레이션을 뒤이어 같은
+     변수에 대해 수행된 ACQUIRE 오퍼레이션을 뒤따르는 메모리 액세스에는 보여질
+     것이 보장됩니다.  다르게 말하자면, 주어진 변수의 크리티컬 섹션에서는, 해당
+     변수에 대한 앞의 크리티컬 섹션에서의 모든 액세스들이 완료되었을 것을
+     보장합니다.
+
+     즉, ACQUIRE 는 최소한의 "취득" 동작처럼, 그리고 RELEASE 는 최소한의 "공개"
+     처럼 동작한다는 의미입니다.
+
+atomic_ops.txt 에서 설명되는 어토믹 오퍼레이션들 중에는 완전히 순서잡힌 것들과
+(배리어를 사용하지 않는) 완화된 순서의 것들 외에 ACQUIRE 와 RELEASE 부류의
+것들도 존재합니다.  로드와 스토어를 모두 수행하는 조합된 어토믹 오퍼레이션에서,
+ACQUIRE 는 해당 오퍼레이션의 로드 부분에만 적용되고 RELEASE 는 해당
+오퍼레이션의 스토어 부분에만 적용됩니다.
+
+메모리 배리어들은 두 CPU 간, 또는 CPU 와 디바이스 간에 상호작용의 가능성이 있을
+때에만 필요합니다.  만약 어떤 코드에 그런 상호작용이 없을 것이 보장된다면, 해당
+코드에서는 메모리 배리어를 사용할 필요가 없습니다.
+
+
+이것들은 _최소한의_ 보장사항들임을 알아두세요.  다른 아키텍쳐에서는 더 강력한
+보장사항을 제공할 수도 있습니다만, 그런 보장사항은 아키텍쳐 종속적 코드 이외의
+부분에서는 신뢰되지 _않을_ 겁니다.
+
+
+메모리 배리어에 대해 가정해선 안될 것
+-------------------------------------
+
+리눅스 커널 메모리 배리어들이 보장하지 않는 것들이 있습니다:
+
+ (*) 메모리 배리어 앞에서 명시된 어떤 메모리 액세스도 메모리 배리어 명령의 수행
+     완료 시점까지 _완료_ 될 것이란 보장은 없습니다; 배리어가 하는 일은 CPU 의
+     액세스 큐에 특정 타입의 액세스들은 넘을 수 없는 선을 긋는 것으로 생각될 수
+     있습니다.
+
+ (*) 한 CPU 에서 메모리 배리어를 수행하는게 시스템의 다른 CPU 나 하드웨어에
+     어떤 직접적인 영향을 끼친다는 보장은 존재하지 않습니다.  배리어 수행이
+     만드는 간접적 영향은 두번째 CPU 가 첫번째 CPU 의 액세스들의 결과를
+     바라보는 순서가 됩니다만, 다음 항목을 보세요:
+
+ (*) 첫번째 CPU 가 두번째 CPU 의 메모리 액세스들의 결과를 바라볼 때, _설령_
+     두번째 CPU 가 메모리 배리어를 사용한다 해도, 첫번째 CPU _또한_ 그에 맞는
+     메모리 배리어를 사용하지 않는다면 ("SMP 배리어 짝맞추기" 서브섹션을
+     참고하세요) 그 결과가 올바른 순서로 보여진다는 보장은 없습니다.
+
+ (*) CPU 바깥의 하드웨어[*] 가 메모리 액세스들의 순서를 바꾸지 않는다는 보장은
+     존재하지 않습니다.  CPU 캐시 일관성 메커니즘은 메모리 배리어의 간접적
+     영향을 CPU 사이에 전파하긴 하지만, 순서대로 전파하지는 않을 수 있습니다.
+
+	[*] 버스 마스터링 DMA 와 일관성에 대해서는 다음을 참고하시기 바랍니다:
+
+	    Documentation/PCI/pci.txt
+	    Documentation/DMA-API-HOWTO.txt
+	    Documentation/DMA-API.txt
+
+
+데이터 의존성 배리어
+--------------------
+
+데이터 의존성 배리어의 사용에 있어 지켜야 하는 사항들은 약간 미묘하고, 데이터
+의존성 배리어가 사용되어야 하는 상황도 항상 명백하지는 않습니다.  설명을 위해
+다음의 이벤트 시퀀스를 생각해 봅시다:
+
+	CPU 1		      CPU 2
+	===============	      ===============
+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
+	B = 4;
+	<쓰기 배리어>
+	WRITE_ONCE(P, &B)
+			      Q = READ_ONCE(P);
+			      D = *Q;
+
+여기엔 분명한 데이터 의존성이 존재하므로, 이 시퀀스가 끝났을 때 Q 는 &A 또는 &B
+일 것이고, 따라서:
+
+	(Q == &A) 는 (D == 1) 를,
+	(Q == &B) 는 (D == 4) 를 의미합니다.
+
+하지만!  CPU 2 는 B 의 업데이트를 인식하기 전에 P 의 업데이트를 인식할 수 있고,
+따라서 다음의 결과가 가능합니다:
+
+	(Q == &B) and (D == 2) ????
+
+이런 결과는 일관성이나 인과 관계 유지가 실패한 것처럼 보일 수도 있겠지만,
+그렇지 않습니다, 그리고 이 현상은 (DEC Alpha 와 같은) 여러 CPU 에서 실제로
+발견될 수 있습니다.
+
+이 문제 상황을 제대로 해결하기 위해, 데이터 의존성 배리어나 그보다 강화된
+무언가가 주소를 읽어올 때와 데이터를 읽어올 때 사이에 추가되어야만 합니다:
+
+	CPU 1		      CPU 2
+	===============	      ===============
+	{ A == 1, B == 2, C == 3, P == &A, Q == &C }
+	B = 4;
+	<쓰기 배리어>
+	WRITE_ONCE(P, &B);
+			      Q = READ_ONCE(P);
+			      <데이터 의존성 배리어>
+			      D = *Q;
+
+이 변경은 앞의 처음 두가지 결과 중 하나만이 발생할 수 있고, 세번째의 결과는
+발생할 수 없도록 합니다.
+
+데이터 의존성 배리어는 의존적 쓰기에 대해서도 순서를 잡아줍니다:
+
+	CPU 1		      CPU 2
+	===============	      ===============
+	{ A == 1, B == 2, C = 3, P == &A, Q == &C }
+	B = 4;
+	<쓰기 배리어>
+	WRITE_ONCE(P, &B);
+			      Q = READ_ONCE(P);
+			      <데이터 의존성 배리어>
+			      *Q = 5;
+
+이 데이터 의존성 배리어는 Q 로의 읽기가 *Q 로의 스토어와 순서를 맞추게
+해줍니다.  이는 다음과 같은 결과를 막습니다:
+
+	(Q == &B) && (B == 4)
+
+이런 패턴은 드물게 사용되어야 함을 알아 두시기 바랍니다.  무엇보다도, 의존성
+순서 규칙의 의도는 쓰기 작업을 -예방- 해서 그로 인해 발생하는 비싼 캐시 미스도
+없애려는 것입니다.  이 패턴은 드물게 발생하는 에러 조건 같은것들을 기록하는데
+사용될 수 있고, 이렇게 배리어를 사용해 순서를 지키게 함으로써 그런 기록이
+사라지는 것을 막습니다.
+
+
+[!] 상당히 비직관적인 이 상황은 분리된 캐시를 가진 기계, 예를 들어 한 캐시
+뱅크가 짝수번 캐시 라인을 처리하고 다른 뱅크는 홀수번 캐시 라인을 처리하는 기계
+등에서 가장 잘 발생합니다.  포인터 P 는 홀수 번호의 캐시 라인에 있고, 변수 B 는
+짝수 번호 캐시 라인에 있다고 생각해 봅시다.  그런 상태에서 읽기 작업을 하는 CPU
+의 짝수번 뱅크는 할 일이 쌓여 매우 바쁘지만 홀수번 뱅크는 할 일이 없어 아무
+일도 하지 않고  있었다면, 포인터 P 는 새 값 (&B) 을, 그리고 변수 B 는 옛날 값
+(2) 을 가지고 있는 상태가 보여질 수도 있습니다.
+
+
+데이터 의존성 배리어는 매우 중요한데, 예를 들어 RCU 시스템에서 그렇습니다.
+include/linux/rcupdate.h 의 rcu_assign_pointer() 와 rcu_dereference() 를
+참고하세요.  여기서 데이터 의존성 배리어는 RCU 로 관리되는 포인터의 타겟을 현재
+타겟에서 수정된 새로운 타겟으로 바꾸는 작업에서 새로 수정된 타겟이 초기화가
+완료되지 않은 채로 보여지는 일이 일어나지 않게 해줍니다.
+
+더 많은 예를 위해선 "캐시 일관성" 서브섹션을 참고하세요.
+
+
+컨트롤 의존성
+-------------
+
+로드-로드 컨트롤 의존성은 데이터 의존성 배리어만으로는 정확히 동작할 수가
+없어서 읽기 메모리 배리어를 필요로 합니다.  아래의 코드를 봅시다:
+
+	q = READ_ONCE(a);
+	if (q) {
+		<데이터 의존성 배리어>  /* BUG: No data dependency!!! */
+		p = READ_ONCE(b);
+	}
+
+이 코드는 원하는 대로의 효과를 내지 못할 수 있는데, 이 코드에는 데이터 의존성이
+아니라 컨트롤 의존성이 존재하기 때문으로, 이런 상황에서 CPU 는 실행 속도를 더
+빠르게 하기 위해 분기 조건의 결과를 예측하고 코드를 재배치 할 수 있어서 다른
+CPU 는 b 로부터의 로드 오퍼레이션이 a 로부터의 로드 오퍼레이션보다 먼저 발생한
+걸로 인식할 수 있습니다.  여기에 정말로 필요했던 건 다음과 같습니다:
+
+	q = READ_ONCE(a);
+	if (q) {
+		<읽기 배리어>
+		p = READ_ONCE(b);
+	}
+
+하지만, 스토어 오퍼레이션은 예측적으로 수행되지 않습니다.  즉, 다음 예에서와
+같이 로드-스토어 컨트롤 의존성이 존재하는 경우에는 순서가 -지켜진다-는
+의미입니다.
+
+	q = READ_ONCE(a);
+	if (q) {
+		WRITE_ONCE(b, p);
+	}
+
+컨트롤 의존성은 보통 다른 타입의 배리어들과 짝을 맞춰 사용됩니다.  그렇다곤
+하나, READ_ONCE() 는 반드시 사용해야 함을 부디 명심하세요!  READ_ONCE() 가
+없다면, 컴파일러가 'a' 로부터의 로드를 'a' 로부터의 또다른 로드와, 'b' 로의
+스토어를 'b' 로의 또다른 스토어와 조합해 버려 매우 비직관적인 결과를 초래할 수
+있습니다.
+
+이걸로 끝이 아닌게, 컴파일러가 변수 'a' 의 값이 항상 0이 아니라고 증명할 수
+있다면, 앞의 예에서 "if" 문을 없애서 다음과 같이 최적화 할 수도 있습니다:
+
+	q = a;
+	b = p;  /* BUG: Compiler and CPU can both reorder!!! */
+
+그러니 READ_ONCE() 를 반드시 사용하세요.
+
+다음과 같이 "if" 문의 양갈래 브랜치에 모두 존재하는 동일한 스토어에 대해 순서를
+강제하고 싶은 경우가 있을 수 있습니다:
+
+	q = READ_ONCE(a);
+	if (q) {
+		barrier();
+		WRITE_ONCE(b, p);
+		do_something();
+	} else {
+		barrier();
+		WRITE_ONCE(b, p);
+		do_something_else();
+	}
+
+안타깝게도, 현재의 컴파일러들은 높은 최적화 레벨에서는 이걸 다음과 같이
+바꿔버립니다:
+
+	q = READ_ONCE(a);
+	barrier();
+	WRITE_ONCE(b, p);  /* BUG: No ordering vs. load from a!!! */
+	if (q) {
+		/* WRITE_ONCE(b, p); -- moved up, BUG!!! */
+		do_something();
+	} else {
+		/* WRITE_ONCE(b, p); -- moved up, BUG!!! */
+		do_something_else();
+	}
+
+이제 'a' 에서의 로드와 'b' 로의 스토어 사이에는 조건적 관계가 없기 때문에 CPU
+는 이들의 순서를 바꿀 수 있게 됩니다: 이런 경우에 조건적 관계는 반드시
+필요한데, 모든 컴파일러 최적화가 이루어지고 난 후의 어셈블리 코드에서도
+마찬가지입니다.  따라서, 이 예에서 순서를 지키기 위해서는 smp_store_release()
+와 같은 명시적 메모리 배리어가 필요합니다:
+
+	q = READ_ONCE(a);
+	if (q) {
+		smp_store_release(&b, p);
+		do_something();
+	} else {
+		smp_store_release(&b, p);
+		do_something_else();
+	}
+
+반면에 명시적 메모리 배리어가 없다면, 이런 경우의 순서는 스토어 오퍼레이션들이
+서로 다를 때에만 보장되는데, 예를 들면 다음과 같은 경우입니다:
+
+	q = READ_ONCE(a);
+	if (q) {
+		WRITE_ONCE(b, p);
+		do_something();
+	} else {
+		WRITE_ONCE(b, r);
+		do_something_else();
+	}
+
+처음의 READ_ONCE() 는 컴파일러가 'a' 의 값을 증명해내는 것을 막기 위해 여전히
+필요합니다.
+
+또한, 로컬 변수 'q' 를 가지고 하는 일에 대해 주의해야 하는데, 그러지 않으면
+컴파일러는 그 값을 추측하고 또다시 필요한 조건관계를 없애버릴 수 있습니다.
+예를 들면:
+
+	q = READ_ONCE(a);
+	if (q % MAX) {
+		WRITE_ONCE(b, p);
+		do_something();
+	} else {
+		WRITE_ONCE(b, r);
+		do_something_else();
+	}
+
+만약 MAX 가 1 로 정의된 상수라면, 컴파일러는 (q % MAX) 는 0이란 것을 알아채고,
+위의 코드를 아래와 같이 바꿔버릴 수 있습니다:
+
+	q = READ_ONCE(a);
+	WRITE_ONCE(b, p);
+	do_something_else();
+
+이렇게 되면, CPU 는 변수 'a' 로부터의 로드와 변수 'b' 로의 스토어 사이의 순서를
+지켜줄 필요가 없어집니다.  barrier() 를 추가해 해결해 보고 싶겠지만, 그건
+도움이 안됩니다.  조건 관계는 사라졌고, barrier() 는 이를 되돌리지 못합니다.
+따라서, 이 순서를 지켜야 한다면, MAX 가 1 보다 크다는 것을, 다음과 같은 방법을
+사용해 분명히 해야 합니다:
+
+	q = READ_ONCE(a);
+	BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
+	if (q % MAX) {
+		WRITE_ONCE(b, p);
+		do_something();
+	} else {
+		WRITE_ONCE(b, r);
+		do_something_else();
+	}
+
+'b' 로의 스토어들은 여전히 서로 다름을 알아두세요.  만약 그것들이 동일하면,
+앞에서 이야기했듯, 컴파일러가 그 스토어 오퍼레이션들을 'if' 문 바깥으로
+끄집어낼 수 있습니다.
+
+또한 이진 조건문 평가에 너무 의존하지 않도록 조심해야 합니다.  다음의 예를
+봅시다:
+
+	q = READ_ONCE(a);
+	if (q || 1 > 0)
+		WRITE_ONCE(b, 1);
+
+첫번째 조건만으로는 브랜치 조건 전체를 거짓으로 만들 수 없고 두번째 조건은 항상
+참이기 때문에, 컴파일러는 이 예를 다음과 같이 바꿔서 컨트롤 의존성을 없애버릴
+수 있습니다:
+
+	q = READ_ONCE(a);
+	WRITE_ONCE(b, 1);
+
+이 예는 컴파일러가 코드를 추측으로 수정할 수 없도록 분명히 해야 한다는 점을
+강조합니다.  조금 더 일반적으로 말해서, READ_ONCE() 는 컴파일러에게 주어진 로드
+오퍼레이션을 위한 코드를 정말로 만들도록 하지만, 컴파일러가 그렇게 만들어진
+코드의 수행 결과를 사용하도록 강제하지는 않습니다.
+
+마지막으로, 컨트롤 의존성은 이행성 (transitivity) 을 제공하지 -않습니다-.  이건
+x 와 y 가 둘 다 0 이라는 초기값을 가졌다는 가정 하의 두개의 예제로
+보이겠습니다:
+
+	CPU 0                     CPU 1
+	=======================   =======================
+	r1 = READ_ONCE(x);        r2 = READ_ONCE(y);
+	if (r1 > 0)               if (r2 > 0)
+	  WRITE_ONCE(y, 1);         WRITE_ONCE(x, 1);
+
+	assert(!(r1 == 1 && r2 == 1));
+
+이 두 CPU 예제에서 assert() 의 조건은 항상 참일 것입니다.  그리고, 만약 컨트롤
+의존성이 이행성을 (실제로는 그러지 않지만) 보장한다면, 다음의 CPU 가 추가되어도
+아래의 assert() 조건은 참이 될것입니다:
+
+	CPU 2
+	=====================
+	WRITE_ONCE(x, 2);
+
+	assert(!(r1 == 2 && r2 == 1 && x == 2)); /* FAILS!!! */
+
+하지만 컨트롤 의존성은 이행성을 제공하지 -않기- 때문에, 세개의 CPU 예제가 실행
+완료된 후에 위의 assert() 의 조건은 거짓으로 평가될 수 있습니다.  세개의 CPU
+예제가 순서를 지키길 원한다면, CPU 0 와 CPU 1 코드의 로드와 스토어 사이, "if"
+문 바로 다음에 smp_mb()를 넣어야 합니다.  더 나아가서, 최초의 두 CPU 예제는
+매우 위험하므로 사용되지 않아야 합니다.
+
+이 두개의 예제는 다음 논문:
+http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf 와
+이 사이트: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html 에 나온 LB 와 WWC
+리트머스 테스트입니다.
+
+요약하자면:
+
+  (*) 컨트롤 의존성은 앞의 로드들을 뒤의 스토어들에 대해 순서를 맞춰줍니다.
+      하지만, 그 외의 어떤 순서도 보장하지 -않습니다-: 앞의 로드와 뒤의 로드들
+      사이에도, 앞의 스토어와 뒤의 스토어들 사이에도요.  이런 다른 형태의
+      순서가 필요하다면 smp_rmb() 나 smp_wmb()를, 또는, 앞의 스토어들과 뒤의
+      로드들 사이의 순서를 위해서는 smp_mb() 를 사용하세요.
+
+  (*) "if" 문의 양갈래 브랜치가 같은 변수에의 동일한 스토어로 시작한다면, 그
+      스토어들은 각 스토어 앞에 smp_mb() 를 넣거나 smp_store_release() 를
+      사용해서 스토어를 하는 식으로 순서를 맞춰줘야 합니다.  이 문제를 해결하기
+      위해 "if" 문의 양갈래 브랜치의 시작 지점에 barrier() 를 넣는 것만으로는
+      충분한 해결이 되지 않는데, 이는 앞의 예에서 본것과 같이, 컴파일러의
+      최적화는 barrier() 가 의미하는 바를 지키면서도 컨트롤 의존성을 손상시킬
+      수 있기 때문이라는 점을 부디 알아두시기 바랍니다.
+
+  (*) 컨트롤 의존성은 앞의 로드와 뒤의 스토어 사이에 최소 하나의, 실행
+      시점에서의 조건관계를 필요로 하며, 이 조건관계는 앞의 로드와 관계되어야
+      합니다.  만약 컴파일러가 조건 관계를 최적화로 없앨수 있다면, 순서도
+      최적화로 없애버렸을 겁니다.  READ_ONCE() 와 WRITE_ONCE() 의 주의 깊은
+      사용은 주어진 조건 관계를 유지하는데 도움이 될 수 있습니다.
+
+  (*) 컨트롤 의존성을 위해선 컴파일러가 조건관계를 없애버리는 것을 막아야
+      합니다.  주의 깊은 READ_ONCE() 나 atomic{,64}_read() 의 사용이 컨트롤
+      의존성이 사라지지 않게 하는데 도움을 줄 수 있습니다.  더 많은 정보를
+      위해선 "컴파일러 배리어" 섹션을 참고하시기 바랍니다.
+
+  (*) 컨트롤 의존성은 보통 다른 타입의 배리어들과 짝을 맞춰 사용됩니다.
+
+  (*) 컨트롤 의존성은 이행성을 제공하지 -않습니다-.  이행성이 필요하다면,
+      smp_mb() 를 사용하세요.
+
+
+SMP 배리어 짝맞추기
+--------------------
+
+CPU 간 상호작용을 다룰 때에 일부 타입의 메모리 배리어는 항상 짝을 맞춰
+사용되어야 합니다.  적절하게 짝을 맞추지 않은 코드는 사실상 에러에 가깝습니다.
+
+범용 배리어들은 범용 배리어끼리도 짝을 맞추지만 이행성이 없는 대부분의 다른
+타입의 배리어들과도 짝을 맞춥니다.  ACQUIRE 배리어는 RELEASE 배리어와 짝을
+맞춥니다만, 둘 다 범용 배리어를 포함해 다른 배리어들과도 짝을 맞출 수 있습니다.
+쓰기 배리어는 데이터 의존성 배리어나 컨트롤 의존성, ACQUIRE 배리어, RELEASE
+배리어, 읽기 배리어, 또는 범용 배리어와 짝을 맞춥니다.  비슷하게 읽기 배리어나
+컨트롤 의존성, 또는 데이터 의존성 배리어는 쓰기 배리어나 ACQUIRE 배리어,
+RELEASE 배리어, 또는 범용 배리어와 짝을 맞추는데, 다음과 같습니다:
+
+	CPU 1		      CPU 2
+	===============	      ===============
+	WRITE_ONCE(a, 1);
+	<쓰기 배리어>
+	WRITE_ONCE(b, 2);     x = READ_ONCE(b);
+			      <읽기 배리어>
+			      y = READ_ONCE(a);
+
+또는:
+
+	CPU 1		      CPU 2
+	===============	      ===============================
+	a = 1;
+	<쓰기 배리어>
+	WRITE_ONCE(b, &a);    x = READ_ONCE(b);
+			      <데이터 의존성 배리어>
+			      y = *x;
+
+또는:
+
+	CPU 1		      CPU 2
+	===============	      ===============================
+	r1 = READ_ONCE(y);
+	<범용 배리어>
+	WRITE_ONCE(y, 1);     if (r2 = READ_ONCE(x)) {
+			         <묵시적 컨트롤 의존성>
+			         WRITE_ONCE(y, 1);
+			      }
+
+	assert(r1 == 0 || r2 == 0);
+
+기본적으로, 여기서의 읽기 배리어는 "더 완화된" 타입일 순 있어도 항상 존재해야
+합니다.
+
+[!] 쓰기 배리어 앞의 스토어 오퍼레이션은 일반적으로 읽기 배리어나 데이터
+의존성 배리어 뒤의 로드 오퍼레이션과 매치될 것이고, 반대도 마찬가지입니다:
+
+	CPU 1                               CPU 2
+	===================                 ===================
+	WRITE_ONCE(a, 1);    }----   --->{  v = READ_ONCE(c);
+	WRITE_ONCE(b, 2);    }    \ /    {  w = READ_ONCE(d);
+	<쓰기 배리어>              \        <읽기 배리어>
+	WRITE_ONCE(c, 3);    }    / \    {  x = READ_ONCE(a);
+	WRITE_ONCE(d, 4);    }----   --->{  y = READ_ONCE(b);
+
+
+메모리 배리어 시퀀스의 예
+-------------------------
+
+첫째, 쓰기 배리어는 스토어 오퍼레이션들의 부분적 순서 세우기로 동작합니다.
+아래의 이벤트 시퀀스를 보세요:
+
+	CPU 1
+	=======================
+	STORE A = 1
+	STORE B = 2
+	STORE C = 3
+	<쓰기 배리어>
+	STORE D = 4
+	STORE E = 5
+
+이 이벤트 시퀀스는 메모리 일관성 시스템에 원소끼리의 순서가 존재하지 않는 집합
+{ STORE A, STORE B, STORE C } 가 역시 원소끼리의 순서가 존재하지 않는 집합
+{ STORE D, STORE E } 보다 먼저 일어난 것으로 시스템의 나머지 요소들에 보이도록
+전달됩니다:
+
+	+-------+       :      :
+	|       |       +------+
+	|       |------>| C=3  |     }     /\
+	|       |  :    +------+     }-----  \  -----> 시스템의 나머지 요소에
+	|       |  :    | A=1  |     }        \/       보여질 수 있는 이벤트들
+	|       |  :    +------+     }
+	| CPU 1 |  :    | B=2  |     }
+	|       |       +------+     }
+	|       |   wwwwwwwwwwwwwwww }   <--- 여기서 쓰기 배리어는 배리어 앞의
+	|       |       +------+     }        모든 스토어가 배리어 뒤의 스토어
+	|       |  :    | E=5  |     }        전에 메모리 시스템에 전달되도록
+	|       |  :    +------+     }        합니다
+	|       |------>| D=4  |     }
+	|       |       +------+
+	+-------+       :      :
+	                   |
+	                   | CPU 1 에 의해 메모리 시스템에 전달되는
+	                   | 일련의 스토어 오퍼레이션들
+	                   V
+
+
+둘째, 데이터 의존성 배리어는 데이터 의존적 로드 오퍼레이션들의 부분적 순서
+세우기로 동작합니다.  다음 일련의 이벤트들을 보세요:
+
+	CPU 1			CPU 2
+	=======================	=======================
+		{ B = 7; X = 9; Y = 8; C = &Y }
+	STORE A = 1
+	STORE B = 2
+	<쓰기 배리어>
+	STORE C = &B		LOAD X
+	STORE D = 4		LOAD C (gets &B)
+				LOAD *C (reads B)
+
+여기에 별다른 개입이 없다면, CPU 1 의 쓰기 배리어에도 불구하고 CPU 2 는 CPU 1
+의 이벤트들을 완전히 무작위적 순서로 인지하게 됩니다:
+
+	+-------+       :      :                :       :
+	|       |       +------+                +-------+  | CPU 2 에 인지되는
+	|       |------>| B=2  |-----       --->| Y->8  |  | 업데이트 이벤트
+	|       |  :    +------+     \          +-------+  | 시퀀스
+	| CPU 1 |  :    | A=1  |      \     --->| C->&Y |  V
+	|       |       +------+       |        +-------+
+	|       |   wwwwwwwwwwwwwwww   |        :       :
+	|       |       +------+       |        :       :
+	|       |  :    | C=&B |---    |        :       :       +-------+
+	|       |  :    +------+   \   |        +-------+       |       |
+	|       |------>| D=4  |    ----------->| C->&B |------>|       |
+	|       |       +------+       |        +-------+       |       |
+	+-------+       :      :       |        :       :       |       |
+	                               |        :       :       |       |
+	                               |        :       :       | CPU 2 |
+	                               |        +-------+       |       |
+	    분명히 잘못된        --->  |        | B->7  |------>|       |
+	    B 의 값 인지 (!)           |        +-------+       |       |
+	                               |        :       :       |       |
+	                               |        +-------+       |       |
+	    X 의 로드가 B 의    --->    \       | X->9  |------>|       |
+	    일관성 유지를                \      +-------+       |       |
+	    지연시킴                      ----->| B->2  |       +-------+
+	                                        +-------+
+	                                        :       :
+
+
+앞의 예에서, CPU 2 는 (B 의 값이 될) *C 의 값 읽기가 C 의 LOAD 뒤에 이어짐에도
+B 가 7 이라는 결과를 얻습니다.
+
+하지만, 만약 데이터 의존성 배리어가 C 의 로드와 *C (즉, B) 의 로드 사이에
+있었다면:
+
+	CPU 1			CPU 2
+	=======================	=======================
+		{ B = 7; X = 9; Y = 8; C = &Y }
+	STORE A = 1
+	STORE B = 2
+	<쓰기 배리어>
+	STORE C = &B		LOAD X
+	STORE D = 4		LOAD C (gets &B)
+				<데이터 의존성 배리어>
+				LOAD *C (reads B)
+
+다음과 같이 됩니다:
+
+	+-------+       :      :                :       :
+	|       |       +------+                +-------+
+	|       |------>| B=2  |-----       --->| Y->8  |
+	|       |  :    +------+     \          +-------+
+	| CPU 1 |  :    | A=1  |      \     --->| C->&Y |
+	|       |       +------+       |        +-------+
+	|       |   wwwwwwwwwwwwwwww   |        :       :
+	|       |       +------+       |        :       :
+	|       |  :    | C=&B |---    |        :       :       +-------+
+	|       |  :    +------+   \   |        +-------+       |       |
+	|       |------>| D=4  |    ----------->| C->&B |------>|       |
+	|       |       +------+       |        +-------+       |       |
+	+-------+       :      :       |        :       :       |       |
+	                               |        :       :       |       |
+	                               |        :       :       | CPU 2 |
+	                               |        +-------+       |       |
+	                               |        | X->9  |------>|       |
+	                               |        +-------+       |       |
+	  C 로의 스토어 앞의     --->   \   ddddddddddddddddd   |       |
+	  모든 이벤트 결과가             \      +-------+       |       |
+	  뒤의 로드에게                   ----->| B->2  |------>|       |
+	  보이게 강제한다                       +-------+       |       |
+	                                        :       :       +-------+
+
+
+셋째, 읽기 배리어는 로드 오퍼레이션들에의 부분적 순서 세우기로 동작합니다.
+아래의 일련의 이벤트를 봅시다:
+
+	CPU 1			CPU 2
+	=======================	=======================
+		{ A = 0, B = 9 }
+	STORE A=1
+	<쓰기 배리어>
+	STORE B=2
+				LOAD B
+				LOAD A
+
+CPU 1 은 쓰기 배리어를 쳤지만, 별다른 개입이 없다면 CPU 2 는 CPU 1 에서 행해진
+이벤트의 결과를 무작위적 순서로 인지하게 됩니다.
+
+	+-------+       :      :                :       :
+	|       |       +------+                +-------+
+	|       |------>| A=1  |------      --->| A->0  |
+	|       |       +------+      \         +-------+
+	| CPU 1 |   wwwwwwwwwwwwwwww   \    --->| B->9  |
+	|       |       +------+        |       +-------+
+	|       |------>| B=2  |---     |       :       :
+	|       |       +------+   \    |       :       :       +-------+
+	+-------+       :      :    \   |       +-------+       |       |
+	                             ---------->| B->2  |------>|       |
+	                                |       +-------+       | CPU 2 |
+	                                |       | A->0  |------>|       |
+	                                |       +-------+       |       |
+	                                |       :       :       +-------+
+	                                 \      :       :
+	                                  \     +-------+
+	                                   ---->| A->1  |
+	                                        +-------+
+	                                        :       :
+
+
+하지만, 만약 읽기 배리어가 B 의 로드와 A 의 로드 사이에 존재한다면:
+
+	CPU 1			CPU 2
+	=======================	=======================
+		{ A = 0, B = 9 }
+	STORE A=1
+	<쓰기 배리어>
+	STORE B=2
+				LOAD B
+				<읽기 배리어>
+				LOAD A
+
+CPU 1 에 의해 만들어진 부분적 순서가 CPU 2 에도 그대로 인지됩니다:
+
+	+-------+       :      :                :       :
+	|       |       +------+                +-------+
+	|       |------>| A=1  |------      --->| A->0  |
+	|       |       +------+      \         +-------+
+	| CPU 1 |   wwwwwwwwwwwwwwww   \    --->| B->9  |
+	|       |       +------+        |       +-------+
+	|       |------>| B=2  |---     |       :       :
+	|       |       +------+   \    |       :       :       +-------+
+	+-------+       :      :    \   |       +-------+       |       |
+	                             ---------->| B->2  |------>|       |
+	                                |       +-------+       | CPU 2 |
+	                                |       :       :       |       |
+	                                |       :       :       |       |
+	  여기서 읽기 배리어는   ---->   \  rrrrrrrrrrrrrrrrr   |       |
+	  B 로의 스토어 전의              \     +-------+       |       |
+	  모든 결과를 CPU 2 에             ---->| A->1  |------>|       |
+	  보이도록 한다                         +-------+       |       |
+	                                        :       :       +-------+
+
+
+더 완벽한 설명을 위해, A 의 로드가 읽기 배리어 앞과 뒤에 있으면 어떻게 될지
+생각해 봅시다:
+
+	CPU 1			CPU 2
+	=======================	=======================
+		{ A = 0, B = 9 }
+	STORE A=1
+	<쓰기 배리어>
+	STORE B=2
+				LOAD B
+				LOAD A [first load of A]
+				<읽기 배리어>
+				LOAD A [second load of A]
+
+A 의 로드 두개가 모두 B 의 로드 뒤에 있지만, 서로 다른 값을 얻어올 수
+있습니다:
+
+	+-------+       :      :                :       :
+	|       |       +------+                +-------+
+	|       |------>| A=1  |------      --->| A->0  |
+	|       |       +------+      \         +-------+
+	| CPU 1 |   wwwwwwwwwwwwwwww   \    --->| B->9  |
+	|       |       +------+        |       +-------+
+	|       |------>| B=2  |---     |       :       :
+	|       |       +------+   \    |       :       :       +-------+
+	+-------+       :      :    \   |       +-------+       |       |
+	                             ---------->| B->2  |------>|       |
+	                                |       +-------+       | CPU 2 |
+	                                |       :       :       |       |
+	                                |       :       :       |       |
+	                                |       +-------+       |       |
+	                                |       | A->0  |------>| 1st   |
+	                                |       +-------+       |       |
+	  여기서 읽기 배리어는   ---->   \  rrrrrrrrrrrrrrrrr   |       |
+	  B 로의 스토어 전의              \     +-------+       |       |
+	  모든 결과를 CPU 2 에             ---->| A->1  |------>| 2nd   |
+	  보이도록 한다                         +-------+       |       |
+	                                        :       :       +-------+
+
+
+하지만 CPU 1 에서의 A 업데이트는 읽기 배리어가 완료되기 전에도 보일 수도
+있긴 합니다:
+
+	+-------+       :      :                :       :
+	|       |       +------+                +-------+
+	|       |------>| A=1  |------      --->| A->0  |
+	|       |       +------+      \         +-------+
+	| CPU 1 |   wwwwwwwwwwwwwwww   \    --->| B->9  |
+	|       |       +------+        |       +-------+
+	|       |------>| B=2  |---     |       :       :
+	|       |       +------+   \    |       :       :       +-------+
+	+-------+       :      :    \   |       +-------+       |       |
+	                             ---------->| B->2  |------>|       |
+	                                |       +-------+       | CPU 2 |
+	                                |       :       :       |       |
+	                                 \      :       :       |       |
+	                                  \     +-------+       |       |
+	                                   ---->| A->1  |------>| 1st   |
+	                                        +-------+       |       |
+	                                    rrrrrrrrrrrrrrrrr   |       |
+	                                        +-------+       |       |
+	                                        | A->1  |------>| 2nd   |
+	                                        +-------+       |       |
+	                                        :       :       +-------+
+
+
+여기서 보장되는 건, 만약 B 의 로드가 B == 2 라는 결과를 봤다면, A 에의 두번째
+로드는 항상 A == 1 을 보게 될 것이라는 겁니다.  A 에의 첫번째 로드에는 그런
+보장이 없습니다; A == 0 이거나 A == 1 이거나 둘 중 하나의 결과를 보게 될겁니다.
+
+
+읽기 메모리 배리어 VS 로드 예측
+-------------------------------
+
+많은 CPU들이 로드를 예측적으로 (speculatively) 합니다: 어떤 데이터를 메모리에서
+로드해야 하게 될지 예측을 했다면, 해당 데이터를 로드하는 인스트럭션을 실제로는
+아직 만나지 않았더라도 다른 로드 작업이 없어 버스 (bus) 가 아무 일도 하고 있지
+않다면, 그 데이터를 로드합니다.  이후에 실제 로드 인스트럭션이 실행되면 CPU 가
+이미 그 값을 가지고 있기 때문에 그 로드 인스트럭션은 즉시 완료됩니다.
+
+해당 CPU 는 실제로는 그 값이 필요치 않았다는 사실이 나중에 드러날 수도 있는데 -
+해당 로드 인스트럭션이 브랜치로 우회되거나 했을 수 있겠죠 - , 그렇게 되면 앞서
+읽어둔 값을 버리거나 나중의 사용을 위해 캐시에 넣어둘 수 있습니다.
+
+다음을 생각해 봅시다:
+
+	CPU 1			CPU 2
+	=======================	=======================
+				LOAD B
+				DIVIDE		} 나누기 명령은 일반적으로
+				DIVIDE		} 긴 시간을 필요로 합니다
+				LOAD A
+
+는 이렇게 될 수 있습니다:
+
+	                                        :       :       +-------+
+	                                        +-------+       |       |
+	                                    --->| B->2  |------>|       |
+	                                        +-------+       | CPU 2 |
+	                                        :       :DIVIDE |       |
+	                                        +-------+       |       |
+	나누기 하느라 바쁜       --->       --->| A->0  |~~~~   |       |
+	CPU 는 A 의 LOAD 를                     +-------+   ~   |       |
+	예측해서 수행한다                       :       :   ~   |       |
+	                                        :       :DIVIDE |       |
+	                                        :       :   ~   |       |
+	나누기가 끝나면       --->     --->     :       :   ~-->|       |
+	CPU 는 해당 LOAD 를                     :       :       |       |
+	즉각 완료한다                           :       :       +-------+
+
+
+읽기 배리어나 데이터 의존성 배리어를 두번째 로드 직전에 놓는다면:
+
+	CPU 1			CPU 2
+	=======================	=======================
+				LOAD B
+				DIVIDE
+				DIVIDE
+				<읽기 배리어>
+				LOAD A
+
+예측으로 얻어진 값은 사용된 배리어의 타입에 따라서 해당 값이 옳은지 검토되게
+됩니다.  만약 해당 메모리 영역에 변화가 없었다면, 예측으로 얻어두었던 값이
+사용됩니다:
+
+	                                        :       :       +-------+
+	                                        +-------+       |       |
+	                                    --->| B->2  |------>|       |
+	                                        +-------+       | CPU 2 |
+	                                        :       :DIVIDE |       |
+	                                        +-------+       |       |
+	나누기 하느라 바쁜       --->       --->| A->0  |~~~~   |       |
+	CPU 는 A 의 LOAD 를                     +-------+   ~   |       |
+	예측한다                                :       :   ~   |       |
+	                                        :       :DIVIDE |       |
+	                                        :       :   ~   |       |
+	                                        :       :   ~   |       |
+	                                    rrrrrrrrrrrrrrrr~   |       |
+	                                        :       :   ~   |       |
+	                                        :       :   ~-->|       |
+	                                        :       :       |       |
+	                                        :       :       +-------+
+
+
+하지만 다른 CPU 에서 업데이트나 무효화가 있었다면, 그 예측은 무효화되고 그 값은
+다시 읽혀집니다:
+
+	                                        :       :       +-------+
+	                                        +-------+       |       |
+	                                    --->| B->2  |------>|       |
+	                                        +-------+       | CPU 2 |
+	                                        :       :DIVIDE |       |
+	                                        +-------+       |       |
+	나누기 하느라 바쁜       --->       --->| A->0  |~~~~   |       |
+	CPU 는 A 의 LOAD 를                     +-------+   ~   |       |
+	예측한다                                :       :   ~   |       |
+	                                        :       :DIVIDE |       |
+	                                        :       :   ~   |       |
+	                                        :       :   ~   |       |
+	                                    rrrrrrrrrrrrrrrrr   |       |
+	                                        +-------+       |       |
+	예측성 동작은 무효화 되고    --->   --->| A->1  |------>|       |
+	업데이트된 값이 다시 읽혀진다           +-------+       |       |
+	                                        :       :       +-------+
+
+
+이행성
+------
+
+이행성(transitivity)은 실제의 컴퓨터 시스템에서 항상 제공되지는 않는, 순서
+맞추기에 대한 상당히 직관적인 개념입니다.  다음의 예가 이행성을 보여줍니다:
+
+	CPU 1			CPU 2			CPU 3
+	=======================	=======================	=======================
+		{ X = 0, Y = 0 }
+	STORE X=1		LOAD X			STORE Y=1
+				<범용 배리어>		<범용 배리어>
+				LOAD Y			LOAD X
+
+CPU 2 의 X 로드가 1을 리턴했고 Y 로드가 0을 리턴했다고 해봅시다.  이는 CPU 2 의
+X 로드가 CPU 1 의 X 스토어 뒤에 이루어졌고 CPU 2 의 Y 로드는 CPU 3 의 Y 스토어
+전에 이루어졌음을 의미합니다.  그럼 "CPU 3 의 X 로드는 0을 리턴할 수 있나요?"
+
+CPU 2 의 X 로드는 CPU 1 의 스토어 후에 이루어졌으니, CPU 3 의 X 로드는 1을
+리턴하는게 자연스럽습니다.  이런 생각이 이행성의 한 예입니다: CPU A 에서 실행된
+로드가 CPU B 에서의 같은 변수에 대한 로드를 뒤따른다면, CPU A 의 로드는 CPU B
+의 로드가 내놓은 값과 같거나 그 후의 값을 내놓아야 합니다.
+
+리눅스 커널에서 범용 배리어의 사용은 이행성을 보장합니다.  따라서, 앞의 예에서
+CPU 2 의 X 로드가 1을, Y 로드는 0을 리턴했다면, CPU 3 의 X 로드는 반드시 1을
+리턴합니다.
+
+하지만, 읽기나 쓰기 배리어에 대해서는 이행성이 보장되지 -않습니다-.  예를 들어,
+앞의 예에서 CPU 2 의 범용 배리어가 아래처럼 읽기 배리어로 바뀐 경우를 생각해
+봅시다:
+
+	CPU 1			CPU 2			CPU 3
+	=======================	=======================	=======================
+		{ X = 0, Y = 0 }
+	STORE X=1		LOAD X			STORE Y=1
+				<읽기 배리어>		<범용 배리어>
+				LOAD Y			LOAD X
+
+이 코드는 이행성을 갖지 않습니다: 이 예에서는, CPU 2 의 X 로드가 1을
+리턴하고, Y 로드는 0을 리턴하지만 CPU 3 의 X 로드가 0을 리턴하는 것도 완전히
+합법적입니다.
+
+CPU 2 의 읽기 배리어가 자신의 읽기는 순서를 맞춰줘도, CPU 1 의 스토어와의
+순서를 맞춰준다고는 보장할 수 없다는게 핵심입니다.  따라서, CPU 1 과 CPU 2 가
+버퍼나 캐시를 공유하는 시스템에서 이 예제 코드가 실행된다면, CPU 2 는 CPU 1 이
+쓴 값에 좀 빨리 접근할 수 있을 것입니다.  따라서 CPU 1 과 CPU 2 의 접근으로
+조합된 순서를 모든 CPU 가 동의할 수 있도록 하기 위해 범용 배리어가 필요합니다.
+
+범용 배리어는 "글로벌 이행성"을 제공해서, 모든 CPU 들이 오퍼레이션들의 순서에
+동의하게 할 것입니다.  반면, release-acquire 조합은 "로컬 이행성" 만을
+제공해서, 해당 조합이 사용된 CPU 들만이 해당 액세스들의 조합된 순서에 동의함이
+보장됩니다.  예를 들어, 존경스런 Herman Hollerith 의 C 코드로 보면:
+
+	int u, v, x, y, z;
+
+	void cpu0(void)
+	{
+		r0 = smp_load_acquire(&x);
+		WRITE_ONCE(u, 1);
+		smp_store_release(&y, 1);
+	}
+
+	void cpu1(void)
+	{
+		r1 = smp_load_acquire(&y);
+		r4 = READ_ONCE(v);
+		r5 = READ_ONCE(u);
+		smp_store_release(&z, 1);
+	}
+
+	void cpu2(void)
+	{
+		r2 = smp_load_acquire(&z);
+		smp_store_release(&x, 1);
+	}
+
+	void cpu3(void)
+	{
+		WRITE_ONCE(v, 1);
+		smp_mb();
+		r3 = READ_ONCE(u);
+	}
+
+cpu0(), cpu1(), 그리고 cpu2() 는 smp_store_release()/smp_load_acquire() 쌍의
+연결을 통한 로컬 이행성에 동참하고 있으므로, 다음과 같은 결과는 나오지 않을
+겁니다:
+
+	r0 == 1 && r1 == 1 && r2 == 1
+
+더 나아가서, cpu0() 와 cpu1() 사이의 release-acquire 관계로 인해, cpu1() 은
+cpu0() 의 쓰기를 봐야만 하므로, 다음과 같은 결과도 없을 겁니다:
+
+	r1 == 1 && r5 == 0
+
+하지만, release-acquire 타동성은 동참한 CPU 들에만 적용되므로 cpu3() 에는
+적용되지 않습니다.  따라서, 다음과 같은 결과가 가능합니다:
+
+	r0 == 0 && r1 == 1 && r2 == 1 && r3 == 0 && r4 == 0
+
+비슷하게, 다음과 같은 결과도 가능합니다:
+
+	r0 == 0 && r1 == 1 && r2 == 1 && r3 == 0 && r4 == 0 && r5 == 1
+
+cpu0(), cpu1(), 그리고 cpu2() 는 그들의 읽기와 쓰기를 순서대로 보게 되지만,
+release-acquire 체인에 관여되지 않은 CPU 들은 그 순서에 이견을 가질 수
+있습니다.  이런 이견은 smp_load_acquire() 와 smp_store_release() 의 구현에
+사용되는 완화된 메모리 배리어 인스트럭션들은 항상 배리어 앞의 스토어들을 뒤의
+로드들에 앞세울 필요는 없다는 사실에서 기인합니다.  이 말은 cpu3() 는 cpu0() 의
+u 로의 스토어를 cpu1() 의 v 로부터의 로드 뒤에 일어난 것으로 볼 수 있다는
+뜻입니다, cpu0() 와 cpu1() 은 이 두 오퍼레이션이 의도된 순서대로 일어났음에
+모두 동의하는데도 말입니다.
+
+하지만, smp_load_acquire() 는 마술이 아님을 명심하시기 바랍니다.  구체적으로,
+이 함수는 단순히 순서 규칙을 지키며 인자로부터의 읽기를 수행합니다.  이것은
+어떤 특정한 값이 읽힐 것인지는 보장하지 -않습니다-.  따라서, 다음과 같은 결과도
+가능합니다:
+
+	r0 == 0 && r1 == 0 && r2 == 0 && r5 == 0
+
+이런 결과는 어떤 것도 재배치 되지 않는, 순차적 일관성을 가진 가상의
+시스템에서도 일어날 수 있음을 기억해 두시기 바랍니다.
+
+다시 말하지만, 당신의 코드가 글로벌 이행성을 필요로 한다면, 범용 배리어를
+사용하십시오.
+
+
+==================
+명시적 커널 배리어
+==================
+
+리눅스 커널은 서로 다른 단계에서 동작하는 다양한 배리어들을 가지고 있습니다:
+
+  (*) 컴파일러 배리어.
+
+  (*) CPU 메모리 배리어.
+
+  (*) MMIO 쓰기 배리어.
+
+
+컴파일러 배리어
+---------------
+
+리눅스 커널은 컴파일러가 메모리 액세스를 재배치 하는 것을 막아주는 명시적인
+컴파일러 배리어를 가지고 있습니다:
+
+	barrier();
+
+이건 범용 배리어입니다 -- barrier() 의 읽기-읽기 나 쓰기-쓰기 변종은 없습니다.
+하지만, READ_ONCE() 와 WRITE_ONCE() 는 특정 액세스들에 대해서만 동작하는
+barrier() 의 완화된 형태로 볼 수 있습니다.
+
+barrier() 함수는 다음과 같은 효과를 갖습니다:
+
+ (*) 컴파일러가 barrier() 뒤의 액세스들이 barrier() 앞의 액세스보다 앞으로
+     재배치되지 못하게 합니다.  예를 들어, 인터럽트 핸들러 코드와 인터럽트 당한
+     코드 사이의 통신을 신중히 하기 위해 사용될 수 있습니다.
+
+ (*) 루프에서, 컴파일러가 루프 조건에 사용된 변수를 매 이터레이션마다
+     메모리에서 로드하지 않아도 되도록 최적화 하는걸 방지합니다.
+
+READ_ONCE() 와 WRITE_ONCE() 함수는 싱글 쓰레드 코드에서는 문제 없지만 동시성이
+있는 코드에서는 문제가 될 수 있는 모든 최적화를 막습니다.  이런 류의 최적화에
+대한 예를 몇가지 들어보면 다음과 같습니다:
+
+ (*) 컴파일러는 같은 변수에 대한 로드와 스토어를 재배치 할 수 있고, 어떤
+     경우에는 CPU가 같은 변수로부터의 로드들을 재배치할 수도 있습니다.  이는
+     다음의 코드가:
+
+	a[0] = x;
+	a[1] = x;
+
+     x 의 예전 값이 a[1] 에, 새 값이 a[0] 에 있게 할 수 있다는 뜻입니다.
+     컴파일러와 CPU가 이런 일을 못하게 하려면 다음과 같이 해야 합니다:
+
+	a[0] = READ_ONCE(x);
+	a[1] = READ_ONCE(x);
+
+     즉, READ_ONCE() 와 WRITE_ONCE() 는 여러 CPU 에서 하나의 변수에 가해지는
+     액세스들에 캐시 일관성을 제공합니다.
+
+ (*) 컴파일러는 같은 변수에 대한 연속적인 로드들을 병합할 수 있습니다.  그런
+     병합 작업으로 컴파일러는 다음의 코드를:
+
+	while (tmp = a)
+		do_something_with(tmp);
+
+     다음과 같이, 싱글 쓰레드 코드에서는 말이 되지만 개발자의 의도와 전혀 맞지
+     않는 방향으로 "최적화" 할 수 있습니다:
+
+	if (tmp = a)
+		for (;;)
+			do_something_with(tmp);
+
+     컴파일러가 이런 짓을 하지 못하게 하려면 READ_ONCE() 를 사용하세요:
+
+	while (tmp = READ_ONCE(a))
+		do_something_with(tmp);
+
+ (*) 예컨대 레지스터 사용량이 많아 컴파일러가 모든 데이터를 레지스터에 담을 수
+     없는 경우, 컴파일러는 변수를 다시 로드할 수 있습니다.  따라서 컴파일러는
+     앞의 예에서 변수 'tmp' 사용을 최적화로 없애버릴 수 있습니다:
+
+	while (tmp = a)
+		do_something_with(tmp);
+
+     이 코드는 다음과 같이 싱글 쓰레드에서는 완벽하지만 동시성이 존재하는
+     경우엔 치명적인 코드로 바뀔 수 있습니다:
+
+	while (a)
+		do_something_with(a);
+
+     예를 들어, 최적화된 이 코드는 변수 a 가 다른 CPU 에 의해 "while" 문과
+     do_something_with() 호출 사이에 바뀌어 do_something_with() 에 0을 넘길
+     수도 있습니다.
+
+     이번에도, 컴파일러가 그런 짓을 하는걸 막기 위해 READ_ONCE() 를 사용하세요:
+
+	while (tmp = READ_ONCE(a))
+		do_something_with(tmp);
+
+     레지스터가 부족한 상황을 겪는 경우, 컴파일러는 tmp 를 스택에 저장해둘 수도
+     있습니다.  컴파일러가 변수를 다시 읽어들이는건 이렇게 저장해두고 후에 다시
+     읽어들이는데 드는 오버헤드 때문입니다.  그렇게 하는게 싱글 쓰레드
+     코드에서는 안전하므로, 안전하지 않은 경우에는 컴파일러에게 직접 알려줘야
+     합니다.
+
+ (*) 컴파일러는 그 값이 무엇일지 알고 있다면 로드를 아예 안할 수도 있습니다.
+     예를 들어, 다음의 코드는 변수 'a' 의 값이 항상 0임을 증명할 수 있다면:
+
+	while (tmp = a)
+		do_something_with(tmp);
+
+     이렇게 최적화 되어버릴 수 있습니다:
+
+	do { } while (0);
+
+     이 변환은 싱글 쓰레드 코드에서는 도움이 되는데 로드와 브랜치를 제거했기
+     때문입니다.  문제는 컴파일러가 'a' 의 값을 업데이트 하는건 현재의 CPU 하나
+     뿐이라는 가정 위에서 증명을 했다는데 있습니다.  만약 변수 'a' 가 공유되어
+     있다면, 컴파일러의 증명은 틀린 것이 될겁니다.  컴파일러는 그 자신이
+     생각하는 것만큼 많은 것을 알고 있지 못함을 컴파일러에게 알리기 위해
+     READ_ONCE() 를 사용하세요:
+
+	while (tmp = READ_ONCE(a))
+		do_something_with(tmp);
+
+     하지만 컴파일러는 READ_ONCE() 뒤에 나오는 값에 대해서도 눈길을 두고 있음을
+     기억하세요.  예를 들어, 다음의 코드에서 MAX 는 전처리기 매크로로, 1의 값을
+     갖는다고 해봅시다:
+
+	while ((tmp = READ_ONCE(a)) % MAX)
+		do_something_with(tmp);
+
+     이렇게 되면 컴파일러는 MAX 를 가지고 수행되는 "%" 오퍼레이터의 결과가 항상
+     0이라는 것을 알게 되고, 컴파일러가 코드를 실질적으로는 존재하지 않는
+     것처럼 최적화 하는 것이 허용되어 버립니다.  ('a' 변수의 로드는 여전히
+     행해질 겁니다.)
+
+ (*) 비슷하게, 컴파일러는 변수가 저장하려 하는 값을 이미 가지고 있다는 것을
+     알면 스토어 자체를 제거할 수 있습니다.  이번에도, 컴파일러는 현재의 CPU
+     만이 그 변수에 값을 쓰는 오로지 하나의 존재라고 생각하여 공유된 변수에
+     대해서는 잘못된 일을 하게 됩니다.  예를 들어, 다음과 같은 경우가 있을 수
+     있습니다:
+
+	a = 0;
+	... 변수 a 에 스토어를 하지 않는 코드 ...
+	a = 0;
+
+     컴파일러는 변수 'a' 의 값은 이미 0이라는 것을 알고, 따라서 두번째 스토어를
+     삭제할 겁니다.  만약 다른 CPU 가 그 사이 변수 'a' 에 다른 값을 썼다면
+     황당한 결과가 나올 겁니다.
+
+     컴파일러가 그런 잘못된 추측을 하지 않도록 WRITE_ONCE() 를 사용하세요:
+
+	WRITE_ONCE(a, 0);
+	... 변수 a 에 스토어를 하지 않는 코드 ...
+	WRITE_ONCE(a, 0);
+
+ (*) 컴파일러는 하지 말라고 하지 않으면 메모리 액세스들을 재배치 할 수
+     있습니다.  예를 들어, 다음의 프로세스 레벨 코드와 인터럽트 핸들러 사이의
+     상호작용을 생각해 봅시다:
+
+	void process_level(void)
+	{
+		msg = get_message();
+		flag = true;
+	}
+
+	void interrupt_handler(void)
+	{
+		if (flag)
+			process_message(msg);
+	}
+
+     이 코드에는 컴파일러가 process_level() 을 다음과 같이 변환하는 것을 막을
+     수단이 없고, 이런 변환은 싱글쓰레드에서라면 실제로 훌륭한 선택일 수
+     있습니다:
+
+	void process_level(void)
+	{
+		flag = true;
+		msg = get_message();
+	}
+
+     이 두개의 문장 사이에 인터럽트가 발생한다면, interrupt_handler() 는 의미를
+     알 수 없는 메세지를 받을 수도 있습니다.  이걸 막기 위해 다음과 같이
+     WRITE_ONCE() 를 사용하세요:
+
+	void process_level(void)
+	{
+		WRITE_ONCE(msg, get_message());
+		WRITE_ONCE(flag, true);
+	}
+
+	void interrupt_handler(void)
+	{
+		if (READ_ONCE(flag))
+			process_message(READ_ONCE(msg));
+	}
+
+     interrupt_handler() 안에서도 중첩된 인터럽트나 NMI 와 같이 인터럽트 핸들러
+     역시 'flag' 와 'msg' 에 접근하는 또다른 무언가에 인터럽트 될 수 있다면
+     READ_ONCE() 와 WRITE_ONCE() 를 사용해야 함을 기억해 두세요.  만약 그런
+     가능성이 없다면, interrupt_handler() 안에서는 문서화 목적이 아니라면
+     READ_ONCE() 와 WRITE_ONCE() 는 필요치 않습니다.  (근래의 리눅스 커널에서
+     중첩된 인터럽트는 보통 잘 일어나지 않음도 기억해 두세요, 실제로, 어떤
+     인터럽트 핸들러가 인터럽트가 활성화된 채로 리턴하면 WARN_ONCE() 가
+     실행됩니다.)
+
+     컴파일러는 READ_ONCE() 와 WRITE_ONCE() 뒤의 READ_ONCE() 나 WRITE_ONCE(),
+     barrier(), 또는 비슷한 것들을 담고 있지 않은 코드를 움직일 수 있을 것으로
+     가정되어야 합니다.
+
+     이 효과는 barrier() 를 통해서도 만들 수 있지만, READ_ONCE() 와
+     WRITE_ONCE() 가 좀 더 안목 높은 선택입니다: READ_ONCE() 와 WRITE_ONCE()는
+     컴파일러에 주어진 메모리 영역에 대해서만 최적화 가능성을 포기하도록
+     하지만, barrier() 는 컴파일러가 지금까지 기계의 레지스터에 캐시해 놓은
+     모든 메모리 영역의 값을 버려야 하게 하기 때문입니다.  물론, 컴파일러는
+     READ_ONCE() 와 WRITE_ONCE() 가 일어난 순서도 지켜줍니다, CPU 는 당연히
+     그 순서를 지킬 의무가 없지만요.
+
+ (*) 컴파일러는 다음의 예에서와 같이 변수에의 스토어를 날조해낼 수도 있습니다:
+
+	if (a)
+		b = a;
+	else
+		b = 42;
+
+     컴파일러는 아래와 같은 최적화로 브랜치를 줄일 겁니다:
+
+	b = 42;
+	if (a)
+		b = a;
+
+     싱글 쓰레드 코드에서 이 최적화는 안전할 뿐 아니라 브랜치 갯수를
+     줄여줍니다.  하지만 안타깝게도, 동시성이 있는 코드에서는 이 최적화는 다른
+     CPU 가 'b' 를 로드할 때, -- 'a' 가 0이 아닌데도 -- 가짜인 값, 42를 보게
+     되는 경우를 가능하게 합니다.  이걸 방지하기 위해 WRITE_ONCE() 를
+     사용하세요:
+
+	if (a)
+		WRITE_ONCE(b, a);
+	else
+		WRITE_ONCE(b, 42);
+
+     컴파일러는 로드를 만들어낼 수도 있습니다.  일반적으로는 문제를 일으키지
+     않지만, 캐시 라인 바운싱을 일으켜 성능과 확장성을 떨어뜨릴 수 있습니다.
+     날조된 로드를 막기 위해선 READ_ONCE() 를 사용하세요.
+
+ (*) 정렬된 메모리 주소에 위치한, 한번의 메모리 참조 인스트럭션으로 액세스
+     가능한 크기의 데이터는 하나의 큰 액세스가 여러개의 작은 액세스들로
+     대체되는 "로드 티어링(load tearing)" 과 "스토어 티어링(store tearing)" 을
+     방지합니다.  예를 들어, 주어진 아키텍쳐가 7-bit imeediate field 를 갖는
+     16-bit 스토어 인스트럭션을 제공한다면, 컴파일러는 다음의 32-bit 스토어를
+     구현하는데에 두개의 16-bit store-immediate 명령을 사용하려 할겁니다:
+
+	p = 0x00010002;
+
+     스토어 할 상수를 만들고 그 값을 스토어 하기 위해 두개가 넘는 인스트럭션을
+     사용하게 되는, 이런 종류의 최적화를 GCC 는 실제로 함을 부디 알아 두십시오.
+     이 최적화는 싱글 쓰레드 코드에서는 성공적인 최적화 입니다.  실제로, 근래에
+     발생한 (그리고 고쳐진) 버그는 GCC 가 volatile 스토어에 비정상적으로 이
+     최적화를 사용하게 했습니다.  그런 버그가 없다면, 다음의 예에서
+     WRITE_ONCE() 의 사용은 스토어 티어링을 방지합니다:
+
+	WRITE_ONCE(p, 0x00010002);
+
+     Packed 구조체의 사용 역시 다음의 예처럼  로드 / 스토어 티어링을 유발할 수
+     있습니다:
+
+	struct __attribute__((__packed__)) foo {
+		short a;
+		int b;
+		short c;
+	};
+	struct foo foo1, foo2;
+	...
+
+	foo2.a = foo1.a;
+	foo2.b = foo1.b;
+	foo2.c = foo1.c;
+
+     READ_ONCE() 나 WRITE_ONCE() 도 없고 volatile 마킹도 없기 때문에,
+     컴파일러는 이 세개의 대입문을 두개의 32-bit 로드와 두개의 32-bit 스토어로
+     변환할 수 있습니다.  이는 'foo1.b' 의 값의 로드 티어링과 'foo2.b' 의
+     스토어 티어링을 초래할 겁니다.  이 예에서도 READ_ONCE() 와 WRITE_ONCE()
+     가 티어링을 막을 수 있습니다:
+
+	foo2.a = foo1.a;
+	WRITE_ONCE(foo2.b, READ_ONCE(foo1.b));
+	foo2.c = foo1.c;
+
+그렇지만, volatile 로 마크된 변수에 대해서는 READ_ONCE() 와 WRITE_ONCE() 가
+필요치 않습니다.  예를 들어, 'jiffies' 는 volatile 로 마크되어 있기 때문에,
+READ_ONCE(jiffies) 라고 할 필요가 없습니다.  READ_ONCE() 와 WRITE_ONCE() 가
+실은 volatile 캐스팅으로 구현되어 있어서 인자가 이미 volatile 로 마크되어
+있다면 또다른 효과를 내지는 않기 때문입니다.
+
+이 컴파일러 배리어들은 CPU 에는 직접적 효과를 전혀 만들지 않기 때문에, 결국은
+재배치가 일어날 수도 있음을 부디 기억해 두십시오.
+
+
+CPU 메모리 배리어
+-----------------
+
+리눅스 커널은 다음의 여덟개 기본 CPU 메모리 배리어를 가지고 있습니다:
+
+	TYPE		MANDATORY		SMP CONDITIONAL
+	===============	=======================	===========================
+	범용		mb()			smp_mb()
+	쓰기		wmb()			smp_wmb()
+	읽기		rmb()			smp_rmb()
+	데이터 의존성	read_barrier_depends()	smp_read_barrier_depends()
+
+
+데이터 의존성 배리어를 제외한 모든 메모리 배리어는 컴파일러 배리어를
+포함합니다.  데이터 의존성은 컴파일러에의 추가적인 순서 보장을 포함하지
+않습니다.
+
+방백: 데이터 의존성이 있는 경우, 컴파일러는 해당 로드를 올바른 순서로 일으킬
+것으로 (예: `a[b]` 는 a[b] 를 로드 하기 전에 b 의 값을 먼저 로드한다)
+기대되지만, C 언어 사양에는 컴파일러가 b 의 값을 추측 (예: 1 과 같음) 해서
+b  로드 전에 a 로드를 하는 코드 (예: tmp = a[1]; if (b != 1) tmp = a[b]; ) 를
+만들지 않아야 한다는 내용 같은 건 없습니다.  또한 컴파일러는 a[b] 를 로드한
+후에 b 를 또다시 로드할 수도 있어서, a[b] 보다 최신 버전의 b 값을 가질 수도
+있습니다.  이런 문제들의 해결책에 대한 의견 일치는 아직 없습니다만, 일단
+READ_ONCE() 매크로부터 보기 시작하는게 좋은 시작이 될겁니다.
+
+SMP 메모리 배리어들은 유니프로세서로 컴파일된 시스템에서는 컴파일러 배리어로
+바뀌는데, 하나의 CPU 는 스스로 일관성을 유지하고, 겹치는 액세스들 역시 올바른
+순서로 행해질 것으로 생각되기 때문입니다.  하지만, 아래의 "Virtual Machine
+Guests" 서브섹션을 참고하십시오.
+
+[!] SMP 시스템에서 공유메모리로의 접근들을 순서 세워야 할 때, SMP 메모리
+배리어는 _반드시_ 사용되어야 함을 기억하세요, 그대신 락을 사용하는 것으로도
+충분하긴 하지만 말이죠.
+
+Mandatory 배리어들은 SMP 시스템에서도 UP 시스템에서도 SMP 효과만 통제하기에는
+불필요한 오버헤드를 갖기 때문에 SMP 효과만 통제하면 되는 곳에는 사용되지 않아야
+합니다.  하지만, 느슨한 순서 규칙의 메모리 I/O 윈도우를 통한 MMIO 의 효과를
+통제할 때에는 mandatory 배리어들이 사용될 수 있습니다.  이 배리어들은
+컴파일러와 CPU 모두 재배치를 못하도록 함으로써 메모리 오퍼레이션들이 디바이스에
+보여지는 순서에도 영향을 주기 때문에, SMP 가 아닌 시스템이라 할지라도 필요할 수
+있습니다.
+
+
+일부 고급 배리어 함수들도 있습니다:
+
+ (*) smp_store_mb(var, value)
+
+     이 함수는 특정 변수에 특정 값을 대입하고 범용 메모리 배리어를 칩니다.
+     UP 컴파일에서는 컴파일러 배리어보다 더한 것을 친다고는 보장되지 않습니다.
+
+
+ (*) smp_mb__before_atomic();
+ (*) smp_mb__after_atomic();
+
+     이것들은 값을 리턴하지 않는 (더하기, 빼기, 증가, 감소와 같은) 어토믹
+     함수들을 위한, 특히 그것들이 레퍼런스 카운팅에 사용될 때를 위한
+     함수들입니다.  이 함수들은 메모리 배리어를 내포하고 있지는 않습니다.
+
+     이것들은 값을 리턴하지 않으며 어토믹한 (set_bit 과 clear_bit 같은) 비트
+     연산에도 사용될 수 있습니다.
+
+     한 예로, 객체 하나를 무효한 것으로 표시하고 그 객체의 레퍼런스 카운트를
+     감소시키는 다음 코드를 보세요:
+
+	obj->dead = 1;
+	smp_mb__before_atomic();
+	atomic_dec(&obj->ref_count);
+
+     이 코드는 객체의 업데이트된 death 마크가 레퍼런스 카운터 감소 동작
+     *전에* 보일 것을 보장합니다.
+
+     더 많은 정보를 위해선 Documentation/atomic_ops.txt 문서를 참고하세요.
+     어디서 이것들을 사용해야 할지 궁금하다면 "어토믹 오퍼레이션" 서브섹션을
+     참고하세요.
+
+
+ (*) lockless_dereference();
+
+     이 함수는 smp_read_barrier_depends() 데이터 의존성 배리어를 사용하는
+     포인터 읽어오기 래퍼(wrapper) 함수로 생각될 수 있습니다.
+
+     객체의 라이프타임이 RCU 외의 메커니즘으로 관리된다는 점을 제외하면
+     rcu_dereference() 와도 유사한데, 예를 들면 객체가 시스템이 꺼질 때에만
+     제거되는 경우 등입니다.  또한, lockless_dereference() 은 RCU 와 함께
+     사용될수도, RCU 없이 사용될 수도 있는 일부 데이터 구조에 사용되고
+     있습니다.
+
+
+ (*) dma_wmb();
+ (*) dma_rmb();
+
+     이것들은 CPU 와 DMA 가능한 디바이스에서 모두 액세스 가능한 공유 메모리의
+     읽기, 쓰기 작업들의 순서를 보장하기 위해 consistent memory 에서 사용하기
+     위한 것들입니다.
+
+     예를 들어, 디바이스와 메모리를 공유하며, 디스크립터 상태 값을 사용해
+     디스크립터가 디바이스에 속해 있는지 아니면 CPU 에 속해 있는지 표시하고,
+     공지용 초인종(doorbell) 을 사용해 업데이트된 디스크립터가 디바이스에 사용
+     가능해졌음을 공지하는 디바이스 드라이버를 생각해 봅시다:
+
+	if (desc->status != DEVICE_OWN) {
+		/* 디스크립터를 소유하기 전에는 데이터를 읽지 않음 */
+		dma_rmb();
+
+		/* 데이터를 읽고 씀 */
+		read_data = desc->data;
+		desc->data = write_data;
+
+		/* 상태 업데이트 전 수정사항을 반영 */
+		dma_wmb();
+
+		/* 소유권을 수정 */
+		desc->status = DEVICE_OWN;
+
+		/* MMIO 를 통해 디바이스에 공지를 하기 전에 메모리를 동기화 */
+		wmb();
+
+		/* 업데이트된 디스크립터의 디바이스에 공지 */
+		writel(DESC_NOTIFY, doorbell);
+	}
+
+     dma_rmb() 는 디스크립터로부터 데이터를 읽어오기 전에 디바이스가 소유권을
+     내놓았음을 보장하게 하고, dma_wmb() 는 디바이스가 자신이 소유권을 다시
+     가졌음을 보기 전에 디스크립터에 데이터가 쓰였음을 보장합니다.  wmb() 는
+     캐시 일관성이 없는 (cache incoherent) MMIO 영역에 쓰기를 시도하기 전에
+     캐시 일관성이 있는 메모리 (cache coherent memory) 쓰기가 완료되었음을
+     보장해주기 위해 필요합니다.
+
+     consistent memory 에 대한 자세한 내용을 위해선 Documentation/DMA-API.txt
+     문서를 참고하세요.
+
+
+MMIO 쓰기 배리어
+----------------
+
+리눅스 커널은 또한 memory-mapped I/O 쓰기를 위한 특별한 배리어도 가지고
+있습니다:
+
+	mmiowb();
+
+이것은 mandatory 쓰기 배리어의 변종으로, 완화된 순서 규칙의 I/O 영역에으로의
+쓰기가 부분적으로 순서를 맞추도록 해줍니다.  이 함수는 CPU->하드웨어 사이를
+넘어서 실제 하드웨어에까지 일부 수준의 영향을 끼칩니다.
+
+더 많은 정보를 위해선 "Acquire vs I/O 액세스" 서브섹션을 참고하세요.
+
+
+=========================
+암묵적 커널 메모리 배리어
+=========================
+
+리눅스 커널의 일부 함수들은 메모리 배리어를 내장하고 있는데, 락(lock)과
+스케쥴링 관련 함수들이 대부분입니다.
+
+여기선 _최소한의_ 보장을 설명합니다; 특정 아키텍쳐에서는 이 설명보다 더 많은
+보장을 제공할 수도 있습니다만 해당 아키텍쳐에 종속적인 코드 외의 부분에서는
+그런 보장을 기대해선 안될겁니다.
+
+
+락 ACQUISITION 함수
+-------------------
+
+리눅스 커널은 다양한 락 구성체를 가지고 있습니다:
+
+ (*) 스핀 락
+ (*) R/W 스핀 락
+ (*) 뮤텍스
+ (*) 세마포어
+ (*) R/W 세마포어
+
+각 구성체마다 모든 경우에 "ACQUIRE" 오퍼레이션과 "RELEASE" 오퍼레이션의 변종이
+존재합니다.  이 오퍼레이션들은 모두 적절한 배리어를 내포하고 있습니다:
+
+ (1) ACQUIRE 오퍼레이션의 영향:
+
+     ACQUIRE 뒤에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된
+     뒤에 완료됩니다.
+
+     ACQUIRE 앞에서 요청된 메모리 오퍼레이션은 ACQUIRE 오퍼레이션이 완료된 후에
+     완료될 수 있습니다.  smp_mb__before_spinlock() 뒤에 ACQUIRE 가 실행되는
+     코드 블록은 블록 앞의 스토어를 블록 뒤의 로드와 스토어에 대해 순서
+     맞춥니다.  이건 smp_mb() 보다 완화된 것임을 기억하세요!  많은 아키텍쳐에서
+     smp_mb__before_spinlock() 은 사실 아무일도 하지 않습니다.
+
+ (2) RELEASE 오퍼레이션의 영향:
+
+     RELEASE 앞에서 요청된 메모리 오퍼레이션은 RELEASE 오퍼레이션이 완료되기
+     전에 완료됩니다.
+
+     RELEASE 뒤에서 요청된 메모리 오퍼레이션은 RELEASE 오퍼레이션 완료 전에
+     완료될 수 있습니다.
+
+ (3) ACQUIRE vs ACQUIRE 영향:
+
+     어떤 ACQUIRE 오퍼레이션보다 앞에서 요청된 모든 ACQUIRE 오퍼레이션은 그
+     ACQUIRE 오퍼레이션 전에 완료됩니다.
+
+ (4) ACQUIRE vs RELEASE implication:
+
+     어떤 RELEASE 오퍼레이션보다 앞서 요청된 ACQUIRE 오퍼레이션은 그 RELEASE
+     오퍼레이션보다 먼저 완료됩니다.
+
+ (5) 실패한 조건적 ACQUIRE 영향:
+
+     ACQUIRE 오퍼레이션의 일부 락(lock) 변종은 락이 곧바로 획득하기에는
+     불가능한 상태이거나 락이 획득 가능해지도록 기다리는 도중 시그널을 받거나
+     해서 실패할 수 있습니다.  실패한 락은 어떤 배리어도 내포하지 않습니다.
+
+[!] 참고: 락 ACQUIRE 와 RELEASE 가 단방향 배리어여서 나타나는 현상 중 하나는
+크리티컬 섹션 바깥의 인스트럭션의 영향이 크리티컬 섹션 내부로도 들어올 수
+있다는 것입니다.
+
+RELEASE 후에 요청되는 ACQUIRE 는 전체 메모리 배리어라 여겨지면 안되는데,
+ACQUIRE 앞의 액세스가 ACQUIRE 후에 수행될 수 있고, RELEASE 후의 액세스가
+RELEASE 전에 수행될 수도 있으며, 그 두개의 액세스가 서로를 지나칠 수도 있기
+때문입니다:
+
+	*A = a;
+	ACQUIRE M
+	RELEASE M
+	*B = b;
+
+는 다음과 같이 될 수도 있습니다:
+
+	ACQUIRE M, STORE *B, STORE *A, RELEASE M
+
+ACQUIRE 와 RELEASE 가 락 획득과 해제라면, 그리고 락의 ACQUIRE 와 RELEASE 가
+같은 락 변수에 대한 것이라면, 해당 락을 쥐고 있지 않은 다른 CPU 의 시야에는
+이와 같은 재배치가 일어나는 것으로 보일 수 있습니다.  요약하자면, ACQUIRE 에
+이어 RELEASE 오퍼레이션을 순차적으로 실행하는 행위가 전체 메모리 배리어로
+생각되어선 -안됩니다-.
+
+비슷하게, 앞의 반대 케이스인 RELEASE 와 ACQUIRE 두개 오퍼레이션의 순차적 실행
+역시 전체 메모리 배리어를 내포하지 않습니다.  따라서, RELEASE, ACQUIRE 로
+규정되는 크리티컬 섹션의 CPU 수행은 RELEASE 와 ACQUIRE 를 가로지를 수 있으므로,
+다음과 같은 코드는:
+
+	*A = a;
+	RELEASE M
+	ACQUIRE N
+	*B = b;
+
+다음과 같이 수행될 수 있습니다:
+
+	ACQUIRE N, STORE *B, STORE *A, RELEASE M
+
+이런 재배치는 데드락을 일으킬 수도 있을 것처럼 보일 수 있습니다.  하지만, 그런
+데드락의 조짐이 있다면 RELEASE 는 단순히 완료될 것이므로 데드락은 존재할 수
+없습니다.
+
+	이게 어떻게 올바른 동작을 할 수 있을까요?
+
+	우리가 이야기 하고 있는건 재배치를 하는 CPU 에 대한 이야기이지,
+	컴파일러에 대한 것이 아니란 점이 핵심입니다.  컴파일러 (또는, 개발자)
+	가 오퍼레이션들을 이렇게 재배치하면, 데드락이 일어날 수 -있습-니다.
+
+	하지만 CPU 가 오퍼레이션들을 재배치 했다는걸 생각해 보세요.  이 예에서,
+	어셈블리 코드 상으로는 언락이 락을 앞서게 되어 있습니다.  CPU 가 이를
+	재배치해서 뒤의 락 오퍼레이션을 먼저 실행하게 됩니다.  만약 데드락이
+	존재한다면, 이 락 오퍼레이션은 그저 스핀을 하며 계속해서 락을
+	시도합니다 (또는, 한참 후에겠지만, 잠듭니다).  CPU 는 언젠가는
+	(어셈블리 코드에서는 락을 앞서는) 언락 오퍼레이션을 실행하는데, 이 언락
+	오퍼레이션이 잠재적 데드락을 해결하고, 락 오퍼레이션도 뒤이어 성공하게
+	됩니다.
+
+	하지만 만약 락이 잠을 자는 타입이었다면요?  그런 경우에 코드는
+	스케쥴러로 들어가려 할 거고, 여기서 결국은 메모리 배리어를 만나게
+	되는데, 이 메모리 배리어는 앞의 언락 오퍼레이션이 완료되도록 만들고,
+	데드락은 이번에도 해결됩니다.  잠을 자는 행위와 언락 사이의 경주 상황
+	(race) 도 있을 수 있겠습니다만, 락 관련 기능들은 그런 경주 상황을 모든
+	경우에 제대로 해결할 수 있어야 합니다.
+
+락과 세마포어는 UP 컴파일된 시스템에서의 순서에 대해 보장을 하지 않기 때문에,
+그런 상황에서 인터럽트 비활성화 오퍼레이션과 함께가 아니라면 어떤 일에도 - 특히
+I/O 액세스와 관련해서는 - 제대로 사용될 수 없을 겁니다.
+
+"CPU 간 ACQUIRING 배리어 효과" 섹션도 참고하시기 바랍니다.
+
+
+예를 들어, 다음과 같은 코드를 생각해 봅시다:
+
+	*A = a;
+	*B = b;
+	ACQUIRE
+	*C = c;
+	*D = d;
+	RELEASE
+	*E = e;
+	*F = f;
+
+여기선 다음의 이벤트 시퀀스가 생길 수 있습니다:
+
+	ACQUIRE, {*F,*A}, *E, {*C,*D}, *B, RELEASE
+
+	[+] {*F,*A} 는 조합된 액세스를 의미합니다.
+
+하지만 다음과 같은 건 불가능하죠:
+
+	{*F,*A}, *B,	ACQUIRE, *C, *D,	RELEASE, *E
+	*A, *B, *C,	ACQUIRE, *D,		RELEASE, *E, *F
+	*A, *B,		ACQUIRE, *C,		RELEASE, *D, *E, *F
+	*B,		ACQUIRE, *C, *D,	RELEASE, {*F,*A}, *E
+
+
+
+인터럽트 비활성화 함수
+----------------------
+
+인터럽트를 비활성화 하는 함수 (ACQUIRE 와 동일) 와 인터럽트를 활성화 하는 함수
+(RELEASE 와 동일) 는 컴파일러 배리어처럼만 동작합니다.  따라서, 별도의 메모리
+배리어나 I/O 배리어가 필요한 상황이라면 그 배리어들은 인터럽트 비활성화 함수
+외의 방법으로 제공되어야만 합니다.
+
+
+슬립과 웨이크업 함수
+--------------------
+
+글로벌 데이터에 표시된 이벤트에 의해 프로세스를 잠에 빠트리는 것과 깨우는 것은
+해당 이벤트를 기다리는 태스크의 태스크 상태와 그 이벤트를 알리기 위해 사용되는
+글로벌 데이터, 두 데이터간의 상호작용으로 볼 수 있습니다.  이것이 옳은 순서대로
+일어남을 분명히 하기 위해, 프로세스를 잠에 들게 하는 기능과 깨우는 기능은
+몇가지 배리어를 내포합니다.
+
+먼저, 잠을 재우는 쪽은 일반적으로 다음과 같은 이벤트 시퀀스를 따릅니다:
+
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (event_indicated)
+			break;
+		schedule();
+	}
+
+set_current_state() 에 의해, 태스크 상태가 바뀐 후 범용 메모리 배리어가
+자동으로 삽입됩니다:
+
+	CPU 1
+	===============================
+	set_current_state();
+	  smp_store_mb();
+	    STORE current->state
+	    <범용 배리어>
+	LOAD event_indicated
+
+set_current_state() 는 다음의 것들로 감싸질 수도 있습니다:
+
+	prepare_to_wait();
+	prepare_to_wait_exclusive();
+
+이것들 역시 상태를 설정한 후 범용 메모리 배리어를 삽입합니다.
+앞의 전체 시퀀스는 다음과 같은 함수들로 한번에 수행 가능한데, 이것들은 모두
+올바른 장소에 메모리 배리어를 삽입합니다:
+
+	wait_event();
+	wait_event_interruptible();
+	wait_event_interruptible_exclusive();
+	wait_event_interruptible_timeout();
+	wait_event_killable();
+	wait_event_timeout();
+	wait_on_bit();
+	wait_on_bit_lock();
+
+
+두번째로, 깨우기를 수행하는 코드는 일반적으로 다음과 같을 겁니다:
+
+	event_indicated = 1;
+	wake_up(&event_wait_queue);
+
+또는:
+
+	event_indicated = 1;
+	wake_up_process(event_daemon);
+
+wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다.  만약 그것들이 뭔가를
+깨운다면요.  이 배리어는 태스크 상태가 지워지기 전에 수행되므로, 이벤트를
+알리기 위한 STORE 와 태스크 상태를 TASK_RUNNING 으로 설정하는 STORE 사이에
+위치하게 됩니다.
+
+	CPU 1				CPU 2
+	===============================	===============================
+	set_current_state();		STORE event_indicated
+	  smp_store_mb();		wake_up();
+	    STORE current->state	  <쓰기 배리어>
+	    <범용 배리어>		  STORE current->state
+	LOAD event_indicated
+
+한번더 말합니다만, 이 쓰기 메모리 배리어는 이 코드가 정말로 뭔가를 깨울 때에만
+실행됩니다.  이걸 설명하기 위해, X 와 Y 는 모두 0 으로 초기화 되어 있다는 가정
+하에 아래의 이벤트 시퀀스를 생각해 봅시다:
+
+	CPU 1				CPU 2
+	===============================	===============================
+	X = 1;				STORE event_indicated
+	smp_mb();			wake_up();
+	Y = 1;				wait_event(wq, Y == 1);
+	wake_up();			  load from Y sees 1, no memory barrier
+					load from X might see 0
+
+위 예제에서의 경우와 달리 깨우기가 정말로 행해졌다면, CPU 2 의 X 로드는 1 을
+본다고 보장될 수 있을 겁니다.
+
+사용 가능한 깨우기류 함수들로 다음과 같은 것들이 있습니다:
+
+	complete();
+	wake_up();
+	wake_up_all();
+	wake_up_bit();
+	wake_up_interruptible();
+	wake_up_interruptible_all();
+	wake_up_interruptible_nr();
+	wake_up_interruptible_poll();
+	wake_up_interruptible_sync();
+	wake_up_interruptible_sync_poll();
+	wake_up_locked();
+	wake_up_locked_poll();
+	wake_up_nr();
+	wake_up_poll();
+	wake_up_process();
+
+
+[!] 잠재우는 코드와 깨우는 코드에 내포되는 메모리 배리어들은 깨우기 전에
+이루어진 스토어를 잠재우는 코드가 set_current_state() 를 호출한 후에 행하는
+로드에 대해 순서를 맞추지 _않는다는_ 점을 기억하세요.  예를 들어, 잠재우는
+코드가 다음과 같고:
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (event_indicated)
+		break;
+	__set_current_state(TASK_RUNNING);
+	do_something(my_data);
+
+깨우는 코드는 다음과 같다면:
+
+	my_data = value;
+	event_indicated = 1;
+	wake_up(&event_wait_queue);
+
+event_indecated 에의 변경이 잠재우는 코드에게 my_data 에의 변경 후에 이루어진
+것으로 인지될 것이라는 보장이 없습니다.  이런 경우에는 양쪽 코드 모두 각각의
+데이터 액세스 사이에 메모리 배리어를 직접 쳐야 합니다.  따라서 앞의 재우는
+코드는 다음과 같이:
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	if (event_indicated) {
+		smp_rmb();
+		do_something(my_data);
+	}
+
+그리고 깨우는 코드는 다음과 같이 되어야 합니다:
+
+	my_data = value;
+	smp_wmb();
+	event_indicated = 1;
+	wake_up(&event_wait_queue);
+
+
+그외의 함수들
+-------------
+
+그외의 배리어를 내포하는 함수들은 다음과 같습니다:
+
+ (*) schedule() 과 그 유사한 것들이 완전한 메모리 배리어를 내포합니다.
+
+
+==============================
+CPU 간 ACQUIRING 배리어의 효과
+==============================
+
+SMP 시스템에서의 락 기능들은 더욱 강력한 형태의 배리어를 제공합니다: 이
+배리어는 동일한 락을 사용하는 다른 CPU 들의 메모리 액세스 순서에도 영향을
+끼칩니다.
+
+
+ACQUIRE VS 메모리 액세스
+------------------------
+
+다음의 예를 생각해 봅시다: 시스템은 두개의 스핀락 (M) 과 (Q), 그리고 세개의 CPU
+를 가지고 있습니다; 여기에 다음의 이벤트 시퀀스가 발생합니다:
+
+	CPU 1				CPU 2
+	===============================	===============================
+	WRITE_ONCE(*A, a);		WRITE_ONCE(*E, e);
+	ACQUIRE M			ACQUIRE Q
+	WRITE_ONCE(*B, b);		WRITE_ONCE(*F, f);
+	WRITE_ONCE(*C, c);		WRITE_ONCE(*G, g);
+	RELEASE M			RELEASE Q
+	WRITE_ONCE(*D, d);		WRITE_ONCE(*H, h);
+
+*A 로의 액세스부터 *H 로의 액세스까지가 어떤 순서로 CPU 3 에게 보여질지에
+대해서는 각 CPU 에서의 락 사용에 의해 내포되어 있는 제약을 제외하고는 어떤
+보장도 존재하지 않습니다.  예를 들어, CPU 3 에게 다음과 같은 순서로 보여지는
+것이 가능합니다:
+
+	*E, ACQUIRE M, ACQUIRE Q, *G, *C, *F, *A, *B, RELEASE Q, *D, *H, RELEASE M
+
+하지만 다음과 같이 보이지는 않을 겁니다:
+
+	*B, *C or *D preceding ACQUIRE M
+	*A, *B or *C following RELEASE M
+	*F, *G or *H preceding ACQUIRE Q
+	*E, *F or *G following RELEASE Q
+
+
+
+ACQUIRE VS I/O 액세스
+----------------------
+
+특정한 (특히 NUMA 가 관련된) 환경 하에서 두개의 CPU 에서 동일한 스핀락으로
+보호되는 두개의 크리티컬 섹션 안의 I/O 액세스는 PCI 브릿지에 겹쳐진 I/O
+액세스로 보일 수 있는데, PCI 브릿지는 캐시 일관성 프로토콜과 합을 맞춰야 할
+의무가 없으므로, 필요한 읽기 메모리 배리어가 요청되지 않기 때문입니다.
+
+예를 들어서:
+
+	CPU 1				CPU 2
+	===============================	===============================
+	spin_lock(Q)
+	writel(0, ADDR)
+	writel(1, DATA);
+	spin_unlock(Q);
+					spin_lock(Q);
+					writel(4, ADDR);
+					writel(5, DATA);
+					spin_unlock(Q);
+
+는 PCI 브릿지에 다음과 같이 보일 수 있습니다:
+
+	STORE *ADDR = 0, STORE *ADDR = 4, STORE *DATA = 1, STORE *DATA = 5
+
+이렇게 되면 하드웨어의 오동작을 일으킬 수 있습니다.
+
+
+이런 경우엔 잡아둔 스핀락을 내려놓기 전에 mmiowb() 를 수행해야 하는데, 예를
+들면 다음과 같습니다:
+
+	CPU 1				CPU 2
+	===============================	===============================
+	spin_lock(Q)
+	writel(0, ADDR)
+	writel(1, DATA);
+	mmiowb();
+	spin_unlock(Q);
+					spin_lock(Q);
+					writel(4, ADDR);
+					writel(5, DATA);
+					mmiowb();
+					spin_unlock(Q);
+
+이 코드는 CPU 1 에서 요청된 두개의 스토어가 PCI 브릿지에 CPU 2 에서 요청된
+스토어들보다 먼저 보여짐을 보장합니다.
+
+
+또한, 같은 디바이스에서 스토어를 이어 로드가 수행되면 이 로드는 로드가 수행되기
+전에 스토어가 완료되기를 강제하므로 mmiowb() 의 필요가 없어집니다:
+
+	CPU 1				CPU 2
+	===============================	===============================
+	spin_lock(Q)
+	writel(0, ADDR)
+	a = readl(DATA);
+	spin_unlock(Q);
+					spin_lock(Q);
+					writel(4, ADDR);
+					b = readl(DATA);
+					spin_unlock(Q);
+
+
+더 많은 정보를 위해선 Documenataion/DocBook/deviceiobook.tmpl 을 참고하세요.
+
+
+=========================
+메모리 배리어가 필요한 곳
+=========================
+
+설령 SMP 커널을 사용하더라도 싱글 쓰레드로 동작하는 코드는 올바르게 동작하는
+것으로 보여질 것이기 때문에, 평범한 시스템 운영중에 메모리 오퍼레이션 재배치는
+일반적으로 문제가 되지 않습니다.  하지만, 재배치가 문제가 _될 수 있는_ 네가지
+환경이 있습니다:
+
+ (*) 프로세서간 상호 작용.
+
+ (*) 어토믹 오퍼레이션.
+
+ (*) 디바이스 액세스.
+
+ (*) 인터럽트.
+
+
+프로세서간 상호 작용
+--------------------
+
+두개 이상의 프로세서를 가진 시스템이 있다면, 시스템의 두개 이상의 CPU 는 동시에
+같은 데이터에 대한 작업을 할 수 있습니다.  이는 동기화 문제를 일으킬 수 있고,
+이 문제를 해결하는 일반적 방법은 락을 사용하는 것입니다.  하지만, 락은 상당히
+비용이 비싸서 가능하면 락을 사용하지 않고 일을 처리하는 것이 낫습니다.  이런
+경우, 두 CPU 모두에 영향을 끼치는 오퍼레이션들은 오동작을 막기 위해 신중하게
+순서가 맞춰져야 합니다.
+
+예를 들어, R/W 세마포어의 느린 수행경로 (slow path) 를 생각해 봅시다.
+세마포어를 위해 대기를 하는 하나의 프로세스가 자신의 스택 중 일부를 이
+세마포어의 대기 프로세스 리스트에 링크한 채로 있습니다:
+
+	struct rw_semaphore {
+		...
+		spinlock_t lock;
+		struct list_head waiters;
+	};
+
+	struct rwsem_waiter {
+		struct list_head list;
+		struct task_struct *task;
+	};
+
+특정 대기 상태 프로세스를 깨우기 위해, up_read() 나 up_write() 함수는 다음과
+같은 일을 합니다:
+
+ (1) 다음 대기 상태 프로세스 레코드는 어디있는지 알기 위해 이 대기 상태
+     프로세스 레코드의 next 포인터를 읽습니다;
+
+ (2) 이 대기 상태 프로세스의 task 구조체로의 포인터를 읽습니다;
+
+ (3) 이 대기 상태 프로세스가 세마포어를 획득했음을 알리기 위해 task
+     포인터를 초기화 합니다;
+
+ (4) 해당 태스크에 대해 wake_up_process() 를 호출합니다; 그리고
+
+ (5) 해당 대기 상태 프로세스의 task 구조체를 잡고 있던 레퍼런스를 해제합니다.
+
+달리 말하자면, 다음 이벤트 시퀀스를 수행해야 합니다:
+
+	LOAD waiter->list.next;
+	LOAD waiter->task;
+	STORE waiter->task;
+	CALL wakeup
+	RELEASE task
+
+그리고 이 이벤트들이 다른 순서로 수행된다면, 오동작이 일어날 수 있습니다.
+
+한번 세마포어의 대기줄에 들어갔고 세마포어 락을 놓았다면, 해당 대기 프로세스는
+락을 다시는 잡지 않습니다; 대신 자신의 task 포인터가 초기화 되길 기다립니다.
+그 레코드는 대기 프로세스의 스택에 있기 때문에, 리스트의 next 포인터가 읽혀지기
+_전에_ task 포인터가 지워진다면, 다른 CPU 는 해당 대기 프로세스를 시작해 버리고
+up*() 함수가 next 포인터를 읽기 전에 대기 프로세스의 스택을 마구 건드릴 수
+있습니다.
+
+그렇게 되면 위의 이벤트 시퀀스에 어떤 일이 일어나는지 생각해 보죠:
+
+	CPU 1				CPU 2
+	===============================	===============================
+					down_xxx()
+					Queue waiter
+					Sleep
+	up_yyy()
+	LOAD waiter->task;
+	STORE waiter->task;
+					Woken up by other event
+	<preempt>
+					Resume processing
+					down_xxx() returns
+					call foo()
+					foo() clobbers *waiter
+	</preempt>
+	LOAD waiter->list.next;
+	--- OOPS ---
+
+이 문제는 세마포어 락의 사용으로 해결될 수도 있겠지만, 그렇게 되면 깨어난 후에
+down_xxx() 함수가 불필요하게 스핀락을 또다시 얻어야만 합니다.
+
+이 문제를 해결하는 방법은 범용 SMP 메모리 배리어를 추가하는 겁니다:
+
+	LOAD waiter->list.next;
+	LOAD waiter->task;
+	smp_mb();
+	STORE waiter->task;
+	CALL wakeup
+	RELEASE task
+
+이 경우에, 배리어는 시스템의 나머지 CPU 들에게 모든 배리어 앞의 메모리 액세스가
+배리어 뒤의 메모리 액세스보다 앞서 일어난 것으로 보이게 만듭니다.  배리어 앞의
+메모리 액세스들이 배리어 명령 자체가 완료되는 시점까지 완료된다고는 보장하지
+_않습니다_.
+
+(이게 문제가 되지 않을) 단일 프로세서 시스템에서 smp_mb() 는 실제로는 그저
+컴파일러가 CPU 안에서의 순서를 바꾸거나 하지 않고 주어진 순서대로 명령을
+내리도록 하는 컴파일러 배리어일 뿐입니다.  오직 하나의 CPU 만 있으니, CPU 의
+의존성 순서 로직이 그 외의 모든것을 알아서 처리할 겁니다.
+
+
+어토믹 오퍼레이션
+-----------------
+
+어토믹 오퍼레이션은 기술적으로 프로세서간 상호작용으로 분류되며 그 중 일부는
+전체 메모리 배리어를 내포하고 또 일부는 내포하지 않지만, 커널에서 상당히
+의존적으로 사용하는 기능 중 하나입니다.
+
+메모리의 어떤 상태를 수정하고 해당 상태에 대한 (예전의 또는 최신의) 정보를
+리턴하는 어토믹 오퍼레이션은 모두 SMP-조건적 범용 메모리 배리어(smp_mb())를
+실제 오퍼레이션의 앞과 뒤에 내포합니다.  이런 오퍼레이션은 다음의 것들을
+포함합니다:
+
+	xchg();
+	atomic_xchg();			atomic_long_xchg();
+	atomic_inc_return();		atomic_long_inc_return();
+	atomic_dec_return();		atomic_long_dec_return();
+	atomic_add_return();		atomic_long_add_return();
+	atomic_sub_return();		atomic_long_sub_return();
+	atomic_inc_and_test();		atomic_long_inc_and_test();
+	atomic_dec_and_test();		atomic_long_dec_and_test();
+	atomic_sub_and_test();		atomic_long_sub_and_test();
+	atomic_add_negative();		atomic_long_add_negative();
+	test_and_set_bit();
+	test_and_clear_bit();
+	test_and_change_bit();
+
+	/* exchange 조건이 성공할 때 */
+	cmpxchg();
+	atomic_cmpxchg();		atomic_long_cmpxchg();
+	atomic_add_unless();		atomic_long_add_unless();
+
+이것들은 메모리 배리어 효과가 필요한 ACQUIRE 부류와 RELEASE 부류 오퍼레이션들을
+구현할 때, 그리고 객체 해제를 위해 레퍼런스 카운터를 조정할 때, 암묵적 메모리
+배리어 효과가 필요한 곳 등에 사용됩니다.
+
+
+다음의 오퍼레이션들은 메모리 배리어를 내포하지 _않기_ 때문에 문제가 될 수
+있지만, RELEASE 부류의 오퍼레이션들과 같은 것들을 구현할 때 사용될 수도
+있습니다:
+
+	atomic_set();
+	set_bit();
+	clear_bit();
+	change_bit();
+
+이것들을 사용할 때에는 필요하다면 적절한 (예를 들면 smp_mb__before_atomic()
+같은) 메모리 배리어가 명시적으로 함께 사용되어야 합니다.
+
+
+아래의 것들도 메모리 배리어를 내포하지 _않기_ 때문에, 일부 환경에서는 (예를
+들면 smp_mb__before_atomic() 과 같은) 명시적인 메모리 배리어 사용이 필요합니다.
+
+	atomic_add();
+	atomic_sub();
+	atomic_inc();
+	atomic_dec();
+
+이것들이 통계 생성을 위해 사용된다면, 그리고 통계 데이터 사이에 관계가 존재하지
+않는다면 메모리 배리어는 필요치 않을 겁니다.
+
+객체의 수명을 관리하기 위해 레퍼런스 카운팅 목적으로 사용된다면, 레퍼런스
+카운터는 락으로 보호되는 섹션에서만 조정되거나 호출하는 쪽이 이미 충분한
+레퍼런스를 잡고 있을 것이기 때문에 메모리 배리어는 아마 필요 없을 겁니다.
+
+만약 어떤 락을 구성하기 위해 사용된다면, 락 관련 동작은 일반적으로 작업을 특정
+순서대로 진행해야 하므로 메모리 배리어가 필요할 수 있습니다.
+
+기본적으로, 각 사용처에서는 메모리 배리어가 필요한지 아닌지 충분히 고려해야
+합니다.
+
+아래의 오퍼레이션들은 특별한 락 관련 동작들입니다:
+
+	test_and_set_bit_lock();
+	clear_bit_unlock();
+	__clear_bit_unlock();
+
+이것들은 ACQUIRE 류와 RELEASE 류의 오퍼레이션들을 구현합니다.  락 관련 도구를
+구현할 때에는 이것들을 좀 더 선호하는 편이 나은데, 이것들의 구현은 많은
+아키텍쳐에서 최적화 될 수 있기 때문입니다.
+
+[!] 이런 상황에 사용할 수 있는 특수한 메모리 배리어 도구들이 있습니다만, 일부
+CPU 에서는 사용되는 어토믹 인스트럭션 자체에 메모리 배리어가 내포되어 있어서
+어토믹 오퍼레이션과 메모리 배리어를 함께 사용하는 게 불필요한 일이 될 수
+있는데, 그런 경우에 이 특수 메모리 배리어 도구들은 no-op 이 되어 실질적으로
+아무일도 하지 않습니다.
+
+더 많은 내용을 위해선 Documentation/atomic_ops.txt 를 참고하세요.
+
+
+디바이스 액세스
+---------------
+
+많은 디바이스가 메모리 매핑 기법으로 제어될 수 있는데, 그렇게 제어되는
+디바이스는 CPU 에는 단지 특정 메모리 영역의 집합처럼 보이게 됩니다.  드라이버는
+그런 디바이스를 제어하기 위해 정확히 올바른 순서로 올바른 메모리 액세스를
+만들어야 합니다.
+
+하지만, 액세스들을 재배치 하거나 조합하거나 병합하는게 더 효율적이라 판단하는
+영리한 CPU 나 컴파일러들을 사용하면 드라이버 코드의 조심스럽게 순서 맞춰진
+액세스들이 디바이스에는 요청된 순서대로 도착하지 못하게 할 수 있는 - 디바이스가
+오동작을 하게 할 - 잠재적 문제가 생길 수 있습니다.
+
+리눅스 커널 내부에서, I/O 는 어떻게 액세스들을 적절히 순차적이게 만들 수 있는지
+알고 있는, - inb() 나 writel() 과 같은 - 적절한 액세스 루틴을 통해 이루어져야만
+합니다.  이것들은 대부분의 경우에는 명시적 메모리 배리어 와 함께 사용될 필요가
+없습니다만, 다음의 두가지 상황에서는 명시적 메모리 배리어가 필요할 수 있습니다:
+
+ (1) 일부 시스템에서 I/O 스토어는 모든 CPU 에 일관되게 순서 맞춰지지 않는데,
+     따라서 _모든_ 일반적인 드라이버들에 락이 사용되어야만 하고 이 크리티컬
+     섹션을 빠져나오기 전에 mmiowb() 가 꼭 호출되어야 합니다.
+
+ (2) 만약 액세스 함수들이 완화된 메모리 액세스 속성을 갖는 I/O 메모리 윈도우를
+     사용한다면, 순서를 강제하기 위해선 _mandatory_ 메모리 배리어가 필요합니다.
+
+더 많은 정보를 위해선 Documentation/DocBook/deviceiobook.tmpl 을 참고하십시오.
+
+
+인터럽트
+--------
+
+드라이버는 자신의 인터럽트 서비스 루틴에 의해 인터럽트 당할 수 있기 때문에
+드라이버의 이 두 부분은 서로의 디바이스 제어 또는 액세스 부분과 상호 간섭할 수
+있습니다.
+
+스스로에게 인터럽트 당하는 걸 불가능하게 하고, 드라이버의 크리티컬한
+오퍼레이션들을 모두 인터럽트가 불가능하게 된 영역에 집어넣거나 하는 방법 (락의
+한 형태) 으로 이런 상호 간섭을 - 최소한 부분적으로라도 - 줄일 수 있습니다.
+드라이버의 인터럽트 루틴이 실행 중인 동안, 해당 드라이버의 코어는 같은 CPU 에서
+수행되지 않을 것이며, 현재의 인터럽트가 처리되는 중에는 또다시 인터럽트가
+일어나지 못하도록 되어 있으니 인터럽트 핸들러는 그에 대해서는 락을 잡지 않아도
+됩니다.
+
+하지만, 어드레스 레지스터와 데이터 레지스터를 갖는 이더넷 카드를 다루는
+드라이버를 생각해 봅시다.  만약 이 드라이버의 코어가 인터럽트를 비활성화시킨
+채로 이더넷 카드와 대화하고 드라이버의 인터럽트 핸들러가 호출되었다면:
+
+	LOCAL IRQ DISABLE
+	writew(ADDR, 3);
+	writew(DATA, y);
+	LOCAL IRQ ENABLE
+	<interrupt>
+	writew(ADDR, 4);
+	q = readw(DATA);
+	</interrupt>
+
+만약 순서 규칙이 충분히 완화되어 있다면 데이터 레지스터에의 스토어는 어드레스
+레지스터에 두번째로 행해지는 스토어 뒤에 일어날 수도 있습니다:
+
+	STORE *ADDR = 3, STORE *ADDR = 4, STORE *DATA = y, q = LOAD *DATA
+
+
+만약 순서 규칙이 충분히 완화되어 있고 묵시적으로든 명시적으로든 배리어가
+사용되지 않았다면 인터럽트 비활성화 섹션에서 일어난 액세스가 바깥으로 새어서
+인터럽트 내에서 일어난 액세스와 섞일 수 있다고 - 그리고 그 반대도 - 가정해야만
+합니다.
+
+그런 영역 안에서 일어나는 I/O 액세스들은 엄격한 순서 규칙의 I/O 레지스터에
+묵시적 I/O 배리어를 형성하는 동기적 (synchronous) 로드 오퍼레이션을 포함하기
+때문에 일반적으로는 이런게 문제가 되지 않습니다.  만약 이걸로는 충분치 않다면
+mmiowb() 가 명시적으로 사용될 필요가 있습니다.
+
+
+하나의 인터럽트 루틴과 별도의 CPU 에서 수행중이며 서로 통신을 하는 두 루틴
+사이에도 비슷한 상황이 일어날 수 있습니다.  만약 그런 경우가 발생할 가능성이
+있다면, 순서를 보장하기 위해 인터럽트 비활성화 락이 사용되어져야만 합니다.
+
+
+======================
+커널 I/O 배리어의 효과
+======================
+
+I/O 메모리에 액세스할 때, 드라이버는 적절한 액세스 함수를 사용해야 합니다:
+
+ (*) inX(), outX():
+
+     이것들은 메모리 공간보다는 I/O 공간에 이야기를 하려는 의도로
+     만들어졌습니다만, 그건 기본적으로 CPU 마다 다른 컨셉입니다.  i386 과
+     x86_64 프로세서들은 특별한 I/O 공간 액세스 사이클과 명령어를 실제로 가지고
+     있지만, 다른 많은 CPU 들에는 그런 컨셉이 존재하지 않습니다.
+
+     다른 것들 중에서도 PCI 버스가 I/O 공간 컨셉을 정의하는데, 이는 - i386 과
+     x86_64 같은 CPU 에서 - CPU 의 I/O 공간 컨셉으로 쉽게 매치됩니다.  하지만,
+     대체할 I/O 공간이 없는 CPU 에서는 CPU 의 메모리 맵의 가상 I/O 공간으로
+     매핑될 수도 있습니다.
+
+     이 공간으로의 액세스는 (i386 등에서는) 완전하게 동기화 됩니다만, 중간의
+     (PCI 호스트 브리지와 같은) 브리지들은 이를 완전히 보장하진 않을수도
+     있습니다.
+
+     이것들의 상호간의 순서는 완전하게 보장됩니다.
+
+     다른 타입의 메모리 오퍼레이션, I/O 오퍼레이션에 대한 순서는 완전하게
+     보장되지는 않습니다.
+
+ (*) readX(), writeX():
+
+     이것들이 수행 요청되는 CPU 에서 서로에게 완전히 순서가 맞춰지고 독립적으로
+     수행되는지에 대한 보장 여부는 이들이 액세스 하는 메모리 윈도우에 정의된
+     특성에 의해 결정됩니다.  예를 들어, 최신의 i386 아키텍쳐 머신에서는 MTRR
+     레지스터로 이 특성이 조정됩니다.
+
+     일반적으로는, 프리페치 (prefetch) 가능한 디바이스를 액세스 하는게
+     아니라면, 이것들은 완전히 순서가 맞춰지고 결합되지 않게 보장될 겁니다.
+
+     하지만, (PCI 브리지와 같은) 중간의 하드웨어는 자신이 원한다면 집행을
+     연기시킬 수 있습니다; 스토어 명령을 실제로 하드웨어로 내려보내기(flush)
+     위해서는 같은 위치로부터 로드를 하는 방법이 있습니다만[*], PCI 의 경우는
+     같은 디바이스나 환경 구성 영역에서의 로드만으로도 충분할 겁니다.
+
+     [*] 주의! 쓰여진 것과 같은 위치로부터의 로드를 시도하는 것은 오동작을
+	 일으킬 수도 있습니다 - 예로 16650 Rx/Tx 시리얼 레지스터를 생각해
+	 보세요.
+
+     프리페치 가능한 I/O 메모리가 사용되면, 스토어 명령들이 순서를 지키도록
+     하기 위해 mmiowb() 배리어가 필요할 수 있습니다.
+
+     PCI 트랜잭션 사이의 상호작용에 대해 더 많은 정보를 위해선 PCI 명세서를
+     참고하시기 바랍니다.
+
+ (*) readX_relaxed(), writeX_relaxed()
+
+     이것들은 readX() 와 writeX() 랑 비슷하지만, 더 완화된 메모리 순서 보장을
+     제공합니다.  구체적으로, 이것들은 일반적 메모리 액세스 (예: DMA 버퍼) 에도
+     LOCK 이나 UNLOCK 오퍼레이션들에도 순서를 보장하지 않습니다.  LOCK 이나
+     UNLOCK 오퍼레이션들에 맞춰지는 순서가 필요하다면, mmiowb() 배리어가 사용될
+     수 있습니다.  같은 주변 장치에의 완화된 액세스끼리는 순서가 지켜짐을 알아
+     두시기 바랍니다.
+
+ (*) ioreadX(), iowriteX()
+
+     이것들은 inX()/outX() 나 readX()/writeX() 처럼 실제로 수행하는 액세스의
+     종류에 따라 적절하게 수행될 것입니다.
+
+
+===================================
+가정되는 가장 완화된 실행 순서 모델
+===================================
+
+컨셉적으로 CPU 는 주어진 프로그램에 대해 프로그램 그 자체에는 인과성 (program
+causality) 을 지키는 것처럼 보이게 하지만 일반적으로는 순서를 거의 지켜주지
+않는다고 가정되어야만 합니다.  (i386 이나 x86_64 같은) 일부 CPU 들은 코드
+재배치에 (powerpc 나 frv 와 같은) 다른 것들에 비해 강한 제약을 갖지만, 아키텍쳐
+종속적 코드 이외의 코드에서는 순서에 대한 제약이 가장 완화된 경우 (DEC Alpha)
+를 가정해야 합니다.
+
+이 말은, CPU 에게 주어지는 인스트럭션 스트림 내의 한 인스트럭션이 앞의
+인스트럭션에 종속적이라면 앞의 인스트럭션은 뒤의 종속적 인스트럭션이 실행되기
+전에 완료[*]될 수 있어야 한다는 제약 (달리 말해서, 인과성이 지켜지는 것으로
+보이게 함) 외에는 자신이 원하는 순서대로 - 심지어 병렬적으로도 - 그 스트림을
+실행할 수 있음을 의미합니다
+
+ [*] 일부 인스트럭션은 하나 이상의 영향 - 조건 코드를 바꾼다던지, 레지스터나
+     메모리를 바꾼다던지 - 을 만들어내며, 다른 인스트럭션은 다른 효과에
+     종속적일 수 있습니다.
+
+CPU 는 최종적으로 아무 효과도 만들지 않는 인스트럭션 시퀀스는 없애버릴 수도
+있습니다.  예를 들어, 만약 두개의 연속되는 인스트럭션이 둘 다 같은 레지스터에
+직접적인 값 (immediate value) 을 집어넣는다면, 첫번째 인스트럭션은 버려질 수도
+있습니다.
+
+
+비슷하게, 컴파일러 역시 프로그램의 인과성만 지켜준다면 인스트럭션 스트림을
+자신이 보기에 올바르다 생각되는대로 재배치 할 수 있습니다.
+
+
+===============
+CPU 캐시의 영향
+===============
+
+캐시된 메모리 오퍼레이션들이 시스템 전체에 어떻게 인지되는지는 CPU 와 메모리
+사이에 존재하는 캐시들, 그리고 시스템 상태의 일관성을 관리하는 메모리 일관성
+시스템에 상당 부분 영향을 받습니다.
+
+한 CPU 가 시스템의 다른 부분들과 캐시를 통해 상호작용한다면, 메모리 시스템은
+CPU 의 캐시들을 포함해야 하며, CPU 와 CPU 자신의 캐시 사이에서의 동작을 위한
+메모리 배리어를 가져야 합니다. (메모리 배리어는 논리적으로는 다음 그림의
+점선에서 동작합니다):
+
+	    <--- CPU --->         :       <----------- Memory ----------->
+	                          :
+	+--------+    +--------+  :   +--------+    +-----------+
+	|        |    |        |  :   |        |    |           |    +--------+
+	|  CPU   |    | Memory |  :   | CPU    |    |           |    |        |
+	|  Core  |--->| Access |----->| Cache  |<-->|           |    |        |
+	|        |    | Queue  |  :   |        |    |           |--->| Memory |
+	|        |    |        |  :   |        |    |           |    |        |
+	+--------+    +--------+  :   +--------+    |           |    |        |
+	                          :                 | Cache     |    +--------+
+	                          :                 | Coherency |
+	                          :                 | Mechanism |    +--------+
+	+--------+    +--------+  :   +--------+    |           |    |	      |
+	|        |    |        |  :   |        |    |           |    |        |
+	|  CPU   |    | Memory |  :   | CPU    |    |           |--->| Device |
+	|  Core  |--->| Access |----->| Cache  |<-->|           |    |        |
+	|        |    | Queue  |  :   |        |    |           |    |        |
+	|        |    |        |  :   |        |    |           |    +--------+
+	+--------+    +--------+  :   +--------+    +-----------+
+	                          :
+	                          :
+
+특정 로드나 스토어는 해당 오퍼레이션을 요청한 CPU 의 캐시 내에서 동작을 완료할
+수도 있기 때문에 해당 CPU 의 바깥에는 보이지 않을 수 있지만, 다른 CPU 가 관심을
+갖는다면 캐시 일관성 메커니즘이 해당 캐시라인을 해당 CPU 에게 전달하고, 해당
+메모리 영역에 대한 오퍼레이션이 발생할 때마다 그 영향을 전파시키기 때문에, 해당
+오퍼레이션은 메모리에 실제로 액세스를 한것처럼 나타날 것입니다.
+
+CPU 코어는 프로그램의 인과성이 유지된다고만 여겨진다면 인스트럭션들을 어떤
+순서로든 재배치해서 수행할 수 있습니다.  일부 인스트럭션들은 로드나 스토어
+오퍼레이션을 만드는데 이 오퍼레이션들은 이후 수행될 메모리 액세스 큐에 들어가게
+됩니다.  코어는 이 오퍼레이션들을 해당 큐에 어떤 순서로든 원하는대로 넣을 수
+있고, 다른 인스트럭션의 완료를 기다리도록 강제되기 전까지는 수행을 계속합니다.
+
+메모리 배리어가 하는 일은 CPU 쪽에서 메모리 쪽으로 넘어가는 액세스들의 순서,
+그리고 그 액세스의 결과가 시스템의 다른 관찰자들에게 인지되는 순서를 제어하는
+것입니다.
+
+[!] CPU 들은 항상 그들 자신의 로드와 스토어는 프로그램 순서대로 일어난 것으로
+보기 때문에, 주어진 CPU 내에서는 메모리 배리어를 사용할 필요가 _없습니다_.
+
+[!] MMIO 나 다른 디바이스 액세스들은 캐시 시스템을 우회할 수도 있습니다.  우회
+여부는 디바이스가 액세스 되는 메모리 윈도우의 특성에 의해 결정될 수도 있고, CPU
+가 가지고 있을 수 있는 특수한 디바이스 통신 인스트럭션의 사용에 의해서 결정될
+수도 있습니다.
+
+
+캐시 일관성
+-----------
+
+하지만 삶은 앞에서 이야기한 것처럼 단순하지 않습니다: 캐시들은 일관적일 것으로
+기대되지만, 그 일관성이 순서에도 적용될 거라는 보장은 없습니다.  한 CPU 에서
+만들어진 변경 사항은 최종적으로는 시스템의 모든 CPU 에게 보여지게 되지만, 다른
+CPU 들에게도 같은 순서로 보이게 될 거라는 보장은 없다는 뜻입니다.
+
+
+두개의 CPU (1 & 2) 가 달려 있고, 각 CPU 에 두개의 데이터 캐시(CPU 1 은 A/B 를,
+CPU 2 는 C/D 를 갖습니다)가 병렬로 연결되어 있는 시스템을 다룬다고 생각해
+봅시다:
+
+	            :
+	            :                          +--------+
+	            :      +---------+         |        |
+	+--------+  : +--->| Cache A |<------->|        |
+	|        |  : |    +---------+         |        |
+	|  CPU 1 |<---+                        |        |
+	|        |  : |    +---------+         |        |
+	+--------+  : +--->| Cache B |<------->|        |
+	            :      +---------+         |        |
+	            :                          | Memory |
+	            :      +---------+         | System |
+	+--------+  : +--->| Cache C |<------->|        |
+	|        |  : |    +---------+         |        |
+	|  CPU 2 |<---+                        |        |
+	|        |  : |    +---------+         |        |
+	+--------+  : +--->| Cache D |<------->|        |
+	            :      +---------+         |        |
+	            :                          +--------+
+	            :
+
+이 시스템이 다음과 같은 특성을 갖는다 생각해 봅시다:
+
+ (*) 홀수번 캐시라인은 캐시 A, 캐시 C 또는 메모리에 위치할 수 있음;
+
+ (*) 짝수번 캐시라인은 캐시 B, 캐시 D 또는 메모리에 위치할 수 있음;
+
+ (*) CPU 코어가 한개의 캐시에 접근하는 동안, 다른 캐시는 - 더티 캐시라인을
+     메모리에 내리거나 추측성 로드를 하거나 하기 위해 - 시스템의 다른 부분에
+     액세스 하기 위해 버스를 사용할 수 있음;
+
+ (*) 각 캐시는 시스템의 나머지 부분들과 일관성을 맞추기 위해 해당 캐시에
+     적용되어야 할 오퍼레이션들의 큐를 가짐;
+
+ (*) 이 일관성 큐는 캐시에 이미 존재하는 라인에 가해지는 평범한 로드에 의해서는
+     비워지지 않는데, 큐의 오퍼레이션들이 이 로드의 결과에 영향을 끼칠 수 있다
+     할지라도 그러함.
+
+이제, 첫번째 CPU 에서 두개의 쓰기 오퍼레이션을 만드는데, 해당 CPU 의 캐시에
+요청된 순서로 오퍼레이션이 도달됨을 보장하기 위해 두 오퍼레이션 사이에 쓰기
+배리어를 사용하는 상황을 상상해 봅시다:
+
+	CPU 1		CPU 2		COMMENT
+	===============	===============	=======================================
+					u == 0, v == 1 and p == &u, q == &u
+	v = 2;
+	smp_wmb();			v 의 변경이 p 의 변경 전에 보일 것을
+					 분명히 함
+	<A:modify v=2>			v 는 이제 캐시 A 에 독점적으로 존재함
+	p = &v;
+	<B:modify p=&v>			p 는 이제 캐시 B 에 독점적으로 존재함
+
+여기서의 쓰기 메모리 배리어는 CPU 1 의 캐시가 올바른 순서로 업데이트 된 것으로
+시스템의 다른 CPU 들이 인지하게 만듭니다.  하지만, 이제 두번째 CPU 가 그 값들을
+읽으려 하는 상황을 생각해 봅시다:
+
+	CPU 1		CPU 2		COMMENT
+	===============	===============	=======================================
+	...
+			q = p;
+			x = *q;
+
+위의 두개의 읽기 오퍼레이션은 예상된 순서로 일어나지 못할 수 있는데, 두번째 CPU
+의 한 캐시에 다른 캐시 이벤트가 발생해 v 를 담고 있는 캐시라인의 해당 캐시에의
+업데이트가 지연되는 사이, p 를 담고 있는 캐시라인은 두번째 CPU 의 다른 캐시에
+업데이트 되어버렸을 수 있기 때문입니다.
+
+	CPU 1		CPU 2		COMMENT
+	===============	===============	=======================================
+					u == 0, v == 1 and p == &u, q == &u
+	v = 2;
+	smp_wmb();
+	<A:modify v=2>	<C:busy>
+			<C:queue v=2>
+	p = &v;		q = p;
+			<D:request p>
+	<B:modify p=&v>	<D:commit p=&v>
+			<D:read p>
+			x = *q;
+			<C:read *q>	캐시에 업데이트 되기 전의 v 를 읽음
+			<C:unbusy>
+			<C:commit v=2>
+
+기본적으로, 두개의 캐시라인 모두 CPU 2 에 최종적으로는 업데이트 될 것이지만,
+별도의 개입 없이는, 업데이트의 순서가 CPU 1 에서 만들어진 순서와 동일할
+것이라는 보장이 없습니다.
+
+
+여기에 개입하기 위해선, 데이터 의존성 배리어나 읽기 배리어를 로드 오퍼레이션들
+사이에 넣어야 합니다.  이렇게 함으로써 캐시가 다음 요청을 처리하기 전에 일관성
+큐를 처리하도록 강제하게 됩니다.
+
+	CPU 1		CPU 2		COMMENT
+	===============	===============	=======================================
+					u == 0, v == 1 and p == &u, q == &u
+	v = 2;
+	smp_wmb();
+	<A:modify v=2>	<C:busy>
+			<C:queue v=2>
+	p = &v;		q = p;
+			<D:request p>
+	<B:modify p=&v>	<D:commit p=&v>
+			<D:read p>
+			smp_read_barrier_depends()
+			<C:unbusy>
+			<C:commit v=2>
+			x = *q;
+			<C:read *q>	캐시에 업데이트 된 v 를 읽음
+
+
+이런 부류의 문제는 DEC Alpha 계열 프로세서들에서 발견될 수 있는데, 이들은
+데이터 버스를 좀 더 잘 사용해 성능을 개선할 수 있는, 분할된 캐시를 가지고 있기
+때문입니다.  대부분의 CPU 는 하나의 읽기 오퍼레이션의 메모리 액세스가 다른 읽기
+오퍼레이션에 의존적이라면 데이터 의존성 배리어를 내포시킵니다만, 모두가 그런건
+아니기 때문에 이점에 의존해선 안됩니다.
+
+다른 CPU 들도 분할된 캐시를 가지고 있을 수 있지만, 그런 CPU 들은 평범한 메모리
+액세스를 위해서도 이 분할된 캐시들 사이의 조정을 해야만 합니다.  Alpha 는 가장
+약한 메모리 순서 시맨틱 (semantic) 을 선택함으로써 메모리 배리어가 명시적으로
+사용되지 않았을 때에는 그런 조정이 필요하지 않게 했습니다.
+
+
+캐시 일관성 VS DMA
+------------------
+
+모든 시스템이 DMA 를 하는 디바이스에 대해서까지 캐시 일관성을 유지하지는
+않습니다.  그런 경우, DMA 를 시도하는 디바이스는 RAM 으로부터 잘못된 데이터를
+읽을 수 있는데, 더티 캐시 라인이 CPU 의 캐시에 머무르고 있고, 바뀐 값이 아직
+RAM 에 써지지 않았을 수 있기 때문입니다.  이 문제를 해결하기 위해선, 커널의
+적절한 부분에서 각 CPU 캐시의 문제되는 비트들을 플러시 (flush) 시켜야만 합니다
+(그리고 그것들을 무효화 - invalidation - 시킬 수도 있겠죠).
+
+또한, 디바이스에 의해 RAM 에 DMA 로 쓰여진 값은 디바이스가 쓰기를 완료한 후에
+CPU 의 캐시에서 RAM 으로 쓰여지는 더티 캐시 라인에 의해 덮어써질 수도 있고, CPU
+의 캐시에 존재하는 캐시 라인이 해당 캐시에서 삭제되고 다시 값을 읽어들이기
+전까지는 RAM 이 업데이트 되었다는 사실 자체가 숨겨져 버릴 수도 있습니다.  이
+문제를 해결하기 위해선, 커널의 적절한 부분에서 각 CPU 의 캐시 안의 문제가 되는
+비트들을 무효화 시켜야 합니다.
+
+캐시 관리에 대한 더 많은 정보를 위해선 Documentation/cachetlb.txt 를
+참고하세요.
+
+
+캐시 일관성 VS MMIO
+-------------------
+
+Memory mapped I/O 는 일반적으로 CPU 의 메모리 공간 내의 한 윈도우의 특정 부분
+내의 메모리 지역에 이루어지는데, 이 윈도우는 일반적인, RAM 으로 향하는
+윈도우와는 다른 특성을 갖습니다.
+
+그런 특성 가운데 하나는, 일반적으로 그런 액세스는 캐시를 완전히 우회하고
+디바이스 버스로 곧바로 향한다는 것입니다.  이 말은 MMIO 액세스는 먼저
+시작되어서 캐시에서 완료된 메모리 액세스를 추월할 수 있다는 뜻입니다.  이런
+경우엔 메모리 배리어만으로는 충분치 않고, 만약 캐시된 메모리 쓰기 오퍼레이션과
+MMIO 액세스가 어떤 방식으로든 의존적이라면 해당 캐시는 두 오퍼레이션 사이에
+비워져(flush)야만 합니다.
+
+
+======================
+CPU 들이 저지르는 일들
+======================
+
+프로그래머는 CPU 가 메모리 오퍼레이션들을 정확히 요청한대로 수행해 줄 것이라고
+생각하는데, 예를 들어 다음과 같은 코드를 CPU 에게 넘긴다면:
+
+	a = READ_ONCE(*A);
+	WRITE_ONCE(*B, b);
+	c = READ_ONCE(*C);
+	d = READ_ONCE(*D);
+	WRITE_ONCE(*E, e);
+
+CPU 는 다음 인스트럭션을 처리하기 전에 현재의 인스트럭션을 위한 메모리
+오퍼레이션을 완료할 것이라 생각하고, 따라서 시스템 외부에서 관찰하기에도 정해진
+순서대로 오퍼레이션이 수행될 것으로 예상합니다:
+
+	LOAD *A, STORE *B, LOAD *C, LOAD *D, STORE *E.
+
+
+당연하지만, 실제로는 훨씬 엉망입니다.  많은 CPU 와 컴파일러에서 앞의 가정은
+성립하지 못하는데 그 이유는 다음과 같습니다:
+
+ (*) 로드 오퍼레이션들은 실행을 계속 해나가기 위해 곧바로 완료될 필요가 있는
+     경우가 많은 반면, 스토어 오퍼레이션들은 종종 별다른 문제 없이 유예될 수
+     있습니다;
+
+ (*) 로드 오퍼레이션들은 예측적으로 수행될 수 있으며, 필요없는 로드였다고
+     증명된 예측적 로드의 결과는 버려집니다;
+
+ (*) 로드 오퍼레이션들은 예측적으로 수행될 수 있으므로, 예상된 이벤트의
+     시퀀스와 다른 시간에 로드가 이뤄질 수 있습니다;
+
+ (*) 메모리 액세스 순서는 CPU 버스와 캐시를 좀 더 잘 사용할 수 있도록 재배치
+     될 수 있습니다;
+
+ (*) 로드와 스토어는 인접한 위치에의 액세스들을 일괄적으로 처리할 수 있는
+     메모리나 I/O 하드웨어 (메모리와 PCI 디바이스 둘 다 이게 가능할 수
+     있습니다) 에 대해 요청되는 경우, 개별 오퍼레이션을 위한 트랜잭션 설정
+     비용을 아끼기 위해 조합되어 실행될 수 있습니다; 그리고
+
+ (*) 해당 CPU 의 데이터 캐시가 순서에 영향을 끼칠 수도 있고, 캐시 일관성
+     메커니즘이 - 스토어가 실제로 캐시에 도달한다면 - 이 문제를 완화시킬 수는
+     있지만 이 일관성 관리가 다른 CPU 들에도 같은 순서로 전달된다는 보장은
+     없습니다.
+
+따라서, 앞의 코드에 대해 다른 CPU 가 보는 결과는 다음과 같을 수 있습니다:
+
+	LOAD *A, ..., LOAD {*C,*D}, STORE *E, STORE *B
+
+	("LOAD {*C,*D}" 는 조합된 로드입니다)
+
+
+하지만, CPU 는 스스로는 일관적일 것을 보장합니다: CPU _자신_ 의 액세스들은
+자신에게는 메모리 배리어가 없음에도 불구하고 정확히 순서 세워진 것으로 보여질
+것입니다.  예를 들어 다음의 코드가 주어졌다면:
+
+	U = READ_ONCE(*A);
+	WRITE_ONCE(*A, V);
+	WRITE_ONCE(*A, W);
+	X = READ_ONCE(*A);
+	WRITE_ONCE(*A, Y);
+	Z = READ_ONCE(*A);
+
+그리고 외부의 영향에 의한 간섭이 없다고 가정하면, 최종 결과는 다음과 같이
+나타날 것이라고 예상될 수 있습니다:
+
+	U == *A 의 최초 값
+	X == W
+	Z == Y
+	*A == Y
+
+앞의 코드는 CPU 가 다음의 메모리 액세스 시퀀스를 만들도록 할겁니다:
+
+	U=LOAD *A, STORE *A=V, STORE *A=W, X=LOAD *A, STORE *A=Y, Z=LOAD *A
+
+하지만, 별다른 개입이 없고 프로그램의 시야에 이 세상이 여전히 일관적이라고
+보인다는 보장만 지켜진다면 이 시퀀스는 어떤 조합으로든 재구성될 수 있으며, 각
+액세스들은 합쳐지거나 버려질 수 있습니다.  일부 아키텍쳐에서 CPU 는 같은 위치에
+대한 연속적인 로드 오퍼레이션들을 재배치 할 수 있기 때문에 앞의 예에서의
+READ_ONCE() 와 WRITE_ONCE() 는 반드시 존재해야 함을 알아두세요.  그런 종류의
+아키텍쳐에서 READ_ONCE() 와 WRITE_ONCE() 는 이 문제를 막기 위해 필요한 일을
+뭐가 됐든지 하게 되는데, 예를 들어 Itanium 에서는 READ_ONCE() 와 WRITE_ONCE()
+가 사용하는 volatile 캐스팅은 GCC 가 그런 재배치를 방지하는 특수 인스트럭션인
+ld.acq 와 stl.rel 인스트럭션을 각각 만들어 내도록 합니다.
+
+컴파일러 역시 이 시퀀스의 액세스들을 CPU 가 보기도 전에 합치거나 버리거나 뒤로
+미뤄버릴 수 있습니다.
+
+예를 들어:
+
+	*A = V;
+	*A = W;
+
+는 다음과 같이 변형될 수 있습니다:
+
+	*A = W;
+
+따라서, 쓰기 배리어나 WRITE_ONCE() 가 없다면 *A 로의 V 값의 저장의 효과는
+사라진다고 가정될 수 있습니다.  비슷하게:
+
+	*A = Y;
+	Z = *A;
+
+는, 메모리 배리어나 READ_ONCE() 와 WRITE_ONCE() 없이는 다음과 같이 변형될 수
+있습니다:
+
+	*A = Y;
+	Z = Y;
+
+그리고 이 LOAD 오퍼레이션은 CPU 바깥에는 아예 보이지 않습니다.
+
+
+그리고, ALPHA 가 있다
+---------------------
+
+DEC Alpha CPU 는 가장 완화된 메모리 순서의 CPU 중 하나입니다.  뿐만 아니라,
+Alpha CPU 의 일부 버전은 분할된 데이터 캐시를 가지고 있어서, 의미적으로
+관계되어 있는 두개의 캐시 라인이 서로 다른 시간에 업데이트 되는게 가능합니다.
+이게 데이터 의존성 배리어가 정말 필요해지는 부분인데, 데이터 의존성 배리어는
+메모리 일관성 시스템과 함께 두개의 캐시를 동기화 시켜서, 포인터 변경과 새로운
+데이터의 발견을 올바른 순서로 일어나게 하기 때문입니다.
+
+리눅스 커널의 메모리 배리어 모델은 Alpha 에 기초해서 정의되었습니다.
+
+위의 "캐시 일관성" 서브섹션을 참고하세요.
+
+
+가상 머신 게스트
+----------------
+
+가상 머신에서 동작하는 게스트들은 게스트 자체는 SMP 지원 없이 컴파일 되었다
+해도 SMP 영향을 받을 수 있습니다.  이건 UP 커널을 사용하면서 SMP 호스트와
+결부되어 발생하는 부작용입니다.  이 경우에는 mandatory 배리어를 사용해서 문제를
+해결할 수 있겠지만 그런 해결은 대부분의 경우 최적의 해결책이 아닙니다.
+
+이 문제를 완벽하게 해결하기 위해, 로우 레벨의 virt_mb() 등의 매크로를 사용할 수
+있습니다. 이것들은 SMP 가 활성화 되어 있다면 smp_mb() 등과 동일한 효과를
+갖습니다만, SMP 와 SMP 아닌 시스템 모두에 대해 동일한 코드를 만들어냅니다.
+예를 들어, 가상 머신 게스트들은 (SMP 일 수 있는) 호스트와 동기화를 할 때에는
+smp_mb() 가 아니라 virt_mb() 를 사용해야 합니다.
+
+이것들은 smp_mb() 류의 것들과 모든 부분에서 동일하며, 특히, MMIO 의 영향에
+대해서는 간여하지 않습니다: MMIO 의 영향을 제어하려면, mandatory 배리어를
+사용하시기 바랍니다.
+
+
+=======
+사용 예
+=======
+
+순환식 버퍼
+-----------
+
+메모리 배리어는 순환식 버퍼를 생성자(producer)와 소비자(consumer) 사이의
+동기화에 락을 사용하지 않고 구현하는데에 사용될 수 있습니다.  더 자세한 내용을
+위해선 다음을 참고하세요:
+
+	Documentation/circular-buffers.txt
+
+
+=========
+참고 문헌
+=========
+
+Alpha AXP Architecture Reference Manual, Second Edition (Sites & Witek,
+Digital Press)
+	Chapter 5.2: Physical Address Space Characteristics
+	Chapter 5.4: Caches and Write Buffers
+	Chapter 5.5: Data Sharing
+	Chapter 5.6: Read/Write Ordering
+
+AMD64 Architecture Programmer's Manual Volume 2: System Programming
+	Chapter 7.1: Memory-Access Ordering
+	Chapter 7.4: Buffering and Combining Memory Writes
+
+IA-32 Intel Architecture Software Developer's Manual, Volume 3:
+System Programming Guide
+	Chapter 7.1: Locked Atomic Operations
+	Chapter 7.2: Memory Ordering
+	Chapter 7.4: Serializing Instructions
+
+The SPARC Architecture Manual, Version 9
+	Chapter 8: Memory Models
+	Appendix D: Formal Specification of the Memory Models
+	Appendix J: Programming with the Memory Models
+
+UltraSPARC Programmer Reference Manual
+	Chapter 5: Memory Accesses and Cacheability
+	Chapter 15: Sparc-V9 Memory Models
+
+UltraSPARC III Cu User's Manual
+	Chapter 9: Memory Models
+
+UltraSPARC IIIi Processor User's Manual
+	Chapter 8: Memory Models
+
+UltraSPARC Architecture 2005
+	Chapter 9: Memory
+	Appendix D: Formal Specifications of the Memory Models
+
+UltraSPARC T1 Supplement to the UltraSPARC Architecture 2005
+	Chapter 8: Memory Models
+	Appendix F: Caches and Cache Coherency
+
+Solaris Internals, Core Kernel Architecture, p63-68:
+	Chapter 3.3: Hardware Considerations for Locks and
+			Synchronization
+
+Unix Systems for Modern Architectures, Symmetric Multiprocessing and Caching
+for Kernel Programmers:
+	Chapter 13: Other Memory Models
+
+Intel Itanium Architecture Software Developer's Manual: Volume 1:
+	Section 2.6: Speculation
+	Section 4.4: Memory Access
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 1f9b3e2..1f6d45a 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -103,6 +103,16 @@
 or in registers.  The jprobe will work in either case, so long as the
 handler's prototype matches that of the probed function.
 
+Note that in some architectures (e.g.: arm64 and sparc64) the stack
+copy is not done, as the actual location of stacked parameters may be
+outside of a reasonable MAX_STACK_SIZE value and because that location
+cannot be determined by the jprobes code. In this case the jprobes
+user must be careful to make certain the calling signature of the
+function does not cause parameters to be passed on the stack (e.g.:
+more than eight function arguments, an argument of more than sixteen
+bytes, or more than 64 bytes of argument data, depending on
+architecture).
+
 1.3 Return Probes
 
 1.3.1 How Does a Return Probe Work?
diff --git a/Documentation/leds/leds-mlxcpld.txt b/Documentation/leds/leds-mlxcpld.txt
new file mode 100644
index 0000000..a0e8fd4
--- /dev/null
+++ b/Documentation/leds/leds-mlxcpld.txt
@@ -0,0 +1,110 @@
+Kernel driver for Mellanox systems LEDs
+=======================================
+
+Provide system LED support for the nex Mellanox systems:
+"msx6710", "msx6720", "msb7700", "msn2700", "msx1410",
+"msn2410", "msb7800", "msn2740", "msn2100".
+
+Description
+-----------
+Driver provides the following LEDs for the systems "msx6710", "msx6720",
+"msb7700", "msn2700", "msx1410", "msn2410", "msb7800", "msn2740":
+  mlxcpld:fan1:green
+  mlxcpld:fan1:red
+  mlxcpld:fan2:green
+  mlxcpld:fan2:red
+  mlxcpld:fan3:green
+  mlxcpld:fan3:red
+  mlxcpld:fan4:green
+  mlxcpld:fan4:red
+  mlxcpld:psu:green
+  mlxcpld:psu:red
+  mlxcpld:status:green
+  mlxcpld:status:red
+
+ "status"
+  CPLD reg offset: 0x20
+  Bits [3:0]
+
+ "psu"
+  CPLD reg offset: 0x20
+  Bits [7:4]
+
+ "fan1"
+  CPLD reg offset: 0x21
+  Bits [3:0]
+
+ "fan2"
+  CPLD reg offset: 0x21
+  Bits [7:4]
+
+ "fan3"
+  CPLD reg offset: 0x22
+  Bits [3:0]
+
+ "fan4"
+  CPLD reg offset: 0x22
+  Bits [7:4]
+
+ Color mask for all the above LEDs:
+  [bit3,bit2,bit1,bit0] or
+  [bit7,bit6,bit5,bit4]:
+	[0,0,0,0] = LED OFF
+	[0,1,0,1] = Red static ON
+	[1,1,0,1] = Green static ON
+	[0,1,1,0] = Red blink 3Hz
+	[1,1,1,0] = Green blink 3Hz
+	[0,1,1,1] = Red blink 6Hz
+	[1,1,1,1] = Green blink 6Hz
+
+Driver provides the following LEDs for the system "msn2100":
+  mlxcpld:fan:green
+  mlxcpld:fan:red
+  mlxcpld:psu1:green
+  mlxcpld:psu1:red
+  mlxcpld:psu2:green
+  mlxcpld:psu2:red
+  mlxcpld:status:green
+  mlxcpld:status:red
+  mlxcpld:uid:blue
+
+ "status"
+  CPLD reg offset: 0x20
+  Bits [3:0]
+
+ "fan"
+  CPLD reg offset: 0x21
+  Bits [3:0]
+
+ "psu1"
+  CPLD reg offset: 0x23
+  Bits [3:0]
+
+ "psu2"
+  CPLD reg offset: 0x23
+  Bits [7:4]
+
+ "uid"
+  CPLD reg offset: 0x24
+  Bits [3:0]
+
+ Color mask for all the above LEDs, excepted uid:
+  [bit3,bit2,bit1,bit0] or
+  [bit7,bit6,bit5,bit4]:
+	[0,0,0,0] = LED OFF
+	[0,1,0,1] = Red static ON
+	[1,1,0,1] = Green static ON
+	[0,1,1,0] = Red blink 3Hz
+	[1,1,1,0] = Green blink 3Hz
+	[0,1,1,1] = Red blink 6Hz
+	[1,1,1,1] = Green blink 6Hz
+
+ Color mask for uid LED:
+  [bit3,bit2,bit1,bit0]:
+	[0,0,0,0] = LED OFF
+	[1,1,0,1] = Blue static ON
+	[1,1,1,0] = Blue blink 3Hz
+	[1,1,1,1] = Blue blink 6Hz
+
+Driver supports HW blinking at 3Hz and 6Hz frequency (50% duty cycle).
+For 3Hz duty cylce is about 167 msec, for 6Hz is about 83 msec.
diff --git a/Documentation/leds/ledtrig-oneshot.txt b/Documentation/leds/ledtrig-oneshot.txt
index 07cd1fa..fe57474 100644
--- a/Documentation/leds/ledtrig-oneshot.txt
+++ b/Documentation/leds/ledtrig-oneshot.txt
@@ -21,24 +21,8 @@
 
   echo oneshot > trigger
 
-This adds the following sysfs attributes to the LED:
-
-  delay_on - specifies for how many milliseconds the LED has to stay at
-             LED_FULL brightness after it has been armed.
-             Default to 100 ms.
-
-  delay_off - specifies for how many milliseconds the LED has to stay at
-              LED_OFF brightness after it has been armed.
-              Default to 100 ms.
-
-  invert - reverse the blink logic.  If set to 0 (default) blink on for delay_on
-           ms, then blink off for delay_off ms, leaving the LED normally off.  If
-           set to 1, blink off for delay_off ms, then blink on for delay_on ms,
-           leaving the LED normally on.
-           Setting this value also immediately change the LED state.
-
-  shot - write any non-empty string to signal an events, this starts a blink
-         sequence if not already running.
+This adds sysfs attributes to the LED that are documented in:
+Documentation/ABI/testing/sysfs-class-led-trigger-oneshot
 
 Example use-case: network devices, initialization:
 
diff --git a/Documentation/leds/ledtrig-usbport.txt b/Documentation/leds/ledtrig-usbport.txt
new file mode 100644
index 0000000..69f54bf
--- /dev/null
+++ b/Documentation/leds/ledtrig-usbport.txt
@@ -0,0 +1,41 @@
+USB port LED trigger
+====================
+
+This LED trigger can be used for signalling to the user a presence of USB device
+in a given port. It simply turns on LED when device appears and turns it off
+when it disappears.
+
+It requires selecting USB ports that should be observed. All available ones are
+listed as separated entries in a "ports" subdirectory. Selecting is handled by
+echoing "1" to a chosen port.
+
+Please note that this trigger allows selecting multiple USB ports for a single
+LED. This can be useful in two cases:
+
+1) Device with single USB LED and few physical ports
+
+In such a case LED will be turned on as long as there is at least one connected
+USB device.
+
+2) Device with a physical port handled by few controllers
+
+Some devices may have one controller per PHY standard. E.g. USB 3.0 physical
+port may be handled by ohci-platform, ehci-platform and xhci-hcd. If there is
+only one LED user will most likely want to assign ports from all 3 hubs.
+
+
+This trigger can be activated from user space on led class devices as shown
+below:
+
+  echo usbport > trigger
+
+This adds sysfs attributes to the LED that are documented in:
+Documentation/ABI/testing/sysfs-class-led-trigger-usbport
+
+Example use-case:
+
+  echo usbport > trigger
+  echo 1 > ports/usb1-port1
+  echo 1 > ports/usb2-port1
+  cat ports/usb1-port1
+  echo 0 > ports/usb1-port1
diff --git a/Documentation/locking/lglock.txt b/Documentation/locking/lglock.txt
deleted file mode 100644
index a6971e3..0000000
--- a/Documentation/locking/lglock.txt
+++ /dev/null
@@ -1,166 +0,0 @@
-lglock - local/global locks for mostly local access patterns
-------------------------------------------------------------
-
-Origin: Nick Piggin's VFS scalability series introduced during
-	2.6.35++ [1] [2]
-Location: kernel/locking/lglock.c
-	include/linux/lglock.h
-Users: currently only the VFS and stop_machine related code
-
-Design Goal:
-------------
-
-Improve scalability of globally used large data sets that are
-distributed over all CPUs as per_cpu elements.
-
-To manage global data structures that are partitioned over all CPUs
-as per_cpu elements but can be mostly handled by CPU local actions
-lglock will be used where the majority of accesses are cpu local
-reading and occasional cpu local writing with very infrequent
-global write access.
-
-
-* deal with things locally whenever possible
-	- very fast access to the local per_cpu data
-	- reasonably fast access to specific per_cpu data on a different
-	  CPU
-* while making global action possible when needed
-	- by expensive access to all CPUs locks - effectively
-	  resulting in a globally visible critical section.
-
-Design:
--------
-
-Basically it is an array of per_cpu spinlocks with the
-lg_local_lock/unlock accessing the local CPUs lock object and the
-lg_local_lock_cpu/unlock_cpu accessing a remote CPUs lock object
-the lg_local_lock has to disable preemption as migration protection so
-that the reference to the local CPUs lock does not go out of scope.
-Due to the lg_local_lock/unlock only touching cpu-local resources it
-is fast. Taking the local lock on a different CPU will be more
-expensive but still relatively cheap.
-
-One can relax the migration constraints by acquiring the current
-CPUs lock with lg_local_lock_cpu, remember the cpu, and release that
-lock at the end of the critical section even if migrated. This should
-give most of the performance benefits without inhibiting migration
-though needs careful considerations for nesting of lglocks and
-consideration of deadlocks with lg_global_lock.
-
-The lg_global_lock/unlock locks all underlying spinlocks of all
-possible CPUs (including those off-line). The preemption disable/enable
-are needed in the non-RT kernels to prevent deadlocks like:
-
-                     on cpu 1
-
-              task A          task B
-         lg_global_lock
-           got cpu 0 lock
-                 <<<< preempt <<<<
-                         lg_local_lock_cpu for cpu 0
-                           spin on cpu 0 lock
-
-On -RT this deadlock scenario is resolved by the arch_spin_locks in the
-lglocks being replaced by rt_mutexes which resolve the above deadlock
-by boosting the lock-holder.
-
-
-Implementation:
----------------
-
-The initial lglock implementation from Nick Piggin used some complex
-macros to generate the lglock/brlock in lglock.h - they were later
-turned into a set of functions by Andi Kleen [7]. The change to functions
-was motivated by the presence of multiple lock users and also by them
-being easier to maintain than the generating macros. This change to
-functions is also the basis to eliminated the restriction of not
-being initializeable in kernel modules (the remaining problem is that
-locks are not explicitly initialized - see lockdep-design.txt)
-
-Declaration and initialization:
--------------------------------
-
-  #include <linux/lglock.h>
-
-  DEFINE_LGLOCK(name)
-  or:
-  DEFINE_STATIC_LGLOCK(name);
-
-  lg_lock_init(&name, "lockdep_name_string");
-
-  on UP this is mapped to DEFINE_SPINLOCK(name) in both cases, note
-  also that as of 3.18-rc6 all declaration in use are of the _STATIC_
-  variant (and it seems that the non-static was never in use).
-  lg_lock_init is initializing the lockdep map only.
-
-Usage:
-------
-
-From the locking semantics it is a spinlock. It could be called a
-locality aware spinlock. lg_local_* behaves like a per_cpu
-spinlock and lg_global_* like a global spinlock.
-No surprises in the API.
-
-  lg_local_lock(*lglock);
-     access to protected per_cpu object on this CPU
-  lg_local_unlock(*lglock);
-
-  lg_local_lock_cpu(*lglock, cpu);
-     access to protected per_cpu object on other CPU cpu
-  lg_local_unlock_cpu(*lglock, cpu);
-
-  lg_global_lock(*lglock);
-     access all protected per_cpu objects on all CPUs
-  lg_global_unlock(*lglock);
-
-  There are no _trylock variants of the lglocks.
-
-Note that the lg_global_lock/unlock has to iterate over all possible
-CPUs rather than the actually present CPUs or a CPU could go off-line
-with a held lock [4] and that makes it very expensive. A discussion on
-these issues can be found at [5]
-
-Constraints:
-------------
-
-  * currently the declaration of lglocks in kernel modules is not
-    possible, though this should be doable with little change.
-  * lglocks are not recursive.
-  * suitable for code that can do most operations on the CPU local
-    data and will very rarely need the global lock
-  * lg_global_lock/unlock is *very* expensive and does not scale
-  * on UP systems all lg_* primitives are simply spinlocks
-  * in PREEMPT_RT the spinlock becomes an rt-mutex and can sleep but
-    does not change the tasks state while sleeping [6].
-  * in PREEMPT_RT the preempt_disable/enable in lg_local_lock/unlock
-    is downgraded to a migrate_disable/enable, the other
-    preempt_disable/enable are downgraded to barriers [6].
-    The deadlock noted for non-RT above is resolved due to rt_mutexes
-    boosting the lock-holder in this case which arch_spin_locks do
-    not do.
-
-lglocks were designed for very specific problems in the VFS and probably
-only are the right answer in these corner cases. Any new user that looks
-at lglocks probably wants to look at the seqlock and RCU alternatives as
-her first choice. There are also efforts to resolve the RCU issues that
-currently prevent using RCU in place of view remaining lglocks.
-
-Note on brlock history:
------------------------
-
-The 'Big Reader' read-write spinlocks were originally introduced by
-Ingo Molnar in 2000 (2.4/2.5 kernel series) and removed in 2003. They
-later were introduced by the VFS scalability patch set in 2.6 series
-again as the "big reader lock" brlock [2] variant of lglock which has
-been replaced by seqlock primitives or by RCU based primitives in the
-3.13 kernel series as was suggested in [3] in 2003. The brlock was
-entirely removed in the 3.13 kernel series.
-
-Link: 1 http://lkml.org/lkml/2010/8/2/81
-Link: 2 http://lwn.net/Articles/401738/
-Link: 3 http://lkml.org/lkml/2003/3/9/205
-Link: 4 https://lkml.org/lkml/2011/8/24/185
-Link: 5 http://lkml.org/lkml/2011/12/18/189
-Link: 6 https://www.kernel.org/pub/linux/kernel/projects/rt/
-        patch series - lglocks-rt.patch.patch
-Link: 7 http://lkml.org/lkml/2012/3/5/26
diff --git a/Documentation/media/Makefile b/Documentation/media/Makefile
index 39e2d76..a7fb352 100644
--- a/Documentation/media/Makefile
+++ b/Documentation/media/Makefile
@@ -10,7 +10,8 @@
 
 TARGETS := $(addprefix $(BUILDDIR)/, $(FILES))
 
-htmldocs: $(BUILDDIR) ${TARGETS}
+.PHONY: all
+all: $(BUILDDIR) ${TARGETS}
 
 $(BUILDDIR):
 	$(Q)mkdir -p $@
diff --git a/Documentation/media/conf.py b/Documentation/media/conf.py
new file mode 100644
index 0000000..bef927b
--- /dev/null
+++ b/Documentation/media/conf.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = 'Linux Media Subsystem Documentation'
+
+tags.add("subproject")
+
+latex_documents = [
+    ('index', 'media.tex', 'Linux Media Subsystem Documentation',
+     'The kernel development community', 'manual'),
+]
diff --git a/Documentation/media/conf_nitpick.py b/Documentation/media/conf_nitpick.py
new file mode 100644
index 0000000..11beac2
--- /dev/null
+++ b/Documentation/media/conf_nitpick.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8; mode: python -*-
+
+project = 'Linux Media Subsystem Documentation'
+
+# It is possible to run Sphinx in nickpick mode with:
+nitpicky = True
+
+# within nit-picking build, do not refer to any intersphinx object
+intersphinx_mapping = {}
+
+# In nickpick mode, it will complain about lots of missing references that
+#
+# 1) are just typedefs like: bool, __u32, etc;
+# 2) It will complain for things like: enum, NULL;
+# 3) It will complain for symbols that should be on different
+#    books (but currently aren't ported to ReST)
+#
+# The list below has a list of such symbols to be ignored in nitpick mode
+#
+nitpick_ignore = [
+    ("c:func", "clock_gettime"),
+    ("c:func", "close"),
+    ("c:func", "container_of"),
+    ("c:func", "determine_valid_ioctls"),
+    ("c:func", "ERR_PTR"),
+    ("c:func", "ioctl"),
+    ("c:func", "IS_ERR"),
+    ("c:func", "mmap"),
+    ("c:func", "open"),
+    ("c:func", "pci_name"),
+    ("c:func", "poll"),
+    ("c:func", "PTR_ERR"),
+    ("c:func", "read"),
+    ("c:func", "release"),
+    ("c:func", "set"),
+    ("c:func", "struct fd_set"),
+    ("c:func", "struct pollfd"),
+    ("c:func", "usb_make_path"),
+    ("c:func", "write"),
+    ("c:type", "atomic_t"),
+    ("c:type", "bool"),
+    ("c:type", "buf_queue"),
+    ("c:type", "device"),
+    ("c:type", "device_driver"),
+    ("c:type", "device_node"),
+    ("c:type", "enum"),
+    ("c:type", "file"),
+    ("c:type", "i2c_adapter"),
+    ("c:type", "i2c_board_info"),
+    ("c:type", "i2c_client"),
+    ("c:type", "ktime_t"),
+    ("c:type", "led_classdev_flash"),
+    ("c:type", "list_head"),
+    ("c:type", "lock_class_key"),
+    ("c:type", "module"),
+    ("c:type", "mutex"),
+    ("c:type", "pci_dev"),
+    ("c:type", "pdvbdev"),
+    ("c:type", "poll_table_struct"),
+    ("c:type", "s32"),
+    ("c:type", "s64"),
+    ("c:type", "sd"),
+    ("c:type", "spi_board_info"),
+    ("c:type", "spi_device"),
+    ("c:type", "spi_master"),
+    ("c:type", "struct fb_fix_screeninfo"),
+    ("c:type", "struct pollfd"),
+    ("c:type", "struct timeval"),
+    ("c:type", "struct video_capability"),
+    ("c:type", "u16"),
+    ("c:type", "u32"),
+    ("c:type", "u64"),
+    ("c:type", "u8"),
+    ("c:type", "union"),
+    ("c:type", "usb_device"),
+
+    ("cpp:type", "boolean"),
+    ("cpp:type", "fd"),
+    ("cpp:type", "fd_set"),
+    ("cpp:type", "int16_t"),
+    ("cpp:type", "NULL"),
+    ("cpp:type", "off_t"),
+    ("cpp:type", "pollfd"),
+    ("cpp:type", "size_t"),
+    ("cpp:type", "ssize_t"),
+    ("cpp:type", "timeval"),
+    ("cpp:type", "__u16"),
+    ("cpp:type", "__u32"),
+    ("cpp:type", "__u64"),
+    ("cpp:type", "uint16_t"),
+    ("cpp:type", "uint32_t"),
+    ("cpp:type", "video_system_t"),
+]
diff --git a/Documentation/media/index.rst b/Documentation/media/index.rst
new file mode 100644
index 0000000..7f8f0af
--- /dev/null
+++ b/Documentation/media/index.rst
@@ -0,0 +1,19 @@
+Linux Media Subsystem Documentation
+===================================
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   media_uapi
+   media_kapi
+   dvb-drivers/index
+   v4l-drivers/index
+
+.. only::  subproject
+
+   Indices
+   =======
+
+   * :ref:`genindex`
diff --git a/Documentation/media/uapi/cec/cec-func-open.rst b/Documentation/media/uapi/cec/cec-func-open.rst
index 38fd7e0..7c0f981 100644
--- a/Documentation/media/uapi/cec/cec-func-open.rst
+++ b/Documentation/media/uapi/cec/cec-func-open.rst
@@ -32,7 +32,7 @@
     Open flags. Access mode must be ``O_RDWR``.
 
     When the ``O_NONBLOCK`` flag is given, the
-    :ref:`CEC_RECEIVE <CEC_RECEIVE>` and :ref:`CEC_DQEVENT <CEC_DQEVENT>` ioctls
+    :ref:`CEC_RECEIVE <CEC_RECEIVE>` and :c:func:`CEC_DQEVENT` ioctls
     will return the ``EAGAIN`` error code when no message or event is available, and
     ioctls :ref:`CEC_TRANSMIT <CEC_TRANSMIT>`,
     :ref:`CEC_ADAP_S_PHYS_ADDR <CEC_ADAP_S_PHYS_ADDR>` and
diff --git a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
index 04ee900..201d483 100644
--- a/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-adap-g-log-addrs.rst
@@ -144,7 +144,7 @@
 
        -  ``flags``
 
-       -  Flags. No flags are defined yet, so set this to 0.
+       -  Flags. See :ref:`cec-log-addrs-flags` for a list of available flags.
 
     -  .. row 7
 
@@ -201,6 +201,25 @@
           give the CEC framework more information about the device type, even
           though the framework won't use it directly in the CEC message.
 
+.. _cec-log-addrs-flags:
+
+.. flat-table:: Flags for struct cec_log_addrs
+    :header-rows:  0
+    :stub-columns: 0
+    :widths:       3 1 4
+
+
+    -  .. _`CEC-LOG-ADDRS-FL-ALLOW-UNREG-FALLBACK`:
+
+       -  ``CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK``
+
+       -  1
+
+       -  By default if no logical address of the requested type can be claimed, then
+	  it will go back to the unconfigured state. If this flag is set, then it will
+	  fallback to the Unregistered logical address. Note that if the Unregistered
+	  logical address was explicitly requested, then this flag has no effect.
+
 .. _cec-versions:
 
 .. flat-table:: CEC Versions
diff --git a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
index 7a6d6d0..f8caa28 100644
--- a/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
+++ b/Documentation/media/uapi/cec/cec-ioc-dqevent.rst
@@ -15,7 +15,8 @@
 Synopsis
 ========
 
-.. cpp:function:: int ioctl( int fd, int request, struct cec_event *argp )
+.. c:function:: int ioctl( int fd, int request, struct cec_event *argp )
+   :name: CEC_DQEVENT
 
 Arguments
 =========
@@ -36,7 +37,7 @@
    and is currently only available as a staging kernel module.
 
 CEC devices can send asynchronous events. These can be retrieved by
-calling :ref:`ioctl CEC_DQEVENT <CEC_DQEVENT>`. If the file descriptor is in
+calling :c:func:`CEC_DQEVENT`. If the file descriptor is in
 non-blocking mode and no event is pending, then it will return -1 and
 set errno to the ``EAGAIN`` error code.
 
@@ -64,7 +65,8 @@
 
        -  ``phys_addr``
 
-       -  The current physical address.
+       -  The current physical address. This is ``CEC_PHYS_ADDR_INVALID`` if no
+          valid physical address is set.
 
     -  .. row 2
 
@@ -72,7 +74,10 @@
 
        -  ``log_addr_mask``
 
-       -  The current set of claimed logical addresses.
+       -  The current set of claimed logical addresses. This is 0 if no logical
+          addresses are claimed or if ``phys_addr`` is ``CEC_PHYS_ADDR_INVALID``.
+	  If bit 15 is set (``1 << CEC_LOG_ADDR_UNREGISTERED``) then this device
+	  has the unregistered logical address. In that case all other bits are 0.
 
 
 
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index a4d0a99..ba818ec 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -609,7 +609,7 @@
 The data-dependency barrier must order the read into Q with the store
 into *Q.  This prohibits this outcome:
 
-	(Q == B) && (B == 4)
+	(Q == &B) && (B == 4)
 
 Please note that this pattern should be rare.  After all, the whole point
 of dependency ordering is to -prevent- writes to the data structure, along
@@ -1928,6 +1928,7 @@
 
      See Documentation/DMA-API.txt for more information on consistent memory.
 
+
 MMIO WRITE BARRIER
 ------------------
 
@@ -2075,7 +2076,7 @@
 anything at all - especially with respect to I/O accesses - unless combined
 with interrupt disabling operations.
 
-See also the section on "Inter-CPU locking barrier effects".
+See also the section on "Inter-CPU acquiring barrier effects".
 
 
 As an example, consider the following:
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index 415154a..a7697783 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -74,6 +74,8 @@
 	- The DNS resolver module allows kernel servies to make DNS queries.
 driver.txt
 	- Softnet driver issues.
+ena.txt
+	- info on Amazon's Elastic Network Adapter (ENA)
 e100.txt
 	- info on Intel's EtherExpress PRO/100 line of 10/100 boards
 e1000.txt
diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt
index 1b5e7a7..8a8d3d9 100644
--- a/Documentation/networking/batman-adv.txt
+++ b/Documentation/networking/batman-adv.txt
@@ -43,10 +43,15 @@
 reload the module if you plug your USB wifi adapter into your ma-
 chine after batman advanced was initially loaded.
 
-To activate a  given  interface  simply  write  "bat0"  into  its
-"mesh_iface" file inside the batman_adv subfolder:
+The batman-adv soft-interface can be created using  the  iproute2
+tool "ip"
 
-# echo bat0 > /sys/class/net/eth0/batman_adv/mesh_iface
+# ip link add name bat0 type batadv
+
+To  activate a  given  interface  simply  attach it to the "bat0"
+interface
+
+# ip link set dev eth0 master bat0
 
 Repeat  this step for all interfaces you wish to add.  Now batman
 starts using/broadcasting on this/these interface(s).
@@ -56,10 +61,10 @@
 # cat /sys/class/net/eth0/batman_adv/iface_status
 # active
 
-To deactivate an interface you have  to  write  "none"  into  its
-"mesh_iface" file:
+To  deactivate  an  interface  you  have   to  detach it from the
+"bat0" interface:
 
-# echo none > /sys/class/net/eth0/batman_adv/mesh_iface
+# ip link set dev eth0 nomaster
 
 
 All  mesh  wide  settings  can be found in batman's own interface
diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt
index 9d05ed7..6d6c07c 100644
--- a/Documentation/networking/dsa/dsa.txt
+++ b/Documentation/networking/dsa/dsa.txt
@@ -227,9 +227,9 @@
 
 dsa_switch: structure describing a switch device in the tree, referencing a
 dsa_switch_tree as a backpointer, slave network devices, master network device,
-and a reference to the backing dsa_switch_driver
+and a reference to the backing dsa_switch_ops
 
-dsa_switch_driver: structure referencing function pointers, see below for a full
+dsa_switch_ops: structure referencing function pointers, see below for a full
 description.
 
 Design limitations
@@ -357,10 +357,10 @@
 Driver development
 ==================
 
-DSA switch drivers need to implement a dsa_switch_driver structure which will
+DSA switch drivers need to implement a dsa_switch_ops structure which will
 contain the various members described below.
 
-register_switch_driver() registers this dsa_switch_driver in its internal list
+register_switch_driver() registers this dsa_switch_ops in its internal list
 of drivers to probe for. unregister_switch_driver() does the exact opposite.
 
 Unless requested differently by setting the priv_size member accordingly, DSA
@@ -379,7 +379,7 @@
   buses, return a non-NULL string
 
 - setup: setup function for the switch, this function is responsible for setting
-  up the dsa_switch_driver private structure with all it needs: register maps,
+  up the dsa_switch_ops private structure with all it needs: register maps,
   interrupts, mutexes, locks etc.. This function is also expected to properly
   configure the switch to separate all network interfaces from each other, that
   is, they should be isolated by the switch hardware itself, typically by creating
@@ -584,29 +584,32 @@
   function that the driver has to call for each MAC address known to be behind
   the given port. A switchdev object is used to carry the VID and FDB info.
 
+- port_mdb_prepare: bridge layer function invoked when the bridge prepares the
+  installation of a multicast database entry. If the operation is not supported,
+  this function should return -EOPNOTSUPP to inform the bridge code to fallback
+  to a software implementation. No hardware setup must be done in this function.
+  See port_fdb_add for this and details.
+
+- port_mdb_add: bridge layer function invoked when the bridge wants to install
+  a multicast database entry, the switch hardware should be programmed with the
+  specified address in the specified VLAN ID in the forwarding database
+  associated with this VLAN ID.
+
+Note: VLAN ID 0 corresponds to the port private database, which, in the context
+of DSA, would be the its port-based VLAN, used by the associated bridge device.
+
+- port_mdb_del: bridge layer function invoked when the bridge wants to remove a
+  multicast database entry, the switch hardware should be programmed to delete
+  the specified MAC address from the specified VLAN ID if it was mapped into
+  this port forwarding database.
+
+- port_mdb_dump: bridge layer function invoked with a switchdev callback
+  function that the driver has to call for each MAC address known to be behind
+  the given port. A switchdev object is used to carry the VID and MDB info.
+
 TODO
 ====
 
-The platform device problem
----------------------------
-DSA is currently implemented as a platform device driver which is far from ideal
-as was discussed in this thread:
-
-http://permalink.gmane.org/gmane.linux.network/329848
-
-This basically prevents the device driver model to be properly used and applied,
-and support non-MDIO, non-MMIO Ethernet connected switches.
-
-Another problem with the platform device driver approach is that it prevents the
-use of a modular switch drivers build due to a circular dependency, illustrated
-here:
-
-http://comments.gmane.org/gmane.linux.network/345803
-
-Attempts of reworking this has been done here:
-
-https://lwn.net/Articles/643149/
-
 Making SWITCHDEV and DSA converge towards an unified codebase
 -------------------------------------------------------------
 
diff --git a/Documentation/networking/ena.txt b/Documentation/networking/ena.txt
new file mode 100644
index 0000000..2b4b6f5
--- /dev/null
+++ b/Documentation/networking/ena.txt
@@ -0,0 +1,305 @@
+Linux kernel driver for Elastic Network Adapter (ENA) family:
+=============================================================
+
+Overview:
+=========
+ENA is a networking interface designed to make good use of modern CPU
+features and system architectures.
+
+The ENA device exposes a lightweight management interface with a
+minimal set of memory mapped registers and extendable command set
+through an Admin Queue.
+
+The driver supports a range of ENA devices, is link-speed independent
+(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
+a negotiated and extendable feature set.
+
+Some ENA devices support SR-IOV. This driver is used for both the
+SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
+
+ENA devices enable high speed and low overhead network traffic
+processing by providing multiple Tx/Rx queue pairs (the maximum number
+is advertised by the device via the Admin Queue), a dedicated MSI-X
+interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation,
+and CPU cacheline optimized data placement.
+
+The ENA driver supports industry standard TCP/IP offload features such
+as checksum offload and TCP transmit segmentation offload (TSO).
+Receive-side scaling (RSS) is supported for multi-core scaling.
+
+The ENA driver and its corresponding devices implement health
+monitoring mechanisms such as watchdog, enabling the device and driver
+to recover in a manner transparent to the application, as well as
+debug logs.
+
+Some of the ENA devices support a working mode called Low-latency
+Queue (LLQ), which saves several more microseconds.
+
+Supported PCI vendor ID/device IDs:
+===================================
+1d0f:0ec2 - ENA PF
+1d0f:1ec2 - ENA PF with LLQ support
+1d0f:ec20 - ENA VF
+1d0f:ec21 - ENA VF with LLQ support
+
+ENA Source Code Directory Structure:
+====================================
+ena_com.[ch]      - Management communication layer. This layer is
+                    responsible for the handling all the management
+                    (admin) communication between the device and the
+                    driver.
+ena_eth_com.[ch]  - Tx/Rx data path.
+ena_admin_defs.h  - Definition of ENA management interface.
+ena_eth_io_defs.h - Definition of ENA data path interface.
+ena_common_defs.h - Common definitions for ena_com layer.
+ena_regs_defs.h   - Definition of ENA PCI memory-mapped (MMIO) registers.
+ena_netdev.[ch]   - Main Linux kernel driver.
+ena_syfsfs.[ch]   - Sysfs files.
+ena_ethtool.c     - ethtool callbacks.
+ena_pci_id_tbl.h  - Supported device IDs.
+
+Management Interface:
+=====================
+ENA management interface is exposed by means of:
+- PCIe Configuration Space
+- Device Registers
+- Admin Queue (AQ) and Admin Completion Queue (ACQ)
+- Asynchronous Event Notification Queue (AENQ)
+
+ENA device MMIO Registers are accessed only during driver
+initialization and are not involved in further normal device
+operation.
+
+AQ is used for submitting management commands, and the
+results/responses are reported asynchronously through ACQ.
+
+ENA introduces a very small set of management commands with room for
+vendor-specific extensions. Most of the management operations are
+framed in a generic Get/Set feature command.
+
+The following admin queue commands are supported:
+- Create I/O submission queue
+- Create I/O completion queue
+- Destroy I/O submission queue
+- Destroy I/O completion queue
+- Get feature
+- Set feature
+- Configure AENQ
+- Get statistics
+
+Refer to ena_admin_defs.h for the list of supported Get/Set Feature
+properties.
+
+The Asynchronous Event Notification Queue (AENQ) is a uni-directional
+queue used by the ENA device to send to the driver events that cannot
+be reported using ACQ. AENQ events are subdivided into groups. Each
+group may have multiple syndromes, as shown below
+
+The events are:
+	Group			Syndrome
+	Link state change	- X -
+	Fatal error		- X -
+	Notification		Suspend traffic
+	Notification		Resume traffic
+	Keep-Alive		- X -
+
+ACQ and AENQ share the same MSI-X vector.
+
+Keep-Alive is a special mechanism that allows monitoring of the
+device's health. The driver maintains a watchdog (WD) handler which,
+if fired, logs the current state and statistics then resets and
+restarts the ENA device and driver. A Keep-Alive event is delivered by
+the device every second. The driver re-arms the WD upon reception of a
+Keep-Alive event. A missed Keep-Alive event causes the WD handler to
+fire.
+
+Data Path Interface:
+====================
+I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx
+SQ correspondingly). Each SQ has a completion queue (CQ) associated
+with it.
+
+The SQs and CQs are implemented as descriptor rings in contiguous
+physical memory.
+
+The ENA driver supports two Queue Operation modes for Tx SQs:
+- Regular mode
+  * In this mode the Tx SQs reside in the host's memory. The ENA
+    device fetches the ENA Tx descriptors and packet data from host
+    memory.
+- Low Latency Queue (LLQ) mode or "push-mode".
+  * In this mode the driver pushes the transmit descriptors and the
+    first 128 bytes of the packet directly to the ENA device memory
+    space. The rest of the packet payload is fetched by the
+    device. For this operation mode, the driver uses a dedicated PCI
+    device memory BAR, which is mapped with write-combine capability.
+
+The Rx SQs support only the regular mode.
+
+Note: Not all ENA devices support LLQ, and this feature is negotiated
+      with the device upon initialization. If the ENA device does not
+      support LLQ mode, the driver falls back to the regular mode.
+
+The driver supports multi-queue for both Tx and Rx. This has various
+benefits:
+- Reduced CPU/thread/process contention on a given Ethernet interface.
+- Cache miss rate on completion is reduced, particularly for data
+  cache lines that hold the sk_buff structures.
+- Increased process-level parallelism when handling received packets.
+- Increased data cache hit rate, by steering kernel processing of
+  packets to the CPU, where the application thread consuming the
+  packet is running.
+- In hardware interrupt re-direction.
+
+Interrupt Modes:
+================
+The driver assigns a single MSI-X vector per queue pair (for both Tx
+and Rx directions). The driver assigns an additional dedicated MSI-X vector
+for management (for ACQ and AENQ).
+
+Management interrupt registration is performed when the Linux kernel
+probes the adapter, and it is de-registered when the adapter is
+removed. I/O queue interrupt registration is performed when the Linux
+interface of the adapter is opened, and it is de-registered when the
+interface is closed.
+
+The management interrupt is named:
+   ena-mgmnt@pci:<PCI domain:bus:slot.function>
+and for each queue pair, an interrupt is named:
+   <interface name>-Tx-Rx-<queue index>
+
+The ENA device operates in auto-mask and auto-clear interrupt
+modes. That is, once MSI-X is delivered to the host, its Cause bit is
+automatically cleared and the interrupt is masked. The interrupt is
+unmasked by the driver after NAPI processing is complete.
+
+Interrupt Moderation:
+=====================
+ENA driver and device can operate in conventional or adaptive interrupt
+moderation mode.
+
+In conventional mode the driver instructs device to postpone interrupt
+posting according to static interrupt delay value. The interrupt delay
+value can be configured through ethtool(8). The following ethtool
+parameters are supported by the driver: tx-usecs, rx-usecs
+
+In adaptive interrupt moderation mode the interrupt delay value is
+updated by the driver dynamically and adjusted every NAPI cycle
+according to the traffic nature.
+
+By default ENA driver applies adaptive coalescing on Rx traffic and
+conventional coalescing on Tx traffic.
+
+Adaptive coalescing can be switched on/off through ethtool(8)
+adaptive_rx on|off parameter.
+
+The driver chooses interrupt delay value according to the number of
+bytes and packets received between interrupt unmasking and interrupt
+posting. The driver uses interrupt delay table that subdivides the
+range of received bytes/packets into 5 levels and assigns interrupt
+delay value to each level.
+
+The user can enable/disable adaptive moderation, modify the interrupt
+delay table and restore its default values through sysfs.
+
+The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
+and can be configured by the ETHTOOL_STUNABLE command of the
+SIOCETHTOOL ioctl.
+
+SKB:
+The driver-allocated SKB for frames received from Rx handling using
+NAPI context. The allocation method depends on the size of the packet.
+If the frame length is larger than rx_copybreak, napi_get_frags()
+is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer
+content is copied (by CPU) to the SKB, and the buffer is recycled.
+
+Statistics:
+===========
+The user can obtain ENA device and driver statistics using ethtool.
+The driver can collect regular or extended statistics (including
+per-queue stats) from the device.
+
+In addition the driver logs the stats to syslog upon device reset.
+
+MTU:
+====
+The driver supports an arbitrarily large MTU with a maximum that is
+negotiated with the device. The driver configures MTU using the
+SetFeature command (ENA_ADMIN_MTU property). The user can change MTU
+via ip(8) and similar legacy tools.
+
+Stateless Offloads:
+===================
+The ENA driver supports:
+- TSO over IPv4/IPv6
+- TSO with ECN
+- IPv4 header checksum offload
+- TCP/UDP over IPv4/IPv6 checksum offloads
+
+RSS:
+====
+- The ENA device supports RSS that allows flexible Rx traffic
+  steering.
+- Toeplitz and CRC32 hash functions are supported.
+- Different combinations of L2/L3/L4 fields can be configured as
+  inputs for hash functions.
+- The driver configures RSS settings using the AQ SetFeature command
+  (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and
+  ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties).
+- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash
+  function delivered in the Rx CQ descriptor is set in the received
+  SKB.
+- The user can provide a hash key, hash function, and configure the
+  indirection table through ethtool(8).
+
+DATA PATH:
+==========
+Tx:
+---
+end_start_xmit() is called by the stack. This function does the following:
+- Maps data buffers (skb->data and frags).
+- Populates ena_buf for the push buffer (if the driver and device are
+  in push mode.)
+- Prepares ENA bufs for the remaining frags.
+- Allocates a new request ID from the empty req_id ring. The request
+  ID is the index of the packet in the Tx info. This is used for
+  out-of-order TX completions.
+- Adds the packet to the proper place in the Tx ring.
+- Calls ena_com_prepare_tx(), an ENA communication layer that converts
+  the ena_bufs to ENA descriptors (and adds meta ENA descriptors as
+  needed.)
+  * This function also copies the ENA descriptors and the push buffer
+    to the Device memory space (if in push mode.)
+- Writes doorbell to the ENA device.
+- When the ENA device finishes sending the packet, a completion
+  interrupt is raised.
+- The interrupt handler schedules NAPI.
+- The ena_clean_tx_irq() function is called. This function handles the
+  completion descriptors generated by the ENA, with a single
+  completion descriptor per completed packet.
+  * req_id is retrieved from the completion descriptor. The tx_info of
+    the packet is retrieved via the req_id. The data buffers are
+    unmapped and req_id is returned to the empty req_id ring.
+  * The function stops when the completion descriptors are completed or
+    the budget is reached.
+
+Rx:
+---
+- When a packet is received from the ENA device.
+- The interrupt handler schedules NAPI.
+- The ena_clean_rx_irq() function is called. This function calls
+  ena_rx_pkt(), an ENA communication layer function, which returns the
+  number of descriptors used for a new unhandled packet, and zero if
+  no new packet is found.
+- Then it calls the ena_clean_rx_irq() function.
+- ena_eth_rx_skb() checks packet length:
+  * If the packet is small (len < rx_copybreak), the driver allocates
+    a SKB for the new packet, and copies the packet payload into the
+    SKB data buffer.
+    - In this way the original data buffer is not passed to the stack
+      and is reused for future Rx packets.
+  * Otherwise the function unmaps the Rx buffer, then allocates the
+    new SKB structure and hooks the Rx buffer to the SKB frags.
+- The new SKB is updated with the necessary information (protocol,
+  checksum hw verify result, etc.), and then passed to the network
+  stack, using the NAPI interface function napi_gro_receive().
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 9ae9293..3db8c67d 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -575,32 +575,33 @@
 	unconditionally generation of syncookies.
 
 tcp_fastopen - INTEGER
-	Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data
-	in the opening SYN packet. To use this feature, the client application
-	must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than
-	connect() to perform a TCP handshake automatically.
+	Enable TCP Fast Open (RFC7413) to send and accept data in the opening
+	SYN packet.
+
+	The client support is enabled by flag 0x1 (on by default). The client
+	then must use sendmsg() or sendto() with the MSG_FASTOPEN flag,
+	rather than connect() to send data in SYN.
+
+	The server support is enabled by flag 0x2 (off by default). Then
+	either enable for all listeners with another flag (0x400) or
+	enable individual listeners via TCP_FASTOPEN socket option with
+	the option value being the length of the syn-data backlog.
 
 	The values (bitmap) are
-	1: Enables sending data in the opening SYN on the client w/ MSG_FASTOPEN.
-	2: Enables TCP Fast Open on the server side, i.e., allowing data in
-	   a SYN packet to be accepted and passed to the application before
-	   3-way hand shake finishes.
-	4: Send data in the opening SYN regardless of cookie availability and
-	   without a cookie option.
-	0x100: Accept SYN data w/o validating the cookie.
-	0x200: Accept data-in-SYN w/o any cookie option present.
-	0x400/0x800: Enable Fast Open on all listeners regardless of the
-	   TCP_FASTOPEN socket option. The two different flags designate two
-	   different ways of setting max_qlen without the TCP_FASTOPEN socket
-	   option.
+	  0x1: (client) enables sending data in the opening SYN on the client.
+	  0x2: (server) enables the server support, i.e., allowing data in
+			a SYN packet to be accepted and passed to the
+			application before 3-way handshake finishes.
+	  0x4: (client) send data in the opening SYN regardless of cookie
+			availability and without a cookie option.
+	0x200: (server) accept data-in-SYN w/o any cookie option present.
+	0x400: (server) enable all listeners to support Fast Open by
+			default without explicit TCP_FASTOPEN socket option.
 
-	Default: 1
+	Default: 0x1
 
-	Note that the client & server side Fast Open flags (1 and 2
-	respectively) must be also enabled before the rest of flags can take
-	effect.
-
-	See include/net/tcp.h and the code for more details.
+	Note that that additional client or server features are only
+	effective if the basic support (0x1 and 0x2) are enabled respectively.
 
 tcp_syn_retries - INTEGER
 	Number of times initial SYNs for an active TCP connection attempt
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt
index 14422f8..24196ce 100644
--- a/Documentation/networking/ipvlan.txt
+++ b/Documentation/networking/ipvlan.txt
@@ -22,7 +22,7 @@
 	There are no module parameters for this driver and it can be configured
 using IProute2/ip utility.
 
-	ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | L3 }
+	ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | l3 | l3s }
 
 	e.g. ip link add link ipvl0 eth0 type ipvlan mode l2
 
@@ -48,6 +48,11 @@
 used before packets are queued on the outbound device. In this mode the slaves
 will not receive nor can send multicast / broadcast traffic.
 
+4.3 L3S mode:
+	This is very similar to the L3 mode except that iptables (conn-tracking)
+works in this mode and hence it is L3-symmetric (L3s). This will have slightly less
+performance but that shouldn't matter since you are choosing this mode over plain-L3
+mode to make conn-tracking work.
 
 5. What to choose (macvlan vs. ipvlan)?
 	These two devices are very similar in many regards and the specific use
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 16a924c..1b63bbc 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -725,7 +725,8 @@
 
  (*) End a client call.
 
-	void rxrpc_kernel_end_call(struct rxrpc_call *call);
+	void rxrpc_kernel_end_call(struct socket *sock,
+				   struct rxrpc_call *call);
 
      This is used to end a previously begun call.  The user_call_ID is expunged
      from AF_RXRPC's knowledge and will not be seen again in association with
@@ -733,7 +734,9 @@
 
  (*) Send data through a call.
 
-	int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
+	int rxrpc_kernel_send_data(struct socket *sock,
+				   struct rxrpc_call *call,
+				   struct msghdr *msg,
 				   size_t len);
 
      This is used to supply either the request part of a client call or the
@@ -745,9 +748,42 @@
      The msg must not specify a destination address, control data or any flags
      other than MSG_MORE.  len is the total amount of data to transmit.
 
+ (*) Receive data from a call.
+
+	int rxrpc_kernel_recv_data(struct socket *sock,
+				   struct rxrpc_call *call,
+				   void *buf,
+				   size_t size,
+				   size_t *_offset,
+				   bool want_more,
+				   u32 *_abort)
+
+      This is used to receive data from either the reply part of a client call
+      or the request part of a service call.  buf and size specify how much
+      data is desired and where to store it.  *_offset is added on to buf and
+      subtracted from size internally; the amount copied into the buffer is
+      added to *_offset before returning.
+
+      want_more should be true if further data will be required after this is
+      satisfied and false if this is the last item of the receive phase.
+
+      There are three normal returns: 0 if the buffer was filled and want_more
+      was true; 1 if the buffer was filled, the last DATA packet has been
+      emptied and want_more was false; and -EAGAIN if the function needs to be
+      called again.
+
+      If the last DATA packet is processed but the buffer contains less than
+      the amount requested, EBADMSG is returned.  If want_more wasn't set, but
+      more data was available, EMSGSIZE is returned.
+
+      If a remote ABORT is detected, the abort code received will be stored in
+      *_abort and ECONNABORTED will be returned.
+
  (*) Abort a call.
 
-	void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code);
+	void rxrpc_kernel_abort_call(struct socket *sock,
+				     struct rxrpc_call *call,
+				     u32 abort_code);
 
      This is used to abort a call if it's still in an abortable state.  The
      abort code specified will be placed in the ABORT message sent.
@@ -790,13 +826,12 @@
      Data messages can have their contents extracted with the usual bunch of
      socket buffer manipulation functions.  A data message can be determined to
      be the last one in a sequence with rxrpc_kernel_is_data_last().  When a
-     data message has been used up, rxrpc_kernel_data_delivered() should be
-     called on it..
+     data message has been used up, rxrpc_kernel_data_consumed() should be
+     called on it.
 
-     Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose
-     of.  It is possible to get extra refs on all types of message for later
-     freeing, but this may pin the state of a call until the message is finally
-     freed.
+     Messages should be handled to rxrpc_kernel_free_skb() to dispose of.  It
+     is possible to get extra refs on all types of message for later freeing,
+     but this may pin the state of a call until the message is finally freed.
 
  (*) Accept an incoming call.
 
@@ -821,45 +856,6 @@
      Other errors may be returned if the call had been aborted (-ECONNABORTED)
      or had timed out (-ETIME).
 
- (*) Record the delivery of a data message and free it.
-
-	void rxrpc_kernel_data_delivered(struct sk_buff *skb);
-
-     This is used to record a data message as having been delivered and to
-     update the ACK state for the call.  The socket buffer will be freed.
-
- (*) Free a message.
-
-	void rxrpc_kernel_free_skb(struct sk_buff *skb);
-
-     This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC
-     socket.
-
- (*) Determine if a data message is the last one on a call.
-
-	bool rxrpc_kernel_is_data_last(struct sk_buff *skb);
-
-     This is used to determine if a socket buffer holds the last data message
-     to be received for a call (true will be returned if it does, false
-     if not).
-
-     The data message will be part of the reply on a client call and the
-     request on an incoming call.  In the latter case there will be more
-     messages, but in the former case there will not.
-
- (*) Get the abort code from an abort message.
-
-	u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb);
-
-     This is used to extract the abort code from a remote abort message.
-
- (*) Get the error number from a local or network error message.
-
-	int rxrpc_kernel_get_error_number(struct sk_buff *skb);
-
-     This is used to extract the error number from a message indicating either
-     a local error occurred or a network error occurred.
-
  (*) Allocate a null key for doing anonymous security.
 
 	struct key *rxrpc_get_null_key(const char *keyname);
@@ -867,6 +863,13 @@
      This is used to allocate a null RxRPC key that can be used to indicate
      anonymous security for a particular domain.
 
+ (*) Get the peer address of a call.
+
+	void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
+				   struct sockaddr_rxrpc *_srx);
+
+     This is used to find the remote peer address of a call.
+
 
 =======================
 CONFIGURABLE PARAMETERS
diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt
new file mode 100644
index 0000000..a0bf573
--- /dev/null
+++ b/Documentation/networking/strparser.txt
@@ -0,0 +1,136 @@
+Stream Parser
+-------------
+
+The stream parser (strparser) is a utility that parses messages of an
+application layer protocol running over a TCP connection. The stream
+parser works in conjunction with an upper layer in the kernel to provide
+kernel support for application layer messages. For instance, Kernel
+Connection Multiplexor (KCM) uses the Stream Parser to parse messages
+using a BPF program.
+
+Interface
+---------
+
+The API includes a context structure, a set of callbacks, utility
+functions, and a data_ready function. The callbacks include
+a parse_msg function that is called to perform parsing (e.g.
+BPF parsing in case of KCM), and a rcv_msg function that is called
+when a full message has been completed.
+
+A stream parser can be instantiated for a TCP connection. This is done
+by:
+
+strp_init(struct strparser *strp, struct sock *csk,
+	  struct strp_callbacks *cb)
+
+strp is a struct of type strparser that is allocated by the upper layer.
+csk is the TCP socket associated with the stream parser. Callbacks are
+called by the stream parser.
+
+Callbacks
+---------
+
+There are four callbacks:
+
+int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
+
+    parse_msg is called to determine the length of the next message
+    in the stream. The upper layer must implement this function. It
+    should parse the sk_buff as containing the headers for the
+    next application layer messages in the stream.
+
+    The skb->cb in the input skb is a struct strp_rx_msg. Only
+    the offset field is relevant in parse_msg and gives the offset
+    where the message starts in the skb.
+
+    The return values of this function are:
+
+    >0 : indicates length of successfully parsed message
+    0  : indicates more data must be received to parse the message
+    -ESTRPIPE : current message should not be processed by the
+          kernel, return control of the socket to userspace which
+          can proceed to read the messages itself
+    other < 0 : Error is parsing, give control back to userspace
+          assuming that synchronization is lost and the stream
+          is unrecoverable (application expected to close TCP socket)
+
+    In the case that an error is returned (return value is less than
+    zero) the stream parser will set the error on TCP socket and wake
+    it up. If parse_msg returned -ESTRPIPE and the stream parser had
+    previously read some bytes for the current message, then the error
+    set on the attached socket is ENODATA since the stream is
+    unrecoverable in that case.
+
+void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+
+    rcv_msg is called when a full message has been received and
+    is queued. The callee must consume the sk_buff; it can
+    call strp_pause to prevent any further messages from being
+    received in rcv_msg (see strp_pause below). This callback
+    must be set.
+
+    The skb->cb in the input skb is a struct strp_rx_msg. This
+    struct contains two fields: offset and full_len. Offset is
+    where the message starts in the skb, and full_len is the
+    the length of the message. skb->len - offset may be greater
+    then full_len since strparser does not trim the skb.
+
+int (*read_sock_done)(struct strparser *strp, int err);
+
+     read_sock_done is called when the stream parser is done reading
+     the TCP socket. The stream parser may read multiple messages
+     in a loop and this function allows cleanup to occur when existing
+     the loop. If the callback is not set (NULL in strp_init) a
+     default function is used.
+
+void (*abort_parser)(struct strparser *strp, int err);
+
+     This function is called when stream parser encounters an error
+     in parsing. The default function stops the stream parser for the
+     TCP socket and sets the error in the socket. The default function
+     can be changed by setting the callback to non-NULL in strp_init.
+
+Functions
+---------
+
+The upper layer calls strp_tcp_data_ready when data is ready on the lower
+socket for strparser to process. This should be called from a data_ready
+callback that is set on the socket.
+
+strp_stop is called to completely stop stream parser operations. This
+is called internally when the stream parser encounters an error, and
+it is called from the upper layer when unattaching a TCP socket.
+
+strp_done is called to unattach the stream parser from the TCP socket.
+This must be called after the stream processor has be stopped.
+
+strp_check_rcv is called to check for new messages on the socket. This
+is normally called at initialization of the a stream parser instance
+of after strp_unpause.
+
+Statistics
+----------
+
+Various counters are kept for each stream parser for a TCP socket.
+These are in the strp_stats structure. strp_aggr_stats is a convenience
+structure for accumulating statistics for multiple stream parser
+instances. save_strp_stats and aggregate_strp_stats are helper functions
+to save and aggregate statistics.
+
+Message assembly limits
+-----------------------
+
+The stream parser provide mechanisms to limit the resources consumed by
+message assembly.
+
+A timer is set when assembly starts for a new message. The message
+timeout is taken from rcvtime for the associated TCP socket. If the
+timer fires before assembly completes the stream parser is aborted
+and the ETIMEDOUT error is set on the TCP socket.
+
+Message length is limited to the receive buffer size of the associated
+TCP socket. If the length returned by parse_msg is greater than
+the socket buffer size then the stream parser is aborted with
+EMSGSIZE error set on the TCP socket. Note that this makes the
+maximum size of receive skbuffs for a socket with a stream parser
+to be 2*sk_rcvbuf of the TCP socket.
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
index 31c3911..2bbac05 100644
--- a/Documentation/networking/switchdev.txt
+++ b/Documentation/networking/switchdev.txt
@@ -283,15 +283,10 @@
 bridge should not reflood the packet to the same ports the device flooded,
 otherwise there will be duplicate packets on the wire.
 
-To avoid duplicate packets, the device/driver should mark a packet as already
-forwarded using skb->offload_fwd_mark.  The same mark is set on the device
-ports in the domain using dev->offload_fwd_mark.  If the skb->offload_fwd_mark
-is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel
-will drop the skb right before transmit on the egress port, with the
-understanding that the device already forwarded the packet on same egress port.
-The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark
-for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and
-a group ifindex.
+To avoid duplicate packets, the switch driver should mark a packet as already
+forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark
+the skb using the ingress bridge port's mark and prevent it from being forwarded
+through any bridge port with the same mark.
 
 It is possible for the switch device to not handle flooding and push the
 packets up to the bridge driver for flooding.  This is not ideal as the number
@@ -319,30 +314,29 @@
 does a longest prefix match (LPM) on FIB entries matching route prefix and
 forwards the packet to the matching FIB entry's nexthop(s) egress ports.
 
-To program the device, the driver implements support for
-SWITCHDEV_OBJ_IPV[4|6]_FIB object using switchdev_port_obj_xxx ops.
-switchdev_port_obj_add is used for both adding a new FIB entry to the device,
-or modifying an existing entry on the device.
+To program the device, the driver has to register a FIB notifier handler
+using register_fib_notifier. The following events are available:
+FIB_EVENT_ENTRY_ADD: used for both adding a new FIB entry to the device,
+                     or modifying an existing entry on the device.
+FIB_EVENT_ENTRY_DEL: used for removing a FIB entry
+FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_DEL: used to propagate FIB rule changes
 
-XXX: Currently, only SWITCHDEV_OBJ_ID_IPV4_FIB objects are supported.
+FIB_EVENT_ENTRY_ADD and FIB_EVENT_ENTRY_DEL events pass:
 
-SWITCHDEV_OBJ_ID_IPV4_FIB object passes:
-
-	struct switchdev_obj_ipv4_fib {         /* IPV4_FIB */
+	struct fib_entry_notifier_info {
+		struct fib_notifier_info info; /* must be first */
 		u32 dst;
 		int dst_len;
 		struct fib_info *fi;
 		u8 tos;
 		u8 type;
-		u32 nlflags;
 		u32 tb_id;
-	} ipv4_fib;
+		u32 nlflags;
+	};
 
 to add/modify/delete IPv4 dst/dest_len prefix on table tb_id.  The *fi
 structure holds details on the route and route's nexthops.  *dev is one of the
-port netdevs mentioned in the routes next hop list.  If the output port netdevs
-referenced in the route's nexthop list don't all have the same switch ID, the
-driver is not called to add/modify/delete the FIB entry.
+port netdevs mentioned in the route's next hop list.
 
 Routes offloaded to the device are labeled with "offload" in the ip route
 listing:
@@ -360,6 +354,8 @@
 	12.0.0.4 via 11.0.0.9 dev sw1p2  proto zebra  metric 20 offload
 	192.168.0.0/24 dev eth0  proto kernel  scope link  src 192.168.0.15
 
+The "offload" flag is set in case at least one device offloads the FIB entry.
+
 XXX: add/mod/del IPv6 FIB API
 
 Nexthop Resolution
diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt
index b96098c..708f87f 100644
--- a/Documentation/power/basic-pm-debugging.txt
+++ b/Documentation/power/basic-pm-debugging.txt
@@ -164,7 +164,32 @@
 Again, if you find the offending module(s), it(they) must be unloaded every time
 before hibernation, and please report the problem with it(them).
 
-c) Advanced debugging
+c) Using the "test_resume" hibernation option
+
+/sys/power/disk generally tells the kernel what to do after creating a
+hibernation image.  One of the available options is "test_resume" which
+causes the just created image to be used for immediate restoration.  Namely,
+after doing:
+
+# echo test_resume > /sys/power/disk
+# echo disk > /sys/power/state
+
+a hibernation image will be created and a resume from it will be triggered
+immediately without involving the platform firmware in any way.
+
+That test can be used to check if failures to resume from hibernation are
+related to bad interactions with the platform firmware.  That is, if the above
+works every time, but resume from actual hibernation does not work or is
+unreliable, the platform firmware may be responsible for the failures.
+
+On architectures and platforms that support using different kernels to restore
+hibernation images (that is, the kernel used to read the image from storage and
+load it into memory is different from the one included in the image) or support
+kernel address space randomization, it also can be used to check if failures
+to resume may be related to the differences between the restore and image
+kernels.
+
+d) Advanced debugging
 
 In case that hibernation does not work on your system even in the minimal
 configuration and compiling more drivers as modules is not practical or some
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
index f1f0f59a..974916f 100644
--- a/Documentation/power/interface.txt
+++ b/Documentation/power/interface.txt
@@ -1,75 +1,76 @@
-Power Management Interface
+Power Management Interface for System Sleep
 
+Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 
-The power management subsystem provides a unified sysfs interface to 
-userspace, regardless of what architecture or platform one is
-running. The interface exists in /sys/power/ directory (assuming sysfs
-is mounted at /sys). 
+The power management subsystem provides userspace with a unified sysfs interface
+for system sleep regardless of the underlying system architecture or platform.
+The interface is located in the /sys/power/ directory (assuming that sysfs is
+mounted at /sys).
 
-/sys/power/state controls system power state. Reading from this file
-returns what states are supported, which is hard-coded to 'freeze',
-'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk'
-(Suspend-to-Disk). 
+/sys/power/state is the system sleep state control file.
 
-Writing to this file one of those strings causes the system to
-transition into that state. Please see the file
-Documentation/power/states.txt for a description of each of those
-states.
+Reading from it returns a list of supported sleep states, encoded as:
 
+'freeze' (Suspend-to-Idle)
+'standby' (Power-On Suspend)
+'mem' (Suspend-to-RAM)
+'disk' (Suspend-to-Disk)
 
-/sys/power/disk controls the operating mode of the suspend-to-disk
-mechanism. Suspend-to-disk can be handled in several ways. We have a
-few options for putting the system to sleep - using the platform driver
-(e.g. ACPI or other suspend_ops), powering off the system or rebooting the
-system (for testing).
+Suspend-to-Idle is always supported.  Suspend-to-Disk is always supported
+too as long the kernel has been configured to support hibernation at all
+(ie. CONFIG_HIBERNATION is set in the kernel configuration file).  Support
+for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the
+platform.
 
-Additionally, /sys/power/disk can be used to turn on one of the two testing
-modes of the suspend-to-disk mechanism: 'testproc' or 'test'.  If the
-suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to
-/sys/power/state will cause the kernel to disable nonboot CPUs and freeze
-tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs.  If it is
-in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel
-to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait
-for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs.  Then,
-we are able to look in the log messages and work out, for example, which code
-is being slow and which device drivers are misbehaving.
+If one of the strings listed in /sys/power/state is written to it, the system
+will attempt to transition into the corresponding sleep state.  Refer to
+Documentation/power/states.txt for a description of each of those states.
 
-Reading from this file will display all supported modes and the currently
-selected one in brackets, for example
+/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk).
+Specifically, it tells the kernel what to do after creating a hibernation image.
 
-	[shutdown] reboot test testproc
+Reading from it returns a list of supported options encoded as:
 
-Writing to this file will accept one of
+'platform' (put the system into sleep using a platform-provided method)
+'shutdown' (shut the system down)
+'reboot' (reboot the system)
+'suspend' (trigger a Suspend-to-RAM transition)
+'test_resume' (resume-after-hibernation test mode)
 
-       'platform' (only if the platform supports it)
-       'shutdown'
-       'reboot'
-       'testproc'
-       'test'
+The currently selected option is printed in square brackets.
 
-/sys/power/image_size controls the size of the image created by
-the suspend-to-disk mechanism.  It can be written a string
-representing a non-negative integer that will be used as an upper
-limit of the image size, in bytes.  The suspend-to-disk mechanism will
-do its best to ensure the image size will not exceed that number.  However,
-if this turns out to be impossible, it will try to suspend anyway using the
-smallest image possible.  In particular, if "0" is written to this file, the
-suspend image will be as small as possible.
+The 'platform' option is only available if the platform provides a special
+mechanism to put the system to sleep after creating a hibernation image (ACPI
+does that, for example).  The 'suspend' option is available if Suspend-to-RAM
+is supported.  Refer to Documentation/power/basic_pm_debugging.txt for the
+description of the 'test_resume' option.
 
-Reading from this file will display the current image size limit, which
-is set to 2/5 of available RAM by default.
+To select an option, write the string representing it to /sys/power/disk.
 
-/sys/power/pm_trace controls the code which saves the last PM event point in
-the RTC across reboots, so that you can debug a machine that just hangs
-during suspend (or more commonly, during resume).  Namely, the RTC is only
-used to save the last PM event point if this file contains '1'.  Initially it
-contains '0' which may be changed to '1' by writing a string representing a
-nonzero integer into it.
+/sys/power/image_size controls the size of hibernation images.
 
-To use this debugging feature you should attempt to suspend the machine, then
-reboot it and run
+It can be written a string representing a non-negative integer that will be
+used as a best-effort upper limit of the image size, in bytes.  The hibernation
+core will do its best to ensure that the image size will not exceed that number.
+However, if that turns out to be impossible to achieve, a hibernation image will
+still be created and its size will be as small as possible.  In particular,
+writing '0' to this file will enforce hibernation images to be as small as
+possible.
 
-	dmesg -s 1000000 | grep 'hash matches'
+Reading from this file returns the current image size limit, which is set to
+around 2/5 of available RAM by default.
 
-CAUTION: Using it will cause your machine's real-time (CMOS) clock to be
-set to a random invalid time after a resume.
+/sys/power/pm_trace controls the PM trace mechanism saving the last suspend
+or resume event point in the RTC across reboots.
+
+It helps to debug hard lockups or reboots due to device driver failures that
+occur during system suspend or resume (which is more common) more effectively.
+
+If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume
+event point in turn will be stored in the RTC memory (overwriting the actual
+RTC information), so it will survive a system crash if one occurs right after
+storing it and it can be used later to identify the driver that caused the crash
+to happen (see Documentation/power/s2ram.txt for more information).
+
+Initially it contains '0' which may be changed to '1' by writing a string
+representing a nonzero integer into it.
diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt
index ba0a2a4..e32fdbb 100644
--- a/Documentation/powerpc/transactional_memory.txt
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -167,6 +167,8 @@
 For signals taken in non-TM or suspended mode, we use the
 normal/non-checkpointed stack pointer.
 
+Any transaction initiated inside a sighandler and suspended on return
+from the sighandler to the kernel will get reclaimed and discarded.
 
 Failure cause codes used by kernel
 ==================================
diff --git a/Documentation/rapidio/mport_cdev.txt b/Documentation/rapidio/mport_cdev.txt
index 6e491a6..a53f786 100644
--- a/Documentation/rapidio/mport_cdev.txt
+++ b/Documentation/rapidio/mport_cdev.txt
@@ -80,6 +80,10 @@
 
 III. Module parameters
 
+- 'dma_timeout' - DMA transfer completion timeout (in msec, default value 3000).
+        This parameter set a maximum completion wait time for SYNC mode DMA
+        transfer requests and for RIO_WAIT_FOR_ASYNC ioctl requests.
+
 - 'dbg_level' - This parameter allows to control amount of debug information
         generated by this device driver. This parameter is formed by set of
         bit masks that correspond to the specific functional blocks.
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
index 53a2fe1..8e37b0b 100644
--- a/Documentation/scheduler/sched-deadline.txt
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -16,6 +16,7 @@
    4.1 System-wide settings
    4.2 Task interface
    4.3 Default behavior
+   4.4 Behavior of sched_yield()
  5. Tasks CPU affinity
    5.1 SCHED_DEADLINE and cpusets HOWTO
  6. Future plans
@@ -426,6 +427,23 @@
  Finally, notice that in order not to jeopardize the admission control a
  -deadline task cannot fork.
 
+
+4.4 Behavior of sched_yield()
+-----------------------------
+
+ When a SCHED_DEADLINE task calls sched_yield(), it gives up its
+ remaining runtime and is immediately throttled, until the next
+ period, when its runtime will be replenished (a special flag
+ dl_yielded is set and used to handle correctly throttling and runtime
+ replenishment after a call to sched_yield()).
+
+ This behavior of sched_yield() allows the task to wake-up exactly at
+ the beginning of the next period. Also, this may be useful in the
+ future with bandwidth reclaiming mechanisms, where sched_yield() will
+ make the leftoever runtime available for reclamation by other
+ SCHED_DEADLINE tasks.
+
+
 5. Tasks CPU affinity
 =====================
 
diff --git a/Documentation/scsi/scsi-parameters.txt b/Documentation/scsi/scsi-parameters.txt
index 1241ac1..d5ae6ce 100644
--- a/Documentation/scsi/scsi-parameters.txt
+++ b/Documentation/scsi/scsi-parameters.txt
@@ -79,8 +79,6 @@
 
 	ncr53c8xx=	[HW,SCSI]
 
-	nodisconnect	[HW,SCSI,M68K] Disables SCSI disconnects.
-
 	osst=		[HW,SCSI] SCSI Tape Driver
 			Format: <buffer_size>,<write_threshold>
 			See also Documentation/scsi/st.txt.
diff --git a/Documentation/serial/serial-rs485.txt b/Documentation/serial/serial-rs485.txt
index 2253b8b..389fcd4 100644
--- a/Documentation/serial/serial-rs485.txt
+++ b/Documentation/serial/serial-rs485.txt
@@ -45,9 +45,8 @@
 
 	#include <linux/serial.h>
 
-	/* RS485 ioctls: */
-	#define TIOCGRS485      0x542E
-	#define TIOCSRS485      0x542F
+	/* Include definition for RS485 ioctls: TIOCGRS485 and TIOCSRS485 */
+	#include <sys/ioctl.h>
 
 	/* Open your specific device (e.g., /dev/mydevice): */
 	int fd = open ("/dev/mydevice", O_RDWR);
diff --git a/Documentation/sphinx-static/theme_overrides.css b/Documentation/sphinx-static/theme_overrides.css
index 3a2ac4b..d5764a4 100644
--- a/Documentation/sphinx-static/theme_overrides.css
+++ b/Documentation/sphinx-static/theme_overrides.css
@@ -42,11 +42,26 @@
     caption a.headerlink { opacity: 0; }
     caption a.headerlink:hover { opacity: 1; }
 
-    /* inline literal: drop the borderbox and red color */
+    /* Menu selection and keystrokes */
+
+    span.menuselection {
+	color: blue;
+	font-family: "Courier New", Courier, monospace
+    }
+
+    code.kbd, code.kbd span {
+	color: white;
+	background-color: darkblue;
+	font-weight: bold;
+	font-family: "Courier New", Courier, monospace
+    }
+
+    /* inline literal: drop the borderbox, padding and red color */
 
     code, .rst-content tt, .rst-content code {
         color: inherit;
         border: none;
+        padding: unset;
         background: inherit;
         font-size: 85%;
     }
@@ -54,5 +69,4 @@
     .rst-content tt.literal,.rst-content tt.literal,.rst-content code.literal {
         color: inherit;
     }
-
 }
diff --git a/Documentation/sphinx/cdomain.py b/Documentation/sphinx/cdomain.py
new file mode 100644
index 0000000..df0419c
--- /dev/null
+++ b/Documentation/sphinx/cdomain.py
@@ -0,0 +1,165 @@
+# -*- coding: utf-8; mode: python -*-
+# pylint: disable=W0141,C0113,C0103,C0325
+u"""
+    cdomain
+    ~~~~~~~
+
+    Replacement for the sphinx c-domain.
+
+    :copyright:  Copyright (C) 2016  Markus Heiser
+    :license:    GPL Version 2, June 1991 see Linux/COPYING for details.
+
+    List of customizations:
+
+    * Moved the *duplicate C object description* warnings for function
+      declarations in the nitpicky mode. See Sphinx documentation for
+      the config values for ``nitpick`` and ``nitpick_ignore``.
+
+    * Add option 'name' to the "c:function:" directive.  With option 'name' the
+      ref-name of a function can be modified. E.g.::
+
+          .. c:function:: int ioctl( int fd, int request )
+             :name: VIDIOC_LOG_STATUS
+
+      The func-name (e.g. ioctl) remains in the output but the ref-name changed
+      from 'ioctl' to 'VIDIOC_LOG_STATUS'. The function is referenced by::
+
+          * :c:func:`VIDIOC_LOG_STATUS` or
+          * :any:`VIDIOC_LOG_STATUS` (``:any:`` needs sphinx 1.3)
+
+     * Handle signatures of function-like macros well. Don't try to deduce
+       arguments types of function-like macros.
+
+"""
+
+from docutils import nodes
+from docutils.parsers.rst import directives
+
+import sphinx
+from sphinx import addnodes
+from sphinx.domains.c import c_funcptr_sig_re, c_sig_re
+from sphinx.domains.c import CObject as Base_CObject
+from sphinx.domains.c import CDomain as Base_CDomain
+
+__version__  = '1.0'
+
+# Get Sphinx version
+major, minor, patch = map(int, sphinx.__version__.split("."))
+
+def setup(app):
+
+    app.override_domain(CDomain)
+
+    return dict(
+        version = __version__,
+        parallel_read_safe = True,
+        parallel_write_safe = True
+    )
+
+class CObject(Base_CObject):
+
+    """
+    Description of a C language object.
+    """
+    option_spec = {
+        "name" : directives.unchanged
+    }
+
+    def handle_func_like_macro(self, sig, signode):
+        u"""Handles signatures of function-like macros.
+
+        If the objtype is 'function' and the the signature ``sig`` is a
+        function-like macro, the name of the macro is returned. Otherwise
+        ``False`` is returned.  """
+
+        if not self.objtype == 'function':
+            return False
+
+        m = c_funcptr_sig_re.match(sig)
+        if m is None:
+            m = c_sig_re.match(sig)
+            if m is None:
+                raise ValueError('no match')
+
+        rettype, fullname, arglist, _const = m.groups()
+        arglist = arglist.strip()
+        if rettype or not arglist:
+            return False
+
+        arglist = arglist.replace('`', '').replace('\\ ', '') # remove markup
+        arglist = [a.strip() for a in arglist.split(",")]
+
+        # has the first argument a type?
+        if len(arglist[0].split(" ")) > 1:
+            return False
+
+        # This is a function-like macro, it's arguments are typeless!
+        signode  += addnodes.desc_name(fullname, fullname)
+        paramlist = addnodes.desc_parameterlist()
+        signode  += paramlist
+
+        for argname in arglist:
+            param = addnodes.desc_parameter('', '', noemph=True)
+            # separate by non-breaking space in the output
+            param += nodes.emphasis(argname, argname)
+            paramlist += param
+
+        return fullname
+
+    def handle_signature(self, sig, signode):
+        """Transform a C signature into RST nodes."""
+
+        fullname = self.handle_func_like_macro(sig, signode)
+        if not fullname:
+            fullname = super(CObject, self).handle_signature(sig, signode)
+
+        if "name" in self.options:
+            if self.objtype == 'function':
+                fullname = self.options["name"]
+            else:
+                # FIXME: handle :name: value of other declaration types?
+                pass
+        return fullname
+
+    def add_target_and_index(self, name, sig, signode):
+        # for C API items we add a prefix since names are usually not qualified
+        # by a module name and so easily clash with e.g. section titles
+        targetname = 'c.' + name
+        if targetname not in self.state.document.ids:
+            signode['names'].append(targetname)
+            signode['ids'].append(targetname)
+            signode['first'] = (not self.names)
+            self.state.document.note_explicit_target(signode)
+            inv = self.env.domaindata['c']['objects']
+            if (name in inv and self.env.config.nitpicky):
+                if self.objtype == 'function':
+                    if ('c:func', name) not in self.env.config.nitpick_ignore:
+                        self.state_machine.reporter.warning(
+                            'duplicate C object description of %s, ' % name +
+                            'other instance in ' + self.env.doc2path(inv[name][0]),
+                            line=self.lineno)
+            inv[name] = (self.env.docname, self.objtype)
+
+        indextext = self.get_index_text(name)
+        if indextext:
+            if major == 1 and minor < 4:
+                # indexnode's tuple changed in 1.4
+                # https://github.com/sphinx-doc/sphinx/commit/e6a5a3a92e938fcd75866b4227db9e0524d58f7c
+                self.indexnode['entries'].append(
+                    ('single', indextext, targetname, ''))
+            else:
+                self.indexnode['entries'].append(
+                    ('single', indextext, targetname, '', None))
+
+class CDomain(Base_CDomain):
+
+    """C language domain."""
+    name = 'c'
+    label = 'C'
+    directives = {
+        'function': CObject,
+        'member':   CObject,
+        'macro':    CObject,
+        'type':     CObject,
+        'var':      CObject,
+    }
diff --git a/Documentation/sphinx/kernel-doc.py b/Documentation/sphinx/kernel-doc.py
index f6920c0..d15e07f 100644
--- a/Documentation/sphinx/kernel-doc.py
+++ b/Documentation/sphinx/kernel-doc.py
@@ -39,6 +39,8 @@
 from sphinx.util.compat import Directive
 from sphinx.ext.autodoc import AutodocReporter
 
+__version__  = '1.0'
+
 class KernelDocDirective(Directive):
     """Extract kernel-doc comments from the specified file"""
     required_argument = 1
@@ -139,3 +141,9 @@
     app.add_config_value('kerneldoc_verbosity', 1, 'env')
 
     app.add_directive('kernel-doc', KernelDocDirective)
+
+    return dict(
+        version = __version__,
+        parallel_read_safe = True,
+        parallel_write_safe = True
+    )
diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py
index db57382..f523aa6 100755
--- a/Documentation/sphinx/kernel_include.py
+++ b/Documentation/sphinx/kernel_include.py
@@ -39,11 +39,18 @@
 from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
 from docutils.parsers.rst.directives.misc import Include
 
+__version__  = '1.0'
+
 # ==============================================================================
 def setup(app):
 # ==============================================================================
 
     app.add_directive("kernel-include", KernelInclude)
+    return dict(
+        version = __version__,
+        parallel_read_safe = True,
+        parallel_write_safe = True
+    )
 
 # ==============================================================================
 class KernelInclude(Include):
diff --git a/Documentation/sphinx/load_config.py b/Documentation/sphinx/load_config.py
new file mode 100644
index 0000000..301a21a
--- /dev/null
+++ b/Documentation/sphinx/load_config.py
@@ -0,0 +1,32 @@
+# -*- coding: utf-8; mode: python -*-
+# pylint: disable=R0903, C0330, R0914, R0912, E0401
+
+import os
+import sys
+from sphinx.util.pycompat import execfile_
+
+# ------------------------------------------------------------------------------
+def loadConfig(namespace):
+# ------------------------------------------------------------------------------
+
+    u"""Load an additional configuration file into *namespace*.
+
+    The name of the configuration file is taken from the environment
+    ``SPHINX_CONF``. The external configuration file extends (or overwrites) the
+    configuration values from the origin ``conf.py``.  With this you are able to
+    maintain *build themes*.  """
+
+    config_file = os.environ.get("SPHINX_CONF", None)
+    if (config_file is not None
+        and os.path.normpath(namespace["__file__"]) != os.path.normpath(config_file) ):
+        config_file = os.path.abspath(config_file)
+
+        if os.path.isfile(config_file):
+            sys.stdout.write("load additional sphinx-config: %s\n" % config_file)
+            config = namespace.copy()
+            config['__file__'] = config_file
+            execfile_(config_file, config)
+            del config['__file__']
+            namespace.update(config)
+        else:
+            sys.stderr.write("WARNING: additional sphinx-config not found: %s\n" % config_file)
diff --git a/Documentation/sphinx/parse-headers.pl b/Documentation/sphinx/parse-headers.pl
index 34bd9e2..74089b0 100755
--- a/Documentation/sphinx/parse-headers.pl
+++ b/Documentation/sphinx/parse-headers.pl
@@ -220,7 +220,7 @@
 #
 # Add escape codes for special characters
 #
-$data =~ s,([\_\`\*\<\>\&\\\\:\/\|]),\\$1,g;
+$data =~ s,([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^]),\\$1,g;
 
 $data =~ s,DEPRECATED,**DEPRECATED**,g;
 
diff --git a/Documentation/sphinx/rstFlatTable.py b/Documentation/sphinx/rstFlatTable.py
old mode 100644
new mode 100755
index 26db852..55f2757
--- a/Documentation/sphinx/rstFlatTable.py
+++ b/Documentation/sphinx/rstFlatTable.py
@@ -73,6 +73,12 @@
     roles.register_local_role('cspan', c_span)
     roles.register_local_role('rspan', r_span)
 
+    return dict(
+        version = __version__,
+        parallel_read_safe = True,
+        parallel_write_safe = True
+    )
+
 # ==============================================================================
 def c_span(name, rawtext, text, lineno, inliner, options=None, content=None):
 # ==============================================================================
diff --git a/Documentation/stable_api_nonsense.txt b/Documentation/stable_api_nonsense.txt
index db3be89..24f5aee 100644
--- a/Documentation/stable_api_nonsense.txt
+++ b/Documentation/stable_api_nonsense.txt
@@ -1,17 +1,26 @@
+.. _stable_api_nonsense:
+
 The Linux Kernel Driver Interface
+==================================
+
 (all of your questions answered and then some)
 
 Greg Kroah-Hartman <greg@kroah.com>
 
-This is being written to try to explain why Linux does not have a binary
-kernel interface, nor does it have a stable kernel interface.  Please
-realize that this article describes the _in kernel_ interfaces, not the
-kernel to userspace interfaces.  The kernel to userspace interface is
-the one that application programs use, the syscall interface.  That
-interface is _very_ stable over time, and will not break.  I have old
-programs that were built on a pre 0.9something kernel that still work
-just fine on the latest 2.6 kernel release.  That interface is the one
-that users and application programmers can count on being stable.
+This is being written to try to explain why Linux **does not have a binary
+kernel interface, nor does it have a stable kernel interface**.
+
+.. note::
+
+  Please realize that this article describes the **in kernel** interfaces, not
+  the kernel to userspace interfaces.
+
+  The kernel to userspace interface is the one that application programs use,
+  the syscall interface.  That interface is **very** stable over time, and
+  will not break.  I have old programs that were built on a pre 0.9something
+  kernel that still work just fine on the latest 2.6 kernel release.
+  That interface is the one that users and application programmers can count
+  on being stable.
 
 
 Executive Summary
@@ -33,7 +42,7 @@
 the world, they neither see this interface, nor do they care about it at
 all.
 
-First off, I'm not going to address _any_ legal issues about closed
+First off, I'm not going to address **any** legal issues about closed
 source, hidden source, binary blobs, source wrappers, or any other term
 that describes kernel drivers that do not have their source code
 released under the GPL.  Please consult a lawyer if you have any legal
@@ -51,19 +60,23 @@
 Assuming that we had a stable kernel source interface for the kernel, a
 binary interface would naturally happen too, right?  Wrong.  Please
 consider the following facts about the Linux kernel:
+
   - Depending on the version of the C compiler you use, different kernel
     data structures will contain different alignment of structures, and
     possibly include different functions in different ways (putting
     functions inline or not.)  The individual function organization
     isn't that important, but the different data structure padding is
     very important.
+
   - Depending on what kernel build options you select, a wide range of
     different things can be assumed by the kernel:
+
       - different structures can contain different fields
       - Some functions may not be implemented at all, (i.e. some locks
 	compile away to nothing for non-SMP builds.)
       - Memory within the kernel can be aligned in different ways,
 	depending on the build options.
+
   - Linux runs on a wide range of different processor architectures.
     There is no way that binary drivers from one architecture will run
     on another architecture properly.
@@ -105,6 +118,7 @@
 undergone at least three different reworks over the lifetime of this
 subsystem.  These reworks were done to address a number of different
 issues:
+
   - A change from a synchronous model of data streams to an asynchronous
     one.  This reduced the complexity of a number of drivers and
     increased the throughput of all USB drivers such that we are now
@@ -166,6 +180,7 @@
 
 The very good side effects of having your driver in the main kernel tree
 are:
+
   - The quality of the driver will rise as the maintenance costs (to the
     original developer) will decrease.
   - Other developers will add features to your driver.
@@ -175,7 +190,7 @@
     changes require it.
   - The driver automatically gets shipped in all Linux distributions
     without having to ask the distros to add it.
-    
+
 As Linux supports a larger number of different devices "out of the box"
 than any other operating system, and it supports these devices on more
 different processor architectures than any other operating system, this
diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt
index ffd4575..4d82e31 100644
--- a/Documentation/stable_kernel_rules.txt
+++ b/Documentation/stable_kernel_rules.txt
@@ -1,4 +1,7 @@
-Everything you ever wanted to know about Linux -stable releases.
+.. _stable_kernel_rules:
+
+Everything you ever wanted to know about Linux -stable releases
+===============================================================
 
 Rules on what kind of patches are accepted, and which ones are not, into the
 "-stable" tree:
@@ -23,68 +26,94 @@
    race can be exploited is also provided.
  - It cannot contain any "trivial" fixes in it (spelling changes,
    whitespace cleanups, etc).
- - It must follow the Documentation/SubmittingPatches rules.
+ - It must follow the
+   :ref:`Documentation/SubmittingPatches <submittingpatches>`
+   rules.
  - It or an equivalent fix must already exist in Linus' tree (upstream).
 
 
-Procedure for submitting patches to the -stable tree:
+Procedure for submitting patches to the -stable tree
+----------------------------------------------------
 
  - If the patch covers files in net/ or drivers/net please follow netdev stable
    submission guidelines as described in
    Documentation/networking/netdev-FAQ.txt
  - Security patches should not be handled (solely) by the -stable review
-   process but should follow the procedures in Documentation/SecurityBugs.
+   process but should follow the procedures in
+   :ref:`Documentation/SecurityBugs <securitybugs>`.
 
-For all other submissions, choose one of the following procedures:
+For all other submissions, choose one of the following procedures
+-----------------------------------------------------------------
 
-   --- Option 1 ---
+.. _option_1:
 
-   To have the patch automatically included in the stable tree, add the tag
+Option 1
+********
+
+To have the patch automatically included in the stable tree, add the tag
+
+.. code-block:: none
+
      Cc: stable@vger.kernel.org
-   in the sign-off area. Once the patch is merged it will be applied to
-   the stable tree without anything else needing to be done by the author
-   or subsystem maintainer.
 
-   --- Option 2 ---
+in the sign-off area. Once the patch is merged it will be applied to
+the stable tree without anything else needing to be done by the author
+or subsystem maintainer.
 
-   After the patch has been merged to Linus' tree, send an email to
-   stable@vger.kernel.org containing the subject of the patch, the commit ID,
-   why you think it should be applied, and what kernel version you wish it to
-   be applied to.
+.. _option_2:
 
-   --- Option 3 ---
+Option 2
+********
 
-   Send the patch, after verifying that it follows the above rules, to
-   stable@vger.kernel.org.  You must note the upstream commit ID in the
-   changelog of your submission, as well as the kernel version you wish
-   it to be applied to.
+After the patch has been merged to Linus' tree, send an email to
+stable@vger.kernel.org containing the subject of the patch, the commit ID,
+why you think it should be applied, and what kernel version you wish it to
+be applied to.
 
-Option 1 is *strongly* preferred, is the easiest and most common.  Options 2 and
-3 are more useful if the patch isn't deemed worthy at the time it is applied to
-a public git tree (for instance, because it deserves more regression testing
-first).  Option 3 is especially useful if the patch needs some special handling
-to apply to an older kernel (e.g., if API's have changed in the meantime).
+.. _option_3:
 
-Note that for Option 3, if the patch deviates from the original upstream patch
-(for example because it had to be backported) this must be very clearly
-documented and justified in the patch description.
+Option 3
+********
+
+Send the patch, after verifying that it follows the above rules, to
+stable@vger.kernel.org.  You must note the upstream commit ID in the
+changelog of your submission, as well as the kernel version you wish
+it to be applied to.
+
+:ref:`option_1` is **strongly** preferred, is the easiest and most common.
+:ref:`option_2` and :ref:`option_3` are more useful if the patch isn't deemed
+worthy at the time it is applied to a public git tree (for instance, because
+it deserves more regression testing first).  :ref:`option_3` is especially
+useful if the patch needs some special handling to apply to an older kernel
+(e.g., if API's have changed in the meantime).
+
+Note that for :ref:`option_3`, if the patch deviates from the original
+upstream patch (for example because it had to be backported) this must be very
+clearly documented and justified in the patch description.
 
 The upstream commit ID must be specified with a separate line above the commit
 text, like this:
 
+.. code-block:: none
+
     commit <sha1> upstream.
 
 Additionally, some patches submitted via Option 1 may have additional patch
 prerequisites which can be cherry-picked. This can be specified in the following
 format in the sign-off area:
 
+.. code-block:: none
+
      Cc: <stable@vger.kernel.org> # 3.3.x: a1f84a3: sched: Check for idle
      Cc: <stable@vger.kernel.org> # 3.3.x: 1b9508f: sched: Rate-limit newidle
      Cc: <stable@vger.kernel.org> # 3.3.x: fd21073: sched: Fix affinity logic
      Cc: <stable@vger.kernel.org> # 3.3.x
-    Signed-off-by: Ingo Molnar <mingo@elte.hu>
+     Signed-off-by: Ingo Molnar <mingo@elte.hu>
 
-   The tag sequence has the meaning of:
+The tag sequence has the meaning of:
+
+.. code-block:: none
+
      git cherry-pick a1f84a3
      git cherry-pick 1b9508f
      git cherry-pick fd21073
@@ -93,12 +122,17 @@
 Also, some patches may have kernel version prerequisites.  This can be
 specified in the following format in the sign-off area:
 
+.. code-block:: none
+
      Cc: <stable@vger.kernel.org> # 3.3.x-
 
-   The tag has the meaning of:
+The tag has the meaning of:
+
+.. code-block:: none
+
      git cherry-pick <this commit>
 
-   For each "-stable" tree starting with the specified version.
+For each "-stable" tree starting with the specified version.
 
 Following the submission:
 
@@ -109,7 +143,8 @@
    other developers and by the relevant subsystem maintainer.
 
 
-Review cycle:
+Review cycle
+------------
 
  - When the -stable maintainers decide for a review cycle, the patches will be
    sent to the review committee, and the maintainer of the affected area of
@@ -125,17 +160,22 @@
    security kernel team, and not go through the normal review cycle.
    Contact the kernel security team for more details on this procedure.
 
-Trees:
+Trees
+-----
 
  - The queues of patches, for both completed versions and in progress
    versions can be found at:
+
 	http://git.kernel.org/?p=linux/kernel/git/stable/stable-queue.git
+
  - The finalized and tagged releases of all stable kernels can be found
    in separate branches per version at:
+
 	http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git
 
 
-Review committee:
+Review committee
+----------------
 
  - This is made up of a number of kernel developers who have volunteered for
    this task, and a few that haven't.
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
index 477927b..ea8d7b4 100644
--- a/Documentation/static-keys.txt
+++ b/Documentation/static-keys.txt
@@ -15,6 +15,8 @@
 
 DEFINE_STATIC_KEY_TRUE(key);
 DEFINE_STATIC_KEY_FALSE(key);
+DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
+DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
 static_branch_likely()
 static_branch_unlikely()
 
@@ -140,6 +142,13 @@
 key is initialized false, a 'static_branch_inc()', will change the branch to
 true. And then a 'static_branch_dec()', will again make the branch false.
 
+Where an array of keys is required, it can be defined as:
+
+	DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
+
+or:
+
+	DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
 
 4) Architecture level code patching interface, 'jump labels'
 
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index dd5f916..a273dd0 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -203,6 +203,17 @@
 
 Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer.
 
+HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+--------------------------------
+
+An arch may pass in a pointer to the return address on the stack.  This
+prevents potential stack unwinding issues where the unwinder gets out of
+sync with ret_stack and the wrong addresses are reported by
+ftrace_graph_ret_addr().
+
+Adding support for it is easy: just define the macro in asm/ftrace.h and
+pass the return address pointer as the 'retp' argument to
+ftrace_push_return_trace().
 
 HAVE_FTRACE_NMI_ENTER
 ---------------------
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index ea52ec1..e4991fb 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -44,8 +44,8 @@
   +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
-		  (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
-		  are supported.
+		  (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
+		  (x8/x16/x32/x64), "string" and bitfield are supported.
 
   (*) only for return probe.
   (**) this is useful for fetching a field of data structures.
@@ -54,7 +54,10 @@
 -----
 Several types are supported for fetch-args. Kprobe tracer will access memory
 by given type. Prefix 's' and 'u' means those types are signed and unsigned
-respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
+respectively. 'x' prefix implies it is unsigned. Traced arguments are shown
+in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32'
+or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and
+x86-64 uses x64).
 String type is a special type, which fetches a "null-terminated" string from
 kernel space. This means it will fail and store NULL if the string container
 has been paged out.
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index 72d1cd4..94b6b45 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -40,8 +40,8 @@
    +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
    NAME=FETCHARG     : Set NAME as the argument name of FETCHARG.
    FETCHARG:TYPE     : Set TYPE as the type of FETCHARG. Currently, basic types
-		       (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
-		       are supported.
+		       (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
+		       (x8/x16/x32/x64), "string" and bitfield are supported.
 
   (*) only for return probe.
   (**) this is useful for fetching a field of data structures.
@@ -50,7 +50,10 @@
 -----
 Several types are supported for fetch-args. Uprobe tracer will access memory
 by given type. Prefix 's' and 'u' means those types are signed and unsigned
-respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
+respectively. 'x' prefix implies it is unsigned. Traced arguments are shown
+in decimal ('s' and 'u') or hexadecimal ('x'). Without type casting, 'x32'
+or 'x64' is used depends on the architecture (e.g. x86-32 uses x32, and
+x86-64 uses x64).
 String type is a special type, which fetches a "null-terminated" string from
 user space.
 Bitfield is another special type, which takes 3 parameters, bit-width, bit-
diff --git a/Documentation/vme_api.txt b/Documentation/vme_api.txt
index ca5b827..9000655 100644
--- a/Documentation/vme_api.txt
+++ b/Documentation/vme_api.txt
@@ -8,13 +8,14 @@
 with the VME subsystem, typically called from the devices init routine.  This is
 achieved via a call to the following function:
 
-	int vme_register_driver (struct vme_driver *driver);
+	int vme_register_driver (struct vme_driver *driver, unsigned int ndevs);
 
 If driver registration is successful this function returns zero, if an error
 occurred a negative error code will be returned.
 
 A pointer to a structure of type 'vme_driver' must be provided to the
-registration function. The structure is as follows:
+registration function. Along with ndevs, which is the number of devices your
+driver is able to support. The structure is as follows:
 
 	struct vme_driver {
 		struct list_head node;
@@ -32,8 +33,8 @@
 should be correctly set. The '.name' element is a pointer to a string holding
 the device driver's name.
 
-The '.match' function allows controlling the number of devices that need to
-be registered. The match function should return 1 if a device should be
+The '.match' function allows control over which VME devices should be registered
+with the driver. The match function should return 1 if a device should be
 probed and 0 otherwise. This example match function (from vme_user.c) limits
 the number of devices probed to one:
 
@@ -385,13 +386,13 @@
 adjacent locations:
 
 	int vme_lm_attach(struct vme_resource *res, int num,
-		void (*callback)(int));
+		void (*callback)(void *));
 
 	int vme_lm_detach(struct vme_resource *res, int num);
 
 The callback function is declared as follows.
 
-	void callback(int num);
+	void callback(void *data);
 
 
 Slot Detection
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 8c7dd595..5724092 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -12,13 +12,13 @@
 ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
 ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
-ffffec0000000000 - fffffc0000000000 (=44 bits) kasan shadow memory (16TB)
+ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
-ffffffef00000000 - ffffffff00000000 (=64 GB) EFI region mapping space
+ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
-ffffffff80000000 - ffffffffa0000000 (=512 MB)  kernel text mapping, from phys 0
+ffffffff80000000 - ffffffff9fffffff (=512 MB)  kernel text mapping, from phys 0
 ffffffffa0000000 - ffffffffff5fffff (=1526 MB) module mapping space
 ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
diff --git a/MAINTAINERS b/MAINTAINERS
index 20bb1d0..f4b944c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -636,6 +636,15 @@
 F:	include/linux/altera_uart.h
 F:	include/linux/altera_jtaguart.h
 
+AMAZON ETHERNET DRIVERS
+M:	Netanel Belgazal <netanel@annapurnalabs.com>
+R:	Saeed Bishara <saeed@annapurnalabs.com>
+R:	Zorik Machulsky <zorik@annapurnalabs.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	Documentation/networking/ena.txt
+F:	drivers/net/ethernet/amazon/
+
 AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
 M:	Tom Lendacky <thomas.lendacky@amd.com>
 M:	Gary Hook <gary.hook@amd.com>
@@ -798,6 +807,7 @@
 M:	Sumit Semwal <sumit.semwal@linaro.org>
 L:	devel@driverdev.osuosl.org
 S:	Supported
+F:	Documentation/devicetree/bindings/staging/ion/
 F:	drivers/staging/android/ion
 F:	drivers/staging/android/uapi/ion.h
 F:	drivers/staging/android/uapi/ion_test.h
@@ -881,6 +891,15 @@
 F:	drivers/gpu/drm/arc/
 F:	Documentation/devicetree/bindings/display/snps,arcpgu.txt
 
+ARM ARCHITECTED TIMER DRIVER
+M:	Mark Rutland <mark.rutland@arm.com>
+M:	Marc Zyngier <marc.zyngier@arm.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm/include/asm/arch_timer.h
+F:	arch/arm64/include/asm/arch_timer.h
+F:	drivers/clocksource/arm_arch_timer.c
+
 ARM HDLCD DRM DRIVER
 M:	Liviu Dudau <liviu.dudau@arm.com>
 S:	Supported
@@ -903,15 +922,17 @@
 
 ARM PMU PROFILING AND DEBUGGING
 M:	Will Deacon <will.deacon@arm.com>
-R:	Mark Rutland <mark.rutland@arm.com>
+M:	Mark Rutland <mark.rutland@arm.com>
 S:	Maintained
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 F:	arch/arm*/kernel/perf_*
 F:	arch/arm/oprofile/common.c
 F:	arch/arm*/kernel/hw_breakpoint.c
 F:	arch/arm*/include/asm/hw_breakpoint.h
 F:	arch/arm*/include/asm/perf_event.h
-F:	drivers/perf/arm_pmu.c
+F:	drivers/perf/*
 F:	include/linux/perf/arm_pmu.h
+F:	Documentation/devicetree/bindings/arm/pmu.txt
 
 ARM PORT
 M:	Russell King <linux@armlinux.org.uk>
@@ -1004,6 +1025,7 @@
 ARM/Annapurna Labs ALPINE ARCHITECTURE
 M:	Tsahee Zidenberg <tsahee@annapurnalabs.com>
 M:	Antoine Tenart <antoine.tenart@free-electrons.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm/mach-alpine/
 F:	arch/arm/boot/dts/alpine*
@@ -1112,6 +1134,11 @@
 F:	Documentation/trace/coresight.txt
 F:	Documentation/devicetree/bindings/arm/coresight.txt
 F:	Documentation/ABI/testing/sysfs-bus-coresight-devices-*
+F:	tools/perf/arch/arm/util/pmu.c
+F:	tools/perf/arch/arm/util/auxtrace.c
+F:	tools/perf/arch/arm/util/cs-etm.c
+F:	tools/perf/arch/arm/util/cs-etm.h
+F:	tools/perf/util/cs-etm.h
 
 ARM/CORGI MACHINE SUPPORT
 M:	Richard Purdie <rpurdie@rpsys.net>
@@ -1613,7 +1640,8 @@
 
 ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
 M:	Kukjin Kim <kgene@kernel.org>
-M:	Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:	Krzysztof Kozlowski <krzk@kernel.org>
+R:	Javier Martinez Canillas <javier@osg.samsung.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 S:	Maintained
@@ -1633,7 +1661,6 @@
 F:	drivers/*/*s5pv210*
 F:	drivers/memory/samsung/*
 F:	drivers/soc/samsung/*
-F:	drivers/spi/spi-s3c*
 F:	Documentation/arm/Samsung/
 F:	Documentation/devicetree/bindings/arm/samsung/
 F:	Documentation/devicetree/bindings/sram/samsung-sram.txt
@@ -1821,6 +1848,7 @@
 ARM/UNIPHIER ARCHITECTURE
 M:	Masahiro Yamada <yamada.masahiro@socionext.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-uniphier.git
 S:	Maintained
 F:	arch/arm/boot/dts/uniphier*
 F:	arch/arm/include/asm/hardware/cache-uniphier.h
@@ -1828,6 +1856,7 @@
 F:	arch/arm/mm/cache-uniphier.c
 F:	arch/arm64/boot/dts/socionext/
 F:	drivers/bus/uniphier-system-bus.c
+F:	drivers/clk/uniphier/
 F:	drivers/i2c/busses/i2c-uniphier*
 F:	drivers/pinctrl/uniphier/
 F:	drivers/tty/serial/8250/8250_uniphier.c
@@ -2102,11 +2131,6 @@
 S:	Maintained
 F:	drivers/mmc/host/atmel-mci.c
 
-ATMEL AT91 / AT32 SERIAL DRIVER
-M:	Nicolas Ferre <nicolas.ferre@atmel.com>
-S:	Supported
-F:	drivers/tty/serial/atmel_serial.c
-
 ATMEL AT91 SAMA5D2-Compatible Shutdown Controller
 M:	Nicolas Ferre <nicolas.ferre@atmel.com>
 S:	Supported
@@ -2474,7 +2498,7 @@
 BONDING DRIVER
 M:	Jay Vosburgh <j.vosburgh@gmail.com>
 M:	Veaceslav Falico <vfalico@gmail.com>
-M:	Andy Gospodarek <gospo@cumulusnetworks.com>
+M:	Andy Gospodarek <andy@greyhouse.net>
 L:	netdev@vger.kernel.org
 W:	http://sourceforge.net/projects/bonding/
 S:	Supported
@@ -2489,7 +2513,7 @@
 F:	kernel/bpf/
 
 BROADCOM B44 10/100 ETHERNET DRIVER
-M:	Gary Zambrano <zambrano@broadcom.com>
+M:	Michael Chan <michael.chan@broadcom.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	drivers/net/ethernet/broadcom/b44.*
@@ -3124,7 +3148,7 @@
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
 W:	http://coccinelle.lip6.fr/
 S:	Supported
-F:	Documentation/coccinelle.txt
+F:	Documentation/dev-tools/coccinelle.rst
 F:	scripts/coccinelle/
 F:	scripts/coccicheck
 
@@ -3150,6 +3174,7 @@
 M:	Michael Turquette <mturquette@baylibre.com>
 M:	Stephen Boyd <sboyd@codeaurora.org>
 L:	linux-clk@vger.kernel.org
+Q:	http://patchwork.kernel.org/project/linux-clk/list/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git
 S:	Maintained
 F:	Documentation/devicetree/bindings/clock/
@@ -3237,7 +3262,7 @@
 CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG)
 M:	Johannes Weiner <hannes@cmpxchg.org>
 M:	Michal Hocko <mhocko@kernel.org>
-M:	Vladimir Davydov <vdavydov@virtuozzo.com>
+M:	Vladimir Davydov <vdavydov.dev@gmail.com>
 L:	cgroups@vger.kernel.org
 L:	linux-mm@kvack.org
 S:	Maintained
@@ -3258,7 +3283,7 @@
 F:	drivers/net/wan/cosa*
 
 CPMAC ETHERNET DRIVER
-M:	Florian Fainelli <florian@openwrt.org>
+M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/ethernet/ti/cpmac.c
@@ -3270,6 +3295,7 @@
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
 T:	git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
+F:	Documentation/cpu-freq/
 F:	drivers/cpufreq/
 F:	include/linux/cpufreq.h
 
@@ -4393,7 +4419,6 @@
 F:	fs/ecryptfs/
 
 EDAC-CORE
-M:	Doug Thompson <dougthompson@xmission.com>
 M:	Borislav Petkov <bp@alien8.de>
 M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:	Mauro Carvalho Chehab <mchehab@kernel.org>
@@ -4406,14 +4431,12 @@
 F:	include/linux/edac.h
 
 EDAC-AMD64
-M:	Doug Thompson <dougthompson@xmission.com>
 M:	Borislav Petkov <bp@alien8.de>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/amd64_edac*
 
 EDAC-CALXEDA
-M:	Doug Thompson <dougthompson@xmission.com>
 M:	Robert Richter <rric@kernel.org>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
@@ -4429,17 +4452,21 @@
 
 EDAC-E752X
 M:	Mark Gross <mark.gross@intel.com>
-M:	Doug Thompson <dougthompson@xmission.com>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/e752x_edac.c
 
 EDAC-E7XXX
-M:	Doug Thompson <dougthompson@xmission.com>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/e7xxx_edac.c
 
+EDAC-FSL_DDR
+M:	York Sun <york.sun@nxp.com>
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	drivers/edac/fsl_ddr_edac.*
+
 EDAC-GHES
 M:	Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M:	Mauro Carvalho Chehab <mchehab@kernel.org>
@@ -4454,13 +4481,11 @@
 F:	drivers/edac/i82443bxgx_edac.c
 
 EDAC-I3000
-M:	Jason Uhlenkott <juhlenko@akamai.com>
 L:	linux-edac@vger.kernel.org
-S:	Maintained
+S:	Orphan
 F:	drivers/edac/i3000_edac.c
 
 EDAC-I5000
-M:	Doug Thompson <dougthompson@xmission.com>
 L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/i5000_edac.c
@@ -4524,6 +4549,12 @@
 S:	Maintained
 F:	drivers/edac/sb_edac.c
 
+EDAC-SKYLAKE
+M:	Tony Luck <tony.luck@intel.com>
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	drivers/edac/skx_edac.c
+
 EDAC-XGENE
 APPLIED MICRO (APM) X-GENE SOC EDAC
 M:     Loc Ho <lho@apm.com>
@@ -4566,6 +4597,13 @@
 S:	Maintained
 F:	drivers/video/fbdev/efifb.c
 
+EFI TEST DRIVER
+L:	linux-efi@vger.kernel.org
+M:	Ivan Hu <ivan.hu@canonical.com>
+M:	Matt Fleming <matt@codeblueprint.co.uk>
+S:	Maintained
+F:	drivers/firmware/efi/test/
+
 EFS FILESYSTEM
 W:	http://aeschi.ch.eu.org/efs/
 S:	Orphan
@@ -4833,6 +4871,7 @@
 
 FIRMWARE LOADER (request_firmware)
 M:	Ming Lei <ming.lei@canonical.com>
+M:	Luis R. Rodriguez <mcgrof@kernel.org>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	Documentation/firmware_class/
@@ -5118,7 +5157,7 @@
 M:	Peter Oberparleiter <oberpar@linux.vnet.ibm.com>
 S:	Maintained
 F:	kernel/gcov/
-F:	Documentation/gcov.txt
+F:	Documentation/dev-tools/gcov.rst
 
 GDT SCSI DISK ARRAY CONTROLLER DRIVER
 M:	Achim Leubner <achim_leubner@adaptec.com>
@@ -5566,10 +5605,9 @@
 
 HOST AP DRIVER
 M:	Jouni Malinen <j@w1.fi>
-L:	hostap@shmoo.com (subscribers-only)
 L:	linux-wireless@vger.kernel.org
-W:	http://hostap.epitest.fi/
-S:	Maintained
+W:	http://w1.fi/hostap-driver.html
+S:	Obsolete
 F:	drivers/net/wireless/intersil/hostap/
 
 HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER
@@ -5606,7 +5644,7 @@
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-hsi.git
 S:	Maintained
 F:	Documentation/ABI/testing/sysfs-bus-hsi
-F:	Documentation/hsi.txt
+F:	Documentation/device-drivers/serial-interfaces.rst
 F:	drivers/hsi/
 F:	include/linux/hsi/
 F:	include/uapi/linux/hsi/
@@ -6085,7 +6123,7 @@
 F:	drivers/cpufreq/intel_pstate.c
 
 INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
-M:	Maik Broemme <mbroemme@plusserver.de>
+M:	Maik Broemme <mbroemme@libmpq.org>
 L:	linux-fbdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/fb/intelfb.txt
@@ -6587,7 +6625,7 @@
 S:	Maintained
 F:	arch/*/include/asm/kasan.h
 F:	arch/*/mm/kasan_init*
-F:	Documentation/kasan.txt
+F:	Documentation/dev-tools/kasan.rst
 F:	include/linux/kasan*.h
 F:	lib/test_kasan.c
 F:	mm/kasan/
@@ -6803,7 +6841,7 @@
 M:	Vegard Nossum <vegardno@ifi.uio.no>
 M:	Pekka Enberg <penberg@kernel.org>
 S:	Maintained
-F:	Documentation/kmemcheck.txt
+F:	Documentation/dev-tools/kmemcheck.rst
 F:	arch/x86/include/asm/kmemcheck.h
 F:	arch/x86/mm/kmemcheck/
 F:	include/linux/kmemcheck.h
@@ -6812,7 +6850,7 @@
 KMEMLEAK
 M:	Catalin Marinas <catalin.marinas@arm.com>
 S:	Maintained
-F:	Documentation/kmemleak.txt
+F:	Documentation/dev-tools/kmemleak.rst
 F:	include/linux/kmemleak.h
 F:	mm/kmemleak.c
 F:	mm/kmemleak-test.c
@@ -7425,9 +7463,8 @@
 F:	drivers/hwmon/max20751.c
 
 MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER
-M:	"Hans J. Koch" <hjk@hansjkoch.de>
 L:	linux-hwmon@vger.kernel.org
-S:	Maintained
+S:	Orphan
 F:	Documentation/hwmon/max6650
 F:	drivers/hwmon/max6650.c
 
@@ -7448,7 +7485,8 @@
 F:	sound/soc/codecs/max9860.*
 
 MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M:	Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:	Krzysztof Kozlowski <krzk@kernel.org>
+M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	linux-pm@vger.kernel.org
 S:	Supported
 F:	drivers/power/max14577_charger.c
@@ -7464,7 +7502,8 @@
 
 MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
 M:	Chanwoo Choi <cw00.choi@samsung.com>
-M:	Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:	Krzysztof Kozlowski <krzk@kernel.org>
+M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	linux-kernel@vger.kernel.org
 S:	Supported
 F:	drivers/*/max14577*.c
@@ -7648,13 +7687,26 @@
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
 F:	drivers/net/ethernet/mellanox/mlxsw/
 
+MELLANOX MLXCPLD LED DRIVER
+M:	Vadim Pasternak <vadimp@mellanox.com>
+L:	linux-leds@vger.kernel.org
+S:	Supported
+F:	drivers/leds/leds-mlxcpld.c
+F:	Documentation/leds/leds-mlxcpld.txt
+
+MELLANOX PLATFORM DRIVER
+M:      Vadim Pasternak <vadimp@mellanox.com>
+L:      platform-driver-x86@vger.kernel.org
+S:      Supported
+F:      arch/x86/platform/mellanox/mlx-platform.c
+
 SOFT-ROCE DRIVER (rxe)
 M:	Moni Shoua <monis@mellanox.com>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 W:	https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home
 Q:	http://patchwork.kernel.org/project/linux-rdma/list/
-F:	drivers/infiniband/hw/rxe/
+F:	drivers/infiniband/sw/rxe/
 F:	include/uapi/rdma/rdma_user_rxe.h
 
 MEMBARRIER SUPPORT
@@ -7733,6 +7785,12 @@
 S:	Supported
 F:	arch/microblaze/
 
+MICROCHIP / ATMEL AT91 / AT32 SERIAL DRIVER
+M:	Richard Genoud <richard.genoud@gmail.com>
+S:	Maintained
+F:	drivers/tty/serial/atmel_serial.c
+F:	include/linux/atmel_serial.h
+
 MICROSOFT SURFACE PRO 3 BUTTON DRIVER
 M:	Chen Yu <yu.c.chen@intel.com>
 L:	platform-driver-x86@vger.kernel.org
@@ -8141,6 +8199,15 @@
 W:	https://fedorahosted.org/dropwatch/
 F:	net/core/drop_monitor.c
 
+NETWORKING [DSA]
+M:	Andrew Lunn <andrew@lunn.ch>
+M:	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+M:	Florian Fainelli <f.fainelli@gmail.com>
+S:	Maintained
+F:	net/dsa/
+F:	include/net/dsa.h
+F:	drivers/net/dsa/
+
 NETWORKING [GENERAL]
 M:	"David S. Miller" <davem@davemloft.net>
 L:	netdev@vger.kernel.org
@@ -8716,7 +8783,7 @@
 F:	include/linux/oprofile.h
 
 ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
-M:	Mark Fasheh <mfasheh@suse.com>
+M:	Mark Fasheh <mfasheh@versity.com>
 M:	Joel Becker <jlbec@evilplan.org>
 L:	ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
 W:	http://ocfs2.wiki.kernel.org
@@ -8828,6 +8895,7 @@
 F:	Documentation/virtual/paravirt_ops.txt
 F:	arch/*/kernel/paravirt*
 F:	arch/*/include/asm/paravirt.h
+F:	include/linux/hypervisor.h
 
 PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
 M:	Tim Waugh <tim@cyberelk.net>
@@ -9230,7 +9298,7 @@
 
 PIN CONTROLLER - SAMSUNG
 M:	Tomasz Figa <tomasz.figa@gmail.com>
-M:	Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:	Krzysztof Kozlowski <krzk@kernel.org>
 M:	Sylwester Nawrocki <s.nawrocki@samsung.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
@@ -9671,6 +9739,12 @@
 S:	Supported
 F:	drivers/net/wireless/ath/ath10k/
 
+QUALCOMM EMAC GIGABIT ETHERNET DRIVER
+M:	Timur Tabi <timur@codeaurora.org>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/ethernet/qualcomm/emac/
+
 QUALCOMM HEXAGON ARCHITECTURE
 M:	Richard Kuo <rkuo@codeaurora.org>
 L:	linux-hexagon@vger.kernel.org
@@ -9891,6 +9965,12 @@
 F:	Documentation/rpmsg.txt
 F:	include/linux/rpmsg.h
 
+RENESAS CLOCK DRIVERS
+M:	Geert Uytterhoeven <geert+renesas@glider.be>
+L:	linux-renesas-soc@vger.kernel.org
+S:	Supported
+F:	drivers/clk/renesas/
+
 RENESAS ETHERNET DRIVERS
 R:	Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
 L:	netdev@vger.kernel.org
@@ -9926,6 +10006,7 @@
 
 RHASHTABLE
 M:	Thomas Graf <tgraf@suug.ch>
+M:	Herbert Xu <herbert@gondor.apana.org.au>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	lib/rhashtable.c
@@ -10069,8 +10150,8 @@
 F:	drivers/s390/cio/
 
 S390 DASD DRIVER
-M:	Stefan Weinhuber <wein@de.ibm.com>
-M:	Stefan Haberland <stefan.haberland@de.ibm.com>
+M:	Stefan Haberland <sth@linux.vnet.ibm.com>
+M:	Jan Hoeppner <hoeppner@linux.vnet.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
@@ -10163,7 +10244,7 @@
 F:	drivers/platform/x86/samsung-laptop.c
 
 SAMSUNG AUDIO (ASoC) DRIVERS
-M:	Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:	Krzysztof Kozlowski <krzk@kernel.org>
 M:	Sangbeom Kim <sbkim73@samsung.com>
 M:	Sylwester Nawrocki <s.nawrocki@samsung.com>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
@@ -10178,7 +10259,8 @@
 
 SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
 M:	Sangbeom Kim <sbkim73@samsung.com>
-M:	Krzysztof Kozlowski <k.kozlowski@samsung.com>
+M:	Krzysztof Kozlowski <krzk@kernel.org>
+M:	Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
 L:	linux-kernel@vger.kernel.org
 L:	linux-samsung-soc@vger.kernel.org
 S:	Supported
@@ -10233,9 +10315,23 @@
 SAMSUNG SOC CLOCK DRIVERS
 M:	Sylwester Nawrocki <s.nawrocki@samsung.com>
 M:	Tomasz Figa <tomasz.figa@gmail.com>
+M:	Chanwoo Choi <cw00.choi@samsung.com>
 S:	Supported
 L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 F:	drivers/clk/samsung/
+F:	include/dt-bindings/clock/exynos*.h
+F:	Documentation/devicetree/bindings/clock/exynos*.txt
+
+SAMSUNG SPI DRIVERS
+M:	Kukjin Kim <kgene@kernel.org>
+M:	Krzysztof Kozlowski <krzk@kernel.org>
+M:	Andi Shyti <andi.shyti@samsung.com>
+L:	linux-spi@vger.kernel.org
+L:	linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
+S:	Maintained
+F:	Documentation/devicetree/bindings/spi/spi-samsung.txt
+F:	drivers/spi/spi-s3c*
+F:	include/linux/platform_data/spi-s3c64xx.h
 
 SAMSUNG SXGBE DRIVERS
 M:	Byungho An <bh74.an@samsung.com>
@@ -11083,6 +11179,7 @@
 F:	drivers/spi/
 F:	include/linux/spi/
 F:	include/uapi/linux/spi/
+F:	tools/spi/
 
 SPIDERNET NETWORK DRIVER for CELL
 M:	Ishizaki Kou <kou.ishizaki@toshiba.co.jp>
@@ -11216,12 +11313,8 @@
 F:	drivers/staging/vt665?/
 
 STAGING - WILC1000 WIFI DRIVER
-M:	Johnny Kim <johnny.kim@atmel.com>
-M:	Austin Shin <austin.shin@atmel.com>
-M:	Chris Park <chris.park@atmel.com>
-M:	Tony Cho <tony.cho@atmel.com>
-M:	Glen Lee <glen.lee@atmel.com>
-M:	Leo Kim <leo.kim@atmel.com>
+M:	Aditya Shankar <aditya.shankar@microchip.com>
+M:	Ganesh Krishna <ganesh.krishna@microchip.com>
 L:	linux-wireless@vger.kernel.org
 S:	Supported
 F:	drivers/staging/wilc1000/
@@ -11589,7 +11682,7 @@
 THERMAL/CPU_COOLING
 M:	Amit Daniel Kachhap <amit.kachhap@gmail.com>
 M:	Viresh Kumar <viresh.kumar@linaro.org>
-M:	Javi Merino <javi.merino@arm.com>
+M:	Javi Merino <javi.merino@kernel.org>
 L:	linux-pm@vger.kernel.org
 S:	Supported
 F:	Documentation/thermal/cpu-cooling-api.txt
@@ -12156,7 +12249,7 @@
 F:	drivers/net/usb/lan78xx.*
 
 USB MASS STORAGE DRIVER
-M:	Matthew Dharm <mdharm-usb@one-eyed-alien.net>
+M:	Alan Stern <stern@rowland.harvard.edu>
 L:	linux-usb@vger.kernel.org
 L:	usb-storage@lists.one-eyed-alien.net
 S:	Maintained
@@ -12240,6 +12333,7 @@
 USB SERIAL SUBSYSTEM
 M:	Johan Hovold <johan@kernel.org>
 L:	linux-usb@vger.kernel.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/johan/usb-serial.git
 S:	Maintained
 F:	Documentation/usb/usb-serial.txt
 F:	drivers/usb/serial/
@@ -12253,6 +12347,7 @@
 
 USB SMSC95XX ETHERNET DRIVER
 M:	Steve Glendinning <steve.glendinning@shawell.net>
+M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	drivers/net/usb/smsc95xx.*
@@ -12359,7 +12454,6 @@
 F:	fs/hppfs/
 
 USERSPACE I/O (UIO)
-M:	"Hans J. Koch" <hjk@hansjkoch.de>
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
@@ -12541,7 +12635,7 @@
 F:	net/8021q/
 
 VLYNQ BUS
-M:	Florian Fainelli <florian@openwrt.org>
+M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	openwrt-devel@lists.openwrt.org (subscribers-only)
 S:	Maintained
 F:	drivers/vlynq/vlynq.c
diff --git a/Makefile b/Makefile
index 70de144..addb235 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 8
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
 NAME = Psychotic Stoned Sheep
 
 # *DOCUMENTATION*
@@ -635,13 +635,6 @@
 # Tell gcc to never replace conditional load with a non-conditional one
 KBUILD_CFLAGS	+= $(call cc-option,--param=allow-store-data-races=0)
 
-PHONY += gcc-plugins
-gcc-plugins: scripts_basic
-ifdef CONFIG_GCC_PLUGINS
-	$(Q)$(MAKE) $(build)=scripts/gcc-plugins
-endif
-	@:
-
 include scripts/Makefile.gcc-plugins
 
 ifdef CONFIG_READABLE_ASM
@@ -1432,7 +1425,7 @@
 
 # Documentation targets
 # ---------------------------------------------------------------------------
-DOC_TARGETS := xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs epubdocs cleandocs
+DOC_TARGETS := xmldocs sgmldocs psdocs latexdocs pdfdocs htmldocs mandocs installmandocs epubdocs cleandocs
 PHONY += $(DOC_TARGETS)
 $(DOC_TARGETS): scripts_basic FORCE
 	$(Q)$(MAKE) $(build)=scripts build_docproc build_check-lc_ctype
diff --git a/README b/README
index e8c8a6d..09f34f78 100644
--- a/README
+++ b/README
@@ -229,10 +229,6 @@
       under some circumstances lead to problems: probing for a
       nonexistent controller card may confuse your other controllers
 
-    - Compiling the kernel with "Processor type" set higher than 386
-      will result in a kernel that does NOT work on a 386.  The
-      kernel will detect this on bootup, and give up.
-
     - A kernel with math-emulation compiled in will still use the
       coprocessor if one is present: the math emulation will just
       never get used in that case.  The kernel will be slightly larger,
@@ -289,7 +285,7 @@
    LOCALVERSION can be set in the "General Setup" menu.
 
  - In order to boot your new kernel, you'll need to copy the kernel
-   image (e.g. .../linux/arch/i386/boot/bzImage after compilation)
+   image (e.g. .../linux/arch/x86/boot/bzImage after compilation)
    to the place where your regular bootable kernel is found.
 
  - Booting a kernel directly from a floppy without the assistance of a
@@ -391,7 +387,7 @@
 
  - Alternatively, you can use gdb on a running kernel. (read-only; i.e. you
    cannot change values or set break points.) To do this, first compile the
-   kernel with -g; edit arch/i386/Makefile appropriately, then do a "make
+   kernel with -g; edit arch/x86/Makefile appropriately, then do a "make
    clean". You'll also need to enable CONFIG_PROC_FS (via "make config").
 
    After you've rebooted with the new kernel, do "gdb vmlinux /proc/kcore".
diff --git a/arch/Kconfig b/arch/Kconfig
index e9c9334..180ea33 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -336,17 +336,6 @@
 	    results in the system call being skipped immediately.
 	  - seccomp syscall wired up
 
-	  For best performance, an arch should use seccomp_phase1 and
-	  seccomp_phase2 directly.  It should call seccomp_phase1 for all
-	  syscalls if TIF_SECCOMP is set, but seccomp_phase1 does not
-	  need to be called from a ptrace-safe context.  It must then
-	  call seccomp_phase2 if seccomp_phase1 returns anything other
-	  than SECCOMP_PHASE1_OK or SECCOMP_PHASE1_SKIP.
-
-	  As an additional optimization, an arch may provide seccomp_data
-	  directly to seccomp_phase1; this avoids multiple calls
-	  to the syscall_xyz helpers for every syscall.
-
 config SECCOMP_FILTER
 	def_bool y
 	depends on HAVE_ARCH_SECCOMP_FILTER && SECCOMP && NET
@@ -707,4 +696,38 @@
 config CPU_NO_EFFICIENT_FFS
 	def_bool n
 
+config HAVE_ARCH_VMAP_STACK
+	def_bool n
+	help
+	  An arch should select this symbol if it can support kernel stacks
+	  in vmalloc space.  This means:
+
+	  - vmalloc space must be large enough to hold many kernel stacks.
+	    This may rule out many 32-bit architectures.
+
+	  - Stacks in vmalloc space need to work reliably.  For example, if
+	    vmap page tables are created on demand, either this mechanism
+	    needs to work while the stack points to a virtual address with
+	    unpopulated page tables or arch code (switch_to() and switch_mm(),
+	    most likely) needs to ensure that the stack's page table entries
+	    are populated before running on a possibly unpopulated stack.
+
+	  - If the stack overflows into a guard page, something reasonable
+	    should happen.  The definition of "reasonable" is flexible, but
+	    instantly rebooting without logging anything would be unfriendly.
+
+config VMAP_STACK
+	default y
+	bool "Use a virtually-mapped stack"
+	depends on HAVE_ARCH_VMAP_STACK && !KASAN
+	---help---
+	  Enable this if you want the use virtually-mapped kernel stacks
+	  with guard pages.  This causes kernel stack overflows to be
+	  caught immediately rather than causing difficult-to-diagnose
+	  corruption.
+
+	  This is presently incompatible with KASAN because KASAN expects
+	  the stack to map directly to the KASAN shadow map using a formula
+	  that is incorrect if the stack is in vmalloc space.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/include/asm/uaccess.h b/arch/alpha/include/asm/uaccess.h
index c419b43..466e42e 100644
--- a/arch/alpha/include/asm/uaccess.h
+++ b/arch/alpha/include/asm/uaccess.h
@@ -371,14 +371,6 @@
 	return __cu_len;
 }
 
-extern inline long
-__copy_tofrom_user(void *to, const void *from, long len, const void __user *validate)
-{
-	if (__access_ok((unsigned long)validate, len, get_fs()))
-		len = __copy_tofrom_user_nocheck(to, from, len);
-	return len;
-}
-
 #define __copy_to_user(to, from, n)					\
 ({									\
 	__chk_user_ptr(to);						\
@@ -393,17 +385,22 @@
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
 
-
 extern inline long
 copy_to_user(void __user *to, const void *from, long n)
 {
-	return __copy_tofrom_user((__force void *)to, from, n, to);
+	if (likely(__access_ok((unsigned long)to, n, get_fs())))
+		n = __copy_tofrom_user_nocheck((__force void *)to, from, n);
+	return n;
 }
 
 extern inline long
 copy_from_user(void *to, const void __user *from, long n)
 {
-	return __copy_tofrom_user(to, (__force void *)from, n, from);
+	if (likely(__access_ok((unsigned long)from, n, get_fs())))
+		n = __copy_tofrom_user_nocheck(to, (__force void *)from, n);
+	else
+		memset(to, 0, n);
+	return n;
 }
 
 extern void __do_clear_user(void);
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 0d3e59f..ecd1237 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -13,7 +13,7 @@
 	select CLKSRC_OF
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select GENERIC_ATOMIC64
+	select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_FIND_FIRST_BIT
 	# for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP
@@ -353,8 +353,8 @@
 
 config NODES_SHIFT
 	int "Maximum NUMA Nodes (as a power of 2)"
-	default "1" if !DISCONTIGMEM
-	default "2" if DISCONTIGMEM
+	default "0" if !DISCONTIGMEM
+	default "1" if DISCONTIGMEM
 	depends on NEED_MULTIPLE_NODES
 	---help---
 	  Accessing memory beyond 1GB (with or w/o PAE) requires 2 memory
diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 601ed17..aa82d13 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -47,7 +47,6 @@
 
 upto_gcc44    :=  $(call cc-ifversion, -le, 0404, y)
 atleast_gcc44 :=  $(call cc-ifversion, -ge, 0404, y)
-atleast_gcc48 :=  $(call cc-ifversion, -ge, 0408, y)
 
 cflags-$(atleast_gcc44)			+= -fsection-anchors
 
@@ -66,10 +65,8 @@
 
 endif
 
-# By default gcc 4.8 generates dwarf4 which kernel unwinder can't grok
-ifeq ($(atleast_gcc48),y)
-cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -gdwarf-2
-endif
+cfi := $(call as-instr,.cfi_startproc\n.cfi_endproc,-DARC_DW2_UNWIND_AS_CFI)
+cflags-$(CONFIG_ARC_DW2_UNWIND)		+= -fasynchronous-unwind-tables $(cfi)
 
 ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
 # Generic build system uses -O2, we want -O3
diff --git a/arch/arc/boot/dts/abilis_tb100_dvk.dts b/arch/arc/boot/dts/abilis_tb100_dvk.dts
index 3dd6ed9..3acf04d 100644
--- a/arch/arc/boot/dts/abilis_tb100_dvk.dts
+++ b/arch/arc/boot/dts/abilis_tb100_dvk.dts
@@ -24,6 +24,7 @@
 /include/ "abilis_tb100.dtsi"
 
 / {
+	model = "abilis,tb100";
 	chosen {
 		bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8";
 	};
diff --git a/arch/arc/boot/dts/abilis_tb101_dvk.dts b/arch/arc/boot/dts/abilis_tb101_dvk.dts
index 1cf51c2..37d88c5 100644
--- a/arch/arc/boot/dts/abilis_tb101_dvk.dts
+++ b/arch/arc/boot/dts/abilis_tb101_dvk.dts
@@ -24,6 +24,7 @@
 /include/ "abilis_tb101.dtsi"
 
 / {
+	model = "abilis,tb101";
 	chosen {
 		bootargs = "earlycon=uart8250,mmio32,0xff100000,9600n8 console=ttyS0,9600n8";
 	};
diff --git a/arch/arc/boot/dts/axs101.dts b/arch/arc/boot/dts/axs101.dts
index 3f9b058..d9b9b9d 100644
--- a/arch/arc/boot/dts/axs101.dts
+++ b/arch/arc/boot/dts/axs101.dts
@@ -13,6 +13,7 @@
 /include/ "axs10x_mb.dtsi"
 
 / {
+	model = "snps,axs101";
 	compatible = "snps,axs101", "snps,arc-sdp";
 
 	chosen {
diff --git a/arch/arc/boot/dts/axs103.dts b/arch/arc/boot/dts/axs103.dts
index e6d0e31..ec7fb27 100644
--- a/arch/arc/boot/dts/axs103.dts
+++ b/arch/arc/boot/dts/axs103.dts
@@ -16,6 +16,7 @@
 /include/ "axs10x_mb.dtsi"
 
 / {
+	model = "snps,axs103";
 	compatible = "snps,axs103", "snps,arc-sdp";
 
 	chosen {
diff --git a/arch/arc/boot/dts/axs103_idu.dts b/arch/arc/boot/dts/axs103_idu.dts
index f999fef..070c297 100644
--- a/arch/arc/boot/dts/axs103_idu.dts
+++ b/arch/arc/boot/dts/axs103_idu.dts
@@ -16,6 +16,7 @@
 /include/ "axs10x_mb.dtsi"
 
 / {
+	model = "snps,axs103-smp";
 	compatible = "snps,axs103", "snps,arc-sdp";
 
 	chosen {
diff --git a/arch/arc/boot/dts/nsim_700.dts b/arch/arc/boot/dts/nsim_700.dts
index 6397051..ce0ccd20 100644
--- a/arch/arc/boot/dts/nsim_700.dts
+++ b/arch/arc/boot/dts/nsim_700.dts
@@ -10,6 +10,7 @@
 /include/ "skeleton.dtsi"
 
 / {
+	model = "snps,nsim";
 	compatible = "snps,nsim";
 	#address-cells = <1>;
 	#size-cells = <1>;
diff --git a/arch/arc/boot/dts/nsim_hs.dts b/arch/arc/boot/dts/nsim_hs.dts
index bf05fe5..3772c40 100644
--- a/arch/arc/boot/dts/nsim_hs.dts
+++ b/arch/arc/boot/dts/nsim_hs.dts
@@ -10,6 +10,7 @@
 /include/ "skeleton_hs.dtsi"
 
 / {
+	model = "snps,nsim_hs";
 	compatible = "snps,nsim_hs";
 	#address-cells = <2>;
 	#size-cells = <2>;
diff --git a/arch/arc/boot/dts/nsim_hs_idu.dts b/arch/arc/boot/dts/nsim_hs_idu.dts
index 99eabe1..48434d7c 100644
--- a/arch/arc/boot/dts/nsim_hs_idu.dts
+++ b/arch/arc/boot/dts/nsim_hs_idu.dts
@@ -10,6 +10,7 @@
 /include/ "skeleton_hs_idu.dtsi"
 
 / {
+	model = "snps,nsim_hs-smp";
 	compatible = "snps,nsim_hs";
 	interrupt-parent = <&core_intc>;
 
diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index e659a34..bcf6031 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@@ -10,6 +10,7 @@
 /include/ "skeleton.dtsi"
 
 / {
+	model = "snps,nsimosci";
 	compatible = "snps,nsimosci";
 	#address-cells = <1>;
 	#size-cells = <1>;
diff --git a/arch/arc/boot/dts/nsimosci_hs.dts b/arch/arc/boot/dts/nsimosci_hs.dts
index 16ce5d6..14a727c 100644
--- a/arch/arc/boot/dts/nsimosci_hs.dts
+++ b/arch/arc/boot/dts/nsimosci_hs.dts
@@ -10,6 +10,7 @@
 /include/ "skeleton_hs.dtsi"
 
 / {
+	model = "snps,nsimosci_hs";
 	compatible = "snps,nsimosci_hs";
 	#address-cells = <1>;
 	#size-cells = <1>;
diff --git a/arch/arc/boot/dts/nsimosci_hs_idu.dts b/arch/arc/boot/dts/nsimosci_hs_idu.dts
index ce8dfbc..cbf65b6 100644
--- a/arch/arc/boot/dts/nsimosci_hs_idu.dts
+++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts
@@ -10,6 +10,7 @@
 /include/ "skeleton_hs_idu.dtsi"
 
 / {
+	model = "snps,nsimosci_hs-smp";
 	compatible = "snps,nsimosci_hs";
 	#address-cells = <1>;
 	#size-cells = <1>;
diff --git a/arch/arc/boot/dts/vdk_hs38.dts b/arch/arc/boot/dts/vdk_hs38.dts
index 5d803dd..3c51103 100644
--- a/arch/arc/boot/dts/vdk_hs38.dts
+++ b/arch/arc/boot/dts/vdk_hs38.dts
@@ -13,6 +13,7 @@
 /include/ "vdk_axs10x_mb.dtsi"
 
 / {
+	model = "snps,vdk_archs";
 	compatible = "snps,axs103";
 
 	chosen {
diff --git a/arch/arc/boot/dts/vdk_hs38_smp.dts b/arch/arc/boot/dts/vdk_hs38_smp.dts
index 2ba60c39..6be6800 100644
--- a/arch/arc/boot/dts/vdk_hs38_smp.dts
+++ b/arch/arc/boot/dts/vdk_hs38_smp.dts
@@ -13,6 +13,7 @@
 /include/ "vdk_axs10x_mb.dtsi"
 
 / {
+	model = "snps,vdk_archs-smp";
 	compatible = "snps,axs103";
 
 	chosen {
diff --git a/arch/arc/boot/dts/zebu_hs.dts b/arch/arc/boot/dts/zebu_hs.dts
new file mode 100644
index 0000000..1c1324e
--- /dev/null
+++ b/arch/arc/boot/dts/zebu_hs.dts
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2016-2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton_hs.dtsi"
+
+/ {
+	model = "snps,zebu_hs";
+	compatible = "snps,zebu_hs";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&core_intc>;
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x20000000>;	/* 512 */
+	};
+
+	chosen {
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
+	};
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	fpga {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		/* child and parent address space 1:1 mapped */
+		ranges;
+
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <50000000>;
+		};
+
+		core_intc: interrupt-controller {
+			compatible = "snps,archs-intc";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+		};
+
+		uart0: serial@f0000000 {
+			compatible = "ns8250";
+			reg = <0xf0000000 0x2000>;
+			interrupts = <24>;
+			clock-frequency = <50000000>;
+			baud = <115200>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			no-loopback-test = <1>;
+		};
+
+		arcpct0: pct {
+			compatible = "snps,archs-pct";
+			#interrupt-cells = <1>;
+			interrupts = <20>;
+		};
+	};
+};
diff --git a/arch/arc/boot/dts/zebu_hs_idu.dts b/arch/arc/boot/dts/zebu_hs_idu.dts
new file mode 100644
index 0000000..65204b4
--- /dev/null
+++ b/arch/arc/boot/dts/zebu_hs_idu.dts
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2016-2014 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton_hs_idu.dtsi"
+
+/ {
+	model = "snps,zebu_hs-smp";
+	compatible = "snps,zebu_hs";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&core_intc>;
+
+	memory {
+		device_type = "memory";
+		reg = <0x80000000 0x20000000>;	/* 512 */
+	};
+
+	chosen {
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=ttyS0,115200n8 debug";
+	};
+
+	aliases {
+		serial0 = &uart0;
+	};
+
+	fpga {
+		compatible = "simple-bus";
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		/* child and parent address space 1:1 mapped */
+		ranges;
+
+		core_clk: core_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <50000000>;	/* 50 MHZ */
+		};
+
+		core_intc: interrupt-controller {
+			compatible = "snps,archs-intc";
+			interrupt-controller;
+			#interrupt-cells = <1>;
+/*			interrupts = <16 17 18 19 20 21 22 23 24 25>; */
+		};
+
+		idu_intc: idu-interrupt-controller {
+			compatible = "snps,archs-idu-intc";
+			interrupt-controller;
+			interrupt-parent = <&core_intc>;
+			/* <hwirq  distribution>
+			distribution: 0=RR; 1=cpu0, 2=cpu1, 4=cpu2, 8=cpu3 */
+			#interrupt-cells = <2>;
+			interrupts = <24 25 26 27 28 29 30 31>;
+
+		};
+
+		uart0: serial@f0000000 {
+			/* compatible = "ns8250"; Doesn't use FIFOs */
+			compatible = "ns16550a";
+			reg = <0xf0000000 0x2000>;
+			interrupt-parent = <&idu_intc>;
+			/* interrupts = <0 1>;  DEST=1*/
+			/* interrupts = <0 2>;  DEST=2*/
+			interrupts = <0 0>;  /* RR*/
+			clock-frequency = <50000000>;
+			baud = <115200>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			no-loopback-test = <1>;
+		};
+
+		arcpct0: pct {
+			compatible = "snps,archs-pct";
+			#interrupt-cells = <1>;
+			interrupts = <20>;
+		};
+	};
+};
diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index 6cdffea..0a0eaf0 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -18,6 +18,9 @@
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_ARC_PLAT_AXS10X=y
 CONFIG_AXS101=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index 491b3b5..2233f57 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -18,6 +18,9 @@
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_ARC_PLAT_AXS10X=y
 CONFIG_AXS103=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index b25ee73..1108747 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -18,6 +18,9 @@
 # CONFIG_COMPAT_BRK is not set
 CONFIG_SLAB=y
 CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_PARTITION_ADVANCED=y
 CONFIG_ARC_PLAT_AXS10X=y
 CONFIG_AXS103=y
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index a99dc7a..65ab9fb 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -11,13 +11,16 @@
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs_hs/"
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_SLUB_DEBUG is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 59f221f..3b3990c 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -16,6 +16,9 @@
 # CONFIG_COMPAT_BRK is not set
 CONFIG_KPROBES=y
 CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_LBDAF is not set
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_IOSCHED_DEADLINE is not set
diff --git a/arch/arc/configs/zebu_hs_defconfig b/arch/arc/configs/zebu_hs_defconfig
new file mode 100644
index 0000000..9f6166b
--- /dev/null
+++ b/arch/arc/configs/zebu_hs_defconfig
@@ -0,0 +1,86 @@
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EXPERT=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_ARC_BUILTIN_DTB_NAME="zebu_hs"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_DEBUG_MEMORY_INIT=y
+# CONFIG_DEBUG_PREEMPT is not set
diff --git a/arch/arc/configs/zebu_hs_smp_defconfig b/arch/arc/configs/zebu_hs_smp_defconfig
new file mode 100644
index 0000000..44e9693
--- /dev/null
+++ b/arch/arc/configs/zebu_hs_smp_defconfig
@@ -0,0 +1,89 @@
+CONFIG_DEFAULT_HOSTNAME="ARCLinux"
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_NAMESPACES=y
+# CONFIG_UTS_NS is not set
+# CONFIG_PID_NS is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE="../../arc_initramfs_hs/"
+CONFIG_EMBEDDED=y
+CONFIG_PERF_EVENTS=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_KPROBES=y
+CONFIG_MODULES=y
+# CONFIG_LBDAF is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_ARC_PLAT_SIM=y
+CONFIG_ISA_ARCV2=y
+CONFIG_SMP=y
+CONFIG_ARC_BUILTIN_DTB_NAME="zebu_hs_idu"
+CONFIG_PREEMPT=y
+# CONFIG_COMPACTION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_NET_KEY=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+# CONFIG_BLK_DEV is not set
+CONFIG_NETDEVICES=y
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_BROADCOM is not set
+# CONFIG_NET_VENDOR_INTEL is not set
+# CONFIG_NET_VENDOR_MARVELL is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_MOUSE_PS2_TOUCHKIT=y
+# CONFIG_SERIO_SERPORT is not set
+CONFIG_SERIO_ARC_PS2=y
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+CONFIG_FB=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_NFS_FS=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_LOCKUP_DETECTOR=y
+# CONFIG_DEBUG_PREEMPT is not set
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 7fbaea0..db25c65 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -95,7 +95,7 @@
 /* Auxiliary registers */
 #define AUX_IDENTITY		4
 #define AUX_INTR_VEC_BASE	0x25
-#define AUX_NON_VOL		0x5e
+#define AUX_VOL			0x5e
 
 /*
  * Floating Pt Registers
@@ -240,14 +240,6 @@
 #endif
 };
 
-struct bcr_perip {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int start:8, pad2:8, sz:8, ver:8;
-#else
-	unsigned int ver:8, sz:8, pad2:8, start:8;
-#endif
-};
-
 struct bcr_iccm_arcompact {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int base:16, pad:5, sz:3, ver:8;
diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 4e3c1b6..b65930a 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -20,6 +20,7 @@
 #ifndef CONFIG_ARC_PLAT_EZNPS
 
 #define atomic_read(v)  READ_ONCE((v)->counter)
+#define ATOMIC_INIT(i)	{ (i) }
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
@@ -284,6 +285,7 @@
 ATOMIC_OPS(add, +=, CTOP_INST_AADD_DI_R2_R2_R3)
 #define atomic_sub(i, v) atomic_add(-(i), (v))
 #define atomic_sub_return(i, v) atomic_add_return(-(i), (v))
+#define atomic_fetch_sub(i, v) atomic_fetch_add(-(i), (v))
 
 #undef ATOMIC_OPS
 #define ATOMIC_OPS(op, c_op, asm_op)					\
@@ -292,6 +294,7 @@
 
 ATOMIC_OPS(and, &=, CTOP_INST_AAND_DI_R2_R2_R3)
 #define atomic_andnot(mask, v) atomic_and(~(mask), (v))
+#define atomic_fetch_andnot(mask, v) atomic_fetch_and(~(mask), (v))
 ATOMIC_OPS(or, |=, CTOP_INST_AOR_DI_R2_R2_R3)
 ATOMIC_OPS(xor, ^=, CTOP_INST_AXOR_DI_R2_R2_R3)
 
@@ -343,10 +346,266 @@
 
 #define atomic_add_negative(i, v)	(atomic_add_return(i, v) < 0)
 
-#define ATOMIC_INIT(i)			{ (i) }
+
+#ifdef CONFIG_GENERIC_ATOMIC64
 
 #include <asm-generic/atomic64.h>
 
-#endif
+#else	/* Kconfig ensures this is only enabled with needed h/w assist */
+
+/*
+ * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD)
+ *  - The address HAS to be 64-bit aligned
+ *  - There are 2 semantics involved here:
+ *    = exclusive implies no interim update between load/store to same addr
+ *    = both words are observed/updated together: this is guaranteed even
+ *      for regular 64-bit load (LDD) / store (STD). Thus atomic64_set()
+ *      is NOT required to use LLOCKD+SCONDD, STD suffices
+ */
+
+typedef struct {
+	aligned_u64 counter;
+} atomic64_t;
+
+#define ATOMIC64_INIT(a) { (a) }
+
+static inline long long atomic64_read(const atomic64_t *v)
+{
+	unsigned long long val;
+
+	__asm__ __volatile__(
+	"	ldd   %0, [%1]	\n"
+	: "=r"(val)
+	: "r"(&v->counter));
+
+	return val;
+}
+
+static inline void atomic64_set(atomic64_t *v, long long a)
+{
+	/*
+	 * This could have been a simple assignment in "C" but would need
+	 * explicit volatile. Otherwise gcc optimizers could elide the store
+	 * which borked atomic64 self-test
+	 * In the inline asm version, memory clobber needed for exact same
+	 * reason, to tell gcc about the store.
+	 *
+	 * This however is not needed for sibling atomic64_add() etc since both
+	 * load/store are explicitly done in inline asm. As long as API is used
+	 * for each access, gcc has no way to optimize away any load/store
+	 */
+	__asm__ __volatile__(
+	"	std   %0, [%1]	\n"
+	:
+	: "r"(a), "r"(&v->counter)
+	: "memory");
+}
+
+#define ATOMIC64_OP(op, op1, op2)					\
+static inline void atomic64_##op(long long a, atomic64_t *v)		\
+{									\
+	unsigned long long val;						\
+									\
+	__asm__ __volatile__(						\
+	"1:				\n"				\
+	"	llockd  %0, [%1]	\n"				\
+	"	" #op1 " %L0, %L0, %L2	\n"				\
+	"	" #op2 " %H0, %H0, %H2	\n"				\
+	"	scondd   %0, [%1]	\n"				\
+	"	bnz     1b		\n"				\
+	: "=&r"(val)							\
+	: "r"(&v->counter), "ir"(a)					\
+	: "cc");						\
+}									\
+
+#define ATOMIC64_OP_RETURN(op, op1, op2)		        	\
+static inline long long atomic64_##op##_return(long long a, atomic64_t *v)	\
+{									\
+	unsigned long long val;						\
+									\
+	smp_mb();							\
+									\
+	__asm__ __volatile__(						\
+	"1:				\n"				\
+	"	llockd   %0, [%1]	\n"				\
+	"	" #op1 " %L0, %L0, %L2	\n"				\
+	"	" #op2 " %H0, %H0, %H2	\n"				\
+	"	scondd   %0, [%1]	\n"				\
+	"	bnz     1b		\n"				\
+	: [val] "=&r"(val)						\
+	: "r"(&v->counter), "ir"(a)					\
+	: "cc");	/* memory clobber comes from smp_mb() */	\
+									\
+	smp_mb();							\
+									\
+	return val;							\
+}
+
+#define ATOMIC64_FETCH_OP(op, op1, op2)		        		\
+static inline long long atomic64_fetch_##op(long long a, atomic64_t *v)	\
+{									\
+	unsigned long long val, orig;					\
+									\
+	smp_mb();							\
+									\
+	__asm__ __volatile__(						\
+	"1:				\n"				\
+	"	llockd   %0, [%2]	\n"				\
+	"	" #op1 " %L1, %L0, %L3	\n"				\
+	"	" #op2 " %H1, %H0, %H3	\n"				\
+	"	scondd   %1, [%2]	\n"				\
+	"	bnz     1b		\n"				\
+	: "=&r"(orig), "=&r"(val)					\
+	: "r"(&v->counter), "ir"(a)					\
+	: "cc");	/* memory clobber comes from smp_mb() */	\
+									\
+	smp_mb();							\
+									\
+	return orig;							\
+}
+
+#define ATOMIC64_OPS(op, op1, op2)					\
+	ATOMIC64_OP(op, op1, op2)					\
+	ATOMIC64_OP_RETURN(op, op1, op2)				\
+	ATOMIC64_FETCH_OP(op, op1, op2)
+
+#define atomic64_andnot atomic64_andnot
+
+ATOMIC64_OPS(add, add.f, adc)
+ATOMIC64_OPS(sub, sub.f, sbc)
+ATOMIC64_OPS(and, and, and)
+ATOMIC64_OPS(andnot, bic, bic)
+ATOMIC64_OPS(or, or, or)
+ATOMIC64_OPS(xor, xor, xor)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+static inline long long
+atomic64_cmpxchg(atomic64_t *ptr, long long expected, long long new)
+{
+	long long prev;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%1]	\n"
+	"	brne    %L0, %L2, 2f	\n"
+	"	brne    %H0, %H2, 2f	\n"
+	"	scondd  %3, [%1]	\n"
+	"	bnz     1b		\n"
+	"2:				\n"
+	: "=&r"(prev)
+	: "r"(ptr), "ir"(expected), "r"(new)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return prev;
+}
+
+static inline long long atomic64_xchg(atomic64_t *ptr, long long new)
+{
+	long long prev;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%1]	\n"
+	"	scondd  %2, [%1]	\n"
+	"	bnz     1b		\n"
+	"2:				\n"
+	: "=&r"(prev)
+	: "r"(ptr), "r"(new)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return prev;
+}
+
+/**
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic64_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long long val;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%1]	\n"
+	"	sub.f   %L0, %L0, 1	# w0 - 1, set C on borrow\n"
+	"	sub.c   %H0, %H0, 1	# if C set, w1 - 1\n"
+	"	brlt    %H0, 0, 2f	\n"
+	"	scondd  %0, [%1]	\n"
+	"	bnz     1b		\n"
+	"2:				\n"
+	: "=&r"(val)
+	: "r"(&v->counter)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return val;
+}
+
+/**
+ * atomic64_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic64_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * if (v != u) { v += a; ret = 1} else {ret = 0}
+ * Returns 1 iff @v was not @u (i.e. if add actually happened)
+ */
+static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+	long long val;
+	int op_done;
+
+	smp_mb();
+
+	__asm__ __volatile__(
+	"1:	llockd  %0, [%2]	\n"
+	"	mov	%1, 1		\n"
+	"	brne	%L0, %L4, 2f	# continue to add since v != u \n"
+	"	breq.d	%H0, %H4, 3f	# return since v == u \n"
+	"	mov	%1, 0		\n"
+	"2:				\n"
+	"	add.f   %L0, %L0, %L3	\n"
+	"	adc     %H0, %H0, %H3	\n"
+	"	scondd  %0, [%2]	\n"
+	"	bnz     1b		\n"
+	"3:				\n"
+	: "=&r"(val), "=&r" (op_done)
+	: "r"(&v->counter), "r"(a), "r"(u)
+	: "cc");	/* memory clobber comes from smp_mb() */
+
+	smp_mb();
+
+	return op_done;
+}
+
+#define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
+#define atomic64_inc(v)			atomic64_add(1LL, (v))
+#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
+#define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
+#define atomic64_dec(v)			atomic64_sub(1LL, (v))
+#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
+#define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
+#define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
+
+#endif	/* !CONFIG_GENERIC_ATOMIC64 */
+
+#endif	/* !__ASSEMBLY__ */
 
 #endif
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 23706c6..fb781e3 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -54,7 +54,7 @@
 extern void read_decode_cache_bcr(void);
 
 extern int ioc_exists;
-extern unsigned long perip_base;
+extern unsigned long perip_base, perip_end;
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arc/include/asm/dwarf.h b/arch/arc/include/asm/dwarf.h
new file mode 100644
index 0000000..bb7bdbc5
--- /dev/null
+++ b/arch/arc/include/asm/dwarf.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2016-17 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_ARC_DWARF_H
+#define _ASM_ARC_DWARF_H
+
+#ifdef __ASSEMBLY__
+
+#ifdef ARC_DW2_UNWIND_AS_CFI
+
+#define CFI_STARTPROC	.cfi_startproc
+#define CFI_ENDPROC	.cfi_endproc
+#define CFI_DEF_CFA	.cfi_def_cfa
+#define CFI_REGISTER	.cfi_register
+#define CFI_REL_OFFSET	.cfi_rel_offset
+#define CFI_UNDEFINED	.cfi_undefined
+
+#else
+
+#define CFI_IGNORE	#
+
+#define CFI_STARTPROC	CFI_IGNORE
+#define CFI_ENDPROC	CFI_IGNORE
+#define CFI_DEF_CFA	CFI_IGNORE
+#define CFI_REGISTER	CFI_IGNORE
+#define CFI_REL_OFFSET	CFI_IGNORE
+#define CFI_UNDEFINED	CFI_IGNORE
+
+#endif	/* !ARC_DW2_UNWIND_AS_CFI */
+
+#endif	/* __ASSEMBLY__ */
+
+#endif	/* _ASM_ARC_DWARF_H */
diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h
index 51a99e2..7096f97 100644
--- a/arch/arc/include/asm/elf.h
+++ b/arch/arc/include/asm/elf.h
@@ -23,8 +23,7 @@
 /* ARC Relocations (kernel Modules only) */
 #define  R_ARC_32		0x4
 #define  R_ARC_32_ME		0x1B
-#define  R_ARC_S25H_PCREL	0x10
-#define  R_ARC_S25W_PCREL	0x11
+#define  R_ARC_32_PCREL		0x31
 
 /*to set parameters in the core dumps */
 #define ELF_ARCH		EM_ARCOMPACT
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index ad7860c..51597f3 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -142,7 +142,7 @@
 
 #ifdef CONFIG_ARC_CURR_IN_REG
 	; Retrieve orig r25 and save it with rest of callee_regs
-	ld.as   r12, [r12, PT_user_r25]
+	ld	r12, [r12, PT_user_r25]
 	PUSH	r12
 #else
 	PUSH	r25
@@ -198,7 +198,7 @@
 
 	; SP is back to start of pt_regs
 #ifdef CONFIG_ARC_CURR_IN_REG
-	st.as   r12, [sp, PT_user_r25]
+	st	r12, [sp, PT_user_r25]
 #endif
 .endm
 
diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h
index d1ec7f6..e880dfa 100644
--- a/arch/arc/include/asm/irqflags-arcv2.h
+++ b/arch/arc/include/asm/irqflags-arcv2.h
@@ -112,7 +112,7 @@
 	 */
 	temp = (1 << 5) |
 		((!!(temp & STATUS_IE_MASK)) << CLRI_STATUS_IE_BIT) |
-		(temp & CLRI_STATUS_E_MASK);
+		((temp >> 1) & CLRI_STATUS_E_MASK);
 	return temp;
 }
 
diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h
index c1d3645..4c6eed8 100644
--- a/arch/arc/include/asm/irqflags-compact.h
+++ b/arch/arc/include/asm/irqflags-compact.h
@@ -188,10 +188,10 @@
 .endm
 
 .macro IRQ_ENABLE  scratch
+	TRACE_ASM_IRQ_ENABLE
 	lr	\scratch, [status32]
 	or	\scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK)
 	flag	\scratch
-	TRACE_ASM_IRQ_ENABLE
 .endm
 
 #endif	/* __ASSEMBLY__ */
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index 5faad17..b29f1a9 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -9,6 +9,8 @@
 #ifndef __ASM_LINKAGE_H
 #define __ASM_LINKAGE_H
 
+#include <asm/dwarf.h>
+
 #ifdef __ASSEMBLY__
 
 #define ASM_NL		 `	/* use '`' to mark new line in macro */
@@ -32,6 +34,16 @@
 #endif
 .endm
 
+#define ENTRY_CFI(name)		\
+	.globl name ASM_NL	\
+	ALIGN ASM_NL 		\
+	name: ASM_NL		\
+	CFI_STARTPROC ASM_NL
+
+#define END_CFI(name) 		\
+	CFI_ENDPROC ASM_NL	\
+	.size name, .-name
+
 #else	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_ARC_HAS_ICCM
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h
index 5f07176..9185541 100644
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -118,6 +118,9 @@
 	[PERF_COUNT_ARC_ICM] = "icm",		/* I-cache Miss */
 	[PERF_COUNT_ARC_EDTLB] = "edtlb",	/* D-TLB Miss */
 	[PERF_COUNT_ARC_EITLB] = "eitlb",	/* I-TLB Miss */
+
+	[PERF_COUNT_HW_CACHE_REFERENCES] = "imemrdc",	/* Instr: mem read cached */
+	[PERF_COUNT_HW_CACHE_MISSES] = "dclm",		/* D-cache Load Miss */
 };
 
 #define C(_x)			PERF_COUNT_HW_CACHE_##_x
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 0f92d97..89eeb37 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -280,7 +280,7 @@
 
 #define pte_page(pte)		pfn_to_page(pte_pfn(pte))
 #define mk_pte(page, prot)	pfn_pte(page_to_pfn(page), prot)
-#define pfn_pte(pfn, prot)	(__pte(((pte_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pfn_pte(pfn, prot)	__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
 
 /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
 #define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index a78d567..41faf17 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -83,7 +83,10 @@
 	"2:	;nop\n"				\
 	"	.section .fixup, \"ax\"\n"	\
 	"	.align 4\n"			\
-	"3:	mov %0, %3\n"			\
+	"3:	# return -EFAULT\n"		\
+	"	mov %0, %3\n"			\
+	"	# zero out dst ptr\n"		\
+	"	mov %1,  0\n"			\
 	"	j   2b\n"			\
 	"	.previous\n"			\
 	"	.section __ex_table, \"a\"\n"	\
@@ -101,7 +104,11 @@
 	"2:	;nop\n"				\
 	"	.section .fixup, \"ax\"\n"	\
 	"	.align 4\n"			\
-	"3:	mov %0, %3\n"			\
+	"3:	# return -EFAULT\n"		\
+	"	mov %0, %3\n"			\
+	"	# zero out dst ptr\n"		\
+	"	mov %1,  0\n"			\
+	"	mov %R1, 0\n"			\
 	"	j   2b\n"			\
 	"	.previous\n"			\
 	"	.section __ex_table, \"a\"\n"	\
diff --git a/arch/arc/include/uapi/asm/elf.h b/arch/arc/include/uapi/asm/elf.h
index 0f99ac8..0037a58 100644
--- a/arch/arc/include/uapi/asm/elf.h
+++ b/arch/arc/include/uapi/asm/elf.h
@@ -13,8 +13,15 @@
 
 /* Machine specific ELF Hdr flags */
 #define EF_ARC_OSABI_MSK	0x00000f00
-#define EF_ARC_OSABI_ORIG	0x00000000   /* MUST be zero for back-compat */
-#define EF_ARC_OSABI_CURRENT	0x00000300   /* v3 (no legacy syscalls) */
+
+#define EF_ARC_OSABI_V3		0x00000300   /* v3 (no legacy syscalls) */
+#define EF_ARC_OSABI_V4		0x00000400   /* v4 (64bit data any reg align) */
+
+#if __GNUC__ < 6
+#define EF_ARC_OSABI_CURRENT	EF_ARC_OSABI_V3
+#else
+#define EF_ARC_OSABI_CURRENT	EF_ARC_OSABI_V4
+#endif
 
 typedef unsigned long elf_greg_t;
 typedef unsigned long elf_fpregset_t;
diff --git a/arch/arc/kernel/arcksyms.c b/arch/arc/kernel/arcksyms.c
index 4d9e777..000dd04 100644
--- a/arch/arc/kernel/arcksyms.c
+++ b/arch/arc/kernel/arcksyms.c
@@ -28,6 +28,7 @@
 extern void __divdf3(void);
 extern void __floatunsidf(void);
 extern void __floatunsisf(void);
+extern void __udivdi3(void);
 
 EXPORT_SYMBOL(__ashldi3);
 EXPORT_SYMBOL(__ashrdi3);
@@ -45,6 +46,7 @@
 EXPORT_SYMBOL(__divdf3);
 EXPORT_SYMBOL(__floatunsidf);
 EXPORT_SYMBOL(__floatunsisf);
+EXPORT_SYMBOL(__udivdi3);
 
 /* ARC optimised assembler routines */
 EXPORT_SYMBOL(memset);
diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S
index e6890b1..7c1f365 100644
--- a/arch/arc/kernel/ctx_sw_asm.S
+++ b/arch/arc/kernel/ctx_sw_asm.S
@@ -23,6 +23,7 @@
 	.global __switch_to
 	.type   __switch_to, @function
 __switch_to:
+	CFI_STARTPROC
 
 	/* Save regs on kernel mode stack of task */
 	st.a    blink, [sp, -4]
@@ -59,4 +60,4 @@
 	ld.ab   blink, [sp, 4]
 	j       [blink]
 
-END(__switch_to)
+END_CFI(__switch_to)
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 2efb0625..1eea99b 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -35,7 +35,7 @@
 	btst r10, TIF_SYSCALL_TRACE
 	bnz  tracesys_exit
 
-	b ret_from_system_call
+	b .Lret_from_system_call
 END(sys_clone_wrapper)
 
 ENTRY(ret_from_fork)
@@ -61,18 +61,6 @@
 	b    ret_from_exception
 END(ret_from_fork)
 
-#ifdef CONFIG_ARC_DW2_UNWIND
-; Workaround for bug 94179 (STAR ):
-; Despite -fasynchronous-unwind-tables, linker is not making dwarf2 unwinder
-; section (.debug_frame) as loadable. So we force it here.
-; This also fixes STAR 9000487933 where the prev-workaround (objcopy --setflag)
-; would not work after a clean build due to kernel build system dependencies.
-.section .debug_frame, "wa",@progbits
-
-; Reset to .text as this file is included in entry-<isa>.S
-.section .text, "ax",@progbits
-#endif
-
 ;################### Non TLB Exception Handling #############################
 
 ; ---------------------------------------------
@@ -260,20 +248,18 @@
 	; syscall num shd not exceed the total system calls avail
 	cmp     r8,  NR_syscalls
 	mov.hi  r0, -ENOSYS
-	bhi     ret_from_system_call
+	bhi     .Lret_from_system_call
 
 	; Offset into the syscall_table and call handler
 	ld.as   r9,[sys_call_table, r8]
 	jl      [r9]        ; Entry into Sys Call Handler
 
-	; fall through to ret_from_system_call
-END(EV_Trap)
-
-ENTRY(ret_from_system_call)
+.Lret_from_system_call:
 
 	st  r0, [sp, PT_r0]     ; sys call return value in pt_regs
 
-	; fall through yet again to ret_from_exception
+	; fall through to ret_from_exception
+END(EV_Trap)
 
 ;############# Return from Intr/Excp/Trap (Linux Specifics) ##############
 ;
diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c
index 6c24faf..62b59409 100644
--- a/arch/arc/kernel/intc-arcv2.c
+++ b/arch/arc/kernel/intc-arcv2.c
@@ -74,7 +74,7 @@
 	tmp = read_aux_reg(0xa);
 	tmp |= STATUS_AD_MASK | (irq_prio << 1);
 	tmp &= ~STATUS_IE_MASK;
-	asm volatile("flag %0	\n"::"r"(tmp));
+	asm volatile("kflag %0	\n"::"r"(tmp));
 }
 
 static void arcv2_irq_mask(struct irq_data *data)
diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c
index 376e046..9a28497 100644
--- a/arch/arc/kernel/module.c
+++ b/arch/arc/kernel/module.c
@@ -22,13 +22,9 @@
 	*(addr + 1) = (value & 0xffff);
 }
 
-/* ARC specific section quirks - before relocation loop in generic loader
- *
- * For dwarf unwinding out of modules, this needs to
- * 1. Ensure the .debug_frame is allocatable (ARC Linker bug: despite
- *    -fasynchronous-unwind-tables it doesn't).
- * 2. Since we are iterating thru sec hdr tbl anyways, make a note of
- *    the exact section index, for later use.
+/*
+ * This gets called before relocation loop in generic loader
+ * Make a note of the section index of unwinding section
  */
 int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
 			      char *secstr, struct module *mod)
@@ -40,8 +36,7 @@
 	mod->arch.unw_info = NULL;
 
 	for (i = 1; i < hdr->e_shnum; i++) {
-		if (strcmp(secstr+sechdrs[i].sh_name, ".debug_frame") == 0) {
-			sechdrs[i].sh_flags |= SHF_ALLOC;
+		if (strcmp(secstr+sechdrs[i].sh_name, ".eh_frame") == 0) {
 			mod->arch.unw_sec_idx = i;
 			break;
 		}
@@ -106,10 +101,12 @@
 		 */
 		relo_type = ELF32_R_TYPE(rel_entry[i].r_info);
 
-		if (likely(R_ARC_32_ME == relo_type))
+		if (likely(R_ARC_32_ME == relo_type))	/* ME ( S + A ) */
 			arc_write_me((unsigned short *)location, relocation);
-		else if (R_ARC_32 == relo_type)
+		else if (R_ARC_32 == relo_type)		/* ( S + A ) */
 			*((Elf32_Addr *) location) = relocation;
+		else if (R_ARC_32_PCREL == relo_type)	/* ( S + A ) - PDATA ) */
+			*((Elf32_Addr *) location) = relocation - location;
 		else
 			goto relo_err;
 
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 08f03d9..2ce24e7 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -179,8 +179,8 @@
 		if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
 			return -ENOENT;
 		hwc->config |= arc_pmu->ev_hw_idx[event->attr.config];
-		pr_debug("init event %d with h/w %d \'%s\'\n",
-			 (int) event->attr.config, (int) hwc->config,
+		pr_debug("init event %d with h/w %08x \'%s\'\n",
+			 (int)event->attr.config, (int)hwc->config,
 			 arc_pmu_ev_hw_map[event->attr.config]);
 		return 0;
 
@@ -189,6 +189,8 @@
 		if (ret < 0)
 			return ret;
 		hwc->config |= arc_pmu->ev_hw_idx[ret];
+		pr_debug("init cache event with h/w %08x \'%s\'\n",
+			 (int)hwc->config, arc_pmu_ev_hw_map[ret]);
 		return 0;
 	default:
 		return -ENOENT;
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index b5db9e7..be1972b 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -199,7 +199,7 @@
 	}
 
 	eflags = x->e_flags;
-	if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) {
+	if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) {
 		pr_err("ABI mismatch - you need newer toolchain\n");
 		force_sigsegv(SIGSEGV, current);
 		return 0;
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index a946400..3df7f9c 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -171,6 +171,7 @@
 #else
 	{ {0x50, "ARC HS38 R2.0"}, 0x51},
 	{ {0x52, "ARC HS38 R2.1"}, 0x52},
+	{ {0x53, "ARC HS38 R3.0"}, 0x53},
 #endif
 	{ {0x00, NULL		} }
 };
@@ -272,8 +273,8 @@
 	FIX_PTR(cpu);
 
 	n += scnprintf(buf + n, len - n,
-		       "Vector Table\t: %#x\nUncached Base\t: %#lx\n",
-		       cpu->vec_base, perip_base);
+		       "Vector Table\t: %#x\nPeripherals\t: %#lx:%#lx\n",
+		       cpu->vec_base, perip_base, perip_end);
 
 	if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
 		n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
@@ -291,8 +292,10 @@
 			       cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
 			       cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
 
-	n += scnprintf(buf + n, len - n,
-		       "OS ABI [v3]\t: no-legacy-syscalls\n");
+	n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n",
+			EF_ARC_OSABI_CURRENT >> 8,
+			EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ?
+			"no-legacy-syscalls" : "64-bit data any register aligned");
 
 	return buf;
 }
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 0587bf1..61fd1ce 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -111,6 +111,8 @@
 #define DW_EH_PE_indirect 0x80
 #define DW_EH_PE_omit     0xff
 
+#define CIE_ID	0
+
 typedef unsigned long uleb128_t;
 typedef signed long sleb128_t;
 
@@ -232,6 +234,7 @@
 
 static const u32 bad_cie, not_fde;
 static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
+static const u32 *__cie_for_fde(const u32 *fde);
 static signed fde_pointer_type(const u32 *cie);
 
 struct eh_frame_hdr_table_entry {
@@ -338,10 +341,9 @@
 	for (fde = table->address, tableSize = table->size, n = 0;
 	     tableSize;
 	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
-		/* const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); */
-		const u32 *cie = (const u32 *)(fde[1]);
+		const u32 *cie = __cie_for_fde(fde);
 
-		if (fde[1] == 0xffffffff)
+		if (fde[1] == CIE_ID)
 			continue;	/* this is a CIE */
 		ptr = (const u8 *)(fde + 2);
 		header->table[n].start = read_pointer(&ptr,
@@ -504,6 +506,15 @@
 	return value;
 }
 
+static const u32 *__cie_for_fde(const u32 *fde)
+{
+	const u32 *cie;
+
+	cie = fde + 1 - fde[1] / sizeof(*fde);
+
+	return cie;
+}
+
 static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
 {
 	const u32 *cie;
@@ -511,19 +522,18 @@
 	if (!*fde || (*fde & (sizeof(*fde) - 1)))
 		return &bad_cie;
 
-	if (fde[1] == 0xffffffff)
+	if (fde[1] == CIE_ID)
 		return &not_fde;	/* this is a CIE */
 
 	if ((fde[1] & (sizeof(*fde) - 1)))
 /* || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) */
 		return NULL;	/* this is not a valid FDE */
 
-	/* cie = fde + 1 - fde[1] / sizeof(*fde); */
-	cie = (u32 *) fde[1];
+	cie = __cie_for_fde(fde);
 
 	if (*cie <= sizeof(*cie) + 4 || *cie >= fde[1] - sizeof(*fde)
 	    || (*cie & (sizeof(*cie) - 1))
-	    || (cie[1] != 0xffffffff))
+	    || (cie[1] != CIE_ID))
 		return NULL;	/* this is not a (valid) CIE */
 	return cie;
 }
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 894e696..3661107 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -82,14 +82,6 @@
 
 	PERCPU_SECTION(L1_CACHE_BYTES)
 
-	/*
-	 * .exit.text is discard at runtime, not link time, to deal with
-	 * references from .debug_frame
-	 * It will be init freed, being inside [__init_start : __init_end]
-	 */
-	.exit.text : { EXIT_TEXT }
-	.exit.data : { EXIT_DATA }
-
 	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
 
@@ -120,18 +112,13 @@
 
 #ifdef CONFIG_ARC_DW2_UNWIND
 	. = ALIGN(PAGE_SIZE);
-	.debug_frame  : {
+	.eh_frame  : {
 		__start_unwind = .;
-		*(.debug_frame)
+		*(.eh_frame)
 		__end_unwind = .;
 	}
-	/*
-	 * gcc 4.8 generates this for -fasynchonous-unwind-tables,
-	 * while we still use the .debug_frame based unwinder
-	 */
-	/DISCARD/ : {	*(.eh_frame) }
 #else
-	/DISCARD/ : {	*(.debug_frame) }
+	/DISCARD/ : {	*(.eh_frame) }
 #endif
 
 	NOTES
@@ -148,7 +135,7 @@
 	}
 
 #ifndef CONFIG_DEBUG_INFO
-	/* open-coded because we need .debug_frame seperately for unwinding */
+	/DISCARD/ : { *(.debug_frame) }
 	/DISCARD/ : { *(.debug_aranges) }
 	/DISCARD/ : { *(.debug_pubnames) }
 	/DISCARD/ : { *(.debug_info) }
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
index a4015e7..21a1030 100644
--- a/arch/arc/lib/memcmp.S
+++ b/arch/arc/lib/memcmp.S
@@ -16,7 +16,7 @@
 #define SHIFT r2
 #endif
 
-ENTRY(memcmp)
+ENTRY_CFI(memcmp)
 	or	r12,r0,r1
 	asl_s	r12,r12,30
 	sub	r3,r2,1
@@ -149,4 +149,4 @@
 .Lnil:
 	j_s.d	[blink]
 	mov	r0,0
-END(memcmp)
+END_CFI(memcmp)
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S
index 3222573..ba0becc 100644
--- a/arch/arc/lib/memcpy-700.S
+++ b/arch/arc/lib/memcpy-700.S
@@ -8,7 +8,7 @@
 
 #include <linux/linkage.h>
 
-ENTRY(memcpy)
+ENTRY_CFI(memcpy)
 	or	r3,r0,r1
 	asl_s	r3,r3,30
 	mov_s	r5,r0
@@ -63,4 +63,4 @@
 .Lendbloop:
 	j_s.d	[blink]
 	stb	r12,[r5,0]
-END(memcpy)
+END_CFI(memcpy)
diff --git a/arch/arc/lib/memcpy-archs.S b/arch/arc/lib/memcpy-archs.S
index f96c75e..d61044d 100644
--- a/arch/arc/lib/memcpy-archs.S
+++ b/arch/arc/lib/memcpy-archs.S
@@ -40,7 +40,7 @@
 # define ZOLAND			0xF
 #endif
 
-ENTRY(memcpy)
+ENTRY_CFI(memcpy)
 	prefetch [r1]		; Prefetch the read location
 	prefetchw [r0]		; Prefetch the write location
 	mov.f	0, r2
@@ -233,4 +233,4 @@
 .Lcopybytewise_3:
 	j	[blink]
 
-END(memcpy)
+END_CFI(memcpy)
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index 365b183..62ad4bc 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -10,7 +10,7 @@
 
 #undef PREALLOC_NOT_AVAIL
 
-ENTRY(memset)
+ENTRY_CFI(memset)
 	prefetchw [r0]		; Prefetch the write location
 	mov.f	0, r2
 ;;; if size is zero
@@ -112,11 +112,11 @@
 
 	j	[blink]
 
-END(memset)
+END_CFI(memset)
 
-ENTRY(memzero)
+ENTRY_CFI(memzero)
     ; adjust bzero args to memset args
     mov r2, r1
     b.d  memset    ;tail call so need to tinker with blink
     mov r1, 0
-END(memzero)
+END_CFI(memzero)
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S
index d36bd43..cf736f9 100644
--- a/arch/arc/lib/memset.S
+++ b/arch/arc/lib/memset.S
@@ -10,7 +10,7 @@
 
 #define SMALL	7 /* Must be at least 6 to deal with alignment/loop issues.  */
 
-ENTRY(memset)
+ENTRY_CFI(memset)
 	mov_s	r4,r0
 	or	r12,r0,r2
 	bmsk.f	r12,r12,1
@@ -46,14 +46,14 @@
 	stb.ab	r1,[r4,1]
 .Ltiny_end:
 	j_s	[blink]
-END(memset)
+END_CFI(memset)
 
 ; memzero: @r0 = mem, @r1 = size_t
 ; memset:  @r0 = mem, @r1 = char, @r2 = size_t
 
-ENTRY(memzero)
+ENTRY_CFI(memzero)
     ; adjust bzero args to memset args
     mov r2, r1
     mov r1, 0
     b  memset    ;tail call so need to tinker with blink
-END(memzero)
+END_CFI(memzero)
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
index b725d58..2d300da 100644
--- a/arch/arc/lib/strchr-700.S
+++ b/arch/arc/lib/strchr-700.S
@@ -13,7 +13,7 @@
 
 #include <linux/linkage.h>
 
-ENTRY(strchr)
+ENTRY_CFI(strchr)
 	extb_s	r1,r1
 	asl	r5,r1,8
 	bmsk	r2,r0,1
@@ -130,4 +130,4 @@
 	j_s.d	[blink]
 	mov.mi	r0,0
 #endif /* ENDIAN */
-END(strchr)
+END_CFI(strchr)
diff --git a/arch/arc/lib/strcmp-archs.S b/arch/arc/lib/strcmp-archs.S
index 4f338ee..fae9e82 100644
--- a/arch/arc/lib/strcmp-archs.S
+++ b/arch/arc/lib/strcmp-archs.S
@@ -8,7 +8,7 @@
 
 #include <linux/linkage.h>
 
-ENTRY(strcmp)
+ENTRY_CFI(strcmp)
 	or	r2, r0, r1
 	bmsk_s	r2, r2, 1
 	brne	r2, 0, @.Lcharloop
@@ -75,4 +75,4 @@
 .Lcmpend:
 	j_s.d	[blink]
 	sub	r0, r2, r3
-END(strcmp)
+END_CFI(strcmp)
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S
index 3544600..fb20096 100644
--- a/arch/arc/lib/strcmp.S
+++ b/arch/arc/lib/strcmp.S
@@ -15,7 +15,7 @@
 
 #include <linux/linkage.h>
 
-ENTRY(strcmp)
+ENTRY_CFI(strcmp)
 	or	r2,r0,r1
 	bmsk_s	r2,r2,1
 	brne	r2,0,.Lcharloop
@@ -93,4 +93,4 @@
 .Lcmpend:
 	j_s.d	[blink]
 	sub	r0,r2,r3
-END(strcmp)
+END_CFI(strcmp)
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
index 8422f38..6a6c155 100644
--- a/arch/arc/lib/strcpy-700.S
+++ b/arch/arc/lib/strcpy-700.S
@@ -18,7 +18,7 @@
 
 #include <linux/linkage.h>
 
-ENTRY(strcpy)
+ENTRY_CFI(strcpy)
 	or	r2,r0,r1
 	bmsk_s	r2,r2,1
 	brne.d	r2,0,charloop
@@ -67,4 +67,4 @@
 	brne.d	r3,0,charloop
 	stb.ab	r3,[r10,1]
 	j	[blink]
-END(strcpy)
+END_CFI(strcpy)
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S
index 53cfd56..839b44b 100644
--- a/arch/arc/lib/strlen.S
+++ b/arch/arc/lib/strlen.S
@@ -8,7 +8,7 @@
 
 #include <linux/linkage.h>
 
-ENTRY(strlen)
+ENTRY_CFI(strlen)
 	or	r3,r0,7
 	ld	r2,[r3,-7]
 	ld.a	r6,[r3,-3]
@@ -80,4 +80,4 @@
 .Learly_end:
 	b.d	.Lend
 	sub_s.ne r1,r1,r1
-END(strlen)
+END_CFI(strlen)
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 5a294b2..97dddbe 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -25,6 +25,7 @@
 int ioc_exists;
 volatile int slc_enable = 1, ioc_enable = 1;
 unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
+unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
 
 void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
 			       unsigned long sz, const int cacheop);
@@ -76,7 +77,6 @@
 static void read_decode_cache_bcr_arcv2(int cpu)
 {
 	struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
-	struct bcr_generic uncached_space;
 	struct bcr_generic sbcr;
 
 	struct bcr_slc_cfg {
@@ -95,6 +95,15 @@
 #endif
 	} cbcr;
 
+	struct bcr_volatile {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		unsigned int start:4, limit:4, pad:22, order:1, disable:1;
+#else
+		unsigned int disable:1, order:1, pad:22, limit:4, start:4;
+#endif
+	} vol;
+
+
 	READ_BCR(ARC_REG_SLC_BCR, sbcr);
 	if (sbcr.ver) {
 		READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
@@ -107,10 +116,14 @@
 	if (cbcr.c && ioc_enable)
 		ioc_exists = 1;
 
-	/* Legacy Data Uncached BCR is deprecated from v3 onwards */
-	READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
-	if (uncached_space.ver > 2)
-		perip_base = read_aux_reg(AUX_NON_VOL) & 0xF0000000;
+	/* HS 2.0 didn't have AUX_VOL */
+	if (cpuinfo_arc700[cpu].core.family > 0x51) {
+		READ_BCR(AUX_VOL, vol);
+		perip_base = vol.start << 28;
+		/* HS 3.0 has limit and strict-ordering fields */
+		if (cpuinfo_arc700[cpu].core.family > 0x52)
+			perip_end = (vol.limit << 28) - 1;
+	}
 }
 
 void read_decode_cache_bcr(void)
@@ -921,6 +934,15 @@
 
 	printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
 
+	/*
+	 * Only master CPU needs to execute rest of function:
+	 *  - Assume SMP so all cores will have same cache config so
+	 *    any geomtry checks will be same for all
+	 *  - IOC setup / dma callbacks only need to be setup once
+	 */
+	if (cpu)
+		return;
+
 	if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
 		struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
 
diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index 04f8332..77ff64a 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -61,6 +61,7 @@
 
 	return kmap_high(page);
 }
+EXPORT_SYMBOL(kmap);
 
 void *kmap_atomic(struct page *page)
 {
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c
index f52b7db6..9881bd7 100644
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -19,7 +19,7 @@
 	if (is_isa_arcompact()) {
 		if (paddr >= ARC_UNCACHED_ADDR_SPACE)
 			return true;
-	} else if (paddr >= perip_base && paddr <= 0xFFFFFFFF) {
+	} else if (paddr >= perip_base && paddr <= perip_end) {
 		return true;
 	}
 
diff --git a/arch/arc/plat-sim/platform.c b/arch/arc/plat-sim/platform.c
index e4fe514..aea8738 100644
--- a/arch/arc/plat-sim/platform.c
+++ b/arch/arc/plat-sim/platform.c
@@ -24,6 +24,7 @@
 	"snps,nsim_hs",
 	"snps,nsimosci",
 	"snps,nsimosci_hs",
+	"snps,zebu_hs",
 	NULL,
 };
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a9c4e48..3cd9042 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1,6 +1,7 @@
 config ARM
 	bool
 	default y
+	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
@@ -878,6 +879,7 @@
 	select CLKSRC_STM32
 	select PINCTRL
 	select RESET_CONTROLLER
+	select STM32_EXTI
 	help
 	  Support for STMicroelectronics STM32 processors.
 
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 56ea5c60b..61f6ccc 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -260,12 +260,14 @@
 platdirs := $(patsubst %,arch/arm/plat-%/,$(sort $(plat-y)))
 
 ifneq ($(CONFIG_ARCH_MULTIPLATFORM),y)
+ifneq ($(CONFIG_ARM_SINGLE_ARMV7M),y)
 ifeq ($(KBUILD_SRC),)
 KBUILD_CPPFLAGS += $(patsubst %,-I%include,$(machdirs) $(platdirs))
 else
 KBUILD_CPPFLAGS += $(patsubst %,-I$(srctree)/%include,$(machdirs) $(platdirs))
 endif
 endif
+endif
 
 export	TEXT_OFFSET GZFLAGS MMUEXT
 
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index af11c2f..fc6d541 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -779,7 +779,7 @@
 		orrne	r0, r0, #1		@ MMU enabled
 		movne	r1, #0xfffffffd		@ domain 0 = client
 		bic     r6, r6, #1 << 31        @ 32-bit translation system
-		bic     r6, r6, #3 << 0         @ use only ttbr0
+		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
 		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
 		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
 		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
diff --git a/arch/arm/boot/dts/am335x-baltos.dtsi b/arch/arm/boot/dts/am335x-baltos.dtsi
index c8609d8..b689172 100644
--- a/arch/arm/boot/dts/am335x-baltos.dtsi
+++ b/arch/arm/boot/dts/am335x-baltos.dtsi
@@ -226,7 +226,7 @@
 
 		#address-cells = <1>;
 		#size-cells = <1>;
-		elm_id = <&elm>;
+		ti,elm-id = <&elm>;
 	};
 };
 
diff --git a/arch/arm/boot/dts/am335x-igep0033.dtsi b/arch/arm/boot/dts/am335x-igep0033.dtsi
index df63484..e7d9ca1 100644
--- a/arch/arm/boot/dts/am335x-igep0033.dtsi
+++ b/arch/arm/boot/dts/am335x-igep0033.dtsi
@@ -161,7 +161,7 @@
 
 		#address-cells = <1>;
 		#size-cells = <1>;
-		elm_id = <&elm>;
+		ti,elm-id = <&elm>;
 
 		/* MTD partition table */
 		partition@0 {
diff --git a/arch/arm/boot/dts/am335x-phycore-som.dtsi b/arch/arm/boot/dts/am335x-phycore-som.dtsi
index 86f7731..1263c9d 100644
--- a/arch/arm/boot/dts/am335x-phycore-som.dtsi
+++ b/arch/arm/boot/dts/am335x-phycore-som.dtsi
@@ -197,7 +197,7 @@
 		gpmc,wr-access-ns = <30>;
 		gpmc,wr-data-mux-bus-ns = <0>;
 
-		elm_id = <&elm>;
+		ti,elm-id = <&elm>;
 
 		#address-cells = <1>;
 		#size-cells = <1>;
diff --git a/arch/arm/boot/dts/arm-realview-pbx-a9.dts b/arch/arm/boot/dts/arm-realview-pbx-a9.dts
index db808f9..90d00b4 100644
--- a/arch/arm/boot/dts/arm-realview-pbx-a9.dts
+++ b/arch/arm/boot/dts/arm-realview-pbx-a9.dts
@@ -70,13 +70,12 @@
 		 * associativity as these may be erroneously set
 		 * up by boot loader(s).
 		 */
-		cache-size = <1048576>; // 1MB
-		cache-sets = <4096>;
+		cache-size = <131072>; // 128KB
+		cache-sets = <512>;
 		cache-line-size = <32>;
 		arm,parity-disable;
-		arm,tag-latency = <1>;
-		arm,data-latency = <1 1>;
-		arm,dirty-latency = <1>;
+		arm,tag-latency = <1 1 1>;
+		arm,data-latency = <1 1 1>;
 	};
 
 	scu: scu@1f000000 {
diff --git a/arch/arm/boot/dts/armada-388-clearfog.dts b/arch/arm/boot/dts/armada-388-clearfog.dts
index 2e0556a..d3e6bd8 100644
--- a/arch/arm/boot/dts/armada-388-clearfog.dts
+++ b/arch/arm/boot/dts/armada-388-clearfog.dts
@@ -390,12 +390,12 @@
 
 			port@0 {
 				reg = <0>;
-				label = "lan1";
+				label = "lan5";
 			};
 
 			port@1 {
 				reg = <1>;
-				label = "lan2";
+				label = "lan4";
 			};
 
 			port@2 {
@@ -405,12 +405,12 @@
 
 			port@3 {
 				reg = <3>;
-				label = "lan4";
+				label = "lan2";
 			};
 
 			port@4 {
 				reg = <4>;
-				label = "lan5";
+				label = "lan1";
 			};
 
 			port@5 {
diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi
index caf2707..e9b47b2 100644
--- a/arch/arm/boot/dts/bcm2835-rpi.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi
@@ -2,6 +2,7 @@
 
 / {
 	memory {
+		device_type = "memory";
 		reg = <0 0x10000000>;
 	};
 
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index b982522..445624a 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -2,7 +2,6 @@
 #include <dt-bindings/clock/bcm2835.h>
 #include <dt-bindings/clock/bcm2835-aux.h>
 #include <dt-bindings/gpio/gpio.h>
-#include "skeleton.dtsi"
 
 /* This include file covers the common peripherals and configuration between
  * bcm2835 and bcm2836 implementations, leaving the CPU configuration to
@@ -13,6 +12,8 @@
 	compatible = "brcm,bcm2835";
 	model = "BCM2835";
 	interrupt-parent = <&intc>;
+	#address-cells = <1>;
+	#size-cells = <1>;
 
 	chosen {
 		bootargs = "earlyprintk console=ttyAMA0";
diff --git a/arch/arm/boot/dts/exynos5410-odroidxu.dts b/arch/arm/boot/dts/exynos5410-odroidxu.dts
index d949931..f6d1352 100644
--- a/arch/arm/boot/dts/exynos5410-odroidxu.dts
+++ b/arch/arm/boot/dts/exynos5410-odroidxu.dts
@@ -447,14 +447,11 @@
 	samsung,dw-mshc-ciu-div = <3>;
 	samsung,dw-mshc-sdr-timing = <0 4>;
 	samsung,dw-mshc-ddr-timing = <0 2>;
-	samsung,dw-mshc-hs400-timing = <0 2>;
-	samsung,read-strobe-delay = <90>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus1 &sd0_bus4 &sd0_bus8 &sd0_cd>;
 	bus-width = <8>;
 	cap-mmc-highspeed;
 	mmc-hs200-1_8v;
-	mmc-hs400-1_8v;
 	vmmc-supply = <&ldo20_reg>;
 	vqmmc-supply = <&ldo11_reg>;
 };
diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index b620ac8..b13b0b2 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -243,7 +243,7 @@
 					clocks = <&clks IMX6QDL_CLK_SPDIF_GCLK>, <&clks IMX6QDL_CLK_OSC>,
 						 <&clks IMX6QDL_CLK_SPDIF>, <&clks IMX6QDL_CLK_ASRC>,
 						 <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_ESAI_EXTAL>,
-						 <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_MLB>,
+						 <&clks IMX6QDL_CLK_IPG>, <&clks IMX6QDL_CLK_DUMMY>,
 						 <&clks IMX6QDL_CLK_DUMMY>, <&clks IMX6QDL_CLK_SPBA>;
 					clock-names = "core",  "rxtx0",
 						      "rxtx1", "rxtx2",
diff --git a/arch/arm/boot/dts/imx6sx-sabreauto.dts b/arch/arm/boot/dts/imx6sx-sabreauto.dts
index 96ea936..240a286 100644
--- a/arch/arm/boot/dts/imx6sx-sabreauto.dts
+++ b/arch/arm/boot/dts/imx6sx-sabreauto.dts
@@ -64,7 +64,7 @@
 	cd-gpios = <&gpio7 11 GPIO_ACTIVE_LOW>;
 	no-1-8-v;
 	keep-power-in-suspend;
-	enable-sdio-wakup;
+	wakeup-source;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts
index 95ee268..2f33c46 100644
--- a/arch/arm/boot/dts/imx7d-sdb.dts
+++ b/arch/arm/boot/dts/imx7d-sdb.dts
@@ -131,7 +131,7 @@
 		ti,y-min = /bits/ 16 <0>;
 		ti,y-max = /bits/ 16 <0>;
 		ti,pressure-max = /bits/ 16 <0>;
-		ti,x-plat-ohms = /bits/ 16 <400>;
+		ti,x-plate-ohms = /bits/ 16 <400>;
 		wakeup-source;
 	};
 };
diff --git a/arch/arm/boot/dts/integratorap.dts b/arch/arm/boot/dts/integratorap.dts
index cf06e32..4b34b54 100644
--- a/arch/arm/boot/dts/integratorap.dts
+++ b/arch/arm/boot/dts/integratorap.dts
@@ -42,7 +42,7 @@
 	};
 
 	syscon {
-		compatible = "arm,integrator-ap-syscon";
+		compatible = "arm,integrator-ap-syscon", "syscon";
 		reg = <0x11000000 0x100>;
 		interrupt-parent = <&pic>;
 		/* These are the logical module IRQs */
diff --git a/arch/arm/boot/dts/integratorcp.dts b/arch/arm/boot/dts/integratorcp.dts
index d43f15b..79430fb 100644
--- a/arch/arm/boot/dts/integratorcp.dts
+++ b/arch/arm/boot/dts/integratorcp.dts
@@ -94,7 +94,7 @@
 	};
 
 	syscon {
-		compatible = "arm,integrator-cp-syscon";
+		compatible = "arm,integrator-cp-syscon", "syscon";
 		reg = <0xcb000000 0x100>;
 	};
 
diff --git a/arch/arm/boot/dts/keystone.dtsi b/arch/arm/boot/dts/keystone.dtsi
index 00cb314..e23f46d 100644
--- a/arch/arm/boot/dts/keystone.dtsi
+++ b/arch/arm/boot/dts/keystone.dtsi
@@ -70,14 +70,6 @@
 		cpu_on		= <0x84000003>;
 	};
 
-	psci {
-		compatible	= "arm,psci";
-		method		= "smc";
-		cpu_suspend	= <0x84000001>;
-		cpu_off		= <0x84000002>;
-		cpu_on		= <0x84000003>;
-	};
-
 	soc {
 		#address-cells = <1>;
 		#size-cells = <1>;
diff --git a/arch/arm/boot/dts/kirkwood-ib62x0.dts b/arch/arm/boot/dts/kirkwood-ib62x0.dts
index ef84d86..5bf6289 100644
--- a/arch/arm/boot/dts/kirkwood-ib62x0.dts
+++ b/arch/arm/boot/dts/kirkwood-ib62x0.dts
@@ -113,7 +113,7 @@
 
 	partition@e0000 {
 		label = "u-boot environment";
-		reg = <0xe0000 0x100000>;
+		reg = <0xe0000 0x20000>;
 	};
 
 	partition@100000 {
diff --git a/arch/arm/boot/dts/kirkwood-openrd.dtsi b/arch/arm/boot/dts/kirkwood-openrd.dtsi
index e4ecab1..7175511 100644
--- a/arch/arm/boot/dts/kirkwood-openrd.dtsi
+++ b/arch/arm/boot/dts/kirkwood-openrd.dtsi
@@ -116,6 +116,10 @@
 	};
 };
 
+&pciec {
+	status = "okay";
+};
+
 &pcie0 {
 	status = "okay";
 };
diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi
index 365f39ff..0ff1c2d 100644
--- a/arch/arm/boot/dts/logicpd-som-lv.dtsi
+++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi
@@ -35,10 +35,15 @@
 	ranges = <0 0 0x00000000 0x1000000>;	/* CS0: 16MB for NAND */
 
 	nand@0,0 {
-		linux,mtd-name = "micron,mt29f4g16abbda3w";
+		compatible = "ti,omap2-nand";
 		reg = <0 0 4>; /* CS0, offset 0, IO size 4 */
+		interrupt-parent = <&gpmc>;
+		interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */
+			     <1 IRQ_TYPE_NONE>;	/* termcount */
+		linux,mtd-name = "micron,mt29f4g16abbda3w";
 		nand-bus-width = <16>;
 		ti,nand-ecc-opt = "bch8";
+		rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */
 		gpmc,sync-clk-ps = <0>;
 		gpmc,cs-on-ns = <0>;
 		gpmc,cs-rd-off-ns = <44>;
@@ -54,10 +59,6 @@
 		gpmc,wr-access-ns = <40>;
 		gpmc,wr-data-mux-bus-ns = <0>;
 		gpmc,device-width = <2>;
-
-		gpmc,page-burst-access-ns = <5>;
-		gpmc,cycle2cycle-delay-ns = <50>;
-
 		#address-cells = <1>;
 		#size-cells = <1>;
 
diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
index 5e9a13c..1c2c746 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
+++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi
@@ -46,6 +46,7 @@
 		linux,mtd-name = "micron,mt29f4g16abbda3w";
 		nand-bus-width = <16>;
 		ti,nand-ecc-opt = "bch8";
+		rb-gpios = <&gpmc 0 GPIO_ACTIVE_HIGH>; /* gpmc_wait0 */
 		gpmc,sync-clk-ps = <0>;
 		gpmc,cs-on-ns = <0>;
 		gpmc,cs-rd-off-ns = <44>;
diff --git a/arch/arm/boot/dts/omap3-overo-base.dtsi b/arch/arm/boot/dts/omap3-overo-base.dtsi
index de256fa..3e946ca 100644
--- a/arch/arm/boot/dts/omap3-overo-base.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-base.dtsi
@@ -223,7 +223,9 @@
 };
 
 &gpmc {
-	ranges = <0 0 0x00000000 0x20000000>;
+	ranges = <0 0 0x30000000 0x1000000>,	/* CS0 */
+		 <4 0 0x2b000000 0x1000000>,	/* CS4 */
+		 <5 0 0x2c000000 0x1000000>;	/* CS5 */
 
 	nand@0,0 {
 		compatible = "ti,omap2-nand";
diff --git a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi
index 7df2792..4f4c6ef 100644
--- a/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-chestnut43-common.dtsi
@@ -55,8 +55,6 @@
 #include "omap-gpmc-smsc9221.dtsi"
 
 &gpmc {
-	ranges = <5 0 0x2c000000 0x1000000>;	/* CS5 */
-
 	ethernet@gpmc {
 		reg = <5 0 0xff>;
 		interrupt-parent = <&gpio6>;
diff --git a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
index 9e24b6a..1b304e2 100644
--- a/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-tobi-common.dtsi
@@ -27,8 +27,6 @@
 #include "omap-gpmc-smsc9221.dtsi"
 
 &gpmc {
-	ranges = <5 0 0x2c000000 0x1000000>;	/* CS5 */
-
 	ethernet@gpmc {
 		reg = <5 0 0xff>;
 		interrupt-parent = <&gpio6>;
diff --git a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
index 334109e..82e98ee 100644
--- a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
+++ b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi
@@ -15,9 +15,6 @@
 #include "omap-gpmc-smsc9221.dtsi"
 
 &gpmc {
-	ranges = <4 0 0x2b000000 0x1000000>,	/* CS4 */
-		 <5 0 0x2c000000 0x1000000>;	/* CS5 */
-
 	smsc1: ethernet@gpmc {
 		reg = <5 0 0xff>;
 		interrupt-parent = <&gpio6>;
diff --git a/arch/arm/boot/dts/rk3066a.dtsi b/arch/arm/boot/dts/rk3066a.dtsi
index c0ba86c..0d0dae3 100644
--- a/arch/arm/boot/dts/rk3066a.dtsi
+++ b/arch/arm/boot/dts/rk3066a.dtsi
@@ -197,6 +197,8 @@
 		clock-names = "saradc", "apb_pclk";
 		interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
 		#io-channel-cells = <1>;
+		resets = <&cru SRST_SARADC>;
+		reset-names = "saradc-apb";
 		status = "disabled";
 	};
 
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index cd33f01..91c4b3c 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -279,6 +279,8 @@
 		#io-channel-cells = <1>;
 		clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
 		clock-names = "saradc", "apb_pclk";
+		resets = <&cru SRST_SARADC>;
+		reset-names = "saradc-apb";
 		status = "disabled";
 	};
 
diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi
index 99bbcc2..e2cd683 100644
--- a/arch/arm/boot/dts/rk3xxx.dtsi
+++ b/arch/arm/boot/dts/rk3xxx.dtsi
@@ -399,6 +399,8 @@
 		#io-channel-cells = <1>;
 		clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
 		clock-names = "saradc", "apb_pclk";
+		resets = <&cru SRST_SARADC>;
+		reset-names = "saradc-apb";
 		status = "disabled";
 	};
 
diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi
index 94000cb..f520cbf 100644
--- a/arch/arm/boot/dts/socfpga_arria10.dtsi
+++ b/arch/arm/boot/dts/socfpga_arria10.dtsi
@@ -639,6 +639,22 @@
 				interrupts = <5 IRQ_TYPE_LEVEL_HIGH>,
 					     <37 IRQ_TYPE_LEVEL_HIGH>;
 			};
+
+			dma-ecc@ff8c8000 {
+				compatible = "altr,socfpga-dma-ecc";
+				reg = <0xff8c8000 0x400>;
+				altr,ecc-parent = <&pdma>;
+				interrupts = <10 IRQ_TYPE_LEVEL_HIGH>,
+					     <42 IRQ_TYPE_LEVEL_HIGH>;
+			};
+
+			usb0-ecc@ff8c8800 {
+				compatible = "altr,socfpga-usb-ecc";
+				reg = <0xff8c8800 0x400>;
+				altr,ecc-parent = <&usb0>;
+				interrupts = <2 IRQ_TYPE_LEVEL_HIGH>,
+					     <34 IRQ_TYPE_LEVEL_HIGH>;
+			};
 		};
 
 		rst: rstmgr@ffd05000 {
diff --git a/arch/arm/boot/dts/socfpga_arria10_socdk_sdmmc.dts b/arch/arm/boot/dts/socfpga_arria10_socdk_sdmmc.dts
index 8a7dfa4..040a164 100644
--- a/arch/arm/boot/dts/socfpga_arria10_socdk_sdmmc.dts
+++ b/arch/arm/boot/dts/socfpga_arria10_socdk_sdmmc.dts
@@ -25,3 +25,15 @@
 	broken-cd;
 	bus-width = <4>;
 };
+
+&eccmgr {
+	sdmmca-ecc@ff8c2c00 {
+		compatible = "altr,socfpga-sdmmc-ecc";
+		reg = <0xff8c2c00 0x400>;
+		altr,ecc-parent = <&mmc>;
+		interrupts = <15 IRQ_TYPE_LEVEL_HIGH>,
+			     <47 IRQ_TYPE_LEVEL_HIGH>,
+			     <16 IRQ_TYPE_LEVEL_HIGH>,
+			     <48 IRQ_TYPE_LEVEL_HIGH>;
+	};
+};
diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi
index d294e82..8b063ab 100644
--- a/arch/arm/boot/dts/stih407-family.dtsi
+++ b/arch/arm/boot/dts/stih407-family.dtsi
@@ -550,8 +550,9 @@
 			interrupt-names = "mmcirq";
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_mmc0>;
-			clock-names = "mmc";
-			clocks = <&clk_s_c0_flexgen CLK_MMC_0>;
+			clock-names = "mmc", "icn";
+			clocks = <&clk_s_c0_flexgen CLK_MMC_0>,
+				 <&clk_s_c0_flexgen CLK_RX_ICN_HVA>;
 			bus-width = <8>;
 			non-removable;
 		};
@@ -565,8 +566,9 @@
 			interrupt-names = "mmcirq";
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_sd1>;
-			clock-names = "mmc";
-			clocks = <&clk_s_c0_flexgen CLK_MMC_1>;
+			clock-names = "mmc", "icn";
+			clocks = <&clk_s_c0_flexgen CLK_MMC_1>,
+				 <&clk_s_c0_flexgen CLK_RX_ICN_HVA>;
 			resets = <&softreset STIH407_MMC1_SOFTRESET>;
 			bus-width = <4>;
 		};
diff --git a/arch/arm/boot/dts/stih410.dtsi b/arch/arm/boot/dts/stih410.dtsi
index 18ed1ad..4031886 100644
--- a/arch/arm/boot/dts/stih410.dtsi
+++ b/arch/arm/boot/dts/stih410.dtsi
@@ -41,7 +41,8 @@
 			compatible = "st,st-ohci-300x";
 			reg = <0x9a03c00 0x100>;
 			interrupts = <GIC_SPI 180 IRQ_TYPE_NONE>;
-			clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
+			clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
+				 <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
 			resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>,
 				 <&softreset STIH407_USB2_PORT0_SOFTRESET>;
 			reset-names = "power", "softreset";
@@ -57,7 +58,8 @@
 			interrupts = <GIC_SPI 151 IRQ_TYPE_NONE>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_usb0>;
-			clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
+			clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
+				 <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
 			resets = <&powerdown STIH407_USB2_PORT0_POWERDOWN>,
 				 <&softreset STIH407_USB2_PORT0_SOFTRESET>;
 			reset-names = "power", "softreset";
@@ -71,7 +73,8 @@
 			compatible = "st,st-ohci-300x";
 			reg = <0x9a83c00 0x100>;
 			interrupts = <GIC_SPI 181 IRQ_TYPE_NONE>;
-			clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
+			clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
+				 <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
 			resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>,
 				 <&softreset STIH407_USB2_PORT1_SOFTRESET>;
 			reset-names = "power", "softreset";
@@ -87,7 +90,8 @@
 			interrupts = <GIC_SPI 153 IRQ_TYPE_NONE>;
 			pinctrl-names = "default";
 			pinctrl-0 = <&pinctrl_usb1>;
-			clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>;
+			clocks = <&clk_s_c0_flexgen CLK_TX_ICN_DISP_0>,
+				 <&clk_s_c0_flexgen CLK_RX_ICN_DISP_0>;
 			resets = <&powerdown STIH407_USB2_PORT1_POWERDOWN>,
 				 <&softreset STIH407_USB2_PORT1_SOFTRESET>;
 			reset-names = "power", "softreset";
diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi
index 35df462..1a189d4 100644
--- a/arch/arm/boot/dts/stm32f429.dtsi
+++ b/arch/arm/boot/dts/stm32f429.dtsi
@@ -176,6 +176,14 @@
 			reg = <0x40013800 0x400>;
 		};
 
+		exti: interrupt-controller@40013c00 {
+			compatible = "st,stm32-exti";
+			interrupt-controller;
+			#interrupt-cells = <2>;
+			reg = <0x40013C00 0x400>;
+			interrupts = <1>, <2>, <3>, <6>, <7>, <8>, <9>, <10>, <23>, <40>, <41>, <42>, <62>, <76>;
+		};
+
 		pin-controller {
 			#address-cells = <1>;
 			#size-cells = <1>;
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index e012890..a17ba02 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -84,7 +84,7 @@
 			trips {
 				cpu_alert0: cpu_alert0 {
 					/* milliCelsius */
-					temperature = <850000>;
+					temperature = <85000>;
 					hysteresis = <2000>;
 					type = "passive";
 				};
diff --git a/arch/arm/boot/dts/tegra114-dalmore.dts b/arch/arm/boot/dts/tegra114-dalmore.dts
index 1dfc492..1444fbd 100644
--- a/arch/arm/boot/dts/tegra114-dalmore.dts
+++ b/arch/arm/boot/dts/tegra114-dalmore.dts
@@ -897,7 +897,7 @@
 		palmas: tps65913@58 {
 			compatible = "ti,palmas";
 			reg = <0x58>;
-			interrupts = <0 86 IRQ_TYPE_LEVEL_LOW>;
+			interrupts = <0 86 IRQ_TYPE_LEVEL_HIGH>;
 
 			#interrupt-cells = <2>;
 			interrupt-controller;
diff --git a/arch/arm/boot/dts/tegra114-roth.dts b/arch/arm/boot/dts/tegra114-roth.dts
index 70cf409..966a7fc 100644
--- a/arch/arm/boot/dts/tegra114-roth.dts
+++ b/arch/arm/boot/dts/tegra114-roth.dts
@@ -802,7 +802,7 @@
 		palmas: pmic@58 {
 			compatible = "ti,palmas";
 			reg = <0x58>;
-			interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_LOW>;
+			interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
 
 			#interrupt-cells = <2>;
 			interrupt-controller;
diff --git a/arch/arm/boot/dts/tegra114-tn7.dts b/arch/arm/boot/dts/tegra114-tn7.dts
index 17dd145..a161fa1 100644
--- a/arch/arm/boot/dts/tegra114-tn7.dts
+++ b/arch/arm/boot/dts/tegra114-tn7.dts
@@ -63,7 +63,7 @@
 		palmas: pmic@58 {
 			compatible = "ti,palmas";
 			reg = <0x58>;
-			interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_LOW>;
+			interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
 
 			#interrupt-cells = <2>;
 			interrupt-controller;
diff --git a/arch/arm/common/bL_switcher_dummy_if.c b/arch/arm/common/bL_switcher_dummy_if.c
index 3f47f12..6053f64 100644
--- a/arch/arm/common/bL_switcher_dummy_if.c
+++ b/arch/arm/common/bL_switcher_dummy_if.c
@@ -56,16 +56,4 @@
 	"b.L_switcher",
 	&bL_switcher_fops
 };
-
-static int __init bL_switcher_dummy_if_init(void)
-{
-	return misc_register(&bL_switcher_device);
-}
-
-static void __exit bL_switcher_dummy_if_exit(void)
-{
-	misc_deregister(&bL_switcher_device);
-}
-
-module_init(bL_switcher_dummy_if_init);
-module_exit(bL_switcher_dummy_if_exit);
+module_misc_device(bL_switcher_device);
diff --git a/arch/arm/common/locomo.c b/arch/arm/common/locomo.c
index 0e97b4b..6c7b06854 100644
--- a/arch/arm/common/locomo.c
+++ b/arch/arm/common/locomo.c
@@ -140,7 +140,7 @@
 
 static void locomo_handler(struct irq_desc *desc)
 {
-	struct locomo *lchip = irq_desc_get_chip_data(desc);
+	struct locomo *lchip = irq_desc_get_handler_data(desc);
 	int req, i;
 
 	/* Acknowledge the parent IRQ */
@@ -200,8 +200,7 @@
 	 * Install handler for IRQ_LOCOMO_HW.
 	 */
 	irq_set_irq_type(lchip->irq, IRQ_TYPE_EDGE_FALLING);
-	irq_set_chip_data(lchip->irq, lchip);
-	irq_set_chained_handler(lchip->irq, locomo_handler);
+	irq_set_chained_handler_and_data(lchip->irq, locomo_handler, lchip);
 
 	/* Install handlers for IRQ_LOCOMO_* */
 	for ( ; irq <= lchip->irq_base + 3; irq++) {
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index fb0a0a4..2e076c4 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -472,8 +472,8 @@
 	 * specifies that S0ReadyInt and S1ReadyInt should be '1'.
 	 */
 	sa1111_writel(0, irqbase + SA1111_INTPOL0);
-	sa1111_writel(SA1111_IRQMASK_HI(IRQ_S0_READY_NINT) |
-		      SA1111_IRQMASK_HI(IRQ_S1_READY_NINT),
+	sa1111_writel(BIT(IRQ_S0_READY_NINT & 31) |
+		      BIT(IRQ_S1_READY_NINT & 31),
 		      irqbase + SA1111_INTPOL1);
 
 	/* clear all IRQs */
@@ -754,7 +754,7 @@
 	if (sachip->irq != NO_IRQ) {
 		ret = sa1111_setup_irq(sachip, pd->irq_base);
 		if (ret)
-			goto err_unmap;
+			goto err_clk;
 	}
 
 #ifdef CONFIG_ARCH_SA1100
@@ -799,6 +799,8 @@
 
 	return 0;
 
+ err_clk:
+	clk_disable(sachip->clk);
  err_unmap:
 	iounmap(sachip->base);
  err_clk_unprep:
@@ -869,9 +871,9 @@
 
 #ifdef CONFIG_PM
 
-static int sa1111_suspend(struct platform_device *dev, pm_message_t state)
+static int sa1111_suspend_noirq(struct device *dev)
 {
-	struct sa1111 *sachip = platform_get_drvdata(dev);
+	struct sa1111 *sachip = dev_get_drvdata(dev);
 	struct sa1111_save_data *save;
 	unsigned long flags;
 	unsigned int val;
@@ -934,9 +936,9 @@
  *	restored by their respective drivers, and must be called
  *	via LDM after this function.
  */
-static int sa1111_resume(struct platform_device *dev)
+static int sa1111_resume_noirq(struct device *dev)
 {
-	struct sa1111 *sachip = platform_get_drvdata(dev);
+	struct sa1111 *sachip = dev_get_drvdata(dev);
 	struct sa1111_save_data *save;
 	unsigned long flags, id;
 	void __iomem *base;
@@ -952,7 +954,7 @@
 	id = sa1111_readl(sachip->base + SA1111_SKID);
 	if ((id & SKID_ID_MASK) != SKID_SA1111_ID) {
 		__sa1111_remove(sachip);
-		platform_set_drvdata(dev, NULL);
+		dev_set_drvdata(dev, NULL);
 		kfree(save);
 		return 0;
 	}
@@ -1003,8 +1005,8 @@
 }
 
 #else
-#define sa1111_suspend NULL
-#define sa1111_resume  NULL
+#define sa1111_suspend_noirq NULL
+#define sa1111_resume_noirq  NULL
 #endif
 
 static int sa1111_probe(struct platform_device *pdev)
@@ -1017,7 +1019,7 @@
 		return -EINVAL;
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
-		return -ENXIO;
+		return irq;
 
 	return __sa1111_probe(&pdev->dev, mem, irq);
 }
@@ -1038,6 +1040,11 @@
 	return 0;
 }
 
+static struct dev_pm_ops sa1111_pm_ops = {
+	.suspend_noirq = sa1111_suspend_noirq,
+	.resume_noirq = sa1111_resume_noirq,
+};
+
 /*
  *	Not sure if this should be on the system bus or not yet.
  *	We really want some way to register a system device at
@@ -1050,10 +1057,9 @@
 static struct platform_driver sa1111_device_driver = {
 	.probe		= sa1111_probe,
 	.remove		= sa1111_remove,
-	.suspend	= sa1111_suspend,
-	.resume		= sa1111_resume,
 	.driver		= {
 		.name	= "sa1111",
+		.pm	= &sa1111_pm_ops,
 	},
 };
 
diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig
index b6e54ee..ca39c04 100644
--- a/arch/arm/configs/aspeed_g4_defconfig
+++ b/arch/arm/configs/aspeed_g4_defconfig
@@ -58,7 +58,7 @@
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_FIRMWARE_MEMMAP=y
 CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_PAGE_POISONING=y
diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig
index 8926051..4f366b0 100644
--- a/arch/arm/configs/aspeed_g5_defconfig
+++ b/arch/arm/configs/aspeed_g5_defconfig
@@ -59,7 +59,7 @@
 # CONFIG_IOMMU_SUPPORT is not set
 CONFIG_FIRMWARE_MEMMAP=y
 CONFIG_FANOTIFY=y
-CONFIG_PRINTK_TIME=1
+CONFIG_PRINTK_TIME=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_PAGE_POISONING=y
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 01986de..36cc7cc 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -28,7 +28,7 @@
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_USERSPACE=m
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
 CONFIG_CPUFREQ_DT=y
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_EXYNOS_CPUIDLE=y
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 71b42e6..78cd2f1 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -161,6 +161,7 @@
 CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_STORAGE=y
 CONFIG_USB_DWC3=y
+CONFIG_NOP_USB_XCEIV=y
 CONFIG_KEYSTONE_USB_PHY=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index 2c8665c..5845910 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -135,7 +135,7 @@
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_USERSPACE=m
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
 CONFIG_QORIQ_CPUFREQ=y
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_CPUIDLE=y
@@ -781,7 +781,7 @@
 CONFIG_DMA_BCM2835=y
 CONFIG_DMA_OMAP=y
 CONFIG_QCOM_BAM_DMA=y
-CONFIG_XILINX_VDMA=y
+CONFIG_XILINX_DMA=y
 CONFIG_DMA_SUN6I=y
 CONFIG_STAGING=y
 CONFIG_SENSORS_ISL29018=y
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index da3c042..aef022a 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -284,7 +284,7 @@
 		err = blkcipher_walk_done(desc, &walk,
 					  walk.nbytes % AES_BLOCK_SIZE);
 	}
-	if (nbytes) {
+	if (walk.nbytes % AES_BLOCK_SIZE) {
 		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h
index e08d151..dfe4002 100644
--- a/arch/arm/include/asm/arch_gicv3.h
+++ b/arch/arm/include/asm/arch_gicv3.h
@@ -34,6 +34,7 @@
 #define ICC_CTLR			__ACCESS_CP15(c12, 0, c12, 4)
 #define ICC_SRE				__ACCESS_CP15(c12, 0, c12, 5)
 #define ICC_IGRPEN1			__ACCESS_CP15(c12, 0, c12, 7)
+#define ICC_BPR1			__ACCESS_CP15(c12, 0, c12, 3)
 
 #define ICC_HSRE			__ACCESS_CP15(c12, 4, c9, 5)
 
@@ -157,6 +158,11 @@
 	isb();
 }
 
+static inline void gic_write_bpr1(u32 val)
+{
+	asm volatile("mcr " __stringify(ICC_BPR1) : : "r" (val));
+}
+
 /*
  * Even in 32bit systems that use LPAE, there is no guarantee that the I/O
  * interface provides true 64bit atomic accesses, so using strd/ldrd doesn't
diff --git a/arch/arm/include/asm/clocksource.h b/arch/arm/include/asm/clocksource.h
new file mode 100644
index 0000000..0b350a7
--- /dev/null
+++ b/arch/arm/include/asm/clocksource.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_CLOCKSOURCE_H
+#define _ASM_CLOCKSOURCE_H
+
+struct arch_clocksource_data {
+	bool vdso_direct;	/* Usable for direct VDSO access? */
+};
+
+#endif
diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h
index d009f79..bf02dbd 100644
--- a/arch/arm/include/asm/dma-mapping.h
+++ b/arch/arm/include/asm/dma-mapping.h
@@ -111,7 +111,7 @@
 /* The ARM override for dma_max_pfn() */
 static inline unsigned long dma_max_pfn(struct device *dev)
 {
-	return PHYS_PFN_OFFSET + dma_to_pfn(dev, *dev->dma_mask);
+	return dma_to_pfn(dev, *dev->dma_mask);
 }
 #define dma_max_pfn(dev) dma_max_pfn(dev)
 
diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h
index d0131ee..3f82e9d 100644
--- a/arch/arm/include/asm/pgtable-2level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-2level-hwdef.h
@@ -47,6 +47,7 @@
 #define PMD_SECT_WB		(PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
 #define PMD_SECT_MINICACHE	(PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE)
 #define PMD_SECT_WBWA		(PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
+#define PMD_SECT_CACHE_MASK	(PMD_SECT_TEX(1) | PMD_SECT_CACHEABLE | PMD_SECT_BUFFERABLE)
 #define PMD_SECT_NONSHARED_DEV	(PMD_SECT_TEX(2))
 
 /*
diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h
index f8f1cff..4cd664a 100644
--- a/arch/arm/include/asm/pgtable-3level-hwdef.h
+++ b/arch/arm/include/asm/pgtable-3level-hwdef.h
@@ -62,6 +62,7 @@
 #define PMD_SECT_WT		(_AT(pmdval_t, 2) << 2)	/* normal inner write-through */
 #define PMD_SECT_WB		(_AT(pmdval_t, 3) << 2)	/* normal inner write-back */
 #define PMD_SECT_WBWA		(_AT(pmdval_t, 7) << 2)	/* normal inner write-alloc */
+#define PMD_SECT_CACHE_MASK	(_AT(pmdval_t, 7) << 2)
 
 /*
  * + Level 3 descriptor (PTE)
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 40ecd5f..f676feb 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -88,6 +88,8 @@
 		return;
 
 	for_each_child_of_node(cpus, cpu) {
+		const __be32 *cell;
+		int prop_bytes;
 		u32 hwid;
 
 		if (of_node_cmp(cpu->type, "cpu"))
@@ -99,7 +101,8 @@
 		 * properties is considered invalid to build the
 		 * cpu_logical_map.
 		 */
-		if (of_property_read_u32(cpu, "reg", &hwid)) {
+		cell = of_get_property(cpu, "reg", &prop_bytes);
+		if (!cell || prop_bytes < sizeof(*cell)) {
 			pr_debug(" * %s missing reg property\n",
 				     cpu->full_name);
 			of_node_put(cpu);
@@ -107,10 +110,15 @@
 		}
 
 		/*
-		 * 8 MSBs must be set to 0 in the DT since the reg property
+		 * Bits n:24 must be set to 0 in the DT since the reg property
 		 * defines the MPIDR[23:0].
 		 */
-		if (hwid & ~MPIDR_HWID_BITMASK) {
+		do {
+			hwid = be32_to_cpu(*cell++);
+			prop_bytes -= sizeof(*cell);
+		} while (!hwid && prop_bytes > 0);
+
+		if (prop_bytes || (hwid & ~MPIDR_HWID_BITMASK)) {
 			of_node_put(cpu);
 			return;
 		}
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index bc5f507..9f157e7 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -295,6 +295,7 @@
 	bl	__und_fault
 
 __und_svc_finish:
+	get_thread_info tsk
 	ldr	r5, [sp, #S_PSR]		@ Get SVC cpsr
 	svc_exit r5				@ return from exception
  UNWIND(.fnend		)
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 709ee1d..3f17594 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -218,7 +218,7 @@
 	}
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer);
+				       frame_pointer, NULL);
 	if (err == -EBUSY) {
 		*parent = old;
 		return;
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 0b1e4a9..15d073a 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -142,6 +142,19 @@
 	and	r7, #0x1f		@ Preserve HPMN
 	mcr	p15, 4, r7, c1, c1, 1	@ HDCR
 
+	@ Make sure NS-SVC is initialised appropriately
+	mrc	p15, 0, r7, c1, c0, 0	@ SCTLR
+	orr	r7, #(1 << 5)		@ CP15 barriers enabled
+	bic	r7, #(3 << 7)		@ Clear SED/ITD for v8 (RES0 for v7)
+	bic	r7, #(3 << 19)		@ WXN and UWXN disabled
+	mcr	p15, 0, r7, c1, c0, 0	@ SCTLR
+
+	mrc	p15, 0, r7, c0, c0, 0	@ MIDR
+	mcr	p15, 4, r7, c0, c0, 0	@ VPIDR
+
+	mrc	p15, 0, r7, c0, c0, 5	@ MPIDR
+	mcr	p15, 4, r7, c0, c0, 5	@ VMPIDR
+
 #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER)
 	@ make CNTP_* and CNTPCT accessible from PL1
 	mrc	p15, 0, r7, c0, c1, 1	@ ID_PFR1
diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c
index 1506385..b942349 100644
--- a/arch/arm/kernel/perf_event_v7.c
+++ b/arch/arm/kernel/perf_event_v7.c
@@ -596,12 +596,6 @@
 	.attrs = armv7_pmuv1_event_attrs,
 };
 
-static const struct attribute_group *armv7_pmuv1_attr_groups[] = {
-	&armv7_pmuv1_events_attr_group,
-	&armv7_pmu_format_attr_group,
-	NULL,
-};
-
 ARMV7_EVENT_ATTR(mem_access, ARMV7_PERFCTR_MEM_ACCESS);
 ARMV7_EVENT_ATTR(l1i_cache, ARMV7_PERFCTR_L1_ICACHE_ACCESS);
 ARMV7_EVENT_ATTR(l1d_cache_wb, ARMV7_PERFCTR_L1_DCACHE_WB);
@@ -653,12 +647,6 @@
 	.attrs = armv7_pmuv2_event_attrs,
 };
 
-static const struct attribute_group *armv7_pmuv2_attr_groups[] = {
-	&armv7_pmuv2_events_attr_group,
-	&armv7_pmu_format_attr_group,
-	NULL,
-};
-
 /*
  * Perf Events' indices
  */
@@ -1208,7 +1196,10 @@
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_cortex_a8";
 	cpu_pmu->map_event	= armv7_a8_map_event;
-	cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv7_pmuv1_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv7_pmu_format_attr_group;
 	return armv7_probe_num_events(cpu_pmu);
 }
 
@@ -1217,7 +1208,10 @@
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_cortex_a9";
 	cpu_pmu->map_event	= armv7_a9_map_event;
-	cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv7_pmuv1_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv7_pmu_format_attr_group;
 	return armv7_probe_num_events(cpu_pmu);
 }
 
@@ -1226,7 +1220,10 @@
 	armv7pmu_init(cpu_pmu);
 	cpu_pmu->name		= "armv7_cortex_a5";
 	cpu_pmu->map_event	= armv7_a5_map_event;
-	cpu_pmu->pmu.attr_groups = armv7_pmuv1_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv7_pmuv1_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv7_pmu_format_attr_group;
 	return armv7_probe_num_events(cpu_pmu);
 }
 
@@ -1236,7 +1233,10 @@
 	cpu_pmu->name		= "armv7_cortex_a15";
 	cpu_pmu->map_event	= armv7_a15_map_event;
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
-	cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv7_pmuv2_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv7_pmu_format_attr_group;
 	return armv7_probe_num_events(cpu_pmu);
 }
 
@@ -1246,7 +1246,10 @@
 	cpu_pmu->name		= "armv7_cortex_a7";
 	cpu_pmu->map_event	= armv7_a7_map_event;
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
-	cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv7_pmuv2_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv7_pmu_format_attr_group;
 	return armv7_probe_num_events(cpu_pmu);
 }
 
@@ -1256,7 +1259,10 @@
 	cpu_pmu->name		= "armv7_cortex_a12";
 	cpu_pmu->map_event	= armv7_a12_map_event;
 	cpu_pmu->set_event_filter = armv7pmu_set_event_filter;
-	cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv7_pmuv2_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv7_pmu_format_attr_group;
 	return armv7_probe_num_events(cpu_pmu);
 }
 
@@ -1264,7 +1270,10 @@
 {
 	int ret = armv7_a12_pmu_init(cpu_pmu);
 	cpu_pmu->name = "armv7_cortex_a17";
-	cpu_pmu->pmu.attr_groups = armv7_pmuv2_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv7_pmuv2_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv7_pmu_format_attr_group;
 	return ret;
 }
 
diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c
index 087acb5..5f221ac 100644
--- a/arch/arm/kernel/sys_oabi-compat.c
+++ b/arch/arm/kernel/sys_oabi-compat.c
@@ -279,8 +279,12 @@
 	mm_segment_t fs;
 	long ret, err, i;
 
-	if (maxevents <= 0 || maxevents > (INT_MAX/sizeof(struct epoll_event)))
+	if (maxevents <= 0 ||
+			maxevents > (INT_MAX/sizeof(*kbuf)) ||
+			maxevents > (INT_MAX/sizeof(*events)))
 		return -EINVAL;
+	if (!access_ok(VERIFY_WRITE, events, sizeof(*events) * maxevents))
+		return -EFAULT;
 	kbuf = kmalloc(sizeof(*kbuf) * maxevents, GFP_KERNEL);
 	if (!kbuf)
 		return -ENOMEM;
@@ -317,6 +321,8 @@
 
 	if (nsops < 1 || nsops > SEMOPM)
 		return -EINVAL;
+	if (!access_ok(VERIFY_READ, tsops, sizeof(*tsops) * nsops))
+		return -EFAULT;
 	sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL);
 	if (!sops)
 		return -ENOMEM;
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 994e971..a0affd1 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -270,7 +270,7 @@
 	if (!IS_ENABLED(CONFIG_ARM_ARCH_TIMER))
 		return false;
 
-	if (strcmp(tk->tkr_mono.clock->name, "arch_sys_counter") != 0)
+	if (!tk->tkr_mono.clock->archdata.vdso_direct)
 		return false;
 
 	return true;
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index d94bb90..c94b90d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -158,8 +158,6 @@
 {
 	int i;
 
-	kvm_free_stage2_pgd(kvm);
-
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
 		if (kvm->vcpus[i]) {
 			kvm_arch_vcpu_free(kvm->vcpus[i]);
@@ -1009,9 +1007,13 @@
 
 	switch (ioctl) {
 	case KVM_CREATE_IRQCHIP: {
+		int ret;
 		if (!vgic_present)
 			return -ENXIO;
-		return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+		mutex_lock(&kvm->lock);
+		ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
+		mutex_unlock(&kvm->lock);
+		return ret;
 	}
 	case KVM_ARM_SET_DEVICE_ADDR: {
 		struct kvm_arm_device_addr dev_addr;
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index bda27b6..e9a5c0e 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1309,7 +1309,7 @@
 	smp_rmb();
 
 	pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable);
-	if (is_error_pfn(pfn))
+	if (is_error_noslot_pfn(pfn))
 		return -EFAULT;
 
 	if (kvm_is_device_pfn(pfn)) {
@@ -1714,7 +1714,8 @@
 		 kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL));
 
 	if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) &&
-	    hyp_idmap_start <  kern_hyp_va(~0UL)) {
+	    hyp_idmap_start <  kern_hyp_va(~0UL) &&
+	    hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) {
 		/*
 		 * The idmap page is intersecting with the VA space,
 		 * it is not safe to continue further.
@@ -1893,6 +1894,7 @@
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
+	kvm_free_stage2_pgd(kvm);
 }
 
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig
index 34f0fca..7bf3ae7 100644
--- a/arch/arm/mach-bcm/Kconfig
+++ b/arch/arm/mach-bcm/Kconfig
@@ -15,7 +15,6 @@
 	select HAVE_ARM_SCU if SMP
 	select HAVE_ARM_TWD if SMP
 	select ARM_GLOBAL_TIMER
-	select COMMON_CLK_IPROC
 	select CLKSRC_MMIO
 	select GPIOLIB
 	select ARM_AMBA
diff --git a/arch/arm/mach-clps711x/Kconfig b/arch/arm/mach-clps711x/Kconfig
index dc7c6ed..61284b9 100644
--- a/arch/arm/mach-clps711x/Kconfig
+++ b/arch/arm/mach-clps711x/Kconfig
@@ -1,13 +1,13 @@
 menuconfig ARCH_CLPS711X
 	bool "Cirrus Logic EP721x/EP731x-based"
 	depends on ARCH_MULTI_V4T
-	select ARCH_REQUIRE_GPIOLIB
 	select AUTO_ZRELADDR
 	select CLKSRC_OF
 	select CLPS711X_TIMER
 	select COMMON_CLK
 	select CPU_ARM720T
 	select GENERIC_CLOCKEVENTS
+	select GPIOLIB
 	select MFD_SYSCON
 	select OF_IRQ
 	select USE_OF
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index 3750575..06332f6 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -255,6 +255,12 @@
 		return -ENOMEM;
 	}
 
+	/*
+	 * Clear the OF_POPULATED flag set in of_irq_init so that
+	 * later the Exynos PMU platform device won't be skipped.
+	 */
+	of_node_clear_flag(node, OF_POPULATED);
+
 	return 0;
 }
 
diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c
index fd87205..0df062d 100644
--- a/arch/arm/mach-imx/gpc.c
+++ b/arch/arm/mach-imx/gpc.c
@@ -271,6 +271,12 @@
 	for (i = 0; i < IMR_NUM; i++)
 		writel_relaxed(~0, gpc_base + GPC_IMR1 + i * 4);
 
+	/*
+	 * Clear the OF_POPULATED flag set in of_irq_init so that
+	 * later the GPC power domain driver will not be skipped.
+	 */
+	of_node_clear_flag(node, OF_POPULATED);
+
 	return 0;
 }
 IRQCHIP_DECLARE(imx_gpc, "fsl,imx6q-gpc", imx_gpc_init);
diff --git a/arch/arm/mach-imx/mach-imx6ul.c b/arch/arm/mach-imx/mach-imx6ul.c
index 5d9bfab..6bb7d9c 100644
--- a/arch/arm/mach-imx/mach-imx6ul.c
+++ b/arch/arm/mach-imx/mach-imx6ul.c
@@ -64,6 +64,7 @@
 	if (parent == NULL)
 		pr_warn("failed to initialize soc device\n");
 
+	of_platform_default_populate(NULL, NULL, parent);
 	imx6ul_enet_init();
 	imx_anatop_init();
 	imx6ul_pm_init();
diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c
index 58924b3..fe708e2 100644
--- a/arch/arm/mach-imx/pm-imx6.c
+++ b/arch/arm/mach-imx/pm-imx6.c
@@ -295,7 +295,7 @@
 		val &= ~BM_CLPCR_SBYOS;
 		if (cpu_is_imx6sl())
 			val |= BM_CLPCR_BYPASS_PMIC_READY;
-		if (cpu_is_imx6sl() || cpu_is_imx6sx())
+		if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul())
 			val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS;
 		else
 			val |= BM_CLPCR_BYP_MMDC_CH1_LPM_HS;
@@ -310,7 +310,7 @@
 		val |= 0x3 << BP_CLPCR_STBY_COUNT;
 		val |= BM_CLPCR_VSTBY;
 		val |= BM_CLPCR_SBYOS;
-		if (cpu_is_imx6sl())
+		if (cpu_is_imx6sl() || cpu_is_imx6sx())
 			val |= BM_CLPCR_BYPASS_PMIC_READY;
 		if (cpu_is_imx6sl() || cpu_is_imx6sx() || cpu_is_imx6ul())
 			val |= BM_CLPCR_BYP_MMDC_CH0_LPM_HS;
diff --git a/arch/arm/mach-mvebu/Makefile b/arch/arm/mach-mvebu/Makefile
index e53c6cf..6c6497e 100644
--- a/arch/arm/mach-mvebu/Makefile
+++ b/arch/arm/mach-mvebu/Makefile
@@ -1,5 +1,4 @@
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-	-I$(srctree)/arch/arm/plat-orion/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-orion/include
 
 AFLAGS_coherency_ll.o		:= -Wa,-march=armv7-a
 CFLAGS_pmsu.o			:= -march=armv7-a
diff --git a/arch/arm/mach-omap2/cm33xx.c b/arch/arm/mach-omap2/cm33xx.c
index c073fb5..6f2d0ae 100644
--- a/arch/arm/mach-omap2/cm33xx.c
+++ b/arch/arm/mach-omap2/cm33xx.c
@@ -220,9 +220,6 @@
 {
 	int i = 0;
 
-	if (!clkctrl_offs)
-		return 0;
-
 	omap_test_timeout(_is_module_ready(inst, clkctrl_offs),
 			  MAX_MODULE_READY_TIME, i);
 
@@ -246,9 +243,6 @@
 {
 	int i = 0;
 
-	if (!clkctrl_offs)
-		return 0;
-
 	omap_test_timeout((_clkctrl_idlest(inst, clkctrl_offs) ==
 				CLKCTRL_IDLEST_DISABLED),
 				MAX_MODULE_READY_TIME, i);
diff --git a/arch/arm/mach-omap2/cminst44xx.c b/arch/arm/mach-omap2/cminst44xx.c
index 2c0e07e..2ab27ad 100644
--- a/arch/arm/mach-omap2/cminst44xx.c
+++ b/arch/arm/mach-omap2/cminst44xx.c
@@ -278,9 +278,6 @@
 {
 	int i = 0;
 
-	if (!clkctrl_offs)
-		return 0;
-
 	omap_test_timeout(_is_module_ready(part, inst, clkctrl_offs),
 			  MAX_MODULE_READY_TIME, i);
 
@@ -304,9 +301,6 @@
 {
 	int i = 0;
 
-	if (!clkctrl_offs)
-		return 0;
-
 	omap_test_timeout((_clkctrl_idlest(part, inst, clkctrl_offs) ==
 			   CLKCTRL_IDLEST_DISABLED),
 			  MAX_MODULE_DISABLE_TIME, i);
diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c
index 0c47543..369f95a 100644
--- a/arch/arm/mach-omap2/omap-wakeupgen.c
+++ b/arch/arm/mach-omap2/omap-wakeupgen.c
@@ -322,34 +322,25 @@
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int irq_cpu_hotplug_notify(struct notifier_block *self,
-				  unsigned long action, void *hcpu)
+static int omap_wakeupgen_cpu_online(unsigned int cpu)
 {
-	unsigned int cpu = (unsigned int)hcpu;
-
-	/*
-	 * Corresponding FROZEN transitions do not have to be handled,
-	 * they are handled by at a higher level
-	 * (drivers/cpuidle/coupled.c).
-	 */
-	switch (action) {
-	case CPU_ONLINE:
-		wakeupgen_irqmask_all(cpu, 0);
-		break;
-	case CPU_DEAD:
-		wakeupgen_irqmask_all(cpu, 1);
-		break;
-	}
-	return NOTIFY_OK;
+	wakeupgen_irqmask_all(cpu, 0);
+	return 0;
 }
 
-static struct notifier_block irq_hotplug_notifier = {
-	.notifier_call = irq_cpu_hotplug_notify,
-};
+static int omap_wakeupgen_cpu_dead(unsigned int cpu)
+{
+	wakeupgen_irqmask_all(cpu, 1);
+	return 0;
+}
 
 static void __init irq_hotplug_init(void)
 {
-	register_hotcpu_notifier(&irq_hotplug_notifier);
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "arm/omap-wake:online",
+				  omap_wakeupgen_cpu_online, NULL);
+	cpuhp_setup_state_nocalls(CPUHP_ARM_OMAP_WAKE_DEAD,
+				  "arm/omap-wake:dead", NULL,
+				  omap_wakeupgen_cpu_dead);
 }
 #else
 static void __init irq_hotplug_init(void)
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 5b70938..1052b29 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -1053,6 +1053,10 @@
 	if (oh->flags & HWMOD_NO_IDLEST)
 		return 0;
 
+	if (!oh->prcm.omap4.clkctrl_offs &&
+	    !(oh->prcm.omap4.flags & HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET))
+		return 0;
+
 	return omap_cm_wait_module_idle(oh->clkdm->prcm_partition,
 					oh->clkdm->cm_inst,
 					oh->prcm.omap4.clkctrl_offs, 0);
@@ -2971,6 +2975,10 @@
 	if (!_find_mpu_rt_port(oh))
 		return 0;
 
+	if (!oh->prcm.omap4.clkctrl_offs &&
+	    !(oh->prcm.omap4.flags & HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET))
+		return 0;
+
 	/* XXX check module SIDLEMODE, hardreset status */
 
 	return omap_cm_wait_module_ready(oh->clkdm->prcm_partition,
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index 4041bad..7890401 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -443,8 +443,12 @@
  * HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT: Some IP blocks don't have a PRCM
  *     module-level context loss register associated with them; this
  *     flag bit should be set in those cases
+ * HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET: Some IP blocks have a valid CLKCTRL
+ *	offset of zero; this flag bit should be set in those cases to
+ *	distinguish from hwmods that have no clkctrl offset.
  */
 #define HWMOD_OMAP4_NO_CONTEXT_LOSS_BIT		(1 << 0)
+#define HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET		(1 << 1)
 
 /**
  * struct omap_hwmod_omap4_prcm - OMAP4-specific PRCM data
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
index 55c5878..e2d84aa 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c
@@ -29,6 +29,7 @@
 #define CLKCTRL(oh, clkctrl) ((oh).prcm.omap4.clkctrl_offs = (clkctrl))
 #define RSTCTRL(oh, rstctrl) ((oh).prcm.omap4.rstctrl_offs = (rstctrl))
 #define RSTST(oh, rstst) ((oh).prcm.omap4.rstst_offs = (rstst))
+#define PRCM_FLAGS(oh, flag) ((oh).prcm.omap4.flags = (flag))
 
 /*
  * 'l3' class
@@ -1296,6 +1297,7 @@
 	CLKCTRL(am33xx_i2c1_hwmod, AM33XX_CM_WKUP_I2C0_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_wd_timer1_hwmod, AM33XX_CM_WKUP_WDT1_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_rtc_hwmod, AM33XX_CM_RTC_RTC_CLKCTRL_OFFSET);
+	PRCM_FLAGS(am33xx_rtc_hwmod, HWMOD_OMAP4_ZERO_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_mmc2_hwmod, AM33XX_CM_PER_MMC2_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_gpmc_hwmod, AM33XX_CM_PER_GPMC_CLKCTRL_OFFSET);
 	CLKCTRL(am33xx_l4_ls_hwmod, AM33XX_CM_PER_L4LS_CLKCTRL_OFFSET);
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index d72ee61..1cc4a6f 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -722,8 +722,20 @@
  * display serial interface controller
  */
 
+static struct omap_hwmod_class_sysconfig omap3xxx_dsi_sysc = {
+	.rev_offs	= 0x0000,
+	.sysc_offs	= 0x0010,
+	.syss_offs	= 0x0014,
+	.sysc_flags	= (SYSC_HAS_AUTOIDLE | SYSC_HAS_CLOCKACTIVITY |
+			   SYSC_HAS_ENAWAKEUP | SYSC_HAS_SIDLEMODE |
+			   SYSC_HAS_SOFTRESET | SYSS_HAS_RESET_STATUS),
+	.idlemodes	= (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART),
+	.sysc_fields	= &omap_hwmod_sysc_type1,
+};
+
 static struct omap_hwmod_class omap3xxx_dsi_hwmod_class = {
 	.name = "dsi",
+	.sysc	= &omap3xxx_dsi_sysc,
 };
 
 static struct omap_hwmod_irq_info omap3xxx_dsi1_irqs[] = {
diff --git a/arch/arm/mach-oxnas/Kconfig b/arch/arm/mach-oxnas/Kconfig
index 567496b..29100be 100644
--- a/arch/arm/mach-oxnas/Kconfig
+++ b/arch/arm/mach-oxnas/Kconfig
@@ -11,11 +11,13 @@
 
 config MACH_OX810SE
 	bool "Support OX810SE Based Products"
+	select ARCH_HAS_RESET_CONTROLLER
 	select COMMON_CLK_OXNAS
 	select CPU_ARM926T
 	select MFD_SYSCON
 	select OXNAS_RPS_TIMER
 	select PINCTRL_OXNAS
+	select RESET_CONTROLLER
 	select RESET_OXNAS
 	select VERSATILE_FPGA_IRQ
 	help
diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c
index dc109dc3..10bfdb1 100644
--- a/arch/arm/mach-pxa/corgi.c
+++ b/arch/arm/mach-pxa/corgi.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>	/* symbol_get ; symbol_put */
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/major.h>
diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index c410d84..66070ac 100644
--- a/arch/arm/mach-pxa/idp.c
+++ b/arch/arm/mach-pxa/idp.c
@@ -83,7 +83,8 @@
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-	.flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		 SMC91X_USE_DMA | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/arch/arm/mach-pxa/lubbock.c b/arch/arm/mach-pxa/lubbock.c
index 7245f33..d6159f8 100644
--- a/arch/arm/mach-pxa/lubbock.c
+++ b/arch/arm/mach-pxa/lubbock.c
@@ -137,6 +137,18 @@
 	// no D+ pullup; lubbock can't connect/disconnect in software
 };
 
+static void lubbock_init_pcmcia(void)
+{
+	struct clk *clk;
+
+	/* Add an alias for the SA1111 PCMCIA clock */
+	clk = clk_get_sys("pxa2xx-pcmcia", NULL);
+	if (!IS_ERR(clk)) {
+		clkdev_create(clk, NULL, "1800");
+		clk_put(clk);
+	}
+}
+
 static struct resource sa1111_resources[] = {
 	[0] = {
 		.start	= 0x10000000,
@@ -467,6 +479,8 @@
 	pxa_set_btuart_info(NULL);
 	pxa_set_stuart_info(NULL);
 
+	lubbock_init_pcmcia();
+
 	clk_add_alias("SA1111_CLK", NULL, "GPIO11_CLK", NULL);
 	pxa_set_udc_info(&udc_info);
 	pxa_set_fb_info(NULL, &sharp_lm8v31);
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 1080580..2c150bf 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>	/* symbol_get ; symbol_put */
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/gpio_keys.h>
diff --git a/arch/arm/mach-pxa/xcep.c b/arch/arm/mach-pxa/xcep.c
index 3f06cd9..056369e 100644
--- a/arch/arm/mach-pxa/xcep.c
+++ b/arch/arm/mach-pxa/xcep.c
@@ -120,7 +120,8 @@
 };
 
 static struct smc91x_platdata xcep_smc91x_info = {
-	.flags	= SMC91X_USE_32BIT | SMC91X_NOWAIT | SMC91X_USE_DMA,
+	.flags	= SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		  SMC91X_NOWAIT | SMC91X_USE_DMA,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/arch/arm/mach-realview/Makefile b/arch/arm/mach-realview/Makefile
index dae8d86..4048821 100644
--- a/arch/arm/mach-realview/Makefile
+++ b/arch/arm/mach-realview/Makefile
@@ -1,8 +1,7 @@
 #
 # Makefile for the linux kernel.
 #
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
-	-I$(srctree)/arch/arm/plat-versatile/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/arch/arm/plat-versatile/include
 
 obj-y					:= core.o
 obj-$(CONFIG_REALVIEW_DT)		+= realview-dt.o
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index baf1745..a0ead0a 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -93,7 +93,8 @@
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-	.flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		 SMC91X_NOWAIT,
 };
 
 static struct platform_device realview_eth_device = {
diff --git a/arch/arm/mach-s5pv210/Makefile b/arch/arm/mach-s5pv210/Makefile
index 72b9e96..fa7fb71 100644
--- a/arch/arm/mach-s5pv210/Makefile
+++ b/arch/arm/mach-s5pv210/Makefile
@@ -5,7 +5,7 @@
 #
 # Licensed under GPLv2
 
-ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/$(src)/include -I$(srctree)/arch/arm/plat-samsung/include
+ccflags-$(CONFIG_ARCH_MULTIPLATFORM) += -I$(srctree)/arch/arm/plat-samsung/include
 
 # Core
 
diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c
index cbf53bb..0db46895 100644
--- a/arch/arm/mach-sa1100/clock.c
+++ b/arch/arm/mach-sa1100/clock.c
@@ -125,6 +125,8 @@
 }
 
 static struct clkops clk_36864_ops = {
+	.enable		= clk_cpu_enable,
+	.disable	= clk_cpu_disable,
 	.get_rate	= clk_36864_get_rate,
 };
 
@@ -140,9 +142,8 @@
 	CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864),
 };
 
-static int __init sa11xx_clk_init(void)
+int __init sa11xx_clk_init(void)
 {
 	clkdev_add_table(sa11xx_clkregs, ARRAY_SIZE(sa11xx_clkregs));
 	return 0;
 }
-core_initcall(sa11xx_clk_init);
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 345e63f..3e09bed 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -34,6 +34,7 @@
 
 #include <mach/hardware.h>
 #include <mach/irqs.h>
+#include <mach/reset.h>
 
 #include "generic.h"
 #include <clocksource/pxa.h>
@@ -95,6 +96,8 @@
 
 void sa11x0_restart(enum reboot_mode mode, const char *cmd)
 {
+	clear_reset_status(RESET_STATUS_ALL);
+
 	if (mode == REBOOT_SOFT) {
 		/* Jump into ROM at address 0 */
 		soft_restart(0);
@@ -388,6 +391,7 @@
 	sa11x0_init_irq_nodt(IRQ_GPIO0_SC, irq_resource.start);
 
 	sa1100_init_gpio();
+	sa11xx_clk_init();
 }
 
 /*
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
index 0d92e11..68199b603 100644
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -44,3 +44,5 @@
 #else
 static inline int sa11x0_pm_init(void) { return 0; }
 #endif
+
+int sa11xx_clk_init(void);
diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c
index 1525d7b..88149f8 100644
--- a/arch/arm/mach-sa1100/pleb.c
+++ b/arch/arm/mach-sa1100/pleb.c
@@ -45,7 +45,7 @@
 };
 
 static struct smc91x_platdata smc91x_platdata = {
-	.flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_16BIT | SMC91X_USE_8BIT | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
diff --git a/arch/arm/mach-shmobile/platsmp-scu.c b/arch/arm/mach-shmobile/platsmp-scu.c
index 8d478f1..d1ecaf3 100644
--- a/arch/arm/mach-shmobile/platsmp-scu.c
+++ b/arch/arm/mach-shmobile/platsmp-scu.c
@@ -21,26 +21,14 @@
 static phys_addr_t shmobile_scu_base_phys;
 static void __iomem *shmobile_scu_base;
 
-static int shmobile_smp_scu_notifier_call(struct notifier_block *nfb,
-					  unsigned long action, void *hcpu)
+static int shmobile_scu_cpu_prepare(unsigned int cpu)
 {
-	unsigned int cpu = (long)hcpu;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-		/* For this particular CPU register SCU SMP boot vector */
-		shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu),
-				  shmobile_scu_base_phys);
-		break;
-	};
-
-	return NOTIFY_OK;
+	/* For this particular CPU register SCU SMP boot vector */
+	shmobile_smp_hook(cpu, virt_to_phys(shmobile_boot_scu),
+			  shmobile_scu_base_phys);
+	return 0;
 }
 
-static struct notifier_block shmobile_smp_scu_notifier = {
-	.notifier_call = shmobile_smp_scu_notifier_call,
-};
-
 void __init shmobile_smp_scu_prepare_cpus(phys_addr_t scu_base_phys,
 					  unsigned int max_cpus)
 {
@@ -54,7 +42,9 @@
 	scu_power_mode(shmobile_scu_base, SCU_PM_NORMAL);
 
 	/* Use CPU notifier for reset vector control */
-	register_cpu_notifier(&shmobile_smp_scu_notifier);
+	cpuhp_setup_state_nocalls(CPUHP_ARM_SHMOBILE_SCU_PREPARE,
+				  "arm/shmobile-scu:prepare",
+				  shmobile_scu_cpu_prepare, NULL);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c
index f3dba6f..02e21bc 100644
--- a/arch/arm/mach-shmobile/platsmp.c
+++ b/arch/arm/mach-shmobile/platsmp.c
@@ -40,5 +40,8 @@
 bool __init shmobile_smp_init_fallback_ops(void)
 {
 	/* fallback on PSCI/smp_ops if no other DT based method is detected */
+	if (!IS_ENABLED(CONFIG_SMP))
+		return false;
+
 	return platform_can_secondary_boot() ? true : false;
 }
diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
index 62437b5..73e3adb 100644
--- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
+++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c
@@ -41,40 +41,27 @@
 
 #define REGULATOR_IRQ_MASK	BIT(2)	/* IRQ2, active low */
 
+/* start of DA9210 System Control and Event Registers */
+#define DA9210_REG_MASK_A		0x54
+
 static void __iomem *irqc;
 
-static const u8 da9063_mask_regs[] = {
-	DA9063_REG_IRQ_MASK_A,
-	DA9063_REG_IRQ_MASK_B,
-	DA9063_REG_IRQ_MASK_C,
-	DA9063_REG_IRQ_MASK_D,
+/* first byte sets the memory pointer, following are consecutive reg values */
+static u8 da9063_irq_clr[] = { DA9063_REG_IRQ_MASK_A, 0xff, 0xff, 0xff, 0xff };
+static u8 da9210_irq_clr[] = { DA9210_REG_MASK_A, 0xff, 0xff };
+
+static struct i2c_msg da9xxx_msgs[2] = {
+	{
+		.addr = 0x58,
+		.len = ARRAY_SIZE(da9063_irq_clr),
+		.buf = da9063_irq_clr,
+	}, {
+		.addr = 0x68,
+		.len = ARRAY_SIZE(da9210_irq_clr),
+		.buf = da9210_irq_clr,
+	},
 };
 
-/* DA9210 System Control and Event Registers */
-#define DA9210_REG_MASK_A		0x54
-#define DA9210_REG_MASK_B		0x55
-
-static const u8 da9210_mask_regs[] = {
-	DA9210_REG_MASK_A,
-	DA9210_REG_MASK_B,
-};
-
-static void da9xxx_mask_irqs(struct i2c_client *client, const u8 regs[],
-			     unsigned int nregs)
-{
-	unsigned int i;
-
-	dev_info(&client->dev, "Masking %s interrupt sources\n", client->name);
-
-	for (i = 0; i < nregs; i++) {
-		int error = i2c_smbus_write_byte_data(client, regs[i], ~0);
-		if (error) {
-			dev_err(&client->dev, "i2c error %d\n", error);
-			return;
-		}
-	}
-}
-
 static int regulator_quirk_notify(struct notifier_block *nb,
 				  unsigned long action, void *data)
 {
@@ -93,12 +80,15 @@
 	client = to_i2c_client(dev);
 	dev_dbg(dev, "Detected %s\n", client->name);
 
-	if ((client->addr == 0x58 && !strcmp(client->name, "da9063")))
-		da9xxx_mask_irqs(client, da9063_mask_regs,
-				 ARRAY_SIZE(da9063_mask_regs));
-	else if (client->addr == 0x68 && !strcmp(client->name, "da9210"))
-		da9xxx_mask_irqs(client, da9210_mask_regs,
-				 ARRAY_SIZE(da9210_mask_regs));
+	if ((client->addr == 0x58 && !strcmp(client->name, "da9063")) ||
+	    (client->addr == 0x68 && !strcmp(client->name, "da9210"))) {
+		int ret;
+
+		dev_info(&client->dev, "clearing da9063/da9210 interrupts\n");
+		ret = i2c_transfer(client->adapter, da9xxx_msgs, ARRAY_SIZE(da9xxx_msgs));
+		if (ret != ARRAY_SIZE(da9xxx_msgs))
+			dev_err(&client->dev, "i2c error %d\n", ret);
+	}
 
 	mon = ioread32(irqc + IRQC_MONITOR);
 	if (mon & REGULATOR_IRQ_MASK)
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 62f4d01..30fe03f 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -137,7 +137,7 @@
 
 	initial_pmd_value = pmd;
 
-	pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE;
+	pmd &= PMD_SECT_CACHE_MASK;
 
 	for (i = 0; i < ARRAY_SIZE(cache_policies); i++)
 		if (cache_policies[i].pmd == pmd) {
@@ -728,7 +728,8 @@
 {
 	void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz));
 
-	BUG_ON(!ptr);
+	if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
+		BUG();
 	return ptr;
 }
 
@@ -1155,10 +1156,19 @@
 {
 	phys_addr_t memblock_limit = 0;
 	int highmem = 0;
-	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
+	u64 vmalloc_limit;
 	struct memblock_region *reg;
 	bool should_use_highmem = false;
 
+	/*
+	 * Let's use our own (unoptimized) equivalent of __pa() that is
+	 * not affected by wrap-arounds when sizeof(phys_addr_t) == 4.
+	 * The result is used as the upper bound on physical memory address
+	 * and may itself be outside the valid range for which phys_addr_t
+	 * and therefore __pa() is defined.
+	 */
+	vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET;
+
 	for_each_memblock(memory, reg) {
 		phys_addr_t block_start = reg->base;
 		phys_addr_t block_end = reg->base + reg->size;
@@ -1183,10 +1193,11 @@
 			if (reg->size > size_limit) {
 				phys_addr_t overlap_size = reg->size - size_limit;
 
-				pr_notice("Truncating RAM at %pa-%pa to -%pa",
-					  &block_start, &block_end, &vmalloc_limit);
-				memblock_remove(vmalloc_limit, overlap_size);
+				pr_notice("Truncating RAM at %pa-%pa",
+					  &block_start, &block_end);
 				block_end = vmalloc_limit;
+				pr_cont(" to -%pa", &block_end);
+				memblock_remove(vmalloc_limit, overlap_size);
 				should_use_highmem = true;
 			}
 		}
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index a7123b4..d00d52c 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -16,6 +16,7 @@
 #include <asm/hwcap.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable.h>
+#include <asm/memory.h>
 
 #include "proc-macros.S"
 
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index b0b82f5..f193414 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -50,7 +50,7 @@
 static struct vcpu_info __percpu *xen_vcpu_info;
 
 /* Linux <-> Xen vCPU id mapping */
-DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
 EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 
 /* These are unused until we support booting "pre-ballooned" */
@@ -170,9 +170,6 @@
 	pr_info("Xen: initializing cpu%d\n", cpu);
 	vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
 
-	/* Direct vCPU id mapping for ARM guests. */
-	per_cpu(xen_vcpu_id, cpu) = cpu;
-
 	info.mfn = virt_to_gfn(vcpup);
 	info.offset = xen_offset_in_page(vcpup);
 
@@ -330,6 +327,7 @@
 {
 	struct xen_add_to_physmap xatp;
 	struct shared_info *shared_info_page = NULL;
+	int cpu;
 
 	if (!xen_domain())
 		return 0;
@@ -380,7 +378,8 @@
 		return -ENOMEM;
 
 	/* Direct vCPU id mapping for ARM guests. */
-	per_cpu(xen_vcpu_id, 0) = 0;
+	for_each_possible_cpu(cpu)
+		per_cpu(xen_vcpu_id, cpu) = cpu;
 
 	xen_auto_xlat_grant_frames.count = gnttab_max_grant_frames();
 	if (xen_xlate_map_ballooned_pages(&xen_auto_xlat_grant_frames.pfn,
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index bc3f00f..7d6bcf6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -4,6 +4,8 @@
 	select ACPI_GENERIC_GSI if ACPI
 	select ACPI_REDUCED_HARDWARE_ONLY if ACPI
 	select ACPI_MCFG if ACPI
+	select ACPI_SPCR_TABLE if ACPI
+	select ARCH_CLOCKSOURCE_DATA
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
@@ -102,10 +104,8 @@
 	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
-	select OF_NUMA if NUMA && OF
 	select OF_RESERVED_MEM
 	select PCI_ECAM if ACPI
-	select PERF_USE_VMALLOC
 	select POWER_RESET
 	select POWER_SUPPLY
 	select SPARSE_IRQ
@@ -122,6 +122,9 @@
 config MMU
 	def_bool y
 
+config DEBUG_RODATA
+	def_bool y
+
 config ARM64_PAGE_SHIFT
 	int
 	default 16 if ARM64_64K_PAGES
@@ -415,18 +418,13 @@
 
 config ARM64_ERRATUM_843419
 	bool "Cortex-A53: 843419: A load or store might access an incorrect address"
-	depends on MODULES
 	default y
-	select ARM64_MODULE_CMODEL_LARGE
+	select ARM64_MODULE_CMODEL_LARGE if MODULES
 	help
-	  This option builds kernel modules using the large memory model in
-	  order to avoid the use of the ADRP instruction, which can cause
-	  a subsequent memory access to use an incorrect address on Cortex-A53
-	  parts up to r0p4.
-
-	  Note that the kernel itself must be linked with a version of ld
-	  which fixes potentially affected ADRP instructions through the
-	  use of veneers.
+	  This option links the kernel with '--fix-cortex-a53-843419' and
+	  builds modules using the large memory model in order to avoid the use
+	  of the ADRP instruction, which can cause a subsequent memory access
+	  to use an incorrect address on Cortex-A53 parts up to r0p4.
 
 	  If unsure, say Y.
 
@@ -582,7 +580,8 @@
 # Common NUMA Features
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support"
-	depends on SMP
+	select ACPI_NUMA if ACPI
+	select OF_NUMA
 	help
 	  Enable NUMA (Non Uniform Memory Access) support.
 
@@ -603,11 +602,18 @@
 	def_bool y
 	depends on NUMA
 
+config HAVE_SETUP_PER_CPU_AREA
+	def_bool y
+	depends on NUMA
+
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	def_bool y
+	depends on NUMA
+
 source kernel/Kconfig.preempt
 source kernel/Kconfig.hz
 
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
-	depends on !HIBERNATION
 	def_bool y
 
 config ARCH_HAS_HOLES_MEMORYMODEL
diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 0cc758c..b661fe7 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -49,16 +49,6 @@
 
 	  If in doubt, say Y.
 
-config DEBUG_RODATA
-	bool "Make kernel text and rodata read-only"
-	default y
-	help
-	  If this is set, kernel text and rodata will be made read-only. This
-	  is to help catch accidental or malicious attempts to change the
-	  kernel's executable code.
-
-	  If in doubt, say Y.
-
 config DEBUG_ALIGN_RODATA
 	depends on DEBUG_RODATA
 	bool "Align linker sections up to SECTION_SIZE"
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index bb2616b..96ef543 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -8,7 +8,7 @@
 
 config ARCH_ALPINE
 	bool "Annapurna Labs Alpine platform"
-	select ALPINE_MSI
+	select ALPINE_MSI if PCI
 	help
 	  This enables support for the Annapurna Labs Alpine
 	  Soc family.
@@ -55,6 +55,7 @@
 
 config ARCH_LAYERSCAPE
 	bool "ARMv8 based Freescale Layerscape SoC family"
+	select EDAC_SUPPORT
 	help
 	  This enables support for the Freescale Layerscape SoC family.
 
@@ -66,7 +67,7 @@
 config ARCH_HISI
 	bool "Hisilicon SoC Family"
 	select ARM_TIMER_SP804
-	select HISILICON_IRQ_MBIGEN
+	select HISILICON_IRQ_MBIGEN if PCI
 	help
 	  This enables support for Hisilicon ARMv8 SoC family
 
@@ -93,6 +94,7 @@
 	select ARMADA_CP110_SYSCON
 	select ARMADA_37XX_CLK
 	select MVEBU_ODMI
+	select MVEBU_PIC
 	help
 	  This enables support for Marvell EBU familly, including:
 	   - Armada 3700 SoC Family
@@ -159,7 +161,6 @@
 	select CLKSRC_MMIO
 	select CLKSRC_OF
 	select GENERIC_CLOCKEVENTS
-	select HAVE_CLK
 	select PINCTRL
 	select RESET_CONTROLLER
 	help
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 5b54f8c..ab51aed 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -18,6 +18,14 @@
 LDFLAGS_vmlinux		+= -pie -Bsymbolic
 endif
 
+ifeq ($(CONFIG_ARM64_ERRATUM_843419),y)
+  ifeq ($(call ld-option, --fix-cortex-a53-843419),)
+$(warning ld does not support --fix-cortex-a53-843419; kernel may be susceptible to erratum)
+  else
+LDFLAGS_vmlinux	+= --fix-cortex-a53-843419
+  endif
+endif
+
 KBUILD_DEFCONFIG := defconfig
 
 # Check for binutils support for specific extensions
@@ -38,10 +46,12 @@
 KBUILD_CPPFLAGS	+= -mbig-endian
 AS		+= -EB
 LD		+= -EB
+UTS_MACHINE	:= aarch64_be
 else
 KBUILD_CPPFLAGS	+= -mlittle-endian
 AS		+= -EL
 LD		+= -EL
+UTS_MACHINE	:= aarch64
 endif
 
 CHECKFLAGS	+= -D__aarch64__
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index 445aa67..c2b9bcb 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -255,10 +255,10 @@
 		/* Local timer */
 		timer {
 			compatible = "arm,armv8-timer";
-			interrupts = <1 13 0xf01>,
-				     <1 14 0xf01>,
-				     <1 11 0xf01>,
-				     <1 10 0xf01>;
+			interrupts = <1 13 0xf08>,
+				     <1 14 0xf08>,
+				     <1 11 0xf08>,
+				     <1 10 0xf08>;
 		};
 
 		timer0: timer0@ffc03000 {
diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
index e502c24..bf6c8d0 100644
--- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
+++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi
@@ -102,13 +102,13 @@
 	timer {
 		compatible = "arm,armv8-timer";
 		interrupts = <GIC_PPI 13
-			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>,
+			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 14
-			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>,
+			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 11
-			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>,
+			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 10
-			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_EDGE_RISING)>;
+			(GIC_CPU_MASK_RAW(0xff) | IRQ_TYPE_LEVEL_LOW)>;
 	};
 
 	xtal: xtal-clk {
diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts
index b7fb5d9..32a961c 100644
--- a/arch/arm64/boot/dts/apm/apm-mustang.dts
+++ b/arch/arm64/boot/dts/apm/apm-mustang.dts
@@ -74,6 +74,7 @@
 
 &xgenet {
 	status = "ok";
+	rxlos-gpios = <&sbgpio 12 1>;
 };
 
 &mmc0 {
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index f1c2c71..31ea70a 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -110,10 +110,10 @@
 
 	timer {
 		compatible = "arm,armv8-timer";
-		interrupts = <1 0 0xff01>,	/* Secure Phys IRQ */
-			     <1 13 0xff01>,	/* Non-secure Phys IRQ */
-			     <1 14 0xff01>,	/* Virt IRQ */
-			     <1 15 0xff01>;	/* Hyp IRQ */
+		interrupts = <1 0 0xff08>,	/* Secure Phys IRQ */
+			     <1 13 0xff08>,	/* Non-secure Phys IRQ */
+			     <1 14 0xff08>,	/* Virt IRQ */
+			     <1 15 0xff08>;	/* Hyp IRQ */
 		clock-frequency = <50000000>;
 	};
 
@@ -923,7 +923,7 @@
 			/* mac address will be overwritten by the bootloader */
 			local-mac-address = [00 00 00 00 00 00];
 			phy-connection-type = "rgmii";
-			phy-handle = <&menet0phy>,<&menetphy>;
+			phy-handle = <&menetphy>,<&menet0phy>;
 			mdio {
 				compatible = "apm,xgene-mdio";
 				#address-cells = <1>;
diff --git a/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi
new file mode 120000
index 0000000..3937b77
--- /dev/null
+++ b/arch/arm64/boot/dts/broadcom/bcm2835-rpi.dtsi
@@ -0,0 +1 @@
+../../../../arm/boot/dts/bcm2835-rpi.dtsi
\ No newline at end of file
diff --git a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts
index 6f47dd2..7841b72 100644
--- a/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts
+++ b/arch/arm64/boot/dts/broadcom/bcm2837-rpi-3-b.dts
@@ -1,7 +1,7 @@
 /dts-v1/;
 #include "bcm2837.dtsi"
-#include "../../../../arm/boot/dts/bcm2835-rpi.dtsi"
-#include "../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi"
+#include "bcm2835-rpi.dtsi"
+#include "bcm283x-rpi-smsc9514.dtsi"
 
 / {
 	compatible = "raspberrypi,3-model-b", "brcm,bcm2837";
diff --git a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi
index f2a31d0..8216bbb 100644
--- a/arch/arm64/boot/dts/broadcom/bcm2837.dtsi
+++ b/arch/arm64/boot/dts/broadcom/bcm2837.dtsi
@@ -1,4 +1,4 @@
-#include "../../../../arm/boot/dts/bcm283x.dtsi"
+#include "bcm283x.dtsi"
 
 / {
 	compatible = "brcm,bcm2836";
diff --git a/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi
new file mode 120000
index 0000000..dca7c05
--- /dev/null
+++ b/arch/arm64/boot/dts/broadcom/bcm283x-rpi-smsc9514.dtsi
@@ -0,0 +1 @@
+../../../../arm/boot/dts/bcm283x-rpi-smsc9514.dtsi
\ No newline at end of file
diff --git a/arch/arm64/boot/dts/broadcom/bcm283x.dtsi b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi
new file mode 120000
index 0000000..5f54e4c
--- /dev/null
+++ b/arch/arm64/boot/dts/broadcom/bcm283x.dtsi
@@ -0,0 +1 @@
+../../../../arm/boot/dts/bcm283x.dtsi
\ No newline at end of file
diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi b/arch/arm64/boot/dts/broadcom/ns2.dtsi
index f53b095..d4a12fa 100644
--- a/arch/arm64/boot/dts/broadcom/ns2.dtsi
+++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi
@@ -88,13 +88,13 @@
 	timer {
 		compatible = "arm,armv8-timer";
 		interrupts = <GIC_PPI 13 (GIC_CPU_MASK_RAW(0xff) |
-			      IRQ_TYPE_EDGE_RISING)>,
+			      IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 14 (GIC_CPU_MASK_RAW(0xff) |
-			      IRQ_TYPE_EDGE_RISING)>,
+			      IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 11 (GIC_CPU_MASK_RAW(0xff) |
-			      IRQ_TYPE_EDGE_RISING)>,
+			      IRQ_TYPE_LEVEL_LOW)>,
 			     <GIC_PPI 10 (GIC_CPU_MASK_RAW(0xff) |
-			      IRQ_TYPE_EDGE_RISING)>;
+			      IRQ_TYPE_LEVEL_LOW)>;
 	};
 
 	pmu {
diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
index 2eb9b22..04dc8a8 100644
--- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
+++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi
@@ -354,10 +354,10 @@
 
 	timer {
 		compatible = "arm,armv8-timer";
-		interrupts = <1 13 0xff01>,
-		             <1 14 0xff01>,
-		             <1 11 0xff01>,
-		             <1 10 0xff01>;
+		interrupts = <1 13 4>,
+		             <1 14 4>,
+		             <1 11 4>,
+		             <1 10 4>;
 	};
 
 	pmu {
diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
index 299f3ce..c528dd5 100644
--- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
+++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts
@@ -12,6 +12,7 @@
 /dts-v1/;
 #include "exynos7.dtsi"
 #include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/clock/samsung,s2mps11.h>
 
 / {
 	model = "Samsung Exynos7 Espresso board based on EXYNOS7";
@@ -43,6 +44,8 @@
 
 &rtc {
 	status = "okay";
+	clocks = <&clock_ccore PCLK_RTC>, <&s2mps15_osc S2MPS11_CLK_AP>;
+	clock-names = "rtc", "rtc_src";
 };
 
 &watchdog {
diff --git a/arch/arm64/boot/dts/exynos/exynos7.dtsi b/arch/arm64/boot/dts/exynos/exynos7.dtsi
index ca663df..1628315 100644
--- a/arch/arm64/boot/dts/exynos/exynos7.dtsi
+++ b/arch/arm64/boot/dts/exynos/exynos7.dtsi
@@ -473,10 +473,10 @@
 
 		timer {
 			compatible = "arm,armv8-timer";
-			interrupts = <1 13 0xff01>,
-				     <1 14 0xff01>,
-				     <1 11 0xff01>,
-				     <1 10 0xff01>;
+			interrupts = <1 13 0xff08>,
+				     <1 14 0xff08>,
+				     <1 11 0xff08>,
+				     <1 10 0xff08>;
 		};
 
 		pmu_system_controller: system-controller@105c0000 {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
index e669fbd..a67e210 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
@@ -119,10 +119,10 @@
 
 	timer {
 		compatible = "arm,armv8-timer";
-		interrupts = <1 13 0x1>, /* Physical Secure PPI */
-			     <1 14 0x1>, /* Physical Non-Secure PPI */
-			     <1 11 0x1>, /* Virtual PPI */
-			     <1 10 0x1>; /* Hypervisor PPI */
+		interrupts = <1 13 0xf08>, /* Physical Secure PPI */
+			     <1 14 0xf08>, /* Physical Non-Secure PPI */
+			     <1 11 0xf08>, /* Virtual PPI */
+			     <1 10 0xf08>; /* Hypervisor PPI */
 	};
 
 	pmu {
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
index 21023a3..e3b6034 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi
@@ -191,10 +191,10 @@
 
 	timer {
 		compatible = "arm,armv8-timer";
-		interrupts = <1 13 0x8>, /* Physical Secure PPI, active-low */
-			     <1 14 0x8>, /* Physical Non-Secure PPI, active-low */
-			     <1 11 0x8>, /* Virtual PPI, active-low */
-			     <1 10 0x8>; /* Hypervisor PPI, active-low */
+		interrupts = <1 13 4>, /* Physical Secure PPI, active-low */
+			     <1 14 4>, /* Physical Non-Secure PPI, active-low */
+			     <1 11 4>, /* Virtual PPI, active-low */
+			     <1 10 4>; /* Hypervisor PPI, active-low */
 	};
 
 	pmu {
diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
index eab1a42..c2a6745 100644
--- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi
@@ -122,10 +122,10 @@
 
 			timer {
 				compatible = "arm,armv8-timer";
-				interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>,
-					     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>,
-					     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>,
-					     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_EDGE_RISING)>;
+				interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+					     <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+					     <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>,
+					     <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_LOW)>;
 			};
 
 			odmi: odmi@300000 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
index d02a9003..4f44d11 100644
--- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi
@@ -270,6 +270,8 @@
 		#io-channel-cells = <1>;
 		clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
 		clock-names = "saradc", "apb_pclk";
+		resets = <&cru SRST_SARADC>;
+		reset-names = "saradc-apb";
 		status = "disabled";
 	};
 
diff --git a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi
index c223915..d73bdc8 100644
--- a/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi
+++ b/arch/arm64/boot/dts/socionext/uniphier-ph1-ld20.dtsi
@@ -129,10 +129,10 @@
 
 	timer {
 		compatible = "arm,armv8-timer";
-		interrupts = <1 13 0xf01>,
-			     <1 14 0xf01>,
-			     <1 11 0xf01>,
-			     <1 10 0xf01>;
+		interrupts = <1 13 4>,
+			     <1 14 4>,
+			     <1 11 4>,
+			     <1 10 4>;
 	};
 
 	soc {
diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
index e595f22..3e2e51f 100644
--- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
+++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi
@@ -65,10 +65,10 @@
 	timer {
 		compatible = "arm,armv8-timer";
 		interrupt-parent = <&gic>;
-		interrupts = <1 13 0xf01>,
-			     <1 14 0xf01>,
-			     <1 11 0xf01>,
-			     <1 10 0xf01>;
+		interrupts = <1 13 0xf08>,
+			     <1 14 0xf08>,
+			     <1 11 0xf08>,
+			     <1 10 0xf08>;
 	};
 
 	amba_apu {
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 0555b7c..eadf485 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1,4 +1,3 @@
-# CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_AUDIT=y
@@ -15,10 +14,14 @@
 CONFIG_LOG_BUF_SHIFT=14
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_HUGETLB=y
-# CONFIG_UTS_NS is not set
-# CONFIG_IPC_NS is not set
-# CONFIG_NET_NS is not set
+CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_USER_NS=y
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_KALLSYMS_ALL=y
@@ -71,6 +74,7 @@
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_CMA=y
+CONFIG_SECCOMP=y
 CONFIG_XEN=y
 CONFIG_KEXEC=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
@@ -84,10 +88,37 @@
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IPV6 is not set
+CONFIG_IPV6=m
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NF_CONNTRACK_IPV4=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_NF_CONNTRACK_IPV6=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q_GVRP=y
+CONFIG_VLAN_8021Q_MVRP=y
 CONFIG_BPF_JIT=y
 CONFIG_CFG80211=m
 CONFIG_MAC80211=m
@@ -103,6 +134,7 @@
 CONFIG_MTD_M25P80=y
 CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=m
 CONFIG_VIRTIO_BLK=y
 CONFIG_SRAM=y
 # CONFIG_SCSI_PROC_FS is not set
@@ -120,7 +152,10 @@
 CONFIG_PATA_PLATFORM=y
 CONFIG_PATA_OF_PLATFORM=y
 CONFIG_NETDEVICES=y
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
 CONFIG_TUN=y
+CONFIG_VETH=m
 CONFIG_VIRTIO_NET=y
 CONFIG_AMD_XGBE=y
 CONFIG_NET_XGENE=y
@@ -350,12 +385,16 @@
 CONFIG_PWM_SAMSUNG=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
 CONFIG_AUTOFS4_FS=y
-CONFIG_FUSE_FS=y
-CONFIG_CUSE=y
+CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 5c88804..6b2aa0f 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -216,7 +216,7 @@
 		err = blkcipher_walk_done(desc, &walk,
 					  walk.nbytes % AES_BLOCK_SIZE);
 	}
-	if (nbytes) {
+	if (walk.nbytes % AES_BLOCK_SIZE) {
 		u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
 		u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
 		u8 __aligned(8) tail[AES_BLOCK_SIZE];
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index f43d2c4..44e1d7f 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -1,4 +1,3 @@
-generic-y += bug.h
 generic-y += bugs.h
 generic-y += clkdev.h
 generic-y += cputime.h
@@ -10,7 +9,6 @@
 generic-y += early_ioremap.h
 generic-y += emergency-restart.h
 generic-y += errno.h
-generic-y += ftrace.h
 generic-y += hw_irq.h
 generic-y += ioctl.h
 generic-y += ioctls.h
@@ -27,12 +25,10 @@
 generic-y += msgbuf.h
 generic-y += msi.h
 generic-y += mutex.h
-generic-y += pci.h
 generic-y += poll.h
 generic-y += preempt.h
 generic-y += resource.h
 generic-y += rwsem.h
-generic-y += sections.h
 generic-y += segment.h
 generic-y += sembuf.h
 generic-y += serial.h
@@ -45,7 +41,6 @@
 generic-y += switch_to.h
 generic-y += termbits.h
 generic-y += termios.h
-generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += unaligned.h
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 5420cb0..e517088 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -12,7 +12,7 @@
 #ifndef _ASM_ACPI_H
 #define _ASM_ACPI_H
 
-#include <linux/mm.h>
+#include <linux/memblock.h>
 #include <linux/psci.h>
 
 #include <asm/cputype.h>
@@ -32,7 +32,11 @@
 static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
 					    acpi_size size)
 {
-	if (!page_is_ram(phys >> PAGE_SHIFT))
+	/*
+	 * EFI's reserve_regions() call adds memory with the WB attribute
+	 * to memblock via early_init_dt_add_memory_arch().
+	 */
+	if (!memblock_is_memory(phys))
 		return ioremap(phys, size);
 
 	return ioremap_cache(phys, size);
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 8746ff6..55101bd 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -2,6 +2,7 @@
 #define __ASM_ALTERNATIVE_H
 
 #include <asm/cpufeature.h>
+#include <asm/insn.h>
 
 #ifndef __ASSEMBLY__
 
@@ -90,24 +91,15 @@
 .endm
 
 /*
- * Begin an alternative code sequence.
+ * Alternative sequences
  *
- * The code that follows this macro will be assembled and linked as
- * normal. There are no restrictions on this code.
- */
-.macro alternative_if_not cap
-	.pushsection .altinstructions, "a"
-	altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
-	.popsection
-661:
-.endm
-
-/*
- * Provide the alternative code sequence.
+ * The code for the case where the capability is not present will be
+ * assembled and linked as normal. There are no restrictions on this
+ * code.
  *
- * The code that follows this macro is assembled into a special
- * section to be used for dynamic patching. Code that follows this
- * macro must:
+ * The code for the case where the capability is present will be
+ * assembled into a special section to be used for dynamic patching.
+ * Code for that case must:
  *
  * 1. Be exactly the same length (in bytes) as the default code
  *    sequence.
@@ -116,8 +108,38 @@
  *    alternative sequence it is defined in (branches into an
  *    alternative sequence are not fixed up).
  */
+
+/*
+ * Begin an alternative code sequence.
+ */
+.macro alternative_if_not cap
+	.set .Lasm_alt_mode, 0
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
+	.popsection
+661:
+.endm
+
+.macro alternative_if cap
+	.set .Lasm_alt_mode, 1
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
+	.popsection
+	.pushsection .altinstr_replacement, "ax"
+	.align 2	/* So GAS knows label 661 is suitably aligned */
+661:
+.endm
+
+/*
+ * Provide the other half of the alternative code sequence.
+ */
 .macro alternative_else
-662:	.pushsection .altinstr_replacement, "ax"
+662:
+	.if .Lasm_alt_mode==0
+	.pushsection .altinstr_replacement, "ax"
+	.else
+	.popsection
+	.endif
 663:
 .endm
 
@@ -125,11 +147,25 @@
  * Complete an alternative code sequence.
  */
 .macro alternative_endif
-664:	.popsection
+664:
+	.if .Lasm_alt_mode==0
+	.popsection
+	.endif
 	.org	. - (664b-663b) + (662b-661b)
 	.org	. - (662b-661b) + (664b-663b)
 .endm
 
+/*
+ * Provides a trivial alternative or default sequence consisting solely
+ * of NOPs. The number of NOPs is chosen automatically to match the
+ * previous case.
+ */
+.macro alternative_else_nop_endif
+alternative_else
+	nops	(662b-661b) / AARCH64_INSN_SIZE
+alternative_endif
+.endm
+
 #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)	\
 	alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
 
diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h
index 8ec88e5..fc2a0cb 100644
--- a/arch/arm64/include/asm/arch_gicv3.h
+++ b/arch/arm64/include/asm/arch_gicv3.h
@@ -28,6 +28,7 @@
 #define ICC_CTLR_EL1			sys_reg(3, 0, 12, 12, 4)
 #define ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
 #define ICC_GRPEN1_EL1			sys_reg(3, 0, 12, 12, 7)
+#define ICC_BPR1_EL1			sys_reg(3, 0, 12, 12, 3)
 
 #define ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
 
@@ -165,6 +166,11 @@
 	isb();
 }
 
+static inline void gic_write_bpr1(u32 val)
+{
+	asm volatile("msr_s " __stringify(ICC_BPR1_EL1) ", %0" : : "r" (val));
+}
+
 #define gic_read_typer(c)		readq_relaxed(c)
 #define gic_write_irouter(v, c)		writeq_relaxed(v, c)
 
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index fbe0ca3..eaa5bbe 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -20,13 +20,55 @@
 #define __ASM_ARCH_TIMER_H
 
 #include <asm/barrier.h>
+#include <asm/sysreg.h>
 
 #include <linux/bug.h>
 #include <linux/init.h>
+#include <linux/jump_label.h>
 #include <linux/types.h>
 
 #include <clocksource/arm_arch_timer.h>
 
+#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585)
+extern struct static_key_false arch_timer_read_ool_enabled;
+#define needs_fsl_a008585_workaround() \
+	static_branch_unlikely(&arch_timer_read_ool_enabled)
+#else
+#define needs_fsl_a008585_workaround()  false
+#endif
+
+u32 __fsl_a008585_read_cntp_tval_el0(void);
+u32 __fsl_a008585_read_cntv_tval_el0(void);
+u64 __fsl_a008585_read_cntvct_el0(void);
+
+/*
+ * The number of retries is an arbitrary value well beyond the highest number
+ * of iterations the loop has been observed to take.
+ */
+#define __fsl_a008585_read_reg(reg) ({			\
+	u64 _old, _new;					\
+	int _retries = 200;				\
+							\
+	do {						\
+		_old = read_sysreg(reg);		\
+		_new = read_sysreg(reg);		\
+		_retries--;				\
+	} while (unlikely(_old != _new) && _retries);	\
+							\
+	WARN_ON_ONCE(!_retries);			\
+	_new;						\
+})
+
+#define arch_timer_reg_read_stable(reg) 		\
+({							\
+	u64 _val;					\
+	if (needs_fsl_a008585_workaround())		\
+		_val = __fsl_a008585_read_##reg();	\
+	else						\
+		_val = read_sysreg(reg);		\
+	_val;						\
+})
+
 /*
  * These register accessors are marked inline so the compiler can
  * nicely work out which register we want, and chuck away the rest of
@@ -38,19 +80,19 @@
 	if (access == ARCH_TIMER_PHYS_ACCESS) {
 		switch (reg) {
 		case ARCH_TIMER_REG_CTRL:
-			asm volatile("msr cntp_ctl_el0,  %0" : : "r" (val));
+			write_sysreg(val, cntp_ctl_el0);
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("msr cntp_tval_el0, %0" : : "r" (val));
+			write_sysreg(val, cntp_tval_el0);
 			break;
 		}
 	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
 		switch (reg) {
 		case ARCH_TIMER_REG_CTRL:
-			asm volatile("msr cntv_ctl_el0,  %0" : : "r" (val));
+			write_sysreg(val, cntv_ctl_el0);
 			break;
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("msr cntv_tval_el0, %0" : : "r" (val));
+			write_sysreg(val, cntv_tval_el0);
 			break;
 		}
 	}
@@ -61,48 +103,38 @@
 static __always_inline
 u32 arch_timer_reg_read_cp15(int access, enum arch_timer_reg reg)
 {
-	u32 val;
-
 	if (access == ARCH_TIMER_PHYS_ACCESS) {
 		switch (reg) {
 		case ARCH_TIMER_REG_CTRL:
-			asm volatile("mrs %0,  cntp_ctl_el0" : "=r" (val));
-			break;
+			return read_sysreg(cntp_ctl_el0);
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("mrs %0, cntp_tval_el0" : "=r" (val));
-			break;
+			return arch_timer_reg_read_stable(cntp_tval_el0);
 		}
 	} else if (access == ARCH_TIMER_VIRT_ACCESS) {
 		switch (reg) {
 		case ARCH_TIMER_REG_CTRL:
-			asm volatile("mrs %0,  cntv_ctl_el0" : "=r" (val));
-			break;
+			return read_sysreg(cntv_ctl_el0);
 		case ARCH_TIMER_REG_TVAL:
-			asm volatile("mrs %0, cntv_tval_el0" : "=r" (val));
-			break;
+			return arch_timer_reg_read_stable(cntv_tval_el0);
 		}
 	}
 
-	return val;
+	BUG();
 }
 
 static inline u32 arch_timer_get_cntfrq(void)
 {
-	u32 val;
-	asm volatile("mrs %0,   cntfrq_el0" : "=r" (val));
-	return val;
+	return read_sysreg(cntfrq_el0);
 }
 
 static inline u32 arch_timer_get_cntkctl(void)
 {
-	u32 cntkctl;
-	asm volatile("mrs	%0, cntkctl_el1" : "=r" (cntkctl));
-	return cntkctl;
+	return read_sysreg(cntkctl_el1);
 }
 
 static inline void arch_timer_set_cntkctl(u32 cntkctl)
 {
-	asm volatile("msr	cntkctl_el1, %0" : : "r" (cntkctl));
+	write_sysreg(cntkctl, cntkctl_el1);
 }
 
 static inline u64 arch_counter_get_cntpct(void)
@@ -116,12 +148,8 @@
 
 static inline u64 arch_counter_get_cntvct(void)
 {
-	u64 cval;
-
 	isb();
-	asm volatile("mrs %0, cntvct_el0" : "=r" (cval));
-
-	return cval;
+	return arch_timer_reg_read_stable(cntvct_el0);
 }
 
 static inline int arch_timer_arch_init(void)
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index d5025c6..28bfe61 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -87,6 +87,15 @@
 	.endm
 
 /*
+ * NOP sequence
+ */
+	.macro	nops, num
+	.rept	\num
+	nop
+	.endr
+	.endm
+
+/*
  * Emit an entry into the exception table
  */
 	.macro		_asm_extable, from, to
@@ -216,11 +225,26 @@
 	.macro	mmid, rd, rn
 	ldr	\rd, [\rn, #MM_CONTEXT_ID]
 	.endm
+/*
+ * read_ctr - read CTR_EL0. If the system has mismatched
+ * cache line sizes, provide the system wide safe value
+ * from arm64_ftr_reg_ctrel0.sys_val
+ */
+	.macro	read_ctr, reg
+alternative_if_not ARM64_MISMATCHED_CACHE_LINE_SIZE
+	mrs	\reg, ctr_el0			// read CTR
+	nop
+alternative_else
+	ldr_l	\reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
+alternative_endif
+	.endm
+
 
 /*
- * dcache_line_size - get the minimum D-cache line size from the CTR register.
+ * raw_dcache_line_size - get the minimum D-cache line size on this CPU
+ * from the CTR register.
  */
-	.macro	dcache_line_size, reg, tmp
+	.macro	raw_dcache_line_size, reg, tmp
 	mrs	\tmp, ctr_el0			// read CTR
 	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
 	mov	\reg, #4			// bytes per word
@@ -228,9 +252,20 @@
 	.endm
 
 /*
- * icache_line_size - get the minimum I-cache line size from the CTR register.
+ * dcache_line_size - get the safe D-cache line size across all CPUs
  */
-	.macro	icache_line_size, reg, tmp
+	.macro	dcache_line_size, reg, tmp
+	read_ctr	\tmp
+	ubfm		\tmp, \tmp, #16, #19	// cache line size encoding
+	mov		\reg, #4		// bytes per word
+	lsl		\reg, \reg, \tmp	// actual cache line size
+	.endm
+
+/*
+ * raw_icache_line_size - get the minimum I-cache line size on this CPU
+ * from the CTR register.
+ */
+	.macro	raw_icache_line_size, reg, tmp
 	mrs	\tmp, ctr_el0			// read CTR
 	and	\tmp, \tmp, #0xf		// cache line size encoding
 	mov	\reg, #4			// bytes per word
@@ -238,6 +273,16 @@
 	.endm
 
 /*
+ * icache_line_size - get the safe I-cache line size across all CPUs
+ */
+	.macro	icache_line_size, reg, tmp
+	read_ctr	\tmp
+	and		\tmp, \tmp, #0xf	// cache line size encoding
+	mov		\reg, #4		// bytes per word
+	lsl		\reg, \reg, \tmp	// actual cache line size
+	.endm
+
+/*
  * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
  */
 	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index b5890be..7457ce0 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -86,8 +86,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-	__LL_SC_ATOMIC(add_return##name),				\
+	__LL_SC_ATOMIC(add_return##name)				\
+	__nops(1),							\
 	/* LSE atomics */						\
 	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
 	"	add	%w[i], %w[i], w30")				\
@@ -112,8 +112,8 @@
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC(and),
+	__LL_SC_ATOMIC(and)
+	__nops(1),
 	/* LSE atomics */
 	"	mvn	%w[i], %w[i]\n"
 	"	stclr	%w[i], %[v]")
@@ -130,8 +130,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-	__LL_SC_ATOMIC(fetch_and##name),				\
+	__LL_SC_ATOMIC(fetch_and##name)					\
+	__nops(1),							\
 	/* LSE atomics */						\
 	"	mvn	%w[i], %w[i]\n"					\
 	"	ldclr" #mb "	%w[i], %w[i], %[v]")			\
@@ -156,8 +156,8 @@
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC(sub),
+	__LL_SC_ATOMIC(sub)
+	__nops(1),
 	/* LSE atomics */
 	"	neg	%w[i], %w[i]\n"
 	"	stadd	%w[i], %[v]")
@@ -174,9 +174,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
 	__LL_SC_ATOMIC(sub_return##name)				\
-	"	nop",							\
+	__nops(2),							\
 	/* LSE atomics */						\
 	"	neg	%w[i], %w[i]\n"					\
 	"	ldadd" #mb "	%w[i], w30, %[v]\n"			\
@@ -203,8 +202,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-	__LL_SC_ATOMIC(fetch_sub##name),				\
+	__LL_SC_ATOMIC(fetch_sub##name)					\
+	__nops(1),							\
 	/* LSE atomics */						\
 	"	neg	%w[i], %w[i]\n"					\
 	"	ldadd" #mb "	%w[i], %w[i], %[v]")			\
@@ -284,8 +283,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-	__LL_SC_ATOMIC64(add_return##name),				\
+	__LL_SC_ATOMIC64(add_return##name)				\
+	__nops(1),							\
 	/* LSE atomics */						\
 	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
 	"	add	%[i], %[i], x30")				\
@@ -310,8 +309,8 @@
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC64(and),
+	__LL_SC_ATOMIC64(and)
+	__nops(1),
 	/* LSE atomics */
 	"	mvn	%[i], %[i]\n"
 	"	stclr	%[i], %[v]")
@@ -328,8 +327,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-	__LL_SC_ATOMIC64(fetch_and##name),				\
+	__LL_SC_ATOMIC64(fetch_and##name)				\
+	__nops(1),							\
 	/* LSE atomics */						\
 	"	mvn	%[i], %[i]\n"					\
 	"	ldclr" #mb "	%[i], %[i], %[v]")			\
@@ -354,8 +353,8 @@
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
-	__LL_SC_ATOMIC64(sub),
+	__LL_SC_ATOMIC64(sub)
+	__nops(1),
 	/* LSE atomics */
 	"	neg	%[i], %[i]\n"
 	"	stadd	%[i], %[v]")
@@ -372,9 +371,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
 	__LL_SC_ATOMIC64(sub_return##name)				\
-	"	nop",							\
+	__nops(2),							\
 	/* LSE atomics */						\
 	"	neg	%[i], %[i]\n"					\
 	"	ldadd" #mb "	%[i], x30, %[v]\n"			\
@@ -401,8 +399,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-	__LL_SC_ATOMIC64(fetch_sub##name),				\
+	__LL_SC_ATOMIC64(fetch_sub##name)				\
+	__nops(1),							\
 	/* LSE atomics */						\
 	"	neg	%[i], %[i]\n"					\
 	"	ldadd" #mb "	%[i], %[i], %[v]")			\
@@ -426,13 +424,8 @@
 
 	asm volatile(ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
-	"	nop\n"
 	__LL_SC_ATOMIC64(dec_if_positive)
-	"	nop\n"
-	"	nop\n"
-	"	nop\n"
-	"	nop\n"
-	"	nop",
+	__nops(6),
 	/* LSE atomics */
 	"1:	ldr	x30, %[v]\n"
 	"	subs	%[ret], x30, #1\n"
@@ -464,9 +457,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-		__LL_SC_CMPXCHG(name)					\
-	"	nop",							\
+	__LL_SC_CMPXCHG(name)						\
+	__nops(2),							\
 	/* LSE atomics */						\
 	"	mov	" #w "30, %" #w "[old]\n"			\
 	"	cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n"		\
@@ -517,10 +509,8 @@
 									\
 	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
 	/* LL/SC */							\
-	"	nop\n"							\
-	"	nop\n"							\
-	"	nop\n"							\
-	__LL_SC_CMPXCHG_DBL(name),					\
+	__LL_SC_CMPXCHG_DBL(name)					\
+	__nops(3),							\
 	/* LSE atomics */						\
 	"	casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\
 	"	eor	%[old1], %[old1], %[oldval1]\n"			\
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 4eea7f6..4e0497f 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -20,6 +20,9 @@
 
 #ifndef __ASSEMBLY__
 
+#define __nops(n)	".rept	" #n "\nnop\n.endr\n"
+#define nops(n)		asm volatile(__nops(n))
+
 #define sev()		asm volatile("sev" : : : "memory")
 #define wfe()		asm volatile("wfe" : : : "memory")
 #define wfi()		asm volatile("wfi" : : : "memory")
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index c64268d..2e5fb97 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -68,6 +68,7 @@
 extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void __flush_dcache_area(void *addr, size_t len);
+extern void __clean_dcache_area_poc(void *addr, size_t len);
 extern void __clean_dcache_area_pou(void *addr, size_t len);
 extern long __flush_cache_user_range(unsigned long start, unsigned long end);
 
@@ -85,7 +86,7 @@
  */
 extern void __dma_map_area(const void *, size_t, int);
 extern void __dma_unmap_area(const void *, size_t, int);
-extern void __dma_flush_range(const void *, const void *);
+extern void __dma_flush_area(const void *, size_t);
 
 /*
  * Copy user data from/to a page which is mapped into a different
diff --git a/arch/arm64/include/asm/clocksource.h b/arch/arm64/include/asm/clocksource.h
new file mode 100644
index 0000000..0b350a7
--- /dev/null
+++ b/arch/arm64/include/asm/clocksource.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_CLOCKSOURCE_H
+#define _ASM_CLOCKSOURCE_H
+
+struct arch_clocksource_data {
+	bool vdso_direct;	/* Usable for direct VDSO access? */
+};
+
+#endif
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index bd86a79..91b26d2 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -43,10 +43,8 @@
 	"	cbnz	%w1, 1b\n"					\
 	"	" #mb,							\
 	/* LSE atomics */						\
-	"	nop\n"							\
-	"	nop\n"							\
 	"	swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n"	\
-	"	nop\n"							\
+		__nops(3)						\
 	"	" #nop_lse)						\
 	: "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr)			\
 	: "r" (x)							\
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 7099f26..758d74f 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -9,6 +9,8 @@
 #ifndef __ASM_CPUFEATURE_H
 #define __ASM_CPUFEATURE_H
 
+#include <linux/jump_label.h>
+
 #include <asm/hwcap.h>
 #include <asm/sysreg.h>
 
@@ -37,8 +39,9 @@
 #define ARM64_WORKAROUND_CAVIUM_27456		12
 #define ARM64_HAS_32BIT_EL0			13
 #define ARM64_HYP_OFFSET_LOW			14
+#define ARM64_MISMATCHED_CACHE_LINE_SIZE	15
 
-#define ARM64_NCAPS				15
+#define ARM64_NCAPS				16
 
 #ifndef __ASSEMBLY__
 
@@ -63,7 +66,7 @@
 	enum ftr_type	type;
 	u8		shift;
 	u8		width;
-	s64		safe_val; /* safe value for discrete features */
+	s64		safe_val; /* safe value for FTR_EXACT features */
 };
 
 /*
@@ -72,13 +75,14 @@
  * @sys_val		Safe value across the CPUs (system view)
  */
 struct arm64_ftr_reg {
-	u32			sys_id;
-	const char		*name;
-	u64			strict_mask;
-	u64			sys_val;
-	struct arm64_ftr_bits	*ftr_bits;
+	const char			*name;
+	u64				strict_mask;
+	u64				sys_val;
+	const struct arm64_ftr_bits	*ftr_bits;
 };
 
+extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0;
+
 /* scope of capability check */
 enum {
 	SCOPE_SYSTEM,
@@ -109,6 +113,7 @@
 };
 
 extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
+extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
 
 bool this_cpu_has_cap(unsigned int cap);
 
@@ -121,16 +126,21 @@
 {
 	if (num >= ARM64_NCAPS)
 		return false;
-	return test_bit(num, cpu_hwcaps);
+	if (__builtin_constant_p(num))
+		return static_branch_unlikely(&cpu_hwcap_keys[num]);
+	else
+		return test_bit(num, cpu_hwcaps);
 }
 
 static inline void cpus_set_cap(unsigned int num)
 {
-	if (num >= ARM64_NCAPS)
+	if (num >= ARM64_NCAPS) {
 		pr_warn("Attempt to set an illegal CPU capability (%d >= %d)\n",
 			num, ARM64_NCAPS);
-	else
+	} else {
 		__set_bit(num, cpu_hwcaps);
+		static_branch_enable(&cpu_hwcap_keys[num]);
+	}
 }
 
 static inline int __attribute_const__
@@ -157,7 +167,7 @@
 	return cpuid_feature_extract_unsigned_field_width(features, field, 4);
 }
 
-static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp)
+static inline u64 arm64_ftr_mask(const struct arm64_ftr_bits *ftrp)
 {
 	return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift);
 }
@@ -170,7 +180,7 @@
 		cpuid_feature_extract_unsigned_field(features, field);
 }
 
-static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val)
+static inline s64 arm64_ftr_value(const struct arm64_ftr_bits *ftrp, u64 val)
 {
 	return (s64)cpuid_feature_extract_field(val, ftrp->shift, ftrp->sign);
 }
@@ -193,11 +203,11 @@
 void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
 			    const char *info);
 void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps);
-void check_local_cpu_errata(void);
-void __init enable_errata_workarounds(void);
+void check_local_cpu_capabilities(void);
 
-void verify_local_cpu_errata(void);
-void verify_local_cpu_capabilities(void);
+void update_cpu_errata_workarounds(void);
+void __init enable_errata_workarounds(void);
+void verify_local_cpu_errata_workarounds(void);
 
 u64 read_system_reg(u32 id);
 
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 9d9fd4b..26a68dd 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -93,11 +93,7 @@
 
 #include <asm/sysreg.h>
 
-#define read_cpuid(reg) ({						\
-	u64 __val;							\
-	asm("mrs_s	%0, " __stringify(SYS_ ## reg) : "=r" (__val));	\
-	__val;								\
-})
+#define read_cpuid(reg)			read_sysreg_s(SYS_ ## reg)
 
 /*
  * The CPU ID never changes at run time, so we might as well tell the
diff --git a/arch/arm64/include/asm/dcc.h b/arch/arm64/include/asm/dcc.h
index 65e0190..836b056 100644
--- a/arch/arm64/include/asm/dcc.h
+++ b/arch/arm64/include/asm/dcc.h
@@ -21,21 +21,16 @@
 #define __ASM_DCC_H
 
 #include <asm/barrier.h>
+#include <asm/sysreg.h>
 
 static inline u32 __dcc_getstatus(void)
 {
-	u32 ret;
-
-	asm volatile("mrs %0, mdccsr_el0" : "=r" (ret));
-
-	return ret;
+	return read_sysreg(mdccsr_el0);
 }
 
 static inline char __dcc_getchar(void)
 {
-	char c;
-
-	asm volatile("mrs %0, dbgdtrrx_el0" : "=r" (c));
+	char c = read_sysreg(dbgdtrrx_el0);
 	isb();
 
 	return c;
@@ -47,8 +42,7 @@
 	 * The typecast is to make absolutely certain that 'c' is
 	 * zero-extended.
 	 */
-	asm volatile("msr dbgdtrtx_el0, %0"
-			: : "r" ((unsigned long)(unsigned char)c));
+	write_sysreg((unsigned char)c, dbgdtrtx_el0);
 	isb();
 }
 
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 4b6b3f7..b71420a 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -61,8 +61,6 @@
 
 #define AARCH64_BREAK_KGDB_DYN_DBG	\
 	(AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5))
-#define KGDB_DYN_BRK_INS_BYTE(x)	\
-	((AARCH64_BREAK_KGDB_DYN_DBG >> (8 * (x))) & 0xff)
 
 #define CACHE_FLUSH_IS_SAFE		1
 
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index f772e15..d14c478 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -78,6 +78,23 @@
 
 #define ESR_ELx_IL		(UL(1) << 25)
 #define ESR_ELx_ISS_MASK	(ESR_ELx_IL - 1)
+
+/* ISS field definitions shared by different classes */
+#define ESR_ELx_WNR		(UL(1) << 6)
+
+/* Shared ISS field definitions for Data/Instruction aborts */
+#define ESR_ELx_EA		(UL(1) << 9)
+#define ESR_ELx_S1PTW		(UL(1) << 7)
+
+/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
+#define ESR_ELx_FSC		(0x3F)
+#define ESR_ELx_FSC_TYPE	(0x3C)
+#define ESR_ELx_FSC_EXTABT	(0x10)
+#define ESR_ELx_FSC_ACCESS	(0x08)
+#define ESR_ELx_FSC_FAULT	(0x04)
+#define ESR_ELx_FSC_PERM	(0x0C)
+
+/* ISS field definitions for Data Aborts */
 #define ESR_ELx_ISV		(UL(1) << 24)
 #define ESR_ELx_SAS_SHIFT	(22)
 #define ESR_ELx_SAS		(UL(3) << ESR_ELx_SAS_SHIFT)
@@ -86,16 +103,9 @@
 #define ESR_ELx_SRT_MASK	(UL(0x1F) << ESR_ELx_SRT_SHIFT)
 #define ESR_ELx_SF 		(UL(1) << 15)
 #define ESR_ELx_AR 		(UL(1) << 14)
-#define ESR_ELx_EA 		(UL(1) << 9)
 #define ESR_ELx_CM 		(UL(1) << 8)
-#define ESR_ELx_S1PTW 		(UL(1) << 7)
-#define ESR_ELx_WNR		(UL(1) << 6)
-#define ESR_ELx_FSC		(0x3F)
-#define ESR_ELx_FSC_TYPE	(0x3C)
-#define ESR_ELx_FSC_EXTABT	(0x10)
-#define ESR_ELx_FSC_ACCESS	(0x08)
-#define ESR_ELx_FSC_FAULT	(0x04)
-#define ESR_ELx_FSC_PERM	(0x0C)
+
+/* ISS field definitions for exceptions taken in to Hyp */
 #define ESR_ELx_CV		(UL(1) << 24)
 #define ESR_ELx_COND_SHIFT	(20)
 #define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
@@ -109,6 +119,62 @@
 	((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL |	\
 	 ((imm) & 0xffff))
 
+/* ISS field definitions for System instruction traps */
+#define ESR_ELx_SYS64_ISS_RES0_SHIFT	22
+#define ESR_ELx_SYS64_ISS_RES0_MASK	(UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT)
+#define ESR_ELx_SYS64_ISS_DIR_MASK	0x1
+#define ESR_ELx_SYS64_ISS_DIR_READ	0x1
+#define ESR_ELx_SYS64_ISS_DIR_WRITE	0x0
+
+#define ESR_ELx_SYS64_ISS_RT_SHIFT	5
+#define ESR_ELx_SYS64_ISS_RT_MASK	(UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT)
+#define ESR_ELx_SYS64_ISS_CRM_SHIFT	1
+#define ESR_ELx_SYS64_ISS_CRM_MASK	(UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT)
+#define ESR_ELx_SYS64_ISS_CRN_SHIFT	10
+#define ESR_ELx_SYS64_ISS_CRN_MASK	(UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT)
+#define ESR_ELx_SYS64_ISS_OP1_SHIFT	14
+#define ESR_ELx_SYS64_ISS_OP1_MASK	(UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT)
+#define ESR_ELx_SYS64_ISS_OP2_SHIFT	17
+#define ESR_ELx_SYS64_ISS_OP2_MASK	(UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT)
+#define ESR_ELx_SYS64_ISS_OP0_SHIFT	20
+#define ESR_ELx_SYS64_ISS_OP0_MASK	(UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT)
+#define ESR_ELx_SYS64_ISS_SYS_MASK	(ESR_ELx_SYS64_ISS_OP0_MASK | \
+					 ESR_ELx_SYS64_ISS_OP1_MASK | \
+					 ESR_ELx_SYS64_ISS_OP2_MASK | \
+					 ESR_ELx_SYS64_ISS_CRN_MASK | \
+					 ESR_ELx_SYS64_ISS_CRM_MASK)
+#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \
+					(((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \
+					 ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \
+					 ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \
+					 ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \
+					 ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT))
+
+#define ESR_ELx_SYS64_ISS_SYS_OP_MASK	(ESR_ELx_SYS64_ISS_SYS_MASK | \
+					 ESR_ELx_SYS64_ISS_DIR_MASK)
+/*
+ * User space cache operations have the following sysreg encoding
+ * in System instructions.
+ * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 14 }, WRITE (L=0)
+ */
+#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC	14
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU	11
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC	10
+#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU	5
+
+#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK	(ESR_ELx_SYS64_ISS_OP0_MASK | \
+						 ESR_ELx_SYS64_ISS_OP1_MASK | \
+						 ESR_ELx_SYS64_ISS_OP2_MASK | \
+						 ESR_ELx_SYS64_ISS_CRN_MASK | \
+						 ESR_ELx_SYS64_ISS_DIR_MASK)
+#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \
+				(ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \
+				 ESR_ELx_SYS64_ISS_DIR_WRITE)
+
+#define ESR_ELx_SYS64_ISS_SYS_CTR	ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0)
+#define ESR_ELx_SYS64_ISS_SYS_CTR_READ	(ESR_ELx_SYS64_ISS_SYS_CTR | \
+					 ESR_ELx_SYS64_ISS_DIR_READ)
+
 #ifndef __ASSEMBLY__
 #include <asm/types.h>
 
diff --git a/arch/arm64/include/asm/hw_breakpoint.h b/arch/arm64/include/asm/hw_breakpoint.h
index 115ea2a..9510ace 100644
--- a/arch/arm64/include/asm/hw_breakpoint.h
+++ b/arch/arm64/include/asm/hw_breakpoint.h
@@ -18,6 +18,7 @@
 
 #include <asm/cputype.h>
 #include <asm/cpufeature.h>
+#include <asm/sysreg.h>
 #include <asm/virt.h>
 
 #ifdef __KERNEL__
@@ -98,18 +99,18 @@
 #define AARCH64_DBG_REG_WCR	(AARCH64_DBG_REG_WVR + ARM_MAX_WRP)
 
 /* Debug register names. */
-#define AARCH64_DBG_REG_NAME_BVR	"bvr"
-#define AARCH64_DBG_REG_NAME_BCR	"bcr"
-#define AARCH64_DBG_REG_NAME_WVR	"wvr"
-#define AARCH64_DBG_REG_NAME_WCR	"wcr"
+#define AARCH64_DBG_REG_NAME_BVR	bvr
+#define AARCH64_DBG_REG_NAME_BCR	bcr
+#define AARCH64_DBG_REG_NAME_WVR	wvr
+#define AARCH64_DBG_REG_NAME_WCR	wcr
 
 /* Accessor macros for the debug registers. */
 #define AARCH64_DBG_READ(N, REG, VAL) do {\
-	asm volatile("mrs %0, dbg" REG #N "_el1" : "=r" (VAL));\
+	VAL = read_sysreg(dbg##REG##N##_el1);\
 } while (0)
 
 #define AARCH64_DBG_WRITE(N, REG, VAL) do {\
-	asm volatile("msr dbg" REG #N "_el1, %0" :: "r" (VAL));\
+	write_sysreg(VAL, dbg##REG##N##_el1);\
 } while (0)
 
 struct task_struct;
@@ -141,8 +142,6 @@
 }
 #endif
 
-extern struct pmu perf_ops_bp;
-
 /* Determine number of BRP registers available. */
 static inline int get_num_brps(void)
 {
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 1dbaa90..bc85366 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -246,7 +246,8 @@
 static __always_inline u32 aarch64_insn_get_##abbr##_value(void) \
 { return (val); }
 
-__AARCH64_INSN_FUNCS(adr_adrp,	0x1F000000, 0x10000000)
+__AARCH64_INSN_FUNCS(adr,	0x9F000000, 0x10000000)
+__AARCH64_INSN_FUNCS(adrp,	0x9F000000, 0x90000000)
 __AARCH64_INSN_FUNCS(prfm_lit,	0xFF000000, 0xD8000000)
 __AARCH64_INSN_FUNCS(str_reg,	0x3FE0EC00, 0x38206800)
 __AARCH64_INSN_FUNCS(ldr_reg,	0x3FE0EC00, 0x38606800)
@@ -318,6 +319,11 @@
 bool aarch64_insn_is_nop(u32 insn);
 bool aarch64_insn_is_branch_imm(u32 insn);
 
+static inline bool aarch64_insn_is_adr_adrp(u32 insn)
+{
+	return aarch64_insn_is_adr(insn) || aarch64_insn_is_adrp(insn);
+}
+
 int aarch64_insn_read(void *addr, u32 *insnp);
 int aarch64_insn_write(void *addr, u32 insn);
 enum aarch64_insn_encoding_class aarch64_get_insn_class(u32 insn);
@@ -398,6 +404,9 @@
 int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
 int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
 
+s32 aarch64_insn_adrp_get_offset(u32 insn);
+u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset);
+
 bool aarch32_insn_is_wide(u32 insn);
 
 #define A32_RN_OFFSET	16
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 9b6e408..0bba427 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -40,25 +40,25 @@
 #define __raw_writeb __raw_writeb
 static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
 {
-	asm volatile("strb %w0, [%1]" : : "r" (val), "r" (addr));
+	asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
 #define __raw_writew __raw_writew
 static inline void __raw_writew(u16 val, volatile void __iomem *addr)
 {
-	asm volatile("strh %w0, [%1]" : : "r" (val), "r" (addr));
+	asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
 #define __raw_writel __raw_writel
 static inline void __raw_writel(u32 val, volatile void __iomem *addr)
 {
-	asm volatile("str %w0, [%1]" : : "r" (val), "r" (addr));
+	asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
 #define __raw_writeq __raw_writeq
 static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
 {
-	asm volatile("str %0, [%1]" : : "r" (val), "r" (addr));
+	asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
 #define __raw_readb __raw_readb
@@ -184,17 +184,6 @@
 #define iowrite32be(v,p)	({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
 #define iowrite64be(v,p)	({ __iowmb(); __raw_writeq((__force __u64)cpu_to_be64(v), p); })
 
-/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
- */
-#define xlate_dev_mem_ptr(p)	__va(p)
-
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p)	p
-
 #include <asm-generic/io.h>
 
 /*
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 61b4915..1737aec 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -22,7 +22,6 @@
 
 #define __ARCH_WANT_KPROBES_INSN_SLOT
 #define MAX_INSN_SIZE			1
-#define MAX_STACK_SIZE			128
 
 #define flush_insn_slot(p)		do { } while (0)
 #define kretprobe_blacklist_size	0
@@ -47,7 +46,6 @@
 	struct prev_kprobe prev_kprobe;
 	struct kprobe_step_ctx ss_ctx;
 	struct pt_regs jprobe_saved_regs;
-	char jprobes_stack[MAX_STACK_SIZE];
 };
 
 void arch_remove_kprobe(struct kprobe *);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index b6bb834..dff1098 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -99,14 +99,10 @@
 .macro kern_hyp_va	reg
 alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
 	and     \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK
-alternative_else
-	nop
-alternative_endif
-alternative_if_not ARM64_HYP_OFFSET_LOW
-	nop
-alternative_else
+alternative_else_nop_endif
+alternative_if ARM64_HYP_OFFSET_LOW
 	and     \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK
-alternative_endif
+alternative_else_nop_endif
 .endm
 
 #else
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 31b73227..ba62df8 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -214,7 +214,7 @@
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
 #define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define _virt_addr_valid(kaddr)	pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
 #else
 #define __virt_to_pgoff(kaddr)	(((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page))
 #define __page_to_voff(kaddr)	(((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page))
@@ -222,11 +222,15 @@
 #define page_to_virt(page)	((void *)((__page_to_voff(page)) | PAGE_OFFSET))
 #define virt_to_page(vaddr)	((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
 
-#define virt_addr_valid(kaddr)	pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \
+#define _virt_addr_valid(kaddr)	pfn_valid((((u64)(kaddr) & ~PAGE_OFFSET) \
 					   + PHYS_OFFSET) >> PAGE_SHIFT)
 #endif
 #endif
 
+#define _virt_addr_is_linear(kaddr)	(((u64)(kaddr)) >= PAGE_OFFSET)
+#define virt_addr_valid(kaddr)		(_virt_addr_is_linear(kaddr) && \
+					 _virt_addr_valid(kaddr))
+
 #include <asm-generic/memory_model.h>
 
 #endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index b1892a0..a501853 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -27,22 +27,17 @@
 #include <asm-generic/mm_hooks.h>
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
+#include <asm/sysreg.h>
 #include <asm/tlbflush.h>
 
-#ifdef CONFIG_PID_IN_CONTEXTIDR
 static inline void contextidr_thread_switch(struct task_struct *next)
 {
-	asm(
-	"	msr	contextidr_el1, %0\n"
-	"	isb"
-	:
-	: "r" (task_pid_nr(next)));
+	if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
+		return;
+
+	write_sysreg(task_pid_nr(next), contextidr_el1);
+	isb();
 }
-#else
-static inline void contextidr_thread_switch(struct task_struct *next)
-{
-}
-#endif
 
 /*
  * Set TTBR0 to empty_zero_page. No translations will be possible via TTBR0.
@@ -51,11 +46,8 @@
 {
 	unsigned long ttbr = virt_to_phys(empty_zero_page);
 
-	asm(
-	"	msr	ttbr0_el1, %0			// set TTBR0\n"
-	"	isb"
-	:
-	: "r" (ttbr));
+	write_sysreg(ttbr, ttbr0_el1);
+	isb();
 }
 
 /*
@@ -81,13 +73,11 @@
 	if (!__cpu_uses_extended_idmap())
 		return;
 
-	asm volatile (
-	"	mrs	%0, tcr_el1	;"
-	"	bfi	%0, %1, %2, %3	;"
-	"	msr	tcr_el1, %0	;"
-	"	isb"
-	: "=&r" (tcr)
-	: "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH));
+	tcr = read_sysreg(tcr_el1);
+	tcr &= ~TCR_T0SZ_MASK;
+	tcr |= t0sz << TCR_T0SZ_OFFSET;
+	write_sysreg(tcr, tcr_el1);
+	isb();
 }
 
 #define cpu_set_default_tcr_t0sz()	__cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS))
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 0a456be..2fee2f5 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -199,19 +199,19 @@
 #define _percpu_read(pcp)						\
 ({									\
 	typeof(pcp) __retval;						\
-	preempt_disable();						\
+	preempt_disable_notrace();					\
 	__retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), 	\
 					      sizeof(pcp));		\
-	preempt_enable();						\
+	preempt_enable_notrace();					\
 	__retval;							\
 })
 
 #define _percpu_write(pcp, val)						\
 do {									\
-	preempt_disable();						\
+	preempt_disable_notrace();					\
 	__percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), 	\
 				sizeof(pcp));				\
-	preempt_enable();						\
+	preempt_enable_notrace();					\
 } while(0)								\
 
 #define _pcp_protect(operation, pcp, val)			\
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index c3ae239..eb0c2bd 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -208,6 +208,7 @@
 #define TCR_T1SZ(x)		((UL(64) - (x)) << TCR_T1SZ_OFFSET)
 #define TCR_TxSZ(x)		(TCR_T0SZ(x) | TCR_T1SZ(x))
 #define TCR_TxSZ_WIDTH		6
+#define TCR_T0SZ_MASK		(((UL(1) << TCR_TxSZ_WIDTH) - 1) << TCR_T0SZ_OFFSET)
 
 #define TCR_IRGN0_SHIFT		8
 #define TCR_IRGN0_MASK		(UL(3) << TCR_IRGN0_SHIFT)
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 39f5252..2142c77 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -70,12 +70,13 @@
 #define PAGE_COPY_EXEC		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
 #define PAGE_READONLY		__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_READONLY_EXEC	__pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN)
+#define PAGE_EXECONLY		__pgprot(_PAGE_DEFAULT | PTE_NG | PTE_PXN)
 
 #define __P000  PAGE_NONE
 #define __P001  PAGE_READONLY
 #define __P010  PAGE_COPY
 #define __P011  PAGE_COPY
-#define __P100  PAGE_READONLY_EXEC
+#define __P100  PAGE_EXECONLY
 #define __P101  PAGE_READONLY_EXEC
 #define __P110  PAGE_COPY_EXEC
 #define __P111  PAGE_COPY_EXEC
@@ -84,7 +85,7 @@
 #define __S001  PAGE_READONLY
 #define __S010  PAGE_SHARED
 #define __S011  PAGE_SHARED
-#define __S100  PAGE_READONLY_EXEC
+#define __S100  PAGE_EXECONLY
 #define __S101  PAGE_READONLY_EXEC
 #define __S110  PAGE_SHARED_EXEC
 #define __S111  PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index e20bd43..ffbb9a5 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -73,7 +73,7 @@
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
 #define pte_exec(pte)		(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
-#define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
+#define pte_ng(pte)		(!!(pte_val(pte) & PTE_NG))
 
 #ifdef CONFIG_ARM64_HW_AFDBM
 #define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
@@ -84,8 +84,8 @@
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
 #define pte_valid(pte)		(!!(pte_val(pte) & PTE_VALID))
-#define pte_valid_not_user(pte) \
-	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
+#define pte_valid_global(pte) \
+	((pte_val(pte) & (PTE_VALID | PTE_NG)) == PTE_VALID)
 #define pte_valid_young(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
 
@@ -155,6 +155,16 @@
 	return clear_pte_bit(pte, __pgprot(PTE_CONT));
 }
 
+static inline pte_t pte_clear_rdonly(pte_t pte)
+{
+	return clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+}
+
+static inline pte_t pte_mkpresent(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_VALID));
+}
+
 static inline pmd_t pmd_mkcont(pmd_t pmd)
 {
 	return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
@@ -168,7 +178,7 @@
 	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
 	 * or update_mmu_cache() have the necessary barriers.
 	 */
-	if (pte_valid_not_user(pte)) {
+	if (pte_valid_global(pte)) {
 		dsb(ishst);
 		isb();
 	}
@@ -202,7 +212,7 @@
 			pte_val(pte) &= ~PTE_RDONLY;
 		else
 			pte_val(pte) |= PTE_RDONLY;
-		if (pte_user(pte) && pte_exec(pte) && !pte_special(pte))
+		if (pte_ng(pte) && pte_exec(pte) && !pte_special(pte))
 			__sync_icache_dcache(pte, addr);
 	}
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ace0a96..df2e53d 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -37,7 +37,6 @@
 #include <asm/ptrace.h>
 #include <asm/types.h>
 
-#ifdef __KERNEL__
 #define STACK_TOP_MAX		TASK_SIZE_64
 #ifdef CONFIG_COMPAT
 #define AARCH32_VECTORS_BASE	0xffff0000
@@ -49,7 +48,6 @@
 
 extern phys_addr_t arm64_dma_phys_limit;
 #define ARCH_LOW_ADDRESS_LIMIT	(arm64_dma_phys_limit - 1)
-#endif /* __KERNEL__ */
 
 struct debug_info {
 	/* Have we suspended stepping by a debugger? */
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
new file mode 100644
index 0000000..4e7e706
--- /dev/null
+++ b/arch/arm64/include/asm/sections.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 ARM Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_SECTIONS_H
+#define __ASM_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char __alt_instructions[], __alt_instructions_end[];
+extern char __exception_text_start[], __exception_text_end[];
+extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
+extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
+extern char __hyp_text_start[], __hyp_text_end[];
+extern char __idmap_text_start[], __idmap_text_end[];
+extern char __irqentry_text_start[], __irqentry_text_end[];
+extern char __mmuoff_data_start[], __mmuoff_data_end[];
+
+#endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index e875a5a..cae331d 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -66,8 +66,7 @@
 	ARM64_LSE_ATOMIC_INSN(
 	/* LL/SC */
 "	stxr	%w1, %w0, %2\n"
-"	nop\n"
-"	nop\n",
+	__nops(2),
 	/* LSE atomics */
 "	mov	%w1, %w0\n"
 "	cas	%w0, %w0, %2\n"
@@ -99,9 +98,7 @@
 	/* LSE atomics */
 "	mov	%w2, %w5\n"
 "	ldadda	%w2, %w0, %3\n"
-"	nop\n"
-"	nop\n"
-"	nop\n"
+	__nops(3)
 	)
 
 	/* Did we get the lock? */
@@ -165,8 +162,8 @@
 	"	stlrh	%w1, %0",
 	/* LSE atomics */
 	"	mov	%w1, #1\n"
-	"	nop\n"
-	"	staddlh	%w1, %0")
+	"	staddlh	%w1, %0\n"
+	__nops(1))
 	: "=Q" (lock->owner), "=&r" (tmp)
 	:
 	: "memory");
@@ -212,7 +209,7 @@
 	"	cbnz	%w0, 1b\n"
 	"	stxr	%w0, %w2, %1\n"
 	"	cbnz	%w0, 2b\n"
-	"	nop",
+	__nops(1),
 	/* LSE atomics */
 	"1:	mov	%w0, wzr\n"
 	"2:	casa	%w0, %w2, %1\n"
@@ -241,8 +238,7 @@
 	/* LSE atomics */
 	"	mov	%w0, wzr\n"
 	"	casa	%w0, %w2, %1\n"
-	"	nop\n"
-	"	nop")
+	__nops(2))
 	: "=&r" (tmp), "+Q" (rw->lock)
 	: "r" (0x80000000)
 	: "memory");
@@ -290,8 +286,8 @@
 	"	add	%w0, %w0, #1\n"
 	"	tbnz	%w0, #31, 1b\n"
 	"	stxr	%w1, %w0, %2\n"
-	"	nop\n"
-	"	cbnz	%w1, 2b",
+	"	cbnz	%w1, 2b\n"
+	__nops(1),
 	/* LSE atomics */
 	"1:	wfe\n"
 	"2:	ldxr	%w0, %2\n"
@@ -317,9 +313,8 @@
 	"	cbnz	%w1, 1b",
 	/* LSE atomics */
 	"	movn	%w0, #0\n"
-	"	nop\n"
-	"	nop\n"
-	"	staddl	%w0, %2")
+	"	staddl	%w0, %2\n"
+	__nops(2))
 	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
 	: "memory");
@@ -344,7 +339,7 @@
 	"	tbnz	%w1, #31, 1f\n"
 	"	casa	%w0, %w1, %2\n"
 	"	sbc	%w1, %w1, %w0\n"
-	"	nop\n"
+	__nops(1)
 	"1:")
 	: "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
 	:
@@ -363,4 +358,14 @@
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()
 
+/*
+ * Accesses appearing in program order before a spin_lock() operation
+ * can be reordered with accesses inside the critical section, by virtue
+ * of arch_spin_lock being constructed using acquire semantics.
+ *
+ * In cases where this is problematic (e.g. try_to_wake_up), an
+ * smp_mb__before_spinlock() can restore the required ordering.
+ */
+#define smp_mb__before_spinlock()	smp_mb()
+
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 024d623..b8a313f 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -47,4 +47,7 @@
 int arch_hibernation_header_save(void *addr, unsigned int max_size);
 int arch_hibernation_header_restore(void *addr);
 
+/* Used to resume on the CPU we hibernated on */
+int hibernate_resume_nonboot_cpu_disable(void);
+
 #endif
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index cc06794..e8d46e8 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -100,6 +100,7 @@
 /* SCTLR_EL1 specific flags. */
 #define SCTLR_EL1_UCI		(1 << 26)
 #define SCTLR_EL1_SPAN		(1 << 23)
+#define SCTLR_EL1_UCT		(1 << 15)
 #define SCTLR_EL1_SED		(1 << 8)
 #define SCTLR_EL1_CP15BEN	(1 << 5)
 
@@ -253,16 +254,6 @@
 "	.endm\n"
 );
 
-static inline void config_sctlr_el1(u32 clear, u32 set)
-{
-	u32 val;
-
-	asm volatile("mrs %0, sctlr_el1" : "=r" (val));
-	val &= ~clear;
-	val |= set;
-	asm volatile("msr sctlr_el1, %0" : : "r" (val));
-}
-
 /*
  * Unlike read_cpuid, calls to read_sysreg are never expected to be
  * optimized away or replaced with synthetic values.
@@ -273,12 +264,41 @@
 	__val;							\
 })
 
+/*
+ * The "Z" constraint normally means a zero immediate, but when combined with
+ * the "%x0" template means XZR.
+ */
 #define write_sysreg(v, r) do {					\
 	u64 __val = (u64)v;					\
-	asm volatile("msr " __stringify(r) ", %0"		\
-		     : : "r" (__val));				\
+	asm volatile("msr " __stringify(r) ", %x0"		\
+		     : : "rZ" (__val));				\
 } while (0)
 
+/*
+ * For registers without architectural names, or simply unsupported by
+ * GAS.
+ */
+#define read_sysreg_s(r) ({						\
+	u64 __val;							\
+	asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val));	\
+	__val;								\
+})
+
+#define write_sysreg_s(v, r) do {					\
+	u64 __val = (u64)v;						\
+	asm volatile("msr_s " __stringify(r) ", %0" : : "rZ" (__val));	\
+} while (0)
+
+static inline void config_sctlr_el1(u32 clear, u32 set)
+{
+	u32 val;
+
+	val = read_sysreg(sctlr_el1);
+	val &= ~clear;
+	val |= set;
+	write_sysreg(val, sctlr_el1);
+}
+
 #endif
 
 #endif	/* __ASM_SYSREG_H */
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 57f110b..bc81243 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -56,12 +56,6 @@
 	__show_ratelimited;						\
 })
 
-#define UDBG_UNDEFINED	(1 << 0)
-#define UDBG_SYSCALL	(1 << 1)
-#define UDBG_BADABORT	(1 << 2)
-#define UDBG_SEGV	(1 << 3)
-#define UDBG_BUS	(1 << 4)
-
 #endif	/* __ASSEMBLY__ */
 
 #endif	/* __ASM_SYSTEM_MISC_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index abd64bd..e9ea5a6 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -75,6 +75,9 @@
 
 /*
  * struct thread_info can be accessed directly via sp_el0.
+ *
+ * We don't use read_sysreg() as we want the compiler to cache the value where
+ * possible.
  */
 static inline struct thread_info *current_thread_info(void)
 {
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index b460ae2..deab523 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -25,6 +25,24 @@
 #include <asm/cputype.h>
 
 /*
+ * Raw TLBI operations.
+ *
+ * Where necessary, use the __tlbi() macro to avoid asm()
+ * boilerplate. Drivers and most kernel code should use the TLB
+ * management routines in preference to the macro below.
+ *
+ * The macro can be used as __tlbi(op) or __tlbi(op, arg), depending
+ * on whether a particular TLBI operation takes an argument or
+ * not. The macros handles invoking the asm with or without the
+ * register argument as appropriate.
+ */
+#define __TLBI_0(op, arg)		asm ("tlbi " #op)
+#define __TLBI_1(op, arg)		asm ("tlbi " #op ", %0" : : "r" (arg))
+#define __TLBI_N(op, arg, n, ...)	__TLBI_##n(op, arg)
+
+#define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)
+
+/*
  *	TLB Management
  *	==============
  *
@@ -66,7 +84,7 @@
 static inline void local_flush_tlb_all(void)
 {
 	dsb(nshst);
-	asm("tlbi	vmalle1");
+	__tlbi(vmalle1);
 	dsb(nsh);
 	isb();
 }
@@ -74,7 +92,7 @@
 static inline void flush_tlb_all(void)
 {
 	dsb(ishst);
-	asm("tlbi	vmalle1is");
+	__tlbi(vmalle1is);
 	dsb(ish);
 	isb();
 }
@@ -84,7 +102,7 @@
 	unsigned long asid = ASID(mm) << 48;
 
 	dsb(ishst);
-	asm("tlbi	aside1is, %0" : : "r" (asid));
+	__tlbi(aside1is, asid);
 	dsb(ish);
 }
 
@@ -94,7 +112,7 @@
 	unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48);
 
 	dsb(ishst);
-	asm("tlbi	vale1is, %0" : : "r" (addr));
+	__tlbi(vale1is, addr);
 	dsb(ish);
 }
 
@@ -122,9 +140,9 @@
 	dsb(ishst);
 	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
 		if (last_level)
-			asm("tlbi vale1is, %0" : : "r"(addr));
+			__tlbi(vale1is, addr);
 		else
-			asm("tlbi vae1is, %0" : : "r"(addr));
+			__tlbi(vae1is, addr);
 	}
 	dsb(ish);
 }
@@ -149,7 +167,7 @@
 
 	dsb(ishst);
 	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
-		asm("tlbi vaae1is, %0" : : "r"(addr));
+		__tlbi(vaae1is, addr);
 	dsb(ish);
 	isb();
 }
@@ -163,7 +181,7 @@
 {
 	unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
 
-	asm("tlbi	vae1is, %0" : : "r" (addr));
+	__tlbi(vae1is, addr);
 	dsb(ish);
 }
 
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 9cd03f3..02e9035 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -19,6 +19,7 @@
 #define __ASM_TRAP_H
 
 #include <linux/list.h>
+#include <asm/sections.h>
 
 struct pt_regs;
 
@@ -39,9 +40,6 @@
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static inline int __in_irqentry_text(unsigned long ptr)
 {
-	extern char __irqentry_text_start[];
-	extern char __irqentry_text_end[];
-
 	return ptr >= (unsigned long)&__irqentry_text_start &&
 	       ptr < (unsigned long)&__irqentry_text_end;
 }
@@ -54,8 +52,6 @@
 
 static inline int in_exception_text(unsigned long ptr)
 {
-	extern char __exception_text_start[];
-	extern char __exception_text_end[];
 	int in;
 
 	in = ptr >= (unsigned long)&__exception_text_start &&
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 1788545..fea1073 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -45,6 +45,8 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/ptrace.h>
+#include <asm/sections.h>
+#include <asm/sysreg.h>
 
 /*
  * __boot_cpu_mode records what mode CPUs were booted in.
@@ -75,10 +77,7 @@
 
 static inline bool is_kernel_in_hyp_mode(void)
 {
-	u64 el;
-
-	asm("mrs %0, CurrentEL" : "=r" (el));
-	return el == CurrentEL_EL2;
+	return read_sysreg(CurrentEL) == CurrentEL_EL2;
 }
 
 #ifdef CONFIG_ARM64_VHE
@@ -87,14 +86,6 @@
 static inline void verify_cpu_run_el(void) {}
 #endif
 
-/* The section containing the hypervisor idmap text */
-extern char __hyp_idmap_text_start[];
-extern char __hyp_idmap_text_end[];
-
-/* The section containing the hypervisor text */
-extern char __hyp_text_start[];
-extern char __hyp_text_end[];
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* ! __ASM__VIRT_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 14f7b65..7d66bba 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -10,6 +10,8 @@
 CFLAGS_REMOVE_insn.o = -pg
 CFLAGS_REMOVE_return_address.o = -pg
 
+CFLAGS_setup.o = -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
 # Object file lists.
 arm64-obj-y		:= debug-monitors.o entry.o irq.o fpsimd.o		\
 			   entry-fpsimd.o process.o ptrace.o setup.o signal.o	\
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index 3e4f1a4..252a6d9 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -24,6 +24,7 @@
 #include <linux/memblock.h>
 #include <linux/of_fdt.h>
 #include <linux/smp.h>
+#include <linux/serial_core.h>
 
 #include <asm/cputype.h>
 #include <asm/cpu_ops.h>
@@ -206,7 +207,7 @@
 	if (param_acpi_off ||
 	    (!param_acpi_on && !param_acpi_force &&
 	     of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
-		return;
+		goto done;
 
 	/*
 	 * ACPI is disabled at this point. Enable it in order to parse
@@ -226,6 +227,14 @@
 		if (!param_acpi_force)
 			disable_acpi();
 	}
+
+done:
+	if (acpi_disabled) {
+		if (earlycon_init_is_deferred)
+			early_init_dt_scan_chosen_stdout();
+	} else {
+		parse_spcr(earlycon_init_is_deferred);
+	}
 }
 
 #ifdef CONFIG_ACPI_APEI
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index f85149c..f01fab6 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -105,8 +105,10 @@
 	int ret;
 
 	ret = acpi_numa_init();
-	if (ret)
+	if (ret) {
+		pr_info("Failed to initialise from firmware\n");
 		return ret;
+	}
 
 	return srat_disabled() ? -EINVAL : 0;
 }
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index d2ee1b2..06d650f 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -25,14 +25,13 @@
 #include <asm/alternative.h>
 #include <asm/cpufeature.h>
 #include <asm/insn.h>
+#include <asm/sections.h>
 #include <linux/stop_machine.h>
 
 #define __ALT_PTR(a,f)		(u32 *)((void *)&(a)->f + (a)->f)
 #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)
 
-extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
-
 struct alt_region {
 	struct alt_instr *begin;
 	struct alt_instr *end;
@@ -59,6 +58,8 @@
 	BUG();
 }
 
+#define align_down(x, a)	((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
+
 static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr)
 {
 	u32 insn;
@@ -80,6 +81,25 @@
 			offset = target - (unsigned long)insnptr;
 			insn = aarch64_set_branch_offset(insn, offset);
 		}
+	} else if (aarch64_insn_is_adrp(insn)) {
+		s32 orig_offset, new_offset;
+		unsigned long target;
+
+		/*
+		 * If we're replacing an adrp instruction, which uses PC-relative
+		 * immediate addressing, adjust the offset to reflect the new
+		 * PC. adrp operates on 4K aligned addresses.
+		 */
+		orig_offset  = aarch64_insn_adrp_get_offset(insn);
+		target = align_down(altinsnptr, SZ_4K) + orig_offset;
+		new_offset = target - align_down(insnptr, SZ_4K);
+		insn = aarch64_insn_adrp_set_offset(insn, new_offset);
+	} else if (aarch64_insn_uses_literal(insn)) {
+		/*
+		 * Disallow patching unhandled instructions using PC relative
+		 * literal addresses
+		 */
+		BUG();
 	}
 
 	return insn;
@@ -124,8 +144,8 @@
 {
 	static int patched = 0;
 	struct alt_region region = {
-		.begin	= __alt_instructions,
-		.end	= __alt_instructions_end,
+		.begin	= (struct alt_instr *)__alt_instructions,
+		.end	= (struct alt_instr *)__alt_instructions_end,
 	};
 
 	/* We always have a CPU 0 at this point (__init) */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 05070b7..4a2f0f0 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -23,6 +23,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/kvm_host.h>
 #include <linux/suspend.h>
+#include <asm/cpufeature.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/smp_plat.h>
@@ -145,5 +146,6 @@
   DEFINE(HIBERN_PBE_ORIG,	offsetof(struct pbe, orig_address));
   DEFINE(HIBERN_PBE_ADDR,	offsetof(struct pbe, address));
   DEFINE(HIBERN_PBE_NEXT,	offsetof(struct pbe, next));
+  DEFINE(ARM64_FTR_SYSVAL,	offsetof(struct arm64_ftr_reg, sys_val));
   return 0;
 }
diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c
index b8629d5..9617301 100644
--- a/arch/arm64/kernel/cacheinfo.c
+++ b/arch/arm64/kernel/cacheinfo.c
@@ -39,7 +39,7 @@
 
 	if (level > MAX_CACHE_LEVEL)
 		return CACHE_TYPE_NOCACHE;
-	asm volatile ("mrs     %x0, clidr_el1" : "=r" (clidr));
+	clidr = read_sysreg(clidr_el1);
 	return CLIDR_CTYPE(clidr, level);
 }
 
@@ -55,11 +55,9 @@
 
 	WARN_ON(preemptible());
 
-	/* Put value into CSSELR */
-	asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+	write_sysreg(csselr, csselr_el1);
 	isb();
-	/* Read result out of CCSIDR */
-	asm volatile("mrs %x0, ccsidr_el1" : "=r" (ccsidr));
+	ccsidr = read_sysreg(ccsidr_el1);
 
 	return ccsidr;
 }
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 82b0fc2..0150394 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -30,6 +30,21 @@
 				       entry->midr_range_max);
 }
 
+static bool
+has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
+				int scope)
+{
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+	return (read_cpuid_cachetype() & arm64_ftr_reg_ctrel0.strict_mask) !=
+		(arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask);
+}
+
+static void cpu_enable_trap_ctr_access(void *__unused)
+{
+	/* Clear SCTLR_EL1.UCT */
+	config_sctlr_el1(SCTLR_EL1_UCT, 0);
+}
+
 #define MIDR_RANGE(model, min, max) \
 	.def_scope = SCOPE_LOCAL_CPU, \
 	.matches = is_affected_midr_range, \
@@ -108,6 +123,13 @@
 	},
 #endif
 	{
+		.desc = "Mismatched cache line size",
+		.capability = ARM64_MISMATCHED_CACHE_LINE_SIZE,
+		.matches = has_mismatched_cache_line_size,
+		.def_scope = SCOPE_LOCAL_CPU,
+		.enable = cpu_enable_trap_ctr_access,
+	},
+	{
 	}
 };
 
@@ -116,7 +138,7 @@
  * and the related information is freed soon after. If the new CPU requires
  * an errata not detected at boot, fail this CPU.
  */
-void verify_local_cpu_errata(void)
+void verify_local_cpu_errata_workarounds(void)
 {
 	const struct arm64_cpu_capabilities *caps = arm64_errata;
 
@@ -131,7 +153,7 @@
 		}
 }
 
-void check_local_cpu_errata(void)
+void update_cpu_errata_workarounds(void)
 {
 	update_cpu_capabilities(arm64_errata, "enabling workaround for");
 }
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index c7cfb8f..e137cea 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -17,6 +17,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/cache.h>
 #include <linux/errno.h>
 #include <linux/of.h>
 #include <linux/string.h>
@@ -28,7 +29,7 @@
 extern const struct cpu_operations acpi_parking_protocol_ops;
 extern const struct cpu_operations cpu_psci_ops;
 
-const struct cpu_operations *cpu_ops[NR_CPUS];
+const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
 
 static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
 	&smp_spin_table_ops,
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 62272ea..d577f26 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -46,6 +46,9 @@
 
 DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
 
+DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS);
+EXPORT_SYMBOL(cpu_hwcap_keys);
+
 #define __ARM64_FTR_BITS(SIGNED, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \
 	{						\
 		.sign = SIGNED,				\
@@ -74,7 +77,7 @@
 cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused);
 
 
-static struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),
@@ -87,7 +90,7 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0),
@@ -101,7 +104,7 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
+static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
 	S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
 	S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
@@ -119,7 +122,7 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
+static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0),
@@ -130,7 +133,7 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0),
@@ -139,7 +142,7 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_ctr[] = {
+static const struct arm64_ftr_bits ftr_ctr[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 31, 1, 1),	/* RAO */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 3, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_HIGHER_SAFE, 24, 4, 0),	/* CWG */
@@ -147,15 +150,21 @@
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 1),	/* DminLine */
 	/*
 	 * Linux can handle differing I-cache policies. Userspace JITs will
-	 * make use of *minLine
+	 * make use of *minLine.
+	 * If we have differing I-cache policies, report it as the weakest - AIVIVT.
 	 */
-	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, 0),	/* L1Ip */
+	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_EXACT, 14, 2, ICACHE_POLICY_AIVIVT),	/* L1Ip */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 10, 0),	/* RAZ */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* IminLine */
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_id_mmfr0[] = {
+struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = {
+	.name		= "SYS_CTR_EL0",
+	.ftr_bits	= ftr_ctr
+};
+
+static const struct arm64_ftr_bits ftr_id_mmfr0[] = {
 	S_ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 28, 4, 0xf),	/* InnerShr */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 24, 4, 0),	/* FCSE */
 	ARM64_FTR_BITS(FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0),	/* AuxReg */
@@ -167,7 +176,7 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
+static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 32, 32, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0),
@@ -178,14 +187,14 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_mvfr2[] = {
+static const struct arm64_ftr_bits ftr_mvfr2[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),	/* RAZ */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* FPMisc */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* SIMDMisc */
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_dczid[] = {
+static const struct arm64_ftr_bits ftr_dczid[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 5, 27, 0),	/* RAZ */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 1, 1),		/* DZP */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0),	/* BS */
@@ -193,7 +202,7 @@
 };
 
 
-static struct arm64_ftr_bits ftr_id_isar5[] = {
+static const struct arm64_ftr_bits ftr_id_isar5[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 20, 4, 0),	/* RAZ */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0),
@@ -204,14 +213,14 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_id_mmfr4[] = {
+static const struct arm64_ftr_bits ftr_id_mmfr4[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 24, 0),	/* RAZ */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 4, 4, 0),		/* ac2 */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 4, 0),		/* RAZ */
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_id_pfr0[] = {
+static const struct arm64_ftr_bits ftr_id_pfr0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 16, 16, 0),	/* RAZ */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 12, 4, 0),	/* State3 */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 8, 4, 0),		/* State2 */
@@ -220,7 +229,7 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_id_dfr0[] = {
+static const struct arm64_ftr_bits ftr_id_dfr0[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
 	S_ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf),	/* PerfMon */
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
@@ -238,7 +247,7 @@
  * 0. Covers the following 32bit registers:
  * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1]
  */
-static struct arm64_ftr_bits ftr_generic_32bits[] = {
+static const struct arm64_ftr_bits ftr_generic_32bits[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0),
 	ARM64_FTR_BITS(FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0),
@@ -250,29 +259,32 @@
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_generic[] = {
+static const struct arm64_ftr_bits ftr_generic[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_generic32[] = {
+static const struct arm64_ftr_bits ftr_generic32[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 32, 0),
 	ARM64_FTR_END,
 };
 
-static struct arm64_ftr_bits ftr_aa64raz[] = {
+static const struct arm64_ftr_bits ftr_aa64raz[] = {
 	ARM64_FTR_BITS(FTR_STRICT, FTR_EXACT, 0, 64, 0),
 	ARM64_FTR_END,
 };
 
-#define ARM64_FTR_REG(id, table)		\
-	{					\
-		.sys_id = id,			\
+#define ARM64_FTR_REG(id, table) {		\
+	.sys_id = id,				\
+	.reg = 	&(struct arm64_ftr_reg){	\
 		.name = #id,			\
 		.ftr_bits = &((table)[0]),	\
-	}
+	}}
 
-static struct arm64_ftr_reg arm64_ftr_regs[] = {
+static const struct __ftr_reg_entry {
+	u32			sys_id;
+	struct arm64_ftr_reg 	*reg;
+} arm64_ftr_regs[] = {
 
 	/* Op1 = 0, CRn = 0, CRm = 1 */
 	ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0),
@@ -315,7 +327,7 @@
 	ARM64_FTR_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2),
 
 	/* Op1 = 3, CRn = 0, CRm = 0 */
-	ARM64_FTR_REG(SYS_CTR_EL0, ftr_ctr),
+	{ SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 },
 	ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid),
 
 	/* Op1 = 3, CRn = 14, CRm = 0 */
@@ -324,7 +336,7 @@
 
 static int search_cmp_ftr_reg(const void *id, const void *regp)
 {
-	return (int)(unsigned long)id - (int)((const struct arm64_ftr_reg *)regp)->sys_id;
+	return (int)(unsigned long)id - (int)((const struct __ftr_reg_entry *)regp)->sys_id;
 }
 
 /*
@@ -339,14 +351,20 @@
  */
 static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id)
 {
-	return bsearch((const void *)(unsigned long)sys_id,
+	const struct __ftr_reg_entry *ret;
+
+	ret = bsearch((const void *)(unsigned long)sys_id,
 			arm64_ftr_regs,
 			ARRAY_SIZE(arm64_ftr_regs),
 			sizeof(arm64_ftr_regs[0]),
 			search_cmp_ftr_reg);
+	if (ret)
+		return ret->reg;
+	return NULL;
 }
 
-static u64 arm64_ftr_set_value(struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val)
+static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg,
+			       s64 ftr_val)
 {
 	u64 mask = arm64_ftr_mask(ftrp);
 
@@ -355,7 +373,8 @@
 	return reg;
 }
 
-static s64 arm64_ftr_safe_value(struct arm64_ftr_bits *ftrp, s64 new, s64 cur)
+static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new,
+				s64 cur)
 {
 	s64 ret = 0;
 
@@ -376,27 +395,13 @@
 	return ret;
 }
 
-static int __init sort_cmp_ftr_regs(const void *a, const void *b)
-{
-	return ((const struct arm64_ftr_reg *)a)->sys_id -
-		 ((const struct arm64_ftr_reg *)b)->sys_id;
-}
-
-static void __init swap_ftr_regs(void *a, void *b, int size)
-{
-	struct arm64_ftr_reg tmp = *(struct arm64_ftr_reg *)a;
-	*(struct arm64_ftr_reg *)a = *(struct arm64_ftr_reg *)b;
-	*(struct arm64_ftr_reg *)b = tmp;
-}
-
 static void __init sort_ftr_regs(void)
 {
-	/* Keep the array sorted so that we can do the binary search */
-	sort(arm64_ftr_regs,
-		ARRAY_SIZE(arm64_ftr_regs),
-		sizeof(arm64_ftr_regs[0]),
-		sort_cmp_ftr_regs,
-		swap_ftr_regs);
+	int i;
+
+	/* Check that the array is sorted so that we can do the binary search */
+	for (i = 1; i < ARRAY_SIZE(arm64_ftr_regs); i++)
+		BUG_ON(arm64_ftr_regs[i].sys_id < arm64_ftr_regs[i - 1].sys_id);
 }
 
 /*
@@ -407,7 +412,7 @@
 {
 	u64 val = 0;
 	u64 strict_mask = ~0x0ULL;
-	struct arm64_ftr_bits *ftrp;
+	const struct arm64_ftr_bits *ftrp;
 	struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg);
 
 	BUG_ON(!reg);
@@ -464,7 +469,7 @@
 
 static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new)
 {
-	struct arm64_ftr_bits *ftrp;
+	const struct arm64_ftr_bits *ftrp;
 
 	for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) {
 		s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val);
@@ -1004,25 +1009,35 @@
  * cannot do anything to fix it up and could cause unexpected failures. So
  * we park the CPU.
  */
-void verify_local_cpu_capabilities(void)
+static void verify_local_cpu_capabilities(void)
 {
-
-	check_early_cpu_features();
-
-	/*
-	 * If we haven't computed the system capabilities, there is nothing
-	 * to verify.
-	 */
-	if (!sys_caps_initialised)
-		return;
-
-	verify_local_cpu_errata();
+	verify_local_cpu_errata_workarounds();
 	verify_local_cpu_features(arm64_features);
 	verify_local_elf_hwcaps(arm64_elf_hwcaps);
 	if (system_supports_32bit_el0())
 		verify_local_elf_hwcaps(compat_elf_hwcaps);
 }
 
+void check_local_cpu_capabilities(void)
+{
+	/*
+	 * All secondary CPUs should conform to the early CPU features
+	 * in use by the kernel based on boot CPU.
+	 */
+	check_early_cpu_features();
+
+	/*
+	 * If we haven't finalised the system capabilities, this CPU gets
+	 * a chance to update the errata work arounds.
+	 * Otherwise, this CPU should verify that it has all the system
+	 * advertised capabilities.
+	 */
+	if (!sys_caps_initialised)
+		update_cpu_errata_workarounds();
+	else
+		verify_local_cpu_capabilities();
+}
+
 static void __init setup_feature_capabilities(void)
 {
 	update_cpu_capabilities(arm64_features, "detected feature:");
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index ed1b84f..b3d5b3e 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -363,8 +363,6 @@
 	}
 
 	cpuinfo_detect_icache_policy(info);
-
-	check_local_cpu_errata();
 }
 
 void cpuinfo_store_cpu(void)
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index 91fff48..73ae90e 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -46,16 +46,14 @@
 {
 	unsigned long flags;
 	local_dbg_save(flags);
-	asm volatile("msr mdscr_el1, %0" :: "r" (mdscr));
+	write_sysreg(mdscr, mdscr_el1);
 	local_dbg_restore(flags);
 }
 NOKPROBE_SYMBOL(mdscr_write);
 
 static u32 mdscr_read(void)
 {
-	u32 mdscr;
-	asm volatile("mrs %0, mdscr_el1" : "=r" (mdscr));
-	return mdscr;
+	return read_sysreg(mdscr_el1);
 }
 NOKPROBE_SYMBOL(mdscr_read);
 
@@ -132,36 +130,18 @@
 /*
  * OS lock clearing.
  */
-static void clear_os_lock(void *unused)
+static int clear_os_lock(unsigned int cpu)
 {
-	asm volatile("msr oslar_el1, %0" : : "r" (0));
+	write_sysreg(0, oslar_el1);
+	isb();
+	return 0;
 }
 
-static int os_lock_notify(struct notifier_block *self,
-				    unsigned long action, void *data)
-{
-	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE)
-		clear_os_lock(NULL);
-	return NOTIFY_OK;
-}
-
-static struct notifier_block os_lock_nb = {
-	.notifier_call = os_lock_notify,
-};
-
 static int debug_monitors_init(void)
 {
-	cpu_notifier_register_begin();
-
-	/* Clear the OS lock. */
-	on_each_cpu(clear_os_lock, NULL, 1);
-	isb();
-
-	/* Register hotplug handler. */
-	__register_cpu_notifier(&os_lock_nb);
-
-	cpu_notifier_register_done();
-	return 0;
+	return cpuhp_setup_state(CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
+				 "CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING",
+				 clear_os_lock, NULL);
 }
 postcore_initcall(debug_monitors_init);
 
@@ -254,7 +234,7 @@
 		return 0;
 
 	if (user_mode(regs)) {
-		send_user_sigtrap(TRAP_HWBKPT);
+		send_user_sigtrap(TRAP_TRACE);
 
 		/*
 		 * ptrace will disable single step unless explicitly
@@ -382,7 +362,7 @@
 static int __init debug_traps_init(void)
 {
 	hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP,
-			      TRAP_HWBKPT, "single-step handler");
+			      TRAP_TRACE, "single-step handler");
 	hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP,
 			      TRAP_BRKPT, "ptrace BRK handler");
 	return 0;
@@ -435,8 +415,10 @@
 /* ptrace API */
 void user_enable_single_step(struct task_struct *task)
 {
-	set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP);
-	set_regs_spsr_ss(task_pt_regs(task));
+	struct thread_info *ti = task_thread_info(task);
+
+	if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP))
+		set_regs_spsr_ss(task_pt_regs(task));
 }
 NOKPROBE_SYMBOL(user_enable_single_step);
 
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 0f03a8f..aef02d2 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -219,7 +219,7 @@
  *
  * Run ftrace_return_to_handler() before going back to parent.
  * @fp is checked against the value passed by ftrace_graph_caller()
- * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
+ * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
  */
 ENTRY(return_to_handler)
 	save_return_regs
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 96e4a2b..223d54a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -104,7 +104,7 @@
 	str	x20, [sp, #S_ORIG_ADDR_LIMIT]
 	mov	x20, #TASK_SIZE_64
 	str	x20, [tsk, #TI_ADDR_LIMIT]
-	ALTERNATIVE(nop, SET_PSTATE_UAO(0), ARM64_HAS_UAO, CONFIG_ARM64_UAO)
+	/* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */
 	.endif /* \el == 0 */
 	mrs	x22, elr_el1
 	mrs	x23, spsr_el1
@@ -150,13 +150,7 @@
 	ldr	x23, [sp, #S_SP]		// load return stack pointer
 	msr	sp_el0, x23
 #ifdef CONFIG_ARM64_ERRATUM_845719
-alternative_if_not ARM64_WORKAROUND_845719
-	nop
-	nop
-#ifdef CONFIG_PID_IN_CONTEXTIDR
-	nop
-#endif
-alternative_else
+alternative_if ARM64_WORKAROUND_845719
 	tbz	x22, #4, 1f
 #ifdef CONFIG_PID_IN_CONTEXTIDR
 	mrs	x29, contextidr_el1
@@ -165,7 +159,7 @@
 	msr contextidr_el1, xzr
 #endif
 1:
-alternative_endif
+alternative_else_nop_endif
 #endif
 	.endif
 	msr	elr_el1, x21			// set up the return data
@@ -353,6 +347,8 @@
 	lsr	x24, x1, #ESR_ELx_EC_SHIFT	// exception class
 	cmp	x24, #ESR_ELx_EC_DABT_CUR	// data abort in EL1
 	b.eq	el1_da
+	cmp	x24, #ESR_ELx_EC_IABT_CUR	// instruction abort in EL1
+	b.eq	el1_ia
 	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
 	b.eq	el1_undef
 	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
@@ -364,6 +360,11 @@
 	cmp	x24, #ESR_ELx_EC_BREAKPT_CUR	// debug exception in EL1
 	b.ge	el1_dbg
 	b	el1_inv
+
+el1_ia:
+	/*
+	 * Fall through to the Data abort case
+	 */
 el1_da:
 	/*
 	 * Data abort handling
@@ -700,18 +701,13 @@
  * Ok, we need to do extra processing, enter the slow path.
  */
 work_pending:
-	tbnz	x1, #TIF_NEED_RESCHED, work_resched
-	/* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */
 	mov	x0, sp				// 'regs'
-	enable_irq				// enable interrupts for do_notify_resume()
 	bl	do_notify_resume
-	b	ret_to_user
-work_resched:
 #ifdef CONFIG_TRACE_IRQFLAGS
-	bl	trace_hardirqs_off		// the IRQs are off here, inform the tracing code
+	bl	trace_hardirqs_on		// enabled while in userspace
 #endif
-	bl	schedule
-
+	ldr	x1, [tsk, #TI_FLAGS]		// re-check for single-step
+	b	finish_ret_to_user
 /*
  * "slow" syscall return path.
  */
@@ -720,6 +716,7 @@
 	ldr	x1, [tsk, #TI_FLAGS]
 	and	x2, x1, #_TIF_WORK_MASK
 	cbnz	x2, work_pending
+finish_ret_to_user:
 	enable_step_tsk x1, x2
 	kernel_exit 0
 ENDPROC(ret_to_user)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 975b274..394c61d 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -299,28 +299,16 @@
 #endif /* CONFIG_CPU_PM */
 
 #ifdef CONFIG_HOTPLUG_CPU
-static int fpsimd_cpu_hotplug_notifier(struct notifier_block *nfb,
-				       unsigned long action,
-				       void *hcpu)
+static int fpsimd_cpu_dead(unsigned int cpu)
 {
-	unsigned int cpu = (long)hcpu;
-
-	switch (action) {
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		per_cpu(fpsimd_last_state, cpu) = NULL;
-		break;
-	}
-	return NOTIFY_OK;
+	per_cpu(fpsimd_last_state, cpu) = NULL;
+	return 0;
 }
 
-static struct notifier_block fpsimd_cpu_hotplug_notifier_block = {
-	.notifier_call = fpsimd_cpu_hotplug_notifier,
-};
-
 static inline void fpsimd_hotplug_init(void)
 {
-	register_cpu_notifier(&fpsimd_cpu_hotplug_notifier_block);
+	cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
+				  NULL, fpsimd_cpu_dead);
 }
 
 #else
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index ebecf9a..40ad08a 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -138,7 +138,7 @@
 		return;
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer);
+				       frame_pointer, NULL);
 	if (err == -EBUSY)
 		return;
 	else
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index b77f583..427f6d3 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -208,13 +208,23 @@
 
 	__INIT
 
+	/*
+	 * The following callee saved general purpose registers are used on the
+	 * primary lowlevel boot path:
+	 *
+	 *  Register   Scope                      Purpose
+	 *  x21        stext() .. start_kernel()  FDT pointer passed at boot in x0
+	 *  x23        stext() .. start_kernel()  physical misalignment/KASLR offset
+	 *  x28        __create_page_tables()     callee preserved temp register
+	 *  x19/x20    __primary_switch()         callee preserved temp registers
+	 */
 ENTRY(stext)
 	bl	preserve_boot_args
-	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
-	adrp	x24, __PHYS_OFFSET
-	and	x23, x24, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
+	bl	el2_setup			// Drop to EL1, w0=cpu_boot_mode
+	adrp	x23, __PHYS_OFFSET
+	and	x23, x23, MIN_KIMG_ALIGN - 1	// KASLR offset, defaults to 0
 	bl	set_cpu_boot_mode_flag
-	bl	__create_page_tables		// x25=TTBR0, x26=TTBR1
+	bl	__create_page_tables
 	/*
 	 * The following calls CPU setup code, see arch/arm64/mm/proc.S for
 	 * details.
@@ -222,9 +232,7 @@
 	 * the TCR will have been set.
 	 */
 	bl	__cpu_setup			// initialise processor
-	adr_l	x27, __primary_switch		// address to jump to after
-						// MMU has been enabled
-	b	__enable_mmu
+	b	__primary_switch
 ENDPROC(stext)
 
 /*
@@ -311,23 +319,21 @@
  *     been enabled
  */
 __create_page_tables:
-	adrp	x25, idmap_pg_dir
-	adrp	x26, swapper_pg_dir
 	mov	x28, lr
 
 	/*
 	 * Invalidate the idmap and swapper page tables to avoid potential
 	 * dirty cache lines being evicted.
 	 */
-	mov	x0, x25
-	add	x1, x26, #SWAPPER_DIR_SIZE
+	adrp	x0, idmap_pg_dir
+	adrp	x1, swapper_pg_dir + SWAPPER_DIR_SIZE
 	bl	__inval_cache_range
 
 	/*
 	 * Clear the idmap and swapper page tables.
 	 */
-	mov	x0, x25
-	add	x6, x26, #SWAPPER_DIR_SIZE
+	adrp	x0, idmap_pg_dir
+	adrp	x6, swapper_pg_dir + SWAPPER_DIR_SIZE
 1:	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
 	stp	xzr, xzr, [x0], #16
@@ -340,7 +346,7 @@
 	/*
 	 * Create the identity mapping.
 	 */
-	mov	x0, x25				// idmap_pg_dir
+	adrp	x0, idmap_pg_dir
 	adrp	x3, __idmap_text_start		// __pa(__idmap_text_start)
 
 #ifndef CONFIG_ARM64_VA_BITS_48
@@ -390,7 +396,7 @@
 	/*
 	 * Map the kernel image (starting with PHYS_OFFSET).
 	 */
-	mov	x0, x26				// swapper_pg_dir
+	adrp	x0, swapper_pg_dir
 	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
 	add	x5, x5, x23			// add KASLR displacement
 	create_pgd_entry x0, x5, x3, x6
@@ -405,8 +411,8 @@
 	 * accesses (MMU disabled), invalidate the idmap and swapper page
 	 * tables again to remove any speculatively loaded cache lines.
 	 */
-	mov	x0, x25
-	add	x1, x26, #SWAPPER_DIR_SIZE
+	adrp	x0, idmap_pg_dir
+	adrp	x1, swapper_pg_dir + SWAPPER_DIR_SIZE
 	dmb	sy
 	bl	__inval_cache_range
 
@@ -416,14 +422,27 @@
 
 /*
  * The following fragment of code is executed with the MMU enabled.
+ *
+ *   x0 = __PHYS_OFFSET
  */
-	.set	initial_sp, init_thread_union + THREAD_START_SP
 __primary_switched:
-	mov	x28, lr				// preserve LR
+	adrp	x4, init_thread_union
+	add	sp, x4, #THREAD_SIZE
+	msr	sp_el0, x4			// Save thread_info
+
 	adr_l	x8, vectors			// load VBAR_EL1 with virtual
 	msr	vbar_el1, x8			// vector table address
 	isb
 
+	stp	xzr, x30, [sp, #-16]!
+	mov	x29, sp
+
+	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
+
+	ldr_l	x4, kimage_vaddr		// Save the offset between
+	sub	x4, x4, x0			// the kernel virtual and
+	str_l	x4, kimage_voffset, x5		// physical mappings
+
 	// Clear BSS
 	adr_l	x0, __bss_start
 	mov	x1, xzr
@@ -432,17 +451,6 @@
 	bl	__pi_memset
 	dsb	ishst				// Make zero page visible to PTW
 
-	adr_l	sp, initial_sp, x4
-	mov	x4, sp
-	and	x4, x4, #~(THREAD_SIZE - 1)
-	msr	sp_el0, x4			// Save thread_info
-	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
-
-	ldr_l	x4, kimage_vaddr		// Save the offset between
-	sub	x4, x4, x24			// the kernel virtual and
-	str_l	x4, kimage_voffset, x5		// physical mappings
-
-	mov	x29, #0
 #ifdef CONFIG_KASAN
 	bl	kasan_early_init
 #endif
@@ -454,8 +462,8 @@
 	bl	kaslr_early_init		// parse FDT for KASLR options
 	cbz	x0, 0f				// KASLR disabled? just proceed
 	orr	x23, x23, x0			// record KASLR offset
-	ret	x28				// we must enable KASLR, return
-						// to __enable_mmu()
+	ldp	x29, x30, [sp], #16		// we must enable KASLR, return
+	ret					// to __primary_switch()
 0:
 #endif
 	b	start_kernel
@@ -465,7 +473,7 @@
  * end early head section, begin head code that is also used for
  * hotplug and needs to have the same protections as the text region
  */
-	.section ".text","ax"
+	.section ".idmap.text","ax"
 
 ENTRY(kimage_vaddr)
 	.quad		_text - TEXT_OFFSET
@@ -490,7 +498,7 @@
 CPU_BE(	orr	x0, x0, #(3 << 24)	)	// Set the EE and E0E bits for EL1
 CPU_LE(	bic	x0, x0, #(3 << 24)	)	// Clear the EE and E0E bits for EL1
 	msr	sctlr_el1, x0
-	mov	w20, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
+	mov	w0, #BOOT_CPU_MODE_EL1		// This cpu booted in EL1
 	isb
 	ret
 
@@ -586,7 +594,7 @@
 
 	cbz	x2, install_el2_stub
 
-	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
+	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
 	isb
 	ret
 
@@ -601,7 +609,7 @@
 		      PSR_MODE_EL1h)
 	msr	spsr_el2, x0
 	msr	elr_el2, lr
-	mov	w20, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
+	mov	w0, #BOOT_CPU_MODE_EL2		// This CPU booted in EL2
 	eret
 ENDPROC(el2_setup)
 
@@ -611,27 +619,39 @@
  */
 set_cpu_boot_mode_flag:
 	adr_l	x1, __boot_cpu_mode
-	cmp	w20, #BOOT_CPU_MODE_EL2
+	cmp	w0, #BOOT_CPU_MODE_EL2
 	b.ne	1f
 	add	x1, x1, #4
-1:	str	w20, [x1]			// This CPU has booted in EL1
+1:	str	w0, [x1]			// This CPU has booted in EL1
 	dmb	sy
 	dc	ivac, x1			// Invalidate potentially stale cache line
 	ret
 ENDPROC(set_cpu_boot_mode_flag)
 
 /*
+ * These values are written with the MMU off, but read with the MMU on.
+ * Writers will invalidate the corresponding address, discarding up to a
+ * 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
+ * sufficient alignment that the CWG doesn't overlap another section.
+ */
+	.pushsection ".mmuoff.data.write", "aw"
+/*
  * We need to find out the CPU boot mode long after boot, so we need to
  * store it in a writable variable.
  *
  * This is not in .bss, because we set it sufficiently early that the boot-time
  * zeroing of .bss would clobber it.
  */
-	.pushsection	.data..cacheline_aligned
-	.align	L1_CACHE_SHIFT
 ENTRY(__boot_cpu_mode)
 	.long	BOOT_CPU_MODE_EL2
 	.long	BOOT_CPU_MODE_EL1
+/*
+ * The booting CPU updates the failed status @__early_cpu_boot_status,
+ * with MMU turned off.
+ */
+ENTRY(__early_cpu_boot_status)
+	.long 	0
+
 	.popsection
 
 	/*
@@ -639,7 +659,7 @@
 	 * cores are held until we're ready for them to initialise.
 	 */
 ENTRY(secondary_holding_pen)
-	bl	el2_setup			// Drop to EL1, w20=cpu_boot_mode
+	bl	el2_setup			// Drop to EL1, w0=cpu_boot_mode
 	bl	set_cpu_boot_mode_flag
 	mrs	x0, mpidr_el1
 	mov_q	x1, MPIDR_HWID_BITMASK
@@ -666,12 +686,10 @@
 	/*
 	 * Common entry point for secondary CPUs.
 	 */
-	adrp	x25, idmap_pg_dir
-	adrp	x26, swapper_pg_dir
 	bl	__cpu_setup			// initialise processor
-
-	adr_l	x27, __secondary_switch		// address to jump to after enabling the MMU
-	b	__enable_mmu
+	bl	__enable_mmu
+	ldr	x8, =__secondary_switched
+	br	x8
 ENDPROC(secondary_startup)
 
 __secondary_switched:
@@ -706,33 +724,27 @@
 	dc	ivac, \tmp1			// Invalidate potentially stale cache line
 	.endm
 
-	.pushsection	.data..cacheline_aligned
-	.align	L1_CACHE_SHIFT
-ENTRY(__early_cpu_boot_status)
-	.long 	0
-	.popsection
-
 /*
  * Enable the MMU.
  *
  *  x0  = SCTLR_EL1 value for turning on the MMU.
- *  x27 = *virtual* address to jump to upon completion
  *
- * Other registers depend on the function called upon completion.
+ * Returns to the caller via x30/lr. This requires the caller to be covered
+ * by the .idmap.text section.
  *
  * Checks if the selected granule size is supported by the CPU.
  * If it isn't, park the CPU
  */
-	.section	".idmap.text", "ax"
 ENTRY(__enable_mmu)
-	mrs	x22, sctlr_el1			// preserve old SCTLR_EL1 value
 	mrs	x1, ID_AA64MMFR0_EL1
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
 	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
 	b.ne	__no_granule_support
 	update_early_cpu_boot_status 0, x1, x2
-	msr	ttbr0_el1, x25			// load TTBR0
-	msr	ttbr1_el1, x26			// load TTBR1
+	adrp	x1, idmap_pg_dir
+	adrp	x2, swapper_pg_dir
+	msr	ttbr0_el1, x1			// load TTBR0
+	msr	ttbr1_el1, x2			// load TTBR1
 	isb
 	msr	sctlr_el1, x0
 	isb
@@ -744,26 +756,7 @@
 	ic	iallu
 	dsb	nsh
 	isb
-#ifdef CONFIG_RANDOMIZE_BASE
-	mov	x19, x0				// preserve new SCTLR_EL1 value
-	blr	x27
-
-	/*
-	 * If we return here, we have a KASLR displacement in x23 which we need
-	 * to take into account by discarding the current kernel mapping and
-	 * creating a new one.
-	 */
-	msr	sctlr_el1, x22			// disable the MMU
-	isb
-	bl	__create_page_tables		// recreate kernel mapping
-
-	msr	sctlr_el1, x19			// re-enable the MMU
-	isb
-	ic	iallu				// flush instructions fetched
-	dsb	nsh				// via old mapping
-	isb
-#endif
-	br	x27
+	ret
 ENDPROC(__enable_mmu)
 
 __no_granule_support:
@@ -772,11 +765,11 @@
 1:
 	wfe
 	wfi
-	b 1b
+	b	1b
 ENDPROC(__no_granule_support)
 
-__primary_switch:
 #ifdef CONFIG_RELOCATABLE
+__relocate_kernel:
 	/*
 	 * Iterate over each entry in the relocation table, and apply the
 	 * relocations in place.
@@ -798,14 +791,46 @@
 	add	x13, x13, x23			// relocate
 	str	x13, [x11, x23]
 	b	0b
+1:	ret
+ENDPROC(__relocate_kernel)
+#endif
 
-1:
+__primary_switch:
+#ifdef CONFIG_RANDOMIZE_BASE
+	mov	x19, x0				// preserve new SCTLR_EL1 value
+	mrs	x20, sctlr_el1			// preserve old SCTLR_EL1 value
+#endif
+
+	bl	__enable_mmu
+#ifdef CONFIG_RELOCATABLE
+	bl	__relocate_kernel
+#ifdef CONFIG_RANDOMIZE_BASE
+	ldr	x8, =__primary_switched
+	adrp	x0, __PHYS_OFFSET
+	blr	x8
+
+	/*
+	 * If we return here, we have a KASLR displacement in x23 which we need
+	 * to take into account by discarding the current kernel mapping and
+	 * creating a new one.
+	 */
+	msr	sctlr_el1, x20			// disable the MMU
+	isb
+	bl	__create_page_tables		// recreate kernel mapping
+
+	tlbi	vmalle1				// Remove any stale TLB entries
+	dsb	nsh
+
+	msr	sctlr_el1, x19			// re-enable the MMU
+	isb
+	ic	iallu				// flush instructions fetched
+	dsb	nsh				// via old mapping
+	isb
+
+	bl	__relocate_kernel
+#endif
 #endif
 	ldr	x8, =__primary_switched
+	adrp	x0, __PHYS_OFFSET
 	br	x8
 ENDPROC(__primary_switch)
-
-__secondary_switch:
-	ldr	x8, =__secondary_switched
-	br	x8
-ENDPROC(__secondary_switch)
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index 46f29b6..e56d848 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -36,8 +36,8 @@
 .macro break_before_make_ttbr_switch zero_page, page_table
 	msr	ttbr1_el1, \zero_page
 	isb
-	tlbi	vmalle1is
-	dsb	ish
+	tlbi	vmalle1
+	dsb	nsh
 	msr	ttbr1_el1, \page_table
 	isb
 .endm
@@ -96,7 +96,7 @@
 
 	add	x1, x10, #PAGE_SIZE
 	/* Clean the copied page to PoU - based on flush_icache_range() */
-	dcache_line_size x2, x3
+	raw_dcache_line_size x2, x3
 	sub	x3, x2, #1
 	bic	x4, x10, x3
 2:	dc	cvau, x4	/* clean D line / unified line */
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 21ab5df..d55a7b0 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -15,9 +15,9 @@
  * License terms: GNU General Public License (GPL) version 2
  */
 #define pr_fmt(x) "hibernate: " x
+#include <linux/cpu.h>
 #include <linux/kvm_host.h>
 #include <linux/mm.h>
-#include <linux/notifier.h>
 #include <linux/pm.h>
 #include <linux/sched.h>
 #include <linux/suspend.h>
@@ -26,6 +26,7 @@
 
 #include <asm/barrier.h>
 #include <asm/cacheflush.h>
+#include <asm/cputype.h>
 #include <asm/irqflags.h>
 #include <asm/memory.h>
 #include <asm/mmu_context.h>
@@ -34,7 +35,9 @@
 #include <asm/pgtable-hwdef.h>
 #include <asm/sections.h>
 #include <asm/smp.h>
+#include <asm/smp_plat.h>
 #include <asm/suspend.h>
+#include <asm/sysreg.h>
 #include <asm/virt.h>
 
 /*
@@ -53,12 +56,6 @@
 /* Do we need to reset el2? */
 #define el2_reset_needed() (is_hyp_mode_available() && !is_kernel_in_hyp_mode())
 
-/*
- * Start/end of the hibernate exit code, this must be copied to a 'safe'
- * location in memory, and executed from there.
- */
-extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[];
-
 /* temporary el2 vectors in the __hibernate_exit_text section. */
 extern char hibernate_el2_vectors[];
 
@@ -66,6 +63,12 @@
 extern char __hyp_stub_vectors[];
 
 /*
+ * The logical cpu number we should resume on, initialised to a non-cpu
+ * number.
+ */
+static int sleep_cpu = -EINVAL;
+
+/*
  * Values that may not change over hibernate/resume. We put the build number
  * and date in here so that we guarantee not to resume with a different
  * kernel.
@@ -87,6 +90,8 @@
 	 * re-configure el2.
 	 */
 	phys_addr_t	__hyp_stub_vectors;
+
+	u64		sleep_cpu_mpidr;
 } resume_hdr;
 
 static inline void arch_hdr_invariants(struct arch_hibernate_hdr_invariants *i)
@@ -129,12 +134,22 @@
 	else
 		hdr->__hyp_stub_vectors = 0;
 
+	/* Save the mpidr of the cpu we called cpu_suspend() on... */
+	if (sleep_cpu < 0) {
+		pr_err("Failing to hibernate on an unkown CPU.\n");
+		return -ENODEV;
+	}
+	hdr->sleep_cpu_mpidr = cpu_logical_map(sleep_cpu);
+	pr_info("Hibernating on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
+		hdr->sleep_cpu_mpidr);
+
 	return 0;
 }
 EXPORT_SYMBOL(arch_hibernation_header_save);
 
 int arch_hibernation_header_restore(void *addr)
 {
+	int ret;
 	struct arch_hibernate_hdr_invariants invariants;
 	struct arch_hibernate_hdr *hdr = addr;
 
@@ -144,6 +159,24 @@
 		return -EINVAL;
 	}
 
+	sleep_cpu = get_logical_index(hdr->sleep_cpu_mpidr);
+	pr_info("Hibernated on CPU %d [mpidr:0x%llx]\n", sleep_cpu,
+		hdr->sleep_cpu_mpidr);
+	if (sleep_cpu < 0) {
+		pr_crit("Hibernated on a CPU not known to this kernel!\n");
+		sleep_cpu = -EINVAL;
+		return -EINVAL;
+	}
+	if (!cpu_online(sleep_cpu)) {
+		pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
+		ret = cpu_up(sleep_cpu);
+		if (ret) {
+			pr_err("Failed to bring hibernate-CPU up!\n");
+			sleep_cpu = -EINVAL;
+			return ret;
+		}
+	}
+
 	resume_hdr = *hdr;
 
 	return 0;
@@ -217,12 +250,22 @@
 	set_pte(pte, __pte(virt_to_phys((void *)dst) |
 			 pgprot_val(PAGE_KERNEL_EXEC)));
 
-	/* Load our new page tables */
-	asm volatile("msr	ttbr0_el1, %0;"
-		     "isb;"
-		     "tlbi	vmalle1is;"
-		     "dsb	ish;"
-		     "isb" : : "r"(virt_to_phys(pgd)));
+	/*
+	 * Load our new page tables. A strict BBM approach requires that we
+	 * ensure that TLBs are free of any entries that may overlap with the
+	 * global mappings we are about to install.
+	 *
+	 * For a real hibernate/resume cycle TTBR0 currently points to a zero
+	 * page, but TLBs may contain stale ASID-tagged entries (e.g. for EFI
+	 * runtime services), while for a userspace-driven test_resume cycle it
+	 * points to userspace page tables (and we must point it at a zero page
+	 * ourselves). Elsewhere we only (un)install the idmap with preemption
+	 * disabled, so T0SZ should be as required regardless.
+	 */
+	cpu_set_reserved_ttbr0();
+	local_flush_tlb_all();
+	write_sysreg(virt_to_phys(pgd), ttbr0_el1);
+	isb();
 
 	*phys_dst_addr = virt_to_phys((void *)dst);
 
@@ -230,6 +273,7 @@
 	return rc;
 }
 
+#define dcache_clean_range(start, end)	__flush_dcache_area(start, (end - start))
 
 int swsusp_arch_suspend(void)
 {
@@ -245,10 +289,16 @@
 	local_dbg_save(flags);
 
 	if (__cpu_suspend_enter(&state)) {
+		sleep_cpu = smp_processor_id();
 		ret = swsusp_save();
 	} else {
-		/* Clean kernel to PoC for secondary core startup */
-		__flush_dcache_area(LMADDR(KERNEL_START), KERNEL_END - KERNEL_START);
+		/* Clean kernel core startup/idle code to PoC*/
+		dcache_clean_range(__mmuoff_data_start, __mmuoff_data_end);
+		dcache_clean_range(__idmap_text_start, __idmap_text_end);
+
+		/* Clean kvm setup code to PoC? */
+		if (el2_reset_needed())
+			dcache_clean_range(__hyp_idmap_text_start, __hyp_idmap_text_end);
 
 		/*
 		 * Tell the hibernation core that we've just restored
@@ -256,6 +306,7 @@
 		 */
 		in_suspend = 0;
 
+		sleep_cpu = -EINVAL;
 		__cpu_suspend_exit();
 	}
 
@@ -264,6 +315,33 @@
 	return ret;
 }
 
+static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+{
+	pte_t pte = *src_pte;
+
+	if (pte_valid(pte)) {
+		/*
+		 * Resume will overwrite areas that may be marked
+		 * read only (code, rodata). Clear the RDONLY bit from
+		 * the temporary mappings we use during restore.
+		 */
+		set_pte(dst_pte, pte_clear_rdonly(pte));
+	} else if (debug_pagealloc_enabled() && !pte_none(pte)) {
+		/*
+		 * debug_pagealloc will removed the PTE_VALID bit if
+		 * the page isn't in use by the resume kernel. It may have
+		 * been in use by the original kernel, in which case we need
+		 * to put it back in our copy to do the restore.
+		 *
+		 * Before marking this entry valid, check the pfn should
+		 * be mapped.
+		 */
+		BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+		set_pte(dst_pte, pte_mkpresent(pte_clear_rdonly(pte)));
+	}
+}
+
 static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
 		    unsigned long end)
 {
@@ -279,13 +357,7 @@
 
 	src_pte = pte_offset_kernel(src_pmd, start);
 	do {
-		if (!pte_none(*src_pte))
-			/*
-			 * Resume will overwrite areas that may be marked
-			 * read only (code, rodata). Clear the RDONLY bit from
-			 * the temporary mappings we use during restore.
-			 */
-			set_pte(dst_pte, __pte(pte_val(*src_pte) & ~PTE_RDONLY));
+		_copy_pte(dst_pte, src_pte, addr);
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
 	return 0;
@@ -394,6 +466,38 @@
 					  void *, phys_addr_t, phys_addr_t);
 
 	/*
+	 * Restoring the memory image will overwrite the ttbr1 page tables.
+	 * Create a second copy of just the linear map, and use this when
+	 * restoring.
+	 */
+	tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
+	if (!tmp_pg_dir) {
+		pr_err("Failed to allocate memory for temporary page tables.");
+		rc = -ENOMEM;
+		goto out;
+	}
+	rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
+	if (rc)
+		goto out;
+
+	/*
+	 * Since we only copied the linear map, we need to find restore_pblist's
+	 * linear map address.
+	 */
+	lm_restore_pblist = LMADDR(restore_pblist);
+
+	/*
+	 * We need a zero page that is zero before & after resume in order to
+	 * to break before make on the ttbr1 page tables.
+	 */
+	zero_page = (void *)get_safe_page(GFP_ATOMIC);
+	if (!zero_page) {
+		pr_err("Failed to allocate zero page.");
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/*
 	 * Locate the exit code in the bottom-but-one page, so that *NULL
 	 * still has disastrous affects.
 	 */
@@ -419,27 +523,6 @@
 	__flush_dcache_area(hibernate_exit, exit_size);
 
 	/*
-	 * Restoring the memory image will overwrite the ttbr1 page tables.
-	 * Create a second copy of just the linear map, and use this when
-	 * restoring.
-	 */
-	tmp_pg_dir = (pgd_t *)get_safe_page(GFP_ATOMIC);
-	if (!tmp_pg_dir) {
-		pr_err("Failed to allocate memory for temporary page tables.");
-		rc = -ENOMEM;
-		goto out;
-	}
-	rc = copy_page_tables(tmp_pg_dir, PAGE_OFFSET, 0);
-	if (rc)
-		goto out;
-
-	/*
-	 * Since we only copied the linear map, we need to find restore_pblist's
-	 * linear map address.
-	 */
-	lm_restore_pblist = LMADDR(restore_pblist);
-
-	/*
 	 * KASLR will cause the el2 vectors to be in a different location in
 	 * the resumed kernel. Load hibernate's temporary copy into el2.
 	 *
@@ -453,12 +536,6 @@
 		__hyp_set_vectors(el2_vectors);
 	}
 
-	/*
-	 * We need a zero page that is zero before & after resume in order to
-	 * to break before make on the ttbr1 page tables.
-	 */
-	zero_page = (void *)get_safe_page(GFP_ATOMIC);
-
 	hibernate_exit(virt_to_phys(tmp_pg_dir), resume_hdr.ttbr1_el1,
 		       resume_hdr.reenter_kernel, lm_restore_pblist,
 		       resume_hdr.__hyp_stub_vectors, virt_to_phys(zero_page));
@@ -467,27 +544,12 @@
 	return rc;
 }
 
-static int check_boot_cpu_online_pm_callback(struct notifier_block *nb,
-					     unsigned long action, void *ptr)
+int hibernate_resume_nonboot_cpu_disable(void)
 {
-	if (action == PM_HIBERNATION_PREPARE &&
-	     cpumask_first(cpu_online_mask) != 0) {
-		pr_warn("CPU0 is offline.\n");
-		return notifier_from_errno(-ENODEV);
+	if (sleep_cpu < 0) {
+		pr_err("Failing to resume from hibernate on an unkown CPU.\n");
+		return -ENODEV;
 	}
 
-	return NOTIFY_OK;
+	return freeze_secondary_cpus(sleep_cpu);
 }
-
-static int __init check_boot_cpu_online_init(void)
-{
-	/*
-	 * Set this pm_notifier callback with a lower priority than
-	 * cpu_hotplug_pm_callback, so that cpu_hotplug_pm_callback will be
-	 * called earlier to disable cpu hotplug before the cpu online check.
-	 */
-	pm_notifier(check_boot_cpu_online_pm_callback, -INT_MAX);
-
-	return 0;
-}
-core_initcall(check_boot_cpu_online_init);
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 26a6bf7..948b7314 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -857,7 +857,7 @@
 /*
  * CPU initialisation.
  */
-static void hw_breakpoint_reset(void *unused)
+static int hw_breakpoint_reset(unsigned int cpu)
 {
 	int i;
 	struct perf_event **slots;
@@ -888,28 +888,14 @@
 			write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL);
 		}
 	}
-}
 
-static int hw_breakpoint_reset_notify(struct notifier_block *self,
-						unsigned long action,
-						void *hcpu)
-{
-	if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) {
-		local_irq_disable();
-		hw_breakpoint_reset(NULL);
-		local_irq_enable();
-	}
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block hw_breakpoint_reset_nb = {
-	.notifier_call = hw_breakpoint_reset_notify,
-};
-
 #ifdef CONFIG_CPU_PM
-extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *));
+extern void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int));
 #else
-static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
+static inline void cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
 {
 }
 #endif
@@ -919,36 +905,34 @@
  */
 static int __init arch_hw_breakpoint_init(void)
 {
+	int ret;
+
 	core_num_brps = get_num_brps();
 	core_num_wrps = get_num_wrps();
 
 	pr_info("found %d breakpoint and %d watchpoint registers.\n",
 		core_num_brps, core_num_wrps);
 
-	cpu_notifier_register_begin();
-
-	/*
-	 * Reset the breakpoint resources. We assume that a halting
-	 * debugger will leave the world in a nice state for us.
-	 */
-	smp_call_function(hw_breakpoint_reset, NULL, 1);
-	hw_breakpoint_reset(NULL);
-
 	/* Register debug fault handlers. */
 	hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP,
 			      TRAP_HWBKPT, "hw-breakpoint handler");
 	hook_debug_fault_code(DBG_ESR_EVT_HWWP, watchpoint_handler, SIGTRAP,
 			      TRAP_HWBKPT, "hw-watchpoint handler");
 
-	/* Register hotplug notifier. */
-	__register_cpu_notifier(&hw_breakpoint_reset_nb);
-
-	cpu_notifier_register_done();
+	/*
+	 * Reset the breakpoint resources. We assume that a halting
+	 * debugger will leave the world in a nice state for us.
+	 */
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
+			  "CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING",
+			  hw_breakpoint_reset, NULL);
+	if (ret)
+		pr_err("failed to register CPU hotplug notifier: %d\n", ret);
 
 	/* Register cpu_suspend hw breakpoint restore hook */
 	cpu_suspend_set_dbg_restorer(hw_breakpoint_reset);
 
-	return 0;
+	return ret;
 }
 arch_initcall(arch_hw_breakpoint_init);
 
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 63f9432..6f2ac4f 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -96,7 +96,7 @@
 
 	if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX))
 		page = vmalloc_to_page(addr);
-	else if (!module && IS_ENABLED(CONFIG_DEBUG_RODATA))
+	else if (!module)
 		page = pfn_to_page(PHYS_PFN(__pa(addr)));
 	else
 		return addr;
@@ -1202,6 +1202,19 @@
 	BUG();
 }
 
+s32 aarch64_insn_adrp_get_offset(u32 insn)
+{
+	BUG_ON(!aarch64_insn_is_adrp(insn));
+	return aarch64_insn_decode_immediate(AARCH64_INSN_IMM_ADR, insn) << 12;
+}
+
+u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset)
+{
+	BUG_ON(!aarch64_insn_is_adrp(insn));
+	return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn,
+						offset >> 12);
+}
+
 /*
  * Extract the Op/CR data from a msr/mrs instruction.
  */
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index b054691..769f24e 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -6,6 +6,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/cache.h>
 #include <linux/crc32.h>
 #include <linux/init.h>
 #include <linux/libfdt.h>
@@ -20,7 +21,7 @@
 #include <asm/pgtable.h>
 #include <asm/sections.h>
 
-u64 __read_mostly module_alloc_base;
+u64 __ro_after_init module_alloc_base;
 u16 __initdata memstart_offset_seed;
 
 static __init u64 get_kaslr_seed(void *fdt)
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 8c57f64..e017a94 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -19,10 +19,13 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/bug.h>
 #include <linux/irq.h>
 #include <linux/kdebug.h>
 #include <linux/kgdb.h>
 #include <linux/kprobes.h>
+#include <asm/debug-monitors.h>
+#include <asm/insn.h>
 #include <asm/traps.h>
 
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
@@ -338,15 +341,24 @@
 	unregister_die_notifier(&kgdb_notifier);
 }
 
-/*
- * ARM instructions are always in LE.
- * Break instruction is encoded in LE format
- */
-struct kgdb_arch arch_kgdb_ops = {
-	.gdb_bpt_instr = {
-		KGDB_DYN_BRK_INS_BYTE(0),
-		KGDB_DYN_BRK_INS_BYTE(1),
-		KGDB_DYN_BRK_INS_BYTE(2),
-		KGDB_DYN_BRK_INS_BYTE(3),
-	}
-};
+struct kgdb_arch arch_kgdb_ops;
+
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+	int err;
+
+	BUILD_BUG_ON(AARCH64_INSN_SIZE != BREAK_INSTR_SIZE);
+
+	err = aarch64_insn_read((void *)bpt->bpt_addr, (u32 *)bpt->saved_instr);
+	if (err)
+		return err;
+
+	return aarch64_insn_write((void *)bpt->bpt_addr,
+			(u32)AARCH64_BREAK_KGDB_DYN_DBG);
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+	return aarch64_insn_write((void *)bpt->bpt_addr,
+			*(u32 *)bpt->saved_instr);
+}
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 838ccf1..a9310a6 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -24,6 +24,7 @@
 #include <asm/sysreg.h>
 #include <asm/virt.h>
 
+#include <linux/acpi.h>
 #include <linux/of.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
@@ -190,13 +191,23 @@
 #define ARMV8_THUNDER_PERFCTR_L1I_CACHE_PREF_MISS		0xED
 
 /* PMUv3 HW events mapping. */
+
+/*
+ * ARMv8 Architectural defined events, not all of these may
+ * be supported on any given implementation. Undefined events will
+ * be disabled at run-time.
+ */
 static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = {
 	PERF_MAP_ALL_UNSUPPORTED,
 	[PERF_COUNT_HW_CPU_CYCLES]		= ARMV8_PMUV3_PERFCTR_CPU_CYCLES,
 	[PERF_COUNT_HW_INSTRUCTIONS]		= ARMV8_PMUV3_PERFCTR_INST_RETIRED,
 	[PERF_COUNT_HW_CACHE_REFERENCES]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
 	[PERF_COUNT_HW_CACHE_MISSES]		= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED,
 	[PERF_COUNT_HW_BRANCH_MISSES]		= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
+	[PERF_COUNT_HW_BUS_CYCLES]		= ARMV8_PMUV3_PERFCTR_BUS_CYCLES,
+	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= ARMV8_PMUV3_PERFCTR_STALL_FRONTEND,
+	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= ARMV8_PMUV3_PERFCTR_STALL_BACKEND,
 };
 
 /* ARM Cortex-A53 HW events mapping. */
@@ -258,6 +269,15 @@
 	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE,
 	[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL,
 
+	[C(L1I)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE,
+	[C(L1I)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL,
+
+	[C(DTLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL,
+	[C(DTLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1D_TLB,
+
+	[C(ITLB)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL,
+	[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_L1I_TLB,
+
 	[C(BPU)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
 	[C(BPU)][C(OP_READ)][C(RESULT_MISS)]	= ARMV8_PMUV3_PERFCTR_BR_MIS_PRED,
 	[C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_PMUV3_PERFCTR_BR_PRED,
@@ -523,12 +543,6 @@
 	.attrs = armv8_pmuv3_format_attrs,
 };
 
-static const struct attribute_group *armv8_pmuv3_attr_groups[] = {
-	&armv8_pmuv3_events_attr_group,
-	&armv8_pmuv3_format_attr_group,
-	NULL,
-};
-
 /*
  * Perf Events' indices
  */
@@ -905,9 +919,22 @@
 
 static int armv8_pmuv3_map_event(struct perf_event *event)
 {
-	return armpmu_map_event(event, &armv8_pmuv3_perf_map,
-				&armv8_pmuv3_perf_cache_map,
-				ARMV8_PMU_EVTYPE_EVENT);
+	int hw_event_id;
+	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
+
+	hw_event_id = armpmu_map_event(event, &armv8_pmuv3_perf_map,
+				       &armv8_pmuv3_perf_cache_map,
+				       ARMV8_PMU_EVTYPE_EVENT);
+	if (hw_event_id < 0)
+		return hw_event_id;
+
+	/* disable micro/arch events not supported by this PMU */
+	if ((hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS) &&
+		!test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
+			return -EOPNOTSUPP;
+	}
+
+	return hw_event_id;
 }
 
 static int armv8_a53_map_event(struct perf_event *event)
@@ -985,7 +1012,10 @@
 	armv8_pmu_init(cpu_pmu);
 	cpu_pmu->name			= "armv8_pmuv3";
 	cpu_pmu->map_event		= armv8_pmuv3_map_event;
-	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv8_pmuv3_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv8_pmuv3_format_attr_group;
 	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
@@ -994,7 +1024,10 @@
 	armv8_pmu_init(cpu_pmu);
 	cpu_pmu->name			= "armv8_cortex_a53";
 	cpu_pmu->map_event		= armv8_a53_map_event;
-	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv8_pmuv3_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv8_pmuv3_format_attr_group;
 	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
@@ -1003,7 +1036,10 @@
 	armv8_pmu_init(cpu_pmu);
 	cpu_pmu->name			= "armv8_cortex_a57";
 	cpu_pmu->map_event		= armv8_a57_map_event;
-	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv8_pmuv3_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv8_pmuv3_format_attr_group;
 	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
@@ -1012,7 +1048,10 @@
 	armv8_pmu_init(cpu_pmu);
 	cpu_pmu->name			= "armv8_cortex_a72";
 	cpu_pmu->map_event		= armv8_a57_map_event;
-	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv8_pmuv3_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv8_pmuv3_format_attr_group;
 	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
@@ -1021,7 +1060,10 @@
 	armv8_pmu_init(cpu_pmu);
 	cpu_pmu->name			= "armv8_cavium_thunder";
 	cpu_pmu->map_event		= armv8_thunder_map_event;
-	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv8_pmuv3_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv8_pmuv3_format_attr_group;
 	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
@@ -1030,7 +1072,10 @@
 	armv8_pmu_init(cpu_pmu);
 	cpu_pmu->name			= "armv8_brcm_vulcan";
 	cpu_pmu->map_event		= armv8_vulcan_map_event;
-	cpu_pmu->pmu.attr_groups	= armv8_pmuv3_attr_groups;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] =
+		&armv8_pmuv3_events_attr_group;
+	cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] =
+		&armv8_pmuv3_format_attr_group;
 	return armv8pmu_probe_pmu(cpu_pmu);
 }
 
@@ -1044,21 +1089,32 @@
 	{},
 };
 
+/*
+ * Non DT systems have their micro/arch events probed at run-time.
+ * A fairly complete list of generic events are provided and ones that
+ * aren't supported by the current PMU are disabled.
+ */
+static const struct pmu_probe_info armv8_pmu_probe_table[] = {
+	PMU_PROBE(0, 0, armv8_pmuv3_init), /* enable all defined counters */
+	{ /* sentinel value */ }
+};
+
 static int armv8_pmu_device_probe(struct platform_device *pdev)
 {
-	return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids, NULL);
+	if (acpi_disabled)
+		return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
+					    NULL);
+
+	return arm_pmu_device_probe(pdev, armv8_pmu_of_device_ids,
+				    armv8_pmu_probe_table);
 }
 
 static struct platform_driver armv8_pmu_driver = {
 	.driver		= {
-		.name	= "armv8-pmu",
+		.name	= ARMV8_PMU_PDEV_NAME,
 		.of_match_table = armv8_pmu_of_device_ids,
 	},
 	.probe		= armv8_pmu_device_probe,
 };
 
-static int __init register_armv8_pmu_driver(void)
-{
-	return platform_driver_register(&armv8_pmu_driver);
-}
-device_initcall(register_armv8_pmu_driver);
+builtin_platform_driver(armv8_pmu_driver);
diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c
index 37e47a9..d1731bf 100644
--- a/arch/arm64/kernel/probes/decode-insn.c
+++ b/arch/arm64/kernel/probes/decode-insn.c
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
 #include <linux/module.h>
+#include <linux/kallsyms.h>
 #include <asm/kprobes.h>
 #include <asm/insn.h>
 #include <asm/sections.h>
@@ -122,7 +123,7 @@
 static bool __kprobes
 is_probed_address_atomic(kprobe_opcode_t *scan_start, kprobe_opcode_t *scan_end)
 {
-	while (scan_start > scan_end) {
+	while (scan_start >= scan_end) {
 		/*
 		 * atomic region starts from exclusive load and ends with
 		 * exclusive store.
@@ -142,33 +143,30 @@
 {
 	enum kprobe_insn decoded;
 	kprobe_opcode_t insn = le32_to_cpu(*addr);
-	kprobe_opcode_t *scan_start = addr - 1;
-	kprobe_opcode_t *scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
-#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
-	struct module *mod;
-#endif
+	kprobe_opcode_t *scan_end = NULL;
+	unsigned long size = 0, offset = 0;
 
-	if (addr >= (kprobe_opcode_t *)_text &&
-	    scan_end < (kprobe_opcode_t *)_text)
-		scan_end = (kprobe_opcode_t *)_text;
-#if defined(CONFIG_MODULES) && defined(MODULES_VADDR)
-	else {
-		preempt_disable();
-		mod = __module_address((unsigned long)addr);
-		if (mod && within_module_init((unsigned long)addr, mod) &&
-			!within_module_init((unsigned long)scan_end, mod))
-			scan_end = (kprobe_opcode_t *)mod->init_layout.base;
-		else if (mod && within_module_core((unsigned long)addr, mod) &&
-			!within_module_core((unsigned long)scan_end, mod))
-			scan_end = (kprobe_opcode_t *)mod->core_layout.base;
-		preempt_enable();
+	/*
+	 * If there's a symbol defined in front of and near enough to
+	 * the probe address assume it is the entry point to this
+	 * code and use it to further limit how far back we search
+	 * when determining if we're in an atomic sequence. If we could
+	 * not find any symbol skip the atomic test altogether as we
+	 * could otherwise end up searching irrelevant text/literals.
+	 * KPROBES depends on KALLSYMS so this last case should never
+	 * happen.
+	 */
+	if (kallsyms_lookup_size_offset((unsigned long) addr, &size, &offset)) {
+		if (offset < (MAX_ATOMIC_CONTEXT_SIZE*sizeof(kprobe_opcode_t)))
+			scan_end = addr - (offset / sizeof(kprobe_opcode_t));
+		else
+			scan_end = addr - MAX_ATOMIC_CONTEXT_SIZE;
 	}
-#endif
 	decoded = arm_probe_decode_insn(insn, asi);
 
-	if (decoded == INSN_REJECTED ||
-			is_probed_address_atomic(scan_start, scan_end))
-		return INSN_REJECTED;
+	if (decoded != INSN_REJECTED && scan_end)
+		if (is_probed_address_atomic(addr - 1, scan_end))
+			return INSN_REJECTED;
 
 	return decoded;
 }
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index bf97685..f5077ea 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -19,7 +19,7 @@
 #include <linux/kasan.h>
 #include <linux/kernel.h>
 #include <linux/kprobes.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/slab.h>
 #include <linux/stop_machine.h>
 #include <linux/stringify.h>
@@ -31,7 +31,7 @@
 #include <asm/insn.h>
 #include <asm/uaccess.h>
 #include <asm/irq.h>
-#include <asm-generic/sections.h>
+#include <asm/sections.h>
 
 #include "decode-insn.h"
 
@@ -41,18 +41,6 @@
 static void __kprobes
 post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
 
-static inline unsigned long min_stack_size(unsigned long addr)
-{
-	unsigned long size;
-
-	if (on_irq_stack(addr, raw_smp_processor_id()))
-		size = IRQ_STACK_PTR(raw_smp_processor_id()) - addr;
-	else
-		size = (unsigned long)current_thread_info() + THREAD_START_SP - addr;
-
-	return min(size, FIELD_SIZEOF(struct kprobe_ctlblk, jprobes_stack));
-}
-
 static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
 {
 	/* prepare insn slot */
@@ -178,13 +166,18 @@
 }
 
 /*
- * The D-flag (Debug mask) is set (masked) upon debug exception entry.
- * Kprobes needs to clear (unmask) D-flag -ONLY- in case of recursive
- * probe i.e. when probe hit from kprobe handler context upon
- * executing the pre/post handlers. In this case we return with
- * D-flag clear so that single-stepping can be carried-out.
- *
- * Leave D-flag set in all other cases.
+ * When PSTATE.D is set (masked), then software step exceptions can not be
+ * generated.
+ * SPSR's D bit shows the value of PSTATE.D immediately before the
+ * exception was taken. PSTATE.D is set while entering into any exception
+ * mode, however software clears it for any normal (none-debug-exception)
+ * mode in the exception entry. Therefore, when we are entering into kprobe
+ * breakpoint handler from any normal mode then SPSR.D bit is already
+ * cleared, however it is set when we are entering from any debug exception
+ * mode.
+ * Since we always need to generate single step exception after a kprobe
+ * breakpoint exception therefore we need to clear it unconditionally, when
+ * we become sure that the current breakpoint exception is for kprobe.
  */
 static void __kprobes
 spsr_set_debug_flag(struct pt_regs *regs, int mask)
@@ -257,10 +250,7 @@
 
 		set_ss_context(kcb, slot);	/* mark pending ss */
 
-		if (kcb->kprobe_status == KPROBE_REENTER)
-			spsr_set_debug_flag(regs, 0);
-		else
-			WARN_ON(regs->pstate & PSR_D_BIT);
+		spsr_set_debug_flag(regs, 0);
 
 		/* IRQs and single stepping do not mix well. */
 		kprobes_save_local_irqflag(kcb, regs);
@@ -345,8 +335,6 @@
 			BUG();
 
 		kernel_disable_single_step();
-		if (kcb->kprobe_status == KPROBE_REENTER)
-			spsr_set_debug_flag(regs, 1);
 
 		if (kcb->kprobe_status == KPROBE_REENTER)
 			restore_previous_kprobe(kcb);
@@ -469,9 +457,6 @@
 		kprobes_restore_local_irqflag(kcb, regs);
 		kernel_disable_single_step();
 
-		if (kcb->kprobe_status == KPROBE_REENTER)
-			spsr_set_debug_flag(regs, 1);
-
 		post_kprobe_handler(kcb, regs);
 	}
 
@@ -489,20 +474,15 @@
 {
 	struct jprobe *jp = container_of(p, struct jprobe, kp);
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-	long stack_ptr = kernel_stack_pointer(regs);
 
 	kcb->jprobe_saved_regs = *regs;
 	/*
-	 * As Linus pointed out, gcc assumes that the callee
-	 * owns the argument space and could overwrite it, e.g.
-	 * tailcall optimization. So, to be absolutely safe
-	 * we also save and restore enough stack bytes to cover
-	 * the argument area.
+	 * Since we can't be sure where in the stack frame "stacked"
+	 * pass-by-value arguments are stored we just don't try to
+	 * duplicate any of the stack. Do not use jprobes on functions that
+	 * use more than 64 bytes (after padding each to an 8 byte boundary)
+	 * of arguments, or pass individual arguments larger than 16 bytes.
 	 */
-	kasan_disable_current();
-	memcpy(kcb->jprobes_stack, (void *)stack_ptr,
-	       min_stack_size(stack_ptr));
-	kasan_enable_current();
 
 	instruction_pointer_set(regs, (unsigned long) jp->entry);
 	preempt_disable();
@@ -554,19 +534,12 @@
 	}
 	unpause_graph_tracing();
 	*regs = kcb->jprobe_saved_regs;
-	kasan_disable_current();
-	memcpy((void *)stack_addr, kcb->jprobes_stack,
-	       min_stack_size(stack_addr));
-	kasan_enable_current();
 	preempt_enable_no_resched();
 	return 1;
 }
 
 bool arch_within_kprobe_blacklist(unsigned long addr)
 {
-	extern char __idmap_text_start[], __idmap_text_end[];
-	extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[];
-
 	if ((addr >= (unsigned long)__kprobes_text_start &&
 	    addr < (unsigned long)__kprobes_text_end) ||
 	    (addr >= (unsigned long)__entry_text_start &&
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6cd2612..a4f5f76 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -202,7 +202,7 @@
 
 static void tls_thread_flush(void)
 {
-	asm ("msr tpidr_el0, xzr");
+	write_sysreg(0, tpidr_el0);
 
 	if (is_compat_task()) {
 		current->thread.tp_value = 0;
@@ -213,7 +213,7 @@
 		 * with a stale shadow state during context switch.
 		 */
 		barrier();
-		asm ("msr tpidrro_el0, xzr");
+		write_sysreg(0, tpidrro_el0);
 	}
 }
 
@@ -253,7 +253,7 @@
 		 * Read the current TLS pointer from tpidr_el0 as it may be
 		 * out-of-sync with the saved value.
 		 */
-		asm("mrs %0, tpidr_el0" : "=r" (*task_user_tls(p)));
+		*task_user_tls(p) = read_sysreg(tpidr_el0);
 
 		if (stack_start) {
 			if (is_compat_thread(task_thread_info(p)))
@@ -289,17 +289,15 @@
 {
 	unsigned long tpidr, tpidrro;
 
-	asm("mrs %0, tpidr_el0" : "=r" (tpidr));
+	tpidr = read_sysreg(tpidr_el0);
 	*task_user_tls(current) = tpidr;
 
 	tpidr = *task_user_tls(next);
 	tpidrro = is_compat_thread(task_thread_info(next)) ?
 		  next->thread.tp_value : 0;
 
-	asm(
-	"	msr	tpidr_el0, %0\n"
-	"	msr	tpidrro_el0, %1"
-	: : "r" (tpidr), "r" (tpidrro));
+	write_sysreg(tpidr, tpidr_el0);
+	write_sysreg(tpidrro, tpidrro_el0);
 }
 
 /* Restore the UAO state depending on next's addr_limit */
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
index 51b73cd..ce704a4 100644
--- a/arch/arm64/kernel/relocate_kernel.S
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -34,7 +34,7 @@
 	/* Setup the list loop variables. */
 	mov	x17, x1				/* x17 = kimage_start */
 	mov	x16, x0				/* x16 = kimage_head */
-	dcache_line_size x15, x0		/* x15 = dcache line size */
+	raw_dcache_line_size x15, x0		/* x15 = dcache line size */
 	mov	x14, xzr			/* x14 = entry ptr */
 	mov	x13, xzr			/* x13 = copy dest */
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 536dce2..f534f49 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -206,10 +206,15 @@
 
 	for_each_memblock(memory, region) {
 		res = alloc_bootmem_low(sizeof(*res));
-		res->name  = "System RAM";
+		if (memblock_is_nomap(region)) {
+			res->name  = "reserved";
+			res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		} else {
+			res->name  = "System RAM";
+			res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+		}
 		res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
 		res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
-		res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
 		request_resource(&iomem_resource, res);
 
@@ -228,7 +233,7 @@
 {
 	pr_info("Boot CPU: AArch64 Processor [%08x]\n", read_cpuid_id());
 
-	sprintf(init_utsname()->machine, ELF_PLATFORM);
+	sprintf(init_utsname()->machine, UTS_MACHINE);
 	init_mm.start_code = (unsigned long) _text;
 	init_mm.end_code   = (unsigned long) _etext;
 	init_mm.end_data   = (unsigned long) _edata;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index a8eafdb..404dd67 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -402,15 +402,31 @@
 asmlinkage void do_notify_resume(struct pt_regs *regs,
 				 unsigned int thread_flags)
 {
-	if (thread_flags & _TIF_SIGPENDING)
-		do_signal(regs);
+	/*
+	 * The assembly code enters us with IRQs off, but it hasn't
+	 * informed the tracing code of that for efficiency reasons.
+	 * Update the trace code with the current status.
+	 */
+	trace_hardirqs_off();
+	do {
+		if (thread_flags & _TIF_NEED_RESCHED) {
+			schedule();
+		} else {
+			local_irq_enable();
 
-	if (thread_flags & _TIF_NOTIFY_RESUME) {
-		clear_thread_flag(TIF_NOTIFY_RESUME);
-		tracehook_notify_resume(regs);
-	}
+			if (thread_flags & _TIF_SIGPENDING)
+				do_signal(regs);
 
-	if (thread_flags & _TIF_FOREIGN_FPSTATE)
-		fpsimd_restore_current_state();
+			if (thread_flags & _TIF_NOTIFY_RESUME) {
+				clear_thread_flag(TIF_NOTIFY_RESUME);
+				tracehook_notify_resume(regs);
+			}
 
+			if (thread_flags & _TIF_FOREIGN_FPSTATE)
+				fpsimd_restore_current_state();
+		}
+
+		local_irq_disable();
+		thread_flags = READ_ONCE(current_thread_info()->flags);
+	} while (thread_flags & _TIF_WORK_MASK);
 }
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index 9a3aec9..b8799e7 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -73,10 +73,9 @@
 	str	x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
 
 	/* find the mpidr_hash */
-	ldr	x1, =sleep_save_stash
-	ldr	x1, [x1]
+	ldr_l	x1, sleep_save_stash
 	mrs	x7, mpidr_el1
-	ldr	x9, =mpidr_hash
+	adr_l	x9, mpidr_hash
 	ldr	x10, [x9, #MPIDR_HASH_MASK]
 	/*
 	 * Following code relies on the struct mpidr_hash
@@ -95,28 +94,30 @@
 	mov	x0, #1
 	ret
 ENDPROC(__cpu_suspend_enter)
-	.ltorg
 
+	.pushsection ".idmap.text", "ax"
 ENTRY(cpu_resume)
 	bl	el2_setup		// if in EL2 drop to EL1 cleanly
+	bl	__cpu_setup
 	/* enable the MMU early - so we can access sleep_save_stash by va */
-	adr_l	lr, __enable_mmu	/* __cpu_setup will return here */
-	ldr	x27, =_cpu_resume	/* __enable_mmu will branch here */
-	adrp	x25, idmap_pg_dir
-	adrp	x26, swapper_pg_dir
-	b	__cpu_setup
+	bl	__enable_mmu
+	ldr	x8, =_cpu_resume
+	br	x8
 ENDPROC(cpu_resume)
+	.ltorg
+	.popsection
 
 ENTRY(_cpu_resume)
 	mrs	x1, mpidr_el1
-	adrp	x8, mpidr_hash
-	add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
-        /* retrieve mpidr_hash members to compute the hash */
+	adr_l	x8, mpidr_hash		// x8 = struct mpidr_hash virt address
+
+	/* retrieve mpidr_hash members to compute the hash */
 	ldr	x2, [x8, #MPIDR_HASH_MASK]
 	ldp	w3, w4, [x8, #MPIDR_HASH_SHIFTS]
 	ldp	w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
 	compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
-        /* x7 contains hash index, let's use it to grab context pointer */
+
+	/* x7 contains hash index, let's use it to grab context pointer */
 	ldr_l	x0, sleep_save_stash
 	ldr	x0, [x0, x7, lsl #3]
 	add	x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 76a6d92..d3f151c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -201,12 +201,6 @@
 	return ret;
 }
 
-static void smp_store_cpu_info(unsigned int cpuid)
-{
-	store_cpu_topology(cpuid);
-	numa_store_cpu_info(cpuid);
-}
-
 /*
  * This is the secondary CPU boot entry.  We're using this CPUs
  * idle thread stack, but a set of temporary page tables.
@@ -239,7 +233,7 @@
 	 * this CPU ticks all of those. If it doesn't, the CPU will
 	 * fail to come online.
 	 */
-	verify_local_cpu_capabilities();
+	check_local_cpu_capabilities();
 
 	if (cpu_ops[cpu]->cpu_postboot)
 		cpu_ops[cpu]->cpu_postboot();
@@ -254,7 +248,7 @@
 	 */
 	notify_cpu_starting(cpu);
 
-	smp_store_cpu_info(cpu);
+	store_cpu_topology(cpu);
 
 	/*
 	 * OK, now it's safe to let the boot CPU continue.  Wait for
@@ -437,8 +431,19 @@
 void __init smp_prepare_boot_cpu(void)
 {
 	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+	/*
+	 * Initialise the static keys early as they may be enabled by the
+	 * cpufeature code.
+	 */
+	jump_label_init();
 	cpuinfo_store_boot_cpu();
 	save_boot_cpu_run_el();
+	/*
+	 * Run the errata work around checks on the boot CPU, once we have
+	 * initialised the cpu feature infrastructure from
+	 * cpuinfo_store_boot_cpu() above.
+	 */
+	update_cpu_errata_workarounds();
 }
 
 static u64 __init of_get_cpu_mpidr(struct device_node *dn)
@@ -619,6 +624,7 @@
 			}
 
 			bootcpu_valid = true;
+			early_map_cpu_to_node(0, of_node_to_nid(dn));
 
 			/*
 			 * cpu_logical_map has already been
@@ -661,9 +667,9 @@
 		acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
 				      acpi_parse_gic_cpu_interface, 0);
 
-	if (cpu_count > NR_CPUS)
-		pr_warn("no. of cores (%d) greater than configured maximum of %d - clipping\n",
-			cpu_count, NR_CPUS);
+	if (cpu_count > nr_cpu_ids)
+		pr_warn("Number of cores (%d) exceeds configured maximum of %d - clipping\n",
+			cpu_count, nr_cpu_ids);
 
 	if (!bootcpu_valid) {
 		pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
@@ -677,7 +683,7 @@
 	 * with entries in cpu_logical_map while initializing the cpus.
 	 * If the cpu set-up fails, invalidate the cpu_logical_map entry.
 	 */
-	for (i = 1; i < NR_CPUS; i++) {
+	for (i = 1; i < nr_cpu_ids; i++) {
 		if (cpu_logical_map(i) != INVALID_HWID) {
 			if (smp_cpu_setup(i))
 				cpu_logical_map(i) = INVALID_HWID;
@@ -689,10 +695,13 @@
 {
 	int err;
 	unsigned int cpu;
+	unsigned int this_cpu;
 
 	init_cpu_topology();
 
-	smp_store_cpu_info(smp_processor_id());
+	this_cpu = smp_processor_id();
+	store_cpu_topology(this_cpu);
+	numa_store_cpu_info(this_cpu);
 
 	/*
 	 * If UP is mandated by "nosmp" (which implies "maxcpus=0"), don't set
@@ -719,6 +728,7 @@
 			continue;
 
 		set_cpu_present(cpu, true);
+		numa_store_cpu_info(cpu);
 	}
 }
 
diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c
index 18a71bc..9a00eee 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -29,7 +29,8 @@
 #include <asm/smp_plat.h>
 
 extern void secondary_holding_pen(void);
-volatile unsigned long secondary_holding_pen_release = INVALID_HWID;
+volatile unsigned long __section(".mmuoff.data.read")
+secondary_holding_pen_release = INVALID_HWID;
 
 static phys_addr_t cpu_release_addr[NR_CPUS];
 
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index d9751a4..c2efddf 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -43,6 +43,9 @@
 	unsigned long fp = frame->fp;
 	unsigned long irq_stack_ptr;
 
+	if (!tsk)
+		tsk = current;
+
 	/*
 	 * Switching between stacks is valid when tracing current and in
 	 * non-preemptible context.
@@ -67,7 +70,7 @@
 	frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	if (tsk && tsk->ret_stack &&
+	if (tsk->ret_stack &&
 			(frame->pc == (unsigned long)return_to_handler)) {
 		/*
 		 * This is a case where function graph tracer has
@@ -152,6 +155,27 @@
 	return trace->nr_entries >= trace->max_entries;
 }
 
+void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	struct stack_trace_data data;
+	struct stackframe frame;
+
+	data.trace = trace;
+	data.skip = trace->skip;
+	data.no_sched_functions = 0;
+
+	frame.fp = regs->regs[29];
+	frame.sp = regs->sp;
+	frame.pc = regs->pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	frame.graph = current->curr_ret_stack;
+#endif
+
+	walk_stackframe(current, &frame, save_trace, &data);
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
 	struct stack_trace_data data;
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index b616e36..ad73414 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -23,8 +23,8 @@
  * time the notifier runs debug exceptions might have been enabled already,
  * with HW breakpoints registers content still in an unknown state.
  */
-static void (*hw_breakpoint_restore)(void *);
-void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *))
+static int (*hw_breakpoint_restore)(unsigned int);
+void __init cpu_suspend_set_dbg_restorer(int (*hw_bp_restore)(unsigned int))
 {
 	/* Prevent multiple restore hook initializations */
 	if (WARN_ON(hw_breakpoint_restore))
@@ -34,6 +34,8 @@
 
 void notrace __cpu_suspend_exit(void)
 {
+	unsigned int cpu = smp_processor_id();
+
 	/*
 	 * We are resuming from reset with the idmap active in TTBR0_EL1.
 	 * We must uninstall the idmap and restore the expected MMU
@@ -45,7 +47,7 @@
 	 * Restore per-cpu offset before any kernel
 	 * subsystem relying on it has a chance to run.
 	 */
-	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+	set_my_cpu_offset(per_cpu_offset(cpu));
 
 	/*
 	 * Restore HW breakpoint registers to sane values
@@ -53,7 +55,7 @@
 	 * through local_dbg_restore.
 	 */
 	if (hw_breakpoint_restore)
-		hw_breakpoint_restore(NULL);
+		hw_breakpoint_restore(cpu);
 }
 
 /*
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 28c511b..abaf582 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -94,7 +94,7 @@
 		 * See comment in tls_thread_flush.
 		 */
 		barrier();
-		asm ("msr tpidrro_el0, %0" : : "r" (regs->regs[0]));
+		write_sysreg(regs->regs[0], tpidrro_el0);
 		return 0;
 
 	default:
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index e04f838..5ff020f 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -142,6 +142,11 @@
 	unsigned long irq_stack_ptr;
 	int skip;
 
+	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
+
+	if (!tsk)
+		tsk = current;
+
 	/*
 	 * Switching between stacks is valid when tracing current and in
 	 * non-preemptible context.
@@ -151,11 +156,6 @@
 	else
 		irq_stack_ptr = 0;
 
-	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);
-
-	if (!tsk)
-		tsk = current;
-
 	if (tsk == current) {
 		frame.fp = (unsigned long)__builtin_frame_address(0);
 		frame.sp = current_stack_pointer;
@@ -447,36 +447,29 @@
 		: "=r" (res)					\
 		: "r" (address), "i" (-EFAULT) )
 
-asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
+static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
 {
 	unsigned long address;
-	int ret;
+	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+	int crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
+	int ret = 0;
 
-	/* if this is a write with: Op0=1, Op2=1, Op1=3, CRn=7 */
-	if ((esr & 0x01fffc01) == 0x0012dc00) {
-		int rt = (esr >> 5) & 0x1f;
-		int crm = (esr >> 1) & 0x0f;
+	address = (rt == 31) ? 0 : regs->regs[rt];
 
-		address = (rt == 31) ? 0 : regs->regs[rt];
-
-		switch (crm) {
-		case 11:		/* DC CVAU, gets promoted */
-			__user_cache_maint("dc civac", address, ret);
-			break;
-		case 10:		/* DC CVAC, gets promoted */
-			__user_cache_maint("dc civac", address, ret);
-			break;
-		case 14:		/* DC CIVAC */
-			__user_cache_maint("dc civac", address, ret);
-			break;
-		case 5:			/* IC IVAU */
-			__user_cache_maint("ic ivau", address, ret);
-			break;
-		default:
-			force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
-			return;
-		}
-	} else {
+	switch (crm) {
+	case ESR_ELx_SYS64_ISS_CRM_DC_CVAU:	/* DC CVAU, gets promoted */
+		__user_cache_maint("dc civac", address, ret);
+		break;
+	case ESR_ELx_SYS64_ISS_CRM_DC_CVAC:	/* DC CVAC, gets promoted */
+		__user_cache_maint("dc civac", address, ret);
+		break;
+	case ESR_ELx_SYS64_ISS_CRM_DC_CIVAC:	/* DC CIVAC */
+		__user_cache_maint("dc civac", address, ret);
+		break;
+	case ESR_ELx_SYS64_ISS_CRM_IC_IVAU:	/* IC IVAU */
+		__user_cache_maint("ic ivau", address, ret);
+		break;
+	default:
 		force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
 		return;
 	}
@@ -487,6 +480,48 @@
 		regs->pc += 4;
 }
 
+static void ctr_read_handler(unsigned int esr, struct pt_regs *regs)
+{
+	int rt = (esr & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT;
+
+	regs->regs[rt] = arm64_ftr_reg_ctrel0.sys_val;
+	regs->pc += 4;
+}
+
+struct sys64_hook {
+	unsigned int esr_mask;
+	unsigned int esr_val;
+	void (*handler)(unsigned int esr, struct pt_regs *regs);
+};
+
+static struct sys64_hook sys64_hooks[] = {
+	{
+		.esr_mask = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK,
+		.esr_val = ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL,
+		.handler = user_cache_maint_handler,
+	},
+	{
+		/* Trap read access to CTR_EL0 */
+		.esr_mask = ESR_ELx_SYS64_ISS_SYS_OP_MASK,
+		.esr_val = ESR_ELx_SYS64_ISS_SYS_CTR_READ,
+		.handler = ctr_read_handler,
+	},
+	{},
+};
+
+asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs)
+{
+	struct sys64_hook *hook;
+
+	for (hook = sys64_hooks; hook->handler; hook++)
+		if ((hook->esr_mask & esr) == hook->esr_val) {
+			hook->handler(esr, regs);
+			return;
+		}
+
+	force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0);
+}
+
 long compat_arm_syscall(struct pt_regs *regs);
 
 asmlinkage long do_ni_syscall(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 076312b..a2c2478 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -18,12 +18,13 @@
  * Author: Will Deacon <will.deacon@arm.com>
  */
 
-#include <linux/kernel.h>
+#include <linux/cache.h>
 #include <linux/clocksource.h>
 #include <linux/elf.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/gfp.h>
+#include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/signal.h>
@@ -37,8 +38,7 @@
 #include <asm/vdso_datapage.h>
 
 extern char vdso_start, vdso_end;
-static unsigned long vdso_pages;
-static struct page **vdso_pagelist;
+static unsigned long vdso_pages __ro_after_init;
 
 /*
  * The vDSO data page.
@@ -53,9 +53,9 @@
 /*
  * Create and map the vectors page for AArch32 tasks.
  */
-static struct page *vectors_page[1];
+static struct page *vectors_page[1] __ro_after_init;
 
-static int alloc_vectors_page(void)
+static int __init alloc_vectors_page(void)
 {
 	extern char __kuser_helper_start[], __kuser_helper_end[];
 	extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
@@ -88,7 +88,7 @@
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = AARCH32_VECTORS_BASE;
-	static struct vm_special_mapping spec = {
+	static const struct vm_special_mapping spec = {
 		.name	= "[vectors]",
 		.pages	= vectors_page,
 
@@ -110,11 +110,19 @@
 }
 #endif /* CONFIG_COMPAT */
 
-static struct vm_special_mapping vdso_spec[2];
+static struct vm_special_mapping vdso_spec[2] __ro_after_init = {
+	{
+		.name	= "[vvar]",
+	},
+	{
+		.name	= "[vdso]",
+	},
+};
 
 static int __init vdso_init(void)
 {
 	int i;
+	struct page **vdso_pagelist;
 
 	if (memcmp(&vdso_start, "\177ELF", 4)) {
 		pr_err("vDSO is not a valid ELF object!\n");
@@ -138,16 +146,8 @@
 	for (i = 0; i < vdso_pages; i++)
 		vdso_pagelist[i + 1] = pfn_to_page(PHYS_PFN(__pa(&vdso_start)) + i);
 
-	/* Populate the special mapping structures */
-	vdso_spec[0] = (struct vm_special_mapping) {
-		.name	= "[vvar]",
-		.pages	= vdso_pagelist,
-	};
-
-	vdso_spec[1] = (struct vm_special_mapping) {
-		.name	= "[vdso]",
-		.pages	= &vdso_pagelist[1],
-	};
+	vdso_spec[0].pages = &vdso_pagelist[0];
+	vdso_spec[1].pages = &vdso_pagelist[1];
 
 	return 0;
 }
@@ -201,7 +201,7 @@
  */
 void update_vsyscall(struct timekeeper *tk)
 {
-	u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
+	u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct;
 
 	++vdso_data->tb_seq_count;
 	smp_wmb();
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 659963d..5ce9b29 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -185,6 +185,25 @@
 	_data = .;
 	_sdata = .;
 	RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+
+	/*
+	 * Data written with the MMU off but read with the MMU on requires
+	 * cache lines to be invalidated, discarding up to a Cache Writeback
+	 * Granule (CWG) of data from the cache. Keep the section that
+	 * requires this type of maintenance to be in its own Cache Writeback
+	 * Granule (CWG) area so the cache maintenance operations don't
+	 * interfere with adjacent data.
+	 */
+	.mmuoff.data.write : ALIGN(SZ_2K) {
+		__mmuoff_data_start = .;
+		*(.mmuoff.data.write)
+	}
+	. = ALIGN(SZ_2K);
+	.mmuoff.data.read : {
+		*(.mmuoff.data.read)
+		__mmuoff_data_end = .;
+	}
+
 	PECOFF_EDATA_PADDING
 	_edata = .;
 
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 7ce9315..2726635 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -46,10 +46,6 @@
 	hvc	#0
 	ldr     lr, [sp], #16
 	ret
-alternative_else
+alternative_else_nop_endif
 	b	__vhe_hyp_call
-	nop
-	nop
-	nop
-alternative_endif
 ENDPROC(__kvm_call_hyp)
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index ae7855f..5a84b45 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -256,7 +256,7 @@
 
 	/*
 	 * We must restore the 32-bit state before the sysregs, thanks
-	 * to Cortex-A57 erratum #852523.
+	 * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
 	 */
 	__sysreg32_restore_state(vcpu);
 	__sysreg_restore_guest_state(guest_ctxt);
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b0b225c..f302fdb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -36,6 +36,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_mmu.h>
 #include <asm/perf_event.h>
+#include <asm/sysreg.h>
 
 #include <trace/events/kvm.h>
 
@@ -67,11 +68,9 @@
 
 	/* Make sure noone else changes CSSELR during this! */
 	local_irq_disable();
-	/* Put value into CSSELR */
-	asm volatile("msr csselr_el1, %x0" : : "r" (csselr));
+	write_sysreg(csselr, csselr_el1);
 	isb();
-	/* Read result out of CCSIDR */
-	asm volatile("mrs %0, ccsidr_el1" : "=r" (ccsidr));
+	ccsidr = read_sysreg(ccsidr_el1);
 	local_irq_enable();
 
 	return ccsidr;
@@ -174,9 +173,7 @@
 	if (p->is_write) {
 		return ignore_write(vcpu, p);
 	} else {
-		u32 val;
-		asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
-		p->regval = val;
+		p->regval = read_sysreg(dbgauthstatus_el1);
 		return true;
 	}
 }
@@ -429,10 +426,7 @@
 
 static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
-	u64 amair;
-
-	asm volatile("mrs %0, amair_el1\n" : "=r" (amair));
-	vcpu_sys_reg(vcpu, AMAIR_EL1) = amair;
+	vcpu_sys_reg(vcpu, AMAIR_EL1) = read_sysreg(amair_el1);
 }
 
 static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
@@ -456,8 +450,9 @@
 {
 	u64 pmcr, val;
 
-	asm volatile("mrs %0, pmcr_el0\n" : "=r" (pmcr));
-	/* Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) is reset to UNKNOWN
+	pmcr = read_sysreg(pmcr_el0);
+	/*
+	 * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN
 	 * except PMCR.E resetting to zero.
 	 */
 	val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
@@ -557,9 +552,9 @@
 		return false;
 
 	if (!(p->Op2 & 1))
-		asm volatile("mrs %0, pmceid0_el0\n" : "=r" (pmceid));
+		pmceid = read_sysreg(pmceid0_el0);
 	else
-		asm volatile("mrs %0, pmceid1_el0\n" : "=r" (pmceid));
+		pmceid = read_sysreg(pmceid1_el0);
 
 	p->regval = pmceid;
 
@@ -823,14 +818,6 @@
  * Architected system registers.
  * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
  *
- * We could trap ID_DFR0 and tell the guest we don't support performance
- * monitoring.  Unfortunately the patch to make the kernel check ID_DFR0 was
- * NAKed, so it will read the PMCR anyway.
- *
- * Therefore we tell the guest we have 0 counters.  Unfortunately, we
- * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
- * all PM registers, which doesn't crash the guest kernel at least.
- *
  * Debug handling: We do trap most, if not all debug related system
  * registers. The implementation is good enough to ensure that a guest
  * can use these with minimal performance degradation. The drawback is
@@ -1360,7 +1347,7 @@
 	{ Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 },
 
 	/* ICC_SRE */
-	{ Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi },
+	{ Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre },
 
 	{ Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID },
 
@@ -1841,11 +1828,7 @@
 	static void get_##reg(struct kvm_vcpu *v,			\
 			      const struct sys_reg_desc *r)		\
 	{								\
-		u64 val;						\
-									\
-		asm volatile("mrs %0, " __stringify(reg) "\n"		\
-			     : "=r" (val));				\
-		((struct sys_reg_desc *)r)->val = val;			\
+		((struct sys_reg_desc *)r)->val = read_sysreg(reg);	\
 	}
 
 FUNCTION_INVARIANT(midr_el1)
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index ed90578..46af718 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -26,6 +26,7 @@
 #include <asm/kvm_host.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_coproc.h>
+#include <asm/sysreg.h>
 #include <linux/init.h>
 
 #include "sys_regs.h"
@@ -43,10 +44,7 @@
 
 static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
-	u64 actlr;
-
-	asm volatile("mrs %0, actlr_el1\n" : "=r" (actlr));
-	vcpu_sys_reg(vcpu, ACTLR_EL1) = actlr;
+	vcpu_sys_reg(vcpu, ACTLR_EL1) = read_sysreg(actlr_el1);
 }
 
 /*
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 4c1e700..c3cd65e 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -29,14 +29,11 @@
  *	x1 - src
  */
 ENTRY(copy_page)
-alternative_if_not ARM64_HAS_NO_HW_PREFETCH
-	nop
-	nop
-alternative_else
+alternative_if ARM64_HAS_NO_HW_PREFETCH
 	# Prefetch two cache lines ahead.
 	prfm    pldl1strm, [x1, #128]
 	prfm    pldl1strm, [x1, #256]
-alternative_endif
+alternative_else_nop_endif
 
 	ldp	x2, x3, [x1]
 	ldp	x4, x5, [x1, #16]
@@ -52,11 +49,9 @@
 1:
 	subs	x18, x18, #128
 
-alternative_if_not ARM64_HAS_NO_HW_PREFETCH
-	nop
-alternative_else
+alternative_if ARM64_HAS_NO_HW_PREFETCH
 	prfm    pldl1strm, [x1, #384]
-alternative_endif
+alternative_else_nop_endif
 
 	stnp	x2, x3, [x0]
 	ldp	x2, x3, [x1]
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 07d7352..58b5a90 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -105,19 +105,20 @@
 ENDPROC(__clean_dcache_area_pou)
 
 /*
+ *	__dma_inv_area(start, size)
+ *	- start   - virtual start address of region
+ *	- size    - size in question
+ */
+__dma_inv_area:
+	add	x1, x1, x0
+	/* FALLTHROUGH */
+
+/*
  *	__inval_cache_range(start, end)
  *	- start   - start address of region
  *	- end     - end address of region
  */
 ENTRY(__inval_cache_range)
-	/* FALLTHROUGH */
-
-/*
- *	__dma_inv_range(start, end)
- *	- start   - virtual start address of region
- *	- end     - virtual end address of region
- */
-__dma_inv_range:
 	dcache_line_size x2, x3
 	sub	x3, x2, #1
 	tst	x1, x3				// end cache line aligned?
@@ -136,46 +137,43 @@
 	dsb	sy
 	ret
 ENDPIPROC(__inval_cache_range)
-ENDPROC(__dma_inv_range)
+ENDPROC(__dma_inv_area)
 
 /*
- *	__dma_clean_range(start, end)
- *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	__clean_dcache_area_poc(kaddr, size)
+ *
+ * 	Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
+ * 	are cleaned to the PoC.
+ *
+ *	- kaddr   - kernel address
+ *	- size    - size in question
  */
-__dma_clean_range:
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
-	dc	cvac, x0
-alternative_else
-	dc	civac, x0
-alternative_endif
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	1b
-	dsb	sy
-	ret
-ENDPROC(__dma_clean_range)
+ENTRY(__clean_dcache_area_poc)
+	/* FALLTHROUGH */
 
 /*
- *	__dma_flush_range(start, end)
+ *	__dma_clean_area(start, size)
  *	- start   - virtual start address of region
- *	- end     - virtual end address of region
+ *	- size    - size in question
  */
-ENTRY(__dma_flush_range)
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-1:	dc	civac, x0			// clean & invalidate D / U line
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	1b
-	dsb	sy
+__dma_clean_area:
+	dcache_by_line_op cvac, sy, x0, x1, x2, x3
 	ret
-ENDPIPROC(__dma_flush_range)
+ENDPIPROC(__clean_dcache_area_poc)
+ENDPROC(__dma_clean_area)
+
+/*
+ *	__dma_flush_area(start, size)
+ *
+ *	clean & invalidate D / U line
+ *
+ *	- start   - virtual start address of region
+ *	- size    - size in question
+ */
+ENTRY(__dma_flush_area)
+	dcache_by_line_op civac, sy, x0, x1, x2, x3
+	ret
+ENDPIPROC(__dma_flush_area)
 
 /*
  *	__dma_map_area(start, size, dir)
@@ -184,10 +182,9 @@
  *	- dir	- DMA direction
  */
 ENTRY(__dma_map_area)
-	add	x1, x1, x0
 	cmp	w2, #DMA_FROM_DEVICE
-	b.eq	__dma_inv_range
-	b	__dma_clean_range
+	b.eq	__dma_inv_area
+	b	__dma_clean_area
 ENDPIPROC(__dma_map_area)
 
 /*
@@ -197,8 +194,7 @@
  *	- dir	- DMA direction
  */
 ENTRY(__dma_unmap_area)
-	add	x1, x1, x0
 	cmp	w2, #DMA_TO_DEVICE
-	b.ne	__dma_inv_range
+	b.ne	__dma_inv_area
 	ret
 ENDPIPROC(__dma_unmap_area)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c4284c4..bdacead 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -20,6 +20,7 @@
 #include <linux/gfp.h>
 #include <linux/acpi.h>
 #include <linux/bootmem.h>
+#include <linux/cache.h>
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/genalloc.h>
@@ -30,7 +31,7 @@
 
 #include <asm/cacheflush.h>
 
-static int swiotlb __read_mostly;
+static int swiotlb __ro_after_init;
 
 static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
 				 bool coherent)
@@ -168,7 +169,7 @@
 		return ptr;
 
 	/* remove any dirty cache lines on the kernel alias */
-	__dma_flush_range(ptr, ptr + size);
+	__dma_flush_area(ptr, size);
 
 	/* create a coherent mapping */
 	page = virt_to_page(ptr);
@@ -387,7 +388,7 @@
 		void *page_addr = page_address(page);
 
 		memset(page_addr, 0, atomic_pool_size);
-		__dma_flush_range(page_addr, page_addr + atomic_pool_size);
+		__dma_flush_area(page_addr, atomic_pool_size);
 
 		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
 		if (!atomic_pool)
@@ -548,7 +549,7 @@
 /* Thankfully, all cache ops are by VA so we can ignore phys here */
 static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
 {
-	__dma_flush_range(virt, virt + PAGE_SIZE);
+	__dma_flush_area(virt, PAGE_SIZE);
 }
 
 static void *__iommu_alloc_attrs(struct device *dev, size_t size,
diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c
index f94b80e..9c3e75d 100644
--- a/arch/arm64/mm/dump.c
+++ b/arch/arm64/mm/dump.c
@@ -242,7 +242,7 @@
 
 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
 {
-	pte_t *pte = pte_offset_kernel(pmd, 0);
+	pte_t *pte = pte_offset_kernel(pmd, 0UL);
 	unsigned long addr;
 	unsigned i;
 
@@ -254,7 +254,7 @@
 
 static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
 {
-	pmd_t *pmd = pmd_offset(pud, 0);
+	pmd_t *pmd = pmd_offset(pud, 0UL);
 	unsigned long addr;
 	unsigned i;
 
@@ -271,7 +271,7 @@
 
 static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 {
-	pud_t *pud = pud_offset(pgd, 0);
+	pud_t *pud = pud_offset(pgd, 0UL);
 	unsigned long addr;
 	unsigned i;
 
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 81acd47..c9f118c 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -2,7 +2,7 @@
  * Based on arch/arm/mm/extable.c
  */
 
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/uaccess.h>
 
 int fixup_exception(struct pt_regs *regs)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index c8beaa0..53d9159 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -18,7 +18,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/signal.h>
 #include <linux/mm.h>
 #include <linux/hardirq.h>
@@ -153,6 +153,11 @@
 }
 #endif
 
+static bool is_el1_instruction_abort(unsigned int esr)
+{
+	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR;
+}
+
 /*
  * The kernel tried to access some page that wasn't present.
  */
@@ -161,8 +166,9 @@
 {
 	/*
 	 * Are we prepared to handle this kernel fault?
+	 * We are almost certainly not prepared to handle instruction faults.
 	 */
-	if (fixup_exception(regs))
+	if (!is_el1_instruction_abort(esr) && fixup_exception(regs))
 		return;
 
 	/*
@@ -245,8 +251,7 @@
 good_area:
 	/*
 	 * Check that the permissions on the VMA allow for the fault which
-	 * occurred. If we encountered a write or exec fault, we must have
-	 * appropriate permissions, otherwise we allow any permission.
+	 * occurred.
 	 */
 	if (!(vma->vm_flags & vm_flags)) {
 		fault = VM_FAULT_BADACCESS;
@@ -267,7 +272,8 @@
 	unsigned int ec       = ESR_ELx_EC(esr);
 	unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE;
 
-	return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
+	return (ec == ESR_ELx_EC_DABT_CUR && fsc_type == ESR_ELx_FSC_PERM) ||
+	       (ec == ESR_ELx_EC_IABT_CUR && fsc_type == ESR_ELx_FSC_PERM);
 }
 
 static bool is_el0_instruction_abort(unsigned int esr)
@@ -281,7 +287,7 @@
 	struct task_struct *tsk;
 	struct mm_struct *mm;
 	int fault, sig, code;
-	unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
+	unsigned long vm_flags = VM_READ | VM_WRITE;
 	unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 	if (notify_page_fault(regs, esr))
@@ -312,6 +318,9 @@
 		if (regs->orig_addr_limit == KERNEL_DS)
 			die("Accessing user space memory with fs=KERNEL_DS", regs, esr);
 
+		if (is_el1_instruction_abort(esr))
+			die("Attempting to execute userspace memory", regs, esr);
+
 		if (!search_exception_tables(regs->pc))
 			die("Accessing user space memory outside uaccess.h routines", regs, esr);
 	}
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c
index 43a76b0..8377329 100644
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -25,8 +25,6 @@
 #include <asm/cachetype.h>
 #include <asm/tlbflush.h>
 
-#include "mm.h"
-
 void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 		       unsigned long end)
 {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index bbb7ee7..21c489b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -23,6 +23,7 @@
 #include <linux/swap.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
+#include <linux/cache.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
@@ -34,6 +35,7 @@
 #include <linux/dma-contiguous.h>
 #include <linux/efi.h>
 #include <linux/swiotlb.h>
+#include <linux/vmalloc.h>
 
 #include <asm/boot.h>
 #include <asm/fixmap.h>
@@ -47,16 +49,14 @@
 #include <asm/tlb.h>
 #include <asm/alternative.h>
 
-#include "mm.h"
-
 /*
  * We need to be able to catch inadvertent references to memstart_addr
  * that occur (potentially in generic code) before arm64_memblock_init()
  * executes, which assigns it its actual value. So use a default value
  * that cannot be mistaken for a real physical address.
  */
-s64 memstart_addr __read_mostly = -1;
-phys_addr_t arm64_dma_phys_limit __read_mostly;
+s64 memstart_addr __ro_after_init = -1;
+phys_addr_t arm64_dma_phys_limit __ro_after_init;
 
 #ifdef CONFIG_BLK_DEV_INITRD
 static int __init early_initrd(char *p)
@@ -485,7 +485,12 @@
 {
 	free_reserved_area(__va(__pa(__init_begin)), __va(__pa(__init_end)),
 			   0, "unused kernel");
-	fixup_init();
+	/*
+	 * Unmap the __init region but leave the VM area in place. This
+	 * prevents the region from being reused for kernel modules, which
+	 * is not supported by kallsyms.
+	 */
+	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
deleted file mode 100644
index 71fe9898..0000000
--- a/arch/arm64/mm/mm.h
+++ /dev/null
@@ -1,2 +0,0 @@
-
-void fixup_init(void);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 4989948..05615a3 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/cache.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
@@ -42,11 +43,9 @@
 #include <asm/memblock.h>
 #include <asm/mmu_context.h>
 
-#include "mm.h"
-
 u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
 
-u64 kimage_voffset __read_mostly;
+u64 kimage_voffset __ro_after_init;
 EXPORT_SYMBOL(kimage_voffset);
 
 /*
@@ -399,16 +398,6 @@
 			    section_size, PAGE_KERNEL_RO);
 }
 
-void fixup_init(void)
-{
-	/*
-	 * Unmap the __init region but leave the VM area in place. This
-	 * prevents the region from being reused for kernel modules, which
-	 * is not supported by kallsyms.
-	 */
-	unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin));
-}
-
 static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
 				      pgprot_t prot, struct vm_struct *vma)
 {
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index c7fe3ec..778a985 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -17,12 +17,17 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#define pr_fmt(fmt) "NUMA: " fmt
+
 #include <linux/acpi.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/module.h>
 #include <linux/of.h>
 
+#include <asm/acpi.h>
+#include <asm/sections.h>
+
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 nodemask_t numa_nodes_parsed __initdata;
@@ -36,10 +41,9 @@
 {
 	if (!opt)
 		return -EINVAL;
-	if (!strncmp(opt, "off", 3)) {
-		pr_info("%s\n", "NUMA turned off");
+	if (!strncmp(opt, "off", 3))
 		numa_off = true;
-	}
+
 	return 0;
 }
 early_param("numa", numa_parse_early_param);
@@ -91,7 +95,6 @@
  */
 static void __init setup_node_to_cpumask_map(void)
 {
-	unsigned int cpu;
 	int node;
 
 	/* setup nr_node_ids if not done yet */
@@ -104,11 +107,8 @@
 		cpumask_clear(node_to_cpumask_map[node]);
 	}
 
-	for_each_possible_cpu(cpu)
-		set_cpu_numa_node(cpu, NUMA_NO_NODE);
-
 	/* cpumask_of_node() will now work */
-	pr_debug("NUMA: Node to cpumask map for %d nodes\n", nr_node_ids);
+	pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
 /*
@@ -116,18 +116,77 @@
  */
 void numa_store_cpu_info(unsigned int cpu)
 {
-	map_cpu_to_node(cpu, numa_off ? 0 : cpu_to_node_map[cpu]);
+	map_cpu_to_node(cpu, cpu_to_node_map[cpu]);
 }
 
 void __init early_map_cpu_to_node(unsigned int cpu, int nid)
 {
 	/* fallback to node 0 */
-	if (nid < 0 || nid >= MAX_NUMNODES)
+	if (nid < 0 || nid >= MAX_NUMNODES || numa_off)
 		nid = 0;
 
 	cpu_to_node_map[cpu] = nid;
+
+	/*
+	 * We should set the numa node of cpu0 as soon as possible, because it
+	 * has already been set up online before. cpu_to_node(0) will soon be
+	 * called.
+	 */
+	if (!cpu)
+		set_cpu_numa_node(cpu, nid);
 }
 
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init early_cpu_to_node(int cpu)
+{
+	return cpu_to_node_map[cpu];
+}
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	return node_distance(from, to);
+}
+
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
+				       size_t align)
+{
+	int nid = early_cpu_to_node(cpu);
+
+	return  memblock_virt_alloc_try_nid(size, align,
+			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+}
+
+static void __init pcpu_fc_free(void *ptr, size_t size)
+{
+	memblock_free_early(__pa(ptr), size);
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long delta;
+	unsigned int cpu;
+	int rc;
+
+	/*
+	 * Always reserve area for module percpu variables.  That's
+	 * what the legacy allocator did.
+	 */
+	rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+				    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
+				    pcpu_cpu_distance,
+				    pcpu_fc_alloc, pcpu_fc_free);
+	if (rc < 0)
+		panic("Failed to initialize percpu areas.");
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+#endif
+
 /**
  * numa_add_memblk - Set node id to memblk
  * @nid: NUMA node ID of the new memblk
@@ -143,13 +202,13 @@
 
 	ret = memblock_set_node(start, (end - start), &memblock.memory, nid);
 	if (ret < 0) {
-		pr_err("NUMA: memblock [0x%llx - 0x%llx] failed to add on node %d\n",
+		pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n",
 			start, (end - 1), nid);
 		return ret;
 	}
 
 	node_set(nid, numa_nodes_parsed);
-	pr_info("NUMA: Adding memblock [0x%llx - 0x%llx] on node %d\n",
+	pr_info("Adding memblock [0x%llx - 0x%llx] on node %d\n",
 			start, (end - 1), nid);
 	return ret;
 }
@@ -164,19 +223,18 @@
 	void *nd;
 	int tnid;
 
-	pr_info("NUMA: Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
-			nid, start_pfn << PAGE_SHIFT,
-			(end_pfn << PAGE_SHIFT) - 1);
+	pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
+		nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1);
 
 	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
 	nd = __va(nd_pa);
 
 	/* report and initialize */
-	pr_info("NUMA: NODE_DATA [mem %#010Lx-%#010Lx]\n",
+	pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n",
 		nd_pa, nd_pa + nd_size - 1);
 	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
 	if (tnid != nid)
-		pr_info("NUMA: NODE_DATA(%d) on node %d\n", nid, tnid);
+		pr_info("NODE_DATA(%d) on node %d\n", nid, tnid);
 
 	node_data[nid] = nd;
 	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
@@ -233,8 +291,7 @@
 			numa_distance[i * numa_distance_cnt + j] = i == j ?
 				LOCAL_DISTANCE : REMOTE_DISTANCE;
 
-	pr_debug("NUMA: Initialized distance table, cnt=%d\n",
-			numa_distance_cnt);
+	pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt);
 
 	return 0;
 }
@@ -255,20 +312,20 @@
 void __init numa_set_distance(int from, int to, int distance)
 {
 	if (!numa_distance) {
-		pr_warn_once("NUMA: Warning: distance table not allocated yet\n");
+		pr_warn_once("Warning: distance table not allocated yet\n");
 		return;
 	}
 
 	if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
 			from < 0 || to < 0) {
-		pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
+		pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n",
 			    from, to, distance);
 		return;
 	}
 
 	if ((u8)distance != distance ||
 	    (from == to && distance != LOCAL_DISTANCE)) {
-		pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
+		pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
 			     from, to, distance);
 		return;
 	}
@@ -295,7 +352,7 @@
 	/* Check that valid nid is set to memblks */
 	for_each_memblock(memory, mblk)
 		if (mblk->nid == NUMA_NO_NODE || mblk->nid >= MAX_NUMNODES) {
-			pr_warn("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
+			pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
 				mblk->nid, mblk->base,
 				mblk->base + mblk->size - 1);
 			return -EINVAL;
@@ -333,8 +390,10 @@
 	if (ret < 0)
 		return ret;
 
-	if (nodes_empty(numa_nodes_parsed))
+	if (nodes_empty(numa_nodes_parsed)) {
+		pr_info("No NUMA configuration found\n");
 		return -EINVAL;
+	}
 
 	ret = numa_register_nodes();
 	if (ret < 0)
@@ -342,10 +401,6 @@
 
 	setup_node_to_cpumask_map();
 
-	/* init boot processor */
-	cpu_to_node_map[0] = 0;
-	map_cpu_to_node(0, 0);
-
 	return 0;
 }
 
@@ -365,10 +420,8 @@
 
 	if (numa_off)
 		pr_info("NUMA disabled\n"); /* Forced off on command line. */
-	else
-		pr_info("No NUMA configuration found\n");
-	pr_info("NUMA: Faking a node at [mem %#018Lx-%#018Lx]\n",
-	       0LLU, PFN_PHYS(max_pfn) - 1);
+	pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
+		0LLU, PFN_PHYS(max_pfn) - 1);
 
 	for_each_memblock(memory, mblk) {
 		ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index ca6d268..8def55e 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -139,4 +139,43 @@
 					__pgprot(0),
 					__pgprot(PTE_VALID));
 }
-#endif
+#ifdef CONFIG_HIBERNATION
+/*
+ * When built with CONFIG_DEBUG_PAGEALLOC and CONFIG_HIBERNATION, this function
+ * is used to determine if a linear map page has been marked as not-valid by
+ * CONFIG_DEBUG_PAGEALLOC. Walk the page table and check the PTE_VALID bit.
+ * This is based on kern_addr_valid(), which almost does what we need.
+ *
+ * Because this is only called on the kernel linear map,  p?d_sect() implies
+ * p?d_present(). When debug_pagealloc is enabled, sections mappings are
+ * disabled.
+ */
+bool kernel_page_present(struct page *page)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long addr = (unsigned long)page_address(page);
+
+	pgd = pgd_offset_k(addr);
+	if (pgd_none(*pgd))
+		return false;
+
+	pud = pud_offset(pgd, addr);
+	if (pud_none(*pud))
+		return false;
+	if (pud_sect(*pud))
+		return true;
+
+	pmd = pmd_offset(pud, addr);
+	if (pmd_none(*pmd))
+		return false;
+	if (pmd_sect(*pmd))
+		return true;
+
+	pte = pte_offset_kernel(pmd, addr);
+	return pte_valid(*pte);
+}
+#endif /* CONFIG_HIBERNATION */
+#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index ae11d4e..371c5f0 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -26,8 +26,6 @@
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 
-#include "mm.h"
-
 static struct kmem_cache *pgd_cache;
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 5bb61de..352c73b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -83,6 +83,7 @@
  *
  * x0: Address of context pointer
  */
+	.pushsection ".idmap.text", "ax"
 ENTRY(cpu_do_resume)
 	ldp	x2, x3, [x0]
 	ldp	x4, x5, [x0, #16]
@@ -100,7 +101,16 @@
 
 	msr	tcr_el1, x8
 	msr	vbar_el1, x9
+
+	/*
+	 * __cpu_setup() cleared MDSCR_EL1.MDE and friends, before unmasking
+	 * debug exceptions. By restoring MDSCR_EL1 here, we may take a debug
+	 * exception. Mask them until local_dbg_restore() in cpu_suspend()
+	 * resets them.
+	 */
+	disable_dbg
 	msr	mdscr_el1, x10
+
 	msr	sctlr_el1, x12
 	/*
 	 * Restore oslsr_el1 by writing oslar_el1
@@ -111,6 +121,7 @@
 	isb
 	ret
 ENDPROC(cpu_do_resume)
+	.popsection
 #endif
 
 /*
@@ -125,17 +136,12 @@
 	bfi	x0, x1, #48, #16		// set the ASID
 	msr	ttbr0_el1, x0			// set TTBR0
 	isb
-alternative_if_not ARM64_WORKAROUND_CAVIUM_27456
-	ret
-	nop
-	nop
-	nop
-alternative_else
+alternative_if ARM64_WORKAROUND_CAVIUM_27456
 	ic	iallu
 	dsb	nsh
 	isb
+alternative_else_nop_endif
 	ret
-alternative_endif
 ENDPROC(cpu_do_switch_mm)
 
 	.pushsection ".idmap.text", "ax"
@@ -172,6 +178,7 @@
  *	Initialise the processor for turning the MMU on.  Return in x0 the
  *	value of the SCTLR_EL1 register.
  */
+	.pushsection ".idmap.text", "ax"
 ENTRY(__cpu_setup)
 	tlbi	vmalle1				// Invalidate local TLB
 	dsb	nsh
@@ -257,3 +264,4 @@
 crval:
 	.word	0xfcffffff			// clear
 	.word	0x34d5d91d			// set
+	.popsection
diff --git a/arch/avr32/include/asm/uaccess.h b/arch/avr32/include/asm/uaccess.h
index 68cf638..b1ec1fa 100644
--- a/arch/avr32/include/asm/uaccess.h
+++ b/arch/avr32/include/asm/uaccess.h
@@ -74,7 +74,7 @@
 
 extern __kernel_size_t copy_to_user(void __user *to, const void *from,
 				    __kernel_size_t n);
-extern __kernel_size_t copy_from_user(void *to, const void __user *from,
+extern __kernel_size_t ___copy_from_user(void *to, const void __user *from,
 				      __kernel_size_t n);
 
 static inline __kernel_size_t __copy_to_user(void __user *to, const void *from,
@@ -88,6 +88,15 @@
 {
 	return __copy_user(to, (const void __force *)from, n);
 }
+static inline __kernel_size_t copy_from_user(void *to,
+					       const void __user *from,
+					       __kernel_size_t n)
+{
+	size_t res = ___copy_from_user(to, from, n);
+	if (unlikely(res))
+		memset(to + (n - res), 0, res);
+	return res;
+}
 
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
diff --git a/arch/avr32/kernel/avr32_ksyms.c b/arch/avr32/kernel/avr32_ksyms.c
index d93ead0..7c6cf14 100644
--- a/arch/avr32/kernel/avr32_ksyms.c
+++ b/arch/avr32/kernel/avr32_ksyms.c
@@ -36,7 +36,7 @@
 /*
  * Userspace access stuff.
  */
-EXPORT_SYMBOL(copy_from_user);
+EXPORT_SYMBOL(___copy_from_user);
 EXPORT_SYMBOL(copy_to_user);
 EXPORT_SYMBOL(__copy_user);
 EXPORT_SYMBOL(strncpy_from_user);
diff --git a/arch/avr32/lib/copy_user.S b/arch/avr32/lib/copy_user.S
index ea59c04..0753734 100644
--- a/arch/avr32/lib/copy_user.S
+++ b/arch/avr32/lib/copy_user.S
@@ -23,13 +23,13 @@
 	 */
 	.text
 	.align	1
-	.global	copy_from_user
-	.type	copy_from_user, @function
-copy_from_user:
+	.global	___copy_from_user
+	.type	___copy_from_user, @function
+___copy_from_user:
 	branch_if_kernel r8, __copy_user
 	ret_if_privileged r8, r11, r10, r10
 	rjmp	__copy_user
-	.size	copy_from_user, . - copy_from_user
+	.size	___copy_from_user, . - ___copy_from_user
 
 	.global	copy_to_user
 	.type	copy_to_user, @function
diff --git a/arch/blackfin/include/asm/uaccess.h b/arch/blackfin/include/asm/uaccess.h
index 12f5d68..0a2a700 100644
--- a/arch/blackfin/include/asm/uaccess.h
+++ b/arch/blackfin/include/asm/uaccess.h
@@ -171,11 +171,12 @@
 static inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (access_ok(VERIFY_READ, from, n))
+	if (likely(access_ok(VERIFY_READ, from, n))) {
 		memcpy(to, (const void __force *)from, n);
-	else
-		return n;
-	return 0;
+		return 0;
+	}
+	memset(to, 0, n);
+	return n;
 }
 
 static inline unsigned long __must_check
diff --git a/arch/blackfin/kernel/ftrace-entry.S b/arch/blackfin/kernel/ftrace-entry.S
index 28d0595..3b8bdcb 100644
--- a/arch/blackfin/kernel/ftrace-entry.S
+++ b/arch/blackfin/kernel/ftrace-entry.S
@@ -169,7 +169,7 @@
 	r0 = sp;	/* unsigned long *parent */
 	r1 = [sp];	/* unsigned long self_addr */
 # endif
-# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	r2 = fp;	/* unsigned long frame_pointer */
 # endif
 	r0 += 16;	/* skip the 4 local regs on stack */
@@ -190,7 +190,7 @@
 	[--sp] = r1;
 
 	/* get original return address */
-# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	r0 = fp;	/* Blackfin is sane, so omit this */
 # endif
 	call _ftrace_return_to_handler;
diff --git a/arch/blackfin/kernel/ftrace.c b/arch/blackfin/kernel/ftrace.c
index 095de0fa..8dad758 100644
--- a/arch/blackfin/kernel/ftrace.c
+++ b/arch/blackfin/kernel/ftrace.c
@@ -107,7 +107,7 @@
 		return;
 
 	if (ftrace_push_return_trace(*parent, self_addr, &trace.depth,
-	                             frame_pointer) == -EBUSY)
+				     frame_pointer, NULL) == -EBUSY)
 		return;
 
 	trace.func = self_addr;
diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c
index c6db52b..10c5777 100644
--- a/arch/blackfin/mach-bf561/boards/cm_bf561.c
+++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c
@@ -146,7 +146,8 @@
 #include <linux/smc91x.h>
 
 static struct smc91x_platdata smc91x_info = {
-	.flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		 SMC91X_NOWAIT,
 	.leda = RPC_LED_100_10,
 	.ledb = RPC_LED_TX_RX,
 };
diff --git a/arch/blackfin/mach-bf561/boards/ezkit.c b/arch/blackfin/mach-bf561/boards/ezkit.c
index f35525b..57d1c43 100644
--- a/arch/blackfin/mach-bf561/boards/ezkit.c
+++ b/arch/blackfin/mach-bf561/boards/ezkit.c
@@ -134,7 +134,8 @@
 #include <linux/smc91x.h>
 
 static struct smc91x_platdata smc91x_info = {
-	.flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+	.flags = SMC91X_USE_8BIT | SMC91X_USE_16BIT | SMC91X_USE_32BIT |
+		 SMC91X_NOWAIT,
 	.leda = RPC_LED_100_10,
 	.ledb = RPC_LED_TX_RX,
 };
diff --git a/arch/blackfin/mach-bf561/coreb.c b/arch/blackfin/mach-bf561/coreb.c
index 78ecb50..8a2543c 100644
--- a/arch/blackfin/mach-bf561/coreb.c
+++ b/arch/blackfin/mach-bf561/coreb.c
@@ -59,18 +59,7 @@
 	.name  = "coreb",
 	.fops  = &coreb_fops,
 };
-
-static int __init bf561_coreb_init(void)
-{
-	return misc_register(&coreb_dev);
-}
-module_init(bf561_coreb_init);
-
-static void __exit bf561_coreb_exit(void)
-{
-	misc_deregister(&coreb_dev);
-}
-module_exit(bf561_coreb_exit);
+module_misc_device(coreb_dev);
 
 MODULE_AUTHOR("Bas Vermeulen <bvermeul@blackstar.xs4all.nl>");
 MODULE_DESCRIPTION("BF561 Core B Support");
diff --git a/arch/cris/include/asm/uaccess.h b/arch/cris/include/asm/uaccess.h
index e3530d0..56c7d57 100644
--- a/arch/cris/include/asm/uaccess.h
+++ b/arch/cris/include/asm/uaccess.h
@@ -194,30 +194,6 @@
 extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n);
 extern unsigned long __do_clear_user(void __user *to, unsigned long n);
 
-static inline unsigned long
-__generic_copy_to_user(void __user *to, const void *from, unsigned long n)
-{
-	if (access_ok(VERIFY_WRITE, to, n))
-		return __copy_user(to, from, n);
-	return n;
-}
-
-static inline unsigned long
-__generic_copy_from_user(void *to, const void __user *from, unsigned long n)
-{
-	if (access_ok(VERIFY_READ, from, n))
-		return __copy_user_zeroing(to, from, n);
-	return n;
-}
-
-static inline unsigned long
-__generic_clear_user(void __user *to, unsigned long n)
-{
-	if (access_ok(VERIFY_WRITE, to, n))
-		return __do_clear_user(to, n);
-	return n;
-}
-
 static inline long
 __strncpy_from_user(char *dst, const char __user *src, long count)
 {
@@ -282,7 +258,7 @@
 	else if (n == 24)
 		__asm_copy_from_user_24(to, from, ret);
 	else
-		ret = __generic_copy_from_user(to, from, n);
+		ret = __copy_user_zeroing(to, from, n);
 
 	return ret;
 }
@@ -333,7 +309,7 @@
 	else if (n == 24)
 		__asm_copy_to_user_24(to, from, ret);
 	else
-		ret = __generic_copy_to_user(to, from, n);
+		ret = __copy_user(to, from, n);
 
 	return ret;
 }
@@ -366,26 +342,43 @@
 	else if (n == 24)
 		__asm_clear_24(to, ret);
 	else
-		ret = __generic_clear_user(to, n);
+		ret = __do_clear_user(to, n);
 
 	return ret;
 }
 
 
-#define clear_user(to, n)				\
-	(__builtin_constant_p(n) ?			\
-	 __constant_clear_user(to, n) :			\
-	 __generic_clear_user(to, n))
+static inline size_t clear_user(void __user *to, size_t n)
+{
+	if (unlikely(!access_ok(VERIFY_WRITE, to, n)))
+		return n;
+	if (__builtin_constant_p(n))
+		return __constant_clear_user(to, n);
+	else
+		return __do_clear_user(to, n);
+}
 
-#define copy_from_user(to, from, n)			\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_from_user(to, from, n) :	\
-	 __generic_copy_from_user(to, from, n))
+static inline size_t copy_from_user(void *to, const void __user *from, size_t n)
+{
+	if (unlikely(!access_ok(VERIFY_READ, from, n))) {
+		memset(to, 0, n);
+		return n;
+	}
+	if (__builtin_constant_p(n))
+		return __constant_copy_from_user(to, from, n);
+	else
+		return __copy_user_zeroing(to, from, n);
+}
 
-#define copy_to_user(to, from, n)			\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_to_user(to, from, n) :		\
-	 __generic_copy_to_user(to, from, n))
+static inline size_t copy_to_user(void __user *to, const void *from, size_t n)
+{
+	if (unlikely(!access_ok(VERIFY_WRITE, to, n)))
+		return n;
+	if (__builtin_constant_p(n))
+		return __constant_copy_to_user(to, from, n);
+	else
+		return __copy_user(to, from, n);
+}
 
 /* We let the __ versions of copy_from/to_user inline, because they're often
  * used in fast paths and have only a small space overhead.
diff --git a/arch/frv/include/asm/uaccess.h b/arch/frv/include/asm/uaccess.h
index 3ac9a59..87d9e34 100644
--- a/arch/frv/include/asm/uaccess.h
+++ b/arch/frv/include/asm/uaccess.h
@@ -263,19 +263,25 @@
 extern long __memset_user(void *dst, unsigned long count);
 extern long __memcpy_user(void *dst, const void *src, unsigned long count);
 
-#define clear_user(dst,count)			__memset_user(____force(dst), (count))
+#define __clear_user(dst,count)			__memset_user(____force(dst), (count))
 #define __copy_from_user_inatomic(to, from, n)	__memcpy_user((to), ____force(from), (n))
 #define __copy_to_user_inatomic(to, from, n)	__memcpy_user(____force(to), (from), (n))
 
 #else
 
-#define clear_user(dst,count)			(memset(____force(dst), 0, (count)), 0)
+#define __clear_user(dst,count)			(memset(____force(dst), 0, (count)), 0)
 #define __copy_from_user_inatomic(to, from, n)	(memcpy((to), ____force(from), (n)), 0)
 #define __copy_to_user_inatomic(to, from, n)	(memcpy(____force(to), (from), (n)), 0)
 
 #endif
 
-#define __clear_user clear_user
+static inline unsigned long __must_check
+clear_user(void __user *to, unsigned long n)
+{
+	if (likely(__access_ok(to, n)))
+		n = __clear_user(to, n);
+	return n;
+}
 
 static inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
diff --git a/arch/h8300/include/asm/io.h b/arch/h8300/include/asm/io.h
index 2e221c5..f86918a 100644
--- a/arch/h8300/include/asm/io.h
+++ b/arch/h8300/include/asm/io.h
@@ -3,6 +3,8 @@
 
 #ifdef __KERNEL__
 
+#include <linux/types.h>
+
 /* H8/300 internal I/O functions */
 
 #define __raw_readb __raw_readb
diff --git a/arch/hexagon/include/asm/uaccess.h b/arch/hexagon/include/asm/uaccess.h
index f000a38..f61cfb2 100644
--- a/arch/hexagon/include/asm/uaccess.h
+++ b/arch/hexagon/include/asm/uaccess.h
@@ -103,7 +103,8 @@
 {
 	long res = __strnlen_user(src, n);
 
-	/* return from strnlen can't be zero -- that would be rubbish. */
+	if (unlikely(!res))
+		return -EFAULT;
 
 	if (res > n) {
 		copy_from_user(dst, src, n);
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 29bd597..c702642 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -56,7 +56,7 @@
 #define alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
 #define task_thread_info(tsk)	((struct thread_info *) 0)
 #endif
-#define free_thread_stack(ti)	/* nothing */
+#define free_thread_stack(tsk)	/* nothing */
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h
index 465c709..bfe1319 100644
--- a/arch/ia64/include/asm/uaccess.h
+++ b/arch/ia64/include/asm/uaccess.h
@@ -241,8 +241,7 @@
 static inline unsigned long
 __copy_to_user (void __user *to, const void *from, unsigned long count)
 {
-	if (!__builtin_constant_p(count))
-		check_object_size(from, count, true);
+	check_object_size(from, count, true);
 
 	return __copy_user(to, (__force void __user *) from, count);
 }
@@ -250,8 +249,7 @@
 static inline unsigned long
 __copy_from_user (void *to, const void __user *from, unsigned long count)
 {
-	if (!__builtin_constant_p(count))
-		check_object_size(to, count, false);
+	check_object_size(to, count, false);
 
 	return __copy_user((__force void __user *) to, from, count);
 }
@@ -265,27 +263,22 @@
 	long __cu_len = (n);								\
 											\
 	if (__access_ok(__cu_to, __cu_len, get_fs())) {					\
-		if (!__builtin_constant_p(n))						\
-			check_object_size(__cu_from, __cu_len, true);			\
+		check_object_size(__cu_from, __cu_len, true);			\
 		__cu_len = __copy_user(__cu_to, (__force void __user *)  __cu_from, __cu_len);	\
 	}										\
 	__cu_len;									\
 })
 
-#define copy_from_user(to, from, n)							\
-({											\
-	void *__cu_to = (to);								\
-	const void __user *__cu_from = (from);						\
-	long __cu_len = (n);								\
-											\
-	__chk_user_ptr(__cu_from);							\
-	if (__access_ok(__cu_from, __cu_len, get_fs())) {				\
-		if (!__builtin_constant_p(n))						\
-			check_object_size(__cu_to, __cu_len, false);			\
-		__cu_len = __copy_user((__force void __user *) __cu_to, __cu_from, __cu_len);	\
-	}										\
-	__cu_len;									\
-})
+static inline unsigned long
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+	check_object_size(to, n, false);
+	if (likely(__access_ok(from, n, get_fs())))
+		n = __copy_user((__force void __user *) to, from, n);
+	else
+		memset(to, 0, n);
+	return n;
+}
 
 #define __copy_in_user(to, from, size)	__copy_user((to), (from), (size))
 
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 92b7bc9..9273e03 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -796,7 +796,7 @@
  *  ACPI based hotplug CPU support
  */
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
-static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 {
 #ifdef CONFIG_ACPI_NUMA
 	/*
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index eb9220c..9509cc7 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -986,7 +986,7 @@
 	int cpu = smp_processor_id();
 
 	previous_current = curr_task(cpu);
-	set_curr_task(cpu, current);
+	ia64_set_curr_task(cpu, current);
 	if ((p = strchr(current->comm, ' ')))
 		*p = '\0';
 
@@ -1360,14 +1360,14 @@
 				cpumask_clear_cpu(i, &mca_cpu);	/* wake next cpu */
 				while (monarch_cpu != -1)
 					cpu_relax();	/* spin until last cpu leaves */
-				set_curr_task(cpu, previous_current);
+				ia64_set_curr_task(cpu, previous_current);
 				ia64_mc_info.imi_rendez_checkin[cpu]
 						= IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 				return;
 			}
 		}
 	}
-	set_curr_task(cpu, previous_current);
+	ia64_set_curr_task(cpu, previous_current);
 	ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 	monarch_cpu = -1;	/* This frees the slaves and previous monarchs */
 }
@@ -1729,7 +1729,7 @@
 		NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1);
 
 		mprintk("Slave on cpu %d returning to normal service.\n", cpu);
-		set_curr_task(cpu, previous_current);
+		ia64_set_curr_task(cpu, previous_current);
 		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
 		atomic_dec(&slaves);
 		return;
@@ -1756,7 +1756,7 @@
 
 	mprintk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
 	atomic_dec(&monarchs);
-	set_curr_task(cpu, previous_current);
+	ia64_set_curr_task(cpu, previous_current);
 	monarch_cpu = -1;
 	return;
 }
@@ -1890,7 +1890,7 @@
 							      PAGE_KERNEL)));
 }
 
-static void ia64_mca_cmc_vector_adjust(void *dummy)
+static int ia64_mca_cpu_online(unsigned int cpu)
 {
 	unsigned long flags;
 
@@ -1898,25 +1898,9 @@
 	if (!cmc_polling_enabled)
 		ia64_mca_cmc_vector_enable(NULL);
 	local_irq_restore(flags);
+	return 0;
 }
 
-static int mca_cpu_callback(struct notifier_block *nfb,
-				      unsigned long action,
-				      void *hcpu)
-{
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		ia64_mca_cmc_vector_adjust(NULL);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block mca_cpu_notifier = {
-	.notifier_call = mca_cpu_callback
-};
-
 /*
  * ia64_mca_init
  *
@@ -2111,15 +2095,13 @@
 	if (!mca_init)
 		return 0;
 
-	register_hotcpu_notifier(&mca_cpu_notifier);
-
 	/* Setup the CMCI/P vector and handler */
 	setup_timer(&cmc_poll_timer, ia64_mca_cmc_poll, 0UL);
 
 	/* Unmask/enable the vector */
 	cmc_polling_enabled = 0;
-	schedule_work(&cmc_enable_work);
-
+	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/mca:online",
+			  ia64_mca_cpu_online, NULL);
 	IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__);
 
 #ifdef CONFIG_ACPI
diff --git a/arch/m32r/include/asm/uaccess.h b/arch/m32r/include/asm/uaccess.h
index cac7014..6f89821 100644
--- a/arch/m32r/include/asm/uaccess.h
+++ b/arch/m32r/include/asm/uaccess.h
@@ -219,7 +219,7 @@
 #define __get_user_nocheck(x, ptr, size)				\
 ({									\
 	long __gu_err = 0;						\
-	unsigned long __gu_val;						\
+	unsigned long __gu_val = 0;					\
 	might_fault();							\
 	__get_user_size(__gu_val, (ptr), (size), __gu_err);		\
 	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index ec9cc1f..ddb8192 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -396,7 +396,7 @@
 	mach_max_dma_address = 0xffffffff;
 
 	mach_reset           = amiga_reset;
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
 	mach_beep            = amiga_mksound;
 #endif
 
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index cbd5991..97a3c38 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -211,7 +211,7 @@
 	arch_gettimeoffset   = atari_gettimeoffset;
 	mach_reset           = atari_reset;
 	mach_max_dma_address = 0xffffff;
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
 	mach_beep          = atari_mksound;
 #endif
 #ifdef CONFIG_HEARTBEAT
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 8f5b6f7..55be7e3 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -566,6 +566,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -584,6 +586,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 31bded9..365dda66 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -525,6 +525,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -543,6 +545,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 0d7739e..ce3cbfd 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -546,6 +546,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -564,6 +566,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index 2cbb5c4..8db496a 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -517,6 +517,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -535,6 +537,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 96102a4..8314156 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -527,6 +527,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -545,6 +547,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 97d88f7..6600270 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -549,6 +549,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -567,6 +569,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index be25ef2..90abfe9 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -629,6 +629,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -647,6 +649,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index a008344..0d502c2 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -517,6 +517,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -535,6 +537,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index 6735a25..5930e91 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -517,6 +517,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -535,6 +537,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 780c6e9..74e3ad8 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -540,6 +540,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -558,6 +560,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 44693cf..4ba8606 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -518,6 +518,8 @@
 CONFIG_TEST_UDELAY=m
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -536,6 +538,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index ef0071d..c6f4972 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -519,6 +519,8 @@
 CONFIG_TEST_STATIC_KEYS=m
 CONFIG_EARLY_PRINTK=y
 CONFIG_CRYPTO_RSA=m
+CONFIG_CRYPTO_DH=m
+CONFIG_CRYPTO_ECDH=m
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_USER=m
 CONFIG_CRYPTO_CRYPTD=m
@@ -537,6 +539,7 @@
 CONFIG_CRYPTO_RMD256=m
 CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA512=m
+CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 5b8ec4d..50633c3 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -105,7 +105,7 @@
 #ifdef CONFIG_M68K_L2_CACHE
 void (*mach_l2_flush) (int);
 #endif
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
 void (*mach_beep)(unsigned int, unsigned int);
 EXPORT_SYMBOL(mach_beep);
 #endif
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 2dcee3a..58507ed 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -42,7 +42,7 @@
 #include <linux/personality.h>
 #include <linux/tty.h>
 #include <linux/binfmts.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/tracehook.h>
 
 #include <asm/setup.h>
@@ -213,7 +213,6 @@
 
 static inline void adjustformat(struct pt_regs *regs)
 {
-	((struct switch_stack *)regs - 1)->a5 = current->mm->start_data;
 	/*
 	 * set format byte to make stack appear modulo 4, which it will
 	 * be when doing the rte
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index 2f33a33..e468953 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -162,7 +162,7 @@
 	mach_halt = mac_poweroff;
 	mach_power_off = mac_poweroff;
 	mach_max_dma_address = 0xffffffff;
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
 	mach_beep = mac_mksound;
 #endif
 
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index fcb7f05..ea89a24 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -180,7 +180,7 @@
 	mach_reset = q40_reset;
 	mach_get_model = q40_get_model;
 
-#if defined(CONFIG_INPUT_M68K_BEEP) || defined(CONFIG_INPUT_M68K_BEEP_MODULE)
+#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
 	mach_beep = q40_mksound;
 #endif
 #ifdef CONFIG_HEARTBEAT
diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
index 8282cbc..273e612 100644
--- a/arch/metag/include/asm/uaccess.h
+++ b/arch/metag/include/asm/uaccess.h
@@ -204,8 +204,9 @@
 static inline unsigned long
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
-	if (access_ok(VERIFY_READ, from, n))
+	if (likely(access_ok(VERIFY_READ, from, n)))
 		return __copy_user_zeroing(to, from, n);
+	memset(to, 0, n);
 	return n;
 }
 
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
index 11fa51c..c0ec116 100644
--- a/arch/metag/mm/init.c
+++ b/arch/metag/mm/init.c
@@ -390,7 +390,6 @@
 
 	free_all_bootmem();
 	mem_init_print_info(NULL);
-	show_mem(0);
 }
 
 void free_initmem(void)
diff --git a/arch/microblaze/include/asm/uaccess.h b/arch/microblaze/include/asm/uaccess.h
index 331b0d3..8266767 100644
--- a/arch/microblaze/include/asm/uaccess.h
+++ b/arch/microblaze/include/asm/uaccess.h
@@ -227,7 +227,7 @@
 
 #define __get_user(x, ptr)						\
 ({									\
-	unsigned long __gu_val;						\
+	unsigned long __gu_val = 0;					\
 	/*unsigned long __gu_ptr = (unsigned long)(ptr);*/		\
 	long __gu_err;							\
 	switch (sizeof(*(ptr))) {					\
@@ -373,10 +373,13 @@
 static inline long copy_from_user(void *to,
 		const void __user *from, unsigned long n)
 {
+	unsigned long res = n;
 	might_fault();
-	if (access_ok(VERIFY_READ, from, n))
-		return __copy_from_user(to, from, n);
-	return n;
+	if (likely(access_ok(VERIFY_READ, from, n)))
+		res = __copy_from_user(to, from, n);
+	if (unlikely(res))
+		memset(to + (n - res), 0, res);
+	return res;
 }
 
 #define __copy_to_user(to, from, n)	\
diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c
index fc7b48a..d57563c 100644
--- a/arch/microblaze/kernel/ftrace.c
+++ b/arch/microblaze/kernel/ftrace.c
@@ -63,7 +63,7 @@
 		return;
 	}
 
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0);
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
 	if (err == -EBUSY) {
 		*parent = old;
 		return;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2638856..212ff92 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -65,6 +65,7 @@
 	select ARCH_CLOCKSOURCE_DATA
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_EXIT_THREAD
+	select HAVE_REGS_AND_STACK_ACCESS_API
 
 menu "Machine selection"
 
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index f0e314c..7f975b2 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -113,42 +113,6 @@
 	help
 	  Add several files to the debugfs to test spinlock speed.
 
-if CPU_MIPSR6
-
-choice
-	prompt "Compact branch policy"
-	default MIPS_COMPACT_BRANCHES_OPTIMAL
-
-config MIPS_COMPACT_BRANCHES_NEVER
-	bool "Never (force delay slot branches)"
-	help
-	  Pass the -mcompact-branches=never flag to the compiler in order to
-	  force it to always emit branches with delay slots, and make no use
-	  of the compact branch instructions introduced by MIPSr6. This is
-	  useful if you suspect there may be an issue with compact branches in
-	  either the compiler or the CPU.
-
-config MIPS_COMPACT_BRANCHES_OPTIMAL
-	bool "Optimal (use where beneficial)"
-	help
-	  Pass the -mcompact-branches=optimal flag to the compiler in order for
-	  it to make use of compact branch instructions where it deems them
-	  beneficial, and use branches with delay slots elsewhere. This is the
-	  default compiler behaviour, and should be used unless you have a
-	  reason to choose otherwise.
-
-config MIPS_COMPACT_BRANCHES_ALWAYS
-	bool "Always (force compact branches)"
-	help
-	  Pass the -mcompact-branches=always flag to the compiler in order to
-	  force it to always emit compact branches, making no use of branch
-	  instructions with delay slots. This can result in more compact code
-	  which may be beneficial in some scenarios.
-
-endchoice
-
-endif # CPU_MIPSR6
-
 config SCACHE_DEBUGFS
 	bool "L2 cache debugfs entries"
 	depends on DEBUG_FS
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index efd7a9d..598ab29 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -203,10 +203,6 @@
 toolchain-virt				:= $(call cc-option-yn,$(mips-cflags) -mvirt)
 cflags-$(toolchain-virt)		+= -DTOOLCHAIN_SUPPORTS_VIRT
 
-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_NEVER)	+= -mcompact-branches=never
-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_OPTIMAL)	+= -mcompact-branches=optimal
-cflags-$(CONFIG_MIPS_COMPACT_BRANCHES_ALWAYS)	+= -mcompact-branches=always
-
 #
 # Firmware support
 #
diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c
index 2e73784..cc3a1e3 100644
--- a/arch/mips/ath79/clock.c
+++ b/arch/mips/ath79/clock.c
@@ -96,7 +96,7 @@
 	struct clk *clk;
 
 	clk = clk_register_fixed_factor(NULL, name, parent_name, 0, mult, div);
-	if (!clk)
+	if (IS_ERR(clk))
 		panic("failed to allocate %s clock structure", name);
 
 	return clk;
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 5a9b87b..c1eb1ff 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -1619,6 +1619,12 @@
 		return -ENOMEM;
 	}
 
+	/*
+	 * Clear the OF_POPULATED flag that was set by of_irq_init()
+	 * so that all GPIO devices will be probed.
+	 */
+	of_node_clear_flag(gpio_node, OF_POPULATED);
+
 	return 0;
 }
 /*
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index b31fbc9..37a932d 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -1059,7 +1059,7 @@
 {
 	return of_platform_bus_probe(NULL, octeon_ids, NULL);
 }
-device_initcall(octeon_publish_devices);
+arch_initcall(octeon_publish_devices);
 
 MODULE_AUTHOR("David Daney <ddaney@caviumnetworks.com>");
 MODULE_LICENSE("GPL");
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 4d457d60..256fe6f 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -380,29 +380,11 @@
 	return 0;
 }
 
-static int octeon_cpu_callback(struct notifier_block *nfb,
-	unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-		octeon_update_boot_vector(cpu);
-		break;
-	case CPU_ONLINE:
-		pr_info("Cpu %d online\n", cpu);
-		break;
-	case CPU_DEAD:
-		break;
-	}
-
-	return NOTIFY_OK;
-}
-
 static int register_cavium_notifier(void)
 {
-	hotcpu_notifier(octeon_cpu_callback, 0);
-	return 0;
+	return cpuhp_setup_state_nocalls(CPUHP_MIPS_SOC_PREPARE,
+					 "mips/cavium:prepare",
+					 octeon_update_boot_vector, NULL);
 }
 late_initcall(register_cavium_notifier);
 
diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S
index d7b9918..1910223 100644
--- a/arch/mips/dec/int-handler.S
+++ b/arch/mips/dec/int-handler.S
@@ -146,7 +146,25 @@
 		/*
 		 * Find irq with highest priority
 		 */
-		 PTR_LA	t1,cpu_mask_nr_tbl
+		# open coded PTR_LA t1, cpu_mask_nr_tbl
+#if (_MIPS_SZPTR == 32)
+		# open coded la t1, cpu_mask_nr_tbl
+		lui	t1, %hi(cpu_mask_nr_tbl)
+		addiu	t1, %lo(cpu_mask_nr_tbl)
+
+#endif
+#if (_MIPS_SZPTR == 64)
+		# open coded dla t1, cpu_mask_nr_tbl
+		.set	push
+		.set	noat
+		lui	t1, %highest(cpu_mask_nr_tbl)
+		lui	AT, %hi(cpu_mask_nr_tbl)
+		daddiu	t1, t1, %higher(cpu_mask_nr_tbl)
+		daddiu	AT, AT, %lo(cpu_mask_nr_tbl)
+		dsll	t1, 32
+		daddu	t1, t1, AT
+		.set	pop
+#endif
 1:		lw	t2,(t1)
 		nop
 		and	t2,t0
@@ -195,7 +213,25 @@
 		/*
 		 * Find irq with highest priority
 		 */
-		 PTR_LA	t1,asic_mask_nr_tbl
+		# open coded PTR_LA t1,asic_mask_nr_tbl
+#if (_MIPS_SZPTR == 32)
+		# open coded la t1, asic_mask_nr_tbl
+		lui	t1, %hi(asic_mask_nr_tbl)
+		addiu	t1, %lo(asic_mask_nr_tbl)
+
+#endif
+#if (_MIPS_SZPTR == 64)
+		# open coded dla t1, asic_mask_nr_tbl
+		.set	push
+		.set	noat
+		lui	t1, %highest(asic_mask_nr_tbl)
+		lui	AT, %hi(asic_mask_nr_tbl)
+		daddiu	t1, t1, %higher(asic_mask_nr_tbl)
+		daddiu	AT, AT, %lo(asic_mask_nr_tbl)
+		dsll	t1, 32
+		daddu	t1, t1, AT
+		.set	pop
+#endif
 2:		lw	t2,(t1)
 		nop
 		and	t2,t0
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 56584a6..83054f7 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -157,6 +157,7 @@
 	ldc1	$f28, THREAD_FPR28(\thread)
 	ldc1	$f30, THREAD_FPR30(\thread)
 	ctc1	\tmp, fcr31
+	.set	pop
 	.endm
 
 	.macro	fpu_restore_16odd thread
diff --git a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
index 0cf5ac1..8ff2cbdf 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/mangle-port.h
@@ -15,8 +15,8 @@
 static inline bool __should_swizzle_bits(volatile void *a)
 {
 	extern const bool octeon_should_swizzle_table[];
+	u64 did = ((u64)(uintptr_t)a >> 40) & 0xff;
 
-	unsigned long did = ((unsigned long)a >> 40) & 0xff;
 	return octeon_should_swizzle_table[did];
 }
 
@@ -29,7 +29,7 @@
 
 #define __should_swizzle_bits(a)	false
 
-static inline bool __should_swizzle_addr(unsigned long p)
+static inline bool __should_swizzle_addr(u64 p)
 {
 	/* boot bus? */
 	return ((p >> 40) & 0xff) == 0;
diff --git a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
index 2f82bfa..c9f5769 100644
--- a/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-paravirt/kernel-entry-init.h
@@ -11,11 +11,13 @@
 #define CP0_EBASE $15, 1
 
 	.macro  kernel_entry_setup
+#ifdef CONFIG_SMP
 	mfc0	t0, CP0_EBASE
 	andi	t0, t0, 0x3ff		# CPUNum
 	beqz	t0, 1f
 	# CPUs other than zero goto smp_bootstrap
 	j	smp_bootstrap
+#endif /* CONFIG_SMP */
 
 1:
 	.endm
diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h
index 58e7874..4fafeef 100644
--- a/arch/mips/include/asm/mips-cm.h
+++ b/arch/mips/include/asm/mips-cm.h
@@ -458,10 +458,21 @@
 static inline unsigned int mips_cm_max_vp_width(void)
 {
 	extern int smp_num_siblings;
+	uint32_t cfg;
 
 	if (mips_cm_revision() >= CM_REV_CM3)
 		return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW_MSK;
 
+	if (mips_cm_present()) {
+		/*
+		 * We presume that all cores in the system will have the same
+		 * number of VP(E)s, and if that ever changes then this will
+		 * need revisiting.
+		 */
+		cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE_MSK;
+		return (cfg >> CM_GCR_Cx_CONFIG_PVPE_SHF) + 1;
+	}
+
 	if (IS_ENABLED(CONFIG_SMP))
 		return smp_num_siblings;
 
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index def9d8d..7dd2dd47 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -660,8 +660,6 @@
 
 #define MIPS_CONF7_IAR		(_ULCAST_(1) << 10)
 #define MIPS_CONF7_AR		(_ULCAST_(1) << 16)
-/* FTLB probability bits for R6 */
-#define MIPS_CONF7_FTLBP_SHIFT	(18)
 
 /* WatchLo* register definitions */
 #define MIPS_WATCHLO_IRW	(_ULCAST_(0x7) << 0)
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index ea0cd97..5f98759 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -164,7 +164,7 @@
  */
 static inline unsigned long ___pa(unsigned long x)
 {
-	if (config_enabled(CONFIG_64BIT)) {
+	if (IS_ENABLED(CONFIG_64BIT)) {
 		/*
 		 * For MIPS64 the virtual address may either be in one of
 		 * the compatibility segements ckseg0 or ckseg1, or it may
@@ -173,7 +173,7 @@
 		return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x);
 	}
 
-	if (!config_enabled(CONFIG_EVA)) {
+	if (!IS_ENABLED(CONFIG_EVA)) {
 		/*
 		 * We're using the standard MIPS32 legacy memory map, ie.
 		 * the address x is going to be in kseg0 or kseg1. We can
diff --git a/arch/mips/include/asm/uaccess.h b/arch/mips/include/asm/uaccess.h
index 11b965f..21a2aab 100644
--- a/arch/mips/include/asm/uaccess.h
+++ b/arch/mips/include/asm/uaccess.h
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/thread_info.h>
+#include <linux/string.h>
 #include <asm/asm-eva.h>
 
 /*
@@ -1170,6 +1171,8 @@
 			__cu_len = __invoke_copy_from_user(__cu_to,	\
 							   __cu_from,	\
 							   __cu_len);   \
+		} else {						\
+			memset(__cu_to, 0, __cu_len);			\
 		}							\
 	}								\
 	__cu_len;							\
diff --git a/arch/mips/include/asm/uprobes.h b/arch/mips/include/asm/uprobes.h
index 34c325c..70a4a2f 100644
--- a/arch/mips/include/asm/uprobes.h
+++ b/arch/mips/include/asm/uprobes.h
@@ -36,7 +36,6 @@
 	unsigned long	resume_epc;
 	u32	insn[2];
 	u32	ixol[2];
-	union	mips_instruction orig_inst[MAX_UINSN_BYTES / 4];
 };
 
 struct arch_uprobe_task {
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index a88d442..dd31754 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -352,7 +352,12 @@
 static int mips_ftlb_disabled;
 static int mips_has_ftlb_configured;
 
-static int set_ftlb_enable(struct cpuinfo_mips *c, int enable);
+enum ftlb_flags {
+	FTLB_EN		= 1 << 0,
+	FTLB_SET_PROB	= 1 << 1,
+};
+
+static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags);
 
 static int __init ftlb_disable(char *s)
 {
@@ -371,8 +376,6 @@
 		return 1;
 	}
 
-	back_to_back_c0_hazard();
-
 	config4 = read_c0_config4();
 
 	/* Check that FTLB has been disabled */
@@ -531,7 +534,7 @@
 		return 3;
 }
 
-static int set_ftlb_enable(struct cpuinfo_mips *c, int enable)
+static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags)
 {
 	unsigned int config;
 
@@ -542,33 +545,33 @@
 	case CPU_P6600:
 		/* proAptiv & related cores use Config6 to enable the FTLB */
 		config = read_c0_config6();
-		/* Clear the old probability value */
-		config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT);
-		if (enable)
-			/* Enable FTLB */
-			write_c0_config6(config |
-					 (calculate_ftlb_probability(c)
-					  << MIPS_CONF6_FTLBP_SHIFT)
-					 | MIPS_CONF6_FTLBEN);
+
+		if (flags & FTLB_EN)
+			config |= MIPS_CONF6_FTLBEN;
 		else
-			/* Disable FTLB */
-			write_c0_config6(config &  ~MIPS_CONF6_FTLBEN);
+			config &= ~MIPS_CONF6_FTLBEN;
+
+		if (flags & FTLB_SET_PROB) {
+			config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT);
+			config |= calculate_ftlb_probability(c)
+				  << MIPS_CONF6_FTLBP_SHIFT;
+		}
+
+		write_c0_config6(config);
+		back_to_back_c0_hazard();
 		break;
 	case CPU_I6400:
-		/* I6400 & related cores use Config7 to configure FTLB */
-		config = read_c0_config7();
-		/* Clear the old probability value */
-		config &= ~(3 << MIPS_CONF7_FTLBP_SHIFT);
-		write_c0_config7(config | (calculate_ftlb_probability(c)
-					   << MIPS_CONF7_FTLBP_SHIFT));
-		break;
+		/* There's no way to disable the FTLB */
+		if (!(flags & FTLB_EN))
+			return 1;
+		return 0;
 	case CPU_LOONGSON3:
 		/* Flush ITLB, DTLB, VTLB and FTLB */
 		write_c0_diag(LOONGSON_DIAG_ITLB | LOONGSON_DIAG_DTLB |
 			      LOONGSON_DIAG_VTLB | LOONGSON_DIAG_FTLB);
 		/* Loongson-3 cores use Config6 to enable the FTLB */
 		config = read_c0_config6();
-		if (enable)
+		if (flags & FTLB_EN)
 			/* Enable FTLB */
 			write_c0_config6(config & ~MIPS_CONF6_FTLBDIS);
 		else
@@ -788,6 +791,7 @@
 				       PAGE_SIZE, config4);
 				/* Switch FTLB off */
 				set_ftlb_enable(c, 0);
+				mips_ftlb_disabled = 1;
 				break;
 			}
 			c->tlbsizeftlbsets = 1 <<
@@ -852,7 +856,7 @@
 	c->scache.flags = MIPS_CACHE_NOT_PRESENT;
 
 	/* Enable FTLB if present and not disabled */
-	set_ftlb_enable(c, !mips_ftlb_disabled);
+	set_ftlb_enable(c, mips_ftlb_disabled ? 0 : FTLB_EN);
 
 	ok = decode_config0(c);			/* Read Config registers.  */
 	BUG_ON(!ok);				/* Arch spec violation!	 */
@@ -902,6 +906,9 @@
 		}
 	}
 
+	/* configure the FTLB write probability */
+	set_ftlb_enable(c, (mips_ftlb_disabled ? 0 : FTLB_EN) | FTLB_SET_PROB);
+
 	mips_probe_watch_registers(c);
 
 #ifndef CONFIG_MIPS_CPS
diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c
index 937c54b..30a3b75 100644
--- a/arch/mips/kernel/ftrace.c
+++ b/arch/mips/kernel/ftrace.c
@@ -382,8 +382,8 @@
 	if (unlikely(faulted))
 		goto out;
 
-	if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
-	    == -EBUSY) {
+	if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
+				     NULL) == -EBUSY) {
 		*parent_ra_addr = old_parent_ra;
 		return;
 	}
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 17326a9..dc0b296 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -142,9 +142,8 @@
 	PTR_LA	k1, __r4k_wait
 	ori	k0, 0x1f	/* 32 byte rollback region */
 	xori	k0, 0x1f
-	bne	k0, k1, 9f
+	bne	k0, k1, \handler
 	MTC0	k0, CP0_EPC
-9:
 	.set pop
 	.endm
 
diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c
index c3372ca..0a7e10b 100644
--- a/arch/mips/kernel/mips-r2-to-r6-emul.c
+++ b/arch/mips/kernel/mips-r2-to-r6-emul.c
@@ -1164,7 +1164,9 @@
 		regs->regs[31] = r31;
 		regs->cp0_epc = epc;
 		if (!used_math()) {     /* First time FPU user.  */
+			preempt_disable();
 			err = init_fpu();
+			preempt_enable();
 			set_used_math();
 		}
 		lose_fpu(1);    /* Save FPU state for the emulator. */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 7429ad0..d2d0615 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -605,14 +605,14 @@
 		return -EOPNOTSUPP;
 
 	/* Avoid inadvertently triggering emulation */
-	if ((value & PR_FP_MODE_FR) && cpu_has_fpu &&
-	    !(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+	if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu &&
+	    !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64))
 		return -EOPNOTSUPP;
-	if ((value & PR_FP_MODE_FRE) && cpu_has_fpu && !cpu_has_fre)
+	if ((value & PR_FP_MODE_FRE) && raw_cpu_has_fpu && !cpu_has_fre)
 		return -EOPNOTSUPP;
 
 	/* FR = 0 not supported in MIPS R6 */
-	if (!(value & PR_FP_MODE_FR) && cpu_has_fpu && cpu_has_mips_r6)
+	if (!(value & PR_FP_MODE_FR) && raw_cpu_has_fpu && cpu_has_mips_r6)
 		return -EOPNOTSUPP;
 
 	/* Proceed with the mode switch */
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 36cf8d6..0d57909 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -87,6 +87,13 @@
 	int x = boot_mem_map.nr_map;
 	int i;
 
+	/*
+	 * If the region reaches the top of the physical address space, adjust
+	 * the size slightly so that (start + size) doesn't overflow
+	 */
+	if (start + size - 1 == (phys_addr_t)ULLONG_MAX)
+		--size;
+
 	/* Sanity check */
 	if (start + size < start) {
 		pr_warn("Trying to add an invalid memory region, skipped\n");
@@ -757,7 +764,6 @@
 	device_tree_init();
 	sparse_init();
 	plat_swiotlb_setup();
-	paging_init();
 
 	dma_contiguous_reserve(PFN_PHYS(max_low_pfn));
 	/* Tell bootmem about cma reserved memblock section */
@@ -870,6 +876,7 @@
 	prefill_possible_map();
 
 	cpu_cache_init();
+	paging_init();
 }
 
 unsigned long kernelsp[NR_CPUS];
diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c
index e9d9fc6..6183ad8 100644
--- a/arch/mips/kernel/smp-cps.c
+++ b/arch/mips/kernel/smp-cps.c
@@ -513,7 +513,7 @@
 		 * in which case the CPC will refuse to power down the core.
 		 */
 		do {
-			mips_cm_lock_other(core, vpe_id);
+			mips_cm_lock_other(core, 0);
 			mips_cpc_lock_other(core);
 			stat = read_cpc_co_stat_conf();
 			stat &= CPC_Cx_STAT_CONF_SEQSTATE_MSK;
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index f95f094..b0baf48 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -322,6 +322,9 @@
 	cpumask_set_cpu(cpu, &cpu_coherent_mask);
 	notify_cpu_starting(cpu);
 
+	cpumask_set_cpu(cpu, &cpu_callin_map);
+	synchronise_count_slave(cpu);
+
 	set_cpu_online(cpu, true);
 
 	set_cpu_sibling_map(cpu);
@@ -329,10 +332,6 @@
 
 	calculate_cpu_foreign_map();
 
-	cpumask_set_cpu(cpu, &cpu_callin_map);
-
-	synchronise_count_slave(cpu);
-
 	/*
 	 * irq will be enabled in ->smp_finish(), enabling it too early
 	 * is dangerous.
diff --git a/arch/mips/kernel/uprobes.c b/arch/mips/kernel/uprobes.c
index 8452d93..4c7c155 100644
--- a/arch/mips/kernel/uprobes.c
+++ b/arch/mips/kernel/uprobes.c
@@ -157,7 +157,6 @@
 int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
-	union mips_instruction insn;
 
 	/*
 	 * Now find the EPC where to resume after the breakpoint has been
@@ -168,10 +167,10 @@
 		unsigned long epc;
 
 		epc = regs->cp0_epc;
-		__compute_return_epc_for_insn(regs, insn);
+		__compute_return_epc_for_insn(regs,
+			(union mips_instruction) aup->insn[0]);
 		aup->resume_epc = regs->cp0_epc;
 	}
-
 	utask->autask.saved_trap_nr = current->thread.trap_nr;
 	current->thread.trap_nr = UPROBE_TRAP_NR;
 	regs->cp0_epc = current->utask->xol_vaddr;
@@ -222,7 +221,7 @@
 		return NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_BREAK:
+	case DIE_UPROBE:
 		if (uprobe_pre_sstep_notifier(regs))
 			return NOTIFY_STOP;
 		break;
@@ -257,7 +256,7 @@
 	ra = regs->regs[31];
 
 	/* Replace the return address with the trampoline address */
-	regs->regs[31] = ra;
+	regs->regs[31] = trampoline_vaddr;
 
 	return ra;
 }
@@ -280,24 +279,6 @@
 	return uprobe_write_opcode(mm, vaddr, UPROBE_SWBP_INSN);
 }
 
-/**
- * set_orig_insn - Restore the original instruction.
- * @mm: the probed process address space.
- * @auprobe: arch specific probepoint information.
- * @vaddr: the virtual address to insert the opcode.
- *
- * For mm @mm, restore the original opcode (opcode) at @vaddr.
- * Return 0 (success) or a negative errno.
- *
- * This overrides the weak version in kernel/events/uprobes.c.
- */
-int set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
-		 unsigned long vaddr)
-{
-	return uprobe_write_opcode(mm, vaddr,
-			*(uprobe_opcode_t *)&auprobe->orig_inst[0].word);
-}
-
 void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
 				  void *src, unsigned long len)
 {
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 9abe447..f9dbfb1 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -39,16 +39,16 @@
 static void __init init_vdso_image(struct mips_vdso_image *image)
 {
 	unsigned long num_pages, i;
+	unsigned long data_pfn;
 
 	BUG_ON(!PAGE_ALIGNED(image->data));
 	BUG_ON(!PAGE_ALIGNED(image->size));
 
 	num_pages = image->size / PAGE_SIZE;
 
-	for (i = 0; i < num_pages; i++) {
-		image->mapping.pages[i] =
-			virt_to_page(image->data + (i * PAGE_SIZE));
-	}
+	data_pfn = __phys_to_pfn(__pa_symbol(image->data));
+	for (i = 0; i < num_pages; i++)
+		image->mapping.pages[i] = pfn_to_page(data_pfn + i);
 }
 
 static int __init init_vdso(void)
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 6eb52b9..e788515 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1642,8 +1642,14 @@
 
 	preempt_disable();
 	if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) {
-		if (kvm_mips_host_tlb_lookup(vcpu, va) < 0)
-			kvm_mips_handle_kseg0_tlb_fault(va, vcpu);
+		if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 &&
+		    kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) {
+			kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n",
+				__func__, va, vcpu, read_c0_entryhi());
+			er = EMULATE_FAIL;
+			preempt_enable();
+			goto done;
+		}
 	} else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) ||
 		   KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) {
 		int index;
@@ -1680,12 +1686,18 @@
 								run, vcpu);
 				preempt_enable();
 				goto dont_update_pc;
-			} else {
-				/*
-				 * We fault an entry from the guest tlb to the
-				 * shadow host TLB
-				 */
-				kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
+			}
+			/*
+			 * We fault an entry from the guest tlb to the
+			 * shadow host TLB
+			 */
+			if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
+				kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+					__func__, va, index, vcpu,
+					read_c0_entryhi());
+				er = EMULATE_FAIL;
+				preempt_enable();
+				goto done;
 			}
 		}
 	} else {
@@ -2659,7 +2671,12 @@
 			 * OK we have a Guest TLB entry, now inject it into the
 			 * shadow host TLB
 			 */
-			kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb);
+			if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) {
+				kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n",
+					__func__, va, index, vcpu,
+					read_c0_entryhi());
+				er = EMULATE_FAIL;
+			}
 		}
 	}
 
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 57319ee5..121008c 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -40,7 +40,7 @@
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	pfn = gfn_to_pfn(kvm, gfn);
 
-	if (is_error_pfn(pfn)) {
+	if (is_error_noslot_pfn(pfn)) {
 		kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn);
 		err = -EFAULT;
 		goto out;
@@ -99,7 +99,7 @@
 	}
 
 	gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT);
-	if (gfn >= kvm->arch.guest_pmap_npages) {
+	if ((gfn | 1) >= kvm->arch.guest_pmap_npages) {
 		kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__,
 			gfn, badvaddr);
 		kvm_mips_dump_host_tlbs();
@@ -138,35 +138,49 @@
 	unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0;
 	struct kvm *kvm = vcpu->kvm;
 	kvm_pfn_t pfn0, pfn1;
+	gfn_t gfn0, gfn1;
+	long tlb_lo[2];
 	int ret;
 
-	if ((tlb->tlb_hi & VPN2_MASK) == 0) {
-		pfn0 = 0;
-		pfn1 = 0;
-	} else {
-		if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[0])
-					   >> PAGE_SHIFT) < 0)
-			return -1;
+	tlb_lo[0] = tlb->tlb_lo[0];
+	tlb_lo[1] = tlb->tlb_lo[1];
 
-		if (kvm_mips_map_page(kvm, mips3_tlbpfn_to_paddr(tlb->tlb_lo[1])
-					   >> PAGE_SHIFT) < 0)
-			return -1;
+	/*
+	 * The commpage address must not be mapped to anything else if the guest
+	 * TLB contains entries nearby, or commpage accesses will break.
+	 */
+	if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) &
+			VPN2_MASK & (PAGE_MASK << 1)))
+		tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0;
 
-		pfn0 = kvm->arch.guest_pmap[
-			mips3_tlbpfn_to_paddr(tlb->tlb_lo[0]) >> PAGE_SHIFT];
-		pfn1 = kvm->arch.guest_pmap[
-			mips3_tlbpfn_to_paddr(tlb->tlb_lo[1]) >> PAGE_SHIFT];
+	gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT;
+	gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT;
+	if (gfn0 >= kvm->arch.guest_pmap_npages ||
+	    gfn1 >= kvm->arch.guest_pmap_npages) {
+		kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n",
+			__func__, gfn0, gfn1, tlb->tlb_hi);
+		kvm_mips_dump_guest_tlbs(vcpu);
+		return -1;
 	}
 
+	if (kvm_mips_map_page(kvm, gfn0) < 0)
+		return -1;
+
+	if (kvm_mips_map_page(kvm, gfn1) < 0)
+		return -1;
+
+	pfn0 = kvm->arch.guest_pmap[gfn0];
+	pfn1 = kvm->arch.guest_pmap[gfn1];
+
 	/* Get attributes from the Guest TLB */
 	entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) |
 		((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
-		(tlb->tlb_lo[0] & ENTRYLO_D) |
-		(tlb->tlb_lo[0] & ENTRYLO_V);
+		(tlb_lo[0] & ENTRYLO_D) |
+		(tlb_lo[0] & ENTRYLO_V);
 	entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) |
 		((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) |
-		(tlb->tlb_lo[1] & ENTRYLO_D) |
-		(tlb->tlb_lo[1] & ENTRYLO_V);
+		(tlb_lo[1] & ENTRYLO_D) |
+		(tlb_lo[1] & ENTRYLO_V);
 
 	kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc,
 		  tlb->tlb_lo[0], tlb->tlb_lo[1]);
@@ -354,9 +368,15 @@
 				local_irq_restore(flags);
 				return KVM_INVALID_INST;
 			}
-			kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
-							     &vcpu->arch.
-							     guest_tlb[index]);
+			if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu,
+						&vcpu->arch.guest_tlb[index])) {
+				kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n",
+					__func__, opc, index, vcpu,
+					read_c0_entryhi());
+				kvm_mips_dump_guest_tlbs(vcpu);
+				local_irq_restore(flags);
+				return KVM_INVALID_INST;
+			}
 			inst = *(opc);
 		}
 		local_irq_restore(flags);
diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c
index 2fec6f7..99aab9f 100644
--- a/arch/mips/loongson64/loongson-3/smp.c
+++ b/arch/mips/loongson64/loongson-3/smp.c
@@ -677,7 +677,7 @@
 	play_dead_at_ckseg1(state_addr);
 }
 
-void loongson3_disable_clock(int cpu)
+static int loongson3_disable_clock(unsigned int cpu)
 {
 	uint64_t core_id = cpu_data[cpu].core;
 	uint64_t package_id = cpu_data[cpu].package;
@@ -688,9 +688,10 @@
 		if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG))
 			LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3));
 	}
+	return 0;
 }
 
-void loongson3_enable_clock(int cpu)
+static int loongson3_enable_clock(unsigned int cpu)
 {
 	uint64_t core_id = cpu_data[cpu].core;
 	uint64_t package_id = cpu_data[cpu].package;
@@ -701,34 +702,15 @@
 		if (!(loongson_sysconf.workarounds & WORKAROUND_CPUHOTPLUG))
 			LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3);
 	}
-}
-
-#define CPU_POST_DEAD_FROZEN	(CPU_POST_DEAD | CPU_TASKS_FROZEN)
-static int loongson3_cpu_callback(struct notifier_block *nfb,
-	unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-
-	switch (action) {
-	case CPU_POST_DEAD:
-	case CPU_POST_DEAD_FROZEN:
-		pr_info("Disable clock for CPU#%d\n", cpu);
-		loongson3_disable_clock(cpu);
-		break;
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		pr_info("Enable clock for CPU#%d\n", cpu);
-		loongson3_enable_clock(cpu);
-		break;
-	}
-
-	return NOTIFY_OK;
+	return 0;
 }
 
 static int register_loongson3_notifier(void)
 {
-	hotcpu_notifier(loongson3_cpu_callback, 0);
-	return 0;
+	return cpuhp_setup_state_nocalls(CPUHP_MIPS_SOC_PREPARE,
+					 "mips/loongson:prepare",
+					 loongson3_enable_clock,
+					 loongson3_disable_clock);
 }
 early_initcall(register_loongson3_notifier);
 
diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c
index 72a4642..4a094f7 100644
--- a/arch/mips/math-emu/dsemul.c
+++ b/arch/mips/math-emu/dsemul.c
@@ -298,5 +298,6 @@
 	/* Set EPC to return to post-branch instruction */
 	xcp->cp0_epc = current->thread.bd_emu_cont_pc;
 	pr_debug("dsemulret to 0x%08lx\n", xcp->cp0_epc);
+	MIPS_FPU_EMU_INC_STATS(ds_emul);
 	return true;
 }
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index cd72805..fa7d8d3 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -800,7 +800,7 @@
 		 * If address-based cache ops don't require an SMP call, then
 		 * use them exclusively for small flushes.
 		 */
-		size = start - end;
+		size = end - start;
 		cache_size = icache_size;
 		if (!cpu_has_ic_fills_f_dc) {
 			size *= 2;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index a5509e7..72f7478 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -261,7 +261,6 @@
 {
 	struct maar_config cfg[BOOT_MEM_MAP_MAX];
 	unsigned i, num_configured, num_cfg = 0;
-	phys_addr_t skip;
 
 	for (i = 0; i < boot_mem_map.nr_map; i++) {
 		switch (boot_mem_map.map[i].type) {
@@ -272,14 +271,14 @@
 			continue;
 		}
 
-		skip = 0x10000 - (boot_mem_map.map[i].addr & 0xffff);
-
+		/* Round lower up */
 		cfg[num_cfg].lower = boot_mem_map.map[i].addr;
-		cfg[num_cfg].lower += skip;
+		cfg[num_cfg].lower = (cfg[num_cfg].lower + 0xffff) & ~0xffff;
 
-		cfg[num_cfg].upper = cfg[num_cfg].lower;
-		cfg[num_cfg].upper += boot_mem_map.map[i].size - 1;
-		cfg[num_cfg].upper -= skip;
+		/* Round upper down */
+		cfg[num_cfg].upper = boot_mem_map.map[i].addr +
+					boot_mem_map.map[i].size;
+		cfg[num_cfg].upper = (cfg[num_cfg].upper & ~0xffff) - 1;
 
 		cfg[num_cfg].attrs = MIPS_MAAR_S;
 		num_cfg++;
@@ -441,6 +440,9 @@
 #ifdef CONFIG_HIGHMEM
 	unsigned long tmp;
 
+	if (cpu_has_dc_aliases)
+		return;
+
 	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
 		struct page *page = pfn_to_page(tmp);
 
diff --git a/arch/mips/mti-malta/malta-setup.c b/arch/mips/mti-malta/malta-setup.c
index ec5b216..7e7364b 100644
--- a/arch/mips/mti-malta/malta-setup.c
+++ b/arch/mips/mti-malta/malta-setup.c
@@ -39,6 +39,9 @@
 #include <linux/console.h>
 #endif
 
+#define ROCIT_CONFIG_GEN0		0x1f403000
+#define  ROCIT_CONFIG_GEN0_PCI_IOCU	BIT(7)
+
 extern void malta_be_init(void);
 extern int malta_be_handler(struct pt_regs *regs, int is_fixup);
 
@@ -107,6 +110,8 @@
 static int __init plat_enable_iocoherency(void)
 {
 	int supported = 0;
+	u32 cfg;
+
 	if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) {
 		if (BONITO_PCICACHECTRL & BONITO_PCICACHECTRL_CPUCOH_PRES) {
 			BONITO_PCICACHECTRL |= BONITO_PCICACHECTRL_CPUCOH_EN;
@@ -129,7 +134,8 @@
 	} else if (mips_cm_numiocu() != 0) {
 		/* Nothing special needs to be done to enable coherency */
 		pr_info("CMP IOCU detected\n");
-		if ((*(unsigned int *)0xbf403000 & 0x81) != 0x81) {
+		cfg = __raw_readl((u32 *)CKSEG1ADDR(ROCIT_CONFIG_GEN0));
+		if (!(cfg & ROCIT_CONFIG_GEN0_PCI_IOCU)) {
 			pr_crit("IOCU OPERATION DISABLED BY SWITCH - DEFAULTING TO SW IO COHERENCY\n");
 			return 0;
 		}
diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
index 20f7bf6..d012e87 100644
--- a/arch/mn10300/include/asm/uaccess.h
+++ b/arch/mn10300/include/asm/uaccess.h
@@ -166,6 +166,7 @@
 		"2:\n"						\
 		"	.section	.fixup,\"ax\"\n"	\
 		"3:\n\t"					\
+		"	mov		0,%1\n"			\
 		"	mov		%3,%0\n"		\
 		"	jmp		2b\n"			\
 		"	.previous\n"				\
diff --git a/arch/mn10300/lib/usercopy.c b/arch/mn10300/lib/usercopy.c
index 7826e6c..ce8899e 100644
--- a/arch/mn10300/lib/usercopy.c
+++ b/arch/mn10300/lib/usercopy.c
@@ -9,7 +9,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the Licence, or (at your option) any later version.
  */
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 unsigned long
 __generic_copy_to_user(void *to, const void *from, unsigned long n)
@@ -24,6 +24,8 @@
 {
 	if (access_ok(VERIFY_READ, from, n))
 		__copy_user_zeroing(to, from, n);
+	else
+		memset(to, 0, n);
 	return n;
 }
 
diff --git a/arch/nios2/boot/dts/10m50_devboard.dts b/arch/nios2/boot/dts/10m50_devboard.dts
index 3e411c6..f362b22 100755
--- a/arch/nios2/boot/dts/10m50_devboard.dts
+++ b/arch/nios2/boot/dts/10m50_devboard.dts
@@ -83,6 +83,7 @@
 			fifo-size = <32>;
 			reg-io-width = <4>;
 			reg-shift = <2>;
+			tx-threshold = <16>;
 		};
 
 		sysid: sysid@18001528 {
diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
index caa51ff..0ab8232 100644
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h
@@ -102,9 +102,12 @@
 static inline long copy_from_user(void *to, const void __user *from,
 				unsigned long n)
 {
-	if (!access_ok(VERIFY_READ, from, n))
-		return n;
-	return __copy_from_user(to, from, n);
+	unsigned long res = n;
+	if (access_ok(VERIFY_READ, from, n))
+		res = __copy_from_user(to, from, n);
+	if (unlikely(res))
+		memset(to + (n - res), 0, res);
+	return res;
 }
 
 static inline long copy_to_user(void __user *to, const void *from,
@@ -139,7 +142,7 @@
 
 #define __get_user_unknown(val, size, ptr, err) do {			\
 	err = 0;							\
-	if (copy_from_user(&(val), ptr, size)) {			\
+	if (__copy_from_user(&(val), ptr, size)) {			\
 		err = -EFAULT;						\
 	}								\
 	} while (0)
@@ -166,7 +169,7 @@
 	({								\
 	long __gu_err = -EFAULT;					\
 	const __typeof__(*(ptr)) __user *__gu_ptr = (ptr);		\
-	unsigned long __gu_val;						\
+	unsigned long __gu_val = 0;					\
 	__get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\
 	(x) = (__force __typeof__(x))__gu_val;				\
 	__gu_err;							\
diff --git a/arch/openrisc/include/asm/uaccess.h b/arch/openrisc/include/asm/uaccess.h
index a6bd07c..5cc6b4f 100644
--- a/arch/openrisc/include/asm/uaccess.h
+++ b/arch/openrisc/include/asm/uaccess.h
@@ -273,28 +273,20 @@
 static inline unsigned long
 copy_from_user(void *to, const void *from, unsigned long n)
 {
-	unsigned long over;
+	unsigned long res = n;
 
-	if (access_ok(VERIFY_READ, from, n))
-		return __copy_tofrom_user(to, from, n);
-	if ((unsigned long)from < TASK_SIZE) {
-		over = (unsigned long)from + n - TASK_SIZE;
-		return __copy_tofrom_user(to, from, n - over) + over;
-	}
-	return n;
+	if (likely(access_ok(VERIFY_READ, from, n)))
+		res = __copy_tofrom_user(to, from, n);
+	if (unlikely(res))
+		memset(to + (n - res), 0, res);
+	return res;
 }
 
 static inline unsigned long
 copy_to_user(void *to, const void *from, unsigned long n)
 {
-	unsigned long over;
-
-	if (access_ok(VERIFY_WRITE, to, n))
-		return __copy_tofrom_user(to, from, n);
-	if ((unsigned long)to < TASK_SIZE) {
-		over = (unsigned long)to + n - TASK_SIZE;
-		return __copy_tofrom_user(to, from, n - over) + over;
-	}
+	if (likely(access_ok(VERIFY_WRITE, to, n)))
+		n = __copy_tofrom_user(to, from, n);
 	return n;
 }
 
@@ -303,13 +295,8 @@
 static inline __must_check unsigned long
 clear_user(void *addr, unsigned long size)
 {
-
-	if (access_ok(VERIFY_WRITE, addr, size))
-		return __clear_user(addr, size);
-	if ((unsigned long)addr < TASK_SIZE) {
-		unsigned long over = (unsigned long)addr + size - TASK_SIZE;
-		return __clear_user(addr, size - over) + over;
-	}
+	if (likely(access_ok(VERIFY_WRITE, addr, size)))
+		size = __clear_user(addr, size);
 	return size;
 }
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index cd87781..af12c2d 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -1,6 +1,5 @@
 config PARISC
 	def_bool y
-	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select HAVE_IDE
 	select HAVE_OPROFILE
diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig
index 1a8f6f95..f6a4c01 100644
--- a/arch/parisc/configs/c8000_defconfig
+++ b/arch/parisc/configs/c8000_defconfig
@@ -245,7 +245,6 @@
 CONFIG_PROVE_RCU_DELAY=y
 CONFIG_DEBUG_BLOCK_EXT_DEVT=y
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_KEYS=y
 # CONFIG_CRYPTO_HW is not set
 CONFIG_FONTS=y
diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig
index 7e07926..c564e6e 100644
--- a/arch/parisc/configs/generic-64bit_defconfig
+++ b/arch/parisc/configs/generic-64bit_defconfig
@@ -291,7 +291,6 @@
 CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_TIMER_STATS=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_ECB=m
 CONFIG_CRYPTO_PCBC=m
diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h
index 0f59fd9..4828478 100644
--- a/arch/parisc/include/asm/uaccess.h
+++ b/arch/parisc/include/asm/uaccess.h
@@ -10,6 +10,7 @@
 #include <asm-generic/uaccess-unaligned.h>
 
 #include <linux/bug.h>
+#include <linux/string.h>
 
 #define VERIFY_READ 0
 #define VERIFY_WRITE 1
@@ -208,26 +209,30 @@
 #define __copy_to_user_inatomic __copy_to_user
 #define __copy_from_user_inatomic __copy_from_user
 
-extern void copy_from_user_overflow(void)
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-        __compiletime_error("copy_from_user() buffer size is not provably correct")
-#else
-        __compiletime_warning("copy_from_user() buffer size is not provably correct")
-#endif
-;
+extern void __compiletime_error("usercopy buffer size is too small")
+__bad_copy_user(void);
+
+static inline void copy_user_overflow(int size, unsigned long count)
+{
+	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
 
 static inline unsigned long __must_check copy_from_user(void *to,
                                           const void __user *from,
                                           unsigned long n)
 {
         int sz = __compiletime_object_size(to);
-        int ret = -EFAULT;
+        unsigned long ret = n;
 
-        if (likely(sz == -1 || !__builtin_constant_p(n) || sz >= n))
+        if (likely(sz == -1 || sz >= n))
                 ret = __copy_from_user(to, from, n);
-        else
-                copy_from_user_overflow();
+        else if (!__builtin_constant_p(n))
+		copy_user_overflow(sz, n);
+	else
+                __bad_copy_user();
 
+	if (unlikely(ret))
+		memset(to + (n - ret), 0, ret);
         return ret;
 }
 
diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h
index c0ae625..274d5bc 100644
--- a/arch/parisc/include/uapi/asm/errno.h
+++ b/arch/parisc/include/uapi/asm/errno.h
@@ -97,10 +97,10 @@
 #define	ENOTCONN	235	/* Transport endpoint is not connected */
 #define	ESHUTDOWN	236	/* Cannot send after transport endpoint shutdown */
 #define	ETOOMANYREFS	237	/* Too many references: cannot splice */
-#define EREFUSED	ECONNREFUSED	/* for HP's NFS apparently */
 #define	ETIMEDOUT	238	/* Connection timed out */
 #define	ECONNREFUSED	239	/* Connection refused */
-#define EREMOTERELEASE	240	/* Remote peer released connection */
+#define	EREFUSED	ECONNREFUSED	/* for HP's NFS apparently */
+#define	EREMOTERELEASE	240	/* Remote peer released connection */
 #define	EHOSTDOWN	241	/* Host is down */
 #define	EHOSTUNREACH	242	/* No route to host */
 
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index a828a0a..5a5506a 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -48,7 +48,7 @@
 		return;
 
         if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-			0 ) == -EBUSY)
+				     0, NULL) == -EBUSY)
                 return;
 
 	/* activate parisc_return_to_handler() as return point */
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 5adc339..0c2a94a 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -51,8 +51,6 @@
 
 DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data);
 
-extern int update_cr16_clocksource(void);	/* from time.c */
-
 /*
 **  	PARISC CPU driver - claim "device" and initialize CPU data structures.
 **
@@ -228,12 +226,6 @@
 	}
 #endif
 
-	/* If we've registered more than one cpu,
-	 * we'll use the jiffies clocksource since cr16
-	 * is not synchronized between CPUs.
-	 */
-	update_cr16_clocksource();
-
 	return 0;
 }
 
diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c
index 505cf1a..4b0b963 100644
--- a/arch/parisc/kernel/time.c
+++ b/arch/parisc/kernel/time.c
@@ -221,18 +221,6 @@
 	.flags			= CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-int update_cr16_clocksource(void)
-{
-	/* since the cr16 cycle counters are not synchronized across CPUs,
-	   we'll check if we should switch to a safe clocksource: */
-	if (clocksource_cr16.rating != 0 && num_online_cpus() > 1) {
-		clocksource_change_rating(&clocksource_cr16, 0);
-		return 1;
-	}
-
-	return 0;
-}
-
 void __init start_cpu_itimer(void)
 {
 	unsigned int cpu = smp_processor_id();
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index ca25454..1934707 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -66,29 +66,28 @@
 UTS_MACHINE := $(OLDARCH)
 
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC	+= -mlittle-endian
-ifneq ($(cc-name),clang)
-override CC	+= -mno-strict-align
-endif
-override AS	+= -mlittle-endian
 override LD	+= -EL
-override CROSS32CC += -mlittle-endian
 override CROSS32AS += -mlittle-endian
 LDEMULATION	:= lppc
 GNUTARGET	:= powerpcle
 MULTIPLEWORD	:= -mno-multiple
 KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
 else
-ifeq ($(call cc-option-yn,-mbig-endian),y)
-override CC	+= -mbig-endian
-override AS	+= -mbig-endian
-endif
 override LD	+= -EB
 LDEMULATION	:= ppc
 GNUTARGET	:= powerpc
 MULTIPLEWORD	:= -mmultiple
 endif
 
+cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
+ifneq ($(cc-name),clang)
+  cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mno-strict-align
+endif
+
+aflags-$(CONFIG_CPU_BIG_ENDIAN)		+= $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= -mlittle-endian
+
 ifeq ($(HAS_BIARCH),y)
 override AS	+= -a$(CONFIG_WORD_SIZE)
 override LD	+= -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
@@ -232,6 +231,9 @@
 KBUILD_AFLAGS += $(cpu-as-y)
 KBUILD_CFLAGS += $(cpu-as-y)
 
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
+
 head-y				:= arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
 head-$(CONFIG_8xx)		:= arch/powerpc/kernel/head_8xx.o
 head-$(CONFIG_40x)		:= arch/powerpc/kernel/head_40x.o
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index bfe3d37..9fa046d 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -4,6 +4,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/cpufeature.h>
 #include <asm/switch_to.h>
 
 #define CHKSUM_BLOCK_SIZE	1
@@ -157,7 +158,7 @@
 	crypto_unregister_shash(&alg);
 }
 
-module_init(crc32c_vpmsum_mod_init);
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_init);
 module_exit(crc32c_vpmsum_mod_fini);
 
 MODULE_AUTHOR("Anton Blanchard <anton@samba.org>");
diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
index 2ef55f8..b312b15 100644
--- a/arch/powerpc/include/asm/cpu_has_feature.h
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -15,7 +15,7 @@
 #ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
 #include <linux/jump_label.h>
 
-#define NUM_CPU_FTR_KEYS	64
+#define NUM_CPU_FTR_KEYS	BITS_PER_LONG
 
 extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS];
 
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 3d7fc06..01b8a13 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -19,4 +19,17 @@
 
 #endif
 
+/* Idle state entry routines */
+#ifdef	CONFIG_PPC_P7_NAP
+#define	IDLE_STATE_ENTER_SEQ(IDLE_INST)				\
+	/* Magic NAP/SLEEP/WINKLE mode enter sequence */	\
+	std	r0,0(r1);					\
+	ptesync;						\
+	ld	r0,0(r1);					\
+1:	cmp	cr0,r0,r0;					\
+	bne	1b;						\
+	IDLE_INST;						\
+	b	.
+#endif /* CONFIG_PPC_P7_NAP */
+
 #endif
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index 666bef4e..9377bdf 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -3,6 +3,7 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/cpumask.h>
+#include <asm/cpu_has_feature.h>
 
 /*
  * Mapping of threads to cores
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 57fec8a..ddf54f5 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -186,6 +186,7 @@
 
 #ifndef __ASSEMBLY__
 void apply_feature_fixups(void);
+void setup_feature_keys(void);
 #endif
 
 #endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
index 88b4901..85b7a1a 100644
--- a/arch/powerpc/include/asm/hmi.h
+++ b/arch/powerpc/include/asm/hmi.h
@@ -21,7 +21,7 @@
 #ifndef __ASM_PPC64_HMI_H__
 #define __ASM_PPC64_HMI_H__
 
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 
 #define	CORE_TB_RESYNC_REQ_BIT		63
 #define MAX_SUBCORE_PER_CORE		4
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 148303e7..6a6792b 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -183,11 +183,6 @@
 	 */
 	u16 in_mce;
 	u8 hmi_event_available;		 /* HMI event is available */
-	/*
-	 * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
-	 * more details
-	 */
-	struct sibling_subcore_state *sibling_subcore_state;
 #endif
 
 	/* Stuff for accurate time accounting */
@@ -202,6 +197,13 @@
 	struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
 #endif
 	struct kvmppc_host_state kvm_hstate;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	/*
+	 * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
+	 * more details
+	 */
+	struct sibling_subcore_state *sibling_subcore_state;
+#endif
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index b5e88e4..c0309c5 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -301,6 +301,7 @@
 /* Allocate & free a PCI host bridge structure */
 extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev);
 extern void pcibios_free_controller(struct pci_controller *phb);
+extern void pcibios_free_controller_deferred(struct pci_host_bridge *bridge);
 
 #ifdef CONFIG_PCI
 extern int pcibios_vaddr_is_ioport(void __iomem *address);
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 0a74ebe..17c8380 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -75,14 +75,6 @@
 static inline void __giveup_spe(struct task_struct *t) { }
 #endif
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void flush_tmregs_to_thread(struct task_struct *);
-#else
-static inline void flush_tmregs_to_thread(struct task_struct *t)
-{
-}
-#endif
-
 static inline void clear_task_ebb(struct task_struct *t)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index c1dc6c1..c266227 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -308,40 +308,21 @@
 static inline unsigned long copy_from_user(void *to,
 		const void __user *from, unsigned long n)
 {
-	unsigned long over;
-
-	if (access_ok(VERIFY_READ, from, n)) {
-		if (!__builtin_constant_p(n))
-			check_object_size(to, n, false);
+	if (likely(access_ok(VERIFY_READ, from, n))) {
+		check_object_size(to, n, false);
 		return __copy_tofrom_user((__force void __user *)to, from, n);
 	}
-	if ((unsigned long)from < TASK_SIZE) {
-		over = (unsigned long)from + n - TASK_SIZE;
-		if (!__builtin_constant_p(n - over))
-			check_object_size(to, n - over, false);
-		return __copy_tofrom_user((__force void __user *)to, from,
-				n - over) + over;
-	}
+	memset(to, 0, n);
 	return n;
 }
 
 static inline unsigned long copy_to_user(void __user *to,
 		const void *from, unsigned long n)
 {
-	unsigned long over;
-
 	if (access_ok(VERIFY_WRITE, to, n)) {
-		if (!__builtin_constant_p(n))
-			check_object_size(from, n, true);
+		check_object_size(from, n, true);
 		return __copy_tofrom_user(to, (__force void __user *)from, n);
 	}
-	if ((unsigned long)to < TASK_SIZE) {
-		over = (unsigned long)to + n - TASK_SIZE;
-		if (!__builtin_constant_p(n))
-			check_object_size(from, n - over, true);
-		return __copy_tofrom_user(to, (__force void __user *)from,
-				n - over) + over;
-	}
 	return n;
 }
 
@@ -383,8 +364,7 @@
 			return 0;
 	}
 
-	if (!__builtin_constant_p(n))
-		check_object_size(to, n, false);
+	check_object_size(to, n, false);
 
 	return __copy_tofrom_user((__force void __user *)to, from, n);
 }
@@ -412,8 +392,8 @@
 		if (ret == 0)
 			return 0;
 	}
-	if (!__builtin_constant_p(n))
-		check_object_size(from, n, true);
+
+	check_object_size(from, n, true);
 
 	return __copy_tofrom_user(to, (__force const void __user *)from, n);
 }
@@ -439,10 +419,6 @@
 	might_fault();
 	if (likely(access_ok(VERIFY_WRITE, addr, size)))
 		return __clear_user(addr, size);
-	if ((unsigned long)addr < TASK_SIZE) {
-		unsigned long over = (unsigned long)addr + size - TASK_SIZE;
-		return __clear_user(addr, size - over) + over;
-	}
 	return size;
 }
 
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index f5f729c..f0b2385 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -159,6 +159,8 @@
 extern void xics_kexec_teardown_cpu(int secondary);
 extern void xics_migrate_irqs_away(void);
 extern void icp_native_eoi(struct irq_data *d);
+extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern int xics_retrigger(struct irq_data *data);
 #ifdef CONFIG_SMP
 extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
 			       unsigned int strict_check);
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index b2027a5..fe4c075 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -41,7 +41,7 @@
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_power.o
-obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o hmi.o
+obj-$(CONFIG_PPC_BOOK3S_64)	+= mce.o mce_power.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o idle_book3e.o
 obj-$(CONFIG_PPC64)		+= vdso64/
 obj-$(CONFIG_ALTIVEC)		+= vecemu.o
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c9bc78e..7429556 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -168,10 +168,10 @@
 	int n = 0, l = 0;
 	char buffer[128];
 
-	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
+	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
 		       edev->phb->global_number, pdn->busno,
 		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
-	pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
+	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
 		edev->phb->global_number, pdn->busno,
 		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
 
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6b8bc0d..5afd03e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -368,13 +368,13 @@
 tabort_syscall:
 	/* Firstly we need to enable TM in the kernel */
 	mfmsr	r10
-	li	r13, 1
-	rldimi	r10, r13, MSR_TM_LG, 63-MSR_TM_LG
+	li	r9, 1
+	rldimi	r10, r9, MSR_TM_LG, 63-MSR_TM_LG
 	mtmsrd	r10, 0
 
 	/* tabort, this dooms the transaction, nothing else */
-	li	r13, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
-	TABORT(R13)
+	li	r9, (TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT)
+	TABORT(R9)
 
 	/*
 	 * Return directly to userspace. We have corrupted user register state,
@@ -382,8 +382,8 @@
 	 * resume after the tbegin of the aborted transaction with the
 	 * checkpointed register state.
 	 */
-	li	r13, MSR_RI
-	andc	r10, r10, r13
+	li	r9, MSR_RI
+	andc	r10, r10, r9
 	mtmsrd	r10, 1
 	mtspr	SPRN_SRR0, r11
 	mtspr	SPRN_SRR1, r12
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 41091fd..bffec73 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -144,29 +144,14 @@
 	 * vector
 	 */
 	SET_SCRATCH0(r13)		/* save r13 */
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
-	/* Running native on arch 2.06 or later, check if we are
-	 * waking up from nap. We only handle no state loss and
-	 * supervisor state loss. We do -not- handle hypervisor
-	 * state loss at this time.
+	/*
+	 * Running native on arch 2.06 or later, we may wakeup from winkle
+	 * inside machine check. If yes, then last bit of HSPGR0 would be set
+	 * to 1. Hence clear it unconditionally.
 	 */
-	mfspr	r13,SPRN_SRR1
-	rlwinm.	r13,r13,47-31,30,31
-	OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-	beq	9f
-
-	mfspr	r13,SPRN_SRR1
-	rlwinm.	r13,r13,47-31,30,31
-	/* waking up from powersave (nap) state */
-	cmpwi	cr1,r13,2
-	/* Total loss of HV state is fatal. let's just stay stuck here */
-	OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-	bgt	cr1,.
-9:
-	OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
+	GET_PACA(r13)
+	clrrdi	r13,r13,1
+	SET_PACA(r13)
 	EXCEPTION_PROLOG_0(PACA_EXMC)
 BEGIN_FTR_SECTION
 	b	machine_check_powernv_early
@@ -500,7 +485,23 @@
 	EXCEPTION_PROLOG_0(PACA_EXMC)
 machine_check_pSeries_0:
 	EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200)
-	EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD)
+	/*
+	 * The following is essentially EXCEPTION_PROLOG_PSERIES_1 with the
+	 * difference that MSR_RI is not enabled, because PACA_EXMC is being
+	 * used, so nested machine check corrupts it. machine_check_common
+	 * enables MSR_RI.
+	 */
+	ld	r12,PACAKBASE(r13)
+	ld	r10,PACAKMSR(r13)
+	xori	r10,r10,MSR_RI
+	mfspr	r11,SPRN_SRR0
+	LOAD_HANDLER(r12, machine_check_common)
+	mtspr	SPRN_SRR0,r12
+	mfspr	r12,SPRN_SRR1
+	mtspr	SPRN_SRR1,r10
+	rfid
+	b	.	/* prevent speculative execution */
+
 	KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
 	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
 	KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
@@ -984,14 +985,17 @@
 machine_check_common:
 
 	mfspr	r10,SPRN_DAR
-	std	r10,PACA_EXGEN+EX_DAR(r13)
+	std	r10,PACA_EXMC+EX_DAR(r13)
 	mfspr	r10,SPRN_DSISR
-	stw	r10,PACA_EXGEN+EX_DSISR(r13)
+	stw	r10,PACA_EXMC+EX_DSISR(r13)
 	EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
 	FINISH_NAP
 	RECONCILE_IRQ_STATE(r10, r11)
-	ld	r3,PACA_EXGEN+EX_DAR(r13)
-	lwz	r4,PACA_EXGEN+EX_DSISR(r13)
+	ld	r3,PACA_EXMC+EX_DAR(r13)
+	lwz	r4,PACA_EXMC+EX_DSISR(r13)
+	/* Enable MSR_RI when finished with PACA_EXMC */
+	li	r10,MSR_RI
+	mtmsrd 	r10,1
 	std	r3,_DAR(r1)
 	std	r4,_DSISR(r1)
 	bl	save_nvgprs
@@ -1273,25 +1277,51 @@
 	 * Check if thread was in power saving mode. We come here when any
 	 * of the following is true:
 	 * a. thread wasn't in power saving mode
-	 * b. thread was in power saving mode with no state loss or
-	 *    supervisor state loss
+	 * b. thread was in power saving mode with no state loss,
+	 *    supervisor state loss or hypervisor state loss.
 	 *
-	 * Go back to nap again if (b) is true.
+	 * Go back to nap/sleep/winkle mode again if (b) is true.
 	 */
 	rlwinm.	r11,r12,47-31,30,31	/* Was it in power saving mode? */
 	beq	4f			/* No, it wasn;t */
 	/* Thread was in power saving mode. Go back to nap again. */
 	cmpwi	r11,2
-	bne	3f
-	/* Supervisor state loss */
+	blt	3f
+	/* Supervisor/Hypervisor state loss */
 	li	r0,1
 	stb	r0,PACA_NAPSTATELOST(r13)
 3:	bl	machine_check_queue_event
 	MACHINE_CHECK_HANDLER_WINDUP
 	GET_PACA(r13)
 	ld	r1,PACAR1(r13)
-	li	r3,PNV_THREAD_NAP
-	b	pnv_enter_arch207_idle_mode
+	/*
+	 * Check what idle state this CPU was in and go back to same mode
+	 * again.
+	 */
+	lbz	r3,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	r3,PNV_THREAD_NAP
+	bgt	10f
+	IDLE_STATE_ENTER_SEQ(PPC_NAP)
+	/* No return */
+10:
+	cmpwi	r3,PNV_THREAD_SLEEP
+	bgt	2f
+	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+	/* No return */
+
+2:
+	/*
+	 * Go back to winkle. Please note that this thread was woken up in
+	 * machine check from winkle and have not restored the per-subcore
+	 * state. Hence before going back to winkle, set last bit of HSPGR0
+	 * to 1. This will make sure that if this thread gets woken up
+	 * again at reset vector 0x100 then it will get chance to restore
+	 * the subcore state.
+	 */
+	ori	r13,r13,1
+	SET_PACA(r13)
+	IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
+	/* No return */
 4:
 #endif
 	/*
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index cc52d97..a95639b 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -593,7 +593,8 @@
 	if (!ftrace_graph_entry(&trace))
 		goto out;
 
-	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+				     NULL) == -EBUSY)
 		goto out;
 
 	parent = return_hooker;
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index ba79d15..bd739fe 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -44,18 +44,6 @@
 				PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
 				PSSCR_MTL_MASK
 
-/* Idle state entry routines */
-
-#define	IDLE_STATE_ENTER_SEQ(IDLE_INST)				\
-	/* Magic NAP/SLEEP/WINKLE mode enter sequence */	\
-	std	r0,0(r1);					\
-	ptesync;						\
-	ld	r0,0(r1);					\
-1:	cmp	cr0,r0,r0;					\
-	bne	1b;						\
-	IDLE_INST;						\
-	b	.
-
 	.text
 
 /*
@@ -363,8 +351,8 @@
  * cr3 - set to gt if waking up with partial/complete hypervisor state loss
  */
 _GLOBAL(pnv_restore_hyp_resource)
-	ld	r2,PACATOC(r13);
 BEGIN_FTR_SECTION
+	ld	r2,PACATOC(r13);
 	/*
 	 * POWER ISA 3. Use PSSCR to determine if we
 	 * are waking up from deep idle state
@@ -395,6 +383,9 @@
 	 */
 	clrldi	r5,r13,63
 	clrrdi	r13,r13,1
+
+	/* Now that we are sure r13 is corrected, load TOC */
+	ld	r2,PACATOC(r13);
 	cmpwi	cr4,r5,1
 	mtspr	SPRN_HSPRG0,r13
 
@@ -420,7 +411,7 @@
  *
  * r13 - PACA
  * cr3 - gt if waking up with partial/complete hypervisor state loss
- * cr4 - eq if waking up from complete hypervisor state loss.
+ * cr4 - gt or eq if waking up from complete hypervisor state loss.
  */
 _GLOBAL(pnv_wakeup_tb_loss)
 	ld	r1,PACAR1(r13)
@@ -462,7 +453,7 @@
 	 * At this stage
 	 * cr2 - eq if first thread to wakeup in core
 	 * cr3-  gt if waking up with partial/complete hypervisor state loss
-	 * cr4 - eq if waking up from complete hypervisor state loss.
+	 * cr4 - gt or eq if waking up from complete hypervisor state loss.
 	 */
 
 	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
@@ -490,7 +481,7 @@
 	 * If waking up from sleep, subcore state is not lost. Hence
 	 * skip subcore state restore
 	 */
-	bne	cr4,subcore_state_restored
+	blt	cr4,subcore_state_restored
 
 	/* Restore per-subcore state */
 	ld      r4,_SDR1(r1)
@@ -535,7 +526,7 @@
 	 * If waking up from sleep, per core state is not lost, skip to
 	 * clear_lock.
 	 */
-	bne	cr4,clear_lock
+	blt	cr4,clear_lock
 
 	/*
 	 * First thread in the core to wake up and its waking up with
@@ -566,7 +557,7 @@
 	 * If waking up from sleep, hypervisor state is not lost. Hence
 	 * skip hypervisor state restore.
 	 */
-	bne	cr4,hypervisor_state_restored
+	blt	cr4,hypervisor_state_restored
 
 	/* Waking up from winkle */
 
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 3ed8ec09..e785cc9 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -29,7 +29,7 @@
 #include <linux/kprobes.h>
 #include <linux/ptrace.h>
 #include <linux/preempt.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/slab.h>
 #include <asm/code-patching.h>
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ef267fd..5e7ece0 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -92,7 +92,8 @@
 	mce->in_use = 1;
 
 	mce->initiator = MCE_INITIATOR_CPU;
-	if (handled)
+	/* Mark it recovered if we have handled it and MSR(RI=1). */
+	if (handled && (regs->msr & MSR_RI))
 		mce->disposition = MCE_DISPOSITION_RECOVERED;
 	else
 		mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index a5c0153..e589080 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -78,6 +78,7 @@
 static int get_phb_number(struct device_node *dn)
 {
 	int ret, phb_id = -1;
+	u32 prop_32;
 	u64 prop;
 
 	/*
@@ -86,8 +87,10 @@
 	 * reading "ibm,opal-phbid", only present in OPAL environment.
 	 */
 	ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
-	if (ret)
-		ret = of_property_read_u32_index(dn, "reg", 1, (u32 *)&prop);
+	if (ret) {
+		ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+		prop = prop_32;
+	}
 
 	if (!ret)
 		phb_id = (int)(prop & (MAX_PHBS - 1));
@@ -151,6 +154,42 @@
 EXPORT_SYMBOL_GPL(pcibios_free_controller);
 
 /*
+ * This function is used to call pcibios_free_controller()
+ * in a deferred manner: a callback from the PCI subsystem.
+ *
+ * _*DO NOT*_ call pcibios_free_controller() explicitly if
+ * this is used (or it may access an invalid *phb pointer).
+ *
+ * The callback occurs when all references to the root bus
+ * are dropped (e.g., child buses/devices and their users).
+ *
+ * It's called as .release_fn() of 'struct pci_host_bridge'
+ * which is associated with the 'struct pci_controller.bus'
+ * (root bus) - it expects .release_data to hold a pointer
+ * to 'struct pci_controller'.
+ *
+ * In order to use it, register .release_fn()/release_data
+ * like this:
+ *
+ * pci_set_host_bridge_release(bridge,
+ *                             pcibios_free_controller_deferred
+ *                             (void *) phb);
+ *
+ * e.g. in the pcibios_root_bridge_prepare() callback from
+ * pci_create_root_bus().
+ */
+void pcibios_free_controller_deferred(struct pci_host_bridge *bridge)
+{
+	struct pci_controller *phb = (struct pci_controller *)
+					 bridge->release_data;
+
+	pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic);
+
+	pcibios_free_controller(phb);
+}
+EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred);
+
+/*
  * The function is used to return the minimal alignment
  * for memory or I/O windows of the associated P2P bridge.
  * By default, 4KiB alignment for I/O windows and 1MiB for
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 58ccf86..9ee2623 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1074,26 +1074,6 @@
 #endif
 }
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void flush_tmregs_to_thread(struct task_struct *tsk)
-{
-	/*
-	 * Process self tracing is not yet supported through
-	 * ptrace interface. Ptrace generic code should have
-	 * prevented this from happening in the first place.
-	 * Warn once here with the message, if some how it
-	 * is attempted.
-	 */
-	WARN_ONCE(tsk == current,
-		"Not expecting ptrace on self: TM regs may be incorrect\n");
-
-	/*
-	 * If task is not current, it should have been flushed
-	 * already to it's thread_struct during __switch_to().
-	 */
-}
-#endif
-
 struct task_struct *__switch_to(struct task_struct *prev,
 	struct task_struct *new)
 {
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 6ee4b72..d3eff99 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -695,7 +695,7 @@
 	OV4_MIN_ENT_CAP,		/* minimum VP entitled capacity */
 
 	/* option vector 5: PAPR/OF options */
-	VECTOR_LENGTH(18),		/* length */
+	VECTOR_LENGTH(21),		/* length */
 	0,				/* don't ignore, don't halt */
 	OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
 	OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
@@ -726,8 +726,11 @@
 	0,
 	0,
 	OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) |
-	OV5_FEAT(OV5_PFO_HW_842),
-	OV5_FEAT(OV5_SUB_PROCESSORS),
+	OV5_FEAT(OV5_PFO_HW_842),				/* Byte 17 */
+	0,							/* Byte 18 */
+	0,							/* Byte 19 */
+	0,							/* Byte 20 */
+	OV5_FEAT(OV5_SUB_PROCESSORS),				/* Byte 21 */
 
 	/* option vector 6: IBM PAPR hints */
 	VECTOR_LENGTH(3),		/* length */
@@ -2940,7 +2943,7 @@
 
 	/* Don't print anything after quiesce under OPAL, it crashes OFW */
 	if (of_platform != PLATFORM_OPAL) {
-		prom_printf("Booting Linux via __start() ...\n");
+		prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
 		prom_debug("->dt_header_start=0x%x\n", hdr);
 	}
 
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 4f3c575..bf91658 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -38,6 +38,7 @@
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/switch_to.h>
+#include <asm/tm.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -118,6 +119,24 @@
 	REG_OFFSET_END,
 };
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+	/*
+	 * If task is not current, it will have been flushed already to
+	 * it's thread_struct during __switch_to().
+	 *
+	 * A reclaim flushes ALL the state.
+	 */
+
+	if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
+		tm_reclaim_current(TM_CAUSE_SIGNAL);
+
+}
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
 /**
  * regs_query_register_offset() - query register offset from its name
  * @name:	the name of a register
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index c3e861d..24ec3ea 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -93,15 +93,16 @@
  * and we are running with enough of the MMU enabled to have our
  * proper kernel virtual addresses
  *
- * Find out what kind of machine we're on and save any data we need
- * from the early boot process (devtree is copied on pmac by prom_init()).
- * This is called very early on the boot process, after a minimal
- * MMU environment has been set up but before MMU_init is called.
+ * We do the initial parsing of the flat device-tree and prepares
+ * for the MMU to be fully initialized.
  */
 extern unsigned int memset_nocache_branch; /* Insn to be replaced by NOP */
 
 notrace void __init machine_init(u64 dt_ptr)
 {
+	/* Configure static keys first, now that we're relocated. */
+	setup_feature_keys();
+
 	/* Enable early debugging if any specified (see udbg.h) */
 	udbg_early_init();
 
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index eafb9a7..7ac8e6e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -300,6 +300,7 @@
 
 	/* Apply all the dynamic patching */
 	apply_feature_fixups();
+	setup_feature_keys();
 
 	/* Initialize the hash table or TLB handling */
 	early_init_mmu();
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index b6aa378..a7daf74 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1226,7 +1226,21 @@
 		(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
 	if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
 		goto bad;
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * If there is a transactional state then throw it away.
+	 * The purpose of a sigreturn is to destroy all traces of the
+	 * signal frame, this includes any transactional state created
+	 * within in. We only check for suspended as we can never be
+	 * active in the kernel, we are active, there is nothing better to
+	 * do than go ahead and Bad Thing later.
+	 * The cause is not important as there will never be a
+	 * recheckpoint so it's not user visible.
+	 */
+	if (MSR_TM_SUSPENDED(mfmsr()))
+		tm_reclaim_current(0);
+
 	if (__get_user(tmp, &rt_sf->uc.uc_link))
 		goto bad;
 	uc_transact = (struct ucontext __user *)(uintptr_t)tmp;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 7e49984..70409bb 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -676,7 +676,21 @@
 	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
 		goto badframe;
 	set_current_blocked(&set);
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	/*
+	 * If there is a transactional state then throw it away.
+	 * The purpose of a sigreturn is to destroy all traces of the
+	 * signal frame, this includes any transactional state created
+	 * within in. We only check for suspended as we can never be
+	 * active in the kernel, we are active, there is nothing better to
+	 * do than go ahead and Bad Thing later.
+	 * The cause is not important as there will never be a
+	 * recheckpoint so it's not user visible.
+	 */
+	if (MSR_TM_SUSPENDED(mfmsr()))
+		tm_reclaim_current(0);
+
 	if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
 		goto badframe;
 	if (MSR_TM_ACTIVE(msr)) {
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 25a3905..9c6f3fd 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -830,7 +830,7 @@
 
 	/* Update sibling maps */
 	base = cpu_first_thread_sibling(cpu);
-	for (i = 0; i < threads_per_core; i++) {
+	for (i = 0; i < threads_per_core && base + i < nr_cpu_ids; i++) {
 		cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
 		cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
 		cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 2cb5892..62859eb 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -25,7 +25,8 @@
 #include <linux/user.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/extable.h>
+#include <linux/module.h>	/* print_modules */
 #include <linux/prctl.h>
 #include <linux/delay.h>
 #include <linux/kprobes.h>
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 6767605..4111d30 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -22,6 +22,7 @@
 #include <linux/security.h>
 #include <linux/memblock.h>
 
+#include <asm/cpu_has_feature.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index cbabd14..78a7449 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -30,7 +30,7 @@
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
+$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
 	$(call if_changed,vdso32ld)
 
 # strip rule for the .so file
@@ -39,12 +39,12 @@
 	$(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
+$(obj-vdso32): %.o: %.S FORCE
 	$(call if_changed_dep,vdso32as)
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
 
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index c710802..366ae09 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -23,7 +23,7 @@
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
 
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
+$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE
 	$(call if_changed,vdso64ld)
 
 # strip rule for the .so file
@@ -32,12 +32,12 @@
 	$(call if_changed,objcopy)
 
 # assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
+$(obj-vdso64): %.o: %.S FORCE
 	$(call if_changed_dep,vdso64as)
 
 # actual build commands
 quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso64as = VDSO64A $@
       cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
 
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 1f9e552..855d4b9 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -78,6 +78,7 @@
 
 ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+	book3s_hv_hmi.o \
 	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_hv_ras.o \
diff --git a/arch/powerpc/kernel/hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
similarity index 100%
rename from arch/powerpc/kernel/hmi.c
rename to arch/powerpc/kvm/book3s_hv_hmi.c
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index a75ba38..05aa113 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1329,20 +1329,16 @@
 	xics->kvm = kvm;
 
 	/* Already there ? */
-	mutex_lock(&kvm->lock);
 	if (kvm->arch.xics)
 		ret = -EEXIST;
 	else
 		kvm->arch.xics = xics;
-	mutex_unlock(&kvm->lock);
 
 	if (ret) {
 		kfree(xics);
 		return ret;
 	}
 
-	xics_debugfs_init(xics);
-
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
 		/* Enable real mode support */
@@ -1354,9 +1350,17 @@
 	return 0;
 }
 
+static void kvmppc_xics_init(struct kvm_device *dev)
+{
+	struct kvmppc_xics *xics = (struct kvmppc_xics *)dev->private;
+
+	xics_debugfs_init(xics);
+}
+
 struct kvm_device_ops kvm_xics_ops = {
 	.name = "kvm-xics",
 	.create = kvmppc_xics_create,
+	.init = kvmppc_xics_init,
 	.destroy = kvmppc_xics_free,
 	.set_attr = xics_set_attr,
 	.get_attr = xics_get_attr,
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index d90870a..aa8214f 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -127,17 +127,19 @@
 	stw	r7,12(r1)
 	stw	r8,8(r1)
 
-	andi.	r0,r4,1			/* is destination address even ? */
-	cmplwi	cr7,r0,0
 	addic	r12,r6,0
 	addi	r6,r4,-4
 	neg	r0,r4
 	addi	r4,r3,-4
 	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
+	crset	4*cr7+eq
 	beq	58f
 
 	cmplw	0,r5,r0			/* is this more than total to do? */
 	blt	63f			/* if not much to do */
+	rlwinm	r7,r6,3,0x8
+	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
+	cmplwi	cr7,r7,0	/* is destination address even ? */
 	andi.	r8,r0,3			/* get it word-aligned first */
 	mtctr	r8
 	beq+	61f
@@ -237,7 +239,7 @@
 66:	addze	r3,r12
 	addi	r1,r1,16
 	beqlr+	cr7
-	rlwinm	r3,r3,8,0,31	/* swap bytes for odd destination */
+	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
 	blr
 
 /* read fault */
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 74145f0..043415f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -188,7 +188,10 @@
 			  &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
 #endif
 	do_final_fixups();
+}
 
+void __init setup_feature_keys(void)
+{
 	/*
 	 * Initialise jump label. This causes all the cpu/mmu_has_feature()
 	 * checks to take on their correct polarity based on the current set of
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a4db22f..bb1ffc5 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -26,7 +26,7 @@
 #include <linux/mm.h>
 #include <linux/interrupt.h>
 #include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 7d95bc4..c491f2c 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -369,44 +369,34 @@
 }
 
 #ifdef CONFIG_SMP
-
-static int mmu_context_cpu_notify(struct notifier_block *self,
-				  unsigned long action, void *hcpu)
+static int mmu_ctx_cpu_prepare(unsigned int cpu)
 {
-	unsigned int cpu = (unsigned int)(long)hcpu;
-
 	/* We don't touch CPU 0 map, it's allocated at aboot and kept
 	 * around forever
 	 */
 	if (cpu == boot_cpuid)
-		return NOTIFY_OK;
+		return 0;
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
-		stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
-		kfree(stale_map[cpu]);
-		stale_map[cpu] = NULL;
-
-		/* We also clear the cpu_vm_mask bits of CPUs going away */
-		clear_tasks_mm_cpumask(cpu);
-	break;
-#endif /* CONFIG_HOTPLUG_CPU */
-	}
-	return NOTIFY_OK;
+	pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
+	stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+	return 0;
 }
 
-static struct notifier_block mmu_context_cpu_nb = {
-	.notifier_call	= mmu_context_cpu_notify,
-};
+static int mmu_ctx_cpu_dead(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	if (cpu == boot_cpuid)
+		return 0;
+
+	pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
+	kfree(stale_map[cpu]);
+	stale_map[cpu] = NULL;
+
+	/* We also clear the cpu_vm_mask bits of CPUs going away */
+	clear_tasks_mm_cpumask(cpu);
+#endif
+	return 0;
+}
 
 #endif /* CONFIG_SMP */
 
@@ -469,7 +459,9 @@
 #else
 	stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
 
-	register_cpu_notifier(&mmu_context_cpu_nb);
+	cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+				  "powerpc/mmu/ctx:prepare",
+				  mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
 #endif
 
 	printk(KERN_INFO
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index dfdb90c..9f19834 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -113,7 +113,12 @@
 END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
 	b	slb_finish_load_1T
 
-0:
+0:	/*
+	 * For userspace addresses, make sure this is region 0.
+	 */
+	cmpdi	r9, 0
+	bne	8f
+
 	/* when using slices, we extract the psize off the slice bitmaps
 	 * and then we need to get the sllp encoding off the mmu_psize_defs
 	 * array.
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
index 8eb82b0..d93dd4a 100644
--- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -528,7 +528,6 @@
 	.remove = mpc512x_lpbfifo_remove,
 	.driver = {
 		.name = DRV_NAME,
-		.owner = THIS_MODULE,
 		.of_match_table = mpc512x_lpbfifo_match,
 	},
 };
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index dbcd030..63c5ab64 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -222,7 +222,6 @@
 static struct i2c_driver mcu_driver = {
 	.driver = {
 		.name = "mcu-mpc8349emitx",
-		.owner = THIS_MODULE,
 		.of_match_table = mcu_of_match_table,
 	},
 	.probe = mcu_probe,
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index 5be15cf..2975754 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -496,8 +496,10 @@
 	gang = alloc_spu_gang();
 	SPUFS_I(inode)->i_ctx = NULL;
 	SPUFS_I(inode)->i_gang = gang;
-	if (!gang)
+	if (!gang) {
+		ret = -ENOMEM;
 		goto out_iput;
+	}
 
 	inode->i_op = &simple_dir_inode_operations;
 	inode->i_fop = &simple_dir_operations;
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index dafba10..dfd3100 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -26,7 +26,7 @@
 #include <linux/tty.h>
 #include <linux/serial_core.h>
 #include <linux/of_platform.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 
 #include <asm/time.h>
 #include <asm/machdep.h>
diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
index 80804f9..f97bab8 100644
--- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
+++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c
@@ -23,7 +23,7 @@
 #include <linux/pci.h>
 #include <linux/kdev_t.h>
 #include <linux/console.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/seq_file.h>
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index 309d9cc..c61667e 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -187,6 +187,11 @@
 	if (dev->vendor == 0x1959 && dev->device == 0xa007 &&
 	    !firmware_has_feature(FW_FEATURE_LPAR)) {
 		dev->dev.archdata.dma_ops = &dma_direct_ops;
+		/*
+		 * Set the coherent DMA mask to prevent the iommu
+		 * being used unnecessarily
+		 */
+		dev->dev.coherent_dma_mask = DMA_BIT_MASK(44);
 		return;
 	}
 #endif
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 834868b..366e4f5 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -852,37 +852,33 @@
 
 #ifdef CONFIG_PPC64
 #ifdef CONFIG_HOTPLUG_CPU
-static int smp_core99_cpu_notify(struct notifier_block *self,
-				 unsigned long action, void *hcpu)
+static unsigned int smp_core99_host_open;
+
+static int smp_core99_cpu_prepare(unsigned int cpu)
 {
 	int rc;
 
-	switch(action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-		/* Open i2c bus if it was used for tb sync */
-		if (pmac_tb_clock_chip_host) {
-			rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1);
-			if (rc) {
-				pr_err("Failed to open i2c bus for time sync\n");
-				return notifier_from_errno(rc);
-			}
+	/* Open i2c bus if it was used for tb sync */
+	if (pmac_tb_clock_chip_host && !smp_core99_host_open) {
+		rc = pmac_i2c_open(pmac_tb_clock_chip_host, 1);
+		if (rc) {
+			pr_err("Failed to open i2c bus for time sync\n");
+			return notifier_from_errno(rc);
 		}
-		break;
-	case CPU_ONLINE:
-	case CPU_UP_CANCELED:
-		/* Close i2c bus if it was used for tb sync */
-		if (pmac_tb_clock_chip_host)
-			pmac_i2c_close(pmac_tb_clock_chip_host);
-		break;
-	default:
-		break;
+		smp_core99_host_open = 1;
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block smp_core99_cpu_nb = {
-	.notifier_call	= smp_core99_cpu_notify,
-};
+static int smp_core99_cpu_online(unsigned int cpu)
+{
+	/* Close i2c bus if it was used for tb sync */
+	if (pmac_tb_clock_chip_host && smp_core99_host_open) {
+		pmac_i2c_close(pmac_tb_clock_chip_host);
+		smp_core99_host_open = 0;
+	}
+	return 0;
+}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 static void __init smp_core99_bringup_done(void)
@@ -902,7 +898,11 @@
 		g5_phy_disable_cpu1();
 	}
 #ifdef CONFIG_HOTPLUG_CPU
-	register_cpu_notifier(&smp_core99_cpu_nb);
+	cpuhp_setup_state_nocalls(CPUHP_POWERPC_PMAC_PREPARE,
+				  "powerpc/pmac:prepare", smp_core99_cpu_prepare,
+				  NULL);
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/pmac:online",
+				  smp_core99_cpu_online, NULL);
 #endif
 
 	if (ppc_md.progress)
diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c
index 2ee9643..4c82782 100644
--- a/arch/powerpc/platforms/powernv/opal-dump.c
+++ b/arch/powerpc/platforms/powernv/opal-dump.c
@@ -370,6 +370,7 @@
 	uint32_t dump_id, dump_size, dump_type;
 	struct dump_obj *dump;
 	char name[22];
+	struct kobject *kobj;
 
 	rc = dump_read_info(&dump_id, &dump_size, &dump_type);
 	if (rc != OPAL_SUCCESS)
@@ -381,8 +382,12 @@
 	 * that gracefully and not create two conflicting
 	 * entries.
 	 */
-	if (kset_find_obj(dump_kset, name))
+	kobj = kset_find_obj(dump_kset, name);
+	if (kobj) {
+		/* Drop reference added by kset_find_obj() */
+		kobject_put(kobj);
 		return 0;
+	}
 
 	dump = create_dump_obj(dump_id, dump_size, dump_type);
 	if (!dump)
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 37f959b..f2344cb 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -247,6 +247,7 @@
 	uint64_t elog_type;
 	int rc;
 	char name[2+16+1];
+	struct kobject *kobj;
 
 	rc = opal_get_elog_size(&id, &size, &type);
 	if (rc != OPAL_SUCCESS) {
@@ -269,8 +270,12 @@
 	 * that gracefully and not create two conflicting
 	 * entries.
 	 */
-	if (kset_find_obj(elog_kset, name))
+	kobj = kset_find_obj(elog_kset, name);
+	if (kobj) {
+		/* Drop reference added by kset_find_obj() */
+		kobject_put(kobj);
 		return IRQ_HANDLED;
+	}
 
 	create_elog_obj(log_id, elog_size, elog_type);
 
diff --git a/arch/powerpc/platforms/powernv/opal-irqchip.c b/arch/powerpc/platforms/powernv/opal-irqchip.c
index e505223b..ed8bba6 100644
--- a/arch/powerpc/platforms/powernv/opal-irqchip.c
+++ b/arch/powerpc/platforms/powernv/opal-irqchip.c
@@ -228,7 +228,8 @@
 		}
 
 		/* Install interrupt handler */
-		rc = request_irq(virq, opal_interrupt, 0, "opal", NULL);
+		rc = request_irq(virq, opal_interrupt, IRQF_TRIGGER_LOW,
+				 "opal", NULL);
 		if (rc) {
 			irq_dispose_mapping(virq);
 			pr_warn("Error %d requesting irq %d (0x%x)\n",
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 8b4fc68..6c9a65b 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -399,6 +399,7 @@
 
 	if (!(regs->msr & MSR_RI)) {
 		/* If MSR_RI isn't set, we cannot recover */
+		pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
 		recovered = 0;
 	} else if (evt->disposition == MCE_DISPOSITION_RECOVERED) {
 		/* Platform corrected itself */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6b95283..38a5c65 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -111,10 +111,24 @@
 }
 early_param("iommu", iommu_setup);
 
-static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
 {
-	return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
-		(IORESOURCE_MEM_64 | IORESOURCE_PREFETCH));
+	/*
+	 * WARNING: We cannot rely on the resource flags. The Linux PCI
+	 * allocation code sometimes decides to put a 64-bit prefetchable
+	 * BAR in the 32-bit window, so we have to compare the addresses.
+	 *
+	 * For simplicity we only test resource start.
+	 */
+	return (r->start >= phb->ioda.m64_base &&
+		r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
+}
+
+static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
+{
+	unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+
+	return (resource_flags & flags) == flags;
 }
 
 static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
@@ -142,7 +156,7 @@
 
 static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
 {
-	unsigned long pe = phb->ioda.total_pe_num - 1;
+	long pe;
 
 	for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
 		if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
@@ -155,11 +169,12 @@
 static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
 {
 	struct pnv_phb *phb = pe->phb;
+	unsigned int pe_num = pe->pe_number;
 
 	WARN_ON(pe->pdev);
 
 	memset(pe, 0, sizeof(struct pnv_ioda_pe));
-	clear_bit(pe->pe_number, phb->ioda.pe_alloc);
+	clear_bit(pe_num, phb->ioda.pe_alloc);
 }
 
 /* The default M64 BAR is shared by all PEs */
@@ -229,7 +244,7 @@
 	sgsz = phb->ioda.m64_segsize;
 	for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
 		r = &pdev->resource[i];
-		if (!r->parent || !pnv_pci_is_mem_pref_64(r->flags))
+		if (!r->parent || !pnv_pci_is_m64(phb, r))
 			continue;
 
 		start = _ALIGN_DOWN(r->start - base, sgsz);
@@ -1877,7 +1892,7 @@
 					unsigned shift, unsigned long index,
 					unsigned long npages)
 {
-	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, false);
+	__be64 __iomem *invalidate = pnv_ioda_get_inval_reg(pe->phb, rm);
 	unsigned long start, end, inc;
 
 	/* We'll invalidate DMA address in PE scope */
@@ -2209,7 +2224,7 @@
 
 	pnv_pci_link_table_and_group(phb->hose->node, num,
 			tbl, &pe->table_group);
-	pnv_pci_phb3_tce_invalidate_pe(pe);
+	pnv_pci_ioda2_tce_invalidate_pe(pe);
 
 	return 0;
 }
@@ -2347,7 +2362,7 @@
 	if (ret)
 		pe_warn(pe, "Unmapping failed, ret = %ld\n", ret);
 	else
-		pnv_pci_phb3_tce_invalidate_pe(pe);
+		pnv_pci_ioda2_tce_invalidate_pe(pe);
 
 	pnv_pci_unlink_table_and_group(table_group->tables[num], table_group);
 
@@ -2863,7 +2878,7 @@
 		res = &pdev->resource[i + PCI_IOV_RESOURCES];
 		if (!res->flags || res->parent)
 			continue;
-		if (!pnv_pci_is_mem_pref_64(res->flags)) {
+		if (!pnv_pci_is_m64_flags(res->flags)) {
 			dev_warn(&pdev->dev, "Don't support SR-IOV with"
 					" non M64 VF BAR%d: %pR. \n",
 				 i, res);
@@ -2958,7 +2973,7 @@
 			index++;
 		}
 	} else if ((res->flags & IORESOURCE_MEM) &&
-		   !pnv_pci_is_mem_pref_64(res->flags)) {
+		   !pnv_pci_is_m64(phb, res)) {
 		region.start = res->start -
 			       phb->hose->mem_offset[0] -
 			       phb->ioda.m32_pci_base;
@@ -3083,9 +3098,12 @@
 		bridge = bridge->bus->self;
 	}
 
-	/* We fail back to M32 if M64 isn't supported */
-	if (phb->ioda.m64_segsize &&
-	    pnv_pci_is_mem_pref_64(type))
+	/*
+	 * We fall back to M32 if M64 isn't supported. We enforce the M64
+	 * alignment for any 64-bit resource, PCIe doesn't care and
+	 * bridges only do 64-bit prefetchable anyway.
+	 */
+	if (phb->ioda.m64_segsize && pnv_pci_is_m64_flags(type))
 		return phb->ioda.m64_segsize;
 	if (type & IORESOURCE_MEM)
 		return phb->ioda.m32_segsize;
@@ -3125,7 +3143,7 @@
 		w = NULL;
 		if (r->flags & type & IORESOURCE_IO)
 			w = &hose->io_resource;
-		else if (pnv_pci_is_mem_pref_64(r->flags) &&
+		else if (pnv_pci_is_m64(phb, r) &&
 			 (type & IORESOURCE_PREFETCH) &&
 			 phb->ioda.m64_segsize)
 			w = &hose->mem_resources[1];
@@ -3392,12 +3410,6 @@
 	struct pnv_phb *phb = pe->phb;
 	struct pnv_ioda_pe *slave, *tmp;
 
-	/* Release slave PEs in compound PE */
-	if (pe->flags & PNV_IODA_PE_MASTER) {
-		list_for_each_entry_safe(slave, tmp, &pe->slaves, list)
-			pnv_ioda_release_pe(slave);
-	}
-
 	list_del(&pe->list);
 	switch (phb->type) {
 	case PNV_PHB_IODA1:
@@ -3412,7 +3424,26 @@
 
 	pnv_ioda_release_pe_seg(pe);
 	pnv_ioda_deconfigure_pe(pe->phb, pe);
-	pnv_ioda_free_pe(pe);
+
+	/* Release slave PEs in the compound PE */
+	if (pe->flags & PNV_IODA_PE_MASTER) {
+		list_for_each_entry_safe(slave, tmp, &pe->slaves, list) {
+			list_del(&slave->list);
+			pnv_ioda_free_pe(slave);
+		}
+	}
+
+	/*
+	 * The PE for root bus can be removed because of hotplug in EEH
+	 * recovery for fenced PHB error. We need to mark the PE dead so
+	 * that it can be populated again in PCI hot add path. The PE
+	 * shouldn't be destroyed as it's the global reserved resource.
+	 */
+	if (phb->ioda.root_pe_populated &&
+	    phb->ioda.root_pe_idx == pe->pe_number)
+		phb->ioda.root_pe_populated = false;
+	else
+		pnv_ioda_free_pe(pe);
 }
 
 static void pnv_pci_release_device(struct pci_dev *pdev)
@@ -3428,7 +3459,17 @@
 	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
 		return;
 
+	/*
+	 * PCI hotplug can happen as part of EEH error recovery. The @pdn
+	 * isn't removed and added afterwards in this scenario. We should
+	 * set the PE number in @pdn to an invalid one. Otherwise, the PE's
+	 * device count is decreased on removing devices while failing to
+	 * be increased on adding devices. It leads to unbalanced PE's device
+	 * count and eventually make normal PCI hotplug path broken.
+	 */
 	pe = &phb->ioda.pe_array[pdn->pe_number];
+	pdn->pe_number = IODA_INVALID_PE;
+
 	WARN_ON(--pe->device_count < 0);
 	if (pe->device_count == 0)
 		pnv_ioda_release_pe(pe);
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 43f7beb..76ec104 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -320,19 +320,6 @@
 	return dlpar_update_device_tree_lmb(lmb);
 }
 
-static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
-{
-	unsigned long section_nr;
-	struct mem_section *mem_sect;
-	struct memory_block *mem_block;
-
-	section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
-	mem_sect = __nr_to_section(section_nr);
-
-	mem_block = find_memory_block(mem_sect);
-	return mem_block;
-}
-
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size)
 {
@@ -420,6 +407,19 @@
 
 static int dlpar_add_lmb(struct of_drconf_cell *);
 
+static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb)
+{
+	unsigned long section_nr;
+	struct mem_section *mem_sect;
+	struct memory_block *mem_block;
+
+	section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
+	mem_sect = __nr_to_section(section_nr);
+
+	mem_block = find_memory_block(mem_sect);
+	return mem_block;
+}
+
 static int dlpar_remove_lmb(struct of_drconf_cell *lmb)
 {
 	struct memory_block *mem_block;
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index fe16a50..09eba5a 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -119,6 +119,10 @@
 
 	bus = bridge->bus;
 
+	/* Rely on the pcibios_free_controller_deferred() callback. */
+	pci_set_host_bridge_release(bridge, pcibios_free_controller_deferred,
+					(void *) pci_bus_to_host(bus));
+
 	dn = pcibios_get_phb_of_node(bus);
 	if (!dn)
 		return 0;
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index 906dbaa..547fd13 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -106,8 +106,11 @@
 		release_resource(res);
 	}
 
-	/* Free pci_controller data structure */
-	pcibios_free_controller(phb);
+	/*
+	 * The pci_controller data structure is freed by
+	 * the pcibios_free_controller_deferred() callback;
+	 * see pseries_root_bridge_prepare().
+	 */
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 4ffcaa6..a39d20e 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -41,7 +41,6 @@
 #include <linux/root_dev.h>
 #include <linux/of.h>
 #include <linux/of_pci.h>
-#include <linux/kexec.h>
 
 #include <asm/mmu.h>
 #include <asm/processor.h>
@@ -66,6 +65,7 @@
 #include <asm/eeh.h>
 #include <asm/reg.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/kexec.h>
 
 #include "pseries.h"
 
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/sysdev/cpm1.c
index 6c11099..81d4947 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/sysdev/cpm1.c
@@ -534,7 +534,8 @@
 
 static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-	struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+	struct cpm1_gpio16_chip *cpm1_gc =
+		container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc);
 	struct cpm_ioport16 __iomem *iop = mm_gc->regs;
 
 	cpm1_gc->cpdata = in_be16(&iop->dat);
@@ -649,7 +650,8 @@
 
 static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-	struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(&mm_gc->gc);
+	struct cpm1_gpio32_chip *cpm1_gc =
+		container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc);
 	struct cpm_ioport32b __iomem *iop = mm_gc->regs;
 
 	cpm1_gc->cpdata = in_be32(&iop->dat);
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 911456d..947f420 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -94,7 +94,8 @@
 
 static void cpm2_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
 {
-	struct cpm2_gpio32_chip *cpm2_gc = gpiochip_get_data(&mm_gc->gc);
+	struct cpm2_gpio32_chip *cpm2_gc =
+		container_of(mm_gc, struct cpm2_gpio32_chip, mm_gc);
 	struct cpm2_ioports __iomem *iop = mm_gc->regs;
 
 	cpm2_gc->cpdata = in_be32(&iop->dat);
diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c
index 68e7c0d..3cc7cac 100644
--- a/arch/powerpc/sysdev/fsl_rio.c
+++ b/arch/powerpc/sysdev/fsl_rio.c
@@ -23,7 +23,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
diff --git a/arch/powerpc/sysdev/xics/Kconfig b/arch/powerpc/sysdev/xics/Kconfig
index 0031eda..385e7aa 100644
--- a/arch/powerpc/sysdev/xics/Kconfig
+++ b/arch/powerpc/sysdev/xics/Kconfig
@@ -1,6 +1,7 @@
 config PPC_XICS
        def_bool n
        select PPC_SMP_MUXED_IPI
+       select HARDIRQS_SW_RESEND
 
 config PPC_ICP_NATIVE
        def_bool n
diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c
index 57d72f1..9114243 100644
--- a/arch/powerpc/sysdev/xics/icp-opal.c
+++ b/arch/powerpc/sysdev/xics/icp-opal.c
@@ -23,10 +23,10 @@
 
 static void icp_opal_teardown_cpu(void)
 {
-	int cpu = smp_processor_id();
+	int hw_cpu = hard_smp_processor_id();
 
 	/* Clear any pending IPI */
-	opal_int_set_mfrr(cpu, 0xff);
+	opal_int_set_mfrr(hw_cpu, 0xff);
 }
 
 static void icp_opal_flush_ipi(void)
@@ -101,14 +101,16 @@
 
 static void icp_opal_cause_ipi(int cpu, unsigned long data)
 {
-	opal_int_set_mfrr(cpu, IPI_PRIORITY);
+	int hw_cpu = get_hard_smp_processor_id(cpu);
+
+	opal_int_set_mfrr(hw_cpu, IPI_PRIORITY);
 }
 
 static irqreturn_t icp_opal_ipi_action(int irq, void *dev_id)
 {
-	int cpu = smp_processor_id();
+	int hw_cpu = hard_smp_processor_id();
 
-	opal_int_set_mfrr(cpu, 0xff);
+	opal_int_set_mfrr(hw_cpu, 0xff);
 
 	return smp_ipi_demux();
 }
diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c
index 27c936c..1c6bf4b 100644
--- a/arch/powerpc/sysdev/xics/ics-opal.c
+++ b/arch/powerpc/sysdev/xics/ics-opal.c
@@ -156,7 +156,9 @@
 	.irq_mask = ics_opal_mask_irq,
 	.irq_unmask = ics_opal_unmask_irq,
 	.irq_eoi = NULL, /* Patched at init time */
-	.irq_set_affinity = ics_opal_set_affinity
+	.irq_set_affinity = ics_opal_set_affinity,
+	.irq_set_type = xics_set_irq_type,
+	.irq_retrigger = xics_retrigger,
 };
 
 static int ics_opal_map(struct ics *ics, unsigned int virq);
diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c
index 3854dd4..78ee5c7 100644
--- a/arch/powerpc/sysdev/xics/ics-rtas.c
+++ b/arch/powerpc/sysdev/xics/ics-rtas.c
@@ -163,7 +163,9 @@
 	.irq_mask = ics_rtas_mask_irq,
 	.irq_unmask = ics_rtas_unmask_irq,
 	.irq_eoi = NULL, /* Patched at init time */
-	.irq_set_affinity = ics_rtas_set_affinity
+	.irq_set_affinity = ics_rtas_set_affinity,
+	.irq_set_type = xics_set_irq_type,
+	.irq_retrigger = xics_retrigger,
 };
 
 static int ics_rtas_map(struct ics *ics, unsigned int virq)
diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c
index a795a5f..9d530f4 100644
--- a/arch/powerpc/sysdev/xics/xics-common.c
+++ b/arch/powerpc/sysdev/xics/xics-common.c
@@ -328,8 +328,12 @@
 
 	pr_devel("xics: map virq %d, hwirq 0x%lx\n", virq, hw);
 
-	/* They aren't all level sensitive but we just don't really know */
-	irq_set_status_flags(virq, IRQ_LEVEL);
+	/*
+	 * Mark interrupts as edge sensitive by default so that resend
+	 * actually works. The device-tree parsing will turn the LSIs
+	 * back to level.
+	 */
+	irq_clear_status_flags(virq, IRQ_LEVEL);
 
 	/* Don't call into ICS for IPIs */
 	if (hw == XICS_IPI) {
@@ -351,13 +355,54 @@
 			   irq_hw_number_t *out_hwirq, unsigned int *out_flags)
 
 {
-	/* Current xics implementation translates everything
-	 * to level. It is not technically right for MSIs but this
-	 * is irrelevant at this point. We might get smarter in the future
-	 */
 	*out_hwirq = intspec[0];
-	*out_flags = IRQ_TYPE_LEVEL_LOW;
 
+	/*
+	 * If intsize is at least 2, we look for the type in the second cell,
+	 * we assume the LSB indicates a level interrupt.
+	 */
+	if (intsize > 1) {
+		if (intspec[1] & 1)
+			*out_flags = IRQ_TYPE_LEVEL_LOW;
+		else
+			*out_flags = IRQ_TYPE_EDGE_RISING;
+	} else
+		*out_flags = IRQ_TYPE_LEVEL_LOW;
+
+	return 0;
+}
+
+int xics_set_irq_type(struct irq_data *d, unsigned int flow_type)
+{
+	/*
+	 * We only support these. This has really no effect other than setting
+	 * the corresponding descriptor bits mind you but those will in turn
+	 * affect the resend function when re-enabling an edge interrupt.
+	 *
+	 * Set set the default to edge as explained in map().
+	 */
+	if (flow_type == IRQ_TYPE_DEFAULT || flow_type == IRQ_TYPE_NONE)
+		flow_type = IRQ_TYPE_EDGE_RISING;
+
+	if (flow_type != IRQ_TYPE_EDGE_RISING &&
+	    flow_type != IRQ_TYPE_LEVEL_LOW)
+		return -EINVAL;
+
+	irqd_set_trigger_type(d, flow_type);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+int xics_retrigger(struct irq_data *data)
+{
+	/*
+	 * We need to push a dummy CPPR when retriggering, since the subsequent
+	 * EOI will try to pop it. Passing 0 works, as the function hard codes
+	 * the priority value anyway.
+	 */
+	xics_push_cppr(0);
+
+	/* Tell the core to do a soft retrigger */
 	return 0;
 }
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 0e34878..deeadfa 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -68,12 +68,12 @@
 config S390
 	def_bool y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
-	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_SG_CHAIN
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK
 	select ARCH_INLINE_READ_LOCK_BH
@@ -110,6 +110,7 @@
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANTS_PROT_NUMA_PROT_NONE
+	select ARCH_WANTS_UBSAN_NO_NULL
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS2
@@ -137,6 +138,7 @@
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_EXIT_THREAD
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
@@ -872,4 +874,17 @@
 	  Select this option if you want to run the kernel as a guest under
 	  the KVM hypervisor.
 
+config S390_GUEST_OLD_TRANSPORT
+	def_bool y
+	prompt "Guest support for old s390 virtio transport (DEPRECATED)"
+	depends on S390_GUEST
+	help
+	  Enable this option to add support for the old s390-virtio
+	  transport (i.e. virtio devices NOT based on virtio-ccw). This
+	  type of virtio devices is only available on the experimental
+	  kuli userspace or with old (< 2.6) qemu. If you are running
+	  with a modern version of qemu (which supports virtio-ccw since
+	  1.4 and uses it by default since version 2.4), you probably won't
+	  need this.
+
 endmenu
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 224b427..54e0052 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -46,6 +46,8 @@
 cflags-$(CONFIG_MARCH_ZEC12_TUNE)	+= -mtune=zEC12
 cflags-$(CONFIG_MARCH_Z13_TUNE)	+= -mtune=z13
 
+cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
+
 #KBUILD_IMAGE is necessary for make rpm
 KBUILD_IMAGE	:=arch/s390/boot/image
 
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 15c9424..f587c48 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -542,7 +542,7 @@
 		rc = PTR_ERR(appldata_pdev);
 		goto out_driver;
 	}
-	appldata_wq = create_singlethread_workqueue("appldata");
+	appldata_wq = alloc_ordered_workqueue("appldata", 0);
 	if (!appldata_wq) {
 		rc = -ENOMEM;
 		goto out_device;
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 33ba697c..0daa070 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -17,6 +17,7 @@
 KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 
 GCOV_PROFILE := n
+UBSAN_SANITIZE := n
 
 OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o als.o)
 OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o
diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S
index f86a4ee..28c4f96 100644
--- a/arch/s390/boot/compressed/head.S
+++ b/arch/s390/boot/compressed/head.S
@@ -21,16 +21,21 @@
 	lg	%r15,.Lstack-.LPG1(%r13)
 	aghi	%r15,-160
 	brasl	%r14,decompress_kernel
-	# setup registers for memory mover & branch to target
+	# Set up registers for memory mover. We move the decompressed image to
+	# 0x11000, starting at offset 0x11000 in the decompressed image so
+	# that code living at 0x11000 in the image will end up at 0x11000 in
+	# memory.
 	lgr	%r4,%r2
 	lg	%r2,.Loffset-.LPG1(%r13)
 	la	%r4,0(%r2,%r4)
 	lg	%r3,.Lmvsize-.LPG1(%r13)
 	lgr	%r5,%r3
-	# move the memory mover someplace safe
+	# Move the memory mover someplace safe so it doesn't overwrite itself.
 	la	%r1,0x200
 	mvc	0(mover_end-mover,%r1),mover-.LPG1(%r13)
-	# decompress image is started at 0x11000
+	# When the memory mover is done we pass control to
+	# arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in
+	# the decompressed image.
 	lgr	%r6,%r2
 	br	%r1
 mover:
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 889ea34..4596868 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -260,7 +260,6 @@
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NF_TABLES_ARP=m
-CONFIG_NF_NAT_IPV4=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -269,6 +268,8 @@
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
@@ -281,7 +282,6 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NF_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -299,6 +299,8 @@
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NET_SCTPPROBE=m
 CONFIG_RDS=m
@@ -359,6 +361,7 @@
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_TCPPROBE=m
@@ -409,6 +412,7 @@
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -428,6 +432,7 @@
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
 CONFIG_TUN=m
 CONFIG_VETH=m
@@ -453,7 +458,6 @@
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -495,6 +499,7 @@
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
@@ -602,7 +607,6 @@
 CONFIG_FAULT_INJECTION_DEBUG_FS=y
 CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_IRQSOFF_TRACER=y
 CONFIG_PREEMPT_TRACER=y
 CONFIG_SCHED_TRACER=y
@@ -678,7 +682,7 @@
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index 1bcfd76..1dd05e3 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -15,6 +15,8 @@
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_HUGETLB=y
@@ -255,7 +257,6 @@
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NF_TABLES_ARP=m
-CONFIG_NF_NAT_IPV4=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -264,6 +265,8 @@
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
@@ -276,7 +279,6 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NF_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -294,6 +296,8 @@
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NET_SCTPPROBE=m
 CONFIG_RDS=m
@@ -353,6 +357,7 @@
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_TCPPROBE=m
@@ -403,6 +408,7 @@
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -422,6 +428,7 @@
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
 CONFIG_TUN=m
 CONFIG_VETH=m
@@ -447,7 +454,6 @@
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -487,6 +493,7 @@
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
@@ -552,7 +559,6 @@
 CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
 CONFIG_PM_NOTIFIER_ERROR_INJECT=m
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_BLK_DEV_IO_TRACE=y
 # CONFIG_KPROBE_EVENT is not set
 CONFIG_TRACE_ENUM_MAP_FILE=y
@@ -616,7 +622,7 @@
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 13ff090..29d1178 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -16,6 +16,8 @@
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_HUGETLB=y
@@ -255,7 +257,6 @@
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NF_TABLES_ARP=m
-CONFIG_NF_NAT_IPV4=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -264,6 +265,8 @@
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
@@ -276,7 +279,6 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NF_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -294,6 +296,8 @@
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NET_SCTPPROBE=m
 CONFIG_RDS=m
@@ -353,6 +357,7 @@
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_TCPPROBE=m
@@ -403,6 +408,7 @@
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -422,6 +428,7 @@
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
 CONFIG_TUN=m
 CONFIG_VETH=m
@@ -447,7 +454,6 @@
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -488,6 +494,7 @@
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
@@ -549,7 +556,6 @@
 CONFIG_RCU_TORTURE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_STACK_TRACER=y
@@ -615,7 +621,7 @@
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
 CONFIG_CRYPTO_GHASH_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_ASYMMETRIC_KEY_TYPE=y
 CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m
 CONFIG_X509_CERTIFICATE_PARSER=m
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 2ea18b0..303d28e 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -31,69 +31,29 @@
 #include <crypto/xts.h>
 #include <asm/cpacf.h>
 
-#define AES_KEYLEN_128		1
-#define AES_KEYLEN_192		2
-#define AES_KEYLEN_256		4
-
 static u8 *ctrblk;
 static DEFINE_SPINLOCK(ctrblk_lock);
-static char keylen_flag;
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
 
 struct s390_aes_ctx {
 	u8 key[AES_MAX_KEY_SIZE];
-	long enc;
-	long dec;
 	int key_len;
+	unsigned long fc;
 	union {
 		struct crypto_skcipher *blk;
 		struct crypto_cipher *cip;
 	} fallback;
 };
 
-struct pcc_param {
-	u8 key[32];
-	u8 tweak[16];
-	u8 block[16];
-	u8 bit[16];
-	u8 xts[16];
-};
-
 struct s390_xts_ctx {
 	u8 key[32];
 	u8 pcc_key[32];
-	long enc;
-	long dec;
 	int key_len;
+	unsigned long fc;
 	struct crypto_skcipher *fallback;
 };
 
-/*
- * Check if the key_len is supported by the HW.
- * Returns 0 if it is, a positive number if it is not and software fallback is
- * required or a negative number in case the key size is not valid
- */
-static int need_fallback(unsigned int key_len)
-{
-	switch (key_len) {
-	case 16:
-		if (!(keylen_flag & AES_KEYLEN_128))
-			return 1;
-		break;
-	case 24:
-		if (!(keylen_flag & AES_KEYLEN_192))
-			return 1;
-		break;
-	case 32:
-		if (!(keylen_flag & AES_KEYLEN_256))
-			return 1;
-		break;
-	default:
-		return -1;
-		break;
-	}
-	return 0;
-}
-
 static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len)
 {
@@ -117,72 +77,44 @@
 		       unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int ret;
+	unsigned long fc;
 
-	ret = need_fallback(key_len);
-	if (ret < 0) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KM_AES_128 :
+	     (key_len == 24) ? CPACF_KM_AES_192 :
+	     (key_len == 32) ? CPACF_KM_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_cip(tfm, in_key, key_len);
 
 	sctx->key_len = key_len;
-	if (!ret) {
-		memcpy(sctx->key, in_key, key_len);
-		return 0;
-	}
-
-	return setkey_fallback_cip(tfm, in_key, key_len);
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
-	if (unlikely(need_fallback(sctx->key_len))) {
+	if (unlikely(!sctx->fc)) {
 		crypto_cipher_encrypt_one(sctx->fallback.cip, out, in);
 		return;
 	}
-
-	switch (sctx->key_len) {
-	case 16:
-		cpacf_km(CPACF_KM_AES_128_ENC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	case 24:
-		cpacf_km(CPACF_KM_AES_192_ENC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	case 32:
-		cpacf_km(CPACF_KM_AES_256_ENC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	}
+	cpacf_km(sctx->fc, &sctx->key, out, in, AES_BLOCK_SIZE);
 }
 
 static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
-	if (unlikely(need_fallback(sctx->key_len))) {
+	if (unlikely(!sctx->fc)) {
 		crypto_cipher_decrypt_one(sctx->fallback.cip, out, in);
 		return;
 	}
-
-	switch (sctx->key_len) {
-	case 16:
-		cpacf_km(CPACF_KM_AES_128_DEC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	case 24:
-		cpacf_km(CPACF_KM_AES_192_DEC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	case 32:
-		cpacf_km(CPACF_KM_AES_256_DEC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	}
+	cpacf_km(sctx->fc | CPACF_DECRYPT,
+		 &sctx->key, out, in, AES_BLOCK_SIZE);
 }
 
 static int fallback_init_cip(struct crypto_tfm *tfm)
@@ -291,50 +223,37 @@
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	int ret;
+	unsigned long fc;
 
-	ret = need_fallback(key_len);
-	if (ret > 0) {
-		sctx->key_len = key_len;
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KM_AES_128 :
+	     (key_len == 24) ? CPACF_KM_AES_192 :
+	     (key_len == 32) ? CPACF_KM_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
 		return setkey_fallback_blk(tfm, in_key, key_len);
-	}
 
-	switch (key_len) {
-	case 16:
-		sctx->enc = CPACF_KM_AES_128_ENC;
-		sctx->dec = CPACF_KM_AES_128_DEC;
-		break;
-	case 24:
-		sctx->enc = CPACF_KM_AES_192_ENC;
-		sctx->dec = CPACF_KM_AES_192_DEC;
-		break;
-	case 32:
-		sctx->enc = CPACF_KM_AES_256_ENC;
-		sctx->dec = CPACF_KM_AES_256_DEC;
-		break;
-	}
-
-	return aes_set_key(tfm, in_key, key_len);
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
-static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			 struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes;
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int nbytes, n;
+	int ret;
 
-	while ((nbytes = walk->nbytes)) {
+	ret = blkcipher_walk_virt(desc, walk);
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		ret = cpacf_km(func, param, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= AES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_km(sctx->fc | modifier, sctx->key,
+			 walk->dst.virt.addr, walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
 
 	return ret;
@@ -347,11 +266,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk);
+	return ecb_aes_crypt(desc, 0, &walk);
 }
 
 static int ecb_aes_decrypt(struct blkcipher_desc *desc,
@@ -361,11 +280,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk);
+	return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static int fallback_init_blk(struct crypto_tfm *tfm)
@@ -420,64 +339,45 @@
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	int ret;
+	unsigned long fc;
 
-	ret = need_fallback(key_len);
-	if (ret > 0) {
-		sctx->key_len = key_len;
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KMC_AES_128 :
+	     (key_len == 24) ? CPACF_KMC_AES_192 :
+	     (key_len == 32) ? CPACF_KMC_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
 		return setkey_fallback_blk(tfm, in_key, key_len);
-	}
 
-	switch (key_len) {
-	case 16:
-		sctx->enc = CPACF_KMC_AES_128_ENC;
-		sctx->dec = CPACF_KMC_AES_128_DEC;
-		break;
-	case 24:
-		sctx->enc = CPACF_KMC_AES_192_ENC;
-		sctx->dec = CPACF_KMC_AES_192_DEC;
-		break;
-	case 32:
-		sctx->enc = CPACF_KMC_AES_256_ENC;
-		sctx->dec = CPACF_KMC_AES_256_DEC;
-		break;
-	}
-
-	return aes_set_key(tfm, in_key, key_len);
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
-static int cbc_aes_crypt(struct blkcipher_desc *desc, long func,
+static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			 struct blkcipher_walk *walk)
 {
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes = walk->nbytes;
+	unsigned int nbytes, n;
+	int ret;
 	struct {
 		u8 iv[AES_BLOCK_SIZE];
 		u8 key[AES_MAX_KEY_SIZE];
 	} param;
 
-	if (!nbytes)
-		goto out;
-
+	ret = blkcipher_walk_virt(desc, walk);
 	memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
 	memcpy(param.key, sctx->key, sctx->key_len);
-	do {
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		ret = cpacf_kmc(func, &param, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= AES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	} while ((nbytes = walk->nbytes));
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_kmc(sctx->fc | modifier, &param,
+			  walk->dst.virt.addr, walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
 	memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
-
-out:
 	return ret;
 }
 
@@ -488,11 +388,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, sctx->enc, &walk);
+	return cbc_aes_crypt(desc, 0, &walk);
 }
 
 static int cbc_aes_decrypt(struct blkcipher_desc *desc,
@@ -502,11 +402,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, sctx->dec, &walk);
+	return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_aes_alg = {
@@ -594,83 +494,67 @@
 			   unsigned int key_len)
 {
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
+	unsigned long fc;
 	int err;
 
 	err = xts_check_key(tfm, in_key, key_len);
 	if (err)
 		return err;
 
-	switch (key_len) {
-	case 32:
-		xts_ctx->enc = CPACF_KM_XTS_128_ENC;
-		xts_ctx->dec = CPACF_KM_XTS_128_DEC;
-		memcpy(xts_ctx->key + 16, in_key, 16);
-		memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16);
-		break;
-	case 48:
-		xts_ctx->enc = 0;
-		xts_ctx->dec = 0;
-		xts_fallback_setkey(tfm, in_key, key_len);
-		break;
-	case 64:
-		xts_ctx->enc = CPACF_KM_XTS_256_ENC;
-		xts_ctx->dec = CPACF_KM_XTS_256_DEC;
-		memcpy(xts_ctx->key, in_key, 32);
-		memcpy(xts_ctx->pcc_key, in_key + 32, 32);
-		break;
-	default:
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 32) ? CPACF_KM_XTS_128 :
+	     (key_len == 64) ? CPACF_KM_XTS_256 : 0;
+
+	/* Check if the function code is available */
+	xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!xts_ctx->fc)
+		return xts_fallback_setkey(tfm, in_key, key_len);
+
+	/* Split the XTS key into the two subkeys */
+	key_len = key_len / 2;
 	xts_ctx->key_len = key_len;
+	memcpy(xts_ctx->key, in_key, key_len);
+	memcpy(xts_ctx->pcc_key, in_key + key_len, key_len);
 	return 0;
 }
 
-static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
-			 struct s390_xts_ctx *xts_ctx,
+static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			 struct blkcipher_walk *walk)
 {
-	unsigned int offset = (xts_ctx->key_len >> 1) & 0x10;
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes = walk->nbytes;
-	unsigned int n;
-	u8 *in, *out;
-	struct pcc_param pcc_param;
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int offset, nbytes, n;
+	int ret;
+	struct {
+		u8 key[32];
+		u8 tweak[16];
+		u8 block[16];
+		u8 bit[16];
+		u8 xts[16];
+	} pcc_param;
 	struct {
 		u8 key[32];
 		u8 init[16];
 	} xts_param;
 
-	if (!nbytes)
-		goto out;
-
+	ret = blkcipher_walk_virt(desc, walk);
+	offset = xts_ctx->key_len & 0x10;
 	memset(pcc_param.block, 0, sizeof(pcc_param.block));
 	memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
 	memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
 	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
-	memcpy(pcc_param.key, xts_ctx->pcc_key, 32);
-	/* remove decipher modifier bit from 'func' and call PCC */
-	ret = cpacf_pcc(func & 0x7f, &pcc_param.key[offset]);
-	if (ret < 0)
-		return -EIO;
+	memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
+	cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
 
-	memcpy(xts_param.key, xts_ctx->key, 32);
+	memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
 	memcpy(xts_param.init, pcc_param.xts, 16);
-	do {
+
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-
-		ret = cpacf_km(func, &xts_param.key[offset], out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= AES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	} while ((nbytes = walk->nbytes));
-out:
+		cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
+			 walk->dst.virt.addr, walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
 	return ret;
 }
 
@@ -681,11 +565,11 @@
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(xts_ctx->key_len == 48))
+	if (unlikely(!xts_ctx->fc))
 		return xts_fallback_encrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk);
+	return xts_aes_crypt(desc, 0, &walk);
 }
 
 static int xts_aes_decrypt(struct blkcipher_desc *desc,
@@ -695,11 +579,11 @@
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(xts_ctx->key_len == 48))
+	if (unlikely(!xts_ctx->fc))
 		return xts_fallback_decrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk);
+	return xts_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static int xts_fallback_init(struct crypto_tfm *tfm)
@@ -750,108 +634,79 @@
 	}
 };
 
-static int xts_aes_alg_reg;
-
 static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	unsigned long fc;
 
-	switch (key_len) {
-	case 16:
-		sctx->enc = CPACF_KMCTR_AES_128_ENC;
-		sctx->dec = CPACF_KMCTR_AES_128_DEC;
-		break;
-	case 24:
-		sctx->enc = CPACF_KMCTR_AES_192_ENC;
-		sctx->dec = CPACF_KMCTR_AES_192_DEC;
-		break;
-	case 32:
-		sctx->enc = CPACF_KMCTR_AES_256_ENC;
-		sctx->dec = CPACF_KMCTR_AES_256_DEC;
-		break;
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KMCTR_AES_128 :
+	     (key_len == 24) ? CPACF_KMCTR_AES_192 :
+	     (key_len == 32) ? CPACF_KMCTR_AES_256 : 0;
 
-	return aes_set_key(tfm, in_key, key_len);
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_blk(tfm, in_key, key_len);
+
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
-static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 {
 	unsigned int i, n;
 
 	/* only use complete blocks, max. PAGE_SIZE */
+	memcpy(ctrptr, iv, AES_BLOCK_SIZE);
 	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
-	for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
-		memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE,
-		       AES_BLOCK_SIZE);
-		crypto_inc(ctrptr + i, AES_BLOCK_SIZE);
+	for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+		crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+		ctrptr += AES_BLOCK_SIZE;
 	}
 	return n;
 }
 
-static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
-			 struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
+static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+			 struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 buf[AES_BLOCK_SIZE], *ctrptr;
 	unsigned int n, nbytes;
-	u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE];
-	u8 *out, *in, *ctrptr = ctrbuf;
+	int ret, locked;
 
-	if (!walk->nbytes)
-		return ret;
+	locked = spin_trylock(&ctrblk_lock);
 
-	if (spin_trylock(&ctrblk_lock))
-		ctrptr = ctrblk;
-
-	memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE);
+	ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		while (nbytes >= AES_BLOCK_SIZE) {
-			if (ctrptr == ctrblk)
-				n = __ctrblk_init(ctrptr, nbytes);
-			else
-				n = AES_BLOCK_SIZE;
-			ret = cpacf_kmctr(func, sctx->key, out, in, n, ctrptr);
-			if (ret < 0 || ret != n) {
-				if (ctrptr == ctrblk)
-					spin_unlock(&ctrblk_lock);
-				return -EIO;
-			}
-			if (n > AES_BLOCK_SIZE)
-				memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE,
-				       AES_BLOCK_SIZE);
-			crypto_inc(ctrptr, AES_BLOCK_SIZE);
-			out += n;
-			in += n;
-			nbytes -= n;
-		}
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = AES_BLOCK_SIZE;
+		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+		cpacf_kmctr(sctx->fc | modifier, sctx->key,
+			    walk->dst.virt.addr, walk->src.virt.addr,
+			    n, ctrptr);
+		if (ctrptr == ctrblk)
+			memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE,
+			       AES_BLOCK_SIZE);
+		crypto_inc(walk->iv, AES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
-	if (ctrptr == ctrblk) {
-		if (nbytes)
-			memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE);
-		else
-			memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+	if (locked)
 		spin_unlock(&ctrblk_lock);
-	} else {
-		if (!nbytes)
-			memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
-	}
 	/*
 	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
 	 */
 	if (nbytes) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		ret = cpacf_kmctr(func, sctx->key, buf, in,
-				  AES_BLOCK_SIZE, ctrbuf);
-		if (ret < 0 || ret != AES_BLOCK_SIZE)
-			return -EIO;
-		memcpy(out, buf, nbytes);
-		crypto_inc(ctrbuf, AES_BLOCK_SIZE);
+		cpacf_kmctr(sctx->fc | modifier, sctx->key,
+			    buf, walk->src.virt.addr,
+			    AES_BLOCK_SIZE, walk->iv);
+		memcpy(walk->dst.virt.addr, buf, nbytes);
+		crypto_inc(walk->iv, AES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
-		memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE);
 	}
 
 	return ret;
@@ -864,8 +719,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
+	if (unlikely(!sctx->fc))
+		return fallback_blk_enc(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, sctx->enc, sctx, &walk);
+	return ctr_aes_crypt(desc, 0, &walk);
 }
 
 static int ctr_aes_decrypt(struct blkcipher_desc *desc,
@@ -875,19 +733,25 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
+	if (unlikely(!sctx->fc))
+		return fallback_blk_dec(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, sctx->dec, sctx, &walk);
+	return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg ctr_aes_alg = {
 	.cra_name		=	"ctr(aes)",
 	.cra_driver_name	=	"ctr-aes-s390",
 	.cra_priority		=	400,	/* combo: aes + ctr */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
+					CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		=	1,
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
+	.cra_init		=	fallback_init_blk,
+	.cra_exit		=	fallback_exit_blk,
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	AES_MIN_KEY_SIZE,
@@ -900,89 +764,79 @@
 	}
 };
 
-static int ctr_aes_alg_reg;
+static struct crypto_alg *aes_s390_algs_ptr[5];
+static int aes_s390_algs_num;
+
+static int aes_s390_register_alg(struct crypto_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_alg(alg);
+	if (!ret)
+		aes_s390_algs_ptr[aes_s390_algs_num++] = alg;
+	return ret;
+}
+
+static void aes_s390_fini(void)
+{
+	while (aes_s390_algs_num--)
+		crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]);
+	if (ctrblk)
+		free_page((unsigned long) ctrblk);
+}
 
 static int __init aes_s390_init(void)
 {
 	int ret;
 
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_128_ENC))
-		keylen_flag |= AES_KEYLEN_128;
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_192_ENC))
-		keylen_flag |= AES_KEYLEN_192;
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_256_ENC))
-		keylen_flag |= AES_KEYLEN_256;
+	/* Query available functions for KM, KMC and KMCTR */
+	cpacf_query(CPACF_KM, &km_functions);
+	cpacf_query(CPACF_KMC, &kmc_functions);
+	cpacf_query(CPACF_KMCTR, &kmctr_functions);
 
-	if (!keylen_flag)
-		return -EOPNOTSUPP;
-
-	/* z9 109 and z9 BC/EC only support 128 bit key length */
-	if (keylen_flag == AES_KEYLEN_128)
-		pr_info("AES hardware acceleration is only available for"
-			" 128-bit keys\n");
-
-	ret = crypto_register_alg(&aes_alg);
-	if (ret)
-		goto aes_err;
-
-	ret = crypto_register_alg(&ecb_aes_alg);
-	if (ret)
-		goto ecb_aes_err;
-
-	ret = crypto_register_alg(&cbc_aes_alg);
-	if (ret)
-		goto cbc_aes_err;
-
-	if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128_ENC) &&
-	    cpacf_query(CPACF_KM, CPACF_KM_XTS_256_ENC)) {
-		ret = crypto_register_alg(&xts_aes_alg);
+	if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
+		ret = aes_s390_register_alg(&aes_alg);
 		if (ret)
-			goto xts_aes_err;
-		xts_aes_alg_reg = 1;
+			goto out_err;
+		ret = aes_s390_register_alg(&ecb_aes_alg);
+		if (ret)
+			goto out_err;
 	}
 
-	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128_ENC) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192_ENC) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256_ENC)) {
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
+		ret = aes_s390_register_alg(&cbc_aes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
+		ret = aes_s390_register_alg(&xts_aes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_128) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_192) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_256)) {
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
-			goto ctr_aes_err;
+			goto out_err;
 		}
-		ret = crypto_register_alg(&ctr_aes_alg);
-		if (ret) {
-			free_page((unsigned long) ctrblk);
-			goto ctr_aes_err;
-		}
-		ctr_aes_alg_reg = 1;
+		ret = aes_s390_register_alg(&ctr_aes_alg);
+		if (ret)
+			goto out_err;
 	}
 
-out:
+	return 0;
+out_err:
+	aes_s390_fini();
 	return ret;
-
-ctr_aes_err:
-	crypto_unregister_alg(&xts_aes_alg);
-xts_aes_err:
-	crypto_unregister_alg(&cbc_aes_alg);
-cbc_aes_err:
-	crypto_unregister_alg(&ecb_aes_alg);
-ecb_aes_err:
-	crypto_unregister_alg(&aes_alg);
-aes_err:
-	goto out;
-}
-
-static void __exit aes_s390_fini(void)
-{
-	if (ctr_aes_alg_reg) {
-		crypto_unregister_alg(&ctr_aes_alg);
-		free_page((unsigned long) ctrblk);
-	}
-	if (xts_aes_alg_reg)
-		crypto_unregister_alg(&xts_aes_alg);
-	crypto_unregister_alg(&cbc_aes_alg);
-	crypto_unregister_alg(&ecb_aes_alg);
-	crypto_unregister_alg(&aes_alg);
 }
 
 module_cpu_feature_match(MSA, aes_s390_init);
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 577ae1d..992e630 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -51,6 +51,9 @@
 		struct kernel_fpu vxstate;				    \
 		unsigned long prealign, aligned, remaining;		    \
 									    \
+		if (datalen < VX_MIN_LEN + VX_ALIGN_MASK)		    \
+			return ___crc32_sw(crc, data, datalen);		    \
+									    \
 		if ((unsigned long)data & VX_ALIGN_MASK) {		    \
 			prealign = VX_ALIGNMENT -			    \
 				  ((unsigned long)data & VX_ALIGN_MASK);    \
@@ -59,15 +62,12 @@
 			data = (void *)((unsigned long)data + prealign);    \
 		}							    \
 									    \
-		if (datalen < VX_MIN_LEN)				    \
-			return ___crc32_sw(crc, data, datalen);		    \
-									    \
 		aligned = datalen & ~VX_ALIGN_MASK;			    \
 		remaining = datalen & VX_ALIGN_MASK;			    \
 									    \
 		kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);		    \
 		crc = ___crc32_vx(crc, data, aligned);			    \
-		kernel_fpu_end(&vxstate);				    \
+		kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);		    \
 									    \
 		if (remaining)						    \
 			crc = ___crc32_sw(crc, data + aligned, remaining);  \
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 697e71a..8b83144 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -27,6 +27,8 @@
 static u8 *ctrblk;
 static DEFINE_SPINLOCK(ctrblk_lock);
 
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+
 struct s390_des_ctx {
 	u8 iv[DES_BLOCK_SIZE];
 	u8 key[DES3_KEY_SIZE];
@@ -36,12 +38,12 @@
 		      unsigned int key_len)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
 	u32 tmp[DES_EXPKEY_WORDS];
 
 	/* check for weak keys */
-	if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+	if (!des_ekey(tmp, key) &&
+	    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
 
@@ -53,14 +55,15 @@
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_DEA_ENC, ctx->key, out, in, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_DEA, ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_DEA_DEC, ctx->key, out, in, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_DEA | CPACF_DECRYPT,
+		 ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static struct crypto_alg des_alg = {
@@ -82,61 +85,46 @@
 	}
 };
 
-static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
-			    u8 *key, struct blkcipher_walk *walk)
-{
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes;
-
-	while ((nbytes = walk->nbytes)) {
-		/* only use complete blocks */
-		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		ret = cpacf_km(func, key, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= DES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	}
-
-	return ret;
-}
-
-static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
+static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
 			    struct blkcipher_walk *walk)
 {
 	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes = walk->nbytes;
+	unsigned int nbytes, n;
+	int ret;
+
+	ret = blkcipher_walk_virt(desc, walk);
+	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+		/* only use complete blocks */
+		n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		cpacf_km(fc, ctx->key, walk->dst.virt.addr,
+			 walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
+	return ret;
+}
+
+static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
+			    struct blkcipher_walk *walk)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int nbytes, n;
+	int ret;
 	struct {
 		u8 iv[DES_BLOCK_SIZE];
 		u8 key[DES3_KEY_SIZE];
 	} param;
 
-	if (!nbytes)
-		goto out;
-
+	ret = blkcipher_walk_virt(desc, walk);
 	memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
 	memcpy(param.key, ctx->key, DES3_KEY_SIZE);
-	do {
+	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		ret = cpacf_kmc(func, &param, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= DES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	} while ((nbytes = walk->nbytes));
+		n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		cpacf_kmc(fc, &param, walk->dst.virt.addr,
+			  walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
 	memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
-
-out:
 	return ret;
 }
 
@@ -144,22 +132,20 @@
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA_ENC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk);
 }
 
 static int ecb_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA_DEC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg ecb_des_alg = {
@@ -189,7 +175,7 @@
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA_ENC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk);
 }
 
 static int cbc_des_decrypt(struct blkcipher_desc *desc,
@@ -199,7 +185,7 @@
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA_DEC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_des_alg = {
@@ -240,13 +226,12 @@
 		       unsigned int key_len)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
 
 	if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
 	    crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
 			  DES_KEY_SIZE)) &&
-	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+	    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
 	memcpy(ctx->key, key, key_len);
@@ -257,14 +242,15 @@
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_TDEA_192_ENC, ctx->key, dst, src, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_TDEA_192, ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
 static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_TDEA_192_DEC, ctx->key, dst, src, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_TDEA_192 | CPACF_DECRYPT,
+		 ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
 static struct crypto_alg des3_alg = {
@@ -290,22 +276,21 @@
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_ENC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk);
 }
 
 static int ecb_des3_decrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_DEC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
+				&walk);
 }
 
 static struct crypto_alg ecb_des3_alg = {
@@ -335,7 +320,7 @@
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_ENC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk);
 }
 
 static int cbc_des3_decrypt(struct blkcipher_desc *desc,
@@ -345,7 +330,8 @@
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_DEC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT,
+				&walk);
 }
 
 static struct crypto_alg cbc_des3_alg = {
@@ -369,81 +355,54 @@
 	}
 };
 
-static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 {
 	unsigned int i, n;
 
 	/* align to block size, max. PAGE_SIZE */
 	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1);
-	for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
-		memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-		crypto_inc(ctrptr + i, DES_BLOCK_SIZE);
+	memcpy(ctrptr, iv, DES_BLOCK_SIZE);
+	for (i = (n / DES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + DES_BLOCK_SIZE, ctrptr, DES_BLOCK_SIZE);
+		crypto_inc(ctrptr + DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+		ctrptr += DES_BLOCK_SIZE;
 	}
 	return n;
 }
 
-static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
-			    struct s390_des_ctx *ctx,
+static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
 			    struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 buf[DES_BLOCK_SIZE], *ctrptr;
 	unsigned int n, nbytes;
-	u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE];
-	u8 *out, *in, *ctrptr = ctrbuf;
+	int ret, locked;
 
-	if (!walk->nbytes)
-		return ret;
+	locked = spin_trylock(&ctrblk_lock);
 
-	if (spin_trylock(&ctrblk_lock))
-		ctrptr = ctrblk;
-
-	memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE);
+	ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		while (nbytes >= DES_BLOCK_SIZE) {
-			if (ctrptr == ctrblk)
-				n = __ctrblk_init(ctrptr, nbytes);
-			else
-				n = DES_BLOCK_SIZE;
-			ret = cpacf_kmctr(func, ctx->key, out, in, n, ctrptr);
-			if (ret < 0 || ret != n) {
-				if (ctrptr == ctrblk)
-					spin_unlock(&ctrblk_lock);
-				return -EIO;
-			}
-			if (n > DES_BLOCK_SIZE)
-				memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE,
-				       DES_BLOCK_SIZE);
-			crypto_inc(ctrptr, DES_BLOCK_SIZE);
-			out += n;
-			in += n;
-			nbytes -= n;
-		}
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = DES_BLOCK_SIZE;
+		if (nbytes >= 2*DES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+		ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv;
+		cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr,
+			    walk->src.virt.addr, n, ctrptr);
+		if (ctrptr == ctrblk)
+			memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE,
+				DES_BLOCK_SIZE);
+		crypto_inc(walk->iv, DES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
-	if (ctrptr == ctrblk) {
-		if (nbytes)
-			memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE);
-		else
-			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+	if (locked)
 		spin_unlock(&ctrblk_lock);
-	} else {
-		if (!nbytes)
-			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
-	}
 	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
 	if (nbytes) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		ret = cpacf_kmctr(func, ctx->key, buf, in,
-				  DES_BLOCK_SIZE, ctrbuf);
-		if (ret < 0 || ret != DES_BLOCK_SIZE)
-			return -EIO;
-		memcpy(out, buf, nbytes);
-		crypto_inc(ctrbuf, DES_BLOCK_SIZE);
+		cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr,
+			    DES_BLOCK_SIZE, walk->iv);
+		memcpy(walk->dst.virt.addr, buf, nbytes);
+		crypto_inc(walk->iv, DES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
-		memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE);
 	}
 	return ret;
 }
@@ -452,22 +411,20 @@
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_ENC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk);
 }
 
 static int ctr_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_DEC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg ctr_des_alg = {
@@ -495,22 +452,21 @@
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_ENC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk);
 }
 
 static int ctr_des3_decrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_DEC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
+				&walk);
 }
 
 static struct crypto_alg ctr_des3_alg = {
@@ -534,83 +490,87 @@
 	}
 };
 
+static struct crypto_alg *des_s390_algs_ptr[8];
+static int des_s390_algs_num;
+
+static int des_s390_register_alg(struct crypto_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_alg(alg);
+	if (!ret)
+		des_s390_algs_ptr[des_s390_algs_num++] = alg;
+	return ret;
+}
+
+static void des_s390_exit(void)
+{
+	while (des_s390_algs_num--)
+		crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+	if (ctrblk)
+		free_page((unsigned long) ctrblk);
+}
+
 static int __init des_s390_init(void)
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KM, CPACF_KM_DEA_ENC) ||
-	    !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192_ENC))
-		return -EOPNOTSUPP;
+	/* Query available functions for KM, KMC and KMCTR */
+	cpacf_query(CPACF_KM, &km_functions);
+	cpacf_query(CPACF_KMC, &kmc_functions);
+	cpacf_query(CPACF_KMCTR, &kmctr_functions);
 
-	ret = crypto_register_alg(&des_alg);
-	if (ret)
-		goto des_err;
-	ret = crypto_register_alg(&ecb_des_alg);
-	if (ret)
-		goto ecb_des_err;
-	ret = crypto_register_alg(&cbc_des_alg);
-	if (ret)
-		goto cbc_des_err;
-	ret = crypto_register_alg(&des3_alg);
-	if (ret)
-		goto des3_err;
-	ret = crypto_register_alg(&ecb_des3_alg);
-	if (ret)
-		goto ecb_des3_err;
-	ret = crypto_register_alg(&cbc_des3_alg);
-	if (ret)
-		goto cbc_des3_err;
+	if (cpacf_test_func(&km_functions, CPACF_KM_DEA)) {
+		ret = des_s390_register_alg(&des_alg);
+		if (ret)
+			goto out_err;
+		ret = des_s390_register_alg(&ecb_des_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
+		ret = des_s390_register_alg(&cbc_des_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&km_functions, CPACF_KM_TDEA_192)) {
+		ret = des_s390_register_alg(&des3_alg);
+		if (ret)
+			goto out_err;
+		ret = des_s390_register_alg(&ecb_des3_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
+		ret = des_s390_register_alg(&cbc_des3_alg);
+		if (ret)
+			goto out_err;
+	}
 
-	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA_ENC) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192_ENC)) {
-		ret = crypto_register_alg(&ctr_des_alg);
-		if (ret)
-			goto ctr_des_err;
-		ret = crypto_register_alg(&ctr_des3_alg);
-		if (ret)
-			goto ctr_des3_err;
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
-			goto ctr_mem_err;
+			goto out_err;
 		}
 	}
-out:
-	return ret;
 
-ctr_mem_err:
-	crypto_unregister_alg(&ctr_des3_alg);
-ctr_des3_err:
-	crypto_unregister_alg(&ctr_des_alg);
-ctr_des_err:
-	crypto_unregister_alg(&cbc_des3_alg);
-cbc_des3_err:
-	crypto_unregister_alg(&ecb_des3_alg);
-ecb_des3_err:
-	crypto_unregister_alg(&des3_alg);
-des3_err:
-	crypto_unregister_alg(&cbc_des_alg);
-cbc_des_err:
-	crypto_unregister_alg(&ecb_des_alg);
-ecb_des_err:
-	crypto_unregister_alg(&des_alg);
-des_err:
-	goto out;
-}
-
-static void __exit des_s390_exit(void)
-{
-	if (ctrblk) {
-		crypto_unregister_alg(&ctr_des_alg);
-		crypto_unregister_alg(&ctr_des3_alg);
-		free_page((unsigned long) ctrblk);
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
+		ret = des_s390_register_alg(&ctr_des_alg);
+		if (ret)
+			goto out_err;
 	}
-	crypto_unregister_alg(&cbc_des3_alg);
-	crypto_unregister_alg(&ecb_des3_alg);
-	crypto_unregister_alg(&des3_alg);
-	crypto_unregister_alg(&cbc_des_alg);
-	crypto_unregister_alg(&ecb_des_alg);
-	crypto_unregister_alg(&des_alg);
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
+		ret = des_s390_register_alg(&ctr_des3_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	return 0;
+out_err:
+	des_s390_exit();
+	return ret;
 }
 
 module_cpu_feature_match(MSA, des_s390_init);
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index ab68de7..564616d 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -58,7 +58,6 @@
 	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 	unsigned int n;
 	u8 *buf = dctx->buffer;
-	int ret;
 
 	if (dctx->bytes) {
 		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
@@ -71,18 +70,14 @@
 		src += n;
 
 		if (!dctx->bytes) {
-			ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
-					 GHASH_BLOCK_SIZE);
-			if (ret != GHASH_BLOCK_SIZE)
-				return -EIO;
+			cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
+				   GHASH_BLOCK_SIZE);
 		}
 	}
 
 	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
 	if (n) {
-		ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
-		if (ret != n)
-			return -EIO;
+		cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
 		src += n;
 		srclen -= n;
 	}
@@ -98,17 +93,12 @@
 static int ghash_flush(struct ghash_desc_ctx *dctx)
 {
 	u8 *buf = dctx->buffer;
-	int ret;
 
 	if (dctx->bytes) {
 		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
 
 		memset(pos, 0, dctx->bytes);
-
-		ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
-		if (ret != GHASH_BLOCK_SIZE)
-			return -EIO;
-
+		cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
 		dctx->bytes = 0;
 	}
 
@@ -146,7 +136,7 @@
 
 static int __init ghash_mod_init(void)
 {
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_GHASH))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH))
 		return -EOPNOTSUPP;
 
 	return crypto_register_shash(&ghash_alg);
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 41527b1..9cc050f 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -135,12 +135,7 @@
 		else
 			h = ebuf;
 		/* generate sha256 from this page */
-		if (cpacf_kimd(CPACF_KIMD_SHA_256, h,
-			       pg, PAGE_SIZE) != PAGE_SIZE) {
-			prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
-			ret = -EIO;
-			goto out;
-		}
+		cpacf_kimd(CPACF_KIMD_SHA_256, h, pg, PAGE_SIZE);
 		if (n < sizeof(hash))
 			memcpy(ebuf, hash, n);
 		ret += n;
@@ -148,7 +143,6 @@
 		nbytes -= n;
 	}
 
-out:
 	free_page((unsigned long)pg);
 	return ret;
 }
@@ -160,13 +154,11 @@
 {
 	__u64 entropy[4];
 	unsigned int i;
-	int ret;
 
 	for (i = 0; i < 16; i++) {
-		ret = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
-				(char *)entropy, (char *)entropy,
-				sizeof(entropy));
-		BUG_ON(ret < 0 || ret != sizeof(entropy));
+		cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+			  (char *) entropy, (char *) entropy,
+			  sizeof(entropy));
 		memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
 	}
 }
@@ -303,21 +295,14 @@
 		0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c,
 		0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
 
-	int ret = 0;
 	u8 buf[sizeof(random)];
 	struct ppno_ws_s ws;
 
 	memset(&ws, 0, sizeof(ws));
 
 	/* initial seed */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, &ws, NULL, 0,
-			 seed, sizeof(seed));
-	if (ret < 0) {
-		pr_err("The prng self test seed operation for the "
-		       "SHA-512 mode failed with rc=%d\n", ret);
-		prng_errorflag = PRNG_SELFTEST_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+		   &ws, NULL, 0, seed, sizeof(seed));
 
 	/* check working states V and C */
 	if (memcmp(ws.V, V0, sizeof(V0)) != 0
@@ -329,22 +314,10 @@
 	}
 
 	/* generate random bytes */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			 &ws, buf, sizeof(buf), NULL, 0);
-	if (ret < 0) {
-		pr_err("The prng self test generate operation for "
-		       "the SHA-512 mode failed with rc=%d\n", ret);
-		prng_errorflag = PRNG_SELFTEST_FAILED;
-		return -EIO;
-	}
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			 &ws, buf, sizeof(buf), NULL, 0);
-	if (ret < 0) {
-		pr_err("The prng self test generate operation for "
-		       "the SHA-512 mode failed with rc=%d\n", ret);
-		prng_errorflag = PRNG_SELFTEST_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+		   &ws, buf, sizeof(buf), NULL, 0);
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+		   &ws, buf, sizeof(buf), NULL, 0);
 
 	/* check against expected data */
 	if (memcmp(buf, random, sizeof(random)) != 0) {
@@ -392,26 +365,16 @@
 	get_tod_clock_ext(seed + 48);
 
 	/* initial seed of the ppno drng */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-			 &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
-	if (ret < 0) {
-		prng_errorflag = PRNG_SEED_FAILED;
-		ret = -EIO;
-		goto outfree;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
 
 	/* if fips mode is enabled, generate a first block of random
 	   bytes for the FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
 		prng_data->prev = prng_data->buf + prng_chunk_size;
-		ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-				 &prng_data->ppnows,
-				 prng_data->prev, prng_chunk_size, NULL, 0);
-		if (ret < 0 || ret != prng_chunk_size) {
-			prng_errorflag = PRNG_GEN_FAILED;
-			ret = -EIO;
-			goto outfree;
-		}
+		cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+			   &prng_data->ppnows,
+			   prng_data->prev, prng_chunk_size, NULL, 0);
 	}
 
 	return 0;
@@ -440,12 +403,8 @@
 		return ret;
 
 	/* do a reseed of the ppno drng with this bytestring */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-			 &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
-	if (ret) {
-		prng_errorflag = PRNG_RESEED_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
 
 	return 0;
 }
@@ -463,12 +422,8 @@
 	}
 
 	/* PPNO generate */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			 &prng_data->ppnows, buf, nbytes, NULL, 0);
-	if (ret < 0 || ret != nbytes) {
-		prng_errorflag = PRNG_GEN_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+		   &prng_data->ppnows, buf, nbytes, NULL, 0);
 
 	/* FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
@@ -479,7 +434,7 @@
 		memcpy(prng_data->prev, buf, nbytes);
 	}
 
-	return ret;
+	return nbytes;
 }
 
 
@@ -494,7 +449,7 @@
 static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
 			      size_t nbytes, loff_t *ppos)
 {
-	int chunk, n, tmp, ret = 0;
+	int chunk, n, ret = 0;
 
 	/* lock prng_data struct */
 	if (mutex_lock_interruptible(&prng_data->mutex))
@@ -545,13 +500,9 @@
 		 *
 		 * Note: you can still get strict X9.17 conformity by setting
 		 * prng_chunk_size to 8 bytes.
-		*/
-		tmp = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
-				prng_data->buf, prng_data->buf, n);
-		if (tmp < 0 || tmp != n) {
-			ret = -EIO;
-			break;
-		}
+		 */
+		cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+			  prng_data->buf, prng_data->buf, n);
 
 		prng_data->prngws.byte_counter += n;
 		prng_data->prngws.reseed_counter += n;
@@ -806,13 +757,13 @@
 	int ret;
 
 	/* check if the CPU has a PRNG */
-	if (!cpacf_query(CPACF_KMC, CPACF_KMC_PRNG))
+	if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
 		return -EOPNOTSUPP;
 
 	/* choose prng mode */
 	if (prng_mode != PRNG_MODE_TDES) {
 		/* check for MSA5 support for PPNO operations */
-		if (!cpacf_query(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
+		if (!cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
 			if (prng_mode == PRNG_MODE_SHA512) {
 				pr_err("The prng module cannot "
 				       "start in SHA-512 mode\n");
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5fbf91b..c7de53d 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -91,7 +91,7 @@
 
 static int __init sha1_s390_init(void)
 {
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_1))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
 		return -EOPNOTSUPP;
 	return crypto_register_shash(&alg);
 }
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 10aac0b..53c2779 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -123,7 +123,7 @@
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_256))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
 		return -EOPNOTSUPP;
 	ret = crypto_register_shash(&sha256_alg);
 	if (ret < 0)
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index ea85757..2f4caa1 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -133,7 +133,7 @@
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_512))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
 		return -EOPNOTSUPP;
 	if ((ret = crypto_register_shash(&sha512_alg)) < 0)
 		goto out;
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index 8e90816..c740f77 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -22,8 +22,7 @@
 {
 	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
-	unsigned int index;
-	int ret;
+	unsigned int index, n;
 
 	/* how much is already in the buffer? */
 	index = ctx->count & (bsize - 1);
@@ -35,9 +34,7 @@
 	/* process one stored block */
 	if (index) {
 		memcpy(ctx->buf + index, data, bsize - index);
-		ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
-		if (ret != bsize)
-			return -EIO;
+		cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
 		data += bsize - index;
 		len -= bsize - index;
 		index = 0;
@@ -45,12 +42,10 @@
 
 	/* process as many blocks as possible */
 	if (len >= bsize) {
-		ret = cpacf_kimd(ctx->func, ctx->state, data,
-				 len & ~(bsize - 1));
-		if (ret != (len & ~(bsize - 1)))
-			return -EIO;
-		data += ret;
-		len -= ret;
+		n = len & ~(bsize - 1);
+		cpacf_kimd(ctx->func, ctx->state, data, n);
+		data += n;
+		len -= n;
 	}
 store:
 	if (len)
@@ -66,7 +61,6 @@
 	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
 	u64 bits;
 	unsigned int index, end, plen;
-	int ret;
 
 	/* SHA-512 uses 128 bit padding length */
 	plen = (bsize > SHA256_BLOCK_SIZE) ? 16 : 8;
@@ -88,10 +82,7 @@
 	 */
 	bits = ctx->count * 8;
 	memcpy(ctx->buf + end - 8, &bits, sizeof(bits));
-
-	ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, end);
-	if (ret != end)
-		return -EIO;
+	cpacf_kimd(ctx->func, ctx->state, ctx->buf, end);
 
 	/* copy digest to out */
 	memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index ccccebe..2d40ef0 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -172,7 +172,6 @@
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_RCU_TRACE=y
 CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
@@ -234,7 +233,7 @@
 CONFIG_CRYPTO_SHA512_S390=m
 CONFIG_CRYPTO_DES_S390=m
 CONFIG_CRYPTO_AES_S390=m
-CONFIG_CRYPTO_CRC32_S390=m
+CONFIG_CRYPTO_CRC32_S390=y
 CONFIG_CRC7=m
 # CONFIG_XZ_DEC_X86 is not set
 # CONFIG_XZ_DEC_POWERPC is not set
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index d28621d..2c680db 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -28,67 +28,51 @@
 #define CPACF_PPNO		0xb93c		/* MSA5 */
 
 /*
- * Function codes for the KM (CIPHER MESSAGE)
- * instruction (0x80 is the decipher modifier bit)
+ * Decryption modifier bit
+ */
+#define CPACF_DECRYPT		0x80
+
+/*
+ * Function codes for the KM (CIPHER MESSAGE) instruction
  */
 #define CPACF_KM_QUERY		0x00
-#define CPACF_KM_DEA_ENC	0x01
-#define CPACF_KM_DEA_DEC	0x81
-#define CPACF_KM_TDEA_128_ENC	0x02
-#define CPACF_KM_TDEA_128_DEC	0x82
-#define CPACF_KM_TDEA_192_ENC	0x03
-#define CPACF_KM_TDEA_192_DEC	0x83
-#define CPACF_KM_AES_128_ENC	0x12
-#define CPACF_KM_AES_128_DEC	0x92
-#define CPACF_KM_AES_192_ENC	0x13
-#define CPACF_KM_AES_192_DEC	0x93
-#define CPACF_KM_AES_256_ENC	0x14
-#define CPACF_KM_AES_256_DEC	0x94
-#define CPACF_KM_XTS_128_ENC	0x32
-#define CPACF_KM_XTS_128_DEC	0xb2
-#define CPACF_KM_XTS_256_ENC	0x34
-#define CPACF_KM_XTS_256_DEC	0xb4
+#define CPACF_KM_DEA		0x01
+#define CPACF_KM_TDEA_128	0x02
+#define CPACF_KM_TDEA_192	0x03
+#define CPACF_KM_AES_128	0x12
+#define CPACF_KM_AES_192	0x13
+#define CPACF_KM_AES_256	0x14
+#define CPACF_KM_XTS_128	0x32
+#define CPACF_KM_XTS_256	0x34
 
 /*
  * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KMC_QUERY		0x00
-#define CPACF_KMC_DEA_ENC	0x01
-#define CPACF_KMC_DEA_DEC	0x81
-#define CPACF_KMC_TDEA_128_ENC	0x02
-#define CPACF_KMC_TDEA_128_DEC	0x82
-#define CPACF_KMC_TDEA_192_ENC	0x03
-#define CPACF_KMC_TDEA_192_DEC	0x83
-#define CPACF_KMC_AES_128_ENC	0x12
-#define CPACF_KMC_AES_128_DEC	0x92
-#define CPACF_KMC_AES_192_ENC	0x13
-#define CPACF_KMC_AES_192_DEC	0x93
-#define CPACF_KMC_AES_256_ENC	0x14
-#define CPACF_KMC_AES_256_DEC	0x94
+#define CPACF_KMC_DEA		0x01
+#define CPACF_KMC_TDEA_128	0x02
+#define CPACF_KMC_TDEA_192	0x03
+#define CPACF_KMC_AES_128	0x12
+#define CPACF_KMC_AES_192	0x13
+#define CPACF_KMC_AES_256	0x14
 #define CPACF_KMC_PRNG		0x43
 
 /*
  * Function codes for the KMCTR (CIPHER MESSAGE WITH COUNTER)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
-#define CPACF_KMCTR_QUERY	 0x00
-#define CPACF_KMCTR_DEA_ENC	 0x01
-#define CPACF_KMCTR_DEA_DEC	 0x81
-#define CPACF_KMCTR_TDEA_128_ENC 0x02
-#define CPACF_KMCTR_TDEA_128_DEC 0x82
-#define CPACF_KMCTR_TDEA_192_ENC 0x03
-#define CPACF_KMCTR_TDEA_192_DEC 0x83
-#define CPACF_KMCTR_AES_128_ENC	 0x12
-#define CPACF_KMCTR_AES_128_DEC	 0x92
-#define CPACF_KMCTR_AES_192_ENC	 0x13
-#define CPACF_KMCTR_AES_192_DEC	 0x93
-#define CPACF_KMCTR_AES_256_ENC	 0x14
-#define CPACF_KMCTR_AES_256_DEC	 0x94
+#define CPACF_KMCTR_QUERY	0x00
+#define CPACF_KMCTR_DEA		0x01
+#define CPACF_KMCTR_TDEA_128	0x02
+#define CPACF_KMCTR_TDEA_192	0x03
+#define CPACF_KMCTR_AES_128	0x12
+#define CPACF_KMCTR_AES_192	0x13
+#define CPACF_KMCTR_AES_256	0x14
 
 /*
  * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KIMD_QUERY	0x00
 #define CPACF_KIMD_SHA_1	0x01
@@ -98,7 +82,7 @@
 
 /*
  * Function codes for the KLMD (COMPUTE LAST MESSAGE DIGEST)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KLMD_QUERY	0x00
 #define CPACF_KLMD_SHA_1	0x01
@@ -107,7 +91,7 @@
 
 /*
  * function codes for the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KMAC_QUERY	0x00
 #define CPACF_KMAC_DEA		0x01
@@ -116,12 +100,14 @@
 
 /*
  * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_PPNO_QUERY		0x00
 #define CPACF_PPNO_SHA512_DRNG_GEN	0x03
 #define CPACF_PPNO_SHA512_DRNG_SEED	0x83
 
+typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
+
 /**
  * cpacf_query() - check if a specific CPACF function is available
  * @opcode: the opcode of the crypto instruction
@@ -132,55 +118,66 @@
  *
  * Returns 1 if @func is available for @opcode, 0 otherwise
  */
-static inline void __cpacf_query(unsigned int opcode, unsigned char *status)
+static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
 {
-	typedef struct { unsigned char _[16]; } status_type;
 	register unsigned long r0 asm("0") = 0;	/* query function */
-	register unsigned long r1 asm("1") = (unsigned long) status;
+	register unsigned long r1 asm("1") = (unsigned long) mask;
 
 	asm volatile(
 		"	spm 0\n" /* pckmo doesn't change the cc */
 		/* Parameter registers are ignored, but may not be 0 */
 		"0:	.insn	rrf,%[opc] << 16,2,2,2,0\n"
 		"	brc	1,0b\n"	/* handle partial completion */
-		: "=m" (*(status_type *) status)
+		: "=m" (*mask)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
 		: "cc");
 }
 
-static inline int cpacf_query(unsigned int opcode, unsigned int func)
+static inline int __cpacf_check_opcode(unsigned int opcode)
 {
-	unsigned char status[16];
-
 	switch (opcode) {
 	case CPACF_KMAC:
 	case CPACF_KM:
 	case CPACF_KMC:
 	case CPACF_KIMD:
 	case CPACF_KLMD:
-		if (!test_facility(17))	/* check for MSA */
-			return 0;
-		break;
+		return test_facility(17);	/* check for MSA */
 	case CPACF_PCKMO:
-		if (!test_facility(76))	/* check for MSA3 */
-			return 0;
-		break;
+		return test_facility(76);	/* check for MSA3 */
 	case CPACF_KMF:
 	case CPACF_KMO:
 	case CPACF_PCC:
 	case CPACF_KMCTR:
-		if (!test_facility(77))	/* check for MSA4 */
-			return 0;
-		break;
+		return test_facility(77);	/* check for MSA4 */
 	case CPACF_PPNO:
-		if (!test_facility(57))	/* check for MSA5 */
-			return 0;
-		break;
+		return test_facility(57);	/* check for MSA5 */
 	default:
 		BUG();
 	}
-	__cpacf_query(opcode, status);
-	return (status[func >> 3] & (0x80 >> (func & 7))) != 0;
+}
+
+static inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+{
+	if (__cpacf_check_opcode(opcode)) {
+		__cpacf_query(opcode, mask);
+		return 1;
+	}
+	memset(mask, 0, sizeof(*mask));
+	return 0;
+}
+
+static inline int cpacf_test_func(cpacf_mask_t *mask, unsigned int func)
+{
+	return (mask->bytes[func >> 3] & (0x80 >> (func & 7))) != 0;
+}
+
+static inline int cpacf_query_func(unsigned int opcode, unsigned int func)
+{
+	cpacf_mask_t mask;
+
+	if (cpacf_query(opcode, &mask))
+		return cpacf_test_func(&mask, func);
+	return 0;
 }
 
 /**
@@ -194,7 +191,7 @@
  * Returns 0 for the query func, number of processed bytes for
  * encryption/decryption funcs
  */
-static inline int cpacf_km(long func, void *param,
+static inline int cpacf_km(unsigned long func, void *param,
 			   u8 *dest, const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -224,7 +221,7 @@
  * Returns 0 for the query func, number of processed bytes for
  * encryption/decryption funcs
  */
-static inline int cpacf_kmc(long func, void *param,
+static inline int cpacf_kmc(unsigned long func, void *param,
 			    u8 *dest, const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -250,11 +247,9 @@
  * @param: address of parameter block; see POP for details on each func
  * @src: address of source memory area
  * @src_len: length of src operand in bytes
- *
- * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_kimd(long func, void *param,
-			     const u8 *src, long src_len)
+static inline void cpacf_kimd(unsigned long func, void *param,
+			      const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -267,8 +262,6 @@
 		: [src] "+a" (r2), [len] "+d" (r3)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KIMD)
 		: "cc", "memory");
-
-	return src_len - r3;
 }
 
 /**
@@ -277,11 +270,9 @@
  * @param: address of parameter block; see POP for details on each func
  * @src: address of source memory area
  * @src_len: length of src operand in bytes
- *
- * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_klmd(long func, void *param,
-			     const u8 *src, long src_len)
+static inline void cpacf_klmd(unsigned long func, void *param,
+			      const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -294,8 +285,6 @@
 		: [src] "+a" (r2), [len] "+d" (r3)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KLMD)
 		: "cc", "memory");
-
-	return src_len - r3;
 }
 
 /**
@@ -308,7 +297,7 @@
  *
  * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_kmac(long func, void *param,
+static inline int cpacf_kmac(unsigned long func, void *param,
 			     const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -338,7 +327,7 @@
  * Returns 0 for the query func, number of processed bytes for
  * encryption/decryption funcs
  */
-static inline int cpacf_kmctr(long func, void *param, u8 *dest,
+static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
 			      const u8 *src, long src_len, u8 *counter)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -368,13 +357,10 @@
  * @dest_len: size of destination memory area in bytes
  * @seed: address of seed data
  * @seed_len: size of seed data in bytes
- *
- * Returns 0 for the query func, number of random bytes stored in
- * dest buffer for generate function
  */
-static inline int cpacf_ppno(long func, void *param,
-			     u8 *dest, long dest_len,
-			     const u8 *seed, long seed_len)
+static inline void cpacf_ppno(unsigned long func, void *param,
+			      u8 *dest, long dest_len,
+			      const u8 *seed, long seed_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -390,8 +376,6 @@
 		: [fc] "d" (r0), [pba] "a" (r1),
 		  [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO)
 		: "cc", "memory");
-
-	return dest_len - r3;
 }
 
 /**
@@ -399,10 +383,8 @@
  *		 instruction
  * @func: the function code passed to PCC; see CPACF_KM_xxx defines
  * @param: address of parameter block; see POP for details on each func
- *
- * Returns 0.
  */
-static inline int cpacf_pcc(long func, void *param)
+static inline void cpacf_pcc(unsigned long func, void *param)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -413,8 +395,6 @@
 		:
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCC)
 		: "cc", "memory");
-
-	return 0;
 }
 
 #endif	/* _ASM_S390_CPACF_H */
diff --git a/arch/s390/include/asm/facilities_src.h b/arch/s390/include/asm/facilities_src.h
index 4917728..3b758f6 100644
--- a/arch/s390/include/asm/facilities_src.h
+++ b/arch/s390/include/asm/facilities_src.h
@@ -55,4 +55,28 @@
 			-1 /* END */
 		}
 	},
+	{
+		.name = "FACILITIES_KVM",
+		.bits = (int[]){
+			0,  /* N3 instructions */
+			1,  /* z/Arch mode installed */
+			2,  /* z/Arch mode active */
+			3,  /* DAT-enhancement */
+			4,  /* idte segment table */
+			5,  /* idte region table */
+			6,  /* ASN-and-LX reuse */
+			7,  /* stfle */
+			8,  /* enhanced-DAT 1 */
+			9,  /* sense-running-status */
+			10, /* conditional sske */
+			13, /* ipte-range */
+			14, /* nonquiescing key-setting */
+			73, /* transactional execution */
+			75, /* access-exception-fetch/store indication */
+			76, /* msa extension 3 */
+			77, /* msa extension 4 */
+			78, /* enhanced-DAT 2 */
+			-1  /* END */
+		}
+	},
 };
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 6aba6fc..02124d6 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -64,18 +64,18 @@
 	return rc;
 }
 
-#define KERNEL_VXR_V0V7		1
-#define KERNEL_VXR_V8V15	2
-#define KERNEL_VXR_V16V23	4
-#define KERNEL_VXR_V24V31	8
-#define KERNEL_FPR		16
-#define KERNEL_FPC		256
+#define KERNEL_FPC		1
+#define KERNEL_VXR_V0V7		2
+#define KERNEL_VXR_V8V15	4
+#define KERNEL_VXR_V16V23	8
+#define KERNEL_VXR_V24V31	16
 
 #define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
 #define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
 #define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
 
-#define KERNEL_FPU_MASK		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR)
+#define KERNEL_VXR		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
+#define KERNEL_FPR		(KERNEL_FPC|KERNEL_VXR_V0V7)
 
 struct kernel_fpu;
 
@@ -87,18 +87,28 @@
  * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
  */
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
-void __kernel_fpu_end(struct kernel_fpu *state);
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
 
 
 static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
 	preempt_disable();
-	__kernel_fpu_begin(state, flags);
+	state->mask = S390_lowcore.fpu_flags;
+	if (!test_cpu_flag(CIF_FPU))
+		/* Save user space FPU state and register contents */
+		save_fpu_regs();
+	else if (state->mask & flags)
+		/* Save FPU/vector register in-use by the kernel */
+		__kernel_fpu_begin(state, flags);
+	S390_lowcore.fpu_flags |= flags;
 }
 
-static inline void kernel_fpu_end(struct kernel_fpu *state)
+static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	__kernel_fpu_end(state);
+	S390_lowcore.fpu_flags = state->mask;
+	if (state->mask & flags)
+		/* Restore FPU/vector register in-use by the kernel */
+		__kernel_fpu_end(state, flags);
 	preempt_enable();
 }
 
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index d79ba7c..7b93b78 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -129,7 +129,8 @@
 	__u8	pad_0x0390[0x0398-0x0390];	/* 0x0390 */
 	__u64	gmap;				/* 0x0398 */
 	__u32	spinlock_lockval;		/* 0x03a0 */
-	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
+	__u32	fpu_flags;			/* 0x03a4 */
+	__u8	pad_0x03a8[0x0400-0x03a8];	/* 0x03a8 */
 
 	/* Per cpu primary space access list */
 	__u32	paste[16];			/* 0x0400 */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 6d39329..bea785d 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -12,6 +12,7 @@
 	struct list_head pgtable_list;
 	spinlock_t gmap_lock;
 	struct list_head gmap_list;
+	unsigned long gmap_asce;
 	unsigned long asce;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index c6a088c..515fea5a 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -21,6 +21,7 @@
 	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.flush_count, 0);
+	mm->context.gmap_asce = 0;
 	mm->context.flush_mm = 0;
 #ifdef CONFIG_PGSTE
 	mm->context.alloc_pgste = page_table_allocate_pgste;
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 0da91c4..6611f79 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -11,6 +11,7 @@
 #include <asm-generic/pci.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_debug.h>
+#include <asm/sclp.h>
 
 #define PCIBIOS_MIN_IO		0x1000
 #define PCIBIOS_MIN_MEM		0x10000000
@@ -117,6 +118,7 @@
 
 	spinlock_t	iommu_bitmap_lock;
 	unsigned long	*iommu_bitmap;
+	unsigned long	*lazy_bitmap;
 	unsigned long	iommu_size;
 	unsigned long	iommu_pages;
 	unsigned int	next_bit;
@@ -216,6 +218,9 @@
 void zpci_debug_exit_device(struct zpci_dev *);
 void zpci_debug_info(struct zpci_dev *, struct seq_file *);
 
+/* Error reporting */
+int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
+
 #ifdef CONFIG_NUMA
 
 /* Returns the node based on PCI bus */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 72c7f60..0362cd5 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -874,35 +874,31 @@
 }
 #endif
 
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
+#define IPTE_GLOBAL	0
+#define	IPTE_LOCAL	1
+
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-	/* Invalidation + global TLB flush for the pte */
+	/* Invalidation + TLB flush for the pte */
 	asm volatile(
-		"	ipte	%2,%3"
-		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+		"       .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+		: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+		  [m4] "i" (local));
 }
 
-static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
+static inline void __ptep_ipte_range(unsigned long address, int nr,
+				     pte_t *ptep, int local)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-	/* Invalidation + local TLB flush for the pte */
-	asm volatile(
-		"	.insn rrf,0xb2210000,%2,%3,0,1"
-		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
-}
-
-static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
-{
-	unsigned long pto = (unsigned long) ptep;
-
-	/* Invalidate a range of ptes + global TLB flush of the ptes */
+	/* Invalidate a range of ptes + TLB flush of the ptes */
 	do {
 		asm volatile(
-			"	.insn rrf,0xb2210000,%2,%0,%1,0"
-			: "+a" (address), "+a" (nr) : "a" (pto) : "memory");
+			"       .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+			: [r2] "+a" (address), [r3] "+a" (nr)
+			: [r1] "a" (pto), [m4] "i" (local) : "memory");
 	} while (nr != 255);
 }
 
@@ -1239,53 +1235,33 @@
 	    pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
 }
 
-static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
+#define IDTE_GLOBAL	0
+#define IDTE_LOCAL	1
+
+static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
 {
 	unsigned long sto;
 
 	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
 	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,0"
-		: "=m" (*pmdp)
-		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+		: "+m" (*pmdp)
+		: [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
+		  [m4] "i" (local)
 		: "cc" );
 }
 
-static inline void __pudp_idte(unsigned long address, pud_t *pudp)
+static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
 {
 	unsigned long r3o;
 
 	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
 	r3o |= _ASCE_TYPE_REGION3;
 	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,0"
-		: "=m" (*pudp)
-		: "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK))
-		: "cc");
-}
-
-static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
-{
-	unsigned long sto;
-
-	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,1"
-		: "=m" (*pmdp)
-		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
-		: "cc" );
-}
-
-static inline void __pudp_idte_local(unsigned long address, pud_t *pudp)
-{
-	unsigned long r3o;
-
-	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
-	r3o |= _ASCE_TYPE_REGION3;
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,1"
-		: "=m" (*pudp)
-		: "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK))
+		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+		: "+m" (*pudp)
+		: [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
+		  [m4] "i" (local)
 		: "cc");
 }
 
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 1a691ef..3984610 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -26,17 +26,6 @@
 		: : "a" (2048), "a" (asce) : "cc");
 }
 
-/*
- * Flush TLB entries for a specific ASCE on the local CPU
- */
-static inline void __tlb_flush_idte_local(unsigned long asce)
-{
-	/* Local TLB flush for the mm */
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,0,%0,%1,1"
-		: : "a" (2048), "a" (asce) : "cc");
-}
-
 #ifdef CONFIG_SMP
 void smp_ptlb_all(void);
 
@@ -65,35 +54,33 @@
 		/* Global TLB flush */
 		__tlb_flush_global();
 		/* Reset TLB flush mask */
-		if (MACHINE_HAS_TLB_LC)
-			cpumask_copy(mm_cpumask(mm),
-				     &mm->context.cpu_attach_mask);
+		cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
 	}
 	atomic_dec(&mm->context.flush_count);
 	preempt_enable();
 }
 
-/*
- * Flush TLB entries for a specific ASCE on all CPUs. Should never be used
- * when more than one asce (e.g. gmap) ran on this mm.
- */
-static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+static inline void __tlb_flush_mm(struct mm_struct *mm)
 {
+	unsigned long gmap_asce;
+
+	/*
+	 * If the machine has IDTE we prefer to do a per mm flush
+	 * on all cpus instead of doing a local flush if the mm
+	 * only ran on the local cpu.
+	 */
 	preempt_disable();
 	atomic_inc(&mm->context.flush_count);
-	if (MACHINE_HAS_TLB_LC &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
-		__tlb_flush_idte_local(asce);
+	gmap_asce = READ_ONCE(mm->context.gmap_asce);
+	if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
+		if (gmap_asce)
+			__tlb_flush_idte(gmap_asce);
+		__tlb_flush_idte(mm->context.asce);
 	} else {
-		if (MACHINE_HAS_IDTE)
-			__tlb_flush_idte(asce);
-		else
-			__tlb_flush_global();
-		/* Reset TLB flush mask */
-		if (MACHINE_HAS_TLB_LC)
-			cpumask_copy(mm_cpumask(mm),
-				     &mm->context.cpu_attach_mask);
+		__tlb_flush_full(mm);
 	}
+	/* Reset TLB flush mask */
+	cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
 	atomic_dec(&mm->context.flush_count);
 	preempt_enable();
 }
@@ -112,36 +99,17 @@
 /*
  * Flush TLB entries for a specific ASCE on all CPUs.
  */
-static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+static inline void __tlb_flush_mm(struct mm_struct *mm)
 {
-	if (MACHINE_HAS_TLB_LC)
-		__tlb_flush_idte_local(asce);
-	else
-		__tlb_flush_local();
+	__tlb_flush_local();
 }
 
 static inline void __tlb_flush_kernel(void)
 {
-	if (MACHINE_HAS_TLB_LC)
-		__tlb_flush_idte_local(init_mm.context.asce);
-	else
-		__tlb_flush_local();
+	__tlb_flush_local();
 }
 #endif
 
-static inline void __tlb_flush_mm(struct mm_struct * mm)
-{
-	/*
-	 * If the machine has IDTE we prefer to do a per mm flush
-	 * on all cpus instead of doing a local flush if the mm
-	 * only ran on the local cpu.
-	 */
-	if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list))
-		__tlb_flush_asce(mm, mm->context.asce);
-	else
-		__tlb_flush_full(mm);
-}
-
 static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
 {
 	if (mm->context.flush_mm) {
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 9b49cf1..52d7c870 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -266,28 +266,28 @@
 	__chk_user_ptr(ptr);					\
 	switch (sizeof(*(ptr))) {				\
 	case 1: {						\
-		unsigned char __x;				\
+		unsigned char __x = 0;				\
 		__gu_err = __get_user_fn(&__x, ptr,		\
 					 sizeof(*(ptr)));	\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
 	case 2: {						\
-		unsigned short __x;				\
+		unsigned short __x = 0;				\
 		__gu_err = __get_user_fn(&__x, ptr,		\
 					 sizeof(*(ptr)));	\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
 	case 4: {						\
-		unsigned int __x;				\
+		unsigned int __x = 0;				\
 		__gu_err = __get_user_fn(&__x, ptr,		\
 					 sizeof(*(ptr)));	\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
 		break;						\
 	};							\
 	case 8: {						\
-		unsigned long long __x;				\
+		unsigned long long __x = 0;			\
 		__gu_err = __get_user_fn(&__x, ptr,		\
 					 sizeof(*(ptr)));	\
 		(x) = *(__force __typeof__(*(ptr)) *) &__x;	\
@@ -311,6 +311,14 @@
 #define __put_user_unaligned __put_user
 #define __get_user_unaligned __get_user
 
+extern void __compiletime_error("usercopy buffer size is too small")
+__bad_copy_user(void);
+
+static inline void copy_user_overflow(int size, unsigned long count)
+{
+	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
+
 /**
  * copy_to_user: - Copy a block of data into user space.
  * @to:   Destination address, in user space.
@@ -332,12 +340,6 @@
 	return __copy_to_user(to, from, n);
 }
 
-void copy_from_user_overflow(void)
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-__compiletime_warning("copy_from_user() buffer size is not provably correct")
-#endif
-;
-
 /**
  * copy_from_user: - Copy a block of data from user space.
  * @to:   Destination address, in kernel space.
@@ -362,7 +364,10 @@
 
 	might_fault();
 	if (unlikely(sz != -1 && sz < n)) {
-		copy_from_user_overflow();
+		if (!__builtin_constant_p(n))
+			copy_user_overflow(sz, n);
+		else
+			__bad_copy_user();
 		return n;
 	}
 	return __copy_from_user(to, from, n);
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
index 4a31356..49c24a2 100644
--- a/arch/s390/include/asm/vx-insn.h
+++ b/arch/s390/include/asm/vx-insn.h
@@ -16,15 +16,13 @@
 
 /* Macros to generate vector instruction byte code */
 
-#define REG_NUM_INVALID	       255
-
 /* GR_NUM - Retrieve general-purpose register number
  *
  * @opd:	Operand to store register number
  * @r64:	String designation register in the format "%rN"
  */
 .macro	GR_NUM	opd gr
-	\opd = REG_NUM_INVALID
+	\opd = 255
 	.ifc \gr,%r0
 		\opd = 0
 	.endif
@@ -73,14 +71,11 @@
 	.ifc \gr,%r15
 		\opd = 15
 	.endif
-	.if \opd == REG_NUM_INVALID
-		.error "Invalid general-purpose register designation: \gr"
+	.if \opd == 255
+		\opd = \gr
 	.endif
 .endm
 
-/* VX_R() - Macro to encode the VX_NUM into the instruction */
-#define VX_R(v)		(v & 0x0F)
-
 /* VX_NUM - Retrieve vector register number
  *
  * @opd:	Operand to store register number
@@ -88,11 +83,10 @@
  *
  * The vector register number is used for as input number to the
  * instruction and, as well as, to compute the RXB field of the
- * instruction.  To encode the particular vector register number,
- * use the VX_R(v) macro to extract the instruction opcode.
+ * instruction.
  */
 .macro	VX_NUM	opd vxr
-	\opd = REG_NUM_INVALID
+	\opd = 255
 	.ifc \vxr,%v0
 		\opd = 0
 	.endif
@@ -189,8 +183,8 @@
 	.ifc \vxr,%v31
 		\opd = 31
 	.endif
-	.if \opd == REG_NUM_INVALID
-		.error "Invalid vector register designation: \vxr"
+	.if \opd == 255
+		\opd = \vxr
 	.endif
 .endm
 
@@ -251,7 +245,7 @@
 /* VECTOR GENERATE BYTE MASK */
 .macro	VGBM	vr imm2
 	VX_NUM	v1, \vr
-	.word	(0xE700 | (VX_R(v1) << 4))
+	.word	(0xE700 | ((v1&15) << 4))
 	.word	\imm2
 	MRXBOPC	0, 0x44, v1
 .endm
@@ -267,7 +261,7 @@
 	VX_NUM	v1, \v
 	GR_NUM	b2, "%r0"
 	GR_NUM	r3, \gr
-	.word	0xE700 | (VX_R(v1) << 4) | r3
+	.word	0xE700 | ((v1&15) << 4) | r3
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	\m, 0x22, v1
 .endm
@@ -284,12 +278,21 @@
 	VLVG	\v, \gr, \index, 3
 .endm
 
+/* VECTOR LOAD REGISTER */
+.macro	VLR	v1, v2
+	VX_NUM	v1, \v1
+	VX_NUM	v2, \v2
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	0
+	MRXBOPC	0, 0x56, v1, v2
+.endm
+
 /* VECTOR LOAD */
 .macro	VL	v, disp, index="%r0", base
 	VX_NUM	v1, \v
 	GR_NUM	x2, \index
 	GR_NUM	b2, \base
-	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	0xE700 | ((v1&15) << 4) | x2
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC 0, 0x06, v1
 .endm
@@ -299,7 +302,7 @@
 	VX_NUM	v1, \vr1
 	GR_NUM	x2, \index
 	GR_NUM	b2, \base
-	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	0xE700 | ((v1&15) << 4) | x2
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	\m3, \opc, v1
 .endm
@@ -319,7 +322,7 @@
 /* VECTOR LOAD ELEMENT IMMEDIATE */
 .macro	VLEIx	vr1, imm2, m3, opc
 	VX_NUM	v1, \vr1
-	.word	0xE700 | (VX_R(v1) << 4)
+	.word	0xE700 | ((v1&15) << 4)
 	.word	\imm2
 	MRXBOPC	\m3, \opc, v1
 .endm
@@ -341,7 +344,7 @@
 	GR_NUM	r1, \gr
 	GR_NUM	b2, \base
 	VX_NUM	v3, \vr
-	.word	0xE700 | (r1 << 4) | VX_R(v3)
+	.word	0xE700 | (r1 << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	\m, 0x21, v3
 .endm
@@ -363,7 +366,7 @@
 	VX_NUM	v1, \vfrom
 	VX_NUM	v3, \vto
 	GR_NUM	b2, \base	    /* Base register */
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	0, 0x36, v1, v3
 .endm
@@ -373,7 +376,7 @@
 	VX_NUM	v1, \vfrom
 	VX_NUM	v3, \vto
 	GR_NUM	b2, \base	    /* Base register */
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	0, 0x3E, v1, v3
 .endm
@@ -384,16 +387,16 @@
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
 	VX_NUM	v4, \vr4
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
-	MRXBOPC	VX_R(v4), 0x8C, v1, v2, v3, v4
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	(v4&15), 0x8C, v1, v2, v3, v4
 .endm
 
 /* VECTOR UNPACK LOGICAL LOW */
 .macro	VUPLL	vr1, vr2, m3
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
 	.word	0x0000
 	MRXBOPC	\m3, 0xD4, v1, v2
 .endm
@@ -410,13 +413,23 @@
 
 /* Vector integer instructions */
 
+/* VECTOR AND */
+.macro	VN	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	0, 0x68, v1, v2, v3
+.endm
+
 /* VECTOR EXCLUSIVE OR */
 .macro	VX	vr1, vr2, vr3
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
 	MRXBOPC	0, 0x6D, v1, v2, v3
 .endm
 
@@ -425,8 +438,8 @@
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
 	MRXBOPC	\m4, 0xB4, v1, v2, v3
 .endm
 .macro	VGFMB	vr1, vr2, vr3
@@ -448,9 +461,9 @@
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
 	VX_NUM	v4, \vr4
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12) | (\m5 << 8)
-	MRXBOPC	VX_R(v4), 0xBC, v1, v2, v3, v4
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12) | (\m5 << 8)
+	MRXBOPC	(v4&15), 0xBC, v1, v2, v3, v4
 .endm
 .macro	VGFMAB	vr1, vr2, vr3, vr4
 	VGFMA	\vr1, \vr2, \vr3, \vr4, 0
@@ -470,11 +483,78 @@
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
 	MRXBOPC	0, 0x7D, v1, v2, v3
 .endm
 
+/* VECTOR REPLICATE IMMEDIATE */
+.macro	VREPI	vr1, imm2, m3
+	VX_NUM	v1, \vr1
+	.word	0xE700 | ((v1&15) << 4)
+	.word	\imm2
+	MRXBOPC	\m3, 0x45, v1
+.endm
+.macro	VREPIB	vr1, imm2
+	VREPI	\vr1, \imm2, 0
+.endm
+.macro	VREPIH	vr1, imm2
+	VREPI	\vr1, \imm2, 1
+.endm
+.macro	VREPIF	vr1, imm2
+	VREPI	\vr1, \imm2, 2
+.endm
+.macro	VREPIG	vr1, imm2
+	VREP	\vr1, \imm2, 3
+.endm
+
+/* VECTOR ADD */
+.macro	VA	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0xF3, v1, v2, v3
+.endm
+.macro	VAB	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VAH	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VAF	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VAG	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 3
+.endm
+.macro	VAQ	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 4
+.endm
+
+/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */
+.macro	VESRAV	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC \m4, 0x7A, v1, v2, v3
+.endm
+
+.macro	VESRAVB	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VESRAVH	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VESRAVF	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VESRAVG	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 3
+.endm
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* __ASM_S390_VX_INSN_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 08fe6da..cc44b09 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -6,6 +6,7 @@
 header-y += byteorder.h
 header-y += chpid.h
 header-y += chsc.h
+header-y += clp.h
 header-y += cmb.h
 header-y += dasd.h
 header-y += debug.h
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 3234817..72ccc41 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -48,6 +48,9 @@
 endif
 GCOV_PROFILE_sclp.o := n
 GCOV_PROFILE_als.o := n
+UBSAN_SANITIZE_als.o := n
+UBSAN_SANITIZE_early.o := n
+UBSAN_SANITIZE_sclp.o := n
 
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 29df8484..f9293bf 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -71,9 +71,7 @@
  */
 struct save_area * __init save_area_boot_cpu(void)
 {
-	if (list_empty(&dump_save_areas))
-		return NULL;
-	return list_first_entry(&dump_save_areas, struct save_area, list);
+	return list_first_entry_or_null(&dump_save_areas, struct save_area, list);
 }
 
 /*
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 717b03a..2374c5b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -13,7 +13,7 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/lockdep.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 81d1d18..1235b94 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -10,240 +10,167 @@
 #include <asm/fpu/types.h>
 #include <asm/fpu/api.h>
 
-/*
- * Per-CPU variable to maintain FPU register ranges that are in use
- * by the kernel.
- */
-static DEFINE_PER_CPU(u32, kernel_fpu_state);
-
-#define KERNEL_FPU_STATE_MASK	(KERNEL_FPU_MASK|KERNEL_FPC)
-
+asm(".include \"asm/vx-insn.h\"\n");
 
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
-	if (!__this_cpu_read(kernel_fpu_state)) {
-		/*
-		 * Save user space FPU state and register contents.  Multiple
-		 * calls because of interruptions do not matter and return
-		 * immediately.  This also sets CIF_FPU to lazy restore FP/VX
-		 * register contents when returning to user space.
-		 */
-		save_fpu_regs();
-	}
-
-	/* Update flags to use the vector facility for KERNEL_FPR */
-	if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) {
-		flags |= KERNEL_VXR_LOW | KERNEL_FPC;
-		flags &= ~KERNEL_FPR;
-	}
-
-	/* Save and update current kernel VX state */
-	state->mask = __this_cpu_read(kernel_fpu_state);
-	__this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK);
-
 	/*
-	 * If this is the first call to __kernel_fpu_begin(), no additional
-	 * work is required.
+	 * Limit the save to the FPU/vector registers already
+	 * in use by the previous context
 	 */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		return;
+	flags &= state->mask;
 
-	/*
-	 * If KERNEL_FPR is still set, the vector facility is not available
-	 * and, thus, save floating-point control and registers only.
-	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("stfpc %0" : "=Q" (state->fpc));
-		asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
-		asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
-		asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
-		asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
-		asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
-		asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
-		asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
-		asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
-		asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
-		asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
-		asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
-		asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
-		asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
-		asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
-		asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
-		asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
-		return;
-	}
-
-	/*
-	 * If this is a nested call to __kernel_fpu_begin(), check the saved
-	 * state mask to save and later restore the vector registers that
-	 * are already in use.	Let's start with checking floating-point
-	 * controls.
-	 */
-	if (state->mask & KERNEL_FPC)
+	if (flags & KERNEL_FPC)
+		/* Save floating point control */
 		asm volatile("stfpc %0" : "=m" (state->fpc));
 
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Save floating-point registers */
+			asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
+			asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
+			asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
+			asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
+			asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
+			asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
+			asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
+			asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
+			asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
+			asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
+			asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
+			asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
+			asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
+			asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
+			asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
+			asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+		}
+		return;
+	}
+
 	/* Test and save vector registers */
 	asm volatile (
 		/*
 		 * Test if any vector register must be saved and, if so,
 		 * test if all register can be saved.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
 		"	la	1,%[vxrs]\n"	/* load save area */
-		"	jo	18f\n"		/* -> save V0..V31 */
-
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> save V0..V31 */
 		/*
-		 * Test if V8..V23 can be saved at once... this speeds up
-		 * for KERNEL_fpu_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
+		 * Test for special case KERNEL_FPU_MID only. In this
+		 * case a vstm V8..V23 is the best instruction
 		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> save V8..V23 */
-
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> save V8..V23 */
+		"	VSTM	8,23,128,1\n"	/* vstm %v8,%v23,128(%r1) */
+		"	j	7f\n"
 		/* Test and save the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
 		"	jo	2f\n"		/* 11 -> save V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> save V0..V7  */
-		"	brc	2,4f\n"		/* 10 -> save V8..V15 */
-
+		"	brc	2,1f\n"		/* 10 -> save V8..V15 */
+		"	VSTM	0,7,0,1\n"	/* vstm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VSTM	8,15,128,1\n"	/* vstm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
 		/* Test and save the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> save V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> save V16..V23	*/
-		"	brc	2,12f\n"	/* 10 -> save V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
-		/*
-		 * Below are the vstm combinations to save multiple vector
-		 * registers at once.
-		 */
-		"2:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c3e\n"	/* vstm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x043e\n"	/* vstm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		"20:"
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> save V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> save V24..V31 */
+		"	VSTM	16,23,256,1\n"	/* vstm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VSTM	24,31,384,1\n"	/* vstm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
+		"6:	VSTM	16,31,256,1\n"	/* vstm %v16,%v31,256(%r1) */
+		"7:"
 		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
-		: [m] "d" (state->mask)
+		: [m] "d" (flags)
 		: "1", "cc");
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
 
-void __kernel_fpu_end(struct kernel_fpu *state)
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	/* Just update the per-CPU state if there is nothing to restore */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		goto update_fpu_state;
-
 	/*
-	 * If KERNEL_FPR is specified, the vector facility is not available
-	 * and, thus, restore floating-point control and registers only.
+	 * Limit the restore to the FPU/vector registers of the
+	 * previous context that have been overwritte by the
+	 * current context
 	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("lfpc %0" : : "Q" (state->fpc));
-		asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
-		asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
-		asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
-		asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
-		asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
-		asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
-		asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
-		asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
-		asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
-		asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
-		asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
-		asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
-		asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
-		asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
-		asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
-		asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
-		goto update_fpu_state;
-	}
+	flags &= state->mask;
 
-	/* Test and restore floating-point controls */
-	if (state->mask & KERNEL_FPC)
+	if (flags & KERNEL_FPC)
+		/* Restore floating-point controls */
 		asm volatile("lfpc %0" : : "Q" (state->fpc));
 
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Restore floating-point registers */
+			asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
+			asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
+			asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
+			asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
+			asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
+			asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
+			asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
+			asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
+			asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
+			asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
+			asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
+			asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
+			asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
+			asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
+			asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
+			asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
+		}
+		return;
+	}
+
 	/* Test and restore (load) vector registers */
 	asm volatile (
 		/*
-		 * Test if any vector registers must be loaded and, if so,
+		 * Test if any vector register must be loaded and, if so,
 		 * test if all registers can be loaded at once.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
-		"	la	1,%[vxrs]\n"	/* load load area */
-		"	jo	18f\n"		/* -> load V0..V31 */
-
+		"	la	1,%[vxrs]\n"	/* load restore area */
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> restore V0..V31 */
 		/*
-		 * Test if V8..V23 can be restored at once... this speeds up
-		 * for KERNEL_VXR_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
+		 * Test for special case KERNEL_FPU_MID only. In this
+		 * case a vlm V8..V23 is the best instruction
 		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> load V8..V23 */
-
-		/* Test and load the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
-		"	jo	2f\n"		/* 11 -> load V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> load V0..V7  */
-		"	brc	2,4f\n"		/* 10 -> load V8..V15 */
-
-		/* Test and load the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> load V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> load V16..V23	*/
-		"	brc	2,12f\n"	/* 10 -> load V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
-		/*
-		 * Below are the vstm combinations to load multiple vector
-		 * registers at once.
-		 */
-		"2:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c36\n"	/* vlm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x0436\n"	/* vlm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		"20:"
-		:
-		: [vxrs] "Q" (*(struct vx_array *) &state->vxrs),
-		  [m] "d" (state->mask)
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> restore V8..V23 */
+		"	VLM	8,23,128,1\n"	/* vlm %v8,%v23,128(%r1) */
+		"	j	7f\n"
+		/* Test and restore the first half of 16 vector registers */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> restore V0..V15 */
+		"	brc	2,1f\n"		/* 10 -> restore V8..V15 */
+		"	VLM	0,7,0,1\n"	/* vlm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VLM	8,15,128,1\n"	/* vlm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		/* Test and restore the second half of 16 vector registers */
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> restore V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> restore V24..V31 */
+		"	VLM	16,23,256,1\n"	/* vlm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VLM	24,31,384,1\n"	/* vlm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		"6:	VLM	16,31,256,1\n"	/* vlm %v16,%v31,256(%r1) */
+		"7:"
+		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
+		: [m] "d" (flags)
 		: "1", "cc");
-
-update_fpu_state:
-	/* Update current kernel VX state */
-	__this_cpu_write(kernel_fpu_state, state->mask);
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 0f7bfeb..60a8a4e 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -209,7 +209,8 @@
 	/* Only trace if the calling function expects to. */
 	if (!ftrace_graph_entry(&trace))
 		goto out;
-	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+				     NULL) == -EBUSY)
 		goto out;
 	parent = (unsigned long) return_to_handler;
 out:
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index 56e4d82..4431905 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -309,7 +309,9 @@
 	l	%r15,.Lstack-.LPG0(%r13)
 	ahi	%r15,-STACK_FRAME_OVERHEAD
 	brasl	%r14,verify_facilities
-	/* Continue with startup code in head64.S */
+# For uncompressed images, continue in
+# arch/s390/kernel/head64.S. For compressed images, continue in
+# arch/s390/boot/compressed/head.S.
 	jg	startup_continue
 
 .Lstack:
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index dd6306c..fdb4042 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -26,12 +26,14 @@
 #include <linux/stop_machine.h>
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
+#include <linux/extable.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/hardirq.h>
 #include <linux/ftrace.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
+#include <asm/uaccess.h>
 #include <asm/dis.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe);
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 29376f0..9a32f74 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -98,7 +98,7 @@
  * returns 0 if all registers could be validated
  * returns 1 otherwise
  */
-static int notrace s390_validate_registers(union mci mci)
+static int notrace s390_validate_registers(union mci mci, int umode)
 {
 	int kill_task;
 	u64 zero;
@@ -110,26 +110,41 @@
 	if (!mci.gr) {
 		/*
 		 * General purpose registers couldn't be restored and have
-		 * unknown contents. Process needs to be terminated.
+		 * unknown contents. Stop system or terminate process.
 		 */
+		if (!umode)
+			s390_handle_damage();
 		kill_task = 1;
 	}
 	if (!mci.fp) {
 		/*
-		 * Floating point registers can't be restored and
-		 * therefore the process needs to be terminated.
+		 * Floating point registers can't be restored. If the
+		 * kernel currently uses floating point registers the
+		 * system is stopped. If the process has its floating
+		 * pointer registers loaded it is terminated.
+		 * Otherwise just revalidate the registers.
 		 */
-		kill_task = 1;
+		if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
+			s390_handle_damage();
+		if (!test_cpu_flag(CIF_FPU))
+			kill_task = 1;
 	}
 	fpt_save_area = &S390_lowcore.floating_pt_save_area;
 	fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
 	if (!mci.fc) {
 		/*
 		 * Floating point control register can't be restored.
-		 * Task will be terminated.
+		 * If the kernel currently uses the floating pointer
+		 * registers and needs the FPC register the system is
+		 * stopped. If the process has its floating pointer
+		 * registers loaded it is terminated. Otherwiese the
+		 * FPC is just revalidated.
 		 */
+		if (S390_lowcore.fpu_flags & KERNEL_FPC)
+			s390_handle_damage();
 		asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
-		kill_task = 1;
+		if (!test_cpu_flag(CIF_FPU))
+			kill_task = 1;
 	} else
 		asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
 
@@ -159,10 +174,16 @@
 
 		if (!mci.vr) {
 			/*
-			 * Vector registers can't be restored and therefore
-			 * the process needs to be terminated.
+			 * Vector registers can't be restored. If the kernel
+			 * currently uses vector registers the system is
+			 * stopped. If the process has its vector registers
+			 * loaded it is terminated. Otherwise just revalidate
+			 * the registers.
 			 */
-			kill_task = 1;
+			if (S390_lowcore.fpu_flags & KERNEL_VXR)
+				s390_handle_damage();
+			if (!test_cpu_flag(CIF_FPU))
+				kill_task = 1;
 		}
 		cr0.val = S390_lowcore.cregs_save_area[0];
 		cr0.afp = cr0.vx = 1;
@@ -250,13 +271,11 @@
 	struct mcck_struct *mcck;
 	unsigned long long tmp;
 	union mci mci;
-	int umode;
 
 	nmi_enter();
 	inc_irq_stat(NMI_NMI);
 	mci.val = S390_lowcore.mcck_interruption_code;
 	mcck = this_cpu_ptr(&cpu_mcck);
-	umode = user_mode(regs);
 
 	if (mci.sd) {
 		/* System damage -> stopping machine */
@@ -297,22 +316,14 @@
 			s390_handle_damage();
 		}
 	}
-	if (s390_validate_registers(mci)) {
-		if (umode) {
-			/*
-			 * Couldn't restore all register contents while in
-			 * user mode -> mark task for termination.
-			 */
-			mcck->kill_task = 1;
-			mcck->mcck_code = mci.val;
-			set_cpu_flag(CIF_MCCK_PENDING);
-		} else {
-			/*
-			 * Couldn't restore all register contents while in
-			 * kernel mode -> stopping machine.
-			 */
-			s390_handle_damage();
-		}
+	if (s390_validate_registers(mci, user_mode(regs))) {
+		/*
+		 * Couldn't restore all register contents for the
+		 * user space process -> mark task for termination.
+		 */
+		mcck->kill_task = 1;
+		mcck->mcck_code = mci.val;
+		set_cpu_flag(CIF_MCCK_PENDING);
 	}
 	if (mci.cd) {
 		/* Timing facility damage */
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index ba5f456..7f7ba5f2 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -204,11 +204,9 @@
 #endif
 		}
 	} else if (MACHINE_IS_KVM) {
-		if (sclp.has_vt220 &&
-		    config_enabled(CONFIG_SCLP_VT220_CONSOLE))
+		if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
 			SET_CONSOLE_VT220;
-		else if (sclp.has_linemode &&
-			 config_enabled(CONFIG_SCLP_CONSOLE))
+		else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
 			SET_CONSOLE_SCLP;
 		else
 			SET_CONSOLE_HVC;
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 050b8d0..bfda6aa 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -454,7 +454,7 @@
 			: "Q" (info->capability), "d" (10000000), "d" (0)
 			: "cc"
 			);
-		kernel_fpu_end(&fpu);
+		kernel_fpu_end(&fpu, KERNEL_FPR);
 	} else
 		/*
 		 * Really old machine without stsi block for basic
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 4e99498..0bfcc49 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -50,10 +50,6 @@
 #include <asm/cio.h>
 #include "entry.h"
 
-/* change this if you have some constant time drift */
-#define USECS_PER_JIFFY     ((unsigned long) 1000000/HZ)
-#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12)
-
 u64 sched_clock_base_cc = -1;	/* Force to data section. */
 EXPORT_SYMBOL_GPL(sched_clock_base_cc);
 
@@ -282,13 +278,8 @@
 
 void update_vsyscall_tz(void)
 {
-	/* Make userspace gettimeofday spin until we're done. */
-	++vdso_data->tb_update_count;
-	smp_wmb();
 	vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
 	vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-	smp_wmb();
-	++vdso_data->tb_update_count;
 }
 
 /*
@@ -318,51 +309,12 @@
 	vtime_init();
 }
 
-/*
- * The time is "clock". old is what we think the time is.
- * Adjust the value by a multiple of jiffies and add the delta to ntp.
- * "delay" is an approximation how long the synchronization took. If
- * the time correction is positive, then "delay" is subtracted from
- * the time difference and only the remaining part is passed to ntp.
- */
-static unsigned long long adjust_time(unsigned long long old,
-				      unsigned long long clock,
-				      unsigned long long delay)
-{
-	unsigned long long delta, ticks;
-	struct timex adjust;
-
-	if (clock > old) {
-		/* It is later than we thought. */
-		delta = ticks = clock - old;
-		delta = ticks = (delta < delay) ? 0 : delta - delay;
-		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		adjust.offset = ticks * (1000000 / HZ);
-	} else {
-		/* It is earlier than we thought. */
-		delta = ticks = old - clock;
-		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		delta = -delta;
-		adjust.offset = -ticks * (1000000 / HZ);
-	}
-	sched_clock_base_cc += delta;
-	if (adjust.offset != 0) {
-		pr_notice("The ETR interface has adjusted the clock "
-			  "by %li microseconds\n", adjust.offset);
-		adjust.modes = ADJ_OFFSET_SINGLESHOT;
-		do_adjtimex(&adjust);
-	}
-	return delta;
-}
-
 static DEFINE_PER_CPU(atomic_t, clock_sync_word);
 static DEFINE_MUTEX(clock_sync_mutex);
 static unsigned long clock_sync_flags;
 
-#define CLOCK_SYNC_HAS_ETR	0
-#define CLOCK_SYNC_HAS_STP	1
-#define CLOCK_SYNC_ETR		2
-#define CLOCK_SYNC_STP		3
+#define CLOCK_SYNC_HAS_STP	0
+#define CLOCK_SYNC_STP		1
 
 /*
  * The get_clock function for the physical clock. It will get the current
@@ -384,34 +336,32 @@
 	if (sw0 == sw1 && (sw0 & 0x80000000U))
 		/* Success: time is in sync. */
 		return 0;
-	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
-	    !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
 		return -EOPNOTSUPP;
-	if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
-	    !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+	if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
 		return -EACCES;
 	return -EAGAIN;
 }
 EXPORT_SYMBOL(get_phys_clock);
 
 /*
- * Make get_sync_clock return -EAGAIN.
+ * Make get_phys_clock() return -EAGAIN.
  */
 static void disable_sync_clock(void *dummy)
 {
 	atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
 	/*
-	 * Clear the in-sync bit 2^31. All get_sync_clock calls will
+	 * Clear the in-sync bit 2^31. All get_phys_clock calls will
 	 * fail until the sync bit is turned back on. In addition
 	 * increase the "sequence" counter to avoid the race of an
-	 * etr event and the complete recovery against get_sync_clock.
+	 * stp event and the complete recovery against get_phys_clock.
 	 */
 	atomic_andnot(0x80000000, sw_ptr);
 	atomic_inc(sw_ptr);
 }
 
 /*
- * Make get_sync_clock return 0 again.
+ * Make get_phys_clock() return 0 again.
  * Needs to be called from a context disabled for preemption.
  */
 static void enable_sync_clock(void)
@@ -434,7 +384,7 @@
 	return rc;
 }
 
-/* Single threaded workqueue used for etr and stp sync events */
+/* Single threaded workqueue used for stp sync events */
 static struct workqueue_struct *time_sync_wq;
 
 static void __init time_init_wq(void)
@@ -448,20 +398,12 @@
 	atomic_t cpus;
 	int in_sync;
 	unsigned long long fixup_cc;
-	int etr_port;
-	struct etr_aib *etr_aib;
 };
 
 static void clock_sync_cpu(struct clock_sync_data *sync)
 {
 	atomic_dec(&sync->cpus);
 	enable_sync_clock();
-	/*
-	 * This looks like a busy wait loop but it isn't. etr_sync_cpus
-	 * is called on all other cpus while the TOD clocks is stopped.
-	 * __udelay will stop the cpu on an enabled wait psw until the
-	 * TOD is running again.
-	 */
 	while (sync->in_sync == 0) {
 		__udelay(1);
 		/*
@@ -582,7 +524,7 @@
 static int stp_sync_clock(void *data)
 {
 	static int first;
-	unsigned long long old_clock, delta, new_clock, clock_delta;
+	unsigned long long clock_delta;
 	struct clock_sync_data *stp_sync;
 	struct ptff_qto qto;
 	int rc;
@@ -605,18 +547,18 @@
 	if (stp_info.todoff[0] || stp_info.todoff[1] ||
 	    stp_info.todoff[2] || stp_info.todoff[3] ||
 	    stp_info.tmd != 2) {
-		old_clock = get_tod_clock();
 		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta);
 		if (rc == 0) {
-			new_clock = old_clock + clock_delta;
-			delta = adjust_time(old_clock, new_clock, 0);
+			/* fixup the monotonic sched clock */
+			sched_clock_base_cc += clock_delta;
 			if (ptff_query(PTFF_QTO) &&
 			    ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
 				/* Update LPAR offset */
 				lpar_offset = qto.tod_epoch_difference;
 			atomic_notifier_call_chain(&s390_epoch_delta_notifier,
 						   0, &clock_delta);
-			fixup_clock_comparator(delta);
+			stp_sync->fixup_cc = clock_delta;
+			fixup_clock_comparator(clock_delta);
 			rc = chsc_sstpi(stp_page, &stp_info,
 					sizeof(struct stp_sstpi));
 			if (rc == 0 && stp_info.tmd != 2)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index dd97a3e..d0539f7 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -14,11 +14,12 @@
  */
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/ptrace.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <asm/uaccess.h>
 #include <asm/fpu/api.h>
 #include "entry.h"
 
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 6814545..6cc9478 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -24,8 +24,9 @@
 extra-y += vdso32.lds
 CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
 
-# Disable gcov profiling for VDSO code
+# Disable gcov profiling and ubsan for VDSO code
 GCOV_PROFILE := n
+UBSAN_SANITIZE := n
 
 # Force dependency (incbin is bad)
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 0b0fd22..2d54c18 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -24,8 +24,9 @@
 extra-y += vdso64.lds
 CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
 
-# Disable gcov profiling for VDSO code
+# Disable gcov profiling and ubsan for VDSO code
 GCOV_PROFILE := n
+UBSAN_SANITIZE := n
 
 # Force dependency (incbin is bad)
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 3f3ae48..7e8cb6a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -132,10 +132,7 @@
 MODULE_PARM_DESC(nested, "Nested virtualization support");
 
 /* upper facilities limit for kvm */
-unsigned long kvm_s390_fac_list_mask[16] = {
-	0xffe6000000000000UL,
-	0x005e000000000000UL,
-};
+unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
 {
@@ -248,22 +245,33 @@
 		     PTFF_QAF);
 
 	if (test_facility(17)) { /* MSA */
-		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
-		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
-		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
-		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
-		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
+		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmac);
+		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmc);
+		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.km);
+		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kimd);
+		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.klmd);
 	}
 	if (test_facility(76)) /* MSA3 */
-		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
+		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.pckmo);
 	if (test_facility(77)) { /* MSA4 */
-		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
-		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
-		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
-		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
+		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmctr);
+		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmf);
+		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmo);
+		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.pcc);
 	}
 	if (test_facility(57)) /* MSA5 */
-		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
+		__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.ppno);
 
 	if (MACHINE_HAS_ESOP)
 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
@@ -1672,6 +1680,7 @@
 				    KVM_SYNC_CRS |
 				    KVM_SYNC_ARCH0 |
 				    KVM_SYNC_PFAULT;
+	kvm_s390_set_prefix(vcpu, 0);
 	if (test_kvm_facility(vcpu->kvm, 64))
 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
@@ -2230,9 +2239,10 @@
 		return -EINVAL;
 	current->thread.fpu.fpc = fpu->fpc;
 	if (MACHINE_HAS_VX)
-		convert_fp_to_vx(current->thread.fpu.vxrs, (freg_t *)fpu->fprs);
+		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
+				 (freg_t *) fpu->fprs);
 	else
-		memcpy(current->thread.fpu.fprs, &fpu->fprs, sizeof(fpu->fprs));
+		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
 	return 0;
 }
 
@@ -2241,9 +2251,10 @@
 	/* make sure we have the latest values */
 	save_fpu_regs();
 	if (MACHINE_HAS_VX)
-		convert_vx_to_fp((freg_t *)fpu->fprs, current->thread.fpu.vxrs);
+		convert_vx_to_fp((freg_t *) fpu->fprs,
+				 (__vector128 *) vcpu->run->s.regs.vrs);
 	else
-		memcpy(fpu->fprs, current->thread.fpu.fprs, sizeof(fpu->fprs));
+		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
 	fpu->fpc = current->thread.fpu.fpc;
 	return 0;
 }
@@ -2361,8 +2372,10 @@
 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
 					  kvm_s390_get_prefix(vcpu),
 					  PAGE_SIZE * 2, PROT_WRITE);
-		if (rc)
+		if (rc) {
+			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 			return rc;
+		}
 		goto retry;
 	}
 
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index c106488..d8673e2 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -584,7 +584,7 @@
 		/* Validity 0x0044 will be checked by SIE */
 		if (rc)
 			goto unpin;
-		scb_s->gvrd = hpa;
+		scb_s->riccbd = hpa;
 	}
 	return 0;
 unpin:
diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c
index e390bbb..48352bf 100644
--- a/arch/s390/lib/string.c
+++ b/arch/s390/lib/string.c
@@ -237,11 +237,10 @@
 EXPORT_SYMBOL(strrchr);
 
 static inline int clcle(const char *s1, unsigned long l1,
-			const char *s2, unsigned long l2,
-			int *diff)
+			const char *s2, unsigned long l2)
 {
 	register unsigned long r2 asm("2") = (unsigned long) s1;
-	register unsigned long r3 asm("3") = (unsigned long) l2;
+	register unsigned long r3 asm("3") = (unsigned long) l1;
 	register unsigned long r4 asm("4") = (unsigned long) s2;
 	register unsigned long r5 asm("5") = (unsigned long) l2;
 	int cc;
@@ -252,7 +251,6 @@
 		      "   srl   %0,28"
 		      : "=&d" (cc), "+a" (r2), "+a" (r3),
 			"+a" (r4), "+a" (r5) : : "cc");
-	*diff = *(char *)r2 - *(char *)r4;
 	return cc;
 }
 
@@ -270,9 +268,9 @@
 		return (char *) s1;
 	l1 = __strend(s1) - s1;
 	while (l1-- >= l2) {
-		int cc, dummy;
+		int cc;
 
-		cc = clcle(s1, l1, s2, l2, &dummy);
+		cc = clcle(s1, l2, s2, l2);
 		if (!cc)
 			return (char *) s1;
 		s1++;
@@ -313,11 +311,11 @@
  */
 int memcmp(const void *cs, const void *ct, size_t n)
 {
-	int ret, diff;
+	int ret;
 
-	ret = clcle(cs, n, ct, n, &diff);
+	ret = clcle(cs, n, ct, n);
 	if (ret)
-		ret = diff;
+		ret = ret == 1 ? -1 : 1;
 	return ret;
 }
 EXPORT_SYMBOL(memcmp);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index a58bca6..661d9fe 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -24,7 +24,7 @@
 #include <linux/kdebug.h>
 #include <linux/init.h>
 #include <linux/console.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
@@ -740,28 +740,21 @@
 	put_task_struct(tsk);
 }
 
-static int pfault_cpu_notify(struct notifier_block *self, unsigned long action,
-			     void *hcpu)
+static int pfault_cpu_dead(unsigned int cpu)
 {
 	struct thread_struct *thread, *next;
 	struct task_struct *tsk;
 
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DEAD:
-		spin_lock_irq(&pfault_lock);
-		list_for_each_entry_safe(thread, next, &pfault_list, list) {
-			thread->pfault_wait = 0;
-			list_del(&thread->list);
-			tsk = container_of(thread, struct task_struct, thread);
-			wake_up_process(tsk);
-			put_task_struct(tsk);
-		}
-		spin_unlock_irq(&pfault_lock);
-		break;
-	default:
-		break;
+	spin_lock_irq(&pfault_lock);
+	list_for_each_entry_safe(thread, next, &pfault_list, list) {
+		thread->pfault_wait = 0;
+		list_del(&thread->list);
+		tsk = container_of(thread, struct task_struct, thread);
+		wake_up_process(tsk);
+		put_task_struct(tsk);
 	}
-	return NOTIFY_OK;
+	spin_unlock_irq(&pfault_lock);
+	return 0;
 }
 
 static int __init pfault_irq_init(void)
@@ -775,7 +768,8 @@
 	if (rc)
 		goto out_pfault;
 	irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
-	hotcpu_notifier(pfault_cpu_notify, 0);
+	cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
+				  NULL, pfault_cpu_dead);
 	return 0;
 
 out_pfault:
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 2ce6bb3..3ba6227 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -94,6 +94,7 @@
 struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
 {
 	struct gmap *gmap;
+	unsigned long gmap_asce;
 
 	gmap = gmap_alloc(limit);
 	if (!gmap)
@@ -101,6 +102,11 @@
 	gmap->mm = mm;
 	spin_lock(&mm->context.gmap_lock);
 	list_add_rcu(&gmap->list, &mm->context.gmap_list);
+	if (list_is_singular(&mm->context.gmap_list))
+		gmap_asce = gmap->asce;
+	else
+		gmap_asce = -1UL;
+	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
 	spin_unlock(&mm->context.gmap_lock);
 	return gmap;
 }
@@ -230,6 +236,7 @@
 void gmap_remove(struct gmap *gmap)
 {
 	struct gmap *sg, *next;
+	unsigned long gmap_asce;
 
 	/* Remove all shadow gmaps linked to this gmap */
 	if (!list_empty(&gmap->children)) {
@@ -243,6 +250,14 @@
 	/* Remove gmap from the pre-mm list */
 	spin_lock(&gmap->mm->context.gmap_lock);
 	list_del_rcu(&gmap->list);
+	if (list_empty(&gmap->mm->context.gmap_list))
+		gmap_asce = 0;
+	else if (list_is_singular(&gmap->mm->context.gmap_list))
+		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
+					     struct gmap, list)->asce;
+	else
+		gmap_asce = -1UL;
+	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
 	spin_unlock(&gmap->mm->context.gmap_lock);
 	synchronize_rcu();
 	/* Put reference */
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 7104ffb..44f1503 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -252,6 +252,8 @@
 	int rc = -EINVAL;
 	pgd_t *pgdp;
 
+	if (addr == end)
+		return 0;
 	if (end >= MODULES_END)
 		return -EINVAL;
 	mutex_lock(&cpa_mutex);
@@ -307,11 +309,11 @@
 	int i;
 
 	if (test_facility(13)) {
-		__ptep_ipte_range(address, nr - 1, pte);
+		__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
 		return;
 	}
 	for (i = 0; i < nr; i++) {
-		__ptep_ipte(address, pte);
+		__ptep_ipte(address, pte, IPTE_GLOBAL);
 		address += PAGE_SIZE;
 		pte++;
 	}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5f09201..7a1897c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -35,9 +35,9 @@
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__ptep_ipte_local(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_LOCAL);
 	else
-		__ptep_ipte(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -56,7 +56,7 @@
 		pte_val(*ptep) |= _PAGE_INVALID;
 		mm->context.flush_mm = 1;
 	} else
-		__ptep_ipte(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -301,9 +301,9 @@
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pmdp_idte_local(addr, pmdp);
+		__pmdp_idte(addr, pmdp, IDTE_LOCAL);
 	else
-		__pmdp_idte(addr, pmdp);
+		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -322,7 +322,7 @@
 		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
 		mm->context.flush_mm = 1;
 	} else if (MACHINE_HAS_IDTE)
-		__pmdp_idte(addr, pmdp);
+		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
 	else
 		__pmdp_csp(pmdp);
 	atomic_dec(&mm->context.flush_count);
@@ -374,9 +374,9 @@
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pudp_idte_local(addr, pudp);
+		__pudp_idte(addr, pudp, IDTE_LOCAL);
 	else
-		__pudp_idte(addr, pudp);
+		__pudp_idte(addr, pudp, IDTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -620,7 +620,7 @@
 	pte = *ptep;
 	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
 		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
-		__ptep_ipte(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
 		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
 			pte_val(pte) |= _PAGE_PROTECT;
 		else
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 871af75..15ffc19 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -854,6 +854,15 @@
 }
 EXPORT_SYMBOL_GPL(zpci_stop_device);
 
+int zpci_report_error(struct pci_dev *pdev,
+		      struct zpci_report_error_header *report)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	return sclp_pci_report(report, zdev->fh, zdev->fid);
+}
+EXPORT_SYMBOL(zpci_report_error);
+
 static inline int barsize(u8 size)
 {
 	return (size) ? (1 << size) >> 10 : 0;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 7297fce..7350c8b 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -129,12 +129,11 @@
 		entry_clr_protected(entry);
 }
 
-static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
-			    dma_addr_t dma_addr, size_t size, int flags)
+static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+			      dma_addr_t dma_addr, size_t size, int flags)
 {
 	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
-	dma_addr_t start_dma_addr = dma_addr;
 	unsigned long irq_flags;
 	unsigned long *entry;
 	int i, rc = 0;
@@ -145,7 +144,7 @@
 	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
 	if (!zdev->dma_table) {
 		rc = -EINVAL;
-		goto no_refresh;
+		goto out_unlock;
 	}
 
 	for (i = 0; i < nr_pages; i++) {
@@ -159,20 +158,6 @@
 		dma_addr += PAGE_SIZE;
 	}
 
-	/*
-	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
-	 * translations when previously invalid translation-table entries are
-	 * validated. With lazy unmap, it also is skipped for previously valid
-	 * entries, but a global rpcit is then required before any address can
-	 * be re-used, i.e. after each iommu bitmap wrap-around.
-	 */
-	if (!zdev->tlb_refresh &&
-			(!s390_iommu_strict ||
-			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
-		goto no_refresh;
-
-	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
-				nr_pages * PAGE_SIZE);
 undo_cpu_trans:
 	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
 		flags = ZPCI_PTE_INVALID;
@@ -185,12 +170,46 @@
 			dma_update_cpu_trans(entry, page_addr, flags);
 		}
 	}
-
-no_refresh:
+out_unlock:
 	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
 	return rc;
 }
 
+static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
+			   size_t size, int flags)
+{
+	/*
+	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+	 * translations when previously invalid translation-table entries are
+	 * validated. With lazy unmap, it also is skipped for previously valid
+	 * entries, but a global rpcit is then required before any address can
+	 * be re-used, i.e. after each iommu bitmap wrap-around.
+	 */
+	if (!zdev->tlb_refresh &&
+			(!s390_iommu_strict ||
+			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
+		return 0;
+
+	return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+				  PAGE_ALIGN(size));
+}
+
+static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+			    dma_addr_t dma_addr, size_t size, int flags)
+{
+	int rc;
+
+	rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
+	if (rc)
+		return rc;
+
+	rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
+	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
+		__dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
+
+	return rc;
+}
+
 void dma_free_seg_table(unsigned long entry)
 {
 	unsigned long *sto = get_rt_sto(entry);
@@ -230,45 +249,54 @@
 				boundary_size, 0);
 }
 
-static unsigned long dma_alloc_iommu(struct device *dev, int size)
+static dma_addr_t dma_alloc_address(struct device *dev, int size)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long offset, flags;
-	int wrap = 0;
 
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
 	if (offset == -1) {
+		if (!zdev->tlb_refresh && !s390_iommu_strict) {
+			/* global flush before DMA addresses are reused */
+			if (zpci_refresh_global(zdev))
+				goto out_error;
+
+			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+				      zdev->lazy_bitmap, zdev->iommu_pages);
+			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+		}
 		/* wrap-around */
 		offset = __dma_alloc_iommu(dev, 0, size);
-		wrap = 1;
+		if (offset == -1)
+			goto out_error;
 	}
-
-	if (offset != -1) {
-		zdev->next_bit = offset + size;
-		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
-			/* global flush after wrap-around with lazy unmap */
-			zpci_refresh_global(zdev);
-	}
+	zdev->next_bit = offset + size;
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-	return offset;
+
+	return zdev->start_dma + offset * PAGE_SIZE;
+
+out_error:
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+	return DMA_ERROR_CODE;
 }
 
-static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
+static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long flags;
+	unsigned long flags, offset;
+
+	offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
 
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	if (!zdev->iommu_bitmap)
 		goto out;
-	bitmap_clear(zdev->iommu_bitmap, offset, size);
-	/*
-	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
-	 * until wrap-around.
-	 */
-	if (!s390_iommu_strict && offset >= zdev->next_bit)
-		zdev->next_bit = offset + size;
+
+	if (zdev->tlb_refresh || s390_iommu_strict)
+		bitmap_clear(zdev->iommu_bitmap, offset, size);
+	else
+		bitmap_set(zdev->lazy_bitmap, offset, size);
+
 out:
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
 }
@@ -289,16 +317,16 @@
 				     unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long nr_pages, iommu_page_index;
 	unsigned long pa = page_to_phys(page) + offset;
 	int flags = ZPCI_PTE_VALID;
+	unsigned long nr_pages;
 	dma_addr_t dma_addr;
 	int ret;
 
 	/* This rounds up number of pages based on size and offset */
 	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
-	iommu_page_index = dma_alloc_iommu(dev, nr_pages);
-	if (iommu_page_index == -1) {
+	dma_addr = dma_alloc_address(dev, nr_pages);
+	if (dma_addr == DMA_ERROR_CODE) {
 		ret = -ENOSPC;
 		goto out_err;
 	}
@@ -306,12 +334,6 @@
 	/* Use rounded up size */
 	size = nr_pages * PAGE_SIZE;
 
-	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
-	if (dma_addr + size > zdev->end_dma) {
-		ret = -ERANGE;
-		goto out_free;
-	}
-
 	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
 		flags |= ZPCI_TABLE_PROTECTED;
 
@@ -323,7 +345,7 @@
 	return dma_addr + (offset & ~PAGE_MASK);
 
 out_free:
-	dma_free_iommu(dev, iommu_page_index, nr_pages);
+	dma_free_address(dev, dma_addr, nr_pages);
 out_err:
 	zpci_err("map error:\n");
 	zpci_err_dma(ret, pa);
@@ -335,7 +357,6 @@
 				 unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long iommu_page_index;
 	int npages, ret;
 
 	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
@@ -349,8 +370,7 @@
 	}
 
 	atomic64_add(npages, &zdev->unmapped_pages);
-	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-	dma_free_iommu(dev, iommu_page_index, npages);
+	dma_free_address(dev, dma_addr, npages);
 }
 
 static void *s390_dma_alloc(struct device *dev, size_t size,
@@ -394,37 +414,98 @@
 	free_pages((unsigned long) pa, get_order(size));
 }
 
+/* Map a segment into a contiguous dma address area */
+static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			     size_t size, dma_addr_t *handle,
+			     enum dma_data_direction dir)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	dma_addr_t dma_addr_base, dma_addr;
+	int flags = ZPCI_PTE_VALID;
+	struct scatterlist *s;
+	unsigned long pa;
+	int ret;
+
+	size = PAGE_ALIGN(size);
+	dma_addr_base = dma_alloc_address(dev, size >> PAGE_SHIFT);
+	if (dma_addr_base == DMA_ERROR_CODE)
+		return -ENOMEM;
+
+	dma_addr = dma_addr_base;
+	if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
+		flags |= ZPCI_TABLE_PROTECTED;
+
+	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
+		pa = page_to_phys(sg_page(s)) + s->offset;
+		ret = __dma_update_trans(zdev, pa, dma_addr, s->length, flags);
+		if (ret)
+			goto unmap;
+
+		dma_addr += s->length;
+	}
+	ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
+	if (ret)
+		goto unmap;
+
+	*handle = dma_addr_base;
+	atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages);
+
+	return ret;
+
+unmap:
+	dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
+			 ZPCI_PTE_INVALID);
+	dma_free_address(dev, dma_addr_base, size >> PAGE_SHIFT);
+	zpci_err("map error:\n");
+	zpci_err_dma(ret, pa);
+	return ret;
+}
+
 static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
 			   int nr_elements, enum dma_data_direction dir,
 			   unsigned long attrs)
 {
-	int mapped_elements = 0;
-	struct scatterlist *s;
-	int i;
+	struct scatterlist *s = sg, *start = sg, *dma = sg;
+	unsigned int max = dma_get_max_seg_size(dev);
+	unsigned int size = s->offset + s->length;
+	unsigned int offset = s->offset;
+	int count = 0, i;
 
-	for_each_sg(sg, s, nr_elements, i) {
-		struct page *page = sg_page(s);
-		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
-						    s->length, dir, 0);
-		if (!dma_mapping_error(dev, s->dma_address)) {
-			s->dma_length = s->length;
-			mapped_elements++;
-		} else
-			goto unmap;
-	}
-out:
-	return mapped_elements;
+	for (i = 1; i < nr_elements; i++) {
+		s = sg_next(s);
 
-unmap:
-	for_each_sg(sg, s, mapped_elements, i) {
-		if (s->dma_address)
-			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
-					     dir, 0);
-		s->dma_address = 0;
+		s->dma_address = DMA_ERROR_CODE;
 		s->dma_length = 0;
+
+		if (s->offset || (size & ~PAGE_MASK) ||
+		    size + s->length > max) {
+			if (__s390_dma_map_sg(dev, start, size,
+					      &dma->dma_address, dir))
+				goto unmap;
+
+			dma->dma_address += offset;
+			dma->dma_length = size - offset;
+
+			size = offset = s->offset;
+			start = s;
+			dma = sg_next(dma);
+			count++;
+		}
+		size += s->length;
 	}
-	mapped_elements = 0;
-	goto out;
+	if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
+		goto unmap;
+
+	dma->dma_address += offset;
+	dma->dma_length = size - offset;
+
+	return count + 1;
+unmap:
+	for_each_sg(sg, s, count, i)
+		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
+				     dir, attrs);
+
+	return 0;
 }
 
 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
@@ -435,8 +516,9 @@
 	int i;
 
 	for_each_sg(sg, s, nr_elements, i) {
-		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir,
-				     0);
+		if (s->dma_length)
+			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
+					     dir, attrs);
 		s->dma_address = 0;
 		s->dma_length = 0;
 	}
@@ -482,7 +564,14 @@
 		rc = -ENOMEM;
 		goto free_dma_table;
 	}
+	if (!zdev->tlb_refresh && !s390_iommu_strict) {
+		zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
+		if (!zdev->lazy_bitmap) {
+			rc = -ENOMEM;
+			goto free_bitmap;
+		}
 
+	}
 	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
 				(u64) zdev->dma_table);
 	if (rc)
@@ -492,6 +581,8 @@
 free_bitmap:
 	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
 free_dma_table:
 	dma_free_cpu_table(zdev->dma_table);
 	zdev->dma_table = NULL;
@@ -513,6 +604,9 @@
 	zdev->dma_table = NULL;
 	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
+
 	zdev->next_bit = 0;
 }
 
diff --git a/arch/score/include/asm/uaccess.h b/arch/score/include/asm/uaccess.h
index 20a3591..01aec8c 100644
--- a/arch/score/include/asm/uaccess.h
+++ b/arch/score/include/asm/uaccess.h
@@ -163,7 +163,7 @@
 		__get_user_asm(val, "lw", ptr);				\
 		 break;							\
 	case 8: 							\
-		if ((copy_from_user((void *)&val, ptr, 8)) == 0)	\
+		if (__copy_from_user((void *)&val, ptr, 8) == 0)	\
 			__gu_err = 0;					\
 		else							\
 			__gu_err = -EFAULT;				\
@@ -188,6 +188,8 @@
 									\
 	if (likely(access_ok(VERIFY_READ, __gu_ptr, size)))		\
 		__get_user_common((x), size, __gu_ptr);			\
+	else								\
+		(x) = 0;						\
 									\
 	__gu_err;							\
 })
@@ -201,6 +203,7 @@
 		"2:\n"							\
 		".section .fixup,\"ax\"\n"				\
 		"3:li	%0, %4\n"					\
+		"li	%1, 0\n"					\
 		"j	2b\n"						\
 		".previous\n"						\
 		".section __ex_table,\"a\"\n"				\
@@ -298,35 +301,34 @@
 static inline unsigned long
 copy_from_user(void *to, const void *from, unsigned long len)
 {
-	unsigned long over;
+	unsigned long res = len;
 
-	if (access_ok(VERIFY_READ, from, len))
-		return __copy_tofrom_user(to, from, len);
+	if (likely(access_ok(VERIFY_READ, from, len)))
+		res = __copy_tofrom_user(to, from, len);
 
-	if ((unsigned long)from < TASK_SIZE) {
-		over = (unsigned long)from + len - TASK_SIZE;
-		return __copy_tofrom_user(to, from, len - over) + over;
-	}
-	return len;
+	if (unlikely(res))
+		memset(to + (len - res), 0, res);
+
+	return res;
 }
 
 static inline unsigned long
 copy_to_user(void *to, const void *from, unsigned long len)
 {
-	unsigned long over;
+	if (likely(access_ok(VERIFY_WRITE, to, len)))
+		len = __copy_tofrom_user(to, from, len);
 
-	if (access_ok(VERIFY_WRITE, to, len))
-		return __copy_tofrom_user(to, from, len);
-
-	if ((unsigned long)to < TASK_SIZE) {
-		over = (unsigned long)to + len - TASK_SIZE;
-		return __copy_tofrom_user(to, from, len - over) + over;
-	}
 	return len;
 }
 
-#define __copy_from_user(to, from, len)	\
-		__copy_tofrom_user((to), (from), (len))
+static inline unsigned long
+__copy_from_user(void *to, const void *from, unsigned long len)
+{
+	unsigned long left = __copy_tofrom_user(to, from, len);
+	if (unlikely(left))
+		memset(to + (len - left), 0, left);
+	return left;
+}
 
 #define __copy_to_user(to, from, len)		\
 		__copy_tofrom_user((to), (from), (len))
@@ -340,17 +342,17 @@
 static inline unsigned long
 __copy_from_user_inatomic(void *to, const void *from, unsigned long len)
 {
-	return __copy_from_user(to, from, len);
+	return __copy_tofrom_user(to, from, len);
 }
 
-#define __copy_in_user(to, from, len)	__copy_from_user(to, from, len)
+#define __copy_in_user(to, from, len)	__copy_tofrom_user(to, from, len)
 
 static inline unsigned long
 copy_in_user(void *to, const void *from, unsigned long len)
 {
 	if (access_ok(VERIFY_READ, from, len) &&
 		      access_ok(VERFITY_WRITE, to, len))
-		return copy_from_user(to, from, len);
+		return __copy_tofrom_user(to, from, len);
 }
 
 /*
diff --git a/arch/sh/include/asm/atomic-llsc.h b/arch/sh/include/asm/atomic-llsc.h
index caea2c4..1d159ce 100644
--- a/arch/sh/include/asm/atomic-llsc.h
+++ b/arch/sh/include/asm/atomic-llsc.h
@@ -60,7 +60,7 @@
 "	movco.l	%0, @%3					\n"		\
 "	bf	1b					\n"		\
 "	synco						\n"		\
-	: "=&z" (temp), "=&z" (res)					\
+	: "=&z" (temp), "=&r" (res)					\
 	: "r" (i), "r" (&v->counter)					\
 	: "t");								\
 									\
diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h
index a49635c..92ade79 100644
--- a/arch/sh/include/asm/uaccess.h
+++ b/arch/sh/include/asm/uaccess.h
@@ -151,7 +151,10 @@
 	__kernel_size_t __copy_size = (__kernel_size_t) n;
 
 	if (__copy_size && __access_ok(__copy_from, __copy_size))
-		return __copy_user(to, from, __copy_size);
+		__copy_size = __copy_user(to, from, __copy_size);
+
+	if (unlikely(__copy_size))
+		memset(to + (n - __copy_size), 0, __copy_size);
 
 	return __copy_size;
 }
diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h
index c01376c..ca5073d 100644
--- a/arch/sh/include/asm/uaccess_64.h
+++ b/arch/sh/include/asm/uaccess_64.h
@@ -24,6 +24,7 @@
 #define __get_user_size(x,ptr,size,retval)			\
 do {								\
 	retval = 0;						\
+	x = 0;							\
 	switch (size) {						\
 	case 1:							\
 		retval = __get_user_asm_b((void *)&x,		\
diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
index 839612c..0d3637c 100644
--- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c
@@ -122,32 +122,16 @@
 	__raw_writel(STBCR_RESET, STBCR_REG(cpu));
 }
 
-static int
-shx3_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+static int shx3_cpu_prepare(unsigned int cpu)
 {
-	unsigned int cpu = (unsigned int)hcpu;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-		shx3_update_boot_vector(cpu);
-		break;
-	case CPU_ONLINE:
-		pr_info("CPU %u is now online\n", cpu);
-		break;
-	case CPU_DEAD:
-		break;
-	}
-
-	return NOTIFY_OK;
+	shx3_update_boot_vector(cpu);
+	return 0;
 }
 
-static struct notifier_block shx3_cpu_notifier = {
-	.notifier_call		= shx3_cpu_callback,
-};
-
 static int register_shx3_cpu_notifier(void)
 {
-	register_hotcpu_notifier(&shx3_cpu_notifier);
+	cpuhp_setup_state_nocalls(CPUHP_SH_SH3X_PREPARE, "sh/shx3:prepare",
+				  shx3_cpu_prepare, NULL);
 	return 0;
 }
 late_initcall(register_shx3_cpu_notifier);
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 38993e0..95eccd4 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -382,7 +382,7 @@
 		return;
 	}
 
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0);
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
 	if (err == -EBUSY) {
 		__raw_writel(old, parent);
 		return;
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 59b0960..f5d60f1 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -56,7 +56,6 @@
 	def_bool 64BIT
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
-	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_KRETPROBES
 	select HAVE_KPROBES
 	select HAVE_RCU_TABLE_FREE if SMP
diff --git a/arch/sparc/include/asm/ftrace.h b/arch/sparc/include/asm/ftrace.h
index 3192a8e..62755a3 100644
--- a/arch/sparc/include/asm/ftrace.h
+++ b/arch/sparc/include/asm/ftrace.h
@@ -9,6 +9,10 @@
 void _mcount(void);
 #endif
 
+#endif /* CONFIG_MCOUNT */
+
+#if defined(CONFIG_SPARC64) && !defined(CC_USE_FENTRY)
+#define HAVE_FUNCTION_GRAPH_FP_TEST
 #endif
 
 #ifdef CONFIG_DYNAMIC_FTRACE
diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 8c2a8c9..c1263fc 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -25,6 +25,7 @@
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1UL))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#define REAL_HPAGE_PER_HPAGE	(_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h
index 26d9e77..ce2233f 100644
--- a/arch/sparc/include/asm/smp_64.h
+++ b/arch/sparc/include/asm/smp_64.h
@@ -43,6 +43,7 @@
 int hard_smp_processor_id(void);
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
+void smp_fill_in_cpu_possible_map(void);
 void smp_fill_in_sib_core_maps(void);
 void cpu_play_dead(void);
 
@@ -72,6 +73,7 @@
 #define smp_fill_in_sib_core_maps() do { } while (0)
 #define smp_fetch_global_regs() do { } while (0)
 #define smp_fetch_global_pmu() do { } while (0)
+#define smp_fill_in_cpu_possible_map() do { } while (0)
 
 #endif /* !(CONFIG_SMP) */
 
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 341a5a1..ea55f86 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -249,8 +249,7 @@
 static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	if (n && __access_ok((unsigned long) to, n)) {
-		if (!__builtin_constant_p(n))
-			check_object_size(from, n, true);
+		check_object_size(from, n, true);
 		return __copy_user(to, (__force void __user *) from, n);
 	} else
 		return n;
@@ -258,19 +257,19 @@
 
 static inline unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
-	if (!__builtin_constant_p(n))
-		check_object_size(from, n, true);
+	check_object_size(from, n, true);
 	return __copy_user(to, (__force void __user *) from, n);
 }
 
 static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	if (n && __access_ok((unsigned long) from, n)) {
-		if (!__builtin_constant_p(n))
-			check_object_size(to, n, false);
+		check_object_size(to, n, false);
 		return __copy_user((__force void __user *) to, from, n);
-	} else
+	} else {
+		memset(to, 0, n);
 		return n;
+	}
 }
 
 static inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n)
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index 8bda94f..37a315d 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -212,8 +212,7 @@
 {
 	unsigned long ret;
 
-	if (!__builtin_constant_p(size))
-		check_object_size(to, size, false);
+	check_object_size(to, size, false);
 
 	ret = ___copy_from_user(to, from, size);
 	if (unlikely(ret))
@@ -233,8 +232,8 @@
 {
 	unsigned long ret;
 
-	if (!__builtin_constant_p(size))
-		check_object_size(from, size, true);
+	check_object_size(from, size, true);
+
 	ret = ___copy_to_user(to, from, size);
 	if (unlikely(ret))
 		ret = copy_to_user_fixup(to, from, size);
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 0a2d2dd..6bcff69 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -131,7 +131,7 @@
 		return parent + 8UL;
 
 	if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
-				     frame_pointer) == -EBUSY)
+				     frame_pointer, NULL) == -EBUSY)
 		return parent + 8UL;
 
 	trace.func = self_addr;
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index 599f120..6b7331d 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -31,6 +31,7 @@
 #include <linux/initrd.h>
 #include <linux/module.h>
 #include <linux/start_kernel.h>
+#include <linux/bootmem.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
@@ -50,6 +51,8 @@
 #include <asm/elf.h>
 #include <asm/mdesc.h>
 #include <asm/cacheflush.h>
+#include <asm/dma.h>
+#include <asm/irq.h>
 
 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>
@@ -590,6 +593,22 @@
 		pause_patch();
 }
 
+void __init alloc_irqstack_bootmem(void)
+{
+	unsigned int i, node;
+
+	for_each_possible_cpu(i) {
+		node = cpu_to_node(i);
+
+		softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+		hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+	}
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	/* Initialize PROM console and command line. */
@@ -650,6 +669,13 @@
 
 	paging_init();
 	init_sparc64_elf_hwcap();
+	smp_fill_in_cpu_possible_map();
+	/*
+	 * Once the OF device tree and MDESC have been setup and nr_cpus has
+	 * been parsed, we know the list of possible cpus.  Therefore we can
+	 * allocate the IRQ stacks.
+	 */
+	alloc_irqstack_bootmem();
 }
 
 extern int stop_a_enabled;
diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index fb30e7c..e80e6ba 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -352,9 +352,7 @@
 	preempt_disable();
 	cpu = smp_processor_id();
 
-	/* Invoke the CPU_STARTING notifier callbacks */
 	notify_cpu_starting(cpu);
-
 	arch_cpu_pre_online(arg);
 
 	/* Set the CPU in the cpu_online_mask */
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 8a6151a..d3035ba 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1227,6 +1227,20 @@
 		xcall_deliver_impl = hypervisor_xcall_deliver;
 }
 
+void __init smp_fill_in_cpu_possible_map(void)
+{
+	int possible_cpus = num_possible_cpus();
+	int i;
+
+	if (possible_cpus > nr_cpu_ids)
+		possible_cpus = nr_cpu_ids;
+
+	for (i = 0; i < possible_cpus; i++)
+		set_cpu_possible(i, true);
+	for (; i < NR_CPUS; i++)
+		set_cpu_possible(i, false);
+}
+
 void smp_fill_in_sib_core_maps(void)
 {
 	unsigned int i;
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index e16fdd2..3f291d8 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -484,6 +484,7 @@
 		tsb_grow(mm, MM_TSB_BASE, mm_rss);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
+	mm_rss *= REAL_HPAGE_PER_HPAGE;
 	if (unlikely(mm_rss >
 		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
 		if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 65457c9..7ac6b62 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1160,7 +1160,7 @@
 	return numa_latency[from][to];
 }
 
-static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
+static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
 {
 	int i;
 
@@ -1173,8 +1173,8 @@
 	return i;
 }
 
-static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp,
-					  int index)
+static void __init find_numa_latencies_for_group(struct mdesc_handle *md,
+						 u64 grp, int index)
 {
 	u64 arc;
 
@@ -2081,7 +2081,6 @@
 {
 	unsigned long end_pfn, shift, phys_base;
 	unsigned long real_end, i;
-	int node;
 
 	setup_page_offset();
 
@@ -2250,21 +2249,6 @@
 	/* Setup bootmem... */
 	last_valid_pfn = end_pfn = bootmem_init(phys_base);
 
-	/* Once the OF device tree and MDESC have been setup, we know
-	 * the list of possible cpus.  Therefore we can allocate the
-	 * IRQ stacks.
-	 */
-	for_each_possible_cpu(i) {
-		node = cpu_to_node(i);
-
-		softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-							THREAD_SIZE,
-							THREAD_SIZE, 0);
-		hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-							THREAD_SIZE,
-							THREAD_SIZE, 0);
-	}
-
 	kernel_physical_mapping_init();
 
 	{
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index 3659d37..c56a195 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -174,10 +174,25 @@
 		return;
 
 	if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
-		if (pmd_val(pmd) & _PAGE_PMD_HUGE)
-			mm->context.thp_pte_count++;
-		else
-			mm->context.thp_pte_count--;
+		/*
+		 * Note that this routine only sets pmds for THP pages.
+		 * Hugetlb pages are handled elsewhere.  We need to check
+		 * for huge zero page.  Huge zero pages are like hugetlb
+		 * pages in that there is no RSS, but there is the need
+		 * for TSB entries.  So, huge zero page counts go into
+		 * hugetlb_pte_count.
+		 */
+		if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
+			if (is_huge_zero_page(pmd_page(pmd)))
+				mm->context.hugetlb_pte_count++;
+			else
+				mm->context.thp_pte_count++;
+		} else {
+			if (is_huge_zero_page(pmd_page(orig)))
+				mm->context.hugetlb_pte_count--;
+			else
+				mm->context.thp_pte_count--;
+		}
 
 		/* Do not try to allocate the TSB hash table if we
 		 * don't have one already.  We have various locks held
@@ -204,6 +219,9 @@
 	}
 }
 
+/*
+ * This routine is only called when splitting a THP
+ */
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {
@@ -213,6 +231,15 @@
 
 	set_pmd_at(vma->vm_mm, address, pmdp, entry);
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+	/*
+	 * set_pmd_at() will not be called in a way to decrement
+	 * thp_pte_count when splitting a THP, so do it now.
+	 * Sanity check pmd before doing the actual decrement.
+	 */
+	if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
+	    !is_huge_zero_page(pmd_page(entry)))
+		(vma->vm_mm)->context.thp_pte_count--;
 }
 
 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c
index 6725ed4..f2b7711 100644
--- a/arch/sparc/mm/tsb.c
+++ b/arch/sparc/mm/tsb.c
@@ -469,8 +469,10 @@
 
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
+	unsigned long mm_rss = get_mm_rss(mm);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	unsigned long total_huge_pte_count;
+	unsigned long saved_hugetlb_pte_count;
+	unsigned long saved_thp_pte_count;
 #endif
 	unsigned int i;
 
@@ -483,10 +485,12 @@
 	 * will re-increment the counters as the parent PTEs are
 	 * copied into the child address space.
 	 */
-	total_huge_pte_count = mm->context.hugetlb_pte_count +
-			 mm->context.thp_pte_count;
+	saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
+	saved_thp_pte_count = mm->context.thp_pte_count;
 	mm->context.hugetlb_pte_count = 0;
 	mm->context.thp_pte_count = 0;
+
+	mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
 #endif
 
 	/* copy_mm() copies over the parent's mm_struct before calling
@@ -499,11 +503,13 @@
 	/* If this is fork, inherit the parent's TSB size.  We would
 	 * grow it to that size on the first page fault anyways.
 	 */
-	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
+	tsb_grow(mm, MM_TSB_BASE, mm_rss);
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	if (unlikely(total_huge_pte_count))
-		tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count);
+	if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
+		tsb_grow(mm, MM_TSB_HUGE,
+			 (saved_hugetlb_pte_count + saved_thp_pte_count) *
+			 REAL_HPAGE_PER_HPAGE);
 #endif
 
 	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 4820a02..78da75b 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -4,7 +4,6 @@
 config TILE
 	def_bool y
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
-	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_WANT_FRAME_POINTERS
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index 0a9c4265..a77369e 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -416,14 +416,13 @@
 	return n;
 }
 
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-/*
- * There are still unprovable places in the generic code as of 2.6.34, so this
- * option is not really compatible with -Werror, which is more useful in
- * general.
- */
-extern void copy_from_user_overflow(void)
-	__compiletime_warning("copy_from_user() size is not provably correct");
+extern void __compiletime_error("usercopy buffer size is too small")
+__bad_copy_user(void);
+
+static inline void copy_user_overflow(int size, unsigned long count)
+{
+	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
+}
 
 static inline unsigned long __must_check copy_from_user(void *to,
 					  const void __user *from,
@@ -433,14 +432,13 @@
 
 	if (likely(sz == -1 || sz >= n))
 		n = _copy_from_user(to, from, n);
+	else if (!__builtin_constant_p(n))
+		copy_user_overflow(sz, n);
 	else
-		copy_from_user_overflow();
+		__bad_copy_user();
 
 	return n;
 }
-#else
-#define copy_from_user _copy_from_user
-#endif
 
 #ifdef __tilegx__
 /**
diff --git a/arch/tile/kernel/ftrace.c b/arch/tile/kernel/ftrace.c
index 4a57208..b827a41 100644
--- a/arch/tile/kernel/ftrace.c
+++ b/arch/tile/kernel/ftrace.c
@@ -184,7 +184,7 @@
 	*parent = return_hooker;
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer);
+				       frame_pointer, NULL);
 	if (err == -EBUSY) {
 		*parent = old;
 		return;
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 2d0266d..3282787 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -175,27 +175,4 @@
 	.name		= "watchdog",
 	.fops		= &harddog_fops,
 };
-
-static char banner[] __initdata = KERN_INFO "UML Watchdog Timer\n";
-
-static int __init harddog_init(void)
-{
-	int ret;
-
-	ret = misc_register(&harddog_miscdev);
-
-	if (ret)
-		return ret;
-
-	printk(banner);
-
-	return 0;
-}
-
-static void __exit harddog_exit(void)
-{
-	misc_deregister(&harddog_miscdev);
-}
-
-module_init(harddog_init);
-module_exit(harddog_exit);
+module_misc_device(harddog_miscdev);
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index 1dd5bd8..1330553 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -81,7 +81,7 @@
   .altinstr_replacement : { *(.altinstr_replacement) }
   /* .exit.text is discard at runtime, not link time, to deal with references
      from .altinstructions and .eh_frame */
-  .exit.text : { *(.exit.text) }
+  .exit.text : { EXIT_TEXT }
   .exit.data : { *(.exit.data) }
 
   .preinit_array : {
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index ef4b8f9..b783ac8 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -21,21 +21,17 @@
 	PT_REGS_SET_SYSCALL_RETURN(regs, -ENOSYS);
 
 	if (syscall_trace_enter(regs))
-		return;
+		goto out;
 
 	/* Do the seccomp check after ptrace; failures should be fast. */
 	if (secure_computing(NULL) == -1)
-		return;
+		goto out;
 
-	/* Update the syscall number after orig_ax has potentially been updated
-	 * with ptrace.
-	 */
-	UPT_SYSCALL_NR(r) = PT_SYSCALL_NR(r->gp);
 	syscall = UPT_SYSCALL_NR(r);
-
 	if (syscall >= 0 && syscall <= __NR_syscall_max)
 		PT_REGS_SET_SYSCALL_RETURN(regs,
 				EXECUTE_SYSCALL(syscall, regs));
 
+out:
 	syscall_trace_leave(regs);
 }
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index e35632e..62dfc64 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -98,7 +98,7 @@
 }
 
 static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
-		bool write, bool foreign)
+		bool write, bool execute, bool foreign)
 {
 	/* by default, allow everything */
 	return true;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c580d8c..9b2d50a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,7 +24,6 @@
 	select ARCH_DISCARD_MEMBLOCK
 	select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
-	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FAST_MULTIPLIER
@@ -94,6 +93,7 @@
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_WITHIN_STACK_FRAMES
 	select HAVE_EBPF_JIT			if X86_64
+	select HAVE_ARCH_VMAP_STACK		if X86_64
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
@@ -110,7 +110,6 @@
 	select HAVE_EXIT_THREAD
 	select HAVE_FENTRY			if X86_64
 	select HAVE_FTRACE_MCOUNT_RECORD
-	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
@@ -158,6 +157,7 @@
 	select SPARSE_IRQ
 	select SRCU
 	select SYSCTL_EXCEPTION_TRACE
+	select THREAD_INFO_IN_TASK
 	select USER_STACKTRACE_SUPPORT
 	select VIRT_TO_BUS
 	select X86_DEV_DMA_OPS			if X86_64
@@ -550,6 +550,18 @@
 	  Say Y here if you have a Quark based system such as the Arduino
 	  compatible Intel Galileo.
 
+config MLX_PLATFORM
+	tristate "Mellanox Technologies platform support"
+	depends on X86_64
+	depends on X86_EXTENDED_PLATFORM
+	---help---
+	  This option enables system support for the Mellanox Technologies
+	  platform.
+
+	  Say Y here if you are building a kernel for Mellanox system.
+
+	  Otherwise, say N.
+
 config X86_INTEL_LPSS
 	bool "Intel Low Power Subsystem Support"
 	depends on X86 && ACPI
@@ -706,7 +718,6 @@
 config PARAVIRT_SPINLOCKS
 	bool "Paravirtualization layer for spinlocks"
 	depends on PARAVIRT && SMP
-	select UNINLINE_SPIN_UNLOCK if !QUEUED_SPINLOCKS
 	---help---
 	  Paravirtualized spinlocks allow a pvops backend to replace the
 	  spinlock implementation with something virtualization-friendly
@@ -719,7 +730,7 @@
 
 config QUEUED_LOCK_STAT
 	bool "Paravirt queued spinlock statistics"
-	depends on PARAVIRT_SPINLOCKS && DEBUG_FS && QUEUED_SPINLOCKS
+	depends on PARAVIRT_SPINLOCKS && DEBUG_FS
 	---help---
 	  Enable the collection of statistical data on the slowpath
 	  behavior of paravirtualized queued spinlocks and report
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index ff574da..cc69e37 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -29,22 +29,11 @@
 static void setup_boot_services##bits(struct efi_config *c)		\
 {									\
 	efi_system_table_##bits##_t *table;				\
-	efi_boot_services_##bits##_t *bt;				\
 									\
 	table = (typeof(table))sys_table;				\
 									\
+	c->boot_services = table->boottime;				\
 	c->text_output = table->con_out;				\
-									\
-	bt = (typeof(bt))(unsigned long)(table->boottime);		\
-									\
-	c->allocate_pool = bt->allocate_pool;				\
-	c->allocate_pages = bt->allocate_pages;				\
-	c->get_memory_map = bt->get_memory_map;				\
-	c->free_pool = bt->free_pool;					\
-	c->free_pages = bt->free_pages;					\
-	c->locate_handle = bt->locate_handle;				\
-	c->handle_protocol = bt->handle_protocol;			\
-	c->exit_boot_services = bt->exit_boot_services;			\
 }
 BOOT_SERVICES(32);
 BOOT_SERVICES(64);
@@ -286,29 +275,6 @@
 	}
 }
 
-static void find_bits(unsigned long mask, u8 *pos, u8 *size)
-{
-	u8 first, len;
-
-	first = 0;
-	len = 0;
-
-	if (mask) {
-		while (!(mask & 0x1)) {
-			mask = mask >> 1;
-			first++;
-		}
-
-		while (mask & 0x1) {
-			mask = mask >> 1;
-			len++;
-		}
-	}
-
-	*pos = first;
-	*size = len;
-}
-
 static efi_status_t
 __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
 {
@@ -578,7 +544,7 @@
 	efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
 	unsigned long nr_ugas;
 	u32 *handles = (u32 *)uga_handle;;
-	efi_status_t status;
+	efi_status_t status = EFI_INVALID_PARAMETER;
 	int i;
 
 	first_uga = NULL;
@@ -623,7 +589,7 @@
 	efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID;
 	unsigned long nr_ugas;
 	u64 *handles = (u64 *)uga_handle;;
-	efi_status_t status;
+	efi_status_t status = EFI_INVALID_PARAMETER;
 	int i;
 
 	first_uga = NULL;
@@ -1004,79 +970,87 @@
 	return status;
 }
 
+struct exit_boot_struct {
+	struct boot_params *boot_params;
+	struct efi_info *efi;
+	struct setup_data *e820ext;
+	__u32 e820ext_size;
+	bool is64;
+};
+
+static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
+				   struct efi_boot_memmap *map,
+				   void *priv)
+{
+	static bool first = true;
+	const char *signature;
+	__u32 nr_desc;
+	efi_status_t status;
+	struct exit_boot_struct *p = priv;
+
+	if (first) {
+		nr_desc = *map->buff_size / *map->desc_size;
+		if (nr_desc > ARRAY_SIZE(p->boot_params->e820_map)) {
+			u32 nr_e820ext = nr_desc -
+					ARRAY_SIZE(p->boot_params->e820_map);
+
+			status = alloc_e820ext(nr_e820ext, &p->e820ext,
+					       &p->e820ext_size);
+			if (status != EFI_SUCCESS)
+				return status;
+		}
+		first = false;
+	}
+
+	signature = p->is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE;
+	memcpy(&p->efi->efi_loader_signature, signature, sizeof(__u32));
+
+	p->efi->efi_systab = (unsigned long)sys_table_arg;
+	p->efi->efi_memdesc_size = *map->desc_size;
+	p->efi->efi_memdesc_version = *map->desc_ver;
+	p->efi->efi_memmap = (unsigned long)*map->map;
+	p->efi->efi_memmap_size = *map->map_size;
+
+#ifdef CONFIG_X86_64
+	p->efi->efi_systab_hi = (unsigned long)sys_table_arg >> 32;
+	p->efi->efi_memmap_hi = (unsigned long)*map->map >> 32;
+#endif
+
+	return EFI_SUCCESS;
+}
+
 static efi_status_t exit_boot(struct boot_params *boot_params,
 			      void *handle, bool is64)
 {
-	struct efi_info *efi = &boot_params->efi_info;
-	unsigned long map_sz, key, desc_size;
+	unsigned long map_sz, key, desc_size, buff_size;
 	efi_memory_desc_t *mem_map;
 	struct setup_data *e820ext;
-	const char *signature;
 	__u32 e820ext_size;
-	__u32 nr_desc, prev_nr_desc;
 	efi_status_t status;
 	__u32 desc_version;
-	bool called_exit = false;
-	u8 nr_entries;
-	int i;
+	struct efi_boot_memmap map;
+	struct exit_boot_struct priv;
 
-	nr_desc = 0;
-	e820ext = NULL;
-	e820ext_size = 0;
+	map.map =		&mem_map;
+	map.map_size =		&map_sz;
+	map.desc_size =		&desc_size;
+	map.desc_ver =		&desc_version;
+	map.key_ptr =		&key;
+	map.buff_size =		&buff_size;
+	priv.boot_params =	boot_params;
+	priv.efi =		&boot_params->efi_info;
+	priv.e820ext =		NULL;
+	priv.e820ext_size =	0;
+	priv.is64 =		is64;
 
-get_map:
-	status = efi_get_memory_map(sys_table, &mem_map, &map_sz, &desc_size,
-				    &desc_version, &key);
-
+	/* Might as well exit boot services now */
+	status = efi_exit_boot_services(sys_table, handle, &map, &priv,
+					exit_boot_func);
 	if (status != EFI_SUCCESS)
 		return status;
 
-	prev_nr_desc = nr_desc;
-	nr_desc = map_sz / desc_size;
-	if (nr_desc > prev_nr_desc &&
-	    nr_desc > ARRAY_SIZE(boot_params->e820_map)) {
-		u32 nr_e820ext = nr_desc - ARRAY_SIZE(boot_params->e820_map);
-
-		status = alloc_e820ext(nr_e820ext, &e820ext, &e820ext_size);
-		if (status != EFI_SUCCESS)
-			goto free_mem_map;
-
-		efi_call_early(free_pool, mem_map);
-		goto get_map; /* Allocated memory, get map again */
-	}
-
-	signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE;
-	memcpy(&efi->efi_loader_signature, signature, sizeof(__u32));
-
-	efi->efi_systab = (unsigned long)sys_table;
-	efi->efi_memdesc_size = desc_size;
-	efi->efi_memdesc_version = desc_version;
-	efi->efi_memmap = (unsigned long)mem_map;
-	efi->efi_memmap_size = map_sz;
-
-#ifdef CONFIG_X86_64
-	efi->efi_systab_hi = (unsigned long)sys_table >> 32;
-	efi->efi_memmap_hi = (unsigned long)mem_map >> 32;
-#endif
-
-	/* Might as well exit boot services now */
-	status = efi_call_early(exit_boot_services, handle, key);
-	if (status != EFI_SUCCESS) {
-		/*
-		 * ExitBootServices() will fail if any of the event
-		 * handlers change the memory map. In which case, we
-		 * must be prepared to retry, but only once so that
-		 * we're guaranteed to exit on repeated failures instead
-		 * of spinning forever.
-		 */
-		if (called_exit)
-			goto free_mem_map;
-
-		called_exit = true;
-		efi_call_early(free_pool, mem_map);
-		goto get_map;
-	}
-
+	e820ext = priv.e820ext;
+	e820ext_size = priv.e820ext_size;
 	/* Historic? */
 	boot_params->alt_mem_k = 32 * 1024;
 
@@ -1085,10 +1059,6 @@
 		return status;
 
 	return EFI_SUCCESS;
-
-free_mem_map:
-	efi_call_early(free_pool, mem_map);
-	return status;
 }
 
 /*
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 1038524..fd0b6a2 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -82,7 +82,7 @@
 
 	/* Relocate efi_config->call() */
 	leal	efi32_config(%esi), %eax
-	add	%esi, 88(%eax)
+	add	%esi, 32(%eax)
 	pushl	%eax
 
 	call	make_boot_params
@@ -108,7 +108,7 @@
 
 	/* Relocate efi_config->call() */
 	leal	efi32_config(%esi), %eax
-	add	%esi, 88(%eax)
+	add	%esi, 32(%eax)
 	pushl	%eax
 2:
 	call	efi_main
@@ -264,7 +264,7 @@
 #ifdef CONFIG_EFI_STUB
 	.data
 efi32_config:
-	.fill 11,8,0
+	.fill 4,8,0
 	.long efi_call_phys
 	.long 0
 	.byte 0
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 0d80a7a..efdfba2 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -265,7 +265,7 @@
 	/*
 	 * Relocate efi_config->call().
 	 */
-	addq	%rbp, efi64_config+88(%rip)
+	addq	%rbp, efi64_config+32(%rip)
 
 	movq	%rax, %rdi
 	call	make_boot_params
@@ -285,7 +285,7 @@
 	 * Relocate efi_config->call().
 	 */
 	movq	efi_config(%rip), %rax
-	addq	%rbp, 88(%rax)
+	addq	%rbp, 32(%rax)
 2:
 	movq	efi_config(%rip), %rdi
 	call	efi_main
@@ -457,14 +457,14 @@
 #ifdef CONFIG_EFI_MIXED
 	.global efi32_config
 efi32_config:
-	.fill	11,8,0
+	.fill	4,8,0
 	.quad	efi64_thunk
 	.byte	0
 #endif
 
 	.global efi64_config
 efi64_config:
-	.fill	11,8,0
+	.fill	4,8,0
 	.quad	efi_call
 	.byte	1
 #endif /* CONFIG_EFI_STUB */
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 4e2ecfa..4b429df 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1 +1,3 @@
 CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+# CONFIG_HIGHMEM64G is not set
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c
index 89fa85e..6f97fb3 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb.c
+++ b/arch/x86/crypto/sha256-mb/sha256_mb.c
@@ -485,10 +485,10 @@
 
 			req = cast_mcryptd_ctx_to_req(req_ctx);
 			if (irqs_disabled())
-				rctx->complete(&req->base, ret);
+				req_ctx->complete(&req->base, ret);
 			else {
 				local_bh_disable();
-				rctx->complete(&req->base, ret);
+				req_ctx->complete(&req->base, ret);
 				local_bh_enable();
 			}
 		}
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
index b691da9..a78a069 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
@@ -265,13 +265,14 @@
 	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
 	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
 	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
-	movl	_args_digest+4*32(state, idx, 4), tmp2_w
+	vmovd   _args_digest(state , idx, 4) , %xmm0
 	vpinsrd	$1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1
 	vpinsrd	$2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1
 	vpinsrd	$3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1
 
-	vmovdqu	%xmm0, _result_digest(job_rax)
-	movl	tmp2_w, _result_digest+1*16(job_rax)
+        vmovdqu %xmm0, _result_digest(job_rax)
+        offset =  (_result_digest + 1*16)
+        vmovdqu %xmm1, offset(job_rax)
 
 	pop	%rbx
 
diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
index f4cf5b7..d210174 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb.c
@@ -497,10 +497,10 @@
 
 			req = cast_mcryptd_ctx_to_req(req_ctx);
 			if (irqs_disabled())
-				rctx->complete(&req->base, ret);
+				req_ctx->complete(&req->base, ret);
 			else {
 				local_bh_disable();
-				rctx->complete(&req->base, ret);
+				req_ctx->complete(&req->base, ret);
 				local_bh_enable();
 			}
 		}
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index fe91c25..77f28ce 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -5,6 +5,8 @@
 OBJECT_FILES_NON_STANDARD_entry_$(BITS).o   := y
 OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y
 
+CFLAGS_syscall_64.o		+= -Wno-override-init
+CFLAGS_syscall_32.o		+= -Wno-override-init
 obj-y				:= entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
 obj-y				+= common.o
 
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 1433f6b..bdd9cc5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -31,13 +31,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
-static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
-{
-	unsigned long top_of_stack =
-		(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
-	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
-}
-
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
 __visible inline void enter_from_user_mode(void)
@@ -71,7 +64,7 @@
 {
 	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	unsigned long ret = 0;
 	bool emulated = false;
 	u32 work;
@@ -173,18 +166,17 @@
 		/* Disable IRQs and retry */
 		local_irq_disable();
 
-		cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+		cached_flags = READ_ONCE(current_thread_info()->flags);
 
 		if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
 			break;
-
 	}
 }
 
 /* Called with IRQs disabled. */
 __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 {
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -209,7 +201,7 @@
 	 * special case only applies after poking regs and before the
 	 * very next return to user mode.
 	 */
-	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+	current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
 #endif
 
 	user_enter_irqoff();
@@ -247,7 +239,7 @@
  */
 __visible inline void syscall_return_slowpath(struct pt_regs *regs)
 {
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	u32 cached_flags = READ_ONCE(ti->flags);
 
 	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
@@ -270,7 +262,7 @@
 #ifdef CONFIG_X86_64
 __visible void do_syscall_64(struct pt_regs *regs)
 {
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	unsigned long nr = regs->orig_ax;
 
 	enter_from_user_mode();
@@ -303,11 +295,11 @@
  */
 static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 {
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	unsigned int nr = (unsigned int)regs->orig_ax;
 
 #ifdef CONFIG_IA32_EMULATION
-	ti->status |= TS_COMPAT;
+	current->thread.status |= TS_COMPAT;
 #endif
 
 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 0b56666..b75a8bc 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -204,34 +204,70 @@
 	POP_GS_EX
 .endm
 
+/*
+ * %eax: prev task
+ * %edx: next task
+ */
+ENTRY(__switch_to_asm)
+	/*
+	 * Save callee-saved registers
+	 * This must match the order in struct inactive_task_frame
+	 */
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+
+	/* switch stack */
+	movl	%esp, TASK_threadsp(%eax)
+	movl	TASK_threadsp(%edx), %esp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+	movl	TASK_stack_canary(%edx), %ebx
+	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+#endif
+
+	/* restore callee-saved registers */
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+
+	jmp	__switch_to
+END(__switch_to_asm)
+
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * eax: prev task we switched from
+ * ebx: kernel thread func (NULL for user thread)
+ * edi: kernel thread arg
+ */
 ENTRY(ret_from_fork)
 	pushl	%eax
 	call	schedule_tail
 	popl	%eax
 
+	testl	%ebx, %ebx
+	jnz	1f		/* kernel threads are uncommon */
+
+2:
 	/* When we fork, we trace the syscall return in the child, too. */
 	movl    %esp, %eax
 	call    syscall_return_slowpath
 	jmp     restore_all
-END(ret_from_fork)
 
-ENTRY(ret_from_kernel_thread)
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
-	movl	PT_EBP(%esp), %eax
-	call	*PT_EBX(%esp)
-	movl	$0, PT_EAX(%esp)
-
+	/* kernel thread */
+1:	movl	%edi, %eax
+	call	*%ebx
 	/*
-	 * Kernel threads return to userspace as if returning from a syscall.
-	 * We should check whether anything actually uses this path and, if so,
-	 * consider switching it over to ret_from_fork.
+	 * A kernel thread is allowed to return here after successfully
+	 * calling do_execve().  Exit to userspace to complete the execve()
+	 * syscall.
 	 */
-	movl    %esp, %eax
-	call    syscall_return_slowpath
-	jmp     restore_all
-ENDPROC(ret_from_kernel_thread)
+	movl	$0, PT_EAX(%esp)
+	jmp	2b
+END(ret_from_fork)
 
 /*
  * Return to user mode is not as complex as all this looks,
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b846875..fee1d95 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -179,7 +179,8 @@
 	 * If we need to do entry work or if we guess we'll need to do
 	 * exit work, go straight to the slow path.
 	 */
-	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+	movq	PER_CPU_VAR(current_task), %r11
+	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
 	jnz	entry_SYSCALL64_slow_path
 
 entry_SYSCALL_64_fastpath:
@@ -217,7 +218,8 @@
 	 */
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+	movq	PER_CPU_VAR(current_task), %r11
+	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
 	jnz	1f
 
 	LOCKDEP_SYS_EXIT
@@ -288,11 +290,15 @@
 	jne	opportunistic_sysret_failed
 
 	/*
-	 * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
-	 * restoring TF results in a trap from userspace immediately after
-	 * SYSRET.  This would cause an infinite loop whenever #DB happens
-	 * with register state that satisfies the opportunistic SYSRET
-	 * conditions.  For example, single-stepping this user code:
+	 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+	 * restore RF properly. If the slowpath sets it for whatever reason, we
+	 * need to restore it correctly.
+	 *
+	 * SYSRET can restore TF, but unlike IRET, restoring TF results in a
+	 * trap from userspace immediately after SYSRET.  This would cause an
+	 * infinite loop whenever #DB happens with register state that satisfies
+	 * the opportunistic SYSRET conditions.  For example, single-stepping
+	 * this user code:
 	 *
 	 *           movq	$stuck_here, %rcx
 	 *           pushfq
@@ -347,8 +353,7 @@
 	jmp	entry_SYSCALL64_slow_path
 
 1:
-	/* Called from C */
-	jmp	*%rax				/* called from C */
+	jmp	*%rax				/* Called from C */
 END(stub_ptregs_64)
 
 .macro ptregs_stub func
@@ -365,41 +370,73 @@
 #include <asm/syscalls_64.h>
 
 /*
+ * %rdi: prev task
+ * %rsi: next task
+ */
+ENTRY(__switch_to_asm)
+	/*
+	 * Save callee-saved registers
+	 * This must match the order in inactive_task_frame
+	 */
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+
+	/* switch stack */
+	movq	%rsp, TASK_threadsp(%rdi)
+	movq	TASK_threadsp(%rsi), %rsp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+	movq	TASK_stack_canary(%rsi), %rbx
+	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+#endif
+
+	/* restore callee-saved registers */
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+
+	jmp	__switch_to
+END(__switch_to_asm)
+
+/*
  * A newly forked process directly context switches into this address.
  *
- * rdi: prev task we switched from
+ * rax: prev task we switched from
+ * rbx: kernel thread func (NULL for user thread)
+ * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	LOCK ; btr $TIF_FORK, TI_flags(%r8)
-
+	movq	%rax, %rdi
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
-	testb	$3, CS(%rsp)			/* from kernel_thread? */
-	jnz	1f
+	testq	%rbx, %rbx			/* from kernel_thread? */
+	jnz	1f				/* kernel threads are uncommon */
 
-	/*
-	 * We came from kernel_thread.  This code path is quite twisted, and
-	 * someone should clean it up.
-	 *
-	 * copy_thread_tls stashes the function pointer in RBX and the
-	 * parameter to be passed in RBP.  The called function is permitted
-	 * to call do_execve and thereby jump to user mode.
-	 */
-	movq	RBP(%rsp), %rdi
-	call	*RBX(%rsp)
-	movl	$0, RAX(%rsp)
-
-	/*
-	 * Fall through as though we're exiting a syscall.  This makes a
-	 * twisted sort of sense if we just called do_execve.
-	 */
-
-1:
+2:
 	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
 	SWAPGS
 	jmp	restore_regs_and_iret
+
+1:
+	/* kernel thread */
+	movq	%r12, %rdi
+	call	*%rbx
+	/*
+	 * A kernel thread is allowed to return here after successfully
+	 * calling do_execve().  Exit to userspace to complete the execve()
+	 * syscall.
+	 */
+	movq	$0, RAX(%rsp)
+	jmp	2b
 END(ret_from_fork)
 
 /*
@@ -551,27 +588,69 @@
 
 #ifdef CONFIG_X86_ESPFIX64
 native_irq_return_ldt:
-	pushq	%rax
-	pushq	%rdi
+	/*
+	 * We are running with user GSBASE.  All GPRs contain their user
+	 * values.  We have a percpu ESPFIX stack that is eight slots
+	 * long (see ESPFIX_STACK_SIZE).  espfix_waddr points to the bottom
+	 * of the ESPFIX stack.
+	 *
+	 * We clobber RAX and RDI in this code.  We stash RDI on the
+	 * normal stack and RAX on the ESPFIX stack.
+	 *
+	 * The ESPFIX stack layout we set up looks like this:
+	 *
+	 * --- top of ESPFIX stack ---
+	 * SS
+	 * RSP
+	 * RFLAGS
+	 * CS
+	 * RIP  <-- RSP points here when we're done
+	 * RAX  <-- espfix_waddr points here
+	 * --- bottom of ESPFIX stack ---
+	 */
+
+	pushq	%rdi				/* Stash user RDI */
 	SWAPGS
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
-	movq	%rax, (0*8)(%rdi)		/* RAX */
-	movq	(2*8)(%rsp), %rax		/* RIP */
+	movq	%rax, (0*8)(%rdi)		/* user RAX */
+	movq	(1*8)(%rsp), %rax		/* user RIP */
 	movq	%rax, (1*8)(%rdi)
-	movq	(3*8)(%rsp), %rax		/* CS */
+	movq	(2*8)(%rsp), %rax		/* user CS */
 	movq	%rax, (2*8)(%rdi)
-	movq	(4*8)(%rsp), %rax		/* RFLAGS */
+	movq	(3*8)(%rsp), %rax		/* user RFLAGS */
 	movq	%rax, (3*8)(%rdi)
-	movq	(6*8)(%rsp), %rax		/* SS */
+	movq	(5*8)(%rsp), %rax		/* user SS */
 	movq	%rax, (5*8)(%rdi)
-	movq	(5*8)(%rsp), %rax		/* RSP */
+	movq	(4*8)(%rsp), %rax		/* user RSP */
 	movq	%rax, (4*8)(%rdi)
-	andl	$0xffff0000, %eax
-	popq	%rdi
+	/* Now RAX == RSP. */
+
+	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
+	popq	%rdi				/* Restore user RDI */
+
+	/*
+	 * espfix_stack[31:16] == 0.  The page tables are set up such that
+	 * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
+	 * espfix_waddr for any X.  That is, there are 65536 RO aliases of
+	 * the same page.  Set up RSP so that RSP[31:16] contains the
+	 * respective 16 bits of the /userspace/ RSP and RSP nonetheless
+	 * still points to an RO alias of the ESPFIX stack.
+	 */
 	orq	PER_CPU_VAR(espfix_stack), %rax
 	SWAPGS
 	movq	%rax, %rsp
-	popq	%rax
+
+	/*
+	 * At this point, we cannot write to the stack any more, but we can
+	 * still read.
+	 */
+	popq	%rax				/* Restore user RAX */
+
+	/*
+	 * RSP now points to an ordinary IRET frame, except that the page
+	 * is read-only and RSP[31:16] are preloaded with the userspace
+	 * values.  We can now IRET back to userspace.
+	 */
 	jmp	native_irq_return_iret
 #endif
 END(common_interrupt)
@@ -601,9 +680,20 @@
 .endm
 #endif
 
+/* Make sure APIC interrupt handlers end up in the irqentry section: */
+#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
+# define PUSH_SECTION_IRQENTRY	.pushsection .irqentry.text, "ax"
+# define POP_SECTION_IRQENTRY	.popsection
+#else
+# define PUSH_SECTION_IRQENTRY
+# define POP_SECTION_IRQENTRY
+#endif
+
 .macro apicinterrupt num sym do_sym
+PUSH_SECTION_IRQENTRY
 apicinterrupt3 \num \sym \do_sym
 trace_apicinterrupt \num \sym
+POP_SECTION_IRQENTRY
 .endm
 
 #ifdef CONFIG_SMP
@@ -987,7 +1077,6 @@
 	testb	$3, CS+8(%rsp)
 	jz	.Lerror_kernelspace
 
-.Lerror_entry_from_usermode_swapgs:
 	/*
 	 * We entered from user mode or we're pretending to have entered
 	 * from user mode due to an IRET fault.
@@ -1030,7 +1119,8 @@
 	 * gsbase and proceed.  We'll fix up the exception and land in
 	 * .Lgs_change's error handler with kernel gsbase.
 	 */
-	jmp	.Lerror_entry_from_usermode_swapgs
+	SWAPGS
+	jmp .Lerror_entry_done
 
 .Lbstep_iret:
 	/* Fix truncated RIP */
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index 4f74119..3dab75f 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -22,7 +22,7 @@
 
 	ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
 
-	if (hdr->e_type != ET_DYN)
+	if (GET_LE(&hdr->e_type) != ET_DYN)
 		fail("input is not a shared object\n");
 
 	/* Walk the segment table. */
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index f840766..23c881c 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -37,54 +37,6 @@
 
 struct linux_binprm;
 
-/*
- * Put the vdso above the (randomized) stack with another randomized
- * offset.  This way there is no hole in the middle of address space.
- * To save memory make sure it is still in the same PTE as the stack
- * top.  This doesn't give that many random bits.
- *
- * Note that this algorithm is imperfect: the distribution of the vdso
- * start address within a PMD is biased toward the end.
- *
- * Only used for the 64-bit and x32 vdsos.
- */
-static unsigned long vdso_addr(unsigned long start, unsigned len)
-{
-#ifdef CONFIG_X86_32
-	return 0;
-#else
-	unsigned long addr, end;
-	unsigned offset;
-
-	/*
-	 * Round up the start address.  It can start out unaligned as a result
-	 * of stack start randomization.
-	 */
-	start = PAGE_ALIGN(start);
-
-	/* Round the lowest possible end address up to a PMD boundary. */
-	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
-	if (end >= TASK_SIZE_MAX)
-		end = TASK_SIZE_MAX;
-	end -= len;
-
-	if (end > start) {
-		offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
-		addr = start + (offset << PAGE_SHIFT);
-	} else {
-		addr = start;
-	}
-
-	/*
-	 * Forcibly align the final address in case we have a hardware
-	 * issue that requires alignment for performance reasons.
-	 */
-	addr = align_vdso_addr(addr);
-
-	return addr;
-#endif
-}
-
 static int vdso_fault(const struct vm_special_mapping *sm,
 		      struct vm_area_struct *vma, struct vm_fault *vmf)
 {
@@ -176,30 +128,28 @@
 	return VM_FAULT_SIGBUS;
 }
 
-static int map_vdso(const struct vdso_image *image, bool calculate_addr)
+static const struct vm_special_mapping vdso_mapping = {
+	.name = "[vdso]",
+	.fault = vdso_fault,
+	.mremap = vdso_mremap,
+};
+static const struct vm_special_mapping vvar_mapping = {
+	.name = "[vvar]",
+	.fault = vvar_fault,
+};
+
+/*
+ * Add vdso and vvar mappings to current process.
+ * @image          - blob to map
+ * @addr           - request a specific address (zero to map at free addr)
+ */
+static int map_vdso(const struct vdso_image *image, unsigned long addr)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
-	unsigned long addr, text_start;
+	unsigned long text_start;
 	int ret = 0;
 
-	static const struct vm_special_mapping vdso_mapping = {
-		.name = "[vdso]",
-		.fault = vdso_fault,
-		.mremap = vdso_mremap,
-	};
-	static const struct vm_special_mapping vvar_mapping = {
-		.name = "[vvar]",
-		.fault = vvar_fault,
-	};
-
-	if (calculate_addr) {
-		addr = vdso_addr(current->mm->start_stack,
-				 image->size - image->sym_vvar_start);
-	} else {
-		addr = 0;
-	}
-
 	if (down_write_killable(&mm->mmap_sem))
 		return -EINTR;
 
@@ -238,24 +188,104 @@
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
-		goto up_fail;
+		do_munmap(mm, text_start, image->size);
 	}
 
 up_fail:
-	if (ret)
+	if (ret) {
 		current->mm->context.vdso = NULL;
+		current->mm->context.vdso_image = NULL;
+	}
 
 	up_write(&mm->mmap_sem);
 	return ret;
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * Put the vdso above the (randomized) stack with another randomized
+ * offset.  This way there is no hole in the middle of address space.
+ * To save memory make sure it is still in the same PTE as the stack
+ * top.  This doesn't give that many random bits.
+ *
+ * Note that this algorithm is imperfect: the distribution of the vdso
+ * start address within a PMD is biased toward the end.
+ *
+ * Only used for the 64-bit and x32 vdsos.
+ */
+static unsigned long vdso_addr(unsigned long start, unsigned len)
+{
+	unsigned long addr, end;
+	unsigned offset;
+
+	/*
+	 * Round up the start address.  It can start out unaligned as a result
+	 * of stack start randomization.
+	 */
+	start = PAGE_ALIGN(start);
+
+	/* Round the lowest possible end address up to a PMD boundary. */
+	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
+	if (end >= TASK_SIZE_MAX)
+		end = TASK_SIZE_MAX;
+	end -= len;
+
+	if (end > start) {
+		offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
+		addr = start + (offset << PAGE_SHIFT);
+	} else {
+		addr = start;
+	}
+
+	/*
+	 * Forcibly align the final address in case we have a hardware
+	 * issue that requires alignment for performance reasons.
+	 */
+	addr = align_vdso_addr(addr);
+
+	return addr;
+}
+
+static int map_vdso_randomized(const struct vdso_image *image)
+{
+	unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start);
+
+	return map_vdso(image, addr);
+}
+#endif
+
+int map_vdso_once(const struct vdso_image *image, unsigned long addr)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	down_write(&mm->mmap_sem);
+	/*
+	 * Check if we have already mapped vdso blob - fail to prevent
+	 * abusing from userspace install_speciall_mapping, which may
+	 * not do accounting and rlimit right.
+	 * We could search vma near context.vdso, but it's a slowpath,
+	 * so let's explicitely check all VMAs to be completely sure.
+	 */
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma_is_special_mapping(vma, &vdso_mapping) ||
+				vma_is_special_mapping(vma, &vvar_mapping)) {
+			up_write(&mm->mmap_sem);
+			return -EEXIST;
+		}
+	}
+	up_write(&mm->mmap_sem);
+
+	return map_vdso(image, addr);
+}
+
 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
 static int load_vdso32(void)
 {
 	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
 		return 0;
 
-	return map_vdso(&vdso_image_32, false);
+	return map_vdso(&vdso_image_32, 0);
 }
 #endif
 
@@ -265,7 +295,7 @@
 	if (!vdso64_enabled)
 		return 0;
 
-	return map_vdso(&vdso_image_64, true);
+	return map_vdso_randomized(&vdso_image_64);
 }
 
 #ifdef CONFIG_COMPAT
@@ -276,8 +306,7 @@
 	if (test_thread_flag(TIF_X32)) {
 		if (!vdso64_enabled)
 			return 0;
-
-		return map_vdso(&vdso_image_x32, true);
+		return map_vdso_randomized(&vdso_image_x32);
 	}
 #endif
 #ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index e07a22b..f5f4b3f 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -119,8 +119,8 @@
 {
   [PERF_COUNT_HW_CPU_CYCLES]			= 0x0076,
   [PERF_COUNT_HW_INSTRUCTIONS]			= 0x00c0,
-  [PERF_COUNT_HW_CACHE_REFERENCES]		= 0x0080,
-  [PERF_COUNT_HW_CACHE_MISSES]			= 0x0081,
+  [PERF_COUNT_HW_CACHE_REFERENCES]		= 0x077d,
+  [PERF_COUNT_HW_CACHE_MISSES]			= 0x077e,
   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]		= 0x00c2,
   [PERF_COUNT_HW_BRANCH_MISSES]			= 0x00c3,
   [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= 0x00d0, /* "Decoder empty" event */
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index e6131d4..65577f0 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -29,6 +29,8 @@
 
 #define COUNTER_SHIFT		16
 
+static HLIST_HEAD(uncore_unused_list);
+
 struct amd_uncore {
 	int id;
 	int refcnt;
@@ -39,7 +41,7 @@
 	cpumask_t *active_mask;
 	struct pmu *pmu;
 	struct perf_event *events[MAX_COUNTERS];
-	struct amd_uncore *free_when_cpu_online;
+	struct hlist_node node;
 };
 
 static struct amd_uncore * __percpu *amd_uncore_nb;
@@ -306,6 +308,7 @@
 		uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL;
 		uncore_nb->active_mask = &amd_nb_active_mask;
 		uncore_nb->pmu = &amd_nb_pmu;
+		uncore_nb->id = -1;
 		*per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb;
 	}
 
@@ -319,6 +322,7 @@
 		uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL;
 		uncore_l2->active_mask = &amd_l2_active_mask;
 		uncore_l2->pmu = &amd_l2_pmu;
+		uncore_l2->id = -1;
 		*per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2;
 	}
 
@@ -348,7 +352,7 @@
 			continue;
 
 		if (this->id == that->id) {
-			that->free_when_cpu_online = this;
+			hlist_add_head(&this->node, &uncore_unused_list);
 			this = that;
 			break;
 		}
@@ -388,13 +392,23 @@
 	return 0;
 }
 
+static void uncore_clean_online(void)
+{
+	struct amd_uncore *uncore;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) {
+		hlist_del(&uncore->node);
+		kfree(uncore);
+	}
+}
+
 static void uncore_online(unsigned int cpu,
 			  struct amd_uncore * __percpu *uncores)
 {
 	struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu);
 
-	kfree(uncore->free_when_cpu_online);
-	uncore->free_when_cpu_online = NULL;
+	uncore_clean_online();
 
 	if (cpu == uncore->cpu)
 		cpumask_set_cpu(cpu, uncore->active_mask);
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d0efb5c..d31735f 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -37,6 +37,7 @@
 #include <asm/timer.h>
 #include <asm/desc.h>
 #include <asm/ldt.h>
+#include <asm/unwind.h>
 
 #include "perf_event.h"
 
@@ -1201,6 +1202,9 @@
 	 * If group events scheduling transaction was started,
 	 * skip the schedulability test here, it will be performed
 	 * at commit time (->commit_txn) as a whole.
+	 *
+	 * If commit fails, we'll call ->del() on all events
+	 * for which ->add() was called.
 	 */
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto done_collect;
@@ -1223,6 +1227,14 @@
 	cpuc->n_added += n - n0;
 	cpuc->n_txn += n - n0;
 
+	if (x86_pmu.add) {
+		/*
+		 * This is before x86_pmu_enable() will call x86_pmu_start(),
+		 * so we enable LBRs before an event needs them etc..
+		 */
+		x86_pmu.add(event);
+	}
+
 	ret = 0;
 out:
 	return ret;
@@ -1346,7 +1358,7 @@
 	event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
 
 	/*
-	 * If we're called during a txn, we don't need to do anything.
+	 * If we're called during a txn, we only need to undo x86_pmu.add.
 	 * The events never got scheduled and ->cancel_txn will truncate
 	 * the event_list.
 	 *
@@ -1354,7 +1366,7 @@
 	 * an event added during that same TXN.
 	 */
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
-		return;
+		goto do_del;
 
 	/*
 	 * Not a TXN, therefore cleanup properly.
@@ -1384,6 +1396,15 @@
 	--cpuc->n_events;
 
 	perf_event_update_userpage(event);
+
+do_del:
+	if (x86_pmu.del) {
+		/*
+		 * This is after x86_pmu_stop(); so we disable LBRs after any
+		 * event can need them etc..
+		 */
+		x86_pmu.del(event);
+	}
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -2247,39 +2268,26 @@
 	cyc2ns_read_end(data);
 }
 
-/*
- * callchain support
- */
-
-static int backtrace_stack(void *data, char *name)
-{
-	return 0;
-}
-
-static int backtrace_address(void *data, unsigned long addr, int reliable)
-{
-	struct perf_callchain_entry_ctx *entry = data;
-
-	return perf_callchain_store(entry, addr);
-}
-
-static const struct stacktrace_ops backtrace_ops = {
-	.stack			= backtrace_stack,
-	.address		= backtrace_address,
-	.walk_stack		= print_context_stack_bp,
-};
-
 void
 perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
+	struct unwind_state state;
+	unsigned long addr;
+
 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 		/* TODO: We don't support guest os callchain now */
 		return;
 	}
 
-	perf_callchain_store(entry, regs->ip);
+	if (perf_callchain_store(entry, regs->ip))
+		return;
 
-	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+	for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
+	     unwind_next_frame(&state)) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || perf_callchain_store(entry, addr))
+			return;
+	}
 }
 
 static inline int
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 0a6e393..982c9e3 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -31,7 +31,17 @@
 struct bts_ctx {
 	struct perf_output_handle	handle;
 	struct debug_store		ds_back;
-	int				started;
+	int				state;
+};
+
+/* BTS context states: */
+enum {
+	/* no ongoing AUX transactions */
+	BTS_STATE_STOPPED = 0,
+	/* AUX transaction is on, BTS tracing is disabled */
+	BTS_STATE_INACTIVE,
+	/* AUX transaction is on, BTS tracing is running */
+	BTS_STATE_ACTIVE,
 };
 
 static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
@@ -204,6 +214,15 @@
 static int
 bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
 
+/*
+ * Ordering PMU callbacks wrt themselves and the PMI is done by means
+ * of bts::state, which:
+ *  - is set when bts::handle::event is valid, that is, between
+ *    perf_aux_output_begin() and perf_aux_output_end();
+ *  - is zero otherwise;
+ *  - is ordered against bts::handle::event with a compiler barrier.
+ */
+
 static void __bts_event_start(struct perf_event *event)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
@@ -221,10 +240,13 @@
 
 	/*
 	 * local barrier to make sure that ds configuration made it
-	 * before we enable BTS
+	 * before we enable BTS and bts::state goes ACTIVE
 	 */
 	wmb();
 
+	/* INACTIVE/STOPPED -> ACTIVE */
+	WRITE_ONCE(bts->state, BTS_STATE_ACTIVE);
+
 	intel_pmu_enable_bts(config);
 
 }
@@ -251,9 +273,6 @@
 
 	__bts_event_start(event);
 
-	/* PMI handler: this counter is running and likely generating PMIs */
-	ACCESS_ONCE(bts->started) = 1;
-
 	return;
 
 fail_end_stop:
@@ -263,30 +282,34 @@
 	event->hw.state = PERF_HES_STOPPED;
 }
 
-static void __bts_event_stop(struct perf_event *event)
+static void __bts_event_stop(struct perf_event *event, int state)
 {
+	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+
+	/* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
+	WRITE_ONCE(bts->state, state);
+
 	/*
 	 * No extra synchronization is mandated by the documentation to have
 	 * BTS data stores globally visible.
 	 */
 	intel_pmu_disable_bts();
-
-	if (event->hw.state & PERF_HES_STOPPED)
-		return;
-
-	ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
 }
 
 static void bts_event_stop(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
-	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+	struct bts_buffer *buf = NULL;
+	int state = READ_ONCE(bts->state);
 
-	/* PMI handler: don't restart this counter */
-	ACCESS_ONCE(bts->started) = 0;
+	if (state == BTS_STATE_ACTIVE)
+		__bts_event_stop(event, BTS_STATE_STOPPED);
 
-	__bts_event_stop(event);
+	if (state != BTS_STATE_STOPPED)
+		buf = perf_get_aux(&bts->handle);
+
+	event->hw.state |= PERF_HES_STOPPED;
 
 	if (flags & PERF_EF_UPDATE) {
 		bts_update(bts);
@@ -296,6 +319,7 @@
 				bts->handle.head =
 					local_xchg(&buf->data_size,
 						   buf->nr_pages << PAGE_SHIFT);
+
 			perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
 					    !!local_xchg(&buf->lost, 0));
 		}
@@ -310,8 +334,20 @@
 void intel_bts_enable_local(void)
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
+	int state = READ_ONCE(bts->state);
 
-	if (bts->handle.event && bts->started)
+	/*
+	 * Here we transition from INACTIVE to ACTIVE;
+	 * if we instead are STOPPED from the interrupt handler,
+	 * stay that way. Can't be ACTIVE here though.
+	 */
+	if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE))
+		return;
+
+	if (state == BTS_STATE_STOPPED)
+		return;
+
+	if (bts->handle.event)
 		__bts_event_start(bts->handle.event);
 }
 
@@ -319,8 +355,15 @@
 {
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 
+	/*
+	 * Here we transition from ACTIVE to INACTIVE;
+	 * do nothing for STOPPED or INACTIVE.
+	 */
+	if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE)
+		return;
+
 	if (bts->handle.event)
-		__bts_event_stop(bts->handle.event);
+		__bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE);
 }
 
 static int
@@ -335,8 +378,6 @@
 		return 0;
 
 	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
-	if (WARN_ON_ONCE(head != local_read(&buf->head)))
-		return -EINVAL;
 
 	phys = &buf->buf[buf->cur_buf];
 	space = phys->offset + phys->displacement + phys->size - head;
@@ -403,22 +444,37 @@
 
 int intel_bts_interrupt(void)
 {
+	struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
 	struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
 	struct perf_event *event = bts->handle.event;
 	struct bts_buffer *buf;
 	s64 old_head;
-	int err;
+	int err = -ENOSPC, handled = 0;
 
-	if (!event || !bts->started)
-		return 0;
+	/*
+	 * The only surefire way of knowing if this NMI is ours is by checking
+	 * the write ptr against the PMI threshold.
+	 */
+	if (ds && (ds->bts_index >= ds->bts_interrupt_threshold))
+		handled = 1;
+
+	/*
+	 * this is wrapped in intel_bts_enable_local/intel_bts_disable_local,
+	 * so we can only be INACTIVE or STOPPED
+	 */
+	if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
+		return handled;
 
 	buf = perf_get_aux(&bts->handle);
+	if (!buf)
+		return handled;
+
 	/*
 	 * Skip snapshot counters: they don't use the interrupt, but
 	 * there's no other way of telling, because the pointer will
 	 * keep moving
 	 */
-	if (!buf || buf->snapshot)
+	if (buf->snapshot)
 		return 0;
 
 	old_head = local_read(&buf->head);
@@ -426,18 +482,27 @@
 
 	/* no new data */
 	if (old_head == local_read(&buf->head))
-		return 0;
+		return handled;
 
 	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
 			    !!local_xchg(&buf->lost, 0));
 
 	buf = perf_aux_output_begin(&bts->handle, event);
-	if (!buf)
-		return 1;
+	if (buf)
+		err = bts_buffer_reset(buf, &bts->handle);
 
-	err = bts_buffer_reset(buf, &bts->handle);
-	if (err)
-		perf_aux_output_end(&bts->handle, 0, false);
+	if (err) {
+		WRITE_ONCE(bts->state, BTS_STATE_STOPPED);
+
+		if (buf) {
+			/*
+			 * BTS_STATE_STOPPED should be visible before
+			 * cleared handle::event
+			 */
+			barrier();
+			perf_aux_output_end(&bts->handle, 0, false);
+		}
+	}
 
 	return 1;
 }
@@ -519,7 +584,8 @@
 	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
 		return -ENODEV;
 
-	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
+	bts_pmu.capabilities	= PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
+				  PERF_PMU_CAP_EXCLUSIVE;
 	bts_pmu.task_ctx_nr	= perf_sw_context;
 	bts_pmu.event_init	= bts_event_init;
 	bts_pmu.add		= bts_event_add;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 2cbde2f..a3a9eb8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1730,9 +1730,11 @@
  * disabled state if called consecutively.
  *
  * During consecutive calls, the same disable value will be written to related
- * registers, so the PMU state remains unchanged. hw.state in
- * intel_bts_disable_local will remain PERF_HES_STOPPED too in consecutive
- * calls.
+ * registers, so the PMU state remains unchanged.
+ *
+ * intel_bts events don't coexist with intel PMU's BTS events because of
+ * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
+ * disabled around intel PMU's event batching etc, only inside the PMI handler.
  */
 static void __intel_pmu_disable_all(void)
 {
@@ -1742,8 +1744,6 @@
 
 	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
-	else
-		intel_bts_disable_local();
 
 	intel_pmu_pebs_disable_all();
 }
@@ -1771,8 +1771,7 @@
 			return;
 
 		intel_pmu_enable_bts(event->hw.config);
-	} else
-		intel_bts_enable_local();
+	}
 }
 
 static void intel_pmu_enable_all(int added)
@@ -1907,13 +1906,6 @@
 	cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
 	cpuc->intel_cp_status &= ~(1ull << hwc->idx);
 
-	/*
-	 * must disable before any actual event
-	 * because any event may be combined with LBR
-	 */
-	if (needs_branch_stack(event))
-		intel_pmu_lbr_disable(event);
-
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
 		intel_pmu_disable_fixed(hwc);
 		return;
@@ -1925,6 +1917,14 @@
 		intel_pmu_pebs_disable(event);
 }
 
+static void intel_pmu_del_event(struct perf_event *event)
+{
+	if (needs_branch_stack(event))
+		intel_pmu_lbr_del(event);
+	if (event->attr.precise_ip)
+		intel_pmu_pebs_del(event);
+}
+
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 {
 	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
@@ -1968,12 +1968,6 @@
 		intel_pmu_enable_bts(hwc->config);
 		return;
 	}
-	/*
-	 * must enabled before any actual event
-	 * because any event may be combined with LBR
-	 */
-	if (needs_branch_stack(event))
-		intel_pmu_lbr_enable(event);
 
 	if (event->attr.exclude_host)
 		cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
@@ -1994,6 +1988,14 @@
 	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
+static void intel_pmu_add_event(struct perf_event *event)
+{
+	if (event->attr.precise_ip)
+		intel_pmu_pebs_add(event);
+	if (needs_branch_stack(event))
+		intel_pmu_lbr_add(event);
+}
+
 /*
  * Save and restart an expired event. Called by NMI contexts,
  * so it has to be careful about preempting normal event ops:
@@ -2073,6 +2075,7 @@
 	 */
 	if (!x86_pmu.late_ack)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
+	intel_bts_disable_local();
 	__intel_pmu_disable_all();
 	handled = intel_pmu_drain_bts_buffer();
 	handled += intel_bts_interrupt();
@@ -2172,6 +2175,7 @@
 	/* Only restore PMU state when it's active. See x86_pmu_disable(). */
 	if (cpuc->enabled)
 		__intel_pmu_enable_all(0, true);
+	intel_bts_enable_local();
 
 	/*
 	 * Only unmask the NMI after the overflow counters
@@ -3290,6 +3294,8 @@
 	.enable_all		= intel_pmu_enable_all,
 	.enable			= intel_pmu_enable_event,
 	.disable		= intel_pmu_disable_event,
+	.add			= intel_pmu_add_event,
+	.del			= intel_pmu_del_event,
 	.hw_config		= intel_pmu_hw_config,
 	.schedule_events	= x86_schedule_events,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
diff --git a/arch/x86/events/intel/cqm.c b/arch/x86/events/intel/cqm.c
index 783c49d..8f82b02 100644
--- a/arch/x86/events/intel/cqm.c
+++ b/arch/x86/events/intel/cqm.c
@@ -458,6 +458,11 @@
 static void init_mbm_sample(u32 rmid, u32 evt_type);
 static void __intel_mbm_event_count(void *info);
 
+static bool is_cqm_event(int e)
+{
+	return (e == QOS_L3_OCCUP_EVENT_ID);
+}
+
 static bool is_mbm_event(int e)
 {
 	return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
@@ -1366,6 +1371,10 @@
 	     (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
 		return -EINVAL;
 
+	if ((is_cqm_event(event->attr.config) && !cqm_enabled) ||
+	    (is_mbm_event(event->attr.config) && !mbm_enabled))
+		return -EINVAL;
+
 	/* unsupported modes and filters */
 	if (event->attr.exclude_user   ||
 	    event->attr.exclude_kernel ||
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7ce9f3f..0319311 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -806,9 +806,65 @@
 	return &emptyconstraint;
 }
 
-static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+/*
+ * We need the sched_task callback even for per-cpu events when we use
+ * the large interrupt threshold, such that we can provide PID and TID
+ * to PEBS samples.
+ */
+static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
 {
-	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+	return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
+}
+
+static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
+{
+	struct debug_store *ds = cpuc->ds;
+	u64 threshold;
+
+	if (cpuc->n_pebs == cpuc->n_large_pebs) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
+
+	ds->pebs_interrupt_threshold = threshold;
+}
+
+static void
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+{
+	/*
+	 * Make sure we get updated with the first PEBS
+	 * event. It will trigger also during removal, but
+	 * that does not hurt:
+	 */
+	bool update = cpuc->n_pebs == 1;
+
+	if (needed_cb != pebs_needs_sched_cb(cpuc)) {
+		if (!needed_cb)
+			perf_sched_cb_inc(pmu);
+		else
+			perf_sched_cb_dec(pmu);
+
+		update = true;
+	}
+
+	if (update)
+		pebs_update_threshold(cpuc);
+}
+
+void intel_pmu_pebs_add(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs++;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs++;
+
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_enable(struct perf_event *event)
@@ -816,12 +872,9 @@
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
-	bool first_pebs;
-	u64 threshold;
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -830,46 +883,34 @@
 		cpuc->pebs_enabled |= 1ULL << 63;
 
 	/*
-	 * When the event is constrained enough we can use a larger
-	 * threshold and run the event with less frequent PMI.
+	 * Use auto-reload if possible to save a MSR write in the PMI.
+	 * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
 	 */
-	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
-		threshold = ds->pebs_absolute_maximum -
-			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
-
-		if (first_pebs)
-			perf_sched_cb_inc(event->ctx->pmu);
-	} else {
-		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
-
-		/*
-		 * If not all events can use larger buffer,
-		 * roll back to threshold = 1
-		 */
-		if (!first_pebs &&
-		    (ds->pebs_interrupt_threshold > threshold))
-			perf_sched_cb_dec(event->ctx->pmu);
-	}
-
-	/* Use auto-reload if possible to save a MSR write in the PMI */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	}
+}
 
-	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
-		ds->pebs_interrupt_threshold = threshold;
+void intel_pmu_pebs_del(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	bool needed_cb = pebs_needs_sched_cb(cpuc);
+
+	cpuc->n_pebs--;
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING)
+		cpuc->n_large_pebs--;
+
+	pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
-	struct debug_store *ds = cpuc->ds;
-	bool large_pebs = ds->pebs_interrupt_threshold >
-		ds->pebs_buffer_base + x86_pmu.pebs_record_size;
 
-	if (large_pebs)
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
 		intel_pmu_drain_pebs_buffer();
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
@@ -879,9 +920,6 @@
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
-	if (large_pebs && !pebs_is_enabled(cpuc))
-		perf_sched_cb_dec(event->ctx->pmu);
-
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
@@ -1274,18 +1312,18 @@
 		struct pebs_record_nhm *p = at;
 		u64 pebs_status;
 
-		/* PEBS v3 has accurate status bits */
+		pebs_status = p->status & cpuc->pebs_enabled;
+		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
+
+		/* PEBS v3 has more accurate status bits */
 		if (x86_pmu.intel_cap.pebs_format >= 3) {
-			for_each_set_bit(bit, (unsigned long *)&p->status,
-					 MAX_PEBS_EVENTS)
+			for_each_set_bit(bit, (unsigned long *)&pebs_status,
+					 x86_pmu.max_pebs_events)
 				counts[bit]++;
 
 			continue;
 		}
 
-		pebs_status = p->status & cpuc->pebs_enabled;
-		pebs_status &= (1ULL << x86_pmu.max_pebs_events) - 1;
-
 		/*
 		 * On some CPUs the PEBS status can be zero when PEBS is
 		 * racing with clearing of GLOBAL_STATUS.
@@ -1333,8 +1371,11 @@
 			continue;
 
 		event = cpuc->events[bit];
-		WARN_ON_ONCE(!event);
-		WARN_ON_ONCE(!event->attr.precise_ip);
+		if (WARN_ON_ONCE(!event))
+			continue;
+
+		if (WARN_ON_ONCE(!event->attr.precise_ip))
+			continue;
 
 		/* log dropped samples number */
 		if (error[bit])
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 707d358..fc6cf21 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,7 +380,6 @@
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
 	/*
@@ -390,31 +389,21 @@
 	 */
 	task_ctx = ctx ? ctx->task_ctx_data : NULL;
 	if (task_ctx) {
-		if (sched_in) {
+		if (sched_in)
 			__intel_pmu_lbr_restore(task_ctx);
-			cpuc->lbr_context = ctx;
-		} else {
+		else
 			__intel_pmu_lbr_save(task_ctx);
-		}
 		return;
 	}
 
 	/*
-	 * When sampling the branck stack in system-wide, it may be
-	 * necessary to flush the stack on context switch. This happens
-	 * when the branch stack does not tag its entries with the pid
-	 * of the current task. Otherwise it becomes impossible to
-	 * associate a branch entry with a task. This ambiguity is more
-	 * likely to appear when the branch stack supports priv level
-	 * filtering and the user sets it to monitor only at the user
-	 * level (which could be a useful measurement in system-wide
-	 * mode). In that case, the risk is high of having a branch
-	 * stack with branch from multiple tasks.
- 	 */
-	if (sched_in) {
+	 * Since a context switch can flip the address space and LBR entries
+	 * are not tagged with an identifier, we need to wipe the LBR, even for
+	 * per-cpu events. You simply cannot resolve the branches from the old
+	 * address space.
+	 */
+	if (sched_in)
 		intel_pmu_lbr_reset();
-		cpuc->lbr_context = ctx;
-	}
 }
 
 static inline bool branch_user_callstack(unsigned br_sel)
@@ -422,7 +411,7 @@
 	return (br_sel & X86_BR_USER) && (br_sel & X86_BR_CALL_STACK);
 }
 
-void intel_pmu_lbr_enable(struct perf_event *event)
+void intel_pmu_lbr_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
@@ -430,27 +419,38 @@
 	if (!x86_pmu.lbr_nr)
 		return;
 
-	/*
-	 * Reset the LBR stack if we changed task context to
-	 * avoid data leaks.
-	 */
-	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
-		intel_pmu_lbr_reset();
-		cpuc->lbr_context = event->ctx;
-	}
 	cpuc->br_sel = event->hw.branch_reg.reg;
 
-	if (branch_user_callstack(cpuc->br_sel) && event->ctx &&
-					event->ctx->task_ctx_data) {
+	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
 		task_ctx = event->ctx->task_ctx_data;
 		task_ctx->lbr_callstack_users++;
 	}
 
-	cpuc->lbr_users++;
+	/*
+	 * Request pmu::sched_task() callback, which will fire inside the
+	 * regular perf event scheduling, so that call will:
+	 *
+	 *  - restore or wipe; when LBR-callstack,
+	 *  - wipe; otherwise,
+	 *
+	 * when this is from __perf_event_task_sched_in().
+	 *
+	 * However, if this is from perf_install_in_context(), no such callback
+	 * will follow and we'll need to reset the LBR here if this is the
+	 * first LBR event.
+	 *
+	 * The problem is, we cannot tell these cases apart... but we can
+	 * exclude the biggest chunk of cases by looking at
+	 * event->total_time_running. An event that has accrued runtime cannot
+	 * be 'new'. Conversely, a new event can get installed through the
+	 * context switch path for the first time.
+	 */
 	perf_sched_cb_inc(event->ctx->pmu);
+	if (!cpuc->lbr_users++ && !event->total_time_running)
+		intel_pmu_lbr_reset();
 }
 
-void intel_pmu_lbr_disable(struct perf_event *event)
+void intel_pmu_lbr_del(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
@@ -467,12 +467,6 @@
 	cpuc->lbr_users--;
 	WARN_ON_ONCE(cpuc->lbr_users < 0);
 	perf_sched_cb_dec(event->ctx->pmu);
-
-	if (cpuc->enabled && !cpuc->lbr_users) {
-		__intel_pmu_lbr_disable();
-		/* avoid stale pointer */
-		cpuc->lbr_context = NULL;
-	}
 }
 
 void intel_pmu_lbr_enable_all(bool pmi)
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 04bb5fb..c5047b8 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -69,6 +69,8 @@
 	PT_CAP(psb_cyc,			0, CR_EBX, BIT(1)),
 	PT_CAP(ip_filtering,		0, CR_EBX, BIT(2)),
 	PT_CAP(mtc,			0, CR_EBX, BIT(3)),
+	PT_CAP(ptwrite,			0, CR_EBX, BIT(4)),
+	PT_CAP(power_event_trace,	0, CR_EBX, BIT(5)),
 	PT_CAP(topa_output,		0, CR_ECX, BIT(0)),
 	PT_CAP(topa_multiple_entries,	0, CR_ECX, BIT(1)),
 	PT_CAP(single_range_output,	0, CR_ECX, BIT(2)),
@@ -259,10 +261,16 @@
 #define RTIT_CTL_MTC	(RTIT_CTL_MTC_EN	| \
 			 RTIT_CTL_MTC_RANGE)
 
+#define RTIT_CTL_PTW	(RTIT_CTL_PTW_EN	| \
+			 RTIT_CTL_FUP_ON_PTW)
+
 #define PT_CONFIG_MASK (RTIT_CTL_TSC_EN		| \
 			RTIT_CTL_DISRETC	| \
 			RTIT_CTL_CYC_PSB	| \
-			RTIT_CTL_MTC)
+			RTIT_CTL_MTC		| \
+			RTIT_CTL_PWR_EVT_EN	| \
+			RTIT_CTL_FUP_ON_PTW	| \
+			RTIT_CTL_PTW_EN)
 
 static bool pt_event_valid(struct perf_event *event)
 {
@@ -311,6 +319,20 @@
 			return false;
 	}
 
+	if (config & RTIT_CTL_PWR_EVT_EN &&
+	    !pt_cap_get(PT_CAP_power_event_trace))
+		return false;
+
+	if (config & RTIT_CTL_PTW) {
+		if (!pt_cap_get(PT_CAP_ptwrite))
+			return false;
+
+		/* FUPonPTW without PTW doesn't make sense */
+		if ((config & RTIT_CTL_FUP_ON_PTW) &&
+		    !(config & RTIT_CTL_PTW_EN))
+			return false;
+	}
+
 	return true;
 }
 
@@ -1074,6 +1096,11 @@
 	event->hw.addr_filters = NULL;
 }
 
+static inline bool valid_kernel_ip(unsigned long ip)
+{
+	return virt_addr_valid(ip) && kernel_ip(ip);
+}
+
 static int pt_event_addr_filters_validate(struct list_head *filters)
 {
 	struct perf_addr_filter *filter;
@@ -1081,11 +1108,16 @@
 
 	list_for_each_entry(filter, filters, entry) {
 		/* PT doesn't support single address triggers */
-		if (!filter->range)
+		if (!filter->range || !filter->size)
 			return -EOPNOTSUPP;
 
-		if (!filter->inode && !kernel_ip(filter->offset))
-			return -EINVAL;
+		if (!filter->inode) {
+			if (!valid_kernel_ip(filter->offset))
+				return -EINVAL;
+
+			if (!valid_kernel_ip(filter->offset + filter->size))
+				return -EINVAL;
+		}
 
 		if (++range > pt_cap_get(PT_CAP_num_address_ranges))
 			return -EOPNOTSUPP;
@@ -1111,7 +1143,7 @@
 		} else {
 			/* apply the offset */
 			msr_a = filter->offset + offs[range];
-			msr_b = filter->size + msr_a;
+			msr_b = filter->size + msr_a - 1;
 		}
 
 		filters->filter[range].msr_a  = msr_a;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index efffa4a..53473c2 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -26,11 +26,14 @@
 #define RTIT_CTL_CYCLEACC		BIT(1)
 #define RTIT_CTL_OS			BIT(2)
 #define RTIT_CTL_USR			BIT(3)
+#define RTIT_CTL_PWR_EVT_EN		BIT(4)
+#define RTIT_CTL_FUP_ON_PTW		BIT(5)
 #define RTIT_CTL_CR3EN			BIT(7)
 #define RTIT_CTL_TOPA			BIT(8)
 #define RTIT_CTL_MTC_EN			BIT(9)
 #define RTIT_CTL_TSC_EN			BIT(10)
 #define RTIT_CTL_DISRETC		BIT(11)
+#define RTIT_CTL_PTW_EN			BIT(12)
 #define RTIT_CTL_BRANCH_EN		BIT(13)
 #define RTIT_CTL_MTC_RANGE_OFFSET	14
 #define RTIT_CTL_MTC_RANGE		(0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
@@ -91,6 +94,8 @@
 	PT_CAP_psb_cyc,
 	PT_CAP_ip_filtering,
 	PT_CAP_mtc,
+	PT_CAP_ptwrite,
+	PT_CAP_power_event_trace,
 	PT_CAP_topa_output,
 	PT_CAP_topa_multiple_entries,
 	PT_CAP_single_range_output,
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 2886593..b0f0e83 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -357,6 +357,8 @@
 	if (event->cpu < 0)
 		return -EINVAL;
 
+	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
+
 	/*
 	 * check event is known (determines counter)
 	 */
@@ -765,6 +767,8 @@
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE,  skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init),
 	X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,	 hsx_rapl_init),
+
+	X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init),
 	{},
 };
 
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 463dc7a..d9844cc 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -664,6 +664,8 @@
 	event->cpu = box->cpu;
 	event->pmu_private = box;
 
+	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
+
 	event->hw.idx = -1;
 	event->hw.last_tag = ~0ULL;
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
@@ -683,7 +685,8 @@
 		/* fixed counters have event field hardcoded to zero */
 		hwc->config = 0ULL;
 	} else {
-		hwc->config = event->attr.config & pmu->type->event_mask;
+		hwc->config = event->attr.config &
+			      (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32));
 		if (pmu->type->ops->hw_config) {
 			ret = pmu->type->ops->hw_config(box, event);
 			if (ret)
@@ -1321,6 +1324,11 @@
 	.pci_init = skl_uncore_pci_init,
 };
 
+static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
+	.cpu_init = skx_uncore_cpu_init,
+	.pci_init = skx_uncore_pci_init,
+};
+
 static const struct x86_cpu_id intel_uncore_match[] __initconst = {
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP,	  nhm_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM,	  nhm_uncore_init),
@@ -1343,6 +1351,7 @@
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL,	  knl_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init),
 	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init),
+	X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X,      skx_uncore_init),
 	{},
 };
 
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 78b9c23..ad986c1 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -44,6 +44,7 @@
 	unsigned perf_ctr;
 	unsigned event_ctl;
 	unsigned event_mask;
+	unsigned event_mask_ext;
 	unsigned fixed_ctr;
 	unsigned fixed_ctl;
 	unsigned box_ctl;
@@ -120,6 +121,7 @@
 };
 
 #define UNCORE_BOX_FLAG_INITIATED	0
+#define UNCORE_BOX_FLAG_CTL_OFFS8	1 /* event config registers are 8-byte apart */
 
 struct uncore_event_desc {
 	struct kobj_attribute attr;
@@ -172,6 +174,9 @@
 static inline
 unsigned uncore_pci_event_ctl(struct intel_uncore_box *box, int idx)
 {
+	if (test_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags))
+		return idx * 8 + box->pmu->type->event_ctl;
+
 	return idx * 4 + box->pmu->type->event_ctl;
 }
 
@@ -377,6 +382,8 @@
 void bdx_uncore_cpu_init(void);
 int knl_uncore_pci_init(void);
 void knl_uncore_cpu_init(void);
+int skx_uncore_pci_init(void);
+void skx_uncore_cpu_init(void);
 
 /* perf_event_intel_uncore_nhmex.c */
 void nhmex_uncore_cpu_init(void);
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 97a69db..5f845ee 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -100,6 +100,12 @@
 	}
 }
 
+static void snb_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(SNB_UNC_PERF_GLOBAL_CTL,
+		SNB_UNC_GLOBAL_CTL_EN | SNB_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void snb_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->pmu_idx == 0)
@@ -127,6 +133,7 @@
 
 static struct intel_uncore_ops snb_uncore_msr_ops = {
 	.init_box	= snb_uncore_msr_init_box,
+	.enable_box	= snb_uncore_msr_enable_box,
 	.exit_box	= snb_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
@@ -192,6 +199,12 @@
 	}
 }
 
+static void skl_uncore_msr_enable_box(struct intel_uncore_box *box)
+{
+	wrmsrl(SKL_UNC_PERF_GLOBAL_CTL,
+		SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL);
+}
+
 static void skl_uncore_msr_exit_box(struct intel_uncore_box *box)
 {
 	if (box->pmu->pmu_idx == 0)
@@ -200,6 +213,7 @@
 
 static struct intel_uncore_ops skl_uncore_msr_ops = {
 	.init_box	= skl_uncore_msr_init_box,
+	.enable_box	= skl_uncore_msr_enable_box,
 	.exit_box	= skl_uncore_msr_exit_box,
 	.disable_event	= snb_uncore_msr_disable_event,
 	.enable_event	= snb_uncore_msr_enable_event,
@@ -374,6 +388,8 @@
 	event->cpu = box->cpu;
 	event->pmu_private = box;
 
+	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
+
 	event->hw.idx = -1;
 	event->hw.last_tag = ~0ULL;
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 824e540..2724277 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1,6 +1,10 @@
 /* SandyBridge-EP/IvyTown uncore support */
 #include "uncore.h"
 
+/* SNB-EP pci bus to socket mapping */
+#define SNBEP_CPUNODEID			0x40
+#define SNBEP_GIDNIDMAP			0x54
+
 /* SNB-EP Box level control */
 #define SNBEP_PMON_BOX_CTL_RST_CTRL	(1 << 0)
 #define SNBEP_PMON_BOX_CTL_RST_CTRS	(1 << 1)
@@ -264,15 +268,72 @@
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_INVERT | \
 				 SNBEP_PCU_MSR_PMON_CTL_OCC_EDGE_DET)
 
+/* SKX pci bus to socket mapping */
+#define SKX_CPUNODEID			0xc0
+#define SKX_GIDNIDMAP			0xd4
+
+/* SKX CHA */
+#define SKX_CHA_MSR_PMON_BOX_FILTER_TID		(0x1ffULL << 0)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_LINK	(0xfULL << 9)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_STATE	(0x3ffULL << 17)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_REM		(0x1ULL << 32)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_LOC		(0x1ULL << 33)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_ALL_OPC	(0x1ULL << 35)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_NM		(0x1ULL << 36)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_NOT_NM	(0x1ULL << 37)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC0	(0x3ffULL << 41)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_OPC1	(0x3ffULL << 51)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_C6		(0x1ULL << 61)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_NC		(0x1ULL << 62)
+#define SKX_CHA_MSR_PMON_BOX_FILTER_ISOC	(0x1ULL << 63)
+
+/* SKX IIO */
+#define SKX_IIO0_MSR_PMON_CTL0		0xa48
+#define SKX_IIO0_MSR_PMON_CTR0		0xa41
+#define SKX_IIO0_MSR_PMON_BOX_CTL	0xa40
+#define SKX_IIO_MSR_OFFSET		0x20
+
+#define SKX_PMON_CTL_TRESH_MASK		(0xff << 24)
+#define SKX_PMON_CTL_TRESH_MASK_EXT	(0xf)
+#define SKX_PMON_CTL_CH_MASK		(0xff << 4)
+#define SKX_PMON_CTL_FC_MASK		(0x7 << 12)
+#define SKX_IIO_PMON_RAW_EVENT_MASK	(SNBEP_PMON_CTL_EV_SEL_MASK | \
+					 SNBEP_PMON_CTL_UMASK_MASK | \
+					 SNBEP_PMON_CTL_EDGE_DET | \
+					 SNBEP_PMON_CTL_INVERT | \
+					 SKX_PMON_CTL_TRESH_MASK)
+#define SKX_IIO_PMON_RAW_EVENT_MASK_EXT	(SKX_PMON_CTL_TRESH_MASK_EXT | \
+					 SKX_PMON_CTL_CH_MASK | \
+					 SKX_PMON_CTL_FC_MASK)
+
+/* SKX IRP */
+#define SKX_IRP0_MSR_PMON_CTL0		0xa5b
+#define SKX_IRP0_MSR_PMON_CTR0		0xa59
+#define SKX_IRP0_MSR_PMON_BOX_CTL	0xa58
+#define SKX_IRP_MSR_OFFSET		0x20
+
+/* SKX UPI */
+#define SKX_UPI_PCI_PMON_CTL0		0x350
+#define SKX_UPI_PCI_PMON_CTR0		0x318
+#define SKX_UPI_PCI_PMON_BOX_CTL	0x378
+#define SKX_PMON_CTL_UMASK_EXT		0xff
+
+/* SKX M2M */
+#define SKX_M2M_PCI_PMON_CTL0		0x228
+#define SKX_M2M_PCI_PMON_CTR0		0x200
+#define SKX_M2M_PCI_PMON_BOX_CTL	0x258
+
 DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
 DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
 DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
 DEFINE_UNCORE_FORMAT_ATTR(use_occ_ctr, use_occ_ctr, "config:7");
 DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
+DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-39");
 DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
 DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
 DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
 DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23");
+DEFINE_UNCORE_FORMAT_ATTR(thresh9, thresh, "config:24-35");
 DEFINE_UNCORE_FORMAT_ATTR(thresh8, thresh, "config:24-31");
 DEFINE_UNCORE_FORMAT_ATTR(thresh6, thresh, "config:24-29");
 DEFINE_UNCORE_FORMAT_ATTR(thresh5, thresh, "config:24-28");
@@ -280,6 +341,8 @@
 DEFINE_UNCORE_FORMAT_ATTR(occ_invert, occ_invert, "config:30");
 DEFINE_UNCORE_FORMAT_ATTR(occ_edge, occ_edge, "config:14-51");
 DEFINE_UNCORE_FORMAT_ATTR(occ_edge_det, occ_edge_det, "config:31");
+DEFINE_UNCORE_FORMAT_ATTR(ch_mask, ch_mask, "config:36-43");
+DEFINE_UNCORE_FORMAT_ATTR(fc_mask, fc_mask, "config:44-46");
 DEFINE_UNCORE_FORMAT_ATTR(filter_tid, filter_tid, "config1:0-4");
 DEFINE_UNCORE_FORMAT_ATTR(filter_tid2, filter_tid, "config1:0");
 DEFINE_UNCORE_FORMAT_ATTR(filter_tid3, filter_tid, "config1:0-5");
@@ -288,18 +351,26 @@
 DEFINE_UNCORE_FORMAT_ATTR(filter_link, filter_link, "config1:5-8");
 DEFINE_UNCORE_FORMAT_ATTR(filter_link2, filter_link, "config1:6-8");
 DEFINE_UNCORE_FORMAT_ATTR(filter_link3, filter_link, "config1:12");
+DEFINE_UNCORE_FORMAT_ATTR(filter_link4, filter_link, "config1:9-12");
 DEFINE_UNCORE_FORMAT_ATTR(filter_nid, filter_nid, "config1:10-17");
 DEFINE_UNCORE_FORMAT_ATTR(filter_nid2, filter_nid, "config1:32-47");
 DEFINE_UNCORE_FORMAT_ATTR(filter_state, filter_state, "config1:18-22");
 DEFINE_UNCORE_FORMAT_ATTR(filter_state2, filter_state, "config1:17-22");
 DEFINE_UNCORE_FORMAT_ATTR(filter_state3, filter_state, "config1:17-23");
 DEFINE_UNCORE_FORMAT_ATTR(filter_state4, filter_state, "config1:18-20");
+DEFINE_UNCORE_FORMAT_ATTR(filter_state5, filter_state, "config1:17-26");
+DEFINE_UNCORE_FORMAT_ATTR(filter_rem, filter_rem, "config1:32");
+DEFINE_UNCORE_FORMAT_ATTR(filter_loc, filter_loc, "config1:33");
+DEFINE_UNCORE_FORMAT_ATTR(filter_nm, filter_nm, "config1:36");
+DEFINE_UNCORE_FORMAT_ATTR(filter_not_nm, filter_not_nm, "config1:37");
 DEFINE_UNCORE_FORMAT_ATTR(filter_local, filter_local, "config1:33");
 DEFINE_UNCORE_FORMAT_ATTR(filter_all_op, filter_all_op, "config1:35");
 DEFINE_UNCORE_FORMAT_ATTR(filter_nnm, filter_nnm, "config1:37");
 DEFINE_UNCORE_FORMAT_ATTR(filter_opc, filter_opc, "config1:23-31");
 DEFINE_UNCORE_FORMAT_ATTR(filter_opc2, filter_opc, "config1:52-60");
 DEFINE_UNCORE_FORMAT_ATTR(filter_opc3, filter_opc, "config1:41-60");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc_0, filter_opc0, "config1:41-50");
+DEFINE_UNCORE_FORMAT_ATTR(filter_opc_1, filter_opc1, "config1:51-60");
 DEFINE_UNCORE_FORMAT_ATTR(filter_nc, filter_nc, "config1:62");
 DEFINE_UNCORE_FORMAT_ATTR(filter_c6, filter_c6, "config1:61");
 DEFINE_UNCORE_FORMAT_ATTR(filter_isoc, filter_isoc, "config1:63");
@@ -1153,7 +1224,7 @@
 /*
  * build pci bus to socket mapping
  */
-static int snbep_pci2phy_map_init(int devid)
+static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool reverse)
 {
 	struct pci_dev *ubox_dev = NULL;
 	int i, bus, nodeid, segment;
@@ -1168,12 +1239,12 @@
 			break;
 		bus = ubox_dev->bus->number;
 		/* get the Node ID of the local register */
-		err = pci_read_config_dword(ubox_dev, 0x40, &config);
+		err = pci_read_config_dword(ubox_dev, nodeid_loc, &config);
 		if (err)
 			break;
 		nodeid = config;
 		/* get the Node ID mapping */
-		err = pci_read_config_dword(ubox_dev, 0x54, &config);
+		err = pci_read_config_dword(ubox_dev, idmap_loc, &config);
 		if (err)
 			break;
 
@@ -1207,11 +1278,20 @@
 		raw_spin_lock(&pci2phy_map_lock);
 		list_for_each_entry(map, &pci2phy_map_head, list) {
 			i = -1;
-			for (bus = 255; bus >= 0; bus--) {
-				if (map->pbus_to_physid[bus] >= 0)
-					i = map->pbus_to_physid[bus];
-				else
-					map->pbus_to_physid[bus] = i;
+			if (reverse) {
+				for (bus = 255; bus >= 0; bus--) {
+					if (map->pbus_to_physid[bus] >= 0)
+						i = map->pbus_to_physid[bus];
+					else
+						map->pbus_to_physid[bus] = i;
+				}
+			} else {
+				for (bus = 0; bus <= 255; bus++) {
+					if (map->pbus_to_physid[bus] >= 0)
+						i = map->pbus_to_physid[bus];
+					else
+						map->pbus_to_physid[bus] = i;
+				}
 			}
 		}
 		raw_spin_unlock(&pci2phy_map_lock);
@@ -1224,7 +1304,7 @@
 
 int snbep_uncore_pci_init(void)
 {
-	int ret = snbep_pci2phy_map_init(0x3ce0);
+	int ret = snbep_pci2phy_map_init(0x3ce0, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true);
 	if (ret)
 		return ret;
 	uncore_pci_uncores = snbep_pci_uncores;
@@ -1788,7 +1868,7 @@
 
 int ivbep_uncore_pci_init(void)
 {
-	int ret = snbep_pci2phy_map_init(0x0e1e);
+	int ret = snbep_pci2phy_map_init(0x0e1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true);
 	if (ret)
 		return ret;
 	uncore_pci_uncores = ivbep_pci_uncores;
@@ -2626,7 +2706,7 @@
 
 static struct intel_uncore_type hswep_uncore_ha = {
 	.name		= "ha",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 2,
 	.perf_ctr_bits	= 48,
 	SNBEP_UNCORE_PCI_COMMON_INIT(),
@@ -2645,7 +2725,7 @@
 
 static struct intel_uncore_type hswep_uncore_imc = {
 	.name		= "imc",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 8,
 	.perf_ctr_bits	= 48,
 	.fixed_ctr_bits	= 48,
@@ -2691,7 +2771,7 @@
 
 static struct intel_uncore_type hswep_uncore_qpi = {
 	.name			= "qpi",
-	.num_counters		= 5,
+	.num_counters		= 4,
 	.num_boxes		= 3,
 	.perf_ctr_bits		= 48,
 	.perf_ctr		= SNBEP_PCI_PMON_CTR0,
@@ -2773,7 +2853,7 @@
 
 static struct intel_uncore_type hswep_uncore_r3qpi = {
 	.name		= "r3qpi",
-	.num_counters   = 4,
+	.num_counters   = 3,
 	.num_boxes	= 3,
 	.perf_ctr_bits	= 44,
 	.constraints	= hswep_uncore_r3qpi_constraints,
@@ -2897,7 +2977,7 @@
 
 int hswep_uncore_pci_init(void)
 {
-	int ret = snbep_pci2phy_map_init(0x2f1e);
+	int ret = snbep_pci2phy_map_init(0x2f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true);
 	if (ret)
 		return ret;
 	uncore_pci_uncores = hswep_pci_uncores;
@@ -2972,7 +3052,7 @@
 
 static struct intel_uncore_type bdx_uncore_imc = {
 	.name		= "imc",
-	.num_counters   = 5,
+	.num_counters   = 4,
 	.num_boxes	= 8,
 	.perf_ctr_bits	= 48,
 	.fixed_ctr_bits	= 48,
@@ -3186,7 +3266,7 @@
 
 int bdx_uncore_pci_init(void)
 {
-	int ret = snbep_pci2phy_map_init(0x6f1e);
+	int ret = snbep_pci2phy_map_init(0x6f1e, SNBEP_CPUNODEID, SNBEP_GIDNIDMAP, true);
 
 	if (ret)
 		return ret;
@@ -3196,3 +3276,525 @@
 }
 
 /* end of BDX uncore support */
+
+/* SKX uncore support */
+
+static struct intel_uncore_type skx_uncore_ubox = {
+	.name			= "ubox",
+	.num_counters		= 2,
+	.num_boxes		= 1,
+	.perf_ctr_bits		= 48,
+	.fixed_ctr_bits		= 48,
+	.perf_ctr		= HSWEP_U_MSR_PMON_CTR0,
+	.event_ctl		= HSWEP_U_MSR_PMON_CTL0,
+	.event_mask		= SNBEP_U_MSR_PMON_RAW_EVENT_MASK,
+	.fixed_ctr		= HSWEP_U_MSR_PMON_UCLK_FIXED_CTR,
+	.fixed_ctl		= HSWEP_U_MSR_PMON_UCLK_FIXED_CTL,
+	.ops			= &ivbep_uncore_msr_ops,
+	.format_group		= &ivbep_uncore_ubox_format_group,
+};
+
+static struct attribute *skx_uncore_cha_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_tid_en.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh8.attr,
+	&format_attr_filter_tid4.attr,
+	&format_attr_filter_link4.attr,
+	&format_attr_filter_state5.attr,
+	&format_attr_filter_rem.attr,
+	&format_attr_filter_loc.attr,
+	&format_attr_filter_nm.attr,
+	&format_attr_filter_all_op.attr,
+	&format_attr_filter_not_nm.attr,
+	&format_attr_filter_opc_0.attr,
+	&format_attr_filter_opc_1.attr,
+	&format_attr_filter_nc.attr,
+	&format_attr_filter_c6.attr,
+	&format_attr_filter_isoc.attr,
+	NULL,
+};
+
+static struct attribute_group skx_uncore_chabox_format_group = {
+	.name = "format",
+	.attrs = skx_uncore_cha_formats_attr,
+};
+
+static struct event_constraint skx_uncore_chabox_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x11, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x36, 0x1),
+	EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg skx_uncore_cha_extra_regs[] = {
+	SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x2134, 0xffff, 0x4),
+	SNBEP_CBO_EVENT_EXTRA_REG(0x8134, 0xffff, 0x4),
+};
+
+static u64 skx_cha_filter_mask(int fields)
+{
+	u64 mask = 0;
+
+	if (fields & 0x1)
+		mask |= SKX_CHA_MSR_PMON_BOX_FILTER_TID;
+	if (fields & 0x2)
+		mask |= SKX_CHA_MSR_PMON_BOX_FILTER_LINK;
+	if (fields & 0x4)
+		mask |= SKX_CHA_MSR_PMON_BOX_FILTER_STATE;
+	return mask;
+}
+
+static struct event_constraint *
+skx_cha_get_constraint(struct intel_uncore_box *box, struct perf_event *event)
+{
+	return __snbep_cbox_get_constraint(box, event, skx_cha_filter_mask);
+}
+
+static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event)
+{
+	struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
+	struct extra_reg *er;
+	int idx = 0;
+
+	for (er = skx_uncore_cha_extra_regs; er->msr; er++) {
+		if (er->event != (event->hw.config & er->config_mask))
+			continue;
+		idx |= er->idx;
+	}
+
+	if (idx) {
+		reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 +
+			    HSWEP_CBO_MSR_OFFSET * box->pmu->pmu_idx;
+		reg1->config = event->attr.config1 & skx_cha_filter_mask(idx);
+		reg1->idx = idx;
+	}
+	return 0;
+}
+
+static struct intel_uncore_ops skx_uncore_chabox_ops = {
+	/* There is no frz_en for chabox ctl */
+	.init_box		= ivbep_uncore_msr_init_box,
+	.disable_box		= snbep_uncore_msr_disable_box,
+	.enable_box		= snbep_uncore_msr_enable_box,
+	.disable_event		= snbep_uncore_msr_disable_event,
+	.enable_event		= hswep_cbox_enable_event,
+	.read_counter		= uncore_msr_read_counter,
+	.hw_config		= skx_cha_hw_config,
+	.get_constraint		= skx_cha_get_constraint,
+	.put_constraint		= snbep_cbox_put_constraint,
+};
+
+static struct intel_uncore_type skx_uncore_chabox = {
+	.name			= "cha",
+	.num_counters		= 4,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= HSWEP_C0_MSR_PMON_CTL0,
+	.perf_ctr		= HSWEP_C0_MSR_PMON_CTR0,
+	.event_mask		= HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
+	.box_ctl		= HSWEP_C0_MSR_PMON_BOX_CTL,
+	.msr_offset		= HSWEP_CBO_MSR_OFFSET,
+	.num_shared_regs	= 1,
+	.constraints		= skx_uncore_chabox_constraints,
+	.ops			= &skx_uncore_chabox_ops,
+	.format_group		= &skx_uncore_chabox_format_group,
+};
+
+static struct attribute *skx_uncore_iio_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh9.attr,
+	&format_attr_ch_mask.attr,
+	&format_attr_fc_mask.attr,
+	NULL,
+};
+
+static struct attribute_group skx_uncore_iio_format_group = {
+	.name = "format",
+	.attrs = skx_uncore_iio_formats_attr,
+};
+
+static struct event_constraint skx_uncore_iio_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
+	UNCORE_EVENT_CONSTRAINT(0x88, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0x95, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
+	UNCORE_EVENT_CONSTRAINT(0xd4, 0xc),
+	EVENT_CONSTRAINT_END
+};
+
+static void skx_iio_enable_event(struct intel_uncore_box *box,
+				 struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	wrmsrl(hwc->config_base, hwc->config | SNBEP_PMON_CTL_EN);
+}
+
+static struct intel_uncore_ops skx_uncore_iio_ops = {
+	.init_box		= ivbep_uncore_msr_init_box,
+	.disable_box		= snbep_uncore_msr_disable_box,
+	.enable_box		= snbep_uncore_msr_enable_box,
+	.disable_event		= snbep_uncore_msr_disable_event,
+	.enable_event		= skx_iio_enable_event,
+	.read_counter		= uncore_msr_read_counter,
+};
+
+static struct intel_uncore_type skx_uncore_iio = {
+	.name			= "iio",
+	.num_counters		= 4,
+	.num_boxes		= 5,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= SKX_IIO0_MSR_PMON_CTL0,
+	.perf_ctr		= SKX_IIO0_MSR_PMON_CTR0,
+	.event_mask		= SKX_IIO_PMON_RAW_EVENT_MASK,
+	.event_mask_ext		= SKX_IIO_PMON_RAW_EVENT_MASK_EXT,
+	.box_ctl		= SKX_IIO0_MSR_PMON_BOX_CTL,
+	.msr_offset		= SKX_IIO_MSR_OFFSET,
+	.constraints		= skx_uncore_iio_constraints,
+	.ops			= &skx_uncore_iio_ops,
+	.format_group		= &skx_uncore_iio_format_group,
+};
+
+static struct attribute *skx_uncore_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh8.attr,
+	NULL,
+};
+
+static struct attribute_group skx_uncore_format_group = {
+	.name = "format",
+	.attrs = skx_uncore_formats_attr,
+};
+
+static struct intel_uncore_type skx_uncore_irp = {
+	.name			= "irp",
+	.num_counters		= 2,
+	.num_boxes		= 5,
+	.perf_ctr_bits		= 48,
+	.event_ctl		= SKX_IRP0_MSR_PMON_CTL0,
+	.perf_ctr		= SKX_IRP0_MSR_PMON_CTR0,
+	.event_mask		= SNBEP_PMON_RAW_EVENT_MASK,
+	.box_ctl		= SKX_IRP0_MSR_PMON_BOX_CTL,
+	.msr_offset		= SKX_IRP_MSR_OFFSET,
+	.ops			= &skx_uncore_iio_ops,
+	.format_group		= &skx_uncore_format_group,
+};
+
+static struct intel_uncore_ops skx_uncore_pcu_ops = {
+	IVBEP_UNCORE_MSR_OPS_COMMON_INIT(),
+	.hw_config		= hswep_pcu_hw_config,
+	.get_constraint		= snbep_pcu_get_constraint,
+	.put_constraint		= snbep_pcu_put_constraint,
+};
+
+static struct intel_uncore_type skx_uncore_pcu = {
+	.name			= "pcu",
+	.num_counters		= 4,
+	.num_boxes		= 1,
+	.perf_ctr_bits		= 48,
+	.perf_ctr		= HSWEP_PCU_MSR_PMON_CTR0,
+	.event_ctl		= HSWEP_PCU_MSR_PMON_CTL0,
+	.event_mask		= SNBEP_PCU_MSR_PMON_RAW_EVENT_MASK,
+	.box_ctl		= HSWEP_PCU_MSR_PMON_BOX_CTL,
+	.num_shared_regs	= 1,
+	.ops			= &skx_uncore_pcu_ops,
+	.format_group		= &snbep_uncore_pcu_format_group,
+};
+
+static struct intel_uncore_type *skx_msr_uncores[] = {
+	&skx_uncore_ubox,
+	&skx_uncore_chabox,
+	&skx_uncore_iio,
+	&skx_uncore_irp,
+	&skx_uncore_pcu,
+	NULL,
+};
+
+static int skx_count_chabox(void)
+{
+	struct pci_dev *chabox_dev = NULL;
+	int bus, count = 0;
+
+	while (1) {
+		chabox_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x208d, chabox_dev);
+		if (!chabox_dev)
+			break;
+		if (count == 0)
+			bus = chabox_dev->bus->number;
+		if (bus != chabox_dev->bus->number)
+			break;
+		count++;
+	}
+
+	pci_dev_put(chabox_dev);
+	return count;
+}
+
+void skx_uncore_cpu_init(void)
+{
+	skx_uncore_chabox.num_boxes = skx_count_chabox();
+	uncore_msr_uncores = skx_msr_uncores;
+}
+
+static struct intel_uncore_type skx_uncore_imc = {
+	.name		= "imc",
+	.num_counters   = 4,
+	.num_boxes	= 6,
+	.perf_ctr_bits	= 48,
+	.fixed_ctr_bits	= 48,
+	.fixed_ctr	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTR,
+	.fixed_ctl	= SNBEP_MC_CHy_PCI_PMON_FIXED_CTL,
+	.event_descs	= hswep_uncore_imc_events,
+	.perf_ctr	= SNBEP_PCI_PMON_CTR0,
+	.event_ctl	= SNBEP_PCI_PMON_CTL0,
+	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,
+	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,
+	.ops		= &ivbep_uncore_pci_ops,
+	.format_group	= &skx_uncore_format_group,
+};
+
+static struct attribute *skx_upi_uncore_formats_attr[] = {
+	&format_attr_event_ext.attr,
+	&format_attr_umask_ext.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_thresh8.attr,
+	NULL,
+};
+
+static struct attribute_group skx_upi_uncore_format_group = {
+	.name = "format",
+	.attrs = skx_upi_uncore_formats_attr,
+};
+
+static void skx_upi_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+	struct pci_dev *pdev = box->pci_dev;
+
+	__set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
+	pci_write_config_dword(pdev, SKX_UPI_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
+}
+
+static struct intel_uncore_ops skx_upi_uncore_pci_ops = {
+	.init_box	= skx_upi_uncore_pci_init_box,
+	.disable_box	= snbep_uncore_pci_disable_box,
+	.enable_box	= snbep_uncore_pci_enable_box,
+	.disable_event	= snbep_uncore_pci_disable_event,
+	.enable_event	= snbep_uncore_pci_enable_event,
+	.read_counter	= snbep_uncore_pci_read_counter,
+};
+
+static struct intel_uncore_type skx_uncore_upi = {
+	.name		= "upi",
+	.num_counters   = 4,
+	.num_boxes	= 3,
+	.perf_ctr_bits	= 48,
+	.perf_ctr	= SKX_UPI_PCI_PMON_CTR0,
+	.event_ctl	= SKX_UPI_PCI_PMON_CTL0,
+	.event_mask	= SNBEP_QPI_PCI_PMON_RAW_EVENT_MASK,
+	.event_mask_ext = SKX_PMON_CTL_UMASK_EXT,
+	.box_ctl	= SKX_UPI_PCI_PMON_BOX_CTL,
+	.ops		= &skx_upi_uncore_pci_ops,
+	.format_group	= &skx_upi_uncore_format_group,
+};
+
+static void skx_m2m_uncore_pci_init_box(struct intel_uncore_box *box)
+{
+	struct pci_dev *pdev = box->pci_dev;
+
+	__set_bit(UNCORE_BOX_FLAG_CTL_OFFS8, &box->flags);
+	pci_write_config_dword(pdev, SKX_M2M_PCI_PMON_BOX_CTL, IVBEP_PMON_BOX_CTL_INT);
+}
+
+static struct intel_uncore_ops skx_m2m_uncore_pci_ops = {
+	.init_box	= skx_m2m_uncore_pci_init_box,
+	.disable_box	= snbep_uncore_pci_disable_box,
+	.enable_box	= snbep_uncore_pci_enable_box,
+	.disable_event	= snbep_uncore_pci_disable_event,
+	.enable_event	= snbep_uncore_pci_enable_event,
+	.read_counter	= snbep_uncore_pci_read_counter,
+};
+
+static struct intel_uncore_type skx_uncore_m2m = {
+	.name		= "m2m",
+	.num_counters   = 4,
+	.num_boxes	= 2,
+	.perf_ctr_bits	= 48,
+	.perf_ctr	= SKX_M2M_PCI_PMON_CTR0,
+	.event_ctl	= SKX_M2M_PCI_PMON_CTL0,
+	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,
+	.box_ctl	= SKX_M2M_PCI_PMON_BOX_CTL,
+	.ops		= &skx_m2m_uncore_pci_ops,
+	.format_group	= &skx_uncore_format_group,
+};
+
+static struct event_constraint skx_uncore_m2pcie_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
+	EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type skx_uncore_m2pcie = {
+	.name		= "m2pcie",
+	.num_counters   = 4,
+	.num_boxes	= 4,
+	.perf_ctr_bits	= 48,
+	.constraints	= skx_uncore_m2pcie_constraints,
+	.perf_ctr	= SNBEP_PCI_PMON_CTR0,
+	.event_ctl	= SNBEP_PCI_PMON_CTL0,
+	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,
+	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,
+	.ops		= &ivbep_uncore_pci_ops,
+	.format_group	= &skx_uncore_format_group,
+};
+
+static struct event_constraint skx_uncore_m3upi_constraints[] = {
+	UNCORE_EVENT_CONSTRAINT(0x1d, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x1e, 0x1),
+	UNCORE_EVENT_CONSTRAINT(0x40, 0x7),
+	UNCORE_EVENT_CONSTRAINT(0x4e, 0x7),
+	UNCORE_EVENT_CONSTRAINT(0x4f, 0x7),
+	UNCORE_EVENT_CONSTRAINT(0x50, 0x7),
+	UNCORE_EVENT_CONSTRAINT(0x51, 0x7),
+	UNCORE_EVENT_CONSTRAINT(0x52, 0x7),
+	EVENT_CONSTRAINT_END
+};
+
+static struct intel_uncore_type skx_uncore_m3upi = {
+	.name		= "m3upi",
+	.num_counters   = 3,
+	.num_boxes	= 3,
+	.perf_ctr_bits	= 48,
+	.constraints	= skx_uncore_m3upi_constraints,
+	.perf_ctr	= SNBEP_PCI_PMON_CTR0,
+	.event_ctl	= SNBEP_PCI_PMON_CTL0,
+	.event_mask	= SNBEP_PMON_RAW_EVENT_MASK,
+	.box_ctl	= SNBEP_PCI_PMON_BOX_CTL,
+	.ops		= &ivbep_uncore_pci_ops,
+	.format_group	= &skx_uncore_format_group,
+};
+
+enum {
+	SKX_PCI_UNCORE_IMC,
+	SKX_PCI_UNCORE_M2M,
+	SKX_PCI_UNCORE_UPI,
+	SKX_PCI_UNCORE_M2PCIE,
+	SKX_PCI_UNCORE_M3UPI,
+};
+
+static struct intel_uncore_type *skx_pci_uncores[] = {
+	[SKX_PCI_UNCORE_IMC]	= &skx_uncore_imc,
+	[SKX_PCI_UNCORE_M2M]	= &skx_uncore_m2m,
+	[SKX_PCI_UNCORE_UPI]	= &skx_uncore_upi,
+	[SKX_PCI_UNCORE_M2PCIE]	= &skx_uncore_m2pcie,
+	[SKX_PCI_UNCORE_M3UPI]	= &skx_uncore_m3upi,
+	NULL,
+};
+
+static const struct pci_device_id skx_uncore_pci_ids[] = {
+	{ /* MC0 Channel 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 2, SKX_PCI_UNCORE_IMC, 0),
+	},
+	{ /* MC0 Channel 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(10, 6, SKX_PCI_UNCORE_IMC, 1),
+	},
+	{ /* MC0 Channel 2 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(11, 2, SKX_PCI_UNCORE_IMC, 2),
+	},
+	{ /* MC1 Channel 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2042),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 2, SKX_PCI_UNCORE_IMC, 3),
+	},
+	{ /* MC1 Channel 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2046),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 6, SKX_PCI_UNCORE_IMC, 4),
+	},
+	{ /* MC1 Channel 2 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204a),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(13, 2, SKX_PCI_UNCORE_IMC, 5),
+	},
+	{ /* M2M0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(8, 0, SKX_PCI_UNCORE_M2M, 0),
+	},
+	{ /* M2M1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2066),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(9, 0, SKX_PCI_UNCORE_M2M, 1),
+	},
+	{ /* UPI0 Link 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(14, 0, SKX_PCI_UNCORE_UPI, 0),
+	},
+	{ /* UPI0 Link 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, SKX_PCI_UNCORE_UPI, 1),
+	},
+	{ /* UPI1 Link 2 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2058),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(16, 0, SKX_PCI_UNCORE_UPI, 2),
+	},
+	{ /* M2PCIe 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 1, SKX_PCI_UNCORE_M2PCIE, 0),
+	},
+	{ /* M2PCIe 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(22, 1, SKX_PCI_UNCORE_M2PCIE, 1),
+	},
+	{ /* M2PCIe 2 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(23, 1, SKX_PCI_UNCORE_M2PCIE, 2),
+	},
+	{ /* M2PCIe 3 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2088),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3),
+	},
+	{ /* M3UPI0 Link 0 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0),
+	},
+	{ /* M3UPI0 Link 1 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1),
+	},
+	{ /* M3UPI1 Link 2 */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C),
+		.driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2),
+	},
+	{ /* end: all zeroes */ }
+};
+
+
+static struct pci_driver skx_uncore_pci_driver = {
+	.name		= "skx_uncore",
+	.id_table	= skx_uncore_pci_ids,
+};
+
+int skx_uncore_pci_init(void)
+{
+	/* need to double check pci address */
+	int ret = snbep_pci2phy_map_init(0x2014, SKX_CPUNODEID, SKX_GIDNIDMAP, false);
+
+	if (ret)
+		return ret;
+
+	uncore_pci_uncores = skx_pci_uncores;
+	uncore_pci_driver = &skx_uncore_pci_driver;
+	return 0;
+}
+
+/* end of SKX uncore support */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8c4a477..5874d8d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -194,12 +194,13 @@
 	 */
 	struct debug_store	*ds;
 	u64			pebs_enabled;
+	int			n_pebs;
+	int			n_large_pebs;
 
 	/*
 	 * Intel LBR bits
 	 */
 	int				lbr_users;
-	void				*lbr_context;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 	struct er_account		*lbr_sel;
@@ -508,6 +509,8 @@
 	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	void		(*add)(struct perf_event *);
+	void		(*del)(struct perf_event *);
 	int		(*hw_config)(struct perf_event *event);
 	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
@@ -888,6 +891,10 @@
 
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
+void intel_pmu_pebs_add(struct perf_event *event);
+
+void intel_pmu_pebs_del(struct perf_event *event);
+
 void intel_pmu_pebs_enable(struct perf_event *event);
 
 void intel_pmu_pebs_disable(struct perf_event *event);
@@ -906,9 +913,9 @@
 
 void intel_pmu_lbr_reset(void);
 
-void intel_pmu_lbr_enable(struct perf_event *event);
+void intel_pmu_lbr_add(struct perf_event *event);
 
-void intel_pmu_lbr_disable(struct perf_event *event);
+void intel_pmu_lbr_del(struct perf_event *event);
 
 void intel_pmu_lbr_enable_all(bool pmi);
 
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 2f29f4e..cb13c05 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -378,7 +378,7 @@
 		put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode);
 	} put_user_catch(err);
 
-	err |= copy_siginfo_to_user32(&frame->info, &ksig->info);
+	err |= __copy_siginfo_to_user32(&frame->info, &ksig->info, false);
 	err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
 				     regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index e77a644..1b02038 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -217,10 +217,14 @@
  */
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
 			   output, input...)				      \
+{									      \
+	register void *__sp asm(_ASM_SP);				      \
 	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
 		"call %P[new2]", feature2)				      \
-		: output : [old] "i" (oldfunc), [new1] "i" (newfunc1),	      \
-		[new2] "i" (newfunc2), ## input)
+		: output, "+r" (__sp)					      \
+		: [old] "i" (oldfunc), [new1] "i" (newfunc1),		      \
+		  [new2] "i" (newfunc2), ## input);			      \
+}
 
 /*
  * use this macro(s) if you need more than one output parameter
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f5befd4..f5aaf6c 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -135,6 +135,7 @@
 void register_lapic_address(unsigned long address);
 extern void setup_boot_APIC_clock(void);
 extern void setup_secondary_APIC_clock(void);
+extern void lapic_update_tsc_freq(void);
 extern int APIC_init_uniprocessor(void);
 
 #ifdef CONFIG_X86_64
@@ -170,6 +171,7 @@
 static inline void disable_local_APIC(void) { }
 # define setup_boot_APIC_clock x86_init_noop
 # define setup_secondary_APIC_clock x86_init_noop
+static inline void lapic_update_tsc_freq(void) { }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_X2APIC
@@ -648,8 +650,8 @@
 
 static inline void ipi_entering_ack_irq(void)
 {
-	ack_APIC_irq();
 	irq_enter();
+	ack_APIC_irq();
 }
 
 static inline void exiting_irq(void)
@@ -659,9 +661,8 @@
 
 static inline void exiting_ack_irq(void)
 {
-	irq_exit();
-	/* Ack only at the end to avoid potential reentry */
 	ack_APIC_irq();
+	irq_exit();
 }
 
 extern void ioapic_zap_locks(void);
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 9733361..97848cd 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -158,53 +158,9 @@
  * value of "*ptr".
  *
  * xadd() is locked when multiple CPUs are online
- * xadd_sync() is always locked
- * xadd_local() is never locked
  */
 #define __xadd(ptr, inc, lock)	__xchg_op((ptr), (inc), xadd, lock)
 #define xadd(ptr, inc)		__xadd((ptr), (inc), LOCK_PREFIX)
-#define xadd_sync(ptr, inc)	__xadd((ptr), (inc), "lock; ")
-#define xadd_local(ptr, inc)	__xadd((ptr), (inc), "")
-
-#define __add(ptr, inc, lock)						\
-	({								\
-	        __typeof__ (*(ptr)) __ret = (inc);			\
-		switch (sizeof(*(ptr))) {				\
-		case __X86_CASE_B:					\
-			asm volatile (lock "addb %b1, %0\n"		\
-				      : "+m" (*(ptr)) : "qi" (inc)	\
-				      : "memory", "cc");		\
-			break;						\
-		case __X86_CASE_W:					\
-			asm volatile (lock "addw %w1, %0\n"		\
-				      : "+m" (*(ptr)) : "ri" (inc)	\
-				      : "memory", "cc");		\
-			break;						\
-		case __X86_CASE_L:					\
-			asm volatile (lock "addl %1, %0\n"		\
-				      : "+m" (*(ptr)) : "ri" (inc)	\
-				      : "memory", "cc");		\
-			break;						\
-		case __X86_CASE_Q:					\
-			asm volatile (lock "addq %1, %0\n"		\
-				      : "+m" (*(ptr)) : "ri" (inc)	\
-				      : "memory", "cc");		\
-			break;						\
-		default:						\
-			__add_wrong_size();				\
-		}							\
-		__ret;							\
-	})
-
-/*
- * add_*() adds "inc" to "*ptr"
- *
- * __add() takes a lock prefix
- * add_smp() is locked when multiple CPUs are online
- * add_sync() is always locked
- */
-#define add_smp(ptr, inc)	__add((ptr), (inc), LOCK_PREFIX)
-#define add_sync(ptr, inc)	__add((ptr), (inc), "lock; ")
 
 #define __cmpxchg_double(pfx, p1, p2, o1, o2, n1, n2)			\
 ({									\
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index a188061..03d269b 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -275,10 +275,10 @@
 #ifdef CONFIG_X86_X32_ABI
 typedef struct user_regs_struct compat_elf_gregset_t;
 
-#define PR_REG_SIZE(S) (test_thread_flag(TIF_IA32) ? 68 : 216)
-#define PRSTATUS_SIZE(S) (test_thread_flag(TIF_IA32) ? 144 : 296)
-#define SET_PR_FPVALID(S,V) \
-  do { *(int *) (((void *) &((S)->pr_reg)) + PR_REG_SIZE(0)) = (V); } \
+/* Full regset -- prstatus on x32, otherwise on ia32 */
+#define PRSTATUS_SIZE(S, R) (R != sizeof(S.pr_reg) ? 144 : 296)
+#define SET_PR_FPVALID(S, V, R) \
+  do { *(int *) (((void *) &((S)->pr_reg)) + R) = (V); } \
   while (0)
 
 #define COMPAT_USE_64BIT_TIME \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 92a8308..1188bc8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -106,7 +106,6 @@
 #define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
 #define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 4e10d73..12080d8 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -36,7 +36,7 @@
 
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
-extern struct desc_ptr debug_idt_descr;
+extern const struct desc_ptr debug_idt_descr;
 extern gate_desc debug_idt_table[];
 
 struct gdt_page {
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 3ab0537..476b574 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -10,8 +10,8 @@
 #include <uapi/asm/e820.h>
 #ifndef __ASSEMBLY__
 /* see comment in arch/x86/kernel/e820.c */
-extern struct e820map e820;
-extern struct e820map e820_saved;
+extern struct e820map *e820;
+extern struct e820map *e820_saved;
 
 extern unsigned long pci_mem_start;
 extern int e820_any_mapped(u64 start, u64 end, unsigned type);
@@ -53,6 +53,8 @@
 extern void setup_memory_map(void);
 extern char *default_machine_specific_memory_setup(void);
 
+extern void e820_reallocate_tables(void);
+
 /*
  * Returns true iff the specified range [s,e) is completely contained inside
  * the ISA region.
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index d0bb76d..389d700 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -117,7 +117,6 @@
 extern pgd_t * __init efi_call_phys_prolog(void);
 extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
 extern void __init efi_print_memmap(void);
-extern void __init efi_unmap_memmap(void);
 extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
 extern void __init efi_map_region_fixed(efi_memory_desc_t *md);
@@ -192,14 +191,7 @@
 struct efi_config {
 	u64 image_handle;
 	u64 table;
-	u64 allocate_pool;
-	u64 allocate_pages;
-	u64 get_memory_map;
-	u64 free_pool;
-	u64 free_pages;
-	u64 locate_handle;
-	u64 handle_protocol;
-	u64 exit_boot_services;
+	u64 boot_services;
 	u64 text_output;
 	efi_status_t (*call)(unsigned long, ...);
 	bool is64;
@@ -207,14 +199,27 @@
 
 __pure const struct efi_config *__efi_early(void);
 
+static inline bool efi_is_64bit(void)
+{
+	if (!IS_ENABLED(CONFIG_X86_64))
+		return false;
+
+	if (!IS_ENABLED(CONFIG_EFI_MIXED))
+		return true;
+
+	return __efi_early()->is64;
+}
+
 #define efi_call_early(f, ...)						\
-	__efi_early()->call(__efi_early()->f, __VA_ARGS__);
+	__efi_early()->call(efi_is_64bit() ?				\
+		((efi_boot_services_64_t *)(unsigned long)		\
+			__efi_early()->boot_services)->f :		\
+		((efi_boot_services_32_t *)(unsigned long)		\
+			__efi_early()->boot_services)->f, __VA_ARGS__)
 
 #define __efi_call_early(f, ...)					\
 	__efi_early()->call((unsigned long)f, __VA_ARGS__);
 
-#define efi_is_64bit()		__efi_early()->is64
-
 extern bool efi_reboot_required(void);
 
 #else
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 0e970d0..20a1fbf7 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -19,6 +19,12 @@
 # define ia32_setup_rt_frame	__setup_rt_frame
 #endif
 
+#ifdef CONFIG_COMPAT
+int __copy_siginfo_to_user32(compat_siginfo_t __user *to,
+		const siginfo_t *from, bool x32_ABI);
+#endif
+
+
 extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
 			      struct task_struct *tsk);
 extern void convert_to_fxsr(struct task_struct *tsk,
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index ae55a43..d4957ac 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -45,7 +45,8 @@
 extern u64 xfeatures_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
-extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
+extern void __init update_regset_xstate_info(unsigned int size,
+					     u64 xstate_mask);
 
 void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a4820d4..eccd0ac 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -6,6 +6,7 @@
 # define MCOUNT_ADDR		((unsigned long)(__fentry__))
 #else
 # define MCOUNT_ADDR		((unsigned long)(mcount))
+# define HAVE_FUNCTION_GRAPH_FP_TEST
 #endif
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
 
@@ -13,6 +14,8 @@
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #endif
 
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 extern atomic_t modifying_ftrace_code;
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 7178043..59405a2 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -22,10 +22,6 @@
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
-	/*
-	 * irq_tlb_count is double-counted in irq_call_count, so it must be
-	 * subtracted from irq_call_count when displaying irq_call_count
-	 */
 	unsigned int irq_tlb_count;
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 055ea99..67942b6 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -43,6 +43,9 @@
 
 	/* X2APIC detection (run once per boot) */
 	bool		(*x2apic_available)(void);
+
+	/* pin current vcpu to specified physical cpu (run rarely) */
+	void		(*pin_vcpu)(int);
 };
 
 extern const struct hypervisor_x86 *x86_hyper;
@@ -56,6 +59,7 @@
 extern void init_hypervisor(struct cpuinfo_x86 *c);
 extern void init_hypervisor_platform(void);
 extern bool hypervisor_x2apic_available(void);
+extern void hypervisor_pin_vcpu(int cpu);
 #else
 static inline void init_hypervisor(struct cpuinfo_x86 *c) { }
 static inline void init_hypervisor_platform(void) { }
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index 2230420..737da62 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -5,10 +5,10 @@
 	void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
 	void *context;			 /* context for alloc_pgt_page */
 	unsigned long pmd_flag;		 /* page flag for PMD entry */
-	bool kernel_mapping;		 /* kernel mapping or ident mapping */
+	unsigned long offset;		 /* ident mapping offset */
 };
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-				unsigned long addr, unsigned long end);
+				unsigned long pstart, unsigned long pend);
 
 #endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 6277194..9ae5ab8 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -56,8 +56,8 @@
 #define INTEL_FAM6_ATOM_SILVERMONT1	0x37 /* BayTrail/BYT / Valleyview */
 #define INTEL_FAM6_ATOM_SILVERMONT2	0x4D /* Avaton/Rangely */
 #define INTEL_FAM6_ATOM_AIRMONT		0x4C /* CherryTrail / Braswell */
-#define INTEL_FAM6_ATOM_MERRIFIELD1	0x4A /* Tangier */
-#define INTEL_FAM6_ATOM_MERRIFIELD2	0x5A /* Annidale */
+#define INTEL_FAM6_ATOM_MERRIFIELD	0x4A /* Tangier */
+#define INTEL_FAM6_ATOM_MOOREFIELD	0x5A /* Annidale */
 #define INTEL_FAM6_ATOM_GOLDMONT	0x5C
 #define INTEL_FAM6_ATOM_DENVERTON	0x5F /* Goldmont Microserver */
 
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 9d6b097..5b6753d 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -18,6 +18,8 @@
 extern int intel_mid_pci_init(void);
 extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
 
+extern void intel_mid_pwr_power_off(void);
+
 #define INTEL_MID_PWR_LSS_OFFSET	4
 #define INTEL_MID_PWR_LSS_TYPE		(1 << 7)
 
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
index 925b605..4fb1d0a 100644
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ b/arch/x86/include/asm/intel_scu_ipc.h
@@ -3,6 +3,8 @@
 
 #include <linux/notifier.h>
 
+#define IPCMSG_COLD_OFF		0x80	/* Only for Tangier */
+
 #define IPCMSG_WARM_RESET	0xF0
 #define IPCMSG_COLD_RESET	0xF1
 #define IPCMSG_SOFT_RESET	0xF2
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 2674ee3..1052a79 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -6,6 +6,7 @@
 #ifdef CONFIG_RANDOMIZE_MEMORY
 extern unsigned long page_offset_base;
 extern unsigned long vmalloc_base;
+extern unsigned long vmemmap_base;
 
 void kernel_randomize_memory(void);
 #else
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 1ef9d58..d318811 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -24,8 +24,6 @@
 extern void printk_address(unsigned long address);
 extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
-extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-		       unsigned long *sp, unsigned long bp);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern unsigned long oops_begin(void);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 8bf766e..9bd7ff5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -40,9 +40,10 @@
 #define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
 
 /* AMD-specific bits */
+#define MCI_STATUS_TCC		(1ULL<<55)  /* Task context corrupt */
+#define MCI_STATUS_SYNDV	(1ULL<<53)  /* synd reg. valid */
 #define MCI_STATUS_DEFERRED	(1ULL<<44)  /* uncorrected error, deferred exception */
 #define MCI_STATUS_POISON	(1ULL<<43)  /* access poisonous data */
-#define MCI_STATUS_TCC		(1ULL<<55)  /* Task context corrupt */
 
 /*
  * McaX field if set indicates a given bank supports MCA extensions:
@@ -110,6 +111,7 @@
 #define MSR_AMD64_SMCA_MC0_MISC0	0xc0002003
 #define MSR_AMD64_SMCA_MC0_CONFIG	0xc0002004
 #define MSR_AMD64_SMCA_MC0_IPID		0xc0002005
+#define MSR_AMD64_SMCA_MC0_SYND		0xc0002006
 #define MSR_AMD64_SMCA_MC0_DESTAT	0xc0002008
 #define MSR_AMD64_SMCA_MC0_DEADDR	0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1	0xc000200a
@@ -119,6 +121,7 @@
 #define MSR_AMD64_SMCA_MCx_MISC(x)	(MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_CONFIG(x)	(MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_IPID(x)	(MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND(x)	(MSR_AMD64_SMCA_MC0_SYND + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DESTAT(x)	(MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_DEADDR(x)	(MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y)	((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
@@ -334,44 +337,47 @@
  * Scalable MCA.
  */
 #ifdef CONFIG_X86_MCE_AMD
-enum amd_ip_types {
-	SMCA_F17H_CORE = 0,	/* Core errors */
-	SMCA_DF,		/* Data Fabric */
-	SMCA_UMC,		/* Unified Memory Controller */
-	SMCA_PB,		/* Parameter Block */
-	SMCA_PSP,		/* Platform Security Processor */
-	SMCA_SMU,		/* System Management Unit */
-	N_AMD_IP_TYPES
-};
 
-struct amd_hwid {
-	const char *name;
-	unsigned int hwid;
-};
-
-extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
-
-enum amd_core_mca_blocks {
+/* These may be used by multiple smca_hwid_mcatypes */
+enum smca_bank_types {
 	SMCA_LS = 0,	/* Load Store */
 	SMCA_IF,	/* Instruction Fetch */
-	SMCA_L2_CACHE,	/* L2 cache */
-	SMCA_DE,	/* Decoder unit */
-	RES,		/* Reserved */
-	SMCA_EX,	/* Execution unit */
+	SMCA_L2_CACHE,	/* L2 Cache */
+	SMCA_DE,	/* Decoder Unit */
+	SMCA_EX,	/* Execution Unit */
 	SMCA_FP,	/* Floating Point */
-	SMCA_L3_CACHE,	/* L3 cache */
-	N_CORE_MCA_BLOCKS
+	SMCA_L3_CACHE,	/* L3 Cache */
+	SMCA_CS,	/* Coherent Slave */
+	SMCA_PIE,	/* Power, Interrupts, etc. */
+	SMCA_UMC,	/* Unified Memory Controller */
+	SMCA_PB,	/* Parameter Block */
+	SMCA_PSP,	/* Platform Security Processor */
+	SMCA_SMU,	/* System Management Unit */
+	N_SMCA_BANK_TYPES
 };
 
-extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
-
-enum amd_df_mca_blocks {
-	SMCA_CS = 0,	/* Coherent Slave */
-	SMCA_PIE,	/* Power management, Interrupts, etc */
-	N_DF_BLOCKS
+struct smca_bank_name {
+	const char *name;	/* Short name for sysfs */
+	const char *long_name;	/* Long name for pretty-printing */
 };
 
-extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
+
+#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
+
+struct smca_hwid_mcatype {
+	unsigned int bank_type;	/* Use with smca_bank_types for easy indexing. */
+	u32 hwid_mcatype;	/* (hwid,mcatype) tuple */
+	u32 xec_bitmap;		/* Bitmap of valid ExtErrorCodes; current max is 21. */
+};
+
+struct smca_bank_info {
+	struct smca_hwid_mcatype *type;
+	u32 type_instance;
+};
+
+extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
+
 #endif
 
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index b07233b..3200704 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,7 +6,6 @@
 #include <asm/x86_init.h>
 #include <asm/apicdef.h>
 
-extern int apic_version[];
 extern int pic_mode;
 
 #ifdef CONFIG_X86_32
@@ -40,6 +39,7 @@
 extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 extern unsigned int boot_cpu_physical_apicid;
+extern u8 boot_cpu_apic_version;
 extern unsigned long mp_lapic_addr;
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -86,6 +86,7 @@
 #endif
 
 int generic_processor_info(int apicid, int version);
+int __generic_processor_info(int apicid, int version, bool enabled);
 
 #define PHYSID_ARRAY_SIZE	BITS_TO_LONGS(MAX_LOCAL_APIC)
 
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2970d22..ce93281 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -80,10 +80,6 @@
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
 }
-static inline unsigned long __read_cr4_safe(void)
-{
-	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
-}
 
 static inline void __write_cr4(unsigned long x)
 {
@@ -661,8 +657,6 @@
 
 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
-
 static __always_inline void pv_queued_spin_lock_slowpath(struct qspinlock *lock,
 							u32 val)
 {
@@ -684,22 +678,6 @@
 	PVOP_VCALL1(pv_lock_ops.kick, cpu);
 }
 
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-
-static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
-							__ticket_t ticket)
-{
-	PVOP_VCALLEE2(pv_lock_ops.lock_spinning, lock, ticket);
-}
-
-static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
-							__ticket_t ticket)
-{
-	PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
-}
-
-#endif /* CONFIG_QUEUED_SPINLOCKS */
-
 #endif /* SMP && PARAVIRT_SPINLOCKS */
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 7fa9e77..0f400c0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -108,7 +108,6 @@
 	unsigned long (*read_cr0)(void);
 	void (*write_cr0)(unsigned long);
 
-	unsigned long (*read_cr4_safe)(void);
 	unsigned long (*read_cr4)(void);
 	void (*write_cr4)(unsigned long);
 
@@ -301,23 +300,16 @@
 struct arch_spinlock;
 #ifdef CONFIG_SMP
 #include <asm/spinlock_types.h>
-#else
-typedef u16 __ticket_t;
 #endif
 
 struct qspinlock;
 
 struct pv_lock_ops {
-#ifdef CONFIG_QUEUED_SPINLOCKS
 	void (*queued_spin_lock_slowpath)(struct qspinlock *lock, u32 val);
 	struct paravirt_callee_save queued_spin_unlock;
 
 	void (*wait)(u8 *ptr, u8 val);
 	void (*kick)(int cpu);
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-	struct paravirt_callee_save lock_spinning;
-	void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
-#endif /* !CONFIG_QUEUED_SPINLOCKS */
 };
 
 /* This contains all the paravirt structures: we get a convenient
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e8ec7a..1cc82ec 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -145,7 +145,7 @@
  *
  * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2|1|0| <- bit number
  * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
- * | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
+ * | OFFSET (14->63) | TYPE (9-13)  |0|X|X|X| X| X|X|X|0| <- swp entry
  *
  * G (8) is aliased and used as a PROT_NONE indicator for
  * !present ptes.  We need to start storing swap entries above
@@ -156,7 +156,7 @@
 #define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
 #define SWP_TYPE_BITS 5
 /* Place the offset above the type: */
-#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
+#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)
 
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 6fdef9e..3a26420 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -57,11 +57,13 @@
 #define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
 #define VMALLOC_SIZE_TB	_AC(32, UL)
 #define __VMALLOC_BASE	_AC(0xffffc90000000000, UL)
-#define VMEMMAP_START	_AC(0xffffea0000000000, UL)
+#define __VMEMMAP_BASE	_AC(0xffffea0000000000, UL)
 #ifdef CONFIG_RANDOMIZE_MEMORY
 #define VMALLOC_START	vmalloc_base
+#define VMEMMAP_START	vmemmap_base
 #else
 #define VMALLOC_START	__VMALLOC_BASE
+#define VMEMMAP_START	__VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 #define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
 #define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
index 643eba4..2c1ebeb 100644
--- a/arch/x86/include/asm/pmem.h
+++ b/arch/x86/include/asm/pmem.h
@@ -46,10 +46,7 @@
 
 static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
 {
-	if (static_cpu_has(X86_FEATURE_MCE_RECOVERY))
-		return memcpy_mcsafe(dst, src, n);
-	memcpy(dst, src, n);
-	return 0;
+	return memcpy_mcsafe(dst, src, n);
 }
 
 /**
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 63def95..984a7bf 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -389,9 +389,9 @@
 	unsigned short		fsindex;
 	unsigned short		gsindex;
 #endif
-#ifdef CONFIG_X86_32
-	unsigned long		ip;
-#endif
+
+	u32			status;		/* thread synchronous flags */
+
 #ifdef CONFIG_X86_64
 	unsigned long		fsbase;
 	unsigned long		gsbase;
@@ -438,6 +438,15 @@
 };
 
 /*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
+
+/*
  * Set IOPL bits in EFLAGS from given mask
  */
 static inline void native_set_iopl_mask(unsigned mask)
@@ -724,8 +733,6 @@
 	.addr_limit		= KERNEL_DS,				  \
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 /*
  * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
  * This is necessary to guarantee that the entire "struct pt_regs"
@@ -776,17 +783,13 @@
 	.addr_limit		= KERNEL_DS,			\
 }
 
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-#define thread_saved_pc(t)	READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8))
-
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
 
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 					       unsigned long new_sp);
 
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 9c6b890..230e190 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -44,9 +44,9 @@
 extern struct real_mode_header *real_mode_header;
 extern unsigned char real_mode_blob_end[];
 
-extern unsigned long init_rsp;
 extern unsigned long initial_code;
 extern unsigned long initial_gs;
+extern unsigned long initial_stack;
 
 extern unsigned char real_mode_blob[];
 extern unsigned char real_mode_relocs[];
@@ -58,7 +58,15 @@
 extern unsigned char secondary_startup_64[];
 #endif
 
+static inline size_t real_mode_size_needed(void)
+{
+	if (real_mode_header)
+		return 0;	/* already allocated. */
+
+	return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
+}
+
+void set_real_mode_mem(phys_addr_t mem, size_t size);
 void reserve_real_mode(void);
-void setup_real_mode(void);
 
 #endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
index 8dbc762..3d33a71 100644
--- a/arch/x86/include/asm/rwsem.h
+++ b/arch/x86/include/asm/rwsem.h
@@ -154,7 +154,7 @@
 		     : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1),
 		       CC_OUT(e) (result)
 		     : "er" (RWSEM_ACTIVE_WRITE_BIAS)
-		     : "memory", "cc");
+		     : "memory");
 	return result;
 }
 
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index dd1e7d6..8af22be 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -23,6 +23,10 @@
 	unsigned long sig[_NSIG_WORDS];
 } sigset_t;
 
+/* non-uapi in-kernel SA_FLAGS for those indicates ABI for a signal frame */
+#define SA_IA32_ABI	0x02000000u
+#define SA_X32_ABI	0x01000000u
+
 #ifndef CONFIG_COMPAT
 typedef sigset_t compat_sigset_t;
 #endif
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index ebd0c16..19980b3 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -39,9 +39,6 @@
 DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
 #endif
 
-/* Static state in head.S used to set up a CPU */
-extern unsigned long stack_start; /* Initial stack pointer address */
-
 struct task_struct;
 
 struct smp_ops {
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 587d791..19a2224 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -59,22 +59,19 @@
 static inline unsigned long native_read_cr4(void)
 {
 	unsigned long val;
-	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
-	return val;
-}
-
-static inline unsigned long native_read_cr4_safe(void)
-{
-	unsigned long val;
-	/* This could fault if %cr4 does not exist. In x86_64, a cr4 always
-	 * exists, so it will never fail. */
 #ifdef CONFIG_X86_32
+	/*
+	 * This could fault if CR4 does not exist.  Non-existent CR4
+	 * is functionally equivalent to CR4 == 0.  Keep it simple and pretend
+	 * that CR4 == 0 on CPUs that don't have CR4.
+	 */
 	asm volatile("1: mov %%cr4, %0\n"
 		     "2:\n"
 		     _ASM_EXTABLE(1b, 2b)
 		     : "=r" (val), "=m" (__force_order) : "0" (0));
 #else
-	val = native_read_cr4();
+	/* CR4 always exists on x86_64. */
+	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
 #endif
 	return val;
 }
@@ -182,11 +179,6 @@
 	return native_read_cr4();
 }
 
-static inline unsigned long __read_cr4_safe(void)
-{
-	return native_read_cr4_safe();
-}
-
 static inline void __write_cr4(unsigned long x)
 {
 	native_write_cr4(x);
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index be0a059..921bea7 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -20,187 +20,13 @@
  * (the type definitions are in asm/spinlock_types.h)
  */
 
-#ifdef CONFIG_X86_32
-# define LOCK_PTR_REG "a"
-#else
-# define LOCK_PTR_REG "D"
-#endif
-
-#if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE))
-/*
- * On PPro SMP, we use a locked operation to unlock
- * (PPro errata 66, 92)
- */
-# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
-#else
-# define UNLOCK_LOCK_PREFIX
-#endif
-
 /* How long a lock should spin before we consider blocking */
 #define SPIN_THRESHOLD	(1 << 15)
 
 extern struct static_key paravirt_ticketlocks_enabled;
 static __always_inline bool static_key_false(struct static_key *key);
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
 #include <asm/qspinlock.h>
-#else
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-
-static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
-{
-	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
-}
-
-#else  /* !CONFIG_PARAVIRT_SPINLOCKS */
-static __always_inline void __ticket_lock_spinning(arch_spinlock_t *lock,
-							__ticket_t ticket)
-{
-}
-static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
-							__ticket_t ticket)
-{
-}
-
-#endif /* CONFIG_PARAVIRT_SPINLOCKS */
-static inline int  __tickets_equal(__ticket_t one, __ticket_t two)
-{
-	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
-}
-
-static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
-							__ticket_t head)
-{
-	if (head & TICKET_SLOWPATH_FLAG) {
-		arch_spinlock_t old, new;
-
-		old.tickets.head = head;
-		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
-		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
-		new.tickets.tail = old.tickets.tail;
-
-		/* try to clear slowpath flag when there are no contenders */
-		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
-	}
-}
-
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
-	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
-}
-
-/*
- * Ticket locks are conceptually two parts, one indicating the current head of
- * the queue, and the other indicating the current tail. The lock is acquired
- * by atomically noting the tail and incrementing it by one (thus adding
- * ourself to the queue and noting our position), then waiting until the head
- * becomes equal to the the initial value of the tail.
- *
- * We use an xadd covering *both* parts of the lock, to increment the tail and
- * also load the position of the head, which takes care of memory ordering
- * issues and should be optimal for the uncontended case. Note the tail must be
- * in the high part, because a wide xadd increment of the low part would carry
- * up and contaminate the high part.
- */
-static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	register struct __raw_tickets inc = { .tail = TICKET_LOCK_INC };
-
-	inc = xadd(&lock->tickets, inc);
-	if (likely(inc.head == inc.tail))
-		goto out;
-
-	for (;;) {
-		unsigned count = SPIN_THRESHOLD;
-
-		do {
-			inc.head = READ_ONCE(lock->tickets.head);
-			if (__tickets_equal(inc.head, inc.tail))
-				goto clear_slowpath;
-			cpu_relax();
-		} while (--count);
-		__ticket_lock_spinning(lock, inc.tail);
-	}
-clear_slowpath:
-	__ticket_check_and_clear_slowpath(lock, inc.head);
-out:
-	barrier();	/* make sure nothing creeps before the lock is taken */
-}
-
-static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	arch_spinlock_t old, new;
-
-	old.tickets = READ_ONCE(lock->tickets);
-	if (!__tickets_equal(old.tickets.head, old.tickets.tail))
-		return 0;
-
-	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
-	new.head_tail &= ~TICKET_SLOWPATH_FLAG;
-
-	/* cmpxchg is a full barrier, so nothing can move before it */
-	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
-}
-
-static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	if (TICKET_SLOWPATH_FLAG &&
-		static_key_false(&paravirt_ticketlocks_enabled)) {
-		__ticket_t head;
-
-		BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
-		head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
-
-		if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
-			head &= ~TICKET_SLOWPATH_FLAG;
-			__ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
-		}
-	} else
-		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
-}
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
-	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
-
-	return !__tickets_equal(tmp.tail, tmp.head);
-}
-
-static inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
-	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
-
-	tmp.head &= ~TICKET_SLOWPATH_FLAG;
-	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
-}
-#define arch_spin_is_contended	arch_spin_is_contended
-
-static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
-						  unsigned long flags)
-{
-	arch_spin_lock(lock);
-}
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-	__ticket_t head = READ_ONCE(lock->tickets.head);
-
-	for (;;) {
-		struct __raw_tickets tmp = READ_ONCE(lock->tickets);
-		/*
-		 * We need to check "unlocked" in a loop, tmp.head == head
-		 * can be false positive because of overflow.
-		 */
-		if (__tickets_equal(tmp.head, tmp.tail) ||
-				!__tickets_equal(tmp.head, head))
-			break;
-
-		cpu_relax();
-	}
-}
-#endif /* CONFIG_QUEUED_SPINLOCKS */
 
 /*
  * Read-write spinlocks, allowing multiple readers
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 65c3e37..25311eb 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -23,20 +23,7 @@
 
 #define TICKET_SHIFT	(sizeof(__ticket_t) * 8)
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
 #include <asm-generic/qspinlock_types.h>
-#else
-typedef struct arch_spinlock {
-	union {
-		__ticketpair_t head_tail;
-		struct __raw_tickets {
-			__ticket_t head, tail;
-		} tickets;
-	};
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
-#endif /* CONFIG_QUEUED_SPINLOCKS */
 
 #include <asm-generic/qrwlock_types.h>
 
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 0944218..37f2e0b 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -8,86 +8,86 @@
 
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
+#include <asm/switch_to.h>
+
+enum stack_type {
+	STACK_TYPE_UNKNOWN,
+	STACK_TYPE_TASK,
+	STACK_TYPE_IRQ,
+	STACK_TYPE_SOFTIRQ,
+	STACK_TYPE_EXCEPTION,
+	STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
+};
+
+struct stack_info {
+	enum stack_type type;
+	unsigned long *begin, *end, *next_sp;
+};
+
+bool in_task_stack(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info);
+
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask);
+
+void stack_type_str(enum stack_type type, const char **begin,
+		    const char **end);
+
+static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
+{
+	void *begin = info->begin;
+	void *end   = info->end;
+
+	return (info->type != STACK_TYPE_UNKNOWN &&
+		addr >= begin && addr < end &&
+		addr + len > begin && addr + len <= end);
+}
 
 extern int kstack_depth_to_print;
 
-struct thread_info;
-struct stacktrace_ops;
-
-typedef unsigned long (*walk_stack_t)(struct task_struct *task,
-				      unsigned long *stack,
-				      unsigned long bp,
-				      const struct stacktrace_ops *ops,
-				      void *data,
-				      unsigned long *end,
-				      int *graph);
-
-extern unsigned long
-print_context_stack(struct task_struct *task,
-		    unsigned long *stack, unsigned long bp,
-		    const struct stacktrace_ops *ops, void *data,
-		    unsigned long *end, int *graph);
-
-extern unsigned long
-print_context_stack_bp(struct task_struct *task,
-		       unsigned long *stack, unsigned long bp,
-		       const struct stacktrace_ops *ops, void *data,
-		       unsigned long *end, int *graph);
-
-/* Generic stack tracer with callbacks */
-
-struct stacktrace_ops {
-	int (*address)(void *data, unsigned long address, int reliable);
-	/* On negative return stop dumping */
-	int (*stack)(void *data, char *name);
-	walk_stack_t	walk_stack;
-};
-
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data);
-
 #ifdef CONFIG_X86_32
 #define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
 #else
 #define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
 #ifdef CONFIG_FRAME_POINTER
-static inline unsigned long
-stack_frame(struct task_struct *task, struct pt_regs *regs)
+static inline unsigned long *
+get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 {
-	unsigned long bp;
-
 	if (regs)
-		return regs->bp;
+		return (unsigned long *)regs->bp;
 
-	if (task == current) {
-		/* Grab bp right from our regs */
-		get_bp(bp);
-		return bp;
-	}
+	if (task == current)
+		return __builtin_frame_address(0);
 
-	/* bp is the last reg pushed by switch_to */
-	return *(unsigned long *)task->thread.sp;
+	return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
 }
 #else
-static inline unsigned long
-stack_frame(struct task_struct *task, struct pt_regs *regs)
+static inline unsigned long *
+get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 {
-	return 0;
+	return NULL;
 }
-#endif
+#endif /* CONFIG_FRAME_POINTER */
 
-extern void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *stack, unsigned long bp, char *log_lvl);
+static inline unsigned long *
+get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
+{
+	if (regs)
+		return (unsigned long *)kernel_stack_pointer(regs);
 
-extern void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl);
+	if (task == current)
+		return __builtin_frame_address(0);
+
+	return (unsigned long *)task->thread.sp;
+}
+
+void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *stack, char *log_lvl);
+
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *sp, char *log_lvl);
 
 extern unsigned int code_bytes;
 
@@ -106,7 +106,7 @@
 {
 	struct stack_frame *frame;
 
-	get_bp(frame);
+	frame = __builtin_frame_address(0);
 
 #ifdef CONFIG_FRAME_POINTER
 	frame = frame->next_frame;
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 90dbbd9..a164862 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -2,6 +2,7 @@
 #define _ASM_X86_STRING_64_H
 
 #ifdef __KERNEL__
+#include <linux/jump_label.h>
 
 /* Written 2002 by Andi Kleen */
 
@@ -78,6 +79,9 @@
 #define memset(s, c, n) __memset(s, c, n)
 #endif
 
+__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
+DECLARE_STATIC_KEY_FALSE(mcsafe_key);
+
 /**
  * memcpy_mcsafe - copy memory with indication if a machine check happened
  *
@@ -86,10 +90,23 @@
  * @cnt:	number of bytes to copy
  *
  * Low level memory copy function that catches machine checks
+ * We only call into the "safe" function on systems that can
+ * actually do machine check recovery. Everyone else can just
+ * use memcpy().
  *
  * Return 0 for success, -EFAULT for fail
  */
-int memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+static __always_inline __must_check int
+memcpy_mcsafe(void *dst, const void *src, size_t cnt)
+{
+#ifdef CONFIG_X86_MCE
+	if (static_branch_unlikely(&mcsafe_key))
+		return memcpy_mcsafe_unrolled(dst, src, cnt);
+	else
+#endif
+		memcpy(dst, src, cnt);
+	return 0;
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 8f321a1..5cb436a 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,130 +2,66 @@
 #define _ASM_X86_SWITCH_TO_H
 
 struct task_struct; /* one of the stranger aspects of C forward declarations */
+
+struct task_struct *__switch_to_asm(struct task_struct *prev,
+				    struct task_struct *next);
+
 __visible struct task_struct *__switch_to(struct task_struct *prev,
-					   struct task_struct *next);
+					  struct task_struct *next);
 struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss);
 
-#ifdef CONFIG_X86_32
+/* This runs runs on the previous thread's stack. */
+static inline void prepare_switch_to(struct task_struct *prev,
+				     struct task_struct *next)
+{
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * If we switch to a stack that has a top-level paging entry
+	 * that is not present in the current mm, the resulting #PF will
+	 * will be promoted to a double-fault and we'll panic.  Probe
+	 * the new stack now so that vmalloc_fault can fix up the page
+	 * tables if needed.  This can only happen if we use a stack
+	 * in vmap space.
+	 *
+	 * We assume that the stack is aligned so that it never spans
+	 * more than one top-level paging entry.
+	 *
+	 * To minimize cache pollution, just follow the stack pointer.
+	 */
+	READ_ONCE(*(unsigned char *)next->thread.sp);
+#endif
+}
 
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary							\
-	"movl %P[task_canary](%[next]), %%ebx\n\t"			\
-	"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
-#define __switch_canary_oparam						\
-	, [stack_canary] "=m" (stack_canary.canary)
-#define __switch_canary_iparam						\
-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else	/* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif	/* CC_STACKPROTECTOR */
+asmlinkage void ret_from_fork(void);
 
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
+/* data that is pointed to by thread.sp */
+struct inactive_task_frame {
+#ifdef CONFIG_X86_64
+	unsigned long r15;
+	unsigned long r14;
+	unsigned long r13;
+	unsigned long r12;
+#else
+	unsigned long si;
+	unsigned long di;
+#endif
+	unsigned long bx;
+	unsigned long bp;
+	unsigned long ret_addr;
+};
+
+struct fork_frame {
+	struct inactive_task_frame frame;
+	struct pt_regs regs;
+};
+
 #define switch_to(prev, next, last)					\
 do {									\
-	/*								\
-	 * Context-switching clobbers all registers, so we clobber	\
-	 * them explicitly, via unused output variables.		\
-	 * (EAX and EBP is not listed because EBP is saved/restored	\
-	 * explicitly for wchan access and EAX is the return value of	\
-	 * __switch_to())						\
-	 */								\
-	unsigned long ebx, ecx, edx, esi, edi;				\
+	prepare_switch_to(prev, next);					\
 									\
-	asm volatile("pushl %%ebp\n\t"		/* save    EBP   */	\
-		     "movl %%esp,%[prev_sp]\n\t"	/* save    ESP   */ \
-		     "movl %[next_sp],%%esp\n\t"	/* restore ESP   */ \
-		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
-		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
-		     __switch_canary					\
-		     "jmp __switch_to\n"	/* regparm call  */	\
-		     "1:\t"						\
-		     "popl %%ebp\n\t"		/* restore EBP   */	\
-									\
-		     /* output parameters */				\
-		     : [prev_sp] "=m" (prev->thread.sp),		\
-		       [prev_ip] "=m" (prev->thread.ip),		\
-		       "=a" (last),					\
-									\
-		       /* clobbered output registers: */		\
-		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
-		       "=S" (esi), "=D" (edi)				\
-		       							\
-		       __switch_canary_oparam				\
-									\
-		       /* input parameters: */				\
-		     : [next_sp]  "m" (next->thread.sp),		\
-		       [next_ip]  "m" (next->thread.ip),		\
-		       							\
-		       /* regparm parameters for __switch_to(): */	\
-		       [prev]     "a" (prev),				\
-		       [next]     "d" (next)				\
-									\
-		       __switch_canary_iparam				\
-									\
-		     : /* reloaded segment registers */			\
-			"memory");					\
+	((last) = __switch_to_asm((prev), (next)));			\
 } while (0)
 
-#else /* CONFIG_X86_32 */
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT    "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
-
-#define __EXTRA_CLOBBER  \
-	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
-	  "r12", "r13", "r14", "r15", "flags"
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary							  \
-	"movq %P[task_canary](%%rsi),%%r8\n\t"				  \
-	"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
-#define __switch_canary_oparam						  \
-	, [gs_canary] "=m" (irq_stack_union.stack_canary)
-#define __switch_canary_iparam						  \
-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else	/* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif	/* CC_STACKPROTECTOR */
-
-/*
- * There is no need to save or restore flags, because flags are always
- * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
- * has no effect.
- */
-#define switch_to(prev, next, last) \
-	asm volatile(SAVE_CONTEXT					  \
-	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
-	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
-	     "call __switch_to\n\t"					  \
-	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
-	     __switch_canary						  \
-	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
-	     "movq %%rax,%%rdi\n\t" 					  \
-	     "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"		  \
-	     "jnz   ret_from_fork\n\t"					  \
-	     RESTORE_CONTEXT						  \
-	     : "=a" (last)					  	  \
-	       __switch_canary_oparam					  \
-	     : [next] "S" (next), [prev] "D" (prev),			  \
-	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
-	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
-	       [_tif_fork] "i" (_TIF_FORK),			  	  \
-	       [thread_info] "i" (offsetof(struct task_struct, stack)),   \
-	       [current_task] "m" (current_task)			  \
-	       __switch_canary_iparam					  \
-	     : "memory", "cc" __EXTRA_CLOBBER)
-
-#endif /* CONFIG_X86_32 */
-
 #endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4e23dd1..e3c95e8 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@
 	 * TS_COMPAT is set for 32-bit syscall entries and then
 	 * remains set until we return to user mode.
 	 */
-	if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
+	if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
 		/*
 		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
 		 * and will match correctly in comparisons.
@@ -116,7 +116,7 @@
 					 unsigned long *args)
 {
 # ifdef CONFIG_IA32_EMULATION
-	if (task_thread_info(task)->status & TS_COMPAT)
+	if (task->thread.status & TS_COMPAT)
 		switch (i) {
 		case 0:
 			if (!n--) break;
@@ -177,7 +177,7 @@
 					 const unsigned long *args)
 {
 # ifdef CONFIG_IA32_EMULATION
-	if (task_thread_info(task)->status & TS_COMPAT)
+	if (task->thread.status & TS_COMPAT)
 		switch (i) {
 		case 0:
 			if (!n--) break;
@@ -234,18 +234,8 @@
 
 static inline int syscall_get_arch(void)
 {
-#ifdef CONFIG_IA32_EMULATION
-	/*
-	 * TS_COMPAT is set for 32-bit syscall entry and then
-	 * remains set until we return to user mode.
-	 *
-	 * x32 tasks should be considered AUDIT_ARCH_X86_64.
-	 */
-	if (task_thread_info(current)->status & TS_COMPAT)
-		return AUDIT_ARCH_I386;
-#endif
-	/* Both x32 and x86_64 are considered "64-bit". */
-	return AUDIT_ARCH_X86_64;
+	/* x32 tasks should be considered AUDIT_ARCH_X86_64. */
+	return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 #endif	/* CONFIG_X86_32 */
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8b7c8d8..2aaca53 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -52,21 +52,6 @@
 #include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
-struct thread_info {
-	struct task_struct	*task;		/* main task structure */
-	__u32			flags;		/* low level flags */
-	__u32			status;		/* thread synchronous flags */
-	__u32			cpu;		/* current CPU */
-};
-
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.task		= &tsk,			\
-	.flags		= 0,			\
-	.cpu		= 0,			\
-}
-
-#define init_thread_info	(init_thread_union.thread_info)
 #define init_stack		(init_thread_union.stack)
 
 #else /* !__ASSEMBLY__ */
@@ -95,7 +80,6 @@
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
-#define TIF_FORK		18	/* ret_from_fork */
 #define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
@@ -119,7 +103,6 @@
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
-#define _TIF_FORK		(1 << TIF_FORK)
 #define _TIF_NOHZ		(1 << TIF_NOHZ)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
@@ -160,11 +143,6 @@
  */
 #ifndef __ASSEMBLY__
 
-static inline struct thread_info *current_thread_info(void)
-{
-	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
-}
-
 static inline unsigned long current_stack_pointer(void)
 {
 	unsigned long sp;
@@ -226,60 +204,19 @@
 # define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
 #endif
 
-/*
- * ASM operand which evaluates to a 'thread_info' address of
- * the current task, if it is known that "reg" is exactly "off"
- * bytes below the top of the stack currently.
- *
- * ( The kernel stack's size is known at build time, it is usually
- *   2 or 4 pages, and the bottom  of the kernel stack contains
- *   the thread_info structure. So to access the thread_info very
- *   quickly from assembly code we can calculate down from the
- *   top of the kernel stack to the bottom, using constant,
- *   build-time calculations only. )
- *
- * For example, to fetch the current thread_info->flags value into %eax
- * on x86-64 defconfig kernels, in syscall entry code where RSP is
- * currently at exactly SIZEOF_PTREGS bytes away from the top of the
- * stack:
- *
- *      mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
- *
- * will translate to:
- *
- *      8b 84 24 b8 c0 ff ff      mov    -0x3f48(%rsp), %eax
- *
- * which is below the current RSP by almost 16K.
- */
-#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
-
 #endif
 
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #ifdef CONFIG_COMPAT
 #define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
 #endif
-
 #ifndef __ASSEMBLY__
 
-static inline bool in_ia32_syscall(void)
-{
 #ifdef CONFIG_X86_32
-	return true;
+#define in_ia32_syscall() true
+#else
+#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
+			   current->thread.status & TS_COMPAT)
 #endif
-#ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->status & TS_COMPAT)
-		return true;
-#endif
-	return false;
-}
 
 /*
  * Force syscall return via IRET by making it look as if there was
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4e5be94..6fa8594 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -135,7 +135,14 @@
 
 static inline void __native_flush_tlb(void)
 {
+	/*
+	 * If current->mm == NULL then we borrow a mm which may change during a
+	 * task switch and therefore we must not be preempted while we write CR3
+	 * back:
+	 */
+	preempt_disable();
 	native_write_cr3(native_read_cr3());
+	preempt_enable();
 }
 
 static inline void __native_flush_tlb_global_irq_disabled(void)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index c3496619..01fd0a7 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -117,6 +117,12 @@
 extern void ist_begin_non_atomic(struct pt_regs *regs);
 extern void ist_end_non_atomic(void);
 
+#ifdef CONFIG_VMAP_STACK
+void __noreturn handle_stack_overflow(const char *message,
+				      struct pt_regs *regs,
+				      unsigned long fault_address);
+#endif
+
 /* Interrupts/Exceptions */
 enum {
 	X86_TRAP_DE = 0,	/*  0, Divide-by-zero */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a0ae610..2131c4c 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -433,7 +433,11 @@
 #define __get_user_asm_ex(x, addr, itype, rtype, ltype)			\
 	asm volatile("1:	mov"itype" %1,%"rtype"0\n"		\
 		     "2:\n"						\
-		     _ASM_EXTABLE_EX(1b, 2b)				\
+		     ".section .fixup,\"ax\"\n"				\
+                     "3:xor"itype" %"rtype"0,%"rtype"0\n"		\
+		     "  jmp 2b\n"					\
+		     ".previous\n"					\
+		     _ASM_EXTABLE_EX(1b, 3b)				\
 		     : ltype(x) : "m" (__m(addr)))
 
 #define __put_user_nocheck(x, ptr, size)			\
@@ -697,44 +701,15 @@
 unsigned long __must_check _copy_to_user(void __user *to, const void *from,
 					 unsigned n);
 
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-# define copy_user_diag __compiletime_error
-#else
-# define copy_user_diag __compiletime_warning
-#endif
+extern void __compiletime_error("usercopy buffer size is too small")
+__bad_copy_user(void);
 
-extern void copy_user_diag("copy_from_user() buffer size is too small")
-copy_from_user_overflow(void);
-extern void copy_user_diag("copy_to_user() buffer size is too small")
-copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
-
-#undef copy_user_diag
-
-#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
-
-extern void
-__compiletime_warning("copy_from_user() buffer size is not provably correct")
-__copy_from_user_overflow(void) __asm__("copy_from_user_overflow");
-#define __copy_from_user_overflow(size, count) __copy_from_user_overflow()
-
-extern void
-__compiletime_warning("copy_to_user() buffer size is not provably correct")
-__copy_to_user_overflow(void) __asm__("copy_from_user_overflow");
-#define __copy_to_user_overflow(size, count) __copy_to_user_overflow()
-
-#else
-
-static inline void
-__copy_from_user_overflow(int size, unsigned long count)
+static inline void copy_user_overflow(int size, unsigned long count)
 {
 	WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count);
 }
 
-#define __copy_to_user_overflow __copy_from_user_overflow
-
-#endif
-
-static inline unsigned long __must_check
+static __always_inline unsigned long __must_check
 copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	int sz = __compiletime_object_size(to);
@@ -743,36 +718,18 @@
 
 	kasan_check_write(to, n);
 
-	/*
-	 * While we would like to have the compiler do the checking for us
-	 * even in the non-constant size case, any false positives there are
-	 * a problem (especially when DEBUG_STRICT_USER_COPY_CHECKS, but even
-	 * without - the [hopefully] dangerous looking nature of the warning
-	 * would make people go look at the respecitive call sites over and
-	 * over again just to find that there's no problem).
-	 *
-	 * And there are cases where it's just not realistic for the compiler
-	 * to prove the count to be in range. For example when multiple call
-	 * sites of a helper function - perhaps in different source files -
-	 * all doing proper range checking, yet the helper function not doing
-	 * so again.
-	 *
-	 * Therefore limit the compile time checking to the constant size
-	 * case, and do only runtime checking for non-constant sizes.
-	 */
-
 	if (likely(sz < 0 || sz >= n)) {
 		check_object_size(to, n, false);
 		n = _copy_from_user(to, from, n);
-	} else if (__builtin_constant_p(n))
-		copy_from_user_overflow();
+	} else if (!__builtin_constant_p(n))
+		copy_user_overflow(sz, n);
 	else
-		__copy_from_user_overflow(sz, n);
+		__bad_copy_user();
 
 	return n;
 }
 
-static inline unsigned long __must_check
+static __always_inline unsigned long __must_check
 copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	int sz = __compiletime_object_size(from);
@@ -781,21 +738,17 @@
 
 	might_fault();
 
-	/* See the comment in copy_from_user() above. */
 	if (likely(sz < 0 || sz >= n)) {
 		check_object_size(from, n, true);
 		n = _copy_to_user(to, from, n);
-	} else if (__builtin_constant_p(n))
-		copy_to_user_overflow();
+	} else if (!__builtin_constant_p(n))
+		copy_user_overflow(sz, n);
 	else
-		__copy_to_user_overflow(sz, n);
+		__bad_copy_user();
 
 	return n;
 }
 
-#undef __copy_from_user_overflow
-#undef __copy_to_user_overflow
-
 /*
  * We rely on the nested NMI work to allow atomic faults from the NMI path; the
  * nested NMI paths are careful to preserve CR2.
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
new file mode 100644
index 0000000..c4b6d1c
--- /dev/null
+++ b/arch/x86/include/asm/unwind.h
@@ -0,0 +1,73 @@
+#ifndef _ASM_X86_UNWIND_H
+#define _ASM_X86_UNWIND_H
+
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+struct unwind_state {
+	struct stack_info stack_info;
+	unsigned long stack_mask;
+	struct task_struct *task;
+	int graph_idx;
+#ifdef CONFIG_FRAME_POINTER
+	unsigned long *bp;
+#else
+	unsigned long *sp;
+#endif
+};
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long *first_frame);
+
+bool unwind_next_frame(struct unwind_state *state);
+
+static inline bool unwind_done(struct unwind_state *state)
+{
+	return state->stack_info.type == STACK_TYPE_UNKNOWN;
+}
+
+static inline
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+		  struct pt_regs *regs, unsigned long *first_frame)
+{
+	first_frame = first_frame ? : get_stack_pointer(task, regs);
+
+	__unwind_start(state, task, regs, first_frame);
+}
+
+#ifdef CONFIG_FRAME_POINTER
+
+static inline
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return NULL;
+
+	return state->bp + 1;
+}
+
+unsigned long unwind_get_return_address(struct unwind_state *state);
+
+#else /* !CONFIG_FRAME_POINTER */
+
+static inline
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+	return NULL;
+}
+
+static inline
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return 0;
+
+	return ftrace_graph_ret_addr(state->task, &state->graph_idx,
+				     *state->sp, state->sp);
+}
+
+#endif /* CONFIG_FRAME_POINTER */
+
+#endif /* _ASM_X86_UNWIND_H */
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index c852590..e652a7c 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -79,7 +79,7 @@
 	u16	nasid;		/* HNasid */
 	u16	sockid;		/* Socket ID, high bits of APIC ID */
 	u16	pnode;		/* Index to MMR and GRU spaces */
-	u32	pxm;		/* ACPI proximity domain number */
+	u32	unused2;
 	u32	limit;		/* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
 };
 
@@ -88,7 +88,8 @@
 #define	UV_SYSTAB_VERSION_UV4		0x400	/* UV4 BIOS base version */
 #define	UV_SYSTAB_VERSION_UV4_1		0x401	/* + gpa_shift */
 #define	UV_SYSTAB_VERSION_UV4_2		0x402	/* + TYPE_NVRAM/WINDOW/MBOX */
-#define	UV_SYSTAB_VERSION_UV4_LATEST	UV_SYSTAB_VERSION_UV4_2
+#define	UV_SYSTAB_VERSION_UV4_3		0x403	/* - GAM Range PXM Value */
+#define	UV_SYSTAB_VERSION_UV4_LATEST	UV_SYSTAB_VERSION_UV4_3
 
 #define	UV_SYSTAB_TYPE_UNUSED		0	/* End of table (offset == 0) */
 #define	UV_SYSTAB_TYPE_GAM_PARAMS	1	/* GAM PARAM conversions */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index cc44d92..57ab86d 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -49,14 +49,12 @@
 #define UV_NET_ENDPOINT_INTD		(is_uv1_hub() ?			\
 			UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD)
 #define UV_DESC_PSHIFT			49
-#define UV_PAYLOADQ_PNODE_SHIFT		49
+#define UV_PAYLOADQ_GNODE_SHIFT		49
 #define UV_PTC_BASENAME			"sgi_uv/ptc_statistics"
 #define UV_BAU_BASENAME			"sgi_uv/bau_tunables"
 #define UV_BAU_TUNABLES_DIR		"sgi_uv"
 #define UV_BAU_TUNABLES_FILE		"bau_tunables"
 #define WHITESPACE			" \t\n"
-#define uv_mmask			((1UL << uv_hub_info->m_val) - 1)
-#define uv_physnodeaddr(x)		((__pa((unsigned long)(x)) & uv_mmask))
 #define cpubit_isset(cpu, bau_local_cpumask) \
 	test_bit((cpu), (bau_local_cpumask).bits)
 
@@ -387,6 +385,17 @@
 	/* bits 127:120 */
 };
 
+/* Abstracted BAU functions */
+struct bau_operations {
+	unsigned long (*read_l_sw_ack)(void);
+	unsigned long (*read_g_sw_ack)(int pnode);
+	unsigned long (*bau_gpa_to_offset)(unsigned long vaddr);
+	void (*write_l_sw_ack)(unsigned long mmr);
+	void (*write_g_sw_ack)(int pnode, unsigned long mmr);
+	void (*write_payload_first)(int pnode, unsigned long mmr);
+	void (*write_payload_last)(int pnode, unsigned long mmr);
+};
+
 /*
  * The activation descriptor:
  * The format of the message to send, plus all accompanying control
@@ -655,6 +664,16 @@
 	write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image);
 }
 
+static inline void write_mmr_proc_payload_first(int pnode, unsigned long mmr_image)
+{
+	write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_FIRST, mmr_image);
+}
+
+static inline void write_mmr_proc_payload_last(int pnode, unsigned long mmr_image)
+{
+	write_gmmr(pnode, UV4H_LB_PROC_INTD_QUEUE_LAST, mmr_image);
+}
+
 static inline void write_mmr_payload_first(int pnode, unsigned long mmr_image)
 {
 	write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image);
@@ -700,6 +719,26 @@
 	return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
 }
 
+static inline void write_mmr_proc_sw_ack(unsigned long mr)
+{
+	uv_write_local_mmr(UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
+}
+
+static inline void write_gmmr_proc_sw_ack(int pnode, unsigned long mr)
+{
+	write_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_CLEAR, mr);
+}
+
+static inline unsigned long read_mmr_proc_sw_ack(void)
+{
+	return read_lmmr(UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
+}
+
+static inline unsigned long read_gmmr_proc_sw_ack(int pnode)
+{
+	return read_gmmr(pnode, UV4H_LB_PROC_INTD_SOFT_ACK_PENDING);
+}
+
 static inline void write_mmr_data_config(int pnode, unsigned long mr)
 {
 	uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, mr);
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 43dc55b..2444189 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -41,6 +41,8 @@
 
 extern void __init init_vdso_image(const struct vdso_image *image);
 
+extern int map_vdso_once(const struct vdso_image *image, unsigned long addr);
+
 #endif /* __ASSEMBLER__ */
 
 #endif /* _ASM_X86_VDSO_H */
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index 2184943..69a6e07 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -26,6 +26,8 @@
 	__u32 socketid;	/* CPU socket ID */
 	__u32 apicid;	/* CPU initial apic ID */
 	__u64 mcgcap;	/* MCGCAP MSR: machine check capabilities of CPU */
+	__u64 synd;	/* MCA_SYND MSR: only valid on SMCA systems */
+	__u64 ipid;	/* MCA_IPID MSR: only valid on SMCA systems */
 };
 
 #define MCE_GET_RECORD_LEN   _IOR('M', 1, int)
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032..ae135de 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -6,4 +6,10 @@
 #define ARCH_GET_FS 0x1003
 #define ARCH_GET_GS 0x1004
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+# define ARCH_MAP_VDSO_X32	0x2001
+# define ARCH_MAP_VDSO_32	0x2002
+# define ARCH_MAP_VDSO_64	0x2003
+#endif
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0503f5b..45257cf 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -125,6 +125,12 @@
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 obj-$(CONFIG_TRACING)			+= tracepoint.o
 
+ifdef CONFIG_FRAME_POINTER
+obj-y					+= unwind_frame.o
+else
+obj-y					+= unwind_guess.o
+endif
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 3242e59..26b78d8 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_ACPI)		+= boot.o
 obj-$(CONFIG_ACPI_SLEEP)	+= sleep.o wakeup_$(BITS).o
 obj-$(CONFIG_ACPI_APEI)		+= apei.o
+obj-$(CONFIG_ACPI_CPPC_LIB)	+= cppc_msr.o
 
 ifneq ($(CONFIG_ACPI_PROCESSOR),)
 obj-y				+= cstate.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 90d84c3..32a7d70 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -176,15 +176,10 @@
 		return -EINVAL;
 	}
 
-	if (!enabled) {
-		++disabled_cpus;
-		return -EINVAL;
-	}
-
 	if (boot_cpu_physical_apicid != -1U)
-		ver = apic_version[boot_cpu_physical_apicid];
+		ver = boot_cpu_apic_version;
 
-	cpu = generic_processor_info(id, ver);
+	cpu = __generic_processor_info(id, ver, enabled);
 	if (cpu >= 0)
 		early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
 
@@ -282,6 +277,8 @@
 	if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
 		return -EINVAL;
 
+	acpi_table_print_madt_entry(header);
+
 	acpi_lapic_addr = lapic_addr_ovr->address;
 
 	return 0;
@@ -705,7 +702,7 @@
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 #include <acpi/processor.h>
 
-static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
 {
 #ifdef CONFIG_ACPI_NUMA
 	int nid;
@@ -716,6 +713,7 @@
 		numa_set_node(cpu, nid);
 	}
 #endif
+	return 0;
 }
 
 int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
@@ -998,21 +996,6 @@
 	if (!boot_cpu_has(X86_FEATURE_APIC))
 		return -ENODEV;
 
-	/*
-	 * Note that the LAPIC address is obtained from the MADT (32-bit value)
-	 * and (optionally) overridden by a LAPIC_ADDR_OVR entry (64-bit value).
-	 */
-
-	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE,
-				      acpi_parse_lapic_addr_ovr, 0);
-	if (count < 0) {
-		printk(KERN_ERR PREFIX
-		       "Error parsing LAPIC address override entry\n");
-		return count;
-	}
-
-	register_lapic_address(acpi_lapic_addr);
-
 	count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
 				      acpi_parse_sapic, MAX_LOCAL_APIC);
 
@@ -1031,8 +1014,8 @@
 			return ret;
 		}
 
-		x2count = madt_proc[0].count;
-		count = madt_proc[1].count;
+		count = madt_proc[0].count;
+		x2count = madt_proc[1].count;
 	}
 	if (!count && !x2count) {
 		printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -1513,7 +1496,7 @@
 	 * If acpi_disabled, bail out
 	 */
 	if (acpi_disabled)
-		return; 
+		return;
 
 	/*
 	 * Initialize the ACPI boot-time table parser.
diff --git a/arch/x86/kernel/acpi/cppc_msr.c b/arch/x86/kernel/acpi/cppc_msr.c
new file mode 100644
index 0000000..6fb478b
--- /dev/null
+++ b/arch/x86/kernel/acpi/cppc_msr.c
@@ -0,0 +1,58 @@
+/*
+ * cppc_msr.c:  MSR Interface for CPPC
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <acpi/cppc_acpi.h>
+#include <asm/msr.h>
+
+/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
+
+bool cpc_ffh_supported(void)
+{
+	return true;
+}
+
+int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
+{
+	int err;
+
+	err = rdmsrl_safe_on_cpu(cpunum, reg->address, val);
+	if (!err) {
+		u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+				       reg->bit_offset);
+
+		*val &= mask;
+		*val >>= reg->bit_offset;
+	}
+	return err;
+}
+
+int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+	u64 rd_val;
+	int err;
+
+	err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val);
+	if (!err) {
+		u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1,
+				       reg->bit_offset);
+
+		val <<= reg->bit_offset;
+		val &= mask;
+		rd_val &= ~mask;
+		rd_val |= val;
+		err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val);
+	}
+	return err;
+}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index adb3eaf..4858733 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -99,7 +99,7 @@
 	saved_magic = 0x12345678;
 #else /* CONFIG_64BIT */
 #ifdef CONFIG_SMP
-	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
+	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_table(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 20abd91..f266b8a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -64,6 +64,8 @@
 unsigned int boot_cpu_physical_apicid = -1U;
 EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
 
+u8 boot_cpu_apic_version;
+
 /*
  * The highest APIC ID seen during enumeration.
  */
@@ -313,7 +315,7 @@
 
 /* Clock divisor */
 #define APIC_DIVISOR 16
-#define TSC_DIVISOR  32
+#define TSC_DIVISOR  8
 
 /*
  * This function sets up the local APIC timer, with a timeout of
@@ -565,13 +567,37 @@
 				    CLOCK_EVT_FEAT_DUMMY);
 		levt->set_next_event = lapic_next_deadline;
 		clockevents_config_and_register(levt,
-						(tsc_khz / TSC_DIVISOR) * 1000,
+						tsc_khz * (1000 / TSC_DIVISOR),
 						0xF, ~0UL);
 	} else
 		clockevents_register_device(levt);
 }
 
 /*
+ * Install the updated TSC frequency from recalibration at the TSC
+ * deadline clockevent devices.
+ */
+static void __lapic_update_tsc_freq(void *info)
+{
+	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+
+	if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+		return;
+
+	clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
+}
+
+void lapic_update_tsc_freq(void)
+{
+	/*
+	 * The clockevent device's ->mult and ->shift can both be
+	 * changed. In order to avoid races, schedule the frequency
+	 * update code on each CPU.
+	 */
+	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
+}
+
+/*
  * In this functions we calibrate APIC bus clocks to the external timer.
  *
  * We want to do the calibration only once since we want to have local timer
@@ -1350,7 +1376,6 @@
 	 * Actually disabling the focus CPU check just makes the hang less
 	 * frequent as it makes the interrupt distributon model be more
 	 * like LRU than MRU (the short-term load is more even across CPUs).
-	 * See also the comment in end_level_ioapic_irq().  --macro
 	 */
 
 	/*
@@ -1599,6 +1624,9 @@
 	unsigned long flags;
 	int ret, ir_stat;
 
+	if (skip_ioapic_setup)
+		return;
+
 	ir_stat = irq_remapping_prepare();
 	if (ir_stat < 0 && !x2apic_supported())
 		return;
@@ -1789,8 +1817,7 @@
 		 * since smp_sanity_check is prepared for such a case
 		 * and disable smp mode
 		 */
-		apic_version[new_apicid] =
-			 GET_APIC_VERSION(apic_read(APIC_LVR));
+		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
 	}
 }
 
@@ -1801,17 +1828,14 @@
 	if (!x2apic_mode) {
 		set_fixmap_nocache(FIX_APIC_BASE, address);
 		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
-			    APIC_BASE, mp_lapic_addr);
+			    APIC_BASE, address);
 	}
 	if (boot_cpu_physical_apicid == -1U) {
 		boot_cpu_physical_apicid  = read_apic_id();
-		apic_version[boot_cpu_physical_apicid] =
-			 GET_APIC_VERSION(apic_read(APIC_LVR));
+		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
 	}
 }
 
-int apic_version[MAX_LOCAL_APIC];
-
 /*
  * Local APIC interrupts
  */
@@ -2000,7 +2024,53 @@
 	apic_write(APIC_LVT1, value);
 }
 
-int generic_processor_info(int apicid, int version)
+/*
+ * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated
+ * contiguously, it equals to current allocated max logical CPU ID plus 1.
+ * All allocated CPU ID should be in [0, nr_logical_cpuidi), so the maximum of
+ * nr_logical_cpuids is nr_cpu_ids.
+ *
+ * NOTE: Reserve 0 for BSP.
+ */
+static int nr_logical_cpuids = 1;
+
+/*
+ * Used to store mapping between logical CPU IDs and APIC IDs.
+ */
+static int cpuid_to_apicid[] = {
+	[0 ... NR_CPUS - 1] = -1,
+};
+
+/*
+ * Should use this API to allocate logical CPU IDs to keep nr_logical_cpuids
+ * and cpuid_to_apicid[] synchronized.
+ */
+static int allocate_logical_cpuid(int apicid)
+{
+	int i;
+
+	/*
+	 * cpuid <-> apicid mapping is persistent, so when a cpu is up,
+	 * check if the kernel has allocated a cpuid for it.
+	 */
+	for (i = 0; i < nr_logical_cpuids; i++) {
+		if (cpuid_to_apicid[i] == apicid)
+			return i;
+	}
+
+	/* Allocate a new cpuid. */
+	if (nr_logical_cpuids >= nr_cpu_ids) {
+		WARN_ONCE(1, "Only %d processors supported."
+			     "Processor %d/0x%x and the rest are ignored.\n",
+			     nr_cpu_ids - 1, nr_logical_cpuids, apicid);
+		return -1;
+	}
+
+	cpuid_to_apicid[nr_logical_cpuids] = apicid;
+	return nr_logical_cpuids++;
+}
+
+int __generic_processor_info(int apicid, int version, bool enabled)
 {
 	int cpu, max = nr_cpu_ids;
 	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2066,7 +2136,6 @@
 		return -EINVAL;
 	}
 
-	num_processors++;
 	if (apicid == boot_cpu_physical_apicid) {
 		/*
 		 * x86_bios_cpu_apicid is required to have processors listed
@@ -2076,8 +2145,16 @@
 		 * for BSP.
 		 */
 		cpu = 0;
-	} else
-		cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+		/* Logical cpuid 0 is reserved for BSP. */
+		cpuid_to_apicid[0] = apicid;
+	} else {
+		cpu = allocate_logical_cpuid(apicid);
+		if (cpu < 0) {
+			disabled_cpus++;
+			return -EINVAL;
+		}
+	}
 
 	/*
 	 * This can happen on physical hotplug. The sanity check at boot time
@@ -2089,6 +2166,7 @@
 
 		pr_warning("APIC: Package limit reached. Processor %d/0x%x ignored.\n",
 			   thiscpu, apicid);
+
 		disabled_cpus++;
 		return -ENOSPC;
 	}
@@ -2101,14 +2179,12 @@
 			   cpu, apicid);
 		version = 0x10;
 	}
-	apic_version[apicid] = version;
 
-	if (version != apic_version[boot_cpu_physical_apicid]) {
+	if (version != boot_cpu_apic_version) {
 		pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
-			apic_version[boot_cpu_physical_apicid], cpu, version);
+			boot_cpu_apic_version, cpu, version);
 	}
 
-	physid_set(apicid, phys_cpu_present_map);
 	if (apicid > max_physical_apicid)
 		max_physical_apicid = apicid;
 
@@ -2121,11 +2197,23 @@
 		apic->x86_32_early_logical_apicid(cpu);
 #endif
 	set_cpu_possible(cpu, true);
-	set_cpu_present(cpu, true);
+
+	if (enabled) {
+		num_processors++;
+		physid_set(apicid, phys_cpu_present_map);
+		set_cpu_present(cpu, true);
+	} else {
+		disabled_cpus++;
+	}
 
 	return cpu;
 }
 
+int generic_processor_info(int apicid, int version)
+{
+	return __generic_processor_info(apicid, version, true);
+}
+
 int hard_smp_processor_id(void)
 {
 	return read_apic_id();
@@ -2248,7 +2336,7 @@
 	 * Complain if the BIOS pretends there is one.
 	 */
 	if (!boot_cpu_has(X86_FEATURE_APIC) &&
-	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+	    APIC_INTEGRATED(boot_cpu_apic_version)) {
 		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
 			boot_cpu_physical_apicid);
 		return -1;
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 5b2ae10..a4d7ff2 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -25,7 +25,7 @@
 static struct apic apic_physflat;
 static struct apic apic_flat;
 
-struct apic __read_mostly *apic = &apic_flat;
+struct apic *apic __ro_after_init = &apic_flat;
 EXPORT_SYMBOL_GPL(apic);
 
 static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -116,27 +116,17 @@
 
 static unsigned int flat_get_apic_id(unsigned long x)
 {
-	unsigned int id;
-
-	id = (((x)>>24) & 0xFFu);
-
-	return id;
+	return (x >> 24) & 0xFF;
 }
 
 static unsigned long set_apic_id(unsigned int id)
 {
-	unsigned long x;
-
-	x = ((id & 0xFFu)<<24);
-	return x;
+	return (id & 0xFF) << 24;
 }
 
 static unsigned int read_xapic_id(void)
 {
-	unsigned int id;
-
-	id = flat_get_apic_id(apic_read(APIC_ID));
-	return id;
+	return flat_get_apic_id(apic_read(APIC_ID));
 }
 
 static int flat_apic_id_registered(void)
@@ -154,7 +144,7 @@
 	return 1;
 }
 
-static struct apic apic_flat =  {
+static struct apic apic_flat __ro_after_init = {
 	.name				= "flat",
 	.probe				= flat_probe,
 	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
@@ -248,7 +238,7 @@
 	return 0;
 }
 
-static struct apic apic_physflat =  {
+static struct apic apic_physflat __ro_after_init = {
 
 	.name				= "physical flat",
 	.probe				= physflat_probe,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index c05688b..b109e43 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -108,7 +108,7 @@
 	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 }
 
-struct apic apic_noop = {
+struct apic apic_noop __ro_after_init = {
 	.name				= "noop",
 	.probe				= noop_probe,
 	.acpi_madt_oem_check		= NULL,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 714d4fd..e08fe2c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -40,10 +40,7 @@
 
 static unsigned long numachip1_set_apic_id(unsigned int id)
 {
-	unsigned long x;
-
-	x = ((id & 0xffU) << 24);
-	return x;
+	return (id & 0xff) << 24;
 }
 
 static unsigned int numachip2_get_apic_id(unsigned long x)
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 06dbaa4..5601201 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -142,7 +142,7 @@
 	return dmi_bigsmp;
 }
 
-static struct apic apic_bigsmp = {
+static struct apic apic_bigsmp __ro_after_init = {
 
 	.name				= "bigsmp",
 	.probe				= probe_bigsmp,
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7491f41..48e6d84 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1593,7 +1593,7 @@
 	 * no meaning without the serial APIC bus.
 	 */
 	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
-		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		|| APIC_XAPIC(boot_cpu_apic_version))
 		return;
 	setup_ioapic_ids_from_mpc_nocheck();
 }
@@ -2423,7 +2423,7 @@
 static u8 io_apic_unique_id(int idx, u8 id)
 {
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+	    !APIC_XAPIC(boot_cpu_apic_version))
 		return io_apic_get_unique_id(idx, id);
 	else
 		return id;
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
index ade2532..015bbf3 100644
--- a/arch/x86/kernel/apic/msi.c
+++ b/arch/x86/kernel/apic/msi.c
@@ -269,7 +269,7 @@
 	hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
 }
 
-static struct irq_chip hpet_msi_controller = {
+static struct irq_chip hpet_msi_controller __ro_after_init = {
 	.name = "HPET-MSI",
 	.irq_unmask = hpet_msi_unmask,
 	.irq_mask = hpet_msi_mask,
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 7c43e71..c48264e 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -72,7 +72,7 @@
 	return 1;
 }
 
-static struct apic apic_default = {
+static struct apic apic_default __ro_after_init = {
 
 	.name				= "default",
 	.probe				= probe_default,
@@ -126,7 +126,7 @@
 
 apic_driver(apic_default);
 
-struct apic *apic = &apic_default;
+struct apic *apic __ro_after_init = &apic_default;
 EXPORT_SYMBOL_GPL(apic);
 
 static int cmdline_apic __initdata;
@@ -152,7 +152,7 @@
 
 void __init default_setup_apic_routing(void)
 {
-	int version = apic_version[boot_cpu_physical_apicid];
+	int version = boot_cpu_apic_version;
 
 	if (num_possible_cpus() > 8) {
 		switch (boot_cpu_data.x86_vendor) {
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 6368fa69..200af5a 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -155,7 +155,7 @@
 /*
  * At CPU state changes, update the x2apic cluster sibling info.
  */
-int x2apic_prepare_cpu(unsigned int cpu)
+static int x2apic_prepare_cpu(unsigned int cpu)
 {
 	if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
 		return -ENOMEM;
@@ -168,7 +168,7 @@
 	return 0;
 }
 
-int x2apic_dead_cpu(unsigned int this_cpu)
+static int x2apic_dead_cpu(unsigned int this_cpu)
 {
 	int cpu;
 
@@ -186,13 +186,18 @@
 static int x2apic_cluster_probe(void)
 {
 	int cpu = smp_processor_id();
+	int ret;
 
 	if (!x2apic_mode)
 		return 0;
 
+	ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
+				x2apic_prepare_cpu, x2apic_dead_cpu);
+	if (ret < 0) {
+		pr_err("Failed to register X2APIC_PREPARE\n");
+		return 0;
+	}
 	cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
-	cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
-			  x2apic_prepare_cpu, x2apic_dead_cpu);
 	return 1;
 }
 
@@ -222,7 +227,7 @@
 		cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
 }
 
-static struct apic apic_x2apic_cluster = {
+static struct apic apic_x2apic_cluster __ro_after_init = {
 
 	.name				= "cluster x2apic",
 	.probe				= x2apic_cluster_probe,
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 4f13f54f..ff111f0 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -98,7 +98,7 @@
 	return apic == &apic_x2apic_phys;
 }
 
-static struct apic apic_x2apic_phys = {
+static struct apic apic_x2apic_phys __ro_after_init = {
 
 	.name				= "physical x2apic",
 	.probe				= x2apic_phys_probe,
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 09b59ad..aeef53c 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -223,6 +223,11 @@
 	if (strncmp(oem_id, "SGI", 3) != 0)
 		return 0;
 
+	if (numa_off) {
+		pr_err("UV: NUMA is off, disabling UV support\n");
+		return 0;
+	}
+
 	/* Setup early hub type field in uv_hub_info for Node 0 */
 	uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
 
@@ -325,7 +330,7 @@
 	struct uv_gam_range_entry *gre = uv_gre_table;
 	struct uv_gam_range_s *grt;
 	unsigned long last_limit = 0, ram_limit = 0;
-	int bytes, i, sid, lsid = -1;
+	int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;
 
 	if (!gre)
 		return;
@@ -356,11 +361,12 @@
 		}
 		sid = gre->sockid - _min_socket;
 		if (lsid < sid) {		/* new range */
-			grt = &_gr_table[sid];
-			grt->base = lsid;
+			grt = &_gr_table[indx];
+			grt->base = lindx;
 			grt->nasid = gre->nasid;
 			grt->limit = last_limit = gre->limit;
 			lsid = sid;
+			lindx = indx++;
 			continue;
 		}
 		if (lsid == sid && !ram_limit) {	/* update range */
@@ -371,7 +377,7 @@
 		}
 		if (!ram_limit) {		/* non-contiguous ram range */
 			grt++;
-			grt->base = sid - 1;
+			grt->base = lindx;
 			grt->nasid = gre->nasid;
 			grt->limit = last_limit = gre->limit;
 			continue;
@@ -527,11 +533,8 @@
 
 static unsigned long set_apic_id(unsigned int id)
 {
-	unsigned long x;
-
-	/* maskout x2apic_extra_bits ? */
-	x = id;
-	return x;
+	/* CHECKME: Do we need to mask out the xapic extra bits? */
+	return id;
 }
 
 static unsigned int uv_read_apic_id(void)
@@ -554,7 +557,7 @@
 	return apic == &apic_x2apic_uv_x;
 }
 
-static struct apic __refdata apic_x2apic_uv_x = {
+static struct apic apic_x2apic_uv_x __ro_after_init = {
 
 	.name				= "UV large system",
 	.probe				= uv_probe,
@@ -921,7 +924,7 @@
 	mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
 }
 
-static void uv_heartbeat_enable(int cpu)
+static int uv_heartbeat_enable(unsigned int cpu)
 {
 	while (!uv_cpu_scir_info(cpu)->enabled) {
 		struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
@@ -935,43 +938,24 @@
 		/* also ensure that boot cpu is enabled */
 		cpu = 0;
 	}
+	return 0;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void uv_heartbeat_disable(int cpu)
+static int uv_heartbeat_disable(unsigned int cpu)
 {
 	if (uv_cpu_scir_info(cpu)->enabled) {
 		uv_cpu_scir_info(cpu)->enabled = 0;
 		del_timer(&uv_cpu_scir_info(cpu)->timer);
 	}
 	uv_set_cpu_scir_bits(cpu, 0xff);
-}
-
-/*
- * cpu hotplug notifier
- */
-static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action,
-			      void *hcpu)
-{
-	long cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_DOWN_FAILED:
-	case CPU_ONLINE:
-		uv_heartbeat_enable(cpu);
-		break;
-	case CPU_DOWN_PREPARE:
-		uv_heartbeat_disable(cpu);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
+	return 0;
 }
 
 static __init void uv_scir_register_cpu_notifier(void)
 {
-	hotcpu_notifier(uv_scir_cpu_notify, 0);
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/x2apic-uvx:online",
+				  uv_heartbeat_enable, uv_heartbeat_disable);
 }
 
 #else /* !CONFIG_HOTPLUG_CPU */
@@ -1155,19 +1139,18 @@
 	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
 		if (!index) {
 			pr_info("UV: GAM Range Table...\n");
-			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s %3s\n",
+			pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n",
 				"Range", "", "Size", "Type", "NASID",
-				"SID", "PN", "PXM");
+				"SID", "PN");
 		}
 		pr_info(
-		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x %3d\n",
+		"UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n",
 			index++,
 			(unsigned long)lgre << UV_GAM_RANGE_SHFT,
 			(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
 			((unsigned long)(gre->limit - lgre)) >>
 				(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
-			gre->type, gre->nasid, gre->sockid,
-			gre->pnode, gre->pxm);
+			gre->type, gre->nasid, gre->sockid, gre->pnode);
 
 		lgre = gre->limit;
 		if (sock_min > gre->sockid)
@@ -1286,7 +1269,7 @@
 		_pnode_to_socket[i] = SOCK_EMPTY;
 
 	/* fill in pnode/node/addr conversion list values */
-	pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
+	pr_info("UV: GAM Building socket/pnode conversion tables\n");
 	for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
 		if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
 			continue;
@@ -1294,20 +1277,18 @@
 		if (_socket_to_pnode[i] != SOCK_EMPTY)
 			continue;	/* duplicate */
 		_socket_to_pnode[i] = gre->pnode;
-		_socket_to_node[i] = gre->pxm;
 
 		i = gre->pnode - minpnode;
 		_pnode_to_socket[i] = gre->sockid;
 
 		pr_info(
-		"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
+		"UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
 			gre->sockid, gre->type, gre->nasid,
 			_socket_to_pnode[gre->sockid - minsock],
-			_socket_to_node[gre->sockid - minsock],
 			_pnode_to_socket[gre->pnode - minpnode]);
 	}
 
-	/* check socket -> node values */
+	/* Set socket -> node values */
 	lnid = -1;
 	for_each_present_cpu(cpu) {
 		int nid = cpu_to_node(cpu);
@@ -1318,14 +1299,9 @@
 		lnid = nid;
 		apicid = per_cpu(x86_cpu_to_apicid, cpu);
 		sockid = apicid >> uv_cpuid.socketid_shift;
-		i = sockid - minsock;
-
-		if (nid != _socket_to_node[i]) {
-			pr_warn(
-			"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
-				i, sockid, gre->type, _socket_to_node[i], nid);
-			_socket_to_node[i] = nid;
-		}
+		_socket_to_node[sockid - minsock] = nid;
+		pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
+			sockid, apicid, nid);
 	}
 
 	/* Setup physical blade to pnode translation from GAM Range Table */
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 2bd5c6f..c62e015 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -29,10 +29,13 @@
 
 void common(void) {
 	BLANK();
-	OFFSET(TI_flags, thread_info, flags);
-	OFFSET(TI_status, thread_info, status);
+	OFFSET(TASK_threadsp, task_struct, thread.sp);
+#ifdef CONFIG_CC_STACKPROTECTOR
+	OFFSET(TASK_stack_canary, task_struct, stack_canary);
+#endif
 
 	BLANK();
+	OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
 	OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
 
 	BLANK();
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ecdc1d2..880aa09 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@
 	/* Size of SYSENTER_stack */
 	DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+	BLANK();
+	OFFSET(stack_canary_offset, stack_canary, canary);
+#endif
+
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
 	BLANK();
 	OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d875f97..210927e 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	BLANK();
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+	DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+	BLANK();
+#endif
+
 	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
 	DEFINE(NR_syscalls, sizeof(syscalls_64));
 
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f5c69d8..b81fe2d 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -669,6 +669,17 @@
 		set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
 }
 
+#define MSR_AMD64_DE_CFG	0xC0011029
+
+static void init_amd_ln(struct cpuinfo_x86 *c)
+{
+	/*
+	 * Apply erratum 665 fix unconditionally so machines without a BIOS
+	 * fix work.
+	 */
+	msr_set_bit(MSR_AMD64_DE_CFG, 31);
+}
+
 static void init_amd_bd(struct cpuinfo_x86 *c)
 {
 	u64 value;
@@ -726,6 +737,7 @@
 	case 6:	   init_amd_k7(c); break;
 	case 0xf:  init_amd_k8(c); break;
 	case 0x10: init_amd_gh(c); break;
+	case 0x12: init_amd_ln(c); break;
 	case 0x15: init_amd_bd(c); break;
 	}
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 809eda0..9bd910a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -804,21 +804,20 @@
 		identify_cpu_without_cpuid(c);
 
 	/* cyrix could have cpuid enabled via c_identify()*/
-	if (!have_cpuid_p())
-		return;
+	if (have_cpuid_p()) {
+		cpu_detect(c);
+		get_cpu_vendor(c);
+		get_cpu_cap(c);
 
-	cpu_detect(c);
-	get_cpu_vendor(c);
-	get_cpu_cap(c);
+		if (this_cpu->c_early_init)
+			this_cpu->c_early_init(c);
 
-	if (this_cpu->c_early_init)
-		this_cpu->c_early_init(c);
+		c->cpu_index = 0;
+		filter_cpuid_features(c, false);
 
-	c->cpu_index = 0;
-	filter_cpuid_features(c, false);
-
-	if (this_cpu->c_bsp_init)
-		this_cpu->c_bsp_init(c);
+		if (this_cpu->c_bsp_init)
+			this_cpu->c_bsp_init(c);
+	}
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 	fpu__init_system(c);
@@ -1265,9 +1264,14 @@
 __setup("clearcpuid=", setup_disablecpuid);
 
 #ifdef CONFIG_X86_64
-struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
-struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
-				    (unsigned long) debug_idt_table };
+struct desc_ptr idt_descr __ro_after_init = {
+	.size = NR_VECTORS * 16 - 1,
+	.address = (unsigned long) idt_table,
+};
+const struct desc_ptr debug_idt_descr = {
+	.size = NR_VECTORS * 16 - 1,
+	.address = (unsigned long) debug_idt_table,
+};
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
 		     irq_stack_union) __aligned(PAGE_SIZE) __visible;
@@ -1281,7 +1285,7 @@
 EXPORT_PER_CPU_SYMBOL(current_task);
 
 DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
 
 DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
 
@@ -1305,11 +1309,6 @@
 /* May not be marked __init: used by software suspend */
 void syscall_init(void)
 {
-	/*
-	 * LSTAR and STAR live in a bit strange symbiosis.
-	 * They both write to the same internal register. STAR allows to
-	 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
-	 */
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
 	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 27e4665..35691a6 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -86,3 +86,14 @@
 	       x86_hyper->x2apic_available &&
 	       x86_hyper->x2apic_available();
 }
+
+void hypervisor_pin_vcpu(int cpu)
+{
+	if (!x86_hyper)
+		return;
+
+	if (x86_hyper->pin_vcpu)
+		x86_hyper->pin_vcpu(cpu);
+	else
+		WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 79d8ec8..a7fdf45 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -41,6 +41,7 @@
 #include <linux/debugfs.h>
 #include <linux/irq_work.h>
 #include <linux/export.h>
+#include <linux/jump_label.h>
 
 #include <asm/processor.h>
 #include <asm/traps.h>
@@ -292,6 +293,13 @@
 	if (m->misc)
 		pr_cont("MISC %llx ", m->misc);
 
+	if (mce_flags.smca) {
+		if (m->synd)
+			pr_cont("SYND %llx ", m->synd);
+		if (m->ipid)
+			pr_cont("IPID %llx ", m->ipid);
+	}
+
 	pr_cont("\n");
 	/*
 	 * Note this output is parsed by external tools and old fields
@@ -568,6 +576,7 @@
 {
 	if (m->status & MCI_STATUS_MISCV)
 		m->misc = mce_rdmsrl(msr_ops.misc(i));
+
 	if (m->status & MCI_STATUS_ADDRV) {
 		m->addr = mce_rdmsrl(msr_ops.addr(i));
 
@@ -579,6 +588,23 @@
 			m->addr >>= shift;
 			m->addr <<= shift;
 		}
+
+		/*
+		 * Extract [55:<lsb>] where lsb is the least significant
+		 * *valid* bit of the address bits.
+		 */
+		if (mce_flags.smca) {
+			u8 lsb = (m->addr >> 56) & 0x3f;
+
+			m->addr &= GENMASK_ULL(55, lsb);
+		}
+	}
+
+	if (mce_flags.smca) {
+		m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i));
+
+		if (m->status & MCI_STATUS_SYNDV)
+			m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i));
 	}
 }
 
@@ -1633,17 +1659,6 @@
 
 		if (c->x86 == 6 && c->x86_model == 45)
 			quirk_no_way_out = quirk_sandybridge_ifu;
-		/*
-		 * MCG_CAP.MCG_SER_P is necessary but not sufficient to know
-		 * whether this processor will actually generate recoverable
-		 * machine checks. Check to see if this is an E7 model Xeon.
-		 * We can't do a model number check because E5 and E7 use the
-		 * same model number. E5 doesn't support recovery, E7 does.
-		 */
-		if (mca_cfg.recovery || (mca_cfg.ser &&
-			!strncmp(c->x86_model_id,
-				 "Intel(R) Xeon(R) CPU E7-", 24)))
-			set_cpu_cap(c, X86_FEATURE_MCE_RECOVERY);
 	}
 	if (cfg->monarch_timeout < 0)
 		cfg->monarch_timeout = 0;
@@ -2080,6 +2095,7 @@
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
  * mce=bios_cmci_threshold Don't program the CMCI threshold
+ * mce=recovery force enable memcpy_mcsafe()
  */
 static int __init mcheck_enable(char *str)
 {
@@ -2676,8 +2692,14 @@
 static int __init mcheck_debugfs_init(void) { return -EINVAL; }
 #endif
 
+DEFINE_STATIC_KEY_FALSE(mcsafe_key);
+EXPORT_SYMBOL_GPL(mcsafe_key);
+
 static int __init mcheck_late_init(void)
 {
+	if (mca_cfg.recovery)
+		static_branch_inc(&mcsafe_key);
+
 	mcheck_debugfs_init();
 
 	/*
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 7b7f3be..9b54034 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
+#include <linux/string.h>
 
 #include <asm/amd_nb.h>
 #include <asm/apic.h>
@@ -63,34 +64,71 @@
 	"execution_unit",
 };
 
-/* Define HWID to IP type mappings for Scalable MCA */
-struct amd_hwid amd_hwids[] = {
-	[SMCA_F17H_CORE]	= { "f17h_core",	0xB0 },
-	[SMCA_DF]		= { "data_fabric",	0x2E },
-	[SMCA_UMC]		= { "umc",		0x96 },
-	[SMCA_PB]		= { "param_block",	0x5 },
-	[SMCA_PSP]		= { "psp",		0xFF },
-	[SMCA_SMU]		= { "smu",		0x1 },
+static const char * const smca_umc_block_names[] = {
+	"dram_ecc",
+	"misc_umc"
 };
-EXPORT_SYMBOL_GPL(amd_hwids);
 
-const char * const amd_core_mcablock_names[] = {
-	[SMCA_LS]		= "load_store",
-	[SMCA_IF]		= "insn_fetch",
-	[SMCA_L2_CACHE]		= "l2_cache",
-	[SMCA_DE]		= "decode_unit",
-	[RES]			= "",
-	[SMCA_EX]		= "execution_unit",
-	[SMCA_FP]		= "floating_point",
-	[SMCA_L3_CACHE]		= "l3_cache",
+struct smca_bank_name smca_bank_names[] = {
+	[SMCA_LS]	= { "load_store",	"Load Store Unit" },
+	[SMCA_IF]	= { "insn_fetch",	"Instruction Fetch Unit" },
+	[SMCA_L2_CACHE]	= { "l2_cache",		"L2 Cache" },
+	[SMCA_DE]	= { "decode_unit",	"Decode Unit" },
+	[SMCA_EX]	= { "execution_unit",	"Execution Unit" },
+	[SMCA_FP]	= { "floating_point",	"Floating Point Unit" },
+	[SMCA_L3_CACHE]	= { "l3_cache",		"L3 Cache" },
+	[SMCA_CS]	= { "coherent_slave",	"Coherent Slave" },
+	[SMCA_PIE]	= { "pie",		"Power, Interrupts, etc." },
+	[SMCA_UMC]	= { "umc",		"Unified Memory Controller" },
+	[SMCA_PB]	= { "param_block",	"Parameter Block" },
+	[SMCA_PSP]	= { "psp",		"Platform Security Processor" },
+	[SMCA_SMU]	= { "smu",		"System Management Unit" },
 };
-EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
+EXPORT_SYMBOL_GPL(smca_bank_names);
 
-const char * const amd_df_mcablock_names[] = {
-	[SMCA_CS]		= "coherent_slave",
-	[SMCA_PIE]		= "pie",
+static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
+	/* { bank_type, hwid_mcatype, xec_bitmap } */
+
+	/* ZN Core (HWID=0xB0) MCA types */
+	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
+	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
+	{ SMCA_DE,	 HWID_MCATYPE(0xB0, 0x3), 0x1FF },
+	/* HWID 0xB0 MCATYPE 0x4 is Reserved */
+	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+	{ SMCA_FP,	 HWID_MCATYPE(0xB0, 0x6), 0x7F },
+	{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
+
+	/* Data Fabric MCA types */
+	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0), 0x1FF },
+	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0xF },
+
+	/* Unified Memory Controller MCA type */
+	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0x3F },
+
+	/* Parameter Block MCA type */
+	{ SMCA_PB,	 HWID_MCATYPE(0x05, 0x0), 0x1 },
+
+	/* Platform Security Processor MCA type */
+	{ SMCA_PSP,	 HWID_MCATYPE(0xFF, 0x0), 0x1 },
+
+	/* System Management Unit MCA type */
+	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0), 0x1 },
 };
-EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
+
+struct smca_bank_info smca_banks[MAX_NR_BANKS];
+EXPORT_SYMBOL_GPL(smca_banks);
+
+/*
+ * In SMCA enabled processors, we can have multiple banks for a given IP type.
+ * So to define a unique name for each bank, we use a temp c-string to append
+ * the MCA_IPID[InstanceId] to type's name in get_name().
+ *
+ * InstanceId is 32 bits which is 8 characters. Make sure MAX_MCATYPE_NAME_LEN
+ * is greater than 8 plus 1 (for underscore) plus length of longest type name.
+ */
+#define MAX_MCATYPE_NAME_LEN	30
+static char buf_mcatype[MAX_MCATYPE_NAME_LEN];
 
 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned int, bank_map);	/* see which banks are on */
@@ -108,6 +146,36 @@
  * CPU Initialization
  */
 
+static void get_smca_bank_info(unsigned int bank)
+{
+	unsigned int i, hwid_mcatype, cpu = smp_processor_id();
+	struct smca_hwid_mcatype *type;
+	u32 high, instanceId;
+	u16 hwid, mcatype;
+
+	/* Collect bank_info using CPU 0 for now. */
+	if (cpu)
+		return;
+
+	if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
+		pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
+		return;
+	}
+
+	hwid = high & MCI_IPID_HWID;
+	mcatype = (high & MCI_IPID_MCATYPE) >> 16;
+	hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
+
+	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
+		type = &smca_hwid_mcatypes[i];
+		if (hwid_mcatype == type->hwid_mcatype) {
+			smca_banks[bank].type = type;
+			smca_banks[bank].type_instance = instanceId;
+			break;
+		}
+	}
+}
+
 struct thresh_restart {
 	struct threshold_block	*b;
 	int			reset;
@@ -293,7 +361,7 @@
 	wrmsr(MSR_CU_DEF_ERR, low, high);
 }
 
-static u32 get_block_address(u32 current_addr, u32 low, u32 high,
+static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
 			     unsigned int bank, unsigned int block)
 {
 	u32 addr = 0, offset = 0;
@@ -309,13 +377,13 @@
 			 */
 			u32 low, high;
 
-			if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+			if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
 				return addr;
 
 			if (!(low & MCI_CONFIG_MCAX))
 				return addr;
 
-			if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+			if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
 			    (low & MASK_BLKPTR_LO))
 				addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
 		}
@@ -395,6 +463,20 @@
 		 */
 		smca_high &= ~BIT(2);
 
+		/*
+		 * SMCA sets the Deferred Error Interrupt type per bank.
+		 *
+		 * MCA_CONFIG[DeferredIntTypeSupported] is bit 5, and tells us
+		 * if the DeferredIntType bit field is available.
+		 *
+		 * MCA_CONFIG[DeferredIntType] is bits [38:37] ([6:5] in the
+		 * high portion of the MSR). OS should set this to 0x1 to enable
+		 * APIC based interrupt. First, check that no interrupt has been
+		 * set.
+		 */
+		if ((smca_low & BIT(5)) && !((smca_high >> 5) & 0x3))
+			smca_high |= BIT(5);
+
 		wrmsr(smca_addr, smca_low, smca_high);
 	}
 
@@ -421,12 +503,15 @@
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
 	u32 low = 0, high = 0, address = 0;
-	unsigned int bank, block;
+	unsigned int bank, block, cpu = smp_processor_id();
 	int offset = -1;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
+		if (mce_flags.smca)
+			get_smca_bank_info(bank);
+
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			address = get_block_address(address, low, high, bank, block);
+			address = get_block_address(cpu, address, low, high, bank, block);
 			if (!address)
 				break;
 
@@ -476,9 +561,27 @@
 	if (threshold_err)
 		m.misc = misc;
 
-	if (m.status & MCI_STATUS_ADDRV)
+	if (m.status & MCI_STATUS_ADDRV) {
 		rdmsrl(msr_addr, m.addr);
 
+		/*
+		 * Extract [55:<lsb>] where lsb is the least significant
+		 * *valid* bit of the address bits.
+		 */
+		if (mce_flags.smca) {
+			u8 lsb = (m.addr >> 56) & 0x3f;
+
+			m.addr &= GENMASK_ULL(55, lsb);
+		}
+	}
+
+	if (mce_flags.smca) {
+		rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid);
+
+		if (m.status & MCI_STATUS_SYNDV)
+			rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd);
+	}
+
 	mce_log(&m);
 
 	wrmsrl(msr_status, 0);
@@ -541,15 +644,14 @@
 static void amd_threshold_interrupt(void)
 {
 	u32 low = 0, high = 0, address = 0;
-	int cpu = smp_processor_id();
-	unsigned int bank, block;
+	unsigned int bank, block, cpu = smp_processor_id();
 
 	/* assume first bank caused it */
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
-			address = get_block_address(address, low, high, bank, block);
+			address = get_block_address(cpu, address, low, high, bank, block);
 			if (!address)
 				break;
 
@@ -713,6 +815,34 @@
 	.default_attrs		= default_attrs,
 };
 
+static const char *get_name(unsigned int bank, struct threshold_block *b)
+{
+	unsigned int bank_type;
+
+	if (!mce_flags.smca) {
+		if (b && bank == 4)
+			return bank4_names(b);
+
+		return th_names[bank];
+	}
+
+	if (!smca_banks[bank].type)
+		return NULL;
+
+	bank_type = smca_banks[bank].type->bank_type;
+
+	if (b && bank_type == SMCA_UMC) {
+		if (b->block < ARRAY_SIZE(smca_umc_block_names))
+			return smca_umc_block_names[b->block];
+		return NULL;
+	}
+
+	snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN,
+		 "%s_%x", smca_bank_names[bank_type].name,
+			  smca_banks[bank].type_instance);
+	return buf_mcatype;
+}
+
 static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
 				     unsigned int block, u32 address)
 {
@@ -767,11 +897,11 @@
 
 	err = kobject_init_and_add(&b->kobj, &threshold_ktype,
 				   per_cpu(threshold_banks, cpu)[bank]->kobj,
-				   (bank == 4 ? bank4_names(b) : th_names[bank]));
+				   get_name(bank, b));
 	if (err)
 		goto out_free;
 recurse:
-	address = get_block_address(address, low, high, bank, ++block);
+	address = get_block_address(cpu, address, low, high, bank, ++block);
 	if (!address)
 		return 0;
 
@@ -822,7 +952,7 @@
 	struct device *dev = per_cpu(mce_device, cpu);
 	struct amd_northbridge *nb = NULL;
 	struct threshold_bank *b = NULL;
-	const char *name = th_names[bank];
+	const char *name = get_name(bank, NULL);
 	int err = 0;
 
 	if (is_shared_bank(bank)) {
@@ -869,7 +999,7 @@
 		}
 	}
 
-	err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
+	err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank));
 	if (!err)
 		goto out;
 
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 27a0228..620ab06 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -54,6 +54,7 @@
  */
 static u8 *container;
 static size_t container_size;
+static bool ucode_builtin;
 
 static u32 ucode_new_rev;
 static u8 amd_ucode_patch[PATCH_MAX_SIZE];
@@ -281,18 +282,22 @@
 void __init load_ucode_amd_bsp(unsigned int family)
 {
 	struct cpio_data cp;
+	bool *builtin;
 	void **data;
 	size_t *size;
 
 #ifdef CONFIG_X86_32
 	data =  (void **)__pa_nodebug(&ucode_cpio.data);
 	size = (size_t *)__pa_nodebug(&ucode_cpio.size);
+	builtin = (bool *)__pa_nodebug(&ucode_builtin);
 #else
 	data = &ucode_cpio.data;
 	size = &ucode_cpio.size;
+	builtin = &ucode_builtin;
 #endif
 
-	if (!load_builtin_amd_microcode(&cp, family))
+	*builtin = load_builtin_amd_microcode(&cp, family);
+	if (!*builtin)
 		cp = find_ucode_in_initrd();
 
 	if (!(cp.data && cp.size))
@@ -355,6 +360,7 @@
 	unsigned int cpu = smp_processor_id();
 	struct equiv_cpu_entry *eq;
 	struct microcode_amd *mc;
+	u8 *cont = container;
 	u32 rev, eax;
 	u16 eq_id;
 
@@ -371,8 +377,12 @@
 	if (check_current_patch_level(&rev, false))
 		return;
 
+	/* Add CONFIG_RANDOMIZE_MEMORY offset. */
+	if (!ucode_builtin)
+		cont += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
 	eax = cpuid_eax(0x00000001);
-	eq  = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ);
+	eq  = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ);
 
 	eq_id = find_equiv_id(eq, eax);
 	if (!eq_id)
@@ -434,6 +444,10 @@
 	else
 		container = cont_va;
 
+	/* Add CONFIG_RANDOMIZE_MEMORY offset. */
+	if (!ucode_builtin)
+		container += PAGE_OFFSET - __PAGE_OFFSET_BASE;
+
 	eax   = cpuid_eax(0x00000001);
 	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
 
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index df04b2d..5ce5155 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -558,55 +558,36 @@
 	.resume			= mc_bp_resume,
 };
 
-static int
-mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
+static int mc_cpu_online(unsigned int cpu)
 {
-	unsigned int cpu = (unsigned long)hcpu;
 	struct device *dev;
 
 	dev = get_cpu_device(cpu);
+	microcode_update_cpu(cpu);
+	pr_debug("CPU%d added\n", cpu);
 
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-		microcode_update_cpu(cpu);
-		pr_debug("CPU%d added\n", cpu);
-		/*
-		 * "break" is missing on purpose here because we want to fall
-		 * through in order to create the sysfs group.
-		 */
+	if (sysfs_create_group(&dev->kobj, &mc_attr_group))
+		pr_err("Failed to create group for CPU%d\n", cpu);
+	return 0;
+}
 
-	case CPU_DOWN_FAILED:
-		if (sysfs_create_group(&dev->kobj, &mc_attr_group))
-			pr_err("Failed to create group for CPU%d\n", cpu);
-		break;
+static int mc_cpu_down_prep(unsigned int cpu)
+{
+	struct device *dev;
 
-	case CPU_DOWN_PREPARE:
-		/* Suspend is in progress, only remove the interface */
-		sysfs_remove_group(&dev->kobj, &mc_attr_group);
-		pr_debug("CPU%d removed\n", cpu);
-		break;
-
+	dev = get_cpu_device(cpu);
+	/* Suspend is in progress, only remove the interface */
+	sysfs_remove_group(&dev->kobj, &mc_attr_group);
+	pr_debug("CPU%d removed\n", cpu);
 	/*
-	 * case CPU_DEAD:
-	 *
 	 * When a CPU goes offline, don't free up or invalidate the copy of
 	 * the microcode in kernel memory, so that we can reuse it when the
 	 * CPU comes back online without unnecessarily requesting the userspace
 	 * for it again.
 	 */
-	}
-
-	/* The CPU refused to come up during a system resume */
-	if (action == CPU_UP_CANCELED_FROZEN)
-		microcode_fini_cpu(cpu);
-
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block mc_cpu_notifier = {
-	.notifier_call	= mc_cpu_callback,
-};
-
 static struct attribute *cpu_root_microcode_attrs[] = {
 	&dev_attr_reload.attr,
 	NULL
@@ -665,7 +646,8 @@
 		goto out_ucode_group;
 
 	register_syscore_ops(&mc_syscore_ops);
-	register_hotcpu_notifier(&mc_cpu_notifier);
+	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/microcode:online",
+				  mc_cpu_online, mc_cpu_down_prep);
 
 	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
 		" <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 28f1b54..24e87e7 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -72,14 +72,14 @@
 u64 size_or_mask, size_and_mask;
 static bool mtrr_aps_delayed_init;
 
-static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
+static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
 
 const struct mtrr_ops *mtrr_if;
 
 static void set_mtrr(unsigned int reg, unsigned long base,
 		     unsigned long size, mtrr_type type);
 
-void set_mtrr_ops(const struct mtrr_ops *ops)
+void __init set_mtrr_ops(const struct mtrr_ops *ops)
 {
 	if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
 		mtrr_ops[ops->vendor] = ops;
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 6c7ced0..ad8bd76 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -54,7 +54,7 @@
 bool get_mtrr_state(void);
 void mtrr_bp_pat_init(void);
 
-extern void set_mtrr_ops(const struct mtrr_ops *ops);
+extern void __init set_mtrr_ops(const struct mtrr_ops *ops);
 
 extern u64 size_or_mask, size_and_mask;
 extern const struct mtrr_ops *mtrr_if;
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 92e8f0a..9b7cf5c 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -17,7 +17,7 @@
 #include <linux/sysfs.h>
 
 #include <asm/stacktrace.h>
-
+#include <asm/unwind.h>
 
 int panic_on_unrecovered_nmi;
 int panic_on_io_nmi;
@@ -25,11 +25,29 @@
 int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
 static int die_counter;
 
-static void printk_stack_address(unsigned long address, int reliable,
-		void *data)
+bool in_task_stack(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info)
 {
+	unsigned long *begin = task_stack_page(task);
+	unsigned long *end   = task_stack_page(task) + THREAD_SIZE;
+
+	if (stack < begin || stack >= end)
+		return false;
+
+	info->type	= STACK_TYPE_TASK;
+	info->begin	= begin;
+	info->end	= end;
+	info->next_sp	= NULL;
+
+	return true;
+}
+
+static void printk_stack_address(unsigned long address, int reliable,
+				 char *log_lvl)
+{
+	touch_nmi_watchdog();
 	printk("%s [<%p>] %s%pB\n",
-		(char *)data, (void *)address, reliable ? "" : "? ",
+		log_lvl, (void *)address, reliable ? "" : "? ",
 		(void *)address);
 }
 
@@ -38,176 +56,120 @@
 	pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
 }
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-static void
-print_ftrace_graph_addr(unsigned long addr, void *data,
-			const struct stacktrace_ops *ops,
-			struct task_struct *task, int *graph)
+void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *stack, char *log_lvl)
 {
-	unsigned long ret_addr;
-	int index;
+	struct unwind_state state;
+	struct stack_info stack_info = {0};
+	unsigned long visit_mask = 0;
+	int graph_idx = 0;
 
-	if (addr != (unsigned long)return_to_handler)
-		return;
+	printk("%sCall Trace:\n", log_lvl);
 
-	index = task->curr_ret_stack;
-
-	if (!task->ret_stack || index < *graph)
-		return;
-
-	index -= *graph;
-	ret_addr = task->ret_stack[index].ret;
-
-	ops->address(data, ret_addr, 1);
-
-	(*graph)++;
-}
-#else
-static inline void
-print_ftrace_graph_addr(unsigned long addr, void *data,
-			const struct stacktrace_ops *ops,
-			struct task_struct *task, int *graph)
-{ }
-#endif
-
-/*
- * x86-64 can have up to three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-static inline int valid_stack_ptr(struct task_struct *task,
-			void *p, unsigned int size, void *end)
-{
-	void *t = task_stack_page(task);
-	if (end) {
-		if (p < end && p >= (end-THREAD_SIZE))
-			return 1;
-		else
-			return 0;
-	}
-	return p >= t && p < t + THREAD_SIZE - size;
-}
-
-unsigned long
-print_context_stack(struct task_struct *task,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data,
-		unsigned long *end, int *graph)
-{
-	struct stack_frame *frame = (struct stack_frame *)bp;
+	unwind_start(&state, task, regs, stack);
 
 	/*
-	 * If we overflowed the stack into a guard page, jump back to the
-	 * bottom of the usable stack.
+	 * Iterate through the stacks, starting with the current stack pointer.
+	 * Each stack has a pointer to the next one.
+	 *
+	 * x86-64 can have several stacks:
+	 * - task stack
+	 * - interrupt stack
+	 * - HW exception stacks (double fault, nmi, debug, mce)
+	 *
+	 * x86-32 can have up to three stacks:
+	 * - task stack
+	 * - softirq stack
+	 * - hardirq stack
 	 */
-	if ((unsigned long)task_stack_page(task) - (unsigned long)stack <
-	    PAGE_SIZE)
-		stack = (unsigned long *)task_stack_page(task);
+	for (; stack; stack = stack_info.next_sp) {
+		const char *str_begin, *str_end;
 
-	while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
-		unsigned long addr;
+		/*
+		 * If we overflowed the task stack into a guard page, jump back
+		 * to the bottom of the usable stack.
+		 */
+		if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
+			stack = task_stack_page(task);
 
-		addr = *stack;
-		if (__kernel_text_address(addr)) {
-			if ((unsigned long) stack == bp + sizeof(long)) {
-				ops->address(data, addr, 1);
-				frame = frame->next_frame;
-				bp = (unsigned long) frame;
-			} else {
-				ops->address(data, addr, 0);
-			}
-			print_ftrace_graph_addr(addr, data, ops, task, graph);
+		if (get_stack_info(stack, task, &stack_info, &visit_mask))
+			break;
+
+		stack_type_str(stack_info.type, &str_begin, &str_end);
+		if (str_begin)
+			printk("%s <%s> ", log_lvl, str_begin);
+
+		/*
+		 * Scan the stack, printing any text addresses we find.  At the
+		 * same time, follow proper stack frames with the unwinder.
+		 *
+		 * Addresses found during the scan which are not reported by
+		 * the unwinder are considered to be additional clues which are
+		 * sometimes useful for debugging and are prefixed with '?'.
+		 * This also serves as a failsafe option in case the unwinder
+		 * goes off in the weeds.
+		 */
+		for (; stack < stack_info.end; stack++) {
+			unsigned long real_addr;
+			int reliable = 0;
+			unsigned long addr = *stack;
+			unsigned long *ret_addr_p =
+				unwind_get_return_address_ptr(&state);
+
+			if (!__kernel_text_address(addr))
+				continue;
+
+			if (stack == ret_addr_p)
+				reliable = 1;
+
+			/*
+			 * When function graph tracing is enabled for a
+			 * function, its return address on the stack is
+			 * replaced with the address of an ftrace handler
+			 * (return_to_handler).  In that case, before printing
+			 * the "real" address, we want to print the handler
+			 * address as an "unreliable" hint that function graph
+			 * tracing was involved.
+			 */
+			real_addr = ftrace_graph_ret_addr(task, &graph_idx,
+							  addr, stack);
+			if (real_addr != addr)
+				printk_stack_address(addr, 0, log_lvl);
+			printk_stack_address(real_addr, reliable, log_lvl);
+
+			if (!reliable)
+				continue;
+
+			/*
+			 * Get the next frame from the unwinder.  No need to
+			 * check for an error: if anything goes wrong, the rest
+			 * of the addresses will just be printed as unreliable.
+			 */
+			unwind_next_frame(&state);
 		}
-		stack++;
+
+		if (str_end)
+			printk("%s <%s> ", log_lvl, str_end);
 	}
-	return bp;
-}
-EXPORT_SYMBOL_GPL(print_context_stack);
-
-unsigned long
-print_context_stack_bp(struct task_struct *task,
-		       unsigned long *stack, unsigned long bp,
-		       const struct stacktrace_ops *ops, void *data,
-		       unsigned long *end, int *graph)
-{
-	struct stack_frame *frame = (struct stack_frame *)bp;
-	unsigned long *ret_addr = &frame->return_address;
-
-	while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
-		unsigned long addr = *ret_addr;
-
-		if (!__kernel_text_address(addr))
-			break;
-
-		if (ops->address(data, addr, 1))
-			break;
-		frame = frame->next_frame;
-		ret_addr = &frame->return_address;
-		print_ftrace_graph_addr(addr, data, ops, task, graph);
-	}
-
-	return (unsigned long)frame;
-}
-EXPORT_SYMBOL_GPL(print_context_stack_bp);
-
-static int print_trace_stack(void *data, char *name)
-{
-	printk("%s <%s> ", (char *)data, name);
-	return 0;
-}
-
-/*
- * Print one address/symbol entries per line.
- */
-static int print_trace_address(void *data, unsigned long addr, int reliable)
-{
-	touch_nmi_watchdog();
-	printk_stack_address(addr, reliable, data);
-	return 0;
-}
-
-static const struct stacktrace_ops print_trace_ops = {
-	.stack			= print_trace_stack,
-	.address		= print_trace_address,
-	.walk_stack		= print_context_stack,
-};
-
-void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp, char *log_lvl)
-{
-	printk("%sCall Trace:\n", log_lvl);
-	dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
-}
-
-void show_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp)
-{
-	show_trace_log_lvl(task, regs, stack, bp, "");
 }
 
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
-	unsigned long bp = 0;
-	unsigned long stack;
+	task = task ? : current;
 
 	/*
 	 * Stack frames below this one aren't interesting.  Don't show them
 	 * if we're printing for %current.
 	 */
-	if (!sp && (!task || task == current)) {
-		sp = &stack;
-		bp = stack_frame(current, NULL);
-	}
+	if (!sp && task == current)
+		sp = get_stack_pointer(current, NULL);
 
-	show_stack_log_lvl(task, NULL, sp, bp, "");
+	show_stack_log_lvl(task, NULL, sp, "");
 }
 
 void show_stack_regs(struct pt_regs *regs)
 {
-	show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, "");
+	show_stack_log_lvl(current, regs, NULL, "");
 }
 
 static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 0967571..06eb322 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,93 +16,121 @@
 
 #include <asm/stacktrace.h>
 
-static void *is_irq_stack(void *p, void *irq)
+void stack_type_str(enum stack_type type, const char **begin, const char **end)
 {
-	if (p < irq || p >= (irq + THREAD_SIZE))
-		return NULL;
-	return irq + THREAD_SIZE;
+	switch (type) {
+	case STACK_TYPE_IRQ:
+	case STACK_TYPE_SOFTIRQ:
+		*begin = "IRQ";
+		*end   = "EOI";
+		break;
+	default:
+		*begin = NULL;
+		*end   = NULL;
+	}
 }
 
-
-static void *is_hardirq_stack(unsigned long *stack, int cpu)
+static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
 {
-	void *irq = per_cpu(hardirq_stack, cpu);
+	unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
+	unsigned long *end   = begin + (THREAD_SIZE / sizeof(long));
 
-	return is_irq_stack(stack, irq);
+	/*
+	 * This is a software stack, so 'end' can be a valid stack pointer.
+	 * It just means the stack is empty.
+	 */
+	if (stack < begin || stack > end)
+		return false;
+
+	info->type	= STACK_TYPE_IRQ;
+	info->begin	= begin;
+	info->end	= end;
+
+	/*
+	 * See irq_32.c -- the next stack pointer is stored at the beginning of
+	 * the stack.
+	 */
+	info->next_sp	= (unsigned long *)*begin;
+
+	return true;
 }
 
-static void *is_softirq_stack(unsigned long *stack, int cpu)
+static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
 {
-	void *irq = per_cpu(softirq_stack, cpu);
+	unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
+	unsigned long *end   = begin + (THREAD_SIZE / sizeof(long));
 
-	return is_irq_stack(stack, irq);
+	/*
+	 * This is a software stack, so 'end' can be a valid stack pointer.
+	 * It just means the stack is empty.
+	 */
+	if (stack < begin || stack > end)
+		return false;
+
+	info->type	= STACK_TYPE_SOFTIRQ;
+	info->begin	= begin;
+	info->end	= end;
+
+	/*
+	 * The next stack pointer is stored at the beginning of the stack.
+	 * See irq_32.c.
+	 */
+	info->next_sp	= (unsigned long *)*begin;
+
+	return true;
 }
 
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data)
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask)
 {
-	const unsigned cpu = get_cpu();
-	int graph = 0;
-	u32 *prev_esp;
+	if (!stack)
+		goto unknown;
 
-	if (!task)
-		task = current;
+	task = task ? : current;
 
-	if (!stack) {
-		unsigned long dummy;
+	if (in_task_stack(stack, task, info))
+		goto recursion_check;
 
-		stack = &dummy;
-		if (task != current)
-			stack = (unsigned long *)task->thread.sp;
+	if (task != current)
+		goto unknown;
+
+	if (in_hardirq_stack(stack, info))
+		goto recursion_check;
+
+	if (in_softirq_stack(stack, info))
+		goto recursion_check;
+
+	goto unknown;
+
+recursion_check:
+	/*
+	 * Make sure we don't iterate through any given stack more than once.
+	 * If it comes up a second time then there's something wrong going on:
+	 * just break out and report an unknown stack type.
+	 */
+	if (visit_mask) {
+		if (*visit_mask & (1UL << info->type))
+			goto unknown;
+		*visit_mask |= 1UL << info->type;
 	}
 
-	if (!bp)
-		bp = stack_frame(task, regs);
+	return 0;
 
-	for (;;) {
-		void *end_stack;
-
-		end_stack = is_hardirq_stack(stack, cpu);
-		if (!end_stack)
-			end_stack = is_softirq_stack(stack, cpu);
-
-		bp = ops->walk_stack(task, stack, bp, ops, data,
-				     end_stack, &graph);
-
-		/* Stop if not on irq stack */
-		if (!end_stack)
-			break;
-
-		/* The previous esp is saved on the bottom of the stack */
-		prev_esp = (u32 *)(end_stack - THREAD_SIZE);
-		stack = (unsigned long *)*prev_esp;
-		if (!stack)
-			break;
-
-		if (ops->stack(data, "IRQ") < 0)
-			break;
-		touch_nmi_watchdog();
-	}
-	put_cpu();
+unknown:
+	info->type = STACK_TYPE_UNKNOWN;
+	return -EINVAL;
 }
-EXPORT_SYMBOL(dump_trace);
 
-void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *sp, char *log_lvl)
 {
 	unsigned long *stack;
 	int i;
 
-	if (sp == NULL) {
-		if (regs)
-			sp = (unsigned long *)regs->sp;
-		else if (task)
-			sp = (unsigned long *)task->thread.sp;
-		else
-			sp = (unsigned long *)&sp;
-	}
+	if (!try_get_task_stack(task))
+		return;
+
+	sp = sp ? : get_stack_pointer(task, regs);
 
 	stack = sp;
 	for (i = 0; i < kstack_depth_to_print; i++) {
@@ -117,7 +145,9 @@
 		touch_nmi_watchdog();
 	}
 	pr_cont("\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
+
+	put_task_stack(task);
 }
 
 
@@ -139,7 +169,7 @@
 		u8 *ip;
 
 		pr_emerg("Stack:\n");
-		show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
+		show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
 
 		pr_emerg("Code:");
 
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 9ee4520..36cf1a4 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,261 +16,145 @@
 
 #include <asm/stacktrace.h>
 
-
-#define N_EXCEPTION_STACKS_END \
-		(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
-
-static char x86_stack_ids[][8] = {
-		[ DEBUG_STACK-1			]	= "#DB",
-		[ NMI_STACK-1			]	= "NMI",
-		[ DOUBLEFAULT_STACK-1		]	= "#DF",
-		[ MCE_STACK-1			]	= "#MC",
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
-		[ N_EXCEPTION_STACKS ...
-		  N_EXCEPTION_STACKS_END	]	= "#DB[?]"
-#endif
+static char *exception_stack_names[N_EXCEPTION_STACKS] = {
+		[ DOUBLEFAULT_STACK-1	]	= "#DF",
+		[ NMI_STACK-1		]	= "NMI",
+		[ DEBUG_STACK-1		]	= "#DB",
+		[ MCE_STACK-1		]	= "#MC",
 };
 
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
-					 unsigned *usedp, char **idp)
+static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
+	[0 ... N_EXCEPTION_STACKS - 1]		= EXCEPTION_STKSZ,
+	[DEBUG_STACK - 1]			= DEBUG_STKSZ
+};
+
+void stack_type_str(enum stack_type type, const char **begin, const char **end)
 {
+	BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+
+	switch (type) {
+	case STACK_TYPE_IRQ:
+		*begin = "IRQ";
+		*end   = "EOI";
+		break;
+	case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
+		*begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
+		*end   = "EOE";
+		break;
+	default:
+		*begin = NULL;
+		*end   = NULL;
+	}
+}
+
+static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
+{
+	unsigned long *begin, *end;
+	struct pt_regs *regs;
 	unsigned k;
 
-	/*
-	 * Iterate over all exception stacks, and figure out whether
-	 * 'stack' is in one of them:
-	 */
+	BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+
 	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
-		unsigned long end = per_cpu(orig_ist, cpu).ist[k];
-		/*
-		 * Is 'stack' above this exception frame's end?
-		 * If yes then skip to the next frame.
-		 */
-		if (stack >= end)
+		end   = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
+		begin = end - (exception_stack_sizes[k] / sizeof(long));
+		regs  = (struct pt_regs *)end - 1;
+
+		if (stack < begin || stack >= end)
 			continue;
-		/*
-		 * Is 'stack' above this exception frame's start address?
-		 * If yes then we found the right frame.
-		 */
-		if (stack >= end - EXCEPTION_STKSZ) {
-			/*
-			 * Make sure we only iterate through an exception
-			 * stack once. If it comes up for the second time
-			 * then there's something wrong going on - just
-			 * break out and return NULL:
-			 */
-			if (*usedp & (1U << k))
-				break;
-			*usedp |= 1U << k;
-			*idp = x86_stack_ids[k];
-			return (unsigned long *)end;
-		}
-		/*
-		 * If this is a debug stack, and if it has a larger size than
-		 * the usual exception stacks, then 'stack' might still
-		 * be within the lower portion of the debug stack:
-		 */
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
-		if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
-			unsigned j = N_EXCEPTION_STACKS - 1;
 
-			/*
-			 * Black magic. A large debug stack is composed of
-			 * multiple exception stack entries, which we
-			 * iterate through now. Dont look:
-			 */
-			do {
-				++j;
-				end -= EXCEPTION_STKSZ;
-				x86_stack_ids[j][4] = '1' +
-						(j - N_EXCEPTION_STACKS);
-			} while (stack < end - EXCEPTION_STKSZ);
-			if (*usedp & (1U << j))
-				break;
-			*usedp |= 1U << j;
-			*idp = x86_stack_ids[j];
-			return (unsigned long *)end;
-		}
-#endif
-	}
-	return NULL;
-}
+		info->type	= STACK_TYPE_EXCEPTION + k;
+		info->begin	= begin;
+		info->end	= end;
+		info->next_sp	= (unsigned long *)regs->sp;
 
-static inline int
-in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
-	     unsigned long *irq_stack_end)
-{
-	return (stack >= irq_stack && stack < irq_stack_end);
-}
-
-static const unsigned long irq_stack_size =
-	(IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
-
-enum stack_type {
-	STACK_IS_UNKNOWN,
-	STACK_IS_NORMAL,
-	STACK_IS_EXCEPTION,
-	STACK_IS_IRQ,
-};
-
-static enum stack_type
-analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
-	      unsigned long **stack_end, unsigned long *irq_stack,
-	      unsigned *used, char **id)
-{
-	unsigned long addr;
-
-	addr = ((unsigned long)stack & (~(THREAD_SIZE - 1)));
-	if ((unsigned long)task_stack_page(task) == addr)
-		return STACK_IS_NORMAL;
-
-	*stack_end = in_exception_stack(cpu, (unsigned long)stack,
-					used, id);
-	if (*stack_end)
-		return STACK_IS_EXCEPTION;
-
-	if (!irq_stack)
-		return STACK_IS_NORMAL;
-
-	*stack_end = irq_stack;
-	irq_stack = irq_stack - irq_stack_size;
-
-	if (in_irq_stack(stack, irq_stack, *stack_end))
-		return STACK_IS_IRQ;
-
-	return STACK_IS_UNKNOWN;
-}
-
-/*
- * x86-64 can have up to three kernel stacks:
- * process stack
- * interrupt stack
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
- */
-
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data)
-{
-	const unsigned cpu = get_cpu();
-	unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
-	unsigned long dummy;
-	unsigned used = 0;
-	int graph = 0;
-	int done = 0;
-
-	if (!task)
-		task = current;
-
-	if (!stack) {
-		if (regs)
-			stack = (unsigned long *)regs->sp;
-		else if (task != current)
-			stack = (unsigned long *)task->thread.sp;
-		else
-			stack = &dummy;
+		return true;
 	}
 
-	if (!bp)
-		bp = stack_frame(task, regs);
-	/*
-	 * Print function call entries in all stacks, starting at the
-	 * current stack address. If the stacks consist of nested
-	 * exceptions
-	 */
-	while (!done) {
-		unsigned long *stack_end;
-		enum stack_type stype;
-		char *id;
+	return false;
+}
 
-		stype = analyze_stack(cpu, task, stack, &stack_end,
-				      irq_stack, &used, &id);
-
-		/* Default finish unless specified to continue */
-		done = 1;
-
-		switch (stype) {
-
-		/* Break out early if we are on the thread stack */
-		case STACK_IS_NORMAL:
-			break;
-
-		case STACK_IS_EXCEPTION:
-
-			if (ops->stack(data, id) < 0)
-				break;
-
-			bp = ops->walk_stack(task, stack, bp, ops,
-					     data, stack_end, &graph);
-			ops->stack(data, "<EOE>");
-			/*
-			 * We link to the next stack via the
-			 * second-to-last pointer (index -2 to end) in the
-			 * exception stack:
-			 */
-			stack = (unsigned long *) stack_end[-2];
-			done = 0;
-			break;
-
-		case STACK_IS_IRQ:
-
-			if (ops->stack(data, "IRQ") < 0)
-				break;
-			bp = ops->walk_stack(task, stack, bp,
-				     ops, data, stack_end, &graph);
-			/*
-			 * We link to the next stack (which would be
-			 * the process stack normally) the last
-			 * pointer (index -1 to end) in the IRQ stack:
-			 */
-			stack = (unsigned long *) (stack_end[-1]);
-			irq_stack = NULL;
-			ops->stack(data, "EOI");
-			done = 0;
-			break;
-
-		case STACK_IS_UNKNOWN:
-			ops->stack(data, "UNK");
-			break;
-		}
-	}
+static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
+{
+	unsigned long *end   = (unsigned long *)this_cpu_read(irq_stack_ptr);
+	unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
 
 	/*
-	 * This handles the process stack:
+	 * This is a software stack, so 'end' can be a valid stack pointer.
+	 * It just means the stack is empty.
 	 */
-	bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph);
-	put_cpu();
-}
-EXPORT_SYMBOL(dump_trace);
+	if (stack < begin || stack > end)
+		return false;
 
-void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl)
+	info->type	= STACK_TYPE_IRQ;
+	info->begin	= begin;
+	info->end	= end;
+
+	/*
+	 * The next stack pointer is the first thing pushed by the entry code
+	 * after switching to the irq stack.
+	 */
+	info->next_sp = (unsigned long *)*(end - 1);
+
+	return true;
+}
+
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask)
+{
+	if (!stack)
+		goto unknown;
+
+	task = task ? : current;
+
+	if (in_task_stack(stack, task, info))
+		goto recursion_check;
+
+	if (task != current)
+		goto unknown;
+
+	if (in_exception_stack(stack, info))
+		goto recursion_check;
+
+	if (in_irq_stack(stack, info))
+		goto recursion_check;
+
+	goto unknown;
+
+recursion_check:
+	/*
+	 * Make sure we don't iterate through any given stack more than once.
+	 * If it comes up a second time then there's something wrong going on:
+	 * just break out and report an unknown stack type.
+	 */
+	if (visit_mask) {
+		if (*visit_mask & (1UL << info->type))
+			goto unknown;
+		*visit_mask |= 1UL << info->type;
+	}
+
+	return 0;
+
+unknown:
+	info->type = STACK_TYPE_UNKNOWN;
+	return -EINVAL;
+}
+
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *sp, char *log_lvl)
 {
 	unsigned long *irq_stack_end;
 	unsigned long *irq_stack;
 	unsigned long *stack;
-	int cpu;
 	int i;
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	if (!try_get_task_stack(task))
+		return;
 
-	irq_stack_end	= (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
-	irq_stack	= (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
+	irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
+	irq_stack     = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));
 
-	/*
-	 * Debugging aid: "show_stack(NULL, NULL);" prints the
-	 * back trace for this cpu:
-	 */
-	if (sp == NULL) {
-		if (regs)
-			sp = (unsigned long *)regs->sp;
-		else if (task)
-			sp = (unsigned long *)task->thread.sp;
-		else
-			sp = (unsigned long *)&sp;
-	}
+	sp = sp ? : get_stack_pointer(task, regs);
 
 	stack = sp;
 	for (i = 0; i < kstack_depth_to_print; i++) {
@@ -299,18 +183,17 @@
 		stack++;
 		touch_nmi_watchdog();
 	}
-	preempt_enable();
 
 	pr_cont("\n");
-	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+	show_trace_log_lvl(task, regs, sp, log_lvl);
+
+	put_task_stack(task);
 }
 
 void show_regs(struct pt_regs *regs)
 {
 	int i;
-	unsigned long sp;
 
-	sp = regs->sp;
 	show_regs_print_info(KERN_DEFAULT);
 	__show_regs(regs, 1);
 
@@ -325,8 +208,7 @@
 		u8 *ip;
 
 		printk(KERN_DEFAULT "Stack:\n");
-		show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
-				   0, KERN_DEFAULT);
+		show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);
 
 		printk(KERN_DEFAULT "Code: ");
 
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 621b501..b85fe5f 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -40,8 +40,10 @@
  * user can e.g. boot the original kernel with mem=1G while still booting the
  * next kernel with full memory.
  */
-struct e820map e820;
-struct e820map e820_saved;
+static struct e820map initial_e820  __initdata;
+static struct e820map initial_e820_saved  __initdata;
+struct e820map *e820 __refdata = &initial_e820;
+struct e820map *e820_saved __refdata = &initial_e820_saved;
 
 /* For PCI or other memory-mapped resources */
 unsigned long pci_mem_start = 0xaeedbabe;
@@ -58,8 +60,8 @@
 {
 	int i;
 
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
+	for (i = 0; i < e820->nr_map; i++) {
+		struct e820entry *ei = &e820->map[i];
 
 		if (type && ei->type != type)
 			continue;
@@ -81,8 +83,8 @@
 {
 	int i;
 
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
+	for (i = 0; i < e820->nr_map; i++) {
+		struct e820entry *ei = &e820->map[i];
 
 		if (type && ei->type != type)
 			continue;
@@ -128,7 +130,7 @@
 
 void __init e820_add_region(u64 start, u64 size, int type)
 {
-	__e820_add_region(&e820, start, size, type);
+	__e820_add_region(e820, start, size, type);
 }
 
 static void __init e820_print_type(u32 type)
@@ -164,12 +166,12 @@
 {
 	int i;
 
-	for (i = 0; i < e820.nr_map; i++) {
+	for (i = 0; i < e820->nr_map; i++) {
 		printk(KERN_INFO "%s: [mem %#018Lx-%#018Lx] ", who,
-		       (unsigned long long) e820.map[i].addr,
+		       (unsigned long long) e820->map[i].addr,
 		       (unsigned long long)
-		       (e820.map[i].addr + e820.map[i].size - 1));
-		e820_print_type(e820.map[i].type);
+		       (e820->map[i].addr + e820->map[i].size - 1));
+		e820_print_type(e820->map[i].type);
 		printk(KERN_CONT "\n");
 	}
 }
@@ -348,7 +350,7 @@
 		 * continue building up new bios map based on this
 		 * information
 		 */
-		if (current_type != last_type || current_type == E820_PRAM) {
+		if (current_type != last_type) {
 			if (last_type != 0)	 {
 				new_bios[new_bios_entry].size =
 					change_point[chgidx]->addr - last_addr;
@@ -388,11 +390,11 @@
 	while (nr_map) {
 		u64 start = biosmap->addr;
 		u64 size = biosmap->size;
-		u64 end = start + size;
+		u64 end = start + size - 1;
 		u32 type = biosmap->type;
 
 		/* Overflow in 64 bits? Ignore the memory map. */
-		if (start > end)
+		if (start > end && likely(size))
 			return -1;
 
 		e820_add_region(start, size, type);
@@ -493,13 +495,13 @@
 u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
 			     unsigned new_type)
 {
-	return __e820_update_range(&e820, start, size, old_type, new_type);
+	return __e820_update_range(e820, start, size, old_type, new_type);
 }
 
 static u64 __init e820_update_range_saved(u64 start, u64 size,
 					  unsigned old_type, unsigned new_type)
 {
-	return __e820_update_range(&e820_saved, start, size, old_type,
+	return __e820_update_range(e820_saved, start, size, old_type,
 				     new_type);
 }
 
@@ -521,8 +523,8 @@
 		e820_print_type(old_type);
 	printk(KERN_CONT "\n");
 
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
+	for (i = 0; i < e820->nr_map; i++) {
+		struct e820entry *ei = &e820->map[i];
 		u64 final_start, final_end;
 		u64 ei_end;
 
@@ -566,15 +568,15 @@
 
 void __init update_e820(void)
 {
-	if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map))
+	if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map))
 		return;
 	printk(KERN_INFO "e820: modified physical RAM map:\n");
 	e820_print_map("modified");
 }
 static void __init update_e820_saved(void)
 {
-	sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map),
-				&e820_saved.nr_map);
+	sanitize_e820_map(e820_saved->map, ARRAY_SIZE(e820_saved->map),
+				&e820_saved->nr_map);
 }
 #define MAX_GAP_END 0x100000000ull
 /*
@@ -584,14 +586,14 @@
 		unsigned long start_addr, unsigned long long end_addr)
 {
 	unsigned long long last;
-	int i = e820.nr_map;
+	int i = e820->nr_map;
 	int found = 0;
 
 	last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
 
 	while (--i >= 0) {
-		unsigned long long start = e820.map[i].addr;
-		unsigned long long end = start + e820.map[i].size;
+		unsigned long long start = e820->map[i].addr;
+		unsigned long long end = start + e820->map[i].size;
 
 		if (end < start_addr)
 			continue;
@@ -649,6 +651,33 @@
 	       gapstart, gapstart + gapsize - 1);
 }
 
+/*
+ * Called late during init, in free_initmem().
+ *
+ * Initial e820 and e820_saved are largish __initdata arrays.
+ * Copy them to (usually much smaller) dynamically allocated area.
+ * This is done after all tweaks we ever do to them:
+ * all functions which modify them are __init functions,
+ * they won't exist after this point.
+ */
+__init void e820_reallocate_tables(void)
+{
+	struct e820map *n;
+	int size;
+
+	size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820->nr_map;
+	n = kmalloc(size, GFP_KERNEL);
+	BUG_ON(!n);
+	memcpy(n, e820, size);
+	e820 = n;
+
+	size = offsetof(struct e820map, map) + sizeof(struct e820entry) * e820_saved->nr_map;
+	n = kmalloc(size, GFP_KERNEL);
+	BUG_ON(!n);
+	memcpy(n, e820_saved, size);
+	e820_saved = n;
+}
+
 /**
  * Because of the size limitation of struct boot_params, only first
  * 128 E820 memory entries are passed to kernel via
@@ -665,7 +694,7 @@
 	entries = sdata->len / sizeof(struct e820entry);
 	extmap = (struct e820entry *)(sdata->data);
 	__append_e820_map(extmap, entries);
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
 	early_memunmap(sdata, data_len);
 	printk(KERN_INFO "e820: extended physical RAM map:\n");
 	e820_print_map("extended");
@@ -686,8 +715,8 @@
 	int i;
 	unsigned long pfn = 0;
 
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
+	for (i = 0; i < e820->nr_map; i++) {
+		struct e820entry *ei = &e820->map[i];
 
 		if (pfn < PFN_UP(ei->addr))
 			register_nosave_region(pfn, PFN_UP(ei->addr));
@@ -712,8 +741,8 @@
 {
 	int i;
 
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
+	for (i = 0; i < e820->nr_map; i++) {
+		struct e820entry *ei = &e820->map[i];
 
 		if (ei->type == E820_NVS)
 			acpi_nvs_register(ei->addr, ei->size);
@@ -754,22 +783,18 @@
 /*
  * Find the highest page frame number we have available
  */
-static unsigned long __init e820_end_pfn(unsigned long limit_pfn)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 {
 	int i;
 	unsigned long last_pfn = 0;
 	unsigned long max_arch_pfn = MAX_ARCH_PFN;
 
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
+	for (i = 0; i < e820->nr_map; i++) {
+		struct e820entry *ei = &e820->map[i];
 		unsigned long start_pfn;
 		unsigned long end_pfn;
 
-		/*
-		 * Persistent memory is accounted as ram for purposes of
-		 * establishing max_pfn and mem_map.
-		 */
-		if (ei->type != E820_RAM && ei->type != E820_PRAM)
+		if (ei->type != type)
 			continue;
 
 		start_pfn = ei->addr >> PAGE_SHIFT;
@@ -794,15 +819,15 @@
 }
 unsigned long __init e820_end_of_ram_pfn(void)
 {
-	return e820_end_pfn(MAX_ARCH_PFN);
+	return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
 }
 
 unsigned long __init e820_end_of_low_ram_pfn(void)
 {
-	return e820_end_pfn(1UL << (32-PAGE_SHIFT));
+	return e820_end_pfn(1UL << (32 - PAGE_SHIFT), E820_RAM);
 }
 
-static void early_panic(char *msg)
+static void __init early_panic(char *msg)
 {
 	early_printk(msg);
 	panic(msg);
@@ -856,7 +881,7 @@
 		 */
 		saved_max_pfn = e820_end_of_ram_pfn();
 #endif
-		e820.nr_map = 0;
+		e820->nr_map = 0;
 		userdef = 1;
 		return 0;
 	}
@@ -903,8 +928,8 @@
 void __init finish_e820_parsing(void)
 {
 	if (userdef) {
-		if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map),
-					&e820.nr_map) < 0)
+		if (sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map),
+					&e820->nr_map) < 0)
 			early_panic("Invalid user supplied memory map");
 
 		printk(KERN_INFO "e820: user-defined physical RAM map:\n");
@@ -912,7 +937,7 @@
 	}
 }
 
-static const char *e820_type_to_string(int e820_type)
+static const char *__init e820_type_to_string(int e820_type)
 {
 	switch (e820_type) {
 	case E820_RESERVED_KERN:
@@ -926,7 +951,7 @@
 	}
 }
 
-static unsigned long e820_type_to_iomem_type(int e820_type)
+static unsigned long __init e820_type_to_iomem_type(int e820_type)
 {
 	switch (e820_type) {
 	case E820_RESERVED_KERN:
@@ -942,7 +967,7 @@
 	}
 }
 
-static unsigned long e820_type_to_iores_desc(int e820_type)
+static unsigned long __init e820_type_to_iores_desc(int e820_type)
 {
 	switch (e820_type) {
 	case E820_ACPI:
@@ -961,7 +986,7 @@
 	}
 }
 
-static bool do_mark_busy(u32 type, struct resource *res)
+static bool __init do_mark_busy(u32 type, struct resource *res)
 {
 	/* this is the legacy bios/dos rom-shadow + mmio region */
 	if (res->start < (1ULL<<20))
@@ -991,35 +1016,35 @@
 	struct resource *res;
 	u64 end;
 
-	res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
+	res = alloc_bootmem(sizeof(struct resource) * e820->nr_map);
 	e820_res = res;
-	for (i = 0; i < e820.nr_map; i++) {
-		end = e820.map[i].addr + e820.map[i].size - 1;
+	for (i = 0; i < e820->nr_map; i++) {
+		end = e820->map[i].addr + e820->map[i].size - 1;
 		if (end != (resource_size_t)end) {
 			res++;
 			continue;
 		}
-		res->name = e820_type_to_string(e820.map[i].type);
-		res->start = e820.map[i].addr;
+		res->name = e820_type_to_string(e820->map[i].type);
+		res->start = e820->map[i].addr;
 		res->end = end;
 
-		res->flags = e820_type_to_iomem_type(e820.map[i].type);
-		res->desc = e820_type_to_iores_desc(e820.map[i].type);
+		res->flags = e820_type_to_iomem_type(e820->map[i].type);
+		res->desc = e820_type_to_iores_desc(e820->map[i].type);
 
 		/*
 		 * don't register the region that could be conflicted with
 		 * pci device BAR resource and insert them later in
 		 * pcibios_resource_survey()
 		 */
-		if (do_mark_busy(e820.map[i].type, res)) {
+		if (do_mark_busy(e820->map[i].type, res)) {
 			res->flags |= IORESOURCE_BUSY;
 			insert_resource(&iomem_resource, res);
 		}
 		res++;
 	}
 
-	for (i = 0; i < e820_saved.nr_map; i++) {
-		struct e820entry *entry = &e820_saved.map[i];
+	for (i = 0; i < e820_saved->nr_map; i++) {
+		struct e820entry *entry = &e820_saved->map[i];
 		firmware_map_add_early(entry->addr,
 			entry->addr + entry->size,
 			e820_type_to_string(entry->type));
@@ -1027,7 +1052,7 @@
 }
 
 /* How much should we pad RAM ending depending on where it is? */
-static unsigned long ram_alignment(resource_size_t pos)
+static unsigned long __init ram_alignment(resource_size_t pos)
 {
 	unsigned long mb = pos >> 20;
 
@@ -1051,7 +1076,7 @@
 	struct resource *res;
 
 	res = e820_res;
-	for (i = 0; i < e820.nr_map; i++) {
+	for (i = 0; i < e820->nr_map; i++) {
 		if (!res->parent && res->end)
 			insert_resource_expand_to_fit(&iomem_resource, res);
 		res++;
@@ -1061,8 +1086,8 @@
 	 * Try to bump up RAM regions to reasonable boundaries to
 	 * avoid stolen RAM:
 	 */
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *entry = &e820.map[i];
+	for (i = 0; i < e820->nr_map; i++) {
+		struct e820entry *entry = &e820->map[i];
 		u64 start, end;
 
 		if (entry->type != E820_RAM)
@@ -1110,7 +1135,7 @@
 			who = "BIOS-e801";
 		}
 
-		e820.nr_map = 0;
+		e820->nr_map = 0;
 		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
 		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
 	}
@@ -1124,7 +1149,7 @@
 	char *who;
 
 	who = x86_init.resources.memory_setup();
-	memcpy(&e820_saved, &e820, sizeof(struct e820map));
+	memcpy(e820_saved, e820, sizeof(struct e820map));
 	printk(KERN_INFO "e820: BIOS-provided physical RAM map:\n");
 	e820_print_map(who);
 }
@@ -1141,8 +1166,8 @@
 	 */
 	memblock_allow_resize();
 
-	for (i = 0; i < e820.nr_map; i++) {
-		struct e820entry *ei = &e820.map[i];
+	for (i = 0; i < e820->nr_map; i++) {
+		struct e820entry *ei = &e820->map[i];
 
 		end = ei->addr + ei->size;
 		if (end != (resource_size_t)end)
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index de7501e..18bb3a6 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -555,7 +555,7 @@
 
 	/* Mark this space as reserved */
 	e820_add_region(base, size, E820_RESERVED);
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
 }
 
 static void __init intel_graphics_quirks(int num, int slot, int func)
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 93982ae..2f2b8c7 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -317,7 +317,6 @@
 	on_boot_cpu = 0;
 
 	WARN_ON_FPU(current->thread.fpu.fpstate_active);
-	current_thread_info()->status = 0;
 
 	if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
 		eagerfpu = ENABLE;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 680049a..01567aa 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -866,105 +866,17 @@
 	return get_xsave_addr(&fpu->state.xsave, xsave_state);
 }
 
-
-/*
- * Set xfeatures (aka XSTATE_BV) bit for a feature that we want
- * to take out of its "init state".  This will ensure that an
- * XRSTOR actually restores the state.
- */
-static void fpu__xfeature_set_non_init(struct xregs_state *xsave,
-		int xstate_feature_mask)
-{
-	xsave->header.xfeatures |= xstate_feature_mask;
-}
-
-/*
- * This function is safe to call whether the FPU is in use or not.
- *
- * Note that this only works on the current task.
- *
- * Inputs:
- *	@xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- *	XFEATURE_MASK_SSE, etc...)
- *	@xsave_state_ptr: a pointer to a copy of the state that you would
- *	like written in to the current task's FPU xsave state.  This pointer
- *	must not be located in the current tasks's xsave area.
- * Output:
- *	address of the state in the xsave area or NULL if the state
- *	is not present or is in its 'init state'.
- */
-static void fpu__xfeature_set_state(int xstate_feature_mask,
-		void *xstate_feature_src, size_t len)
-{
-	struct xregs_state *xsave = &current->thread.fpu.state.xsave;
-	struct fpu *fpu = &current->thread.fpu;
-	void *dst;
-
-	if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
-		WARN_ONCE(1, "%s() attempted with no xsave support", __func__);
-		return;
-	}
-
-	/*
-	 * Tell the FPU code that we need the FPU state to be in
-	 * 'fpu' (not in the registers), and that we need it to
-	 * be stable while we write to it.
-	 */
-	fpu__current_fpstate_write_begin();
-
-	/*
-	 * This method *WILL* *NOT* work for compact-format
-	 * buffers.  If the 'xstate_feature_mask' is unset in
-	 * xcomp_bv then we may need to move other feature state
-	 * "up" in the buffer.
-	 */
-	if (xsave->header.xcomp_bv & xstate_feature_mask) {
-		WARN_ON_ONCE(1);
-		goto out;
-	}
-
-	/* find the location in the xsave buffer of the desired state */
-	dst = __raw_xsave_addr(&fpu->state.xsave, xstate_feature_mask);
-
-	/*
-	 * Make sure that the pointer being passed in did not
-	 * come from the xsave buffer itself.
-	 */
-	WARN_ONCE(xstate_feature_src == dst, "set from xsave buffer itself");
-
-	/* put the caller-provided data in the location */
-	memcpy(dst, xstate_feature_src, len);
-
-	/*
-	 * Mark the xfeature so that the CPU knows there is state
-	 * in the buffer now.
-	 */
-	fpu__xfeature_set_non_init(xsave, xstate_feature_mask);
-out:
-	/*
-	 * We are done writing to the 'fpu'.  Reenable preeption
-	 * and (possibly) move the fpstate back in to the fpregs.
-	 */
-	fpu__current_fpstate_write_end();
-}
-
 #define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2)
 #define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1)
 
 /*
- * This will go out and modify the XSAVE buffer so that PKRU is
- * set to a particular state for access to 'pkey'.
- *
- * PKRU state does affect kernel access to user memory.  We do
- * not modfiy PKRU *itself* here, only the XSAVE state that will
- * be restored in to PKRU when we return back to userspace.
+ * This will go out and modify PKRU register to set the access
+ * rights for @pkey to @init_val.
  */
 int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 		unsigned long init_val)
 {
-	struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
-	struct pkru_state *old_pkru_state;
-	struct pkru_state new_pkru_state;
+	u32 old_pkru;
 	int pkey_shift = (pkey * PKRU_BITS_PER_PKEY);
 	u32 new_pkru_bits = 0;
 
@@ -974,6 +886,15 @@
 	 */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return -EINVAL;
+	/*
+	 * For most XSAVE components, this would be an arduous task:
+	 * brining fpstate up to date with fpregs, updating fpstate,
+	 * then re-populating fpregs.  But, for components that are
+	 * never lazily managed, we can just access the fpregs
+	 * directly.  PKRU is never managed lazily, so we can just
+	 * manipulate it directly.  Make sure it stays that way.
+	 */
+	WARN_ON_ONCE(!use_eager_fpu());
 
 	/* Set the bits we need in PKRU:  */
 	if (init_val & PKEY_DISABLE_ACCESS)
@@ -984,37 +905,12 @@
 	/* Shift the bits in to the correct place in PKRU for pkey: */
 	new_pkru_bits <<= pkey_shift;
 
-	/* Locate old copy of the state in the xsave buffer: */
-	old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU);
+	/* Get old PKRU and mask off any old bits in place: */
+	old_pkru = read_pkru();
+	old_pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
 
-	/*
-	 * When state is not in the buffer, it is in the init
-	 * state, set it manually.  Otherwise, copy out the old
-	 * state.
-	 */
-	if (!old_pkru_state)
-		new_pkru_state.pkru = 0;
-	else
-		new_pkru_state.pkru = old_pkru_state->pkru;
-
-	/* Mask off any old bits in place: */
-	new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift);
-
-	/* Set the newly-requested bits: */
-	new_pkru_state.pkru |= new_pkru_bits;
-
-	/*
-	 * We could theoretically live without zeroing pkru.pad.
-	 * The current XSAVE feature state definition says that
-	 * only bytes 0->3 are used.  But we do not want to
-	 * chance leaking kernel stack out to userspace in case a
-	 * memcpy() of the whole xsave buffer was done.
-	 *
-	 * They're in the same cacheline anyway.
-	 */
-	new_pkru_state.pad = 0;
-
-	fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, sizeof(new_pkru_state));
+	/* Write old part along with new part: */
+	write_pkru(old_pkru | new_pkru_bits);
 
 	return 0;
 }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index d036cfb..8639bb2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1029,7 +1029,7 @@
 	}
 
 	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-		    frame_pointer) == -EBUSY) {
+				     frame_pointer, parent) == -EBUSY) {
 		*parent = old;
 		return;
 	}
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 2dda0bc..f16c55b 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -25,8 +25,6 @@
 	/* Initialize 32bit specific setup functions */
 	x86_init.resources.reserve_resources = i386_reserve_resources;
 	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
-
-	reserve_bios_regions();
 }
 
 asmlinkage __visible void __init i386_start_kernel(void)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 99d48e7..54a2372 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -183,7 +183,6 @@
 		copy_bootdata(__va(real_mode_data));
 
 	x86_early_init_platform_quirks();
-	reserve_bios_regions();
 
 	switch (boot_params.hdr.hardware_subarch) {
 	case X86_SUBARCH_INTEL_MID:
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 6f8902b..5f40126 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -94,7 +94,7 @@
  */
 __HEAD
 ENTRY(startup_32)
-	movl pa(stack_start),%ecx
+	movl pa(initial_stack),%ecx
 	
 	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
 		us to not reload segments */
@@ -286,7 +286,7 @@
  * start_secondary().
  */
 ENTRY(start_cpu0)
-	movl stack_start, %ecx
+	movl initial_stack, %ecx
 	movl %ecx, %esp
 	jmp  *(initial_code)
 ENDPROC(start_cpu0)
@@ -307,7 +307,7 @@
 	movl %eax,%es
 	movl %eax,%fs
 	movl %eax,%gs
-	movl pa(stack_start),%ecx
+	movl pa(initial_stack),%ecx
 	movl %eax,%ss
 	leal -__PAGE_OFFSET(%ecx),%esp
 
@@ -703,7 +703,7 @@
 
 .data
 .balign 4
-ENTRY(stack_start)
+ENTRY(initial_stack)
 	.long init_thread_union+THREAD_SIZE
 
 __INITRODATA
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 9f8efc9f..c98a559 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -66,7 +66,7 @@
 	 */
 
 	/*
-	 * Setup stack for verify_cpu(). "-8" because stack_start is defined
+	 * Setup stack for verify_cpu(). "-8" because initial_stack is defined
 	 * this way, see below. Our best guess is a NULL ptr for stack
 	 * termination heuristics and we don't want to break anything which
 	 * might depend on it (kgdb, ...).
@@ -226,7 +226,7 @@
 	movq	%rax, %cr0
 
 	/* Setup a boot time stack */
-	movq stack_start(%rip), %rsp
+	movq initial_stack(%rip), %rsp
 
 	/* zero EFLAGS after setting rsp */
 	pushq $0
@@ -310,7 +310,7 @@
  * start_secondary().
  */
 ENTRY(start_cpu0)
-	movq stack_start(%rip),%rsp
+	movq initial_stack(%rip),%rsp
 	movq	initial_code(%rip),%rax
 	pushq	$0		# fake return address to stop unwinder
 	pushq	$__KERNEL_CS	# set correct cs
@@ -319,17 +319,15 @@
 ENDPROC(start_cpu0)
 #endif
 
-	/* SMP bootup changes these two */
+	/* Both SMP bootup and ACPI suspend change these variables */
 	__REFDATA
 	.balign	8
 	GLOBAL(initial_code)
 	.quad	x86_64_start_kernel
 	GLOBAL(initial_gs)
 	.quad	INIT_PER_CPU_VAR(irq_stack_union)
-
-	GLOBAL(stack_start)
+	GLOBAL(initial_stack)
 	.quad  init_thread_union+THREAD_SIZE-8
-	.word  0
 	__FINITDATA
 
 bad_address:
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ed16e58..274fab9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -756,10 +756,104 @@
 /*
  * Clock source related code
  */
+#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
+/*
+ * Reading the HPET counter is a very slow operation. If a large number of
+ * CPUs are trying to access the HPET counter simultaneously, it can cause
+ * massive delay and slow down system performance dramatically. This may
+ * happen when HPET is the default clock source instead of TSC. For a
+ * really large system with hundreds of CPUs, the slowdown may be so
+ * severe that it may actually crash the system because of a NMI watchdog
+ * soft lockup, for example.
+ *
+ * If multiple CPUs are trying to access the HPET counter at the same time,
+ * we don't actually need to read the counter multiple times. Instead, the
+ * other CPUs can use the counter value read by the first CPU in the group.
+ *
+ * This special feature is only enabled on x86-64 systems. It is unlikely
+ * that 32-bit x86 systems will have enough CPUs to require this feature
+ * with its associated locking overhead. And we also need 64-bit atomic
+ * read.
+ *
+ * The lock and the hpet value are stored together and can be read in a
+ * single atomic 64-bit read. It is explicitly assumed that arch_spinlock_t
+ * is 32 bits in size.
+ */
+union hpet_lock {
+	struct {
+		arch_spinlock_t lock;
+		u32 value;
+	};
+	u64 lockval;
+};
+
+static union hpet_lock hpet __cacheline_aligned = {
+	{ .lock = __ARCH_SPIN_LOCK_UNLOCKED, },
+};
+
+static cycle_t read_hpet(struct clocksource *cs)
+{
+	unsigned long flags;
+	union hpet_lock old, new;
+
+	BUILD_BUG_ON(sizeof(union hpet_lock) != 8);
+
+	/*
+	 * Read HPET directly if in NMI.
+	 */
+	if (in_nmi())
+		return (cycle_t)hpet_readl(HPET_COUNTER);
+
+	/*
+	 * Read the current state of the lock and HPET value atomically.
+	 */
+	old.lockval = READ_ONCE(hpet.lockval);
+
+	if (arch_spin_is_locked(&old.lock))
+		goto contended;
+
+	local_irq_save(flags);
+	if (arch_spin_trylock(&hpet.lock)) {
+		new.value = hpet_readl(HPET_COUNTER);
+		/*
+		 * Use WRITE_ONCE() to prevent store tearing.
+		 */
+		WRITE_ONCE(hpet.value, new.value);
+		arch_spin_unlock(&hpet.lock);
+		local_irq_restore(flags);
+		return (cycle_t)new.value;
+	}
+	local_irq_restore(flags);
+
+contended:
+	/*
+	 * Contended case
+	 * --------------
+	 * Wait until the HPET value change or the lock is free to indicate
+	 * its value is up-to-date.
+	 *
+	 * It is possible that old.value has already contained the latest
+	 * HPET value while the lock holder was in the process of releasing
+	 * the lock. Checking for lock state change will enable us to return
+	 * the value immediately instead of waiting for the next HPET reader
+	 * to come along.
+	 */
+	do {
+		cpu_relax();
+		new.lockval = READ_ONCE(hpet.lockval);
+	} while ((new.value == old.value) && arch_spin_is_locked(&new.lock));
+
+	return (cycle_t)new.value;
+}
+#else
+/*
+ * For UP or 32-bit.
+ */
 static cycle_t read_hpet(struct clocksource *cs)
 {
 	return (cycle_t)hpet_readl(HPET_COUNTER);
 }
+#endif
 
 static struct clocksource clocksource_hpet = {
 	.name		= "hpet",
@@ -1242,7 +1336,7 @@
 	memset(&curr_time, 0, sizeof(struct rtc_time));
 
 	if (hpet_rtc_flags & (RTC_UIE | RTC_AIE))
-		mc146818_set_time(&curr_time);
+		mc146818_get_time(&curr_time);
 
 	if (hpet_rtc_flags & RTC_UIE &&
 	    curr_time.tm_sec != hpet_prev_update_sec) {
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 61521dc..9f669fd 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -102,8 +102,7 @@
 	seq_puts(p, "  Rescheduling interrupts\n");
 	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
-					irq_stats(j)->irq_tlb_count);
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
 	seq_puts(p, "  Function call interrupts\n");
 	seq_printf(p, "%*s: ", prec, "TLB");
 	for_each_online_cpu(j)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 4a79037..9ebd0b0 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -40,8 +40,7 @@
 	if (user_mode(regs))
 		return;
 
-	if (regs->sp >= curbase + sizeof(struct thread_info) +
-				  sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
+	if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
 	    regs->sp <= curbase + THREAD_SIZE)
 		return;
 
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index f2356bd..3407b14 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -99,14 +99,14 @@
 {
 	unsigned int nr_e820_entries;
 
-	nr_e820_entries = e820_saved.nr_map;
+	nr_e820_entries = e820_saved->nr_map;
 
 	/* TODO: Pass entries more than E820MAX in bootparams setup data */
 	if (nr_e820_entries > E820MAX)
 		nr_e820_entries = E820MAX;
 
 	params->e820_entries = nr_e820_entries;
-	memcpy(&params->e820_map, &e820_saved.map,
+	memcpy(&params->e820_map, &e820_saved->map,
 	       nr_e820_entries * sizeof(struct e820entry));
 
 	return 0;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 04cde52..8e36f24 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -50,6 +50,7 @@
 #include <asm/apicdef.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/switch_to.h>
 
 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
 {
@@ -166,21 +167,19 @@
 	gdb_regs[GDB_DX]	= 0;
 	gdb_regs[GDB_SI]	= 0;
 	gdb_regs[GDB_DI]	= 0;
-	gdb_regs[GDB_BP]	= *(unsigned long *)p->thread.sp;
+	gdb_regs[GDB_BP]	= ((struct inactive_task_frame *)p->thread.sp)->bp;
 #ifdef CONFIG_X86_32
 	gdb_regs[GDB_DS]	= __KERNEL_DS;
 	gdb_regs[GDB_ES]	= __KERNEL_DS;
 	gdb_regs[GDB_PS]	= 0;
 	gdb_regs[GDB_CS]	= __KERNEL_CS;
-	gdb_regs[GDB_PC]	= p->thread.ip;
 	gdb_regs[GDB_SS]	= __KERNEL_DS;
 	gdb_regs[GDB_FS]	= 0xFFFF;
 	gdb_regs[GDB_GS]	= 0xFFFF;
 #else
-	gdb_regs32[GDB_PS]	= *(unsigned long *)(p->thread.sp + 8);
+	gdb_regs32[GDB_PS]	= 0;
 	gdb_regs32[GDB_CS]	= __KERNEL_CS;
 	gdb_regs32[GDB_SS]	= __KERNEL_DS;
-	gdb_regs[GDB_PC]	= 0;
 	gdb_regs[GDB_R8]	= 0;
 	gdb_regs[GDB_R9]	= 0;
 	gdb_regs[GDB_R10]	= 0;
@@ -190,6 +189,7 @@
 	gdb_regs[GDB_R14]	= 0;
 	gdb_regs[GDB_R15]	= 0;
 #endif
+	gdb_regs[GDB_PC]	= 0;
 	gdb_regs[GDB_SP]	= p->thread.sp;
 }
 
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7847e5c..28cee01 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -45,7 +45,7 @@
 #include <linux/slab.h>
 #include <linux/hardirq.h>
 #include <linux/preempt.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 4425f59..3bb4c5f 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -24,7 +24,7 @@
 #include <linux/slab.h>
 #include <linux/hardirq.h>
 #include <linux/preempt.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/kdebug.h>
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index c2bedae..4afc67f 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -184,7 +184,7 @@
 
 static struct kobj_attribute type_attr = __ATTR_RO(type);
 
-static struct bin_attribute data_attr = {
+static struct bin_attribute data_attr __ro_after_init = {
 	.attr = {
 		.name = "data",
 		.mode = S_IRUGO,
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1726c4c..edbbfc8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -423,12 +423,7 @@
 	kvm_spinlock_init();
 }
 
-static void kvm_guest_cpu_online(void *dummy)
-{
-	kvm_guest_cpu_init();
-}
-
-static void kvm_guest_cpu_offline(void *dummy)
+static void kvm_guest_cpu_offline(void)
 {
 	kvm_disable_steal_time();
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
@@ -437,29 +432,21 @@
 	apf_task_wake_all();
 }
 
-static int kvm_cpu_notify(struct notifier_block *self, unsigned long action,
-			  void *hcpu)
+static int kvm_cpu_online(unsigned int cpu)
 {
-	int cpu = (unsigned long)hcpu;
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-	case CPU_ONLINE_FROZEN:
-		smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0);
-		break;
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
+	local_irq_disable();
+	kvm_guest_cpu_init();
+	local_irq_enable();
+	return 0;
 }
 
-static struct notifier_block kvm_cpu_notifier = {
-        .notifier_call  = kvm_cpu_notify,
-};
+static int kvm_cpu_down_prepare(unsigned int cpu)
+{
+	local_irq_disable();
+	kvm_guest_cpu_offline();
+	local_irq_enable();
+	return 0;
+}
 #endif
 
 static void __init kvm_apf_trap_init(void)
@@ -494,7 +481,9 @@
 
 #ifdef CONFIG_SMP
 	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
-	register_cpu_notifier(&kvm_cpu_notifier);
+	if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
+				      kvm_cpu_online, kvm_cpu_down_prepare) < 0)
+		pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");
 #else
 	kvm_guest_cpu_init();
 #endif
@@ -575,9 +564,6 @@
 	kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
 }
 
-
-#ifdef CONFIG_QUEUED_SPINLOCKS
-
 #include <asm/qspinlock.h>
 
 static void kvm_wait(u8 *ptr, u8 val)
@@ -606,243 +592,6 @@
 	local_irq_restore(flags);
 }
 
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-
-enum kvm_contention_stat {
-	TAKEN_SLOW,
-	TAKEN_SLOW_PICKUP,
-	RELEASED_SLOW,
-	RELEASED_SLOW_KICKED,
-	NR_CONTENTION_STATS
-};
-
-#ifdef CONFIG_KVM_DEBUG_FS
-#define HISTO_BUCKETS	30
-
-static struct kvm_spinlock_stats
-{
-	u32 contention_stats[NR_CONTENTION_STATS];
-	u32 histo_spin_blocked[HISTO_BUCKETS+1];
-	u64 time_blocked;
-} spinlock_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
-	u8 ret;
-	u8 old;
-
-	old = READ_ONCE(zero_stats);
-	if (unlikely(old)) {
-		ret = cmpxchg(&zero_stats, old, 0);
-		/* This ensures only one fellow resets the stat */
-		if (ret == old)
-			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
-	}
-}
-
-static inline void add_stats(enum kvm_contention_stat var, u32 val)
-{
-	check_zero();
-	spinlock_stats.contention_stats[var] += val;
-}
-
-
-static inline u64 spin_time_start(void)
-{
-	return sched_clock();
-}
-
-static void __spin_time_accum(u64 delta, u32 *array)
-{
-	unsigned index;
-
-	index = ilog2(delta);
-	check_zero();
-
-	if (index < HISTO_BUCKETS)
-		array[index]++;
-	else
-		array[HISTO_BUCKETS]++;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-	u32 delta;
-
-	delta = sched_clock() - start;
-	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
-	spinlock_stats.time_blocked += delta;
-}
-
-static struct dentry *d_spin_debug;
-static struct dentry *d_kvm_debug;
-
-static struct dentry *kvm_init_debugfs(void)
-{
-	d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
-	if (!d_kvm_debug)
-		printk(KERN_WARNING "Could not create 'kvm' debugfs directory\n");
-
-	return d_kvm_debug;
-}
-
-static int __init kvm_spinlock_debugfs(void)
-{
-	struct dentry *d_kvm;
-
-	d_kvm = kvm_init_debugfs();
-	if (d_kvm == NULL)
-		return -ENOMEM;
-
-	d_spin_debug = debugfs_create_dir("spinlocks", d_kvm);
-
-	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
-
-	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-		   &spinlock_stats.contention_stats[TAKEN_SLOW]);
-	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-		   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
-
-	debugfs_create_u32("released_slow", 0444, d_spin_debug,
-		   &spinlock_stats.contention_stats[RELEASED_SLOW]);
-	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-		   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
-
-	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
-			   &spinlock_stats.time_blocked);
-
-	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
-		     spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
-
-	return 0;
-}
-fs_initcall(kvm_spinlock_debugfs);
-#else  /* !CONFIG_KVM_DEBUG_FS */
-static inline void add_stats(enum kvm_contention_stat var, u32 val)
-{
-}
-
-static inline u64 spin_time_start(void)
-{
-	return 0;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-}
-#endif  /* CONFIG_KVM_DEBUG_FS */
-
-struct kvm_lock_waiting {
-	struct arch_spinlock *lock;
-	__ticket_t want;
-};
-
-/* cpus 'waiting' on a spinlock to become available */
-static cpumask_t waiting_cpus;
-
-/* Track spinlock on which a cpu is waiting */
-static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
-
-__visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
-{
-	struct kvm_lock_waiting *w;
-	int cpu;
-	u64 start;
-	unsigned long flags;
-	__ticket_t head;
-
-	if (in_nmi())
-		return;
-
-	w = this_cpu_ptr(&klock_waiting);
-	cpu = smp_processor_id();
-	start = spin_time_start();
-
-	/*
-	 * Make sure an interrupt handler can't upset things in a
-	 * partially setup state.
-	 */
-	local_irq_save(flags);
-
-	/*
-	 * The ordering protocol on this is that the "lock" pointer
-	 * may only be set non-NULL if the "want" ticket is correct.
-	 * If we're updating "want", we must first clear "lock".
-	 */
-	w->lock = NULL;
-	smp_wmb();
-	w->want = want;
-	smp_wmb();
-	w->lock = lock;
-
-	add_stats(TAKEN_SLOW, 1);
-
-	/*
-	 * This uses set_bit, which is atomic but we should not rely on its
-	 * reordering gurantees. So barrier is needed after this call.
-	 */
-	cpumask_set_cpu(cpu, &waiting_cpus);
-
-	barrier();
-
-	/*
-	 * Mark entry to slowpath before doing the pickup test to make
-	 * sure we don't deadlock with an unlocker.
-	 */
-	__ticket_enter_slowpath(lock);
-
-	/* make sure enter_slowpath, which is atomic does not cross the read */
-	smp_mb__after_atomic();
-
-	/*
-	 * check again make sure it didn't become free while
-	 * we weren't looking.
-	 */
-	head = READ_ONCE(lock->tickets.head);
-	if (__tickets_equal(head, want)) {
-		add_stats(TAKEN_SLOW_PICKUP, 1);
-		goto out;
-	}
-
-	/*
-	 * halt until it's our turn and kicked. Note that we do safe halt
-	 * for irq enabled case to avoid hang when lock info is overwritten
-	 * in irq spinlock slowpath and no spurious interrupt occur to save us.
-	 */
-	if (arch_irqs_disabled_flags(flags))
-		halt();
-	else
-		safe_halt();
-
-out:
-	cpumask_clear_cpu(cpu, &waiting_cpus);
-	w->lock = NULL;
-	local_irq_restore(flags);
-	spin_time_accum_blocked(start);
-}
-PV_CALLEE_SAVE_REGS_THUNK(kvm_lock_spinning);
-
-/* Kick vcpu waiting on @lock->head to reach value @ticket */
-static void kvm_unlock_kick(struct arch_spinlock *lock, __ticket_t ticket)
-{
-	int cpu;
-
-	add_stats(RELEASED_SLOW, 1);
-	for_each_cpu(cpu, &waiting_cpus) {
-		const struct kvm_lock_waiting *w = &per_cpu(klock_waiting, cpu);
-		if (READ_ONCE(w->lock) == lock &&
-		    READ_ONCE(w->want) == ticket) {
-			add_stats(RELEASED_SLOW_KICKED, 1);
-			kvm_kick_cpu(cpu);
-			break;
-		}
-	}
-}
-
-#endif /* !CONFIG_QUEUED_SPINLOCKS */
-
 /*
  * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present.
  */
@@ -854,16 +603,11 @@
 	if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT))
 		return;
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
 	__pv_init_lock_hash();
 	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
 	pv_lock_ops.wait = kvm_wait;
 	pv_lock_ops.kick = kvm_kick_cpu;
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(kvm_lock_spinning);
-	pv_lock_ops.unlock_kick = kvm_unlock_kick;
-#endif
 }
 
 static __init int kvm_spinlock_init_jump(void)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1d39bfb..60b9949 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -29,7 +29,7 @@
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
 
-static int kvmclock = 1;
+static int kvmclock __ro_after_init = 1;
 static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
 static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
 static cycle_t kvm_sched_clock_offset;
@@ -289,6 +289,7 @@
 	put_cpu();
 
 	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
+	x86_platform.calibrate_cpu = kvm_get_tsc_khz;
 	x86_platform.get_wallclock = kvm_get_wallclock;
 	x86_platform.set_wallclock = kvm_set_wallclock;
 #ifdef CONFIG_X86_LOCAL_APIC
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 068c4a9..0f8d204 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -499,6 +499,9 @@
 {
 	struct mpf_intel *mpf = mpf_found;
 
+	if (!smp_found_config)
+		return;
+
 	if (!mpf)
 		return;
 
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 1939a02..2c55a00 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -8,7 +8,6 @@
 
 #include <asm/paravirt.h>
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
 __visible void __native_queued_spin_unlock(struct qspinlock *lock)
 {
 	native_queued_spin_unlock(lock);
@@ -21,19 +20,13 @@
 	return pv_lock_ops.queued_spin_unlock.func ==
 		__raw_callee_save___native_queued_spin_unlock;
 }
-#endif
 
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
-#ifdef CONFIG_QUEUED_SPINLOCKS
 	.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
 	.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock),
 	.wait = paravirt_nop,
 	.kick = paravirt_nop,
-#else /* !CONFIG_QUEUED_SPINLOCKS */
-	.lock_spinning = __PV_IS_CALLEE_SAVE(paravirt_nop),
-	.unlock_kick = paravirt_nop,
-#endif /* !CONFIG_QUEUED_SPINLOCKS */
 #endif /* SMP */
 };
 EXPORT_SYMBOL(pv_lock_ops);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index ad5bc95..bbf3d59 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -56,12 +56,12 @@
      ".popsection");
 
 /* identity function, which can be inlined */
-u32 _paravirt_ident_32(u32 x)
+u32 notrace _paravirt_ident_32(u32 x)
 {
 	return x;
 }
 
-u64 _paravirt_ident_64(u64 x)
+u64 notrace _paravirt_ident_64(u64 x)
 {
 	return x;
 }
@@ -332,7 +332,6 @@
 	.read_cr0 = native_read_cr0,
 	.write_cr0 = native_write_cr0,
 	.read_cr4 = native_read_cr4,
-	.read_cr4_safe = native_read_cr4_safe,
 	.write_cr4 = native_write_cr4,
 #ifdef CONFIG_X86_64
 	.read_cr8 = native_read_cr8,
@@ -389,7 +388,7 @@
 #define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_64)
 #endif
 
-struct pv_mmu_ops pv_mmu_ops = {
+struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
 
 	.read_cr2 = native_read_cr2,
 	.write_cr2 = native_write_cr2,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 158dc06..920c6ae 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -10,7 +10,7 @@
 DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
 DEF_NATIVE(pv_cpu_ops, clts, "clts");
 
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%eax)");
 #endif
 
@@ -49,7 +49,7 @@
 		PATCH_SITE(pv_mmu_ops, read_cr3);
 		PATCH_SITE(pv_mmu_ops, write_cr3);
 		PATCH_SITE(pv_cpu_ops, clts);
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
 		case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
 			if (pv_is_native_spin_unlock()) {
 				start = start_pv_lock_ops_queued_spin_unlock;
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index e70087a..bb3840c 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -19,7 +19,7 @@
 DEF_NATIVE(, mov32, "mov %edi, %eax");
 DEF_NATIVE(, mov64, "mov %rdi, %rax");
 
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
 DEF_NATIVE(pv_lock_ops, queued_spin_unlock, "movb $0, (%rdi)");
 #endif
 
@@ -61,7 +61,7 @@
 		PATCH_SITE(pv_cpu_ops, clts);
 		PATCH_SITE(pv_mmu_ops, flush_tlb_single);
 		PATCH_SITE(pv_cpu_ops, wbinvd);
-#if defined(CONFIG_PARAVIRT_SPINLOCKS) && defined(CONFIG_QUEUED_SPINLOCKS)
+#if defined(CONFIG_PARAVIRT_SPINLOCKS)
 		case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
 			if (pv_is_native_spin_unlock()) {
 				start = start_pv_lock_ops_queued_spin_unlock;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 62c0b0e..4002b47 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -32,6 +32,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mce.h>
 #include <asm/vm86.h>
+#include <asm/switch_to.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -513,6 +514,17 @@
 }
 
 /*
+ * Return saved PC of a blocked thread.
+ * What is this good for? it will be always the scheduler or ret_from_fork.
+ */
+unsigned long thread_saved_pc(struct task_struct *tsk)
+{
+	struct inactive_task_frame *frame =
+		(struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
+	return READ_ONCE_NOCHECK(frame->ret_addr);
+}
+
+/*
  * Called from fs/proc with a reference on @p to find the function
  * which called into schedule(). This needs to be done carefully
  * because the task might wake up and we might look at a stack
@@ -520,15 +532,18 @@
  */
 unsigned long get_wchan(struct task_struct *p)
 {
-	unsigned long start, bottom, top, sp, fp, ip;
+	unsigned long start, bottom, top, sp, fp, ip, ret = 0;
 	int count = 0;
 
 	if (!p || p == current || p->state == TASK_RUNNING)
 		return 0;
 
+	if (!try_get_task_stack(p))
+		return 0;
+
 	start = (unsigned long)task_stack_page(p);
 	if (!start)
-		return 0;
+		goto out;
 
 	/*
 	 * Layout of the stack page:
@@ -537,9 +552,7 @@
 	 * PADDING
 	 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
 	 * stack
-	 * ----------- bottom = start + sizeof(thread_info)
-	 * thread_info
-	 * ----------- start
+	 * ----------- bottom = start
 	 *
 	 * The tasks stack pointer points at the location where the
 	 * framepointer is stored. The data on the stack is:
@@ -550,20 +563,25 @@
 	 */
 	top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
 	top -= 2 * sizeof(unsigned long);
-	bottom = start + sizeof(struct thread_info);
+	bottom = start;
 
 	sp = READ_ONCE(p->thread.sp);
 	if (sp < bottom || sp > top)
-		return 0;
+		goto out;
 
-	fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+	fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
 	do {
 		if (fp < bottom || fp > top)
-			return 0;
+			goto out;
 		ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
-		if (!in_sched_functions(ip))
-			return ip;
+		if (!in_sched_functions(ip)) {
+			ret = ip;
+			goto out;
+		}
 		fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
 	} while (count++ < 16 && p->state != TASK_RUNNING);
-	return 0;
+
+out:
+	put_task_stack(p);
+	return ret;
 }
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index d86be29..bd7be8e 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,17 +55,6 @@
 #include <asm/switch_to.h>
 #include <asm/vm86.h>
 
-asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
-asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
-
-/*
- * Return saved PC of a blocked thread.
- */
-unsigned long thread_saved_pc(struct task_struct *tsk)
-{
-	return ((unsigned long *)tsk->thread.sp)[3];
-}
-
 void __show_regs(struct pt_regs *regs, int all)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
@@ -101,7 +90,7 @@
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
-	cr4 = __read_cr4_safe();
+	cr4 = __read_cr4();
 	printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
 			cr0, cr2, cr3, cr4);
 
@@ -133,35 +122,31 @@
 	unsigned long arg, struct task_struct *p, unsigned long tls)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
+	struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
+	struct inactive_task_frame *frame = &fork_frame->frame;
 	struct task_struct *tsk;
 	int err;
 
-	p->thread.sp = (unsigned long) childregs;
+	frame->bp = 0;
+	frame->ret_addr = (unsigned long) ret_from_fork;
+	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.sp0 = (unsigned long) (childregs+1);
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		p->thread.ip = (unsigned long) ret_from_kernel_thread;
-		task_user_gs(p) = __KERNEL_STACK_CANARY;
-		childregs->ds = __USER_DS;
-		childregs->es = __USER_DS;
-		childregs->fs = __KERNEL_PERCPU;
-		childregs->bx = sp;	/* function */
-		childregs->bp = arg;
-		childregs->orig_ax = -1;
-		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+		frame->bx = sp;		/* function */
+		frame->di = arg;
 		p->thread.io_bitmap_ptr = NULL;
 		return 0;
 	}
+	frame->bx = 0;
 	*childregs = *current_pt_regs();
 	childregs->ax = 0;
 	if (sp)
 		childregs->sp = sp;
 
-	p->thread.ip = (unsigned long) ret_from_fork;
 	task_user_gs(p) = get_user_gs(current_pt_regs());
 
 	p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 63236d8..ee944bd 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -49,8 +49,7 @@
 #include <asm/debugreg.h>
 #include <asm/switch_to.h>
 #include <asm/xen/hypervisor.h>
-
-asmlinkage extern void ret_from_fork(void);
+#include <asm/vdso.h>
 
 __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
@@ -141,12 +140,17 @@
 {
 	int err;
 	struct pt_regs *childregs;
+	struct fork_frame *fork_frame;
+	struct inactive_task_frame *frame;
 	struct task_struct *me = current;
 
 	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
-	p->thread.sp = (unsigned long) childregs;
-	set_tsk_thread_flag(p, TIF_FORK);
+	fork_frame = container_of(childregs, struct fork_frame, regs);
+	frame = &fork_frame->frame;
+	frame->bp = 0;
+	frame->ret_addr = (unsigned long) ret_from_fork;
+	p->thread.sp = (unsigned long) fork_frame;
 	p->thread.io_bitmap_ptr = NULL;
 
 	savesegment(gs, p->thread.gsindex);
@@ -160,15 +164,11 @@
 	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		childregs->sp = (unsigned long)childregs;
-		childregs->ss = __KERNEL_DS;
-		childregs->bx = sp; /* function */
-		childregs->bp = arg;
-		childregs->orig_ax = -1;
-		childregs->cs = __KERNEL_CS | get_kernel_rpl();
-		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
+		frame->bx = sp;		/* function */
+		frame->r12 = arg;
 		return 0;
 	}
+	frame->bx = 0;
 	*childregs = *current_pt_regs();
 
 	childregs->ax = 0;
@@ -511,7 +511,7 @@
 		current->personality &= ~READ_IMPLIES_EXEC;
 		/* in_compat_syscall() uses the presence of the x32
 		   syscall bit flag to determine compat status */
-		current_thread_info()->status &= ~TS_COMPAT;
+		current->thread.status &= ~TS_COMPAT;
 	} else {
 		set_thread_flag(TIF_IA32);
 		clear_thread_flag(TIF_X32);
@@ -519,11 +519,24 @@
 			current->mm->context.ia32_compat = TIF_IA32;
 		current->personality |= force_personality32;
 		/* Prepare the first "return" to user space */
-		current_thread_info()->status |= TS_COMPAT;
+		current->thread.status |= TS_COMPAT;
 	}
 }
 EXPORT_SYMBOL_GPL(set_personality_ia32);
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
+{
+	int ret;
+
+	ret = map_vdso_once(image, addr);
+	if (ret)
+		return ret;
+
+	return (long)image->size;
+}
+#endif
+
 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
 {
 	int ret = 0;
@@ -577,6 +590,19 @@
 		break;
 	}
 
+#ifdef CONFIG_CHECKPOINT_RESTORE
+# ifdef CONFIG_X86_X32_ABI
+	case ARCH_MAP_VDSO_X32:
+		return prctl_map_vdso(&vdso_image_x32, addr);
+# endif
+# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+	case ARCH_MAP_VDSO_32:
+		return prctl_map_vdso(&vdso_image_32, addr);
+# endif
+	case ARCH_MAP_VDSO_64:
+		return prctl_map_vdso(&vdso_image_64, addr);
+#endif
+
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f79576a..0e63c02 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -173,8 +173,8 @@
 		return sp;
 
 	prev_esp = (u32 *)(context);
-	if (prev_esp)
-		return (unsigned long)prev_esp;
+	if (*prev_esp)
+		return (unsigned long)*prev_esp;
 
 	return (unsigned long)regs;
 }
@@ -934,7 +934,7 @@
 		 */
 		regs->orig_ax = value;
 		if (syscall_get_nr(child, regs) >= 0)
-			task_thread_info(child)->status |= TS_I386_REGS_POKED;
+			child->thread.status |= TS_I386_REGS_POKED;
 		break;
 
 	case offsetof(struct user32, regs.eflags):
@@ -1250,7 +1250,7 @@
 
 #ifdef CONFIG_X86_64
 
-static struct user_regset x86_64_regsets[] __read_mostly = {
+static struct user_regset x86_64_regsets[] __ro_after_init = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct) / sizeof(long),
@@ -1291,7 +1291,7 @@
 #endif	/* CONFIG_X86_64 */
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
-static struct user_regset x86_32_regsets[] __read_mostly = {
+static struct user_regset x86_32_regsets[] __ro_after_init = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
 		.n = sizeof(struct user_regs_struct32) / sizeof(u32),
@@ -1344,7 +1344,7 @@
  */
 u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
-void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
+void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
 {
 #ifdef CONFIG_X86_64
 	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);
@@ -1358,7 +1358,7 @@
 const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 {
 #ifdef CONFIG_IA32_EMULATION
-	if (test_tsk_thread_flag(task, TIF_IA32))
+	if (!user_64bit_mode(task_pt_regs(task)))
 #endif
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
 		return &user_x86_32_view;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index cc457ff..51402a7 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -626,3 +626,34 @@
 			amd_disable_seq_and_redirect_scrub);
 
 #endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
+#include <linux/jump_label.h>
+#include <asm/string_64.h>
+
+/* Ivy Bridge, Haswell, Broadwell */
+static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
+{
+	u32 capid0;
+
+	pci_read_config_dword(pdev, 0x84, &capid0);
+
+	if (capid0 & 0x10)
+		static_branch_inc(&mcsafe_key);
+}
+
+/* Skylake */
+static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
+{
+	u32 capid0;
+
+	pci_read_config_dword(pdev, 0x84, &capid0);
+
+	if ((capid0 & 0xc0) == 0xc0)
+		static_branch_inc(&mcsafe_key);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
+#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 63bf27d..e244c19 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -705,7 +705,7 @@
 	tboot_shutdown(TB_SHUTDOWN_HALT);
 }
 
-struct machine_ops machine_ops = {
+struct machine_ops machine_ops __ro_after_init = {
 	.power_off = native_machine_power_off,
 	.shutdown = native_machine_shutdown,
 	.emergency_restart = native_machine_emergency_restart,
diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index 80eab01..2408c16 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -27,8 +27,8 @@
 	int i;
 	struct e820entry *entry;
 
-	for (i = 0; i < e820.nr_map; i++) {
-		entry = &e820.map[i];
+	for (i = 0; i < e820->nr_map; i++) {
+		entry = &e820->map[i];
 
 		resource_clip(avail, entry->addr,
 			      entry->addr + entry->size - 1);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 991b779..bbfbca5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -210,9 +210,9 @@
 
 
 #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-__visible unsigned long mmu_cr4_features;
+__visible unsigned long mmu_cr4_features __ro_after_init;
 #else
-__visible unsigned long mmu_cr4_features = X86_CR4_PAE;
+__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE;
 #endif
 
 /* Boot loader ID and version as integers, for the benefit of proc_dointvec */
@@ -458,8 +458,8 @@
 		early_memunmap(data, sizeof(*data));
 	}
 
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-	memcpy(&e820_saved, &e820, sizeof(struct e820map));
+	sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
+	memcpy(e820_saved, e820, sizeof(struct e820map));
 	printk(KERN_INFO "extended physical RAM map:\n");
 	e820_print_map("reserve setup_data");
 }
@@ -763,7 +763,7 @@
 	 */
 	e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
 
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
 }
 
 /* called before trim_bios_range() to spare extra sanitize */
@@ -936,8 +936,6 @@
 
 	x86_init.oem.arch_setup();
 
-	kernel_randomize_memory();
-
 	iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
 	setup_memory_map();
 	parse_setup_data();
@@ -1034,7 +1032,7 @@
 	if (ppro_with_ram_bug()) {
 		e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
 				  E820_RESERVED);
-		sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+		sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
 		printk(KERN_INFO "fixed physical RAM map:\n");
 		e820_print_map("bad_ppro");
 	}
@@ -1055,6 +1053,12 @@
 
 	max_possible_pfn = max_pfn;
 
+	/*
+	 * Define random base addresses for memory sections after max_pfn is
+	 * defined and before each memory section base is used.
+	 */
+	kernel_randomize_memory();
+
 #ifdef CONFIG_X86_32
 	/* max_low_pfn get updated here */
 	find_low_pfn_range();
@@ -1092,17 +1096,19 @@
 	memblock_set_current_limit(ISA_END_ADDRESS);
 	memblock_x86_fill();
 
-	if (efi_enabled(EFI_BOOT)) {
+	reserve_bios_regions();
+
+	if (efi_enabled(EFI_MEMMAP)) {
 		efi_fake_memmap();
 		efi_find_mirror();
-	}
+		efi_esrt_init();
 
-	/*
-	 * The EFI specification says that boot service code won't be called
-	 * after ExitBootServices(). This is, in fact, a lie.
-	 */
-	if (efi_enabled(EFI_MEMMAP))
+		/*
+		 * The EFI specification says that boot service code won't be
+		 * called after ExitBootServices(). This is, in fact, a lie.
+		 */
 		efi_reserve_boot_services();
+	}
 
 	/* preallocate 4k for mptable mpc */
 	early_reserve_e820_mpc_new();
@@ -1125,7 +1131,13 @@
 
 	early_trap_pf_init();
 
-	setup_real_mode();
+	/*
+	 * Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)
+	 * with the current CR4 value.  This may not be necessary, but
+	 * auditing all the early-boot CR4 manipulation would be needed to
+	 * rule it out.
+	 */
+	mmu_cr4_features = __read_cr4();
 
 	memblock_set_current_limit(get_max_mapped());
 
@@ -1174,13 +1186,6 @@
 
 	kasan_init();
 
-	if (boot_cpu_data.cpuid_level >= 0) {
-		/* A CPU has %cr4 if and only if it has CPUID */
-		mmu_cr4_features = __read_cr4();
-		if (trampoline_cr4_features)
-			*trampoline_cr4_features = mmu_cr4_features;
-	}
-
 #ifdef CONFIG_X86_32
 	/* sync back kernel address range */
 	clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
@@ -1214,8 +1219,7 @@
 	/*
 	 * get boot-time SMP configuration:
 	 */
-	if (smp_found_config)
-		get_smp_config();
+	get_smp_config();
 
 	prefill_possible_map();
 
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 7a40e06..2bbd27f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -33,7 +33,7 @@
 DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
 EXPORT_PER_CPU_SYMBOL(this_cpu_off);
 
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
+unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
 	[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
 };
 EXPORT_SYMBOL(__per_cpu_offset);
@@ -246,7 +246,7 @@
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
 			per_cpu(irq_stack_union.irq_stack, cpu) +
-			IRQ_STACK_SIZE - 64;
+			IRQ_STACK_SIZE;
 #endif
 #ifdef CONFIG_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 04cb321..763af1d 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -42,6 +42,7 @@
 #include <asm/syscalls.h>
 
 #include <asm/sigframe.h>
+#include <asm/signal.h>
 
 #define COPY(x)			do {			\
 	get_user_ex(regs->x, &sc->x);			\
@@ -547,7 +548,7 @@
 		return -EFAULT;
 
 	if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
-		if (copy_siginfo_to_user32(&frame->info, &ksig->info))
+		if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true))
 			return -EFAULT;
 	}
 
@@ -660,20 +661,21 @@
 	return 0;
 }
 
-static inline int is_ia32_compat_frame(void)
+static inline int is_ia32_compat_frame(struct ksignal *ksig)
 {
 	return IS_ENABLED(CONFIG_IA32_EMULATION) &&
-	       test_thread_flag(TIF_IA32);
+		ksig->ka.sa.sa_flags & SA_IA32_ABI;
 }
 
-static inline int is_ia32_frame(void)
+static inline int is_ia32_frame(struct ksignal *ksig)
 {
-	return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame();
+	return IS_ENABLED(CONFIG_X86_32) || is_ia32_compat_frame(ksig);
 }
 
-static inline int is_x32_frame(void)
+static inline int is_x32_frame(struct ksignal *ksig)
 {
-	return IS_ENABLED(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
+	return IS_ENABLED(CONFIG_X86_X32_ABI) &&
+		ksig->ka.sa.sa_flags & SA_X32_ABI;
 }
 
 static int
@@ -684,12 +686,12 @@
 	compat_sigset_t *cset = (compat_sigset_t *) set;
 
 	/* Set up the stack frame */
-	if (is_ia32_frame()) {
+	if (is_ia32_frame(ksig)) {
 		if (ksig->ka.sa.sa_flags & SA_SIGINFO)
 			return ia32_setup_rt_frame(usig, ksig, cset, regs);
 		else
 			return ia32_setup_frame(usig, ksig, cset, regs);
-	} else if (is_x32_frame()) {
+	} else if (is_x32_frame(ksig)) {
 		return x32_setup_rt_frame(ksig, cset, regs);
 	} else {
 		return __setup_rt_frame(ksig->sig, ksig, set, regs);
@@ -783,7 +785,7 @@
 	 * than the tracee.
 	 */
 #ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+	if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
 		return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index b44564b..40df337 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -1,5 +1,6 @@
 #include <linux/compat.h>
 #include <linux/uaccess.h>
+#include <linux/ptrace.h>
 
 /*
  * The compat_siginfo_t structure and handing code is very easy
@@ -92,10 +93,31 @@
 	/* any new si_fields should be added here */
 }
 
-int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
+void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
+{
+	/* Don't leak in-kernel non-uapi flags to user-space */
+	if (oact)
+		oact->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
+
+	if (!act)
+		return;
+
+	/* Don't let flags to be set from userspace */
+	act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
+
+	if (user_64bit_mode(current_pt_regs()))
+		return;
+
+	if (in_ia32_syscall())
+		act->sa.sa_flags |= SA_IA32_ABI;
+	if (in_x32_syscall())
+		act->sa.sa_flags |= SA_X32_ABI;
+}
+
+int __copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from,
+		bool x32_ABI)
 {
 	int err = 0;
-	bool ia32 = test_thread_flag(TIF_IA32);
 
 	signal_compat_build_tests();
 
@@ -146,7 +168,7 @@
 				put_user_ex(from->si_arch, &to->si_arch);
 				break;
 			case __SI_CHLD >> 16:
-				if (ia32) {
+				if (!x32_ABI) {
 					put_user_ex(from->si_utime, &to->si_utime);
 					put_user_ex(from->si_stime, &to->si_stime);
 				} else {
@@ -180,6 +202,12 @@
 	return err;
 }
 
+/* from syscall's path, where we know the ABI */
+int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
+{
+	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
+}
+
 int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 {
 	int err = 0;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2a6e84a..42a9362 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -100,10 +100,11 @@
 /* Logical package management. We might want to allocate that dynamically */
 static int *physical_to_logical_pkg __read_mostly;
 static unsigned long *physical_package_map __read_mostly;;
-static unsigned long *logical_package_map  __read_mostly;
 static unsigned int max_physical_pkg_id __read_mostly;
 unsigned int __max_logical_packages __read_mostly;
 EXPORT_SYMBOL(__max_logical_packages);
+static unsigned int logical_packages __read_mostly;
+static bool logical_packages_frozen __read_mostly;
 
 /* Maximum number of SMT threads on any online core */
 int __max_smt_threads __read_mostly;
@@ -277,14 +278,14 @@
 	if (test_and_set_bit(pkg, physical_package_map))
 		goto found;
 
-	new = find_first_zero_bit(logical_package_map, __max_logical_packages);
-	if (new >= __max_logical_packages) {
+	if (logical_packages_frozen) {
 		physical_to_logical_pkg[pkg] = -1;
-		pr_warn("APIC(%x) Package %u exceeds logical package map\n",
+		pr_warn("APIC(%x) Package %u exceeds logical package max\n",
 			apicid, pkg);
 		return -ENOSPC;
 	}
-	set_bit(new, logical_package_map);
+
+	new = logical_packages++;
 	pr_info("APIC(%x) Converting physical %u to logical package %u\n",
 		apicid, pkg, new);
 	physical_to_logical_pkg[pkg] = new;
@@ -341,6 +342,7 @@
 	}
 
 	__max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus);
+	logical_packages = 0;
 
 	/*
 	 * Possibly larger than what we need as the number of apic ids per
@@ -352,10 +354,6 @@
 	memset(physical_to_logical_pkg, 0xff, size);
 	size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long);
 	physical_package_map = kzalloc(size, GFP_KERNEL);
-	size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long);
-	logical_package_map = kzalloc(size, GFP_KERNEL);
-
-	pr_info("Max logical packages: %u\n", __max_logical_packages);
 
 	for_each_present_cpu(cpu) {
 		unsigned int apicid = apic->cpu_present_to_apicid(cpu);
@@ -369,6 +367,15 @@
 		set_cpu_possible(cpu, false);
 		set_cpu_present(cpu, false);
 	}
+
+	if (logical_packages > __max_logical_packages) {
+		pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n",
+			logical_packages, __max_logical_packages);
+		logical_packages_frozen = true;
+		__max_logical_packages  = logical_packages;
+	}
+
+	pr_info("Max logical packages: %u\n", __max_logical_packages);
 }
 
 void __init smp_store_boot_cpu_info(void)
@@ -464,7 +471,7 @@
 	return false;
 }
 
-static struct sched_domain_topology_level numa_inside_package_topology[] = {
+static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
 #ifdef CONFIG_SCHED_SMT
 	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 #endif
@@ -473,22 +480,23 @@
 #endif
 	{ NULL, },
 };
+
+static struct sched_domain_topology_level x86_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
 /*
- * set_sched_topology() sets the topology internal to a CPU.  The
- * NUMA topologies are layered on top of it to build the full
- * system topology.
- *
- * If NUMA nodes are observed to occur within a CPU package, this
- * function should be called.  It forces the sched domain code to
- * only use the SMT level for the CPU portion of the topology.
- * This essentially falls back to relying on NUMA information
- * from the SRAT table to describe the entire system topology
- * (except for hyperthreads).
+ * Set if a package/die has multiple NUMA nodes inside.
+ * AMD Magny-Cours and Intel Cluster-on-Die have this.
  */
-static void primarily_use_numa_for_topology(void)
-{
-	set_sched_topology(numa_inside_package_topology);
-}
+static bool x86_has_numa_in_package;
 
 void set_cpu_sibling_map(int cpu)
 {
@@ -551,7 +559,7 @@
 				c->booted_cores = cpu_data(i).booted_cores;
 		}
 		if (match_die(c, o) && !topology_same_node(c, o))
-			primarily_use_numa_for_topology();
+			x86_has_numa_in_package = true;
 	}
 
 	threads = cpumask_weight(topology_sibling_cpumask(cpu));
@@ -683,7 +691,7 @@
 	 * Give the other CPU some time to accept the IPI.
 	 */
 	udelay(200);
-	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
 		maxlvt = lapic_get_maxlvt();
 		if (maxlvt > 3)			/* Due to the Pentium erratum 3AP.  */
 			apic_write(APIC_ESR, 0);
@@ -710,7 +718,7 @@
 	/*
 	 * Be paranoid about clearing APIC errors.
 	 */
-	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
+	if (APIC_INTEGRATED(boot_cpu_apic_version)) {
 		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
 			apic_write(APIC_ESR, 0);
 		apic_read(APIC_ESR);
@@ -749,7 +757,7 @@
 	 * Determine this based on the APIC version.
 	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
 	 */
-	if (APIC_INTEGRATED(apic_version[phys_apicid]))
+	if (APIC_INTEGRATED(boot_cpu_apic_version))
 		num_starts = 2;
 	else
 		num_starts = 0;
@@ -935,7 +943,6 @@
 	per_cpu(cpu_current_top_of_stack, cpu) =
 		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
-	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
 #endif
 }
@@ -962,7 +969,7 @@
 
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
-	stack_start  = idle->thread.sp;
+	initial_stack  = idle->thread.sp;
 
 	/*
 	 * Enable the espfix hack for this CPU
@@ -987,7 +994,7 @@
 		/*
 		 * Be paranoid about clearing APIC errors.
 		*/
-		if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+		if (APIC_INTEGRATED(boot_cpu_apic_version)) {
 			apic_write(APIC_ESR, 0);
 			apic_read(APIC_ESR);
 		}
@@ -1108,17 +1115,8 @@
 
 	common_cpu_up(cpu, tidle);
 
-	/*
-	 * We have to walk the irq descriptors to setup the vector
-	 * space for the cpu which comes online.  Prevent irq
-	 * alloc/free across the bringup.
-	 */
-	irq_lock_sparse();
-
 	err = do_boot_cpu(apicid, cpu, tidle);
-
 	if (err) {
-		irq_unlock_sparse();
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
 		return -EIO;
 	}
@@ -1136,8 +1134,6 @@
 		touch_nmi_watchdog();
 	}
 
-	irq_unlock_sparse();
-
 	return 0;
 }
 
@@ -1242,7 +1238,7 @@
 	/*
 	 * If we couldn't find a local APIC, then get out of here now!
 	 */
-	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
+	if (APIC_INTEGRATED(boot_cpu_apic_version) &&
 	    !boot_cpu_has(X86_FEATURE_APIC)) {
 		if (!disable_apic) {
 			pr_err("BIOS bug, local APIC #%d not detected!...\n",
@@ -1297,6 +1293,16 @@
 		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
 		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
 	}
+
+	/*
+	 * Set 'default' x86 topology, this matches default_topology() in that
+	 * it has NUMA nodes as a topology level. See also
+	 * native_smp_cpus_done().
+	 *
+	 * Must be done before set_cpus_sibling_map() is ran.
+	 */
+	set_sched_topology(x86_topology);
+
 	set_cpu_sibling_map(0);
 
 	switch (smp_sanity_check(max_cpus)) {
@@ -1316,14 +1322,13 @@
 		break;
 	}
 
-	default_setup_apic_routing();
-
 	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
 		     read_apic_id(), boot_cpu_physical_apicid);
 		/* Or can we switch back to PIC here? */
 	}
 
+	default_setup_apic_routing();
 	cpu0_logical_apicid = apic_bsp_setup(false);
 
 	pr_info("CPU%d: ", 0);
@@ -1363,6 +1368,9 @@
 {
 	pr_debug("Boot done\n");
 
+	if (x86_has_numa_in_package)
+		set_sched_topology(x86_numa_in_package_topology);
+
 	nmi_selftest();
 	impress_friends();
 	setup_ioapic_dest();
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 4738f5e..0653788 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -8,80 +8,69 @@
 #include <linux/export.h>
 #include <linux/uaccess.h>
 #include <asm/stacktrace.h>
+#include <asm/unwind.h>
 
-static int save_stack_stack(void *data, char *name)
+static int save_stack_address(struct stack_trace *trace, unsigned long addr,
+			      bool nosched)
 {
-	return 0;
-}
-
-static int
-__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
-{
-	struct stack_trace *trace = data;
-#ifdef CONFIG_FRAME_POINTER
-	if (!reliable)
-		return 0;
-#endif
 	if (nosched && in_sched_functions(addr))
 		return 0;
+
 	if (trace->skip > 0) {
 		trace->skip--;
 		return 0;
 	}
-	if (trace->nr_entries < trace->max_entries) {
-		trace->entries[trace->nr_entries++] = addr;
-		return 0;
-	} else {
-		return -1; /* no more room, stop walking the stack */
+
+	if (trace->nr_entries >= trace->max_entries)
+		return -1;
+
+	trace->entries[trace->nr_entries++] = addr;
+	return 0;
+}
+
+static void __save_stack_trace(struct stack_trace *trace,
+			       struct task_struct *task, struct pt_regs *regs,
+			       bool nosched)
+{
+	struct unwind_state state;
+	unsigned long addr;
+
+	if (regs)
+		save_stack_address(trace, regs->ip, nosched);
+
+	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
+	     unwind_next_frame(&state)) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || save_stack_address(trace, addr, nosched))
+			break;
 	}
+
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
 
-static int save_stack_address(void *data, unsigned long addr, int reliable)
-{
-	return __save_stack_address(data, addr, reliable, false);
-}
-
-static int
-save_stack_address_nosched(void *data, unsigned long addr, int reliable)
-{
-	return __save_stack_address(data, addr, reliable, true);
-}
-
-static const struct stacktrace_ops save_stack_ops = {
-	.stack		= save_stack_stack,
-	.address	= save_stack_address,
-	.walk_stack	= print_context_stack,
-};
-
-static const struct stacktrace_ops save_stack_ops_nosched = {
-	.stack		= save_stack_stack,
-	.address	= save_stack_address_nosched,
-	.walk_stack	= print_context_stack,
-};
-
 /*
  * Save stack-backtrace addresses into a stack_trace buffer.
  */
 void save_stack_trace(struct stack_trace *trace)
 {
-	dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
+	__save_stack_trace(trace, current, NULL, false);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
 {
-	dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
+	__save_stack_trace(trace, current, regs, false);
 }
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-	dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
-	if (trace->nr_entries < trace->max_entries)
-		trace->entries[trace->nr_entries++] = ULONG_MAX;
+	if (!try_get_task_stack(tsk))
+		return;
+
+	__save_stack_trace(trace, tsk, NULL, true);
+
+	put_task_stack(tsk);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 654f6c6..8402907 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -188,12 +188,12 @@
 
 	tboot->num_mac_regions = 0;
 
-	for (i = 0; i < e820.nr_map; i++) {
-		if ((e820.map[i].type != E820_RAM)
-		 && (e820.map[i].type != E820_RESERVED_KERN))
+	for (i = 0; i < e820->nr_map; i++) {
+		if ((e820->map[i].type != E820_RAM)
+		 && (e820->map[i].type != E820_RESERVED_KERN))
 			continue;
 
-		add_mac_region(e820.map[i].addr, e820.map[i].size);
+		add_mac_region(e820->map[i].addr, e820->map[i].size);
 	}
 
 	tboot->acpi_sinfo.kernel_s3_resume_vector =
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b70ca12..bd4e3d4 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,12 +292,30 @@
 DO_ERROR(X86_TRAP_SS,     SIGBUS,  "stack segment",		stack_segment)
 DO_ERROR(X86_TRAP_AC,     SIGBUS,  "alignment check",		alignment_check)
 
+#ifdef CONFIG_VMAP_STACK
+__visible void __noreturn handle_stack_overflow(const char *message,
+						struct pt_regs *regs,
+						unsigned long fault_address)
+{
+	printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
+		 (void *)fault_address, current->stack,
+		 (char *)current->stack + THREAD_SIZE - 1);
+	die(message, regs, 0);
+
+	/* Be absolutely certain we don't return. */
+	panic(message);
+}
+#endif
+
 #ifdef CONFIG_X86_64
 /* Runs on IST stack */
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 {
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
+#ifdef CONFIG_VMAP_STACK
+	unsigned long cr2;
+#endif
 
 #ifdef CONFIG_X86_ESPFIX64
 	extern unsigned char native_irq_return_iret[];
@@ -332,6 +350,49 @@
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_DF;
 
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * If we overflow the stack into a guard page, the CPU will fail
+	 * to deliver #PF and will send #DF instead.  Similarly, if we
+	 * take any non-IST exception while too close to the bottom of
+	 * the stack, the processor will get a page fault while
+	 * delivering the exception and will generate a double fault.
+	 *
+	 * According to the SDM (footnote in 6.15 under "Interrupt 14 -
+	 * Page-Fault Exception (#PF):
+	 *
+	 *   Processors update CR2 whenever a page fault is detected. If a
+	 *   second page fault occurs while an earlier page fault is being
+	 *   deliv- ered, the faulting linear address of the second fault will
+	 *   overwrite the contents of CR2 (replacing the previous
+	 *   address). These updates to CR2 occur even if the page fault
+	 *   results in a double fault or occurs during the delivery of a
+	 *   double fault.
+	 *
+	 * The logic below has a small possibility of incorrectly diagnosing
+	 * some errors as stack overflows.  For example, if the IDT or GDT
+	 * gets corrupted such that #GP delivery fails due to a bad descriptor
+	 * causing #GP and we hit this condition while CR2 coincidentally
+	 * points to the stack guard page, we'll think we overflowed the
+	 * stack.  Given that we're going to panic one way or another
+	 * if this happens, this isn't necessarily worth fixing.
+	 *
+	 * If necessary, we could improve the test by only diagnosing
+	 * a stack overflow if the saved RSP points within 47 bytes of
+	 * the bottom of the stack: if RSP == tsk_stack + 48 and we
+	 * take an exception, the stack is already aligned and there
+	 * will be enough room SS, RSP, RFLAGS, CS, RIP, and a
+	 * possible error code, so a stack overflow would *not* double
+	 * fault.  With any less space left, exception delivery could
+	 * fail, and, as a practical matter, we've overflowed the
+	 * stack even if the actual trigger for the double fault was
+	 * something else.
+	 */
+	cr2 = read_cr2();
+	if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
+		handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
+#endif
+
 #ifdef CONFIG_DOUBLEFAULT
 	df_debug(regs, error_code);
 #endif
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1ef87e8..46b2f41 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -22,6 +22,8 @@
 #include <asm/nmi.h>
 #include <asm/x86_init.h>
 #include <asm/geode.h>
+#include <asm/apic.h>
+#include <asm/intel-family.h>
 
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -685,11 +687,16 @@
 
 	if (crystal_khz == 0) {
 		switch (boot_cpu_data.x86_model) {
-		case 0x4E:	/* SKL */
-		case 0x5E:	/* SKL */
+		case INTEL_FAM6_SKYLAKE_MOBILE:
+		case INTEL_FAM6_SKYLAKE_DESKTOP:
+		case INTEL_FAM6_KABYLAKE_MOBILE:
+		case INTEL_FAM6_KABYLAKE_DESKTOP:
 			crystal_khz = 24000;	/* 24.0 MHz */
 			break;
-		case 0x5C:	/* BXT */
+		case INTEL_FAM6_SKYLAKE_X:
+			crystal_khz = 25000;	/* 25.0 MHz */
+			break;
+		case INTEL_FAM6_ATOM_GOLDMONT:
 			crystal_khz = 19200;	/* 19.2 MHz */
 			break;
 		}
@@ -1249,6 +1256,9 @@
 		(unsigned long)tsc_khz / 1000,
 		(unsigned long)tsc_khz % 1000);
 
+	/* Inform the TSC deadline clockevent devices about the recalibration */
+	lapic_update_tsc_freq();
+
 out:
 	if (boot_cpu_has(X86_FEATURE_ART))
 		art_related_clocksource = &clocksource_tsc;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
new file mode 100644
index 0000000..a2456d4
--- /dev/null
+++ b/arch/x86/kernel/unwind_frame.c
@@ -0,0 +1,93 @@
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+#define FRAME_HEADER_SIZE (sizeof(long) * 2)
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	unsigned long addr;
+	unsigned long *addr_p = unwind_get_return_address_ptr(state);
+
+	if (unwind_done(state))
+		return 0;
+
+	addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
+				     addr_p);
+
+	return __kernel_text_address(addr) ? addr : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+static bool update_stack_state(struct unwind_state *state, void *addr,
+			       size_t len)
+{
+	struct stack_info *info = &state->stack_info;
+
+	/*
+	 * If addr isn't on the current stack, switch to the next one.
+	 *
+	 * We may have to traverse multiple stacks to deal with the possibility
+	 * that 'info->next_sp' could point to an empty stack and 'addr' could
+	 * be on a subsequent stack.
+	 */
+	while (!on_stack(info, addr, len))
+		if (get_stack_info(info->next_sp, state->task, info,
+				   &state->stack_mask))
+			return false;
+
+	return true;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	unsigned long *next_bp;
+
+	if (unwind_done(state))
+		return false;
+
+	next_bp = (unsigned long *)*state->bp;
+
+	/* make sure the next frame's data is accessible */
+	if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
+		return false;
+
+	/* move to the next frame */
+	state->bp = next_bp;
+	return true;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long *first_frame)
+{
+	memset(state, 0, sizeof(*state));
+	state->task = task;
+
+	/* don't even attempt to start from user mode regs */
+	if (regs && user_mode(regs)) {
+		state->stack_info.type = STACK_TYPE_UNKNOWN;
+		return;
+	}
+
+	/* set up the starting stack frame */
+	state->bp = get_frame_pointer(task, regs);
+
+	/* initialize stack info and make sure the frame data is accessible */
+	get_stack_info(state->bp, state->task, &state->stack_info,
+		       &state->stack_mask);
+	update_stack_state(state, state->bp, FRAME_HEADER_SIZE);
+
+	/*
+	 * The caller can provide the address of the first frame directly
+	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+	 * to start unwinding at.  Skip ahead until we reach it.
+	 */
+	while (!unwind_done(state) &&
+	       (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+			state->bp < first_frame))
+		unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
new file mode 100644
index 0000000..b5a834c
--- /dev/null
+++ b/arch/x86/kernel/unwind_guess.c
@@ -0,0 +1,43 @@
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+	struct stack_info *info = &state->stack_info;
+
+	if (unwind_done(state))
+		return false;
+
+	do {
+		for (state->sp++; state->sp < info->end; state->sp++)
+			if (__kernel_text_address(*state->sp))
+				return true;
+
+		state->sp = info->next_sp;
+
+	} while (!get_stack_info(state->sp, state->task, info,
+				 &state->stack_mask));
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long *first_frame)
+{
+	memset(state, 0, sizeof(*state));
+
+	state->task = task;
+	state->sp   = first_frame;
+
+	get_stack_info(first_frame, state->task, &state->stack_info,
+		       &state->stack_mask);
+
+	if (!__kernel_text_address(*first_frame))
+		unwind_next_frame(state);
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 6c1ff31..495c776 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -357,20 +357,22 @@
 		*cursor &= 0xfe;
 	}
 	/*
-	 * Similar treatment for VEX3 prefix.
-	 * TODO: add XOP/EVEX treatment when insn decoder supports them
+	 * Similar treatment for VEX3/EVEX prefix.
+	 * TODO: add XOP treatment when insn decoder supports them
 	 */
-	if (insn->vex_prefix.nbytes == 3) {
+	if (insn->vex_prefix.nbytes >= 3) {
 		/*
 		 * vex2:     c5    rvvvvLpp   (has no b bit)
 		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
 		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
-		 *   (evex will need setting of both b and x since
-		 *   in non-sib encoding evex.x is 4th bit of MODRM.rm)
-		 * Setting VEX3.b (setting because it has inverted meaning):
+		 * Setting VEX3.b (setting because it has inverted meaning).
+		 * Setting EVEX.x since (in non-SIB encoding) EVEX.x
+		 * is the 4th bit of MODRM.rm, and needs the same treatment.
+		 * For VEX3-encoded insns, VEX3.x value has no effect in
+		 * non-SIB encoding, the change is superfluous but harmless.
 		 */
 		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
-		*cursor |= 0x20;
+		*cursor |= 0x60;
 	}
 
 	/*
@@ -415,12 +417,10 @@
 
 	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
 	reg2 = 0xff;		/* Fetch vex.vvvv */
-	if (insn->vex_prefix.nbytes == 2)
-		reg2 = insn->vex_prefix.bytes[1];
-	else if (insn->vex_prefix.nbytes == 3)
+	if (insn->vex_prefix.nbytes)
 		reg2 = insn->vex_prefix.bytes[2];
 	/*
-	 * TODO: add XOP, EXEV vvvv reading.
+	 * TODO: add XOP vvvv reading.
 	 *
 	 * vex.vvvv field is in bits 6-3, bits are inverted.
 	 * But in 32-bit mode, high-order bit may be ignored.
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 95e49f6..b2cee3d1 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -38,7 +38,7 @@
 EXPORT_SYMBOL(_copy_from_user);
 EXPORT_SYMBOL(_copy_to_user);
 
-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
+EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled);
 
 EXPORT_SYMBOL(copy_page);
 EXPORT_SYMBOL(clear_page);
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 76c5e52..0bd9f12 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -91,7 +91,7 @@
 static void default_nmi_init(void) { };
 static int default_i8042_detect(void) { return 1; };
 
-struct x86_platform_ops x86_platform = {
+struct x86_platform_ops x86_platform __ro_after_init = {
 	.calibrate_cpu			= native_calibrate_cpu,
 	.calibrate_tsc			= native_calibrate_tsc,
 	.get_wallclock			= mach_get_cmos_time,
@@ -108,7 +108,7 @@
 EXPORT_SYMBOL_GPL(x86_platform);
 
 #if defined(CONFIG_PCI_MSI)
-struct x86_msi_ops x86_msi = {
+struct x86_msi_ops x86_msi __ro_after_init = {
 	.setup_msi_irqs		= native_setup_msi_irqs,
 	.teardown_msi_irq	= native_teardown_msi_irq,
 	.teardown_msi_irqs	= default_teardown_msi_irqs,
@@ -137,7 +137,7 @@
 }
 #endif
 
-struct x86_io_apic_ops x86_io_apic_ops = {
+struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = {
 	.read			= native_io_apic_read,
 	.disable		= native_disable_io_apic,
 };
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 5f42d03..c7220ba 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -109,6 +109,7 @@
 {
 	bool new_val, old_val;
 	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
+	struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
 	union kvm_ioapic_redirect_entry *e;
 
 	e = &ioapic->redirtbl[RTC_GSI];
@@ -117,16 +118,17 @@
 		return;
 
 	new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
-	old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
+	old_val = test_bit(vcpu->vcpu_id, dest_map->map);
 
 	if (new_val == old_val)
 		return;
 
 	if (new_val) {
-		__set_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
+		__set_bit(vcpu->vcpu_id, dest_map->map);
+		dest_map->vectors[vcpu->vcpu_id] = e->fields.vector;
 		ioapic->rtc_status.pending_eoi++;
 	} else {
-		__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map.map);
+		__clear_bit(vcpu->vcpu_id, dest_map->map);
 		ioapic->rtc_status.pending_eoi--;
 		rtc_status_pending_eoi_check_valid(ioapic);
 	}
diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
index 39b9112..cd94443 100644
--- a/arch/x86/kvm/pmu_amd.c
+++ b/arch/x86/kvm/pmu_amd.c
@@ -23,8 +23,8 @@
 static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
 	[0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
 	[1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
-	[2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES },
-	[3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES },
+	[2] = { 0x7d, 0x07, PERF_COUNT_HW_CACHE_REFERENCES },
+	[3] = { 0x7e, 0x07, PERF_COUNT_HW_CACHE_MISSES },
 	[4] = { 0xc2, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
 	[5] = { 0xc3, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
 	[6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af523d8..1e6b84b 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4961,7 +4961,7 @@
 	avic_handle_ldr_update(vcpu);
 }
 
-static struct kvm_x86_ops svm_x86_ops = {
+static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = has_svm,
 	.disabled_by_bios = is_disabled,
 	.hardware_setup = svm_hardware_setup,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a45d858..121fdf6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -422,6 +422,7 @@
 	struct list_head vmcs02_pool;
 	int vmcs02_num;
 	u64 vmcs01_tsc_offset;
+	bool change_vmcs01_virtual_x2apic_mode;
 	/* L2 must run next, and mustn't decide to exit to L1. */
 	bool nested_run_pending;
 	/*
@@ -435,6 +436,8 @@
 	bool pi_pending;
 	u16 posted_intr_nv;
 
+	unsigned long *msr_bitmap;
+
 	struct hrtimer preemption_timer;
 	bool preemption_timer_expired;
 
@@ -924,7 +927,6 @@
 static unsigned long *vmx_msr_bitmap_longmode;
 static unsigned long *vmx_msr_bitmap_legacy_x2apic;
 static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_nested;
 static unsigned long *vmx_vmread_bitmap;
 static unsigned long *vmx_vmwrite_bitmap;
 
@@ -2198,6 +2200,12 @@
 			new.control) != old.control);
 }
 
+static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
+{
+	vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio;
+	vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -2256,10 +2264,8 @@
 
 	/* Setup TSC multiplier */
 	if (kvm_has_tsc_control &&
-	    vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) {
-		vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio;
-		vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio);
-	}
+	    vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
+		decache_tsc_multiplier(vmx);
 
 	vmx_vcpu_pi_load(vcpu, cpu);
 	vmx->host_pkru = read_pkru();
@@ -2508,7 +2514,7 @@
 	unsigned long *msr_bitmap;
 
 	if (is_guest_mode(vcpu))
-		msr_bitmap = vmx_msr_bitmap_nested;
+		msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
 	else if (cpu_has_secondary_exec_ctrls() &&
 		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
 		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
@@ -6363,13 +6369,6 @@
 	if (!vmx_msr_bitmap_longmode_x2apic)
 		goto out4;
 
-	if (nested) {
-		vmx_msr_bitmap_nested =
-			(unsigned long *)__get_free_page(GFP_KERNEL);
-		if (!vmx_msr_bitmap_nested)
-			goto out5;
-	}
-
 	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
 	if (!vmx_vmread_bitmap)
 		goto out6;
@@ -6392,8 +6391,6 @@
 
 	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
 	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-	if (nested)
-		memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
 
 	if (setup_vmcs_config(&vmcs_config) < 0) {
 		r = -EIO;
@@ -6529,9 +6526,6 @@
 out7:
 	free_page((unsigned long)vmx_vmread_bitmap);
 out6:
-	if (nested)
-		free_page((unsigned long)vmx_msr_bitmap_nested);
-out5:
 	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
 out4:
 	free_page((unsigned long)vmx_msr_bitmap_longmode);
@@ -6557,8 +6551,6 @@
 	free_page((unsigned long)vmx_io_bitmap_a);
 	free_page((unsigned long)vmx_vmwrite_bitmap);
 	free_page((unsigned long)vmx_vmread_bitmap);
-	if (nested)
-		free_page((unsigned long)vmx_msr_bitmap_nested);
 
 	free_kvm_area();
 }
@@ -6995,16 +6987,21 @@
 		return 1;
 	}
 
+	if (cpu_has_vmx_msr_bitmap()) {
+		vmx->nested.msr_bitmap =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+		if (!vmx->nested.msr_bitmap)
+			goto out_msr_bitmap;
+	}
+
 	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
 	if (!vmx->nested.cached_vmcs12)
-		return -ENOMEM;
+		goto out_cached_vmcs12;
 
 	if (enable_shadow_vmcs) {
 		shadow_vmcs = alloc_vmcs();
-		if (!shadow_vmcs) {
-			kfree(vmx->nested.cached_vmcs12);
-			return -ENOMEM;
-		}
+		if (!shadow_vmcs)
+			goto out_shadow_vmcs;
 		/* mark vmcs as shadow */
 		shadow_vmcs->revision_id |= (1u << 31);
 		/* init shadow vmcs */
@@ -7024,6 +7021,15 @@
 	skip_emulated_instruction(vcpu);
 	nested_vmx_succeed(vcpu);
 	return 1;
+
+out_shadow_vmcs:
+	kfree(vmx->nested.cached_vmcs12);
+
+out_cached_vmcs12:
+	free_page((unsigned long)vmx->nested.msr_bitmap);
+
+out_msr_bitmap:
+	return -ENOMEM;
 }
 
 /*
@@ -7098,6 +7104,10 @@
 	vmx->nested.vmxon = false;
 	free_vpid(vmx->nested.vpid02);
 	nested_release_vmcs12(vmx);
+	if (vmx->nested.msr_bitmap) {
+		free_page((unsigned long)vmx->nested.msr_bitmap);
+		vmx->nested.msr_bitmap = NULL;
+	}
 	if (enable_shadow_vmcs)
 		free_vmcs(vmx->nested.current_shadow_vmcs);
 	kfree(vmx->nested.cached_vmcs12);
@@ -8419,6 +8429,12 @@
 {
 	u32 sec_exec_control;
 
+	/* Postpone execution until vmcs01 is the current VMCS. */
+	if (is_guest_mode(vcpu)) {
+		to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true;
+		return;
+	}
+
 	/*
 	 * There is not point to enable virtualize x2apic without enable
 	 * apicv
@@ -9472,8 +9488,10 @@
 {
 	int msr;
 	struct page *page;
-	unsigned long *msr_bitmap;
+	unsigned long *msr_bitmap_l1;
+	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
 
+	/* This shortcut is ok because we support only x2APIC MSRs so far. */
 	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
 		return false;
 
@@ -9482,63 +9500,37 @@
 		WARN_ON(1);
 		return false;
 	}
-	msr_bitmap = (unsigned long *)kmap(page);
-	if (!msr_bitmap) {
+	msr_bitmap_l1 = (unsigned long *)kmap(page);
+	if (!msr_bitmap_l1) {
 		nested_release_page_clean(page);
 		WARN_ON(1);
 		return false;
 	}
 
+	memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
+
 	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
 		if (nested_cpu_has_apic_reg_virt(vmcs12))
 			for (msr = 0x800; msr <= 0x8ff; msr++)
 				nested_vmx_disable_intercept_for_msr(
-					msr_bitmap,
-					vmx_msr_bitmap_nested,
+					msr_bitmap_l1, msr_bitmap_l0,
 					msr, MSR_TYPE_R);
-		/* TPR is allowed */
-		nested_vmx_disable_intercept_for_msr(msr_bitmap,
-				vmx_msr_bitmap_nested,
+
+		nested_vmx_disable_intercept_for_msr(
+				msr_bitmap_l1, msr_bitmap_l0,
 				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
 				MSR_TYPE_R | MSR_TYPE_W);
+
 		if (nested_cpu_has_vid(vmcs12)) {
-			/* EOI and self-IPI are allowed */
 			nested_vmx_disable_intercept_for_msr(
-				msr_bitmap,
-				vmx_msr_bitmap_nested,
+				msr_bitmap_l1, msr_bitmap_l0,
 				APIC_BASE_MSR + (APIC_EOI >> 4),
 				MSR_TYPE_W);
 			nested_vmx_disable_intercept_for_msr(
-				msr_bitmap,
-				vmx_msr_bitmap_nested,
+				msr_bitmap_l1, msr_bitmap_l0,
 				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
 				MSR_TYPE_W);
 		}
-	} else {
-		/*
-		 * Enable reading intercept of all the x2apic
-		 * MSRs. We should not rely on vmcs12 to do any
-		 * optimizations here, it may have been modified
-		 * by L1.
-		 */
-		for (msr = 0x800; msr <= 0x8ff; msr++)
-			__vmx_enable_intercept_for_msr(
-				vmx_msr_bitmap_nested,
-				msr,
-				MSR_TYPE_R);
-
-		__vmx_enable_intercept_for_msr(
-				vmx_msr_bitmap_nested,
-				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
-				MSR_TYPE_W);
-		__vmx_enable_intercept_for_msr(
-				vmx_msr_bitmap_nested,
-				APIC_BASE_MSR + (APIC_EOI >> 4),
-				MSR_TYPE_W);
-		__vmx_enable_intercept_for_msr(
-				vmx_msr_bitmap_nested,
-				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
-				MSR_TYPE_W);
 	}
 	kunmap(page);
 	nested_release_page_clean(page);
@@ -9957,10 +9949,10 @@
 	}
 
 	if (cpu_has_vmx_msr_bitmap() &&
-	    exec_control & CPU_BASED_USE_MSR_BITMAPS) {
-		nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
-		/* MSR_BITMAP will be set by following vmx_set_efer. */
-	} else
+	    exec_control & CPU_BASED_USE_MSR_BITMAPS &&
+	    nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
+		; /* MSR_BITMAP will be set by following vmx_set_efer. */
+	else
 		exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 
 	/*
@@ -10011,6 +10003,8 @@
 			vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
 	else
 		vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset);
+	if (kvm_has_tsc_control)
+		decache_tsc_multiplier(vmx);
 
 	if (enable_vpid) {
 		/*
@@ -10767,6 +10761,14 @@
 	else
 		vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
 			      PIN_BASED_VMX_PREEMPTION_TIMER);
+	if (kvm_has_tsc_control)
+		decache_tsc_multiplier(vmx);
+
+	if (vmx->nested.change_vmcs01_virtual_x2apic_mode) {
+		vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
+		vmx_set_virtual_x2apic_mode(vcpu,
+				vcpu->arch.apic_base & X2APIC_ENABLE);
+	}
 
 	/* This is needed for same reason as it was needed in prepare_vmcs02 */
 	vmx->host_rsp = 0;
@@ -11175,7 +11177,7 @@
 			~FEATURE_CONTROL_LMCE;
 }
 
-static struct kvm_x86_ops vmx_x86_ops = {
+static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
 	.hardware_setup = hardware_setup,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19f9f9e..699f872 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2743,16 +2743,16 @@
 		if (tsc_delta < 0)
 			mark_tsc_unstable("KVM discovered backwards TSC");
 
-		if (kvm_lapic_hv_timer_in_use(vcpu) &&
-				kvm_x86_ops->set_hv_timer(vcpu,
-					kvm_get_lapic_tscdeadline_msr(vcpu)))
-			kvm_lapic_switch_to_sw_timer(vcpu);
 		if (check_tsc_unstable()) {
 			u64 offset = kvm_compute_tsc_offset(vcpu,
 						vcpu->arch.last_guest_tsc);
 			kvm_x86_ops->write_tsc_offset(vcpu, offset);
 			vcpu->arch.tsc_catchup = 1;
 		}
+		if (kvm_lapic_hv_timer_in_use(vcpu) &&
+				kvm_x86_ops->set_hv_timer(vcpu,
+					kvm_get_lapic_tscdeadline_msr(vcpu)))
+			kvm_lapic_switch_to_sw_timer(vcpu);
 		/*
 		 * On a host with synchronized TSC, there is no need to update
 		 * kvmclock on vcpu->cpu migration
diff --git a/arch/x86/lib/kaslr.c b/arch/x86/lib/kaslr.c
index f7dfeda..121f59c 100644
--- a/arch/x86/lib/kaslr.c
+++ b/arch/x86/lib/kaslr.c
@@ -19,7 +19,7 @@
 #include <asm/cpufeature.h>
 #include <asm/setup.h>
 
-#define debug_putstr(v) early_printk(v)
+#define debug_putstr(v) early_printk("%s", v)
 #define has_cpuflag(f) boot_cpu_has(f)
 #define get_boot_seed() kaslr_offset()
 #endif
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 2ec0b0abb..49e6eba 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -181,11 +181,11 @@
 
 #ifndef CONFIG_UML
 /*
- * memcpy_mcsafe - memory copy with machine check exception handling
+ * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
  * Note that we only catch machine checks when reading the source addresses.
  * Writes to target are posted and don't generate machine checks.
  */
-ENTRY(memcpy_mcsafe)
+ENTRY(memcpy_mcsafe_unrolled)
 	cmpl $8, %edx
 	/* Less than 8 bytes? Go to byte copy loop */
 	jb .L_no_whole_words
@@ -273,7 +273,7 @@
 .L_done_memcpy_trap:
 	xorq %rax, %rax
 	ret
-ENDPROC(memcpy_mcsafe)
+ENDPROC(memcpy_mcsafe_unrolled)
 
 	.section .fixup, "ax"
 	/* Return -EFAULT for any failure */
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index ba47524..d1c7de0 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -52,21 +52,6 @@
 	return -ENOENT;
 }
 
-static __init void early_get_boot_cpu_id(void)
-{
-	/*
-	 * need to get the APIC ID of the BSP so can use that to
-	 * create apicid_to_node in amd_scan_nodes()
-	 */
-#ifdef CONFIG_X86_MPPARSE
-	/*
-	 * get boot-time SMP configuration:
-	 */
-	if (smp_found_config)
-		early_get_smp_config();
-#endif
-}
-
 int __init amd_numa_init(void)
 {
 	u64 start = PFN_PHYS(0);
@@ -180,8 +165,11 @@
 	cores = 1 << bits;
 	apicid_base = 0;
 
-	/* get the APIC ID of the BSP early for systems with apicid lifting */
-	early_get_boot_cpu_id();
+	/*
+	 * get boot-time SMP configuration:
+	 */
+	early_get_smp_config();
+
 	if (boot_cpu_physical_apicid > 0) {
 		pr_info("BSP APIC ID: %02x\n", boot_cpu_physical_apicid);
 		apicid_base = boot_cpu_physical_apicid;
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 832b98f..79ae939 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -1,4 +1,4 @@
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <asm/uaccess.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index dc80230..1e52512 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -5,7 +5,7 @@
  */
 #include <linux/sched.h>		/* test_thread_flag(), ...	*/
 #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
-#include <linux/module.h>		/* search_exception_table	*/
+#include <linux/extable.h>		/* search_exception_table	*/
 #include <linux/bootmem.h>		/* max_low_pfn			*/
 #include <linux/kprobes.h>		/* NOKPROBE_SYMBOL, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
@@ -753,6 +753,38 @@
 		return;
 	}
 
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * Stack overflow?  During boot, we can fault near the initial
+	 * stack in the direct map, but that's not an overflow -- check
+	 * that we're in vmalloc space to avoid this.
+	 */
+	if (is_vmalloc_addr((void *)address) &&
+	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
+	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
+		register void *__sp asm("rsp");
+		unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
+		/*
+		 * We're likely to be running with very little stack space
+		 * left.  It's plausible that we'd hit this condition but
+		 * double-fault even before we get this far, in which case
+		 * we're fine: the double-fault handler will deal with it.
+		 *
+		 * We don't want to make it all the way into the oops code
+		 * and then double-fault, though, because we're likely to
+		 * break the console driver and lose most of the stack dump.
+		 */
+		asm volatile ("movq %[stack], %%rsp\n\t"
+			      "call handle_stack_overflow\n\t"
+			      "1: jmp 1b"
+			      : "+r" (__sp)
+			      : "D" ("kernel stack overflow (page fault)"),
+				"S" (regs), "d" (address),
+				[stack] "rm" (stack));
+		unreachable();
+	}
+#endif
+
 	/*
 	 * 32-bit:
 	 *
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index ec21796..4473cb4 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -3,15 +3,17 @@
  * included by both the compressed kernel and the regular kernel.
  */
 
-static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page,
+static void ident_pmd_init(struct x86_mapping_info *info, pmd_t *pmd_page,
 			   unsigned long addr, unsigned long end)
 {
 	addr &= PMD_MASK;
 	for (; addr < end; addr += PMD_SIZE) {
 		pmd_t *pmd = pmd_page + pmd_index(addr);
 
-		if (!pmd_present(*pmd))
-			set_pmd(pmd, __pmd(addr | pmd_flag));
+		if (pmd_present(*pmd))
+			continue;
+
+		set_pmd(pmd, __pmd((addr - info->offset) | info->pmd_flag));
 	}
 }
 
@@ -30,13 +32,13 @@
 
 		if (pud_present(*pud)) {
 			pmd = pmd_offset(pud, 0);
-			ident_pmd_init(info->pmd_flag, pmd, addr, next);
+			ident_pmd_init(info, pmd, addr, next);
 			continue;
 		}
 		pmd = (pmd_t *)info->alloc_pgt_page(info->context);
 		if (!pmd)
 			return -ENOMEM;
-		ident_pmd_init(info->pmd_flag, pmd, addr, next);
+		ident_pmd_init(info, pmd, addr, next);
 		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
 	}
 
@@ -44,14 +46,15 @@
 }
 
 int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
-			      unsigned long addr, unsigned long end)
+			      unsigned long pstart, unsigned long pend)
 {
+	unsigned long addr = pstart + info->offset;
+	unsigned long end = pend + info->offset;
 	unsigned long next;
 	int result;
-	int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0;
 
 	for (; addr < end; addr = next) {
-		pgd_t *pgd = pgd_page + pgd_index(addr) + off;
+		pgd_t *pgd = pgd_page + pgd_index(addr);
 		pud_t *pud;
 
 		next = (addr & PGDIR_MASK) + PGDIR_SIZE;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6209289..22af912 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -122,8 +122,18 @@
 	return __va(pfn << PAGE_SHIFT);
 }
 
-/* need 3 4k for initial PMD_SIZE,  3 4k for 0-ISA_END_ADDRESS */
-#define INIT_PGT_BUF_SIZE	(6 * PAGE_SIZE)
+/*
+ * By default need 3 4k for initial PMD_SIZE,  3 4k for 0-ISA_END_ADDRESS.
+ * With KASLR memory randomization, depending on the machine e820 memory
+ * and the PUD alignment. We may need twice more pages when KASLR memory
+ * randomization is enabled.
+ */
+#ifndef CONFIG_RANDOMIZE_MEMORY
+#define INIT_PGD_PAGE_COUNT      6
+#else
+#define INIT_PGD_PAGE_COUNT      12
+#endif
+#define INIT_PGT_BUF_SIZE	(INIT_PGD_PAGE_COUNT * PAGE_SIZE)
 RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
 void  __init early_alloc_pgt_buf(void)
 {
@@ -689,8 +699,10 @@
 	}
 }
 
-void free_initmem(void)
+void __ref free_initmem(void)
 {
+	e820_reallocate_tables();
+
 	free_init_pages("unused kernel",
 			(unsigned long)(&__init_begin),
 			(unsigned long)(&__init_end));
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 26dccd6..ddd2661 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -40,17 +40,26 @@
  * You need to add an if/def entry if you introduce a new memory region
  * compatible with KASLR. Your entry must be in logical order with memory
  * layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to
+ * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
  * ensure that this order is correct and won't be changed.
  */
 static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-static const unsigned long vaddr_end = VMEMMAP_START;
+
+#if defined(CONFIG_X86_ESPFIX64)
+static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
+#elif defined(CONFIG_EFI)
+static const unsigned long vaddr_end = EFI_VA_START;
+#else
+static const unsigned long vaddr_end = __START_KERNEL_map;
+#endif
 
 /* Default values */
 unsigned long page_offset_base = __PAGE_OFFSET_BASE;
 EXPORT_SYMBOL(page_offset_base);
 unsigned long vmalloc_base = __VMALLOC_BASE;
 EXPORT_SYMBOL(vmalloc_base);
+unsigned long vmemmap_base = __VMEMMAP_BASE;
+EXPORT_SYMBOL(vmemmap_base);
 
 /*
  * Memory regions randomized by KASLR (except modules that use a separate logic
@@ -63,6 +72,7 @@
 } kaslr_regions[] = {
 	{ &page_offset_base, 64/* Maximum */ },
 	{ &vmalloc_base, VMALLOC_SIZE_TB },
+	{ &vmemmap_base, 1 },
 };
 
 /* Get size in bytes used by the memory region */
@@ -77,7 +87,7 @@
  */
 static inline bool kaslr_memory_enabled(void)
 {
-	return kaslr_enabled() && !config_enabled(CONFIG_KASAN);
+	return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
 }
 
 /* Initialize base and padding for each memory region randomized with KASLR */
@@ -89,6 +99,18 @@
 	struct rnd_state rand_state;
 	unsigned long remain_entropy;
 
+	/*
+	 * All these BUILD_BUG_ON checks ensures the memory layout is
+	 * consistent with the vaddr_start/vaddr_end variables.
+	 */
+	BUILD_BUG_ON(vaddr_start >= vaddr_end);
+	BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
+		     vaddr_end >= EFI_VA_START);
+	BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
+		      config_enabled(CONFIG_EFI)) &&
+		     vaddr_end >= __START_KERNEL_map);
+	BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
+
 	if (!kaslr_memory_enabled())
 		return;
 
@@ -97,7 +119,7 @@
 	 * add padding if needed (especially for memory hotplug support).
 	 */
 	BUG_ON(kaslr_regions[0].base != &page_offset_base);
-	memory_tb = ((max_pfn << PAGE_SHIFT) >> TB_SHIFT) +
+	memory_tb = DIV_ROUND_UP(max_pfn << PAGE_SHIFT, 1UL << TB_SHIFT) +
 		CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING;
 
 	/* Adapt phyiscal memory region size based on available memory */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fb68210..3f35b48 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -722,22 +722,19 @@
 	numa_init(dummy_numa_init);
 }
 
-static __init int find_near_online_node(int node)
+static void __init init_memory_less_node(int nid)
 {
-	int n, val;
-	int min_val = INT_MAX;
-	int best_node = -1;
+	unsigned long zones_size[MAX_NR_ZONES] = {0};
+	unsigned long zholes_size[MAX_NR_ZONES] = {0};
 
-	for_each_online_node(n) {
-		val = node_distance(node, n);
+	/* Allocate and initialize node data. Memory-less node is now online.*/
+	alloc_node_data(nid);
+	free_area_init_node(nid, zones_size, 0, zholes_size);
 
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	return best_node;
+	/*
+	 * All zonelists will be built later in start_kernel() after per cpu
+	 * areas are initialized.
+	 */
 }
 
 /*
@@ -766,8 +763,10 @@
 
 		if (node == NUMA_NO_NODE)
 			continue;
+
 		if (!node_online(node))
-			node = find_near_online_node(node);
+			init_memory_less_node(node);
+
 		numa_set_node(cpu, node);
 	}
 }
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 849dc09..e3353c9 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -917,11 +917,11 @@
 	}
 }
 
-static int populate_pmd(struct cpa_data *cpa,
-			unsigned long start, unsigned long end,
-			unsigned num_pages, pud_t *pud, pgprot_t pgprot)
+static long populate_pmd(struct cpa_data *cpa,
+			 unsigned long start, unsigned long end,
+			 unsigned num_pages, pud_t *pud, pgprot_t pgprot)
 {
-	unsigned int cur_pages = 0;
+	long cur_pages = 0;
 	pmd_t *pmd;
 	pgprot_t pmd_pgprot;
 
@@ -991,12 +991,12 @@
 	return num_pages;
 }
 
-static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
-			pgprot_t pgprot)
+static long populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd,
+			 pgprot_t pgprot)
 {
 	pud_t *pud;
 	unsigned long end;
-	int cur_pages = 0;
+	long cur_pages = 0;
 	pgprot_t pud_pgprot;
 
 	end = start + (cpa->numpages << PAGE_SHIFT);
@@ -1052,7 +1052,7 @@
 
 	/* Map trailing leftover */
 	if (start < end) {
-		int tmp;
+		long tmp;
 
 		pud = pud_offset(pgd, start);
 		if (pud_none(*pud))
@@ -1078,7 +1078,7 @@
 	pgprot_t pgprot = __pgprot(_KERNPG_TABLE);
 	pud_t *pud = NULL;	/* shut up gcc */
 	pgd_t *pgd_entry;
-	int ret;
+	long ret;
 
 	pgd_entry = cpa->pgd + pgd_index(addr);
 
@@ -1327,7 +1327,8 @@
 
 static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 {
-	int ret, numpages = cpa->numpages;
+	unsigned long numpages = cpa->numpages;
+	int ret;
 
 	while (numpages) {
 		/*
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index ecb1b69..170cc4f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -927,9 +927,10 @@
 }
 
 /*
- * prot is passed in as a parameter for the new mapping. If the vma has a
- * linear pfn mapping for the entire range reserve the entire vma range with
- * single reserve_pfn_range call.
+ * prot is passed in as a parameter for the new mapping. If the vma has
+ * a linear pfn mapping for the entire range, or no vma is provided,
+ * reserve the entire pfn + size range with single reserve_pfn_range
+ * call.
  */
 int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 		    unsigned long pfn, unsigned long addr, unsigned long size)
@@ -938,11 +939,12 @@
 	enum page_cache_mode pcm;
 
 	/* reserve the whole chunk starting from paddr */
-	if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+	if (!vma || (addr == vma->vm_start
+				&& size == (vma->vm_end - vma->vm_start))) {
 		int ret;
 
 		ret = reserve_pfn_range(paddr, size, prot, 0);
-		if (!ret)
+		if (ret == 0 && vma)
 			vma->vm_flags |= VM_PAT;
 		return ret;
 	}
@@ -997,7 +999,7 @@
 	resource_size_t paddr;
 	unsigned long prot;
 
-	if (!(vma->vm_flags & VM_PAT))
+	if (vma && !(vma->vm_flags & VM_PAT))
 		return;
 
 	/* free the chunk starting from pfn or the whole chunk */
@@ -1011,7 +1013,8 @@
 		size = vma->vm_end - vma->vm_start;
 	}
 	free_pfn_range(paddr, size);
-	vma->vm_flags &= ~VM_PAT;
+	if (vma)
+		vma->vm_flags &= ~VM_PAT;
 }
 
 /*
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index de391b7..159b52c 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -254,9 +254,7 @@
 
 struct memtype *rbt_memtype_lookup(u64 addr)
 {
-	struct memtype *data;
-	data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
-	return data;
+	return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
 }
 
 #if defined(CONFIG_DEBUG_FS)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4dbe656..a7655f6 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -77,10 +77,25 @@
 	unsigned cpu = smp_processor_id();
 
 	if (likely(prev != next)) {
+		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+			/*
+			 * If our current stack is in vmalloc space and isn't
+			 * mapped in the new pgd, we'll double-fault.  Forcibly
+			 * map it.
+			 */
+			unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
+
+			pgd_t *pgd = next->pgd + stack_pgd_index;
+
+			if (unlikely(pgd_none(*pgd)))
+				set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
+		}
+
 #ifdef CONFIG_SMP
 		this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		this_cpu_write(cpu_tlbstate.active_mm, next);
 #endif
+
 		cpumask_set_cpu(cpu, mm_cpumask(next));
 
 		/*
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index cb31a44..a2488b6 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -16,27 +16,7 @@
 
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
-
-static int backtrace_stack(void *data, char *name)
-{
-	/* Yes, we want all stacks */
-	return 0;
-}
-
-static int backtrace_address(void *data, unsigned long addr, int reliable)
-{
-	unsigned int *depth = data;
-
-	if ((*depth)--)
-		oprofile_add_trace(addr);
-	return 0;
-}
-
-static struct stacktrace_ops backtrace_ops = {
-	.stack		= backtrace_stack,
-	.address	= backtrace_address,
-	.walk_stack	= print_context_stack,
-};
+#include <asm/unwind.h>
 
 #ifdef CONFIG_COMPAT
 static struct stack_frame_ia32 *
@@ -113,10 +93,29 @@
 	struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
 
 	if (!user_mode(regs)) {
-		unsigned long stack = kernel_stack_pointer(regs);
-		if (depth)
-			dump_trace(NULL, regs, (unsigned long *)stack, 0,
-				   &backtrace_ops, &depth);
+		struct unwind_state state;
+		unsigned long addr;
+
+		if (!depth)
+			return;
+
+		oprofile_add_trace(regs->ip);
+
+		if (!--depth)
+			return;
+
+		for (unwind_start(&state, current, regs, NULL);
+		     !unwind_done(&state); unwind_next_frame(&state)) {
+			addr = unwind_get_return_address(&state);
+			if (!addr)
+				break;
+
+			oprofile_add_trace(addr);
+
+			if (!--depth)
+				break;
+		}
+
 		return;
 	}
 
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 837ea36..6d52b94 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -553,15 +553,21 @@
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x27B9, twinhead_reserve_killing_zone);
 
 /*
- * Broadwell EP Home Agent BARs erroneously return non-zero values when read.
+ * Device [8086:2fc0]
+ * Erratum HSE43
+ * CONFIG_TDP_NOMINAL CSR Implemented at Incorrect Offset
+ * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v3-spec-update.html
  *
- * See http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
- * entry BDF2.
+ * Devices [8086:6f60,6fa0,6fc0]
+ * Erratum BDF2
+ * PCI BARs in the Home Agent Will Return Non-Zero Values During Enumeration
+ * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e5-v4-spec-update.html
  */
-static void pci_bdwep_bar(struct pci_dev *dev)
+static void pci_invalid_bar(struct pci_dev *dev)
 {
 	dev->non_compliant_bars = 1;
 }
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_bdwep_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_bdwep_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_bdwep_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar);
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 9770e55..1d97cea 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -120,9 +120,12 @@
 static struct {
 	unsigned long address;
 	unsigned short segment;
-} pci_indirect = { 0, __KERNEL_CS };
+} pci_indirect __ro_after_init = {
+	.address = 0,
+	.segment = __KERNEL_CS,
+};
 
-static int pci_bios_present;
+static int pci_bios_present __ro_after_init;
 
 static int __init check_pcibios(void)
 {
diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c
index b814ca6..7948be3 100644
--- a/arch/x86/pci/vmd.c
+++ b/arch/x86/pci/vmd.c
@@ -41,6 +41,7 @@
  * @node:	list item for parent traversal.
  * @rcu:	RCU callback item for freeing.
  * @irq:	back pointer to parent.
+ * @enabled:	true if driver enabled IRQ
  * @virq:	the virtual IRQ value provided to the requesting driver.
  *
  * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
@@ -50,6 +51,7 @@
 	struct list_head	node;
 	struct rcu_head		rcu;
 	struct vmd_irq_list	*irq;
+	bool			enabled;
 	unsigned int		virq;
 };
 
@@ -122,7 +124,9 @@
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&list_lock, flags);
+	WARN_ON(vmdirq->enabled);
 	list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
+	vmdirq->enabled = true;
 	raw_spin_unlock_irqrestore(&list_lock, flags);
 
 	data->chip->irq_unmask(data);
@@ -136,8 +140,10 @@
 	data->chip->irq_mask(data);
 
 	raw_spin_lock_irqsave(&list_lock, flags);
-	list_del_rcu(&vmdirq->node);
-	INIT_LIST_HEAD_RCU(&vmdirq->node);
+	if (vmdirq->enabled) {
+		list_del_rcu(&vmdirq->node);
+		vmdirq->enabled = false;
+	}
 	raw_spin_unlock_irqrestore(&list_lock, flags);
 }
 
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 184842e..3c3c19e 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -8,6 +8,7 @@
 obj-y	+= intel/
 obj-y	+= intel-mid/
 obj-y	+= intel-quark/
+obj-y	+= mellanox/
 obj-y	+= olpc/
 obj-y	+= scx200/
 obj-y	+= sfi/
diff --git a/arch/x86/platform/atom/punit_atom_debug.c b/arch/x86/platform/atom/punit_atom_debug.c
index 8ff7b93..d49d3be 100644
--- a/arch/x86/platform/atom/punit_atom_debug.c
+++ b/arch/x86/platform/atom/punit_atom_debug.c
@@ -155,7 +155,7 @@
 
 static const struct x86_cpu_id intel_punit_cpu_ids[] = {
 	ICPU(INTEL_FAM6_ATOM_SILVERMONT1, punit_device_byt),
-	ICPU(INTEL_FAM6_ATOM_MERRIFIELD1, punit_device_tng),
+	ICPU(INTEL_FAM6_ATOM_MERRIFIELD,  punit_device_tng),
 	ICPU(INTEL_FAM6_ATOM_AIRMONT,	  punit_device_cht),
 	{}
 };
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index 6a2f569..6aad870 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -82,21 +82,12 @@
 	}
 	bgrt_image_size = bmp_header.size;
 
-	bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL | __GFP_NOWARN);
+	bgrt_image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB);
 	if (!bgrt_image) {
-		pr_notice("Ignoring BGRT: failed to allocate memory for image (wanted %zu bytes)\n",
-		       bgrt_image_size);
-		return;
-	}
-
-	image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB);
-	if (!image) {
 		pr_notice("Ignoring BGRT: failed to map image memory\n");
-		kfree(bgrt_image);
 		bgrt_image = NULL;
 		return;
 	}
 
-	memcpy(bgrt_image, image, bgrt_image_size);
-	memunmap(image);
+	efi_mem_reserve(bgrt_tab->image_address, bgrt_image_size);
 }
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 1fbb408..bf99aa7 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -166,13 +166,15 @@
 		}
 		e820_add_region(start, size, e820_type);
 	}
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
 }
 
 int __init efi_memblock_x86_reserve_range(void)
 {
 	struct efi_info *e = &boot_params.efi_info;
+	struct efi_memory_map_data data;
 	phys_addr_t pmap;
+	int rv;
 
 	if (efi_enabled(EFI_PARAVIRT))
 		return 0;
@@ -187,11 +189,17 @@
 #else
 	pmap = (e->efi_memmap |	((__u64)e->efi_memmap_hi << 32));
 #endif
-	efi.memmap.phys_map	= pmap;
-	efi.memmap.nr_map	= e->efi_memmap_size /
-				  e->efi_memdesc_size;
-	efi.memmap.desc_size	= e->efi_memdesc_size;
-	efi.memmap.desc_version	= e->efi_memdesc_version;
+	data.phys_map		= pmap;
+	data.size 		= e->efi_memmap_size;
+	data.desc_size		= e->efi_memdesc_size;
+	data.desc_version	= e->efi_memdesc_version;
+
+	rv = efi_memmap_init_early(&data);
+	if (rv)
+		return rv;
+
+	if (add_efi_memmap)
+		do_add_efi_memmap();
 
 	WARN(efi.memmap.desc_version != 1,
 	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
@@ -218,19 +226,6 @@
 	}
 }
 
-void __init efi_unmap_memmap(void)
-{
-	unsigned long size;
-
-	clear_bit(EFI_MEMMAP, &efi.flags);
-
-	size = efi.memmap.nr_map * efi.memmap.desc_size;
-	if (efi.memmap.map) {
-		early_memunmap(efi.memmap.map, size);
-		efi.memmap.map = NULL;
-	}
-}
-
 static int __init efi_systab_init(void *phys)
 {
 	if (efi_enabled(EFI_64BIT)) {
@@ -414,33 +409,6 @@
 	return 0;
 }
 
-static int __init efi_memmap_init(void)
-{
-	unsigned long addr, size;
-
-	if (efi_enabled(EFI_PARAVIRT))
-		return 0;
-
-	/* Map the EFI memory map */
-	size = efi.memmap.nr_map * efi.memmap.desc_size;
-	addr = (unsigned long)efi.memmap.phys_map;
-
-	efi.memmap.map = early_memremap(addr, size);
-	if (efi.memmap.map == NULL) {
-		pr_err("Could not map the memory map!\n");
-		return -ENOMEM;
-	}
-
-	efi.memmap.map_end = efi.memmap.map + size;
-
-	if (add_efi_memmap)
-		do_add_efi_memmap();
-
-	set_bit(EFI_MEMMAP, &efi.flags);
-
-	return 0;
-}
-
 void __init efi_init(void)
 {
 	efi_char16_t *c16;
@@ -498,16 +466,14 @@
 	if (!efi_runtime_supported())
 		pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
 	else {
-		if (efi_runtime_disabled() || efi_runtime_init())
+		if (efi_runtime_disabled() || efi_runtime_init()) {
+			efi_memmap_unmap();
 			return;
+		}
 	}
-	if (efi_memmap_init())
-		return;
 
 	if (efi_enabled(EFI_DBG))
 		efi_print_memmap();
-
-	efi_esrt_init();
 }
 
 void __init efi_late_init(void)
@@ -624,42 +590,6 @@
 	}
 }
 
-static void __init save_runtime_map(void)
-{
-#ifdef CONFIG_KEXEC_CORE
-	unsigned long desc_size;
-	efi_memory_desc_t *md;
-	void *tmp, *q = NULL;
-	int count = 0;
-
-	if (efi_enabled(EFI_OLD_MEMMAP))
-		return;
-
-	desc_size = efi.memmap.desc_size;
-
-	for_each_efi_memory_desc(md) {
-		if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
-		    (md->type == EFI_BOOT_SERVICES_CODE) ||
-		    (md->type == EFI_BOOT_SERVICES_DATA))
-			continue;
-		tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL);
-		if (!tmp)
-			goto out;
-		q = tmp;
-
-		memcpy(q + count * desc_size, md, desc_size);
-		count++;
-	}
-
-	efi_runtime_map_setup(q, count, desc_size);
-	return;
-
-out:
-	kfree(q);
-	pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
-#endif
-}
-
 static void *realloc_pages(void *old_memmap, int old_shift)
 {
 	void *ret;
@@ -745,6 +675,46 @@
 	return entry;
 }
 
+static bool should_map_region(efi_memory_desc_t *md)
+{
+	/*
+	 * Runtime regions always require runtime mappings (obviously).
+	 */
+	if (md->attribute & EFI_MEMORY_RUNTIME)
+		return true;
+
+	/*
+	 * 32-bit EFI doesn't suffer from the bug that requires us to
+	 * reserve boot services regions, and mixed mode support
+	 * doesn't exist for 32-bit kernels.
+	 */
+	if (IS_ENABLED(CONFIG_X86_32))
+		return false;
+
+	/*
+	 * Map all of RAM so that we can access arguments in the 1:1
+	 * mapping when making EFI runtime calls.
+	 */
+	if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_is_native()) {
+		if (md->type == EFI_CONVENTIONAL_MEMORY ||
+		    md->type == EFI_LOADER_DATA ||
+		    md->type == EFI_LOADER_CODE)
+			return true;
+	}
+
+	/*
+	 * Map boot services regions as a workaround for buggy
+	 * firmware that accesses them even when they shouldn't.
+	 *
+	 * See efi_{reserve,free}_boot_services().
+	 */
+	if (md->type == EFI_BOOT_SERVICES_CODE ||
+	    md->type == EFI_BOOT_SERVICES_DATA)
+		return true;
+
+	return false;
+}
+
 /*
  * Map the efi memory ranges of the runtime services and update new_mmap with
  * virtual addresses.
@@ -761,13 +731,9 @@
 	p = NULL;
 	while ((p = efi_map_next_entry(p))) {
 		md = p;
-		if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
-#ifdef CONFIG_X86_64
-			if (md->type != EFI_BOOT_SERVICES_CODE &&
-			    md->type != EFI_BOOT_SERVICES_DATA)
-#endif
-				continue;
-		}
+
+		if (!should_map_region(md))
+			continue;
 
 		efi_map_region(md);
 		get_systab_virt_addr(md);
@@ -803,7 +769,7 @@
 	 * non-native EFI
 	 */
 	if (!efi_is_native()) {
-		efi_unmap_memmap();
+		efi_memmap_unmap();
 		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
 	}
@@ -823,7 +789,18 @@
 		get_systab_virt_addr(md);
 	}
 
-	save_runtime_map();
+	/*
+	 * Unregister the early EFI memmap from efi_init() and install
+	 * the new EFI memory map.
+	 */
+	efi_memmap_unmap();
+
+	if (efi_memmap_init_late(efi.memmap.phys_map,
+				 efi.memmap.desc_size * efi.memmap.nr_map)) {
+		pr_err("Failed to remap late EFI memory map\n");
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+		return;
+	}
 
 	BUG_ON(!efi.systab);
 
@@ -884,6 +861,7 @@
 	int count = 0, pg_shift = 0;
 	void *new_memmap = NULL;
 	efi_status_t status;
+	phys_addr_t pa;
 
 	efi.systab = NULL;
 
@@ -901,11 +879,24 @@
 		return;
 	}
 
-	save_runtime_map();
+	pa = __pa(new_memmap);
+
+	/*
+	 * Unregister the early EFI memmap from efi_init() and install
+	 * the new EFI memory map that we are about to pass to the
+	 * firmware via SetVirtualAddressMap().
+	 */
+	efi_memmap_unmap();
+
+	if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) {
+		pr_err("Failed to remap late EFI memory map\n");
+		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+		return;
+	}
 
 	BUG_ON(!efi.systab);
 
-	if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
+	if (efi_setup_page_tables(pa, 1 << pg_shift)) {
 		clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 		return;
 	}
@@ -917,14 +908,14 @@
 				efi.memmap.desc_size * count,
 				efi.memmap.desc_size,
 				efi.memmap.desc_version,
-				(efi_memory_desc_t *)__pa(new_memmap));
+				(efi_memory_desc_t *)pa);
 	} else {
 		status = efi_thunk_set_virtual_address_map(
 				efi_phys.set_virtual_address_map,
 				efi.memmap.desc_size * count,
 				efi.memmap.desc_size,
 				efi.memmap.desc_version,
-				(efi_memory_desc_t *)__pa(new_memmap));
+				(efi_memory_desc_t *)pa);
 	}
 
 	if (status != EFI_SUCCESS) {
@@ -956,15 +947,6 @@
 	efi_runtime_update_mappings();
 	efi_dump_pagetable();
 
-	/*
-	 * We mapped the descriptor array into the EFI pagetable above
-	 * but we're not unmapping it here because if we're running in
-	 * EFI mixed mode we need all of memory to be accessible when
-	 * we pass parameters to the EFI runtime services in the
-	 * thunking code.
-	 */
-	free_pages((unsigned long)new_memmap, pg_shift);
-
 	/* clean DUMMY object */
 	efi_delete_dummy_variable();
 }
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 677e29e..58b0f80 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -85,7 +85,7 @@
 	early_code_mapping_set_exec(1);
 
 	n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
-	save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
+	save_pgd = kmalloc_array(n_pgds, sizeof(*save_pgd), GFP_KERNEL);
 
 	for (pgd = 0; pgd < n_pgds; pgd++) {
 		save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
@@ -214,7 +214,6 @@
 int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
 {
 	unsigned long pfn, text;
-	efi_memory_desc_t *md;
 	struct page *page;
 	unsigned npages;
 	pgd_t *pgd;
@@ -245,28 +244,9 @@
 	 * text and allocate a new stack because we can't rely on the
 	 * stack pointer being < 4GB.
 	 */
-	if (!IS_ENABLED(CONFIG_EFI_MIXED))
+	if (!IS_ENABLED(CONFIG_EFI_MIXED) || efi_is_native())
 		return 0;
 
-	/*
-	 * Map all of RAM so that we can access arguments in the 1:1
-	 * mapping when making EFI runtime calls.
-	 */
-	for_each_efi_memory_desc(md) {
-		if (md->type != EFI_CONVENTIONAL_MEMORY &&
-		    md->type != EFI_LOADER_DATA &&
-		    md->type != EFI_LOADER_CODE)
-			continue;
-
-		pfn = md->phys_addr >> PAGE_SHIFT;
-		npages = md->num_pages;
-
-		if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, _PAGE_RW)) {
-			pr_err("Failed to map 1:1 memory\n");
-			return 1;
-		}
-	}
-
 	page = alloc_page(GFP_KERNEL|__GFP_DMA32);
 	if (!page)
 		panic("Unable to allocate EFI runtime stack < 4GB\n");
@@ -359,6 +339,7 @@
  */
 void __init efi_map_region_fixed(efi_memory_desc_t *md)
 {
+	__map_region(md, md->phys_addr);
 	__map_region(md, md->virt_addr);
 }
 
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 4480c06..10aca63 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -164,6 +164,75 @@
 EXPORT_SYMBOL_GPL(efi_query_variable_store);
 
 /*
+ * The UEFI specification makes it clear that the operating system is
+ * free to do whatever it wants with boot services code after
+ * ExitBootServices() has been called. Ignoring this recommendation a
+ * significant bunch of EFI implementations continue calling into boot
+ * services code (SetVirtualAddressMap). In order to work around such
+ * buggy implementations we reserve boot services region during EFI
+ * init and make sure it stays executable. Then, after
+ * SetVirtualAddressMap(), it is discarded.
+ *
+ * However, some boot services regions contain data that is required
+ * by drivers, so we need to track which memory ranges can never be
+ * freed. This is done by tagging those regions with the
+ * EFI_MEMORY_RUNTIME attribute.
+ *
+ * Any driver that wants to mark a region as reserved must use
+ * efi_mem_reserve() which will insert a new EFI memory descriptor
+ * into efi.memmap (splitting existing regions if necessary) and tag
+ * it with EFI_MEMORY_RUNTIME.
+ */
+void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
+{
+	phys_addr_t new_phys, new_size;
+	struct efi_mem_range mr;
+	efi_memory_desc_t md;
+	int num_entries;
+	void *new;
+
+	if (efi_mem_desc_lookup(addr, &md)) {
+		pr_err("Failed to lookup EFI memory descriptor for %pa\n", &addr);
+		return;
+	}
+
+	if (addr + size > md.phys_addr + (md.num_pages << EFI_PAGE_SHIFT)) {
+		pr_err("Region spans EFI memory descriptors, %pa\n", &addr);
+		return;
+	}
+
+	size += addr % EFI_PAGE_SIZE;
+	size = round_up(size, EFI_PAGE_SIZE);
+	addr = round_down(addr, EFI_PAGE_SIZE);
+
+	mr.range.start = addr;
+	mr.range.end = addr + size - 1;
+	mr.attribute = md.attribute | EFI_MEMORY_RUNTIME;
+
+	num_entries = efi_memmap_split_count(&md, &mr.range);
+	num_entries += efi.memmap.nr_map;
+
+	new_size = efi.memmap.desc_size * num_entries;
+
+	new_phys = memblock_alloc(new_size, 0);
+	if (!new_phys) {
+		pr_err("Could not allocate boot services memmap\n");
+		return;
+	}
+
+	new = early_memremap(new_phys, new_size);
+	if (!new) {
+		pr_err("Failed to map new boot services memmap\n");
+		return;
+	}
+
+	efi_memmap_insert(&efi.memmap, new, &mr);
+	early_memunmap(new, new_size);
+
+	efi_memmap_install(new_phys, num_entries);
+}
+
+/*
  * Helper function for efi_reserve_boot_services() to figure out if we
  * can free regions in efi_free_boot_services().
  *
@@ -184,15 +253,6 @@
 	return true;
 }
 
-/*
- * The UEFI specification makes it clear that the operating system is free to do
- * whatever it wants with boot services code after ExitBootServices() has been
- * called. Ignoring this recommendation a significant bunch of EFI implementations 
- * continue calling into boot services code (SetVirtualAddressMap). In order to 
- * work around such buggy implementations we reserve boot services region during 
- * EFI init and make sure it stays executable. Then, after SetVirtualAddressMap(), it
-* is discarded.
-*/
 void __init efi_reserve_boot_services(void)
 {
 	efi_memory_desc_t *md;
@@ -249,24 +309,86 @@
 
 void __init efi_free_boot_services(void)
 {
+	phys_addr_t new_phys, new_size;
 	efi_memory_desc_t *md;
+	int num_entries = 0;
+	void *new, *new_md;
 
 	for_each_efi_memory_desc(md) {
 		unsigned long long start = md->phys_addr;
 		unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+		size_t rm_size;
 
 		if (md->type != EFI_BOOT_SERVICES_CODE &&
-		    md->type != EFI_BOOT_SERVICES_DATA)
+		    md->type != EFI_BOOT_SERVICES_DATA) {
+			num_entries++;
 			continue;
+		}
 
 		/* Do not free, someone else owns it: */
-		if (md->attribute & EFI_MEMORY_RUNTIME)
+		if (md->attribute & EFI_MEMORY_RUNTIME) {
+			num_entries++;
 			continue;
+		}
+
+		/*
+		 * Nasty quirk: if all sub-1MB memory is used for boot
+		 * services, we can get here without having allocated the
+		 * real mode trampoline.  It's too late to hand boot services
+		 * memory back to the memblock allocator, so instead
+		 * try to manually allocate the trampoline if needed.
+		 *
+		 * I've seen this on a Dell XPS 13 9350 with firmware
+		 * 1.4.4 with SGX enabled booting Linux via Fedora 24's
+		 * grub2-efi on a hard disk.  (And no, I don't know why
+		 * this happened, but Linux should still try to boot rather
+		 * panicing early.)
+		 */
+		rm_size = real_mode_size_needed();
+		if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) {
+			set_real_mode_mem(start, rm_size);
+			start += rm_size;
+			size -= rm_size;
+		}
 
 		free_bootmem_late(start, size);
 	}
 
-	efi_unmap_memmap();
+	new_size = efi.memmap.desc_size * num_entries;
+	new_phys = memblock_alloc(new_size, 0);
+	if (!new_phys) {
+		pr_err("Failed to allocate new EFI memmap\n");
+		return;
+	}
+
+	new = memremap(new_phys, new_size, MEMREMAP_WB);
+	if (!new) {
+		pr_err("Failed to map new EFI memmap\n");
+		return;
+	}
+
+	/*
+	 * Build a new EFI memmap that excludes any boot services
+	 * regions that are not tagged EFI_MEMORY_RUNTIME, since those
+	 * regions have now been freed.
+	 */
+	new_md = new;
+	for_each_efi_memory_desc(md) {
+		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+		    (md->type == EFI_BOOT_SERVICES_CODE ||
+		     md->type == EFI_BOOT_SERVICES_DATA))
+			continue;
+
+		memcpy(new_md, md, efi.memmap.desc_size);
+		new_md += efi.memmap.desc_size;
+	}
+
+	memunmap(new);
+
+	if (efi_memmap_install(new_phys, num_entries)) {
+		pr_err("Could not install new EFI memmap\n");
+		return;
+	}
 }
 
 /*
@@ -344,7 +466,7 @@
 	 */
 	if (!efi_runtime_supported()) {
 		pr_info("Setup done, disabling due to 32/64-bit mismatch\n");
-		efi_unmap_memmap();
+		efi_memmap_unmap();
 	}
 
 	/* UV2+ BIOS has a fix for this issue.  UV1 still needs the quirk. */
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index fc135bf..429d08b 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -1,5 +1,9 @@
 # Family-Level Interface Shim (FLIS)
 obj-$(subst m,y,$(CONFIG_PINCTRL_MERRIFIELD)) += platform_mrfld_pinctrl.o
+# SDHCI Devices
+obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += platform_mrfld_sd.o
+# WiFi
+obj-$(subst m,y,$(CONFIG_BRCMFMAC_SDIO)) += platform_bcm43xx.o
 # IPC Devices
 obj-y += platform_ipc.o
 obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
new file mode 100644
index 0000000..4392c15
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
@@ -0,0 +1,95 @@
+/*
+ * platform_bcm43xx.c: bcm43xx platform data initilization file
+ *
+ * (C) Copyright 2016 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/gpio.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/fixed.h>
+#include <linux/sfi.h>
+
+#include <asm/intel-mid.h>
+
+#define WLAN_SFI_GPIO_IRQ_NAME		"WLAN-interrupt"
+#define WLAN_SFI_GPIO_ENABLE_NAME	"WLAN-enable"
+
+#define WLAN_DEV_NAME			"0000:00:01.3"
+
+static struct regulator_consumer_supply bcm43xx_vmmc_supply = {
+	.dev_name		= WLAN_DEV_NAME,
+	.supply			= "vmmc",
+};
+
+static struct regulator_init_data bcm43xx_vmmc_data = {
+	.constraints = {
+		.valid_ops_mask		= REGULATOR_CHANGE_STATUS,
+	},
+	.num_consumer_supplies	= 1,
+	.consumer_supplies	= &bcm43xx_vmmc_supply,
+};
+
+static struct fixed_voltage_config bcm43xx_vmmc = {
+	.supply_name		= "bcm43xx-vmmc-regulator",
+	/*
+	 * Announce 2.0V here to be compatible with SDIO specification. The
+	 * real voltage and signaling are still 1.8V.
+	 */
+	.microvolts		= 2000000,		/* 1.8V */
+	.gpio			= -EINVAL,
+	.startup_delay		= 250 * 1000,		/* 250ms */
+	.enable_high		= 1,			/* active high */
+	.enabled_at_boot	= 0,			/* disabled at boot */
+	.init_data		= &bcm43xx_vmmc_data,
+};
+
+static struct platform_device bcm43xx_vmmc_regulator = {
+	.name		= "reg-fixed-voltage",
+	.id		= PLATFORM_DEVID_AUTO,
+	.dev = {
+		.platform_data	= &bcm43xx_vmmc,
+	},
+};
+
+static int __init bcm43xx_regulator_register(void)
+{
+	int ret;
+
+	bcm43xx_vmmc.gpio = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME);
+	ret = platform_device_register(&bcm43xx_vmmc_regulator);
+	if (ret) {
+		pr_err("%s: vmmc regulator register failed\n", __func__);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __init *bcm43xx_platform_data(void *info)
+{
+	int ret;
+
+	ret = bcm43xx_regulator_register();
+	if (ret)
+		return NULL;
+
+	pr_info("Using generic wifi platform data\n");
+
+	/* For now it's empty */
+	return NULL;
+}
+
+static const struct devs_id bcm43xx_clk_vmmc_dev_id __initconst = {
+	.name			= "bcm43xx_clk_vmmc",
+	.type			= SFI_DEV_TYPE_SD,
+	.get_platform_data	= &bcm43xx_platform_data,
+};
+
+sfi_device(bcm43xx_clk_vmmc_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c
new file mode 100644
index 0000000..00c4a03
--- /dev/null
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c
@@ -0,0 +1,47 @@
+/*
+ * SDHCI platform data initilisation file
+ *
+ * (C) Copyright 2016 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/init.h>
+#include <linux/pci.h>
+
+#include <linux/mmc/sdhci-pci-data.h>
+
+#include <asm/intel-mid.h>
+
+#define INTEL_MRFLD_SD			2
+#define INTEL_MRFLD_SD_CD_GPIO		77
+
+static struct sdhci_pci_data mrfld_sdhci_pci_data = {
+	.rst_n_gpio	= -EINVAL,
+	.cd_gpio	= INTEL_MRFLD_SD_CD_GPIO,
+};
+
+static struct sdhci_pci_data *
+mrfld_sdhci_pci_get_data(struct pci_dev *pdev, int slotno)
+{
+	unsigned int func = PCI_FUNC(pdev->devfn);
+
+	if (func == INTEL_MRFLD_SD)
+		return &mrfld_sdhci_pci_data;
+
+	return NULL;
+}
+
+static int __init mrfld_sd_init(void)
+{
+	if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+		return -ENODEV;
+
+	sdhci_pci_get_data = mrfld_sdhci_pci_get_data;
+	return 0;
+}
+arch_initcall(mrfld_sd_init);
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index ce119d2..7850128 100644
--- a/arch/x86/platform/intel-mid/intel-mid.c
+++ b/arch/x86/platform/intel-mid/intel-mid.c
@@ -70,6 +70,11 @@
 
 static void intel_mid_power_off(void)
 {
+	/* Shut down South Complex via PWRMU */
+	intel_mid_pwr_power_off();
+
+	/* Only for Tangier, the rest will ignore this command */
+	intel_scu_ipc_simple_command(IPCMSG_COLD_OFF, 1);
 };
 
 static void intel_mid_reboot(void)
diff --git a/arch/x86/platform/intel-mid/pwr.c b/arch/x86/platform/intel-mid/pwr.c
index c901a34..5d3b45a 100644
--- a/arch/x86/platform/intel-mid/pwr.c
+++ b/arch/x86/platform/intel-mid/pwr.c
@@ -44,7 +44,19 @@
 /* Bits in PM_CMD */
 #define PM_CMD_CMD(x)		((x) << 0)
 #define PM_CMD_IOC		(1 << 8)
-#define PM_CMD_D3cold		(1 << 21)
+#define PM_CMD_CM_NOP		(0 << 9)
+#define PM_CMD_CM_IMMEDIATE	(1 << 9)
+#define PM_CMD_CM_DELAY		(2 << 9)
+#define PM_CMD_CM_TRIGGER	(3 << 9)
+
+/* System states */
+#define PM_CMD_SYS_STATE_S5	(5 << 16)
+
+/* Trigger variants */
+#define PM_CMD_CFG_TRIGGER_NC	(3 << 19)
+
+/* Message to wait for TRIGGER_NC case */
+#define TRIGGER_NC_MSG_2	(2 << 22)
 
 /* List of commands */
 #define CMD_SET_CFG		0x01
@@ -137,7 +149,7 @@
 
 static int mid_pwr_wait_for_cmd(struct mid_pwr *pwr, u8 cmd)
 {
-	writel(PM_CMD_CMD(cmd), pwr->regs + PM_CMD);
+	writel(PM_CMD_CMD(cmd) | PM_CMD_CM_IMMEDIATE, pwr->regs + PM_CMD);
 	return mid_pwr_wait(pwr);
 }
 
@@ -260,6 +272,20 @@
 }
 EXPORT_SYMBOL_GPL(intel_mid_pci_set_power_state);
 
+void intel_mid_pwr_power_off(void)
+{
+	struct mid_pwr *pwr = midpwr;
+	u32 cmd = PM_CMD_SYS_STATE_S5 |
+		  PM_CMD_CMD(CMD_SET_CFG) |
+		  PM_CMD_CM_TRIGGER |
+		  PM_CMD_CFG_TRIGGER_NC |
+		  TRIGGER_NC_MSG_2;
+
+	/* Send command to SCU */
+	writel(cmd, pwr->regs + PM_CMD);
+	mid_pwr_wait(pwr);
+}
+
 int intel_mid_pwr_get_lss_id(struct pci_dev *pdev)
 {
 	int vndr;
@@ -354,7 +380,7 @@
 	return 0;
 }
 
-static int mid_set_initial_state(struct mid_pwr *pwr)
+static int mid_set_initial_state(struct mid_pwr *pwr, const u32 *states)
 {
 	unsigned int i, j;
 	int ret;
@@ -379,10 +405,10 @@
 	 * NOTE: The actual device mapping is provided by a platform at run
 	 * time using vendor capability of PCI configuration space.
 	 */
-	mid_pwr_set_state(pwr, 0, 0xffffffff);
-	mid_pwr_set_state(pwr, 1, 0xffffffff);
-	mid_pwr_set_state(pwr, 2, 0xffffffff);
-	mid_pwr_set_state(pwr, 3, 0xffffffff);
+	mid_pwr_set_state(pwr, 0, states[0]);
+	mid_pwr_set_state(pwr, 1, states[1]);
+	mid_pwr_set_state(pwr, 2, states[2]);
+	mid_pwr_set_state(pwr, 3, states[3]);
 
 	/* Send command to SCU */
 	ret = mid_pwr_wait_for_cmd(pwr, CMD_SET_CFG);
@@ -397,13 +423,41 @@
 	return 0;
 }
 
-static const struct mid_pwr_device_info mid_info = {
-	.set_initial_state = mid_set_initial_state,
+static int pnw_set_initial_state(struct mid_pwr *pwr)
+{
+	/* On Penwell SRAM must stay powered on */
+	const u32 states[] = {
+		0xf00fffff,		/* PM_SSC(0) */
+		0xffffffff,		/* PM_SSC(1) */
+		0xffffffff,		/* PM_SSC(2) */
+		0xffffffff,		/* PM_SSC(3) */
+	};
+	return mid_set_initial_state(pwr, states);
+}
+
+static int tng_set_initial_state(struct mid_pwr *pwr)
+{
+	const u32 states[] = {
+		0xffffffff,		/* PM_SSC(0) */
+		0xffffffff,		/* PM_SSC(1) */
+		0xffffffff,		/* PM_SSC(2) */
+		0xffffffff,		/* PM_SSC(3) */
+	};
+	return mid_set_initial_state(pwr, states);
+}
+
+static const struct mid_pwr_device_info pnw_info = {
+	.set_initial_state = pnw_set_initial_state,
 };
 
+static const struct mid_pwr_device_info tng_info = {
+	.set_initial_state = tng_set_initial_state,
+};
+
+/* This table should be in sync with the one in drivers/pci/pci-mid.c */
 static const struct pci_device_id mid_pwr_pci_ids[] = {
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&mid_info },
-	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&mid_info },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_PENWELL), (kernel_ulong_t)&pnw_info },
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_TANGIER), (kernel_ulong_t)&tng_info },
 	{}
 };
 
diff --git a/arch/x86/platform/mellanox/Makefile b/arch/x86/platform/mellanox/Makefile
new file mode 100644
index 0000000..f43c931
--- /dev/null
+++ b/arch/x86/platform/mellanox/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MLX_PLATFORM)	+= mlx-platform.o
diff --git a/arch/x86/platform/mellanox/mlx-platform.c b/arch/x86/platform/mellanox/mlx-platform.c
new file mode 100644
index 0000000..7dcfcca
--- /dev/null
+++ b/arch/x86/platform/mellanox/mlx-platform.c
@@ -0,0 +1,266 @@
+/*
+ * arch/x86/platform/mellanox/mlx-platform.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/device.h>
+#include <linux/dmi.h>
+#include <linux/i2c.h>
+#include <linux/i2c-mux.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/i2c-mux-reg.h>
+
+#define MLX_PLAT_DEVICE_NAME		"mlxplat"
+
+/* LPC bus IO offsets */
+#define MLXPLAT_CPLD_LPC_I2C_BASE_ADRR		0x2000
+#define MLXPLAT_CPLD_LPC_REG_BASE_ADRR		0x2500
+#define MLXPLAT_CPLD_LPC_IO_RANGE		0x100
+#define MLXPLAT_CPLD_LPC_I2C_CH1_OFF		0xdb
+#define MLXPLAT_CPLD_LPC_I2C_CH2_OFF		0xda
+#define MLXPLAT_CPLD_LPC_PIO_OFFSET		0x10000UL
+#define MLXPLAT_CPLD_LPC_REG1	((MLXPLAT_CPLD_LPC_REG_BASE_ADRR + \
+				  MLXPLAT_CPLD_LPC_I2C_CH1_OFF) | \
+				  MLXPLAT_CPLD_LPC_PIO_OFFSET)
+#define MLXPLAT_CPLD_LPC_REG2	((MLXPLAT_CPLD_LPC_REG_BASE_ADRR + \
+				  MLXPLAT_CPLD_LPC_I2C_CH2_OFF) | \
+				  MLXPLAT_CPLD_LPC_PIO_OFFSET)
+
+/* Start channel numbers */
+#define MLXPLAT_CPLD_CH1			2
+#define MLXPLAT_CPLD_CH2			10
+
+/* Number of LPC attached MUX platform devices */
+#define MLXPLAT_CPLD_LPC_MUX_DEVS		2
+
+/* mlxplat_priv - platform private data
+ * @pdev_i2c - i2c controller platform device
+ * @pdev_mux - array of mux platform devices
+ */
+struct mlxplat_priv {
+	struct platform_device *pdev_i2c;
+	struct platform_device *pdev_mux[MLXPLAT_CPLD_LPC_MUX_DEVS];
+};
+
+/* Regions for LPC I2C controller and LPC base register space */
+static const struct resource mlxplat_lpc_resources[] = {
+	[0] = DEFINE_RES_NAMED(MLXPLAT_CPLD_LPC_I2C_BASE_ADRR,
+			       MLXPLAT_CPLD_LPC_IO_RANGE,
+			       "mlxplat_cpld_lpc_i2c_ctrl", IORESOURCE_IO),
+	[1] = DEFINE_RES_NAMED(MLXPLAT_CPLD_LPC_REG_BASE_ADRR,
+			       MLXPLAT_CPLD_LPC_IO_RANGE,
+			       "mlxplat_cpld_lpc_regs",
+			       IORESOURCE_IO),
+};
+
+/* Platform default channels */
+static const int mlxplat_default_channels[][8] = {
+	{
+		MLXPLAT_CPLD_CH1, MLXPLAT_CPLD_CH1 + 1, MLXPLAT_CPLD_CH1 + 2,
+		MLXPLAT_CPLD_CH1 + 3, MLXPLAT_CPLD_CH1 + 4, MLXPLAT_CPLD_CH1 +
+		5, MLXPLAT_CPLD_CH1 + 6, MLXPLAT_CPLD_CH1 + 7
+	},
+	{
+		MLXPLAT_CPLD_CH2, MLXPLAT_CPLD_CH2 + 1, MLXPLAT_CPLD_CH2 + 2,
+		MLXPLAT_CPLD_CH2 + 3, MLXPLAT_CPLD_CH2 + 4, MLXPLAT_CPLD_CH2 +
+		5, MLXPLAT_CPLD_CH2 + 6, MLXPLAT_CPLD_CH2 + 7
+	},
+};
+
+/* Platform channels for MSN21xx system family */
+static const int mlxplat_msn21xx_channels[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+
+/* Platform mux data */
+static struct i2c_mux_reg_platform_data mlxplat_mux_data[] = {
+	{
+		.parent = 1,
+		.base_nr = MLXPLAT_CPLD_CH1,
+		.write_only = 1,
+		.reg = (void __iomem *)MLXPLAT_CPLD_LPC_REG1,
+		.reg_size = 1,
+		.idle_in_use = 1,
+	},
+	{
+		.parent = 1,
+		.base_nr = MLXPLAT_CPLD_CH2,
+		.write_only = 1,
+		.reg = (void __iomem *)MLXPLAT_CPLD_LPC_REG2,
+		.reg_size = 1,
+		.idle_in_use = 1,
+	},
+
+};
+
+static struct platform_device *mlxplat_dev;
+
+static int __init mlxplat_dmi_default_matched(const struct dmi_system_id *dmi)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+		mlxplat_mux_data[i].values = mlxplat_default_channels[i];
+		mlxplat_mux_data[i].n_values =
+				ARRAY_SIZE(mlxplat_default_channels[i]);
+	}
+
+	return 1;
+};
+
+static int __init mlxplat_dmi_msn21xx_matched(const struct dmi_system_id *dmi)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+		mlxplat_mux_data[i].values = mlxplat_msn21xx_channels;
+		mlxplat_mux_data[i].n_values =
+				ARRAY_SIZE(mlxplat_msn21xx_channels);
+	}
+
+	return 1;
+};
+
+static struct dmi_system_id mlxplat_dmi_table[] __initdata = {
+	{
+		.callback = mlxplat_dmi_default_matched,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MSN24"),
+		},
+	},
+	{
+		.callback = mlxplat_dmi_default_matched,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MSN27"),
+		},
+	},
+	{
+		.callback = mlxplat_dmi_default_matched,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MSB"),
+		},
+	},
+	{
+		.callback = mlxplat_dmi_default_matched,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MSX"),
+		},
+	},
+	{
+		.callback = mlxplat_dmi_msn21xx_matched,
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Mellanox Technologies"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "MSN21"),
+		},
+	},
+	{ }
+};
+
+static int __init mlxplat_init(void)
+{
+	struct mlxplat_priv *priv;
+	int i, err;
+
+	if (!dmi_check_system(mlxplat_dmi_table))
+		return -ENODEV;
+
+	mlxplat_dev = platform_device_register_simple(MLX_PLAT_DEVICE_NAME, -1,
+					mlxplat_lpc_resources,
+					ARRAY_SIZE(mlxplat_lpc_resources));
+
+	if (IS_ERR(mlxplat_dev))
+		return PTR_ERR(mlxplat_dev);
+
+	priv = devm_kzalloc(&mlxplat_dev->dev, sizeof(struct mlxplat_priv),
+			    GFP_KERNEL);
+	if (!priv) {
+		err = -ENOMEM;
+		goto fail_alloc;
+	}
+	platform_set_drvdata(mlxplat_dev, priv);
+
+	priv->pdev_i2c = platform_device_register_simple("i2c_mlxcpld", -1,
+							 NULL, 0);
+	if (IS_ERR(priv->pdev_i2c)) {
+		err = PTR_ERR(priv->pdev_i2c);
+		goto fail_alloc;
+	};
+
+	for (i = 0; i < ARRAY_SIZE(mlxplat_mux_data); i++) {
+		priv->pdev_mux[i] = platform_device_register_resndata(
+						&mlxplat_dev->dev,
+						"i2c-mux-reg", i, NULL,
+						0, &mlxplat_mux_data[i],
+						sizeof(mlxplat_mux_data[i]));
+		if (IS_ERR(priv->pdev_mux[i])) {
+			err = PTR_ERR(priv->pdev_mux[i]);
+			goto fail_platform_mux_register;
+		}
+	}
+
+	return 0;
+
+fail_platform_mux_register:
+	for (i--; i > 0 ; i--)
+		platform_device_unregister(priv->pdev_mux[i]);
+	platform_device_unregister(priv->pdev_i2c);
+fail_alloc:
+	platform_device_unregister(mlxplat_dev);
+
+	return err;
+}
+module_init(mlxplat_init);
+
+static void __exit mlxplat_exit(void)
+{
+	struct mlxplat_priv *priv = platform_get_drvdata(mlxplat_dev);
+	int i;
+
+	for (i = ARRAY_SIZE(mlxplat_mux_data) - 1; i >= 0 ; i--)
+		platform_device_unregister(priv->pdev_mux[i]);
+
+	platform_device_unregister(priv->pdev_i2c);
+	platform_device_unregister(mlxplat_dev);
+}
+module_exit(mlxplat_exit);
+
+MODULE_AUTHOR("Vadim Pasternak (vadimp@mellanox.com)");
+MODULE_DESCRIPTION("Mellanox platform driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSN24*:");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSN27*:");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSB*:");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSX*:");
+MODULE_ALIAS("dmi:*:*Mellanox*:MSN21*:");
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index 66b2166..b4d5e95 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -149,11 +149,8 @@
 s64
 uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len)
 {
-	s64 ret;
-
-	ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
-					(u64)addr, buf, (u64)len, 0);
-	return ret;
+	return uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie,
+				    (u64)addr, buf, (u64)len, 0);
 }
 EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa);
 
@@ -187,7 +184,8 @@
 void uv_bios_init(void)
 {
 	uv_systab = NULL;
-	if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab) {
+	if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) ||
+	    !efi.uv_systab || efi_runtime_disabled()) {
 		pr_crit("UV: UVsystab: missing\n");
 		return;
 	}
@@ -199,12 +197,14 @@
 		return;
 	}
 
+	/* Starting with UV4 the UV systab size is variable */
 	if (uv_systab->revision >= UV_SYSTAB_VERSION_UV4) {
+		int size = uv_systab->size;
+
 		iounmap(uv_systab);
-		uv_systab = ioremap(efi.uv_systab, uv_systab->size);
+		uv_systab = ioremap(efi.uv_systab, size);
 		if (!uv_systab) {
-			pr_err("UV: UVsystab: ioremap(%d) failed!\n",
-				uv_systab->size);
+			pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
 			return;
 		}
 	}
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index fdb4d42..9e42842 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -24,6 +24,29 @@
 #include <asm/irq_vectors.h>
 #include <asm/timer.h>
 
+static struct bau_operations ops;
+
+static struct bau_operations uv123_bau_ops = {
+	.bau_gpa_to_offset       = uv_gpa_to_offset,
+	.read_l_sw_ack           = read_mmr_sw_ack,
+	.read_g_sw_ack           = read_gmmr_sw_ack,
+	.write_l_sw_ack          = write_mmr_sw_ack,
+	.write_g_sw_ack          = write_gmmr_sw_ack,
+	.write_payload_first     = write_mmr_payload_first,
+	.write_payload_last      = write_mmr_payload_last,
+};
+
+static struct bau_operations uv4_bau_ops = {
+	.bau_gpa_to_offset       = uv_gpa_to_soc_phys_ram,
+	.read_l_sw_ack           = read_mmr_proc_sw_ack,
+	.read_g_sw_ack           = read_gmmr_proc_sw_ack,
+	.write_l_sw_ack          = write_mmr_proc_sw_ack,
+	.write_g_sw_ack          = write_gmmr_proc_sw_ack,
+	.write_payload_first     = write_mmr_proc_payload_first,
+	.write_payload_last      = write_mmr_proc_payload_last,
+};
+
+
 /* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
 static int timeout_base_ns[] = {
 		20,
@@ -55,16 +78,16 @@
 static int disabled_period	= DISABLED_PERIOD;
 
 static struct tunables tunables[] = {
-	{&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */
-	{&plugged_delay, PLUGGED_DELAY},
-	{&plugsb4reset, PLUGSB4RESET},
-	{&timeoutsb4reset, TIMEOUTSB4RESET},
-	{&ipi_reset_limit, IPI_RESET_LIMIT},
-	{&complete_threshold, COMPLETE_THRESHOLD},
-	{&congested_respns_us, CONGESTED_RESPONSE_US},
-	{&congested_reps, CONGESTED_REPS},
-	{&disabled_period, DISABLED_PERIOD},
-	{&giveup_limit, GIVEUP_LIMIT}
+	{&max_concurr,           MAX_BAU_CONCURRENT}, /* must be [0] */
+	{&plugged_delay,         PLUGGED_DELAY},
+	{&plugsb4reset,          PLUGSB4RESET},
+	{&timeoutsb4reset,       TIMEOUTSB4RESET},
+	{&ipi_reset_limit,       IPI_RESET_LIMIT},
+	{&complete_threshold,    COMPLETE_THRESHOLD},
+	{&congested_respns_us,   CONGESTED_RESPONSE_US},
+	{&congested_reps,        CONGESTED_REPS},
+	{&disabled_period,       DISABLED_PERIOD},
+	{&giveup_limit,          GIVEUP_LIMIT}
 };
 
 static struct dentry *tunables_dir;
@@ -216,7 +239,7 @@
 	msg = mdp->msg;
 	if (!msg->canceled && do_acknowledge) {
 		dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec;
-		write_mmr_sw_ack(dw);
+		ops.write_l_sw_ack(dw);
 	}
 	msg->replied_to = 1;
 	msg->swack_vec = 0;
@@ -252,7 +275,7 @@
 			msg->swack_vec) == 0) &&
 		    (msg2->sending_cpu == msg->sending_cpu) &&
 		    (msg2->msg_type != MSG_NOOP)) {
-			mmr = read_mmr_sw_ack();
+			mmr = ops.read_l_sw_ack();
 			msg_res = msg2->swack_vec;
 			/*
 			 * This is a message retry; clear the resources held
@@ -270,7 +293,7 @@
 				stat->d_canceled++;
 				cancel_count++;
 				mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
-				write_mmr_sw_ack(mr);
+				ops.write_l_sw_ack(mr);
 			}
 		}
 	}
@@ -403,12 +426,12 @@
 			/*
 			 * only reset the resource if it is still pending
 			 */
-			mmr = read_mmr_sw_ack();
+			mmr = ops.read_l_sw_ack();
 			msg_res = msg->swack_vec;
 			mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res;
 			if (mmr & msg_res) {
 				stat->d_rcanceled++;
-				write_mmr_sw_ack(mr);
+				ops.write_l_sw_ack(mr);
 			}
 		}
 	}
@@ -580,11 +603,7 @@
  */
 static unsigned long uv2_3_read_status(unsigned long offset, int rshft, int desc)
 {
-	unsigned long descriptor_status;
-
-	descriptor_status =
-		((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
-	return descriptor_status;
+	return ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK) << 1;
 }
 
 /*
@@ -1202,7 +1221,7 @@
 	struct bau_pq_entry *msg = mdp->msg;
 	struct bau_pq_entry *other_msg;
 
-	mmr_image = read_mmr_sw_ack();
+	mmr_image = ops.read_l_sw_ack();
 	swack_vec = msg->swack_vec;
 
 	if ((swack_vec & mmr_image) == 0) {
@@ -1431,7 +1450,7 @@
 		/* destination side statistics */
 		seq_printf(file,
 			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
-			   read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
+			   ops.read_g_sw_ack(uv_cpu_to_pnode(cpu)),
 			   stat->d_requestee, cycles_2_us(stat->d_time),
 			   stat->d_alltlb, stat->d_onetlb, stat->d_multmsg,
 			   stat->d_nomsg, stat->d_retries, stat->d_canceled,
@@ -1497,16 +1516,16 @@
 	}
 
 	if (kstrtol(optstr, 10, &input_arg) < 0) {
-		printk(KERN_DEBUG "%s is invalid\n", optstr);
+		pr_debug("%s is invalid\n", optstr);
 		return -EINVAL;
 	}
 
 	if (input_arg == 0) {
 		elements = ARRAY_SIZE(stat_description);
-		printk(KERN_DEBUG "# cpu:      cpu number\n");
-		printk(KERN_DEBUG "Sender statistics:\n");
+		pr_debug("# cpu:      cpu number\n");
+		pr_debug("Sender statistics:\n");
 		for (i = 0; i < elements; i++)
-			printk(KERN_DEBUG "%s\n", stat_description[i]);
+			pr_debug("%s\n", stat_description[i]);
 	} else if (input_arg == -1) {
 		for_each_present_cpu(cpu) {
 			stat = &per_cpu(ptcstats, cpu);
@@ -1554,7 +1573,7 @@
 			break;
 	}
 	if (cnt != e) {
-		printk(KERN_INFO "bau tunable error: should be %d values\n", e);
+		pr_info("bau tunable error: should be %d values\n", e);
 		return -EINVAL;
 	}
 
@@ -1571,7 +1590,7 @@
 				continue;
 			}
 			if (val < 1 || val > bcp->cpus_in_uvhub) {
-				printk(KERN_DEBUG
+				pr_debug(
 				"Error: BAU max concurrent %d is invalid\n",
 				val);
 				return -EINVAL;
@@ -1619,17 +1638,17 @@
 
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
-		bcp->max_concurr =		max_concurr;
-		bcp->max_concurr_const =	max_concurr;
-		bcp->plugged_delay =		plugged_delay;
-		bcp->plugsb4reset =		plugsb4reset;
-		bcp->timeoutsb4reset =		timeoutsb4reset;
-		bcp->ipi_reset_limit =		ipi_reset_limit;
-		bcp->complete_threshold =	complete_threshold;
-		bcp->cong_response_us =		congested_respns_us;
-		bcp->cong_reps =		congested_reps;
-		bcp->disabled_period =		sec_2_cycles(disabled_period);
-		bcp->giveup_limit =		giveup_limit;
+		bcp->max_concurr         = max_concurr;
+		bcp->max_concurr_const   = max_concurr;
+		bcp->plugged_delay       = plugged_delay;
+		bcp->plugsb4reset        = plugsb4reset;
+		bcp->timeoutsb4reset     = timeoutsb4reset;
+		bcp->ipi_reset_limit     = ipi_reset_limit;
+		bcp->complete_threshold  = complete_threshold;
+		bcp->cong_response_us    = congested_respns_us;
+		bcp->cong_reps           = congested_reps;
+		bcp->disabled_period     = sec_2_cycles(disabled_period);
+		bcp->giveup_limit        = giveup_limit;
 	}
 	return count;
 }
@@ -1676,21 +1695,21 @@
 	proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
 				  &proc_uv_ptc_operations);
 	if (!proc_uv_ptc) {
-		printk(KERN_ERR "unable to create %s proc entry\n",
+		pr_err("unable to create %s proc entry\n",
 		       UV_PTC_BASENAME);
 		return -EINVAL;
 	}
 
 	tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
 	if (!tunables_dir) {
-		printk(KERN_ERR "unable to create debugfs directory %s\n",
+		pr_err("unable to create debugfs directory %s\n",
 		       UV_BAU_TUNABLES_DIR);
 		return -EINVAL;
 	}
 	tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
 					tunables_dir, NULL, &tunables_fops);
 	if (!tunables_file) {
-		printk(KERN_ERR "unable to create debugfs file %s\n",
+		pr_err("unable to create debugfs file %s\n",
 		       UV_BAU_TUNABLES_FILE);
 		return -EINVAL;
 	}
@@ -1725,7 +1744,7 @@
 
 	gpa = uv_gpa(bau_desc);
 	n = uv_gpa_to_gnode(gpa);
-	m = uv_gpa_to_offset(gpa);
+	m = ops.bau_gpa_to_offset(gpa);
 	if (is_uv1_hub())
 		uv1 = 1;
 
@@ -1740,7 +1759,7 @@
 		memset(bd2, 0, sizeof(struct bau_desc));
 		if (uv1) {
 			uv1_hdr = &bd2->header.uv1_hdr;
-			uv1_hdr->swack_flag =	1;
+			uv1_hdr->swack_flag = 1;
 			/*
 			 * The base_dest_nasid set in the message header
 			 * is the nasid of the first uvhub in the partition.
@@ -1749,10 +1768,10 @@
 			 * if nasid striding is being used.
 			 */
 			uv1_hdr->base_dest_nasid =
-						UV_PNODE_TO_NASID(base_pnode);
-			uv1_hdr->dest_subnodeid =	UV_LB_SUBNODEID;
-			uv1_hdr->command =		UV_NET_ENDPOINT_INTD;
-			uv1_hdr->int_both =		1;
+			                          UV_PNODE_TO_NASID(base_pnode);
+			uv1_hdr->dest_subnodeid  = UV_LB_SUBNODEID;
+			uv1_hdr->command         = UV_NET_ENDPOINT_INTD;
+			uv1_hdr->int_both        = 1;
 			/*
 			 * all others need to be set to zero:
 			 *   fairness chaining multilevel count replied_to
@@ -1763,11 +1782,11 @@
 			 * uses native mode for selective broadcasts.
 			 */
 			uv2_3_hdr = &bd2->header.uv2_3_hdr;
-			uv2_3_hdr->swack_flag =	1;
+			uv2_3_hdr->swack_flag      = 1;
 			uv2_3_hdr->base_dest_nasid =
-						UV_PNODE_TO_NASID(base_pnode);
-			uv2_3_hdr->dest_subnodeid =	UV_LB_SUBNODEID;
-			uv2_3_hdr->command =		UV_NET_ENDPOINT_INTD;
+			                          UV_PNODE_TO_NASID(base_pnode);
+			uv2_3_hdr->dest_subnodeid  = UV_LB_SUBNODEID;
+			uv2_3_hdr->command         = UV_NET_ENDPOINT_INTD;
 		}
 	}
 	for_each_present_cpu(cpu) {
@@ -1790,10 +1809,7 @@
 	size_t plsize;
 	char *cp;
 	void *vp;
-	unsigned long pn;
-	unsigned long first;
-	unsigned long pn_first;
-	unsigned long last;
+	unsigned long gnode, first, last, tail;
 	struct bau_pq_entry *pqp;
 	struct bau_control *bcp;
 
@@ -1814,17 +1830,25 @@
 		bcp->bau_msg_head	= pqp;
 		bcp->queue_last		= pqp + (DEST_Q_SIZE - 1);
 	}
+
+	first = ops.bau_gpa_to_offset(uv_gpa(pqp));
+	last = ops.bau_gpa_to_offset(uv_gpa(pqp + (DEST_Q_SIZE - 1)));
+
 	/*
-	 * need the gnode of where the memory was really allocated
+	 * Pre UV4, the gnode is required to locate the payload queue
+	 * and the payload queue tail must be maintained by the kernel.
 	 */
-	pn = uv_gpa_to_gnode(uv_gpa(pqp));
-	first = uv_physnodeaddr(pqp);
-	pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first;
-	last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1));
-	write_mmr_payload_first(pnode, pn_first);
-	write_mmr_payload_tail(pnode, first);
-	write_mmr_payload_last(pnode, last);
-	write_gmmr_sw_ack(pnode, 0xffffUL);
+	bcp = &per_cpu(bau_control, smp_processor_id());
+	if (bcp->uvhub_version <= 3) {
+		tail = first;
+		gnode = uv_gpa_to_gnode(uv_gpa(pqp));
+		first = (gnode << UV_PAYLOADQ_GNODE_SHIFT) | tail;
+		write_mmr_payload_tail(pnode, tail);
+	}
+
+	ops.write_payload_first(pnode, first);
+	ops.write_payload_last(pnode, last);
+	ops.write_g_sw_ack(pnode, 0xffffUL);
 
 	/* in effect, all msg_type's are set to MSG_NOOP */
 	memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE);
@@ -1914,8 +1938,8 @@
 		bcp->complete_threshold		= complete_threshold;
 		bcp->cong_response_us		= congested_respns_us;
 		bcp->cong_reps			= congested_reps;
-		bcp->disabled_period =		sec_2_cycles(disabled_period);
-		bcp->giveup_limit =		giveup_limit;
+		bcp->disabled_period		= sec_2_cycles(disabled_period);
+		bcp->giveup_limit		= giveup_limit;
 		spin_lock_init(&bcp->queue_lock);
 		spin_lock_init(&bcp->uvhub_lock);
 		spin_lock_init(&bcp->disable_lock);
@@ -1944,7 +1968,7 @@
 
 		pnode = uv_cpu_hub_info(cpu)->pnode;
 		if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) {
-			printk(KERN_EMERG
+			pr_emerg(
 				"cpu %d pnode %d-%d beyond %d; BAU disabled\n",
 				cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE);
 			return 1;
@@ -1969,7 +1993,7 @@
 		sdp->cpu_number[sdp->num_cpus] = cpu;
 		sdp->num_cpus++;
 		if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) {
-			printk(KERN_EMERG "%d cpus per socket invalid\n",
+			pr_emerg("%d cpus per socket invalid\n",
 				sdp->num_cpus);
 			return 1;
 		}
@@ -2035,15 +2059,17 @@
 			bcp->uvhub_version = 2;
 		else if (is_uv3_hub())
 			bcp->uvhub_version = 3;
+		else if (is_uv4_hub())
+			bcp->uvhub_version = 4;
 		else {
-			printk(KERN_EMERG "uvhub version not 1, 2 or 3\n");
+			pr_emerg("uvhub version not 1, 2, 3, or 4\n");
 			return 1;
 		}
 		bcp->uvhub_master = *hmasterp;
 		bcp->uvhub_cpu = uv_cpu_blade_processor_id(cpu);
 
 		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
-			printk(KERN_EMERG "%d cpus per uvhub invalid\n",
+			pr_emerg("%d cpus per uvhub invalid\n",
 				bcp->uvhub_cpu);
 			return 1;
 		}
@@ -2098,7 +2124,8 @@
 	void *vp;
 	struct uvhub_desc *uvhub_descs;
 
-	timeout_us = calculate_destination_timeout();
+	if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
+		timeout_us = calculate_destination_timeout();
 
 	vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
 	uvhub_descs = (struct uvhub_desc *)vp;
@@ -2138,6 +2165,15 @@
 	if (!is_uv_system())
 		return 0;
 
+	if (is_uv4_hub())
+		ops = uv4_bau_ops;
+	else if (is_uv3_hub())
+		ops = uv123_bau_ops;
+	else if (is_uv2_hub())
+		ops = uv123_bau_ops;
+	else if (is_uv1_hub())
+		ops = uv123_bau_ops;
+
 	for_each_possible_cpu(cur_cpu) {
 		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);
 		zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu));
@@ -2153,7 +2189,9 @@
 			uv_base_pnode = uv_blade_to_pnode(uvhub);
 	}
 
-	enable_timeouts();
+	/* software timeouts are not supported on UV4 */
+	if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
+		enable_timeouts();
 
 	if (init_per_cpu(nuvhubs, uv_base_pnode)) {
 		set_bau_off();
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index b12c26e..53cace2 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -130,7 +130,7 @@
 	ctxt->cr0 = read_cr0();
 	ctxt->cr2 = read_cr2();
 	ctxt->cr3 = read_cr3();
-	ctxt->cr4 = __read_cr4_safe();
+	ctxt->cr4 = __read_cr4();
 #ifdef CONFIG_X86_64
 	ctxt->cr8 = read_cr8();
 #endif
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f0b5f2d..9634557 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -87,7 +87,7 @@
 	struct x86_mapping_info info = {
 		.alloc_pgt_page	= alloc_pgt_page,
 		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,
-		.kernel_mapping = true,
+		.offset		= __PAGE_OFFSET,
 	};
 	unsigned long mstart, mend;
 	pgd_t *pgd;
@@ -113,7 +113,7 @@
 			return result;
 	}
 
-	temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET;
+	temp_level4_pgt = __pa(pgd);
 	return 0;
 }
 
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 1104515..1ac7647 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -68,6 +68,7 @@
 MCE_INJECT_SET(status);
 MCE_INJECT_SET(misc);
 MCE_INJECT_SET(addr);
+MCE_INJECT_SET(synd);
 
 #define MCE_INJECT_GET(reg)						\
 static int inj_##reg##_get(void *data, u64 *val)			\
@@ -81,10 +82,12 @@
 MCE_INJECT_GET(status);
 MCE_INJECT_GET(misc);
 MCE_INJECT_GET(addr);
+MCE_INJECT_GET(synd);
 
 DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
 DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
 DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
+DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
 
 /*
  * Caller needs to be make sure this cpu doesn't disappear
@@ -243,27 +246,27 @@
 
 static void prepare_msrs(void *info)
 {
-	struct mce i_mce = *(struct mce *)info;
-	u8 b = i_mce.bank;
+	struct mce m = *(struct mce *)info;
+	u8 b = m.bank;
 
-	wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus);
+	wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
 
 	if (boot_cpu_has(X86_FEATURE_SMCA)) {
-		if (i_mce.inject_flags == DFR_INT_INJ) {
-			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status);
-			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr);
+		if (m.inject_flags == DFR_INT_INJ) {
+			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
+			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
 		} else {
-			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status);
-			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr);
+			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
+			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
 		}
 
-		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc);
+		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
+		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
 	} else {
-		wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status);
-		wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr);
-		wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc);
+		wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
+		wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
+		wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
 	}
-
 }
 
 static void do_inject(void)
@@ -275,6 +278,9 @@
 	if (i_mce.misc)
 		i_mce.status |= MCI_STATUS_MISCV;
 
+	if (i_mce.synd)
+		i_mce.status |= MCI_STATUS_SYNDV;
+
 	if (inj_type == SW_INJ) {
 		mce_inject_log(&i_mce);
 		return;
@@ -301,7 +307,9 @@
 	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
 	 * Fam10h and later BKDGs.
 	 */
-	if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+	if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
+	    b == 4 &&
+	    boot_cpu_data.x86 < 0x17) {
 		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
 		cpu = get_nbc_for_node(amd_get_nb_id(cpu));
 	}
@@ -371,6 +379,9 @@
 "\t used for error thresholding purposes and its validity is indicated by\n"
 "\t MCi_STATUS[MiscV].\n"
 "\n"
+"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
+"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
+"\n"
 "addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
 "\t associated with the error.\n"
 "\n"
@@ -420,6 +431,7 @@
 	{ .name = "status",	.fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
 	{ .name = "misc",	.fops = &misc_fops,   .perm = S_IRUSR | S_IWUSR },
 	{ .name = "addr",	.fops = &addr_fops,   .perm = S_IRUSR | S_IWUSR },
+	{ .name = "synd",	.fops = &synd_fops,   .perm = S_IRUSR | S_IWUSR },
 	{ .name = "bank",	.fops = &bank_fops,   .perm = S_IRUSR | S_IWUSR },
 	{ .name = "flags",	.fops = &flags_fops,  .perm = S_IRUSR | S_IWUSR },
 	{ .name = "cpu",	.fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
@@ -428,7 +440,7 @@
 
 static int __init init_mce_inject(void)
 {
-	int i;
+	unsigned int i;
 	u64 cap;
 
 	rdmsrl(MSR_IA32_MCG_CAP, cap);
@@ -452,26 +464,22 @@
 	return 0;
 
 err_dfs_add:
-	while (--i >= 0)
+	while (i-- > 0)
 		debugfs_remove(dfs_fls[i].d);
 
 	debugfs_remove(dfs_inj);
 	dfs_inj = NULL;
 
-	return -ENOMEM;
+	return -ENODEV;
 }
 
 static void __exit exit_mce_inject(void)
 {
-	int i;
 
-	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++)
-		debugfs_remove(dfs_fls[i].d);
+	debugfs_remove_recursive(dfs_inj);
+	dfs_inj = NULL;
 
 	memset(&dfs_fls, 0, sizeof(dfs_fls));
-
-	debugfs_remove(dfs_inj);
-	dfs_inj = NULL;
 }
 module_init(init_mce_inject);
 module_exit(exit_mce_inject);
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 705e3ff..5db706f1 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -1,9 +1,11 @@
 #include <linux/io.h>
+#include <linux/slab.h>
 #include <linux/memblock.h>
 
 #include <asm/cacheflush.h>
 #include <asm/pgtable.h>
 #include <asm/realmode.h>
+#include <asm/tlbflush.h>
 
 struct real_mode_header *real_mode_header;
 u32 *trampoline_cr4_features;
@@ -11,25 +13,37 @@
 /* Hold the pgd entry used on booting additional CPUs */
 pgd_t trampoline_pgd_entry;
 
-void __init reserve_real_mode(void)
+void __init set_real_mode_mem(phys_addr_t mem, size_t size)
 {
-	phys_addr_t mem;
-	unsigned char *base;
-	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
+	void *base = __va(mem);
 
-	/* Has to be under 1M so we can execute real-mode AP code. */
-	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
-	if (!mem)
-		panic("Cannot allocate trampoline\n");
-
-	base = __va(mem);
-	memblock_reserve(mem, size);
 	real_mode_header = (struct real_mode_header *) base;
 	printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n",
 	       base, (unsigned long long)mem, size);
 }
 
-void __init setup_real_mode(void)
+void __init reserve_real_mode(void)
+{
+	phys_addr_t mem;
+	size_t size = real_mode_size_needed();
+
+	if (!size)
+		return;
+
+	WARN_ON(slab_is_available());
+
+	/* Has to be under 1M so we can execute real-mode AP code. */
+	mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE);
+	if (!mem) {
+		pr_info("No sub-1M memory is available for the trampoline\n");
+		return;
+	}
+
+	memblock_reserve(mem, size);
+	set_real_mode_mem(mem, size);
+}
+
+static void __init setup_real_mode(void)
 {
 	u16 real_mode_seg;
 	const u32 *rel;
@@ -84,7 +98,7 @@
 
 	trampoline_header->start = (u64) secondary_startup_64;
 	trampoline_cr4_features = &trampoline_header->cr4;
-	*trampoline_cr4_features = __read_cr4();
+	*trampoline_cr4_features = mmu_cr4_features;
 
 	trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
 	trampoline_pgd[0] = trampoline_pgd_entry.pgd;
@@ -100,7 +114,7 @@
  * need to mark it executable at do_pre_smp_initcalls() at least,
  * thus run it as a early_initcall().
  */
-static int __init set_real_mode_permissions(void)
+static void __init set_real_mode_permissions(void)
 {
 	unsigned char *base = (unsigned char *) real_mode_header;
 	size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob);
@@ -119,7 +133,16 @@
 	set_memory_nx((unsigned long) base, size >> PAGE_SHIFT);
 	set_memory_ro((unsigned long) base, ro_size >> PAGE_SHIFT);
 	set_memory_x((unsigned long) text_start, text_size >> PAGE_SHIFT);
+}
+
+static int __init init_real_mode(void)
+{
+	if (!real_mode_header)
+		panic("Real mode trampoline was not allocated");
+
+	setup_real_mode();
+	set_real_mode_permissions();
 
 	return 0;
 }
-early_initcall(set_real_mode_permissions);
+early_initcall(init_real_mode);
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index ebd4dd6..5766ead 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -84,7 +84,10 @@
 	case EAX:
 	case EIP:
 	case UESP:
+		break;
 	case ORIG_EAX:
+		/* Update the syscall number. */
+		UPT_SYSCALL_NR(&child->thread.regs.regs) = value;
 		break;
 	case FS:
 		if (value && (value & 3) != 3)
@@ -191,7 +194,7 @@
 
 static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
 {
-	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+	int err, n, cpu = task_cpu(child);
 	struct user_i387_struct fpregs;
 
 	err = save_i387_registers(userspace_pid[cpu],
@@ -208,7 +211,7 @@
 
 static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
 {
-	int n, cpu = ((struct thread_info *) child->stack)->cpu;
+	int n, cpu = task_cpu(child);
 	struct user_i387_struct fpregs;
 
 	n = copy_from_user(&fpregs, buf, sizeof(fpregs));
@@ -221,7 +224,7 @@
 
 static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
 {
-	int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
+	int err, n, cpu = task_cpu(child);
 	struct user_fxsr_struct fpregs;
 
 	err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
@@ -237,7 +240,7 @@
 
 static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
 {
-	int n, cpu = ((struct thread_info *) child->stack)->cpu;
+	int n, cpu = task_cpu(child);
 	struct user_fxsr_struct fpregs;
 
 	n = copy_from_user(&fpregs, buf, sizeof(fpregs));
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index faab418..0b5c184 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -78,7 +78,11 @@
 	case RSI:
 	case RDI:
 	case RBP:
+		break;
+
 	case ORIG_RAX:
+		/* Update the syscall number. */
+		UPT_SYSCALL_NR(&child->thread.regs.regs) = value;
 		break;
 
 	case FS:
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 8ffb089..f1d2182 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -118,7 +118,7 @@
 DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 
 /* Linux <-> Xen vCPU id mapping */
-DEFINE_PER_CPU(int, xen_vcpu_id) = -1;
+DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
 EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 
 enum xen_domain_type xen_domain_type = XEN_NATIVE;
@@ -1237,7 +1237,6 @@
 	.write_cr0 = xen_write_cr0,
 
 	.read_cr4 = native_read_cr4,
-	.read_cr4_safe = native_read_cr4_safe,
 	.write_cr4 = xen_write_cr4,
 
 #ifdef CONFIG_X86_64
@@ -1925,6 +1924,45 @@
 	}
 }
 
+static void xen_pin_vcpu(int cpu)
+{
+	static bool disable_pinning;
+	struct sched_pin_override pin_override;
+	int ret;
+
+	if (disable_pinning)
+		return;
+
+	pin_override.pcpu = cpu;
+	ret = HYPERVISOR_sched_op(SCHEDOP_pin_override, &pin_override);
+
+	/* Ignore errors when removing override. */
+	if (cpu < 0)
+		return;
+
+	switch (ret) {
+	case -ENOSYS:
+		pr_warn("Unable to pin on physical cpu %d. In case of problems consider vcpu pinning.\n",
+			cpu);
+		disable_pinning = true;
+		break;
+	case -EPERM:
+		WARN(1, "Trying to pin vcpu without having privilege to do so\n");
+		disable_pinning = true;
+		break;
+	case -EINVAL:
+	case -EBUSY:
+		pr_warn("Physical cpu %d not available for pinning. Check Xen cpu configuration.\n",
+			cpu);
+		break;
+	case 0:
+		break;
+	default:
+		WARN(1, "rc %d while trying to pin vcpu\n", ret);
+		disable_pinning = true;
+	}
+}
+
 const struct hypervisor_x86 x86_hyper_xen = {
 	.name			= "Xen",
 	.detect			= xen_platform,
@@ -1933,6 +1971,7 @@
 #endif
 	.x2apic_available	= xen_x2apic_para_available,
 	.set_cpu_features       = xen_set_cpu_features,
+	.pin_vcpu               = xen_pin_vcpu,
 };
 EXPORT_SYMBOL(x86_hyper_xen);
 
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 1764252..f8960fc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -861,7 +861,7 @@
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
 
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
 
 	/*
 	 * Check whether the kernel itself conflicts with the target E820 map.
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index f42e78d..3d6e006 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -21,8 +21,6 @@
 static DEFINE_PER_CPU(char *, irq_name);
 static bool xen_pvspin = true;
 
-#ifdef CONFIG_QUEUED_SPINLOCKS
-
 #include <asm/qspinlock.h>
 
 static void xen_qlock_kick(int cpu)
@@ -71,207 +69,6 @@
 	xen_poll_irq(irq);
 }
 
-#else /* CONFIG_QUEUED_SPINLOCKS */
-
-enum xen_contention_stat {
-	TAKEN_SLOW,
-	TAKEN_SLOW_PICKUP,
-	TAKEN_SLOW_SPURIOUS,
-	RELEASED_SLOW,
-	RELEASED_SLOW_KICKED,
-	NR_CONTENTION_STATS
-};
-
-
-#ifdef CONFIG_XEN_DEBUG_FS
-#define HISTO_BUCKETS	30
-static struct xen_spinlock_stats
-{
-	u32 contention_stats[NR_CONTENTION_STATS];
-	u32 histo_spin_blocked[HISTO_BUCKETS+1];
-	u64 time_blocked;
-} spinlock_stats;
-
-static u8 zero_stats;
-
-static inline void check_zero(void)
-{
-	u8 ret;
-	u8 old = READ_ONCE(zero_stats);
-	if (unlikely(old)) {
-		ret = cmpxchg(&zero_stats, old, 0);
-		/* This ensures only one fellow resets the stat */
-		if (ret == old)
-			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
-	}
-}
-
-static inline void add_stats(enum xen_contention_stat var, u32 val)
-{
-	check_zero();
-	spinlock_stats.contention_stats[var] += val;
-}
-
-static inline u64 spin_time_start(void)
-{
-	return xen_clocksource_read();
-}
-
-static void __spin_time_accum(u64 delta, u32 *array)
-{
-	unsigned index = ilog2(delta);
-
-	check_zero();
-
-	if (index < HISTO_BUCKETS)
-		array[index]++;
-	else
-		array[HISTO_BUCKETS]++;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-	u32 delta = xen_clocksource_read() - start;
-
-	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
-	spinlock_stats.time_blocked += delta;
-}
-#else  /* !CONFIG_XEN_DEBUG_FS */
-static inline void add_stats(enum xen_contention_stat var, u32 val)
-{
-}
-
-static inline u64 spin_time_start(void)
-{
-	return 0;
-}
-
-static inline void spin_time_accum_blocked(u64 start)
-{
-}
-#endif  /* CONFIG_XEN_DEBUG_FS */
-
-struct xen_lock_waiting {
-	struct arch_spinlock *lock;
-	__ticket_t want;
-};
-
-static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
-static cpumask_t waiting_cpus;
-
-__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
-{
-	int irq = __this_cpu_read(lock_kicker_irq);
-	struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
-	int cpu = smp_processor_id();
-	u64 start;
-	__ticket_t head;
-	unsigned long flags;
-
-	/* If kicker interrupts not initialized yet, just spin */
-	if (irq == -1)
-		return;
-
-	start = spin_time_start();
-
-	/*
-	 * Make sure an interrupt handler can't upset things in a
-	 * partially setup state.
-	 */
-	local_irq_save(flags);
-	/*
-	 * We don't really care if we're overwriting some other
-	 * (lock,want) pair, as that would mean that we're currently
-	 * in an interrupt context, and the outer context had
-	 * interrupts enabled.  That has already kicked the VCPU out
-	 * of xen_poll_irq(), so it will just return spuriously and
-	 * retry with newly setup (lock,want).
-	 *
-	 * The ordering protocol on this is that the "lock" pointer
-	 * may only be set non-NULL if the "want" ticket is correct.
-	 * If we're updating "want", we must first clear "lock".
-	 */
-	w->lock = NULL;
-	smp_wmb();
-	w->want = want;
-	smp_wmb();
-	w->lock = lock;
-
-	/* This uses set_bit, which atomic and therefore a barrier */
-	cpumask_set_cpu(cpu, &waiting_cpus);
-	add_stats(TAKEN_SLOW, 1);
-
-	/* clear pending */
-	xen_clear_irq_pending(irq);
-
-	/* Only check lock once pending cleared */
-	barrier();
-
-	/*
-	 * Mark entry to slowpath before doing the pickup test to make
-	 * sure we don't deadlock with an unlocker.
-	 */
-	__ticket_enter_slowpath(lock);
-
-	/* make sure enter_slowpath, which is atomic does not cross the read */
-	smp_mb__after_atomic();
-
-	/*
-	 * check again make sure it didn't become free while
-	 * we weren't looking
-	 */
-	head = READ_ONCE(lock->tickets.head);
-	if (__tickets_equal(head, want)) {
-		add_stats(TAKEN_SLOW_PICKUP, 1);
-		goto out;
-	}
-
-	/* Allow interrupts while blocked */
-	local_irq_restore(flags);
-
-	/*
-	 * If an interrupt happens here, it will leave the wakeup irq
-	 * pending, which will cause xen_poll_irq() to return
-	 * immediately.
-	 */
-
-	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
-	xen_poll_irq(irq);
-	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
-
-	local_irq_save(flags);
-
-	kstat_incr_irq_this_cpu(irq);
-out:
-	cpumask_clear_cpu(cpu, &waiting_cpus);
-	w->lock = NULL;
-
-	local_irq_restore(flags);
-
-	spin_time_accum_blocked(start);
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
-
-static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
-{
-	int cpu;
-
-	add_stats(RELEASED_SLOW, 1);
-
-	for_each_cpu(cpu, &waiting_cpus) {
-		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
-
-		/* Make sure we read lock before want */
-		if (READ_ONCE(w->lock) == lock &&
-		    READ_ONCE(w->want) == next) {
-			add_stats(RELEASED_SLOW_KICKED, 1);
-			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
-			break;
-		}
-	}
-}
-#endif /* CONFIG_QUEUED_SPINLOCKS */
-
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
 	BUG();
@@ -334,16 +131,12 @@
 		return;
 	}
 	printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
-#ifdef CONFIG_QUEUED_SPINLOCKS
+
 	__pv_init_lock_hash();
 	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
 	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
 	pv_lock_ops.wait = xen_qlock_wait;
 	pv_lock_ops.kick = xen_qlock_kick;
-#else
-	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
-	pv_lock_ops.unlock_kick = xen_unlock_kick;
-#endif
 }
 
 /*
@@ -372,44 +165,3 @@
 }
 early_param("xen_nopvspin", xen_parse_nopvspin);
 
-#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCKS)
-
-static struct dentry *d_spin_debug;
-
-static int __init xen_spinlock_debugfs(void)
-{
-	struct dentry *d_xen = xen_init_debugfs();
-
-	if (d_xen == NULL)
-		return -ENOMEM;
-
-	if (!xen_pvspin)
-		return 0;
-
-	d_spin_debug = debugfs_create_dir("spinlocks", d_xen);
-
-	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
-
-	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
-	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
-	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
-
-	debugfs_create_u32("released_slow", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
-	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
-
-	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
-			   &spinlock_stats.time_blocked);
-
-	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
-				spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
-
-	return 0;
-}
-fs_initcall(xen_spinlock_debugfs);
-
-#endif	/* CONFIG_XEN_DEBUG_FS */
diff --git a/block/bio.c b/block/bio.c
index f394775..aa73540 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -667,18 +667,19 @@
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
 
-	if (bio_op(bio) == REQ_OP_DISCARD)
-		goto integrity_clone;
-
-	if (bio_op(bio) == REQ_OP_WRITE_SAME) {
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
+		break;
+	case REQ_OP_WRITE_SAME:
 		bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
-		goto integrity_clone;
+		break;
+	default:
+		bio_for_each_segment(bv, bio_src, iter)
+			bio->bi_io_vec[bio->bi_vcnt++] = bv;
+		break;
 	}
 
-	bio_for_each_segment(bv, bio_src, iter)
-		bio->bi_io_vec[bio->bi_vcnt++] = bv;
-
-integrity_clone:
 	if (bio_integrity(bio_src)) {
 		int ret;
 
@@ -1788,7 +1789,7 @@
 	 * Discards need a mutable bio_vec to accommodate the payload
 	 * required by the DSM TRIM and UNMAP commands.
 	 */
-	if (bio_op(bio) == REQ_OP_DISCARD)
+	if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
 		split = bio_clone_bioset(bio, gfp, bs);
 	else
 		split = bio_clone_fast(bio, gfp, bs);
diff --git a/block/blk-core.c b/block/blk-core.c
index 999442e..36c7ac3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -515,7 +515,9 @@
 
 void blk_set_queue_dying(struct request_queue *q)
 {
-	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+	spin_lock_irq(q->queue_lock);
+	queue_flag_set(QUEUE_FLAG_DYING, q);
+	spin_unlock_irq(q->queue_lock);
 
 	if (q->mq_ops)
 		blk_mq_wake_waiters(q);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 3eec75a..2642e5f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -94,9 +94,31 @@
 	bool do_split = true;
 	struct bio *new = NULL;
 	const unsigned max_sectors = get_max_io_size(q, bio);
+	unsigned bvecs = 0;
 
 	bio_for_each_segment(bv, bio, iter) {
 		/*
+		 * With arbitrary bio size, the incoming bio may be very
+		 * big. We have to split the bio into small bios so that
+		 * each holds at most BIO_MAX_PAGES bvecs because
+		 * bio_clone() can fail to allocate big bvecs.
+		 *
+		 * It should have been better to apply the limit per
+		 * request queue in which bio_clone() is involved,
+		 * instead of globally. The biggest blocker is the
+		 * bio_clone() in bio bounce.
+		 *
+		 * If bio is splitted by this reason, we should have
+		 * allowed to continue bios merging, but don't do
+		 * that now for making the change simple.
+		 *
+		 * TODO: deal with bio bounce's bio_clone() gracefully
+		 * and convert the global limit into per-queue limit.
+		 */
+		if (bvecs++ >= BIO_MAX_PAGES)
+			goto split;
+
+		/*
 		 * If the queue doesn't support SG gaps and adding this
 		 * offset would create a gap, disallow it.
 		 */
@@ -172,12 +194,18 @@
 	struct bio *split, *res;
 	unsigned nsegs;
 
-	if (bio_op(*bio) == REQ_OP_DISCARD)
+	switch (bio_op(*bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
 		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
-	else if (bio_op(*bio) == REQ_OP_WRITE_SAME)
+		break;
+	case REQ_OP_WRITE_SAME:
 		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
-	else
+		break;
+	default:
 		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
+		break;
+	}
 
 	/* physical segments can be figured out during splitting */
 	res = split ? split : *bio;
@@ -213,7 +241,7 @@
 	 * This should probably be returning 0, but blk_add_request_payload()
 	 * (Christoph!!!!)
 	 */
-	if (bio_op(bio) == REQ_OP_DISCARD)
+	if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
 		return 1;
 
 	if (bio_op(bio) == REQ_OP_WRITE_SAME)
@@ -385,7 +413,9 @@
 	nsegs = 0;
 	cluster = blk_queue_cluster(q);
 
-	if (bio_op(bio) == REQ_OP_DISCARD) {
+	switch (bio_op(bio)) {
+	case REQ_OP_DISCARD:
+	case REQ_OP_SECURE_ERASE:
 		/*
 		 * This is a hack - drivers should be neither modifying the
 		 * biovec, nor relying on bi_vcnt - but because of
@@ -393,19 +423,16 @@
 		 * a payload we need to set up here (thank you Christoph) and
 		 * bi_vcnt is really the only way of telling if we need to.
 		 */
-
-		if (bio->bi_vcnt)
-			goto single_segment;
-
-		return 0;
-	}
-
-	if (bio_op(bio) == REQ_OP_WRITE_SAME) {
-single_segment:
+		if (!bio->bi_vcnt)
+			return 0;
+		/* Fall through */
+	case REQ_OP_WRITE_SAME:
 		*sg = sglist;
 		bvec = bio_iovec(bio);
 		sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
 		return 1;
+	default:
+		break;
 	}
 
 	for_each_bio(bio)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e931a0e..c207fa9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -296,17 +296,29 @@
 	if (ret)
 		return ERR_PTR(ret);
 
+	/*
+	 * Check if the hardware context is actually mapped to anything.
+	 * If not tell the caller that it should skip this queue.
+	 */
 	hctx = q->queue_hw_ctx[hctx_idx];
+	if (!blk_mq_hw_queue_mapped(hctx)) {
+		ret = -EXDEV;
+		goto out_queue_exit;
+	}
 	ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
 
 	blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
 	rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
 	if (!rq) {
-		blk_queue_exit(q);
-		return ERR_PTR(-EWOULDBLOCK);
+		ret = -EWOULDBLOCK;
+		goto out_queue_exit;
 	}
 
 	return rq;
+
+out_queue_exit:
+	blk_queue_exit(q);
+	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
@@ -793,11 +805,12 @@
 	struct list_head *dptr;
 	int queued;
 
-	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
-
 	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
 		return;
 
+	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+		cpu_online(hctx->next_cpu));
+
 	hctx->run++;
 
 	/*
@@ -1036,10 +1049,11 @@
 EXPORT_SYMBOL(blk_mq_delay_queue);
 
 static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
-					    struct blk_mq_ctx *ctx,
 					    struct request *rq,
 					    bool at_head)
 {
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+
 	trace_block_rq_insert(hctx->queue, rq);
 
 	if (at_head)
@@ -1053,20 +1067,16 @@
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
-	__blk_mq_insert_req_list(hctx, ctx, rq, at_head);
+	__blk_mq_insert_req_list(hctx, rq, at_head);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
 void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
-		bool async)
+			   bool async)
 {
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct request_queue *q = rq->q;
 	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
-
-	current_ctx = blk_mq_get_ctx(q);
-	if (!cpu_online(ctx->cpu))
-		rq->mq_ctx = ctx = current_ctx;
 
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
@@ -1076,8 +1086,6 @@
 
 	if (run_queue)
 		blk_mq_run_hw_queue(hctx, async);
-
-	blk_mq_put_ctx(current_ctx);
 }
 
 static void blk_mq_insert_requests(struct request_queue *q,
@@ -1088,14 +1096,9 @@
 
 {
 	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *current_ctx;
 
 	trace_block_unplug(q, depth, !from_schedule);
 
-	current_ctx = blk_mq_get_ctx(q);
-
-	if (!cpu_online(ctx->cpu))
-		ctx = current_ctx;
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
 	/*
@@ -1107,15 +1110,14 @@
 		struct request *rq;
 
 		rq = list_first_entry(list, struct request, queuelist);
+		BUG_ON(rq->mq_ctx != ctx);
 		list_del_init(&rq->queuelist);
-		rq->mq_ctx = ctx;
-		__blk_mq_insert_req_list(hctx, ctx, rq, false);
+		__blk_mq_insert_req_list(hctx, rq, false);
 	}
 	blk_mq_hctx_mark_pending(hctx, ctx);
 	spin_unlock(&ctx->lock);
 
 	blk_mq_run_hw_queue(hctx, from_schedule);
-	blk_mq_put_ctx(current_ctx);
 }
 
 static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1630,16 +1632,17 @@
 	return 0;
 }
 
+/*
+ * 'cpu' is going away. splice any existing rq_list entries from this
+ * software queue to the hw queue dispatch list, and ensure that it
+ * gets run.
+ */
 static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
 {
-	struct request_queue *q = hctx->queue;
 	struct blk_mq_ctx *ctx;
 	LIST_HEAD(tmp);
 
-	/*
-	 * Move ctx entries to new CPU, if this one is going away.
-	 */
-	ctx = __blk_mq_get_ctx(q, cpu);
+	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
 
 	spin_lock(&ctx->lock);
 	if (!list_empty(&ctx->rq_list)) {
@@ -1651,24 +1654,11 @@
 	if (list_empty(&tmp))
 		return NOTIFY_OK;
 
-	ctx = blk_mq_get_ctx(q);
-	spin_lock(&ctx->lock);
-
-	while (!list_empty(&tmp)) {
-		struct request *rq;
-
-		rq = list_first_entry(&tmp, struct request, queuelist);
-		rq->mq_ctx = ctx;
-		list_move_tail(&rq->queuelist, &ctx->rq_list);
-	}
-
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
-	blk_mq_hctx_mark_pending(hctx, ctx);
-
-	spin_unlock(&ctx->lock);
+	spin_lock(&hctx->lock);
+	list_splice_tail_init(&tmp, &hctx->dispatch);
+	spin_unlock(&hctx->lock);
 
 	blk_mq_run_hw_queue(hctx, true);
-	blk_mq_put_ctx(ctx);
 	return NOTIFY_OK;
 }
 
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 53b1737..96631e6 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -78,30 +78,21 @@
 }
 #endif
 
-static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
-			  void *hcpu)
+static int blk_softirq_cpu_dead(unsigned int cpu)
 {
 	/*
 	 * If a CPU goes away, splice its entries to the current CPU
 	 * and trigger a run of the softirq
 	 */
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		int cpu = (unsigned long) hcpu;
+	local_irq_disable();
+	list_splice_init(&per_cpu(blk_cpu_done, cpu),
+			 this_cpu_ptr(&blk_cpu_done));
+	raise_softirq_irqoff(BLOCK_SOFTIRQ);
+	local_irq_enable();
 
-		local_irq_disable();
-		list_splice_init(&per_cpu(blk_cpu_done, cpu),
-				 this_cpu_ptr(&blk_cpu_done));
-		raise_softirq_irqoff(BLOCK_SOFTIRQ);
-		local_irq_enable();
-	}
-
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block blk_cpu_notifier = {
-	.notifier_call	= blk_cpu_notify,
-};
-
 void __blk_complete_request(struct request *req)
 {
 	int ccpu, cpu;
@@ -180,7 +171,9 @@
 		INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i));
 
 	open_softirq(BLOCK_SOFTIRQ, blk_done_softirq);
-	register_hotcpu_notifier(&blk_cpu_notifier);
+	cpuhp_setup_state_nocalls(CPUHP_BLOCK_SOFTIRQ_DEAD,
+				  "block/softirq:dead", NULL,
+				  blk_softirq_cpu_dead);
 	return 0;
 }
 subsys_initcall(blk_softirq_init);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f1aba26..a3ea826 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -780,9 +780,11 @@
 	/*
 	 * If previous slice expired, start a new one otherwise renew/extend
 	 * existing slice to make sure it is at least throtl_slice interval
-	 * long since now.
+	 * long since now. New slice is started only for empty throttle group.
+	 * If there is queued bio, that means there should be an active
+	 * slice and it should be extended instead.
 	 */
-	if (throtl_slice_used(tg, rw))
+	if (throtl_slice_used(tg, rw) && !(tg->service_queue.nr_queued[rw]))
 		throtl_start_new_slice(tg, rw);
 	else {
 		if (time_before(tg->slice_end[rw], jiffies + throtl_slice))
diff --git a/block/elevator.c b/block/elevator.c
index 7096c22..f7d973a 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -366,7 +366,7 @@
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
-		if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD))
+		if (req_op(rq) != req_op(pos))
 			break;
 		if (rq_data_dir(rq) != rq_data_dir(pos))
 			break;
diff --git a/crypto/Kconfig b/crypto/Kconfig
index a9377be..84d7148 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -439,7 +439,7 @@
 
 config CRYPT_CRC32C_VPMSUM
 	tristate "CRC32c CRC algorithm (powerpc64)"
-	depends on PPC64
+	depends on PPC64 && ALTIVEC
 	select CRYPTO_HASH
 	select CRC32
 	help
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index 3699995..a832426 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -233,6 +233,8 @@
 		return blkcipher_walk_done(desc, walk, -EINVAL);
 	}
 
+	bsize = min(walk->walk_blocksize, n);
+
 	walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY |
 			 BLKCIPHER_WALK_DIFF);
 	if (!scatterwalk_aligned(&walk->in, walk->alignmask) ||
@@ -245,7 +247,6 @@
 		}
 	}
 
-	bsize = min(walk->walk_blocksize, n);
 	n = scatterwalk_clamp(&walk->in, n);
 	n = scatterwalk_clamp(&walk->out, n);
 
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index cf8037a..0c654e5 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -631,9 +631,14 @@
 
 static int cryptd_hash_import(struct ahash_request *req, const void *in)
 {
-	struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct shash_desc *desc = cryptd_shash_desc(req);
 
-	return crypto_shash_import(&rctx->desc, in);
+	desc->tfm = ctx->child;
+	desc->flags = req->base.flags;
+
+	return crypto_shash_import(desc, in);
 }
 
 static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
@@ -733,13 +738,14 @@
 	rctx = aead_request_ctx(req);
 	compl = rctx->complete;
 
+	tfm = crypto_aead_reqtfm(req);
+
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 	aead_request_set_tfm(req, child);
 	err = crypt( req );
 
 out:
-	tfm = crypto_aead_reqtfm(req);
 	ctx = crypto_aead_ctx(tfm);
 	refcnt = atomic_read(&ctx->refcnt);
 
diff --git a/crypto/echainiv.c b/crypto/echainiv.c
index 1b01fe9..e3d889b 100644
--- a/crypto/echainiv.c
+++ b/crypto/echainiv.c
@@ -1,8 +1,8 @@
 /*
  * echainiv: Encrypted Chain IV Generator
  *
- * This generator generates an IV based on a sequence number by xoring it
- * with a salt and then encrypting it with the same key as used to encrypt
+ * This generator generates an IV based on a sequence number by multiplying
+ * it with a salt and then encrypting it with the same key as used to encrypt
  * the plain text.  This algorithm requires that the block size be equal
  * to the IV size.  It is mainly useful for CBC.
  *
@@ -24,81 +24,17 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/spinlock.h>
+#include <linux/slab.h>
 #include <linux/string.h>
 
-#define MAX_IV_SIZE 16
-
-static DEFINE_PER_CPU(u32 [MAX_IV_SIZE / sizeof(u32)], echainiv_iv);
-
-/* We don't care if we get preempted and read/write IVs from the next CPU. */
-static void echainiv_read_iv(u8 *dst, unsigned size)
-{
-	u32 *a = (u32 *)dst;
-	u32 __percpu *b = echainiv_iv;
-
-	for (; size >= 4; size -= 4) {
-		*a++ = this_cpu_read(*b);
-		b++;
-	}
-}
-
-static void echainiv_write_iv(const u8 *src, unsigned size)
-{
-	const u32 *a = (const u32 *)src;
-	u32 __percpu *b = echainiv_iv;
-
-	for (; size >= 4; size -= 4) {
-		this_cpu_write(*b, *a);
-		a++;
-		b++;
-	}
-}
-
-static void echainiv_encrypt_complete2(struct aead_request *req, int err)
-{
-	struct aead_request *subreq = aead_request_ctx(req);
-	struct crypto_aead *geniv;
-	unsigned int ivsize;
-
-	if (err == -EINPROGRESS)
-		return;
-
-	if (err)
-		goto out;
-
-	geniv = crypto_aead_reqtfm(req);
-	ivsize = crypto_aead_ivsize(geniv);
-
-	echainiv_write_iv(subreq->iv, ivsize);
-
-	if (req->iv != subreq->iv)
-		memcpy(req->iv, subreq->iv, ivsize);
-
-out:
-	if (req->iv != subreq->iv)
-		kzfree(subreq->iv);
-}
-
-static void echainiv_encrypt_complete(struct crypto_async_request *base,
-					 int err)
-{
-	struct aead_request *req = base->data;
-
-	echainiv_encrypt_complete2(req, err);
-	aead_request_complete(req, err);
-}
-
 static int echainiv_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *geniv = crypto_aead_reqtfm(req);
 	struct aead_geniv_ctx *ctx = crypto_aead_ctx(geniv);
 	struct aead_request *subreq = aead_request_ctx(req);
-	crypto_completion_t compl;
-	void *data;
+	__be64 nseqno;
+	u64 seqno;
 	u8 *info;
 	unsigned int ivsize = crypto_aead_ivsize(geniv);
 	int err;
@@ -108,8 +44,6 @@
 
 	aead_request_set_tfm(subreq, ctx->child);
 
-	compl = echainiv_encrypt_complete;
-	data = req;
 	info = req->iv;
 
 	if (req->src != req->dst) {
@@ -127,29 +61,30 @@
 			return err;
 	}
 
-	if (unlikely(!IS_ALIGNED((unsigned long)info,
-				 crypto_aead_alignmask(geniv) + 1))) {
-		info = kmalloc(ivsize, req->base.flags &
-				       CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL:
-								  GFP_ATOMIC);
-		if (!info)
-			return -ENOMEM;
-
-		memcpy(info, req->iv, ivsize);
-	}
-
-	aead_request_set_callback(subreq, req->base.flags, compl, data);
+	aead_request_set_callback(subreq, req->base.flags,
+				  req->base.complete, req->base.data);
 	aead_request_set_crypt(subreq, req->dst, req->dst,
 			       req->cryptlen, info);
 	aead_request_set_ad(subreq, req->assoclen);
 
-	crypto_xor(info, ctx->salt, ivsize);
-	scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
-	echainiv_read_iv(info, ivsize);
+	memcpy(&nseqno, info + ivsize - 8, 8);
+	seqno = be64_to_cpu(nseqno);
+	memset(info, 0, ivsize);
 
-	err = crypto_aead_encrypt(subreq);
-	echainiv_encrypt_complete2(req, err);
-	return err;
+	scatterwalk_map_and_copy(info, req->dst, req->assoclen, ivsize, 1);
+
+	do {
+		u64 a;
+
+		memcpy(&a, ctx->salt + ivsize - 8, 8);
+
+		a |= 1;
+		a *= seqno;
+
+		memcpy(info + ivsize - 8, &a, 8);
+	} while ((ivsize -= 8));
+
+	return crypto_aead_encrypt(subreq);
 }
 
 static int echainiv_decrypt(struct aead_request *req)
@@ -196,8 +131,7 @@
 	alg = crypto_spawn_aead_alg(spawn);
 
 	err = -EINVAL;
-	if (inst->alg.ivsize & (sizeof(u32) - 1) ||
-	    inst->alg.ivsize > MAX_IV_SIZE)
+	if (inst->alg.ivsize & (sizeof(u64) - 1) || !inst->alg.ivsize)
 		goto free_inst;
 
 	inst->alg.encrypt = echainiv_encrypt;
@@ -206,7 +140,6 @@
 	inst->alg.init = aead_init_geniv;
 	inst->alg.exit = aead_exit_geniv;
 
-	inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;
 	inst->alg.base.cra_ctxsize = sizeof(struct aead_geniv_ctx);
 	inst->alg.base.cra_ctxsize += inst->alg.ivsize;
 
diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 877019a..8baab43 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -298,41 +298,48 @@
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct pkcs1pad_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct pkcs1pad_request *req_ctx = akcipher_request_ctx(req);
+	unsigned int dst_len;
 	unsigned int pos;
-
-	if (err == -EOVERFLOW)
-		/* Decrypted value had no leading 0 byte */
-		err = -EINVAL;
+	u8 *out_buf;
 
 	if (err)
 		goto done;
 
-	if (req_ctx->child_req.dst_len != ctx->key_size - 1) {
-		err = -EINVAL;
+	err = -EINVAL;
+	dst_len = req_ctx->child_req.dst_len;
+	if (dst_len < ctx->key_size - 1)
 		goto done;
+
+	out_buf = req_ctx->out_buf;
+	if (dst_len == ctx->key_size) {
+		if (out_buf[0] != 0x00)
+			/* Decrypted value had no leading 0 byte */
+			goto done;
+
+		dst_len--;
+		out_buf++;
 	}
 
-	if (req_ctx->out_buf[0] != 0x02) {
-		err = -EINVAL;
+	if (out_buf[0] != 0x02)
 		goto done;
-	}
-	for (pos = 1; pos < req_ctx->child_req.dst_len; pos++)
-		if (req_ctx->out_buf[pos] == 0x00)
+
+	for (pos = 1; pos < dst_len; pos++)
+		if (out_buf[pos] == 0x00)
 			break;
-	if (pos < 9 || pos == req_ctx->child_req.dst_len) {
-		err = -EINVAL;
+	if (pos < 9 || pos == dst_len)
 		goto done;
-	}
 	pos++;
 
-	if (req->dst_len < req_ctx->child_req.dst_len - pos)
+	err = 0;
+
+	if (req->dst_len < dst_len - pos)
 		err = -EOVERFLOW;
-	req->dst_len = req_ctx->child_req.dst_len - pos;
+	req->dst_len = dst_len - pos;
 
 	if (!err)
 		sg_copy_from_buffer(req->dst,
 				sg_nents_for_len(req->dst, req->dst_len),
-				req_ctx->out_buf + pos, req->dst_len);
+				out_buf + pos, req->dst_len);
 
 done:
 	kzfree(req_ctx->out_buf);
diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c
index 6226439..7e8ed96 100644
--- a/crypto/sha3_generic.c
+++ b/crypto/sha3_generic.c
@@ -24,14 +24,14 @@
 #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
 
 static const u64 keccakf_rndc[24] = {
-	0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
-	0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
-	0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
-	0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
-	0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-	0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
-	0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
-	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+	0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL,
+	0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL,
+	0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL,
+	0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL,
+	0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL,
+	0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL,
+	0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL,
+	0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL
 };
 
 static const int keccakf_rotc[24] = {
diff --git a/drivers/Makefile b/drivers/Makefile
index 53abb4a..f0afdfb 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -29,6 +29,8 @@
 # was used and do nothing if so
 obj-$(CONFIG_PNP)		+= pnp/
 obj-y				+= amba/
+
+obj-y				+= clk/
 # Many drivers will want to use DMA so this has to be made available
 # really early.
 obj-$(CONFIG_DMADEVICES)	+= dma/
@@ -142,8 +144,6 @@
 obj-$(CONFIG_VLYNQ)		+= vlynq/
 obj-$(CONFIG_STAGING)		+= staging/
 obj-y				+= platform/
-#common clk code
-obj-y				+= clk/
 
 obj-$(CONFIG_MAILBOX)		+= mailbox/
 obj-$(CONFIG_HWSPINLOCK)	+= hwspinlock/
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 445ce28..535e782 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -77,6 +77,9 @@
 
 endif
 
+config ACPI_SPCR_TABLE
+	bool
+
 config ACPI_SLEEP
 	bool
 	depends on SUSPEND || HIBERNATION
@@ -227,7 +230,6 @@
 config ACPI_CPPC_LIB
 	bool
 	depends on ACPI_PROCESSOR
-	depends on !ACPI_CPU_FREQ_PSS
 	select MAILBOX
 	select PCC
 	help
@@ -462,6 +464,9 @@
 source "drivers/acpi/apei/Kconfig"
 source "drivers/acpi/dptf/Kconfig"
 
+config ACPI_WATCHDOG
+	bool
+
 config ACPI_EXTLOG
 	tristate "Extended Error Log support"
 	depends on X86_MCE && X86_LOCAL_APIC
@@ -521,4 +526,8 @@
 	  userspace. The configurable ACPI groups will be visible under
 	  /config/acpi, assuming configfs is mounted under /config.
 
+if ARM64
+source "drivers/acpi/arm64/Kconfig"
+endif
+
 endif	# ACPI
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 5ae9d85..9ed0878 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -56,6 +56,7 @@
 acpi-$(CONFIG_ACPI_PROCFS_POWER) += cm_sbs.o
 acpi-y				+= acpi_lpat.o
 acpi-$(CONFIG_ACPI_GENERIC_GSI) += gsi.o
+acpi-$(CONFIG_ACPI_WATCHDOG)	+= acpi_watchdog.o
 
 # These are (potentially) separate modules
 
@@ -81,6 +82,7 @@
 obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o
 obj-$(CONFIG_ACPI_BGRT)		+= bgrt.o
 obj-$(CONFIG_ACPI_CPPC_LIB)	+= cppc_acpi.o
+obj-$(CONFIG_ACPI_SPCR_TABLE)	+= spcr.o
 obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o
 
 # processor has its own "processor." module_param namespace
@@ -105,3 +107,5 @@
 
 video-objs			+= acpi_video.o video_detect.o
 obj-y				+= dptf/
+
+obj-$(CONFIG_ARM64)		+= arm64/
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c
index 1daf9c4..d58fbf7 100644
--- a/drivers/acpi/acpi_apd.c
+++ b/drivers/acpi/acpi_apd.c
@@ -42,6 +42,7 @@
 struct apd_device_desc {
 	unsigned int flags;
 	unsigned int fixed_clk_rate;
+	struct property_entry *properties;
 	int (*setup)(struct apd_private_data *pdata);
 };
 
@@ -71,22 +72,35 @@
 }
 
 #ifdef CONFIG_X86_AMD_PLATFORM_DEVICE
-static struct apd_device_desc cz_i2c_desc = {
+static const struct apd_device_desc cz_i2c_desc = {
 	.setup = acpi_apd_setup,
 	.fixed_clk_rate = 133000000,
 };
 
-static struct apd_device_desc cz_uart_desc = {
+static struct property_entry uart_properties[] = {
+	PROPERTY_ENTRY_U32("reg-io-width", 4),
+	PROPERTY_ENTRY_U32("reg-shift", 2),
+	PROPERTY_ENTRY_BOOL("snps,uart-16550-compatible"),
+	{ },
+};
+
+static const struct apd_device_desc cz_uart_desc = {
 	.setup = acpi_apd_setup,
 	.fixed_clk_rate = 48000000,
+	.properties = uart_properties,
 };
 #endif
 
 #ifdef CONFIG_ARM64
-static struct apd_device_desc xgene_i2c_desc = {
+static const struct apd_device_desc xgene_i2c_desc = {
 	.setup = acpi_apd_setup,
 	.fixed_clk_rate = 100000000,
 };
+
+static const struct apd_device_desc vulcan_spi_desc = {
+	.setup = acpi_apd_setup,
+	.fixed_clk_rate = 133000000,
+};
 #endif
 
 #else
@@ -125,6 +139,12 @@
 			goto err_out;
 	}
 
+	if (dev_desc->properties) {
+		ret = device_add_properties(&adev->dev, dev_desc->properties);
+		if (ret)
+			goto err_out;
+	}
+
 	adev->driver_data = pdata;
 	pdev = acpi_create_platform_device(adev);
 	if (!IS_ERR_OR_NULL(pdev))
@@ -149,6 +169,7 @@
 #endif
 #ifdef CONFIG_ARM64
 	{ "APMC0D0F", APD_ADDR(xgene_i2c_desc) },
+	{ "BRCM900D", APD_ADDR(vulcan_spi_desc) },
 #endif
 	{ }
 };
diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 357a0b8..5520102 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -75,6 +75,7 @@
 	const char *clk_con_id;
 	unsigned int prv_offset;
 	size_t prv_size_override;
+	struct property_entry *properties;
 	void (*setup)(struct lpss_private_data *pdata);
 };
 
@@ -163,11 +164,19 @@
 	.prv_offset = 0x800,
 };
 
+static struct property_entry uart_properties[] = {
+	PROPERTY_ENTRY_U32("reg-io-width", 4),
+	PROPERTY_ENTRY_U32("reg-shift", 2),
+	PROPERTY_ENTRY_BOOL("snps,uart-16550-compatible"),
+	{ },
+};
+
 static const struct lpss_device_desc lpt_uart_dev_desc = {
 	.flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_LTR,
 	.clk_con_id = "baudclk",
 	.prv_offset = 0x800,
 	.setup = lpss_uart_setup,
+	.properties = uart_properties,
 };
 
 static const struct lpss_device_desc lpt_sdio_dev_desc = {
@@ -189,6 +198,7 @@
 	.clk_con_id = "baudclk",
 	.prv_offset = 0x800,
 	.setup = lpss_uart_setup,
+	.properties = uart_properties,
 };
 
 static const struct lpss_device_desc bsw_uart_dev_desc = {
@@ -197,6 +207,7 @@
 	.clk_con_id = "baudclk",
 	.prv_offset = 0x800,
 	.setup = lpss_uart_setup,
+	.properties = uart_properties,
 };
 
 static const struct lpss_device_desc byt_spi_dev_desc = {
@@ -440,6 +451,12 @@
 		goto err_out;
 	}
 
+	if (dev_desc->properties) {
+		ret = device_add_properties(&adev->dev, dev_desc->properties);
+		if (ret)
+			goto err_out;
+	}
+
 	adev->driver_data = pdata;
 	pdev = acpi_create_platform_device(adev);
 	if (!IS_ERR_OR_NULL(pdev)) {
diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c
index 159f7f1..b200ae1 100644
--- a/drivers/acpi/acpi_platform.c
+++ b/drivers/acpi/acpi_platform.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/dma-mapping.h>
+#include <linux/pci.h>
 #include <linux/platform_device.h>
 
 #include "internal.h"
@@ -30,6 +31,22 @@
 	{"", 0},
 };
 
+static void acpi_platform_fill_resource(struct acpi_device *adev,
+	const struct resource *src, struct resource *dest)
+{
+	struct device *parent;
+
+	*dest = *src;
+
+	/*
+	 * If the device has parent we need to take its resources into
+	 * account as well because this device might consume part of those.
+	 */
+	parent = acpi_get_first_physical_node(adev->parent);
+	if (parent && dev_is_pci(parent))
+		dest->parent = pci_find_resource(to_pci_dev(parent), dest);
+}
+
 /**
  * acpi_create_platform_device - Create platform device for ACPI device node
  * @adev: ACPI device node to create a platform device for.
@@ -70,7 +87,8 @@
 		}
 		count = 0;
 		list_for_each_entry(rentry, &resource_list, node)
-			resources[count++] = *rentry->res;
+			acpi_platform_fill_resource(adev, rentry->res,
+						    &resources[count++]);
 
 		acpi_dev_free_resource_list(&resource_list);
 	}
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index c7ba948..3de3b6b 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -182,6 +182,11 @@
 
 void __weak arch_unregister_cpu(int cpu) {}
 
+int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
+{
+	return -ENODEV;
+}
+
 static int acpi_processor_hotadd_init(struct acpi_processor *pr)
 {
 	unsigned long long sta;
@@ -300,8 +305,11 @@
 	 *  Extra Processor objects may be enumerated on MP systems with
 	 *  less than the max # of CPUs. They should be ignored _iff
 	 *  they are physically not present.
+	 *
+	 *  NOTE: Even if the processor has a cpuid, it may not be present
+	 *  because cpuid <-> apicid mapping is persistent now.
 	 */
-	if (invalid_logical_cpuid(pr->id)) {
+	if (invalid_logical_cpuid(pr->id) || !cpu_present(pr->id)) {
 		int ret = acpi_processor_hotadd_init(pr);
 		if (ret)
 			return ret;
@@ -573,8 +581,102 @@
 	.attach = acpi_processor_container_attach,
 };
 
+/* The number of the unique processor IDs */
+static int nr_unique_ids __initdata;
+
+/* The number of the duplicate processor IDs */
+static int nr_duplicate_ids __initdata;
+
+/* Used to store the unique processor IDs */
+static int unique_processor_ids[] __initdata = {
+	[0 ... NR_CPUS - 1] = -1,
+};
+
+/* Used to store the duplicate processor IDs */
+static int duplicate_processor_ids[] __initdata = {
+	[0 ... NR_CPUS - 1] = -1,
+};
+
+static void __init processor_validated_ids_update(int proc_id)
+{
+	int i;
+
+	if (nr_unique_ids == NR_CPUS||nr_duplicate_ids == NR_CPUS)
+		return;
+
+	/*
+	 * Firstly, compare the proc_id with duplicate IDs, if the proc_id is
+	 * already in the IDs, do nothing.
+	 */
+	for (i = 0; i < nr_duplicate_ids; i++) {
+		if (duplicate_processor_ids[i] == proc_id)
+			return;
+	}
+
+	/*
+	 * Secondly, compare the proc_id with unique IDs, if the proc_id is in
+	 * the IDs, put it in the duplicate IDs.
+	 */
+	for (i = 0; i < nr_unique_ids; i++) {
+		if (unique_processor_ids[i] == proc_id) {
+			duplicate_processor_ids[nr_duplicate_ids] = proc_id;
+			nr_duplicate_ids++;
+			return;
+		}
+	}
+
+	/*
+	 * Lastly, the proc_id is a unique ID, put it in the unique IDs.
+	 */
+	unique_processor_ids[nr_unique_ids] = proc_id;
+	nr_unique_ids++;
+}
+
+static acpi_status __init acpi_processor_ids_walk(acpi_handle handle,
+						  u32 lvl,
+						  void *context,
+						  void **rv)
+{
+	acpi_status status;
+	union acpi_object object = { 0 };
+	struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
+
+	status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+	if (ACPI_FAILURE(status))
+		acpi_handle_info(handle, "Not get the processor object\n");
+	else
+		processor_validated_ids_update(object.processor.proc_id);
+
+	return AE_OK;
+}
+
+static void __init acpi_processor_check_duplicates(void)
+{
+	/* Search all processor nodes in ACPI namespace */
+	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+						ACPI_UINT32_MAX,
+						acpi_processor_ids_walk,
+						NULL, NULL, NULL);
+}
+
+bool __init acpi_processor_validate_proc_id(int proc_id)
+{
+	int i;
+
+	/*
+	 * compare the proc_id with duplicate IDs, if the proc_id is already
+	 * in the duplicate IDs, return true, otherwise, return false.
+	 */
+	for (i = 0; i < nr_duplicate_ids; i++) {
+		if (duplicate_processor_ids[i] == proc_id)
+			return true;
+	}
+	return false;
+}
+
 void __init acpi_processor_init(void)
 {
+	acpi_processor_check_duplicates();
 	acpi_scan_add_handler_with_hotplug(&processor_handler, "processor");
 	acpi_scan_add_handler(&processor_container_handler);
 }
diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c
new file mode 100644
index 0000000..13caebd
--- /dev/null
+++ b/drivers/acpi/acpi_watchdog.c
@@ -0,0 +1,123 @@
+/*
+ * ACPI watchdog table parsing support.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "ACPI: watchdog: " fmt
+
+#include <linux/acpi.h>
+#include <linux/ioport.h>
+#include <linux/platform_device.h>
+
+#include "internal.h"
+
+/**
+ * Returns true if this system should prefer ACPI based watchdog instead of
+ * the native one (which are typically the same hardware).
+ */
+bool acpi_has_watchdog(void)
+{
+	struct acpi_table_header hdr;
+
+	if (acpi_disabled)
+		return false;
+
+	return ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_WDAT, 0, &hdr));
+}
+EXPORT_SYMBOL_GPL(acpi_has_watchdog);
+
+void __init acpi_watchdog_init(void)
+{
+	const struct acpi_wdat_entry *entries;
+	const struct acpi_table_wdat *wdat;
+	struct list_head resource_list;
+	struct resource_entry *rentry;
+	struct platform_device *pdev;
+	struct resource *resources;
+	size_t nresources = 0;
+	acpi_status status;
+	int i;
+
+	status = acpi_get_table(ACPI_SIG_WDAT, 0,
+				(struct acpi_table_header **)&wdat);
+	if (ACPI_FAILURE(status)) {
+		/* It is fine if there is no WDAT */
+		return;
+	}
+
+	/* Watchdog disabled by BIOS */
+	if (!(wdat->flags & ACPI_WDAT_ENABLED))
+		return;
+
+	/* Skip legacy PCI WDT devices */
+	if (wdat->pci_segment != 0xff || wdat->pci_bus != 0xff ||
+	    wdat->pci_device != 0xff || wdat->pci_function != 0xff)
+		return;
+
+	INIT_LIST_HEAD(&resource_list);
+
+	entries = (struct acpi_wdat_entry *)(wdat + 1);
+	for (i = 0; i < wdat->entries; i++) {
+		const struct acpi_generic_address *gas;
+		struct resource_entry *rentry;
+		struct resource res;
+		bool found;
+
+		gas = &entries[i].register_region;
+
+		res.start = gas->address;
+		if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+			res.flags = IORESOURCE_MEM;
+			res.end = res.start + ALIGN(gas->access_width, 4);
+		} else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
+			res.flags = IORESOURCE_IO;
+			res.end = res.start + gas->access_width;
+		} else {
+			pr_warn("Unsupported address space: %u\n",
+				gas->space_id);
+			goto fail_free_resource_list;
+		}
+
+		found = false;
+		resource_list_for_each_entry(rentry, &resource_list) {
+			if (resource_contains(rentry->res, &res)) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			rentry = resource_list_create_entry(NULL, 0);
+			if (!rentry)
+				goto fail_free_resource_list;
+
+			*rentry->res = res;
+			resource_list_add_tail(rentry, &resource_list);
+			nresources++;
+		}
+	}
+
+	resources = kcalloc(nresources, sizeof(*resources), GFP_KERNEL);
+	if (!resources)
+		goto fail_free_resource_list;
+
+	i = 0;
+	resource_list_for_each_entry(rentry, &resource_list)
+		resources[i++] = *rentry->res;
+
+	pdev = platform_device_register_simple("wdat_wdt", PLATFORM_DEVID_NONE,
+					       resources, nresources);
+	if (IS_ERR(pdev))
+		pr_err("Failed to create platform device\n");
+
+	kfree(resources);
+
+fail_free_resource_list:
+	resource_list_free(&resource_list);
+}
diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile
index 227bb7b..32d93ed 100644
--- a/drivers/acpi/acpica/Makefile
+++ b/drivers/acpi/acpica/Makefile
@@ -175,6 +175,7 @@
 	utresrc.o	\
 	utstate.o	\
 	utstring.o	\
+	utstrtoul64.o	\
 	utxface.o	\
 	utxfinit.o	\
 	utxferror.o	\
diff --git a/drivers/acpi/acpica/acapps.h b/drivers/acpi/acpica/acapps.h
index ca2c060..0bd6307 100644
--- a/drivers/acpi/acpica/acapps.h
+++ b/drivers/acpi/acpica/acapps.h
@@ -44,7 +44,9 @@
 #ifndef _ACAPPS
 #define _ACAPPS
 
-#include <stdio.h>
+#ifdef ACPI_USE_STANDARD_HEADERS
+#include <sys/stat.h>
+#endif				/* ACPI_USE_STANDARD_HEADERS */
 
 /* Common info for tool signons */
 
@@ -81,13 +83,13 @@
 /* Macros for usage messages */
 
 #define ACPI_USAGE_HEADER(usage) \
-	acpi_os_printf ("Usage: %s\nOptions:\n", usage);
+	printf ("Usage: %s\nOptions:\n", usage);
 
 #define ACPI_USAGE_TEXT(description) \
-	acpi_os_printf (description);
+	printf (description);
 
 #define ACPI_OPTION(name, description) \
-	acpi_os_printf (" %-20s%s\n", name, description);
+	printf ("  %-20s%s\n", name, description);
 
 /* Check for unexpected exceptions */
 
diff --git a/drivers/acpi/acpica/acdebug.h b/drivers/acpi/acpica/acdebug.h
index f6404ea..94737f8 100644
--- a/drivers/acpi/acpica/acdebug.h
+++ b/drivers/acpi/acpica/acdebug.h
@@ -155,7 +155,7 @@
 
 void acpi_db_disassemble_aml(char *statements, union acpi_parse_object *op);
 
-void acpi_db_batch_execute(char *count_arg);
+void acpi_db_evaluate_predefined_names(void);
 
 /*
  * dbnames - namespace commands
diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h
index 77af91c..92fa47c 100644
--- a/drivers/acpi/acpica/acevents.h
+++ b/drivers/acpi/acpica/acevents.h
@@ -86,6 +86,9 @@
 acpi_status acpi_ev_enable_gpe(struct acpi_gpe_event_info *gpe_event_info);
 
 acpi_status
+acpi_ev_mask_gpe(struct acpi_gpe_event_info *gpe_event_info, u8 is_masked);
+
+acpi_status
 acpi_ev_add_gpe_reference(struct acpi_gpe_event_info *gpe_event_info);
 
 acpi_status
diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h
index fded776..750fa82 100644
--- a/drivers/acpi/acpica/acglobal.h
+++ b/drivers/acpi/acpica/acglobal.h
@@ -317,6 +317,7 @@
 ACPI_INIT_GLOBAL(u8, acpi_gbl_cstyle_disassembly, TRUE);
 ACPI_INIT_GLOBAL(u8, acpi_gbl_force_aml_disassembly, FALSE);
 ACPI_INIT_GLOBAL(u8, acpi_gbl_dm_opt_verbose, TRUE);
+ACPI_INIT_GLOBAL(u8, acpi_gbl_dm_emit_external_opcodes, FALSE);
 
 ACPI_GLOBAL(u8, acpi_gbl_dm_opt_disasm);
 ACPI_GLOBAL(u8, acpi_gbl_dm_opt_listing);
@@ -382,6 +383,7 @@
 
 ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_debug_file, NULL);
 ACPI_INIT_GLOBAL(ACPI_FILE, acpi_gbl_output_file, NULL);
+ACPI_INIT_GLOBAL(u8, acpi_gbl_debug_timeout, FALSE);
 
 /* Print buffer */
 
diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h
index 13331d7..dff1207 100644
--- a/drivers/acpi/acpica/aclocal.h
+++ b/drivers/acpi/acpica/aclocal.h
@@ -484,6 +484,7 @@
 	u8 flags;		/* Misc info about this GPE */
 	u8 gpe_number;		/* This GPE */
 	u8 runtime_count;	/* References to a run GPE */
+	u8 disable_for_dispatch;	/* Masked during dispatching */
 };
 
 /* Information about a GPE register pair, one per each status/enable pair in an array */
@@ -494,6 +495,7 @@
 	u16 base_gpe_number;	/* Base GPE number for this register */
 	u8 enable_for_wake;	/* GPEs to keep enabled when sleeping */
 	u8 enable_for_run;	/* GPEs to keep enabled when running */
+	u8 mask_for_run;	/* GPEs to keep masked when running */
 	u8 enable_mask;		/* Current mask of enabled GPEs */
 };
 
diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h
index f33a4ba..bb7fca1 100644
--- a/drivers/acpi/acpica/acnamesp.h
+++ b/drivers/acpi/acpica/acnamesp.h
@@ -130,6 +130,9 @@
 acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node);
 
 acpi_status
+acpi_ns_execute_table(u32 table_index, struct acpi_namespace_node *start_node);
+
+acpi_status
 acpi_ns_one_complete_parse(u32 pass_number,
 			   u32 table_index,
 			   struct acpi_namespace_node *start_node);
@@ -296,6 +299,11 @@
 acpi_ns_pattern_match(struct acpi_namespace_node *obj_node, char *search_for);
 
 acpi_status
+acpi_ns_get_node_unlocked(struct acpi_namespace_node *prefix_node,
+			  const char *external_pathname,
+			  u32 flags, struct acpi_namespace_node **out_node);
+
+acpi_status
 acpi_ns_get_node(struct acpi_namespace_node *prefix_node,
 		 const char *external_pathname,
 		 u32 flags, struct acpi_namespace_node **out_node);
diff --git a/drivers/acpi/acpica/acparser.h b/drivers/acpi/acpica/acparser.h
index fc30577..939d411 100644
--- a/drivers/acpi/acpica/acparser.h
+++ b/drivers/acpi/acpica/acparser.h
@@ -78,6 +78,8 @@
  */
 acpi_status acpi_ps_execute_method(struct acpi_evaluate_info *info);
 
+acpi_status acpi_ps_execute_table(struct acpi_evaluate_info *info);
+
 /*
  * psargs - Parse AML opcode arguments
  */
diff --git a/drivers/acpi/acpica/actables.h b/drivers/acpi/acpica/actables.h
index cd5a135..e85953b 100644
--- a/drivers/acpi/acpica/actables.h
+++ b/drivers/acpi/acpica/actables.h
@@ -123,6 +123,14 @@
 
 void acpi_tb_uninstall_table(struct acpi_table_desc *table_desc);
 
+acpi_status
+acpi_tb_load_table(u32 table_index, struct acpi_namespace_node *parent_node);
+
+acpi_status
+acpi_tb_install_and_load_table(struct acpi_table_header *table,
+			       acpi_physical_address address,
+			       u8 flags, u8 override, u32 *table_index);
+
 void acpi_tb_terminate(void);
 
 acpi_status acpi_tb_delete_namespace_by_owner(u32 table_index);
@@ -155,10 +163,6 @@
 acpi_tb_install_table_with_override(struct acpi_table_desc *new_table_desc,
 				    u8 override, u32 *table_index);
 
-acpi_status
-acpi_tb_install_fixed_table(acpi_physical_address address,
-			    char *signature, u32 *table_index);
-
 acpi_status acpi_tb_parse_root_table(acpi_physical_address rsdp_address);
 
 /*
diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h
index a7dbb2b..0a1b53c 100644
--- a/drivers/acpi/acpica/acutils.h
+++ b/drivers/acpi/acpica/acutils.h
@@ -114,13 +114,25 @@
 /*
  * Common error message prefixes
  */
+#ifndef ACPI_MSG_ERROR
 #define ACPI_MSG_ERROR          "ACPI Error: "
+#endif
+#ifndef ACPI_MSG_EXCEPTION
 #define ACPI_MSG_EXCEPTION      "ACPI Exception: "
+#endif
+#ifndef ACPI_MSG_WARNING
 #define ACPI_MSG_WARNING        "ACPI Warning: "
+#endif
+#ifndef ACPI_MSG_INFO
 #define ACPI_MSG_INFO           "ACPI: "
+#endif
 
+#ifndef ACPI_MSG_BIOS_ERROR
 #define ACPI_MSG_BIOS_ERROR     "ACPI BIOS Error (bug): "
+#endif
+#ifndef ACPI_MSG_BIOS_WARNING
 #define ACPI_MSG_BIOS_WARNING   "ACPI BIOS Warning (bug): "
+#endif
 
 /*
  * Common message suffix
@@ -184,14 +196,15 @@
 
 int acpi_ut_stricmp(char *string1, char *string2);
 
-acpi_status
-acpi_ut_strtoul64(char *string,
-		  u32 base, u32 max_integer_byte_width, u64 *ret_integer);
+acpi_status acpi_ut_strtoul64(char *string, u32 flags, u64 *ret_integer);
 
-/* Values for max_integer_byte_width above */
-
-#define ACPI_MAX32_BYTE_WIDTH       4
-#define ACPI_MAX64_BYTE_WIDTH       8
+/*
+ * Values for Flags above
+ * Note: LIMIT values correspond to acpi_gbl_integer_byte_width values (4/8)
+ */
+#define ACPI_STRTOUL_32BIT          0x04	/* 4 bytes */
+#define ACPI_STRTOUL_64BIT          0x08	/* 8 bytes */
+#define ACPI_STRTOUL_BASE16         0x10	/* Default: Base10/16 */
 
 /*
  * utglobal - Global data structures and procedures
@@ -221,6 +234,8 @@
 
 char acpi_ut_hex_to_ascii_char(u64 integer, u32 position);
 
+acpi_status acpi_ut_ascii_to_hex_byte(char *two_ascii_chars, u8 *return_byte);
+
 u8 acpi_ut_ascii_char_to_hex(int hex_char);
 
 u8 acpi_ut_valid_object_type(acpi_object_type type);
@@ -318,6 +333,11 @@
 		 const char *module_name, u32 component_id, u8 *ptr);
 
 void
+acpi_ut_str_exit(u32 line_number,
+		 const char *function_name,
+		 const char *module_name, u32 component_id, const char *string);
+
+void
 acpi_ut_debug_dump_buffer(u8 *buffer, u32 count, u32 display, u32 component_id);
 
 void acpi_ut_dump_buffer(u8 *buffer, u32 count, u32 display, u32 offset);
@@ -707,25 +727,6 @@
 const char *acpi_ah_match_uuid(u8 *data);
 
 /*
- * utprint - printf/vprintf output functions
- */
-const char *acpi_ut_scan_number(const char *string, u64 *number_ptr);
-
-const char *acpi_ut_print_number(char *string, u64 number);
-
-int
-acpi_ut_vsnprintf(char *string,
-		  acpi_size size, const char *format, va_list args);
-
-int acpi_ut_snprintf(char *string, acpi_size size, const char *format, ...);
-
-#ifdef ACPI_APPLICATION
-int acpi_ut_file_vprintf(ACPI_FILE file, const char *format, va_list args);
-
-int acpi_ut_file_printf(ACPI_FILE file, const char *format, ...);
-#endif
-
-/*
  * utuuid -- UUID support functions
  */
 #if (defined ACPI_ASL_COMPILER || defined ACPI_EXEC_APP || defined ACPI_HELP_APP)
diff --git a/drivers/acpi/acpica/dbconvert.c b/drivers/acpi/acpica/dbconvert.c
index 7cd07b2..147ce88 100644
--- a/drivers/acpi/acpica/dbconvert.c
+++ b/drivers/acpi/acpica/dbconvert.c
@@ -277,9 +277,10 @@
 	default:
 
 		object->type = ACPI_TYPE_INTEGER;
-		status =
-		    acpi_ut_strtoul64(string, 16, acpi_gbl_integer_byte_width,
-				      &object->integer.value);
+		status = acpi_ut_strtoul64(string,
+					   (acpi_gbl_integer_byte_width |
+					    ACPI_STRTOUL_BASE16),
+					   &object->integer.value);
 		break;
 	}
 
diff --git a/drivers/acpi/acpica/dbexec.c b/drivers/acpi/acpica/dbexec.c
index 12df291..fe3da7c 100644
--- a/drivers/acpi/acpica/dbexec.c
+++ b/drivers/acpi/acpica/dbexec.c
@@ -392,43 +392,49 @@
 					  acpi_db_execution_walk, NULL, NULL,
 					  NULL);
 		return;
-	} else {
-		name_string = ACPI_ALLOCATE(strlen(name) + 1);
-		if (!name_string) {
-			return;
-		}
-
-		memset(&acpi_gbl_db_method_info, 0,
-		       sizeof(struct acpi_db_method_info));
-
-		strcpy(name_string, name);
-		acpi_ut_strupr(name_string);
-		acpi_gbl_db_method_info.name = name_string;
-		acpi_gbl_db_method_info.args = args;
-		acpi_gbl_db_method_info.types = types;
-		acpi_gbl_db_method_info.flags = flags;
-
-		return_obj.pointer = NULL;
-		return_obj.length = ACPI_ALLOCATE_BUFFER;
-
-		status = acpi_db_execute_setup(&acpi_gbl_db_method_info);
-		if (ACPI_FAILURE(status)) {
-			ACPI_FREE(name_string);
-			return;
-		}
-
-		/* Get the NS node, determines existence also */
-
-		status = acpi_get_handle(NULL, acpi_gbl_db_method_info.pathname,
-					 &acpi_gbl_db_method_info.method);
-		if (ACPI_SUCCESS(status)) {
-			status =
-			    acpi_db_execute_method(&acpi_gbl_db_method_info,
-						   &return_obj);
-		}
-		ACPI_FREE(name_string);
 	}
 
+	name_string = ACPI_ALLOCATE(strlen(name) + 1);
+	if (!name_string) {
+		return;
+	}
+
+	memset(&acpi_gbl_db_method_info, 0, sizeof(struct acpi_db_method_info));
+	strcpy(name_string, name);
+	acpi_ut_strupr(name_string);
+
+	/* Subcommand to Execute all predefined names in the namespace */
+
+	if (!strncmp(name_string, "PREDEF", 6)) {
+		acpi_db_evaluate_predefined_names();
+		ACPI_FREE(name_string);
+		return;
+	}
+
+	acpi_gbl_db_method_info.name = name_string;
+	acpi_gbl_db_method_info.args = args;
+	acpi_gbl_db_method_info.types = types;
+	acpi_gbl_db_method_info.flags = flags;
+
+	return_obj.pointer = NULL;
+	return_obj.length = ACPI_ALLOCATE_BUFFER;
+
+	status = acpi_db_execute_setup(&acpi_gbl_db_method_info);
+	if (ACPI_FAILURE(status)) {
+		ACPI_FREE(name_string);
+		return;
+	}
+
+	/* Get the NS node, determines existence also */
+
+	status = acpi_get_handle(NULL, acpi_gbl_db_method_info.pathname,
+				 &acpi_gbl_db_method_info.method);
+	if (ACPI_SUCCESS(status)) {
+		status = acpi_db_execute_method(&acpi_gbl_db_method_info,
+						&return_obj);
+	}
+	ACPI_FREE(name_string);
+
 	/*
 	 * Allow any handlers in separate threads to complete.
 	 * (Such as Notify handlers invoked from AML executed above).
diff --git a/drivers/acpi/acpica/dbfileio.c b/drivers/acpi/acpica/dbfileio.c
index 4832879..6f05b8c 100644
--- a/drivers/acpi/acpica/dbfileio.c
+++ b/drivers/acpi/acpica/dbfileio.c
@@ -46,14 +46,12 @@
 #include "accommon.h"
 #include "acdebug.h"
 #include "actables.h"
-#include <stdio.h>
-#ifdef ACPI_APPLICATION
-#include "acapps.h"
-#endif
 
 #define _COMPONENT          ACPI_CA_DEBUGGER
 ACPI_MODULE_NAME("dbfileio")
 
+#ifdef ACPI_APPLICATION
+#include "acapps.h"
 #ifdef ACPI_DEBUGGER
 /*******************************************************************************
  *
@@ -69,8 +67,6 @@
 void acpi_db_close_debug_file(void)
 {
 
-#ifdef ACPI_APPLICATION
-
 	if (acpi_gbl_debug_file) {
 		fclose(acpi_gbl_debug_file);
 		acpi_gbl_debug_file = NULL;
@@ -78,7 +74,6 @@
 		acpi_os_printf("Debug output file %s closed\n",
 			       acpi_gbl_db_debug_filename);
 	}
-#endif
 }
 
 /*******************************************************************************
@@ -96,8 +91,6 @@
 void acpi_db_open_debug_file(char *name)
 {
 
-#ifdef ACPI_APPLICATION
-
 	acpi_db_close_debug_file();
 	acpi_gbl_debug_file = fopen(name, "w+");
 	if (!acpi_gbl_debug_file) {
@@ -109,8 +102,6 @@
 	strncpy(acpi_gbl_db_debug_filename, name,
 		sizeof(acpi_gbl_db_debug_filename));
 	acpi_gbl_db_output_to_file = TRUE;
-
-#endif
 }
 #endif
 
@@ -152,12 +143,13 @@
 			return (status);
 		}
 
-		fprintf(stderr,
-			"Acpi table [%4.4s] successfully installed and loaded\n",
-			table->signature);
+		acpi_os_printf
+		    ("Acpi table [%4.4s] successfully installed and loaded\n",
+		     table->signature);
 
 		table_list_head = table_list_head->next;
 	}
 
 	return (AE_OK);
 }
+#endif
diff --git a/drivers/acpi/acpica/dbinput.c b/drivers/acpi/acpica/dbinput.c
index 7cd5d2e..068214f 100644
--- a/drivers/acpi/acpica/dbinput.c
+++ b/drivers/acpi/acpica/dbinput.c
@@ -286,6 +286,8 @@
 	{1, "     \"Ascii String\"", "String method argument\n"},
 	{1, "     (Hex Byte List)", "Buffer method argument\n"},
 	{1, "     [Package Element List]", "Package method argument\n"},
+	{5, "  Execute predefined",
+	 "Execute all predefined (public) methods\n"},
 	{1, "  Go", "Allow method to run to completion\n"},
 	{1, "  Information", "Display info about the current method\n"},
 	{1, "  Into", "Step into (not over) a method call\n"},
diff --git a/drivers/acpi/acpica/dbmethod.c b/drivers/acpi/acpica/dbmethod.c
index f17a86f..314b94c 100644
--- a/drivers/acpi/acpica/dbmethod.c
+++ b/drivers/acpi/acpica/dbmethod.c
@@ -52,6 +52,11 @@
 #define _COMPONENT          ACPI_CA_DEBUGGER
 ACPI_MODULE_NAME("dbmethod")
 
+/* Local prototypes */
+static acpi_status
+acpi_db_walk_for_execute(acpi_handle obj_handle,
+			 u32 nesting_level, void *context, void **return_value);
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_db_set_method_breakpoint
@@ -66,6 +71,7 @@
  *              AML offset
  *
  ******************************************************************************/
+
 void
 acpi_db_set_method_breakpoint(char *location,
 			      struct acpi_walk_state *walk_state,
@@ -367,3 +373,129 @@
 	acpi_ut_release_owner_id(&obj_desc->method.owner_id);
 	return (AE_OK);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_db_walk_for_execute
+ *
+ * PARAMETERS:  Callback from walk_namespace
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Batch execution module. Currently only executes predefined
+ *              ACPI names.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_db_walk_for_execute(acpi_handle obj_handle,
+			 u32 nesting_level, void *context, void **return_value)
+{
+	struct acpi_namespace_node *node =
+	    (struct acpi_namespace_node *)obj_handle;
+	struct acpi_db_execute_walk *info =
+	    (struct acpi_db_execute_walk *)context;
+	struct acpi_buffer return_obj;
+	acpi_status status;
+	char *pathname;
+	u32 i;
+	struct acpi_device_info *obj_info;
+	struct acpi_object_list param_objects;
+	union acpi_object params[ACPI_METHOD_NUM_ARGS];
+	const union acpi_predefined_info *predefined;
+
+	predefined = acpi_ut_match_predefined_method(node->name.ascii);
+	if (!predefined) {
+		return (AE_OK);
+	}
+
+	if (node->type == ACPI_TYPE_LOCAL_SCOPE) {
+		return (AE_OK);
+	}
+
+	pathname = acpi_ns_get_external_pathname(node);
+	if (!pathname) {
+		return (AE_OK);
+	}
+
+	/* Get the object info for number of method parameters */
+
+	status = acpi_get_object_info(obj_handle, &obj_info);
+	if (ACPI_FAILURE(status)) {
+		return (status);
+	}
+
+	param_objects.pointer = NULL;
+	param_objects.count = 0;
+
+	if (obj_info->type == ACPI_TYPE_METHOD) {
+
+		/* Setup default parameters */
+
+		for (i = 0; i < obj_info->param_count; i++) {
+			params[i].type = ACPI_TYPE_INTEGER;
+			params[i].integer.value = 1;
+		}
+
+		param_objects.pointer = params;
+		param_objects.count = obj_info->param_count;
+	}
+
+	ACPI_FREE(obj_info);
+	return_obj.pointer = NULL;
+	return_obj.length = ACPI_ALLOCATE_BUFFER;
+
+	/* Do the actual method execution */
+
+	acpi_gbl_method_executing = TRUE;
+
+	status = acpi_evaluate_object(node, NULL, &param_objects, &return_obj);
+
+	acpi_os_printf("%-32s returned %s\n", pathname,
+		       acpi_format_exception(status));
+	acpi_gbl_method_executing = FALSE;
+	ACPI_FREE(pathname);
+
+	/* Ignore status from method execution */
+
+	status = AE_OK;
+
+	/* Update count, check if we have executed enough methods */
+
+	info->count++;
+	if (info->count >= info->max_count) {
+		status = AE_CTRL_TERMINATE;
+	}
+
+	return (status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_db_evaluate_predefined_names
+ *
+ * PARAMETERS:  None
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Namespace batch execution. Execute predefined names in the
+ *              namespace, up to the max count, if specified.
+ *
+ ******************************************************************************/
+
+void acpi_db_evaluate_predefined_names(void)
+{
+	struct acpi_db_execute_walk info;
+
+	info.count = 0;
+	info.max_count = ACPI_UINT32_MAX;
+
+	/* Search all nodes in namespace */
+
+	(void)acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT,
+				  ACPI_UINT32_MAX, acpi_db_walk_for_execute,
+				  NULL, (void *)&info, NULL);
+
+	acpi_os_printf("Evaluated %u predefined names in the namespace\n",
+		       info.count);
+}
diff --git a/drivers/acpi/acpica/dbobject.c b/drivers/acpi/acpica/dbobject.c
index 1d59e8b..08eaaf3 100644
--- a/drivers/acpi/acpica/dbobject.c
+++ b/drivers/acpi/acpica/dbobject.c
@@ -142,11 +142,11 @@
 
 	case ACPI_TYPE_STRING:
 
-		acpi_os_printf("(%u) \"%.24s",
+		acpi_os_printf("(%u) \"%.60s",
 			       obj_desc->string.length,
 			       obj_desc->string.pointer);
 
-		if (obj_desc->string.length > 24) {
+		if (obj_desc->string.length > 60) {
 			acpi_os_printf("...");
 		} else {
 			acpi_os_printf("\"");
diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c
index 47c7b52..32e9ddc 100644
--- a/drivers/acpi/acpica/dsmethod.c
+++ b/drivers/acpi/acpica/dsmethod.c
@@ -99,11 +99,14 @@
 			  "Method auto-serialization parse [%4.4s] %p\n",
 			  acpi_ut_get_node_name(node), node));
 
+	acpi_ex_enter_interpreter();
+
 	/* Create/Init a root op for the method parse tree */
 
 	op = acpi_ps_alloc_op(AML_METHOD_OP, obj_desc->method.aml_start);
 	if (!op) {
-		return_ACPI_STATUS(AE_NO_MEMORY);
+		status = AE_NO_MEMORY;
+		goto unlock;
 	}
 
 	acpi_ps_set_name(op, node->name.integer);
@@ -115,7 +118,8 @@
 	    acpi_ds_create_walk_state(node->owner_id, NULL, NULL, NULL);
 	if (!walk_state) {
 		acpi_ps_free_op(op);
-		return_ACPI_STATUS(AE_NO_MEMORY);
+		status = AE_NO_MEMORY;
+		goto unlock;
 	}
 
 	status = acpi_ds_init_aml_walk(walk_state, op, node,
@@ -134,6 +138,8 @@
 	status = acpi_ps_parse_aml(walk_state);
 
 	acpi_ps_delete_parse_tree(op);
+unlock:
+	acpi_ex_exit_interpreter();
 	return_ACPI_STATUS(status);
 }
 
@@ -757,8 +763,10 @@
 
 			/* Delete any direct children of (created by) this method */
 
+			(void)acpi_ex_exit_interpreter();
 			acpi_ns_delete_namespace_subtree(walk_state->
 							 method_node);
+			(void)acpi_ex_enter_interpreter();
 
 			/*
 			 * Delete any objects that were created by this method
@@ -769,9 +777,11 @@
 			 */
 			if (method_desc->method.
 			    info_flags & ACPI_METHOD_MODIFIED_NAMESPACE) {
+				(void)acpi_ex_exit_interpreter();
 				acpi_ns_delete_namespace_by_owner(method_desc->
 								  method.
 								  owner_id);
+				(void)acpi_ex_enter_interpreter();
 				method_desc->method.info_flags &=
 				    ~ACPI_METHOD_MODIFIED_NAMESPACE;
 			}
diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c
index f393de9..7d8ef52 100644
--- a/drivers/acpi/acpica/dsutils.c
+++ b/drivers/acpi/acpica/dsutils.c
@@ -565,15 +565,14 @@
 					status = AE_OK;
 				} else if (parent_op->common.aml_opcode ==
 					   AML_EXTERNAL_OP) {
-
-					/* TBD: May only be temporary */
-
-					obj_desc =
-					    acpi_ut_create_string_object((acpi_size)name_length);
-
-					strncpy(obj_desc->string.pointer,
-						name_string, name_length);
-					status = AE_OK;
+					/*
+					 * This opcode should never appear here. It is used only
+					 * by AML disassemblers and is surrounded by an If(0)
+					 * by the ASL compiler.
+					 *
+					 * Therefore, if we see it here, it is a serious error.
+					 */
+					status = AE_AML_BAD_OPCODE;
 				} else {
 					/*
 					 * We just plain didn't find it -- which is a
diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index 402ecc5..438597c 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -133,7 +133,8 @@
 	 * Result of predicate evaluation must be an Integer
 	 * object. Implicitly convert the argument if necessary.
 	 */
-	status = acpi_ex_convert_to_integer(obj_desc, &local_obj_desc, 16);
+	status = acpi_ex_convert_to_integer(obj_desc, &local_obj_desc,
+					    ACPI_STRTOUL_BASE16);
 	if (ACPI_FAILURE(status)) {
 		goto cleanup;
 	}
diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c
index 762db3f..028b22a 100644
--- a/drivers/acpi/acpica/dswload2.c
+++ b/drivers/acpi/acpica/dswload2.c
@@ -605,16 +605,13 @@
 				if (ACPI_FAILURE(status)) {
 					return_ACPI_STATUS(status);
 				}
-
-				acpi_ex_exit_interpreter();
 			}
 
+			acpi_ex_exit_interpreter();
 			status =
 			    acpi_ev_initialize_region
 			    (acpi_ns_get_attached_object(node), FALSE);
-			if (walk_state->method_node) {
-				acpi_ex_enter_interpreter();
-			}
+			acpi_ex_enter_interpreter();
 
 			if (ACPI_FAILURE(status)) {
 				/*
diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c
index 4b4949c..bdb10be 100644
--- a/drivers/acpi/acpica/evgpe.c
+++ b/drivers/acpi/acpica/evgpe.c
@@ -130,6 +130,60 @@
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_ev_mask_gpe
+ *
+ * PARAMETERS:  gpe_event_info          - GPE to be blocked/unblocked
+ *              is_masked               - Whether the GPE is masked or not
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Unconditionally mask/unmask a GPE during runtime.
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ev_mask_gpe(struct acpi_gpe_event_info *gpe_event_info, u8 is_masked)
+{
+	struct acpi_gpe_register_info *gpe_register_info;
+	u32 register_bit;
+
+	ACPI_FUNCTION_TRACE(ev_mask_gpe);
+
+	gpe_register_info = gpe_event_info->register_info;
+	if (!gpe_register_info) {
+		return_ACPI_STATUS(AE_NOT_EXIST);
+	}
+
+	register_bit = acpi_hw_get_gpe_register_bit(gpe_event_info);
+
+	/* Perform the action */
+
+	if (is_masked) {
+		if (register_bit & gpe_register_info->mask_for_run) {
+			return_ACPI_STATUS(AE_BAD_PARAMETER);
+		}
+
+		(void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
+		ACPI_SET_BIT(gpe_register_info->mask_for_run, (u8)register_bit);
+	} else {
+		if (!(register_bit & gpe_register_info->mask_for_run)) {
+			return_ACPI_STATUS(AE_BAD_PARAMETER);
+		}
+
+		ACPI_CLEAR_BIT(gpe_register_info->mask_for_run,
+			       (u8)register_bit);
+		if (gpe_event_info->runtime_count
+		    && !gpe_event_info->disable_for_dispatch) {
+			(void)acpi_hw_low_set_gpe(gpe_event_info,
+						  ACPI_GPE_ENABLE);
+		}
+	}
+
+	return_ACPI_STATUS(AE_OK);
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_ev_add_gpe_reference
  *
  * PARAMETERS:  gpe_event_info          - Add a reference to this GPE
@@ -674,6 +728,7 @@
 	 * in the event_info.
 	 */
 	(void)acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_CONDITIONAL_ENABLE);
+	gpe_event_info->disable_for_dispatch = FALSE;
 	return (AE_OK);
 }
 
@@ -737,6 +792,8 @@
 		}
 	}
 
+	gpe_event_info->disable_for_dispatch = TRUE;
+
 	/*
 	 * Dispatch the GPE to either an installed handler or the control
 	 * method associated with this GPE (_Lxx or _Exx). If a handler
diff --git a/drivers/acpi/acpica/evgpeinit.c b/drivers/acpi/acpica/evgpeinit.c
index 7dc7547..16ce483 100644
--- a/drivers/acpi/acpica/evgpeinit.c
+++ b/drivers/acpi/acpica/evgpeinit.c
@@ -323,7 +323,9 @@
 	struct acpi_gpe_walk_info *walk_info =
 	    ACPI_CAST_PTR(struct acpi_gpe_walk_info, context);
 	struct acpi_gpe_event_info *gpe_event_info;
+	acpi_status status;
 	u32 gpe_number;
+	u8 temp_gpe_number;
 	char name[ACPI_NAME_SIZE + 1];
 	u8 type;
 
@@ -377,8 +379,8 @@
 
 	/* 4) The last two characters of the name are the hex GPE Number */
 
-	gpe_number = strtoul(&name[2], NULL, 16);
-	if (gpe_number == ACPI_UINT32_MAX) {
+	status = acpi_ut_ascii_to_hex_byte(&name[2], &temp_gpe_number);
+	if (ACPI_FAILURE(status)) {
 
 		/* Conversion failed; invalid method, just ignore it */
 
@@ -390,6 +392,7 @@
 
 	/* Ensure that we have a valid GPE number for this GPE block */
 
+	gpe_number = (u32)temp_gpe_number;
 	gpe_event_info =
 	    acpi_ev_low_get_gpe_info(gpe_number, walk_info->gpe_block);
 	if (!gpe_event_info) {
diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c
index b6ea9c0..3843f1f 100644
--- a/drivers/acpi/acpica/evrgnini.c
+++ b/drivers/acpi/acpica/evrgnini.c
@@ -553,7 +553,8 @@
 				 *
 				 * See acpi_ns_exec_module_code
 				 */
-				if (obj_desc->method.
+				if (!acpi_gbl_parse_table_as_term_list &&
+				    obj_desc->method.
 				    info_flags & ACPI_METHOD_MODULE_LEVEL) {
 					handler_obj =
 					    obj_desc->method.dispatch.handler;
diff --git a/drivers/acpi/acpica/evxfgpe.c b/drivers/acpi/acpica/evxfgpe.c
index 17cfef7..d7a3b27 100644
--- a/drivers/acpi/acpica/evxfgpe.c
+++ b/drivers/acpi/acpica/evxfgpe.c
@@ -235,11 +235,13 @@
 	case ACPI_GPE_ENABLE:
 
 		status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_ENABLE);
+		gpe_event_info->disable_for_dispatch = FALSE;
 		break;
 
 	case ACPI_GPE_DISABLE:
 
 		status = acpi_hw_low_set_gpe(gpe_event_info, ACPI_GPE_DISABLE);
+		gpe_event_info->disable_for_dispatch = TRUE;
 		break;
 
 	default:
@@ -257,6 +259,47 @@
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_mask_gpe
+ *
+ * PARAMETERS:  gpe_device          - Parent GPE Device. NULL for GPE0/GPE1
+ *              gpe_number          - GPE level within the GPE block
+ *              is_masked           - Whether the GPE is masked or not
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Unconditionally mask/unmask the an individual GPE, ex., to
+ *              prevent a GPE flooding.
+ *
+ ******************************************************************************/
+acpi_status acpi_mask_gpe(acpi_handle gpe_device, u32 gpe_number, u8 is_masked)
+{
+	struct acpi_gpe_event_info *gpe_event_info;
+	acpi_status status;
+	acpi_cpu_flags flags;
+
+	ACPI_FUNCTION_TRACE(acpi_mask_gpe);
+
+	flags = acpi_os_acquire_lock(acpi_gbl_gpe_lock);
+
+	/* Ensure that we have a valid GPE number */
+
+	gpe_event_info = acpi_ev_get_gpe_event_info(gpe_device, gpe_number);
+	if (!gpe_event_info) {
+		status = AE_BAD_PARAMETER;
+		goto unlock_and_exit;
+	}
+
+	status = acpi_ev_mask_gpe(gpe_event_info, is_masked);
+
+unlock_and_exit:
+	acpi_os_release_lock(acpi_gbl_gpe_lock, flags);
+	return_ACPI_STATUS(status);
+}
+
+ACPI_EXPORT_SYMBOL(acpi_mask_gpe)
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_mark_gpe_for_wake
  *
  * PARAMETERS:  gpe_device          - Parent GPE Device. NULL for GPE0/GPE1
diff --git a/drivers/acpi/acpica/exconcat.c b/drivers/acpi/acpica/exconcat.c
index 2423fe0..5429c2a 100644
--- a/drivers/acpi/acpica/exconcat.c
+++ b/drivers/acpi/acpica/exconcat.c
@@ -156,7 +156,7 @@
 
 		status =
 		    acpi_ex_convert_to_integer(local_operand1, &temp_operand1,
-					       16);
+					       ACPI_STRTOUL_BASE16);
 		break;
 
 	case ACPI_TYPE_BUFFER:
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index a1d177d..718428b 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -55,9 +55,7 @@
 
 /* Local prototypes */
 static acpi_status
-acpi_ex_add_table(u32 table_index,
-		  struct acpi_namespace_node *parent_node,
-		  union acpi_operand_object **ddb_handle);
+acpi_ex_add_table(u32 table_index, union acpi_operand_object **ddb_handle);
 
 static acpi_status
 acpi_ex_region_read(union acpi_operand_object *obj_desc,
@@ -79,13 +77,9 @@
  ******************************************************************************/
 
 static acpi_status
-acpi_ex_add_table(u32 table_index,
-		  struct acpi_namespace_node *parent_node,
-		  union acpi_operand_object **ddb_handle)
+acpi_ex_add_table(u32 table_index, union acpi_operand_object **ddb_handle)
 {
 	union acpi_operand_object *obj_desc;
-	acpi_status status;
-	acpi_owner_id owner_id;
 
 	ACPI_FUNCTION_TRACE(ex_add_table);
 
@@ -100,39 +94,8 @@
 
 	obj_desc->common.flags |= AOPOBJ_DATA_VALID;
 	obj_desc->reference.class = ACPI_REFCLASS_TABLE;
-	*ddb_handle = obj_desc;
-
-	/* Install the new table into the local data structures */
-
 	obj_desc->reference.value = table_index;
-
-	/* Add the table to the namespace */
-
-	status = acpi_ns_load_table(table_index, parent_node);
-	if (ACPI_FAILURE(status)) {
-		acpi_ut_remove_reference(obj_desc);
-		*ddb_handle = NULL;
-		return_ACPI_STATUS(status);
-	}
-
-	/* Execute any module-level code that was found in the table */
-
-	acpi_ex_exit_interpreter();
-	if (acpi_gbl_group_module_level_code) {
-		acpi_ns_exec_module_code_list();
-	}
-	acpi_ex_enter_interpreter();
-
-	/*
-	 * Update GPEs for any new _Lxx/_Exx methods. Ignore errors. The host is
-	 * responsible for discovering any new wake GPEs by running _PRW methods
-	 * that may have been loaded by this table.
-	 */
-	status = acpi_tb_get_owner_id(table_index, &owner_id);
-	if (ACPI_SUCCESS(status)) {
-		acpi_ev_update_gpes(owner_id);
-	}
-
+	*ddb_handle = obj_desc;
 	return_ACPI_STATUS(AE_OK);
 }
 
@@ -159,16 +122,17 @@
 	struct acpi_namespace_node *start_node;
 	struct acpi_namespace_node *parameter_node = NULL;
 	union acpi_operand_object *ddb_handle;
-	struct acpi_table_header *table;
 	u32 table_index;
 
 	ACPI_FUNCTION_TRACE(ex_load_table_op);
 
 	/* Find the ACPI table in the RSDT/XSDT */
 
+	acpi_ex_exit_interpreter();
 	status = acpi_tb_find_table(operand[0]->string.pointer,
 				    operand[1]->string.pointer,
 				    operand[2]->string.pointer, &table_index);
+	acpi_ex_enter_interpreter();
 	if (ACPI_FAILURE(status)) {
 		if (status != AE_NOT_FOUND) {
 			return_ACPI_STATUS(status);
@@ -197,9 +161,10 @@
 		 * Find the node referenced by the root_path_string. This is the
 		 * location within the namespace where the table will be loaded.
 		 */
-		status =
-		    acpi_ns_get_node(start_node, operand[3]->string.pointer,
-				     ACPI_NS_SEARCH_PARENT, &parent_node);
+		status = acpi_ns_get_node_unlocked(start_node,
+						   operand[3]->string.pointer,
+						   ACPI_NS_SEARCH_PARENT,
+						   &parent_node);
 		if (ACPI_FAILURE(status)) {
 			return_ACPI_STATUS(status);
 		}
@@ -219,9 +184,10 @@
 
 		/* Find the node referenced by the parameter_path_string */
 
-		status =
-		    acpi_ns_get_node(start_node, operand[4]->string.pointer,
-				     ACPI_NS_SEARCH_PARENT, &parameter_node);
+		status = acpi_ns_get_node_unlocked(start_node,
+						   operand[4]->string.pointer,
+						   ACPI_NS_SEARCH_PARENT,
+						   &parameter_node);
 		if (ACPI_FAILURE(status)) {
 			return_ACPI_STATUS(status);
 		}
@@ -229,7 +195,15 @@
 
 	/* Load the table into the namespace */
 
-	status = acpi_ex_add_table(table_index, parent_node, &ddb_handle);
+	ACPI_INFO(("Dynamic OEM Table Load:"));
+	acpi_ex_exit_interpreter();
+	status = acpi_tb_load_table(table_index, parent_node);
+	acpi_ex_enter_interpreter();
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	status = acpi_ex_add_table(table_index, &ddb_handle);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
@@ -252,19 +226,6 @@
 		}
 	}
 
-	status = acpi_get_table_by_index(table_index, &table);
-	if (ACPI_SUCCESS(status)) {
-		ACPI_INFO(("Dynamic OEM Table Load:"));
-		acpi_tb_print_table_header(0, table);
-	}
-
-	/* Invoke table handler if present */
-
-	if (acpi_gbl_table_handler) {
-		(void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
-					     acpi_gbl_table_handler_context);
-	}
-
 	*return_desc = ddb_handle;
 	return_ACPI_STATUS(status);
 }
@@ -475,13 +436,12 @@
 	/* Install the new table into the local data structures */
 
 	ACPI_INFO(("Dynamic OEM Table Load:"));
-	(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
-
-	status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table),
-						ACPI_TABLE_ORIGIN_INTERNAL_VIRTUAL,
-						TRUE, TRUE, &table_index);
-
-	(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+	acpi_ex_exit_interpreter();
+	status =
+	    acpi_tb_install_and_load_table(table, ACPI_PTR_TO_PHYSADDR(table),
+					   ACPI_TABLE_ORIGIN_INTERNAL_VIRTUAL,
+					   TRUE, &table_index);
+	acpi_ex_enter_interpreter();
 	if (ACPI_FAILURE(status)) {
 
 		/* Delete allocated table buffer */
@@ -491,25 +451,13 @@
 	}
 
 	/*
-	 * Note: Now table is "INSTALLED", it must be validated before
-	 * loading.
-	 */
-	status =
-	    acpi_tb_validate_table(&acpi_gbl_root_table_list.
-				   tables[table_index]);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
-	/*
 	 * Add the table to the namespace.
 	 *
 	 * Note: Load the table objects relative to the root of the namespace.
 	 * This appears to go against the ACPI specification, but we do it for
 	 * compatibility with other ACPI implementations.
 	 */
-	status =
-	    acpi_ex_add_table(table_index, acpi_gbl_root_node, &ddb_handle);
+	status = acpi_ex_add_table(table_index, &ddb_handle);
 	if (ACPI_FAILURE(status)) {
 
 		/* On error, table_ptr was deallocated above */
@@ -532,14 +480,6 @@
 	/* Remove the reference by added by acpi_ex_store above */
 
 	acpi_ut_remove_reference(ddb_handle);
-
-	/* Invoke table handler if present */
-
-	if (acpi_gbl_table_handler) {
-		(void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
-					     acpi_gbl_table_handler_context);
-	}
-
 	return_ACPI_STATUS(status);
 }
 
@@ -592,10 +532,17 @@
 
 	table_index = table_desc->reference.value;
 
+	/*
+	 * Release the interpreter lock so that the table lock won't have
+	 * strict order requirement against it.
+	 */
+	acpi_ex_exit_interpreter();
+
 	/* Ensure the table is still loaded */
 
 	if (!acpi_tb_is_table_loaded(table_index)) {
-		return_ACPI_STATUS(AE_NOT_EXIST);
+		status = AE_NOT_EXIST;
+		goto lock_and_exit;
 	}
 
 	/* Invoke table handler if present */
@@ -613,16 +560,24 @@
 
 	status = acpi_tb_delete_namespace_by_owner(table_index);
 	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
+		goto lock_and_exit;
 	}
 
 	(void)acpi_tb_release_owner_id(table_index);
 	acpi_tb_set_table_loaded_flag(table_index, FALSE);
 
+lock_and_exit:
+
+	/* Re-acquire the interpreter lock */
+
+	acpi_ex_enter_interpreter();
+
 	/*
 	 * Invalidate the handle. We do this because the handle may be stored
 	 * in a named object and may not be actually deleted until much later.
 	 */
-	ddb_handle->common.flags &= ~AOPOBJ_DATA_VALID;
-	return_ACPI_STATUS(AE_OK);
+	if (ACPI_SUCCESS(status)) {
+		ddb_handle->common.flags &= ~AOPOBJ_DATA_VALID;
+	}
+	return_ACPI_STATUS(status);
 }
diff --git a/drivers/acpi/acpica/exconvrt.c b/drivers/acpi/acpica/exconvrt.c
index b7e9b3d..588ad14 100644
--- a/drivers/acpi/acpica/exconvrt.c
+++ b/drivers/acpi/acpica/exconvrt.c
@@ -124,9 +124,9 @@
 		 * of ACPI 3.0) is that the to_integer() operator allows both decimal
 		 * and hexadecimal strings (hex prefixed with "0x").
 		 */
-		status = acpi_ut_strtoul64((char *)pointer, flags,
-					   acpi_gbl_integer_byte_width,
-					   &result);
+		status = acpi_ut_strtoul64(ACPI_CAST_PTR(char, pointer),
+					   (acpi_gbl_integer_byte_width |
+					    flags), &result);
 		if (ACPI_FAILURE(status)) {
 			return_ACPI_STATUS(status);
 		}
@@ -632,7 +632,7 @@
 			 */
 			status =
 			    acpi_ex_convert_to_integer(source_desc, result_desc,
-						       16);
+						       ACPI_STRTOUL_BASE16);
 			break;
 
 		case ACPI_TYPE_STRING:
diff --git a/drivers/acpi/acpica/exmisc.c b/drivers/acpi/acpica/exmisc.c
index 4f7e667..37c88b4 100644
--- a/drivers/acpi/acpica/exmisc.c
+++ b/drivers/acpi/acpica/exmisc.c
@@ -327,8 +327,8 @@
 	switch (operand0->common.type) {
 	case ACPI_TYPE_INTEGER:
 
-		status =
-		    acpi_ex_convert_to_integer(operand1, &local_operand1, 16);
+		status = acpi_ex_convert_to_integer(operand1, &local_operand1,
+						    ACPI_STRTOUL_BASE16);
 		break;
 
 	case ACPI_TYPE_STRING:
diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c
index 4e17506..0073004 100644
--- a/drivers/acpi/acpica/exoparg1.c
+++ b/drivers/acpi/acpica/exoparg1.c
@@ -521,9 +521,10 @@
 
 	case AML_TO_INTEGER_OP:	/* to_integer (Data, Result) */
 
+		/* Perform "explicit" conversion */
+
 		status =
-		    acpi_ex_convert_to_integer(operand[0], &return_desc,
-					       ACPI_ANY_BASE);
+		    acpi_ex_convert_to_integer(operand[0], &return_desc, 0);
 		if (return_desc == operand[0]) {
 
 			/* No conversion performed, add ref to handle return value */
@@ -890,14 +891,16 @@
 				 *    Field, so we need to resolve the node to a value.
 				 */
 				status =
-				    acpi_ns_get_node(walk_state->scope_info->
-						     scope.node,
-						     operand[0]->string.pointer,
-						     ACPI_NS_SEARCH_PARENT,
-						     ACPI_CAST_INDIRECT_PTR
-						     (struct
-						      acpi_namespace_node,
-						      &return_desc));
+				    acpi_ns_get_node_unlocked(walk_state->
+							      scope_info->scope.
+							      node,
+							      operand[0]->
+							      string.pointer,
+							      ACPI_NS_SEARCH_PARENT,
+							      ACPI_CAST_INDIRECT_PTR
+							      (struct
+							       acpi_namespace_node,
+							       &return_desc));
 				if (ACPI_FAILURE(status)) {
 					goto cleanup;
 				}
diff --git a/drivers/acpi/acpica/exresop.c b/drivers/acpi/acpica/exresop.c
index 27b41fd..f29eba1 100644
--- a/drivers/acpi/acpica/exresop.c
+++ b/drivers/acpi/acpica/exresop.c
@@ -410,12 +410,13 @@
 		case ARGI_INTEGER:
 
 			/*
-			 * Need an operand of type ACPI_TYPE_INTEGER,
-			 * But we can implicitly convert from a STRING or BUFFER
-			 * aka - "Implicit Source Operand Conversion"
+			 * Need an operand of type ACPI_TYPE_INTEGER, but we can
+			 * implicitly convert from a STRING or BUFFER.
+			 *
+			 * Known as "Implicit Source Operand Conversion"
 			 */
-			status =
-			    acpi_ex_convert_to_integer(obj_desc, stack_ptr, 16);
+			status = acpi_ex_convert_to_integer(obj_desc, stack_ptr,
+							    ACPI_STRTOUL_BASE16);
 			if (ACPI_FAILURE(status)) {
 				if (status == AE_TYPE) {
 					ACPI_ERROR((AE_INFO,
diff --git a/drivers/acpi/acpica/extrace.c b/drivers/acpi/acpica/extrace.c
index b52e848..c9ca826 100644
--- a/drivers/acpi/acpica/extrace.c
+++ b/drivers/acpi/acpica/extrace.c
@@ -201,7 +201,6 @@
 			   union acpi_operand_object *obj_desc,
 			   struct acpi_walk_state *walk_state)
 {
-	acpi_status status;
 	char *pathname = NULL;
 	u8 enabled = FALSE;
 
@@ -211,11 +210,6 @@
 		pathname = acpi_ns_get_normalized_pathname(method_node, TRUE);
 	}
 
-	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
-	if (ACPI_FAILURE(status)) {
-		goto exit;
-	}
-
 	enabled = acpi_ex_interpreter_trace_enabled(pathname);
 	if (enabled && !acpi_gbl_trace_method_object) {
 		acpi_gbl_trace_method_object = obj_desc;
@@ -233,9 +227,6 @@
 		}
 	}
 
-	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
-exit:
 	if (enabled) {
 		ACPI_TRACE_POINT(ACPI_TRACE_AML_METHOD, TRUE,
 				 obj_desc ? obj_desc->method.aml_start : NULL,
@@ -267,7 +258,6 @@
 			  union acpi_operand_object *obj_desc,
 			  struct acpi_walk_state *walk_state)
 {
-	acpi_status status;
 	char *pathname = NULL;
 	u8 enabled;
 
@@ -277,26 +267,14 @@
 		pathname = acpi_ns_get_normalized_pathname(method_node, TRUE);
 	}
 
-	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
-	if (ACPI_FAILURE(status)) {
-		goto exit_path;
-	}
-
 	enabled = acpi_ex_interpreter_trace_enabled(NULL);
 
-	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
 	if (enabled) {
 		ACPI_TRACE_POINT(ACPI_TRACE_AML_METHOD, FALSE,
 				 obj_desc ? obj_desc->method.aml_start : NULL,
 				 pathname);
 	}
 
-	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
-	if (ACPI_FAILURE(status)) {
-		goto exit_path;
-	}
-
 	/* Check whether the tracer should be stopped */
 
 	if (acpi_gbl_trace_method_object == obj_desc) {
@@ -312,9 +290,6 @@
 		acpi_gbl_trace_method_object = NULL;
 	}
 
-	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
-exit_path:
 	if (pathname) {
 		ACPI_FREE(pathname);
 	}
diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c
index 425f133..a8b857a 100644
--- a/drivers/acpi/acpica/exutils.c
+++ b/drivers/acpi/acpica/exutils.c
@@ -94,6 +94,10 @@
 		ACPI_ERROR((AE_INFO,
 			    "Could not acquire AML Interpreter mutex"));
 	}
+	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
+	if (ACPI_FAILURE(status)) {
+		ACPI_ERROR((AE_INFO, "Could not acquire AML Namespace mutex"));
+	}
 
 	return_VOID;
 }
@@ -127,6 +131,10 @@
 
 	ACPI_FUNCTION_TRACE(ex_exit_interpreter);
 
+	status = acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
+	if (ACPI_FAILURE(status)) {
+		ACPI_ERROR((AE_INFO, "Could not release AML Namespace mutex"));
+	}
 	status = acpi_ut_release_mutex(ACPI_MTX_INTERPRETER);
 	if (ACPI_FAILURE(status)) {
 		ACPI_ERROR((AE_INFO,
diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c
index bdecd5e..76b0e35 100644
--- a/drivers/acpi/acpica/hwgpe.c
+++ b/drivers/acpi/acpica/hwgpe.c
@@ -98,7 +98,7 @@
 acpi_hw_low_set_gpe(struct acpi_gpe_event_info *gpe_event_info, u32 action)
 {
 	struct acpi_gpe_register_info *gpe_register_info;
-	acpi_status status;
+	acpi_status status = AE_OK;
 	u32 enable_mask;
 	u32 register_bit;
 
@@ -148,9 +148,14 @@
 		return (AE_BAD_PARAMETER);
 	}
 
-	/* Write the updated enable mask */
+	if (!(register_bit & gpe_register_info->mask_for_run)) {
 
-	status = acpi_hw_write(enable_mask, &gpe_register_info->enable_address);
+		/* Write the updated enable mask */
+
+		status =
+		    acpi_hw_write(enable_mask,
+				  &gpe_register_info->enable_address);
+	}
 	return (status);
 }
 
@@ -242,6 +247,12 @@
 		local_event_status |= ACPI_EVENT_FLAG_ENABLED;
 	}
 
+	/* GPE currently masked? (masked for runtime?) */
+
+	if (register_bit & gpe_register_info->mask_for_run) {
+		local_event_status |= ACPI_EVENT_FLAG_MASKED;
+	}
+
 	/* GPE enabled for wake? */
 
 	if (register_bit & gpe_register_info->enable_for_wake) {
@@ -397,6 +408,7 @@
 	u32 i;
 	acpi_status status;
 	struct acpi_gpe_register_info *gpe_register_info;
+	u8 enable_mask;
 
 	/* NOTE: assumes that all GPEs are currently disabled */
 
@@ -410,9 +422,10 @@
 
 		/* Enable all "runtime" GPEs in this register */
 
+		enable_mask = gpe_register_info->enable_for_run &
+		    ~gpe_register_info->mask_for_run;
 		status =
-		    acpi_hw_gpe_enable_write(gpe_register_info->enable_for_run,
-					     gpe_register_info);
+		    acpi_hw_gpe_enable_write(enable_mask, gpe_register_info);
 		if (ACPI_FAILURE(status)) {
 			return (status);
 		}
diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c
index 426a630..73f98d3 100644
--- a/drivers/acpi/acpica/nsaccess.c
+++ b/drivers/acpi/acpica/nsaccess.c
@@ -108,9 +108,9 @@
 		}
 
 		status =
-		    acpi_ns_lookup(NULL, (char *)init_val->name, init_val->type,
-				   ACPI_IMODE_LOAD_PASS2, ACPI_NS_NO_UPSEARCH,
-				   NULL, &new_node);
+		    acpi_ns_lookup(NULL, ACPI_CAST_PTR(char, init_val->name),
+				   init_val->type, ACPI_IMODE_LOAD_PASS2,
+				   ACPI_NS_NO_UPSEARCH, NULL, &new_node);
 		if (ACPI_FAILURE(status)) {
 			ACPI_EXCEPTION((AE_INFO, status,
 					"Could not create predefined name %s",
diff --git a/drivers/acpi/acpica/nsconvert.c b/drivers/acpi/acpica/nsconvert.c
index c803bda..2b85dee 100644
--- a/drivers/acpi/acpica/nsconvert.c
+++ b/drivers/acpi/acpica/nsconvert.c
@@ -79,7 +79,6 @@
 		/* String-to-Integer conversion */
 
 		status = acpi_ut_strtoul64(original_object->string.pointer,
-					   ACPI_ANY_BASE,
 					   acpi_gbl_integer_byte_width, &value);
 		if (ACPI_FAILURE(status)) {
 			return (status);
diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c
index ce1f860..84f35dd 100644
--- a/drivers/acpi/acpica/nsdump.c
+++ b/drivers/acpi/acpica/nsdump.c
@@ -338,7 +338,7 @@
 		case ACPI_TYPE_STRING:
 
 			acpi_os_printf("Len %.2X ", obj_desc->string.length);
-			acpi_ut_print_string(obj_desc->string.pointer, 32);
+			acpi_ut_print_string(obj_desc->string.pointer, 80);
 			acpi_os_printf("\n");
 			break;
 
diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c
index b5e2b0a..334d3c5 100644
--- a/drivers/acpi/acpica/nsload.c
+++ b/drivers/acpi/acpica/nsload.c
@@ -46,6 +46,7 @@
 #include "acnamesp.h"
 #include "acdispat.h"
 #include "actables.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsload")
@@ -78,20 +79,6 @@
 
 	ACPI_FUNCTION_TRACE(ns_load_table);
 
-	/*
-	 * Parse the table and load the namespace with all named
-	 * objects found within. Control methods are NOT parsed
-	 * at this time. In fact, the control methods cannot be
-	 * parsed until the entire namespace is loaded, because
-	 * if a control method makes a forward reference (call)
-	 * to another control method, we can't continue parsing
-	 * because we don't know how many arguments to parse next!
-	 */
-	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
 	/* If table already loaded into namespace, just return */
 
 	if (acpi_tb_is_table_loaded(table_index)) {
@@ -107,6 +94,15 @@
 		goto unlock;
 	}
 
+	/*
+	 * Parse the table and load the namespace with all named
+	 * objects found within. Control methods are NOT parsed
+	 * at this time. In fact, the control methods cannot be
+	 * parsed until the entire namespace is loaded, because
+	 * if a control method makes a forward reference (call)
+	 * to another control method, we can't continue parsing
+	 * because we don't know how many arguments to parse next!
+	 */
 	status = acpi_ns_parse_table(table_index, node);
 	if (ACPI_SUCCESS(status)) {
 		acpi_tb_set_table_loaded_flag(table_index, TRUE);
@@ -120,7 +116,6 @@
 		 * exist. This target of Scope must already exist in the
 		 * namespace, as per the ACPI specification.
 		 */
-		(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
 		acpi_ns_delete_namespace_by_owner(acpi_gbl_root_table_list.
 						  tables[table_index].owner_id);
 
@@ -129,8 +124,6 @@
 	}
 
 unlock:
-	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
@@ -162,7 +155,8 @@
 	 * other ACPI implementations. Optionally, the execution can be deferred
 	 * until later, see acpi_initialize_objects.
 	 */
-	if (!acpi_gbl_group_module_level_code) {
+	if (!acpi_gbl_parse_table_as_term_list
+	    && !acpi_gbl_group_module_level_code) {
 		acpi_ns_exec_module_code_list();
 	}
 
diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c
index f631a47..4f14e92 100644
--- a/drivers/acpi/acpica/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -47,12 +47,103 @@
 #include "acparser.h"
 #include "acdispat.h"
 #include "actables.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsparse")
 
 /*******************************************************************************
  *
+ * FUNCTION:    ns_execute_table
+ *
+ * PARAMETERS:  table_desc      - An ACPI table descriptor for table to parse
+ *              start_node      - Where to enter the table into the namespace
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Load ACPI/AML table by executing the entire table as a
+ *              term_list.
+ *
+ ******************************************************************************/
+acpi_status
+acpi_ns_execute_table(u32 table_index, struct acpi_namespace_node *start_node)
+{
+	acpi_status status;
+	struct acpi_table_header *table;
+	acpi_owner_id owner_id;
+	struct acpi_evaluate_info *info = NULL;
+	u32 aml_length;
+	u8 *aml_start;
+	union acpi_operand_object *method_obj = NULL;
+
+	ACPI_FUNCTION_TRACE(ns_execute_table);
+
+	status = acpi_get_table_by_index(table_index, &table);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	/* Table must consist of at least a complete header */
+
+	if (table->length < sizeof(struct acpi_table_header)) {
+		return_ACPI_STATUS(AE_BAD_HEADER);
+	}
+
+	aml_start = (u8 *)table + sizeof(struct acpi_table_header);
+	aml_length = table->length - sizeof(struct acpi_table_header);
+
+	status = acpi_tb_get_owner_id(table_index, &owner_id);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	/* Create, initialize, and link a new temporary method object */
+
+	method_obj = acpi_ut_create_internal_object(ACPI_TYPE_METHOD);
+	if (!method_obj) {
+		return_ACPI_STATUS(AE_NO_MEMORY);
+	}
+
+	/* Allocate the evaluation information block */
+
+	info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_evaluate_info));
+	if (!info) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	ACPI_DEBUG_PRINT((ACPI_DB_PARSE,
+			  "Create table code block: %p\n", method_obj));
+
+	method_obj->method.aml_start = aml_start;
+	method_obj->method.aml_length = aml_length;
+	method_obj->method.owner_id = owner_id;
+	method_obj->method.info_flags |= ACPI_METHOD_MODULE_LEVEL;
+
+	info->pass_number = ACPI_IMODE_EXECUTE;
+	info->node = start_node;
+	info->obj_desc = method_obj;
+	info->node_flags = info->node->flags;
+	info->full_pathname = acpi_ns_get_normalized_pathname(info->node, TRUE);
+	if (!info->full_pathname) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	status = acpi_ps_execute_table(info);
+
+cleanup:
+	if (info) {
+		ACPI_FREE(info->full_pathname);
+		info->full_pathname = NULL;
+	}
+	ACPI_FREE(info);
+	acpi_ut_remove_reference(method_obj);
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    ns_one_complete_parse
  *
  * PARAMETERS:  pass_number             - 1 or 2
@@ -63,6 +154,7 @@
  * DESCRIPTION: Perform one complete parse of an ACPI/AML table.
  *
  ******************************************************************************/
+
 acpi_status
 acpi_ns_one_complete_parse(u32 pass_number,
 			   u32 table_index,
@@ -143,7 +235,9 @@
 
 	ACPI_DEBUG_PRINT((ACPI_DB_PARSE,
 			  "*PARSE* pass %u parse\n", pass_number));
+	acpi_ex_enter_interpreter();
 	status = acpi_ps_parse_aml(walk_state);
+	acpi_ex_exit_interpreter();
 
 cleanup:
 	acpi_ps_delete_parse_tree(parse_root);
@@ -170,38 +264,47 @@
 
 	ACPI_FUNCTION_TRACE(ns_parse_table);
 
-	/*
-	 * AML Parse, pass 1
-	 *
-	 * In this pass, we load most of the namespace. Control methods
-	 * are not parsed until later. A parse tree is not created. Instead,
-	 * each Parser Op subtree is deleted when it is finished. This saves
-	 * a great deal of memory, and allows a small cache of parse objects
-	 * to service the entire parse. The second pass of the parse then
-	 * performs another complete parse of the AML.
-	 */
-	ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 1\n"));
+	if (acpi_gbl_parse_table_as_term_list) {
+		ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start load pass\n"));
 
-	status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1,
-					    table_index, start_node);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
+		status = acpi_ns_execute_table(table_index, start_node);
+		if (ACPI_FAILURE(status)) {
+			return_ACPI_STATUS(status);
+		}
+	} else {
+		/*
+		 * AML Parse, pass 1
+		 *
+		 * In this pass, we load most of the namespace. Control methods
+		 * are not parsed until later. A parse tree is not created.
+		 * Instead, each Parser Op subtree is deleted when it is finished.
+		 * This saves a great deal of memory, and allows a small cache of
+		 * parse objects to service the entire parse. The second pass of
+		 * the parse then performs another complete parse of the AML.
+		 */
+		ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 1\n"));
 
-	/*
-	 * AML Parse, pass 2
-	 *
-	 * In this pass, we resolve forward references and other things
-	 * that could not be completed during the first pass.
-	 * Another complete parse of the AML is performed, but the
-	 * overhead of this is compensated for by the fact that the
-	 * parse objects are all cached.
-	 */
-	ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 2\n"));
-	status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2,
-					    table_index, start_node);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
+		status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1,
+						    table_index, start_node);
+		if (ACPI_FAILURE(status)) {
+			return_ACPI_STATUS(status);
+		}
+
+		/*
+		 * AML Parse, pass 2
+		 *
+		 * In this pass, we resolve forward references and other things
+		 * that could not be completed during the first pass.
+		 * Another complete parse of the AML is performed, but the
+		 * overhead of this is compensated for by the fact that the
+		 * parse objects are all cached.
+		 */
+		ACPI_DEBUG_PRINT((ACPI_DB_PARSE, "**** Start pass 2\n"));
+		status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2,
+						    table_index, start_node);
+		if (ACPI_FAILURE(status)) {
+			return_ACPI_STATUS(status);
+		}
 	}
 
 	return_ACPI_STATUS(status);
diff --git a/drivers/acpi/acpica/nsutils.c b/drivers/acpi/acpica/nsutils.c
index 784a30b..691814d 100644
--- a/drivers/acpi/acpica/nsutils.c
+++ b/drivers/acpi/acpica/nsutils.c
@@ -662,7 +662,7 @@
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ns_get_node
+ * FUNCTION:    acpi_ns_get_node_unlocked
  *
  * PARAMETERS:  *pathname   - Name to be found, in external (ASL) format. The
  *                            \ (backslash) and ^ (carat) prefixes, and the
@@ -678,20 +678,21 @@
  * DESCRIPTION: Look up a name relative to a given scope and return the
  *              corresponding Node. NOTE: Scope can be null.
  *
- * MUTEX:       Locks namespace
+ * MUTEX:       Doesn't locks namespace
  *
  ******************************************************************************/
 
 acpi_status
-acpi_ns_get_node(struct acpi_namespace_node *prefix_node,
-		 const char *pathname,
-		 u32 flags, struct acpi_namespace_node **return_node)
+acpi_ns_get_node_unlocked(struct acpi_namespace_node *prefix_node,
+			  const char *pathname,
+			  u32 flags, struct acpi_namespace_node **return_node)
 {
 	union acpi_generic_state scope_info;
 	acpi_status status;
 	char *internal_path;
 
-	ACPI_FUNCTION_TRACE_PTR(ns_get_node, ACPI_CAST_PTR(char, pathname));
+	ACPI_FUNCTION_TRACE_PTR(ns_get_node_unlocked,
+				ACPI_CAST_PTR(char, pathname));
 
 	/* Simplest case is a null pathname */
 
@@ -718,13 +719,6 @@
 		return_ACPI_STATUS(status);
 	}
 
-	/* Must lock namespace during lookup */
-
-	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
-	if (ACPI_FAILURE(status)) {
-		goto cleanup;
-	}
-
 	/* Setup lookup scope (search starting point) */
 
 	scope_info.scope.node = prefix_node;
@@ -740,9 +734,49 @@
 				  pathname, acpi_format_exception(status)));
 	}
 
-	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
-
-cleanup:
 	ACPI_FREE(internal_path);
 	return_ACPI_STATUS(status);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ns_get_node
+ *
+ * PARAMETERS:  *pathname   - Name to be found, in external (ASL) format. The
+ *                            \ (backslash) and ^ (carat) prefixes, and the
+ *                            . (period) to separate segments are supported.
+ *              prefix_node  - Root of subtree to be searched, or NS_ALL for the
+ *                            root of the name space. If Name is fully
+ *                            qualified (first s8 is '\'), the passed value
+ *                            of Scope will not be accessed.
+ *              flags       - Used to indicate whether to perform upsearch or
+ *                            not.
+ *              return_node - Where the Node is returned
+ *
+ * DESCRIPTION: Look up a name relative to a given scope and return the
+ *              corresponding Node. NOTE: Scope can be null.
+ *
+ * MUTEX:       Locks namespace
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_ns_get_node(struct acpi_namespace_node *prefix_node,
+		 const char *pathname,
+		 u32 flags, struct acpi_namespace_node **return_node)
+{
+	acpi_status status;
+
+	ACPI_FUNCTION_TRACE_PTR(ns_get_node, ACPI_CAST_PTR(char, pathname));
+
+	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	status = acpi_ns_get_node_unlocked(prefix_node, pathname,
+					   flags, return_node);
+
+	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
+	return_ACPI_STATUS(status);
+}
diff --git a/drivers/acpi/acpica/psparse.c b/drivers/acpi/acpica/psparse.c
index 0a23897..1ce26d9 100644
--- a/drivers/acpi/acpica/psparse.c
+++ b/drivers/acpi/acpica/psparse.c
@@ -537,9 +537,11 @@
 
 			/* Either the method parse or actual execution failed */
 
+			acpi_ex_exit_interpreter();
 			ACPI_ERROR_METHOD("Method parse/execution failed",
 					  walk_state->method_node, NULL,
 					  status);
+			acpi_ex_enter_interpreter();
 
 			/* Check for possible multi-thread reentrancy problem */
 
@@ -571,7 +573,9 @@
 		 * cleanup to do
 		 */
 		if (((walk_state->parse_flags & ACPI_PARSE_MODE_MASK) ==
-		     ACPI_PARSE_EXECUTE) || (ACPI_FAILURE(status))) {
+		     ACPI_PARSE_EXECUTE &&
+		     !(walk_state->parse_flags & ACPI_PARSE_MODULE_LEVEL)) ||
+		    (ACPI_FAILURE(status))) {
 			acpi_ds_terminate_control_method(walk_state->
 							 method_desc,
 							 walk_state);
diff --git a/drivers/acpi/acpica/psxface.c b/drivers/acpi/acpica/psxface.c
index cf30cd82..f3c8726 100644
--- a/drivers/acpi/acpica/psxface.c
+++ b/drivers/acpi/acpica/psxface.c
@@ -252,6 +252,90 @@
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_ps_execute_table
+ *
+ * PARAMETERS:  info            - Method info block, contains:
+ *              node            - Node to where the is entered into the
+ *                                namespace
+ *              obj_desc        - Pseudo method object describing the AML
+ *                                code of the entire table
+ *              pass_number     - Parse or execute pass
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Execute a table
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ps_execute_table(struct acpi_evaluate_info *info)
+{
+	acpi_status status;
+	union acpi_parse_object *op = NULL;
+	struct acpi_walk_state *walk_state = NULL;
+
+	ACPI_FUNCTION_TRACE(ps_execute_table);
+
+	/* Create and init a Root Node */
+
+	op = acpi_ps_create_scope_op(info->obj_desc->method.aml_start);
+	if (!op) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	/* Create and initialize a new walk state */
+
+	walk_state =
+	    acpi_ds_create_walk_state(info->obj_desc->method.owner_id, NULL,
+				      NULL, NULL);
+	if (!walk_state) {
+		status = AE_NO_MEMORY;
+		goto cleanup;
+	}
+
+	status = acpi_ds_init_aml_walk(walk_state, op, info->node,
+				       info->obj_desc->method.aml_start,
+				       info->obj_desc->method.aml_length, info,
+				       info->pass_number);
+	if (ACPI_FAILURE(status)) {
+		goto cleanup;
+	}
+
+	if (info->obj_desc->method.info_flags & ACPI_METHOD_MODULE_LEVEL) {
+		walk_state->parse_flags |= ACPI_PARSE_MODULE_LEVEL;
+	}
+
+	/* Info->Node is the default location to load the table  */
+
+	if (info->node && info->node != acpi_gbl_root_node) {
+		status =
+		    acpi_ds_scope_stack_push(info->node, ACPI_TYPE_METHOD,
+					     walk_state);
+		if (ACPI_FAILURE(status)) {
+			goto cleanup;
+		}
+	}
+
+	/*
+	 * Parse the AML, walk_state will be deleted by parse_aml
+	 */
+	acpi_ex_enter_interpreter();
+	status = acpi_ps_parse_aml(walk_state);
+	acpi_ex_exit_interpreter();
+	walk_state = NULL;
+
+cleanup:
+	if (walk_state) {
+		acpi_ds_delete_walk_state(walk_state);
+	}
+	if (op) {
+		acpi_ps_delete_parse_tree(op);
+	}
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_ps_update_parameter_list
  *
  * PARAMETERS:  info            - See struct acpi_evaluate_info
diff --git a/drivers/acpi/acpica/tbdata.c b/drivers/acpi/acpica/tbdata.c
index 1388a19..d9ca8c2 100644
--- a/drivers/acpi/acpica/tbdata.c
+++ b/drivers/acpi/acpica/tbdata.c
@@ -45,6 +45,7 @@
 #include "accommon.h"
 #include "acnamesp.h"
 #include "actables.h"
+#include "acevents.h"
 
 #define _COMPONENT          ACPI_TABLES
 ACPI_MODULE_NAME("tbdata")
@@ -613,17 +614,12 @@
 	 * lock may block, and also since the execution of a namespace walk
 	 * must be allowed to use the interpreter.
 	 */
-	(void)acpi_ut_release_mutex(ACPI_MTX_INTERPRETER);
 	status = acpi_ut_acquire_write_lock(&acpi_gbl_namespace_rw_lock);
-
-	acpi_ns_delete_namespace_by_owner(owner_id);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
-
+	acpi_ns_delete_namespace_by_owner(owner_id);
 	acpi_ut_release_write_lock(&acpi_gbl_namespace_rw_lock);
-
-	status = acpi_ut_acquire_mutex(ACPI_MTX_INTERPRETER);
 	return_ACPI_STATUS(status);
 }
 
@@ -771,3 +767,142 @@
 
 	(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_tb_load_table
+ *
+ * PARAMETERS:  table_index             - Table index
+ *              parent_node             - Where table index is returned
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Load an ACPI table
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_tb_load_table(u32 table_index, struct acpi_namespace_node *parent_node)
+{
+	struct acpi_table_header *table;
+	acpi_status status;
+	acpi_owner_id owner_id;
+
+	ACPI_FUNCTION_TRACE(tb_load_table);
+
+	/*
+	 * Note: Now table is "INSTALLED", it must be validated before
+	 * using.
+	 */
+	status = acpi_get_table_by_index(table_index, &table);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	status = acpi_ns_load_table(table_index, parent_node);
+
+	/* Execute any module-level code that was found in the table */
+
+	if (!acpi_gbl_parse_table_as_term_list
+	    && acpi_gbl_group_module_level_code) {
+		acpi_ns_exec_module_code_list();
+	}
+
+	/*
+	 * Update GPEs for any new _Lxx/_Exx methods. Ignore errors. The host is
+	 * responsible for discovering any new wake GPEs by running _PRW methods
+	 * that may have been loaded by this table.
+	 */
+	status = acpi_tb_get_owner_id(table_index, &owner_id);
+	if (ACPI_SUCCESS(status)) {
+		acpi_ev_update_gpes(owner_id);
+	}
+
+	/* Invoke table handler if present */
+
+	if (acpi_gbl_table_handler) {
+		(void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
+					     acpi_gbl_table_handler_context);
+	}
+
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_tb_install_and_load_table
+ *
+ * PARAMETERS:  table                   - Pointer to the table
+ *              address                 - Physical address of the table
+ *              flags                   - Allocation flags of the table
+ *              table_index             - Where table index is returned
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Install and load an ACPI table
+ *
+ ******************************************************************************/
+
+acpi_status
+acpi_tb_install_and_load_table(struct acpi_table_header *table,
+			       acpi_physical_address address,
+			       u8 flags, u8 override, u32 *table_index)
+{
+	acpi_status status;
+	u32 i;
+	acpi_owner_id owner_id;
+
+	ACPI_FUNCTION_TRACE(acpi_load_table);
+
+	(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
+
+	/* Install the table and load it into the namespace */
+
+	status = acpi_tb_install_standard_table(address, flags, TRUE,
+						override, &i);
+	if (ACPI_FAILURE(status)) {
+		goto unlock_and_exit;
+	}
+
+	/*
+	 * Note: Now table is "INSTALLED", it must be validated before
+	 * using.
+	 */
+	status = acpi_tb_validate_table(&acpi_gbl_root_table_list.tables[i]);
+	if (ACPI_FAILURE(status)) {
+		goto unlock_and_exit;
+	}
+
+	(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+	status = acpi_ns_load_table(i, acpi_gbl_root_node);
+
+	/* Execute any module-level code that was found in the table */
+
+	if (!acpi_gbl_parse_table_as_term_list
+	    && acpi_gbl_group_module_level_code) {
+		acpi_ns_exec_module_code_list();
+	}
+
+	/*
+	 * Update GPEs for any new _Lxx/_Exx methods. Ignore errors. The host is
+	 * responsible for discovering any new wake GPEs by running _PRW methods
+	 * that may have been loaded by this table.
+	 */
+	status = acpi_tb_get_owner_id(i, &owner_id);
+	if (ACPI_SUCCESS(status)) {
+		acpi_ev_update_gpes(owner_id);
+	}
+
+	/* Invoke table handler if present */
+
+	if (acpi_gbl_table_handler) {
+		(void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
+					     acpi_gbl_table_handler_context);
+	}
+	(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
+
+unlock_and_exit:
+	*table_index = i;
+	(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+	return_ACPI_STATUS(status);
+}
diff --git a/drivers/acpi/acpica/tbfadt.c b/drivers/acpi/acpica/tbfadt.c
index 6208069..046c4d0 100644
--- a/drivers/acpi/acpica/tbfadt.c
+++ b/drivers/acpi/acpica/tbfadt.c
@@ -344,23 +344,27 @@
 
 	/* Obtain the DSDT and FACS tables via their addresses within the FADT */
 
-	acpi_tb_install_fixed_table((acpi_physical_address)acpi_gbl_FADT.Xdsdt,
-				    ACPI_SIG_DSDT, &acpi_gbl_dsdt_index);
+	acpi_tb_install_standard_table((acpi_physical_address)acpi_gbl_FADT.
+				       Xdsdt,
+				       ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
+				       FALSE, TRUE, &acpi_gbl_dsdt_index);
 
 	/* If Hardware Reduced flag is set, there is no FACS */
 
 	if (!acpi_gbl_reduced_hardware) {
 		if (acpi_gbl_FADT.facs) {
-			acpi_tb_install_fixed_table((acpi_physical_address)
-						    acpi_gbl_FADT.facs,
-						    ACPI_SIG_FACS,
-						    &acpi_gbl_facs_index);
+			acpi_tb_install_standard_table((acpi_physical_address)
+						       acpi_gbl_FADT.facs,
+						       ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
+						       FALSE, TRUE,
+						       &acpi_gbl_facs_index);
 		}
 		if (acpi_gbl_FADT.Xfacs) {
-			acpi_tb_install_fixed_table((acpi_physical_address)
-						    acpi_gbl_FADT.Xfacs,
-						    ACPI_SIG_FACS,
-						    &acpi_gbl_xfacs_index);
+			acpi_tb_install_standard_table((acpi_physical_address)
+						       acpi_gbl_FADT.Xfacs,
+						       ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL,
+						       FALSE, TRUE,
+						       &acpi_gbl_xfacs_index);
 		}
 	}
 }
@@ -476,17 +480,19 @@
 	u32 i;
 
 	/*
-	 * For ACPI 1.0 FADTs (revision 1 or 2), ensure that reserved fields which
+	 * For ACPI 1.0 FADTs (revision 1), ensure that reserved fields which
 	 * should be zero are indeed zero. This will workaround BIOSs that
 	 * inadvertently place values in these fields.
 	 *
 	 * The ACPI 1.0 reserved fields that will be zeroed are the bytes located
 	 * at offset 45, 55, 95, and the word located at offset 109, 110.
 	 *
-	 * Note: The FADT revision value is unreliable. Only the length can be
-	 * trusted.
+	 * Note: The FADT revision value is unreliable because of BIOS errors.
+	 * The table length is instead used as the final word on the version.
+	 *
+	 * Note: FADT revision 3 is the ACPI 2.0 version of the FADT.
 	 */
-	if (acpi_gbl_FADT.header.length <= ACPI_FADT_V2_SIZE) {
+	if (acpi_gbl_FADT.header.length <= ACPI_FADT_V3_SIZE) {
 		acpi_gbl_FADT.preferred_profile = 0;
 		acpi_gbl_FADT.pstate_control = 0;
 		acpi_gbl_FADT.cst_control = 0;
@@ -552,76 +558,72 @@
 		 *
 		 * Address32 zero, Address64 [don't care]   - Use Address64
 		 *
+		 * No override: if acpi_gbl_use32_bit_fadt_addresses is FALSE, and:
 		 * Address32 non-zero, Address64 zero       - Copy/use Address32
 		 * Address32 non-zero == Address64 non-zero - Use Address64
 		 * Address32 non-zero != Address64 non-zero - Warning, use Address64
 		 *
 		 * Override: if acpi_gbl_use32_bit_fadt_addresses is TRUE, and:
+		 * Address32 non-zero, Address64 zero       - Copy/use Address32
+		 * Address32 non-zero == Address64 non-zero - Copy/use Address32
 		 * Address32 non-zero != Address64 non-zero - Warning, copy/use Address32
 		 *
 		 * Note: space_id is always I/O for 32-bit legacy address fields
 		 */
 		if (address32) {
-			if (!address64->address) {
+			if (address64->address) {
+				if (address64->address != (u64)address32) {
 
-				/* 64-bit address is zero, use 32-bit address */
+					/* Address mismatch */
 
-				acpi_tb_init_generic_address(address64,
-							     ACPI_ADR_SPACE_SYSTEM_IO,
-							     *ACPI_ADD_PTR(u8,
-									   &acpi_gbl_FADT,
-									   fadt_info_table
-									   [i].
-									   length),
-							     (u64)address32,
-							     name, flags);
-			} else if (address64->address != (u64)address32) {
+					ACPI_BIOS_WARNING((AE_INFO,
+							   "32/64X address mismatch in FADT/%s: "
+							   "0x%8.8X/0x%8.8X%8.8X, using %u-bit address",
+							   name, address32,
+							   ACPI_FORMAT_UINT64
+							   (address64->address),
+							   acpi_gbl_use32_bit_fadt_addresses
+							   ? 32 : 64));
+				}
 
-				/* Address mismatch */
-
-				ACPI_BIOS_WARNING((AE_INFO,
-						   "32/64X address mismatch in FADT/%s: "
-						   "0x%8.8X/0x%8.8X%8.8X, using %u-bit address",
-						   name, address32,
-						   ACPI_FORMAT_UINT64
-						   (address64->address),
-						   acpi_gbl_use32_bit_fadt_addresses
-						   ? 32 : 64));
-
-				if (acpi_gbl_use32_bit_fadt_addresses) {
-
-					/* 32-bit address override */
-
-					acpi_tb_init_generic_address(address64,
-								     ACPI_ADR_SPACE_SYSTEM_IO,
-								     *ACPI_ADD_PTR
-								     (u8,
-								      &acpi_gbl_FADT,
-								      fadt_info_table
-								      [i].
-								      length),
-								     (u64)
-								     address32,
-								     name,
-								     flags);
+				/*
+				 * For each extended field, check for length mismatch
+				 * between the legacy length field and the corresponding
+				 * 64-bit X length field.
+				 * Note: If the legacy length field is > 0xFF bits, ignore
+				 * this check. (GPE registers can be larger than the
+				 * 64-bit GAS structure can accomodate, 0xFF bits).
+				 */
+				if ((ACPI_MUL_8(length) <= ACPI_UINT8_MAX) &&
+				    (address64->bit_width !=
+				     ACPI_MUL_8(length))) {
+					ACPI_BIOS_WARNING((AE_INFO,
+							   "32/64X length mismatch in FADT/%s: %u/%u",
+							   name,
+							   ACPI_MUL_8(length),
+							   address64->
+							   bit_width));
 				}
 			}
-		}
 
-		/*
-		 * For each extended field, check for length mismatch between the
-		 * legacy length field and the corresponding 64-bit X length field.
-		 * Note: If the legacy length field is > 0xFF bits, ignore this
-		 * check. (GPE registers can be larger than the 64-bit GAS structure
-		 * can accomodate, 0xFF bits).
-		 */
-		if (address64->address &&
-		    (ACPI_MUL_8(length) <= ACPI_UINT8_MAX) &&
-		    (address64->bit_width != ACPI_MUL_8(length))) {
-			ACPI_BIOS_WARNING((AE_INFO,
-					   "32/64X length mismatch in FADT/%s: %u/%u",
-					   name, ACPI_MUL_8(length),
-					   address64->bit_width));
+			/*
+			 * Hardware register access code always uses the 64-bit fields.
+			 * So if the 64-bit field is zero or is to be overridden,
+			 * initialize it with the 32-bit fields.
+			 * Note that when the 32-bit address favor is specified, the
+			 * 64-bit fields are always re-initialized so that
+			 * access_size/bit_width/bit_offset fields can be correctly
+			 * configured to the values to trigger a 32-bit compatible
+			 * access mode in the hardware register access code.
+			 */
+			if (!address64->address
+			    || acpi_gbl_use32_bit_fadt_addresses) {
+				acpi_tb_init_generic_address(address64,
+							     ACPI_ADR_SPACE_SYSTEM_IO,
+							     length,
+							     (u64)address32,
+							     name, flags);
+			}
 		}
 
 		if (fadt_info_table[i].flags & ACPI_FADT_REQUIRED) {
diff --git a/drivers/acpi/acpica/tbfind.c b/drivers/acpi/acpica/tbfind.c
index e348d61..f6b9b4e 100644
--- a/drivers/acpi/acpica/tbfind.c
+++ b/drivers/acpi/acpica/tbfind.c
@@ -68,7 +68,7 @@
 acpi_tb_find_table(char *signature,
 		   char *oem_id, char *oem_table_id, u32 *table_index)
 {
-	acpi_status status;
+	acpi_status status = AE_OK;
 	struct acpi_table_header header;
 	u32 i;
 
@@ -96,6 +96,7 @@
 
 	/* Search for the table */
 
+	(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 	for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) {
 		if (memcmp(&(acpi_gbl_root_table_list.tables[i].signature),
 			   header.signature, ACPI_NAME_SIZE)) {
@@ -115,7 +116,7 @@
 			    acpi_tb_validate_table(&acpi_gbl_root_table_list.
 						   tables[i]);
 			if (ACPI_FAILURE(status)) {
-				return_ACPI_STATUS(status);
+				goto unlock_and_exit;
 			}
 
 			if (!acpi_gbl_root_table_list.tables[i].pointer) {
@@ -144,9 +145,12 @@
 			ACPI_DEBUG_PRINT((ACPI_DB_TABLES,
 					  "Found table [%4.4s]\n",
 					  header.signature));
-			return_ACPI_STATUS(AE_OK);
+			goto unlock_and_exit;
 		}
 	}
+	status = AE_NOT_FOUND;
 
-	return_ACPI_STATUS(AE_NOT_FOUND);
+unlock_and_exit:
+	(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
+	return_ACPI_STATUS(status);
 }
diff --git a/drivers/acpi/acpica/tbinstal.c b/drivers/acpi/acpica/tbinstal.c
index 8b13052..5fdf251 100644
--- a/drivers/acpi/acpica/tbinstal.c
+++ b/drivers/acpi/acpica/tbinstal.c
@@ -157,68 +157,6 @@
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_tb_install_fixed_table
- *
- * PARAMETERS:  address                 - Physical address of DSDT or FACS
- *              signature               - Table signature, NULL if no need to
- *                                        match
- *              table_index             - Where the table index is returned
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Install a fixed ACPI table (DSDT/FACS) into the global data
- *              structure.
- *
- ******************************************************************************/
-
-acpi_status
-acpi_tb_install_fixed_table(acpi_physical_address address,
-			    char *signature, u32 *table_index)
-{
-	struct acpi_table_desc new_table_desc;
-	acpi_status status;
-
-	ACPI_FUNCTION_TRACE(tb_install_fixed_table);
-
-	if (!address) {
-		ACPI_ERROR((AE_INFO,
-			    "Null physical address for ACPI table [%s]",
-			    signature));
-		return (AE_NO_MEMORY);
-	}
-
-	/* Fill a table descriptor for validation */
-
-	status = acpi_tb_acquire_temp_table(&new_table_desc, address,
-					    ACPI_TABLE_ORIGIN_INTERNAL_PHYSICAL);
-	if (ACPI_FAILURE(status)) {
-		ACPI_ERROR((AE_INFO,
-			    "Could not acquire table length at %8.8X%8.8X",
-			    ACPI_FORMAT_UINT64(address)));
-		return_ACPI_STATUS(status);
-	}
-
-	/* Validate and verify a table before installation */
-
-	status = acpi_tb_verify_temp_table(&new_table_desc, signature);
-	if (ACPI_FAILURE(status)) {
-		goto release_and_exit;
-	}
-
-	/* Add the table to the global root table list */
-
-	acpi_tb_install_table_with_override(&new_table_desc, TRUE, table_index);
-
-release_and_exit:
-
-	/* Release the temporary table descriptor */
-
-	acpi_tb_release_temp_table(&new_table_desc);
-	return_ACPI_STATUS(status);
-}
-
-/*******************************************************************************
- *
  * FUNCTION:    acpi_tb_install_standard_table
  *
  * PARAMETERS:  address             - Address of the table (might be a virtual
@@ -230,8 +168,7 @@
  *
  * RETURN:      Status
  *
- * DESCRIPTION: This function is called to install an ACPI table that is
- *              neither DSDT nor FACS (a "standard" table.)
+ * DESCRIPTION: This function is called to verify and install an ACPI table.
  *              When this function is called by "Load" or "LoadTable" opcodes,
  *              or by acpi_load_table() API, the "Reload" parameter is set.
  *              After sucessfully returning from this function, table is
@@ -364,6 +301,14 @@
 	acpi_tb_install_table_with_override(&new_table_desc, override,
 					    table_index);
 
+	/* Invoke table handler if present */
+
+	if (acpi_gbl_table_handler) {
+		(void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_INSTALL,
+					     new_table_desc.pointer,
+					     acpi_gbl_table_handler_context);
+	}
+
 release_and_exit:
 
 	/* Release the temporary table descriptor */
diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c
index e285539..51eb07c 100644
--- a/drivers/acpi/acpica/tbutils.c
+++ b/drivers/acpi/acpica/tbutils.c
@@ -252,7 +252,8 @@
  *
  ******************************************************************************/
 
-acpi_status __init acpi_tb_parse_root_table(acpi_physical_address rsdp_address)
+acpi_status ACPI_INIT_FUNCTION
+acpi_tb_parse_root_table(acpi_physical_address rsdp_address)
 {
 	struct acpi_table_rsdp *rsdp;
 	u32 table_entry_size;
diff --git a/drivers/acpi/acpica/tbxface.c b/drivers/acpi/acpica/tbxface.c
index 3ecec93..4ab6b9c 100644
--- a/drivers/acpi/acpica/tbxface.c
+++ b/drivers/acpi/acpica/tbxface.c
@@ -98,7 +98,7 @@
  *
  ******************************************************************************/
 
-acpi_status __init
+acpi_status ACPI_INIT_FUNCTION
 acpi_initialize_tables(struct acpi_table_desc *initial_table_array,
 		       u32 initial_table_count, u8 allow_resize)
 {
@@ -164,7 +164,7 @@
  *              kernel.
  *
  ******************************************************************************/
-acpi_status __init acpi_reallocate_root_table(void)
+acpi_status ACPI_INIT_FUNCTION acpi_reallocate_root_table(void)
 {
 	acpi_status status;
 
diff --git a/drivers/acpi/acpica/tbxfload.c b/drivers/acpi/acpica/tbxfload.c
index ac71abc..5569f63 100644
--- a/drivers/acpi/acpica/tbxfload.c
+++ b/drivers/acpi/acpica/tbxfload.c
@@ -63,7 +63,7 @@
  * DESCRIPTION: Load the ACPI tables from the RSDT/XSDT
  *
  ******************************************************************************/
-acpi_status __init acpi_load_tables(void)
+acpi_status ACPI_INIT_FUNCTION acpi_load_tables(void)
 {
 	acpi_status status;
 
@@ -103,7 +103,8 @@
 				"While loading namespace from ACPI tables"));
 	}
 
-	if (!acpi_gbl_group_module_level_code) {
+	if (acpi_gbl_parse_table_as_term_list
+	    || !acpi_gbl_group_module_level_code) {
 		/*
 		 * Initialize the objects that remain uninitialized. This
 		 * runs the executable AML that may be part of the
@@ -188,11 +189,11 @@
 	memcpy(&acpi_gbl_original_dsdt_header, acpi_gbl_DSDT,
 	       sizeof(struct acpi_table_header));
 
-	(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
-
 	/* Load and parse tables */
 
+	(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
 	status = acpi_ns_load_table(acpi_gbl_dsdt_index, acpi_gbl_root_node);
+	(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 	if (ACPI_FAILURE(status)) {
 		ACPI_EXCEPTION((AE_INFO, status, "[DSDT] table load failed"));
 		tables_failed++;
@@ -202,7 +203,6 @@
 
 	/* Load any SSDT or PSDT tables. Note: Loop leaves tables locked */
 
-	(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 	for (i = 0; i < acpi_gbl_root_table_list.current_table_count; ++i) {
 		table = &acpi_gbl_root_table_list.tables[i];
 
@@ -220,6 +220,7 @@
 
 		(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
 		status = acpi_ns_load_table(i, acpi_gbl_root_node);
+		(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 		if (ACPI_FAILURE(status)) {
 			ACPI_EXCEPTION((AE_INFO, status,
 					"(%4.4s:%8.8s) while loading table",
@@ -235,8 +236,6 @@
 		} else {
 			tables_loaded++;
 		}
-
-		(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 	}
 
 	if (!tables_failed) {
@@ -272,7 +271,7 @@
  *
  ******************************************************************************/
 
-acpi_status __init
+acpi_status ACPI_INIT_FUNCTION
 acpi_install_table(acpi_physical_address address, u8 physical)
 {
 	acpi_status status;
@@ -324,49 +323,13 @@
 		return_ACPI_STATUS(AE_BAD_PARAMETER);
 	}
 
-	/* Must acquire the interpreter lock during this operation */
-
-	status = acpi_ut_acquire_mutex(ACPI_MTX_INTERPRETER);
-	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
-	}
-
 	/* Install the table and load it into the namespace */
 
 	ACPI_INFO(("Host-directed Dynamic ACPI Table Load:"));
-	(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
-
-	status = acpi_tb_install_standard_table(ACPI_PTR_TO_PHYSADDR(table),
-						ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL,
-						TRUE, FALSE, &table_index);
-
-	(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
-	if (ACPI_FAILURE(status)) {
-		goto unlock_and_exit;
-	}
-
-	/*
-	 * Note: Now table is "INSTALLED", it must be validated before
-	 * using.
-	 */
 	status =
-	    acpi_tb_validate_table(&acpi_gbl_root_table_list.
-				   tables[table_index]);
-	if (ACPI_FAILURE(status)) {
-		goto unlock_and_exit;
-	}
-
-	status = acpi_ns_load_table(table_index, acpi_gbl_root_node);
-
-	/* Invoke table handler if present */
-
-	if (acpi_gbl_table_handler) {
-		(void)acpi_gbl_table_handler(ACPI_TABLE_EVENT_LOAD, table,
-					     acpi_gbl_table_handler_context);
-	}
-
-unlock_and_exit:
-	(void)acpi_ut_release_mutex(ACPI_MTX_INTERPRETER);
+	    acpi_tb_install_and_load_table(table, ACPI_PTR_TO_PHYSADDR(table),
+					   ACPI_TABLE_ORIGIN_EXTERNAL_VIRTUAL,
+					   FALSE, &table_index);
 	return_ACPI_STATUS(status);
 }
 
@@ -415,9 +378,9 @@
 		return_ACPI_STATUS(AE_TYPE);
 	}
 
-	/* Must acquire the interpreter lock during this operation */
+	/* Must acquire the table lock during this operation */
 
-	status = acpi_ut_acquire_mutex(ACPI_MTX_INTERPRETER);
+	status = acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 	if (ACPI_FAILURE(status)) {
 		return_ACPI_STATUS(status);
 	}
@@ -444,8 +407,10 @@
 
 		/* Ensure the table is actually loaded */
 
+		(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
 		if (!acpi_tb_is_table_loaded(i)) {
 			status = AE_NOT_EXIST;
+			(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 			break;
 		}
 
@@ -471,10 +436,11 @@
 
 		status = acpi_tb_release_owner_id(i);
 		acpi_tb_set_table_loaded_flag(i, FALSE);
+		(void)acpi_ut_acquire_mutex(ACPI_MTX_TABLES);
 		break;
 	}
 
-	(void)acpi_ut_release_mutex(ACPI_MTX_INTERPRETER);
+	(void)acpi_ut_release_mutex(ACPI_MTX_TABLES);
 	return_ACPI_STATUS(status);
 }
 
diff --git a/drivers/acpi/acpica/tbxfroot.c b/drivers/acpi/acpica/tbxfroot.c
index adb6cfc..0adb1c7 100644
--- a/drivers/acpi/acpica/tbxfroot.c
+++ b/drivers/acpi/acpica/tbxfroot.c
@@ -142,7 +142,8 @@
  *
  ******************************************************************************/
 
-acpi_status __init acpi_find_root_pointer(acpi_physical_address *table_address)
+acpi_status ACPI_INIT_FUNCTION
+acpi_find_root_pointer(acpi_physical_address *table_address)
 {
 	u8 *table_ptr;
 	u8 *mem_rover;
@@ -244,6 +245,8 @@
 	return_ACPI_STATUS(AE_NOT_FOUND);
 }
 
+ACPI_EXPORT_SYMBOL_INIT(acpi_find_root_pointer)
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_tb_scan_memory_for_rsdp
diff --git a/drivers/acpi/acpica/utaddress.c b/drivers/acpi/acpica/utaddress.c
index c986ec6..433d822 100644
--- a/drivers/acpi/acpica/utaddress.c
+++ b/drivers/acpi/acpica/utaddress.c
@@ -77,7 +77,6 @@
 			  u32 length, struct acpi_namespace_node *region_node)
 {
 	struct acpi_address_range *range_info;
-	acpi_status status;
 
 	ACPI_FUNCTION_TRACE(ut_add_address_range);
 
@@ -97,12 +96,6 @@
 	range_info->end_address = (address + length - 1);
 	range_info->region_node = region_node;
 
-	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
-	if (ACPI_FAILURE(status)) {
-		ACPI_FREE(range_info);
-		return_ACPI_STATUS(status);
-	}
-
 	range_info->next = acpi_gbl_address_range_list[space_id];
 	acpi_gbl_address_range_list[space_id] = range_info;
 
@@ -112,7 +105,6 @@
 			  ACPI_FORMAT_UINT64(address),
 			  ACPI_FORMAT_UINT64(range_info->end_address)));
 
-	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
 	return_ACPI_STATUS(AE_OK);
 }
 
diff --git a/drivers/acpi/acpica/utbuffer.c b/drivers/acpi/acpica/utbuffer.c
index bd31faf..ff29812 100644
--- a/drivers/acpi/acpica/utbuffer.c
+++ b/drivers/acpi/acpica/utbuffer.c
@@ -239,8 +239,7 @@
 	u8 buf_char;
 
 	if (!buffer) {
-		acpi_ut_file_printf(file,
-				    "Null Buffer Pointer in DumpBuffer!\n");
+		fprintf(file, "Null Buffer Pointer in DumpBuffer!\n");
 		return;
 	}
 
@@ -254,7 +253,7 @@
 
 		/* Print current offset */
 
-		acpi_ut_file_printf(file, "%6.4X: ", (base_offset + i));
+		fprintf(file, "%6.4X: ", (base_offset + i));
 
 		/* Print 16 hex chars */
 
@@ -263,8 +262,7 @@
 
 				/* Dump fill spaces */
 
-				acpi_ut_file_printf(file, "%*s",
-						    ((display * 2) + 1), " ");
+				fprintf(file, "%*s", ((display * 2) + 1), " ");
 				j += display;
 				continue;
 			}
@@ -273,34 +271,34 @@
 			case DB_BYTE_DISPLAY:
 			default:	/* Default is BYTE display */
 
-				acpi_ut_file_printf(file, "%02X ",
-						    buffer[(acpi_size)i + j]);
+				fprintf(file, "%02X ",
+					buffer[(acpi_size)i + j]);
 				break;
 
 			case DB_WORD_DISPLAY:
 
 				ACPI_MOVE_16_TO_32(&temp32,
 						   &buffer[(acpi_size)i + j]);
-				acpi_ut_file_printf(file, "%04X ", temp32);
+				fprintf(file, "%04X ", temp32);
 				break;
 
 			case DB_DWORD_DISPLAY:
 
 				ACPI_MOVE_32_TO_32(&temp32,
 						   &buffer[(acpi_size)i + j]);
-				acpi_ut_file_printf(file, "%08X ", temp32);
+				fprintf(file, "%08X ", temp32);
 				break;
 
 			case DB_QWORD_DISPLAY:
 
 				ACPI_MOVE_32_TO_32(&temp32,
 						   &buffer[(acpi_size)i + j]);
-				acpi_ut_file_printf(file, "%08X", temp32);
+				fprintf(file, "%08X", temp32);
 
 				ACPI_MOVE_32_TO_32(&temp32,
 						   &buffer[(acpi_size)i + j +
 							   4]);
-				acpi_ut_file_printf(file, "%08X ", temp32);
+				fprintf(file, "%08X ", temp32);
 				break;
 			}
 
@@ -311,24 +309,24 @@
 		 * Print the ASCII equivalent characters but watch out for the bad
 		 * unprintable ones (printable chars are 0x20 through 0x7E)
 		 */
-		acpi_ut_file_printf(file, " ");
+		fprintf(file, " ");
 		for (j = 0; j < 16; j++) {
 			if (i + j >= count) {
-				acpi_ut_file_printf(file, "\n");
+				fprintf(file, "\n");
 				return;
 			}
 
 			buf_char = buffer[(acpi_size)i + j];
 			if (isprint(buf_char)) {
-				acpi_ut_file_printf(file, "%c", buf_char);
+				fprintf(file, "%c", buf_char);
 			} else {
-				acpi_ut_file_printf(file, ".");
+				fprintf(file, ".");
 			}
 		}
 
 		/* Done with that line. */
 
-		acpi_ut_file_printf(file, "\n");
+		fprintf(file, "\n");
 		i += 16;
 	}
 
diff --git a/drivers/acpi/acpica/utdebug.c b/drivers/acpi/acpica/utdebug.c
index 5744222..044df9b 100644
--- a/drivers/acpi/acpica/utdebug.c
+++ b/drivers/acpi/acpica/utdebug.c
@@ -562,6 +562,43 @@
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_ut_str_exit
+ *
+ * PARAMETERS:  line_number         - Caller's line number
+ *              function_name       - Caller's procedure name
+ *              module_name         - Caller's module name
+ *              component_id        - Caller's component ID
+ *              string              - String to display
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Function exit trace. Prints only if TRACE_FUNCTIONS bit is
+ *              set in debug_level. Prints exit value also.
+ *
+ ******************************************************************************/
+
+void
+acpi_ut_str_exit(u32 line_number,
+		 const char *function_name,
+		 const char *module_name, u32 component_id, const char *string)
+{
+
+	/* Check if enabled up-front for performance */
+
+	if (ACPI_IS_DEBUG_ENABLED(ACPI_LV_FUNCTIONS, component_id)) {
+		acpi_debug_print(ACPI_LV_FUNCTIONS,
+				 line_number, function_name, module_name,
+				 component_id, "%s %s\n",
+				 acpi_gbl_function_exit_prefix, string);
+	}
+
+	if (acpi_gbl_nesting_level) {
+		acpi_gbl_nesting_level--;
+	}
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_trace_point
  *
  * PARAMETERS:  type                - Trace event type
@@ -591,27 +628,3 @@
 
 ACPI_EXPORT_SYMBOL(acpi_trace_point)
 #endif
-#ifdef ACPI_APPLICATION
-/*******************************************************************************
- *
- * FUNCTION:    acpi_log_error
- *
- * PARAMETERS:  format              - Printf format field
- *              ...                 - Optional printf arguments
- *
- * RETURN:      None
- *
- * DESCRIPTION: Print error message to the console, used by applications.
- *
- ******************************************************************************/
-void ACPI_INTERNAL_VAR_XFACE acpi_log_error(const char *format, ...)
-{
-	va_list args;
-
-	va_start(args, format);
-	(void)acpi_ut_file_vprintf(ACPI_FILE_ERR, format, args);
-	va_end(args);
-}
-
-ACPI_EXPORT_SYMBOL(acpi_log_error)
-#endif
diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c
index efd7988..15728ad8 100644
--- a/drivers/acpi/acpica/utdecode.c
+++ b/drivers/acpi/acpica/utdecode.c
@@ -253,7 +253,7 @@
 		return_PTR("Invalid object");
 	}
 
-	return_PTR(acpi_ut_get_type_name(obj_desc->common.type));
+	return_STR(acpi_ut_get_type_name(obj_desc->common.type));
 }
 
 /*******************************************************************************
diff --git a/drivers/acpi/acpica/uthex.c b/drivers/acpi/acpica/uthex.c
index 4354fb8..36d2fc7 100644
--- a/drivers/acpi/acpica/uthex.c
+++ b/drivers/acpi/acpica/uthex.c
@@ -75,9 +75,41 @@
 
 /*******************************************************************************
  *
+ * FUNCTION:    acpi_ut_ascii_to_hex_byte
+ *
+ * PARAMETERS:  two_ascii_chars             - Pointer to two ASCII characters
+ *              return_byte                 - Where converted byte is returned
+ *
+ * RETURN:      Status and converted hex byte
+ *
+ * DESCRIPTION: Perform ascii-to-hex translation, exactly two ASCII characters
+ *              to a single converted byte value.
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ut_ascii_to_hex_byte(char *two_ascii_chars, u8 *return_byte)
+{
+
+	/* Both ASCII characters must be valid hex digits */
+
+	if (!isxdigit((int)two_ascii_chars[0]) ||
+	    !isxdigit((int)two_ascii_chars[1])) {
+		return (AE_BAD_HEX_CONSTANT);
+	}
+
+	*return_byte =
+	    acpi_ut_ascii_char_to_hex(two_ascii_chars[1]) |
+	    (acpi_ut_ascii_char_to_hex(two_ascii_chars[0]) << 4);
+
+	return (AE_OK);
+}
+
+/*******************************************************************************
+ *
  * FUNCTION:    acpi_ut_ascii_char_to_hex
  *
- * PARAMETERS:  hex_char                - Hex character in Ascii
+ * PARAMETERS:  hex_char                - Hex character in Ascii. Must be:
+ *                                        0-9 or A-F or a-f
  *
  * RETURN:      The binary value of the ascii/hex character
  *
@@ -88,13 +120,19 @@
 u8 acpi_ut_ascii_char_to_hex(int hex_char)
 {
 
-	if (hex_char <= 0x39) {
-		return ((u8)(hex_char - 0x30));
+	/* Values 0-9 */
+
+	if (hex_char <= '9') {
+		return ((u8)(hex_char - '0'));
 	}
 
-	if (hex_char <= 0x46) {
+	/* Upper case A-F */
+
+	if (hex_char <= 'F') {
 		return ((u8)(hex_char - 0x37));
 	}
 
+	/* Lower case a-f */
+
 	return ((u8)(hex_char - 0x57));
 }
diff --git a/drivers/acpi/acpica/utinit.c b/drivers/acpi/acpica/utinit.c
index f91f724..1711fdf4 100644
--- a/drivers/acpi/acpica/utinit.c
+++ b/drivers/acpi/acpica/utinit.c
@@ -206,7 +206,7 @@
 	acpi_gbl_next_owner_id_offset = 0;
 	acpi_gbl_debugger_configuration = DEBUGGER_THREADING;
 	acpi_gbl_osi_mutex = NULL;
-	acpi_gbl_max_loop_iterations = 0xFFFF;
+	acpi_gbl_max_loop_iterations = ACPI_MAX_LOOP_COUNT;
 
 	/* Hardware oriented */
 
diff --git a/drivers/acpi/acpica/utnonansi.c b/drivers/acpi/acpica/utnonansi.c
index 3465fe2..2514239 100644
--- a/drivers/acpi/acpica/utnonansi.c
+++ b/drivers/acpi/acpica/utnonansi.c
@@ -48,8 +48,8 @@
 ACPI_MODULE_NAME("utnonansi")
 
 /*
- * Non-ANSI C library functions - strlwr, strupr, stricmp, and a 64-bit
- * version of strtoul.
+ * Non-ANSI C library functions - strlwr, strupr, stricmp, and "safe"
+ * string functions.
  */
 /*******************************************************************************
  *
@@ -200,356 +200,3 @@
 	return (FALSE);
 }
 #endif
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_ut_strtoul64
- *
- * PARAMETERS:  string                  - Null terminated string
- *              base                    - Radix of the string: 16 or 10 or
- *                                        ACPI_ANY_BASE
- *              max_integer_byte_width  - Maximum allowable integer,in bytes:
- *                                        4 or 8 (32 or 64 bits)
- *              ret_integer             - Where the converted integer is
- *                                        returned
- *
- * RETURN:      Status and Converted value
- *
- * DESCRIPTION: Convert a string into an unsigned value. Performs either a
- *              32-bit or 64-bit conversion, depending on the input integer
- *              size (often the current mode of the interpreter).
- *
- * NOTES:       Negative numbers are not supported, as they are not supported
- *              by ACPI.
- *
- *              acpi_gbl_integer_byte_width should be set to the proper width.
- *              For the core ACPICA code, this width depends on the DSDT
- *              version. For iASL, the default byte width is always 8 for the
- *              parser, but error checking is performed later to flag cases
- *              where a 64-bit constant is defined in a 32-bit DSDT/SSDT.
- *
- *              Does not support Octal strings, not needed at this time.
- *
- ******************************************************************************/
-
-acpi_status
-acpi_ut_strtoul64(char *string,
-		  u32 base, u32 max_integer_byte_width, u64 *ret_integer)
-{
-	u32 this_digit = 0;
-	u64 return_value = 0;
-	u64 quotient;
-	u64 dividend;
-	u8 valid_digits = 0;
-	u8 sign_of0x = 0;
-	u8 term = 0;
-
-	ACPI_FUNCTION_TRACE_STR(ut_strtoul64, string);
-
-	switch (base) {
-	case ACPI_ANY_BASE:
-	case 10:
-	case 16:
-
-		break;
-
-	default:
-
-		/* Invalid Base */
-
-		return_ACPI_STATUS(AE_BAD_PARAMETER);
-	}
-
-	if (!string) {
-		goto error_exit;
-	}
-
-	/* Skip over any white space in the buffer */
-
-	while ((*string) && (isspace((int)*string) || *string == '\t')) {
-		string++;
-	}
-
-	if (base == ACPI_ANY_BASE) {
-		/*
-		 * Base equal to ACPI_ANY_BASE means 'Either decimal or hex'.
-		 * We need to determine if it is decimal or hexadecimal.
-		 */
-		if ((*string == '0') && (tolower((int)*(string + 1)) == 'x')) {
-			sign_of0x = 1;
-			base = 16;
-
-			/* Skip over the leading '0x' */
-			string += 2;
-		} else {
-			base = 10;
-		}
-	}
-
-	/* Any string left? Check that '0x' is not followed by white space. */
-
-	if (!(*string) || isspace((int)*string) || *string == '\t') {
-		if (base == ACPI_ANY_BASE) {
-			goto error_exit;
-		} else {
-			goto all_done;
-		}
-	}
-
-	/*
-	 * Perform a 32-bit or 64-bit conversion, depending upon the input
-	 * byte width
-	 */
-	dividend = (max_integer_byte_width <= ACPI_MAX32_BYTE_WIDTH) ?
-	    ACPI_UINT32_MAX : ACPI_UINT64_MAX;
-
-	/* Main loop: convert the string to a 32- or 64-bit integer */
-
-	while (*string) {
-		if (isdigit((int)*string)) {
-
-			/* Convert ASCII 0-9 to Decimal value */
-
-			this_digit = ((u8)*string) - '0';
-		} else if (base == 10) {
-
-			/* Digit is out of range; possible in to_integer case only */
-
-			term = 1;
-		} else {
-			this_digit = (u8)toupper((int)*string);
-			if (isxdigit((int)this_digit)) {
-
-				/* Convert ASCII Hex char to value */
-
-				this_digit = this_digit - 'A' + 10;
-			} else {
-				term = 1;
-			}
-		}
-
-		if (term) {
-			if (base == ACPI_ANY_BASE) {
-				goto error_exit;
-			} else {
-				break;
-			}
-		} else if ((valid_digits == 0) && (this_digit == 0)
-			   && !sign_of0x) {
-
-			/* Skip zeros */
-			string++;
-			continue;
-		}
-
-		valid_digits++;
-
-		if (sign_of0x && ((valid_digits > 16) ||
-				  ((valid_digits > 8)
-				   && (max_integer_byte_width <=
-				       ACPI_MAX32_BYTE_WIDTH)))) {
-			/*
-			 * This is to_integer operation case.
-			 * No restrictions for string-to-integer conversion,
-			 * see ACPI spec.
-			 */
-			goto error_exit;
-		}
-
-		/* Divide the digit into the correct position */
-
-		(void)acpi_ut_short_divide((dividend - (u64)this_digit), base,
-					   &quotient, NULL);
-
-		if (return_value > quotient) {
-			if (base == ACPI_ANY_BASE) {
-				goto error_exit;
-			} else {
-				break;
-			}
-		}
-
-		return_value *= base;
-		return_value += this_digit;
-		string++;
-	}
-
-	/* All done, normal exit */
-
-all_done:
-
-	ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Converted value: %8.8X%8.8X\n",
-			  ACPI_FORMAT_UINT64(return_value)));
-
-	*ret_integer = return_value;
-	return_ACPI_STATUS(AE_OK);
-
-error_exit:
-
-	/* Base was set/validated above (10 or 16) */
-
-	if (base == 10) {
-		return_ACPI_STATUS(AE_BAD_DECIMAL_CONSTANT);
-	} else {
-		return_ACPI_STATUS(AE_BAD_HEX_CONSTANT);
-	}
-}
-
-#ifdef _OBSOLETE_FUNCTIONS
-/* Removed: 01/2016 */
-
-/*******************************************************************************
- *
- * FUNCTION:    strtoul64
- *
- * PARAMETERS:  string              - Null terminated string
- *              terminater          - Where a pointer to the terminating byte
- *                                    is returned
- *              base                - Radix of the string
- *
- * RETURN:      Converted value
- *
- * DESCRIPTION: Convert a string into an unsigned value.
- *
- ******************************************************************************/
-
-acpi_status strtoul64(char *string, u32 base, u64 *ret_integer)
-{
-	u32 index;
-	u32 sign;
-	u64 return_value = 0;
-	acpi_status status = AE_OK;
-
-	*ret_integer = 0;
-
-	switch (base) {
-	case 0:
-	case 8:
-	case 10:
-	case 16:
-
-		break;
-
-	default:
-		/*
-		 * The specified Base parameter is not in the domain of
-		 * this function:
-		 */
-		return (AE_BAD_PARAMETER);
-	}
-
-	/* Skip over any white space in the buffer: */
-
-	while (isspace((int)*string) || *string == '\t') {
-		++string;
-	}
-
-	/*
-	 * The buffer may contain an optional plus or minus sign.
-	 * If it does, then skip over it but remember what is was:
-	 */
-	if (*string == '-') {
-		sign = ACPI_SIGN_NEGATIVE;
-		++string;
-	} else if (*string == '+') {
-		++string;
-		sign = ACPI_SIGN_POSITIVE;
-	} else {
-		sign = ACPI_SIGN_POSITIVE;
-	}
-
-	/*
-	 * If the input parameter Base is zero, then we need to
-	 * determine if it is octal, decimal, or hexadecimal:
-	 */
-	if (base == 0) {
-		if (*string == '0') {
-			if (tolower((int)*(++string)) == 'x') {
-				base = 16;
-				++string;
-			} else {
-				base = 8;
-			}
-		} else {
-			base = 10;
-		}
-	}
-
-	/*
-	 * For octal and hexadecimal bases, skip over the leading
-	 * 0 or 0x, if they are present.
-	 */
-	if (base == 8 && *string == '0') {
-		string++;
-	}
-
-	if (base == 16 && *string == '0' && tolower((int)*(++string)) == 'x') {
-		string++;
-	}
-
-	/* Main loop: convert the string to an unsigned long */
-
-	while (*string) {
-		if (isdigit((int)*string)) {
-			index = ((u8)*string) - '0';
-		} else {
-			index = (u8)toupper((int)*string);
-			if (isupper((int)index)) {
-				index = index - 'A' + 10;
-			} else {
-				goto error_exit;
-			}
-		}
-
-		if (index >= base) {
-			goto error_exit;
-		}
-
-		/* Check to see if value is out of range: */
-
-		if (return_value > ((ACPI_UINT64_MAX - (u64)index) / (u64)base)) {
-			goto error_exit;
-		} else {
-			return_value *= base;
-			return_value += index;
-		}
-
-		++string;
-	}
-
-	/* If a minus sign was present, then "the conversion is negated": */
-
-	if (sign == ACPI_SIGN_NEGATIVE) {
-		return_value = (ACPI_UINT32_MAX - return_value) + 1;
-	}
-
-	*ret_integer = return_value;
-	return (status);
-
-error_exit:
-	switch (base) {
-	case 8:
-
-		status = AE_BAD_OCTAL_CONSTANT;
-		break;
-
-	case 10:
-
-		status = AE_BAD_DECIMAL_CONSTANT;
-		break;
-
-	case 16:
-
-		status = AE_BAD_HEX_CONSTANT;
-		break;
-
-	default:
-
-		/* Base validated above */
-
-		break;
-	}
-
-	return (status);
-}
-#endif
diff --git a/drivers/acpi/acpica/utosi.c b/drivers/acpi/acpica/utosi.c
index 3f5fed6..f0484b0 100644
--- a/drivers/acpi/acpica/utosi.c
+++ b/drivers/acpi/acpica/utosi.c
@@ -390,11 +390,22 @@
  * PARAMETERS:  walk_state          - Current walk state
  *
  * RETURN:      Status
+ *              Integer: TRUE (0) if input string is matched
+ *                       FALSE (-1) if string is not matched
  *
  * DESCRIPTION: Implementation of the _OSI predefined control method. When
  *              an invocation of _OSI is encountered in the system AML,
  *              control is transferred to this function.
  *
+ * (August 2016)
+ * Note:  _OSI is now defined to return "Ones" to indicate a match, for
+ * compatibility with other ACPI implementations. On a 32-bit DSDT, Ones
+ * is 0xFFFFFFFF. On a 64-bit DSDT, Ones is 0xFFFFFFFFFFFFFFFF
+ * (ACPI_UINT64_MAX).
+ *
+ * This function always returns ACPI_UINT64_MAX for TRUE, and later code
+ * will truncate this to 32 bits if necessary.
+ *
  ******************************************************************************/
 
 acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state)
@@ -404,7 +415,7 @@
 	struct acpi_interface_info *interface_info;
 	acpi_interface_handler interface_handler;
 	acpi_status status;
-	u32 return_value;
+	u64 return_value;
 
 	ACPI_FUNCTION_TRACE(ut_osi_implementation);
 
@@ -444,7 +455,7 @@
 			acpi_gbl_osi_data = interface_info->value;
 		}
 
-		return_value = ACPI_UINT32_MAX;
+		return_value = ACPI_UINT64_MAX;
 	}
 
 	acpi_os_release_mutex(acpi_gbl_osi_mutex);
@@ -456,9 +467,10 @@
 	 */
 	interface_handler = acpi_gbl_interface_handler;
 	if (interface_handler) {
-		return_value =
-		    interface_handler(string_desc->string.pointer,
-				      return_value);
+		if (interface_handler
+		    (string_desc->string.pointer, (u32)return_value)) {
+			return_value = ACPI_UINT64_MAX;
+		}
 	}
 
 	ACPI_DEBUG_PRINT_RAW((ACPI_DB_INFO,
diff --git a/drivers/acpi/acpica/utpredef.c b/drivers/acpi/acpica/utpredef.c
index 770a177..ce18346 100644
--- a/drivers/acpi/acpica/utpredef.c
+++ b/drivers/acpi/acpica/utpredef.c
@@ -176,8 +176,6 @@
  ******************************************************************************/
 
 #if (defined ACPI_ASL_COMPILER || defined ACPI_HELP_APP)
-#include <stdio.h>
-#include <string.h>
 
 /* Local prototypes */
 
diff --git a/drivers/acpi/acpica/utprint.c b/drivers/acpi/acpica/utprint.c
index dd084cf..40eba80 100644
--- a/drivers/acpi/acpica/utprint.c
+++ b/drivers/acpi/acpica/utprint.c
@@ -336,7 +336,7 @@
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ut_vsnprintf
+ * FUNCTION:    vsnprintf
  *
  * PARAMETERS:  string              - String with boundary
  *              size                - Boundary of the string
@@ -349,9 +349,7 @@
  *
  ******************************************************************************/
 
-int
-acpi_ut_vsnprintf(char *string,
-		  acpi_size size, const char *format, va_list args)
+int vsnprintf(char *string, acpi_size size, const char *format, va_list args)
 {
 	u8 base;
 	u8 type;
@@ -586,7 +584,7 @@
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ut_snprintf
+ * FUNCTION:    snprintf
  *
  * PARAMETERS:  string              - String with boundary
  *              size                - Boundary of the string
@@ -598,13 +596,38 @@
  *
  ******************************************************************************/
 
-int acpi_ut_snprintf(char *string, acpi_size size, const char *format, ...)
+int snprintf(char *string, acpi_size size, const char *format, ...)
 {
 	va_list args;
 	int length;
 
 	va_start(args, format);
-	length = acpi_ut_vsnprintf(string, size, format, args);
+	length = vsnprintf(string, size, format, args);
+	va_end(args);
+
+	return (length);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    sprintf
+ *
+ * PARAMETERS:  string              - String with boundary
+ *              Format, ...         - Standard printf format
+ *
+ * RETURN:      Number of bytes actually written.
+ *
+ * DESCRIPTION: Formatted output to a string.
+ *
+ ******************************************************************************/
+
+int sprintf(char *string, const char *format, ...)
+{
+	va_list args;
+	int length;
+
+	va_start(args, format);
+	length = vsnprintf(string, ACPI_UINT32_MAX, format, args);
 	va_end(args);
 
 	return (length);
@@ -613,7 +636,59 @@
 #ifdef ACPI_APPLICATION
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ut_file_vprintf
+ * FUNCTION:    vprintf
+ *
+ * PARAMETERS:  format              - Standard printf format
+ *              args                - Argument list
+ *
+ * RETURN:      Number of bytes actually written.
+ *
+ * DESCRIPTION: Formatted output to stdout using argument list pointer.
+ *
+ ******************************************************************************/
+
+int vprintf(const char *format, va_list args)
+{
+	acpi_cpu_flags flags;
+	int length;
+
+	flags = acpi_os_acquire_lock(acpi_gbl_print_lock);
+	length = vsnprintf(acpi_gbl_print_buffer,
+			   sizeof(acpi_gbl_print_buffer), format, args);
+
+	(void)fwrite(acpi_gbl_print_buffer, length, 1, ACPI_FILE_OUT);
+	acpi_os_release_lock(acpi_gbl_print_lock, flags);
+
+	return (length);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    printf
+ *
+ * PARAMETERS:  Format, ...         - Standard printf format
+ *
+ * RETURN:      Number of bytes actually written.
+ *
+ * DESCRIPTION: Formatted output to stdout.
+ *
+ ******************************************************************************/
+
+int printf(const char *format, ...)
+{
+	va_list args;
+	int length;
+
+	va_start(args, format);
+	length = vprintf(format, args);
+	va_end(args);
+
+	return (length);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    vfprintf
  *
  * PARAMETERS:  file                - File descriptor
  *              format              - Standard printf format
@@ -625,16 +700,16 @@
  *
  ******************************************************************************/
 
-int acpi_ut_file_vprintf(ACPI_FILE file, const char *format, va_list args)
+int vfprintf(FILE * file, const char *format, va_list args)
 {
 	acpi_cpu_flags flags;
 	int length;
 
 	flags = acpi_os_acquire_lock(acpi_gbl_print_lock);
-	length = acpi_ut_vsnprintf(acpi_gbl_print_buffer,
-				   sizeof(acpi_gbl_print_buffer), format, args);
+	length = vsnprintf(acpi_gbl_print_buffer,
+			   sizeof(acpi_gbl_print_buffer), format, args);
 
-	(void)acpi_os_write_file(file, acpi_gbl_print_buffer, length, 1);
+	(void)fwrite(acpi_gbl_print_buffer, length, 1, file);
 	acpi_os_release_lock(acpi_gbl_print_lock, flags);
 
 	return (length);
@@ -642,7 +717,7 @@
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_ut_file_printf
+ * FUNCTION:    fprintf
  *
  * PARAMETERS:  file                - File descriptor
  *              Format, ...         - Standard printf format
@@ -653,13 +728,13 @@
  *
  ******************************************************************************/
 
-int acpi_ut_file_printf(ACPI_FILE file, const char *format, ...)
+int fprintf(FILE * file, const char *format, ...)
 {
 	va_list args;
 	int length;
 
 	va_start(args, format);
-	length = acpi_ut_file_vprintf(file, format, args);
+	length = vfprintf(file, format, args);
 	va_end(args);
 
 	return (length);
diff --git a/drivers/acpi/acpica/utstrtoul64.c b/drivers/acpi/acpica/utstrtoul64.c
new file mode 100644
index 0000000..b4f341c
--- /dev/null
+++ b/drivers/acpi/acpica/utstrtoul64.c
@@ -0,0 +1,348 @@
+/*******************************************************************************
+ *
+ * Module Name: utstrtoul64 - string to 64-bit integer support
+ *
+ ******************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2016, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+
+/*******************************************************************************
+ *
+ * The functions in this module satisfy the need for 64-bit string-to-integer
+ * conversions on both 32-bit and 64-bit platforms.
+ *
+ ******************************************************************************/
+
+#define _COMPONENT          ACPI_UTILITIES
+ACPI_MODULE_NAME("utstrtoul64")
+
+/* Local prototypes */
+static u64 acpi_ut_strtoul_base10(char *string, u32 flags);
+
+static u64 acpi_ut_strtoul_base16(char *string, u32 flags);
+
+/*******************************************************************************
+ *
+ * String conversion rules as written in the ACPI specification. The error
+ * conditions and behavior are different depending on the type of conversion.
+ *
+ *
+ * Implicit data type conversion: string-to-integer
+ * --------------------------------------------------
+ *
+ * Base is always 16. This is the ACPI_STRTOUL_BASE16 case.
+ *
+ * Example:
+ *      Add ("BA98", Arg0, Local0)
+ *
+ * The integer is initialized to the value zero.
+ * The ASCII string is interpreted as a hexadecimal constant.
+ *
+ *  1)  A "0x" prefix is not allowed. However, ACPICA allows this for
+ *      compatibility with previous ACPICA. (NO ERROR)
+ *
+ *  2)  Terminates when the size of an integer is reached (32 or 64 bits).
+ *      (NO ERROR)
+ *
+ *  3)  The first non-hex character terminates the conversion without error.
+ *      (NO ERROR)
+ *
+ *  4)  Conversion of a null (zero-length) string to an integer is not
+ *      allowed. However, ACPICA allows this for compatibility with previous
+ *      ACPICA. This conversion returns the value 0. (NO ERROR)
+ *
+ *
+ * Explicit data type conversion:  to_integer() with string operand
+ * ---------------------------------------------------------------
+ *
+ * Base is either 10 (default) or 16 (with 0x prefix)
+ *
+ * Examples:
+ *      to_integer ("1000")
+ *      to_integer ("0xABCD")
+ *
+ *  1)  Can be (must be) either a decimal or hexadecimal numeric string.
+ *      A hex value must be prefixed by "0x" or it is interpreted as a decimal.
+ *
+ *  2)  The value must not exceed the maximum of an integer value. ACPI spec
+ *      states the behavior is "unpredictable", so ACPICA matches the behavior
+ *      of the implicit conversion case.(NO ERROR)
+ *
+ *  3)  Behavior on the first non-hex character is not specified by the ACPI
+ *      spec, so ACPICA matches the behavior of the implicit conversion case
+ *      and terminates. (NO ERROR)
+ *
+ *  4)  A null (zero-length) string is illegal.
+ *      However, ACPICA allows this for compatibility with previous ACPICA.
+ *      This conversion returns the value 0. (NO ERROR)
+ *
+ ******************************************************************************/
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_strtoul64
+ *
+ * PARAMETERS:  string                  - Null terminated input string
+ *              flags                   - Conversion info, see below
+ *              return_value            - Where the converted integer is
+ *                                        returned
+ *
+ * RETURN:      Status and Converted value
+ *
+ * DESCRIPTION: Convert a string into an unsigned value. Performs either a
+ *              32-bit or 64-bit conversion, depending on the input integer
+ *              size in Flags (often the current mode of the interpreter).
+ *
+ * Values for Flags:
+ *      ACPI_STRTOUL_32BIT      - Max integer value is 32 bits
+ *      ACPI_STRTOUL_64BIT      - Max integer value is 64 bits
+ *      ACPI_STRTOUL_BASE16     - Input string is hexadecimal. Default
+ *                                is 10/16 based on string prefix (0x).
+ *
+ * NOTES:
+ *   Negative numbers are not supported, as they are not supported by ACPI.
+ *
+ *   Supports only base 16 or base 10 strings/values. Does not
+ *   support Octal strings, as these are not supported by ACPI.
+ *
+ * Current users of this support:
+ *
+ *  interpreter - Implicit and explicit conversions, GPE method names
+ *  debugger    - Command line input string conversion
+ *  iASL        - Main parser, conversion of constants to integers
+ *  iASL        - Data Table Compiler parser (constant math expressions)
+ *  iASL        - Preprocessor (constant math expressions)
+ *  acpi_dump   - Input table addresses
+ *  acpi_exec   - Testing of the acpi_ut_strtoul64 function
+ *
+ * Note concerning callers:
+ *   acpi_gbl_integer_byte_width can be used to set the 32/64 limit. If used,
+ *   this global should be set to the proper width. For the core ACPICA code,
+ *   this width depends on the DSDT version. For iASL, the default byte
+ *   width is always 8 for the parser, but error checking is performed later
+ *   to flag cases where a 64-bit constant is defined in a 32-bit DSDT/SSDT.
+ *
+ ******************************************************************************/
+
+acpi_status acpi_ut_strtoul64(char *string, u32 flags, u64 *return_value)
+{
+	acpi_status status = AE_OK;
+	u32 base;
+
+	ACPI_FUNCTION_TRACE_STR(ut_strtoul64, string);
+
+	/* Parameter validation */
+
+	if (!string || !return_value) {
+		return_ACPI_STATUS(AE_BAD_PARAMETER);
+	}
+
+	*return_value = 0;
+
+	/* Check for zero-length string, returns 0 */
+
+	if (*string == 0) {
+		return_ACPI_STATUS(AE_OK);
+	}
+
+	/* Skip over any white space at start of string */
+
+	while (isspace((int)*string)) {
+		string++;
+	}
+
+	/* End of string? return 0 */
+
+	if (*string == 0) {
+		return_ACPI_STATUS(AE_OK);
+	}
+
+	/*
+	 * 1) The "0x" prefix indicates base 16. Per the ACPI specification,
+	 * the "0x" prefix is only allowed for implicit (non-strict) conversions.
+	 * However, we always allow it for compatibility with older ACPICA.
+	 */
+	if ((*string == ACPI_ASCII_ZERO) &&
+	    (tolower((int)*(string + 1)) == 'x')) {
+		string += 2;	/* Go past the 0x */
+		if (*string == 0) {
+			return_ACPI_STATUS(AE_OK);	/* Return value 0 */
+		}
+
+		base = 16;
+	}
+
+	/* 2) Force to base 16 (implicit conversion case) */
+
+	else if (flags & ACPI_STRTOUL_BASE16) {
+		base = 16;
+	}
+
+	/* 3) Default fallback is to Base 10 */
+
+	else {
+		base = 10;
+	}
+
+	/* Skip all leading zeros */
+
+	while (*string == ACPI_ASCII_ZERO) {
+		string++;
+		if (*string == 0) {
+			return_ACPI_STATUS(AE_OK);	/* Return value 0 */
+		}
+	}
+
+	/* Perform the base 16 or 10 conversion */
+
+	if (base == 16) {
+		*return_value = acpi_ut_strtoul_base16(string, flags);
+	} else {
+		*return_value = acpi_ut_strtoul_base10(string, flags);
+	}
+
+	return_ACPI_STATUS(status);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_strtoul_base10
+ *
+ * PARAMETERS:  string                  - Null terminated input string
+ *              flags                   - Conversion info
+ *
+ * RETURN:      64-bit converted integer
+ *
+ * DESCRIPTION: Performs a base 10 conversion of the input string to an
+ *              integer value, either 32 or 64 bits.
+ *              Note: String must be valid and non-null.
+ *
+ ******************************************************************************/
+
+static u64 acpi_ut_strtoul_base10(char *string, u32 flags)
+{
+	int ascii_digit;
+	u64 next_value;
+	u64 return_value = 0;
+
+	/* Main loop: convert each ASCII byte in the input string */
+
+	while (*string) {
+		ascii_digit = *string;
+		if (!isdigit(ascii_digit)) {
+
+			/* Not ASCII 0-9, terminate */
+
+			goto exit;
+		}
+
+		/* Convert and insert (add) the decimal digit */
+
+		next_value =
+		    (return_value * 10) + (ascii_digit - ACPI_ASCII_ZERO);
+
+		/* Check for overflow (32 or 64 bit) - return current converted value */
+
+		if (((flags & ACPI_STRTOUL_32BIT) && (next_value > ACPI_UINT32_MAX)) || (next_value < return_value)) {	/* 64-bit overflow case */
+			goto exit;
+		}
+
+		return_value = next_value;
+		string++;
+	}
+
+exit:
+	return (return_value);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ut_strtoul_base16
+ *
+ * PARAMETERS:  string                  - Null terminated input string
+ *              flags                   - conversion info
+ *
+ * RETURN:      64-bit converted integer
+ *
+ * DESCRIPTION: Performs a base 16 conversion of the input string to an
+ *              integer value, either 32 or 64 bits.
+ *              Note: String must be valid and non-null.
+ *
+ ******************************************************************************/
+
+static u64 acpi_ut_strtoul_base16(char *string, u32 flags)
+{
+	int ascii_digit;
+	u32 valid_digits = 1;
+	u64 return_value = 0;
+
+	/* Main loop: convert each ASCII byte in the input string */
+
+	while (*string) {
+
+		/* Check for overflow (32 or 64 bit) - return current converted value */
+
+		if ((valid_digits > 16) ||
+		    ((valid_digits > 8) && (flags & ACPI_STRTOUL_32BIT))) {
+			goto exit;
+		}
+
+		ascii_digit = *string;
+		if (!isxdigit(ascii_digit)) {
+
+			/* Not Hex ASCII A-F, a-f, or 0-9, terminate */
+
+			goto exit;
+		}
+
+		/* Convert and insert the hex digit */
+
+		return_value =
+		    (return_value << 4) |
+		    acpi_ut_ascii_char_to_hex(ascii_digit);
+
+		string++;
+		valid_digits++;
+	}
+
+exit:
+	return (return_value);
+}
diff --git a/drivers/acpi/acpica/uttrack.c b/drivers/acpi/acpica/uttrack.c
index 0df07df..df31d71 100644
--- a/drivers/acpi/acpica/uttrack.c
+++ b/drivers/acpi/acpica/uttrack.c
@@ -95,13 +95,11 @@
 {
 	struct acpi_memory_list *cache;
 
-	cache = acpi_os_allocate(sizeof(struct acpi_memory_list));
+	cache = acpi_os_allocate_zeroed(sizeof(struct acpi_memory_list));
 	if (!cache) {
 		return (AE_NO_MEMORY);
 	}
 
-	memset(cache, 0, sizeof(struct acpi_memory_list));
-
 	cache->list_name = list_name;
 	cache->object_size = object_size;
 
diff --git a/drivers/acpi/acpica/utxface.c b/drivers/acpi/acpica/utxface.c
index d9e6aac..ec503c8 100644
--- a/drivers/acpi/acpica/utxface.c
+++ b/drivers/acpi/acpica/utxface.c
@@ -61,7 +61,7 @@
  * DESCRIPTION: Shutdown the ACPICA subsystem and release all resources.
  *
  ******************************************************************************/
-acpi_status __init acpi_terminate(void)
+acpi_status ACPI_INIT_FUNCTION acpi_terminate(void)
 {
 	acpi_status status;
 
diff --git a/drivers/acpi/acpica/utxfinit.c b/drivers/acpi/acpica/utxfinit.c
index 75b5f27..a5ca0f5 100644
--- a/drivers/acpi/acpica/utxfinit.c
+++ b/drivers/acpi/acpica/utxfinit.c
@@ -69,7 +69,7 @@
  *
  ******************************************************************************/
 
-acpi_status __init acpi_initialize_subsystem(void)
+acpi_status ACPI_INIT_FUNCTION acpi_initialize_subsystem(void)
 {
 	acpi_status status;
 
@@ -141,7 +141,7 @@
  *              Puts system into ACPI mode if it isn't already.
  *
  ******************************************************************************/
-acpi_status __init acpi_enable_subsystem(u32 flags)
+acpi_status ACPI_INIT_FUNCTION acpi_enable_subsystem(u32 flags)
 {
 	acpi_status status = AE_OK;
 
@@ -239,7 +239,7 @@
  *              objects and executing AML code for Regions, buffers, etc.
  *
  ******************************************************************************/
-acpi_status __init acpi_initialize_objects(u32 flags)
+acpi_status ACPI_INIT_FUNCTION acpi_initialize_objects(u32 flags)
 {
 	acpi_status status = AE_OK;
 
@@ -265,7 +265,8 @@
 	 * all of the tables have been loaded. It is a legacy option and is
 	 * not compatible with other ACPI implementations. See acpi_ns_load_table.
 	 */
-	if (acpi_gbl_group_module_level_code) {
+	if (!acpi_gbl_parse_table_as_term_list
+	    && acpi_gbl_group_module_level_code) {
 		acpi_ns_exec_module_code_list();
 
 		/*
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig
new file mode 100644
index 0000000..4616da4
--- /dev/null
+++ b/drivers/acpi/arm64/Kconfig
@@ -0,0 +1,6 @@
+#
+# ACPI Configuration for ARM64
+#
+
+config ACPI_IORT
+	bool
diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile
new file mode 100644
index 0000000..72331f2
--- /dev/null
+++ b/drivers/acpi/arm64/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_ACPI_IORT) 	+= iort.o
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
new file mode 100644
index 0000000..6b81746
--- /dev/null
+++ b/drivers/acpi/arm64/iort.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (C) 2016, Semihalf
+ *	Author: Tomasz Nowicki <tn@semihalf.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * This file implements early detection/parsing of I/O mapping
+ * reported to OS through firmware via I/O Remapping Table (IORT)
+ * IORT document number: ARM DEN 0049A
+ */
+
+#define pr_fmt(fmt)	"ACPI: IORT: " fmt
+
+#include <linux/acpi_iort.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+
+struct iort_its_msi_chip {
+	struct list_head	list;
+	struct fwnode_handle	*fw_node;
+	u32			translation_id;
+};
+
+typedef acpi_status (*iort_find_node_callback)
+	(struct acpi_iort_node *node, void *context);
+
+/* Root pointer to the mapped IORT table */
+static struct acpi_table_header *iort_table;
+
+static LIST_HEAD(iort_msi_chip_list);
+static DEFINE_SPINLOCK(iort_msi_chip_lock);
+
+/**
+ * iort_register_domain_token() - register domain token and related ITS ID
+ * to the list from where we can get it back later on.
+ * @trans_id: ITS ID.
+ * @fw_node: Domain token.
+ *
+ * Returns: 0 on success, -ENOMEM if no memory when allocating list element
+ */
+int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node)
+{
+	struct iort_its_msi_chip *its_msi_chip;
+
+	its_msi_chip = kzalloc(sizeof(*its_msi_chip), GFP_KERNEL);
+	if (!its_msi_chip)
+		return -ENOMEM;
+
+	its_msi_chip->fw_node = fw_node;
+	its_msi_chip->translation_id = trans_id;
+
+	spin_lock(&iort_msi_chip_lock);
+	list_add(&its_msi_chip->list, &iort_msi_chip_list);
+	spin_unlock(&iort_msi_chip_lock);
+
+	return 0;
+}
+
+/**
+ * iort_deregister_domain_token() - Deregister domain token based on ITS ID
+ * @trans_id: ITS ID.
+ *
+ * Returns: none.
+ */
+void iort_deregister_domain_token(int trans_id)
+{
+	struct iort_its_msi_chip *its_msi_chip, *t;
+
+	spin_lock(&iort_msi_chip_lock);
+	list_for_each_entry_safe(its_msi_chip, t, &iort_msi_chip_list, list) {
+		if (its_msi_chip->translation_id == trans_id) {
+			list_del(&its_msi_chip->list);
+			kfree(its_msi_chip);
+			break;
+		}
+	}
+	spin_unlock(&iort_msi_chip_lock);
+}
+
+/**
+ * iort_find_domain_token() - Find domain token based on given ITS ID
+ * @trans_id: ITS ID.
+ *
+ * Returns: domain token when find on the list, NULL otherwise
+ */
+struct fwnode_handle *iort_find_domain_token(int trans_id)
+{
+	struct fwnode_handle *fw_node = NULL;
+	struct iort_its_msi_chip *its_msi_chip;
+
+	spin_lock(&iort_msi_chip_lock);
+	list_for_each_entry(its_msi_chip, &iort_msi_chip_list, list) {
+		if (its_msi_chip->translation_id == trans_id) {
+			fw_node = its_msi_chip->fw_node;
+			break;
+		}
+	}
+	spin_unlock(&iort_msi_chip_lock);
+
+	return fw_node;
+}
+
+static struct acpi_iort_node *iort_scan_node(enum acpi_iort_node_type type,
+					     iort_find_node_callback callback,
+					     void *context)
+{
+	struct acpi_iort_node *iort_node, *iort_end;
+	struct acpi_table_iort *iort;
+	int i;
+
+	if (!iort_table)
+		return NULL;
+
+	/* Get the first IORT node */
+	iort = (struct acpi_table_iort *)iort_table;
+	iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort,
+				 iort->node_offset);
+	iort_end = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
+				iort_table->length);
+
+	for (i = 0; i < iort->node_count; i++) {
+		if (WARN_TAINT(iort_node >= iort_end, TAINT_FIRMWARE_WORKAROUND,
+			       "IORT node pointer overflows, bad table!\n"))
+			return NULL;
+
+		if (iort_node->type == type &&
+		    ACPI_SUCCESS(callback(iort_node, context)))
+				return iort_node;
+
+		iort_node = ACPI_ADD_PTR(struct acpi_iort_node, iort_node,
+					 iort_node->length);
+	}
+
+	return NULL;
+}
+
+static acpi_status iort_match_node_callback(struct acpi_iort_node *node,
+					    void *context)
+{
+	struct device *dev = context;
+	acpi_status status;
+
+	if (node->type == ACPI_IORT_NODE_NAMED_COMPONENT) {
+		struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+		struct acpi_device *adev = to_acpi_device_node(dev->fwnode);
+		struct acpi_iort_named_component *ncomp;
+
+		if (!adev) {
+			status = AE_NOT_FOUND;
+			goto out;
+		}
+
+		status = acpi_get_name(adev->handle, ACPI_FULL_PATHNAME, &buf);
+		if (ACPI_FAILURE(status)) {
+			dev_warn(dev, "Can't get device full path name\n");
+			goto out;
+		}
+
+		ncomp = (struct acpi_iort_named_component *)node->node_data;
+		status = !strcmp(ncomp->device_name, buf.pointer) ?
+							AE_OK : AE_NOT_FOUND;
+		acpi_os_free(buf.pointer);
+	} else if (node->type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
+		struct acpi_iort_root_complex *pci_rc;
+		struct pci_bus *bus;
+
+		bus = to_pci_bus(dev);
+		pci_rc = (struct acpi_iort_root_complex *)node->node_data;
+
+		/*
+		 * It is assumed that PCI segment numbers maps one-to-one
+		 * with root complexes. Each segment number can represent only
+		 * one root complex.
+		 */
+		status = pci_rc->pci_segment_number == pci_domain_nr(bus) ?
+							AE_OK : AE_NOT_FOUND;
+	} else {
+		status = AE_NOT_FOUND;
+	}
+out:
+	return status;
+}
+
+static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in,
+		       u32 *rid_out)
+{
+	/* Single mapping does not care for input id */
+	if (map->flags & ACPI_IORT_ID_SINGLE_MAPPING) {
+		if (type == ACPI_IORT_NODE_NAMED_COMPONENT ||
+		    type == ACPI_IORT_NODE_PCI_ROOT_COMPLEX) {
+			*rid_out = map->output_base;
+			return 0;
+		}
+
+		pr_warn(FW_BUG "[map %p] SINGLE MAPPING flag not allowed for node type %d, skipping ID map\n",
+			map, type);
+		return -ENXIO;
+	}
+
+	if (rid_in < map->input_base ||
+	    (rid_in >= map->input_base + map->id_count))
+		return -ENXIO;
+
+	*rid_out = map->output_base + (rid_in - map->input_base);
+	return 0;
+}
+
+static struct acpi_iort_node *iort_node_map_rid(struct acpi_iort_node *node,
+						u32 rid_in, u32 *rid_out,
+						u8 type)
+{
+	u32 rid = rid_in;
+
+	/* Parse the ID mapping tree to find specified node type */
+	while (node) {
+		struct acpi_iort_id_mapping *map;
+		int i;
+
+		if (node->type == type) {
+			if (rid_out)
+				*rid_out = rid;
+			return node;
+		}
+
+		if (!node->mapping_offset || !node->mapping_count)
+			goto fail_map;
+
+		map = ACPI_ADD_PTR(struct acpi_iort_id_mapping, node,
+				   node->mapping_offset);
+
+		/* Firmware bug! */
+		if (!map->output_reference) {
+			pr_err(FW_BUG "[node %p type %d] ID map has NULL parent reference\n",
+			       node, node->type);
+			goto fail_map;
+		}
+
+		/* Do the RID translation */
+		for (i = 0; i < node->mapping_count; i++, map++) {
+			if (!iort_id_map(map, node->type, rid, &rid))
+				break;
+		}
+
+		if (i == node->mapping_count)
+			goto fail_map;
+
+		node = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
+				    map->output_reference);
+	}
+
+fail_map:
+	/* Map input RID to output RID unchanged on mapping failure*/
+	if (rid_out)
+		*rid_out = rid_in;
+
+	return NULL;
+}
+
+static struct acpi_iort_node *iort_find_dev_node(struct device *dev)
+{
+	struct pci_bus *pbus;
+
+	if (!dev_is_pci(dev))
+		return iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
+				      iort_match_node_callback, dev);
+
+	/* Find a PCI root bus */
+	pbus = to_pci_dev(dev)->bus;
+	while (!pci_is_root_bus(pbus))
+		pbus = pbus->parent;
+
+	return iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
+			      iort_match_node_callback, &pbus->dev);
+}
+
+/**
+ * iort_msi_map_rid() - Map a MSI requester ID for a device
+ * @dev: The device for which the mapping is to be done.
+ * @req_id: The device requester ID.
+ *
+ * Returns: mapped MSI RID on success, input requester ID otherwise
+ */
+u32 iort_msi_map_rid(struct device *dev, u32 req_id)
+{
+	struct acpi_iort_node *node;
+	u32 dev_id;
+
+	node = iort_find_dev_node(dev);
+	if (!node)
+		return req_id;
+
+	iort_node_map_rid(node, req_id, &dev_id, ACPI_IORT_NODE_ITS_GROUP);
+	return dev_id;
+}
+
+/**
+ * iort_dev_find_its_id() - Find the ITS identifier for a device
+ * @dev: The device.
+ * @idx: Index of the ITS identifier list.
+ * @its_id: ITS identifier.
+ *
+ * Returns: 0 on success, appropriate error value otherwise
+ */
+static int iort_dev_find_its_id(struct device *dev, u32 req_id,
+				unsigned int idx, int *its_id)
+{
+	struct acpi_iort_its_group *its;
+	struct acpi_iort_node *node;
+
+	node = iort_find_dev_node(dev);
+	if (!node)
+		return -ENXIO;
+
+	node = iort_node_map_rid(node, req_id, NULL, ACPI_IORT_NODE_ITS_GROUP);
+	if (!node)
+		return -ENXIO;
+
+	/* Move to ITS specific data */
+	its = (struct acpi_iort_its_group *)node->node_data;
+	if (idx > its->its_count) {
+		dev_err(dev, "requested ITS ID index [%d] is greater than available [%d]\n",
+			idx, its->its_count);
+		return -ENXIO;
+	}
+
+	*its_id = its->identifiers[idx];
+	return 0;
+}
+
+/**
+ * iort_get_device_domain() - Find MSI domain related to a device
+ * @dev: The device.
+ * @req_id: Requester ID for the device.
+ *
+ * Returns: the MSI domain for this device, NULL otherwise
+ */
+struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id)
+{
+	struct fwnode_handle *handle;
+	int its_id;
+
+	if (iort_dev_find_its_id(dev, req_id, 0, &its_id))
+		return NULL;
+
+	handle = iort_find_domain_token(its_id);
+	if (!handle)
+		return NULL;
+
+	return irq_find_matching_fwnode(handle, DOMAIN_BUS_PCI_MSI);
+}
+
+void __init acpi_iort_init(void)
+{
+	acpi_status status;
+
+	status = acpi_get_table(ACPI_SIG_IORT, 0, &iort_table);
+	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+		const char *msg = acpi_format_exception(status);
+		pr_err("Failed to get table, %s\n", msg);
+	}
+}
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index ab23479..93ecae5 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -733,15 +733,17 @@
 			return result;
 		acpi_battery_init_alarm(battery);
 	}
+
+	result = acpi_battery_get_state(battery);
+	if (result)
+		return result;
+	acpi_battery_quirks(battery);
+
 	if (!battery->bat) {
 		result = sysfs_add_battery(battery);
 		if (result)
 			return result;
 	}
-	result = acpi_battery_get_state(battery);
-	if (result)
-		return result;
-	acpi_battery_quirks(battery);
 
 	/*
 	 * Wakeup the system if battery is critical low
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 85b7d07..56190d0 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -36,6 +36,7 @@
 #ifdef CONFIG_X86
 #include <asm/mpspec.h>
 #endif
+#include <linux/acpi_iort.h>
 #include <linux/pci.h>
 #include <acpi/apei.h>
 #include <linux/dmi.h>
@@ -985,7 +986,8 @@
 		goto error0;
 	}
 
-	if (acpi_gbl_group_module_level_code) {
+	if (!acpi_gbl_parse_table_as_term_list &&
+	    acpi_gbl_group_module_level_code) {
 		status = acpi_load_tables();
 		if (ACPI_FAILURE(status)) {
 			printk(KERN_ERR PREFIX
@@ -1074,7 +1076,8 @@
 	status = acpi_ec_ecdt_probe();
 	/* Ignore result. Not having an ECDT is not fatal. */
 
-	if (!acpi_gbl_group_module_level_code) {
+	if (acpi_gbl_parse_table_as_term_list ||
+	    !acpi_gbl_group_module_level_code) {
 		status = acpi_load_tables();
 		if (ACPI_FAILURE(status)) {
 			printk(KERN_ERR PREFIX
@@ -1186,6 +1189,7 @@
 	}
 
 	pci_mmcfg_late_init();
+	acpi_iort_init();
 	acpi_scan_init();
 	acpi_ec_init();
 	acpi_debugfs_init();
@@ -1193,6 +1197,7 @@
 	acpi_wakeup_device_init();
 	acpi_debugger_init();
 	acpi_setup_sb_notify_handler();
+	acpi_set_processor_mapping();
 	return 0;
 }
 
diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c
index 31abb0b..e19f530 100644
--- a/drivers/acpi/button.c
+++ b/drivers/acpi/button.c
@@ -19,6 +19,8 @@
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  */
 
+#define pr_fmt(fmt) "ACPI : button: " fmt
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -104,6 +106,8 @@
 	struct input_dev *input;
 	char phys[32];			/* for input device */
 	unsigned long pushed;
+	int last_state;
+	ktime_t last_time;
 	bool suspended;
 };
 
@@ -111,6 +115,10 @@
 static struct acpi_device *lid_device;
 static u8 lid_init_state = ACPI_BUTTON_LID_INIT_METHOD;
 
+static unsigned long lid_report_interval __read_mostly = 500;
+module_param(lid_report_interval, ulong, 0644);
+MODULE_PARM_DESC(lid_report_interval, "Interval (ms) between lid key events");
+
 /* --------------------------------------------------------------------------
                               FS Interface (/proc)
    -------------------------------------------------------------------------- */
@@ -134,10 +142,79 @@
 {
 	struct acpi_button *button = acpi_driver_data(device);
 	int ret;
+	ktime_t next_report;
+	bool do_update;
 
-	/* input layer checks if event is redundant */
-	input_report_switch(button->input, SW_LID, !state);
-	input_sync(button->input);
+	/*
+	 * In lid_init_state=ignore mode, if user opens/closes lid
+	 * frequently with "open" missing, and "last_time" is also updated
+	 * frequently, "close" cannot be delivered to the userspace.
+	 * So "last_time" is only updated after a timeout or an actual
+	 * switch.
+	 */
+	if (lid_init_state != ACPI_BUTTON_LID_INIT_IGNORE ||
+	    button->last_state != !!state)
+		do_update = true;
+	else
+		do_update = false;
+
+	next_report = ktime_add(button->last_time,
+				ms_to_ktime(lid_report_interval));
+	if (button->last_state == !!state &&
+	    ktime_after(ktime_get(), next_report)) {
+		/* Complain the buggy firmware */
+		pr_warn_once("The lid device is not compliant to SW_LID.\n");
+
+		/*
+		 * Send the unreliable complement switch event:
+		 *
+		 * On most platforms, the lid device is reliable. However
+		 * there are exceptions:
+		 * 1. Platforms returning initial lid state as "close" by
+		 *    default after booting/resuming:
+		 *     https://bugzilla.kernel.org/show_bug.cgi?id=89211
+		 *     https://bugzilla.kernel.org/show_bug.cgi?id=106151
+		 * 2. Platforms never reporting "open" events:
+		 *     https://bugzilla.kernel.org/show_bug.cgi?id=106941
+		 * On these buggy platforms, the usage model of the ACPI
+		 * lid device actually is:
+		 * 1. The initial returning value of _LID may not be
+		 *    reliable.
+		 * 2. The open event may not be reliable.
+		 * 3. The close event is reliable.
+		 *
+		 * But SW_LID is typed as input switch event, the input
+		 * layer checks if the event is redundant. Hence if the
+		 * state is not switched, the userspace cannot see this
+		 * platform triggered reliable event. By inserting a
+		 * complement switch event, it then is guaranteed that the
+		 * platform triggered reliable one can always be seen by
+		 * the userspace.
+		 */
+		if (lid_init_state == ACPI_BUTTON_LID_INIT_IGNORE) {
+			do_update = true;
+			/*
+			 * Do generate complement switch event for "close"
+			 * as "close" is reliable and wrong "open" won't
+			 * trigger unexpected behaviors.
+			 * Do not generate complement switch event for
+			 * "open" as "open" is not reliable and wrong
+			 * "close" will trigger unexpected behaviors.
+			 */
+			if (!state) {
+				input_report_switch(button->input,
+						    SW_LID, state);
+				input_sync(button->input);
+			}
+		}
+	}
+	/* Send the platform triggered reliable event */
+	if (do_update) {
+		input_report_switch(button->input, SW_LID, !state);
+		input_sync(button->input);
+		button->last_state = !!state;
+		button->last_time = ktime_get();
+	}
 
 	if (state)
 		pm_wakeup_event(&device->dev, 0);
@@ -411,6 +488,8 @@
 		strcpy(name, ACPI_BUTTON_DEVICE_NAME_LID);
 		sprintf(class, "%s/%s",
 			ACPI_BUTTON_CLASS, ACPI_BUTTON_SUBCLASS_LID);
+		button->last_state = !!acpi_lid_evaluate_state(device);
+		button->last_time = ktime_get();
 	} else {
 		printk(KERN_ERR PREFIX "Unsupported hid [%s]\n", hid);
 		error = -ENODEV;
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 2e98173..d0d0504 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -40,15 +40,48 @@
 #include <linux/cpufreq.h>
 #include <linux/delay.h>
 #include <linux/ktime.h>
+#include <linux/rwsem.h>
+#include <linux/wait.h>
 
 #include <acpi/cppc_acpi.h>
-/*
- * Lock to provide mutually exclusive access to the PCC
- * channel. e.g. When the remote updates the shared region
- * with new data, the reader needs to be protected from
- * other CPUs activity on the same channel.
- */
-static DEFINE_SPINLOCK(pcc_lock);
+
+struct cppc_pcc_data {
+	struct mbox_chan *pcc_channel;
+	void __iomem *pcc_comm_addr;
+	int pcc_subspace_idx;
+	bool pcc_channel_acquired;
+	ktime_t deadline;
+	unsigned int pcc_mpar, pcc_mrtt, pcc_nominal;
+
+	bool pending_pcc_write_cmd;	/* Any pending/batched PCC write cmds? */
+	bool platform_owns_pcc;		/* Ownership of PCC subspace */
+	unsigned int pcc_write_cnt;	/* Running count of PCC write commands */
+
+	/*
+	 * Lock to provide controlled access to the PCC channel.
+	 *
+	 * For performance critical usecases(currently cppc_set_perf)
+	 *	We need to take read_lock and check if channel belongs to OSPM
+	 * before reading or writing to PCC subspace
+	 *	We need to take write_lock before transferring the channel
+	 * ownership to the platform via a Doorbell
+	 *	This allows us to batch a number of CPPC requests if they happen
+	 * to originate in about the same time
+	 *
+	 * For non-performance critical usecases(init)
+	 *	Take write_lock for all purposes which gives exclusive access
+	 */
+	struct rw_semaphore pcc_lock;
+
+	/* Wait queue for CPUs whose requests were batched */
+	wait_queue_head_t pcc_write_wait_q;
+};
+
+/* Structure to represent the single PCC channel */
+static struct cppc_pcc_data pcc_data = {
+	.pcc_subspace_idx = -1,
+	.platform_owns_pcc = true,
+};
 
 /*
  * The cpc_desc structure contains the ACPI register details
@@ -59,18 +92,25 @@
  */
 static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
 
-/* This layer handles all the PCC specifics for CPPC. */
-static struct mbox_chan *pcc_channel;
-static void __iomem *pcc_comm_addr;
-static u64 comm_base_addr;
-static int pcc_subspace_idx = -1;
-static bool pcc_channel_acquired;
-static ktime_t deadline;
-static unsigned int pcc_mpar, pcc_mrtt;
-
 /* pcc mapped address + header size + offset within PCC subspace */
-#define GET_PCC_VADDR(offs) (pcc_comm_addr + 0x8 + (offs))
+#define GET_PCC_VADDR(offs) (pcc_data.pcc_comm_addr + 0x8 + (offs))
 
+/* Check if a CPC regsiter is in PCC */
+#define CPC_IN_PCC(cpc) ((cpc)->type == ACPI_TYPE_BUFFER &&		\
+				(cpc)->cpc_entry.reg.space_id ==	\
+				ACPI_ADR_SPACE_PLATFORM_COMM)
+
+/* Evalutes to True if reg is a NULL register descriptor */
+#define IS_NULL_REG(reg) ((reg)->space_id ==  ACPI_ADR_SPACE_SYSTEM_MEMORY && \
+				(reg)->address == 0 &&			\
+				(reg)->bit_width == 0 &&		\
+				(reg)->bit_offset == 0 &&		\
+				(reg)->access_width == 0)
+
+/* Evalutes to True if an optional cpc field is supported */
+#define CPC_SUPPORTED(cpc) ((cpc)->type == ACPI_TYPE_INTEGER ?		\
+				!!(cpc)->cpc_entry.int_value :		\
+				!IS_NULL_REG(&(cpc)->cpc_entry.reg))
 /*
  * Arbitrary Retries in case the remote processor is slow to respond
  * to PCC commands. Keeping it high enough to cover emulators where
@@ -78,11 +118,79 @@
  */
 #define NUM_RETRIES 500
 
-static int check_pcc_chan(void)
+struct cppc_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct kobject *kobj,
+			struct attribute *attr, char *buf);
+	ssize_t (*store)(struct kobject *kobj,
+			struct attribute *attr, const char *c, ssize_t count);
+};
+
+#define define_one_cppc_ro(_name)		\
+static struct cppc_attr _name =			\
+__ATTR(_name, 0444, show_##_name, NULL)
+
+#define to_cpc_desc(a) container_of(a, struct cpc_desc, kobj)
+
+static ssize_t show_feedback_ctrs(struct kobject *kobj,
+		struct attribute *attr, char *buf)
 {
-	int ret = -EIO;
-	struct acpi_pcct_shared_memory __iomem *generic_comm_base = pcc_comm_addr;
-	ktime_t next_deadline = ktime_add(ktime_get(), deadline);
+	struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);
+	struct cppc_perf_fb_ctrs fb_ctrs = {0};
+
+	cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
+
+	return scnprintf(buf, PAGE_SIZE, "ref:%llu del:%llu\n",
+			fb_ctrs.reference, fb_ctrs.delivered);
+}
+define_one_cppc_ro(feedback_ctrs);
+
+static ssize_t show_reference_perf(struct kobject *kobj,
+		struct attribute *attr, char *buf)
+{
+	struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);
+	struct cppc_perf_fb_ctrs fb_ctrs = {0};
+
+	cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n",
+			fb_ctrs.reference_perf);
+}
+define_one_cppc_ro(reference_perf);
+
+static ssize_t show_wraparound_time(struct kobject *kobj,
+				struct attribute *attr, char *buf)
+{
+	struct cpc_desc *cpc_ptr = to_cpc_desc(kobj);
+	struct cppc_perf_fb_ctrs fb_ctrs = {0};
+
+	cppc_get_perf_ctrs(cpc_ptr->cpu_id, &fb_ctrs);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", fb_ctrs.ctr_wrap_time);
+
+}
+define_one_cppc_ro(wraparound_time);
+
+static struct attribute *cppc_attrs[] = {
+	&feedback_ctrs.attr,
+	&reference_perf.attr,
+	&wraparound_time.attr,
+	NULL
+};
+
+static struct kobj_type cppc_ktype = {
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_attrs = cppc_attrs,
+};
+
+static int check_pcc_chan(bool chk_err_bit)
+{
+	int ret = -EIO, status = 0;
+	struct acpi_pcct_shared_memory __iomem *generic_comm_base = pcc_data.pcc_comm_addr;
+	ktime_t next_deadline = ktime_add(ktime_get(), pcc_data.deadline);
+
+	if (!pcc_data.platform_owns_pcc)
+		return 0;
 
 	/* Retry in case the remote processor was too slow to catch up. */
 	while (!ktime_after(ktime_get(), next_deadline)) {
@@ -91,8 +199,11 @@
 		 * platform and should have set the command completion bit when
 		 * PCC can be used by OSPM
 		 */
-		if (readw_relaxed(&generic_comm_base->status) & PCC_CMD_COMPLETE) {
+		status = readw_relaxed(&generic_comm_base->status);
+		if (status & PCC_CMD_COMPLETE_MASK) {
 			ret = 0;
+			if (chk_err_bit && (status & PCC_ERROR_MASK))
+				ret = -EIO;
 			break;
 		}
 		/*
@@ -102,14 +213,23 @@
 		udelay(3);
 	}
 
+	if (likely(!ret))
+		pcc_data.platform_owns_pcc = false;
+	else
+		pr_err("PCC check channel failed. Status=%x\n", status);
+
 	return ret;
 }
 
+/*
+ * This function transfers the ownership of the PCC to the platform
+ * So it must be called while holding write_lock(pcc_lock)
+ */
 static int send_pcc_cmd(u16 cmd)
 {
-	int ret = -EIO;
+	int ret = -EIO, i;
 	struct acpi_pcct_shared_memory *generic_comm_base =
-		(struct acpi_pcct_shared_memory *) pcc_comm_addr;
+		(struct acpi_pcct_shared_memory *) pcc_data.pcc_comm_addr;
 	static ktime_t last_cmd_cmpl_time, last_mpar_reset;
 	static int mpar_count;
 	unsigned int time_delta;
@@ -119,20 +239,29 @@
 	 * the channel before writing to PCC space
 	 */
 	if (cmd == CMD_READ) {
-		ret = check_pcc_chan();
+		/*
+		 * If there are pending cpc_writes, then we stole the channel
+		 * before write completion, so first send a WRITE command to
+		 * platform
+		 */
+		if (pcc_data.pending_pcc_write_cmd)
+			send_pcc_cmd(CMD_WRITE);
+
+		ret = check_pcc_chan(false);
 		if (ret)
-			return ret;
-	}
+			goto end;
+	} else /* CMD_WRITE */
+		pcc_data.pending_pcc_write_cmd = FALSE;
 
 	/*
 	 * Handle the Minimum Request Turnaround Time(MRTT)
 	 * "The minimum amount of time that OSPM must wait after the completion
 	 * of a command before issuing the next command, in microseconds"
 	 */
-	if (pcc_mrtt) {
+	if (pcc_data.pcc_mrtt) {
 		time_delta = ktime_us_delta(ktime_get(), last_cmd_cmpl_time);
-		if (pcc_mrtt > time_delta)
-			udelay(pcc_mrtt - time_delta);
+		if (pcc_data.pcc_mrtt > time_delta)
+			udelay(pcc_data.pcc_mrtt - time_delta);
 	}
 
 	/*
@@ -146,15 +275,16 @@
 	 * not send the request to the platform after hitting the MPAR limit in
 	 * any 60s window
 	 */
-	if (pcc_mpar) {
+	if (pcc_data.pcc_mpar) {
 		if (mpar_count == 0) {
 			time_delta = ktime_ms_delta(ktime_get(), last_mpar_reset);
 			if (time_delta < 60 * MSEC_PER_SEC) {
 				pr_debug("PCC cmd not sent due to MPAR limit");
-				return -EIO;
+				ret = -EIO;
+				goto end;
 			}
 			last_mpar_reset = ktime_get();
-			mpar_count = pcc_mpar;
+			mpar_count = pcc_data.pcc_mpar;
 		}
 		mpar_count--;
 	}
@@ -165,33 +295,43 @@
 	/* Flip CMD COMPLETE bit */
 	writew_relaxed(0, &generic_comm_base->status);
 
+	pcc_data.platform_owns_pcc = true;
+
 	/* Ring doorbell */
-	ret = mbox_send_message(pcc_channel, &cmd);
+	ret = mbox_send_message(pcc_data.pcc_channel, &cmd);
 	if (ret < 0) {
 		pr_err("Err sending PCC mbox message. cmd:%d, ret:%d\n",
 				cmd, ret);
-		return ret;
+		goto end;
 	}
 
-	/*
-	 * For READs we need to ensure the cmd completed to ensure
-	 * the ensuing read()s can proceed. For WRITEs we dont care
-	 * because the actual write()s are done before coming here
-	 * and the next READ or WRITE will check if the channel
-	 * is busy/free at the entry of this call.
-	 *
-	 * If Minimum Request Turnaround Time is non-zero, we need
-	 * to record the completion time of both READ and WRITE
-	 * command for proper handling of MRTT, so we need to check
-	 * for pcc_mrtt in addition to CMD_READ
-	 */
-	if (cmd == CMD_READ || pcc_mrtt) {
-		ret = check_pcc_chan();
-		if (pcc_mrtt)
-			last_cmd_cmpl_time = ktime_get();
+	/* wait for completion and check for PCC errro bit */
+	ret = check_pcc_chan(true);
+
+	if (pcc_data.pcc_mrtt)
+		last_cmd_cmpl_time = ktime_get();
+
+	if (pcc_data.pcc_channel->mbox->txdone_irq)
+		mbox_chan_txdone(pcc_data.pcc_channel, ret);
+	else
+		mbox_client_txdone(pcc_data.pcc_channel, ret);
+
+end:
+	if (cmd == CMD_WRITE) {
+		if (unlikely(ret)) {
+			for_each_possible_cpu(i) {
+				struct cpc_desc *desc = per_cpu(cpc_desc_ptr, i);
+				if (!desc)
+					continue;
+
+				if (desc->write_cmd_id == pcc_data.pcc_write_cnt)
+					desc->write_cmd_status = ret;
+			}
+		}
+		pcc_data.pcc_write_cnt++;
+		wake_up_all(&pcc_data.pcc_write_wait_q);
 	}
 
-	mbox_client_txdone(pcc_channel, ret);
 	return ret;
 }
 
@@ -272,13 +412,13 @@
  *
  *	Return: 0 for success or negative value for err.
  */
-int acpi_get_psd_map(struct cpudata **all_cpu_data)
+int acpi_get_psd_map(struct cppc_cpudata **all_cpu_data)
 {
 	int count_target;
 	int retval = 0;
 	unsigned int i, j;
 	cpumask_var_t covered_cpus;
-	struct cpudata *pr, *match_pr;
+	struct cppc_cpudata *pr, *match_pr;
 	struct acpi_psd_package *pdomain;
 	struct acpi_psd_package *match_pdomain;
 	struct cpc_desc *cpc_ptr, *match_cpc_ptr;
@@ -394,14 +534,13 @@
 static int register_pcc_channel(int pcc_subspace_idx)
 {
 	struct acpi_pcct_hw_reduced *cppc_ss;
-	unsigned int len;
 	u64 usecs_lat;
 
 	if (pcc_subspace_idx >= 0) {
-		pcc_channel = pcc_mbox_request_channel(&cppc_mbox_cl,
+		pcc_data.pcc_channel = pcc_mbox_request_channel(&cppc_mbox_cl,
 				pcc_subspace_idx);
 
-		if (IS_ERR(pcc_channel)) {
+		if (IS_ERR(pcc_data.pcc_channel)) {
 			pr_err("Failed to find PCC communication channel\n");
 			return -ENODEV;
 		}
@@ -412,7 +551,7 @@
 		 * PCC channels) and stored pointers to the
 		 * subspace communication region in con_priv.
 		 */
-		cppc_ss = pcc_channel->con_priv;
+		cppc_ss = (pcc_data.pcc_channel)->con_priv;
 
 		if (!cppc_ss) {
 			pr_err("No PCC subspace found for CPPC\n");
@@ -420,35 +559,42 @@
 		}
 
 		/*
-		 * This is the shared communication region
-		 * for the OS and Platform to communicate over.
-		 */
-		comm_base_addr = cppc_ss->base_address;
-		len = cppc_ss->length;
-
-		/*
 		 * cppc_ss->latency is just a Nominal value. In reality
 		 * the remote processor could be much slower to reply.
 		 * So add an arbitrary amount of wait on top of Nominal.
 		 */
 		usecs_lat = NUM_RETRIES * cppc_ss->latency;
-		deadline = ns_to_ktime(usecs_lat * NSEC_PER_USEC);
-		pcc_mrtt = cppc_ss->min_turnaround_time;
-		pcc_mpar = cppc_ss->max_access_rate;
+		pcc_data.deadline = ns_to_ktime(usecs_lat * NSEC_PER_USEC);
+		pcc_data.pcc_mrtt = cppc_ss->min_turnaround_time;
+		pcc_data.pcc_mpar = cppc_ss->max_access_rate;
+		pcc_data.pcc_nominal = cppc_ss->latency;
 
-		pcc_comm_addr = acpi_os_ioremap(comm_base_addr, len);
-		if (!pcc_comm_addr) {
+		pcc_data.pcc_comm_addr = acpi_os_ioremap(cppc_ss->base_address, cppc_ss->length);
+		if (!pcc_data.pcc_comm_addr) {
 			pr_err("Failed to ioremap PCC comm region mem\n");
 			return -ENOMEM;
 		}
 
 		/* Set flag so that we dont come here for each CPU. */
-		pcc_channel_acquired = true;
+		pcc_data.pcc_channel_acquired = true;
 	}
 
 	return 0;
 }
 
+/**
+ * cpc_ffh_supported() - check if FFH reading supported
+ *
+ * Check if the architecture has support for functional fixed hardware
+ * read/write capability.
+ *
+ * Return: true for supported, false for not supported
+ */
+bool __weak cpc_ffh_supported(void)
+{
+	return false;
+}
+
 /*
  * An example CPC table looks like the following.
  *
@@ -507,6 +653,7 @@
 	union acpi_object *out_obj, *cpc_obj;
 	struct cpc_desc *cpc_ptr;
 	struct cpc_reg *gas_t;
+	struct device *cpu_dev;
 	acpi_handle handle = pr->handle;
 	unsigned int num_ent, i, cpc_rev;
 	acpi_status status;
@@ -545,6 +692,8 @@
 		goto out_free;
 	}
 
+	cpc_ptr->num_entries = num_ent;
+
 	/* Second entry should be revision. */
 	cpc_obj = &out_obj->package.elements[1];
 	if (cpc_obj->type == ACPI_TYPE_INTEGER)	{
@@ -579,16 +728,27 @@
 			 * so extract it only once.
 			 */
 			if (gas_t->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
-				if (pcc_subspace_idx < 0)
-					pcc_subspace_idx = gas_t->access_width;
-				else if (pcc_subspace_idx != gas_t->access_width) {
+				if (pcc_data.pcc_subspace_idx < 0)
+					pcc_data.pcc_subspace_idx = gas_t->access_width;
+				else if (pcc_data.pcc_subspace_idx != gas_t->access_width) {
 					pr_debug("Mismatched PCC ids.\n");
 					goto out_free;
 				}
-			} else if (gas_t->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) {
-				/* Support only PCC and SYS MEM type regs */
-				pr_debug("Unsupported register type: %d\n", gas_t->space_id);
-				goto out_free;
+			} else if (gas_t->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+				if (gas_t->address) {
+					void __iomem *addr;
+
+					addr = ioremap(gas_t->address, gas_t->bit_width/8);
+					if (!addr)
+						goto out_free;
+					cpc_ptr->cpc_regs[i-2].sys_mem_vaddr = addr;
+				}
+			} else {
+				if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
+					/* Support only PCC ,SYS MEM and FFH type regs */
+					pr_debug("Unsupported register type: %d\n", gas_t->space_id);
+					goto out_free;
+				}
 			}
 
 			cpc_ptr->cpc_regs[i-2].type = ACPI_TYPE_BUFFER;
@@ -607,10 +767,13 @@
 		goto out_free;
 
 	/* Register PCC channel once for all CPUs. */
-	if (!pcc_channel_acquired) {
-		ret = register_pcc_channel(pcc_subspace_idx);
+	if (!pcc_data.pcc_channel_acquired) {
+		ret = register_pcc_channel(pcc_data.pcc_subspace_idx);
 		if (ret)
 			goto out_free;
+
+		init_rwsem(&pcc_data.pcc_lock);
+		init_waitqueue_head(&pcc_data.pcc_write_wait_q);
 	}
 
 	/* Plug PSD data into this CPUs CPC descriptor. */
@@ -619,10 +782,27 @@
 	/* Everything looks okay */
 	pr_debug("Parsed CPC struct for CPU: %d\n", pr->id);
 
+	/* Add per logical CPU nodes for reading its feedback counters. */
+	cpu_dev = get_cpu_device(pr->id);
+	if (!cpu_dev)
+		goto out_free;
+
+	ret = kobject_init_and_add(&cpc_ptr->kobj, &cppc_ktype, &cpu_dev->kobj,
+			"acpi_cppc");
+	if (ret)
+		goto out_free;
+
 	kfree(output.pointer);
 	return 0;
 
 out_free:
+	/* Free all the mapped sys mem areas for this CPU */
+	for (i = 2; i < cpc_ptr->num_entries; i++) {
+		void __iomem *addr = cpc_ptr->cpc_regs[i-2].sys_mem_vaddr;
+
+		if (addr)
+			iounmap(addr);
+	}
 	kfree(cpc_ptr);
 
 out_buf_free:
@@ -640,26 +820,82 @@
 void acpi_cppc_processor_exit(struct acpi_processor *pr)
 {
 	struct cpc_desc *cpc_ptr;
+	unsigned int i;
+	void __iomem *addr;
+
 	cpc_ptr = per_cpu(cpc_desc_ptr, pr->id);
+
+	/* Free all the mapped sys mem areas for this CPU */
+	for (i = 2; i < cpc_ptr->num_entries; i++) {
+		addr = cpc_ptr->cpc_regs[i-2].sys_mem_vaddr;
+		if (addr)
+			iounmap(addr);
+	}
+
+	kobject_put(&cpc_ptr->kobj);
 	kfree(cpc_ptr);
 }
 EXPORT_SYMBOL_GPL(acpi_cppc_processor_exit);
 
+/**
+ * cpc_read_ffh() - Read FFH register
+ * @cpunum:	cpu number to read
+ * @reg:	cppc register information
+ * @val:	place holder for return value
+ *
+ * Read bit_width bits from a specified address and bit_offset
+ *
+ * Return: 0 for success and error code
+ */
+int __weak cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val)
+{
+	return -ENOTSUPP;
+}
+
+/**
+ * cpc_write_ffh() - Write FFH register
+ * @cpunum:	cpu number to write
+ * @reg:	cppc register information
+ * @val:	value to write
+ *
+ * Write value of bit_width bits to a specified address and bit_offset
+ *
+ * Return: 0 for success and error code
+ */
+int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
+{
+	return -ENOTSUPP;
+}
+
 /*
  * Since cpc_read and cpc_write are called while holding pcc_lock, it should be
  * as fast as possible. We have already mapped the PCC subspace during init, so
  * we can directly write to it.
  */
 
-static int cpc_read(struct cpc_reg *reg, u64 *val)
+static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val)
 {
 	int ret_val = 0;
+	void __iomem *vaddr = 0;
+	struct cpc_reg *reg = &reg_res->cpc_entry.reg;
+
+	if (reg_res->type == ACPI_TYPE_INTEGER) {
+		*val = reg_res->cpc_entry.int_value;
+		return ret_val;
+	}
 
 	*val = 0;
-	if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
-		void __iomem *vaddr = GET_PCC_VADDR(reg->address);
+	if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM)
+		vaddr = GET_PCC_VADDR(reg->address);
+	else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
+		vaddr = reg_res->sys_mem_vaddr;
+	else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE)
+		return cpc_read_ffh(cpu, reg, val);
+	else
+		return acpi_os_read_memory((acpi_physical_address)reg->address,
+				val, reg->bit_width);
 
-		switch (reg->bit_width) {
+	switch (reg->bit_width) {
 		case 8:
 			*val = readb_relaxed(vaddr);
 			break;
@@ -674,23 +910,30 @@
 			break;
 		default:
 			pr_debug("Error: Cannot read %u bit width from PCC\n",
-				reg->bit_width);
+					reg->bit_width);
 			ret_val = -EFAULT;
-		}
-	} else
-		ret_val = acpi_os_read_memory((acpi_physical_address)reg->address,
-					val, reg->bit_width);
+	}
+
 	return ret_val;
 }
 
-static int cpc_write(struct cpc_reg *reg, u64 val)
+static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
 {
 	int ret_val = 0;
+	void __iomem *vaddr = 0;
+	struct cpc_reg *reg = &reg_res->cpc_entry.reg;
 
-	if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
-		void __iomem *vaddr = GET_PCC_VADDR(reg->address);
+	if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM)
+		vaddr = GET_PCC_VADDR(reg->address);
+	else if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
+		vaddr = reg_res->sys_mem_vaddr;
+	else if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE)
+		return cpc_write_ffh(cpu, reg, val);
+	else
+		return acpi_os_write_memory((acpi_physical_address)reg->address,
+				val, reg->bit_width);
 
-		switch (reg->bit_width) {
+	switch (reg->bit_width) {
 		case 8:
 			writeb_relaxed(val, vaddr);
 			break;
@@ -705,13 +948,11 @@
 			break;
 		default:
 			pr_debug("Error: Cannot write %u bit width to PCC\n",
-				reg->bit_width);
+					reg->bit_width);
 			ret_val = -EFAULT;
 			break;
-		}
-	} else
-		ret_val = acpi_os_write_memory((acpi_physical_address)reg->address,
-				val, reg->bit_width);
+	}
+
 	return ret_val;
 }
 
@@ -727,8 +968,8 @@
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
 	struct cpc_register_resource *highest_reg, *lowest_reg, *ref_perf,
 								 *nom_perf;
-	u64 high, low, ref, nom;
-	int ret = 0;
+	u64 high, low, nom;
+	int ret = 0, regs_in_pcc = 0;
 
 	if (!cpc_desc) {
 		pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
@@ -740,13 +981,11 @@
 	ref_perf = &cpc_desc->cpc_regs[REFERENCE_PERF];
 	nom_perf = &cpc_desc->cpc_regs[NOMINAL_PERF];
 
-	spin_lock(&pcc_lock);
-
 	/* Are any of the regs PCC ?*/
-	if ((highest_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
-			(lowest_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
-			(ref_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
-			(nom_perf->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) {
+	if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) ||
+		CPC_IN_PCC(ref_perf) || CPC_IN_PCC(nom_perf)) {
+		regs_in_pcc = 1;
+		down_write(&pcc_data.pcc_lock);
 		/* Ring doorbell once to update PCC subspace */
 		if (send_pcc_cmd(CMD_READ) < 0) {
 			ret = -EIO;
@@ -754,26 +993,21 @@
 		}
 	}
 
-	cpc_read(&highest_reg->cpc_entry.reg, &high);
+	cpc_read(cpunum, highest_reg, &high);
 	perf_caps->highest_perf = high;
 
-	cpc_read(&lowest_reg->cpc_entry.reg, &low);
+	cpc_read(cpunum, lowest_reg, &low);
 	perf_caps->lowest_perf = low;
 
-	cpc_read(&ref_perf->cpc_entry.reg, &ref);
-	perf_caps->reference_perf = ref;
-
-	cpc_read(&nom_perf->cpc_entry.reg, &nom);
+	cpc_read(cpunum, nom_perf, &nom);
 	perf_caps->nominal_perf = nom;
 
-	if (!ref)
-		perf_caps->reference_perf = perf_caps->nominal_perf;
-
 	if (!high || !low || !nom)
 		ret = -EFAULT;
 
 out_err:
-	spin_unlock(&pcc_lock);
+	if (regs_in_pcc)
+		up_write(&pcc_data.pcc_lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cppc_get_perf_caps);
@@ -788,9 +1022,10 @@
 int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
 {
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
-	struct cpc_register_resource *delivered_reg, *reference_reg;
-	u64 delivered, reference;
-	int ret = 0;
+	struct cpc_register_resource *delivered_reg, *reference_reg,
+		*ref_perf_reg, *ctr_wrap_reg;
+	u64 delivered, reference, ref_perf, ctr_wrap_time;
+	int ret = 0, regs_in_pcc = 0;
 
 	if (!cpc_desc) {
 		pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
@@ -799,12 +1034,21 @@
 
 	delivered_reg = &cpc_desc->cpc_regs[DELIVERED_CTR];
 	reference_reg = &cpc_desc->cpc_regs[REFERENCE_CTR];
+	ref_perf_reg = &cpc_desc->cpc_regs[REFERENCE_PERF];
+	ctr_wrap_reg = &cpc_desc->cpc_regs[CTR_WRAP_TIME];
 
-	spin_lock(&pcc_lock);
+	/*
+	 * If refernce perf register is not supported then we should
+	 * use the nominal perf value
+	 */
+	if (!CPC_SUPPORTED(ref_perf_reg))
+		ref_perf_reg = &cpc_desc->cpc_regs[NOMINAL_PERF];
 
 	/* Are any of the regs PCC ?*/
-	if ((delivered_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) ||
-			(reference_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM)) {
+	if (CPC_IN_PCC(delivered_reg) || CPC_IN_PCC(reference_reg) ||
+		CPC_IN_PCC(ctr_wrap_reg) || CPC_IN_PCC(ref_perf_reg)) {
+		down_write(&pcc_data.pcc_lock);
+		regs_in_pcc = 1;
 		/* Ring doorbell once to update PCC subspace */
 		if (send_pcc_cmd(CMD_READ) < 0) {
 			ret = -EIO;
@@ -812,25 +1056,31 @@
 		}
 	}
 
-	cpc_read(&delivered_reg->cpc_entry.reg, &delivered);
-	cpc_read(&reference_reg->cpc_entry.reg, &reference);
+	cpc_read(cpunum, delivered_reg, &delivered);
+	cpc_read(cpunum, reference_reg, &reference);
+	cpc_read(cpunum, ref_perf_reg, &ref_perf);
 
-	if (!delivered || !reference) {
+	/*
+	 * Per spec, if ctr_wrap_time optional register is unsupported, then the
+	 * performance counters are assumed to never wrap during the lifetime of
+	 * platform
+	 */
+	ctr_wrap_time = (u64)(~((u64)0));
+	if (CPC_SUPPORTED(ctr_wrap_reg))
+		cpc_read(cpunum, ctr_wrap_reg, &ctr_wrap_time);
+
+	if (!delivered || !reference ||	!ref_perf) {
 		ret = -EFAULT;
 		goto out_err;
 	}
 
 	perf_fb_ctrs->delivered = delivered;
 	perf_fb_ctrs->reference = reference;
-
-	perf_fb_ctrs->delivered -= perf_fb_ctrs->prev_delivered;
-	perf_fb_ctrs->reference -= perf_fb_ctrs->prev_reference;
-
-	perf_fb_ctrs->prev_delivered = delivered;
-	perf_fb_ctrs->prev_reference = reference;
-
+	perf_fb_ctrs->reference_perf = ref_perf;
+	perf_fb_ctrs->ctr_wrap_time = ctr_wrap_time;
 out_err:
-	spin_unlock(&pcc_lock);
+	if (regs_in_pcc)
+		up_write(&pcc_data.pcc_lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cppc_get_perf_ctrs);
@@ -855,30 +1105,142 @@
 
 	desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
 
-	spin_lock(&pcc_lock);
-
-	/* If this is PCC reg, check if channel is free before writing */
-	if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
-		ret = check_pcc_chan();
-		if (ret)
-			goto busy_channel;
+	/*
+	 * This is Phase-I where we want to write to CPC registers
+	 * -> We want all CPUs to be able to execute this phase in parallel
+	 *
+	 * Since read_lock can be acquired by multiple CPUs simultaneously we
+	 * achieve that goal here
+	 */
+	if (CPC_IN_PCC(desired_reg)) {
+		down_read(&pcc_data.pcc_lock);	/* BEGIN Phase-I */
+		if (pcc_data.platform_owns_pcc) {
+			ret = check_pcc_chan(false);
+			if (ret) {
+				up_read(&pcc_data.pcc_lock);
+				return ret;
+			}
+		}
+		/*
+		 * Update the pending_write to make sure a PCC CMD_READ will not
+		 * arrive and steal the channel during the switch to write lock
+		 */
+		pcc_data.pending_pcc_write_cmd = true;
+		cpc_desc->write_cmd_id = pcc_data.pcc_write_cnt;
+		cpc_desc->write_cmd_status = 0;
 	}
 
 	/*
 	 * Skip writing MIN/MAX until Linux knows how to come up with
 	 * useful values.
 	 */
-	cpc_write(&desired_reg->cpc_entry.reg, perf_ctrls->desired_perf);
+	cpc_write(cpu, desired_reg, perf_ctrls->desired_perf);
 
-	/* Is this a PCC reg ?*/
-	if (desired_reg->cpc_entry.reg.space_id == ACPI_ADR_SPACE_PLATFORM_COMM) {
-		/* Ring doorbell so Remote can get our perf request. */
-		if (send_pcc_cmd(CMD_WRITE) < 0)
-			ret = -EIO;
+	if (CPC_IN_PCC(desired_reg))
+		up_read(&pcc_data.pcc_lock);	/* END Phase-I */
+	/*
+	 * This is Phase-II where we transfer the ownership of PCC to Platform
+	 *
+	 * Short Summary: Basically if we think of a group of cppc_set_perf
+	 * requests that happened in short overlapping interval. The last CPU to
+	 * come out of Phase-I will enter Phase-II and ring the doorbell.
+	 *
+	 * We have the following requirements for Phase-II:
+	 *     1. We want to execute Phase-II only when there are no CPUs
+	 * currently executing in Phase-I
+	 *     2. Once we start Phase-II we want to avoid all other CPUs from
+	 * entering Phase-I.
+	 *     3. We want only one CPU among all those who went through Phase-I
+	 * to run phase-II
+	 *
+	 * If write_trylock fails to get the lock and doesn't transfer the
+	 * PCC ownership to the platform, then one of the following will be TRUE
+	 *     1. There is at-least one CPU in Phase-I which will later execute
+	 * write_trylock, so the CPUs in Phase-I will be responsible for
+	 * executing the Phase-II.
+	 *     2. Some other CPU has beaten this CPU to successfully execute the
+	 * write_trylock and has already acquired the write_lock. We know for a
+	 * fact it(other CPU acquiring the write_lock) couldn't have happened
+	 * before this CPU's Phase-I as we held the read_lock.
+	 *     3. Some other CPU executing pcc CMD_READ has stolen the
+	 * down_write, in which case, send_pcc_cmd will check for pending
+	 * CMD_WRITE commands by checking the pending_pcc_write_cmd.
+	 * So this CPU can be certain that its request will be delivered
+	 *    So in all cases, this CPU knows that its request will be delivered
+	 * by another CPU and can return
+	 *
+	 * After getting the down_write we still need to check for
+	 * pending_pcc_write_cmd to take care of the following scenario
+	 *    The thread running this code could be scheduled out between
+	 * Phase-I and Phase-II. Before it is scheduled back on, another CPU
+	 * could have delivered the request to Platform by triggering the
+	 * doorbell and transferred the ownership of PCC to platform. So this
+	 * avoids triggering an unnecessary doorbell and more importantly before
+	 * triggering the doorbell it makes sure that the PCC channel ownership
+	 * is still with OSPM.
+	 *   pending_pcc_write_cmd can also be cleared by a different CPU, if
+	 * there was a pcc CMD_READ waiting on down_write and it steals the lock
+	 * before the pcc CMD_WRITE is completed. pcc_send_cmd checks for this
+	 * case during a CMD_READ and if there are pending writes it delivers
+	 * the write command before servicing the read command
+	 */
+	if (CPC_IN_PCC(desired_reg)) {
+		if (down_write_trylock(&pcc_data.pcc_lock)) {	/* BEGIN Phase-II */
+			/* Update only if there are pending write commands */
+			if (pcc_data.pending_pcc_write_cmd)
+				send_pcc_cmd(CMD_WRITE);
+			up_write(&pcc_data.pcc_lock);		/* END Phase-II */
+		} else
+			/* Wait until pcc_write_cnt is updated by send_pcc_cmd */
+			wait_event(pcc_data.pcc_write_wait_q,
+				cpc_desc->write_cmd_id != pcc_data.pcc_write_cnt);
+
+		/* send_pcc_cmd updates the status in case of failure */
+		ret = cpc_desc->write_cmd_status;
 	}
-busy_channel:
-	spin_unlock(&pcc_lock);
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cppc_set_perf);
+
+/**
+ * cppc_get_transition_latency - returns frequency transition latency in ns
+ *
+ * ACPI CPPC does not explicitly specifiy how a platform can specify the
+ * transition latency for perfromance change requests. The closest we have
+ * is the timing information from the PCCT tables which provides the info
+ * on the number and frequency of PCC commands the platform can handle.
+ */
+unsigned int cppc_get_transition_latency(int cpu_num)
+{
+	/*
+	 * Expected transition latency is based on the PCCT timing values
+	 * Below are definition from ACPI spec:
+	 * pcc_nominal- Expected latency to process a command, in microseconds
+	 * pcc_mpar   - The maximum number of periodic requests that the subspace
+	 *              channel can support, reported in commands per minute. 0
+	 *              indicates no limitation.
+	 * pcc_mrtt   - The minimum amount of time that OSPM must wait after the
+	 *              completion of a command before issuing the next command,
+	 *              in microseconds.
+	 */
+	unsigned int latency_ns = 0;
+	struct cpc_desc *cpc_desc;
+	struct cpc_register_resource *desired_reg;
+
+	cpc_desc = per_cpu(cpc_desc_ptr, cpu_num);
+	if (!cpc_desc)
+		return CPUFREQ_ETERNAL;
+
+	desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+	if (!CPC_IN_PCC(desired_reg))
+		return CPUFREQ_ETERNAL;
+
+	if (pcc_data.pcc_mpar)
+		latency_ns = 60 * (1000 * 1000 * 1000 / pcc_data.pcc_mpar);
+
+	latency_ns = max(latency_ns, pcc_data.pcc_nominal * 1000);
+	latency_ns = max(latency_ns, pcc_data.pcc_mrtt * 1000);
+
+	return latency_ns;
+}
+EXPORT_SYMBOL_GPL(cppc_get_transition_latency);
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index e7bd57c..6805310 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -104,10 +104,12 @@
 #define ACPI_EC_MAX_QUERIES	16	/* Maximum number of parallel queries */
 
 enum {
+	EC_FLAGS_QUERY_ENABLED,		/* Query is enabled */
 	EC_FLAGS_QUERY_PENDING,		/* Query is pending */
 	EC_FLAGS_QUERY_GUARDING,	/* Guard for SCI_EVT check */
 	EC_FLAGS_GPE_HANDLER_INSTALLED,	/* GPE handler installed */
 	EC_FLAGS_EC_HANDLER_INSTALLED,	/* OpReg handler installed */
+	EC_FLAGS_EVT_HANDLER_INSTALLED, /* _Qxx handlers installed */
 	EC_FLAGS_STARTED,		/* Driver is started */
 	EC_FLAGS_STOPPED,		/* Driver is stopped */
 	EC_FLAGS_COMMAND_STORM,		/* GPE storms occurred to the
@@ -145,6 +147,10 @@
 module_param(ec_storm_threshold, uint, 0644);
 MODULE_PARM_DESC(ec_storm_threshold, "Maxim false GPE numbers not considered as GPE storm");
 
+static bool ec_freeze_events __read_mostly = true;
+module_param(ec_freeze_events, bool, 0644);
+MODULE_PARM_DESC(ec_freeze_events, "Disabling event handling during suspend/resume");
+
 struct acpi_ec_query_handler {
 	struct list_head node;
 	acpi_ec_query_func func;
@@ -179,6 +185,7 @@
 
 struct acpi_ec *boot_ec, *first_ec;
 EXPORT_SYMBOL(first_ec);
+static bool boot_ec_is_ecdt = false;
 static struct workqueue_struct *ec_query_wq;
 
 static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */
@@ -239,6 +246,30 @@
 	       !test_bit(EC_FLAGS_STOPPED, &ec->flags);
 }
 
+static bool acpi_ec_event_enabled(struct acpi_ec *ec)
+{
+	/*
+	 * There is an OSPM early stage logic. During the early stages
+	 * (boot/resume), OSPMs shouldn't enable the event handling, only
+	 * the EC transactions are allowed to be performed.
+	 */
+	if (!test_bit(EC_FLAGS_QUERY_ENABLED, &ec->flags))
+		return false;
+	/*
+	 * However, disabling the event handling is experimental for late
+	 * stage (suspend), and is controlled by the boot parameter of
+	 * "ec_freeze_events":
+	 * 1. true:  The EC event handling is disabled before entering
+	 *           the noirq stage.
+	 * 2. false: The EC event handling is automatically disabled as
+	 *           soon as the EC driver is stopped.
+	 */
+	if (ec_freeze_events)
+		return acpi_ec_started(ec);
+	else
+		return test_bit(EC_FLAGS_STARTED, &ec->flags);
+}
+
 static bool acpi_ec_flushed(struct acpi_ec *ec)
 {
 	return ec->reference_count == 1;
@@ -429,7 +460,8 @@
 
 static void acpi_ec_submit_query(struct acpi_ec *ec)
 {
-	if (!test_and_set_bit(EC_FLAGS_QUERY_PENDING, &ec->flags)) {
+	if (acpi_ec_event_enabled(ec) &&
+	    !test_and_set_bit(EC_FLAGS_QUERY_PENDING, &ec->flags)) {
 		ec_dbg_evt("Command(%s) submitted/blocked",
 			   acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY));
 		ec->nr_pending_queries++;
@@ -446,6 +478,86 @@
 	}
 }
 
+static inline void __acpi_ec_enable_event(struct acpi_ec *ec)
+{
+	if (!test_and_set_bit(EC_FLAGS_QUERY_ENABLED, &ec->flags))
+		ec_log_drv("event unblocked");
+	if (!test_bit(EC_FLAGS_QUERY_PENDING, &ec->flags))
+		advance_transaction(ec);
+}
+
+static inline void __acpi_ec_disable_event(struct acpi_ec *ec)
+{
+	if (test_and_clear_bit(EC_FLAGS_QUERY_ENABLED, &ec->flags))
+		ec_log_drv("event blocked");
+}
+
+/*
+ * Process _Q events that might have accumulated in the EC.
+ * Run with locked ec mutex.
+ */
+static void acpi_ec_clear(struct acpi_ec *ec)
+{
+	int i, status;
+	u8 value = 0;
+
+	for (i = 0; i < ACPI_EC_CLEAR_MAX; i++) {
+		status = acpi_ec_query(ec, &value);
+		if (status || !value)
+			break;
+	}
+	if (unlikely(i == ACPI_EC_CLEAR_MAX))
+		pr_warn("Warning: Maximum of %d stale EC events cleared\n", i);
+	else
+		pr_info("%d stale EC events cleared\n", i);
+}
+
+static void acpi_ec_enable_event(struct acpi_ec *ec)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ec->lock, flags);
+	if (acpi_ec_started(ec))
+		__acpi_ec_enable_event(ec);
+	spin_unlock_irqrestore(&ec->lock, flags);
+
+	/* Drain additional events if hardware requires that */
+	if (EC_FLAGS_CLEAR_ON_RESUME)
+		acpi_ec_clear(ec);
+}
+
+static bool acpi_ec_query_flushed(struct acpi_ec *ec)
+{
+	bool flushed;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ec->lock, flags);
+	flushed = !ec->nr_pending_queries;
+	spin_unlock_irqrestore(&ec->lock, flags);
+	return flushed;
+}
+
+static void __acpi_ec_flush_event(struct acpi_ec *ec)
+{
+	/*
+	 * When ec_freeze_events is true, we need to flush events in
+	 * the proper position before entering the noirq stage.
+	 */
+	wait_event(ec->wait, acpi_ec_query_flushed(ec));
+	if (ec_query_wq)
+		flush_workqueue(ec_query_wq);
+}
+
+static void acpi_ec_disable_event(struct acpi_ec *ec)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ec->lock, flags);
+	__acpi_ec_disable_event(ec);
+	spin_unlock_irqrestore(&ec->lock, flags);
+	__acpi_ec_flush_event(ec);
+}
+
 static bool acpi_ec_guard_event(struct acpi_ec *ec)
 {
 	bool guarded = true;
@@ -832,27 +944,6 @@
 }
 EXPORT_SYMBOL(ec_get_handle);
 
-/*
- * Process _Q events that might have accumulated in the EC.
- * Run with locked ec mutex.
- */
-static void acpi_ec_clear(struct acpi_ec *ec)
-{
-	int i, status;
-	u8 value = 0;
-
-	for (i = 0; i < ACPI_EC_CLEAR_MAX; i++) {
-		status = acpi_ec_query(ec, &value);
-		if (status || !value)
-			break;
-	}
-
-	if (unlikely(i == ACPI_EC_CLEAR_MAX))
-		pr_warn("Warning: Maximum of %d stale EC events cleared\n", i);
-	else
-		pr_info("%d stale EC events cleared\n", i);
-}
-
 static void acpi_ec_start(struct acpi_ec *ec, bool resuming)
 {
 	unsigned long flags;
@@ -896,7 +987,8 @@
 		if (!suspending) {
 			acpi_ec_complete_request(ec);
 			ec_dbg_ref(ec, "Decrease driver");
-		}
+		} else if (!ec_freeze_events)
+			__acpi_ec_disable_event(ec);
 		clear_bit(EC_FLAGS_STARTED, &ec->flags);
 		clear_bit(EC_FLAGS_STOPPED, &ec->flags);
 		ec_log_drv("EC stopped");
@@ -919,20 +1011,6 @@
 
 void acpi_ec_unblock_transactions(void)
 {
-	struct acpi_ec *ec = first_ec;
-
-	if (!ec)
-		return;
-
-	/* Allow transactions to be carried out again */
-	acpi_ec_start(ec, true);
-
-	if (EC_FLAGS_CLEAR_ON_RESUME)
-		acpi_ec_clear(ec);
-}
-
-void acpi_ec_unblock_transactions_early(void)
-{
 	/*
 	 * Allow transactions to happen again (this function is called from
 	 * atomic context during wakeup, so we don't need to acquire the mutex).
@@ -1228,13 +1306,21 @@
 static acpi_status
 ec_parse_io_ports(struct acpi_resource *resource, void *context);
 
-static struct acpi_ec *make_acpi_ec(void)
+static void acpi_ec_free(struct acpi_ec *ec)
+{
+	if (first_ec == ec)
+		first_ec = NULL;
+	if (boot_ec == ec)
+		boot_ec = NULL;
+	kfree(ec);
+}
+
+static struct acpi_ec *acpi_ec_alloc(void)
 {
 	struct acpi_ec *ec = kzalloc(sizeof(struct acpi_ec), GFP_KERNEL);
 
 	if (!ec)
 		return NULL;
-	ec->flags = 1 << EC_FLAGS_QUERY_PENDING;
 	mutex_init(&ec->mutex);
 	init_waitqueue_head(&ec->wait);
 	INIT_LIST_HEAD(&ec->list);
@@ -1290,7 +1376,12 @@
 	return AE_CTRL_TERMINATE;
 }
 
-static int ec_install_handlers(struct acpi_ec *ec)
+/*
+ * Note: This function returns an error code only when the address space
+ *       handler is not installed, which means "not able to handle
+ *       transactions".
+ */
+static int ec_install_handlers(struct acpi_ec *ec, bool handle_events)
 {
 	acpi_status status;
 
@@ -1319,6 +1410,16 @@
 		set_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
 	}
 
+	if (!handle_events)
+		return 0;
+
+	if (!test_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags)) {
+		/* Find and register all query methods */
+		acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1,
+				    acpi_ec_register_query_methods,
+				    NULL, ec, NULL);
+		set_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags);
+	}
 	if (!test_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags)) {
 		status = acpi_install_gpe_raw_handler(NULL, ec->gpe,
 					  ACPI_GPE_EDGE_TRIGGERED,
@@ -1329,6 +1430,9 @@
 			if (test_bit(EC_FLAGS_STARTED, &ec->flags) &&
 			    ec->reference_count >= 1)
 				acpi_ec_enable_gpe(ec, true);
+
+			/* EC is fully operational, allow queries */
+			acpi_ec_enable_event(ec);
 		}
 	}
 
@@ -1363,23 +1467,104 @@
 			pr_err("failed to remove gpe handler\n");
 		clear_bit(EC_FLAGS_GPE_HANDLER_INSTALLED, &ec->flags);
 	}
+	if (test_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags)) {
+		acpi_ec_remove_query_handlers(ec, true, 0);
+		clear_bit(EC_FLAGS_EVT_HANDLER_INSTALLED, &ec->flags);
+	}
 }
 
-static struct acpi_ec *acpi_ec_alloc(void)
+static int acpi_ec_setup(struct acpi_ec *ec, bool handle_events)
 {
-	struct acpi_ec *ec;
+	int ret;
 
-	/* Check for boot EC */
-	if (boot_ec) {
-		ec = boot_ec;
-		boot_ec = NULL;
-		ec_remove_handlers(ec);
-		if (first_ec == ec)
-			first_ec = NULL;
-	} else {
-		ec = make_acpi_ec();
+	ret = ec_install_handlers(ec, handle_events);
+	if (ret)
+		return ret;
+
+	/* First EC capable of handling transactions */
+	if (!first_ec) {
+		first_ec = ec;
+		acpi_handle_info(first_ec->handle, "Used as first EC\n");
 	}
-	return ec;
+
+	acpi_handle_info(ec->handle,
+			 "GPE=0x%lx, EC_CMD/EC_SC=0x%lx, EC_DATA=0x%lx\n",
+			 ec->gpe, ec->command_addr, ec->data_addr);
+	return ret;
+}
+
+static int acpi_config_boot_ec(struct acpi_ec *ec, acpi_handle handle,
+			       bool handle_events, bool is_ecdt)
+{
+	int ret;
+
+	/*
+	 * Changing the ACPI handle results in a re-configuration of the
+	 * boot EC. And if it happens after the namespace initialization,
+	 * it causes _REG evaluations.
+	 */
+	if (boot_ec && boot_ec->handle != handle)
+		ec_remove_handlers(boot_ec);
+
+	/* Unset old boot EC */
+	if (boot_ec != ec)
+		acpi_ec_free(boot_ec);
+
+	/*
+	 * ECDT device creation is split into acpi_ec_ecdt_probe() and
+	 * acpi_ec_ecdt_start(). This function takes care of completing the
+	 * ECDT parsing logic as the handle update should be performed
+	 * between the installation/uninstallation of the handlers.
+	 */
+	if (ec->handle != handle)
+		ec->handle = handle;
+
+	ret = acpi_ec_setup(ec, handle_events);
+	if (ret)
+		return ret;
+
+	/* Set new boot EC */
+	if (!boot_ec) {
+		boot_ec = ec;
+		boot_ec_is_ecdt = is_ecdt;
+	}
+
+	acpi_handle_info(boot_ec->handle,
+			 "Used as boot %s EC to handle transactions%s\n",
+			 is_ecdt ? "ECDT" : "DSDT",
+			 handle_events ? " and events" : "");
+	return ret;
+}
+
+static bool acpi_ec_ecdt_get_handle(acpi_handle *phandle)
+{
+	struct acpi_table_ecdt *ecdt_ptr;
+	acpi_status status;
+	acpi_handle handle;
+
+	status = acpi_get_table(ACPI_SIG_ECDT, 1,
+				(struct acpi_table_header **)&ecdt_ptr);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	status = acpi_get_handle(NULL, ecdt_ptr->id, &handle);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	*phandle = handle;
+	return true;
+}
+
+static bool acpi_is_boot_ec(struct acpi_ec *ec)
+{
+	if (!boot_ec)
+		return false;
+	if (ec->handle == boot_ec->handle &&
+	    ec->gpe == boot_ec->gpe &&
+	    ec->command_addr == boot_ec->command_addr &&
+	    ec->data_addr == boot_ec->data_addr)
+		return true;
+	return false;
 }
 
 static int acpi_ec_add(struct acpi_device *device)
@@ -1395,16 +1580,21 @@
 		return -ENOMEM;
 	if (ec_parse_device(device->handle, 0, ec, NULL) !=
 		AE_CTRL_TERMINATE) {
-			kfree(ec);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto err_alloc;
 	}
 
-	/* Find and register all query methods */
-	acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1,
-			    acpi_ec_register_query_methods, NULL, ec, NULL);
+	if (acpi_is_boot_ec(ec)) {
+		boot_ec_is_ecdt = false;
+		acpi_handle_debug(ec->handle, "duplicated.\n");
+		acpi_ec_free(ec);
+		ec = boot_ec;
+		ret = acpi_config_boot_ec(ec, ec->handle, true, false);
+	} else
+		ret = acpi_ec_setup(ec, true);
+	if (ret)
+		goto err_query;
 
-	if (!first_ec)
-		first_ec = ec;
 	device->driver_data = ec;
 
 	ret = !!request_region(ec->data_addr, 1, "EC data");
@@ -1412,20 +1602,17 @@
 	ret = !!request_region(ec->command_addr, 1, "EC cmd");
 	WARN(!ret, "Could not request EC cmd io port 0x%lx", ec->command_addr);
 
-	pr_info("GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx\n",
-			  ec->gpe, ec->command_addr, ec->data_addr);
-
-	ret = ec_install_handlers(ec);
-
 	/* Reprobe devices depending on the EC */
 	acpi_walk_dep_device_list(ec->handle);
+	acpi_handle_debug(ec->handle, "enumerated.\n");
+	return 0;
 
-	/* EC is fully operational, allow queries */
-	clear_bit(EC_FLAGS_QUERY_PENDING, &ec->flags);
-
-	/* Clear stale _Q events if hardware might require that */
-	if (EC_FLAGS_CLEAR_ON_RESUME)
-		acpi_ec_clear(ec);
+err_query:
+	if (ec != boot_ec)
+		acpi_ec_remove_query_handlers(ec, true, 0);
+err_alloc:
+	if (ec != boot_ec)
+		acpi_ec_free(ec);
 	return ret;
 }
 
@@ -1437,14 +1624,13 @@
 		return -EINVAL;
 
 	ec = acpi_driver_data(device);
-	ec_remove_handlers(ec);
-	acpi_ec_remove_query_handlers(ec, true, 0);
 	release_region(ec->data_addr, 1);
 	release_region(ec->command_addr, 1);
 	device->driver_data = NULL;
-	if (ec == first_ec)
-		first_ec = NULL;
-	kfree(ec);
+	if (ec != boot_ec) {
+		ec_remove_handlers(ec);
+		acpi_ec_free(ec);
+	}
 	return 0;
 }
 
@@ -1486,9 +1672,8 @@
 	if (!ec)
 		return -ENOMEM;
 	/*
-	 * Finding EC from DSDT if there is no ECDT EC available. When this
-	 * function is invoked, ACPI tables have been fully loaded, we can
-	 * walk namespace now.
+	 * At this point, the namespace is initialized, so start to find
+	 * the namespace objects.
 	 */
 	status = acpi_get_devices(ec_device_ids[0].id,
 				  ec_parse_device, ec, NULL);
@@ -1496,16 +1681,47 @@
 		ret = -ENODEV;
 		goto error;
 	}
-	ret = ec_install_handlers(ec);
-
+	/*
+	 * When the DSDT EC is available, always re-configure boot EC to
+	 * have _REG evaluated. _REG can only be evaluated after the
+	 * namespace initialization.
+	 * At this point, the GPE is not fully initialized, so do not to
+	 * handle the events.
+	 */
+	ret = acpi_config_boot_ec(ec, ec->handle, false, false);
 error:
 	if (ret)
-		kfree(ec);
-	else
-		first_ec = boot_ec = ec;
+		acpi_ec_free(ec);
 	return ret;
 }
 
+/*
+ * If the DSDT EC is not functioning, we still need to prepare a fully
+ * functioning ECDT EC first in order to handle the events.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=115021
+ */
+int __init acpi_ec_ecdt_start(void)
+{
+	acpi_handle handle;
+
+	if (!boot_ec)
+		return -ENODEV;
+	/*
+	 * The DSDT EC should have already been started in
+	 * acpi_ec_add().
+	 */
+	if (!boot_ec_is_ecdt)
+		return -ENODEV;
+
+	/*
+	 * At this point, the namespace and the GPE is initialized, so
+	 * start to find the namespace objects and handle the events.
+	 */
+	if (!acpi_ec_ecdt_get_handle(&handle))
+		return -ENODEV;
+	return acpi_config_boot_ec(boot_ec, handle, true, true);
+}
+
 #if 0
 /*
  * Some EC firmware variations refuses to respond QR_EC when SCI_EVT is not
@@ -1600,7 +1816,6 @@
 		goto error;
 	}
 
-	pr_info("EC description table is found, configuring boot EC\n");
 	if (EC_FLAGS_CORRECT_ECDT) {
 		ec->command_addr = ecdt_ptr->data.address;
 		ec->data_addr = ecdt_ptr->control.address;
@@ -1609,16 +1824,90 @@
 		ec->data_addr = ecdt_ptr->data.address;
 	}
 	ec->gpe = ecdt_ptr->gpe;
-	ec->handle = ACPI_ROOT_OBJECT;
-	ret = ec_install_handlers(ec);
+
+	/*
+	 * At this point, the namespace is not initialized, so do not find
+	 * the namespace objects, or handle the events.
+	 */
+	ret = acpi_config_boot_ec(ec, ACPI_ROOT_OBJECT, false, true);
 error:
 	if (ret)
-		kfree(ec);
-	else
-		first_ec = boot_ec = ec;
+		acpi_ec_free(ec);
 	return ret;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static void acpi_ec_enter_noirq(struct acpi_ec *ec)
+{
+	unsigned long flags;
+
+	if (ec == first_ec) {
+		spin_lock_irqsave(&ec->lock, flags);
+		ec->saved_busy_polling = ec_busy_polling;
+		ec->saved_polling_guard = ec_polling_guard;
+		ec_busy_polling = true;
+		ec_polling_guard = 0;
+		ec_log_drv("interrupt blocked");
+		spin_unlock_irqrestore(&ec->lock, flags);
+	}
+}
+
+static void acpi_ec_leave_noirq(struct acpi_ec *ec)
+{
+	unsigned long flags;
+
+	if (ec == first_ec) {
+		spin_lock_irqsave(&ec->lock, flags);
+		ec_busy_polling = ec->saved_busy_polling;
+		ec_polling_guard = ec->saved_polling_guard;
+		ec_log_drv("interrupt unblocked");
+		spin_unlock_irqrestore(&ec->lock, flags);
+	}
+}
+
+static int acpi_ec_suspend_noirq(struct device *dev)
+{
+	struct acpi_ec *ec =
+		acpi_driver_data(to_acpi_device(dev));
+
+	acpi_ec_enter_noirq(ec);
+	return 0;
+}
+
+static int acpi_ec_resume_noirq(struct device *dev)
+{
+	struct acpi_ec *ec =
+		acpi_driver_data(to_acpi_device(dev));
+
+	acpi_ec_leave_noirq(ec);
+	return 0;
+}
+
+static int acpi_ec_suspend(struct device *dev)
+{
+	struct acpi_ec *ec =
+		acpi_driver_data(to_acpi_device(dev));
+
+	if (ec_freeze_events)
+		acpi_ec_disable_event(ec);
+	return 0;
+}
+
+static int acpi_ec_resume(struct device *dev)
+{
+	struct acpi_ec *ec =
+		acpi_driver_data(to_acpi_device(dev));
+
+	acpi_ec_enable_event(ec);
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops acpi_ec_pm = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend_noirq, acpi_ec_resume_noirq)
+	SET_SYSTEM_SLEEP_PM_OPS(acpi_ec_suspend, acpi_ec_resume)
+};
+
 static int param_set_event_clearing(const char *val, struct kernel_param *kp)
 {
 	int result = 0;
@@ -1664,6 +1953,7 @@
 		.add = acpi_ec_add,
 		.remove = acpi_ec_remove,
 		},
+	.drv.pm = &acpi_ec_pm,
 };
 
 static inline int acpi_ec_query_init(void)
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 940218f..1b41a27 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -40,10 +40,8 @@
 void acpi_container_init(void);
 void acpi_memory_hotplug_init(void);
 #ifdef	CONFIG_ACPI_HOTPLUG_IOAPIC
-int acpi_ioapic_add(struct acpi_pci_root *root);
 int acpi_ioapic_remove(struct acpi_pci_root *root);
 #else
-static inline int acpi_ioapic_add(struct acpi_pci_root *root) { return 0; }
 static inline int acpi_ioapic_remove(struct acpi_pci_root *root) { return 0; }
 #endif
 #ifdef CONFIG_ACPI_DOCK
@@ -116,7 +114,6 @@
 bool acpi_device_is_battery(struct acpi_device *adev);
 bool acpi_device_is_first_physical_node(struct acpi_device *adev,
 					const struct device *dev);
-struct device *acpi_get_first_physical_node(struct acpi_device *adev);
 
 /* --------------------------------------------------------------------------
                      Device Matching and Notification
@@ -174,6 +171,8 @@
 	struct work_struct work;
 	unsigned long timestamp;
 	unsigned long nr_pending_queries;
+	bool saved_busy_polling;
+	unsigned int saved_polling_guard;
 };
 
 extern struct acpi_ec *first_ec;
@@ -185,9 +184,9 @@
 int acpi_ec_init(void);
 int acpi_ec_ecdt_probe(void);
 int acpi_ec_dsdt_probe(void);
+int acpi_ec_ecdt_start(void);
 void acpi_ec_block_transactions(void);
 void acpi_ec_unblock_transactions(void);
-void acpi_ec_unblock_transactions_early(void);
 int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
 			      acpi_handle handle, acpi_ec_query_func func,
 			      void *data);
@@ -225,4 +224,14 @@
 void acpi_init_properties(struct acpi_device *adev);
 void acpi_free_properties(struct acpi_device *adev);
 
+/*--------------------------------------------------------------------------
+				Watchdog
+  -------------------------------------------------------------------------- */
+
+#ifdef CONFIG_ACPI_WATCHDOG
+void acpi_watchdog_init(void);
+#else
+static inline void acpi_watchdog_init(void) {}
+#endif
+
 #endif /* _ACPI_INTERNAL_H_ */
diff --git a/drivers/acpi/ioapic.c b/drivers/acpi/ioapic.c
index ccdc8db..6d7ce6e 100644
--- a/drivers/acpi/ioapic.c
+++ b/drivers/acpi/ioapic.c
@@ -46,7 +46,7 @@
 	struct resource_win win;
 
 	res->flags = 0;
-	if (acpi_dev_filter_resource_type(acpi_res, IORESOURCE_MEM) == 0)
+	if (acpi_dev_filter_resource_type(acpi_res, IORESOURCE_MEM))
 		return AE_OK;
 
 	if (!acpi_dev_resource_memory(acpi_res, res)) {
@@ -97,7 +97,7 @@
 	unsigned long long gsi_base;
 	struct acpi_pci_ioapic *ioapic;
 	struct pci_dev *dev = NULL;
-	struct resource *res = NULL;
+	struct resource *res = NULL, *pci_res = NULL, *crs_res;
 	char *type = NULL;
 
 	if (!acpi_is_ioapic(handle, &type))
@@ -137,23 +137,30 @@
 		pci_set_master(dev);
 		if (pci_request_region(dev, 0, type))
 			goto exit_disable;
-		res = &dev->resource[0];
+		pci_res = &dev->resource[0];
 		ioapic->pdev = dev;
 	} else {
 		pci_dev_put(dev);
 		dev = NULL;
-
-		res = &ioapic->res;
-		acpi_walk_resources(handle, METHOD_NAME__CRS, setup_res, res);
-		if (res->flags == 0) {
-			acpi_handle_warn(handle, "failed to get resource\n");
-			goto exit_free;
-		} else if (request_resource(&iomem_resource, res)) {
-			acpi_handle_warn(handle, "failed to insert resource\n");
-			goto exit_free;
-		}
 	}
 
+	crs_res = &ioapic->res;
+	acpi_walk_resources(handle, METHOD_NAME__CRS, setup_res, crs_res);
+	crs_res->name = type;
+	crs_res->flags |= IORESOURCE_BUSY;
+	if (crs_res->flags == 0) {
+		acpi_handle_warn(handle, "failed to get resource\n");
+		goto exit_release;
+	} else if (insert_resource(&iomem_resource, crs_res)) {
+		acpi_handle_warn(handle, "failed to insert resource\n");
+		goto exit_release;
+	}
+
+	/* try pci resource first, then "_CRS" resource */
+	res = pci_res;
+	if (!res || !res->flags)
+		res = crs_res;
+
 	if (acpi_register_ioapic(handle, res->start, (u32)gsi_base)) {
 		acpi_handle_warn(handle, "failed to register IOAPIC\n");
 		goto exit_release;
@@ -174,14 +181,13 @@
 exit_release:
 	if (dev)
 		pci_release_region(dev, 0);
-	else
-		release_resource(res);
+	if (ioapic->res.flags && ioapic->res.parent)
+		release_resource(&ioapic->res);
 exit_disable:
 	if (dev)
 		pci_disable_device(dev);
 exit_put:
 	pci_dev_put(dev);
-exit_free:
 	kfree(ioapic);
 exit:
 	mutex_unlock(&ioapic_list_lock);
@@ -189,13 +195,13 @@
 	return AE_OK;
 }
 
-int acpi_ioapic_add(struct acpi_pci_root *root)
+int acpi_ioapic_add(acpi_handle root_handle)
 {
 	acpi_status status, retval = AE_OK;
 
-	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, root->device->handle,
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, root_handle,
 				     UINT_MAX, handle_ioapic_add, NULL,
-				     root->device->handle, (void **)&retval);
+				     root_handle, (void **)&retval);
 
 	return ACPI_SUCCESS(status) && ACPI_SUCCESS(retval) ? 0 : -ENODEV;
 }
@@ -217,9 +223,9 @@
 			pci_release_region(ioapic->pdev, 0);
 			pci_disable_device(ioapic->pdev);
 			pci_dev_put(ioapic->pdev);
-		} else if (ioapic->res.flags && ioapic->res.parent) {
-			release_resource(&ioapic->res);
 		}
+		if (ioapic->res.flags && ioapic->res.parent)
+			release_resource(&ioapic->res);
 		list_del(&ioapic->list);
 		kfree(ioapic);
 	}
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 8c234dd..e1d5ea6 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -94,54 +94,50 @@
 	return to_acpi_device(acpi_desc->dev);
 }
 
-static int xlat_status(void *buf, unsigned int cmd)
+static int xlat_status(void *buf, unsigned int cmd, u32 status)
 {
 	struct nd_cmd_clear_error *clear_err;
 	struct nd_cmd_ars_status *ars_status;
-	struct nd_cmd_ars_start *ars_start;
-	struct nd_cmd_ars_cap *ars_cap;
 	u16 flags;
 
 	switch (cmd) {
 	case ND_CMD_ARS_CAP:
-		ars_cap = buf;
-		if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
+		if ((status & 0xffff) == NFIT_ARS_CAP_NONE)
 			return -ENOTTY;
 
 		/* Command failed */
-		if (ars_cap->status & 0xffff)
+		if (status & 0xffff)
 			return -EIO;
 
 		/* No supported scan types for this range */
 		flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
-		if ((ars_cap->status >> 16 & flags) == 0)
+		if ((status >> 16 & flags) == 0)
 			return -ENOTTY;
 		break;
 	case ND_CMD_ARS_START:
-		ars_start = buf;
 		/* ARS is in progress */
-		if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
+		if ((status & 0xffff) == NFIT_ARS_START_BUSY)
 			return -EBUSY;
 
 		/* Command failed */
-		if (ars_start->status & 0xffff)
+		if (status & 0xffff)
 			return -EIO;
 		break;
 	case ND_CMD_ARS_STATUS:
 		ars_status = buf;
 		/* Command failed */
-		if (ars_status->status & 0xffff)
+		if (status & 0xffff)
 			return -EIO;
 		/* Check extended status (Upper two bytes) */
-		if (ars_status->status == NFIT_ARS_STATUS_DONE)
+		if (status == NFIT_ARS_STATUS_DONE)
 			return 0;
 
 		/* ARS is in progress */
-		if (ars_status->status == NFIT_ARS_STATUS_BUSY)
+		if (status == NFIT_ARS_STATUS_BUSY)
 			return -EBUSY;
 
 		/* No ARS performed for the current boot */
-		if (ars_status->status == NFIT_ARS_STATUS_NONE)
+		if (status == NFIT_ARS_STATUS_NONE)
 			return -EAGAIN;
 
 		/*
@@ -149,19 +145,19 @@
 		 * agent wants the scan to stop.  If we didn't overflow
 		 * then just continue with the returned results.
 		 */
-		if (ars_status->status == NFIT_ARS_STATUS_INTR) {
+		if (status == NFIT_ARS_STATUS_INTR) {
 			if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
 				return -ENOSPC;
 			return 0;
 		}
 
 		/* Unknown status */
-		if (ars_status->status >> 16)
+		if (status >> 16)
 			return -EIO;
 		break;
 	case ND_CMD_CLEAR_ERROR:
 		clear_err = buf;
-		if (clear_err->status & 0xffff)
+		if (status & 0xffff)
 			return -EIO;
 		if (!clear_err->cleared)
 			return -EIO;
@@ -172,6 +168,9 @@
 		break;
 	}
 
+	/* all other non-zero status results in an error */
+	if (status)
+		return -EIO;
 	return 0;
 }
 
@@ -186,10 +185,10 @@
 	struct nd_cmd_pkg *call_pkg = NULL;
 	const char *cmd_name, *dimm_name;
 	unsigned long cmd_mask, dsm_mask;
+	u32 offset, fw_status = 0;
 	acpi_handle handle;
 	unsigned int func;
 	const u8 *uuid;
-	u32 offset;
 	int rc, i;
 
 	func = cmd;
@@ -317,6 +316,15 @@
 				out_obj->buffer.pointer + offset, out_size);
 		offset += out_size;
 	}
+
+	/*
+	 * Set fw_status for all the commands with a known format to be
+	 * later interpreted by xlat_status().
+	 */
+	if (i >= 1 && ((cmd >= ND_CMD_ARS_CAP && cmd <= ND_CMD_CLEAR_ERROR)
+			|| (cmd >= ND_CMD_SMART && cmd <= ND_CMD_VENDOR)))
+		fw_status = *(u32 *) out_obj->buffer.pointer;
+
 	if (offset + in_buf.buffer.length < buf_len) {
 		if (i >= 1) {
 			/*
@@ -325,7 +333,7 @@
 			 */
 			rc = buf_len - offset - in_buf.buffer.length;
 			if (cmd_rc)
-				*cmd_rc = xlat_status(buf, cmd);
+				*cmd_rc = xlat_status(buf, cmd, fw_status);
 		} else {
 			dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
 					__func__, dimm_name, cmd_name, buf_len,
@@ -335,7 +343,7 @@
 	} else {
 		rc = 0;
 		if (cmd_rc)
-			*cmd_rc = xlat_status(buf, cmd);
+			*cmd_rc = xlat_status(buf, cmd, fw_status);
 	}
 
  out:
@@ -1527,11 +1535,12 @@
 {
 	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR];
 	u64 offset = nfit_blk->stat_offset + mmio->size * bw;
+	const u32 STATUS_MASK = 0x80000037;
 
 	if (mmio->num_lines)
 		offset = to_interleave_offset(offset, mmio);
 
-	return readl(mmio->addr.base + offset);
+	return readl(mmio->addr.base + offset) & STATUS_MASK;
 }
 
 static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index 4c745bf..161f915 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -42,7 +42,7 @@
 		list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
 			struct acpi_nfit_system_address *spa = nfit_spa->spa;
 
-			if (nfit_spa_type(spa) == NFIT_SPA_PM)
+			if (nfit_spa_type(spa) != NFIT_SPA_PM)
 				continue;
 			/* find the spa that covers the mce addr */
 			if (spa->address > mce->addr)
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 2c45dd3..c576a6f 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -411,7 +411,15 @@
 	int gsi;
 	u8 pin;
 	int triggering = ACPI_LEVEL_SENSITIVE;
-	int polarity = ACPI_ACTIVE_LOW;
+	/*
+	 * On ARM systems with the GIC interrupt model, level interrupts
+	 * are always polarity high by specification; PCI legacy
+	 * IRQs lines are inverted before reaching the interrupt
+	 * controller and must therefore be considered active high
+	 * as default.
+	 */
+	int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ?
+				      ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW;
 	char *link = NULL;
 	char link_desc[16];
 	int rc;
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index d144168d..bf601d4 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -614,7 +614,17 @@
 	if (hotadd) {
 		pcibios_resource_survey_bus(root->bus);
 		pci_assign_unassigned_root_bus_resources(root->bus);
-		acpi_ioapic_add(root);
+		/*
+		 * This is only called for the hotadd case. For the boot-time
+		 * case, we need to wait until after PCI initialization in
+		 * order to deal with IOAPICs mapped in on a PCI BAR.
+		 *
+		 * This is currently x86-specific, because acpi_ioapic_add()
+		 * is an empty function without CONFIG_ACPI_HOTPLUG_IOAPIC.
+		 * And CONFIG_ACPI_HOTPLUG_IOAPIC depends on CONFIG_X86_IO_APIC
+		 * (see drivers/acpi/Kconfig).
+		 */
+		acpi_ioapic_add(root->device->handle);
 	}
 
 	pci_lock_rescan_remove();
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 9125d7d..5c78ee1 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -32,12 +32,12 @@
 }
 
 static int map_lapic_id(struct acpi_subtable_header *entry,
-		 u32 acpi_id, phys_cpuid_t *apic_id)
+		 u32 acpi_id, phys_cpuid_t *apic_id, bool ignore_disabled)
 {
 	struct acpi_madt_local_apic *lapic =
 		container_of(entry, struct acpi_madt_local_apic, header);
 
-	if (!(lapic->lapic_flags & ACPI_MADT_ENABLED))
+	if (ignore_disabled && !(lapic->lapic_flags & ACPI_MADT_ENABLED))
 		return -ENODEV;
 
 	if (lapic->processor_id != acpi_id)
@@ -48,12 +48,13 @@
 }
 
 static int map_x2apic_id(struct acpi_subtable_header *entry,
-		int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
+		int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
+		bool ignore_disabled)
 {
 	struct acpi_madt_local_x2apic *apic =
 		container_of(entry, struct acpi_madt_local_x2apic, header);
 
-	if (!(apic->lapic_flags & ACPI_MADT_ENABLED))
+	if (ignore_disabled && !(apic->lapic_flags & ACPI_MADT_ENABLED))
 		return -ENODEV;
 
 	if (device_declaration && (apic->uid == acpi_id)) {
@@ -65,12 +66,13 @@
 }
 
 static int map_lsapic_id(struct acpi_subtable_header *entry,
-		int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
+		int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id,
+		bool ignore_disabled)
 {
 	struct acpi_madt_local_sapic *lsapic =
 		container_of(entry, struct acpi_madt_local_sapic, header);
 
-	if (!(lsapic->lapic_flags & ACPI_MADT_ENABLED))
+	if (ignore_disabled && !(lsapic->lapic_flags & ACPI_MADT_ENABLED))
 		return -ENODEV;
 
 	if (device_declaration) {
@@ -87,12 +89,13 @@
  * Retrieve the ARM CPU physical identifier (MPIDR)
  */
 static int map_gicc_mpidr(struct acpi_subtable_header *entry,
-		int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr)
+		int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr,
+		bool ignore_disabled)
 {
 	struct acpi_madt_generic_interrupt *gicc =
 	    container_of(entry, struct acpi_madt_generic_interrupt, header);
 
-	if (!(gicc->flags & ACPI_MADT_ENABLED))
+	if (ignore_disabled && !(gicc->flags & ACPI_MADT_ENABLED))
 		return -ENODEV;
 
 	/* device_declaration means Device object in DSDT, in the
@@ -109,7 +112,7 @@
 }
 
 static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
-				   int type, u32 acpi_id)
+				   int type, u32 acpi_id, bool ignore_disabled)
 {
 	unsigned long madt_end, entry;
 	phys_cpuid_t phys_id = PHYS_CPUID_INVALID;	/* CPU hardware ID */
@@ -127,16 +130,20 @@
 		struct acpi_subtable_header *header =
 			(struct acpi_subtable_header *)entry;
 		if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
-			if (!map_lapic_id(header, acpi_id, &phys_id))
+			if (!map_lapic_id(header, acpi_id, &phys_id,
+					  ignore_disabled))
 				break;
 		} else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
-			if (!map_x2apic_id(header, type, acpi_id, &phys_id))
+			if (!map_x2apic_id(header, type, acpi_id, &phys_id,
+					   ignore_disabled))
 				break;
 		} else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
-			if (!map_lsapic_id(header, type, acpi_id, &phys_id))
+			if (!map_lsapic_id(header, type, acpi_id, &phys_id,
+					   ignore_disabled))
 				break;
 		} else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
-			if (!map_gicc_mpidr(header, type, acpi_id, &phys_id))
+			if (!map_gicc_mpidr(header, type, acpi_id, &phys_id,
+					    ignore_disabled))
 				break;
 		}
 		entry += header->length;
@@ -156,14 +163,15 @@
 	if (!madt)
 		return PHYS_CPUID_INVALID;
 
-	rv = map_madt_entry(madt, 1, acpi_id);
+	rv = map_madt_entry(madt, 1, acpi_id, true);
 
 	early_acpi_os_unmap_memory(madt, tbl_size);
 
 	return rv;
 }
 
-static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
+static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id,
+				  bool ignore_disabled)
 {
 	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
@@ -184,30 +192,38 @@
 
 	header = (struct acpi_subtable_header *)obj->buffer.pointer;
 	if (header->type == ACPI_MADT_TYPE_LOCAL_APIC)
-		map_lapic_id(header, acpi_id, &phys_id);
+		map_lapic_id(header, acpi_id, &phys_id, ignore_disabled);
 	else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC)
-		map_lsapic_id(header, type, acpi_id, &phys_id);
+		map_lsapic_id(header, type, acpi_id, &phys_id, ignore_disabled);
 	else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
-		map_x2apic_id(header, type, acpi_id, &phys_id);
+		map_x2apic_id(header, type, acpi_id, &phys_id, ignore_disabled);
 	else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
-		map_gicc_mpidr(header, type, acpi_id, &phys_id);
+		map_gicc_mpidr(header, type, acpi_id, &phys_id,
+			       ignore_disabled);
 
 exit:
 	kfree(buffer.pointer);
 	return phys_id;
 }
 
-phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
+static phys_cpuid_t __acpi_get_phys_id(acpi_handle handle, int type,
+				       u32 acpi_id, bool ignore_disabled)
 {
 	phys_cpuid_t phys_id;
 
-	phys_id = map_mat_entry(handle, type, acpi_id);
+	phys_id = map_mat_entry(handle, type, acpi_id, ignore_disabled);
 	if (invalid_phys_cpuid(phys_id))
-		phys_id = map_madt_entry(get_madt_table(), type, acpi_id);
+		phys_id = map_madt_entry(get_madt_table(), type, acpi_id,
+					   ignore_disabled);
 
 	return phys_id;
 }
 
+phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
+{
+	return __acpi_get_phys_id(handle, type, acpi_id, true);
+}
+
 int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id)
 {
 #ifdef CONFIG_SMP
@@ -264,6 +280,79 @@
 }
 EXPORT_SYMBOL_GPL(acpi_get_cpuid);
 
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+static bool __init
+map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid)
+{
+	int type, id;
+	u32 acpi_id;
+	acpi_status status;
+	acpi_object_type acpi_type;
+	unsigned long long tmp;
+	union acpi_object object = { 0 };
+	struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
+
+	status = acpi_get_type(handle, &acpi_type);
+	if (ACPI_FAILURE(status))
+		return false;
+
+	switch (acpi_type) {
+	case ACPI_TYPE_PROCESSOR:
+		status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+		if (ACPI_FAILURE(status))
+			return false;
+		acpi_id = object.processor.proc_id;
+
+		/* validate the acpi_id */
+		if(acpi_processor_validate_proc_id(acpi_id))
+			return false;
+		break;
+	case ACPI_TYPE_DEVICE:
+		status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
+		if (ACPI_FAILURE(status))
+			return false;
+		acpi_id = tmp;
+		break;
+	default:
+		return false;
+	}
+
+	type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
+
+	*phys_id = __acpi_get_phys_id(handle, type, acpi_id, false);
+	id = acpi_map_cpuid(*phys_id, acpi_id);
+
+	if (id < 0)
+		return false;
+	*cpuid = id;
+	return true;
+}
+
+static acpi_status __init
+set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context,
+			   void **rv)
+{
+	phys_cpuid_t phys_id;
+	int cpu_id;
+
+	if (!map_processor(handle, &phys_id, &cpu_id))
+		return AE_ERROR;
+
+	acpi_map_cpu2node(handle, cpu_id, phys_id);
+	return AE_OK;
+}
+
+void __init acpi_set_processor_mapping(void)
+{
+	/* Set persistent cpu <-> node mapping for all processors. */
+	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+			    ACPI_UINT32_MAX, set_processor_node_mapping,
+			    NULL, NULL, NULL);
+}
+#else
+void __init acpi_set_processor_mapping(void) {}
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base,
 			 u64 *phys_addr, int *ioapic_id)
diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c
index 0553aee..9d5f0c7 100644
--- a/drivers/acpi/processor_driver.c
+++ b/drivers/acpi/processor_driver.c
@@ -110,55 +110,46 @@
 
 static int __acpi_processor_start(struct acpi_device *device);
 
-static int acpi_cpu_soft_notify(struct notifier_block *nfb,
-					  unsigned long action, void *hcpu)
+static int acpi_soft_cpu_online(unsigned int cpu)
 {
-	unsigned int cpu = (unsigned long)hcpu;
 	struct acpi_processor *pr = per_cpu(processors, cpu);
 	struct acpi_device *device;
-	action &= ~CPU_TASKS_FROZEN;
-
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_DEAD:
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
 
 	if (!pr || acpi_bus_get_device(pr->handle, &device))
-		return NOTIFY_DONE;
+		return 0;
+	/*
+	 * CPU got physically hotplugged and onlined for the first time:
+	 * Initialize missing things.
+	 */
+	if (pr->flags.need_hotplug_init) {
+		int ret;
 
-	if (action == CPU_ONLINE) {
-		/*
-		 * CPU got physically hotplugged and onlined for the first time:
-		 * Initialize missing things.
-		 */
-		if (pr->flags.need_hotplug_init) {
-			int ret;
-
-			pr_info("Will online and init hotplugged CPU: %d\n",
-				pr->id);
-			pr->flags.need_hotplug_init = 0;
-			ret = __acpi_processor_start(device);
-			WARN(ret, "Failed to start CPU: %d\n", pr->id);
-		} else {
-			/* Normal CPU soft online event. */
-			acpi_processor_ppc_has_changed(pr, 0);
-			acpi_processor_hotplug(pr);
-			acpi_processor_reevaluate_tstate(pr, action);
-			acpi_processor_tstate_has_changed(pr);
-		}
-	} else if (action == CPU_DEAD) {
-		/* Invalidate flag.throttling after the CPU is offline. */
-		acpi_processor_reevaluate_tstate(pr, action);
+		pr_info("Will online and init hotplugged CPU: %d\n",
+			pr->id);
+		pr->flags.need_hotplug_init = 0;
+		ret = __acpi_processor_start(device);
+		WARN(ret, "Failed to start CPU: %d\n", pr->id);
+	} else {
+		/* Normal CPU soft online event. */
+		acpi_processor_ppc_has_changed(pr, 0);
+		acpi_processor_hotplug(pr);
+		acpi_processor_reevaluate_tstate(pr, false);
+		acpi_processor_tstate_has_changed(pr);
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block acpi_cpu_notifier = {
-	    .notifier_call = acpi_cpu_soft_notify,
-};
+static int acpi_soft_cpu_dead(unsigned int cpu)
+{
+	struct acpi_processor *pr = per_cpu(processors, cpu);
+	struct acpi_device *device;
+
+	if (!pr || acpi_bus_get_device(pr->handle, &device))
+		return 0;
+
+	acpi_processor_reevaluate_tstate(pr, true);
+	return 0;
+}
 
 #ifdef CONFIG_ACPI_CPU_FREQ_PSS
 static int acpi_pss_perf_init(struct acpi_processor *pr,
@@ -245,8 +236,8 @@
 		return 0;
 
 	result = acpi_cppc_processor_probe(pr);
-	if (result)
-		return -ENODEV;
+	if (result && !IS_ENABLED(CONFIG_ACPI_CPU_FREQ_PSS))
+		dev_warn(&device->dev, "CPPC data invalid or not present\n");
 
 	if (!cpuidle_get_driver() || cpuidle_get_driver() == &acpi_idle_driver)
 		acpi_processor_power_init(pr);
@@ -303,7 +294,7 @@
  * This is needed for the powernow-k8 driver, that works even without
  * ACPI, but needs symbols from this driver
  */
-
+static enum cpuhp_state hp_online;
 static int __init acpi_processor_driver_init(void)
 {
 	int result = 0;
@@ -315,11 +306,22 @@
 	if (result < 0)
 		return result;
 
-	register_hotcpu_notifier(&acpi_cpu_notifier);
+	result = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					   "acpi/cpu-drv:online",
+					   acpi_soft_cpu_online, NULL);
+	if (result < 0)
+		goto err;
+	hp_online = result;
+	cpuhp_setup_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD, "acpi/cpu-drv:dead",
+				  NULL, acpi_soft_cpu_dead);
+
 	acpi_thermal_cpufreq_init();
 	acpi_processor_ppc_init();
 	acpi_processor_throttling_init();
 	return 0;
+err:
+	driver_unregister(&acpi_processor_driver);
+	return result;
 }
 
 static void __exit acpi_processor_driver_exit(void)
@@ -329,7 +331,8 @@
 
 	acpi_processor_ppc_exit();
 	acpi_thermal_cpufreq_exit();
-	unregister_hotcpu_notifier(&acpi_cpu_notifier);
+	cpuhp_remove_state_nocalls(hp_online);
+	cpuhp_remove_state_nocalls(CPUHP_ACPI_CPUDRV_DEAD);
 	driver_unregister(&acpi_processor_driver);
 }
 
diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c
index c72e648..d51ca1c 100644
--- a/drivers/acpi/processor_throttling.c
+++ b/drivers/acpi/processor_throttling.c
@@ -375,11 +375,11 @@
  *	3. TSD domain
  */
 void acpi_processor_reevaluate_tstate(struct acpi_processor *pr,
-					unsigned long action)
+					bool is_dead)
 {
 	int result = 0;
 
-	if (action == CPU_DEAD) {
+	if (is_dead) {
 		/* When one CPU is offline, the T-state throttling
 		 * will be invalidated.
 		 */
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index ad9fc84..035ac64 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2002,6 +2002,7 @@
 	acpi_pnp_init();
 	acpi_int340x_thermal_init();
 	acpi_amba_init();
+	acpi_watchdog_init();
 
 	acpi_scan_add_handler(&generic_device_handler);
 
@@ -2044,6 +2045,7 @@
 	}
 
 	acpi_update_all_gpes();
+	acpi_ec_ecdt_start();
 
 	acpi_scan_initialized = true;
 
@@ -2054,7 +2056,7 @@
 
 static struct acpi_probe_entry *ape;
 static int acpi_probe_count;
-static DEFINE_SPINLOCK(acpi_probe_lock);
+static DEFINE_MUTEX(acpi_probe_mutex);
 
 static int __init acpi_match_madt(struct acpi_subtable_header *header,
 				  const unsigned long end)
@@ -2073,7 +2075,7 @@
 	if (acpi_disabled)
 		return 0;
 
-	spin_lock(&acpi_probe_lock);
+	mutex_lock(&acpi_probe_mutex);
 	for (ape = ap_head; nr; ape++, nr--) {
 		if (ACPI_COMPARE_NAME(ACPI_SIG_MADT, ape->id)) {
 			acpi_probe_count = 0;
@@ -2086,7 +2088,7 @@
 				count++;
 		}
 	}
-	spin_unlock(&acpi_probe_lock);
+	mutex_unlock(&acpi_probe_mutex);
 
 	return count;
 }
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 2b38c1b..deb0ff7 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -572,7 +572,7 @@
 
 		acpi_get_event_status(ACPI_EVENT_POWER_BUTTON, &pwr_btn_status);
 
-		if (pwr_btn_status & ACPI_EVENT_FLAG_SET) {
+		if (pwr_btn_status & ACPI_EVENT_FLAG_STATUS_SET) {
 			acpi_clear_event(ACPI_EVENT_POWER_BUTTON);
 			/* Flag for later */
 			pwr_btn_event_pending = true;
@@ -586,7 +586,7 @@
 	 */
 	acpi_disable_all_gpes();
 	/* Allow EC transactions to happen. */
-	acpi_ec_unblock_transactions_early();
+	acpi_ec_unblock_transactions();
 
 	suspend_nvs_restore();
 
@@ -784,7 +784,7 @@
 	/* Restore the NVS memory area */
 	suspend_nvs_restore();
 	/* Allow EC transactions to happen. */
-	acpi_ec_unblock_transactions_early();
+	acpi_ec_unblock_transactions();
 }
 
 static void acpi_pm_thaw(void)
diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
new file mode 100644
index 0000000..e8d7bc7
--- /dev/null
+++ b/drivers/acpi/spcr.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2012, Intel Corporation
+ * Copyright (c) 2015, Red Hat, Inc.
+ * Copyright (c) 2015, 2016 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) "ACPI: SPCR: " fmt
+
+#include <linux/acpi.h>
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/serial_core.h>
+
+/**
+ * parse_spcr() - parse ACPI SPCR table and add preferred console
+ *
+ * @earlycon: set up earlycon for the console specified by the table
+ *
+ * For the architectures with support for ACPI, CONFIG_ACPI_SPCR_TABLE may be
+ * defined to parse ACPI SPCR table.  As a result of the parsing preferred
+ * console is registered and if @earlycon is true, earlycon is set up.
+ *
+ * When CONFIG_ACPI_SPCR_TABLE is defined, this function should be called
+ * from arch inintialization code as soon as the DT/ACPI decision is made.
+ *
+ */
+int __init parse_spcr(bool earlycon)
+{
+	static char opts[64];
+	struct acpi_table_spcr *table;
+	acpi_size table_size;
+	acpi_status status;
+	char *uart;
+	char *iotype;
+	int baud_rate;
+	int err;
+
+	if (acpi_disabled)
+		return -ENODEV;
+
+	status = acpi_get_table_with_size(ACPI_SIG_SPCR, 0,
+					  (struct acpi_table_header **)&table,
+					  &table_size);
+
+	if (ACPI_FAILURE(status))
+		return -ENOENT;
+
+	if (table->header.revision < 2) {
+		err = -ENOENT;
+		pr_err("wrong table version\n");
+		goto done;
+	}
+
+	iotype = table->serial_port.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY ?
+			"mmio" : "io";
+
+	switch (table->interface_type) {
+	case ACPI_DBG2_ARM_SBSA_32BIT:
+		iotype = "mmio32";
+		/* fall through */
+	case ACPI_DBG2_ARM_PL011:
+	case ACPI_DBG2_ARM_SBSA_GENERIC:
+	case ACPI_DBG2_BCM2835:
+		uart = "pl011";
+		break;
+	case ACPI_DBG2_16550_COMPATIBLE:
+	case ACPI_DBG2_16550_SUBSET:
+		uart = "uart";
+		break;
+	default:
+		err = -ENOENT;
+		goto done;
+	}
+
+	switch (table->baud_rate) {
+	case 3:
+		baud_rate = 9600;
+		break;
+	case 4:
+		baud_rate = 19200;
+		break;
+	case 6:
+		baud_rate = 57600;
+		break;
+	case 7:
+		baud_rate = 115200;
+		break;
+	default:
+		err = -ENOENT;
+		goto done;
+	}
+
+	snprintf(opts, sizeof(opts), "%s,%s,0x%llx,%d", uart, iotype,
+		 table->serial_port.address, baud_rate);
+
+	pr_info("console: %s\n", opts);
+
+	if (earlycon)
+		setup_earlycon(opts);
+
+	err = add_preferred_console(uart, 0, opts + strlen(uart) + 1);
+
+done:
+	early_acpi_os_unmap_memory((void __iomem *)table, table_size);
+	return err;
+}
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 358165e..703c26e 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -314,10 +314,14 @@
 static struct kobject *dynamic_tables_kobj;
 static struct kobject *hotplug_kobj;
 
+#define ACPI_MAX_TABLE_INSTANCES	999
+#define ACPI_INST_SIZE			4 /* including trailing 0 */
+
 struct acpi_table_attr {
 	struct bin_attribute attr;
-	char name[8];
+	char name[ACPI_NAME_SIZE];
 	int instance;
+	char filename[ACPI_NAME_SIZE+ACPI_INST_SIZE];
 	struct list_head node;
 };
 
@@ -329,14 +333,9 @@
 	    container_of(bin_attr, struct acpi_table_attr, attr);
 	struct acpi_table_header *table_header = NULL;
 	acpi_status status;
-	char name[ACPI_NAME_SIZE];
 
-	if (strncmp(table_attr->name, "NULL", 4))
-		memcpy(name, table_attr->name, ACPI_NAME_SIZE);
-	else
-		memcpy(name, "\0\0\0\0", 4);
-
-	status = acpi_get_table(name, table_attr->instance, &table_header);
+	status = acpi_get_table(table_attr->name, table_attr->instance,
+				&table_header);
 	if (ACPI_FAILURE(status))
 		return -ENODEV;
 
@@ -344,38 +343,45 @@
 				       table_header, table_header->length);
 }
 
-static void acpi_table_attr_init(struct acpi_table_attr *table_attr,
-				 struct acpi_table_header *table_header)
+static int acpi_table_attr_init(struct kobject *tables_obj,
+				struct acpi_table_attr *table_attr,
+				struct acpi_table_header *table_header)
 {
 	struct acpi_table_header *header = NULL;
 	struct acpi_table_attr *attr = NULL;
+	char instance_str[ACPI_INST_SIZE];
 
 	sysfs_attr_init(&table_attr->attr.attr);
-	if (table_header->signature[0] != '\0')
-		memcpy(table_attr->name, table_header->signature,
-		       ACPI_NAME_SIZE);
-	else
-		memcpy(table_attr->name, "NULL", 4);
+	ACPI_MOVE_NAME(table_attr->name, table_header->signature);
 
 	list_for_each_entry(attr, &acpi_table_attr_list, node) {
-		if (!memcmp(table_attr->name, attr->name, ACPI_NAME_SIZE))
+		if (ACPI_COMPARE_NAME(table_attr->name, attr->name))
 			if (table_attr->instance < attr->instance)
 				table_attr->instance = attr->instance;
 	}
 	table_attr->instance++;
+	if (table_attr->instance > ACPI_MAX_TABLE_INSTANCES) {
+		pr_warn("%4.4s: too many table instances\n",
+			table_attr->name);
+		return -ERANGE;
+	}
 
+	ACPI_MOVE_NAME(table_attr->filename, table_header->signature);
+	table_attr->filename[ACPI_NAME_SIZE] = '\0';
 	if (table_attr->instance > 1 || (table_attr->instance == 1 &&
 					 !acpi_get_table
-					 (table_header->signature, 2, &header)))
-		sprintf(table_attr->name + ACPI_NAME_SIZE, "%d",
-			table_attr->instance);
+					 (table_header->signature, 2, &header))) {
+		snprintf(instance_str, sizeof(instance_str), "%u",
+			 table_attr->instance);
+		strcat(table_attr->filename, instance_str);
+	}
 
 	table_attr->attr.size = table_header->length;
 	table_attr->attr.read = acpi_table_show;
-	table_attr->attr.attr.name = table_attr->name;
+	table_attr->attr.attr.name = table_attr->filename;
 	table_attr->attr.attr.mode = 0400;
 
-	return;
+	return sysfs_create_bin_file(tables_obj, &table_attr->attr);
 }
 
 acpi_status acpi_sysfs_table_handler(u32 event, void *table, void *context)
@@ -383,21 +389,22 @@
 	struct acpi_table_attr *table_attr;
 
 	switch (event) {
-	case ACPI_TABLE_EVENT_LOAD:
+	case ACPI_TABLE_EVENT_INSTALL:
 		table_attr =
 		    kzalloc(sizeof(struct acpi_table_attr), GFP_KERNEL);
 		if (!table_attr)
 			return AE_NO_MEMORY;
 
-		acpi_table_attr_init(table_attr, table);
-		if (sysfs_create_bin_file(dynamic_tables_kobj,
-					  &table_attr->attr)) {
+		if (acpi_table_attr_init(dynamic_tables_kobj,
+					 table_attr, table)) {
 			kfree(table_attr);
 			return AE_ERROR;
-		} else
-			list_add_tail(&table_attr->node, &acpi_table_attr_list);
+		}
+		list_add_tail(&table_attr->node, &acpi_table_attr_list);
 		break;
+	case ACPI_TABLE_EVENT_LOAD:
 	case ACPI_TABLE_EVENT_UNLOAD:
+	case ACPI_TABLE_EVENT_UNINSTALL:
 		/*
 		 * we do not need to do anything right now
 		 * because the table is not deleted from the
@@ -435,13 +442,12 @@
 		if (ACPI_FAILURE(status))
 			continue;
 
-		table_attr = NULL;
 		table_attr = kzalloc(sizeof(*table_attr), GFP_KERNEL);
 		if (!table_attr)
 			return -ENOMEM;
 
-		acpi_table_attr_init(table_attr, table_header);
-		ret = sysfs_create_bin_file(tables_kobj, &table_attr->attr);
+		ret = acpi_table_attr_init(tables_kobj,
+					   table_attr, table_header);
 		if (ret) {
 			kfree(table_attr);
 			return ret;
@@ -597,14 +603,27 @@
 	if (result)
 		goto end;
 
-	if (!(status & ACPI_EVENT_FLAG_HAS_HANDLER))
-		size += sprintf(buf + size, "   invalid");
-	else if (status & ACPI_EVENT_FLAG_ENABLED)
-		size += sprintf(buf + size, "   enabled");
-	else if (status & ACPI_EVENT_FLAG_WAKE_ENABLED)
-		size += sprintf(buf + size, "   wake_enabled");
+	if (status & ACPI_EVENT_FLAG_ENABLE_SET)
+		size += sprintf(buf + size, "  EN");
 	else
-		size += sprintf(buf + size, "   disabled");
+		size += sprintf(buf + size, "    ");
+	if (status & ACPI_EVENT_FLAG_STATUS_SET)
+		size += sprintf(buf + size, " STS");
+	else
+		size += sprintf(buf + size, "    ");
+
+	if (!(status & ACPI_EVENT_FLAG_HAS_HANDLER))
+		size += sprintf(buf + size, " invalid     ");
+	else if (status & ACPI_EVENT_FLAG_ENABLED)
+		size += sprintf(buf + size, " enabled     ");
+	else if (status & ACPI_EVENT_FLAG_WAKE_ENABLED)
+		size += sprintf(buf + size, " wake_enabled");
+	else
+		size += sprintf(buf + size, " disabled    ");
+	if (status & ACPI_EVENT_FLAG_MASKED)
+		size += sprintf(buf + size, " masked  ");
+	else
+		size += sprintf(buf + size, " unmasked");
 
 end:
 	size += sprintf(buf + size, "\n");
@@ -655,8 +674,12 @@
 			 !(status & ACPI_EVENT_FLAG_ENABLED))
 			result = acpi_enable_gpe(handle, index);
 		else if (!strcmp(buf, "clear\n") &&
-			 (status & ACPI_EVENT_FLAG_SET))
+			 (status & ACPI_EVENT_FLAG_STATUS_SET))
 			result = acpi_clear_gpe(handle, index);
+		else if (!strcmp(buf, "mask\n"))
+			result = acpi_mask_gpe(handle, index, TRUE);
+		else if (!strcmp(buf, "unmask\n"))
+			result = acpi_mask_gpe(handle, index, FALSE);
 		else if (!kstrtoul(buf, 0, &tmp))
 			all_counters[index].count = tmp;
 		else
@@ -664,13 +687,13 @@
 	} else if (index < num_gpes + ACPI_NUM_FIXED_EVENTS) {
 		int event = index - num_gpes;
 		if (!strcmp(buf, "disable\n") &&
-		    (status & ACPI_EVENT_FLAG_ENABLED))
+		    (status & ACPI_EVENT_FLAG_ENABLE_SET))
 			result = acpi_disable_event(event, ACPI_NOT_ISR);
 		else if (!strcmp(buf, "enable\n") &&
-			 !(status & ACPI_EVENT_FLAG_ENABLED))
+			 !(status & ACPI_EVENT_FLAG_ENABLE_SET))
 			result = acpi_enable_event(event, ACPI_NOT_ISR);
 		else if (!strcmp(buf, "clear\n") &&
-			 (status & ACPI_EVENT_FLAG_SET))
+			 (status & ACPI_EVENT_FLAG_STATUS_SET))
 			result = acpi_clear_event(event);
 		else if (!kstrtoul(buf, 0, &tmp))
 			all_counters[index].count = tmp;
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index 9f0ad6e..cdd56c4 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -35,7 +35,6 @@
 #include <linux/earlycpio.h>
 #include <linux/memblock.h>
 #include <linux/initrd.h>
-#include <linux/acpi.h>
 #include "internal.h"
 
 #ifdef CONFIG_ACPI_CUSTOM_DSDT
@@ -246,6 +245,7 @@
 	struct acpi_subtable_header *entry;
 	unsigned long table_end;
 	int count = 0;
+	int errs = 0;
 	int i;
 
 	if (acpi_disabled)
@@ -278,10 +278,12 @@
 			if (entry->type != proc[i].id)
 				continue;
 			if (!proc[i].handler ||
-			     proc[i].handler(entry, table_end))
-				return -EINVAL;
+			     (!errs && proc[i].handler(entry, table_end))) {
+				errs++;
+				continue;
+			}
 
-			proc->count++;
+			proc[i].count++;
 			break;
 		}
 		if (i != proc_num)
@@ -301,11 +303,11 @@
 	}
 
 	if (max_entries && count > max_entries) {
-		pr_warn("[%4.4s:0x%02x] ignored %i entries of %i found\n",
-			id, proc->id, count - max_entries, count);
+		pr_warn("[%4.4s:0x%02x] found the maximum %i entries\n",
+			id, proc->id, count);
 	}
 
-	return count;
+	return errs ? -EINVAL : count;
 }
 
 int __init
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 7461a58..dcf2c72 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -2524,7 +2524,7 @@
 
 		/* Do not receive interrupts sent by dummy ports */
 		if (!pp) {
-			disable_irq(irq + i);
+			disable_irq(irq);
 			continue;
 		}
 
diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c
index 633aa29..44f97ad 100644
--- a/drivers/ata/pata_ninja32.c
+++ b/drivers/ata/pata_ninja32.c
@@ -144,7 +144,7 @@
 	ap->ioaddr.altstatus_addr = base + 0x1E;
 	ap->ioaddr.bmdma_addr = base;
 	ata_sff_std_ports(&ap->ioaddr);
-	ap->pflags = ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE;
+	ap->pflags |= ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE;
 
 	ninja32_program(base);
 	/* FIXME: Should we disable them at remove ? */
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index 6339efd..f2aaf9e 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -1845,8 +1845,9 @@
 	/* initialize memory management */
 	buffer_mem = eni_dev->mem - (buf - eni_dev->ram);
 	eni_dev->free_list_size = buffer_mem/MID_MIN_BUF_SIZE/2;
-	eni_dev->free_list = kmalloc(
-	    sizeof(struct eni_free)*(eni_dev->free_list_size+1),GFP_KERNEL);
+	eni_dev->free_list = kmalloc_array(eni_dev->free_list_size + 1,
+					   sizeof(*eni_dev->free_list),
+					   GFP_KERNEL);
 	if (!eni_dev->free_list) {
 		printk(KERN_ERR DEV_LABEL "(itf %d): couldn't get free page\n",
 		    dev->number);
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index 75dde90..81aaa50 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -2489,7 +2489,7 @@
 {
     const struct firmware *firmware;
     struct device *device;
-    struct fw_header *fw_header;
+    const struct fw_header *fw_header;
     const __le32 *fw_data;
     u32 fw_size;
     u32 __iomem *load_addr;
@@ -2511,9 +2511,9 @@
 	return err;
     }
 
-    fw_data = (__le32 *) firmware->data;
+    fw_data = (const __le32 *)firmware->data;
     fw_size = firmware->size / sizeof(u32);
-    fw_header = (struct fw_header *) firmware->data;
+    fw_header = (const struct fw_header *)firmware->data;
     load_addr = fore200e->virt_base + le32_to_cpu(fw_header->load_offset);
 
     DPRINTK(2, "device %s firmware being loaded at 0x%p (%d words)\n",
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 0f5cb37..31b513a 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -779,8 +779,9 @@
 		  G0_RBPS_BS + (group * 32));
 
 	/* bitmap table */
-	he_dev->rbpl_table = kmalloc(BITS_TO_LONGS(RBPL_TABLE_SIZE)
-				     * sizeof(unsigned long), GFP_KERNEL);
+	he_dev->rbpl_table = kmalloc_array(BITS_TO_LONGS(RBPL_TABLE_SIZE),
+					   sizeof(*he_dev->rbpl_table),
+					   GFP_KERNEL);
 	if (!he_dev->rbpl_table) {
 		hprintk("unable to allocate rbpl bitmap table\n");
 		return -ENOMEM;
@@ -788,8 +789,9 @@
 	bitmap_zero(he_dev->rbpl_table, RBPL_TABLE_SIZE);
 
 	/* rbpl_virt 64-bit pointers */
-	he_dev->rbpl_virt = kmalloc(RBPL_TABLE_SIZE
-				    * sizeof(struct he_buff *), GFP_KERNEL);
+	he_dev->rbpl_virt = kmalloc_array(RBPL_TABLE_SIZE,
+					  sizeof(*he_dev->rbpl_virt),
+					  GFP_KERNEL);
 	if (!he_dev->rbpl_virt) {
 		hprintk("unable to allocate rbpl virt table\n");
 		goto out_free_rbpl_table;
diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index 809dd1e..b275676 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -1885,9 +1885,9 @@
                 if ((ret = ia_cbr_setup (iadev, vcc)) < 0) {     
                     return ret;
                 }
-       } 
-	else  
-           printk("iadev:  Non UBR, ABR and CBR traffic not supportedn"); 
+	} else {
+		printk("iadev:  Non UBR, ABR and CBR traffic not supported\n");
+	}
         
         iadev->testTable[vcc->vci]->vc_status |= VC_ACTIVE;
 	IF_EVENT(printk("ia open_tx returning \n");)  
@@ -1975,7 +1975,9 @@
 		buf_desc_ptr++;		  
 		tx_pkt_start += iadev->tx_buf_sz;  
 	}  
-        iadev->tx_buf = kmalloc(iadev->num_tx_desc*sizeof(struct cpcs_trailer_desc), GFP_KERNEL);
+	iadev->tx_buf = kmalloc_array(iadev->num_tx_desc,
+				      sizeof(*iadev->tx_buf),
+				      GFP_KERNEL);
         if (!iadev->tx_buf) {
             printk(KERN_ERR DEV_LABEL " couldn't get mem\n");
 	    goto err_free_dle;
@@ -1995,8 +1997,9 @@
 						       sizeof(*cpcs),
 						       DMA_TO_DEVICE);
         }
-        iadev->desc_tbl = kmalloc(iadev->num_tx_desc *
-                                   sizeof(struct desc_tbl_t), GFP_KERNEL);
+	iadev->desc_tbl = kmalloc_array(iadev->num_tx_desc,
+					sizeof(*iadev->desc_tbl),
+					GFP_KERNEL);
 	if (!iadev->desc_tbl) {
 		printk(KERN_ERR DEV_LABEL " couldn't get mem\n");
 		goto err_free_all_tx_bufs;
@@ -2124,7 +2127,9 @@
 	memset((caddr_t)(iadev->seg_ram+i),  0, iadev->num_vc*4);
 	vc = (struct main_vc *)iadev->MAIN_VC_TABLE_ADDR;  
 	evc = (struct ext_vc *)iadev->EXT_VC_TABLE_ADDR;  
-        iadev->testTable = kmalloc(sizeof(long)*iadev->num_vc, GFP_KERNEL); 
+	iadev->testTable = kmalloc_array(iadev->num_vc,
+					 sizeof(*iadev->testTable),
+					 GFP_KERNEL);
         if (!iadev->testTable) {
            printk("Get freepage  failed\n");
 	   goto err_free_desc_tbl;
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c
index 700ed15..c7296b5 100644
--- a/drivers/atm/nicstar.c
+++ b/drivers/atm/nicstar.c
@@ -370,7 +370,8 @@
 		return error;
         }
 
-	if ((card = kmalloc(sizeof(ns_dev), GFP_KERNEL)) == NULL) {
+	card = kmalloc(sizeof(*card), GFP_KERNEL);
+	if (!card) {
 		printk
 		    ("nicstar%d: can't allocate memory for device structure.\n",
 		     i);
@@ -611,7 +612,7 @@
 	for (j = 0; j < card->rct_size; j++)
 		ns_write_sram(card, j * 4, u32d, 4);
 
-	memset(card->vcmap, 0, NS_MAX_RCTSIZE * sizeof(vc_map));
+	memset(card->vcmap, 0, sizeof(card->vcmap));
 
 	for (j = 0; j < NS_FRSCD_NUM; j++)
 		card->scd2vc[j] = NULL;
@@ -862,7 +863,7 @@
 	if (size != VBR_SCQSIZE && size != CBR_SCQSIZE)
 		return NULL;
 
-	scq = kmalloc(sizeof(scq_info), GFP_KERNEL);
+	scq = kmalloc(sizeof(*scq), GFP_KERNEL);
 	if (!scq)
 		return NULL;
         scq->org = dma_alloc_coherent(&card->pcidev->dev,
@@ -871,8 +872,9 @@
 		kfree(scq);
 		return NULL;
 	}
-	scq->skb = kmalloc(sizeof(struct sk_buff *) *
-			   (size / NS_SCQE_SIZE), GFP_KERNEL);
+	scq->skb = kmalloc_array(size / NS_SCQE_SIZE,
+				 sizeof(*scq->skb),
+				 GFP_KERNEL);
 	if (!scq->skb) {
 		dma_free_coherent(&card->pcidev->dev,
 				  2 * size, scq->org, scq->dma);
@@ -2021,7 +2023,8 @@
 
 		cell = skb->data;
 		for (i = ns_rsqe_cellcount(rsqe); i; i--) {
-			if ((sb = dev_alloc_skb(NS_SMSKBSIZE)) == NULL) {
+			sb = dev_alloc_skb(NS_SMSKBSIZE);
+			if (!sb) {
 				printk
 				    ("nicstar%d: Can't allocate buffers for aal0.\n",
 				     card->index);
diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index cecfb94..d3dc954 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -598,12 +598,13 @@
 static int start_rx(struct atm_dev *dev)
 {
 	struct zatm_dev *zatm_dev;
-	int size,i;
+	int i;
 
-DPRINTK("start_rx\n");
+	DPRINTK("start_rx\n");
 	zatm_dev = ZATM_DEV(dev);
-	size = sizeof(struct atm_vcc *)*zatm_dev->chans;
-	zatm_dev->rx_map =  kzalloc(size,GFP_KERNEL);
+	zatm_dev->rx_map = kcalloc(zatm_dev->chans,
+				   sizeof(*zatm_dev->rx_map),
+				   GFP_KERNEL);
 	if (!zatm_dev->rx_map) return -ENOMEM;
 	/* set VPI/VCI split (use all VCIs and give what's left to VPIs) */
 	zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR);
@@ -998,8 +999,9 @@
 
 	DPRINTK("start_tx\n");
 	zatm_dev = ZATM_DEV(dev);
-	zatm_dev->tx_map = kmalloc(sizeof(struct atm_vcc *)*
-	    zatm_dev->chans,GFP_KERNEL);
+	zatm_dev->tx_map = kmalloc_array(zatm_dev->chans,
+					 sizeof(*zatm_dev->tx_map),
+					 GFP_KERNEL);
 	if (!zatm_dev->tx_map) return -ENOMEM;
 	zatm_dev->tx_bw = ATM_OC3_PCR;
 	zatm_dev->free_shapers = (1 << NR_SHAPERS)-1;
@@ -1398,7 +1400,7 @@
 	DPRINTK(DEV_LABEL "(itf %d): open %d.%d\n",vcc->dev->number,vcc->vpi,
 	    vcc->vci);
 	if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) {
-		zatm_vcc = kmalloc(sizeof(struct zatm_vcc),GFP_KERNEL);
+		zatm_vcc = kmalloc(sizeof(*zatm_vcc), GFP_KERNEL);
 		if (!zatm_vcc) {
 			clear_bit(ATM_VF_ADDR,&vcc->flags);
 			return -ENOMEM;
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 98504ec..fdf44ca 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -212,6 +212,16 @@
 
 	  If you are unsure about this, Say N here.
 
+config DEBUG_TEST_DRIVER_REMOVE
+	bool "Test driver remove calls during probe"
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here if you want the Driver core to test driver remove functions
+	  by calling probe, remove, probe. This tests the remove path without
+	  having to unbind the driver or unload the driver module.
+
+	  If you are unsure about this, say N here.
+
 config SYS_HYPERVISOR
 	bool
 	default n
diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c
index 2ba4cac..95e3ef8 100644
--- a/drivers/base/attribute_container.c
+++ b/drivers/base/attribute_container.c
@@ -243,7 +243,7 @@
  * @dev:  The generic device to run the trigger for
  * @fn	  the function to execute for each classdev.
  *
- * This funcion is for executing a trigger when you need to know both
+ * This function is for executing a trigger when you need to know both
  * the container and the classdev.  If you only care about the
  * container, then use attribute_container_trigger() instead.
  */
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 0a8bdad..ce057a5 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -836,11 +836,29 @@
 	return NULL;
 }
 
+static inline bool live_in_glue_dir(struct kobject *kobj,
+				    struct device *dev)
+{
+	if (!kobj || !dev->class ||
+	    kobj->kset != &dev->class->p->glue_dirs)
+		return false;
+	return true;
+}
+
+static inline struct kobject *get_glue_dir(struct device *dev)
+{
+	return dev->kobj.parent;
+}
+
+/*
+ * make sure cleaning up dir as the last step, we need to make
+ * sure .release handler of kobject is run with holding the
+ * global lock
+ */
 static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir)
 {
 	/* see if we live in a "glue" directory */
-	if (!glue_dir || !dev->class ||
-	    glue_dir->kset != &dev->class->p->glue_dirs)
+	if (!live_in_glue_dir(glue_dir, dev))
 		return;
 
 	mutex_lock(&gdp_mutex);
@@ -848,11 +866,6 @@
 	mutex_unlock(&gdp_mutex);
 }
 
-static void cleanup_device_parent(struct device *dev)
-{
-	cleanup_glue_dir(dev, dev->kobj.parent);
-}
-
 static int device_add_class_symlinks(struct device *dev)
 {
 	struct device_node *of_node = dev_of_node(dev);
@@ -1028,6 +1041,7 @@
 	struct kobject *kobj;
 	struct class_interface *class_intf;
 	int error = -EINVAL;
+	struct kobject *glue_dir = NULL;
 
 	dev = get_device(dev);
 	if (!dev)
@@ -1072,8 +1086,10 @@
 	/* first, register with generic layer. */
 	/* we require the name to be set before, and pass NULL */
 	error = kobject_add(&dev->kobj, dev->kobj.parent, NULL);
-	if (error)
+	if (error) {
+		glue_dir = get_glue_dir(dev);
 		goto Error;
+	}
 
 	/* notify platform of device entry */
 	if (platform_notify)
@@ -1154,9 +1170,10 @@
 	device_remove_file(dev, &dev_attr_uevent);
  attrError:
 	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
+	glue_dir = get_glue_dir(dev);
 	kobject_del(&dev->kobj);
  Error:
-	cleanup_device_parent(dev);
+	cleanup_glue_dir(dev, glue_dir);
 	put_device(parent);
 name_error:
 	kfree(dev->p);
@@ -1232,6 +1249,7 @@
 void device_del(struct device *dev)
 {
 	struct device *parent = dev->parent;
+	struct kobject *glue_dir = NULL;
 	struct class_interface *class_intf;
 
 	/* Notify clients of device removal.  This call must come
@@ -1266,6 +1284,7 @@
 	bus_remove_device(dev);
 	device_pm_remove(dev);
 	driver_deferred_probe_del(dev);
+	device_remove_properties(dev);
 
 	/* Notify the platform of the removal, in case they
 	 * need to do anything...
@@ -1276,8 +1295,9 @@
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
 					     BUS_NOTIFY_REMOVED_DEVICE, dev);
 	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
-	cleanup_device_parent(dev);
+	glue_dir = get_glue_dir(dev);
 	kobject_del(&dev->kobj);
+	cleanup_glue_dir(dev, glue_dir);
 	put_device(parent);
 }
 EXPORT_SYMBOL_GPL(device_del);
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 691eeea..4c28e1a 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -371,12 +371,13 @@
 	if (cpu->hotpluggable)
 		cpu->dev.groups = hotplugable_cpu_attr_groups;
 	error = device_register(&cpu->dev);
-	if (!error)
-		per_cpu(cpu_sys_devices, num) = &cpu->dev;
-	if (!error)
-		register_cpu_under_node(num, cpu_to_node(num));
+	if (error)
+		return error;
 
-	return error;
+	per_cpu(cpu_sys_devices, num) = &cpu->dev;
+	register_cpu_under_node(num, cpu_to_node(num));
+
+	return 0;
 }
 
 struct device *get_cpu_device(unsigned cpu)
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 16688f5..d22a726 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -51,7 +51,6 @@
 static DEFINE_MUTEX(deferred_probe_mutex);
 static LIST_HEAD(deferred_probe_pending_list);
 static LIST_HEAD(deferred_probe_active_list);
-static struct workqueue_struct *deferred_wq;
 static atomic_t deferred_trigger_count = ATOMIC_INIT(0);
 
 /*
@@ -175,7 +174,7 @@
 	 * Kick the re-probe thread.  It may already be scheduled, but it is
 	 * safe to kick it again.
 	 */
-	queue_work(deferred_wq, &deferred_probe_work);
+	schedule_work(&deferred_probe_work);
 }
 
 /**
@@ -211,14 +210,10 @@
  */
 static int deferred_probe_initcall(void)
 {
-	deferred_wq = create_singlethread_workqueue("deferwq");
-	if (WARN_ON(!deferred_wq))
-		return -ENOMEM;
-
 	driver_deferred_probe_enable = true;
 	driver_deferred_probe_trigger();
 	/* Sort as many dependencies as possible before exiting initcalls */
-	flush_workqueue(deferred_wq);
+	flush_work(&deferred_probe_work);
 	return 0;
 }
 late_initcall(deferred_probe_initcall);
@@ -329,6 +324,7 @@
 {
 	int ret = -EPROBE_DEFER;
 	int local_trigger_count = atomic_read(&deferred_trigger_count);
+	bool test_remove = IS_ENABLED(CONFIG_DEBUG_TEST_DRIVER_REMOVE);
 
 	if (defer_all_probes) {
 		/*
@@ -346,6 +342,7 @@
 		 drv->bus->name, __func__, drv->name, dev_name(dev));
 	WARN_ON(!list_empty(&dev->devres_head));
 
+re_probe:
 	dev->driver = drv;
 
 	/* If using pinctrl, bind pins now before probing */
@@ -383,6 +380,25 @@
 			goto probe_failed;
 	}
 
+	if (test_remove) {
+		test_remove = false;
+
+		if (dev->bus && dev->bus->remove)
+			dev->bus->remove(dev);
+		else if (drv->remove)
+			drv->remove(dev);
+
+		devres_release_all(dev);
+		driver_sysfs_remove(dev);
+		dev->driver = NULL;
+		dev_set_drvdata(dev, NULL);
+		if (dev->pm_domain && dev->pm_domain->dismiss)
+			dev->pm_domain->dismiss(dev);
+		pm_runtime_reinit(dev);
+
+		goto re_probe;
+	}
+
 	pinctrl_init_done(dev);
 
 	if (dev->pm_domain && dev->pm_domain->sync)
@@ -460,8 +476,7 @@
 void wait_for_device_probe(void)
 {
 	/* wait for the deferred probe workqueue to finish */
-	if (driver_deferred_probe_enable)
-		flush_workqueue(deferred_wq);
+	flush_work(&deferred_probe_work);
 
 	/* wait for the known devices to complete their probing */
 	wait_event(probe_waitqueue, atomic_read(&probe_count) == 0);
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index bdf28f7..640a7e6 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -165,6 +165,7 @@
 	int order = get_order(size);
 	unsigned long flags;
 	int pageno;
+	int dma_memory_map;
 
 	if (!dev)
 		return 0;
@@ -187,11 +188,12 @@
 	 */
 	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
 	*ret = mem->virt_base + (pageno << PAGE_SHIFT);
-	if (mem->flags & DMA_MEMORY_MAP)
+	dma_memory_map = (mem->flags & DMA_MEMORY_MAP);
+	spin_unlock_irqrestore(&mem->spinlock, flags);
+	if (dma_memory_map)
 		memset(*ret, 0, size);
 	else
 		memset_io(*ret, 0, size);
-	spin_unlock_irqrestore(&mem->spinlock, flags);
 
 	return 1;
 
@@ -261,8 +263,8 @@
 		   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
 		unsigned long off = vma->vm_pgoff;
 		int start = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-		int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
-		int count = size >> PAGE_SHIFT;
+		int user_count = vma_pages(vma);
+		int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
 		*ret = -ENXIO;
 		if (off < count && user_count <= count - off) {
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index d799662..8f8b68c 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -198,10 +198,13 @@
 
 	rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size,
 					 flags);
-	if (rc == 0)
+	if (rc) {
 		devres_add(dev, res);
-	else
+		rc = 0;
+	} else {
 		devres_free(res);
+		rc = -ENOMEM;
+	}
 
 	return rc;
 }
@@ -247,7 +250,7 @@
 {
 	int ret = -ENXIO;
 #if defined(CONFIG_MMU) && !defined(CONFIG_ARCH_NO_COHERENT_DMA_MMAP)
-	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long user_count = vma_pages(vma);
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
 	unsigned long off = vma->vm_pgoff;
@@ -334,7 +337,7 @@
 		return;
 	}
 
-	unmap_kernel_range((unsigned long)cpu_addr, size);
+	unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
 	vunmap(cpu_addr);
 }
 #endif
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 279e539..be6a599 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -142,13 +142,12 @@
 	}
 
 	for (i = 0; i < nvec; i++) {
-		desc = alloc_msi_entry(dev);
+		desc = alloc_msi_entry(dev, 1, NULL);
 		if (!desc)
 			break;
 
 		desc->platform.msi_priv_data = data;
 		desc->platform.msi_index = base + i;
-		desc->nvec_used = 1;
 		desc->irq = virq ? virq + i : 0;
 
 		list_add_tail(&desc->list, dev_to_msi_list(dev));
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 6482d47..c4af003 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -97,7 +97,7 @@
 		int ret;
 
 		ret = of_irq_get(dev->dev.of_node, num);
-		if (ret >= 0 || ret == -EPROBE_DEFER)
+		if (ret > 0 || ret == -EPROBE_DEFER)
 			return ret;
 	}
 
@@ -108,9 +108,14 @@
 	 * IORESOURCE_BITS correspond 1-to-1 to the IRQF_TRIGGER*
 	 * settings.
 	 */
-	if (r && r->flags & IORESOURCE_BITS)
-		irqd_set_trigger_type(irq_get_irq_data(r->start),
-				      r->flags & IORESOURCE_BITS);
+	if (r && r->flags & IORESOURCE_BITS) {
+		struct irq_data *irqd;
+
+		irqd = irq_get_irq_data(r->start);
+		if (!irqd)
+			return -ENXIO;
+		irqd_set_trigger_type(irqd, r->flags & IORESOURCE_BITS);
+	}
 
 	return r ? r->start : -ENXIO;
 #endif
@@ -175,7 +180,7 @@
 		int ret;
 
 		ret = of_irq_get_byname(dev->dev.of_node, name);
-		if (ret >= 0 || ret == -EPROBE_DEFER)
+		if (ret > 0 || ret == -EPROBE_DEFER)
 			return ret;
 	}
 
@@ -434,6 +439,7 @@
 	int i;
 
 	if (pdev) {
+		device_remove_properties(&pdev->dev);
 		device_del(&pdev->dev);
 
 		if (pdev->id_auto) {
@@ -446,8 +452,6 @@
 			if (r->parent)
 				release_resource(r);
 		}
-
-		device_remove_properties(&pdev->dev);
 	}
 }
 EXPORT_SYMBOL_GPL(platform_device_del);
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index a1f2aff..e023066 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -45,7 +45,7 @@
  * and checks that the PM domain pointer is a real generic PM domain.
  * Any failure results in NULL being returned.
  */
-struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev)
+static struct generic_pm_domain *genpd_lookup_dev(struct device *dev)
 {
 	struct generic_pm_domain *genpd = NULL, *gpd;
 
@@ -586,7 +586,7 @@
 }
 late_initcall(genpd_poweroff_unused);
 
-#ifdef CONFIG_PM_SLEEP
+#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_GENERIC_DOMAINS_OF)
 
 /**
  * pm_genpd_present - Check if the given PM domain has been initialized.
@@ -606,6 +606,10 @@
 	return false;
 }
 
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+
 static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd,
 				    struct device *dev)
 {
@@ -613,9 +617,8 @@
 }
 
 /**
- * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters.
+ * genpd_sync_poweroff - Synchronously power off a PM domain and its masters.
  * @genpd: PM domain to power off, if possible.
- * @timed: True if latency measurements are allowed.
  *
  * Check if the given PM domain can be powered off (during system suspend or
  * hibernation) and do that if so.  Also, in that case propagate to its masters.
@@ -625,8 +628,7 @@
  * executed sequentially, so it is guaranteed that it will never run twice in
  * parallel).
  */
-static void pm_genpd_sync_poweroff(struct generic_pm_domain *genpd,
-				   bool timed)
+static void genpd_sync_poweroff(struct generic_pm_domain *genpd)
 {
 	struct gpd_link *link;
 
@@ -639,28 +641,26 @@
 
 	/* Choose the deepest state when suspending */
 	genpd->state_idx = genpd->state_count - 1;
-	genpd_power_off(genpd, timed);
+	genpd_power_off(genpd, false);
 
 	genpd->status = GPD_STATE_POWER_OFF;
 
 	list_for_each_entry(link, &genpd->slave_links, slave_node) {
 		genpd_sd_counter_dec(link->master);
-		pm_genpd_sync_poweroff(link->master, timed);
+		genpd_sync_poweroff(link->master);
 	}
 }
 
 /**
- * pm_genpd_sync_poweron - Synchronously power on a PM domain and its masters.
+ * genpd_sync_poweron - Synchronously power on a PM domain and its masters.
  * @genpd: PM domain to power on.
- * @timed: True if latency measurements are allowed.
  *
  * This function is only called in "noirq" and "syscore" stages of system power
  * transitions, so it need not acquire locks (all of the "noirq" callbacks are
  * executed sequentially, so it is guaranteed that it will never run twice in
  * parallel).
  */
-static void pm_genpd_sync_poweron(struct generic_pm_domain *genpd,
-				  bool timed)
+static void genpd_sync_poweron(struct generic_pm_domain *genpd)
 {
 	struct gpd_link *link;
 
@@ -668,11 +668,11 @@
 		return;
 
 	list_for_each_entry(link, &genpd->slave_links, slave_node) {
-		pm_genpd_sync_poweron(link->master, timed);
+		genpd_sync_poweron(link->master);
 		genpd_sd_counter_inc(link->master);
 	}
 
-	genpd_power_on(genpd, timed);
+	genpd_power_on(genpd, false);
 
 	genpd->status = GPD_STATE_ACTIVE;
 }
@@ -784,7 +784,7 @@
 	 * the same PM domain, so it is not necessary to use locking here.
 	 */
 	genpd->suspended_count++;
-	pm_genpd_sync_poweroff(genpd, true);
+	genpd_sync_poweroff(genpd);
 
 	return 0;
 }
@@ -814,7 +814,7 @@
 	 * guaranteed that this function will never run twice in parallel for
 	 * the same PM domain, so it is not necessary to use locking here.
 	 */
-	pm_genpd_sync_poweron(genpd, true);
+	genpd_sync_poweron(genpd);
 	genpd->suspended_count--;
 
 	if (genpd->dev_ops.stop && genpd->dev_ops.start)
@@ -902,12 +902,12 @@
 	if (genpd->suspended_count++ == 0)
 		/*
 		 * The boot kernel might put the domain into arbitrary state,
-		 * so make it appear as powered off to pm_genpd_sync_poweron(),
+		 * so make it appear as powered off to genpd_sync_poweron(),
 		 * so that it tries to power it on in case it was really off.
 		 */
 		genpd->status = GPD_STATE_POWER_OFF;
 
-	pm_genpd_sync_poweron(genpd, true);
+	genpd_sync_poweron(genpd);
 
 	if (genpd->dev_ops.stop && genpd->dev_ops.start)
 		ret = pm_runtime_force_resume(dev);
@@ -962,9 +962,9 @@
 
 	if (suspend) {
 		genpd->suspended_count++;
-		pm_genpd_sync_poweroff(genpd, false);
+		genpd_sync_poweroff(genpd);
 	} else {
-		pm_genpd_sync_poweron(genpd, false);
+		genpd_sync_poweron(genpd);
 		genpd->suspended_count--;
 	}
 }
@@ -1056,14 +1056,8 @@
 	dev_pm_put_subsys_data(dev);
 }
 
-/**
- * __pm_genpd_add_device - Add a device to an I/O PM domain.
- * @genpd: PM domain to add the device to.
- * @dev: Device to be added.
- * @td: Set of PM QoS timing parameters to attach to the device.
- */
-int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
-			  struct gpd_timing_data *td)
+static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
+			    struct gpd_timing_data *td)
 {
 	struct generic_pm_domain_data *gpd_data;
 	int ret = 0;
@@ -1103,15 +1097,28 @@
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(__pm_genpd_add_device);
 
 /**
- * pm_genpd_remove_device - Remove a device from an I/O PM domain.
- * @genpd: PM domain to remove the device from.
- * @dev: Device to be removed.
+ * __pm_genpd_add_device - Add a device to an I/O PM domain.
+ * @genpd: PM domain to add the device to.
+ * @dev: Device to be added.
+ * @td: Set of PM QoS timing parameters to attach to the device.
  */
-int pm_genpd_remove_device(struct generic_pm_domain *genpd,
-			   struct device *dev)
+int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
+			  struct gpd_timing_data *td)
+{
+	int ret;
+
+	mutex_lock(&gpd_list_lock);
+	ret = genpd_add_device(genpd, dev, td);
+	mutex_unlock(&gpd_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__pm_genpd_add_device);
+
+static int genpd_remove_device(struct generic_pm_domain *genpd,
+			       struct device *dev)
 {
 	struct generic_pm_domain_data *gpd_data;
 	struct pm_domain_data *pdd;
@@ -1119,10 +1126,6 @@
 
 	dev_dbg(dev, "%s()\n", __func__);
 
-	if (!genpd || genpd != pm_genpd_lookup_dev(dev))
-		return -EINVAL;
-
-	/* The above validation also means we have existing domain_data. */
 	pdd = dev->power.subsys_data->domain_data;
 	gpd_data = to_gpd_data(pdd);
 	dev_pm_qos_remove_notifier(dev, &gpd_data->nb);
@@ -1154,15 +1157,24 @@
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(pm_genpd_remove_device);
 
 /**
- * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain.
- * @genpd: Master PM domain to add the subdomain to.
- * @subdomain: Subdomain to be added.
+ * pm_genpd_remove_device - Remove a device from an I/O PM domain.
+ * @genpd: PM domain to remove the device from.
+ * @dev: Device to be removed.
  */
-int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
-			   struct generic_pm_domain *subdomain)
+int pm_genpd_remove_device(struct generic_pm_domain *genpd,
+			   struct device *dev)
+{
+	if (!genpd || genpd != genpd_lookup_dev(dev))
+		return -EINVAL;
+
+	return genpd_remove_device(genpd, dev);
+}
+EXPORT_SYMBOL_GPL(pm_genpd_remove_device);
+
+static int genpd_add_subdomain(struct generic_pm_domain *genpd,
+			       struct generic_pm_domain *subdomain)
 {
 	struct gpd_link *link, *itr;
 	int ret = 0;
@@ -1205,6 +1217,23 @@
 		kfree(link);
 	return ret;
 }
+
+/**
+ * pm_genpd_add_subdomain - Add a subdomain to an I/O PM domain.
+ * @genpd: Master PM domain to add the subdomain to.
+ * @subdomain: Subdomain to be added.
+ */
+int pm_genpd_add_subdomain(struct generic_pm_domain *genpd,
+			   struct generic_pm_domain *subdomain)
+{
+	int ret;
+
+	mutex_lock(&gpd_list_lock);
+	ret = genpd_add_subdomain(genpd, subdomain);
+	mutex_unlock(&gpd_list_lock);
+
+	return ret;
+}
 EXPORT_SYMBOL_GPL(pm_genpd_add_subdomain);
 
 /**
@@ -1278,27 +1307,17 @@
 	genpd->device_count = 0;
 	genpd->max_off_time_ns = -1;
 	genpd->max_off_time_changed = true;
+	genpd->provider = NULL;
+	genpd->has_provider = false;
 	genpd->domain.ops.runtime_suspend = genpd_runtime_suspend;
 	genpd->domain.ops.runtime_resume = genpd_runtime_resume;
 	genpd->domain.ops.prepare = pm_genpd_prepare;
-	genpd->domain.ops.suspend = pm_generic_suspend;
-	genpd->domain.ops.suspend_late = pm_generic_suspend_late;
 	genpd->domain.ops.suspend_noirq = pm_genpd_suspend_noirq;
 	genpd->domain.ops.resume_noirq = pm_genpd_resume_noirq;
-	genpd->domain.ops.resume_early = pm_generic_resume_early;
-	genpd->domain.ops.resume = pm_generic_resume;
-	genpd->domain.ops.freeze = pm_generic_freeze;
-	genpd->domain.ops.freeze_late = pm_generic_freeze_late;
 	genpd->domain.ops.freeze_noirq = pm_genpd_freeze_noirq;
 	genpd->domain.ops.thaw_noirq = pm_genpd_thaw_noirq;
-	genpd->domain.ops.thaw_early = pm_generic_thaw_early;
-	genpd->domain.ops.thaw = pm_generic_thaw;
-	genpd->domain.ops.poweroff = pm_generic_poweroff;
-	genpd->domain.ops.poweroff_late = pm_generic_poweroff_late;
 	genpd->domain.ops.poweroff_noirq = pm_genpd_suspend_noirq;
 	genpd->domain.ops.restore_noirq = pm_genpd_restore_noirq;
-	genpd->domain.ops.restore_early = pm_generic_restore_early;
-	genpd->domain.ops.restore = pm_generic_restore;
 	genpd->domain.ops.complete = pm_genpd_complete;
 
 	if (genpd->flags & GENPD_FLAG_PM_CLK) {
@@ -1328,7 +1347,71 @@
 }
 EXPORT_SYMBOL_GPL(pm_genpd_init);
 
+static int genpd_remove(struct generic_pm_domain *genpd)
+{
+	struct gpd_link *l, *link;
+
+	if (IS_ERR_OR_NULL(genpd))
+		return -EINVAL;
+
+	mutex_lock(&genpd->lock);
+
+	if (genpd->has_provider) {
+		mutex_unlock(&genpd->lock);
+		pr_err("Provider present, unable to remove %s\n", genpd->name);
+		return -EBUSY;
+	}
+
+	if (!list_empty(&genpd->master_links) || genpd->device_count) {
+		mutex_unlock(&genpd->lock);
+		pr_err("%s: unable to remove %s\n", __func__, genpd->name);
+		return -EBUSY;
+	}
+
+	list_for_each_entry_safe(link, l, &genpd->slave_links, slave_node) {
+		list_del(&link->master_node);
+		list_del(&link->slave_node);
+		kfree(link);
+	}
+
+	list_del(&genpd->gpd_list_node);
+	mutex_unlock(&genpd->lock);
+	cancel_work_sync(&genpd->power_off_work);
+	pr_debug("%s: removed %s\n", __func__, genpd->name);
+
+	return 0;
+}
+
+/**
+ * pm_genpd_remove - Remove a generic I/O PM domain
+ * @genpd: Pointer to PM domain that is to be removed.
+ *
+ * To remove the PM domain, this function:
+ *  - Removes the PM domain as a subdomain to any parent domains,
+ *    if it was added.
+ *  - Removes the PM domain from the list of registered PM domains.
+ *
+ * The PM domain will only be removed, if the associated provider has
+ * been removed, it is not a parent to any other PM domain and has no
+ * devices associated with it.
+ */
+int pm_genpd_remove(struct generic_pm_domain *genpd)
+{
+	int ret;
+
+	mutex_lock(&gpd_list_lock);
+	ret = genpd_remove(genpd);
+	mutex_unlock(&gpd_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pm_genpd_remove);
+
 #ifdef CONFIG_PM_GENERIC_DOMAINS_OF
+
+typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args,
+						   void *data);
+
 /*
  * Device Tree based PM domain providers.
  *
@@ -1340,8 +1423,8 @@
  * maps a PM domain specifier retrieved from the device tree to a PM domain.
  *
  * Two simple mapping functions have been provided for convenience:
- *  - __of_genpd_xlate_simple() for 1:1 device tree node to PM domain mapping.
- *  - __of_genpd_xlate_onecell() for mapping of multiple PM domains per node by
+ *  - genpd_xlate_simple() for 1:1 device tree node to PM domain mapping.
+ *  - genpd_xlate_onecell() for mapping of multiple PM domains per node by
  *    index.
  */
 
@@ -1366,7 +1449,7 @@
 static DEFINE_MUTEX(of_genpd_mutex);
 
 /**
- * __of_genpd_xlate_simple() - Xlate function for direct node-domain mapping
+ * genpd_xlate_simple() - Xlate function for direct node-domain mapping
  * @genpdspec: OF phandle args to map into a PM domain
  * @data: xlate function private data - pointer to struct generic_pm_domain
  *
@@ -1374,7 +1457,7 @@
  * have their own device tree nodes. The private data of xlate function needs
  * to be a valid pointer to struct generic_pm_domain.
  */
-struct generic_pm_domain *__of_genpd_xlate_simple(
+static struct generic_pm_domain *genpd_xlate_simple(
 					struct of_phandle_args *genpdspec,
 					void *data)
 {
@@ -1382,10 +1465,9 @@
 		return ERR_PTR(-EINVAL);
 	return data;
 }
-EXPORT_SYMBOL_GPL(__of_genpd_xlate_simple);
 
 /**
- * __of_genpd_xlate_onecell() - Xlate function using a single index.
+ * genpd_xlate_onecell() - Xlate function using a single index.
  * @genpdspec: OF phandle args to map into a PM domain
  * @data: xlate function private data - pointer to struct genpd_onecell_data
  *
@@ -1394,7 +1476,7 @@
  * A single cell is used as an index into an array of PM domains specified in
  * the genpd_onecell_data struct when registering the provider.
  */
-struct generic_pm_domain *__of_genpd_xlate_onecell(
+static struct generic_pm_domain *genpd_xlate_onecell(
 					struct of_phandle_args *genpdspec,
 					void *data)
 {
@@ -1414,16 +1496,15 @@
 
 	return genpd_data->domains[idx];
 }
-EXPORT_SYMBOL_GPL(__of_genpd_xlate_onecell);
 
 /**
- * __of_genpd_add_provider() - Register a PM domain provider for a node
+ * genpd_add_provider() - Register a PM domain provider for a node
  * @np: Device node pointer associated with the PM domain provider.
  * @xlate: Callback for decoding PM domain from phandle arguments.
  * @data: Context pointer for @xlate callback.
  */
-int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
-			void *data)
+static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
+			      void *data)
 {
 	struct of_genpd_provider *cp;
 
@@ -1442,7 +1523,83 @@
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(__of_genpd_add_provider);
+
+/**
+ * of_genpd_add_provider_simple() - Register a simple PM domain provider
+ * @np: Device node pointer associated with the PM domain provider.
+ * @genpd: Pointer to PM domain associated with the PM domain provider.
+ */
+int of_genpd_add_provider_simple(struct device_node *np,
+				 struct generic_pm_domain *genpd)
+{
+	int ret = -EINVAL;
+
+	if (!np || !genpd)
+		return -EINVAL;
+
+	mutex_lock(&gpd_list_lock);
+
+	if (pm_genpd_present(genpd))
+		ret = genpd_add_provider(np, genpd_xlate_simple, genpd);
+
+	if (!ret) {
+		genpd->provider = &np->fwnode;
+		genpd->has_provider = true;
+	}
+
+	mutex_unlock(&gpd_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(of_genpd_add_provider_simple);
+
+/**
+ * of_genpd_add_provider_onecell() - Register a onecell PM domain provider
+ * @np: Device node pointer associated with the PM domain provider.
+ * @data: Pointer to the data associated with the PM domain provider.
+ */
+int of_genpd_add_provider_onecell(struct device_node *np,
+				  struct genpd_onecell_data *data)
+{
+	unsigned int i;
+	int ret = -EINVAL;
+
+	if (!np || !data)
+		return -EINVAL;
+
+	mutex_lock(&gpd_list_lock);
+
+	for (i = 0; i < data->num_domains; i++) {
+		if (!data->domains[i])
+			continue;
+		if (!pm_genpd_present(data->domains[i]))
+			goto error;
+
+		data->domains[i]->provider = &np->fwnode;
+		data->domains[i]->has_provider = true;
+	}
+
+	ret = genpd_add_provider(np, genpd_xlate_onecell, data);
+	if (ret < 0)
+		goto error;
+
+	mutex_unlock(&gpd_list_lock);
+
+	return 0;
+
+error:
+	while (i--) {
+		if (!data->domains[i])
+			continue;
+		data->domains[i]->provider = NULL;
+		data->domains[i]->has_provider = false;
+	}
+
+	mutex_unlock(&gpd_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(of_genpd_add_provider_onecell);
 
 /**
  * of_genpd_del_provider() - Remove a previously registered PM domain provider
@@ -1451,10 +1608,21 @@
 void of_genpd_del_provider(struct device_node *np)
 {
 	struct of_genpd_provider *cp;
+	struct generic_pm_domain *gpd;
 
+	mutex_lock(&gpd_list_lock);
 	mutex_lock(&of_genpd_mutex);
 	list_for_each_entry(cp, &of_genpd_providers, link) {
 		if (cp->node == np) {
+			/*
+			 * For each PM domain associated with the
+			 * provider, set the 'has_provider' to false
+			 * so that the PM domain can be safely removed.
+			 */
+			list_for_each_entry(gpd, &gpd_list, gpd_list_node)
+				if (gpd->provider == &np->fwnode)
+					gpd->has_provider = false;
+
 			list_del(&cp->link);
 			of_node_put(cp->node);
 			kfree(cp);
@@ -1462,11 +1630,12 @@
 		}
 	}
 	mutex_unlock(&of_genpd_mutex);
+	mutex_unlock(&gpd_list_lock);
 }
 EXPORT_SYMBOL_GPL(of_genpd_del_provider);
 
 /**
- * of_genpd_get_from_provider() - Look-up PM domain
+ * genpd_get_from_provider() - Look-up PM domain
  * @genpdspec: OF phandle args to use for look-up
  *
  * Looks for a PM domain provider under the node specified by @genpdspec and if
@@ -1476,7 +1645,7 @@
  * Returns a valid pointer to struct generic_pm_domain on success or ERR_PTR()
  * on failure.
  */
-struct generic_pm_domain *of_genpd_get_from_provider(
+static struct generic_pm_domain *genpd_get_from_provider(
 					struct of_phandle_args *genpdspec)
 {
 	struct generic_pm_domain *genpd = ERR_PTR(-ENOENT);
@@ -1499,7 +1668,109 @@
 
 	return genpd;
 }
-EXPORT_SYMBOL_GPL(of_genpd_get_from_provider);
+
+/**
+ * of_genpd_add_device() - Add a device to an I/O PM domain
+ * @genpdspec: OF phandle args to use for look-up PM domain
+ * @dev: Device to be added.
+ *
+ * Looks-up an I/O PM domain based upon phandle args provided and adds
+ * the device to the PM domain. Returns a negative error code on failure.
+ */
+int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev)
+{
+	struct generic_pm_domain *genpd;
+	int ret;
+
+	mutex_lock(&gpd_list_lock);
+
+	genpd = genpd_get_from_provider(genpdspec);
+	if (IS_ERR(genpd)) {
+		ret = PTR_ERR(genpd);
+		goto out;
+	}
+
+	ret = genpd_add_device(genpd, dev, NULL);
+
+out:
+	mutex_unlock(&gpd_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(of_genpd_add_device);
+
+/**
+ * of_genpd_add_subdomain - Add a subdomain to an I/O PM domain.
+ * @parent_spec: OF phandle args to use for parent PM domain look-up
+ * @subdomain_spec: OF phandle args to use for subdomain look-up
+ *
+ * Looks-up a parent PM domain and subdomain based upon phandle args
+ * provided and adds the subdomain to the parent PM domain. Returns a
+ * negative error code on failure.
+ */
+int of_genpd_add_subdomain(struct of_phandle_args *parent_spec,
+			   struct of_phandle_args *subdomain_spec)
+{
+	struct generic_pm_domain *parent, *subdomain;
+	int ret;
+
+	mutex_lock(&gpd_list_lock);
+
+	parent = genpd_get_from_provider(parent_spec);
+	if (IS_ERR(parent)) {
+		ret = PTR_ERR(parent);
+		goto out;
+	}
+
+	subdomain = genpd_get_from_provider(subdomain_spec);
+	if (IS_ERR(subdomain)) {
+		ret = PTR_ERR(subdomain);
+		goto out;
+	}
+
+	ret = genpd_add_subdomain(parent, subdomain);
+
+out:
+	mutex_unlock(&gpd_list_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(of_genpd_add_subdomain);
+
+/**
+ * of_genpd_remove_last - Remove the last PM domain registered for a provider
+ * @provider: Pointer to device structure associated with provider
+ *
+ * Find the last PM domain that was added by a particular provider and
+ * remove this PM domain from the list of PM domains. The provider is
+ * identified by the 'provider' device structure that is passed. The PM
+ * domain will only be removed, if the provider associated with domain
+ * has been removed.
+ *
+ * Returns a valid pointer to struct generic_pm_domain on success or
+ * ERR_PTR() on failure.
+ */
+struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
+{
+	struct generic_pm_domain *gpd, *genpd = ERR_PTR(-ENOENT);
+	int ret;
+
+	if (IS_ERR_OR_NULL(np))
+		return ERR_PTR(-EINVAL);
+
+	mutex_lock(&gpd_list_lock);
+	list_for_each_entry(gpd, &gpd_list, gpd_list_node) {
+		if (gpd->provider == &np->fwnode) {
+			ret = genpd_remove(gpd);
+			genpd = ret ? ERR_PTR(ret) : gpd;
+			break;
+		}
+	}
+	mutex_unlock(&gpd_list_lock);
+
+	return genpd;
+}
+EXPORT_SYMBOL_GPL(of_genpd_remove_last);
 
 /**
  * genpd_dev_pm_detach - Detach a device from its PM domain.
@@ -1515,14 +1786,14 @@
 	unsigned int i;
 	int ret = 0;
 
-	pd = pm_genpd_lookup_dev(dev);
-	if (!pd)
+	pd = dev_to_genpd(dev);
+	if (IS_ERR(pd))
 		return;
 
 	dev_dbg(dev, "removing from PM domain %s\n", pd->name);
 
 	for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
-		ret = pm_genpd_remove_device(pd, dev);
+		ret = genpd_remove_device(pd, dev);
 		if (ret != -EAGAIN)
 			break;
 
@@ -1596,9 +1867,11 @@
 			return -ENOENT;
 	}
 
-	pd = of_genpd_get_from_provider(&pd_args);
+	mutex_lock(&gpd_list_lock);
+	pd = genpd_get_from_provider(&pd_args);
 	of_node_put(pd_args.np);
 	if (IS_ERR(pd)) {
+		mutex_unlock(&gpd_list_lock);
 		dev_dbg(dev, "%s() failed to find PM domain: %ld\n",
 			__func__, PTR_ERR(pd));
 		return -EPROBE_DEFER;
@@ -1607,13 +1880,14 @@
 	dev_dbg(dev, "adding to PM domain %s\n", pd->name);
 
 	for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) {
-		ret = pm_genpd_add_device(pd, dev);
+		ret = genpd_add_device(pd, dev, NULL);
 		if (ret != -EAGAIN)
 			break;
 
 		mdelay(i);
 		cond_resched();
 	}
+	mutex_unlock(&gpd_list_lock);
 
 	if (ret < 0) {
 		dev_err(dev, "failed to add to PM domain %s: %d",
@@ -1636,7 +1910,7 @@
 
 /***        debugfs support        ***/
 
-#ifdef CONFIG_PM_ADVANCED_DEBUG
+#ifdef CONFIG_DEBUG_FS
 #include <linux/pm.h>
 #include <linux/device.h>
 #include <linux/debugfs.h>
@@ -1784,4 +2058,4 @@
 	debugfs_remove_recursive(pm_genpd_debugfs_dir);
 }
 __exitcall(pm_genpd_debug_exit);
-#endif /* CONFIG_PM_ADVANCED_DEBUG */
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index df0c709..4c7c6da 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -584,7 +584,6 @@
 	struct clk *clk;
 	unsigned long freq, old_freq;
 	unsigned long u_volt, u_volt_min, u_volt_max;
-	unsigned long ou_volt, ou_volt_min, ou_volt_max;
 	int ret;
 
 	if (unlikely(!target_freq)) {
@@ -620,11 +619,7 @@
 	}
 
 	old_opp = _find_freq_ceil(opp_table, &old_freq);
-	if (!IS_ERR(old_opp)) {
-		ou_volt = old_opp->u_volt;
-		ou_volt_min = old_opp->u_volt_min;
-		ou_volt_max = old_opp->u_volt_max;
-	} else {
+	if (IS_ERR(old_opp)) {
 		dev_err(dev, "%s: failed to find current OPP for freq %lu (%ld)\n",
 			__func__, old_freq, PTR_ERR(old_opp));
 	}
@@ -683,7 +678,8 @@
 restore_voltage:
 	/* This shouldn't harm even if the voltages weren't updated earlier */
 	if (!IS_ERR(old_opp))
-		_set_opp_voltage(dev, reg, ou_volt, ou_volt_min, ou_volt_max);
+		_set_opp_voltage(dev, reg, old_opp->u_volt,
+				 old_opp->u_volt_min, old_opp->u_volt_max);
 
 	return ret;
 }
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
index 1dfd3dd..5552211 100644
--- a/drivers/base/power/opp/of.c
+++ b/drivers/base/power/opp/of.c
@@ -71,8 +71,18 @@
 	u32 version;
 	int ret;
 
-	if (!opp_table->supported_hw)
-		return true;
+	if (!opp_table->supported_hw) {
+		/*
+		 * In the case that no supported_hw has been set by the
+		 * platform but there is an opp-supported-hw value set for
+		 * an OPP then the OPP should not be enabled as there is
+		 * no way to see if the hardware supports it.
+		 */
+		if (of_find_property(np, "opp-supported-hw", NULL))
+			return false;
+		else
+			return true;
+	}
 
 	while (count--) {
 		ret = of_property_read_u32_index(np, "opp-supported-hw", count,
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index e097d35..82a081e 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -301,7 +301,7 @@
 	int (*callback)(struct device *);
 	int retval;
 
-	trace_rpm_idle(dev, rpmflags);
+	trace_rpm_idle_rcuidle(dev, rpmflags);
 	retval = rpm_check_suspend_allowed(dev);
 	if (retval < 0)
 		;	/* Conditions are wrong. */
@@ -337,7 +337,7 @@
 			dev->power.request_pending = true;
 			queue_work(pm_wq, &dev->power.work);
 		}
-		trace_rpm_return_int(dev, _THIS_IP_, 0);
+		trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0);
 		return 0;
 	}
 
@@ -352,7 +352,7 @@
 	wake_up_all(&dev->power.wait_queue);
 
  out:
-	trace_rpm_return_int(dev, _THIS_IP_, retval);
+	trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
 	return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
 }
 
@@ -419,7 +419,7 @@
 	struct device *parent = NULL;
 	int retval;
 
-	trace_rpm_suspend(dev, rpmflags);
+	trace_rpm_suspend_rcuidle(dev, rpmflags);
 
  repeat:
 	retval = rpm_check_suspend_allowed(dev);
@@ -549,7 +549,7 @@
 	}
 
  out:
-	trace_rpm_return_int(dev, _THIS_IP_, retval);
+	trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
 
 	return retval;
 
@@ -601,7 +601,7 @@
 	struct device *parent = NULL;
 	int retval = 0;
 
-	trace_rpm_resume(dev, rpmflags);
+	trace_rpm_resume_rcuidle(dev, rpmflags);
 
  repeat:
 	if (dev->power.runtime_error)
@@ -764,7 +764,7 @@
 		spin_lock_irq(&dev->power.lock);
 	}
 
-	trace_rpm_return_int(dev, _THIS_IP_, retval);
+	trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
 
 	return retval;
 }
diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index a038033..2a4435d 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -105,8 +105,8 @@
 
 	bool defer_caching;
 
-	u8 read_flag_mask;
-	u8 write_flag_mask;
+	unsigned long read_flag_mask;
+	unsigned long write_flag_mask;
 
 	/* number of bits to (left) shift the reg value when formatting*/
 	int reg_shift;
@@ -173,6 +173,7 @@
 	int (*drop)(struct regmap *map, unsigned int min, unsigned int max);
 };
 
+bool regmap_cached(struct regmap *map, unsigned int reg);
 bool regmap_writeable(struct regmap *map, unsigned int reg);
 bool regmap_readable(struct regmap *map, unsigned int reg);
 bool regmap_volatile(struct regmap *map, unsigned int reg);
diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
index aa56af8..b11af3f 100644
--- a/drivers/base/regmap/regcache-rbtree.c
+++ b/drivers/base/regmap/regcache-rbtree.c
@@ -404,6 +404,7 @@
 		unsigned int new_base_reg, new_top_reg;
 		unsigned int min, max;
 		unsigned int max_dist;
+		unsigned int dist, best_dist = UINT_MAX;
 
 		max_dist = map->reg_stride * sizeof(*rbnode_tmp) /
 			map->cache_word_size;
@@ -423,24 +424,41 @@
 				&base_reg, &top_reg);
 
 			if (base_reg <= max && top_reg >= min) {
-				new_base_reg = min(reg, base_reg);
-				new_top_reg = max(reg, top_reg);
-			} else {
-				if (max < base_reg)
-					node = node->rb_left;
+				if (reg < base_reg)
+					dist = base_reg - reg;
+				else if (reg > top_reg)
+					dist = reg - top_reg;
 				else
-					node = node->rb_right;
-
-				continue;
+					dist = 0;
+				if (dist < best_dist) {
+					rbnode = rbnode_tmp;
+					best_dist = dist;
+					new_base_reg = min(reg, base_reg);
+					new_top_reg = max(reg, top_reg);
+				}
 			}
 
-			ret = regcache_rbtree_insert_to_block(map, rbnode_tmp,
+			/*
+			 * Keep looking, we want to choose the closest block,
+			 * otherwise we might end up creating overlapping
+			 * blocks, which breaks the rbtree.
+			 */
+			if (reg < base_reg)
+				node = node->rb_left;
+			else if (reg > top_reg)
+				node = node->rb_right;
+			else
+				break;
+		}
+
+		if (rbnode) {
+			ret = regcache_rbtree_insert_to_block(map, rbnode,
 							      new_base_reg,
 							      new_top_reg, reg,
 							      value);
 			if (ret)
 				return ret;
-			rbtree_ctx->cached_rbnode = rbnode_tmp;
+			rbtree_ctx->cached_rbnode = rbnode;
 			return 0;
 		}
 
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index df7ff729..4e58256 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -38,10 +38,11 @@
 
 	/* calculate the size of reg_defaults */
 	for (count = 0, i = 0; i < map->num_reg_defaults_raw; i++)
-		if (!regmap_volatile(map, i * map->reg_stride))
+		if (regmap_readable(map, i * map->reg_stride) &&
+		    !regmap_volatile(map, i * map->reg_stride))
 			count++;
 
-	/* all registers are volatile, so just bypass */
+	/* all registers are unreadable or volatile, so just bypass */
 	if (!count) {
 		map->cache_bypass = true;
 		return 0;
diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index 1ee3d40..36ce351 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -77,6 +77,17 @@
 	}
 }
 
+static bool regmap_printable(struct regmap *map, unsigned int reg)
+{
+	if (regmap_precious(map, reg))
+		return false;
+
+	if (!regmap_readable(map, reg) && !regmap_cached(map, reg))
+		return false;
+
+	return true;
+}
+
 /*
  * Work out where the start offset maps into register numbers, bearing
  * in mind that we suppress hidden registers.
@@ -105,8 +116,7 @@
 	if (list_empty(&map->debugfs_off_cache)) {
 		for (; i <= map->max_register; i += map->reg_stride) {
 			/* Skip unprinted registers, closing off cache entry */
-			if (!regmap_readable(map, i) ||
-			    regmap_precious(map, i)) {
+			if (!regmap_printable(map, i)) {
 				if (c) {
 					c->max = p - 1;
 					c->max_reg = i - map->reg_stride;
@@ -204,7 +214,7 @@
 	start_reg = regmap_debugfs_get_dump_start(map, from, *ppos, &p);
 
 	for (i = start_reg; i <= to; i += map->reg_stride) {
-		if (!regmap_readable(map, i))
+		if (!regmap_readable(map, i) && !regmap_cached(map, i))
 			continue;
 
 		if (regmap_precious(map, i))
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 51fa7d6..ae63bb0 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -93,6 +93,29 @@
 	return true;
 }
 
+bool regmap_cached(struct regmap *map, unsigned int reg)
+{
+	int ret;
+	unsigned int val;
+
+	if (map->cache == REGCACHE_NONE)
+		return false;
+
+	if (!map->cache_ops)
+		return false;
+
+	if (map->max_register && reg > map->max_register)
+		return false;
+
+	map->lock(map->lock_arg);
+	ret = regcache_read(map, reg, &val);
+	map->unlock(map->lock_arg);
+	if (ret)
+		return false;
+
+	return true;
+}
+
 bool regmap_readable(struct regmap *map, unsigned int reg)
 {
 	if (!map->reg_read)
@@ -749,6 +772,9 @@
 		case REGMAP_ENDIAN_BIG:
 			map->format.format_reg = regmap_format_16_be;
 			break;
+		case REGMAP_ENDIAN_LITTLE:
+			map->format.format_reg = regmap_format_16_le;
+			break;
 		case REGMAP_ENDIAN_NATIVE:
 			map->format.format_reg = regmap_format_16_native;
 			break;
@@ -768,6 +794,9 @@
 		case REGMAP_ENDIAN_BIG:
 			map->format.format_reg = regmap_format_32_be;
 			break;
+		case REGMAP_ENDIAN_LITTLE:
+			map->format.format_reg = regmap_format_32_le;
+			break;
 		case REGMAP_ENDIAN_NATIVE:
 			map->format.format_reg = regmap_format_32_native;
 			break;
@@ -782,6 +811,9 @@
 		case REGMAP_ENDIAN_BIG:
 			map->format.format_reg = regmap_format_64_be;
 			break;
+		case REGMAP_ENDIAN_LITTLE:
+			map->format.format_reg = regmap_format_64_le;
+			break;
 		case REGMAP_ENDIAN_NATIVE:
 			map->format.format_reg = regmap_format_64_native;
 			break;
@@ -1296,12 +1328,26 @@
 	return 0;
 }
 
+static void regmap_set_work_buf_flag_mask(struct regmap *map, int max_bytes,
+					  unsigned long mask)
+{
+	u8 *buf;
+	int i;
+
+	if (!mask || !map->work_buf)
+		return;
+
+	buf = map->work_buf;
+
+	for (i = 0; i < max_bytes; i++)
+		buf[i] |= (mask >> (8 * i)) & 0xff;
+}
+
 int _regmap_raw_write(struct regmap *map, unsigned int reg,
 		      const void *val, size_t val_len)
 {
 	struct regmap_range_node *range;
 	unsigned long flags;
-	u8 *u8 = map->work_buf;
 	void *work_val = map->work_buf + map->format.reg_bytes +
 		map->format.pad_bytes;
 	void *buf;
@@ -1370,8 +1416,8 @@
 	}
 
 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
-
-	u8[0] |= map->write_flag_mask;
+	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
+				      map->write_flag_mask);
 
 	/*
 	 * Essentially all I/O mechanisms will be faster with a single
@@ -1474,6 +1520,12 @@
 		ret = map->bus->write(map->bus_context, buf, len);
 
 		kfree(buf);
+	} else if (ret != 0 && !map->cache_bypass && map->format.parse_val) {
+		/* regcache_drop_region() takes lock that we already have,
+		 * thus call map->cache_ops->drop() directly
+		 */
+		if (map->cache_ops && map->cache_ops->drop)
+			map->cache_ops->drop(map, reg, reg + 1);
 	}
 
 	trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes);
@@ -2245,7 +2297,6 @@
 			    unsigned int val_len)
 {
 	struct regmap_range_node *range;
-	u8 *u8 = map->work_buf;
 	int ret;
 
 	WARN_ON(!map->bus);
@@ -2262,15 +2313,8 @@
 	}
 
 	map->format.format_reg(map->work_buf, reg, map->reg_shift);
-
-	/*
-	 * Some buses or devices flag reads by setting the high bits in the
-	 * register address; since it's always the high bits for all
-	 * current formats we can do this here rather than in
-	 * formatting.  This may break if we get interesting formats.
-	 */
-	u8[0] |= map->read_flag_mask;
-
+	regmap_set_work_buf_flag_mask(map, map->format.reg_bytes,
+				      map->read_flag_mask);
 	trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes);
 
 	ret = map->bus->read(map->bus_context, map->work_buf,
diff --git a/drivers/base/soc.c b/drivers/base/soc.c
index 75b98aa..b63f23e 100644
--- a/drivers/base/soc.c
+++ b/drivers/base/soc.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/sysfs.h>
-#include <linux/module.h>
 #include <linux/init.h>
 #include <linux/stat.h>
 #include <linux/slab.h>
@@ -160,11 +159,3 @@
 	return bus_register(&soc_bus_type);
 }
 core_initcall(soc_bus_register);
-
-static void __exit soc_bus_unregister(void)
-{
-	ida_destroy(&soc_ida);
-
-	bus_unregister(&soc_bus_type);
-}
-module_exit(soc_bus_unregister);
diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c
index 921ce18..b4f6520 100644
--- a/drivers/bcma/driver_chipcommon.c
+++ b/drivers/bcma/driver_chipcommon.c
@@ -36,12 +36,31 @@
 }
 EXPORT_SYMBOL_GPL(bcma_chipco_get_alp_clock);
 
+static bool bcma_core_cc_has_pmu_watchdog(struct bcma_drv_cc *cc)
+{
+	struct bcma_bus *bus = cc->core->bus;
+
+	if (cc->capabilities & BCMA_CC_CAP_PMU) {
+		if (bus->chipinfo.id == BCMA_CHIP_ID_BCM53573) {
+			WARN(bus->chipinfo.rev <= 1, "No watchdog available\n");
+			/* 53573B0 and 53573B1 have bugged PMU watchdog. It can
+			 * be enabled but timer can't be bumped. Use CC one
+			 * instead.
+			 */
+			return false;
+		}
+		return true;
+	} else {
+		return false;
+	}
+}
+
 static u32 bcma_chipco_watchdog_get_max_timer(struct bcma_drv_cc *cc)
 {
 	struct bcma_bus *bus = cc->core->bus;
 	u32 nb;
 
-	if (cc->capabilities & BCMA_CC_CAP_PMU) {
+	if (bcma_core_cc_has_pmu_watchdog(cc)) {
 		if (bus->chipinfo.id == BCMA_CHIP_ID_BCM4706)
 			nb = 32;
 		else if (cc->core->id.rev < 26)
@@ -95,9 +114,16 @@
 
 int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc)
 {
+	struct bcma_bus *bus = cc->core->bus;
 	struct bcm47xx_wdt wdt = {};
 	struct platform_device *pdev;
 
+	if (bus->chipinfo.id == BCMA_CHIP_ID_BCM53573 &&
+	    bus->chipinfo.rev <= 1) {
+		pr_debug("No watchdog on 53573A0 / 53573A1\n");
+		return 0;
+	}
+
 	wdt.driver_data = cc;
 	wdt.timer_set = bcma_chipco_watchdog_timer_set_wdt;
 	wdt.timer_set_ms = bcma_chipco_watchdog_timer_set_ms_wdt;
@@ -105,7 +131,7 @@
 		bcma_chipco_watchdog_get_max_timer(cc) / cc->ticks_per_ms;
 
 	pdev = platform_device_register_data(NULL, "bcm47xx-wdt",
-					     cc->core->bus->num, &wdt,
+					     bus->num, &wdt,
 					     sizeof(wdt));
 	if (IS_ERR(pdev))
 		return PTR_ERR(pdev);
@@ -217,7 +243,7 @@
 	u32 maxt;
 
 	maxt = bcma_chipco_watchdog_get_max_timer(cc);
-	if (cc->capabilities & BCMA_CC_CAP_PMU) {
+	if (bcma_core_cc_has_pmu_watchdog(cc)) {
 		if (ticks == 1)
 			ticks = 2;
 		else if (ticks > maxt)
diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c
index 1f63547..2c1798e 100644
--- a/drivers/bcma/main.c
+++ b/drivers/bcma/main.c
@@ -209,6 +209,8 @@
 		core->dev.of_node = node;
 
 	core->irq = bcma_of_get_irq(parent, core, 0);
+
+	of_dma_configure(&core->dev, node);
 }
 
 unsigned int bcma_core_irq(struct bcma_device *core, int num)
@@ -248,12 +250,12 @@
 		core->irq = bus->host_pci->irq;
 		break;
 	case BCMA_HOSTTYPE_SOC:
-		core->dev.dma_mask = &core->dev.coherent_dma_mask;
-		if (bus->host_pdev) {
+		if (IS_ENABLED(CONFIG_OF) && bus->host_pdev) {
 			core->dma_dev = &bus->host_pdev->dev;
 			core->dev.parent = &bus->host_pdev->dev;
 			bcma_of_fill_device(bus->host_pdev, core);
 		} else {
+			core->dev.dma_mask = &core->dev.coherent_dma_mask;
 			core->dma_dev = &core->dev;
 		}
 		break;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index b71a9c7..e3d8e4c 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3706,22 +3706,21 @@
 	if (UFDCS->rawcmd == 1)
 		UFDCS->rawcmd = 2;
 
-	if (mode & (FMODE_READ|FMODE_WRITE)) {
-		UDRS->last_checked = 0;
-		clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
-		check_disk_change(bdev);
-		if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
-			goto out;
-		if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+	if (!(mode & FMODE_NDELAY)) {
+		if (mode & (FMODE_READ|FMODE_WRITE)) {
+			UDRS->last_checked = 0;
+			clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+			check_disk_change(bdev);
+			if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
+				goto out;
+			if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+				goto out;
+		}
+		res = -EROFS;
+		if ((mode & FMODE_WRITE) &&
+		    !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
 			goto out;
 	}
-
-	res = -EROFS;
-
-	if ((mode & FMODE_WRITE) &&
-			!test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags))
-		goto out;
-
 	mutex_unlock(&open_lock);
 	mutex_unlock(&floppy_mutex);
 	return 0;
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 1a04af6..6c6519f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3950,6 +3950,7 @@
 	bool need_put = !!rbd_dev->opts;
 
 	ceph_oid_destroy(&rbd_dev->header_oid);
+	ceph_oloc_destroy(&rbd_dev->header_oloc);
 
 	rbd_put_client(rbd_dev->rbd_client);
 	rbd_spec_put(rbd_dev->spec);
@@ -5336,15 +5337,6 @@
 	}
 	spec->pool_id = (u64)rc;
 
-	/* The ceph file layout needs to fit pool id in 32 bits */
-
-	if (spec->pool_id > (u64)U32_MAX) {
-		rbd_warn(NULL, "pool id too large (%llu > %u)",
-				(unsigned long long)spec->pool_id, U32_MAX);
-		rc = -EIO;
-		goto err_out_client;
-	}
-
 	rbd_dev = rbd_dev_create(rbdc, spec, rbd_opts);
 	if (!rbd_dev) {
 		rc = -ENOMEM;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1523e05..93b1aaa 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -391,23 +391,17 @@
 		num_vqs = 1;
 
 	vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
-	if (!vblk->vqs) {
+	if (!vblk->vqs)
+		return -ENOMEM;
+
+	names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
+	callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
+	vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
+	if (!names || !callbacks || !vqs) {
 		err = -ENOMEM;
 		goto out;
 	}
 
-	names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
-	if (!names)
-		goto err_names;
-
-	callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
-	if (!callbacks)
-		goto err_callbacks;
-
-	vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
-	if (!vqs)
-		goto err_vqs;
-
 	for (i = 0; i < num_vqs; i++) {
 		callbacks[i] = virtblk_done;
 		snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
@@ -417,7 +411,7 @@
 	/* Discover virtqueues and write information to configuration.  */
 	err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
 	if (err)
-		goto err_find_vqs;
+		goto out;
 
 	for (i = 0; i < num_vqs; i++) {
 		spin_lock_init(&vblk->vqs[i].lock);
@@ -425,16 +419,12 @@
 	}
 	vblk->num_vqs = num_vqs;
 
- err_find_vqs:
+out:
 	kfree(vqs);
- err_vqs:
 	kfree(callbacks);
- err_callbacks:
 	kfree(names);
- err_names:
 	if (err)
 		kfree(vblk->vqs);
- out:
 	return err;
 }
 
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index be4fea6..88ef6d4 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -189,6 +189,8 @@
 	struct mutex mutex;
 	struct xenbus_device *xbdev;
 	struct gendisk *gd;
+	u16 sector_size;
+	unsigned int physical_sector_size;
 	int vdevice;
 	blkif_vdev_t handle;
 	enum blkif_state connected;
@@ -910,9 +912,45 @@
 	.map_queue = blk_mq_map_queue,
 };
 
+static void blkif_set_queue_limits(struct blkfront_info *info)
+{
+	struct request_queue *rq = info->rq;
+	struct gendisk *gd = info->gd;
+	unsigned int segments = info->max_indirect_segments ? :
+				BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
+
+	if (info->feature_discard) {
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
+		blk_queue_max_discard_sectors(rq, get_capacity(gd));
+		rq->limits.discard_granularity = info->discard_granularity;
+		rq->limits.discard_alignment = info->discard_alignment;
+		if (info->feature_secdiscard)
+			queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq);
+	}
+
+	/* Hard sector size and max sectors impersonate the equiv. hardware. */
+	blk_queue_logical_block_size(rq, info->sector_size);
+	blk_queue_physical_block_size(rq, info->physical_sector_size);
+	blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
+
+	/* Each segment in a request is up to an aligned page in size. */
+	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+	blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+	/* Ensure a merged request will fit in a single I/O ring slot. */
+	blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
+
+	/* Make sure buffer addresses are sector-aligned. */
+	blk_queue_dma_alignment(rq, 511);
+
+	/* Make sure we don't use bounce buffers. */
+	blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
+}
+
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
-				unsigned int physical_sector_size,
-				unsigned int segments)
+				unsigned int physical_sector_size)
 {
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;
@@ -944,36 +982,11 @@
 	}
 
 	rq->queuedata = info;
-	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
-
-	if (info->feature_discard) {
-		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq);
-		blk_queue_max_discard_sectors(rq, get_capacity(gd));
-		rq->limits.discard_granularity = info->discard_granularity;
-		rq->limits.discard_alignment = info->discard_alignment;
-		if (info->feature_secdiscard)
-			queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq);
-	}
-
-	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_logical_block_size(rq, sector_size);
-	blk_queue_physical_block_size(rq, physical_sector_size);
-	blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512);
-
-	/* Each segment in a request is up to an aligned page in size. */
-	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
-	blk_queue_max_segment_size(rq, PAGE_SIZE);
-
-	/* Ensure a merged request will fit in a single I/O ring slot. */
-	blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG);
-
-	/* Make sure buffer addresses are sector-aligned. */
-	blk_queue_dma_alignment(rq, 511);
-
-	/* Make sure we don't use bounce buffers. */
-	blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
-
-	gd->queue = rq;
+	info->rq = gd->queue = rq;
+	info->gd = gd;
+	info->sector_size = sector_size;
+	info->physical_sector_size = physical_sector_size;
+	blkif_set_queue_limits(info);
 
 	return 0;
 }
@@ -1136,16 +1149,11 @@
 	gd->private_data = info;
 	set_capacity(gd, capacity);
 
-	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
-				 info->max_indirect_segments ? :
-				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) {
 		del_gendisk(gd);
 		goto release;
 	}
 
-	info->rq = gd->queue;
-	info->gd = gd;
-
 	xlvbd_flush(info);
 
 	if (vdisk_info & VDISK_READONLY)
@@ -1315,7 +1323,7 @@
 			rinfo->ring_ref[i] = GRANT_INVALID_REF;
 		}
 	}
-	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE));
+	free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE));
 	rinfo->ring.sring = NULL;
 
 	if (rinfo->irq)
@@ -2007,8 +2015,10 @@
 	struct split_bio *split_bio;
 
 	blkfront_gather_backend_features(info);
+	/* Reset limits changed by blk_mq_update_nr_hw_queues(). */
+	blkif_set_queue_limits(info);
 	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	blk_queue_max_segments(info->rq, segs);
+	blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG);
 
 	for (r_index = 0; r_index < info->nr_rings; r_index++) {
 		struct blkfront_ring_info *rinfo = &info->rinfo[r_index];
@@ -2432,7 +2442,7 @@
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
 				 info->xbdev->otherend);
-		return;
+		goto fail;
 	}
 
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
@@ -2445,6 +2455,11 @@
 	device_add_disk(&info->xbdev->dev, info->gd);
 
 	info->is_ready = 1;
+	return;
+
+fail:
+	blkif_free(info, 0);
+	return;
 }
 
 /**
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index cf50fd2..3cc9bff 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -180,6 +180,17 @@
 
 	  Say Y here to compile support for Intel AG6XX protocol.
 
+config BT_HCIUART_MRVL
+	bool "Marvell protocol support"
+	depends on BT_HCIUART
+	select BT_HCIUART_H4
+	help
+	  Marvell is serial protocol for communication between Bluetooth
+	  device and host. This protocol is required for most Marvell Bluetooth
+	  devices with UART interface.
+
+	  Say Y here to compile support for HCI MRVL protocol.
+
 config BT_HCIBCM203X
 	tristate "HCI BCM203x USB driver"
 	depends on USB
@@ -331,4 +342,16 @@
 	  Say Y here to compile support for Texas Instrument's WiLink7 driver
 	  into the kernel or say M to compile it as module (btwilink).
 
+config BT_QCOMSMD
+	tristate "Qualcomm SMD based HCI support"
+	depends on QCOM_SMD && QCOM_WCNSS_CTRL
+	select BT_QCA
+	help
+	  Qualcomm SMD based HCI driver.
+	  This driver is used to bridge HCI data onto the shared memory
+	  channels to the WCNSS core.
+
+	  Say Y here to compile support for HCI over Qualcomm SMD into the
+	  kernel or say M to compile as a module.
+
 endmenu
diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile
index 9c18939..b1fc29a 100644
--- a/drivers/bluetooth/Makefile
+++ b/drivers/bluetooth/Makefile
@@ -20,6 +20,7 @@
 obj-$(CONFIG_BT_MRVL)		+= btmrvl.o
 obj-$(CONFIG_BT_MRVL_SDIO)	+= btmrvl_sdio.o
 obj-$(CONFIG_BT_WILINK)		+= btwilink.o
+obj-$(CONFIG_BT_QCOMSMD)	+= btqcomsmd.o
 obj-$(CONFIG_BT_BCM)		+= btbcm.o
 obj-$(CONFIG_BT_RTL)		+= btrtl.o
 obj-$(CONFIG_BT_QCA)		+= btqca.o
@@ -37,6 +38,7 @@
 hci_uart-$(CONFIG_BT_HCIUART_BCM)	+= hci_bcm.o
 hci_uart-$(CONFIG_BT_HCIUART_QCA)	+= hci_qca.o
 hci_uart-$(CONFIG_BT_HCIUART_AG6XX)	+= hci_ag6xx.o
+hci_uart-$(CONFIG_BT_HCIUART_MRVL)	+= hci_mrvl.o
 hci_uart-objs				:= $(hci_uart-y)
 
 ccflags-y += -D__CHECK_ENDIAN__
diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c
index 5b0ef7b..5ce6d41 100644
--- a/drivers/bluetooth/bcm203x.c
+++ b/drivers/bluetooth/bcm203x.c
@@ -185,10 +185,8 @@
 	data->state = BCM203X_LOAD_MINIDRV;
 
 	data->urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!data->urb) {
-		BT_ERR("Can't allocate URB");
+	if (!data->urb)
 		return -ENOMEM;
-	}
 
 	if (request_firmware(&firmware, "BCM2033-MD.hex", &udev->dev) < 0) {
 		BT_ERR("Mini driver request failed");
diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 4a62081..28afd5d 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -55,8 +55,8 @@
 	}
 
 	edl = (struct edl_event_hdr *)(skb->data);
-	if (!edl || !edl->data) {
-		BT_ERR("%s: TLV with no header or no data", hdev->name);
+	if (!edl) {
+		BT_ERR("%s: TLV with no header", hdev->name);
 		err = -EILSEQ;
 		goto out;
 	}
@@ -224,8 +224,8 @@
 	}
 
 	edl = (struct edl_event_hdr *)(skb->data);
-	if (!edl || !edl->data) {
-		BT_ERR("%s: TLV with no header or no data", hdev->name);
+	if (!edl) {
+		BT_ERR("%s: TLV with no header", hdev->name);
 		err = -EILSEQ;
 		goto out;
 	}
diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c
new file mode 100644
index 0000000..08c2c93
--- /dev/null
+++ b/drivers/bluetooth/btqcomsmd.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2016, Linaro Ltd.
+ * Copyright (c) 2015, Sony Mobile Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/soc/qcom/smd.h>
+#include <linux/soc/qcom/wcnss_ctrl.h>
+#include <linux/platform_device.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#include "btqca.h"
+
+struct btqcomsmd {
+	struct hci_dev *hdev;
+
+	struct qcom_smd_channel *acl_channel;
+	struct qcom_smd_channel *cmd_channel;
+};
+
+static int btqcomsmd_recv(struct hci_dev *hdev, unsigned int type,
+			   const void *data, size_t count)
+{
+	struct sk_buff *skb;
+
+	/* Use GFP_ATOMIC as we're in IRQ context */
+	skb = bt_skb_alloc(count, GFP_ATOMIC);
+	if (!skb) {
+		hdev->stat.err_rx++;
+		return -ENOMEM;
+	}
+
+	hci_skb_pkt_type(skb) = type;
+	memcpy(skb_put(skb, count), data, count);
+
+	return hci_recv_frame(hdev, skb);
+}
+
+static int btqcomsmd_acl_callback(struct qcom_smd_channel *channel,
+				  const void *data, size_t count)
+{
+	struct btqcomsmd *btq = qcom_smd_get_drvdata(channel);
+
+	btq->hdev->stat.byte_rx += count;
+	return btqcomsmd_recv(btq->hdev, HCI_ACLDATA_PKT, data, count);
+}
+
+static int btqcomsmd_cmd_callback(struct qcom_smd_channel *channel,
+				  const void *data, size_t count)
+{
+	struct btqcomsmd *btq = qcom_smd_get_drvdata(channel);
+
+	return btqcomsmd_recv(btq->hdev, HCI_EVENT_PKT, data, count);
+}
+
+static int btqcomsmd_send(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct btqcomsmd *btq = hci_get_drvdata(hdev);
+	int ret;
+
+	switch (hci_skb_pkt_type(skb)) {
+	case HCI_ACLDATA_PKT:
+		ret = qcom_smd_send(btq->acl_channel, skb->data, skb->len);
+		hdev->stat.acl_tx++;
+		hdev->stat.byte_tx += skb->len;
+		break;
+	case HCI_COMMAND_PKT:
+		ret = qcom_smd_send(btq->cmd_channel, skb->data, skb->len);
+		hdev->stat.cmd_tx++;
+		break;
+	default:
+		ret = -EILSEQ;
+		break;
+	}
+
+	kfree_skb(skb);
+
+	return ret;
+}
+
+static int btqcomsmd_open(struct hci_dev *hdev)
+{
+	return 0;
+}
+
+static int btqcomsmd_close(struct hci_dev *hdev)
+{
+	return 0;
+}
+
+static int btqcomsmd_probe(struct platform_device *pdev)
+{
+	struct btqcomsmd *btq;
+	struct hci_dev *hdev;
+	void *wcnss;
+	int ret;
+
+	btq = devm_kzalloc(&pdev->dev, sizeof(*btq), GFP_KERNEL);
+	if (!btq)
+		return -ENOMEM;
+
+	wcnss = dev_get_drvdata(pdev->dev.parent);
+
+	btq->acl_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_ACL",
+						   btqcomsmd_acl_callback);
+	if (IS_ERR(btq->acl_channel))
+		return PTR_ERR(btq->acl_channel);
+
+	btq->cmd_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_CMD",
+						   btqcomsmd_cmd_callback);
+	if (IS_ERR(btq->cmd_channel))
+		return PTR_ERR(btq->cmd_channel);
+
+	qcom_smd_set_drvdata(btq->acl_channel, btq);
+	qcom_smd_set_drvdata(btq->cmd_channel, btq);
+
+	hdev = hci_alloc_dev();
+	if (!hdev)
+		return -ENOMEM;
+
+	hci_set_drvdata(hdev, btq);
+	btq->hdev = hdev;
+	SET_HCIDEV_DEV(hdev, &pdev->dev);
+
+	hdev->bus = HCI_SMD;
+	hdev->open = btqcomsmd_open;
+	hdev->close = btqcomsmd_close;
+	hdev->send = btqcomsmd_send;
+	hdev->set_bdaddr = qca_set_bdaddr_rome;
+
+	ret = hci_register_dev(hdev);
+	if (ret < 0) {
+		hci_free_dev(hdev);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, btq);
+
+	return 0;
+}
+
+static int btqcomsmd_remove(struct platform_device *pdev)
+{
+	struct btqcomsmd *btq = platform_get_drvdata(pdev);
+
+	hci_unregister_dev(btq->hdev);
+	hci_free_dev(btq->hdev);
+
+	return 0;
+}
+
+static const struct of_device_id btqcomsmd_of_match[] = {
+	{ .compatible = "qcom,wcnss-bt", },
+	{ },
+};
+
+static struct platform_driver btqcomsmd_driver = {
+	.probe = btqcomsmd_probe,
+	.remove = btqcomsmd_remove,
+	.driver  = {
+		.name  = "btqcomsmd",
+		.of_match_table = btqcomsmd_of_match,
+	},
+};
+
+module_platform_driver(btqcomsmd_driver);
+
+MODULE_AUTHOR("Bjorn Andersson <bjorn.andersson@sonymobile.com>");
+MODULE_DESCRIPTION("Qualcomm SMD HCI driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 8428893..fc9b257 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -33,6 +33,7 @@
 #define RTL_ROM_LMP_8723B	0x8723
 #define RTL_ROM_LMP_8821A	0x8821
 #define RTL_ROM_LMP_8761A	0x8761
+#define RTL_ROM_LMP_8822B	0x8822
 
 static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version)
 {
@@ -78,11 +79,15 @@
 	const unsigned char *patch_length_base, *patch_offset_base;
 	u32 patch_offset = 0;
 	u16 patch_length, num_patches;
-	const u16 project_id_to_lmp_subver[] = {
-		RTL_ROM_LMP_8723A,
-		RTL_ROM_LMP_8723B,
-		RTL_ROM_LMP_8821A,
-		RTL_ROM_LMP_8761A
+	static const struct {
+		__u16 lmp_subver;
+		__u8 id;
+	} project_id_to_lmp_subver[] = {
+		{ RTL_ROM_LMP_8723A, 0 },
+		{ RTL_ROM_LMP_8723B, 1 },
+		{ RTL_ROM_LMP_8821A, 2 },
+		{ RTL_ROM_LMP_8761A, 3 },
+		{ RTL_ROM_LMP_8822B, 8 },
 	};
 
 	ret = rtl_read_rom_version(hdev, &rom_version);
@@ -134,14 +139,20 @@
 		return -EINVAL;
 	}
 
-	if (project_id >= ARRAY_SIZE(project_id_to_lmp_subver)) {
+	/* Find project_id in table */
+	for (i = 0; i < ARRAY_SIZE(project_id_to_lmp_subver); i++) {
+		if (project_id == project_id_to_lmp_subver[i].id)
+			break;
+	}
+
+	if (i >= ARRAY_SIZE(project_id_to_lmp_subver)) {
 		BT_ERR("%s: unknown project id %d", hdev->name, project_id);
 		return -EINVAL;
 	}
 
-	if (lmp_subver != project_id_to_lmp_subver[project_id]) {
+	if (lmp_subver != project_id_to_lmp_subver[i].lmp_subver) {
 		BT_ERR("%s: firmware is for %x but this is a %x", hdev->name,
-		       project_id_to_lmp_subver[project_id], lmp_subver);
+		       project_id_to_lmp_subver[i].lmp_subver, lmp_subver);
 		return -EINVAL;
 	}
 
@@ -257,6 +268,26 @@
 	return ret;
 }
 
+static int rtl_load_config(struct hci_dev *hdev, const char *name, u8 **buff)
+{
+	const struct firmware *fw;
+	int ret;
+
+	BT_INFO("%s: rtl: loading %s", hdev->name, name);
+	ret = request_firmware(&fw, name, &hdev->dev);
+	if (ret < 0) {
+		BT_ERR("%s: Failed to load %s", hdev->name, name);
+		return ret;
+	}
+
+	ret = fw->size;
+	*buff = kmemdup(fw->data, ret, GFP_KERNEL);
+
+	release_firmware(fw);
+
+	return ret;
+}
+
 static int btrtl_setup_rtl8723a(struct hci_dev *hdev)
 {
 	const struct firmware *fw;
@@ -296,25 +327,74 @@
 	unsigned char *fw_data = NULL;
 	const struct firmware *fw;
 	int ret;
+	int cfg_sz;
+	u8 *cfg_buff = NULL;
+	u8 *tbuff;
+	char *cfg_name = NULL;
+
+	switch (lmp_subver) {
+	case RTL_ROM_LMP_8723B:
+		cfg_name = "rtl_bt/rtl8723b_config.bin";
+		break;
+	case RTL_ROM_LMP_8821A:
+		cfg_name = "rtl_bt/rtl8821a_config.bin";
+		break;
+	case RTL_ROM_LMP_8761A:
+		cfg_name = "rtl_bt/rtl8761a_config.bin";
+		break;
+	case RTL_ROM_LMP_8822B:
+		cfg_name = "rtl_bt/rtl8822b_config.bin";
+		break;
+	default:
+		BT_ERR("%s: rtl: no config according to lmp_subver %04x",
+		       hdev->name, lmp_subver);
+		break;
+	}
+
+	if (cfg_name) {
+		cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff);
+		if (cfg_sz < 0)
+			cfg_sz = 0;
+	} else
+		cfg_sz = 0;
 
 	BT_INFO("%s: rtl: loading %s", hdev->name, fw_name);
 	ret = request_firmware(&fw, fw_name, &hdev->dev);
 	if (ret < 0) {
 		BT_ERR("%s: Failed to load %s", hdev->name, fw_name);
-		return ret;
+		goto err_req_fw;
 	}
 
 	ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data);
 	if (ret < 0)
 		goto out;
 
+	if (cfg_sz) {
+		tbuff = kzalloc(ret + cfg_sz, GFP_KERNEL);
+		if (!tbuff) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		memcpy(tbuff, fw_data, ret);
+		kfree(fw_data);
+
+		memcpy(tbuff + ret, cfg_buff, cfg_sz);
+		ret += cfg_sz;
+
+		fw_data = tbuff;
+	}
+
+	BT_INFO("cfg_sz %d, total size %d", cfg_sz, ret);
+
 	ret = rtl_download_firmware(hdev, fw_data, ret);
-	kfree(fw_data);
-	if (ret < 0)
-		goto out;
 
 out:
 	release_firmware(fw);
+	kfree(fw_data);
+err_req_fw:
+	if (cfg_sz)
+		kfree(cfg_buff);
 	return ret;
 }
 
@@ -377,6 +457,9 @@
 	case RTL_ROM_LMP_8761A:
 		return btrtl_setup_rtl8723b(hdev, lmp_subver,
 					    "rtl_bt/rtl8761a_fw.bin");
+	case RTL_ROM_LMP_8822B:
+		return btrtl_setup_rtl8723b(hdev, lmp_subver,
+					    "rtl_bt/rtl8822b_fw.bin");
 	default:
 		BT_INFO("rtl: assuming no firmware upload needed.");
 		return 0;
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 811f9b9..6bd63b8 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -62,6 +62,7 @@
 #define BTUSB_REALTEK		0x20000
 #define BTUSB_BCM2045		0x40000
 #define BTUSB_IFNUM_2		0x80000
+#define BTUSB_CW6622		0x100000
 
 static const struct usb_device_id btusb_table[] = {
 	/* Generic Bluetooth USB device */
@@ -248,9 +249,11 @@
 
 	/* QCA ROME chipset */
 	{ USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME },
+	{ USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME },
 	{ USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME },
+	{ USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME },
 
 	/* Broadcom BCM2035 */
 	{ USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 },
@@ -290,7 +293,8 @@
 	{ USB_DEVICE(0x0400, 0x080a), .driver_info = BTUSB_BROKEN_ISOC },
 
 	/* CONWISE Technology based adapters with buggy SCO support */
-	{ USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC },
+	{ USB_DEVICE(0x0e5e, 0x6622),
+	  .driver_info = BTUSB_BROKEN_ISOC | BTUSB_CW6622},
 
 	/* Roper Class 1 Bluetooth Dongle (Silicon Wave based) */
 	{ USB_DEVICE(0x1310, 0x0001), .driver_info = BTUSB_SWAVE },
@@ -2221,9 +2225,8 @@
 	err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING,
 				  TASK_INTERRUPTIBLE,
 				  msecs_to_jiffies(5000));
-	if (err == 1) {
+	if (err == -EINTR) {
 		BT_ERR("%s: Firmware loading interrupted", hdev->name);
-		err = -EINTR;
 		goto done;
 	}
 
@@ -2275,7 +2278,7 @@
 				  TASK_INTERRUPTIBLE,
 				  msecs_to_jiffies(1000));
 
-	if (err == 1) {
+	if (err == -EINTR) {
 		BT_ERR("%s: Device boot interrupted", hdev->name);
 		return -EINTR;
 	}
@@ -2845,6 +2848,9 @@
 	hdev->send   = btusb_send_frame;
 	hdev->notify = btusb_notify;
 
+	if (id->driver_info & BTUSB_CW6622)
+		set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks);
+
 	if (id->driver_info & BTUSB_BCM2045)
 		set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks);
 
diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c
index 485281b..ef51c9c 100644
--- a/drivers/bluetooth/btwilink.c
+++ b/drivers/bluetooth/btwilink.c
@@ -245,6 +245,7 @@
 {
 	struct ti_st *hst;
 	long len;
+	int pkt_type;
 
 	hst = hci_get_drvdata(hdev);
 
@@ -258,6 +259,7 @@
 	 * Freeing skb memory is taken care in shared transport layer,
 	 * so don't free skb memory here.
 	 */
+	pkt_type = hci_skb_pkt_type(skb);
 	len = hst->st_write(skb);
 	if (len < 0) {
 		kfree_skb(skb);
@@ -268,7 +270,7 @@
 
 	/* ST accepted our skb. So, Go ahead and do rest */
 	hdev->stat.byte_tx += len;
-	ti_st_tx_complete(hst, hci_skb_pkt_type(skb));
+	ti_st_tx_complete(hst, pkt_type);
 
 	return 0;
 }
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 1c97eda..5ccb90e 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -798,7 +798,7 @@
 
 static const struct hci_uart_proto bcm_proto = {
 	.id		= HCI_UART_BCM,
-	.name		= "BCM",
+	.name		= "Broadcom",
 	.manufacturer	= 15,
 	.init_speed	= 115200,
 	.oper_speed	= 4000000,
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index d7d23ce..a2c921f 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -90,7 +90,8 @@
 /* ---- BCSP CRC calculation ---- */
 
 /* Table for calculating CRC for polynomial 0x1021, LSB processed first,
-initial value 0xffff, bits shifted in reverse order. */
+ * initial value 0xffff, bits shifted in reverse order.
+ */
 
 static const u16 crc_table[] = {
 	0x0000, 0x1081, 0x2102, 0x3183,
@@ -174,7 +175,7 @@
 }
 
 static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data,
-		int len, int pkt_type)
+					int len, int pkt_type)
 {
 	struct sk_buff *nskb;
 	u8 hdr[4], chan;
@@ -213,6 +214,7 @@
 		/* Vendor specific commands */
 		if (hci_opcode_ogf(__le16_to_cpu(opcode)) == 0x3f) {
 			u8 desc = *(data + HCI_COMMAND_HDR_SIZE);
+
 			if ((desc & 0xf0) == 0xc0) {
 				data += HCI_COMMAND_HDR_SIZE + 1;
 				len  -= HCI_COMMAND_HDR_SIZE + 1;
@@ -271,8 +273,8 @@
 	/* Put CRC */
 	if (bcsp->use_crc) {
 		bcsp_txmsg_crc = bitrev16(bcsp_txmsg_crc);
-		bcsp_slip_one_byte(nskb, (u8) ((bcsp_txmsg_crc >> 8) & 0x00ff));
-		bcsp_slip_one_byte(nskb, (u8) (bcsp_txmsg_crc & 0x00ff));
+		bcsp_slip_one_byte(nskb, (u8)((bcsp_txmsg_crc >> 8) & 0x00ff));
+		bcsp_slip_one_byte(nskb, (u8)(bcsp_txmsg_crc & 0x00ff));
 	}
 
 	bcsp_slip_msgdelim(nskb);
@@ -287,7 +289,8 @@
 	struct sk_buff *skb;
 
 	/* First of all, check for unreliable messages in the queue,
-	   since they have priority */
+	 * since they have priority
+	 */
 
 	skb = skb_dequeue(&bcsp->unrel);
 	if (skb != NULL) {
@@ -414,7 +417,7 @@
 
 	/* spot "conf" pkts and reply with a "conf rsp" pkt */
 	if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 &&
-			!memcmp(&bcsp->rx_skb->data[4], conf_pkt, 4)) {
+	    !memcmp(&bcsp->rx_skb->data[4], conf_pkt, 4)) {
 		struct sk_buff *nskb = alloc_skb(4, GFP_ATOMIC);
 
 		BT_DBG("Found a LE conf pkt");
@@ -428,7 +431,7 @@
 	}
 	/* Spot "sync" pkts. If we find one...disaster! */
 	else if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 &&
-			!memcmp(&bcsp->rx_skb->data[4], sync_pkt, 4)) {
+		 !memcmp(&bcsp->rx_skb->data[4], sync_pkt, 4)) {
 		BT_ERR("Found a LE sync pkt, card has reset");
 	}
 }
@@ -446,7 +449,7 @@
 		default:
 			memcpy(skb_put(bcsp->rx_skb, 1), &byte, 1);
 			if ((bcsp->rx_skb->data[0] & 0x40) != 0 &&
-					bcsp->rx_state != BCSP_W4_CRC)
+			    bcsp->rx_state != BCSP_W4_CRC)
 				bcsp_crc_update(&bcsp->message_crc, byte);
 			bcsp->rx_count--;
 		}
@@ -457,7 +460,7 @@
 		case 0xdc:
 			memcpy(skb_put(bcsp->rx_skb, 1), &c0, 1);
 			if ((bcsp->rx_skb->data[0] & 0x40) != 0 &&
-					bcsp->rx_state != BCSP_W4_CRC)
+			    bcsp->rx_state != BCSP_W4_CRC)
 				bcsp_crc_update(&bcsp->message_crc, 0xc0);
 			bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC;
 			bcsp->rx_count--;
@@ -466,7 +469,7 @@
 		case 0xdd:
 			memcpy(skb_put(bcsp->rx_skb, 1), &db, 1);
 			if ((bcsp->rx_skb->data[0] & 0x40) != 0 &&
-					bcsp->rx_state != BCSP_W4_CRC)
+			    bcsp->rx_state != BCSP_W4_CRC)
 				bcsp_crc_update(&bcsp->message_crc, 0xdb);
 			bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC;
 			bcsp->rx_count--;
@@ -485,13 +488,28 @@
 static void bcsp_complete_rx_pkt(struct hci_uart *hu)
 {
 	struct bcsp_struct *bcsp = hu->priv;
-	int pass_up;
+	int pass_up = 0;
 
 	if (bcsp->rx_skb->data[0] & 0x80) {	/* reliable pkt */
 		BT_DBG("Received seqno %u from card", bcsp->rxseq_txack);
-		bcsp->rxseq_txack++;
-		bcsp->rxseq_txack %= 0x8;
-		bcsp->txack_req    = 1;
+
+		/* check the rx sequence number is as expected */
+		if ((bcsp->rx_skb->data[0] & 0x07) == bcsp->rxseq_txack) {
+			bcsp->rxseq_txack++;
+			bcsp->rxseq_txack %= 0x8;
+		} else {
+			/* handle re-transmitted packet or
+			 * when packet was missed
+			 */
+			BT_ERR("Out-of-order packet arrived, got %u expected %u",
+			       bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack);
+
+			/* do not process out-of-order packet payload */
+			pass_up = 2;
+		}
+
+		/* send current txack value to all received reliable packets */
+		bcsp->txack_req = 1;
 
 		/* If needed, transmit an ack pkt */
 		hci_uart_tx_wakeup(hu);
@@ -500,26 +518,33 @@
 	bcsp->rxack = (bcsp->rx_skb->data[0] >> 3) & 0x07;
 	BT_DBG("Request for pkt %u from card", bcsp->rxack);
 
+	/* handle received ACK indications,
+	 * including those from out-of-order packets
+	 */
 	bcsp_pkt_cull(bcsp);
-	if ((bcsp->rx_skb->data[1] & 0x0f) == 6 &&
-			bcsp->rx_skb->data[0] & 0x80) {
-		hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT;
-		pass_up = 1;
-	} else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 &&
-			bcsp->rx_skb->data[0] & 0x80) {
-		hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT;
-		pass_up = 1;
-	} else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) {
-		hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT;
-		pass_up = 1;
-	} else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 &&
-			!(bcsp->rx_skb->data[0] & 0x80)) {
-		bcsp_handle_le_pkt(hu);
-		pass_up = 0;
-	} else
-		pass_up = 0;
 
-	if (!pass_up) {
+	if (pass_up != 2) {
+		if ((bcsp->rx_skb->data[1] & 0x0f) == 6 &&
+		    (bcsp->rx_skb->data[0] & 0x80)) {
+			hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT;
+			pass_up = 1;
+		} else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 &&
+			   (bcsp->rx_skb->data[0] & 0x80)) {
+			hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT;
+			pass_up = 1;
+		} else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) {
+			hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT;
+			pass_up = 1;
+		} else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 &&
+			   !(bcsp->rx_skb->data[0] & 0x80)) {
+			bcsp_handle_le_pkt(hu);
+			pass_up = 0;
+		} else {
+			pass_up = 0;
+		}
+	}
+
+	if (pass_up == 0) {
 		struct hci_event_hdr hdr;
 		u8 desc = (bcsp->rx_skb->data[1] & 0x0f);
 
@@ -537,18 +562,23 @@
 				hci_recv_frame(hu->hdev, bcsp->rx_skb);
 			} else {
 				BT_ERR("Packet for unknown channel (%u %s)",
-					bcsp->rx_skb->data[1] & 0x0f,
-					bcsp->rx_skb->data[0] & 0x80 ?
-					"reliable" : "unreliable");
+				       bcsp->rx_skb->data[1] & 0x0f,
+				       bcsp->rx_skb->data[0] & 0x80 ?
+				       "reliable" : "unreliable");
 				kfree_skb(bcsp->rx_skb);
 			}
 		} else
 			kfree_skb(bcsp->rx_skb);
-	} else {
+	} else if (pass_up == 1) {
 		/* Pull out BCSP hdr */
 		skb_pull(bcsp->rx_skb, 4);
 
 		hci_recv_frame(hu->hdev, bcsp->rx_skb);
+	} else {
+		/* ignore packet payload of already ACKed re-transmitted
+		 * packets or when a packet was missed in the BCSP window
+		 */
+		kfree_skb(bcsp->rx_skb);
 	}
 
 	bcsp->rx_state = BCSP_W4_PKT_DELIMITER;
@@ -567,7 +597,7 @@
 	const unsigned char *ptr;
 
 	BT_DBG("hu %p count %d rx_state %d rx_count %ld",
-		hu, count, bcsp->rx_state, bcsp->rx_count);
+	       hu, count, bcsp->rx_state, bcsp->rx_count);
 
 	ptr = data;
 	while (count) {
@@ -586,24 +616,14 @@
 
 		switch (bcsp->rx_state) {
 		case BCSP_W4_BCSP_HDR:
-			if ((0xff & (u8) ~ (bcsp->rx_skb->data[0] + bcsp->rx_skb->data[1] +
-					bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) {
+			if ((0xff & (u8)~(bcsp->rx_skb->data[0] + bcsp->rx_skb->data[1] +
+			    bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) {
 				BT_ERR("Error in BCSP hdr checksum");
 				kfree_skb(bcsp->rx_skb);
 				bcsp->rx_state = BCSP_W4_PKT_DELIMITER;
 				bcsp->rx_count = 0;
 				continue;
 			}
-			if (bcsp->rx_skb->data[0] & 0x80	/* reliable pkt */
-						&& (bcsp->rx_skb->data[0] & 0x07) != bcsp->rxseq_txack) {
-				BT_ERR("Out-of-order packet arrived, got %u expected %u",
-					bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack);
-
-				kfree_skb(bcsp->rx_skb);
-				bcsp->rx_state = BCSP_W4_PKT_DELIMITER;
-				bcsp->rx_count = 0;
-				continue;
-			}
 			bcsp->rx_state = BCSP_W4_DATA;
 			bcsp->rx_count = (bcsp->rx_skb->data[1] >> 4) +
 					(bcsp->rx_skb->data[2] << 4);	/* May be 0 */
@@ -620,8 +640,8 @@
 		case BCSP_W4_CRC:
 			if (bitrev16(bcsp->message_crc) != bscp_get_crc(bcsp)) {
 				BT_ERR("Checksum failed: computed %04x received %04x",
-					bitrev16(bcsp->message_crc),
-					bscp_get_crc(bcsp));
+				       bitrev16(bcsp->message_crc),
+				       bscp_get_crc(bcsp));
 
 				kfree_skb(bcsp->rx_skb);
 				bcsp->rx_state = BCSP_W4_PKT_DELIMITER;
@@ -679,7 +699,7 @@
 	/* Arrange to retransmit all messages in the relq. */
 static void bcsp_timed_event(unsigned long arg)
 {
-	struct hci_uart *hu = (struct hci_uart *) arg;
+	struct hci_uart *hu = (struct hci_uart *)arg;
 	struct bcsp_struct *bcsp = hu->priv;
 	struct sk_buff *skb;
 	unsigned long flags;
@@ -715,7 +735,7 @@
 
 	init_timer(&bcsp->tbcsp);
 	bcsp->tbcsp.function = bcsp_timed_event;
-	bcsp->tbcsp.data     = (u_long) hu;
+	bcsp->tbcsp.data     = (u_long)hu;
 
 	bcsp->rx_state = BCSP_W4_PKT_DELIMITER;
 
diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c
index ed0a420..9e27128 100644
--- a/drivers/bluetooth/hci_intel.c
+++ b/drivers/bluetooth/hci_intel.c
@@ -128,7 +128,7 @@
 				  TASK_INTERRUPTIBLE,
 				  msecs_to_jiffies(1000));
 
-	if (err == 1) {
+	if (err == -EINTR) {
 		bt_dev_err(hu->hdev, "Device boot interrupted");
 		return -EINTR;
 	}
@@ -151,7 +151,7 @@
 				  TASK_INTERRUPTIBLE,
 				  msecs_to_jiffies(1000));
 
-	if (err == 1) {
+	if (err == -EINTR) {
 		bt_dev_err(hu->hdev, "LPM transaction interrupted");
 		return -EINTR;
 	}
@@ -813,7 +813,7 @@
 	err = wait_on_bit_timeout(&intel->flags, STATE_DOWNLOADING,
 				  TASK_INTERRUPTIBLE,
 				  msecs_to_jiffies(5000));
-	if (err == 1) {
+	if (err == -EINTR) {
 		bt_dev_err(hdev, "Firmware loading interrupted");
 		err = -EINTR;
 		goto done;
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index dda9739..9497c46 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -697,34 +697,36 @@
 	case HCIUARTSETPROTO:
 		if (!test_and_set_bit(HCI_UART_PROTO_SET, &hu->flags)) {
 			err = hci_uart_set_proto(hu, arg);
-			if (err) {
+			if (err)
 				clear_bit(HCI_UART_PROTO_SET, &hu->flags);
-				return err;
-			}
 		} else
-			return -EBUSY;
+			err = -EBUSY;
 		break;
 
 	case HCIUARTGETPROTO:
 		if (test_bit(HCI_UART_PROTO_SET, &hu->flags))
-			return hu->proto->id;
-		return -EUNATCH;
+			err = hu->proto->id;
+		else
+			err = -EUNATCH;
+		break;
 
 	case HCIUARTGETDEVICE:
 		if (test_bit(HCI_UART_REGISTERED, &hu->flags))
-			return hu->hdev->id;
-		return -EUNATCH;
+			err = hu->hdev->id;
+		else
+			err = -EUNATCH;
+		break;
 
 	case HCIUARTSETFLAGS:
 		if (test_bit(HCI_UART_PROTO_SET, &hu->flags))
-			return -EBUSY;
-		err = hci_uart_set_flags(hu, arg);
-		if (err)
-			return err;
+			err = -EBUSY;
+		else
+			err = hci_uart_set_flags(hu, arg);
 		break;
 
 	case HCIUARTGETFLAGS:
-		return hu->hdev_flags;
+		err = hu->hdev_flags;
+		break;
 
 	default:
 		err = n_tty_ioctl_helper(tty, file, cmd, arg);
@@ -810,6 +812,9 @@
 #ifdef CONFIG_BT_HCIUART_AG6XX
 	ag6xx_init();
 #endif
+#ifdef CONFIG_BT_HCIUART_MRVL
+	mrvl_init();
+#endif
 
 	return 0;
 }
@@ -845,6 +850,9 @@
 #ifdef CONFIG_BT_HCIUART_AG6XX
 	ag6xx_deinit();
 #endif
+#ifdef CONFIG_BT_HCIUART_MRVL
+	mrvl_deinit();
+#endif
 
 	/* Release tty registration of line discipline */
 	err = tty_unregister_ldisc(N_HCI);
diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c
new file mode 100644
index 0000000..bbc4b39
--- /dev/null
+++ b/drivers/bluetooth/hci_mrvl.c
@@ -0,0 +1,387 @@
+/*
+ *
+ *  Bluetooth HCI UART driver for marvell devices
+ *
+ *  Copyright (C) 2016  Marvell International Ltd.
+ *  Copyright (C) 2016  Intel Corporation
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/tty.h>
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#include "hci_uart.h"
+
+#define HCI_FW_REQ_PKT 0xA5
+#define HCI_CHIP_VER_PKT 0xAA
+
+#define MRVL_ACK 0x5A
+#define MRVL_NAK 0xBF
+#define MRVL_RAW_DATA 0x1F
+
+enum {
+	STATE_CHIP_VER_PENDING,
+	STATE_FW_REQ_PENDING,
+};
+
+struct mrvl_data {
+	struct sk_buff *rx_skb;
+	struct sk_buff_head txq;
+	struct sk_buff_head rawq;
+	unsigned long flags;
+	unsigned int tx_len;
+	u8 id, rev;
+};
+
+struct hci_mrvl_pkt {
+	__le16 lhs;
+	__le16 rhs;
+} __packed;
+#define HCI_MRVL_PKT_SIZE 4
+
+static int mrvl_open(struct hci_uart *hu)
+{
+	struct mrvl_data *mrvl;
+
+	BT_DBG("hu %p", hu);
+
+	mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL);
+	if (!mrvl)
+		return -ENOMEM;
+
+	skb_queue_head_init(&mrvl->txq);
+	skb_queue_head_init(&mrvl->rawq);
+
+	set_bit(STATE_CHIP_VER_PENDING, &mrvl->flags);
+
+	hu->priv = mrvl;
+	return 0;
+}
+
+static int mrvl_close(struct hci_uart *hu)
+{
+	struct mrvl_data *mrvl = hu->priv;
+
+	BT_DBG("hu %p", hu);
+
+	skb_queue_purge(&mrvl->txq);
+	skb_queue_purge(&mrvl->rawq);
+	kfree_skb(mrvl->rx_skb);
+	kfree(mrvl);
+
+	hu->priv = NULL;
+	return 0;
+}
+
+static int mrvl_flush(struct hci_uart *hu)
+{
+	struct mrvl_data *mrvl = hu->priv;
+
+	BT_DBG("hu %p", hu);
+
+	skb_queue_purge(&mrvl->txq);
+	skb_queue_purge(&mrvl->rawq);
+
+	return 0;
+}
+
+static struct sk_buff *mrvl_dequeue(struct hci_uart *hu)
+{
+	struct mrvl_data *mrvl = hu->priv;
+	struct sk_buff *skb;
+
+	skb = skb_dequeue(&mrvl->txq);
+	if (!skb) {
+		/* Any raw data ? */
+		skb = skb_dequeue(&mrvl->rawq);
+	} else {
+		/* Prepend skb with frame type */
+		memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1);
+	}
+
+	return skb;
+}
+
+static int mrvl_enqueue(struct hci_uart *hu, struct sk_buff *skb)
+{
+	struct mrvl_data *mrvl = hu->priv;
+
+	skb_queue_tail(&mrvl->txq, skb);
+	return 0;
+}
+
+static void mrvl_send_ack(struct hci_uart *hu, unsigned char type)
+{
+	struct mrvl_data *mrvl = hu->priv;
+	struct sk_buff *skb;
+
+	/* No H4 payload, only 1 byte header */
+	skb = bt_skb_alloc(0, GFP_ATOMIC);
+	if (!skb) {
+		bt_dev_err(hu->hdev, "Unable to alloc ack/nak packet");
+		return;
+	}
+	hci_skb_pkt_type(skb) = type;
+
+	skb_queue_tail(&mrvl->txq, skb);
+	hci_uart_tx_wakeup(hu);
+}
+
+static int mrvl_recv_fw_req(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_mrvl_pkt *pkt = (void *)skb->data;
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct mrvl_data *mrvl = hu->priv;
+	int ret = 0;
+
+	if ((pkt->lhs ^ pkt->rhs) != 0xffff) {
+		bt_dev_err(hdev, "Corrupted mrvl header");
+		mrvl_send_ack(hu, MRVL_NAK);
+		ret = -EINVAL;
+		goto done;
+	}
+	mrvl_send_ack(hu, MRVL_ACK);
+
+	if (!test_bit(STATE_FW_REQ_PENDING, &mrvl->flags)) {
+		bt_dev_err(hdev, "Received unexpected firmware request");
+		ret = -EINVAL;
+		goto done;
+	}
+
+	mrvl->tx_len = le16_to_cpu(pkt->lhs);
+
+	clear_bit(STATE_FW_REQ_PENDING, &mrvl->flags);
+	smp_mb__after_atomic();
+	wake_up_bit(&mrvl->flags, STATE_FW_REQ_PENDING);
+
+done:
+	kfree_skb(skb);
+	return ret;
+}
+
+static int mrvl_recv_chip_ver(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct hci_mrvl_pkt *pkt = (void *)skb->data;
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct mrvl_data *mrvl = hu->priv;
+	u16 version = le16_to_cpu(pkt->lhs);
+	int ret = 0;
+
+	if ((pkt->lhs ^ pkt->rhs) != 0xffff) {
+		bt_dev_err(hdev, "Corrupted mrvl header");
+		mrvl_send_ack(hu, MRVL_NAK);
+		ret = -EINVAL;
+		goto done;
+	}
+	mrvl_send_ack(hu, MRVL_ACK);
+
+	if (!test_bit(STATE_CHIP_VER_PENDING, &mrvl->flags)) {
+		bt_dev_err(hdev, "Received unexpected chip version");
+		goto done;
+	}
+
+	mrvl->id = version;
+	mrvl->rev = version >> 8;
+
+	bt_dev_info(hdev, "Controller id = %x, rev = %x", mrvl->id, mrvl->rev);
+
+	clear_bit(STATE_CHIP_VER_PENDING, &mrvl->flags);
+	smp_mb__after_atomic();
+	wake_up_bit(&mrvl->flags, STATE_CHIP_VER_PENDING);
+
+done:
+	kfree_skb(skb);
+	return ret;
+}
+
+#define HCI_RECV_CHIP_VER \
+	.type = HCI_CHIP_VER_PKT, \
+	.hlen = HCI_MRVL_PKT_SIZE, \
+	.loff = 0, \
+	.lsize = 0, \
+	.maxlen = HCI_MRVL_PKT_SIZE
+
+#define HCI_RECV_FW_REQ \
+	.type = HCI_FW_REQ_PKT, \
+	.hlen = HCI_MRVL_PKT_SIZE, \
+	.loff = 0, \
+	.lsize = 0, \
+	.maxlen = HCI_MRVL_PKT_SIZE
+
+static const struct h4_recv_pkt mrvl_recv_pkts[] = {
+	{ H4_RECV_ACL,       .recv = hci_recv_frame     },
+	{ H4_RECV_SCO,       .recv = hci_recv_frame     },
+	{ H4_RECV_EVENT,     .recv = hci_recv_frame     },
+	{ HCI_RECV_FW_REQ,   .recv = mrvl_recv_fw_req   },
+	{ HCI_RECV_CHIP_VER, .recv = mrvl_recv_chip_ver },
+};
+
+static int mrvl_recv(struct hci_uart *hu, const void *data, int count)
+{
+	struct mrvl_data *mrvl = hu->priv;
+
+	if (!test_bit(HCI_UART_REGISTERED, &hu->flags))
+		return -EUNATCH;
+
+	mrvl->rx_skb = h4_recv_buf(hu->hdev, mrvl->rx_skb, data, count,
+				    mrvl_recv_pkts,
+				    ARRAY_SIZE(mrvl_recv_pkts));
+	if (IS_ERR(mrvl->rx_skb)) {
+		int err = PTR_ERR(mrvl->rx_skb);
+		bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err);
+		mrvl->rx_skb = NULL;
+		return err;
+	}
+
+	return count;
+}
+
+static int mrvl_load_firmware(struct hci_dev *hdev, const char *name)
+{
+	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct mrvl_data *mrvl = hu->priv;
+	const struct firmware *fw = NULL;
+	const u8 *fw_ptr, *fw_max;
+	int err;
+
+	err = request_firmware(&fw, name, &hdev->dev);
+	if (err < 0) {
+		bt_dev_err(hdev, "Failed to load firmware file %s", name);
+		return err;
+	}
+
+	fw_ptr = fw->data;
+	fw_max = fw->data + fw->size;
+
+	bt_dev_info(hdev, "Loading %s", name);
+
+	set_bit(STATE_FW_REQ_PENDING, &mrvl->flags);
+
+	while (fw_ptr <= fw_max) {
+		struct sk_buff *skb;
+
+		/* Controller drives the firmware load by sending firmware
+		 * request packets containing the expected fragment size.
+		 */
+		err = wait_on_bit_timeout(&mrvl->flags, STATE_FW_REQ_PENDING,
+					  TASK_INTERRUPTIBLE,
+					  msecs_to_jiffies(2000));
+		if (err == 1) {
+			bt_dev_err(hdev, "Firmware load interrupted");
+			err = -EINTR;
+			break;
+		} else if (err) {
+			bt_dev_err(hdev, "Firmware request timeout");
+			err = -ETIMEDOUT;
+			break;
+		}
+
+		bt_dev_dbg(hdev, "Firmware request, expecting %d bytes",
+			   mrvl->tx_len);
+
+		if (fw_ptr == fw_max) {
+			/* Controller requests a null size once firmware is
+			 * fully loaded. If controller expects more data, there
+			 * is an issue.
+			 */
+			if (!mrvl->tx_len) {
+				bt_dev_info(hdev, "Firmware loading complete");
+			} else {
+				bt_dev_err(hdev, "Firmware loading failure");
+				err = -EINVAL;
+			}
+			break;
+		}
+
+		if (fw_ptr + mrvl->tx_len > fw_max) {
+			mrvl->tx_len = fw_max - fw_ptr;
+			bt_dev_dbg(hdev, "Adjusting tx_len to %d",
+				   mrvl->tx_len);
+		}
+
+		skb = bt_skb_alloc(mrvl->tx_len, GFP_KERNEL);
+		if (!skb) {
+			bt_dev_err(hdev, "Failed to alloc mem for FW packet");
+			err = -ENOMEM;
+			break;
+		}
+		bt_cb(skb)->pkt_type = MRVL_RAW_DATA;
+
+		memcpy(skb_put(skb, mrvl->tx_len), fw_ptr, mrvl->tx_len);
+		fw_ptr += mrvl->tx_len;
+
+		set_bit(STATE_FW_REQ_PENDING, &mrvl->flags);
+
+		skb_queue_tail(&mrvl->rawq, skb);
+		hci_uart_tx_wakeup(hu);
+	}
+
+	release_firmware(fw);
+	return err;
+}
+
+static int mrvl_setup(struct hci_uart *hu)
+{
+	int err;
+
+	hci_uart_set_flow_control(hu, true);
+
+	err = mrvl_load_firmware(hu->hdev, "mrvl/helper_uart_3000000.bin");
+	if (err) {
+		bt_dev_err(hu->hdev, "Unable to download firmware helper");
+		return -EINVAL;
+	}
+
+	hci_uart_set_baudrate(hu, 3000000);
+	hci_uart_set_flow_control(hu, false);
+
+	err = mrvl_load_firmware(hu->hdev, "mrvl/uart8897_bt.bin");
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static const struct hci_uart_proto mrvl_proto = {
+	.id		= HCI_UART_MRVL,
+	.name		= "Marvell",
+	.init_speed	= 115200,
+	.open		= mrvl_open,
+	.close		= mrvl_close,
+	.flush		= mrvl_flush,
+	.setup		= mrvl_setup,
+	.recv		= mrvl_recv,
+	.enqueue	= mrvl_enqueue,
+	.dequeue	= mrvl_dequeue,
+};
+
+int __init mrvl_init(void)
+{
+	return hci_uart_register_proto(&mrvl_proto);
+}
+
+int __exit mrvl_deinit(void)
+{
+	return hci_uart_unregister_proto(&mrvl_proto);
+}
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 683c2b6..6c867fb 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -397,7 +397,7 @@
 	skb_queue_head_init(&qca->txq);
 	skb_queue_head_init(&qca->tx_wait_q);
 	spin_lock_init(&qca->hci_ibs_lock);
-	qca->workqueue = create_singlethread_workqueue("qca_wq");
+	qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
 	if (!qca->workqueue) {
 		BT_ERR("QCA Workqueue not initialized properly");
 		kfree(qca);
diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h
index 839bad1..0701395 100644
--- a/drivers/bluetooth/hci_uart.h
+++ b/drivers/bluetooth/hci_uart.h
@@ -35,7 +35,7 @@
 #define HCIUARTGETFLAGS		_IOR('U', 204, int)
 
 /* UART protocols */
-#define HCI_UART_MAX_PROTO	10
+#define HCI_UART_MAX_PROTO	12
 
 #define HCI_UART_H4	0
 #define HCI_UART_BCSP	1
@@ -47,6 +47,8 @@
 #define HCI_UART_BCM	7
 #define HCI_UART_QCA	8
 #define HCI_UART_AG6XX	9
+#define HCI_UART_NOKIA	10
+#define HCI_UART_MRVL	11
 
 #define HCI_UART_RAW_DEVICE	0
 #define HCI_UART_RESET_ON_INIT	1
@@ -189,3 +191,8 @@
 int ag6xx_init(void);
 int ag6xx_deinit(void);
 #endif
+
+#ifdef CONFIG_BT_HCIUART_MRVL
+int mrvl_init(void);
+int mrvl_deinit(void);
+#endif
diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c
index 3ff229b..c4a75a1 100644
--- a/drivers/bluetooth/hci_vhci.c
+++ b/drivers/bluetooth/hci_vhci.c
@@ -377,21 +377,7 @@
 	.fops	= &vhci_fops,
 	.minor	= VHCI_MINOR,
 };
-
-static int __init vhci_init(void)
-{
-	BT_INFO("Virtual HCI driver ver %s", VERSION);
-
-	return misc_register(&vhci_miscdev);
-}
-
-static void __exit vhci_exit(void)
-{
-	misc_deregister(&vhci_miscdev);
-}
-
-module_init(vhci_init);
-module_exit(vhci_exit);
+module_misc_device(vhci_miscdev);
 
 module_param(amp, bool, 0644);
 MODULE_PARM_DESC(amp, "Create AMP controller device");
diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c
index 5755907f..8900823 100644
--- a/drivers/bus/arm-cci.c
+++ b/drivers/bus/arm-cci.c
@@ -144,15 +144,12 @@
 	int num_cntrs;
 	atomic_t active_events;
 	struct mutex reserve_mutex;
-	struct list_head entry;
+	struct hlist_node node;
 	cpumask_t cpus;
 };
 
 #define to_cci_pmu(c)	(container_of(c, struct cci_pmu, pmu))
 
-static DEFINE_MUTEX(cci_pmu_mutex);
-static LIST_HEAD(cci_pmu_list);
-
 enum cci_models {
 #ifdef CONFIG_ARM_CCI400_PMU
 	CCI400_R0,
@@ -551,7 +548,7 @@
 	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_wrq, 0xB),
 	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_cd_hs, 0xC),
 	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_rq_stall_addr_hazard, 0xD),
-	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snopp_rq_stall_tt_full, 0xE),
+	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_stall_tt_full, 0xE),
 	CCI5xx_GLOBAL_EVENT_EXT_ATTR_ENTRY(cci_snoop_rq_tzmp1_prot, 0xF),
 	NULL
 };
@@ -1506,25 +1503,21 @@
 	return perf_pmu_register(&cci_pmu->pmu, name, -1);
 }
 
-static int cci_pmu_offline_cpu(unsigned int cpu)
+static int cci_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 {
-	struct cci_pmu *cci_pmu;
+	struct cci_pmu *cci_pmu = hlist_entry_safe(node, struct cci_pmu, node);
 	unsigned int target;
 
-	mutex_lock(&cci_pmu_mutex);
-	list_for_each_entry(cci_pmu, &cci_pmu_list, entry) {
-		if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
-			continue;
-		target = cpumask_any_but(cpu_online_mask, cpu);
-		if (target >= nr_cpu_ids)
-			continue;
-		/*
-		 * TODO: migrate context once core races on event->ctx have
-		 * been fixed.
-		 */
-		cpumask_set_cpu(target, &cci_pmu->cpus);
-	}
-	mutex_unlock(&cci_pmu_mutex);
+	if (!cpumask_test_and_clear_cpu(cpu, &cci_pmu->cpus))
+		return 0;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+	/*
+	 * TODO: migrate context once core races on event->ctx have
+	 * been fixed.
+	 */
+	cpumask_set_cpu(target, &cci_pmu->cpus);
 	return 0;
 }
 
@@ -1768,10 +1761,8 @@
 	if (ret)
 		return ret;
 
-	mutex_lock(&cci_pmu_mutex);
-	list_add(&cci_pmu->entry, &cci_pmu_list);
-	mutex_unlock(&cci_pmu_mutex);
-
+	cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
+					 &cci_pmu->node);
 	pr_info("ARM %s PMU driver probed", cci_pmu->model->name);
 	return 0;
 }
@@ -1804,9 +1795,9 @@
 {
 	int ret;
 
-	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCI_ONLINE,
-					"AP_PERF_ARM_CCI_ONLINE", NULL,
-					cci_pmu_offline_cpu);
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCI_ONLINE,
+				      "AP_PERF_ARM_CCI_ONLINE", NULL,
+				      cci_pmu_offline_cpu);
 	if (ret)
 		return ret;
 
diff --git a/drivers/bus/arm-ccn.c b/drivers/bus/arm-ccn.c
index 97a9185..d1074d9 100644
--- a/drivers/bus/arm-ccn.c
+++ b/drivers/bus/arm-ccn.c
@@ -167,7 +167,7 @@
 	struct hrtimer hrtimer;
 
 	cpumask_t cpu;
-	struct list_head entry;
+	struct hlist_node node;
 
 	struct pmu pmu;
 };
@@ -187,11 +187,9 @@
 	struct arm_ccn_component *xp;
 
 	struct arm_ccn_dt dt;
+	int mn_id;
 };
 
-static DEFINE_MUTEX(arm_ccn_mutex);
-static LIST_HEAD(arm_ccn_list);
-
 static int arm_ccn_node_to_xp(int node)
 {
 	return node / CCN_NUM_XP_PORTS;
@@ -212,6 +210,7 @@
 #define CCN_CONFIG_TYPE(_config)	(((_config) >> 8) & 0xff)
 #define CCN_CONFIG_EVENT(_config)	(((_config) >> 16) & 0xff)
 #define CCN_CONFIG_PORT(_config)	(((_config) >> 24) & 0x3)
+#define CCN_CONFIG_BUS(_config)		(((_config) >> 24) & 0x3)
 #define CCN_CONFIG_VC(_config)		(((_config) >> 26) & 0x7)
 #define CCN_CONFIG_DIR(_config)		(((_config) >> 29) & 0x1)
 #define CCN_CONFIG_MASK(_config)	(((_config) >> 30) & 0xf)
@@ -241,6 +240,7 @@
 static CCN_FORMAT_ATTR(type, "config:8-15");
 static CCN_FORMAT_ATTR(event, "config:16-23");
 static CCN_FORMAT_ATTR(port, "config:24-25");
+static CCN_FORMAT_ATTR(bus, "config:24-25");
 static CCN_FORMAT_ATTR(vc, "config:26-28");
 static CCN_FORMAT_ATTR(dir, "config:29-29");
 static CCN_FORMAT_ATTR(mask, "config:30-33");
@@ -253,6 +253,7 @@
 	&arm_ccn_pmu_format_attr_type.attr.attr,
 	&arm_ccn_pmu_format_attr_event.attr.attr,
 	&arm_ccn_pmu_format_attr_port.attr.attr,
+	&arm_ccn_pmu_format_attr_bus.attr.attr,
 	&arm_ccn_pmu_format_attr_vc.attr.attr,
 	&arm_ccn_pmu_format_attr_dir.attr.attr,
 	&arm_ccn_pmu_format_attr_mask.attr.attr,
@@ -328,6 +329,7 @@
 static ssize_t arm_ccn_pmu_event_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
+	struct arm_ccn *ccn = pmu_to_arm_ccn(dev_get_drvdata(dev));
 	struct arm_ccn_pmu_event *event = container_of(attr,
 			struct arm_ccn_pmu_event, attr);
 	ssize_t res;
@@ -349,10 +351,17 @@
 		break;
 	case CCN_TYPE_XP:
 		res += snprintf(buf + res, PAGE_SIZE - res,
-				",xp=?,port=?,vc=?,dir=?");
+				",xp=?,vc=?");
 		if (event->event == CCN_EVENT_WATCHPOINT)
 			res += snprintf(buf + res, PAGE_SIZE - res,
-					",cmp_l=?,cmp_h=?,mask=?");
+					",port=?,dir=?,cmp_l=?,cmp_h=?,mask=?");
+		else
+			res += snprintf(buf + res, PAGE_SIZE - res,
+					",bus=?");
+
+		break;
+	case CCN_TYPE_MN:
+		res += snprintf(buf + res, PAGE_SIZE - res, ",node=%d", ccn->mn_id);
 		break;
 	default:
 		res += snprintf(buf + res, PAGE_SIZE - res, ",node=?");
@@ -383,9 +392,9 @@
 }
 
 static struct arm_ccn_pmu_event arm_ccn_pmu_events[] = {
-	CCN_EVENT_MN(eobarrier, "dir=0,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
-	CCN_EVENT_MN(ecbarrier, "dir=0,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
-	CCN_EVENT_MN(dvmop, "dir=0,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
+	CCN_EVENT_MN(eobarrier, "dir=1,vc=0,cmp_h=0x1c00", CCN_IDX_MASK_OPCODE),
+	CCN_EVENT_MN(ecbarrier, "dir=1,vc=0,cmp_h=0x1e00", CCN_IDX_MASK_OPCODE),
+	CCN_EVENT_MN(dvmop, "dir=1,vc=0,cmp_h=0x2800", CCN_IDX_MASK_OPCODE),
 	CCN_EVENT_HNI(txdatflits, "dir=1,vc=3", CCN_IDX_MASK_ANY),
 	CCN_EVENT_HNI(rxdatflits, "dir=0,vc=3", CCN_IDX_MASK_ANY),
 	CCN_EVENT_HNI(txreqflits, "dir=1,vc=0", CCN_IDX_MASK_ANY),
@@ -733,9 +742,10 @@
 
 	if (has_branch_stack(event) || event->attr.exclude_user ||
 			event->attr.exclude_kernel || event->attr.exclude_hv ||
-			event->attr.exclude_idle) {
+			event->attr.exclude_idle || event->attr.exclude_host ||
+			event->attr.exclude_guest) {
 		dev_warn(ccn->dev, "Can't exclude execution levels!\n");
-		return -EOPNOTSUPP;
+		return -EINVAL;
 	}
 
 	if (event->cpu < 0) {
@@ -759,6 +769,12 @@
 
 	/* Validate node/xp vs topology */
 	switch (type) {
+	case CCN_TYPE_MN:
+		if (node_xp != ccn->mn_id) {
+			dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp);
+			return -EINVAL;
+		}
+		break;
 	case CCN_TYPE_XP:
 		if (node_xp >= ccn->num_xps) {
 			dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp);
@@ -886,6 +902,10 @@
 	struct arm_ccn_component *xp;
 	u32 val, dt_cfg;
 
+	/* Nothing to do for cycle counter */
+	if (hw->idx == CCN_IDX_PMU_CYCLE_COUNTER)
+		return;
+
 	if (CCN_CONFIG_TYPE(event->attr.config) == CCN_TYPE_XP)
 		xp = &ccn->xp[CCN_CONFIG_XP(event->attr.config)];
 	else
@@ -917,38 +937,17 @@
 			arm_ccn_pmu_read_counter(ccn, hw->idx));
 	hw->state = 0;
 
-	/*
-	 * Pin the timer, so that the overflows are handled by the chosen
-	 * event->cpu (this is the same one as presented in "cpumask"
-	 * attribute).
-	 */
-	if (!ccn->irq)
-		hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
-				HRTIMER_MODE_REL_PINNED);
-
 	/* Set the DT bus input, engaging the counter */
 	arm_ccn_pmu_xp_dt_config(event, 1);
 }
 
 static void arm_ccn_pmu_event_stop(struct perf_event *event, int flags)
 {
-	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
 	struct hw_perf_event *hw = &event->hw;
-	u64 timeout;
 
 	/* Disable counting, setting the DT bus to pass-through mode */
 	arm_ccn_pmu_xp_dt_config(event, 0);
 
-	if (!ccn->irq)
-		hrtimer_cancel(&ccn->dt.hrtimer);
-
-	/* Let the DT bus drain */
-	timeout = arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) +
-			ccn->num_xps;
-	while (arm_ccn_pmu_read_counter(ccn, CCN_IDX_PMU_CYCLE_COUNTER) <
-			timeout)
-		cpu_relax();
-
 	if (flags & PERF_EF_UPDATE)
 		arm_ccn_pmu_event_update(event);
 
@@ -988,7 +987,7 @@
 
 	/* Comparison values */
 	writel(cmp_l & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_L(wp));
-	writel((cmp_l >> 32) & 0xefffffff,
+	writel((cmp_l >> 32) & 0x7fffffff,
 			source->base + CCN_XP_DT_CMP_VAL_L(wp) + 4);
 	writel(cmp_h & 0xffffffff, source->base + CCN_XP_DT_CMP_VAL_H(wp));
 	writel((cmp_h >> 32) & 0x0fffffff,
@@ -996,7 +995,7 @@
 
 	/* Mask */
 	writel(mask_l & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_L(wp));
-	writel((mask_l >> 32) & 0xefffffff,
+	writel((mask_l >> 32) & 0x7fffffff,
 			source->base + CCN_XP_DT_CMP_MASK_L(wp) + 4);
 	writel(mask_h & 0xffffffff, source->base + CCN_XP_DT_CMP_MASK_H(wp));
 	writel((mask_h >> 32) & 0x0fffffff,
@@ -1014,7 +1013,7 @@
 	hw->event_base = CCN_XP_DT_CONFIG__DT_CFG__XP_PMU_EVENT(hw->config_base);
 
 	id = (CCN_CONFIG_VC(event->attr.config) << 4) |
-			(CCN_CONFIG_PORT(event->attr.config) << 3) |
+			(CCN_CONFIG_BUS(event->attr.config) << 3) |
 			(CCN_CONFIG_EVENT(event->attr.config) << 0);
 
 	val = readl(source->base + CCN_XP_PMU_EVENT_SEL);
@@ -1099,15 +1098,31 @@
 	spin_unlock(&ccn->dt.config_lock);
 }
 
+static int arm_ccn_pmu_active_counters(struct arm_ccn *ccn)
+{
+	return bitmap_weight(ccn->dt.pmu_counters_mask,
+			     CCN_NUM_PMU_EVENT_COUNTERS + 1);
+}
+
 static int arm_ccn_pmu_event_add(struct perf_event *event, int flags)
 {
 	int err;
 	struct hw_perf_event *hw = &event->hw;
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
 
 	err = arm_ccn_pmu_event_alloc(event);
 	if (err)
 		return err;
 
+	/*
+	 * Pin the timer, so that the overflows are handled by the chosen
+	 * event->cpu (this is the same one as presented in "cpumask"
+	 * attribute).
+	 */
+	if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 1)
+		hrtimer_start(&ccn->dt.hrtimer, arm_ccn_pmu_timer_period(),
+			      HRTIMER_MODE_REL_PINNED);
+
 	arm_ccn_pmu_event_config(event);
 
 	hw->state = PERF_HES_STOPPED;
@@ -1120,9 +1135,14 @@
 
 static void arm_ccn_pmu_event_del(struct perf_event *event, int flags)
 {
+	struct arm_ccn *ccn = pmu_to_arm_ccn(event->pmu);
+
 	arm_ccn_pmu_event_stop(event, PERF_EF_UPDATE);
 
 	arm_ccn_pmu_event_release(event);
+
+	if (!ccn->irq && arm_ccn_pmu_active_counters(ccn) == 0)
+		hrtimer_cancel(&ccn->dt.hrtimer);
 }
 
 static void arm_ccn_pmu_event_read(struct perf_event *event)
@@ -1130,6 +1150,24 @@
 	arm_ccn_pmu_event_update(event);
 }
 
+static void arm_ccn_pmu_enable(struct pmu *pmu)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
+
+	u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
+	val |= CCN_DT_PMCR__PMU_EN;
+	writel(val, ccn->dt.base + CCN_DT_PMCR);
+}
+
+static void arm_ccn_pmu_disable(struct pmu *pmu)
+{
+	struct arm_ccn *ccn = pmu_to_arm_ccn(pmu);
+
+	u32 val = readl(ccn->dt.base + CCN_DT_PMCR);
+	val &= ~CCN_DT_PMCR__PMU_EN;
+	writel(val, ccn->dt.base + CCN_DT_PMCR);
+}
+
 static irqreturn_t arm_ccn_pmu_overflow_handler(struct arm_ccn_dt *dt)
 {
 	u32 pmovsr = readl(dt->base + CCN_DT_PMOVSR);
@@ -1173,30 +1211,24 @@
 }
 
 
-static int arm_ccn_pmu_offline_cpu(unsigned int cpu)
+static int arm_ccn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 {
-	struct arm_ccn_dt *dt;
+	struct arm_ccn_dt *dt = hlist_entry_safe(node, struct arm_ccn_dt, node);
+	struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt);
 	unsigned int target;
 
-	mutex_lock(&arm_ccn_mutex);
-	list_for_each_entry(dt, &arm_ccn_list, entry) {
-		struct arm_ccn *ccn = container_of(dt, struct arm_ccn, dt);
-
-		if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu))
-			continue;
-		target = cpumask_any_but(cpu_online_mask, cpu);
-		if (target >= nr_cpu_ids)
-			continue;
-		perf_pmu_migrate_context(&dt->pmu, cpu, target);
-		cpumask_set_cpu(target, &dt->cpu);
-		if (ccn->irq)
-			WARN_ON(irq_set_affinity_hint(ccn->irq, &dt->cpu) != 0);
-	}
-	mutex_unlock(&arm_ccn_mutex);
+	if (!cpumask_test_and_clear_cpu(cpu, &dt->cpu))
+		return 0;
+	target = cpumask_any_but(cpu_online_mask, cpu);
+	if (target >= nr_cpu_ids)
+		return 0;
+	perf_pmu_migrate_context(&dt->pmu, cpu, target);
+	cpumask_set_cpu(target, &dt->cpu);
+	if (ccn->irq)
+		WARN_ON(irq_set_affinity_hint(ccn->irq, &dt->cpu) != 0);
 	return 0;
 }
 
-
 static DEFINE_IDA(arm_ccn_pmu_ida);
 
 static int arm_ccn_pmu_init(struct arm_ccn *ccn)
@@ -1252,6 +1284,8 @@
 		.start = arm_ccn_pmu_event_start,
 		.stop = arm_ccn_pmu_event_stop,
 		.read = arm_ccn_pmu_event_read,
+		.pmu_enable = arm_ccn_pmu_enable,
+		.pmu_disable = arm_ccn_pmu_disable,
 	};
 
 	/* No overflow interrupt? Have to use a timer instead. */
@@ -1278,9 +1312,8 @@
 	if (err)
 		goto error_pmu_register;
 
-	mutex_lock(&arm_ccn_mutex);
-	list_add(&ccn->dt.entry, &arm_ccn_list);
-	mutex_unlock(&arm_ccn_mutex);
+	cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+					 &ccn->dt.node);
 	return 0;
 
 error_pmu_register:
@@ -1296,10 +1329,8 @@
 {
 	int i;
 
-	mutex_lock(&arm_ccn_mutex);
-	list_del(&ccn->dt.entry);
-	mutex_unlock(&arm_ccn_mutex);
-
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+					    &ccn->dt.node);
 	if (ccn->irq)
 		irq_set_affinity_hint(ccn->irq, NULL);
 	for (i = 0; i < ccn->num_xps; i++)
@@ -1361,6 +1392,8 @@
 
 	switch (type) {
 	case CCN_TYPE_MN:
+		ccn->mn_id = id;
+		return 0;
 	case CCN_TYPE_DT:
 		return 0;
 	case CCN_TYPE_XP:
@@ -1471,8 +1504,9 @@
 		/* Can set 'disable' bits, so can acknowledge interrupts */
 		writel(CCN_MN_ERRINT_STATUS__PMU_EVENTS__ENABLE,
 				ccn->base + CCN_MN_ERRINT_STATUS);
-		err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler, 0,
-				dev_name(ccn->dev), ccn);
+		err = devm_request_irq(ccn->dev, irq, arm_ccn_irq_handler,
+				       IRQF_NOBALANCING | IRQF_NO_THREAD,
+				       dev_name(ccn->dev), ccn);
 		if (err)
 			return err;
 
@@ -1527,9 +1561,9 @@
 {
 	int i, ret;
 
-	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE,
-					"AP_PERF_ARM_CCN_ONLINE", NULL,
-					arm_ccn_pmu_offline_cpu);
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_CCN_ONLINE,
+				      "AP_PERF_ARM_CCN_ONLINE", NULL,
+				      arm_ccn_pmu_offline_cpu);
 	if (ret)
 		return ret;
 
@@ -1541,7 +1575,7 @@
 
 static void __exit arm_ccn_exit(void)
 {
-	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_ARM_CCN_ONLINE);
+	cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_CCN_ONLINE);
 	platform_driver_unregister(&arm_ccn_driver);
 }
 
diff --git a/drivers/bus/mips_cdmm.c b/drivers/bus/mips_cdmm.c
index cad49bc..1b14256 100644
--- a/drivers/bus/mips_cdmm.c
+++ b/drivers/bus/mips_cdmm.c
@@ -596,19 +596,20 @@
 BUILD_PERDEV_HELPER(cpu_up)         /* int mips_cdmm_cpu_up_helper(...) */
 
 /**
- * mips_cdmm_bus_down() - Tear down the CDMM bus.
- * @data:	Pointer to unsigned int CPU number.
+ * mips_cdmm_cpu_down_prep() - Callback for CPUHP DOWN_PREP:
+ *			       Tear down the CDMM bus.
+ * @cpu:	unsigned int CPU number.
  *
  * This function is executed on the hotplugged CPU and calls the CDMM
  * driver cpu_down callback for all devices on that CPU.
  */
-static long mips_cdmm_bus_down(void *data)
+static int mips_cdmm_cpu_down_prep(unsigned int cpu)
 {
 	struct mips_cdmm_bus *bus;
 	long ret;
 
 	/* Inform all the devices on the bus */
-	ret = bus_for_each_dev(&mips_cdmm_bustype, NULL, data,
+	ret = bus_for_each_dev(&mips_cdmm_bustype, NULL, &cpu,
 			       mips_cdmm_cpu_down_helper);
 
 	/*
@@ -623,8 +624,8 @@
 }
 
 /**
- * mips_cdmm_bus_up() - Bring up the CDMM bus.
- * @data:	Pointer to unsigned int CPU number.
+ * mips_cdmm_cpu_online() - Callback for CPUHP ONLINE: Bring up the CDMM bus.
+ * @cpu:	unsigned int CPU number.
  *
  * This work_on_cpu callback function is executed on a given CPU to discover
  * CDMM devices on that CPU, or to call the CDMM driver cpu_up callback for all
@@ -634,7 +635,7 @@
  * initialisation. When CPUs are brought online the function is
  * invoked directly on the hotplugged CPU.
  */
-static long mips_cdmm_bus_up(void *data)
+static int mips_cdmm_cpu_online(unsigned int cpu)
 {
 	struct mips_cdmm_bus *bus;
 	long ret;
@@ -651,51 +652,13 @@
 		mips_cdmm_bus_discover(bus);
 	else
 		/* Inform all the devices on the bus */
-		ret = bus_for_each_dev(&mips_cdmm_bustype, NULL, data,
+		ret = bus_for_each_dev(&mips_cdmm_bustype, NULL, &cpu,
 				       mips_cdmm_cpu_up_helper);
 
 	return ret;
 }
 
 /**
- * mips_cdmm_cpu_notify() - Take action when a CPU is going online or offline.
- * @nb:		CPU notifier block .
- * @action:	Event that has taken place (CPU_*).
- * @data:	CPU number.
- *
- * This notifier is used to keep the CDMM buses updated as CPUs are offlined and
- * onlined. When CPUs go offline or come back online, so does their CDMM bus, so
- * devices must be informed. Also when CPUs come online for the first time the
- * devices on the CDMM bus need discovering.
- *
- * Returns:	NOTIFY_OK if event was used.
- *		NOTIFY_DONE if we didn't care.
- */
-static int mips_cdmm_cpu_notify(struct notifier_block *nb,
-				unsigned long action, void *data)
-{
-	unsigned int cpu = (unsigned int)data;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		mips_cdmm_bus_up(&cpu);
-		break;
-	case CPU_DOWN_PREPARE:
-		mips_cdmm_bus_down(&cpu);
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block mips_cdmm_cpu_nb = {
-	.notifier_call = mips_cdmm_cpu_notify,
-};
-
-/**
  * mips_cdmm_init() - Initialise CDMM bus.
  *
  * Initialise CDMM bus, discover CDMM devices for online CPUs, and arrange for
@@ -703,7 +666,6 @@
  */
 static int __init mips_cdmm_init(void)
 {
-	unsigned int cpu;
 	int ret;
 
 	/* Register the bus */
@@ -712,19 +674,11 @@
 		return ret;
 
 	/* We want to be notified about new CPUs */
-	ret = register_cpu_notifier(&mips_cdmm_cpu_nb);
-	if (ret) {
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "bus/cdmm:online",
+				mips_cdmm_cpu_online, mips_cdmm_cpu_down_prep);
+	if (ret < 0)
 		pr_warn("cdmm: Failed to register CPU notifier\n");
-		goto out;
-	}
 
-	/* Discover devices on CDMM of online CPUs */
-	for_each_online_cpu(cpu)
-		work_on_cpu(cpu, mips_cdmm_bus_up, &cpu);
-
-	return 0;
-out:
-	bus_unregister(&mips_cdmm_bustype);
 	return ret;
 }
 subsys_initcall(mips_cdmm_init);
diff --git a/drivers/bus/vexpress-config.c b/drivers/bus/vexpress-config.c
index c3cb76b..9efdf1d 100644
--- a/drivers/bus/vexpress-config.c
+++ b/drivers/bus/vexpress-config.c
@@ -178,6 +178,7 @@
 
 	parent = class_find_device(vexpress_config_class, NULL, bridge,
 			vexpress_config_node_match);
+	of_node_put(bridge);
 	if (WARN_ON(!parent))
 		return -ENODEV;
 
diff --git a/drivers/char/bfin-otp.c b/drivers/char/bfin-otp.c
index 44660f1..35d46da 100644
--- a/drivers/char/bfin-otp.c
+++ b/drivers/char/bfin-otp.c
@@ -230,45 +230,7 @@
 	.name     = DRIVER_NAME,
 	.fops     = &bfin_otp_fops,
 };
-
-/**
- *	bfin_otp_init - Initialize module
- *
- *	Registers the device and notifier handler. Actual device
- *	initialization is handled by bfin_otp_open().
- */
-static int __init bfin_otp_init(void)
-{
-	int ret;
-
-	stampit();
-
-	ret = misc_register(&bfin_otp_misc_device);
-	if (ret) {
-		pr_init(KERN_ERR PFX "unable to register a misc device\n");
-		return ret;
-	}
-
-	pr_init(KERN_INFO PFX "initialized\n");
-
-	return 0;
-}
-
-/**
- *	bfin_otp_exit - Deinitialize module
- *
- *	Unregisters the device and notifier handler. Actual device
- *	deinitialization is handled by bfin_otp_close().
- */
-static void __exit bfin_otp_exit(void)
-{
-	stampit();
-
-	misc_deregister(&bfin_otp_misc_device);
-}
-
-module_init(bfin_otp_init);
-module_exit(bfin_otp_exit);
+module_misc_device(bfin_otp_misc_device);
 
 MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>");
 MODULE_DESCRIPTION("Blackfin OTP Memory Interface");
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 56ad5a5..8c0770b 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -244,7 +244,7 @@
 
 config HW_RANDOM_MXC_RNGA
 	tristate "Freescale i.MX RNGA Random Number Generator"
-	depends on ARCH_HAS_RNGA
+	depends on SOC_IMX31
 	default HW_RANDOM
 	---help---
 	  This driver provides kernel-side support for the Random Number
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index a33163d..5bb1985 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -381,6 +381,9 @@
 	char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
 	int err = 0;
 
+	if (!pfn_valid(PFN_DOWN(p)))
+		return -EIO;
+
 	read = 0;
 	if (p < (unsigned long) high_memory) {
 		low_count = count;
@@ -509,6 +512,9 @@
 	char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
 	int err = 0;
 
+	if (!pfn_valid(PFN_DOWN(p)))
+		return -EIO;
+
 	if (p < (unsigned long) high_memory) {
 		unsigned long to_write = min_t(unsigned long, count,
 					       (unsigned long)high_memory - p);
diff --git a/drivers/char/mwave/3780i.c b/drivers/char/mwave/3780i.c
index 2874004..972c40a1 100644
--- a/drivers/char/mwave/3780i.c
+++ b/drivers/char/mwave/3780i.c
@@ -124,7 +124,7 @@
 		MKBYTE(rSlaveControl));
 
 	rSlaveControl_Save = rSlaveControl;
-	rSlaveControl.ConfigMode = TRUE;
+	rSlaveControl.ConfigMode = true;
 
 	PRINTK_2(TRACE_3780I,
 		"3780i::dsp3780i_WriteGenCfg entry rSlaveControl+ConfigMode %x\n",
@@ -155,7 +155,7 @@
 
 	MKBYTE(rSlaveControl) = InByteDsp(DSP_IsaSlaveControl);
 	rSlaveControl_Save = rSlaveControl;
-	rSlaveControl.ConfigMode = TRUE;
+	rSlaveControl.ConfigMode = true;
 	OutByteDsp(DSP_IsaSlaveControl, MKBYTE(rSlaveControl));
 	OutByteDsp(DSP_ConfigAddress, (unsigned char) uIndex);
 	ucValue = InByteDsp(DSP_ConfigData);
@@ -230,7 +230,7 @@
 			rUartCfg1.BaseIO = 3;
 			break;
 		}
-		rUartCfg2.Enable = TRUE;
+		rUartCfg2.Enable = true;
 	}
 
 	rHBridgeCfg1.Reserved = rHBridgeCfg2.Reserved = 0;
@@ -238,7 +238,7 @@
 	rHBridgeCfg1.IrqPulse = pSettings->bDspIrqPulse;
 	rHBridgeCfg1.Irq = (unsigned char) pIrqMap[pSettings->usDspIrq];
 	rHBridgeCfg1.AccessMode = 1;
-	rHBridgeCfg2.Enable = TRUE;
+	rHBridgeCfg2.Enable = true;
 
 
 	rBusmasterCfg2.Reserved = 0;
@@ -278,8 +278,8 @@
 	* soft-reset active for 10ms.
 	*/
 	rSlaveControl.ClockControl = 0;
-	rSlaveControl.SoftReset = TRUE;
-	rSlaveControl.ConfigMode = FALSE;
+	rSlaveControl.SoftReset = true;
+	rSlaveControl.ConfigMode = false;
 	rSlaveControl.Reserved = 0;
 
 	PRINTK_4(TRACE_3780I,
@@ -302,7 +302,7 @@
 	for (i = 0; i < 11; i++)
 		udelay(2000);
 
-	rSlaveControl.SoftReset = FALSE;
+	rSlaveControl.SoftReset = false;
 	OutWordDsp(DSP_IsaSlaveControl, MKWORD(rSlaveControl));
 
 	MKWORD(tval) = InWordDsp(DSP_IsaSlaveControl);
@@ -326,10 +326,10 @@
 	}
 
 
-	rHBridgeControl.EnableDspInt = FALSE;
-	rHBridgeControl.MemAutoInc = TRUE;
-	rHBridgeControl.IoAutoInc = FALSE;
-	rHBridgeControl.DiagnosticMode = FALSE;
+	rHBridgeControl.EnableDspInt = false;
+	rHBridgeControl.MemAutoInc = true;
+	rHBridgeControl.IoAutoInc = false;
+	rHBridgeControl.DiagnosticMode = false;
 
 	PRINTK_3(TRACE_3780I,
 		"3780i::dsp3780i_EnableDSP DSP_HBridgeControl %x rHBridgeControl %x\n",
@@ -345,7 +345,7 @@
 	ChipID = ReadMsaCfg(DSP_ChipID);
 
 	PRINTK_2(TRACE_3780I,
-		"3780i::dsp3780I_EnableDSP exiting bRC=TRUE, ChipID %x\n",
+		"3780i::dsp3780I_EnableDSP exiting bRC=true, ChipID %x\n",
 		ChipID);
 
 	return 0;
@@ -361,8 +361,8 @@
 	PRINTK_1(TRACE_3780I, "3780i::dsp3780i_DisableDSP entry\n");
 
 	rSlaveControl.ClockControl = 0;
-	rSlaveControl.SoftReset = TRUE;
-	rSlaveControl.ConfigMode = FALSE;
+	rSlaveControl.SoftReset = true;
+	rSlaveControl.ConfigMode = false;
 	rSlaveControl.Reserved = 0;
 	spin_lock_irqsave(&dsp_lock, flags);
 	OutWordDsp(DSP_IsaSlaveControl, MKWORD(rSlaveControl));
@@ -398,14 +398,14 @@
 	PRINTK_2(TRACE_3780I, "3780i::dsp3780i_Reset rHBridgeControl %x\n",
 		MKWORD(rHBridgeControl));
 
-	rHBridgeControl.EnableDspInt = FALSE;
+	rHBridgeControl.EnableDspInt = false;
 	OutWordDsp(DSP_HBridgeControl, MKWORD(rHBridgeControl));
 	spin_unlock_irqrestore(&dsp_lock, flags);
 
 	/* Reset the core via the boot domain register */
-	rBootDomain.ResetCore = TRUE;
-	rBootDomain.Halt = TRUE;
-	rBootDomain.NMI = TRUE;
+	rBootDomain.ResetCore = true;
+	rBootDomain.Halt = true;
+	rBootDomain.NMI = true;
 	rBootDomain.Reserved = 0;
 
 	PRINTK_2(TRACE_3780I, "3780i::dsp3780i_Reset rBootDomain %x\n",
@@ -438,26 +438,26 @@
 
 
 	/* Transition the core to a running state */
-	rBootDomain.ResetCore = TRUE;
-	rBootDomain.Halt = FALSE;
-	rBootDomain.NMI = TRUE;
+	rBootDomain.ResetCore = true;
+	rBootDomain.Halt = false;
+	rBootDomain.NMI = true;
 	rBootDomain.Reserved = 0;
 	WriteMsaCfg(DSP_MspBootDomain, MKWORD(rBootDomain));
 
 	udelay(5);
 
-	rBootDomain.ResetCore = FALSE;
+	rBootDomain.ResetCore = false;
 	WriteMsaCfg(DSP_MspBootDomain, MKWORD(rBootDomain));
 	udelay(5);
 
-	rBootDomain.NMI = FALSE;
+	rBootDomain.NMI = false;
 	WriteMsaCfg(DSP_MspBootDomain, MKWORD(rBootDomain));
 	udelay(5);
 
 	/* Enable DSP to PC interrupt */
 	spin_lock_irqsave(&dsp_lock, flags);
 	MKWORD(rHBridgeControl) = InWordDsp(DSP_HBridgeControl);
-	rHBridgeControl.EnableDspInt = TRUE;
+	rHBridgeControl.EnableDspInt = true;
 
 	PRINTK_2(TRACE_3780I, "3780i::dsp3780i_Run rHBridgeControl %x\n",
 		MKWORD(rHBridgeControl));
@@ -466,7 +466,7 @@
 	spin_unlock_irqrestore(&dsp_lock, flags);
 
 
-	PRINTK_1(TRACE_3780I, "3780i::dsp3780i_Run exit bRC=TRUE\n");
+	PRINTK_1(TRACE_3780I, "3780i::dsp3780i_Run exit bRC=true\n");
 
 	return 0;
 }
@@ -508,7 +508,7 @@
 
 
 	PRINTK_1(TRACE_3780I,
-		"3780I::dsp3780I_ReadDStore exit bRC=TRUE\n");
+		"3780I::dsp3780I_ReadDStore exit bRC=true\n");
 
 	return 0;
 }
@@ -550,7 +550,7 @@
 
 
 	PRINTK_1(TRACE_3780I,
-		"3780I::dsp3780I_ReadAndClearDStore exit bRC=TRUE\n");
+		"3780I::dsp3780I_ReadAndClearDStore exit bRC=true\n");
 
 	return 0;
 }
@@ -592,7 +592,7 @@
 
 
 	PRINTK_1(TRACE_3780I,
-		"3780I::dsp3780D_WriteDStore exit bRC=TRUE\n");
+		"3780I::dsp3780D_WriteDStore exit bRC=true\n");
 
 	return 0;
 }
@@ -640,7 +640,7 @@
 	}
 
 	PRINTK_1(TRACE_3780I,
-		"3780I::dsp3780I_ReadIStore exit bRC=TRUE\n");
+		"3780I::dsp3780I_ReadIStore exit bRC=true\n");
 
 	return 0;
 }
@@ -689,7 +689,7 @@
 	}
 
 	PRINTK_1(TRACE_3780I,
-		"3780I::dsp3780I_WriteIStore exit bRC=TRUE\n");
+		"3780I::dsp3780I_WriteIStore exit bRC=true\n");
 
 	return 0;
 }
@@ -713,7 +713,7 @@
 	*/
 	spin_lock_irqsave(&dsp_lock, flags);
 	MKWORD(rHBridgeControl) = InWordDsp(DSP_HBridgeControl);
-	rHBridgeControl.EnableDspInt = FALSE;
+	rHBridgeControl.EnableDspInt = false;
 	OutWordDsp(DSP_HBridgeControl, MKWORD(rHBridgeControl));
 
 	*pusIPCSource = InWordDsp(DSP_Interrupt);
@@ -725,7 +725,7 @@
 
 	OutWordDsp(DSP_Interrupt, (unsigned short) ~(*pusIPCSource));
 
-	rHBridgeControl.EnableDspInt = TRUE;
+	rHBridgeControl.EnableDspInt = true;
 	OutWordDsp(DSP_HBridgeControl, MKWORD(rHBridgeControl));
 	spin_unlock_irqrestore(&dsp_lock, flags);
 
diff --git a/drivers/char/mwave/3780i.h b/drivers/char/mwave/3780i.h
index fba6ab1..9ccb6b2 100644
--- a/drivers/char/mwave/3780i.h
+++ b/drivers/char/mwave/3780i.h
@@ -101,7 +101,7 @@
 } DSP_UART_CFG_1;
 
 typedef struct {
-	unsigned char Enable:1;	/* RW: Enable I/O and IRQ: 0=FALSE, 1=TRUE */
+	unsigned char Enable:1;	/* RW: Enable I/O and IRQ: 0=false, 1=true */
 	unsigned char Reserved:7;	/* 0: Reserved */
 } DSP_UART_CFG_2;
 
@@ -114,7 +114,7 @@
 } DSP_HBRIDGE_CFG_1;
 
 typedef struct {
-	unsigned char Enable:1;	/* RW: enable I/O and IRQ: 0=FALSE, 1=TRUE */
+	unsigned char Enable:1;	/* RW: enable I/O and IRQ: 0=false, 1=true */
 	unsigned char Reserved:7;	/* 0: Reserved */
 } DSP_HBRIDGE_CFG_2;
 
@@ -133,12 +133,12 @@
 
 
 typedef struct {
-	unsigned char GateIOCHRDY:1;	/* RW: Enable IOCHRDY gating: 0=FALSE, 1=TRUE */
+	unsigned char GateIOCHRDY:1;	/* RW: Enable IOCHRDY gating: 0=false, 1=true */
 	unsigned char Reserved:7;	/* 0: Reserved */
 } DSP_ISA_PROT_CFG;
 
 typedef struct {
-	unsigned char Enable:1;	/* RW: Enable low power suspend/resume 0=FALSE, 1=TRUE */
+	unsigned char Enable:1;	/* RW: Enable low power suspend/resume 0=false, 1=true */
 	unsigned char Reserved:7;	/* 0: Reserved */
 } DSP_POWER_MGMT_CFG;
 
diff --git a/drivers/char/mwave/mwavedd.c b/drivers/char/mwave/mwavedd.c
index 164544a..3a3ff2e 100644
--- a/drivers/char/mwave/mwavedd.c
+++ b/drivers/char/mwave/mwavedd.c
@@ -296,8 +296,8 @@
 				pDrvData->IPCs[ipcnum].usIntCount);
 
 			mutex_lock(&mwave_mutex);
-			pDrvData->IPCs[ipcnum].bIsHere = FALSE;
-			pDrvData->IPCs[ipcnum].bIsEnabled = TRUE;
+			pDrvData->IPCs[ipcnum].bIsHere = false;
+			pDrvData->IPCs[ipcnum].bIsEnabled = true;
 			mutex_unlock(&mwave_mutex);
 	
 			PRINTK_2(TRACE_MWAVE,
@@ -324,7 +324,7 @@
 				pDrvData->IPCs[ipcnum].usIntCount);
 	
 			mutex_lock(&mwave_mutex);
-			if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
+			if (pDrvData->IPCs[ipcnum].bIsEnabled == true) {
 				DECLARE_WAITQUEUE(wait, current);
 
 				PRINTK_2(TRACE_MWAVE,
@@ -332,7 +332,7 @@
 					" ipc %x going to sleep\n",
 					ipcnum);
 				add_wait_queue(&pDrvData->IPCs[ipcnum].ipc_wait_queue, &wait);
-				pDrvData->IPCs[ipcnum].bIsHere = TRUE;
+				pDrvData->IPCs[ipcnum].bIsHere = true;
 				set_current_state(TASK_INTERRUPTIBLE);
 				/* check whether an event was signalled by */
 				/* the interrupt handler while we were gone */
@@ -355,7 +355,7 @@
 						" application\n",
 						ipcnum);
 				}
-				pDrvData->IPCs[ipcnum].bIsHere = FALSE;
+				pDrvData->IPCs[ipcnum].bIsHere = false;
 				remove_wait_queue(&pDrvData->IPCs[ipcnum].ipc_wait_queue, &wait);
 				set_current_state(TASK_RUNNING);
 				PRINTK_2(TRACE_MWAVE,
@@ -384,9 +384,9 @@
 				return -EINVAL;
 			}
 			mutex_lock(&mwave_mutex);
-			if (pDrvData->IPCs[ipcnum].bIsEnabled == TRUE) {
-				pDrvData->IPCs[ipcnum].bIsEnabled = FALSE;
-				if (pDrvData->IPCs[ipcnum].bIsHere == TRUE) {
+			if (pDrvData->IPCs[ipcnum].bIsEnabled == true) {
+				pDrvData->IPCs[ipcnum].bIsEnabled = false;
+				if (pDrvData->IPCs[ipcnum].bIsHere == true) {
 					wake_up_interruptible(&pDrvData->IPCs[ipcnum].ipc_wait_queue);
 				}
 			}
@@ -541,7 +541,7 @@
 
 	if (pDrvData->device_registered) {
 		device_unregister(&mwave_device);
-		pDrvData->device_registered = FALSE;
+		pDrvData->device_registered = false;
 	}
 #endif
 
@@ -576,16 +576,16 @@
 
 	memset(&mwave_s_mdd, 0, sizeof(MWAVE_DEVICE_DATA));
 
-	pDrvData->bBDInitialized = FALSE;
-	pDrvData->bResourcesClaimed = FALSE;
-	pDrvData->bDSPEnabled = FALSE;
-	pDrvData->bDSPReset = FALSE;
-	pDrvData->bMwaveDevRegistered = FALSE;
+	pDrvData->bBDInitialized = false;
+	pDrvData->bResourcesClaimed = false;
+	pDrvData->bDSPEnabled = false;
+	pDrvData->bDSPReset = false;
+	pDrvData->bMwaveDevRegistered = false;
 	pDrvData->sLine = -1;
 
 	for (i = 0; i < ARRAY_SIZE(pDrvData->IPCs); i++) {
-		pDrvData->IPCs[i].bIsEnabled = FALSE;
-		pDrvData->IPCs[i].bIsHere = FALSE;
+		pDrvData->IPCs[i].bIsEnabled = false;
+		pDrvData->IPCs[i].bIsHere = false;
 		pDrvData->IPCs[i].usIntCount = 0;	/* no ints received yet */
 		init_waitqueue_head(&pDrvData->IPCs[i].ipc_wait_queue);
 	}
@@ -601,7 +601,7 @@
 				" Failed to initialize board data\n");
 		goto cleanup_error;
 	}
-	pDrvData->bBDInitialized = TRUE;
+	pDrvData->bBDInitialized = true;
 
 	retval = tp3780I_CalcResources(&pDrvData->rBDData);
 	PRINTK_2(TRACE_MWAVE,
@@ -626,7 +626,7 @@
 				" Failed to claim resources\n");
 		goto cleanup_error;
 	}
-	pDrvData->bResourcesClaimed = TRUE;
+	pDrvData->bResourcesClaimed = true;
 
 	retval = tp3780I_EnableDSP(&pDrvData->rBDData);
 	PRINTK_2(TRACE_MWAVE,
@@ -639,7 +639,7 @@
 				" Failed to enable DSP\n");
 		goto cleanup_error;
 	}
-	pDrvData->bDSPEnabled = TRUE;
+	pDrvData->bDSPEnabled = true;
 
 	if (misc_register(&mwave_misc_dev) < 0) {
 		PRINTK_ERROR(KERN_ERR_MWAVE
@@ -647,7 +647,7 @@
 				" Failed to register misc device\n");
 		goto cleanup_error;
 	}
-	pDrvData->bMwaveDevRegistered = TRUE;
+	pDrvData->bMwaveDevRegistered = true;
 
 	pDrvData->sLine = register_serial_portandirq(
 		pDrvData->rBDData.rDspSettings.usUartBaseIO,
@@ -668,7 +668,7 @@
 
 	if (device_register(&mwave_device))
 		goto cleanup_error;
-	pDrvData->device_registered = TRUE;
+	pDrvData->device_registered = true;
 	for (i = 0; i < ARRAY_SIZE(mwave_dev_attrs); i++) {
 		if(device_create_file(&mwave_device, mwave_dev_attrs[i])) {
 			PRINTK_ERROR(KERN_ERR_MWAVE
diff --git a/drivers/char/mwave/mwavedd.h b/drivers/char/mwave/mwavedd.h
index 7e0d530..37e0a49 100644
--- a/drivers/char/mwave/mwavedd.h
+++ b/drivers/char/mwave/mwavedd.h
@@ -125,8 +125,8 @@
 
 typedef struct _MWAVE_IPC {
 	unsigned short usIntCount;	/* 0=none, 1=first, 2=greater than 1st */
-	BOOLEAN bIsEnabled;
-	BOOLEAN bIsHere;
+	bool bIsEnabled;
+	bool bIsHere;
 	/* entry spin lock */
 	wait_queue_head_t ipc_wait_queue;
 } MWAVE_IPC;
@@ -135,12 +135,12 @@
 	THINKPAD_BD_DATA rBDData;	/* board driver's data area */
 	unsigned long ulIPCSource_ISR;	/* IPC source bits for recently processed intr, set during ISR processing */
 	unsigned long ulIPCSource_DPC;	/* IPC source bits for recently processed intr, set during DPC processing */
-	BOOLEAN bBDInitialized;
-	BOOLEAN bResourcesClaimed;
-	BOOLEAN bDSPEnabled;
-	BOOLEAN bDSPReset;
+	bool bBDInitialized;
+	bool bResourcesClaimed;
+	bool bDSPEnabled;
+	bool bDSPReset;
 	MWAVE_IPC IPCs[16];
-	BOOLEAN bMwaveDevRegistered;
+	bool bMwaveDevRegistered;
 	short sLine;
 	int nr_registered_attrs;
 	int device_registered;
diff --git a/drivers/char/mwave/smapi.c b/drivers/char/mwave/smapi.c
index 6187fd1..8c5411a 100644
--- a/drivers/char/mwave/smapi.c
+++ b/drivers/char/mwave/smapi.c
@@ -493,7 +493,7 @@
 }
 
 
-int smapi_set_DSP_power_state(BOOLEAN bOn)
+int smapi_set_DSP_power_state(bool bOn)
 {
 	int bRC = -EIO;
 	unsigned short usAX, usBX, usCX, usDX, usDI, usSI;
@@ -556,7 +556,7 @@
 			PRINTK_ERROR("smapi::smapi_init, ERROR unable to read from SMAPI port\n");
 		} else {
 			PRINTK_2(TRACE_SMAPI,
-				"smapi::smapi_init, exit TRUE g_usSmapiPort %x\n",
+				"smapi::smapi_init, exit true g_usSmapiPort %x\n",
 				g_usSmapiPort);
 			retval = 0;
 			//SmapiQuerySystemID();
diff --git a/drivers/char/mwave/smapi.h b/drivers/char/mwave/smapi.h
index 64b2ec1..ebc206b 100644
--- a/drivers/char/mwave/smapi.h
+++ b/drivers/char/mwave/smapi.h
@@ -49,10 +49,6 @@
 #ifndef _LINUX_SMAPI_H
 #define _LINUX_SMAPI_H
 
-#define TRUE 1
-#define FALSE 0
-#define BOOLEAN int
-
 typedef struct {
 	int bDSPPresent;
 	int bDSPEnabled;
@@ -74,7 +70,7 @@
 int smapi_init(void);
 int smapi_query_DSP_cfg(SMAPI_DSP_SETTINGS * pSettings);
 int smapi_set_DSP_cfg(void);
-int smapi_set_DSP_power_state(BOOLEAN bOn);
+int smapi_set_DSP_power_state(bool bOn);
 
 
 #endif
diff --git a/drivers/char/mwave/tp3780i.c b/drivers/char/mwave/tp3780i.c
index 04e6d6a..5e1618a 100644
--- a/drivers/char/mwave/tp3780i.c
+++ b/drivers/char/mwave/tp3780i.c
@@ -80,13 +80,13 @@
 	WriteMsaCfg(DSP_GpioModeControl_15_8, MKWORD(rGpioMode));
 
 	MKWORD(rGpioDriverEnable) = 0;
-	rGpioDriverEnable.Enable10 = TRUE;
-	rGpioDriverEnable.Mask10 = TRUE;
+	rGpioDriverEnable.Enable10 = true;
+	rGpioDriverEnable.Mask10 = true;
 	WriteMsaCfg(DSP_GpioDriverEnable_15_8, MKWORD(rGpioDriverEnable));
 
 	MKWORD(rGpioOutputData) = 0;
 	rGpioOutputData.Latch10 = 0;
-	rGpioOutputData.Mask10 = TRUE;
+	rGpioOutputData.Mask10 = true;
 	WriteMsaCfg(DSP_GpioOutputData_15_8, MKWORD(rGpioOutputData));
 
 	PRINTK_1(TRACE_TP3780I, "tp3780i::EnableSRAM exit\n");
@@ -127,7 +127,7 @@
 				PRINTK_2(TRACE_TP3780I,
 					"tp3780i::DspInterrupt usIntCount %x\n",
 					pDrvData->IPCs[usPCNum - 1].usIntCount);
-				if (pDrvData->IPCs[usPCNum - 1].bIsEnabled == TRUE) {
+				if (pDrvData->IPCs[usPCNum - 1].bIsEnabled == true) {
 					PRINTK_2(TRACE_TP3780I,
 						"tp3780i::DspInterrupt, waking up usPCNum %x\n",
 						usPCNum - 1);
@@ -160,8 +160,8 @@
 
 	PRINTK_2(TRACE_TP3780I, "tp3780i::tp3780I_InitializeBoardData entry pBDData %p\n", pBDData);
 
-	pBDData->bDSPEnabled = FALSE;
-	pSettings->bInterruptClaimed = FALSE;
+	pBDData->bDSPEnabled = false;
+	pSettings->bInterruptClaimed = false;
 
 	retval = smapi_init();
 	if (retval) {
@@ -269,7 +269,7 @@
 
 	if (pSettings->bInterruptClaimed) {
 		free_irq(pSettings->usDspIrq, NULL);
-		pSettings->bInterruptClaimed = FALSE;
+		pSettings->bInterruptClaimed = false;
 	}
 
 	PRINTK_2(TRACE_TP3780I,
@@ -283,7 +283,7 @@
 int tp3780I_EnableDSP(THINKPAD_BD_DATA * pBDData)
 {
 	DSP_3780I_CONFIG_SETTINGS *pSettings = &pBDData->rDspSettings;
-	BOOLEAN bDSPPoweredUp = FALSE, bInterruptAllocated = FALSE;
+	bool bDSPPoweredUp = false, bInterruptAllocated = false;
 
 	PRINTK_2(TRACE_TP3780I, "tp3780i::tp3780I_EnableDSP entry pBDData %p\n", pBDData);
 
@@ -336,14 +336,14 @@
 		}
 	}
 
-	pSettings->bDspIrqActiveLow = pSettings->bDspIrqPulse = TRUE;
-	pSettings->bUartIrqActiveLow = pSettings->bUartIrqPulse = TRUE;
+	pSettings->bDspIrqActiveLow = pSettings->bDspIrqPulse = true;
+	pSettings->bUartIrqActiveLow = pSettings->bUartIrqPulse = true;
 
 	if (pBDData->bShareDspIrq) {
-		pSettings->bDspIrqActiveLow = FALSE;
+		pSettings->bDspIrqActiveLow = false;
 	}
 	if (pBDData->bShareUartIrq) {
-		pSettings->bUartIrqActiveLow = FALSE;
+		pSettings->bUartIrqActiveLow = false;
 	}
 
 	pSettings->usNumTransfers = TP_CFG_NumTransfers;
@@ -373,16 +373,16 @@
 		PRINTK_3(TRACE_TP3780I,
 			"tp3780i::tp3780I_EnableDSP, got interrupt %x bShareDspIrq %x\n",
 			pSettings->usDspIrq, pBDData->bShareDspIrq);
-		bInterruptAllocated = TRUE;
-		pSettings->bInterruptClaimed = TRUE;
+		bInterruptAllocated = true;
+		pSettings->bInterruptClaimed = true;
 	}
 
-	smapi_set_DSP_power_state(FALSE);
-	if (smapi_set_DSP_power_state(TRUE)) {
-		PRINTK_ERROR(KERN_ERR_MWAVE "tp3780i::tp3780I_EnableDSP: Error: smapi_set_DSP_power_state(TRUE) failed\n");
+	smapi_set_DSP_power_state(false);
+	if (smapi_set_DSP_power_state(true)) {
+		PRINTK_ERROR(KERN_ERR_MWAVE "tp3780i::tp3780I_EnableDSP: Error: smapi_set_DSP_power_state(true) failed\n");
 		goto exit_cleanup;
 	} else {
-		bDSPPoweredUp = TRUE;
+		bDSPPoweredUp = true;
 	}
 
 	if (dsp3780I_EnableDSP(pSettings, s_ausThinkpadIrqToField, s_ausThinkpadDmaToField)) {
@@ -392,7 +392,7 @@
 
 	EnableSRAM(pBDData);
 
-	pBDData->bDSPEnabled = TRUE;
+	pBDData->bDSPEnabled = true;
 
 	PRINTK_1(TRACE_TP3780I, "tp3780i::tp3780I_EnableDSP exit\n");
 
@@ -401,10 +401,10 @@
 exit_cleanup:
 	PRINTK_ERROR("tp3780i::tp3780I_EnableDSP: Cleaning up\n");
 	if (bDSPPoweredUp)
-		smapi_set_DSP_power_state(FALSE);
+		smapi_set_DSP_power_state(false);
 	if (bInterruptAllocated) {
 		free_irq(pSettings->usDspIrq, NULL);
-		pSettings->bInterruptClaimed = FALSE;
+		pSettings->bInterruptClaimed = false;
 	}
 	return -EIO;
 }
@@ -421,10 +421,10 @@
 		dsp3780I_DisableDSP(&pBDData->rDspSettings);
 		if (pSettings->bInterruptClaimed) {
 			free_irq(pSettings->usDspIrq, NULL);
-			pSettings->bInterruptClaimed = FALSE;
+			pSettings->bInterruptClaimed = false;
 		}
-		smapi_set_DSP_power_state(FALSE);
-		pBDData->bDSPEnabled = FALSE;
+		smapi_set_DSP_power_state(false);
+		pBDData->bDSPEnabled = false;
 	}
 
 	PRINTK_2(TRACE_TP3780I, "tp3780i::tp3780I_DisableDSP exit retval %x\n", retval);
@@ -516,7 +516,7 @@
 	int retval = 0;
 	DSP_3780I_CONFIG_SETTINGS *pSettings = &pBDData->rDspSettings;
 	unsigned short usDspBaseIO = pSettings->usDspBaseIO;
-	BOOLEAN bRC = 0;
+	bool bRC = 0;
 
 	PRINTK_6(TRACE_TP3780I,
 		"tp3780i::tp3780I_ReadWriteDspDStore entry pBDData %p, uOpcode %x, pvBuffer %p, uCount %x, ulDSPAddr %lx\n",
@@ -552,7 +552,7 @@
 	int retval = 0;
 	DSP_3780I_CONFIG_SETTINGS *pSettings = &pBDData->rDspSettings;
 	unsigned short usDspBaseIO = pSettings->usDspBaseIO;
-	BOOLEAN bRC = 0;
+	bool bRC = 0;
 
 	PRINTK_6(TRACE_TP3780I,
 		"tp3780i::tp3780I_ReadWriteDspIStore entry pBDData %p, uOpcode %x, pvBuffer %p, uCount %x, ulDSPAddr %lx\n",
diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c
index f8a483c..d233688 100644
--- a/drivers/char/ppdev.c
+++ b/drivers/char/ppdev.c
@@ -286,7 +286,7 @@
 	struct parport *port;
 	struct pardevice *pdev = NULL;
 	char *name;
-	int fl;
+	struct pardev_cb ppdev_cb;
 
 	name = kasprintf(GFP_KERNEL, CHRDEV "%x", minor);
 	if (name == NULL)
@@ -299,9 +299,11 @@
 		return -ENXIO;
 	}
 
-	fl = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
-	pdev = parport_register_device(port, name, NULL,
-				       NULL, pp_irq, fl, pp);
+	memset(&ppdev_cb, 0, sizeof(ppdev_cb));
+	ppdev_cb.irq_func = pp_irq;
+	ppdev_cb.flags = (pp->flags & PP_EXCL) ? PARPORT_FLAG_EXCL : 0;
+	ppdev_cb.private = pp;
+	pdev = parport_register_dev_model(port, name, &ppdev_cb, minor);
 	parport_put_port(port);
 
 	if (!pdev) {
@@ -799,10 +801,23 @@
 	device_destroy(ppdev_class, MKDEV(PP_MAJOR, port->number));
 }
 
+static int pp_probe(struct pardevice *par_dev)
+{
+	struct device_driver *drv = par_dev->dev.driver;
+	int len = strlen(drv->name);
+
+	if (strncmp(par_dev->name, drv->name, len))
+		return -ENODEV;
+
+	return 0;
+}
+
 static struct parport_driver pp_driver = {
 	.name		= CHRDEV,
-	.attach		= pp_attach,
+	.probe		= pp_probe,
+	.match_port	= pp_attach,
 	.detach		= pp_detach,
+	.devmodel	= true,
 };
 
 static int __init ppdev_init(void)
diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c
index 94006f9..10e5632 100644
--- a/drivers/char/snsc.c
+++ b/drivers/char/snsc.c
@@ -385,13 +385,18 @@
 
 	event_nasid = ia64_sn_get_console_nasid();
 
+	snsc_class = class_create(THIS_MODULE, SYSCTL_BASENAME);
+	if (IS_ERR(snsc_class)) {
+		printk("%s: failed to allocate class\n", __func__);
+		return PTR_ERR(snsc_class);
+	}
+
 	if (alloc_chrdev_region(&first_dev, 0, num_cnodes,
 				SYSCTL_BASENAME) < 0) {
 		printk("%s: failed to register SN system controller device\n",
 		       __func__);
 		return -ENODEV;
 	}
-	snsc_class = class_create(THIS_MODULE, SYSCTL_BASENAME);
 
 	for (cnode = 0; cnode < num_cnodes; cnode++) {
 			geoid = cnodeid_get_geoid(cnode);
diff --git a/drivers/char/tile-srom.c b/drivers/char/tile-srom.c
index 69f6b4a..398800e 100644
--- a/drivers/char/tile-srom.c
+++ b/drivers/char/tile-srom.c
@@ -331,13 +331,11 @@
 /**
  * srom_setup_minor() - Initialize per-minor information.
  * @srom: Per-device SROM state.
- * @index: Device to set up.
+ * @devhdl: Partition device handle.
  */
-static int srom_setup_minor(struct srom_dev *srom, int index)
+static int srom_setup_minor(struct srom_dev *srom, int devhdl)
 {
-	struct device *dev;
-	int devhdl = srom->hv_devhdl;
-
+	srom->hv_devhdl = devhdl;
 	mutex_init(&srom->lock);
 
 	if (_srom_read(devhdl, &srom->total_size,
@@ -350,9 +348,7 @@
 		       SROM_PAGE_SIZE_OFF, sizeof(srom->page_size)) < 0)
 		return -EIO;
 
-	dev = device_create(srom_class, &srom_parent->dev,
-			    MKDEV(srom_major, index), srom, "%d", index);
-	return PTR_ERR_OR_ZERO(dev);
+	return 0;
 }
 
 /** srom_init() - Initialize the driver's module. */
@@ -365,7 +361,7 @@
 	 * Start with a plausible number of partitions; the krealloc() call
 	 * below will yield about log(srom_devs) additional allocations.
 	 */
-	srom_devices = kzalloc(4 * sizeof(struct srom_dev), GFP_KERNEL);
+	srom_devices = kmalloc(4 * sizeof(struct srom_dev), GFP_KERNEL);
 
 	/* Discover the number of srom partitions. */
 	for (i = 0; ; i++) {
@@ -373,7 +369,7 @@
 		char buf[20];
 		struct srom_dev *new_srom_devices =
 			krealloc(srom_devices, (i+1) * sizeof(struct srom_dev),
-				 GFP_KERNEL | __GFP_ZERO);
+				 GFP_KERNEL);
 		if (!new_srom_devices) {
 			result = -ENOMEM;
 			goto fail_mem;
@@ -387,7 +383,9 @@
 					  i, devhdl);
 			break;
 		}
-		srom_devices[i].hv_devhdl = devhdl;
+		result = srom_setup_minor(&srom_devices[i], devhdl);
+		if (result != 0)
+			goto fail_mem;
 	}
 	srom_devs = i;
 
@@ -431,9 +429,13 @@
 	srom_class->dev_groups = srom_dev_groups;
 	srom_class->devnode = srom_devnode;
 
-	/* Do per-partition initialization */
+	/* Create per-partition devices */
 	for (i = 0; i < srom_devs; i++) {
-		result = srom_setup_minor(srom_devices + i, i);
+		struct device *dev =
+			device_create(srom_class, &srom_parent->dev,
+				      MKDEV(srom_major, i), srom_devices + i,
+				      "%d", i);
+		result = PTR_ERR_OR_ZERO(dev);
 		if (result < 0)
 			goto fail_class;
 	}
diff --git a/drivers/char/tpm/st33zp24/st33zp24.c b/drivers/char/tpm/st33zp24/st33zp24.c
index c2ee304..6f060c7 100644
--- a/drivers/char/tpm/st33zp24/st33zp24.c
+++ b/drivers/char/tpm/st33zp24/st33zp24.c
@@ -589,8 +589,6 @@
 		chip->flags |= TPM_CHIP_FLAG_IRQ;
 
 		disable_irq_nosync(tpm_dev->irq);
-
-		tpm_gen_interrupt(chip);
 	}
 
 	return tpm_chip_register(chip);
diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c
index f5d4521..912ad30 100644
--- a/drivers/char/tpm/tpm-dev.c
+++ b/drivers/char/tpm/tpm-dev.c
@@ -145,7 +145,7 @@
 		return -EPIPE;
 	}
 	out_size = tpm_transmit(priv->chip, priv->data_buffer,
-				sizeof(priv->data_buffer));
+				sizeof(priv->data_buffer), 0);
 
 	tpm_put_ops(priv->chip);
 	if (out_size < 0) {
diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
index 1abe2d7..8de6187 100644
--- a/drivers/char/tpm/tpm-interface.c
+++ b/drivers/char/tpm/tpm-interface.c
@@ -330,13 +330,16 @@
 /*
  * Internal kernel interface to transmit TPM commands
  */
-ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
-		     size_t bufsiz)
+ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz,
+		     unsigned int flags)
 {
 	ssize_t rc;
 	u32 count, ordinal;
 	unsigned long stop;
 
+	if (bufsiz < TPM_HEADER_SIZE)
+		return -EINVAL;
+
 	if (bufsiz > TPM_BUFSIZE)
 		bufsiz = TPM_BUFSIZE;
 
@@ -350,7 +353,8 @@
 		return -E2BIG;
 	}
 
-	mutex_lock(&chip->tpm_mutex);
+	if (!(flags & TPM_TRANSMIT_UNLOCKED))
+		mutex_lock(&chip->tpm_mutex);
 
 	rc = chip->ops->send(chip, (u8 *) buf, count);
 	if (rc < 0) {
@@ -393,20 +397,21 @@
 		dev_err(&chip->dev,
 			"tpm_transmit: tpm_recv: error %zd\n", rc);
 out:
-	mutex_unlock(&chip->tpm_mutex);
+	if (!(flags & TPM_TRANSMIT_UNLOCKED))
+		mutex_unlock(&chip->tpm_mutex);
 	return rc;
 }
 
 #define TPM_DIGEST_SIZE 20
 #define TPM_RET_CODE_IDX 6
 
-ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd,
-			 int len, const char *desc)
+ssize_t tpm_transmit_cmd(struct tpm_chip *chip, const void *cmd,
+			 int len, unsigned int flags, const char *desc)
 {
-	struct tpm_output_header *header;
+	const struct tpm_output_header *header;
 	int err;
 
-	len = tpm_transmit(chip, (u8 *) cmd, len);
+	len = tpm_transmit(chip, (const u8 *)cmd, len, flags);
 	if (len <  0)
 		return len;
 	else if (len < TPM_HEADER_SIZE)
@@ -453,26 +458,13 @@
 		tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
 		tpm_cmd.params.getcap_in.subcap = subcap_id;
 	}
-	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc);
+	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, 0,
+			      desc);
 	if (!rc)
 		*cap = tpm_cmd.params.getcap_out.cap;
 	return rc;
 }
-
-void tpm_gen_interrupt(struct tpm_chip *chip)
-{
-	struct	tpm_cmd_t tpm_cmd;
-	ssize_t rc;
-
-	tpm_cmd.header.in = tpm_getcap_header;
-	tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
-	tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
-	tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-
-	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-			      "attempting to determine the timeouts");
-}
-EXPORT_SYMBOL_GPL(tpm_gen_interrupt);
+EXPORT_SYMBOL_GPL(tpm_getcap);
 
 #define TPM_ORD_STARTUP cpu_to_be32(153)
 #define TPM_ST_CLEAR cpu_to_be16(1)
@@ -490,7 +482,7 @@
 	start_cmd.header.in = tpm_startup_header;
 
 	start_cmd.params.startup_in.startup_type = startup_type;
-	return tpm_transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE,
+	return tpm_transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE, 0,
 				"attempting to start the TPM");
 }
 
@@ -521,7 +513,8 @@
 	tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP;
 	tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
 	tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
-	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL);
+	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, 0,
+			      NULL);
 
 	if (rc == TPM_ERR_INVALID_POSTINIT) {
 		/* The TPM is not started, we are the first to talk to it.
@@ -535,7 +528,7 @@
 		tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
 		tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT;
 		rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
-				  NULL);
+				      0, NULL);
 	}
 	if (rc) {
 		dev_err(&chip->dev,
@@ -596,7 +589,7 @@
 	tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4);
 	tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION;
 
-	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE,
+	rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, 0,
 			      "attempting to determine the durations");
 	if (rc)
 		return rc;
@@ -633,7 +626,7 @@
 #define TPM_ORD_CONTINUE_SELFTEST 83
 #define CONTINUE_SELFTEST_RESULT_SIZE 10
 
-static struct tpm_input_header continue_selftest_header = {
+static const struct tpm_input_header continue_selftest_header = {
 	.tag = TPM_TAG_RQU_COMMAND,
 	.length = cpu_to_be32(10),
 	.ordinal = cpu_to_be32(TPM_ORD_CONTINUE_SELFTEST),
@@ -652,14 +645,14 @@
 	struct tpm_cmd_t cmd;
 
 	cmd.header.in = continue_selftest_header;
-	rc = tpm_transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE,
+	rc = tpm_transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE, 0,
 			      "continue selftest");
 	return rc;
 }
 
 #define TPM_ORDINAL_PCRREAD cpu_to_be32(21)
 #define READ_PCR_RESULT_SIZE 30
-static struct tpm_input_header pcrread_header = {
+static const struct tpm_input_header pcrread_header = {
 	.tag = TPM_TAG_RQU_COMMAND,
 	.length = cpu_to_be32(14),
 	.ordinal = TPM_ORDINAL_PCRREAD
@@ -672,7 +665,7 @@
 
 	cmd.header.in = pcrread_header;
 	cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx);
-	rc = tpm_transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE,
+	rc = tpm_transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE, 0,
 			      "attempting to read a pcr value");
 
 	if (rc == 0)
@@ -745,7 +738,7 @@
  */
 #define TPM_ORD_PCR_EXTEND cpu_to_be32(20)
 #define EXTEND_PCR_RESULT_SIZE 34
-static struct tpm_input_header pcrextend_header = {
+static const struct tpm_input_header pcrextend_header = {
 	.tag = TPM_TAG_RQU_COMMAND,
 	.length = cpu_to_be32(34),
 	.ordinal = TPM_ORD_PCR_EXTEND
@@ -770,7 +763,7 @@
 	cmd.header.in = pcrextend_header;
 	cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx);
 	memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE);
-	rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
+	rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, 0,
 			      "attempting extend a PCR value");
 
 	tpm_put_ops(chip);
@@ -792,7 +785,7 @@
 	unsigned int loops;
 	unsigned int delay_msec = 100;
 	unsigned long duration;
-	struct tpm_cmd_t cmd;
+	u8 dummy[TPM_DIGEST_SIZE];
 
 	duration = tpm_calc_ordinal_duration(chip, TPM_ORD_CONTINUE_SELFTEST);
 
@@ -807,9 +800,8 @@
 
 	do {
 		/* Attempt to read a PCR value */
-		cmd.header.in = pcrread_header;
-		cmd.params.pcrread_in.pcr_idx = cpu_to_be32(0);
-		rc = tpm_transmit(chip, (u8 *) &cmd, READ_PCR_RESULT_SIZE);
+		rc = tpm_pcr_read_dev(chip, 0, dummy);
+
 		/* Some buggy TPMs will not respond to tpm_tis_ready() for
 		 * around 300ms while the self test is ongoing, keep trying
 		 * until the self test duration expires. */
@@ -824,7 +816,6 @@
 		if (rc < TPM_HEADER_SIZE)
 			return -EFAULT;
 
-		rc = be32_to_cpu(cmd.header.out.return_code);
 		if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) {
 			dev_info(&chip->dev,
 				 "TPM is disabled/deactivated (0x%X)\n", rc);
@@ -879,7 +870,7 @@
 	if (chip == NULL)
 		return -ENODEV;
 
-	rc = tpm_transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd");
+	rc = tpm_transmit_cmd(chip, cmd, buflen, 0, "attempting tpm_cmd");
 
 	tpm_put_ops(chip);
 	return rc;
@@ -949,7 +940,7 @@
 #define TPM_ORD_SAVESTATE cpu_to_be32(152)
 #define SAVESTATE_RESULT_SIZE 10
 
-static struct tpm_input_header savestate_header = {
+static const struct tpm_input_header savestate_header = {
 	.tag = TPM_TAG_RQU_COMMAND,
 	.length = cpu_to_be32(10),
 	.ordinal = TPM_ORD_SAVESTATE
@@ -981,14 +972,15 @@
 		cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(tpm_suspend_pcr);
 		memcpy(cmd.params.pcrextend_in.hash, dummy_hash,
 		       TPM_DIGEST_SIZE);
-		rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE,
+		rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, 0,
 				      "extending dummy pcr before suspend");
 	}
 
 	/* now do the actual savestate */
 	for (try = 0; try < TPM_RETRY; try++) {
 		cmd.header.in = savestate_header;
-		rc = tpm_transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL);
+		rc = tpm_transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, 0,
+				      NULL);
 
 		/*
 		 * If the TPM indicates that it is too busy to respond to
@@ -1032,7 +1024,7 @@
 EXPORT_SYMBOL_GPL(tpm_pm_resume);
 
 #define TPM_GETRANDOM_RESULT_SIZE	18
-static struct tpm_input_header tpm_getrandom_header = {
+static const struct tpm_input_header tpm_getrandom_header = {
 	.tag = TPM_TAG_RQU_COMMAND,
 	.length = cpu_to_be32(14),
 	.ordinal = TPM_ORD_GET_RANDOM
@@ -1072,8 +1064,8 @@
 		tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes);
 
 		err = tpm_transmit_cmd(chip, &tpm_cmd,
-				   TPM_GETRANDOM_RESULT_SIZE + num_bytes,
-				   "attempting get random");
+				       TPM_GETRANDOM_RESULT_SIZE + num_bytes,
+				       0, "attempting get random");
 		if (err)
 			break;
 
diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c
index b46cf70..a76ab4a 100644
--- a/drivers/char/tpm/tpm-sysfs.c
+++ b/drivers/char/tpm/tpm-sysfs.c
@@ -22,7 +22,7 @@
 
 #define READ_PUBEK_RESULT_SIZE 314
 #define TPM_ORD_READPUBEK cpu_to_be32(124)
-static struct tpm_input_header tpm_readpubek_header = {
+static const struct tpm_input_header tpm_readpubek_header = {
 	.tag = TPM_TAG_RQU_COMMAND,
 	.length = cpu_to_be32(30),
 	.ordinal = TPM_ORD_READPUBEK
@@ -39,7 +39,7 @@
 	struct tpm_chip *chip = to_tpm_chip(dev);
 
 	tpm_cmd.header.in = tpm_readpubek_header;
-	err = tpm_transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE,
+	err = tpm_transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE, 0,
 			       "attempting to read the PUBEK");
 	if (err)
 		goto out;
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index 3e32d5b..4d183c9 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -476,32 +476,35 @@
 extern const struct file_operations tpm_fops;
 extern struct idr dev_nums_idr;
 
+enum tpm_transmit_flags {
+	TPM_TRANSMIT_UNLOCKED	= BIT(0),
+};
+
+ssize_t tpm_transmit(struct tpm_chip *chip, const u8 *buf, size_t bufsiz,
+		     unsigned int flags);
+ssize_t tpm_transmit_cmd(struct tpm_chip *chip, const void *cmd, int len,
+			 unsigned int flags, const char *desc);
 ssize_t tpm_getcap(struct tpm_chip *chip, __be32 subcap_id, cap_t *cap,
 		   const char *desc);
-ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf,
-		     size_t bufsiz);
-ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd, int len,
-			 const char *desc);
-extern int tpm_get_timeouts(struct tpm_chip *);
-extern void tpm_gen_interrupt(struct tpm_chip *);
+int tpm_get_timeouts(struct tpm_chip *);
 int tpm1_auto_startup(struct tpm_chip *chip);
-extern int tpm_do_selftest(struct tpm_chip *);
-extern unsigned long tpm_calc_ordinal_duration(struct tpm_chip *, u32);
-extern int tpm_pm_suspend(struct device *);
-extern int tpm_pm_resume(struct device *);
-extern int wait_for_tpm_stat(struct tpm_chip *, u8, unsigned long,
-			     wait_queue_head_t *, bool);
+int tpm_do_selftest(struct tpm_chip *chip);
+unsigned long tpm_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal);
+int tpm_pm_suspend(struct device *dev);
+int tpm_pm_resume(struct device *dev);
+int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout,
+		      wait_queue_head_t *queue, bool check_cancel);
 
 struct tpm_chip *tpm_chip_find_get(int chip_num);
 __must_check int tpm_try_get_ops(struct tpm_chip *chip);
 void tpm_put_ops(struct tpm_chip *chip);
 
-extern struct tpm_chip *tpm_chip_alloc(struct device *dev,
-				       const struct tpm_class_ops *ops);
-extern struct tpm_chip *tpmm_chip_alloc(struct device *pdev,
-				       const struct tpm_class_ops *ops);
-extern int tpm_chip_register(struct tpm_chip *chip);
-extern void tpm_chip_unregister(struct tpm_chip *chip);
+struct tpm_chip *tpm_chip_alloc(struct device *dev,
+				const struct tpm_class_ops *ops);
+struct tpm_chip *tpmm_chip_alloc(struct device *pdev,
+				 const struct tpm_class_ops *ops);
+int tpm_chip_register(struct tpm_chip *chip);
+void tpm_chip_unregister(struct tpm_chip *chip);
 
 void tpm_sysfs_add_device(struct tpm_chip *chip);
 
@@ -528,8 +531,7 @@
 			u32 *value, const char *desc);
 
 int tpm2_auto_startup(struct tpm_chip *chip);
-extern void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type);
-extern unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *, u32);
-extern int tpm2_gen_interrupt(struct tpm_chip *chip);
-extern int tpm2_probe(struct tpm_chip *chip);
+void tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type);
+unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal);
+int tpm2_probe(struct tpm_chip *chip);
 #endif
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index 08c7e23..7df55d58 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -282,7 +282,7 @@
 	       sizeof(cmd.params.pcrread_in.pcr_select));
 	cmd.params.pcrread_in.pcr_select[pcr_idx >> 3] = 1 << (pcr_idx & 0x7);
 
-	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
+	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0,
 			      "attempting to read a pcr value");
 	if (rc == 0) {
 		buf = cmd.params.pcrread_out.digest;
@@ -330,7 +330,7 @@
 	cmd.params.pcrextend_in.hash_alg = cpu_to_be16(TPM2_ALG_SHA1);
 	memcpy(cmd.params.pcrextend_in.digest, hash, TPM_DIGEST_SIZE);
 
-	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
+	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0,
 			      "attempting extend a PCR value");
 
 	return rc;
@@ -376,7 +376,7 @@
 		cmd.header.in = tpm2_getrandom_header;
 		cmd.params.getrandom_in.size = cpu_to_be16(num_bytes);
 
-		err = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
+		err = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0,
 				       "attempting get random");
 		if (err)
 			break;
@@ -434,12 +434,12 @@
 }
 
 /**
- * tpm2_seal_trusted() - seal a trusted key
- * @chip_num: A specific chip number for the request or TPM_ANY_NUM
- * @options: authentication values and other options
+ * tpm2_seal_trusted() - seal the payload of a trusted key
+ * @chip_num: TPM chip to use
  * @payload: the key data in clear and encrypted form
+ * @options: authentication values and other options
  *
- * Returns < 0 on error and 0 on success.
+ * Return: < 0 on error and 0 on success.
  */
 int tpm2_seal_trusted(struct tpm_chip *chip,
 		      struct trusted_key_payload *payload,
@@ -512,7 +512,7 @@
 		goto out;
 	}
 
-	rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "sealing data");
+	rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, 0, "sealing data");
 	if (rc)
 		goto out;
 
@@ -538,10 +538,18 @@
 	return rc;
 }
 
-static int tpm2_load(struct tpm_chip *chip,
-		     struct trusted_key_payload *payload,
-		     struct trusted_key_options *options,
-		     u32 *blob_handle)
+/**
+ * tpm2_load_cmd() - execute a TPM2_Load command
+ * @chip_num: TPM chip to use
+ * @payload: the key data in clear and encrypted form
+ * @options: authentication values and other options
+ *
+ * Return: same as with tpm_transmit_cmd
+ */
+static int tpm2_load_cmd(struct tpm_chip *chip,
+			 struct trusted_key_payload *payload,
+			 struct trusted_key_options *options,
+			 u32 *blob_handle, unsigned int flags)
 {
 	struct tpm_buf buf;
 	unsigned int private_len;
@@ -576,7 +584,7 @@
 		goto out;
 	}
 
-	rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "loading blob");
+	rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, flags, "loading blob");
 	if (!rc)
 		*blob_handle = be32_to_cpup(
 			(__be32 *) &buf.data[TPM_HEADER_SIZE]);
@@ -590,7 +598,16 @@
 	return rc;
 }
 
-static void tpm2_flush_context(struct tpm_chip *chip, u32 handle)
+/**
+ * tpm2_flush_context_cmd() - execute a TPM2_FlushContext command
+ * @chip_num: TPM chip to use
+ * @payload: the key data in clear and encrypted form
+ * @options: authentication values and other options
+ *
+ * Return: same as with tpm_transmit_cmd
+ */
+static void tpm2_flush_context_cmd(struct tpm_chip *chip, u32 handle,
+				   unsigned int flags)
 {
 	struct tpm_buf buf;
 	int rc;
@@ -604,7 +621,8 @@
 
 	tpm_buf_append_u32(&buf, handle);
 
-	rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "flushing context");
+	rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, flags,
+			      "flushing context");
 	if (rc)
 		dev_warn(&chip->dev, "0x%08x was not flushed, rc=%d\n", handle,
 			 rc);
@@ -612,10 +630,18 @@
 	tpm_buf_destroy(&buf);
 }
 
-static int tpm2_unseal(struct tpm_chip *chip,
-		       struct trusted_key_payload *payload,
-		       struct trusted_key_options *options,
-		       u32 blob_handle)
+/**
+ * tpm2_unseal_cmd() - execute a TPM2_Unload command
+ * @chip_num: TPM chip to use
+ * @payload: the key data in clear and encrypted form
+ * @options: authentication values and other options
+ *
+ * Return: same as with tpm_transmit_cmd
+ */
+static int tpm2_unseal_cmd(struct tpm_chip *chip,
+			   struct trusted_key_payload *payload,
+			   struct trusted_key_options *options,
+			   u32 blob_handle, unsigned int flags)
 {
 	struct tpm_buf buf;
 	u16 data_len;
@@ -635,7 +661,7 @@
 			     options->blobauth /* hmac */,
 			     TPM_DIGEST_SIZE);
 
-	rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, "unsealing");
+	rc = tpm_transmit_cmd(chip, buf.data, PAGE_SIZE, flags, "unsealing");
 	if (rc > 0)
 		rc = -EPERM;
 
@@ -654,12 +680,12 @@
 }
 
 /**
- * tpm_unseal_trusted() - unseal a trusted key
- * @chip_num: A specific chip number for the request or TPM_ANY_NUM
- * @options: authentication values and other options
+ * tpm_unseal_trusted() - unseal the payload of a trusted key
+ * @chip_num: TPM chip to use
  * @payload: the key data in clear and encrypted form
+ * @options: authentication values and other options
  *
- * Returns < 0 on error and 0 on success.
+ * Return: < 0 on error and 0 on success.
  */
 int tpm2_unseal_trusted(struct tpm_chip *chip,
 			struct trusted_key_payload *payload,
@@ -668,14 +694,17 @@
 	u32 blob_handle;
 	int rc;
 
-	rc = tpm2_load(chip, payload, options, &blob_handle);
+	mutex_lock(&chip->tpm_mutex);
+	rc = tpm2_load_cmd(chip, payload, options, &blob_handle,
+			   TPM_TRANSMIT_UNLOCKED);
 	if (rc)
-		return rc;
+		goto out;
 
-	rc = tpm2_unseal(chip, payload, options, blob_handle);
-
-	tpm2_flush_context(chip, blob_handle);
-
+	rc = tpm2_unseal_cmd(chip, payload, options, blob_handle,
+			     TPM_TRANSMIT_UNLOCKED);
+	tpm2_flush_context_cmd(chip, blob_handle, TPM_TRANSMIT_UNLOCKED);
+out:
+	mutex_unlock(&chip->tpm_mutex);
 	return rc;
 }
 
@@ -701,12 +730,13 @@
 	cmd.params.get_tpm_pt_in.property_id = cpu_to_be32(property_id);
 	cmd.params.get_tpm_pt_in.property_cnt = cpu_to_be32(1);
 
-	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), desc);
+	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0, desc);
 	if (!rc)
 		*value = be32_to_cpu(cmd.params.get_tpm_pt_out.value);
 
 	return rc;
 }
+EXPORT_SYMBOL_GPL(tpm2_get_tpm_pt);
 
 #define TPM2_STARTUP_IN_SIZE \
 	(sizeof(struct tpm_input_header) + \
@@ -735,7 +765,7 @@
 	cmd.header.in = tpm2_startup_header;
 
 	cmd.params.startup_in.startup_type = cpu_to_be16(startup_type);
-	return tpm_transmit_cmd(chip, &cmd, sizeof(cmd),
+	return tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0,
 				"attempting to start the TPM");
 }
 
@@ -763,7 +793,7 @@
 	cmd.header.in = tpm2_shutdown_header;
 	cmd.params.startup_in.startup_type = cpu_to_be16(shutdown_type);
 
-	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), "stopping the TPM");
+	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0, "stopping the TPM");
 
 	/* In places where shutdown command is sent there's no much we can do
 	 * except print the error code on a system failure.
@@ -828,7 +858,7 @@
 	cmd.header.in = tpm2_selftest_header;
 	cmd.params.selftest_in.full_test = full;
 
-	rc = tpm_transmit_cmd(chip, &cmd, TPM2_SELF_TEST_IN_SIZE,
+	rc = tpm_transmit_cmd(chip, &cmd, TPM2_SELF_TEST_IN_SIZE, 0,
 			      "continue selftest");
 
 	/* At least some prototype chips seem to give RC_TESTING error
@@ -880,7 +910,7 @@
 		cmd.params.pcrread_in.pcr_select[1] = 0x00;
 		cmd.params.pcrread_in.pcr_select[2] = 0x00;
 
-		rc = tpm_transmit_cmd(chip, (u8 *) &cmd, sizeof(cmd), NULL);
+		rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), 0, NULL);
 		if (rc < 0)
 			break;
 
@@ -895,23 +925,6 @@
 }
 
 /**
- * tpm2_gen_interrupt() - generate an interrupt
- * @chip: TPM chip to use
- *
- * 0 is returned when the operation is successful. If a negative number is
- * returned it remarks a POSIX error code. If a positive number is returned
- * it remarks a TPM error.
- */
-int tpm2_gen_interrupt(struct tpm_chip *chip)
-{
-	u32 dummy;
-
-	return tpm2_get_tpm_pt(chip, 0x100, &dummy,
-			       "attempting to generate an interrupt");
-}
-EXPORT_SYMBOL_GPL(tpm2_gen_interrupt);
-
-/**
  * tpm2_probe() - probe TPM 2.0
  * @chip: TPM chip to use
  *
@@ -928,11 +941,9 @@
 	cmd.params.get_tpm_pt_in.property_id = cpu_to_be32(0x100);
 	cmd.params.get_tpm_pt_in.property_cnt = cpu_to_be32(1);
 
-	rc = tpm_transmit(chip, (const char *) &cmd, sizeof(cmd));
+	rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd),  0, NULL);
 	if (rc <  0)
 		return rc;
-	else if (rc < TPM_HEADER_SIZE)
-		return -EFAULT;
 
 	if (be16_to_cpu(cmd.header.out.tag) == TPM2_ST_NO_SESSIONS)
 		chip->flags |= TPM_CHIP_FLAG_TPM2;
@@ -957,7 +968,7 @@
 		goto out;
 
 	rc = tpm2_do_selftest(chip);
-	if (rc != TPM2_RC_INITIALIZE) {
+	if (rc != 0 && rc != TPM2_RC_INITIALIZE) {
 		dev_err(&chip->dev, "TPM self test failed\n");
 		goto out;
 	}
@@ -974,7 +985,6 @@
 		}
 	}
 
-	return rc;
 out:
 	if (rc > 0)
 		rc = -ENODEV;
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index 018c3825..a7c870a 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -19,7 +19,6 @@
 #include <linux/highmem.h>
 #include <linux/rculist.h>
 #include <linux/module.h>
-#include <linux/platform_device.h>
 #include "tpm.h"
 
 #define ACPI_SIG_TPM2 "TPM2"
@@ -34,14 +33,14 @@
 	CRB_ACPI_START_INDEX = 1,
 };
 
-enum crb_ca_request {
-	CRB_CA_REQ_GO_IDLE	= BIT(0),
-	CRB_CA_REQ_CMD_READY	= BIT(1),
+enum crb_ctrl_req {
+	CRB_CTRL_REQ_CMD_READY	= BIT(0),
+	CRB_CTRL_REQ_GO_IDLE	= BIT(1),
 };
 
-enum crb_ca_status {
-	CRB_CA_STS_ERROR	= BIT(0),
-	CRB_CA_STS_TPM_IDLE	= BIT(1),
+enum crb_ctrl_sts {
+	CRB_CTRL_STS_ERROR	= BIT(0),
+	CRB_CTRL_STS_TPM_IDLE	= BIT(1),
 };
 
 enum crb_start {
@@ -67,7 +66,7 @@
 } __packed;
 
 enum crb_status {
-	CRB_STS_COMPLETE	= BIT(0),
+	CRB_DRV_STS_COMPLETE	= BIT(0),
 };
 
 enum crb_flags {
@@ -81,6 +80,7 @@
 	struct crb_control_area __iomem *cca;
 	u8 __iomem *cmd;
 	u8 __iomem *rsp;
+	u32 cmd_size;
 };
 
 static SIMPLE_DEV_PM_OPS(crb_pm, tpm_pm_suspend, tpm_pm_resume);
@@ -92,7 +92,7 @@
 
 	if ((ioread32(&priv->cca->start) & CRB_START_INVOKE) !=
 	    CRB_START_INVOKE)
-		sts |= CRB_STS_COMPLETE;
+		sts |= CRB_DRV_STS_COMPLETE;
 
 	return sts;
 }
@@ -106,7 +106,7 @@
 	if (count < 6)
 		return -EIO;
 
-	if (ioread32(&priv->cca->sts) & CRB_CA_STS_ERROR)
+	if (ioread32(&priv->cca->sts) & CRB_CTRL_STS_ERROR)
 		return -EIO;
 
 	memcpy_fromio(buf, priv->rsp, 6);
@@ -142,11 +142,14 @@
 	struct crb_priv *priv = dev_get_drvdata(&chip->dev);
 	int rc = 0;
 
-	if (len > ioread32(&priv->cca->cmd_size)) {
-		dev_err(&chip->dev,
-			"invalid command count value %x %zx\n",
-			(unsigned int) len,
-			(size_t) ioread32(&priv->cca->cmd_size));
+	/* Zero the cancel register so that the next command will not get
+	 * canceled.
+	 */
+	iowrite32(0, &priv->cca->cancel);
+
+	if (len > priv->cmd_size) {
+		dev_err(&chip->dev, "invalid command count value %zd %d\n",
+			len, priv->cmd_size);
 		return -E2BIG;
 	}
 
@@ -156,7 +159,7 @@
 	wmb();
 
 	if (priv->flags & CRB_FL_CRB_START)
-		iowrite32(cpu_to_le32(CRB_START_INVOKE), &priv->cca->start);
+		iowrite32(CRB_START_INVOKE, &priv->cca->start);
 
 	if (priv->flags & CRB_FL_ACPI_START)
 		rc = crb_do_acpi_start(chip);
@@ -168,15 +171,10 @@
 {
 	struct crb_priv *priv = dev_get_drvdata(&chip->dev);
 
-	iowrite32(cpu_to_le32(CRB_CANCEL_INVOKE), &priv->cca->cancel);
-
-	/* Make sure that cmd is populated before issuing cancel. */
-	wmb();
+	iowrite32(CRB_CANCEL_INVOKE, &priv->cca->cancel);
 
 	if ((priv->flags & CRB_FL_ACPI_START) && crb_do_acpi_start(chip))
 		dev_err(&chip->dev, "ACPI Start failed\n");
-
-	iowrite32(0, &priv->cca->cancel);
 }
 
 static bool crb_req_canceled(struct tpm_chip *chip, u8 status)
@@ -194,8 +192,8 @@
 	.send = crb_send,
 	.cancel = crb_cancel,
 	.req_canceled = crb_req_canceled,
-	.req_complete_mask = CRB_STS_COMPLETE,
-	.req_complete_val = CRB_STS_COMPLETE,
+	.req_complete_mask = CRB_DRV_STS_COMPLETE,
+	.req_complete_val = CRB_DRV_STS_COMPLETE,
 };
 
 static int crb_init(struct acpi_device *device, struct crb_priv *priv)
@@ -265,8 +263,7 @@
 	acpi_dev_free_resource_list(&resources);
 
 	if (resource_type(&io_res) != IORESOURCE_MEM) {
-		dev_err(dev,
-			FW_BUG "TPM2 ACPI table does not define a memory resource\n");
+		dev_err(dev, FW_BUG "TPM2 ACPI table does not define a memory resource\n");
 		return -EINVAL;
 	}
 
@@ -302,6 +299,7 @@
 		dev_err(dev, FW_BUG "overlapping command and response buffer sizes are not identical");
 		return -EINVAL;
 	}
+	priv->cmd_size = cmd_size;
 
 	priv->rsp = priv->cmd;
 	return 0;
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index d66f51b..e3bf31b 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -440,7 +440,6 @@
 		0x00, 0x00, 0x00, 0xf1
 	};
 	size_t len = sizeof(cmd_getticks);
-	bool itpm;
 	u16 vendor;
 
 	rc = tpm_tis_read16(priv, TPM_DID_VID(0), &vendor);
@@ -451,8 +450,6 @@
 	if (vendor != TPM_VID_INTEL)
 		return 0;
 
-	itpm = false;
-
 	rc = tpm_tis_send_data(chip, cmd_getticks, len);
 	if (rc == 0)
 		goto out;
@@ -460,8 +457,6 @@
 	tpm_tis_ready(chip);
 	release_locality(chip, priv->locality, 0);
 
-	itpm = true;
-
 	rc = tpm_tis_send_data(chip, cmd_getticks, len);
 	if (rc == 0) {
 		dev_info(&chip->dev, "Detected an iTPM.\n");
@@ -526,6 +521,18 @@
 	return IRQ_HANDLED;
 }
 
+static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
+{
+	const char *desc = "attempting to generate an interrupt";
+	u32 cap2;
+	cap_t cap;
+
+	if (chip->flags & TPM_CHIP_FLAG_TPM2)
+		return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
+	else
+		return tpm_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc);
+}
+
 /* Register the IRQ and issue a command that will cause an interrupt. If an
  * irq is seen then leave the chip setup for IRQ operation, otherwise reverse
  * everything and leave in polling mode. Returns 0 on success.
@@ -575,10 +582,9 @@
 	/* Generate an interrupt by having the core call through to
 	 * tpm_tis_send
 	 */
-	if (chip->flags & TPM_CHIP_FLAG_TPM2)
-		tpm2_gen_interrupt(chip);
-	else
-		tpm_gen_interrupt(chip);
+	rc = tpm_tis_gen_interrupt(chip);
+	if (rc < 0)
+		return rc;
 
 	/* tpm_tis_send will either confirm the interrupt is working or it
 	 * will call disable_irq which undoes all of the above.
diff --git a/drivers/char/ttyprintk.c b/drivers/char/ttyprintk.c
index b098d2d..67549ce 100644
--- a/drivers/char/ttyprintk.c
+++ b/drivers/char/ttyprintk.c
@@ -31,60 +31,53 @@
  * printk messages (also suitable for logging service):
  * - any cr is replaced by nl
  * - adds a ttyprintk source tag in front of each line
- * - too long message is fragmeted, with '\'nl between fragments
- * - TPK_STR_SIZE isn't really the write_room limiting factor, bcause
+ * - too long message is fragmented, with '\'nl between fragments
+ * - TPK_STR_SIZE isn't really the write_room limiting factor, because
  *   it is emptied on the fly during preformatting.
  */
 #define TPK_STR_SIZE 508 /* should be bigger then max expected line length */
 #define TPK_MAX_ROOM 4096 /* we could assume 4K for instance */
-static const char *tpk_tag = "[U] "; /* U for User */
 static int tpk_curr;
 
+static char tpk_buffer[TPK_STR_SIZE + 4];
+
+static void tpk_flush(void)
+{
+	if (tpk_curr > 0) {
+		tpk_buffer[tpk_curr] = '\0';
+		pr_info("[U] %s\n", tpk_buffer);
+		tpk_curr = 0;
+	}
+}
+
 static int tpk_printk(const unsigned char *buf, int count)
 {
-	static char tmp[TPK_STR_SIZE + 4];
 	int i = tpk_curr;
 
 	if (buf == NULL) {
-		/* flush tmp[] */
-		if (tpk_curr > 0) {
-			/* non nl or cr terminated message - add nl */
-			tmp[tpk_curr + 0] = '\n';
-			tmp[tpk_curr + 1] = '\0';
-			printk(KERN_INFO "%s%s", tpk_tag, tmp);
-			tpk_curr = 0;
-		}
+		tpk_flush();
 		return i;
 	}
 
 	for (i = 0; i < count; i++) {
-		tmp[tpk_curr] = buf[i];
-		if (tpk_curr < TPK_STR_SIZE) {
-			switch (buf[i]) {
-			case '\r':
-				/* replace cr with nl */
-				tmp[tpk_curr + 0] = '\n';
-				tmp[tpk_curr + 1] = '\0';
-				printk(KERN_INFO "%s%s", tpk_tag, tmp);
-				tpk_curr = 0;
-				if ((i + 1) < count && buf[i + 1] == '\n')
-					i++;
-				break;
-			case '\n':
-				tmp[tpk_curr + 1] = '\0';
-				printk(KERN_INFO "%s%s", tpk_tag, tmp);
-				tpk_curr = 0;
-				break;
-			default:
-				tpk_curr++;
-			}
-		} else {
+		if (tpk_curr >= TPK_STR_SIZE) {
 			/* end of tmp buffer reached: cut the message in two */
-			tmp[tpk_curr + 1] = '\\';
-			tmp[tpk_curr + 2] = '\n';
-			tmp[tpk_curr + 3] = '\0';
-			printk(KERN_INFO "%s%s", tpk_tag, tmp);
-			tpk_curr = 0;
+			tpk_buffer[tpk_curr++] = '\\';
+			tpk_flush();
+		}
+
+		switch (buf[i]) {
+		case '\r':
+			tpk_flush();
+			if ((i + 1) < count && buf[i + 1] == '\n')
+				i++;
+			break;
+		case '\n':
+			tpk_flush();
+			break;
+		default:
+			tpk_buffer[tpk_curr++] = buf[i];
+			break;
 		}
 	}
 
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index d2406fe..5da47e26 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -165,6 +165,12 @@
 	 */
 	struct virtqueue *c_ivq, *c_ovq;
 
+	/*
+	 * A control packet buffer for guest->host requests, protected
+	 * by c_ovq_lock.
+	 */
+	struct virtio_console_control cpkt;
+
 	/* Array of per-port IO virtqueues */
 	struct virtqueue **in_vqs, **out_vqs;
 
@@ -560,28 +566,29 @@
 				  unsigned int event, unsigned int value)
 {
 	struct scatterlist sg[1];
-	struct virtio_console_control cpkt;
 	struct virtqueue *vq;
 	unsigned int len;
 
 	if (!use_multiport(portdev))
 		return 0;
 
-	cpkt.id = cpu_to_virtio32(portdev->vdev, port_id);
-	cpkt.event = cpu_to_virtio16(portdev->vdev, event);
-	cpkt.value = cpu_to_virtio16(portdev->vdev, value);
-
 	vq = portdev->c_ovq;
 
-	sg_init_one(sg, &cpkt, sizeof(cpkt));
-
 	spin_lock(&portdev->c_ovq_lock);
-	if (virtqueue_add_outbuf(vq, sg, 1, &cpkt, GFP_ATOMIC) == 0) {
+
+	portdev->cpkt.id = cpu_to_virtio32(portdev->vdev, port_id);
+	portdev->cpkt.event = cpu_to_virtio16(portdev->vdev, event);
+	portdev->cpkt.value = cpu_to_virtio16(portdev->vdev, value);
+
+	sg_init_one(sg, &portdev->cpkt, sizeof(struct virtio_console_control));
+
+	if (virtqueue_add_outbuf(vq, sg, 1, &portdev->cpkt, GFP_ATOMIC) == 0) {
 		virtqueue_kick(vq);
 		while (!virtqueue_get_buf(vq, &len)
 			&& !virtqueue_is_broken(vq))
 			cpu_relax();
 	}
+
 	spin_unlock(&portdev->c_ovq_lock);
 	return 0;
 }
diff --git a/drivers/char/xillybus/xillybus_core.c b/drivers/char/xillybus/xillybus_core.c
index dcd19f3..b6c9cde 100644
--- a/drivers/char/xillybus/xillybus_core.c
+++ b/drivers/char/xillybus/xillybus_core.c
@@ -655,10 +655,10 @@
 
 	version = channel->wr_buffers[0]->addr;
 
-	/* Check version number. Accept anything below 0x82 for now. */
+	/* Check version number. Reject anything above 0x82. */
 	if (*version > 0x82) {
 		dev_err(endpoint->dev,
-			"No support for IDT version 0x%02x. Maybe the xillybus driver needs an upgarde. Aborting.\n",
+			"No support for IDT version 0x%02x. Maybe the xillybus driver needs an upgrade. Aborting.\n",
 			*version);
 		return -ENODEV;
 	}
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index e2d9bd7..6a8ac04 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -31,22 +31,12 @@
 
 source "drivers/clk/versatile/Kconfig"
 
-config COMMON_CLK_MAX_GEN
-        bool
-
 config COMMON_CLK_MAX77686
-	tristate "Clock driver for Maxim 77686 MFD"
-	depends on MFD_MAX77686
-	select COMMON_CLK_MAX_GEN
+	tristate "Clock driver for Maxim 77620/77686/77802 MFD"
+	depends on MFD_MAX77686 || MFD_MAX77620
 	---help---
-	  This driver supports Maxim 77686 crystal oscillator clock. 
-
-config COMMON_CLK_MAX77802
-	tristate "Clock driver for Maxim 77802 PMIC"
-	depends on MFD_MAX77686
-	select COMMON_CLK_MAX_GEN
-	---help---
-	  This driver supports Maxim 77802 crystal oscillator clock.
+	  This driver supports Maxim 77620/77686/77802 crystal oscillator
+	  clock.
 
 config COMMON_CLK_RK808
 	tristate "Clock driver for RK808/RK818"
@@ -210,6 +200,7 @@
 
 source "drivers/clk/bcm/Kconfig"
 source "drivers/clk/hisilicon/Kconfig"
+source "drivers/clk/mediatek/Kconfig"
 source "drivers/clk/meson/Kconfig"
 source "drivers/clk/mvebu/Kconfig"
 source "drivers/clk/qcom/Kconfig"
@@ -218,5 +209,6 @@
 source "drivers/clk/sunxi-ng/Kconfig"
 source "drivers/clk/tegra/Kconfig"
 source "drivers/clk/ti/Kconfig"
+source "drivers/clk/uniphier/Kconfig"
 
 endmenu
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 3b6f9cf..925081e 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -26,10 +26,7 @@
 obj-$(CONFIG_COMMON_CLK_CS2000_CP)	+= clk-cs2000-cp.o
 obj-$(CONFIG_ARCH_EFM32)		+= clk-efm32gg.o
 obj-$(CONFIG_ARCH_HIGHBANK)		+= clk-highbank.o
-obj-$(CONFIG_MACH_LOONGSON32)		+= clk-ls1x.o
-obj-$(CONFIG_COMMON_CLK_MAX_GEN)	+= clk-max-gen.o
 obj-$(CONFIG_COMMON_CLK_MAX77686)	+= clk-max77686.o
-obj-$(CONFIG_COMMON_CLK_MAX77802)	+= clk-max77802.o
 obj-$(CONFIG_ARCH_MB86S7X)		+= clk-mb86s7x.o
 obj-$(CONFIG_ARCH_MOXART)		+= clk-moxart.o
 obj-$(CONFIG_ARCH_NOMADIK)		+= clk-nomadik.o
@@ -63,6 +60,7 @@
 obj-$(CONFIG_ARCH_MXC)			+= imx/
 obj-$(CONFIG_MACH_INGENIC)		+= ingenic/
 obj-$(CONFIG_COMMON_CLK_KEYSTONE)	+= keystone/
+obj-$(CONFIG_MACH_LOONGSON32)		+= loongson1/
 obj-$(CONFIG_ARCH_MEDIATEK)		+= mediatek/
 obj-$(CONFIG_COMMON_CLK_AMLOGIC)	+= meson/
 obj-$(CONFIG_MACH_PIC32)		+= microchip/
@@ -86,6 +84,7 @@
 obj-$(CONFIG_ARCH_SUNXI)		+= sunxi-ng/
 obj-$(CONFIG_ARCH_TEGRA)		+= tegra/
 obj-y					+= ti/
+obj-$(CONFIG_CLK_UNIPHIER)		+= uniphier/
 obj-$(CONFIG_ARCH_U8500)		+= ux500/
 obj-$(CONFIG_COMMON_CLK_VERSATILE)	+= versatile/
 obj-$(CONFIG_X86)			+= x86/
diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c
index 7f6bec8..4e1cd5a 100644
--- a/drivers/clk/at91/clk-generated.c
+++ b/drivers/clk/at91/clk-generated.c
@@ -233,14 +233,16 @@
 					>> AT91_PMC_PCR_GCKDIV_OFFSET;
 }
 
-static struct clk * __init
-at91_clk_register_generated(struct regmap *regmap,  spinlock_t *lock, const char
-			    *name, const char **parent_names, u8 num_parents,
-			    u8 id, const struct clk_range *range)
+static struct clk_hw * __init
+at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock,
+			    const char *name, const char **parent_names,
+			    u8 num_parents, u8 id,
+			    const struct clk_range *range)
 {
 	struct clk_generated *gck;
-	struct clk *clk = NULL;
 	struct clk_init_data init;
+	struct clk_hw *hw;
+	int ret;
 
 	gck = kzalloc(sizeof(*gck), GFP_KERNEL);
 	if (!gck)
@@ -258,13 +260,15 @@
 	gck->lock = lock;
 	gck->range = *range;
 
-	clk = clk_register(NULL, &gck->hw);
-	if (IS_ERR(clk))
+	hw = &gck->hw;
+	ret = clk_hw_register(NULL, &gck->hw);
+	if (ret) {
 		kfree(gck);
-	else
+		hw = ERR_PTR(ret);
+	} else
 		clk_generated_startup(gck);
 
-	return clk;
+	return hw;
 }
 
 static void __init of_sama5d2_clk_generated_setup(struct device_node *np)
@@ -272,7 +276,7 @@
 	int num;
 	u32 id;
 	const char *name;
-	struct clk *clk;
+	struct clk_hw *hw;
 	unsigned int num_parents;
 	const char *parent_names[GENERATED_SOURCE_MAX];
 	struct device_node *gcknp;
@@ -306,13 +310,13 @@
 		of_at91_get_clk_range(gcknp, "atmel,clk-output-range",
 				      &range);
 
-		clk = at91_clk_register_generated(regmap, &pmc_pcr_lock, name,
+		hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, name,
 						  parent_names, num_parents,
 						  id, &range);
-		if (IS_ERR(clk))
+		if (IS_ERR(hw))
 			continue;
 
-		of_clk_add_provider(gcknp, of_clk_src_simple_get, clk);
+		of_clk_add_hw_provider(gcknp, of_clk_hw_simple_get, hw);
 	}
 }
 CLK_OF_DECLARE(of_sama5d2_clk_generated_setup, "atmel,sama5d2-clk-generated",
diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c
index 8e20c8a..e0daa4a 100644
--- a/drivers/clk/at91/clk-h32mx.c
+++ b/drivers/clk/at91/clk-h32mx.c
@@ -92,7 +92,7 @@
 	struct clk_init_data init;
 	const char *parent_name;
 	struct regmap *regmap;
-	struct clk *clk;
+	int ret;
 
 	regmap = syscon_node_to_regmap(of_get_parent(np));
 	if (IS_ERR(regmap))
@@ -113,13 +113,13 @@
 	h32mxclk->hw.init = &init;
 	h32mxclk->regmap = regmap;
 
-	clk = clk_register(NULL, &h32mxclk->hw);
-	if (IS_ERR(clk)) {
+	ret = clk_hw_register(NULL, &h32mxclk->hw);
+	if (ret) {
 		kfree(h32mxclk);
 		return;
 	}
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, &h32mxclk->hw);
 }
 CLK_OF_DECLARE(of_sama5d4_clk_h32mx_setup, "atmel,sama5d4-clk-h32mx",
 	       of_sama5d4_clk_h32mx_setup);
diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c
index 58b5bac..c813c27 100644
--- a/drivers/clk/at91/clk-main.c
+++ b/drivers/clk/at91/clk-main.c
@@ -128,15 +128,16 @@
 	.is_prepared = clk_main_osc_is_prepared,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_main_osc(struct regmap *regmap,
 			   const char *name,
 			   const char *parent_name,
 			   bool bypass)
 {
 	struct clk_main_osc *osc;
-	struct clk *clk = NULL;
 	struct clk_init_data init;
+	struct clk_hw *hw;
+	int ret;
 
 	if (!name || !parent_name)
 		return ERR_PTR(-EINVAL);
@@ -160,16 +161,19 @@
 				   AT91_PMC_MOSCEN,
 				   AT91_PMC_OSCBYPASS | AT91_PMC_KEY);
 
-	clk = clk_register(NULL, &osc->hw);
-	if (IS_ERR(clk))
+	hw = &osc->hw;
+	ret = clk_hw_register(NULL, &osc->hw);
+	if (ret) {
 		kfree(osc);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *name = np->name;
 	const char *parent_name;
 	struct regmap *regmap;
@@ -183,11 +187,11 @@
 	if (IS_ERR(regmap))
 		return;
 
-	clk = at91_clk_register_main_osc(regmap, name, parent_name, bypass);
-	if (IS_ERR(clk))
+	hw = at91_clk_register_main_osc(regmap, name, parent_name, bypass);
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(at91rm9200_clk_main_osc, "atmel,at91rm9200-clk-main-osc",
 	       of_at91rm9200_clk_main_osc_setup);
@@ -271,14 +275,15 @@
 	.recalc_accuracy = clk_main_rc_osc_recalc_accuracy,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_main_rc_osc(struct regmap *regmap,
 			      const char *name,
 			      u32 frequency, u32 accuracy)
 {
 	struct clk_main_rc_osc *osc;
-	struct clk *clk = NULL;
 	struct clk_init_data init;
+	struct clk_hw *hw;
+	int ret;
 
 	if (!name || !frequency)
 		return ERR_PTR(-EINVAL);
@@ -298,16 +303,19 @@
 	osc->frequency = frequency;
 	osc->accuracy = accuracy;
 
-	clk = clk_register(NULL, &osc->hw);
-	if (IS_ERR(clk))
+	hw = &osc->hw;
+	ret = clk_hw_register(NULL, hw);
+	if (ret) {
 		kfree(osc);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	u32 frequency = 0;
 	u32 accuracy = 0;
 	const char *name = np->name;
@@ -321,11 +329,11 @@
 	if (IS_ERR(regmap))
 		return;
 
-	clk = at91_clk_register_main_rc_osc(regmap, name, frequency, accuracy);
-	if (IS_ERR(clk))
+	hw = at91_clk_register_main_rc_osc(regmap, name, frequency, accuracy);
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(at91sam9x5_clk_main_rc_osc, "atmel,at91sam9x5-clk-main-rc-osc",
 	       of_at91sam9x5_clk_main_rc_osc_setup);
@@ -395,14 +403,15 @@
 	.recalc_rate = clk_rm9200_main_recalc_rate,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_rm9200_main(struct regmap *regmap,
 			      const char *name,
 			      const char *parent_name)
 {
 	struct clk_rm9200_main *clkmain;
-	struct clk *clk = NULL;
 	struct clk_init_data init;
+	struct clk_hw *hw;
+	int ret;
 
 	if (!name)
 		return ERR_PTR(-EINVAL);
@@ -423,16 +432,19 @@
 	clkmain->hw.init = &init;
 	clkmain->regmap = regmap;
 
-	clk = clk_register(NULL, &clkmain->hw);
-	if (IS_ERR(clk))
+	hw = &clkmain->hw;
+	ret = clk_hw_register(NULL, &clkmain->hw);
+	if (ret) {
 		kfree(clkmain);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init of_at91rm9200_clk_main_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *parent_name;
 	const char *name = np->name;
 	struct regmap *regmap;
@@ -444,11 +456,11 @@
 	if (IS_ERR(regmap))
 		return;
 
-	clk = at91_clk_register_rm9200_main(regmap, name, parent_name);
-	if (IS_ERR(clk))
+	hw = at91_clk_register_rm9200_main(regmap, name, parent_name);
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(at91rm9200_clk_main, "atmel,at91rm9200-clk-main",
 	       of_at91rm9200_clk_main_setup);
@@ -529,16 +541,17 @@
 	.get_parent = clk_sam9x5_main_get_parent,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_sam9x5_main(struct regmap *regmap,
 			      const char *name,
 			      const char **parent_names,
 			      int num_parents)
 {
 	struct clk_sam9x5_main *clkmain;
-	struct clk *clk = NULL;
 	struct clk_init_data init;
 	unsigned int status;
+	struct clk_hw *hw;
+	int ret;
 
 	if (!name)
 		return ERR_PTR(-EINVAL);
@@ -561,16 +574,19 @@
 	regmap_read(clkmain->regmap, AT91_CKGR_MOR, &status);
 	clkmain->parent = status & AT91_PMC_MOSCEN ? 1 : 0;
 
-	clk = clk_register(NULL, &clkmain->hw);
-	if (IS_ERR(clk))
+	hw = &clkmain->hw;
+	ret = clk_hw_register(NULL, &clkmain->hw);
+	if (ret) {
 		kfree(clkmain);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init of_at91sam9x5_clk_main_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *parent_names[2];
 	unsigned int num_parents;
 	const char *name = np->name;
@@ -587,12 +603,12 @@
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	clk = at91_clk_register_sam9x5_main(regmap, name, parent_names,
+	hw = at91_clk_register_sam9x5_main(regmap, name, parent_names,
 					    num_parents);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(at91sam9x5_clk_main, "atmel,at91sam9x5-clk-main",
 	       of_at91sam9x5_clk_main_setup);
diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c
index d1021e1..e9cba9f 100644
--- a/drivers/clk/at91/clk-master.c
+++ b/drivers/clk/at91/clk-master.c
@@ -120,7 +120,7 @@
 	.get_parent = clk_master_get_parent,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_master(struct regmap *regmap,
 		const char *name, int num_parents,
 		const char **parent_names,
@@ -128,8 +128,9 @@
 		const struct clk_master_characteristics *characteristics)
 {
 	struct clk_master *master;
-	struct clk *clk = NULL;
 	struct clk_init_data init;
+	struct clk_hw *hw;
+	int ret;
 
 	if (!name || !num_parents || !parent_names)
 		return ERR_PTR(-EINVAL);
@@ -149,12 +150,14 @@
 	master->characteristics = characteristics;
 	master->regmap = regmap;
 
-	clk = clk_register(NULL, &master->hw);
-	if (IS_ERR(clk)) {
+	hw = &master->hw;
+	ret = clk_hw_register(NULL, &master->hw);
+	if (ret) {
 		kfree(master);
+		hw = ERR_PTR(ret);
 	}
 
-	return clk;
+	return hw;
 }
 
 
@@ -198,7 +201,7 @@
 of_at91_clk_master_setup(struct device_node *np,
 			 const struct clk_master_layout *layout)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	unsigned int num_parents;
 	const char *parent_names[MASTER_SOURCE_MAX];
 	const char *name = np->name;
@@ -221,13 +224,13 @@
 	if (IS_ERR(regmap))
 		return;
 
-	clk = at91_clk_register_master(regmap, name, num_parents,
+	hw = at91_clk_register_master(regmap, name, num_parents,
 				       parent_names, layout,
 				       characteristics);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto out_free_characteristics;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 	return;
 
 out_free_characteristics:
diff --git a/drivers/clk/at91/clk-peripheral.c b/drivers/clk/at91/clk-peripheral.c
index fd16072..dc29fd9 100644
--- a/drivers/clk/at91/clk-peripheral.c
+++ b/drivers/clk/at91/clk-peripheral.c
@@ -104,13 +104,14 @@
 	.is_enabled = clk_peripheral_is_enabled,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_peripheral(struct regmap *regmap, const char *name,
 			     const char *parent_name, u32 id)
 {
 	struct clk_peripheral *periph;
-	struct clk *clk = NULL;
 	struct clk_init_data init;
+	struct clk_hw *hw;
+	int ret;
 
 	if (!name || !parent_name || id > PERIPHERAL_ID_MAX)
 		return ERR_PTR(-EINVAL);
@@ -129,11 +130,14 @@
 	periph->hw.init = &init;
 	periph->regmap = regmap;
 
-	clk = clk_register(NULL, &periph->hw);
-	if (IS_ERR(clk))
+	hw = &periph->hw;
+	ret = clk_hw_register(NULL, &periph->hw);
+	if (ret) {
 		kfree(periph);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void clk_sam9x5_peripheral_autodiv(struct clk_sam9x5_peripheral *periph)
@@ -327,14 +331,15 @@
 	.set_rate = clk_sam9x5_peripheral_set_rate,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_sam9x5_peripheral(struct regmap *regmap, spinlock_t *lock,
 				    const char *name, const char *parent_name,
 				    u32 id, const struct clk_range *range)
 {
 	struct clk_sam9x5_peripheral *periph;
-	struct clk *clk = NULL;
 	struct clk_init_data init;
+	struct clk_hw *hw;
+	int ret;
 
 	if (!name || !parent_name)
 		return ERR_PTR(-EINVAL);
@@ -357,13 +362,15 @@
 	periph->auto_div = true;
 	periph->range = *range;
 
-	clk = clk_register(NULL, &periph->hw);
-	if (IS_ERR(clk))
+	hw = &periph->hw;
+	ret = clk_hw_register(NULL, &periph->hw);
+	if (ret) {
 		kfree(periph);
-	else
+		hw = ERR_PTR(ret);
+	} else
 		clk_sam9x5_peripheral_autodiv(periph);
 
-	return clk;
+	return hw;
 }
 
 static void __init
@@ -371,7 +378,7 @@
 {
 	int num;
 	u32 id;
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *parent_name;
 	const char *name;
 	struct device_node *periphclknp;
@@ -400,7 +407,7 @@
 			name = periphclknp->name;
 
 		if (type == PERIPHERAL_AT91RM9200) {
-			clk = at91_clk_register_peripheral(regmap, name,
+			hw = at91_clk_register_peripheral(regmap, name,
 							   parent_name, id);
 		} else {
 			struct clk_range range = CLK_RANGE(0, 0);
@@ -409,17 +416,17 @@
 					      "atmel,clk-output-range",
 					      &range);
 
-			clk = at91_clk_register_sam9x5_peripheral(regmap,
+			hw = at91_clk_register_sam9x5_peripheral(regmap,
 								  &pmc_pcr_lock,
 								  name,
 								  parent_name,
 								  id, &range);
 		}
 
-		if (IS_ERR(clk))
+		if (IS_ERR(hw))
 			continue;
 
-		of_clk_add_provider(periphclknp, of_clk_src_simple_get, clk);
+		of_clk_add_hw_provider(periphclknp, of_clk_hw_simple_get, hw);
 	}
 }
 
diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c
index fb2e0b5..45ad168 100644
--- a/drivers/clk/at91/clk-pll.c
+++ b/drivers/clk/at91/clk-pll.c
@@ -296,17 +296,18 @@
 	.set_rate = clk_pll_set_rate,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_pll(struct regmap *regmap, const char *name,
 		      const char *parent_name, u8 id,
 		      const struct clk_pll_layout *layout,
 		      const struct clk_pll_characteristics *characteristics)
 {
 	struct clk_pll *pll;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
 	int offset = PLL_REG(id);
 	unsigned int pllr;
+	int ret;
 
 	if (id > PLL_MAX_ID)
 		return ERR_PTR(-EINVAL);
@@ -330,12 +331,14 @@
 	pll->div = PLL_DIV(pllr);
 	pll->mul = PLL_MUL(pllr, layout);
 
-	clk = clk_register(NULL, &pll->hw);
-	if (IS_ERR(clk)) {
+	hw = &pll->hw;
+	ret = clk_hw_register(NULL, &pll->hw);
+	if (ret) {
 		kfree(pll);
+		hw = ERR_PTR(ret);
 	}
 
-	return clk;
+	return hw;
 }
 
 
@@ -465,7 +468,7 @@
 		      const struct clk_pll_layout *layout)
 {
 	u32 id;
-	struct clk *clk;
+	struct clk_hw *hw;
 	struct regmap *regmap;
 	const char *parent_name;
 	const char *name = np->name;
@@ -486,12 +489,12 @@
 	if (!characteristics)
 		return;
 
-	clk = at91_clk_register_pll(regmap, name, parent_name, id, layout,
+	hw = at91_clk_register_pll(regmap, name, parent_name, id, layout,
 				    characteristics);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto out_free_characteristics;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 	return;
 
 out_free_characteristics:
diff --git a/drivers/clk/at91/clk-plldiv.c b/drivers/clk/at91/clk-plldiv.c
index 2bed264..b4afaf2 100644
--- a/drivers/clk/at91/clk-plldiv.c
+++ b/drivers/clk/at91/clk-plldiv.c
@@ -75,13 +75,14 @@
 	.set_rate = clk_plldiv_set_rate,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_plldiv(struct regmap *regmap, const char *name,
 			 const char *parent_name)
 {
 	struct clk_plldiv *plldiv;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
+	int ret;
 
 	plldiv = kzalloc(sizeof(*plldiv), GFP_KERNEL);
 	if (!plldiv)
@@ -96,18 +97,20 @@
 	plldiv->hw.init = &init;
 	plldiv->regmap = regmap;
 
-	clk = clk_register(NULL, &plldiv->hw);
-
-	if (IS_ERR(clk))
+	hw = &plldiv->hw;
+	ret = clk_hw_register(NULL, &plldiv->hw);
+	if (ret) {
 		kfree(plldiv);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init
 of_at91sam9x5_clk_plldiv_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *parent_name;
 	const char *name = np->name;
 	struct regmap *regmap;
@@ -120,12 +123,11 @@
 	if (IS_ERR(regmap))
 		return;
 
-	clk = at91_clk_register_plldiv(regmap, name, parent_name);
-	if (IS_ERR(clk))
+	hw = at91_clk_register_plldiv(regmap, name, parent_name);
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
-	return;
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(at91sam9x5_clk_plldiv, "atmel,at91sam9x5-clk-plldiv",
 	       of_at91sam9x5_clk_plldiv_setup);
diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c
index 25d5906..190122e64 100644
--- a/drivers/clk/at91/clk-programmable.c
+++ b/drivers/clk/at91/clk-programmable.c
@@ -170,15 +170,16 @@
 	.set_rate = clk_programmable_set_rate,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_programmable(struct regmap *regmap,
 			       const char *name, const char **parent_names,
 			       u8 num_parents, u8 id,
 			       const struct clk_programmable_layout *layout)
 {
 	struct clk_programmable *prog;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
+	int ret;
 
 	if (id > PROG_ID_MAX)
 		return ERR_PTR(-EINVAL);
@@ -198,11 +199,14 @@
 	prog->hw.init = &init;
 	prog->regmap = regmap;
 
-	clk = clk_register(NULL, &prog->hw);
-	if (IS_ERR(clk))
+	hw = &prog->hw;
+	ret = clk_hw_register(NULL, &prog->hw);
+	if (ret) {
 		kfree(prog);
+		hw = &prog->hw;
+	}
 
-	return clk;
+	return hw;
 }
 
 static const struct clk_programmable_layout at91rm9200_programmable_layout = {
@@ -229,7 +233,7 @@
 {
 	int num;
 	u32 id;
-	struct clk *clk;
+	struct clk_hw *hw;
 	unsigned int num_parents;
 	const char *parent_names[PROG_SOURCE_MAX];
 	const char *name;
@@ -257,13 +261,13 @@
 		if (of_property_read_string(np, "clock-output-names", &name))
 			name = progclknp->name;
 
-		clk = at91_clk_register_programmable(regmap, name,
+		hw = at91_clk_register_programmable(regmap, name,
 						     parent_names, num_parents,
 						     id, layout);
-		if (IS_ERR(clk))
+		if (IS_ERR(hw))
 			continue;
 
-		of_clk_add_provider(progclknp, of_clk_src_simple_get, clk);
+		of_clk_add_hw_provider(progclknp, of_clk_hw_simple_get, hw);
 	}
 }
 
diff --git a/drivers/clk/at91/clk-slow.c b/drivers/clk/at91/clk-slow.c
index 61090b1..560a8b9 100644
--- a/drivers/clk/at91/clk-slow.c
+++ b/drivers/clk/at91/clk-slow.c
@@ -13,42 +13,11 @@
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
 #include <linux/clk/at91_pmc.h>
-#include <linux/delay.h>
 #include <linux/of.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 
 #include "pmc.h"
-#include "sckc.h"
-
-#define SLOW_CLOCK_FREQ		32768
-#define SLOWCK_SW_CYCLES	5
-#define SLOWCK_SW_TIME_USEC	((SLOWCK_SW_CYCLES * USEC_PER_SEC) / \
-				 SLOW_CLOCK_FREQ)
-
-#define	AT91_SCKC_CR			0x00
-#define		AT91_SCKC_RCEN		(1 << 0)
-#define		AT91_SCKC_OSC32EN	(1 << 1)
-#define		AT91_SCKC_OSC32BYP	(1 << 2)
-#define		AT91_SCKC_OSCSEL	(1 << 3)
-
-struct clk_slow_osc {
-	struct clk_hw hw;
-	void __iomem *sckcr;
-	unsigned long startup_usec;
-};
-
-#define to_clk_slow_osc(hw) container_of(hw, struct clk_slow_osc, hw)
-
-struct clk_slow_rc_osc {
-	struct clk_hw hw;
-	void __iomem *sckcr;
-	unsigned long frequency;
-	unsigned long accuracy;
-	unsigned long startup_usec;
-};
-
-#define to_clk_slow_rc_osc(hw) container_of(hw, struct clk_slow_rc_osc, hw)
 
 struct clk_sam9260_slow {
 	struct clk_hw hw;
@@ -57,328 +26,6 @@
 
 #define to_clk_sam9260_slow(hw) container_of(hw, struct clk_sam9260_slow, hw)
 
-struct clk_sam9x5_slow {
-	struct clk_hw hw;
-	void __iomem *sckcr;
-	u8 parent;
-};
-
-#define to_clk_sam9x5_slow(hw) container_of(hw, struct clk_sam9x5_slow, hw)
-
-static int clk_slow_osc_prepare(struct clk_hw *hw)
-{
-	struct clk_slow_osc *osc = to_clk_slow_osc(hw);
-	void __iomem *sckcr = osc->sckcr;
-	u32 tmp = readl(sckcr);
-
-	if (tmp & AT91_SCKC_OSC32BYP)
-		return 0;
-
-	writel(tmp | AT91_SCKC_OSC32EN, sckcr);
-
-	usleep_range(osc->startup_usec, osc->startup_usec + 1);
-
-	return 0;
-}
-
-static void clk_slow_osc_unprepare(struct clk_hw *hw)
-{
-	struct clk_slow_osc *osc = to_clk_slow_osc(hw);
-	void __iomem *sckcr = osc->sckcr;
-	u32 tmp = readl(sckcr);
-
-	if (tmp & AT91_SCKC_OSC32BYP)
-		return;
-
-	writel(tmp & ~AT91_SCKC_OSC32EN, sckcr);
-}
-
-static int clk_slow_osc_is_prepared(struct clk_hw *hw)
-{
-	struct clk_slow_osc *osc = to_clk_slow_osc(hw);
-	void __iomem *sckcr = osc->sckcr;
-	u32 tmp = readl(sckcr);
-
-	if (tmp & AT91_SCKC_OSC32BYP)
-		return 1;
-
-	return !!(tmp & AT91_SCKC_OSC32EN);
-}
-
-static const struct clk_ops slow_osc_ops = {
-	.prepare = clk_slow_osc_prepare,
-	.unprepare = clk_slow_osc_unprepare,
-	.is_prepared = clk_slow_osc_is_prepared,
-};
-
-static struct clk * __init
-at91_clk_register_slow_osc(void __iomem *sckcr,
-			   const char *name,
-			   const char *parent_name,
-			   unsigned long startup,
-			   bool bypass)
-{
-	struct clk_slow_osc *osc;
-	struct clk *clk = NULL;
-	struct clk_init_data init;
-
-	if (!sckcr || !name || !parent_name)
-		return ERR_PTR(-EINVAL);
-
-	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
-	if (!osc)
-		return ERR_PTR(-ENOMEM);
-
-	init.name = name;
-	init.ops = &slow_osc_ops;
-	init.parent_names = &parent_name;
-	init.num_parents = 1;
-	init.flags = CLK_IGNORE_UNUSED;
-
-	osc->hw.init = &init;
-	osc->sckcr = sckcr;
-	osc->startup_usec = startup;
-
-	if (bypass)
-		writel((readl(sckcr) & ~AT91_SCKC_OSC32EN) | AT91_SCKC_OSC32BYP,
-		       sckcr);
-
-	clk = clk_register(NULL, &osc->hw);
-	if (IS_ERR(clk))
-		kfree(osc);
-
-	return clk;
-}
-
-void __init of_at91sam9x5_clk_slow_osc_setup(struct device_node *np,
-					     void __iomem *sckcr)
-{
-	struct clk *clk;
-	const char *parent_name;
-	const char *name = np->name;
-	u32 startup;
-	bool bypass;
-
-	parent_name = of_clk_get_parent_name(np, 0);
-	of_property_read_string(np, "clock-output-names", &name);
-	of_property_read_u32(np, "atmel,startup-time-usec", &startup);
-	bypass = of_property_read_bool(np, "atmel,osc-bypass");
-
-	clk = at91_clk_register_slow_osc(sckcr, name, parent_name, startup,
-					 bypass);
-	if (IS_ERR(clk))
-		return;
-
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
-}
-
-static unsigned long clk_slow_rc_osc_recalc_rate(struct clk_hw *hw,
-						 unsigned long parent_rate)
-{
-	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
-
-	return osc->frequency;
-}
-
-static unsigned long clk_slow_rc_osc_recalc_accuracy(struct clk_hw *hw,
-						     unsigned long parent_acc)
-{
-	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
-
-	return osc->accuracy;
-}
-
-static int clk_slow_rc_osc_prepare(struct clk_hw *hw)
-{
-	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
-	void __iomem *sckcr = osc->sckcr;
-
-	writel(readl(sckcr) | AT91_SCKC_RCEN, sckcr);
-
-	usleep_range(osc->startup_usec, osc->startup_usec + 1);
-
-	return 0;
-}
-
-static void clk_slow_rc_osc_unprepare(struct clk_hw *hw)
-{
-	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
-	void __iomem *sckcr = osc->sckcr;
-
-	writel(readl(sckcr) & ~AT91_SCKC_RCEN, sckcr);
-}
-
-static int clk_slow_rc_osc_is_prepared(struct clk_hw *hw)
-{
-	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
-
-	return !!(readl(osc->sckcr) & AT91_SCKC_RCEN);
-}
-
-static const struct clk_ops slow_rc_osc_ops = {
-	.prepare = clk_slow_rc_osc_prepare,
-	.unprepare = clk_slow_rc_osc_unprepare,
-	.is_prepared = clk_slow_rc_osc_is_prepared,
-	.recalc_rate = clk_slow_rc_osc_recalc_rate,
-	.recalc_accuracy = clk_slow_rc_osc_recalc_accuracy,
-};
-
-static struct clk * __init
-at91_clk_register_slow_rc_osc(void __iomem *sckcr,
-			      const char *name,
-			      unsigned long frequency,
-			      unsigned long accuracy,
-			      unsigned long startup)
-{
-	struct clk_slow_rc_osc *osc;
-	struct clk *clk = NULL;
-	struct clk_init_data init;
-
-	if (!sckcr || !name)
-		return ERR_PTR(-EINVAL);
-
-	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
-	if (!osc)
-		return ERR_PTR(-ENOMEM);
-
-	init.name = name;
-	init.ops = &slow_rc_osc_ops;
-	init.parent_names = NULL;
-	init.num_parents = 0;
-	init.flags = CLK_IGNORE_UNUSED;
-
-	osc->hw.init = &init;
-	osc->sckcr = sckcr;
-	osc->frequency = frequency;
-	osc->accuracy = accuracy;
-	osc->startup_usec = startup;
-
-	clk = clk_register(NULL, &osc->hw);
-	if (IS_ERR(clk))
-		kfree(osc);
-
-	return clk;
-}
-
-void __init of_at91sam9x5_clk_slow_rc_osc_setup(struct device_node *np,
-						void __iomem *sckcr)
-{
-	struct clk *clk;
-	u32 frequency = 0;
-	u32 accuracy = 0;
-	u32 startup = 0;
-	const char *name = np->name;
-
-	of_property_read_string(np, "clock-output-names", &name);
-	of_property_read_u32(np, "clock-frequency", &frequency);
-	of_property_read_u32(np, "clock-accuracy", &accuracy);
-	of_property_read_u32(np, "atmel,startup-time-usec", &startup);
-
-	clk = at91_clk_register_slow_rc_osc(sckcr, name, frequency, accuracy,
-					    startup);
-	if (IS_ERR(clk))
-		return;
-
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
-}
-
-static int clk_sam9x5_slow_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clk_sam9x5_slow *slowck = to_clk_sam9x5_slow(hw);
-	void __iomem *sckcr = slowck->sckcr;
-	u32 tmp;
-
-	if (index > 1)
-		return -EINVAL;
-
-	tmp = readl(sckcr);
-
-	if ((!index && !(tmp & AT91_SCKC_OSCSEL)) ||
-	    (index && (tmp & AT91_SCKC_OSCSEL)))
-		return 0;
-
-	if (index)
-		tmp |= AT91_SCKC_OSCSEL;
-	else
-		tmp &= ~AT91_SCKC_OSCSEL;
-
-	writel(tmp, sckcr);
-
-	usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1);
-
-	return 0;
-}
-
-static u8 clk_sam9x5_slow_get_parent(struct clk_hw *hw)
-{
-	struct clk_sam9x5_slow *slowck = to_clk_sam9x5_slow(hw);
-
-	return !!(readl(slowck->sckcr) & AT91_SCKC_OSCSEL);
-}
-
-static const struct clk_ops sam9x5_slow_ops = {
-	.set_parent = clk_sam9x5_slow_set_parent,
-	.get_parent = clk_sam9x5_slow_get_parent,
-};
-
-static struct clk * __init
-at91_clk_register_sam9x5_slow(void __iomem *sckcr,
-			      const char *name,
-			      const char **parent_names,
-			      int num_parents)
-{
-	struct clk_sam9x5_slow *slowck;
-	struct clk *clk = NULL;
-	struct clk_init_data init;
-
-	if (!sckcr || !name || !parent_names || !num_parents)
-		return ERR_PTR(-EINVAL);
-
-	slowck = kzalloc(sizeof(*slowck), GFP_KERNEL);
-	if (!slowck)
-		return ERR_PTR(-ENOMEM);
-
-	init.name = name;
-	init.ops = &sam9x5_slow_ops;
-	init.parent_names = parent_names;
-	init.num_parents = num_parents;
-	init.flags = 0;
-
-	slowck->hw.init = &init;
-	slowck->sckcr = sckcr;
-	slowck->parent = !!(readl(sckcr) & AT91_SCKC_OSCSEL);
-
-	clk = clk_register(NULL, &slowck->hw);
-	if (IS_ERR(clk))
-		kfree(slowck);
-
-	return clk;
-}
-
-void __init of_at91sam9x5_clk_slow_setup(struct device_node *np,
-					 void __iomem *sckcr)
-{
-	struct clk *clk;
-	const char *parent_names[2];
-	unsigned int num_parents;
-	const char *name = np->name;
-
-	num_parents = of_clk_get_parent_count(np);
-	if (num_parents == 0 || num_parents > 2)
-		return;
-
-	of_clk_parent_fill(np, parent_names, num_parents);
-
-	of_property_read_string(np, "clock-output-names", &name);
-
-	clk = at91_clk_register_sam9x5_slow(sckcr, name, parent_names,
-					    num_parents);
-	if (IS_ERR(clk))
-		return;
-
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
-}
-
 static u8 clk_sam9260_slow_get_parent(struct clk_hw *hw)
 {
 	struct clk_sam9260_slow *slowck = to_clk_sam9260_slow(hw);
@@ -393,15 +40,16 @@
 	.get_parent = clk_sam9260_slow_get_parent,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_sam9260_slow(struct regmap *regmap,
 			       const char *name,
 			       const char **parent_names,
 			       int num_parents)
 {
 	struct clk_sam9260_slow *slowck;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
+	int ret;
 
 	if (!name)
 		return ERR_PTR(-EINVAL);
@@ -422,16 +70,19 @@
 	slowck->hw.init = &init;
 	slowck->regmap = regmap;
 
-	clk = clk_register(NULL, &slowck->hw);
-	if (IS_ERR(clk))
+	hw = &slowck->hw;
+	ret = clk_hw_register(NULL, &slowck->hw);
+	if (ret) {
 		kfree(slowck);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init of_at91sam9260_clk_slow_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *parent_names[2];
 	unsigned int num_parents;
 	const char *name = np->name;
@@ -448,12 +99,12 @@
 
 	of_property_read_string(np, "clock-output-names", &name);
 
-	clk = at91_clk_register_sam9260_slow(regmap, name, parent_names,
+	hw = at91_clk_register_sam9260_slow(regmap, name, parent_names,
 					     num_parents);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 
 CLK_OF_DECLARE(at91sam9260_clk_slow, "atmel,at91sam9260-clk-slow",
diff --git a/drivers/clk/at91/clk-smd.c b/drivers/clk/at91/clk-smd.c
index 3c04b06..965c662 100644
--- a/drivers/clk/at91/clk-smd.c
+++ b/drivers/clk/at91/clk-smd.c
@@ -111,13 +111,14 @@
 	.set_rate = at91sam9x5_clk_smd_set_rate,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91sam9x5_clk_register_smd(struct regmap *regmap, const char *name,
 			    const char **parent_names, u8 num_parents)
 {
 	struct at91sam9x5_clk_smd *smd;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
+	int ret;
 
 	smd = kzalloc(sizeof(*smd), GFP_KERNEL);
 	if (!smd)
@@ -132,16 +133,19 @@
 	smd->hw.init = &init;
 	smd->regmap = regmap;
 
-	clk = clk_register(NULL, &smd->hw);
-	if (IS_ERR(clk))
+	hw = &smd->hw;
+	ret = clk_hw_register(NULL, &smd->hw);
+	if (ret) {
 		kfree(smd);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init of_at91sam9x5_clk_smd_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	unsigned int num_parents;
 	const char *parent_names[SMD_SOURCE_MAX];
 	const char *name = np->name;
@@ -159,12 +163,12 @@
 	if (IS_ERR(regmap))
 		return;
 
-	clk = at91sam9x5_clk_register_smd(regmap, name, parent_names,
+	hw = at91sam9x5_clk_register_smd(regmap, name, parent_names,
 					  num_parents);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(at91sam9x5_clk_smd, "atmel,at91sam9x5-clk-smd",
 	       of_at91sam9x5_clk_smd_setup);
diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c
index 8f35d81..86a3680 100644
--- a/drivers/clk/at91/clk-system.c
+++ b/drivers/clk/at91/clk-system.c
@@ -88,13 +88,14 @@
 	.is_prepared = clk_system_is_prepared,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_system(struct regmap *regmap, const char *name,
 			 const char *parent_name, u8 id)
 {
 	struct clk_system *sys;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
+	int ret;
 
 	if (!parent_name || id > SYSTEM_MAX_ID)
 		return ERR_PTR(-EINVAL);
@@ -113,18 +114,21 @@
 	sys->hw.init = &init;
 	sys->regmap = regmap;
 
-	clk = clk_register(NULL, &sys->hw);
-	if (IS_ERR(clk))
+	hw = &sys->hw;
+	ret = clk_hw_register(NULL, &sys->hw);
+	if (ret) {
 		kfree(sys);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init of_at91rm9200_clk_sys_setup(struct device_node *np)
 {
 	int num;
 	u32 id;
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *name;
 	struct device_node *sysclknp;
 	const char *parent_name;
@@ -147,11 +151,11 @@
 
 		parent_name = of_clk_get_parent_name(sysclknp, 0);
 
-		clk = at91_clk_register_system(regmap, name, parent_name, id);
-		if (IS_ERR(clk))
+		hw = at91_clk_register_system(regmap, name, parent_name, id);
+		if (IS_ERR(hw))
 			continue;
 
-		of_clk_add_provider(sysclknp, of_clk_src_simple_get, clk);
+		of_clk_add_hw_provider(sysclknp, of_clk_hw_simple_get, hw);
 	}
 }
 CLK_OF_DECLARE(at91rm9200_clk_sys, "atmel,at91rm9200-clk-system",
diff --git a/drivers/clk/at91/clk-usb.c b/drivers/clk/at91/clk-usb.c
index d80bdb0..791770a 100644
--- a/drivers/clk/at91/clk-usb.c
+++ b/drivers/clk/at91/clk-usb.c
@@ -192,13 +192,14 @@
 	.set_rate = at91sam9x5_clk_usb_set_rate,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91sam9x5_clk_register_usb(struct regmap *regmap, const char *name,
 			    const char **parent_names, u8 num_parents)
 {
 	struct at91sam9x5_clk_usb *usb;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
+	int ret;
 
 	usb = kzalloc(sizeof(*usb), GFP_KERNEL);
 	if (!usb)
@@ -214,20 +215,24 @@
 	usb->hw.init = &init;
 	usb->regmap = regmap;
 
-	clk = clk_register(NULL, &usb->hw);
-	if (IS_ERR(clk))
+	hw = &usb->hw;
+	ret = clk_hw_register(NULL, &usb->hw);
+	if (ret) {
 		kfree(usb);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
-static struct clk * __init
+static struct clk_hw * __init
 at91sam9n12_clk_register_usb(struct regmap *regmap, const char *name,
 			     const char *parent_name)
 {
 	struct at91sam9x5_clk_usb *usb;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
+	int ret;
 
 	usb = kzalloc(sizeof(*usb), GFP_KERNEL);
 	if (!usb)
@@ -242,11 +247,14 @@
 	usb->hw.init = &init;
 	usb->regmap = regmap;
 
-	clk = clk_register(NULL, &usb->hw);
-	if (IS_ERR(clk))
+	hw = &usb->hw;
+	ret = clk_hw_register(NULL, &usb->hw);
+	if (ret) {
 		kfree(usb);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static unsigned long at91rm9200_clk_usb_recalc_rate(struct clk_hw *hw,
@@ -334,13 +342,14 @@
 	.set_rate = at91rm9200_clk_usb_set_rate,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91rm9200_clk_register_usb(struct regmap *regmap, const char *name,
 			    const char *parent_name, const u32 *divisors)
 {
 	struct at91rm9200_clk_usb *usb;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
+	int ret;
 
 	usb = kzalloc(sizeof(*usb), GFP_KERNEL);
 	if (!usb)
@@ -356,16 +365,19 @@
 	usb->regmap = regmap;
 	memcpy(usb->divisors, divisors, sizeof(usb->divisors));
 
-	clk = clk_register(NULL, &usb->hw);
-	if (IS_ERR(clk))
+	hw = &usb->hw;
+	ret = clk_hw_register(NULL, &usb->hw);
+	if (ret) {
 		kfree(usb);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init of_at91sam9x5_clk_usb_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	unsigned int num_parents;
 	const char *parent_names[USB_SOURCE_MAX];
 	const char *name = np->name;
@@ -383,19 +395,19 @@
 	if (IS_ERR(regmap))
 		return;
 
-	clk = at91sam9x5_clk_register_usb(regmap, name, parent_names,
-					  num_parents);
-	if (IS_ERR(clk))
+	hw = at91sam9x5_clk_register_usb(regmap, name, parent_names,
+					 num_parents);
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(at91sam9x5_clk_usb, "atmel,at91sam9x5-clk-usb",
 	       of_at91sam9x5_clk_usb_setup);
 
 static void __init of_at91sam9n12_clk_usb_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *parent_name;
 	const char *name = np->name;
 	struct regmap *regmap;
@@ -410,18 +422,18 @@
 	if (IS_ERR(regmap))
 		return;
 
-	clk = at91sam9n12_clk_register_usb(regmap, name, parent_name);
-	if (IS_ERR(clk))
+	hw = at91sam9n12_clk_register_usb(regmap, name, parent_name);
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(at91sam9n12_clk_usb, "atmel,at91sam9n12-clk-usb",
 	       of_at91sam9n12_clk_usb_setup);
 
 static void __init of_at91rm9200_clk_usb_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *parent_name;
 	const char *name = np->name;
 	u32 divisors[4] = {0, 0, 0, 0};
@@ -440,12 +452,11 @@
 	regmap = syscon_node_to_regmap(of_get_parent(np));
 	if (IS_ERR(regmap))
 		return;
-
-	clk = at91rm9200_clk_register_usb(regmap, name, parent_name, divisors);
-	if (IS_ERR(clk))
+	hw = at91rm9200_clk_register_usb(regmap, name, parent_name, divisors);
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(at91rm9200_clk_usb, "atmel,at91rm9200-clk-usb",
 	       of_at91rm9200_clk_usb_setup);
diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c
index 61fcf39..aadabd9 100644
--- a/drivers/clk/at91/clk-utmi.c
+++ b/drivers/clk/at91/clk-utmi.c
@@ -77,13 +77,14 @@
 	.recalc_rate = clk_utmi_recalc_rate,
 };
 
-static struct clk * __init
+static struct clk_hw * __init
 at91_clk_register_utmi(struct regmap *regmap,
 		       const char *name, const char *parent_name)
 {
 	struct clk_utmi *utmi;
-	struct clk *clk = NULL;
+	struct clk_hw *hw;
 	struct clk_init_data init;
+	int ret;
 
 	utmi = kzalloc(sizeof(*utmi), GFP_KERNEL);
 	if (!utmi)
@@ -98,16 +99,19 @@
 	utmi->hw.init = &init;
 	utmi->regmap = regmap;
 
-	clk = clk_register(NULL, &utmi->hw);
-	if (IS_ERR(clk))
+	hw = &utmi->hw;
+	ret = clk_hw_register(NULL, &utmi->hw);
+	if (ret) {
 		kfree(utmi);
+		hw = ERR_PTR(ret);
+	}
 
-	return clk;
+	return hw;
 }
 
 static void __init of_at91sam9x5_clk_utmi_setup(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *parent_name;
 	const char *name = np->name;
 	struct regmap *regmap;
@@ -120,11 +124,11 @@
 	if (IS_ERR(regmap))
 		return;
 
-	clk = at91_clk_register_utmi(regmap, name, parent_name);
-	if (IS_ERR(clk))
+	hw = at91_clk_register_utmi(regmap, name, parent_name);
+	if (IS_ERR(hw))
 		return;
 
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
 	return;
 }
 CLK_OF_DECLARE(at91sam9x5_clk_utmi, "atmel,at91sam9x5-clk-utmi",
diff --git a/drivers/clk/at91/sckc.c b/drivers/clk/at91/sckc.c
index 1184d76..ab6ecef 100644
--- a/drivers/clk/at91/sckc.c
+++ b/drivers/clk/at91/sckc.c
@@ -12,11 +12,382 @@
 
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
+#include <linux/delay.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/io.h>
 
-#include "sckc.h"
+#define SLOW_CLOCK_FREQ		32768
+#define SLOWCK_SW_CYCLES	5
+#define SLOWCK_SW_TIME_USEC	((SLOWCK_SW_CYCLES * USEC_PER_SEC) / \
+				 SLOW_CLOCK_FREQ)
+
+#define	AT91_SCKC_CR			0x00
+#define		AT91_SCKC_RCEN		(1 << 0)
+#define		AT91_SCKC_OSC32EN	(1 << 1)
+#define		AT91_SCKC_OSC32BYP	(1 << 2)
+#define		AT91_SCKC_OSCSEL	(1 << 3)
+
+struct clk_slow_osc {
+	struct clk_hw hw;
+	void __iomem *sckcr;
+	unsigned long startup_usec;
+};
+
+#define to_clk_slow_osc(hw) container_of(hw, struct clk_slow_osc, hw)
+
+struct clk_sama5d4_slow_osc {
+	struct clk_hw hw;
+	void __iomem *sckcr;
+	unsigned long startup_usec;
+	bool prepared;
+};
+
+#define to_clk_sama5d4_slow_osc(hw) container_of(hw, struct clk_sama5d4_slow_osc, hw)
+
+struct clk_slow_rc_osc {
+	struct clk_hw hw;
+	void __iomem *sckcr;
+	unsigned long frequency;
+	unsigned long accuracy;
+	unsigned long startup_usec;
+};
+
+#define to_clk_slow_rc_osc(hw) container_of(hw, struct clk_slow_rc_osc, hw)
+
+struct clk_sam9x5_slow {
+	struct clk_hw hw;
+	void __iomem *sckcr;
+	u8 parent;
+};
+
+#define to_clk_sam9x5_slow(hw) container_of(hw, struct clk_sam9x5_slow, hw)
+
+static int clk_slow_osc_prepare(struct clk_hw *hw)
+{
+	struct clk_slow_osc *osc = to_clk_slow_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+	u32 tmp = readl(sckcr);
+
+	if (tmp & (AT91_SCKC_OSC32BYP | AT91_SCKC_OSC32EN))
+		return 0;
+
+	writel(tmp | AT91_SCKC_OSC32EN, sckcr);
+
+	usleep_range(osc->startup_usec, osc->startup_usec + 1);
+
+	return 0;
+}
+
+static void clk_slow_osc_unprepare(struct clk_hw *hw)
+{
+	struct clk_slow_osc *osc = to_clk_slow_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+	u32 tmp = readl(sckcr);
+
+	if (tmp & AT91_SCKC_OSC32BYP)
+		return;
+
+	writel(tmp & ~AT91_SCKC_OSC32EN, sckcr);
+}
+
+static int clk_slow_osc_is_prepared(struct clk_hw *hw)
+{
+	struct clk_slow_osc *osc = to_clk_slow_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+	u32 tmp = readl(sckcr);
+
+	if (tmp & AT91_SCKC_OSC32BYP)
+		return 1;
+
+	return !!(tmp & AT91_SCKC_OSC32EN);
+}
+
+static const struct clk_ops slow_osc_ops = {
+	.prepare = clk_slow_osc_prepare,
+	.unprepare = clk_slow_osc_unprepare,
+	.is_prepared = clk_slow_osc_is_prepared,
+};
+
+static struct clk_hw * __init
+at91_clk_register_slow_osc(void __iomem *sckcr,
+			   const char *name,
+			   const char *parent_name,
+			   unsigned long startup,
+			   bool bypass)
+{
+	struct clk_slow_osc *osc;
+	struct clk_hw *hw;
+	struct clk_init_data init;
+	int ret;
+
+	if (!sckcr || !name || !parent_name)
+		return ERR_PTR(-EINVAL);
+
+	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
+	if (!osc)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &slow_osc_ops;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+	init.flags = CLK_IGNORE_UNUSED;
+
+	osc->hw.init = &init;
+	osc->sckcr = sckcr;
+	osc->startup_usec = startup;
+
+	if (bypass)
+		writel((readl(sckcr) & ~AT91_SCKC_OSC32EN) | AT91_SCKC_OSC32BYP,
+		       sckcr);
+
+	hw = &osc->hw;
+	ret = clk_hw_register(NULL, &osc->hw);
+	if (ret) {
+		kfree(osc);
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
+
+static void __init
+of_at91sam9x5_clk_slow_osc_setup(struct device_node *np, void __iomem *sckcr)
+{
+	struct clk_hw *hw;
+	const char *parent_name;
+	const char *name = np->name;
+	u32 startup;
+	bool bypass;
+
+	parent_name = of_clk_get_parent_name(np, 0);
+	of_property_read_string(np, "clock-output-names", &name);
+	of_property_read_u32(np, "atmel,startup-time-usec", &startup);
+	bypass = of_property_read_bool(np, "atmel,osc-bypass");
+
+	hw = at91_clk_register_slow_osc(sckcr, name, parent_name, startup,
+					 bypass);
+	if (IS_ERR(hw))
+		return;
+
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
+}
+
+static unsigned long clk_slow_rc_osc_recalc_rate(struct clk_hw *hw,
+						 unsigned long parent_rate)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+
+	return osc->frequency;
+}
+
+static unsigned long clk_slow_rc_osc_recalc_accuracy(struct clk_hw *hw,
+						     unsigned long parent_acc)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+
+	return osc->accuracy;
+}
+
+static int clk_slow_rc_osc_prepare(struct clk_hw *hw)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+
+	writel(readl(sckcr) | AT91_SCKC_RCEN, sckcr);
+
+	usleep_range(osc->startup_usec, osc->startup_usec + 1);
+
+	return 0;
+}
+
+static void clk_slow_rc_osc_unprepare(struct clk_hw *hw)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+
+	writel(readl(sckcr) & ~AT91_SCKC_RCEN, sckcr);
+}
+
+static int clk_slow_rc_osc_is_prepared(struct clk_hw *hw)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+
+	return !!(readl(osc->sckcr) & AT91_SCKC_RCEN);
+}
+
+static const struct clk_ops slow_rc_osc_ops = {
+	.prepare = clk_slow_rc_osc_prepare,
+	.unprepare = clk_slow_rc_osc_unprepare,
+	.is_prepared = clk_slow_rc_osc_is_prepared,
+	.recalc_rate = clk_slow_rc_osc_recalc_rate,
+	.recalc_accuracy = clk_slow_rc_osc_recalc_accuracy,
+};
+
+static struct clk_hw * __init
+at91_clk_register_slow_rc_osc(void __iomem *sckcr,
+			      const char *name,
+			      unsigned long frequency,
+			      unsigned long accuracy,
+			      unsigned long startup)
+{
+	struct clk_slow_rc_osc *osc;
+	struct clk_hw *hw;
+	struct clk_init_data init;
+	int ret;
+
+	if (!sckcr || !name)
+		return ERR_PTR(-EINVAL);
+
+	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
+	if (!osc)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &slow_rc_osc_ops;
+	init.parent_names = NULL;
+	init.num_parents = 0;
+	init.flags = CLK_IGNORE_UNUSED;
+
+	osc->hw.init = &init;
+	osc->sckcr = sckcr;
+	osc->frequency = frequency;
+	osc->accuracy = accuracy;
+	osc->startup_usec = startup;
+
+	hw = &osc->hw;
+	ret = clk_hw_register(NULL, &osc->hw);
+	if (ret) {
+		kfree(osc);
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
+
+static void __init
+of_at91sam9x5_clk_slow_rc_osc_setup(struct device_node *np, void __iomem *sckcr)
+{
+	struct clk_hw *hw;
+	u32 frequency = 0;
+	u32 accuracy = 0;
+	u32 startup = 0;
+	const char *name = np->name;
+
+	of_property_read_string(np, "clock-output-names", &name);
+	of_property_read_u32(np, "clock-frequency", &frequency);
+	of_property_read_u32(np, "clock-accuracy", &accuracy);
+	of_property_read_u32(np, "atmel,startup-time-usec", &startup);
+
+	hw = at91_clk_register_slow_rc_osc(sckcr, name, frequency, accuracy,
+					    startup);
+	if (IS_ERR(hw))
+		return;
+
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
+}
+
+static int clk_sam9x5_slow_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct clk_sam9x5_slow *slowck = to_clk_sam9x5_slow(hw);
+	void __iomem *sckcr = slowck->sckcr;
+	u32 tmp;
+
+	if (index > 1)
+		return -EINVAL;
+
+	tmp = readl(sckcr);
+
+	if ((!index && !(tmp & AT91_SCKC_OSCSEL)) ||
+	    (index && (tmp & AT91_SCKC_OSCSEL)))
+		return 0;
+
+	if (index)
+		tmp |= AT91_SCKC_OSCSEL;
+	else
+		tmp &= ~AT91_SCKC_OSCSEL;
+
+	writel(tmp, sckcr);
+
+	usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1);
+
+	return 0;
+}
+
+static u8 clk_sam9x5_slow_get_parent(struct clk_hw *hw)
+{
+	struct clk_sam9x5_slow *slowck = to_clk_sam9x5_slow(hw);
+
+	return !!(readl(slowck->sckcr) & AT91_SCKC_OSCSEL);
+}
+
+static const struct clk_ops sam9x5_slow_ops = {
+	.set_parent = clk_sam9x5_slow_set_parent,
+	.get_parent = clk_sam9x5_slow_get_parent,
+};
+
+static struct clk_hw * __init
+at91_clk_register_sam9x5_slow(void __iomem *sckcr,
+			      const char *name,
+			      const char **parent_names,
+			      int num_parents)
+{
+	struct clk_sam9x5_slow *slowck;
+	struct clk_hw *hw;
+	struct clk_init_data init;
+	int ret;
+
+	if (!sckcr || !name || !parent_names || !num_parents)
+		return ERR_PTR(-EINVAL);
+
+	slowck = kzalloc(sizeof(*slowck), GFP_KERNEL);
+	if (!slowck)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &sam9x5_slow_ops;
+	init.parent_names = parent_names;
+	init.num_parents = num_parents;
+	init.flags = 0;
+
+	slowck->hw.init = &init;
+	slowck->sckcr = sckcr;
+	slowck->parent = !!(readl(sckcr) & AT91_SCKC_OSCSEL);
+
+	hw = &slowck->hw;
+	ret = clk_hw_register(NULL, &slowck->hw);
+	if (ret) {
+		kfree(slowck);
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
+
+static void __init
+of_at91sam9x5_clk_slow_setup(struct device_node *np, void __iomem *sckcr)
+{
+	struct clk_hw *hw;
+	const char *parent_names[2];
+	unsigned int num_parents;
+	const char *name = np->name;
+
+	num_parents = of_clk_get_parent_count(np);
+	if (num_parents == 0 || num_parents > 2)
+		return;
+
+	of_clk_parent_fill(np, parent_names, num_parents);
+
+	of_property_read_string(np, "clock-output-names", &name);
+
+	hw = at91_clk_register_sam9x5_slow(sckcr, name, parent_names,
+					    num_parents);
+	if (IS_ERR(hw))
+		return;
+
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
+}
 
 static const struct of_device_id sckc_clk_ids[] __initconst = {
 	/* Slow clock */
@@ -55,3 +426,94 @@
 }
 CLK_OF_DECLARE(at91sam9x5_clk_sckc, "atmel,at91sam9x5-sckc",
 	       of_at91sam9x5_sckc_setup);
+
+static int clk_sama5d4_slow_osc_prepare(struct clk_hw *hw)
+{
+	struct clk_sama5d4_slow_osc *osc = to_clk_sama5d4_slow_osc(hw);
+
+	if (osc->prepared)
+		return 0;
+
+	/*
+	 * Assume that if it has already been selected (for example by the
+	 * bootloader), enough time has aready passed.
+	 */
+	if ((readl(osc->sckcr) & AT91_SCKC_OSCSEL)) {
+		osc->prepared = true;
+		return 0;
+	}
+
+	usleep_range(osc->startup_usec, osc->startup_usec + 1);
+	osc->prepared = true;
+
+	return 0;
+}
+
+static int clk_sama5d4_slow_osc_is_prepared(struct clk_hw *hw)
+{
+	struct clk_sama5d4_slow_osc *osc = to_clk_sama5d4_slow_osc(hw);
+
+	return osc->prepared;
+}
+
+static const struct clk_ops sama5d4_slow_osc_ops = {
+	.prepare = clk_sama5d4_slow_osc_prepare,
+	.is_prepared = clk_sama5d4_slow_osc_is_prepared,
+};
+
+static void __init of_sama5d4_sckc_setup(struct device_node *np)
+{
+	void __iomem *regbase = of_iomap(np, 0);
+	struct clk_hw *hw;
+	struct clk_sama5d4_slow_osc *osc;
+	struct clk_init_data init;
+	const char *xtal_name;
+	const char *parent_names[2] = { "slow_rc_osc", "slow_osc" };
+	bool bypass;
+	int ret;
+
+	if (!regbase)
+		return;
+
+	hw = clk_hw_register_fixed_rate_with_accuracy(NULL, parent_names[0],
+						      NULL, 0, 32768,
+						      250000000);
+	if (IS_ERR(hw))
+		return;
+
+	xtal_name = of_clk_get_parent_name(np, 0);
+
+	bypass = of_property_read_bool(np, "atmel,osc-bypass");
+
+	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
+	if (!osc)
+		return;
+
+	init.name = parent_names[1];
+	init.ops = &sama5d4_slow_osc_ops;
+	init.parent_names = &xtal_name;
+	init.num_parents = 1;
+	init.flags = CLK_IGNORE_UNUSED;
+
+	osc->hw.init = &init;
+	osc->sckcr = regbase;
+	osc->startup_usec = 1200000;
+
+	if (bypass)
+		writel((readl(regbase) | AT91_SCKC_OSC32BYP), regbase);
+
+	hw = &osc->hw;
+	ret = clk_hw_register(NULL, &osc->hw);
+	if (ret) {
+		kfree(osc);
+		return;
+	}
+
+	hw = at91_clk_register_sam9x5_slow(regbase, "slowck", parent_names, 2);
+	if (IS_ERR(hw))
+		return;
+
+	of_clk_add_hw_provider(np, of_clk_hw_simple_get, hw);
+}
+CLK_OF_DECLARE(sama5d4_clk_sckc, "atmel,sama5d4-sckc",
+	       of_sama5d4_sckc_setup);
diff --git a/drivers/clk/at91/sckc.h b/drivers/clk/at91/sckc.h
deleted file mode 100644
index 836fcf5..0000000
--- a/drivers/clk/at91/sckc.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * drivers/clk/at91/sckc.h
- *
- *  Copyright (C) 2013 Boris BREZILLON <b.brezillon@overkiz.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __AT91_SCKC_H_
-#define __AT91_SCKC_H_
-
-extern void __init of_at91sam9x5_clk_slow_osc_setup(struct device_node *np,
-						    void __iomem *sckcr);
-extern void __init of_at91sam9x5_clk_slow_rc_osc_setup(struct device_node *np,
-						       void __iomem *sckcr);
-extern void __init of_at91sam9x5_clk_slow_setup(struct device_node *np,
-						void __iomem *sckcr);
-
-#endif /* __AT91_SCKC_H_ */
diff --git a/drivers/clk/axis/clk-artpec6.c b/drivers/clk/axis/clk-artpec6.c
index ffc988b..da1a073 100644
--- a/drivers/clk/axis/clk-artpec6.c
+++ b/drivers/clk/axis/clk-artpec6.c
@@ -113,8 +113,8 @@
 	of_clk_add_provider(np, of_clk_src_onecell_get, &clkdata->clk_data);
 }
 
-CLK_OF_DECLARE(artpec6_clkctrl, "axis,artpec6-clkctrl",
-	       of_artpec6_clkctrl_setup);
+CLK_OF_DECLARE_DRIVER(artpec6_clkctrl, "axis,artpec6-clkctrl",
+		      of_artpec6_clkctrl_setup);
 
 static int artpec6_clkctrl_probe(struct platform_device *pdev)
 {
diff --git a/drivers/clk/bcm/Kconfig b/drivers/clk/bcm/Kconfig
index f287845..f21e9b7 100644
--- a/drivers/clk/bcm/Kconfig
+++ b/drivers/clk/bcm/Kconfig
@@ -19,8 +19,36 @@
 	  in the BCM281xx and BCM21664 families.
 
 config COMMON_CLK_IPROC
-	bool
+	bool "Broadcom iProc clock support"
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
 	depends on COMMON_CLK
+	default ARCH_BCM_IPROC
 	help
 	  Enable common clock framework support for Broadcom SoCs
 	  based on the iProc architecture
+
+if COMMON_CLK_IPROC
+
+config CLK_BCM_CYGNUS
+	bool "Broadcom Cygnus clock support"
+	depends on ARCH_BCM_CYGNUS || COMPILE_TEST
+	default ARCH_BCM_CYGNUS
+	help
+	  Enable common clock framework support for the Broadcom Cygnus SoC
+
+config CLK_BCM_NSP
+	bool "Broadcom Northstar/Northstar Plus clock support"
+	depends on ARCH_BCM_5301X || ARCH_BCM_NSP || COMPILE_TEST
+	default ARCH_BCM_5301X || ARCH_BCM_NSP
+	help
+	  Enable common clock framework support for the Broadcom Northstar and
+	  Northstar Plus SoCs
+
+config CLK_BCM_NS2
+	bool "Broadcom Northstar 2 clock support"
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	default ARCH_BCM_IPROC
+	help
+	  Enable common clock framework support for the Broadcom Northstar 2 SoC
+
+endif
diff --git a/drivers/clk/bcm/Makefile b/drivers/clk/bcm/Makefile
index 1d79bd2..d9dc848 100644
--- a/drivers/clk/bcm/Makefile
+++ b/drivers/clk/bcm/Makefile
@@ -6,7 +6,7 @@
 obj-$(CONFIG_COMMON_CLK_IPROC)	+= clk-iproc-armpll.o clk-iproc-pll.o clk-iproc-asiu.o
 obj-$(CONFIG_ARCH_BCM2835)	+= clk-bcm2835.o
 obj-$(CONFIG_ARCH_BCM2835)	+= clk-bcm2835-aux.o
-obj-$(CONFIG_COMMON_CLK_IPROC)	+= clk-ns2.o
-obj-$(CONFIG_ARCH_BCM_CYGNUS)	+= clk-cygnus.o
-obj-$(CONFIG_ARCH_BCM_NSP)	+= clk-nsp.o
-obj-$(CONFIG_ARCH_BCM_5301X)	+= clk-nsp.o
+obj-$(CONFIG_ARCH_BCM_53573)	+= clk-bcm53573-ilp.o
+obj-$(CONFIG_CLK_BCM_CYGNUS)	+= clk-cygnus.o
+obj-$(CONFIG_CLK_BCM_NSP)	+= clk-nsp.o
+obj-$(CONFIG_CLK_BCM_NS2)	+= clk-ns2.o
diff --git a/drivers/clk/bcm/clk-bcm2835-aux.c b/drivers/clk/bcm/clk-bcm2835-aux.c
index 3a177ad..bd750cf 100644
--- a/drivers/clk/bcm/clk-bcm2835-aux.c
+++ b/drivers/clk/bcm/clk-bcm2835-aux.c
@@ -25,7 +25,7 @@
 static int bcm2835_aux_clk_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct clk_onecell_data *onecell;
+	struct clk_hw_onecell_data *onecell;
 	const char *parent;
 	struct clk *parent_clk;
 	struct resource *res;
@@ -41,28 +41,24 @@
 	if (IS_ERR(reg))
 		return PTR_ERR(reg);
 
-	onecell = devm_kmalloc(dev, sizeof(*onecell), GFP_KERNEL);
+	onecell = devm_kmalloc(dev, sizeof(*onecell) + sizeof(*onecell->hws) *
+			       BCM2835_AUX_CLOCK_COUNT, GFP_KERNEL);
 	if (!onecell)
 		return -ENOMEM;
-	onecell->clk_num = BCM2835_AUX_CLOCK_COUNT;
-	onecell->clks = devm_kcalloc(dev, BCM2835_AUX_CLOCK_COUNT,
-				     sizeof(*onecell->clks), GFP_KERNEL);
-	if (!onecell->clks)
-		return -ENOMEM;
+	onecell->num = BCM2835_AUX_CLOCK_COUNT;
 
 	gate = reg + BCM2835_AUXENB;
-	onecell->clks[BCM2835_AUX_CLOCK_UART] =
-		clk_register_gate(dev, "aux_uart", parent, 0, gate, 0, 0, NULL);
+	onecell->hws[BCM2835_AUX_CLOCK_UART] =
+		clk_hw_register_gate(dev, "aux_uart", parent, 0, gate, 0, 0, NULL);
 
-	onecell->clks[BCM2835_AUX_CLOCK_SPI1] =
-		clk_register_gate(dev, "aux_spi1", parent, 0, gate, 1, 0, NULL);
+	onecell->hws[BCM2835_AUX_CLOCK_SPI1] =
+		clk_hw_register_gate(dev, "aux_spi1", parent, 0, gate, 1, 0, NULL);
 
-	onecell->clks[BCM2835_AUX_CLOCK_SPI2] =
-		clk_register_gate(dev, "aux_spi2", parent, 0, gate, 2, 0, NULL);
+	onecell->hws[BCM2835_AUX_CLOCK_SPI2] =
+		clk_hw_register_gate(dev, "aux_spi2", parent, 0, gate, 2, 0, NULL);
 
-	of_clk_add_provider(pdev->dev.of_node, of_clk_src_onecell_get, onecell);
-
-	return 0;
+	return of_clk_add_hw_provider(pdev->dev.of_node, of_clk_hw_onecell_get,
+				      onecell);
 }
 
 static const struct of_device_id bcm2835_aux_clk_of_match[] = {
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index 7a79708..b68bf57 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -36,6 +36,7 @@
 
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
+#include <linux/clk.h>
 #include <linux/clk/bcm2835.h>
 #include <linux/debugfs.h>
 #include <linux/module.h>
@@ -302,8 +303,8 @@
 	spinlock_t regs_lock; /* spinlock for all clocks */
 	const char *osc_name;
 
-	struct clk_onecell_data onecell;
-	struct clk *clks[];
+	/* Must be last */
+	struct clk_hw_onecell_data onecell;
 };
 
 static inline void cprman_write(struct bcm2835_cprman *cprman, u32 reg, u32 val)
@@ -344,24 +345,24 @@
  */
 void __init bcm2835_init_clocks(void)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
 	int ret;
 
-	clk = clk_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 126000000);
-	if (IS_ERR(clk))
+	hw = clk_hw_register_fixed_rate(NULL, "apb_pclk", NULL, 0, 126000000);
+	if (IS_ERR(hw))
 		pr_err("apb_pclk not registered\n");
 
-	clk = clk_register_fixed_rate(NULL, "uart0_pclk", NULL, 0, 3000000);
-	if (IS_ERR(clk))
+	hw = clk_hw_register_fixed_rate(NULL, "uart0_pclk", NULL, 0, 3000000);
+	if (IS_ERR(hw))
 		pr_err("uart0_pclk not registered\n");
-	ret = clk_register_clkdev(clk, NULL, "20201000.uart");
+	ret = clk_hw_register_clkdev(hw, NULL, "20201000.uart");
 	if (ret)
 		pr_err("uart0_pclk alias not registered\n");
 
-	clk = clk_register_fixed_rate(NULL, "uart1_pclk", NULL, 0, 125000000);
-	if (IS_ERR(clk))
+	hw = clk_hw_register_fixed_rate(NULL, "uart1_pclk", NULL, 0, 125000000);
+	if (IS_ERR(hw))
 		pr_err("uart1_pclk not registered\n");
-	ret = clk_register_clkdev(clk, NULL, "20215000.uart");
+	ret = clk_hw_register_clkdev(hw, NULL, "20215000.uart");
 	if (ret)
 		pr_err("uart1_pclk alias not registered\n");
 }
@@ -443,6 +444,8 @@
 	/* Number of fractional bits in the divider */
 	u32 frac_bits;
 
+	u32 flags;
+
 	bool is_vpu_clock;
 	bool is_mash_clock;
 };
@@ -1006,16 +1009,28 @@
 	return 0;
 }
 
+static bool
+bcm2835_clk_is_pllc(struct clk_hw *hw)
+{
+	if (!hw)
+		return false;
+
+	return strncmp(clk_hw_get_name(hw), "pllc", 4) == 0;
+}
+
 static int bcm2835_clock_determine_rate(struct clk_hw *hw,
 					struct clk_rate_request *req)
 {
 	struct bcm2835_clock *clock = bcm2835_clock_from_hw(hw);
 	struct clk_hw *parent, *best_parent = NULL;
+	bool current_parent_is_pllc;
 	unsigned long rate, best_rate = 0;
 	unsigned long prate, best_prate = 0;
 	size_t i;
 	u32 div;
 
+	current_parent_is_pllc = bcm2835_clk_is_pllc(clk_hw_get_parent(hw));
+
 	/*
 	 * Select parent clock that results in the closest but lower rate
 	 */
@@ -1023,6 +1038,17 @@
 		parent = clk_hw_get_parent_by_index(hw, i);
 		if (!parent)
 			continue;
+
+		/*
+		 * Don't choose a PLLC-derived clock as our parent
+		 * unless it had been manually set that way.  PLLC's
+		 * frequency gets adjusted by the firmware due to
+		 * over-temp or under-voltage conditions, without
+		 * prior notification to our clock consumer.
+		 */
+		if (bcm2835_clk_is_pllc(parent) && !current_parent_is_pllc)
+			continue;
+
 		prate = clk_hw_get_rate(parent);
 		div = bcm2835_clock_choose_div(hw, req->rate, prate, true);
 		rate = bcm2835_clock_rate_from_divisor(clock, prate, div);
@@ -1121,11 +1147,12 @@
 	.debug_init = bcm2835_clock_debug_init,
 };
 
-static struct clk *bcm2835_register_pll(struct bcm2835_cprman *cprman,
-					const struct bcm2835_pll_data *data)
+static struct clk_hw *bcm2835_register_pll(struct bcm2835_cprman *cprman,
+					   const struct bcm2835_pll_data *data)
 {
 	struct bcm2835_pll *pll;
 	struct clk_init_data init;
+	int ret;
 
 	memset(&init, 0, sizeof(init));
 
@@ -1144,17 +1171,20 @@
 	pll->data = data;
 	pll->hw.init = &init;
 
-	return devm_clk_register(cprman->dev, &pll->hw);
+	ret = devm_clk_hw_register(cprman->dev, &pll->hw);
+	if (ret)
+		return NULL;
+	return &pll->hw;
 }
 
-static struct clk *
+static struct clk_hw *
 bcm2835_register_pll_divider(struct bcm2835_cprman *cprman,
 			     const struct bcm2835_pll_divider_data *data)
 {
 	struct bcm2835_pll_divider *divider;
 	struct clk_init_data init;
-	struct clk *clk;
 	const char *divider_name;
+	int ret;
 
 	if (data->fixed_divider != 1) {
 		divider_name = devm_kasprintf(cprman->dev, GFP_KERNEL,
@@ -1188,32 +1218,33 @@
 	divider->cprman = cprman;
 	divider->data = data;
 
-	clk = devm_clk_register(cprman->dev, &divider->div.hw);
-	if (IS_ERR(clk))
-		return clk;
+	ret = devm_clk_hw_register(cprman->dev, &divider->div.hw);
+	if (ret)
+		return ERR_PTR(ret);
 
 	/*
 	 * PLLH's channels have a fixed divide by 10 afterwards, which
 	 * is what our consumers are actually using.
 	 */
 	if (data->fixed_divider != 1) {
-		return clk_register_fixed_factor(cprman->dev, data->name,
-						 divider_name,
-						 CLK_SET_RATE_PARENT,
-						 1,
-						 data->fixed_divider);
+		return clk_hw_register_fixed_factor(cprman->dev, data->name,
+						    divider_name,
+						    CLK_SET_RATE_PARENT,
+						    1,
+						    data->fixed_divider);
 	}
 
-	return clk;
+	return &divider->div.hw;
 }
 
-static struct clk *bcm2835_register_clock(struct bcm2835_cprman *cprman,
+static struct clk_hw *bcm2835_register_clock(struct bcm2835_cprman *cprman,
 					  const struct bcm2835_clock_data *data)
 {
 	struct bcm2835_clock *clock;
 	struct clk_init_data init;
 	const char *parents[1 << CM_SRC_BITS];
 	size_t i;
+	int ret;
 
 	/*
 	 * Replace our "xosc" references with the oscillator's
@@ -1230,13 +1261,19 @@
 	init.parent_names = parents;
 	init.num_parents = data->num_mux_parents;
 	init.name = data->name;
-	init.flags = CLK_IGNORE_UNUSED;
+	init.flags = data->flags | CLK_IGNORE_UNUSED;
 
 	if (data->is_vpu_clock) {
 		init.ops = &bcm2835_vpu_clock_clk_ops;
 	} else {
 		init.ops = &bcm2835_clock_clk_ops;
 		init.flags |= CLK_SET_RATE_GATE | CLK_SET_PARENT_GATE;
+
+		/* If the clock wasn't actually enabled at boot, it's not
+		 * critical.
+		 */
+		if (!(cprman_read(cprman, data->ctl_reg) & CM_ENABLE))
+			init.flags &= ~CLK_IS_CRITICAL;
 	}
 
 	clock = devm_kzalloc(cprman->dev, sizeof(*clock), GFP_KERNEL);
@@ -1247,7 +1284,10 @@
 	clock->data = data;
 	clock->hw.init = &init;
 
-	return devm_clk_register(cprman->dev, &clock->hw);
+	ret = devm_clk_hw_register(cprman->dev, &clock->hw);
+	if (ret)
+		return ERR_PTR(ret);
+	return &clock->hw;
 }
 
 static struct clk *bcm2835_register_gate(struct bcm2835_cprman *cprman,
@@ -1259,8 +1299,8 @@
 				 CM_GATE_BIT, 0, &cprman->regs_lock);
 }
 
-typedef struct clk *(*bcm2835_clk_register)(struct bcm2835_cprman *cprman,
-					    const void *data);
+typedef struct clk_hw *(*bcm2835_clk_register)(struct bcm2835_cprman *cprman,
+					       const void *data);
 struct bcm2835_clk_desc {
 	bcm2835_clk_register clk_register;
 	const void *data;
@@ -1649,6 +1689,7 @@
 		.div_reg = CM_VPUDIV,
 		.int_bits = 12,
 		.frac_bits = 8,
+		.flags = CLK_IS_CRITICAL,
 		.is_vpu_clock = true),
 
 	/* clocks with per parent mux */
@@ -1705,13 +1746,15 @@
 		.div_reg = CM_GP1DIV,
 		.int_bits = 12,
 		.frac_bits = 12,
+		.flags = CLK_IS_CRITICAL,
 		.is_mash_clock = true),
 	[BCM2835_CLOCK_GP2]	= REGISTER_PER_CLK(
 		.name = "gp2",
 		.ctl_reg = CM_GP2CTL,
 		.div_reg = CM_GP2DIV,
 		.int_bits = 12,
-		.frac_bits = 12),
+		.frac_bits = 12,
+		.flags = CLK_IS_CRITICAL),
 
 	/* HDMI state machine */
 	[BCM2835_CLOCK_HSM]	= REGISTER_PER_CLK(
@@ -1790,18 +1833,38 @@
 		.ctl_reg = CM_PERIICTL),
 };
 
+/*
+ * Permanently take a reference on the parent of the SDRAM clock.
+ *
+ * While the SDRAM is being driven by its dedicated PLL most of the
+ * time, there is a little loop running in the firmware that
+ * periodically switches the SDRAM to using our CM clock to do PVT
+ * recalibration, with the assumption that the previously configured
+ * SDRAM parent is still enabled and running.
+ */
+static int bcm2835_mark_sdc_parent_critical(struct clk *sdc)
+{
+	struct clk *parent = clk_get_parent(sdc);
+
+	if (IS_ERR(parent))
+		return PTR_ERR(parent);
+
+	return clk_prepare_enable(parent);
+}
+
 static int bcm2835_clk_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct clk **clks;
+	struct clk_hw **hws;
 	struct bcm2835_cprman *cprman;
 	struct resource *res;
 	const struct bcm2835_clk_desc *desc;
 	const size_t asize = ARRAY_SIZE(clk_desc_array);
 	size_t i;
+	int ret;
 
-	cprman = devm_kzalloc(dev,
-			      sizeof(*cprman) + asize * sizeof(*clks),
+	cprman = devm_kzalloc(dev, sizeof(*cprman) +
+			      sizeof(*cprman->onecell.hws) * asize,
 			      GFP_KERNEL);
 	if (!cprman)
 		return -ENOMEM;
@@ -1819,18 +1882,21 @@
 
 	platform_set_drvdata(pdev, cprman);
 
-	cprman->onecell.clk_num = asize;
-	cprman->onecell.clks = cprman->clks;
-	clks = cprman->clks;
+	cprman->onecell.num = asize;
+	hws = cprman->onecell.hws;
 
 	for (i = 0; i < asize; i++) {
 		desc = &clk_desc_array[i];
 		if (desc->clk_register && desc->data)
-			clks[i] = desc->clk_register(cprman, desc->data);
+			hws[i] = desc->clk_register(cprman, desc->data);
 	}
 
-	return of_clk_add_provider(dev->of_node, of_clk_src_onecell_get,
-				   &cprman->onecell);
+	ret = bcm2835_mark_sdc_parent_critical(hws[BCM2835_CLOCK_SDRAM]->clk);
+	if (ret)
+		return ret;
+
+	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
+				      &cprman->onecell);
 }
 
 static const struct of_device_id bcm2835_clk_of_match[] = {
diff --git a/drivers/clk/bcm/clk-bcm53573-ilp.c b/drivers/clk/bcm/clk-bcm53573-ilp.c
new file mode 100644
index 0000000..36eb371
--- /dev/null
+++ b/drivers/clk/bcm/clk-bcm53573-ilp.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright (C) 2016 Rafał Miłecki <rafal@milecki.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define PMU_XTAL_FREQ_RATIO			0x66c
+#define  XTAL_ALP_PER_4ILP			0x00001fff
+#define  XTAL_CTL_EN				0x80000000
+#define PMU_SLOW_CLK_PERIOD			0x6dc
+
+struct bcm53573_ilp {
+	struct clk_hw hw;
+	struct regmap *regmap;
+};
+
+static int bcm53573_ilp_enable(struct clk_hw *hw)
+{
+	struct bcm53573_ilp *ilp = container_of(hw, struct bcm53573_ilp, hw);
+
+	regmap_write(ilp->regmap, PMU_SLOW_CLK_PERIOD, 0x10199);
+	regmap_write(ilp->regmap, 0x674, 0x10000);
+
+	return 0;
+}
+
+static void bcm53573_ilp_disable(struct clk_hw *hw)
+{
+	struct bcm53573_ilp *ilp = container_of(hw, struct bcm53573_ilp, hw);
+
+	regmap_write(ilp->regmap, PMU_SLOW_CLK_PERIOD, 0);
+	regmap_write(ilp->regmap, 0x674, 0);
+}
+
+static unsigned long bcm53573_ilp_recalc_rate(struct clk_hw *hw,
+					      unsigned long parent_rate)
+{
+	struct bcm53573_ilp *ilp = container_of(hw, struct bcm53573_ilp, hw);
+	struct regmap *regmap = ilp->regmap;
+	u32 last_val, cur_val;
+	int sum = 0, num = 0, loop_num = 0;
+	int avg;
+
+	/* Enable measurement */
+	regmap_write(regmap, PMU_XTAL_FREQ_RATIO, XTAL_CTL_EN);
+
+	/* Read initial value */
+	regmap_read(regmap, PMU_XTAL_FREQ_RATIO, &last_val);
+	last_val &= XTAL_ALP_PER_4ILP;
+
+	/*
+	 * At minimum we should loop for a bit to let hardware do the
+	 * measurement. This isn't very accurate however, so for a better
+	 * precision lets try getting 20 different values for and use average.
+	 */
+	while (num < 20) {
+		regmap_read(regmap, PMU_XTAL_FREQ_RATIO, &cur_val);
+		cur_val &= XTAL_ALP_PER_4ILP;
+
+		if (cur_val != last_val) {
+			/* Got different value, use it */
+			sum += cur_val;
+			num++;
+			loop_num = 0;
+			last_val = cur_val;
+		} else if (++loop_num > 5000) {
+			/* Same value over and over, give up */
+			sum += cur_val;
+			num++;
+			break;
+		}
+
+		cpu_relax();
+	}
+
+	/* Disable measurement to save power */
+	regmap_write(regmap, PMU_XTAL_FREQ_RATIO, 0x0);
+
+	avg = sum / num;
+
+	return parent_rate * 4 / avg;
+}
+
+static const struct clk_ops bcm53573_ilp_clk_ops = {
+	.enable = bcm53573_ilp_enable,
+	.disable = bcm53573_ilp_disable,
+	.recalc_rate = bcm53573_ilp_recalc_rate,
+};
+
+static void bcm53573_ilp_init(struct device_node *np)
+{
+	struct bcm53573_ilp *ilp;
+	struct clk_init_data init = { };
+	const char *parent_name;
+	int err;
+
+	ilp = kzalloc(sizeof(*ilp), GFP_KERNEL);
+	if (!ilp)
+		return;
+
+	parent_name = of_clk_get_parent_name(np, 0);
+	if (!parent_name) {
+		err = -ENOENT;
+		goto err_free_ilp;
+	}
+
+	ilp->regmap = syscon_node_to_regmap(of_get_parent(np));
+	if (IS_ERR(ilp->regmap)) {
+		err = PTR_ERR(ilp->regmap);
+		goto err_free_ilp;
+	}
+
+	init.name = np->name;
+	init.ops = &bcm53573_ilp_clk_ops;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	ilp->hw.init = &init;
+	err = clk_hw_register(NULL, &ilp->hw);
+	if (err)
+		goto err_free_ilp;
+
+	err = of_clk_add_hw_provider(np, of_clk_hw_simple_get, &ilp->hw);
+	if (err)
+		goto err_clk_hw_unregister;
+
+	return;
+
+err_clk_hw_unregister:
+	clk_hw_unregister(&ilp->hw);
+err_free_ilp:
+	kfree(ilp);
+	pr_err("Failed to init ILP clock: %d\n", err);
+}
+
+/* We need it very early for arch code, before device model gets ready */
+CLK_OF_DECLARE(bcm53573_ilp_clk, "brcm,bcm53573-ilp", bcm53573_ilp_init);
diff --git a/drivers/clk/bcm/clk-kona-setup.c b/drivers/clk/bcm/clk-kona-setup.c
index 526b0b0..c37a7f0 100644
--- a/drivers/clk/bcm/clk-kona-setup.c
+++ b/drivers/clk/bcm/clk-kona-setup.c
@@ -586,8 +586,8 @@
 	}
 
 	/* There is at least one parent, so allocate a selector array */
-
-	parent_sel = kmalloc(parent_count * sizeof(*parent_sel), GFP_KERNEL);
+	parent_sel = kmalloc_array(parent_count, sizeof(*parent_sel),
+				   GFP_KERNEL);
 	if (!parent_sel) {
 		pr_err("%s: error allocating %u parent selectors\n", __func__,
 				parent_count);
@@ -696,77 +696,69 @@
 	bcm_clk->type = bcm_clk_none;
 }
 
-static void kona_clk_teardown(struct clk *clk)
+static void kona_clk_teardown(struct clk_hw *hw)
 {
-	struct clk_hw *hw;
 	struct kona_clk *bcm_clk;
 
-	if (!clk)
+	if (!hw)
 		return;
 
-	hw = __clk_get_hw(clk);
-	if (!hw) {
-		pr_err("%s: clk %p has null hw pointer\n", __func__, clk);
-		return;
-	}
-	clk_unregister(clk);
+	clk_hw_unregister(hw);
 
 	bcm_clk = to_kona_clk(hw);
 	bcm_clk_teardown(bcm_clk);
 }
 
-struct clk *kona_clk_setup(struct kona_clk *bcm_clk)
+static int kona_clk_setup(struct kona_clk *bcm_clk)
 {
+	int ret;
 	struct clk_init_data *init_data = &bcm_clk->init_data;
-	struct clk *clk = NULL;
 
 	switch (bcm_clk->type) {
 	case bcm_clk_peri:
-		if (peri_clk_setup(bcm_clk->u.data, init_data))
-			return NULL;
+		ret = peri_clk_setup(bcm_clk->u.data, init_data);
+		if (ret)
+			return ret;
 		break;
 	default:
 		pr_err("%s: clock type %d invalid for %s\n", __func__,
 			(int)bcm_clk->type, init_data->name);
-		return NULL;
+		return -EINVAL;
 	}
 
 	/* Make sure everything makes sense before we set it up */
 	if (!kona_clk_valid(bcm_clk)) {
 		pr_err("%s: clock data invalid for %s\n", __func__,
 			init_data->name);
+		ret = -EINVAL;
 		goto out_teardown;
 	}
 
 	bcm_clk->hw.init = init_data;
-	clk = clk_register(NULL, &bcm_clk->hw);
-	if (IS_ERR(clk)) {
-		pr_err("%s: error registering clock %s (%ld)\n", __func__,
-			init_data->name, PTR_ERR(clk));
+	ret = clk_hw_register(NULL, &bcm_clk->hw);
+	if (ret) {
+		pr_err("%s: error registering clock %s (%d)\n", __func__,
+			init_data->name, ret);
 		goto out_teardown;
 	}
-	BUG_ON(!clk);
 
-	return clk;
+	return 0;
 out_teardown:
 	bcm_clk_teardown(bcm_clk);
 
-	return NULL;
+	return ret;
 }
 
 static void ccu_clks_teardown(struct ccu_data *ccu)
 {
 	u32 i;
 
-	for (i = 0; i < ccu->clk_data.clk_num; i++)
-		kona_clk_teardown(ccu->clk_data.clks[i]);
-	kfree(ccu->clk_data.clks);
+	for (i = 0; i < ccu->clk_num; i++)
+		kona_clk_teardown(&ccu->kona_clks[i].hw);
 }
 
 static void kona_ccu_teardown(struct ccu_data *ccu)
 {
-	kfree(ccu->clk_data.clks);
-	ccu->clk_data.clks = NULL;
 	if (!ccu->base)
 		return;
 
@@ -793,6 +785,20 @@
 	return true;
 }
 
+static struct clk_hw *
+of_clk_kona_onecell_get(struct of_phandle_args *clkspec, void *data)
+{
+	struct ccu_data *ccu = data;
+	unsigned int idx = clkspec->args[0];
+
+	if (idx >= ccu->clk_num) {
+		pr_err("%s: invalid index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return &ccu->kona_clks[idx].hw;
+}
+
 /*
  * Set up a CCU.  Call the provided ccu_clks_setup callback to
  * initialize the array of clocks provided by the CCU.
@@ -805,18 +811,6 @@
 	unsigned int i;
 	int ret;
 
-	if (ccu->clk_data.clk_num) {
-		size_t size;
-
-		size = ccu->clk_data.clk_num * sizeof(*ccu->clk_data.clks);
-		ccu->clk_data.clks = kzalloc(size, GFP_KERNEL);
-		if (!ccu->clk_data.clks) {
-			pr_err("%s: unable to allocate %u clocks for %s\n",
-				__func__, ccu->clk_data.clk_num, node->name);
-			return;
-		}
-	}
-
 	ret = of_address_to_resource(node, 0, &res);
 	if (ret) {
 		pr_err("%s: no valid CCU registers found for %s\n", __func__,
@@ -851,13 +845,13 @@
 	 * the clock framework clock array (in ccu->data).  Then
 	 * register as a provider for these clocks.
 	 */
-	for (i = 0; i < ccu->clk_data.clk_num; i++) {
+	for (i = 0; i < ccu->clk_num; i++) {
 		if (!ccu->kona_clks[i].ccu)
 			continue;
-		ccu->clk_data.clks[i] = kona_clk_setup(&ccu->kona_clks[i]);
+		kona_clk_setup(&ccu->kona_clks[i]);
 	}
 
-	ret = of_clk_add_provider(node, of_clk_src_onecell_get, &ccu->clk_data);
+	ret = of_clk_add_hw_provider(node, of_clk_kona_onecell_get, ccu);
 	if (ret) {
 		pr_err("%s: error adding ccu %s as provider (%d)\n", __func__,
 				node->name, ret);
diff --git a/drivers/clk/bcm/clk-kona.c b/drivers/clk/bcm/clk-kona.c
index 3a15347..eee64b9 100644
--- a/drivers/clk/bcm/clk-kona.c
+++ b/drivers/clk/bcm/clk-kona.c
@@ -1256,19 +1256,18 @@
 {
 	unsigned long flags;
 	unsigned int which;
-	struct clk **clks = ccu->clk_data.clks;
 	struct kona_clk *kona_clks = ccu->kona_clks;
 	bool success = true;
 
 	flags = ccu_lock(ccu);
 	__ccu_write_enable(ccu);
 
-	for (which = 0; which < ccu->clk_data.clk_num; which++) {
-		struct kona_clk *bcm_clk;
+	for (which = 0; which < ccu->clk_num; which++) {
+		struct kona_clk *bcm_clk = &kona_clks[which];
 
-		if (!clks[which])
+		if (!bcm_clk->ccu)
 			continue;
-		bcm_clk = &kona_clks[which];
+
 		success &= __kona_clk_init(bcm_clk);
 	}
 
diff --git a/drivers/clk/bcm/clk-kona.h b/drivers/clk/bcm/clk-kona.h
index 906576e..f4b39bb 100644
--- a/drivers/clk/bcm/clk-kona.h
+++ b/drivers/clk/bcm/clk-kona.h
@@ -481,7 +481,7 @@
 	bool write_enabled;	/* write access is currently enabled */
 	struct ccu_policy policy;
 	struct device_node *node;
-	struct clk_onecell_data clk_data;
+	size_t clk_num;
 	const char *name;
 	u32 range;		/* byte range of address space */
 	struct kona_clk kona_clks[];	/* must be last */
@@ -491,9 +491,7 @@
 #define KONA_CCU_COMMON(_prefix, _name, _ccuname)			    \
 	.name		= #_name "_ccu",				    \
 	.lock		= __SPIN_LOCK_UNLOCKED(_name ## _ccu_data.lock),    \
-	.clk_data	= {						    \
-		.clk_num = _prefix ## _ ## _ccuname ## _CCU_CLOCK_COUNT,    \
-	}
+	.clk_num	= _prefix ## _ ## _ccuname ## _CCU_CLOCK_COUNT
 
 /* Exported globals */
 
@@ -505,7 +503,6 @@
 extern u64 scaled_div_build(struct bcm_clk_div *div, u32 div_value,
 				u32 billionths);
 
-extern struct clk *kona_clk_setup(struct kona_clk *bcm_clk);
 extern void __init kona_dt_ccu_setup(struct ccu_data *ccu,
 				struct device_node *node);
 extern bool __init kona_ccu_init(struct ccu_data *ccu);
diff --git a/drivers/clk/berlin/berlin2-avpll.c b/drivers/clk/berlin/berlin2-avpll.c
index fd0f26c..cfcae46 100644
--- a/drivers/clk/berlin/berlin2-avpll.c
+++ b/drivers/clk/berlin/berlin2-avpll.c
@@ -188,7 +188,7 @@
 	.recalc_rate	= berlin2_avpll_vco_recalc_rate,
 };
 
-struct clk * __init berlin2_avpll_vco_register(void __iomem *base,
+int __init berlin2_avpll_vco_register(void __iomem *base,
 			       const char *name, const char *parent_name,
 			       u8 vco_flags, unsigned long flags)
 {
@@ -197,7 +197,7 @@
 
 	vco = kzalloc(sizeof(*vco), GFP_KERNEL);
 	if (!vco)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	vco->base = base;
 	vco->flags = vco_flags;
@@ -208,7 +208,7 @@
 	init.num_parents = 1;
 	init.flags = flags;
 
-	return clk_register(NULL, &vco->hw);
+	return clk_hw_register(NULL, &vco->hw);
 }
 
 struct berlin2_avpll_channel {
@@ -364,7 +364,7 @@
  */
 static const u8 quirk_index[] __initconst = { 0, 6, 5, 4, 3, 2, 1, 7 };
 
-struct clk * __init berlin2_avpll_channel_register(void __iomem *base,
+int __init berlin2_avpll_channel_register(void __iomem *base,
 			   const char *name, u8 index, const char *parent_name,
 			   u8 ch_flags, unsigned long flags)
 {
@@ -373,7 +373,7 @@
 
 	ch = kzalloc(sizeof(*ch), GFP_KERNEL);
 	if (!ch)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	ch->base = base;
 	if (ch_flags & BERLIN2_AVPLL_SCRAMBLE_QUIRK)
@@ -389,5 +389,5 @@
 	init.num_parents = 1;
 	init.flags = flags;
 
-	return clk_register(NULL, &ch->hw);
+	return clk_hw_register(NULL, &ch->hw);
 }
diff --git a/drivers/clk/berlin/berlin2-avpll.h b/drivers/clk/berlin/berlin2-avpll.h
index a37f506..17e3111 100644
--- a/drivers/clk/berlin/berlin2-avpll.h
+++ b/drivers/clk/berlin/berlin2-avpll.h
@@ -19,17 +19,13 @@
 #ifndef __BERLIN2_AVPLL_H
 #define __BERLIN2_AVPLL_H
 
-struct clk;
-
 #define BERLIN2_AVPLL_BIT_QUIRK		BIT(0)
 #define BERLIN2_AVPLL_SCRAMBLE_QUIRK	BIT(1)
 
-struct clk * __init
-berlin2_avpll_vco_register(void __iomem *base, const char *name,
+int berlin2_avpll_vco_register(void __iomem *base, const char *name,
 	   const char *parent_name, u8 vco_flags, unsigned long flags);
 
-struct clk * __init
-berlin2_avpll_channel_register(void __iomem *base, const char *name,
+int berlin2_avpll_channel_register(void __iomem *base, const char *name,
 		       u8 index, const char *parent_name, u8 ch_flags,
 		       unsigned long flags);
 
diff --git a/drivers/clk/berlin/berlin2-div.c b/drivers/clk/berlin/berlin2-div.c
index 81ff97f..41ab2d3 100644
--- a/drivers/clk/berlin/berlin2-div.c
+++ b/drivers/clk/berlin/berlin2-div.c
@@ -234,7 +234,7 @@
 	.get_parent	= berlin2_div_get_parent,
 };
 
-struct clk * __init
+struct clk_hw * __init
 berlin2_div_register(const struct berlin2_div_map *map,
 		     void __iomem *base, const char *name, u8 div_flags,
 		     const char **parent_names, int num_parents,
@@ -259,7 +259,7 @@
 	if ((div_flags & BERLIN2_DIV_HAS_MUX) == 0)
 		mux_ops = NULL;
 
-	return clk_register_composite(NULL, name, parent_names, num_parents,
+	return clk_hw_register_composite(NULL, name, parent_names, num_parents,
 				      &div->hw, mux_ops, &div->hw, rate_ops,
 				      &div->hw, gate_ops, flags);
 }
diff --git a/drivers/clk/berlin/berlin2-div.h b/drivers/clk/berlin/berlin2-div.h
index 15e3384..e835ddf 100644
--- a/drivers/clk/berlin/berlin2-div.h
+++ b/drivers/clk/berlin/berlin2-div.h
@@ -19,7 +19,7 @@
 #ifndef __BERLIN2_DIV_H
 #define __BERLIN2_DIV_H
 
-struct clk;
+struct clk_hw;
 
 #define BERLIN2_DIV_HAS_GATE		BIT(0)
 #define BERLIN2_DIV_HAS_MUX		BIT(1)
@@ -80,7 +80,7 @@
 	u8 div_flags;
 };
 
-struct clk * __init
+struct clk_hw *
 berlin2_div_register(const struct berlin2_div_map *map,
 	     void __iomem *base,  const char *name, u8 div_flags,
 	     const char **parent_names, int num_parents,
diff --git a/drivers/clk/berlin/berlin2-pll.c b/drivers/clk/berlin/berlin2-pll.c
index 1c2294d..4ffbe80 100644
--- a/drivers/clk/berlin/berlin2-pll.c
+++ b/drivers/clk/berlin/berlin2-pll.c
@@ -84,7 +84,7 @@
 	.recalc_rate	= berlin2_pll_recalc_rate,
 };
 
-struct clk * __init
+int __init
 berlin2_pll_register(const struct berlin2_pll_map *map,
 		     void __iomem *base, const char *name,
 		     const char *parent_name, unsigned long flags)
@@ -94,7 +94,7 @@
 
 	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
 	if (!pll)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	/* copy pll_map to allow __initconst */
 	memcpy(&pll->map, map, sizeof(*map));
@@ -106,5 +106,5 @@
 	init.num_parents = 1;
 	init.flags = flags;
 
-	return clk_register(NULL, &pll->hw);
+	return clk_hw_register(NULL, &pll->hw);
 }
diff --git a/drivers/clk/berlin/berlin2-pll.h b/drivers/clk/berlin/berlin2-pll.h
index 8831ce2..583e024 100644
--- a/drivers/clk/berlin/berlin2-pll.h
+++ b/drivers/clk/berlin/berlin2-pll.h
@@ -19,8 +19,6 @@
 #ifndef __BERLIN2_PLL_H
 #define __BERLIN2_PLL_H
 
-struct clk;
-
 struct berlin2_pll_map {
 	const u8 vcodiv[16];
 	u8 mult;
@@ -29,9 +27,8 @@
 	u8 divsel_shift;
 };
 
-struct clk * __init
-berlin2_pll_register(const struct berlin2_pll_map *map,
-		     void __iomem *base, const char *name,
-		     const char *parent_name, unsigned long flags);
+int berlin2_pll_register(const struct berlin2_pll_map *map,
+			 void __iomem *base, const char *name,
+			 const char *parent_name, unsigned long flags);
 
 #endif /* __BERLIN2_PLL_H */
diff --git a/drivers/clk/berlin/bg2.c b/drivers/clk/berlin/bg2.c
index 23e0e3b..edf3b96 100644
--- a/drivers/clk/berlin/bg2.c
+++ b/drivers/clk/berlin/bg2.c
@@ -92,8 +92,7 @@
  */
 
 #define	MAX_CLKS 41
-static struct clk *clks[MAX_CLKS];
-static struct clk_onecell_data clk_data;
+static struct clk_hw_onecell_data *clk_data;
 static DEFINE_SPINLOCK(lock);
 static void __iomem *gbase;
 
@@ -505,8 +504,17 @@
 	struct device_node *parent_np = of_get_parent(np);
 	const char *parent_names[9];
 	struct clk *clk;
+	struct clk_hw *hw;
+	struct clk_hw **hws;
 	u8 avpll_flags = 0;
-	int n;
+	int n, ret;
+
+	clk_data = kzalloc(sizeof(*clk_data) +
+			   sizeof(*clk_data->hws) * MAX_CLKS, GFP_KERNEL);
+	if (!clk_data)
+		return;
+	clk_data->num = MAX_CLKS;
+	hws = clk_data->hws;
 
 	gbase = of_iomap(parent_np, 0);
 	if (!gbase)
@@ -526,118 +534,118 @@
 	}
 
 	/* simple register PLLs */
-	clk = berlin2_pll_register(&bg2_pll_map, gbase + REG_SYSPLLCTL0,
+	ret = berlin2_pll_register(&bg2_pll_map, gbase + REG_SYSPLLCTL0,
 				   clk_names[SYSPLL], clk_names[REFCLK], 0);
-	if (IS_ERR(clk))
+	if (ret)
 		goto bg2_fail;
 
-	clk = berlin2_pll_register(&bg2_pll_map, gbase + REG_MEMPLLCTL0,
+	ret = berlin2_pll_register(&bg2_pll_map, gbase + REG_MEMPLLCTL0,
 				   clk_names[MEMPLL], clk_names[REFCLK], 0);
-	if (IS_ERR(clk))
+	if (ret)
 		goto bg2_fail;
 
-	clk = berlin2_pll_register(&bg2_pll_map, gbase + REG_CPUPLLCTL0,
+	ret = berlin2_pll_register(&bg2_pll_map, gbase + REG_CPUPLLCTL0,
 				   clk_names[CPUPLL], clk_names[REFCLK], 0);
-	if (IS_ERR(clk))
+	if (ret)
 		goto bg2_fail;
 
 	if (of_device_is_compatible(np, "marvell,berlin2-global-register"))
 		avpll_flags |= BERLIN2_AVPLL_SCRAMBLE_QUIRK;
 
 	/* audio/video VCOs */
-	clk = berlin2_avpll_vco_register(gbase + REG_AVPLLCTL0, "avpll_vcoA",
+	ret = berlin2_avpll_vco_register(gbase + REG_AVPLLCTL0, "avpll_vcoA",
 			 clk_names[REFCLK], avpll_flags, 0);
-	if (IS_ERR(clk))
+	if (ret)
 		goto bg2_fail;
 
 	for (n = 0; n < 8; n++) {
-		clk = berlin2_avpll_channel_register(gbase + REG_AVPLLCTL0,
+		ret = berlin2_avpll_channel_register(gbase + REG_AVPLLCTL0,
 			     clk_names[AVPLL_A1 + n], n, "avpll_vcoA",
 			     avpll_flags, 0);
-		if (IS_ERR(clk))
+		if (ret)
 			goto bg2_fail;
 	}
 
-	clk = berlin2_avpll_vco_register(gbase + REG_AVPLLCTL31, "avpll_vcoB",
+	ret = berlin2_avpll_vco_register(gbase + REG_AVPLLCTL31, "avpll_vcoB",
 				 clk_names[REFCLK], BERLIN2_AVPLL_BIT_QUIRK |
 				 avpll_flags, 0);
-	if (IS_ERR(clk))
+	if (ret)
 		goto bg2_fail;
 
 	for (n = 0; n < 8; n++) {
-		clk = berlin2_avpll_channel_register(gbase + REG_AVPLLCTL31,
+		ret = berlin2_avpll_channel_register(gbase + REG_AVPLLCTL31,
 			     clk_names[AVPLL_B1 + n], n, "avpll_vcoB",
 			     BERLIN2_AVPLL_BIT_QUIRK | avpll_flags, 0);
-		if (IS_ERR(clk))
+		if (ret)
 			goto bg2_fail;
 	}
 
 	/* reference clock bypass switches */
 	parent_names[0] = clk_names[SYSPLL];
 	parent_names[1] = clk_names[REFCLK];
-	clk = clk_register_mux(NULL, "syspll_byp", parent_names, 2,
+	hw = clk_hw_register_mux(NULL, "syspll_byp", parent_names, 2,
 			       0, gbase + REG_CLKSWITCH0, 0, 1, 0, &lock);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto bg2_fail;
-	clk_names[SYSPLL] = __clk_get_name(clk);
+	clk_names[SYSPLL] = clk_hw_get_name(hw);
 
 	parent_names[0] = clk_names[MEMPLL];
 	parent_names[1] = clk_names[REFCLK];
-	clk = clk_register_mux(NULL, "mempll_byp", parent_names, 2,
+	hw = clk_hw_register_mux(NULL, "mempll_byp", parent_names, 2,
 			       0, gbase + REG_CLKSWITCH0, 1, 1, 0, &lock);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto bg2_fail;
-	clk_names[MEMPLL] = __clk_get_name(clk);
+	clk_names[MEMPLL] = clk_hw_get_name(hw);
 
 	parent_names[0] = clk_names[CPUPLL];
 	parent_names[1] = clk_names[REFCLK];
-	clk = clk_register_mux(NULL, "cpupll_byp", parent_names, 2,
+	hw = clk_hw_register_mux(NULL, "cpupll_byp", parent_names, 2,
 			       0, gbase + REG_CLKSWITCH0, 2, 1, 0, &lock);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto bg2_fail;
-	clk_names[CPUPLL] = __clk_get_name(clk);
+	clk_names[CPUPLL] = clk_hw_get_name(hw);
 
 	/* clock muxes */
 	parent_names[0] = clk_names[AVPLL_B3];
 	parent_names[1] = clk_names[AVPLL_A3];
-	clk = clk_register_mux(NULL, clk_names[AUDIO1_PLL], parent_names, 2,
+	hw = clk_hw_register_mux(NULL, clk_names[AUDIO1_PLL], parent_names, 2,
 			       0, gbase + REG_CLKSELECT2, 29, 1, 0, &lock);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto bg2_fail;
 
 	parent_names[0] = clk_names[VIDEO0_PLL];
 	parent_names[1] = clk_names[VIDEO_EXT0];
-	clk = clk_register_mux(NULL, clk_names[VIDEO0_IN], parent_names, 2,
+	hw = clk_hw_register_mux(NULL, clk_names[VIDEO0_IN], parent_names, 2,
 			       0, gbase + REG_CLKSELECT3, 4, 1, 0, &lock);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto bg2_fail;
 
 	parent_names[0] = clk_names[VIDEO1_PLL];
 	parent_names[1] = clk_names[VIDEO_EXT0];
-	clk = clk_register_mux(NULL, clk_names[VIDEO1_IN], parent_names, 2,
+	hw = clk_hw_register_mux(NULL, clk_names[VIDEO1_IN], parent_names, 2,
 			       0, gbase + REG_CLKSELECT3, 6, 1, 0, &lock);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto bg2_fail;
 
 	parent_names[0] = clk_names[AVPLL_A2];
 	parent_names[1] = clk_names[AVPLL_B2];
-	clk = clk_register_mux(NULL, clk_names[VIDEO1_PLL], parent_names, 2,
+	hw = clk_hw_register_mux(NULL, clk_names[VIDEO1_PLL], parent_names, 2,
 			       0, gbase + REG_CLKSELECT3, 7, 1, 0, &lock);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto bg2_fail;
 
 	parent_names[0] = clk_names[VIDEO2_PLL];
 	parent_names[1] = clk_names[VIDEO_EXT0];
-	clk = clk_register_mux(NULL, clk_names[VIDEO2_IN], parent_names, 2,
+	hw = clk_hw_register_mux(NULL, clk_names[VIDEO2_IN], parent_names, 2,
 			       0, gbase + REG_CLKSELECT3, 9, 1, 0, &lock);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto bg2_fail;
 
 	parent_names[0] = clk_names[AVPLL_B1];
 	parent_names[1] = clk_names[AVPLL_A5];
-	clk = clk_register_mux(NULL, clk_names[VIDEO2_PLL], parent_names, 2,
+	hw = clk_hw_register_mux(NULL, clk_names[VIDEO2_PLL], parent_names, 2,
 			       0, gbase + REG_CLKSELECT3, 10, 1, 0, &lock);
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		goto bg2_fail;
 
 	/* clock divider cells */
@@ -648,7 +656,7 @@
 		for (k = 0; k < dd->num_parents; k++)
 			parent_names[k] = clk_names[dd->parent_ids[k]];
 
-		clks[CLKID_SYS + n] = berlin2_div_register(&dd->map, gbase,
+		hws[CLKID_SYS + n] = berlin2_div_register(&dd->map, gbase,
 				dd->name, dd->div_flags, parent_names,
 				dd->num_parents, dd->flags, &lock);
 	}
@@ -657,18 +665,18 @@
 	for (n = 0; n < ARRAY_SIZE(bg2_gates); n++) {
 		const struct berlin2_gate_data *gd = &bg2_gates[n];
 
-		clks[CLKID_GETH0 + n] = clk_register_gate(NULL, gd->name,
+		hws[CLKID_GETH0 + n] = clk_hw_register_gate(NULL, gd->name,
 			    gd->parent_name, gd->flags, gbase + REG_CLKENABLE,
 			    gd->bit_idx, 0, &lock);
 	}
 
 	/* twdclk is derived from cpu/3 */
-	clks[CLKID_TWD] =
-		clk_register_fixed_factor(NULL, "twd", "cpu", 0, 1, 3);
+	hws[CLKID_TWD] =
+		clk_hw_register_fixed_factor(NULL, "twd", "cpu", 0, 1, 3);
 
 	/* check for errors on leaf clocks */
 	for (n = 0; n < MAX_CLKS; n++) {
-		if (!IS_ERR(clks[n]))
+		if (!IS_ERR(hws[n]))
 			continue;
 
 		pr_err("%s: Unable to register leaf clock %d\n",
@@ -677,9 +685,7 @@
 	}
 
 	/* register clk-provider */
-	clk_data.clks = clks;
-	clk_data.clk_num = MAX_CLKS;
-	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
 
 	return;
 
diff --git a/drivers/clk/berlin/bg2q.c b/drivers/clk/berlin/bg2q.c
index f144547..0718e83 100644
--- a/drivers/clk/berlin/bg2q.c
+++ b/drivers/clk/berlin/bg2q.c
@@ -46,8 +46,7 @@
 #define REG_SDIO1XIN_CLKCTL	0x015c
 
 #define	MAX_CLKS 28
-static struct clk *clks[MAX_CLKS];
-static struct clk_onecell_data clk_data;
+static struct clk_hw_onecell_data *clk_data;
 static DEFINE_SPINLOCK(lock);
 static void __iomem *gbase;
 static void __iomem *cpupll_base;
@@ -293,7 +292,15 @@
 	struct device_node *parent_np = of_get_parent(np);
 	const char *parent_names[9];
 	struct clk *clk;
-	int n;
+	struct clk_hw **hws;
+	int n, ret;
+
+	clk_data = kzalloc(sizeof(*clk_data) +
+			   sizeof(*clk_data->hws) * MAX_CLKS, GFP_KERNEL);
+	if (!clk_data)
+		return;
+	clk_data->num = MAX_CLKS;
+	hws = clk_data->hws;
 
 	gbase = of_iomap(parent_np, 0);
 	if (!gbase) {
@@ -317,14 +324,14 @@
 	}
 
 	/* simple register PLLs */
-	clk = berlin2_pll_register(&bg2q_pll_map, gbase + REG_SYSPLLCTL0,
+	ret = berlin2_pll_register(&bg2q_pll_map, gbase + REG_SYSPLLCTL0,
 				   clk_names[SYSPLL], clk_names[REFCLK], 0);
-	if (IS_ERR(clk))
+	if (ret)
 		goto bg2q_fail;
 
-	clk = berlin2_pll_register(&bg2q_pll_map, cpupll_base,
+	ret = berlin2_pll_register(&bg2q_pll_map, cpupll_base,
 				   clk_names[CPUPLL], clk_names[REFCLK], 0);
-	if (IS_ERR(clk))
+	if (ret)
 		goto bg2q_fail;
 
 	/* TODO: add BG2Q AVPLL */
@@ -342,7 +349,7 @@
 		for (k = 0; k < dd->num_parents; k++)
 			parent_names[k] = clk_names[dd->parent_ids[k]];
 
-		clks[CLKID_SYS + n] = berlin2_div_register(&dd->map, gbase,
+		hws[CLKID_SYS + n] = berlin2_div_register(&dd->map, gbase,
 				dd->name, dd->div_flags, parent_names,
 				dd->num_parents, dd->flags, &lock);
 	}
@@ -351,22 +358,22 @@
 	for (n = 0; n < ARRAY_SIZE(bg2q_gates); n++) {
 		const struct berlin2_gate_data *gd = &bg2q_gates[n];
 
-		clks[CLKID_GFX2DAXI + n] = clk_register_gate(NULL, gd->name,
+		hws[CLKID_GFX2DAXI + n] = clk_hw_register_gate(NULL, gd->name,
 			    gd->parent_name, gd->flags, gbase + REG_CLKENABLE,
 			    gd->bit_idx, 0, &lock);
 	}
 
 	/* cpuclk divider is fixed to 1 */
-	clks[CLKID_CPU] =
-		clk_register_fixed_factor(NULL, "cpu", clk_names[CPUPLL],
+	hws[CLKID_CPU] =
+		clk_hw_register_fixed_factor(NULL, "cpu", clk_names[CPUPLL],
 					  0, 1, 1);
 	/* twdclk is derived from cpu/3 */
-	clks[CLKID_TWD] =
-		clk_register_fixed_factor(NULL, "twd", "cpu", 0, 1, 3);
+	hws[CLKID_TWD] =
+		clk_hw_register_fixed_factor(NULL, "twd", "cpu", 0, 1, 3);
 
 	/* check for errors on leaf clocks */
 	for (n = 0; n < MAX_CLKS; n++) {
-		if (!IS_ERR(clks[n]))
+		if (!IS_ERR(hws[n]))
 			continue;
 
 		pr_err("%s: Unable to register leaf clock %d\n",
@@ -375,9 +382,7 @@
 	}
 
 	/* register clk-provider */
-	clk_data.clks = clks;
-	clk_data.clk_num = MAX_CLKS;
-	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
 
 	return;
 
diff --git a/drivers/clk/clk-asm9260.c b/drivers/clk/clk-asm9260.c
index 90897af..ea85685 100644
--- a/drivers/clk/clk-asm9260.c
+++ b/drivers/clk/clk-asm9260.c
@@ -68,8 +68,7 @@
 #define HW_LCDCLKDIV		0x01fc
 #define HW_ADCANACLKDIV		0x0200
 
-static struct clk *clks[MAX_CLKS];
-static struct clk_onecell_data clk_data;
+static struct clk_hw_onecell_data *clk_data;
 static DEFINE_SPINLOCK(asm9260_clk_lock);
 
 struct asm9260_div_clk {
@@ -267,12 +266,20 @@
 
 static void __init asm9260_acc_init(struct device_node *np)
 {
-	struct clk *clk;
+	struct clk_hw *hw;
+	struct clk_hw **hws;
 	const char *ref_clk, *pll_clk = "pll";
 	u32 rate;
 	int n;
 	u32 accuracy = 0;
 
+	clk_data = kzalloc(sizeof(*clk_data) +
+			   sizeof(*clk_data->hws) * MAX_CLKS, GFP_KERNEL);
+	if (!clk_data)
+		return;
+	clk_data->num = MAX_CLKS;
+	hws = clk_data->hws;
+
 	base = of_io_request_and_map(np, 0, np->name);
 	if (IS_ERR(base))
 		panic("%s: unable to map resource", np->name);
@@ -282,10 +289,10 @@
 
 	ref_clk = of_clk_get_parent_name(np, 0);
 	accuracy = clk_get_accuracy(__clk_lookup(ref_clk));
-	clk = clk_register_fixed_rate_with_accuracy(NULL, pll_clk,
+	hw = clk_hw_register_fixed_rate_with_accuracy(NULL, pll_clk,
 			ref_clk, 0, rate, accuracy);
 
-	if (IS_ERR(clk))
+	if (IS_ERR(hw))
 		panic("%s: can't register REFCLK. Check DT!", np->name);
 
 	for (n = 0; n < ARRAY_SIZE(asm9260_mux_clks); n++) {
@@ -293,7 +300,7 @@
 
 		mc->parent_names[0] = ref_clk;
 		mc->parent_names[1] = pll_clk;
-		clk = clk_register_mux_table(NULL, mc->name, mc->parent_names,
+		hw = clk_hw_register_mux_table(NULL, mc->name, mc->parent_names,
 				mc->num_parents, mc->flags, base + mc->offset,
 				0, mc->mask, 0, mc->table, &asm9260_clk_lock);
 	}
@@ -302,7 +309,7 @@
 	for (n = 0; n < ARRAY_SIZE(asm9260_mux_gates); n++) {
 		const struct asm9260_gate_data *gd = &asm9260_mux_gates[n];
 
-		clk = clk_register_gate(NULL, gd->name,
+		hw = clk_hw_register_gate(NULL, gd->name,
 			gd->parent_name, gd->flags | CLK_SET_RATE_PARENT,
 			base + gd->reg, gd->bit_idx, 0, &asm9260_clk_lock);
 	}
@@ -311,7 +318,7 @@
 	for (n = 0; n < ARRAY_SIZE(asm9260_div_clks); n++) {
 		const struct asm9260_div_clk *dc = &asm9260_div_clks[n];
 
-		clks[dc->idx] = clk_register_divider(NULL, dc->name,
+		hws[dc->idx] = clk_hw_register_divider(NULL, dc->name,
 				dc->parent_name, CLK_SET_RATE_PARENT,
 				base + dc->reg, 0, 8, CLK_DIVIDER_ONE_BASED,
 				&asm9260_clk_lock);
@@ -321,14 +328,14 @@
 	for (n = 0; n < ARRAY_SIZE(asm9260_ahb_gates); n++) {
 		const struct asm9260_gate_data *gd = &asm9260_ahb_gates[n];
 
-		clks[gd->idx] = clk_register_gate(NULL, gd->name,
+		hws[gd->idx] = clk_hw_register_gate(NULL, gd->name,
 				gd->parent_name, gd->flags, base + gd->reg,
 				gd->bit_idx, 0, &asm9260_clk_lock);
 	}
 
 	/* check for errors on leaf clocks */
 	for (n = 0; n < MAX_CLKS; n++) {
-		if (!IS_ERR(clks[n]))
+		if (!IS_ERR(hws[n]))
 			continue;
 
 		pr_err("%s: Unable to register leaf clock %d\n",
@@ -337,9 +344,7 @@
 	}
 
 	/* register clk-provider */
-	clk_data.clks = clks;
-	clk_data.clk_num = MAX_CLKS;
-	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, clk_data);
 	return;
 fail:
 	iounmap(base);
diff --git a/drivers/clk/clk-axi-clkgen.c b/drivers/clk/clk-axi-clkgen.c
index 3294db3..5e918e7 100644
--- a/drivers/clk/clk-axi-clkgen.c
+++ b/drivers/clk/clk-axi-clkgen.c
@@ -392,8 +392,8 @@
 	const char *parent_names[2];
 	const char *clk_name;
 	struct resource *mem;
-	struct clk *clk;
 	unsigned int i;
+	int ret;
 
 	if (!pdev->dev.of_node)
 		return -ENODEV;
@@ -433,12 +433,12 @@
 	axi_clkgen_mmcm_enable(axi_clkgen, false);
 
 	axi_clkgen->clk_hw.init = &init;
-	clk = devm_clk_register(&pdev->dev, &axi_clkgen->clk_hw);
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
+	ret = devm_clk_hw_register(&pdev->dev, &axi_clkgen->clk_hw);
+	if (ret)
+		return ret;
 
-	return of_clk_add_provider(pdev->dev.of_node, of_clk_src_simple_get,
-				    clk);
+	return of_clk_add_hw_provider(pdev->dev.of_node, of_clk_hw_simple_get,
+				      &axi_clkgen->clk_hw);
 }
 
 static int axi_clkgen_remove(struct platform_device *pdev)
diff --git a/drivers/clk/clk-axm5516.c b/drivers/clk/clk-axm5516.c
index c7c91a5..5d7ae33 100644
--- a/drivers/clk/clk-axm5516.c
+++ b/drivers/clk/clk-axm5516.c
@@ -516,6 +516,19 @@
 	[AXXIA_CLK_MMC]      = &clk_mmc_mux.aclk,
 };
 
+static struct clk_hw *
+of_clk_axmclk_get(struct of_phandle_args *clkspec, void *unused)
+{
+	unsigned int idx = clkspec->args[0];
+
+	if (idx >= ARRAY_SIZE(axmclk_clocks)) {
+		pr_err("%s: invalid index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return &axmclk_clocks[idx]->hw;
+}
+
 static const struct regmap_config axmclk_regmap_config = {
 	.reg_bits	= 32,
 	.reg_stride	= 4,
@@ -530,21 +543,14 @@
 };
 MODULE_DEVICE_TABLE(of, axmclk_match_table);
 
-struct axmclk_priv {
-	struct clk_onecell_data onecell;
-	struct clk *clks[];
-};
-
 static int axmclk_probe(struct platform_device *pdev)
 {
 	void __iomem *base;
 	struct resource *res;
 	int i, ret;
 	struct device *dev = &pdev->dev;
-	struct clk *clk;
 	struct regmap *regmap;
 	size_t num_clks;
-	struct axmclk_priv *priv;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	base = devm_ioremap_resource(dev, res);
@@ -557,29 +563,18 @@
 
 	num_clks = ARRAY_SIZE(axmclk_clocks);
 	pr_info("axmclk: supporting %zu clocks\n", num_clks);
-	priv = devm_kzalloc(dev, sizeof(*priv) + sizeof(*priv->clks) * num_clks,
-			    GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	priv->onecell.clks = priv->clks;
-	priv->onecell.clk_num = num_clks;
 
 	/* Update each entry with the allocated regmap and register the clock
 	 * with the common clock framework
 	 */
 	for (i = 0; i < num_clks; i++) {
 		axmclk_clocks[i]->regmap = regmap;
-		clk = devm_clk_register(dev, &axmclk_clocks[i]->hw);
-		if (IS_ERR(clk))
-			return PTR_ERR(clk);
-		priv->clks[i] = clk;
+		ret = devm_clk_hw_register(dev, &axmclk_clocks[i]->hw);
+		if (ret)
+			return ret;
 	}
 
-	ret = of_clk_add_provider(dev->of_node,
-				  of_clk_src_onecell_get, &priv->onecell);
-
-	return ret;
+	return of_clk_add_hw_provider(dev->of_node, of_clk_axmclk_get, NULL);
 }
 
 static int axmclk_remove(struct platform_device *pdev)
diff --git a/drivers/clk/clk-cdce706.c b/drivers/clk/clk-cdce706.c
index 01877f64..f21d909 100644
--- a/drivers/clk/clk-cdce706.c
+++ b/drivers/clk/clk-cdce706.c
@@ -71,7 +71,6 @@
 	struct cdce706_dev_data *dev_data;
 	unsigned idx;
 	unsigned parent;
-	struct clk *clk;
 	struct clk_hw hw;
 	unsigned div;
 	unsigned mul;
@@ -81,8 +80,6 @@
 struct cdce706_dev_data {
 	struct i2c_client *client;
 	struct regmap *regmap;
-	struct clk_onecell_data onecell;
-	struct clk *clks[6];
 	struct clk *clkin_clk[2];
 	const char *clkin_name[2];
 	struct cdce706_hw_data clkin[1];
@@ -455,18 +452,19 @@
 			       struct clk_init_data *init)
 {
 	unsigned i;
+	int ret;
 
 	for (i = 0; i < num_hw; ++i, ++hw) {
 		init->name = clk_names[i];
 		hw->dev_data = cdce;
 		hw->idx = i;
 		hw->hw.init = init;
-		hw->clk = devm_clk_register(&cdce->client->dev,
+		ret = devm_clk_hw_register(&cdce->client->dev,
 					    &hw->hw);
-		if (IS_ERR(hw->clk)) {
+		if (ret) {
 			dev_err(&cdce->client->dev, "Failed to register %s\n",
 				clk_names[i]);
-			return PTR_ERR(hw->clk);
+			return ret;
 		}
 	}
 	return 0;
@@ -613,13 +611,23 @@
 			cdce->clkout[i].parent);
 	}
 
-	ret = cdce706_register_hw(cdce, cdce->clkout,
-				  ARRAY_SIZE(cdce->clkout),
-				  cdce706_clkout_name, &init);
-	for (i = 0; i < ARRAY_SIZE(cdce->clkout); ++i)
-		cdce->clks[i] = cdce->clkout[i].clk;
+	return cdce706_register_hw(cdce, cdce->clkout,
+				   ARRAY_SIZE(cdce->clkout),
+				   cdce706_clkout_name, &init);
+}
 
-	return ret;
+static struct clk_hw *
+of_clk_cdce_get(struct of_phandle_args *clkspec, void *data)
+{
+	struct cdce706_dev_data *cdce = data;
+	unsigned int idx = clkspec->args[0];
+
+	if (idx >= ARRAY_SIZE(cdce->clkout)) {
+		pr_err("%s: invalid index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return &cdce->clkout[idx].hw;
 }
 
 static int cdce706_probe(struct i2c_client *client,
@@ -657,12 +665,8 @@
 	ret = cdce706_register_clkouts(cdce);
 	if (ret < 0)
 		return ret;
-	cdce->onecell.clks = cdce->clks;
-	cdce->onecell.clk_num = ARRAY_SIZE(cdce->clks);
-	ret = of_clk_add_provider(client->dev.of_node, of_clk_src_onecell_get,
-				  &cdce->onecell);
-
-	return ret;
+	return of_clk_add_hw_provider(client->dev.of_node, of_clk_cdce_get,
+				      cdce);
 }
 
 static int cdce706_remove(struct i2c_client *client)
diff --git a/drivers/clk/clk-cdce925.c b/drivers/clk/clk-cdce925.c
index 089bf88..b8459c1 100644
--- a/drivers/clk/clk-cdce925.c
+++ b/drivers/clk/clk-cdce925.c
@@ -62,8 +62,6 @@
 	struct i2c_client *i2c_client;
 	struct clk_cdce925_pll pll[NUMBER_OF_PLLS];
 	struct clk_cdce925_output clk[NUMBER_OF_OUTPUTS];
-	struct clk *dt_clk[NUMBER_OF_OUTPUTS];
-	struct clk_onecell_data onecell;
 };
 
 /* ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** ** */
@@ -557,6 +555,20 @@
 		return -EIO;
 }
 
+static struct clk_hw *
+of_clk_cdce925_get(struct of_phandle_args *clkspec, void *_data)
+{
+	struct clk_cdce925_chip *data = _data;
+	unsigned int idx = clkspec->args[0];
+
+	if (idx >= ARRAY_SIZE(data->clk)) {
+		pr_err("%s: invalid index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return &data->clk[idx].hw;
+}
+
 /* The CDCE925 uses a funky way to read/write registers. Bulk mode is
  * just weird, so just use the single byte mode exclusively. */
 static struct regmap_bus regmap_cdce925_bus = {
@@ -572,7 +584,6 @@
 	const char *parent_name;
 	const char *pll_clk_name[NUMBER_OF_PLLS] = {NULL,};
 	struct clk_init_data init;
-	struct clk *clk;
 	u32 value;
 	int i;
 	int err;
@@ -622,10 +633,9 @@
 		data->pll[i].chip = data;
 		data->pll[i].hw.init = &init;
 		data->pll[i].index = i;
-		clk = devm_clk_register(&client->dev, &data->pll[i].hw);
-		if (IS_ERR(clk)) {
+		err = devm_clk_hw_register(&client->dev, &data->pll[i].hw);
+		if (err) {
 			dev_err(&client->dev, "Failed register PLL %d\n", i);
-			err = PTR_ERR(clk);
 			goto error;
 		}
 		sprintf(child_name, "PLL%d", i+1);
@@ -634,7 +644,7 @@
 			continue;
 		if (!of_property_read_u32(np_output,
 			"clock-frequency", &value)) {
-			err = clk_set_rate(clk, value);
+			err = clk_set_rate(data->pll[i].hw.clk, value);
 			if (err)
 				dev_err(&client->dev,
 					"unable to set PLL frequency %ud\n",
@@ -663,14 +673,12 @@
 	data->clk[0].hw.init = &init;
 	data->clk[0].index = 0;
 	data->clk[0].pdiv = 1;
-	clk = devm_clk_register(&client->dev, &data->clk[0].hw);
+	err = devm_clk_hw_register(&client->dev, &data->clk[0].hw);
 	kfree(init.name); /* clock framework made a copy of the name */
-	if (IS_ERR(clk)) {
+	if (err) {
 		dev_err(&client->dev, "clock registration Y1 failed\n");
-		err = PTR_ERR(clk);
 		goto error;
 	}
-	data->dt_clk[0] = clk;
 
 	/* Register output clocks Y2 .. Y5*/
 	init.ops = &cdce925_clk_ops;
@@ -695,21 +703,17 @@
 			init.parent_names = &pll_clk_name[1];
 			break;
 		}
-		clk = devm_clk_register(&client->dev, &data->clk[i].hw);
+		err = devm_clk_hw_register(&client->dev, &data->clk[i].hw);
 		kfree(init.name); /* clock framework made a copy of the name */
-		if (IS_ERR(clk)) {
+		if (err) {
 			dev_err(&client->dev, "clock registration failed\n");
-			err = PTR_ERR(clk);
 			goto error;
 		}
-		data->dt_clk[i] = clk;
 	}
 
 	/* Register the output clocks */
-	data->onecell.clk_num = NUMBER_OF_OUTPUTS;
-	data->onecell.clks = data->dt_clk;
-	err = of_clk_add_provider(client->dev.of_node, of_clk_src_onecell_get,
-		&data->onecell);
+	err = of_clk_add_hw_provider(client->dev.of_node, of_clk_cdce925_get,
+				  data);
 	if (err)
 		dev_err(&client->dev, "unable to add OF clock provider\n");
 
diff --git a/drivers/clk/clk-clps711x.c b/drivers/clk/clk-clps711x.c
index adaf109..9193f64 100644
--- a/drivers/clk/clk-clps711x.c
+++ b/drivers/clk/clk-clps711x.c
@@ -40,9 +40,8 @@
 };
 
 struct clps711x_clk {
-	struct clk_onecell_data	clk_data;
-	spinlock_t		lock;
-	struct clk		*clks[CLPS711X_CLK_MAX];
+	spinlock_t			lock;
+	struct clk_hw_onecell_data	clk_data;
 };
 
 static struct clps711x_clk * __init _clps711x_clk_init(void __iomem *base,
@@ -55,7 +54,9 @@
 	if (!base)
 		return ERR_PTR(-ENOMEM);
 
-	clps711x_clk = kzalloc(sizeof(*clps711x_clk), GFP_KERNEL);
+	clps711x_clk = kzalloc(sizeof(*clps711x_clk) +
+			sizeof(*clps711x_clk->clk_data.hws) * CLPS711X_CLK_MAX,
+			GFP_KERNEL);
 	if (!clps711x_clk)
 		return ERR_PTR(-ENOMEM);
 
@@ -106,40 +107,40 @@
 	tmp |= SYSCON1_TC2M | SYSCON1_TC2S;
 	writel(tmp, base + CLPS711X_SYSCON1);
 
-	clps711x_clk->clks[CLPS711X_CLK_DUMMY] =
-		clk_register_fixed_rate(NULL, "dummy", NULL, 0, 0);
-	clps711x_clk->clks[CLPS711X_CLK_CPU] =
-		clk_register_fixed_rate(NULL, "cpu", NULL, 0, f_cpu);
-	clps711x_clk->clks[CLPS711X_CLK_BUS] =
-		clk_register_fixed_rate(NULL, "bus", NULL, 0, f_bus);
-	clps711x_clk->clks[CLPS711X_CLK_PLL] =
-		clk_register_fixed_rate(NULL, "pll", NULL, 0, f_pll);
-	clps711x_clk->clks[CLPS711X_CLK_TIMERREF] =
-		clk_register_fixed_rate(NULL, "timer_ref", NULL, 0, f_tim);
-	clps711x_clk->clks[CLPS711X_CLK_TIMER1] =
-		clk_register_divider_table(NULL, "timer1", "timer_ref", 0,
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_DUMMY] =
+		clk_hw_register_fixed_rate(NULL, "dummy", NULL, 0, 0);
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_CPU] =
+		clk_hw_register_fixed_rate(NULL, "cpu", NULL, 0, f_cpu);
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_BUS] =
+		clk_hw_register_fixed_rate(NULL, "bus", NULL, 0, f_bus);
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_PLL] =
+		clk_hw_register_fixed_rate(NULL, "pll", NULL, 0, f_pll);
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_TIMERREF] =
+		clk_hw_register_fixed_rate(NULL, "timer_ref", NULL, 0, f_tim);
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_TIMER1] =
+		clk_hw_register_divider_table(NULL, "timer1", "timer_ref", 0,
 					   base + CLPS711X_SYSCON1, 5, 1, 0,
 					   timer_div_table, &clps711x_clk->lock);
-	clps711x_clk->clks[CLPS711X_CLK_TIMER2] =
-		clk_register_divider_table(NULL, "timer2", "timer_ref", 0,
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_TIMER2] =
+		clk_hw_register_divider_table(NULL, "timer2", "timer_ref", 0,
 					   base + CLPS711X_SYSCON1, 7, 1, 0,
 					   timer_div_table, &clps711x_clk->lock);
-	clps711x_clk->clks[CLPS711X_CLK_PWM] =
-		clk_register_fixed_rate(NULL, "pwm", NULL, 0, f_pwm);
-	clps711x_clk->clks[CLPS711X_CLK_SPIREF] =
-		clk_register_fixed_rate(NULL, "spi_ref", NULL, 0, f_spi);
-	clps711x_clk->clks[CLPS711X_CLK_SPI] =
-		clk_register_divider_table(NULL, "spi", "spi_ref", 0,
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_PWM] =
+		clk_hw_register_fixed_rate(NULL, "pwm", NULL, 0, f_pwm);
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_SPIREF] =
+		clk_hw_register_fixed_rate(NULL, "spi_ref", NULL, 0, f_spi);
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_SPI] =
+		clk_hw_register_divider_table(NULL, "spi", "spi_ref", 0,
 					   base + CLPS711X_SYSCON1, 16, 2, 0,
 					   spi_div_table, &clps711x_clk->lock);
-	clps711x_clk->clks[CLPS711X_CLK_UART] =
-		clk_register_fixed_factor(NULL, "uart", "bus", 0, 1, 10);
-	clps711x_clk->clks[CLPS711X_CLK_TICK] =
-		clk_register_fixed_rate(NULL, "tick", NULL, 0, 64);
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_UART] =
+		clk_hw_register_fixed_factor(NULL, "uart", "bus", 0, 1, 10);
+	clps711x_clk->clk_data.hws[CLPS711X_CLK_TICK] =
+		clk_hw_register_fixed_rate(NULL, "tick", NULL, 0, 64);
 	for (i = 0; i < CLPS711X_CLK_MAX; i++)
-		if (IS_ERR(clps711x_clk->clks[i]))
+		if (IS_ERR(clps711x_clk->clk_data.hws[i]))
 			pr_err("clk %i: register failed with %ld\n",
-			       i, PTR_ERR(clps711x_clk->clks[i]));
+			       i, PTR_ERR(clps711x_clk->clk_data.hws[i]));
 
 	return clps711x_clk;
 }
@@ -153,17 +154,17 @@
 	BUG_ON(IS_ERR(clps711x_clk));
 
 	/* Clocksource */
-	clk_register_clkdev(clps711x_clk->clks[CLPS711X_CLK_TIMER1],
+	clk_hw_register_clkdev(clps711x_clk->clk_data.hws[CLPS711X_CLK_TIMER1],
 			    NULL, "clps711x-timer.0");
-	clk_register_clkdev(clps711x_clk->clks[CLPS711X_CLK_TIMER2],
+	clk_hw_register_clkdev(clps711x_clk->clk_data.hws[CLPS711X_CLK_TIMER2],
 			    NULL, "clps711x-timer.1");
 
 	/* Drivers */
-	clk_register_clkdev(clps711x_clk->clks[CLPS711X_CLK_PWM],
+	clk_hw_register_clkdev(clps711x_clk->clk_data.hws[CLPS711X_CLK_PWM],
 			    NULL, "clps711x-pwm");
-	clk_register_clkdev(clps711x_clk->clks[CLPS711X_CLK_UART],
+	clk_hw_register_clkdev(clps711x_clk->clk_data.hws[CLPS711X_CLK_UART],
 			    NULL, "clps711x-uart.0");
-	clk_register_clkdev(clps711x_clk->clks[CLPS711X_CLK_UART],
+	clk_hw_register_clkdev(clps711x_clk->clk_data.hws[CLPS711X_CLK_UART],
 			    NULL, "clps711x-uart.1");
 }
 
@@ -179,10 +180,9 @@
 	clps711x_clk = _clps711x_clk_init(base, fref);
 	BUG_ON(IS_ERR(clps711x_clk));
 
-	clps711x_clk->clk_data.clks = clps711x_clk->clks;
-	clps711x_clk->clk_data.clk_num = CLPS711X_CLK_MAX;
-	of_clk_add_provider(np, of_clk_src_onecell_get,
-			    &clps711x_clk->clk_data);
+	clps711x_clk->clk_data.num = CLPS711X_CLK_MAX;
+	of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
+			       &clps711x_clk->clk_data);
 }
 CLK_OF_DECLARE(clps711x, "cirrus,ep7209-clk", clps711x_clk_init_dt);
 #endif
diff --git a/drivers/clk/clk-cs2000-cp.c b/drivers/clk/clk-cs2000-cp.c
index 7379de8..021f3da 100644
--- a/drivers/clk/clk-cs2000-cp.c
+++ b/drivers/clk/clk-cs2000-cp.c
@@ -59,7 +59,6 @@
 	struct i2c_client *client;
 	struct clk *clk_in;
 	struct clk *ref_clk;
-	struct clk *clk_out;
 };
 
 static const struct of_device_id cs2000_of_match[] = {
@@ -371,7 +370,6 @@
 	struct device_node *np = dev->of_node;
 	struct clk_init_data init;
 	const char *name = np->name;
-	struct clk *clk;
 	static const char *parent_names[CLK_MAX];
 	int ch = 0; /* it uses ch0 only at this point */
 	int rate;
@@ -400,18 +398,16 @@
 
 	priv->hw.init = &init;
 
-	clk = clk_register(dev, &priv->hw);
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
+	ret = clk_hw_register(dev, &priv->hw);
+	if (ret)
+		return ret;
 
-	ret = of_clk_add_provider(np, of_clk_src_simple_get, clk);
+	ret = of_clk_add_hw_provider(np, of_clk_hw_simple_get, &priv->hw);
 	if (ret < 0) {
-		clk_unregister(clk);
+		clk_hw_unregister(&priv->hw);
 		return ret;
 	}
 
-	priv->clk_out = clk;
-
 	return 0;
 }
 
@@ -454,7 +450,7 @@
 
 	of_clk_del_provider(np);
 
-	clk_unregister(priv->clk_out);
+	clk_hw_unregister(&priv->hw);
 
 	return 0;
 }
diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index a0f55bc..96386ff 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -352,7 +352,7 @@
 
 	/* if read only, just return current value */
 	if (divider->flags & CLK_DIVIDER_READ_ONLY) {
-		bestdiv = readl(divider->reg) >> divider->shift;
+		bestdiv = clk_readl(divider->reg) >> divider->shift;
 		bestdiv &= div_mask(divider->width);
 		bestdiv = _get_div(divider->table, bestdiv, divider->flags,
 			divider->width);
diff --git a/drivers/clk/clk-efm32gg.c b/drivers/clk/clk-efm32gg.c
index 22e4c65..8802a2d 100644
--- a/drivers/clk/clk-efm32gg.c
+++ b/drivers/clk/clk-efm32gg.c
@@ -10,24 +10,31 @@
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/slab.h>
 
 #include <dt-bindings/clock/efm32-cmu.h>
 
 #define CMU_HFPERCLKEN0		0x44
+#define CMU_MAX_CLKS		37
 
-static struct clk *clk[37];
-static struct clk_onecell_data clk_data = {
-	.clks = clk,
-	.clk_num = ARRAY_SIZE(clk),
-};
+static struct clk_hw_onecell_data *clk_data;
 
 static void __init efm32gg_cmu_init(struct device_node *np)
 {
 	int i;
 	void __iomem *base;
+	struct clk_hw **hws;
 
-	for (i = 0; i < ARRAY_SIZE(clk); ++i)
-		clk[i] = ERR_PTR(-ENOENT);
+	clk_data = kzalloc(sizeof(*clk_data) +
+			   sizeof(*clk_data->hws) * CMU_MAX_CLKS, GFP_KERNEL);
+
+	if (!clk_data)
+		return;
+
+	hws = clk_data->hws;
+
+	for (i = 0; i < CMU_MAX_CLKS; ++i)
+		hws[i] = ERR_PTR(-ENOENT);
 
 	base = of_iomap(np, 0);
 	if (!base) {
@@ -35,46 +42,46 @@
 		return;
 	}
 
-	clk[clk_HFXO] = clk_register_fixed_rate(NULL, "HFXO", NULL,
-			0, 48000000);
+	hws[clk_HFXO] = clk_hw_register_fixed_rate(NULL, "HFXO", NULL, 0,
+						   48000000);
 
-	clk[clk_HFPERCLKUSART0] = clk_register_gate(NULL, "HFPERCLK.USART0",
+	hws[clk_HFPERCLKUSART0] = clk_hw_register_gate(NULL, "HFPERCLK.USART0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 0, 0, NULL);
-	clk[clk_HFPERCLKUSART1] = clk_register_gate(NULL, "HFPERCLK.USART1",
+	hws[clk_HFPERCLKUSART1] = clk_hw_register_gate(NULL, "HFPERCLK.USART1",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 1, 0, NULL);
-	clk[clk_HFPERCLKUSART2] = clk_register_gate(NULL, "HFPERCLK.USART2",
+	hws[clk_HFPERCLKUSART2] = clk_hw_register_gate(NULL, "HFPERCLK.USART2",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 2, 0, NULL);
-	clk[clk_HFPERCLKUART0] = clk_register_gate(NULL, "HFPERCLK.UART0",
+	hws[clk_HFPERCLKUART0] = clk_hw_register_gate(NULL, "HFPERCLK.UART0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 3, 0, NULL);
-	clk[clk_HFPERCLKUART1] = clk_register_gate(NULL, "HFPERCLK.UART1",
+	hws[clk_HFPERCLKUART1] = clk_hw_register_gate(NULL, "HFPERCLK.UART1",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 4, 0, NULL);
-	clk[clk_HFPERCLKTIMER0] = clk_register_gate(NULL, "HFPERCLK.TIMER0",
+	hws[clk_HFPERCLKTIMER0] = clk_hw_register_gate(NULL, "HFPERCLK.TIMER0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 5, 0, NULL);
-	clk[clk_HFPERCLKTIMER1] = clk_register_gate(NULL, "HFPERCLK.TIMER1",
+	hws[clk_HFPERCLKTIMER1] = clk_hw_register_gate(NULL, "HFPERCLK.TIMER1",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 6, 0, NULL);
-	clk[clk_HFPERCLKTIMER2] = clk_register_gate(NULL, "HFPERCLK.TIMER2",
+	hws[clk_HFPERCLKTIMER2] = clk_hw_register_gate(NULL, "HFPERCLK.TIMER2",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 7, 0, NULL);
-	clk[clk_HFPERCLKTIMER3] = clk_register_gate(NULL, "HFPERCLK.TIMER3",
+	hws[clk_HFPERCLKTIMER3] = clk_hw_register_gate(NULL, "HFPERCLK.TIMER3",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 8, 0, NULL);
-	clk[clk_HFPERCLKACMP0] = clk_register_gate(NULL, "HFPERCLK.ACMP0",
+	hws[clk_HFPERCLKACMP0] = clk_hw_register_gate(NULL, "HFPERCLK.ACMP0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 9, 0, NULL);
-	clk[clk_HFPERCLKACMP1] = clk_register_gate(NULL, "HFPERCLK.ACMP1",
+	hws[clk_HFPERCLKACMP1] = clk_hw_register_gate(NULL, "HFPERCLK.ACMP1",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 10, 0, NULL);
-	clk[clk_HFPERCLKI2C0] = clk_register_gate(NULL, "HFPERCLK.I2C0",
+	hws[clk_HFPERCLKI2C0] = clk_hw_register_gate(NULL, "HFPERCLK.I2C0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 11, 0, NULL);
-	clk[clk_HFPERCLKI2C1] = clk_register_gate(NULL, "HFPERCLK.I2C1",
+	hws[clk_HFPERCLKI2C1] = clk_hw_register_gate(NULL, "HFPERCLK.I2C1",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 12, 0, NULL);
-	clk[clk_HFPERCLKGPIO] = clk_register_gate(NULL, "HFPERCLK.GPIO",
+	hws[clk_HFPERCLKGPIO] = clk_hw_register_gate(NULL, "HFPERCLK.GPIO",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 13, 0, NULL);
-	clk[clk_HFPERCLKVCMP] = clk_register_gate(NULL, "HFPERCLK.VCMP",
+	hws[clk_HFPERCLKVCMP] = clk_hw_register_gate(NULL, "HFPERCLK.VCMP",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 14, 0, NULL);
-	clk[clk_HFPERCLKPRS] = clk_register_gate(NULL, "HFPERCLK.PRS",
+	hws[clk_HFPERCLKPRS] = clk_hw_register_gate(NULL, "HFPERCLK.PRS",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 15, 0, NULL);
-	clk[clk_HFPERCLKADC0] = clk_register_gate(NULL, "HFPERCLK.ADC0",
+	hws[clk_HFPERCLKADC0] = clk_hw_register_gate(NULL, "HFPERCLK.ADC0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 16, 0, NULL);
-	clk[clk_HFPERCLKDAC0] = clk_register_gate(NULL, "HFPERCLK.DAC0",
+	hws[clk_HFPERCLKDAC0] = clk_hw_register_gate(NULL, "HFPERCLK.DAC0",
 			"HFXO", 0, base + CMU_HFPERCLKEN0, 17, 0, NULL);
 
-	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &clk_data);
 }
 CLK_OF_DECLARE(efm32ggcmu, "efm32gg,cmu", efm32gg_cmu_init);
diff --git a/drivers/clk/clk-fixed-factor.c b/drivers/clk/clk-fixed-factor.c
index 4db3be2..a5d402d 100644
--- a/drivers/clk/clk-fixed-factor.c
+++ b/drivers/clk/clk-fixed-factor.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/of.h>
+#include <linux/platform_device.h>
 
 /*
  * DOC: basic fixed multiplier and divider clock that cannot gate
@@ -147,27 +148,25 @@
 	{ /* Sentinel */ },
 };
 
-/**
- * of_fixed_factor_clk_setup() - Setup function for simple fixed factor clock
- */
-void __init of_fixed_factor_clk_setup(struct device_node *node)
+static struct clk *_of_fixed_factor_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
 	const char *clk_name = node->name;
 	const char *parent_name;
 	unsigned long flags = 0;
 	u32 div, mult;
+	int ret;
 
 	if (of_property_read_u32(node, "clock-div", &div)) {
 		pr_err("%s Fixed factor clock <%s> must have a clock-div property\n",
 			__func__, node->name);
-		return;
+		return ERR_PTR(-EIO);
 	}
 
 	if (of_property_read_u32(node, "clock-mult", &mult)) {
 		pr_err("%s Fixed factor clock <%s> must have a clock-mult property\n",
 			__func__, node->name);
-		return;
+		return ERR_PTR(-EIO);
 	}
 
 	of_property_read_string(node, "clock-output-names", &clk_name);
@@ -178,10 +177,67 @@
 
 	clk = clk_register_fixed_factor(NULL, clk_name, parent_name, flags,
 					mult, div);
-	if (!IS_ERR(clk))
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	if (IS_ERR(clk))
+		return clk;
+
+	ret = of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	if (ret) {
+		clk_unregister(clk);
+		return ERR_PTR(ret);
+	}
+
+	return clk;
 }
-EXPORT_SYMBOL_GPL(of_fixed_factor_clk_setup);
+
+/**
+ * of_fixed_factor_clk_setup() - Setup function for simple fixed factor clock
+ */
+void __init of_fixed_factor_clk_setup(struct device_node *node)
+{
+	_of_fixed_factor_clk_setup(node);
+}
 CLK_OF_DECLARE(fixed_factor_clk, "fixed-factor-clock",
 		of_fixed_factor_clk_setup);
+
+static int of_fixed_factor_clk_remove(struct platform_device *pdev)
+{
+	struct clk *clk = platform_get_drvdata(pdev);
+
+	clk_unregister_fixed_factor(clk);
+
+	return 0;
+}
+
+static int of_fixed_factor_clk_probe(struct platform_device *pdev)
+{
+	struct clk *clk;
+
+	/*
+	 * This function is not executed when of_fixed_factor_clk_setup
+	 * succeeded.
+	 */
+	clk = _of_fixed_factor_clk_setup(pdev->dev.of_node);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	platform_set_drvdata(pdev, clk);
+
+	return 0;
+}
+
+static const struct of_device_id of_fixed_factor_clk_ids[] = {
+	{ .compatible = "fixed-factor-clock" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, of_fixed_factor_clk_ids);
+
+static struct platform_driver of_fixed_factor_clk_driver = {
+	.driver = {
+		.name = "of_fixed_factor_clk",
+		.of_match_table = of_fixed_factor_clk_ids,
+	},
+	.probe = of_fixed_factor_clk_probe,
+	.remove = of_fixed_factor_clk_remove,
+};
+builtin_platform_driver(of_fixed_factor_clk_driver);
 #endif
diff --git a/drivers/clk/clk-fixed-rate.c b/drivers/clk/clk-fixed-rate.c
index 2edb393..b5c46b3 100644
--- a/drivers/clk/clk-fixed-rate.c
+++ b/drivers/clk/clk-fixed-rate.c
@@ -15,6 +15,7 @@
 #include <linux/io.h>
 #include <linux/err.h>
 #include <linux/of.h>
+#include <linux/platform_device.h>
 
 /*
  * DOC: basic fixed-rate clock that cannot gate
@@ -157,18 +158,16 @@
 EXPORT_SYMBOL_GPL(clk_hw_unregister_fixed_rate);
 
 #ifdef CONFIG_OF
-/**
- * of_fixed_clk_setup() - Setup function for simple fixed rate clock
- */
-void of_fixed_clk_setup(struct device_node *node)
+static struct clk *_of_fixed_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
 	const char *clk_name = node->name;
 	u32 rate;
 	u32 accuracy = 0;
+	int ret;
 
 	if (of_property_read_u32(node, "clock-frequency", &rate))
-		return;
+		return ERR_PTR(-EIO);
 
 	of_property_read_u32(node, "clock-accuracy", &accuracy);
 
@@ -176,9 +175,66 @@
 
 	clk = clk_register_fixed_rate_with_accuracy(NULL, clk_name, NULL,
 						    0, rate, accuracy);
-	if (!IS_ERR(clk))
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	if (IS_ERR(clk))
+		return clk;
+
+	ret = of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	if (ret) {
+		clk_unregister(clk);
+		return ERR_PTR(ret);
+	}
+
+	return clk;
 }
-EXPORT_SYMBOL_GPL(of_fixed_clk_setup);
+
+/**
+ * of_fixed_clk_setup() - Setup function for simple fixed rate clock
+ */
+void __init of_fixed_clk_setup(struct device_node *node)
+{
+	_of_fixed_clk_setup(node);
+}
 CLK_OF_DECLARE(fixed_clk, "fixed-clock", of_fixed_clk_setup);
+
+static int of_fixed_clk_remove(struct platform_device *pdev)
+{
+	struct clk *clk = platform_get_drvdata(pdev);
+
+	clk_unregister_fixed_rate(clk);
+
+	return 0;
+}
+
+static int of_fixed_clk_probe(struct platform_device *pdev)
+{
+	struct clk *clk;
+
+	/*
+	 * This function is not executed when of_fixed_clk_setup
+	 * succeeded.
+	 */
+	clk = _of_fixed_clk_setup(pdev->dev.of_node);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	platform_set_drvdata(pdev, clk);
+
+	return 0;
+}
+
+static const struct of_device_id of_fixed_clk_ids[] = {
+	{ .compatible = "fixed-clock" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, of_fixed_clk_ids);
+
+static struct platform_driver of_fixed_clk_driver = {
+	.driver = {
+		.name = "of_fixed_clk",
+		.of_match_table = of_fixed_clk_ids,
+	},
+	.probe = of_fixed_clk_probe,
+	.remove = of_fixed_clk_remove,
+};
+builtin_platform_driver(of_fixed_clk_driver);
 #endif
diff --git a/drivers/clk/clk-ls1x.c b/drivers/clk/clk-ls1x.c
deleted file mode 100644
index 5097831..0000000
--- a/drivers/clk/clk-ls1x.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright (c) 2012 Zhang, Keguang <keguang.zhang@gmail.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/clkdev.h>
-#include <linux/clk-provider.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-
-#include <loongson1.h>
-
-#define OSC		(33 * 1000000)
-#define DIV_APB		2
-
-static DEFINE_SPINLOCK(_lock);
-
-static int ls1x_pll_clk_enable(struct clk_hw *hw)
-{
-	return 0;
-}
-
-static void ls1x_pll_clk_disable(struct clk_hw *hw)
-{
-}
-
-static unsigned long ls1x_pll_recalc_rate(struct clk_hw *hw,
-					  unsigned long parent_rate)
-{
-	u32 pll, rate;
-
-	pll = __raw_readl(LS1X_CLK_PLL_FREQ);
-	rate = 12 + (pll & 0x3f) + (((pll >> 8) & 0x3ff) >> 10);
-	rate *= OSC;
-	rate >>= 1;
-
-	return rate;
-}
-
-static const struct clk_ops ls1x_pll_clk_ops = {
-	.enable = ls1x_pll_clk_enable,
-	.disable = ls1x_pll_clk_disable,
-	.recalc_rate = ls1x_pll_recalc_rate,
-};
-
-static struct clk *__init clk_register_pll(struct device *dev,
-					   const char *name,
-					   const char *parent_name,
-					   unsigned long flags)
-{
-	struct clk_hw *hw;
-	struct clk *clk;
-	struct clk_init_data init;
-
-	/* allocate the divider */
-	hw = kzalloc(sizeof(struct clk_hw), GFP_KERNEL);
-	if (!hw) {
-		pr_err("%s: could not allocate clk_hw\n", __func__);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	init.name = name;
-	init.ops = &ls1x_pll_clk_ops;
-	init.flags = flags | CLK_IS_BASIC;
-	init.parent_names = (parent_name ? &parent_name : NULL);
-	init.num_parents = (parent_name ? 1 : 0);
-	hw->init = &init;
-
-	/* register the clock */
-	clk = clk_register(dev, hw);
-
-	if (IS_ERR(clk))
-		kfree(hw);
-
-	return clk;
-}
-
-static const char * const cpu_parents[] = { "cpu_clk_div", "osc_33m_clk", };
-static const char * const ahb_parents[] = { "ahb_clk_div", "osc_33m_clk", };
-static const char * const dc_parents[] = { "dc_clk_div", "osc_33m_clk", };
-
-void __init ls1x_clk_init(void)
-{
-	struct clk *clk;
-
-	clk = clk_register_fixed_rate(NULL, "osc_33m_clk", NULL, 0, OSC);
-	clk_register_clkdev(clk, "osc_33m_clk", NULL);
-
-	/* clock derived from 33 MHz OSC clk */
-	clk = clk_register_pll(NULL, "pll_clk", "osc_33m_clk", 0);
-	clk_register_clkdev(clk, "pll_clk", NULL);
-
-	/* clock derived from PLL clk */
-	/*                                 _____
-	 *         _______________________|     |
-	 * OSC ___/                       | MUX |___ CPU CLK
-	 *        \___ PLL ___ CPU DIV ___|     |
-	 *                                |_____|
-	 */
-	clk = clk_register_divider(NULL, "cpu_clk_div", "pll_clk",
-				   CLK_GET_RATE_NOCACHE, LS1X_CLK_PLL_DIV,
-				   DIV_CPU_SHIFT, DIV_CPU_WIDTH,
-				   CLK_DIVIDER_ONE_BASED |
-				   CLK_DIVIDER_ROUND_CLOSEST, &_lock);
-	clk_register_clkdev(clk, "cpu_clk_div", NULL);
-	clk = clk_register_mux(NULL, "cpu_clk", cpu_parents,
-			       ARRAY_SIZE(cpu_parents),
-			       CLK_SET_RATE_NO_REPARENT, LS1X_CLK_PLL_DIV,
-			       BYPASS_CPU_SHIFT, BYPASS_CPU_WIDTH, 0, &_lock);
-	clk_register_clkdev(clk, "cpu_clk", NULL);
-
-	/*                                 _____
-	 *         _______________________|     |
-	 * OSC ___/                       | MUX |___ DC  CLK
-	 *        \___ PLL ___ DC  DIV ___|     |
-	 *                                |_____|
-	 */
-	clk = clk_register_divider(NULL, "dc_clk_div", "pll_clk",
-				   0, LS1X_CLK_PLL_DIV, DIV_DC_SHIFT,
-				   DIV_DC_WIDTH, CLK_DIVIDER_ONE_BASED, &_lock);
-	clk_register_clkdev(clk, "dc_clk_div", NULL);
-	clk = clk_register_mux(NULL, "dc_clk", dc_parents,
-			       ARRAY_SIZE(dc_parents),
-			       CLK_SET_RATE_NO_REPARENT, LS1X_CLK_PLL_DIV,
-			       BYPASS_DC_SHIFT, BYPASS_DC_WIDTH, 0, &_lock);
-	clk_register_clkdev(clk, "dc_clk", NULL);
-
-	/*                                 _____
-	 *         _______________________|     |
-	 * OSC ___/                       | MUX |___ DDR CLK
-	 *        \___ PLL ___ DDR DIV ___|     |
-	 *                                |_____|
-	 */
-	clk = clk_register_divider(NULL, "ahb_clk_div", "pll_clk",
-				   0, LS1X_CLK_PLL_DIV, DIV_DDR_SHIFT,
-				   DIV_DDR_WIDTH, CLK_DIVIDER_ONE_BASED,
-				   &_lock);
-	clk_register_clkdev(clk, "ahb_clk_div", NULL);
-	clk = clk_register_mux(NULL, "ahb_clk", ahb_parents,
-			       ARRAY_SIZE(ahb_parents),
-			       CLK_SET_RATE_NO_REPARENT, LS1X_CLK_PLL_DIV,
-			       BYPASS_DDR_SHIFT, BYPASS_DDR_WIDTH, 0, &_lock);
-	clk_register_clkdev(clk, "ahb_clk", NULL);
-	clk_register_clkdev(clk, "stmmaceth", NULL);
-
-	/* clock derived from AHB clk */
-	/* APB clk is always half of the AHB clk */
-	clk = clk_register_fixed_factor(NULL, "apb_clk", "ahb_clk", 0, 1,
-					DIV_APB);
-	clk_register_clkdev(clk, "apb_clk", NULL);
-	clk_register_clkdev(clk, "ls1x_i2c", NULL);
-	clk_register_clkdev(clk, "ls1x_pwmtimer", NULL);
-	clk_register_clkdev(clk, "ls1x_spi", NULL);
-	clk_register_clkdev(clk, "ls1x_wdt", NULL);
-	clk_register_clkdev(clk, "serial8250", NULL);
-}
diff --git a/drivers/clk/clk-max-gen.c b/drivers/clk/clk-max-gen.c
deleted file mode 100644
index 35af9cb..0000000
--- a/drivers/clk/clk-max-gen.c
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * clk-max-gen.c - Generic clock driver for Maxim PMICs clocks
- *
- * Copyright (C) 2014 Google, Inc
- *
- * Copyright (C) 2012 Samsung Electornics
- * Jonghwa Lee <jonghwa3.lee@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * This driver is based on clk-max77686.c
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <linux/regmap.h>
-#include <linux/platform_device.h>
-#include <linux/clk-provider.h>
-#include <linux/mutex.h>
-#include <linux/clkdev.h>
-#include <linux/of.h>
-#include <linux/export.h>
-
-#include "clk-max-gen.h"
-
-struct max_gen_clk {
-	struct regmap *regmap;
-	u32 mask;
-	u32 reg;
-	struct clk_hw hw;
-};
-
-static struct max_gen_clk *to_max_gen_clk(struct clk_hw *hw)
-{
-	return container_of(hw, struct max_gen_clk, hw);
-}
-
-static int max_gen_clk_prepare(struct clk_hw *hw)
-{
-	struct max_gen_clk *max_gen = to_max_gen_clk(hw);
-
-	return regmap_update_bits(max_gen->regmap, max_gen->reg,
-				  max_gen->mask, max_gen->mask);
-}
-
-static void max_gen_clk_unprepare(struct clk_hw *hw)
-{
-	struct max_gen_clk *max_gen = to_max_gen_clk(hw);
-
-	regmap_update_bits(max_gen->regmap, max_gen->reg,
-			   max_gen->mask, ~max_gen->mask);
-}
-
-static int max_gen_clk_is_prepared(struct clk_hw *hw)
-{
-	struct max_gen_clk *max_gen = to_max_gen_clk(hw);
-	int ret;
-	u32 val;
-
-	ret = regmap_read(max_gen->regmap, max_gen->reg, &val);
-
-	if (ret < 0)
-		return -EINVAL;
-
-	return val & max_gen->mask;
-}
-
-static unsigned long max_gen_recalc_rate(struct clk_hw *hw,
-					 unsigned long parent_rate)
-{
-	return 32768;
-}
-
-struct clk_ops max_gen_clk_ops = {
-	.prepare	= max_gen_clk_prepare,
-	.unprepare	= max_gen_clk_unprepare,
-	.is_prepared	= max_gen_clk_is_prepared,
-	.recalc_rate	= max_gen_recalc_rate,
-};
-EXPORT_SYMBOL_GPL(max_gen_clk_ops);
-
-static struct clk *max_gen_clk_register(struct device *dev,
-					struct max_gen_clk *max_gen)
-{
-	struct clk *clk;
-	struct clk_hw *hw = &max_gen->hw;
-	int ret;
-
-	clk = devm_clk_register(dev, hw);
-	if (IS_ERR(clk))
-		return clk;
-
-	ret = clk_register_clkdev(clk, hw->init->name, NULL);
-
-	if (ret)
-		return ERR_PTR(ret);
-
-	return clk;
-}
-
-int max_gen_clk_probe(struct platform_device *pdev, struct regmap *regmap,
-		      u32 reg, struct clk_init_data *clks_init, int num_init)
-{
-	int i, ret;
-	struct max_gen_clk *max_gen_clks;
-	struct clk **clocks;
-	struct device *dev = pdev->dev.parent;
-	const char *clk_name;
-	struct clk_init_data *init;
-
-	clocks = devm_kzalloc(dev, sizeof(struct clk *) * num_init, GFP_KERNEL);
-	if (!clocks)
-		return -ENOMEM;
-
-	max_gen_clks = devm_kzalloc(dev, sizeof(struct max_gen_clk)
-				    * num_init, GFP_KERNEL);
-	if (!max_gen_clks)
-		return -ENOMEM;
-
-	for (i = 0; i < num_init; i++) {
-		max_gen_clks[i].regmap = regmap;
-		max_gen_clks[i].mask = 1 << i;
-		max_gen_clks[i].reg = reg;
-
-		init = devm_kzalloc(dev, sizeof(*init), GFP_KERNEL);
-		if (!init)
-			return -ENOMEM;
-
-		if (dev->of_node &&
-		    !of_property_read_string_index(dev->of_node,
-						   "clock-output-names",
-						   i, &clk_name))
-			init->name = clk_name;
-		else
-			init->name = clks_init[i].name;
-
-		init->ops = clks_init[i].ops;
-		init->flags = clks_init[i].flags;
-
-		max_gen_clks[i].hw.init = init;
-
-		clocks[i] = max_gen_clk_register(dev, &max_gen_clks[i]);
-		if (IS_ERR(clocks[i])) {
-			ret = PTR_ERR(clocks[i]);
-			dev_err(dev, "failed to register %s\n",
-				max_gen_clks[i].hw.init->name);
-			return ret;
-		}
-	}
-
-	platform_set_drvdata(pdev, clocks);
-
-	if (dev->of_node) {
-		struct clk_onecell_data *of_data;
-
-		of_data = devm_kzalloc(dev, sizeof(*of_data), GFP_KERNEL);
-		if (!of_data)
-			return -ENOMEM;
-
-		of_data->clks = clocks;
-		of_data->clk_num = num_init;
-		ret = of_clk_add_provider(dev->of_node, of_clk_src_onecell_get,
-					  of_data);
-
-		if (ret) {
-			dev_err(dev, "failed to register OF clock provider\n");
-			return ret;
-		}
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(max_gen_clk_probe);
-
-int max_gen_clk_remove(struct platform_device *pdev, int num_init)
-{
-	struct device *dev = pdev->dev.parent;
-
-	if (dev->of_node)
-		of_clk_del_provider(dev->of_node);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(max_gen_clk_remove);
diff --git a/drivers/clk/clk-max-gen.h b/drivers/clk/clk-max-gen.h
deleted file mode 100644
index 997e86f..0000000
--- a/drivers/clk/clk-max-gen.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * clk-max-gen.h - Generic clock driver for Maxim PMICs clocks
- *
- * Copyright (C) 2014 Google, Inc
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef __CLK_MAX_GEN_H__
-#define __CLK_MAX_GEN_H__
-
-#include <linux/types.h>
-#include <linux/device.h>
-#include <linux/clkdev.h>
-#include <linux/regmap.h>
-#include <linux/platform_device.h>
-
-int max_gen_clk_probe(struct platform_device *pdev, struct regmap *regmap,
-		      u32 reg, struct clk_init_data *clks_init, int num_init);
-int max_gen_clk_remove(struct platform_device *pdev, int num_init);
-extern struct clk_ops max_gen_clk_ops;
-
-#endif /* __CLK_MAX_GEN_H__ */
diff --git a/drivers/clk/clk-max77686.c b/drivers/clk/clk-max77686.c
index 9b6f277..b637f59 100644
--- a/drivers/clk/clk-max77686.c
+++ b/drivers/clk/clk-max77686.c
@@ -1,5 +1,5 @@
 /*
- * clk-max77686.c - Clock driver for Maxim 77686
+ * clk-max77686.c - Clock driver for Maxim 77686/MAX77802
  *
  * Copyright (C) 2012 Samsung Electornics
  * Jonghwa Lee <jonghwa3.lee@samsung.com>
@@ -25,46 +25,284 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/mfd/max77620.h>
 #include <linux/mfd/max77686.h>
 #include <linux/mfd/max77686-private.h>
 #include <linux/clk-provider.h>
 #include <linux/mutex.h>
 #include <linux/clkdev.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
 
 #include <dt-bindings/clock/maxim,max77686.h>
-#include "clk-max-gen.h"
+#include <dt-bindings/clock/maxim,max77802.h>
+#include <dt-bindings/clock/maxim,max77620.h>
 
-static struct clk_init_data max77686_clks_init[MAX77686_CLKS_NUM] = {
+#define MAX77802_CLOCK_LOW_JITTER_SHIFT 0x3
+
+enum max77686_chip_name {
+	CHIP_MAX77686,
+	CHIP_MAX77802,
+	CHIP_MAX77620,
+};
+
+struct max77686_hw_clk_info {
+	const char *name;
+	u32 clk_reg;
+	u32 clk_enable_mask;
+	u32 flags;
+};
+
+struct max77686_clk_init_data {
+	struct regmap *regmap;
+	struct clk_hw hw;
+	struct clk_init_data clk_idata;
+	const struct max77686_hw_clk_info *clk_info;
+};
+
+struct max77686_clk_driver_data {
+	enum max77686_chip_name chip;
+	struct max77686_clk_init_data *max_clk_data;
+	size_t num_clks;
+};
+
+static const struct
+max77686_hw_clk_info max77686_hw_clks_info[MAX77686_CLKS_NUM] = {
 	[MAX77686_CLK_AP] = {
 		.name = "32khz_ap",
-		.ops = &max_gen_clk_ops,
+		.clk_reg = MAX77686_REG_32KHZ,
+		.clk_enable_mask = BIT(MAX77686_CLK_AP),
 	},
 	[MAX77686_CLK_CP] = {
 		.name = "32khz_cp",
-		.ops = &max_gen_clk_ops,
+		.clk_reg = MAX77686_REG_32KHZ,
+		.clk_enable_mask = BIT(MAX77686_CLK_CP),
 	},
 	[MAX77686_CLK_PMIC] = {
 		.name = "32khz_pmic",
-		.ops = &max_gen_clk_ops,
+		.clk_reg = MAX77686_REG_32KHZ,
+		.clk_enable_mask = BIT(MAX77686_CLK_PMIC),
 	},
 };
 
+static const struct
+max77686_hw_clk_info max77802_hw_clks_info[MAX77802_CLKS_NUM] = {
+	[MAX77802_CLK_32K_AP] = {
+		.name = "32khz_ap",
+		.clk_reg = MAX77802_REG_32KHZ,
+		.clk_enable_mask = BIT(MAX77802_CLK_32K_AP),
+	},
+	[MAX77802_CLK_32K_CP] = {
+		.name = "32khz_cp",
+		.clk_reg = MAX77802_REG_32KHZ,
+		.clk_enable_mask = BIT(MAX77802_CLK_32K_CP),
+	},
+};
+
+static const struct
+max77686_hw_clk_info max77620_hw_clks_info[MAX77620_CLKS_NUM] = {
+	[MAX77620_CLK_32K_OUT0] = {
+		.name = "32khz_out0",
+		.clk_reg = MAX77620_REG_CNFG1_32K,
+		.clk_enable_mask = MAX77620_CNFG1_32K_OUT0_EN,
+	},
+};
+
+static struct max77686_clk_init_data *to_max77686_clk_init_data(
+				struct clk_hw *hw)
+{
+	return container_of(hw, struct max77686_clk_init_data, hw);
+}
+
+static int max77686_clk_prepare(struct clk_hw *hw)
+{
+	struct max77686_clk_init_data *max77686 = to_max77686_clk_init_data(hw);
+
+	return regmap_update_bits(max77686->regmap, max77686->clk_info->clk_reg,
+				  max77686->clk_info->clk_enable_mask,
+				  max77686->clk_info->clk_enable_mask);
+}
+
+static void max77686_clk_unprepare(struct clk_hw *hw)
+{
+	struct max77686_clk_init_data *max77686 = to_max77686_clk_init_data(hw);
+
+	regmap_update_bits(max77686->regmap, max77686->clk_info->clk_reg,
+			   max77686->clk_info->clk_enable_mask,
+			   ~max77686->clk_info->clk_enable_mask);
+}
+
+static int max77686_clk_is_prepared(struct clk_hw *hw)
+{
+	struct max77686_clk_init_data *max77686 = to_max77686_clk_init_data(hw);
+	int ret;
+	u32 val;
+
+	ret = regmap_read(max77686->regmap, max77686->clk_info->clk_reg, &val);
+
+	if (ret < 0)
+		return -EINVAL;
+
+	return val & max77686->clk_info->clk_enable_mask;
+}
+
+static unsigned long max77686_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	return 32768;
+}
+
+static struct clk_ops max77686_clk_ops = {
+	.prepare	= max77686_clk_prepare,
+	.unprepare	= max77686_clk_unprepare,
+	.is_prepared	= max77686_clk_is_prepared,
+	.recalc_rate	= max77686_recalc_rate,
+};
+
+static struct clk_hw *
+of_clk_max77686_get(struct of_phandle_args *clkspec, void *data)
+{
+	struct max77686_clk_driver_data *drv_data = data;
+	unsigned int idx = clkspec->args[0];
+
+	if (idx >= drv_data->num_clks) {
+		pr_err("%s: invalid index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return &drv_data->max_clk_data[idx].hw;
+}
+
 static int max77686_clk_probe(struct platform_device *pdev)
 {
-	struct max77686_dev *iodev = dev_get_drvdata(pdev->dev.parent);
+	struct device *dev = &pdev->dev;
+	struct device *parent = dev->parent;
+	const struct platform_device_id *id = platform_get_device_id(pdev);
+	struct max77686_clk_driver_data *drv_data;
+	const struct max77686_hw_clk_info *hw_clks;
+	struct regmap *regmap;
+	int i, ret, num_clks;
 
-	return max_gen_clk_probe(pdev, iodev->regmap, MAX77686_REG_32KHZ,
-				 max77686_clks_init, MAX77686_CLKS_NUM);
+	drv_data = devm_kzalloc(dev, sizeof(*drv_data), GFP_KERNEL);
+	if (!drv_data)
+		return -ENOMEM;
+
+	regmap = dev_get_regmap(parent, NULL);
+	if (!regmap) {
+		dev_err(dev, "Failed to get rtc regmap\n");
+		return -ENODEV;
+	}
+
+	drv_data->chip = id->driver_data;
+
+	switch (drv_data->chip) {
+	case CHIP_MAX77686:
+		num_clks = MAX77686_CLKS_NUM;
+		hw_clks = max77686_hw_clks_info;
+		break;
+
+	case CHIP_MAX77802:
+		num_clks = MAX77802_CLKS_NUM;
+		hw_clks = max77802_hw_clks_info;
+		break;
+
+	case CHIP_MAX77620:
+		num_clks = MAX77620_CLKS_NUM;
+		hw_clks = max77620_hw_clks_info;
+		break;
+
+	default:
+		dev_err(dev, "Unknown Chip ID\n");
+		return -EINVAL;
+	}
+
+	drv_data->max_clk_data = devm_kcalloc(dev, num_clks,
+					      sizeof(*drv_data->max_clk_data),
+					      GFP_KERNEL);
+	if (!drv_data->max_clk_data)
+		return -ENOMEM;
+
+	for (i = 0; i < num_clks; i++) {
+		struct max77686_clk_init_data *max_clk_data;
+		const char *clk_name;
+
+		max_clk_data = &drv_data->max_clk_data[i];
+
+		max_clk_data->regmap = regmap;
+		max_clk_data->clk_info = &hw_clks[i];
+		max_clk_data->clk_idata.flags = hw_clks[i].flags;
+		max_clk_data->clk_idata.ops = &max77686_clk_ops;
+
+		if (parent->of_node &&
+		    !of_property_read_string_index(parent->of_node,
+						   "clock-output-names",
+						   i, &clk_name))
+			max_clk_data->clk_idata.name = clk_name;
+		else
+			max_clk_data->clk_idata.name = hw_clks[i].name;
+
+		max_clk_data->hw.init = &max_clk_data->clk_idata;
+
+		ret = devm_clk_hw_register(dev, &max_clk_data->hw);
+		if (ret) {
+			dev_err(dev, "Failed to clock register: %d\n", ret);
+			return ret;
+		}
+
+		ret = clk_hw_register_clkdev(&max_clk_data->hw,
+					     max_clk_data->clk_idata.name, NULL);
+		if (ret < 0) {
+			dev_err(dev, "Failed to clkdev register: %d\n", ret);
+			return ret;
+		}
+	}
+
+	if (parent->of_node) {
+		ret = of_clk_add_hw_provider(parent->of_node, of_clk_max77686_get,
+					     drv_data);
+
+		if (ret < 0) {
+			dev_err(dev, "Failed to register OF clock provider: %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	/* MAX77802: Enable low-jitter mode on the 32khz clocks. */
+	if (drv_data->chip == CHIP_MAX77802) {
+		ret = regmap_update_bits(regmap, MAX77802_REG_32KHZ,
+					 1 << MAX77802_CLOCK_LOW_JITTER_SHIFT,
+					 1 << MAX77802_CLOCK_LOW_JITTER_SHIFT);
+		if (ret < 0) {
+			dev_err(dev, "Failed to config low-jitter: %d\n", ret);
+			goto remove_of_clk_provider;
+		}
+	}
+
+	return 0;
+
+remove_of_clk_provider:
+	if (parent->of_node)
+		of_clk_del_provider(parent->of_node);
+
+	return ret;
 }
 
 static int max77686_clk_remove(struct platform_device *pdev)
 {
-	return max_gen_clk_remove(pdev, MAX77686_CLKS_NUM);
+	struct device *parent = pdev->dev.parent;
+
+	if (parent->of_node)
+		of_clk_del_provider(parent->of_node);
+
+	return 0;
 }
 
 static const struct platform_device_id max77686_clk_id[] = {
-	{ "max77686-clk", 0},
-	{ },
+	{ "max77686-clk", .driver_data = CHIP_MAX77686, },
+	{ "max77802-clk", .driver_data = CHIP_MAX77802, },
+	{ "max77620-clock", .driver_data = CHIP_MAX77620, },
+	{},
 };
 MODULE_DEVICE_TABLE(platform, max77686_clk_id);
 
diff --git a/drivers/clk/clk-max77802.c b/drivers/clk/clk-max77802.c
deleted file mode 100644
index 355dd2e..0000000
--- a/drivers/clk/clk-max77802.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * clk-max77802.c - Clock driver for Maxim 77802
- *
- * Copyright (C) 2014 Google, Inc
- *
- * Copyright (C) 2012 Samsung Electornics
- * Jonghwa Lee <jonghwa3.lee@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * This driver is based on clk-max77686.c
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/mfd/max77686-private.h>
-#include <linux/clk-provider.h>
-#include <linux/mutex.h>
-#include <linux/clkdev.h>
-
-#include <dt-bindings/clock/maxim,max77802.h>
-#include "clk-max-gen.h"
-
-#define MAX77802_CLOCK_OPMODE_MASK	0x1
-#define MAX77802_CLOCK_LOW_JITTER_SHIFT 0x3
-
-static struct clk_init_data max77802_clks_init[MAX77802_CLKS_NUM] = {
-	[MAX77802_CLK_32K_AP] = {
-		.name = "32khz_ap",
-		.ops = &max_gen_clk_ops,
-	},
-	[MAX77802_CLK_32K_CP] = {
-		.name = "32khz_cp",
-		.ops = &max_gen_clk_ops,
-	},
-};
-
-static int max77802_clk_probe(struct platform_device *pdev)
-{
-	struct max77686_dev *iodev = dev_get_drvdata(pdev->dev.parent);
-	int ret;
-
-	ret = max_gen_clk_probe(pdev, iodev->regmap, MAX77802_REG_32KHZ,
-				max77802_clks_init, MAX77802_CLKS_NUM);
-
-	if (ret) {
-		dev_err(&pdev->dev, "generic probe failed %d\n", ret);
-		return ret;
-	}
-
-	/* Enable low-jitter mode on the 32khz clocks. */
-	ret = regmap_update_bits(iodev->regmap, MAX77802_REG_32KHZ,
-				 1 << MAX77802_CLOCK_LOW_JITTER_SHIFT,
-				 1 << MAX77802_CLOCK_LOW_JITTER_SHIFT);
-	if (ret < 0)
-		dev_err(&pdev->dev, "failed to enable low-jitter mode\n");
-
-	return ret;
-}
-
-static int max77802_clk_remove(struct platform_device *pdev)
-{
-	return max_gen_clk_remove(pdev, MAX77802_CLKS_NUM);
-}
-
-static const struct platform_device_id max77802_clk_id[] = {
-	{ "max77802-clk", 0},
-	{ },
-};
-MODULE_DEVICE_TABLE(platform, max77802_clk_id);
-
-static struct platform_driver max77802_clk_driver = {
-	.driver = {
-		.name  = "max77802-clk",
-	},
-	.probe = max77802_clk_probe,
-	.remove = max77802_clk_remove,
-	.id_table = max77802_clk_id,
-};
-
-module_platform_driver(max77802_clk_driver);
-
-MODULE_DESCRIPTION("MAXIM 77802 Clock Driver");
-MODULE_AUTHOR("Javier Martinez Canillas <javier@osg.samsung.com");
-MODULE_LICENSE("GPL");
diff --git a/drivers/clk/clk-mb86s7x.c b/drivers/clk/clk-mb86s7x.c
index e081775..2a83a3ff 100644
--- a/drivers/clk/clk-mb86s7x.c
+++ b/drivers/clk/clk-mb86s7x.c
@@ -327,10 +327,11 @@
 	.set_rate = clc_set_rate,
 };
 
-struct clk *mb86s7x_clclk_register(struct device *cpu_dev)
+static struct clk_hw *mb86s7x_clclk_register(struct device *cpu_dev)
 {
 	struct clk_init_data init;
 	struct cl_clk *clc;
+	int ret;
 
 	clc = kzalloc(sizeof(*clc), GFP_KERNEL);
 	if (!clc)
@@ -344,14 +345,17 @@
 	init.flags = CLK_GET_RATE_NOCACHE;
 	init.num_parents = 0;
 
-	return devm_clk_register(cpu_dev, &clc->hw);
+	ret = devm_clk_hw_register(cpu_dev, &clc->hw);
+	if (ret)
+		return ERR_PTR(ret);
+	return &clc->hw;
 }
 
 static int mb86s7x_clclk_of_init(void)
 {
 	int cpu, ret = -ENODEV;
 	struct device_node *np;
-	struct clk *clk;
+	struct clk_hw *hw;
 
 	np = of_find_compatible_node(NULL, NULL, "fujitsu,mb86s70-scb-1.0");
 	if (!np || !of_device_is_available(np))
@@ -365,12 +369,12 @@
 			continue;
 		}
 
-		clk = mb86s7x_clclk_register(cpu_dev);
-		if (IS_ERR(clk)) {
+		hw = mb86s7x_clclk_register(cpu_dev);
+		if (IS_ERR(hw)) {
 			pr_err("failed to register cpu%d clock\n", cpu);
 			continue;
 		}
-		if (clk_register_clkdev(clk, NULL, dev_name(cpu_dev))) {
+		if (clk_hw_register_clkdev(hw, NULL, dev_name(cpu_dev))) {
 			pr_err("failed to register cpu%d clock lookup\n", cpu);
 			continue;
 		}
diff --git a/drivers/clk/clk-moxart.c b/drivers/clk/clk-moxart.c
index f37f719..b86dac8 100644
--- a/drivers/clk/clk-moxart.c
+++ b/drivers/clk/clk-moxart.c
@@ -19,7 +19,8 @@
 static void __init moxart_of_pll_clk_init(struct device_node *node)
 {
 	static void __iomem *base;
-	struct clk *clk, *ref_clk;
+	struct clk_hw *hw;
+	struct clk *ref_clk;
 	unsigned int mul;
 	const char *name = node->name;
 	const char *parent_name;
@@ -42,14 +43,14 @@
 		return;
 	}
 
-	clk = clk_register_fixed_factor(NULL, name, parent_name, 0, mul, 1);
-	if (IS_ERR(clk)) {
+	hw = clk_hw_register_fixed_factor(NULL, name, parent_name, 0, mul, 1);
+	if (IS_ERR(hw)) {
 		pr_err("%s: failed to register clock\n", node->full_name);
 		return;
 	}
 
-	clk_register_clkdev(clk, NULL, name);
-	of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	clk_hw_register_clkdev(hw, NULL, name);
+	of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(moxart_pll_clock, "moxa,moxart-pll-clock",
 	       moxart_of_pll_clk_init);
@@ -57,7 +58,8 @@
 static void __init moxart_of_apb_clk_init(struct device_node *node)
 {
 	static void __iomem *base;
-	struct clk *clk, *pll_clk;
+	struct clk_hw *hw;
+	struct clk *pll_clk;
 	unsigned int div, val;
 	unsigned int div_idx[] = { 2, 3, 4, 6, 8};
 	const char *name = node->name;
@@ -85,14 +87,14 @@
 		return;
 	}
 
-	clk = clk_register_fixed_factor(NULL, name, parent_name, 0, 1, div);
-	if (IS_ERR(clk)) {
+	hw = clk_hw_register_fixed_factor(NULL, name, parent_name, 0, 1, div);
+	if (IS_ERR(hw)) {
 		pr_err("%s: failed to register clock\n", node->full_name);
 		return;
 	}
 
-	clk_register_clkdev(clk, NULL, name);
-	of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	clk_hw_register_clkdev(hw, NULL, name);
+	of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw);
 }
 CLK_OF_DECLARE(moxart_apb_clock, "moxa,moxart-apb-clock",
 	       moxart_of_apb_clk_init);
diff --git a/drivers/clk/clk-nspire.c b/drivers/clk/clk-nspire.c
index 64f196a..f861011 100644
--- a/drivers/clk/clk-nspire.c
+++ b/drivers/clk/clk-nspire.c
@@ -69,7 +69,7 @@
 {
 	u32 val;
 	void __iomem *io;
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *clk_name = node->name;
 	const char *parent_name;
 	struct nspire_clk_info info;
@@ -85,10 +85,10 @@
 	of_property_read_string(node, "clock-output-names", &clk_name);
 	parent_name = of_clk_get_parent_name(node, 0);
 
-	clk = clk_register_fixed_factor(NULL, clk_name, parent_name, 0,
-					1, info.base_ahb_ratio);
-	if (!IS_ERR(clk))
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	hw = clk_hw_register_fixed_factor(NULL, clk_name, parent_name, 0,
+					  1, info.base_ahb_ratio);
+	if (!IS_ERR(hw))
+		of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw);
 }
 
 static void __init nspire_ahbdiv_setup_cx(struct device_node *node)
@@ -111,7 +111,7 @@
 {
 	u32 val;
 	void __iomem *io;
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *clk_name = node->name;
 	struct nspire_clk_info info;
 
@@ -125,9 +125,10 @@
 
 	of_property_read_string(node, "clock-output-names", &clk_name);
 
-	clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, info.base_clock);
-	if (!IS_ERR(clk))
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	hw = clk_hw_register_fixed_rate(NULL, clk_name, NULL, 0,
+					info.base_clock);
+	if (!IS_ERR(hw))
+		of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw);
 	else
 		return;
 
diff --git a/drivers/clk/clk-palmas.c b/drivers/clk/clk-palmas.c
index 8328863..31f590c 100644
--- a/drivers/clk/clk-palmas.c
+++ b/drivers/clk/clk-palmas.c
@@ -41,7 +41,6 @@
 
 struct palmas_clock_info {
 	struct device *dev;
-	struct clk *clk;
 	struct clk_hw hw;
 	struct palmas *palmas;
 	const struct palmas_clk32k_desc *clk_desc;
@@ -218,7 +217,7 @@
 	}
 
 	if (cinfo->ext_control_pin) {
-		ret = clk_prepare(cinfo->clk);
+		ret = clk_prepare(cinfo->hw.clk);
 		if (ret < 0) {
 			dev_err(cinfo->dev, "Clock prep failed, %d\n", ret);
 			return ret;
@@ -242,7 +241,6 @@
 	struct device_node *node = pdev->dev.of_node;
 	const struct palmas_clks_of_match_data *match_data;
 	struct palmas_clock_info *cinfo;
-	struct clk *clk;
 	int ret;
 
 	match_data = of_device_get_match_data(&pdev->dev);
@@ -261,22 +259,20 @@
 
 	cinfo->clk_desc = &match_data->desc;
 	cinfo->hw.init = &match_data->init;
-	clk = devm_clk_register(&pdev->dev, &cinfo->hw);
-	if (IS_ERR(clk)) {
-		ret = PTR_ERR(clk);
+	ret = devm_clk_hw_register(&pdev->dev, &cinfo->hw);
+	if (ret) {
 		dev_err(&pdev->dev, "Fail to register clock %s, %d\n",
 			match_data->desc.clk_name, ret);
 		return ret;
 	}
 
-	cinfo->clk = clk;
 	ret = palmas_clks_init_configure(cinfo);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "Clock config failed, %d\n", ret);
 		return ret;
 	}
 
-	ret = of_clk_add_provider(node, of_clk_src_simple_get, cinfo->clk);
+	ret = of_clk_add_hw_provider(node, of_clk_hw_simple_get, &cinfo->hw);
 	if (ret < 0)
 		dev_err(&pdev->dev, "Fail to add clock driver, %d\n", ret);
 	return ret;
diff --git a/drivers/clk/clk-pwm.c b/drivers/clk/clk-pwm.c
index 1630a1f..8cb9d11 100644
--- a/drivers/clk/clk-pwm.c
+++ b/drivers/clk/clk-pwm.c
@@ -61,7 +61,6 @@
 	struct pwm_device *pwm;
 	struct pwm_args pargs;
 	const char *clk_name;
-	struct clk *clk;
 	int ret;
 
 	clk_pwm = devm_kzalloc(&pdev->dev, sizeof(*clk_pwm), GFP_KERNEL);
@@ -107,11 +106,11 @@
 
 	clk_pwm->pwm = pwm;
 	clk_pwm->hw.init = &init;
-	clk = devm_clk_register(&pdev->dev, &clk_pwm->hw);
-	if (IS_ERR(clk))
-		return PTR_ERR(clk);
+	ret = devm_clk_hw_register(&pdev->dev, &clk_pwm->hw);
+	if (ret)
+		return ret;
 
-	return of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	return of_clk_add_hw_provider(node, of_clk_hw_simple_get, &clk_pwm->hw);
 }
 
 static int clk_pwm_remove(struct platform_device *pdev)
diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c
index 58566a17..20b1055 100644
--- a/drivers/clk/clk-qoriq.c
+++ b/drivers/clk/clk-qoriq.c
@@ -766,7 +766,11 @@
 	if (!hwc)
 		return NULL;
 
-	hwc->reg = cg->regs + 0x20 * idx;
+	if (cg->info.flags & CG_VER3)
+		hwc->reg = cg->regs + 0x70000 + 0x20 * idx;
+	else
+		hwc->reg = cg->regs + 0x20 * idx;
+
 	hwc->info = cg->info.cmux_groups[cg->info.cmux_to_group[idx]];
 
 	/*
diff --git a/drivers/clk/clk-rk808.c b/drivers/clk/clk-rk808.c
index 7438303..6461f28 100644
--- a/drivers/clk/clk-rk808.c
+++ b/drivers/clk/clk-rk808.c
@@ -22,11 +22,8 @@
 #include <linux/mfd/rk808.h>
 #include <linux/i2c.h>
 
-#define RK808_NR_OUTPUT 2
-
 struct rk808_clkout {
 	struct rk808 *rk808;
-	struct clk_onecell_data clk_data;
 	struct clk_hw		clkout1_hw;
 	struct clk_hw		clkout2_hw;
 };
@@ -85,14 +82,28 @@
 	.recalc_rate = rk808_clkout_recalc_rate,
 };
 
+static struct clk_hw *
+of_clk_rk808_get(struct of_phandle_args *clkspec, void *data)
+{
+	struct rk808_clkout *rk808_clkout = data;
+	unsigned int idx = clkspec->args[0];
+
+	if (idx >= 2) {
+		pr_err("%s: invalid index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return idx ? &rk808_clkout->clkout2_hw : &rk808_clkout->clkout1_hw;
+}
+
 static int rk808_clkout_probe(struct platform_device *pdev)
 {
 	struct rk808 *rk808 = dev_get_drvdata(pdev->dev.parent);
 	struct i2c_client *client = rk808->i2c;
 	struct device_node *node = client->dev.of_node;
 	struct clk_init_data init = {};
-	struct clk **clk_table;
 	struct rk808_clkout *rk808_clkout;
+	int ret;
 
 	rk808_clkout = devm_kzalloc(&client->dev,
 				    sizeof(*rk808_clkout), GFP_KERNEL);
@@ -101,11 +112,6 @@
 
 	rk808_clkout->rk808 = rk808;
 
-	clk_table = devm_kcalloc(&client->dev, RK808_NR_OUTPUT,
-				 sizeof(struct clk *), GFP_KERNEL);
-	if (!clk_table)
-		return -ENOMEM;
-
 	init.parent_names = NULL;
 	init.num_parents = 0;
 	init.name = "rk808-clkout1";
@@ -116,10 +122,9 @@
 	of_property_read_string_index(node, "clock-output-names",
 				      0, &init.name);
 
-	clk_table[0] = devm_clk_register(&client->dev,
-					 &rk808_clkout->clkout1_hw);
-	if (IS_ERR(clk_table[0]))
-		return PTR_ERR(clk_table[0]);
+	ret = devm_clk_hw_register(&client->dev, &rk808_clkout->clkout1_hw);
+	if (ret)
+		return ret;
 
 	init.name = "rk808-clkout2";
 	init.ops = &rk808_clkout2_ops;
@@ -129,16 +134,11 @@
 	of_property_read_string_index(node, "clock-output-names",
 				      1, &init.name);
 
-	clk_table[1] = devm_clk_register(&client->dev,
-					 &rk808_clkout->clkout2_hw);
-	if (IS_ERR(clk_table[1]))
-		return PTR_ERR(clk_table[1]);
+	ret = devm_clk_hw_register(&client->dev, &rk808_clkout->clkout2_hw);
+	if (ret)
+		return ret;
 
-	rk808_clkout->clk_data.clks = clk_table;
-	rk808_clkout->clk_data.clk_num = RK808_NR_OUTPUT;
-
-	return of_clk_add_provider(node, of_clk_src_onecell_get,
-				   &rk808_clkout->clk_data);
+	return of_clk_add_hw_provider(node, of_clk_rk808_get, rk808_clkout);
 }
 
 static int rk808_clkout_remove(struct platform_device *pdev)
diff --git a/drivers/clk/clk-scpi.c b/drivers/clk/clk-scpi.c
index 6962ee5..2a3e9d8 100644
--- a/drivers/clk/clk-scpi.c
+++ b/drivers/clk/clk-scpi.c
@@ -146,13 +146,13 @@
 	{}
 };
 
-static struct clk *
+static int
 scpi_clk_ops_init(struct device *dev, const struct of_device_id *match,
 		  struct scpi_clk *sclk, const char *name)
 {
 	struct clk_init_data init;
-	struct clk *clk;
 	unsigned long min = 0, max = 0;
+	int ret;
 
 	init.name = name;
 	init.flags = 0;
@@ -164,18 +164,18 @@
 	if (init.ops == &scpi_dvfs_ops) {
 		sclk->info = sclk->scpi_ops->dvfs_get_info(sclk->id);
 		if (IS_ERR(sclk->info))
-			return NULL;
+			return PTR_ERR(sclk->info);
 	} else if (init.ops == &scpi_clk_ops) {
 		if (sclk->scpi_ops->clk_get_range(sclk->id, &min, &max) || !max)
-			return NULL;
+			return -EINVAL;
 	} else {
-		return NULL;
+		return -EINVAL;
 	}
 
-	clk = devm_clk_register(dev, &sclk->hw);
-	if (!IS_ERR(clk) && max)
+	ret = devm_clk_hw_register(dev, &sclk->hw);
+	if (!ret && max)
 		clk_hw_set_rate_range(&sclk->hw, min, max);
-	return clk;
+	return ret;
 }
 
 struct scpi_clk_data {
@@ -183,7 +183,7 @@
 	unsigned int clk_num;
 };
 
-static struct clk *
+static struct clk_hw *
 scpi_of_clk_src_get(struct of_phandle_args *clkspec, void *data)
 {
 	struct scpi_clk *sclk;
@@ -193,7 +193,7 @@
 	for (count = 0; count < clk_data->clk_num; count++) {
 		sclk = clk_data->clk[count];
 		if (idx == sclk->id)
-			return sclk->hw.clk;
+			return &sclk->hw;
 	}
 
 	return ERR_PTR(-EINVAL);
@@ -202,8 +202,7 @@
 static int scpi_clk_add(struct device *dev, struct device_node *np,
 			const struct of_device_id *match)
 {
-	struct clk **clks;
-	int idx, count;
+	int idx, count, err;
 	struct scpi_clk_data *clk_data;
 
 	count = of_property_count_strings(np, "clock-output-names");
@@ -222,10 +221,6 @@
 	if (!clk_data->clk)
 		return -ENOMEM;
 
-	clks = devm_kcalloc(dev, count, sizeof(*clks), GFP_KERNEL);
-	if (!clks)
-		return -ENOMEM;
-
 	for (idx = 0; idx < count; idx++) {
 		struct scpi_clk *sclk;
 		const char *name;
@@ -249,15 +244,15 @@
 
 		sclk->id = val;
 
-		clks[idx] = scpi_clk_ops_init(dev, match, sclk, name);
-		if (IS_ERR_OR_NULL(clks[idx]))
+		err = scpi_clk_ops_init(dev, match, sclk, name);
+		if (err)
 			dev_err(dev, "failed to register clock '%s'\n", name);
 		else
 			dev_dbg(dev, "Registered clock '%s'\n", name);
 		clk_data->clk[idx] = sclk;
 	}
 
-	return of_clk_add_provider(np, scpi_of_clk_src_get, clk_data);
+	return of_clk_add_hw_provider(np, scpi_of_clk_src_get, clk_data);
 }
 
 static int scpi_clocks_remove(struct platform_device *pdev)
diff --git a/drivers/clk/clk-si514.c b/drivers/clk/clk-si514.c
index ceef25b..09b6718 100644
--- a/drivers/clk/clk-si514.c
+++ b/drivers/clk/clk-si514.c
@@ -305,7 +305,6 @@
 {
 	struct clk_si514 *data;
 	struct clk_init_data init;
-	struct clk *clk;
 	int err;
 
 	data = devm_kzalloc(&client->dev, sizeof(*data), GFP_KERNEL);
@@ -330,13 +329,13 @@
 
 	i2c_set_clientdata(client, data);
 
-	clk = devm_clk_register(&client->dev, &data->hw);
-	if (IS_ERR(clk)) {
+	err = devm_clk_hw_register(&client->dev, &data->hw);
+	if (err) {
 		dev_err(&client->dev, "clock registration failed\n");
-		return PTR_ERR(clk);
+		return err;
 	}
-	err = of_clk_add_provider(client->dev.of_node, of_clk_src_simple_get,
-			clk);
+	err = of_clk_add_hw_provider(client->dev.of_node, of_clk_hw_simple_get,
+				     &data->hw);
 	if (err) {
 		dev_err(&client->dev, "unable to add clk provider\n");
 		return err;
diff --git a/drivers/clk/clk-si5351.c b/drivers/clk/clk-si5351.c
index b1bc12c..b051db4 100644
--- a/drivers/clk/clk-si5351.c
+++ b/drivers/clk/clk-si5351.c
@@ -54,7 +54,6 @@
 	enum si5351_variant	variant;
 	struct i2c_client	*client;
 	struct regmap		*regmap;
-	struct clk_onecell_data onecell;
 
 	struct clk		*pxtal;
 	const char		*pxtal_name;
@@ -66,6 +65,7 @@
 	struct si5351_hw_data	pll[2];
 	struct si5351_hw_data	*msynth;
 	struct si5351_hw_data	*clkout;
+	size_t			num_clkout;
 };
 
 static const char * const si5351_input_names[] = {
@@ -1307,11 +1307,31 @@
 	of_node_put(child);
 	return -EINVAL;
 }
+
+static struct clk_hw *
+si53351_of_clk_get(struct of_phandle_args *clkspec, void *data)
+{
+	struct si5351_driver_data *drvdata = data;
+	unsigned int idx = clkspec->args[0];
+
+	if (idx >= drvdata->num_clkout) {
+		pr_err("%s: invalid index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return &drvdata->clkout[idx].hw;
+}
 #else
 static int si5351_dt_parse(struct i2c_client *client, enum si5351_variant variant)
 {
 	return 0;
 }
+
+static struct clk_hw *
+si53351_of_clk_get(struct of_phandle_args *clkspec, void *data)
+{
+	return NULL;
+}
 #endif /* CONFIG_OF */
 
 static int si5351_i2c_probe(struct i2c_client *client,
@@ -1321,7 +1341,6 @@
 	struct si5351_platform_data *pdata;
 	struct si5351_driver_data *drvdata;
 	struct clk_init_data init;
-	struct clk *clk;
 	const char *parent_names[4];
 	u8 num_parents, num_clocks;
 	int ret, n;
@@ -1438,10 +1457,9 @@
 		init.num_parents = 1;
 	}
 	drvdata->xtal.init = &init;
-	clk = devm_clk_register(&client->dev, &drvdata->xtal);
-	if (IS_ERR(clk)) {
+	ret = devm_clk_hw_register(&client->dev, &drvdata->xtal);
+	if (ret) {
 		dev_err(&client->dev, "unable to register %s\n", init.name);
-		ret = PTR_ERR(clk);
 		goto err_clk;
 	}
 
@@ -1456,11 +1474,10 @@
 			init.num_parents = 1;
 		}
 		drvdata->clkin.init = &init;
-		clk = devm_clk_register(&client->dev, &drvdata->clkin);
-		if (IS_ERR(clk)) {
+		ret = devm_clk_hw_register(&client->dev, &drvdata->clkin);
+		if (ret) {
 			dev_err(&client->dev, "unable to register %s\n",
 				init.name);
-			ret = PTR_ERR(clk);
 			goto err_clk;
 		}
 	}
@@ -1480,10 +1497,9 @@
 	init.flags = 0;
 	init.parent_names = parent_names;
 	init.num_parents = num_parents;
-	clk = devm_clk_register(&client->dev, &drvdata->pll[0].hw);
-	if (IS_ERR(clk)) {
+	ret = devm_clk_hw_register(&client->dev, &drvdata->pll[0].hw);
+	if (ret) {
 		dev_err(&client->dev, "unable to register %s\n", init.name);
-		ret = PTR_ERR(clk);
 		goto err_clk;
 	}
 
@@ -1505,10 +1521,9 @@
 		init.parent_names = parent_names;
 		init.num_parents = num_parents;
 	}
-	clk = devm_clk_register(&client->dev, &drvdata->pll[1].hw);
-	if (IS_ERR(clk)) {
+	ret = devm_clk_hw_register(&client->dev, &drvdata->pll[1].hw);
+	if (ret) {
 		dev_err(&client->dev, "unable to register %s\n", init.name);
-		ret = PTR_ERR(clk);
 		goto err_clk;
 	}
 
@@ -1524,13 +1539,9 @@
 				       sizeof(*drvdata->msynth), GFP_KERNEL);
 	drvdata->clkout = devm_kzalloc(&client->dev, num_clocks *
 				       sizeof(*drvdata->clkout), GFP_KERNEL);
+	drvdata->num_clkout = num_clocks;
 
-	drvdata->onecell.clk_num = num_clocks;
-	drvdata->onecell.clks = devm_kzalloc(&client->dev,
-		num_clocks * sizeof(*drvdata->onecell.clks), GFP_KERNEL);
-
-	if (WARN_ON(!drvdata->msynth || !drvdata->clkout ||
-		    !drvdata->onecell.clks)) {
+	if (WARN_ON(!drvdata->msynth || !drvdata->clkout)) {
 		ret = -ENOMEM;
 		goto err_clk;
 	}
@@ -1547,11 +1558,11 @@
 			init.flags |= CLK_SET_RATE_PARENT;
 		init.parent_names = parent_names;
 		init.num_parents = 2;
-		clk = devm_clk_register(&client->dev, &drvdata->msynth[n].hw);
-		if (IS_ERR(clk)) {
+		ret = devm_clk_hw_register(&client->dev,
+					   &drvdata->msynth[n].hw);
+		if (ret) {
 			dev_err(&client->dev, "unable to register %s\n",
 				init.name);
-			ret = PTR_ERR(clk);
 			goto err_clk;
 		}
 	}
@@ -1575,19 +1586,19 @@
 			init.flags |= CLK_SET_RATE_PARENT;
 		init.parent_names = parent_names;
 		init.num_parents = num_parents;
-		clk = devm_clk_register(&client->dev, &drvdata->clkout[n].hw);
-		if (IS_ERR(clk)) {
+		ret = devm_clk_hw_register(&client->dev,
+					   &drvdata->clkout[n].hw);
+		if (ret) {
 			dev_err(&client->dev, "unable to register %s\n",
 				init.name);
-			ret = PTR_ERR(clk);
 			goto err_clk;
 		}
-		drvdata->onecell.clks[n] = clk;
 
 		/* set initial clkout rate */
 		if (pdata->clkout[n].rate != 0) {
 			int ret;
-			ret = clk_set_rate(clk, pdata->clkout[n].rate);
+			ret = clk_set_rate(drvdata->clkout[n].hw.clk,
+					   pdata->clkout[n].rate);
 			if (ret != 0) {
 				dev_err(&client->dev, "Cannot set rate : %d\n",
 					ret);
@@ -1595,8 +1606,8 @@
 		}
 	}
 
-	ret = of_clk_add_provider(client->dev.of_node, of_clk_src_onecell_get,
-				  &drvdata->onecell);
+	ret = of_clk_add_hw_provider(client->dev.of_node, si53351_of_clk_get,
+				     drvdata);
 	if (ret) {
 		dev_err(&client->dev, "unable to add clk provider\n");
 		goto err_clk;
diff --git a/drivers/clk/clk-si570.c b/drivers/clk/clk-si570.c
index d566485..646af1d 100644
--- a/drivers/clk/clk-si570.c
+++ b/drivers/clk/clk-si570.c
@@ -408,7 +408,6 @@
 {
 	struct clk_si570 *data;
 	struct clk_init_data init;
-	struct clk *clk;
 	u32 initial_fout, factory_fout, stability;
 	int err;
 	enum clk_si570_variant variant = id->driver_data;
@@ -462,13 +461,13 @@
 	if (err)
 		return err;
 
-	clk = devm_clk_register(&client->dev, &data->hw);
-	if (IS_ERR(clk)) {
+	err = devm_clk_hw_register(&client->dev, &data->hw);
+	if (err) {
 		dev_err(&client->dev, "clock registration failed\n");
-		return PTR_ERR(clk);
+		return err;
 	}
-	err = of_clk_add_provider(client->dev.of_node, of_clk_src_simple_get,
-			clk);
+	err = of_clk_add_hw_provider(client->dev.of_node, of_clk_hw_simple_get,
+				     &data->hw);
 	if (err) {
 		dev_err(&client->dev, "unable to add clk provider\n");
 		return err;
@@ -477,7 +476,7 @@
 	/* Read the requested initial output frequency from device tree */
 	if (!of_property_read_u32(client->dev.of_node, "clock-frequency",
 				&initial_fout)) {
-		err = clk_set_rate(clk, initial_fout);
+		err = clk_set_rate(data->hw.clk, initial_fout);
 		if (err) {
 			of_clk_del_provider(client->dev.of_node);
 			return err;
diff --git a/drivers/clk/clk-twl6040.c b/drivers/clk/clk-twl6040.c
index 697c667..7b222a5 100644
--- a/drivers/clk/clk-twl6040.c
+++ b/drivers/clk/clk-twl6040.c
@@ -26,60 +26,73 @@
 #include <linux/mfd/twl6040.h>
 #include <linux/clk-provider.h>
 
-struct twl6040_clk {
+struct twl6040_pdmclk {
 	struct twl6040 *twl6040;
 	struct device *dev;
-	struct clk_hw mcpdm_fclk;
-	struct clk *clk;
+	struct clk_hw pdmclk_hw;
 	int enabled;
 };
 
-static int twl6040_bitclk_is_enabled(struct clk_hw *hw)
+static int twl6040_pdmclk_is_prepared(struct clk_hw *hw)
 {
-	struct twl6040_clk *twl6040_clk = container_of(hw, struct twl6040_clk,
-						       mcpdm_fclk);
-	return twl6040_clk->enabled;
+	struct twl6040_pdmclk *pdmclk = container_of(hw, struct twl6040_pdmclk,
+						     pdmclk_hw);
+
+	return pdmclk->enabled;
 }
 
-static int twl6040_bitclk_prepare(struct clk_hw *hw)
+static int twl6040_pdmclk_prepare(struct clk_hw *hw)
 {
-	struct twl6040_clk *twl6040_clk = container_of(hw, struct twl6040_clk,
-						       mcpdm_fclk);
+	struct twl6040_pdmclk *pdmclk = container_of(hw, struct twl6040_pdmclk,
+						     pdmclk_hw);
 	int ret;
 
-	ret = twl6040_power(twl6040_clk->twl6040, 1);
+	ret = twl6040_power(pdmclk->twl6040, 1);
 	if (!ret)
-		twl6040_clk->enabled = 1;
+		pdmclk->enabled = 1;
 
 	return ret;
 }
 
-static void twl6040_bitclk_unprepare(struct clk_hw *hw)
+static void twl6040_pdmclk_unprepare(struct clk_hw *hw)
 {
-	struct twl6040_clk *twl6040_clk = container_of(hw, struct twl6040_clk,
-						       mcpdm_fclk);
+	struct twl6040_pdmclk *pdmclk = container_of(hw, struct twl6040_pdmclk,
+						     pdmclk_hw);
 	int ret;
 
-	ret = twl6040_power(twl6040_clk->twl6040, 0);
+	ret = twl6040_power(pdmclk->twl6040, 0);
 	if (!ret)
-		twl6040_clk->enabled = 0;
+		pdmclk->enabled = 0;
+
 }
 
-static const struct clk_ops twl6040_mcpdm_ops = {
-	.is_enabled = twl6040_bitclk_is_enabled,
-	.prepare = twl6040_bitclk_prepare,
-	.unprepare = twl6040_bitclk_unprepare,
+static unsigned long twl6040_pdmclk_recalc_rate(struct clk_hw *hw,
+						unsigned long parent_rate)
+{
+	struct twl6040_pdmclk *pdmclk = container_of(hw, struct twl6040_pdmclk,
+						     pdmclk_hw);
+
+	return twl6040_get_sysclk(pdmclk->twl6040);
+}
+
+static const struct clk_ops twl6040_pdmclk_ops = {
+	.is_prepared = twl6040_pdmclk_is_prepared,
+	.prepare = twl6040_pdmclk_prepare,
+	.unprepare = twl6040_pdmclk_unprepare,
+	.recalc_rate = twl6040_pdmclk_recalc_rate,
 };
 
-static struct clk_init_data wm831x_clkout_init = {
-	.name = "mcpdm_fclk",
-	.ops = &twl6040_mcpdm_ops,
+static struct clk_init_data twl6040_pdmclk_init = {
+	.name = "pdmclk",
+	.ops = &twl6040_pdmclk_ops,
+	.flags = CLK_GET_RATE_NOCACHE,
 };
 
-static int twl6040_clk_probe(struct platform_device *pdev)
+static int twl6040_pdmclk_probe(struct platform_device *pdev)
 {
 	struct twl6040 *twl6040 = dev_get_drvdata(pdev->dev.parent);
-	struct twl6040_clk *clkdata;
+	struct twl6040_pdmclk *clkdata;
+	int ret;
 
 	clkdata = devm_kzalloc(&pdev->dev, sizeof(*clkdata), GFP_KERNEL);
 	if (!clkdata)
@@ -88,26 +101,28 @@
 	clkdata->dev = &pdev->dev;
 	clkdata->twl6040 = twl6040;
 
-	clkdata->mcpdm_fclk.init = &wm831x_clkout_init;
-	clkdata->clk = devm_clk_register(&pdev->dev, &clkdata->mcpdm_fclk);
-	if (IS_ERR(clkdata->clk))
-		return PTR_ERR(clkdata->clk);
+	clkdata->pdmclk_hw.init = &twl6040_pdmclk_init;
+	ret = devm_clk_hw_register(&pdev->dev, &clkdata->pdmclk_hw);
+	if (ret)
+		return ret;
 
 	platform_set_drvdata(pdev, clkdata);
 
-	return 0;
+	return of_clk_add_hw_provider(pdev->dev.parent->of_node,
+				      of_clk_hw_simple_get,
+				      &clkdata->pdmclk_hw);
 }
 
-static struct platform_driver twl6040_clk_driver = {
+static struct platform_driver twl6040_pdmclk_driver = {
 	.driver = {
-		.name = "twl6040-clk",
+		.name = "twl6040-pdmclk",
 	},
-	.probe = twl6040_clk_probe,
+	.probe = twl6040_pdmclk_probe,
 };
 
-module_platform_driver(twl6040_clk_driver);
+module_platform_driver(twl6040_pdmclk_driver);
 
 MODULE_DESCRIPTION("TWL6040 clock driver for McPDM functional clock");
 MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>");
-MODULE_ALIAS("platform:twl6040-clk");
+MODULE_ALIAS("platform:twl6040-pdmclk");
 MODULE_LICENSE("GPL");
diff --git a/drivers/clk/clk-vt8500.c b/drivers/clk/clk-vt8500.c
index 37368a3..4161a6f 100644
--- a/drivers/clk/clk-vt8500.c
+++ b/drivers/clk/clk-vt8500.c
@@ -232,7 +232,7 @@
 static __init void vtwm_device_clk_init(struct device_node *node)
 {
 	u32 en_reg, div_reg;
-	struct clk *clk;
+	struct clk_hw *hw;
 	struct clk_device *dev_clk;
 	const char *clk_name = node->name;
 	const char *parent_name;
@@ -301,13 +301,14 @@
 
 	dev_clk->hw.init = &init;
 
-	clk = clk_register(NULL, &dev_clk->hw);
-	if (WARN_ON(IS_ERR(clk))) {
+	hw = &dev_clk->hw;
+	rc = clk_hw_register(NULL, hw);
+	if (WARN_ON(rc)) {
 		kfree(dev_clk);
 		return;
 	}
-	rc = of_clk_add_provider(node, of_clk_src_simple_get, clk);
-	clk_register_clkdev(clk, clk_name, NULL);
+	rc = of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw);
+	clk_hw_register_clkdev(hw, clk_name, NULL);
 }
 CLK_OF_DECLARE(vt8500_device, "via,vt8500-device-clock", vtwm_device_clk_init);
 
@@ -681,7 +682,7 @@
 static __init void vtwm_pll_clk_init(struct device_node *node, int pll_type)
 {
 	u32 reg;
-	struct clk *clk;
+	struct clk_hw *hw;
 	struct clk_pll *pll_clk;
 	const char *clk_name = node->name;
 	const char *parent_name;
@@ -714,13 +715,14 @@
 
 	pll_clk->hw.init = &init;
 
-	clk = clk_register(NULL, &pll_clk->hw);
-	if (WARN_ON(IS_ERR(clk))) {
+	hw = &pll_clk->hw;
+	rc = clk_hw_register(NULL, &pll_clk->hw);
+	if (WARN_ON(rc)) {
 		kfree(pll_clk);
 		return;
 	}
-	rc = of_clk_add_provider(node, of_clk_src_simple_get, clk);
-	clk_register_clkdev(clk, clk_name, NULL);
+	rc = of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw);
+	clk_hw_register_clkdev(hw, clk_name, NULL);
 }
 
 
diff --git a/drivers/clk/clk-wm831x.c b/drivers/clk/clk-wm831x.c
index 88def4b..f4fdac5 100644
--- a/drivers/clk/clk-wm831x.c
+++ b/drivers/clk/clk-wm831x.c
@@ -24,9 +24,6 @@
 	struct clk_hw xtal_hw;
 	struct clk_hw fll_hw;
 	struct clk_hw clkout_hw;
-	struct clk *xtal;
-	struct clk *fll;
-	struct clk *clkout;
 	bool xtal_ena;
 };
 
@@ -370,19 +367,19 @@
 	clkdata->xtal_ena = ret & WM831X_XTAL_ENA;
 
 	clkdata->xtal_hw.init = &wm831x_xtal_init;
-	clkdata->xtal = devm_clk_register(&pdev->dev, &clkdata->xtal_hw);
-	if (IS_ERR(clkdata->xtal))
-		return PTR_ERR(clkdata->xtal);
+	ret = devm_clk_hw_register(&pdev->dev, &clkdata->xtal_hw);
+	if (ret)
+		return ret;
 
 	clkdata->fll_hw.init = &wm831x_fll_init;
-	clkdata->fll = devm_clk_register(&pdev->dev, &clkdata->fll_hw);
-	if (IS_ERR(clkdata->fll))
-		return PTR_ERR(clkdata->fll);
+	ret = devm_clk_hw_register(&pdev->dev, &clkdata->fll_hw);
+	if (ret)
+		return ret;
 
 	clkdata->clkout_hw.init = &wm831x_clkout_init;
-	clkdata->clkout = devm_clk_register(&pdev->dev, &clkdata->clkout_hw);
-	if (IS_ERR(clkdata->clkout))
-		return PTR_ERR(clkdata->clkout);
+	ret = devm_clk_hw_register(&pdev->dev, &clkdata->clkout_hw);
+	if (ret)
+		return ret;
 
 	platform_set_drvdata(pdev, clkdata);
 
diff --git a/drivers/clk/clk-xgene.c b/drivers/clk/clk-xgene.c
index 3433132..5daddf5 100644
--- a/drivers/clk/clk-xgene.c
+++ b/drivers/clk/clk-xgene.c
@@ -217,6 +217,226 @@
 	xgene_pllclk_init(np, PLL_TYPE_PCP);
 }
 
+/**
+ * struct xgene_clk_pmd - PMD clock
+ *
+ * @hw:		handle between common and hardware-specific interfaces
+ * @reg:	register containing the fractional scale multiplier (scaler)
+ * @shift:	shift to the unit bit field
+ * @denom:	1/denominator unit
+ * @lock:	register lock
+ * Flags:
+ * XGENE_CLK_PMD_SCALE_INVERTED - By default the scaler is the value read
+ *	from the register plus one. For example,
+ *		0 for (0 + 1) / denom,
+ *		1 for (1 + 1) / denom and etc.
+ *	If this flag is set, it is
+ *		0 for (denom - 0) / denom,
+ *		1 for (denom - 1) / denom and etc.
+ *
+ */
+struct xgene_clk_pmd {
+	struct clk_hw	hw;
+	void __iomem	*reg;
+	u8		shift;
+	u32		mask;
+	u64		denom;
+	u32		flags;
+	spinlock_t	*lock;
+};
+
+#define to_xgene_clk_pmd(_hw) container_of(_hw, struct xgene_clk_pmd, hw)
+
+#define XGENE_CLK_PMD_SCALE_INVERTED	BIT(0)
+#define XGENE_CLK_PMD_SHIFT		8
+#define XGENE_CLK_PMD_WIDTH		3
+
+static unsigned long xgene_clk_pmd_recalc_rate(struct clk_hw *hw,
+					       unsigned long parent_rate)
+{
+	struct xgene_clk_pmd *fd = to_xgene_clk_pmd(hw);
+	unsigned long flags = 0;
+	u64 ret, scale;
+	u32 val;
+
+	if (fd->lock)
+		spin_lock_irqsave(fd->lock, flags);
+	else
+		__acquire(fd->lock);
+
+	val = clk_readl(fd->reg);
+
+	if (fd->lock)
+		spin_unlock_irqrestore(fd->lock, flags);
+	else
+		__release(fd->lock);
+
+	ret = (u64)parent_rate;
+
+	scale = (val & fd->mask) >> fd->shift;
+	if (fd->flags & XGENE_CLK_PMD_SCALE_INVERTED)
+		scale = fd->denom - scale;
+	else
+		scale++;
+
+	/* freq = parent_rate * scaler / denom */
+	do_div(ret, fd->denom);
+	ret *= scale;
+	if (ret == 0)
+		ret = (u64)parent_rate;
+
+	return ret;
+}
+
+static long xgene_clk_pmd_round_rate(struct clk_hw *hw, unsigned long rate,
+				     unsigned long *parent_rate)
+{
+	struct xgene_clk_pmd *fd = to_xgene_clk_pmd(hw);
+	u64 ret, scale;
+
+	if (!rate || rate >= *parent_rate)
+		return *parent_rate;
+
+	/* freq = parent_rate * scaler / denom */
+	ret = rate * fd->denom;
+	scale = DIV_ROUND_UP_ULL(ret, *parent_rate);
+
+	ret = (u64)*parent_rate * scale;
+	do_div(ret, fd->denom);
+
+	return ret;
+}
+
+static int xgene_clk_pmd_set_rate(struct clk_hw *hw, unsigned long rate,
+				  unsigned long parent_rate)
+{
+	struct xgene_clk_pmd *fd = to_xgene_clk_pmd(hw);
+	unsigned long flags = 0;
+	u64 scale, ret;
+	u32 val;
+
+	/*
+	 * Compute the scaler:
+	 *
+	 * freq = parent_rate * scaler / denom, or
+	 * scaler = freq * denom / parent_rate
+	 */
+	ret = rate * fd->denom;
+	scale = DIV_ROUND_UP_ULL(ret, (u64)parent_rate);
+
+	/* Check if inverted */
+	if (fd->flags & XGENE_CLK_PMD_SCALE_INVERTED)
+		scale = fd->denom - scale;
+	else
+		scale--;
+
+	if (fd->lock)
+		spin_lock_irqsave(fd->lock, flags);
+	else
+		__acquire(fd->lock);
+
+	val = clk_readl(fd->reg);
+	val &= ~fd->mask;
+	val |= (scale << fd->shift);
+	clk_writel(val, fd->reg);
+
+	if (fd->lock)
+		spin_unlock_irqrestore(fd->lock, flags);
+	else
+		__release(fd->lock);
+
+	return 0;
+}
+
+static const struct clk_ops xgene_clk_pmd_ops = {
+	.recalc_rate = xgene_clk_pmd_recalc_rate,
+	.round_rate = xgene_clk_pmd_round_rate,
+	.set_rate = xgene_clk_pmd_set_rate,
+};
+
+static struct clk *
+xgene_register_clk_pmd(struct device *dev,
+		       const char *name, const char *parent_name,
+		       unsigned long flags, void __iomem *reg, u8 shift,
+		       u8 width, u64 denom, u32 clk_flags, spinlock_t *lock)
+{
+	struct xgene_clk_pmd *fd;
+	struct clk_init_data init;
+	struct clk *clk;
+
+	fd = kzalloc(sizeof(*fd), GFP_KERNEL);
+	if (!fd)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &xgene_clk_pmd_ops;
+	init.flags = flags;
+	init.parent_names = parent_name ? &parent_name : NULL;
+	init.num_parents = parent_name ? 1 : 0;
+
+	fd->reg = reg;
+	fd->shift = shift;
+	fd->mask = (BIT(width) - 1) << shift;
+	fd->denom = denom;
+	fd->flags = clk_flags;
+	fd->lock = lock;
+	fd->hw.init = &init;
+
+	clk = clk_register(dev, &fd->hw);
+	if (IS_ERR(clk)) {
+		pr_err("%s: could not register clk %s\n", __func__, name);
+		kfree(fd);
+		return NULL;
+	}
+
+	return clk;
+}
+
+static void xgene_pmdclk_init(struct device_node *np)
+{
+	const char *clk_name = np->full_name;
+	void __iomem *csr_reg;
+	struct resource res;
+	struct clk *clk;
+	u64 denom;
+	u32 flags = 0;
+	int rc;
+
+	/* Check if the entry is disabled */
+	if (!of_device_is_available(np))
+		return;
+
+	/* Parse the DTS register for resource */
+	rc = of_address_to_resource(np, 0, &res);
+	if (rc != 0) {
+		pr_err("no DTS register for %s\n", np->full_name);
+		return;
+	}
+	csr_reg = of_iomap(np, 0);
+	if (!csr_reg) {
+		pr_err("Unable to map resource for %s\n", np->full_name);
+		return;
+	}
+	of_property_read_string(np, "clock-output-names", &clk_name);
+
+	denom = BIT(XGENE_CLK_PMD_WIDTH);
+	flags |= XGENE_CLK_PMD_SCALE_INVERTED;
+
+	clk = xgene_register_clk_pmd(NULL, clk_name,
+				     of_clk_get_parent_name(np, 0), 0,
+				     csr_reg, XGENE_CLK_PMD_SHIFT,
+				     XGENE_CLK_PMD_WIDTH, denom,
+				     flags, &clk_lock);
+	if (!IS_ERR(clk)) {
+		of_clk_add_provider(np, of_clk_src_simple_get, clk);
+		clk_register_clkdev(clk, clk_name, NULL);
+		pr_debug("Add %s clock\n", clk_name);
+	} else {
+		if (csr_reg)
+			iounmap(csr_reg);
+	}
+}
+
 /* IP Clock */
 struct xgene_dev_parameters {
 	void __iomem *csr_reg;		/* CSR for IP clock */
@@ -543,6 +763,7 @@
 
 CLK_OF_DECLARE(xgene_socpll_clock, "apm,xgene-socpll-clock", xgene_socpllclk_init);
 CLK_OF_DECLARE(xgene_pcppll_clock, "apm,xgene-pcppll-clock", xgene_pcppllclk_init);
+CLK_OF_DECLARE(xgene_pmd_clock, "apm,xgene-pmd-clock", xgene_pmdclk_init);
 CLK_OF_DECLARE(xgene_socpll_v2_clock, "apm,xgene-socpll-v2-clock",
 	       xgene_socpllclk_init);
 CLK_OF_DECLARE(xgene_pcppll_v2_clock, "apm,xgene-pcppll-v2-clock",
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 820a939..0fb39fe 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1908,10 +1908,6 @@
 
 	clk_prepare_lock();
 
-	/* bail early if nothing to do */
-	if (degrees == clk->core->phase)
-		goto out;
-
 	trace_clk_set_phase(clk->core, degrees);
 
 	if (clk->core->ops->set_phase)
@@ -1922,7 +1918,6 @@
 	if (!ret)
 		clk->core->phase = degrees;
 
-out:
 	clk_prepare_unlock();
 
 	return ret;
@@ -2449,8 +2444,16 @@
 	hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
 		struct clk_core *parent = __clk_init_parent(orphan);
 
-		if (parent)
-			clk_core_reparent(orphan, parent);
+		/*
+		 * we could call __clk_set_parent, but that would result in a
+		 * redundant call to the .set_rate op, if it exists
+		 */
+		if (parent) {
+			__clk_set_parent_before(orphan, parent);
+			__clk_set_parent_after(orphan, parent, NULL);
+			__clk_recalc_accuracies(orphan);
+			__clk_recalc_rates(orphan, 0);
+		}
 	}
 
 	/*
@@ -2491,7 +2494,7 @@
 
 	/* This is to allow this function to be chained to others */
 	if (IS_ERR_OR_NULL(hw))
-		return (struct clk *) hw;
+		return ERR_CAST(hw);
 
 	clk = kzalloc(sizeof(*clk), GFP_KERNEL);
 	if (!clk)
@@ -3166,19 +3169,14 @@
 			      struct of_phandle_args *clkspec)
 {
 	struct clk *clk;
-	struct clk_hw *hw = ERR_PTR(-EPROBE_DEFER);
 
-	if (provider->get_hw) {
-		hw = provider->get_hw(clkspec, provider->data);
-	} else if (provider->get) {
-		clk = provider->get(clkspec, provider->data);
-		if (!IS_ERR(clk))
-			hw = __clk_get_hw(clk);
-		else
-			hw = ERR_CAST(clk);
-	}
+	if (provider->get_hw)
+		return provider->get_hw(clkspec, provider->data);
 
-	return hw;
+	clk = provider->get(clkspec, provider->data);
+	if (IS_ERR(clk))
+		return ERR_CAST(clk);
+	return __clk_get_hw(clk);
 }
 
 struct clk *__of_clk_get_from_provider(struct of_phandle_args *clkspec,
@@ -3186,7 +3184,7 @@
 {
 	struct of_clk_provider *provider;
 	struct clk *clk = ERR_PTR(-EPROBE_DEFER);
-	struct clk_hw *hw = ERR_PTR(-EPROBE_DEFER);
+	struct clk_hw *hw;
 
 	if (!clkspec)
 		return ERR_PTR(-EINVAL);
@@ -3194,12 +3192,13 @@
 	/* Check if we have such a provider in our array */
 	mutex_lock(&of_clk_mutex);
 	list_for_each_entry(provider, &of_clk_providers, link) {
-		if (provider->node == clkspec->np)
+		if (provider->node == clkspec->np) {
 			hw = __of_clk_get_hw_from_provider(provider, clkspec);
-		if (!IS_ERR(hw)) {
 			clk = __clk_create_clk(hw, dev_id, con_id);
+		}
 
-			if (!IS_ERR(clk) && !__clk_get(clk)) {
+		if (!IS_ERR(clk)) {
+			if (!__clk_get(clk)) {
 				__clk_free_clk(clk);
 				clk = ERR_PTR(-ENOENT);
 			}
@@ -3451,6 +3450,10 @@
 					&clk_provider_list, node) {
 			if (force || parent_ready(clk_provider->np)) {
 
+				/* Don't populate platform devices */
+				of_node_set_flag(clk_provider->np,
+						 OF_POPULATED);
+
 				clk_provider->clk_init_cb(clk_provider->np);
 				of_clk_set_defaults(clk_provider->np, true);
 
diff --git a/drivers/clk/h8300/clk-div.c b/drivers/clk/h8300/clk-div.c
index 4bf44a25..715b882 100644
--- a/drivers/clk/h8300/clk-div.c
+++ b/drivers/clk/h8300/clk-div.c
@@ -14,7 +14,7 @@
 static void __init h8300_div_clk_setup(struct device_node *node)
 {
 	unsigned int num_parents;
-	struct clk *clk;
+	struct clk_hw *hw;
 	const char *clk_name = node->name;
 	const char *parent_name;
 	void __iomem *divcr = NULL;
@@ -38,15 +38,15 @@
 
 	parent_name = of_clk_get_parent_name(node, 0);
 	of_property_read_u32(node, "renesas,width", &width);
-	clk = clk_register_divider(NULL, clk_name, parent_name,
+	hw = clk_hw_register_divider(NULL, clk_name, parent_name,
 				   CLK_SET_RATE_GATE, divcr, offset, width,
 				   CLK_DIVIDER_POWER_OF_TWO, &clklock);
-	if (!IS_ERR(clk)) {
-		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	if (!IS_ERR(hw)) {
+		of_clk_add_hw_provider(node, of_clk_hw_simple_get, hw);
 		return;
 	}
 	pr_err("%s: failed to register %s div clock (%ld)\n",
-	       __func__, clk_name, PTR_ERR(clk));
+	       __func__, clk_name, PTR_ERR(hw));
 error:
 	if (divcr)
 		iounmap(divcr);
diff --git a/drivers/clk/h8300/clk-h8s2678.c b/drivers/clk/h8300/clk-h8s2678.c
index c9c2fd5..a263124 100644
--- a/drivers/clk/h8300/clk-h8s2678.c
+++ b/drivers/clk/h8300/clk-h8s2678.c
@@ -84,11 +84,11 @@
 static void __init h8s2678_pll_clk_setup(struct device_node *node)
 {
 	unsigned int num_parents;
-	struct clk *clk;
 	const char *clk_name = node->name;
 	const char *parent_name;
 	struct pll_clock *pll_clock;
 	struct clk_init_data init;
+	int ret;
 
 	num_parents = of_clk_get_parent_count(node);
 	if (!num_parents) {
@@ -121,14 +121,14 @@
 	init.num_parents = 1;
 	pll_clock->hw.init = &init;
 
-	clk = clk_register(NULL, &pll_clock->hw);
-	if (IS_ERR(clk)) {
-		pr_err("%s: failed to register %s div clock (%ld)\n",
-		       __func__, clk_name, PTR_ERR(clk));
+	ret = clk_hw_register(NULL, &pll_clock->hw);
+	if (ret) {
+		pr_err("%s: failed to register %s div clock (%d)\n",
+		       __func__, clk_name, ret);
 		goto unmap_pllcr;
 	}
 
-	of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	of_clk_add_hw_provider(node, of_clk_hw_simple_get, &pll_clock->hw);
 	return;
 
 unmap_pllcr:
diff --git a/drivers/clk/imx/clk-imx35.c b/drivers/clk/imx/clk-imx35.c
index b0978d3..203cad6 100644
--- a/drivers/clk/imx/clk-imx35.c
+++ b/drivers/clk/imx/clk-imx35.c
@@ -66,20 +66,22 @@
 static const char *ipg_per_sel[] = {"ahb_per_div", "arm_per_div"};
 
 enum mx35_clks {
-	ckih, mpll, ppll, mpll_075, arm, hsp, hsp_div, hsp_sel, ahb, ipg,
-	arm_per_div, ahb_per_div, ipg_per, uart_sel, uart_div, esdhc_sel,
-	esdhc1_div, esdhc2_div, esdhc3_div, spdif_sel, spdif_div_pre,
-	spdif_div_post, ssi_sel, ssi1_div_pre, ssi1_div_post, ssi2_div_pre,
-	ssi2_div_post, usb_sel, usb_div, nfc_div, asrc_gate, pata_gate,
-	audmux_gate, can1_gate, can2_gate, cspi1_gate, cspi2_gate, ect_gate,
-	edio_gate, emi_gate, epit1_gate, epit2_gate, esai_gate, esdhc1_gate,
-	esdhc2_gate, esdhc3_gate, fec_gate, gpio1_gate, gpio2_gate, gpio3_gate,
-	gpt_gate, i2c1_gate, i2c2_gate, i2c3_gate, iomuxc_gate, ipu_gate,
-	kpp_gate, mlb_gate, mshc_gate, owire_gate, pwm_gate, rngc_gate,
-	rtc_gate, rtic_gate, scc_gate, sdma_gate, spba_gate, spdif_gate,
-	ssi1_gate, ssi2_gate, uart1_gate, uart2_gate, uart3_gate, usbotg_gate,
-	wdog_gate, max_gate, admux_gate, csi_gate, csi_div, csi_sel, iim_gate,
-	gpu2d_gate, ckil, clk_max
+	/*  0 */ ckih, mpll, ppll, mpll_075, arm, hsp, hsp_div, hsp_sel, ahb,
+	/*  9 */ ipg, arm_per_div, ahb_per_div, ipg_per, uart_sel, uart_div,
+	/* 15 */ esdhc_sel, esdhc1_div, esdhc2_div, esdhc3_div, spdif_sel,
+	/* 20 */ spdif_div_pre, spdif_div_post, ssi_sel, ssi1_div_pre,
+	/* 24 */ ssi1_div_post, ssi2_div_pre, ssi2_div_post, usb_sel, usb_div,
+	/* 29 */ nfc_div, asrc_gate, pata_gate, audmux_gate, can1_gate,
+	/* 34 */ can2_gate, cspi1_gate, cspi2_gate, ect_gate, edio_gate,
+	/* 39 */ emi_gate, epit1_gate, epit2_gate, esai_gate, esdhc1_gate,
+	/* 44 */ esdhc2_gate, esdhc3_gate, fec_gate, gpio1_gate, gpio2_gate,
+	/* 49 */ gpio3_gate, gpt_gate, i2c1_gate, i2c2_gate, i2c3_gate,
+	/* 54 */ iomuxc_gate, ipu_gate, kpp_gate, mlb_gate, mshc_gate,
+	/* 59 */ owire_gate, pwm_gate, rngc_gate, rtc_gate, rtic_gate, scc_gate,
+	/* 65 */ sdma_gate, spba_gate, spdif_gate, ssi1_gate, ssi2_gate,
+	/* 70 */ uart1_gate, uart2_gate, uart3_gate, usbotg_gate, wdog_gate,
+	/* 75 */ max_gate, admux_gate, csi_gate, csi_div, csi_sel, iim_gate,
+	/* 81 */ gpu2d_gate, ckil, clk_max
 };
 
 static struct clk *clk[clk_max];
@@ -115,7 +117,7 @@
 	}
 
 	clk[ckih] = imx_clk_fixed("ckih", 24000000);
-	clk[ckil] = imx_clk_fixed("ckih", 32768);
+	clk[ckil] = imx_clk_fixed("ckil", 32768);
 	clk[mpll] = imx_clk_pllv1(IMX_PLLV1_IMX35, "mpll", "ckih", base + MX35_CCM_MPCTL);
 	clk[ppll] = imx_clk_pllv1(IMX_PLLV1_IMX35, "ppll", "ckih", base + MX35_CCM_PPCTL);
 
diff --git a/drivers/clk/imx/clk-imx51-imx53.c b/drivers/clk/imx/clk-imx51-imx53.c
index 29d4c44..1e3c9ea 100644
--- a/drivers/clk/imx/clk-imx51-imx53.c
+++ b/drivers/clk/imx/clk-imx51-imx53.c
@@ -126,6 +126,7 @@
 static const char *mx51_spdif1_com_sel[] = { "spdif1_podf", "ssi2_root_gate", };
 static const char *step_sels[] = { "lp_apm", };
 static const char *cpu_podf_sels[] = { "pll1_sw", "step_sel" };
+static const char *ieee1588_sels[] = { "pll3_sw", "pll4_sw", "dummy" /* usbphy2_clk */, "dummy" /* fec_phy_clk */ };
 
 static struct clk *clk[IMX5_CLK_END];
 static struct clk_onecell_data clk_data;
@@ -543,6 +544,25 @@
 	clk[IMX5_CLK_I2C3_GATE]		= imx_clk_gate2("i2c3_gate", "per_root", MXC_CCM_CCGR1, 22);
 	clk[IMX5_CLK_SATA_GATE]		= imx_clk_gate2("sata_gate", "ipg", MXC_CCM_CCGR4, 2);
 
+	clk[IMX5_CLK_FIRI_SEL]		= imx_clk_mux("firi_sel", MXC_CCM_CSCMR2, 12, 2,
+						standard_pll_sel, ARRAY_SIZE(standard_pll_sel));
+	clk[IMX5_CLK_FIRI_PRED]		= imx_clk_divider("firi_pred", "firi_sel", MXC_CCM_CSCDR3, 6, 3);
+	clk[IMX5_CLK_FIRI_PODF]		= imx_clk_divider("firi_podf", "firi_pred", MXC_CCM_CSCDR3, 0, 6);
+	clk[IMX5_CLK_FIRI_SERIAL_GATE]	= imx_clk_gate2("firi_serial_gate", "firi_podf", MXC_CCM_CCGR1, 28);
+	clk[IMX5_CLK_FIRI_IPG_GATE]	= imx_clk_gate2("firi_ipg_gate", "ipg", MXC_CCM_CCGR1, 26);
+
+	clk[IMX5_CLK_CSI0_MCLK1_SEL]	= imx_clk_mux("csi0_mclk1_sel", MXC_CCM_CSCMR2, 22, 2,
+						standard_pll_sel, ARRAY_SIZE(standard_pll_sel));
+	clk[IMX5_CLK_CSI0_MCLK1_PRED]	= imx_clk_divider("csi0_mclk1_pred", "csi0_mclk1_sel", MXC_CCM_CSCDR4, 6, 3);
+	clk[IMX5_CLK_CSI0_MCLK1_PODF]	= imx_clk_divider("csi0_mclk1_podf", "csi0_mclk1_pred", MXC_CCM_CSCDR4, 0, 6);
+	clk[IMX5_CLK_CSI0_MCLK1_GATE]	= imx_clk_gate2("csi0_mclk1_serial_gate", "csi0_mclk1_podf", MXC_CCM_CCGR6, 4);
+
+	clk[IMX5_CLK_IEEE1588_SEL]	= imx_clk_mux("ieee1588_sel", MXC_CCM_CSCMR2, 14, 2,
+						ieee1588_sels, ARRAY_SIZE(ieee1588_sels));
+	clk[IMX5_CLK_IEEE1588_PRED]	= imx_clk_divider("ieee1588_pred", "ieee1588_sel", MXC_CCM_CSCDR2, 6, 3);
+	clk[IMX5_CLK_IEEE1588_PODF]	= imx_clk_divider("ieee1588_podf", "ieee1588_pred", MXC_CCM_CSCDR2, 0, 6);
+	clk[IMX5_CLK_IEEE1588_GATE]	= imx_clk_gate2("ieee1588_serial_gate", "ieee1588_podf", MXC_CCM_CCGR7, 6);
+
 	clk[IMX5_CLK_CKO1_SEL]		= imx_clk_mux("cko1_sel", MXC_CCM_CCOSR, 0, 4,
 						mx53_cko1_sel, ARRAY_SIZE(mx53_cko1_sel));
 	clk[IMX5_CLK_CKO1_PODF]		= imx_clk_divider("cko1_podf", "cko1_sel", MXC_CCM_CCOSR, 4, 3);
diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c
index ba1c1ae..ce8ea10 100644
--- a/drivers/clk/imx/clk-imx6q.c
+++ b/drivers/clk/imx/clk-imx6q.c
@@ -318,11 +318,16 @@
 		clk[IMX6QDL_CLK_IPG_PER_SEL] = imx_clk_mux("ipg_per_sel", base + 0x1c, 6, 1, ipg_per_sels, ARRAY_SIZE(ipg_per_sels));
 		clk[IMX6QDL_CLK_UART_SEL] = imx_clk_mux("uart_sel", base + 0x24, 6, 1, uart_sels, ARRAY_SIZE(uart_sels));
 		clk[IMX6QDL_CLK_GPU2D_CORE_SEL] = imx_clk_mux("gpu2d_core_sel", base + 0x18, 16, 2, gpu2d_core_sels_2, ARRAY_SIZE(gpu2d_core_sels_2));
+	} else if (clk_on_imx6dl()) {
+		clk[IMX6QDL_CLK_MLB_SEL] = imx_clk_mux("mlb_sel",   base + 0x18, 16, 2, gpu2d_core_sels,   ARRAY_SIZE(gpu2d_core_sels));
 	} else {
 		clk[IMX6QDL_CLK_GPU2D_CORE_SEL] = imx_clk_mux("gpu2d_core_sel",   base + 0x18, 16, 2, gpu2d_core_sels,   ARRAY_SIZE(gpu2d_core_sels));
 	}
 	clk[IMX6QDL_CLK_GPU3D_CORE_SEL]   = imx_clk_mux("gpu3d_core_sel",   base + 0x18, 4,  2, gpu3d_core_sels,   ARRAY_SIZE(gpu3d_core_sels));
-	clk[IMX6QDL_CLK_GPU3D_SHADER_SEL] = imx_clk_mux("gpu3d_shader_sel", base + 0x18, 8,  2, gpu3d_shader_sels, ARRAY_SIZE(gpu3d_shader_sels));
+	if (clk_on_imx6dl())
+		clk[IMX6QDL_CLK_GPU2D_CORE_SEL] = imx_clk_mux("gpu2d_core_sel", base + 0x18, 8,  2, gpu3d_shader_sels, ARRAY_SIZE(gpu3d_shader_sels));
+	else
+		clk[IMX6QDL_CLK_GPU3D_SHADER_SEL] = imx_clk_mux("gpu3d_shader_sel", base + 0x18, 8,  2, gpu3d_shader_sels, ARRAY_SIZE(gpu3d_shader_sels));
 	clk[IMX6QDL_CLK_IPU1_SEL]         = imx_clk_mux("ipu1_sel",         base + 0x3c, 9,  2, ipu_sels,          ARRAY_SIZE(ipu_sels));
 	clk[IMX6QDL_CLK_IPU2_SEL]         = imx_clk_mux("ipu2_sel",         base + 0x3c, 14, 2, ipu_sels,          ARRAY_SIZE(ipu_sels));
 	clk[IMX6QDL_CLK_LDB_DI0_SEL]      = imx_clk_mux_flags("ldb_di0_sel", base + 0x2c, 9,  3, ldb_di_sels,      ARRAY_SIZE(ldb_di_sels), CLK_SET_RATE_PARENT);
@@ -400,9 +405,15 @@
 		clk[IMX6QDL_CLK_LDB_DI0_DIV_3_5] = imx_clk_fixed_factor("ldb_di0_div_3_5", "ldb_di0_sel", 2, 7);
 		clk[IMX6QDL_CLK_LDB_DI1_DIV_3_5] = imx_clk_fixed_factor("ldb_di1_div_3_5", "ldb_di1_sel", 2, 7);
 	}
-	clk[IMX6QDL_CLK_GPU2D_CORE_PODF]  = imx_clk_divider("gpu2d_core_podf",  "gpu2d_core_sel",    base + 0x18, 23, 3);
+	if (clk_on_imx6dl())
+		clk[IMX6QDL_CLK_MLB_PODF]  = imx_clk_divider("mlb_podf",  "mlb_sel",    base + 0x18, 23, 3);
+	else
+		clk[IMX6QDL_CLK_GPU2D_CORE_PODF]  = imx_clk_divider("gpu2d_core_podf",  "gpu2d_core_sel",    base + 0x18, 23, 3);
 	clk[IMX6QDL_CLK_GPU3D_CORE_PODF]  = imx_clk_divider("gpu3d_core_podf",  "gpu3d_core_sel",    base + 0x18, 26, 3);
-	clk[IMX6QDL_CLK_GPU3D_SHADER]     = imx_clk_divider("gpu3d_shader",     "gpu3d_shader_sel",  base + 0x18, 29, 3);
+	if (clk_on_imx6dl())
+		clk[IMX6QDL_CLK_GPU2D_CORE_PODF]  = imx_clk_divider("gpu2d_core_podf",     "gpu2d_core_sel",  base + 0x18, 29, 3);
+	else
+		clk[IMX6QDL_CLK_GPU3D_SHADER]     = imx_clk_divider("gpu3d_shader",     "gpu3d_shader_sel",  base + 0x18, 29, 3);
 	clk[IMX6QDL_CLK_IPU1_PODF]        = imx_clk_divider("ipu1_podf",        "ipu1_sel",          base + 0x3c, 11, 3);
 	clk[IMX6QDL_CLK_IPU2_PODF]        = imx_clk_divider("ipu2_podf",        "ipu2_sel",          base + 0x3c, 16, 3);
 	clk[IMX6QDL_CLK_LDB_DI0_PODF]     = imx_clk_divider_flags("ldb_di0_podf", "ldb_di0_div_3_5", base + 0x20, 10, 1, 0);
@@ -473,14 +484,7 @@
 	clk[IMX6QDL_CLK_ESAI_MEM]     = imx_clk_gate2_shared("esai_mem", "ahb",             base + 0x6c, 16, &share_count_esai);
 	clk[IMX6QDL_CLK_GPT_IPG]      = imx_clk_gate2("gpt_ipg",       "ipg",               base + 0x6c, 20);
 	clk[IMX6QDL_CLK_GPT_IPG_PER]  = imx_clk_gate2("gpt_ipg_per",   "ipg_per",           base + 0x6c, 22);
-	if (clk_on_imx6dl())
-		/*
-		 * The multiplexer and divider of imx6q clock gpu3d_shader get
-		 * redefined/reused as gpu2d_core_sel and gpu2d_core_podf on imx6dl.
-		 */
-		clk[IMX6QDL_CLK_GPU2D_CORE] = imx_clk_gate2("gpu2d_core", "gpu3d_shader", base + 0x6c, 24);
-	else
-		clk[IMX6QDL_CLK_GPU2D_CORE] = imx_clk_gate2("gpu2d_core", "gpu2d_core_podf", base + 0x6c, 24);
+	clk[IMX6QDL_CLK_GPU2D_CORE] = imx_clk_gate2("gpu2d_core", "gpu2d_core_podf", base + 0x6c, 24);
 	clk[IMX6QDL_CLK_GPU3D_CORE]   = imx_clk_gate2("gpu3d_core",    "gpu3d_core_podf",   base + 0x6c, 26);
 	clk[IMX6QDL_CLK_HDMI_IAHB]    = imx_clk_gate2("hdmi_iahb",     "ahb",               base + 0x70, 0);
 	clk[IMX6QDL_CLK_HDMI_ISFR]    = imx_clk_gate2("hdmi_isfr",     "video_27m",         base + 0x70, 4);
@@ -511,7 +515,7 @@
 		 * The multiplexer and divider of the imx6q clock gpu2d get
 		 * redefined/reused as mlb_sys_sel and mlb_sys_clk_podf on imx6dl.
 		 */
-		clk[IMX6QDL_CLK_MLB] = imx_clk_gate2("mlb",            "gpu2d_core_podf",   base + 0x74, 18);
+		clk[IMX6QDL_CLK_MLB] = imx_clk_gate2("mlb",            "mlb_podf",   base + 0x74, 18);
 	else
 		clk[IMX6QDL_CLK_MLB] = imx_clk_gate2("mlb",            "axi",               base + 0x74, 18);
 	clk[IMX6QDL_CLK_MMDC_CH0_AXI] = imx_clk_gate2("mmdc_ch0_axi",  "mmdc_ch0_axi_podf", base + 0x74, 20);
@@ -629,6 +633,24 @@
 	if (IS_ENABLED(CONFIG_PCI_IMX6))
 		clk_set_parent(clk[IMX6QDL_CLK_LVDS1_SEL], clk[IMX6QDL_CLK_SATA_REF_100M]);
 
+	/*
+	 * Initialize the GPU clock muxes, so that the maximum specified clock
+	 * rates for the respective SoC are not exceeded.
+	 */
+	if (clk_on_imx6dl()) {
+		clk_set_parent(clk[IMX6QDL_CLK_GPU3D_CORE_SEL],
+			       clk[IMX6QDL_CLK_PLL2_PFD1_594M]);
+		clk_set_parent(clk[IMX6QDL_CLK_GPU2D_CORE_SEL],
+			       clk[IMX6QDL_CLK_PLL2_PFD1_594M]);
+	} else if (clk_on_imx6q()) {
+		clk_set_parent(clk[IMX6QDL_CLK_GPU3D_CORE_SEL],
+			       clk[IMX6QDL_CLK_MMDC_CH0_AXI]);
+		clk_set_parent(clk[IMX6QDL_CLK_GPU3D_SHADER_SEL],
+			       clk[IMX6QDL_CLK_PLL2_PFD1_594M]);
+		clk_set_parent(clk[IMX6QDL_CLK_GPU2D_CORE_SEL],
+			       clk[IMX6QDL_CLK_PLL3_USB_OTG]);
+	}
+
 	imx_register_uart_clocks(uart_clks);
 }
 CLK_OF_DECLARE(imx6q, "fsl,imx6q-ccm", imx6q_clocks_init);
diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c
index 6ed4f8f..e7c7353 100644
--- a/drivers/clk/imx/clk-imx7d.c
+++ b/drivers/clk/imx/clk-imx7d.c
@@ -22,43 +22,63 @@
 
 #include "clk.h"
 
+static u32 share_count_sai1;
+static u32 share_count_sai2;
+static u32 share_count_sai3;
+
+static struct clk_div_table test_div_table[] = {
+	{ .val = 3, .div = 1, },
+	{ .val = 2, .div = 1, },
+	{ .val = 1, .div = 2, },
+	{ .val = 0, .div = 4, },
+	{ }
+};
+
+static struct clk_div_table post_div_table[] = {
+	{ .val = 3, .div = 4, },
+	{ .val = 2, .div = 1, },
+	{ .val = 1, .div = 2, },
+	{ .val = 0, .div = 1, },
+	{ }
+};
+
 static struct clk *clks[IMX7D_CLK_END];
 static const char *arm_a7_sel[] = { "osc", "pll_arm_main_clk",
 	"pll_enet_500m_clk", "pll_dram_main_clk",
-	"pll_sys_main_clk", "pll_sys_pfd0_392m_clk", "pll_audio_main_clk",
+	"pll_sys_main_clk", "pll_sys_pfd0_392m_clk", "pll_audio_post_div",
 	"pll_usb_main_clk", };
 
 static const char *arm_m4_sel[] = { "osc", "pll_sys_main_240m_clk",
 	"pll_enet_250m_clk", "pll_sys_pfd2_270m_clk",
-	"pll_dram_533m_clk", "pll_audio_main_clk", "pll_video_main_clk",
+	"pll_dram_533m_clk", "pll_audio_post_div", "pll_video_main_clk",
 	"pll_usb_main_clk", };
 
 static const char *arm_m0_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_enet_125m_clk", "pll_sys_pfd2_135m_clk",
-	"pll_dram_533m_clk", "pll_audio_main_clk", "pll_video_main_clk",
+	"pll_dram_533m_clk", "pll_audio_post_div", "pll_video_main_clk",
 	"pll_usb_main_clk", };
 
 static const char *axi_sel[] = { "osc", "pll_sys_pfd1_332m_clk",
 	"pll_dram_533m_clk", "pll_enet_250m_clk", "pll_sys_pfd5_clk",
-	"pll_audio_main_clk", "pll_video_main_clk", "pll_sys_pfd7_clk", };
+	"pll_audio_post_div", "pll_video_main_clk", "pll_sys_pfd7_clk", };
 
 static const char *disp_axi_sel[] = { "osc", "pll_sys_pfd1_332m_clk",
 	"pll_dram_533m_clk", "pll_enet_250m_clk", "pll_sys_pfd6_clk",
-	"pll_sys_pfd7_clk", "pll_audio_main_clk", "pll_video_main_clk", };
+	"pll_sys_pfd7_clk", "pll_audio_post_div", "pll_video_main_clk", };
 
 static const char *enet_axi_sel[] = { "osc", "pll_sys_pfd2_270m_clk",
 	"pll_dram_533m_clk", "pll_enet_250m_clk",
-	"pll_sys_main_240m_clk", "pll_audio_main_clk", "pll_video_main_clk",
+	"pll_sys_main_240m_clk", "pll_audio_post_div", "pll_video_main_clk",
 	"pll_sys_pfd4_clk", };
 
 static const char *nand_usdhc_bus_sel[] = { "osc", "pll_sys_pfd2_270m_clk",
 	"pll_dram_533m_clk", "pll_sys_main_240m_clk",
 	"pll_sys_pfd2_135m_clk", "pll_sys_pfd6_clk", "pll_enet_250m_clk",
-	"pll_audio_main_clk", };
+	"pll_audio_post_div", };
 
 static const char *ahb_channel_sel[] = { "osc", "pll_sys_pfd2_270m_clk",
 	"pll_dram_533m_clk", "pll_sys_pfd0_392m_clk",
-	"pll_enet_125m_clk", "pll_usb_main_clk", "pll_audio_main_clk",
+	"pll_enet_125m_clk", "pll_usb_main_clk", "pll_audio_post_div",
 	"pll_video_main_clk", };
 
 static const char *dram_phym_sel[] = { "pll_dram_main_clk",
@@ -69,13 +89,13 @@
 
 static const char *dram_phym_alt_sel[] = { "osc", "pll_dram_533m_clk",
 	"pll_sys_main_clk", "pll_enet_500m_clk",
-	"pll_usb_main_clk", "pll_sys_pfd7_clk", "pll_audio_main_clk",
+	"pll_usb_main_clk", "pll_sys_pfd7_clk", "pll_audio_post_div",
 	"pll_video_main_clk", };
 
 static const char *dram_alt_sel[] = { "osc", "pll_dram_533m_clk",
 	"pll_sys_main_clk", "pll_enet_500m_clk",
 	"pll_enet_250m_clk", "pll_sys_pfd0_392m_clk",
-	"pll_audio_main_clk", "pll_sys_pfd2_270m_clk", };
+	"pll_audio_post_div", "pll_sys_pfd2_270m_clk", };
 
 static const char *usb_hsic_sel[] = { "osc", "pll_sys_main_clk",
 	"pll_usb_main_clk", "pll_sys_pfd3_clk", "pll_sys_pfd4_clk",
@@ -101,53 +121,53 @@
 
 static const char *mipi_dsi_sel[] = { "osc", "pll_sys_pfd5_clk",
 	"pll_sys_pfd3_clk", "pll_sys_main_clk", "pll_sys_pfd0_196m_clk",
-	"pll_dram_533m_clk", "pll_video_main_clk", "pll_audio_main_clk", };
+	"pll_dram_533m_clk", "pll_video_main_clk", "pll_audio_post_div", };
 
 static const char *mipi_csi_sel[] = { "osc", "pll_sys_pfd4_clk",
 	"pll_sys_pfd3_clk", "pll_sys_main_clk", "pll_sys_pfd0_196m_clk",
-	"pll_dram_533m_clk", "pll_video_main_clk", "pll_audio_main_clk", };
+	"pll_dram_533m_clk", "pll_video_main_clk", "pll_audio_post_div", };
 
 static const char *mipi_dphy_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_dram_533m_clk", "pll_sys_pfd5_clk", "ref_1m_clk", "ext_clk_2",
 	"pll_video_main_clk", "ext_clk_3", };
 
 static const char *sai1_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
-	"pll_audio_main_clk", "pll_dram_533m_clk", "pll_video_main_clk",
+	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_main_clk",
 	"pll_sys_pfd4_clk", "pll_enet_125m_clk", "ext_clk_2", };
 
 static const char *sai2_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
-	"pll_audio_main_clk", "pll_dram_533m_clk", "pll_video_main_clk",
+	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_main_clk",
 	"pll_sys_pfd4_clk", "pll_enet_125m_clk", "ext_clk_2", };
 
 static const char *sai3_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
-	"pll_audio_main_clk", "pll_dram_533m_clk", "pll_video_main_clk",
+	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_main_clk",
 	"pll_sys_pfd4_clk", "pll_enet_125m_clk", "ext_clk_3", };
 
 static const char *spdif_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
-	"pll_audio_main_clk", "pll_dram_533m_clk", "pll_video_main_clk",
+	"pll_audio_post_div", "pll_dram_533m_clk", "pll_video_main_clk",
 	"pll_sys_pfd4_clk", "pll_enet_125m_clk", "ext_3_clk", };
 
 static const char *enet1_ref_sel[] = { "osc", "pll_enet_125m_clk",
 	"pll_enet_50m_clk", "pll_enet_25m_clk",
-	"pll_sys_main_120m_clk", "pll_audio_main_clk", "pll_video_main_clk",
+	"pll_sys_main_120m_clk", "pll_audio_post_div", "pll_video_main_clk",
 	"ext_clk_4", };
 
 static const char *enet1_time_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_audio_main_clk", "ext_clk_1", "ext_clk_2", "ext_clk_3",
+	"pll_audio_post_div", "ext_clk_1", "ext_clk_2", "ext_clk_3",
 	"ext_clk_4", "pll_video_main_clk", };
 
 static const char *enet2_ref_sel[] = { "osc", "pll_enet_125m_clk",
 	"pll_enet_50m_clk", "pll_enet_25m_clk",
-	"pll_sys_main_120m_clk", "pll_audio_main_clk", "pll_video_main_clk",
+	"pll_sys_main_120m_clk", "pll_audio_post_div", "pll_video_main_clk",
 	"ext_clk_4", };
 
 static const char *enet2_time_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_audio_main_clk", "ext_clk_1", "ext_clk_2", "ext_clk_3",
+	"pll_audio_post_div", "ext_clk_1", "ext_clk_2", "ext_clk_3",
 	"ext_clk_4", "pll_video_main_clk", };
 
 static const char *enet_phy_ref_sel[] = { "osc", "pll_enet_25m_clk",
 	"pll_enet_50m_clk", "pll_enet_125m_clk",
-	"pll_dram_533m_clk", "pll_audio_main_clk", "pll_video_main_clk",
+	"pll_dram_533m_clk", "pll_audio_post_div", "pll_video_main_clk",
 	"pll_sys_pfd3_clk", };
 
 static const char *eim_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
@@ -188,22 +208,22 @@
 
 static const char *i2c1_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_enet_50m_clk", "pll_dram_533m_clk",
-	"pll_audio_main_clk", "pll_video_main_clk", "pll_usb_main_clk",
+	"pll_audio_post_div", "pll_video_main_clk", "pll_usb_main_clk",
 	"pll_sys_pfd2_135m_clk", };
 
 static const char *i2c2_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_enet_50m_clk", "pll_dram_533m_clk",
-	"pll_audio_main_clk", "pll_video_main_clk", "pll_usb_main_clk",
+	"pll_audio_post_div", "pll_video_main_clk", "pll_usb_main_clk",
 	"pll_sys_pfd2_135m_clk", };
 
 static const char *i2c3_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_enet_50m_clk", "pll_dram_533m_clk",
-	"pll_audio_main_clk", "pll_video_main_clk", "pll_usb_main_clk",
+	"pll_audio_post_div", "pll_video_main_clk", "pll_usb_main_clk",
 	"pll_sys_pfd2_135m_clk", };
 
 static const char *i2c4_sel[] = { "osc", "pll_sys_main_120m_clk",
 	"pll_enet_50m_clk", "pll_dram_533m_clk",
-	"pll_audio_main_clk", "pll_video_main_clk", "pll_usb_main_clk",
+	"pll_audio_post_div", "pll_video_main_clk", "pll_usb_main_clk",
 	"pll_sys_pfd2_135m_clk", };
 
 static const char *uart1_sel[] = { "osc", "pll_sys_main_240m_clk",
@@ -262,32 +282,32 @@
 	"pll_usb_main_clk", };
 
 static const char *pwm1_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_main_clk",
+	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
 	"ext_clk_1", "ref_1m_clk", "pll_video_main_clk", };
 
 static const char *pwm2_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_main_clk",
+	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
 	"ext_clk_1", "ref_1m_clk", "pll_video_main_clk", };
 
 static const char *pwm3_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_main_clk",
+	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
 	"ext_clk_2", "ref_1m_clk", "pll_video_main_clk", };
 
 static const char *pwm4_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_main_clk",
+	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
 	"ext_clk_2", "ref_1m_clk", "pll_video_main_clk", };
 
 static const char *flextimer1_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_main_clk",
+	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
 	"ext_clk_3", "ref_1m_clk", "pll_video_main_clk", };
 
 static const char *flextimer2_sel[] = { "osc", "pll_enet_100m_clk",
-	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_main_clk",
+	"pll_sys_main_120m_clk", "pll_enet_40m_clk", "pll_audio_post_div",
 	"ext_clk_3", "ref_1m_clk", "pll_video_main_clk", };
 
 static const char *sim1_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 	"pll_sys_main_120m_clk", "pll_dram_533m_clk",
-	"pll_usb_main_clk", "pll_audio_main_clk", "pll_enet_125m_clk",
+	"pll_usb_main_clk", "pll_audio_post_div", "pll_enet_125m_clk",
 	"pll_sys_pfd7_clk", };
 
 static const char *sim2_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
@@ -297,19 +317,19 @@
 
 static const char *gpt1_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_main_clk",
-	"ref_1m_clk", "pll_audio_main_clk", "ext_clk_1", };
+	"ref_1m_clk", "pll_audio_post_div", "ext_clk_1", };
 
 static const char *gpt2_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_main_clk",
-	"ref_1m_clk", "pll_audio_main_clk", "ext_clk_2", };
+	"ref_1m_clk", "pll_audio_post_div", "ext_clk_2", };
 
 static const char *gpt3_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_main_clk",
-	"ref_1m_clk", "pll_audio_main_clk", "ext_clk_3", };
+	"ref_1m_clk", "pll_audio_post_div", "ext_clk_3", };
 
 static const char *gpt4_sel[] = { "osc", "pll_enet_100m_clk",
 	"pll_sys_pfd0_392m_clk", "pll_enet_40m_clk", "pll_video_main_clk",
-	"ref_1m_clk", "pll_audio_main_clk", "ext_clk_4", };
+	"ref_1m_clk", "pll_audio_post_div", "ext_clk_4", };
 
 static const char *trace_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 	"pll_sys_main_120m_clk", "pll_dram_533m_clk",
@@ -323,12 +343,12 @@
 
 static const char *csi_mclk_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 	"pll_sys_main_120m_clk", "pll_dram_533m_clk",
-	"pll_enet_125m_clk", "pll_audio_main_clk", "pll_video_main_clk",
+	"pll_enet_125m_clk", "pll_audio_post_div", "pll_video_main_clk",
 	"pll_usb_main_clk", };
 
 static const char *audio_mclk_sel[] = { "osc", "pll_sys_pfd2_135m_clk",
 	"pll_sys_main_120m_clk", "pll_dram_533m_clk",
-	"pll_enet_125m_clk", "pll_audio_main_clk", "pll_video_main_clk",
+	"pll_enet_125m_clk", "pll_audio_post_div", "pll_video_main_clk",
 	"pll_usb_main_clk", };
 
 static const char *wrclk_sel[] = { "osc", "pll_enet_40m_clk",
@@ -342,13 +362,13 @@
 
 static const char *clko2_sel[] = { "osc", "pll_sys_main_240m_clk",
 	"pll_sys_pfd0_392m_clk", "pll_sys_pfd1_166m_clk", "pll_sys_pfd4_clk",
-	"pll_audio_main_clk", "pll_video_main_clk", "ckil", };
+	"pll_audio_post_div", "pll_video_main_clk", "ckil", };
 
 static const char *lvds1_sel[] = { "pll_arm_main_clk",
 	"pll_sys_main_clk", "pll_sys_pfd0_392m_clk", "pll_sys_pfd1_332m_clk",
 	"pll_sys_pfd2_270m_clk", "pll_sys_pfd3_clk", "pll_sys_pfd4_clk",
 	"pll_sys_pfd5_clk", "pll_sys_pfd6_clk", "pll_sys_pfd7_clk",
-	"pll_audio_main_clk", "pll_video_main_clk", "pll_enet_500m_clk",
+	"pll_audio_post_div", "pll_video_main_clk", "pll_enet_500m_clk",
 	"pll_enet_250m_clk", "pll_enet_125m_clk", "pll_enet_100m_clk",
 	"pll_enet_50m_clk", "pll_enet_40m_clk", "pll_enet_25m_clk",
 	"pll_dram_main_clk", };
@@ -430,6 +450,11 @@
 	clks[IMX7D_PLL_AUDIO_MAIN_CLK] = imx_clk_gate("pll_audio_main_clk", "pll_audio_main_bypass", base + 0xf0, 13);
 	clks[IMX7D_PLL_VIDEO_MAIN_CLK] = imx_clk_gate("pll_video_main_clk", "pll_video_main_bypass", base + 0x130, 13);
 
+	clks[IMX7D_PLL_AUDIO_TEST_DIV]  = clk_register_divider_table(NULL, "pll_audio_test_div", "pll_audio_main_clk",
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0xf0, 19, 2, 0, test_div_table, &imx_ccm_lock);
+	clks[IMX7D_PLL_AUDIO_POST_DIV] = clk_register_divider_table(NULL, "pll_audio_post_div", "pll_audio_test_div",
+				CLK_SET_RATE_PARENT | CLK_SET_RATE_GATE, base + 0xf0, 22, 2, 0, post_div_table, &imx_ccm_lock);
+
 	clks[IMX7D_PLL_SYS_PFD0_392M_CLK] = imx_clk_pfd("pll_sys_pfd0_392m_clk", "pll_sys_main_clk", base + 0xc0, 0);
 	clks[IMX7D_PLL_SYS_PFD1_332M_CLK] = imx_clk_pfd("pll_sys_pfd1_332m_clk", "pll_sys_main_clk", base + 0xc0, 1);
 	clks[IMX7D_PLL_SYS_PFD2_270M_CLK] = imx_clk_pfd("pll_sys_pfd2_270m_clk", "pll_sys_main_clk", base + 0xc0, 2);
@@ -779,6 +804,7 @@
 	clks[IMX7D_DRAM_PHYM_ALT_ROOT_CLK] = imx_clk_gate4("dram_phym_alt_root_clk", "dram_phym_alt_post_div", base + 0x4130, 0);
 	clks[IMX7D_DRAM_ALT_ROOT_CLK] = imx_clk_gate4("dram_alt_root_clk", "dram_alt_post_div", base + 0x4130, 0);
 	clks[IMX7D_USB_HSIC_ROOT_CLK] = imx_clk_gate4("usb_hsic_root_clk", "usb_hsic_post_div", base + 0x4420, 0);
+	clks[IMX7D_SDMA_CORE_CLK] = imx_clk_gate4("sdma_root_clk", "ahb_root_clk", base + 0x4480, 0);
 	clks[IMX7D_PCIE_CTRL_ROOT_CLK] = imx_clk_gate4("pcie_ctrl_root_clk", "pcie_ctrl_post_div", base + 0x4600, 0);
 	clks[IMX7D_PCIE_PHY_ROOT_CLK] = imx_clk_gate4("pcie_phy_root_clk", "pcie_phy_post_div", base + 0x4600, 0);
 	clks[IMX7D_EPDC_PIXEL_ROOT_CLK] = imx_clk_gate4("epdc_pixel_root_clk", "epdc_pixel_post_div", base + 0x44a0, 0);
@@ -786,9 +812,12 @@
 	clks[IMX7D_MIPI_DSI_ROOT_CLK] = imx_clk_gate4("mipi_dsi_root_clk", "mipi_dsi_post_div", base + 0x4650, 0);
 	clks[IMX7D_MIPI_CSI_ROOT_CLK] = imx_clk_gate4("mipi_csi_root_clk", "mipi_csi_post_div", base + 0x4640, 0);
 	clks[IMX7D_MIPI_DPHY_ROOT_CLK] = imx_clk_gate4("mipi_dphy_root_clk", "mipi_dphy_post_div", base + 0x4660, 0);
-	clks[IMX7D_SAI1_ROOT_CLK] = imx_clk_gate4("sai1_root_clk", "sai1_post_div", base + 0x48c0, 0);
-	clks[IMX7D_SAI2_ROOT_CLK] = imx_clk_gate4("sai2_root_clk", "sai2_post_div", base + 0x48d0, 0);
-	clks[IMX7D_SAI3_ROOT_CLK] = imx_clk_gate4("sai3_root_clk", "sai3_post_div", base + 0x48e0, 0);
+	clks[IMX7D_SAI1_ROOT_CLK] = imx_clk_gate2_shared2("sai1_root_clk", "sai1_post_div", base + 0x48c0, 0, &share_count_sai1);
+	clks[IMX7D_SAI1_IPG_CLK]  = imx_clk_gate2_shared2("sai1_ipg_clk",  "ipg_root_clk",  base + 0x48c0, 0, &share_count_sai1);
+	clks[IMX7D_SAI2_ROOT_CLK] = imx_clk_gate2_shared2("sai2_root_clk", "sai2_post_div", base + 0x48d0, 0, &share_count_sai2);
+	clks[IMX7D_SAI2_IPG_CLK]  = imx_clk_gate2_shared2("sai2_ipg_clk",  "ipg_root_clk",  base + 0x48d0, 0, &share_count_sai2);
+	clks[IMX7D_SAI3_ROOT_CLK] = imx_clk_gate2_shared2("sai3_root_clk", "sai3_post_div", base + 0x48e0, 0, &share_count_sai3);
+	clks[IMX7D_SAI3_IPG_CLK]  = imx_clk_gate2_shared2("sai3_ipg_clk",  "ipg_root_clk",  base + 0x48e0, 0, &share_count_sai3);
 	clks[IMX7D_SPDIF_ROOT_CLK] = imx_clk_gate4("spdif_root_clk", "spdif_post_div", base + 0x44d0, 0);
 	clks[IMX7D_ENET1_REF_ROOT_CLK] = imx_clk_gate4("enet1_ref_root_clk", "enet1_ref_post_div", base + 0x44e0, 0);
 	clks[IMX7D_ENET1_TIME_ROOT_CLK] = imx_clk_gate4("enet1_time_root_clk", "enet1_time_post_div", base + 0x44f0, 0);
@@ -860,8 +889,6 @@
 	/* use old gpt clk setting, gpt1 root clk must be twice as gpt counter freq */
 	clk_set_parent(clks[IMX7D_GPT1_ROOT_SRC], clks[IMX7D_OSC_24M_CLK]);
 
-	clk_set_parent(clks[IMX7D_ENET_AXI_ROOT_SRC], clks[IMX7D_PLL_ENET_MAIN_250M_CLK]);
-
 	/* set uart module clock's parent clock source that must be great then 80MHz */
 	clk_set_parent(clks[IMX7D_UART1_ROOT_SRC], clks[IMX7D_OSC_24M_CLK]);
 
diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h
index a81c038..3799ff8 100644
--- a/drivers/clk/imx/clk.h
+++ b/drivers/clk/imx/clk.h
@@ -134,6 +134,15 @@
 			shift, 0x3, 0, &imx_ccm_lock, share_count);
 }
 
+static inline struct clk *imx_clk_gate2_shared2(const char *name,
+		const char *parent, void __iomem *reg, u8 shift,
+		unsigned int *share_count)
+{
+	return clk_register_gate2(NULL, name, parent, CLK_SET_RATE_PARENT |
+				  CLK_OPS_PARENT_ENABLE, reg, shift, 0x3, 0,
+				  &imx_ccm_lock, share_count);
+}
+
 static inline struct clk *imx_clk_gate2_cgr(const char *name,
 		const char *parent, void __iomem *reg, u8 shift, u8 cgr_val)
 {
diff --git a/drivers/clk/loongson1/Makefile b/drivers/clk/loongson1/Makefile
new file mode 100644
index 0000000..b7f6a16
--- /dev/null
+++ b/drivers/clk/loongson1/Makefile
@@ -0,0 +1,3 @@
+obj-y				+= clk.o
+obj-$(CONFIG_LOONGSON1_LS1B)	+= clk-loongson1b.o
+obj-$(CONFIG_LOONGSON1_LS1C)	+= clk-loongson1c.o
diff --git a/drivers/clk/loongson1/clk-loongson1b.c b/drivers/clk/loongson1/clk-loongson1b.c
new file mode 100644
index 0000000..f36a97e
--- /dev/null
+++ b/drivers/clk/loongson1/clk-loongson1b.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2012-2016 Zhang, Keguang <keguang.zhang@gmail.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/io.h>
+#include <linux/err.h>
+
+#include <loongson1.h>
+#include "clk.h"
+
+#define OSC		(33 * 1000000)
+#define DIV_APB		2
+
+static DEFINE_SPINLOCK(_lock);
+
+static unsigned long ls1x_pll_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	u32 pll, rate;
+
+	pll = __raw_readl(LS1X_CLK_PLL_FREQ);
+	rate = 12 + (pll & GENMASK(5, 0));
+	rate *= OSC;
+	rate >>= 1;
+
+	return rate;
+}
+
+static const struct clk_ops ls1x_pll_clk_ops = {
+	.recalc_rate = ls1x_pll_recalc_rate,
+};
+
+static const char *const cpu_parents[] = { "cpu_clk_div", "osc_clk", };
+static const char *const ahb_parents[] = { "ahb_clk_div", "osc_clk", };
+static const char *const dc_parents[] = { "dc_clk_div", "osc_clk", };
+
+void __init ls1x_clk_init(void)
+{
+	struct clk_hw *hw;
+
+	hw = clk_hw_register_fixed_rate(NULL, "osc_clk", NULL, 0, OSC);
+	clk_hw_register_clkdev(hw, "osc_clk", NULL);
+
+	/* clock derived from 33 MHz OSC clk */
+	hw = clk_hw_register_pll(NULL, "pll_clk", "osc_clk",
+				 &ls1x_pll_clk_ops, 0);
+	clk_hw_register_clkdev(hw, "pll_clk", NULL);
+
+	/* clock derived from PLL clk */
+	/*                                 _____
+	 *         _______________________|     |
+	 * OSC ___/                       | MUX |___ CPU CLK
+	 *        \___ PLL ___ CPU DIV ___|     |
+	 *                                |_____|
+	 */
+	hw = clk_hw_register_divider(NULL, "cpu_clk_div", "pll_clk",
+				   CLK_GET_RATE_NOCACHE, LS1X_CLK_PLL_DIV,
+				   DIV_CPU_SHIFT, DIV_CPU_WIDTH,
+				   CLK_DIVIDER_ONE_BASED |
+				   CLK_DIVIDER_ROUND_CLOSEST, &_lock);
+	clk_hw_register_clkdev(hw, "cpu_clk_div", NULL);
+	hw = clk_hw_register_mux(NULL, "cpu_clk", cpu_parents,
+			       ARRAY_SIZE(cpu_parents),
+			       CLK_SET_RATE_NO_REPARENT, LS1X_CLK_PLL_DIV,
+			       BYPASS_CPU_SHIFT, BYPASS_CPU_WIDTH, 0, &_lock);
+	clk_hw_register_clkdev(hw, "cpu_clk", NULL);
+
+	/*                                 _____
+	 *         _______________________|     |
+	 * OSC ___/                       | MUX |___ DC  CLK
+	 *        \___ PLL ___ DC  DIV ___|     |
+	 *                                |_____|
+	 */
+	hw = clk_hw_register_divider(NULL, "dc_clk_div", "pll_clk",
+				   0, LS1X_CLK_PLL_DIV, DIV_DC_SHIFT,
+				   DIV_DC_WIDTH, CLK_DIVIDER_ONE_BASED, &_lock);
+	clk_hw_register_clkdev(hw, "dc_clk_div", NULL);
+	hw = clk_hw_register_mux(NULL, "dc_clk", dc_parents,
+			       ARRAY_SIZE(dc_parents),
+			       CLK_SET_RATE_NO_REPARENT, LS1X_CLK_PLL_DIV,
+			       BYPASS_DC_SHIFT, BYPASS_DC_WIDTH, 0, &_lock);
+	clk_hw_register_clkdev(hw, "dc_clk", NULL);
+
+	/*                                 _____
+	 *         _______________________|     |
+	 * OSC ___/                       | MUX |___ DDR CLK
+	 *        \___ PLL ___ DDR DIV ___|     |
+	 *                                |_____|
+	 */
+	hw = clk_hw_register_divider(NULL, "ahb_clk_div", "pll_clk",
+				   0, LS1X_CLK_PLL_DIV, DIV_DDR_SHIFT,
+				   DIV_DDR_WIDTH, CLK_DIVIDER_ONE_BASED,
+				   &_lock);
+	clk_hw_register_clkdev(hw, "ahb_clk_div", NULL);
+	hw = clk_hw_register_mux(NULL, "ahb_clk", ahb_parents,
+			       ARRAY_SIZE(ahb_parents),
+			       CLK_SET_RATE_NO_REPARENT, LS1X_CLK_PLL_DIV,
+			       BYPASS_DDR_SHIFT, BYPASS_DDR_WIDTH, 0, &_lock);
+	clk_hw_register_clkdev(hw, "ahb_clk", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-dma", NULL);
+	clk_hw_register_clkdev(hw, "stmmaceth", NULL);
+
+	/* clock derived from AHB clk */
+	/* APB clk is always half of the AHB clk */
+	hw = clk_hw_register_fixed_factor(NULL, "apb_clk", "ahb_clk", 0, 1,
+					DIV_APB);
+	clk_hw_register_clkdev(hw, "apb_clk", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-ac97", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-i2c", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-nand", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-pwmtimer", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-spi", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-wdt", NULL);
+	clk_hw_register_clkdev(hw, "serial8250", NULL);
+}
diff --git a/drivers/clk/loongson1/clk-loongson1c.c b/drivers/clk/loongson1/clk-loongson1c.c
new file mode 100644
index 0000000..3466f73
--- /dev/null
+++ b/drivers/clk/loongson1/clk-loongson1c.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016 Yang Ling <gnaygnil@gmail.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+
+#include <loongson1.h>
+#include "clk.h"
+
+#define OSC		(24 * 1000000)
+#define DIV_APB		1
+
+static DEFINE_SPINLOCK(_lock);
+
+static unsigned long ls1x_pll_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	u32 pll, rate;
+
+	pll = __raw_readl(LS1X_CLK_PLL_FREQ);
+	rate = ((pll >> 8) & 0xff) + ((pll >> 16) & 0xff);
+	rate *= OSC;
+	rate >>= 2;
+
+	return rate;
+}
+
+static const struct clk_ops ls1x_pll_clk_ops = {
+	.recalc_rate = ls1x_pll_recalc_rate,
+};
+
+static const struct clk_div_table ahb_div_table[] = {
+	[0] = { .val = 0, .div = 2 },
+	[1] = { .val = 1, .div = 4 },
+	[2] = { .val = 2, .div = 3 },
+	[3] = { .val = 3, .div = 3 },
+};
+
+void __init ls1x_clk_init(void)
+{
+	struct clk_hw *hw;
+
+	hw = clk_hw_register_fixed_rate(NULL, "osc_clk", NULL, 0, OSC);
+	clk_hw_register_clkdev(hw, "osc_clk", NULL);
+
+	/* clock derived from 24 MHz OSC clk */
+	hw = clk_hw_register_pll(NULL, "pll_clk", "osc_clk",
+				&ls1x_pll_clk_ops, 0);
+	clk_hw_register_clkdev(hw, "pll_clk", NULL);
+
+	hw = clk_hw_register_divider(NULL, "cpu_clk_div", "pll_clk",
+				   CLK_GET_RATE_NOCACHE, LS1X_CLK_PLL_DIV,
+				   DIV_CPU_SHIFT, DIV_CPU_WIDTH,
+				   CLK_DIVIDER_ONE_BASED |
+				   CLK_DIVIDER_ROUND_CLOSEST, &_lock);
+	clk_hw_register_clkdev(hw, "cpu_clk_div", NULL);
+	hw = clk_hw_register_fixed_factor(NULL, "cpu_clk", "cpu_clk_div",
+					0, 1, 1);
+	clk_hw_register_clkdev(hw, "cpu_clk", NULL);
+
+	hw = clk_hw_register_divider(NULL, "dc_clk_div", "pll_clk",
+				   0, LS1X_CLK_PLL_DIV, DIV_DC_SHIFT,
+				   DIV_DC_WIDTH, CLK_DIVIDER_ONE_BASED, &_lock);
+	clk_hw_register_clkdev(hw, "dc_clk_div", NULL);
+	hw = clk_hw_register_fixed_factor(NULL, "dc_clk", "dc_clk_div",
+					0, 1, 1);
+	clk_hw_register_clkdev(hw, "dc_clk", NULL);
+
+	hw = clk_hw_register_divider_table(NULL, "ahb_clk_div", "cpu_clk_div",
+				0, LS1X_CLK_PLL_FREQ, DIV_DDR_SHIFT,
+				DIV_DDR_WIDTH, CLK_DIVIDER_ALLOW_ZERO,
+				ahb_div_table, &_lock);
+	clk_hw_register_clkdev(hw, "ahb_clk_div", NULL);
+	hw = clk_hw_register_fixed_factor(NULL, "ahb_clk", "ahb_clk_div",
+					0, 1, 1);
+	clk_hw_register_clkdev(hw, "ahb_clk", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-dma", NULL);
+	clk_hw_register_clkdev(hw, "stmmaceth", NULL);
+
+	/* clock derived from AHB clk */
+	hw = clk_hw_register_fixed_factor(NULL, "apb_clk", "ahb_clk", 0, 1,
+					DIV_APB);
+	clk_hw_register_clkdev(hw, "apb_clk", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-ac97", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-i2c", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-nand", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-pwmtimer", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-spi", NULL);
+	clk_hw_register_clkdev(hw, "ls1x-wdt", NULL);
+	clk_hw_register_clkdev(hw, "serial8250", NULL);
+}
diff --git a/drivers/clk/loongson1/clk.c b/drivers/clk/loongson1/clk.c
new file mode 100644
index 0000000..cfcfd14
--- /dev/null
+++ b/drivers/clk/loongson1/clk.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2012-2016 Zhang, Keguang <keguang.zhang@gmail.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+
+struct clk_hw *__init clk_hw_register_pll(struct device *dev,
+					  const char *name,
+					  const char *parent_name,
+					  const struct clk_ops *ops,
+					  unsigned long flags)
+{
+	int ret;
+	struct clk_hw *hw;
+	struct clk_init_data init;
+
+	/* allocate the divider */
+	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
+	if (!hw)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = ops;
+	init.flags = flags | CLK_IS_BASIC;
+	init.parent_names = (parent_name ? &parent_name : NULL);
+	init.num_parents = (parent_name ? 1 : 0);
+	hw->init = &init;
+
+	/* register the clock */
+	ret = clk_hw_register(dev, hw);
+	if (ret) {
+		kfree(hw);
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
diff --git a/drivers/clk/loongson1/clk.h b/drivers/clk/loongson1/clk.h
new file mode 100644
index 0000000..085d74b
--- /dev/null
+++ b/drivers/clk/loongson1/clk.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2012-2016 Zhang, Keguang <keguang.zhang@gmail.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __LOONGSON1_CLK_H
+#define __LOONGSON1_CLK_H
+
+struct clk_hw *clk_hw_register_pll(struct device *dev,
+				   const char *name,
+				   const char *parent_name,
+				   const struct clk_ops *ops,
+				   unsigned long flags);
+
+#endif /* __LOONGSON1_CLK_H */
diff --git a/drivers/clk/mediatek/Kconfig b/drivers/clk/mediatek/Kconfig
new file mode 100644
index 0000000..380c372
--- /dev/null
+++ b/drivers/clk/mediatek/Kconfig
@@ -0,0 +1,21 @@
+#
+# MediaTek SoC drivers
+#
+config COMMON_CLK_MEDIATEK
+	bool
+	---help---
+	  Mediatek SoCs' clock support.
+
+config COMMON_CLK_MT8135
+	bool "Clock driver for Mediatek MT8135"
+	select COMMON_CLK_MEDIATEK
+	default ARCH_MEDIATEK
+	---help---
+	  This driver supports Mediatek MT8135 clocks.
+
+config COMMON_CLK_MT8173
+	bool "Clock driver for Mediatek MT8173"
+	select COMMON_CLK_MEDIATEK
+	default ARCH_MEDIATEK
+	---help---
+	  This driver supports Mediatek MT8173 clocks.
diff --git a/drivers/clk/mediatek/Makefile b/drivers/clk/mediatek/Makefile
index 95fdfac..32e7222 100644
--- a/drivers/clk/mediatek/Makefile
+++ b/drivers/clk/mediatek/Makefile
@@ -1,4 +1,4 @@
-obj-y += clk-mtk.o clk-pll.o clk-gate.o clk-apmixed.o
+obj-$(CONFIG_COMMON_CLK_MEDIATEK) += clk-mtk.o clk-pll.o clk-gate.o clk-apmixed.o
 obj-$(CONFIG_RESET_CONTROLLER) += reset.o
-obj-y += clk-mt8135.o
-obj-y += clk-mt8173.o
+obj-$(CONFIG_COMMON_CLK_MT8135) += clk-mt8135.o
+obj-$(CONFIG_COMMON_CLK_MT8173) += clk-mt8173.o
diff --git a/drivers/clk/mediatek/clk-gate.c b/drivers/clk/mediatek/clk-gate.c
index 2a76901..d8787bf 100644
--- a/drivers/clk/mediatek/clk-gate.c
+++ b/drivers/clk/mediatek/clk-gate.c
@@ -97,7 +97,7 @@
 	.disable	= mtk_cg_disable_inv,
 };
 
-struct clk * __init mtk_clk_register_gate(
+struct clk *mtk_clk_register_gate(
 		const char *name,
 		const char *parent_name,
 		struct regmap *regmap,
diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c
index 10c9860..0ac3aee 100644
--- a/drivers/clk/mediatek/clk-mt8173.c
+++ b/drivers/clk/mediatek/clk-mt8173.c
@@ -1074,8 +1074,10 @@
 	}
 
 	mt8173_pll_clk_data = clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK);
-	if (!clk_data)
+	if (!clk_data) {
+		iounmap(base);
 		return;
+	}
 
 	mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data);
 
diff --git a/drivers/clk/mediatek/clk-mtk.c b/drivers/clk/mediatek/clk-mtk.c
index 5ada644..bb30f70 100644
--- a/drivers/clk/mediatek/clk-mtk.c
+++ b/drivers/clk/mediatek/clk-mtk.c
@@ -24,7 +24,7 @@
 #include "clk-mtk.h"
 #include "clk-gate.h"
 
-struct clk_onecell_data * __init mtk_alloc_clk_data(unsigned int clk_num)
+struct clk_onecell_data *mtk_alloc_clk_data(unsigned int clk_num)
 {
 	int i;
 	struct clk_onecell_data *clk_data;
@@ -49,7 +49,7 @@
 	return NULL;
 }
 
-void __init mtk_clk_register_fixed_clks(const struct mtk_fixed_clk *clks,
+void mtk_clk_register_fixed_clks(const struct mtk_fixed_clk *clks,
 		int num, struct clk_onecell_data *clk_data)
 {
 	int i;
@@ -72,7 +72,7 @@
 	}
 }
 
-void __init mtk_clk_register_factors(const struct mtk_fixed_factor *clks,
+void mtk_clk_register_factors(const struct mtk_fixed_factor *clks,
 		int num, struct clk_onecell_data *clk_data)
 {
 	int i;
@@ -95,7 +95,7 @@
 	}
 }
 
-int __init mtk_clk_register_gates(struct device_node *node,
+int mtk_clk_register_gates(struct device_node *node,
 		const struct mtk_gate *clks,
 		int num, struct clk_onecell_data *clk_data)
 {
@@ -135,7 +135,7 @@
 	return 0;
 }
 
-struct clk * __init mtk_clk_register_composite(const struct mtk_composite *mc,
+struct clk *mtk_clk_register_composite(const struct mtk_composite *mc,
 		void __iomem *base, spinlock_t *lock)
 {
 	struct clk *clk;
@@ -222,7 +222,7 @@
 	return ERR_PTR(ret);
 }
 
-void __init mtk_clk_register_composites(const struct mtk_composite *mcs,
+void mtk_clk_register_composites(const struct mtk_composite *mcs,
 		int num, void __iomem *base, spinlock_t *lock,
 		struct clk_onecell_data *clk_data)
 {
diff --git a/drivers/clk/mediatek/clk-pll.c b/drivers/clk/mediatek/clk-pll.c
index 966cab1..0c2deac 100644
--- a/drivers/clk/mediatek/clk-pll.c
+++ b/drivers/clk/mediatek/clk-pll.c
@@ -313,7 +313,7 @@
 	return clk;
 }
 
-void __init mtk_clk_register_plls(struct device_node *node,
+void mtk_clk_register_plls(struct device_node *node,
 		const struct mtk_pll_data *plls, int num_plls, struct clk_onecell_data *clk_data)
 {
 	void __iomem *base;
diff --git a/drivers/clk/meson/Makefile b/drivers/clk/meson/Makefile
index 197e401..3495834 100644
--- a/drivers/clk/meson/Makefile
+++ b/drivers/clk/meson/Makefile
@@ -3,5 +3,5 @@
 #
 
 obj-$(CONFIG_COMMON_CLK_AMLOGIC) += clk-pll.o clk-cpu.o clk-mpll.o
-obj-$(CONFIG_COMMON_CLK_MESON8B) += meson8b-clkc.o
-obj-$(CONFIG_COMMON_CLK_GXBB)	 += gxbb.o
+obj-$(CONFIG_COMMON_CLK_MESON8B) += meson8b.o
+obj-$(CONFIG_COMMON_CLK_GXBB)	 += gxbb.o gxbb-aoclk.o
diff --git a/drivers/clk/meson/clkc.h b/drivers/clk/meson/clkc.h
index 53326c3..9bb70e7 100644
--- a/drivers/clk/meson/clkc.h
+++ b/drivers/clk/meson/clkc.h
@@ -98,7 +98,7 @@
 };
 
 #define MESON_GATE(_name, _reg, _bit)					\
-struct clk_gate gxbb_##_name = { 						\
+struct clk_gate _name = { 						\
 	.reg = (void __iomem *) _reg, 					\
 	.bit_idx = (_bit), 						\
 	.lock = &clk_lock,						\
diff --git a/drivers/clk/meson/gxbb-aoclk.c b/drivers/clk/meson/gxbb-aoclk.c
new file mode 100644
index 0000000..b45c5fb
--- /dev/null
+++ b/drivers/clk/meson/gxbb-aoclk.c
@@ -0,0 +1,191 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (c) 2016 BayLibre, SAS.
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016 BayLibre, SAS.
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/reset-controller.h>
+#include <linux/init.h>
+#include <dt-bindings/clock/gxbb-aoclkc.h>
+#include <dt-bindings/reset/gxbb-aoclkc.h>
+
+static DEFINE_SPINLOCK(gxbb_aoclk_lock);
+
+struct gxbb_aoclk_reset_controller {
+	struct reset_controller_dev reset;
+	unsigned int *data;
+	void __iomem *base;
+};
+
+static int gxbb_aoclk_do_reset(struct reset_controller_dev *rcdev,
+			       unsigned long id)
+{
+	struct gxbb_aoclk_reset_controller *reset =
+		container_of(rcdev, struct gxbb_aoclk_reset_controller, reset);
+
+	writel(BIT(reset->data[id]), reset->base);
+
+	return 0;
+}
+
+static const struct reset_control_ops gxbb_aoclk_reset_ops = {
+	.reset = gxbb_aoclk_do_reset,
+};
+
+#define GXBB_AO_GATE(_name, _bit)					\
+static struct clk_gate _name##_ao = {					\
+	.reg = (void __iomem *)0,					\
+	.bit_idx = (_bit),						\
+	.lock = &gxbb_aoclk_lock,					\
+	.hw.init = &(struct clk_init_data) {				\
+		.name = #_name "_ao",					\
+		.ops = &clk_gate_ops,					\
+		.parent_names = (const char *[]){ "clk81" },		\
+		.num_parents = 1,					\
+		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED),	\
+	},								\
+}
+
+GXBB_AO_GATE(remote, 0);
+GXBB_AO_GATE(i2c_master, 1);
+GXBB_AO_GATE(i2c_slave, 2);
+GXBB_AO_GATE(uart1, 3);
+GXBB_AO_GATE(uart2, 5);
+GXBB_AO_GATE(ir_blaster, 6);
+
+static unsigned int gxbb_aoclk_reset[] = {
+	[RESET_AO_REMOTE] = 16,
+	[RESET_AO_I2C_MASTER] = 18,
+	[RESET_AO_I2C_SLAVE] = 19,
+	[RESET_AO_UART1] = 17,
+	[RESET_AO_UART2] = 22,
+	[RESET_AO_IR_BLASTER] = 23,
+};
+
+static struct clk_gate *gxbb_aoclk_gate[] = {
+	[CLKID_AO_REMOTE] = &remote_ao,
+	[CLKID_AO_I2C_MASTER] = &i2c_master_ao,
+	[CLKID_AO_I2C_SLAVE] = &i2c_slave_ao,
+	[CLKID_AO_UART1] = &uart1_ao,
+	[CLKID_AO_UART2] = &uart2_ao,
+	[CLKID_AO_IR_BLASTER] = &ir_blaster_ao,
+};
+
+static struct clk_hw_onecell_data gxbb_aoclk_onecell_data = {
+	.hws = {
+		[CLKID_AO_REMOTE] = &remote_ao.hw,
+		[CLKID_AO_I2C_MASTER] = &i2c_master_ao.hw,
+		[CLKID_AO_I2C_SLAVE] = &i2c_slave_ao.hw,
+		[CLKID_AO_UART1] = &uart1_ao.hw,
+		[CLKID_AO_UART2] = &uart2_ao.hw,
+		[CLKID_AO_IR_BLASTER] = &ir_blaster_ao.hw,
+	},
+	.num = ARRAY_SIZE(gxbb_aoclk_gate),
+};
+
+static int gxbb_aoclkc_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	void __iomem *base;
+	int ret, clkid;
+	struct device *dev = &pdev->dev;
+	struct gxbb_aoclk_reset_controller *rstc;
+
+	rstc = devm_kzalloc(dev, sizeof(*rstc), GFP_KERNEL);
+	if (!rstc)
+		return -ENOMEM;
+
+	/* Generic clocks */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	/* Reset Controller */
+	rstc->base = base;
+	rstc->data = gxbb_aoclk_reset;
+	rstc->reset.ops = &gxbb_aoclk_reset_ops;
+	rstc->reset.nr_resets = ARRAY_SIZE(gxbb_aoclk_reset);
+	rstc->reset.of_node = dev->of_node;
+	ret = devm_reset_controller_register(dev, &rstc->reset);
+
+	/*
+	 * Populate base address and register all clks
+	 */
+	for (clkid = 0; clkid < gxbb_aoclk_onecell_data.num; clkid++) {
+		gxbb_aoclk_gate[clkid]->reg = base;
+
+		ret = devm_clk_hw_register(dev,
+					gxbb_aoclk_onecell_data.hws[clkid]);
+		if (ret)
+			return ret;
+	}
+
+	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
+			&gxbb_aoclk_onecell_data);
+}
+
+static const struct of_device_id gxbb_aoclkc_match_table[] = {
+	{ .compatible = "amlogic,gxbb-aoclkc" },
+	{ }
+};
+
+static struct platform_driver gxbb_aoclkc_driver = {
+	.probe		= gxbb_aoclkc_probe,
+	.driver		= {
+		.name	= "gxbb-aoclkc",
+		.of_match_table = gxbb_aoclkc_match_table,
+	},
+};
+builtin_platform_driver(gxbb_aoclkc_driver);
diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c
index a4c6684b..9d9af44 100644
--- a/drivers/clk/meson/gxbb.c
+++ b/drivers/clk/meson/gxbb.c
@@ -565,90 +565,93 @@
 };
 
 /* Everything Else (EE) domain gates */
-static MESON_GATE(ddr, HHI_GCLK_MPEG0, 0);
-static MESON_GATE(dos, HHI_GCLK_MPEG0, 1);
-static MESON_GATE(isa, HHI_GCLK_MPEG0, 5);
-static MESON_GATE(pl301, HHI_GCLK_MPEG0, 6);
-static MESON_GATE(periphs, HHI_GCLK_MPEG0, 7);
-static MESON_GATE(spicc, HHI_GCLK_MPEG0, 8);
-static MESON_GATE(i2c, HHI_GCLK_MPEG0, 9);
-static MESON_GATE(sar_adc, HHI_GCLK_MPEG0, 10);
-static MESON_GATE(smart_card, HHI_GCLK_MPEG0, 11);
-static MESON_GATE(rng0, HHI_GCLK_MPEG0, 12);
-static MESON_GATE(uart0, HHI_GCLK_MPEG0, 13);
-static MESON_GATE(sdhc, HHI_GCLK_MPEG0, 14);
-static MESON_GATE(stream, HHI_GCLK_MPEG0, 15);
-static MESON_GATE(async_fifo, HHI_GCLK_MPEG0, 16);
-static MESON_GATE(sdio, HHI_GCLK_MPEG0, 17);
-static MESON_GATE(abuf, HHI_GCLK_MPEG0, 18);
-static MESON_GATE(hiu_iface, HHI_GCLK_MPEG0, 19);
-static MESON_GATE(assist_misc, HHI_GCLK_MPEG0, 23);
-static MESON_GATE(spi, HHI_GCLK_MPEG0, 30);
+static MESON_GATE(gxbb_ddr, HHI_GCLK_MPEG0, 0);
+static MESON_GATE(gxbb_dos, HHI_GCLK_MPEG0, 1);
+static MESON_GATE(gxbb_isa, HHI_GCLK_MPEG0, 5);
+static MESON_GATE(gxbb_pl301, HHI_GCLK_MPEG0, 6);
+static MESON_GATE(gxbb_periphs, HHI_GCLK_MPEG0, 7);
+static MESON_GATE(gxbb_spicc, HHI_GCLK_MPEG0, 8);
+static MESON_GATE(gxbb_i2c, HHI_GCLK_MPEG0, 9);
+static MESON_GATE(gxbb_sar_adc, HHI_GCLK_MPEG0, 10);
+static MESON_GATE(gxbb_smart_card, HHI_GCLK_MPEG0, 11);
+static MESON_GATE(gxbb_rng0, HHI_GCLK_MPEG0, 12);
+static MESON_GATE(gxbb_uart0, HHI_GCLK_MPEG0, 13);
+static MESON_GATE(gxbb_sdhc, HHI_GCLK_MPEG0, 14);
+static MESON_GATE(gxbb_stream, HHI_GCLK_MPEG0, 15);
+static MESON_GATE(gxbb_async_fifo, HHI_GCLK_MPEG0, 16);
+static MESON_GATE(gxbb_sdio, HHI_GCLK_MPEG0, 17);
+static MESON_GATE(gxbb_abuf, HHI_GCLK_MPEG0, 18);
+static MESON_GATE(gxbb_hiu_iface, HHI_GCLK_MPEG0, 19);
+static MESON_GATE(gxbb_assist_misc, HHI_GCLK_MPEG0, 23);
+static MESON_GATE(gxbb_emmc_a, HHI_GCLK_MPEG0, 24);
+static MESON_GATE(gxbb_emmc_b, HHI_GCLK_MPEG0, 25);
+static MESON_GATE(gxbb_emmc_c, HHI_GCLK_MPEG0, 26);
+static MESON_GATE(gxbb_spi, HHI_GCLK_MPEG0, 30);
 
-static MESON_GATE(i2s_spdif, HHI_GCLK_MPEG1, 2);
-static MESON_GATE(eth, HHI_GCLK_MPEG1, 3);
-static MESON_GATE(demux, HHI_GCLK_MPEG1, 4);
-static MESON_GATE(aiu_glue, HHI_GCLK_MPEG1, 6);
-static MESON_GATE(iec958, HHI_GCLK_MPEG1, 7);
-static MESON_GATE(i2s_out, HHI_GCLK_MPEG1, 8);
-static MESON_GATE(amclk, HHI_GCLK_MPEG1, 9);
-static MESON_GATE(aififo2, HHI_GCLK_MPEG1, 10);
-static MESON_GATE(mixer, HHI_GCLK_MPEG1, 11);
-static MESON_GATE(mixer_iface, HHI_GCLK_MPEG1, 12);
-static MESON_GATE(adc, HHI_GCLK_MPEG1, 13);
-static MESON_GATE(blkmv, HHI_GCLK_MPEG1, 14);
-static MESON_GATE(aiu, HHI_GCLK_MPEG1, 15);
-static MESON_GATE(uart1, HHI_GCLK_MPEG1, 16);
-static MESON_GATE(g2d, HHI_GCLK_MPEG1, 20);
-static MESON_GATE(usb0, HHI_GCLK_MPEG1, 21);
-static MESON_GATE(usb1, HHI_GCLK_MPEG1, 22);
-static MESON_GATE(reset, HHI_GCLK_MPEG1, 23);
-static MESON_GATE(nand, HHI_GCLK_MPEG1, 24);
-static MESON_GATE(dos_parser, HHI_GCLK_MPEG1, 25);
-static MESON_GATE(usb, HHI_GCLK_MPEG1, 26);
-static MESON_GATE(vdin1, HHI_GCLK_MPEG1, 28);
-static MESON_GATE(ahb_arb0, HHI_GCLK_MPEG1, 29);
-static MESON_GATE(efuse, HHI_GCLK_MPEG1, 30);
-static MESON_GATE(boot_rom, HHI_GCLK_MPEG1, 31);
+static MESON_GATE(gxbb_i2s_spdif, HHI_GCLK_MPEG1, 2);
+static MESON_GATE(gxbb_eth, HHI_GCLK_MPEG1, 3);
+static MESON_GATE(gxbb_demux, HHI_GCLK_MPEG1, 4);
+static MESON_GATE(gxbb_aiu_glue, HHI_GCLK_MPEG1, 6);
+static MESON_GATE(gxbb_iec958, HHI_GCLK_MPEG1, 7);
+static MESON_GATE(gxbb_i2s_out, HHI_GCLK_MPEG1, 8);
+static MESON_GATE(gxbb_amclk, HHI_GCLK_MPEG1, 9);
+static MESON_GATE(gxbb_aififo2, HHI_GCLK_MPEG1, 10);
+static MESON_GATE(gxbb_mixer, HHI_GCLK_MPEG1, 11);
+static MESON_GATE(gxbb_mixer_iface, HHI_GCLK_MPEG1, 12);
+static MESON_GATE(gxbb_adc, HHI_GCLK_MPEG1, 13);
+static MESON_GATE(gxbb_blkmv, HHI_GCLK_MPEG1, 14);
+static MESON_GATE(gxbb_aiu, HHI_GCLK_MPEG1, 15);
+static MESON_GATE(gxbb_uart1, HHI_GCLK_MPEG1, 16);
+static MESON_GATE(gxbb_g2d, HHI_GCLK_MPEG1, 20);
+static MESON_GATE(gxbb_usb0, HHI_GCLK_MPEG1, 21);
+static MESON_GATE(gxbb_usb1, HHI_GCLK_MPEG1, 22);
+static MESON_GATE(gxbb_reset, HHI_GCLK_MPEG1, 23);
+static MESON_GATE(gxbb_nand, HHI_GCLK_MPEG1, 24);
+static MESON_GATE(gxbb_dos_parser, HHI_GCLK_MPEG1, 25);
+static MESON_GATE(gxbb_usb, HHI_GCLK_MPEG1, 26);
+static MESON_GATE(gxbb_vdin1, HHI_GCLK_MPEG1, 28);
+static MESON_GATE(gxbb_ahb_arb0, HHI_GCLK_MPEG1, 29);
+static MESON_GATE(gxbb_efuse, HHI_GCLK_MPEG1, 30);
+static MESON_GATE(gxbb_boot_rom, HHI_GCLK_MPEG1, 31);
 
-static MESON_GATE(ahb_data_bus, HHI_GCLK_MPEG2, 1);
-static MESON_GATE(ahb_ctrl_bus, HHI_GCLK_MPEG2, 2);
-static MESON_GATE(hdmi_intr_sync, HHI_GCLK_MPEG2, 3);
-static MESON_GATE(hdmi_pclk, HHI_GCLK_MPEG2, 4);
-static MESON_GATE(usb1_ddr_bridge, HHI_GCLK_MPEG2, 8);
-static MESON_GATE(usb0_ddr_bridge, HHI_GCLK_MPEG2, 9);
-static MESON_GATE(mmc_pclk, HHI_GCLK_MPEG2, 11);
-static MESON_GATE(dvin, HHI_GCLK_MPEG2, 12);
-static MESON_GATE(uart2, HHI_GCLK_MPEG2, 15);
-static MESON_GATE(sana, HHI_GCLK_MPEG2, 22);
-static MESON_GATE(vpu_intr, HHI_GCLK_MPEG2, 25);
-static MESON_GATE(sec_ahb_ahb3_bridge, HHI_GCLK_MPEG2, 26);
-static MESON_GATE(clk81_a53, HHI_GCLK_MPEG2, 29);
+static MESON_GATE(gxbb_ahb_data_bus, HHI_GCLK_MPEG2, 1);
+static MESON_GATE(gxbb_ahb_ctrl_bus, HHI_GCLK_MPEG2, 2);
+static MESON_GATE(gxbb_hdmi_intr_sync, HHI_GCLK_MPEG2, 3);
+static MESON_GATE(gxbb_hdmi_pclk, HHI_GCLK_MPEG2, 4);
+static MESON_GATE(gxbb_usb1_ddr_bridge, HHI_GCLK_MPEG2, 8);
+static MESON_GATE(gxbb_usb0_ddr_bridge, HHI_GCLK_MPEG2, 9);
+static MESON_GATE(gxbb_mmc_pclk, HHI_GCLK_MPEG2, 11);
+static MESON_GATE(gxbb_dvin, HHI_GCLK_MPEG2, 12);
+static MESON_GATE(gxbb_uart2, HHI_GCLK_MPEG2, 15);
+static MESON_GATE(gxbb_sana, HHI_GCLK_MPEG2, 22);
+static MESON_GATE(gxbb_vpu_intr, HHI_GCLK_MPEG2, 25);
+static MESON_GATE(gxbb_sec_ahb_ahb3_bridge, HHI_GCLK_MPEG2, 26);
+static MESON_GATE(gxbb_clk81_a53, HHI_GCLK_MPEG2, 29);
 
-static MESON_GATE(vclk2_venci0, HHI_GCLK_OTHER, 1);
-static MESON_GATE(vclk2_venci1, HHI_GCLK_OTHER, 2);
-static MESON_GATE(vclk2_vencp0, HHI_GCLK_OTHER, 3);
-static MESON_GATE(vclk2_vencp1, HHI_GCLK_OTHER, 4);
-static MESON_GATE(gclk_venci_int0, HHI_GCLK_OTHER, 8);
-static MESON_GATE(gclk_vencp_int, HHI_GCLK_OTHER, 9);
-static MESON_GATE(dac_clk, HHI_GCLK_OTHER, 10);
-static MESON_GATE(aoclk_gate, HHI_GCLK_OTHER, 14);
-static MESON_GATE(iec958_gate, HHI_GCLK_OTHER, 16);
-static MESON_GATE(enc480p, HHI_GCLK_OTHER, 20);
-static MESON_GATE(rng1, HHI_GCLK_OTHER, 21);
-static MESON_GATE(gclk_venci_int1, HHI_GCLK_OTHER, 22);
-static MESON_GATE(vclk2_venclmcc, HHI_GCLK_OTHER, 24);
-static MESON_GATE(vclk2_vencl, HHI_GCLK_OTHER, 25);
-static MESON_GATE(vclk_other, HHI_GCLK_OTHER, 26);
-static MESON_GATE(edp, HHI_GCLK_OTHER, 31);
+static MESON_GATE(gxbb_vclk2_venci0, HHI_GCLK_OTHER, 1);
+static MESON_GATE(gxbb_vclk2_venci1, HHI_GCLK_OTHER, 2);
+static MESON_GATE(gxbb_vclk2_vencp0, HHI_GCLK_OTHER, 3);
+static MESON_GATE(gxbb_vclk2_vencp1, HHI_GCLK_OTHER, 4);
+static MESON_GATE(gxbb_gclk_venci_int0, HHI_GCLK_OTHER, 8);
+static MESON_GATE(gxbb_gclk_vencp_int, HHI_GCLK_OTHER, 9);
+static MESON_GATE(gxbb_dac_clk, HHI_GCLK_OTHER, 10);
+static MESON_GATE(gxbb_aoclk_gate, HHI_GCLK_OTHER, 14);
+static MESON_GATE(gxbb_iec958_gate, HHI_GCLK_OTHER, 16);
+static MESON_GATE(gxbb_enc480p, HHI_GCLK_OTHER, 20);
+static MESON_GATE(gxbb_rng1, HHI_GCLK_OTHER, 21);
+static MESON_GATE(gxbb_gclk_venci_int1, HHI_GCLK_OTHER, 22);
+static MESON_GATE(gxbb_vclk2_venclmcc, HHI_GCLK_OTHER, 24);
+static MESON_GATE(gxbb_vclk2_vencl, HHI_GCLK_OTHER, 25);
+static MESON_GATE(gxbb_vclk_other, HHI_GCLK_OTHER, 26);
+static MESON_GATE(gxbb_edp, HHI_GCLK_OTHER, 31);
 
 /* Always On (AO) domain gates */
 
-static MESON_GATE(ao_media_cpu, HHI_GCLK_AO, 0);
-static MESON_GATE(ao_ahb_sram, HHI_GCLK_AO, 1);
-static MESON_GATE(ao_ahb_bus, HHI_GCLK_AO, 2);
-static MESON_GATE(ao_iface, HHI_GCLK_AO, 3);
-static MESON_GATE(ao_i2c, HHI_GCLK_AO, 4);
+static MESON_GATE(gxbb_ao_media_cpu, HHI_GCLK_AO, 0);
+static MESON_GATE(gxbb_ao_ahb_sram, HHI_GCLK_AO, 1);
+static MESON_GATE(gxbb_ao_ahb_bus, HHI_GCLK_AO, 2);
+static MESON_GATE(gxbb_ao_iface, HHI_GCLK_AO, 3);
+static MESON_GATE(gxbb_ao_i2c, HHI_GCLK_AO, 4);
 
 /* Array of all clocks provided by this provider */
 
@@ -748,6 +751,9 @@
 		[CLKID_AO_AHB_BUS]	    = &gxbb_ao_ahb_bus.hw,
 		[CLKID_AO_IFACE]	    = &gxbb_ao_iface.hw,
 		[CLKID_AO_I2C]		    = &gxbb_ao_i2c.hw,
+		[CLKID_SD_EMMC_A]	    = &gxbb_emmc_a.hw,
+		[CLKID_SD_EMMC_B]	    = &gxbb_emmc_b.hw,
+		[CLKID_SD_EMMC_C]	    = &gxbb_emmc_c.hw,
 	},
 	.num = NR_CLKS,
 };
@@ -847,6 +853,9 @@
 	&gxbb_ao_ahb_bus,
 	&gxbb_ao_iface,
 	&gxbb_ao_i2c,
+	&gxbb_emmc_a,
+	&gxbb_emmc_b,
+	&gxbb_emmc_c,
 };
 
 static int gxbb_clkc_probe(struct platform_device *pdev)
@@ -937,8 +946,4 @@
 	},
 };
 
-static int __init gxbb_clkc_init(void)
-{
-	return platform_driver_register(&gxbb_driver);
-}
-device_initcall(gxbb_clkc_init);
+builtin_platform_driver(gxbb_driver);
diff --git a/drivers/clk/meson/gxbb.h b/drivers/clk/meson/gxbb.h
index a2adf34..ae461b1 100644
--- a/drivers/clk/meson/gxbb.h
+++ b/drivers/clk/meson/gxbb.h
@@ -170,11 +170,11 @@
  */
 #define CLKID_SYS_PLL		  0
 /* CLKID_CPUCLK */
-#define CLKID_HDMI_PLL		  2
+/* CLKID_HDMI_PLL */
 #define CLKID_FIXED_PLL		  3
-#define CLKID_FCLK_DIV2		  4
-#define CLKID_FCLK_DIV3		  5
-#define CLKID_FCLK_DIV4		  6
+/* CLKID_FCLK_DIV2 */
+/* CLKID_FCLK_DIV3 */
+/* CLKID_FCLK_DIV4 */
 #define CLKID_FCLK_DIV5		  7
 #define CLKID_FCLK_DIV7		  8
 #define CLKID_GP0_PLL		  9
@@ -262,8 +262,11 @@
 #define CLKID_AO_AHB_BUS	  91
 #define CLKID_AO_IFACE		  92
 #define CLKID_AO_I2C		  93
+/* CLKID_SD_EMMC_A */
+/* CLKID_SD_EMMC_B */
+/* CLKID_SD_EMMC_C */
 
-#define NR_CLKS			  94
+#define NR_CLKS			  97
 
 /* include the CLKIDs that have been made part of the stable DT binding */
 #include <dt-bindings/clock/gxbb-clkc.h>
diff --git a/drivers/clk/meson/meson8b-clkc.c b/drivers/clk/meson/meson8b-clkc.c
deleted file mode 100644
index 4c9413c..0000000
--- a/drivers/clk/meson/meson8b-clkc.c
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * AmLogic S805 / Meson8b Clock Controller Driver
- *
- * Copyright (c) 2015 Endless Mobile, Inc.
- * Author: Carlo Caione <carlo@endlessm.com>
- *
- * Copyright (c) 2016 BayLibre, Inc.
- * Michael Turquette <mturquette@baylibre.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/clk.h>
-#include <linux/clk-provider.h>
-#include <linux/of_address.h>
-#include <dt-bindings/clock/meson8b-clkc.h>
-#include <linux/platform_device.h>
-#include <linux/init.h>
-
-#include "clkc.h"
-
-/*
- * Clock controller register offsets
- *
- * Register offsets from the HardKernel[0] data sheet are listed in comment
- * blocks below. Those offsets must be multiplied by 4 before adding them to
- * the base address to get the right value
- *
- * [0] http://dn.odroid.com/S805/Datasheet/S805_Datasheet%20V0.8%2020150126.pdf
- */
-#define MESON8B_REG_SYS_CPU_CNTL1	0x015c /* 0x57 offset in data sheet */
-#define MESON8B_REG_HHI_MPEG		0x0174 /* 0x5d offset in data sheet */
-#define MESON8B_REG_MALI		0x01b0 /* 0x6c offset in data sheet */
-#define MESON8B_REG_PLL_FIXED		0x0280
-#define MESON8B_REG_PLL_SYS		0x0300
-#define MESON8B_REG_PLL_VID		0x0320
-
-static DEFINE_SPINLOCK(clk_lock);
-
-static const struct pll_rate_table sys_pll_rate_table[] = {
-	PLL_RATE(312000000, 52, 1, 2),
-	PLL_RATE(336000000, 56, 1, 2),
-	PLL_RATE(360000000, 60, 1, 2),
-	PLL_RATE(384000000, 64, 1, 2),
-	PLL_RATE(408000000, 68, 1, 2),
-	PLL_RATE(432000000, 72, 1, 2),
-	PLL_RATE(456000000, 76, 1, 2),
-	PLL_RATE(480000000, 80, 1, 2),
-	PLL_RATE(504000000, 84, 1, 2),
-	PLL_RATE(528000000, 88, 1, 2),
-	PLL_RATE(552000000, 92, 1, 2),
-	PLL_RATE(576000000, 96, 1, 2),
-	PLL_RATE(600000000, 50, 1, 1),
-	PLL_RATE(624000000, 52, 1, 1),
-	PLL_RATE(648000000, 54, 1, 1),
-	PLL_RATE(672000000, 56, 1, 1),
-	PLL_RATE(696000000, 58, 1, 1),
-	PLL_RATE(720000000, 60, 1, 1),
-	PLL_RATE(744000000, 62, 1, 1),
-	PLL_RATE(768000000, 64, 1, 1),
-	PLL_RATE(792000000, 66, 1, 1),
-	PLL_RATE(816000000, 68, 1, 1),
-	PLL_RATE(840000000, 70, 1, 1),
-	PLL_RATE(864000000, 72, 1, 1),
-	PLL_RATE(888000000, 74, 1, 1),
-	PLL_RATE(912000000, 76, 1, 1),
-	PLL_RATE(936000000, 78, 1, 1),
-	PLL_RATE(960000000, 80, 1, 1),
-	PLL_RATE(984000000, 82, 1, 1),
-	PLL_RATE(1008000000, 84, 1, 1),
-	PLL_RATE(1032000000, 86, 1, 1),
-	PLL_RATE(1056000000, 88, 1, 1),
-	PLL_RATE(1080000000, 90, 1, 1),
-	PLL_RATE(1104000000, 92, 1, 1),
-	PLL_RATE(1128000000, 94, 1, 1),
-	PLL_RATE(1152000000, 96, 1, 1),
-	PLL_RATE(1176000000, 98, 1, 1),
-	PLL_RATE(1200000000, 50, 1, 0),
-	PLL_RATE(1224000000, 51, 1, 0),
-	PLL_RATE(1248000000, 52, 1, 0),
-	PLL_RATE(1272000000, 53, 1, 0),
-	PLL_RATE(1296000000, 54, 1, 0),
-	PLL_RATE(1320000000, 55, 1, 0),
-	PLL_RATE(1344000000, 56, 1, 0),
-	PLL_RATE(1368000000, 57, 1, 0),
-	PLL_RATE(1392000000, 58, 1, 0),
-	PLL_RATE(1416000000, 59, 1, 0),
-	PLL_RATE(1440000000, 60, 1, 0),
-	PLL_RATE(1464000000, 61, 1, 0),
-	PLL_RATE(1488000000, 62, 1, 0),
-	PLL_RATE(1512000000, 63, 1, 0),
-	PLL_RATE(1536000000, 64, 1, 0),
-	{ /* sentinel */ },
-};
-
-static const struct clk_div_table cpu_div_table[] = {
-	{ .val = 1, .div = 1 },
-	{ .val = 2, .div = 2 },
-	{ .val = 3, .div = 3 },
-	{ .val = 2, .div = 4 },
-	{ .val = 3, .div = 6 },
-	{ .val = 4, .div = 8 },
-	{ .val = 5, .div = 10 },
-	{ .val = 6, .div = 12 },
-	{ .val = 7, .div = 14 },
-	{ .val = 8, .div = 16 },
-	{ /* sentinel */ },
-};
-
-static struct clk_fixed_rate meson8b_xtal = {
-	.fixed_rate = 24000000,
-	.hw.init = &(struct clk_init_data){
-		.name = "xtal",
-		.num_parents = 0,
-		.ops = &clk_fixed_rate_ops,
-	},
-};
-
-static struct meson_clk_pll meson8b_fixed_pll = {
-	.m = {
-		.reg_off = MESON8B_REG_PLL_FIXED,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = MESON8B_REG_PLL_FIXED,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = MESON8B_REG_PLL_FIXED,
-		.shift   = 16,
-		.width   = 2,
-	},
-	.lock = &clk_lock,
-	.hw.init = &(struct clk_init_data){
-		.name = "fixed_pll",
-		.ops = &meson_clk_pll_ro_ops,
-		.parent_names = (const char *[]){ "xtal" },
-		.num_parents = 1,
-		.flags = CLK_GET_RATE_NOCACHE,
-	},
-};
-
-static struct meson_clk_pll meson8b_vid_pll = {
-	.m = {
-		.reg_off = MESON8B_REG_PLL_VID,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = MESON8B_REG_PLL_VID,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = MESON8B_REG_PLL_VID,
-		.shift   = 16,
-		.width   = 2,
-	},
-	.lock = &clk_lock,
-	.hw.init = &(struct clk_init_data){
-		.name = "vid_pll",
-		.ops = &meson_clk_pll_ro_ops,
-		.parent_names = (const char *[]){ "xtal" },
-		.num_parents = 1,
-		.flags = CLK_GET_RATE_NOCACHE,
-	},
-};
-
-static struct meson_clk_pll meson8b_sys_pll = {
-	.m = {
-		.reg_off = MESON8B_REG_PLL_SYS,
-		.shift   = 0,
-		.width   = 9,
-	},
-	.n = {
-		.reg_off = MESON8B_REG_PLL_SYS,
-		.shift   = 9,
-		.width   = 5,
-	},
-	.od = {
-		.reg_off = MESON8B_REG_PLL_SYS,
-		.shift   = 16,
-		.width   = 2,
-	},
-	.rate_table = sys_pll_rate_table,
-	.rate_count = ARRAY_SIZE(sys_pll_rate_table),
-	.lock = &clk_lock,
-	.hw.init = &(struct clk_init_data){
-		.name = "sys_pll",
-		.ops = &meson_clk_pll_ops,
-		.parent_names = (const char *[]){ "xtal" },
-		.num_parents = 1,
-		.flags = CLK_GET_RATE_NOCACHE,
-	},
-};
-
-static struct clk_fixed_factor meson8b_fclk_div2 = {
-	.mult = 1,
-	.div = 2,
-	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div2",
-		.ops = &clk_fixed_factor_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
-		.num_parents = 1,
-	},
-};
-
-static struct clk_fixed_factor meson8b_fclk_div3 = {
-	.mult = 1,
-	.div = 3,
-	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div3",
-		.ops = &clk_fixed_factor_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
-		.num_parents = 1,
-	},
-};
-
-static struct clk_fixed_factor meson8b_fclk_div4 = {
-	.mult = 1,
-	.div = 4,
-	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div4",
-		.ops = &clk_fixed_factor_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
-		.num_parents = 1,
-	},
-};
-
-static struct clk_fixed_factor meson8b_fclk_div5 = {
-	.mult = 1,
-	.div = 5,
-	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div5",
-		.ops = &clk_fixed_factor_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
-		.num_parents = 1,
-	},
-};
-
-static struct clk_fixed_factor meson8b_fclk_div7 = {
-	.mult = 1,
-	.div = 7,
-	.hw.init = &(struct clk_init_data){
-		.name = "fclk_div7",
-		.ops = &clk_fixed_factor_ops,
-		.parent_names = (const char *[]){ "fixed_pll" },
-		.num_parents = 1,
-	},
-};
-
-/*
- * FIXME cpu clocks and the legacy composite clocks (e.g. clk81) are both PLL
- * post-dividers and should be modeled with their respective PLLs via the
- * forthcoming coordinated clock rates feature
- */
-static struct meson_clk_cpu meson8b_cpu_clk = {
-	.reg_off = MESON8B_REG_SYS_CPU_CNTL1,
-	.div_table = cpu_div_table,
-	.clk_nb.notifier_call = meson_clk_cpu_notifier_cb,
-	.hw.init = &(struct clk_init_data){
-		.name = "cpu_clk",
-		.ops = &meson_clk_cpu_ops,
-		.parent_names = (const char *[]){ "sys_pll" },
-		.num_parents = 1,
-	},
-};
-
-static u32 mux_table_clk81[]	= { 6, 5, 7 };
-
-struct clk_mux meson8b_mpeg_clk_sel = {
-	.reg = (void *)MESON8B_REG_HHI_MPEG,
-	.mask = 0x7,
-	.shift = 12,
-	.flags = CLK_MUX_READ_ONLY,
-	.table = mux_table_clk81,
-	.lock = &clk_lock,
-	.hw.init = &(struct clk_init_data){
-		.name = "mpeg_clk_sel",
-		.ops = &clk_mux_ro_ops,
-		/*
-		 * FIXME bits 14:12 selects from 8 possible parents:
-		 * xtal, 1'b0 (wtf), fclk_div7, mpll_clkout1, mpll_clkout2,
-		 * fclk_div4, fclk_div3, fclk_div5
-		 */
-		.parent_names = (const char *[]){ "fclk_div3", "fclk_div4",
-			"fclk_div5" },
-		.num_parents = 3,
-		.flags = (CLK_SET_RATE_NO_REPARENT | CLK_IGNORE_UNUSED),
-	},
-};
-
-struct clk_divider meson8b_mpeg_clk_div = {
-	.reg = (void *)MESON8B_REG_HHI_MPEG,
-	.shift = 0,
-	.width = 7,
-	.lock = &clk_lock,
-	.hw.init = &(struct clk_init_data){
-		.name = "mpeg_clk_div",
-		.ops = &clk_divider_ops,
-		.parent_names = (const char *[]){ "mpeg_clk_sel" },
-		.num_parents = 1,
-		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED),
-	},
-};
-
-struct clk_gate meson8b_clk81 = {
-	.reg = (void *)MESON8B_REG_HHI_MPEG,
-	.bit_idx = 7,
-	.lock = &clk_lock,
-	.hw.init = &(struct clk_init_data){
-		.name = "clk81",
-		.ops = &clk_gate_ops,
-		.parent_names = (const char *[]){ "mpeg_clk_div" },
-		.num_parents = 1,
-		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED),
-	},
-};
-
-static struct clk_hw_onecell_data meson8b_hw_onecell_data = {
-	.hws = {
-		[CLKID_XTAL] = &meson8b_xtal.hw,
-		[CLKID_PLL_FIXED] = &meson8b_fixed_pll.hw,
-		[CLKID_PLL_VID] = &meson8b_vid_pll.hw,
-		[CLKID_PLL_SYS] = &meson8b_sys_pll.hw,
-		[CLKID_FCLK_DIV2] = &meson8b_fclk_div2.hw,
-		[CLKID_FCLK_DIV3] = &meson8b_fclk_div3.hw,
-		[CLKID_FCLK_DIV4] = &meson8b_fclk_div4.hw,
-		[CLKID_FCLK_DIV5] = &meson8b_fclk_div5.hw,
-		[CLKID_FCLK_DIV7] = &meson8b_fclk_div7.hw,
-		[CLKID_CPUCLK] = &meson8b_cpu_clk.hw,
-		[CLKID_MPEG_SEL] = &meson8b_mpeg_clk_sel.hw,
-		[CLKID_MPEG_DIV] = &meson8b_mpeg_clk_div.hw,
-		[CLKID_CLK81] = &meson8b_clk81.hw,
-	},
-	.num = CLK_NR_CLKS,
-};
-
-static struct meson_clk_pll *const meson8b_clk_plls[] = {
-	&meson8b_fixed_pll,
-	&meson8b_vid_pll,
-	&meson8b_sys_pll,
-};
-
-static int meson8b_clkc_probe(struct platform_device *pdev)
-{
-	void __iomem *clk_base;
-	int ret, clkid, i;
-	struct clk_hw *parent_hw;
-	struct clk *parent_clk;
-	struct device *dev = &pdev->dev;
-
-	/*  Generic clocks and PLLs */
-	clk_base = of_iomap(dev->of_node, 1);
-	if (!clk_base) {
-		pr_err("%s: Unable to map clk base\n", __func__);
-		return -ENXIO;
-	}
-
-	/* Populate base address for PLLs */
-	for (i = 0; i < ARRAY_SIZE(meson8b_clk_plls); i++)
-		meson8b_clk_plls[i]->base = clk_base;
-
-	/* Populate the base address for CPU clk */
-	meson8b_cpu_clk.base = clk_base;
-
-	/* Populate the base address for the MPEG clks */
-	meson8b_mpeg_clk_sel.reg = clk_base + (u32)meson8b_mpeg_clk_sel.reg;
-	meson8b_mpeg_clk_div.reg = clk_base + (u32)meson8b_mpeg_clk_div.reg;
-	meson8b_clk81.reg = clk_base + (u32)meson8b_clk81.reg;
-
-	/*
-	 * register all clks
-	 * CLKID_UNUSED = 0, so skip it and start with CLKID_XTAL = 1
-	 */
-	for (clkid = CLKID_XTAL; clkid < CLK_NR_CLKS; clkid++) {
-		/* array might be sparse */
-		if (!meson8b_hw_onecell_data.hws[clkid])
-			continue;
-
-		/* FIXME convert to devm_clk_register */
-		ret = devm_clk_hw_register(dev, meson8b_hw_onecell_data.hws[clkid]);
-		if (ret)
-			goto iounmap;
-	}
-
-	/*
-	 * Register CPU clk notifier
-	 *
-	 * FIXME this is wrong for a lot of reasons. First, the muxes should be
-	 * struct clk_hw objects. Second, we shouldn't program the muxes in
-	 * notifier handlers. The tricky programming sequence will be handled
-	 * by the forthcoming coordinated clock rates mechanism once that
-	 * feature is released.
-	 *
-	 * Furthermore, looking up the parent this way is terrible. At some
-	 * point we will stop allocating a default struct clk when registering
-	 * a new clk_hw, and this hack will no longer work. Releasing the ccr
-	 * feature before that time solves the problem :-)
-	 */
-	parent_hw = clk_hw_get_parent(&meson8b_cpu_clk.hw);
-	parent_clk = parent_hw->clk;
-	ret = clk_notifier_register(parent_clk, &meson8b_cpu_clk.clk_nb);
-	if (ret) {
-		pr_err("%s: failed to register clock notifier for cpu_clk\n",
-				__func__);
-		goto iounmap;
-	}
-
-	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
-			&meson8b_hw_onecell_data);
-
-iounmap:
-	iounmap(clk_base);
-	return ret;
-}
-
-static const struct of_device_id meson8b_clkc_match_table[] = {
-	{ .compatible = "amlogic,meson8b-clkc" },
-	{ }
-};
-
-static struct platform_driver meson8b_driver = {
-	.probe		= meson8b_clkc_probe,
-	.driver		= {
-		.name	= "meson8b-clkc",
-		.of_match_table = meson8b_clkc_match_table,
-	},
-};
-
-static int __init meson8b_clkc_init(void)
-{
-	return platform_driver_register(&meson8b_driver);
-}
-device_initcall(meson8b_clkc_init);
diff --git a/drivers/clk/meson/meson8b.c b/drivers/clk/meson/meson8b.c
new file mode 100644
index 0000000..3f1be46
--- /dev/null
+++ b/drivers/clk/meson/meson8b.c
@@ -0,0 +1,676 @@
+/*
+ * AmLogic S805 / Meson8b Clock Controller Driver
+ *
+ * Copyright (c) 2015 Endless Mobile, Inc.
+ * Author: Carlo Caione <carlo@endlessm.com>
+ *
+ * Copyright (c) 2016 BayLibre, Inc.
+ * Michael Turquette <mturquette@baylibre.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/init.h>
+
+#include "clkc.h"
+#include "meson8b.h"
+
+static DEFINE_SPINLOCK(clk_lock);
+
+static const struct pll_rate_table sys_pll_rate_table[] = {
+	PLL_RATE(312000000, 52, 1, 2),
+	PLL_RATE(336000000, 56, 1, 2),
+	PLL_RATE(360000000, 60, 1, 2),
+	PLL_RATE(384000000, 64, 1, 2),
+	PLL_RATE(408000000, 68, 1, 2),
+	PLL_RATE(432000000, 72, 1, 2),
+	PLL_RATE(456000000, 76, 1, 2),
+	PLL_RATE(480000000, 80, 1, 2),
+	PLL_RATE(504000000, 84, 1, 2),
+	PLL_RATE(528000000, 88, 1, 2),
+	PLL_RATE(552000000, 92, 1, 2),
+	PLL_RATE(576000000, 96, 1, 2),
+	PLL_RATE(600000000, 50, 1, 1),
+	PLL_RATE(624000000, 52, 1, 1),
+	PLL_RATE(648000000, 54, 1, 1),
+	PLL_RATE(672000000, 56, 1, 1),
+	PLL_RATE(696000000, 58, 1, 1),
+	PLL_RATE(720000000, 60, 1, 1),
+	PLL_RATE(744000000, 62, 1, 1),
+	PLL_RATE(768000000, 64, 1, 1),
+	PLL_RATE(792000000, 66, 1, 1),
+	PLL_RATE(816000000, 68, 1, 1),
+	PLL_RATE(840000000, 70, 1, 1),
+	PLL_RATE(864000000, 72, 1, 1),
+	PLL_RATE(888000000, 74, 1, 1),
+	PLL_RATE(912000000, 76, 1, 1),
+	PLL_RATE(936000000, 78, 1, 1),
+	PLL_RATE(960000000, 80, 1, 1),
+	PLL_RATE(984000000, 82, 1, 1),
+	PLL_RATE(1008000000, 84, 1, 1),
+	PLL_RATE(1032000000, 86, 1, 1),
+	PLL_RATE(1056000000, 88, 1, 1),
+	PLL_RATE(1080000000, 90, 1, 1),
+	PLL_RATE(1104000000, 92, 1, 1),
+	PLL_RATE(1128000000, 94, 1, 1),
+	PLL_RATE(1152000000, 96, 1, 1),
+	PLL_RATE(1176000000, 98, 1, 1),
+	PLL_RATE(1200000000, 50, 1, 0),
+	PLL_RATE(1224000000, 51, 1, 0),
+	PLL_RATE(1248000000, 52, 1, 0),
+	PLL_RATE(1272000000, 53, 1, 0),
+	PLL_RATE(1296000000, 54, 1, 0),
+	PLL_RATE(1320000000, 55, 1, 0),
+	PLL_RATE(1344000000, 56, 1, 0),
+	PLL_RATE(1368000000, 57, 1, 0),
+	PLL_RATE(1392000000, 58, 1, 0),
+	PLL_RATE(1416000000, 59, 1, 0),
+	PLL_RATE(1440000000, 60, 1, 0),
+	PLL_RATE(1464000000, 61, 1, 0),
+	PLL_RATE(1488000000, 62, 1, 0),
+	PLL_RATE(1512000000, 63, 1, 0),
+	PLL_RATE(1536000000, 64, 1, 0),
+	{ /* sentinel */ },
+};
+
+static const struct clk_div_table cpu_div_table[] = {
+	{ .val = 1, .div = 1 },
+	{ .val = 2, .div = 2 },
+	{ .val = 3, .div = 3 },
+	{ .val = 2, .div = 4 },
+	{ .val = 3, .div = 6 },
+	{ .val = 4, .div = 8 },
+	{ .val = 5, .div = 10 },
+	{ .val = 6, .div = 12 },
+	{ .val = 7, .div = 14 },
+	{ .val = 8, .div = 16 },
+	{ /* sentinel */ },
+};
+
+static struct clk_fixed_rate meson8b_xtal = {
+	.fixed_rate = 24000000,
+	.hw.init = &(struct clk_init_data){
+		.name = "xtal",
+		.num_parents = 0,
+		.ops = &clk_fixed_rate_ops,
+	},
+};
+
+static struct meson_clk_pll meson8b_fixed_pll = {
+	.m = {
+		.reg_off = HHI_MPLL_CNTL,
+		.shift   = 0,
+		.width   = 9,
+	},
+	.n = {
+		.reg_off = HHI_MPLL_CNTL,
+		.shift   = 9,
+		.width   = 5,
+	},
+	.od = {
+		.reg_off = HHI_MPLL_CNTL,
+		.shift   = 16,
+		.width   = 2,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "fixed_pll",
+		.ops = &meson_clk_pll_ro_ops,
+		.parent_names = (const char *[]){ "xtal" },
+		.num_parents = 1,
+		.flags = CLK_GET_RATE_NOCACHE,
+	},
+};
+
+static struct meson_clk_pll meson8b_vid_pll = {
+	.m = {
+		.reg_off = HHI_VID_PLL_CNTL,
+		.shift   = 0,
+		.width   = 9,
+	},
+	.n = {
+		.reg_off = HHI_VID_PLL_CNTL,
+		.shift   = 9,
+		.width   = 5,
+	},
+	.od = {
+		.reg_off = HHI_VID_PLL_CNTL,
+		.shift   = 16,
+		.width   = 2,
+	},
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "vid_pll",
+		.ops = &meson_clk_pll_ro_ops,
+		.parent_names = (const char *[]){ "xtal" },
+		.num_parents = 1,
+		.flags = CLK_GET_RATE_NOCACHE,
+	},
+};
+
+static struct meson_clk_pll meson8b_sys_pll = {
+	.m = {
+		.reg_off = HHI_SYS_PLL_CNTL,
+		.shift   = 0,
+		.width   = 9,
+	},
+	.n = {
+		.reg_off = HHI_SYS_PLL_CNTL,
+		.shift   = 9,
+		.width   = 5,
+	},
+	.od = {
+		.reg_off = HHI_SYS_PLL_CNTL,
+		.shift   = 16,
+		.width   = 2,
+	},
+	.rate_table = sys_pll_rate_table,
+	.rate_count = ARRAY_SIZE(sys_pll_rate_table),
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "sys_pll",
+		.ops = &meson_clk_pll_ops,
+		.parent_names = (const char *[]){ "xtal" },
+		.num_parents = 1,
+		.flags = CLK_GET_RATE_NOCACHE,
+	},
+};
+
+static struct clk_fixed_factor meson8b_fclk_div2 = {
+	.mult = 1,
+	.div = 2,
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div2",
+		.ops = &clk_fixed_factor_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor meson8b_fclk_div3 = {
+	.mult = 1,
+	.div = 3,
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div3",
+		.ops = &clk_fixed_factor_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor meson8b_fclk_div4 = {
+	.mult = 1,
+	.div = 4,
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div4",
+		.ops = &clk_fixed_factor_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor meson8b_fclk_div5 = {
+	.mult = 1,
+	.div = 5,
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div5",
+		.ops = &clk_fixed_factor_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
+static struct clk_fixed_factor meson8b_fclk_div7 = {
+	.mult = 1,
+	.div = 7,
+	.hw.init = &(struct clk_init_data){
+		.name = "fclk_div7",
+		.ops = &clk_fixed_factor_ops,
+		.parent_names = (const char *[]){ "fixed_pll" },
+		.num_parents = 1,
+	},
+};
+
+/*
+ * FIXME cpu clocks and the legacy composite clocks (e.g. clk81) are both PLL
+ * post-dividers and should be modeled with their respective PLLs via the
+ * forthcoming coordinated clock rates feature
+ */
+static struct meson_clk_cpu meson8b_cpu_clk = {
+	.reg_off = HHI_SYS_CPU_CLK_CNTL1,
+	.div_table = cpu_div_table,
+	.clk_nb.notifier_call = meson_clk_cpu_notifier_cb,
+	.hw.init = &(struct clk_init_data){
+		.name = "cpu_clk",
+		.ops = &meson_clk_cpu_ops,
+		.parent_names = (const char *[]){ "sys_pll" },
+		.num_parents = 1,
+	},
+};
+
+static u32 mux_table_clk81[]	= { 6, 5, 7 };
+
+struct clk_mux meson8b_mpeg_clk_sel = {
+	.reg = (void *)HHI_MPEG_CLK_CNTL,
+	.mask = 0x7,
+	.shift = 12,
+	.flags = CLK_MUX_READ_ONLY,
+	.table = mux_table_clk81,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpeg_clk_sel",
+		.ops = &clk_mux_ro_ops,
+		/*
+		 * FIXME bits 14:12 selects from 8 possible parents:
+		 * xtal, 1'b0 (wtf), fclk_div7, mpll_clkout1, mpll_clkout2,
+		 * fclk_div4, fclk_div3, fclk_div5
+		 */
+		.parent_names = (const char *[]){ "fclk_div3", "fclk_div4",
+			"fclk_div5" },
+		.num_parents = 3,
+		.flags = (CLK_SET_RATE_NO_REPARENT | CLK_IGNORE_UNUSED),
+	},
+};
+
+struct clk_divider meson8b_mpeg_clk_div = {
+	.reg = (void *)HHI_MPEG_CLK_CNTL,
+	.shift = 0,
+	.width = 7,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "mpeg_clk_div",
+		.ops = &clk_divider_ops,
+		.parent_names = (const char *[]){ "mpeg_clk_sel" },
+		.num_parents = 1,
+		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED),
+	},
+};
+
+struct clk_gate meson8b_clk81 = {
+	.reg = (void *)HHI_MPEG_CLK_CNTL,
+	.bit_idx = 7,
+	.lock = &clk_lock,
+	.hw.init = &(struct clk_init_data){
+		.name = "clk81",
+		.ops = &clk_gate_ops,
+		.parent_names = (const char *[]){ "mpeg_clk_div" },
+		.num_parents = 1,
+		.flags = (CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED),
+	},
+};
+
+/* Everything Else (EE) domain gates */
+
+static MESON_GATE(meson8b_ddr, HHI_GCLK_MPEG0, 0);
+static MESON_GATE(meson8b_dos, HHI_GCLK_MPEG0, 1);
+static MESON_GATE(meson8b_isa, HHI_GCLK_MPEG0, 5);
+static MESON_GATE(meson8b_pl301, HHI_GCLK_MPEG0, 6);
+static MESON_GATE(meson8b_periphs, HHI_GCLK_MPEG0, 7);
+static MESON_GATE(meson8b_spicc, HHI_GCLK_MPEG0, 8);
+static MESON_GATE(meson8b_i2c, HHI_GCLK_MPEG0, 9);
+static MESON_GATE(meson8b_sar_adc, HHI_GCLK_MPEG0, 10);
+static MESON_GATE(meson8b_smart_card, HHI_GCLK_MPEG0, 11);
+static MESON_GATE(meson8b_rng0, HHI_GCLK_MPEG0, 12);
+static MESON_GATE(meson8b_uart0, HHI_GCLK_MPEG0, 13);
+static MESON_GATE(meson8b_sdhc, HHI_GCLK_MPEG0, 14);
+static MESON_GATE(meson8b_stream, HHI_GCLK_MPEG0, 15);
+static MESON_GATE(meson8b_async_fifo, HHI_GCLK_MPEG0, 16);
+static MESON_GATE(meson8b_sdio, HHI_GCLK_MPEG0, 17);
+static MESON_GATE(meson8b_abuf, HHI_GCLK_MPEG0, 18);
+static MESON_GATE(meson8b_hiu_iface, HHI_GCLK_MPEG0, 19);
+static MESON_GATE(meson8b_assist_misc, HHI_GCLK_MPEG0, 23);
+static MESON_GATE(meson8b_spi, HHI_GCLK_MPEG0, 30);
+
+static MESON_GATE(meson8b_i2s_spdif, HHI_GCLK_MPEG1, 2);
+static MESON_GATE(meson8b_eth, HHI_GCLK_MPEG1, 3);
+static MESON_GATE(meson8b_demux, HHI_GCLK_MPEG1, 4);
+static MESON_GATE(meson8b_aiu_glue, HHI_GCLK_MPEG1, 6);
+static MESON_GATE(meson8b_iec958, HHI_GCLK_MPEG1, 7);
+static MESON_GATE(meson8b_i2s_out, HHI_GCLK_MPEG1, 8);
+static MESON_GATE(meson8b_amclk, HHI_GCLK_MPEG1, 9);
+static MESON_GATE(meson8b_aififo2, HHI_GCLK_MPEG1, 10);
+static MESON_GATE(meson8b_mixer, HHI_GCLK_MPEG1, 11);
+static MESON_GATE(meson8b_mixer_iface, HHI_GCLK_MPEG1, 12);
+static MESON_GATE(meson8b_adc, HHI_GCLK_MPEG1, 13);
+static MESON_GATE(meson8b_blkmv, HHI_GCLK_MPEG1, 14);
+static MESON_GATE(meson8b_aiu, HHI_GCLK_MPEG1, 15);
+static MESON_GATE(meson8b_uart1, HHI_GCLK_MPEG1, 16);
+static MESON_GATE(meson8b_g2d, HHI_GCLK_MPEG1, 20);
+static MESON_GATE(meson8b_usb0, HHI_GCLK_MPEG1, 21);
+static MESON_GATE(meson8b_usb1, HHI_GCLK_MPEG1, 22);
+static MESON_GATE(meson8b_reset, HHI_GCLK_MPEG1, 23);
+static MESON_GATE(meson8b_nand, HHI_GCLK_MPEG1, 24);
+static MESON_GATE(meson8b_dos_parser, HHI_GCLK_MPEG1, 25);
+static MESON_GATE(meson8b_usb, HHI_GCLK_MPEG1, 26);
+static MESON_GATE(meson8b_vdin1, HHI_GCLK_MPEG1, 28);
+static MESON_GATE(meson8b_ahb_arb0, HHI_GCLK_MPEG1, 29);
+static MESON_GATE(meson8b_efuse, HHI_GCLK_MPEG1, 30);
+static MESON_GATE(meson8b_boot_rom, HHI_GCLK_MPEG1, 31);
+
+static MESON_GATE(meson8b_ahb_data_bus, HHI_GCLK_MPEG2, 1);
+static MESON_GATE(meson8b_ahb_ctrl_bus, HHI_GCLK_MPEG2, 2);
+static MESON_GATE(meson8b_hdmi_intr_sync, HHI_GCLK_MPEG2, 3);
+static MESON_GATE(meson8b_hdmi_pclk, HHI_GCLK_MPEG2, 4);
+static MESON_GATE(meson8b_usb1_ddr_bridge, HHI_GCLK_MPEG2, 8);
+static MESON_GATE(meson8b_usb0_ddr_bridge, HHI_GCLK_MPEG2, 9);
+static MESON_GATE(meson8b_mmc_pclk, HHI_GCLK_MPEG2, 11);
+static MESON_GATE(meson8b_dvin, HHI_GCLK_MPEG2, 12);
+static MESON_GATE(meson8b_uart2, HHI_GCLK_MPEG2, 15);
+static MESON_GATE(meson8b_sana, HHI_GCLK_MPEG2, 22);
+static MESON_GATE(meson8b_vpu_intr, HHI_GCLK_MPEG2, 25);
+static MESON_GATE(meson8b_sec_ahb_ahb3_bridge, HHI_GCLK_MPEG2, 26);
+static MESON_GATE(meson8b_clk81_a9, HHI_GCLK_MPEG2, 29);
+
+static MESON_GATE(meson8b_vclk2_venci0, HHI_GCLK_OTHER, 1);
+static MESON_GATE(meson8b_vclk2_venci1, HHI_GCLK_OTHER, 2);
+static MESON_GATE(meson8b_vclk2_vencp0, HHI_GCLK_OTHER, 3);
+static MESON_GATE(meson8b_vclk2_vencp1, HHI_GCLK_OTHER, 4);
+static MESON_GATE(meson8b_gclk_venci_int, HHI_GCLK_OTHER, 8);
+static MESON_GATE(meson8b_gclk_vencp_int, HHI_GCLK_OTHER, 9);
+static MESON_GATE(meson8b_dac_clk, HHI_GCLK_OTHER, 10);
+static MESON_GATE(meson8b_aoclk_gate, HHI_GCLK_OTHER, 14);
+static MESON_GATE(meson8b_iec958_gate, HHI_GCLK_OTHER, 16);
+static MESON_GATE(meson8b_enc480p, HHI_GCLK_OTHER, 20);
+static MESON_GATE(meson8b_rng1, HHI_GCLK_OTHER, 21);
+static MESON_GATE(meson8b_gclk_vencl_int, HHI_GCLK_OTHER, 22);
+static MESON_GATE(meson8b_vclk2_venclmcc, HHI_GCLK_OTHER, 24);
+static MESON_GATE(meson8b_vclk2_vencl, HHI_GCLK_OTHER, 25);
+static MESON_GATE(meson8b_vclk2_other, HHI_GCLK_OTHER, 26);
+static MESON_GATE(meson8b_edp, HHI_GCLK_OTHER, 31);
+
+/* Always On (AO) domain gates */
+
+static MESON_GATE(meson8b_ao_media_cpu, HHI_GCLK_AO, 0);
+static MESON_GATE(meson8b_ao_ahb_sram, HHI_GCLK_AO, 1);
+static MESON_GATE(meson8b_ao_ahb_bus, HHI_GCLK_AO, 2);
+static MESON_GATE(meson8b_ao_iface, HHI_GCLK_AO, 3);
+
+static struct clk_hw_onecell_data meson8b_hw_onecell_data = {
+	.hws = {
+		[CLKID_XTAL] = &meson8b_xtal.hw,
+		[CLKID_PLL_FIXED] = &meson8b_fixed_pll.hw,
+		[CLKID_PLL_VID] = &meson8b_vid_pll.hw,
+		[CLKID_PLL_SYS] = &meson8b_sys_pll.hw,
+		[CLKID_FCLK_DIV2] = &meson8b_fclk_div2.hw,
+		[CLKID_FCLK_DIV3] = &meson8b_fclk_div3.hw,
+		[CLKID_FCLK_DIV4] = &meson8b_fclk_div4.hw,
+		[CLKID_FCLK_DIV5] = &meson8b_fclk_div5.hw,
+		[CLKID_FCLK_DIV7] = &meson8b_fclk_div7.hw,
+		[CLKID_CPUCLK] = &meson8b_cpu_clk.hw,
+		[CLKID_MPEG_SEL] = &meson8b_mpeg_clk_sel.hw,
+		[CLKID_MPEG_DIV] = &meson8b_mpeg_clk_div.hw,
+		[CLKID_CLK81] = &meson8b_clk81.hw,
+		[CLKID_DDR]		    = &meson8b_ddr.hw,
+		[CLKID_DOS]		    = &meson8b_dos.hw,
+		[CLKID_ISA]		    = &meson8b_isa.hw,
+		[CLKID_PL301]		    = &meson8b_pl301.hw,
+		[CLKID_PERIPHS]		    = &meson8b_periphs.hw,
+		[CLKID_SPICC]		    = &meson8b_spicc.hw,
+		[CLKID_I2C]		    = &meson8b_i2c.hw,
+		[CLKID_SAR_ADC]		    = &meson8b_sar_adc.hw,
+		[CLKID_SMART_CARD]	    = &meson8b_smart_card.hw,
+		[CLKID_RNG0]		    = &meson8b_rng0.hw,
+		[CLKID_UART0]		    = &meson8b_uart0.hw,
+		[CLKID_SDHC]		    = &meson8b_sdhc.hw,
+		[CLKID_STREAM]		    = &meson8b_stream.hw,
+		[CLKID_ASYNC_FIFO]	    = &meson8b_async_fifo.hw,
+		[CLKID_SDIO]		    = &meson8b_sdio.hw,
+		[CLKID_ABUF]		    = &meson8b_abuf.hw,
+		[CLKID_HIU_IFACE]	    = &meson8b_hiu_iface.hw,
+		[CLKID_ASSIST_MISC]	    = &meson8b_assist_misc.hw,
+		[CLKID_SPI]		    = &meson8b_spi.hw,
+		[CLKID_I2S_SPDIF]	    = &meson8b_i2s_spdif.hw,
+		[CLKID_ETH]		    = &meson8b_eth.hw,
+		[CLKID_DEMUX]		    = &meson8b_demux.hw,
+		[CLKID_AIU_GLUE]	    = &meson8b_aiu_glue.hw,
+		[CLKID_IEC958]		    = &meson8b_iec958.hw,
+		[CLKID_I2S_OUT]		    = &meson8b_i2s_out.hw,
+		[CLKID_AMCLK]		    = &meson8b_amclk.hw,
+		[CLKID_AIFIFO2]		    = &meson8b_aififo2.hw,
+		[CLKID_MIXER]		    = &meson8b_mixer.hw,
+		[CLKID_MIXER_IFACE]	    = &meson8b_mixer_iface.hw,
+		[CLKID_ADC]		    = &meson8b_adc.hw,
+		[CLKID_BLKMV]		    = &meson8b_blkmv.hw,
+		[CLKID_AIU]		    = &meson8b_aiu.hw,
+		[CLKID_UART1]		    = &meson8b_uart1.hw,
+		[CLKID_G2D]		    = &meson8b_g2d.hw,
+		[CLKID_USB0]		    = &meson8b_usb0.hw,
+		[CLKID_USB1]		    = &meson8b_usb1.hw,
+		[CLKID_RESET]		    = &meson8b_reset.hw,
+		[CLKID_NAND]		    = &meson8b_nand.hw,
+		[CLKID_DOS_PARSER]	    = &meson8b_dos_parser.hw,
+		[CLKID_USB]		    = &meson8b_usb.hw,
+		[CLKID_VDIN1]		    = &meson8b_vdin1.hw,
+		[CLKID_AHB_ARB0]	    = &meson8b_ahb_arb0.hw,
+		[CLKID_EFUSE]		    = &meson8b_efuse.hw,
+		[CLKID_BOOT_ROM]	    = &meson8b_boot_rom.hw,
+		[CLKID_AHB_DATA_BUS]	    = &meson8b_ahb_data_bus.hw,
+		[CLKID_AHB_CTRL_BUS]	    = &meson8b_ahb_ctrl_bus.hw,
+		[CLKID_HDMI_INTR_SYNC]	    = &meson8b_hdmi_intr_sync.hw,
+		[CLKID_HDMI_PCLK]	    = &meson8b_hdmi_pclk.hw,
+		[CLKID_USB1_DDR_BRIDGE]	    = &meson8b_usb1_ddr_bridge.hw,
+		[CLKID_USB0_DDR_BRIDGE]	    = &meson8b_usb0_ddr_bridge.hw,
+		[CLKID_MMC_PCLK]	    = &meson8b_mmc_pclk.hw,
+		[CLKID_DVIN]		    = &meson8b_dvin.hw,
+		[CLKID_UART2]		    = &meson8b_uart2.hw,
+		[CLKID_SANA]		    = &meson8b_sana.hw,
+		[CLKID_VPU_INTR]	    = &meson8b_vpu_intr.hw,
+		[CLKID_SEC_AHB_AHB3_BRIDGE] = &meson8b_sec_ahb_ahb3_bridge.hw,
+		[CLKID_CLK81_A9]	    = &meson8b_clk81_a9.hw,
+		[CLKID_VCLK2_VENCI0]	    = &meson8b_vclk2_venci0.hw,
+		[CLKID_VCLK2_VENCI1]	    = &meson8b_vclk2_venci1.hw,
+		[CLKID_VCLK2_VENCP0]	    = &meson8b_vclk2_vencp0.hw,
+		[CLKID_VCLK2_VENCP1]	    = &meson8b_vclk2_vencp1.hw,
+		[CLKID_GCLK_VENCI_INT]	    = &meson8b_gclk_venci_int.hw,
+		[CLKID_GCLK_VENCP_INT]	    = &meson8b_gclk_vencp_int.hw,
+		[CLKID_DAC_CLK]		    = &meson8b_dac_clk.hw,
+		[CLKID_AOCLK_GATE]	    = &meson8b_aoclk_gate.hw,
+		[CLKID_IEC958_GATE]	    = &meson8b_iec958_gate.hw,
+		[CLKID_ENC480P]		    = &meson8b_enc480p.hw,
+		[CLKID_RNG1]		    = &meson8b_rng1.hw,
+		[CLKID_GCLK_VENCL_INT]	    = &meson8b_gclk_vencl_int.hw,
+		[CLKID_VCLK2_VENCLMCC]	    = &meson8b_vclk2_venclmcc.hw,
+		[CLKID_VCLK2_VENCL]	    = &meson8b_vclk2_vencl.hw,
+		[CLKID_VCLK2_OTHER]	    = &meson8b_vclk2_other.hw,
+		[CLKID_EDP]		    = &meson8b_edp.hw,
+		[CLKID_AO_MEDIA_CPU]	    = &meson8b_ao_media_cpu.hw,
+		[CLKID_AO_AHB_SRAM]	    = &meson8b_ao_ahb_sram.hw,
+		[CLKID_AO_AHB_BUS]	    = &meson8b_ao_ahb_bus.hw,
+		[CLKID_AO_IFACE]	    = &meson8b_ao_iface.hw,
+	},
+	.num = CLK_NR_CLKS,
+};
+
+static struct meson_clk_pll *const meson8b_clk_plls[] = {
+	&meson8b_fixed_pll,
+	&meson8b_vid_pll,
+	&meson8b_sys_pll,
+};
+
+static struct clk_gate *meson8b_clk_gates[] = {
+	&meson8b_clk81,
+	&meson8b_ddr,
+	&meson8b_dos,
+	&meson8b_isa,
+	&meson8b_pl301,
+	&meson8b_periphs,
+	&meson8b_spicc,
+	&meson8b_i2c,
+	&meson8b_sar_adc,
+	&meson8b_smart_card,
+	&meson8b_rng0,
+	&meson8b_uart0,
+	&meson8b_sdhc,
+	&meson8b_stream,
+	&meson8b_async_fifo,
+	&meson8b_sdio,
+	&meson8b_abuf,
+	&meson8b_hiu_iface,
+	&meson8b_assist_misc,
+	&meson8b_spi,
+	&meson8b_i2s_spdif,
+	&meson8b_eth,
+	&meson8b_demux,
+	&meson8b_aiu_glue,
+	&meson8b_iec958,
+	&meson8b_i2s_out,
+	&meson8b_amclk,
+	&meson8b_aififo2,
+	&meson8b_mixer,
+	&meson8b_mixer_iface,
+	&meson8b_adc,
+	&meson8b_blkmv,
+	&meson8b_aiu,
+	&meson8b_uart1,
+	&meson8b_g2d,
+	&meson8b_usb0,
+	&meson8b_usb1,
+	&meson8b_reset,
+	&meson8b_nand,
+	&meson8b_dos_parser,
+	&meson8b_usb,
+	&meson8b_vdin1,
+	&meson8b_ahb_arb0,
+	&meson8b_efuse,
+	&meson8b_boot_rom,
+	&meson8b_ahb_data_bus,
+	&meson8b_ahb_ctrl_bus,
+	&meson8b_hdmi_intr_sync,
+	&meson8b_hdmi_pclk,
+	&meson8b_usb1_ddr_bridge,
+	&meson8b_usb0_ddr_bridge,
+	&meson8b_mmc_pclk,
+	&meson8b_dvin,
+	&meson8b_uart2,
+	&meson8b_sana,
+	&meson8b_vpu_intr,
+	&meson8b_sec_ahb_ahb3_bridge,
+	&meson8b_clk81_a9,
+	&meson8b_vclk2_venci0,
+	&meson8b_vclk2_venci1,
+	&meson8b_vclk2_vencp0,
+	&meson8b_vclk2_vencp1,
+	&meson8b_gclk_venci_int,
+	&meson8b_gclk_vencp_int,
+	&meson8b_dac_clk,
+	&meson8b_aoclk_gate,
+	&meson8b_iec958_gate,
+	&meson8b_enc480p,
+	&meson8b_rng1,
+	&meson8b_gclk_vencl_int,
+	&meson8b_vclk2_venclmcc,
+	&meson8b_vclk2_vencl,
+	&meson8b_vclk2_other,
+	&meson8b_edp,
+	&meson8b_ao_media_cpu,
+	&meson8b_ao_ahb_sram,
+	&meson8b_ao_ahb_bus,
+	&meson8b_ao_iface,
+};
+
+static int meson8b_clkc_probe(struct platform_device *pdev)
+{
+	void __iomem *clk_base;
+	int ret, clkid, i;
+	struct clk_hw *parent_hw;
+	struct clk *parent_clk;
+	struct device *dev = &pdev->dev;
+
+	/*  Generic clocks and PLLs */
+	clk_base = of_iomap(dev->of_node, 1);
+	if (!clk_base) {
+		pr_err("%s: Unable to map clk base\n", __func__);
+		return -ENXIO;
+	}
+
+	/* Populate base address for PLLs */
+	for (i = 0; i < ARRAY_SIZE(meson8b_clk_plls); i++)
+		meson8b_clk_plls[i]->base = clk_base;
+
+	/* Populate the base address for CPU clk */
+	meson8b_cpu_clk.base = clk_base;
+
+	/* Populate the base address for the MPEG clks */
+	meson8b_mpeg_clk_sel.reg = clk_base + (u32)meson8b_mpeg_clk_sel.reg;
+	meson8b_mpeg_clk_div.reg = clk_base + (u32)meson8b_mpeg_clk_div.reg;
+	meson8b_clk81.reg = clk_base + (u32)meson8b_clk81.reg;
+
+	/* Populate base address for gates */
+	for (i = 0; i < ARRAY_SIZE(meson8b_clk_gates); i++)
+		meson8b_clk_gates[i]->reg = clk_base +
+			(u32)meson8b_clk_gates[i]->reg;
+
+	/*
+	 * register all clks
+	 * CLKID_UNUSED = 0, so skip it and start with CLKID_XTAL = 1
+	 */
+	for (clkid = CLKID_XTAL; clkid < CLK_NR_CLKS; clkid++) {
+		/* array might be sparse */
+		if (!meson8b_hw_onecell_data.hws[clkid])
+			continue;
+
+		/* FIXME convert to devm_clk_register */
+		ret = devm_clk_hw_register(dev, meson8b_hw_onecell_data.hws[clkid]);
+		if (ret)
+			goto iounmap;
+	}
+
+	/*
+	 * Register CPU clk notifier
+	 *
+	 * FIXME this is wrong for a lot of reasons. First, the muxes should be
+	 * struct clk_hw objects. Second, we shouldn't program the muxes in
+	 * notifier handlers. The tricky programming sequence will be handled
+	 * by the forthcoming coordinated clock rates mechanism once that
+	 * feature is released.
+	 *
+	 * Furthermore, looking up the parent this way is terrible. At some
+	 * point we will stop allocating a default struct clk when registering
+	 * a new clk_hw, and this hack will no longer work. Releasing the ccr
+	 * feature before that time solves the problem :-)
+	 */
+	parent_hw = clk_hw_get_parent(&meson8b_cpu_clk.hw);
+	parent_clk = parent_hw->clk;
+	ret = clk_notifier_register(parent_clk, &meson8b_cpu_clk.clk_nb);
+	if (ret) {
+		pr_err("%s: failed to register clock notifier for cpu_clk\n",
+				__func__);
+		goto iounmap;
+	}
+
+	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
+			&meson8b_hw_onecell_data);
+
+iounmap:
+	iounmap(clk_base);
+	return ret;
+}
+
+static const struct of_device_id meson8b_clkc_match_table[] = {
+	{ .compatible = "amlogic,meson8b-clkc" },
+	{ }
+};
+
+static struct platform_driver meson8b_driver = {
+	.probe		= meson8b_clkc_probe,
+	.driver		= {
+		.name	= "meson8b-clkc",
+		.of_match_table = meson8b_clkc_match_table,
+	},
+};
+
+builtin_platform_driver(meson8b_driver);
diff --git a/drivers/clk/meson/meson8b.h b/drivers/clk/meson/meson8b.h
new file mode 100644
index 0000000..010e958
--- /dev/null
+++ b/drivers/clk/meson/meson8b.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2015 Endless Mobile, Inc.
+ * Author: Carlo Caione <carlo@endlessm.com>
+ *
+ * Copyright (c) 2016 BayLibre, Inc.
+ * Michael Turquette <mturquette@baylibre.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MESON8B_H
+#define __MESON8B_H
+
+/*
+ * Clock controller register offsets
+ *
+ * Register offsets from the HardKernel[0] data sheet are listed in comment
+ * blocks below. Those offsets must be multiplied by 4 before adding them to
+ * the base address to get the right value
+ *
+ * [0] http://dn.odroid.com/S805/Datasheet/S805_Datasheet%20V0.8%2020150126.pdf
+ */
+#define HHI_GCLK_MPEG0			0x140 /* 0x50 offset in data sheet */
+#define HHI_GCLK_MPEG1			0x144 /* 0x51 offset in data sheet */
+#define HHI_GCLK_MPEG2			0x148 /* 0x52 offset in data sheet */
+#define HHI_GCLK_OTHER			0x150 /* 0x54 offset in data sheet */
+#define HHI_GCLK_AO			0x154 /* 0x55 offset in data sheet */
+#define HHI_SYS_CPU_CLK_CNTL1		0x15c /* 0x57 offset in data sheet */
+#define HHI_MPEG_CLK_CNTL		0x174 /* 0x5d offset in data sheet */
+#define HHI_MPLL_CNTL			0x280 /* 0xa0 offset in data sheet */
+#define HHI_SYS_PLL_CNTL		0x300 /* 0xc0 offset in data sheet */
+#define HHI_VID_PLL_CNTL		0x320 /* 0xc8 offset in data sheet */
+
+/*
+ * CLKID index values
+ *
+ * These indices are entirely contrived and do not map onto the hardware.
+ * Migrate them out of this header and into the DT header file when they need
+ * to be exposed to client nodes in DT: include/dt-bindings/clock/meson8b-clkc.h
+ */
+
+/* CLKID_UNUSED */
+/* CLKID_XTAL */
+/* CLKID_PLL_FIXED */
+/* CLKID_PLL_VID */
+/* CLKID_PLL_SYS */
+/* CLKID_FCLK_DIV2 */
+/* CLKID_FCLK_DIV3 */
+/* CLKID_FCLK_DIV4 */
+/* CLKID_FCLK_DIV5 */
+/* CLKID_FCLK_DIV7 */
+/* CLKID_CLK81 */
+/* CLKID_MALI */
+/* CLKID_CPUCLK */
+/* CLKID_ZERO */
+/* CLKID_MPEG_SEL */
+/* CLKID_MPEG_DIV */
+#define CLKID_DDR		16
+#define CLKID_DOS		17
+#define CLKID_ISA		18
+#define CLKID_PL301		19
+#define CLKID_PERIPHS		20
+#define CLKID_SPICC		21
+#define CLKID_I2C		22
+#define CLKID_SAR_ADC		23
+#define CLKID_SMART_CARD	24
+#define CLKID_RNG0		25
+#define CLKID_UART0		26
+#define CLKID_SDHC		27
+#define CLKID_STREAM		28
+#define CLKID_ASYNC_FIFO	29
+#define CLKID_SDIO		30
+#define CLKID_ABUF		31
+#define CLKID_HIU_IFACE		32
+#define CLKID_ASSIST_MISC	33
+#define CLKID_SPI		34
+#define CLKID_I2S_SPDIF		35
+#define CLKID_ETH		36
+#define CLKID_DEMUX		37
+#define CLKID_AIU_GLUE		38
+#define CLKID_IEC958		39
+#define CLKID_I2S_OUT		40
+#define CLKID_AMCLK		41
+#define CLKID_AIFIFO2		42
+#define CLKID_MIXER		43
+#define CLKID_MIXER_IFACE	44
+#define CLKID_ADC		45
+#define CLKID_BLKMV		46
+#define CLKID_AIU		47
+#define CLKID_UART1		48
+#define CLKID_G2D		49
+#define CLKID_USB0		50
+#define CLKID_USB1		51
+#define CLKID_RESET		52
+#define CLKID_NAND		53
+#define CLKID_DOS_PARSER	54
+#define CLKID_USB		55
+#define CLKID_VDIN1		56
+#define CLKID_AHB_ARB0		57
+#define CLKID_EFUSE		58
+#define CLKID_BOOT_ROM		59
+#define CLKID_AHB_DATA_BUS	60
+#define CLKID_AHB_CTRL_BUS	61
+#define CLKID_HDMI_INTR_SYNC	62
+#define CLKID_HDMI_PCLK		63
+#define CLKID_USB1_DDR_BRIDGE	64
+#define CLKID_USB0_DDR_BRIDGE	65
+#define CLKID_MMC_PCLK		66
+#define CLKID_DVIN		67
+#define CLKID_UART2		68
+#define CLKID_SANA		69
+#define CLKID_VPU_INTR		70
+#define CLKID_SEC_AHB_AHB3_BRIDGE	71
+#define CLKID_CLK81_A9		72
+#define CLKID_VCLK2_VENCI0	73
+#define CLKID_VCLK2_VENCI1	74
+#define CLKID_VCLK2_VENCP0	75
+#define CLKID_VCLK2_VENCP1	76
+#define CLKID_GCLK_VENCI_INT	77
+#define CLKID_GCLK_VENCP_INT	78
+#define CLKID_DAC_CLK		79
+#define CLKID_AOCLK_GATE	80
+#define CLKID_IEC958_GATE	81
+#define CLKID_ENC480P		82
+#define CLKID_RNG1		83
+#define CLKID_GCLK_VENCL_INT	84
+#define CLKID_VCLK2_VENCLMCC	85
+#define CLKID_VCLK2_VENCL	86
+#define CLKID_VCLK2_OTHER	87
+#define CLKID_EDP		88
+#define CLKID_AO_MEDIA_CPU	89
+#define CLKID_AO_AHB_SRAM	90
+#define CLKID_AO_AHB_BUS	91
+#define CLKID_AO_IFACE		92
+
+#define CLK_NR_CLKS		93
+
+/* include the CLKIDs that have been made part of the stable DT binding */
+#include <dt-bindings/clock/meson8b-clkc.h>
+
+#endif /* __MESON8B_H */
diff --git a/drivers/clk/microchip/clk-core.c b/drivers/clk/microchip/clk-core.c
index ca85cea..c3b3014 100644
--- a/drivers/clk/microchip/clk-core.c
+++ b/drivers/clk/microchip/clk-core.c
@@ -199,9 +199,9 @@
 
 	spin_unlock_irqrestore(&pb->core->reg_lock, flags);
 
-	/* wait again, for pbdivready */
-	err = readl_poll_timeout_atomic(pb->ctrl_reg, v, v & PB_DIV_READY,
-					1, LOCK_TIMEOUT_US);
+	/* wait again for DIV_READY */
+	err = readl_poll_timeout(pb->ctrl_reg, v, v & PB_DIV_READY,
+				 1, LOCK_TIMEOUT_US);
 	if (err)
 		return err;
 
diff --git a/drivers/clk/microchip/clk-pic32mzda.c b/drivers/clk/microchip/clk-pic32mzda.c
index 51f5438..9f73477 100644
--- a/drivers/clk/microchip/clk-pic32mzda.c
+++ b/drivers/clk/microchip/clk-pic32mzda.c
@@ -118,6 +118,7 @@
 	.status_reg = 0x1d0,
 	.enable_mask = BIT(1),
 	.status_mask = BIT(4),
+	.fixed_rate = 32768,
 	.init_data = {
 		.name = "sosc_clk",
 		.parent_names = NULL,
diff --git a/drivers/clk/mmp/clk-mmp2.c b/drivers/clk/mmp/clk-mmp2.c
index 383f6a4..0380234 100644
--- a/drivers/clk/mmp/clk-mmp2.c
+++ b/drivers/clk/mmp/clk-mmp2.c
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <linux/delay.h>
 #include <linux/err.h>
+#include <linux/clk/mmp.h>
 
 #include "clk.h"
 
diff --git a/drivers/clk/mvebu/Kconfig b/drivers/clk/mvebu/Kconfig
index 3165da7..fddc8ac 100644
--- a/drivers/clk/mvebu/Kconfig
+++ b/drivers/clk/mvebu/Kconfig
@@ -24,6 +24,9 @@
 	bool
 	select MVEBU_CLK_COMMON
 
+config ARMADA_37XX_CLK
+       bool
+
 config ARMADA_XP_CLK
 	bool
 	select MVEBU_CLK_COMMON
diff --git a/drivers/clk/mvebu/Makefile b/drivers/clk/mvebu/Makefile
index 7172ef6..d9ae97f 100644
--- a/drivers/clk/mvebu/Makefile
+++ b/drivers/clk/mvebu/Makefile
@@ -6,6 +6,9 @@
 obj-$(CONFIG_ARMADA_375_CLK)	+= armada-375.o
 obj-$(CONFIG_ARMADA_38X_CLK)	+= armada-38x.o
 obj-$(CONFIG_ARMADA_39X_CLK)	+= armada-39x.o
+obj-$(CONFIG_ARMADA_37XX_CLK)	+= armada-37xx-xtal.o
+obj-$(CONFIG_ARMADA_37XX_CLK)	+= armada-37xx-tbg.o
+obj-$(CONFIG_ARMADA_37XX_CLK)	+= armada-37xx-periph.o
 obj-$(CONFIG_ARMADA_XP_CLK)	+= armada-xp.o
 obj-$(CONFIG_ARMADA_AP806_SYSCON) += ap806-system-controller.o
 obj-$(CONFIG_ARMADA_CP110_SYSCON) += cp110-system-controller.o
diff --git a/drivers/clk/mvebu/armada-37xx-periph.c b/drivers/clk/mvebu/armada-37xx-periph.c
new file mode 100644
index 0000000..45905fc
--- /dev/null
+++ b/drivers/clk/mvebu/armada-37xx-periph.c
@@ -0,0 +1,447 @@
+/*
+ * Marvell Armada 37xx SoC Peripheral clocks
+ *
+ * Copyright (C) 2016 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2 or later. This program is licensed "as is"
+ * without any warranty of any kind, whether express or implied.
+ *
+ * Most of the peripheral clocks can be modelled like this:
+ *             _____    _______    _______
+ * TBG-A-P  --|     |  |       |  |       |   ______
+ * TBG-B-P  --| Mux |--| /div1 |--| /div2 |--| Gate |--> perip_clk
+ * TBG-A-S  --|     |  |       |  |       |  |______|
+ * TBG-B-S  --|_____|  |_______|  |_______|
+ *
+ * However some clocks may use only one or two block or and use the
+ * xtal clock as parent.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define TBG_SEL		0x0
+#define DIV_SEL0	0x4
+#define DIV_SEL1	0x8
+#define DIV_SEL2	0xC
+#define CLK_SEL		0x10
+#define CLK_DIS		0x14
+
+struct clk_periph_driver_data {
+	struct clk_hw_onecell_data *hw_data;
+	spinlock_t lock;
+};
+
+struct clk_double_div {
+	struct clk_hw hw;
+	void __iomem *reg1;
+	u8 shift1;
+	void __iomem *reg2;
+	u8 shift2;
+};
+
+#define to_clk_double_div(_hw) container_of(_hw, struct clk_double_div, hw)
+
+struct clk_periph_data {
+	const char *name;
+	const char * const *parent_names;
+	int num_parents;
+	struct clk_hw *mux_hw;
+	struct clk_hw *rate_hw;
+	struct clk_hw *gate_hw;
+	bool is_double_div;
+};
+
+static const struct clk_div_table clk_table6[] = {
+	{ .val = 1, .div = 1, },
+	{ .val = 2, .div = 2, },
+	{ .val = 3, .div = 3, },
+	{ .val = 4, .div = 4, },
+	{ .val = 5, .div = 5, },
+	{ .val = 6, .div = 6, },
+	{ .val = 0, .div = 0, }, /* last entry */
+};
+
+static const struct clk_div_table clk_table1[] = {
+	{ .val = 0, .div = 1, },
+	{ .val = 1, .div = 2, },
+	{ .val = 0, .div = 0, }, /* last entry */
+};
+
+static const struct clk_div_table clk_table2[] = {
+	{ .val = 0, .div = 2, },
+	{ .val = 1, .div = 4, },
+	{ .val = 0, .div = 0, }, /* last entry */
+};
+static const struct clk_ops clk_double_div_ops;
+
+#define PERIPH_GATE(_name, _bit)		\
+struct clk_gate gate_##_name = {		\
+	.reg = (void *)CLK_DIS,			\
+	.bit_idx = _bit,			\
+	.hw.init = &(struct clk_init_data){	\
+		.ops =  &clk_gate_ops,		\
+	}					\
+};
+
+#define PERIPH_MUX(_name, _shift)		\
+struct clk_mux mux_##_name = {			\
+	.reg = (void *)TBG_SEL,			\
+	.shift = _shift,			\
+	.mask = 3,				\
+	.hw.init = &(struct clk_init_data){	\
+		.ops =  &clk_mux_ro_ops,	\
+	}					\
+};
+
+#define PERIPH_DOUBLEDIV(_name, _reg1, _reg2, _shift1, _shift2)	\
+struct clk_double_div rate_##_name = {		\
+	.reg1 = (void *)_reg1,			\
+	.reg2 = (void *)_reg2,			\
+	.shift1 = _shift1,			\
+	.shift2 = _shift2,			\
+	.hw.init = &(struct clk_init_data){	\
+		.ops =  &clk_double_div_ops,	\
+	}					\
+};
+
+#define PERIPH_DIV(_name, _reg, _shift, _table)	\
+struct clk_divider rate_##_name = {		\
+	.reg = (void *)_reg,			\
+	.table = _table,			\
+	.shift = _shift,			\
+	.hw.init = &(struct clk_init_data){	\
+		.ops =  &clk_divider_ro_ops,	\
+	}					\
+};
+
+#define PERIPH_CLK_FULL_DD(_name, _bit, _shift, _reg1, _reg2, _shift1, _shift2)\
+static PERIPH_GATE(_name, _bit);			    \
+static PERIPH_MUX(_name, _shift);			    \
+static PERIPH_DOUBLEDIV(_name, _reg1, _reg2, _shift1, _shift2);
+
+#define PERIPH_CLK_FULL(_name, _bit, _shift, _reg, _shift1, _table)	\
+static PERIPH_GATE(_name, _bit);			    \
+static PERIPH_MUX(_name, _shift);			    \
+static PERIPH_DIV(_name, _reg, _shift1, _table);
+
+#define PERIPH_CLK_GATE_DIV(_name, _bit,  _reg, _shift, _table)	\
+static PERIPH_GATE(_name, _bit);			\
+static PERIPH_DIV(_name, _reg, _shift, _table);
+
+#define PERIPH_CLK_MUX_DIV(_name, _shift,  _reg, _shift_div, _table)	\
+static PERIPH_MUX(_name, _shift);			    \
+static PERIPH_DIV(_name, _reg, _shift_div, _table);
+
+#define PERIPH_CLK_MUX_DD(_name, _shift, _reg1, _reg2, _shift1, _shift2)\
+static PERIPH_MUX(_name, _shift);			    \
+static PERIPH_DOUBLEDIV(_name, _reg1, _reg2, _shift1, _shift2);
+
+#define REF_CLK_FULL(_name)				\
+	{ .name = #_name,				\
+	  .parent_names = (const char *[]){ "TBG-A-P",	\
+	      "TBG-B-P", "TBG-A-S", "TBG-B-S"},		\
+	  .num_parents = 4,				\
+	  .mux_hw = &mux_##_name.hw,			\
+	  .gate_hw = &gate_##_name.hw,			\
+	  .rate_hw = &rate_##_name.hw,			\
+	}
+
+#define REF_CLK_FULL_DD(_name)				\
+	{ .name = #_name,				\
+	  .parent_names = (const char *[]){ "TBG-A-P",	\
+	      "TBG-B-P", "TBG-A-S", "TBG-B-S"},		\
+	  .num_parents = 4,				\
+	  .mux_hw = &mux_##_name.hw,			\
+	  .gate_hw = &gate_##_name.hw,			\
+	  .rate_hw = &rate_##_name.hw,			\
+	  .is_double_div = true,			\
+	}
+
+#define REF_CLK_GATE(_name, _parent_name)			\
+	{ .name = #_name,					\
+	  .parent_names = (const char *[]){ _parent_name},	\
+	  .num_parents = 1,					\
+	  .gate_hw = &gate_##_name.hw,				\
+	}
+
+#define REF_CLK_GATE_DIV(_name, _parent_name)			\
+	{ .name = #_name,					\
+	  .parent_names = (const char *[]){ _parent_name},	\
+	  .num_parents = 1,					\
+	  .gate_hw = &gate_##_name.hw,				\
+	  .rate_hw = &rate_##_name.hw,				\
+	}
+
+#define REF_CLK_MUX_DIV(_name)				\
+	{ .name = #_name,				\
+	  .parent_names = (const char *[]){ "TBG-A-P",	\
+	      "TBG-B-P", "TBG-A-S", "TBG-B-S"},		\
+	  .num_parents = 4,				\
+	  .mux_hw = &mux_##_name.hw,			\
+	  .rate_hw = &rate_##_name.hw,			\
+	}
+
+#define REF_CLK_MUX_DD(_name)				\
+	{ .name = #_name,				\
+	  .parent_names = (const char *[]){ "TBG-A-P",	\
+	      "TBG-B-P", "TBG-A-S", "TBG-B-S"},		\
+	  .num_parents = 4,				\
+	  .mux_hw = &mux_##_name.hw,			\
+	  .rate_hw = &rate_##_name.hw,			\
+	  .is_double_div = true,			\
+	}
+
+/* NB periph clocks */
+PERIPH_CLK_FULL_DD(mmc, 2, 0, DIV_SEL2, DIV_SEL2, 16, 13);
+PERIPH_CLK_FULL_DD(sata_host, 3, 2, DIV_SEL2, DIV_SEL2, 10, 7);
+PERIPH_CLK_FULL_DD(sec_at, 6, 4, DIV_SEL1, DIV_SEL1, 3, 0);
+PERIPH_CLK_FULL_DD(sec_dap, 7, 6, DIV_SEL1, DIV_SEL1, 9, 6);
+PERIPH_CLK_FULL_DD(tscem, 8, 8, DIV_SEL1, DIV_SEL1, 15, 12);
+PERIPH_CLK_FULL(tscem_tmx, 10, 10, DIV_SEL1, 18, clk_table6);
+static PERIPH_GATE(avs, 11);
+PERIPH_CLK_FULL_DD(pwm, 13, 14, DIV_SEL0, DIV_SEL0, 3, 0);
+PERIPH_CLK_FULL_DD(sqf, 12, 12, DIV_SEL1, DIV_SEL1, 27, 24);
+static PERIPH_GATE(i2c_2, 16);
+static PERIPH_GATE(i2c_1, 17);
+PERIPH_CLK_GATE_DIV(ddr_phy, 19, DIV_SEL0, 18, clk_table2);
+PERIPH_CLK_FULL_DD(ddr_fclk, 21, 16, DIV_SEL0, DIV_SEL0, 15, 12);
+PERIPH_CLK_FULL(trace, 22, 18, DIV_SEL0, 20, clk_table6);
+PERIPH_CLK_FULL(counter, 23, 20, DIV_SEL0, 23, clk_table6);
+PERIPH_CLK_FULL_DD(eip97, 24, 24, DIV_SEL2, DIV_SEL2, 22, 19);
+PERIPH_CLK_MUX_DIV(cpu, 22, DIV_SEL0, 28, clk_table6);
+
+static struct clk_periph_data data_nb[] ={
+	REF_CLK_FULL_DD(mmc),
+	REF_CLK_FULL_DD(sata_host),
+	REF_CLK_FULL_DD(sec_at),
+	REF_CLK_FULL_DD(sec_dap),
+	REF_CLK_FULL_DD(tscem),
+	REF_CLK_FULL(tscem_tmx),
+	REF_CLK_GATE(avs, "xtal"),
+	REF_CLK_FULL_DD(sqf),
+	REF_CLK_FULL_DD(pwm),
+	REF_CLK_GATE(i2c_2, "xtal"),
+	REF_CLK_GATE(i2c_1, "xtal"),
+	REF_CLK_GATE_DIV(ddr_phy, "TBG-A-S"),
+	REF_CLK_FULL_DD(ddr_fclk),
+	REF_CLK_FULL(trace),
+	REF_CLK_FULL(counter),
+	REF_CLK_FULL_DD(eip97),
+	REF_CLK_MUX_DIV(cpu),
+	{ },
+};
+
+/* SB periph clocks */
+PERIPH_CLK_MUX_DD(gbe_50, 6, DIV_SEL2, DIV_SEL2, 6, 9);
+PERIPH_CLK_MUX_DD(gbe_core, 8, DIV_SEL1, DIV_SEL1, 18, 21);
+PERIPH_CLK_MUX_DD(gbe_125, 10, DIV_SEL1, DIV_SEL1, 6, 9);
+static PERIPH_GATE(gbe1_50, 0);
+static PERIPH_GATE(gbe0_50, 1);
+static PERIPH_GATE(gbe1_125, 2);
+static PERIPH_GATE(gbe0_125, 3);
+PERIPH_CLK_GATE_DIV(gbe1_core, 4, DIV_SEL1, 13, clk_table1);
+PERIPH_CLK_GATE_DIV(gbe0_core, 5, DIV_SEL1, 14, clk_table1);
+PERIPH_CLK_GATE_DIV(gbe_bm, 12, DIV_SEL1, 0, clk_table1);
+PERIPH_CLK_FULL_DD(sdio, 11, 14, DIV_SEL0, DIV_SEL0, 3, 6);
+PERIPH_CLK_FULL_DD(usb32_usb2_sys, 16, 16, DIV_SEL0, DIV_SEL0, 9, 12);
+PERIPH_CLK_FULL_DD(usb32_ss_sys, 17, 18, DIV_SEL0, DIV_SEL0, 15, 18);
+
+static struct clk_periph_data data_sb[] = {
+	REF_CLK_MUX_DD(gbe_50),
+	REF_CLK_MUX_DD(gbe_core),
+	REF_CLK_MUX_DD(gbe_125),
+	REF_CLK_GATE(gbe1_50, "gbe_50"),
+	REF_CLK_GATE(gbe0_50, "gbe_50"),
+	REF_CLK_GATE(gbe1_125, "gbe_125"),
+	REF_CLK_GATE(gbe0_125, "gbe_125"),
+	REF_CLK_GATE_DIV(gbe1_core, "gbe_core"),
+	REF_CLK_GATE_DIV(gbe0_core, "gbe_core"),
+	REF_CLK_GATE_DIV(gbe_bm, "gbe_core"),
+	REF_CLK_FULL_DD(sdio),
+	REF_CLK_FULL_DD(usb32_usb2_sys),
+	REF_CLK_FULL_DD(usb32_ss_sys),
+	{ },
+};
+
+static unsigned int get_div(void __iomem *reg, int shift)
+{
+	u32 val;
+
+	val = (readl(reg) >> shift) & 0x7;
+	if (val > 6)
+		return 0;
+	return val;
+}
+
+static unsigned long clk_double_div_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct clk_double_div *double_div = to_clk_double_div(hw);
+	unsigned int div;
+
+	div = get_div(double_div->reg1, double_div->shift1);
+	div *= get_div(double_div->reg2, double_div->shift2);
+
+	return DIV_ROUND_UP_ULL((u64)parent_rate, div);
+}
+
+static const struct clk_ops clk_double_div_ops = {
+	.recalc_rate = clk_double_div_recalc_rate,
+};
+
+static const struct of_device_id armada_3700_periph_clock_of_match[] = {
+	{ .compatible = "marvell,armada-3700-periph-clock-nb",
+	  .data = data_nb, },
+	{ .compatible = "marvell,armada-3700-periph-clock-sb",
+	.data = data_sb, },
+	{ }
+};
+static int armada_3700_add_composite_clk(const struct clk_periph_data *data,
+					 void __iomem *reg, spinlock_t *lock,
+					 struct device *dev, struct clk_hw *hw)
+{
+	const struct clk_ops *mux_ops = NULL, *gate_ops = NULL,
+		*rate_ops = NULL;
+	struct clk_hw *mux_hw = NULL, *gate_hw = NULL, *rate_hw = NULL;
+
+	if (data->mux_hw) {
+		struct clk_mux *mux;
+
+		mux_hw = data->mux_hw;
+		mux = to_clk_mux(mux_hw);
+		mux->lock = lock;
+		mux_ops = mux_hw->init->ops;
+		mux->reg = reg + (u64)mux->reg;
+	}
+
+	if (data->gate_hw) {
+		struct clk_gate *gate;
+
+		gate_hw = data->gate_hw;
+		gate = to_clk_gate(gate_hw);
+		gate->lock = lock;
+		gate_ops = gate_hw->init->ops;
+		gate->reg = reg + (u64)gate->reg;
+	}
+
+	if (data->rate_hw) {
+		rate_hw = data->rate_hw;
+		rate_ops = rate_hw->init->ops;
+		if (data->is_double_div) {
+			struct clk_double_div *rate;
+
+			rate =  to_clk_double_div(rate_hw);
+			rate->reg1 = reg + (u64)rate->reg1;
+			rate->reg2 = reg + (u64)rate->reg2;
+		} else {
+			struct clk_divider *rate = to_clk_divider(rate_hw);
+			const struct clk_div_table *clkt;
+			int table_size = 0;
+
+			rate->reg = reg + (u64)rate->reg;
+			for (clkt = rate->table; clkt->div; clkt++)
+				table_size++;
+			rate->width = order_base_2(table_size);
+			rate->lock = lock;
+		}
+	}
+
+	hw = clk_hw_register_composite(dev, data->name, data->parent_names,
+				       data->num_parents, mux_hw,
+				       mux_ops, rate_hw, rate_ops,
+				       gate_hw, gate_ops, CLK_IGNORE_UNUSED);
+
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+
+	return 0;
+}
+
+static int armada_3700_periph_clock_probe(struct platform_device *pdev)
+{
+	struct clk_periph_driver_data *driver_data;
+	struct device_node *np = pdev->dev.of_node;
+	const struct clk_periph_data *data;
+	struct device *dev = &pdev->dev;
+	int num_periph = 0, i, ret;
+	struct resource *res;
+	void __iomem *reg;
+
+	data = of_device_get_match_data(dev);
+	if (!data)
+		return -ENODEV;
+
+	while (data[num_periph].name)
+		num_periph++;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	reg = devm_ioremap_resource(dev, res);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	driver_data = devm_kzalloc(dev, sizeof(*driver_data), GFP_KERNEL);
+	if (!driver_data)
+		return -ENOMEM;
+
+	driver_data->hw_data = devm_kzalloc(dev, sizeof(*driver_data->hw_data) +
+			    sizeof(*driver_data->hw_data->hws) * num_periph,
+			    GFP_KERNEL);
+	if (!driver_data->hw_data)
+		return -ENOMEM;
+	driver_data->hw_data->num = num_periph;
+
+	spin_lock_init(&driver_data->lock);
+
+	for (i = 0; i < num_periph; i++) {
+		struct clk_hw *hw = driver_data->hw_data->hws[i];
+
+		if (armada_3700_add_composite_clk(&data[i], reg,
+						  &driver_data->lock, dev, hw))
+			dev_err(dev, "Can't register periph clock %s\n",
+			       data[i].name);
+
+	}
+
+	ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get,
+				  driver_data->hw_data);
+	if (ret) {
+		for (i = 0; i < num_periph; i++)
+			clk_hw_unregister(driver_data->hw_data->hws[i]);
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, driver_data);
+	return 0;
+}
+
+static int armada_3700_periph_clock_remove(struct platform_device *pdev)
+{
+	struct clk_periph_driver_data *data = platform_get_drvdata(pdev);
+	struct clk_hw_onecell_data *hw_data = data->hw_data;
+	int i;
+
+	of_clk_del_provider(pdev->dev.of_node);
+
+	for (i = 0; i < hw_data->num; i++)
+		clk_hw_unregister(hw_data->hws[i]);
+
+	return 0;
+}
+
+static struct platform_driver armada_3700_periph_clock_driver = {
+	.probe = armada_3700_periph_clock_probe,
+	.remove = armada_3700_periph_clock_remove,
+	.driver		= {
+		.name	= "marvell-armada-3700-periph-clock",
+		.of_match_table = armada_3700_periph_clock_of_match,
+	},
+};
+
+builtin_platform_driver(armada_3700_periph_clock_driver);
diff --git a/drivers/clk/mvebu/armada-37xx-tbg.c b/drivers/clk/mvebu/armada-37xx-tbg.c
new file mode 100644
index 0000000..aa80db1
--- /dev/null
+++ b/drivers/clk/mvebu/armada-37xx-tbg.c
@@ -0,0 +1,158 @@
+/*
+ * Marvell Armada 37xx SoC Time Base Generator clocks
+ *
+ * Copyright (C) 2016 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2 or later. This program is licensed "as is"
+ * without any warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define NUM_TBG	    4
+
+#define TBG_CTRL0		0x4
+#define TBG_CTRL1		0x8
+#define TBG_CTRL7		0x20
+#define TBG_CTRL8		0x30
+
+#define TBG_DIV_MASK		0x1FF
+
+#define TBG_A_REFDIV		0
+#define TBG_B_REFDIV		16
+
+#define TBG_A_FBDIV		2
+#define TBG_B_FBDIV		18
+
+#define TBG_A_VCODIV_SE		0
+#define TBG_B_VCODIV_SE		16
+
+#define TBG_A_VCODIV_DIFF	1
+#define TBG_B_VCODIV_DIFF	17
+
+struct tbg_def {
+	char *name;
+	u32 refdiv_offset;
+	u32 fbdiv_offset;
+	u32 vcodiv_reg;
+	u32 vcodiv_offset;
+};
+
+static const struct tbg_def tbg[NUM_TBG] = {
+	{"TBG-A-P", TBG_A_REFDIV, TBG_A_FBDIV, TBG_CTRL8, TBG_A_VCODIV_DIFF},
+	{"TBG-B-P", TBG_B_REFDIV, TBG_B_FBDIV, TBG_CTRL8, TBG_B_VCODIV_DIFF},
+	{"TBG-A-S", TBG_A_REFDIV, TBG_A_FBDIV, TBG_CTRL1, TBG_A_VCODIV_SE},
+	{"TBG-B-S", TBG_B_REFDIV, TBG_B_FBDIV, TBG_CTRL1, TBG_B_VCODIV_SE},
+};
+
+static unsigned int tbg_get_mult(void __iomem *reg, const struct tbg_def *ptbg)
+{
+	u32 val;
+
+	val = readl(reg + TBG_CTRL0);
+
+	return ((val >> ptbg->fbdiv_offset) & TBG_DIV_MASK) << 2;
+}
+
+static unsigned int tbg_get_div(void __iomem *reg, const struct tbg_def *ptbg)
+{
+	u32 val;
+	unsigned int div;
+
+	val = readl(reg + TBG_CTRL7);
+
+	div = (val >> ptbg->refdiv_offset) & TBG_DIV_MASK;
+	if (div == 0)
+		div = 1;
+	val = readl(reg + ptbg->vcodiv_reg);
+
+	div *= 1 << ((val >>  ptbg->vcodiv_offset) & TBG_DIV_MASK);
+
+	return div;
+}
+
+
+static int armada_3700_tbg_clock_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct clk_hw_onecell_data *hw_tbg_data;
+	struct device *dev = &pdev->dev;
+	const char *parent_name;
+	struct resource *res;
+	struct clk *parent;
+	void __iomem *reg;
+	int i, ret;
+
+	hw_tbg_data = devm_kzalloc(&pdev->dev, sizeof(*hw_tbg_data)
+				   + sizeof(*hw_tbg_data->hws) * NUM_TBG,
+				   GFP_KERNEL);
+	if (!hw_tbg_data)
+		return -ENOMEM;
+	hw_tbg_data->num = NUM_TBG;
+	platform_set_drvdata(pdev, hw_tbg_data);
+
+	parent = devm_clk_get(dev, NULL);
+	if (IS_ERR(parent)) {
+		dev_err(dev, "Could get the clock parent\n");
+		return -EINVAL;
+	}
+	parent_name = __clk_get_name(parent);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	reg = devm_ioremap_resource(dev, res);
+	if (IS_ERR(reg))
+		return PTR_ERR(reg);
+
+	for (i = 0; i < NUM_TBG; i++) {
+		const char *name;
+		unsigned int mult, div;
+
+		name = tbg[i].name;
+		mult = tbg_get_mult(reg, &tbg[i]);
+		div = tbg_get_div(reg, &tbg[i]);
+		hw_tbg_data->hws[i] = clk_hw_register_fixed_factor(NULL, name,
+						parent_name, 0, mult, div);
+		if (IS_ERR(hw_tbg_data->hws[i]))
+			dev_err(dev, "Can't register TBG clock %s\n", name);
+	}
+
+	ret = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, hw_tbg_data);
+
+	return ret;
+}
+
+static int armada_3700_tbg_clock_remove(struct platform_device *pdev)
+{
+	int i;
+	struct clk_hw_onecell_data *hw_tbg_data = platform_get_drvdata(pdev);
+
+	of_clk_del_provider(pdev->dev.of_node);
+	for (i = 0; i < hw_tbg_data->num; i++)
+		clk_hw_unregister_fixed_factor(hw_tbg_data->hws[i]);
+
+	return 0;
+}
+
+static const struct of_device_id armada_3700_tbg_clock_of_match[] = {
+	{ .compatible = "marvell,armada-3700-tbg-clock", },
+	{ }
+};
+
+static struct platform_driver armada_3700_tbg_clock_driver = {
+	.probe = armada_3700_tbg_clock_probe,
+	.remove = armada_3700_tbg_clock_remove,
+	.driver		= {
+		.name	= "marvell-armada-3700-tbg-clock",
+		.of_match_table = armada_3700_tbg_clock_of_match,
+	},
+};
+
+builtin_platform_driver(armada_3700_tbg_clock_driver);
diff --git a/drivers/clk/mvebu/armada-37xx-xtal.c b/drivers/clk/mvebu/armada-37xx-xtal.c
new file mode 100644
index 0000000..612d65e
--- /dev/null
+++ b/drivers/clk/mvebu/armada-37xx-xtal.c
@@ -0,0 +1,91 @@
+/*
+ * Marvell Armada 37xx SoC xtal clocks
+ *
+ * Copyright (C) 2016 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/mfd/syscon.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+#define NB_GPIO1_LATCH	0xC
+#define XTAL_MODE	    BIT(31)
+
+static int armada_3700_xtal_clock_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	const char *xtal_name = "xtal";
+	struct device_node *parent;
+	struct regmap *regmap;
+	struct clk_hw *xtal_hw;
+	unsigned int rate;
+	u32 reg;
+	int ret;
+
+	xtal_hw = devm_kzalloc(&pdev->dev, sizeof(*xtal_hw), GFP_KERNEL);
+	if (!xtal_hw)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, xtal_hw);
+
+	parent = np->parent;
+	if (!parent) {
+		dev_err(&pdev->dev, "no parent\n");
+		return -ENODEV;
+	}
+
+	regmap = syscon_node_to_regmap(parent);
+	if (IS_ERR(regmap)) {
+		dev_err(&pdev->dev, "cannot get regmap\n");
+		return PTR_ERR(regmap);
+	}
+
+	ret = regmap_read(regmap, NB_GPIO1_LATCH, &reg);
+	if (ret) {
+		dev_err(&pdev->dev, "cannot read from regmap\n");
+		return ret;
+	}
+
+	if (reg & XTAL_MODE)
+		rate = 40000000;
+	else
+		rate = 25000000;
+
+	of_property_read_string_index(np, "clock-output-names", 0, &xtal_name);
+	xtal_hw = clk_hw_register_fixed_rate(NULL, xtal_name, NULL, 0, rate);
+	if (IS_ERR(xtal_hw))
+		return PTR_ERR(xtal_hw);
+	ret = of_clk_add_hw_provider(np, of_clk_hw_simple_get, xtal_hw);
+
+	return ret;
+}
+
+static int armada_3700_xtal_clock_remove(struct platform_device *pdev)
+{
+	of_clk_del_provider(pdev->dev.of_node);
+
+	return 0;
+}
+
+static const struct of_device_id armada_3700_xtal_clock_of_match[] = {
+	{ .compatible = "marvell,armada-3700-xtal-clock", },
+	{ }
+};
+
+static struct platform_driver armada_3700_xtal_clock_driver = {
+	.probe = armada_3700_xtal_clock_probe,
+	.remove = armada_3700_xtal_clock_remove,
+	.driver		= {
+		.name	= "marvell-armada-3700-xtal-clock",
+		.of_match_table = armada_3700_xtal_clock_of_match,
+	},
+};
+
+builtin_platform_driver(armada_3700_xtal_clock_driver);
diff --git a/drivers/clk/mvebu/armada-39x.c b/drivers/clk/mvebu/armada-39x.c
index efb974d..4fdfd32 100644
--- a/drivers/clk/mvebu/armada-39x.c
+++ b/drivers/clk/mvebu/armada-39x.c
@@ -142,6 +142,8 @@
 	{ "pex3", NULL, 7 },
 	{ "pex0", NULL, 8 },
 	{ "usb3h0", NULL, 9 },
+	{ "usb3h1", NULL, 10 },
+	{ "sata0", NULL, 15 },
 	{ "sdio", NULL, 17 },
 	{ "xor0", NULL, 22 },
 	{ "xor1", NULL, 28 },
diff --git a/drivers/clk/mvebu/cp110-system-controller.c b/drivers/clk/mvebu/cp110-system-controller.c
index 7fa42d6..f2303da 100644
--- a/drivers/clk/mvebu/cp110-system-controller.c
+++ b/drivers/clk/mvebu/cp110-system-controller.c
@@ -81,13 +81,6 @@
 #define CP110_GATE_EIP150		25
 #define CP110_GATE_EIP197		26
 
-static struct clk *cp110_clks[CP110_CLK_NUM];
-
-static struct clk_onecell_data cp110_clk_data = {
-	.clks = cp110_clks,
-	.clk_num = CP110_CLK_NUM,
-};
-
 struct cp110_gate_clk {
 	struct clk_hw hw;
 	struct regmap *regmap;
@@ -142,6 +135,8 @@
 	if (!gate)
 		return ERR_PTR(-ENOMEM);
 
+	memset(&init, 0, sizeof(init));
+
 	init.name = name;
 	init.ops = &cp110_gate_ops;
 	init.parent_names = &parent_name;
@@ -194,7 +189,8 @@
 	struct regmap *regmap;
 	struct device_node *np = pdev->dev.of_node;
 	const char *ppv2_name, *apll_name, *core_name, *eip_name, *nand_name;
-	struct clk *clk;
+	struct clk_onecell_data *cp110_clk_data;
+	struct clk *clk, **cp110_clks;
 	u32 nand_clk_ctrl;
 	int i, ret;
 
@@ -207,6 +203,20 @@
 	if (ret)
 		return ret;
 
+	cp110_clks = devm_kcalloc(&pdev->dev, sizeof(struct clk *),
+				  CP110_CLK_NUM, GFP_KERNEL);
+	if (!cp110_clks)
+		return -ENOMEM;
+
+	cp110_clk_data = devm_kzalloc(&pdev->dev,
+				      sizeof(*cp110_clk_data),
+				      GFP_KERNEL);
+	if (!cp110_clk_data)
+		return -ENOMEM;
+
+	cp110_clk_data->clks = cp110_clks;
+	cp110_clk_data->clk_num = CP110_CLK_NUM;
+
 	/* Register the APLL which is the root of the clk tree */
 	of_property_read_string_index(np, "core-clock-output-names",
 				      CP110_CORE_APLL, &apll_name);
@@ -334,10 +344,12 @@
 		cp110_clks[CP110_MAX_CORE_CLOCKS + i] = clk;
 	}
 
-	ret = of_clk_add_provider(np, cp110_of_clk_get, &cp110_clk_data);
+	ret = of_clk_add_provider(np, cp110_of_clk_get, cp110_clk_data);
 	if (ret)
 		goto fail_clk_add;
 
+	platform_set_drvdata(pdev, cp110_clks);
+
 	return 0;
 
 fail_clk_add:
@@ -364,6 +376,7 @@
 
 static int cp110_syscon_clk_remove(struct platform_device *pdev)
 {
+	struct clk **cp110_clks = platform_get_drvdata(pdev);
 	int i;
 
 	of_clk_del_provider(pdev->dev.of_node);
diff --git a/drivers/clk/nxp/clk-lpc18xx-creg.c b/drivers/clk/nxp/clk-lpc18xx-creg.c
index 9e35749..c6e802e 100644
--- a/drivers/clk/nxp/clk-lpc18xx-creg.c
+++ b/drivers/clk/nxp/clk-lpc18xx-creg.c
@@ -184,7 +184,8 @@
 
 	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_creg_early_data);
 }
-CLK_OF_DECLARE(lpc18xx_creg_clk, "nxp,lpc1850-creg-clk", lpc18xx_creg_clk_init);
+CLK_OF_DECLARE_DRIVER(lpc18xx_creg_clk, "nxp,lpc1850-creg-clk",
+		      lpc18xx_creg_clk_init);
 
 static struct clk *clk_creg[CREG_CLK_MAX];
 static struct clk_onecell_data clk_creg_data = {
diff --git a/drivers/clk/nxp/clk-lpc32xx.c b/drivers/clk/nxp/clk-lpc32xx.c
index 90d740a..34c9735 100644
--- a/drivers/clk/nxp/clk-lpc32xx.c
+++ b/drivers/clk/nxp/clk-lpc32xx.c
@@ -1513,6 +1513,7 @@
 	if (IS_ERR(clk_regmap)) {
 		pr_err("failed to regmap system control block: %ld\n",
 			PTR_ERR(clk_regmap));
+		iounmap(base);
 		return;
 	}
 
diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index 95e3b3e..0146d3c 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -87,6 +87,23 @@
 	  Say Y if you want to use audio devices such as i2s, pcm,
 	  SLIMBus, etc.
 
+config MDM_GCC_9615
+	tristate "MDM9615 Global Clock Controller"
+	depends on COMMON_CLK_QCOM
+	help
+	  Support for the global clock controller on mdm9615 devices.
+	  Say Y if you want to use peripheral devices such as UART, SPI,
+	  i2c, USB, SD/eMMC, etc.
+
+config MDM_LCC_9615
+	tristate "MDM9615 LPASS Clock Controller"
+	select MDM_GCC_9615
+	depends on COMMON_CLK_QCOM
+	help
+	  Support for the LPASS clock controller on mdm9615 devices.
+	  Say Y if you want to use audio devices such as i2s, pcm,
+	  SLIMBus, etc.
+
 config MSM_MMCC_8960
 	tristate "MSM8960 Multimedia Clock Controller"
 	select MSM_GCC_8960
@@ -117,6 +134,7 @@
 
 config MSM_GCC_8996
 	tristate "MSM8996 Global Clock Controller"
+	select QCOM_GDSC
 	depends on COMMON_CLK_QCOM
 	help
 	  Support for the global clock controller on msm8996 devices.
@@ -126,6 +144,7 @@
 config MSM_MMCC_8996
 	tristate "MSM8996 Multimedia Clock Controller"
 	select MSM_GCC_8996
+	select QCOM_GDSC
 	depends on COMMON_CLK_QCOM
 	help
 	  Support for the multimedia clock controller on msm8996 devices.
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 2a25f4e..1fb1f54 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -12,17 +12,20 @@
 clk-qcom-y += reset.o
 clk-qcom-$(CONFIG_QCOM_GDSC) += gdsc.o
 
+# Keep alphabetically sorted by config
 obj-$(CONFIG_APQ_GCC_8084) += gcc-apq8084.o
 obj-$(CONFIG_APQ_MMCC_8084) += mmcc-apq8084.o
 obj-$(CONFIG_IPQ_GCC_4019) += gcc-ipq4019.o
 obj-$(CONFIG_IPQ_GCC_806X) += gcc-ipq806x.o
 obj-$(CONFIG_IPQ_LCC_806X) += lcc-ipq806x.o
+obj-$(CONFIG_MDM_GCC_9615) += gcc-mdm9615.o
+obj-$(CONFIG_MDM_LCC_9615) += lcc-mdm9615.o
 obj-$(CONFIG_MSM_GCC_8660) += gcc-msm8660.o
 obj-$(CONFIG_MSM_GCC_8916) += gcc-msm8916.o
 obj-$(CONFIG_MSM_GCC_8960) += gcc-msm8960.o
-obj-$(CONFIG_MSM_LCC_8960) += lcc-msm8960.o
 obj-$(CONFIG_MSM_GCC_8974) += gcc-msm8974.o
 obj-$(CONFIG_MSM_GCC_8996) += gcc-msm8996.o
+obj-$(CONFIG_MSM_LCC_8960) += lcc-msm8960.o
 obj-$(CONFIG_MSM_MMCC_8960) += mmcc-msm8960.o
 obj-$(CONFIG_MSM_MMCC_8974) += mmcc-msm8974.o
 obj-$(CONFIG_MSM_MMCC_8996) += mmcc-msm8996.o
diff --git a/drivers/clk/qcom/clk-regmap.c b/drivers/clk/qcom/clk-regmap.c
index a58ba39..1c856d3 100644
--- a/drivers/clk/qcom/clk-regmap.c
+++ b/drivers/clk/qcom/clk-regmap.c
@@ -101,14 +101,13 @@
  * clk_regmap struct via this function so that the regmap is initialized
  * and so that the clock is registered with the common clock framework.
  */
-struct clk *devm_clk_register_regmap(struct device *dev,
-				     struct clk_regmap *rclk)
+int devm_clk_register_regmap(struct device *dev, struct clk_regmap *rclk)
 {
 	if (dev && dev_get_regmap(dev, NULL))
 		rclk->regmap = dev_get_regmap(dev, NULL);
 	else if (dev && dev->parent)
 		rclk->regmap = dev_get_regmap(dev->parent, NULL);
 
-	return devm_clk_register(dev, &rclk->hw);
+	return devm_clk_hw_register(dev, &rclk->hw);
 }
 EXPORT_SYMBOL_GPL(devm_clk_register_regmap);
diff --git a/drivers/clk/qcom/clk-regmap.h b/drivers/clk/qcom/clk-regmap.h
index 491a63d..90d95cd 100644
--- a/drivers/clk/qcom/clk-regmap.h
+++ b/drivers/clk/qcom/clk-regmap.h
@@ -39,7 +39,6 @@
 int clk_is_enabled_regmap(struct clk_hw *hw);
 int clk_enable_regmap(struct clk_hw *hw);
 void clk_disable_regmap(struct clk_hw *hw);
-struct clk *
-devm_clk_register_regmap(struct device *dev, struct clk_regmap *rclk);
+int devm_clk_register_regmap(struct device *dev, struct clk_regmap *rclk);
 
 #endif
diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c
index f7c226a..fffcbaf 100644
--- a/drivers/clk/qcom/common.c
+++ b/drivers/clk/qcom/common.c
@@ -27,8 +27,8 @@
 
 struct qcom_cc {
 	struct qcom_reset_controller reset;
-	struct clk_onecell_data data;
-	struct clk *clks[];
+	struct clk_regmap **rclks;
+	size_t num_rclks;
 };
 
 const
@@ -102,8 +102,8 @@
 	struct device_node *clocks_node;
 	struct clk_fixed_factor *factor;
 	struct clk_fixed_rate *fixed;
-	struct clk *clk;
 	struct clk_init_data init_data = { };
+	int ret;
 
 	clocks_node = of_find_node_by_path("/clocks");
 	if (clocks_node)
@@ -121,9 +121,9 @@
 		init_data.name = path;
 		init_data.ops = &clk_fixed_rate_ops;
 
-		clk = devm_clk_register(dev, &fixed->hw);
-		if (IS_ERR(clk))
-			return PTR_ERR(clk);
+		ret = devm_clk_hw_register(dev, &fixed->hw);
+		if (ret)
+			return ret;
 	}
 	of_node_put(node);
 
@@ -141,9 +141,9 @@
 		init_data.flags = 0;
 		init_data.ops = &clk_fixed_factor_ops;
 
-		clk = devm_clk_register(dev, &factor->hw);
-		if (IS_ERR(clk))
-			return PTR_ERR(clk);
+		ret = devm_clk_hw_register(dev, &factor->hw);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
@@ -174,42 +174,48 @@
 }
 EXPORT_SYMBOL_GPL(qcom_cc_register_sleep_clk);
 
+static struct clk_hw *qcom_cc_clk_hw_get(struct of_phandle_args *clkspec,
+					 void *data)
+{
+	struct qcom_cc *cc = data;
+	unsigned int idx = clkspec->args[0];
+
+	if (idx >= cc->num_rclks) {
+		pr_err("%s: invalid index %u\n", __func__, idx);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return cc->rclks[idx] ? &cc->rclks[idx]->hw : ERR_PTR(-ENOENT);
+}
+
 int qcom_cc_really_probe(struct platform_device *pdev,
 			 const struct qcom_cc_desc *desc, struct regmap *regmap)
 {
 	int i, ret;
 	struct device *dev = &pdev->dev;
-	struct clk *clk;
-	struct clk_onecell_data *data;
-	struct clk **clks;
 	struct qcom_reset_controller *reset;
 	struct qcom_cc *cc;
 	struct gdsc_desc *scd;
 	size_t num_clks = desc->num_clks;
 	struct clk_regmap **rclks = desc->clks;
 
-	cc = devm_kzalloc(dev, sizeof(*cc) + sizeof(*clks) * num_clks,
-			  GFP_KERNEL);
+	cc = devm_kzalloc(dev, sizeof(*cc), GFP_KERNEL);
 	if (!cc)
 		return -ENOMEM;
 
-	clks = cc->clks;
-	data = &cc->data;
-	data->clks = clks;
-	data->clk_num = num_clks;
+	cc->rclks = rclks;
+	cc->num_rclks = num_clks;
 
 	for (i = 0; i < num_clks; i++) {
-		if (!rclks[i]) {
-			clks[i] = ERR_PTR(-ENOENT);
+		if (!rclks[i])
 			continue;
-		}
-		clk = devm_clk_register_regmap(dev, rclks[i]);
-		if (IS_ERR(clk))
-			return PTR_ERR(clk);
-		clks[i] = clk;
+
+		ret = devm_clk_register_regmap(dev, rclks[i]);
+		if (ret)
+			return ret;
 	}
 
-	ret = of_clk_add_provider(dev->of_node, of_clk_src_onecell_get, data);
+	ret = of_clk_add_hw_provider(dev->of_node, qcom_cc_clk_hw_get, cc);
 	if (ret)
 		return ret;
 
diff --git a/drivers/clk/qcom/gcc-ipq4019.c b/drivers/clk/qcom/gcc-ipq4019.c
index 3cd1af0..b593065 100644
--- a/drivers/clk/qcom/gcc-ipq4019.c
+++ b/drivers/clk/qcom/gcc-ipq4019.c
@@ -1332,7 +1332,6 @@
 	.probe		= gcc_ipq4019_probe,
 	.driver		= {
 		.name	= "qcom,gcc-ipq4019",
-		.owner	= THIS_MODULE,
 		.of_match_table = gcc_ipq4019_match_table,
 	},
 };
diff --git a/drivers/clk/qcom/gcc-mdm9615.c b/drivers/clk/qcom/gcc-mdm9615.c
new file mode 100644
index 0000000..581a17f
--- /dev/null
+++ b/drivers/clk/qcom/gcc-mdm9615.c
@@ -0,0 +1,1727 @@
+/*
+ * Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ * Copyright (c) BayLibre, SAS.
+ * Author : Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+#include <linux/reset-controller.h>
+
+#include <dt-bindings/clock/qcom,gcc-mdm9615.h>
+#include <dt-bindings/reset/qcom,gcc-mdm9615.h>
+
+#include "common.h"
+#include "clk-regmap.h"
+#include "clk-pll.h"
+#include "clk-rcg.h"
+#include "clk-branch.h"
+#include "reset.h"
+
+static struct clk_fixed_factor cxo = {
+	.mult = 1,
+	.div = 1,
+	.hw.init = &(struct clk_init_data){
+		.name = "cxo",
+		.parent_names = (const char *[]){ "cxo_board" },
+		.num_parents = 1,
+		.ops = &clk_fixed_factor_ops,
+	},
+};
+
+static struct clk_pll pll0 = {
+	.l_reg = 0x30c4,
+	.m_reg = 0x30c8,
+	.n_reg = 0x30cc,
+	.config_reg = 0x30d4,
+	.mode_reg = 0x30c0,
+	.status_reg = 0x30d8,
+	.status_bit = 16,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "pll0",
+		.parent_names = (const char *[]){ "cxo" },
+		.num_parents = 1,
+		.ops = &clk_pll_ops,
+	},
+};
+
+static struct clk_regmap pll0_vote = {
+	.enable_reg = 0x34c0,
+	.enable_mask = BIT(0),
+	.hw.init = &(struct clk_init_data){
+		.name = "pll0_vote",
+		.parent_names = (const char *[]){ "pll8" },
+		.num_parents = 1,
+		.ops = &clk_pll_vote_ops,
+	},
+};
+
+static struct clk_regmap pll4_vote = {
+	.enable_reg = 0x34c0,
+	.enable_mask = BIT(4),
+	.hw.init = &(struct clk_init_data){
+		.name = "pll4_vote",
+		.parent_names = (const char *[]){ "pll4" },
+		.num_parents = 1,
+		.ops = &clk_pll_vote_ops,
+	},
+};
+
+static struct clk_pll pll8 = {
+	.l_reg = 0x3144,
+	.m_reg = 0x3148,
+	.n_reg = 0x314c,
+	.config_reg = 0x3154,
+	.mode_reg = 0x3140,
+	.status_reg = 0x3158,
+	.status_bit = 16,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "pll8",
+		.parent_names = (const char *[]){ "cxo" },
+		.num_parents = 1,
+		.ops = &clk_pll_ops,
+	},
+};
+
+static struct clk_regmap pll8_vote = {
+	.enable_reg = 0x34c0,
+	.enable_mask = BIT(8),
+	.hw.init = &(struct clk_init_data){
+		.name = "pll8_vote",
+		.parent_names = (const char *[]){ "pll8" },
+		.num_parents = 1,
+		.ops = &clk_pll_vote_ops,
+	},
+};
+
+static struct clk_pll pll14 = {
+	.l_reg = 0x31c4,
+	.m_reg = 0x31c8,
+	.n_reg = 0x31cc,
+	.config_reg = 0x31d4,
+	.mode_reg = 0x31c0,
+	.status_reg = 0x31d8,
+	.status_bit = 16,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "pll14",
+		.parent_names = (const char *[]){ "cxo" },
+		.num_parents = 1,
+		.ops = &clk_pll_ops,
+	},
+};
+
+static struct clk_regmap pll14_vote = {
+	.enable_reg = 0x34c0,
+	.enable_mask = BIT(11),
+	.hw.init = &(struct clk_init_data){
+		.name = "pll14_vote",
+		.parent_names = (const char *[]){ "pll14" },
+		.num_parents = 1,
+		.ops = &clk_pll_vote_ops,
+	},
+};
+
+enum {
+	P_CXO,
+	P_PLL8,
+	P_PLL14,
+};
+
+static const struct parent_map gcc_cxo_pll8_map[] = {
+	{ P_CXO, 0 },
+	{ P_PLL8, 3 }
+};
+
+static const char * const gcc_cxo_pll8[] = {
+	"cxo",
+	"pll8_vote",
+};
+
+static const struct parent_map gcc_cxo_pll14_map[] = {
+	{ P_CXO, 0 },
+	{ P_PLL14, 4 }
+};
+
+static const char * const gcc_cxo_pll14[] = {
+	"cxo",
+	"pll14_vote",
+};
+
+static const struct parent_map gcc_cxo_map[] = {
+	{ P_CXO, 0 },
+};
+
+static const char * const gcc_cxo[] = {
+	"cxo",
+};
+
+static struct freq_tbl clk_tbl_gsbi_uart[] = {
+	{  1843200, P_PLL8, 2,  6, 625 },
+	{  3686400, P_PLL8, 2, 12, 625 },
+	{  7372800, P_PLL8, 2, 24, 625 },
+	{ 14745600, P_PLL8, 2, 48, 625 },
+	{ 16000000, P_PLL8, 4,  1,   6 },
+	{ 24000000, P_PLL8, 4,  1,   4 },
+	{ 32000000, P_PLL8, 4,  1,   3 },
+	{ 40000000, P_PLL8, 1,  5,  48 },
+	{ 46400000, P_PLL8, 1, 29, 240 },
+	{ 48000000, P_PLL8, 4,  1,   2 },
+	{ 51200000, P_PLL8, 1,  2,  15 },
+	{ 56000000, P_PLL8, 1,  7,  48 },
+	{ 58982400, P_PLL8, 1, 96, 625 },
+	{ 64000000, P_PLL8, 2,  1,   3 },
+	{ }
+};
+
+static struct clk_rcg gsbi1_uart_src = {
+	.ns_reg = 0x29d4,
+	.md_reg = 0x29d0,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 16,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_uart,
+	.clkr = {
+		.enable_reg = 0x29d4,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi1_uart_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi1_uart_clk = {
+	.halt_reg = 0x2fcc,
+	.halt_bit = 10,
+	.clkr = {
+		.enable_reg = 0x29d4,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi1_uart_clk",
+			.parent_names = (const char *[]){
+				"gsbi1_uart_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gsbi2_uart_src = {
+	.ns_reg = 0x29f4,
+	.md_reg = 0x29f0,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 16,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_uart,
+	.clkr = {
+		.enable_reg = 0x29f4,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi2_uart_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi2_uart_clk = {
+	.halt_reg = 0x2fcc,
+	.halt_bit = 6,
+	.clkr = {
+		.enable_reg = 0x29f4,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi2_uart_clk",
+			.parent_names = (const char *[]){
+				"gsbi2_uart_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gsbi3_uart_src = {
+	.ns_reg = 0x2a14,
+	.md_reg = 0x2a10,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 16,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_uart,
+	.clkr = {
+		.enable_reg = 0x2a14,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi3_uart_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi3_uart_clk = {
+	.halt_reg = 0x2fcc,
+	.halt_bit = 2,
+	.clkr = {
+		.enable_reg = 0x2a14,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi3_uart_clk",
+			.parent_names = (const char *[]){
+				"gsbi3_uart_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gsbi4_uart_src = {
+	.ns_reg = 0x2a34,
+	.md_reg = 0x2a30,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 16,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_uart,
+	.clkr = {
+		.enable_reg = 0x2a34,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi4_uart_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi4_uart_clk = {
+	.halt_reg = 0x2fd0,
+	.halt_bit = 26,
+	.clkr = {
+		.enable_reg = 0x2a34,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi4_uart_clk",
+			.parent_names = (const char *[]){
+				"gsbi4_uart_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gsbi5_uart_src = {
+	.ns_reg = 0x2a54,
+	.md_reg = 0x2a50,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 16,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_uart,
+	.clkr = {
+		.enable_reg = 0x2a54,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi5_uart_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi5_uart_clk = {
+	.halt_reg = 0x2fd0,
+	.halt_bit = 22,
+	.clkr = {
+		.enable_reg = 0x2a54,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi5_uart_clk",
+			.parent_names = (const char *[]){
+				"gsbi5_uart_src",
+			},
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct freq_tbl clk_tbl_gsbi_qup[] = {
+	{   960000, P_CXO,  4, 1,  5 },
+	{  4800000, P_CXO,  4, 0,  1 },
+	{  9600000, P_CXO,  2, 0,  1 },
+	{ 15060000, P_PLL8, 1, 2, 51 },
+	{ 24000000, P_PLL8, 4, 1,  4 },
+	{ 25600000, P_PLL8, 1, 1, 15 },
+	{ 48000000, P_PLL8, 4, 1,  2 },
+	{ 51200000, P_PLL8, 1, 2, 15 },
+	{ }
+};
+
+static struct clk_rcg gsbi1_qup_src = {
+	.ns_reg = 0x29cc,
+	.md_reg = 0x29c8,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_qup,
+	.clkr = {
+		.enable_reg = 0x29cc,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi1_qup_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi1_qup_clk = {
+	.halt_reg = 0x2fcc,
+	.halt_bit = 9,
+	.clkr = {
+		.enable_reg = 0x29cc,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi1_qup_clk",
+			.parent_names = (const char *[]){ "gsbi1_qup_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gsbi2_qup_src = {
+	.ns_reg = 0x29ec,
+	.md_reg = 0x29e8,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_qup,
+	.clkr = {
+		.enable_reg = 0x29ec,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi2_qup_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi2_qup_clk = {
+	.halt_reg = 0x2fcc,
+	.halt_bit = 4,
+	.clkr = {
+		.enable_reg = 0x29ec,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi2_qup_clk",
+			.parent_names = (const char *[]){ "gsbi2_qup_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gsbi3_qup_src = {
+	.ns_reg = 0x2a0c,
+	.md_reg = 0x2a08,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_qup,
+	.clkr = {
+		.enable_reg = 0x2a0c,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi3_qup_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi3_qup_clk = {
+	.halt_reg = 0x2fcc,
+	.halt_bit = 0,
+	.clkr = {
+		.enable_reg = 0x2a0c,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi3_qup_clk",
+			.parent_names = (const char *[]){ "gsbi3_qup_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gsbi4_qup_src = {
+	.ns_reg = 0x2a2c,
+	.md_reg = 0x2a28,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_qup,
+	.clkr = {
+		.enable_reg = 0x2a2c,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi4_qup_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi4_qup_clk = {
+	.halt_reg = 0x2fd0,
+	.halt_bit = 24,
+	.clkr = {
+		.enable_reg = 0x2a2c,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi4_qup_clk",
+			.parent_names = (const char *[]){ "gsbi4_qup_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gsbi5_qup_src = {
+	.ns_reg = 0x2a4c,
+	.md_reg = 0x2a48,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_gsbi_qup,
+	.clkr = {
+		.enable_reg = 0x2a4c,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi5_qup_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	},
+};
+
+static struct clk_branch gsbi5_qup_clk = {
+	.halt_reg = 0x2fd0,
+	.halt_bit = 20,
+	.clkr = {
+		.enable_reg = 0x2a4c,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi5_qup_clk",
+			.parent_names = (const char *[]){ "gsbi5_qup_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl clk_tbl_gp[] = {
+	{ 9600000, P_CXO,  2, 0, 0 },
+	{ 19200000, P_CXO,  1, 0, 0 },
+	{ }
+};
+
+static struct clk_rcg gp0_src = {
+	.ns_reg = 0x2d24,
+	.md_reg = 0x2d00,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_map,
+	},
+	.freq_tbl = clk_tbl_gp,
+	.clkr = {
+		.enable_reg = 0x2d24,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gp0_src",
+			.parent_names = gcc_cxo,
+			.num_parents = 1,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_PARENT_GATE,
+		},
+	}
+};
+
+static struct clk_branch gp0_clk = {
+	.halt_reg = 0x2fd8,
+	.halt_bit = 7,
+	.clkr = {
+		.enable_reg = 0x2d24,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gp0_clk",
+			.parent_names = (const char *[]){ "gp0_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gp1_src = {
+	.ns_reg = 0x2d44,
+	.md_reg = 0x2d40,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_map,
+	},
+	.freq_tbl = clk_tbl_gp,
+	.clkr = {
+		.enable_reg = 0x2d44,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gp1_src",
+			.parent_names = gcc_cxo,
+			.num_parents = 1,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	}
+};
+
+static struct clk_branch gp1_clk = {
+	.halt_reg = 0x2fd8,
+	.halt_bit = 6,
+	.clkr = {
+		.enable_reg = 0x2d44,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gp1_clk",
+			.parent_names = (const char *[]){ "gp1_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg gp2_src = {
+	.ns_reg = 0x2d64,
+	.md_reg = 0x2d60,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_map,
+	},
+	.freq_tbl = clk_tbl_gp,
+	.clkr = {
+		.enable_reg = 0x2d64,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "gp2_src",
+			.parent_names = gcc_cxo,
+			.num_parents = 1,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	}
+};
+
+static struct clk_branch gp2_clk = {
+	.halt_reg = 0x2fd8,
+	.halt_bit = 5,
+	.clkr = {
+		.enable_reg = 0x2d64,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "gp2_clk",
+			.parent_names = (const char *[]){ "gp2_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch pmem_clk = {
+	.hwcg_reg = 0x25a0,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fc8,
+	.halt_bit = 20,
+	.clkr = {
+		.enable_reg = 0x25a0,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "pmem_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_rcg prng_src = {
+	.ns_reg = 0x2e80,
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 4,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.clkr = {
+		.hw.init = &(struct clk_init_data){
+			.name = "prng_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+		},
+	},
+};
+
+static struct clk_branch prng_clk = {
+	.halt_reg = 0x2fd8,
+	.halt_check = BRANCH_HALT_VOTED,
+	.halt_bit = 10,
+	.clkr = {
+		.enable_reg = 0x3080,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "prng_clk",
+			.parent_names = (const char *[]){ "prng_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static const struct freq_tbl clk_tbl_sdc[] = {
+	{    144000, P_CXO,   1, 1, 133 },
+	{    400000, P_PLL8,  4, 1, 240 },
+	{  16000000, P_PLL8,  4, 1,   6 },
+	{  17070000, P_PLL8,  1, 2,  45 },
+	{  20210000, P_PLL8,  1, 1,  19 },
+	{  24000000, P_PLL8,  4, 1,   4 },
+	{  38400000, P_PLL8,  2, 1,   5 },
+	{  48000000, P_PLL8,  4, 1,   2 },
+	{  64000000, P_PLL8,  3, 1,   2 },
+	{  76800000, P_PLL8,  1, 1,   5 },
+	{ }
+};
+
+static struct clk_rcg sdc1_src = {
+	.ns_reg = 0x282c,
+	.md_reg = 0x2828,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_sdc,
+	.clkr = {
+		.enable_reg = 0x282c,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "sdc1_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	}
+};
+
+static struct clk_branch sdc1_clk = {
+	.halt_reg = 0x2fc8,
+	.halt_bit = 6,
+	.clkr = {
+		.enable_reg = 0x282c,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "sdc1_clk",
+			.parent_names = (const char *[]){ "sdc1_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg sdc2_src = {
+	.ns_reg = 0x284c,
+	.md_reg = 0x2848,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_sdc,
+	.clkr = {
+		.enable_reg = 0x284c,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "sdc2_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	}
+};
+
+static struct clk_branch sdc2_clk = {
+	.halt_reg = 0x2fc8,
+	.halt_bit = 5,
+	.clkr = {
+		.enable_reg = 0x284c,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "sdc2_clk",
+			.parent_names = (const char *[]){ "sdc2_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl clk_tbl_usb[] = {
+	{ 60000000, P_PLL8, 1, 5, 32 },
+	{ }
+};
+
+static struct clk_rcg usb_hs1_xcvr_src = {
+	.ns_reg = 0x290c,
+	.md_reg = 0x2908,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_usb,
+	.clkr = {
+		.enable_reg = 0x290c,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "usb_hs1_xcvr_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	}
+};
+
+static struct clk_branch usb_hs1_xcvr_clk = {
+	.halt_reg = 0x2fc8,
+	.halt_bit = 0,
+	.clkr = {
+		.enable_reg = 0x290c,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "usb_hs1_xcvr_clk",
+			.parent_names = (const char *[]){ "usb_hs1_xcvr_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg usb_hsic_xcvr_fs_src = {
+	.ns_reg = 0x2928,
+	.md_reg = 0x2924,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_usb,
+	.clkr = {
+		.enable_reg = 0x2928,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "usb_hsic_xcvr_fs_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	}
+};
+
+static struct clk_branch usb_hsic_xcvr_fs_clk = {
+	.halt_reg = 0x2fc8,
+	.halt_bit = 9,
+	.clkr = {
+		.enable_reg = 0x2928,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "usb_hsic_xcvr_fs_clk",
+			.parent_names =
+				(const char *[]){ "usb_hsic_xcvr_fs_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl clk_tbl_usb_hs1_system[] = {
+	{ 60000000, P_PLL8, 1, 5, 32 },
+	{ }
+};
+
+static struct clk_rcg usb_hs1_system_src = {
+	.ns_reg = 0x36a4,
+	.md_reg = 0x36a0,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_usb_hs1_system,
+	.clkr = {
+		.enable_reg = 0x36a4,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "usb_hs1_system_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	}
+};
+
+static struct clk_branch usb_hs1_system_clk = {
+	.halt_reg = 0x2fc8,
+	.halt_bit = 4,
+	.clkr = {
+		.enable_reg = 0x36a4,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.parent_names =
+				(const char *[]){ "usb_hs1_system_src" },
+			.num_parents = 1,
+			.name = "usb_hs1_system_clk",
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT | CLK_IGNORE_UNUSED,
+		},
+	},
+};
+
+static const struct freq_tbl clk_tbl_usb_hsic_system[] = {
+	{ 64000000, P_PLL8, 1, 1, 6 },
+	{ }
+};
+
+static struct clk_rcg usb_hsic_system_src = {
+	.ns_reg = 0x2b58,
+	.md_reg = 0x2b54,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll8_map,
+	},
+	.freq_tbl = clk_tbl_usb_hsic_system,
+	.clkr = {
+		.enable_reg = 0x2b58,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "usb_hsic_system_src",
+			.parent_names = gcc_cxo_pll8,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	}
+};
+
+static struct clk_branch usb_hsic_system_clk = {
+	.halt_reg = 0x2fc8,
+	.halt_bit = 7,
+	.clkr = {
+		.enable_reg = 0x2b58,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.parent_names =
+				(const char *[]){ "usb_hsic_system_src" },
+			.num_parents = 1,
+			.name = "usb_hsic_system_clk",
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static const struct freq_tbl clk_tbl_usb_hsic_hsic[] = {
+	{ 48000000, P_PLL14, 1, 0, 0 },
+	{ }
+};
+
+static struct clk_rcg usb_hsic_hsic_src = {
+	.ns_reg = 0x2b50,
+	.md_reg = 0x2b4c,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = gcc_cxo_pll14_map,
+	},
+	.freq_tbl = clk_tbl_usb_hsic_hsic,
+	.clkr = {
+		.enable_reg = 0x2b50,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "usb_hsic_hsic_src",
+			.parent_names = gcc_cxo_pll14,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	}
+};
+
+static struct clk_branch usb_hsic_hsic_clk = {
+	.halt_check = BRANCH_HALT_DELAY,
+	.clkr = {
+		.enable_reg = 0x2b50,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.parent_names = (const char *[]){ "usb_hsic_hsic_src" },
+			.num_parents = 1,
+			.name = "usb_hsic_hsic_clk",
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch usb_hsic_hsio_cal_clk = {
+	.halt_reg = 0x2fc8,
+	.halt_bit = 8,
+	.clkr = {
+		.enable_reg = 0x2b48,
+		.enable_mask = BIT(0),
+		.hw.init = &(struct clk_init_data){
+			.parent_names = (const char *[]){ "cxo" },
+			.num_parents = 1,
+			.name = "usb_hsic_hsio_cal_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch ce1_core_clk = {
+	.hwcg_reg = 0x2724,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fd4,
+	.halt_bit = 27,
+	.clkr = {
+		.enable_reg = 0x2724,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "ce1_core_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch ce1_h_clk = {
+	.halt_reg = 0x2fd4,
+	.halt_bit = 1,
+	.clkr = {
+		.enable_reg = 0x2720,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "ce1_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch dma_bam_h_clk = {
+	.hwcg_reg = 0x25c0,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fc8,
+	.halt_bit = 12,
+	.clkr = {
+		.enable_reg = 0x25c0,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "dma_bam_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch gsbi1_h_clk = {
+	.hwcg_reg = 0x29c0,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fcc,
+	.halt_bit = 11,
+	.clkr = {
+		.enable_reg = 0x29c0,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi1_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch gsbi2_h_clk = {
+	.hwcg_reg = 0x29e0,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fcc,
+	.halt_bit = 7,
+	.clkr = {
+		.enable_reg = 0x29e0,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi2_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch gsbi3_h_clk = {
+	.hwcg_reg = 0x2a00,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fcc,
+	.halt_bit = 3,
+	.clkr = {
+		.enable_reg = 0x2a00,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi3_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch gsbi4_h_clk = {
+	.hwcg_reg = 0x2a20,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fd0,
+	.halt_bit = 27,
+	.clkr = {
+		.enable_reg = 0x2a20,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi4_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch gsbi5_h_clk = {
+	.hwcg_reg = 0x2a40,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fd0,
+	.halt_bit = 23,
+	.clkr = {
+		.enable_reg = 0x2a40,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "gsbi5_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch usb_hs1_h_clk = {
+	.hwcg_reg = 0x2900,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fc8,
+	.halt_bit = 1,
+	.clkr = {
+		.enable_reg = 0x2900,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "usb_hs1_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch usb_hsic_h_clk = {
+	.halt_reg = 0x2fcc,
+	.halt_bit = 28,
+	.clkr = {
+		.enable_reg = 0x2920,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "usb_hsic_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch sdc1_h_clk = {
+	.hwcg_reg = 0x2820,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fc8,
+	.halt_bit = 11,
+	.clkr = {
+		.enable_reg = 0x2820,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "sdc1_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch sdc2_h_clk = {
+	.hwcg_reg = 0x2840,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fc8,
+	.halt_bit = 10,
+	.clkr = {
+		.enable_reg = 0x2840,
+		.enable_mask = BIT(4),
+		.hw.init = &(struct clk_init_data){
+			.name = "sdc2_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch adm0_clk = {
+	.halt_reg = 0x2fdc,
+	.halt_check = BRANCH_HALT_VOTED,
+	.halt_bit = 14,
+	.clkr = {
+		.enable_reg = 0x3080,
+		.enable_mask = BIT(2),
+		.hw.init = &(struct clk_init_data){
+			.name = "adm0_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch adm0_pbus_clk = {
+	.hwcg_reg = 0x2208,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fdc,
+	.halt_check = BRANCH_HALT_VOTED,
+	.halt_bit = 13,
+	.clkr = {
+		.enable_reg = 0x3080,
+		.enable_mask = BIT(3),
+		.hw.init = &(struct clk_init_data){
+			.name = "adm0_pbus_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch pmic_arb0_h_clk = {
+	.halt_reg = 0x2fd8,
+	.halt_check = BRANCH_HALT_VOTED,
+	.halt_bit = 22,
+	.clkr = {
+		.enable_reg = 0x3080,
+		.enable_mask = BIT(8),
+		.hw.init = &(struct clk_init_data){
+			.name = "pmic_arb0_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch pmic_arb1_h_clk = {
+	.halt_reg = 0x2fd8,
+	.halt_check = BRANCH_HALT_VOTED,
+	.halt_bit = 21,
+	.clkr = {
+		.enable_reg = 0x3080,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "pmic_arb1_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch pmic_ssbi2_clk = {
+	.halt_reg = 0x2fd8,
+	.halt_check = BRANCH_HALT_VOTED,
+	.halt_bit = 23,
+	.clkr = {
+		.enable_reg = 0x3080,
+		.enable_mask = BIT(7),
+		.hw.init = &(struct clk_init_data){
+			.name = "pmic_ssbi2_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_branch rpm_msg_ram_h_clk = {
+	.hwcg_reg = 0x27e0,
+	.hwcg_bit = 6,
+	.halt_reg = 0x2fd8,
+	.halt_check = BRANCH_HALT_VOTED,
+	.halt_bit = 12,
+	.clkr = {
+		.enable_reg = 0x3080,
+		.enable_mask = BIT(6),
+		.hw.init = &(struct clk_init_data){
+			.name = "rpm_msg_ram_h_clk",
+			.ops = &clk_branch_ops,
+		},
+	},
+};
+
+static struct clk_hw *gcc_mdm9615_hws[] = {
+	&cxo.hw,
+};
+
+static struct clk_regmap *gcc_mdm9615_clks[] = {
+	[PLL0] = &pll0.clkr,
+	[PLL0_VOTE] = &pll0_vote,
+	[PLL4_VOTE] = &pll4_vote,
+	[PLL8] = &pll8.clkr,
+	[PLL8_VOTE] = &pll8_vote,
+	[PLL14] = &pll14.clkr,
+	[PLL14_VOTE] = &pll14_vote,
+	[GSBI1_UART_SRC] = &gsbi1_uart_src.clkr,
+	[GSBI1_UART_CLK] = &gsbi1_uart_clk.clkr,
+	[GSBI2_UART_SRC] = &gsbi2_uart_src.clkr,
+	[GSBI2_UART_CLK] = &gsbi2_uart_clk.clkr,
+	[GSBI3_UART_SRC] = &gsbi3_uart_src.clkr,
+	[GSBI3_UART_CLK] = &gsbi3_uart_clk.clkr,
+	[GSBI4_UART_SRC] = &gsbi4_uart_src.clkr,
+	[GSBI4_UART_CLK] = &gsbi4_uart_clk.clkr,
+	[GSBI5_UART_SRC] = &gsbi5_uart_src.clkr,
+	[GSBI5_UART_CLK] = &gsbi5_uart_clk.clkr,
+	[GSBI1_QUP_SRC] = &gsbi1_qup_src.clkr,
+	[GSBI1_QUP_CLK] = &gsbi1_qup_clk.clkr,
+	[GSBI2_QUP_SRC] = &gsbi2_qup_src.clkr,
+	[GSBI2_QUP_CLK] = &gsbi2_qup_clk.clkr,
+	[GSBI3_QUP_SRC] = &gsbi3_qup_src.clkr,
+	[GSBI3_QUP_CLK] = &gsbi3_qup_clk.clkr,
+	[GSBI4_QUP_SRC] = &gsbi4_qup_src.clkr,
+	[GSBI4_QUP_CLK] = &gsbi4_qup_clk.clkr,
+	[GSBI5_QUP_SRC] = &gsbi5_qup_src.clkr,
+	[GSBI5_QUP_CLK] = &gsbi5_qup_clk.clkr,
+	[GP0_SRC] = &gp0_src.clkr,
+	[GP0_CLK] = &gp0_clk.clkr,
+	[GP1_SRC] = &gp1_src.clkr,
+	[GP1_CLK] = &gp1_clk.clkr,
+	[GP2_SRC] = &gp2_src.clkr,
+	[GP2_CLK] = &gp2_clk.clkr,
+	[PMEM_A_CLK] = &pmem_clk.clkr,
+	[PRNG_SRC] = &prng_src.clkr,
+	[PRNG_CLK] = &prng_clk.clkr,
+	[SDC1_SRC] = &sdc1_src.clkr,
+	[SDC1_CLK] = &sdc1_clk.clkr,
+	[SDC2_SRC] = &sdc2_src.clkr,
+	[SDC2_CLK] = &sdc2_clk.clkr,
+	[USB_HS1_XCVR_SRC] = &usb_hs1_xcvr_src.clkr,
+	[USB_HS1_XCVR_CLK] = &usb_hs1_xcvr_clk.clkr,
+	[USB_HS1_SYSTEM_CLK_SRC] = &usb_hs1_system_src.clkr,
+	[USB_HS1_SYSTEM_CLK] = &usb_hs1_system_clk.clkr,
+	[USB_HSIC_XCVR_FS_SRC] = &usb_hsic_xcvr_fs_src.clkr,
+	[USB_HSIC_XCVR_FS_CLK] = &usb_hsic_xcvr_fs_clk.clkr,
+	[USB_HSIC_SYSTEM_CLK_SRC] = &usb_hsic_system_src.clkr,
+	[USB_HSIC_SYSTEM_CLK] = &usb_hsic_system_clk.clkr,
+	[USB_HSIC_HSIC_CLK_SRC] = &usb_hsic_hsic_src.clkr,
+	[USB_HSIC_HSIC_CLK] = &usb_hsic_hsic_clk.clkr,
+	[USB_HSIC_HSIO_CAL_CLK] = &usb_hsic_hsio_cal_clk.clkr,
+	[CE1_CORE_CLK] = &ce1_core_clk.clkr,
+	[CE1_H_CLK] = &ce1_h_clk.clkr,
+	[DMA_BAM_H_CLK] = &dma_bam_h_clk.clkr,
+	[GSBI1_H_CLK] = &gsbi1_h_clk.clkr,
+	[GSBI2_H_CLK] = &gsbi2_h_clk.clkr,
+	[GSBI3_H_CLK] = &gsbi3_h_clk.clkr,
+	[GSBI4_H_CLK] = &gsbi4_h_clk.clkr,
+	[GSBI5_H_CLK] = &gsbi5_h_clk.clkr,
+	[USB_HS1_H_CLK] = &usb_hs1_h_clk.clkr,
+	[USB_HSIC_H_CLK] = &usb_hsic_h_clk.clkr,
+	[SDC1_H_CLK] = &sdc1_h_clk.clkr,
+	[SDC2_H_CLK] = &sdc2_h_clk.clkr,
+	[ADM0_CLK] = &adm0_clk.clkr,
+	[ADM0_PBUS_CLK] = &adm0_pbus_clk.clkr,
+	[PMIC_ARB0_H_CLK] = &pmic_arb0_h_clk.clkr,
+	[PMIC_ARB1_H_CLK] = &pmic_arb1_h_clk.clkr,
+	[PMIC_SSBI2_CLK] = &pmic_ssbi2_clk.clkr,
+	[RPM_MSG_RAM_H_CLK] = &rpm_msg_ram_h_clk.clkr,
+};
+
+static const struct qcom_reset_map gcc_mdm9615_resets[] = {
+	[DMA_BAM_RESET] = { 0x25c0, 7 },
+	[CE1_H_RESET] = { 0x2720, 7 },
+	[CE1_CORE_RESET] = { 0x2724, 7 },
+	[SDC1_RESET] = { 0x2830 },
+	[SDC2_RESET] = { 0x2850 },
+	[ADM0_C2_RESET] = { 0x220c, 4 },
+	[ADM0_C1_RESET] = { 0x220c, 3 },
+	[ADM0_C0_RESET] = { 0x220c, 2 },
+	[ADM0_PBUS_RESET] = { 0x220c, 1 },
+	[ADM0_RESET] = { 0x220c },
+	[USB_HS1_RESET] = { 0x2910 },
+	[USB_HSIC_RESET] = { 0x2934 },
+	[GSBI1_RESET] = { 0x29dc },
+	[GSBI2_RESET] = { 0x29fc },
+	[GSBI3_RESET] = { 0x2a1c },
+	[GSBI4_RESET] = { 0x2a3c },
+	[GSBI5_RESET] = { 0x2a5c },
+	[PDM_RESET] = { 0x2CC0, 12 },
+};
+
+static const struct regmap_config gcc_mdm9615_regmap_config = {
+	.reg_bits	= 32,
+	.reg_stride	= 4,
+	.val_bits	= 32,
+	.max_register	= 0x3660,
+	.fast_io	= true,
+};
+
+static const struct qcom_cc_desc gcc_mdm9615_desc = {
+	.config = &gcc_mdm9615_regmap_config,
+	.clks = gcc_mdm9615_clks,
+	.num_clks = ARRAY_SIZE(gcc_mdm9615_clks),
+	.resets = gcc_mdm9615_resets,
+	.num_resets = ARRAY_SIZE(gcc_mdm9615_resets),
+};
+
+static const struct of_device_id gcc_mdm9615_match_table[] = {
+	{ .compatible = "qcom,gcc-mdm9615" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, gcc_mdm9615_match_table);
+
+static int gcc_mdm9615_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct regmap *regmap;
+	int ret;
+	int i;
+
+	regmap = qcom_cc_map(pdev, &gcc_mdm9615_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	for (i = 0; i < ARRAY_SIZE(gcc_mdm9615_hws); i++) {
+		ret = devm_clk_hw_register(dev, gcc_mdm9615_hws[i]);
+		if (ret)
+			return ret;
+	}
+
+	return qcom_cc_really_probe(pdev, &gcc_mdm9615_desc, regmap);
+}
+
+static struct platform_driver gcc_mdm9615_driver = {
+	.probe		= gcc_mdm9615_probe,
+	.driver		= {
+		.name	= "gcc-mdm9615",
+		.of_match_table = gcc_mdm9615_match_table,
+	},
+};
+
+static int __init gcc_mdm9615_init(void)
+{
+	return platform_driver_register(&gcc_mdm9615_driver);
+}
+core_initcall(gcc_mdm9615_init);
+
+static void __exit gcc_mdm9615_exit(void)
+{
+	platform_driver_unregister(&gcc_mdm9615_driver);
+}
+module_exit(gcc_mdm9615_exit);
+
+MODULE_DESCRIPTION("QCOM GCC MDM9615 Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:gcc-mdm9615");
diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c
index bbf732b..fe03e6f 100644
--- a/drivers/clk/qcom/gcc-msm8996.c
+++ b/drivers/clk/qcom/gcc-msm8996.c
@@ -2592,9 +2592,9 @@
 };
 
 static struct clk_branch gcc_pcie_2_pipe_clk = {
-	.halt_reg = 0x6e108,
+	.halt_reg = 0x6e018,
 	.clkr = {
-		.enable_reg = 0x6e108,
+		.enable_reg = 0x6e018,
 		.enable_mask = BIT(0),
 		.hw.init = &(struct clk_init_data){
 			.name = "gcc_pcie_2_pipe_clk",
@@ -3329,6 +3329,8 @@
 	[GCC_USB_20_BCR] = { 0x12000 },
 	[GCC_QUSB2PHY_PRIM_BCR] = { 0x12038 },
 	[GCC_QUSB2PHY_SEC_BCR] = { 0x1203c },
+	[GCC_USB3_PHY_BCR] = { 0x50020 },
+	[GCC_USB3PHY_PHY_BCR] = { 0x50024 },
 	[GCC_USB_PHY_CFG_AHB2PHY_BCR] = { 0x6a000 },
 	[GCC_SDCC1_BCR] = { 0x13000 },
 	[GCC_SDCC2_BCR] = { 0x14000 },
@@ -3404,6 +3406,8 @@
 	[GCC_PCIE_2_BCR] = { 0x6e000 },
 	[GCC_PCIE_2_PHY_BCR] = { 0x6e038 },
 	[GCC_PCIE_PHY_BCR] = { 0x6f000 },
+	[GCC_PCIE_PHY_COM_BCR] = { 0x6f014 },
+	[GCC_PCIE_PHY_COM_NOCSR_BCR] = { 0x6f00c },
 	[GCC_DCD_BCR] = { 0x70000 },
 	[GCC_OBT_ODT_BCR] = { 0x73000 },
 	[GCC_UFS_BCR] = { 0x75000 },
@@ -3447,9 +3451,8 @@
 
 static int gcc_msm8996_probe(struct platform_device *pdev)
 {
-	struct clk *clk;
 	struct device *dev = &pdev->dev;
-	int i;
+	int i, ret;
 	struct regmap *regmap;
 
 	regmap = qcom_cc_map(pdev, &gcc_msm8996_desc);
@@ -3463,9 +3466,9 @@
 	regmap_update_bits(regmap, 0x52008, BIT(21), BIT(21));
 
 	for (i = 0; i < ARRAY_SIZE(gcc_msm8996_hws); i++) {
-		clk = devm_clk_register(dev, gcc_msm8996_hws[i]);
-		if (IS_ERR(clk))
-			return PTR_ERR(clk);
+		ret = devm_clk_hw_register(dev, gcc_msm8996_hws[i]);
+		if (ret)
+			return ret;
 	}
 
 	return qcom_cc_really_probe(pdev, &gcc_msm8996_desc, regmap);
diff --git a/drivers/clk/qcom/lcc-mdm9615.c b/drivers/clk/qcom/lcc-mdm9615.c
new file mode 100644
index 0000000..3237ef4
--- /dev/null
+++ b/drivers/clk/qcom/lcc-mdm9615.c
@@ -0,0 +1,580 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ * Copyright (c) BayLibre, SAS.
+ * Author : Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/clk-provider.h>
+#include <linux/regmap.h>
+
+#include <dt-bindings/clock/qcom,lcc-mdm9615.h>
+
+#include "common.h"
+#include "clk-regmap.h"
+#include "clk-pll.h"
+#include "clk-rcg.h"
+#include "clk-branch.h"
+#include "clk-regmap-divider.h"
+#include "clk-regmap-mux.h"
+
+static struct clk_pll pll4 = {
+	.l_reg = 0x4,
+	.m_reg = 0x8,
+	.n_reg = 0xc,
+	.config_reg = 0x14,
+	.mode_reg = 0x0,
+	.status_reg = 0x18,
+	.status_bit = 16,
+	.clkr.hw.init = &(struct clk_init_data){
+		.name = "pll4",
+		.parent_names = (const char *[]){ "cxo" },
+		.num_parents = 1,
+		.ops = &clk_pll_ops,
+	},
+};
+
+enum {
+	P_CXO,
+	P_PLL4,
+};
+
+static const struct parent_map lcc_cxo_pll4_map[] = {
+	{ P_CXO, 0 },
+	{ P_PLL4, 2 }
+};
+
+static const char * const lcc_cxo_pll4[] = {
+	"cxo",
+	"pll4_vote",
+};
+
+static struct freq_tbl clk_tbl_aif_osr_492[] = {
+	{   512000, P_PLL4, 4, 1, 240 },
+	{   768000, P_PLL4, 4, 1, 160 },
+	{  1024000, P_PLL4, 4, 1, 120 },
+	{  1536000, P_PLL4, 4, 1,  80 },
+	{  2048000, P_PLL4, 4, 1,  60 },
+	{  3072000, P_PLL4, 4, 1,  40 },
+	{  4096000, P_PLL4, 4, 1,  30 },
+	{  6144000, P_PLL4, 4, 1,  20 },
+	{  8192000, P_PLL4, 4, 1,  15 },
+	{ 12288000, P_PLL4, 4, 1,  10 },
+	{ 24576000, P_PLL4, 4, 1,   5 },
+	{ 27000000, P_CXO,  1, 0,   0 },
+	{ }
+};
+
+static struct freq_tbl clk_tbl_aif_osr_393[] = {
+	{   512000, P_PLL4, 4, 1, 192 },
+	{   768000, P_PLL4, 4, 1, 128 },
+	{  1024000, P_PLL4, 4, 1,  96 },
+	{  1536000, P_PLL4, 4, 1,  64 },
+	{  2048000, P_PLL4, 4, 1,  48 },
+	{  3072000, P_PLL4, 4, 1,  32 },
+	{  4096000, P_PLL4, 4, 1,  24 },
+	{  6144000, P_PLL4, 4, 1,  16 },
+	{  8192000, P_PLL4, 4, 1,  12 },
+	{ 12288000, P_PLL4, 4, 1,   8 },
+	{ 24576000, P_PLL4, 4, 1,   4 },
+	{ 27000000, P_CXO,  1, 0,   0 },
+	{ }
+};
+
+static struct clk_rcg mi2s_osr_src = {
+	.ns_reg = 0x48,
+	.md_reg = 0x4c,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 24,
+		.m_val_shift = 8,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = lcc_cxo_pll4_map,
+	},
+	.freq_tbl = clk_tbl_aif_osr_393,
+	.clkr = {
+		.enable_reg = 0x48,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "mi2s_osr_src",
+			.parent_names = lcc_cxo_pll4,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	},
+};
+
+static const char * const lcc_mi2s_parents[] = {
+	"mi2s_osr_src",
+};
+
+static struct clk_branch mi2s_osr_clk = {
+	.halt_reg = 0x50,
+	.halt_bit = 1,
+	.halt_check = BRANCH_HALT_ENABLE,
+	.clkr = {
+		.enable_reg = 0x48,
+		.enable_mask = BIT(17),
+		.hw.init = &(struct clk_init_data){
+			.name = "mi2s_osr_clk",
+			.parent_names = lcc_mi2s_parents,
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_regmap_div mi2s_div_clk = {
+	.reg = 0x48,
+	.shift = 10,
+	.width = 4,
+	.clkr = {
+		.enable_reg = 0x48,
+		.enable_mask = BIT(15),
+		.hw.init = &(struct clk_init_data){
+			.name = "mi2s_div_clk",
+			.parent_names = lcc_mi2s_parents,
+			.num_parents = 1,
+			.ops = &clk_regmap_div_ops,
+		},
+	},
+};
+
+static struct clk_branch mi2s_bit_div_clk = {
+	.halt_reg = 0x50,
+	.halt_bit = 0,
+	.halt_check = BRANCH_HALT_ENABLE,
+	.clkr = {
+		.enable_reg = 0x48,
+		.enable_mask = BIT(15),
+		.hw.init = &(struct clk_init_data){
+			.name = "mi2s_bit_div_clk",
+			.parent_names = (const char *[]){ "mi2s_div_clk" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_regmap_mux mi2s_bit_clk = {
+	.reg = 0x48,
+	.shift = 14,
+	.width = 1,
+	.clkr = {
+		.hw.init = &(struct clk_init_data){
+			.name = "mi2s_bit_clk",
+			.parent_names = (const char *[]){
+				"mi2s_bit_div_clk",
+				"mi2s_codec_clk",
+			},
+			.num_parents = 2,
+			.ops = &clk_regmap_mux_closest_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+#define CLK_AIF_OSR_DIV(prefix, _ns, _md, hr)			\
+static struct clk_rcg prefix##_osr_src = {			\
+	.ns_reg = _ns,						\
+	.md_reg = _md,						\
+	.mn = {							\
+		.mnctr_en_bit = 8,				\
+		.mnctr_reset_bit = 7,				\
+		.mnctr_mode_shift = 5,				\
+		.n_val_shift = 24,				\
+		.m_val_shift = 8,				\
+		.width = 8,					\
+	},							\
+	.p = {							\
+		.pre_div_shift = 3,				\
+		.pre_div_width = 2,				\
+	},							\
+	.s = {							\
+		.src_sel_shift = 0,				\
+		.parent_map = lcc_cxo_pll4_map,			\
+	},							\
+	.freq_tbl = clk_tbl_aif_osr_393,			\
+	.clkr = {						\
+		.enable_reg = _ns,				\
+		.enable_mask = BIT(9),				\
+		.hw.init = &(struct clk_init_data){		\
+			.name = #prefix "_osr_src",		\
+			.parent_names = lcc_cxo_pll4,		\
+			.num_parents = 2,			\
+			.ops = &clk_rcg_ops,			\
+			.flags = CLK_SET_RATE_GATE,		\
+		},						\
+	},							\
+};								\
+								\
+static const char * const lcc_##prefix##_parents[] = {		\
+	#prefix "_osr_src",					\
+};								\
+								\
+static struct clk_branch prefix##_osr_clk = {			\
+	.halt_reg = hr,						\
+	.halt_bit = 1,						\
+	.halt_check = BRANCH_HALT_ENABLE,			\
+	.clkr = {						\
+		.enable_reg = _ns,				\
+		.enable_mask = BIT(21),				\
+		.hw.init = &(struct clk_init_data){		\
+			.name = #prefix "_osr_clk",		\
+			.parent_names = lcc_##prefix##_parents,	\
+			.num_parents = 1,			\
+			.ops = &clk_branch_ops,			\
+			.flags = CLK_SET_RATE_PARENT,		\
+		},						\
+	},							\
+};								\
+								\
+static struct clk_regmap_div prefix##_div_clk = {		\
+	.reg = _ns,						\
+	.shift = 10,						\
+	.width = 8,						\
+	.clkr = {						\
+		.hw.init = &(struct clk_init_data){		\
+			.name = #prefix "_div_clk",		\
+			.parent_names = lcc_##prefix##_parents,	\
+			.num_parents = 1,			\
+			.ops = &clk_regmap_div_ops,		\
+		},						\
+	},							\
+};								\
+								\
+static struct clk_branch prefix##_bit_div_clk = {		\
+	.halt_reg = hr,						\
+	.halt_bit = 0,						\
+	.halt_check = BRANCH_HALT_ENABLE,			\
+	.clkr = {						\
+		.enable_reg = _ns,				\
+		.enable_mask = BIT(19),				\
+		.hw.init = &(struct clk_init_data){		\
+			.name = #prefix "_bit_div_clk",		\
+			.parent_names = (const char *[]){	\
+				#prefix "_div_clk"		\
+			},					\
+			.num_parents = 1,			\
+			.ops = &clk_branch_ops,			\
+			.flags = CLK_SET_RATE_PARENT,		\
+		},						\
+	},							\
+};								\
+								\
+static struct clk_regmap_mux prefix##_bit_clk = {		\
+	.reg = _ns,						\
+	.shift = 18,						\
+	.width = 1,						\
+	.clkr = {						\
+		.hw.init = &(struct clk_init_data){		\
+			.name = #prefix "_bit_clk",		\
+			.parent_names = (const char *[]){	\
+				#prefix "_bit_div_clk",		\
+				#prefix "_codec_clk",		\
+			},					\
+			.num_parents = 2,			\
+			.ops = &clk_regmap_mux_closest_ops,	\
+			.flags = CLK_SET_RATE_PARENT,		\
+		},						\
+	},							\
+}
+
+CLK_AIF_OSR_DIV(codec_i2s_mic, 0x60, 0x64, 0x68);
+CLK_AIF_OSR_DIV(spare_i2s_mic, 0x78, 0x7c, 0x80);
+CLK_AIF_OSR_DIV(codec_i2s_spkr, 0x6c, 0x70, 0x74);
+CLK_AIF_OSR_DIV(spare_i2s_spkr, 0x84, 0x88, 0x8c);
+
+static struct freq_tbl clk_tbl_pcm_492[] = {
+	{   256000, P_PLL4, 4, 1, 480 },
+	{   512000, P_PLL4, 4, 1, 240 },
+	{   768000, P_PLL4, 4, 1, 160 },
+	{  1024000, P_PLL4, 4, 1, 120 },
+	{  1536000, P_PLL4, 4, 1,  80 },
+	{  2048000, P_PLL4, 4, 1,  60 },
+	{  3072000, P_PLL4, 4, 1,  40 },
+	{  4096000, P_PLL4, 4, 1,  30 },
+	{  6144000, P_PLL4, 4, 1,  20 },
+	{  8192000, P_PLL4, 4, 1,  15 },
+	{ 12288000, P_PLL4, 4, 1,  10 },
+	{ 24576000, P_PLL4, 4, 1,   5 },
+	{ 27000000, P_CXO,  1, 0,   0 },
+	{ }
+};
+
+static struct freq_tbl clk_tbl_pcm_393[] = {
+	{   256000, P_PLL4, 4, 1, 384 },
+	{   512000, P_PLL4, 4, 1, 192 },
+	{   768000, P_PLL4, 4, 1, 128 },
+	{  1024000, P_PLL4, 4, 1,  96 },
+	{  1536000, P_PLL4, 4, 1,  64 },
+	{  2048000, P_PLL4, 4, 1,  48 },
+	{  3072000, P_PLL4, 4, 1,  32 },
+	{  4096000, P_PLL4, 4, 1,  24 },
+	{  6144000, P_PLL4, 4, 1,  16 },
+	{  8192000, P_PLL4, 4, 1,  12 },
+	{ 12288000, P_PLL4, 4, 1,   8 },
+	{ 24576000, P_PLL4, 4, 1,   4 },
+	{ 27000000, P_CXO,  1, 0,   0 },
+	{ }
+};
+
+static struct clk_rcg pcm_src = {
+	.ns_reg = 0x54,
+	.md_reg = 0x58,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 16,
+		.m_val_shift = 16,
+		.width = 16,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = lcc_cxo_pll4_map,
+	},
+	.freq_tbl = clk_tbl_pcm_393,
+	.clkr = {
+		.enable_reg = 0x54,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "pcm_src",
+			.parent_names = lcc_cxo_pll4,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	},
+};
+
+static struct clk_branch pcm_clk_out = {
+	.halt_reg = 0x5c,
+	.halt_bit = 0,
+	.halt_check = BRANCH_HALT_ENABLE,
+	.clkr = {
+		.enable_reg = 0x54,
+		.enable_mask = BIT(11),
+		.hw.init = &(struct clk_init_data){
+			.name = "pcm_clk_out",
+			.parent_names = (const char *[]){ "pcm_src" },
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_regmap_mux pcm_clk = {
+	.reg = 0x54,
+	.shift = 10,
+	.width = 1,
+	.clkr = {
+		.hw.init = &(struct clk_init_data){
+			.name = "pcm_clk",
+			.parent_names = (const char *[]){
+				"pcm_clk_out",
+				"pcm_codec_clk",
+			},
+			.num_parents = 2,
+			.ops = &clk_regmap_mux_closest_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_rcg slimbus_src = {
+	.ns_reg = 0xcc,
+	.md_reg = 0xd0,
+	.mn = {
+		.mnctr_en_bit = 8,
+		.mnctr_reset_bit = 7,
+		.mnctr_mode_shift = 5,
+		.n_val_shift = 24,
+		.m_val_shift = 8,
+		.width = 8,
+	},
+	.p = {
+		.pre_div_shift = 3,
+		.pre_div_width = 2,
+	},
+	.s = {
+		.src_sel_shift = 0,
+		.parent_map = lcc_cxo_pll4_map,
+	},
+	.freq_tbl = clk_tbl_aif_osr_393,
+	.clkr = {
+		.enable_reg = 0xcc,
+		.enable_mask = BIT(9),
+		.hw.init = &(struct clk_init_data){
+			.name = "slimbus_src",
+			.parent_names = lcc_cxo_pll4,
+			.num_parents = 2,
+			.ops = &clk_rcg_ops,
+			.flags = CLK_SET_RATE_GATE,
+		},
+	},
+};
+
+static const char * const lcc_slimbus_parents[] = {
+	"slimbus_src",
+};
+
+static struct clk_branch audio_slimbus_clk = {
+	.halt_reg = 0xd4,
+	.halt_bit = 0,
+	.halt_check = BRANCH_HALT_ENABLE,
+	.clkr = {
+		.enable_reg = 0xcc,
+		.enable_mask = BIT(10),
+		.hw.init = &(struct clk_init_data){
+			.name = "audio_slimbus_clk",
+			.parent_names = lcc_slimbus_parents,
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_branch sps_slimbus_clk = {
+	.halt_reg = 0xd4,
+	.halt_bit = 1,
+	.halt_check = BRANCH_HALT_ENABLE,
+	.clkr = {
+		.enable_reg = 0xcc,
+		.enable_mask = BIT(12),
+		.hw.init = &(struct clk_init_data){
+			.name = "sps_slimbus_clk",
+			.parent_names = lcc_slimbus_parents,
+			.num_parents = 1,
+			.ops = &clk_branch_ops,
+			.flags = CLK_SET_RATE_PARENT,
+		},
+	},
+};
+
+static struct clk_regmap *lcc_mdm9615_clks[] = {
+	[PLL4] = &pll4.clkr,
+	[MI2S_OSR_SRC] = &mi2s_osr_src.clkr,
+	[MI2S_OSR_CLK] = &mi2s_osr_clk.clkr,
+	[MI2S_DIV_CLK] = &mi2s_div_clk.clkr,
+	[MI2S_BIT_DIV_CLK] = &mi2s_bit_div_clk.clkr,
+	[MI2S_BIT_CLK] = &mi2s_bit_clk.clkr,
+	[PCM_SRC] = &pcm_src.clkr,
+	[PCM_CLK_OUT] = &pcm_clk_out.clkr,
+	[PCM_CLK] = &pcm_clk.clkr,
+	[SLIMBUS_SRC] = &slimbus_src.clkr,
+	[AUDIO_SLIMBUS_CLK] = &audio_slimbus_clk.clkr,
+	[SPS_SLIMBUS_CLK] = &sps_slimbus_clk.clkr,
+	[CODEC_I2S_MIC_OSR_SRC] = &codec_i2s_mic_osr_src.clkr,
+	[CODEC_I2S_MIC_OSR_CLK] = &codec_i2s_mic_osr_clk.clkr,
+	[CODEC_I2S_MIC_DIV_CLK] = &codec_i2s_mic_div_clk.clkr,
+	[CODEC_I2S_MIC_BIT_DIV_CLK] = &codec_i2s_mic_bit_div_clk.clkr,
+	[CODEC_I2S_MIC_BIT_CLK] = &codec_i2s_mic_bit_clk.clkr,
+	[SPARE_I2S_MIC_OSR_SRC] = &spare_i2s_mic_osr_src.clkr,
+	[SPARE_I2S_MIC_OSR_CLK] = &spare_i2s_mic_osr_clk.clkr,
+	[SPARE_I2S_MIC_DIV_CLK] = &spare_i2s_mic_div_clk.clkr,
+	[SPARE_I2S_MIC_BIT_DIV_CLK] = &spare_i2s_mic_bit_div_clk.clkr,
+	[SPARE_I2S_MIC_BIT_CLK] = &spare_i2s_mic_bit_clk.clkr,
+	[CODEC_I2S_SPKR_OSR_SRC] = &codec_i2s_spkr_osr_src.clkr,
+	[CODEC_I2S_SPKR_OSR_CLK] = &codec_i2s_spkr_osr_clk.clkr,
+	[CODEC_I2S_SPKR_DIV_CLK] = &codec_i2s_spkr_div_clk.clkr,
+	[CODEC_I2S_SPKR_BIT_DIV_CLK] = &codec_i2s_spkr_bit_div_clk.clkr,
+	[CODEC_I2S_SPKR_BIT_CLK] = &codec_i2s_spkr_bit_clk.clkr,
+	[SPARE_I2S_SPKR_OSR_SRC] = &spare_i2s_spkr_osr_src.clkr,
+	[SPARE_I2S_SPKR_OSR_CLK] = &spare_i2s_spkr_osr_clk.clkr,
+	[SPARE_I2S_SPKR_DIV_CLK] = &spare_i2s_spkr_div_clk.clkr,
+	[SPARE_I2S_SPKR_BIT_DIV_CLK] = &spare_i2s_spkr_bit_div_clk.clkr,
+	[SPARE_I2S_SPKR_BIT_CLK] = &spare_i2s_spkr_bit_clk.clkr,
+};
+
+static const struct regmap_config lcc_mdm9615_regmap_config = {
+	.reg_bits	= 32,
+	.reg_stride	= 4,
+	.val_bits	= 32,
+	.max_register	= 0xfc,
+	.fast_io	= true,
+};
+
+static const struct qcom_cc_desc lcc_mdm9615_desc = {
+	.config = &lcc_mdm9615_regmap_config,
+	.clks = lcc_mdm9615_clks,
+	.num_clks = ARRAY_SIZE(lcc_mdm9615_clks),
+};
+
+static const struct of_device_id lcc_mdm9615_match_table[] = {
+	{ .compatible = "qcom,lcc-mdm9615" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, lcc_mdm9615_match_table);
+
+static int lcc_mdm9615_probe(struct platform_device *pdev)
+{
+	u32 val;
+	struct regmap *regmap;
+
+	regmap = qcom_cc_map(pdev, &lcc_mdm9615_desc);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	/* Use the correct frequency plan depending on speed of PLL4 */
+	regmap_read(regmap, 0x4, &val);
+	if (val == 0x12) {
+		slimbus_src.freq_tbl = clk_tbl_aif_osr_492;
+		mi2s_osr_src.freq_tbl = clk_tbl_aif_osr_492;
+		codec_i2s_mic_osr_src.freq_tbl = clk_tbl_aif_osr_492;
+		spare_i2s_mic_osr_src.freq_tbl = clk_tbl_aif_osr_492;
+		codec_i2s_spkr_osr_src.freq_tbl = clk_tbl_aif_osr_492;
+		spare_i2s_spkr_osr_src.freq_tbl = clk_tbl_aif_osr_492;
+		pcm_src.freq_tbl = clk_tbl_pcm_492;
+	}
+	/* Enable PLL4 source on the LPASS Primary PLL Mux */
+	regmap_write(regmap, 0xc4, 0x1);
+
+	return qcom_cc_really_probe(pdev, &lcc_mdm9615_desc, regmap);
+}
+
+static struct platform_driver lcc_mdm9615_driver = {
+	.probe		= lcc_mdm9615_probe,
+	.driver		= {
+		.name	= "lcc-mdm9615",
+		.of_match_table = lcc_mdm9615_match_table,
+	},
+};
+module_platform_driver(lcc_mdm9615_driver);
+
+MODULE_DESCRIPTION("QCOM LCC MDM9615 Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:lcc-mdm9615");
diff --git a/drivers/clk/qcom/mmcc-msm8996.c b/drivers/clk/qcom/mmcc-msm8996.c
index 847dd9da..ca97e11 100644
--- a/drivers/clk/qcom/mmcc-msm8996.c
+++ b/drivers/clk/qcom/mmcc-msm8996.c
@@ -2888,6 +2888,14 @@
 	&gpll0_div.hw,
 };
 
+static struct gdsc mmagic_bimc_gdsc = {
+	.gdscr = 0x529c,
+	.pd = {
+		.name = "mmagic_bimc",
+	},
+	.pwrsts = PWRSTS_OFF_ON,
+};
+
 static struct gdsc mmagic_video_gdsc = {
 	.gdscr = 0x119c,
 	.gds_hw_ctrl = 0x120c,
@@ -3201,6 +3209,7 @@
 };
 
 static struct gdsc *mmcc_msm8996_gdscs[] = {
+	[MMAGIC_BIMC_GDSC] = &mmagic_bimc_gdsc,
 	[MMAGIC_VIDEO_GDSC] = &mmagic_video_gdsc,
 	[MMAGIC_MDSS_GDSC] = &mmagic_mdss_gdsc,
 	[MMAGIC_CAMSS_GDSC] = &mmagic_camss_gdsc,
@@ -3305,9 +3314,8 @@
 
 static int mmcc_msm8996_probe(struct platform_device *pdev)
 {
-	struct clk *clk;
 	struct device *dev = &pdev->dev;
-	int i;
+	int i, ret;
 	struct regmap *regmap;
 
 	regmap = qcom_cc_map(pdev, &mmcc_msm8996_desc);
@@ -3320,9 +3328,9 @@
 	regmap_update_bits(regmap, 0x5054, BIT(15), 0);
 
 	for (i = 0; i < ARRAY_SIZE(mmcc_msm8996_hws); i++) {
-		clk = devm_clk_register(dev, mmcc_msm8996_hws[i]);
-		if (IS_ERR(clk))
-			return PTR_ERR(clk);
+		ret = devm_clk_hw_register(dev, mmcc_msm8996_hws[i]);
+		if (ret)
+			return ret;
 	}
 
 	return qcom_cc_really_probe(pdev, &mmcc_msm8996_desc, regmap);
diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c
index 5093a25..9375777 100644
--- a/drivers/clk/renesas/clk-mstp.c
+++ b/drivers/clk/renesas/clk-mstp.c
@@ -167,7 +167,7 @@
 	unsigned int i;
 
 	group = kzalloc(sizeof(*group), GFP_KERNEL);
-	clks = kmalloc(MSTP_MAX_CLOCKS * sizeof(*clks), GFP_KERNEL);
+	clks = kmalloc_array(MSTP_MAX_CLOCKS, sizeof(*clks), GFP_KERNEL);
 	if (group == NULL || clks == NULL) {
 		kfree(group);
 		kfree(clks);
diff --git a/drivers/clk/renesas/clk-rz.c b/drivers/clk/renesas/clk-rz.c
index f6312c6..5adb934 100644
--- a/drivers/clk/renesas/clk-rz.c
+++ b/drivers/clk/renesas/clk-rz.c
@@ -25,10 +25,31 @@
 #define CPG_FRQCR	0x10
 #define CPG_FRQCR2	0x14
 
+#define PPR0		0xFCFE3200
+#define PIBC0		0xFCFE7000
+
+#define MD_CLK(x)	((x >> 2) & 1)	/* P0_2 */
+
 /* -----------------------------------------------------------------------------
  * Initialization
  */
 
+static u16 __init rz_cpg_read_mode_pins(void)
+{
+	void __iomem *ppr0, *pibc0;
+	u16 modes;
+
+	ppr0 = ioremap_nocache(PPR0, 2);
+	pibc0 = ioremap_nocache(PIBC0, 2);
+	BUG_ON(!ppr0 || !pibc0);
+	iowrite16(4, pibc0);	/* enable input buffer */
+	modes = ioread16(ppr0);
+	iounmap(ppr0);
+	iounmap(pibc0);
+
+	return modes;
+}
+
 static struct clk * __init
 rz_cpg_register_clock(struct device_node *np, struct rz_cpg *cpg, const char *name)
 {
@@ -37,8 +58,7 @@
 	static const unsigned frqcr_tab[4] = { 3, 2, 0, 1 };
 
 	if (strcmp(name, "pll") == 0) {
-		/* FIXME: cpg_mode should be read from GPIO. But no GPIO support yet */
-		unsigned cpg_mode = 0; /* hardcoded to EXTAL for now */
+		unsigned int cpg_mode = MD_CLK(rz_cpg_read_mode_pins());
 		const char *parent_name = of_clk_get_parent_name(np, cpg_mode);
 
 		mult = cpg_mode ? (32 / 4) : 30;
diff --git a/drivers/clk/renesas/r8a7795-cpg-mssr.c b/drivers/clk/renesas/r8a7795-cpg-mssr.c
index d359c92..f255e45 100644
--- a/drivers/clk/renesas/r8a7795-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7795-cpg-mssr.c
@@ -69,6 +69,7 @@
 	DEF_FIXED(".s1",        CLK_S1,            CLK_PLL1_DIV2,  3, 1),
 	DEF_FIXED(".s2",        CLK_S2,            CLK_PLL1_DIV2,  4, 1),
 	DEF_FIXED(".s3",        CLK_S3,            CLK_PLL1_DIV2,  6, 1),
+	DEF_FIXED(".sdsrc",     CLK_SDSRC,         CLK_PLL1_DIV2,  2, 1),
 
 	/* Core Clock Outputs */
 	DEF_FIXED("ztr",        R8A7795_CLK_ZTR,   CLK_PLL1_DIV2,  6, 1),
@@ -87,10 +88,10 @@
 	DEF_FIXED("s3d2",       R8A7795_CLK_S3D2,  CLK_S3,         2, 1),
 	DEF_FIXED("s3d4",       R8A7795_CLK_S3D4,  CLK_S3,         4, 1),
 
-	DEF_GEN3_SD("sd0",      R8A7795_CLK_SD0,   CLK_PLL1_DIV2, 0x0074),
-	DEF_GEN3_SD("sd1",      R8A7795_CLK_SD1,   CLK_PLL1_DIV2, 0x0078),
-	DEF_GEN3_SD("sd2",      R8A7795_CLK_SD2,   CLK_PLL1_DIV2, 0x0268),
-	DEF_GEN3_SD("sd3",      R8A7795_CLK_SD3,   CLK_PLL1_DIV2, 0x026c),
+	DEF_GEN3_SD("sd0",      R8A7795_CLK_SD0,   CLK_SDSRC,     0x0074),
+	DEF_GEN3_SD("sd1",      R8A7795_CLK_SD1,   CLK_SDSRC,     0x0078),
+	DEF_GEN3_SD("sd2",      R8A7795_CLK_SD2,   CLK_SDSRC,     0x0268),
+	DEF_GEN3_SD("sd3",      R8A7795_CLK_SD3,   CLK_SDSRC,     0x026c),
 
 	DEF_FIXED("cl",         R8A7795_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
 	DEF_FIXED("cp",         R8A7795_CLK_CP,    CLK_EXTAL,      2, 1),
@@ -122,6 +123,10 @@
 	DEF_MOD("sys-dmac2",		 217,	R8A7795_CLK_S3D1),
 	DEF_MOD("sys-dmac1",		 218,	R8A7795_CLK_S3D1),
 	DEF_MOD("sys-dmac0",		 219,	R8A7795_CLK_S3D1),
+	DEF_MOD("cmt3",			 300,	R8A7795_CLK_R),
+	DEF_MOD("cmt2",			 301,	R8A7795_CLK_R),
+	DEF_MOD("cmt1",			 302,	R8A7795_CLK_R),
+	DEF_MOD("cmt0",			 303,	R8A7795_CLK_R),
 	DEF_MOD("scif2",		 310,	R8A7795_CLK_S3D4),
 	DEF_MOD("sdif3",		 311,	R8A7795_CLK_SD3),
 	DEF_MOD("sdif2",		 312,	R8A7795_CLK_SD2),
diff --git a/drivers/clk/renesas/r8a7796-cpg-mssr.c b/drivers/clk/renesas/r8a7796-cpg-mssr.c
index c84b549..eb347ed 100644
--- a/drivers/clk/renesas/r8a7796-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7796-cpg-mssr.c
@@ -45,6 +45,7 @@
 	CLK_S3,
 	CLK_SDSRC,
 	CLK_SSPSRC,
+	CLK_RINT,
 
 	/* Module Clocks */
 	MOD_CLK_BASE
@@ -69,6 +70,7 @@
 	DEF_FIXED(".s1",        CLK_S1,            CLK_PLL1_DIV2,  3, 1),
 	DEF_FIXED(".s2",        CLK_S2,            CLK_PLL1_DIV2,  4, 1),
 	DEF_FIXED(".s3",        CLK_S3,            CLK_PLL1_DIV2,  6, 1),
+	DEF_FIXED(".sdsrc",     CLK_SDSRC,         CLK_PLL1_DIV2,  2, 1),
 
 	/* Core Clock Outputs */
 	DEF_FIXED("ztr",        R8A7796_CLK_ZTR,   CLK_PLL1_DIV2,  6, 1),
@@ -92,13 +94,42 @@
 	DEF_FIXED("s3d2",       R8A7796_CLK_S3D2,  CLK_S3,         2, 1),
 	DEF_FIXED("s3d4",       R8A7796_CLK_S3D4,  CLK_S3,         4, 1),
 
+	DEF_GEN3_SD("sd0",      R8A7796_CLK_SD0,   CLK_SDSRC,    0x0074),
+	DEF_GEN3_SD("sd1",      R8A7796_CLK_SD1,   CLK_SDSRC,    0x0078),
+	DEF_GEN3_SD("sd2",      R8A7796_CLK_SD2,   CLK_SDSRC,    0x0268),
+	DEF_GEN3_SD("sd3",      R8A7796_CLK_SD3,   CLK_SDSRC,    0x026c),
+
 	DEF_FIXED("cl",         R8A7796_CLK_CL,    CLK_PLL1_DIV2, 48, 1),
 	DEF_FIXED("cp",         R8A7796_CLK_CP,    CLK_EXTAL,      2, 1),
+
+	DEF_DIV6_RO("osc",      R8A7796_CLK_OSC,   CLK_EXTAL, CPG_RCKCR,  8),
+	DEF_DIV6_RO("r_int",    CLK_RINT,          CLK_EXTAL, CPG_RCKCR, 32),
+
+	DEF_BASE("r",           R8A7796_CLK_R,     CLK_TYPE_GEN3_R, CLK_RINT),
 };
 
 static const struct mssr_mod_clk r8a7796_mod_clks[] __initconst = {
+	DEF_MOD("cmt3",			 300,	R8A7796_CLK_R),
+	DEF_MOD("cmt2",			 301,	R8A7796_CLK_R),
+	DEF_MOD("cmt1",			 302,	R8A7796_CLK_R),
+	DEF_MOD("cmt0",			 303,	R8A7796_CLK_R),
 	DEF_MOD("scif2",		 310,	R8A7796_CLK_S3D4),
+	DEF_MOD("sdif3",		 311,	R8A7796_CLK_SD3),
+	DEF_MOD("sdif2",		 312,	R8A7796_CLK_SD2),
+	DEF_MOD("sdif1",		 313,	R8A7796_CLK_SD1),
+	DEF_MOD("sdif0",		 314,	R8A7796_CLK_SD0),
+	DEF_MOD("rwdt0",		 402,	R8A7796_CLK_R),
 	DEF_MOD("intc-ap",		 408,	R8A7796_CLK_S3D1),
+	DEF_MOD("thermal",		 522,	R8A7796_CLK_CP),
+	DEF_MOD("etheravb",		 812,	R8A7796_CLK_S0D6),
+	DEF_MOD("gpio7",		 905,	R8A7796_CLK_S3D4),
+	DEF_MOD("gpio6",		 906,	R8A7796_CLK_S3D4),
+	DEF_MOD("gpio5",		 907,	R8A7796_CLK_S3D4),
+	DEF_MOD("gpio4",		 908,	R8A7796_CLK_S3D4),
+	DEF_MOD("gpio3",		 909,	R8A7796_CLK_S3D4),
+	DEF_MOD("gpio2",		 910,	R8A7796_CLK_S3D4),
+	DEF_MOD("gpio1",		 911,	R8A7796_CLK_S3D4),
+	DEF_MOD("gpio0",		 912,	R8A7796_CLK_S3D4),
 };
 
 static const unsigned int r8a7796_crit_mod_clks[] __initconst = {
diff --git a/drivers/clk/rockchip/Makefile b/drivers/clk/rockchip/Makefile
index f47a2fa..b5f2c8e 100644
--- a/drivers/clk/rockchip/Makefile
+++ b/drivers/clk/rockchip/Makefile
@@ -8,6 +8,7 @@
 obj-y	+= clk-cpu.o
 obj-y	+= clk-inverter.o
 obj-y	+= clk-mmc-phase.o
+obj-y	+= clk-ddr.o
 obj-$(CONFIG_RESET_CONTROLLER)	+= softrst.o
 
 obj-y	+= clk-rk3036.o
diff --git a/drivers/clk/rockchip/clk-ddr.c b/drivers/clk/rockchip/clk-ddr.c
new file mode 100644
index 0000000..8feba93
--- /dev/null
+++ b/drivers/clk/rockchip/clk-ddr.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2016 Rockchip Electronics Co. Ltd.
+ * Author: Lin Huang <hl@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <soc/rockchip/rockchip_sip.h>
+#include "clk.h"
+
+struct rockchip_ddrclk {
+	struct clk_hw	hw;
+	void __iomem	*reg_base;
+	int		mux_offset;
+	int		mux_shift;
+	int		mux_width;
+	int		div_shift;
+	int		div_width;
+	int		ddr_flag;
+	spinlock_t	*lock;
+};
+
+#define to_rockchip_ddrclk_hw(hw) container_of(hw, struct rockchip_ddrclk, hw)
+
+static int rockchip_ddrclk_sip_set_rate(struct clk_hw *hw, unsigned long drate,
+					unsigned long prate)
+{
+	struct rockchip_ddrclk *ddrclk = to_rockchip_ddrclk_hw(hw);
+	unsigned long flags;
+	struct arm_smccc_res res;
+
+	spin_lock_irqsave(ddrclk->lock, flags);
+	arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, drate, 0,
+		      ROCKCHIP_SIP_CONFIG_DRAM_SET_RATE,
+		      0, 0, 0, 0, &res);
+	spin_unlock_irqrestore(ddrclk->lock, flags);
+
+	return res.a0;
+}
+
+static unsigned long
+rockchip_ddrclk_sip_recalc_rate(struct clk_hw *hw,
+				unsigned long parent_rate)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
+		      ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE,
+		      0, 0, 0, 0, &res);
+
+	return res.a0;
+}
+
+static long rockchip_ddrclk_sip_round_rate(struct clk_hw *hw,
+					   unsigned long rate,
+					   unsigned long *prate)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, rate, 0,
+		      ROCKCHIP_SIP_CONFIG_DRAM_ROUND_RATE,
+		      0, 0, 0, 0, &res);
+
+	return res.a0;
+}
+
+static u8 rockchip_ddrclk_get_parent(struct clk_hw *hw)
+{
+	struct rockchip_ddrclk *ddrclk = to_rockchip_ddrclk_hw(hw);
+	int num_parents = clk_hw_get_num_parents(hw);
+	u32 val;
+
+	val = clk_readl(ddrclk->reg_base +
+			ddrclk->mux_offset) >> ddrclk->mux_shift;
+	val &= GENMASK(ddrclk->mux_width - 1, 0);
+
+	if (val >= num_parents)
+		return -EINVAL;
+
+	return val;
+}
+
+static const struct clk_ops rockchip_ddrclk_sip_ops = {
+	.recalc_rate = rockchip_ddrclk_sip_recalc_rate,
+	.set_rate = rockchip_ddrclk_sip_set_rate,
+	.round_rate = rockchip_ddrclk_sip_round_rate,
+	.get_parent = rockchip_ddrclk_get_parent,
+};
+
+struct clk *rockchip_clk_register_ddrclk(const char *name, int flags,
+					 const char *const *parent_names,
+					 u8 num_parents, int mux_offset,
+					 int mux_shift, int mux_width,
+					 int div_shift, int div_width,
+					 int ddr_flag, void __iomem *reg_base,
+					 spinlock_t *lock)
+{
+	struct rockchip_ddrclk *ddrclk;
+	struct clk_init_data init;
+	struct clk *clk;
+
+	ddrclk = kzalloc(sizeof(*ddrclk), GFP_KERNEL);
+	if (!ddrclk)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.parent_names = parent_names;
+	init.num_parents = num_parents;
+
+	init.flags = flags;
+	init.flags |= CLK_SET_RATE_NO_REPARENT;
+
+	switch (ddr_flag) {
+	case ROCKCHIP_DDRCLK_SIP:
+		init.ops = &rockchip_ddrclk_sip_ops;
+		break;
+	default:
+		pr_err("%s: unsupported ddrclk type %d\n", __func__, ddr_flag);
+		kfree(ddrclk);
+		return ERR_PTR(-EINVAL);
+	}
+
+	ddrclk->reg_base = reg_base;
+	ddrclk->lock = lock;
+	ddrclk->hw.init = &init;
+	ddrclk->mux_offset = mux_offset;
+	ddrclk->mux_shift = mux_shift;
+	ddrclk->mux_width = mux_width;
+	ddrclk->div_shift = div_shift;
+	ddrclk->div_width = div_width;
+	ddrclk->ddr_flag = ddr_flag;
+
+	clk = clk_register(NULL, &ddrclk->hw);
+	if (IS_ERR(clk)) {
+		pr_err("%s: could not register ddrclk %s\n", __func__,	name);
+		kfree(ddrclk);
+		return NULL;
+	}
+
+	return clk;
+}
diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c
index db81e45..9c1373e 100644
--- a/drivers/clk/rockchip/clk-pll.c
+++ b/drivers/clk/rockchip/clk-pll.c
@@ -837,7 +837,7 @@
 		u8 num_parents, int con_offset, int grf_lock_offset,
 		int lock_shift, int mode_offset, int mode_shift,
 		struct rockchip_pll_rate_table *rate_table,
-		u8 clk_pll_flags)
+		unsigned long flags, u8 clk_pll_flags)
 {
 	const char *pll_parents[3];
 	struct clk_init_data init;
@@ -892,7 +892,7 @@
 	init.name = pll_name;
 
 	/* keep all plls untouched for now */
-	init.flags = CLK_IGNORE_UNUSED;
+	init.flags = flags | CLK_IGNORE_UNUSED;
 
 	init.parent_names = &parent_names[0];
 	init.num_parents = 1;
diff --git a/drivers/clk/rockchip/clk-rk3399.c b/drivers/clk/rockchip/clk-rk3399.c
index c109d80..8387c7a 100644
--- a/drivers/clk/rockchip/clk-rk3399.c
+++ b/drivers/clk/rockchip/clk-rk3399.c
@@ -100,8 +100,10 @@
 	RK3036_PLL_RATE( 297000000, 1, 99, 4, 2, 1, 0),
 	RK3036_PLL_RATE( 216000000, 1, 72, 4, 2, 1, 0),
 	RK3036_PLL_RATE( 148500000, 1, 99, 4, 4, 1, 0),
+	RK3036_PLL_RATE( 106500000, 1, 71, 4, 4, 1, 0),
 	RK3036_PLL_RATE(  96000000, 1, 64, 4, 4, 1, 0),
 	RK3036_PLL_RATE(  74250000, 2, 99, 4, 4, 1, 0),
+	RK3036_PLL_RATE(  65000000, 1, 65, 6, 4, 1, 0),
 	RK3036_PLL_RATE(  54000000, 1, 54, 6, 4, 1, 0),
 	RK3036_PLL_RATE(  27000000, 1, 27, 6, 4, 1, 0),
 	{ /* sentinel */ },
@@ -118,6 +120,10 @@
 						    "clk_core_b_bpll_src",
 						    "clk_core_b_dpll_src",
 						    "clk_core_b_gpll_src" };
+PNAME(mux_ddrclk_p)				= { "clk_ddrc_lpll_src",
+						    "clk_ddrc_bpll_src",
+						    "clk_ddrc_dpll_src",
+						    "clk_ddrc_gpll_src" };
 PNAME(mux_aclk_cci_p)				= { "cpll_aclk_cci_src",
 						    "gpll_aclk_cci_src",
 						    "npll_aclk_cci_src",
@@ -373,6 +379,7 @@
 	RK3399_CPUCLKB_RATE(2184000000, 1, 11, 11),
 	RK3399_CPUCLKB_RATE(2088000000, 1, 10, 10),
 	RK3399_CPUCLKB_RATE(2040000000, 1, 10, 10),
+	RK3399_CPUCLKB_RATE(2016000000, 1, 9, 9),
 	RK3399_CPUCLKB_RATE(1992000000, 1, 9, 9),
 	RK3399_CPUCLKB_RATE(1896000000, 1, 9, 9),
 	RK3399_CPUCLKB_RATE(1800000000, 1, 8, 8),
@@ -578,7 +585,7 @@
 	COMPOSITE(0, "clk_spdif_div", mux_pll_src_cpll_gpll_p, 0,
 			RK3399_CLKSEL_CON(32), 7, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3399_CLKGATE_CON(8), 13, GFLAGS),
-	COMPOSITE_FRACMUX(0, "clk_spdif_frac", "clk_spdif_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX(0, "clk_spdif_frac", "clk_spdif_div", 0,
 			RK3399_CLKSEL_CON(99), 0,
 			RK3399_CLKGATE_CON(8), 14, GFLAGS,
 			&rk3399_spdif_fracmux),
@@ -592,7 +599,7 @@
 	COMPOSITE(0, "clk_i2s0_div", mux_pll_src_cpll_gpll_p, 0,
 			RK3399_CLKSEL_CON(28), 7, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3399_CLKGATE_CON(8), 3, GFLAGS),
-	COMPOSITE_FRACMUX(0, "clk_i2s0_frac", "clk_i2s0_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX(0, "clk_i2s0_frac", "clk_i2s0_div", 0,
 			RK3399_CLKSEL_CON(96), 0,
 			RK3399_CLKGATE_CON(8), 4, GFLAGS,
 			&rk3399_i2s0_fracmux),
@@ -602,7 +609,7 @@
 	COMPOSITE(0, "clk_i2s1_div", mux_pll_src_cpll_gpll_p, 0,
 			RK3399_CLKSEL_CON(29), 7, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3399_CLKGATE_CON(8), 6, GFLAGS),
-	COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX(0, "clk_i2s1_frac", "clk_i2s1_div", 0,
 			RK3399_CLKSEL_CON(97), 0,
 			RK3399_CLKGATE_CON(8), 7, GFLAGS,
 			&rk3399_i2s1_fracmux),
@@ -612,7 +619,7 @@
 	COMPOSITE(0, "clk_i2s2_div", mux_pll_src_cpll_gpll_p, 0,
 			RK3399_CLKSEL_CON(30), 7, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3399_CLKGATE_CON(8), 9, GFLAGS),
-	COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX(0, "clk_i2s2_frac", "clk_i2s2_div", 0,
 			RK3399_CLKSEL_CON(98), 0,
 			RK3399_CLKGATE_CON(8), 10, GFLAGS,
 			&rk3399_i2s2_fracmux),
@@ -631,7 +638,7 @@
 	COMPOSITE_NOMUX(0, "clk_uart0_div", "clk_uart0_src", 0,
 			RK3399_CLKSEL_CON(33), 0, 7, DFLAGS,
 			RK3399_CLKGATE_CON(9), 0, GFLAGS),
-	COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX(0, "clk_uart0_frac", "clk_uart0_div", 0,
 			RK3399_CLKSEL_CON(100), 0,
 			RK3399_CLKGATE_CON(9), 1, GFLAGS,
 			&rk3399_uart0_fracmux),
@@ -641,7 +648,7 @@
 	COMPOSITE_NOMUX(0, "clk_uart1_div", "clk_uart_src", 0,
 			RK3399_CLKSEL_CON(34), 0, 7, DFLAGS,
 			RK3399_CLKGATE_CON(9), 2, GFLAGS),
-	COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX(0, "clk_uart1_frac", "clk_uart1_div", 0,
 			RK3399_CLKSEL_CON(101), 0,
 			RK3399_CLKGATE_CON(9), 3, GFLAGS,
 			&rk3399_uart1_fracmux),
@@ -649,7 +656,7 @@
 	COMPOSITE_NOMUX(0, "clk_uart2_div", "clk_uart_src", 0,
 			RK3399_CLKSEL_CON(35), 0, 7, DFLAGS,
 			RK3399_CLKGATE_CON(9), 4, GFLAGS),
-	COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX(0, "clk_uart2_frac", "clk_uart2_div", 0,
 			RK3399_CLKSEL_CON(102), 0,
 			RK3399_CLKGATE_CON(9), 5, GFLAGS,
 			&rk3399_uart2_fracmux),
@@ -657,7 +664,7 @@
 	COMPOSITE_NOMUX(0, "clk_uart3_div", "clk_uart_src", 0,
 			RK3399_CLKSEL_CON(36), 0, 7, DFLAGS,
 			RK3399_CLKGATE_CON(9), 6, GFLAGS),
-	COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX(0, "clk_uart3_frac", "clk_uart3_div", 0,
 			RK3399_CLKSEL_CON(103), 0,
 			RK3399_CLKGATE_CON(9), 7, GFLAGS,
 			&rk3399_uart3_fracmux),
@@ -833,9 +840,9 @@
 
 	/* perihp */
 	GATE(0, "cpll_aclk_perihp_src", "cpll", CLK_IGNORE_UNUSED,
-			RK3399_CLKGATE_CON(5), 0, GFLAGS),
-	GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
 			RK3399_CLKGATE_CON(5), 1, GFLAGS),
+	GATE(0, "gpll_aclk_perihp_src", "gpll", CLK_IGNORE_UNUSED,
+			RK3399_CLKGATE_CON(5), 0, GFLAGS),
 	COMPOSITE(ACLK_PERIHP, "aclk_perihp", mux_aclk_perihp_p, CLK_IGNORE_UNUSED,
 			RK3399_CLKSEL_CON(14), 7, 1, MFLAGS, 0, 5, DFLAGS,
 			RK3399_CLKGATE_CON(5), 2, GFLAGS),
@@ -846,9 +853,9 @@
 			RK3399_CLKSEL_CON(14), 12, 2, DFLAGS,
 			RK3399_CLKGATE_CON(5), 4, GFLAGS),
 
-	GATE(ACLK_PERF_PCIE, "aclk_perf_pcie", "aclk_perihp", CLK_IGNORE_UNUSED,
+	GATE(ACLK_PERF_PCIE, "aclk_perf_pcie", "aclk_perihp", 0,
 			RK3399_CLKGATE_CON(20), 2, GFLAGS),
-	GATE(ACLK_PCIE, "aclk_pcie", "aclk_perihp", CLK_IGNORE_UNUSED,
+	GATE(ACLK_PCIE, "aclk_pcie", "aclk_perihp", 0,
 			RK3399_CLKGATE_CON(20), 10, GFLAGS),
 	GATE(0, "aclk_perihp_noc", "aclk_perihp", CLK_IGNORE_UNUSED,
 			RK3399_CLKGATE_CON(20), 12, GFLAGS),
@@ -923,9 +930,9 @@
 			RK3399_CLKGATE_CON(6), 14, GFLAGS),
 
 	GATE(0, "cpll_aclk_emmc_src", "cpll", CLK_IGNORE_UNUSED,
-			RK3399_CLKGATE_CON(6), 12, GFLAGS),
-	GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED,
 			RK3399_CLKGATE_CON(6), 13, GFLAGS),
+	GATE(0, "gpll_aclk_emmc_src", "gpll", CLK_IGNORE_UNUSED,
+			RK3399_CLKGATE_CON(6), 12, GFLAGS),
 	COMPOSITE_NOGATE(ACLK_EMMC, "aclk_emmc", mux_aclk_emmc_p, CLK_IGNORE_UNUSED,
 			RK3399_CLKSEL_CON(21), 7, 1, MFLAGS, 0, 5, DFLAGS),
 	GATE(ACLK_EMMC_CORE, "aclk_emmccore", "aclk_emmc", CLK_IGNORE_UNUSED,
@@ -1071,7 +1078,7 @@
 	/* vio */
 	COMPOSITE(ACLK_VIO, "aclk_vio", mux_pll_src_cpll_gpll_ppll_p, CLK_IGNORE_UNUSED,
 			RK3399_CLKSEL_CON(42), 6, 2, MFLAGS, 0, 5, DFLAGS,
-			RK3399_CLKGATE_CON(11), 10, GFLAGS),
+			RK3399_CLKGATE_CON(11), 0, GFLAGS),
 	COMPOSITE_NOMUX(PCLK_VIO, "pclk_vio", "aclk_vio", 0,
 			RK3399_CLKSEL_CON(43), 0, 5, DFLAGS,
 			RK3399_CLKGATE_CON(11), 1, GFLAGS),
@@ -1161,7 +1168,7 @@
 			RK3399_CLKSEL_CON(49), 8, 2, MFLAGS, 0, 8, DFLAGS,
 			RK3399_CLKGATE_CON(10), 12, GFLAGS),
 
-	COMPOSITE_FRACMUX_NOGATE(0, "dclk_vop0_frac", "dclk_vop0_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX_NOGATE(DCLK_VOP0_FRAC, "dclk_vop0_frac", "dclk_vop0_div", 0,
 			RK3399_CLKSEL_CON(106), 0,
 			&rk3399_dclk_vop0_fracmux),
 
@@ -1191,7 +1198,7 @@
 			RK3399_CLKSEL_CON(50), 8, 2, MFLAGS, 0, 8, DFLAGS,
 			RK3399_CLKGATE_CON(10), 13, GFLAGS),
 
-	COMPOSITE_FRACMUX_NOGATE(0, "dclk_vop1_frac", "dclk_vop1_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX_NOGATE(DCLK_VOP1_FRAC, "dclk_vop1_frac", "dclk_vop1_div", 0,
 			RK3399_CLKSEL_CON(107), 0,
 			&rk3399_dclk_vop1_fracmux),
 
@@ -1305,7 +1312,7 @@
 	/* testout */
 	MUX(0, "clk_test_pre", mux_pll_src_cpll_gpll_p, CLK_SET_RATE_PARENT,
 			RK3399_CLKSEL_CON(58), 7, 1, MFLAGS),
-	COMPOSITE_FRAC(0, "clk_test_frac", "clk_test_pre", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRAC(0, "clk_test_frac", "clk_test_pre", 0,
 			RK3399_CLKSEL_CON(105), 0,
 			RK3399_CLKGATE_CON(13), 9, GFLAGS),
 
@@ -1377,6 +1384,18 @@
 	COMPOSITE_NOMUX(0, "clk_test", "clk_test_pre", CLK_IGNORE_UNUSED,
 			RK3368_CLKSEL_CON(58), 0, 5, DFLAGS,
 			RK3368_CLKGATE_CON(13), 11, GFLAGS),
+
+	/* ddrc */
+	GATE(0, "clk_ddrc_lpll_src", "lpll", 0, RK3399_CLKGATE_CON(3),
+	     0, GFLAGS),
+	GATE(0, "clk_ddrc_bpll_src", "bpll", 0, RK3399_CLKGATE_CON(3),
+	     1, GFLAGS),
+	GATE(0, "clk_ddrc_dpll_src", "dpll", 0, RK3399_CLKGATE_CON(3),
+	     2, GFLAGS),
+	GATE(0, "clk_ddrc_gpll_src", "gpll", 0, RK3399_CLKGATE_CON(3),
+	     3, GFLAGS),
+	COMPOSITE_DDRCLK(SCLK_DDRC, "sclk_ddrc", mux_ddrclk_p, 0,
+		       RK3399_CLKSEL_CON(6), 4, 2, 0, 0, ROCKCHIP_DDRCLK_SIP),
 };
 
 static struct rockchip_clk_branch rk3399_clk_pmu_branches[] __initdata = {
@@ -1398,7 +1417,7 @@
 			RK3399_PMU_CLKSEL_CON(1), 13, 1, MFLAGS, 8, 5, DFLAGS,
 			RK3399_PMU_CLKGATE_CON(0), 8, GFLAGS),
 
-	COMPOSITE_FRACMUX_NOGATE(0, "clk_wifi_frac", "clk_wifi_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX_NOGATE(0, "clk_wifi_frac", "clk_wifi_div", 0,
 			RK3399_PMU_CLKSEL_CON(7), 0,
 			&rk3399_pmuclk_wifi_fracmux),
 
@@ -1426,7 +1445,7 @@
 			RK3399_PMU_CLKSEL_CON(5), 10, 1, MFLAGS, 0, 7, DFLAGS,
 			RK3399_PMU_CLKGATE_CON(0), 5, GFLAGS),
 
-	COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_div", CLK_SET_RATE_PARENT,
+	COMPOSITE_FRACMUX(0, "clk_uart4_frac", "clk_uart4_div", 0,
 			RK3399_PMU_CLKSEL_CON(6), 0,
 			RK3399_PMU_CLKGATE_CON(0), 6, GFLAGS,
 			&rk3399_uart4_pmu_fracmux),
@@ -1468,6 +1487,9 @@
 	"aclk_cci_pre",
 	"aclk_gic",
 	"aclk_gic_noc",
+	"aclk_hdcp_noc",
+	"hclk_hdcp_noc",
+	"pclk_hdcp_noc",
 	"pclk_perilp0",
 	"pclk_perilp0",
 	"hclk_perilp0",
@@ -1484,9 +1506,14 @@
 	"hclk_perilp1",
 	"hclk_perilp1_noc",
 	"aclk_dmac0_perilp",
+	"aclk_emmc_noc",
 	"gpll_hclk_perilp1_src",
 	"gpll_aclk_perilp0_src",
 	"gpll_aclk_perihp_src",
+	"aclk_vio_noc",
+
+	/* ddrc */
+	"sclk_ddrc"
 };
 
 static const char *const rk3399_pmucru_critical_clocks[] __initconst = {
diff --git a/drivers/clk/rockchip/clk-rockchip.c b/drivers/clk/rockchip/clk-rockchip.c
index 4cf838d5..2c9bb81 100644
--- a/drivers/clk/rockchip/clk-rockchip.c
+++ b/drivers/clk/rockchip/clk-rockchip.c
@@ -49,14 +49,19 @@
 	}
 
 	reg = of_iomap(node, 0);
+	if (!reg)
+		return;
 
 	clk_data = kzalloc(sizeof(struct clk_onecell_data), GFP_KERNEL);
-	if (!clk_data)
+	if (!clk_data) {
+		iounmap(reg);
 		return;
+	}
 
 	clk_data->clks = kzalloc(qty * sizeof(struct clk *), GFP_KERNEL);
 	if (!clk_data->clks) {
 		kfree(clk_data);
+		iounmap(reg);
 		return;
 	}
 
diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index 7ffd134..b886be3 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -385,7 +385,7 @@
 				list->con_offset, grf_lock_offset,
 				list->lock_shift, list->mode_offset,
 				list->mode_shift, list->rate_table,
-				list->pll_flags);
+				list->flags, list->pll_flags);
 		if (IS_ERR(clk)) {
 			pr_err("%s: failed to register clock %s\n", __func__,
 				list->name);
@@ -484,6 +484,15 @@
 				list->gate_offset, list->gate_shift,
 				list->gate_flags, flags, &ctx->lock);
 			break;
+		case branch_ddrclk:
+			clk = rockchip_clk_register_ddrclk(
+				list->name, list->flags,
+				list->parent_names, list->num_parents,
+				list->muxdiv_offset, list->mux_shift,
+				list->mux_width, list->div_shift,
+				list->div_width, list->div_flags,
+				ctx->reg_base, &ctx->lock);
+			break;
 		}
 
 		/* none of the cases above matched */
diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h
index 2194ffa..1653edd 100644
--- a/drivers/clk/rockchip/clk.h
+++ b/drivers/clk/rockchip/clk.h
@@ -238,7 +238,7 @@
 		u8 num_parents, int con_offset, int grf_lock_offset,
 		int lock_shift, int mode_offset, int mode_shift,
 		struct rockchip_pll_rate_table *rate_table,
-		u8 clk_pll_flags);
+		unsigned long flags, u8 clk_pll_flags);
 
 struct rockchip_cpuclk_clksel {
 	int reg;
@@ -281,6 +281,20 @@
 				const char *const *parent_names, u8 num_parents,
 				void __iomem *reg, int shift);
 
+/*
+ * DDRCLK flags, including method of setting the rate
+ * ROCKCHIP_DDRCLK_SIP: use SIP call to bl31 to change ddrclk rate.
+ */
+#define ROCKCHIP_DDRCLK_SIP		BIT(0)
+
+struct clk *rockchip_clk_register_ddrclk(const char *name, int flags,
+					 const char *const *parent_names,
+					 u8 num_parents, int mux_offset,
+					 int mux_shift, int mux_width,
+					 int div_shift, int div_width,
+					 int ddr_flags, void __iomem *reg_base,
+					 spinlock_t *lock);
+
 #define ROCKCHIP_INVERTER_HIWORD_MASK	BIT(0)
 
 struct clk *rockchip_clk_register_inverter(const char *name,
@@ -299,6 +313,7 @@
 	branch_mmc,
 	branch_inverter,
 	branch_factor,
+	branch_ddrclk,
 };
 
 struct rockchip_clk_branch {
@@ -488,6 +503,24 @@
 		.child		= ch,				\
 	}
 
+#define COMPOSITE_DDRCLK(_id, cname, pnames, f, mo, ms, mw,	\
+			 ds, dw, df)				\
+	{							\
+		.id		= _id,				\
+		.branch_type	= branch_ddrclk,		\
+		.name		= cname,			\
+		.parent_names	= pnames,			\
+		.num_parents	= ARRAY_SIZE(pnames),		\
+		.flags		= f,				\
+		.muxdiv_offset  = mo,                           \
+		.mux_shift      = ms,                           \
+		.mux_width      = mw,                           \
+		.div_shift      = ds,                           \
+		.div_width      = dw,                           \
+		.div_flags	= df,				\
+		.gate_offset    = -1,                           \
+	}
+
 #define MUX(_id, cname, pnames, f, o, s, w, mf)			\
 	{							\
 		.id		= _id,				\
diff --git a/drivers/clk/samsung/clk-exynos-audss.c b/drivers/clk/samsung/clk-exynos-audss.c
index bdf8b97..51d152f 100644
--- a/drivers/clk/samsung/clk-exynos-audss.c
+++ b/drivers/clk/samsung/clk-exynos-audss.c
@@ -14,18 +14,13 @@
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
 #include <linux/syscore_ops.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 
 #include <dt-bindings/clock/exynos-audss-clk.h>
 
-enum exynos_audss_clk_type {
-	TYPE_EXYNOS4210,
-	TYPE_EXYNOS5250,
-	TYPE_EXYNOS5420,
-};
-
 static DEFINE_SPINLOCK(lock);
 static struct clk **clk_table;
 static void __iomem *reg_base;
@@ -44,9 +39,9 @@
 
 #ifdef CONFIG_PM_SLEEP
 static unsigned long reg_save[][2] = {
-	{ASS_CLK_SRC,  0},
-	{ASS_CLK_DIV,  0},
-	{ASS_CLK_GATE, 0},
+	{ ASS_CLK_SRC,  0 },
+	{ ASS_CLK_DIV,  0 },
+	{ ASS_CLK_GATE, 0 },
 };
 
 static int exynos_audss_clk_suspend(void)
@@ -73,14 +68,43 @@
 };
 #endif /* CONFIG_PM_SLEEP */
 
+struct exynos_audss_clk_drvdata {
+	unsigned int has_adma_clk:1;
+	unsigned int has_mst_clk:1;
+	unsigned int enable_epll:1;
+	unsigned int num_clks;
+};
+
+static const struct exynos_audss_clk_drvdata exynos4210_drvdata = {
+	.num_clks	= EXYNOS_AUDSS_MAX_CLKS - 1,
+};
+
+static const struct exynos_audss_clk_drvdata exynos5410_drvdata = {
+	.num_clks	= EXYNOS_AUDSS_MAX_CLKS - 1,
+	.has_mst_clk	= 1,
+};
+
+static const struct exynos_audss_clk_drvdata exynos5420_drvdata = {
+	.num_clks	= EXYNOS_AUDSS_MAX_CLKS,
+	.has_adma_clk	= 1,
+	.enable_epll	= 1,
+};
+
 static const struct of_device_id exynos_audss_clk_of_match[] = {
-	{ .compatible = "samsung,exynos4210-audss-clock",
-	  .data = (void *)TYPE_EXYNOS4210, },
-	{ .compatible = "samsung,exynos5250-audss-clock",
-	  .data = (void *)TYPE_EXYNOS5250, },
-	{ .compatible = "samsung,exynos5420-audss-clock",
-	  .data = (void *)TYPE_EXYNOS5420, },
-	{},
+	{
+		.compatible	= "samsung,exynos4210-audss-clock",
+		.data		= &exynos4210_drvdata,
+	}, {
+		.compatible	= "samsung,exynos5250-audss-clock",
+		.data		= &exynos4210_drvdata,
+	}, {
+		.compatible	= "samsung,exynos5410-audss-clock",
+		.data		= &exynos5410_drvdata,
+	}, {
+		.compatible	= "samsung,exynos5420-audss-clock",
+		.data		= &exynos5420_drvdata,
+	},
+	{ },
 };
 
 static void exynos_audss_clk_teardown(void)
@@ -106,19 +130,17 @@
 /* register exynos_audss clocks */
 static int exynos_audss_clk_probe(struct platform_device *pdev)
 {
-	int i, ret = 0;
-	struct resource *res;
 	const char *mout_audss_p[] = {"fin_pll", "fout_epll"};
 	const char *mout_i2s_p[] = {"mout_audss", "cdclk0", "sclk_audio0"};
 	const char *sclk_pcm_p = "sclk_pcm0";
 	struct clk *pll_ref, *pll_in, *cdclk, *sclk_audio, *sclk_pcm_in;
-	const struct of_device_id *match;
-	enum exynos_audss_clk_type variant;
+	const struct exynos_audss_clk_drvdata *variant;
+	struct resource *res;
+	int i, ret = 0;
 
-	match = of_match_node(exynos_audss_clk_of_match, pdev->dev.of_node);
-	if (!match)
+	variant = of_device_get_match_data(&pdev->dev);
+	if (!variant)
 		return -EINVAL;
-	variant = (enum exynos_audss_clk_type)match->data;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	reg_base = devm_ioremap_resource(&pdev->dev, res);
@@ -126,7 +148,7 @@
 		dev_err(&pdev->dev, "failed to map audss registers\n");
 		return PTR_ERR(reg_base);
 	}
-	/* EPLL don't have to be enabled for boards other than Exynos5420 */
+
 	epll = ERR_PTR(-ENODEV);
 
 	clk_table = devm_kzalloc(&pdev->dev,
@@ -136,10 +158,7 @@
 		return -ENOMEM;
 
 	clk_data.clks = clk_table;
-	if (variant == TYPE_EXYNOS5420)
-		clk_data.clk_num = EXYNOS_AUDSS_MAX_CLKS;
-	else
-		clk_data.clk_num = EXYNOS_AUDSS_MAX_CLKS - 1;
+	clk_data.clk_num = variant->num_clks;
 
 	pll_ref = devm_clk_get(&pdev->dev, "pll_ref");
 	pll_in = devm_clk_get(&pdev->dev, "pll_in");
@@ -148,13 +167,13 @@
 	if (!IS_ERR(pll_in)) {
 		mout_audss_p[1] = __clk_get_name(pll_in);
 
-		if (variant == TYPE_EXYNOS5420) {
+		if (variant->enable_epll) {
 			epll = pll_in;
 
 			ret = clk_prepare_enable(epll);
 			if (ret) {
 				dev_err(&pdev->dev,
-						"failed to prepare the epll clock\n");
+					"failed to prepare the epll clock\n");
 				return ret;
 			}
 		}
@@ -210,7 +229,7 @@
 				sclk_pcm_p, CLK_SET_RATE_PARENT,
 				reg_base + ASS_CLK_GATE, 5, 0, &lock);
 
-	if (variant == TYPE_EXYNOS5420) {
+	if (variant->has_adma_clk) {
 		clk_table[EXYNOS_ADMA] = clk_register_gate(NULL, "adma",
 				"dout_srp", CLK_SET_RATE_PARENT,
 				reg_base + ASS_CLK_GATE, 9, 0, &lock);
@@ -234,9 +253,6 @@
 #ifdef CONFIG_PM_SLEEP
 	register_syscore_ops(&exynos_audss_clk_syscore_ops);
 #endif
-
-	dev_info(&pdev->dev, "setup completed\n");
-
 	return 0;
 
 unregister:
diff --git a/drivers/clk/samsung/clk-exynos5260.c b/drivers/clk/samsung/clk-exynos5260.c
index a43642c..fd1d9bf 100644
--- a/drivers/clk/samsung/clk-exynos5260.c
+++ b/drivers/clk/samsung/clk-exynos5260.c
@@ -131,21 +131,21 @@
 			EN_IP_AUD, 4, 0, 0),
 };
 
+static const struct samsung_cmu_info aud_cmu __initconst = {
+	.mux_clks	= aud_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(aud_mux_clks),
+	.div_clks	= aud_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(aud_div_clks),
+	.gate_clks	= aud_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(aud_gate_clks),
+	.nr_clk_ids	= AUD_NR_CLK,
+	.clk_regs	= aud_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(aud_clk_regs),
+};
+
 static void __init exynos5260_clk_aud_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.mux_clks = aud_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(aud_mux_clks);
-	cmu.div_clks = aud_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(aud_div_clks);
-	cmu.gate_clks = aud_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(aud_gate_clks);
-	cmu.nr_clk_ids = AUD_NR_CLK;
-	cmu.clk_regs = aud_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(aud_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &aud_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_aud, "samsung,exynos5260-clock-aud",
@@ -321,21 +321,21 @@
 			EN_IP_DISP, 25, 0, 0),
 };
 
+static const struct samsung_cmu_info disp_cmu __initconst = {
+	.mux_clks	= disp_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(disp_mux_clks),
+	.div_clks	= disp_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(disp_div_clks),
+	.gate_clks	= disp_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(disp_gate_clks),
+	.nr_clk_ids	= DISP_NR_CLK,
+	.clk_regs	= disp_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(disp_clk_regs),
+};
+
 static void __init exynos5260_clk_disp_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.mux_clks = disp_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(disp_mux_clks);
-	cmu.div_clks = disp_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(disp_div_clks);
-	cmu.gate_clks = disp_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(disp_gate_clks);
-	cmu.nr_clk_ids = DISP_NR_CLK;
-	cmu.clk_regs = disp_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(disp_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &disp_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_disp, "samsung,exynos5260-clock-disp",
@@ -385,21 +385,21 @@
 		pll2550_24mhz_tbl),
 };
 
+static const struct samsung_cmu_info egl_cmu __initconst = {
+	.pll_clks	= egl_pll_clks,
+	.nr_pll_clks	= ARRAY_SIZE(egl_pll_clks),
+	.mux_clks	= egl_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(egl_mux_clks),
+	.div_clks	= egl_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(egl_div_clks),
+	.nr_clk_ids	= EGL_NR_CLK,
+	.clk_regs	= egl_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(egl_clk_regs),
+};
+
 static void __init exynos5260_clk_egl_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.pll_clks = egl_pll_clks;
-	cmu.nr_pll_clks =  ARRAY_SIZE(egl_pll_clks);
-	cmu.mux_clks = egl_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(egl_mux_clks);
-	cmu.div_clks = egl_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(egl_div_clks);
-	cmu.nr_clk_ids = EGL_NR_CLK;
-	cmu.clk_regs = egl_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(egl_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &egl_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_egl, "samsung,exynos5260-clock-egl",
@@ -487,19 +487,19 @@
 			EN_IP_FSYS_SECURE_SMMU_RTIC, 12, 0, 0),
 };
 
+static const struct samsung_cmu_info fsys_cmu __initconst = {
+	.mux_clks	= fsys_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(fsys_mux_clks),
+	.gate_clks	= fsys_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(fsys_gate_clks),
+	.nr_clk_ids	= FSYS_NR_CLK,
+	.clk_regs	= fsys_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(fsys_clk_regs),
+};
+
 static void __init exynos5260_clk_fsys_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.mux_clks = fsys_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(fsys_mux_clks);
-	cmu.gate_clks = fsys_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(fsys_gate_clks);
-	cmu.nr_clk_ids = FSYS_NR_CLK;
-	cmu.clk_regs = fsys_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(fsys_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &fsys_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_fsys, "samsung,exynos5260-clock-fsys",
@@ -576,21 +576,21 @@
 			EN_IP_G2D_SECURE_SMMU_G2D, 15, 0, 0),
 };
 
+static const struct samsung_cmu_info g2d_cmu __initconst = {
+	.mux_clks	= g2d_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(g2d_mux_clks),
+	.div_clks	= g2d_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(g2d_div_clks),
+	.gate_clks	= g2d_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(g2d_gate_clks),
+	.nr_clk_ids	= G2D_NR_CLK,
+	.clk_regs	= g2d_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(g2d_clk_regs),
+};
+
 static void __init exynos5260_clk_g2d_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.mux_clks = g2d_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(g2d_mux_clks);
-	cmu.div_clks = g2d_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(g2d_div_clks);
-	cmu.gate_clks = g2d_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(g2d_gate_clks);
-	cmu.nr_clk_ids = G2D_NR_CLK;
-	cmu.clk_regs = g2d_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(g2d_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &g2d_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_g2d, "samsung,exynos5260-clock-g2d",
@@ -637,23 +637,23 @@
 		pll2550_24mhz_tbl),
 };
 
+static const struct samsung_cmu_info g3d_cmu __initconst = {
+	.pll_clks	= g3d_pll_clks,
+	.nr_pll_clks	= ARRAY_SIZE(g3d_pll_clks),
+	.mux_clks	= g3d_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(g3d_mux_clks),
+	.div_clks	= g3d_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(g3d_div_clks),
+	.gate_clks	= g3d_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(g3d_gate_clks),
+	.nr_clk_ids	= G3D_NR_CLK,
+	.clk_regs	= g3d_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(g3d_clk_regs),
+};
+
 static void __init exynos5260_clk_g3d_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.pll_clks = g3d_pll_clks;
-	cmu.nr_pll_clks =  ARRAY_SIZE(g3d_pll_clks);
-	cmu.mux_clks = g3d_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(g3d_mux_clks);
-	cmu.div_clks = g3d_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(g3d_div_clks);
-	cmu.gate_clks = g3d_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(g3d_gate_clks);
-	cmu.nr_clk_ids = G3D_NR_CLK;
-	cmu.clk_regs = g3d_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(g3d_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &g3d_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_g3d, "samsung,exynos5260-clock-g3d",
@@ -772,21 +772,21 @@
 			EN_IP_GSCL_SECURE_SMMU_MSCL1, 20, 0, 0),
 };
 
+static const struct samsung_cmu_info gscl_cmu __initconst = {
+	.mux_clks	= gscl_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(gscl_mux_clks),
+	.div_clks	= gscl_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(gscl_div_clks),
+	.gate_clks	= gscl_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(gscl_gate_clks),
+	.nr_clk_ids	= GSCL_NR_CLK,
+	.clk_regs	= gscl_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(gscl_clk_regs),
+};
+
 static void __init exynos5260_clk_gscl_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.mux_clks = gscl_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(gscl_mux_clks);
-	cmu.div_clks = gscl_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(gscl_div_clks);
-	cmu.gate_clks = gscl_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(gscl_gate_clks);
-	cmu.nr_clk_ids = GSCL_NR_CLK;
-	cmu.clk_regs = gscl_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(gscl_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &gscl_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_gscl, "samsung,exynos5260-clock-gscl",
@@ -891,21 +891,21 @@
 			EN_SCLK_ISP, 9, CLK_SET_RATE_PARENT, 0),
 };
 
+static const struct samsung_cmu_info isp_cmu __initconst = {
+	.mux_clks	= isp_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(isp_mux_clks),
+	.div_clks	= isp_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(isp_div_clks),
+	.gate_clks	= isp_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(isp_gate_clks),
+	.nr_clk_ids	= ISP_NR_CLK,
+	.clk_regs	= isp_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(isp_clk_regs),
+};
+
 static void __init exynos5260_clk_isp_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.mux_clks = isp_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(isp_mux_clks);
-	cmu.div_clks = isp_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(isp_div_clks);
-	cmu.gate_clks = isp_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(isp_gate_clks);
-	cmu.nr_clk_ids = ISP_NR_CLK;
-	cmu.clk_regs = isp_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(isp_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &isp_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_isp, "samsung,exynos5260-clock-isp",
@@ -955,21 +955,21 @@
 		pll2550_24mhz_tbl),
 };
 
+static const struct samsung_cmu_info kfc_cmu __initconst = {
+	.pll_clks	= kfc_pll_clks,
+	.nr_pll_clks	= ARRAY_SIZE(kfc_pll_clks),
+	.mux_clks	= kfc_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(kfc_mux_clks),
+	.div_clks	= kfc_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(kfc_div_clks),
+	.nr_clk_ids	= KFC_NR_CLK,
+	.clk_regs	= kfc_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(kfc_clk_regs),
+};
+
 static void __init exynos5260_clk_kfc_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.pll_clks = kfc_pll_clks;
-	cmu.nr_pll_clks =  ARRAY_SIZE(kfc_pll_clks);
-	cmu.mux_clks = kfc_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(kfc_mux_clks);
-	cmu.div_clks = kfc_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(kfc_div_clks);
-	cmu.nr_clk_ids = KFC_NR_CLK;
-	cmu.clk_regs = kfc_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(kfc_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &kfc_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_kfc, "samsung,exynos5260-clock-kfc",
@@ -1011,21 +1011,21 @@
 			EN_IP_MFC_SECURE_SMMU2_MFC, 7, 0, 0),
 };
 
+static const struct samsung_cmu_info mfc_cmu __initconst = {
+	.mux_clks	= mfc_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(mfc_mux_clks),
+	.div_clks	= mfc_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(mfc_div_clks),
+	.gate_clks	= mfc_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(mfc_gate_clks),
+	.nr_clk_ids	= MFC_NR_CLK,
+	.clk_regs	= mfc_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(mfc_clk_regs),
+};
+
 static void __init exynos5260_clk_mfc_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.mux_clks = mfc_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(mfc_mux_clks);
-	cmu.div_clks = mfc_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(mfc_div_clks);
-	cmu.gate_clks = mfc_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(mfc_gate_clks);
-	cmu.nr_clk_ids = MFC_NR_CLK;
-	cmu.clk_regs = mfc_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(mfc_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &mfc_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_mfc, "samsung,exynos5260-clock-mfc",
@@ -1158,23 +1158,23 @@
 		pll2550_24mhz_tbl),
 };
 
+static const struct samsung_cmu_info mif_cmu __initconst = {
+	.pll_clks	= mif_pll_clks,
+	.nr_pll_clks	= ARRAY_SIZE(mif_pll_clks),
+	.mux_clks	= mif_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(mif_mux_clks),
+	.div_clks	= mif_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(mif_div_clks),
+	.gate_clks	= mif_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(mif_gate_clks),
+	.nr_clk_ids	= MIF_NR_CLK,
+	.clk_regs	= mif_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(mif_clk_regs),
+};
+
 static void __init exynos5260_clk_mif_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.pll_clks = mif_pll_clks;
-	cmu.nr_pll_clks =  ARRAY_SIZE(mif_pll_clks);
-	cmu.mux_clks = mif_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(mif_mux_clks);
-	cmu.div_clks = mif_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(mif_div_clks);
-	cmu.gate_clks = mif_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(mif_gate_clks);
-	cmu.nr_clk_ids = MIF_NR_CLK;
-	cmu.clk_regs = mif_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(mif_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &mif_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_mif, "samsung,exynos5260-clock-mif",
@@ -1366,21 +1366,21 @@
 		EN_IP_PERI_SECURE_TZPC, 20, 0, 0),
 };
 
+static const struct samsung_cmu_info peri_cmu __initconst = {
+	.mux_clks	= peri_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(peri_mux_clks),
+	.div_clks	= peri_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(peri_div_clks),
+	.gate_clks	= peri_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(peri_gate_clks),
+	.nr_clk_ids	= PERI_NR_CLK,
+	.clk_regs	= peri_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(peri_clk_regs),
+};
+
 static void __init exynos5260_clk_peri_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.mux_clks = peri_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(peri_mux_clks);
-	cmu.div_clks = peri_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(peri_div_clks);
-	cmu.gate_clks = peri_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(peri_gate_clks);
-	cmu.nr_clk_ids = PERI_NR_CLK;
-	cmu.clk_regs = peri_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(peri_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &peri_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_peri, "samsung,exynos5260-clock-peri",
@@ -1818,25 +1818,25 @@
 		pll2650_24mhz_tbl),
 };
 
+static const struct samsung_cmu_info top_cmu __initconst = {
+	.pll_clks	= top_pll_clks,
+	.nr_pll_clks	= ARRAY_SIZE(top_pll_clks),
+	.mux_clks	= top_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(top_mux_clks),
+	.div_clks	= top_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(top_div_clks),
+	.gate_clks	= top_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(top_gate_clks),
+	.fixed_clks	= fixed_rate_clks,
+	.nr_fixed_clks	= ARRAY_SIZE(fixed_rate_clks),
+	.nr_clk_ids	= TOP_NR_CLK,
+	.clk_regs	= top_clk_regs,
+	.nr_clk_regs	= ARRAY_SIZE(top_clk_regs),
+};
+
 static void __init exynos5260_clk_top_init(struct device_node *np)
 {
-	struct samsung_cmu_info cmu = { NULL };
-
-	cmu.pll_clks = top_pll_clks;
-	cmu.nr_pll_clks =  ARRAY_SIZE(top_pll_clks);
-	cmu.mux_clks = top_mux_clks;
-	cmu.nr_mux_clks = ARRAY_SIZE(top_mux_clks);
-	cmu.div_clks = top_div_clks;
-	cmu.nr_div_clks = ARRAY_SIZE(top_div_clks);
-	cmu.gate_clks = top_gate_clks;
-	cmu.nr_gate_clks = ARRAY_SIZE(top_gate_clks);
-	cmu.fixed_clks = fixed_rate_clks;
-	cmu.nr_fixed_clks = ARRAY_SIZE(fixed_rate_clks);
-	cmu.nr_clk_ids = TOP_NR_CLK;
-	cmu.clk_regs = top_clk_regs;
-	cmu.nr_clk_regs = ARRAY_SIZE(top_clk_regs);
-
-	samsung_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &top_cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_top, "samsung,exynos5260-clock-top",
diff --git a/drivers/clk/samsung/clk-exynos5410.c b/drivers/clk/samsung/clk-exynos5410.c
index 54ec486..fc471a4 100644
--- a/drivers/clk/samsung/clk-exynos5410.c
+++ b/drivers/clk/samsung/clk-exynos5410.c
@@ -14,6 +14,7 @@
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/clk.h>
 
 #include "clk.h"
 
@@ -21,6 +22,8 @@
 #define APLL_CON0               0x100
 #define CPLL_LOCK               0x10020
 #define CPLL_CON0               0x10120
+#define EPLL_LOCK               0x10040
+#define EPLL_CON0               0x10130
 #define MPLL_LOCK               0x4000
 #define MPLL_CON0               0x4100
 #define BPLL_LOCK               0x20010
@@ -58,7 +61,7 @@
 
 /* list of PLLs */
 enum exynos5410_plls {
-	apll, cpll, mpll,
+	apll, cpll, epll, mpll,
 	bpll, kpll,
 	nr_plls                 /* number of PLLs */
 };
@@ -67,6 +70,7 @@
 PNAME(apll_p)		= { "fin_pll", "fout_apll", };
 PNAME(bpll_p)		= { "fin_pll", "fout_bpll", };
 PNAME(cpll_p)		= { "fin_pll", "fout_cpll" };
+PNAME(epll_p)		= { "fin_pll", "fout_epll" };
 PNAME(mpll_p)		= { "fin_pll", "fout_mpll", };
 PNAME(kpll_p)		= { "fin_pll", "fout_kpll", };
 
@@ -95,6 +99,8 @@
 	MUX(0, "sclk_bpll", bpll_p, SRC_CDREX, 0, 1),
 	MUX(0, "sclk_bpll_muxed", bpll_user_p, SRC_TOP2, 24, 1),
 
+	MUX(0, "sclk_epll", epll_p, SRC_TOP2, 12, 1),
+
 	MUX(0, "sclk_cpll", cpll_p, SRC_TOP2, 8, 1),
 
 	MUX(0, "sclk_mpll_bpll", mpll_bpll_p, SRC_TOP1, 20, 1),
@@ -176,6 +182,8 @@
 	GATE(CLK_MMC0, "sdmmc0", "aclk200", GATE_BUS_FSYS0, 12, 0, 0),
 	GATE(CLK_MMC1, "sdmmc1", "aclk200", GATE_BUS_FSYS0, 13, 0, 0),
 	GATE(CLK_MMC2, "sdmmc2", "aclk200", GATE_BUS_FSYS0, 14, 0, 0),
+	GATE(CLK_PDMA1, "pdma1", "aclk200", GATE_BUS_FSYS0, 2, 0, 0),
+	GATE(CLK_PDMA0, "pdma0", "aclk200", GATE_BUS_FSYS0, 1, 0, 0),
 
 	GATE(CLK_SCLK_USBPHY301, "sclk_usbphy301", "dout_usbphy301",
 	     GATE_TOP_SCLK_FSYS, 7, CLK_SET_RATE_PARENT, 0),
@@ -217,11 +225,26 @@
 	GATE(CLK_USBD301, "usbd301", "aclk200_fsys", GATE_IP_FSYS, 20, 0, 0),
 };
 
-static const struct samsung_pll_clock exynos5410_plls[nr_plls] __initconst = {
+static const struct samsung_pll_rate_table exynos5410_pll2550x_24mhz_tbl[] __initconst = {
+	PLL_36XX_RATE(400000000U, 200, 3, 2, 0),
+	PLL_36XX_RATE(333000000U, 111, 2, 2, 0),
+	PLL_36XX_RATE(300000000U, 100, 2, 2, 0),
+	PLL_36XX_RATE(266000000U, 266, 3, 3, 0),
+	PLL_36XX_RATE(200000000U, 200, 3, 3, 0),
+	PLL_36XX_RATE(192000000U, 192, 3, 3, 0),
+	PLL_36XX_RATE(166000000U, 166, 3, 3, 0),
+	PLL_36XX_RATE(133000000U, 266, 3, 4, 0),
+	PLL_36XX_RATE(100000000U, 200, 3, 4, 0),
+	PLL_36XX_RATE(66000000U,  176, 2, 5, 0),
+};
+
+static struct samsung_pll_clock exynos5410_plls[nr_plls] __initdata = {
 	[apll] = PLL(pll_35xx, CLK_FOUT_APLL, "fout_apll", "fin_pll", APLL_LOCK,
 		APLL_CON0, NULL),
 	[cpll] = PLL(pll_35xx, CLK_FOUT_CPLL, "fout_cpll", "fin_pll", CPLL_LOCK,
 		CPLL_CON0, NULL),
+	[epll] = PLL(pll_2650x, CLK_FOUT_EPLL, "fout_epll", "fin_pll", EPLL_LOCK,
+		EPLL_CON0, NULL),
 	[mpll] = PLL(pll_35xx, CLK_FOUT_MPLL, "fout_mpll", "fin_pll", MPLL_LOCK,
 		MPLL_CON0, NULL),
 	[bpll] = PLL(pll_35xx, CLK_FOUT_BPLL, "fout_bpll", "fin_pll", BPLL_LOCK,
@@ -230,29 +253,27 @@
 		KPLL_CON0, NULL),
 };
 
+static const struct samsung_cmu_info cmu __initconst = {
+	.pll_clks	= exynos5410_plls,
+	.nr_pll_clks	= ARRAY_SIZE(exynos5410_plls),
+	.mux_clks	= exynos5410_mux_clks,
+	.nr_mux_clks	= ARRAY_SIZE(exynos5410_mux_clks),
+	.div_clks	= exynos5410_div_clks,
+	.nr_div_clks	= ARRAY_SIZE(exynos5410_div_clks),
+	.gate_clks	= exynos5410_gate_clks,
+	.nr_gate_clks	= ARRAY_SIZE(exynos5410_gate_clks),
+	.nr_clk_ids	= CLK_NR_CLKS,
+};
+
 /* register exynos5410 clocks */
 static void __init exynos5410_clk_init(struct device_node *np)
 {
-	struct samsung_clk_provider *ctx;
-	void __iomem *reg_base;
+	struct clk *xxti = of_clk_get(np, 0);
 
-	reg_base = of_iomap(np, 0);
-	if (!reg_base)
-		panic("%s: failed to map registers\n", __func__);
+	if (!IS_ERR(xxti) && clk_get_rate(xxti) == 24 * MHZ)
+		exynos5410_plls[epll].rate_table = exynos5410_pll2550x_24mhz_tbl;
 
-	ctx = samsung_clk_init(np, reg_base, CLK_NR_CLKS);
-
-	samsung_clk_register_pll(ctx, exynos5410_plls,
-			ARRAY_SIZE(exynos5410_plls), reg_base);
-
-	samsung_clk_register_mux(ctx, exynos5410_mux_clks,
-			ARRAY_SIZE(exynos5410_mux_clks));
-	samsung_clk_register_div(ctx, exynos5410_div_clks,
-			ARRAY_SIZE(exynos5410_div_clks));
-	samsung_clk_register_gate(ctx, exynos5410_gate_clks,
-			ARRAY_SIZE(exynos5410_gate_clks));
-
-	samsung_clk_of_add_provider(np, ctx);
+	samsung_cmu_register_one(np, &cmu);
 
 	pr_debug("Exynos5410: clock setup completed.\n");
 }
diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index bb196ca..8c8b495 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c
@@ -131,6 +131,9 @@
 #define TOP_SPARE2		0x10b08
 #define BPLL_LOCK		0x20010
 #define BPLL_CON0		0x20110
+#define SRC_CDREX		0x20200
+#define DIV_CDREX0		0x20500
+#define DIV_CDREX1		0x20504
 #define KPLL_LOCK		0x28000
 #define KPLL_CON0		0x28100
 #define SRC_KFC			0x28200
@@ -244,6 +247,9 @@
 	GATE_TOP_SCLK_FSYS,
 	GATE_TOP_SCLK_PERIC,
 	TOP_SPARE2,
+	SRC_CDREX,
+	DIV_CDREX0,
+	DIV_CDREX1,
 	SRC_KFC,
 	DIV_KFC0,
 };
@@ -448,6 +454,8 @@
 			 "mout_sclk_epll", "mout_sclk_rpll"};
 PNAME(mout_mau_epll_clk_p) = {"mout_sclk_epll", "mout_sclk_dpll",
 				"mout_sclk_mpll", "mout_sclk_spll"};
+PNAME(mout_mclk_cdrex_p) = {"mout_bpll", "mout_mx_mspll_ccore"};
+
 /* List of parents specific to exynos5800 */
 PNAME(mout_epll2_5800_p)	= { "mout_sclk_epll", "ff_dout_epll2" };
 PNAME(mout_group1_5800_p)	= { "mout_sclk_cpll", "mout_sclk_dpll",
@@ -465,6 +473,9 @@
 PNAME(mout_group7_5800_p)	= { "mout_sclk_cpll", "mout_sclk_dpll",
 					"mout_sclk_mpll", "mout_sclk_spll",
 					"mout_epll2", "mout_sclk_ipll" };
+PNAME(mout_mx_mspll_ccore_p)	= {"sclk_bpll", "mout_sclk_dpll",
+					"mout_sclk_mpll", "ff_dout_spll2",
+					"mout_sclk_spll", "mout_sclk_epll"};
 PNAME(mout_mau_epll_clk_5800_p)	= { "mout_sclk_epll", "mout_sclk_dpll",
 					"mout_sclk_mpll",
 					"ff_dout_spll2" };
@@ -523,6 +534,8 @@
 	MUX(0, "mout_aclk300_disp1", mout_group5_5800_p, SRC_TOP2, 24, 2),
 	MUX(0, "mout_aclk300_gscl", mout_group5_5800_p, SRC_TOP2, 28, 2),
 
+	MUX(CLK_MOUT_MX_MSPLL_CCORE, "mout_mx_mspll_ccore",
+			mout_mx_mspll_ccore_p, SRC_TOP7, 16, 2),
 	MUX(0, "mout_mau_epll_clk", mout_mau_epll_clk_5800_p, SRC_TOP7,
 			20, 2),
 	MUX(0, "sclk_bpll", mout_bpll_p, SRC_TOP7, 24, 1),
@@ -601,6 +614,8 @@
 	MUX(0, "mout_aclk300_disp1", mout_group1_p, SRC_TOP2, 24, 2),
 	MUX(0, "mout_aclk300_gscl", mout_group1_p, SRC_TOP2, 28, 2),
 
+	MUX(CLK_MOUT_MX_MSPLL_CCORE, "mout_mx_mspll_ccore",
+			mout_group5_5800_p, SRC_TOP7, 16, 2),
 	MUX(0, "mout_mau_epll_clk", mout_mau_epll_clk_p, SRC_TOP7, 20, 2),
 
 	MUX(0, "mout_fimd1", mout_group3_p, SRC_DISP10, 4, 1),
@@ -744,6 +759,12 @@
 
 	MUX(0, "mout_fimd1_final", mout_fimd1_final_p, TOP_SPARE2, 8, 1),
 
+	/* CDREX block */
+	MUX_F(CLK_MOUT_MCLK_CDREX, "mout_mclk_cdrex", mout_mclk_cdrex_p,
+			SRC_CDREX, 4, 1, CLK_SET_RATE_PARENT, 0),
+	MUX_F(CLK_MOUT_BPLL, "mout_bpll", mout_bpll_p, SRC_CDREX, 0, 1,
+			CLK_SET_RATE_PARENT, 0),
+
 	/* MAU Block */
 	MUX(CLK_MOUT_MAUDIO0, "mout_maudio0", mout_maudio0_p, SRC_MAU, 28, 3),
 
@@ -836,6 +857,21 @@
 	DIV(CLK_DOUT_ACLK400_DISP1, "dout_aclk400_disp1",
 			"mout_aclk400_disp1", DIV_TOP2, 4, 3),
 
+	/* CDREX Block */
+	DIV(CLK_DOUT_PCLK_CDREX, "dout_pclk_cdrex", "dout_aclk_cdrex1",
+			DIV_CDREX0, 28, 3),
+	DIV_F(CLK_DOUT_SCLK_CDREX, "dout_sclk_cdrex", "mout_mclk_cdrex",
+			DIV_CDREX0, 24, 3, CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DOUT_ACLK_CDREX1, "dout_aclk_cdrex1", "dout_clk2x_phy0",
+			DIV_CDREX0, 16, 3),
+	DIV(CLK_DOUT_CCLK_DREX0, "dout_cclk_drex0", "dout_clk2x_phy0",
+			DIV_CDREX0, 8, 3),
+	DIV(CLK_DOUT_CLK2X_PHY0, "dout_clk2x_phy0", "dout_sclk_cdrex",
+			DIV_CDREX0, 3, 5),
+
+	DIV(CLK_DOUT_PCLK_CORE_MEM, "dout_pclk_core_mem", "mout_mclk_cdrex",
+			DIV_CDREX1, 8, 3),
+
 	/* Audio Block */
 	DIV(0, "dout_maudio0", "mout_maudio0", DIV_MAU, 20, 4),
 	DIV(0, "dout_maupcm0", "dout_maudio0", DIV_MAU, 24, 8),
@@ -1364,6 +1400,7 @@
 	if (_get_rate("fin_pll") == 24 * MHZ) {
 		exynos5x_plls[apll].rate_table = exynos5420_pll2550x_24mhz_tbl;
 		exynos5x_plls[kpll].rate_table = exynos5420_pll2550x_24mhz_tbl;
+		exynos5x_plls[bpll].rate_table = exynos5420_pll2550x_24mhz_tbl;
 	}
 
 	samsung_clk_register_pll(ctx, exynos5x_plls, ARRAY_SIZE(exynos5x_plls),
diff --git a/drivers/clk/samsung/clk-exynos5440.c b/drivers/clk/samsung/clk-exynos5440.c
index a57d01b..a80f3ef 100644
--- a/drivers/clk/samsung/clk-exynos5440.c
+++ b/drivers/clk/samsung/clk-exynos5440.c
@@ -112,6 +112,11 @@
 	.priority = 128,
 };
 
+static const struct samsung_pll_clock exynos5440_plls[] __initconst = {
+	PLL(pll_2550x, CLK_CPLLA, "cplla", "xtal", 0, 0x4c, NULL),
+	PLL(pll_2550x, CLK_CPLLB, "cpllb", "xtal", 0, 0x50, NULL),
+};
+
 /* register exynos5440 clocks */
 static void __init exynos5440_clk_init(struct device_node *np)
 {
@@ -129,8 +134,8 @@
 	samsung_clk_of_register_fixed_ext(ctx, exynos5440_fixed_rate_ext_clks,
 		ARRAY_SIZE(exynos5440_fixed_rate_ext_clks), ext_clk_match);
 
-	samsung_clk_register_pll2550x("cplla", "xtal", reg_base + 0x1c, 0x10);
-	samsung_clk_register_pll2550x("cpllb", "xtal", reg_base + 0x20, 0x10);
+	samsung_clk_register_pll(ctx, exynos5440_plls,
+			ARRAY_SIZE(exynos5440_plls), ctx->reg_base);
 
 	samsung_clk_register_fixed_rate(ctx, exynos5440_fixed_rate_clks,
 			ARRAY_SIZE(exynos5440_fixed_rate_clks));
diff --git a/drivers/clk/samsung/clk-pll.c b/drivers/clk/samsung/clk-pll.c
index 48139bd..9617825 100644
--- a/drivers/clk/samsung/clk-pll.c
+++ b/drivers/clk/samsung/clk-pll.c
@@ -890,22 +890,14 @@
 #define PLL2550X_M_SHIFT      (4)
 #define PLL2550X_S_SHIFT      (0)
 
-struct samsung_clk_pll2550x {
-	struct clk_hw		hw;
-	const void __iomem	*reg_base;
-	unsigned long		offset;
-};
-
-#define to_clk_pll2550x(_hw) container_of(_hw, struct samsung_clk_pll2550x, hw)
-
 static unsigned long samsung_pll2550x_recalc_rate(struct clk_hw *hw,
 				unsigned long parent_rate)
 {
-	struct samsung_clk_pll2550x *pll = to_clk_pll2550x(hw);
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
 	u32 r, p, m, s, pll_stat;
 	u64 fvco = parent_rate;
 
-	pll_stat = readl_relaxed(pll->reg_base + pll->offset * 3);
+	pll_stat = readl_relaxed(pll->con_reg);
 	r = (pll_stat >> PLL2550X_R_SHIFT) & PLL2550X_R_MASK;
 	if (!r)
 		return 0;
@@ -923,43 +915,6 @@
 	.recalc_rate = samsung_pll2550x_recalc_rate,
 };
 
-struct clk * __init samsung_clk_register_pll2550x(const char *name,
-			const char *pname, const void __iomem *reg_base,
-			const unsigned long offset)
-{
-	struct samsung_clk_pll2550x *pll;
-	struct clk *clk;
-	struct clk_init_data init;
-
-	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
-	if (!pll) {
-		pr_err("%s: could not allocate pll clk %s\n", __func__, name);
-		return NULL;
-	}
-
-	init.name = name;
-	init.ops = &samsung_pll2550x_clk_ops;
-	init.flags = CLK_GET_RATE_NOCACHE;
-	init.parent_names = &pname;
-	init.num_parents = 1;
-
-	pll->hw.init = &init;
-	pll->reg_base = reg_base;
-	pll->offset = offset;
-
-	clk = clk_register(NULL, &pll->hw);
-	if (IS_ERR(clk)) {
-		pr_err("%s: failed to register pll clock %s\n", __func__,
-				name);
-		kfree(pll);
-	}
-
-	if (clk_register_clkdev(clk, name, NULL))
-		pr_err("%s: failed to register lookup for %s", __func__, name);
-
-	return clk;
-}
-
 /*
  * PLL2550xx Clock Type
  */
@@ -1063,6 +1018,102 @@
 };
 
 /*
+ * PLL2650x Clock Type
+ */
+
+/* Maximum lock time can be 3000 * PDIV cycles */
+#define PLL2650X_LOCK_FACTOR		3000
+
+#define PLL2650X_M_MASK			0x1ff
+#define PLL2650X_P_MASK			0x3f
+#define PLL2650X_S_MASK			0x7
+#define PLL2650X_K_MASK			0xffff
+#define PLL2650X_LOCK_STAT_MASK		0x1
+#define PLL2650X_M_SHIFT		16
+#define PLL2650X_P_SHIFT		8
+#define PLL2650X_S_SHIFT		0
+#define PLL2650X_K_SHIFT		0
+#define PLL2650X_LOCK_STAT_SHIFT	29
+#define PLL2650X_PLL_ENABLE_SHIFT	31
+
+static unsigned long samsung_pll2650x_recalc_rate(struct clk_hw *hw,
+				unsigned long parent_rate)
+{
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
+	u64 fout = parent_rate;
+	u32 mdiv, pdiv, sdiv, pll_con0, pll_con1;
+	s16 kdiv;
+
+	pll_con0 = readl_relaxed(pll->con_reg);
+	mdiv = (pll_con0 >> PLL2650X_M_SHIFT) & PLL2650X_M_MASK;
+	pdiv = (pll_con0 >> PLL2650X_P_SHIFT) & PLL2650X_P_MASK;
+	sdiv = (pll_con0 >> PLL2650X_S_SHIFT) & PLL2650X_S_MASK;
+
+	pll_con1 = readl_relaxed(pll->con_reg + 4);
+	kdiv = (s16)((pll_con1 >> PLL2650X_K_SHIFT) & PLL2650X_K_MASK);
+
+	fout *= (mdiv << 16) + kdiv;
+	do_div(fout, (pdiv << sdiv));
+	fout >>= 16;
+
+	return (unsigned long)fout;
+}
+
+static int samsung_pll2650x_set_rate(struct clk_hw *hw, unsigned long drate,
+					unsigned long prate)
+{
+	struct samsung_clk_pll *pll = to_clk_pll(hw);
+	const struct samsung_pll_rate_table *rate;
+	u32 con0, con1;
+
+	/* Get required rate settings from table */
+	rate = samsung_get_pll_settings(pll, drate);
+	if (!rate) {
+		pr_err("%s: Invalid rate : %lu for pll clk %s\n", __func__,
+			drate, clk_hw_get_name(hw));
+		return -EINVAL;
+	}
+
+	con0 = readl_relaxed(pll->con_reg);
+	con1 = readl_relaxed(pll->con_reg + 4);
+
+	/* Set PLL lock time. */
+	writel_relaxed(rate->pdiv * PLL2650X_LOCK_FACTOR, pll->lock_reg);
+
+	/* Change PLL PMS values */
+	con0 &= ~((PLL2650X_M_MASK << PLL2650X_M_SHIFT) |
+			(PLL2650X_P_MASK << PLL2650X_P_SHIFT) |
+			(PLL2650X_S_MASK << PLL2650X_S_SHIFT));
+	con0 |= (rate->mdiv << PLL2650X_M_SHIFT) |
+			(rate->pdiv << PLL2650X_P_SHIFT) |
+			(rate->sdiv << PLL2650X_S_SHIFT);
+	con0 |= (1 << PLL2650X_PLL_ENABLE_SHIFT);
+	writel_relaxed(con0, pll->con_reg);
+
+	con1 &= ~(PLL2650X_K_MASK << PLL2650X_K_SHIFT);
+	con1 |= ((rate->kdiv & PLL2650X_K_MASK) << PLL2650X_K_SHIFT);
+	writel_relaxed(con1, pll->con_reg + 4);
+
+	do {
+		cpu_relax();
+		con0 = readl_relaxed(pll->con_reg);
+	} while (!(con0 & (PLL2650X_LOCK_STAT_MASK
+			<< PLL2650X_LOCK_STAT_SHIFT)));
+
+	return 0;
+}
+
+static const struct clk_ops samsung_pll2650x_clk_ops = {
+	.recalc_rate = samsung_pll2650x_recalc_rate,
+	.round_rate = samsung_pll_round_rate,
+	.set_rate = samsung_pll2650x_set_rate,
+};
+
+static const struct clk_ops samsung_pll2650x_clk_min_ops = {
+	.recalc_rate = samsung_pll2650x_recalc_rate,
+};
+
+/*
  * PLL2650XX Clock Type
  */
 
@@ -1263,12 +1314,21 @@
 		else
 			init.ops = &samsung_s3c2440_mpll_clk_ops;
 		break;
+	case pll_2550x:
+		init.ops = &samsung_pll2550x_clk_ops;
+		break;
 	case pll_2550xx:
 		if (!pll->rate_table)
 			init.ops = &samsung_pll2550xx_clk_min_ops;
 		else
 			init.ops = &samsung_pll2550xx_clk_ops;
 		break;
+	case pll_2650x:
+		if (!pll->rate_table)
+			init.ops = &samsung_pll2650x_clk_min_ops;
+		else
+			init.ops = &samsung_pll2650x_clk_ops;
+		break;
 	case pll_2650xx:
 		if (!pll->rate_table)
 			init.ops = &samsung_pll2650xx_clk_min_ops;
diff --git a/drivers/clk/samsung/clk-pll.h b/drivers/clk/samsung/clk-pll.h
index 213de9a..a1ca023 100644
--- a/drivers/clk/samsung/clk-pll.h
+++ b/drivers/clk/samsung/clk-pll.h
@@ -31,7 +31,9 @@
 	pll_s3c2410_mpll,
 	pll_s3c2410_upll,
 	pll_s3c2440_mpll,
+	pll_2550x,
 	pll_2550xx,
+	pll_2650x,
 	pll_2650xx,
 	pll_1450x,
 	pll_1451x,
diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c
index 546bd79..a485f3b 100644
--- a/drivers/clk/st/clk-flexgen.c
+++ b/drivers/clk/st/clk-flexgen.c
@@ -15,6 +15,11 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 
+struct clkgen_data {
+	unsigned long flags;
+	bool mode;
+};
+
 struct flexgen {
 	struct clk_hw hw;
 
@@ -28,9 +33,14 @@
 	struct clk_gate fgate;
 	/* Final divisor */
 	struct clk_divider fdiv;
+	/* Asynchronous mode control */
+	struct clk_gate sync;
+	/* hw control flags */
+	bool control_mode;
 };
 
 #define to_flexgen(_hw) container_of(_hw, struct flexgen, hw)
+#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, hw)
 
 static int flexgen_enable(struct clk_hw *hw)
 {
@@ -139,12 +149,21 @@
 	struct flexgen *flexgen = to_flexgen(hw);
 	struct clk_hw *pdiv_hw = &flexgen->pdiv.hw;
 	struct clk_hw *fdiv_hw = &flexgen->fdiv.hw;
+	struct clk_hw *sync_hw = &flexgen->sync.hw;
+	struct clk_gate *config = to_clk_gate(sync_hw);
 	unsigned long div = 0;
 	int ret = 0;
+	u32 reg;
 
 	__clk_hw_set_clk(pdiv_hw, hw);
 	__clk_hw_set_clk(fdiv_hw, hw);
 
+	if (flexgen->control_mode) {
+		reg = readl(config->reg);
+		reg &= ~BIT(config->bit_idx);
+		writel(reg, config->reg);
+	}
+
 	div = clk_best_div(parent_rate, rate);
 
 	/*
@@ -178,7 +197,7 @@
 static struct clk *clk_register_flexgen(const char *name,
 				const char **parent_names, u8 num_parents,
 				void __iomem *reg, spinlock_t *lock, u32 idx,
-				unsigned long flexgen_flags) {
+				unsigned long flexgen_flags, bool mode) {
 	struct flexgen *fgxbar;
 	struct clk *clk;
 	struct clk_init_data init;
@@ -227,6 +246,13 @@
 	fgxbar->fdiv.reg = fdiv_reg;
 	fgxbar->fdiv.width = 6;
 
+	/* Final divider sync config */
+	fgxbar->sync.lock = lock;
+	fgxbar->sync.reg = fdiv_reg;
+	fgxbar->sync.bit_idx = 7;
+
+	fgxbar->control_mode = mode;
+
 	fgxbar->hw.init = &init;
 
 	clk = clk_register(NULL, &fgxbar->hw);
@@ -259,6 +285,27 @@
 	return parents;
 }
 
+static const struct clkgen_data clkgen_audio = {
+	.flags = CLK_SET_RATE_PARENT,
+};
+
+static const struct clkgen_data clkgen_video = {
+	.flags = CLK_SET_RATE_PARENT,
+	.mode = 1,
+};
+
+static const struct of_device_id flexgen_of_match[] = {
+	{
+		.compatible = "st,flexgen-audio",
+		.data = &clkgen_audio,
+	},
+	{
+		.compatible = "st,flexgen-video",
+		.data = &clkgen_video,
+	},
+	{}
+};
+
 static void __init st_of_flexgen_setup(struct device_node *np)
 {
 	struct device_node *pnode;
@@ -267,7 +314,11 @@
 	const char **parents;
 	int num_parents, i;
 	spinlock_t *rlock = NULL;
+	const struct of_device_id *match;
+	struct clkgen_data *data = NULL;
+	unsigned long flex_flags = 0;
 	int ret;
+	bool clk_mode = 0;
 
 	pnode = of_get_parent(np);
 	if (!pnode)
@@ -281,6 +332,13 @@
 	if (!parents)
 		return;
 
+	match = of_match_node(flexgen_of_match, np);
+	if (match) {
+		data = (struct clkgen_data *)match->data;
+		flex_flags = data->flags;
+		clk_mode = data->mode;
+	}
+
 	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
 	if (!clk_data)
 		goto err;
@@ -307,7 +365,6 @@
 	for (i = 0; i < clk_data->clk_num; i++) {
 		struct clk *clk;
 		const char *clk_name;
-		unsigned long flex_flags = 0;
 
 		if (of_property_read_string_index(np, "clock-output-names",
 						  i, &clk_name)) {
@@ -323,7 +380,7 @@
 			continue;
 
 		clk = clk_register_flexgen(clk_name, parents, num_parents,
-					   reg, rlock, i, flex_flags);
+					   reg, rlock, i, flex_flags, clk_mode);
 
 		if (IS_ERR(clk))
 			goto err;
diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c
index 09afeb8..14819d9 100644
--- a/drivers/clk/st/clkgen-fsyn.c
+++ b/drivers/clk/st/clkgen-fsyn.c
@@ -42,79 +42,6 @@
 	unsigned long nsdiv;
 };
 
-static const struct stm_fs fs216c65_rtbl[] = {
-	{ .mdiv = 0x1f, .pe = 0x0,	.sdiv = 0x7,	.nsdiv = 0 },	/* 312.5 Khz */
-	{ .mdiv = 0x17, .pe = 0x25ed,	.sdiv = 0x1,	.nsdiv = 0 },	/* 27    MHz */
-	{ .mdiv = 0x1a, .pe = 0x7b36,	.sdiv = 0x2,	.nsdiv = 1 },	/* 36.87 MHz */
-	{ .mdiv = 0x13, .pe = 0x0,	.sdiv = 0x2,	.nsdiv = 1 },	/* 48    MHz */
-	{ .mdiv = 0x11, .pe = 0x1c72,	.sdiv = 0x1,	.nsdiv = 1 },	/* 108   MHz */
-};
-
-static const struct stm_fs fs432c65_rtbl[] = {
-	{ .mdiv = 0x1f, .pe = 0x0,	.sdiv = 0x7,	.nsdiv = 0 },	/* 625     Khz */
-	{ .mdiv = 0x13, .pe = 0x777c,	.sdiv = 0x4,	.nsdiv = 1 },	/* 25.175  MHz */
-	{ .mdiv = 0x19, .pe = 0x4d35,	.sdiv = 0x2,	.nsdiv = 0 },	/* 25.200  MHz */
-	{ .mdiv = 0x11, .pe = 0x1c72,	.sdiv = 0x4,	.nsdiv = 1 },	/* 27.000  MHz */
-	{ .mdiv = 0x17, .pe = 0x28f5,	.sdiv = 0x2,	.nsdiv = 0 },	/* 27.027  MHz */
-	{ .mdiv = 0x16, .pe = 0x3359,	.sdiv = 0x2,	.nsdiv = 0 },	/* 28.320  MHz */
-	{ .mdiv = 0x1f, .pe = 0x2083,	.sdiv = 0x3,	.nsdiv = 1 },	/* 30.240  MHz */
-	{ .mdiv = 0x1e, .pe = 0x430d,	.sdiv = 0x3,	.nsdiv = 1 },	/* 31.500  MHz */
-	{ .mdiv = 0x17, .pe = 0x0,	.sdiv = 0x3,	.nsdiv = 1 },	/* 40.000  MHz */
-	{ .mdiv = 0x19, .pe = 0x121a,	.sdiv = 0x1,	.nsdiv = 0 },	/* 49.500  MHz */
-	{ .mdiv = 0x13, .pe = 0x6667,	.sdiv = 0x3,	.nsdiv = 1 },	/* 50.000  MHz */
-	{ .mdiv = 0x10, .pe = 0x1ee6,	.sdiv = 0x3,	.nsdiv = 1 },	/* 57.284  MHz */
-	{ .mdiv = 0x1d, .pe = 0x3b14,	.sdiv = 0x2,	.nsdiv = 1 },	/* 65.000  MHz */
-	{ .mdiv = 0x12, .pe = 0x7c65,	.sdiv = 0x1,	.nsdiv = 0 },	/* 71.000  MHz */
-	{ .mdiv = 0x19, .pe = 0xecd,	.sdiv = 0x2,	.nsdiv = 1 },	/* 74.176  MHz */
-	{ .mdiv = 0x19, .pe = 0x121a,	.sdiv = 0x2,	.nsdiv = 1 },	/* 74.250  MHz */
-	{ .mdiv = 0x19, .pe = 0x3334,	.sdiv = 0x2,	.nsdiv = 1 },	/* 75.000  MHz */
-	{ .mdiv = 0x18, .pe = 0x5138,	.sdiv = 0x2,	.nsdiv = 1 },	/* 78.800  MHz */
-	{ .mdiv = 0x1d, .pe = 0x77d,	.sdiv = 0x0,	.nsdiv = 0 },	/* 85.500  MHz */
-	{ .mdiv = 0x1c, .pe = 0x13d5,	.sdiv = 0x0,	.nsdiv = 0 },	/* 88.750  MHz */
-	{ .mdiv = 0x11, .pe = 0x1c72,	.sdiv = 0x2,	.nsdiv = 1 },	/* 108.000 MHz */
-	{ .mdiv = 0x17, .pe = 0x28f5,	.sdiv = 0x0,	.nsdiv = 0 },	/* 108.108 MHz */
-	{ .mdiv = 0x10, .pe = 0x6e26,	.sdiv = 0x2,	.nsdiv = 1 },	/* 118.963 MHz */
-	{ .mdiv = 0x15, .pe = 0x3e63,	.sdiv = 0x0,	.nsdiv = 0 },	/* 119.000 MHz */
-	{ .mdiv = 0x1c, .pe = 0x471d,	.sdiv = 0x1,	.nsdiv = 1 },	/* 135.000 MHz */
-	{ .mdiv = 0x19, .pe = 0xecd,	.sdiv = 0x1,	.nsdiv = 1 },	/* 148.352 MHz */
-	{ .mdiv = 0x19, .pe = 0x121a,	.sdiv = 0x1,	.nsdiv = 1 },	/* 148.500 MHz */
-	{ .mdiv = 0x19, .pe = 0x121a,	.sdiv = 0x0,	.nsdiv = 1 },	/* 297     MHz */
-};
-
-static const struct stm_fs fs660c32_rtbl[] = {
-	{ .mdiv = 0x14, .pe = 0x376b,	.sdiv = 0x4,	.nsdiv = 1 },	/* 25.175  MHz */
-	{ .mdiv = 0x14, .pe = 0x30c3,	.sdiv = 0x4,	.nsdiv = 1 },	/* 25.200  MHz */
-	{ .mdiv = 0x10, .pe = 0x71c7,	.sdiv = 0x4,	.nsdiv = 1 },	/* 27.000  MHz */
-	{ .mdiv = 0x00, .pe = 0x47af,	.sdiv = 0x3,	.nsdiv = 0 },	/* 27.027  MHz */
-	{ .mdiv = 0x0e, .pe = 0x4e1a,	.sdiv = 0x4,	.nsdiv = 1 },	/* 28.320  MHz */
-	{ .mdiv = 0x0b, .pe = 0x534d,	.sdiv = 0x4,	.nsdiv = 1 },	/* 30.240  MHz */
-	{ .mdiv = 0x17, .pe = 0x6fbf,	.sdiv = 0x2,	.nsdiv = 0 },	/* 31.500  MHz */
-	{ .mdiv = 0x01, .pe = 0x0,	.sdiv = 0x4,	.nsdiv = 1 },	/* 40.000  MHz */
-	{ .mdiv = 0x15, .pe = 0x2aab,	.sdiv = 0x3,	.nsdiv = 1 },	/* 49.500  MHz */
-	{ .mdiv = 0x14, .pe = 0x6666,	.sdiv = 0x3,	.nsdiv = 1 },	/* 50.000  MHz */
-	{ .mdiv = 0x1d, .pe = 0x395f,	.sdiv = 0x1,	.nsdiv = 0 },	/* 57.284  MHz */
-	{ .mdiv = 0x08, .pe = 0x4ec5,	.sdiv = 0x3,	.nsdiv = 1 },	/* 65.000  MHz */
-	{ .mdiv = 0x05, .pe = 0x1770,	.sdiv = 0x3,	.nsdiv = 1 },	/* 71.000  MHz */
-	{ .mdiv = 0x03, .pe = 0x4ba7,	.sdiv = 0x3,	.nsdiv = 1 },	/* 74.176  MHz */
-	{ .mdiv = 0x0f, .pe = 0x3426,	.sdiv = 0x1,	.nsdiv = 0 },	/* 74.250  MHz */
-	{ .mdiv = 0x0e, .pe = 0x7777,	.sdiv = 0x1,	.nsdiv = 0 },	/* 75.000  MHz */
-	{ .mdiv = 0x01, .pe = 0x4053,	.sdiv = 0x3,	.nsdiv = 1 },	/* 78.800  MHz */
-	{ .mdiv = 0x09, .pe = 0x15b5,	.sdiv = 0x1,	.nsdiv = 0 },	/* 85.500  MHz */
-	{ .mdiv = 0x1b, .pe = 0x3f19,	.sdiv = 0x2,	.nsdiv = 1 },	/* 88.750  MHz */
-	{ .mdiv = 0x10, .pe = 0x71c7,	.sdiv = 0x2,	.nsdiv = 1 },	/* 108.000 MHz */
-	{ .mdiv = 0x00, .pe = 0x47af,	.sdiv = 0x1,	.nsdiv = 0 },	/* 108.108 MHz */
-	{ .mdiv = 0x0c, .pe = 0x3118,	.sdiv = 0x2,	.nsdiv = 1 },	/* 118.963 MHz */
-	{ .mdiv = 0x0c, .pe = 0x2f54,	.sdiv = 0x2,	.nsdiv = 1 },	/* 119.000 MHz */
-	{ .mdiv = 0x07, .pe = 0xe39,	.sdiv = 0x2,	.nsdiv = 1 },	/* 135.000 MHz */
-	{ .mdiv = 0x03, .pe = 0x4ba7,	.sdiv = 0x2,	.nsdiv = 1 },	/* 148.352 MHz */
-	{ .mdiv = 0x0f, .pe = 0x3426,	.sdiv = 0x0,	.nsdiv = 0 },	/* 148.500 MHz */
-	{ .mdiv = 0x03, .pe = 0x4ba7,	.sdiv = 0x1,	.nsdiv = 1 },	/* 296.704 MHz */
-	{ .mdiv = 0x03, .pe = 0x471c,	.sdiv = 0x1,	.nsdiv = 1 },	/* 297.000 MHz */
-	{ .mdiv = 0x00, .pe = 0x295f,	.sdiv = 0x1,	.nsdiv = 1 },	/* 326.700 MHz */
-	{ .mdiv = 0x1f, .pe = 0x3633,	.sdiv = 0x0,	.nsdiv = 1 },	/* 333.000 MHz */
-	{ .mdiv = 0x1c, .pe = 0x0,	.sdiv = 0x0,	.nsdiv = 1 },	/* 352.000 Mhz */
-};
-
 struct clkgen_quadfs_data {
 	bool reset_present;
 	bool bwfilter_present;
@@ -138,174 +65,18 @@
 	struct clkgen_field nsdiv[QUADFS_MAX_CHAN];
 
 	const struct clk_ops *pll_ops;
-	const struct stm_fs *rtbl;
-	u8 rtbl_cnt;
+	int  (*get_params)(unsigned long, unsigned long, struct stm_fs *);
 	int  (*get_rate)(unsigned long , const struct stm_fs *,
 			unsigned long *);
 };
 
-static const struct clk_ops st_quadfs_pll_c65_ops;
 static const struct clk_ops st_quadfs_pll_c32_ops;
-static const struct clk_ops st_quadfs_fs216c65_ops;
-static const struct clk_ops st_quadfs_fs432c65_ops;
 static const struct clk_ops st_quadfs_fs660c32_ops;
 
-static int clk_fs216c65_get_rate(unsigned long, const struct stm_fs *,
-		unsigned long *);
-static int clk_fs432c65_get_rate(unsigned long, const struct stm_fs *,
-		unsigned long *);
+static int clk_fs660c32_dig_get_params(unsigned long input,
+		unsigned long output, struct stm_fs *fs);
 static int clk_fs660c32_dig_get_rate(unsigned long, const struct stm_fs *,
 		unsigned long *);
-/*
- * Values for all of the standalone instances of this clock
- * generator found in STiH415 and STiH416 SYSCFG register banks. Note
- * that the individual channel standby control bits (nsb) are in the
- * first register along with the PLL control bits.
- */
-static const struct clkgen_quadfs_data st_fs216c65_416 = {
-	/* 416 specific */
-	.npda	= CLKGEN_FIELD(0x0, 0x1, 14),
-	.nsb	= { CLKGEN_FIELD(0x0, 0x1, 10),
-		    CLKGEN_FIELD(0x0, 0x1, 11),
-		    CLKGEN_FIELD(0x0, 0x1, 12),
-		    CLKGEN_FIELD(0x0, 0x1, 13) },
-	.nsdiv_present = true,
-	.nsdiv	= { CLKGEN_FIELD(0x0, 0x1, 18),
-		    CLKGEN_FIELD(0x0, 0x1, 19),
-		    CLKGEN_FIELD(0x0, 0x1, 20),
-		    CLKGEN_FIELD(0x0, 0x1, 21) },
-	.mdiv	= { CLKGEN_FIELD(0x4, 0x1f, 0),
-		    CLKGEN_FIELD(0x14, 0x1f, 0),
-		    CLKGEN_FIELD(0x24, 0x1f, 0),
-		    CLKGEN_FIELD(0x34, 0x1f, 0) },
-	.en	= { CLKGEN_FIELD(0x10, 0x1, 0),
-		    CLKGEN_FIELD(0x20, 0x1, 0),
-		    CLKGEN_FIELD(0x30, 0x1, 0),
-		    CLKGEN_FIELD(0x40, 0x1, 0) },
-	.ndiv	= CLKGEN_FIELD(0x0, 0x1, 15),
-	.bwfilter_present = true,
-	.ref_bw = CLKGEN_FIELD(0x0, 0x3, 16),
-	.pe	= { CLKGEN_FIELD(0x8, 0xffff, 0),
-		    CLKGEN_FIELD(0x18, 0xffff, 0),
-		    CLKGEN_FIELD(0x28, 0xffff, 0),
-		    CLKGEN_FIELD(0x38, 0xffff, 0) },
-	.sdiv	= { CLKGEN_FIELD(0xC, 0x7, 0),
-		    CLKGEN_FIELD(0x1C, 0x7, 0),
-		    CLKGEN_FIELD(0x2C, 0x7, 0),
-		    CLKGEN_FIELD(0x3C, 0x7, 0) },
-	.pll_ops	= &st_quadfs_pll_c65_ops,
-	.rtbl		= fs216c65_rtbl,
-	.rtbl_cnt	= ARRAY_SIZE(fs216c65_rtbl),
-	.get_rate	= clk_fs216c65_get_rate,
-};
-
-static const struct clkgen_quadfs_data st_fs432c65_416 = {
-	.npda	= CLKGEN_FIELD(0x0, 0x1, 14),
-	.nsb	= { CLKGEN_FIELD(0x0, 0x1, 10),
-		    CLKGEN_FIELD(0x0, 0x1, 11),
-		    CLKGEN_FIELD(0x0, 0x1, 12),
-		    CLKGEN_FIELD(0x0, 0x1, 13) },
-	.nsdiv_present = true,
-	.nsdiv	= { CLKGEN_FIELD(0x0, 0x1, 18),
-		   CLKGEN_FIELD(0x0, 0x1, 19),
-		   CLKGEN_FIELD(0x0, 0x1, 20),
-		   CLKGEN_FIELD(0x0, 0x1, 21) },
-	.mdiv	= { CLKGEN_FIELD(0x4, 0x1f, 0),
-		    CLKGEN_FIELD(0x14, 0x1f, 0),
-		    CLKGEN_FIELD(0x24, 0x1f, 0),
-		    CLKGEN_FIELD(0x34, 0x1f, 0) },
-	.en	= { CLKGEN_FIELD(0x10, 0x1, 0),
-		    CLKGEN_FIELD(0x20, 0x1, 0),
-		    CLKGEN_FIELD(0x30, 0x1, 0),
-		    CLKGEN_FIELD(0x40, 0x1, 0) },
-	.ndiv	= CLKGEN_FIELD(0x0, 0x1, 15),
-	.bwfilter_present = true,
-	.ref_bw = CLKGEN_FIELD(0x0, 0x3, 16),
-	.pe	= { CLKGEN_FIELD(0x8, 0xffff, 0),
-		    CLKGEN_FIELD(0x18, 0xffff, 0),
-		    CLKGEN_FIELD(0x28, 0xffff, 0),
-		    CLKGEN_FIELD(0x38, 0xffff, 0) },
-	.sdiv	= { CLKGEN_FIELD(0xC, 0x7, 0),
-		    CLKGEN_FIELD(0x1C, 0x7, 0),
-		    CLKGEN_FIELD(0x2C, 0x7, 0),
-		    CLKGEN_FIELD(0x3C, 0x7, 0) },
-	.pll_ops	= &st_quadfs_pll_c65_ops,
-	.rtbl		= fs432c65_rtbl,
-	.rtbl_cnt	= ARRAY_SIZE(fs432c65_rtbl),
-	.get_rate	= clk_fs432c65_get_rate,
-};
-
-static const struct clkgen_quadfs_data st_fs660c32_E_416 = {
-	.npda	= CLKGEN_FIELD(0x0, 0x1, 14),
-	.nsb	= { CLKGEN_FIELD(0x0, 0x1, 10),
-		    CLKGEN_FIELD(0x0, 0x1, 11),
-		    CLKGEN_FIELD(0x0, 0x1, 12),
-		    CLKGEN_FIELD(0x0, 0x1, 13) },
-	.nsdiv_present = true,
-	.nsdiv	= { CLKGEN_FIELD(0x0, 0x1, 18),
-		    CLKGEN_FIELD(0x0, 0x1, 19),
-		    CLKGEN_FIELD(0x0, 0x1, 20),
-		    CLKGEN_FIELD(0x0, 0x1, 21) },
-	.mdiv	= { CLKGEN_FIELD(0x4, 0x1f, 0),
-		    CLKGEN_FIELD(0x14, 0x1f, 0),
-		    CLKGEN_FIELD(0x24, 0x1f, 0),
-		    CLKGEN_FIELD(0x34, 0x1f, 0) },
-	.en	= { CLKGEN_FIELD(0x10, 0x1, 0),
-		    CLKGEN_FIELD(0x20, 0x1, 0),
-		    CLKGEN_FIELD(0x30, 0x1, 0),
-		    CLKGEN_FIELD(0x40, 0x1, 0) },
-	.ndiv	= CLKGEN_FIELD(0x0, 0x7, 15),
-	.pe	= { CLKGEN_FIELD(0x8, 0x7fff, 0),
-		    CLKGEN_FIELD(0x18, 0x7fff, 0),
-		    CLKGEN_FIELD(0x28, 0x7fff, 0),
-		    CLKGEN_FIELD(0x38, 0x7fff, 0) },
-	.sdiv	= { CLKGEN_FIELD(0xC, 0xf, 0),
-		    CLKGEN_FIELD(0x1C, 0xf, 0),
-		    CLKGEN_FIELD(0x2C, 0xf, 0),
-		    CLKGEN_FIELD(0x3C, 0xf, 0) },
-	.lockstatus_present = true,
-	.lock_status = CLKGEN_FIELD(0xAC, 0x1, 0),
-	.pll_ops	= &st_quadfs_pll_c32_ops,
-	.rtbl		= fs660c32_rtbl,
-	.rtbl_cnt	= ARRAY_SIZE(fs660c32_rtbl),
-	.get_rate	= clk_fs660c32_dig_get_rate,
-};
-
-static const struct clkgen_quadfs_data st_fs660c32_F_416 = {
-	.npda	= CLKGEN_FIELD(0x0, 0x1, 14),
-	.nsb	= { CLKGEN_FIELD(0x0, 0x1, 10),
-		    CLKGEN_FIELD(0x0, 0x1, 11),
-		    CLKGEN_FIELD(0x0, 0x1, 12),
-		    CLKGEN_FIELD(0x0, 0x1, 13) },
-	.nsdiv_present = true,
-	.nsdiv	= { CLKGEN_FIELD(0x0, 0x1, 18),
-		    CLKGEN_FIELD(0x0, 0x1, 19),
-		    CLKGEN_FIELD(0x0, 0x1, 20),
-		    CLKGEN_FIELD(0x0, 0x1, 21) },
-	.mdiv	= { CLKGEN_FIELD(0x4, 0x1f, 0),
-		    CLKGEN_FIELD(0x14, 0x1f, 0),
-		    CLKGEN_FIELD(0x24, 0x1f, 0),
-		    CLKGEN_FIELD(0x34, 0x1f, 0) },
-	.en	= { CLKGEN_FIELD(0x10, 0x1, 0),
-		    CLKGEN_FIELD(0x20, 0x1, 0),
-		    CLKGEN_FIELD(0x30, 0x1, 0),
-		    CLKGEN_FIELD(0x40, 0x1, 0) },
-	.ndiv	= CLKGEN_FIELD(0x0, 0x7, 15),
-	.pe	= { CLKGEN_FIELD(0x8, 0x7fff, 0),
-		    CLKGEN_FIELD(0x18, 0x7fff, 0),
-		    CLKGEN_FIELD(0x28, 0x7fff, 0),
-		    CLKGEN_FIELD(0x38, 0x7fff, 0) },
-	.sdiv	= { CLKGEN_FIELD(0xC, 0xf, 0),
-		    CLKGEN_FIELD(0x1C, 0xf, 0),
-		    CLKGEN_FIELD(0x2C, 0xf, 0),
-		    CLKGEN_FIELD(0x3C, 0xf, 0) },
-	.lockstatus_present = true,
-	.lock_status = CLKGEN_FIELD(0xEC, 0x1, 0),
-	.pll_ops	= &st_quadfs_pll_c32_ops,
-	.rtbl		= fs660c32_rtbl,
-	.rtbl_cnt	= ARRAY_SIZE(fs660c32_rtbl),
-	.get_rate	= clk_fs660c32_dig_get_rate,
-};
 
 static const struct clkgen_quadfs_data st_fs660c32_C = {
 	.nrst_present = true,
@@ -345,8 +116,7 @@
 	.powerup_polarity = 1,
 	.standby_polarity = 1,
 	.pll_ops	= &st_quadfs_pll_c32_ops,
-	.rtbl		= fs660c32_rtbl,
-	.rtbl_cnt	= ARRAY_SIZE(fs660c32_rtbl),
+	.get_params	= clk_fs660c32_dig_get_params,
 	.get_rate	= clk_fs660c32_dig_get_rate,
 };
 
@@ -388,8 +158,7 @@
 	.powerup_polarity = 1,
 	.standby_polarity = 1,
 	.pll_ops	= &st_quadfs_pll_c32_ops,
-	.rtbl		= fs660c32_rtbl,
-	.rtbl_cnt	= ARRAY_SIZE(fs660c32_rtbl),
+	.get_params	= clk_fs660c32_dig_get_params,
 	.get_rate	= clk_fs660c32_dig_get_rate,};
 
 /**
@@ -605,12 +374,6 @@
 	return 0;
 }
 
-static const struct clk_ops st_quadfs_pll_c65_ops = {
-	.enable		= quadfs_pll_enable,
-	.disable	= quadfs_pll_disable,
-	.is_enabled	= quadfs_pll_is_enabled,
-};
-
 static const struct clk_ops st_quadfs_pll_c32_ops = {
 	.enable		= quadfs_pll_enable,
 	.disable	= quadfs_pll_disable,
@@ -797,48 +560,6 @@
 	return fs->data->standby_polarity ? !nsb : !!nsb;
 }
 
-#define P15			(uint64_t)(1 << 15)
-
-static int clk_fs216c65_get_rate(unsigned long input, const struct stm_fs *fs,
-		unsigned long *rate)
-{
-	uint64_t res;
-	unsigned long ns;
-	unsigned long nd = 8; /* ndiv stuck at 0 => val = 8 */
-	unsigned long s;
-	long m;
-
-	m = fs->mdiv - 32;
-	s = 1 << (fs->sdiv + 1);
-	ns = (fs->nsdiv ? 1 : 3);
-
-	res = (uint64_t)(s * ns * P15 * (uint64_t)(m + 33));
-	res = res - (s * ns * fs->pe);
-	*rate = div64_u64(P15 * nd * input * 32, res);
-
-	return 0;
-}
-
-static int clk_fs432c65_get_rate(unsigned long input, const struct stm_fs *fs,
-		unsigned long *rate)
-{
-	uint64_t res;
-	unsigned long nd = 16; /* ndiv value; stuck at 0 (30Mhz input) */
-	long m;
-	unsigned long sd;
-	unsigned long ns;
-
-	m = fs->mdiv - 32;
-	sd = 1 << (fs->sdiv + 1);
-	ns = (fs->nsdiv ? 1 : 3);
-
-	res = (uint64_t)(sd * ns * P15 * (uint64_t)(m + 33));
-	res = res - (sd * ns * fs->pe);
-	*rate = div64_u64(P15 * nd * input * 32, res);
-
-	return 0;
-}
-
 #define P20		(uint64_t)(1 << 20)
 
 static int clk_fs660c32_dig_get_rate(unsigned long input,
@@ -864,6 +585,107 @@
 	return 0;
 }
 
+
+static int clk_fs660c32_get_pe(int m, int si, unsigned long *deviation,
+		signed long input, unsigned long output, uint64_t *p,
+		struct stm_fs *fs)
+{
+	unsigned long new_freq, new_deviation;
+	struct stm_fs fs_tmp;
+	uint64_t val;
+
+	val = (uint64_t)output << si;
+
+	*p = (uint64_t)input * P20 - (32LL  + (uint64_t)m) * val * (P20 / 32LL);
+
+	*p = div64_u64(*p, val);
+
+	if (*p > 32767LL)
+		return 1;
+
+	fs_tmp.mdiv = (unsigned long) m;
+	fs_tmp.pe = (unsigned long)*p;
+	fs_tmp.sdiv = si;
+	fs_tmp.nsdiv = 1;
+
+	clk_fs660c32_dig_get_rate(input, &fs_tmp, &new_freq);
+
+	new_deviation = abs(output - new_freq);
+
+	if (new_deviation < *deviation) {
+		fs->mdiv = m;
+		fs->pe = (unsigned long)*p;
+		fs->sdiv = si;
+		fs->nsdiv = 1;
+		*deviation = new_deviation;
+	}
+	return 0;
+}
+
+static int clk_fs660c32_dig_get_params(unsigned long input,
+		unsigned long output, struct stm_fs *fs)
+{
+	int si;	/* sdiv_reg (8 downto 0) */
+	int m; /* md value */
+	unsigned long new_freq, new_deviation;
+	/* initial condition to say: "infinite deviation" */
+	unsigned long deviation = ~0;
+	uint64_t p, p1, p2;	/* pe value */
+	int r1, r2;
+
+	struct stm_fs fs_tmp;
+
+	for (si = 0; (si <= 8) && deviation; si++) {
+
+		/* Boundary test to avoid useless iteration */
+		r1 = clk_fs660c32_get_pe(0, si, &deviation,
+				input, output, &p1, fs);
+		r2 = clk_fs660c32_get_pe(31, si, &deviation,
+				input, output, &p2, fs);
+
+		/* No solution */
+		if (r1 && r2 && (p1 > p2))
+			continue;
+
+		/* Try to find best deviation */
+		for (m = 1; (m < 31) && deviation; m++)
+			clk_fs660c32_get_pe(m, si, &deviation,
+					input, output, &p, fs);
+
+	}
+
+	if (deviation == ~0) /* No solution found */
+		return -1;
+
+	/* pe fine tuning if deviation not 0: +/- 2 around computed pe value */
+	if (deviation) {
+		fs_tmp.mdiv = fs->mdiv;
+		fs_tmp.sdiv = fs->sdiv;
+		fs_tmp.nsdiv = fs->nsdiv;
+
+		if (fs->pe > 2)
+			p2 = fs->pe - 2;
+		else
+			p2 = 0;
+
+		for (; p2 < 32768ll && (p2 <= (fs->pe + 2)); p2++) {
+			fs_tmp.pe = (unsigned long)p2;
+
+			clk_fs660c32_dig_get_rate(input, &fs_tmp, &new_freq);
+
+			new_deviation = abs(output - new_freq);
+
+			/* Check if this is a better solution */
+			if (new_deviation < deviation) {
+				fs->pe = (unsigned long)p2;
+				deviation = new_deviation;
+
+			}
+		}
+	}
+	return 0;
+}
+
 static int quadfs_fsynt_get_hw_value_for_recalc(struct st_clk_quadfs_fsynth *fs,
 		struct stm_fs *params)
 {
@@ -899,38 +721,14 @@
 	struct st_clk_quadfs_fsynth *fs = to_quadfs_fsynth(hw);
 	int (*clk_fs_get_rate)(unsigned long ,
 				const struct stm_fs *, unsigned long *);
-	struct stm_fs prev_params;
-	unsigned long prev_rate, rate = 0;
-	unsigned long diff_rate, prev_diff_rate = ~0;
-	int index;
+	int (*clk_fs_get_params)(unsigned long, unsigned long, struct stm_fs *);
+	unsigned long rate = 0;
 
 	clk_fs_get_rate = fs->data->get_rate;
+	clk_fs_get_params = fs->data->get_params;
 
-	for (index = 0; index < fs->data->rtbl_cnt; index++) {
-		prev_rate = rate;
-
-		*params = fs->data->rtbl[index];
-		prev_params = *params;
-
-		clk_fs_get_rate(prate, &fs->data->rtbl[index], &rate);
-
-		diff_rate = abs(drate - rate);
-
-		if (diff_rate > prev_diff_rate) {
-			rate = prev_rate;
-			*params = prev_params;
-			break;
-		}
-
-		prev_diff_rate = diff_rate;
-
-		if (drate == rate)
-			return rate;
-	}
-
-
-	if (index == fs->data->rtbl_cnt)
-		*params = prev_params;
+	if (!clk_fs_get_params(prate, drate, params))
+		clk_fs_get_rate(prate, params, &rate);
 
 	return rate;
 }
@@ -1063,34 +861,6 @@
 	return clk;
 }
 
-static const struct of_device_id quadfs_of_match[] = {
-	{
-		.compatible = "st,stih416-quadfs216",
-		.data = &st_fs216c65_416
-	},
-	{
-		.compatible = "st,stih416-quadfs432",
-		.data = &st_fs432c65_416
-	},
-	{
-		.compatible = "st,stih416-quadfs660-E",
-		.data = &st_fs660c32_E_416
-	},
-	{
-		.compatible = "st,stih416-quadfs660-F",
-		.data = &st_fs660c32_F_416
-	},
-	{
-		.compatible = "st,stih407-quadfs660-C",
-		.data = &st_fs660c32_C
-	},
-	{
-		.compatible = "st,stih407-quadfs660-D",
-		.data = &st_fs660c32_D
-	},
-	{}
-};
-
 static void __init st_of_create_quadfs_fsynths(
 		struct device_node *np, const char *pll_name,
 		struct clkgen_quadfs_data *quadfs, void __iomem *reg,
@@ -1150,18 +920,14 @@
 	of_clk_add_provider(np, of_clk_src_onecell_get, clk_data);
 }
 
-static void __init st_of_quadfs_setup(struct device_node *np)
+static void __init st_of_quadfs_setup(struct device_node *np,
+		struct clkgen_quadfs_data *data)
 {
-	const struct of_device_id *match;
 	struct clk *clk;
 	const char *pll_name, *clk_parent_name;
 	void __iomem *reg;
 	spinlock_t *lock;
 
-	match = of_match_node(quadfs_of_match, np);
-	if (WARN_ON(!match))
-		return;
-
 	reg = of_iomap(np, 0);
 	if (!reg)
 		return;
@@ -1180,8 +946,8 @@
 
 	spin_lock_init(lock);
 
-	clk = st_clk_register_quadfs_pll(pll_name, clk_parent_name,
-			(struct clkgen_quadfs_data *) match->data, reg, lock);
+	clk = st_clk_register_quadfs_pll(pll_name, clk_parent_name, data,
+			reg, lock);
 	if (IS_ERR(clk))
 		goto err_exit;
 	else
@@ -1190,11 +956,20 @@
 			__clk_get_name(clk_get_parent(clk)),
 			(unsigned int)clk_get_rate(clk));
 
-	st_of_create_quadfs_fsynths(np, pll_name,
-				    (struct clkgen_quadfs_data *)match->data,
-				    reg, lock);
+	st_of_create_quadfs_fsynths(np, pll_name, data, reg, lock);
 
 err_exit:
 	kfree(pll_name); /* No longer need local copy of the PLL name */
 }
-CLK_OF_DECLARE(quadfs, "st,quadfs", st_of_quadfs_setup);
+
+static void __init st_of_quadfs660C_setup(struct device_node *np)
+{
+	st_of_quadfs_setup(np, (struct clkgen_quadfs_data *) &st_fs660c32_C);
+}
+CLK_OF_DECLARE(quadfs660C, "st,quadfs-pll", st_of_quadfs660C_setup);
+
+static void __init st_of_quadfs660D_setup(struct device_node *np)
+{
+	st_of_quadfs_setup(np, (struct clkgen_quadfs_data *) &st_fs660c32_D);
+}
+CLK_OF_DECLARE(quadfs660D, "st,quadfs", st_of_quadfs660D_setup);
diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c
index b1e10ff..c514d39 100644
--- a/drivers/clk/st/clkgen-mux.c
+++ b/drivers/clk/st/clkgen-mux.c
@@ -19,9 +19,6 @@
 #include <linux/clk-provider.h>
 #include "clkgen.h"
 
-static DEFINE_SPINLOCK(clkgena_divmux_lock);
-static DEFINE_SPINLOCK(clkgenf_lock);
-
 static const char ** __init clkgen_mux_get_parents(struct device_node *np,
 						       int *num_parents)
 {
@@ -40,498 +37,6 @@
 	return parents;
 }
 
-/**
- * DOC: Clock mux with a programmable divider on each of its three inputs.
- *      The mux has an input setting which effectively gates its output.
- *
- * Traits of this clock:
- * prepare - clk_(un)prepare only ensures parent is (un)prepared
- * enable - clk_enable and clk_disable are functional & control gating
- * rate - set rate is supported
- * parent - set/get parent
- */
-
-#define NUM_INPUTS 3
-
-struct clkgena_divmux {
-	struct clk_hw hw;
-	/* Subclassed mux and divider structures */
-	struct clk_mux mux;
-	struct clk_divider div[NUM_INPUTS];
-	/* Enable/running feedback register bits for each input */
-	void __iomem *feedback_reg[NUM_INPUTS];
-	int feedback_bit_idx;
-
-	u8              muxsel;
-};
-
-#define to_clkgena_divmux(_hw) container_of(_hw, struct clkgena_divmux, hw)
-
-struct clkgena_divmux_data {
-	int num_outputs;
-	int mux_offset;
-	int mux_offset2;
-	int mux_start_bit;
-	int div_offsets[NUM_INPUTS];
-	int fb_offsets[NUM_INPUTS];
-	int fb_start_bit_idx;
-};
-
-#define CKGAX_CLKOPSRC_SWITCH_OFF 0x3
-
-static int clkgena_divmux_is_running(struct clkgena_divmux *mux)
-{
-	u32 regval = readl(mux->feedback_reg[mux->muxsel]);
-	u32 running = regval & BIT(mux->feedback_bit_idx);
-	return !!running;
-}
-
-static int clkgena_divmux_enable(struct clk_hw *hw)
-{
-	struct clkgena_divmux *genamux = to_clkgena_divmux(hw);
-	struct clk_hw *mux_hw = &genamux->mux.hw;
-	unsigned long timeout;
-	int ret = 0;
-
-	__clk_hw_set_clk(mux_hw, hw);
-
-	ret = clk_mux_ops.set_parent(mux_hw, genamux->muxsel);
-	if (ret)
-		return ret;
-
-	timeout = jiffies + msecs_to_jiffies(10);
-
-	while (!clkgena_divmux_is_running(genamux)) {
-		if (time_after(jiffies, timeout))
-			return -ETIMEDOUT;
-		cpu_relax();
-	}
-
-	return 0;
-}
-
-static void clkgena_divmux_disable(struct clk_hw *hw)
-{
-	struct clkgena_divmux *genamux = to_clkgena_divmux(hw);
-	struct clk_hw *mux_hw = &genamux->mux.hw;
-
-	__clk_hw_set_clk(mux_hw, hw);
-
-	clk_mux_ops.set_parent(mux_hw, CKGAX_CLKOPSRC_SWITCH_OFF);
-}
-
-static int clkgena_divmux_is_enabled(struct clk_hw *hw)
-{
-	struct clkgena_divmux *genamux = to_clkgena_divmux(hw);
-	struct clk_hw *mux_hw = &genamux->mux.hw;
-
-	__clk_hw_set_clk(mux_hw, hw);
-
-	return (s8)clk_mux_ops.get_parent(mux_hw) > 0;
-}
-
-static u8 clkgena_divmux_get_parent(struct clk_hw *hw)
-{
-	struct clkgena_divmux *genamux = to_clkgena_divmux(hw);
-	struct clk_hw *mux_hw = &genamux->mux.hw;
-
-	__clk_hw_set_clk(mux_hw, hw);
-
-	genamux->muxsel = clk_mux_ops.get_parent(mux_hw);
-	if ((s8)genamux->muxsel < 0) {
-		pr_debug("%s: %s: Invalid parent, setting to default.\n",
-		      __func__, clk_hw_get_name(hw));
-		genamux->muxsel = 0;
-	}
-
-	return genamux->muxsel;
-}
-
-static int clkgena_divmux_set_parent(struct clk_hw *hw, u8 index)
-{
-	struct clkgena_divmux *genamux = to_clkgena_divmux(hw);
-
-	if (index >= CKGAX_CLKOPSRC_SWITCH_OFF)
-		return -EINVAL;
-
-	genamux->muxsel = index;
-
-	/*
-	 * If the mux is already enabled, call enable directly to set the
-	 * new mux position and wait for it to start running again. Otherwise
-	 * do nothing.
-	 */
-	if (clkgena_divmux_is_enabled(hw))
-		clkgena_divmux_enable(hw);
-
-	return 0;
-}
-
-static unsigned long clkgena_divmux_recalc_rate(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clkgena_divmux *genamux = to_clkgena_divmux(hw);
-	struct clk_hw *div_hw = &genamux->div[genamux->muxsel].hw;
-
-	__clk_hw_set_clk(div_hw, hw);
-
-	return clk_divider_ops.recalc_rate(div_hw, parent_rate);
-}
-
-static int clkgena_divmux_set_rate(struct clk_hw *hw, unsigned long rate,
-				unsigned long parent_rate)
-{
-	struct clkgena_divmux *genamux = to_clkgena_divmux(hw);
-	struct clk_hw *div_hw = &genamux->div[genamux->muxsel].hw;
-
-	__clk_hw_set_clk(div_hw, hw);
-
-	return clk_divider_ops.set_rate(div_hw, rate, parent_rate);
-}
-
-static long clkgena_divmux_round_rate(struct clk_hw *hw, unsigned long rate,
-				   unsigned long *prate)
-{
-	struct clkgena_divmux *genamux = to_clkgena_divmux(hw);
-	struct clk_hw *div_hw = &genamux->div[genamux->muxsel].hw;
-
-	__clk_hw_set_clk(div_hw, hw);
-
-	return clk_divider_ops.round_rate(div_hw, rate, prate);
-}
-
-static const struct clk_ops clkgena_divmux_ops = {
-	.enable = clkgena_divmux_enable,
-	.disable = clkgena_divmux_disable,
-	.is_enabled = clkgena_divmux_is_enabled,
-	.get_parent = clkgena_divmux_get_parent,
-	.set_parent = clkgena_divmux_set_parent,
-	.round_rate = clkgena_divmux_round_rate,
-	.recalc_rate = clkgena_divmux_recalc_rate,
-	.set_rate = clkgena_divmux_set_rate,
-};
-
-/**
- * clk_register_genamux - register a genamux clock with the clock framework
- */
-static struct clk * __init clk_register_genamux(const char *name,
-				const char **parent_names, u8 num_parents,
-				void __iomem *reg,
-				const struct clkgena_divmux_data *muxdata,
-				u32 idx)
-{
-	/*
-	 * Fixed constants across all ClockgenA variants
-	 */
-	const int mux_width = 2;
-	const int divider_width = 5;
-	struct clkgena_divmux *genamux;
-	struct clk *clk;
-	struct clk_init_data init;
-	int i;
-
-	genamux = kzalloc(sizeof(*genamux), GFP_KERNEL);
-	if (!genamux)
-		return ERR_PTR(-ENOMEM);
-
-	init.name = name;
-	init.ops = &clkgena_divmux_ops;
-	init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE;
-	init.parent_names = parent_names;
-	init.num_parents = num_parents;
-
-	genamux->mux.lock  = &clkgena_divmux_lock;
-	genamux->mux.mask = BIT(mux_width) - 1;
-	genamux->mux.shift = muxdata->mux_start_bit + (idx * mux_width);
-	if (genamux->mux.shift > 31) {
-		/*
-		 * We have spilled into the second mux register so
-		 * adjust the register address and the bit shift accordingly
-		 */
-		genamux->mux.reg = reg + muxdata->mux_offset2;
-		genamux->mux.shift -= 32;
-	} else {
-		genamux->mux.reg   = reg + muxdata->mux_offset;
-	}
-
-	for (i = 0; i < NUM_INPUTS; i++) {
-		/*
-		 * Divider config for each input
-		 */
-		void __iomem *divbase = reg + muxdata->div_offsets[i];
-		genamux->div[i].width = divider_width;
-		genamux->div[i].reg = divbase + (idx * sizeof(u32));
-
-		/*
-		 * Mux enabled/running feedback register for each input.
-		 */
-		genamux->feedback_reg[i] = reg + muxdata->fb_offsets[i];
-	}
-
-	genamux->feedback_bit_idx = muxdata->fb_start_bit_idx + idx;
-	genamux->hw.init = &init;
-
-	clk = clk_register(NULL, &genamux->hw);
-	if (IS_ERR(clk)) {
-		kfree(genamux);
-		goto err;
-	}
-
-	pr_debug("%s: parent %s rate %lu\n",
-			__clk_get_name(clk),
-			__clk_get_name(clk_get_parent(clk)),
-			clk_get_rate(clk));
-err:
-	return clk;
-}
-
-static struct clkgena_divmux_data st_divmux_c65hs = {
-	.num_outputs = 4,
-	.mux_offset = 0x14,
-	.mux_start_bit = 0,
-	.div_offsets = { 0x800, 0x900, 0xb00 },
-	.fb_offsets = { 0x18, 0x1c, 0x20 },
-	.fb_start_bit_idx = 0,
-};
-
-static struct clkgena_divmux_data st_divmux_c65ls = {
-	.num_outputs = 14,
-	.mux_offset = 0x14,
-	.mux_offset2 = 0x24,
-	.mux_start_bit = 8,
-	.div_offsets = { 0x810, 0xa10, 0xb10 },
-	.fb_offsets = { 0x18, 0x1c, 0x20 },
-	.fb_start_bit_idx = 4,
-};
-
-static struct clkgena_divmux_data st_divmux_c32odf0 = {
-	.num_outputs = 8,
-	.mux_offset = 0x1c,
-	.mux_start_bit = 0,
-	.div_offsets = { 0x800, 0x900, 0xa60 },
-	.fb_offsets = { 0x2c, 0x24, 0x28 },
-	.fb_start_bit_idx = 0,
-};
-
-static struct clkgena_divmux_data st_divmux_c32odf1 = {
-	.num_outputs = 8,
-	.mux_offset = 0x1c,
-	.mux_start_bit = 16,
-	.div_offsets = { 0x820, 0x980, 0xa80 },
-	.fb_offsets = { 0x2c, 0x24, 0x28 },
-	.fb_start_bit_idx = 8,
-};
-
-static struct clkgena_divmux_data st_divmux_c32odf2 = {
-	.num_outputs = 8,
-	.mux_offset = 0x20,
-	.mux_start_bit = 0,
-	.div_offsets = { 0x840, 0xa20, 0xb10 },
-	.fb_offsets = { 0x2c, 0x24, 0x28 },
-	.fb_start_bit_idx = 16,
-};
-
-static struct clkgena_divmux_data st_divmux_c32odf3 = {
-	.num_outputs = 8,
-	.mux_offset = 0x20,
-	.mux_start_bit = 16,
-	.div_offsets = { 0x860, 0xa40, 0xb30 },
-	.fb_offsets = { 0x2c, 0x24, 0x28 },
-	.fb_start_bit_idx = 24,
-};
-
-static const struct of_device_id clkgena_divmux_of_match[] = {
-	{
-		.compatible = "st,clkgena-divmux-c65-hs",
-		.data = &st_divmux_c65hs,
-	},
-	{
-		.compatible = "st,clkgena-divmux-c65-ls",
-		.data = &st_divmux_c65ls,
-	},
-	{
-		.compatible = "st,clkgena-divmux-c32-odf0",
-		.data = &st_divmux_c32odf0,
-	},
-	{
-		.compatible = "st,clkgena-divmux-c32-odf1",
-		.data = &st_divmux_c32odf1,
-	},
-	{
-		.compatible = "st,clkgena-divmux-c32-odf2",
-		.data = &st_divmux_c32odf2,
-	},
-	{
-		.compatible = "st,clkgena-divmux-c32-odf3",
-		.data = &st_divmux_c32odf3,
-	},
-	{}
-};
-
-static void __iomem * __init clkgen_get_register_base(struct device_node *np)
-{
-	struct device_node *pnode;
-	void __iomem *reg;
-
-	pnode = of_get_parent(np);
-	if (!pnode)
-		return NULL;
-
-	reg = of_iomap(pnode, 0);
-
-	of_node_put(pnode);
-	return reg;
-}
-
-static void __init st_of_clkgena_divmux_setup(struct device_node *np)
-{
-	const struct of_device_id *match;
-	const struct clkgena_divmux_data *data;
-	struct clk_onecell_data *clk_data;
-	void __iomem *reg;
-	const char **parents;
-	int num_parents = 0, i;
-
-	match = of_match_node(clkgena_divmux_of_match, np);
-	if (WARN_ON(!match))
-		return;
-
-	data = match->data;
-
-	reg = clkgen_get_register_base(np);
-	if (!reg)
-		return;
-
-	parents = clkgen_mux_get_parents(np, &num_parents);
-	if (IS_ERR(parents))
-		goto err_parents;
-
-	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
-	if (!clk_data)
-		goto err_alloc;
-
-	clk_data->clk_num = data->num_outputs;
-	clk_data->clks = kcalloc(clk_data->clk_num, sizeof(struct clk *),
-				 GFP_KERNEL);
-
-	if (!clk_data->clks)
-		goto err_alloc_clks;
-
-	for (i = 0; i < clk_data->clk_num; i++) {
-		struct clk *clk;
-		const char *clk_name;
-
-		if (of_property_read_string_index(np, "clock-output-names",
-						  i, &clk_name))
-			break;
-
-		/*
-		 * If we read an empty clock name then the output is unused
-		 */
-		if (*clk_name == '\0')
-			continue;
-
-		clk = clk_register_genamux(clk_name, parents, num_parents,
-					   reg, data, i);
-
-		if (IS_ERR(clk))
-			goto err;
-
-		clk_data->clks[i] = clk;
-	}
-
-	kfree(parents);
-
-	of_clk_add_provider(np, of_clk_src_onecell_get, clk_data);
-	return;
-err:
-	kfree(clk_data->clks);
-err_alloc_clks:
-	kfree(clk_data);
-err_alloc:
-	kfree(parents);
-err_parents:
-	iounmap(reg);
-}
-CLK_OF_DECLARE(clkgenadivmux, "st,clkgena-divmux", st_of_clkgena_divmux_setup);
-
-struct clkgena_prediv_data {
-	u32 offset;
-	u8 shift;
-	struct clk_div_table *table;
-};
-
-static struct clk_div_table prediv_table16[] = {
-	{ .val = 0, .div = 1 },
-	{ .val = 1, .div = 16 },
-	{ .div = 0 },
-};
-
-static struct clkgena_prediv_data prediv_c65_data = {
-	.offset = 0x4c,
-	.shift = 31,
-	.table = prediv_table16,
-};
-
-static struct clkgena_prediv_data prediv_c32_data = {
-	.offset = 0x50,
-	.shift = 1,
-	.table = prediv_table16,
-};
-
-static const struct of_device_id clkgena_prediv_of_match[] = {
-	{ .compatible = "st,clkgena-prediv-c65", .data = &prediv_c65_data },
-	{ .compatible = "st,clkgena-prediv-c32", .data = &prediv_c32_data },
-	{}
-};
-
-static void __init st_of_clkgena_prediv_setup(struct device_node *np)
-{
-	const struct of_device_id *match;
-	void __iomem *reg;
-	const char *parent_name, *clk_name;
-	struct clk *clk;
-	const struct clkgena_prediv_data *data;
-
-	match = of_match_node(clkgena_prediv_of_match, np);
-	if (!match) {
-		pr_err("%s: No matching data\n", __func__);
-		return;
-	}
-
-	data = match->data;
-
-	reg = clkgen_get_register_base(np);
-	if (!reg)
-		return;
-
-	parent_name = of_clk_get_parent_name(np, 0);
-	if (!parent_name)
-		goto err;
-
-	if (of_property_read_string_index(np, "clock-output-names",
-					  0, &clk_name))
-		goto err;
-
-	clk = clk_register_divider_table(NULL, clk_name, parent_name,
-					 CLK_GET_RATE_NOCACHE,
-					 reg + data->offset, data->shift, 1,
-					 0, data->table, NULL);
-	if (IS_ERR(clk))
-		goto err;
-
-	of_clk_add_provider(np, of_clk_src_simple_get, clk);
-	pr_debug("%s: parent %s rate %u\n",
-		__clk_get_name(clk),
-		__clk_get_name(clk_get_parent(clk)),
-		(unsigned int)clk_get_rate(clk));
-
-	return;
-err:
-	iounmap(reg);
-}
-CLK_OF_DECLARE(clkgenaprediv, "st,clkgena-prediv", st_of_clkgena_prediv_setup);
-
 struct clkgen_mux_data {
 	u32 offset;
 	u8 shift;
@@ -541,49 +46,6 @@
 	u8 mux_flags;
 };
 
-static struct clkgen_mux_data clkgen_mux_c_vcc_hd_416 = {
-	.offset = 0,
-	.shift = 0,
-	.width = 1,
-};
-
-static struct clkgen_mux_data clkgen_mux_f_vcc_fvdp_416 = {
-	.offset = 0,
-	.shift = 0,
-	.width = 1,
-};
-
-static struct clkgen_mux_data clkgen_mux_f_vcc_hva_416 = {
-	.offset = 0,
-	.shift = 0,
-	.width = 1,
-};
-
-static struct clkgen_mux_data clkgen_mux_f_vcc_hd_416 = {
-	.offset = 0,
-	.shift = 16,
-	.width = 1,
-	.lock = &clkgenf_lock,
-};
-
-static struct clkgen_mux_data clkgen_mux_c_vcc_sd_416 = {
-	.offset = 0,
-	.shift = 17,
-	.width = 1,
-	.lock = &clkgenf_lock,
-};
-
-static struct clkgen_mux_data stih415_a9_mux_data = {
-	.offset = 0,
-	.shift = 1,
-	.width = 2,
-	.lock = &clkgen_a9_lock,
-};
-static struct clkgen_mux_data stih416_a9_mux_data = {
-	.offset = 0,
-	.shift = 0,
-	.width = 2,
-};
 static struct clkgen_mux_data stih407_a9_mux_data = {
 	.offset = 0x1a4,
 	.shift = 0,
@@ -591,58 +53,13 @@
 	.lock = &clkgen_a9_lock,
 };
 
-static const struct of_device_id mux_of_match[] = {
-	{
-		.compatible = "st,stih416-clkgenc-vcc-hd",
-		.data = &clkgen_mux_c_vcc_hd_416,
-	},
-	{
-		.compatible = "st,stih416-clkgenf-vcc-fvdp",
-		.data = &clkgen_mux_f_vcc_fvdp_416,
-	},
-	{
-		.compatible = "st,stih416-clkgenf-vcc-hva",
-		.data = &clkgen_mux_f_vcc_hva_416,
-	},
-	{
-		.compatible = "st,stih416-clkgenf-vcc-hd",
-		.data = &clkgen_mux_f_vcc_hd_416,
-	},
-	{
-		.compatible = "st,stih416-clkgenf-vcc-sd",
-		.data = &clkgen_mux_c_vcc_sd_416,
-	},
-	{
-		.compatible = "st,stih415-clkgen-a9-mux",
-		.data = &stih415_a9_mux_data,
-	},
-	{
-		.compatible = "st,stih416-clkgen-a9-mux",
-		.data = &stih416_a9_mux_data,
-	},
-	{
-		.compatible = "st,stih407-clkgen-a9-mux",
-		.data = &stih407_a9_mux_data,
-	},
-	{}
-};
-
-static void __init st_of_clkgen_mux_setup(struct device_node *np)
+static void __init st_of_clkgen_mux_setup(struct device_node *np,
+		struct clkgen_mux_data *data)
 {
-	const struct of_device_id *match;
 	struct clk *clk;
 	void __iomem *reg;
 	const char **parents;
-	int num_parents;
-	const struct clkgen_mux_data *data;
-
-	match = of_match_node(mux_of_match, np);
-	if (!match) {
-		pr_err("%s: No matching data\n", __func__);
-		return;
-	}
-
-	data = match->data;
+	int num_parents = 0;
 
 	reg = of_iomap(np, 0);
 	if (!reg) {
@@ -679,161 +96,10 @@
 err_parents:
 	iounmap(reg);
 }
-CLK_OF_DECLARE(clkgen_mux, "st,clkgen-mux", st_of_clkgen_mux_setup);
 
-#define VCC_MAX_CHANNELS 16
-
-#define VCC_GATE_OFFSET 0x0
-#define VCC_MUX_OFFSET 0x4
-#define VCC_DIV_OFFSET 0x8
-
-struct clkgen_vcc_data {
-	spinlock_t *lock;
-	unsigned long clk_flags;
-};
-
-static struct clkgen_vcc_data st_clkgenc_vcc_416 = {
-	.clk_flags = CLK_SET_RATE_PARENT,
-};
-
-static struct clkgen_vcc_data st_clkgenf_vcc_416 = {
-	.lock = &clkgenf_lock,
-};
-
-static const struct of_device_id vcc_of_match[] = {
-	{ .compatible = "st,stih416-clkgenc", .data = &st_clkgenc_vcc_416 },
-	{ .compatible = "st,stih416-clkgenf", .data = &st_clkgenf_vcc_416 },
-	{}
-};
-
-static void __init st_of_clkgen_vcc_setup(struct device_node *np)
+static void __init st_of_clkgen_a9_mux_setup(struct device_node *np)
 {
-	const struct of_device_id *match;
-	void __iomem *reg;
-	const char **parents;
-	int num_parents, i;
-	struct clk_onecell_data *clk_data;
-	const struct clkgen_vcc_data *data;
-
-	match = of_match_node(vcc_of_match, np);
-	if (WARN_ON(!match))
-		return;
-	data = match->data;
-
-	reg = of_iomap(np, 0);
-	if (!reg)
-		return;
-
-	parents = clkgen_mux_get_parents(np, &num_parents);
-	if (IS_ERR(parents))
-		goto err_parents;
-
-	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
-	if (!clk_data)
-		goto err_alloc;
-
-	clk_data->clk_num = VCC_MAX_CHANNELS;
-	clk_data->clks = kcalloc(clk_data->clk_num, sizeof(struct clk *),
-				 GFP_KERNEL);
-
-	if (!clk_data->clks)
-		goto err_alloc_clks;
-
-	for (i = 0; i < clk_data->clk_num; i++) {
-		struct clk *clk;
-		const char *clk_name;
-		struct clk_gate *gate;
-		struct clk_divider *div;
-		struct clk_mux *mux;
-
-		if (of_property_read_string_index(np, "clock-output-names",
-						  i, &clk_name))
-			break;
-
-		/*
-		 * If we read an empty clock name then the output is unused
-		 */
-		if (*clk_name == '\0')
-			continue;
-
-		gate = kzalloc(sizeof(*gate), GFP_KERNEL);
-		if (!gate)
-			goto err;
-
-		div = kzalloc(sizeof(*div), GFP_KERNEL);
-		if (!div) {
-			kfree(gate);
-			goto err;
-		}
-
-		mux = kzalloc(sizeof(*mux), GFP_KERNEL);
-		if (!mux) {
-			kfree(gate);
-			kfree(div);
-			goto err;
-		}
-
-		gate->reg = reg + VCC_GATE_OFFSET;
-		gate->bit_idx = i;
-		gate->flags = CLK_GATE_SET_TO_DISABLE;
-		gate->lock = data->lock;
-
-		div->reg = reg + VCC_DIV_OFFSET;
-		div->shift = 2 * i;
-		div->width = 2;
-		div->flags = CLK_DIVIDER_POWER_OF_TWO |
-			CLK_DIVIDER_ROUND_CLOSEST;
-
-		mux->reg = reg + VCC_MUX_OFFSET;
-		mux->shift = 2 * i;
-		mux->mask = 0x3;
-
-		clk = clk_register_composite(NULL, clk_name, parents,
-					     num_parents,
-					     &mux->hw, &clk_mux_ops,
-					     &div->hw, &clk_divider_ops,
-					     &gate->hw, &clk_gate_ops,
-					     data->clk_flags |
-					     CLK_GET_RATE_NOCACHE);
-		if (IS_ERR(clk)) {
-			kfree(gate);
-			kfree(div);
-			kfree(mux);
-			goto err;
-		}
-
-		pr_debug("%s: parent %s rate %u\n",
-			__clk_get_name(clk),
-			__clk_get_name(clk_get_parent(clk)),
-			(unsigned int)clk_get_rate(clk));
-
-		clk_data->clks[i] = clk;
-	}
-
-	kfree(parents);
-
-	of_clk_add_provider(np, of_clk_src_onecell_get, clk_data);
-	return;
-
-err:
-	for (i = 0; i < clk_data->clk_num; i++) {
-		struct clk_composite *composite;
-
-		if (!clk_data->clks[i])
-			continue;
-
-		composite = to_clk_composite(__clk_get_hw(clk_data->clks[i]));
-		kfree(to_clk_gate(composite->gate_hw));
-		kfree(to_clk_divider(composite->rate_hw));
-		kfree(to_clk_mux(composite->mux_hw));
-	}
-
-	kfree(clk_data->clks);
-err_alloc_clks:
-	kfree(clk_data);
-err_alloc:
-	kfree(parents);
-err_parents:
-	iounmap(reg);
+	st_of_clkgen_mux_setup(np, &stih407_a9_mux_data);
 }
-CLK_OF_DECLARE(clkgen_vcc, "st,clkgen-vcc", st_of_clkgen_vcc_setup);
+CLK_OF_DECLARE(clkgen_a9mux, "st,stih407-clkgen-a9-mux",
+		st_of_clkgen_a9_mux_setup);
diff --git a/drivers/clk/st/clkgen-pll.c b/drivers/clk/st/clkgen-pll.c
index 0b5990e..25bda48 100644
--- a/drivers/clk/st/clkgen-pll.c
+++ b/drivers/clk/st/clkgen-pll.c
@@ -26,14 +26,6 @@
 DEFINE_SPINLOCK(clkgen_a9_lock);
 
 /*
- * Common PLL configuration register bits for PLL800 and PLL1600 C65
- */
-#define C65_MDIV_PLL800_MASK	(0xff)
-#define C65_MDIV_PLL1600_MASK	(0x7)
-#define C65_NDIV_MASK		(0xff)
-#define C65_PDIV_MASK		(0x7)
-
-/*
  * PLL configuration register bits for PLL3200 C32
  */
 #define C32_NDIV_MASK (0xff)
@@ -70,144 +62,10 @@
 	const struct clk_ops *ops;
 };
 
-static const struct clk_ops st_pll1600c65_ops;
-static const struct clk_ops st_pll800c65_ops;
 static const struct clk_ops stm_pll3200c32_ops;
 static const struct clk_ops stm_pll3200c32_a9_ops;
-static const struct clk_ops st_pll1200c32_ops;
 static const struct clk_ops stm_pll4600c28_ops;
 
-static const struct clkgen_pll_data st_pll1600c65_ax = {
-	.pdn_status	= CLKGEN_FIELD(0x0, 0x1,			19),
-	.pdn_ctrl	= CLKGEN_FIELD(0x10,	0x1,			0),
-	.locked_status	= CLKGEN_FIELD(0x0, 0x1,			31),
-	.mdiv		= CLKGEN_FIELD(0x0, C65_MDIV_PLL1600_MASK,	0),
-	.ndiv		= CLKGEN_FIELD(0x0, C65_NDIV_MASK,		8),
-	.ops		= &st_pll1600c65_ops
-};
-
-static const struct clkgen_pll_data st_pll800c65_ax = {
-	.pdn_status	= CLKGEN_FIELD(0x0,	0x1,			19),
-	.pdn_ctrl	= CLKGEN_FIELD(0xC,	0x1,			1),
-	.locked_status	= CLKGEN_FIELD(0x0,	0x1,			31),
-	.mdiv		= CLKGEN_FIELD(0x0,	C65_MDIV_PLL800_MASK,	0),
-	.ndiv		= CLKGEN_FIELD(0x0,	C65_NDIV_MASK,		8),
-	.pdiv		= CLKGEN_FIELD(0x0,	C65_PDIV_MASK,		16),
-	.ops		= &st_pll800c65_ops
-};
-
-static const struct clkgen_pll_data st_pll3200c32_a1x_0 = {
-	.pdn_status	= CLKGEN_FIELD(0x0,	0x1,			31),
-	.pdn_ctrl	= CLKGEN_FIELD(0x18,	0x1,			0),
-	.locked_status	= CLKGEN_FIELD(0x4,	0x1,			31),
-	.ndiv		= CLKGEN_FIELD(0x0,	C32_NDIV_MASK,		0x0),
-	.idf		= CLKGEN_FIELD(0x4,	C32_IDF_MASK,		0x0),
-	.num_odfs = 4,
-	.odf =	{	CLKGEN_FIELD(0x54,	C32_ODF_MASK,		4),
-			CLKGEN_FIELD(0x54,	C32_ODF_MASK,		10),
-			CLKGEN_FIELD(0x54,	C32_ODF_MASK,		16),
-			CLKGEN_FIELD(0x54,	C32_ODF_MASK,		22) },
-	.odf_gate = {	CLKGEN_FIELD(0x54,	0x1,			0),
-			CLKGEN_FIELD(0x54,	0x1,			1),
-			CLKGEN_FIELD(0x54,	0x1,			2),
-			CLKGEN_FIELD(0x54,	0x1,			3) },
-	.ops		= &stm_pll3200c32_ops,
-};
-
-static const struct clkgen_pll_data st_pll3200c32_a1x_1 = {
-	.pdn_status	= CLKGEN_FIELD(0xC,	0x1,			31),
-	.pdn_ctrl	= CLKGEN_FIELD(0x18,	0x1,			1),
-	.locked_status	= CLKGEN_FIELD(0x10,	0x1,			31),
-	.ndiv		= CLKGEN_FIELD(0xC,	C32_NDIV_MASK,		0x0),
-	.idf		= CLKGEN_FIELD(0x10,	C32_IDF_MASK,		0x0),
-	.num_odfs = 4,
-	.odf = {	CLKGEN_FIELD(0x58,	C32_ODF_MASK,		4),
-			CLKGEN_FIELD(0x58,	C32_ODF_MASK,		10),
-			CLKGEN_FIELD(0x58,	C32_ODF_MASK,		16),
-			CLKGEN_FIELD(0x58,	C32_ODF_MASK,		22) },
-	.odf_gate = {	CLKGEN_FIELD(0x58,	0x1,			0),
-			CLKGEN_FIELD(0x58,	0x1,			1),
-			CLKGEN_FIELD(0x58,	0x1,			2),
-			CLKGEN_FIELD(0x58,	0x1,			3) },
-	.ops		= &stm_pll3200c32_ops,
-};
-
-/* 415 specific */
-static const struct clkgen_pll_data st_pll3200c32_a9_415 = {
-	.pdn_status	= CLKGEN_FIELD(0x0,	0x1,			0),
-	.pdn_ctrl	= CLKGEN_FIELD(0x0,	0x1,			0),
-	.locked_status	= CLKGEN_FIELD(0x6C,	0x1,			0),
-	.ndiv		= CLKGEN_FIELD(0x0,	C32_NDIV_MASK,		9),
-	.idf		= CLKGEN_FIELD(0x0,	C32_IDF_MASK,		22),
-	.num_odfs = 1,
-	.odf =		{ CLKGEN_FIELD(0x0,	C32_ODF_MASK,		3) },
-	.odf_gate =	{ CLKGEN_FIELD(0x0,	0x1,			28) },
-	.ops		= &stm_pll3200c32_ops,
-};
-
-static const struct clkgen_pll_data st_pll3200c32_ddr_415 = {
-	.pdn_status	= CLKGEN_FIELD(0x0,	0x1,			0),
-	.pdn_ctrl	= CLKGEN_FIELD(0x0,	0x1,			0),
-	.locked_status	= CLKGEN_FIELD(0x100,	0x1,			0),
-	.ndiv		= CLKGEN_FIELD(0x8,	C32_NDIV_MASK,		0),
-	.idf		= CLKGEN_FIELD(0x0,	C32_IDF_MASK,		25),
-	.num_odfs = 2,
-	.odf		= { CLKGEN_FIELD(0x8,	C32_ODF_MASK,		8),
-			    CLKGEN_FIELD(0x8,	C32_ODF_MASK,		14) },
-	.odf_gate	= { CLKGEN_FIELD(0x4,	0x1,			28),
-			    CLKGEN_FIELD(0x4,	0x1,			29) },
-	.ops		= &stm_pll3200c32_ops,
-};
-
-static const struct clkgen_pll_data st_pll1200c32_gpu_415 = {
-	.pdn_status	= CLKGEN_FIELD(0x4,	0x1,			0),
-	.pdn_ctrl	= CLKGEN_FIELD(0x4,	0x1,			0),
-	.locked_status	= CLKGEN_FIELD(0x168,	0x1,			0),
-	.ldf		= CLKGEN_FIELD(0x0,	C32_LDF_MASK,		3),
-	.idf		= CLKGEN_FIELD(0x0,	C32_IDF_MASK,		0),
-	.num_odfs = 0,
-	.odf		= { CLKGEN_FIELD(0x0,	C32_ODF_MASK,		10) },
-	.ops		= &st_pll1200c32_ops,
-};
-
-/* 416 specific */
-static const struct clkgen_pll_data st_pll3200c32_a9_416 = {
-	.pdn_status	= CLKGEN_FIELD(0x0,	0x1,			0),
-	.pdn_ctrl	= CLKGEN_FIELD(0x0,	0x1,			0),
-	.locked_status	= CLKGEN_FIELD(0x6C,	0x1,			0),
-	.ndiv		= CLKGEN_FIELD(0x8,	C32_NDIV_MASK,		0),
-	.idf		= CLKGEN_FIELD(0x0,	C32_IDF_MASK,		25),
-	.num_odfs = 1,
-	.odf		= { CLKGEN_FIELD(0x8,	C32_ODF_MASK,		8) },
-	.odf_gate	= { CLKGEN_FIELD(0x4,	0x1,			28) },
-	.ops		= &stm_pll3200c32_ops,
-};
-
-static const struct clkgen_pll_data st_pll3200c32_ddr_416 = {
-	.pdn_status	= CLKGEN_FIELD(0x0,	0x1,			0),
-	.pdn_ctrl	= CLKGEN_FIELD(0x0,	0x1,			0),
-	.locked_status	= CLKGEN_FIELD(0x10C,	0x1,			0),
-	.ndiv		= CLKGEN_FIELD(0x8,	C32_NDIV_MASK,		0),
-	.idf		= CLKGEN_FIELD(0x0,	C32_IDF_MASK,		25),
-	.num_odfs = 2,
-	.odf		= { CLKGEN_FIELD(0x8,	C32_ODF_MASK,		8),
-			    CLKGEN_FIELD(0x8,	C32_ODF_MASK,		14) },
-	.odf_gate	= { CLKGEN_FIELD(0x4,	0x1,			28),
-			    CLKGEN_FIELD(0x4,	0x1,			29) },
-	.ops		= &stm_pll3200c32_ops,
-};
-
-static const struct clkgen_pll_data st_pll1200c32_gpu_416 = {
-	.pdn_status	= CLKGEN_FIELD(0x8E4,	0x1,			3),
-	.pdn_ctrl	= CLKGEN_FIELD(0x8E4,	0x1,			3),
-	.locked_status	= CLKGEN_FIELD(0x90C,	0x1,			0),
-	.ldf		= CLKGEN_FIELD(0x0,	C32_LDF_MASK,		3),
-	.idf		= CLKGEN_FIELD(0x0,	C32_IDF_MASK,		0),
-	.num_odfs = 0,
-	.odf		= { CLKGEN_FIELD(0x0,	C32_ODF_MASK,		10) },
-	.ops		= &st_pll1200c32_ops,
-};
-
 static const struct clkgen_pll_data st_pll3200c32_407_a0 = {
 	/* 407 A0 */
 	.pdn_status	= CLKGEN_FIELD(0x2a0,	0x1,			8),
@@ -410,57 +268,6 @@
 		spin_unlock_irqrestore(pll->lock, flags);
 }
 
-static unsigned long recalc_stm_pll800c65(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clkgen_pll *pll = to_clkgen_pll(hw);
-	unsigned long mdiv, ndiv, pdiv;
-	unsigned long rate;
-	uint64_t res;
-
-	if (!clkgen_pll_is_enabled(hw) || !clkgen_pll_is_locked(hw))
-		return 0;
-
-	pdiv = CLKGEN_READ(pll, pdiv);
-	mdiv = CLKGEN_READ(pll, mdiv);
-	ndiv = CLKGEN_READ(pll, ndiv);
-
-	if (!mdiv)
-		mdiv++; /* mdiv=0 or 1 => MDIV=1 */
-
-	res = (uint64_t)2 * (uint64_t)parent_rate * (uint64_t)ndiv;
-	rate = (unsigned long)div64_u64(res, mdiv * (1 << pdiv));
-
-	pr_debug("%s:%s rate %lu\n", clk_hw_get_name(hw), __func__, rate);
-
-	return rate;
-
-}
-
-static unsigned long recalc_stm_pll1600c65(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clkgen_pll *pll = to_clkgen_pll(hw);
-	unsigned long mdiv, ndiv;
-	unsigned long rate;
-
-	if (!clkgen_pll_is_enabled(hw) || !clkgen_pll_is_locked(hw))
-		return 0;
-
-	mdiv = CLKGEN_READ(pll, mdiv);
-	ndiv = CLKGEN_READ(pll, ndiv);
-
-	if (!mdiv)
-		mdiv = 1;
-
-	/* Note: input is divided by 1000 to avoid overflow */
-	rate = ((2 * (parent_rate / 1000) * ndiv) / mdiv) * 1000;
-
-	pr_debug("%s:%s rate %lu\n", clk_hw_get_name(hw), __func__, rate);
-
-	return rate;
-}
-
 static int clk_pll3200c32_get_params(unsigned long input, unsigned long output,
 			  struct stm_pll *pll)
 {
@@ -608,33 +415,6 @@
 	return 0;
 }
 
-static unsigned long recalc_stm_pll1200c32(struct clk_hw *hw,
-		unsigned long parent_rate)
-{
-	struct clkgen_pll *pll = to_clkgen_pll(hw);
-	unsigned long odf, ldf, idf;
-	unsigned long rate;
-
-	if (!clkgen_pll_is_enabled(hw) || !clkgen_pll_is_locked(hw))
-		return 0;
-
-	odf = CLKGEN_READ(pll, odf[0]);
-	ldf = CLKGEN_READ(pll, ldf);
-	idf = CLKGEN_READ(pll, idf);
-
-	if (!idf) /* idf==0 means 1 */
-		idf = 1;
-	if (!odf) /* odf==0 means 1 */
-		odf = 1;
-
-	/* Note: input is divided by 1000 to avoid overflow */
-	rate = (((parent_rate / 1000) * ldf) / (odf * idf)) * 1000;
-
-	pr_debug("%s:%s rate %lu\n", clk_hw_get_name(hw), __func__, rate);
-
-	return rate;
-}
-
 /* PLL output structure
  * FVCO >> /2 >> FVCOBY2 (no output)
  *                 |> Divider (ODF) >> PHI
@@ -792,20 +572,6 @@
 	return 0;
 }
 
-static const struct clk_ops st_pll1600c65_ops = {
-	.enable		= clkgen_pll_enable,
-	.disable	= clkgen_pll_disable,
-	.is_enabled	= clkgen_pll_is_enabled,
-	.recalc_rate	= recalc_stm_pll1600c65,
-};
-
-static const struct clk_ops st_pll800c65_ops = {
-	.enable		= clkgen_pll_enable,
-	.disable	= clkgen_pll_disable,
-	.is_enabled	= clkgen_pll_is_enabled,
-	.recalc_rate	= recalc_stm_pll800c65,
-};
-
 static const struct clk_ops stm_pll3200c32_ops = {
 	.enable		= clkgen_pll_enable,
 	.disable	= clkgen_pll_disable,
@@ -822,13 +588,6 @@
 	.set_rate	= set_rate_stm_pll3200c32,
 };
 
-static const struct clk_ops st_pll1200c32_ops = {
-	.enable		= clkgen_pll_enable,
-	.disable	= clkgen_pll_disable,
-	.is_enabled	= clkgen_pll_is_enabled,
-	.recalc_rate	= recalc_stm_pll1200c32,
-};
-
 static const struct clk_ops stm_pll4600c28_ops = {
 	.enable		= clkgen_pll_enable,
 	.disable	= clkgen_pll_disable,
@@ -877,22 +636,6 @@
 	return clk;
 }
 
-static struct clk * __init clkgen_c65_lsdiv_register(const char *parent_name,
-						     const char *clk_name)
-{
-	struct clk *clk;
-
-	clk = clk_register_fixed_factor(NULL, clk_name, parent_name, 0, 1, 2);
-	if (IS_ERR(clk))
-		return clk;
-
-	pr_debug("%s: parent %s rate %lu\n",
-			__clk_get_name(clk),
-			__clk_get_name(clk_get_parent(clk)),
-			clk_get_rate(clk));
-	return clk;
-}
-
 static void __iomem * __init clkgen_get_register_base(
 				struct device_node *np)
 {
@@ -909,89 +652,6 @@
 	return reg;
 }
 
-#define CLKGENAx_PLL0_OFFSET 0x0
-#define CLKGENAx_PLL1_OFFSET 0x4
-
-static void __init clkgena_c65_pll_setup(struct device_node *np)
-{
-	const int num_pll_outputs = 3;
-	struct clk_onecell_data *clk_data;
-	const char *parent_name;
-	void __iomem *reg;
-	const char *clk_name;
-
-	parent_name = of_clk_get_parent_name(np, 0);
-	if (!parent_name)
-		return;
-
-	reg = clkgen_get_register_base(np);
-	if (!reg)
-		return;
-
-	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
-	if (!clk_data)
-		return;
-
-	clk_data->clk_num = num_pll_outputs;
-	clk_data->clks = kzalloc(clk_data->clk_num * sizeof(struct clk *),
-				 GFP_KERNEL);
-
-	if (!clk_data->clks)
-		goto err;
-
-	if (of_property_read_string_index(np, "clock-output-names",
-					  0, &clk_name))
-		goto err;
-
-	/*
-	 * PLL0 HS (high speed) output
-	 */
-	clk_data->clks[0] = clkgen_pll_register(parent_name,
-			(struct clkgen_pll_data *) &st_pll1600c65_ax,
-			reg + CLKGENAx_PLL0_OFFSET, 0, clk_name, NULL);
-
-	if (IS_ERR(clk_data->clks[0]))
-		goto err;
-
-	if (of_property_read_string_index(np, "clock-output-names",
-					  1, &clk_name))
-		goto err;
-
-	/*
-	 * PLL0 LS (low speed) output, which is a fixed divide by 2 of the
-	 * high speed output.
-	 */
-	clk_data->clks[1] = clkgen_c65_lsdiv_register(__clk_get_name
-						      (clk_data->clks[0]),
-						      clk_name);
-
-	if (IS_ERR(clk_data->clks[1]))
-		goto err;
-
-	if (of_property_read_string_index(np, "clock-output-names",
-					  2, &clk_name))
-		goto err;
-
-	/*
-	 * PLL1 output
-	 */
-	clk_data->clks[2] = clkgen_pll_register(parent_name,
-			(struct clkgen_pll_data *) &st_pll800c65_ax,
-			reg + CLKGENAx_PLL1_OFFSET, 0, clk_name, NULL);
-
-	if (IS_ERR(clk_data->clks[2]))
-		goto err;
-
-	of_clk_add_provider(np, of_clk_src_onecell_get, clk_data);
-	return;
-
-err:
-	kfree(clk_data->clks);
-	kfree(clk_data);
-}
-CLK_OF_DECLARE(clkgena_c65_plls,
-	       "st,clkgena-plls-c65", clkgena_c65_pll_setup);
-
 static struct clk * __init clkgen_odf_register(const char *parent_name,
 					       void __iomem *reg,
 					       struct clkgen_pll_data *pll_data,
@@ -1042,72 +702,17 @@
 	return clk;
 }
 
-static const struct of_device_id c32_pll_of_match[] = {
-	{
-		.compatible = "st,plls-c32-a1x-0",
-		.data = &st_pll3200c32_a1x_0,
-	},
-	{
-		.compatible = "st,plls-c32-a1x-1",
-		.data = &st_pll3200c32_a1x_1,
-	},
-	{
-		.compatible = "st,stih415-plls-c32-a9",
-		.data = &st_pll3200c32_a9_415,
-	},
-	{
-		.compatible = "st,stih415-plls-c32-ddr",
-		.data = &st_pll3200c32_ddr_415,
-	},
-	{
-		.compatible = "st,stih416-plls-c32-a9",
-		.data = &st_pll3200c32_a9_416,
-	},
-	{
-		.compatible = "st,stih416-plls-c32-ddr",
-		.data = &st_pll3200c32_ddr_416,
-	},
-	{
-		.compatible = "st,stih407-plls-c32-a0",
-		.data = &st_pll3200c32_407_a0,
-	},
-	{
-		.compatible = "st,plls-c32-cx_0",
-		.data = &st_pll3200c32_cx_0,
-	},
-	{
-		.compatible = "st,plls-c32-cx_1",
-		.data = &st_pll3200c32_cx_1,
-	},
-	{
-		.compatible = "st,stih407-plls-c32-a9",
-		.data = &st_pll3200c32_407_a9,
-	},
-	{
-		.compatible = "st,stih418-plls-c28-a9",
-		.data = &st_pll4600c28_418_a9,
-	},
-	{}
-};
 
-static void __init clkgen_c32_pll_setup(struct device_node *np)
+static void __init clkgen_c32_pll_setup(struct device_node *np,
+		struct clkgen_pll_data *data)
 {
-	const struct of_device_id *match;
 	struct clk *clk;
 	const char *parent_name, *pll_name;
 	void __iomem *pll_base;
 	int num_odfs, odf;
 	struct clk_onecell_data *clk_data;
-	struct clkgen_pll_data	*data;
 	unsigned long pll_flags = 0;
 
-	match = of_match_node(c32_pll_of_match, np);
-	if (!match) {
-		pr_err("%s: No matching data\n", __func__);
-		return;
-	}
-
-	data = (struct clkgen_pll_data *) match->data;
 
 	parent_name = of_clk_get_parent_name(np, 0);
 	if (!parent_name)
@@ -1166,59 +771,30 @@
 	kfree(clk_data->clks);
 	kfree(clk_data);
 }
-CLK_OF_DECLARE(clkgen_c32_pll, "st,clkgen-plls-c32", clkgen_c32_pll_setup);
-
-static const struct of_device_id c32_gpu_pll_of_match[] = {
-	{
-		.compatible = "st,stih415-gpu-pll-c32",
-		.data = &st_pll1200c32_gpu_415,
-	},
-	{
-		.compatible = "st,stih416-gpu-pll-c32",
-		.data = &st_pll1200c32_gpu_416,
-	},
-	{}
-};
-
-static void __init clkgengpu_c32_pll_setup(struct device_node *np)
+static void __init clkgen_c32_pll0_setup(struct device_node *np)
 {
-	const struct of_device_id *match;
-	struct clk *clk;
-	const char *parent_name;
-	void __iomem *reg;
-	const char *clk_name;
-	struct clkgen_pll_data	*data;
-
-	match = of_match_node(c32_gpu_pll_of_match, np);
-	if (!match) {
-		pr_err("%s: No matching data\n", __func__);
-		return;
-	}
-
-	data = (struct clkgen_pll_data *)match->data;
-
-	parent_name = of_clk_get_parent_name(np, 0);
-	if (!parent_name)
-		return;
-
-	reg = clkgen_get_register_base(np);
-	if (!reg)
-		return;
-
-	if (of_property_read_string_index(np, "clock-output-names",
-					  0, &clk_name))
-		return;
-
-	/*
-	 * PLL 1200MHz output
-	 */
-	clk = clkgen_pll_register(parent_name, data, reg,
-				  0, clk_name, data->lock);
-
-	if (!IS_ERR(clk))
-		of_clk_add_provider(np, of_clk_src_simple_get, clk);
-
-	return;
+	clkgen_c32_pll_setup(np,
+			(struct clkgen_pll_data *) &st_pll3200c32_cx_0);
 }
-CLK_OF_DECLARE(clkgengpu_c32_pll,
-	       "st,clkgengpu-pll-c32", clkgengpu_c32_pll_setup);
+CLK_OF_DECLARE(c32_pll0, "st,clkgen-pll0", clkgen_c32_pll0_setup);
+
+static void __init clkgen_c32_pll1_setup(struct device_node *np)
+{
+	clkgen_c32_pll_setup(np,
+			(struct clkgen_pll_data *) &st_pll3200c32_cx_1);
+}
+CLK_OF_DECLARE(c32_pll1, "st,clkgen-pll1", clkgen_c32_pll1_setup);
+
+static void __init clkgen_c32_plla9_setup(struct device_node *np)
+{
+	clkgen_c32_pll_setup(np,
+			(struct clkgen_pll_data *) &st_pll3200c32_407_a9);
+}
+CLK_OF_DECLARE(c32_plla9, "st,stih407-clkgen-plla9", clkgen_c32_plla9_setup);
+
+static void __init clkgen_c28_plla9_setup(struct device_node *np)
+{
+	clkgen_c32_pll_setup(np,
+			(struct clkgen_pll_data *) &st_pll4600c28_418_a9);
+}
+CLK_OF_DECLARE(c28_plla9, "st,stih418-clkgen-plla9", clkgen_c28_plla9_setup);
diff --git a/drivers/clk/sunxi-ng/Kconfig b/drivers/clk/sunxi-ng/Kconfig
index 2afcbd3..254d952 100644
--- a/drivers/clk/sunxi-ng/Kconfig
+++ b/drivers/clk/sunxi-ng/Kconfig
@@ -1,5 +1,6 @@
 config SUNXI_CCU
 	bool "Clock support for Allwinner SoCs"
+	depends on ARCH_SUNXI || COMPILE_TEST
 	default ARCH_SUNXI
 
 if SUNXI_CCU
@@ -19,6 +20,10 @@
 config SUNXI_CCU_MUX
 	bool
 
+config SUNXI_CCU_MULT
+	bool
+	select SUNXI_CCU_MUX
+
 config SUNXI_CCU_PHASE
 	bool
 
@@ -51,6 +56,40 @@
 
 # SoC Drivers
 
+config SUN6I_A31_CCU
+	bool "Support for the Allwinner A31/A31s CCU"
+	select SUNXI_CCU_DIV
+	select SUNXI_CCU_NK
+	select SUNXI_CCU_NKM
+	select SUNXI_CCU_NM
+	select SUNXI_CCU_MP
+	select SUNXI_CCU_PHASE
+	default MACH_SUN6I
+
+config SUN8I_A23_CCU
+	bool "Support for the Allwinner A23 CCU"
+	select SUNXI_CCU_DIV
+	select SUNXI_CCU_MULT
+	select SUNXI_CCU_NK
+	select SUNXI_CCU_NKM
+	select SUNXI_CCU_NKMP
+	select SUNXI_CCU_NM
+	select SUNXI_CCU_MP
+	select SUNXI_CCU_PHASE
+	default MACH_SUN8I
+
+config SUN8I_A33_CCU
+	bool "Support for the Allwinner A33 CCU"
+	select SUNXI_CCU_DIV
+	select SUNXI_CCU_MULT
+	select SUNXI_CCU_NK
+	select SUNXI_CCU_NKM
+	select SUNXI_CCU_NKMP
+	select SUNXI_CCU_NM
+	select SUNXI_CCU_MP
+	select SUNXI_CCU_PHASE
+	default MACH_SUN8I
+
 config SUN8I_H3_CCU
 	bool "Support for the Allwinner H3 CCU"
 	select SUNXI_CCU_DIV
diff --git a/drivers/clk/sunxi-ng/Makefile b/drivers/clk/sunxi-ng/Makefile
index 633ce64..106cba2 100644
--- a/drivers/clk/sunxi-ng/Makefile
+++ b/drivers/clk/sunxi-ng/Makefile
@@ -7,6 +7,7 @@
 obj-$(CONFIG_SUNXI_CCU_FRAC)	+= ccu_frac.o
 obj-$(CONFIG_SUNXI_CCU_GATE)	+= ccu_gate.o
 obj-$(CONFIG_SUNXI_CCU_MUX)	+= ccu_mux.o
+obj-$(CONFIG_SUNXI_CCU_MULT)	+= ccu_mult.o
 obj-$(CONFIG_SUNXI_CCU_PHASE)	+= ccu_phase.o
 
 # Multi-factor clocks
@@ -17,4 +18,7 @@
 obj-$(CONFIG_SUNXI_CCU_MP)	+= ccu_mp.o
 
 # SoC support
+obj-$(CONFIG_SUN6I_A31_CCU)	+= ccu-sun6i-a31.o
+obj-$(CONFIG_SUN8I_A23_CCU)	+= ccu-sun8i-a23.o
+obj-$(CONFIG_SUN8I_A33_CCU)	+= ccu-sun8i-a33.o
 obj-$(CONFIG_SUN8I_H3_CCU)	+= ccu-sun8i-h3.o
diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.c b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
new file mode 100644
index 0000000..7959646
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.c
@@ -0,0 +1,1239 @@
+/*
+ * Copyright (c) 2016 Chen-Yu Tsai
+ *
+ * Chen-Yu Tsai <wens@csie.org>
+ *
+ * Based on ccu-sun8i-h3.c by Maxime Ripard.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+
+#include "ccu_common.h"
+#include "ccu_reset.h"
+
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_mp.h"
+#include "ccu_mult.h"
+#include "ccu_mux.h"
+#include "ccu_nk.h"
+#include "ccu_nkm.h"
+#include "ccu_nkmp.h"
+#include "ccu_nm.h"
+#include "ccu_phase.h"
+
+#include "ccu-sun6i-a31.h"
+
+static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_cpu_clk, "pll-cpu",
+				     "osc24M", 0x000,
+				     8, 5,	/* N */
+				     4, 2,	/* K */
+				     0, 2,	/* M */
+				     BIT(31),	/* gate */
+				     BIT(28),	/* lock */
+				     0);
+
+/*
+ * The Audio PLL is supposed to have 4 outputs: 3 fixed factors from
+ * the base (2x, 4x and 8x), and one variable divider (the one true
+ * pll audio).
+ *
+ * We don't have any need for the variable divider for now, so we just
+ * hardcode it to match with the clock names
+ */
+#define SUN6I_A31_PLL_AUDIO_REG	0x008
+
+static SUNXI_CCU_NM_WITH_GATE_LOCK(pll_audio_base_clk, "pll-audio-base",
+				   "osc24M", 0x008,
+				   8, 7,	/* N */
+				   0, 5,	/* M */
+				   BIT(31),	/* gate */
+				   BIT(28),	/* lock */
+				   CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_video0_clk, "pll-video0",
+					"osc24M", 0x010,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_ve_clk, "pll-ve",
+					"osc24M", 0x018,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr_clk, "pll-ddr",
+				    "osc24M", 0x020,
+				    8, 5,	/* N */
+				    4, 2,	/* K */
+				    0, 2,	/* M */
+				    BIT(31),	/* gate */
+				    BIT(28),	/* lock */
+				    CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NK_WITH_GATE_LOCK_POSTDIV(pll_periph_clk, "pll-periph",
+					   "osc24M", 0x028,
+					   8, 5,	/* N */
+					   4, 2,	/* K */
+					   BIT(31),	/* gate */
+					   BIT(28),	/* lock */
+					   2,		/* post-div */
+					   CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_video1_clk, "pll-video1",
+					"osc24M", 0x030,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_gpu_clk, "pll-gpu",
+					"osc24M", 0x038,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+/*
+ * The MIPI PLL has 2 modes: "MIPI" and "HDMI".
+ *
+ * The MIPI mode is a standard NKM-style clock. The HDMI mode is an
+ * integer / fractional clock with switchable multipliers and dividers.
+ * This is not supported here. We hardcode the PLL to MIPI mode.
+ */
+#define SUN6I_A31_PLL_MIPI_REG	0x040
+
+static const char * const pll_mipi_parents[] = { "pll-video0", "pll-video1" };
+static SUNXI_CCU_NKM_WITH_MUX_GATE_LOCK(pll_mipi_clk, "pll-mipi",
+					pll_mipi_parents, 0x040,
+					8, 4,	/* N */
+					4, 2,	/* K */
+					0, 4,	/* M */
+					21, 0,	/* mux */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll9_clk, "pll9",
+					"osc24M", 0x044,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll10_clk, "pll10",
+					"osc24M", 0x048,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static const char * const cpux_parents[] = { "osc32k", "osc24M",
+					     "pll-cpu", "pll-cpu" };
+static SUNXI_CCU_MUX(cpu_clk, "cpu", cpux_parents,
+		     0x050, 16, 2, CLK_SET_RATE_PARENT | CLK_IS_CRITICAL);
+
+static struct clk_div_table axi_div_table[] = {
+	{ .val = 0, .div = 1 },
+	{ .val = 1, .div = 2 },
+	{ .val = 2, .div = 3 },
+	{ .val = 3, .div = 4 },
+	{ .val = 4, .div = 4 },
+	{ .val = 5, .div = 4 },
+	{ .val = 6, .div = 4 },
+	{ .val = 7, .div = 4 },
+	{ /* Sentinel */ },
+};
+
+static SUNXI_CCU_DIV_TABLE(axi_clk, "axi", "cpu",
+			   0x050, 0, 3, axi_div_table, 0);
+
+static const char * const ahb1_parents[] = { "osc32k", "osc24M",
+					     "axi", "pll-periph" };
+
+static struct ccu_div ahb1_clk = {
+	.div		= _SUNXI_CCU_DIV_FLAGS(4, 2, CLK_DIVIDER_POWER_OF_TWO),
+
+	.mux		= {
+		.shift	= 12,
+		.width	= 2,
+
+		.variable_prediv	= {
+			.index	= 3,
+			.shift	= 6,
+			.width	= 2,
+		},
+	},
+
+	.common		= {
+		.reg		= 0x054,
+		.features	= CCU_FEATURE_VARIABLE_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("ahb1",
+						      ahb1_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct clk_div_table apb1_div_table[] = {
+	{ .val = 0, .div = 2 },
+	{ .val = 1, .div = 2 },
+	{ .val = 2, .div = 4 },
+	{ .val = 3, .div = 8 },
+	{ /* Sentinel */ },
+};
+
+static SUNXI_CCU_DIV_TABLE(apb1_clk, "apb1", "ahb1",
+			   0x054, 8, 2, apb1_div_table, 0);
+
+static const char * const apb2_parents[] = { "osc32k", "osc24M",
+					     "pll-periph", "pll-periph" };
+static SUNXI_CCU_MP_WITH_MUX(apb2_clk, "apb2", apb2_parents, 0x058,
+			     0, 5,	/* M */
+			     16, 2,	/* P */
+			     24, 2,	/* mux */
+			     0);
+
+static SUNXI_CCU_GATE(ahb1_mipidsi_clk,	"ahb1-mipidsi",	"ahb1",
+		      0x060, BIT(1), 0);
+static SUNXI_CCU_GATE(ahb1_ss_clk,	"ahb1-ss",	"ahb1",
+		      0x060, BIT(5), 0);
+static SUNXI_CCU_GATE(ahb1_dma_clk,	"ahb1-dma",	"ahb1",
+		      0x060, BIT(6), 0);
+static SUNXI_CCU_GATE(ahb1_mmc0_clk,	"ahb1-mmc0",	"ahb1",
+		      0x060, BIT(8), 0);
+static SUNXI_CCU_GATE(ahb1_mmc1_clk,	"ahb1-mmc1",	"ahb1",
+		      0x060, BIT(9), 0);
+static SUNXI_CCU_GATE(ahb1_mmc2_clk,	"ahb1-mmc2",	"ahb1",
+		      0x060, BIT(10), 0);
+static SUNXI_CCU_GATE(ahb1_mmc3_clk,	"ahb1-mmc3",	"ahb1",
+		      0x060, BIT(12), 0);
+static SUNXI_CCU_GATE(ahb1_nand1_clk,	"ahb1-nand1",	"ahb1",
+		      0x060, BIT(13), 0);
+static SUNXI_CCU_GATE(ahb1_nand0_clk,	"ahb1-nand0",	"ahb1",
+		      0x060, BIT(13), 0);
+static SUNXI_CCU_GATE(ahb1_sdram_clk,	"ahb1-sdram",	"ahb1",
+		      0x060, BIT(14), 0);
+static SUNXI_CCU_GATE(ahb1_emac_clk,	"ahb1-emac",	"ahb1",
+		      0x060, BIT(17), 0);
+static SUNXI_CCU_GATE(ahb1_ts_clk,	"ahb1-ts",	"ahb1",
+		      0x060, BIT(18), 0);
+static SUNXI_CCU_GATE(ahb1_hstimer_clk,	"ahb1-hstimer",	"ahb1",
+		      0x060, BIT(19), 0);
+static SUNXI_CCU_GATE(ahb1_spi0_clk,	"ahb1-spi0",	"ahb1",
+		      0x060, BIT(20), 0);
+static SUNXI_CCU_GATE(ahb1_spi1_clk,	"ahb1-spi1",	"ahb1",
+		      0x060, BIT(21), 0);
+static SUNXI_CCU_GATE(ahb1_spi2_clk,	"ahb1-spi2",	"ahb1",
+		      0x060, BIT(22), 0);
+static SUNXI_CCU_GATE(ahb1_spi3_clk,	"ahb1-spi3",	"ahb1",
+		      0x060, BIT(23), 0);
+static SUNXI_CCU_GATE(ahb1_otg_clk,	"ahb1-otg",	"ahb1",
+		      0x060, BIT(24), 0);
+static SUNXI_CCU_GATE(ahb1_ehci0_clk,	"ahb1-ehci0",	"ahb1",
+		      0x060, BIT(26), 0);
+static SUNXI_CCU_GATE(ahb1_ehci1_clk,	"ahb1-ehci1",	"ahb1",
+		      0x060, BIT(27), 0);
+static SUNXI_CCU_GATE(ahb1_ohci0_clk,	"ahb1-ohci0",	"ahb1",
+		      0x060, BIT(29), 0);
+static SUNXI_CCU_GATE(ahb1_ohci1_clk,	"ahb1-ohci1",	"ahb1",
+		      0x060, BIT(30), 0);
+static SUNXI_CCU_GATE(ahb1_ohci2_clk,	"ahb1-ohci2",	"ahb1",
+		      0x060, BIT(31), 0);
+
+static SUNXI_CCU_GATE(ahb1_ve_clk,	"ahb1-ve",	"ahb1",
+		      0x064, BIT(0), 0);
+static SUNXI_CCU_GATE(ahb1_lcd0_clk,	"ahb1-lcd0",	"ahb1",
+		      0x064, BIT(4), 0);
+static SUNXI_CCU_GATE(ahb1_lcd1_clk,	"ahb1-lcd1",	"ahb1",
+		      0x064, BIT(5), 0);
+static SUNXI_CCU_GATE(ahb1_csi_clk,	"ahb1-csi",	"ahb1",
+		      0x064, BIT(8), 0);
+static SUNXI_CCU_GATE(ahb1_hdmi_clk,	"ahb1-hdmi",	"ahb1",
+		      0x064, BIT(11), 0);
+static SUNXI_CCU_GATE(ahb1_be0_clk,	"ahb1-be0",	"ahb1",
+		      0x064, BIT(12), 0);
+static SUNXI_CCU_GATE(ahb1_be1_clk,	"ahb1-be1",	"ahb1",
+		      0x064, BIT(13), 0);
+static SUNXI_CCU_GATE(ahb1_fe0_clk,	"ahb1-fe0",	"ahb1",
+		      0x064, BIT(14), 0);
+static SUNXI_CCU_GATE(ahb1_fe1_clk,	"ahb1-fe1",	"ahb1",
+		      0x064, BIT(15), 0);
+static SUNXI_CCU_GATE(ahb1_mp_clk,	"ahb1-mp",	"ahb1",
+		      0x064, BIT(18), 0);
+static SUNXI_CCU_GATE(ahb1_gpu_clk,	"ahb1-gpu",	"ahb1",
+		      0x064, BIT(20), 0);
+static SUNXI_CCU_GATE(ahb1_deu0_clk,	"ahb1-deu0",	"ahb1",
+		      0x064, BIT(23), 0);
+static SUNXI_CCU_GATE(ahb1_deu1_clk,	"ahb1-deu1",	"ahb1",
+		      0x064, BIT(24), 0);
+static SUNXI_CCU_GATE(ahb1_drc0_clk,	"ahb1-drc0",	"ahb1",
+		      0x064, BIT(25), 0);
+static SUNXI_CCU_GATE(ahb1_drc1_clk,	"ahb1-drc1",	"ahb1",
+		      0x064, BIT(26), 0);
+
+static SUNXI_CCU_GATE(apb1_codec_clk,	"apb1-codec",	"apb1",
+		      0x068, BIT(0), 0);
+static SUNXI_CCU_GATE(apb1_spdif_clk,	"apb1-spdif",	"apb1",
+		      0x068, BIT(1), 0);
+static SUNXI_CCU_GATE(apb1_digital_mic_clk,	"apb1-digital-mic",	"apb1",
+		      0x068, BIT(4), 0);
+static SUNXI_CCU_GATE(apb1_pio_clk,	"apb1-pio",	"apb1",
+		      0x068, BIT(5), 0);
+static SUNXI_CCU_GATE(apb1_daudio0_clk,	"apb1-daudio0",	"apb1",
+		      0x068, BIT(12), 0);
+static SUNXI_CCU_GATE(apb1_daudio1_clk,	"apb1-daudio1",	"apb1",
+		      0x068, BIT(13), 0);
+
+static SUNXI_CCU_GATE(apb2_i2c0_clk,	"apb2-i2c0",	"apb2",
+		      0x06c, BIT(0), 0);
+static SUNXI_CCU_GATE(apb2_i2c1_clk,	"apb2-i2c1",	"apb2",
+		      0x06c, BIT(1), 0);
+static SUNXI_CCU_GATE(apb2_i2c2_clk,	"apb2-i2c2",	"apb2",
+		      0x06c, BIT(2), 0);
+static SUNXI_CCU_GATE(apb2_i2c3_clk,	"apb2-i2c3",	"apb2",
+		      0x06c, BIT(3), 0);
+static SUNXI_CCU_GATE(apb2_uart0_clk,	"apb2-uart0",	"apb2",
+		      0x06c, BIT(16), 0);
+static SUNXI_CCU_GATE(apb2_uart1_clk,	"apb2-uart1",	"apb2",
+		      0x06c, BIT(17), 0);
+static SUNXI_CCU_GATE(apb2_uart2_clk,	"apb2-uart2",	"apb2",
+		      0x06c, BIT(18), 0);
+static SUNXI_CCU_GATE(apb2_uart3_clk,	"apb2-uart3",	"apb2",
+		      0x06c, BIT(19), 0);
+static SUNXI_CCU_GATE(apb2_uart4_clk,	"apb2-uart4",	"apb2",
+		      0x06c, BIT(20), 0);
+static SUNXI_CCU_GATE(apb2_uart5_clk,	"apb2-uart5",	"apb2",
+		      0x06c, BIT(21), 0);
+
+static const char * const mod0_default_parents[] = { "osc24M", "pll-periph" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand0_clk, "nand0", mod0_default_parents,
+				  0x080,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand1_clk, "nand1", mod0_default_parents,
+				  0x084,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc0_clk, "mmc0", mod0_default_parents,
+				  0x088,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc0_sample_clk, "mmc0_sample", "mmc0",
+		       0x088, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc0_output_clk, "mmc0_output", "mmc0",
+		       0x088, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc1_clk, "mmc1", mod0_default_parents,
+				  0x08c,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc1_sample_clk, "mmc1_sample", "mmc1",
+		       0x08c, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc1_output_clk, "mmc1_output", "mmc1",
+		       0x08c, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc2_clk, "mmc2", mod0_default_parents,
+				  0x090,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc2_sample_clk, "mmc2_sample", "mmc2",
+		       0x090, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc2_output_clk, "mmc2_output", "mmc2",
+		       0x090, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc3_clk, "mmc3", mod0_default_parents,
+				  0x094,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc3_sample_clk, "mmc3_sample", "mmc3",
+		       0x094, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc3_output_clk, "mmc3_output", "mmc3",
+		       0x094, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(ts_clk, "ts", mod0_default_parents, 0x098,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(ss_clk, "ss", mod0_default_parents, 0x09c,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", mod0_default_parents, 0x0a0,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents, 0x0a4,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi2_clk, "spi2", mod0_default_parents, 0x0a8,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi3_clk, "spi3", mod0_default_parents, 0x0ac,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static const char * const daudio_parents[] = { "pll-audio-8x", "pll-audio-4x",
+					       "pll-audio-2x", "pll-audio" };
+static SUNXI_CCU_MUX_WITH_GATE(daudio0_clk, "daudio0", daudio_parents,
+			       0x0b0, 16, 2, BIT(31), CLK_SET_RATE_PARENT);
+static SUNXI_CCU_MUX_WITH_GATE(daudio1_clk, "daudio1", daudio_parents,
+			       0x0b4, 16, 2, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_GATE(spdif_clk, "spdif", "pll-audio",
+			     0x0c0, 0, 4, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(usb_phy0_clk,	"usb-phy0",	"osc24M",
+		      0x0cc, BIT(8), 0);
+static SUNXI_CCU_GATE(usb_phy1_clk,	"usb-phy1",	"osc24M",
+		      0x0cc, BIT(9), 0);
+static SUNXI_CCU_GATE(usb_phy2_clk,	"usb-phy2",	"osc24M",
+		      0x0cc, BIT(10), 0);
+static SUNXI_CCU_GATE(usb_ohci0_clk,	"usb-ohci0",	"osc24M",
+		      0x0cc, BIT(16), 0);
+static SUNXI_CCU_GATE(usb_ohci1_clk,	"usb-ohci1",	"osc24M",
+		      0x0cc, BIT(17), 0);
+static SUNXI_CCU_GATE(usb_ohci2_clk,	"usb-ohci2",	"osc24M",
+		      0x0cc, BIT(18), 0);
+
+/* TODO emac clk not supported yet */
+
+static const char * const dram_parents[] = { "pll-ddr", "pll-periph" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(mdfs_clk, "mdfs", dram_parents, 0x0f0,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  CLK_IS_CRITICAL);
+
+static SUNXI_CCU_M_WITH_MUX(sdram0_clk, "sdram0", dram_parents,
+			    0x0f4, 0, 4, 4, 1, CLK_IS_CRITICAL);
+static SUNXI_CCU_M_WITH_MUX(sdram1_clk, "sdram1", dram_parents,
+			    0x0f4, 8, 4, 12, 1, CLK_IS_CRITICAL);
+
+static SUNXI_CCU_GATE(dram_ve_clk,	"dram-ve",	"mdfs",
+		      0x100, BIT(0), 0);
+static SUNXI_CCU_GATE(dram_csi_isp_clk,	"dram-csi-isp",	"mdfs",
+		      0x100, BIT(1), 0);
+static SUNXI_CCU_GATE(dram_ts_clk,	"dram-ts",	"mdfs",
+		      0x100, BIT(3), 0);
+static SUNXI_CCU_GATE(dram_drc0_clk,	"dram-drc0",	"mdfs",
+		      0x100, BIT(16), 0);
+static SUNXI_CCU_GATE(dram_drc1_clk,	"dram-drc1",	"mdfs",
+		      0x100, BIT(17), 0);
+static SUNXI_CCU_GATE(dram_deu0_clk,	"dram-deu0",	"mdfs",
+		      0x100, BIT(18), 0);
+static SUNXI_CCU_GATE(dram_deu1_clk,	"dram-deu1",	"mdfs",
+		      0x100, BIT(19), 0);
+static SUNXI_CCU_GATE(dram_fe0_clk,	"dram-fe0",	"mdfs",
+		      0x100, BIT(24), 0);
+static SUNXI_CCU_GATE(dram_fe1_clk,	"dram-fe1",	"mdfs",
+		      0x100, BIT(25), 0);
+static SUNXI_CCU_GATE(dram_be0_clk,	"dram-be0",	"mdfs",
+		      0x100, BIT(26), 0);
+static SUNXI_CCU_GATE(dram_be1_clk,	"dram-be1",	"mdfs",
+		      0x100, BIT(27), 0);
+static SUNXI_CCU_GATE(dram_mp_clk,	"dram-mp",	"mdfs",
+		      0x100, BIT(28), 0);
+
+static const char * const de_parents[] = { "pll-video0", "pll-video1",
+					   "pll-periph-2x", "pll-gpu",
+					   "pll9", "pll10" };
+static SUNXI_CCU_M_WITH_MUX_GATE(be0_clk, "be0", de_parents,
+				 0x104, 0, 4, 24, 3, BIT(31), 0);
+static SUNXI_CCU_M_WITH_MUX_GATE(be1_clk, "be1", de_parents,
+				 0x108, 0, 4, 24, 3, BIT(31), 0);
+static SUNXI_CCU_M_WITH_MUX_GATE(fe0_clk, "fe0", de_parents,
+				 0x10c, 0, 4, 24, 3, BIT(31), 0);
+static SUNXI_CCU_M_WITH_MUX_GATE(fe1_clk, "fe1", de_parents,
+				 0x110, 0, 4, 24, 3, BIT(31), 0);
+
+static const char * const mp_parents[] = { "pll-video0", "pll-video1",
+					   "pll9", "pll10" };
+static SUNXI_CCU_M_WITH_MUX_GATE(mp_clk, "mp", mp_parents,
+				 0x114, 0, 4, 24, 3, BIT(31), 0);
+
+static const char * const lcd_ch0_parents[] = { "pll-video0", "pll-video1",
+						"pll-video0-2x",
+						"pll-video1-2x", "pll-mipi" };
+static SUNXI_CCU_MUX_WITH_GATE(lcd0_ch0_clk, "lcd0-ch0", lcd_ch0_parents,
+			       0x118, 24, 2, BIT(31), CLK_SET_RATE_PARENT);
+static SUNXI_CCU_MUX_WITH_GATE(lcd1_ch0_clk, "lcd1-ch0", lcd_ch0_parents,
+			       0x11c, 24, 2, BIT(31), CLK_SET_RATE_PARENT);
+
+static const char * const lcd_ch1_parents[] = { "pll-video0", "pll-video1",
+						"pll-video0-2x",
+						"pll-video1-2x" };
+static SUNXI_CCU_M_WITH_MUX_GATE(lcd0_ch1_clk, "lcd0-ch1", lcd_ch1_parents,
+				 0x12c, 0, 4, 24, 3, BIT(31),
+				 CLK_SET_RATE_PARENT);
+static SUNXI_CCU_M_WITH_MUX_GATE(lcd1_ch1_clk, "lcd1-ch1", lcd_ch1_parents,
+				 0x12c, 0, 4, 24, 3, BIT(31),
+				 CLK_SET_RATE_PARENT);
+
+static const char * const csi_sclk_parents[] = { "pll-video0", "pll-video1",
+						 "pll9", "pll10", "pll-mipi",
+						 "pll-ve" };
+static SUNXI_CCU_M_WITH_MUX_GATE(csi0_sclk_clk, "csi0-sclk", csi_sclk_parents,
+				 0x134, 16, 4, 24, 3, BIT(31), 0);
+
+static const char * const csi_mclk_parents[] = { "pll-video0", "pll-video1",
+						 "osc24M" };
+static const u8 csi_mclk_table[] = { 0, 1, 5 };
+static struct ccu_div csi0_mclk_clk = {
+	.enable		= BIT(15),
+	.div		= _SUNXI_CCU_DIV(0, 4),
+	.mux		= _SUNXI_CCU_MUX_TABLE(8, 3, csi_mclk_table),
+	.common		= {
+		.reg		= 0x134,
+		.hw.init	= CLK_HW_INIT_PARENTS("csi0-mclk",
+						      csi_mclk_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct ccu_div csi1_mclk_clk = {
+	.enable		= BIT(15),
+	.div		= _SUNXI_CCU_DIV(0, 4),
+	.mux		= _SUNXI_CCU_MUX_TABLE(8, 3, csi_mclk_table),
+	.common		= {
+		.reg		= 0x138,
+		.hw.init	= CLK_HW_INIT_PARENTS("csi1-mclk",
+						      csi_mclk_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve",
+			     0x13c, 16, 3, BIT(31), 0);
+
+static SUNXI_CCU_GATE(codec_clk,	"codec",	"pll-audio",
+		      0x140, BIT(31), CLK_SET_RATE_PARENT);
+static SUNXI_CCU_GATE(avs_clk,		"avs",		"osc24M",
+		      0x144, BIT(31), 0);
+static SUNXI_CCU_GATE(digital_mic_clk,	"digital-mic",	"pll-audio",
+		      0x148, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_M_WITH_MUX_GATE(hdmi_clk, "hdmi", lcd_ch1_parents,
+				 0x150, 0, 4, 24, 2, BIT(31),
+				 CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(hdmi_ddc_clk, "hdmi-ddc", "osc24M", 0x150, BIT(31), 0);
+
+static SUNXI_CCU_GATE(ps_clk, "ps", "lcd1-ch1", 0x140, BIT(31), 0);
+
+static const char * const mbus_parents[] = { "osc24M", "pll-periph",
+					     "pll-ddr" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(mbus0_clk, "mbus0", mbus_parents, 0x15c,
+				  0, 3,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  CLK_IS_CRITICAL);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mbus1_clk, "mbus1", mbus_parents, 0x160,
+				  0, 3,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  CLK_IS_CRITICAL);
+
+static SUNXI_CCU_M_WITH_MUX_GATE(mipi_dsi_clk, "mipi-dsi", lcd_ch1_parents,
+				 0x168, 16, 3, 24, 2, BIT(31),
+				 CLK_SET_RATE_PARENT);
+static SUNXI_CCU_M_WITH_MUX_GATE(mipi_dsi_dphy_clk, "mipi-dsi-dphy",
+				 lcd_ch1_parents, 0x168, 0, 3, 8, 2,
+				 BIT(15), CLK_SET_RATE_PARENT);
+static SUNXI_CCU_M_WITH_MUX_GATE(mipi_csi_dphy_clk, "mipi-csi-dphy",
+				 lcd_ch1_parents, 0x16c, 0, 3, 8, 2,
+				 BIT(15), 0);
+
+static SUNXI_CCU_M_WITH_MUX_GATE(iep_drc0_clk, "iep-drc0", de_parents,
+				 0x180, 0, 3, 24, 2, BIT(31), 0);
+static SUNXI_CCU_M_WITH_MUX_GATE(iep_drc1_clk, "iep-drc1", de_parents,
+				 0x184, 0, 3, 24, 2, BIT(31), 0);
+static SUNXI_CCU_M_WITH_MUX_GATE(iep_deu0_clk, "iep-deu0", de_parents,
+				 0x188, 0, 3, 24, 2, BIT(31), 0);
+static SUNXI_CCU_M_WITH_MUX_GATE(iep_deu1_clk, "iep-deu1", de_parents,
+				 0x18c, 0, 3, 24, 2, BIT(31), 0);
+
+static const char * const gpu_parents[] = { "pll-gpu", "pll-periph-2x",
+					    "pll-video0", "pll-video1",
+					    "pll9", "pll10" };
+static const struct ccu_mux_fixed_prediv gpu_predivs[] = {
+	{ .index = 1, .div = 3, },
+};
+
+static struct ccu_div gpu_core_clk = {
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV(0, 3),
+	.mux		= {
+		.shift		= 24,
+		.width		= 3,
+		.fixed_predivs	= gpu_predivs,
+		.n_predivs	= ARRAY_SIZE(gpu_predivs),
+	},
+	.common		= {
+		.reg		= 0x1a0,
+		.features	= CCU_FEATURE_FIXED_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("gpu-core",
+						      gpu_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct ccu_div gpu_memory_clk = {
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV(0, 3),
+	.mux		= {
+		.shift		= 24,
+		.width		= 3,
+		.fixed_predivs	= gpu_predivs,
+		.n_predivs	= ARRAY_SIZE(gpu_predivs),
+	},
+	.common		= {
+		.reg		= 0x1a4,
+		.features	= CCU_FEATURE_FIXED_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("gpu-memory",
+						      gpu_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct ccu_div gpu_hyd_clk = {
+	.enable		= BIT(31),
+	.div		= _SUNXI_CCU_DIV(0, 3),
+	.mux		= {
+		.shift		= 24,
+		.width		= 3,
+		.fixed_predivs	= gpu_predivs,
+		.n_predivs	= ARRAY_SIZE(gpu_predivs),
+	},
+	.common		= {
+		.reg		= 0x1a8,
+		.features	= CCU_FEATURE_FIXED_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("gpu-hyd",
+						      gpu_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static SUNXI_CCU_M_WITH_MUX_GATE(ats_clk, "ats", mod0_default_parents, 0x1b0,
+				 0, 3,		/* M */
+				 24, 2,		/* mux */
+				 BIT(31),	/* gate */
+				 0);
+
+static SUNXI_CCU_M_WITH_MUX_GATE(trace_clk, "trace", mod0_default_parents,
+				 0x1b0,
+				 0, 3,		/* M */
+				 24, 2,		/* mux */
+				 BIT(31),	/* gate */
+				 0);
+
+static const char * const clk_out_parents[] = { "osc24M", "osc32k", "osc24M",
+						"axi", "ahb1" };
+static const u8 clk_out_table[] = { 0, 1, 2, 11, 13 };
+
+static const struct ccu_mux_fixed_prediv clk_out_predivs[] = {
+	{ .index = 0, .div = 750, },
+	{ .index = 3, .div = 4, },
+	{ .index = 4, .div = 4, },
+};
+
+static struct ccu_mp out_a_clk = {
+	.enable		= BIT(31),
+	.m		= _SUNXI_CCU_DIV(8, 5),
+	.p		= _SUNXI_CCU_DIV(20, 2),
+	.mux		= {
+		.shift		= 24,
+		.width		= 4,
+		.table		= clk_out_table,
+		.fixed_predivs	= clk_out_predivs,
+		.n_predivs	= ARRAY_SIZE(clk_out_predivs),
+	},
+	.common		= {
+		.reg		= 0x300,
+		.features	= CCU_FEATURE_FIXED_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("out-a",
+						      clk_out_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct ccu_mp out_b_clk = {
+	.enable		= BIT(31),
+	.m		= _SUNXI_CCU_DIV(8, 5),
+	.p		= _SUNXI_CCU_DIV(20, 2),
+	.mux		= {
+		.shift		= 24,
+		.width		= 4,
+		.table		= clk_out_table,
+		.fixed_predivs	= clk_out_predivs,
+		.n_predivs	= ARRAY_SIZE(clk_out_predivs),
+	},
+	.common		= {
+		.reg		= 0x304,
+		.features	= CCU_FEATURE_FIXED_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("out-b",
+						      clk_out_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct ccu_mp out_c_clk = {
+	.enable		= BIT(31),
+	.m		= _SUNXI_CCU_DIV(8, 5),
+	.p		= _SUNXI_CCU_DIV(20, 2),
+	.mux		= {
+		.shift		= 24,
+		.width		= 4,
+		.table		= clk_out_table,
+		.fixed_predivs	= clk_out_predivs,
+		.n_predivs	= ARRAY_SIZE(clk_out_predivs),
+	},
+	.common		= {
+		.reg		= 0x308,
+		.features	= CCU_FEATURE_FIXED_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("out-c",
+						      clk_out_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct ccu_common *sun6i_a31_ccu_clks[] = {
+	&pll_cpu_clk.common,
+	&pll_audio_base_clk.common,
+	&pll_video0_clk.common,
+	&pll_ve_clk.common,
+	&pll_ddr_clk.common,
+	&pll_periph_clk.common,
+	&pll_video1_clk.common,
+	&pll_gpu_clk.common,
+	&pll_mipi_clk.common,
+	&pll9_clk.common,
+	&pll10_clk.common,
+	&cpu_clk.common,
+	&axi_clk.common,
+	&ahb1_clk.common,
+	&apb1_clk.common,
+	&apb2_clk.common,
+	&ahb1_mipidsi_clk.common,
+	&ahb1_ss_clk.common,
+	&ahb1_dma_clk.common,
+	&ahb1_mmc0_clk.common,
+	&ahb1_mmc1_clk.common,
+	&ahb1_mmc2_clk.common,
+	&ahb1_mmc3_clk.common,
+	&ahb1_nand1_clk.common,
+	&ahb1_nand0_clk.common,
+	&ahb1_sdram_clk.common,
+	&ahb1_emac_clk.common,
+	&ahb1_ts_clk.common,
+	&ahb1_hstimer_clk.common,
+	&ahb1_spi0_clk.common,
+	&ahb1_spi1_clk.common,
+	&ahb1_spi2_clk.common,
+	&ahb1_spi3_clk.common,
+	&ahb1_otg_clk.common,
+	&ahb1_ehci0_clk.common,
+	&ahb1_ehci1_clk.common,
+	&ahb1_ohci0_clk.common,
+	&ahb1_ohci1_clk.common,
+	&ahb1_ohci2_clk.common,
+	&ahb1_ve_clk.common,
+	&ahb1_lcd0_clk.common,
+	&ahb1_lcd1_clk.common,
+	&ahb1_csi_clk.common,
+	&ahb1_hdmi_clk.common,
+	&ahb1_be0_clk.common,
+	&ahb1_be1_clk.common,
+	&ahb1_fe0_clk.common,
+	&ahb1_fe1_clk.common,
+	&ahb1_mp_clk.common,
+	&ahb1_gpu_clk.common,
+	&ahb1_deu0_clk.common,
+	&ahb1_deu1_clk.common,
+	&ahb1_drc0_clk.common,
+	&ahb1_drc1_clk.common,
+	&apb1_codec_clk.common,
+	&apb1_spdif_clk.common,
+	&apb1_digital_mic_clk.common,
+	&apb1_pio_clk.common,
+	&apb1_daudio0_clk.common,
+	&apb1_daudio1_clk.common,
+	&apb2_i2c0_clk.common,
+	&apb2_i2c1_clk.common,
+	&apb2_i2c2_clk.common,
+	&apb2_i2c3_clk.common,
+	&apb2_uart0_clk.common,
+	&apb2_uart1_clk.common,
+	&apb2_uart2_clk.common,
+	&apb2_uart3_clk.common,
+	&apb2_uart4_clk.common,
+	&apb2_uart5_clk.common,
+	&nand0_clk.common,
+	&nand1_clk.common,
+	&mmc0_clk.common,
+	&mmc0_sample_clk.common,
+	&mmc0_output_clk.common,
+	&mmc1_clk.common,
+	&mmc1_sample_clk.common,
+	&mmc1_output_clk.common,
+	&mmc2_clk.common,
+	&mmc2_sample_clk.common,
+	&mmc2_output_clk.common,
+	&mmc3_clk.common,
+	&mmc3_sample_clk.common,
+	&mmc3_output_clk.common,
+	&ts_clk.common,
+	&ss_clk.common,
+	&spi0_clk.common,
+	&spi1_clk.common,
+	&spi2_clk.common,
+	&spi3_clk.common,
+	&daudio0_clk.common,
+	&daudio1_clk.common,
+	&spdif_clk.common,
+	&usb_phy0_clk.common,
+	&usb_phy1_clk.common,
+	&usb_phy2_clk.common,
+	&usb_ohci0_clk.common,
+	&usb_ohci1_clk.common,
+	&usb_ohci2_clk.common,
+	&mdfs_clk.common,
+	&sdram0_clk.common,
+	&sdram1_clk.common,
+	&dram_ve_clk.common,
+	&dram_csi_isp_clk.common,
+	&dram_ts_clk.common,
+	&dram_drc0_clk.common,
+	&dram_drc1_clk.common,
+	&dram_deu0_clk.common,
+	&dram_deu1_clk.common,
+	&dram_fe0_clk.common,
+	&dram_fe1_clk.common,
+	&dram_be0_clk.common,
+	&dram_be1_clk.common,
+	&dram_mp_clk.common,
+	&be0_clk.common,
+	&be1_clk.common,
+	&fe0_clk.common,
+	&fe1_clk.common,
+	&mp_clk.common,
+	&lcd0_ch0_clk.common,
+	&lcd1_ch0_clk.common,
+	&lcd0_ch1_clk.common,
+	&lcd1_ch1_clk.common,
+	&csi0_sclk_clk.common,
+	&csi0_mclk_clk.common,
+	&csi1_mclk_clk.common,
+	&ve_clk.common,
+	&codec_clk.common,
+	&avs_clk.common,
+	&digital_mic_clk.common,
+	&hdmi_clk.common,
+	&hdmi_ddc_clk.common,
+	&ps_clk.common,
+	&mbus0_clk.common,
+	&mbus1_clk.common,
+	&mipi_dsi_clk.common,
+	&mipi_dsi_dphy_clk.common,
+	&mipi_csi_dphy_clk.common,
+	&iep_drc0_clk.common,
+	&iep_drc1_clk.common,
+	&iep_deu0_clk.common,
+	&iep_deu1_clk.common,
+	&gpu_core_clk.common,
+	&gpu_memory_clk.common,
+	&gpu_hyd_clk.common,
+	&ats_clk.common,
+	&trace_clk.common,
+	&out_a_clk.common,
+	&out_b_clk.common,
+	&out_c_clk.common,
+};
+
+/* We hardcode the divider to 4 for now */
+static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
+			"pll-audio-base", 4, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_2x_clk, "pll-audio-2x",
+			"pll-audio-base", 2, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_4x_clk, "pll-audio-4x",
+			"pll-audio-base", 1, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_8x_clk, "pll-audio-8x",
+			"pll-audio-base", 1, 2, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_periph_2x_clk, "pll-periph-2x",
+			"pll-periph", 1, 2, 0);
+static CLK_FIXED_FACTOR(pll_video0_2x_clk, "pll-video0-2x",
+			"pll-video0", 1, 2, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_video1_2x_clk, "pll-video1-2x",
+			"pll-video1", 1, 2, CLK_SET_RATE_PARENT);
+
+static struct clk_hw_onecell_data sun6i_a31_hw_clks = {
+	.hws	= {
+		[CLK_PLL_CPU]		= &pll_cpu_clk.common.hw,
+		[CLK_PLL_AUDIO_BASE]	= &pll_audio_base_clk.common.hw,
+		[CLK_PLL_AUDIO]		= &pll_audio_clk.hw,
+		[CLK_PLL_AUDIO_2X]	= &pll_audio_2x_clk.hw,
+		[CLK_PLL_AUDIO_4X]	= &pll_audio_4x_clk.hw,
+		[CLK_PLL_AUDIO_8X]	= &pll_audio_8x_clk.hw,
+		[CLK_PLL_VIDEO0]	= &pll_video0_clk.common.hw,
+		[CLK_PLL_VIDEO0_2X]	= &pll_video0_2x_clk.hw,
+		[CLK_PLL_VE]		= &pll_ve_clk.common.hw,
+		[CLK_PLL_DDR]		= &pll_ddr_clk.common.hw,
+		[CLK_PLL_PERIPH]	= &pll_periph_clk.common.hw,
+		[CLK_PLL_PERIPH_2X]	= &pll_periph_2x_clk.hw,
+		[CLK_PLL_VIDEO1]	= &pll_video1_clk.common.hw,
+		[CLK_PLL_VIDEO1_2X]	= &pll_video1_2x_clk.hw,
+		[CLK_PLL_GPU]		= &pll_gpu_clk.common.hw,
+		[CLK_PLL_MIPI]		= &pll_mipi_clk.common.hw,
+		[CLK_PLL9]		= &pll9_clk.common.hw,
+		[CLK_PLL10]		= &pll10_clk.common.hw,
+		[CLK_CPU]		= &cpu_clk.common.hw,
+		[CLK_AXI]		= &axi_clk.common.hw,
+		[CLK_AHB1]		= &ahb1_clk.common.hw,
+		[CLK_APB1]		= &apb1_clk.common.hw,
+		[CLK_APB2]		= &apb2_clk.common.hw,
+		[CLK_AHB1_MIPIDSI]	= &ahb1_mipidsi_clk.common.hw,
+		[CLK_AHB1_SS]		= &ahb1_ss_clk.common.hw,
+		[CLK_AHB1_DMA]		= &ahb1_dma_clk.common.hw,
+		[CLK_AHB1_MMC0]		= &ahb1_mmc0_clk.common.hw,
+		[CLK_AHB1_MMC1]		= &ahb1_mmc1_clk.common.hw,
+		[CLK_AHB1_MMC2]		= &ahb1_mmc2_clk.common.hw,
+		[CLK_AHB1_MMC3]		= &ahb1_mmc3_clk.common.hw,
+		[CLK_AHB1_NAND1]	= &ahb1_nand1_clk.common.hw,
+		[CLK_AHB1_NAND0]	= &ahb1_nand0_clk.common.hw,
+		[CLK_AHB1_SDRAM]	= &ahb1_sdram_clk.common.hw,
+		[CLK_AHB1_EMAC]		= &ahb1_emac_clk.common.hw,
+		[CLK_AHB1_TS]		= &ahb1_ts_clk.common.hw,
+		[CLK_AHB1_HSTIMER]	= &ahb1_hstimer_clk.common.hw,
+		[CLK_AHB1_SPI0]		= &ahb1_spi0_clk.common.hw,
+		[CLK_AHB1_SPI1]		= &ahb1_spi1_clk.common.hw,
+		[CLK_AHB1_SPI2]		= &ahb1_spi2_clk.common.hw,
+		[CLK_AHB1_SPI3]		= &ahb1_spi3_clk.common.hw,
+		[CLK_AHB1_OTG]		= &ahb1_otg_clk.common.hw,
+		[CLK_AHB1_EHCI0]	= &ahb1_ehci0_clk.common.hw,
+		[CLK_AHB1_EHCI1]	= &ahb1_ehci1_clk.common.hw,
+		[CLK_AHB1_OHCI0]	= &ahb1_ohci0_clk.common.hw,
+		[CLK_AHB1_OHCI1]	= &ahb1_ohci1_clk.common.hw,
+		[CLK_AHB1_OHCI2]	= &ahb1_ohci2_clk.common.hw,
+		[CLK_AHB1_VE]		= &ahb1_ve_clk.common.hw,
+		[CLK_AHB1_LCD0]		= &ahb1_lcd0_clk.common.hw,
+		[CLK_AHB1_LCD1]		= &ahb1_lcd1_clk.common.hw,
+		[CLK_AHB1_CSI]		= &ahb1_csi_clk.common.hw,
+		[CLK_AHB1_HDMI]		= &ahb1_hdmi_clk.common.hw,
+		[CLK_AHB1_BE0]		= &ahb1_be0_clk.common.hw,
+		[CLK_AHB1_BE1]		= &ahb1_be1_clk.common.hw,
+		[CLK_AHB1_FE0]		= &ahb1_fe0_clk.common.hw,
+		[CLK_AHB1_FE1]		= &ahb1_fe1_clk.common.hw,
+		[CLK_AHB1_MP]		= &ahb1_mp_clk.common.hw,
+		[CLK_AHB1_GPU]		= &ahb1_gpu_clk.common.hw,
+		[CLK_AHB1_DEU0]		= &ahb1_deu0_clk.common.hw,
+		[CLK_AHB1_DEU1]		= &ahb1_deu1_clk.common.hw,
+		[CLK_AHB1_DRC0]		= &ahb1_drc0_clk.common.hw,
+		[CLK_AHB1_DRC1]		= &ahb1_drc1_clk.common.hw,
+		[CLK_APB1_CODEC]	= &apb1_codec_clk.common.hw,
+		[CLK_APB1_SPDIF]	= &apb1_spdif_clk.common.hw,
+		[CLK_APB1_DIGITAL_MIC]	= &apb1_digital_mic_clk.common.hw,
+		[CLK_APB1_PIO]		= &apb1_pio_clk.common.hw,
+		[CLK_APB1_DAUDIO0]	= &apb1_daudio0_clk.common.hw,
+		[CLK_APB1_DAUDIO1]	= &apb1_daudio1_clk.common.hw,
+		[CLK_APB2_I2C0]		= &apb2_i2c0_clk.common.hw,
+		[CLK_APB2_I2C1]		= &apb2_i2c1_clk.common.hw,
+		[CLK_APB2_I2C2]		= &apb2_i2c2_clk.common.hw,
+		[CLK_APB2_I2C3]		= &apb2_i2c3_clk.common.hw,
+		[CLK_APB2_UART0]	= &apb2_uart0_clk.common.hw,
+		[CLK_APB2_UART1]	= &apb2_uart1_clk.common.hw,
+		[CLK_APB2_UART2]	= &apb2_uart2_clk.common.hw,
+		[CLK_APB2_UART3]	= &apb2_uart3_clk.common.hw,
+		[CLK_APB2_UART4]	= &apb2_uart4_clk.common.hw,
+		[CLK_APB2_UART5]	= &apb2_uart5_clk.common.hw,
+		[CLK_NAND0]		= &nand0_clk.common.hw,
+		[CLK_NAND1]		= &nand1_clk.common.hw,
+		[CLK_MMC0]		= &mmc0_clk.common.hw,
+		[CLK_MMC0_SAMPLE]	= &mmc0_sample_clk.common.hw,
+		[CLK_MMC0_OUTPUT]	= &mmc0_output_clk.common.hw,
+		[CLK_MMC1]		= &mmc1_clk.common.hw,
+		[CLK_MMC1_SAMPLE]	= &mmc1_sample_clk.common.hw,
+		[CLK_MMC1_OUTPUT]	= &mmc1_output_clk.common.hw,
+		[CLK_MMC2]		= &mmc2_clk.common.hw,
+		[CLK_MMC2_SAMPLE]	= &mmc2_sample_clk.common.hw,
+		[CLK_MMC2_OUTPUT]	= &mmc2_output_clk.common.hw,
+		[CLK_MMC3]		= &mmc3_clk.common.hw,
+		[CLK_MMC3_SAMPLE]	= &mmc3_sample_clk.common.hw,
+		[CLK_MMC3_OUTPUT]	= &mmc3_output_clk.common.hw,
+		[CLK_TS]		= &ts_clk.common.hw,
+		[CLK_SS]		= &ss_clk.common.hw,
+		[CLK_SPI0]		= &spi0_clk.common.hw,
+		[CLK_SPI1]		= &spi1_clk.common.hw,
+		[CLK_SPI2]		= &spi2_clk.common.hw,
+		[CLK_SPI3]		= &spi3_clk.common.hw,
+		[CLK_DAUDIO0]		= &daudio0_clk.common.hw,
+		[CLK_DAUDIO1]		= &daudio1_clk.common.hw,
+		[CLK_SPDIF]		= &spdif_clk.common.hw,
+		[CLK_USB_PHY0]		= &usb_phy0_clk.common.hw,
+		[CLK_USB_PHY1]		= &usb_phy1_clk.common.hw,
+		[CLK_USB_PHY2]		= &usb_phy2_clk.common.hw,
+		[CLK_USB_OHCI0]		= &usb_ohci0_clk.common.hw,
+		[CLK_USB_OHCI1]		= &usb_ohci1_clk.common.hw,
+		[CLK_USB_OHCI2]		= &usb_ohci2_clk.common.hw,
+		[CLK_MDFS]		= &mdfs_clk.common.hw,
+		[CLK_SDRAM0]		= &sdram0_clk.common.hw,
+		[CLK_SDRAM1]		= &sdram1_clk.common.hw,
+		[CLK_DRAM_VE]		= &dram_ve_clk.common.hw,
+		[CLK_DRAM_CSI_ISP]	= &dram_csi_isp_clk.common.hw,
+		[CLK_DRAM_TS]		= &dram_ts_clk.common.hw,
+		[CLK_DRAM_DRC0]		= &dram_drc0_clk.common.hw,
+		[CLK_DRAM_DRC1]		= &dram_drc1_clk.common.hw,
+		[CLK_DRAM_DEU0]		= &dram_deu0_clk.common.hw,
+		[CLK_DRAM_DEU1]		= &dram_deu1_clk.common.hw,
+		[CLK_DRAM_FE0]		= &dram_fe0_clk.common.hw,
+		[CLK_DRAM_FE1]		= &dram_fe1_clk.common.hw,
+		[CLK_DRAM_BE0]		= &dram_be0_clk.common.hw,
+		[CLK_DRAM_BE1]		= &dram_be1_clk.common.hw,
+		[CLK_DRAM_MP]		= &dram_mp_clk.common.hw,
+		[CLK_BE0]		= &be0_clk.common.hw,
+		[CLK_BE1]		= &be1_clk.common.hw,
+		[CLK_FE0]		= &fe0_clk.common.hw,
+		[CLK_FE1]		= &fe1_clk.common.hw,
+		[CLK_MP]		= &mp_clk.common.hw,
+		[CLK_LCD0_CH0]		= &lcd0_ch0_clk.common.hw,
+		[CLK_LCD1_CH0]		= &lcd1_ch0_clk.common.hw,
+		[CLK_LCD0_CH1]		= &lcd0_ch1_clk.common.hw,
+		[CLK_LCD1_CH1]		= &lcd1_ch1_clk.common.hw,
+		[CLK_CSI0_SCLK]		= &csi0_sclk_clk.common.hw,
+		[CLK_CSI0_MCLK]		= &csi0_mclk_clk.common.hw,
+		[CLK_CSI1_MCLK]		= &csi1_mclk_clk.common.hw,
+		[CLK_VE]		= &ve_clk.common.hw,
+		[CLK_CODEC]		= &codec_clk.common.hw,
+		[CLK_AVS]		= &avs_clk.common.hw,
+		[CLK_DIGITAL_MIC]	= &digital_mic_clk.common.hw,
+		[CLK_HDMI]		= &hdmi_clk.common.hw,
+		[CLK_HDMI_DDC]		= &hdmi_ddc_clk.common.hw,
+		[CLK_PS]		= &ps_clk.common.hw,
+		[CLK_MBUS0]		= &mbus0_clk.common.hw,
+		[CLK_MBUS1]		= &mbus1_clk.common.hw,
+		[CLK_MIPI_DSI]		= &mipi_dsi_clk.common.hw,
+		[CLK_MIPI_DSI_DPHY]	= &mipi_dsi_dphy_clk.common.hw,
+		[CLK_MIPI_CSI_DPHY]	= &mipi_csi_dphy_clk.common.hw,
+		[CLK_IEP_DRC0]		= &iep_drc0_clk.common.hw,
+		[CLK_IEP_DRC1]		= &iep_drc1_clk.common.hw,
+		[CLK_IEP_DEU0]		= &iep_deu0_clk.common.hw,
+		[CLK_IEP_DEU1]		= &iep_deu1_clk.common.hw,
+		[CLK_GPU_CORE]		= &gpu_core_clk.common.hw,
+		[CLK_GPU_MEMORY]	= &gpu_memory_clk.common.hw,
+		[CLK_GPU_HYD]		= &gpu_hyd_clk.common.hw,
+		[CLK_ATS]		= &ats_clk.common.hw,
+		[CLK_TRACE]		= &trace_clk.common.hw,
+		[CLK_OUT_A]		= &out_a_clk.common.hw,
+		[CLK_OUT_B]		= &out_b_clk.common.hw,
+		[CLK_OUT_C]		= &out_c_clk.common.hw,
+	},
+	.num	= CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun6i_a31_ccu_resets[] = {
+	[RST_USB_PHY0]		= { 0x0cc, BIT(0) },
+	[RST_USB_PHY1]		= { 0x0cc, BIT(1) },
+	[RST_USB_PHY2]		= { 0x0cc, BIT(2) },
+
+	[RST_AHB1_MIPI_DSI]	= { 0x2c0, BIT(1) },
+	[RST_AHB1_SS]		= { 0x2c0, BIT(5) },
+	[RST_AHB1_DMA]		= { 0x2c0, BIT(6) },
+	[RST_AHB1_MMC0]		= { 0x2c0, BIT(8) },
+	[RST_AHB1_MMC1]		= { 0x2c0, BIT(9) },
+	[RST_AHB1_MMC2]		= { 0x2c0, BIT(10) },
+	[RST_AHB1_MMC3]		= { 0x2c0, BIT(11) },
+	[RST_AHB1_NAND1]	= { 0x2c0, BIT(12) },
+	[RST_AHB1_NAND0]	= { 0x2c0, BIT(13) },
+	[RST_AHB1_SDRAM]	= { 0x2c0, BIT(14) },
+	[RST_AHB1_EMAC]		= { 0x2c0, BIT(17) },
+	[RST_AHB1_TS]		= { 0x2c0, BIT(18) },
+	[RST_AHB1_HSTIMER]	= { 0x2c0, BIT(19) },
+	[RST_AHB1_SPI0]		= { 0x2c0, BIT(20) },
+	[RST_AHB1_SPI1]		= { 0x2c0, BIT(21) },
+	[RST_AHB1_SPI2]		= { 0x2c0, BIT(22) },
+	[RST_AHB1_SPI3]		= { 0x2c0, BIT(23) },
+	[RST_AHB1_OTG]		= { 0x2c0, BIT(24) },
+	[RST_AHB1_EHCI0]	= { 0x2c0, BIT(26) },
+	[RST_AHB1_EHCI1]	= { 0x2c0, BIT(27) },
+	[RST_AHB1_OHCI0]	= { 0x2c0, BIT(29) },
+	[RST_AHB1_OHCI1]	= { 0x2c0, BIT(30) },
+	[RST_AHB1_OHCI2]	= { 0x2c0, BIT(31) },
+
+	[RST_AHB1_VE]		= { 0x2c4, BIT(0) },
+	[RST_AHB1_LCD0]		= { 0x2c4, BIT(4) },
+	[RST_AHB1_LCD1]		= { 0x2c4, BIT(5) },
+	[RST_AHB1_CSI]		= { 0x2c4, BIT(8) },
+	[RST_AHB1_HDMI]		= { 0x2c4, BIT(11) },
+	[RST_AHB1_BE0]		= { 0x2c4, BIT(12) },
+	[RST_AHB1_BE1]		= { 0x2c4, BIT(13) },
+	[RST_AHB1_FE0]		= { 0x2c4, BIT(14) },
+	[RST_AHB1_FE1]		= { 0x2c4, BIT(15) },
+	[RST_AHB1_MP]		= { 0x2c4, BIT(18) },
+	[RST_AHB1_GPU]		= { 0x2c4, BIT(20) },
+	[RST_AHB1_DEU0]		= { 0x2c4, BIT(23) },
+	[RST_AHB1_DEU1]		= { 0x2c4, BIT(24) },
+	[RST_AHB1_DRC0]		= { 0x2c4, BIT(25) },
+	[RST_AHB1_DRC1]		= { 0x2c4, BIT(26) },
+	[RST_AHB1_LVDS]		= { 0x2c8, BIT(0) },
+
+	[RST_APB1_CODEC]	= { 0x2d0, BIT(0) },
+	[RST_APB1_SPDIF]	= { 0x2d0, BIT(1) },
+	[RST_APB1_DIGITAL_MIC]	= { 0x2d0, BIT(4) },
+	[RST_APB1_DAUDIO0]	= { 0x2d0, BIT(12) },
+	[RST_APB1_DAUDIO1]	= { 0x2d0, BIT(13) },
+
+	[RST_APB2_I2C0]		= { 0x2d8, BIT(0) },
+	[RST_APB2_I2C1]		= { 0x2d8, BIT(1) },
+	[RST_APB2_I2C2]		= { 0x2d8, BIT(2) },
+	[RST_APB2_I2C3]		= { 0x2d8, BIT(3) },
+	[RST_APB2_UART0]	= { 0x2d8, BIT(16) },
+	[RST_APB2_UART1]	= { 0x2d8, BIT(17) },
+	[RST_APB2_UART2]	= { 0x2d8, BIT(18) },
+	[RST_APB2_UART3]	= { 0x2d8, BIT(19) },
+	[RST_APB2_UART4]	= { 0x2d8, BIT(20) },
+	[RST_APB2_UART5]	= { 0x2d8, BIT(21) },
+};
+
+static const struct sunxi_ccu_desc sun6i_a31_ccu_desc = {
+	.ccu_clks	= sun6i_a31_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun6i_a31_ccu_clks),
+
+	.hw_clks	= &sun6i_a31_hw_clks,
+
+	.resets		= sun6i_a31_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun6i_a31_ccu_resets),
+};
+
+static struct ccu_mux_nb sun6i_a31_cpu_nb = {
+	.common		= &cpu_clk.common,
+	.cm		= &cpu_clk.mux,
+	.delay_us	= 1, /* > 8 clock cycles at 24 MHz */
+	.bypass_index	= 1, /* index of 24 MHz oscillator */
+};
+
+static void __init sun6i_a31_ccu_setup(struct device_node *node)
+{
+	void __iomem *reg;
+	u32 val;
+
+	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
+	if (IS_ERR(reg)) {
+		pr_err("%s: Could not map the clock registers\n",
+		       of_node_full_name(node));
+		return;
+	}
+
+	/* Force the PLL-Audio-1x divider to 4 */
+	val = readl(reg + SUN6I_A31_PLL_AUDIO_REG);
+	val &= ~GENMASK(19, 16);
+	writel(val | (3 << 16), reg + SUN6I_A31_PLL_AUDIO_REG);
+
+	/* Force PLL-MIPI to MIPI mode */
+	val = readl(reg + SUN6I_A31_PLL_MIPI_REG);
+	val &= BIT(16);
+	writel(val, reg + SUN6I_A31_PLL_MIPI_REG);
+
+	sunxi_ccu_probe(node, reg, &sun6i_a31_ccu_desc);
+
+	ccu_mux_notifier_register(pll_cpu_clk.common.hw.clk,
+				  &sun6i_a31_cpu_nb);
+}
+CLK_OF_DECLARE(sun6i_a31_ccu, "allwinner,sun6i-a31-ccu",
+	       sun6i_a31_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun6i-a31.h b/drivers/clk/sunxi-ng/ccu-sun6i-a31.h
new file mode 100644
index 0000000..4e43401
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun6i-a31.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2016 Chen-Yu Tsai
+ *
+ * Chen-Yu Tsai <wens@csie.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CCU_SUN6I_A31_H_
+#define _CCU_SUN6I_A31_H_
+
+#include <dt-bindings/clock/sun6i-a31-ccu.h>
+#include <dt-bindings/reset/sun6i-a31-ccu.h>
+
+#define CLK_PLL_CPU		0
+#define CLK_PLL_AUDIO_BASE	1
+#define CLK_PLL_AUDIO		2
+#define CLK_PLL_AUDIO_2X	3
+#define CLK_PLL_AUDIO_4X	4
+#define CLK_PLL_AUDIO_8X	5
+#define CLK_PLL_VIDEO0		6
+#define CLK_PLL_VIDEO0_2X	7
+#define CLK_PLL_VE		8
+#define CLK_PLL_DDR		9
+
+/* The PLL_PERIPH clock is exported */
+
+#define CLK_PLL_PERIPH_2X	11
+#define CLK_PLL_VIDEO1		12
+#define CLK_PLL_VIDEO1_2X	13
+#define CLK_PLL_GPU		14
+#define CLK_PLL_MIPI		15
+#define CLK_PLL9		16
+#define CLK_PLL10		17
+
+/* The CPUX clock is exported */
+
+#define CLK_AXI			19
+#define CLK_AHB1		20
+#define CLK_APB1		21
+#define CLK_APB2		22
+
+/* All the bus gates are exported */
+
+/* The first bunch of module clocks are exported */
+
+/* EMAC clock is not implemented */
+
+#define CLK_MDFS		107
+#define CLK_SDRAM0		108
+#define CLK_SDRAM1		109
+
+/* All the DRAM gates are exported */
+
+/* Some more module clocks are exported */
+
+#define CLK_MBUS0		141
+#define CLK_MBUS1		142
+
+/* Some more module clocks and external clock outputs are exported */
+
+#define CLK_NUMBER		(CLK_OUT_C + 1)
+
+#endif /* _CCU_SUN6I_A31_H_ */
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a23-a33.h b/drivers/clk/sunxi-ng/ccu-sun8i-a23-a33.h
new file mode 100644
index 0000000..62c0f8d
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-a23-a33.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2016 Maxime Ripard
+ *
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CCU_SUN8I_A23_A33_H_
+#define _CCU_SUN8I_A23_A33_H_
+
+#include <dt-bindings/clock/sun8i-a23-a33-ccu.h>
+#include <dt-bindings/reset/sun8i-a23-a33-ccu.h>
+
+#define CLK_PLL_CPUX		0
+#define CLK_PLL_AUDIO_BASE	1
+#define CLK_PLL_AUDIO		2
+#define CLK_PLL_AUDIO_2X	3
+#define CLK_PLL_AUDIO_4X	4
+#define CLK_PLL_AUDIO_8X	5
+#define CLK_PLL_VIDEO		6
+#define CLK_PLL_VIDEO_2X	7
+#define CLK_PLL_VE		8
+#define CLK_PLL_DDR0		9
+#define CLK_PLL_PERIPH		10
+#define CLK_PLL_PERIPH_2X	11
+#define CLK_PLL_GPU		12
+#define CLK_PLL_MIPI		13
+#define CLK_PLL_HSIC		14
+#define CLK_PLL_DE		15
+#define CLK_PLL_DDR1		16
+#define CLK_PLL_DDR		17
+
+/* The CPUX clock is exported */
+
+#define CLK_AXI			19
+#define CLK_AHB1		20
+#define CLK_APB1		21
+#define CLK_APB2		22
+
+/* All the bus gates are exported */
+
+/* The first part of the mod clocks is exported */
+
+#define CLK_DRAM		79
+
+/* Some more module clocks are exported */
+
+#define CLK_MBUS		95
+
+/* And the last module clocks are exported */
+
+#define CLK_NUMBER		(CLK_ATS + 1)
+
+#endif /* _CCU_SUN8I_A23_A33_H_ */
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c
new file mode 100644
index 0000000..2646d98
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright (c) 2016 Maxime Ripard. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+
+#include "ccu_common.h"
+#include "ccu_reset.h"
+
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_mp.h"
+#include "ccu_mult.h"
+#include "ccu_nk.h"
+#include "ccu_nkm.h"
+#include "ccu_nkmp.h"
+#include "ccu_nm.h"
+#include "ccu_phase.h"
+
+#include "ccu-sun8i-a23-a33.h"
+
+
+static struct ccu_nkmp pll_cpux_clk = {
+	.enable = BIT(31),
+	.lock	= BIT(28),
+
+	.n	= _SUNXI_CCU_MULT(8, 5),
+	.k	= _SUNXI_CCU_MULT(4, 2),
+	.m	= _SUNXI_CCU_DIV(0, 2),
+	.p	= _SUNXI_CCU_DIV_MAX(16, 2, 4),
+
+	.common	= {
+		.reg		= 0x000,
+		.hw.init	= CLK_HW_INIT("pll-cpux", "osc24M",
+					      &ccu_nkmp_ops,
+					      0),
+	},
+};
+
+/*
+ * The Audio PLL is supposed to have 4 outputs: 3 fixed factors from
+ * the base (2x, 4x and 8x), and one variable divider (the one true
+ * pll audio).
+ *
+ * We don't have any need for the variable divider for now, so we just
+ * hardcode it to match with the clock names
+ */
+#define SUN8I_A23_PLL_AUDIO_REG	0x008
+
+static SUNXI_CCU_NM_WITH_GATE_LOCK(pll_audio_base_clk, "pll-audio-base",
+				   "osc24M", 0x008,
+				   8, 7,		/* N */
+				   0, 5,		/* M */
+				   BIT(31),		/* gate */
+				   BIT(28),		/* lock */
+				   CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_video_clk, "pll-video",
+					"osc24M", 0x010,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_ve_clk, "pll-ve",
+					"osc24M", 0x018,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr_clk, "pll-ddr",
+				    "osc24M", 0x020,
+				    8, 5,		/* N */
+				    4, 2,		/* K */
+				    0, 2,		/* M */
+				    BIT(31),		/* gate */
+				    BIT(28),		/* lock */
+				    0);
+
+static SUNXI_CCU_NK_WITH_GATE_LOCK_POSTDIV(pll_periph_clk, "pll-periph",
+					   "osc24M", 0x028,
+					   8, 5,	/* N */
+					   4, 2,	/* K */
+					   BIT(31),	/* gate */
+					   BIT(28),	/* lock */
+					   2,		/* post-div */
+					   CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_gpu_clk, "pll-gpu",
+					"osc24M", 0x038,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+/*
+ * The MIPI PLL has 2 modes: "MIPI" and "HDMI".
+ *
+ * The MIPI mode is a standard NKM-style clock. The HDMI mode is an
+ * integer / fractional clock with switchable multipliers and dividers.
+ * This is not supported here. We hardcode the PLL to MIPI mode.
+ */
+#define SUN8I_A23_PLL_MIPI_REG	0x040
+static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_mipi_clk, "pll-mipi",
+				    "pll-video", 0x040,
+				    8, 4,		/* N */
+				    4, 2,		/* K */
+				    0, 4,		/* M */
+				    BIT(31),		/* gate */
+				    BIT(28),		/* lock */
+				    CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_hsic_clk, "pll-hsic",
+					"osc24M", 0x044,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_de_clk, "pll-de",
+					"osc24M", 0x048,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static const char * const cpux_parents[] = { "osc32k", "osc24M",
+					     "pll-cpux" , "pll-cpux" };
+static SUNXI_CCU_MUX(cpux_clk, "cpux", cpux_parents,
+		     0x050, 16, 2, CLK_IS_CRITICAL);
+
+static SUNXI_CCU_M(axi_clk, "axi", "cpux", 0x050, 0, 2, 0);
+
+static const char * const ahb1_parents[] = { "osc32k", "osc24M",
+					     "axi" , "pll-periph" };
+static struct ccu_div ahb1_clk = {
+	.div		= _SUNXI_CCU_DIV_FLAGS(4, 2, CLK_DIVIDER_POWER_OF_TWO),
+
+	.mux		= {
+		.shift	= 12,
+		.width	= 2,
+
+		.variable_prediv	= {
+			.index	= 3,
+			.shift	= 6,
+			.width	= 2,
+		},
+	},
+
+	.common		= {
+		.reg		= 0x054,
+		.features	= CCU_FEATURE_VARIABLE_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("ahb1",
+						      ahb1_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct clk_div_table apb1_div_table[] = {
+	{ .val = 0, .div = 2 },
+	{ .val = 1, .div = 2 },
+	{ .val = 2, .div = 4 },
+	{ .val = 3, .div = 8 },
+	{ /* Sentinel */ },
+};
+static SUNXI_CCU_DIV_TABLE(apb1_clk, "apb1", "ahb1",
+			   0x054, 8, 2, apb1_div_table, 0);
+
+static const char * const apb2_parents[] = { "osc32k", "osc24M",
+					     "pll-periph" , "pll-periph" };
+static SUNXI_CCU_MP_WITH_MUX(apb2_clk, "apb2", apb2_parents, 0x058,
+			     0, 5,	/* M */
+			     16, 2,	/* P */
+			     24, 2,	/* mux */
+			     0);
+
+static SUNXI_CCU_GATE(bus_mipi_dsi_clk,	"bus-mipi-dsi",	"ahb1",
+		      0x060, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_dma_clk,	"bus-dma",	"ahb1",
+		      0x060, BIT(6), 0);
+static SUNXI_CCU_GATE(bus_mmc0_clk,	"bus-mmc0",	"ahb1",
+		      0x060, BIT(8), 0);
+static SUNXI_CCU_GATE(bus_mmc1_clk,	"bus-mmc1",	"ahb1",
+		      0x060, BIT(9), 0);
+static SUNXI_CCU_GATE(bus_mmc2_clk,	"bus-mmc2",	"ahb1",
+		      0x060, BIT(10), 0);
+static SUNXI_CCU_GATE(bus_nand_clk,	"bus-nand",	"ahb1",
+		      0x060, BIT(13), 0);
+static SUNXI_CCU_GATE(bus_dram_clk,	"bus-dram",	"ahb1",
+		      0x060, BIT(14), 0);
+static SUNXI_CCU_GATE(bus_hstimer_clk,	"bus-hstimer",	"ahb1",
+		      0x060, BIT(19), 0);
+static SUNXI_CCU_GATE(bus_spi0_clk,	"bus-spi0",	"ahb1",
+		      0x060, BIT(20), 0);
+static SUNXI_CCU_GATE(bus_spi1_clk,	"bus-spi1",	"ahb1",
+		      0x060, BIT(21), 0);
+static SUNXI_CCU_GATE(bus_otg_clk,	"bus-otg",	"ahb1",
+		      0x060, BIT(24), 0);
+static SUNXI_CCU_GATE(bus_ehci_clk,	"bus-ehci",	"ahb1",
+		      0x060, BIT(26), 0);
+static SUNXI_CCU_GATE(bus_ohci_clk,	"bus-ohci",	"ahb1",
+		      0x060, BIT(29), 0);
+
+static SUNXI_CCU_GATE(bus_ve_clk,	"bus-ve",	"ahb1",
+		      0x064, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_lcd_clk,	"bus-lcd",	"ahb1",
+		      0x064, BIT(4), 0);
+static SUNXI_CCU_GATE(bus_csi_clk,	"bus-csi",	"ahb1",
+		      0x064, BIT(8), 0);
+static SUNXI_CCU_GATE(bus_de_be_clk,	"bus-de-be",	"ahb1",
+		      0x064, BIT(12), 0);
+static SUNXI_CCU_GATE(bus_de_fe_clk,	"bus-de-fe",	"ahb1",
+		      0x064, BIT(14), 0);
+static SUNXI_CCU_GATE(bus_gpu_clk,	"bus-gpu",	"ahb1",
+		      0x064, BIT(20), 0);
+static SUNXI_CCU_GATE(bus_msgbox_clk,	"bus-msgbox",	"ahb1",
+		      0x064, BIT(21), 0);
+static SUNXI_CCU_GATE(bus_spinlock_clk,	"bus-spinlock",	"ahb1",
+		      0x064, BIT(22), 0);
+static SUNXI_CCU_GATE(bus_drc_clk,	"bus-drc",	"ahb1",
+		      0x064, BIT(25), 0);
+
+static SUNXI_CCU_GATE(bus_codec_clk,	"bus-codec",	"apb1",
+		      0x068, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_pio_clk,	"bus-pio",	"apb1",
+		      0x068, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_i2s0_clk,	"bus-i2s0",	"apb1",
+		      0x068, BIT(12), 0);
+static SUNXI_CCU_GATE(bus_i2s1_clk,	"bus-i2s1",	"apb1",
+		      0x068, BIT(13), 0);
+
+static SUNXI_CCU_GATE(bus_i2c0_clk,	"bus-i2c0",	"apb2",
+		      0x06c, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_i2c1_clk,	"bus-i2c1",	"apb2",
+		      0x06c, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_i2c2_clk,	"bus-i2c2",	"apb2",
+		      0x06c, BIT(2), 0);
+static SUNXI_CCU_GATE(bus_uart0_clk,	"bus-uart0",	"apb2",
+		      0x06c, BIT(16), 0);
+static SUNXI_CCU_GATE(bus_uart1_clk,	"bus-uart1",	"apb2",
+		      0x06c, BIT(17), 0);
+static SUNXI_CCU_GATE(bus_uart2_clk,	"bus-uart2",	"apb2",
+		      0x06c, BIT(18), 0);
+static SUNXI_CCU_GATE(bus_uart3_clk,	"bus-uart3",	"apb2",
+		      0x06c, BIT(19), 0);
+static SUNXI_CCU_GATE(bus_uart4_clk,	"bus-uart4",	"apb2",
+		      0x06c, BIT(20), 0);
+
+static const char * const mod0_default_parents[] = { "osc24M", "pll-periph" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand_clk, "nand", mod0_default_parents, 0x080,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc0_clk, "mmc0", mod0_default_parents, 0x088,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc0_sample_clk, "mmc0_sample", "mmc0",
+		       0x088, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc0_output_clk, "mmc0_output", "mmc0",
+		       0x088, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc1_clk, "mmc1", mod0_default_parents, 0x08c,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc1_sample_clk, "mmc1_sample", "mmc1",
+		       0x08c, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc1_output_clk, "mmc1_output", "mmc1",
+		       0x08c, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc2_clk, "mmc2", mod0_default_parents, 0x090,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc2_sample_clk, "mmc2_sample", "mmc2",
+		       0x090, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc2_output_clk, "mmc2_output", "mmc2",
+		       0x090, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", mod0_default_parents, 0x0a0,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents, 0x0a4,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x",
+					    "pll-audio-2x", "pll-audio" };
+static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, "i2s0", i2s_parents,
+			       0x0b0, 16, 2, BIT(31), 0);
+
+static SUNXI_CCU_MUX_WITH_GATE(i2s1_clk, "i2s1", i2s_parents,
+			       0x0b4, 16, 2, BIT(31), 0);
+
+/* TODO: the parent for most of the USB clocks is not known */
+static SUNXI_CCU_GATE(usb_phy0_clk,	"usb-phy0",	"osc24M",
+		      0x0cc, BIT(8), 0);
+static SUNXI_CCU_GATE(usb_phy1_clk,	"usb-phy1",	"osc24M",
+		      0x0cc, BIT(9), 0);
+static SUNXI_CCU_GATE(usb_hsic_clk,	"usb-hsic",	"pll-hsic",
+		      0x0cc, BIT(10), 0);
+static SUNXI_CCU_GATE(usb_hsic_12M_clk,	"usb-hsic-12M",	"osc24M",
+		      0x0cc, BIT(11), 0);
+static SUNXI_CCU_GATE(usb_ohci_clk,	"usb-ohci",	"osc24M",
+		      0x0cc, BIT(16), 0);
+
+static SUNXI_CCU_GATE(dram_ve_clk,	"dram-ve",	"pll-ddr",
+		      0x100, BIT(0), 0);
+static SUNXI_CCU_GATE(dram_csi_clk,	"dram-csi",	"pll-ddr",
+		      0x100, BIT(1), 0);
+static SUNXI_CCU_GATE(dram_drc_clk,	"dram-drc",	"pll-ddr",
+		      0x100, BIT(16), 0);
+static SUNXI_CCU_GATE(dram_de_fe_clk,	"dram-de-fe",	"pll-ddr",
+		      0x100, BIT(24), 0);
+static SUNXI_CCU_GATE(dram_de_be_clk,	"dram-de-be",	"pll-ddr",
+		      0x100, BIT(26), 0);
+
+static const char * const de_parents[] = { "pll-video", "pll-periph-2x",
+					   "pll-gpu", "pll-de" };
+static const u8 de_table[] = { 0, 2, 3, 5 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(de_be_clk, "de-be",
+				       de_parents, de_table,
+				       0x104, 0, 4, 24, 3, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(de_fe_clk, "de-fe",
+				       de_parents, de_table,
+				       0x10c, 0, 4, 24, 3, BIT(31), 0);
+
+static const char * const lcd_ch0_parents[] = { "pll-video", "pll-video-2x",
+						"pll-mipi" };
+static const u8 lcd_ch0_table[] = { 0, 2, 4 };
+static SUNXI_CCU_MUX_TABLE_WITH_GATE(lcd_ch0_clk, "lcd-ch0",
+				     lcd_ch0_parents, lcd_ch0_table,
+				     0x118, 24, 3, BIT(31),
+				     CLK_SET_RATE_PARENT);
+
+static const char * const lcd_ch1_parents[] = { "pll-video", "pll-video-2x" };
+static const u8 lcd_ch1_table[] = { 0, 2 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(lcd_ch1_clk, "lcd-ch1",
+				       lcd_ch1_parents, lcd_ch1_table,
+				       0x12c, 0, 4, 24, 2, BIT(31), 0);
+
+static const char * const csi_sclk_parents[] = { "pll-video", "pll-de",
+						 "pll-mipi", "pll-ve" };
+static const u8 csi_sclk_table[] = { 0, 3, 4, 5 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi_sclk_clk, "csi-sclk",
+				       csi_sclk_parents, csi_sclk_table,
+				       0x134, 16, 4, 24, 3, BIT(31), 0);
+
+static const char * const csi_mclk_parents[] = { "pll-video", "pll-de",
+						 "osc24M" };
+static const u8 csi_mclk_table[] = { 0, 3, 5 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi_mclk_clk, "csi-mclk",
+				       csi_mclk_parents, csi_mclk_table,
+				       0x134, 0, 5, 8, 3, BIT(15), 0);
+
+static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve",
+			     0x13c, 16, 3, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(ac_dig_clk,	"ac-dig",	"pll-audio",
+		      0x140, BIT(31), 0);
+static SUNXI_CCU_GATE(avs_clk,		"avs",		"osc24M",
+		      0x144, BIT(31), 0);
+
+static const char * const mbus_parents[] = { "osc24M", "pll-periph-2x",
+					     "pll-ddr" };
+static SUNXI_CCU_M_WITH_MUX_GATE(mbus_clk, "mbus", mbus_parents,
+				 0x15c, 0, 3, 24, 2, BIT(31), CLK_IS_CRITICAL);
+
+static const char * const dsi_sclk_parents[] = { "pll-video", "pll-video-2x" };
+static const u8 dsi_sclk_table[] = { 0, 2 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(dsi_sclk_clk, "dsi-sclk",
+				       dsi_sclk_parents, dsi_sclk_table,
+				       0x168, 16, 4, 24, 2, BIT(31), 0);
+
+static const char * const dsi_dphy_parents[] = { "pll-video", "pll-periph" };
+static const u8 dsi_dphy_table[] = { 0, 2 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(dsi_dphy_clk, "dsi-dphy",
+				       dsi_dphy_parents, dsi_dphy_table,
+				       0x168, 0, 4, 8, 2, BIT(15), 0);
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(drc_clk, "drc",
+				       de_parents, de_table,
+				       0x180, 0, 4, 24, 3, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_GATE(gpu_clk, "gpu", "pll-gpu",
+			     0x1a0, 0, 3, BIT(31), 0);
+
+static const char * const ats_parents[] = { "osc24M", "pll-periph" };
+static SUNXI_CCU_M_WITH_MUX_GATE(ats_clk, "ats", ats_parents,
+				 0x1b0, 0, 3, 24, 2, BIT(31), 0);
+
+static struct ccu_common *sun8i_a23_ccu_clks[] = {
+	&pll_cpux_clk.common,
+	&pll_audio_base_clk.common,
+	&pll_video_clk.common,
+	&pll_ve_clk.common,
+	&pll_ddr_clk.common,
+	&pll_periph_clk.common,
+	&pll_gpu_clk.common,
+	&pll_mipi_clk.common,
+	&pll_hsic_clk.common,
+	&pll_de_clk.common,
+	&cpux_clk.common,
+	&axi_clk.common,
+	&ahb1_clk.common,
+	&apb1_clk.common,
+	&apb2_clk.common,
+	&bus_mipi_dsi_clk.common,
+	&bus_dma_clk.common,
+	&bus_mmc0_clk.common,
+	&bus_mmc1_clk.common,
+	&bus_mmc2_clk.common,
+	&bus_nand_clk.common,
+	&bus_dram_clk.common,
+	&bus_hstimer_clk.common,
+	&bus_spi0_clk.common,
+	&bus_spi1_clk.common,
+	&bus_otg_clk.common,
+	&bus_ehci_clk.common,
+	&bus_ohci_clk.common,
+	&bus_ve_clk.common,
+	&bus_lcd_clk.common,
+	&bus_csi_clk.common,
+	&bus_de_fe_clk.common,
+	&bus_de_be_clk.common,
+	&bus_gpu_clk.common,
+	&bus_msgbox_clk.common,
+	&bus_spinlock_clk.common,
+	&bus_drc_clk.common,
+	&bus_codec_clk.common,
+	&bus_pio_clk.common,
+	&bus_i2s0_clk.common,
+	&bus_i2s1_clk.common,
+	&bus_i2c0_clk.common,
+	&bus_i2c1_clk.common,
+	&bus_i2c2_clk.common,
+	&bus_uart0_clk.common,
+	&bus_uart1_clk.common,
+	&bus_uart2_clk.common,
+	&bus_uart3_clk.common,
+	&bus_uart4_clk.common,
+	&nand_clk.common,
+	&mmc0_clk.common,
+	&mmc0_sample_clk.common,
+	&mmc0_output_clk.common,
+	&mmc1_clk.common,
+	&mmc1_sample_clk.common,
+	&mmc1_output_clk.common,
+	&mmc2_clk.common,
+	&mmc2_sample_clk.common,
+	&mmc2_output_clk.common,
+	&spi0_clk.common,
+	&spi1_clk.common,
+	&i2s0_clk.common,
+	&i2s1_clk.common,
+	&usb_phy0_clk.common,
+	&usb_phy1_clk.common,
+	&usb_hsic_clk.common,
+	&usb_hsic_12M_clk.common,
+	&usb_ohci_clk.common,
+	&dram_ve_clk.common,
+	&dram_csi_clk.common,
+	&dram_drc_clk.common,
+	&dram_de_fe_clk.common,
+	&dram_de_be_clk.common,
+	&de_be_clk.common,
+	&de_fe_clk.common,
+	&lcd_ch0_clk.common,
+	&lcd_ch1_clk.common,
+	&csi_sclk_clk.common,
+	&csi_mclk_clk.common,
+	&ve_clk.common,
+	&ac_dig_clk.common,
+	&avs_clk.common,
+	&mbus_clk.common,
+	&dsi_sclk_clk.common,
+	&dsi_dphy_clk.common,
+	&drc_clk.common,
+	&gpu_clk.common,
+	&ats_clk.common,
+};
+
+/* We hardcode the divider to 4 for now */
+static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
+			"pll-audio-base", 4, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_2x_clk, "pll-audio-2x",
+			"pll-audio-base", 2, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_4x_clk, "pll-audio-4x",
+			"pll-audio-base", 1, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_8x_clk, "pll-audio-8x",
+			"pll-audio-base", 1, 2, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_periph_2x_clk, "pll-periph-2x",
+			"pll-periph", 1, 2, 0);
+static CLK_FIXED_FACTOR(pll_video_2x_clk, "pll-video-2x",
+			"pll-video", 1, 2, 0);
+
+static struct clk_hw_onecell_data sun8i_a23_hw_clks = {
+	.hws	= {
+		[CLK_PLL_CPUX]		= &pll_cpux_clk.common.hw,
+		[CLK_PLL_AUDIO_BASE]	= &pll_audio_base_clk.common.hw,
+		[CLK_PLL_AUDIO]		= &pll_audio_clk.hw,
+		[CLK_PLL_AUDIO_2X]	= &pll_audio_2x_clk.hw,
+		[CLK_PLL_AUDIO_4X]	= &pll_audio_4x_clk.hw,
+		[CLK_PLL_AUDIO_8X]	= &pll_audio_8x_clk.hw,
+		[CLK_PLL_VIDEO]		= &pll_video_clk.common.hw,
+		[CLK_PLL_VIDEO_2X]	= &pll_video_2x_clk.hw,
+		[CLK_PLL_VE]		= &pll_ve_clk.common.hw,
+		[CLK_PLL_DDR0]		= &pll_ddr_clk.common.hw,
+		[CLK_PLL_PERIPH]	= &pll_periph_clk.common.hw,
+		[CLK_PLL_PERIPH_2X]	= &pll_periph_2x_clk.hw,
+		[CLK_PLL_GPU]		= &pll_gpu_clk.common.hw,
+		[CLK_PLL_MIPI]		= &pll_mipi_clk.common.hw,
+		[CLK_PLL_HSIC]		= &pll_hsic_clk.common.hw,
+		[CLK_PLL_DE]		= &pll_de_clk.common.hw,
+		[CLK_CPUX]		= &cpux_clk.common.hw,
+		[CLK_AXI]		= &axi_clk.common.hw,
+		[CLK_AHB1]		= &ahb1_clk.common.hw,
+		[CLK_APB1]		= &apb1_clk.common.hw,
+		[CLK_APB2]		= &apb2_clk.common.hw,
+		[CLK_BUS_MIPI_DSI]	= &bus_mipi_dsi_clk.common.hw,
+		[CLK_BUS_DMA]		= &bus_dma_clk.common.hw,
+		[CLK_BUS_MMC0]		= &bus_mmc0_clk.common.hw,
+		[CLK_BUS_MMC1]		= &bus_mmc1_clk.common.hw,
+		[CLK_BUS_MMC2]		= &bus_mmc2_clk.common.hw,
+		[CLK_BUS_NAND]		= &bus_nand_clk.common.hw,
+		[CLK_BUS_DRAM]		= &bus_dram_clk.common.hw,
+		[CLK_BUS_HSTIMER]	= &bus_hstimer_clk.common.hw,
+		[CLK_BUS_SPI0]		= &bus_spi0_clk.common.hw,
+		[CLK_BUS_SPI1]		= &bus_spi1_clk.common.hw,
+		[CLK_BUS_OTG]		= &bus_otg_clk.common.hw,
+		[CLK_BUS_EHCI]		= &bus_ehci_clk.common.hw,
+		[CLK_BUS_OHCI]		= &bus_ohci_clk.common.hw,
+		[CLK_BUS_VE]		= &bus_ve_clk.common.hw,
+		[CLK_BUS_LCD]		= &bus_lcd_clk.common.hw,
+		[CLK_BUS_CSI]		= &bus_csi_clk.common.hw,
+		[CLK_BUS_DE_BE]		= &bus_de_be_clk.common.hw,
+		[CLK_BUS_DE_FE]		= &bus_de_fe_clk.common.hw,
+		[CLK_BUS_GPU]		= &bus_gpu_clk.common.hw,
+		[CLK_BUS_MSGBOX]	= &bus_msgbox_clk.common.hw,
+		[CLK_BUS_SPINLOCK]	= &bus_spinlock_clk.common.hw,
+		[CLK_BUS_DRC]		= &bus_drc_clk.common.hw,
+		[CLK_BUS_CODEC]		= &bus_codec_clk.common.hw,
+		[CLK_BUS_PIO]		= &bus_pio_clk.common.hw,
+		[CLK_BUS_I2S0]		= &bus_i2s0_clk.common.hw,
+		[CLK_BUS_I2S1]		= &bus_i2s1_clk.common.hw,
+		[CLK_BUS_I2C0]		= &bus_i2c0_clk.common.hw,
+		[CLK_BUS_I2C1]		= &bus_i2c1_clk.common.hw,
+		[CLK_BUS_I2C2]		= &bus_i2c2_clk.common.hw,
+		[CLK_BUS_UART0]		= &bus_uart0_clk.common.hw,
+		[CLK_BUS_UART1]		= &bus_uart1_clk.common.hw,
+		[CLK_BUS_UART2]		= &bus_uart2_clk.common.hw,
+		[CLK_BUS_UART3]		= &bus_uart3_clk.common.hw,
+		[CLK_BUS_UART4]		= &bus_uart4_clk.common.hw,
+		[CLK_NAND]		= &nand_clk.common.hw,
+		[CLK_MMC0]		= &mmc0_clk.common.hw,
+		[CLK_MMC0_SAMPLE]	= &mmc0_sample_clk.common.hw,
+		[CLK_MMC0_OUTPUT]	= &mmc0_output_clk.common.hw,
+		[CLK_MMC1]		= &mmc1_clk.common.hw,
+		[CLK_MMC1_SAMPLE]	= &mmc1_sample_clk.common.hw,
+		[CLK_MMC1_OUTPUT]	= &mmc1_output_clk.common.hw,
+		[CLK_MMC2]		= &mmc2_clk.common.hw,
+		[CLK_MMC2_SAMPLE]	= &mmc2_sample_clk.common.hw,
+		[CLK_MMC2_OUTPUT]	= &mmc2_output_clk.common.hw,
+		[CLK_SPI0]		= &spi0_clk.common.hw,
+		[CLK_SPI1]		= &spi1_clk.common.hw,
+		[CLK_I2S0]		= &i2s0_clk.common.hw,
+		[CLK_I2S1]		= &i2s1_clk.common.hw,
+		[CLK_USB_PHY0]		= &usb_phy0_clk.common.hw,
+		[CLK_USB_PHY1]		= &usb_phy1_clk.common.hw,
+		[CLK_USB_HSIC]		= &usb_hsic_clk.common.hw,
+		[CLK_USB_HSIC_12M]	= &usb_hsic_12M_clk.common.hw,
+		[CLK_USB_OHCI]		= &usb_ohci_clk.common.hw,
+		[CLK_DRAM_VE]		= &dram_ve_clk.common.hw,
+		[CLK_DRAM_CSI]		= &dram_csi_clk.common.hw,
+		[CLK_DRAM_DRC]		= &dram_drc_clk.common.hw,
+		[CLK_DRAM_DE_FE]	= &dram_de_fe_clk.common.hw,
+		[CLK_DRAM_DE_BE]	= &dram_de_be_clk.common.hw,
+		[CLK_DE_BE]		= &de_be_clk.common.hw,
+		[CLK_DE_FE]		= &de_fe_clk.common.hw,
+		[CLK_LCD_CH0]		= &lcd_ch0_clk.common.hw,
+		[CLK_LCD_CH1]		= &lcd_ch1_clk.common.hw,
+		[CLK_CSI_SCLK]		= &csi_sclk_clk.common.hw,
+		[CLK_CSI_MCLK]		= &csi_mclk_clk.common.hw,
+		[CLK_VE]		= &ve_clk.common.hw,
+		[CLK_AC_DIG]		= &ac_dig_clk.common.hw,
+		[CLK_AVS]		= &avs_clk.common.hw,
+		[CLK_MBUS]		= &mbus_clk.common.hw,
+		[CLK_DSI_SCLK]		= &dsi_sclk_clk.common.hw,
+		[CLK_DSI_DPHY]		= &dsi_dphy_clk.common.hw,
+		[CLK_DRC]		= &drc_clk.common.hw,
+		[CLK_GPU]		= &gpu_clk.common.hw,
+		[CLK_ATS]		= &ats_clk.common.hw,
+	},
+	.num	= CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun8i_a23_ccu_resets[] = {
+	[RST_USB_PHY0]		=  { 0x0cc, BIT(0) },
+	[RST_USB_PHY1]		=  { 0x0cc, BIT(1) },
+	[RST_USB_HSIC]		=  { 0x0cc, BIT(2) },
+
+	[RST_MBUS]		=  { 0x0fc, BIT(31) },
+
+	[RST_BUS_MIPI_DSI]	=  { 0x2c0, BIT(1) },
+	[RST_BUS_DMA]		=  { 0x2c0, BIT(6) },
+	[RST_BUS_MMC0]		=  { 0x2c0, BIT(8) },
+	[RST_BUS_MMC1]		=  { 0x2c0, BIT(9) },
+	[RST_BUS_MMC2]		=  { 0x2c0, BIT(10) },
+	[RST_BUS_NAND]		=  { 0x2c0, BIT(13) },
+	[RST_BUS_DRAM]		=  { 0x2c0, BIT(14) },
+	[RST_BUS_HSTIMER]	=  { 0x2c0, BIT(19) },
+	[RST_BUS_SPI0]		=  { 0x2c0, BIT(20) },
+	[RST_BUS_SPI1]		=  { 0x2c0, BIT(21) },
+	[RST_BUS_OTG]		=  { 0x2c0, BIT(24) },
+	[RST_BUS_EHCI]		=  { 0x2c0, BIT(26) },
+	[RST_BUS_OHCI]		=  { 0x2c0, BIT(29) },
+
+	[RST_BUS_VE]		=  { 0x2c4, BIT(0) },
+	[RST_BUS_LCD]		=  { 0x2c4, BIT(4) },
+	[RST_BUS_CSI]		=  { 0x2c4, BIT(8) },
+	[RST_BUS_DE_BE]		=  { 0x2c4, BIT(12) },
+	[RST_BUS_DE_FE]		=  { 0x2c4, BIT(14) },
+	[RST_BUS_GPU]		=  { 0x2c4, BIT(20) },
+	[RST_BUS_MSGBOX]	=  { 0x2c4, BIT(21) },
+	[RST_BUS_SPINLOCK]	=  { 0x2c4, BIT(22) },
+	[RST_BUS_DRC]		=  { 0x2c4, BIT(25) },
+
+	[RST_BUS_LVDS]		=  { 0x2c8, BIT(0) },
+
+	[RST_BUS_CODEC]		=  { 0x2d0, BIT(0) },
+	[RST_BUS_I2S0]		=  { 0x2d0, BIT(12) },
+	[RST_BUS_I2S1]		=  { 0x2d0, BIT(13) },
+
+	[RST_BUS_I2C0]		=  { 0x2d8, BIT(0) },
+	[RST_BUS_I2C1]		=  { 0x2d8, BIT(1) },
+	[RST_BUS_I2C2]		=  { 0x2d8, BIT(2) },
+	[RST_BUS_UART0]		=  { 0x2d8, BIT(16) },
+	[RST_BUS_UART1]		=  { 0x2d8, BIT(17) },
+	[RST_BUS_UART2]		=  { 0x2d8, BIT(18) },
+	[RST_BUS_UART3]		=  { 0x2d8, BIT(19) },
+	[RST_BUS_UART4]		=  { 0x2d8, BIT(20) },
+};
+
+static const struct sunxi_ccu_desc sun8i_a23_ccu_desc = {
+	.ccu_clks	= sun8i_a23_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun8i_a23_ccu_clks),
+
+	.hw_clks	= &sun8i_a23_hw_clks,
+
+	.resets		= sun8i_a23_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun8i_a23_ccu_resets),
+};
+
+static void __init sun8i_a23_ccu_setup(struct device_node *node)
+{
+	void __iomem *reg;
+	u32 val;
+
+	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
+	if (IS_ERR(reg)) {
+		pr_err("%s: Could not map the clock registers\n",
+		       of_node_full_name(node));
+		return;
+	}
+
+	/* Force the PLL-Audio-1x divider to 4 */
+	val = readl(reg + SUN8I_A23_PLL_AUDIO_REG);
+	val &= ~GENMASK(19, 16);
+	writel(val | (3 << 16), reg + SUN8I_A23_PLL_AUDIO_REG);
+
+	/* Force PLL-MIPI to MIPI mode */
+	val = readl(reg + SUN8I_A23_PLL_MIPI_REG);
+	val &= ~BIT(16);
+	writel(val, reg + SUN8I_A23_PLL_MIPI_REG);
+
+	sunxi_ccu_probe(node, reg, &sun8i_a23_ccu_desc);
+}
+CLK_OF_DECLARE(sun8i_a23_ccu, "allwinner,sun8i-a23-ccu",
+	       sun8i_a23_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a33.c b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
new file mode 100644
index 0000000..96b40ca
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-a33.c
@@ -0,0 +1,780 @@
+/*
+ * Copyright (c) 2016 Maxime Ripard. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/of_address.h>
+
+#include "ccu_common.h"
+#include "ccu_reset.h"
+
+#include "ccu_div.h"
+#include "ccu_gate.h"
+#include "ccu_mp.h"
+#include "ccu_mult.h"
+#include "ccu_nk.h"
+#include "ccu_nkm.h"
+#include "ccu_nkmp.h"
+#include "ccu_nm.h"
+#include "ccu_phase.h"
+
+#include "ccu-sun8i-a23-a33.h"
+
+static struct ccu_nkmp pll_cpux_clk = {
+	.enable = BIT(31),
+	.lock	= BIT(28),
+
+	.n	= _SUNXI_CCU_MULT(8, 5),
+	.k	= _SUNXI_CCU_MULT(4, 2),
+	.m	= _SUNXI_CCU_DIV(0, 2),
+	.p	= _SUNXI_CCU_DIV_MAX(16, 2, 4),
+
+	.common	= {
+		.reg		= 0x000,
+		.hw.init	= CLK_HW_INIT("pll-cpux", "osc24M",
+					      &ccu_nkmp_ops,
+					      0),
+	},
+};
+
+/*
+ * The Audio PLL is supposed to have 4 outputs: 3 fixed factors from
+ * the base (2x, 4x and 8x), and one variable divider (the one true
+ * pll audio).
+ *
+ * We don't have any need for the variable divider for now, so we just
+ * hardcode it to match with the clock names
+ */
+#define SUN8I_A33_PLL_AUDIO_REG	0x008
+
+static SUNXI_CCU_NM_WITH_GATE_LOCK(pll_audio_base_clk, "pll-audio-base",
+				   "osc24M", 0x008,
+				   8, 7,		/* N */
+				   0, 5,		/* M */
+				   BIT(31),		/* gate */
+				   BIT(28),		/* lock */
+				   CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_video_clk, "pll-video",
+					"osc24M", 0x010,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_ve_clk, "pll-ve",
+					"osc24M", 0x018,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr0_clk, "pll-ddr0",
+				    "osc24M", 0x020,
+				    8, 5,		/* N */
+				    4, 2,		/* K */
+				    0, 2,		/* M */
+				    BIT(31),		/* gate */
+				    BIT(28),		/* lock */
+				    0);
+
+static SUNXI_CCU_NK_WITH_GATE_LOCK_POSTDIV(pll_periph_clk, "pll-periph",
+					   "osc24M", 0x028,
+					   8, 5,	/* N */
+					   4, 2,	/* K */
+					   BIT(31),	/* gate */
+					   BIT(28),	/* lock */
+					   2,		/* post-div */
+					   CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_gpu_clk, "pll-gpu",
+					"osc24M", 0x038,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+/*
+ * The MIPI PLL has 2 modes: "MIPI" and "HDMI".
+ *
+ * The MIPI mode is a standard NKM-style clock. The HDMI mode is an
+ * integer / fractional clock with switchable multipliers and dividers.
+ * This is not supported here. We hardcode the PLL to MIPI mode.
+ */
+#define SUN8I_A33_PLL_MIPI_REG	0x040
+static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_mipi_clk, "pll-mipi",
+				    "pll-video", 0x040,
+				    8, 4,		/* N */
+				    4, 2,		/* K */
+				    0, 4,		/* M */
+				    BIT(31),		/* gate */
+				    BIT(28),		/* lock */
+				    CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_hsic_clk, "pll-hsic",
+					"osc24M", 0x044,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_de_clk, "pll-de",
+					"osc24M", 0x048,
+					8, 7,		/* N */
+					0, 4,		/* M */
+					BIT(24),	/* frac enable */
+					BIT(25),	/* frac select */
+					270000000,	/* frac rate 0 */
+					297000000,	/* frac rate 1 */
+					BIT(31),	/* gate */
+					BIT(28),	/* lock */
+					CLK_SET_RATE_UNGATE);
+
+/* TODO: Fix N */
+static SUNXI_CCU_N_WITH_GATE_LOCK(pll_ddr1_clk, "pll-ddr1",
+				  "osc24M", 0x04c,
+				  8, 6,			/* N */
+				  BIT(31),		/* gate */
+				  BIT(28),		/* lock */
+				  CLK_SET_RATE_UNGATE);
+
+static const char * const cpux_parents[] = { "osc32k", "osc24M",
+					     "pll-cpux" , "pll-cpux" };
+static SUNXI_CCU_MUX(cpux_clk, "cpux", cpux_parents,
+		     0x050, 16, 2, CLK_IS_CRITICAL);
+
+static SUNXI_CCU_M(axi_clk, "axi", "cpux", 0x050, 0, 2, 0);
+
+static const char * const ahb1_parents[] = { "osc32k", "osc24M",
+					     "axi" , "pll-periph" };
+static struct ccu_div ahb1_clk = {
+	.div		= _SUNXI_CCU_DIV_FLAGS(4, 2, CLK_DIVIDER_POWER_OF_TWO),
+
+	.mux		= {
+		.shift	= 12,
+		.width	= 2,
+
+		.variable_prediv	= {
+			.index	= 3,
+			.shift	= 6,
+			.width	= 2,
+		},
+	},
+
+	.common		= {
+		.reg		= 0x054,
+		.features	= CCU_FEATURE_VARIABLE_PREDIV,
+		.hw.init	= CLK_HW_INIT_PARENTS("ahb1",
+						      ahb1_parents,
+						      &ccu_div_ops,
+						      0),
+	},
+};
+
+static struct clk_div_table apb1_div_table[] = {
+	{ .val = 0, .div = 2 },
+	{ .val = 1, .div = 2 },
+	{ .val = 2, .div = 4 },
+	{ .val = 3, .div = 8 },
+	{ /* Sentinel */ },
+};
+static SUNXI_CCU_DIV_TABLE(apb1_clk, "apb1", "ahb1",
+			   0x054, 8, 2, apb1_div_table, 0);
+
+static const char * const apb2_parents[] = { "osc32k", "osc24M",
+					     "pll-periph" , "pll-periph" };
+static SUNXI_CCU_MP_WITH_MUX(apb2_clk, "apb2", apb2_parents, 0x058,
+			     0, 5,	/* M */
+			     16, 2,	/* P */
+			     24, 2,	/* mux */
+			     0);
+
+static SUNXI_CCU_GATE(bus_mipi_dsi_clk,	"bus-mipi-dsi",	"ahb1",
+		      0x060, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_ss_clk,	"bus-ss",	"ahb1",
+		      0x060, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_dma_clk,	"bus-dma",	"ahb1",
+		      0x060, BIT(6), 0);
+static SUNXI_CCU_GATE(bus_mmc0_clk,	"bus-mmc0",	"ahb1",
+		      0x060, BIT(8), 0);
+static SUNXI_CCU_GATE(bus_mmc1_clk,	"bus-mmc1",	"ahb1",
+		      0x060, BIT(9), 0);
+static SUNXI_CCU_GATE(bus_mmc2_clk,	"bus-mmc2",	"ahb1",
+		      0x060, BIT(10), 0);
+static SUNXI_CCU_GATE(bus_nand_clk,	"bus-nand",	"ahb1",
+		      0x060, BIT(13), 0);
+static SUNXI_CCU_GATE(bus_dram_clk,	"bus-dram",	"ahb1",
+		      0x060, BIT(14), 0);
+static SUNXI_CCU_GATE(bus_hstimer_clk,	"bus-hstimer",	"ahb1",
+		      0x060, BIT(19), 0);
+static SUNXI_CCU_GATE(bus_spi0_clk,	"bus-spi0",	"ahb1",
+		      0x060, BIT(20), 0);
+static SUNXI_CCU_GATE(bus_spi1_clk,	"bus-spi1",	"ahb1",
+		      0x060, BIT(21), 0);
+static SUNXI_CCU_GATE(bus_otg_clk,	"bus-otg",	"ahb1",
+		      0x060, BIT(24), 0);
+static SUNXI_CCU_GATE(bus_ehci_clk,	"bus-ehci",	"ahb1",
+		      0x060, BIT(26), 0);
+static SUNXI_CCU_GATE(bus_ohci_clk,	"bus-ohci",	"ahb1",
+		      0x060, BIT(29), 0);
+
+static SUNXI_CCU_GATE(bus_ve_clk,	"bus-ve",	"ahb1",
+		      0x064, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_lcd_clk,	"bus-lcd",	"ahb1",
+		      0x064, BIT(4), 0);
+static SUNXI_CCU_GATE(bus_csi_clk,	"bus-csi",	"ahb1",
+		      0x064, BIT(8), 0);
+static SUNXI_CCU_GATE(bus_de_be_clk,	"bus-de-be",	"ahb1",
+		      0x064, BIT(12), 0);
+static SUNXI_CCU_GATE(bus_de_fe_clk,	"bus-de-fe",	"ahb1",
+		      0x064, BIT(14), 0);
+static SUNXI_CCU_GATE(bus_gpu_clk,	"bus-gpu",	"ahb1",
+		      0x064, BIT(20), 0);
+static SUNXI_CCU_GATE(bus_msgbox_clk,	"bus-msgbox",	"ahb1",
+		      0x064, BIT(21), 0);
+static SUNXI_CCU_GATE(bus_spinlock_clk,	"bus-spinlock",	"ahb1",
+		      0x064, BIT(22), 0);
+static SUNXI_CCU_GATE(bus_drc_clk,	"bus-drc",	"ahb1",
+		      0x064, BIT(25), 0);
+static SUNXI_CCU_GATE(bus_sat_clk,	"bus-sat",	"ahb1",
+		      0x064, BIT(26), 0);
+
+static SUNXI_CCU_GATE(bus_codec_clk,	"bus-codec",	"apb1",
+		      0x068, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_pio_clk,	"bus-pio",	"apb1",
+		      0x068, BIT(5), 0);
+static SUNXI_CCU_GATE(bus_i2s0_clk,	"bus-i2s0",	"apb1",
+		      0x068, BIT(12), 0);
+static SUNXI_CCU_GATE(bus_i2s1_clk,	"bus-i2s1",	"apb1",
+		      0x068, BIT(13), 0);
+
+static SUNXI_CCU_GATE(bus_i2c0_clk,	"bus-i2c0",	"apb2",
+		      0x06c, BIT(0), 0);
+static SUNXI_CCU_GATE(bus_i2c1_clk,	"bus-i2c1",	"apb2",
+		      0x06c, BIT(1), 0);
+static SUNXI_CCU_GATE(bus_i2c2_clk,	"bus-i2c2",	"apb2",
+		      0x06c, BIT(2), 0);
+static SUNXI_CCU_GATE(bus_uart0_clk,	"bus-uart0",	"apb2",
+		      0x06c, BIT(16), 0);
+static SUNXI_CCU_GATE(bus_uart1_clk,	"bus-uart1",	"apb2",
+		      0x06c, BIT(17), 0);
+static SUNXI_CCU_GATE(bus_uart2_clk,	"bus-uart2",	"apb2",
+		      0x06c, BIT(18), 0);
+static SUNXI_CCU_GATE(bus_uart3_clk,	"bus-uart3",	"apb2",
+		      0x06c, BIT(19), 0);
+static SUNXI_CCU_GATE(bus_uart4_clk,	"bus-uart4",	"apb2",
+		      0x06c, BIT(20), 0);
+
+static const char * const mod0_default_parents[] = { "osc24M", "pll-periph" };
+static SUNXI_CCU_MP_WITH_MUX_GATE(nand_clk, "nand", mod0_default_parents, 0x080,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc0_clk, "mmc0", mod0_default_parents, 0x088,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc0_sample_clk, "mmc0_sample", "mmc0",
+		       0x088, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc0_output_clk, "mmc0_output", "mmc0",
+		       0x088, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc1_clk, "mmc1", mod0_default_parents, 0x08c,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc1_sample_clk, "mmc1_sample", "mmc1",
+		       0x08c, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc1_output_clk, "mmc1_output", "mmc1",
+		       0x08c, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(mmc2_clk, "mmc2", mod0_default_parents, 0x090,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_PHASE(mmc2_sample_clk, "mmc2_sample", "mmc2",
+		       0x090, 20, 3, 0);
+static SUNXI_CCU_PHASE(mmc2_output_clk, "mmc2_output", "mmc2",
+		       0x090, 8, 3, 0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(ss_clk, "ss", mod0_default_parents, 0x09c,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi0_clk, "spi0", mod0_default_parents, 0x0a0,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static SUNXI_CCU_MP_WITH_MUX_GATE(spi1_clk, "spi1", mod0_default_parents, 0x0a4,
+				  0, 4,		/* M */
+				  16, 2,	/* P */
+				  24, 2,	/* mux */
+				  BIT(31),	/* gate */
+				  0);
+
+static const char * const i2s_parents[] = { "pll-audio-8x", "pll-audio-4x",
+					    "pll-audio-2x", "pll-audio" };
+static SUNXI_CCU_MUX_WITH_GATE(i2s0_clk, "i2s0", i2s_parents,
+			       0x0b0, 16, 2, BIT(31), 0);
+
+static SUNXI_CCU_MUX_WITH_GATE(i2s1_clk, "i2s1", i2s_parents,
+			       0x0b4, 16, 2, BIT(31), 0);
+
+/* TODO: the parent for most of the USB clocks is not known */
+static SUNXI_CCU_GATE(usb_phy0_clk,	"usb-phy0",	"osc24M",
+		      0x0cc, BIT(8), 0);
+static SUNXI_CCU_GATE(usb_phy1_clk,	"usb-phy1",	"osc24M",
+		      0x0cc, BIT(9), 0);
+static SUNXI_CCU_GATE(usb_hsic_clk,	"usb-hsic",	"pll-hsic",
+		      0x0cc, BIT(10), 0);
+static SUNXI_CCU_GATE(usb_hsic_12M_clk,	"usb-hsic-12M",	"osc24M",
+		      0x0cc, BIT(11), 0);
+static SUNXI_CCU_GATE(usb_ohci_clk,	"usb-ohci",	"osc24M",
+		      0x0cc, BIT(16), 0);
+
+static SUNXI_CCU_M(dram_clk, "dram", "pll-ddr",
+		   0x0f4, 0, 4, CLK_IS_CRITICAL);
+
+static const char * const pll_ddr_parents[] = { "pll-ddr0", "pll-ddr1" };
+static SUNXI_CCU_MUX(pll_ddr_clk, "pll-ddr", pll_ddr_parents,
+		     0x0f8, 16, 1, 0);
+
+static SUNXI_CCU_GATE(dram_ve_clk,	"dram-ve",	"dram",
+		      0x100, BIT(0), 0);
+static SUNXI_CCU_GATE(dram_csi_clk,	"dram-csi",	"dram",
+		      0x100, BIT(1), 0);
+static SUNXI_CCU_GATE(dram_drc_clk,	"dram-drc",	"dram",
+		      0x100, BIT(16), 0);
+static SUNXI_CCU_GATE(dram_de_fe_clk,	"dram-de-fe",	"dram",
+		      0x100, BIT(24), 0);
+static SUNXI_CCU_GATE(dram_de_be_clk,	"dram-de-be",	"dram",
+		      0x100, BIT(26), 0);
+
+static const char * const de_parents[] = { "pll-video", "pll-periph-2x",
+					   "pll-gpu", "pll-de" };
+static const u8 de_table[] = { 0, 2, 3, 5 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(de_be_clk, "de-be",
+				       de_parents, de_table,
+				       0x104, 0, 4, 24, 3, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(de_fe_clk, "de-fe",
+				       de_parents, de_table,
+				       0x10c, 0, 4, 24, 3, BIT(31), 0);
+
+static const char * const lcd_ch0_parents[] = { "pll-video", "pll-video-2x",
+						"pll-mipi" };
+static const u8 lcd_ch0_table[] = { 0, 2, 4 };
+static SUNXI_CCU_MUX_TABLE_WITH_GATE(lcd_ch0_clk, "lcd-ch0",
+				     lcd_ch0_parents, lcd_ch0_table,
+				     0x118, 24, 3, BIT(31),
+				     CLK_SET_RATE_PARENT);
+
+static const char * const lcd_ch1_parents[] = { "pll-video", "pll-video-2x" };
+static const u8 lcd_ch1_table[] = { 0, 2 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(lcd_ch1_clk, "lcd-ch1",
+				       lcd_ch1_parents, lcd_ch1_table,
+				       0x12c, 0, 4, 24, 2, BIT(31), 0);
+
+static const char * const csi_sclk_parents[] = { "pll-video", "pll-de",
+						 "pll-mipi", "pll-ve" };
+static const u8 csi_sclk_table[] = { 0, 3, 4, 5 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi_sclk_clk, "csi-sclk",
+				       csi_sclk_parents, csi_sclk_table,
+				       0x134, 16, 4, 24, 3, BIT(31), 0);
+
+static const char * const csi_mclk_parents[] = { "pll-video", "pll-de",
+						 "osc24M" };
+static const u8 csi_mclk_table[] = { 0, 3, 5 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(csi_mclk_clk, "csi-mclk",
+				       csi_mclk_parents, csi_mclk_table,
+				       0x134, 0, 5, 8, 3, BIT(15), 0);
+
+static SUNXI_CCU_M_WITH_GATE(ve_clk, "ve", "pll-ve",
+			     0x13c, 16, 3, BIT(31), CLK_SET_RATE_PARENT);
+
+static SUNXI_CCU_GATE(ac_dig_clk,	"ac-dig",	"pll-audio",
+		      0x140, BIT(31), 0);
+static SUNXI_CCU_GATE(ac_dig_4x_clk,	"ac-dig-4x",	"pll-audio-4x",
+		      0x140, BIT(30), 0);
+static SUNXI_CCU_GATE(avs_clk,		"avs",		"osc24M",
+		      0x144, BIT(31), 0);
+
+static const char * const mbus_parents[] = { "osc24M", "pll-periph-2x",
+					     "pll-ddr0", "pll-ddr1" };
+static SUNXI_CCU_M_WITH_MUX_GATE(mbus_clk, "mbus", mbus_parents,
+				 0x15c, 0, 3, 24, 2, BIT(31), CLK_IS_CRITICAL);
+
+static const char * const dsi_sclk_parents[] = { "pll-video", "pll-video-2x" };
+static const u8 dsi_sclk_table[] = { 0, 2 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(dsi_sclk_clk, "dsi-sclk",
+				       dsi_sclk_parents, dsi_sclk_table,
+				       0x168, 16, 4, 24, 2, BIT(31), 0);
+
+static const char * const dsi_dphy_parents[] = { "pll-video", "pll-periph" };
+static const u8 dsi_dphy_table[] = { 0, 2 };
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(dsi_dphy_clk, "dsi-dphy",
+				       dsi_dphy_parents, dsi_dphy_table,
+				       0x168, 0, 4, 8, 2, BIT(15), 0);
+
+static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(drc_clk, "drc",
+				       de_parents, de_table,
+				       0x180, 0, 4, 24, 3, BIT(31), 0);
+
+static SUNXI_CCU_M_WITH_GATE(gpu_clk, "gpu", "pll-gpu",
+			     0x1a0, 0, 3, BIT(31), 0);
+
+static const char * const ats_parents[] = { "osc24M", "pll-periph" };
+static SUNXI_CCU_M_WITH_MUX_GATE(ats_clk, "ats", ats_parents,
+				 0x1b0, 0, 3, 24, 2, BIT(31), 0);
+
+static struct ccu_common *sun8i_a33_ccu_clks[] = {
+	&pll_cpux_clk.common,
+	&pll_audio_base_clk.common,
+	&pll_video_clk.common,
+	&pll_ve_clk.common,
+	&pll_ddr0_clk.common,
+	&pll_periph_clk.common,
+	&pll_gpu_clk.common,
+	&pll_mipi_clk.common,
+	&pll_hsic_clk.common,
+	&pll_de_clk.common,
+	&pll_ddr1_clk.common,
+	&pll_ddr_clk.common,
+	&cpux_clk.common,
+	&axi_clk.common,
+	&ahb1_clk.common,
+	&apb1_clk.common,
+	&apb2_clk.common,
+	&bus_mipi_dsi_clk.common,
+	&bus_ss_clk.common,
+	&bus_dma_clk.common,
+	&bus_mmc0_clk.common,
+	&bus_mmc1_clk.common,
+	&bus_mmc2_clk.common,
+	&bus_nand_clk.common,
+	&bus_dram_clk.common,
+	&bus_hstimer_clk.common,
+	&bus_spi0_clk.common,
+	&bus_spi1_clk.common,
+	&bus_otg_clk.common,
+	&bus_ehci_clk.common,
+	&bus_ohci_clk.common,
+	&bus_ve_clk.common,
+	&bus_lcd_clk.common,
+	&bus_csi_clk.common,
+	&bus_de_fe_clk.common,
+	&bus_de_be_clk.common,
+	&bus_gpu_clk.common,
+	&bus_msgbox_clk.common,
+	&bus_spinlock_clk.common,
+	&bus_drc_clk.common,
+	&bus_sat_clk.common,
+	&bus_codec_clk.common,
+	&bus_pio_clk.common,
+	&bus_i2s0_clk.common,
+	&bus_i2s1_clk.common,
+	&bus_i2c0_clk.common,
+	&bus_i2c1_clk.common,
+	&bus_i2c2_clk.common,
+	&bus_uart0_clk.common,
+	&bus_uart1_clk.common,
+	&bus_uart2_clk.common,
+	&bus_uart3_clk.common,
+	&bus_uart4_clk.common,
+	&nand_clk.common,
+	&mmc0_clk.common,
+	&mmc0_sample_clk.common,
+	&mmc0_output_clk.common,
+	&mmc1_clk.common,
+	&mmc1_sample_clk.common,
+	&mmc1_output_clk.common,
+	&mmc2_clk.common,
+	&mmc2_sample_clk.common,
+	&mmc2_output_clk.common,
+	&ss_clk.common,
+	&spi0_clk.common,
+	&spi1_clk.common,
+	&i2s0_clk.common,
+	&i2s1_clk.common,
+	&usb_phy0_clk.common,
+	&usb_phy1_clk.common,
+	&usb_hsic_clk.common,
+	&usb_hsic_12M_clk.common,
+	&usb_ohci_clk.common,
+	&dram_clk.common,
+	&dram_ve_clk.common,
+	&dram_csi_clk.common,
+	&dram_drc_clk.common,
+	&dram_de_fe_clk.common,
+	&dram_de_be_clk.common,
+	&de_be_clk.common,
+	&de_fe_clk.common,
+	&lcd_ch0_clk.common,
+	&lcd_ch1_clk.common,
+	&csi_sclk_clk.common,
+	&csi_mclk_clk.common,
+	&ve_clk.common,
+	&ac_dig_clk.common,
+	&ac_dig_4x_clk.common,
+	&avs_clk.common,
+	&mbus_clk.common,
+	&dsi_sclk_clk.common,
+	&dsi_dphy_clk.common,
+	&drc_clk.common,
+	&gpu_clk.common,
+	&ats_clk.common,
+};
+
+/* We hardcode the divider to 4 for now */
+static CLK_FIXED_FACTOR(pll_audio_clk, "pll-audio",
+			"pll-audio-base", 4, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_2x_clk, "pll-audio-2x",
+			"pll-audio-base", 2, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_4x_clk, "pll-audio-4x",
+			"pll-audio-base", 1, 1, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_audio_8x_clk, "pll-audio-8x",
+			"pll-audio-base", 1, 2, CLK_SET_RATE_PARENT);
+static CLK_FIXED_FACTOR(pll_periph_2x_clk, "pll-periph-2x",
+			"pll-periph", 1, 2, 0);
+static CLK_FIXED_FACTOR(pll_video_2x_clk, "pll-video-2x",
+			"pll-video", 1, 2, 0);
+
+static struct clk_hw_onecell_data sun8i_a33_hw_clks = {
+	.hws	= {
+		[CLK_PLL_CPUX]		= &pll_cpux_clk.common.hw,
+		[CLK_PLL_AUDIO_BASE]	= &pll_audio_base_clk.common.hw,
+		[CLK_PLL_AUDIO]		= &pll_audio_clk.hw,
+		[CLK_PLL_AUDIO_2X]	= &pll_audio_2x_clk.hw,
+		[CLK_PLL_AUDIO_4X]	= &pll_audio_4x_clk.hw,
+		[CLK_PLL_AUDIO_8X]	= &pll_audio_8x_clk.hw,
+		[CLK_PLL_VIDEO]		= &pll_video_clk.common.hw,
+		[CLK_PLL_VIDEO_2X]	= &pll_video_2x_clk.hw,
+		[CLK_PLL_VE]		= &pll_ve_clk.common.hw,
+		[CLK_PLL_DDR0]		= &pll_ddr0_clk.common.hw,
+		[CLK_PLL_PERIPH]	= &pll_periph_clk.common.hw,
+		[CLK_PLL_PERIPH_2X]	= &pll_periph_2x_clk.hw,
+		[CLK_PLL_GPU]		= &pll_gpu_clk.common.hw,
+		[CLK_PLL_MIPI]		= &pll_mipi_clk.common.hw,
+		[CLK_PLL_HSIC]		= &pll_hsic_clk.common.hw,
+		[CLK_PLL_DE]		= &pll_de_clk.common.hw,
+		[CLK_PLL_DDR1]		= &pll_ddr1_clk.common.hw,
+		[CLK_PLL_DDR]		= &pll_ddr_clk.common.hw,
+		[CLK_CPUX]		= &cpux_clk.common.hw,
+		[CLK_AXI]		= &axi_clk.common.hw,
+		[CLK_AHB1]		= &ahb1_clk.common.hw,
+		[CLK_APB1]		= &apb1_clk.common.hw,
+		[CLK_APB2]		= &apb2_clk.common.hw,
+		[CLK_BUS_MIPI_DSI]	= &bus_mipi_dsi_clk.common.hw,
+		[CLK_BUS_SS]		= &bus_ss_clk.common.hw,
+		[CLK_BUS_DMA]		= &bus_dma_clk.common.hw,
+		[CLK_BUS_MMC0]		= &bus_mmc0_clk.common.hw,
+		[CLK_BUS_MMC1]		= &bus_mmc1_clk.common.hw,
+		[CLK_BUS_MMC2]		= &bus_mmc2_clk.common.hw,
+		[CLK_BUS_NAND]		= &bus_nand_clk.common.hw,
+		[CLK_BUS_DRAM]		= &bus_dram_clk.common.hw,
+		[CLK_BUS_HSTIMER]	= &bus_hstimer_clk.common.hw,
+		[CLK_BUS_SPI0]		= &bus_spi0_clk.common.hw,
+		[CLK_BUS_SPI1]		= &bus_spi1_clk.common.hw,
+		[CLK_BUS_OTG]		= &bus_otg_clk.common.hw,
+		[CLK_BUS_EHCI]		= &bus_ehci_clk.common.hw,
+		[CLK_BUS_OHCI]		= &bus_ohci_clk.common.hw,
+		[CLK_BUS_VE]		= &bus_ve_clk.common.hw,
+		[CLK_BUS_LCD]		= &bus_lcd_clk.common.hw,
+		[CLK_BUS_CSI]		= &bus_csi_clk.common.hw,
+		[CLK_BUS_DE_BE]		= &bus_de_be_clk.common.hw,
+		[CLK_BUS_DE_FE]		= &bus_de_fe_clk.common.hw,
+		[CLK_BUS_GPU]		= &bus_gpu_clk.common.hw,
+		[CLK_BUS_MSGBOX]	= &bus_msgbox_clk.common.hw,
+		[CLK_BUS_SPINLOCK]	= &bus_spinlock_clk.common.hw,
+		[CLK_BUS_DRC]		= &bus_drc_clk.common.hw,
+		[CLK_BUS_SAT]		= &bus_sat_clk.common.hw,
+		[CLK_BUS_CODEC]		= &bus_codec_clk.common.hw,
+		[CLK_BUS_PIO]		= &bus_pio_clk.common.hw,
+		[CLK_BUS_I2S0]		= &bus_i2s0_clk.common.hw,
+		[CLK_BUS_I2S1]		= &bus_i2s1_clk.common.hw,
+		[CLK_BUS_I2C0]		= &bus_i2c0_clk.common.hw,
+		[CLK_BUS_I2C1]		= &bus_i2c1_clk.common.hw,
+		[CLK_BUS_I2C2]		= &bus_i2c2_clk.common.hw,
+		[CLK_BUS_UART0]		= &bus_uart0_clk.common.hw,
+		[CLK_BUS_UART1]		= &bus_uart1_clk.common.hw,
+		[CLK_BUS_UART2]		= &bus_uart2_clk.common.hw,
+		[CLK_BUS_UART3]		= &bus_uart3_clk.common.hw,
+		[CLK_BUS_UART4]		= &bus_uart4_clk.common.hw,
+		[CLK_NAND]		= &nand_clk.common.hw,
+		[CLK_MMC0]		= &mmc0_clk.common.hw,
+		[CLK_MMC0_SAMPLE]	= &mmc0_sample_clk.common.hw,
+		[CLK_MMC0_OUTPUT]	= &mmc0_output_clk.common.hw,
+		[CLK_MMC1]		= &mmc1_clk.common.hw,
+		[CLK_MMC1_SAMPLE]	= &mmc1_sample_clk.common.hw,
+		[CLK_MMC1_OUTPUT]	= &mmc1_output_clk.common.hw,
+		[CLK_MMC2]		= &mmc2_clk.common.hw,
+		[CLK_MMC2_SAMPLE]	= &mmc2_sample_clk.common.hw,
+		[CLK_MMC2_OUTPUT]	= &mmc2_output_clk.common.hw,
+		[CLK_SS]		= &ss_clk.common.hw,
+		[CLK_SPI0]		= &spi0_clk.common.hw,
+		[CLK_SPI1]		= &spi1_clk.common.hw,
+		[CLK_I2S0]		= &i2s0_clk.common.hw,
+		[CLK_I2S1]		= &i2s1_clk.common.hw,
+		[CLK_USB_PHY0]		= &usb_phy0_clk.common.hw,
+		[CLK_USB_PHY1]		= &usb_phy1_clk.common.hw,
+		[CLK_USB_HSIC]		= &usb_hsic_clk.common.hw,
+		[CLK_USB_HSIC_12M]	= &usb_hsic_12M_clk.common.hw,
+		[CLK_USB_OHCI]		= &usb_ohci_clk.common.hw,
+		[CLK_DRAM]		= &dram_clk.common.hw,
+		[CLK_DRAM_VE]		= &dram_ve_clk.common.hw,
+		[CLK_DRAM_CSI]		= &dram_csi_clk.common.hw,
+		[CLK_DRAM_DRC]		= &dram_drc_clk.common.hw,
+		[CLK_DRAM_DE_FE]	= &dram_de_fe_clk.common.hw,
+		[CLK_DRAM_DE_BE]	= &dram_de_be_clk.common.hw,
+		[CLK_DE_BE]		= &de_be_clk.common.hw,
+		[CLK_DE_FE]		= &de_fe_clk.common.hw,
+		[CLK_LCD_CH0]		= &lcd_ch0_clk.common.hw,
+		[CLK_LCD_CH1]		= &lcd_ch1_clk.common.hw,
+		[CLK_CSI_SCLK]		= &csi_sclk_clk.common.hw,
+		[CLK_CSI_MCLK]		= &csi_mclk_clk.common.hw,
+		[CLK_VE]		= &ve_clk.common.hw,
+		[CLK_AC_DIG]		= &ac_dig_clk.common.hw,
+		[CLK_AC_DIG_4X]		= &ac_dig_4x_clk.common.hw,
+		[CLK_AVS]		= &avs_clk.common.hw,
+		[CLK_MBUS]		= &mbus_clk.common.hw,
+		[CLK_DSI_SCLK]		= &dsi_sclk_clk.common.hw,
+		[CLK_DSI_DPHY]		= &dsi_dphy_clk.common.hw,
+		[CLK_DRC]		= &drc_clk.common.hw,
+		[CLK_GPU]		= &gpu_clk.common.hw,
+		[CLK_ATS]		= &ats_clk.common.hw,
+	},
+	.num	= CLK_NUMBER,
+};
+
+static struct ccu_reset_map sun8i_a33_ccu_resets[] = {
+	[RST_USB_PHY0]		=  { 0x0cc, BIT(0) },
+	[RST_USB_PHY1]		=  { 0x0cc, BIT(1) },
+	[RST_USB_HSIC]		=  { 0x0cc, BIT(2) },
+
+	[RST_MBUS]		=  { 0x0fc, BIT(31) },
+
+	[RST_BUS_MIPI_DSI]	=  { 0x2c0, BIT(1) },
+	[RST_BUS_SS]		=  { 0x2c0, BIT(5) },
+	[RST_BUS_DMA]		=  { 0x2c0, BIT(6) },
+	[RST_BUS_MMC0]		=  { 0x2c0, BIT(8) },
+	[RST_BUS_MMC1]		=  { 0x2c0, BIT(9) },
+	[RST_BUS_MMC2]		=  { 0x2c0, BIT(10) },
+	[RST_BUS_NAND]		=  { 0x2c0, BIT(13) },
+	[RST_BUS_DRAM]		=  { 0x2c0, BIT(14) },
+	[RST_BUS_HSTIMER]	=  { 0x2c0, BIT(19) },
+	[RST_BUS_SPI0]		=  { 0x2c0, BIT(20) },
+	[RST_BUS_SPI1]		=  { 0x2c0, BIT(21) },
+	[RST_BUS_OTG]		=  { 0x2c0, BIT(24) },
+	[RST_BUS_EHCI]		=  { 0x2c0, BIT(26) },
+	[RST_BUS_OHCI]		=  { 0x2c0, BIT(29) },
+
+	[RST_BUS_VE]		=  { 0x2c4, BIT(0) },
+	[RST_BUS_LCD]		=  { 0x2c4, BIT(4) },
+	[RST_BUS_CSI]		=  { 0x2c4, BIT(8) },
+	[RST_BUS_DE_BE]		=  { 0x2c4, BIT(12) },
+	[RST_BUS_DE_FE]		=  { 0x2c4, BIT(14) },
+	[RST_BUS_GPU]		=  { 0x2c4, BIT(20) },
+	[RST_BUS_MSGBOX]	=  { 0x2c4, BIT(21) },
+	[RST_BUS_SPINLOCK]	=  { 0x2c4, BIT(22) },
+	[RST_BUS_DRC]		=  { 0x2c4, BIT(25) },
+	[RST_BUS_SAT]		=  { 0x2c4, BIT(26) },
+
+	[RST_BUS_LVDS]		=  { 0x2c8, BIT(0) },
+
+	[RST_BUS_CODEC]		=  { 0x2d0, BIT(0) },
+	[RST_BUS_I2S0]		=  { 0x2d0, BIT(12) },
+	[RST_BUS_I2S1]		=  { 0x2d0, BIT(13) },
+
+	[RST_BUS_I2C0]		=  { 0x2d8, BIT(0) },
+	[RST_BUS_I2C1]		=  { 0x2d8, BIT(1) },
+	[RST_BUS_I2C2]		=  { 0x2d8, BIT(2) },
+	[RST_BUS_UART0]		=  { 0x2d8, BIT(16) },
+	[RST_BUS_UART1]		=  { 0x2d8, BIT(17) },
+	[RST_BUS_UART2]		=  { 0x2d8, BIT(18) },
+	[RST_BUS_UART3]		=  { 0x2d8, BIT(19) },
+	[RST_BUS_UART4]		=  { 0x2d8, BIT(20) },
+};
+
+static const struct sunxi_ccu_desc sun8i_a33_ccu_desc = {
+	.ccu_clks	= sun8i_a33_ccu_clks,
+	.num_ccu_clks	= ARRAY_SIZE(sun8i_a33_ccu_clks),
+
+	.hw_clks	= &sun8i_a33_hw_clks,
+
+	.resets		= sun8i_a33_ccu_resets,
+	.num_resets	= ARRAY_SIZE(sun8i_a33_ccu_resets),
+};
+
+static void __init sun8i_a33_ccu_setup(struct device_node *node)
+{
+	void __iomem *reg;
+	u32 val;
+
+	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
+	if (IS_ERR(reg)) {
+		pr_err("%s: Could not map the clock registers\n",
+		       of_node_full_name(node));
+		return;
+	}
+
+	/* Force the PLL-Audio-1x divider to 4 */
+	val = readl(reg + SUN8I_A33_PLL_AUDIO_REG);
+	val &= ~GENMASK(19, 16);
+	writel(val | (3 << 16), reg + SUN8I_A33_PLL_AUDIO_REG);
+
+	/* Force PLL-MIPI to MIPI mode */
+	val = readl(reg + SUN8I_A33_PLL_MIPI_REG);
+	val &= ~BIT(16);
+	writel(val, reg + SUN8I_A33_PLL_MIPI_REG);
+
+	sunxi_ccu_probe(node, reg, &sun8i_a33_ccu_desc);
+}
+CLK_OF_DECLARE(sun8i_a33_ccu, "allwinner,sun8i-a33-ccu",
+	       sun8i_a33_ccu_setup);
diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
index 9af35954..4d70590 100644
--- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
+++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c
@@ -184,15 +184,15 @@
 			     0);
 
 static const char * const ahb2_parents[] = { "ahb1" , "pll-periph0" };
+static const struct ccu_mux_fixed_prediv ahb2_fixed_predivs[] = {
+	{ .index = 1, .div = 2 },
+};
 static struct ccu_mux ahb2_clk = {
 	.mux		= {
 		.shift	= 0,
 		.width	= 1,
-
-		.fixed_prediv	= {
-			.index	= 1,
-			.div	= 2,
-		},
+		.fixed_predivs	= ahb2_fixed_predivs,
+		.n_predivs	= ARRAY_SIZE(ahb2_fixed_predivs),
 	},
 
 	.common		= {
@@ -783,14 +783,14 @@
 	[RST_BUS_I2S1]		=  { 0x2d0, BIT(13) },
 	[RST_BUS_I2S2]		=  { 0x2d0, BIT(14) },
 
-	[RST_BUS_I2C0]		=  { 0x2d4, BIT(0) },
-	[RST_BUS_I2C1]		=  { 0x2d4, BIT(1) },
-	[RST_BUS_I2C2]		=  { 0x2d4, BIT(2) },
-	[RST_BUS_UART0]		=  { 0x2d4, BIT(16) },
-	[RST_BUS_UART1]		=  { 0x2d4, BIT(17) },
-	[RST_BUS_UART2]		=  { 0x2d4, BIT(18) },
-	[RST_BUS_UART3]		=  { 0x2d4, BIT(19) },
-	[RST_BUS_SCR]		=  { 0x2d4, BIT(20) },
+	[RST_BUS_I2C0]		=  { 0x2d8, BIT(0) },
+	[RST_BUS_I2C1]		=  { 0x2d8, BIT(1) },
+	[RST_BUS_I2C2]		=  { 0x2d8, BIT(2) },
+	[RST_BUS_UART0]		=  { 0x2d8, BIT(16) },
+	[RST_BUS_UART1]		=  { 0x2d8, BIT(17) },
+	[RST_BUS_UART2]		=  { 0x2d8, BIT(18) },
+	[RST_BUS_UART3]		=  { 0x2d8, BIT(19) },
+	[RST_BUS_SCR]		=  { 0x2d8, BIT(20) },
 };
 
 static const struct sunxi_ccu_desc sun8i_h3_ccu_desc = {
diff --git a/drivers/clk/sunxi-ng/ccu_common.c b/drivers/clk/sunxi-ng/ccu_common.c
index fc17b52..51d4bac 100644
--- a/drivers/clk/sunxi-ng/ccu_common.c
+++ b/drivers/clk/sunxi-ng/ccu_common.c
@@ -31,7 +31,7 @@
 		return;
 
 	WARN_ON(readl_relaxed_poll_timeout(common->base + common->reg, reg,
-					   !(reg & lock), 100, 70000));
+					   reg & lock, 100, 70000));
 }
 
 int sunxi_ccu_probe(struct device_node *node, void __iomem *reg,
diff --git a/drivers/clk/sunxi-ng/ccu_div.h b/drivers/clk/sunxi-ng/ccu_div.h
index 653ade5..34c3388 100644
--- a/drivers/clk/sunxi-ng/ccu_div.h
+++ b/drivers/clk/sunxi-ng/ccu_div.h
@@ -19,10 +19,29 @@
 #include "ccu_common.h"
 #include "ccu_mux.h"
 
+/**
+ * struct _ccu_div - Internal divider description
+ * @shift: Bit offset of the divider in its register
+ * @width: Width of the divider field in its register
+ * @max: Maximum value allowed for that divider. This is the
+ *       arithmetic value, not the maximum value to be set in the
+ *       register.
+ * @flags: clk_divider flags to apply on this divider
+ * @table: Divider table pointer (if applicable)
+ *
+ * That structure represents a single divider, and is meant to be
+ * embedded in other structures representing the various clock
+ * classes.
+ *
+ * It is basically a wrapper around the clk_divider functions
+ * arguments.
+ */
 struct _ccu_div {
 	u8			shift;
 	u8			width;
 
+	u32			max;
+
 	u32			flags;
 
 	struct clk_div_table	*table;
@@ -36,14 +55,25 @@
 		.table	= _table,					\
 	}
 
-#define _SUNXI_CCU_DIV_FLAGS(_shift, _width, _flags)			\
-	_SUNXI_CCU_DIV_TABLE_FLAGS(_shift, _width, NULL, _flags)
-
 #define _SUNXI_CCU_DIV_TABLE(_shift, _width, _table)			\
 	_SUNXI_CCU_DIV_TABLE_FLAGS(_shift, _width, _table, 0)
 
+#define _SUNXI_CCU_DIV_MAX_FLAGS(_shift, _width, _max, _flags) \
+	{								\
+		.shift	= _shift,					\
+		.width	= _width,					\
+		.flags	= _flags,					\
+		.max	= _max,						\
+	}
+
+#define _SUNXI_CCU_DIV_FLAGS(_shift, _width, _flags)			\
+	_SUNXI_CCU_DIV_MAX_FLAGS(_shift, _width, 0, _flags)
+
+#define _SUNXI_CCU_DIV_MAX(_shift, _width, _max)			\
+	_SUNXI_CCU_DIV_MAX_FLAGS(_shift, _width, _max, 0)
+
 #define _SUNXI_CCU_DIV(_shift, _width)					\
-	_SUNXI_CCU_DIV_TABLE_FLAGS(_shift, _width, NULL, 0)
+	_SUNXI_CCU_DIV_FLAGS(_shift, _width, 0)
 
 struct ccu_div {
 	u32			enable;
@@ -77,13 +107,16 @@
 				      _shift, _width, _table, 0,	\
 				      _flags)
 
-#define SUNXI_CCU_M_WITH_MUX_GATE(_struct, _name, _parents, _reg,	\
-				  _mshift, _mwidth, _muxshift, _muxwidth, \
-				  _gate, _flags)			\
+#define SUNXI_CCU_M_WITH_MUX_TABLE_GATE(_struct, _name,			\
+					_parents, _table,		\
+					_reg,				\
+					_mshift, _mwidth,		\
+					_muxshift, _muxwidth,		\
+					_gate, _flags)			\
 	struct ccu_div _struct = {					\
 		.enable	= _gate,					\
 		.div	= _SUNXI_CCU_DIV(_mshift, _mwidth),		\
-		.mux	= SUNXI_CLK_MUX(_muxshift, _muxwidth),		\
+		.mux	= _SUNXI_CCU_MUX_TABLE(_muxshift, _muxwidth, _table), \
 		.common	= {						\
 			.reg		= _reg,				\
 			.hw.init	= CLK_HW_INIT_PARENTS(_name,	\
@@ -93,12 +126,23 @@
 		},							\
 	}
 
+#define SUNXI_CCU_M_WITH_MUX_GATE(_struct, _name, _parents, _reg,	\
+				  _mshift, _mwidth, _muxshift, _muxwidth, \
+				  _gate, _flags)			\
+	SUNXI_CCU_M_WITH_MUX_TABLE_GATE(_struct, _name,			\
+					_parents, NULL,			\
+					_reg, _mshift, _mwidth,		\
+					_muxshift, _muxwidth,		\
+					_gate, _flags)
+
 #define SUNXI_CCU_M_WITH_MUX(_struct, _name, _parents, _reg,		\
 			     _mshift, _mwidth, _muxshift, _muxwidth,	\
 			     _flags)					\
-	SUNXI_CCU_M_WITH_MUX_GATE(_struct, _name, _parents, _reg,	\
-				  _mshift, _mwidth, _muxshift, _muxwidth, \
-				  0, _flags)
+	SUNXI_CCU_M_WITH_MUX_TABLE_GATE(_struct, _name,			\
+					_parents, NULL,			\
+					_reg, _mshift, _mwidth,		\
+					_muxshift, _muxwidth,		\
+					0, _flags)
 
 
 #define SUNXI_CCU_M_WITH_GATE(_struct, _name, _parent, _reg,		\
diff --git a/drivers/clk/sunxi-ng/ccu_mp.c b/drivers/clk/sunxi-ng/ccu_mp.c
index cbf33ef..ebb1b31 100644
--- a/drivers/clk/sunxi-ng/ccu_mp.c
+++ b/drivers/clk/sunxi-ng/ccu_mp.c
@@ -21,9 +21,9 @@
 	unsigned int best_m = 0, best_p = 0;
 	unsigned int _m, _p;
 
-	for (_p = 0; _p <= max_p; _p++) {
+	for (_p = 1; _p <= max_p; _p <<= 1) {
 		for (_m = 1; _m <= max_m; _m++) {
-			unsigned long tmp_rate = (parent >> _p) / _m;
+			unsigned long tmp_rate = parent / _p / _m;
 
 			if (tmp_rate > rate)
 				continue;
@@ -46,13 +46,15 @@
 				       void *data)
 {
 	struct ccu_mp *cmp = data;
+	unsigned int max_m, max_p;
 	unsigned int m, p;
 
-	ccu_mp_find_best(parent_rate, rate,
-			 1 << cmp->m.width, (1 << cmp->p.width) - 1,
-			 &m, &p);
+	max_m = cmp->m.max ?: 1 << cmp->m.width;
+	max_p = cmp->p.max ?: 1 << ((1 << cmp->p.width) - 1);
 
-	return (parent_rate >> p) / m;
+	ccu_mp_find_best(parent_rate, rate, max_m, max_p, &m, &p);
+
+	return parent_rate / p / m;
 }
 
 static void ccu_mp_disable(struct clk_hw *hw)
@@ -108,13 +110,14 @@
 {
 	struct ccu_mp *cmp = hw_to_ccu_mp(hw);
 	unsigned long flags;
+	unsigned int max_m, max_p;
 	unsigned int m, p;
 	u32 reg;
 
-	ccu_mp_find_best(parent_rate, rate,
-			 1 << cmp->m.width, (1 << cmp->p.width) - 1,
-			 &m, &p);
+	max_m = cmp->m.max ?: 1 << cmp->m.width;
+	max_p = cmp->p.max ?: 1 << ((1 << cmp->p.width) - 1);
 
+	ccu_mp_find_best(parent_rate, rate, max_m, max_p, &m, &p);
 
 	spin_lock_irqsave(cmp->common.lock, flags);
 
@@ -122,7 +125,7 @@
 	reg &= ~GENMASK(cmp->m.width + cmp->m.shift - 1, cmp->m.shift);
 	reg &= ~GENMASK(cmp->p.width + cmp->p.shift - 1, cmp->p.shift);
 
-	writel(reg | (p << cmp->p.shift) | ((m - 1) << cmp->m.shift),
+	writel(reg | (ilog2(p) << cmp->p.shift) | ((m - 1) << cmp->m.shift),
 	       cmp->common.base + cmp->common.reg);
 
 	spin_unlock_irqrestore(cmp->common.lock, flags);
diff --git a/drivers/clk/sunxi-ng/ccu_mp.h b/drivers/clk/sunxi-ng/ccu_mp.h
index 3cf12bf..edf9215 100644
--- a/drivers/clk/sunxi-ng/ccu_mp.h
+++ b/drivers/clk/sunxi-ng/ccu_mp.h
@@ -44,7 +44,7 @@
 		.enable	= _gate,					\
 		.m	= _SUNXI_CCU_DIV(_mshift, _mwidth),		\
 		.p	= _SUNXI_CCU_DIV(_pshift, _pwidth),		\
-		.mux	= SUNXI_CLK_MUX(_muxshift, _muxwidth),		\
+		.mux	= _SUNXI_CCU_MUX(_muxshift, _muxwidth),		\
 		.common	= {						\
 			.reg		= _reg,				\
 			.hw.init	= CLK_HW_INIT_PARENTS(_name,	\
diff --git a/drivers/clk/sunxi-ng/ccu_mult.c b/drivers/clk/sunxi-ng/ccu_mult.c
new file mode 100644
index 0000000..010e942
--- /dev/null
+++ b/drivers/clk/sunxi-ng/ccu_mult.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2016 Maxime Ripard
+ * Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+#include <linux/clk-provider.h>
+
+#include "ccu_gate.h"
+#include "ccu_mult.h"
+
+static void ccu_mult_find_best(unsigned long parent, unsigned long rate,
+			       unsigned int max_n, unsigned int *n)
+{
+	*n = rate / parent;
+}
+
+static unsigned long ccu_mult_round_rate(struct ccu_mux_internal *mux,
+					unsigned long parent_rate,
+					unsigned long rate,
+					void *data)
+{
+	struct ccu_mult *cm = data;
+	unsigned int n;
+
+	ccu_mult_find_best(parent_rate, rate, 1 << cm->mult.width, &n);
+
+	return parent_rate * n;
+}
+
+static void ccu_mult_disable(struct clk_hw *hw)
+{
+	struct ccu_mult *cm = hw_to_ccu_mult(hw);
+
+	return ccu_gate_helper_disable(&cm->common, cm->enable);
+}
+
+static int ccu_mult_enable(struct clk_hw *hw)
+{
+	struct ccu_mult *cm = hw_to_ccu_mult(hw);
+
+	return ccu_gate_helper_enable(&cm->common, cm->enable);
+}
+
+static int ccu_mult_is_enabled(struct clk_hw *hw)
+{
+	struct ccu_mult *cm = hw_to_ccu_mult(hw);
+
+	return ccu_gate_helper_is_enabled(&cm->common, cm->enable);
+}
+
+static unsigned long ccu_mult_recalc_rate(struct clk_hw *hw,
+					unsigned long parent_rate)
+{
+	struct ccu_mult *cm = hw_to_ccu_mult(hw);
+	unsigned long val;
+	u32 reg;
+
+	reg = readl(cm->common.base + cm->common.reg);
+	val = reg >> cm->mult.shift;
+	val &= (1 << cm->mult.width) - 1;
+
+	ccu_mux_helper_adjust_parent_for_prediv(&cm->common, &cm->mux, -1,
+						&parent_rate);
+
+	return parent_rate * (val + 1);
+}
+
+static int ccu_mult_determine_rate(struct clk_hw *hw,
+				struct clk_rate_request *req)
+{
+	struct ccu_mult *cm = hw_to_ccu_mult(hw);
+
+	return ccu_mux_helper_determine_rate(&cm->common, &cm->mux,
+					     req, ccu_mult_round_rate, cm);
+}
+
+static int ccu_mult_set_rate(struct clk_hw *hw, unsigned long rate,
+			   unsigned long parent_rate)
+{
+	struct ccu_mult *cm = hw_to_ccu_mult(hw);
+	unsigned long flags;
+	unsigned int n;
+	u32 reg;
+
+	ccu_mux_helper_adjust_parent_for_prediv(&cm->common, &cm->mux, -1,
+						&parent_rate);
+
+	ccu_mult_find_best(parent_rate, rate, 1 << cm->mult.width, &n);
+
+	spin_lock_irqsave(cm->common.lock, flags);
+
+	reg = readl(cm->common.base + cm->common.reg);
+	reg &= ~GENMASK(cm->mult.width + cm->mult.shift - 1, cm->mult.shift);
+
+	writel(reg | ((n - 1) << cm->mult.shift),
+	       cm->common.base + cm->common.reg);
+
+	spin_unlock_irqrestore(cm->common.lock, flags);
+
+	return 0;
+}
+
+static u8 ccu_mult_get_parent(struct clk_hw *hw)
+{
+	struct ccu_mult *cm = hw_to_ccu_mult(hw);
+
+	return ccu_mux_helper_get_parent(&cm->common, &cm->mux);
+}
+
+static int ccu_mult_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct ccu_mult *cm = hw_to_ccu_mult(hw);
+
+	return ccu_mux_helper_set_parent(&cm->common, &cm->mux, index);
+}
+
+const struct clk_ops ccu_mult_ops = {
+	.disable	= ccu_mult_disable,
+	.enable		= ccu_mult_enable,
+	.is_enabled	= ccu_mult_is_enabled,
+
+	.get_parent	= ccu_mult_get_parent,
+	.set_parent	= ccu_mult_set_parent,
+
+	.determine_rate	= ccu_mult_determine_rate,
+	.recalc_rate	= ccu_mult_recalc_rate,
+	.set_rate	= ccu_mult_set_rate,
+};
diff --git a/drivers/clk/sunxi-ng/ccu_mult.h b/drivers/clk/sunxi-ng/ccu_mult.h
index 609db66..5d2c8dc 100644
--- a/drivers/clk/sunxi-ng/ccu_mult.h
+++ b/drivers/clk/sunxi-ng/ccu_mult.h
@@ -1,6 +1,9 @@
 #ifndef _CCU_MULT_H_
 #define _CCU_MULT_H_
 
+#include "ccu_common.h"
+#include "ccu_mux.h"
+
 struct _ccu_mult {
 	u8	shift;
 	u8	width;
@@ -12,4 +15,36 @@
 		.width	= _width,		\
 	}
 
+struct ccu_mult {
+	u32			enable;
+
+	struct _ccu_mult	mult;
+	struct ccu_mux_internal	mux;
+	struct ccu_common	common;
+};
+
+#define SUNXI_CCU_N_WITH_GATE_LOCK(_struct, _name, _parent, _reg,	\
+				   _mshift, _mwidth, _gate, _lock,	\
+				   _flags)				\
+	struct ccu_mult _struct = {					\
+		.enable	= _gate,					\
+		.mult	= _SUNXI_CCU_MULT(_mshift, _mwidth),		\
+		.common	= {						\
+			.reg		= _reg,				\
+			.hw.init	= CLK_HW_INIT(_name,		\
+						      _parent,		\
+						      &ccu_mult_ops,	\
+						      _flags),		\
+		},							\
+	}
+
+static inline struct ccu_mult *hw_to_ccu_mult(struct clk_hw *hw)
+{
+	struct ccu_common *common = hw_to_ccu_common(hw);
+
+	return container_of(common, struct ccu_mult, common);
+}
+
+extern const struct clk_ops ccu_mult_ops;
+
 #endif /* _CCU_MULT_H_ */
diff --git a/drivers/clk/sunxi-ng/ccu_mux.c b/drivers/clk/sunxi-ng/ccu_mux.c
index 58fc36e..a43ad52 100644
--- a/drivers/clk/sunxi-ng/ccu_mux.c
+++ b/drivers/clk/sunxi-ng/ccu_mux.c
@@ -8,7 +8,9 @@
  * the License, or (at your option) any later version.
  */
 
+#include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/delay.h>
 
 #include "ccu_gate.h"
 #include "ccu_mux.h"
@@ -18,8 +20,9 @@
 					     int parent_index,
 					     unsigned long *parent_rate)
 {
-	u8 prediv = 1;
+	u16 prediv = 1;
 	u32 reg;
+	int i;
 
 	if (!((common->features & CCU_FEATURE_FIXED_PREDIV) ||
 	      (common->features & CCU_FEATURE_VARIABLE_PREDIV)))
@@ -32,8 +35,9 @@
 	}
 
 	if (common->features & CCU_FEATURE_FIXED_PREDIV)
-		if (parent_index == cm->fixed_prediv.index)
-			prediv = cm->fixed_prediv.div;
+		for (i = 0; i < cm->n_predivs; i++)
+			if (parent_index == cm->fixed_predivs[i].index)
+				prediv = cm->fixed_predivs[i].div;
 
 	if (common->features & CCU_FEATURE_VARIABLE_PREDIV)
 		if (parent_index == cm->variable_prediv.index) {
@@ -107,6 +111,15 @@
 	parent = reg >> cm->shift;
 	parent &= (1 << cm->width) - 1;
 
+	if (cm->table) {
+		int num_parents = clk_hw_get_num_parents(&common->hw);
+		int i;
+
+		for (i = 0; i < num_parents; i++)
+			if (cm->table[i] == parent)
+				return i;
+	}
+
 	return parent;
 }
 
@@ -117,6 +130,9 @@
 	unsigned long flags;
 	u32 reg;
 
+	if (cm->table)
+		index = cm->table[index];
+
 	spin_lock_irqsave(common->lock, flags);
 
 	reg = readl(common->base + common->reg);
@@ -185,3 +201,37 @@
 	.determine_rate	= __clk_mux_determine_rate,
 	.recalc_rate	= ccu_mux_recalc_rate,
 };
+
+/*
+ * This clock notifier is called when the frequency of the of the parent
+ * PLL clock is to be changed. The idea is to switch the parent to a
+ * stable clock, such as the main oscillator, while the PLL frequency
+ * stabilizes.
+ */
+static int ccu_mux_notifier_cb(struct notifier_block *nb,
+			       unsigned long event, void *data)
+{
+	struct ccu_mux_nb *mux = to_ccu_mux_nb(nb);
+	int ret = 0;
+
+	if (event == PRE_RATE_CHANGE) {
+		mux->original_index = ccu_mux_helper_get_parent(mux->common,
+								mux->cm);
+		ret = ccu_mux_helper_set_parent(mux->common, mux->cm,
+						mux->bypass_index);
+	} else if (event == POST_RATE_CHANGE) {
+		ret = ccu_mux_helper_set_parent(mux->common, mux->cm,
+						mux->original_index);
+	}
+
+	udelay(mux->delay_us);
+
+	return notifier_from_errno(ret);
+}
+
+int ccu_mux_notifier_register(struct clk *clk, struct ccu_mux_nb *mux_nb)
+{
+	mux_nb->clk_nb.notifier_call = ccu_mux_notifier_cb;
+
+	return clk_notifier_register(clk, &mux_nb->clk_nb);
+}
diff --git a/drivers/clk/sunxi-ng/ccu_mux.h b/drivers/clk/sunxi-ng/ccu_mux.h
index 9450826..47aba3a 100644
--- a/drivers/clk/sunxi-ng/ccu_mux.h
+++ b/drivers/clk/sunxi-ng/ccu_mux.h
@@ -5,14 +5,18 @@
 
 #include "ccu_common.h"
 
-struct ccu_mux_internal {
-	u8	shift;
-	u8	width;
+struct ccu_mux_fixed_prediv {
+	u8	index;
+	u16	div;
+};
 
-	struct {
-		u8	index;
-		u8	div;
-	} fixed_prediv;
+struct ccu_mux_internal {
+	u8		shift;
+	u8		width;
+	const u8	*table;
+
+	const struct ccu_mux_fixed_prediv	*fixed_predivs;
+	u8		n_predivs;
 
 	struct {
 		u8	index;
@@ -21,12 +25,16 @@
 	} variable_prediv;
 };
 
-#define SUNXI_CLK_MUX(_shift, _width)	\
-	{					\
-		.shift	= _shift,		\
-		.width	= _width,		\
+#define _SUNXI_CCU_MUX_TABLE(_shift, _width, _table)	\
+	{						\
+		.shift	= _shift,			\
+		.width	= _width,			\
+		.table	= _table,			\
 	}
 
+#define _SUNXI_CCU_MUX(_shift, _width) \
+	_SUNXI_CCU_MUX_TABLE(_shift, _width, NULL)
+
 struct ccu_mux {
 	u16			reg;
 	u32			enable;
@@ -35,9 +43,12 @@
 	struct ccu_common	common;
 };
 
-#define SUNXI_CCU_MUX(_struct, _name, _parents, _reg, _shift, _width, _flags) \
+#define SUNXI_CCU_MUX_TABLE_WITH_GATE(_struct, _name, _parents, _table,	\
+				     _reg, _shift, _width, _gate,	\
+				     _flags)				\
 	struct ccu_mux _struct = {					\
-		.mux	= SUNXI_CLK_MUX(_shift, _width),		\
+		.enable	= _gate,					\
+		.mux	= _SUNXI_CCU_MUX_TABLE(_shift, _width, _table),	\
 		.common	= {						\
 			.reg		= _reg,				\
 			.hw.init	= CLK_HW_INIT_PARENTS(_name,	\
@@ -49,17 +60,14 @@
 
 #define SUNXI_CCU_MUX_WITH_GATE(_struct, _name, _parents, _reg,		\
 				_shift, _width, _gate, _flags)		\
-	struct ccu_mux _struct = {					\
-		.enable	= _gate,					\
-		.mux	= SUNXI_CLK_MUX(_shift, _width),		\
-		.common	= {						\
-			.reg		= _reg,				\
-			.hw.init	= CLK_HW_INIT_PARENTS(_name,	\
-							      _parents, \
-							      &ccu_mux_ops, \
-							      _flags),	\
-		}							\
-	}
+	SUNXI_CCU_MUX_TABLE_WITH_GATE(_struct, _name, _parents, NULL,	\
+				      _reg, _shift, _width, _gate,	\
+				      _flags)
+
+#define SUNXI_CCU_MUX(_struct, _name, _parents, _reg, _shift, _width,	\
+		      _flags)						\
+	SUNXI_CCU_MUX_TABLE_WITH_GATE(_struct, _name, _parents, NULL,	\
+				      _reg, _shift, _width, 0, _flags)
 
 static inline struct ccu_mux *hw_to_ccu_mux(struct clk_hw *hw)
 {
@@ -88,4 +96,18 @@
 			      struct ccu_mux_internal *cm,
 			      u8 index);
 
+struct ccu_mux_nb {
+	struct notifier_block	clk_nb;
+	struct ccu_common	*common;
+	struct ccu_mux_internal	*cm;
+
+	u32	delay_us;	/* How many us to wait after reparenting */
+	u8	bypass_index;	/* Which parent to temporarily use */
+	u8	original_index;	/* This is set by the notifier callback */
+};
+
+#define to_ccu_mux_nb(_nb) container_of(_nb, struct ccu_mux_nb, clk_nb)
+
+int ccu_mux_notifier_register(struct clk *clk, struct ccu_mux_nb *mux_nb);
+
 #endif /* _CCU_MUX_H_ */
diff --git a/drivers/clk/sunxi-ng/ccu_nk.c b/drivers/clk/sunxi-ng/ccu_nk.c
index 4470ffc..d6fafb3 100644
--- a/drivers/clk/sunxi-ng/ccu_nk.c
+++ b/drivers/clk/sunxi-ng/ccu_nk.c
@@ -14,9 +14,9 @@
 #include "ccu_gate.h"
 #include "ccu_nk.h"
 
-void ccu_nk_find_best(unsigned long parent, unsigned long rate,
-		      unsigned int max_n, unsigned int max_k,
-		      unsigned int *n, unsigned int *k)
+static void ccu_nk_find_best(unsigned long parent, unsigned long rate,
+			     unsigned int max_n, unsigned int max_k,
+			     unsigned int *n, unsigned int *k)
 {
 	unsigned long best_rate = 0;
 	unsigned int best_k = 0, best_n = 0;
diff --git a/drivers/clk/sunxi-ng/ccu_nkm.c b/drivers/clk/sunxi-ng/ccu_nkm.c
index 2071822..059fdc3 100644
--- a/drivers/clk/sunxi-ng/ccu_nkm.c
+++ b/drivers/clk/sunxi-ng/ccu_nkm.c
@@ -93,19 +93,30 @@
 	return parent_rate * (n + 1) * (k + 1) / (m + 1);
 }
 
-static long ccu_nkm_round_rate(struct clk_hw *hw, unsigned long rate,
-			      unsigned long *parent_rate)
+static unsigned long ccu_nkm_round_rate(struct ccu_mux_internal *mux,
+					unsigned long parent_rate,
+					unsigned long rate,
+					void *data)
 {
-	struct ccu_nkm *nkm = hw_to_ccu_nkm(hw);
+	struct ccu_nkm *nkm = data;
 	struct _ccu_nkm _nkm;
 
 	_nkm.max_n = 1 << nkm->n.width;
 	_nkm.max_k = 1 << nkm->k.width;
-	_nkm.max_m = 1 << nkm->m.width;
+	_nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
 
-	ccu_nkm_find_best(*parent_rate, rate, &_nkm);
+	ccu_nkm_find_best(parent_rate, rate, &_nkm);
 
-	return *parent_rate * _nkm.n * _nkm.k / _nkm.m;
+	return parent_rate * _nkm.n * _nkm.k / _nkm.m;
+}
+
+static int ccu_nkm_determine_rate(struct clk_hw *hw,
+				  struct clk_rate_request *req)
+{
+	struct ccu_nkm *nkm = hw_to_ccu_nkm(hw);
+
+	return ccu_mux_helper_determine_rate(&nkm->common, &nkm->mux,
+					     req, ccu_nkm_round_rate, nkm);
 }
 
 static int ccu_nkm_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -118,7 +129,7 @@
 
 	_nkm.max_n = 1 << nkm->n.width;
 	_nkm.max_k = 1 << nkm->k.width;
-	_nkm.max_m = 1 << nkm->m.width;
+	_nkm.max_m = nkm->m.max ?: 1 << nkm->m.width;
 
 	ccu_nkm_find_best(parent_rate, rate, &_nkm);
 
@@ -142,12 +153,29 @@
 	return 0;
 }
 
+static u8 ccu_nkm_get_parent(struct clk_hw *hw)
+{
+	struct ccu_nkm *nkm = hw_to_ccu_nkm(hw);
+
+	return ccu_mux_helper_get_parent(&nkm->common, &nkm->mux);
+}
+
+static int ccu_nkm_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct ccu_nkm *nkm = hw_to_ccu_nkm(hw);
+
+	return ccu_mux_helper_set_parent(&nkm->common, &nkm->mux, index);
+}
+
 const struct clk_ops ccu_nkm_ops = {
 	.disable	= ccu_nkm_disable,
 	.enable		= ccu_nkm_enable,
 	.is_enabled	= ccu_nkm_is_enabled,
 
+	.get_parent	= ccu_nkm_get_parent,
+	.set_parent	= ccu_nkm_set_parent,
+
+	.determine_rate	= ccu_nkm_determine_rate,
 	.recalc_rate	= ccu_nkm_recalc_rate,
-	.round_rate	= ccu_nkm_round_rate,
 	.set_rate	= ccu_nkm_set_rate,
 };
diff --git a/drivers/clk/sunxi-ng/ccu_nkm.h b/drivers/clk/sunxi-ng/ccu_nkm.h
index 1936ac1..35493fd 100644
--- a/drivers/clk/sunxi-ng/ccu_nkm.h
+++ b/drivers/clk/sunxi-ng/ccu_nkm.h
@@ -32,10 +32,33 @@
 	struct _ccu_mult	n;
 	struct _ccu_mult	k;
 	struct _ccu_div		m;
+	struct ccu_mux_internal	mux;
 
 	struct ccu_common	common;
 };
 
+#define SUNXI_CCU_NKM_WITH_MUX_GATE_LOCK(_struct, _name, _parents, _reg, \
+					 _nshift, _nwidth,		\
+					 _kshift, _kwidth,		\
+					 _mshift, _mwidth,		\
+					 _muxshift, _muxwidth,		\
+					 _gate, _lock, _flags)		\
+	struct ccu_nkm _struct = {					\
+		.enable		= _gate,				\
+		.lock		= _lock,				\
+		.k		= _SUNXI_CCU_MULT(_kshift, _kwidth),	\
+		.n		= _SUNXI_CCU_MULT(_nshift, _nwidth),	\
+		.m		= _SUNXI_CCU_DIV(_mshift, _mwidth),	\
+		.mux		= _SUNXI_CCU_MUX(_muxshift, _muxwidth),	\
+		.common		= {					\
+			.reg		= _reg,				\
+			.hw.init	= CLK_HW_INIT_PARENTS(_name,	\
+						      _parents,		\
+						      &ccu_nkm_ops,	\
+						      _flags),		\
+		},							\
+	}
+
 #define SUNXI_CCU_NKM_WITH_GATE_LOCK(_struct, _name, _parent, _reg,	\
 				     _nshift, _nwidth,			\
 				     _kshift, _kwidth,			\
diff --git a/drivers/clk/sunxi-ng/ccu_nkmp.c b/drivers/clk/sunxi-ng/ccu_nkmp.c
index 9f2b98e..9769dee 100644
--- a/drivers/clk/sunxi-ng/ccu_nkmp.c
+++ b/drivers/clk/sunxi-ng/ccu_nkmp.c
@@ -29,14 +29,14 @@
 	unsigned long _n, _k, _m, _p;
 
 	for (_k = 1; _k <= nkmp->max_k; _k++) {
-		for (_p = 0; _p <= nkmp->max_p; _p++) {
+		for (_p = 1; _p <= nkmp->max_p; _p <<= 1) {
 			unsigned long tmp_rate;
 
-			rational_best_approximation(rate / _k, parent >> _p,
+			rational_best_approximation(rate / _k, parent / _p,
 						    nkmp->max_n, nkmp->max_m,
 						    &_n, &_m);
 
-			tmp_rate = (parent * _n * _k >> _p) / _m;
+			tmp_rate = parent * _n * _k / (_m * _p);
 
 			if (tmp_rate > rate)
 				continue;
@@ -110,13 +110,12 @@
 
 	_nkmp.max_n = 1 << nkmp->n.width;
 	_nkmp.max_k = 1 << nkmp->k.width;
-	_nkmp.max_m = 1 << nkmp->m.width;
-	_nkmp.max_p = (1 << nkmp->p.width) - 1;
+	_nkmp.max_m = nkmp->m.max ?: 1 << nkmp->m.width;
+	_nkmp.max_p = nkmp->p.max ?: 1 << ((1 << nkmp->p.width) - 1);
 
-	ccu_nkmp_find_best(*parent_rate, rate,
-			   &_nkmp);
+	ccu_nkmp_find_best(*parent_rate, rate, &_nkmp);
 
-	return (*parent_rate * _nkmp.n * _nkmp.k >> _nkmp.p) / _nkmp.m;
+	return *parent_rate * _nkmp.n * _nkmp.k / (_nkmp.m * _nkmp.p);
 }
 
 static int ccu_nkmp_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -129,8 +128,8 @@
 
 	_nkmp.max_n = 1 << nkmp->n.width;
 	_nkmp.max_k = 1 << nkmp->k.width;
-	_nkmp.max_m = 1 << nkmp->m.width;
-	_nkmp.max_p = (1 << nkmp->p.width) - 1;
+	_nkmp.max_m = nkmp->m.max ?: 1 << nkmp->m.width;
+	_nkmp.max_p = nkmp->p.max ?: 1 << ((1 << nkmp->p.width) - 1);
 
 	ccu_nkmp_find_best(parent_rate, rate, &_nkmp);
 
@@ -145,7 +144,7 @@
 	reg |= (_nkmp.n - 1) << nkmp->n.shift;
 	reg |= (_nkmp.k - 1) << nkmp->k.shift;
 	reg |= (_nkmp.m - 1) << nkmp->m.shift;
-	reg |= _nkmp.p << nkmp->p.shift;
+	reg |= ilog2(_nkmp.p) << nkmp->p.shift;
 
 	writel(reg, nkmp->common.base + nkmp->common.reg);
 
diff --git a/drivers/clk/sunxi-ng/ccu_nm.c b/drivers/clk/sunxi-ng/ccu_nm.c
index e35ddd8..b61bdd8 100644
--- a/drivers/clk/sunxi-ng/ccu_nm.c
+++ b/drivers/clk/sunxi-ng/ccu_nm.c
@@ -61,11 +61,13 @@
 			      unsigned long *parent_rate)
 {
 	struct ccu_nm *nm = hw_to_ccu_nm(hw);
+	unsigned long max_n, max_m;
 	unsigned long n, m;
 
-	rational_best_approximation(rate, *parent_rate,
-				    1 << nm->n.width, 1 << nm->m.width,
-				    &n, &m);
+	max_n = 1 << nm->n.width;
+	max_m = nm->m.max ?: 1 << nm->m.width;
+
+	rational_best_approximation(rate, *parent_rate, max_n, max_m, &n, &m);
 
 	return *parent_rate * n / m;
 }
@@ -75,6 +77,7 @@
 {
 	struct ccu_nm *nm = hw_to_ccu_nm(hw);
 	unsigned long flags;
+	unsigned long max_n, max_m;
 	unsigned long n, m;
 	u32 reg;
 
@@ -83,9 +86,10 @@
 	else
 		ccu_frac_helper_disable(&nm->common, &nm->frac);
 
-	rational_best_approximation(rate, parent_rate,
-				    1 << nm->n.width, 1 << nm->m.width,
-				    &n, &m);
+	max_n = 1 << nm->n.width;
+	max_m = nm->m.max ?: 1 << nm->m.width;
+
+	rational_best_approximation(rate, parent_rate, max_n, max_m, &n, &m);
 
 	spin_lock_irqsave(nm->common.lock, flags);
 
diff --git a/drivers/clk/sunxi/clk-a10-pll2.c b/drivers/clk/sunxi/clk-a10-pll2.c
index 0ee1f36..d8eab90 100644
--- a/drivers/clk/sunxi/clk-a10-pll2.c
+++ b/drivers/clk/sunxi/clk-a10-pll2.c
@@ -73,7 +73,7 @@
 					  SUN4I_PLL2_PRE_DIV_WIDTH,
 					  CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
 					  &sun4i_a10_pll2_lock);
-	if (!prediv_clk) {
+	if (IS_ERR(prediv_clk)) {
 		pr_err("Couldn't register the prediv clock\n");
 		goto err_free_array;
 	}
@@ -106,7 +106,7 @@
 					  &mult->hw, &clk_multiplier_ops,
 					  &gate->hw, &clk_gate_ops,
 					  CLK_SET_RATE_PARENT);
-	if (!base_clk) {
+	if (IS_ERR(base_clk)) {
 		pr_err("Couldn't register the base multiplier clock\n");
 		goto err_free_multiplier;
 	}
diff --git a/drivers/clk/sunxi/clk-mod0.c b/drivers/clk/sunxi/clk-mod0.c
index b38d71c..e54266c 100644
--- a/drivers/clk/sunxi/clk-mod0.c
+++ b/drivers/clk/sunxi/clk-mod0.c
@@ -91,7 +91,8 @@
 	sunxi_factors_register(node, &sun4i_a10_mod0_data,
 			       &sun4i_a10_mod0_lock, reg);
 }
-CLK_OF_DECLARE(sun4i_a10_mod0, "allwinner,sun4i-a10-mod0-clk", sun4i_a10_mod0_setup);
+CLK_OF_DECLARE_DRIVER(sun4i_a10_mod0, "allwinner,sun4i-a10-mod0-clk",
+		      sun4i_a10_mod0_setup);
 
 static int sun4i_a10_mod0_clk_probe(struct platform_device *pdev)
 {
diff --git a/drivers/clk/sunxi/clk-sun8i-apb0.c b/drivers/clk/sunxi/clk-sun8i-apb0.c
index a5666e1..ea1eed2 100644
--- a/drivers/clk/sunxi/clk-sun8i-apb0.c
+++ b/drivers/clk/sunxi/clk-sun8i-apb0.c
@@ -82,8 +82,8 @@
 	of_address_to_resource(node, 0, &res);
 	release_mem_region(res.start, resource_size(&res));
 }
-CLK_OF_DECLARE(sun8i_a23_apb0, "allwinner,sun8i-a23-apb0-clk",
-	       sun8i_a23_apb0_setup);
+CLK_OF_DECLARE_DRIVER(sun8i_a23_apb0, "allwinner,sun8i-a23-apb0-clk",
+		      sun8i_a23_apb0_setup);
 
 static int sun8i_a23_apb0_clk_probe(struct platform_device *pdev)
 {
diff --git a/drivers/clk/sunxi/clk-sun8i-mbus.c b/drivers/clk/sunxi/clk-sun8i-mbus.c
index 411d303..b200ebf 100644
--- a/drivers/clk/sunxi/clk-sun8i-mbus.c
+++ b/drivers/clk/sunxi/clk-sun8i-mbus.c
@@ -48,7 +48,7 @@
 		return;
 
 	reg = of_io_request_and_map(node, 0, of_node_full_name(node));
-	if (!reg) {
+	if (IS_ERR(reg)) {
 		pr_err("Could not get registers for sun8i-mbus-clk\n");
 		goto err_free_parents;
 	}
diff --git a/drivers/clk/tegra/clk-tegra114.c b/drivers/clk/tegra/clk-tegra114.c
index 64da7b7..933b5dd 100644
--- a/drivers/clk/tegra/clk-tegra114.c
+++ b/drivers/clk/tegra/clk-tegra114.c
@@ -428,7 +428,7 @@
 	.div_nmp = &pllp_nmp,
 	.freq_table = pll_d_freq_table,
 	.flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_SET_LFCON |
-		 TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+		 TEGRA_PLL_HAS_LOCK_ENABLE,
 };
 
 static struct tegra_clk_pll_params pll_d2_params = {
@@ -446,7 +446,7 @@
 	.div_nmp = &pllp_nmp,
 	.freq_table = pll_d_freq_table,
 	.flags = TEGRA_PLL_HAS_CPCON | TEGRA_PLL_SET_LFCON |
-		 TEGRA_PLL_USE_LOCK | TEGRA_PLL_HAS_LOCK_ENABLE,
+		 TEGRA_PLL_HAS_LOCK_ENABLE,
 };
 
 static const struct pdiv_map pllu_p[] = {
diff --git a/drivers/clk/uniphier/Kconfig b/drivers/clk/uniphier/Kconfig
new file mode 100644
index 0000000..5512377
--- /dev/null
+++ b/drivers/clk/uniphier/Kconfig
@@ -0,0 +1,9 @@
+config CLK_UNIPHIER
+	bool "Clock driver for UniPhier SoCs"
+	depends on ARCH_UNIPHIER || COMPILE_TEST
+	depends on OF && MFD_SYSCON
+	default ARCH_UNIPHIER
+	help
+	  Support for clock controllers on UniPhier SoCs.
+	  Say Y if you want to control clocks provided by System Control
+	  block, Media I/O block, Peripheral Block.
diff --git a/drivers/clk/uniphier/Makefile b/drivers/clk/uniphier/Makefile
new file mode 100644
index 0000000..f27b3603
--- /dev/null
+++ b/drivers/clk/uniphier/Makefile
@@ -0,0 +1,8 @@
+obj-y	+= clk-uniphier-core.o
+obj-y	+= clk-uniphier-fixed-factor.o
+obj-y	+= clk-uniphier-fixed-rate.o
+obj-y	+= clk-uniphier-gate.o
+obj-y	+= clk-uniphier-mux.o
+obj-y	+= clk-uniphier-sys.o
+obj-y	+= clk-uniphier-mio.o
+obj-y	+= clk-uniphier-peri.o
diff --git a/drivers/clk/uniphier/clk-uniphier-core.c b/drivers/clk/uniphier/clk-uniphier-core.c
new file mode 100644
index 0000000..5ffb898
--- /dev/null
+++ b/drivers/clk/uniphier/clk-uniphier-core.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/init.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include "clk-uniphier.h"
+
+static struct clk_hw *uniphier_clk_register(struct device *dev,
+					    struct regmap *regmap,
+					const struct uniphier_clk_data *data)
+{
+	switch (data->type) {
+	case UNIPHIER_CLK_TYPE_FIXED_FACTOR:
+		return uniphier_clk_register_fixed_factor(dev, data->name,
+							  &data->data.factor);
+	case UNIPHIER_CLK_TYPE_FIXED_RATE:
+		return uniphier_clk_register_fixed_rate(dev, data->name,
+							&data->data.rate);
+	case UNIPHIER_CLK_TYPE_GATE:
+		return uniphier_clk_register_gate(dev, regmap, data->name,
+						  &data->data.gate);
+	case UNIPHIER_CLK_TYPE_MUX:
+		return uniphier_clk_register_mux(dev, regmap, data->name,
+						 &data->data.mux);
+	default:
+		dev_err(dev, "unsupported clock type\n");
+		return ERR_PTR(-EINVAL);
+	}
+}
+
+static int uniphier_clk_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct clk_hw_onecell_data *hw_data;
+	const struct uniphier_clk_data *p, *data;
+	struct regmap *regmap;
+	struct device_node *parent;
+	int clk_num = 0;
+
+	data = of_device_get_match_data(dev);
+	if (WARN_ON(!data))
+		return -EINVAL;
+
+	parent = of_get_parent(dev->of_node); /* parent should be syscon node */
+	regmap = syscon_node_to_regmap(parent);
+	of_node_put(parent);
+	if (IS_ERR(regmap)) {
+		dev_err(dev, "failed to get regmap (error %ld)\n",
+			PTR_ERR(regmap));
+		return PTR_ERR(regmap);
+	}
+
+	for (p = data; p->name; p++)
+		clk_num = max(clk_num, p->idx + 1);
+
+	hw_data = devm_kzalloc(dev,
+			sizeof(*hw_data) + clk_num * sizeof(struct clk_hw *),
+			GFP_KERNEL);
+	if (!hw_data)
+		return -ENOMEM;
+
+	hw_data->num = clk_num;
+
+	/* avoid returning NULL for unused idx */
+	for (; clk_num >= 0; clk_num--)
+		hw_data->hws[clk_num] = ERR_PTR(-EINVAL);
+
+	for (p = data; p->name; p++) {
+		struct clk_hw *hw;
+
+		dev_dbg(dev, "register %s (index=%d)\n", p->name, p->idx);
+		hw = uniphier_clk_register(dev, regmap, p);
+		if (IS_ERR(hw)) {
+			dev_err(dev, "failed to register %s (error %ld)\n",
+				p->name, PTR_ERR(hw));
+			return PTR_ERR(hw);
+		}
+
+		if (p->idx >= 0)
+			hw_data->hws[p->idx] = hw;
+	}
+
+	return of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
+				      hw_data);
+}
+
+static int uniphier_clk_remove(struct platform_device *pdev)
+{
+	of_clk_del_provider(pdev->dev.of_node);
+
+	return 0;
+}
+
+static const struct of_device_id uniphier_clk_match[] = {
+	/* System clock */
+	{
+		.compatible = "socionext,uniphier-ld4-clock",
+		.data = uniphier_ld4_sys_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pro4-clock",
+		.data = uniphier_pro4_sys_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-sld8-clock",
+		.data = uniphier_sld8_sys_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pro5-clock",
+		.data = uniphier_pro5_sys_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pxs2-clock",
+		.data = uniphier_pxs2_sys_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld11-clock",
+		.data = uniphier_ld11_sys_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld20-clock",
+		.data = uniphier_ld20_sys_clk_data,
+	},
+	/* Media I/O clock */
+	{
+		.compatible = "socionext,uniphier-sld3-mio-clock",
+		.data = uniphier_sld3_mio_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld4-mio-clock",
+		.data = uniphier_sld3_mio_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pro4-mio-clock",
+		.data = uniphier_sld3_mio_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-sld8-mio-clock",
+		.data = uniphier_sld3_mio_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pro5-mio-clock",
+		.data = uniphier_pro5_mio_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pxs2-mio-clock",
+		.data = uniphier_pro5_mio_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld11-mio-clock",
+		.data = uniphier_sld3_mio_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld20-mio-clock",
+		.data = uniphier_pro5_mio_clk_data,
+	},
+	/* Peripheral clock */
+	{
+		.compatible = "socionext,uniphier-ld4-peri-clock",
+		.data = uniphier_ld4_peri_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pro4-peri-clock",
+		.data = uniphier_pro4_peri_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-sld8-peri-clock",
+		.data = uniphier_ld4_peri_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pro5-peri-clock",
+		.data = uniphier_pro4_peri_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-pxs2-peri-clock",
+		.data = uniphier_pro4_peri_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld11-peri-clock",
+		.data = uniphier_pro4_peri_clk_data,
+	},
+	{
+		.compatible = "socionext,uniphier-ld20-peri-clock",
+		.data = uniphier_pro4_peri_clk_data,
+	},
+	{ /* sentinel */ }
+};
+
+static struct platform_driver uniphier_clk_driver = {
+	.probe = uniphier_clk_probe,
+	.remove = uniphier_clk_remove,
+	.driver = {
+		.name = "uniphier-clk",
+		.of_match_table = uniphier_clk_match,
+	},
+};
+builtin_platform_driver(uniphier_clk_driver);
diff --git a/drivers/clk/uniphier/clk-uniphier-fixed-factor.c b/drivers/clk/uniphier/clk-uniphier-fixed-factor.c
new file mode 100644
index 0000000..da2d9f4
--- /dev/null
+++ b/drivers/clk/uniphier/clk-uniphier-fixed-factor.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+
+#include "clk-uniphier.h"
+
+struct clk_hw *uniphier_clk_register_fixed_factor(struct device *dev,
+						  const char *name,
+			const struct uniphier_clk_fixed_factor_data *data)
+{
+	struct clk_fixed_factor *fix;
+	struct clk_init_data init;
+	int ret;
+
+	fix = devm_kzalloc(dev, sizeof(*fix), GFP_KERNEL);
+	if (!fix)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &clk_fixed_factor_ops;
+	init.flags = data->parent_name ? CLK_SET_RATE_PARENT : 0;
+	init.parent_names = data->parent_name ? &data->parent_name : NULL;
+	init.num_parents = data->parent_name ? 1 : 0;
+
+	fix->mult = data->mult;
+	fix->div = data->div;
+	fix->hw.init = &init;
+
+	ret = devm_clk_hw_register(dev, &fix->hw);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return &fix->hw;
+}
diff --git a/drivers/clk/uniphier/clk-uniphier-fixed-rate.c b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c
new file mode 100644
index 0000000..0ad0d46
--- /dev/null
+++ b/drivers/clk/uniphier/clk-uniphier-fixed-rate.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+
+#include "clk-uniphier.h"
+
+struct clk_hw *uniphier_clk_register_fixed_rate(struct device *dev,
+						const char *name,
+				const struct uniphier_clk_fixed_rate_data *data)
+{
+	struct clk_fixed_rate *fixed;
+	struct clk_init_data init;
+	int ret;
+
+	/* allocate fixed-rate clock */
+	fixed = devm_kzalloc(dev, sizeof(*fixed), GFP_KERNEL);
+	if (!fixed)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &clk_fixed_rate_ops;
+	init.parent_names = NULL;
+	init.num_parents = 0;
+
+	fixed->fixed_rate = data->fixed_rate;
+	fixed->hw.init = &init;
+
+	ret = devm_clk_hw_register(dev, &fixed->hw);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return &fixed->hw;
+}
diff --git a/drivers/clk/uniphier/clk-uniphier-gate.c b/drivers/clk/uniphier/clk-uniphier-gate.c
new file mode 100644
index 0000000..49142d4
--- /dev/null
+++ b/drivers/clk/uniphier/clk-uniphier-gate.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/regmap.h>
+
+#include "clk-uniphier.h"
+
+struct uniphier_clk_gate {
+	struct clk_hw hw;
+	struct regmap *regmap;
+	unsigned int reg;
+	unsigned int bit;
+};
+
+#define to_uniphier_clk_gate(_hw) \
+				container_of(_hw, struct uniphier_clk_gate, hw)
+
+static int uniphier_clk_gate_endisable(struct clk_hw *hw, int enable)
+{
+	struct uniphier_clk_gate *gate = to_uniphier_clk_gate(hw);
+
+	return regmap_write_bits(gate->regmap, gate->reg, BIT(gate->bit),
+				 enable ? BIT(gate->bit) : 0);
+}
+
+static int uniphier_clk_gate_enable(struct clk_hw *hw)
+{
+	return uniphier_clk_gate_endisable(hw, 1);
+}
+
+static void uniphier_clk_gate_disable(struct clk_hw *hw)
+{
+	if (uniphier_clk_gate_endisable(hw, 0) < 0)
+		pr_warn("failed to disable clk\n");
+}
+
+static int uniphier_clk_gate_is_enabled(struct clk_hw *hw)
+{
+	struct uniphier_clk_gate *gate = to_uniphier_clk_gate(hw);
+	unsigned int val;
+
+	if (regmap_read(gate->regmap, gate->reg, &val) < 0)
+		pr_warn("is_enabled() may return wrong result\n");
+
+	return !!(val & BIT(gate->bit));
+}
+
+static const struct clk_ops uniphier_clk_gate_ops = {
+	.enable = uniphier_clk_gate_enable,
+	.disable = uniphier_clk_gate_disable,
+	.is_enabled = uniphier_clk_gate_is_enabled,
+};
+
+struct clk_hw *uniphier_clk_register_gate(struct device *dev,
+					  struct regmap *regmap,
+					  const char *name,
+				const struct uniphier_clk_gate_data *data)
+{
+	struct uniphier_clk_gate *gate;
+	struct clk_init_data init;
+	int ret;
+
+	gate = devm_kzalloc(dev, sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &uniphier_clk_gate_ops;
+	init.flags = data->parent_name ? CLK_SET_RATE_PARENT : 0;
+	init.parent_names = data->parent_name ? &data->parent_name : NULL;
+	init.num_parents = data->parent_name ? 1 : 0;
+
+	gate->regmap = regmap;
+	gate->reg = data->reg;
+	gate->bit = data->bit;
+	gate->hw.init = &init;
+
+	ret = devm_clk_hw_register(dev, &gate->hw);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return &gate->hw;
+}
diff --git a/drivers/clk/uniphier/clk-uniphier-mio.c b/drivers/clk/uniphier/clk-uniphier-mio.c
new file mode 100644
index 0000000..6aa7ec7
--- /dev/null
+++ b/drivers/clk/uniphier/clk-uniphier-mio.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "clk-uniphier.h"
+
+#define UNIPHIER_MIO_CLK_SD_FIXED					\
+	UNIPHIER_CLK_FACTOR("sd-44m", -1, "sd-133m", 1, 3),		\
+	UNIPHIER_CLK_FACTOR("sd-33m", -1, "sd-200m", 1, 6),		\
+	UNIPHIER_CLK_FACTOR("sd-50m", -1, "sd-200m", 1, 4),		\
+	UNIPHIER_CLK_FACTOR("sd-67m", -1, "sd-200m", 1, 3),		\
+	UNIPHIER_CLK_FACTOR("sd-100m", -1, "sd-200m", 1, 2),		\
+	UNIPHIER_CLK_FACTOR("sd-40m", -1, "sd-200m", 1, 5),		\
+	UNIPHIER_CLK_FACTOR("sd-25m", -1, "sd-200m", 1, 8),		\
+	UNIPHIER_CLK_FACTOR("sd-22m", -1, "sd-133m", 1, 6)
+
+#define UNIPHIER_MIO_CLK_SD(_idx, ch)					\
+	{								\
+		.name = "sd" #ch "-sel",				\
+		.type = UNIPHIER_CLK_TYPE_MUX,				\
+		.idx = -1,						\
+		.data.mux = {						\
+			.parent_names = {				\
+				"sd-44m",				\
+				"sd-33m",				\
+				"sd-50m",				\
+				"sd-67m",				\
+				"sd-100m",				\
+				"sd-40m",				\
+				"sd-25m",				\
+				"sd-22m",				\
+			},						\
+			.num_parents = 8,				\
+			.reg = 0x30 + 0x200 * (ch),			\
+			.masks = {					\
+				0x00031000,				\
+				0x00031000,				\
+				0x00031000,				\
+				0x00031000,				\
+				0x00001300,				\
+				0x00001300,				\
+				0x00001300,				\
+				0x00001300,				\
+			},						\
+			.vals = {					\
+				0x00000000,				\
+				0x00010000,				\
+				0x00020000,				\
+				0x00030000,				\
+				0x00001000,				\
+				0x00001100,				\
+				0x00001200,				\
+				0x00001300,				\
+			},						\
+		},							\
+	},								\
+	UNIPHIER_CLK_GATE("sd" #ch, (_idx), "sd" #ch "-sel", 0x20 + 0x200 * (ch), 8)
+
+#define UNIPHIER_MIO_CLK_USB2(idx, ch)					\
+	UNIPHIER_CLK_GATE("usb2" #ch, (idx), "usb2", 0x20 + 0x200 * (ch), 28)
+
+#define UNIPHIER_MIO_CLK_USB2_PHY(idx, ch)				\
+	UNIPHIER_CLK_GATE("usb2" #ch "-phy", (idx), "usb2", 0x20 + 0x200 * (ch), 29)
+
+#define UNIPHIER_MIO_CLK_DMAC(idx)					\
+	UNIPHIER_CLK_GATE("miodmac", (idx), "stdmac", 0x20, 25)
+
+const struct uniphier_clk_data uniphier_sld3_mio_clk_data[] = {
+	UNIPHIER_MIO_CLK_SD_FIXED,
+	UNIPHIER_MIO_CLK_SD(0, 0),
+	UNIPHIER_MIO_CLK_SD(1, 1),
+	UNIPHIER_MIO_CLK_SD(2, 2),
+	UNIPHIER_MIO_CLK_DMAC(7),
+	UNIPHIER_MIO_CLK_USB2(8, 0),
+	UNIPHIER_MIO_CLK_USB2(9, 1),
+	UNIPHIER_MIO_CLK_USB2(10, 2),
+	UNIPHIER_MIO_CLK_USB2(11, 3),
+	UNIPHIER_MIO_CLK_USB2_PHY(12, 0),
+	UNIPHIER_MIO_CLK_USB2_PHY(13, 1),
+	UNIPHIER_MIO_CLK_USB2_PHY(14, 2),
+	UNIPHIER_MIO_CLK_USB2_PHY(15, 3),
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_pro5_mio_clk_data[] = {
+	UNIPHIER_MIO_CLK_SD_FIXED,
+	UNIPHIER_MIO_CLK_SD(0, 0),
+	UNIPHIER_MIO_CLK_SD(1, 1),
+	{ /* sentinel */ }
+};
diff --git a/drivers/clk/uniphier/clk-uniphier-mux.c b/drivers/clk/uniphier/clk-uniphier-mux.c
new file mode 100644
index 0000000..15a2f2c
--- /dev/null
+++ b/drivers/clk/uniphier/clk-uniphier-mux.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/regmap.h>
+
+#include "clk-uniphier.h"
+
+struct uniphier_clk_mux {
+	struct clk_hw hw;
+	struct regmap *regmap;
+	unsigned int reg;
+	const unsigned int *masks;
+	const unsigned int *vals;
+};
+
+#define to_uniphier_clk_mux(_hw) container_of(_hw, struct uniphier_clk_mux, hw)
+
+static int uniphier_clk_mux_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct uniphier_clk_mux *mux = to_uniphier_clk_mux(hw);
+
+	return regmap_write_bits(mux->regmap, mux->reg, mux->masks[index],
+				 mux->vals[index]);
+}
+
+static u8 uniphier_clk_mux_get_parent(struct clk_hw *hw)
+{
+	struct uniphier_clk_mux *mux = to_uniphier_clk_mux(hw);
+	int num_parents = clk_hw_get_num_parents(hw);
+	int ret;
+	u32 val;
+	u8 i;
+
+	ret = regmap_read(mux->regmap, mux->reg, &val);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < num_parents; i++)
+		if ((mux->masks[i] & val) == mux->vals[i])
+			return i;
+
+	return -EINVAL;
+}
+
+static const struct clk_ops uniphier_clk_mux_ops = {
+	.determine_rate = __clk_mux_determine_rate,
+	.set_parent = uniphier_clk_mux_set_parent,
+	.get_parent = uniphier_clk_mux_get_parent,
+};
+
+struct clk_hw *uniphier_clk_register_mux(struct device *dev,
+					 struct regmap *regmap,
+					 const char *name,
+				const struct uniphier_clk_mux_data *data)
+{
+	struct uniphier_clk_mux *mux;
+	struct clk_init_data init;
+	int ret;
+
+	mux = devm_kzalloc(dev, sizeof(*mux), GFP_KERNEL);
+	if (!mux)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &uniphier_clk_mux_ops;
+	init.flags = CLK_SET_RATE_PARENT;
+	init.parent_names = data->parent_names;
+	init.num_parents = data->num_parents,
+
+	mux->regmap = regmap;
+	mux->reg = data->reg;
+	mux->masks = data->masks;
+	mux->vals = data->vals;
+	mux->hw.init = &init;
+
+	ret = devm_clk_hw_register(dev, &mux->hw);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return &mux->hw;
+}
diff --git a/drivers/clk/uniphier/clk-uniphier-peri.c b/drivers/clk/uniphier/clk-uniphier-peri.c
new file mode 100644
index 0000000..521c80e
--- /dev/null
+++ b/drivers/clk/uniphier/clk-uniphier-peri.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "clk-uniphier.h"
+
+#define UNIPHIER_PERI_CLK_UART(idx, ch)					\
+	UNIPHIER_CLK_GATE("uart" #ch, (idx), "uart", 0x24, 19 + (ch))
+
+#define UNIPHIER_PERI_CLK_I2C_COMMON					\
+	UNIPHIER_CLK_GATE("i2c-common", -1, "i2c", 0x20, 1)
+
+#define UNIPHIER_PERI_CLK_I2C(idx, ch)					\
+	UNIPHIER_CLK_GATE("i2c" #ch, (idx), "i2c-common", 0x24, 5 + (ch))
+
+#define UNIPHIER_PERI_CLK_FI2C(idx, ch)					\
+	UNIPHIER_CLK_GATE("i2c" #ch, (idx), "i2c", 0x24, 24 + (ch))
+
+const struct uniphier_clk_data uniphier_ld4_peri_clk_data[] = {
+	UNIPHIER_PERI_CLK_UART(0, 0),
+	UNIPHIER_PERI_CLK_UART(1, 1),
+	UNIPHIER_PERI_CLK_UART(2, 2),
+	UNIPHIER_PERI_CLK_UART(3, 3),
+	UNIPHIER_PERI_CLK_I2C_COMMON,
+	UNIPHIER_PERI_CLK_I2C(4, 0),
+	UNIPHIER_PERI_CLK_I2C(5, 1),
+	UNIPHIER_PERI_CLK_I2C(6, 2),
+	UNIPHIER_PERI_CLK_I2C(7, 3),
+	UNIPHIER_PERI_CLK_I2C(8, 4),
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_pro4_peri_clk_data[] = {
+	UNIPHIER_PERI_CLK_UART(0, 0),
+	UNIPHIER_PERI_CLK_UART(1, 1),
+	UNIPHIER_PERI_CLK_UART(2, 2),
+	UNIPHIER_PERI_CLK_UART(3, 3),
+	UNIPHIER_PERI_CLK_FI2C(4, 0),
+	UNIPHIER_PERI_CLK_FI2C(5, 1),
+	UNIPHIER_PERI_CLK_FI2C(6, 2),
+	UNIPHIER_PERI_CLK_FI2C(7, 3),
+	UNIPHIER_PERI_CLK_FI2C(8, 4),
+	UNIPHIER_PERI_CLK_FI2C(9, 5),
+	UNIPHIER_PERI_CLK_FI2C(10, 6),
+	{ /* sentinel */ }
+};
diff --git a/drivers/clk/uniphier/clk-uniphier-sys.c b/drivers/clk/uniphier/clk-uniphier-sys.c
new file mode 100644
index 0000000..5d02999
--- /dev/null
+++ b/drivers/clk/uniphier/clk-uniphier-sys.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/stddef.h>
+
+#include "clk-uniphier.h"
+
+#define UNIPHIER_SLD3_SYS_CLK_SD					\
+	UNIPHIER_CLK_FACTOR("sd-200m", -1, "spll", 1, 8),		\
+	UNIPHIER_CLK_FACTOR("sd-133m", -1, "vpll27a", 1, 2)
+
+#define UNIPHIER_PRO5_SYS_CLK_SD					\
+	UNIPHIER_CLK_FACTOR("sd-200m", -1, "spll", 1, 12),		\
+	UNIPHIER_CLK_FACTOR("sd-133m", -1, "spll", 1, 18)
+
+#define UNIPHIER_LD20_SYS_CLK_SD					\
+	UNIPHIER_CLK_FACTOR("sd-200m", -1, "spll", 1, 10),		\
+	UNIPHIER_CLK_FACTOR("sd-133m", -1, "spll", 1, 15)
+
+#define UNIPHIER_SLD3_SYS_CLK_STDMAC(idx)				\
+	UNIPHIER_CLK_GATE("stdmac", (idx), NULL, 0x2104, 10)
+
+#define UNIPHIER_LD11_SYS_CLK_STDMAC(idx)				\
+	UNIPHIER_CLK_GATE("stdmac", (idx), NULL, 0x210c, 8)
+
+#define UNIPHIER_PRO4_SYS_CLK_GIO(idx)					\
+	UNIPHIER_CLK_GATE("gio", (idx), NULL, 0x2104, 6)
+
+#define UNIPHIER_PRO4_SYS_CLK_USB3(idx, ch)				\
+	UNIPHIER_CLK_GATE("usb3" #ch, (idx), NULL, 0x2104, 16 + (ch))
+
+const struct uniphier_clk_data uniphier_sld3_sys_clk_data[] = {
+	UNIPHIER_CLK_FACTOR("spll", -1, "ref", 65, 1),		/* 1597.44 MHz */
+	UNIPHIER_CLK_FACTOR("upll", -1, "ref", 6000, 512),	/* 288 MHz */
+	UNIPHIER_CLK_FACTOR("a2pll", -1, "ref", 24, 1),		/* 589.824 MHz */
+	UNIPHIER_CLK_FACTOR("vpll27a", -1, "ref", 5625, 512),	/* 270 MHz */
+	UNIPHIER_CLK_FACTOR("uart", 0, "a2pll", 1, 16),
+	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 16),
+	UNIPHIER_SLD3_SYS_CLK_SD,
+	UNIPHIER_CLK_FACTOR("usb2", -1, "upll", 1, 12),
+	UNIPHIER_SLD3_SYS_CLK_STDMAC(8),
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_ld4_sys_clk_data[] = {
+	UNIPHIER_CLK_FACTOR("spll", -1, "ref", 65, 1),		/* 1597.44 MHz */
+	UNIPHIER_CLK_FACTOR("upll", -1, "ref", 6000, 512),	/* 288 MHz */
+	UNIPHIER_CLK_FACTOR("a2pll", -1, "ref", 24, 1),		/* 589.824 MHz */
+	UNIPHIER_CLK_FACTOR("vpll27a", -1, "ref", 5625, 512),	/* 270 MHz */
+	UNIPHIER_CLK_FACTOR("uart", 0, "a2pll", 1, 16),
+	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 16),
+	UNIPHIER_SLD3_SYS_CLK_SD,
+	UNIPHIER_CLK_FACTOR("usb2", -1, "upll", 1, 12),
+	UNIPHIER_SLD3_SYS_CLK_STDMAC(8),		/* Ether, HSC, MIO */
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_pro4_sys_clk_data[] = {
+	UNIPHIER_CLK_FACTOR("spll", -1, "ref", 64, 1),		/* 1600 MHz */
+	UNIPHIER_CLK_FACTOR("upll", -1, "ref", 288, 25),	/* 288 MHz */
+	UNIPHIER_CLK_FACTOR("a2pll", -1, "upll", 256, 125),	/* 589.824 MHz */
+	UNIPHIER_CLK_FACTOR("vpll27a", -1, "ref", 270, 25),	/* 270 MHz */
+	UNIPHIER_CLK_FACTOR("uart", 0, "a2pll", 1, 8),
+	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 32),
+	UNIPHIER_SLD3_SYS_CLK_SD,
+	UNIPHIER_CLK_FACTOR("usb2", -1, "upll", 1, 12),
+	UNIPHIER_SLD3_SYS_CLK_STDMAC(8),		/* HSC, MIO, RLE */
+	UNIPHIER_PRO4_SYS_CLK_GIO(12),			/* Ether, SATA, USB3 */
+	UNIPHIER_PRO4_SYS_CLK_USB3(14, 0),
+	UNIPHIER_PRO4_SYS_CLK_USB3(15, 1),
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_sld8_sys_clk_data[] = {
+	UNIPHIER_CLK_FACTOR("spll", -1, "ref", 64, 1),		/* 1600 MHz */
+	UNIPHIER_CLK_FACTOR("upll", -1, "ref", 288, 25),	/* 288 MHz */
+	UNIPHIER_CLK_FACTOR("vpll27a", -1, "ref", 270, 25),	/* 270 MHz */
+	UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 20),
+	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 16),
+	UNIPHIER_SLD3_SYS_CLK_SD,
+	UNIPHIER_CLK_FACTOR("usb2", -1, "upll", 1, 12),
+	UNIPHIER_SLD3_SYS_CLK_STDMAC(8),		/* Ether, HSC, MIO */
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_pro5_sys_clk_data[] = {
+	UNIPHIER_CLK_FACTOR("spll", -1, "ref", 120, 1),		/* 2400 MHz */
+	UNIPHIER_CLK_FACTOR("dapll1", -1, "ref", 128, 1),	/* 2560 MHz */
+	UNIPHIER_CLK_FACTOR("dapll2", -1, "ref", 144, 125),	/* 2949.12 MHz */
+	UNIPHIER_CLK_FACTOR("uart", 0, "dapll2", 1, 40),
+	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 48),
+	UNIPHIER_PRO5_SYS_CLK_SD,
+	UNIPHIER_SLD3_SYS_CLK_STDMAC(8),			/* HSC */
+	UNIPHIER_PRO4_SYS_CLK_GIO(12),				/* PCIe, USB3 */
+	UNIPHIER_PRO4_SYS_CLK_USB3(14, 0),
+	UNIPHIER_PRO4_SYS_CLK_USB3(15, 1),
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_pxs2_sys_clk_data[] = {
+	UNIPHIER_CLK_FACTOR("spll", -1, "ref", 96, 1),		/* 2400 MHz */
+	UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 27),
+	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 48),
+	UNIPHIER_PRO5_SYS_CLK_SD,
+	UNIPHIER_SLD3_SYS_CLK_STDMAC(8),			/* HSC, RLE */
+	/* GIO is always clock-enabled: no function for 0x2104 bit6 */
+	UNIPHIER_PRO4_SYS_CLK_USB3(14, 0),
+	UNIPHIER_PRO4_SYS_CLK_USB3(15, 1),
+	/* The document mentions 0x2104 bit 18, but not functional */
+	UNIPHIER_CLK_GATE("usb30-phy", 16, NULL, 0x2104, 19),
+	UNIPHIER_CLK_GATE("usb31-phy", 20, NULL, 0x2104, 20),
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_ld11_sys_clk_data[] = {
+	UNIPHIER_CLK_FACTOR("spll", -1, "ref", 80, 1),		/* 2000 MHz */
+	UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 34),
+	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 40),
+	UNIPHIER_LD11_SYS_CLK_STDMAC(8),			/* HSC, MIO */
+	UNIPHIER_CLK_FACTOR("usb2", -1, "ref", 24, 25),
+	{ /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_ld20_sys_clk_data[] = {
+	UNIPHIER_CLK_FACTOR("spll", -1, "ref", 80, 1),		/* 2000 MHz */
+	UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 34),
+	UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 40),
+	UNIPHIER_LD20_SYS_CLK_SD,
+	UNIPHIER_LD11_SYS_CLK_STDMAC(8),			/* HSC */
+	/* GIO is always clock-enabled: no function for 0x210c bit5 */
+	/*
+	 * clock for USB Link is enabled by the logic "OR" of bit 14 and bit 15.
+	 * We do not use bit 15 here.
+	 */
+	UNIPHIER_CLK_GATE("usb30", 14, NULL, 0x210c, 14),
+	UNIPHIER_CLK_GATE("usb30-phy0", 16, NULL, 0x210c, 12),
+	UNIPHIER_CLK_GATE("usb30-phy1", 17, NULL, 0x210c, 13),
+	{ /* sentinel */ }
+};
diff --git a/drivers/clk/uniphier/clk-uniphier.h b/drivers/clk/uniphier/clk-uniphier.h
new file mode 100644
index 0000000..3ae1840
--- /dev/null
+++ b/drivers/clk/uniphier/clk-uniphier.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2016 Socionext Inc.
+ *   Author: Masahiro Yamada <yamada.masahiro@socionext.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __CLK_UNIPHIER_H__
+#define __CLK_UNIPHIER_H__
+
+struct clk_hw;
+struct device;
+struct regmap;
+
+#define UNIPHIER_CLK_MUX_MAX_PARENTS	8
+
+enum uniphier_clk_type {
+	UNIPHIER_CLK_TYPE_FIXED_FACTOR,
+	UNIPHIER_CLK_TYPE_FIXED_RATE,
+	UNIPHIER_CLK_TYPE_GATE,
+	UNIPHIER_CLK_TYPE_MUX,
+};
+
+struct uniphier_clk_fixed_factor_data {
+	const char *parent_name;
+	unsigned int mult;
+	unsigned int div;
+};
+
+struct uniphier_clk_fixed_rate_data {
+	unsigned long fixed_rate;
+};
+
+struct uniphier_clk_gate_data {
+	const char *parent_name;
+	unsigned int reg;
+	unsigned int bit;
+};
+
+struct uniphier_clk_mux_data {
+	const char *parent_names[UNIPHIER_CLK_MUX_MAX_PARENTS];
+	unsigned int num_parents;
+	unsigned int reg;
+	unsigned int masks[UNIPHIER_CLK_MUX_MAX_PARENTS];
+	unsigned int vals[UNIPHIER_CLK_MUX_MAX_PARENTS];
+};
+
+struct uniphier_clk_data {
+	const char *name;
+	enum uniphier_clk_type type;
+	int idx;
+	union {
+		struct uniphier_clk_fixed_factor_data factor;
+		struct uniphier_clk_fixed_rate_data rate;
+		struct uniphier_clk_gate_data gate;
+		struct uniphier_clk_mux_data mux;
+	} data;
+};
+
+#define UNIPHIER_CLK_FACTOR(_name, _idx, _parent, _mult, _div)	\
+	{							\
+		.name = (_name),				\
+		.type = UNIPHIER_CLK_TYPE_FIXED_FACTOR,		\
+		.idx = (_idx),					\
+		.data.factor = {				\
+			.parent_name = (_parent),		\
+			.mult = (_mult),			\
+			.div = (_div),				\
+		},						\
+	}
+
+
+#define UNIPHIER_CLK_GATE(_name, _idx, _parent, _reg, _bit)	\
+	{							\
+		.name = (_name),				\
+		.type = UNIPHIER_CLK_TYPE_GATE,			\
+		.idx = (_idx),					\
+		.data.gate = {					\
+			.parent_name = (_parent),		\
+			.reg = (_reg),				\
+			.bit = (_bit),				\
+		},						\
+	}
+
+
+struct clk_hw *uniphier_clk_register_fixed_factor(struct device *dev,
+						  const char *name,
+			const struct uniphier_clk_fixed_factor_data *data);
+struct clk_hw *uniphier_clk_register_fixed_rate(struct device *dev,
+						const char *name,
+			const struct uniphier_clk_fixed_rate_data *data);
+struct clk_hw *uniphier_clk_register_gate(struct device *dev,
+					  struct regmap *regmap,
+					  const char *name,
+				const struct uniphier_clk_gate_data *data);
+struct clk_hw *uniphier_clk_register_mux(struct device *dev,
+					 struct regmap *regmap,
+					 const char *name,
+				const struct uniphier_clk_mux_data *data);
+
+extern const struct uniphier_clk_data uniphier_sld3_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_ld4_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_pro4_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_sld8_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_pro5_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_pxs2_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_ld11_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_ld20_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_sld3_mio_clk_data[];
+extern const struct uniphier_clk_data uniphier_pro5_mio_clk_data[];
+extern const struct uniphier_clk_data uniphier_ld4_peri_clk_data[];
+extern const struct uniphier_clk_data uniphier_pro4_peri_clk_data[];
+
+#endif /* __CLK_UNIPHIER_H__ */
diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c
index 5e9b652..4faa944 100644
--- a/drivers/clk/versatile/clk-icst.c
+++ b/drivers/clk/versatile/clk-icst.c
@@ -27,6 +27,26 @@
 /* Magic unlocking token used on all Versatile boards */
 #define VERSATILE_LOCK_VAL	0xA05F
 
+#define VERSATILE_AUX_OSC_BITS 0x7FFFF
+#define INTEGRATOR_AP_CM_BITS 0xFF
+#define INTEGRATOR_AP_SYS_BITS 0xFF
+#define INTEGRATOR_CP_CM_CORE_BITS 0x7FF
+#define INTEGRATOR_CP_CM_MEM_BITS 0x7FF000
+
+#define INTEGRATOR_AP_PCI_25_33_MHZ BIT(8)
+
+/**
+ * enum icst_control_type - the type of ICST control register
+ */
+enum icst_control_type {
+	ICST_VERSATILE, /* The standard type, all control bits available */
+	ICST_INTEGRATOR_AP_CM, /* Only 8 bits of VDW available */
+	ICST_INTEGRATOR_AP_SYS, /* Only 8 bits of VDW available */
+	ICST_INTEGRATOR_AP_PCI, /* Odd bit pattern storage */
+	ICST_INTEGRATOR_CP_CM_CORE, /* Only 8 bits of VDW and 3 bits of OD */
+	ICST_INTEGRATOR_CP_CM_MEM, /* Only 8 bits of VDW and 3 bits of OD */
+};
+
 /**
  * struct clk_icst - ICST VCO clock wrapper
  * @hw: corresponding clock hardware entry
@@ -34,6 +54,7 @@
  * @lockreg: VCO lock register address
  * @params: parameters for this ICST instance
  * @rate: current rate
+ * @ctype: the type of control register for the ICST
  */
 struct clk_icst {
 	struct clk_hw hw;
@@ -42,6 +63,7 @@
 	u32 lockreg_off;
 	struct icst_params *params;
 	unsigned long rate;
+	enum icst_control_type ctype;
 };
 
 #define to_icst(_hw) container_of(_hw, struct clk_icst, hw)
@@ -59,6 +81,76 @@
 	ret = regmap_read(icst->map, icst->vcoreg_off, &val);
 	if (ret)
 		return ret;
+
+	/*
+	 * The Integrator/AP core clock can only access the low eight
+	 * bits of the v PLL divider. Bit 8 is tied low and always zero,
+	 * r is hardwired to 22 and output divider s is hardwired to 1
+	 * (divide by 2) according to the document
+	 * "Integrator CM926EJ-S, CM946E-S, CM966E-S, CM1026EJ-S and
+	 * CM1136JF-S User Guide" ARM DUI 0138E, page 3-13 thru 3-14.
+	 */
+	if (icst->ctype == ICST_INTEGRATOR_AP_CM) {
+		vco->v = val & INTEGRATOR_AP_CM_BITS;
+		vco->r = 22;
+		vco->s = 1;
+		return 0;
+	}
+
+	/*
+	 * The Integrator/AP system clock on the base board can only
+	 * access the low eight bits of the v PLL divider. Bit 8 is tied low
+	 * and always zero, r is hardwired to 46, and the output divider is
+	 * hardwired to 3 (divide by 4) according to the document
+	 * "Integrator AP ASIC Development Motherboard" ARM DUI 0098B,
+	 * page 3-16.
+	 */
+	if (icst->ctype == ICST_INTEGRATOR_AP_SYS) {
+		vco->v = val & INTEGRATOR_AP_SYS_BITS;
+		vco->r = 46;
+		vco->s = 3;
+		return 0;
+	}
+
+	/*
+	 * The Integrator/AP PCI clock is using an odd pattern to create
+	 * the child clock, basically a single bit called DIVX/Y is used
+	 * to select between two different hardwired values: setting the
+	 * bit to 0 yields v = 17, r = 22 and OD = 1, whereas setting the
+	 * bit to 1 yields v = 14, r = 14 and OD = 1 giving the frequencies
+	 * 33 or 25 MHz respectively.
+	 */
+	if (icst->ctype == ICST_INTEGRATOR_AP_PCI) {
+		bool divxy = !!(val & INTEGRATOR_AP_PCI_25_33_MHZ);
+
+		vco->v = divxy ? 17 : 14;
+		vco->r = divxy ? 22 : 14;
+		vco->s = 1;
+		return 0;
+	}
+
+	/*
+	 * The Integrator/CP core clock can access the low eight bits
+	 * of the v PLL divider. Bit 8 is tied low and always zero,
+	 * r is hardwired to 22 and the output divider s is accessible
+	 * in bits 8 thru 10 according to the document
+	 * "Integrator/CM940T, CM920T, CM740T, and CM720T User Guide"
+	 * ARM DUI 0157A, page 3-20 thru 3-23 and 4-10.
+	 */
+	if (icst->ctype == ICST_INTEGRATOR_CP_CM_CORE) {
+		vco->v = val & 0xFF;
+		vco->r = 22;
+		vco->s = (val >> 8) & 7;
+		return 0;
+	}
+
+	if (icst->ctype == ICST_INTEGRATOR_CP_CM_MEM) {
+		vco->v = (val >> 12) & 0xFF;
+		vco->r = 22;
+		vco->s = (val >> 20) & 7;
+		return 0;
+	}
+
 	vco->v = val & 0x1ff;
 	vco->r = (val >> 9) & 0x7f;
 	vco->s = (val >> 16) & 03;
@@ -72,22 +164,62 @@
  */
 static int vco_set(struct clk_icst *icst, struct icst_vco vco)
 {
+	u32 mask;
 	u32 val;
 	int ret;
 
-	ret = regmap_read(icst->map, icst->vcoreg_off, &val);
-	if (ret)
-		return ret;
+	/* Mask the bits used by the VCO */
+	switch (icst->ctype) {
+	case ICST_INTEGRATOR_AP_CM:
+		mask = INTEGRATOR_AP_CM_BITS;
+		val = vco.v & 0xFF;
+		if (vco.v & 0x100)
+			pr_err("ICST error: tried to set bit 8 of VDW\n");
+		if (vco.s != 1)
+			pr_err("ICST error: tried to use VOD != 1\n");
+		if (vco.r != 22)
+			pr_err("ICST error: tried to use RDW != 22\n");
+		break;
+	case ICST_INTEGRATOR_AP_SYS:
+		mask = INTEGRATOR_AP_SYS_BITS;
+		val = vco.v & 0xFF;
+		if (vco.v & 0x100)
+			pr_err("ICST error: tried to set bit 8 of VDW\n");
+		if (vco.s != 3)
+			pr_err("ICST error: tried to use VOD != 1\n");
+		if (vco.r != 46)
+			pr_err("ICST error: tried to use RDW != 22\n");
+		break;
+	case ICST_INTEGRATOR_CP_CM_CORE:
+		mask = INTEGRATOR_CP_CM_CORE_BITS; /* Uses 12 bits */
+		val = (vco.v & 0xFF) | vco.s << 8;
+		if (vco.v & 0x100)
+			pr_err("ICST error: tried to set bit 8 of VDW\n");
+		if (vco.r != 22)
+			pr_err("ICST error: tried to use RDW != 22\n");
+		break;
+	case ICST_INTEGRATOR_CP_CM_MEM:
+		mask = INTEGRATOR_CP_CM_MEM_BITS; /* Uses 12 bits */
+		val = ((vco.v & 0xFF) << 12) | (vco.s << 20);
+		if (vco.v & 0x100)
+			pr_err("ICST error: tried to set bit 8 of VDW\n");
+		if (vco.r != 22)
+			pr_err("ICST error: tried to use RDW != 22\n");
+		break;
+	default:
+		/* Regular auxilary oscillator */
+		mask = VERSATILE_AUX_OSC_BITS;
+		val = vco.v | (vco.r << 9) | (vco.s << 16);
+		break;
+	}
 
-	/* Mask the 18 bits used by the VCO */
-	val &= ~0x7ffff;
-	val |= vco.v | (vco.r << 9) | (vco.s << 16);
+	pr_debug("ICST: new val = 0x%08x\n", val);
 
 	/* This magic unlocks the VCO so it can be controlled */
 	ret = regmap_write(icst->map, icst->lockreg_off, VERSATILE_LOCK_VAL);
 	if (ret)
 		return ret;
-	ret = regmap_write(icst->map, icst->vcoreg_off, val);
+	ret = regmap_update_bits(icst->map, icst->vcoreg_off, mask, val);
 	if (ret)
 		return ret;
 	/* This locks the VCO again */
@@ -121,6 +253,46 @@
 	struct clk_icst *icst = to_icst(hw);
 	struct icst_vco vco;
 
+	if (icst->ctype == ICST_INTEGRATOR_AP_CM ||
+	    icst->ctype == ICST_INTEGRATOR_CP_CM_CORE) {
+		if (rate <= 12000000)
+			return 12000000;
+		if (rate >= 160000000)
+			return 160000000;
+		/* Slam to closest megahertz */
+		return DIV_ROUND_CLOSEST(rate, 1000000) * 1000000;
+	}
+
+	if (icst->ctype == ICST_INTEGRATOR_CP_CM_MEM) {
+		if (rate <= 6000000)
+			return 6000000;
+		if (rate >= 66000000)
+			return 66000000;
+		/* Slam to closest 0.5 megahertz */
+		return DIV_ROUND_CLOSEST(rate, 500000) * 500000;
+	}
+
+	if (icst->ctype == ICST_INTEGRATOR_AP_SYS) {
+		/* Divides between 3 and 50 MHz in steps of 0.25 MHz */
+		if (rate <= 3000000)
+			return 3000000;
+		if (rate >= 50000000)
+			return 5000000;
+		/* Slam to closest 0.25 MHz */
+		return DIV_ROUND_CLOSEST(rate, 250000) * 250000;
+	}
+
+	if (icst->ctype == ICST_INTEGRATOR_AP_PCI) {
+		/*
+		 * If we're below or less than halfway from 25 to 33 MHz
+		 * select 25 MHz
+		 */
+		if (rate <= 25000000 || rate < 29000000)
+			return 25000000;
+		/* Else just return the default frequency */
+		return 33000000;
+	}
+
 	vco = icst_hz_to_vco(icst->params, rate);
 	return icst_hz(icst->params, vco);
 }
@@ -131,6 +303,36 @@
 	struct clk_icst *icst = to_icst(hw);
 	struct icst_vco vco;
 
+	if (icst->ctype == ICST_INTEGRATOR_AP_PCI) {
+		/* This clock is especially primitive */
+		unsigned int val;
+		int ret;
+
+		if (rate == 25000000) {
+			val = 0;
+		} else if (rate == 33000000) {
+			val = INTEGRATOR_AP_PCI_25_33_MHZ;
+		} else {
+			pr_err("ICST: cannot set PCI frequency %lu\n",
+			       rate);
+			return -EINVAL;
+		}
+		ret = regmap_write(icst->map, icst->lockreg_off,
+				   VERSATILE_LOCK_VAL);
+		if (ret)
+			return ret;
+		ret = regmap_update_bits(icst->map, icst->vcoreg_off,
+					 INTEGRATOR_AP_PCI_25_33_MHZ,
+					 val);
+		if (ret)
+			return ret;
+		/* This locks the VCO again */
+		ret = regmap_write(icst->map, icst->lockreg_off, 0);
+		if (ret)
+			return ret;
+		return 0;
+	}
+
 	if (parent_rate)
 		icst->params->ref = parent_rate;
 	vco = icst_hz_to_vco(icst->params, rate);
@@ -148,7 +350,8 @@
 				  const struct clk_icst_desc *desc,
 				  const char *name,
 				  const char *parent_name,
-				  struct regmap *map)
+				  struct regmap *map,
+				  enum icst_control_type ctype)
 {
 	struct clk *clk;
 	struct clk_icst *icst;
@@ -178,6 +381,7 @@
 	icst->params = pclone;
 	icst->vcoreg_off = desc->vco_offset;
 	icst->lockreg_off = desc->lock_offset;
+	icst->ctype = ctype;
 
 	clk = clk_register(dev, &icst->hw);
 	if (IS_ERR(clk)) {
@@ -206,7 +410,8 @@
 		pr_err("could not initialize ICST regmap\n");
 		return ERR_CAST(map);
 	}
-	return icst_clk_setup(dev, desc, name, parent_name, map);
+	return icst_clk_setup(dev, desc, name, parent_name, map,
+			      ICST_VERSATILE);
 }
 EXPORT_SYMBOL_GPL(icst_clk_register);
 
@@ -239,6 +444,56 @@
 	.idx2s		= icst307_idx2s,
 };
 
+/**
+ * The core modules on the Integrator/AP and Integrator/CP have
+ * especially crippled ICST525 control.
+ */
+static const struct icst_params icst525_apcp_cm_params = {
+	.vco_max	= ICST525_VCO_MAX_5V,
+	.vco_min	= ICST525_VCO_MIN,
+	/* Minimum 12 MHz, VDW = 4 */
+	.vd_min		= 12,
+	/*
+	 * Maximum 160 MHz, VDW = 152 for all core modules, but
+	 * CM926EJ-S, CM1026EJ-S and CM1136JF-S can actually
+	 * go to 200 MHz (max VDW = 192).
+	 */
+	.vd_max		= 192,
+	/* r is hardcoded to 22 and this is the actual divisor, +2 */
+	.rd_min		= 24,
+	.rd_max		= 24,
+	.s2div		= icst525_s2div,
+	.idx2s		= icst525_idx2s,
+};
+
+static const struct icst_params icst525_ap_sys_params = {
+	.vco_max	= ICST525_VCO_MAX_5V,
+	.vco_min	= ICST525_VCO_MIN,
+	/* Minimum 3 MHz, VDW = 4 */
+	.vd_min		= 3,
+	/* Maximum 50 MHz, VDW = 192 */
+	.vd_max		= 50,
+	/* r is hardcoded to 46 and this is the actual divisor, +2 */
+	.rd_min		= 48,
+	.rd_max		= 48,
+	.s2div		= icst525_s2div,
+	.idx2s		= icst525_idx2s,
+};
+
+static const struct icst_params icst525_ap_pci_params = {
+	.vco_max	= ICST525_VCO_MAX_5V,
+	.vco_min	= ICST525_VCO_MIN,
+	/* Minimum 25 MHz */
+	.vd_min		= 25,
+	/* Maximum 33 MHz */
+	.vd_max		= 33,
+	/* r is hardcoded to 14 or 22 and this is the actual divisors +2 */
+	.rd_min		= 16,
+	.rd_max		= 24,
+	.s2div		= icst525_s2div,
+	.idx2s		= icst525_idx2s,
+};
+
 static void __init of_syscon_icst_setup(struct device_node *np)
 {
 	struct device_node *parent;
@@ -247,6 +502,7 @@
 	const char *name = np->name;
 	const char *parent_name;
 	struct clk *regclk;
+	enum icst_control_type ctype;
 
 	/* We do not release this reference, we are using it perpetually */
 	parent = of_get_parent(np);
@@ -269,11 +525,28 @@
 		return;
 	}
 
-	if (of_device_is_compatible(np, "arm,syscon-icst525"))
+	if (of_device_is_compatible(np, "arm,syscon-icst525")) {
 		icst_desc.params = &icst525_params;
-	else if (of_device_is_compatible(np, "arm,syscon-icst307"))
+		ctype = ICST_VERSATILE;
+	} else if (of_device_is_compatible(np, "arm,syscon-icst307")) {
 		icst_desc.params = &icst307_params;
-	else {
+		ctype = ICST_VERSATILE;
+	} else if (of_device_is_compatible(np, "arm,syscon-icst525-integratorap-cm")) {
+		icst_desc.params = &icst525_apcp_cm_params;
+		ctype = ICST_INTEGRATOR_AP_CM;
+	} else if (of_device_is_compatible(np, "arm,syscon-icst525-integratorap-sys")) {
+		icst_desc.params = &icst525_ap_sys_params;
+		ctype = ICST_INTEGRATOR_AP_SYS;
+	} else if (of_device_is_compatible(np, "arm,syscon-icst525-integratorap-pci")) {
+		icst_desc.params = &icst525_ap_pci_params;
+		ctype = ICST_INTEGRATOR_AP_PCI;
+	} else if (of_device_is_compatible(np, "arm,syscon-icst525-integratorcp-cm-core")) {
+		icst_desc.params = &icst525_apcp_cm_params;
+		ctype = ICST_INTEGRATOR_CP_CM_CORE;
+	} else if (of_device_is_compatible(np, "arm,syscon-icst525-integratorcp-cm-mem")) {
+		icst_desc.params = &icst525_apcp_cm_params;
+		ctype = ICST_INTEGRATOR_CP_CM_MEM;
+	} else {
 		pr_err("unknown ICST clock %s\n", name);
 		return;
 	}
@@ -281,7 +554,7 @@
 	/* Parent clock name is not the same as node parent */
 	parent_name = of_clk_get_parent_name(np, 0);
 
-	regclk = icst_clk_setup(NULL, &icst_desc, name, parent_name, map);
+	regclk = icst_clk_setup(NULL, &icst_desc, name, parent_name, map, ctype);
 	if (IS_ERR(regclk)) {
 		pr_err("error setting up syscon ICST clock %s\n", name);
 		return;
@@ -294,5 +567,14 @@
 	       "arm,syscon-icst525", of_syscon_icst_setup);
 CLK_OF_DECLARE(arm_syscon_icst307_clk,
 	       "arm,syscon-icst307", of_syscon_icst_setup);
-
+CLK_OF_DECLARE(arm_syscon_integratorap_cm_clk,
+	       "arm,syscon-icst525-integratorap-cm", of_syscon_icst_setup);
+CLK_OF_DECLARE(arm_syscon_integratorap_sys_clk,
+	       "arm,syscon-icst525-integratorap-sys", of_syscon_icst_setup);
+CLK_OF_DECLARE(arm_syscon_integratorap_pci_clk,
+	       "arm,syscon-icst525-integratorap-pci", of_syscon_icst_setup);
+CLK_OF_DECLARE(arm_syscon_integratorcp_cm_core_clk,
+	       "arm,syscon-icst525-integratorcp-cm-core", of_syscon_icst_setup);
+CLK_OF_DECLARE(arm_syscon_integratorcp_cm_mem_clk,
+	       "arm,syscon-icst525-integratorcp-cm-mem", of_syscon_icst_setup);
 #endif
diff --git a/drivers/clk/zte/Makefile b/drivers/clk/zte/Makefile
index 74005aa..83374bf 100644
--- a/drivers/clk/zte/Makefile
+++ b/drivers/clk/zte/Makefile
@@ -1,2 +1,3 @@
 obj-y := clk.o
 obj-$(CONFIG_SOC_ZX296702) += clk-zx296702.o
+obj-$(CONFIG_ARCH_ZX) += clk-zx296718.o
diff --git a/drivers/clk/zte/clk-zx296718.c b/drivers/clk/zte/clk-zx296718.c
new file mode 100644
index 0000000..707d629
--- /dev/null
+++ b/drivers/clk/zte/clk-zx296718.c
@@ -0,0 +1,924 @@
+/*
+ * Copyright (C) 2015 - 2016 ZTE Corporation.
+ * Copyright (C) 2016 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk-provider.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+
+#include <dt-bindings/clock/zx296718-clock.h>
+#include "clk.h"
+
+/* TOP CRM */
+#define TOP_CLK_MUX0	0x04
+#define TOP_CLK_MUX1	0x08
+#define TOP_CLK_MUX2	0x0c
+#define TOP_CLK_MUX3	0x10
+#define TOP_CLK_MUX4	0x14
+#define TOP_CLK_MUX5	0x18
+#define TOP_CLK_MUX6	0x1c
+#define TOP_CLK_MUX7	0x20
+#define TOP_CLK_MUX9	0x28
+
+
+#define TOP_CLK_GATE0	0x34
+#define TOP_CLK_GATE1	0x38
+#define TOP_CLK_GATE2	0x3c
+#define TOP_CLK_GATE3	0x40
+#define TOP_CLK_GATE4	0x44
+#define TOP_CLK_GATE5	0x48
+#define TOP_CLK_GATE6	0x4c
+
+#define TOP_CLK_DIV0	0x58
+
+#define PLL_CPU_REG	0x80
+#define PLL_VGA_REG	0xb0
+#define PLL_DDR_REG	0xa0
+
+/* LSP0 CRM */
+#define LSP0_TIMER3_CLK	0x4
+#define LSP0_TIMER4_CLK	0x8
+#define LSP0_TIMER5_CLK	0xc
+#define LSP0_UART3_CLK	0x10
+#define LSP0_UART1_CLK	0x14
+#define LSP0_UART2_CLK	0x18
+#define LSP0_SPIFC0_CLK	0x1c
+#define LSP0_I2C4_CLK	0x20
+#define LSP0_I2C5_CLK	0x24
+#define LSP0_SSP0_CLK	0x28
+#define LSP0_SSP1_CLK	0x2c
+#define LSP0_USIM0_CLK	0x30
+#define LSP0_GPIO_CLK	0x34
+#define LSP0_I2C3_CLK	0x38
+
+/* LSP1 CRM */
+#define LSP1_UART4_CLK	0x08
+#define LSP1_UART5_CLK	0x0c
+#define LSP1_PWM_CLK	0x10
+#define LSP1_I2C2_CLK	0x14
+#define LSP1_SSP2_CLK	0x1c
+#define LSP1_SSP3_CLK	0x20
+#define LSP1_SSP4_CLK	0x24
+#define LSP1_USIM1_CLK	0x28
+
+/* audio lsp */
+#define AUDIO_I2S0_DIV_CFG1	0x10
+#define AUDIO_I2S0_DIV_CFG2	0x14
+#define AUDIO_I2S0_CLK		0x18
+#define AUDIO_I2S1_DIV_CFG1	0x20
+#define AUDIO_I2S1_DIV_CFG2	0x24
+#define AUDIO_I2S1_CLK		0x28
+#define AUDIO_I2S2_DIV_CFG1	0x30
+#define AUDIO_I2S2_DIV_CFG2	0x34
+#define AUDIO_I2S2_CLK		0x38
+#define AUDIO_I2S3_DIV_CFG1	0x40
+#define AUDIO_I2S3_DIV_CFG2	0x44
+#define AUDIO_I2S3_CLK		0x48
+#define AUDIO_I2C0_CLK		0x50
+#define AUDIO_SPDIF0_DIV_CFG1	0x60
+#define AUDIO_SPDIF0_DIV_CFG2	0x64
+#define AUDIO_SPDIF0_CLK	0x68
+#define AUDIO_SPDIF1_DIV_CFG1	0x70
+#define AUDIO_SPDIF1_DIV_CFG2	0x74
+#define AUDIO_SPDIF1_CLK	0x78
+#define AUDIO_TIMER_CLK		0x80
+#define AUDIO_TDM_CLK		0x90
+#define AUDIO_TS_CLK		0xa0
+
+static DEFINE_SPINLOCK(clk_lock);
+
+static struct zx_pll_config pll_cpu_table[] = {
+	PLL_RATE(1312000000, 0x00103621, 0x04aaaaaa),
+	PLL_RATE(1407000000, 0x00103a21, 0x04aaaaaa),
+	PLL_RATE(1503000000, 0x00103e21, 0x04aaaaaa),
+	PLL_RATE(1600000000, 0x00104221, 0x04aaaaaa),
+};
+
+PNAME(osc) = {
+	"osc24m",
+	"osc32k",
+};
+
+PNAME(dbg_wclk_p) = {
+	"clk334m",
+	"clk466m",
+	"clk396m",
+	"clk250m",
+};
+
+PNAME(a72_coreclk_p) = {
+	"osc24m",
+	"pll_mm0_1188m",
+	"pll_mm1_1296m",
+	"clk1000m",
+	"clk648m",
+	"clk1600m",
+	"pll_audio_1800m",
+	"pll_vga_1800m",
+};
+
+PNAME(cpu_periclk_p) = {
+	"osc24m",
+	"clk500m",
+	"clk594m",
+	"clk466m",
+	"clk294m",
+	"clk334m",
+	"clk250m",
+	"clk125m",
+};
+
+PNAME(a53_coreclk_p) = {
+	"osc24m",
+	"clk1000m",
+	"pll_mm0_1188m",
+	"clk648m",
+	"clk500m",
+	"clk800m",
+	"clk1600m",
+	"pll_audio_1800m",
+};
+
+PNAME(sec_wclk_p) = {
+	"osc24m",
+	"clk396m",
+	"clk334m",
+	"clk297m",
+	"clk250m",
+	"clk198m",
+	"clk148m5",
+	"clk99m",
+};
+
+PNAME(sd_nand_wclk_p) = {
+	"osc24m",
+	"clk49m5",
+	"clk99m",
+	"clk198m",
+	"clk167m",
+	"clk148m5",
+	"clk125m",
+	"clk216m",
+};
+
+PNAME(emmc_wclk_p) = {
+	"osc24m",
+	"clk198m",
+	"clk99m",
+	"clk396m",
+	"clk334m",
+	"clk297m",
+	"clk250m",
+	"clk148m5",
+};
+
+PNAME(clk32_p) = {
+	"osc32k",
+	"clk32k768",
+};
+
+PNAME(usb_ref24m_p) = {
+	"osc32k",
+	"clk32k768",
+};
+
+PNAME(sys_noc_alck_p) = {
+	"osc24m",
+	"clk250m",
+	"clk198m",
+	"clk148m5",
+	"clk108m",
+	"clk54m",
+	"clk216m",
+	"clk240m",
+};
+
+PNAME(vde_aclk_p) = {
+	"clk334m",
+	"clk594m",
+	"clk500m",
+	"clk432m",
+	"clk480m",
+	"clk297m",
+	"clk_vga",  /*600MHz*/
+	"clk294m",
+};
+
+PNAME(vce_aclk_p) = {
+	"clk334m",
+	"clk594m",
+	"clk500m",
+	"clk432m",
+	"clk396m",
+	"clk297m",
+	"clk_vga",  /*600MHz*/
+	"clk294m",
+};
+
+PNAME(hde_aclk_p) = {
+	"clk334m",
+	"clk594m",
+	"clk500m",
+	"clk432m",
+	"clk396m",
+	"clk297m",
+	"clk_vga",  /*600MHz*/
+	"clk294m",
+};
+
+PNAME(gpu_aclk_p) = {
+	"clk334m",
+	"clk648m",
+	"clk594m",
+	"clk500m",
+	"clk396m",
+	"clk297m",
+	"clk_vga",  /*600MHz*/
+	"clk294m",
+};
+
+PNAME(sappu_aclk_p) = {
+	"clk396m",
+	"clk500m",
+	"clk250m",
+	"clk148m5",
+};
+
+PNAME(sappu_wclk_p) = {
+	"clk198m",
+	"clk396m",
+	"clk334m",
+	"clk297m",
+	"clk250m",
+	"clk148m5",
+	"clk125m",
+	"clk99m",
+};
+
+PNAME(vou_aclk_p) = {
+	"clk334m",
+	"clk594m",
+	"clk500m",
+	"clk432m",
+	"clk396m",
+	"clk297m",
+	"clk_vga",  /*600MHz*/
+	"clk294m",
+};
+
+PNAME(vou_main_wclk_p) = {
+	"clk108m",
+	"clk594m",
+	"clk297m",
+	"clk148m5",
+	"clk74m25",
+	"clk54m",
+	"clk27m",
+	"clk_vga",
+};
+
+PNAME(vou_aux_wclk_p) = {
+	"clk108m",
+	"clk148m5",
+	"clk74m25",
+	"clk54m",
+	"clk27m",
+	"clk_vga",
+	"clk54m_mm0",
+	"clk"
+};
+
+PNAME(vou_ppu_wclk_p) = {
+	"clk334m",
+	"clk432m",
+	"clk396m",
+	"clk297m",
+	"clk250m",
+	"clk125m",
+	"clk198m",
+	"clk99m",
+};
+
+PNAME(vga_i2c_wclk_p) = {
+	"osc24m",
+	"clk99m",
+};
+
+PNAME(viu_m0_aclk_p) = {
+	"clk334m",
+	"clk432m",
+	"clk396m",
+	"clk297m",
+	"clk250m",
+	"clk125m",
+	"clk198m",
+	"osc24m",
+};
+
+PNAME(viu_m1_aclk_p) = {
+	"clk198m",
+	"clk250m",
+	"clk297m",
+	"clk125m",
+	"clk396m",
+	"clk334m",
+	"clk148m5",
+	"osc24m",
+};
+
+PNAME(viu_clk_p) = {
+	"clk198m",
+	"clk334m",
+	"clk297m",
+	"clk250m",
+	"clk396m",
+	"clk125m",
+	"clk99m",
+	"clk148m5",
+};
+
+PNAME(viu_jpeg_clk_p) = {
+	"clk334m",
+	"clk480m",
+	"clk432m",
+	"clk396m",
+	"clk297m",
+	"clk250m",
+	"clk125m",
+	"clk198m",
+};
+
+PNAME(ts_sys_clk_p) = {
+	"clk192m",
+	"clk167m",
+	"clk125m",
+	"clk99m",
+};
+
+PNAME(wdt_ares_p) = {
+	"osc24m",
+	"clk32k"
+};
+
+static struct clk_zx_pll zx296718_pll_clk[] = {
+	ZX296718_PLL("pll_cpu",	"osc24m",	PLL_CPU_REG,	pll_cpu_table),
+};
+
+static struct zx_clk_fixed_factor top_ffactor_clk[] = {
+	FFACTOR(0, "clk4m",		"osc24m", 1, 6,  0),
+	FFACTOR(0, "clk2m",		"osc24m", 1, 12, 0),
+	/* pll cpu */
+	FFACTOR(0, "clk1600m",		"pll_cpu", 1, 1, CLK_SET_RATE_PARENT),
+	FFACTOR(0, "clk800m",		"pll_cpu", 1, 2, CLK_SET_RATE_PARENT),
+	/* pll mac */
+	FFACTOR(0, "clk25m",		"pll_mac", 1, 40, 0),
+	FFACTOR(0, "clk125m",		"pll_mac", 1, 8, 0),
+	FFACTOR(0, "clk250m",		"pll_mac", 1, 4, 0),
+	FFACTOR(0, "clk50m",		"pll_mac", 1, 20, 0),
+	FFACTOR(0, "clk500m",		"pll_mac", 1, 2, 0),
+	FFACTOR(0, "clk1000m",		"pll_mac", 1, 1, 0),
+	FFACTOR(0, "clk334m",		"pll_mac", 1, 3, 0),
+	FFACTOR(0, "clk167m",		"pll_mac", 1, 6, 0),
+	/* pll mm */
+	FFACTOR(0, "clk54m_mm0",	"pll_mm0", 1, 22, 0),
+	FFACTOR(0, "clk74m25",		"pll_mm0", 1, 16, 0),
+	FFACTOR(0, "clk148m5",		"pll_mm0", 1, 8, 0),
+	FFACTOR(0, "clk297m",		"pll_mm0", 1, 4, 0),
+	FFACTOR(0, "clk594m",		"pll_mm0", 1, 2, 0),
+	FFACTOR(0, "pll_mm0_1188m",	"pll_mm0", 1, 1, 0),
+	FFACTOR(0, "clk396m",		"pll_mm0", 1, 3, 0),
+	FFACTOR(0, "clk198m",		"pll_mm0", 1, 6, 0),
+	FFACTOR(0, "clk99m",		"pll_mm0", 1, 12, 0),
+	FFACTOR(0, "clk49m5",		"pll_mm0", 1, 24, 0),
+	/* pll mm */
+	FFACTOR(0, "clk324m",		"pll_mm1", 1, 4, 0),
+	FFACTOR(0, "clk648m",		"pll_mm1", 1, 2, 0),
+	FFACTOR(0, "pll_mm1_1296m",	"pll_mm1", 1, 1, 0),
+	FFACTOR(0, "clk216m",		"pll_mm1", 1, 6, 0),
+	FFACTOR(0, "clk432m",		"pll_mm1", 1, 3, 0),
+	FFACTOR(0, "clk108m",		"pll_mm1", 1, 12, 0),
+	FFACTOR(0, "clk72m",		"pll_mm1", 1, 18, 0),
+	FFACTOR(0, "clk27m",		"pll_mm1", 1, 48, 0),
+	FFACTOR(0, "clk54m",		"pll_mm1", 1, 24, 0),
+	/* vga */
+	FFACTOR(0, "pll_vga_1800m",	"pll_vga", 1, 1, 0),
+	FFACTOR(0, "clk_vga",		"pll_vga", 1, 2, 0),
+	/* pll ddr */
+	FFACTOR(0, "clk466m",		"pll_ddr", 1, 2, 0),
+
+	/* pll audio */
+	FFACTOR(0, "pll_audio_1800m",	"pll_audio", 1, 1, 0),
+	FFACTOR(0, "clk32k768",		"pll_audio", 1, 27000, 0),
+	FFACTOR(0, "clk16m384",		"pll_audio", 1, 54, 0),
+	FFACTOR(0, "clk294m",		"pll_audio", 1, 3, 0),
+
+	/* pll hsic*/
+	FFACTOR(0, "clk240m",		"pll_hsic", 1, 4, 0),
+	FFACTOR(0, "clk480m",		"pll_hsic", 1, 2, 0),
+	FFACTOR(0, "clk192m",		"pll_hsic", 1, 5, 0),
+	FFACTOR(0, "clk_pll_24m",	"pll_hsic", 1, 40, 0),
+	FFACTOR(0, "emmc_mux_div2",	"emmc_mux", 1, 2, CLK_SET_RATE_PARENT),
+};
+
+static struct clk_div_table noc_div_table[] = {
+	{ .val = 1, .div = 2, },
+	{ .val = 3, .div = 4, },
+};
+static struct zx_clk_div top_div_clk[] = {
+	DIV_T(0, "sys_noc_hclk", "sys_noc_aclk", TOP_CLK_DIV0, 0, 2, 0, noc_div_table),
+	DIV_T(0, "sys_noc_pclk", "sys_noc_aclk", TOP_CLK_DIV0, 4, 2, 0, noc_div_table),
+};
+
+static struct zx_clk_mux top_mux_clk[] = {
+	MUX(0, "dbg_mux",	 dbg_wclk_p,	  TOP_CLK_MUX0, 12, 2),
+	MUX(0, "a72_mux",	 a72_coreclk_p,	  TOP_CLK_MUX0, 8, 3),
+	MUX(0, "cpu_peri_mux",	 cpu_periclk_p,	  TOP_CLK_MUX0, 4, 3),
+	MUX_F(0, "a53_mux",	 a53_coreclk_p,	  TOP_CLK_MUX0, 0, 3, CLK_SET_RATE_PARENT, 0),
+	MUX(0, "sys_noc_aclk",	 sys_noc_alck_p,  TOP_CLK_MUX1, 0, 3),
+	MUX(0, "sec_mux",	 sec_wclk_p,	  TOP_CLK_MUX2, 16, 3),
+	MUX(0, "sd1_mux",	 sd_nand_wclk_p,  TOP_CLK_MUX2, 12, 3),
+	MUX(0, "sd0_mux",	 sd_nand_wclk_p,  TOP_CLK_MUX2, 8, 3),
+	MUX(0, "emmc_mux",	 emmc_wclk_p,	  TOP_CLK_MUX2, 4, 3),
+	MUX(0, "nand_mux",	 sd_nand_wclk_p,  TOP_CLK_MUX2, 0, 3),
+	MUX(0, "usb_ref24m_mux", usb_ref24m_p,	  TOP_CLK_MUX9, 16, 1),
+	MUX(0, "clk32k",	 clk32_p,	  TOP_CLK_MUX9, 12, 1),
+	MUX_F(0, "wdt_mux",	 wdt_ares_p,	  TOP_CLK_MUX9, 8, 1, CLK_SET_RATE_PARENT, 0),
+	MUX(0, "timer_mux",	 osc,		  TOP_CLK_MUX9, 4, 1),
+	MUX(0, "vde_mux",	 vde_aclk_p,	  TOP_CLK_MUX4,  0, 3),
+	MUX(0, "vce_mux",	 vce_aclk_p,	  TOP_CLK_MUX4,  4, 3),
+	MUX(0, "hde_mux",	 hde_aclk_p,	  TOP_CLK_MUX4,  8, 3),
+	MUX(0, "gpu_mux",	 gpu_aclk_p,	  TOP_CLK_MUX5,  0, 3),
+	MUX(0, "sappu_a_mux",	 sappu_aclk_p,	  TOP_CLK_MUX5,  4, 2),
+	MUX(0, "sappu_w_mux",	 sappu_wclk_p,	  TOP_CLK_MUX5,  8, 3),
+	MUX(0, "vou_a_mux",	 vou_aclk_p,	  TOP_CLK_MUX7,  0, 3),
+	MUX(0, "vou_main_w_mux", vou_main_wclk_p, TOP_CLK_MUX7,  4, 3),
+	MUX(0, "vou_aux_w_mux",	 vou_aux_wclk_p,  TOP_CLK_MUX7,  8, 3),
+	MUX(0, "vou_ppu_w_mux",	 vou_ppu_wclk_p,  TOP_CLK_MUX7, 12, 3),
+	MUX(0, "vga_i2c_mux",	 vga_i2c_wclk_p,  TOP_CLK_MUX7, 16, 1),
+	MUX(0, "viu_m0_a_mux",	 viu_m0_aclk_p,	  TOP_CLK_MUX6,  0, 3),
+	MUX(0, "viu_m1_a_mux",	 viu_m1_aclk_p,	  TOP_CLK_MUX6,  4, 3),
+	MUX(0, "viu_w_mux",	 viu_clk_p,	  TOP_CLK_MUX6,  8, 3),
+	MUX(0, "viu_jpeg_w_mux", viu_jpeg_clk_p,  TOP_CLK_MUX6, 12, 3),
+	MUX(0, "ts_sys_mux",	 ts_sys_clk_p,    TOP_CLK_MUX6, 16, 2),
+};
+
+static struct zx_clk_gate top_gate_clk[] = {
+	GATE(CPU_DBG_GATE,    "dbg_wclk",        "dbg_mux",        TOP_CLK_GATE0, 4, CLK_SET_RATE_PARENT, 0),
+	GATE(A72_GATE,        "a72_coreclk",     "a72_mux",        TOP_CLK_GATE0, 3, CLK_SET_RATE_PARENT, 0),
+	GATE(CPU_PERI_GATE,   "cpu_peri",        "cpu_peri_mux",   TOP_CLK_GATE0, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(A53_GATE,        "a53_coreclk",     "a53_mux",        TOP_CLK_GATE0, 0, CLK_SET_RATE_PARENT, 0),
+	GATE(SD1_WCLK,        "sd1_wclk",        "sd1_mux",        TOP_CLK_GATE1, 13, CLK_SET_RATE_PARENT, 0),
+	GATE(SD0_WCLK,        "sd0_wclk",        "sd0_mux",        TOP_CLK_GATE1, 9, CLK_SET_RATE_PARENT, 0),
+	GATE(EMMC_WCLK,       "emmc_wclk",       "emmc_mux_div2",  TOP_CLK_GATE0, 5, CLK_SET_RATE_PARENT, 0),
+	GATE(EMMC_NAND_AXI,   "emmc_nand_aclk",  "sys_noc_aclk",   TOP_CLK_GATE1, 4, CLK_SET_RATE_PARENT, 0),
+	GATE(NAND_WCLK,       "nand_wclk",       "nand_mux",       TOP_CLK_GATE0, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(EMMC_NAND_AHB,   "emmc_nand_hclk",  "sys_noc_hclk",   TOP_CLK_GATE1, 0, CLK_SET_RATE_PARENT, 0),
+	GATE(0,               "lsp1_pclk",       "sys_noc_pclk",   TOP_CLK_GATE2, 31, 0,                  0),
+	GATE(LSP1_148M5,      "lsp1_148m5",      "clk148m5",       TOP_CLK_GATE2, 30, 0,                  0),
+	GATE(LSP1_99M,        "lsp1_99m",        "clk99m",         TOP_CLK_GATE2, 29, 0,                  0),
+	GATE(LSP1_24M,        "lsp1_24m",        "osc24m",         TOP_CLK_GATE2, 28, 0,                  0),
+	GATE(LSP0_74M25,      "lsp0_74m25",      "clk74m25",       TOP_CLK_GATE2, 25, 0,                  0),
+	GATE(0,               "lsp0_pclk",       "sys_noc_pclk",   TOP_CLK_GATE2, 24, 0,                  0),
+	GATE(LSP0_32K,        "lsp0_32k",        "osc32k",         TOP_CLK_GATE2, 23, 0,                  0),
+	GATE(LSP0_148M5,      "lsp0_148m5",      "clk148m5",       TOP_CLK_GATE2, 22, 0,                  0),
+	GATE(LSP0_99M,        "lsp0_99m",        "clk99m",         TOP_CLK_GATE2, 21, 0,                  0),
+	GATE(LSP0_24M,        "lsp0_24m",        "osc24m",         TOP_CLK_GATE2, 20, 0,                  0),
+	GATE(AUDIO_99M,       "audio_99m",       "clk99m",         TOP_CLK_GATE5, 27, 0,                  0),
+	GATE(AUDIO_24M,       "audio_24m",       "osc24m",         TOP_CLK_GATE5, 28, 0,                  0),
+	GATE(AUDIO_16M384,    "audio_16m384",    "clk16m384",      TOP_CLK_GATE5, 29, 0,                  0),
+	GATE(AUDIO_32K,       "audio_32k",       "clk32k",         TOP_CLK_GATE5, 30, 0,                  0),
+	GATE(WDT_WCLK,        "wdt_wclk",        "wdt_mux",        TOP_CLK_GATE6, 9, CLK_SET_RATE_PARENT, 0),
+	GATE(TIMER_WCLK,      "timer_wclk",      "timer_mux",      TOP_CLK_GATE6, 5, CLK_SET_RATE_PARENT, 0),
+	GATE(VDE_ACLK,        "vde_aclk",        "vde_mux",        TOP_CLK_GATE3, 0,  CLK_SET_RATE_PARENT, 0),
+	GATE(VCE_ACLK,        "vce_aclk",        "vce_mux",        TOP_CLK_GATE3, 4,  CLK_SET_RATE_PARENT, 0),
+	GATE(HDE_ACLK,        "hde_aclk",        "hde_mux",        TOP_CLK_GATE3, 8,  CLK_SET_RATE_PARENT, 0),
+	GATE(GPU_ACLK,        "gpu_aclk",        "gpu_mux",        TOP_CLK_GATE3, 16, CLK_SET_RATE_PARENT, 0),
+	GATE(SAPPU_ACLK,      "sappu_aclk",      "sappu_a_mux",    TOP_CLK_GATE3, 20, CLK_SET_RATE_PARENT, 0),
+	GATE(SAPPU_WCLK,      "sappu_wclk",      "sappu_w_mux",    TOP_CLK_GATE3, 22, CLK_SET_RATE_PARENT, 0),
+	GATE(VOU_ACLK,        "vou_aclk",        "vou_a_mux",      TOP_CLK_GATE4, 16, CLK_SET_RATE_PARENT, 0),
+	GATE(VOU_MAIN_WCLK,   "vou_main_wclk",   "vou_main_w_mux", TOP_CLK_GATE4, 18, CLK_SET_RATE_PARENT, 0),
+	GATE(VOU_AUX_WCLK,    "vou_aux_wclk",    "vou_aux_w_mux",  TOP_CLK_GATE4, 19, CLK_SET_RATE_PARENT, 0),
+	GATE(VOU_PPU_WCLK,    "vou_ppu_wclk",    "vou_ppu_w_mux",  TOP_CLK_GATE4, 20, CLK_SET_RATE_PARENT, 0),
+	GATE(MIPI_CFG_CLK,    "mipi_cfg_clk",    "osc24m",         TOP_CLK_GATE4, 21, 0,                   0),
+	GATE(VGA_I2C_WCLK,    "vga_i2c_wclk",    "vga_i2c_mux",    TOP_CLK_GATE4, 23, CLK_SET_RATE_PARENT, 0),
+	GATE(MIPI_REF_CLK,    "mipi_ref_clk",    "clk27m",         TOP_CLK_GATE4, 24, 0,                   0),
+	GATE(HDMI_OSC_CEC,    "hdmi_osc_cec",    "clk2m",          TOP_CLK_GATE4, 22, 0,                   0),
+	GATE(HDMI_OSC_CLK,    "hdmi_osc_clk",    "clk240m",        TOP_CLK_GATE4, 25, 0,                   0),
+	GATE(HDMI_XCLK,       "hdmi_xclk",       "osc24m",         TOP_CLK_GATE4, 26, 0,                   0),
+	GATE(VIU_M0_ACLK,     "viu_m0_aclk",     "viu_m0_a_mux",   TOP_CLK_GATE4, 0,  CLK_SET_RATE_PARENT, 0),
+	GATE(VIU_M1_ACLK,     "viu_m1_aclk",     "viu_m1_a_mux",   TOP_CLK_GATE4, 1,  CLK_SET_RATE_PARENT, 0),
+	GATE(VIU_WCLK,        "viu_wclk",        "viu_w_mux",      TOP_CLK_GATE4, 2,  CLK_SET_RATE_PARENT, 0),
+	GATE(VIU_JPEG_WCLK,   "viu_jpeg_wclk",   "viu_jpeg_w_mux", TOP_CLK_GATE4, 3,  CLK_SET_RATE_PARENT, 0),
+	GATE(VIU_CFG_CLK,     "viu_cfg_clk",     "osc24m",         TOP_CLK_GATE4, 6,  0,                   0),
+	GATE(TS_SYS_WCLK,     "ts_sys_wclk",     "ts_sys_mux",     TOP_CLK_GATE5, 2,  CLK_SET_RATE_PARENT, 0),
+	GATE(TS_SYS_108M,     "ts_sys_108m",     "clk108m",        TOP_CLK_GATE5, 3,  0,                   0),
+	GATE(USB20_HCLK,      "usb20_hclk",      "sys_noc_hclk",   TOP_CLK_GATE2, 12, 0,                   0),
+	GATE(USB20_PHY_CLK,   "usb20_phy_clk",   "usb_ref24m_mux", TOP_CLK_GATE2, 13, 0,                   0),
+	GATE(USB21_HCLK,      "usb21_hclk",      "sys_noc_hclk",   TOP_CLK_GATE2, 14, 0,                   0),
+	GATE(USB21_PHY_CLK,   "usb21_phy_clk",   "usb_ref24m_mux", TOP_CLK_GATE2, 15, 0,                   0),
+	GATE(GMAC_RMIICLK,    "gmac_rmii_clk",   "clk50m",         TOP_CLK_GATE2, 3, 0,                    0),
+	GATE(GMAC_PCLK,       "gmac_pclk",       "clk198m",        TOP_CLK_GATE2, 1, 0,                    0),
+	GATE(GMAC_ACLK,       "gmac_aclk",       "clk49m5",        TOP_CLK_GATE2, 0, 0,                    0),
+	GATE(GMAC_RFCLK,      "gmac_refclk",     "clk25m",         TOP_CLK_GATE2, 4, 0,                    0),
+	GATE(SD1_AHB,         "sd1_hclk",        "sys_noc_hclk",   TOP_CLK_GATE1, 12,  0,                  0),
+	GATE(SD0_AHB,         "sd0_hclk",        "sys_noc_hclk",   TOP_CLK_GATE1, 8,  0,                   0),
+	GATE(TEMPSENSOR_GATE, "tempsensor_gate", "clk4m",          TOP_CLK_GATE5, 31,  0,                  0),
+};
+
+static struct clk_hw_onecell_data top_hw_onecell_data = {
+	.num = TOP_NR_CLKS,
+	.hws = {
+		[TOP_NR_CLKS - 1] = NULL,
+	},
+};
+
+static int __init top_clocks_init(struct device_node *np)
+{
+	void __iomem *reg_base;
+	int i, ret;
+
+	reg_base = of_iomap(np, 0);
+	if (!reg_base) {
+		pr_err("%s: Unable to map clk base\n", __func__);
+		return -ENXIO;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(zx296718_pll_clk); i++) {
+		zx296718_pll_clk[i].reg_base += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &zx296718_pll_clk[i].hw);
+		if (ret) {
+			pr_warn("top clk %s init error!\n",
+				zx296718_pll_clk[i].hw.init->name);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(top_ffactor_clk); i++) {
+		if (top_ffactor_clk[i].id)
+			top_hw_onecell_data.hws[top_ffactor_clk[i].id] =
+					&top_ffactor_clk[i].factor.hw;
+
+		ret = clk_hw_register(NULL, &top_ffactor_clk[i].factor.hw);
+		if (ret) {
+			pr_warn("top clk %s init error!\n",
+				top_ffactor_clk[i].factor.hw.init->name);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(top_mux_clk); i++) {
+		if (top_mux_clk[i].id)
+			top_hw_onecell_data.hws[top_mux_clk[i].id] =
+					&top_mux_clk[i].mux.hw;
+
+		top_mux_clk[i].mux.reg += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &top_mux_clk[i].mux.hw);
+		if (ret) {
+			pr_warn("top clk %s init error!\n",
+				top_mux_clk[i].mux.hw.init->name);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(top_gate_clk); i++) {
+		if (top_gate_clk[i].id)
+			top_hw_onecell_data.hws[top_gate_clk[i].id] =
+					&top_gate_clk[i].gate.hw;
+
+		top_gate_clk[i].gate.reg += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &top_gate_clk[i].gate.hw);
+		if (ret) {
+			pr_warn("top clk %s init error!\n",
+				top_gate_clk[i].gate.hw.init->name);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(top_div_clk); i++) {
+		if (top_div_clk[i].id)
+			top_hw_onecell_data.hws[top_div_clk[i].id] =
+					&top_div_clk[i].div.hw;
+
+		top_div_clk[i].div.reg += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &top_div_clk[i].div.hw);
+		if (ret) {
+			pr_warn("top clk %s init error!\n",
+				top_div_clk[i].div.hw.init->name);
+		}
+	}
+
+	if (of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &top_hw_onecell_data))
+		panic("could not register clk provider\n");
+	pr_info("top clk init over, nr:%d\n", TOP_NR_CLKS);
+
+	return 0;
+}
+
+static struct clk_div_table common_even_div_table[] = {
+	{ .val = 0, .div = 1, },
+	{ .val = 1, .div = 2, },
+	{ .val = 3, .div = 4, },
+	{ .val = 5, .div = 6, },
+	{ .val = 7, .div = 8, },
+	{ .val = 9, .div = 10, },
+	{ .val = 11, .div = 12, },
+	{ .val = 13, .div = 14, },
+	{ .val = 15, .div = 16, },
+};
+
+static struct clk_div_table common_div_table[] = {
+	{ .val = 0, .div = 1, },
+	{ .val = 1, .div = 2, },
+	{ .val = 2, .div = 3, },
+	{ .val = 3, .div = 4, },
+	{ .val = 4, .div = 5, },
+	{ .val = 5, .div = 6, },
+	{ .val = 6, .div = 7, },
+	{ .val = 7, .div = 8, },
+	{ .val = 8, .div = 9, },
+	{ .val = 9, .div = 10, },
+	{ .val = 10, .div = 11, },
+	{ .val = 11, .div = 12, },
+	{ .val = 12, .div = 13, },
+	{ .val = 13, .div = 14, },
+	{ .val = 14, .div = 15, },
+	{ .val = 15, .div = 16, },
+};
+
+PNAME(lsp0_wclk_common_p) = {
+	"lsp0_24m",
+	"lsp0_99m",
+};
+
+PNAME(lsp0_wclk_timer3_p) = {
+	"timer3_div",
+	"lsp0_32k"
+};
+
+PNAME(lsp0_wclk_timer4_p) = {
+	"timer4_div",
+	"lsp0_32k"
+};
+
+PNAME(lsp0_wclk_timer5_p) = {
+	"timer5_div",
+	"lsp0_32k"
+};
+
+PNAME(lsp0_wclk_spifc0_p) = {
+	"lsp0_148m5",
+	"lsp0_24m",
+	"lsp0_99m",
+	"lsp0_74m25"
+};
+
+PNAME(lsp0_wclk_ssp_p) = {
+	"lsp0_148m5",
+	"lsp0_99m",
+	"lsp0_24m",
+};
+
+static struct zx_clk_mux lsp0_mux_clk[] = {
+	MUX(0, "timer3_wclk_mux", lsp0_wclk_timer3_p, LSP0_TIMER3_CLK, 4, 1),
+	MUX(0, "timer4_wclk_mux", lsp0_wclk_timer4_p, LSP0_TIMER4_CLK, 4, 1),
+	MUX(0, "timer5_wclk_mux", lsp0_wclk_timer5_p, LSP0_TIMER5_CLK, 4, 1),
+	MUX(0, "uart3_wclk_mux",  lsp0_wclk_common_p, LSP0_UART3_CLK,  4, 1),
+	MUX(0, "uart1_wclk_mux",  lsp0_wclk_common_p, LSP0_UART1_CLK,  4, 1),
+	MUX(0, "uart2_wclk_mux",  lsp0_wclk_common_p, LSP0_UART2_CLK,  4, 1),
+	MUX(0, "spifc0_wclk_mux", lsp0_wclk_spifc0_p, LSP0_SPIFC0_CLK, 4, 2),
+	MUX(0, "i2c4_wclk_mux",   lsp0_wclk_common_p, LSP0_I2C4_CLK,   4, 1),
+	MUX(0, "i2c5_wclk_mux",   lsp0_wclk_common_p, LSP0_I2C5_CLK,   4, 1),
+	MUX(0, "ssp0_wclk_mux",   lsp0_wclk_ssp_p,    LSP0_SSP0_CLK,   4, 1),
+	MUX(0, "ssp1_wclk_mux",   lsp0_wclk_ssp_p,    LSP0_SSP1_CLK,   4, 1),
+	MUX(0, "i2c3_wclk_mux",   lsp0_wclk_common_p, LSP0_I2C3_CLK,   4, 1),
+};
+
+static struct zx_clk_gate lsp0_gate_clk[] = {
+	GATE(LSP0_TIMER3_WCLK, "timer3_wclk", "timer3_wclk_mux", LSP0_TIMER3_CLK, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_TIMER4_WCLK, "timer4_wclk", "timer4_wclk_mux", LSP0_TIMER4_CLK, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_TIMER5_WCLK, "timer5_wclk", "timer5_wclk_mux", LSP0_TIMER5_CLK, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_UART3_WCLK,  "uart3_wclk",  "uart3_wclk_mux",  LSP0_UART3_CLK,  1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_UART1_WCLK,  "uart1_wclk",  "uart1_wclk_mux",  LSP0_UART1_CLK,  1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_UART2_WCLK,  "uart2_wclk",  "uart2_wclk_mux",  LSP0_UART2_CLK,  1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_SPIFC0_WCLK, "spifc0_wclk", "spifc0_wclk_mux", LSP0_SPIFC0_CLK, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_I2C4_WCLK,   "i2c4_wclk",   "i2c4_wclk_mux",   LSP0_I2C4_CLK,   1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_I2C5_WCLK,   "i2c5_wclk",   "i2c5_wclk_mux",   LSP0_I2C5_CLK,   1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_SSP0_WCLK,   "ssp0_wclk",   "ssp0_div",        LSP0_SSP0_CLK,   1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_SSP1_WCLK,   "ssp1_wclk",   "ssp1_div",        LSP0_SSP1_CLK,   1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP0_I2C3_WCLK,   "i2c3_wclk",   "i2c3_wclk_mux",   LSP0_I2C3_CLK,   1, CLK_SET_RATE_PARENT, 0),
+};
+
+static struct zx_clk_div lsp0_div_clk[] = {
+	DIV_T(0, "timer3_div", "lsp0_24m", LSP0_TIMER3_CLK,  12, 4, 0, common_even_div_table),
+	DIV_T(0, "timer4_div", "lsp0_24m", LSP0_TIMER4_CLK,  12, 4, 0, common_even_div_table),
+	DIV_T(0, "timer5_div", "lsp0_24m", LSP0_TIMER5_CLK,  12, 4, 0, common_even_div_table),
+	DIV_T(0, "ssp0_div", "ssp0_wclk_mux", LSP0_SSP0_CLK, 12, 4, 0, common_even_div_table),
+	DIV_T(0, "ssp1_div", "ssp1_wclk_mux", LSP0_SSP1_CLK, 12, 4, 0, common_even_div_table),
+};
+
+static struct clk_hw_onecell_data lsp0_hw_onecell_data = {
+	.num = LSP0_NR_CLKS,
+	.hws = {
+		[LSP0_NR_CLKS - 1] = NULL,
+	},
+};
+
+static int __init lsp0_clocks_init(struct device_node *np)
+{
+	void __iomem *reg_base;
+	int i, ret;
+
+	reg_base = of_iomap(np, 0);
+	if (!reg_base) {
+		pr_err("%s: Unable to map clk base\n", __func__);
+		return -ENXIO;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(lsp0_mux_clk); i++) {
+		if (lsp0_mux_clk[i].id)
+			lsp0_hw_onecell_data.hws[lsp0_mux_clk[i].id] =
+					&lsp0_mux_clk[i].mux.hw;
+
+		lsp0_mux_clk[i].mux.reg += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &lsp0_mux_clk[i].mux.hw);
+		if (ret) {
+			pr_warn("lsp0 clk %s init error!\n",
+				lsp0_mux_clk[i].mux.hw.init->name);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(lsp0_gate_clk); i++) {
+		if (lsp0_gate_clk[i].id)
+			lsp0_hw_onecell_data.hws[lsp0_gate_clk[i].id] =
+					&lsp0_gate_clk[i].gate.hw;
+
+		lsp0_gate_clk[i].gate.reg += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &lsp0_gate_clk[i].gate.hw);
+		if (ret) {
+			pr_warn("lsp0 clk %s init error!\n",
+				lsp0_gate_clk[i].gate.hw.init->name);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(lsp0_div_clk); i++) {
+		if (lsp0_div_clk[i].id)
+			lsp0_hw_onecell_data.hws[lsp0_div_clk[i].id] =
+					&lsp0_div_clk[i].div.hw;
+
+		lsp0_div_clk[i].div.reg += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &lsp0_div_clk[i].div.hw);
+		if (ret) {
+			pr_warn("lsp0 clk %s init error!\n",
+				lsp0_div_clk[i].div.hw.init->name);
+		}
+	}
+
+	if (of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &lsp0_hw_onecell_data))
+		panic("could not register clk provider\n");
+	pr_info("lsp0-clk init over:%d\n", LSP0_NR_CLKS);
+
+	return 0;
+}
+
+PNAME(lsp1_wclk_common_p) = {
+	"lsp1_24m",
+	"lsp1_99m",
+};
+
+PNAME(lsp1_wclk_ssp_p) = {
+	"lsp1_148m5",
+	"lsp1_99m",
+	"lsp1_24m",
+};
+
+static struct zx_clk_mux lsp1_mux_clk[] = {
+	MUX(0, "uart4_wclk_mux", lsp1_wclk_common_p, LSP1_UART4_CLK, 4, 1),
+	MUX(0, "uart5_wclk_mux", lsp1_wclk_common_p, LSP1_UART5_CLK, 4, 1),
+	MUX(0, "pwm_wclk_mux",   lsp1_wclk_common_p, LSP1_PWM_CLK,   4, 1),
+	MUX(0, "i2c2_wclk_mux",  lsp1_wclk_common_p, LSP1_I2C2_CLK,  4, 1),
+	MUX(0, "ssp2_wclk_mux",  lsp1_wclk_ssp_p,    LSP1_SSP2_CLK,  4, 2),
+	MUX(0, "ssp3_wclk_mux",  lsp1_wclk_ssp_p,    LSP1_SSP3_CLK,  4, 2),
+	MUX(0, "ssp4_wclk_mux",  lsp1_wclk_ssp_p,    LSP1_SSP4_CLK,  4, 2),
+	MUX(0, "usim1_wclk_mux", lsp1_wclk_common_p, LSP1_USIM1_CLK, 4, 1),
+};
+
+static struct zx_clk_div lsp1_div_clk[] = {
+	DIV_T(0, "pwm_div",  "pwm_wclk_mux",  LSP1_PWM_CLK,  12, 4, CLK_SET_RATE_PARENT, common_div_table),
+	DIV_T(0, "ssp2_div", "ssp2_wclk_mux", LSP1_SSP2_CLK, 12, 4, CLK_SET_RATE_PARENT, common_even_div_table),
+	DIV_T(0, "ssp3_div", "ssp3_wclk_mux", LSP1_SSP3_CLK, 12, 4, CLK_SET_RATE_PARENT, common_even_div_table),
+	DIV_T(0, "ssp4_div", "ssp4_wclk_mux", LSP1_SSP4_CLK, 12, 4, CLK_SET_RATE_PARENT, common_even_div_table),
+};
+
+static struct zx_clk_gate lsp1_gate_clk[] = {
+	GATE(LSP1_UART4_WCLK, "lsp1_uart4_wclk", "uart4_wclk_mux", LSP1_UART4_CLK, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP1_UART5_WCLK, "lsp1_uart5_wclk", "uart5_wclk_mux", LSP1_UART5_CLK, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP1_PWM_WCLK,   "lsp1_pwm_wclk",   "pwm_div",        LSP1_PWM_CLK,   1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP1_PWM_PCLK,   "lsp1_pwm_pclk",   "lsp1_pclk",      LSP1_PWM_CLK,   0, 0,		   0),
+	GATE(LSP1_I2C2_WCLK,  "lsp1_i2c2_wclk",  "i2c2_wclk_mux",  LSP1_I2C2_CLK,  1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP1_SSP2_WCLK,  "lsp1_ssp2_wclk",  "ssp2_div",       LSP1_SSP2_CLK,  1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP1_SSP3_WCLK,  "lsp1_ssp3_wclk",  "ssp3_div",       LSP1_SSP3_CLK,  1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP1_SSP4_WCLK,  "lsp1_ssp4_wclk",  "ssp4_div",       LSP1_SSP4_CLK,  1, CLK_SET_RATE_PARENT, 0),
+	GATE(LSP1_USIM1_WCLK, "lsp1_usim1_wclk", "usim1_wclk_mux", LSP1_USIM1_CLK, 1, CLK_SET_RATE_PARENT, 0),
+};
+
+static struct clk_hw_onecell_data lsp1_hw_onecell_data = {
+	.num = LSP1_NR_CLKS,
+	.hws = {
+		[LSP1_NR_CLKS - 1] = NULL,
+	},
+};
+
+static int __init lsp1_clocks_init(struct device_node *np)
+{
+	void __iomem *reg_base;
+	int i, ret;
+
+	reg_base = of_iomap(np, 0);
+	if (!reg_base) {
+		pr_err("%s: Unable to map clk base\n", __func__);
+		return -ENXIO;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(lsp1_mux_clk); i++) {
+		if (lsp1_mux_clk[i].id)
+			lsp1_hw_onecell_data.hws[lsp1_mux_clk[i].id] =
+					&lsp0_mux_clk[i].mux.hw;
+
+		lsp1_mux_clk[i].mux.reg += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &lsp1_mux_clk[i].mux.hw);
+		if (ret) {
+			pr_warn("lsp1 clk %s init error!\n",
+				lsp1_mux_clk[i].mux.hw.init->name);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(lsp1_gate_clk); i++) {
+		if (lsp1_gate_clk[i].id)
+			lsp1_hw_onecell_data.hws[lsp1_gate_clk[i].id] =
+					&lsp1_gate_clk[i].gate.hw;
+
+		lsp1_gate_clk[i].gate.reg += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &lsp1_gate_clk[i].gate.hw);
+		if (ret) {
+			pr_warn("lsp1 clk %s init error!\n",
+				lsp1_gate_clk[i].gate.hw.init->name);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(lsp1_div_clk); i++) {
+		if (lsp1_div_clk[i].id)
+			lsp1_hw_onecell_data.hws[lsp1_div_clk[i].id] =
+					&lsp1_div_clk[i].div.hw;
+
+		lsp1_div_clk[i].div.reg += (uintptr_t)reg_base;
+		ret = clk_hw_register(NULL, &lsp1_div_clk[i].div.hw);
+		if (ret) {
+			pr_warn("lsp1 clk %s init error!\n",
+				lsp1_div_clk[i].div.hw.init->name);
+		}
+	}
+
+	if (of_clk_add_hw_provider(np, of_clk_hw_onecell_get, &lsp1_hw_onecell_data))
+		panic("could not register clk provider\n");
+	pr_info("lsp1-clk init over, nr:%d\n", LSP1_NR_CLKS);
+
+	return 0;
+}
+
+static const struct of_device_id zx_clkc_match_table[] = {
+	{ .compatible = "zte,zx296718-topcrm", .data = &top_clocks_init },
+	{ .compatible = "zte,zx296718-lsp0crm", .data = &lsp0_clocks_init },
+	{ .compatible = "zte,zx296718-lsp1crm", .data = &lsp1_clocks_init },
+	{ }
+};
+
+static int zx_clkc_probe(struct platform_device *pdev)
+{
+	int (*init_fn)(struct device_node *np);
+	struct device_node *np = pdev->dev.of_node;
+
+	init_fn = of_device_get_match_data(&pdev->dev);
+	if (!init_fn) {
+		dev_err(&pdev->dev, "Error: No device match found\n");
+		return -ENODEV;
+	}
+
+	return init_fn(np);
+}
+
+static struct platform_driver zx_clk_driver = {
+	.probe		= zx_clkc_probe,
+	.driver		= {
+		.name	= "zx296718-clkc",
+		.of_match_table = zx_clkc_match_table,
+	},
+};
+
+static int __init zx_clk_init(void)
+{
+	return platform_driver_register(&zx_clk_driver);
+}
+core_initcall(zx_clk_init);
diff --git a/drivers/clk/zte/clk.c b/drivers/clk/zte/clk.c
index 7c73c53..c4c1251 100644
--- a/drivers/clk/zte/clk.c
+++ b/drivers/clk/zte/clk.c
@@ -21,8 +21,8 @@
 #define to_clk_zx_audio(_hw) container_of(_hw, struct clk_zx_audio, hw)
 
 #define CFG0_CFG1_OFFSET 4
-#define LOCK_FLAG BIT(30)
-#define POWER_DOWN BIT(31)
+#define LOCK_FLAG 30
+#define POWER_DOWN 31
 
 static int rate_to_idx(struct clk_zx_pll *zx_pll, unsigned long rate)
 {
@@ -50,8 +50,8 @@
 	hw_cfg1 = readl_relaxed(zx_pll->reg_base + CFG0_CFG1_OFFSET);
 
 	/* For matching the value in lookup table */
-	hw_cfg0 &= ~LOCK_FLAG;
-	hw_cfg0 |= POWER_DOWN;
+	hw_cfg0 &= ~BIT(zx_pll->lock_bit);
+	hw_cfg0 |= BIT(zx_pll->pd_bit);
 
 	for (i = 0; i < zx_pll->count; i++) {
 		if (hw_cfg0 == config[i].cfg0 && hw_cfg1 == config[i].cfg1)
@@ -108,10 +108,10 @@
 	u32 reg;
 
 	reg = readl_relaxed(zx_pll->reg_base);
-	writel_relaxed(reg & ~POWER_DOWN, zx_pll->reg_base);
+	writel_relaxed(reg & ~BIT(zx_pll->pd_bit), zx_pll->reg_base);
 
 	return readl_relaxed_poll_timeout(zx_pll->reg_base, reg,
-					  reg & LOCK_FLAG, 0, 100);
+					  reg & BIT(zx_pll->lock_bit), 0, 100);
 }
 
 static void zx_pll_disable(struct clk_hw *hw)
@@ -120,7 +120,7 @@
 	u32 reg;
 
 	reg = readl_relaxed(zx_pll->reg_base);
-	writel_relaxed(reg | POWER_DOWN, zx_pll->reg_base);
+	writel_relaxed(reg | BIT(zx_pll->pd_bit), zx_pll->reg_base);
 }
 
 static int zx_pll_is_enabled(struct clk_hw *hw)
@@ -130,10 +130,10 @@
 
 	reg = readl_relaxed(zx_pll->reg_base);
 
-	return !(reg & POWER_DOWN);
+	return !(reg & BIT(zx_pll->pd_bit));
 }
 
-static const struct clk_ops zx_pll_ops = {
+const struct clk_ops zx_pll_ops = {
 	.recalc_rate = zx_pll_recalc_rate,
 	.round_rate = zx_pll_round_rate,
 	.set_rate = zx_pll_set_rate,
@@ -141,6 +141,7 @@
 	.disable = zx_pll_disable,
 	.is_enabled = zx_pll_is_enabled,
 };
+EXPORT_SYMBOL(zx_pll_ops);
 
 struct clk *clk_register_zx_pll(const char *name, const char *parent_name,
 				unsigned long flags, void __iomem *reg_base,
@@ -164,6 +165,8 @@
 	zx_pll->reg_base = reg_base;
 	zx_pll->lookup_table = lookup_table;
 	zx_pll->count = count;
+	zx_pll->lock_bit = LOCK_FLAG;
+	zx_pll->pd_bit = POWER_DOWN;
 	zx_pll->lock = lock;
 	zx_pll->hw.init = &init;
 
diff --git a/drivers/clk/zte/clk.h b/drivers/clk/zte/clk.h
index 65ae08b..0df3474 100644
--- a/drivers/clk/zte/clk.h
+++ b/drivers/clk/zte/clk.h
@@ -12,6 +12,26 @@
 #include <linux/clk-provider.h>
 #include <linux/spinlock.h>
 
+#define PNAME(x) static const char *x[]
+
+#define CLK_HW_INIT(_name, _parent, _ops, _flags)			\
+	&(struct clk_init_data) {					\
+		.flags		= _flags,				\
+		.name		= _name,				\
+		.parent_names	= (const char *[]) { _parent },		\
+		.num_parents	= 1,					\
+		.ops		= _ops,					\
+	}
+
+#define CLK_HW_INIT_PARENTS(_name, _parents, _ops, _flags)		\
+	&(struct clk_init_data) {					\
+		.flags		= _flags,				\
+		.name		= _name,				\
+		.parent_names	= _parents,				\
+		.num_parents	= ARRAY_SIZE(_parents),			\
+		.ops		= _ops,					\
+	}
+
 struct zx_pll_config {
 	unsigned long rate;
 	u32 cfg0;
@@ -24,8 +44,115 @@
 	const struct zx_pll_config *lookup_table; /* order by rate asc */
 	int count;
 	spinlock_t *lock;
+	u8 pd_bit;		/* power down bit */
+	u8 lock_bit;		/* pll lock flag bit */
 };
 
+#define PLL_RATE(_rate, _cfg0, _cfg1)	\
+{					\
+	.rate = _rate,			\
+	.cfg0 = _cfg0,			\
+	.cfg1 = _cfg1,			\
+}
+
+#define ZX_PLL(_name, _parent, _reg, _table, _pd, _lock)		\
+{									\
+	.reg_base	= (void __iomem *) _reg,			\
+	.lookup_table	= _table,					\
+	.count		= ARRAY_SIZE(_table),				\
+	.pd_bit		= _pd,						\
+	.lock_bit	= _lock,					\
+	.hw.init	 = CLK_HW_INIT(_name, _parent, &zx_pll_ops,	\
+				CLK_GET_RATE_NOCACHE),			\
+}
+
+#define ZX296718_PLL(_name, _parent, _reg, _table)			\
+ZX_PLL(_name, _parent, _reg, _table, 0, 30)
+
+struct zx_clk_gate {
+	struct clk_gate gate;
+	u16		id;
+};
+
+#define GATE(_id, _name, _parent, _reg, _bit, _flag, _gflags)		\
+{									\
+	.gate = {							\
+		.reg = (void __iomem *) _reg,				\
+		.bit_idx = (_bit),					\
+		.flags = _gflags,					\
+		.lock = &clk_lock,					\
+		.hw.init = CLK_HW_INIT(_name,				\
+					_parent,			\
+					&clk_gate_ops,			\
+					_flag | CLK_IGNORE_UNUSED),	\
+	},								\
+	.id	= _id,							\
+}
+
+struct zx_clk_fixed_factor {
+	struct clk_fixed_factor factor;
+	u16	id;
+};
+
+#define FFACTOR(_id, _name, _parent, _mult, _div, _flag)		\
+{									\
+	.factor = {							\
+		.div		= _div,					\
+		.mult		= _mult,				\
+		.hw.init	= CLK_HW_INIT(_name,			\
+					      _parent,			\
+					      &clk_fixed_factor_ops,	\
+					      _flag),			\
+	},								\
+	.id = _id,							\
+}
+
+struct zx_clk_mux {
+	struct clk_mux mux;
+	u16	id;
+};
+
+#define MUX_F(_id, _name, _parent, _reg, _shift, _width, _flag, _mflag)	\
+{									\
+	.mux = {							\
+		.reg		= (void __iomem *) _reg,		\
+		.mask		= BIT(_width) - 1,			\
+		.shift		= _shift,				\
+		.flags		= _mflag,				\
+		.lock		= &clk_lock,				\
+		.hw.init	= CLK_HW_INIT_PARENTS(_name,		\
+						      _parent,		\
+						      &clk_mux_ops,	\
+						      _flag),		\
+	},								\
+	.id = _id,							\
+}
+
+#define MUX(_id, _name, _parent, _reg, _shift, _width)			\
+MUX_F(_id, _name, _parent, _reg, _shift, _width, 0, 0)
+
+struct zx_clk_div {
+	struct clk_divider div;
+	u16	id;
+};
+
+#define DIV_T(_id, _name, _parent, _reg, _shift, _width, _flag, _table)	\
+{									\
+	.div = {							\
+		.reg		= (void __iomem *) _reg,		\
+		.shift		= _shift,				\
+		.width		= _width,				\
+		.flags		= 0,					\
+		.table		= _table,				\
+		.lock		= &clk_lock,				\
+		.hw.init	= CLK_HW_INIT(_name,			\
+					      _parent,			\
+					      &clk_divider_ops,		\
+					      _flag),			\
+	},								\
+	.id = _id,							\
+}
+
 struct clk *clk_register_zx_pll(const char *name, const char *parent_name,
 	unsigned long flags, void __iomem *reg_base,
 	const struct zx_pll_config *lookup_table, int count, spinlock_t *lock);
@@ -38,4 +165,6 @@
 struct clk *clk_register_zx_audio(const char *name,
 				  const char * const parent_name,
 				  unsigned long flags, void __iomem *reg_base);
+
+extern const struct clk_ops zx_pll_ops;
 #endif
diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index 5677886..8a753fd 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -305,6 +305,16 @@
 	  This must be disabled for hardware validation purposes to detect any
 	  hardware anomalies of missing events.
 
+config FSL_ERRATUM_A008585
+	bool "Workaround for Freescale/NXP Erratum A-008585"
+	default y
+	depends on ARM_ARCH_TIMER && ARM64
+	help
+	  This option enables a workaround for Freescale/NXP Erratum
+	  A-008585 ("ARM generic timer may contain an erroneous
+	  value").  The workaround will only be active if the
+	  fsl,erratum-a008585 property is found in the timer node.
+
 config ARM_GLOBAL_TIMER
 	bool "Support for the ARM global timer" if COMPILE_TEST
 	select CLKSRC_OF if OF
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c
index 28bce3f..73c487d 100644
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -8,6 +8,9 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+#define pr_fmt(fmt)	"arm_arch_timer: " fmt
+
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
@@ -91,6 +94,43 @@
  * Architected system timer support.
  */
 
+#ifdef CONFIG_FSL_ERRATUM_A008585
+DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled);
+EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled);
+
+static int fsl_a008585_enable = -1;
+
+static int __init early_fsl_a008585_cfg(char *buf)
+{
+	int ret;
+	bool val;
+
+	ret = strtobool(buf, &val);
+	if (ret)
+		return ret;
+
+	fsl_a008585_enable = val;
+	return 0;
+}
+early_param("clocksource.arm_arch_timer.fsl-a008585", early_fsl_a008585_cfg);
+
+u32 __fsl_a008585_read_cntp_tval_el0(void)
+{
+	return __fsl_a008585_read_reg(cntp_tval_el0);
+}
+
+u32 __fsl_a008585_read_cntv_tval_el0(void)
+{
+	return __fsl_a008585_read_reg(cntv_tval_el0);
+}
+
+u64 __fsl_a008585_read_cntvct_el0(void)
+{
+	return __fsl_a008585_read_reg(cntvct_el0);
+}
+EXPORT_SYMBOL(__fsl_a008585_read_cntvct_el0);
+#endif /* CONFIG_FSL_ERRATUM_A008585 */
+
 static __always_inline
 void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val,
 			  struct clock_event_device *clk)
@@ -240,6 +280,40 @@
 	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
 }
 
+#ifdef CONFIG_FSL_ERRATUM_A008585
+static __always_inline void fsl_a008585_set_next_event(const int access,
+		unsigned long evt, struct clock_event_device *clk)
+{
+	unsigned long ctrl;
+	u64 cval = evt + arch_counter_get_cntvct();
+
+	ctrl = arch_timer_reg_read(access, ARCH_TIMER_REG_CTRL, clk);
+	ctrl |= ARCH_TIMER_CTRL_ENABLE;
+	ctrl &= ~ARCH_TIMER_CTRL_IT_MASK;
+
+	if (access == ARCH_TIMER_PHYS_ACCESS)
+		write_sysreg(cval, cntp_cval_el0);
+	else if (access == ARCH_TIMER_VIRT_ACCESS)
+		write_sysreg(cval, cntv_cval_el0);
+
+	arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk);
+}
+
+static int fsl_a008585_set_next_event_virt(unsigned long evt,
+					   struct clock_event_device *clk)
+{
+	fsl_a008585_set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk);
+	return 0;
+}
+
+static int fsl_a008585_set_next_event_phys(unsigned long evt,
+					   struct clock_event_device *clk)
+{
+	fsl_a008585_set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk);
+	return 0;
+}
+#endif /* CONFIG_FSL_ERRATUM_A008585 */
+
 static int arch_timer_set_next_event_virt(unsigned long evt,
 					  struct clock_event_device *clk)
 {
@@ -268,6 +342,19 @@
 	return 0;
 }
 
+static void fsl_a008585_set_sne(struct clock_event_device *clk)
+{
+#ifdef CONFIG_FSL_ERRATUM_A008585
+	if (!static_branch_unlikely(&arch_timer_read_ool_enabled))
+		return;
+
+	if (arch_timer_uses_ppi == VIRT_PPI)
+		clk->set_next_event = fsl_a008585_set_next_event_virt;
+	else
+		clk->set_next_event = fsl_a008585_set_next_event_phys;
+#endif
+}
+
 static void __arch_timer_setup(unsigned type,
 			       struct clock_event_device *clk)
 {
@@ -296,6 +383,8 @@
 		default:
 			BUG();
 		}
+
+		fsl_a008585_set_sne(clk);
 	} else {
 		clk->features |= CLOCK_EVT_FEAT_DYNIRQ;
 		clk->name = "arch_mem_timer";
@@ -370,16 +459,33 @@
 		arch_timer_ppi[PHYS_NONSECURE_PPI]);
 }
 
+static u32 check_ppi_trigger(int irq)
+{
+	u32 flags = irq_get_trigger_type(irq);
+
+	if (flags != IRQF_TRIGGER_HIGH && flags != IRQF_TRIGGER_LOW) {
+		pr_warn("WARNING: Invalid trigger for IRQ%d, assuming level low\n", irq);
+		pr_warn("WARNING: Please fix your firmware\n");
+		flags = IRQF_TRIGGER_LOW;
+	}
+
+	return flags;
+}
+
 static int arch_timer_starting_cpu(unsigned int cpu)
 {
 	struct clock_event_device *clk = this_cpu_ptr(arch_timer_evt);
+	u32 flags;
 
 	__arch_timer_setup(ARCH_CP15_TIMER, clk);
 
-	enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], 0);
+	flags = check_ppi_trigger(arch_timer_ppi[arch_timer_uses_ppi]);
+	enable_percpu_irq(arch_timer_ppi[arch_timer_uses_ppi], flags);
 
-	if (arch_timer_has_nonsecure_ppi())
-		enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], 0);
+	if (arch_timer_has_nonsecure_ppi()) {
+		flags = check_ppi_trigger(arch_timer_ppi[PHYS_NONSECURE_PPI]);
+		enable_percpu_irq(arch_timer_ppi[PHYS_NONSECURE_PPI], flags);
+	}
 
 	arch_counter_set_user_access();
 	if (evtstrm_enable)
@@ -495,15 +601,19 @@
 			arch_timer_read_counter = arch_counter_get_cntvct;
 		else
 			arch_timer_read_counter = arch_counter_get_cntpct;
+
+		clocksource_counter.archdata.vdso_direct = true;
+
+#ifdef CONFIG_FSL_ERRATUM_A008585
+		/*
+		 * Don't use the vdso fastpath if errata require using
+		 * the out-of-line counter accessor.
+		 */
+		if (static_branch_unlikely(&arch_timer_read_ool_enabled))
+			clocksource_counter.archdata.vdso_direct = false;
+#endif
 	} else {
 		arch_timer_read_counter = arch_counter_get_cntvct_mem;
-
-		/* If the clocksource name is "arch_sys_counter" the
-		 * VDSO will attempt to read the CP15-based counter.
-		 * Ensure this does not happen when CP15-based
-		 * counter is not available.
-		 */
-		clocksource_counter.name = "arch_mem_counter";
 	}
 
 	start_count = arch_timer_read_counter();
@@ -780,6 +890,15 @@
 
 	arch_timer_c3stop = !of_property_read_bool(np, "always-on");
 
+#ifdef CONFIG_FSL_ERRATUM_A008585
+	if (fsl_a008585_enable < 0)
+		fsl_a008585_enable = of_property_read_bool(np, "fsl,erratum-a008585");
+	if (fsl_a008585_enable) {
+		static_branch_enable(&arch_timer_read_ool_enabled);
+		pr_info("Enabling workaround for FSL erratum A-008585\n");
+	}
+#endif
+
 	/*
 	 * If we cannot rely on firmware initializing the timer registers then
 	 * we should use the physical timers instead.
diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c
index 7e3fd37..92f6e4d 100644
--- a/drivers/clocksource/bcm_kona_timer.c
+++ b/drivers/clocksource/bcm_kona_timer.c
@@ -66,10 +66,10 @@
 
 }
 
-static void
+static int
 kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw)
 {
-	int loop_limit = 4;
+	int loop_limit = 3;
 
 	/*
 	 * Read 64-bit free running counter
@@ -83,18 +83,19 @@
 	 *      if new hi-word is equal to previously read hi-word then stop.
 	 */
 
-	while (--loop_limit) {
+	do {
 		*msw = readl(timer_base + KONA_GPTIMER_STCHI_OFFSET);
 		*lsw = readl(timer_base + KONA_GPTIMER_STCLO_OFFSET);
 		if (*msw == readl(timer_base + KONA_GPTIMER_STCHI_OFFSET))
 			break;
-	}
+	} while (--loop_limit);
 	if (!loop_limit) {
 		pr_err("bcm_kona_timer: getting counter failed.\n");
 		pr_err(" Timer will be impacted\n");
+		return -ETIMEDOUT;
 	}
 
-	return;
+	return 0;
 }
 
 static int kona_timer_set_next_event(unsigned long clc,
@@ -112,8 +113,11 @@
 
 	uint32_t lsw, msw;
 	uint32_t reg;
+	int ret;
 
-	kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+	ret = kona_timer_get_counter(timers.tmr_regs, &msw, &lsw);
+	if (ret)
+		return ret;
 
 	/* Load the "next" event tick value */
 	writel(lsw + clc, timers.tmr_regs + KONA_GPTIMER_STCM0_OFFSET);
diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c
index d91e872..7a960cd 100644
--- a/drivers/clocksource/mips-gic-timer.c
+++ b/drivers/clocksource/mips-gic-timer.c
@@ -109,12 +109,15 @@
 {
 	int ret;
 
-	if (!cpu_has_counter || !gic_frequency)
+	if (!gic_frequency)
 		return -ENXIO;
 
 	ret = setup_percpu_irq(gic_timer_irq, &gic_compare_irqaction);
-	if (ret < 0)
+	if (ret < 0) {
+		pr_err("GIC timer IRQ %d setup failed: %d\n",
+		       gic_timer_irq, ret);
 		return ret;
+	}
 
 	cpuhp_setup_state(CPUHP_AP_MIPS_GIC_TIMER_STARTING,
 			  "AP_MIPS_GIC_TIMER_STARTING", gic_starting_cpu,
@@ -164,7 +167,7 @@
 	gic_start_count();
 }
 
-static void __init gic_clocksource_of_init(struct device_node *node)
+static int __init gic_clocksource_of_init(struct device_node *node)
 {
 	struct clk *clk;
 	int ret;
diff --git a/drivers/clocksource/moxart_timer.c b/drivers/clocksource/moxart_timer.c
index 8414544..2a8f470 100644
--- a/drivers/clocksource/moxart_timer.c
+++ b/drivers/clocksource/moxart_timer.c
@@ -21,6 +21,7 @@
 #include <linux/io.h>
 #include <linux/clocksource.h>
 #include <linux/bitops.h>
+#include <linux/slab.h>
 
 #define TIMER1_BASE		0x00
 #define TIMER2_BASE		0x10
@@ -36,75 +37,109 @@
 #define TIMER_INTR_MASK		0x38
 
 /*
- * TIMER_CR flags:
+ * Moxart TIMER_CR flags:
  *
- * TIMEREG_CR_*_CLOCK	0: PCLK, 1: EXT1CLK
- * TIMEREG_CR_*_INT	overflow interrupt enable bit
+ * MOXART_CR_*_CLOCK	0: PCLK, 1: EXT1CLK
+ * MOXART_CR_*_INT	overflow interrupt enable bit
  */
-#define TIMEREG_CR_1_ENABLE	BIT(0)
-#define TIMEREG_CR_1_CLOCK	BIT(1)
-#define TIMEREG_CR_1_INT	BIT(2)
-#define TIMEREG_CR_2_ENABLE	BIT(3)
-#define TIMEREG_CR_2_CLOCK	BIT(4)
-#define TIMEREG_CR_2_INT	BIT(5)
-#define TIMEREG_CR_3_ENABLE	BIT(6)
-#define TIMEREG_CR_3_CLOCK	BIT(7)
-#define TIMEREG_CR_3_INT	BIT(8)
-#define TIMEREG_CR_COUNT_UP	BIT(9)
+#define MOXART_CR_1_ENABLE	BIT(0)
+#define MOXART_CR_1_CLOCK	BIT(1)
+#define MOXART_CR_1_INT	BIT(2)
+#define MOXART_CR_2_ENABLE	BIT(3)
+#define MOXART_CR_2_CLOCK	BIT(4)
+#define MOXART_CR_2_INT	BIT(5)
+#define MOXART_CR_3_ENABLE	BIT(6)
+#define MOXART_CR_3_CLOCK	BIT(7)
+#define MOXART_CR_3_INT	BIT(8)
+#define MOXART_CR_COUNT_UP	BIT(9)
 
-#define TIMER1_ENABLE		(TIMEREG_CR_2_ENABLE | TIMEREG_CR_1_ENABLE)
-#define TIMER1_DISABLE		(TIMEREG_CR_2_ENABLE)
+#define MOXART_TIMER1_ENABLE	(MOXART_CR_2_ENABLE | MOXART_CR_1_ENABLE)
+#define MOXART_TIMER1_DISABLE	(MOXART_CR_2_ENABLE)
 
-static void __iomem *base;
-static unsigned int clock_count_per_tick;
+/*
+ * The ASpeed variant of the IP block has a different layout
+ * for the control register
+ */
+#define ASPEED_CR_1_ENABLE	BIT(0)
+#define ASPEED_CR_1_CLOCK	BIT(1)
+#define ASPEED_CR_1_INT		BIT(2)
+#define ASPEED_CR_2_ENABLE	BIT(4)
+#define ASPEED_CR_2_CLOCK	BIT(5)
+#define ASPEED_CR_2_INT		BIT(6)
+#define ASPEED_CR_3_ENABLE	BIT(8)
+#define ASPEED_CR_3_CLOCK	BIT(9)
+#define ASPEED_CR_3_INT		BIT(10)
+
+#define ASPEED_TIMER1_ENABLE   (ASPEED_CR_2_ENABLE | ASPEED_CR_1_ENABLE)
+#define ASPEED_TIMER1_DISABLE  (ASPEED_CR_2_ENABLE)
+
+struct moxart_timer {
+	void __iomem *base;
+	unsigned int t1_disable_val;
+	unsigned int t1_enable_val;
+	unsigned int count_per_tick;
+	struct clock_event_device clkevt;
+};
+
+static inline struct moxart_timer *to_moxart(struct clock_event_device *evt)
+{
+	return container_of(evt, struct moxart_timer, clkevt);
+}
+
+static inline void moxart_disable(struct clock_event_device *evt)
+{
+	struct moxart_timer *timer = to_moxart(evt);
+
+	writel(timer->t1_disable_val, timer->base + TIMER_CR);
+}
+
+static inline void moxart_enable(struct clock_event_device *evt)
+{
+	struct moxart_timer *timer = to_moxart(evt);
+
+	writel(timer->t1_enable_val, timer->base + TIMER_CR);
+}
 
 static int moxart_shutdown(struct clock_event_device *evt)
 {
-	writel(TIMER1_DISABLE, base + TIMER_CR);
+	moxart_disable(evt);
 	return 0;
 }
 
 static int moxart_set_oneshot(struct clock_event_device *evt)
 {
-	writel(TIMER1_DISABLE, base + TIMER_CR);
-	writel(~0, base + TIMER1_BASE + REG_LOAD);
+	moxart_disable(evt);
+	writel(~0, to_moxart(evt)->base + TIMER1_BASE + REG_LOAD);
 	return 0;
 }
 
 static int moxart_set_periodic(struct clock_event_device *evt)
 {
-	writel(clock_count_per_tick, base + TIMER1_BASE + REG_LOAD);
-	writel(TIMER1_ENABLE, base + TIMER_CR);
+	struct moxart_timer *timer = to_moxart(evt);
+
+	moxart_disable(evt);
+	writel(timer->count_per_tick, timer->base + TIMER1_BASE + REG_LOAD);
+	writel(0, timer->base + TIMER1_BASE + REG_MATCH1);
+	moxart_enable(evt);
 	return 0;
 }
 
 static int moxart_clkevt_next_event(unsigned long cycles,
-				    struct clock_event_device *unused)
+				    struct clock_event_device *evt)
 {
+	struct moxart_timer *timer = to_moxart(evt);
 	u32 u;
 
-	writel(TIMER1_DISABLE, base + TIMER_CR);
+	moxart_disable(evt);
 
-	u = readl(base + TIMER1_BASE + REG_COUNT) - cycles;
-	writel(u, base + TIMER1_BASE + REG_MATCH1);
+	u = readl(timer->base + TIMER1_BASE + REG_COUNT) - cycles;
+	writel(u, timer->base + TIMER1_BASE + REG_MATCH1);
 
-	writel(TIMER1_ENABLE, base + TIMER_CR);
+	moxart_enable(evt);
 
 	return 0;
 }
 
-static struct clock_event_device moxart_clockevent = {
-	.name			= "moxart_timer",
-	.rating			= 200,
-	.features		= CLOCK_EVT_FEAT_PERIODIC |
-				  CLOCK_EVT_FEAT_ONESHOT,
-	.set_state_shutdown	= moxart_shutdown,
-	.set_state_periodic	= moxart_set_periodic,
-	.set_state_oneshot	= moxart_set_oneshot,
-	.tick_resume		= moxart_set_oneshot,
-	.set_next_event		= moxart_clkevt_next_event,
-};
-
 static irqreturn_t moxart_timer_interrupt(int irq, void *dev_id)
 {
 	struct clock_event_device *evt = dev_id;
@@ -112,21 +147,19 @@
 	return IRQ_HANDLED;
 }
 
-static struct irqaction moxart_timer_irq = {
-	.name		= "moxart-timer",
-	.flags		= IRQF_TIMER,
-	.handler	= moxart_timer_interrupt,
-	.dev_id		= &moxart_clockevent,
-};
-
 static int __init moxart_timer_init(struct device_node *node)
 {
 	int ret, irq;
 	unsigned long pclk;
 	struct clk *clk;
+	struct moxart_timer *timer;
 
-	base = of_iomap(node, 0);
-	if (!base) {
+	timer = kzalloc(sizeof(*timer), GFP_KERNEL);
+	if (!timer)
+		return -ENOMEM;
+
+	timer->base = of_iomap(node, 0);
+	if (!timer->base) {
 		pr_err("%s: of_iomap failed\n", node->full_name);
 		return -ENXIO;
 	}
@@ -137,12 +170,6 @@
 		return -EINVAL;
 	}
 
-	ret = setup_irq(irq, &moxart_timer_irq);
-	if (ret) {
-		pr_err("%s: setup_irq failed\n", node->full_name);
-		return ret;
-	}
-
 	clk = of_clk_get(node, 0);
 	if (IS_ERR(clk))  {
 		pr_err("%s: of_clk_get failed\n", node->full_name);
@@ -151,7 +178,32 @@
 
 	pclk = clk_get_rate(clk);
 
-	ret = clocksource_mmio_init(base + TIMER2_BASE + REG_COUNT,
+	if (of_device_is_compatible(node, "moxa,moxart-timer")) {
+		timer->t1_enable_val = MOXART_TIMER1_ENABLE;
+		timer->t1_disable_val = MOXART_TIMER1_DISABLE;
+	} else if (of_device_is_compatible(node, "aspeed,ast2400-timer")) {
+		timer->t1_enable_val = ASPEED_TIMER1_ENABLE;
+		timer->t1_disable_val = ASPEED_TIMER1_DISABLE;
+	} else {
+		pr_err("%s: unknown platform\n", node->full_name);
+		return -EINVAL;
+	}
+
+	timer->count_per_tick = DIV_ROUND_CLOSEST(pclk, HZ);
+
+	timer->clkevt.name = node->name;
+	timer->clkevt.rating = 200;
+	timer->clkevt.features = CLOCK_EVT_FEAT_PERIODIC |
+					CLOCK_EVT_FEAT_ONESHOT;
+	timer->clkevt.set_state_shutdown = moxart_shutdown;
+	timer->clkevt.set_state_periodic = moxart_set_periodic;
+	timer->clkevt.set_state_oneshot = moxart_set_oneshot;
+	timer->clkevt.tick_resume = moxart_set_oneshot;
+	timer->clkevt.set_next_event = moxart_clkevt_next_event;
+	timer->clkevt.cpumask = cpumask_of(0);
+	timer->clkevt.irq = irq;
+
+	ret = clocksource_mmio_init(timer->base + TIMER2_BASE + REG_COUNT,
 				    "moxart_timer", pclk, 200, 32,
 				    clocksource_mmio_readl_down);
 	if (ret) {
@@ -159,13 +211,26 @@
 		return ret;
 	}
 
-	clock_count_per_tick = DIV_ROUND_CLOSEST(pclk, HZ);
+	ret = request_irq(irq, moxart_timer_interrupt, IRQF_TIMER,
+			  node->name, &timer->clkevt);
+	if (ret) {
+		pr_err("%s: setup_irq failed\n", node->full_name);
+		return ret;
+	}
 
-	writel(~0, base + TIMER2_BASE + REG_LOAD);
-	writel(TIMEREG_CR_2_ENABLE, base + TIMER_CR);
+	/* Clear match registers */
+	writel(0, timer->base + TIMER1_BASE + REG_MATCH1);
+	writel(0, timer->base + TIMER1_BASE + REG_MATCH2);
+	writel(0, timer->base + TIMER2_BASE + REG_MATCH1);
+	writel(0, timer->base + TIMER2_BASE + REG_MATCH2);
 
-	moxart_clockevent.cpumask = cpumask_of(0);
-	moxart_clockevent.irq = irq;
+	/*
+	 * Start timer 2 rolling as our main wall clock source, keep timer 1
+	 * disabled
+	 */
+	writel(0, timer->base + TIMER_CR);
+	writel(~0, timer->base + TIMER2_BASE + REG_LOAD);
+	writel(timer->t1_disable_val, timer->base + TIMER_CR);
 
 	/*
 	 * documentation is not publicly available:
@@ -173,9 +238,9 @@
 	 * max_delta 0xfffffffe should be ok because count
 	 * register size is u32
 	 */
-	clockevents_config_and_register(&moxart_clockevent, pclk,
-					0x4, 0xfffffffe);
+	clockevents_config_and_register(&timer->clkevt, pclk, 0x4, 0xfffffffe);
 
 	return 0;
 }
 CLOCKSOURCE_OF_DECLARE(moxart, "moxa,moxart-timer", moxart_timer_init);
+CLOCKSOURCE_OF_DECLARE(aspeed, "aspeed,ast2400-timer", moxart_timer_init);
diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c
index 937e10b..3e1cb51 100644
--- a/drivers/clocksource/pxa_timer.c
+++ b/drivers/clocksource/pxa_timer.c
@@ -21,6 +21,8 @@
 #include <linux/of_irq.h>
 #include <linux/sched_clock.h>
 
+#include <clocksource/pxa.h>
+
 #include <asm/div64.h>
 
 #define OSMR0		0x00	/* OS Timer 0 Match Register */
diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c
index 97669ee..c83452c 100644
--- a/drivers/clocksource/sun4i_timer.c
+++ b/drivers/clocksource/sun4i_timer.c
@@ -123,12 +123,16 @@
 	.set_next_event = sun4i_clkevt_next_event,
 };
 
+static void sun4i_timer_clear_interrupt(void)
+{
+	writel(TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_ST_REG);
+}
 
 static irqreturn_t sun4i_timer_interrupt(int irq, void *dev_id)
 {
 	struct clock_event_device *evt = (struct clock_event_device *)dev_id;
 
-	writel(0x1, timer_base + TIMER_IRQ_ST_REG);
+	sun4i_timer_clear_interrupt();
 	evt->event_handler(evt);
 
 	return IRQ_HANDLED;
@@ -208,6 +212,9 @@
 	/* Make sure timer is stopped before playing with interrupts */
 	sun4i_clkevt_time_stop(0);
 
+	/* clear timer0 interrupt */
+	sun4i_timer_clear_interrupt();
+
 	sun4i_clockevent.cpumask = cpu_possible_mask;
 	sun4i_clockevent.irq = irq;
 
diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c
index 719b478..3c39e6f 100644
--- a/drivers/clocksource/time-armada-370-xp.c
+++ b/drivers/clocksource/time-armada-370-xp.c
@@ -338,7 +338,6 @@
 	struct clk *clk = of_clk_get_by_name(np, "fixed");
 	int ret;
 
-	clk = of_clk_get(np, 0);
 	if (IS_ERR(clk)) {
 		pr_err("Failed to get clock");
 		return PTR_ERR(clk);
diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c
index a7d9a08..a8e6c7d 100644
--- a/drivers/clocksource/time-pistachio.c
+++ b/drivers/clocksource/time-pistachio.c
@@ -202,10 +202,10 @@
 	rate = clk_get_rate(fast_clk);
 
 	/* Disable irq's for clocksource usage */
-	gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 0);
-	gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 1);
-	gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 2);
-	gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 3);
+	gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 0);
+	gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 1);
+	gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 2);
+	gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 3);
 
 	/* Enable timer block */
 	writel(TIMER_ME_GLOBAL, pcs_gpt.base);
diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c
index 1ffac0c..6555821 100644
--- a/drivers/clocksource/timer-atmel-pit.c
+++ b/drivers/clocksource/timer-atmel-pit.c
@@ -149,24 +149,13 @@
 {
 	struct pit_data *data = dev_id;
 
-	/*
-	 * irqs should be disabled here, but as the irq is shared they are only
-	 * guaranteed to be off if the timer irq is registered first.
-	 */
-	WARN_ON_ONCE(!irqs_disabled());
-
 	/* The PIT interrupt may be disabled, and is shared */
 	if (clockevent_state_periodic(&data->clkevt) &&
 	    (pit_read(data->base, AT91_PIT_SR) & AT91_PIT_PITS)) {
-		unsigned nr_ticks;
-
 		/* Get number of ticks performed before irq, and ack it */
-		nr_ticks = PIT_PICNT(pit_read(data->base, AT91_PIT_PIVR));
-		do {
-			data->cnt += data->cycle;
-			data->clkevt.event_handler(&data->clkevt);
-			nr_ticks--;
-		} while (nr_ticks);
+		data->cnt += data->cycle * PIT_PICNT(pit_read(data->base,
+							      AT91_PIT_PIVR));
+		data->clkevt.event_handler(&data->clkevt);
 
 		return IRQ_HANDLED;
 	}
@@ -177,11 +166,41 @@
 /*
  * Set up both clocksource and clockevent support.
  */
-static int __init at91sam926x_pit_common_init(struct pit_data *data)
+static int __init at91sam926x_pit_dt_init(struct device_node *node)
 {
-	unsigned long	pit_rate;
-	unsigned	bits;
-	int		ret;
+	unsigned long   pit_rate;
+	unsigned        bits;
+	int             ret;
+	struct pit_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->base = of_iomap(node, 0);
+	if (!data->base) {
+		pr_err("Could not map PIT address\n");
+		return -ENXIO;
+	}
+
+	data->mck = of_clk_get(node, 0);
+	if (IS_ERR(data->mck)) {
+		pr_err("Unable to get mck clk\n");
+		return PTR_ERR(data->mck);
+	}
+
+	ret = clk_prepare_enable(data->mck);
+	if (ret) {
+		pr_err("Unable to enable mck\n");
+		return ret;
+	}
+
+	/* Get the interrupts property */
+	data->irq = irq_of_parse_and_map(node, 0);
+	if (!data->irq) {
+		pr_err("Unable to get IRQ from DT\n");
+		return -EINVAL;
+	}
 
 	/*
 	 * Use our actual MCK to figure out how many MCK/16 ticks per
@@ -236,39 +255,5 @@
 
 	return 0;
 }
-
-static int __init at91sam926x_pit_dt_init(struct device_node *node)
-{
-	struct pit_data *data;
-
-	data = kzalloc(sizeof(*data), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	data->base = of_iomap(node, 0);
-	if (!data->base) {
-		pr_err("Could not map PIT address\n");
-		return -ENXIO;
-	}
-
-	data->mck = of_clk_get(node, 0);
-	if (IS_ERR(data->mck))
-		/* Fallback on clkdev for !CCF-based boards */
-		data->mck = clk_get(NULL, "mck");
-
-	if (IS_ERR(data->mck)) {
-		pr_err("Unable to get mck clk\n");
-		return PTR_ERR(data->mck);
-	}
-
-	/* Get the interrupts property */
-	data->irq = irq_of_parse_and_map(node, 0);
-	if (!data->irq) {
-		pr_err("Unable to get IRQ from DT\n");
-		return -EINVAL;
-	}
-
-	return at91sam926x_pit_common_init(data);
-}
 CLOCKSOURCE_OF_DECLARE(at91sam926x_pit, "atmel,at91sam9260-pit",
 		       at91sam926x_pit_dt_init);
diff --git a/drivers/clocksource/timer-oxnas-rps.c b/drivers/clocksource/timer-oxnas-rps.c
index bd887e2..d630bf4 100644
--- a/drivers/clocksource/timer-oxnas-rps.c
+++ b/drivers/clocksource/timer-oxnas-rps.c
@@ -295,3 +295,5 @@
 
 CLOCKSOURCE_OF_DECLARE(ox810se_rps,
 		       "oxsemi,ox810se-rps-timer", oxnas_rps_timer_init);
+CLOCKSOURCE_OF_DECLARE(ox820_rps,
+		       "oxsemi,ox820se-rps-timer", oxnas_rps_timer_init);
diff --git a/drivers/clocksource/timer-ti-32k.c b/drivers/clocksource/timer-ti-32k.c
index 92b7e39..cf5b14e 100644
--- a/drivers/clocksource/timer-ti-32k.c
+++ b/drivers/clocksource/timer-ti-32k.c
@@ -65,7 +65,7 @@
 	return container_of(cs, struct ti_32k, cs);
 }
 
-static cycle_t ti_32k_read_cycles(struct clocksource *cs)
+static cycle_t notrace ti_32k_read_cycles(struct clocksource *cs)
 {
 	struct ti_32k *ti = to_ti_32k(cs);
 
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 74919aa..d8b164a 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -194,7 +194,7 @@
 	  If in doubt, say N.
 
 config CPU_FREQ_GOV_SCHEDUTIL
-	tristate "'schedutil' cpufreq policy governor"
+	bool "'schedutil' cpufreq policy governor"
 	depends on CPU_FREQ && SMP
 	select CPU_FREQ_GOV_ATTR_SET
 	select IRQ_WORK
@@ -208,9 +208,6 @@
 	  frequency tipping point is at utilization/capacity equal to 80% in
 	  both cases.
 
-	  To compile this driver as a module, choose M here: the module will
-	  be called cpufreq_schedutil.
-
 	  If in doubt, say N.
 
 comment "CPU frequency scaling drivers"
@@ -225,7 +222,7 @@
 	help
 	  This adds a generic DT based cpufreq driver for frequency management.
 	  It supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
-	  systems which share clock and voltage across all CPUs.
+	  systems.
 
 	  If in doubt, say N.
 
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 8882b8e..1b2f28f 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -19,10 +19,19 @@
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
+#include <linux/dmi.h>
 #include <linux/vmalloc.h>
 
+#include <asm/unaligned.h>
+
 #include <acpi/cppc_acpi.h>
 
+/* Minimum struct length needed for the DMI processor entry we want */
+#define DMI_ENTRY_PROCESSOR_MIN_LENGTH	48
+
+/* Offest in the DMI processor structure for the max frequency */
+#define DMI_PROCESSOR_MAX_SPEED  0x14
+
 /*
  * These structs contain information parsed from per CPU
  * ACPI _CPC structures.
@@ -30,19 +39,52 @@
  * performance capabilities, desired performance level
  * requested etc.
  */
-static struct cpudata **all_cpu_data;
+static struct cppc_cpudata **all_cpu_data;
+
+/* Capture the max KHz from DMI */
+static u64 cppc_dmi_max_khz;
+
+/* Callback function used to retrieve the max frequency from DMI */
+static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
+{
+	const u8 *dmi_data = (const u8 *)dm;
+	u16 *mhz = (u16 *)private;
+
+	if (dm->type == DMI_ENTRY_PROCESSOR &&
+	    dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) {
+		u16 val = (u16)get_unaligned((const u16 *)
+				(dmi_data + DMI_PROCESSOR_MAX_SPEED));
+		*mhz = val > *mhz ? val : *mhz;
+	}
+}
+
+/* Look up the max frequency in DMI */
+static u64 cppc_get_dmi_max_khz(void)
+{
+	u16 mhz = 0;
+
+	dmi_walk(cppc_find_dmi_mhz, &mhz);
+
+	/*
+	 * Real stupid fallback value, just in case there is no
+	 * actual value set.
+	 */
+	mhz = mhz ? mhz : 1;
+
+	return (1000 * mhz);
+}
 
 static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
 		unsigned int target_freq,
 		unsigned int relation)
 {
-	struct cpudata *cpu;
+	struct cppc_cpudata *cpu;
 	struct cpufreq_freqs freqs;
 	int ret = 0;
 
 	cpu = all_cpu_data[policy->cpu];
 
-	cpu->perf_ctrls.desired_perf = target_freq;
+	cpu->perf_ctrls.desired_perf = (u64)target_freq * policy->max / cppc_dmi_max_khz;
 	freqs.old = policy->cur;
 	freqs.new = target_freq;
 
@@ -66,7 +108,7 @@
 static void cppc_cpufreq_stop_cpu(struct cpufreq_policy *policy)
 {
 	int cpu_num = policy->cpu;
-	struct cpudata *cpu = all_cpu_data[cpu_num];
+	struct cppc_cpudata *cpu = all_cpu_data[cpu_num];
 	int ret;
 
 	cpu->perf_ctrls.desired_perf = cpu->perf_caps.lowest_perf;
@@ -79,7 +121,7 @@
 
 static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
 {
-	struct cpudata *cpu;
+	struct cppc_cpudata *cpu;
 	unsigned int cpu_num = policy->cpu;
 	int ret = 0;
 
@@ -94,10 +136,13 @@
 		return ret;
 	}
 
-	policy->min = cpu->perf_caps.lowest_perf;
-	policy->max = cpu->perf_caps.highest_perf;
+	cppc_dmi_max_khz = cppc_get_dmi_max_khz();
+
+	policy->min = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / cpu->perf_caps.highest_perf;
+	policy->max = cppc_dmi_max_khz;
 	policy->cpuinfo.min_freq = policy->min;
 	policy->cpuinfo.max_freq = policy->max;
+	policy->cpuinfo.transition_latency = cppc_get_transition_latency(cpu_num);
 	policy->shared_type = cpu->shared_type;
 
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
@@ -112,7 +157,8 @@
 	cpu->cur_policy = policy;
 
 	/* Set policy->cur to max now. The governors will adjust later. */
-	policy->cur = cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf;
+	policy->cur = cppc_dmi_max_khz;
+	cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf;
 
 	ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls);
 	if (ret)
@@ -134,7 +180,7 @@
 static int __init cppc_cpufreq_init(void)
 {
 	int i, ret = 0;
-	struct cpudata *cpu;
+	struct cppc_cpudata *cpu;
 
 	if (acpi_disabled)
 		return -ENODEV;
@@ -144,7 +190,7 @@
 		return -ENOMEM;
 
 	for_each_possible_cpu(i) {
-		all_cpu_data[i] = kzalloc(sizeof(struct cpudata), GFP_KERNEL);
+		all_cpu_data[i] = kzalloc(sizeof(struct cppc_cpudata), GFP_KERNEL);
 		if (!all_cpu_data[i])
 			goto out;
 
@@ -175,7 +221,7 @@
 
 static void __exit cppc_cpufreq_exit(void)
 {
-	struct cpudata *cpu;
+	struct cppc_cpudata *cpu;
 	int i;
 
 	cpufreq_unregister_driver(&cppc_cpufreq_driver);
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 0bb44d5..7126762 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -11,6 +11,8 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 
+#include "cpufreq-dt.h"
+
 static const struct of_device_id machines[] __initconst = {
 	{ .compatible = "allwinner,sun4i-a10", },
 	{ .compatible = "allwinner,sun5i-a10s", },
@@ -40,6 +42,7 @@
 	{ .compatible = "samsung,exynos5250", },
 #ifndef CONFIG_BL_SWITCHER
 	{ .compatible = "samsung,exynos5420", },
+	{ .compatible = "samsung,exynos5433", },
 	{ .compatible = "samsung,exynos5800", },
 #endif
 
@@ -51,6 +54,7 @@
 	{ .compatible = "renesas,r8a7779", },
 	{ .compatible = "renesas,r8a7790", },
 	{ .compatible = "renesas,r8a7791", },
+	{ .compatible = "renesas,r8a7792", },
 	{ .compatible = "renesas,r8a7793", },
 	{ .compatible = "renesas,r8a7794", },
 	{ .compatible = "renesas,sh73a0", },
@@ -68,12 +72,16 @@
 
 	{ .compatible = "sigma,tango4" },
 
+	{ .compatible = "ti,am33xx", },
+	{ .compatible = "ti,dra7", },
 	{ .compatible = "ti,omap2", },
 	{ .compatible = "ti,omap3", },
 	{ .compatible = "ti,omap4", },
 	{ .compatible = "ti,omap5", },
 
 	{ .compatible = "xlnx,zynq-7000", },
+
+	{ }
 };
 
 static int __init cpufreq_dt_platdev_init(void)
@@ -89,7 +97,8 @@
 	if (!match)
 		return -ENODEV;
 
-	return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1,
-							       NULL, 0));
+	return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt",
+			       -1, match->data,
+			       sizeof(struct cpufreq_dt_platform_data)));
 }
 device_initcall(cpufreq_dt_platdev_init);
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 3957de8..5c07ae0 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -25,6 +25,8 @@
 #include <linux/slab.h>
 #include <linux/thermal.h>
 
+#include "cpufreq-dt.h"
+
 struct private_data {
 	struct device *cpu_dev;
 	struct thermal_cooling_device *cdev;
@@ -353,6 +355,7 @@
 
 static int dt_cpufreq_probe(struct platform_device *pdev)
 {
+	struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev);
 	int ret;
 
 	/*
@@ -366,7 +369,8 @@
 	if (ret)
 		return ret;
 
-	dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
+	if (data && data->have_governor_per_policy)
+		dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY;
 
 	ret = cpufreq_register_driver(&dt_cpufreq_driver);
 	if (ret)
diff --git a/drivers/cpufreq/cpufreq-dt.h b/drivers/cpufreq/cpufreq-dt.h
new file mode 100644
index 0000000..54d774e
--- /dev/null
+++ b/drivers/cpufreq/cpufreq-dt.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2016 Linaro
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CPUFREQ_DT_H__
+#define __CPUFREQ_DT_H__
+
+#include <linux/types.h>
+
+struct cpufreq_dt_platform_data {
+	bool have_governor_per_policy;
+};
+
+#endif /* __CPUFREQ_DT_H__ */
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 3dd4884..6e6c1fb 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -916,58 +916,18 @@
 	.release	= cpufreq_sysfs_release,
 };
 
-static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu)
+static int add_cpu_dev_symlink(struct cpufreq_policy *policy,
+			       struct device *dev)
 {
-	struct device *cpu_dev;
-
-	pr_debug("%s: Adding symlink for CPU: %u\n", __func__, cpu);
-
-	if (!policy)
-		return 0;
-
-	cpu_dev = get_cpu_device(cpu);
-	if (WARN_ON(!cpu_dev))
-		return 0;
-
-	return sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq");
+	dev_dbg(dev, "%s: Adding symlink\n", __func__);
+	return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
 }
 
-static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu)
+static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
+				   struct device *dev)
 {
-	struct device *cpu_dev;
-
-	pr_debug("%s: Removing symlink for CPU: %u\n", __func__, cpu);
-
-	cpu_dev = get_cpu_device(cpu);
-	if (WARN_ON(!cpu_dev))
-		return;
-
-	sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
-}
-
-/* Add/remove symlinks for all related CPUs */
-static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
-{
-	unsigned int j;
-	int ret = 0;
-
-	/* Some related CPUs might not be present (physically hotplugged) */
-	for_each_cpu(j, policy->real_cpus) {
-		ret = add_cpu_dev_symlink(policy, j);
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-
-static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy)
-{
-	unsigned int j;
-
-	/* Some related CPUs might not be present (physically hotplugged) */
-	for_each_cpu(j, policy->real_cpus)
-		remove_cpu_dev_symlink(policy, j);
+	dev_dbg(dev, "%s: Removing symlink\n", __func__);
+	sysfs_remove_link(&dev->kobj, "cpufreq");
 }
 
 static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
@@ -999,7 +959,7 @@
 			return ret;
 	}
 
-	return cpufreq_add_dev_symlink(policy);
+	return 0;
 }
 
 __weak struct cpufreq_governor *cpufreq_default_governor(void)
@@ -1073,13 +1033,9 @@
 
 static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
 {
-	struct device *dev = get_cpu_device(cpu);
 	struct cpufreq_policy *policy;
 	int ret;
 
-	if (WARN_ON(!dev))
-		return NULL;
-
 	policy = kzalloc(sizeof(*policy), GFP_KERNEL);
 	if (!policy)
 		return NULL;
@@ -1133,7 +1089,6 @@
 
 	down_write(&policy->rwsem);
 	cpufreq_stats_free_table(policy);
-	cpufreq_remove_dev_symlink(policy);
 	kobj = &policy->kobj;
 	cmp = &policy->kobj_unregister;
 	up_write(&policy->rwsem);
@@ -1215,8 +1170,8 @@
 	if (new_policy) {
 		/* related_cpus should at least include policy->cpus. */
 		cpumask_copy(policy->related_cpus, policy->cpus);
-		/* Remember CPUs present at the policy creation time. */
-		cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask);
+		/* Clear mask of registered CPUs */
+		cpumask_clear(policy->real_cpus);
 	}
 
 	/*
@@ -1331,6 +1286,8 @@
 	return ret;
 }
 
+static int cpufreq_offline(unsigned int cpu);
+
 /**
  * cpufreq_add_dev - the cpufreq interface for a CPU device.
  * @dev: CPU device.
@@ -1340,25 +1297,31 @@
 {
 	struct cpufreq_policy *policy;
 	unsigned cpu = dev->id;
+	int ret;
 
 	dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);
 
-	if (cpu_online(cpu))
-		return cpufreq_online(cpu);
+	if (cpu_online(cpu)) {
+		ret = cpufreq_online(cpu);
+		if (ret)
+			return ret;
+	}
 
-	/*
-	 * A hotplug notifier will follow and we will handle it as CPU online
-	 * then.  For now, just create the sysfs link, unless there is no policy
-	 * or the link is already present.
-	 */
+	/* Create sysfs link on CPU registration */
 	policy = per_cpu(cpufreq_cpu_data, cpu);
 	if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
 		return 0;
 
-	return add_cpu_dev_symlink(policy, cpu);
+	ret = add_cpu_dev_symlink(policy, dev);
+	if (ret) {
+		cpumask_clear_cpu(cpu, policy->real_cpus);
+		cpufreq_offline(cpu);
+	}
+
+	return ret;
 }
 
-static void cpufreq_offline(unsigned int cpu)
+static int cpufreq_offline(unsigned int cpu)
 {
 	struct cpufreq_policy *policy;
 	int ret;
@@ -1368,7 +1331,7 @@
 	policy = cpufreq_cpu_get_raw(cpu);
 	if (!policy) {
 		pr_debug("%s: No cpu_data found\n", __func__);
-		return;
+		return 0;
 	}
 
 	down_write(&policy->rwsem);
@@ -1417,6 +1380,7 @@
 
 unlock:
 	up_write(&policy->rwsem);
+	return 0;
 }
 
 /**
@@ -1436,7 +1400,7 @@
 		cpufreq_offline(cpu);
 
 	cpumask_clear_cpu(cpu, policy->real_cpus);
-	remove_cpu_dev_symlink(policy, cpu);
+	remove_cpu_dev_symlink(policy, dev);
 
 	if (cpumask_empty(policy->real_cpus))
 		cpufreq_policy_free(policy, true);
@@ -2332,28 +2296,6 @@
 }
 EXPORT_SYMBOL(cpufreq_update_policy);
 
-static int cpufreq_cpu_callback(struct notifier_block *nfb,
-					unsigned long action, void *hcpu)
-{
-	unsigned int cpu = (unsigned long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		cpufreq_online(cpu);
-		break;
-
-	case CPU_DOWN_PREPARE:
-		cpufreq_offline(cpu);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __refdata cpufreq_cpu_notifier = {
-	.notifier_call = cpufreq_cpu_callback,
-};
-
 /*********************************************************************
  *               BOOST						     *
  *********************************************************************/
@@ -2455,6 +2397,7 @@
 /*********************************************************************
  *               REGISTER / UNREGISTER CPUFREQ DRIVER                *
  *********************************************************************/
+static enum cpuhp_state hp_online;
 
 /**
  * cpufreq_register_driver - register a CPU Frequency driver
@@ -2517,7 +2460,14 @@
 		goto err_if_unreg;
 	}
 
-	register_hotcpu_notifier(&cpufreq_cpu_notifier);
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
+					cpufreq_online,
+					cpufreq_offline);
+	if (ret < 0)
+		goto err_if_unreg;
+	hp_online = ret;
+	ret = 0;
+
 	pr_debug("driver %s up and running\n", driver_data->name);
 	goto out;
 
@@ -2556,7 +2506,7 @@
 	get_online_cpus();
 	subsys_interface_unregister(&cpufreq_interface);
 	remove_boost_sysfs_file();
-	unregister_hotcpu_notifier(&cpufreq_cpu_notifier);
+	cpuhp_remove_state_nocalls(hp_online);
 
 	write_lock_irqsave(&cpufreq_driver_lock, flags);
 
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index e415349..642dd0f 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -260,7 +260,7 @@
 }
 
 static void dbs_update_util_handler(struct update_util_data *data, u64 time,
-				    unsigned long util, unsigned long max)
+				    unsigned int flags)
 {
 	struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
 	struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index be9eade..806f203 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -181,6 +181,8 @@
  * @cpu:		CPU number for this instance data
  * @update_util:	CPUFreq utility callback information
  * @update_util_set:	CPUFreq utility callback is set
+ * @iowait_boost:	iowait-related boost fraction
+ * @last_update:	Time of the last update.
  * @pstate:		Stores P state limits for this CPU
  * @vid:		Stores VID limits for this CPU
  * @pid:		Stores PID parameters for this CPU
@@ -206,6 +208,7 @@
 	struct vid_data vid;
 	struct _pid pid;
 
+	u64	last_update;
 	u64	last_sample_time;
 	u64	prev_aperf;
 	u64	prev_mperf;
@@ -216,6 +219,7 @@
 	struct acpi_processor_performance acpi_perf_data;
 	bool valid_pss_table;
 #endif
+	unsigned int iowait_boost;
 };
 
 static struct cpudata **all_cpu_data;
@@ -229,6 +233,7 @@
  * @p_gain_pct:		PID proportional gain
  * @i_gain_pct:		PID integral gain
  * @d_gain_pct:		PID derivative gain
+ * @boost_iowait:	Whether or not to use iowait boosting.
  *
  * Stores per CPU model static PID configuration data.
  */
@@ -240,6 +245,7 @@
 	int p_gain_pct;
 	int d_gain_pct;
 	int i_gain_pct;
+	bool boost_iowait;
 };
 
 /**
@@ -1029,7 +1035,7 @@
 	},
 };
 
-static struct cpu_defaults silvermont_params = {
+static const struct cpu_defaults silvermont_params = {
 	.pid_policy = {
 		.sample_rate_ms = 10,
 		.deadband = 0,
@@ -1037,6 +1043,7 @@
 		.p_gain_pct = 14,
 		.d_gain_pct = 0,
 		.i_gain_pct = 4,
+		.boost_iowait = true,
 	},
 	.funcs = {
 		.get_max = atom_get_max_pstate,
@@ -1050,7 +1057,7 @@
 	},
 };
 
-static struct cpu_defaults airmont_params = {
+static const struct cpu_defaults airmont_params = {
 	.pid_policy = {
 		.sample_rate_ms = 10,
 		.deadband = 0,
@@ -1058,6 +1065,7 @@
 		.p_gain_pct = 14,
 		.d_gain_pct = 0,
 		.i_gain_pct = 4,
+		.boost_iowait = true,
 	},
 	.funcs = {
 		.get_max = atom_get_max_pstate,
@@ -1071,7 +1079,7 @@
 	},
 };
 
-static struct cpu_defaults knl_params = {
+static const struct cpu_defaults knl_params = {
 	.pid_policy = {
 		.sample_rate_ms = 10,
 		.deadband = 0,
@@ -1091,7 +1099,7 @@
 	},
 };
 
-static struct cpu_defaults bxt_params = {
+static const struct cpu_defaults bxt_params = {
 	.pid_policy = {
 		.sample_rate_ms = 10,
 		.deadband = 0,
@@ -1099,6 +1107,7 @@
 		.p_gain_pct = 14,
 		.d_gain_pct = 0,
 		.i_gain_pct = 4,
+		.boost_iowait = true,
 	},
 	.funcs = {
 		.get_max = core_get_max_pstate,
@@ -1222,36 +1231,18 @@
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
-	u64 cummulative_iowait, delta_iowait_us;
-	u64 delta_iowait_mperf;
-	u64 mperf, now;
-	int32_t cpu_load;
+	int32_t busy_frac, boost;
 
-	cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
+	busy_frac = div_fp(sample->mperf, sample->tsc);
 
-	/*
-	 * Convert iowait time into number of IO cycles spent at max_freq.
-	 * IO is considered as busy only for the cpu_load algorithm. For
-	 * performance this is not needed since we always try to reach the
-	 * maximum P-State, so we are already boosting the IOs.
-	 */
-	delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
-	delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
-		cpu->pstate.max_pstate, MSEC_PER_SEC);
+	boost = cpu->iowait_boost;
+	cpu->iowait_boost >>= 1;
 
-	mperf = cpu->sample.mperf + delta_iowait_mperf;
-	cpu->prev_cummulative_iowait = cummulative_iowait;
+	if (busy_frac < boost)
+		busy_frac = boost;
 
-	/*
-	 * The load can be estimated as the ratio of the mperf counter
-	 * running at a constant frequency during active periods
-	 * (C0) and the time stamp counter running at the same frequency
-	 * also during C-states.
-	 */
-	cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
-	cpu->sample.busy_scaled = cpu_load;
-
-	return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
+	sample->busy_scaled = busy_frac * 100;
+	return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled);
 }
 
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
@@ -1325,15 +1316,29 @@
 		sample->mperf,
 		sample->aperf,
 		sample->tsc,
-		get_avg_frequency(cpu));
+		get_avg_frequency(cpu),
+		fp_toint(cpu->iowait_boost * 100));
 }
 
 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
-				     unsigned long util, unsigned long max)
+				     unsigned int flags)
 {
 	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
-	u64 delta_ns = time - cpu->sample.time;
+	u64 delta_ns;
 
+	if (pid_params.boost_iowait) {
+		if (flags & SCHED_CPUFREQ_IOWAIT) {
+			cpu->iowait_boost = int_tofp(1);
+		} else if (cpu->iowait_boost) {
+			/* Clear iowait_boost if the CPU may have been idle. */
+			delta_ns = time - cpu->last_update;
+			if (delta_ns > TICK_NSEC)
+				cpu->iowait_boost = 0;
+		}
+		cpu->last_update = time;
+	}
+
+	delta_ns = time - cpu->sample.time;
 	if ((s64)delta_ns >= pid_params.sample_rate_ns) {
 		bool sample_taken = intel_pstate_sample(cpu, time);
 
diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index be42f10..1b9bcd7 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -123,7 +123,7 @@
 
 	priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk");
 	if (IS_ERR(priv.cpu_clk)) {
-		dev_err(priv.dev, "Unable to get cpuclk");
+		dev_err(priv.dev, "Unable to get cpuclk\n");
 		return PTR_ERR(priv.cpu_clk);
 	}
 
@@ -132,7 +132,7 @@
 
 	priv.ddr_clk = of_clk_get_by_name(np, "ddrclk");
 	if (IS_ERR(priv.ddr_clk)) {
-		dev_err(priv.dev, "Unable to get ddrclk");
+		dev_err(priv.dev, "Unable to get ddrclk\n");
 		err = PTR_ERR(priv.ddr_clk);
 		goto out_cpu;
 	}
@@ -142,7 +142,7 @@
 
 	priv.powersave_clk = of_clk_get_by_name(np, "powersave");
 	if (IS_ERR(priv.powersave_clk)) {
-		dev_err(priv.dev, "Unable to get powersave");
+		dev_err(priv.dev, "Unable to get powersave\n");
 		err = PTR_ERR(priv.powersave_clk);
 		goto out_ddr;
 	}
@@ -155,7 +155,7 @@
 	if (!err)
 		return 0;
 
-	dev_err(priv.dev, "Failed to register cpufreq driver");
+	dev_err(priv.dev, "Failed to register cpufreq driver\n");
 
 	clk_disable_unprepare(priv.powersave_clk);
 out_ddr:
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 87796e0..d3ffde8 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -145,11 +145,30 @@
 /* Use following macros for conversions between pstate_id and index */
 static inline int idx_to_pstate(unsigned int i)
 {
+	if (unlikely(i >= powernv_pstate_info.nr_pstates)) {
+		pr_warn_once("index %u is out of bound\n", i);
+		return powernv_freqs[powernv_pstate_info.nominal].driver_data;
+	}
+
 	return powernv_freqs[i].driver_data;
 }
 
 static inline unsigned int pstate_to_idx(int pstate)
 {
+	int min = powernv_freqs[powernv_pstate_info.min].driver_data;
+	int max = powernv_freqs[powernv_pstate_info.max].driver_data;
+
+	if (min > 0) {
+		if (unlikely((pstate < max) || (pstate > min))) {
+			pr_warn_once("pstate %d is out of bound\n", pstate);
+			return powernv_pstate_info.nominal;
+		}
+	} else {
+		if (unlikely((pstate > max) || (pstate < min))) {
+			pr_warn_once("pstate %d is out of bound\n", pstate);
+			return powernv_pstate_info.nominal;
+		}
+	}
 	/*
 	 * abs() is deliberately used so that is works with
 	 * both monotonically increasing and decreasing
@@ -593,7 +612,7 @@
 	} else {
 		gpstate_idx = calc_global_pstate(gpstates->elapsed_time,
 						 gpstates->highest_lpstate_idx,
-						 freq_data.pstate_id);
+						 gpstates->last_lpstate_idx);
 	}
 
 	/*
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index e8a7bf5..ea7a4e1 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -105,7 +105,6 @@
 static struct platform_driver scpi_cpufreq_platdrv = {
 	.driver = {
 		.name	= "scpi-cpufreq",
-		.owner	= THIS_MODULE,
 	},
 	.probe		= scpi_cpufreq_probe,
 	.remove		= scpi_cpufreq_remove,
diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c
index 0404203..b366e6d 100644
--- a/drivers/cpufreq/sti-cpufreq.c
+++ b/drivers/cpufreq/sti-cpufreq.c
@@ -163,7 +163,7 @@
 
 	reg_fields = sti_cpufreq_match();
 	if (!reg_fields) {
-		dev_err(dev, "This SoC doesn't support voltage scaling");
+		dev_err(dev, "This SoC doesn't support voltage scaling\n");
 		return -ENODEV;
 	}
 
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index d5657d5..71e586d 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -749,65 +749,52 @@
 	put_cpu();
 }
 
-/**
- * cpuidle_coupled_cpu_notify - notifier called during hotplug transitions
- * @nb: notifier block
- * @action: hotplug transition
- * @hcpu: target cpu number
- *
- * Called when a cpu is brought on or offline using hotplug.  Updates the
- * coupled cpu set appropriately
- */
-static int cpuidle_coupled_cpu_notify(struct notifier_block *nb,
-		unsigned long action, void *hcpu)
+static int coupled_cpu_online(unsigned int cpu)
 {
-	int cpu = (unsigned long)hcpu;
 	struct cpuidle_device *dev;
 
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-	case CPU_DOWN_PREPARE:
-	case CPU_ONLINE:
-	case CPU_DEAD:
-	case CPU_UP_CANCELED:
-	case CPU_DOWN_FAILED:
-		break;
-	default:
-		return NOTIFY_OK;
-	}
-
 	mutex_lock(&cpuidle_lock);
 
 	dev = per_cpu(cpuidle_devices, cpu);
-	if (!dev || !dev->coupled)
-		goto out;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_UP_PREPARE:
-	case CPU_DOWN_PREPARE:
-		cpuidle_coupled_prevent_idle(dev->coupled);
-		break;
-	case CPU_ONLINE:
-	case CPU_DEAD:
+	if (dev && dev->coupled) {
 		cpuidle_coupled_update_online_cpus(dev->coupled);
-		/* Fall through */
-	case CPU_UP_CANCELED:
-	case CPU_DOWN_FAILED:
 		cpuidle_coupled_allow_idle(dev->coupled);
-		break;
 	}
 
-out:
 	mutex_unlock(&cpuidle_lock);
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block cpuidle_coupled_cpu_notifier = {
-	.notifier_call = cpuidle_coupled_cpu_notify,
-};
+static int coupled_cpu_up_prepare(unsigned int cpu)
+{
+	struct cpuidle_device *dev;
+
+	mutex_lock(&cpuidle_lock);
+
+	dev = per_cpu(cpuidle_devices, cpu);
+	if (dev && dev->coupled)
+		cpuidle_coupled_prevent_idle(dev->coupled);
+
+	mutex_unlock(&cpuidle_lock);
+	return 0;
+}
 
 static int __init cpuidle_coupled_init(void)
 {
-	return register_cpu_notifier(&cpuidle_coupled_cpu_notifier);
+	int ret;
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_COUPLED_PREPARE,
+					"cpuidle/coupled:prepare",
+					coupled_cpu_up_prepare,
+					coupled_cpu_online);
+	if (ret)
+		return ret;
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					"cpuidle/coupled:online",
+					coupled_cpu_online,
+					coupled_cpu_up_prepare);
+	if (ret < 0)
+		cpuhp_remove_state_nocalls(CPUHP_CPUIDLE_COUPLED_PREPARE);
+	return ret;
 }
 core_initcall(cpuidle_coupled_init);
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index 4ba3d3f..f440d38 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -121,6 +121,7 @@
 		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
 		if (!dev) {
 			pr_err("Failed to allocate cpuidle device\n");
+			ret = -ENOMEM;
 			goto out_fail;
 		}
 		dev->cpu = cpu;
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index f7ca891..7fe442c 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -119,39 +119,29 @@
 		.enter = snooze_loop },
 };
 
-static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
-			unsigned long action, void *hcpu)
+static int powernv_cpuidle_cpu_online(unsigned int cpu)
 {
-	int hotcpu = (unsigned long)hcpu;
-	struct cpuidle_device *dev =
-				per_cpu(cpuidle_devices, hotcpu);
+	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
 
 	if (dev && cpuidle_get_driver()) {
-		switch (action) {
-		case CPU_ONLINE:
-		case CPU_ONLINE_FROZEN:
-			cpuidle_pause_and_lock();
-			cpuidle_enable_device(dev);
-			cpuidle_resume_and_unlock();
-			break;
-
-		case CPU_DEAD:
-		case CPU_DEAD_FROZEN:
-			cpuidle_pause_and_lock();
-			cpuidle_disable_device(dev);
-			cpuidle_resume_and_unlock();
-			break;
-
-		default:
-			return NOTIFY_DONE;
-		}
+		cpuidle_pause_and_lock();
+		cpuidle_enable_device(dev);
+		cpuidle_resume_and_unlock();
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block setup_hotplug_notifier = {
-	.notifier_call = powernv_cpuidle_add_cpu_notifier,
-};
+static int powernv_cpuidle_cpu_dead(unsigned int cpu)
+{
+	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+	if (dev && cpuidle_get_driver()) {
+		cpuidle_pause_and_lock();
+		cpuidle_disable_device(dev);
+		cpuidle_resume_and_unlock();
+	}
+	return 0;
+}
 
 /*
  * powernv_cpuidle_driver_init()
@@ -355,7 +345,14 @@
 		return retval;
 	}
 
-	register_cpu_notifier(&setup_hotplug_notifier);
+	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					   "cpuidle/powernv:online",
+					   powernv_cpuidle_cpu_online, NULL);
+	WARN_ON(retval < 0);
+	retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
+					   "cpuidle/powernv:dead", NULL,
+					   powernv_cpuidle_cpu_dead);
+	WARN_ON(retval < 0);
 	printk(KERN_DEBUG "powernv_idle_driver registered\n");
 	return 0;
 }
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 07135e0..166ccd7 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -171,39 +171,29 @@
 		.enter = &shared_cede_loop },
 };
 
-static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
-			unsigned long action, void *hcpu)
+static int pseries_cpuidle_cpu_online(unsigned int cpu)
 {
-	int hotcpu = (unsigned long)hcpu;
-	struct cpuidle_device *dev =
-				per_cpu(cpuidle_devices, hotcpu);
+	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
 
 	if (dev && cpuidle_get_driver()) {
-		switch (action) {
-		case CPU_ONLINE:
-		case CPU_ONLINE_FROZEN:
-			cpuidle_pause_and_lock();
-			cpuidle_enable_device(dev);
-			cpuidle_resume_and_unlock();
-			break;
-
-		case CPU_DEAD:
-		case CPU_DEAD_FROZEN:
-			cpuidle_pause_and_lock();
-			cpuidle_disable_device(dev);
-			cpuidle_resume_and_unlock();
-			break;
-
-		default:
-			return NOTIFY_DONE;
-		}
+		cpuidle_pause_and_lock();
+		cpuidle_enable_device(dev);
+		cpuidle_resume_and_unlock();
 	}
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block setup_hotplug_notifier = {
-	.notifier_call = pseries_cpuidle_add_cpu_notifier,
-};
+static int pseries_cpuidle_cpu_dead(unsigned int cpu)
+{
+	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+	if (dev && cpuidle_get_driver()) {
+		cpuidle_pause_and_lock();
+		cpuidle_disable_device(dev);
+		cpuidle_resume_and_unlock();
+	}
+	return 0;
+}
 
 /*
  * pseries_cpuidle_driver_init()
@@ -273,7 +263,14 @@
 		return retval;
 	}
 
-	register_cpu_notifier(&setup_hotplug_notifier);
+	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					   "cpuidle/pseries:online",
+					   pseries_cpuidle_cpu_online, NULL);
+	WARN_ON(retval < 0);
+	retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
+					   "cpuidle/pseries:DEAD", NULL,
+					   pseries_cpuidle_cpu_dead);
+	WARN_ON(retval < 0);
 	printk(KERN_DEBUG "pseries_idle_driver registered\n");
 	return 0;
 }
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 1af94e2..9b035b7 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -550,4 +550,6 @@
 	  This driver interfaces with the hardware crypto accelerator.
 	  Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode.
 
+source "drivers/crypto/chelsio/Kconfig"
+
 endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 3c6432dd..ad7250f 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -31,3 +31,4 @@
 obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
 obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/
 obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
+obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index ea8189f..b304421 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -441,6 +441,9 @@
 			       OP_ALG_AAI_CTR_MOD128);
 	const bool is_rfc3686 = alg->caam.rfc3686;
 
+	if (!ctx->authsize)
+		return 0;
+
 	/* NULL encryption / decryption */
 	if (!ctx->enckeylen)
 		return aead_null_set_sh_desc(aead);
@@ -553,7 +556,10 @@
 
 	/* Read and write assoclen bytes */
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
-	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
+	if (alg->caam.geniv)
+		append_math_add_imm_u32(desc, VARSEQOUTLEN, REG3, IMM, ivsize);
+	else
+		append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
 	/* Skip assoc data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
@@ -562,6 +568,14 @@
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG |
 			     KEY_VLF);
 
+	if (alg->caam.geniv) {
+		append_seq_load(desc, ivsize, LDST_CLASS_1_CCB |
+				LDST_SRCDST_BYTE_CONTEXT |
+				(ctx1_iv_off << LDST_OFFSET_SHIFT));
+		append_move(desc, MOVE_SRC_CLASS1CTX | MOVE_DEST_CLASS2INFIFO |
+			    (ctx1_iv_off << MOVE_OFFSET_SHIFT) | ivsize);
+	}
+
 	/* Load Counter into CONTEXT1 reg */
 	if (is_rfc3686)
 		append_load_imm_u32(desc, be32_to_cpu(1), LDST_IMM |
@@ -614,7 +628,7 @@
 		keys_fit_inline = true;
 
 	/* aead_givencrypt shared descriptor */
-	desc = ctx->sh_desc_givenc;
+	desc = ctx->sh_desc_enc;
 
 	/* Note: Context registers are saved. */
 	init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686);
@@ -645,13 +659,13 @@
 	append_operation(desc, ctx->class2_alg_type |
 			 OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT);
 
-	/* ivsize + cryptlen = seqoutlen - authsize */
-	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
-
 	/* Read and write assoclen bytes */
 	append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
+	/* ivsize + cryptlen = seqoutlen - authsize */
+	append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize);
+
 	/* Skip assoc data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
 
@@ -697,7 +711,7 @@
 	ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc,
 					      desc_bytes(desc),
 					      DMA_TO_DEVICE);
-	if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) {
+	if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) {
 		dev_err(jrdev, "unable to map shared descriptor\n");
 		return -ENOMEM;
 	}
@@ -2147,7 +2161,7 @@
 
 	init_aead_job(req, edesc, all_contig, encrypt);
 
-	if (ivsize && (is_rfc3686 || !(alg->caam.geniv && encrypt)))
+	if (ivsize && ((is_rfc3686 && encrypt) || !alg->caam.geniv))
 		append_load_as_imm(desc, req->iv, ivsize,
 				   LDST_CLASS_1_CCB |
 				   LDST_SRCDST_BYTE_CONTEXT |
@@ -2534,20 +2548,6 @@
 	return ret;
 }
 
-static int aead_givdecrypt(struct aead_request *req)
-{
-	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	unsigned int ivsize = crypto_aead_ivsize(aead);
-
-	if (req->cryptlen < ivsize)
-		return -EINVAL;
-
-	req->cryptlen -= ivsize;
-	req->assoclen += ivsize;
-
-	return aead_decrypt(req);
-}
-
 /*
  * allocate and map the ablkcipher extended descriptor for ablkcipher
  */
@@ -3207,7 +3207,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
@@ -3253,7 +3253,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
@@ -3299,7 +3299,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
@@ -3345,7 +3345,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
@@ -3391,7 +3391,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
@@ -3437,7 +3437,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = AES_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
@@ -3483,7 +3483,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
@@ -3531,7 +3531,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
@@ -3579,7 +3579,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
@@ -3627,7 +3627,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
@@ -3675,7 +3675,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
@@ -3723,7 +3723,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES3_EDE_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
@@ -3769,7 +3769,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
@@ -3815,7 +3815,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
@@ -3861,7 +3861,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
@@ -3907,7 +3907,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
@@ -3953,7 +3953,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
@@ -3999,7 +3999,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = DES_BLOCK_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
@@ -4048,7 +4048,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = MD5_DIGEST_SIZE,
 		},
@@ -4099,7 +4099,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA1_DIGEST_SIZE,
 		},
@@ -4150,7 +4150,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA224_DIGEST_SIZE,
 		},
@@ -4201,7 +4201,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA256_DIGEST_SIZE,
 		},
@@ -4252,7 +4252,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA384_DIGEST_SIZE,
 		},
@@ -4303,7 +4303,7 @@
 			.setkey = aead_setkey,
 			.setauthsize = aead_setauthsize,
 			.encrypt = aead_encrypt,
-			.decrypt = aead_givdecrypt,
+			.decrypt = aead_decrypt,
 			.ivsize = CTR_RFC3686_IV_SIZE,
 			.maxauthsize = SHA512_DIGEST_SIZE,
 		},
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index f1ecc8d..36365b3 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1898,6 +1898,7 @@
 			 template->name);
 		snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
 			 template->driver_name);
+		t_alg->ahash_alg.setkey = NULL;
 	}
 	alg->cra_module = THIS_MODULE;
 	alg->cra_init = caam_hash_cra_init;
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
new file mode 100644
index 0000000..4ce67fb
--- /dev/null
+++ b/drivers/crypto/chelsio/Kconfig
@@ -0,0 +1,19 @@
+config CRYPTO_DEV_CHELSIO
+	tristate "Chelsio Crypto Co-processor Driver"
+	depends on CHELSIO_T4
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	select CRYPTO_SHA512
+	---help---
+	  The Chelsio Crypto Co-processor driver for T6 adapters.
+
+	  For general information about Chelsio and our products, visit
+	  our website at <http://www.chelsio.com>.
+
+	  For customer support, please visit our customer support page at
+	  <http://www.chelsio.com/support.html>.
+
+	  Please send feedback to <linux-bugs@chelsio.com>.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called chcr.
diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile
new file mode 100644
index 0000000..bebdf06
--- /dev/null
+++ b/drivers/crypto/chelsio/Makefile
@@ -0,0 +1,4 @@
+ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+
+obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chcr.o
+chcr-objs :=  chcr_core.o chcr_algo.o
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
new file mode 100644
index 0000000..e4ddb92
--- /dev/null
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -0,0 +1,1525 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Written and Maintained by:
+ *	Manoj Malviya (manojmalviya@chelsio.com)
+ *	Atul Gupta (atul.gupta@chelsio.com)
+ *	Jitendra Lulla (jlulla@chelsio.com)
+ *	Yeshaswi M R Gowda (yeshaswi@chelsio.com)
+ *	Harsh Jain (harsh@chelsio.com)
+ */
+
+#define pr_fmt(fmt) "chcr:" fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/cryptohash.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/highmem.h>
+#include <linux/scatterlist.h>
+
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <crypto/internal/hash.h>
+
+#include "t4fw_api.h"
+#include "t4_msg.h"
+#include "chcr_core.h"
+#include "chcr_algo.h"
+#include "chcr_crypto.h"
+
+static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx)
+{
+	return ctx->crypto_ctx->ablkctx;
+}
+
+static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx)
+{
+	return ctx->crypto_ctx->hmacctx;
+}
+
+static inline struct uld_ctx *ULD_CTX(struct chcr_context *ctx)
+{
+	return ctx->dev->u_ctx;
+}
+
+static inline int is_ofld_imm(const struct sk_buff *skb)
+{
+	return (skb->len <= CRYPTO_MAX_IMM_TX_PKT_LEN);
+}
+
+/*
+ *	sgl_len - calculates the size of an SGL of the given capacity
+ *	@n: the number of SGL entries
+ *	Calculates the number of flits needed for a scatter/gather list that
+ *	can hold the given number of entries.
+ */
+static inline unsigned int sgl_len(unsigned int n)
+{
+	n--;
+	return (3 * n) / 2 + (n & 1) + 2;
+}
+
+/*
+ *	chcr_handle_resp - Unmap the DMA buffers associated with the request
+ *	@req: crypto request
+ */
+int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
+		     int error_status)
+{
+	struct crypto_tfm *tfm = req->tfm;
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct uld_ctx *u_ctx = ULD_CTX(ctx);
+	struct chcr_req_ctx ctx_req;
+	struct cpl_fw6_pld *fw6_pld;
+	unsigned int digestsize, updated_digestsize;
+
+	switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
+	case CRYPTO_ALG_TYPE_BLKCIPHER:
+		ctx_req.req.ablk_req = (struct ablkcipher_request *)req;
+		ctx_req.ctx.ablk_ctx =
+			ablkcipher_request_ctx(ctx_req.req.ablk_req);
+		if (!error_status) {
+			fw6_pld = (struct cpl_fw6_pld *)input;
+			memcpy(ctx_req.req.ablk_req->info, &fw6_pld->data[2],
+			       AES_BLOCK_SIZE);
+		}
+		dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.ablk_req->dst,
+			     ABLK_CTX(ctx)->dst_nents, DMA_FROM_DEVICE);
+		if (ctx_req.ctx.ablk_ctx->skb) {
+			kfree_skb(ctx_req.ctx.ablk_ctx->skb);
+			ctx_req.ctx.ablk_ctx->skb = NULL;
+		}
+		break;
+
+	case CRYPTO_ALG_TYPE_AHASH:
+		ctx_req.req.ahash_req = (struct ahash_request *)req;
+		ctx_req.ctx.ahash_ctx =
+			ahash_request_ctx(ctx_req.req.ahash_req);
+		digestsize =
+			crypto_ahash_digestsize(crypto_ahash_reqtfm(
+							ctx_req.req.ahash_req));
+		updated_digestsize = digestsize;
+		if (digestsize == SHA224_DIGEST_SIZE)
+			updated_digestsize = SHA256_DIGEST_SIZE;
+		else if (digestsize == SHA384_DIGEST_SIZE)
+			updated_digestsize = SHA512_DIGEST_SIZE;
+		if (ctx_req.ctx.ahash_ctx->skb)
+			ctx_req.ctx.ahash_ctx->skb = NULL;
+		if (ctx_req.ctx.ahash_ctx->result == 1) {
+			ctx_req.ctx.ahash_ctx->result = 0;
+			memcpy(ctx_req.req.ahash_req->result, input +
+			       sizeof(struct cpl_fw6_pld),
+			       digestsize);
+		} else {
+			memcpy(ctx_req.ctx.ahash_ctx->partial_hash, input +
+			       sizeof(struct cpl_fw6_pld),
+			       updated_digestsize);
+		}
+		kfree(ctx_req.ctx.ahash_ctx->dummy_payload_ptr);
+		ctx_req.ctx.ahash_ctx->dummy_payload_ptr = NULL;
+		break;
+	}
+	return 0;
+}
+
+/*
+ *	calc_tx_flits_ofld - calculate # of flits for an offload packet
+ *	@skb: the packet
+ *	Returns the number of flits needed for the given offload packet.
+ *	These packets are already fully constructed and no additional headers
+ *	will be added.
+ */
+static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
+{
+	unsigned int flits, cnt;
+
+	if (is_ofld_imm(skb))
+		return DIV_ROUND_UP(skb->len, 8);
+
+	flits = skb_transport_offset(skb) / 8;   /* headers */
+	cnt = skb_shinfo(skb)->nr_frags;
+	if (skb_tail_pointer(skb) != skb_transport_header(skb))
+		cnt++;
+	return flits + sgl_len(cnt);
+}
+
+static struct shash_desc *chcr_alloc_shash(unsigned int ds)
+{
+	struct crypto_shash *base_hash = NULL;
+	struct shash_desc *desc;
+
+	switch (ds) {
+	case SHA1_DIGEST_SIZE:
+		base_hash = crypto_alloc_shash("sha1-generic", 0, 0);
+		break;
+	case SHA224_DIGEST_SIZE:
+		base_hash = crypto_alloc_shash("sha224-generic", 0, 0);
+		break;
+	case SHA256_DIGEST_SIZE:
+		base_hash = crypto_alloc_shash("sha256-generic", 0, 0);
+		break;
+	case SHA384_DIGEST_SIZE:
+		base_hash = crypto_alloc_shash("sha384-generic", 0, 0);
+		break;
+	case SHA512_DIGEST_SIZE:
+		base_hash = crypto_alloc_shash("sha512-generic", 0, 0);
+		break;
+	}
+	if (IS_ERR(base_hash)) {
+		pr_err("Can not allocate sha-generic algo.\n");
+		return (void *)base_hash;
+	}
+
+	desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(base_hash),
+		       GFP_KERNEL);
+	if (!desc)
+		return ERR_PTR(-ENOMEM);
+	desc->tfm = base_hash;
+	desc->flags = crypto_shash_get_flags(base_hash);
+	return desc;
+}
+
+static int chcr_compute_partial_hash(struct shash_desc *desc,
+				     char *iopad, char *result_hash,
+				     int digest_size)
+{
+	struct sha1_state sha1_st;
+	struct sha256_state sha256_st;
+	struct sha512_state sha512_st;
+	int error;
+
+	if (digest_size == SHA1_DIGEST_SIZE) {
+		error = crypto_shash_init(desc) ?:
+			crypto_shash_update(desc, iopad, SHA1_BLOCK_SIZE) ?:
+			crypto_shash_export(desc, (void *)&sha1_st);
+		memcpy(result_hash, sha1_st.state, SHA1_DIGEST_SIZE);
+	} else if (digest_size == SHA224_DIGEST_SIZE) {
+		error = crypto_shash_init(desc) ?:
+			crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?:
+			crypto_shash_export(desc, (void *)&sha256_st);
+		memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE);
+
+	} else if (digest_size == SHA256_DIGEST_SIZE) {
+		error = crypto_shash_init(desc) ?:
+			crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?:
+			crypto_shash_export(desc, (void *)&sha256_st);
+		memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE);
+
+	} else if (digest_size == SHA384_DIGEST_SIZE) {
+		error = crypto_shash_init(desc) ?:
+			crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?:
+			crypto_shash_export(desc, (void *)&sha512_st);
+		memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE);
+
+	} else if (digest_size == SHA512_DIGEST_SIZE) {
+		error = crypto_shash_init(desc) ?:
+			crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?:
+			crypto_shash_export(desc, (void *)&sha512_st);
+		memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE);
+	} else {
+		error = -EINVAL;
+		pr_err("Unknown digest size %d\n", digest_size);
+	}
+	return error;
+}
+
+static void chcr_change_order(char *buf, int ds)
+{
+	int i;
+
+	if (ds == SHA512_DIGEST_SIZE) {
+		for (i = 0; i < (ds / sizeof(u64)); i++)
+			*((__be64 *)buf + i) =
+				cpu_to_be64(*((u64 *)buf + i));
+	} else {
+		for (i = 0; i < (ds / sizeof(u32)); i++)
+			*((__be32 *)buf + i) =
+				cpu_to_be32(*((u32 *)buf + i));
+	}
+}
+
+static inline int is_hmac(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct chcr_alg_template *chcr_crypto_alg =
+		container_of(__crypto_ahash_alg(alg), struct chcr_alg_template,
+			     alg.hash);
+	if ((chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK) ==
+	    CRYPTO_ALG_SUB_TYPE_HASH_HMAC)
+		return 1;
+	return 0;
+}
+
+static inline unsigned int ch_nents(struct scatterlist *sg,
+				    unsigned int *total_size)
+{
+	unsigned int nents;
+
+	for (nents = 0, *total_size = 0; sg; sg = sg_next(sg)) {
+		nents++;
+		*total_size += sg->length;
+	}
+	return nents;
+}
+
+static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl,
+			   struct scatterlist *sg,
+			   struct phys_sge_parm *sg_param)
+{
+	struct phys_sge_pairs *to;
+	unsigned int out_buf_size = sg_param->obsize;
+	unsigned int nents = sg_param->nents, i, j, tot_len = 0;
+
+	phys_cpl->op_to_tid = htonl(CPL_RX_PHYS_DSGL_OPCODE_V(CPL_RX_PHYS_DSGL)
+				    | CPL_RX_PHYS_DSGL_ISRDMA_V(0));
+	phys_cpl->pcirlxorder_to_noofsgentr =
+		htonl(CPL_RX_PHYS_DSGL_PCIRLXORDER_V(0) |
+		      CPL_RX_PHYS_DSGL_PCINOSNOOP_V(0) |
+		      CPL_RX_PHYS_DSGL_PCITPHNTENB_V(0) |
+		      CPL_RX_PHYS_DSGL_PCITPHNT_V(0) |
+		      CPL_RX_PHYS_DSGL_DCAID_V(0) |
+		      CPL_RX_PHYS_DSGL_NOOFSGENTR_V(nents));
+	phys_cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR;
+	phys_cpl->rss_hdr_int.qid = htons(sg_param->qid);
+	phys_cpl->rss_hdr_int.hash_val = 0;
+	to = (struct phys_sge_pairs *)((unsigned char *)phys_cpl +
+				       sizeof(struct cpl_rx_phys_dsgl));
+
+	for (i = 0; nents; to++) {
+		for (j = i; (nents && (j < (8 + i))); j++, nents--) {
+			to->len[j] = htons(sg->length);
+			to->addr[j] = cpu_to_be64(sg_dma_address(sg));
+			if (out_buf_size) {
+				if (tot_len + sg_dma_len(sg) >= out_buf_size) {
+					to->len[j] = htons(out_buf_size -
+							   tot_len);
+					return;
+				}
+				tot_len += sg_dma_len(sg);
+			}
+			sg = sg_next(sg);
+		}
+	}
+}
+
+static inline unsigned
+int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl,
+			 struct scatterlist *sg, struct phys_sge_parm *sg_param)
+{
+	if (!sg || !sg_param->nents)
+		return 0;
+
+	sg_param->nents = dma_map_sg(dev, sg, sg_param->nents, DMA_FROM_DEVICE);
+	if (sg_param->nents == 0) {
+		pr_err("CHCR : DMA mapping failed\n");
+		return -EINVAL;
+	}
+	write_phys_cpl(phys_cpl, sg, sg_param);
+	return 0;
+}
+
+static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm)
+{
+	struct crypto_alg *alg = tfm->__crt_alg;
+	struct chcr_alg_template *chcr_crypto_alg =
+		container_of(alg, struct chcr_alg_template, alg.crypto);
+
+	return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK;
+}
+
+static inline void
+write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags,
+			struct scatterlist *sg, unsigned int count)
+{
+	struct page *spage;
+	unsigned int page_len;
+
+	skb->len += count;
+	skb->data_len += count;
+	skb->truesize += count;
+	while (count > 0) {
+		if (sg && (!(sg->length)))
+			break;
+		spage = sg_page(sg);
+		get_page(spage);
+		page_len = min(sg->length, count);
+		skb_fill_page_desc(skb, *frags, spage, sg->offset, page_len);
+		(*frags)++;
+		count -= page_len;
+		sg = sg_next(sg);
+	}
+}
+
+static int generate_copy_rrkey(struct ablk_ctx *ablkctx,
+			       struct _key_ctx *key_ctx)
+{
+	if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) {
+		get_aes_decrypt_key(key_ctx->key, ablkctx->key,
+				    ablkctx->enckey_len << 3);
+		memset(key_ctx->key + ablkctx->enckey_len, 0,
+		       CHCR_AES_MAX_KEY_LEN - ablkctx->enckey_len);
+	} else {
+		memcpy(key_ctx->key,
+		       ablkctx->key + (ablkctx->enckey_len >> 1),
+		       ablkctx->enckey_len >> 1);
+		get_aes_decrypt_key(key_ctx->key + (ablkctx->enckey_len >> 1),
+				    ablkctx->key, ablkctx->enckey_len << 2);
+	}
+	return 0;
+}
+
+static inline void create_wreq(struct chcr_context *ctx,
+			       struct fw_crypto_lookaside_wr *wreq,
+			       void *req, struct sk_buff *skb,
+			       int kctx_len, int hash_sz,
+			       unsigned int phys_dsgl)
+{
+	struct uld_ctx *u_ctx = ULD_CTX(ctx);
+	struct ulp_txpkt *ulptx = (struct ulp_txpkt *)(wreq + 1);
+	struct ulptx_idata *sc_imm = (struct ulptx_idata *)(ulptx + 1);
+	int iv_loc = IV_DSGL;
+	int qid = u_ctx->lldi.rxq_ids[ctx->tx_channel_id];
+	unsigned int immdatalen = 0, nr_frags = 0;
+
+	if (is_ofld_imm(skb)) {
+		immdatalen = skb->data_len;
+		iv_loc = IV_IMMEDIATE;
+	} else {
+		nr_frags = skb_shinfo(skb)->nr_frags;
+	}
+
+	wreq->op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen,
+						     (kctx_len >> 4));
+	wreq->pld_size_hash_size =
+		htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(sgl_lengths[nr_frags]) |
+		      FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(hash_sz));
+	wreq->len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP(
+				    (calc_tx_flits_ofld(skb) * 8), 16)));
+	wreq->cookie = cpu_to_be64((uintptr_t)req);
+	wreq->rx_chid_to_rx_q_id =
+		FILL_WR_RX_Q_ID(ctx->dev->tx_channel_id, qid,
+				(hash_sz) ? IV_NOP : iv_loc);
+
+	ulptx->cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id);
+	ulptx->len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8),
+					 16) - ((sizeof(*wreq)) >> 4)));
+
+	sc_imm->cmd_more = FILL_CMD_MORE(immdatalen);
+	sc_imm->len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + kctx_len +
+				  ((hash_sz) ? DUMMY_BYTES :
+				  (sizeof(struct cpl_rx_phys_dsgl) +
+				   phys_dsgl)) + immdatalen);
+}
+
+/**
+ *	create_cipher_wr - form the WR for cipher operations
+ *	@req: cipher req.
+ *	@ctx: crypto driver context of the request.
+ *	@qid: ingress qid where response of this WR should be received.
+ *	@op_type:	encryption or decryption
+ */
+static struct sk_buff
+*create_cipher_wr(struct crypto_async_request *req_base,
+		  struct chcr_context *ctx, unsigned short qid,
+		  unsigned short op_type)
+{
+	struct ablkcipher_request *req = (struct ablkcipher_request *)req_base;
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct uld_ctx *u_ctx = ULD_CTX(ctx);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	struct sk_buff *skb = NULL;
+	struct _key_ctx *key_ctx;
+	struct fw_crypto_lookaside_wr *wreq;
+	struct cpl_tx_sec_pdu *sec_cpl;
+	struct cpl_rx_phys_dsgl *phys_cpl;
+	struct chcr_blkcipher_req_ctx *req_ctx = ablkcipher_request_ctx(req);
+	struct phys_sge_parm sg_param;
+	unsigned int frags = 0, transhdr_len, phys_dsgl, dst_bufsize = 0;
+	unsigned int ivsize = crypto_ablkcipher_ivsize(tfm), kctx_len;
+
+	if (!req->info)
+		return ERR_PTR(-EINVAL);
+	ablkctx->dst_nents = ch_nents(req->dst, &dst_bufsize);
+	ablkctx->enc = op_type;
+
+	if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
+	    (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE))
+		return ERR_PTR(-EINVAL);
+
+	phys_dsgl = get_space_for_phys_dsgl(ablkctx->dst_nents);
+
+	kctx_len = sizeof(*key_ctx) +
+		(DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16);
+	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl);
+	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),
+			GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
+	wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len);
+
+	sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET);
+	sec_cpl->op_ivinsrtofst =
+		FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1, 1);
+
+	sec_cpl->pldlen = htonl(ivsize + req->nbytes);
+	sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(0, 0,
+								ivsize + 1, 0);
+
+	sec_cpl->cipherstop_lo_authinsert =  FILL_SEC_CPL_AUTHINSERT(0, 0,
+								     0, 0);
+	sec_cpl->seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0,
+							 ablkctx->ciph_mode,
+							 0, 0, ivsize >> 1, 1);
+	sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0,
+							  0, 1, phys_dsgl);
+
+	key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl));
+	key_ctx->ctx_hdr = ablkctx->key_ctx_hdr;
+	if (op_type == CHCR_DECRYPT_OP) {
+		if (generate_copy_rrkey(ablkctx, key_ctx))
+			goto map_fail1;
+	} else {
+		if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) {
+			memcpy(key_ctx->key, ablkctx->key, ablkctx->enckey_len);
+		} else {
+			memcpy(key_ctx->key, ablkctx->key +
+			       (ablkctx->enckey_len >> 1),
+			       ablkctx->enckey_len >> 1);
+			memcpy(key_ctx->key +
+			       (ablkctx->enckey_len >> 1),
+			       ablkctx->key,
+			       ablkctx->enckey_len >> 1);
+		}
+	}
+	phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)key_ctx + kctx_len);
+
+	memcpy(ablkctx->iv, req->info, ivsize);
+	sg_init_table(&ablkctx->iv_sg, 1);
+	sg_set_buf(&ablkctx->iv_sg, ablkctx->iv, ivsize);
+	sg_param.nents = ablkctx->dst_nents;
+	sg_param.obsize = dst_bufsize;
+	sg_param.qid = qid;
+	sg_param.align = 1;
+	if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, req->dst,
+				 &sg_param))
+		goto map_fail1;
+
+	skb_set_transport_header(skb, transhdr_len);
+	write_sg_data_page_desc(skb, &frags, &ablkctx->iv_sg, ivsize);
+	write_sg_data_page_desc(skb, &frags, req->src, req->nbytes);
+	create_wreq(ctx, wreq, req, skb, kctx_len, 0, phys_dsgl);
+	req_ctx->skb = skb;
+	skb_get(skb);
+	return skb;
+map_fail1:
+	kfree_skb(skb);
+	return ERR_PTR(-ENOMEM);
+}
+
+static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	struct ablkcipher_alg *alg = crypto_ablkcipher_alg(tfm);
+	unsigned int ck_size, context_size;
+	u16 alignment = 0;
+
+	if ((keylen < alg->min_keysize) || (keylen > alg->max_keysize))
+		goto badkey_err;
+
+	memcpy(ablkctx->key, key, keylen);
+	ablkctx->enckey_len = keylen;
+	if (keylen == AES_KEYSIZE_128) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128;
+	} else if (keylen == AES_KEYSIZE_192) {
+		alignment = 8;
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192;
+	} else if (keylen == AES_KEYSIZE_256) {
+		ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256;
+	} else {
+		goto badkey_err;
+	}
+
+	context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD +
+			keylen + alignment) >> 4;
+
+	ablkctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY,
+						0, 0, context_size);
+	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CBC;
+	return 0;
+badkey_err:
+	crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	ablkctx->enckey_len = 0;
+	return -EINVAL;
+}
+
+static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx)
+{
+	int ret = 0;
+	struct sge_ofld_txq *q;
+	struct adapter *adap = netdev2adap(dev);
+
+	local_bh_disable();
+	q = &adap->sge.ofldtxq[idx];
+	spin_lock(&q->sendq.lock);
+	if (q->full)
+		ret = -1;
+	spin_unlock(&q->sendq.lock);
+	local_bh_enable();
+	return ret;
+}
+
+static int chcr_aes_encrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_async_request *req_base = &req->base;
+	struct uld_ctx *u_ctx = ULD_CTX(ctx);
+	struct sk_buff *skb;
+
+	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+					    ctx->tx_channel_id))) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EBUSY;
+	}
+
+	skb = create_cipher_wr(req_base, ctx,
+			       u_ctx->lldi.rxq_ids[ctx->tx_channel_id],
+			       CHCR_ENCRYPT_OP);
+	if (IS_ERR(skb)) {
+		pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
+		return  PTR_ERR(skb);
+	}
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+	chcr_send_wr(skb);
+	return -EINPROGRESS;
+}
+
+static int chcr_aes_decrypt(struct ablkcipher_request *req)
+{
+	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct crypto_async_request *req_base = &req->base;
+	struct uld_ctx *u_ctx = ULD_CTX(ctx);
+	struct sk_buff *skb;
+
+	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+					    ctx->tx_channel_id))) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EBUSY;
+	}
+
+	skb = create_cipher_wr(req_base, ctx, u_ctx->lldi.rxq_ids[0],
+			       CHCR_DECRYPT_OP);
+	if (IS_ERR(skb)) {
+		pr_err("chcr : %s : Failed to form WR. No memory\n", __func__);
+		return PTR_ERR(skb);
+	}
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+	chcr_send_wr(skb);
+	return -EINPROGRESS;
+}
+
+static int chcr_device_init(struct chcr_context *ctx)
+{
+	struct uld_ctx *u_ctx;
+	unsigned int id;
+	int err = 0, rxq_perchan, rxq_idx;
+
+	id = smp_processor_id();
+	if (!ctx->dev) {
+		err = assign_chcr_device(&ctx->dev);
+		if (err) {
+			pr_err("chcr device assignment fails\n");
+			goto out;
+		}
+		u_ctx = ULD_CTX(ctx);
+		rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan;
+		ctx->dev->tx_channel_id = 0;
+		rxq_idx = ctx->dev->tx_channel_id * rxq_perchan;
+		rxq_idx += id % rxq_perchan;
+		spin_lock(&ctx->dev->lock_chcr_dev);
+		ctx->tx_channel_id = rxq_idx;
+		spin_unlock(&ctx->dev->lock_chcr_dev);
+	}
+out:
+	return err;
+}
+
+static int chcr_cra_init(struct crypto_tfm *tfm)
+{
+	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
+	return chcr_device_init(crypto_tfm_ctx(tfm));
+}
+
+static int get_alg_config(struct algo_param *params,
+			  unsigned int auth_size)
+{
+	switch (auth_size) {
+	case SHA1_DIGEST_SIZE:
+		params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_160;
+		params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA1;
+		params->result_size = SHA1_DIGEST_SIZE;
+		break;
+	case SHA224_DIGEST_SIZE:
+		params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256;
+		params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA224;
+		params->result_size = SHA256_DIGEST_SIZE;
+		break;
+	case SHA256_DIGEST_SIZE:
+		params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256;
+		params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA256;
+		params->result_size = SHA256_DIGEST_SIZE;
+		break;
+	case SHA384_DIGEST_SIZE:
+		params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512;
+		params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA512_384;
+		params->result_size = SHA512_DIGEST_SIZE;
+		break;
+	case SHA512_DIGEST_SIZE:
+		params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512;
+		params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA512_512;
+		params->result_size = SHA512_DIGEST_SIZE;
+		break;
+	default:
+		pr_err("chcr : ERROR, unsupported digest size\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static inline int
+write_buffer_data_page_desc(struct chcr_ahash_req_ctx *req_ctx,
+			    struct sk_buff *skb, unsigned int *frags, char *bfr,
+			    u8 bfr_len)
+{
+	void *page_ptr = NULL;
+
+	skb->len += bfr_len;
+	skb->data_len += bfr_len;
+	skb->truesize += bfr_len;
+	page_ptr = kmalloc(CHCR_HASH_MAX_BLOCK_SIZE_128, GFP_ATOMIC | GFP_DMA);
+	if (!page_ptr)
+		return -ENOMEM;
+	get_page(virt_to_page(page_ptr));
+	req_ctx->dummy_payload_ptr = page_ptr;
+	memcpy(page_ptr, bfr, bfr_len);
+	skb_fill_page_desc(skb, *frags, virt_to_page(page_ptr),
+			   offset_in_page(page_ptr), bfr_len);
+	(*frags)++;
+	return 0;
+}
+
+/**
+ *	create_final_hash_wr - Create hash work request
+ *	@req - Cipher req base
+ */
+static struct sk_buff *create_final_hash_wr(struct ahash_request *req,
+					    struct hash_wr_param *param)
+{
+	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+	struct sk_buff *skb = NULL;
+	struct _key_ctx *key_ctx;
+	struct fw_crypto_lookaside_wr *wreq;
+	struct cpl_tx_sec_pdu *sec_cpl;
+	unsigned int frags = 0, transhdr_len, iopad_alignment = 0;
+	unsigned int digestsize = crypto_ahash_digestsize(tfm);
+	unsigned int kctx_len = sizeof(*key_ctx);
+	u8 hash_size_in_response = 0;
+
+	iopad_alignment = KEYCTX_ALIGN_PAD(digestsize);
+	kctx_len += param->alg_prm.result_size + iopad_alignment;
+	if (param->opad_needed)
+		kctx_len += param->alg_prm.result_size + iopad_alignment;
+
+	if (req_ctx->result)
+		hash_size_in_response = digestsize;
+	else
+		hash_size_in_response = param->alg_prm.result_size;
+	transhdr_len = HASH_TRANSHDR_SIZE(kctx_len);
+	skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)),
+			GFP_ATOMIC);
+	if (!skb)
+		return skb;
+
+	skb_reserve(skb, sizeof(struct sge_opaque_hdr));
+	wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len);
+	memset(wreq, 0, transhdr_len);
+
+	sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET);
+	sec_cpl->op_ivinsrtofst =
+		FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0, 0);
+	sec_cpl->pldlen = htonl(param->bfr_len + param->sg_len);
+
+	sec_cpl->aadstart_cipherstop_hi =
+		FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, 0, 0);
+	sec_cpl->cipherstop_lo_authinsert =
+		FILL_SEC_CPL_AUTHINSERT(0, 1, 0, 0);
+	sec_cpl->seqno_numivs =
+		FILL_SEC_CPL_SCMD0_SEQNO(0, 0, 0, param->alg_prm.auth_mode,
+					 param->opad_needed, 0, 0);
+
+	sec_cpl->ivgen_hdrlen =
+		FILL_SEC_CPL_IVGEN_HDRLEN(param->last, param->more, 0, 1, 0, 0);
+
+	key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl));
+	memcpy(key_ctx->key, req_ctx->partial_hash, param->alg_prm.result_size);
+
+	if (param->opad_needed)
+		memcpy(key_ctx->key + ((param->alg_prm.result_size <= 32) ? 32 :
+				       CHCR_HASH_MAX_DIGEST_SIZE),
+		       hmacctx->opad, param->alg_prm.result_size);
+
+	key_ctx->ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY,
+					    param->alg_prm.mk_size, 0,
+					    param->opad_needed,
+					    (kctx_len >> 4));
+	sec_cpl->scmd1 = cpu_to_be64((u64)param->scmd1);
+
+	skb_set_transport_header(skb, transhdr_len);
+	if (param->bfr_len != 0)
+		write_buffer_data_page_desc(req_ctx, skb, &frags, req_ctx->bfr,
+					    param->bfr_len);
+	if (param->sg_len != 0)
+		write_sg_data_page_desc(skb, &frags, req->src, param->sg_len);
+
+	create_wreq(ctx, wreq, req, skb, kctx_len, hash_size_in_response,
+		    0);
+	req_ctx->skb = skb;
+	skb_get(skb);
+	return skb;
+}
+
+static int chcr_ahash_update(struct ahash_request *req)
+{
+	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
+	struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+	struct uld_ctx *u_ctx = NULL;
+	struct sk_buff *skb;
+	u8 remainder = 0, bs;
+	unsigned int nbytes = req->nbytes;
+	struct hash_wr_param params;
+
+	bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+
+	u_ctx = ULD_CTX(ctx);
+	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+					    ctx->tx_channel_id))) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EBUSY;
+	}
+
+	if (nbytes + req_ctx->bfr_len >= bs) {
+		remainder = (nbytes + req_ctx->bfr_len) % bs;
+		nbytes = nbytes + req_ctx->bfr_len - remainder;
+	} else {
+		sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->bfr +
+				   req_ctx->bfr_len, nbytes, 0);
+		req_ctx->bfr_len += nbytes;
+		return 0;
+	}
+
+	params.opad_needed = 0;
+	params.more = 1;
+	params.last = 0;
+	params.sg_len = nbytes - req_ctx->bfr_len;
+	params.bfr_len = req_ctx->bfr_len;
+	params.scmd1 = 0;
+	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+	req_ctx->result = 0;
+	req_ctx->data_len += params.sg_len + params.bfr_len;
+	skb = create_final_hash_wr(req, &params);
+	if (!skb)
+		return -ENOMEM;
+
+	req_ctx->bfr_len = remainder;
+	if (remainder)
+		sg_pcopy_to_buffer(req->src, sg_nents(req->src),
+				   req_ctx->bfr, remainder, req->nbytes -
+				   remainder);
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+	chcr_send_wr(skb);
+
+	return -EINPROGRESS;
+}
+
+static void create_last_hash_block(char *bfr_ptr, unsigned int bs, u64 scmd1)
+{
+	memset(bfr_ptr, 0, bs);
+	*bfr_ptr = 0x80;
+	if (bs == 64)
+		*(__be64 *)(bfr_ptr + 56) = cpu_to_be64(scmd1  << 3);
+	else
+		*(__be64 *)(bfr_ptr + 120) =  cpu_to_be64(scmd1  << 3);
+}
+
+static int chcr_ahash_final(struct ahash_request *req)
+{
+	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
+	struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+	struct hash_wr_param params;
+	struct sk_buff *skb;
+	struct uld_ctx *u_ctx = NULL;
+	u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+
+	u_ctx = ULD_CTX(ctx);
+	if (is_hmac(crypto_ahash_tfm(rtfm)))
+		params.opad_needed = 1;
+	else
+		params.opad_needed = 0;
+	params.sg_len = 0;
+	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+	req_ctx->result = 1;
+	params.bfr_len = req_ctx->bfr_len;
+	req_ctx->data_len += params.bfr_len + params.sg_len;
+	if (req_ctx->bfr && (req_ctx->bfr_len == 0)) {
+		create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len);
+		params.last = 0;
+		params.more = 1;
+		params.scmd1 = 0;
+		params.bfr_len = bs;
+
+	} else {
+		params.scmd1 = req_ctx->data_len;
+		params.last = 1;
+		params.more = 0;
+	}
+	skb = create_final_hash_wr(req, &params);
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+	chcr_send_wr(skb);
+	return -EINPROGRESS;
+}
+
+static int chcr_ahash_finup(struct ahash_request *req)
+{
+	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
+	struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+	struct uld_ctx *u_ctx = NULL;
+	struct sk_buff *skb;
+	struct hash_wr_param params;
+	u8  bs;
+
+	bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+	u_ctx = ULD_CTX(ctx);
+
+	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+					    ctx->tx_channel_id))) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EBUSY;
+	}
+
+	if (is_hmac(crypto_ahash_tfm(rtfm)))
+		params.opad_needed = 1;
+	else
+		params.opad_needed = 0;
+
+	params.sg_len = req->nbytes;
+	params.bfr_len = req_ctx->bfr_len;
+	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+	req_ctx->data_len += params.bfr_len + params.sg_len;
+	req_ctx->result = 1;
+	if (req_ctx->bfr && (req_ctx->bfr_len + req->nbytes) == 0) {
+		create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len);
+		params.last = 0;
+		params.more = 1;
+		params.scmd1 = 0;
+		params.bfr_len = bs;
+	} else {
+		params.scmd1 = req_ctx->data_len;
+		params.last = 1;
+		params.more = 0;
+	}
+
+	skb = create_final_hash_wr(req, &params);
+	if (!skb)
+		return -ENOMEM;
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+	chcr_send_wr(skb);
+
+	return -EINPROGRESS;
+}
+
+static int chcr_ahash_digest(struct ahash_request *req)
+{
+	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req);
+	struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+	struct uld_ctx *u_ctx = NULL;
+	struct sk_buff *skb;
+	struct hash_wr_param params;
+	u8  bs;
+
+	rtfm->init(req);
+	bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+
+	u_ctx = ULD_CTX(ctx);
+	if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0],
+					    ctx->tx_channel_id))) {
+		if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG))
+			return -EBUSY;
+	}
+
+	if (is_hmac(crypto_ahash_tfm(rtfm)))
+		params.opad_needed = 1;
+	else
+		params.opad_needed = 0;
+
+	params.last = 0;
+	params.more = 0;
+	params.sg_len = req->nbytes;
+	params.bfr_len = 0;
+	params.scmd1 = 0;
+	get_alg_config(&params.alg_prm, crypto_ahash_digestsize(rtfm));
+	req_ctx->result = 1;
+	req_ctx->data_len += params.bfr_len + params.sg_len;
+
+	if (req_ctx->bfr && req->nbytes == 0) {
+		create_last_hash_block(req_ctx->bfr, bs, 0);
+		params.more = 1;
+		params.bfr_len = bs;
+	}
+
+	skb = create_final_hash_wr(req, &params);
+	if (!skb)
+		return -ENOMEM;
+
+	skb->dev = u_ctx->lldi.ports[0];
+	set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id);
+	chcr_send_wr(skb);
+	return -EINPROGRESS;
+}
+
+static int chcr_ahash_export(struct ahash_request *areq, void *out)
+{
+	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+	struct chcr_ahash_req_ctx *state = out;
+
+	state->bfr_len = req_ctx->bfr_len;
+	state->data_len = req_ctx->data_len;
+	memcpy(state->bfr, req_ctx->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128);
+	memcpy(state->partial_hash, req_ctx->partial_hash,
+	       CHCR_HASH_MAX_DIGEST_SIZE);
+	return 0;
+}
+
+static int chcr_ahash_import(struct ahash_request *areq, const void *in)
+{
+	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+	struct chcr_ahash_req_ctx *state = (struct chcr_ahash_req_ctx *)in;
+
+	req_ctx->bfr_len = state->bfr_len;
+	req_ctx->data_len = state->data_len;
+	req_ctx->dummy_payload_ptr = NULL;
+	memcpy(req_ctx->bfr, state->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128);
+	memcpy(req_ctx->partial_hash, state->partial_hash,
+	       CHCR_HASH_MAX_DIGEST_SIZE);
+	return 0;
+}
+
+static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
+			     unsigned int keylen)
+{
+	struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+	unsigned int digestsize = crypto_ahash_digestsize(tfm);
+	unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+	unsigned int i, err = 0, updated_digestsize;
+
+	/*
+	 * use the key to calculate the ipad and opad. ipad will sent with the
+	 * first request's data. opad will be sent with the final hash result
+	 * ipad in hmacctx->ipad and opad in hmacctx->opad location
+	 */
+	if (!hmacctx->desc)
+		return -EINVAL;
+	if (keylen > bs) {
+		err = crypto_shash_digest(hmacctx->desc, key, keylen,
+					  hmacctx->ipad);
+		if (err)
+			goto out;
+		keylen = digestsize;
+	} else {
+		memcpy(hmacctx->ipad, key, keylen);
+	}
+	memset(hmacctx->ipad + keylen, 0, bs - keylen);
+	memcpy(hmacctx->opad, hmacctx->ipad, bs);
+
+	for (i = 0; i < bs / sizeof(int); i++) {
+		*((unsigned int *)(&hmacctx->ipad) + i) ^= IPAD_DATA;
+		*((unsigned int *)(&hmacctx->opad) + i) ^= OPAD_DATA;
+	}
+
+	updated_digestsize = digestsize;
+	if (digestsize == SHA224_DIGEST_SIZE)
+		updated_digestsize = SHA256_DIGEST_SIZE;
+	else if (digestsize == SHA384_DIGEST_SIZE)
+		updated_digestsize = SHA512_DIGEST_SIZE;
+	err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->ipad,
+					hmacctx->ipad, digestsize);
+	if (err)
+		goto out;
+	chcr_change_order(hmacctx->ipad, updated_digestsize);
+
+	err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->opad,
+					hmacctx->opad, digestsize);
+	if (err)
+		goto out;
+	chcr_change_order(hmacctx->opad, updated_digestsize);
+out:
+	return err;
+}
+
+static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			       unsigned int key_len)
+{
+	struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm);
+	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
+	int status = 0;
+	unsigned short context_size = 0;
+
+	if ((key_len == (AES_KEYSIZE_128 << 1)) ||
+	    (key_len == (AES_KEYSIZE_256 << 1))) {
+		memcpy(ablkctx->key, key, key_len);
+		ablkctx->enckey_len = key_len;
+		context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4;
+		ablkctx->key_ctx_hdr =
+			FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ?
+					 CHCR_KEYCTX_CIPHER_KEY_SIZE_128 :
+					 CHCR_KEYCTX_CIPHER_KEY_SIZE_256,
+					 CHCR_KEYCTX_NO_KEY, 1,
+					 0, context_size);
+		ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS;
+	} else {
+		crypto_tfm_set_flags((struct crypto_tfm *)tfm,
+				     CRYPTO_TFM_RES_BAD_KEY_LEN);
+		ablkctx->enckey_len = 0;
+		status = -EINVAL;
+	}
+	return status;
+}
+
+static int chcr_sha_init(struct ahash_request *areq)
+{
+	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	int digestsize =  crypto_ahash_digestsize(tfm);
+
+	req_ctx->data_len = 0;
+	req_ctx->dummy_payload_ptr = NULL;
+	req_ctx->bfr_len = 0;
+	req_ctx->skb = NULL;
+	req_ctx->result = 0;
+	copy_hash_init_values(req_ctx->partial_hash, digestsize);
+	return 0;
+}
+
+static int chcr_sha_cra_init(struct crypto_tfm *tfm)
+{
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct chcr_ahash_req_ctx));
+	return chcr_device_init(crypto_tfm_ctx(tfm));
+}
+
+static int chcr_hmac_init(struct ahash_request *areq)
+{
+	struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq);
+	struct crypto_ahash *rtfm = crypto_ahash_reqtfm(areq);
+	struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm));
+	struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+	unsigned int digestsize = crypto_ahash_digestsize(rtfm);
+	unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm));
+
+	chcr_sha_init(areq);
+	req_ctx->data_len = bs;
+	if (is_hmac(crypto_ahash_tfm(rtfm))) {
+		if (digestsize == SHA224_DIGEST_SIZE)
+			memcpy(req_ctx->partial_hash, hmacctx->ipad,
+			       SHA256_DIGEST_SIZE);
+		else if (digestsize == SHA384_DIGEST_SIZE)
+			memcpy(req_ctx->partial_hash, hmacctx->ipad,
+			       SHA512_DIGEST_SIZE);
+		else
+			memcpy(req_ctx->partial_hash, hmacctx->ipad,
+			       digestsize);
+	}
+	return 0;
+}
+
+static int chcr_hmac_cra_init(struct crypto_tfm *tfm)
+{
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+	unsigned int digestsize =
+		crypto_ahash_digestsize(__crypto_ahash_cast(tfm));
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct chcr_ahash_req_ctx));
+	hmacctx->desc = chcr_alloc_shash(digestsize);
+	if (IS_ERR(hmacctx->desc))
+		return PTR_ERR(hmacctx->desc);
+	return chcr_device_init(crypto_tfm_ctx(tfm));
+}
+
+static void chcr_free_shash(struct shash_desc *desc)
+{
+	crypto_free_shash(desc->tfm);
+	kfree(desc);
+}
+
+static void chcr_hmac_cra_exit(struct crypto_tfm *tfm)
+{
+	struct chcr_context *ctx = crypto_tfm_ctx(tfm);
+	struct hmac_ctx *hmacctx = HMAC_CTX(ctx);
+
+	if (hmacctx->desc) {
+		chcr_free_shash(hmacctx->desc);
+		hmacctx->desc = NULL;
+	}
+}
+
+static struct chcr_alg_template driver_algs[] = {
+	/* AES-CBC */
+	{
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.is_registered = 0,
+		.alg.crypto = {
+			.cra_name		= "cbc(aes)",
+			.cra_driver_name	= "cbc(aes-chcr)",
+			.cra_priority		= CHCR_CRA_PRIORITY,
+			.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				CRYPTO_ALG_ASYNC,
+			.cra_blocksize		= AES_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct chcr_context)
+				+ sizeof(struct ablk_ctx),
+			.cra_alignmask		= 0,
+			.cra_type		= &crypto_ablkcipher_type,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= chcr_cra_init,
+			.cra_exit		= NULL,
+			.cra_u.ablkcipher	= {
+				.min_keysize	= AES_MIN_KEY_SIZE,
+				.max_keysize	= AES_MAX_KEY_SIZE,
+				.ivsize		= AES_BLOCK_SIZE,
+				.setkey			= chcr_aes_cbc_setkey,
+				.encrypt		= chcr_aes_encrypt,
+				.decrypt		= chcr_aes_decrypt,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_ABLKCIPHER,
+		.is_registered = 0,
+		.alg.crypto =   {
+			.cra_name		= "xts(aes)",
+			.cra_driver_name	= "xts(aes-chcr)",
+			.cra_priority		= CHCR_CRA_PRIORITY,
+			.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
+				CRYPTO_ALG_ASYNC,
+			.cra_blocksize		= AES_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct chcr_context) +
+				sizeof(struct ablk_ctx),
+			.cra_alignmask		= 0,
+			.cra_type		= &crypto_ablkcipher_type,
+			.cra_module		= THIS_MODULE,
+			.cra_init		= chcr_cra_init,
+			.cra_exit		= NULL,
+			.cra_u = {
+				.ablkcipher = {
+					.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+					.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+					.ivsize		= AES_BLOCK_SIZE,
+					.setkey		= chcr_aes_xts_setkey,
+					.encrypt	= chcr_aes_encrypt,
+					.decrypt	= chcr_aes_decrypt,
+				}
+			}
+		}
+	},
+	/* SHA */
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA1_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "sha1",
+				.cra_driver_name = "sha1-chcr",
+				.cra_blocksize = SHA1_BLOCK_SIZE,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA256_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "sha256",
+				.cra_driver_name = "sha256-chcr",
+				.cra_blocksize = SHA256_BLOCK_SIZE,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA224_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "sha224",
+				.cra_driver_name = "sha224-chcr",
+				.cra_blocksize = SHA224_BLOCK_SIZE,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA384_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "sha384",
+				.cra_driver_name = "sha384-chcr",
+				.cra_blocksize = SHA384_BLOCK_SIZE,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_AHASH,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA512_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "sha512",
+				.cra_driver_name = "sha512-chcr",
+				.cra_blocksize = SHA512_BLOCK_SIZE,
+			}
+		}
+	},
+	/* HMAC */
+	{
+		.type = CRYPTO_ALG_TYPE_HMAC,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA1_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "hmac(sha1)",
+				.cra_driver_name = "hmac(sha1-chcr)",
+				.cra_blocksize = SHA1_BLOCK_SIZE,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_HMAC,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA224_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "hmac(sha224)",
+				.cra_driver_name = "hmac(sha224-chcr)",
+				.cra_blocksize = SHA224_BLOCK_SIZE,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_HMAC,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA256_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "hmac(sha256)",
+				.cra_driver_name = "hmac(sha256-chcr)",
+				.cra_blocksize = SHA256_BLOCK_SIZE,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_HMAC,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA384_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "hmac(sha384)",
+				.cra_driver_name = "hmac(sha384-chcr)",
+				.cra_blocksize = SHA384_BLOCK_SIZE,
+			}
+		}
+	},
+	{
+		.type = CRYPTO_ALG_TYPE_HMAC,
+		.is_registered = 0,
+		.alg.hash = {
+			.halg.digestsize = SHA512_DIGEST_SIZE,
+			.halg.base = {
+				.cra_name = "hmac(sha512)",
+				.cra_driver_name = "hmac(sha512-chcr)",
+				.cra_blocksize = SHA512_BLOCK_SIZE,
+			}
+		}
+	},
+};
+
+/*
+ *	chcr_unregister_alg - Deregister crypto algorithms with
+ *	kernel framework.
+ */
+static int chcr_unregister_alg(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
+		switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) {
+		case CRYPTO_ALG_TYPE_ABLKCIPHER:
+			if (driver_algs[i].is_registered)
+				crypto_unregister_alg(
+						&driver_algs[i].alg.crypto);
+			break;
+		case CRYPTO_ALG_TYPE_AHASH:
+			if (driver_algs[i].is_registered)
+				crypto_unregister_ahash(
+						&driver_algs[i].alg.hash);
+			break;
+		}
+		driver_algs[i].is_registered = 0;
+	}
+	return 0;
+}
+
+#define SZ_AHASH_CTX sizeof(struct chcr_context)
+#define SZ_AHASH_H_CTX (sizeof(struct chcr_context) + sizeof(struct hmac_ctx))
+#define SZ_AHASH_REQ_CTX sizeof(struct chcr_ahash_req_ctx)
+#define AHASH_CRA_FLAGS (CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC)
+
+/*
+ *	chcr_register_alg - Register crypto algorithms with kernel framework.
+ */
+static int chcr_register_alg(void)
+{
+	struct crypto_alg ai;
+	struct ahash_alg *a_hash;
+	int err = 0, i;
+	char *name = NULL;
+
+	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
+		if (driver_algs[i].is_registered)
+			continue;
+		switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) {
+		case CRYPTO_ALG_TYPE_ABLKCIPHER:
+			err = crypto_register_alg(&driver_algs[i].alg.crypto);
+			name = driver_algs[i].alg.crypto.cra_driver_name;
+			break;
+		case CRYPTO_ALG_TYPE_AHASH:
+			a_hash = &driver_algs[i].alg.hash;
+			a_hash->update = chcr_ahash_update;
+			a_hash->final = chcr_ahash_final;
+			a_hash->finup = chcr_ahash_finup;
+			a_hash->digest = chcr_ahash_digest;
+			a_hash->export = chcr_ahash_export;
+			a_hash->import = chcr_ahash_import;
+			a_hash->halg.statesize = SZ_AHASH_REQ_CTX;
+			a_hash->halg.base.cra_priority = CHCR_CRA_PRIORITY;
+			a_hash->halg.base.cra_module = THIS_MODULE;
+			a_hash->halg.base.cra_flags = AHASH_CRA_FLAGS;
+			a_hash->halg.base.cra_alignmask = 0;
+			a_hash->halg.base.cra_exit = NULL;
+			a_hash->halg.base.cra_type = &crypto_ahash_type;
+
+			if (driver_algs[i].type == CRYPTO_ALG_TYPE_HMAC) {
+				a_hash->halg.base.cra_init = chcr_hmac_cra_init;
+				a_hash->halg.base.cra_exit = chcr_hmac_cra_exit;
+				a_hash->init = chcr_hmac_init;
+				a_hash->setkey = chcr_ahash_setkey;
+				a_hash->halg.base.cra_ctxsize = SZ_AHASH_H_CTX;
+			} else {
+				a_hash->init = chcr_sha_init;
+				a_hash->halg.base.cra_ctxsize = SZ_AHASH_CTX;
+				a_hash->halg.base.cra_init = chcr_sha_cra_init;
+			}
+			err = crypto_register_ahash(&driver_algs[i].alg.hash);
+			ai = driver_algs[i].alg.hash.halg.base;
+			name = ai.cra_driver_name;
+			break;
+		}
+		if (err) {
+			pr_err("chcr : %s : Algorithm registration failed\n",
+			       name);
+			goto register_err;
+		} else {
+			driver_algs[i].is_registered = 1;
+		}
+	}
+	return 0;
+
+register_err:
+	chcr_unregister_alg();
+	return err;
+}
+
+/*
+ *	start_crypto - Register the crypto algorithms.
+ *	This should called once when the first device comesup. After this
+ *	kernel will start calling driver APIs for crypto operations.
+ */
+int start_crypto(void)
+{
+	return chcr_register_alg();
+}
+
+/*
+ *	stop_crypto - Deregister all the crypto algorithms with kernel.
+ *	This should be called once when the last device goes down. After this
+ *	kernel will not call the driver API for crypto operations.
+ */
+int stop_crypto(void)
+{
+	chcr_unregister_alg();
+	return 0;
+}
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
new file mode 100644
index 0000000..ec64fbc
--- /dev/null
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -0,0 +1,471 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __CHCR_ALGO_H__
+#define __CHCR_ALGO_H__
+
+/* Crypto key context */
+#define KEY_CONTEXT_CTX_LEN_S           24
+#define KEY_CONTEXT_CTX_LEN_M           0xff
+#define KEY_CONTEXT_CTX_LEN_V(x)        ((x) << KEY_CONTEXT_CTX_LEN_S)
+#define KEY_CONTEXT_CTX_LEN_G(x) \
+	(((x) >> KEY_CONTEXT_CTX_LEN_S) & KEY_CONTEXT_CTX_LEN_M)
+
+#define KEY_CONTEXT_DUAL_CK_S      12
+#define KEY_CONTEXT_DUAL_CK_M      0x1
+#define KEY_CONTEXT_DUAL_CK_V(x)   ((x) << KEY_CONTEXT_DUAL_CK_S)
+#define KEY_CONTEXT_DUAL_CK_G(x)   \
+(((x) >> KEY_CONTEXT_DUAL_CK_S) & KEY_CONTEXT_DUAL_CK_M)
+#define KEY_CONTEXT_DUAL_CK_F      KEY_CONTEXT_DUAL_CK_V(1U)
+
+#define KEY_CONTEXT_SALT_PRESENT_S      10
+#define KEY_CONTEXT_SALT_PRESENT_M      0x1
+#define KEY_CONTEXT_SALT_PRESENT_V(x)   ((x) << KEY_CONTEXT_SALT_PRESENT_S)
+#define KEY_CONTEXT_SALT_PRESENT_G(x)   \
+	(((x) >> KEY_CONTEXT_SALT_PRESENT_S) & \
+	 KEY_CONTEXT_SALT_PRESENT_M)
+#define KEY_CONTEXT_SALT_PRESENT_F      KEY_CONTEXT_SALT_PRESENT_V(1U)
+
+#define KEY_CONTEXT_VALID_S     0
+#define KEY_CONTEXT_VALID_M     0x1
+#define KEY_CONTEXT_VALID_V(x)  ((x) << KEY_CONTEXT_VALID_S)
+#define KEY_CONTEXT_VALID_G(x)  \
+	(((x) >> KEY_CONTEXT_VALID_S) & \
+	 KEY_CONTEXT_VALID_M)
+#define KEY_CONTEXT_VALID_F     KEY_CONTEXT_VALID_V(1U)
+
+#define KEY_CONTEXT_CK_SIZE_S           6
+#define KEY_CONTEXT_CK_SIZE_M           0xf
+#define KEY_CONTEXT_CK_SIZE_V(x)        ((x) << KEY_CONTEXT_CK_SIZE_S)
+#define KEY_CONTEXT_CK_SIZE_G(x)        \
+	(((x) >> KEY_CONTEXT_CK_SIZE_S) & KEY_CONTEXT_CK_SIZE_M)
+
+#define KEY_CONTEXT_MK_SIZE_S           2
+#define KEY_CONTEXT_MK_SIZE_M           0xf
+#define KEY_CONTEXT_MK_SIZE_V(x)        ((x) << KEY_CONTEXT_MK_SIZE_S)
+#define KEY_CONTEXT_MK_SIZE_G(x)        \
+	(((x) >> KEY_CONTEXT_MK_SIZE_S) & KEY_CONTEXT_MK_SIZE_M)
+
+#define KEY_CONTEXT_OPAD_PRESENT_S      11
+#define KEY_CONTEXT_OPAD_PRESENT_M      0x1
+#define KEY_CONTEXT_OPAD_PRESENT_V(x)   ((x) << KEY_CONTEXT_OPAD_PRESENT_S)
+#define KEY_CONTEXT_OPAD_PRESENT_G(x)   \
+	(((x) >> KEY_CONTEXT_OPAD_PRESENT_S) & \
+	 KEY_CONTEXT_OPAD_PRESENT_M)
+#define KEY_CONTEXT_OPAD_PRESENT_F      KEY_CONTEXT_OPAD_PRESENT_V(1U)
+
+#define CHCR_HASH_MAX_DIGEST_SIZE 64
+#define CHCR_MAX_SHA_DIGEST_SIZE 64
+
+#define IPSEC_TRUNCATED_ICV_SIZE 12
+#define TLS_TRUNCATED_HMAC_SIZE 10
+#define CBCMAC_DIGEST_SIZE 16
+#define MAX_HASH_NAME 20
+
+#define SHA1_INIT_STATE_5X4B    5
+#define SHA256_INIT_STATE_8X4B  8
+#define SHA512_INIT_STATE_8X8B  8
+#define SHA1_INIT_STATE         SHA1_INIT_STATE_5X4B
+#define SHA224_INIT_STATE       SHA256_INIT_STATE_8X4B
+#define SHA256_INIT_STATE       SHA256_INIT_STATE_8X4B
+#define SHA384_INIT_STATE       SHA512_INIT_STATE_8X8B
+#define SHA512_INIT_STATE       SHA512_INIT_STATE_8X8B
+
+#define DUMMY_BYTES 16
+
+#define IPAD_DATA 0x36363636
+#define OPAD_DATA 0x5c5c5c5c
+
+#define TRANSHDR_SIZE(alignedkctx_len)\
+	(sizeof(struct ulptx_idata) +\
+	 sizeof(struct ulp_txpkt) +\
+	 sizeof(struct fw_crypto_lookaside_wr) +\
+	 sizeof(struct cpl_tx_sec_pdu) +\
+	 (alignedkctx_len))
+#define CIPHER_TRANSHDR_SIZE(alignedkctx_len, sge_pairs) \
+	(TRANSHDR_SIZE(alignedkctx_len) + sge_pairs +\
+	 sizeof(struct cpl_rx_phys_dsgl))
+#define HASH_TRANSHDR_SIZE(alignedkctx_len)\
+	(TRANSHDR_SIZE(alignedkctx_len) + DUMMY_BYTES)
+
+#define SEC_CPL_OFFSET (sizeof(struct fw_crypto_lookaside_wr) + \
+			sizeof(struct ulp_txpkt) + \
+			sizeof(struct ulptx_idata))
+
+#define FILL_SEC_CPL_OP_IVINSR(id, len, hldr, ofst)      \
+	htonl( \
+	       CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | \
+	       CPL_TX_SEC_PDU_RXCHID_V((id)) | \
+	       CPL_TX_SEC_PDU_ACKFOLLOWS_V(0) | \
+	       CPL_TX_SEC_PDU_ULPTXLPBK_V(1) | \
+	       CPL_TX_SEC_PDU_CPLLEN_V((len)) | \
+	       CPL_TX_SEC_PDU_PLACEHOLDER_V((hldr)) | \
+	       CPL_TX_SEC_PDU_IVINSRTOFST_V((ofst)))
+
+#define  FILL_SEC_CPL_CIPHERSTOP_HI(a_start, a_stop, c_start, c_stop_hi) \
+	htonl( \
+	       CPL_TX_SEC_PDU_AADSTART_V((a_start)) | \
+	       CPL_TX_SEC_PDU_AADSTOP_V((a_stop)) | \
+	       CPL_TX_SEC_PDU_CIPHERSTART_V((c_start)) | \
+	       CPL_TX_SEC_PDU_CIPHERSTOP_HI_V((c_stop_hi)))
+
+#define  FILL_SEC_CPL_AUTHINSERT(c_stop_lo, a_start, a_stop, a_inst) \
+	htonl( \
+	       CPL_TX_SEC_PDU_CIPHERSTOP_LO_V((c_stop_lo)) | \
+		CPL_TX_SEC_PDU_AUTHSTART_V((a_start)) | \
+		CPL_TX_SEC_PDU_AUTHSTOP_V((a_stop)) | \
+		CPL_TX_SEC_PDU_AUTHINSERT_V((a_inst)))
+
+#define  FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size, nivs)  \
+		htonl( \
+		SCMD_SEQ_NO_CTRL_V(0) | \
+		SCMD_STATUS_PRESENT_V(0) | \
+		SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_GENERIC) | \
+		SCMD_ENC_DEC_CTRL_V((ctrl)) | \
+		SCMD_CIPH_AUTH_SEQ_CTRL_V((seq)) | \
+		SCMD_CIPH_MODE_V((cmode)) | \
+		SCMD_AUTH_MODE_V((amode)) | \
+		SCMD_HMAC_CTRL_V((opad)) | \
+		SCMD_IV_SIZE_V((size)) | \
+		SCMD_NUM_IVS_V((nivs)))
+
+#define FILL_SEC_CPL_IVGEN_HDRLEN(last, more, ctx_in, mac, ivdrop, len) htonl( \
+		SCMD_ENB_DBGID_V(0) | \
+		SCMD_IV_GEN_CTRL_V(0) | \
+		SCMD_LAST_FRAG_V((last)) | \
+		SCMD_MORE_FRAGS_V((more)) | \
+		SCMD_TLS_COMPPDU_V(0) | \
+		SCMD_KEY_CTX_INLINE_V((ctx_in)) | \
+		SCMD_TLS_FRAG_ENABLE_V(0) | \
+		SCMD_MAC_ONLY_V((mac)) |  \
+		SCMD_AADIVDROP_V((ivdrop)) | \
+		SCMD_HDR_LEN_V((len)))
+
+#define  FILL_KEY_CTX_HDR(ck_size, mk_size, d_ck, opad, ctx_len) \
+		htonl(KEY_CONTEXT_VALID_V(1) | \
+		      KEY_CONTEXT_CK_SIZE_V((ck_size)) | \
+		      KEY_CONTEXT_MK_SIZE_V(mk_size) | \
+		      KEY_CONTEXT_DUAL_CK_V((d_ck)) | \
+		      KEY_CONTEXT_OPAD_PRESENT_V((opad)) | \
+		      KEY_CONTEXT_SALT_PRESENT_V(1) | \
+		      KEY_CONTEXT_CTX_LEN_V((ctx_len)))
+
+#define FILL_WR_OP_CCTX_SIZE(len, ctx_len) \
+		htonl( \
+			FW_CRYPTO_LOOKASIDE_WR_OPCODE_V( \
+			FW_CRYPTO_LOOKASIDE_WR) | \
+			FW_CRYPTO_LOOKASIDE_WR_COMPL_V(0) | \
+			FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V((len)) | \
+			FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \
+			FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len)))
+
+#define FILL_WR_RX_Q_ID(cid, qid, wr_iv) \
+		htonl( \
+			FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \
+			FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \
+			FW_CRYPTO_LOOKASIDE_WR_LCB_V(0) | \
+			FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv)))
+
+#define FILL_ULPTX_CMD_DEST(cid) \
+	htonl(ULPTX_CMD_V(ULP_TX_PKT) | \
+	      ULP_TXPKT_DEST_V(0) | \
+	      ULP_TXPKT_DATAMODIFY_V(0) | \
+	      ULP_TXPKT_CHANNELID_V((cid)) | \
+	      ULP_TXPKT_RO_V(1) | \
+	      ULP_TXPKT_FID_V(0))
+
+#define KEYCTX_ALIGN_PAD(bs) ({unsigned int _bs = (bs);\
+			      _bs == SHA1_DIGEST_SIZE ? 12 : 0; })
+
+#define FILL_PLD_SIZE_HASH_SIZE(payload_sgl_len, sgl_lengths, total_frags) \
+	htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(payload_sgl_len ? \
+						sgl_lengths[total_frags] : 0) |\
+	      FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(0))
+
+#define FILL_LEN_PKD(calc_tx_flits_ofld, skb) \
+	htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP((\
+					   calc_tx_flits_ofld(skb) * 8), 16)))
+
+#define FILL_CMD_MORE(immdatalen) htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) |\
+					ULP_TX_SC_MORE_V((immdatalen) ? 0 : 1))
+
+#define MAX_NK 8
+#define CRYPTO_MAX_IMM_TX_PKT_LEN 256
+
+struct algo_param {
+	unsigned int auth_mode;
+	unsigned int mk_size;
+	unsigned int result_size;
+};
+
+struct hash_wr_param {
+	unsigned int opad_needed;
+	unsigned int more;
+	unsigned int last;
+	struct algo_param alg_prm;
+	unsigned int sg_len;
+	unsigned int bfr_len;
+	u64 scmd1;
+};
+
+enum {
+	AES_KEYLENGTH_128BIT = 128,
+	AES_KEYLENGTH_192BIT = 192,
+	AES_KEYLENGTH_256BIT = 256
+};
+
+enum {
+	KEYLENGTH_3BYTES = 3,
+	KEYLENGTH_4BYTES = 4,
+	KEYLENGTH_6BYTES = 6,
+	KEYLENGTH_8BYTES = 8
+};
+
+enum {
+	NUMBER_OF_ROUNDS_10 = 10,
+	NUMBER_OF_ROUNDS_12 = 12,
+	NUMBER_OF_ROUNDS_14 = 14,
+};
+
+/*
+ * CCM defines values of 4, 6, 8, 10, 12, 14, and 16 octets,
+ * where they indicate the size of the integrity check value (ICV)
+ */
+enum {
+	AES_CCM_ICV_4   = 4,
+	AES_CCM_ICV_6   = 6,
+	AES_CCM_ICV_8   = 8,
+	AES_CCM_ICV_10  = 10,
+	AES_CCM_ICV_12  = 12,
+	AES_CCM_ICV_14  = 14,
+	AES_CCM_ICV_16 = 16
+};
+
+struct hash_op_params {
+	unsigned char mk_size;
+	unsigned char pad_align;
+	unsigned char auth_mode;
+	char hash_name[MAX_HASH_NAME];
+	unsigned short block_size;
+	unsigned short word_size;
+	unsigned short ipad_size;
+};
+
+struct phys_sge_pairs {
+	__be16 len[8];
+	__be64 addr[8];
+};
+
+struct phys_sge_parm {
+	unsigned int nents;
+	unsigned int obsize;
+	unsigned short qid;
+	unsigned char align;
+};
+
+struct crypto_result {
+	struct completion completion;
+	int err;
+};
+
+static const u32 sha1_init[SHA1_DIGEST_SIZE / 4] = {
+		SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4,
+};
+
+static const u32 sha224_init[SHA256_DIGEST_SIZE / 4] = {
+		SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
+		SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
+};
+
+static const u32 sha256_init[SHA256_DIGEST_SIZE / 4] = {
+		SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+		SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
+};
+
+static const u64 sha384_init[SHA512_DIGEST_SIZE / 8] = {
+		SHA384_H0, SHA384_H1, SHA384_H2, SHA384_H3,
+		SHA384_H4, SHA384_H5, SHA384_H6, SHA384_H7,
+};
+
+static const u64 sha512_init[SHA512_DIGEST_SIZE / 8] = {
+		SHA512_H0, SHA512_H1, SHA512_H2, SHA512_H3,
+		SHA512_H4, SHA512_H5, SHA512_H6, SHA512_H7,
+};
+
+static inline void copy_hash_init_values(char *key, int digestsize)
+{
+	u8 i;
+	__be32 *dkey = (__be32 *)key;
+	u64 *ldkey = (u64 *)key;
+	__be64 *sha384 = (__be64 *)sha384_init;
+	__be64 *sha512 = (__be64 *)sha512_init;
+
+	switch (digestsize) {
+	case SHA1_DIGEST_SIZE:
+		for (i = 0; i < SHA1_INIT_STATE; i++)
+			dkey[i] = cpu_to_be32(sha1_init[i]);
+		break;
+	case SHA224_DIGEST_SIZE:
+		for (i = 0; i < SHA224_INIT_STATE; i++)
+			dkey[i] = cpu_to_be32(sha224_init[i]);
+		break;
+	case SHA256_DIGEST_SIZE:
+		for (i = 0; i < SHA256_INIT_STATE; i++)
+			dkey[i] = cpu_to_be32(sha256_init[i]);
+		break;
+	case SHA384_DIGEST_SIZE:
+		for (i = 0; i < SHA384_INIT_STATE; i++)
+			ldkey[i] = be64_to_cpu(sha384[i]);
+		break;
+	case SHA512_DIGEST_SIZE:
+		for (i = 0; i < SHA512_INIT_STATE; i++)
+			ldkey[i] = be64_to_cpu(sha512[i]);
+		break;
+	}
+}
+
+static const u8 sgl_lengths[20] = {
+	0, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15
+};
+
+/* Number of len fields(8) * size of one addr field */
+#define PHYSDSGL_MAX_LEN_SIZE 16
+
+static inline u16 get_space_for_phys_dsgl(unsigned int sgl_entr)
+{
+	/* len field size + addr field size */
+	return ((sgl_entr >> 3) + ((sgl_entr % 8) ?
+				   1 : 0)) * PHYSDSGL_MAX_LEN_SIZE +
+		(sgl_entr << 3) + ((sgl_entr % 2 ? 1 : 0) << 3);
+}
+
+/* The AES s-transform matrix (s-box). */
+static const u8 aes_sbox[256] = {
+	99,  124, 119, 123, 242, 107, 111, 197, 48,  1,   103, 43,  254, 215,
+	171, 118, 202, 130, 201, 125, 250, 89,  71,  240, 173, 212, 162, 175,
+	156, 164, 114, 192, 183, 253, 147, 38,  54,  63,  247, 204, 52,  165,
+	229, 241, 113, 216, 49,  21, 4,   199, 35,  195, 24,  150, 5, 154, 7,
+	18,  128, 226, 235, 39,  178, 117, 9,   131, 44,  26,  27,  110, 90,
+	160, 82,  59,  214, 179, 41,  227, 47,  132, 83,  209, 0,   237, 32,
+	252, 177, 91,  106, 203, 190, 57,  74,  76,  88,  207, 208, 239, 170,
+	251, 67,  77,  51,  133, 69,  249, 2,   127, 80,  60,  159, 168, 81,
+	163, 64,  143, 146, 157, 56,  245, 188, 182, 218, 33,  16,  255, 243,
+	210, 205, 12,  19,  236, 95,  151, 68,  23,  196, 167, 126, 61,  100,
+	93,  25,  115, 96,  129, 79,  220, 34,  42,  144, 136, 70,  238, 184,
+	20,  222, 94,  11,  219, 224, 50,  58,  10,  73,  6,   36,  92,  194,
+	211, 172, 98,  145, 149, 228, 121, 231, 200, 55,  109, 141, 213, 78,
+	169, 108, 86,  244, 234, 101, 122, 174, 8, 186, 120, 37,  46,  28, 166,
+	180, 198, 232, 221, 116, 31,  75,  189, 139, 138, 112, 62,  181, 102,
+	72,  3,   246, 14,  97,  53,  87,  185, 134, 193, 29,  158, 225, 248,
+	152, 17,  105, 217, 142, 148, 155, 30,  135, 233, 206, 85,  40,  223,
+	140, 161, 137, 13,  191, 230, 66,  104, 65,  153, 45,  15,  176, 84,
+	187, 22
+};
+
+static u32 aes_ks_subword(const u32 w)
+{
+	u8 bytes[4];
+
+	*(u32 *)(&bytes[0]) = w;
+	bytes[0] = aes_sbox[bytes[0]];
+	bytes[1] = aes_sbox[bytes[1]];
+	bytes[2] = aes_sbox[bytes[2]];
+	bytes[3] = aes_sbox[bytes[3]];
+	return *(u32 *)(&bytes[0]);
+}
+
+static u32 round_constant[11] = {
+	0x01000000, 0x02000000, 0x04000000, 0x08000000,
+	0x10000000, 0x20000000, 0x40000000, 0x80000000,
+	0x1B000000, 0x36000000, 0x6C000000
+};
+
+/* dec_key - OUTPUT - Reverse round key
+ * key - INPUT - key
+ * keylength - INPUT - length of the key in number of bits
+ */
+static inline void get_aes_decrypt_key(unsigned char *dec_key,
+				       const unsigned char *key,
+				       unsigned int keylength)
+{
+	u32 temp;
+	u32 w_ring[MAX_NK];
+	int i, j, k = 0;
+	u8  nr, nk;
+
+	switch (keylength) {
+	case AES_KEYLENGTH_128BIT:
+		nk = KEYLENGTH_4BYTES;
+		nr = NUMBER_OF_ROUNDS_10;
+		break;
+
+	case AES_KEYLENGTH_192BIT:
+		nk = KEYLENGTH_6BYTES;
+		nr = NUMBER_OF_ROUNDS_12;
+		break;
+	case AES_KEYLENGTH_256BIT:
+		nk = KEYLENGTH_8BYTES;
+		nr = NUMBER_OF_ROUNDS_14;
+		break;
+	default:
+		return;
+	}
+	for (i = 0; i < nk; i++ )
+		w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]);
+
+	i = 0;
+	temp = w_ring[nk - 1];
+	while(i + nk < (nr + 1) * 4) {
+		if(!(i % nk)) {
+			/* RotWord(temp) */
+			temp = (temp << 8) | (temp >> 24);
+			temp = aes_ks_subword(temp);
+			temp ^= round_constant[i / nk];
+		}
+		else if (nk == 8 && (i % 4 == 0))
+			temp = aes_ks_subword(temp);
+		w_ring[i % nk] ^= temp;
+		temp = w_ring[i % nk];
+		i++;
+	}
+	for (k = 0, j = i % nk; k < nk; k++) {
+		*((u32 *)dec_key + k) = htonl(w_ring[j]);
+		j--;
+		if(j < 0)
+			j += nk;
+	}
+}
+
+#endif /* __CHCR_ALGO_H__ */
diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
new file mode 100644
index 0000000..fb5f9bb
--- /dev/null
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -0,0 +1,238 @@
+/**
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (C) 2011-2016 Chelsio Communications.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ *
+ * Written and Maintained by:
+ * Manoj Malviya (manojmalviya@chelsio.com)
+ * Atul Gupta (atul.gupta@chelsio.com)
+ * Jitendra Lulla (jlulla@chelsio.com)
+ * Yeshaswi M R Gowda (yeshaswi@chelsio.com)
+ * Harsh Jain (harsh@chelsio.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <crypto/aes.h>
+#include <crypto/hash.h>
+
+#include "t4_msg.h"
+#include "chcr_core.h"
+#include "cxgb4_uld.h"
+
+static LIST_HEAD(uld_ctx_list);
+static DEFINE_MUTEX(dev_mutex);
+static atomic_t dev_count;
+
+typedef int (*chcr_handler_func)(struct chcr_dev *dev, unsigned char *input);
+static int cpl_fw6_pld_handler(struct chcr_dev *dev, unsigned char *input);
+static void *chcr_uld_add(const struct cxgb4_lld_info *lld);
+static int chcr_uld_state_change(void *handle, enum cxgb4_state state);
+
+static chcr_handler_func work_handlers[NUM_CPL_CMDS] = {
+	[CPL_FW6_PLD] = cpl_fw6_pld_handler,
+};
+
+static struct cxgb4_uld_info chcr_uld_info = {
+	.name = DRV_MODULE_NAME,
+	.nrxq = MAX_ULD_QSETS,
+	.rxq_size = 1024,
+	.add = chcr_uld_add,
+	.state_change = chcr_uld_state_change,
+	.rx_handler = chcr_uld_rx_handler,
+};
+
+int assign_chcr_device(struct chcr_dev **dev)
+{
+	struct uld_ctx *u_ctx;
+
+	/*
+	 * Which device to use if multiple devices are available TODO
+	 * May be select the device based on round robin. One session
+	 * must go to the same device to maintain the ordering.
+	 */
+	mutex_lock(&dev_mutex); /* TODO ? */
+	u_ctx = list_first_entry(&uld_ctx_list, struct uld_ctx, entry);
+	if (!u_ctx) {
+		mutex_unlock(&dev_mutex);
+		return -ENXIO;
+	}
+
+	*dev = u_ctx->dev;
+	mutex_unlock(&dev_mutex);
+	return 0;
+}
+
+static int chcr_dev_add(struct uld_ctx *u_ctx)
+{
+	struct chcr_dev *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENXIO;
+
+	spin_lock_init(&dev->lock_chcr_dev);
+	u_ctx->dev = dev;
+	dev->u_ctx = u_ctx;
+	atomic_inc(&dev_count);
+	return 0;
+}
+
+static int chcr_dev_remove(struct uld_ctx *u_ctx)
+{
+	kfree(u_ctx->dev);
+	u_ctx->dev = NULL;
+	atomic_dec(&dev_count);
+	return 0;
+}
+
+static int cpl_fw6_pld_handler(struct chcr_dev *dev,
+			       unsigned char *input)
+{
+	struct crypto_async_request *req;
+	struct cpl_fw6_pld *fw6_pld;
+	u32 ack_err_status = 0;
+	int error_status = 0;
+
+	fw6_pld = (struct cpl_fw6_pld *)input;
+	req = (struct crypto_async_request *)(uintptr_t)be64_to_cpu(
+						    fw6_pld->data[1]);
+
+	ack_err_status =
+		ntohl(*(__be32 *)((unsigned char *)&fw6_pld->data[0] + 4));
+	if (ack_err_status) {
+		if (CHK_MAC_ERR_BIT(ack_err_status) ||
+		    CHK_PAD_ERR_BIT(ack_err_status))
+			error_status = -EINVAL;
+	}
+	/* call completion callback with failure status */
+	if (req) {
+		if (!chcr_handle_resp(req, input, error_status))
+			req->complete(req, error_status);
+		else
+			return -EINVAL;
+	} else {
+		pr_err("Incorrect request address from the firmware\n");
+		return -EFAULT;
+	}
+	return 0;
+}
+
+int chcr_send_wr(struct sk_buff *skb)
+{
+	return cxgb4_ofld_send(skb->dev, skb);
+}
+
+static void *chcr_uld_add(const struct cxgb4_lld_info *lld)
+{
+	struct uld_ctx *u_ctx;
+
+	/* Create the device and add it in the device list */
+	u_ctx = kzalloc(sizeof(*u_ctx), GFP_KERNEL);
+	if (!u_ctx) {
+		u_ctx = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+	u_ctx->lldi = *lld;
+	mutex_lock(&dev_mutex);
+	list_add_tail(&u_ctx->entry, &uld_ctx_list);
+	mutex_unlock(&dev_mutex);
+out:
+	return u_ctx;
+}
+
+int chcr_uld_rx_handler(void *handle, const __be64 *rsp,
+			const struct pkt_gl *pgl)
+{
+	struct uld_ctx *u_ctx = (struct uld_ctx *)handle;
+	struct chcr_dev *dev = u_ctx->dev;
+	const struct cpl_act_establish *rpl = (struct cpl_act_establish
+					       *)rsp;
+
+	if (rpl->ot.opcode != CPL_FW6_PLD) {
+		pr_err("Unsupported opcode\n");
+		return 0;
+	}
+
+	if (!pgl)
+		work_handlers[rpl->ot.opcode](dev, (unsigned char *)&rsp[1]);
+	else
+		work_handlers[rpl->ot.opcode](dev, pgl->va);
+	return 0;
+}
+
+static int chcr_uld_state_change(void *handle, enum cxgb4_state state)
+{
+	struct uld_ctx *u_ctx = handle;
+	int ret = 0;
+
+	switch (state) {
+	case CXGB4_STATE_UP:
+		if (!u_ctx->dev) {
+			ret = chcr_dev_add(u_ctx);
+			if (ret != 0)
+				return ret;
+		}
+		if (atomic_read(&dev_count) == 1)
+			ret = start_crypto();
+		break;
+
+	case CXGB4_STATE_DETACH:
+		if (u_ctx->dev) {
+			mutex_lock(&dev_mutex);
+			chcr_dev_remove(u_ctx);
+			mutex_unlock(&dev_mutex);
+		}
+		if (!atomic_read(&dev_count))
+			stop_crypto();
+		break;
+
+	case CXGB4_STATE_START_RECOVERY:
+	case CXGB4_STATE_DOWN:
+	default:
+		break;
+	}
+	return ret;
+}
+
+static int __init chcr_crypto_init(void)
+{
+	if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) {
+		pr_err("ULD register fail: No chcr crypto support in cxgb4");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void __exit chcr_crypto_exit(void)
+{
+	struct uld_ctx *u_ctx, *tmp;
+
+	if (atomic_read(&dev_count))
+		stop_crypto();
+
+	/* Remove all devices from list */
+	mutex_lock(&dev_mutex);
+	list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) {
+		if (u_ctx->dev)
+			chcr_dev_remove(u_ctx);
+		kfree(u_ctx);
+	}
+	mutex_unlock(&dev_mutex);
+	cxgb4_unregister_uld(CXGB4_ULD_CRYPTO);
+}
+
+module_init(chcr_crypto_init);
+module_exit(chcr_crypto_exit);
+
+MODULE_DESCRIPTION("Crypto Co-processor for Chelsio Terminator cards.");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Chelsio Communications");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
new file mode 100644
index 0000000..2a5c671
--- /dev/null
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -0,0 +1,80 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __CHCR_CORE_H__
+#define __CHCR_CORE_H__
+
+#include <crypto/algapi.h>
+#include "t4_hw.h"
+#include "cxgb4.h"
+#include "cxgb4_uld.h"
+
+#define DRV_MODULE_NAME "chcr"
+#define DRV_VERSION "1.0.0.0"
+
+#define MAX_PENDING_REQ_TO_HW 20
+#define CHCR_TEST_RESPONSE_TIMEOUT 1000
+
+#define PAD_ERROR_BIT		1
+#define CHK_PAD_ERR_BIT(x)	(((x) >> PAD_ERROR_BIT) & 1)
+
+#define MAC_ERROR_BIT		0
+#define CHK_MAC_ERR_BIT(x)	(((x) >> MAC_ERROR_BIT) & 1)
+
+struct uld_ctx;
+
+struct chcr_dev {
+	/* Request submited to h/w and waiting for response. */
+	spinlock_t lock_chcr_dev;
+	struct crypto_queue pending_queue;
+	struct uld_ctx *u_ctx;
+	unsigned char tx_channel_id;
+};
+
+struct uld_ctx {
+	struct list_head entry;
+	struct cxgb4_lld_info lldi;
+	struct chcr_dev *dev;
+};
+
+int assign_chcr_device(struct chcr_dev **dev);
+int chcr_send_wr(struct sk_buff *skb);
+int start_crypto(void);
+int stop_crypto(void);
+int chcr_uld_rx_handler(void *handle, const __be64 *rsp,
+			const struct pkt_gl *pgl);
+int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
+		     int err);
+#endif /* __CHCR_CORE_H__ */
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
new file mode 100644
index 0000000..d7d7560
--- /dev/null
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -0,0 +1,203 @@
+/*
+ * This file is part of the Chelsio T6 Crypto driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __CHCR_CRYPTO_H__
+#define __CHCR_CRYPTO_H__
+
+/* Define following if h/w is not dropping the AAD and IV data before
+ * giving the processed data
+ */
+
+#define CHCR_CRA_PRIORITY 300
+
+#define CHCR_AES_MAX_KEY_LEN  (2 * (AES_MAX_KEY_SIZE)) /* consider xts */
+#define CHCR_MAX_CRYPTO_IV_LEN 16 /* AES IV len */
+
+#define CHCR_MAX_AUTHENC_AES_KEY_LEN 32 /* max aes key length*/
+#define CHCR_MAX_AUTHENC_SHA_KEY_LEN 128 /* max sha key length*/
+
+#define CHCR_GIVENCRYPT_OP 2
+/* CPL/SCMD parameters */
+
+#define CHCR_ENCRYPT_OP 0
+#define CHCR_DECRYPT_OP 1
+
+#define CHCR_SCMD_SEQ_NO_CTRL_32BIT     1
+#define CHCR_SCMD_SEQ_NO_CTRL_48BIT     2
+#define CHCR_SCMD_SEQ_NO_CTRL_64BIT     3
+
+#define CHCR_SCMD_PROTO_VERSION_GENERIC 4
+
+#define CHCR_SCMD_AUTH_CTRL_AUTH_CIPHER 0
+#define CHCR_SCMD_AUTH_CTRL_CIPHER_AUTH 1
+
+#define CHCR_SCMD_CIPHER_MODE_NOP           0
+#define CHCR_SCMD_CIPHER_MODE_AES_CBC       1
+#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES   4
+#define CHCR_SCMD_CIPHER_MODE_AES_XTS       6
+
+#define CHCR_SCMD_AUTH_MODE_NOP             0
+#define CHCR_SCMD_AUTH_MODE_SHA1            1
+#define CHCR_SCMD_AUTH_MODE_SHA224          2
+#define CHCR_SCMD_AUTH_MODE_SHA256          3
+#define CHCR_SCMD_AUTH_MODE_SHA512_224      5
+#define CHCR_SCMD_AUTH_MODE_SHA512_256      6
+#define CHCR_SCMD_AUTH_MODE_SHA512_384      7
+#define CHCR_SCMD_AUTH_MODE_SHA512_512      8
+
+#define CHCR_SCMD_HMAC_CTRL_NOP             0
+#define CHCR_SCMD_HMAC_CTRL_NO_TRUNC        1
+
+#define CHCR_SCMD_IVGEN_CTRL_HW             0
+#define CHCR_SCMD_IVGEN_CTRL_SW             1
+/* This are not really mac key size. They are intermediate values
+ * of sha engine and its size
+ */
+#define CHCR_KEYCTX_MAC_KEY_SIZE_128        0
+#define CHCR_KEYCTX_MAC_KEY_SIZE_160        1
+#define CHCR_KEYCTX_MAC_KEY_SIZE_192        2
+#define CHCR_KEYCTX_MAC_KEY_SIZE_256        3
+#define CHCR_KEYCTX_MAC_KEY_SIZE_512        4
+#define CHCR_KEYCTX_CIPHER_KEY_SIZE_128     0
+#define CHCR_KEYCTX_CIPHER_KEY_SIZE_192     1
+#define CHCR_KEYCTX_CIPHER_KEY_SIZE_256     2
+#define CHCR_KEYCTX_NO_KEY                  15
+
+#define CHCR_CPL_FW4_PLD_IV_OFFSET          (5 * 64) /* bytes. flt #5 and #6 */
+#define CHCR_CPL_FW4_PLD_HASH_RESULT_OFFSET (7 * 64) /* bytes. flt #7 */
+#define CHCR_CPL_FW4_PLD_DATA_SIZE          (4 * 64) /* bytes. flt #4 to #7 */
+
+#define KEY_CONTEXT_HDR_SALT_AND_PAD	    16
+#define flits_to_bytes(x)  (x * 8)
+
+#define IV_NOP                  0
+#define IV_IMMEDIATE            1
+#define IV_DSGL			2
+
+#define CRYPTO_ALG_SUB_TYPE_MASK            0x0f000000
+#define CRYPTO_ALG_SUB_TYPE_HASH_HMAC       0x01000000
+#define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\
+			      CRYPTO_ALG_SUB_TYPE_HASH_HMAC)
+
+#define MAX_SALT                4
+#define MAX_SCRATCH_PAD_SIZE    32
+
+#define CHCR_HASH_MAX_BLOCK_SIZE_64  64
+#define CHCR_HASH_MAX_BLOCK_SIZE_128 128
+
+/* Aligned to 128 bit boundary */
+struct _key_ctx {
+	__be32 ctx_hdr;
+	u8 salt[MAX_SALT];
+	__be64 reserverd;
+	unsigned char key[0];
+};
+
+struct ablk_ctx {
+	u8 enc;
+	unsigned int processed_len;
+	__be32 key_ctx_hdr;
+	unsigned int enckey_len;
+	unsigned int dst_nents;
+	struct scatterlist iv_sg;
+	u8 key[CHCR_AES_MAX_KEY_LEN];
+	u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
+	unsigned char ciph_mode;
+};
+
+struct hmac_ctx {
+	struct shash_desc *desc;
+	u8 ipad[CHCR_HASH_MAX_BLOCK_SIZE_128];
+	u8 opad[CHCR_HASH_MAX_BLOCK_SIZE_128];
+};
+
+struct __crypto_ctx {
+	struct hmac_ctx hmacctx[0];
+	struct ablk_ctx ablkctx[0];
+};
+
+struct chcr_context {
+	struct chcr_dev *dev;
+	unsigned char tx_channel_id;
+	struct __crypto_ctx crypto_ctx[0];
+};
+
+struct chcr_ahash_req_ctx {
+	u32 result;
+	char bfr[CHCR_HASH_MAX_BLOCK_SIZE_128];
+	u8 bfr_len;
+	/* DMA the partial hash in it */
+	u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE];
+	u64 data_len;  /* Data len till time */
+	void *dummy_payload_ptr;
+	/* SKB which is being sent to the hardware for processing */
+	struct sk_buff *skb;
+};
+
+struct chcr_blkcipher_req_ctx {
+	struct sk_buff *skb;
+};
+
+struct chcr_alg_template {
+	u32 type;
+	u32 is_registered;
+	union {
+		struct crypto_alg crypto;
+		struct ahash_alg hash;
+	} alg;
+};
+
+struct chcr_req_ctx {
+	union {
+		struct ahash_request *ahash_req;
+		struct ablkcipher_request *ablk_req;
+	} req;
+	union {
+		struct chcr_ahash_req_ctx *ahash_ctx;
+		struct chcr_blkcipher_req_ctx *ablk_ctx;
+	} ctx;
+};
+
+struct sge_opaque_hdr {
+	void *dev;
+	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+};
+
+typedef struct sk_buff *(*create_wr_t)(struct crypto_async_request *req,
+				       struct chcr_context *ctx,
+				       unsigned short qid,
+				       unsigned short op_type);
+
+#endif /* __CHCR_CRYPTO_H__ */
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 769148d..20f35df 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -1260,8 +1260,8 @@
 			.setkey = qat_alg_ablkcipher_xts_setkey,
 			.decrypt = qat_alg_ablkcipher_decrypt,
 			.encrypt = qat_alg_ablkcipher_encrypt,
-			.min_keysize = AES_MIN_KEY_SIZE,
-			.max_keysize = AES_MAX_KEY_SIZE,
+			.min_keysize = 2 * AES_MIN_KEY_SIZE,
+			.max_keysize = 2 * AES_MAX_KEY_SIZE,
 			.ivsize = AES_BLOCK_SIZE,
 		},
 	},
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
index cfb2541..24353ec3 100644
--- a/drivers/crypto/vmx/aes_xts.c
+++ b/drivers/crypto/vmx/aes_xts.c
@@ -129,8 +129,8 @@
 
 		blkcipher_walk_init(&walk, dst, src, nbytes);
 
-		iv = (u8 *)walk.iv;
 		ret = blkcipher_walk_virt(desc, &walk);
+		iv = walk.iv;
 		memset(tweak, 0, AES_BLOCK_SIZE);
 		aes_p8_encrypt(iv, tweak, &ctx->tweak_key);
 
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 803f395..29f600f 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -459,7 +459,7 @@
 	}
 
 	pgoff = linear_page_index(vma, pmd_addr);
-	phys = pgoff_to_phys(dax_dev, pgoff, PAGE_SIZE);
+	phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
 	if (phys == -1) {
 		dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
 				pgoff);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index dfb1685..1f01e98 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -116,6 +116,9 @@
 	if (rc)
 		return rc;
 
+	/* adjust the dax_region resource to the start of data */
+	res.start += le64_to_cpu(pfn_sb->dataoff);
+
 	nd_region = to_nd_region(dev->parent);
 	dax_region = alloc_dax_region(dev, nd_region->id, &res,
 			le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP);
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index a5be56e..41254e7 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -76,7 +76,7 @@
 
 config ARM_EXYNOS_BUS_DEVFREQ
 	tristate "ARM EXYNOS Generic Memory Bus DEVFREQ Driver"
-	depends on ARCH_EXYNOS
+	depends on ARCH_EXYNOS || COMPILE_TEST
 	select DEVFREQ_GOV_SIMPLE_ONDEMAND
 	select DEVFREQ_GOV_PASSIVE
 	select DEVFREQ_EVENT_EXYNOS_PPMU
@@ -91,14 +91,26 @@
 	  This does not yet operate with optimal voltages.
 
 config ARM_TEGRA_DEVFREQ
-       tristate "Tegra DEVFREQ Driver"
-       depends on ARCH_TEGRA_124_SOC
-       select DEVFREQ_GOV_SIMPLE_ONDEMAND
-       select PM_OPP
-       help
-         This adds the DEVFREQ driver for the Tegra family of SoCs.
-         It reads ACTMON counters of memory controllers and adjusts the
-         operating frequencies and voltages with OPP support.
+	tristate "Tegra DEVFREQ Driver"
+	depends on ARCH_TEGRA_124_SOC
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select PM_OPP
+	help
+	  This adds the DEVFREQ driver for the Tegra family of SoCs.
+	  It reads ACTMON counters of memory controllers and adjusts the
+	  operating frequencies and voltages with OPP support.
+
+config ARM_RK3399_DMC_DEVFREQ
+	tristate "ARM RK3399 DMC DEVFREQ Driver"
+	depends on ARCH_ROCKCHIP
+	select DEVFREQ_EVENT_ROCKCHIP_DFI
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select PM_DEVFREQ_EVENT
+	select PM_OPP
+	help
+          This adds the DEVFREQ driver for the RK3399 DMC(Dynamic Memory Controller).
+          It sets the frequency for the memory controller and reads the usage counts
+          from hardware.
 
 source "drivers/devfreq/event/Kconfig"
 
diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile
index 09f11d9..fbff40a 100644
--- a/drivers/devfreq/Makefile
+++ b/drivers/devfreq/Makefile
@@ -8,6 +8,7 @@
 
 # DEVFREQ Drivers
 obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ)	+= exynos-bus.o
+obj-$(CONFIG_ARM_RK3399_DMC_DEVFREQ)	+= rk3399_dmc.o
 obj-$(CONFIG_ARM_TEGRA_DEVFREQ)		+= tegra-devfreq.o
 
 # DEVFREQ Event Drivers
diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig
index eb6f74a..0fdae86 100644
--- a/drivers/devfreq/event/Kconfig
+++ b/drivers/devfreq/event/Kconfig
@@ -15,7 +15,7 @@
 
 config DEVFREQ_EVENT_EXYNOS_NOCP
 	tristate "EXYNOS NoC (Network On Chip) Probe DEVFREQ event Driver"
-	depends on ARCH_EXYNOS
+	depends on ARCH_EXYNOS || COMPILE_TEST
 	select PM_OPP
 	help
 	  This add the devfreq-event driver for Exynos SoC. It provides NoC
@@ -23,11 +23,18 @@
 
 config DEVFREQ_EVENT_EXYNOS_PPMU
 	tristate "EXYNOS PPMU (Platform Performance Monitoring Unit) DEVFREQ event Driver"
-	depends on ARCH_EXYNOS
+	depends on ARCH_EXYNOS || COMPILE_TEST
 	select PM_OPP
 	help
 	  This add the devfreq-event driver for Exynos SoC. It provides PPMU
 	  (Platform Performance Monitoring Unit) counters to estimate the
 	  utilization of each module.
 
+config DEVFREQ_EVENT_ROCKCHIP_DFI
+	tristate "ROCKCHIP DFI DEVFREQ event Driver"
+	depends on ARCH_ROCKCHIP
+	help
+	  This add the devfreq-event driver for Rockchip SoC. It provides DFI
+	  (DDR Monitor Module) driver to count ddr load.
+
 endif # PM_DEVFREQ_EVENT
diff --git a/drivers/devfreq/event/Makefile b/drivers/devfreq/event/Makefile
index 3d6afd3..dda7090 100644
--- a/drivers/devfreq/event/Makefile
+++ b/drivers/devfreq/event/Makefile
@@ -2,3 +2,4 @@
 
 obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_NOCP) += exynos-nocp.o
 obj-$(CONFIG_DEVFREQ_EVENT_EXYNOS_PPMU) += exynos-ppmu.o
+obj-$(CONFIG_DEVFREQ_EVENT_ROCKCHIP_DFI) += rockchip-dfi.o
diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index 845bf25..f55cf0e 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -406,8 +406,6 @@
 		of_property_read_string(node, "event-name", &desc[j].name);
 
 		j++;
-
-		of_node_put(node);
 	}
 	info->desc = desc;
 
diff --git a/drivers/devfreq/event/rockchip-dfi.c b/drivers/devfreq/event/rockchip-dfi.c
new file mode 100644
index 0000000..43fcc5a
--- /dev/null
+++ b/drivers/devfreq/event/rockchip-dfi.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd
+ * Author: Lin Huang <hl@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/devfreq-event.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/of.h>
+
+#define RK3399_DMC_NUM_CH	2
+
+/* DDRMON_CTRL */
+#define DDRMON_CTRL	0x04
+#define CLR_DDRMON_CTRL	(0x1f0000 << 0)
+#define LPDDR4_EN	(0x10001 << 4)
+#define HARDWARE_EN	(0x10001 << 3)
+#define LPDDR3_EN	(0x10001 << 2)
+#define SOFTWARE_EN	(0x10001 << 1)
+#define SOFTWARE_DIS	(0x10000 << 1)
+#define TIME_CNT_EN	(0x10001 << 0)
+
+#define DDRMON_CH0_COUNT_NUM		0x28
+#define DDRMON_CH0_DFI_ACCESS_NUM	0x2c
+#define DDRMON_CH1_COUNT_NUM		0x3c
+#define DDRMON_CH1_DFI_ACCESS_NUM	0x40
+
+/* pmu grf */
+#define PMUGRF_OS_REG2	0x308
+#define DDRTYPE_SHIFT	13
+#define DDRTYPE_MASK	7
+
+enum {
+	DDR3 = 3,
+	LPDDR3 = 6,
+	LPDDR4 = 7,
+	UNUSED = 0xFF
+};
+
+struct dmc_usage {
+	u32 access;
+	u32 total;
+};
+
+/*
+ * The dfi controller can monitor DDR load. It has an upper and lower threshold
+ * for the operating points. Whenever the usage leaves these bounds an event is
+ * generated to indicate the DDR frequency should be changed.
+ */
+struct rockchip_dfi {
+	struct devfreq_event_dev *edev;
+	struct devfreq_event_desc *desc;
+	struct dmc_usage ch_usage[RK3399_DMC_NUM_CH];
+	struct device *dev;
+	void __iomem *regs;
+	struct regmap *regmap_pmu;
+	struct clk *clk;
+};
+
+static void rockchip_dfi_start_hardware_counter(struct devfreq_event_dev *edev)
+{
+	struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+	void __iomem *dfi_regs = info->regs;
+	u32 val;
+	u32 ddr_type;
+
+	/* get ddr type */
+	regmap_read(info->regmap_pmu, PMUGRF_OS_REG2, &val);
+	ddr_type = (val >> DDRTYPE_SHIFT) & DDRTYPE_MASK;
+
+	/* clear DDRMON_CTRL setting */
+	writel_relaxed(CLR_DDRMON_CTRL, dfi_regs + DDRMON_CTRL);
+
+	/* set ddr type to dfi */
+	if (ddr_type == LPDDR3)
+		writel_relaxed(LPDDR3_EN, dfi_regs + DDRMON_CTRL);
+	else if (ddr_type == LPDDR4)
+		writel_relaxed(LPDDR4_EN, dfi_regs + DDRMON_CTRL);
+
+	/* enable count, use software mode */
+	writel_relaxed(SOFTWARE_EN, dfi_regs + DDRMON_CTRL);
+}
+
+static void rockchip_dfi_stop_hardware_counter(struct devfreq_event_dev *edev)
+{
+	struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+	void __iomem *dfi_regs = info->regs;
+
+	writel_relaxed(SOFTWARE_DIS, dfi_regs + DDRMON_CTRL);
+}
+
+static int rockchip_dfi_get_busier_ch(struct devfreq_event_dev *edev)
+{
+	struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+	u32 tmp, max = 0;
+	u32 i, busier_ch = 0;
+	void __iomem *dfi_regs = info->regs;
+
+	rockchip_dfi_stop_hardware_counter(edev);
+
+	/* Find out which channel is busier */
+	for (i = 0; i < RK3399_DMC_NUM_CH; i++) {
+		info->ch_usage[i].access = readl_relaxed(dfi_regs +
+				DDRMON_CH0_DFI_ACCESS_NUM + i * 20) * 4;
+		info->ch_usage[i].total = readl_relaxed(dfi_regs +
+				DDRMON_CH0_COUNT_NUM + i * 20);
+		tmp = info->ch_usage[i].access;
+		if (tmp > max) {
+			busier_ch = i;
+			max = tmp;
+		}
+	}
+	rockchip_dfi_start_hardware_counter(edev);
+
+	return busier_ch;
+}
+
+static int rockchip_dfi_disable(struct devfreq_event_dev *edev)
+{
+	struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+
+	rockchip_dfi_stop_hardware_counter(edev);
+	clk_disable_unprepare(info->clk);
+
+	return 0;
+}
+
+static int rockchip_dfi_enable(struct devfreq_event_dev *edev)
+{
+	struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+	int ret;
+
+	ret = clk_prepare_enable(info->clk);
+	if (ret) {
+		dev_err(&edev->dev, "failed to enable dfi clk: %d\n", ret);
+		return ret;
+	}
+
+	rockchip_dfi_start_hardware_counter(edev);
+	return 0;
+}
+
+static int rockchip_dfi_set_event(struct devfreq_event_dev *edev)
+{
+	return 0;
+}
+
+static int rockchip_dfi_get_event(struct devfreq_event_dev *edev,
+				  struct devfreq_event_data *edata)
+{
+	struct rockchip_dfi *info = devfreq_event_get_drvdata(edev);
+	int busier_ch;
+
+	busier_ch = rockchip_dfi_get_busier_ch(edev);
+
+	edata->load_count = info->ch_usage[busier_ch].access;
+	edata->total_count = info->ch_usage[busier_ch].total;
+
+	return 0;
+}
+
+static const struct devfreq_event_ops rockchip_dfi_ops = {
+	.disable = rockchip_dfi_disable,
+	.enable = rockchip_dfi_enable,
+	.get_event = rockchip_dfi_get_event,
+	.set_event = rockchip_dfi_set_event,
+};
+
+static const struct of_device_id rockchip_dfi_id_match[] = {
+	{ .compatible = "rockchip,rk3399-dfi" },
+	{ },
+};
+
+static int rockchip_dfi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rockchip_dfi *data;
+	struct resource *res;
+	struct devfreq_event_desc *desc;
+	struct device_node *np = pdev->dev.of_node, *node;
+
+	data = devm_kzalloc(dev, sizeof(struct rockchip_dfi), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	data->regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(data->regs))
+		return PTR_ERR(data->regs);
+
+	data->clk = devm_clk_get(dev, "pclk_ddr_mon");
+	if (IS_ERR(data->clk)) {
+		dev_err(dev, "Cannot get the clk dmc_clk\n");
+		return PTR_ERR(data->clk);
+	};
+
+	/* try to find the optional reference to the pmu syscon */
+	node = of_parse_phandle(np, "rockchip,pmu", 0);
+	if (node) {
+		data->regmap_pmu = syscon_node_to_regmap(node);
+		if (IS_ERR(data->regmap_pmu))
+			return PTR_ERR(data->regmap_pmu);
+	}
+	data->dev = dev;
+
+	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+
+	desc->ops = &rockchip_dfi_ops;
+	desc->driver_data = data;
+	desc->name = np->name;
+	data->desc = desc;
+
+	data->edev = devm_devfreq_event_add_edev(&pdev->dev, desc);
+	if (IS_ERR(data->edev)) {
+		dev_err(&pdev->dev,
+			"failed to add devfreq-event device\n");
+		return PTR_ERR(data->edev);
+	}
+
+	platform_set_drvdata(pdev, data);
+
+	return 0;
+}
+
+static struct platform_driver rockchip_dfi_driver = {
+	.probe	= rockchip_dfi_probe,
+	.driver = {
+		.name	= "rockchip-dfi",
+		.of_match_table = rockchip_dfi_id_match,
+	},
+};
+module_platform_driver(rockchip_dfi_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Lin Huang <hl@rock-chips.com>");
+MODULE_DESCRIPTION("Rockchip DFI driver");
diff --git a/drivers/devfreq/rk3399_dmc.c b/drivers/devfreq/rk3399_dmc.c
new file mode 100644
index 0000000..e24b73d
--- /dev/null
+++ b/drivers/devfreq/rk3399_dmc.c
@@ -0,0 +1,470 @@
+/*
+ * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd.
+ * Author: Lin Huang <hl@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/devfreq.h>
+#include <linux/devfreq-event.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/regulator/consumer.h>
+#include <linux/rwsem.h>
+#include <linux/suspend.h>
+
+#include <soc/rockchip/rockchip_sip.h>
+
+struct dram_timing {
+	unsigned int ddr3_speed_bin;
+	unsigned int pd_idle;
+	unsigned int sr_idle;
+	unsigned int sr_mc_gate_idle;
+	unsigned int srpd_lite_idle;
+	unsigned int standby_idle;
+	unsigned int auto_pd_dis_freq;
+	unsigned int dram_dll_dis_freq;
+	unsigned int phy_dll_dis_freq;
+	unsigned int ddr3_odt_dis_freq;
+	unsigned int ddr3_drv;
+	unsigned int ddr3_odt;
+	unsigned int phy_ddr3_ca_drv;
+	unsigned int phy_ddr3_dq_drv;
+	unsigned int phy_ddr3_odt;
+	unsigned int lpddr3_odt_dis_freq;
+	unsigned int lpddr3_drv;
+	unsigned int lpddr3_odt;
+	unsigned int phy_lpddr3_ca_drv;
+	unsigned int phy_lpddr3_dq_drv;
+	unsigned int phy_lpddr3_odt;
+	unsigned int lpddr4_odt_dis_freq;
+	unsigned int lpddr4_drv;
+	unsigned int lpddr4_dq_odt;
+	unsigned int lpddr4_ca_odt;
+	unsigned int phy_lpddr4_ca_drv;
+	unsigned int phy_lpddr4_ck_cs_drv;
+	unsigned int phy_lpddr4_dq_drv;
+	unsigned int phy_lpddr4_odt;
+};
+
+struct rk3399_dmcfreq {
+	struct device *dev;
+	struct devfreq *devfreq;
+	struct devfreq_simple_ondemand_data ondemand_data;
+	struct clk *dmc_clk;
+	struct devfreq_event_dev *edev;
+	struct mutex lock;
+	struct dram_timing timing;
+
+	/*
+	 * DDR Converser of Frequency (DCF) is used to implement DDR frequency
+	 * conversion without the participation of CPU, we will implement and
+	 * control it in arm trust firmware.
+	 */
+	wait_queue_head_t	wait_dcf_queue;
+	int irq;
+	int wait_dcf_flag;
+	struct regulator *vdd_center;
+	unsigned long rate, target_rate;
+	unsigned long volt, target_volt;
+	struct dev_pm_opp *curr_opp;
+};
+
+static int rk3399_dmcfreq_target(struct device *dev, unsigned long *freq,
+				 u32 flags)
+{
+	struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long old_clk_rate = dmcfreq->rate;
+	unsigned long target_volt, target_rate;
+	int err;
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(opp)) {
+		rcu_read_unlock();
+		return PTR_ERR(opp);
+	}
+
+	target_rate = dev_pm_opp_get_freq(opp);
+	target_volt = dev_pm_opp_get_voltage(opp);
+
+	dmcfreq->rate = dev_pm_opp_get_freq(dmcfreq->curr_opp);
+	dmcfreq->volt = dev_pm_opp_get_voltage(dmcfreq->curr_opp);
+
+	rcu_read_unlock();
+
+	if (dmcfreq->rate == target_rate)
+		return 0;
+
+	mutex_lock(&dmcfreq->lock);
+
+	/*
+	 * If frequency scaling from low to high, adjust voltage first.
+	 * If frequency scaling from high to low, adjust frequency first.
+	 */
+	if (old_clk_rate < target_rate) {
+		err = regulator_set_voltage(dmcfreq->vdd_center, target_volt,
+					    target_volt);
+		if (err) {
+			dev_err(dev, "Cannot to set voltage %lu uV\n",
+				target_volt);
+			goto out;
+		}
+	}
+	dmcfreq->wait_dcf_flag = 1;
+
+	err = clk_set_rate(dmcfreq->dmc_clk, target_rate);
+	if (err) {
+		dev_err(dev, "Cannot to set frequency %lu (%d)\n",
+			target_rate, err);
+		regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt,
+				      dmcfreq->volt);
+		goto out;
+	}
+
+	/*
+	 * Wait until bcf irq happen, it means freq scaling finish in
+	 * arm trust firmware, use 100ms as timeout time.
+	 */
+	if (!wait_event_timeout(dmcfreq->wait_dcf_queue,
+				!dmcfreq->wait_dcf_flag, HZ / 10))
+		dev_warn(dev, "Timeout waiting for dcf interrupt\n");
+
+	/*
+	 * Check the dpll rate,
+	 * There only two result we will get,
+	 * 1. Ddr frequency scaling fail, we still get the old rate.
+	 * 2. Ddr frequency scaling sucessful, we get the rate we set.
+	 */
+	dmcfreq->rate = clk_get_rate(dmcfreq->dmc_clk);
+
+	/* If get the incorrect rate, set voltage to old value. */
+	if (dmcfreq->rate != target_rate) {
+		dev_err(dev, "Get wrong ddr frequency, Request frequency %lu,\
+			Current frequency %lu\n", target_rate, dmcfreq->rate);
+		regulator_set_voltage(dmcfreq->vdd_center, dmcfreq->volt,
+				      dmcfreq->volt);
+		goto out;
+	} else if (old_clk_rate > target_rate)
+		err = regulator_set_voltage(dmcfreq->vdd_center, target_volt,
+					    target_volt);
+	if (err)
+		dev_err(dev, "Cannot to set vol %lu uV\n", target_volt);
+
+	dmcfreq->curr_opp = opp;
+out:
+	mutex_unlock(&dmcfreq->lock);
+	return err;
+}
+
+static int rk3399_dmcfreq_get_dev_status(struct device *dev,
+					 struct devfreq_dev_status *stat)
+{
+	struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+	struct devfreq_event_data edata;
+	int ret = 0;
+
+	ret = devfreq_event_get_event(dmcfreq->edev, &edata);
+	if (ret < 0)
+		return ret;
+
+	stat->current_frequency = dmcfreq->rate;
+	stat->busy_time = edata.load_count;
+	stat->total_time = edata.total_count;
+
+	return ret;
+}
+
+static int rk3399_dmcfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+
+	*freq = dmcfreq->rate;
+
+	return 0;
+}
+
+static struct devfreq_dev_profile rk3399_devfreq_dmc_profile = {
+	.polling_ms	= 200,
+	.target		= rk3399_dmcfreq_target,
+	.get_dev_status	= rk3399_dmcfreq_get_dev_status,
+	.get_cur_freq	= rk3399_dmcfreq_get_cur_freq,
+};
+
+static __maybe_unused int rk3399_dmcfreq_suspend(struct device *dev)
+{
+	struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+	int ret = 0;
+
+	ret = devfreq_event_disable_edev(dmcfreq->edev);
+	if (ret < 0) {
+		dev_err(dev, "failed to disable the devfreq-event devices\n");
+		return ret;
+	}
+
+	ret = devfreq_suspend_device(dmcfreq->devfreq);
+	if (ret < 0) {
+		dev_err(dev, "failed to suspend the devfreq devices\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static __maybe_unused int rk3399_dmcfreq_resume(struct device *dev)
+{
+	struct rk3399_dmcfreq *dmcfreq = dev_get_drvdata(dev);
+	int ret = 0;
+
+	ret = devfreq_event_enable_edev(dmcfreq->edev);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable the devfreq-event devices\n");
+		return ret;
+	}
+
+	ret = devfreq_resume_device(dmcfreq->devfreq);
+	if (ret < 0) {
+		dev_err(dev, "failed to resume the devfreq devices\n");
+		return ret;
+	}
+	return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(rk3399_dmcfreq_pm, rk3399_dmcfreq_suspend,
+			 rk3399_dmcfreq_resume);
+
+static irqreturn_t rk3399_dmc_irq(int irq, void *dev_id)
+{
+	struct rk3399_dmcfreq *dmcfreq = dev_id;
+	struct arm_smccc_res res;
+
+	dmcfreq->wait_dcf_flag = 0;
+	wake_up(&dmcfreq->wait_dcf_queue);
+
+	/* Clear the DCF interrupt */
+	arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
+		      ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ,
+		      0, 0, 0, 0, &res);
+
+	return IRQ_HANDLED;
+}
+
+static int of_get_ddr_timings(struct dram_timing *timing,
+			      struct device_node *np)
+{
+	int ret = 0;
+
+	ret = of_property_read_u32(np, "rockchip,ddr3_speed_bin",
+				   &timing->ddr3_speed_bin);
+	ret |= of_property_read_u32(np, "rockchip,pd_idle",
+				    &timing->pd_idle);
+	ret |= of_property_read_u32(np, "rockchip,sr_idle",
+				    &timing->sr_idle);
+	ret |= of_property_read_u32(np, "rockchip,sr_mc_gate_idle",
+				    &timing->sr_mc_gate_idle);
+	ret |= of_property_read_u32(np, "rockchip,srpd_lite_idle",
+				    &timing->srpd_lite_idle);
+	ret |= of_property_read_u32(np, "rockchip,standby_idle",
+				    &timing->standby_idle);
+	ret |= of_property_read_u32(np, "rockchip,auto_pd_dis_freq",
+				    &timing->auto_pd_dis_freq);
+	ret |= of_property_read_u32(np, "rockchip,dram_dll_dis_freq",
+				    &timing->dram_dll_dis_freq);
+	ret |= of_property_read_u32(np, "rockchip,phy_dll_dis_freq",
+				    &timing->phy_dll_dis_freq);
+	ret |= of_property_read_u32(np, "rockchip,ddr3_odt_dis_freq",
+				    &timing->ddr3_odt_dis_freq);
+	ret |= of_property_read_u32(np, "rockchip,ddr3_drv",
+				    &timing->ddr3_drv);
+	ret |= of_property_read_u32(np, "rockchip,ddr3_odt",
+				    &timing->ddr3_odt);
+	ret |= of_property_read_u32(np, "rockchip,phy_ddr3_ca_drv",
+				    &timing->phy_ddr3_ca_drv);
+	ret |= of_property_read_u32(np, "rockchip,phy_ddr3_dq_drv",
+				    &timing->phy_ddr3_dq_drv);
+	ret |= of_property_read_u32(np, "rockchip,phy_ddr3_odt",
+				    &timing->phy_ddr3_odt);
+	ret |= of_property_read_u32(np, "rockchip,lpddr3_odt_dis_freq",
+				    &timing->lpddr3_odt_dis_freq);
+	ret |= of_property_read_u32(np, "rockchip,lpddr3_drv",
+				    &timing->lpddr3_drv);
+	ret |= of_property_read_u32(np, "rockchip,lpddr3_odt",
+				    &timing->lpddr3_odt);
+	ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_ca_drv",
+				    &timing->phy_lpddr3_ca_drv);
+	ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_dq_drv",
+				    &timing->phy_lpddr3_dq_drv);
+	ret |= of_property_read_u32(np, "rockchip,phy_lpddr3_odt",
+				    &timing->phy_lpddr3_odt);
+	ret |= of_property_read_u32(np, "rockchip,lpddr4_odt_dis_freq",
+				    &timing->lpddr4_odt_dis_freq);
+	ret |= of_property_read_u32(np, "rockchip,lpddr4_drv",
+				    &timing->lpddr4_drv);
+	ret |= of_property_read_u32(np, "rockchip,lpddr4_dq_odt",
+				    &timing->lpddr4_dq_odt);
+	ret |= of_property_read_u32(np, "rockchip,lpddr4_ca_odt",
+				    &timing->lpddr4_ca_odt);
+	ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ca_drv",
+				    &timing->phy_lpddr4_ca_drv);
+	ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_ck_cs_drv",
+				    &timing->phy_lpddr4_ck_cs_drv);
+	ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_dq_drv",
+				    &timing->phy_lpddr4_dq_drv);
+	ret |= of_property_read_u32(np, "rockchip,phy_lpddr4_odt",
+				    &timing->phy_lpddr4_odt);
+
+	return ret;
+}
+
+static int rk3399_dmcfreq_probe(struct platform_device *pdev)
+{
+	struct arm_smccc_res res;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = pdev->dev.of_node;
+	struct rk3399_dmcfreq *data;
+	int ret, irq, index, size;
+	uint32_t *timing;
+	struct dev_pm_opp *opp;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "Cannot get the dmc interrupt resource\n");
+		return -EINVAL;
+	}
+	data = devm_kzalloc(dev, sizeof(struct rk3399_dmcfreq), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	mutex_init(&data->lock);
+
+	data->vdd_center = devm_regulator_get(dev, "center");
+	if (IS_ERR(data->vdd_center)) {
+		dev_err(dev, "Cannot get the regulator \"center\"\n");
+		return PTR_ERR(data->vdd_center);
+	}
+
+	data->dmc_clk = devm_clk_get(dev, "dmc_clk");
+	if (IS_ERR(data->dmc_clk)) {
+		dev_err(dev, "Cannot get the clk dmc_clk\n");
+		return PTR_ERR(data->dmc_clk);
+	};
+
+	data->irq = irq;
+	ret = devm_request_irq(dev, irq, rk3399_dmc_irq, 0,
+			       dev_name(dev), data);
+	if (ret) {
+		dev_err(dev, "Failed to request dmc irq: %d\n", ret);
+		return ret;
+	}
+
+	init_waitqueue_head(&data->wait_dcf_queue);
+	data->wait_dcf_flag = 0;
+
+	data->edev = devfreq_event_get_edev_by_phandle(dev, 0);
+	if (IS_ERR(data->edev))
+		return -EPROBE_DEFER;
+
+	ret = devfreq_event_enable_edev(data->edev);
+	if (ret < 0) {
+		dev_err(dev, "failed to enable devfreq-event devices\n");
+		return ret;
+	}
+
+	/*
+	 * Get dram timing and pass it to arm trust firmware,
+	 * the dram drvier in arm trust firmware will get these
+	 * timing and to do dram initial.
+	 */
+	if (!of_get_ddr_timings(&data->timing, np)) {
+		timing = &data->timing.ddr3_speed_bin;
+		size = sizeof(struct dram_timing) / 4;
+		for (index = 0; index < size; index++) {
+			arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, *timing++, index,
+				      ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM,
+				      0, 0, 0, 0, &res);
+			if (res.a0) {
+				dev_err(dev, "Failed to set dram param: %ld\n",
+					res.a0);
+				return -EINVAL;
+			}
+		}
+	}
+
+	arm_smccc_smc(ROCKCHIP_SIP_DRAM_FREQ, 0, 0,
+		      ROCKCHIP_SIP_CONFIG_DRAM_INIT,
+		      0, 0, 0, 0, &res);
+
+	/*
+	 * We add a devfreq driver to our parent since it has a device tree node
+	 * with operating points.
+	 */
+	if (dev_pm_opp_of_add_table(dev)) {
+		dev_err(dev, "Invalid operating-points in device tree.\n");
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	of_property_read_u32(np, "upthreshold",
+			     &data->ondemand_data.upthreshold);
+	of_property_read_u32(np, "downdifferential",
+			     &data->ondemand_data.downdifferential);
+
+	data->rate = clk_get_rate(data->dmc_clk);
+
+	rcu_read_lock();
+	opp = devfreq_recommended_opp(dev, &data->rate, 0);
+	if (IS_ERR(opp)) {
+		rcu_read_unlock();
+		return PTR_ERR(opp);
+	}
+	rcu_read_unlock();
+	data->curr_opp = opp;
+
+	rk3399_devfreq_dmc_profile.initial_freq = data->rate;
+
+	data->devfreq = devfreq_add_device(dev,
+					   &rk3399_devfreq_dmc_profile,
+					   "simple_ondemand",
+					   &data->ondemand_data);
+	if (IS_ERR(data->devfreq))
+		return PTR_ERR(data->devfreq);
+	devm_devfreq_register_opp_notifier(dev, data->devfreq);
+
+	data->dev = dev;
+	platform_set_drvdata(pdev, data);
+
+	return 0;
+}
+
+static const struct of_device_id rk3399dmc_devfreq_of_match[] = {
+	{ .compatible = "rockchip,rk3399-dmc" },
+	{ },
+};
+
+static struct platform_driver rk3399_dmcfreq_driver = {
+	.probe	= rk3399_dmcfreq_probe,
+	.driver = {
+		.name	= "rk3399-dmc-freq",
+		.pm	= &rk3399_dmcfreq_pm,
+		.of_match_table = rk3399dmc_devfreq_of_match,
+	},
+};
+module_platform_driver(rk3399_dmcfreq_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Lin Huang <hl@rock-chips.com>");
+MODULE_DESCRIPTION("RK3399 dmcfreq driver with devfreq framework");
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index e434ffe..832cbd6 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -2067,7 +2067,7 @@
 err_clk_disable:
 	clk_disable_unprepare(atxdmac->clk);
 err_free_irq:
-	free_irq(atxdmac->irq, atxdmac->dma.dev);
+	free_irq(atxdmac->irq, atxdmac);
 	return ret;
 }
 
@@ -2081,7 +2081,7 @@
 	dma_async_device_unregister(&atxdmac->dma);
 	clk_disable_unprepare(atxdmac->clk);
 
-	free_irq(atxdmac->irq, atxdmac->dma.dev);
+	free_irq(atxdmac->irq, atxdmac);
 
 	for (i = 0; i < atxdmac->dma.chancnt; i++) {
 		struct at_xdmac_chan *atchan = &atxdmac->chan[i];
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index edf053f..da18b18 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -46,9 +46,9 @@
 		u8 _dmsize = _is_slave ? _sconfig->dst_maxburst :	\
 			DW_DMA_MSIZE_16;			\
 		u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ?		\
-			_dwc->p_master : _dwc->m_master;		\
+			_dwc->dws.p_master : _dwc->dws.m_master;	\
 		u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ?		\
-			_dwc->p_master : _dwc->m_master;		\
+			_dwc->dws.p_master : _dwc->dws.m_master;	\
 								\
 		(DWC_CTLL_DST_MSIZE(_dmsize)			\
 		 | DWC_CTLL_SRC_MSIZE(_smsize)			\
@@ -143,12 +143,16 @@
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 	u32 cfghi = DWC_CFGH_FIFO_MODE;
 	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+	bool hs_polarity = dwc->dws.hs_polarity;
 
 	if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
 		return;
 
-	cfghi |= DWC_CFGH_DST_PER(dwc->dst_id);
-	cfghi |= DWC_CFGH_SRC_PER(dwc->src_id);
+	cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id);
+	cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id);
+
+	/* Set polarity of handshake interface */
+	cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0;
 
 	channel_writel(dwc, CFG_LO, cfglo);
 	channel_writel(dwc, CFG_HI, cfghi);
@@ -209,7 +213,7 @@
 static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
 {
 	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
-	u8		lms = DWC_LLP_LMS(dwc->m_master);
+	u8		lms = DWC_LLP_LMS(dwc->dws.m_master);
 	unsigned long	was_soft_llp;
 
 	/* ASSERT:  channel is idle */
@@ -662,7 +666,7 @@
 	struct dw_desc		*prev;
 	size_t			xfer_count;
 	size_t			offset;
-	u8			m_master = dwc->m_master;
+	u8			m_master = dwc->dws.m_master;
 	unsigned int		src_width;
 	unsigned int		dst_width;
 	unsigned int		data_width = dw->pdata->data_width[m_master];
@@ -740,7 +744,7 @@
 	struct dw_desc		*prev;
 	struct dw_desc		*first;
 	u32			ctllo;
-	u8			m_master = dwc->m_master;
+	u8			m_master = dwc->dws.m_master;
 	u8			lms = DWC_LLP_LMS(m_master);
 	dma_addr_t		reg;
 	unsigned int		reg_width;
@@ -895,12 +899,7 @@
 		return false;
 
 	/* We have to copy data since dws can be temporary storage */
-
-	dwc->src_id = dws->src_id;
-	dwc->dst_id = dws->dst_id;
-
-	dwc->m_master = dws->m_master;
-	dwc->p_master = dws->p_master;
+	memcpy(&dwc->dws, dws, sizeof(struct dw_dma_slave));
 
 	return true;
 }
@@ -1167,11 +1166,7 @@
 	spin_lock_irqsave(&dwc->lock, flags);
 
 	/* Clear custom channel configuration */
-	dwc->src_id = 0;
-	dwc->dst_id = 0;
-
-	dwc->m_master = 0;
-	dwc->p_master = 0;
+	memset(&dwc->dws, 0, sizeof(struct dw_dma_slave));
 
 	clear_bit(DW_DMA_IS_INITIALIZED, &dwc->flags);
 
@@ -1264,7 +1259,7 @@
 	struct dw_cyclic_desc		*retval = NULL;
 	struct dw_desc			*desc;
 	struct dw_desc			*last = NULL;
-	u8				lms = DWC_LLP_LMS(dwc->m_master);
+	u8				lms = DWC_LLP_LMS(dwc->dws.m_master);
 	unsigned long			was_cyclic;
 	unsigned int			reg_width;
 	unsigned int			periods;
@@ -1576,11 +1571,7 @@
 				(dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
 		} else {
 			dwc->block_size = pdata->block_size;
-
-			/* Check if channel supports multi block transfer */
-			channel_writel(dwc, LLP, DWC_LLP_LOC(0xffffffff));
-			dwc->nollp = DWC_LLP_LOC(channel_readl(dwc, LLP)) == 0;
-			channel_writel(dwc, LLP, 0);
+			dwc->nollp = pdata->is_nollp;
 		}
 	}
 
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 4b7bd78..f65dd10 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -245,10 +245,7 @@
 	bool			nollp;
 
 	/* custom slave configuration */
-	u8			src_id;
-	u8			dst_id;
-	u8			m_master;
-	u8			p_master;
+	struct dw_dma_slave	dws;
 
 	/* configuration passed via .device_config */
 	struct dma_slave_config dma_sconfig;
diff --git a/drivers/dma/fsl_raid.c b/drivers/dma/fsl_raid.c
index aad167e..de2a2a2 100644
--- a/drivers/dma/fsl_raid.c
+++ b/drivers/dma/fsl_raid.c
@@ -836,6 +836,7 @@
 		rc = of_property_read_u32(np, "reg", &off);
 		if (rc) {
 			dev_err(dev, "Reg property not found in JQ node\n");
+			of_node_put(np);
 			return -ENODEV;
 		}
 		/* Find out the Job Rings present under each JQ */
diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c
index c5f21ef..29d04ca 100644
--- a/drivers/dma/hsu/hsu.c
+++ b/drivers/dma/hsu/hsu.c
@@ -200,10 +200,9 @@
  *      is not a normal timeout interrupt, ie. hsu_dma_get_status() returned 0.
  *
  *      Return:
- *      IRQ_NONE for invalid channel number, IRQ_HANDLED otherwise.
+ *      0 for invalid channel number, 1 otherwise.
  */
-irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr,
-			   u32 status)
+int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, u32 status)
 {
 	struct hsu_dma_chan *hsuc;
 	struct hsu_dma_desc *desc;
@@ -211,7 +210,7 @@
 
 	/* Sanity check */
 	if (nr >= chip->hsu->nr_channels)
-		return IRQ_NONE;
+		return 0;
 
 	hsuc = &chip->hsu->chan[nr];
 
@@ -230,7 +229,7 @@
 	}
 	spin_unlock_irqrestore(&hsuc->vchan.lock, flags);
 
-	return IRQ_HANDLED;
+	return 1;
 }
 EXPORT_SYMBOL_GPL(hsu_dma_do_irq);
 
diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c
index 9916058..b51639f 100644
--- a/drivers/dma/hsu/pci.c
+++ b/drivers/dma/hsu/pci.c
@@ -29,7 +29,7 @@
 	u32 dmaisr;
 	u32 status;
 	unsigned short i;
-	irqreturn_t ret = IRQ_NONE;
+	int ret = 0;
 	int err;
 
 	dmaisr = readl(chip->regs + HSU_PCI_DMAISR);
@@ -37,14 +37,14 @@
 		if (dmaisr & 0x1) {
 			err = hsu_dma_get_status(chip, i, &status);
 			if (err > 0)
-				ret |= IRQ_HANDLED;
+				ret |= 1;
 			else if (err == 0)
 				ret |= hsu_dma_do_irq(chip, i, status);
 		}
 		dmaisr >>= 1;
 	}
 
-	return ret;
+	return IRQ_RETVAL(ret);
 }
 
 static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c
index a4c53be..624f1e1 100644
--- a/drivers/dma/img-mdc-dma.c
+++ b/drivers/dma/img-mdc-dma.c
@@ -861,7 +861,6 @@
 {
 	struct mdc_dma *mdma;
 	struct resource *res;
-	const struct of_device_id *match;
 	unsigned int i;
 	u32 val;
 	int ret;
@@ -871,8 +870,7 @@
 		return -ENOMEM;
 	platform_set_drvdata(pdev, mdma);
 
-	match = of_match_device(mdc_dma_of_match, &pdev->dev);
-	mdma->soc = match->data;
+	mdma->soc = of_device_get_match_data(&pdev->dev);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	mdma->regs = devm_ioremap_resource(&pdev->dev, res);
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 03ec76f..3cb4738 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -648,15 +648,11 @@
 	writel_relaxed(val, sdma->regs + chnenbl);
 }
 
-static void sdma_handle_channel_loop(struct sdma_channel *sdmac)
-{
-	if (sdmac->desc.callback)
-		sdmac->desc.callback(sdmac->desc.callback_param);
-}
-
 static void sdma_update_channel_loop(struct sdma_channel *sdmac)
 {
 	struct sdma_buffer_descriptor *bd;
+	int error = 0;
+	enum dma_status	old_status = sdmac->status;
 
 	/*
 	 * loop mode. Iterate over descriptors, re-setup them and
@@ -668,17 +664,42 @@
 		if (bd->mode.status & BD_DONE)
 			break;
 
-		if (bd->mode.status & BD_RROR)
+		if (bd->mode.status & BD_RROR) {
+			bd->mode.status &= ~BD_RROR;
 			sdmac->status = DMA_ERROR;
+			error = -EIO;
+		}
 
+	       /*
+		* We use bd->mode.count to calculate the residue, since contains
+		* the number of bytes present in the current buffer descriptor.
+		*/
+
+		sdmac->chn_real_count = bd->mode.count;
 		bd->mode.status |= BD_DONE;
+		bd->mode.count = sdmac->period_len;
+
+		/*
+		 * The callback is called from the interrupt context in order
+		 * to reduce latency and to avoid the risk of altering the
+		 * SDMA transaction status by the time the client tasklet is
+		 * executed.
+		 */
+
+		if (sdmac->desc.callback)
+			sdmac->desc.callback(sdmac->desc.callback_param);
+
 		sdmac->buf_tail++;
 		sdmac->buf_tail %= sdmac->num_bd;
+
+		if (error)
+			sdmac->status = old_status;
 	}
 }
 
-static void mxc_sdma_handle_channel_normal(struct sdma_channel *sdmac)
+static void mxc_sdma_handle_channel_normal(unsigned long data)
 {
+	struct sdma_channel *sdmac = (struct sdma_channel *) data;
 	struct sdma_buffer_descriptor *bd;
 	int i, error = 0;
 
@@ -705,16 +726,6 @@
 		sdmac->desc.callback(sdmac->desc.callback_param);
 }
 
-static void sdma_tasklet(unsigned long data)
-{
-	struct sdma_channel *sdmac = (struct sdma_channel *) data;
-
-	if (sdmac->flags & IMX_DMA_SG_LOOP)
-		sdma_handle_channel_loop(sdmac);
-	else
-		mxc_sdma_handle_channel_normal(sdmac);
-}
-
 static irqreturn_t sdma_int_handler(int irq, void *dev_id)
 {
 	struct sdma_engine *sdma = dev_id;
@@ -731,8 +742,8 @@
 
 		if (sdmac->flags & IMX_DMA_SG_LOOP)
 			sdma_update_channel_loop(sdmac);
-
-		tasklet_schedule(&sdmac->tasklet);
+		else
+			tasklet_schedule(&sdmac->tasklet);
 
 		__clear_bit(channel, &stat);
 	}
@@ -1353,7 +1364,8 @@
 	u32 residue;
 
 	if (sdmac->flags & IMX_DMA_SG_LOOP)
-		residue = (sdmac->num_bd - sdmac->buf_tail) * sdmac->period_len;
+		residue = (sdmac->num_bd - sdmac->buf_tail) *
+			   sdmac->period_len - sdmac->chn_real_count;
 	else
 		residue = sdmac->chn_count - sdmac->chn_real_count;
 
@@ -1732,7 +1744,7 @@
 		dma_cookie_init(&sdmac->chan);
 		sdmac->channel = i;
 
-		tasklet_init(&sdmac->tasklet, sdma_tasklet,
+		tasklet_init(&sdmac->tasklet, mxc_sdma_handle_channel_normal,
 			     (unsigned long) sdmac);
 		/*
 		 * Add the channel to the DMAC list. Do not add channel 0 though
diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c
index dc7850a..3f56f9c 100644
--- a/drivers/dma/pxa_dma.c
+++ b/drivers/dma/pxa_dma.c
@@ -638,7 +638,7 @@
 		vd_last_issued = list_entry(vc->desc_issued.prev,
 					    struct virt_dma_desc, node);
 		pxad_desc_chain(vd_last_issued, vd);
-		if (is_chan_running(chan) || is_desc_completed(vd_last_issued))
+		if (is_chan_running(chan) || is_desc_completed(vd))
 			return true;
 	}
 
@@ -671,6 +671,7 @@
 	struct virt_dma_desc *vd, *tmp;
 	unsigned int dcsr;
 	unsigned long flags;
+	bool vd_completed;
 	dma_cookie_t last_started = 0;
 
 	BUG_ON(!chan);
@@ -681,15 +682,17 @@
 
 	spin_lock_irqsave(&chan->vc.lock, flags);
 	list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) {
+		vd_completed = is_desc_completed(vd);
 		dev_dbg(&chan->vc.chan.dev->device,
-			"%s(): checking txd %p[%x]: completed=%d\n",
-			__func__, vd, vd->tx.cookie, is_desc_completed(vd));
+			"%s(): checking txd %p[%x]: completed=%d dcsr=0x%x\n",
+			__func__, vd, vd->tx.cookie, vd_completed,
+			dcsr);
 		last_started = vd->tx.cookie;
 		if (to_pxad_sw_desc(vd)->cyclic) {
 			vchan_cyclic_callback(vd);
 			break;
 		}
-		if (is_desc_completed(vd)) {
+		if (vd_completed) {
 			list_del(&vd->node);
 			vchan_cookie_complete(vd);
 		} else {
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index 749f1bd..06ecdc3 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -600,27 +600,30 @@
 {
 	struct usb_dmac_chan *chan = dev;
 	irqreturn_t ret = IRQ_NONE;
-	u32 mask = USB_DMACHCR_TE;
-	u32 check_bits = USB_DMACHCR_TE | USB_DMACHCR_SP;
+	u32 mask = 0;
 	u32 chcr;
+	bool xfer_end = false;
 
 	spin_lock(&chan->vc.lock);
 
 	chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
-	if (chcr & check_bits)
-		mask |= USB_DMACHCR_DE | check_bits;
+	if (chcr & (USB_DMACHCR_TE | USB_DMACHCR_SP)) {
+		mask |= USB_DMACHCR_DE | USB_DMACHCR_TE | USB_DMACHCR_SP;
+		if (chcr & USB_DMACHCR_DE)
+			xfer_end = true;
+		ret |= IRQ_HANDLED;
+	}
 	if (chcr & USB_DMACHCR_NULL) {
 		/* An interruption of TE will happen after we set FTE */
 		mask |= USB_DMACHCR_NULL;
 		chcr |= USB_DMACHCR_FTE;
 		ret |= IRQ_HANDLED;
 	}
-	usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
+	if (mask)
+		usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
 
-	if (chcr & check_bits) {
+	if (xfer_end)
 		usb_dmac_isr_transfer_end(chan);
-		ret |= IRQ_HANDLED;
-	}
 
 	spin_unlock(&chan->vc.lock);
 
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index d0c1dab..82d85cce 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -251,6 +251,14 @@
 	  Support for error detection and correction the Intel
 	  Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers.
 
+config EDAC_SKX
+	tristate "Intel Skylake server Integrated MC"
+	depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL
+	depends on PCI_MMCONFIG
+	help
+	  Support for error detection and correction the Intel
+	  Skylake server Integrated Memory Controllers.
+
 config EDAC_MPC85XX
 	tristate "Freescale MPC83xx / MPC85xx"
 	depends on EDAC_MM_EDAC && FSL_SOC
@@ -258,6 +266,13 @@
 	  Support for error detection and correction on the Freescale
 	  MPC8349, MPC8560, MPC8540, MPC8548, T4240
 
+config EDAC_LAYERSCAPE
+	tristate "Freescale Layerscape DDR"
+	depends on EDAC_MM_EDAC && ARCH_LAYERSCAPE
+	help
+	  Support for error detection and correction on Freescale memory
+	  controllers on Layerscape SoCs.
+
 config EDAC_MV64X60
 	tristate "Marvell MV64x60"
 	depends on EDAC_MM_EDAC && MV64X60
@@ -398,6 +413,41 @@
 	  Support for error detection and correction on the
 	  Altera Ethernet FIFO Memory for Altera SoCs.
 
+config EDAC_ALTERA_NAND
+	bool "Altera NAND FIFO ECC"
+	depends on EDAC_ALTERA=y && MTD_NAND_DENALI
+	help
+	  Support for error detection and correction on the
+	  Altera NAND FIFO Memory for Altera SoCs.
+
+config EDAC_ALTERA_DMA
+	bool "Altera DMA FIFO ECC"
+	depends on EDAC_ALTERA=y && PL330_DMA=y
+	help
+	  Support for error detection and correction on the
+	  Altera DMA FIFO Memory for Altera SoCs.
+
+config EDAC_ALTERA_USB
+	bool "Altera USB FIFO ECC"
+	depends on EDAC_ALTERA=y && USB_DWC2
+	help
+	  Support for error detection and correction on the
+	  Altera USB FIFO Memory for Altera SoCs.
+
+config EDAC_ALTERA_QSPI
+	bool "Altera QSPI FIFO ECC"
+	depends on EDAC_ALTERA=y && SPI_CADENCE_QUADSPI
+	help
+	  Support for error detection and correction on the
+	  Altera QSPI FIFO Memory for Altera SoCs.
+
+config EDAC_ALTERA_SDMMC
+	bool "Altera SDMMC FIFO ECC"
+	depends on EDAC_ALTERA=y && MMC_DW
+	help
+	  Support for error detection and correction on the
+	  Altera SDMMC FIFO Memory for Altera SoCs.
+
 config EDAC_SYNOPSYS
 	tristate "Synopsys DDR Memory Controller"
 	depends on EDAC_MM_EDAC && ARCH_ZYNQ
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index f9e4a3e..88e472e 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -31,6 +31,7 @@
 obj-$(CONFIG_EDAC_I7300)		+= i7300_edac.o
 obj-$(CONFIG_EDAC_I7CORE)		+= i7core_edac.o
 obj-$(CONFIG_EDAC_SBRIDGE)		+= sb_edac.o
+obj-$(CONFIG_EDAC_SKX)			+= skx_edac.o
 obj-$(CONFIG_EDAC_E7XXX)		+= e7xxx_edac.o
 obj-$(CONFIG_EDAC_E752X)		+= e752x_edac.o
 obj-$(CONFIG_EDAC_I82443BXGX)		+= i82443bxgx_edac.o
@@ -50,7 +51,13 @@
 obj-$(CONFIG_EDAC_AMD64)		+= amd64_edac_mod.o
 
 obj-$(CONFIG_EDAC_PASEMI)		+= pasemi_edac.o
-obj-$(CONFIG_EDAC_MPC85XX)		+= mpc85xx_edac.o
+
+mpc85xx_edac_mod-y			:= fsl_ddr_edac.o mpc85xx_edac.o
+obj-$(CONFIG_EDAC_MPC85XX)		+= mpc85xx_edac_mod.o
+
+layerscape_edac_mod-y			:= fsl_ddr_edac.o layerscape_edac.o
+obj-$(CONFIG_EDAC_LAYERSCAPE)		+= layerscape_edac_mod.o
+
 obj-$(CONFIG_EDAC_MV64X60)		+= mv64x60_edac.o
 obj-$(CONFIG_EDAC_CELL)			+= cell_edac.o
 obj-$(CONFIG_EDAC_PPC4XX)		+= ppc4xx_edac.o
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 2398d07..58d3e2b 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -203,7 +203,7 @@
 	if (!mci->debugfs)
 		return;
 
-	edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci,
+	edac_debugfs_create_file("altr_trigger", S_IWUSR, mci->debugfs, mci,
 				 &altr_sdr_mc_debug_inject_fops);
 }
 
@@ -680,7 +680,7 @@
 	if (!drvdata->debugfs_dir)
 		return;
 
-	if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR,
+	if (!edac_debugfs_create_file("altr_trigger", S_IWUSR,
 				      drvdata->debugfs_dir, edac_dci,
 				      priv->inject_fops))
 		debugfs_remove_recursive(drvdata->debugfs_dir);
@@ -1108,7 +1108,6 @@
 	.setup = altr_check_ecc_deps,
 	.ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR),
 	.ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR),
-	.dbgfs_name = "altr_ocram_trigger",
 	.alloc_mem = ocram_alloc_mem,
 	.free_mem = ocram_free_mem,
 	.ecc_enable_mask = ALTR_OCR_ECC_EN,
@@ -1125,7 +1124,6 @@
 	.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
 	.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
 	.irq_status_mask = A10_SYSMGR_ECC_INTSTAT_OCRAM,
-	.dbgfs_name = "altr_ocram_trigger",
 	.ecc_enable_mask = ALTR_A10_OCRAM_ECC_EN_CTL,
 	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
 	.ce_set_mask = ALTR_A10_ECC_TSERRA,
@@ -1228,7 +1226,6 @@
 	.setup = altr_l2_check_deps,
 	.ce_clear_mask = 0,
 	.ue_clear_mask = 0,
-	.dbgfs_name = "altr_l2_trigger",
 	.alloc_mem = l2_alloc_mem,
 	.free_mem = l2_free_mem,
 	.ecc_enable_mask = ALTR_L2_ECC_EN,
@@ -1244,7 +1241,6 @@
 	.ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR,
 	.ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR,
 	.irq_status_mask = A10_SYSMGR_ECC_INTSTAT_L2,
-	.dbgfs_name = "altr_l2_trigger",
 	.alloc_mem = l2_alloc_mem,
 	.free_mem = l2_free_mem,
 	.ecc_enable_mask = ALTR_A10_L2_ECC_EN_CTL,
@@ -1266,7 +1262,6 @@
 	.setup = altr_check_ecc_deps,
 	.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
 	.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
-	.dbgfs_name = "altr_trigger",
 	.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
 	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
 	.ce_set_mask = ALTR_A10_ECC_TSERRA,
@@ -1285,6 +1280,292 @@
 
 #endif	/* CONFIG_EDAC_ALTERA_ETHERNET */
 
+/********************** NAND Device Functions **********************/
+
+#ifdef CONFIG_EDAC_ALTERA_NAND
+
+static const struct edac_device_prv_data a10_nandecc_data = {
+	.setup = altr_check_ecc_deps,
+	.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
+	.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
+	.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
+	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+	.ce_set_mask = ALTR_A10_ECC_TSERRA,
+	.ue_set_mask = ALTR_A10_ECC_TDERRA,
+	.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+	.ecc_irq_handler = altr_edac_a10_ecc_irq,
+	.inject_fops = &altr_edac_a10_device_inject_fops,
+};
+
+static int __init socfpga_init_nand_ecc(void)
+{
+	return altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc");
+}
+
+early_initcall(socfpga_init_nand_ecc);
+
+#endif	/* CONFIG_EDAC_ALTERA_NAND */
+
+/********************** DMA Device Functions **********************/
+
+#ifdef CONFIG_EDAC_ALTERA_DMA
+
+static const struct edac_device_prv_data a10_dmaecc_data = {
+	.setup = altr_check_ecc_deps,
+	.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
+	.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
+	.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
+	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+	.ce_set_mask = ALTR_A10_ECC_TSERRA,
+	.ue_set_mask = ALTR_A10_ECC_TDERRA,
+	.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+	.ecc_irq_handler = altr_edac_a10_ecc_irq,
+	.inject_fops = &altr_edac_a10_device_inject_fops,
+};
+
+static int __init socfpga_init_dma_ecc(void)
+{
+	return altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc");
+}
+
+early_initcall(socfpga_init_dma_ecc);
+
+#endif	/* CONFIG_EDAC_ALTERA_DMA */
+
+/********************** USB Device Functions **********************/
+
+#ifdef CONFIG_EDAC_ALTERA_USB
+
+static const struct edac_device_prv_data a10_usbecc_data = {
+	.setup = altr_check_ecc_deps,
+	.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
+	.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
+	.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
+	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+	.ce_set_mask = ALTR_A10_ECC_TSERRA,
+	.ue_set_mask = ALTR_A10_ECC_TDERRA,
+	.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+	.ecc_irq_handler = altr_edac_a10_ecc_irq,
+	.inject_fops = &altr_edac_a10_device_inject_fops,
+};
+
+static int __init socfpga_init_usb_ecc(void)
+{
+	return altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc");
+}
+
+early_initcall(socfpga_init_usb_ecc);
+
+#endif	/* CONFIG_EDAC_ALTERA_USB */
+
+/********************** QSPI Device Functions **********************/
+
+#ifdef CONFIG_EDAC_ALTERA_QSPI
+
+static const struct edac_device_prv_data a10_qspiecc_data = {
+	.setup = altr_check_ecc_deps,
+	.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
+	.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
+	.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
+	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+	.ce_set_mask = ALTR_A10_ECC_TSERRA,
+	.ue_set_mask = ALTR_A10_ECC_TDERRA,
+	.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+	.ecc_irq_handler = altr_edac_a10_ecc_irq,
+	.inject_fops = &altr_edac_a10_device_inject_fops,
+};
+
+static int __init socfpga_init_qspi_ecc(void)
+{
+	return altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc");
+}
+
+early_initcall(socfpga_init_qspi_ecc);
+
+#endif	/* CONFIG_EDAC_ALTERA_QSPI */
+
+/********************* SDMMC Device Functions **********************/
+
+#ifdef CONFIG_EDAC_ALTERA_SDMMC
+
+static const struct edac_device_prv_data a10_sdmmceccb_data;
+static int altr_portb_setup(struct altr_edac_device_dev *device)
+{
+	struct edac_device_ctl_info *dci;
+	struct altr_edac_device_dev *altdev;
+	char *ecc_name = "sdmmcb-ecc";
+	int edac_idx, rc;
+	struct device_node *np;
+	const struct edac_device_prv_data *prv = &a10_sdmmceccb_data;
+
+	rc = altr_check_ecc_deps(device);
+	if (rc)
+		return rc;
+
+	np = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc");
+	if (!np) {
+		edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n");
+		return -ENODEV;
+	}
+
+	/* Create the PortB EDAC device */
+	edac_idx = edac_device_alloc_index();
+	dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name, 1,
+					 ecc_name, 1, 0, NULL, 0, edac_idx);
+	if (!dci) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "%s: Unable to allocate PortB EDAC device\n",
+			    ecc_name);
+		return -ENOMEM;
+	}
+
+	/* Initialize the PortB EDAC device structure from PortA structure */
+	altdev = dci->pvt_info;
+	*altdev = *device;
+
+	if (!devres_open_group(&altdev->ddev, altr_portb_setup, GFP_KERNEL))
+		return -ENOMEM;
+
+	/* Update PortB specific values */
+	altdev->edac_dev_name = ecc_name;
+	altdev->edac_idx = edac_idx;
+	altdev->edac_dev = dci;
+	altdev->data = prv;
+	dci->dev = &altdev->ddev;
+	dci->ctl_name = "Altera ECC Manager";
+	dci->mod_name = ecc_name;
+	dci->dev_name = ecc_name;
+
+	/* Update the IRQs for PortB */
+	altdev->sb_irq = irq_of_parse_and_map(np, 2);
+	if (!altdev->sb_irq) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "Error PortB SBIRQ alloc\n");
+		rc = -ENODEV;
+		goto err_release_group_1;
+	}
+	rc = devm_request_irq(&altdev->ddev, altdev->sb_irq,
+			      prv->ecc_irq_handler,
+			      IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+			      ecc_name, altdev);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "PortB SBERR IRQ error\n");
+		goto err_release_group_1;
+	}
+
+	altdev->db_irq = irq_of_parse_and_map(np, 3);
+	if (!altdev->db_irq) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "Error PortB DBIRQ alloc\n");
+		rc = -ENODEV;
+		goto err_release_group_1;
+	}
+	rc = devm_request_irq(&altdev->ddev, altdev->db_irq,
+			      prv->ecc_irq_handler,
+			      IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+			      ecc_name, altdev);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_DEVICE, "PortB DBERR IRQ error\n");
+		goto err_release_group_1;
+	}
+
+	rc = edac_device_add_device(dci);
+	if (rc) {
+		edac_printk(KERN_ERR, EDAC_DEVICE,
+			    "edac_device_add_device portB failed\n");
+		rc = -ENOMEM;
+		goto err_release_group_1;
+	}
+	altr_create_edacdev_dbgfs(dci, prv);
+
+	list_add(&altdev->next, &altdev->edac->a10_ecc_devices);
+
+	devres_remove_group(&altdev->ddev, altr_portb_setup);
+
+	return 0;
+
+err_release_group_1:
+	edac_device_free_ctl_info(dci);
+	devres_release_group(&altdev->ddev, altr_portb_setup);
+	edac_printk(KERN_ERR, EDAC_DEVICE,
+		    "%s:Error setting up EDAC device: %d\n", ecc_name, rc);
+	return rc;
+}
+
+static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id)
+{
+	struct altr_edac_device_dev *ad = dev_id;
+	void __iomem  *base = ad->base;
+	const struct edac_device_prv_data *priv = ad->data;
+
+	if (irq == ad->sb_irq) {
+		writel(priv->ce_clear_mask,
+		       base + ALTR_A10_ECC_INTSTAT_OFST);
+		edac_device_handle_ce(ad->edac_dev, 0, 0, ad->edac_dev_name);
+		return IRQ_HANDLED;
+	} else if (irq == ad->db_irq) {
+		writel(priv->ue_clear_mask,
+		       base + ALTR_A10_ECC_INTSTAT_OFST);
+		edac_device_handle_ue(ad->edac_dev, 0, 0, ad->edac_dev_name);
+		return IRQ_HANDLED;
+	}
+
+	WARN_ONCE(1, "Unhandled IRQ%d on Port B.", irq);
+
+	return IRQ_NONE;
+}
+
+static const struct edac_device_prv_data a10_sdmmcecca_data = {
+	.setup = altr_portb_setup,
+	.ce_clear_mask = ALTR_A10_ECC_SERRPENA,
+	.ue_clear_mask = ALTR_A10_ECC_DERRPENA,
+	.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
+	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+	.ce_set_mask = ALTR_A10_ECC_SERRPENA,
+	.ue_set_mask = ALTR_A10_ECC_DERRPENA,
+	.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+	.ecc_irq_handler = altr_edac_a10_ecc_irq,
+	.inject_fops = &altr_edac_a10_device_inject_fops,
+};
+
+static const struct edac_device_prv_data a10_sdmmceccb_data = {
+	.setup = altr_portb_setup,
+	.ce_clear_mask = ALTR_A10_ECC_SERRPENB,
+	.ue_clear_mask = ALTR_A10_ECC_DERRPENB,
+	.ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL,
+	.ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+	.ce_set_mask = ALTR_A10_ECC_TSERRB,
+	.ue_set_mask = ALTR_A10_ECC_TDERRB,
+	.set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+	.ecc_irq_handler = altr_edac_a10_ecc_irq_portb,
+	.inject_fops = &altr_edac_a10_device_inject_fops,
+};
+
+static int __init socfpga_init_sdmmc_ecc(void)
+{
+	int rc = -ENODEV;
+	struct device_node *child = of_find_compatible_node(NULL, NULL,
+						"altr,socfpga-sdmmc-ecc");
+	if (!child) {
+		edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n");
+		return -ENODEV;
+	}
+
+	if (!of_device_is_available(child))
+		goto exit;
+
+	if (validate_parent_available(child))
+		goto exit;
+
+	rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK,
+				     a10_sdmmcecca_data.ecc_enable_mask, 1);
+exit:
+	of_node_put(child);
+	return rc;
+}
+
+early_initcall(socfpga_init_sdmmc_ecc);
+
+#endif	/* CONFIG_EDAC_ALTERA_SDMMC */
+
 /********************* Arria10 EDAC Device Functions *************************/
 static const struct of_device_id altr_edac_a10_device_of_match[] = {
 #ifdef CONFIG_EDAC_ALTERA_L2C
@@ -1298,6 +1579,21 @@
 	{ .compatible = "altr,socfpga-eth-mac-ecc",
 	  .data = &a10_enetecc_data },
 #endif
+#ifdef CONFIG_EDAC_ALTERA_NAND
+	{ .compatible = "altr,socfpga-nand-ecc", .data = &a10_nandecc_data },
+#endif
+#ifdef CONFIG_EDAC_ALTERA_DMA
+	{ .compatible = "altr,socfpga-dma-ecc", .data = &a10_dmaecc_data },
+#endif
+#ifdef CONFIG_EDAC_ALTERA_USB
+	{ .compatible = "altr,socfpga-usb-ecc", .data = &a10_usbecc_data },
+#endif
+#ifdef CONFIG_EDAC_ALTERA_QSPI
+	{ .compatible = "altr,socfpga-qspi-ecc", .data = &a10_qspiecc_data },
+#endif
+#ifdef CONFIG_EDAC_ALTERA_SDMMC
+	{ .compatible = "altr,socfpga-sdmmc-ecc", .data = &a10_sdmmcecca_data },
+#endif
 	{},
 };
 MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match);
@@ -1451,11 +1747,11 @@
 		rc = -ENODEV;
 		goto err_release_group1;
 	}
-	rc = devm_request_irq(edac->dev, altdev->sb_irq,
-			      prv->ecc_irq_handler,
-			      IRQF_SHARED, ecc_name, altdev);
+	rc = devm_request_irq(edac->dev, altdev->sb_irq, prv->ecc_irq_handler,
+			      IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+			      ecc_name, altdev);
 	if (rc) {
-		edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
+		edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n");
 		goto err_release_group1;
 	}
 
@@ -1465,9 +1761,9 @@
 		rc = -ENODEV;
 		goto err_release_group1;
 	}
-	rc = devm_request_irq(edac->dev, altdev->db_irq,
-			      prv->ecc_irq_handler,
-			      IRQF_SHARED, ecc_name, altdev);
+	rc = devm_request_irq(edac->dev, altdev->db_irq, prv->ecc_irq_handler,
+			      IRQF_ONESHOT | IRQF_TRIGGER_HIGH,
+			      ecc_name, altdev);
 	if (rc) {
 		edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
 		goto err_release_group1;
@@ -1526,7 +1822,7 @@
 	return 0;
 }
 
-struct irq_domain_ops a10_eccmgr_ic_ops = {
+static struct irq_domain_ops a10_eccmgr_ic_ops = {
 	.map = a10_eccmgr_irqdomain_map,
 	.xlate = irq_domain_xlate_twocell,
 };
@@ -1584,15 +1880,19 @@
 	for_each_child_of_node(pdev->dev.of_node, child) {
 		if (!of_device_is_available(child))
 			continue;
-		if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc"))
+
+		if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc") || 
+		    of_device_is_compatible(child, "altr,socfpga-a10-ocram-ecc") ||
+		    of_device_is_compatible(child, "altr,socfpga-eth-mac-ecc") ||
+		    of_device_is_compatible(child, "altr,socfpga-nand-ecc") ||
+		    of_device_is_compatible(child, "altr,socfpga-dma-ecc") ||
+		    of_device_is_compatible(child, "altr,socfpga-usb-ecc") ||
+		    of_device_is_compatible(child, "altr,socfpga-qspi-ecc") ||
+		    of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc"))
+
 			altr_edac_a10_device_add(edac, child);
-		else if ((of_device_is_compatible(child,
-					"altr,socfpga-a10-ocram-ecc")) ||
-			 (of_device_is_compatible(child,
-					"altr,socfpga-eth-mac-ecc")))
-			altr_edac_a10_device_add(edac, child);
-		else if (of_device_is_compatible(child,
-						 "altr,sdram-edac-a10"))
+
+		else if (of_device_is_compatible(child, "altr,sdram-edac-a10"))
 			of_platform_populate(pdev->dev.of_node,
 					     altr_sdram_ctrl_of_match,
 					     NULL, &pdev->dev);
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 687d8e7..cbc9629 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -250,6 +250,8 @@
 #define ALTR_A10_ECC_INTTEST_OFST       0x24
 #define ALTR_A10_ECC_TSERRA             BIT(0)
 #define ALTR_A10_ECC_TDERRA             BIT(8)
+#define ALTR_A10_ECC_TSERRB             BIT(16)
+#define ALTR_A10_ECC_TDERRB             BIT(24)
 
 /* ECC Manager Defines */
 #define A10_SYSMGR_ECC_INTMASK_SET_OFST   0x94
@@ -288,6 +290,9 @@
 /* Arria 10 Ethernet ECC Management Group Defines */
 #define ALTR_A10_COMMON_ECC_EN_CTL      BIT(0)
 
+/* Arria 10 SDMMC ECC Management Group Defines */
+#define ALTR_A10_SDMMC_IRQ_MASK         (BIT(16) | BIT(15))
+
 /* A10 ECC Controller memory initialization timeout */
 #define ALTR_A10_ECC_INIT_WATCHDOG_10US      10000
 
@@ -298,7 +303,6 @@
 	int ce_clear_mask;
 	int ue_clear_mask;
 	int irq_status_mask;
-	char dbgfs_name[20];
 	void * (*alloc_mem)(size_t size, void **other);
 	void (*free_mem)(void *p, size_t size, void *other);
 	int ecc_enable_mask;
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 8c0ec21..ee181c5 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -1425,11 +1425,17 @@
 
 		if (intlv_addr & 0x2) {
 			u8 shift = intlv_addr & 0x1 ? 9 : 6;
-			u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2;
+			u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) & 1;
 
 			return ((sys_addr >> shift) & 1) ^ temp;
 		}
 
+		if (intlv_addr & 0x4) {
+			u8 shift = intlv_addr & 0x1 ? 9 : 8;
+
+			return (sys_addr >> shift) & 1;
+		}
+
 		return (sys_addr >> (12 + hweight8(intlv_en))) & 1;
 	}
 
@@ -1726,8 +1732,11 @@
 	if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4))
 		return -EINVAL;
 
-	channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en,
-					     num_dcts_intlv, dct_sel);
+	if (pvt->model >= 0x60)
+		channel = f1x_determine_channel(pvt, sys_addr, false, intlv_en);
+	else
+		channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en,
+						     num_dcts_intlv, dct_sel);
 
 	/* Verify we stay within the MAX number of channels allowed */
 	if (channel > 3)
@@ -2961,6 +2970,15 @@
 	}
 }
 
+static const struct x86_cpu_id amd64_cpuids[] = {
+	{ X86_VENDOR_AMD, 0xF,	X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
+	{ X86_VENDOR_AMD, 0x10, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
+	{ X86_VENDOR_AMD, 0x15, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
+	{ X86_VENDOR_AMD, 0x16, X86_MODEL_ANY,	X86_FEATURE_ANY, 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
+
 static int __init amd64_edac_init(void)
 {
 	int err = -ENODEV;
diff --git a/drivers/edac/fsl_ddr_edac.c b/drivers/edac/fsl_ddr_edac.c
new file mode 100644
index 0000000..9774f52
--- /dev/null
+++ b/drivers/edac/fsl_ddr_edac.c
@@ -0,0 +1,633 @@
+/*
+ * Freescale Memory Controller kernel module
+ *
+ * Support Power-based SoCs including MPC85xx, MPC86xx, MPC83xx and
+ * ARM-based Layerscape SoCs including LS2xxx. Originally split
+ * out from mpc85xx_edac EDAC driver.
+ *
+ * Parts Copyrighted (c) 2013 by Freescale Semiconductor, Inc.
+ *
+ * Author: Dave Jiang <djiang@mvista.com>
+ *
+ * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/ctype.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
+#include <linux/edac.h>
+#include <linux/smp.h>
+#include <linux/gfp.h>
+
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include "edac_module.h"
+#include "edac_core.h"
+#include "fsl_ddr_edac.h"
+
+#define EDAC_MOD_STR	"fsl_ddr_edac"
+
+static int edac_mc_idx;
+
+static u32 orig_ddr_err_disable;
+static u32 orig_ddr_err_sbe;
+static bool little_endian;
+
+static inline u32 ddr_in32(void __iomem *addr)
+{
+	return little_endian ? ioread32(addr) : ioread32be(addr);
+}
+
+static inline void ddr_out32(void __iomem *addr, u32 value)
+{
+	if (little_endian)
+		iowrite32(value, addr);
+	else
+		iowrite32be(value, addr);
+}
+
+/************************ MC SYSFS parts ***********************************/
+
+#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
+
+static ssize_t fsl_mc_inject_data_hi_show(struct device *dev,
+					  struct device_attribute *mattr,
+					  char *data)
+{
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+	return sprintf(data, "0x%08x",
+		       ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI));
+}
+
+static ssize_t fsl_mc_inject_data_lo_show(struct device *dev,
+					  struct device_attribute *mattr,
+					      char *data)
+{
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+	return sprintf(data, "0x%08x",
+		       ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO));
+}
+
+static ssize_t fsl_mc_inject_ctrl_show(struct device *dev,
+				       struct device_attribute *mattr,
+					   char *data)
+{
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+	return sprintf(data, "0x%08x",
+		       ddr_in32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT));
+}
+
+static ssize_t fsl_mc_inject_data_hi_store(struct device *dev,
+					   struct device_attribute *mattr,
+					       const char *data, size_t count)
+{
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+	unsigned long val;
+	int rc;
+
+	if (isdigit(*data)) {
+		rc = kstrtoul(data, 0, &val);
+		if (rc)
+			return rc;
+
+		ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI, val);
+		return count;
+	}
+	return 0;
+}
+
+static ssize_t fsl_mc_inject_data_lo_store(struct device *dev,
+					   struct device_attribute *mattr,
+					       const char *data, size_t count)
+{
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+	unsigned long val;
+	int rc;
+
+	if (isdigit(*data)) {
+		rc = kstrtoul(data, 0, &val);
+		if (rc)
+			return rc;
+
+		ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO, val);
+		return count;
+	}
+	return 0;
+}
+
+static ssize_t fsl_mc_inject_ctrl_store(struct device *dev,
+					struct device_attribute *mattr,
+					       const char *data, size_t count)
+{
+	struct mem_ctl_info *mci = to_mci(dev);
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+	unsigned long val;
+	int rc;
+
+	if (isdigit(*data)) {
+		rc = kstrtoul(data, 0, &val);
+		if (rc)
+			return rc;
+
+		ddr_out32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT, val);
+		return count;
+	}
+	return 0;
+}
+
+DEVICE_ATTR(inject_data_hi, S_IRUGO | S_IWUSR,
+	    fsl_mc_inject_data_hi_show, fsl_mc_inject_data_hi_store);
+DEVICE_ATTR(inject_data_lo, S_IRUGO | S_IWUSR,
+	    fsl_mc_inject_data_lo_show, fsl_mc_inject_data_lo_store);
+DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR,
+	    fsl_mc_inject_ctrl_show, fsl_mc_inject_ctrl_store);
+
+static struct attribute *fsl_ddr_dev_attrs[] = {
+	&dev_attr_inject_data_hi.attr,
+	&dev_attr_inject_data_lo.attr,
+	&dev_attr_inject_ctrl.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(fsl_ddr_dev);
+
+/**************************** MC Err device ***************************/
+
+/*
+ * Taken from table 8-55 in the MPC8641 User's Manual and/or 9-61 in the
+ * MPC8572 User's Manual.  Each line represents a syndrome bit column as a
+ * 64-bit value, but split into an upper and lower 32-bit chunk.  The labels
+ * below correspond to Freescale's manuals.
+ */
+static unsigned int ecc_table[16] = {
+	/* MSB           LSB */
+	/* [0:31]    [32:63] */
+	0xf00fe11e, 0xc33c0ff7,	/* Syndrome bit 7 */
+	0x00ff00ff, 0x00fff0ff,
+	0x0f0f0f0f, 0x0f0fff00,
+	0x11113333, 0x7777000f,
+	0x22224444, 0x8888222f,
+	0x44448888, 0xffff4441,
+	0x8888ffff, 0x11118882,
+	0xffff1111, 0x22221114,	/* Syndrome bit 0 */
+};
+
+/*
+ * Calculate the correct ECC value for a 64-bit value specified by high:low
+ */
+static u8 calculate_ecc(u32 high, u32 low)
+{
+	u32 mask_low;
+	u32 mask_high;
+	int bit_cnt;
+	u8 ecc = 0;
+	int i;
+	int j;
+
+	for (i = 0; i < 8; i++) {
+		mask_high = ecc_table[i * 2];
+		mask_low = ecc_table[i * 2 + 1];
+		bit_cnt = 0;
+
+		for (j = 0; j < 32; j++) {
+			if ((mask_high >> j) & 1)
+				bit_cnt ^= (high >> j) & 1;
+			if ((mask_low >> j) & 1)
+				bit_cnt ^= (low >> j) & 1;
+		}
+
+		ecc |= bit_cnt << i;
+	}
+
+	return ecc;
+}
+
+/*
+ * Create the syndrome code which is generated if the data line specified by
+ * 'bit' failed.  Eg generate an 8-bit codes seen in Table 8-55 in the MPC8641
+ * User's Manual and 9-61 in the MPC8572 User's Manual.
+ */
+static u8 syndrome_from_bit(unsigned int bit) {
+	int i;
+	u8 syndrome = 0;
+
+	/*
+	 * Cycle through the upper or lower 32-bit portion of each value in
+	 * ecc_table depending on if 'bit' is in the upper or lower half of
+	 * 64-bit data.
+	 */
+	for (i = bit < 32; i < 16; i += 2)
+		syndrome |= ((ecc_table[i] >> (bit % 32)) & 1) << (i / 2);
+
+	return syndrome;
+}
+
+/*
+ * Decode data and ecc syndrome to determine what went wrong
+ * Note: This can only decode single-bit errors
+ */
+static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc,
+		       int *bad_data_bit, int *bad_ecc_bit)
+{
+	int i;
+	u8 syndrome;
+
+	*bad_data_bit = -1;
+	*bad_ecc_bit = -1;
+
+	/*
+	 * Calculate the ECC of the captured data and XOR it with the captured
+	 * ECC to find an ECC syndrome value we can search for
+	 */
+	syndrome = calculate_ecc(cap_high, cap_low) ^ cap_ecc;
+
+	/* Check if a data line is stuck... */
+	for (i = 0; i < 64; i++) {
+		if (syndrome == syndrome_from_bit(i)) {
+			*bad_data_bit = i;
+			return;
+		}
+	}
+
+	/* If data is correct, check ECC bits for errors... */
+	for (i = 0; i < 8; i++) {
+		if ((syndrome >> i) & 0x1) {
+			*bad_ecc_bit = i;
+			return;
+		}
+	}
+}
+
+#define make64(high, low) (((u64)(high) << 32) | (low))
+
+static void fsl_mc_check(struct mem_ctl_info *mci)
+{
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+	struct csrow_info *csrow;
+	u32 bus_width;
+	u32 err_detect;
+	u32 syndrome;
+	u64 err_addr;
+	u32 pfn;
+	int row_index;
+	u32 cap_high;
+	u32 cap_low;
+	int bad_data_bit;
+	int bad_ecc_bit;
+
+	err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT);
+	if (!err_detect)
+		return;
+
+	fsl_mc_printk(mci, KERN_ERR, "Err Detect Register: %#8.8x\n",
+		      err_detect);
+
+	/* no more processing if not ECC bit errors */
+	if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) {
+		ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect);
+		return;
+	}
+
+	syndrome = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ECC);
+
+	/* Mask off appropriate bits of syndrome based on bus width */
+	bus_width = (ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG) &
+		     DSC_DBW_MASK) ? 32 : 64;
+	if (bus_width == 64)
+		syndrome &= 0xff;
+	else
+		syndrome &= 0xffff;
+
+	err_addr = make64(
+		ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_EXT_ADDRESS),
+		ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ADDRESS));
+	pfn = err_addr >> PAGE_SHIFT;
+
+	for (row_index = 0; row_index < mci->nr_csrows; row_index++) {
+		csrow = mci->csrows[row_index];
+		if ((pfn >= csrow->first_page) && (pfn <= csrow->last_page))
+			break;
+	}
+
+	cap_high = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_HI);
+	cap_low = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_LO);
+
+	/*
+	 * Analyze single-bit errors on 64-bit wide buses
+	 * TODO: Add support for 32-bit wide buses
+	 */
+	if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) {
+		sbe_ecc_decode(cap_high, cap_low, syndrome,
+				&bad_data_bit, &bad_ecc_bit);
+
+		if (bad_data_bit != -1)
+			fsl_mc_printk(mci, KERN_ERR,
+				"Faulty Data bit: %d\n", bad_data_bit);
+		if (bad_ecc_bit != -1)
+			fsl_mc_printk(mci, KERN_ERR,
+				"Faulty ECC bit: %d\n", bad_ecc_bit);
+
+		fsl_mc_printk(mci, KERN_ERR,
+			"Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n",
+			cap_high ^ (1 << (bad_data_bit - 32)),
+			cap_low ^ (1 << bad_data_bit),
+			syndrome ^ (1 << bad_ecc_bit));
+	}
+
+	fsl_mc_printk(mci, KERN_ERR,
+			"Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n",
+			cap_high, cap_low, syndrome);
+	fsl_mc_printk(mci, KERN_ERR, "Err addr: %#8.8llx\n", err_addr);
+	fsl_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn);
+
+	/* we are out of range */
+	if (row_index == mci->nr_csrows)
+		fsl_mc_printk(mci, KERN_ERR, "PFN out of range!\n");
+
+	if (err_detect & DDR_EDE_SBE)
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
+				     pfn, err_addr & ~PAGE_MASK, syndrome,
+				     row_index, 0, -1,
+				     mci->ctl_name, "");
+
+	if (err_detect & DDR_EDE_MBE)
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
+				     pfn, err_addr & ~PAGE_MASK, syndrome,
+				     row_index, 0, -1,
+				     mci->ctl_name, "");
+
+	ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect);
+}
+
+static irqreturn_t fsl_mc_isr(int irq, void *dev_id)
+{
+	struct mem_ctl_info *mci = dev_id;
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+	u32 err_detect;
+
+	err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT);
+	if (!err_detect)
+		return IRQ_NONE;
+
+	fsl_mc_check(mci);
+
+	return IRQ_HANDLED;
+}
+
+static void fsl_ddr_init_csrows(struct mem_ctl_info *mci)
+{
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+	struct csrow_info *csrow;
+	struct dimm_info *dimm;
+	u32 sdram_ctl;
+	u32 sdtype;
+	enum mem_type mtype;
+	u32 cs_bnds;
+	int index;
+
+	sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG);
+
+	sdtype = sdram_ctl & DSC_SDTYPE_MASK;
+	if (sdram_ctl & DSC_RD_EN) {
+		switch (sdtype) {
+		case 0x02000000:
+			mtype = MEM_RDDR;
+			break;
+		case 0x03000000:
+			mtype = MEM_RDDR2;
+			break;
+		case 0x07000000:
+			mtype = MEM_RDDR3;
+			break;
+		case 0x05000000:
+			mtype = MEM_RDDR4;
+			break;
+		default:
+			mtype = MEM_UNKNOWN;
+			break;
+		}
+	} else {
+		switch (sdtype) {
+		case 0x02000000:
+			mtype = MEM_DDR;
+			break;
+		case 0x03000000:
+			mtype = MEM_DDR2;
+			break;
+		case 0x07000000:
+			mtype = MEM_DDR3;
+			break;
+		case 0x05000000:
+			mtype = MEM_DDR4;
+			break;
+		default:
+			mtype = MEM_UNKNOWN;
+			break;
+		}
+	}
+
+	for (index = 0; index < mci->nr_csrows; index++) {
+		u32 start;
+		u32 end;
+
+		csrow = mci->csrows[index];
+		dimm = csrow->channels[0]->dimm;
+
+		cs_bnds = ddr_in32(pdata->mc_vbase + FSL_MC_CS_BNDS_0 +
+				   (index * FSL_MC_CS_BNDS_OFS));
+
+		start = (cs_bnds & 0xffff0000) >> 16;
+		end   = (cs_bnds & 0x0000ffff);
+
+		if (start == end)
+			continue;	/* not populated */
+
+		start <<= (24 - PAGE_SHIFT);
+		end   <<= (24 - PAGE_SHIFT);
+		end    |= (1 << (24 - PAGE_SHIFT)) - 1;
+
+		csrow->first_page = start;
+		csrow->last_page = end;
+
+		dimm->nr_pages = end + 1 - start;
+		dimm->grain = 8;
+		dimm->mtype = mtype;
+		dimm->dtype = DEV_UNKNOWN;
+		if (sdram_ctl & DSC_X32_EN)
+			dimm->dtype = DEV_X32;
+		dimm->edac_mode = EDAC_SECDED;
+	}
+}
+
+int fsl_mc_err_probe(struct platform_device *op)
+{
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[2];
+	struct fsl_mc_pdata *pdata;
+	struct resource r;
+	u32 sdram_ctl;
+	int res;
+
+	if (!devres_open_group(&op->dev, fsl_mc_err_probe, GFP_KERNEL))
+		return -ENOMEM;
+
+	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
+	layers[0].size = 4;
+	layers[0].is_virt_csrow = true;
+	layers[1].type = EDAC_MC_LAYER_CHANNEL;
+	layers[1].size = 1;
+	layers[1].is_virt_csrow = false;
+	mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
+			    sizeof(*pdata));
+	if (!mci) {
+		devres_release_group(&op->dev, fsl_mc_err_probe);
+		return -ENOMEM;
+	}
+
+	pdata = mci->pvt_info;
+	pdata->name = "fsl_mc_err";
+	mci->pdev = &op->dev;
+	pdata->edac_idx = edac_mc_idx++;
+	dev_set_drvdata(mci->pdev, mci);
+	mci->ctl_name = pdata->name;
+	mci->dev_name = pdata->name;
+
+	/*
+	 * Get the endianness of DDR controller registers.
+	 * Default is big endian.
+	 */
+	little_endian = of_property_read_bool(op->dev.of_node, "little-endian");
+
+	res = of_address_to_resource(op->dev.of_node, 0, &r);
+	if (res) {
+		pr_err("%s: Unable to get resource for MC err regs\n",
+		       __func__);
+		goto err;
+	}
+
+	if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r),
+				     pdata->name)) {
+		pr_err("%s: Error while requesting mem region\n",
+		       __func__);
+		res = -EBUSY;
+		goto err;
+	}
+
+	pdata->mc_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r));
+	if (!pdata->mc_vbase) {
+		pr_err("%s: Unable to setup MC err regs\n", __func__);
+		res = -ENOMEM;
+		goto err;
+	}
+
+	sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG);
+	if (!(sdram_ctl & DSC_ECC_EN)) {
+		/* no ECC */
+		pr_warn("%s: No ECC DIMMs discovered\n", __func__);
+		res = -ENODEV;
+		goto err;
+	}
+
+	edac_dbg(3, "init mci\n");
+	mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR |
+			 MEM_FLAG_DDR2 | MEM_FLAG_RDDR2 |
+			 MEM_FLAG_DDR3 | MEM_FLAG_RDDR3 |
+			 MEM_FLAG_DDR4 | MEM_FLAG_RDDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
+	mci->edac_cap = EDAC_FLAG_SECDED;
+	mci->mod_name = EDAC_MOD_STR;
+
+	if (edac_op_state == EDAC_OPSTATE_POLL)
+		mci->edac_check = fsl_mc_check;
+
+	mci->ctl_page_to_phys = NULL;
+
+	mci->scrub_mode = SCRUB_SW_SRC;
+
+	fsl_ddr_init_csrows(mci);
+
+	/* store the original error disable bits */
+	orig_ddr_err_disable = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DISABLE);
+	ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, 0);
+
+	/* clear all error bits */
+	ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, ~0);
+
+	res = edac_mc_add_mc_with_groups(mci, fsl_ddr_dev_groups);
+	if (res) {
+		edac_dbg(3, "failed edac_mc_add_mc()\n");
+		goto err;
+	}
+
+	if (edac_op_state == EDAC_OPSTATE_INT) {
+		ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN,
+			  DDR_EIE_MBEE | DDR_EIE_SBEE);
+
+		/* store the original error management threshold */
+		orig_ddr_err_sbe = ddr_in32(pdata->mc_vbase +
+					    FSL_MC_ERR_SBE) & 0xff0000;
+
+		/* set threshold to 1 error per interrupt */
+		ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, 0x10000);
+
+		/* register interrupts */
+		pdata->irq = platform_get_irq(op, 0);
+		res = devm_request_irq(&op->dev, pdata->irq,
+				       fsl_mc_isr,
+				       IRQF_SHARED,
+				       "[EDAC] MC err", mci);
+		if (res < 0) {
+			pr_err("%s: Unable to request irq %d for FSL DDR DRAM ERR\n",
+			       __func__, pdata->irq);
+			res = -ENODEV;
+			goto err2;
+		}
+
+		pr_info(EDAC_MOD_STR " acquired irq %d for MC\n",
+		       pdata->irq);
+	}
+
+	devres_remove_group(&op->dev, fsl_mc_err_probe);
+	edac_dbg(3, "success\n");
+	pr_info(EDAC_MOD_STR " MC err registered\n");
+
+	return 0;
+
+err2:
+	edac_mc_del_mc(&op->dev);
+err:
+	devres_release_group(&op->dev, fsl_mc_err_probe);
+	edac_mc_free(mci);
+	return res;
+}
+
+int fsl_mc_err_remove(struct platform_device *op)
+{
+	struct mem_ctl_info *mci = dev_get_drvdata(&op->dev);
+	struct fsl_mc_pdata *pdata = mci->pvt_info;
+
+	edac_dbg(0, "\n");
+
+	if (edac_op_state == EDAC_OPSTATE_INT) {
+		ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, 0);
+	}
+
+	ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE,
+		  orig_ddr_err_disable);
+	ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, orig_ddr_err_sbe);
+
+	edac_mc_del_mc(&op->dev);
+	edac_mc_free(mci);
+	return 0;
+}
diff --git a/drivers/edac/fsl_ddr_edac.h b/drivers/edac/fsl_ddr_edac.h
new file mode 100644
index 0000000..4ccee29
--- /dev/null
+++ b/drivers/edac/fsl_ddr_edac.h
@@ -0,0 +1,79 @@
+/*
+ * Freescale Memory Controller kernel module
+ *
+ * Support  Power-based SoCs including MPC85xx, MPC86xx, MPC83xx and
+ * ARM-based Layerscape SoCs including LS2xxx. Originally split
+ * out from mpc85xx_edac EDAC driver.
+ *
+ * Author: Dave Jiang <djiang@mvista.com>
+ *
+ * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ *
+ */
+#ifndef _FSL_DDR_EDAC_H_
+#define _FSL_DDR_EDAC_H_
+
+#define fsl_mc_printk(mci, level, fmt, arg...) \
+	edac_mc_chipset_printk(mci, level, "FSL_DDR", fmt, ##arg)
+
+/*
+ * DRAM error defines
+ */
+
+/* DDR_SDRAM_CFG */
+#define FSL_MC_DDR_SDRAM_CFG	0x0110
+#define FSL_MC_CS_BNDS_0		0x0000
+#define FSL_MC_CS_BNDS_OFS		0x0008
+
+#define FSL_MC_DATA_ERR_INJECT_HI	0x0e00
+#define FSL_MC_DATA_ERR_INJECT_LO	0x0e04
+#define FSL_MC_ECC_ERR_INJECT	0x0e08
+#define FSL_MC_CAPTURE_DATA_HI	0x0e20
+#define FSL_MC_CAPTURE_DATA_LO	0x0e24
+#define FSL_MC_CAPTURE_ECC		0x0e28
+#define FSL_MC_ERR_DETECT		0x0e40
+#define FSL_MC_ERR_DISABLE		0x0e44
+#define FSL_MC_ERR_INT_EN		0x0e48
+#define FSL_MC_CAPTURE_ATRIBUTES	0x0e4c
+#define FSL_MC_CAPTURE_ADDRESS	0x0e50
+#define FSL_MC_CAPTURE_EXT_ADDRESS	0x0e54
+#define FSL_MC_ERR_SBE		0x0e58
+
+#define DSC_MEM_EN	0x80000000
+#define DSC_ECC_EN	0x20000000
+#define DSC_RD_EN	0x10000000
+#define DSC_DBW_MASK	0x00180000
+#define DSC_DBW_32	0x00080000
+#define DSC_DBW_64	0x00000000
+
+#define DSC_SDTYPE_MASK		0x07000000
+#define DSC_X32_EN	0x00000020
+
+/* Err_Int_En */
+#define DDR_EIE_MSEE	0x1	/* memory select */
+#define DDR_EIE_SBEE	0x4	/* single-bit ECC error */
+#define DDR_EIE_MBEE	0x8	/* multi-bit ECC error */
+
+/* Err_Detect */
+#define DDR_EDE_MSE		0x1	/* memory select */
+#define DDR_EDE_SBE		0x4	/* single-bit ECC error */
+#define DDR_EDE_MBE		0x8	/* multi-bit ECC error */
+#define DDR_EDE_MME		0x80000000	/* multiple memory errors */
+
+/* Err_Disable */
+#define DDR_EDI_MSED	0x1	/* memory select disable */
+#define	DDR_EDI_SBED	0x4	/* single-bit ECC error disable */
+#define	DDR_EDI_MBED	0x8	/* multi-bit ECC error disable */
+
+struct fsl_mc_pdata {
+	char *name;
+	int edac_idx;
+	void __iomem *mc_vbase;
+	int irq;
+};
+int fsl_mc_err_probe(struct platform_device *op);
+int fsl_mc_err_remove(struct platform_device *op);
+#endif
diff --git a/drivers/edac/layerscape_edac.c b/drivers/edac/layerscape_edac.c
new file mode 100644
index 0000000..6c59d89
--- /dev/null
+++ b/drivers/edac/layerscape_edac.c
@@ -0,0 +1,73 @@
+/*
+ * Freescale Memory Controller kernel module
+ *
+ * Author: York Sun <york.sun@nxp.com>
+ *
+ * Copyright 2016 NXP Semiconductor
+ *
+ * Derived from mpc85xx_edac.c
+ * Author: Dave Jiang <djiang@mvista.com>
+ *
+ * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under
+ * the terms of the GNU General Public License version 2. This program
+ * is licensed "as is" without any warranty of any kind, whether express
+ * or implied.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include "edac_core.h"
+#include "fsl_ddr_edac.h"
+
+static const struct of_device_id fsl_ddr_mc_err_of_match[] = {
+	{ .compatible = "fsl,qoriq-memory-controller", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, fsl_ddr_mc_err_of_match);
+
+static struct platform_driver fsl_ddr_mc_err_driver = {
+	.probe = fsl_mc_err_probe,
+	.remove = fsl_mc_err_remove,
+	.driver = {
+		.name = "fsl_ddr_mc_err",
+		.of_match_table = fsl_ddr_mc_err_of_match,
+	},
+};
+
+static int __init fsl_ddr_mc_init(void)
+{
+	int res;
+
+	/* make sure error reporting method is sane */
+	switch (edac_op_state) {
+	case EDAC_OPSTATE_POLL:
+	case EDAC_OPSTATE_INT:
+		break;
+	default:
+		edac_op_state = EDAC_OPSTATE_INT;
+		break;
+	}
+
+	res = platform_driver_register(&fsl_ddr_mc_err_driver);
+	if (res) {
+		pr_err("MC fails to register\n");
+		return res;
+	}
+
+	return 0;
+}
+
+module_init(fsl_ddr_mc_init);
+
+static void __exit fsl_ddr_mc_exit(void)
+{
+	platform_driver_unregister(&fsl_ddr_mc_err_driver);
+}
+
+module_exit(fsl_ddr_mc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("NXP Semiconductor");
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state,
+		 "EDAC Error Reporting state: 0=Poll, 2=Interrupt");
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 9b6800a..daaac2c 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -148,12 +148,12 @@
 };
 
 /* Scalable MCA error strings */
-static const char * const f17h_ls_mce_desc[] = {
+static const char * const smca_ls_mce_desc[] = {
 	"Load queue parity",
 	"Store queue parity",
 	"Miss address buffer payload parity",
 	"L1 TLB parity",
-	"",						/* reserved */
+	"Reserved",
 	"DC tag error type 6",
 	"DC tag error type 1",
 	"Internal error type 1",
@@ -172,7 +172,7 @@
 	"L2 fill data error",
 };
 
-static const char * const f17h_if_mce_desc[] = {
+static const char * const smca_if_mce_desc[] = {
 	"microtag probe port parity error",
 	"IC microtag or full tag multi-hit error",
 	"IC full tag parity",
@@ -185,19 +185,22 @@
 	"BPQ snoop parity on Thread 1",
 	"L1 BTB multi-match error",
 	"L2 BTB multi-match error",
+	"L2 Cache Response Poison error",
+	"System Read Data error",
 };
 
-static const char * const f17h_l2_mce_desc[] = {
+static const char * const smca_l2_mce_desc[] = {
 	"L2M tag multi-way-hit error",
 	"L2M tag ECC error",
 	"L2M data ECC error",
 	"HW assert",
 };
 
-static const char * const f17h_de_mce_desc[] = {
+static const char * const smca_de_mce_desc[] = {
 	"uop cache tag parity error",
 	"uop cache data parity error",
 	"Insn buffer parity error",
+	"uop queue parity error",
 	"Insn dispatch queue parity error",
 	"Fetch address FIFO parity",
 	"Patch RAM data parity",
@@ -205,7 +208,7 @@
 	"uop buffer parity"
 };
 
-static const char * const f17h_ex_mce_desc[] = {
+static const char * const smca_ex_mce_desc[] = {
 	"Watchdog timeout error",
 	"Phy register file parity",
 	"Flag register file parity",
@@ -214,18 +217,22 @@
 	"EX payload parity",
 	"Checkpoint queue parity",
 	"Retire dispatch queue parity",
+	"Retire status queue parity error",
+	"Scheduling queue parity error",
+	"Branch buffer queue parity error",
 };
 
-static const char * const f17h_fp_mce_desc[] = {
+static const char * const smca_fp_mce_desc[] = {
 	"Physical register file parity",
 	"Freelist parity error",
 	"Schedule queue parity",
 	"NSQ parity error",
 	"Retire queue parity",
 	"Status register file parity",
+	"Hardware assertion",
 };
 
-static const char * const f17h_l3_mce_desc[] = {
+static const char * const smca_l3_mce_desc[] = {
 	"Shadow tag macro ECC error",
 	"Shadow tag macro multi-way-hit error",
 	"L3M tag ECC error",
@@ -236,7 +243,7 @@
 	"L3 HW assert",
 };
 
-static const char * const f17h_cs_mce_desc[] = {
+static const char * const smca_cs_mce_desc[] = {
 	"Illegal request from transport layer",
 	"Address violation",
 	"Security violation",
@@ -248,14 +255,14 @@
 	"ECC error on probe filter access",
 };
 
-static const char * const f17h_pie_mce_desc[] = {
+static const char * const smca_pie_mce_desc[] = {
 	"HW assert",
 	"Internal PIE register security violation",
 	"Error on GMI link",
 	"Poison data written to internal PIE register",
 };
 
-static const char * const f17h_umc_mce_desc[] = {
+static const char * const smca_umc_mce_desc[] = {
 	"DRAM ECC error",
 	"Data poison error on DRAM",
 	"SDP parity error",
@@ -264,18 +271,39 @@
 	"Write data CRC error",
 };
 
-static const char * const f17h_pb_mce_desc[] = {
+static const char * const smca_pb_mce_desc[] = {
 	"Parameter Block RAM ECC error",
 };
 
-static const char * const f17h_psp_mce_desc[] = {
+static const char * const smca_psp_mce_desc[] = {
 	"PSP RAM ECC or parity error",
 };
 
-static const char * const f17h_smu_mce_desc[] = {
+static const char * const smca_smu_mce_desc[] = {
 	"SMU RAM ECC or parity error",
 };
 
+struct smca_mce_desc {
+	const char * const *descs;
+	unsigned int num_descs;
+};
+
+static struct smca_mce_desc smca_mce_descs[] = {
+	[SMCA_LS]	= { smca_ls_mce_desc,	ARRAY_SIZE(smca_ls_mce_desc)	},
+	[SMCA_IF]	= { smca_if_mce_desc,	ARRAY_SIZE(smca_if_mce_desc)	},
+	[SMCA_L2_CACHE]	= { smca_l2_mce_desc,	ARRAY_SIZE(smca_l2_mce_desc)	},
+	[SMCA_DE]	= { smca_de_mce_desc,	ARRAY_SIZE(smca_de_mce_desc)	},
+	[SMCA_EX]	= { smca_ex_mce_desc,	ARRAY_SIZE(smca_ex_mce_desc)	},
+	[SMCA_FP]	= { smca_fp_mce_desc,	ARRAY_SIZE(smca_fp_mce_desc)	},
+	[SMCA_L3_CACHE]	= { smca_l3_mce_desc,	ARRAY_SIZE(smca_l3_mce_desc)	},
+	[SMCA_CS]	= { smca_cs_mce_desc,	ARRAY_SIZE(smca_cs_mce_desc)	},
+	[SMCA_PIE]	= { smca_pie_mce_desc,	ARRAY_SIZE(smca_pie_mce_desc)	},
+	[SMCA_UMC]	= { smca_umc_mce_desc,	ARRAY_SIZE(smca_umc_mce_desc)	},
+	[SMCA_PB]	= { smca_pb_mce_desc,	ARRAY_SIZE(smca_pb_mce_desc)	},
+	[SMCA_PSP]	= { smca_psp_mce_desc,	ARRAY_SIZE(smca_psp_mce_desc)	},
+	[SMCA_SMU]	= { smca_smu_mce_desc,	ARRAY_SIZE(smca_smu_mce_desc)	},
+};
+
 static bool f12h_mc0_mce(u16 ec, u8 xec)
 {
 	bool ret = false;
@@ -820,175 +848,35 @@
 	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
 }
 
-static void decode_f17h_core_errors(const char *ip_name, u8 xec,
-				   unsigned int mca_type)
-{
-	const char * const *error_desc_array;
-	size_t len;
-
-	pr_emerg(HW_ERR "%s Error: ", ip_name);
-
-	switch (mca_type) {
-	case SMCA_LS:
-		error_desc_array = f17h_ls_mce_desc;
-		len = ARRAY_SIZE(f17h_ls_mce_desc) - 1;
-
-		if (xec == 0x4) {
-			pr_cont("Unrecognized LS MCA error code.\n");
-			return;
-		}
-		break;
-
-	case SMCA_IF:
-		error_desc_array = f17h_if_mce_desc;
-		len = ARRAY_SIZE(f17h_if_mce_desc) - 1;
-		break;
-
-	case SMCA_L2_CACHE:
-		error_desc_array = f17h_l2_mce_desc;
-		len = ARRAY_SIZE(f17h_l2_mce_desc) - 1;
-		break;
-
-	case SMCA_DE:
-		error_desc_array = f17h_de_mce_desc;
-		len = ARRAY_SIZE(f17h_de_mce_desc) - 1;
-		break;
-
-	case SMCA_EX:
-		error_desc_array = f17h_ex_mce_desc;
-		len = ARRAY_SIZE(f17h_ex_mce_desc) - 1;
-		break;
-
-	case SMCA_FP:
-		error_desc_array = f17h_fp_mce_desc;
-		len = ARRAY_SIZE(f17h_fp_mce_desc) - 1;
-		break;
-
-	case SMCA_L3_CACHE:
-		error_desc_array = f17h_l3_mce_desc;
-		len = ARRAY_SIZE(f17h_l3_mce_desc) - 1;
-		break;
-
-	default:
-		pr_cont("Corrupted MCA core error info.\n");
-		return;
-	}
-
-	if (xec > len) {
-		pr_cont("Unrecognized %s MCA bank error code.\n",
-			 amd_core_mcablock_names[mca_type]);
-		return;
-	}
-
-	pr_cont("%s.\n", error_desc_array[xec]);
-}
-
-static void decode_df_errors(u8 xec, unsigned int mca_type)
-{
-	const char * const *error_desc_array;
-	size_t len;
-
-	pr_emerg(HW_ERR "Data Fabric Error: ");
-
-	switch (mca_type) {
-	case  SMCA_CS:
-		error_desc_array = f17h_cs_mce_desc;
-		len = ARRAY_SIZE(f17h_cs_mce_desc) - 1;
-		break;
-
-	case SMCA_PIE:
-		error_desc_array = f17h_pie_mce_desc;
-		len = ARRAY_SIZE(f17h_pie_mce_desc) - 1;
-		break;
-
-	default:
-		pr_cont("Corrupted MCA Data Fabric info.\n");
-		return;
-	}
-
-	if (xec > len) {
-		pr_cont("Unrecognized %s MCA bank error code.\n",
-			 amd_df_mcablock_names[mca_type]);
-		return;
-	}
-
-	pr_cont("%s.\n", error_desc_array[xec]);
-}
-
 /* Decode errors according to Scalable MCA specification */
 static void decode_smca_errors(struct mce *m)
 {
-	u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
-	unsigned int hwid, mca_type, i;
-	u8 xec = XEC(m->status, xec_mask);
-	const char * const *error_desc_array;
+	struct smca_hwid_mcatype *type;
+	unsigned int bank_type;
 	const char *ip_name;
-	u32 low, high;
-	size_t len;
+	u8 xec = XEC(m->status, xec_mask);
 
-	if (rdmsr_safe(addr, &low, &high)) {
-		pr_emerg("Invalid IP block specified, error information is unreliable.\n");
+	if (m->bank >= ARRAY_SIZE(smca_banks))
 		return;
-	}
 
-	hwid = high & MCI_IPID_HWID;
-	mca_type = (high & MCI_IPID_MCATYPE) >> 16;
+	if (boot_cpu_data.x86 >= 0x17 && m->bank == 4)
+		pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n");
 
-	pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
-
-	/*
-	 * Based on hwid and mca_type values, decode errors from respective IPs.
-	 * Note: mca_type values make sense only in the context of an hwid.
-	 */
-	for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
-		if (amd_hwids[i].hwid == hwid)
-			break;
-
-	switch (i) {
-	case SMCA_F17H_CORE:
-		ip_name = (mca_type == SMCA_L3_CACHE) ?
-			  "L3 Cache" : "F17h Core";
-		return decode_f17h_core_errors(ip_name, xec, mca_type);
-		break;
-
-	case SMCA_DF:
-		return decode_df_errors(xec, mca_type);
-		break;
-
-	case SMCA_UMC:
-		error_desc_array = f17h_umc_mce_desc;
-		len = ARRAY_SIZE(f17h_umc_mce_desc) - 1;
-		break;
-
-	case SMCA_PB:
-		error_desc_array = f17h_pb_mce_desc;
-		len = ARRAY_SIZE(f17h_pb_mce_desc) - 1;
-		break;
-
-	case SMCA_PSP:
-		error_desc_array = f17h_psp_mce_desc;
-		len = ARRAY_SIZE(f17h_psp_mce_desc) - 1;
-		break;
-
-	case SMCA_SMU:
-		error_desc_array = f17h_smu_mce_desc;
-		len = ARRAY_SIZE(f17h_smu_mce_desc) - 1;
-		break;
-
-	default:
-		pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
+	type = smca_banks[m->bank].type;
+	if (!type)
 		return;
+
+	bank_type = type->bank_type;
+	ip_name = smca_bank_names[bank_type].long_name;
+
+	pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
+
+	/* Only print the decode of valid error codes */
+	if (xec < smca_mce_descs[bank_type].num_descs &&
+			(type->xec_bitmap & BIT_ULL(xec))) {
+		pr_emerg(HW_ERR "%s Error: ", ip_name);
+		pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
 	}
-
-	ip_name = amd_hwids[i].name;
-	pr_emerg(HW_ERR "%s Error: ", ip_name);
-
-	if (xec > len) {
-		pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
-		return;
-	}
-
-	pr_cont("%s.\n", error_desc_array[xec]);
 }
 
 static inline void amd_decode_err_code(u16 ec)
@@ -1078,6 +966,8 @@
 		u32 low, high;
 		u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
 
+		pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
+
 		if (!rdmsr_safe(addr, &low, &high) &&
 		    (low & MCI_CONFIG_MCAX))
 			pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
@@ -1091,12 +981,20 @@
 	pr_cont("]: 0x%016llx\n", m->status);
 
 	if (m->status & MCI_STATUS_ADDRV)
-		pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
+		pr_emerg(HW_ERR "Error Addr: 0x%016llx", m->addr);
 
 	if (boot_cpu_has(X86_FEATURE_SMCA)) {
+		if (m->status & MCI_STATUS_SYNDV)
+			pr_cont(", Syndrome: 0x%016llx", m->synd);
+
+		pr_cont(", IPID: 0x%016llx", m->ipid);
+
+		pr_cont("\n");
+
 		decode_smca_errors(m);
 		goto err_code;
-	}
+	} else
+		pr_cont("\n");
 
 	if (!fam_ops)
 		goto err_code;
diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c
index ca63d0d..ff05675 100644
--- a/drivers/edac/mpc85xx_edac.c
+++ b/drivers/edac/mpc85xx_edac.c
@@ -27,15 +27,12 @@
 #include "edac_module.h"
 #include "edac_core.h"
 #include "mpc85xx_edac.h"
+#include "fsl_ddr_edac.h"
 
 static int edac_dev_idx;
 #ifdef CONFIG_PCI
 static int edac_pci_idx;
 #endif
-static int edac_mc_idx;
-
-static u32 orig_ddr_err_disable;
-static u32 orig_ddr_err_sbe;
 
 /*
  * PCI Err defines
@@ -46,103 +43,6 @@
 #endif
 
 static u32 orig_l2_err_disable;
-#ifdef CONFIG_FSL_SOC_BOOKE
-static u32 orig_hid1[2];
-#endif
-
-/************************ MC SYSFS parts ***********************************/
-
-#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
-
-static ssize_t mpc85xx_mc_inject_data_hi_show(struct device *dev,
-					      struct device_attribute *mattr,
-					      char *data)
-{
-	struct mem_ctl_info *mci = to_mci(dev);
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-	return sprintf(data, "0x%08x",
-		       in_be32(pdata->mc_vbase +
-			       MPC85XX_MC_DATA_ERR_INJECT_HI));
-}
-
-static ssize_t mpc85xx_mc_inject_data_lo_show(struct device *dev,
-					      struct device_attribute *mattr,
-					      char *data)
-{
-	struct mem_ctl_info *mci = to_mci(dev);
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-	return sprintf(data, "0x%08x",
-		       in_be32(pdata->mc_vbase +
-			       MPC85XX_MC_DATA_ERR_INJECT_LO));
-}
-
-static ssize_t mpc85xx_mc_inject_ctrl_show(struct device *dev,
-					   struct device_attribute *mattr,
-					   char *data)
-{
-	struct mem_ctl_info *mci = to_mci(dev);
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-	return sprintf(data, "0x%08x",
-		       in_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT));
-}
-
-static ssize_t mpc85xx_mc_inject_data_hi_store(struct device *dev,
-					       struct device_attribute *mattr,
-					       const char *data, size_t count)
-{
-	struct mem_ctl_info *mci = to_mci(dev);
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-	if (isdigit(*data)) {
-		out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_HI,
-			 simple_strtoul(data, NULL, 0));
-		return count;
-	}
-	return 0;
-}
-
-static ssize_t mpc85xx_mc_inject_data_lo_store(struct device *dev,
-					       struct device_attribute *mattr,
-					       const char *data, size_t count)
-{
-	struct mem_ctl_info *mci = to_mci(dev);
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-	if (isdigit(*data)) {
-		out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_LO,
-			 simple_strtoul(data, NULL, 0));
-		return count;
-	}
-	return 0;
-}
-
-static ssize_t mpc85xx_mc_inject_ctrl_store(struct device *dev,
-					       struct device_attribute *mattr,
-					       const char *data, size_t count)
-{
-	struct mem_ctl_info *mci = to_mci(dev);
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-	if (isdigit(*data)) {
-		out_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT,
-			 simple_strtoul(data, NULL, 0));
-		return count;
-	}
-	return 0;
-}
-
-DEVICE_ATTR(inject_data_hi, S_IRUGO | S_IWUSR,
-	    mpc85xx_mc_inject_data_hi_show, mpc85xx_mc_inject_data_hi_store);
-DEVICE_ATTR(inject_data_lo, S_IRUGO | S_IWUSR,
-	    mpc85xx_mc_inject_data_lo_show, mpc85xx_mc_inject_data_lo_store);
-DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR,
-	    mpc85xx_mc_inject_ctrl_show, mpc85xx_mc_inject_ctrl_store);
-
-static struct attribute *mpc85xx_dev_attrs[] = {
-	&dev_attr_inject_data_hi.attr,
-	&dev_attr_inject_data_lo.attr,
-	&dev_attr_inject_ctrl.attr,
-	NULL
-};
-
-ATTRIBUTE_GROUPS(mpc85xx_dev);
 
 /**************************** PCI Err device ***************************/
 #ifdef CONFIG_PCI
@@ -160,18 +60,18 @@
 		return;
 	}
 
-	printk(KERN_ERR "PCI error(s) detected\n");
-	printk(KERN_ERR "PCI/X ERR_DR register: %#08x\n", err_detect);
+	pr_err("PCI error(s) detected\n");
+	pr_err("PCI/X ERR_DR register: %#08x\n", err_detect);
 
-	printk(KERN_ERR "PCI/X ERR_ATTRIB register: %#08x\n",
+	pr_err("PCI/X ERR_ATTRIB register: %#08x\n",
 	       in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ATTRIB));
-	printk(KERN_ERR "PCI/X ERR_ADDR register: %#08x\n",
+	pr_err("PCI/X ERR_ADDR register: %#08x\n",
 	       in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR));
-	printk(KERN_ERR "PCI/X ERR_EXT_ADDR register: %#08x\n",
+	pr_err("PCI/X ERR_EXT_ADDR register: %#08x\n",
 	       in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EXT_ADDR));
-	printk(KERN_ERR "PCI/X ERR_DL register: %#08x\n",
+	pr_err("PCI/X ERR_DL register: %#08x\n",
 	       in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DL));
-	printk(KERN_ERR "PCI/X ERR_DH register: %#08x\n",
+	pr_err("PCI/X ERR_DH register: %#08x\n",
 	       in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DH));
 
 	/* clear error bits */
@@ -187,14 +87,14 @@
 static void mpc85xx_pcie_check(struct edac_pci_ctl_info *pci)
 {
 	struct mpc85xx_pci_pdata *pdata = pci->pvt_info;
-	u32 err_detect;
+	u32 err_detect, err_cap_stat;
 
 	err_detect = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR);
+	err_cap_stat = in_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR);
 
 	pr_err("PCIe error(s) detected\n");
 	pr_err("PCIe ERR_DR register: 0x%08x\n", err_detect);
-	pr_err("PCIe ERR_CAP_STAT register: 0x%08x\n",
-			in_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR));
+	pr_err("PCIe ERR_CAP_STAT register: 0x%08x\n", err_cap_stat);
 	pr_err("PCIe ERR_CAP_R0 register: 0x%08x\n",
 			in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R0));
 	pr_err("PCIe ERR_CAP_R1 register: 0x%08x\n",
@@ -206,6 +106,9 @@
 
 	/* clear error bits */
 	out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, err_detect);
+
+	/* reset error capture */
+	out_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR, err_cap_stat | 0x1);
 }
 
 static int mpc85xx_pcie_find_capability(struct device_node *np)
@@ -267,7 +170,6 @@
 
 	pdata = pci->pvt_info;
 	pdata->name = "mpc85xx_pci_err";
-	pdata->irq = NO_IRQ;
 
 	plat_data = op->dev.platform_data;
 	if (!plat_data) {
@@ -297,8 +199,7 @@
 
 	res = of_address_to_resource(of_node, 0, &r);
 	if (res) {
-		printk(KERN_ERR "%s: Unable to get resource for "
-		       "PCI err regs\n", __func__);
+		pr_err("%s: Unable to get resource for PCI err regs\n", __func__);
 		goto err;
 	}
 
@@ -307,15 +208,14 @@
 
 	if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r),
 					pdata->name)) {
-		printk(KERN_ERR "%s: Error while requesting mem region\n",
-		       __func__);
+		pr_err("%s: Error while requesting mem region\n", __func__);
 		res = -EBUSY;
 		goto err;
 	}
 
 	pdata->pci_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r));
 	if (!pdata->pci_vbase) {
-		printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__);
+		pr_err("%s: Unable to setup PCI err regs\n", __func__);
 		res = -ENOMEM;
 		goto err;
 	}
@@ -344,6 +244,9 @@
 	/* clear error bits */
 	out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, ~0);
 
+	/* reset error capture */
+	out_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR, 0x1);
+
 	if (edac_pci_add_device(pci, pdata->edac_idx) > 0) {
 		edac_dbg(3, "failed edac_pci_add_device()\n");
 		goto err;
@@ -356,15 +259,14 @@
 				       IRQF_SHARED,
 				       "[EDAC] PCI err", pci);
 		if (res < 0) {
-			printk(KERN_ERR
-			       "%s: Unable to request irq %d for "
-			       "MPC85xx PCI err\n", __func__, pdata->irq);
+			pr_err("%s: Unable to request irq %d for MPC85xx PCI err\n",
+				__func__, pdata->irq);
 			irq_dispose_mapping(pdata->irq);
 			res = -ENODEV;
 			goto err2;
 		}
 
-		printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for PCI Err\n",
+		pr_info(EDAC_MOD_STR " acquired irq %d for PCI Err\n",
 		       pdata->irq);
 	}
 
@@ -386,7 +288,7 @@
 
 	devres_remove_group(&op->dev, mpc85xx_pci_err_probe);
 	edac_dbg(3, "success\n");
-	printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n");
+	pr_info(EDAC_MOD_STR " PCI err registered\n");
 
 	return 0;
 
@@ -529,17 +431,17 @@
 	if (!(err_detect & L2_EDE_MASK))
 		return;
 
-	printk(KERN_ERR "ECC Error in CPU L2 cache\n");
-	printk(KERN_ERR "L2 Error Detect Register: 0x%08x\n", err_detect);
-	printk(KERN_ERR "L2 Error Capture Data High Register: 0x%08x\n",
+	pr_err("ECC Error in CPU L2 cache\n");
+	pr_err("L2 Error Detect Register: 0x%08x\n", err_detect);
+	pr_err("L2 Error Capture Data High Register: 0x%08x\n",
 	       in_be32(pdata->l2_vbase + MPC85XX_L2_CAPTDATAHI));
-	printk(KERN_ERR "L2 Error Capture Data Lo Register: 0x%08x\n",
+	pr_err("L2 Error Capture Data Lo Register: 0x%08x\n",
 	       in_be32(pdata->l2_vbase + MPC85XX_L2_CAPTDATALO));
-	printk(KERN_ERR "L2 Error Syndrome Register: 0x%08x\n",
+	pr_err("L2 Error Syndrome Register: 0x%08x\n",
 	       in_be32(pdata->l2_vbase + MPC85XX_L2_CAPTECC));
-	printk(KERN_ERR "L2 Error Attributes Capture Register: 0x%08x\n",
+	pr_err("L2 Error Attributes Capture Register: 0x%08x\n",
 	       in_be32(pdata->l2_vbase + MPC85XX_L2_ERRATTR));
-	printk(KERN_ERR "L2 Error Address Capture Register: 0x%08x\n",
+	pr_err("L2 Error Address Capture Register: 0x%08x\n",
 	       in_be32(pdata->l2_vbase + MPC85XX_L2_ERRADDR));
 
 	/* clear error detect register */
@@ -588,7 +490,6 @@
 
 	pdata = edac_dev->pvt_info;
 	pdata->name = "mpc85xx_l2_err";
-	pdata->irq = NO_IRQ;
 	edac_dev->dev = &op->dev;
 	dev_set_drvdata(edac_dev->dev, edac_dev);
 	edac_dev->ctl_name = pdata->name;
@@ -596,8 +497,7 @@
 
 	res = of_address_to_resource(op->dev.of_node, 0, &r);
 	if (res) {
-		printk(KERN_ERR "%s: Unable to get resource for "
-		       "L2 err regs\n", __func__);
+		pr_err("%s: Unable to get resource for L2 err regs\n", __func__);
 		goto err;
 	}
 
@@ -606,15 +506,14 @@
 
 	if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r),
 				     pdata->name)) {
-		printk(KERN_ERR "%s: Error while requesting mem region\n",
-		       __func__);
+		pr_err("%s: Error while requesting mem region\n", __func__);
 		res = -EBUSY;
 		goto err;
 	}
 
 	pdata->l2_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r));
 	if (!pdata->l2_vbase) {
-		printk(KERN_ERR "%s: Unable to setup L2 err regs\n", __func__);
+		pr_err("%s: Unable to setup L2 err regs\n", __func__);
 		res = -ENOMEM;
 		goto err;
 	}
@@ -646,16 +545,14 @@
 				       mpc85xx_l2_isr, IRQF_SHARED,
 				       "[EDAC] L2 err", edac_dev);
 		if (res < 0) {
-			printk(KERN_ERR
-			       "%s: Unable to request irq %d for "
-			       "MPC85xx L2 err\n", __func__, pdata->irq);
+			pr_err("%s: Unable to request irq %d for MPC85xx L2 err\n",
+				__func__, pdata->irq);
 			irq_dispose_mapping(pdata->irq);
 			res = -ENODEV;
 			goto err2;
 		}
 
-		printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for L2 Err\n",
-		       pdata->irq);
+		pr_info(EDAC_MOD_STR " acquired irq %d for L2 Err\n", pdata->irq);
 
 		edac_dev->op_state = OP_RUNNING_INTERRUPT;
 
@@ -665,7 +562,7 @@
 	devres_remove_group(&op->dev, mpc85xx_l2_err_probe);
 
 	edac_dbg(3, "success\n");
-	printk(KERN_INFO EDAC_MOD_STR " L2 err registered\n");
+	pr_info(EDAC_MOD_STR " L2 err registered\n");
 
 	return 0;
 
@@ -729,466 +626,6 @@
 	},
 };
 
-/**************************** MC Err device ***************************/
-
-/*
- * Taken from table 8-55 in the MPC8641 User's Manual and/or 9-61 in the
- * MPC8572 User's Manual.  Each line represents a syndrome bit column as a
- * 64-bit value, but split into an upper and lower 32-bit chunk.  The labels
- * below correspond to Freescale's manuals.
- */
-static unsigned int ecc_table[16] = {
-	/* MSB           LSB */
-	/* [0:31]    [32:63] */
-	0xf00fe11e, 0xc33c0ff7,	/* Syndrome bit 7 */
-	0x00ff00ff, 0x00fff0ff,
-	0x0f0f0f0f, 0x0f0fff00,
-	0x11113333, 0x7777000f,
-	0x22224444, 0x8888222f,
-	0x44448888, 0xffff4441,
-	0x8888ffff, 0x11118882,
-	0xffff1111, 0x22221114,	/* Syndrome bit 0 */
-};
-
-/*
- * Calculate the correct ECC value for a 64-bit value specified by high:low
- */
-static u8 calculate_ecc(u32 high, u32 low)
-{
-	u32 mask_low;
-	u32 mask_high;
-	int bit_cnt;
-	u8 ecc = 0;
-	int i;
-	int j;
-
-	for (i = 0; i < 8; i++) {
-		mask_high = ecc_table[i * 2];
-		mask_low = ecc_table[i * 2 + 1];
-		bit_cnt = 0;
-
-		for (j = 0; j < 32; j++) {
-			if ((mask_high >> j) & 1)
-				bit_cnt ^= (high >> j) & 1;
-			if ((mask_low >> j) & 1)
-				bit_cnt ^= (low >> j) & 1;
-		}
-
-		ecc |= bit_cnt << i;
-	}
-
-	return ecc;
-}
-
-/*
- * Create the syndrome code which is generated if the data line specified by
- * 'bit' failed.  Eg generate an 8-bit codes seen in Table 8-55 in the MPC8641
- * User's Manual and 9-61 in the MPC8572 User's Manual.
- */
-static u8 syndrome_from_bit(unsigned int bit) {
-	int i;
-	u8 syndrome = 0;
-
-	/*
-	 * Cycle through the upper or lower 32-bit portion of each value in
-	 * ecc_table depending on if 'bit' is in the upper or lower half of
-	 * 64-bit data.
-	 */
-	for (i = bit < 32; i < 16; i += 2)
-		syndrome |= ((ecc_table[i] >> (bit % 32)) & 1) << (i / 2);
-
-	return syndrome;
-}
-
-/*
- * Decode data and ecc syndrome to determine what went wrong
- * Note: This can only decode single-bit errors
- */
-static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc,
-		       int *bad_data_bit, int *bad_ecc_bit)
-{
-	int i;
-	u8 syndrome;
-
-	*bad_data_bit = -1;
-	*bad_ecc_bit = -1;
-
-	/*
-	 * Calculate the ECC of the captured data and XOR it with the captured
-	 * ECC to find an ECC syndrome value we can search for
-	 */
-	syndrome = calculate_ecc(cap_high, cap_low) ^ cap_ecc;
-
-	/* Check if a data line is stuck... */
-	for (i = 0; i < 64; i++) {
-		if (syndrome == syndrome_from_bit(i)) {
-			*bad_data_bit = i;
-			return;
-		}
-	}
-
-	/* If data is correct, check ECC bits for errors... */
-	for (i = 0; i < 8; i++) {
-		if ((syndrome >> i) & 0x1) {
-			*bad_ecc_bit = i;
-			return;
-		}
-	}
-}
-
-#define make64(high, low) (((u64)(high) << 32) | (low))
-
-static void mpc85xx_mc_check(struct mem_ctl_info *mci)
-{
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-	struct csrow_info *csrow;
-	u32 bus_width;
-	u32 err_detect;
-	u32 syndrome;
-	u64 err_addr;
-	u32 pfn;
-	int row_index;
-	u32 cap_high;
-	u32 cap_low;
-	int bad_data_bit;
-	int bad_ecc_bit;
-
-	err_detect = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT);
-	if (!err_detect)
-		return;
-
-	mpc85xx_mc_printk(mci, KERN_ERR, "Err Detect Register: %#8.8x\n",
-			  err_detect);
-
-	/* no more processing if not ECC bit errors */
-	if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) {
-		out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, err_detect);
-		return;
-	}
-
-	syndrome = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ECC);
-
-	/* Mask off appropriate bits of syndrome based on bus width */
-	bus_width = (in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG) &
-			DSC_DBW_MASK) ? 32 : 64;
-	if (bus_width == 64)
-		syndrome &= 0xff;
-	else
-		syndrome &= 0xffff;
-
-	err_addr = make64(
-		in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_EXT_ADDRESS),
-		in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ADDRESS));
-	pfn = err_addr >> PAGE_SHIFT;
-
-	for (row_index = 0; row_index < mci->nr_csrows; row_index++) {
-		csrow = mci->csrows[row_index];
-		if ((pfn >= csrow->first_page) && (pfn <= csrow->last_page))
-			break;
-	}
-
-	cap_high = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_HI);
-	cap_low = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_LO);
-
-	/*
-	 * Analyze single-bit errors on 64-bit wide buses
-	 * TODO: Add support for 32-bit wide buses
-	 */
-	if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) {
-		sbe_ecc_decode(cap_high, cap_low, syndrome,
-				&bad_data_bit, &bad_ecc_bit);
-
-		if (bad_data_bit != -1)
-			mpc85xx_mc_printk(mci, KERN_ERR,
-				"Faulty Data bit: %d\n", bad_data_bit);
-		if (bad_ecc_bit != -1)
-			mpc85xx_mc_printk(mci, KERN_ERR,
-				"Faulty ECC bit: %d\n", bad_ecc_bit);
-
-		mpc85xx_mc_printk(mci, KERN_ERR,
-			"Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n",
-			cap_high ^ (1 << (bad_data_bit - 32)),
-			cap_low ^ (1 << bad_data_bit),
-			syndrome ^ (1 << bad_ecc_bit));
-	}
-
-	mpc85xx_mc_printk(mci, KERN_ERR,
-			"Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n",
-			cap_high, cap_low, syndrome);
-	mpc85xx_mc_printk(mci, KERN_ERR, "Err addr: %#8.8llx\n", err_addr);
-	mpc85xx_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn);
-
-	/* we are out of range */
-	if (row_index == mci->nr_csrows)
-		mpc85xx_mc_printk(mci, KERN_ERR, "PFN out of range!\n");
-
-	if (err_detect & DDR_EDE_SBE)
-		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
-				     pfn, err_addr & ~PAGE_MASK, syndrome,
-				     row_index, 0, -1,
-				     mci->ctl_name, "");
-
-	if (err_detect & DDR_EDE_MBE)
-		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
-				     pfn, err_addr & ~PAGE_MASK, syndrome,
-				     row_index, 0, -1,
-				     mci->ctl_name, "");
-
-	out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, err_detect);
-}
-
-static irqreturn_t mpc85xx_mc_isr(int irq, void *dev_id)
-{
-	struct mem_ctl_info *mci = dev_id;
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-	u32 err_detect;
-
-	err_detect = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT);
-	if (!err_detect)
-		return IRQ_NONE;
-
-	mpc85xx_mc_check(mci);
-
-	return IRQ_HANDLED;
-}
-
-static void mpc85xx_init_csrows(struct mem_ctl_info *mci)
-{
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-	struct csrow_info *csrow;
-	struct dimm_info *dimm;
-	u32 sdram_ctl;
-	u32 sdtype;
-	enum mem_type mtype;
-	u32 cs_bnds;
-	int index;
-
-	sdram_ctl = in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG);
-
-	sdtype = sdram_ctl & DSC_SDTYPE_MASK;
-	if (sdram_ctl & DSC_RD_EN) {
-		switch (sdtype) {
-		case DSC_SDTYPE_DDR:
-			mtype = MEM_RDDR;
-			break;
-		case DSC_SDTYPE_DDR2:
-			mtype = MEM_RDDR2;
-			break;
-		case DSC_SDTYPE_DDR3:
-			mtype = MEM_RDDR3;
-			break;
-		default:
-			mtype = MEM_UNKNOWN;
-			break;
-		}
-	} else {
-		switch (sdtype) {
-		case DSC_SDTYPE_DDR:
-			mtype = MEM_DDR;
-			break;
-		case DSC_SDTYPE_DDR2:
-			mtype = MEM_DDR2;
-			break;
-		case DSC_SDTYPE_DDR3:
-			mtype = MEM_DDR3;
-			break;
-		default:
-			mtype = MEM_UNKNOWN;
-			break;
-		}
-	}
-
-	for (index = 0; index < mci->nr_csrows; index++) {
-		u32 start;
-		u32 end;
-
-		csrow = mci->csrows[index];
-		dimm = csrow->channels[0]->dimm;
-
-		cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 +
-				  (index * MPC85XX_MC_CS_BNDS_OFS));
-
-		start = (cs_bnds & 0xffff0000) >> 16;
-		end   = (cs_bnds & 0x0000ffff);
-
-		if (start == end)
-			continue;	/* not populated */
-
-		start <<= (24 - PAGE_SHIFT);
-		end   <<= (24 - PAGE_SHIFT);
-		end    |= (1 << (24 - PAGE_SHIFT)) - 1;
-
-		csrow->first_page = start;
-		csrow->last_page = end;
-
-		dimm->nr_pages = end + 1 - start;
-		dimm->grain = 8;
-		dimm->mtype = mtype;
-		dimm->dtype = DEV_UNKNOWN;
-		if (sdram_ctl & DSC_X32_EN)
-			dimm->dtype = DEV_X32;
-		dimm->edac_mode = EDAC_SECDED;
-	}
-}
-
-static int mpc85xx_mc_err_probe(struct platform_device *op)
-{
-	struct mem_ctl_info *mci;
-	struct edac_mc_layer layers[2];
-	struct mpc85xx_mc_pdata *pdata;
-	struct resource r;
-	u32 sdram_ctl;
-	int res;
-
-	if (!devres_open_group(&op->dev, mpc85xx_mc_err_probe, GFP_KERNEL))
-		return -ENOMEM;
-
-	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
-	layers[0].size = 4;
-	layers[0].is_virt_csrow = true;
-	layers[1].type = EDAC_MC_LAYER_CHANNEL;
-	layers[1].size = 1;
-	layers[1].is_virt_csrow = false;
-	mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers,
-			    sizeof(*pdata));
-	if (!mci) {
-		devres_release_group(&op->dev, mpc85xx_mc_err_probe);
-		return -ENOMEM;
-	}
-
-	pdata = mci->pvt_info;
-	pdata->name = "mpc85xx_mc_err";
-	pdata->irq = NO_IRQ;
-	mci->pdev = &op->dev;
-	pdata->edac_idx = edac_mc_idx++;
-	dev_set_drvdata(mci->pdev, mci);
-	mci->ctl_name = pdata->name;
-	mci->dev_name = pdata->name;
-
-	res = of_address_to_resource(op->dev.of_node, 0, &r);
-	if (res) {
-		printk(KERN_ERR "%s: Unable to get resource for MC err regs\n",
-		       __func__);
-		goto err;
-	}
-
-	if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r),
-				     pdata->name)) {
-		printk(KERN_ERR "%s: Error while requesting mem region\n",
-		       __func__);
-		res = -EBUSY;
-		goto err;
-	}
-
-	pdata->mc_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r));
-	if (!pdata->mc_vbase) {
-		printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__);
-		res = -ENOMEM;
-		goto err;
-	}
-
-	sdram_ctl = in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG);
-	if (!(sdram_ctl & DSC_ECC_EN)) {
-		/* no ECC */
-		printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__);
-		res = -ENODEV;
-		goto err;
-	}
-
-	edac_dbg(3, "init mci\n");
-	mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 |
-	    MEM_FLAG_DDR | MEM_FLAG_DDR2;
-	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
-	mci->edac_cap = EDAC_FLAG_SECDED;
-	mci->mod_name = EDAC_MOD_STR;
-	mci->mod_ver = MPC85XX_REVISION;
-
-	if (edac_op_state == EDAC_OPSTATE_POLL)
-		mci->edac_check = mpc85xx_mc_check;
-
-	mci->ctl_page_to_phys = NULL;
-
-	mci->scrub_mode = SCRUB_SW_SRC;
-
-	mpc85xx_init_csrows(mci);
-
-	/* store the original error disable bits */
-	orig_ddr_err_disable =
-	    in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE);
-	out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE, 0);
-
-	/* clear all error bits */
-	out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, ~0);
-
-	if (edac_mc_add_mc_with_groups(mci, mpc85xx_dev_groups)) {
-		edac_dbg(3, "failed edac_mc_add_mc()\n");
-		goto err;
-	}
-
-	if (edac_op_state == EDAC_OPSTATE_INT) {
-		out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_INT_EN,
-			 DDR_EIE_MBEE | DDR_EIE_SBEE);
-
-		/* store the original error management threshold */
-		orig_ddr_err_sbe = in_be32(pdata->mc_vbase +
-					   MPC85XX_MC_ERR_SBE) & 0xff0000;
-
-		/* set threshold to 1 error per interrupt */
-		out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, 0x10000);
-
-		/* register interrupts */
-		pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0);
-		res = devm_request_irq(&op->dev, pdata->irq,
-				       mpc85xx_mc_isr,
-				       IRQF_SHARED,
-				       "[EDAC] MC err", mci);
-		if (res < 0) {
-			printk(KERN_ERR "%s: Unable to request irq %d for "
-			       "MPC85xx DRAM ERR\n", __func__, pdata->irq);
-			irq_dispose_mapping(pdata->irq);
-			res = -ENODEV;
-			goto err2;
-		}
-
-		printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for MC\n",
-		       pdata->irq);
-	}
-
-	devres_remove_group(&op->dev, mpc85xx_mc_err_probe);
-	edac_dbg(3, "success\n");
-	printk(KERN_INFO EDAC_MOD_STR " MC err registered\n");
-
-	return 0;
-
-err2:
-	edac_mc_del_mc(&op->dev);
-err:
-	devres_release_group(&op->dev, mpc85xx_mc_err_probe);
-	edac_mc_free(mci);
-	return res;
-}
-
-static int mpc85xx_mc_err_remove(struct platform_device *op)
-{
-	struct mem_ctl_info *mci = dev_get_drvdata(&op->dev);
-	struct mpc85xx_mc_pdata *pdata = mci->pvt_info;
-
-	edac_dbg(0, "\n");
-
-	if (edac_op_state == EDAC_OPSTATE_INT) {
-		out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_INT_EN, 0);
-		irq_dispose_mapping(pdata->irq);
-	}
-
-	out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE,
-		 orig_ddr_err_disable);
-	out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, orig_ddr_err_sbe);
-
-	edac_mc_del_mc(&op->dev);
-	edac_mc_free(mci);
-	return 0;
-}
-
 static const struct of_device_id mpc85xx_mc_err_of_match[] = {
 /* deprecate the fsl,85.. forms in the future, 2.6.30? */
 	{ .compatible = "fsl,8540-memory-controller", },
@@ -1217,22 +654,14 @@
 MODULE_DEVICE_TABLE(of, mpc85xx_mc_err_of_match);
 
 static struct platform_driver mpc85xx_mc_err_driver = {
-	.probe = mpc85xx_mc_err_probe,
-	.remove = mpc85xx_mc_err_remove,
+	.probe = fsl_mc_err_probe,
+	.remove = fsl_mc_err_remove,
 	.driver = {
 		.name = "mpc85xx_mc_err",
 		.of_match_table = mpc85xx_mc_err_of_match,
 	},
 };
 
-#ifdef CONFIG_FSL_SOC_BOOKE
-static void __init mpc85xx_mc_clear_rfxe(void *data)
-{
-	orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1);
-	mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~HID1_RFXE));
-}
-#endif
-
 static struct platform_driver * const drivers[] = {
 	&mpc85xx_mc_err_driver,
 	&mpc85xx_l2_err_driver,
@@ -1246,8 +675,7 @@
 	int res = 0;
 	u32 __maybe_unused pvr = 0;
 
-	printk(KERN_INFO "Freescale(R) MPC85xx EDAC driver, "
-	       "(C) 2006 Montavista Software\n");
+	pr_info("Freescale(R) MPC85xx EDAC driver, (C) 2006 Montavista Software\n");
 
 	/* make sure error reporting method is sane */
 	switch (edac_op_state) {
@@ -1261,44 +689,15 @@
 
 	res = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
 	if (res)
-		printk(KERN_WARNING EDAC_MOD_STR "drivers fail to register\n");
-
-#ifdef CONFIG_FSL_SOC_BOOKE
-	pvr = mfspr(SPRN_PVR);
-
-	if ((PVR_VER(pvr) == PVR_VER_E500V1) ||
-	    (PVR_VER(pvr) == PVR_VER_E500V2)) {
-		/*
-		 * need to clear HID1[RFXE] to disable machine check int
-		 * so we can catch it
-		 */
-		if (edac_op_state == EDAC_OPSTATE_INT)
-			on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0);
-	}
-#endif
+		pr_warn(EDAC_MOD_STR "drivers fail to register\n");
 
 	return 0;
 }
 
 module_init(mpc85xx_mc_init);
 
-#ifdef CONFIG_FSL_SOC_BOOKE
-static void __exit mpc85xx_mc_restore_hid1(void *data)
-{
-	mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]);
-}
-#endif
-
 static void __exit mpc85xx_mc_exit(void)
 {
-#ifdef CONFIG_FSL_SOC_BOOKE
-	u32 pvr = mfspr(SPRN_PVR);
-
-	if ((PVR_VER(pvr) == PVR_VER_E500V1) ||
-	    (PVR_VER(pvr) == PVR_VER_E500V2)) {
-		on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0);
-	}
-#endif
 	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
 }
 
diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h
index 9352e88..3f6fb16 100644
--- a/drivers/edac/mpc85xx_edac.h
+++ b/drivers/edac/mpc85xx_edac.h
@@ -17,65 +17,6 @@
 #define mpc85xx_printk(level, fmt, arg...) \
 	edac_printk(level, "MPC85xx", fmt, ##arg)
 
-#define mpc85xx_mc_printk(mci, level, fmt, arg...) \
-	edac_mc_chipset_printk(mci, level, "MPC85xx", fmt, ##arg)
-
-/*
- * DRAM error defines
- */
-
-/* DDR_SDRAM_CFG */
-#define MPC85XX_MC_DDR_SDRAM_CFG	0x0110
-#define MPC85XX_MC_CS_BNDS_0		0x0000
-#define MPC85XX_MC_CS_BNDS_1		0x0008
-#define MPC85XX_MC_CS_BNDS_2		0x0010
-#define MPC85XX_MC_CS_BNDS_3		0x0018
-#define MPC85XX_MC_CS_BNDS_OFS		0x0008
-
-#define MPC85XX_MC_DATA_ERR_INJECT_HI	0x0e00
-#define MPC85XX_MC_DATA_ERR_INJECT_LO	0x0e04
-#define MPC85XX_MC_ECC_ERR_INJECT	0x0e08
-#define MPC85XX_MC_CAPTURE_DATA_HI	0x0e20
-#define MPC85XX_MC_CAPTURE_DATA_LO	0x0e24
-#define MPC85XX_MC_CAPTURE_ECC		0x0e28
-#define MPC85XX_MC_ERR_DETECT		0x0e40
-#define MPC85XX_MC_ERR_DISABLE		0x0e44
-#define MPC85XX_MC_ERR_INT_EN		0x0e48
-#define MPC85XX_MC_CAPTURE_ATRIBUTES	0x0e4c
-#define MPC85XX_MC_CAPTURE_ADDRESS	0x0e50
-#define MPC85XX_MC_CAPTURE_EXT_ADDRESS	0x0e54
-#define MPC85XX_MC_ERR_SBE		0x0e58
-
-#define DSC_MEM_EN	0x80000000
-#define DSC_ECC_EN	0x20000000
-#define DSC_RD_EN	0x10000000
-#define DSC_DBW_MASK	0x00180000
-#define DSC_DBW_32	0x00080000
-#define DSC_DBW_64	0x00000000
-
-#define DSC_SDTYPE_MASK		0x07000000
-
-#define DSC_SDTYPE_DDR		0x02000000
-#define DSC_SDTYPE_DDR2		0x03000000
-#define DSC_SDTYPE_DDR3		0x07000000
-#define DSC_X32_EN	0x00000020
-
-/* Err_Int_En */
-#define DDR_EIE_MSEE	0x1	/* memory select */
-#define DDR_EIE_SBEE	0x4	/* single-bit ECC error */
-#define DDR_EIE_MBEE	0x8	/* multi-bit ECC error */
-
-/* Err_Detect */
-#define DDR_EDE_MSE		0x1	/* memory select */
-#define DDR_EDE_SBE		0x4	/* single-bit ECC error */
-#define DDR_EDE_MBE		0x8	/* multi-bit ECC error */
-#define DDR_EDE_MME		0x80000000	/* multiple memory errors */
-
-/* Err_Disable */
-#define DDR_EDI_MSED	0x1	/* memory select disable */
-#define	DDR_EDI_SBED	0x4	/* single-bit ECC error disable */
-#define	DDR_EDI_MBED	0x8	/* multi-bit ECC error disable */
-
 /*
  * L2 Err defines
  */
@@ -149,13 +90,6 @@
 #define MPC85XX_PCIE_ERR_CAP_R2		0x0030
 #define MPC85XX_PCIE_ERR_CAP_R3		0x0034
 
-struct mpc85xx_mc_pdata {
-	char *name;
-	int edac_idx;
-	void __iomem *mc_vbase;
-	int irq;
-};
-
 struct mpc85xx_l2_pdata {
 	char *name;
 	int edac_idx;
diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c
index 6c54127..cb9b857 100644
--- a/drivers/edac/mv64x60_edac.c
+++ b/drivers/edac/mv64x60_edac.c
@@ -118,7 +118,6 @@
 
 	pdata->pci_hose = pdev->id;
 	pdata->name = "mpc85xx_pci_err";
-	pdata->irq = NO_IRQ;
 	platform_set_drvdata(pdev, pci);
 	pci->dev = &pdev->dev;
 	pci->dev_name = dev_name(&pdev->dev);
@@ -291,7 +290,6 @@
 
 	pdata = edac_dev->pvt_info;
 	pdata->name = "mv64x60_sram_err";
-	pdata->irq = NO_IRQ;
 	edac_dev->dev = &pdev->dev;
 	platform_set_drvdata(pdev, edac_dev);
 	edac_dev->dev_name = dev_name(&pdev->dev);
@@ -459,7 +457,6 @@
 
 	pdata = edac_dev->pvt_info;
 	pdata->name = "mv64x60_cpu_err";
-	pdata->irq = NO_IRQ;
 	edac_dev->dev = &pdev->dev;
 	platform_set_drvdata(pdev, edac_dev);
 	edac_dev->dev_name = dev_name(&pdev->dev);
@@ -727,7 +724,6 @@
 	mci->pdev = &pdev->dev;
 	platform_set_drvdata(pdev, mci);
 	pdata->name = "mv64x60_mc_err";
-	pdata->irq = NO_IRQ;
 	mci->dev_name = dev_name(&pdev->dev);
 	pdata->edac_idx = edac_mc_idx++;
 
diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c
index d3a64ba..691ce25 100644
--- a/drivers/edac/ppc4xx_edac.c
+++ b/drivers/edac/ppc4xx_edac.c
@@ -1029,8 +1029,6 @@
 	pdata			= mci->pvt_info;
 
 	pdata->dcr_host		= *dcr_host;
-	pdata->irqs.sec		= NO_IRQ;
-	pdata->irqs.ded		= NO_IRQ;
 
 	/* Initialize controller capabilities and configuration */
 
@@ -1111,7 +1109,7 @@
 	ded_irq = irq_of_parse_and_map(np, INTMAP_ECCDED_INDEX);
 	sec_irq = irq_of_parse_and_map(np, INTMAP_ECCSEC_INDEX);
 
-	if (ded_irq == NO_IRQ || sec_irq == NO_IRQ) {
+	if (!ded_irq || !sec_irq) {
 		ppc4xx_edac_mc_printk(KERN_ERR, mci,
 				      "Unable to map interrupts.\n");
 		status = -ENODEV;
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 4fb2eb7..5477522 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -552,9 +552,9 @@
 /* Knight's Landing Support */
 /*
  * KNL's memory channels are swizzled between memory controllers.
- * MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2
+ * MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2
  */
-#define knl_channel_remap(channel) ((channel + 3) % 6)
+#define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3)
 
 /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */
 #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC       0x7840
@@ -1286,7 +1286,7 @@
 	mc = GET_BITFIELD(reg, entry*3, (entry*3)+2);
 	chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1);
 
-	return knl_channel_remap(mc*3 + chan);
+	return knl_channel_remap(mc, chan);
 }
 
 /*
@@ -2474,7 +2474,7 @@
 
 	/* Check if everything were registered */
 	if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
-	    !pvt-> pci_tad || !pvt->pci_ras  || !pvt->pci_ta)
+	    !pvt->pci_ras || !pvt->pci_ta)
 		goto enodev;
 
 	if (saw_chan_mask != 0x0f)
@@ -2563,8 +2563,7 @@
 
 	/* Check if everything were registered */
 	if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 ||
-	    !pvt->pci_br1 || !pvt->pci_tad || !pvt->pci_ras  ||
-	    !pvt->pci_ta)
+	    !pvt->pci_br1 || !pvt->pci_ras || !pvt->pci_ta)
 		goto enodev;
 
 	if (saw_chan_mask != 0x0f && /* -EN */
@@ -2997,8 +2996,15 @@
 		} else {
 			char A = *("A");
 
-			channel = knl_channel_remap(channel);
+			/*
+			 * Reported channel is in range 0-2, so we can't map it
+			 * back to mc. To figure out mc we check machine check
+			 * bank register that reported this error.
+			 * bank15 means mc0 and bank16 means mc1.
+			 */
+			channel = knl_channel_remap(m->bank == 16, channel);
 			channel_mask = 1 << channel;
+
 			snprintf(msg, sizeof(msg),
 				"%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
 				overflow ? " OVERFLOW" : "",
diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c
new file mode 100644
index 0000000..0ff4878
--- /dev/null
+++ b/drivers/edac/skx_edac.c
@@ -0,0 +1,1121 @@
+/*
+ * EDAC driver for Intel(R) Xeon(R) Skylake processors
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/edac.h>
+#include <linux/mmzone.h>
+#include <linux/smp.h>
+#include <linux/bitmap.h>
+#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
+#include <asm/cpu_device_id.h>
+#include <asm/processor.h>
+#include <asm/mce.h>
+
+#include "edac_core.h"
+
+#define SKX_REVISION    " Ver: 1.0 "
+
+/*
+ * Debug macros
+ */
+#define skx_printk(level, fmt, arg...)			\
+	edac_printk(level, "skx", fmt, ##arg)
+
+#define skx_mc_printk(mci, level, fmt, arg...)		\
+	edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
+
+/*
+ * Get a bit field at register value <v>, from bit <lo> to bit <hi>
+ */
+#define GET_BITFIELD(v, lo, hi) \
+	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
+
+static LIST_HEAD(skx_edac_list);
+
+static u64 skx_tolm, skx_tohm;
+
+#define NUM_IMC			2	/* memory controllers per socket */
+#define NUM_CHANNELS		3	/* channels per memory controller */
+#define NUM_DIMMS		2	/* Max DIMMS per channel */
+
+#define	MASK26	0x3FFFFFF		/* Mask for 2^26 */
+#define MASK29	0x1FFFFFFF		/* Mask for 2^29 */
+
+/*
+ * Each cpu socket contains some pci devices that provide global
+ * information, and also some that are local to each of the two
+ * memory controllers on the die.
+ */
+struct skx_dev {
+	struct list_head	list;
+	u8			bus[4];
+	struct pci_dev	*sad_all;
+	struct pci_dev	*util_all;
+	u32	mcroute;
+	struct skx_imc {
+		struct mem_ctl_info *mci;
+		u8	mc;	/* system wide mc# */
+		u8	lmc;	/* socket relative mc# */
+		u8	src_id, node_id;
+		struct skx_channel {
+			struct pci_dev *cdev;
+			struct skx_dimm {
+				u8	close_pg;
+				u8	bank_xor_enable;
+				u8	fine_grain_bank;
+				u8	rowbits;
+				u8	colbits;
+			} dimms[NUM_DIMMS];
+		} chan[NUM_CHANNELS];
+	} imc[NUM_IMC];
+};
+static int skx_num_sockets;
+
+struct skx_pvt {
+	struct skx_imc	*imc;
+};
+
+struct decoded_addr {
+	struct skx_dev *dev;
+	u64	addr;
+	int	socket;
+	int	imc;
+	int	channel;
+	u64	chan_addr;
+	int	sktways;
+	int	chanways;
+	int	dimm;
+	int	rank;
+	int	channel_rank;
+	u64	rank_address;
+	int	row;
+	int	column;
+	int	bank_address;
+	int	bank_group;
+};
+
+static struct skx_dev *get_skx_dev(u8 bus, u8 idx)
+{
+	struct skx_dev *d;
+
+	list_for_each_entry(d, &skx_edac_list, list) {
+		if (d->bus[idx] == bus)
+			return d;
+	}
+
+	return NULL;
+}
+
+enum munittype {
+	CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD
+};
+
+struct munit {
+	u16	did;
+	u16	devfn[NUM_IMC];
+	u8	busidx;
+	u8	per_socket;
+	enum munittype mtype;
+};
+
+/*
+ * List of PCI device ids that we need together with some device
+ * number and function numbers to tell which memory controller the
+ * device belongs to.
+ */
+static const struct munit skx_all_munits[] = {
+	{ 0x2054, { }, 1, 1, SAD_ALL },
+	{ 0x2055, { }, 1, 1, UTIL_ALL },
+	{ 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 },
+	{ 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 },
+	{ 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 },
+	{ 0x208e, { }, 1, 0, SAD },
+	{ }
+};
+
+/*
+ * We use the per-socket device 0x2016 to count how many sockets are present,
+ * and to detemine which PCI buses are associated with each socket. Allocate
+ * and build the full list of all the skx_dev structures that we need here.
+ */
+static int get_all_bus_mappings(void)
+{
+	struct pci_dev *pdev, *prev;
+	struct skx_dev *d;
+	u32 reg;
+	int ndev = 0;
+
+	prev = NULL;
+	for (;;) {
+		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev);
+		if (!pdev)
+			break;
+		ndev++;
+		d = kzalloc(sizeof(*d), GFP_KERNEL);
+		if (!d) {
+			pci_dev_put(pdev);
+			return -ENOMEM;
+		}
+		pci_read_config_dword(pdev, 0xCC, &reg);
+		d->bus[0] =  GET_BITFIELD(reg, 0, 7);
+		d->bus[1] =  GET_BITFIELD(reg, 8, 15);
+		d->bus[2] =  GET_BITFIELD(reg, 16, 23);
+		d->bus[3] =  GET_BITFIELD(reg, 24, 31);
+		edac_dbg(2, "busses: %x, %x, %x, %x\n",
+			 d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
+		list_add_tail(&d->list, &skx_edac_list);
+		skx_num_sockets++;
+		prev = pdev;
+	}
+
+	return ndev;
+}
+
+static int get_all_munits(const struct munit *m)
+{
+	struct pci_dev *pdev, *prev;
+	struct skx_dev *d;
+	u32 reg;
+	int i = 0, ndev = 0;
+
+	prev = NULL;
+	for (;;) {
+		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev);
+		if (!pdev)
+			break;
+		ndev++;
+		if (m->per_socket == NUM_IMC) {
+			for (i = 0; i < NUM_IMC; i++)
+				if (m->devfn[i] == pdev->devfn)
+					break;
+			if (i == NUM_IMC)
+				goto fail;
+		}
+		d = get_skx_dev(pdev->bus->number, m->busidx);
+		if (!d)
+			goto fail;
+
+		/* Be sure that the device is enabled */
+		if (unlikely(pci_enable_device(pdev) < 0)) {
+			skx_printk(KERN_ERR,
+				"Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, m->did);
+			goto fail;
+		}
+
+		switch (m->mtype) {
+		case CHAN0: case CHAN1: case CHAN2:
+			pci_dev_get(pdev);
+			d->imc[i].chan[m->mtype].cdev = pdev;
+			break;
+		case SAD_ALL:
+			pci_dev_get(pdev);
+			d->sad_all = pdev;
+			break;
+		case UTIL_ALL:
+			pci_dev_get(pdev);
+			d->util_all = pdev;
+			break;
+		case SAD:
+			/*
+			 * one of these devices per core, including cores
+			 * that don't exist on this SKU. Ignore any that
+			 * read a route table of zero, make sure all the
+			 * non-zero values match.
+			 */
+			pci_read_config_dword(pdev, 0xB4, &reg);
+			if (reg != 0) {
+				if (d->mcroute == 0)
+					d->mcroute = reg;
+				else if (d->mcroute != reg) {
+					skx_printk(KERN_ERR,
+						"mcroute mismatch\n");
+					goto fail;
+				}
+			}
+			ndev--;
+			break;
+		}
+
+		prev = pdev;
+	}
+
+	return ndev;
+fail:
+	pci_dev_put(pdev);
+	return -ENODEV;
+}
+
+const struct x86_cpu_id skx_cpuids[] = {
+	{ X86_VENDOR_INTEL, 6, 0x55, 0, 0 },	/* Skylake */
+	{ }
+};
+MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
+
+static u8 get_src_id(struct skx_dev *d)
+{
+	u32 reg;
+
+	pci_read_config_dword(d->util_all, 0xF0, &reg);
+
+	return GET_BITFIELD(reg, 12, 14);
+}
+
+static u8 skx_get_node_id(struct skx_dev *d)
+{
+	u32 reg;
+
+	pci_read_config_dword(d->util_all, 0xF4, &reg);
+
+	return GET_BITFIELD(reg, 0, 2);
+}
+
+static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
+			 int maxval, char *name)
+{
+	u32 val = GET_BITFIELD(reg, lobit, hibit);
+
+	if (val < minval || val > maxval) {
+		edac_dbg(2, "bad %s = %d (raw=%x)\n", name, val, reg);
+		return -EINVAL;
+	}
+	return val + add;
+}
+
+#define IS_DIMM_PRESENT(mtr)		GET_BITFIELD((mtr), 15, 15)
+
+#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 1, 2, "ranks")
+#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
+#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")
+
+static int get_width(u32 mtr)
+{
+	switch (GET_BITFIELD(mtr, 8, 9)) {
+	case 0:
+		return DEV_X4;
+	case 1:
+		return DEV_X8;
+	case 2:
+		return DEV_X16;
+	}
+	return DEV_UNKNOWN;
+}
+
+static int skx_get_hi_lo(void)
+{
+	struct pci_dev *pdev;
+	u32 reg;
+
+	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL);
+	if (!pdev) {
+		edac_dbg(0, "Can't get tolm/tohm\n");
+		return -ENODEV;
+	}
+
+	pci_read_config_dword(pdev, 0xD0, &reg);
+	skx_tolm = reg;
+	pci_read_config_dword(pdev, 0xD4, &reg);
+	skx_tohm = reg;
+	pci_read_config_dword(pdev, 0xD8, &reg);
+	skx_tohm |= (u64)reg << 32;
+
+	pci_dev_put(pdev);
+	edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm);
+
+	return 0;
+}
+
+static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
+			 struct skx_imc *imc, int chan, int dimmno)
+{
+	int  banks = 16, ranks, rows, cols, npages;
+	u64 size;
+
+	if (!IS_DIMM_PRESENT(mtr))
+		return 0;
+	ranks = numrank(mtr);
+	rows = numrow(mtr);
+	cols = numcol(mtr);
+
+	/*
+	 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
+	 */
+	size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
+	npages = MiB_TO_PAGES(size);
+
+	edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
+		 imc->mc, chan, dimmno, size, npages,
+		 banks, ranks, rows, cols);
+
+	imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
+	imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
+	imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
+	imc->chan[chan].dimms[dimmno].rowbits = rows;
+	imc->chan[chan].dimms[dimmno].colbits = cols;
+
+	dimm->nr_pages = npages;
+	dimm->grain = 32;
+	dimm->dtype = get_width(mtr);
+	dimm->mtype = MEM_DDR4;
+	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
+	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
+		 imc->src_id, imc->lmc, chan, dimmno);
+
+	return 1;
+}
+
+#define SKX_GET_MTMTR(dev, reg) \
+	pci_read_config_dword((dev), 0x87c, &reg)
+
+static bool skx_check_ecc(struct pci_dev *pdev)
+{
+	u32 mtmtr;
+
+	SKX_GET_MTMTR(pdev, mtmtr);
+
+	return !!GET_BITFIELD(mtmtr, 2, 2);
+}
+
+static int skx_get_dimm_config(struct mem_ctl_info *mci)
+{
+	struct skx_pvt *pvt = mci->pvt_info;
+	struct skx_imc *imc = pvt->imc;
+	struct dimm_info *dimm;
+	int i, j;
+	u32 mtr, amap;
+	int ndimms;
+
+	for (i = 0; i < NUM_CHANNELS; i++) {
+		ndimms = 0;
+		pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
+		for (j = 0; j < NUM_DIMMS; j++) {
+			dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
+					     mci->n_layers, i, j, 0);
+			pci_read_config_dword(imc->chan[i].cdev,
+					0x80 + 4*j, &mtr);
+			ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j);
+		}
+		if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) {
+			skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
+			return -ENODEV;
+		}
+	}
+
+	return 0;
+}
+
+static void skx_unregister_mci(struct skx_imc *imc)
+{
+	struct mem_ctl_info *mci = imc->mci;
+
+	if (!mci)
+		return;
+
+	edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);
+
+	/* Remove MC sysfs nodes */
+	edac_mc_del_mc(mci->pdev);
+
+	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
+	kfree(mci->ctl_name);
+	edac_mc_free(mci);
+}
+
+static int skx_register_mci(struct skx_imc *imc)
+{
+	struct mem_ctl_info *mci;
+	struct edac_mc_layer layers[2];
+	struct pci_dev *pdev = imc->chan[0].cdev;
+	struct skx_pvt *pvt;
+	int rc;
+
+	/* allocate a new MC control structure */
+	layers[0].type = EDAC_MC_LAYER_CHANNEL;
+	layers[0].size = NUM_CHANNELS;
+	layers[0].is_virt_csrow = false;
+	layers[1].type = EDAC_MC_LAYER_SLOT;
+	layers[1].size = NUM_DIMMS;
+	layers[1].is_virt_csrow = true;
+	mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
+			    sizeof(struct skx_pvt));
+
+	if (unlikely(!mci))
+		return -ENOMEM;
+
+	edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
+
+	/* Associate skx_dev and mci for future usage */
+	imc->mci = mci;
+	pvt = mci->pvt_info;
+	pvt->imc = imc;
+
+	mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
+				  imc->node_id, imc->lmc);
+	mci->mtype_cap = MEM_FLAG_DDR4;
+	mci->edac_ctl_cap = EDAC_FLAG_NONE;
+	mci->edac_cap = EDAC_FLAG_NONE;
+	mci->mod_name = "skx_edac.c";
+	mci->dev_name = pci_name(imc->chan[0].cdev);
+	mci->mod_ver = SKX_REVISION;
+	mci->ctl_page_to_phys = NULL;
+
+	rc = skx_get_dimm_config(mci);
+	if (rc < 0)
+		goto fail;
+
+	/* record ptr to the generic device */
+	mci->pdev = &pdev->dev;
+
+	/* add this new MC control structure to EDAC's list of MCs */
+	if (unlikely(edac_mc_add_mc(mci))) {
+		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
+		rc = -EINVAL;
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	kfree(mci->ctl_name);
+	edac_mc_free(mci);
+	imc->mci = NULL;
+	return rc;
+}
+
+#define	SKX_MAX_SAD 24
+
+#define SKX_GET_SAD(d, i, reg)	\
+	pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &reg)
+#define SKX_GET_ILV(d, i, reg)	\
+	pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &reg)
+
+#define	SKX_SAD_MOD3MODE(sad)	GET_BITFIELD((sad), 30, 31)
+#define	SKX_SAD_MOD3(sad)	GET_BITFIELD((sad), 27, 27)
+#define SKX_SAD_LIMIT(sad)	(((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26)
+#define	SKX_SAD_MOD3ASMOD2(sad)	GET_BITFIELD((sad), 5, 6)
+#define	SKX_SAD_ATTR(sad)	GET_BITFIELD((sad), 3, 4)
+#define	SKX_SAD_INTERLEAVE(sad)	GET_BITFIELD((sad), 1, 2)
+#define SKX_SAD_ENABLE(sad)	GET_BITFIELD((sad), 0, 0)
+
+#define SKX_ILV_REMOTE(tgt)	(((tgt) & 8) == 0)
+#define SKX_ILV_TARGET(tgt)	((tgt) & 7)
+
+static bool skx_sad_decode(struct decoded_addr *res)
+{
+	struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list);
+	u64 addr = res->addr;
+	int i, idx, tgt, lchan, shift;
+	u32 sad, ilv;
+	u64 limit, prev_limit;
+	int remote = 0;
+
+	/* Simple sanity check for I/O space or out of range */
+	if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) {
+		edac_dbg(0, "Address %llx out of range\n", addr);
+		return false;
+	}
+
+restart:
+	prev_limit = 0;
+	for (i = 0; i < SKX_MAX_SAD; i++) {
+		SKX_GET_SAD(d, i, sad);
+		limit = SKX_SAD_LIMIT(sad);
+		if (SKX_SAD_ENABLE(sad)) {
+			if (addr >= prev_limit && addr <= limit)
+				goto sad_found;
+		}
+		prev_limit = limit + 1;
+	}
+	edac_dbg(0, "No SAD entry for %llx\n", addr);
+	return false;
+
+sad_found:
+	SKX_GET_ILV(d, i, ilv);
+
+	switch (SKX_SAD_INTERLEAVE(sad)) {
+	case 0:
+		idx = GET_BITFIELD(addr, 6, 8);
+		break;
+	case 1:
+		idx = GET_BITFIELD(addr, 8, 10);
+		break;
+	case 2:
+		idx = GET_BITFIELD(addr, 12, 14);
+		break;
+	case 3:
+		idx = GET_BITFIELD(addr, 30, 32);
+		break;
+	}
+
+	tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3);
+
+	/* If point to another node, find it and start over */
+	if (SKX_ILV_REMOTE(tgt)) {
+		if (remote) {
+			edac_dbg(0, "Double remote!\n");
+			return false;
+		}
+		remote = 1;
+		list_for_each_entry(d, &skx_edac_list, list) {
+			if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
+				goto restart;
+		}
+		edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt));
+		return false;
+	}
+
+	if (SKX_SAD_MOD3(sad) == 0)
+		lchan = SKX_ILV_TARGET(tgt);
+	else {
+		switch (SKX_SAD_MOD3MODE(sad)) {
+		case 0:
+			shift = 6;
+			break;
+		case 1:
+			shift = 8;
+			break;
+		case 2:
+			shift = 12;
+			break;
+		default:
+			edac_dbg(0, "illegal mod3mode\n");
+			return false;
+		}
+		switch (SKX_SAD_MOD3ASMOD2(sad)) {
+		case 0:
+			lchan = (addr >> shift) % 3;
+			break;
+		case 1:
+			lchan = (addr >> shift) % 2;
+			break;
+		case 2:
+			lchan = (addr >> shift) % 2;
+			lchan = (lchan << 1) | ~lchan;
+			break;
+		case 3:
+			lchan = ((addr >> shift) % 2) << 1;
+			break;
+		}
+		lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1);
+	}
+
+	res->dev = d;
+	res->socket = d->imc[0].src_id;
+	res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2);
+	res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19);
+
+	edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n",
+		 res->addr, res->socket, res->imc, res->channel);
+	return true;
+}
+
+#define	SKX_MAX_TAD 8
+
+#define SKX_GET_TADBASE(d, mc, i, reg)			\
+	pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &reg)
+#define SKX_GET_TADWAYNESS(d, mc, i, reg)		\
+	pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &reg)
+#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg)	\
+	pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &reg)
+
+#define	SKX_TAD_BASE(b)		((u64)GET_BITFIELD((b), 12, 31) << 26)
+#define SKX_TAD_SKT_GRAN(b)	GET_BITFIELD((b), 4, 5)
+#define SKX_TAD_CHN_GRAN(b)	GET_BITFIELD((b), 6, 7)
+#define	SKX_TAD_LIMIT(b)	(((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26)
+#define	SKX_TAD_OFFSET(b)	((u64)GET_BITFIELD((b), 4, 23) << 26)
+#define	SKX_TAD_SKTWAYS(b)	(1 << GET_BITFIELD((b), 10, 11))
+#define	SKX_TAD_CHNWAYS(b)	(GET_BITFIELD((b), 8, 9) + 1)
+
+/* which bit used for both socket and channel interleave */
+static int skx_granularity[] = { 6, 8, 12, 30 };
+
+static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits)
+{
+	addr >>= shift;
+	addr /= ways;
+	addr <<= shift;
+
+	return addr | (lowbits & ((1ull << shift) - 1));
+}
+
+static bool skx_tad_decode(struct decoded_addr *res)
+{
+	int i;
+	u32 base, wayness, chnilvoffset;
+	int skt_interleave_bit, chn_interleave_bit;
+	u64 channel_addr;
+
+	for (i = 0; i < SKX_MAX_TAD; i++) {
+		SKX_GET_TADBASE(res->dev, res->imc, i, base);
+		SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness);
+		if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness))
+			goto tad_found;
+	}
+	edac_dbg(0, "No TAD entry for %llx\n", res->addr);
+	return false;
+
+tad_found:
+	res->sktways = SKX_TAD_SKTWAYS(wayness);
+	res->chanways = SKX_TAD_CHNWAYS(wayness);
+	skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)];
+	chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)];
+
+	SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset);
+	channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset);
+
+	if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) {
+		/* Must handle channel first, then socket */
+		channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+						 res->chanways, channel_addr);
+		channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+						 res->sktways, channel_addr);
+	} else {
+		/* Handle socket then channel. Preserve low bits from original address */
+		channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit,
+						 res->sktways, res->addr);
+		channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit,
+						 res->chanways, res->addr);
+	}
+
+	res->chan_addr = channel_addr;
+
+	edac_dbg(2, "%llx: chan_addr=%llx sktways=%d chanways=%d\n",
+		 res->addr, res->chan_addr, res->sktways, res->chanways);
+	return true;
+}
+
+#define SKX_MAX_RIR 4
+
+#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg)		\
+	pci_read_config_dword((d)->imc[mc].chan[ch].cdev,	\
+			      0x108 + 4 * (i), &reg)
+#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg)		\
+	pci_read_config_dword((d)->imc[mc].chan[ch].cdev,	\
+			      0x120 + 16 * idx + 4 * (i), &reg)
+
+#define	SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31)
+#define	SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29)
+#define	SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29))
+#define	SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19)
+#define	SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26))
+
+static bool skx_rir_decode(struct decoded_addr *res)
+{
+	int i, idx, chan_rank;
+	int shift;
+	u32 rirway, rirlv;
+	u64 rank_addr, prev_limit = 0, limit;
+
+	if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg)
+		shift = 6;
+	else
+		shift = 13;
+
+	for (i = 0; i < SKX_MAX_RIR; i++) {
+		SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway);
+		limit = SKX_RIR_LIMIT(rirway);
+		if (SKX_RIR_VALID(rirway)) {
+			if (prev_limit <= res->chan_addr &&
+			    res->chan_addr <= limit)
+				goto rir_found;
+		}
+		prev_limit = limit;
+	}
+	edac_dbg(0, "No RIR entry for %llx\n", res->addr);
+	return false;
+
+rir_found:
+	rank_addr = res->chan_addr >> shift;
+	rank_addr /= SKX_RIR_WAYS(rirway);
+	rank_addr <<= shift;
+	rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0);
+
+	res->rank_address = rank_addr;
+	idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway);
+
+	SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv);
+	res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv);
+	chan_rank = SKX_RIR_CHAN_RANK(rirlv);
+	res->channel_rank = chan_rank;
+	res->dimm = chan_rank / 4;
+	res->rank = chan_rank % 4;
+
+	edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n",
+		 res->addr, res->dimm, res->rank,
+		 res->channel_rank, res->rank_address);
+	return true;
+}
+
+static u8 skx_close_row[] = {
+	15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
+};
+static u8 skx_close_column[] = {
+	3, 4, 5, 14, 19, 23, 24, 25, 26, 27
+};
+static u8 skx_open_row[] = {
+	14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
+};
+static u8 skx_open_column[] = {
+	3, 4, 5, 6, 7, 8, 9, 10, 11, 12
+};
+static u8 skx_open_fine_column[] = {
+	3, 4, 5, 7, 8, 9, 10, 11, 12, 13
+};
+
+static int skx_bits(u64 addr, int nbits, u8 *bits)
+{
+	int i, res = 0;
+
+	for (i = 0; i < nbits; i++)
+		res |= ((addr >> bits[i]) & 1) << i;
+	return res;
+}
+
+static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
+{
+	int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1);
+
+	if (do_xor)
+		ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1);
+
+	return ret;
+}
+
+static bool skx_mad_decode(struct decoded_addr *r)
+{
+	struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm];
+	int bg0 = dimm->fine_grain_bank ? 6 : 13;
+
+	if (dimm->close_pg) {
+		r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row);
+		r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column);
+		r->column |= 0x400; /* C10 is autoprecharge, always set */
+		r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28);
+		r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21);
+	} else {
+		r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row);
+		if (dimm->fine_grain_bank)
+			r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column);
+		else
+			r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column);
+		r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23);
+		r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21);
+	}
+	r->row &= (1u << dimm->rowbits) - 1;
+
+	edac_dbg(2, "%llx: row=%x col=%x bank_addr=%d bank_group=%d\n",
+		 r->addr, r->row, r->column, r->bank_address,
+		 r->bank_group);
+	return true;
+}
+
+static bool skx_decode(struct decoded_addr *res)
+{
+
+	return skx_sad_decode(res) && skx_tad_decode(res) &&
+		skx_rir_decode(res) && skx_mad_decode(res);
+}
+
+#ifdef CONFIG_EDAC_DEBUG
+/*
+ * Debug feature. Make /sys/kernel/debug/skx_edac_test/addr.
+ * Write an address to this file to exercise the address decode
+ * logic in this driver.
+ */
+static struct dentry *skx_test;
+static u64 skx_fake_addr;
+
+static int debugfs_u64_set(void *data, u64 val)
+{
+	struct decoded_addr res;
+
+	res.addr = val;
+	skx_decode(&res);
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
+
+static struct dentry *mydebugfs_create(const char *name, umode_t mode,
+				       struct dentry *parent, u64 *value)
+{
+	return debugfs_create_file(name, mode, parent, value, &fops_u64_wo);
+}
+
+static void setup_skx_debug(void)
+{
+	skx_test = debugfs_create_dir("skx_edac_test", NULL);
+	mydebugfs_create("addr", S_IWUSR, skx_test, &skx_fake_addr);
+}
+
+static void teardown_skx_debug(void)
+{
+	debugfs_remove_recursive(skx_test);
+}
+#else
+static void setup_skx_debug(void)
+{
+}
+
+static void teardown_skx_debug(void)
+{
+}
+#endif /*CONFIG_EDAC_DEBUG*/
+
+static void skx_mce_output_error(struct mem_ctl_info *mci,
+				 const struct mce *m,
+				 struct decoded_addr *res)
+{
+	enum hw_event_mc_err_type tp_event;
+	char *type, *optype, msg[256];
+	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
+	bool overflow = GET_BITFIELD(m->status, 62, 62);
+	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
+	bool recoverable;
+	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
+	u32 mscod = GET_BITFIELD(m->status, 16, 31);
+	u32 errcode = GET_BITFIELD(m->status, 0, 15);
+	u32 optypenum = GET_BITFIELD(m->status, 4, 6);
+
+	recoverable = GET_BITFIELD(m->status, 56, 56);
+
+	if (uncorrected_error) {
+		if (ripv) {
+			type = "FATAL";
+			tp_event = HW_EVENT_ERR_FATAL;
+		} else {
+			type = "NON_FATAL";
+			tp_event = HW_EVENT_ERR_UNCORRECTED;
+		}
+	} else {
+		type = "CORRECTED";
+		tp_event = HW_EVENT_ERR_CORRECTED;
+	}
+
+	/*
+	 * According with Table 15-9 of the Intel Architecture spec vol 3A,
+	 * memory errors should fit in this mask:
+	 *	000f 0000 1mmm cccc (binary)
+	 * where:
+	 *	f = Correction Report Filtering Bit. If 1, subsequent errors
+	 *	    won't be shown
+	 *	mmm = error type
+	 *	cccc = channel
+	 * If the mask doesn't match, report an error to the parsing logic
+	 */
+	if (!((errcode & 0xef80) == 0x80)) {
+		optype = "Can't parse: it is not a mem";
+	} else {
+		switch (optypenum) {
+		case 0:
+			optype = "generic undef request error";
+			break;
+		case 1:
+			optype = "memory read error";
+			break;
+		case 2:
+			optype = "memory write error";
+			break;
+		case 3:
+			optype = "addr/cmd error";
+			break;
+		case 4:
+			optype = "memory scrubbing error";
+			break;
+		default:
+			optype = "reserved";
+			break;
+		}
+	}
+
+	snprintf(msg, sizeof(msg),
+		 "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
+		 overflow ? " OVERFLOW" : "",
+		 (uncorrected_error && recoverable) ? " recoverable" : "",
+		 mscod, errcode,
+		 res->socket, res->imc, res->rank,
+		 res->bank_group, res->bank_address, res->row, res->column);
+
+	edac_dbg(0, "%s\n", msg);
+
+	/* Call the helper to output message */
+	edac_mc_handle_error(tp_event, mci, core_err_cnt,
+			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
+			     res->channel, res->dimm, -1,
+			     optype, msg);
+}
+
+static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
+			       void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	struct decoded_addr res;
+	struct mem_ctl_info *mci;
+	char *type;
+
+	if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
+		return NOTIFY_DONE;
+
+	/* ignore unless this is memory related with an address */
+	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
+		return NOTIFY_DONE;
+
+	res.addr = mce->addr;
+	if (!skx_decode(&res))
+		return NOTIFY_DONE;
+	mci = res.dev->imc[res.imc].mci;
+
+	if (mce->mcgstatus & MCG_STATUS_MCIP)
+		type = "Exception";
+	else
+		type = "Event";
+
+	skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
+
+	skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx "
+			  "Bank %d: %016Lx\n", mce->extcpu, type,
+			  mce->mcgstatus, mce->bank, mce->status);
+	skx_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc);
+	skx_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr);
+	skx_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc);
+
+	skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET "
+			  "%u APIC %x\n", mce->cpuvendor, mce->cpuid,
+			  mce->time, mce->socketid, mce->apicid);
+
+	skx_mce_output_error(mci, mce, &res);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block skx_mce_dec = {
+	.notifier_call = skx_mce_check_error,
+};
+
+static void skx_remove(void)
+{
+	int i, j;
+	struct skx_dev *d, *tmp;
+
+	edac_dbg(0, "\n");
+
+	list_for_each_entry_safe(d, tmp, &skx_edac_list, list) {
+		list_del(&d->list);
+		for (i = 0; i < NUM_IMC; i++) {
+			skx_unregister_mci(&d->imc[i]);
+			for (j = 0; j < NUM_CHANNELS; j++)
+				pci_dev_put(d->imc[i].chan[j].cdev);
+		}
+		pci_dev_put(d->util_all);
+		pci_dev_put(d->sad_all);
+
+		kfree(d);
+	}
+}
+
+/*
+ * skx_init:
+ *	make sure we are running on the correct cpu model
+ *	search for all the devices we need
+ *	check which DIMMs are present.
+ */
+int __init skx_init(void)
+{
+	const struct x86_cpu_id *id;
+	const struct munit *m;
+	int rc = 0, i;
+	u8 mc = 0, src_id, node_id;
+	struct skx_dev *d;
+
+	edac_dbg(2, "\n");
+
+	id = x86_match_cpu(skx_cpuids);
+	if (!id)
+		return -ENODEV;
+
+	rc = skx_get_hi_lo();
+	if (rc)
+		return rc;
+
+	rc = get_all_bus_mappings();
+	if (rc < 0)
+		goto fail;
+	if (rc == 0) {
+		edac_dbg(2, "No memory controllers found\n");
+		return -ENODEV;
+	}
+
+	for (m = skx_all_munits; m->did; m++) {
+		rc = get_all_munits(m);
+		if (rc < 0)
+			goto fail;
+		if (rc != m->per_socket * skx_num_sockets) {
+			edac_dbg(2, "Expected %d, got %d of %x\n",
+				 m->per_socket * skx_num_sockets, rc, m->did);
+			rc = -ENODEV;
+			goto fail;
+		}
+	}
+
+	list_for_each_entry(d, &skx_edac_list, list) {
+		src_id = get_src_id(d);
+		node_id = skx_get_node_id(d);
+		edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
+		for (i = 0; i < NUM_IMC; i++) {
+			d->imc[i].mc = mc++;
+			d->imc[i].lmc = i;
+			d->imc[i].src_id = src_id;
+			d->imc[i].node_id = node_id;
+			rc = skx_register_mci(&d->imc[i]);
+			if (rc < 0)
+				goto fail;
+		}
+	}
+
+	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
+	opstate_init();
+
+	setup_skx_debug();
+
+	mce_register_decode_chain(&skx_mce_dec);
+
+	return 0;
+fail:
+	skx_remove();
+	return rc;
+}
+
+static void __exit skx_exit(void)
+{
+	edac_dbg(2, "\n");
+	mce_unregister_decode_chain(&skx_mce_dec);
+	skx_remove();
+	teardown_skx_debug();
+}
+
+module_init(skx_init);
+module_exit(skx_exit);
+
+module_param(edac_op_state, int, 0444);
+MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Tony Luck");
+MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors");
diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c
index 1b8c07e..2a9a11a 100644
--- a/drivers/edac/wq.c
+++ b/drivers/edac/wq.c
@@ -27,7 +27,7 @@
 
 int edac_workqueue_setup(void)
 {
-	wq = create_singlethread_workqueue("edac-poller");
+	wq = alloc_ordered_workqueue("edac-poller", WQ_MEM_RECLAIM);
 	if (!wq)
 		return -ENODEV;
 	else
diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig
index 3d89e60..04788d9 100644
--- a/drivers/extcon/Kconfig
+++ b/drivers/extcon/Kconfig
@@ -96,6 +96,12 @@
 	  Say Y here to enable support for USB peripheral and USB host
 	  detection by palmas usb.
 
+config EXTCON_QCOM_SPMI_MISC
+	tristate "Qualcomm USB extcon support"
+	help
+	  Say Y here to enable SPMI PMIC based USB cable detection
+	  support on Qualcomm PMICs such as PM8941.
+
 config EXTCON_RT8973A
 	tristate "Richtek RT8973A EXTCON support"
 	depends on I2C
diff --git a/drivers/extcon/Makefile b/drivers/extcon/Makefile
index 972c813..31a0a99 100644
--- a/drivers/extcon/Makefile
+++ b/drivers/extcon/Makefile
@@ -14,6 +14,7 @@
 obj-$(CONFIG_EXTCON_MAX77843)	+= extcon-max77843.o
 obj-$(CONFIG_EXTCON_MAX8997)	+= extcon-max8997.o
 obj-$(CONFIG_EXTCON_PALMAS)	+= extcon-palmas.o
+obj-$(CONFIG_EXTCON_QCOM_SPMI_MISC) += extcon-qcom-spmi-misc.o
 obj-$(CONFIG_EXTCON_RT8973A)	+= extcon-rt8973a.o
 obj-$(CONFIG_EXTCON_SM5502)	+= extcon-sm5502.o
 obj-$(CONFIG_EXTCON_USB_GPIO)	+= extcon-usb-gpio.o
diff --git a/drivers/extcon/extcon-adc-jack.c b/drivers/extcon/extcon-adc-jack.c
index 44e48aa..bc53870 100644
--- a/drivers/extcon/extcon-adc-jack.c
+++ b/drivers/extcon/extcon-adc-jack.c
@@ -3,6 +3,9 @@
  *
  * Analog Jack extcon driver with ADC-based detection capability.
  *
+ * Copyright (C) 2016 Samsung Electronics
+ * Chanwoo Choi <cw00.choi@samsung.com>
+ *
  * Copyright (C) 2012 Samsung Electronics
  * MyungJoo Ham <myungjoo.ham@samsung.com>
  *
@@ -58,7 +61,7 @@
 	struct adc_jack_data *data = container_of(to_delayed_work(work),
 			struct adc_jack_data,
 			handler);
-	u32 state = 0;
+	struct adc_jack_cond *def;
 	int ret, adc_val;
 	int i;
 
@@ -70,17 +73,18 @@
 
 	/* Get state from adc value with adc_conditions */
 	for (i = 0; i < data->num_conditions; i++) {
-		struct adc_jack_cond *def = &data->adc_conditions[i];
-		if (!def->state)
-			break;
+		def = &data->adc_conditions[i];
 		if (def->min_adc <= adc_val && def->max_adc >= adc_val) {
-			state = def->state;
-			break;
+			extcon_set_state_sync(data->edev, def->id, true);
+			return;
 		}
 	}
-	/* if no def has met, it means state = 0 (no cables attached) */
 
-	extcon_set_state(data->edev, state);
+	/* Set the detached state if adc value is not included in the range */
+	for (i = 0; i < data->num_conditions; i++) {
+		def = &data->adc_conditions[i];
+		extcon_set_state_sync(data->edev, def->id, false);
+	}
 }
 
 static irqreturn_t adc_jack_irq_thread(int irq, void *_data)
@@ -114,16 +118,14 @@
 		return -ENOMEM;
 	}
 
-	if (!pdata->adc_conditions ||
-			!pdata->adc_conditions[0].state) {
+	if (!pdata->adc_conditions) {
 		dev_err(&pdev->dev, "error: adc_conditions not defined.\n");
 		return -EINVAL;
 	}
 	data->adc_conditions = pdata->adc_conditions;
 
 	/* Check the length of array and set num_conditions */
-	for (i = 0; data->adc_conditions[i].state; i++)
-		;
+	for (i = 0; data->adc_conditions[i].id != EXTCON_NONE; i++);
 	data->num_conditions = i;
 
 	data->chan = iio_channel_get(&pdev->dev, pdata->consumer_channel);
@@ -158,6 +160,7 @@
 	if (data->wakeup_source)
 		device_init_wakeup(&pdev->dev, 1);
 
+	adc_jack_handler(&data->handler.work);
 	return 0;
 }
 
diff --git a/drivers/extcon/extcon-arizona.c b/drivers/extcon/extcon-arizona.c
index 1d8e0a5..56e6c4c 100644
--- a/drivers/extcon/extcon-arizona.c
+++ b/drivers/extcon/extcon-arizona.c
@@ -183,7 +183,7 @@
 		if (clamp)
 			val = ARIZONA_RMV_SHRT_HP1L;
 		break;
-	};
+	}
 
 	snd_soc_dapm_mutex_lock(arizona->dapm);
 
@@ -614,7 +614,7 @@
 	}
 
 	/* If the cable was removed while measuring ignore the result */
-	ret = extcon_get_cable_state_(info->edev, EXTCON_MECHANICAL);
+	ret = extcon_get_state(info->edev, EXTCON_MECHANICAL);
 	if (ret < 0) {
 		dev_err(arizona->dev, "Failed to check cable state: %d\n",
 			ret);
@@ -649,7 +649,7 @@
 	else
 		report = EXTCON_JACK_HEADPHONE;
 
-	ret = extcon_set_cable_state_(info->edev, report, true);
+	ret = extcon_set_state_sync(info->edev, report, true);
 	if (ret != 0)
 		dev_err(arizona->dev, "Failed to report HP/line: %d\n",
 			ret);
@@ -732,7 +732,7 @@
 			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
 
 	/* Just report headphone */
-	ret = extcon_set_cable_state_(info->edev, EXTCON_JACK_HEADPHONE, true);
+	ret = extcon_set_state_sync(info->edev, EXTCON_JACK_HEADPHONE, true);
 	if (ret != 0)
 		dev_err(arizona->dev, "Failed to report headphone: %d\n", ret);
 
@@ -789,7 +789,7 @@
 			   ARIZONA_ACCDET_MODE_MASK, ARIZONA_ACCDET_MODE_MIC);
 
 	/* Just report headphone */
-	ret = extcon_set_cable_state_(info->edev, EXTCON_JACK_HEADPHONE, true);
+	ret = extcon_set_state_sync(info->edev, EXTCON_JACK_HEADPHONE, true);
 	if (ret != 0)
 		dev_err(arizona->dev, "Failed to report headphone: %d\n", ret);
 
@@ -829,7 +829,7 @@
 	mutex_lock(&info->lock);
 
 	/* If the cable was removed while measuring ignore the result */
-	ret = extcon_get_cable_state_(info->edev, EXTCON_MECHANICAL);
+	ret = extcon_get_state(info->edev, EXTCON_MECHANICAL);
 	if (ret < 0) {
 		dev_err(arizona->dev, "Failed to check cable state: %d\n",
 				ret);
@@ -914,7 +914,7 @@
 
 		arizona_identify_headphone(info);
 
-		ret = extcon_set_cable_state_(info->edev,
+		ret = extcon_set_state_sync(info->edev,
 					      EXTCON_JACK_MICROPHONE, true);
 		if (ret != 0)
 			dev_err(arizona->dev, "Headset report failed: %d\n",
@@ -1108,7 +1108,7 @@
 
 	if (info->last_jackdet == present) {
 		dev_dbg(arizona->dev, "Detected jack\n");
-		ret = extcon_set_cable_state_(info->edev,
+		ret = extcon_set_state_sync(info->edev,
 					      EXTCON_MECHANICAL, true);
 
 		if (ret != 0)
@@ -1149,10 +1149,13 @@
 					 info->micd_ranges[i].key, 0);
 		input_sync(info->input);
 
-		ret = extcon_update_state(info->edev, 0xffffffff, 0);
-		if (ret != 0)
-			dev_err(arizona->dev, "Removal report failed: %d\n",
-				ret);
+		for (i = 0; i < ARRAY_SIZE(arizona_cable) - 1; i++) {
+			ret = extcon_set_state_sync(info->edev,
+					arizona_cable[i], false);
+			if (ret != 0)
+				dev_err(arizona->dev,
+					"Removal report failed: %d\n", ret);
+		}
 
 		regmap_update_bits(arizona->regmap,
 				   ARIZONA_JACK_DETECT_DEBOUNCE,
diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c
index fd55c2f..42f41e8 100644
--- a/drivers/extcon/extcon-axp288.c
+++ b/drivers/extcon/extcon-axp288.c
@@ -189,19 +189,19 @@
 
 	switch (chrg_type) {
 	case DET_STAT_SDP:
-		dev_dbg(info->dev, "sdp cable is connecetd\n");
+		dev_dbg(info->dev, "sdp cable is connected\n");
 		notify_otg = true;
 		notify_charger = true;
 		cable = EXTCON_CHG_USB_SDP;
 		break;
 	case DET_STAT_CDP:
-		dev_dbg(info->dev, "cdp cable is connecetd\n");
+		dev_dbg(info->dev, "cdp cable is connected\n");
 		notify_otg = true;
 		notify_charger = true;
 		cable = EXTCON_CHG_USB_CDP;
 		break;
 	case DET_STAT_DCP:
-		dev_dbg(info->dev, "dcp cable is connecetd\n");
+		dev_dbg(info->dev, "dcp cable is connected\n");
 		notify_charger = true;
 		cable = EXTCON_CHG_USB_DCP;
 		break;
@@ -226,7 +226,7 @@
 	}
 
 	if (notify_charger)
-		extcon_set_cable_state_(info->edev, cable, vbus_attach);
+		extcon_set_state_sync(info->edev, cable, vbus_attach);
 
 	/* Clear the flags on disconnect event */
 	if (!vbus_attach)
diff --git a/drivers/extcon/extcon-gpio.c b/drivers/extcon/extcon-gpio.c
index d023789..ebed22f 100644
--- a/drivers/extcon/extcon-gpio.c
+++ b/drivers/extcon/extcon-gpio.c
@@ -49,7 +49,8 @@
 	state = gpiod_get_value_cansleep(data->id_gpiod);
 	if (data->pdata->gpio_active_low)
 		state = !state;
-	extcon_set_state(data->edev, state);
+
+	extcon_set_state_sync(data->edev, data->pdata->extcon_id, state);
 }
 
 static irqreturn_t gpio_irq_handler(int irq, void *dev_id)
diff --git a/drivers/extcon/extcon-max14577.c b/drivers/extcon/extcon-max14577.c
index 852a711..12e26c4 100644
--- a/drivers/extcon/extcon-max14577.c
+++ b/drivers/extcon/extcon-max14577.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2013,2014 Samsung Electronics
  * Chanwoo Choi <cw00.choi@samsung.com>
- * Krzysztof Kozlowski <k.kozlowski@samsung.com>
+ * Krzysztof Kozlowski <krzk@kernel.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -357,7 +357,7 @@
 	if (ret < 0)
 		return ret;
 
-	extcon_set_cable_state_(info->edev, EXTCON_JIG, attached);
+	extcon_set_state_sync(info->edev, EXTCON_JIG, attached);
 
 	return 0;
 }
@@ -454,24 +454,24 @@
 		if (ret < 0)
 			return ret;
 
-		extcon_set_cable_state_(info->edev, EXTCON_USB, attached);
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SDP,
+		extcon_set_state_sync(info->edev, EXTCON_USB, attached);
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SDP,
 					attached);
 		break;
 	case MAX14577_CHARGER_TYPE_DEDICATED_CHG:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_DCP,
 					attached);
 		break;
 	case MAX14577_CHARGER_TYPE_DOWNSTREAM_PORT:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_CDP,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_CDP,
 					attached);
 		break;
 	case MAX14577_CHARGER_TYPE_SPECIAL_500MA:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SLOW,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SLOW,
 					attached);
 		break;
 	case MAX14577_CHARGER_TYPE_SPECIAL_1A:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_FAST,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_FAST,
 					attached);
 		break;
 	case MAX14577_CHARGER_TYPE_NONE:
@@ -791,6 +791,6 @@
 module_platform_driver(max14577_muic_driver);
 
 MODULE_DESCRIPTION("Maxim 14577/77836 Extcon driver");
-MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>, Krzysztof Kozlowski <k.kozlowski@samsung.com>");
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>, Krzysztof Kozlowski <krzk@kernel.org>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:extcon-max14577");
diff --git a/drivers/extcon/extcon-max3355.c b/drivers/extcon/extcon-max3355.c
index c24abec..533e16a 100644
--- a/drivers/extcon/extcon-max3355.c
+++ b/drivers/extcon/extcon-max3355.c
@@ -39,16 +39,16 @@
 		 * As we don't have event for USB peripheral cable attached,
 		 * we simulate USB peripheral attach here.
 		 */
-		extcon_set_cable_state_(data->edev, EXTCON_USB_HOST, false);
-		extcon_set_cable_state_(data->edev, EXTCON_USB, true);
+		extcon_set_state_sync(data->edev, EXTCON_USB_HOST, false);
+		extcon_set_state_sync(data->edev, EXTCON_USB, true);
 	} else {
 		/*
 		 * ID = 0 means USB HOST cable attached.
 		 * As we don't have event for USB peripheral cable detached,
 		 * we simulate USB peripheral detach here.
 		 */
-		extcon_set_cable_state_(data->edev, EXTCON_USB, false);
-		extcon_set_cable_state_(data->edev, EXTCON_USB_HOST, true);
+		extcon_set_state_sync(data->edev, EXTCON_USB, false);
+		extcon_set_state_sync(data->edev, EXTCON_USB_HOST, true);
 	}
 
 	return IRQ_HANDLED;
diff --git a/drivers/extcon/extcon-max77693.c b/drivers/extcon/extcon-max77693.c
index f17cb76..68dbcb8 100644
--- a/drivers/extcon/extcon-max77693.c
+++ b/drivers/extcon/extcon-max77693.c
@@ -505,8 +505,8 @@
 		if (ret < 0)
 			return ret;
 
-		extcon_set_cable_state_(info->edev, EXTCON_DOCK, attached);
-		extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL, attached);
+		extcon_set_state_sync(info->edev, EXTCON_DOCK, attached);
+		extcon_set_state_sync(info->edev, EXTCON_DISP_MHL, attached);
 		goto out;
 	case MAX77693_MUIC_ADC_AUDIO_MODE_REMOTE:	/* Dock-Desk */
 		dock_id = EXTCON_DOCK;
@@ -514,8 +514,8 @@
 	case MAX77693_MUIC_ADC_AV_CABLE_NOLOAD:		/* Dock-Audio */
 		dock_id = EXTCON_DOCK;
 		if (!attached) {
-			extcon_set_cable_state_(info->edev, EXTCON_USB, false);
-			extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SDP,
+			extcon_set_state_sync(info->edev, EXTCON_USB, false);
+			extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SDP,
 						false);
 		}
 		break;
@@ -530,7 +530,7 @@
 					attached);
 	if (ret < 0)
 		return ret;
-	extcon_set_cable_state_(info->edev, dock_id, attached);
+	extcon_set_state_sync(info->edev, dock_id, attached);
 
 out:
 	return 0;
@@ -596,7 +596,7 @@
 						attached);
 		if (ret < 0)
 			return ret;
-		extcon_set_cable_state_(info->edev, EXTCON_USB_HOST, attached);
+		extcon_set_state_sync(info->edev, EXTCON_USB_HOST, attached);
 		break;
 	case MAX77693_MUIC_GND_AV_CABLE_LOAD:
 		/* Audio Video Cable with load, PATH:AUDIO */
@@ -604,14 +604,14 @@
 						attached);
 		if (ret < 0)
 			return ret;
-		extcon_set_cable_state_(info->edev, EXTCON_USB, attached);
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SDP,
+		extcon_set_state_sync(info->edev, EXTCON_USB, attached);
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SDP,
 					attached);
 		break;
 	case MAX77693_MUIC_GND_MHL:
 	case MAX77693_MUIC_GND_MHL_VB:
 		/* MHL or MHL with USB/TA cable */
-		extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL, attached);
+		extcon_set_state_sync(info->edev, EXTCON_DISP_MHL, attached);
 		break;
 	default:
 		dev_err(info->dev, "failed to detect %s cable of gnd type\n",
@@ -653,7 +653,7 @@
 	if (ret < 0)
 		return ret;
 
-	extcon_set_cable_state_(info->edev, EXTCON_JIG, attached);
+	extcon_set_state_sync(info->edev, EXTCON_JIG, attached);
 
 	return 0;
 }
@@ -807,10 +807,10 @@
 			 * - Support charging through micro-usb port without
 			 *   data connection
 			 */
-			extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP,
+			extcon_set_state_sync(info->edev, EXTCON_CHG_USB_DCP,
 						attached);
 			if (!cable_attached)
-				extcon_set_cable_state_(info->edev,
+				extcon_set_state_sync(info->edev,
 					EXTCON_DISP_MHL, cable_attached);
 			break;
 		}
@@ -834,13 +834,13 @@
 			 * - Support charging through micro-usb port without
 			 *   data connection.
 			 */
-			extcon_set_cable_state_(info->edev, EXTCON_USB,
+			extcon_set_state_sync(info->edev, EXTCON_USB,
 						attached);
-			extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SDP,
+			extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SDP,
 						attached);
 
 			if (!cable_attached)
-				extcon_set_cable_state_(info->edev, EXTCON_DOCK,
+				extcon_set_state_sync(info->edev, EXTCON_DOCK,
 							cable_attached);
 			break;
 		case MAX77693_MUIC_ADC_RESERVED_ACC_3:		/* Dock-Smart */
@@ -869,9 +869,9 @@
 			if (ret < 0)
 				return ret;
 
-			extcon_set_cable_state_(info->edev, EXTCON_DOCK,
+			extcon_set_state_sync(info->edev, EXTCON_DOCK,
 						attached);
-			extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL,
+			extcon_set_state_sync(info->edev, EXTCON_DISP_MHL,
 						attached);
 			break;
 		}
@@ -905,28 +905,28 @@
 			if (ret < 0)
 				return ret;
 
-			extcon_set_cable_state_(info->edev, EXTCON_USB,
+			extcon_set_state_sync(info->edev, EXTCON_USB,
 						attached);
-			extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SDP,
+			extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SDP,
 						attached);
 			break;
 		case MAX77693_CHARGER_TYPE_DEDICATED_CHG:
 			/* Only TA cable */
-			extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP,
+			extcon_set_state_sync(info->edev, EXTCON_CHG_USB_DCP,
 						attached);
 			break;
 		}
 		break;
 	case MAX77693_CHARGER_TYPE_DOWNSTREAM_PORT:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_CDP,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_CDP,
 					attached);
 		break;
 	case MAX77693_CHARGER_TYPE_APPLE_500MA:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SLOW,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SLOW,
 					attached);
 		break;
 	case MAX77693_CHARGER_TYPE_APPLE_1A_2A:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_FAST,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_FAST,
 					attached);
 		break;
 	case MAX77693_CHARGER_TYPE_DEAD_BATTERY:
diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c
index b188bd6..5d11fdf 100644
--- a/drivers/extcon/extcon-max77843.c
+++ b/drivers/extcon/extcon-max77843.c
@@ -346,7 +346,7 @@
 		if (ret < 0)
 			return ret;
 
-		extcon_set_cable_state_(info->edev, EXTCON_USB_HOST, attached);
+		extcon_set_state_sync(info->edev, EXTCON_USB_HOST, attached);
 		break;
 	case MAX77843_MUIC_GND_MHL_VB:
 	case MAX77843_MUIC_GND_MHL:
@@ -356,7 +356,7 @@
 		if (ret < 0)
 			return ret;
 
-		extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL, attached);
+		extcon_set_state_sync(info->edev, EXTCON_DISP_MHL, attached);
 		break;
 	default:
 		dev_err(info->dev, "failed to detect %s accessory(gnd:0x%x)\n",
@@ -392,7 +392,7 @@
 	if (ret < 0)
 		return ret;
 
-	extcon_set_cable_state_(info->edev, EXTCON_JIG, attached);
+	extcon_set_state_sync(info->edev, EXTCON_JIG, attached);
 
 	return 0;
 }
@@ -486,8 +486,8 @@
 		if (ret < 0)
 			return ret;
 
-		extcon_set_cable_state_(info->edev, EXTCON_USB, attached);
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SDP,
+		extcon_set_state_sync(info->edev, EXTCON_USB, attached);
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SDP,
 					attached);
 		break;
 	case MAX77843_MUIC_CHG_DOWNSTREAM:
@@ -497,7 +497,7 @@
 		if (ret < 0)
 			return ret;
 
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_CDP,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_CDP,
 					attached);
 		break;
 	case MAX77843_MUIC_CHG_DEDICATED:
@@ -507,7 +507,7 @@
 		if (ret < 0)
 			return ret;
 
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_DCP,
 					attached);
 		break;
 	case MAX77843_MUIC_CHG_SPECIAL_500MA:
@@ -517,7 +517,7 @@
 		if (ret < 0)
 			return ret;
 
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SLOW,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SLOW,
 					attached);
 		break;
 	case MAX77843_MUIC_CHG_SPECIAL_1A:
@@ -527,7 +527,7 @@
 		if (ret < 0)
 			return ret;
 
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_FAST,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_FAST,
 					attached);
 		break;
 	case MAX77843_MUIC_CHG_GND:
@@ -536,10 +536,10 @@
 
 		/* Charger cable on MHL accessory is attach or detach */
 		if (gnd_type == MAX77843_MUIC_GND_MHL_VB)
-			extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP,
+			extcon_set_state_sync(info->edev, EXTCON_CHG_USB_DCP,
 						true);
 		else if (gnd_type == MAX77843_MUIC_GND_MHL)
-			extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP,
+			extcon_set_state_sync(info->edev, EXTCON_CHG_USB_DCP,
 						false);
 		break;
 	case MAX77843_MUIC_CHG_NONE:
diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c
index 9a89320..4a0612f 100644
--- a/drivers/extcon/extcon-max8997.c
+++ b/drivers/extcon/extcon-max8997.c
@@ -331,11 +331,11 @@
 
 	switch (usb_type) {
 	case MAX8997_USB_HOST:
-		extcon_set_cable_state_(info->edev, EXTCON_USB_HOST, attached);
+		extcon_set_state_sync(info->edev, EXTCON_USB_HOST, attached);
 		break;
 	case MAX8997_USB_DEVICE:
-		extcon_set_cable_state_(info->edev, EXTCON_USB, attached);
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SDP,
+		extcon_set_state_sync(info->edev, EXTCON_USB, attached);
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SDP,
 					attached);
 		break;
 	default:
@@ -361,7 +361,7 @@
 	switch (cable_type) {
 	case MAX8997_MUIC_ADC_AV_CABLE_NOLOAD:
 	case MAX8997_MUIC_ADC_FACTORY_MODE_UART_ON:
-		extcon_set_cable_state_(info->edev, EXTCON_DOCK, attached);
+		extcon_set_state_sync(info->edev, EXTCON_DOCK, attached);
 		break;
 	default:
 		dev_err(info->dev, "failed to detect %s dock device\n",
@@ -384,7 +384,7 @@
 		return ret;
 	}
 
-	extcon_set_cable_state_(info->edev, EXTCON_JIG, attached);
+	extcon_set_state_sync(info->edev, EXTCON_JIG, attached);
 
 	return 0;
 }
@@ -406,7 +406,7 @@
 			return ret;
 		break;
 	case MAX8997_MUIC_ADC_MHL:
-		extcon_set_cable_state_(info->edev, EXTCON_DISP_MHL, attached);
+		extcon_set_state_sync(info->edev, EXTCON_DISP_MHL, attached);
 		break;
 	case MAX8997_MUIC_ADC_FACTORY_MODE_USB_OFF:
 	case MAX8997_MUIC_ADC_FACTORY_MODE_USB_ON:
@@ -489,19 +489,19 @@
 		}
 		break;
 	case MAX8997_CHARGER_TYPE_DOWNSTREAM_PORT:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_CDP,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_CDP,
 					attached);
 		break;
 	case MAX8997_CHARGER_TYPE_DEDICATED_CHG:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_DCP,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_DCP,
 					attached);
 		break;
 	case MAX8997_CHARGER_TYPE_500MA:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SLOW,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SLOW,
 					attached);
 		break;
 	case MAX8997_CHARGER_TYPE_1A:
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_FAST,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_FAST,
 					attached);
 		break;
 	default:
diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c
index caff46c..634ba70 100644
--- a/drivers/extcon/extcon-palmas.c
+++ b/drivers/extcon/extcon-palmas.c
@@ -61,7 +61,7 @@
 	if (vbus_line_state & PALMAS_INT3_LINE_STATE_VBUS) {
 		if (palmas_usb->linkstat != PALMAS_USB_STATE_VBUS) {
 			palmas_usb->linkstat = PALMAS_USB_STATE_VBUS;
-			extcon_set_cable_state_(edev, EXTCON_USB, true);
+			extcon_set_state_sync(edev, EXTCON_USB, true);
 			dev_info(palmas_usb->dev, "USB cable is attached\n");
 		} else {
 			dev_dbg(palmas_usb->dev,
@@ -70,7 +70,7 @@
 	} else if (!(vbus_line_state & PALMAS_INT3_LINE_STATE_VBUS)) {
 		if (palmas_usb->linkstat == PALMAS_USB_STATE_VBUS) {
 			palmas_usb->linkstat = PALMAS_USB_STATE_DISCONNECT;
-			extcon_set_cable_state_(edev, EXTCON_USB, false);
+			extcon_set_state_sync(edev, EXTCON_USB, false);
 			dev_info(palmas_usb->dev, "USB cable is detached\n");
 		} else {
 			dev_dbg(palmas_usb->dev,
@@ -98,7 +98,7 @@
 			PALMAS_USB_ID_INT_LATCH_CLR,
 			PALMAS_USB_ID_INT_EN_HI_CLR_ID_GND);
 		palmas_usb->linkstat = PALMAS_USB_STATE_ID;
-		extcon_set_cable_state_(edev, EXTCON_USB_HOST, true);
+		extcon_set_state_sync(edev, EXTCON_USB_HOST, true);
 		dev_info(palmas_usb->dev, "USB-HOST cable is attached\n");
 	} else if ((set & PALMAS_USB_ID_INT_SRC_ID_FLOAT) &&
 				(id_src & PALMAS_USB_ID_INT_SRC_ID_FLOAT)) {
@@ -106,17 +106,17 @@
 			PALMAS_USB_ID_INT_LATCH_CLR,
 			PALMAS_USB_ID_INT_EN_HI_CLR_ID_FLOAT);
 		palmas_usb->linkstat = PALMAS_USB_STATE_DISCONNECT;
-		extcon_set_cable_state_(edev, EXTCON_USB_HOST, false);
+		extcon_set_state_sync(edev, EXTCON_USB_HOST, false);
 		dev_info(palmas_usb->dev, "USB-HOST cable is detached\n");
 	} else if ((palmas_usb->linkstat == PALMAS_USB_STATE_ID) &&
 				(!(set & PALMAS_USB_ID_INT_SRC_ID_GND))) {
 		palmas_usb->linkstat = PALMAS_USB_STATE_DISCONNECT;
-		extcon_set_cable_state_(edev, EXTCON_USB_HOST, false);
+		extcon_set_state_sync(edev, EXTCON_USB_HOST, false);
 		dev_info(palmas_usb->dev, "USB-HOST cable is detached\n");
 	} else if ((palmas_usb->linkstat == PALMAS_USB_STATE_DISCONNECT) &&
 				(id_src & PALMAS_USB_ID_INT_SRC_ID_GND)) {
 		palmas_usb->linkstat = PALMAS_USB_STATE_ID;
-		extcon_set_cable_state_(edev, EXTCON_USB_HOST, true);
+		extcon_set_state_sync(edev, EXTCON_USB_HOST, true);
 		dev_info(palmas_usb->dev, " USB-HOST cable is attached\n");
 	}
 
@@ -137,10 +137,10 @@
 	id = gpiod_get_value_cansleep(palmas_usb->id_gpiod);
 
 	if (id) {
-		extcon_set_cable_state_(edev, EXTCON_USB_HOST, false);
+		extcon_set_state_sync(edev, EXTCON_USB_HOST, false);
 		dev_info(palmas_usb->dev, "USB-HOST cable is detached\n");
 	} else {
-		extcon_set_cable_state_(edev, EXTCON_USB_HOST, true);
+		extcon_set_state_sync(edev, EXTCON_USB_HOST, true);
 		dev_info(palmas_usb->dev, "USB-HOST cable is attached\n");
 	}
 }
diff --git a/drivers/extcon/extcon-qcom-spmi-misc.c b/drivers/extcon/extcon-qcom-spmi-misc.c
new file mode 100644
index 0000000..ca957a5
--- /dev/null
+++ b/drivers/extcon/extcon-qcom-spmi-misc.c
@@ -0,0 +1,170 @@
+/**
+ * extcon-qcom-spmi-misc.c - Qualcomm USB extcon driver to support USB ID
+ *				detection based on extcon-usb-gpio.c.
+ *
+ * Copyright (C) 2016 Linaro, Ltd.
+ * Stephen Boyd <stephen.boyd@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/extcon.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+
+#define USB_ID_DEBOUNCE_MS	5	/* ms */
+
+struct qcom_usb_extcon_info {
+	struct extcon_dev *edev;
+	int irq;
+	struct delayed_work wq_detcable;
+	unsigned long debounce_jiffies;
+};
+
+static const unsigned int qcom_usb_extcon_cable[] = {
+	EXTCON_USB_HOST,
+	EXTCON_NONE,
+};
+
+static void qcom_usb_extcon_detect_cable(struct work_struct *work)
+{
+	bool id;
+	int ret;
+	struct qcom_usb_extcon_info *info = container_of(to_delayed_work(work),
+						    struct qcom_usb_extcon_info,
+						    wq_detcable);
+
+	/* check ID and update cable state */
+	ret = irq_get_irqchip_state(info->irq, IRQCHIP_STATE_LINE_LEVEL, &id);
+	if (ret)
+		return;
+
+	extcon_set_state(info->edev, EXTCON_USB_HOST, !id);
+}
+
+static irqreturn_t qcom_usb_irq_handler(int irq, void *dev_id)
+{
+	struct qcom_usb_extcon_info *info = dev_id;
+
+	queue_delayed_work(system_power_efficient_wq, &info->wq_detcable,
+			   info->debounce_jiffies);
+
+	return IRQ_HANDLED;
+}
+
+static int qcom_usb_extcon_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct qcom_usb_extcon_info *info;
+	int ret;
+
+	info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->edev = devm_extcon_dev_allocate(dev, qcom_usb_extcon_cable);
+	if (IS_ERR(info->edev)) {
+		dev_err(dev, "failed to allocate extcon device\n");
+		return -ENOMEM;
+	}
+
+	ret = devm_extcon_dev_register(dev, info->edev);
+	if (ret < 0) {
+		dev_err(dev, "failed to register extcon device\n");
+		return ret;
+	}
+
+	info->debounce_jiffies = msecs_to_jiffies(USB_ID_DEBOUNCE_MS);
+	INIT_DELAYED_WORK(&info->wq_detcable, qcom_usb_extcon_detect_cable);
+
+	info->irq = platform_get_irq_byname(pdev, "usb_id");
+	if (info->irq < 0)
+		return info->irq;
+
+	ret = devm_request_threaded_irq(dev, info->irq, NULL,
+					qcom_usb_irq_handler,
+					IRQF_TRIGGER_RISING |
+					IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+					pdev->name, info);
+	if (ret < 0) {
+		dev_err(dev, "failed to request handler for ID IRQ\n");
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, info);
+	device_init_wakeup(dev, 1);
+
+	/* Perform initial detection */
+	qcom_usb_extcon_detect_cable(&info->wq_detcable.work);
+
+	return 0;
+}
+
+static int qcom_usb_extcon_remove(struct platform_device *pdev)
+{
+	struct qcom_usb_extcon_info *info = platform_get_drvdata(pdev);
+
+	cancel_delayed_work_sync(&info->wq_detcable);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int qcom_usb_extcon_suspend(struct device *dev)
+{
+	struct qcom_usb_extcon_info *info = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (device_may_wakeup(dev))
+		ret = enable_irq_wake(info->irq);
+
+	return ret;
+}
+
+static int qcom_usb_extcon_resume(struct device *dev)
+{
+	struct qcom_usb_extcon_info *info = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (device_may_wakeup(dev))
+		ret = disable_irq_wake(info->irq);
+
+	return ret;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(qcom_usb_extcon_pm_ops,
+			 qcom_usb_extcon_suspend, qcom_usb_extcon_resume);
+
+static const struct of_device_id qcom_usb_extcon_dt_match[] = {
+	{ .compatible = "qcom,pm8941-misc", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, qcom_usb_extcon_dt_match);
+
+static struct platform_driver qcom_usb_extcon_driver = {
+	.probe		= qcom_usb_extcon_probe,
+	.remove		= qcom_usb_extcon_remove,
+	.driver		= {
+		.name	= "extcon-pm8941-misc",
+		.pm	= &qcom_usb_extcon_pm_ops,
+		.of_match_table = qcom_usb_extcon_dt_match,
+	},
+};
+module_platform_driver(qcom_usb_extcon_driver);
+
+MODULE_DESCRIPTION("QCOM USB ID extcon driver");
+MODULE_AUTHOR("Stephen Boyd <stephen.boyd@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/extcon/extcon-rt8973a.c b/drivers/extcon/extcon-rt8973a.c
index 97e074d..174c388 100644
--- a/drivers/extcon/extcon-rt8973a.c
+++ b/drivers/extcon/extcon-rt8973a.c
@@ -398,9 +398,9 @@
 		return ret;
 
 	/* Change the state of external accessory */
-	extcon_set_cable_state_(info->edev, id, attached);
+	extcon_set_state_sync(info->edev, id, attached);
 	if (id == EXTCON_USB)
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SDP,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SDP,
 					attached);
 
 	return 0;
diff --git a/drivers/extcon/extcon-sm5502.c b/drivers/extcon/extcon-sm5502.c
index df769a1..b223256 100644
--- a/drivers/extcon/extcon-sm5502.c
+++ b/drivers/extcon/extcon-sm5502.c
@@ -411,9 +411,9 @@
 		return ret;
 
 	/* Change the state of external accessory */
-	extcon_set_cable_state_(info->edev, id, attached);
+	extcon_set_state_sync(info->edev, id, attached);
 	if (id == EXTCON_USB)
-		extcon_set_cable_state_(info->edev, EXTCON_CHG_USB_SDP,
+		extcon_set_state_sync(info->edev, EXTCON_CHG_USB_SDP,
 					attached);
 
 	return 0;
diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c
index 2512660..a27d350 100644
--- a/drivers/extcon/extcon-usb-gpio.c
+++ b/drivers/extcon/extcon-usb-gpio.c
@@ -63,16 +63,16 @@
 		 * As we don't have event for USB peripheral cable attached,
 		 * we simulate USB peripheral attach here.
 		 */
-		extcon_set_cable_state_(info->edev, EXTCON_USB_HOST, false);
-		extcon_set_cable_state_(info->edev, EXTCON_USB, true);
+		extcon_set_state_sync(info->edev, EXTCON_USB_HOST, false);
+		extcon_set_state_sync(info->edev, EXTCON_USB, true);
 	} else {
 		/*
 		 * ID = 0 means USB HOST cable attached.
 		 * As we don't have event for USB peripheral cable detached,
 		 * we simulate USB peripheral detach here.
 		 */
-		extcon_set_cable_state_(info->edev, EXTCON_USB, false);
-		extcon_set_cable_state_(info->edev, EXTCON_USB_HOST, true);
+		extcon_set_state_sync(info->edev, EXTCON_USB, false);
+		extcon_set_state_sync(info->edev, EXTCON_USB_HOST, true);
 	}
 }
 
diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c
index 8682efc..7829846 100644
--- a/drivers/extcon/extcon.c
+++ b/drivers/extcon/extcon.c
@@ -38,43 +38,159 @@
 #define SUPPORTED_CABLE_MAX	32
 #define CABLE_NAME_MAX		30
 
-static const char *extcon_name[] =  {
-	[EXTCON_NONE]			= "NONE",
+struct __extcon_info {
+	unsigned int type;
+	unsigned int id;
+	const char *name;
+
+} extcon_info[] = {
+	[EXTCON_NONE] = {
+		.type = EXTCON_TYPE_MISC,
+		.id = EXTCON_NONE,
+		.name = "NONE",
+	},
 
 	/* USB external connector */
-	[EXTCON_USB]			= "USB",
-	[EXTCON_USB_HOST]		= "USB-HOST",
+	[EXTCON_USB] = {
+		.type = EXTCON_TYPE_USB,
+		.id = EXTCON_USB,
+		.name = "USB",
+	},
+	[EXTCON_USB_HOST] = {
+		.type = EXTCON_TYPE_USB,
+		.id = EXTCON_USB_HOST,
+		.name = "USB_HOST",
+	},
 
 	/* Charging external connector */
-	[EXTCON_CHG_USB_SDP]		= "SDP",
-	[EXTCON_CHG_USB_DCP]		= "DCP",
-	[EXTCON_CHG_USB_CDP]		= "CDP",
-	[EXTCON_CHG_USB_ACA]		= "ACA",
-	[EXTCON_CHG_USB_FAST]		= "FAST-CHARGER",
-	[EXTCON_CHG_USB_SLOW]		= "SLOW-CHARGER",
+	[EXTCON_CHG_USB_SDP] = {
+		.type = EXTCON_TYPE_CHG | EXTCON_TYPE_USB,
+		.id = EXTCON_CHG_USB_SDP,
+		.name = "SDP",
+	},
+	[EXTCON_CHG_USB_DCP] = {
+		.type = EXTCON_TYPE_CHG | EXTCON_TYPE_USB,
+		.id = EXTCON_CHG_USB_DCP,
+		.name = "DCP",
+	},
+	[EXTCON_CHG_USB_CDP] = {
+		.type = EXTCON_TYPE_CHG | EXTCON_TYPE_USB,
+		.id = EXTCON_CHG_USB_CDP,
+		.name = "CDP",
+	},
+	[EXTCON_CHG_USB_ACA] = {
+		.type = EXTCON_TYPE_CHG | EXTCON_TYPE_USB,
+		.id = EXTCON_CHG_USB_ACA,
+		.name = "ACA",
+	},
+	[EXTCON_CHG_USB_FAST] = {
+		.type = EXTCON_TYPE_CHG | EXTCON_TYPE_USB,
+		.id = EXTCON_CHG_USB_FAST,
+		.name = "FAST-CHARGER",
+	},
+	[EXTCON_CHG_USB_SLOW] = {
+		.type = EXTCON_TYPE_CHG | EXTCON_TYPE_USB,
+		.id = EXTCON_CHG_USB_SLOW,
+		.name = "SLOW-CHARGER",
+	},
+	[EXTCON_CHG_WPT] = {
+		.type = EXTCON_TYPE_CHG,
+		.id = EXTCON_CHG_WPT,
+		.name = "WPT",
+	},
 
 	/* Jack external connector */
-	[EXTCON_JACK_MICROPHONE]	= "MICROPHONE",
-	[EXTCON_JACK_HEADPHONE]		= "HEADPHONE",
-	[EXTCON_JACK_LINE_IN]		= "LINE-IN",
-	[EXTCON_JACK_LINE_OUT]		= "LINE-OUT",
-	[EXTCON_JACK_VIDEO_IN]		= "VIDEO-IN",
-	[EXTCON_JACK_VIDEO_OUT]		= "VIDEO-OUT",
-	[EXTCON_JACK_SPDIF_IN]		= "SPDIF-IN",
-	[EXTCON_JACK_SPDIF_OUT]		= "SPDIF-OUT",
+	[EXTCON_JACK_MICROPHONE] = {
+		.type = EXTCON_TYPE_JACK,
+		.id = EXTCON_JACK_MICROPHONE,
+		.name = "MICROPHONE",
+	},
+	[EXTCON_JACK_HEADPHONE] = {
+		.type = EXTCON_TYPE_JACK,
+		.id = EXTCON_JACK_HEADPHONE,
+		.name = "HEADPHONE",
+	},
+	[EXTCON_JACK_LINE_IN] = {
+		.type = EXTCON_TYPE_JACK,
+		.id = EXTCON_JACK_LINE_IN,
+		.name = "LINE-IN",
+	},
+	[EXTCON_JACK_LINE_OUT] = {
+		.type = EXTCON_TYPE_JACK,
+		.id = EXTCON_JACK_LINE_OUT,
+		.name = "LINE-OUT",
+	},
+	[EXTCON_JACK_VIDEO_IN] = {
+		.type = EXTCON_TYPE_JACK,
+		.id = EXTCON_JACK_VIDEO_IN,
+		.name = "VIDEO-IN",
+	},
+	[EXTCON_JACK_VIDEO_OUT] = {
+		.type = EXTCON_TYPE_JACK,
+		.id = EXTCON_JACK_VIDEO_OUT,
+		.name = "VIDEO-OUT",
+	},
+	[EXTCON_JACK_SPDIF_IN] = {
+		.type = EXTCON_TYPE_JACK,
+		.id = EXTCON_JACK_SPDIF_IN,
+		.name = "SPDIF-IN",
+	},
+	[EXTCON_JACK_SPDIF_OUT] = {
+		.type = EXTCON_TYPE_JACK,
+		.id = EXTCON_JACK_SPDIF_OUT,
+		.name = "SPDIF-OUT",
+	},
 
 	/* Display external connector */
-	[EXTCON_DISP_HDMI]		= "HDMI",
-	[EXTCON_DISP_MHL]		= "MHL",
-	[EXTCON_DISP_DVI]		= "DVI",
-	[EXTCON_DISP_VGA]		= "VGA",
+	[EXTCON_DISP_HDMI] = {
+		.type = EXTCON_TYPE_DISP,
+		.id = EXTCON_DISP_HDMI,
+		.name = "HDMI",
+	},
+	[EXTCON_DISP_MHL] = {
+		.type = EXTCON_TYPE_DISP,
+		.id = EXTCON_DISP_MHL,
+		.name = "MHL",
+	},
+	[EXTCON_DISP_DVI] = {
+		.type = EXTCON_TYPE_DISP,
+		.id = EXTCON_DISP_DVI,
+		.name = "DVI",
+	},
+	[EXTCON_DISP_VGA] = {
+		.type = EXTCON_TYPE_DISP,
+		.id = EXTCON_DISP_VGA,
+		.name = "VGA",
+	},
+	[EXTCON_DISP_DP] = {
+		.type = EXTCON_TYPE_DISP | EXTCON_TYPE_USB,
+		.id = EXTCON_DISP_DP,
+		.name = "DP",
+	},
+	[EXTCON_DISP_HMD] = {
+		.type = EXTCON_TYPE_DISP | EXTCON_TYPE_USB,
+		.id = EXTCON_DISP_HMD,
+		.name = "HMD",
+	},
 
 	/* Miscellaneous external connector */
-	[EXTCON_DOCK]			= "DOCK",
-	[EXTCON_JIG]			= "JIG",
-	[EXTCON_MECHANICAL]		= "MECHANICAL",
+	[EXTCON_DOCK] = {
+		.type = EXTCON_TYPE_MISC,
+		.id = EXTCON_DOCK,
+		.name = "DOCK",
+	},
+	[EXTCON_JIG] = {
+		.type = EXTCON_TYPE_MISC,
+		.id = EXTCON_JIG,
+		.name = "JIG",
+	},
+	[EXTCON_MECHANICAL] = {
+		.type = EXTCON_TYPE_MISC,
+		.id = EXTCON_MECHANICAL,
+		.name = "MECHANICAL",
+	},
 
-	NULL,
+	{ /* sentinel */ }
 };
 
 /**
@@ -95,6 +211,16 @@
 	struct device_attribute attr_state;
 
 	struct attribute *attrs[3]; /* to be fed to attr_g.attrs */
+
+	union extcon_property_value usb_propval[EXTCON_PROP_USB_CNT];
+	union extcon_property_value chg_propval[EXTCON_PROP_CHG_CNT];
+	union extcon_property_value jack_propval[EXTCON_PROP_JACK_CNT];
+	union extcon_property_value disp_propval[EXTCON_PROP_DISP_CNT];
+
+	unsigned long usb_bits[BITS_TO_LONGS(EXTCON_PROP_USB_CNT)];
+	unsigned long chg_bits[BITS_TO_LONGS(EXTCON_PROP_CHG_CNT)];
+	unsigned long jack_bits[BITS_TO_LONGS(EXTCON_PROP_JACK_CNT)];
+	unsigned long disp_bits[BITS_TO_LONGS(EXTCON_PROP_DISP_CNT)];
 };
 
 static struct class *extcon_class;
@@ -147,14 +273,93 @@
 	return -EINVAL;
 }
 
-static bool is_extcon_changed(u32 prev, u32 new, int idx, bool *attached)
+static int get_extcon_type(unsigned int prop)
 {
-	if (((prev >> idx) & 0x1) != ((new >> idx) & 0x1)) {
-		*attached = ((new >> idx) & 0x1) ? true : false;
-		return true;
+	switch (prop) {
+	case EXTCON_PROP_USB_MIN ... EXTCON_PROP_USB_MAX:
+		return EXTCON_TYPE_USB;
+	case EXTCON_PROP_CHG_MIN ... EXTCON_PROP_CHG_MAX:
+		return EXTCON_TYPE_CHG;
+	case EXTCON_PROP_JACK_MIN ... EXTCON_PROP_JACK_MAX:
+		return EXTCON_TYPE_JACK;
+	case EXTCON_PROP_DISP_MIN ... EXTCON_PROP_DISP_MAX:
+		return EXTCON_TYPE_DISP;
+	default:
+		return -EINVAL;
+	}
+}
+
+static bool is_extcon_attached(struct extcon_dev *edev, unsigned int index)
+{
+	return !!(edev->state & BIT(index));
+}
+
+static bool is_extcon_changed(struct extcon_dev *edev, int index,
+				bool new_state)
+{
+	int state = !!(edev->state & BIT(index));
+	return (state != new_state);
+}
+
+static bool is_extcon_property_supported(unsigned int id, unsigned int prop)
+{
+	int type;
+
+	/* Check whether the property is supported or not. */
+	type = get_extcon_type(prop);
+	if (type < 0)
+		return false;
+
+	/* Check whether a specific extcon id supports the property or not. */
+	return !!(extcon_info[id].type & type);
+}
+
+static int is_extcon_property_capability(struct extcon_dev *edev,
+				unsigned int id, int index,unsigned int prop)
+{
+	struct extcon_cable *cable;
+	int type, ret;
+
+	/* Check whether the property is supported or not. */
+	type = get_extcon_type(prop);
+	if (type < 0)
+		return type;
+
+	cable = &edev->cables[index];
+
+	switch (type) {
+	case EXTCON_TYPE_USB:
+		ret = test_bit(prop - EXTCON_PROP_USB_MIN, cable->usb_bits);
+		break;
+	case EXTCON_TYPE_CHG:
+		ret = test_bit(prop - EXTCON_PROP_CHG_MIN, cable->chg_bits);
+		break;
+	case EXTCON_TYPE_JACK:
+		ret = test_bit(prop - EXTCON_PROP_JACK_MIN, cable->jack_bits);
+		break;
+	case EXTCON_TYPE_DISP:
+		ret = test_bit(prop - EXTCON_PROP_DISP_MIN, cable->disp_bits);
+		break;
+	default:
+		ret = -EINVAL;
 	}
 
-	return false;
+	return ret;
+}
+
+static void init_property(struct extcon_dev *edev, unsigned int id, int index)
+{
+	unsigned int type = extcon_info[id].type;
+	struct extcon_cable *cable = &edev->cables[index];
+
+	if (EXTCON_TYPE_USB & type)
+		memset(cable->usb_propval, 0, sizeof(cable->usb_propval));
+	if (EXTCON_TYPE_CHG & type)
+		memset(cable->chg_propval, 0, sizeof(cable->chg_propval));
+	if (EXTCON_TYPE_JACK & type)
+		memset(cable->jack_propval, 0, sizeof(cable->jack_propval));
+	if (EXTCON_TYPE_DISP & type)
+		memset(cable->disp_propval, 0, sizeof(cable->disp_propval));
 }
 
 static ssize_t state_show(struct device *dev, struct device_attribute *attr,
@@ -168,32 +373,13 @@
 
 	for (i = 0; i < edev->max_supported; i++) {
 		count += sprintf(buf + count, "%s=%d\n",
-				extcon_name[edev->supported_cable[i]],
+				extcon_info[edev->supported_cable[i]].name,
 				 !!(edev->state & (1 << i)));
 	}
 
 	return count;
 }
-
-static ssize_t state_store(struct device *dev, struct device_attribute *attr,
-			   const char *buf, size_t count)
-{
-	u32 state;
-	ssize_t ret = 0;
-	struct extcon_dev *edev = dev_get_drvdata(dev);
-
-	ret = sscanf(buf, "0x%x", &state);
-	if (ret == 0)
-		ret = -EINVAL;
-	else
-		ret = extcon_set_state(edev, state);
-
-	if (ret < 0)
-		return ret;
-
-	return count;
-}
-static DEVICE_ATTR_RW(state);
+static DEVICE_ATTR_RO(state);
 
 static ssize_t name_show(struct device *dev, struct device_attribute *attr,
 		char *buf)
@@ -212,7 +398,7 @@
 	int i = cable->cable_index;
 
 	return sprintf(buf, "%s\n",
-			extcon_name[cable->edev->supported_cable[i]]);
+			extcon_info[cable->edev->supported_cable[i]].name);
 }
 
 static ssize_t cable_state_show(struct device *dev,
@@ -224,26 +410,17 @@
 	int i = cable->cable_index;
 
 	return sprintf(buf, "%d\n",
-		       extcon_get_cable_state_(cable->edev,
-					       cable->edev->supported_cable[i]));
+		extcon_get_state(cable->edev, cable->edev->supported_cable[i]));
 }
 
 /**
- * extcon_update_state() - Update the cable attach states of the extcon device
- *			   only for the masked bits.
- * @edev:	the extcon device
- * @mask:	the bit mask to designate updated bits.
- * @state:	new cable attach status for @edev
+ * extcon_sync()	- Synchronize the states for both the attached/detached
+ * @edev:		the extcon device that has the cable.
  *
- * Changing the state sends uevent with environment variable containing
- * the name of extcon device (envp[0]) and the state output (envp[1]).
- * Tizen uses this format for extcon device to get events from ports.
- * Android uses this format as well.
- *
- * Note that the notifier provides which bits are changed in the state
- * variable with the val parameter (second) to the callback.
+ * This function send a notification to synchronize the all states of a
+ * specific external connector
  */
-int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state)
+int extcon_sync(struct extcon_dev *edev, unsigned int id)
 {
 	char name_buf[120];
 	char state_buf[120];
@@ -252,100 +429,8 @@
 	int env_offset = 0;
 	int length;
 	int index;
+	int state;
 	unsigned long flags;
-	bool attached;
-
-	if (!edev)
-		return -EINVAL;
-
-	spin_lock_irqsave(&edev->lock, flags);
-
-	if (edev->state != ((edev->state & ~mask) | (state & mask))) {
-		u32 old_state;
-
-		if (check_mutually_exclusive(edev, (edev->state & ~mask) |
-						   (state & mask))) {
-			spin_unlock_irqrestore(&edev->lock, flags);
-			return -EPERM;
-		}
-
-		old_state = edev->state;
-		edev->state &= ~mask;
-		edev->state |= state & mask;
-
-		for (index = 0; index < edev->max_supported; index++) {
-			if (is_extcon_changed(old_state, edev->state, index,
-					      &attached))
-				raw_notifier_call_chain(&edev->nh[index],
-							attached, edev);
-		}
-
-		/* This could be in interrupt handler */
-		prop_buf = (char *)get_zeroed_page(GFP_ATOMIC);
-		if (prop_buf) {
-			length = name_show(&edev->dev, NULL, prop_buf);
-			if (length > 0) {
-				if (prop_buf[length - 1] == '\n')
-					prop_buf[length - 1] = 0;
-				snprintf(name_buf, sizeof(name_buf),
-					"NAME=%s", prop_buf);
-				envp[env_offset++] = name_buf;
-			}
-			length = state_show(&edev->dev, NULL, prop_buf);
-			if (length > 0) {
-				if (prop_buf[length - 1] == '\n')
-					prop_buf[length - 1] = 0;
-				snprintf(state_buf, sizeof(state_buf),
-					"STATE=%s", prop_buf);
-				envp[env_offset++] = state_buf;
-			}
-			envp[env_offset] = NULL;
-			/* Unlock early before uevent */
-			spin_unlock_irqrestore(&edev->lock, flags);
-
-			kobject_uevent_env(&edev->dev.kobj, KOBJ_CHANGE, envp);
-			free_page((unsigned long)prop_buf);
-		} else {
-			/* Unlock early before uevent */
-			spin_unlock_irqrestore(&edev->lock, flags);
-
-			dev_err(&edev->dev, "out of memory in extcon_set_state\n");
-			kobject_uevent(&edev->dev.kobj, KOBJ_CHANGE);
-		}
-	} else {
-		/* No changes */
-		spin_unlock_irqrestore(&edev->lock, flags);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(extcon_update_state);
-
-/**
- * extcon_set_state() - Set the cable attach states of the extcon device.
- * @edev:	the extcon device
- * @state:	new cable attach status for @edev
- *
- * Note that notifier provides which bits are changed in the state
- * variable with the val parameter (second) to the callback.
- */
-int extcon_set_state(struct extcon_dev *edev, u32 state)
-{
-	if (!edev)
-		return -EINVAL;
-
-	return extcon_update_state(edev, 0xffffffff, state);
-}
-EXPORT_SYMBOL_GPL(extcon_set_state);
-
-/**
- * extcon_get_cable_state_() - Get the status of a specific cable.
- * @edev:	the extcon device that has the cable.
- * @id:		the unique id of each external connector in extcon enumeration.
- */
-int extcon_get_cable_state_(struct extcon_dev *edev, const unsigned int id)
-{
-	int index;
 
 	if (!edev)
 		return -EINVAL;
@@ -354,26 +439,92 @@
 	if (index < 0)
 		return index;
 
-	if (edev->max_supported && edev->max_supported <= index)
-		return -EINVAL;
+	spin_lock_irqsave(&edev->lock, flags);
 
-	return !!(edev->state & (1 << index));
+	state = !!(edev->state & BIT(index));
+	raw_notifier_call_chain(&edev->nh[index], state, edev);
+
+	/* This could be in interrupt handler */
+	prop_buf = (char *)get_zeroed_page(GFP_ATOMIC);
+	if (!prop_buf) {
+		/* Unlock early before uevent */
+		spin_unlock_irqrestore(&edev->lock, flags);
+
+		dev_err(&edev->dev, "out of memory in extcon_set_state\n");
+		kobject_uevent(&edev->dev.kobj, KOBJ_CHANGE);
+
+		return 0;
+	}
+
+	length = name_show(&edev->dev, NULL, prop_buf);
+	if (length > 0) {
+		if (prop_buf[length - 1] == '\n')
+			prop_buf[length - 1] = 0;
+		snprintf(name_buf, sizeof(name_buf), "NAME=%s", prop_buf);
+		envp[env_offset++] = name_buf;
+	}
+
+	length = state_show(&edev->dev, NULL, prop_buf);
+	if (length > 0) {
+		if (prop_buf[length - 1] == '\n')
+			prop_buf[length - 1] = 0;
+		snprintf(state_buf, sizeof(state_buf), "STATE=%s", prop_buf);
+		envp[env_offset++] = state_buf;
+	}
+	envp[env_offset] = NULL;
+
+	/* Unlock early before uevent */
+	spin_unlock_irqrestore(&edev->lock, flags);
+	kobject_uevent_env(&edev->dev.kobj, KOBJ_CHANGE, envp);
+	free_page((unsigned long)prop_buf);
+
+	return 0;
 }
-EXPORT_SYMBOL_GPL(extcon_get_cable_state_);
+EXPORT_SYMBOL_GPL(extcon_sync);
 
 /**
- * extcon_set_cable_state_() - Set the status of a specific cable.
+ * extcon_get_state() - Get the state of a external connector.
+ * @edev:	the extcon device that has the cable.
+ * @id:		the unique id of each external connector in extcon enumeration.
+ */
+int extcon_get_state(struct extcon_dev *edev, const unsigned int id)
+{
+	int index, state;
+	unsigned long flags;
+
+	if (!edev)
+		return -EINVAL;
+
+	index = find_cable_index_by_id(edev, id);
+	if (index < 0)
+		return index;
+
+	spin_lock_irqsave(&edev->lock, flags);
+	state = is_extcon_attached(edev, index);
+	spin_unlock_irqrestore(&edev->lock, flags);
+
+	return state;
+}
+EXPORT_SYMBOL_GPL(extcon_get_state);
+
+/**
+ * extcon_set_state() - Set the state of a external connector.
+ *			without a notification.
  * @edev:		the extcon device that has the cable.
  * @id:			the unique id of each external connector
  *			in extcon enumeration.
  * @state:		the new cable status. The default semantics is
  *			true: attached / false: detached.
+ *
+ * This function only set the state of a external connector without
+ * a notification. To synchronize the data of a external connector,
+ * use extcon_set_state_sync() and extcon_sync().
  */
-int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id,
+int extcon_set_state(struct extcon_dev *edev, unsigned int id,
 				bool cable_state)
 {
-	u32 state;
-	int index;
+	unsigned long flags;
+	int index, ret = 0;
 
 	if (!edev)
 		return -EINVAL;
@@ -382,13 +533,338 @@
 	if (index < 0)
 		return index;
 
-	if (edev->max_supported && edev->max_supported <= index)
+	spin_lock_irqsave(&edev->lock, flags);
+
+	/* Check whether the external connector's state is changed. */
+	if (!is_extcon_changed(edev, index, cable_state))
+		goto out;
+
+	if (check_mutually_exclusive(edev,
+		(edev->state & ~BIT(index)) | (cable_state & BIT(index)))) {
+		ret = -EPERM;
+		goto out;
+	}
+
+	/*
+	 * Initialize the value of extcon property before setting
+	 * the detached state for an external connector.
+	 */
+	if (!cable_state)
+		init_property(edev, id, index);
+
+	/* Update the state for a external connector. */
+	if (cable_state)
+		edev->state |= BIT(index);
+	else
+		edev->state &= ~(BIT(index));
+out:
+	spin_unlock_irqrestore(&edev->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(extcon_set_state);
+
+/**
+ * extcon_set_state_sync() - Set the state of a external connector
+ *			with a notification.
+ * @edev:		the extcon device that has the cable.
+ * @id:			the unique id of each external connector
+ *			in extcon enumeration.
+ * @state:		the new cable status. The default semantics is
+ *			true: attached / false: detached.
+ *
+ * This function set the state of external connector and synchronize the data
+ * by usning a notification.
+ */
+int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id,
+				bool cable_state)
+{
+	int ret, index;
+	unsigned long flags;
+
+	index = find_cable_index_by_id(edev, id);
+	if (index < 0)
+		return index;
+
+	/* Check whether the external connector's state is changed. */
+	spin_lock_irqsave(&edev->lock, flags);
+	ret = is_extcon_changed(edev, index, cable_state);
+	spin_unlock_irqrestore(&edev->lock, flags);
+	if (!ret)
+		return 0;
+
+	ret = extcon_set_state(edev, id, cable_state);
+	if (ret < 0)
+		return ret;
+
+	return extcon_sync(edev, id);
+}
+EXPORT_SYMBOL_GPL(extcon_set_state_sync);
+
+/**
+ * extcon_get_property() - Get the property value of a specific cable.
+ * @edev:		the extcon device that has the cable.
+ * @id:			the unique id of each external connector
+ *			in extcon enumeration.
+ * @prop:		the property id among enum extcon_property.
+ * @prop_val:		the pointer which store the value of property.
+ *
+ * When getting the property value of external connector, the external connector
+ * should be attached. If detached state, function just return 0 without
+ * property value. Also, the each property should be included in the list of
+ * supported properties according to the type of external connectors.
+ *
+ * Returns 0 if success or error number if fail
+ */
+int extcon_get_property(struct extcon_dev *edev, unsigned int id,
+				unsigned int prop,
+				union extcon_property_value *prop_val)
+{
+	struct extcon_cable *cable;
+	unsigned long flags;
+	int index, ret = 0;
+
+	*prop_val = (union extcon_property_value)(0);
+
+	if (!edev)
 		return -EINVAL;
 
-	state = cable_state ? (1 << index) : 0;
-	return extcon_update_state(edev, 1 << index, state);
+	/* Check whether the property is supported or not */
+	if (!is_extcon_property_supported(id, prop))
+		return -EINVAL;
+
+	/* Find the cable index of external connector by using id */
+	index = find_cable_index_by_id(edev, id);
+	if (index < 0)
+		return index;
+
+	spin_lock_irqsave(&edev->lock, flags);
+
+	/* Check whether the property is available or not. */
+	if (!is_extcon_property_capability(edev, id, index, prop)) {
+		spin_unlock_irqrestore(&edev->lock, flags);
+		return -EPERM;
+	}
+
+	/*
+	 * Check whether the external connector is attached.
+	 * If external connector is detached, the user can not
+	 * get the property value.
+	 */
+	if (!is_extcon_attached(edev, index)) {
+		spin_unlock_irqrestore(&edev->lock, flags);
+		return 0;
+	}
+
+	cable = &edev->cables[index];
+
+	/* Get the property value according to extcon type */
+	switch (prop) {
+	case EXTCON_PROP_USB_MIN ... EXTCON_PROP_USB_MAX:
+		*prop_val = cable->usb_propval[prop - EXTCON_PROP_USB_MIN];
+		break;
+	case EXTCON_PROP_CHG_MIN ... EXTCON_PROP_CHG_MAX:
+		*prop_val = cable->chg_propval[prop - EXTCON_PROP_CHG_MIN];
+		break;
+	case EXTCON_PROP_JACK_MIN ... EXTCON_PROP_JACK_MAX:
+		*prop_val = cable->jack_propval[prop - EXTCON_PROP_JACK_MIN];
+		break;
+	case EXTCON_PROP_DISP_MIN ... EXTCON_PROP_DISP_MAX:
+		*prop_val = cable->disp_propval[prop - EXTCON_PROP_DISP_MIN];
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	spin_unlock_irqrestore(&edev->lock, flags);
+
+	return ret;
 }
-EXPORT_SYMBOL_GPL(extcon_set_cable_state_);
+EXPORT_SYMBOL_GPL(extcon_get_property);
+
+/**
+ * extcon_set_property() - Set the property value of a specific cable.
+ * @edev:		the extcon device that has the cable.
+ * @id:			the unique id of each external connector
+ *			in extcon enumeration.
+ * @prop:		the property id among enum extcon_property.
+ * @prop_val:		the pointer including the new value of property.
+ *
+ * The each property should be included in the list of supported properties
+ * according to the type of external connectors.
+ *
+ * Returns 0 if success or error number if fail
+ */
+int extcon_set_property(struct extcon_dev *edev, unsigned int id,
+				unsigned int prop,
+				union extcon_property_value prop_val)
+{
+	struct extcon_cable *cable;
+	unsigned long flags;
+	int index, ret = 0;
+
+	if (!edev)
+		return -EINVAL;
+
+	/* Check whether the property is supported or not */
+	if (!is_extcon_property_supported(id, prop))
+		return -EINVAL;
+
+	/* Find the cable index of external connector by using id */
+	index = find_cable_index_by_id(edev, id);
+	if (index < 0)
+		return index;
+
+	spin_lock_irqsave(&edev->lock, flags);
+
+	/* Check whether the property is available or not. */
+	if (!is_extcon_property_capability(edev, id, index, prop)) {
+		spin_unlock_irqrestore(&edev->lock, flags);
+		return -EPERM;
+	}
+
+	cable = &edev->cables[index];
+
+	/* Set the property value according to extcon type */
+	switch (prop) {
+	case EXTCON_PROP_USB_MIN ... EXTCON_PROP_USB_MAX:
+		cable->usb_propval[prop - EXTCON_PROP_USB_MIN] = prop_val;
+		break;
+	case EXTCON_PROP_CHG_MIN ... EXTCON_PROP_CHG_MAX:
+		cable->chg_propval[prop - EXTCON_PROP_CHG_MIN] = prop_val;
+		break;
+	case EXTCON_PROP_JACK_MIN ... EXTCON_PROP_JACK_MAX:
+		cable->jack_propval[prop - EXTCON_PROP_JACK_MIN] = prop_val;
+		break;
+	case EXTCON_PROP_DISP_MIN ... EXTCON_PROP_DISP_MAX:
+		cable->disp_propval[prop - EXTCON_PROP_DISP_MIN] = prop_val;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	spin_unlock_irqrestore(&edev->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(extcon_set_property);
+
+/**
+ * extcon_set_property_sync() - Set the property value of a specific cable
+			with a notification.
+ * @prop_val:		the pointer including the new value of property.
+ *
+ * When setting the property value of external connector, the external connector
+ * should be attached. The each property should be included in the list of
+ * supported properties according to the type of external connectors.
+ *
+ * Returns 0 if success or error number if fail
+ */
+int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id,
+				unsigned int prop,
+				union extcon_property_value prop_val)
+{
+	int ret;
+
+	ret = extcon_set_property(edev, id, prop, prop_val);
+	if (ret < 0)
+		return ret;
+
+	return extcon_sync(edev, id);
+}
+EXPORT_SYMBOL_GPL(extcon_set_property_sync);
+
+/**
+ * extcon_get_property_capability() - Get the capability of property
+ *			of an external connector.
+ * @edev:		the extcon device that has the cable.
+ * @id:			the unique id of each external connector
+ *			in extcon enumeration.
+ * @prop:		the property id among enum extcon_property.
+ *
+ * Returns 1 if the property is available or 0 if not available.
+ */
+int extcon_get_property_capability(struct extcon_dev *edev, unsigned int id,
+					unsigned int prop)
+{
+	int index;
+
+	if (!edev)
+		return -EINVAL;
+
+	/* Check whether the property is supported or not */
+	if (!is_extcon_property_supported(id, prop))
+		return -EINVAL;
+
+	/* Find the cable index of external connector by using id */
+	index = find_cable_index_by_id(edev, id);
+	if (index < 0)
+		return index;
+
+	return is_extcon_property_capability(edev, id, index, prop);
+}
+EXPORT_SYMBOL_GPL(extcon_get_property_capability);
+
+/**
+ * extcon_set_property_capability() - Set the capability of a property
+ *			of an external connector.
+ * @edev:		the extcon device that has the cable.
+ * @id:			the unique id of each external connector
+ *			in extcon enumeration.
+ * @prop:		the property id among enum extcon_property.
+ *
+ * This function set the capability of a property for an external connector
+ * to mark the bit in capability bitmap which mean the available state of
+ * a property.
+ *
+ * Returns 0 if success or error number if fail
+ */
+int extcon_set_property_capability(struct extcon_dev *edev, unsigned int id,
+					unsigned int prop)
+{
+	struct extcon_cable *cable;
+	int index, type, ret = 0;
+
+	if (!edev)
+		return -EINVAL;
+
+	/* Check whether the property is supported or not. */
+	if (!is_extcon_property_supported(id, prop))
+		return -EINVAL;
+
+	/* Find the cable index of external connector by using id. */
+	index = find_cable_index_by_id(edev, id);
+	if (index < 0)
+		return index;
+
+	type = get_extcon_type(prop);
+	if (type < 0)
+		return type;
+
+	cable = &edev->cables[index];
+
+	switch (type) {
+	case EXTCON_TYPE_USB:
+		__set_bit(prop - EXTCON_PROP_USB_MIN, cable->usb_bits);
+		break;
+	case EXTCON_TYPE_CHG:
+		__set_bit(prop - EXTCON_PROP_CHG_MIN, cable->chg_bits);
+		break;
+	case EXTCON_TYPE_JACK:
+		__set_bit(prop - EXTCON_PROP_JACK_MIN, cable->jack_bits);
+		break;
+	case EXTCON_TYPE_DISP:
+		__set_bit(prop - EXTCON_PROP_DISP_MIN, cable->disp_bits);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(extcon_set_property_capability);
 
 /**
  * extcon_get_extcon_dev() - Get the extcon device instance from the name
@@ -428,7 +904,7 @@
 			     struct notifier_block *nb)
 {
 	unsigned long flags;
-	int ret, idx;
+	int ret, idx = -EINVAL;
 
 	if (!nb)
 		return -EINVAL;
@@ -846,13 +1322,13 @@
 		return ERR_PTR(-EINVAL);
 
 	if (!dev->of_node) {
-		dev_err(dev, "device does not have a device node entry\n");
+		dev_dbg(dev, "device does not have a device node entry\n");
 		return ERR_PTR(-EINVAL);
 	}
 
 	node = of_parse_phandle(dev->of_node, "extcon", index);
 	if (!node) {
-		dev_err(dev, "failed to get phandle in %s node\n",
+		dev_dbg(dev, "failed to get phandle in %s node\n",
 			dev->of_node->full_name);
 		return ERR_PTR(-ENODEV);
 	}
diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
index 4388937..ce2bc2a 100644
--- a/drivers/firmware/arm_scpi.c
+++ b/drivers/firmware/arm_scpi.c
@@ -709,9 +709,10 @@
 		struct mbox_client *cl = &pchan->cl;
 		struct device_node *shmem = of_parse_phandle(np, "shmem", idx);
 
-		if (of_address_to_resource(shmem, 0, &res)) {
+		ret = of_address_to_resource(shmem, 0, &res);
+		of_node_put(shmem);
+		if (ret) {
 			dev_err(dev, "failed to get SCPI payload mem resource\n");
-			ret = -EINVAL;
 			goto err;
 		}
 
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 829eec8..2fe1a13 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -23,6 +23,7 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/errno.h>
+#include <linux/cpu.h>
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -238,33 +239,14 @@
 	return count;
 }
 
-/**
- * dcdbas_smi_request: generate SMI request
- *
- * Called with smi_data_lock.
- */
-int dcdbas_smi_request(struct smi_cmd *smi_cmd)
+static int raise_smi(void *par)
 {
-	cpumask_var_t old_mask;
-	int ret = 0;
+	struct smi_cmd *smi_cmd = par;
 
-	if (smi_cmd->magic != SMI_CMD_MAGIC) {
-		dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n",
-			 __func__);
-		return -EBADR;
-	}
-
-	/* SMI requires CPU 0 */
-	if (!alloc_cpumask_var(&old_mask, GFP_KERNEL))
-		return -ENOMEM;
-
-	cpumask_copy(old_mask, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current, cpumask_of(0));
 	if (smp_processor_id() != 0) {
 		dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n",
 			__func__);
-		ret = -EBUSY;
-		goto out;
+		return -EBUSY;
 	}
 
 	/* generate SMI */
@@ -280,9 +262,28 @@
 		: "memory"
 	);
 
-out:
-	set_cpus_allowed_ptr(current, old_mask);
-	free_cpumask_var(old_mask);
+	return 0;
+}
+/**
+ * dcdbas_smi_request: generate SMI request
+ *
+ * Called with smi_data_lock.
+ */
+int dcdbas_smi_request(struct smi_cmd *smi_cmd)
+{
+	int ret;
+
+	if (smi_cmd->magic != SMI_CMD_MAGIC) {
+		dev_info(&dcdbas_pdev->dev, "%s: invalid magic value\n",
+			 __func__);
+		return -EBADR;
+	}
+
+	/* SMI requires CPU 0 */
+	get_online_cpus();
+	ret = smp_call_on_cpu(0, raise_smi, smi_cmd, true);
+	put_online_cpus();
+
 	return ret;
 }
 
diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c
index 94a58a0..44c0139 100644
--- a/drivers/firmware/dmi-id.c
+++ b/drivers/firmware/dmi-id.c
@@ -229,14 +229,14 @@
 
 	ret = device_register(dmi_dev);
 	if (ret)
-		goto fail_free_dmi_dev;
+		goto fail_put_dmi_dev;
 
 	return 0;
 
-fail_free_dmi_dev:
-	kfree(dmi_dev);
-fail_class_unregister:
+fail_put_dmi_dev:
+	put_device(dmi_dev);
 
+fail_class_unregister:
 	class_unregister(&dmi_class);
 
 	return ret;
diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 6394152..c981be1 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -112,6 +112,23 @@
 
 	  Most users should say N.
 
+config EFI_TEST
+	tristate "EFI Runtime Service Tests Support"
+	depends on EFI
+	default n
+	help
+	  This driver uses the efi.<service> function pointers directly instead
+	  of going through the efivar API, because it is not trying to test the
+	  kernel subsystem, just for testing the UEFI runtime service
+	  interfaces which are provided by the firmware. This driver is used
+	  by the Firmware Test Suite (FWTS) for testing the UEFI runtime
+	  interfaces readiness of the firmware.
+	  Details for FWTS are available from:
+	  <https://wiki.ubuntu.com/FirmwareTestSuite>
+
+	  Say Y here to enable the runtime services support via /dev/efi_test.
+	  If unsure, say N.
+
 endmenu
 
 config UEFI_CPER
diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile
index a219640..c8a439f 100644
--- a/drivers/firmware/efi/Makefile
+++ b/drivers/firmware/efi/Makefile
@@ -10,7 +10,7 @@
 KASAN_SANITIZE_runtime-wrappers.o	:= n
 
 obj-$(CONFIG_EFI)			+= efi.o vars.o reboot.o memattr.o
-obj-$(CONFIG_EFI)			+= capsule.o
+obj-$(CONFIG_EFI)			+= capsule.o memmap.o
 obj-$(CONFIG_EFI_VARS)			+= efivars.o
 obj-$(CONFIG_EFI_ESRT)			+= esrt.o
 obj-$(CONFIG_EFI_VARS_PSTORE)		+= efi-pstore.o
@@ -20,6 +20,7 @@
 obj-$(CONFIG_EFI_STUB)			+= libstub/
 obj-$(CONFIG_EFI_FAKE_MEMMAP)		+= fake_mem.o
 obj-$(CONFIG_EFI_BOOTLOADER_CONTROL)	+= efibc.o
+obj-$(CONFIG_EFI_TEST)			+= test/
 
 arm-obj-$(CONFIG_EFI)			:= arm-init.o arm-runtime.o
 obj-$(CONFIG_ARM)			+= $(arm-obj-y)
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index c49d50e..8efe130 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -26,9 +26,9 @@
 
 u64 efi_system_table;
 
-static int __init is_normal_ram(efi_memory_desc_t *md)
+static int __init is_memory(efi_memory_desc_t *md)
 {
-	if (md->attribute & EFI_MEMORY_WB)
+	if (md->attribute & (EFI_MEMORY_WB|EFI_MEMORY_WT|EFI_MEMORY_WC))
 		return 1;
 	return 0;
 }
@@ -152,9 +152,9 @@
 }
 
 /*
- * Return true for RAM regions we want to permanently reserve.
+ * Return true for regions that can be used as System RAM.
  */
-static __init int is_reserve_region(efi_memory_desc_t *md)
+static __init int is_usable_memory(efi_memory_desc_t *md)
 {
 	switch (md->type) {
 	case EFI_LOADER_CODE:
@@ -163,18 +163,22 @@
 	case EFI_BOOT_SERVICES_DATA:
 	case EFI_CONVENTIONAL_MEMORY:
 	case EFI_PERSISTENT_MEMORY:
-		return 0;
+		/*
+		 * According to the spec, these regions are no longer reserved
+		 * after calling ExitBootServices(). However, we can only use
+		 * them as System RAM if they can be mapped writeback cacheable.
+		 */
+		return (md->attribute & EFI_MEMORY_WB);
 	default:
 		break;
 	}
-	return is_normal_ram(md);
+	return false;
 }
 
 static __init void reserve_regions(void)
 {
 	efi_memory_desc_t *md;
 	u64 paddr, npages, size;
-	int resv;
 
 	if (efi_enabled(EFI_DBG))
 		pr_info("Processing EFI memory map:\n");
@@ -191,32 +195,29 @@
 		paddr = md->phys_addr;
 		npages = md->num_pages;
 
-		resv = is_reserve_region(md);
 		if (efi_enabled(EFI_DBG)) {
 			char buf[64];
 
-			pr_info("  0x%012llx-0x%012llx %s%s\n",
+			pr_info("  0x%012llx-0x%012llx %s\n",
 				paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
-				efi_md_typeattr_format(buf, sizeof(buf), md),
-				resv ? "*" : "");
+				efi_md_typeattr_format(buf, sizeof(buf), md));
 		}
 
 		memrange_efi_to_native(&paddr, &npages);
 		size = npages << PAGE_SHIFT;
 
-		if (is_normal_ram(md))
+		if (is_memory(md)) {
 			early_init_dt_add_memory_arch(paddr, size);
 
-		if (resv)
-			memblock_mark_nomap(paddr, size);
-
+			if (!is_usable_memory(md))
+				memblock_mark_nomap(paddr, size);
+		}
 	}
-
-	set_bit(EFI_MEMMAP, &efi.flags);
 }
 
 void __init efi_init(void)
 {
+	struct efi_memory_map_data data;
 	struct efi_fdt_params params;
 
 	/* Grab UEFI information placed in FDT by stub */
@@ -225,9 +226,12 @@
 
 	efi_system_table = params.system_table;
 
-	efi.memmap.phys_map = params.mmap;
-	efi.memmap.map = early_memremap_ro(params.mmap, params.mmap_size);
-	if (efi.memmap.map == NULL) {
+	data.desc_version = params.desc_ver;
+	data.desc_size = params.desc_size;
+	data.size = params.mmap_size;
+	data.phys_map = params.mmap;
+
+	if (efi_memmap_init_early(&data) < 0) {
 		/*
 		* If we are booting via UEFI, the UEFI memory map is the only
 		* description of memory we have, so there is little point in
@@ -235,9 +239,6 @@
 		*/
 		panic("Unable to map EFI memory map.\n");
 	}
-	efi.memmap.map_end = efi.memmap.map + params.mmap_size;
-	efi.memmap.desc_size = params.desc_size;
-	efi.memmap.desc_version = params.desc_ver;
 
 	WARN(efi.memmap.desc_version != 1,
 	     "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
@@ -248,7 +249,8 @@
 
 	reserve_regions();
 	efi_memattr_init();
-	early_memunmap(efi.memmap.map, params.mmap_size);
+	efi_esrt_init();
+	efi_memmap_unmap();
 
 	memblock_reserve(params.mmap & PAGE_MASK,
 			 PAGE_ALIGN(params.mmap_size +
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index c394b81..7c75a8d 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -39,6 +39,26 @@
 	.mmlist			= LIST_HEAD_INIT(efi_mm.mmlist),
 };
 
+#ifdef CONFIG_ARM64_PTDUMP
+#include <asm/ptdump.h>
+
+static struct ptdump_info efi_ptdump_info = {
+	.mm		= &efi_mm,
+	.markers	= (struct addr_marker[]){
+		{ 0,		"UEFI runtime start" },
+		{ TASK_SIZE_64,	"UEFI runtime end" }
+	},
+	.base_addr	= 0,
+};
+
+static int __init ptdump_init(void)
+{
+	return ptdump_register(&efi_ptdump_info, "efi_page_tables");
+}
+device_initcall(ptdump_init);
+
+#endif
+
 static bool __init efi_virtmap_init(void)
 {
 	efi_memory_desc_t *md;
@@ -114,14 +134,12 @@
 
 	pr_info("Remapping and enabling EFI services.\n");
 
-	mapsize = efi.memmap.map_end - efi.memmap.map;
+	mapsize = efi.memmap.desc_size * efi.memmap.nr_map;
 
-	efi.memmap.map = memremap(efi.memmap.phys_map, mapsize, MEMREMAP_WB);
-	if (!efi.memmap.map) {
+	if (efi_memmap_init_late(efi.memmap.phys_map, mapsize)) {
 		pr_err("Failed to remap EFI memory map\n");
 		return -ENOMEM;
 	}
-	efi.memmap.map_end = efi.memmap.map + mapsize;
 
 	if (!efi_virtmap_init()) {
 		pr_err("UEFI virtual mapping missing or invalid -- runtime services will not be available\n");
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c
index c99c24b..9ae6c11 100644
--- a/drivers/firmware/efi/capsule-loader.c
+++ b/drivers/firmware/efi/capsule-loader.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/efi.h>
+#include <linux/vmalloc.h>
 
 #define NO_FURTHER_WRITE_ACTION -1
 
@@ -108,14 +109,15 @@
 	int ret;
 	void *cap_hdr_temp;
 
-	cap_hdr_temp = kmap(cap_info->pages[0]);
+	cap_hdr_temp = vmap(cap_info->pages, cap_info->index,
+			VM_MAP, PAGE_KERNEL);
 	if (!cap_hdr_temp) {
-		pr_debug("%s: kmap() failed\n", __func__);
+		pr_debug("%s: vmap() failed\n", __func__);
 		return -EFAULT;
 	}
 
 	ret = efi_capsule_update(cap_hdr_temp, cap_info->pages);
-	kunmap(cap_info->pages[0]);
+	vunmap(cap_hdr_temp);
 	if (ret) {
 		pr_err("%s: efi_capsule_update() failed\n", __func__);
 		return ret;
diff --git a/drivers/firmware/efi/capsule.c b/drivers/firmware/efi/capsule.c
index 53b9fd2..6eedff4 100644
--- a/drivers/firmware/efi/capsule.c
+++ b/drivers/firmware/efi/capsule.c
@@ -190,9 +190,9 @@
  * map the capsule described by @capsule with its data in @pages and
  * send it to the firmware via the UpdateCapsule() runtime service.
  *
- * @capsule must be a virtual mapping of the first page in @pages
- * (@pages[0]) in the kernel address space. That is, a
- * capsule_header_t that describes the entire contents of the capsule
+ * @capsule must be a virtual mapping of the complete capsule update in the
+ * kernel address space, as the capsule can be consumed immediately.
+ * A capsule_header_t that describes the entire contents of the capsule
  * must be at the start of the first data page.
  *
  * Even though this function will validate that the firmware supports
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 30a24d0..1c33d74 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -125,16 +125,19 @@
  * @entry: deleting entry
  * @turn_off_scanning: Check if a scanning flag should be turned off
  */
-static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
+static inline int __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
 						bool turn_off_scanning)
 {
 	if (entry->deleting) {
 		list_del(&entry->list);
 		efivar_entry_iter_end();
 		efivar_unregister(entry);
-		efivar_entry_iter_begin();
+		if (efivar_entry_iter_begin())
+			return -EINTR;
 	} else if (turn_off_scanning)
 		entry->scanning = false;
+
+	return 0;
 }
 
 /**
@@ -144,13 +147,18 @@
  * @head: list head
  * @stop: a flag checking if scanning will stop
  */
-static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
+static int efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
 				       struct efivar_entry *next,
 				       struct list_head *head, bool stop)
 {
-	__efi_pstore_scan_sysfs_exit(pos, true);
+	int ret = __efi_pstore_scan_sysfs_exit(pos, true);
+
+	if (ret)
+		return ret;
+
 	if (stop)
-		__efi_pstore_scan_sysfs_exit(next, &next->list != head);
+		ret = __efi_pstore_scan_sysfs_exit(next, &next->list != head);
+	return ret;
 }
 
 /**
@@ -172,13 +180,17 @@
 	struct efivar_entry *entry, *n;
 	struct list_head *head = &efivar_sysfs_list;
 	int size = 0;
+	int ret;
 
 	if (!*pos) {
 		list_for_each_entry_safe(entry, n, head, list) {
 			efi_pstore_scan_sysfs_enter(entry, n, head);
 
 			size = efi_pstore_read_func(entry, data);
-			efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
+			ret = efi_pstore_scan_sysfs_exit(entry, n, head,
+							 size < 0);
+			if (ret)
+				return ret;
 			if (size)
 				break;
 		}
@@ -190,7 +202,9 @@
 		efi_pstore_scan_sysfs_enter((*pos), n, head);
 
 		size = efi_pstore_read_func((*pos), data);
-		efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
+		ret = efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
+		if (ret)
+			return ret;
 		if (size)
 			break;
 	}
@@ -232,7 +246,10 @@
 	if (!*data.buf)
 		return -ENOMEM;
 
-	efivar_entry_iter_begin();
+	if (efivar_entry_iter_begin()) {
+		kfree(*data.buf);
+		return -EINTR;
+	}
 	size = efi_pstore_sysfs_entry_iter(&data,
 					   (struct efivar_entry **)&psi->data);
 	efivar_entry_iter_end();
@@ -347,7 +364,8 @@
 	edata.time = time;
 	edata.name = efi_name;
 
-	efivar_entry_iter_begin();
+	if (efivar_entry_iter_begin())
+		return -EINTR;
 	found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
 
 	if (found && !entry->scanning) {
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 5a2631a..1ac199c 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/ucs2_string.h>
+#include <linux/memblock.h>
 
 #include <asm/early_ioremap.h>
 
@@ -347,56 +348,31 @@
 
 /*
  * Find the efi memory descriptor for a given physical address.  Given a
- * physicall address, determine if it exists within an EFI Memory Map entry,
+ * physical address, determine if it exists within an EFI Memory Map entry,
  * and if so, populate the supplied memory descriptor with the appropriate
  * data.
  */
 int __init efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md)
 {
-	struct efi_memory_map *map = &efi.memmap;
-	phys_addr_t p, e;
+	efi_memory_desc_t *md;
 
 	if (!efi_enabled(EFI_MEMMAP)) {
 		pr_err_once("EFI_MEMMAP is not enabled.\n");
 		return -EINVAL;
 	}
 
-	if (!map) {
-		pr_err_once("efi.memmap is not set.\n");
-		return -EINVAL;
-	}
 	if (!out_md) {
 		pr_err_once("out_md is null.\n");
 		return -EINVAL;
         }
-	if (WARN_ON_ONCE(!map->phys_map))
-		return -EINVAL;
-	if (WARN_ON_ONCE(map->nr_map == 0) || WARN_ON_ONCE(map->desc_size == 0))
-		return -EINVAL;
 
-	e = map->phys_map + map->nr_map * map->desc_size;
-	for (p = map->phys_map; p < e; p += map->desc_size) {
-		efi_memory_desc_t *md;
+	for_each_efi_memory_desc(md) {
 		u64 size;
 		u64 end;
 
-		/*
-		 * If a driver calls this after efi_free_boot_services,
-		 * ->map will be NULL, and the target may also not be mapped.
-		 * So just always get our own virtual map on the CPU.
-		 *
-		 */
-		md = early_memremap(p, sizeof (*md));
-		if (!md) {
-			pr_err_once("early_memremap(%pa, %zu) failed.\n",
-				    &p, sizeof (*md));
-			return -ENOMEM;
-		}
-
 		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
 		    md->type != EFI_BOOT_SERVICES_DATA &&
 		    md->type != EFI_RUNTIME_SERVICES_DATA) {
-			early_memunmap(md, sizeof (*md));
 			continue;
 		}
 
@@ -404,11 +380,8 @@
 		end = md->phys_addr + size;
 		if (phys_addr >= md->phys_addr && phys_addr < end) {
 			memcpy(out_md, md, sizeof(*out_md));
-			early_memunmap(md, sizeof (*md));
 			return 0;
 		}
-
-		early_memunmap(md, sizeof (*md));
 	}
 	pr_err_once("requested map not found.\n");
 	return -ENOENT;
@@ -424,6 +397,35 @@
 	return end;
 }
 
+void __init __weak efi_arch_mem_reserve(phys_addr_t addr, u64 size) {}
+
+/**
+ * efi_mem_reserve - Reserve an EFI memory region
+ * @addr: Physical address to reserve
+ * @size: Size of reservation
+ *
+ * Mark a region as reserved from general kernel allocation and
+ * prevent it being released by efi_free_boot_services().
+ *
+ * This function should be called drivers once they've parsed EFI
+ * configuration tables to figure out where their data lives, e.g.
+ * efi_esrt_init().
+ */
+void __init efi_mem_reserve(phys_addr_t addr, u64 size)
+{
+	if (!memblock_is_region_reserved(addr, size))
+		memblock_reserve(addr, size);
+
+	/*
+	 * Some architectures (x86) reserve all boot services ranges
+	 * until efi_free_boot_services() because of buggy firmware
+	 * implementations. This means the above memblock_reserve() is
+	 * superfluous on x86 and instead what it needs to do is
+	 * ensure the @start, @size is not freed.
+	 */
+	efi_arch_mem_reserve(addr, size);
+}
+
 static __initdata efi_config_table_type_t common_tables[] = {
 	{ACPI_20_TABLE_GUID, "ACPI 2.0", &efi.acpi20},
 	{ACPI_TABLE_GUID, "ACPI", &efi.acpi},
@@ -657,9 +659,12 @@
 		}
 
 		if (subnode) {
-			node = of_get_flat_dt_subnode_by_name(node, subnode);
-			if (node < 0)
+			int err = of_get_flat_dt_subnode_by_name(node, subnode);
+
+			if (err < 0)
 				return 0;
+
+			node = err;
 		}
 
 		return __find_uefi_params(node, info, dt_params[i].params);
@@ -808,6 +813,9 @@
 	case EFI_NOT_FOUND:
 		err = -ENOENT;
 		break;
+	case EFI_ABORTED:
+		err = -EINTR;
+		break;
 	default:
 		err = -EINVAL;
 	}
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 116b244..3e626fd 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -510,7 +510,8 @@
 		vendor = del_var->VendorGuid;
 	}
 
-	efivar_entry_iter_begin();
+	if (efivar_entry_iter_begin())
+		return -EINTR;
 	entry = efivar_entry_find(name, vendor, &efivar_sysfs_list, true);
 	if (!entry)
 		err = -EINVAL;
@@ -575,7 +576,10 @@
 		return ret;
 
 	kobject_uevent(&new_var->kobj, KOBJ_ADD);
-	efivar_entry_add(new_var, &efivar_sysfs_list);
+	if (efivar_entry_add(new_var, &efivar_sysfs_list)) {
+		efivar_unregister(new_var);
+		return -EINTR;
+	}
 
 	return 0;
 }
@@ -690,7 +694,10 @@
 
 static int efivar_sysfs_destroy(struct efivar_entry *entry, void *data)
 {
-	efivar_entry_remove(entry);
+	int err = efivar_entry_remove(entry);
+
+	if (err)
+		return err;
 	efivar_unregister(entry);
 	return 0;
 }
@@ -698,7 +705,14 @@
 static void efivars_sysfs_exit(void)
 {
 	/* Remove all entries and destroy */
-	__efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list, NULL, NULL);
+	int err;
+
+	err = __efivar_entry_iter(efivar_sysfs_destroy, &efivar_sysfs_list,
+				  NULL, NULL);
+	if (err) {
+		pr_err("efivars: Failed to destroy sysfs entries\n");
+		return;
+	}
 
 	if (efivars_new_var)
 		sysfs_remove_bin_file(&efivars_kset->kobj, efivars_new_var);
diff --git a/drivers/firmware/efi/esrt.c b/drivers/firmware/efi/esrt.c
index 75feb3f..1491407 100644
--- a/drivers/firmware/efi/esrt.c
+++ b/drivers/firmware/efi/esrt.c
@@ -16,6 +16,7 @@
 #include <linux/device.h>
 #include <linux/efi.h>
 #include <linux/init.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
@@ -235,7 +236,7 @@
 };
 
 /*
- * remap the table, copy it to kmalloced pages, and unmap it.
+ * remap the table, validate it, mark it reserved and unmap it.
  */
 void __init efi_esrt_init(void)
 {
@@ -335,7 +336,7 @@
 
 	end = esrt_data + size;
 	pr_info("Reserving ESRT space from %pa to %pa.\n", &esrt_data, &end);
-	memblock_reserve(esrt_data, esrt_data_size);
+	efi_mem_reserve(esrt_data, esrt_data_size);
 
 	pr_debug("esrt-init: loaded.\n");
 err_memunmap:
@@ -382,28 +383,18 @@
 static int __init esrt_sysfs_init(void)
 {
 	int error;
-	struct efi_system_resource_table __iomem *ioesrt;
 
 	pr_debug("esrt-sysfs: loading.\n");
 	if (!esrt_data || !esrt_data_size)
 		return -ENOSYS;
 
-	ioesrt = ioremap(esrt_data, esrt_data_size);
-	if (!ioesrt) {
-		pr_err("ioremap(%pa, %zu) failed.\n", &esrt_data,
+	esrt = memremap(esrt_data, esrt_data_size, MEMREMAP_WB);
+	if (!esrt) {
+		pr_err("memremap(%pa, %zu) failed.\n", &esrt_data,
 		       esrt_data_size);
 		return -ENOMEM;
 	}
 
-	esrt = kmalloc(esrt_data_size, GFP_KERNEL);
-	if (!esrt) {
-		pr_err("kmalloc failed. (wanted %zu bytes)\n", esrt_data_size);
-		iounmap(ioesrt);
-		return -ENOMEM;
-	}
-
-	memcpy_fromio(esrt, ioesrt, esrt_data_size);
-
 	esrt_kobj = kobject_create_and_add("esrt", efi_kobj);
 	if (!esrt_kobj) {
 		pr_err("Firmware table registration failed.\n");
@@ -429,8 +420,6 @@
 	if (error)
 		goto err_cleanup_list;
 
-	memblock_remove(esrt_data, esrt_data_size);
-
 	pr_debug("esrt-sysfs: loaded.\n");
 
 	return 0;
diff --git a/drivers/firmware/efi/fake_mem.c b/drivers/firmware/efi/fake_mem.c
index 48430ab..520a40e 100644
--- a/drivers/firmware/efi/fake_mem.c
+++ b/drivers/firmware/efi/fake_mem.c
@@ -35,17 +35,13 @@
 
 #define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM
 
-struct fake_mem {
-	struct range range;
-	u64 attribute;
-};
-static struct fake_mem fake_mems[EFI_MAX_FAKEMEM];
+static struct efi_mem_range fake_mems[EFI_MAX_FAKEMEM];
 static int nr_fake_mem;
 
 static int __init cmp_fake_mem(const void *x1, const void *x2)
 {
-	const struct fake_mem *m1 = x1;
-	const struct fake_mem *m2 = x2;
+	const struct efi_mem_range *m1 = x1;
+	const struct efi_mem_range *m2 = x2;
 
 	if (m1->range.start < m2->range.start)
 		return -1;
@@ -56,40 +52,21 @@
 
 void __init efi_fake_memmap(void)
 {
-	u64 start, end, m_start, m_end, m_attr;
 	int new_nr_map = efi.memmap.nr_map;
 	efi_memory_desc_t *md;
 	phys_addr_t new_memmap_phy;
 	void *new_memmap;
-	void *old, *new;
 	int i;
 
-	if (!nr_fake_mem || !efi_enabled(EFI_MEMMAP))
+	if (!nr_fake_mem)
 		return;
 
 	/* count up the number of EFI memory descriptor */
-	for_each_efi_memory_desc(md) {
-		start = md->phys_addr;
-		end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+	for (i = 0; i < nr_fake_mem; i++) {
+		for_each_efi_memory_desc(md) {
+			struct range *r = &fake_mems[i].range;
 
-		for (i = 0; i < nr_fake_mem; i++) {
-			/* modifying range */
-			m_start = fake_mems[i].range.start;
-			m_end = fake_mems[i].range.end;
-
-			if (m_start <= start) {
-				/* split into 2 parts */
-				if (start < m_end && m_end < end)
-					new_nr_map++;
-			}
-			if (start < m_start && m_start < end) {
-				/* split into 3 parts */
-				if (m_end < end)
-					new_nr_map += 2;
-				/* split into 2 parts */
-				if (end <= m_end)
-					new_nr_map++;
-			}
+			new_nr_map += efi_memmap_split_count(md, r);
 		}
 	}
 
@@ -107,85 +84,13 @@
 		return;
 	}
 
-	for (old = efi.memmap.map, new = new_memmap;
-	     old < efi.memmap.map_end;
-	     old += efi.memmap.desc_size, new += efi.memmap.desc_size) {
-
-		/* copy original EFI memory descriptor */
-		memcpy(new, old, efi.memmap.desc_size);
-		md = new;
-		start = md->phys_addr;
-		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
-
-		for (i = 0; i < nr_fake_mem; i++) {
-			/* modifying range */
-			m_start = fake_mems[i].range.start;
-			m_end = fake_mems[i].range.end;
-			m_attr = fake_mems[i].attribute;
-
-			if (m_start <= start && end <= m_end)
-				md->attribute |= m_attr;
-
-			if (m_start <= start &&
-			    (start < m_end && m_end < end)) {
-				/* first part */
-				md->attribute |= m_attr;
-				md->num_pages = (m_end - md->phys_addr + 1) >>
-					EFI_PAGE_SHIFT;
-				/* latter part */
-				new += efi.memmap.desc_size;
-				memcpy(new, old, efi.memmap.desc_size);
-				md = new;
-				md->phys_addr = m_end + 1;
-				md->num_pages = (end - md->phys_addr + 1) >>
-					EFI_PAGE_SHIFT;
-			}
-
-			if ((start < m_start && m_start < end) && m_end < end) {
-				/* first part */
-				md->num_pages = (m_start - md->phys_addr) >>
-					EFI_PAGE_SHIFT;
-				/* middle part */
-				new += efi.memmap.desc_size;
-				memcpy(new, old, efi.memmap.desc_size);
-				md = new;
-				md->attribute |= m_attr;
-				md->phys_addr = m_start;
-				md->num_pages = (m_end - m_start + 1) >>
-					EFI_PAGE_SHIFT;
-				/* last part */
-				new += efi.memmap.desc_size;
-				memcpy(new, old, efi.memmap.desc_size);
-				md = new;
-				md->phys_addr = m_end + 1;
-				md->num_pages = (end - m_end) >>
-					EFI_PAGE_SHIFT;
-			}
-
-			if ((start < m_start && m_start < end) &&
-			    (end <= m_end)) {
-				/* first part */
-				md->num_pages = (m_start - md->phys_addr) >>
-					EFI_PAGE_SHIFT;
-				/* latter part */
-				new += efi.memmap.desc_size;
-				memcpy(new, old, efi.memmap.desc_size);
-				md = new;
-				md->phys_addr = m_start;
-				md->num_pages = (end - md->phys_addr + 1) >>
-					EFI_PAGE_SHIFT;
-				md->attribute |= m_attr;
-			}
-		}
-	}
+	for (i = 0; i < nr_fake_mem; i++)
+		efi_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]);
 
 	/* swap into new EFI memmap */
-	efi_unmap_memmap();
-	efi.memmap.map = new_memmap;
-	efi.memmap.phys_map = new_memmap_phy;
-	efi.memmap.nr_map = new_nr_map;
-	efi.memmap.map_end = efi.memmap.map + efi.memmap.nr_map * efi.memmap.desc_size;
-	set_bit(EFI_MEMMAP, &efi.flags);
+	early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map);
+
+	efi_memmap_install(new_memmap_phy, new_nr_map);
 
 	/* print new EFI memmap */
 	efi_print_memmap();
@@ -223,7 +128,7 @@
 			p++;
 	}
 
-	sort(fake_mems, nr_fake_mem, sizeof(struct fake_mem),
+	sort(fake_mems, nr_fake_mem, sizeof(struct efi_mem_range),
 	     cmp_fake_mem, NULL);
 
 	for (i = 0; i < nr_fake_mem; i++)
diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 3bd127f9..aded106 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -41,6 +41,8 @@
 #define EFI_ALLOC_ALIGN		EFI_PAGE_SIZE
 #endif
 
+#define EFI_MMAP_NR_SLACK_SLOTS	8
+
 struct file_info {
 	efi_file_handle_t *handle;
 	u64 size;
@@ -63,49 +65,62 @@
 	}
 }
 
+static inline bool mmap_has_headroom(unsigned long buff_size,
+				     unsigned long map_size,
+				     unsigned long desc_size)
+{
+	unsigned long slack = buff_size - map_size;
+
+	return slack / desc_size >= EFI_MMAP_NR_SLACK_SLOTS;
+}
+
 efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
-				efi_memory_desc_t **map,
-				unsigned long *map_size,
-				unsigned long *desc_size,
-				u32 *desc_ver,
-				unsigned long *key_ptr)
+				struct efi_boot_memmap *map)
 {
 	efi_memory_desc_t *m = NULL;
 	efi_status_t status;
 	unsigned long key;
 	u32 desc_version;
 
-	*map_size = sizeof(*m) * 32;
+	*map->desc_size =	sizeof(*m);
+	*map->map_size =	*map->desc_size * 32;
+	*map->buff_size =	*map->map_size;
 again:
-	/*
-	 * Add an additional efi_memory_desc_t because we're doing an
-	 * allocation which may be in a new descriptor region.
-	 */
-	*map_size += sizeof(*m);
 	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
-				*map_size, (void **)&m);
+				*map->map_size, (void **)&m);
 	if (status != EFI_SUCCESS)
 		goto fail;
 
-	*desc_size = 0;
+	*map->desc_size = 0;
 	key = 0;
-	status = efi_call_early(get_memory_map, map_size, m,
-				&key, desc_size, &desc_version);
-	if (status == EFI_BUFFER_TOO_SMALL) {
+	status = efi_call_early(get_memory_map, map->map_size, m,
+				&key, map->desc_size, &desc_version);
+	if (status == EFI_BUFFER_TOO_SMALL ||
+	    !mmap_has_headroom(*map->buff_size, *map->map_size,
+			       *map->desc_size)) {
 		efi_call_early(free_pool, m);
+		/*
+		 * Make sure there is some entries of headroom so that the
+		 * buffer can be reused for a new map after allocations are
+		 * no longer permitted.  Its unlikely that the map will grow to
+		 * exceed this headroom once we are ready to trigger
+		 * ExitBootServices()
+		 */
+		*map->map_size += *map->desc_size * EFI_MMAP_NR_SLACK_SLOTS;
+		*map->buff_size = *map->map_size;
 		goto again;
 	}
 
 	if (status != EFI_SUCCESS)
 		efi_call_early(free_pool, m);
 
-	if (key_ptr && status == EFI_SUCCESS)
-		*key_ptr = key;
-	if (desc_ver && status == EFI_SUCCESS)
-		*desc_ver = desc_version;
+	if (map->key_ptr && status == EFI_SUCCESS)
+		*map->key_ptr = key;
+	if (map->desc_ver && status == EFI_SUCCESS)
+		*map->desc_ver = desc_version;
 
 fail:
-	*map = m;
+	*map->map = m;
 	return status;
 }
 
@@ -113,13 +128,20 @@
 unsigned long get_dram_base(efi_system_table_t *sys_table_arg)
 {
 	efi_status_t status;
-	unsigned long map_size;
+	unsigned long map_size, buff_size;
 	unsigned long membase  = EFI_ERROR;
 	struct efi_memory_map map;
 	efi_memory_desc_t *md;
+	struct efi_boot_memmap boot_map;
 
-	status = efi_get_memory_map(sys_table_arg, (efi_memory_desc_t **)&map.map,
-				    &map_size, &map.desc_size, NULL, NULL);
+	boot_map.map =		(efi_memory_desc_t **)&map.map;
+	boot_map.map_size =	&map_size;
+	boot_map.desc_size =	&map.desc_size;
+	boot_map.desc_ver =	NULL;
+	boot_map.key_ptr =	NULL;
+	boot_map.buff_size =	&buff_size;
+
+	status = efi_get_memory_map(sys_table_arg, &boot_map);
 	if (status != EFI_SUCCESS)
 		return membase;
 
@@ -144,15 +166,22 @@
 			    unsigned long size, unsigned long align,
 			    unsigned long *addr, unsigned long max)
 {
-	unsigned long map_size, desc_size;
+	unsigned long map_size, desc_size, buff_size;
 	efi_memory_desc_t *map;
 	efi_status_t status;
 	unsigned long nr_pages;
 	u64 max_addr = 0;
 	int i;
+	struct efi_boot_memmap boot_map;
 
-	status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size,
-				    NULL, NULL);
+	boot_map.map =		&map;
+	boot_map.map_size =	&map_size;
+	boot_map.desc_size =	&desc_size;
+	boot_map.desc_ver =	NULL;
+	boot_map.key_ptr =	NULL;
+	boot_map.buff_size =	&buff_size;
+
+	status = efi_get_memory_map(sys_table_arg, &boot_map);
 	if (status != EFI_SUCCESS)
 		goto fail;
 
@@ -230,14 +259,21 @@
 			   unsigned long size, unsigned long align,
 			   unsigned long *addr)
 {
-	unsigned long map_size, desc_size;
+	unsigned long map_size, desc_size, buff_size;
 	efi_memory_desc_t *map;
 	efi_status_t status;
 	unsigned long nr_pages;
 	int i;
+	struct efi_boot_memmap boot_map;
 
-	status = efi_get_memory_map(sys_table_arg, &map, &map_size, &desc_size,
-				    NULL, NULL);
+	boot_map.map =		&map;
+	boot_map.map_size =	&map_size;
+	boot_map.desc_size =	&desc_size;
+	boot_map.desc_ver =	NULL;
+	boot_map.key_ptr =	NULL;
+	boot_map.buff_size =	&buff_size;
+
+	status = efi_get_memory_map(sys_table_arg, &boot_map);
 	if (status != EFI_SUCCESS)
 		goto fail;
 
@@ -704,3 +740,76 @@
 	*cmd_line_len = options_bytes;
 	return (char *)cmdline_addr;
 }
+
+/*
+ * Handle calling ExitBootServices according to the requirements set out by the
+ * spec.  Obtains the current memory map, and returns that info after calling
+ * ExitBootServices.  The client must specify a function to perform any
+ * processing of the memory map data prior to ExitBootServices.  A client
+ * specific structure may be passed to the function via priv.  The client
+ * function may be called multiple times.
+ */
+efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table_arg,
+				    void *handle,
+				    struct efi_boot_memmap *map,
+				    void *priv,
+				    efi_exit_boot_map_processing priv_func)
+{
+	efi_status_t status;
+
+	status = efi_get_memory_map(sys_table_arg, map);
+
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	status = priv_func(sys_table_arg, map, priv);
+	if (status != EFI_SUCCESS)
+		goto free_map;
+
+	status = efi_call_early(exit_boot_services, handle, *map->key_ptr);
+
+	if (status == EFI_INVALID_PARAMETER) {
+		/*
+		 * The memory map changed between efi_get_memory_map() and
+		 * exit_boot_services().  Per the UEFI Spec v2.6, Section 6.4:
+		 * EFI_BOOT_SERVICES.ExitBootServices we need to get the
+		 * updated map, and try again.  The spec implies one retry
+		 * should be sufficent, which is confirmed against the EDK2
+		 * implementation.  Per the spec, we can only invoke
+		 * get_memory_map() and exit_boot_services() - we cannot alloc
+		 * so efi_get_memory_map() cannot be used, and we must reuse
+		 * the buffer.  For all practical purposes, the headroom in the
+		 * buffer should account for any changes in the map so the call
+		 * to get_memory_map() is expected to succeed here.
+		 */
+		*map->map_size = *map->buff_size;
+		status = efi_call_early(get_memory_map,
+					map->map_size,
+					*map->map,
+					map->key_ptr,
+					map->desc_size,
+					map->desc_ver);
+
+		/* exit_boot_services() was called, thus cannot free */
+		if (status != EFI_SUCCESS)
+			goto fail;
+
+		status = priv_func(sys_table_arg, map, priv);
+		/* exit_boot_services() was called, thus cannot free */
+		if (status != EFI_SUCCESS)
+			goto fail;
+
+		status = efi_call_early(exit_boot_services, handle, *map->key_ptr);
+	}
+
+	/* exit_boot_services() was called, thus cannot free */
+	if (status != EFI_SUCCESS)
+		goto fail;
+
+	return EFI_SUCCESS;
+
+free_map:
+	efi_call_early(free_pool, *map->map);
+fail:
+	return status;
+}
diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
index e58abfa..a6a9311 100644
--- a/drivers/firmware/efi/libstub/fdt.c
+++ b/drivers/firmware/efi/libstub/fdt.c
@@ -152,6 +152,27 @@
 #define EFI_FDT_ALIGN EFI_PAGE_SIZE
 #endif
 
+struct exit_boot_struct {
+	efi_memory_desc_t *runtime_map;
+	int *runtime_entry_count;
+};
+
+static efi_status_t exit_boot_func(efi_system_table_t *sys_table_arg,
+				   struct efi_boot_memmap *map,
+				   void *priv)
+{
+	struct exit_boot_struct *p = priv;
+	/*
+	 * Update the memory map with virtual addresses. The function will also
+	 * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME
+	 * entries so that we can pass it straight to SetVirtualAddressMap()
+	 */
+	efi_get_virtmap(*map->map, *map->map_size, *map->desc_size,
+			p->runtime_map, p->runtime_entry_count);
+
+	return EFI_SUCCESS;
+}
+
 /*
  * Allocate memory for a new FDT, then add EFI, commandline, and
  * initrd related fields to the FDT.  This routine increases the
@@ -175,13 +196,22 @@
 					    unsigned long fdt_addr,
 					    unsigned long fdt_size)
 {
-	unsigned long map_size, desc_size;
+	unsigned long map_size, desc_size, buff_size;
 	u32 desc_ver;
 	unsigned long mmap_key;
 	efi_memory_desc_t *memory_map, *runtime_map;
 	unsigned long new_fdt_size;
 	efi_status_t status;
 	int runtime_entry_count = 0;
+	struct efi_boot_memmap map;
+	struct exit_boot_struct priv;
+
+	map.map =	&runtime_map;
+	map.map_size =	&map_size;
+	map.desc_size =	&desc_size;
+	map.desc_ver =	&desc_ver;
+	map.key_ptr =	&mmap_key;
+	map.buff_size =	&buff_size;
 
 	/*
 	 * Get a copy of the current memory map that we will use to prepare
@@ -189,8 +219,7 @@
 	 * subsequent allocations adding entries, since they could not affect
 	 * the number of EFI_MEMORY_RUNTIME regions.
 	 */
-	status = efi_get_memory_map(sys_table, &runtime_map, &map_size,
-				    &desc_size, &desc_ver, &mmap_key);
+	status = efi_get_memory_map(sys_table, &map);
 	if (status != EFI_SUCCESS) {
 		pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n");
 		return status;
@@ -199,6 +228,7 @@
 	pr_efi(sys_table,
 	       "Exiting boot services and installing virtual address map...\n");
 
+	map.map = &memory_map;
 	/*
 	 * Estimate size of new FDT, and allocate memory for it. We
 	 * will allocate a bigger buffer if this ends up being too
@@ -218,8 +248,7 @@
 		 * we can get the memory map key  needed for
 		 * exit_boot_services().
 		 */
-		status = efi_get_memory_map(sys_table, &memory_map, &map_size,
-					    &desc_size, &desc_ver, &mmap_key);
+		status = efi_get_memory_map(sys_table, &map);
 		if (status != EFI_SUCCESS)
 			goto fail_free_new_fdt;
 
@@ -250,16 +279,11 @@
 		}
 	}
 
-	/*
-	 * Update the memory map with virtual addresses. The function will also
-	 * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME
-	 * entries so that we can pass it straight into SetVirtualAddressMap()
-	 */
-	efi_get_virtmap(memory_map, map_size, desc_size, runtime_map,
-			&runtime_entry_count);
-
-	/* Now we are ready to exit_boot_services.*/
-	status = sys_table->boottime->exit_boot_services(handle, mmap_key);
+	sys_table->boottime->free_pool(memory_map);
+	priv.runtime_map = runtime_map;
+	priv.runtime_entry_count = &runtime_entry_count;
+	status = efi_exit_boot_services(sys_table, handle, &map, &priv,
+					exit_boot_func);
 
 	if (status == EFI_SUCCESS) {
 		efi_set_virtual_address_map_t *svam;
diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c
index 53f6d3f..0c9f58c 100644
--- a/drivers/firmware/efi/libstub/random.c
+++ b/drivers/firmware/efi/libstub/random.c
@@ -73,12 +73,20 @@
 			      unsigned long random_seed)
 {
 	unsigned long map_size, desc_size, total_slots = 0, target_slot;
+	unsigned long buff_size;
 	efi_status_t status;
 	efi_memory_desc_t *memory_map;
 	int map_offset;
+	struct efi_boot_memmap map;
 
-	status = efi_get_memory_map(sys_table_arg, &memory_map, &map_size,
-				    &desc_size, NULL, NULL);
+	map.map =	&memory_map;
+	map.map_size =	&map_size;
+	map.desc_size =	&desc_size;
+	map.desc_ver =	NULL;
+	map.key_ptr =	NULL;
+	map.buff_size =	&buff_size;
+
+	status = efi_get_memory_map(sys_table_arg, &map);
 	if (status != EFI_SUCCESS)
 		return status;
 
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
new file mode 100644
index 0000000..f03ddec
--- /dev/null
+++ b/drivers/firmware/efi/memmap.c
@@ -0,0 +1,303 @@
+/*
+ * Common EFI memory map functions.
+ */
+
+#define pr_fmt(fmt) "efi: " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/efi.h>
+#include <linux/io.h>
+#include <asm/early_ioremap.h>
+
+/**
+ * __efi_memmap_init - Common code for mapping the EFI memory map
+ * @data: EFI memory map data
+ * @late: Use early or late mapping function?
+ *
+ * This function takes care of figuring out which function to use to
+ * map the EFI memory map in efi.memmap based on how far into the boot
+ * we are.
+ *
+ * During bootup @late should be %false since we only have access to
+ * the early_memremap*() functions as the vmalloc space isn't setup.
+ * Once the kernel is fully booted we can fallback to the more robust
+ * memremap*() API.
+ *
+ * Returns zero on success, a negative error code on failure.
+ */
+static int __init
+__efi_memmap_init(struct efi_memory_map_data *data, bool late)
+{
+	struct efi_memory_map map;
+	phys_addr_t phys_map;
+
+	if (efi_enabled(EFI_PARAVIRT))
+		return 0;
+
+	phys_map = data->phys_map;
+
+	if (late)
+		map.map = memremap(phys_map, data->size, MEMREMAP_WB);
+	else
+		map.map = early_memremap(phys_map, data->size);
+
+	if (!map.map) {
+		pr_err("Could not map the memory map!\n");
+		return -ENOMEM;
+	}
+
+	map.phys_map = data->phys_map;
+	map.nr_map = data->size / data->desc_size;
+	map.map_end = map.map + data->size;
+
+	map.desc_version = data->desc_version;
+	map.desc_size = data->desc_size;
+	map.late = late;
+
+	set_bit(EFI_MEMMAP, &efi.flags);
+
+	efi.memmap = map;
+
+	return 0;
+}
+
+/**
+ * efi_memmap_init_early - Map the EFI memory map data structure
+ * @data: EFI memory map data
+ *
+ * Use early_memremap() to map the passed in EFI memory map and assign
+ * it to efi.memmap.
+ */
+int __init efi_memmap_init_early(struct efi_memory_map_data *data)
+{
+	/* Cannot go backwards */
+	WARN_ON(efi.memmap.late);
+
+	return __efi_memmap_init(data, false);
+}
+
+void __init efi_memmap_unmap(void)
+{
+	if (!efi.memmap.late) {
+		unsigned long size;
+
+		size = efi.memmap.desc_size * efi.memmap.nr_map;
+		early_memunmap(efi.memmap.map, size);
+	} else {
+		memunmap(efi.memmap.map);
+	}
+
+	efi.memmap.map = NULL;
+	clear_bit(EFI_MEMMAP, &efi.flags);
+}
+
+/**
+ * efi_memmap_init_late - Map efi.memmap with memremap()
+ * @phys_addr: Physical address of the new EFI memory map
+ * @size: Size in bytes of the new EFI memory map
+ *
+ * Setup a mapping of the EFI memory map using ioremap_cache(). This
+ * function should only be called once the vmalloc space has been
+ * setup and is therefore not suitable for calling during early EFI
+ * initialise, e.g. in efi_init(). Additionally, it expects
+ * efi_memmap_init_early() to have already been called.
+ *
+ * The reason there are two EFI memmap initialisation
+ * (efi_memmap_init_early() and this late version) is because the
+ * early EFI memmap should be explicitly unmapped once EFI
+ * initialisation is complete as the fixmap space used to map the EFI
+ * memmap (via early_memremap()) is a scarce resource.
+ *
+ * This late mapping is intended to persist for the duration of
+ * runtime so that things like efi_mem_desc_lookup() and
+ * efi_mem_attributes() always work.
+ *
+ * Returns zero on success, a negative error code on failure.
+ */
+int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size)
+{
+	struct efi_memory_map_data data = {
+		.phys_map = addr,
+		.size = size,
+	};
+
+	/* Did we forget to unmap the early EFI memmap? */
+	WARN_ON(efi.memmap.map);
+
+	/* Were we already called? */
+	WARN_ON(efi.memmap.late);
+
+	/*
+	 * It makes no sense to allow callers to register different
+	 * values for the following fields. Copy them out of the
+	 * existing early EFI memmap.
+	 */
+	data.desc_version = efi.memmap.desc_version;
+	data.desc_size = efi.memmap.desc_size;
+
+	return __efi_memmap_init(&data, true);
+}
+
+/**
+ * efi_memmap_install - Install a new EFI memory map in efi.memmap
+ * @addr: Physical address of the memory map
+ * @nr_map: Number of entries in the memory map
+ *
+ * Unlike efi_memmap_init_*(), this function does not allow the caller
+ * to switch from early to late mappings. It simply uses the existing
+ * mapping function and installs the new memmap.
+ *
+ * Returns zero on success, a negative error code on failure.
+ */
+int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map)
+{
+	struct efi_memory_map_data data;
+
+	efi_memmap_unmap();
+
+	data.phys_map = addr;
+	data.size = efi.memmap.desc_size * nr_map;
+	data.desc_version = efi.memmap.desc_version;
+	data.desc_size = efi.memmap.desc_size;
+
+	return __efi_memmap_init(&data, efi.memmap.late);
+}
+
+/**
+ * efi_memmap_split_count - Count number of additional EFI memmap entries
+ * @md: EFI memory descriptor to split
+ * @range: Address range (start, end) to split around
+ *
+ * Returns the number of additional EFI memmap entries required to
+ * accomodate @range.
+ */
+int __init efi_memmap_split_count(efi_memory_desc_t *md, struct range *range)
+{
+	u64 m_start, m_end;
+	u64 start, end;
+	int count = 0;
+
+	start = md->phys_addr;
+	end = start + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+
+	/* modifying range */
+	m_start = range->start;
+	m_end = range->end;
+
+	if (m_start <= start) {
+		/* split into 2 parts */
+		if (start < m_end && m_end < end)
+			count++;
+	}
+
+	if (start < m_start && m_start < end) {
+		/* split into 3 parts */
+		if (m_end < end)
+			count += 2;
+		/* split into 2 parts */
+		if (end <= m_end)
+			count++;
+	}
+
+	return count;
+}
+
+/**
+ * efi_memmap_insert - Insert a memory region in an EFI memmap
+ * @old_memmap: The existing EFI memory map structure
+ * @buf: Address of buffer to store new map
+ * @mem: Memory map entry to insert
+ *
+ * It is suggested that you call efi_memmap_split_count() first
+ * to see how large @buf needs to be.
+ */
+void __init efi_memmap_insert(struct efi_memory_map *old_memmap, void *buf,
+			      struct efi_mem_range *mem)
+{
+	u64 m_start, m_end, m_attr;
+	efi_memory_desc_t *md;
+	u64 start, end;
+	void *old, *new;
+
+	/* modifying range */
+	m_start = mem->range.start;
+	m_end = mem->range.end;
+	m_attr = mem->attribute;
+
+	/*
+	 * The EFI memory map deals with regions in EFI_PAGE_SIZE
+	 * units. Ensure that the region described by 'mem' is aligned
+	 * correctly.
+	 */
+	if (!IS_ALIGNED(m_start, EFI_PAGE_SIZE) ||
+	    !IS_ALIGNED(m_end + 1, EFI_PAGE_SIZE)) {
+		WARN_ON(1);
+		return;
+	}
+
+	for (old = old_memmap->map, new = buf;
+	     old < old_memmap->map_end;
+	     old += old_memmap->desc_size, new += old_memmap->desc_size) {
+
+		/* copy original EFI memory descriptor */
+		memcpy(new, old, old_memmap->desc_size);
+		md = new;
+		start = md->phys_addr;
+		end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1;
+
+		if (m_start <= start && end <= m_end)
+			md->attribute |= m_attr;
+
+		if (m_start <= start &&
+		    (start < m_end && m_end < end)) {
+			/* first part */
+			md->attribute |= m_attr;
+			md->num_pages = (m_end - md->phys_addr + 1) >>
+				EFI_PAGE_SHIFT;
+			/* latter part */
+			new += old_memmap->desc_size;
+			memcpy(new, old, old_memmap->desc_size);
+			md = new;
+			md->phys_addr = m_end + 1;
+			md->num_pages = (end - md->phys_addr + 1) >>
+				EFI_PAGE_SHIFT;
+		}
+
+		if ((start < m_start && m_start < end) && m_end < end) {
+			/* first part */
+			md->num_pages = (m_start - md->phys_addr) >>
+				EFI_PAGE_SHIFT;
+			/* middle part */
+			new += old_memmap->desc_size;
+			memcpy(new, old, old_memmap->desc_size);
+			md = new;
+			md->attribute |= m_attr;
+			md->phys_addr = m_start;
+			md->num_pages = (m_end - m_start + 1) >>
+				EFI_PAGE_SHIFT;
+			/* last part */
+			new += old_memmap->desc_size;
+			memcpy(new, old, old_memmap->desc_size);
+			md = new;
+			md->phys_addr = m_end + 1;
+			md->num_pages = (end - m_end) >>
+				EFI_PAGE_SHIFT;
+		}
+
+		if ((start < m_start && m_start < end) &&
+		    (end <= m_end)) {
+			/* first part */
+			md->num_pages = (m_start - md->phys_addr) >>
+				EFI_PAGE_SHIFT;
+			/* latter part */
+			new += old_memmap->desc_size;
+			memcpy(new, old, old_memmap->desc_size);
+			md = new;
+			md->phys_addr = m_start;
+			md->num_pages = (end - md->phys_addr + 1) >>
+				EFI_PAGE_SHIFT;
+			md->attribute |= m_attr;
+		}
+	}
+}
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 5c55227..8e64b77 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -14,10 +14,6 @@
 
 #include <asm/setup.h>
 
-static void *efi_runtime_map;
-static int nr_efi_runtime_map;
-static u32 efi_memdesc_size;
-
 struct efi_runtime_map_entry {
 	efi_memory_desc_t md;
 	struct kobject kobj;   /* kobject for each entry */
@@ -106,7 +102,8 @@
 static struct kset *map_kset;
 
 static struct efi_runtime_map_entry *
-add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
+add_sysfs_runtime_map_entry(struct kobject *kobj, int nr,
+			    efi_memory_desc_t *md)
 {
 	int ret;
 	struct efi_runtime_map_entry *entry;
@@ -124,8 +121,7 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
-	memcpy(&entry->md, efi_runtime_map + nr * efi_memdesc_size,
-	       sizeof(efi_memory_desc_t));
+	memcpy(&entry->md, md, sizeof(efi_memory_desc_t));
 
 	kobject_init(&entry->kobj, &map_ktype);
 	entry->kobj.kset = map_kset;
@@ -142,12 +138,12 @@
 
 int efi_get_runtime_map_size(void)
 {
-	return nr_efi_runtime_map * efi_memdesc_size;
+	return efi.memmap.nr_map * efi.memmap.desc_size;
 }
 
 int efi_get_runtime_map_desc_size(void)
 {
-	return efi_memdesc_size;
+	return efi.memmap.desc_size;
 }
 
 int efi_runtime_map_copy(void *buf, size_t bufsz)
@@ -157,38 +153,33 @@
 	if (sz > bufsz)
 		sz = bufsz;
 
-	memcpy(buf, efi_runtime_map, sz);
+	memcpy(buf, efi.memmap.map, sz);
 	return 0;
 }
 
-void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size)
-{
-	efi_runtime_map = map;
-	nr_efi_runtime_map = nr_entries;
-	efi_memdesc_size = desc_size;
-}
-
 int __init efi_runtime_map_init(struct kobject *efi_kobj)
 {
 	int i, j, ret = 0;
 	struct efi_runtime_map_entry *entry;
+	efi_memory_desc_t *md;
 
-	if (!efi_runtime_map)
+	if (!efi_enabled(EFI_MEMMAP))
 		return 0;
 
-	map_entries = kzalloc(nr_efi_runtime_map * sizeof(entry), GFP_KERNEL);
+	map_entries = kzalloc(efi.memmap.nr_map * sizeof(entry), GFP_KERNEL);
 	if (!map_entries) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	for (i = 0; i < nr_efi_runtime_map; i++) {
-		entry = add_sysfs_runtime_map_entry(efi_kobj, i);
+	i = 0;
+	for_each_efi_memory_desc(md) {
+		entry = add_sysfs_runtime_map_entry(efi_kobj, i, md);
 		if (IS_ERR(entry)) {
 			ret = PTR_ERR(entry);
 			goto out_add_entry;
 		}
-		*(map_entries + i) = entry;
+		*(map_entries + i++) = entry;
 	}
 
 	return 0;
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index 4195877..ae54870b 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -14,11 +14,13 @@
  * This file is released under the GPLv2.
  */
 
+#define pr_fmt(fmt)	"efi: " fmt
+
 #include <linux/bug.h>
 #include <linux/efi.h>
 #include <linux/irqflags.h>
 #include <linux/mutex.h>
-#include <linux/spinlock.h>
+#include <linux/semaphore.h>
 #include <linux/stringify.h>
 #include <asm/efi.h>
 
@@ -81,20 +83,21 @@
  * +------------------------------------+-------------------------------+
  *
  * Due to the fact that the EFI pstore may write to the variable store in
- * interrupt context, we need to use a spinlock for at least the groups that
+ * interrupt context, we need to use a lock for at least the groups that
  * contain SetVariable() and QueryVariableInfo(). That leaves little else, as
  * none of the remaining functions are actually ever called at runtime.
- * So let's just use a single spinlock to serialize all Runtime Services calls.
+ * So let's just use a single lock to serialize all Runtime Services calls.
  */
-static DEFINE_SPINLOCK(efi_runtime_lock);
+static DEFINE_SEMAPHORE(efi_runtime_lock);
 
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
 	efi_status_t status;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(get_time, tm, tc);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -102,9 +105,10 @@
 {
 	efi_status_t status;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(set_time, tm);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -114,9 +118,10 @@
 {
 	efi_status_t status;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(get_wakeup_time, enabled, pending, tm);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -124,9 +129,10 @@
 {
 	efi_status_t status;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(set_wakeup_time, enabled, tm);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -138,10 +144,11 @@
 {
 	efi_status_t status;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(get_variable, name, vendor, attr, data_size,
 			       data);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -151,9 +158,10 @@
 {
 	efi_status_t status;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(get_next_variable, name_size, name, vendor);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -165,10 +173,11 @@
 {
 	efi_status_t status;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(set_variable, name, vendor, attr, data_size,
 			       data);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -179,12 +188,12 @@
 {
 	efi_status_t status;
 
-	if (!spin_trylock(&efi_runtime_lock))
+	if (down_trylock(&efi_runtime_lock))
 		return EFI_NOT_READY;
 
 	status = efi_call_virt(set_variable, name, vendor, attr, data_size,
 			       data);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -199,10 +208,11 @@
 	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
 		return EFI_UNSUPPORTED;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(query_variable_info, attr, storage_space,
 			       remaining_space, max_variable_size);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -217,12 +227,12 @@
 	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
 		return EFI_UNSUPPORTED;
 
-	if (!spin_trylock(&efi_runtime_lock))
+	if (down_trylock(&efi_runtime_lock))
 		return EFI_NOT_READY;
 
 	status = efi_call_virt(query_variable_info, attr, storage_space,
 			       remaining_space, max_variable_size);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -230,9 +240,10 @@
 {
 	efi_status_t status;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(get_next_high_mono_count, count);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -241,9 +252,13 @@
 				  unsigned long data_size,
 				  efi_char16_t *data)
 {
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock)) {
+		pr_warn("failed to invoke the reset_system() runtime service:\n"
+			"could not get exclusive access to the firmware\n");
+		return;
+	}
 	__efi_call_virt(reset_system, reset_type, status, data_size, data);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 }
 
 static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
@@ -255,9 +270,10 @@
 	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
 		return EFI_UNSUPPORTED;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(update_capsule, capsules, count, sg_list);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
@@ -271,10 +287,11 @@
 	if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
 		return EFI_UNSUPPORTED;
 
-	spin_lock(&efi_runtime_lock);
+	if (down_interruptible(&efi_runtime_lock))
+		return EFI_ABORTED;
 	status = efi_call_virt(query_capsule_caps, capsules, count, max_size,
 			       reset_type);
-	spin_unlock(&efi_runtime_lock);
+	up(&efi_runtime_lock);
 	return status;
 }
 
diff --git a/drivers/firmware/efi/test/Makefile b/drivers/firmware/efi/test/Makefile
new file mode 100644
index 0000000..bcd4577
--- /dev/null
+++ b/drivers/firmware/efi/test/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_EFI_TEST)			+= efi_test.o
diff --git a/drivers/firmware/efi/test/efi_test.c b/drivers/firmware/efi/test/efi_test.c
new file mode 100644
index 0000000..f61bb52
--- /dev/null
+++ b/drivers/firmware/efi/test/efi_test.c
@@ -0,0 +1,749 @@
+/*
+ * EFI Test Driver for Runtime Services
+ *
+ * Copyright(C) 2012-2016 Canonical Ltd.
+ *
+ * This driver exports EFI runtime services interfaces into userspace, which
+ * allow to use and test UEFI runtime services provided by firmware.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/miscdevice.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/efi.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "efi_test.h"
+
+MODULE_AUTHOR("Ivan Hu <ivan.hu@canonical.com>");
+MODULE_DESCRIPTION("EFI Test Driver");
+MODULE_LICENSE("GPL");
+
+/*
+ * Count the bytes in 'str', including the terminating NULL.
+ *
+ * Note this function returns the number of *bytes*, not the number of
+ * ucs2 characters.
+ */
+static inline size_t user_ucs2_strsize(efi_char16_t  __user *str)
+{
+	efi_char16_t *s = str, c;
+	size_t len;
+
+	if (!str)
+		return 0;
+
+	/* Include terminating NULL */
+	len = sizeof(efi_char16_t);
+
+	if (get_user(c, s++)) {
+		/* Can't read userspace memory for size */
+		return 0;
+	}
+
+	while (c != 0) {
+		if (get_user(c, s++)) {
+			/* Can't read userspace memory for size */
+			return 0;
+		}
+		len += sizeof(efi_char16_t);
+	}
+	return len;
+}
+
+/*
+ * Allocate a buffer and copy a ucs2 string from user space into it.
+ */
+static inline int
+copy_ucs2_from_user_len(efi_char16_t **dst, efi_char16_t __user *src,
+			size_t len)
+{
+	efi_char16_t *buf;
+
+	if (!src) {
+		*dst = NULL;
+		return 0;
+	}
+
+	if (!access_ok(VERIFY_READ, src, 1))
+		return -EFAULT;
+
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf) {
+		*dst = NULL;
+		return -ENOMEM;
+	}
+	*dst = buf;
+
+	if (copy_from_user(*dst, src, len)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Count the bytes in 'str', including the terminating NULL.
+ *
+ * Just a wrap for user_ucs2_strsize
+ */
+static inline int
+get_ucs2_strsize_from_user(efi_char16_t __user *src, size_t *len)
+{
+	if (!access_ok(VERIFY_READ, src, 1))
+		return -EFAULT;
+
+	*len = user_ucs2_strsize(src);
+	if (*len == 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Calculate the required buffer allocation size and copy a ucs2 string
+ * from user space into it.
+ *
+ * This function differs from copy_ucs2_from_user_len() because it
+ * calculates the size of the buffer to allocate by taking the length of
+ * the string 'src'.
+ *
+ * If a non-zero value is returned, the caller MUST NOT access 'dst'.
+ *
+ * It is the caller's responsibility to free 'dst'.
+ */
+static inline int
+copy_ucs2_from_user(efi_char16_t **dst, efi_char16_t __user *src)
+{
+	size_t len;
+
+	if (!access_ok(VERIFY_READ, src, 1))
+		return -EFAULT;
+
+	len = user_ucs2_strsize(src);
+	if (len == 0)
+		return -EFAULT;
+	return copy_ucs2_from_user_len(dst, src, len);
+}
+
+/*
+ * Copy a ucs2 string to a user buffer.
+ *
+ * This function is a simple wrapper around copy_to_user() that does
+ * nothing if 'src' is NULL, which is useful for reducing the amount of
+ * NULL checking the caller has to do.
+ *
+ * 'len' specifies the number of bytes to copy.
+ */
+static inline int
+copy_ucs2_to_user_len(efi_char16_t __user *dst, efi_char16_t *src, size_t len)
+{
+	if (!src)
+		return 0;
+
+	if (!access_ok(VERIFY_WRITE, dst, 1))
+		return -EFAULT;
+
+	return copy_to_user(dst, src, len);
+}
+
+static long efi_runtime_get_variable(unsigned long arg)
+{
+	struct efi_getvariable __user *getvariable_user;
+	struct efi_getvariable getvariable;
+	unsigned long datasize, prev_datasize, *dz;
+	efi_guid_t vendor_guid, *vd = NULL;
+	efi_status_t status;
+	efi_char16_t *name = NULL;
+	u32 attr, *at;
+	void *data = NULL;
+	int rv = 0;
+
+	getvariable_user = (struct efi_getvariable __user *)arg;
+
+	if (copy_from_user(&getvariable, getvariable_user,
+			   sizeof(getvariable)))
+		return -EFAULT;
+	if (getvariable.data_size &&
+	    get_user(datasize, getvariable.data_size))
+		return -EFAULT;
+	if (getvariable.vendor_guid) {
+		if (copy_from_user(&vendor_guid, getvariable.vendor_guid,
+					sizeof(vendor_guid)))
+			return -EFAULT;
+		vd = &vendor_guid;
+	}
+
+	if (getvariable.variable_name) {
+		rv = copy_ucs2_from_user(&name, getvariable.variable_name);
+		if (rv)
+			return rv;
+	}
+
+	at = getvariable.attributes ? &attr : NULL;
+	dz = getvariable.data_size ? &datasize : NULL;
+
+	if (getvariable.data_size && getvariable.data) {
+		data = kmalloc(datasize, GFP_KERNEL);
+		if (!data) {
+			kfree(name);
+			return -ENOMEM;
+		}
+	}
+
+	prev_datasize = datasize;
+	status = efi.get_variable(name, vd, at, dz, data);
+	kfree(name);
+
+	if (put_user(status, getvariable.status)) {
+		rv = -EFAULT;
+		goto out;
+	}
+
+	if (status != EFI_SUCCESS) {
+		if (status == EFI_BUFFER_TOO_SMALL) {
+			if (dz && put_user(datasize, getvariable.data_size)) {
+				rv = -EFAULT;
+				goto out;
+			}
+		}
+		rv = -EINVAL;
+		goto out;
+	}
+
+	if (prev_datasize < datasize) {
+		rv = -EINVAL;
+		goto out;
+	}
+
+	if (data) {
+		if (copy_to_user(getvariable.data, data, datasize)) {
+			rv = -EFAULT;
+			goto out;
+		}
+	}
+
+	if (at && put_user(attr, getvariable.attributes)) {
+		rv = -EFAULT;
+		goto out;
+	}
+
+	if (dz && put_user(datasize, getvariable.data_size))
+		rv = -EFAULT;
+
+out:
+	kfree(data);
+	return rv;
+
+}
+
+static long efi_runtime_set_variable(unsigned long arg)
+{
+	struct efi_setvariable __user *setvariable_user;
+	struct efi_setvariable setvariable;
+	efi_guid_t vendor_guid;
+	efi_status_t status;
+	efi_char16_t *name = NULL;
+	void *data;
+	int rv = 0;
+
+	setvariable_user = (struct efi_setvariable __user *)arg;
+
+	if (copy_from_user(&setvariable, setvariable_user, sizeof(setvariable)))
+		return -EFAULT;
+	if (copy_from_user(&vendor_guid, setvariable.vendor_guid,
+				sizeof(vendor_guid)))
+		return -EFAULT;
+
+	if (setvariable.variable_name) {
+		rv = copy_ucs2_from_user(&name, setvariable.variable_name);
+		if (rv)
+			return rv;
+	}
+
+	data = kmalloc(setvariable.data_size, GFP_KERNEL);
+	if (!data) {
+		kfree(name);
+		return -ENOMEM;
+	}
+	if (copy_from_user(data, setvariable.data, setvariable.data_size)) {
+		rv = -EFAULT;
+		goto out;
+	}
+
+	status = efi.set_variable(name, &vendor_guid,
+				setvariable.attributes,
+				setvariable.data_size, data);
+
+	if (put_user(status, setvariable.status)) {
+		rv = -EFAULT;
+		goto out;
+	}
+
+	rv = status == EFI_SUCCESS ? 0 : -EINVAL;
+
+out:
+	kfree(data);
+	kfree(name);
+
+	return rv;
+}
+
+static long efi_runtime_get_time(unsigned long arg)
+{
+	struct efi_gettime __user *gettime_user;
+	struct efi_gettime  gettime;
+	efi_status_t status;
+	efi_time_cap_t cap;
+	efi_time_t efi_time;
+
+	gettime_user = (struct efi_gettime __user *)arg;
+	if (copy_from_user(&gettime, gettime_user, sizeof(gettime)))
+		return -EFAULT;
+
+	status = efi.get_time(gettime.time ? &efi_time : NULL,
+			      gettime.capabilities ? &cap : NULL);
+
+	if (put_user(status, gettime.status))
+		return -EFAULT;
+
+	if (status != EFI_SUCCESS)
+		return -EINVAL;
+
+	if (gettime.capabilities) {
+		efi_time_cap_t __user *cap_local;
+
+		cap_local = (efi_time_cap_t *)gettime.capabilities;
+		if (put_user(cap.resolution, &(cap_local->resolution)) ||
+			put_user(cap.accuracy, &(cap_local->accuracy)) ||
+			put_user(cap.sets_to_zero, &(cap_local->sets_to_zero)))
+			return -EFAULT;
+	}
+	if (gettime.time) {
+		if (copy_to_user(gettime.time, &efi_time, sizeof(efi_time_t)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static long efi_runtime_set_time(unsigned long arg)
+{
+	struct efi_settime __user *settime_user;
+	struct efi_settime settime;
+	efi_status_t status;
+	efi_time_t efi_time;
+
+	settime_user = (struct efi_settime __user *)arg;
+	if (copy_from_user(&settime, settime_user, sizeof(settime)))
+		return -EFAULT;
+	if (copy_from_user(&efi_time, settime.time,
+					sizeof(efi_time_t)))
+		return -EFAULT;
+	status = efi.set_time(&efi_time);
+
+	if (put_user(status, settime.status))
+		return -EFAULT;
+
+	return status == EFI_SUCCESS ? 0 : -EINVAL;
+}
+
+static long efi_runtime_get_waketime(unsigned long arg)
+{
+	struct efi_getwakeuptime __user *getwakeuptime_user;
+	struct efi_getwakeuptime getwakeuptime;
+	efi_bool_t enabled, pending;
+	efi_status_t status;
+	efi_time_t efi_time;
+
+	getwakeuptime_user = (struct efi_getwakeuptime __user *)arg;
+	if (copy_from_user(&getwakeuptime, getwakeuptime_user,
+				sizeof(getwakeuptime)))
+		return -EFAULT;
+
+	status = efi.get_wakeup_time(
+		getwakeuptime.enabled ? (efi_bool_t *)&enabled : NULL,
+		getwakeuptime.pending ? (efi_bool_t *)&pending : NULL,
+		getwakeuptime.time ? &efi_time : NULL);
+
+	if (put_user(status, getwakeuptime.status))
+		return -EFAULT;
+
+	if (status != EFI_SUCCESS)
+		return -EINVAL;
+
+	if (getwakeuptime.enabled && put_user(enabled,
+						getwakeuptime.enabled))
+		return -EFAULT;
+
+	if (getwakeuptime.time) {
+		if (copy_to_user(getwakeuptime.time, &efi_time,
+				sizeof(efi_time_t)))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+static long efi_runtime_set_waketime(unsigned long arg)
+{
+	struct efi_setwakeuptime __user *setwakeuptime_user;
+	struct efi_setwakeuptime setwakeuptime;
+	efi_bool_t enabled;
+	efi_status_t status;
+	efi_time_t efi_time;
+
+	setwakeuptime_user = (struct efi_setwakeuptime __user *)arg;
+
+	if (copy_from_user(&setwakeuptime, setwakeuptime_user,
+				sizeof(setwakeuptime)))
+		return -EFAULT;
+
+	enabled = setwakeuptime.enabled;
+	if (setwakeuptime.time) {
+		if (copy_from_user(&efi_time, setwakeuptime.time,
+					sizeof(efi_time_t)))
+			return -EFAULT;
+
+		status = efi.set_wakeup_time(enabled, &efi_time);
+	} else
+		status = efi.set_wakeup_time(enabled, NULL);
+
+	if (put_user(status, setwakeuptime.status))
+		return -EFAULT;
+
+	return status == EFI_SUCCESS ? 0 : -EINVAL;
+}
+
+static long efi_runtime_get_nextvariablename(unsigned long arg)
+{
+	struct efi_getnextvariablename __user *getnextvariablename_user;
+	struct efi_getnextvariablename getnextvariablename;
+	unsigned long name_size, prev_name_size = 0, *ns = NULL;
+	efi_status_t status;
+	efi_guid_t *vd = NULL;
+	efi_guid_t vendor_guid;
+	efi_char16_t *name = NULL;
+	int rv;
+
+	getnextvariablename_user = (struct efi_getnextvariablename __user *)arg;
+
+	if (copy_from_user(&getnextvariablename, getnextvariablename_user,
+			   sizeof(getnextvariablename)))
+		return -EFAULT;
+
+	if (getnextvariablename.variable_name_size) {
+		if (get_user(name_size, getnextvariablename.variable_name_size))
+			return -EFAULT;
+		ns = &name_size;
+		prev_name_size = name_size;
+	}
+
+	if (getnextvariablename.vendor_guid) {
+		if (copy_from_user(&vendor_guid,
+				getnextvariablename.vendor_guid,
+				sizeof(vendor_guid)))
+			return -EFAULT;
+		vd = &vendor_guid;
+	}
+
+	if (getnextvariablename.variable_name) {
+		size_t name_string_size = 0;
+
+		rv = get_ucs2_strsize_from_user(
+				getnextvariablename.variable_name,
+				&name_string_size);
+		if (rv)
+			return rv;
+		/*
+		 * The name_size may be smaller than the real buffer size where
+		 * variable name located in some use cases. The most typical
+		 * case is passing a 0 to get the required buffer size for the
+		 * 1st time call. So we need to copy the content from user
+		 * space for at least the string size of variable name, or else
+		 * the name passed to UEFI may not be terminated as we expected.
+		 */
+		rv = copy_ucs2_from_user_len(&name,
+				getnextvariablename.variable_name,
+				prev_name_size > name_string_size ?
+				prev_name_size : name_string_size);
+		if (rv)
+			return rv;
+	}
+
+	status = efi.get_next_variable(ns, name, vd);
+
+	if (put_user(status, getnextvariablename.status)) {
+		rv = -EFAULT;
+		goto out;
+	}
+
+	if (status != EFI_SUCCESS) {
+		if (status == EFI_BUFFER_TOO_SMALL) {
+			if (ns && put_user(*ns,
+				getnextvariablename.variable_name_size)) {
+				rv = -EFAULT;
+				goto out;
+			}
+		}
+		rv = -EINVAL;
+		goto out;
+	}
+
+	if (name) {
+		if (copy_ucs2_to_user_len(getnextvariablename.variable_name,
+						name, prev_name_size)) {
+			rv = -EFAULT;
+			goto out;
+		}
+	}
+
+	if (ns) {
+		if (put_user(*ns, getnextvariablename.variable_name_size)) {
+			rv = -EFAULT;
+			goto out;
+		}
+	}
+
+	if (vd) {
+		if (copy_to_user(getnextvariablename.vendor_guid, vd,
+							sizeof(efi_guid_t)))
+			rv = -EFAULT;
+	}
+
+out:
+	kfree(name);
+	return rv;
+}
+
+static long efi_runtime_get_nexthighmonocount(unsigned long arg)
+{
+	struct efi_getnexthighmonotoniccount __user *getnexthighmonocount_user;
+	struct efi_getnexthighmonotoniccount getnexthighmonocount;
+	efi_status_t status;
+	u32 count;
+
+	getnexthighmonocount_user = (struct
+			efi_getnexthighmonotoniccount __user *)arg;
+
+	if (copy_from_user(&getnexthighmonocount,
+			   getnexthighmonocount_user,
+			   sizeof(getnexthighmonocount)))
+		return -EFAULT;
+
+	status = efi.get_next_high_mono_count(
+		getnexthighmonocount.high_count ? &count : NULL);
+
+	if (put_user(status, getnexthighmonocount.status))
+		return -EFAULT;
+
+	if (status != EFI_SUCCESS)
+		return -EINVAL;
+
+	if (getnexthighmonocount.high_count &&
+	    put_user(count, getnexthighmonocount.high_count))
+		return -EFAULT;
+
+	return 0;
+}
+
+static long efi_runtime_query_variableinfo(unsigned long arg)
+{
+	struct efi_queryvariableinfo __user *queryvariableinfo_user;
+	struct efi_queryvariableinfo queryvariableinfo;
+	efi_status_t status;
+	u64 max_storage, remaining, max_size;
+
+	queryvariableinfo_user = (struct efi_queryvariableinfo __user *)arg;
+
+	if (copy_from_user(&queryvariableinfo, queryvariableinfo_user,
+			   sizeof(queryvariableinfo)))
+		return -EFAULT;
+
+	status = efi.query_variable_info(queryvariableinfo.attributes,
+					 &max_storage, &remaining, &max_size);
+
+	if (put_user(status, queryvariableinfo.status))
+		return -EFAULT;
+
+	if (status != EFI_SUCCESS)
+		return -EINVAL;
+
+	if (put_user(max_storage,
+		     queryvariableinfo.maximum_variable_storage_size))
+		return -EFAULT;
+
+	if (put_user(remaining,
+		     queryvariableinfo.remaining_variable_storage_size))
+		return -EFAULT;
+
+	if (put_user(max_size, queryvariableinfo.maximum_variable_size))
+		return -EFAULT;
+
+	return 0;
+}
+
+static long efi_runtime_query_capsulecaps(unsigned long arg)
+{
+	struct efi_querycapsulecapabilities __user *qcaps_user;
+	struct efi_querycapsulecapabilities qcaps;
+	efi_capsule_header_t *capsules;
+	efi_status_t status;
+	u64 max_size;
+	int i, reset_type;
+	int rv = 0;
+
+	qcaps_user = (struct efi_querycapsulecapabilities __user *)arg;
+
+	if (copy_from_user(&qcaps, qcaps_user, sizeof(qcaps)))
+		return -EFAULT;
+
+	capsules = kcalloc(qcaps.capsule_count + 1,
+			   sizeof(efi_capsule_header_t), GFP_KERNEL);
+	if (!capsules)
+		return -ENOMEM;
+
+	for (i = 0; i < qcaps.capsule_count; i++) {
+		efi_capsule_header_t *c;
+		/*
+		 * We cannot dereference qcaps.capsule_header_array directly to
+		 * obtain the address of the capsule as it resides in the
+		 * user space
+		 */
+		if (get_user(c, qcaps.capsule_header_array + i)) {
+			rv = -EFAULT;
+			goto out;
+		}
+		if (copy_from_user(&capsules[i], c,
+				sizeof(efi_capsule_header_t))) {
+			rv = -EFAULT;
+			goto out;
+		}
+	}
+
+	qcaps.capsule_header_array = &capsules;
+
+	status = efi.query_capsule_caps((efi_capsule_header_t **)
+					qcaps.capsule_header_array,
+					qcaps.capsule_count,
+					&max_size, &reset_type);
+
+	if (put_user(status, qcaps.status)) {
+		rv = -EFAULT;
+		goto out;
+	}
+
+	if (status != EFI_SUCCESS) {
+		rv = -EINVAL;
+		goto out;
+	}
+
+	if (put_user(max_size, qcaps.maximum_capsule_size)) {
+		rv = -EFAULT;
+		goto out;
+	}
+
+	if (put_user(reset_type, qcaps.reset_type))
+		rv = -EFAULT;
+
+out:
+	kfree(capsules);
+	return rv;
+}
+
+static long efi_test_ioctl(struct file *file, unsigned int cmd,
+							unsigned long arg)
+{
+	switch (cmd) {
+	case EFI_RUNTIME_GET_VARIABLE:
+		return efi_runtime_get_variable(arg);
+
+	case EFI_RUNTIME_SET_VARIABLE:
+		return efi_runtime_set_variable(arg);
+
+	case EFI_RUNTIME_GET_TIME:
+		return efi_runtime_get_time(arg);
+
+	case EFI_RUNTIME_SET_TIME:
+		return efi_runtime_set_time(arg);
+
+	case EFI_RUNTIME_GET_WAKETIME:
+		return efi_runtime_get_waketime(arg);
+
+	case EFI_RUNTIME_SET_WAKETIME:
+		return efi_runtime_set_waketime(arg);
+
+	case EFI_RUNTIME_GET_NEXTVARIABLENAME:
+		return efi_runtime_get_nextvariablename(arg);
+
+	case EFI_RUNTIME_GET_NEXTHIGHMONOTONICCOUNT:
+		return efi_runtime_get_nexthighmonocount(arg);
+
+	case EFI_RUNTIME_QUERY_VARIABLEINFO:
+		return efi_runtime_query_variableinfo(arg);
+
+	case EFI_RUNTIME_QUERY_CAPSULECAPABILITIES:
+		return efi_runtime_query_capsulecaps(arg);
+	}
+
+	return -ENOTTY;
+}
+
+static int efi_test_open(struct inode *inode, struct file *file)
+{
+	/*
+	 * nothing special to do here
+	 * We do accept multiple open files at the same time as we
+	 * synchronize on the per call operation.
+	 */
+	return 0;
+}
+
+static int efi_test_close(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+/*
+ *	The various file operations we support.
+ */
+static const struct file_operations efi_test_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= efi_test_ioctl,
+	.open		= efi_test_open,
+	.release	= efi_test_close,
+	.llseek		= no_llseek,
+};
+
+static struct miscdevice efi_test_dev = {
+	MISC_DYNAMIC_MINOR,
+	"efi_test",
+	&efi_test_fops
+};
+
+static int __init efi_test_init(void)
+{
+	int ret;
+
+	ret = misc_register(&efi_test_dev);
+	if (ret) {
+		pr_err("efi_test: can't misc_register on minor=%d\n",
+			MISC_DYNAMIC_MINOR);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void __exit efi_test_exit(void)
+{
+	misc_deregister(&efi_test_dev);
+}
+
+module_init(efi_test_init);
+module_exit(efi_test_exit);
diff --git a/drivers/firmware/efi/test/efi_test.h b/drivers/firmware/efi/test/efi_test.h
new file mode 100644
index 0000000..a33a6c6
--- /dev/null
+++ b/drivers/firmware/efi/test/efi_test.h
@@ -0,0 +1,110 @@
+/*
+ * EFI Test driver Header
+ *
+ * Copyright(C) 2012-2016 Canonical Ltd.
+ *
+ */
+
+#ifndef _DRIVERS_FIRMWARE_EFI_TEST_H_
+#define _DRIVERS_FIRMWARE_EFI_TEST_H_
+
+#include <linux/efi.h>
+
+struct efi_getvariable {
+	efi_char16_t	*variable_name;
+	efi_guid_t	*vendor_guid;
+	u32		*attributes;
+	unsigned long	*data_size;
+	void		*data;
+	efi_status_t	*status;
+} __packed;
+
+struct efi_setvariable {
+	efi_char16_t	*variable_name;
+	efi_guid_t	*vendor_guid;
+	u32		attributes;
+	unsigned long	data_size;
+	void		*data;
+	efi_status_t	*status;
+} __packed;
+
+struct efi_getnextvariablename {
+	unsigned long	*variable_name_size;
+	efi_char16_t	*variable_name;
+	efi_guid_t	*vendor_guid;
+	efi_status_t	*status;
+} __packed;
+
+struct efi_queryvariableinfo {
+	u32		attributes;
+	u64		*maximum_variable_storage_size;
+	u64		*remaining_variable_storage_size;
+	u64		*maximum_variable_size;
+	efi_status_t	*status;
+} __packed;
+
+struct efi_gettime {
+	efi_time_t	*time;
+	efi_time_cap_t	*capabilities;
+	efi_status_t	*status;
+} __packed;
+
+struct efi_settime {
+	efi_time_t	*time;
+	efi_status_t	*status;
+} __packed;
+
+struct efi_getwakeuptime {
+	efi_bool_t	*enabled;
+	efi_bool_t	*pending;
+	efi_time_t	*time;
+	efi_status_t	*status;
+} __packed;
+
+struct efi_setwakeuptime {
+	efi_bool_t	enabled;
+	efi_time_t	*time;
+	efi_status_t	*status;
+} __packed;
+
+struct efi_getnexthighmonotoniccount {
+	u32		*high_count;
+	efi_status_t	*status;
+} __packed;
+
+struct efi_querycapsulecapabilities {
+	efi_capsule_header_t	**capsule_header_array;
+	unsigned long		capsule_count;
+	u64			*maximum_capsule_size;
+	int			*reset_type;
+	efi_status_t		*status;
+} __packed;
+
+#define EFI_RUNTIME_GET_VARIABLE \
+	_IOWR('p', 0x01, struct efi_getvariable)
+#define EFI_RUNTIME_SET_VARIABLE \
+	_IOW('p', 0x02, struct efi_setvariable)
+
+#define EFI_RUNTIME_GET_TIME \
+	_IOR('p', 0x03, struct efi_gettime)
+#define EFI_RUNTIME_SET_TIME \
+	_IOW('p', 0x04, struct efi_settime)
+
+#define EFI_RUNTIME_GET_WAKETIME \
+	_IOR('p', 0x05, struct efi_getwakeuptime)
+#define EFI_RUNTIME_SET_WAKETIME \
+	_IOW('p', 0x06, struct efi_setwakeuptime)
+
+#define EFI_RUNTIME_GET_NEXTVARIABLENAME \
+	_IOWR('p', 0x07, struct efi_getnextvariablename)
+
+#define EFI_RUNTIME_QUERY_VARIABLEINFO \
+	_IOR('p', 0x08, struct efi_queryvariableinfo)
+
+#define EFI_RUNTIME_GET_NEXTHIGHMONOTONICCOUNT \
+	_IOR('p', 0x09, struct efi_getnexthighmonotoniccount)
+
+#define EFI_RUNTIME_QUERY_CAPSULECAPABILITIES \
+	_IOR('p', 0x0A, struct efi_querycapsulecapabilities)
+
+#endif /* _DRIVERS_FIRMWARE_EFI_TEST_H_ */
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index d3b7513..9336ffd 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -37,6 +37,14 @@
 /* Private pointer to registered efivars */
 static struct efivars *__efivars;
 
+/*
+ * efivars_lock protects three things:
+ * 1) efivarfs_list and efivars_sysfs_list
+ * 2) ->ops calls
+ * 3) (un)registration of __efivars
+ */
+static DEFINE_SEMAPHORE(efivars_lock);
+
 static bool efivar_wq_enabled = true;
 DECLARE_WORK(efivar_work, NULL);
 EXPORT_SYMBOL_GPL(efivar_work);
@@ -434,7 +442,10 @@
 		return -ENOMEM;
 	}
 
-	spin_lock_irq(&__efivars->lock);
+	if (down_interruptible(&efivars_lock)) {
+		err = -EINTR;
+		goto free;
+	}
 
 	/*
 	 * Per EFI spec, the maximum storage allocated for both
@@ -450,7 +461,7 @@
 		switch (status) {
 		case EFI_SUCCESS:
 			if (duplicates)
-				spin_unlock_irq(&__efivars->lock);
+				up(&efivars_lock);
 
 			variable_name_size = var_name_strnsize(variable_name,
 							       variable_name_size);
@@ -476,8 +487,12 @@
 					status = EFI_NOT_FOUND;
 			}
 
-			if (duplicates)
-				spin_lock_irq(&__efivars->lock);
+			if (duplicates) {
+				if (down_interruptible(&efivars_lock)) {
+					err = -EINTR;
+					goto free;
+				}
+			}
 
 			break;
 		case EFI_NOT_FOUND:
@@ -491,8 +506,8 @@
 
 	} while (status != EFI_NOT_FOUND);
 
-	spin_unlock_irq(&__efivars->lock);
-
+	up(&efivars_lock);
+free:
 	kfree(variable_name);
 
 	return err;
@@ -503,24 +518,34 @@
  * efivar_entry_add - add entry to variable list
  * @entry: entry to add to list
  * @head: list head
+ *
+ * Returns 0 on success, or a kernel error code on failure.
  */
-void efivar_entry_add(struct efivar_entry *entry, struct list_head *head)
+int efivar_entry_add(struct efivar_entry *entry, struct list_head *head)
 {
-	spin_lock_irq(&__efivars->lock);
+	if (down_interruptible(&efivars_lock))
+		return -EINTR;
 	list_add(&entry->list, head);
-	spin_unlock_irq(&__efivars->lock);
+	up(&efivars_lock);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(efivar_entry_add);
 
 /**
  * efivar_entry_remove - remove entry from variable list
  * @entry: entry to remove from list
+ *
+ * Returns 0 on success, or a kernel error code on failure.
  */
-void efivar_entry_remove(struct efivar_entry *entry)
+int efivar_entry_remove(struct efivar_entry *entry)
 {
-	spin_lock_irq(&__efivars->lock);
+	if (down_interruptible(&efivars_lock))
+		return -EINTR;
 	list_del(&entry->list);
-	spin_unlock_irq(&__efivars->lock);
+	up(&efivars_lock);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(efivar_entry_remove);
 
@@ -537,10 +562,8 @@
  */
 static void efivar_entry_list_del_unlock(struct efivar_entry *entry)
 {
-	lockdep_assert_held(&__efivars->lock);
-
 	list_del(&entry->list);
-	spin_unlock_irq(&__efivars->lock);
+	up(&efivars_lock);
 }
 
 /**
@@ -563,8 +586,6 @@
 	const struct efivar_operations *ops = __efivars->ops;
 	efi_status_t status;
 
-	lockdep_assert_held(&__efivars->lock);
-
 	status = ops->set_variable(entry->var.VariableName,
 				   &entry->var.VendorGuid,
 				   0, 0, NULL);
@@ -581,20 +602,22 @@
  * variable list. It is the caller's responsibility to free @entry
  * once we return.
  *
- * Returns 0 on success, or a converted EFI status code if
- * set_variable() fails.
+ * Returns 0 on success, -EINTR if we can't grab the semaphore,
+ * converted EFI status code if set_variable() fails.
  */
 int efivar_entry_delete(struct efivar_entry *entry)
 {
 	const struct efivar_operations *ops = __efivars->ops;
 	efi_status_t status;
 
-	spin_lock_irq(&__efivars->lock);
+	if (down_interruptible(&efivars_lock))
+		return -EINTR;
+
 	status = ops->set_variable(entry->var.VariableName,
 				   &entry->var.VendorGuid,
 				   0, 0, NULL);
 	if (!(status == EFI_SUCCESS || status == EFI_NOT_FOUND)) {
-		spin_unlock_irq(&__efivars->lock);
+		up(&efivars_lock);
 		return efi_status_to_err(status);
 	}
 
@@ -620,9 +643,9 @@
  * If @head is not NULL a lookup is performed to determine whether
  * the entry is already on the list.
  *
- * Returns 0 on success, -EEXIST if a lookup is performed and the entry
- * already exists on the list, or a converted EFI status code if
- * set_variable() fails.
+ * Returns 0 on success, -EINTR if we can't grab the semaphore,
+ * -EEXIST if a lookup is performed and the entry already exists on
+ * the list, or a converted EFI status code if set_variable() fails.
  */
 int efivar_entry_set(struct efivar_entry *entry, u32 attributes,
 		     unsigned long size, void *data, struct list_head *head)
@@ -632,10 +655,10 @@
 	efi_char16_t *name = entry->var.VariableName;
 	efi_guid_t vendor = entry->var.VendorGuid;
 
-	spin_lock_irq(&__efivars->lock);
-
+	if (down_interruptible(&efivars_lock))
+		return -EINTR;
 	if (head && efivar_entry_find(name, vendor, head, false)) {
-		spin_unlock_irq(&__efivars->lock);
+		up(&efivars_lock);
 		return -EEXIST;
 	}
 
@@ -644,7 +667,7 @@
 		status = ops->set_variable(name, &vendor,
 					   attributes, size, data);
 
-	spin_unlock_irq(&__efivars->lock);
+	up(&efivars_lock);
 
 	return efi_status_to_err(status);
 
@@ -658,30 +681,29 @@
  * from crash/panic handlers.
  *
  * Crucially, this function will not block if it cannot acquire
- * __efivars->lock. Instead, it returns -EBUSY.
+ * efivars_lock. Instead, it returns -EBUSY.
  */
 static int
 efivar_entry_set_nonblocking(efi_char16_t *name, efi_guid_t vendor,
 			     u32 attributes, unsigned long size, void *data)
 {
 	const struct efivar_operations *ops = __efivars->ops;
-	unsigned long flags;
 	efi_status_t status;
 
-	if (!spin_trylock_irqsave(&__efivars->lock, flags))
+	if (down_trylock(&efivars_lock))
 		return -EBUSY;
 
 	status = check_var_size_nonblocking(attributes,
 					    size + ucs2_strsize(name, 1024));
 	if (status != EFI_SUCCESS) {
-		spin_unlock_irqrestore(&__efivars->lock, flags);
+		up(&efivars_lock);
 		return -ENOSPC;
 	}
 
 	status = ops->set_variable_nonblocking(name, &vendor, attributes,
 					       size, data);
 
-	spin_unlock_irqrestore(&__efivars->lock, flags);
+	up(&efivars_lock);
 	return efi_status_to_err(status);
 }
 
@@ -706,7 +728,6 @@
 			  bool block, unsigned long size, void *data)
 {
 	const struct efivar_operations *ops = __efivars->ops;
-	unsigned long flags;
 	efi_status_t status;
 
 	if (!ops->query_variable_store)
@@ -727,21 +748,22 @@
 						    size, data);
 
 	if (!block) {
-		if (!spin_trylock_irqsave(&__efivars->lock, flags))
+		if (down_trylock(&efivars_lock))
 			return -EBUSY;
 	} else {
-		spin_lock_irqsave(&__efivars->lock, flags);
+		if (down_interruptible(&efivars_lock))
+			return -EINTR;
 	}
 
 	status = check_var_size(attributes, size + ucs2_strsize(name, 1024));
 	if (status != EFI_SUCCESS) {
-		spin_unlock_irqrestore(&__efivars->lock, flags);
+		up(&efivars_lock);
 		return -ENOSPC;
 	}
 
 	status = ops->set_variable(name, &vendor, attributes, size, data);
 
-	spin_unlock_irqrestore(&__efivars->lock, flags);
+	up(&efivars_lock);
 
 	return efi_status_to_err(status);
 }
@@ -771,8 +793,6 @@
 	int strsize1, strsize2;
 	bool found = false;
 
-	lockdep_assert_held(&__efivars->lock);
-
 	list_for_each_entry_safe(entry, n, head, list) {
 		strsize1 = ucs2_strsize(name, 1024);
 		strsize2 = ucs2_strsize(entry->var.VariableName, 1024);
@@ -814,10 +834,11 @@
 
 	*size = 0;
 
-	spin_lock_irq(&__efivars->lock);
+	if (down_interruptible(&efivars_lock))
+		return -EINTR;
 	status = ops->get_variable(entry->var.VariableName,
 				   &entry->var.VendorGuid, NULL, size, NULL);
-	spin_unlock_irq(&__efivars->lock);
+	up(&efivars_lock);
 
 	if (status != EFI_BUFFER_TOO_SMALL)
 		return efi_status_to_err(status);
@@ -843,8 +864,6 @@
 	const struct efivar_operations *ops = __efivars->ops;
 	efi_status_t status;
 
-	lockdep_assert_held(&__efivars->lock);
-
 	status = ops->get_variable(entry->var.VariableName,
 				   &entry->var.VendorGuid,
 				   attributes, size, data);
@@ -866,11 +885,12 @@
 	const struct efivar_operations *ops = __efivars->ops;
 	efi_status_t status;
 
-	spin_lock_irq(&__efivars->lock);
+	if (down_interruptible(&efivars_lock))
+		return -EINTR;
 	status = ops->get_variable(entry->var.VariableName,
 				   &entry->var.VendorGuid,
 				   attributes, size, data);
-	spin_unlock_irq(&__efivars->lock);
+	up(&efivars_lock);
 
 	return efi_status_to_err(status);
 }
@@ -917,7 +937,8 @@
 	 * set_variable call, and removal of the variable from the efivars
 	 * list (in the case of an authenticated delete).
 	 */
-	spin_lock_irq(&__efivars->lock);
+	if (down_interruptible(&efivars_lock))
+		return -EINTR;
 
 	/*
 	 * Ensure that the available space hasn't shrunk below the safe level
@@ -957,7 +978,7 @@
 	if (status == EFI_NOT_FOUND)
 		efivar_entry_list_del_unlock(entry);
 	else
-		spin_unlock_irq(&__efivars->lock);
+		up(&efivars_lock);
 
 	if (status && status != EFI_BUFFER_TOO_SMALL)
 		return efi_status_to_err(status);
@@ -965,7 +986,7 @@
 	return 0;
 
 out:
-	spin_unlock_irq(&__efivars->lock);
+	up(&efivars_lock);
 	return err;
 
 }
@@ -978,9 +999,9 @@
  * efivar_entry_iter_end() is called. This function is usually used in
  * conjunction with __efivar_entry_iter() or efivar_entry_iter().
  */
-void efivar_entry_iter_begin(void)
+int efivar_entry_iter_begin(void)
 {
-	spin_lock_irq(&__efivars->lock);
+	return down_interruptible(&efivars_lock);
 }
 EXPORT_SYMBOL_GPL(efivar_entry_iter_begin);
 
@@ -991,7 +1012,7 @@
  */
 void efivar_entry_iter_end(void)
 {
-	spin_unlock_irq(&__efivars->lock);
+	up(&efivars_lock);
 }
 EXPORT_SYMBOL_GPL(efivar_entry_iter_end);
 
@@ -1067,7 +1088,9 @@
 {
 	int err = 0;
 
-	efivar_entry_iter_begin();
+	err = efivar_entry_iter_begin();
+	if (err)
+		return err;
 	err = __efivar_entry_iter(func, head, data, NULL);
 	efivar_entry_iter_end();
 
@@ -1112,12 +1135,18 @@
 		     const struct efivar_operations *ops,
 		     struct kobject *kobject)
 {
-	spin_lock_init(&efivars->lock);
+	if (down_interruptible(&efivars_lock))
+		return -EINTR;
+
 	efivars->ops = ops;
 	efivars->kobject = kobject;
 
 	__efivars = efivars;
 
+	pr_info("Registered efivars operations\n");
+
+	up(&efivars_lock);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(efivars_register);
@@ -1133,6 +1162,9 @@
 {
 	int rv;
 
+	if (down_interruptible(&efivars_lock))
+		return -EINTR;
+
 	if (!__efivars) {
 		printk(KERN_ERR "efivars not registered\n");
 		rv = -EINVAL;
@@ -1144,10 +1176,12 @@
 		goto out;
 	}
 
+	pr_info("Unregistered efivars operations\n");
 	__efivars = NULL;
 
 	rv = 0;
 out:
+	up(&efivars_lock);
 	return rv;
 }
 EXPORT_SYMBOL_GPL(efivars_unregister);
diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c
index f1ab05e..c463871 100644
--- a/drivers/firmware/google/gsmi.c
+++ b/drivers/firmware/google/gsmi.c
@@ -910,8 +910,7 @@
 	gsmi_buf_free(gsmi_dev.param_buf);
 	gsmi_buf_free(gsmi_dev.data_buf);
 	gsmi_buf_free(gsmi_dev.name_buf);
-	if (gsmi_dev.dma_pool)
-		dma_pool_destroy(gsmi_dev.dma_pool);
+	dma_pool_destroy(gsmi_dev.dma_pool);
 	platform_device_unregister(gsmi_dev.pdev);
 	pr_info("gsmi: failed to load: %d\n", ret);
 	return ret;
diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index d614102..cd84934 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -21,6 +21,7 @@
 
 config FPGA_MGR_ZYNQ_FPGA
 	tristate "Xilinx Zynq FPGA"
+	depends on ARCH_ZYNQ || COMPILE_TEST
 	depends on HAS_DMA
 	help
 	  FPGA manager driver support for Xilinx Zynq FPGAs.
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 98dd47a..24caedb 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -50,6 +50,7 @@
 config OF_GPIO
 	def_bool y
 	depends on OF
+	depends on HAS_IOMEM
 
 config GPIO_ACPI
 	def_bool y
@@ -188,7 +189,7 @@
 config GPIO_ETRAXFS
 	bool "Axis ETRAX FS General I/O"
 	depends on CRIS || COMPILE_TEST
-	depends on OF
+	depends on OF_GPIO
 	select GPIO_GENERIC
 	select GPIOLIB_IRQCHIP
 	help
@@ -214,7 +215,7 @@
 
 config GPIO_GRGPIO
 	tristate "Aeroflex Gaisler GRGPIO support"
-	depends on OF
+	depends on OF_GPIO
 	select GPIO_GENERIC
 	select IRQ_DOMAIN
 	help
@@ -312,7 +313,7 @@
 config GPIO_MVEBU
 	def_bool y
 	depends on PLAT_ORION
-	depends on OF
+	depends on OF_GPIO
 	select GENERIC_IRQ_CHIP
 
 config GPIO_MXC
@@ -405,7 +406,7 @@
 	bool "NVIDIA Tegra GPIO support"
 	default ARCH_TEGRA
 	depends on ARCH_TEGRA || COMPILE_TEST
-	depends on OF
+	depends on OF_GPIO
 	help
 	  Say yes here to support GPIO pins on NVIDIA Tegra SoCs.
 
@@ -1099,7 +1100,7 @@
 
 config GPIO_74X164
 	tristate "74x164 serial-in/parallel-out 8-bits shift register"
-	depends on OF
+	depends on OF_GPIO
 	help
 	  Driver for 74x164 compatible serial-in/parallel-out 8-outputs
 	  shift registers. This driver can be used to provide access
@@ -1130,6 +1131,7 @@
 
 config GPIO_MCP23S08
 	tristate "Microchip MCP23xxx I/O expander"
+	depends on OF_GPIO
 	select GPIOLIB_IRQCHIP
 	help
 	  SPI/I2C driver for Microchip MCP23S08/MCP23S17/MCP23008/MCP23017
diff --git a/drivers/gpio/gpio-max730x.c b/drivers/gpio/gpio-max730x.c
index 0880736..946d091 100644
--- a/drivers/gpio/gpio-max730x.c
+++ b/drivers/gpio/gpio-max730x.c
@@ -192,6 +192,10 @@
 	ts->chip.parent = dev;
 	ts->chip.owner = THIS_MODULE;
 
+	ret = gpiochip_add_data(&ts->chip, ts);
+	if (ret)
+		goto exit_destroy;
+
 	/*
 	 * initialize pullups according to platform data and cache the
 	 * register values for later use.
@@ -213,10 +217,6 @@
 		}
 	}
 
-	ret = gpiochip_add_data(&ts->chip, ts);
-	if (ret)
-		goto exit_destroy;
-
 	return ret;
 
 exit_destroy:
diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c
index ac22efc..99d37b5 100644
--- a/drivers/gpio/gpio-mcp23s08.c
+++ b/drivers/gpio/gpio-mcp23s08.c
@@ -564,7 +564,7 @@
 	mcp->chip.direction_output = mcp23s08_direction_output;
 	mcp->chip.set = mcp23s08_set;
 	mcp->chip.dbg_show = mcp23s08_dbg_show;
-#ifdef CONFIG_OF
+#ifdef CONFIG_OF_GPIO
 	mcp->chip.of_gpio_n_cells = 2;
 	mcp->chip.of_node = dev->of_node;
 #endif
diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c
index 0c99e8fb..8d8ee0e 100644
--- a/drivers/gpio/gpio-sa1100.c
+++ b/drivers/gpio/gpio-sa1100.c
@@ -155,7 +155,7 @@
 {
 	irq_set_chip_and_handler(irq, &sa1100_gpio_irq_chip,
 				 handle_edge_irq);
-	irq_set_noprobe(irq);
+	irq_set_probe(irq);
 
 	return 0;
 }
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 75e7b39..a28feb3 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -16,7 +16,6 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/io.h>
-#include <linux/io-mapping.h>
 #include <linux/gpio/consumer.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8ebc5f1..700c56b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -426,6 +426,8 @@
 
 	/* custom LRU management */
 	struct amdgpu_mman_lru			log2_size[AMDGPU_TTM_LRU_SIZE];
+	/* guard for log2_size array, don't add anything in between */
+	struct amdgpu_mman_lru			guard;
 };
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring,
@@ -646,9 +648,9 @@
 void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
 int amdgpu_gart_init(struct amdgpu_device *adev);
 void amdgpu_gart_fini(struct amdgpu_device *adev);
-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 			int pages);
-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 		     int pages, struct page **pagelist,
 		     dma_addr_t *dma_addr, uint32_t flags);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 9831753..fe872b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -321,6 +321,19 @@
 			    (le16_to_cpu(path->usConnObjectId) &
 			     OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT;
 
+			/* Skip TV/CV support */
+			if ((le16_to_cpu(path->usDeviceTag) ==
+			     ATOM_DEVICE_TV1_SUPPORT) ||
+			    (le16_to_cpu(path->usDeviceTag) ==
+			     ATOM_DEVICE_CV_SUPPORT))
+				continue;
+
+			if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) {
+				DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n",
+					  con_obj_id, le16_to_cpu(path->usDeviceTag));
+				continue;
+			}
+
 			connector_type =
 				object_connector_convert[con_obj_id];
 			connector_object_id = con_obj_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 49de926..10b5ddf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -200,16 +200,7 @@
 	atpx->is_hybrid = false;
 	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
 		printk("ATPX Hybrid Graphics\n");
-#if 1
-		/* This is a temporary hack until the D3 cold support
-		 * makes it upstream.  The ATPX power_control method seems
-		 * to still work on even if the system should be using
-		 * the new standardized hybrid D3 cold ACPI interface.
-		 */
-		atpx->functions.power_cntl = true;
-#else
 		atpx->functions.power_cntl = false;
-#endif
 		atpx->is_hybrid = true;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index df7ab245..39c01b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1708,11 +1708,11 @@
 
 	DRM_INFO("amdgpu: finishing device.\n");
 	adev->shutdown = true;
+	drm_crtc_force_disable_all(adev->ddev);
 	/* evict vram memory */
 	amdgpu_bo_evict_vram(adev);
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_fence_driver_fini(adev);
-	drm_crtc_force_disable_all(adev->ddev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_fini(adev);
 	kfree(adev->ip_block_status);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 921bce2..0feea34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -221,7 +221,7 @@
  * Unbinds the requested pages from the gart page table and
  * replaces them with the dummy page (all asics).
  */
-void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset,
+void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
 			int pages)
 {
 	unsigned t;
@@ -268,7 +268,7 @@
  * (all asics).
  * Returns 0 for success, -EINVAL for failure.
  */
-int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset,
+int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
 		     int pages, struct page **pagelist, dma_addr_t *dma_addr,
 		     uint32_t flags)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a31d7ef..ec1282a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -280,7 +280,7 @@
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 {
 	unsigned i;
-	int r;
+	int r, ret = 0;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
@@ -301,10 +301,11 @@
 			} else {
 				/* still not good, but we can live with it */
 				DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r);
+				ret = r;
 			}
 		}
 	}
-	return 0;
+	return ret;
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index ff63b88..5cc7052 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -305,7 +305,7 @@
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	char *table = NULL;
-	int size, i;
+	int size;
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_get_pp_table(adev, &table);
@@ -315,10 +315,7 @@
 	if (size >= PAGE_SIZE)
 		size = PAGE_SIZE - 1;
 
-	for (i = 0; i < size; i++) {
-		sprintf(buf + i, "%02x", table[i]);
-	}
-	sprintf(buf + i, "\n");
+	memcpy(buf, table, size);
 
 	return size;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index b7742e6..716f2af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -251,8 +251,8 @@
 
 	adev = amdgpu_get_adev(bo->bdev);
 	ring = adev->mman.buffer_funcs_ring;
-	old_start = old_mem->start << PAGE_SHIFT;
-	new_start = new_mem->start << PAGE_SHIFT;
+	old_start = (u64)old_mem->start << PAGE_SHIFT;
+	new_start = (u64)new_mem->start << PAGE_SHIFT;
 
 	switch (old_mem->mem_type) {
 	case TTM_PL_VRAM:
@@ -335,7 +335,7 @@
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
+	r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
@@ -368,7 +368,7 @@
 	if (unlikely(r)) {
 		return r;
 	}
-	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
+	r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
@@ -950,6 +950,8 @@
 	struct list_head *res = lru->lru[tbo->mem.mem_type];
 
 	lru->lru[tbo->mem.mem_type] = &tbo->lru;
+	while ((++lru)->lru[tbo->mem.mem_type] == res)
+		lru->lru[tbo->mem.mem_type] = &tbo->lru;
 
 	return res;
 }
@@ -960,6 +962,8 @@
 	struct list_head *res = lru->swap_lru;
 
 	lru->swap_lru = &tbo->swap;
+	while ((++lru)->swap_lru == res)
+		lru->swap_lru = &tbo->swap;
 
 	return res;
 }
@@ -1011,6 +1015,10 @@
 		lru->swap_lru = &adev->mman.bdev.glob->swap_lru;
 	}
 
+	for (j = 0; j < TTM_NUM_MEM_TYPES; ++j)
+		adev->mman.guard.lru[j] = NULL;
+	adev->mman.guard.swap_lru = NULL;
+
 	adev->mman.initialized = true;
 	r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
 				adev->mc.real_vram_size >> PAGE_SHIFT);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b11f4e8..4aa993d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1187,7 +1187,8 @@
 		r = 0;
 	}
 
-error:
 	fence_put(fence);
+
+error:
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8e642fc..80120fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1535,7 +1535,7 @@
 	r = amd_sched_entity_init(&ring->sched, &vm->entity,
 				  rq, amdgpu_sched_jobs);
 	if (r)
-		return r;
+		goto err;
 
 	vm->page_directory_fence = NULL;
 
@@ -1565,6 +1565,9 @@
 error_free_sched_entity:
 	amd_sched_entity_fini(&ring->sched, &vm->entity);
 
+err:
+	drm_free_large(vm->page_tables);
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index e2f0e5d..a5c94b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -5779,6 +5779,7 @@
 		break;
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
+	case CHIP_MULLINS:
 	default: BUG();
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index ee64669..77fdd99 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -52,6 +52,7 @@
 static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
 static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
 static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
+static int cik_sdma_soft_reset(void *handle);
 
 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
 MODULE_FIRMWARE("radeon/bonaire_sdma1.bin");
@@ -1037,6 +1038,8 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	cik_sdma_soft_reset(handle);
+
 	return cik_sdma_hw_init(adev);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index d869d05..425413f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2755,8 +2755,7 @@
 	u64 wb_gpu_addr;
 	u32 *buf;
 	struct bonaire_mqd *mqd;
-
-	gfx_v7_0_cp_compute_enable(adev, true);
+	struct amdgpu_ring *ring;
 
 	/* fix up chicken bits */
 	tmp = RREG32(mmCP_CPF_DEBUG);
@@ -2791,7 +2790,7 @@
 
 	/* init the queues.  Just two for now. */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
+		ring = &adev->gfx.compute_ring[i];
 
 		if (ring->mqd_obj == NULL) {
 			r = amdgpu_bo_create(adev,
@@ -2970,6 +2969,13 @@
 		amdgpu_bo_unreserve(ring->mqd_obj);
 
 		ring->ready = true;
+	}
+
+	gfx_v7_0_cp_compute_enable(adev, true);
+
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		ring = &adev->gfx.compute_ring[i];
+
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 			ring->ready = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index bff8668..b818461 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -270,7 +270,8 @@
 
 static const u32 golden_settings_polaris11_a11[] =
 {
-	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
+	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
+	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -279,7 +280,7 @@
 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
 	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
 	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
-	mmSQ_CONFIG, 0x07f80000, 0x07180000,
+	mmSQ_CONFIG, 0x07f80000, 0x01180000,
 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
@@ -301,8 +302,8 @@
 static const u32 golden_settings_polaris10_a11[] =
 {
 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
-	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
-	mmCB_HW_CONTROL_2, 0, 0x0f000000,
+	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
+	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -409,6 +410,7 @@
 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
 	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
 	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
+	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
@@ -505,8 +507,10 @@
 	mmGB_GPU_ID, 0x0000000f, 0x00000000,
 	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
 	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
+	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index d24a82b..0b0f086 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -144,6 +144,7 @@
 		break;
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
+	case CHIP_MULLINS:
 		return 0;
 	default: BUG();
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 717359d..2aee2c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -103,6 +103,11 @@
 	mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104
 };
 
+static const u32 golden_settings_stoney_common[] =
+{
+	mmMC_HUB_RDREQ_UVD, MC_HUB_RDREQ_UVD__PRESCALE_MASK, 0x00000004,
+	mmMC_RD_GRP_OTH, MC_RD_GRP_OTH__UVD_MASK, 0x00600000
+};
 
 static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 {
@@ -142,6 +147,9 @@
 		amdgpu_program_register_sequence(adev,
 						 stoney_mgcg_cgcg_init,
 						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
+		amdgpu_program_register_sequence(adev,
+						 golden_settings_stoney_common,
+						 (const u32)ARRAY_SIZE(golden_settings_stoney_common));
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 1351c7e..a64715d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -714,7 +714,7 @@
 		DRM_ERROR("amdgpu: IB test timed out\n");
 		r = -ETIMEDOUT;
 		goto err1;
-	} else if (r) {
+	} else if (r < 0) {
 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index e621eba..a7d3cb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -184,7 +184,7 @@
 							sizeof(u32)) + inx;
 
 	pr_debug("kfd: get kernel queue doorbell\n"
-			 "     doorbell offset   == 0x%08d\n"
+			 "     doorbell offset   == 0x%08X\n"
 			 "     kernel address    == 0x%08lX\n",
 		*doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
 
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index ef312bb..963a24d 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -405,7 +405,7 @@
 	spin_lock(&sched->job_list_lock);
 	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
 					 struct amd_sched_job, node);
-	if (s_job)
+	if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT)
 		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
 
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
index a978381..9b17a66 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
@@ -387,7 +387,7 @@
 	atmel_hlcdc_crtc_finish_page_flip(drm_crtc_to_atmel_hlcdc_crtc(c));
 }
 
-void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc)
+static void atmel_hlcdc_crtc_reset(struct drm_crtc *crtc)
 {
 	struct atmel_hlcdc_crtc_state *state;
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index 016c191..52c527f 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -320,19 +320,19 @@
 			u32 *coeff_tab = heo_upscaling_ycoef;
 			u32 max_memsize;
 
-			if (state->crtc_w < state->src_w)
+			if (state->crtc_h < state->src_h)
 				coeff_tab = heo_downscaling_ycoef;
 			for (i = 0; i < ARRAY_SIZE(heo_upscaling_ycoef); i++)
 				atmel_hlcdc_layer_update_cfg(&plane->layer,
 							     33 + i,
 							     0xffffffff,
 							     coeff_tab[i]);
-			factor = ((8 * 256 * state->src_w) - (256 * 4)) /
-				 state->crtc_w;
+			factor = ((8 * 256 * state->src_h) - (256 * 4)) /
+				 state->crtc_h;
 			factor++;
-			max_memsize = ((factor * state->crtc_w) + (256 * 4)) /
+			max_memsize = ((factor * state->crtc_h) + (256 * 4)) /
 				      2048;
-			if (max_memsize > state->src_w)
+			if (max_memsize > state->src_h)
 				factor--;
 			factor_reg |= (factor << 16) | 0x80000000;
 		}
diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c
index 80446e2..76bcb43 100644
--- a/drivers/gpu/drm/cirrus/cirrus_main.c
+++ b/drivers/gpu/drm/cirrus/cirrus_main.c
@@ -185,14 +185,23 @@
 		goto out;
 	}
 
+	/*
+	 * cirrus_modeset_init() is initializing/registering the emulated fbdev
+	 * and DRM internals can access/test some of the fields in
+	 * mode_config->funcs as part of the fbdev registration process.
+	 * Make sure dev->mode_config.funcs is properly set to avoid
+	 * dereferencing a NULL pointer.
+	 * FIXME: mode_config.funcs assignment should probably be done in
+	 * cirrus_modeset_init() (that's a common pattern seen in other DRM
+	 * drivers).
+	 */
+	dev->mode_config.funcs = &cirrus_mode_funcs;
 	r = cirrus_modeset_init(cdev);
 	if (r) {
 		dev_err(&dev->pdev->dev, "Fatal error during modeset init: %d\n", r);
 		goto out;
 	}
 
-	dev->mode_config.funcs = (void *)&cirrus_mode_funcs;
-
 	return 0;
 out:
 	cirrus_driver_unload(dev);
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index fa39307..2a3ded4 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -475,7 +475,7 @@
 					val,
 					-1,
 					&replaced);
-		state->color_mgmt_changed = replaced;
+		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->ctm_property) {
 		ret = drm_atomic_replace_property_blob_from_id(crtc,
@@ -483,7 +483,7 @@
 					val,
 					sizeof(struct drm_color_ctm),
 					&replaced);
-		state->color_mgmt_changed = replaced;
+		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->gamma_lut_property) {
 		ret = drm_atomic_replace_property_blob_from_id(crtc,
@@ -491,7 +491,7 @@
 					val,
 					-1,
 					&replaced);
-		state->color_mgmt_changed = replaced;
+		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (crtc->funcs->atomic_set_property)
 		return crtc->funcs->atomic_set_property(crtc, state, property, val);
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index f1d9f05..ddebe54 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -1121,16 +1121,14 @@
 	struct drm_connector *connector;
 	int ret;
 
-	mutex_lock(&dev->mode_config.mutex);
-
-	drm_for_each_connector(connector, dev) {
+	/* FIXME: taking the mode config mutex ends up in a clash with
+	 * fbcon/backlight registration */
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		ret = drm_connector_register(connector);
 		if (ret)
 			goto err;
 	}
 
-	mutex_unlock(&dev->mode_config.mutex);
-
 	return 0;
 
 err:
@@ -5406,6 +5404,9 @@
 	struct drm_pending_vblank_event *e = NULL;
 	int ret = -EINVAL;
 
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS ||
 	    page_flip->reserved != 0)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 7df26d4..637a0aa 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -74,6 +74,8 @@
 #define EDID_QUIRK_FORCE_8BPC			(1 << 8)
 /* Force 12bpc */
 #define EDID_QUIRK_FORCE_12BPC			(1 << 9)
+/* Force 6bpc */
+#define EDID_QUIRK_FORCE_6BPC			(1 << 10)
 
 struct detailed_mode_closure {
 	struct drm_connector *connector;
@@ -100,6 +102,9 @@
 	/* Unknown Acer */
 	{ "ACR", 2423, EDID_QUIRK_FIRST_DETAILED_PREFERRED },
 
+	/* AEO model 0 reports 8 bpc, but is a 6 bpc panel */
+	{ "AEO", 0, EDID_QUIRK_FORCE_6BPC },
+
 	/* Belinea 10 15 55 */
 	{ "MAX", 1516, EDID_QUIRK_PREFER_LARGE_60 },
 	{ "MAX", 0x77e, EDID_QUIRK_PREFER_LARGE_60 },
@@ -3862,6 +3867,20 @@
 	/* HDMI deep color modes supported? Assign to info, if so */
 	drm_assign_hdmi_deep_color_info(edid, info, connector);
 
+	/*
+	 * Digital sink with "DFP 1.x compliant TMDS" according to EDID 1.3?
+	 *
+	 * For such displays, the DFP spec 1.0, section 3.10 "EDID support"
+	 * tells us to assume 8 bpc color depth if the EDID doesn't have
+	 * extensions which tell otherwise.
+	 */
+	if ((info->bpc == 0) && (edid->revision < 4) &&
+	    (edid->input & DRM_EDID_DIGITAL_TYPE_DVI)) {
+		info->bpc = 8;
+		DRM_DEBUG("%s: Assigning DFP sink color depth as %d bpc.\n",
+			  connector->name, info->bpc);
+	}
+
 	/* Only defined for 1.4 with digital displays */
 	if (edid->revision < 4)
 		return;
@@ -4082,6 +4101,9 @@
 
 	drm_add_display_info(edid, &connector->display_info, connector);
 
+	if (quirks & EDID_QUIRK_FORCE_6BPC)
+		connector->display_info.bpc = 6;
+
 	if (quirks & EDID_QUIRK_FORCE_8BPC)
 		connector->display_info.bpc = 8;
 
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index ce54e98..0a06f91 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -464,7 +464,7 @@
 
 	/* Sometimes user space wants everything disabled, so don't steal the
 	 * display if there's a master. */
-	if (lockless_dereference(dev->master))
+	if (READ_ONCE(dev->master))
 		return false;
 
 	drm_for_each_crtc(crtc, dev) {
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index 57676f8..a628975 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -1015,6 +1015,7 @@
 	return 0;
 }
 
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
 typedef struct drm_mode_fb_cmd232 {
 	u32 fb_id;
 	u32 width;
@@ -1071,6 +1072,7 @@
 
 	return 0;
 }
+#endif
 
 static drm_ioctl_compat_t *drm_compat_ioctls[] = {
 	[DRM_IOCTL_NR(DRM_IOCTL_VERSION32)] = compat_drm_version,
@@ -1104,7 +1106,9 @@
 	[DRM_IOCTL_NR(DRM_IOCTL_UPDATE_DRAW32)] = compat_drm_update_draw,
 #endif
 	[DRM_IOCTL_NR(DRM_IOCTL_WAIT_VBLANK32)] = compat_drm_wait_vblank,
+#if defined(CONFIG_X86) || defined(CONFIG_IA64)
 	[DRM_IOCTL_NR(DRM_IOCTL_MODE_ADDFB232)] = compat_drm_mode_addfb2,
+#endif
 };
 
 /**
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 87ef341..b382cf5 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -1333,8 +1333,6 @@
 	if (ret < 0)
 		return ret;
 
-	mutex_lock(&gpu->lock);
-
 	/*
 	 * TODO
 	 *
@@ -1348,16 +1346,18 @@
 	if (unlikely(event == ~0U)) {
 		DRM_ERROR("no free event\n");
 		ret = -EBUSY;
-		goto out_unlock;
+		goto out_pm_put;
 	}
 
 	fence = etnaviv_gpu_fence_alloc(gpu);
 	if (!fence) {
 		event_free(gpu, event);
 		ret = -ENOMEM;
-		goto out_unlock;
+		goto out_pm_put;
 	}
 
+	mutex_lock(&gpu->lock);
+
 	gpu->event[event].fence = fence;
 	submit->fence = fence->seqno;
 	gpu->active_fence = submit->fence;
@@ -1395,9 +1395,9 @@
 	hangcheck_timer_reset(gpu);
 	ret = 0;
 
-out_unlock:
 	mutex_unlock(&gpu->lock);
 
+out_pm_put:
 	etnaviv_gpu_pm_put(gpu);
 
 	return ret;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
index e016640..40ce841 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fb.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -55,11 +55,11 @@
 	flags = exynos_gem->flags;
 
 	/*
-	 * without iommu support, not support physically non-continuous memory
-	 * for framebuffer.
+	 * Physically non-contiguous memory type for framebuffer is not
+	 * supported without IOMMU.
 	 */
 	if (IS_NONCONTIG_BUFFER(flags)) {
-		DRM_ERROR("cannot use this gem memory type for fb.\n");
+		DRM_ERROR("Non-contiguous GEM memory is not supported.\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 0525c56..147ef0d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -1753,32 +1753,6 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int fimc_suspend(struct device *dev)
-{
-	struct fimc_context *ctx = get_fimc_context(dev);
-
-	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-	if (pm_runtime_suspended(dev))
-		return 0;
-
-	return fimc_clk_ctrl(ctx, false);
-}
-
-static int fimc_resume(struct device *dev)
-{
-	struct fimc_context *ctx = get_fimc_context(dev);
-
-	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-	if (!pm_runtime_suspended(dev))
-		return fimc_clk_ctrl(ctx, true);
-
-	return 0;
-}
-#endif
-
 static int fimc_runtime_suspend(struct device *dev)
 {
 	struct fimc_context *ctx = get_fimc_context(dev);
@@ -1799,7 +1773,8 @@
 #endif
 
 static const struct dev_pm_ops fimc_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(fimc_suspend, fimc_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
 	SET_RUNTIME_PM_OPS(fimc_runtime_suspend, fimc_runtime_resume, NULL)
 };
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 4bf00f5..6eca8bb 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -1475,8 +1475,8 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int g2d_suspend(struct device *dev)
+#ifdef CONFIG_PM
+static int g2d_runtime_suspend(struct device *dev)
 {
 	struct g2d_data *g2d = dev_get_drvdata(dev);
 
@@ -1490,25 +1490,6 @@
 
 	flush_work(&g2d->runqueue_work);
 
-	return 0;
-}
-
-static int g2d_resume(struct device *dev)
-{
-	struct g2d_data *g2d = dev_get_drvdata(dev);
-
-	g2d->suspended = false;
-	g2d_exec_runqueue(g2d);
-
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_PM
-static int g2d_runtime_suspend(struct device *dev)
-{
-	struct g2d_data *g2d = dev_get_drvdata(dev);
-
 	clk_disable_unprepare(g2d->gate_clk);
 
 	return 0;
@@ -1523,12 +1504,16 @@
 	if (ret < 0)
 		dev_warn(dev, "failed to enable clock.\n");
 
+	g2d->suspended = false;
+	g2d_exec_runqueue(g2d);
+
 	return ret;
 }
 #endif
 
 static const struct dev_pm_ops g2d_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(g2d_suspend, g2d_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
 	SET_RUNTIME_PM_OPS(g2d_runtime_suspend, g2d_runtime_resume, NULL)
 };
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 5d20da8..52a9d26 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1760,34 +1760,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
-static int gsc_suspend(struct device *dev)
-{
-	struct gsc_context *ctx = get_gsc_context(dev);
-
-	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-	if (pm_runtime_suspended(dev))
-		return 0;
-
-	return gsc_clk_ctrl(ctx, false);
-}
-
-static int gsc_resume(struct device *dev)
-{
-	struct gsc_context *ctx = get_gsc_context(dev);
-
-	DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
-	if (!pm_runtime_suspended(dev))
-		return gsc_clk_ctrl(ctx, true);
-
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_PM
-static int gsc_runtime_suspend(struct device *dev)
+static int __maybe_unused gsc_runtime_suspend(struct device *dev)
 {
 	struct gsc_context *ctx = get_gsc_context(dev);
 
@@ -1796,7 +1769,7 @@
 	return  gsc_clk_ctrl(ctx, false);
 }
 
-static int gsc_runtime_resume(struct device *dev)
+static int __maybe_unused gsc_runtime_resume(struct device *dev)
 {
 	struct gsc_context *ctx = get_gsc_context(dev);
 
@@ -1804,10 +1777,10 @@
 
 	return  gsc_clk_ctrl(ctx, true);
 }
-#endif
 
 static const struct dev_pm_ops gsc_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(gsc_suspend, gsc_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
 	SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL)
 };
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 404367a..6591e40 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -794,29 +794,6 @@
 	return 0;
 }
 
-
-#ifdef CONFIG_PM_SLEEP
-static int rotator_suspend(struct device *dev)
-{
-	struct rot_context *rot = dev_get_drvdata(dev);
-
-	if (pm_runtime_suspended(dev))
-		return 0;
-
-	return rotator_clk_crtl(rot, false);
-}
-
-static int rotator_resume(struct device *dev)
-{
-	struct rot_context *rot = dev_get_drvdata(dev);
-
-	if (!pm_runtime_suspended(dev))
-		return rotator_clk_crtl(rot, true);
-
-	return 0;
-}
-#endif
-
 static int rotator_runtime_suspend(struct device *dev)
 {
 	struct rot_context *rot = dev_get_drvdata(dev);
@@ -833,7 +810,8 @@
 #endif
 
 static const struct dev_pm_ops rotator_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(rotator_suspend, rotator_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
 	SET_RUNTIME_PM_OPS(rotator_runtime_suspend, rotator_runtime_resume,
 									NULL)
 };
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 95ddd56..5de36d8 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1281,6 +1281,11 @@
 
 	intel_runtime_pm_enable(dev_priv);
 
+	/* Everything is in place, we can now relax! */
+	DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
+		 driver.name, driver.major, driver.minor, driver.patchlevel,
+		 driver.date, pci_name(pdev), dev_priv->drm.primary->index);
+
 	intel_runtime_pm_put(dev_priv);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 21f9390..f68c789 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -882,11 +882,12 @@
 
 	struct i915_ctx_hang_stats hang_stats;
 
-	/* Unique identifier for this context, used by the hw for tracking */
 	unsigned long flags;
 #define CONTEXT_NO_ZEROMAP		BIT(0)
 #define CONTEXT_NO_ERROR_CAPTURE	BIT(1)
-	unsigned hw_id;
+
+	/* Unique identifier for this context, used by the hw for tracking */
+	unsigned int hw_id;
 	u32 user_handle;
 
 	u32 ggtt_alignment;
@@ -1854,6 +1855,7 @@
 	enum modeset_restore modeset_restore;
 	struct mutex modeset_restore_lock;
 	struct drm_atomic_state *modeset_restore_state;
+	struct drm_modeset_acquire_ctx reset_ctx;
 
 	struct list_head vm_list; /* Global list of all address spaces */
 	struct i915_ggtt ggtt; /* VM representing the global address space */
@@ -1962,6 +1964,13 @@
 	struct i915_suspend_saved_registers regfile;
 	struct vlv_s0ix_state vlv_s0ix_state;
 
+	enum {
+		I915_SKL_SAGV_UNKNOWN = 0,
+		I915_SKL_SAGV_DISABLED,
+		I915_SKL_SAGV_ENABLED,
+		I915_SKL_SAGV_NOT_CONTROLLED
+	} skl_sagv_status;
+
 	struct {
 		/*
 		 * Raw watermark latency values:
@@ -3590,6 +3599,7 @@
 /* belongs in i915_gem_gtt.h */
 static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv)
 {
+	wmb();
 	if (INTEL_GEN(dev_priv) < 6)
 		intel_gtt_chipset_flush();
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1168150..a77ce99 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -879,9 +879,12 @@
 	ret = i915_gem_shmem_pread(dev, obj, args, file);
 
 	/* pread for non shmem backed objects */
-	if (ret == -EFAULT || ret == -ENODEV)
+	if (ret == -EFAULT || ret == -ENODEV) {
+		intel_runtime_pm_get(to_i915(dev));
 		ret = i915_gem_gtt_pread(dev, obj, args->size,
 					args->offset, args->data_ptr);
+		intel_runtime_pm_put(to_i915(dev));
+	}
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -1306,7 +1309,7 @@
 		 * textures). Fallback to the shmem path in that case. */
 	}
 
-	if (ret == -EFAULT) {
+	if (ret == -EFAULT || ret == -ENOSPC) {
 		if (obj->phys_handle)
 			ret = i915_gem_phys_pwrite(obj, args, file);
 		else if (i915_gem_object_has_struct_page(obj))
@@ -3169,6 +3172,8 @@
 	}
 
 	intel_ring_init_seqno(engine, engine->last_submitted_seqno);
+
+	engine->i915->gt.active_engines &= ~intel_engine_flag(engine);
 }
 
 void i915_gem_reset(struct drm_device *dev)
@@ -3186,6 +3191,7 @@
 
 	for_each_engine(engine, dev_priv)
 		i915_gem_reset_engine_cleanup(engine);
+	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 
 	i915_gem_context_reset(dev);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1978633..b35e5b6 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -943,8 +943,6 @@
 {
 	const unsigned other_rings = ~intel_engine_flag(req->engine);
 	struct i915_vma *vma;
-	uint32_t flush_domains = 0;
-	bool flush_chipset = false;
 	int ret;
 
 	list_for_each_entry(vma, vmas, exec_list) {
@@ -957,16 +955,11 @@
 		}
 
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-			flush_chipset |= i915_gem_clflush_object(obj, false);
-
-		flush_domains |= obj->base.write_domain;
+			i915_gem_clflush_object(obj, false);
 	}
 
-	if (flush_chipset)
-		i915_gem_chipset_flush(req->engine->i915);
-
-	if (flush_domains & I915_GEM_DOMAIN_GTT)
-		wmb();
+	/* Unconditionally flush any chipset caches (for streaming writes). */
+	i915_gem_chipset_flush(req->engine->i915);
 
 	/* Unconditionally invalidate gpu caches and ensure that we do flush
 	 * any residual writes from the previous batch.
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 10f1e32..f38ceff 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -122,8 +122,11 @@
 	has_full_48bit_ppgtt =
 	       	IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9;
 
-	if (intel_vgpu_active(dev_priv))
-		has_full_ppgtt = false; /* emulation is too hard */
+	if (intel_vgpu_active(dev_priv)) {
+		/* emulation is too hard */
+		has_full_ppgtt = false;
+		has_full_48bit_ppgtt = false;
+	}
 
 	if (!has_aliasing_ppgtt)
 		return 0;
@@ -158,7 +161,7 @@
 		return 0;
 	}
 
-	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists)
+	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
 		return has_full_48bit_ppgtt ? 3 : 2;
 	else
 		return has_aliasing_ppgtt ? 1 : 0;
@@ -2873,6 +2876,7 @@
 		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 
 		ppgtt->base.cleanup(&ppgtt->base);
+		kfree(ppgtt);
 	}
 
 	i915_gem_cleanup_stolen(dev);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index ce14fe0..bf2cad3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1536,6 +1536,7 @@
 #define BALANCE_LEG_MASK(port)		(7<<(8+3*(port)))
 /* Balance leg disable bits */
 #define BALANCE_LEG_DISABLE_SHIFT	23
+#define BALANCE_LEG_DISABLE(port)	(1 << (23 + (port)))
 
 /*
  * Fence registers
@@ -7144,6 +7145,15 @@
 
 #define GEN6_PCODE_MAILBOX			_MMIO(0x138124)
 #define   GEN6_PCODE_READY			(1<<31)
+#define   GEN6_PCODE_ERROR_MASK			0xFF
+#define     GEN6_PCODE_SUCCESS			0x0
+#define     GEN6_PCODE_ILLEGAL_CMD		0x1
+#define     GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x2
+#define     GEN6_PCODE_TIMEOUT			0x3
+#define     GEN6_PCODE_UNIMPLEMENTED_CMD	0xFF
+#define     GEN7_PCODE_TIMEOUT			0x2
+#define     GEN7_PCODE_ILLEGAL_DATA		0x3
+#define     GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10
 #define	  GEN6_PCODE_WRITE_RC6VIDS		0x4
 #define	  GEN6_PCODE_READ_RC6VIDS		0x5
 #define     GEN6_ENCODE_RC6_VID(mv)		(((mv) - 245) / 5)
@@ -7165,6 +7175,10 @@
 #define   HSW_PCODE_DE_WRITE_FREQ_REQ		0x17
 #define   DISPLAY_IPS_CONTROL			0x19
 #define	  HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL	0x1A
+#define   GEN9_PCODE_SAGV_CONTROL		0x21
+#define     GEN9_SAGV_DISABLE			0x0
+#define     GEN9_SAGV_IS_DISABLED		0x1
+#define     GEN9_SAGV_ENABLE			0x3
 #define GEN6_PCODE_DATA				_MMIO(0x138128)
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
 #define   GEN6_PCODE_FREQ_RING_RATIO_SHIFT	16
diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c
index f6acb5a..b81cfb3 100644
--- a/drivers/gpu/drm/i915/i915_vgpu.c
+++ b/drivers/gpu/drm/i915/i915_vgpu.c
@@ -65,9 +65,6 @@
 
 	BUILD_BUG_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 
-	if (!IS_HASWELL(dev_priv))
-		return;
-
 	magic = __raw_i915_read64(dev_priv, vgtif_reg(magic));
 	if (magic != VGT_MAGIC)
 		return;
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 6700a7b..d32f586 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -600,6 +600,8 @@
 	if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv))
 		return;
 
+	i915_audio_component_get_power(dev);
+
 	/*
 	 * Enable/disable generating the codec wake signal, overriding the
 	 * internal logic to generate the codec wake to controller.
@@ -615,6 +617,8 @@
 		I915_WRITE(HSW_AUD_CHICKENBIT, tmp);
 		usleep_range(1000, 1500);
 	}
+
+	i915_audio_component_put_power(dev);
 }
 
 /* Get CDCLK in kHz  */
@@ -648,6 +652,7 @@
 	    !IS_HASWELL(dev_priv))
 		return 0;
 
+	i915_audio_component_get_power(dev);
 	mutex_lock(&dev_priv->av_mutex);
 	/* 1. get the pipe */
 	intel_encoder = dev_priv->dig_port_map[port];
@@ -698,6 +703,7 @@
 
  unlock:
 	mutex_unlock(&dev_priv->av_mutex);
+	i915_audio_component_put_power(dev);
 	return err;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 3edb958..c3b33a1 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -41,15 +41,15 @@
  * be moved to FW_FAILED.
  */
 
-#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin"
+#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin"
 MODULE_FIRMWARE(I915_CSR_KBL);
 #define KBL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 1)
 
-#define I915_CSR_SKL "i915/skl_dmc_ver1.bin"
+#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin"
 MODULE_FIRMWARE(I915_CSR_SKL);
-#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 23)
+#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 26)
 
-#define I915_CSR_BXT "i915/bxt_dmc_ver1.bin"
+#define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin"
 MODULE_FIRMWARE(I915_CSR_BXT);
 #define BXT_CSR_VERSION_REQUIRED	CSR_VERSION(1, 7)
 
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index dd1d6fe..1a7efac 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -145,7 +145,7 @@
 static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = {
 	{ 0x0000201B, 0x000000A2, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
-	{ 0x80007011, 0x000000CD, 0x0 },
+	{ 0x80007011, 0x000000CD, 0x1 },
 	{ 0x80009010, 0x000000C0, 0x1 },
 	{ 0x0000201B, 0x0000009D, 0x0 },
 	{ 0x80005012, 0x000000C0, 0x1 },
@@ -158,7 +158,7 @@
 static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = {
 	{ 0x00000018, 0x000000A2, 0x0 },
 	{ 0x00005012, 0x00000088, 0x0 },
-	{ 0x80007011, 0x000000CD, 0x0 },
+	{ 0x80007011, 0x000000CD, 0x3 },
 	{ 0x80009010, 0x000000C0, 0x3 },
 	{ 0x00000018, 0x0000009D, 0x0 },
 	{ 0x80005012, 0x000000C0, 0x3 },
@@ -388,6 +388,40 @@
 	}
 }
 
+static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port)
+{
+	int n_hdmi_entries;
+	int hdmi_level;
+	int hdmi_default_entry;
+
+	hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+
+	if (IS_BROXTON(dev_priv))
+		return hdmi_level;
+
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+		skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
+		hdmi_default_entry = 8;
+	} else if (IS_BROADWELL(dev_priv)) {
+		n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+		hdmi_default_entry = 7;
+	} else if (IS_HASWELL(dev_priv)) {
+		n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
+		hdmi_default_entry = 6;
+	} else {
+		WARN(1, "ddi translation table missing\n");
+		n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
+		hdmi_default_entry = 7;
+	}
+
+	/* Choose a good default if VBT is badly populated */
+	if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
+	    hdmi_level >= n_hdmi_entries)
+		hdmi_level = hdmi_default_entry;
+
+	return hdmi_level;
+}
+
 /*
  * Starting with Haswell, DDI port buffers must be programmed with correct
  * values in advance. The buffer values are different for FDI and DP modes,
@@ -399,7 +433,7 @@
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	u32 iboost_bit = 0;
-	int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry,
+	int i, n_hdmi_entries, n_dp_entries, n_edp_entries,
 	    size;
 	int hdmi_level;
 	enum port port;
@@ -410,7 +444,7 @@
 	const struct ddi_buf_trans *ddi_translations;
 
 	port = intel_ddi_get_encoder_port(encoder);
-	hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
+	hdmi_level = intel_ddi_hdmi_level(dev_priv, port);
 
 	if (IS_BROXTON(dev_priv)) {
 		if (encoder->type != INTEL_OUTPUT_HDMI)
@@ -430,7 +464,6 @@
 				skl_get_buf_trans_edp(dev_priv, &n_edp_entries);
 		ddi_translations_hdmi =
 				skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries);
-		hdmi_default_entry = 8;
 		/* If we're boosting the current, set bit 31 of trans1 */
 		if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level ||
 		    dev_priv->vbt.ddi_port_info[port].dp_boost_level)
@@ -456,7 +489,6 @@
 
 		n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
 		n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
-		hdmi_default_entry = 7;
 	} else if (IS_HASWELL(dev_priv)) {
 		ddi_translations_fdi = hsw_ddi_translations_fdi;
 		ddi_translations_dp = hsw_ddi_translations_dp;
@@ -464,7 +496,6 @@
 		ddi_translations_hdmi = hsw_ddi_translations_hdmi;
 		n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp);
 		n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi);
-		hdmi_default_entry = 6;
 	} else {
 		WARN(1, "ddi translation table missing\n");
 		ddi_translations_edp = bdw_ddi_translations_dp;
@@ -474,7 +505,6 @@
 		n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp);
 		n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp);
 		n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi);
-		hdmi_default_entry = 7;
 	}
 
 	switch (encoder->type) {
@@ -505,11 +535,6 @@
 	if (encoder->type != INTEL_OUTPUT_HDMI)
 		return;
 
-	/* Choose a good default if VBT is badly populated */
-	if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
-	    hdmi_level >= n_hdmi_entries)
-		hdmi_level = hdmi_default_entry;
-
 	/* Entry 9 is for HDMI: */
 	I915_WRITE(DDI_BUF_TRANS_LO(port, i),
 		   ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit);
@@ -1379,14 +1404,30 @@
 			   TRANS_CLK_SEL_DISABLED);
 }
 
-static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
-			       u32 level, enum port port, int type)
+static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv,
+				enum port port, uint8_t iboost)
 {
+	u32 tmp;
+
+	tmp = I915_READ(DISPIO_CR_TX_BMU_CR0);
+	tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port));
+	if (iboost)
+		tmp |= iboost << BALANCE_LEG_SHIFT(port);
+	else
+		tmp |= BALANCE_LEG_DISABLE(port);
+	I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp);
+}
+
+static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level)
+{
+	struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base);
+	struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev);
+	enum port port = intel_dig_port->port;
+	int type = encoder->type;
 	const struct ddi_buf_trans *ddi_translations;
 	uint8_t iboost;
 	uint8_t dp_iboost, hdmi_iboost;
 	int n_entries;
-	u32 reg;
 
 	/* VBT may override standard boost values */
 	dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level;
@@ -1428,16 +1469,10 @@
 		return;
 	}
 
-	reg = I915_READ(DISPIO_CR_TX_BMU_CR0);
-	reg &= ~BALANCE_LEG_MASK(port);
-	reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port));
+	_skl_ddi_set_iboost(dev_priv, port, iboost);
 
-	if (iboost)
-		reg |= iboost << BALANCE_LEG_SHIFT(port);
-	else
-		reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port);
-
-	I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg);
+	if (port == PORT_A && intel_dig_port->max_lanes == 4)
+		_skl_ddi_set_iboost(dev_priv, PORT_E, iboost);
 }
 
 static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv,
@@ -1568,7 +1603,7 @@
 	level = translate_signal_level(signal_levels);
 
 	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
-		skl_ddi_set_iboost(dev_priv, level, port, encoder->type);
+		skl_ddi_set_iboost(encoder, level);
 	else if (IS_BROXTON(dev_priv))
 		bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type);
 
@@ -1637,6 +1672,10 @@
 			intel_dp_stop_link_train(intel_dp);
 	} else if (type == INTEL_OUTPUT_HDMI) {
 		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+		int level = intel_ddi_hdmi_level(dev_priv, port);
+
+		if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
+			skl_ddi_set_iboost(intel_encoder, level);
 
 		intel_hdmi->set_infoframes(encoder,
 					   crtc->config->has_hdmi_sink,
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c457eed..175595f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -3093,40 +3093,110 @@
 
 	for_each_crtc(dev, crtc) {
 		struct intel_plane *plane = to_intel_plane(crtc->primary);
-		struct intel_plane_state *plane_state;
-
-		drm_modeset_lock_crtc(crtc, &plane->base);
-		plane_state = to_intel_plane_state(plane->base.state);
+		struct intel_plane_state *plane_state =
+			to_intel_plane_state(plane->base.state);
 
 		if (plane_state->visible)
 			plane->update_plane(&plane->base,
 					    to_intel_crtc_state(crtc->state),
 					    plane_state);
-
-		drm_modeset_unlock_crtc(crtc);
 	}
 }
 
+static int
+__intel_display_resume(struct drm_device *dev,
+		       struct drm_atomic_state *state)
+{
+	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
+	int i, ret;
+
+	intel_modeset_setup_hw_state(dev);
+	i915_redisable_vga(dev);
+
+	if (!state)
+		return 0;
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		/*
+		 * Force recalculation even if we restore
+		 * current state. With fast modeset this may not result
+		 * in a modeset when the state is compatible.
+		 */
+		crtc_state->mode_changed = true;
+	}
+
+	/* ignore any reset values/BIOS leftovers in the WM registers */
+	to_intel_atomic_state(state)->skip_intermediate_wm = true;
+
+	ret = drm_atomic_commit(state);
+
+	WARN_ON(ret == -EDEADLK);
+	return ret;
+}
+
 void intel_prepare_reset(struct drm_i915_private *dev_priv)
 {
+	struct drm_device *dev = &dev_priv->drm;
+	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+	struct drm_atomic_state *state;
+	int ret;
+
 	/* no reset support for gen2 */
 	if (IS_GEN2(dev_priv))
 		return;
 
-	/* reset doesn't touch the display */
+	/*
+	 * Need mode_config.mutex so that we don't
+	 * trample ongoing ->detect() and whatnot.
+	 */
+	mutex_lock(&dev->mode_config.mutex);
+	drm_modeset_acquire_init(ctx, 0);
+	while (1) {
+		ret = drm_modeset_lock_all_ctx(dev, ctx);
+		if (ret != -EDEADLK)
+			break;
+
+		drm_modeset_backoff(ctx);
+	}
+
+	/* reset doesn't touch the display, but flips might get nuked anyway, */
 	if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv))
 		return;
 
-	drm_modeset_lock_all(&dev_priv->drm);
 	/*
 	 * Disabling the crtcs gracefully seems nicer. Also the
 	 * g33 docs say we should at least disable all the planes.
 	 */
-	intel_display_suspend(&dev_priv->drm);
+	state = drm_atomic_helper_duplicate_state(dev, ctx);
+	if (IS_ERR(state)) {
+		ret = PTR_ERR(state);
+		state = NULL;
+		DRM_ERROR("Duplicating state failed with %i\n", ret);
+		goto err;
+	}
+
+	ret = drm_atomic_helper_disable_all(dev, ctx);
+	if (ret) {
+		DRM_ERROR("Suspending crtc's failed with %i\n", ret);
+		goto err;
+	}
+
+	dev_priv->modeset_restore_state = state;
+	state->acquire_ctx = ctx;
+	return;
+
+err:
+	drm_atomic_state_free(state);
 }
 
 void intel_finish_reset(struct drm_i915_private *dev_priv)
 {
+	struct drm_device *dev = &dev_priv->drm;
+	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
+	int ret;
+
 	/*
 	 * Flips in the rings will be nuked by the reset,
 	 * so complete all pending flips so that user space
@@ -3138,6 +3208,8 @@
 	if (IS_GEN2(dev_priv))
 		return;
 
+	dev_priv->modeset_restore_state = NULL;
+
 	/* reset doesn't touch the display */
 	if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
 		/*
@@ -3149,29 +3221,32 @@
 		 * FIXME: Atomic will make this obsolete since we won't schedule
 		 * CS-based flips (which might get lost in gpu resets) any more.
 		 */
-		intel_update_primary_planes(&dev_priv->drm);
-		return;
+		intel_update_primary_planes(dev);
+	} else {
+		/*
+		 * The display has been reset as well,
+		 * so need a full re-initialization.
+		 */
+		intel_runtime_pm_disable_interrupts(dev_priv);
+		intel_runtime_pm_enable_interrupts(dev_priv);
+
+		intel_modeset_init_hw(dev);
+
+		spin_lock_irq(&dev_priv->irq_lock);
+		if (dev_priv->display.hpd_irq_setup)
+			dev_priv->display.hpd_irq_setup(dev_priv);
+		spin_unlock_irq(&dev_priv->irq_lock);
+
+		ret = __intel_display_resume(dev, state);
+		if (ret)
+			DRM_ERROR("Restoring old state failed with %i\n", ret);
+
+		intel_hpd_init(dev_priv);
 	}
 
-	/*
-	 * The display has been reset as well,
-	 * so need a full re-initialization.
-	 */
-	intel_runtime_pm_disable_interrupts(dev_priv);
-	intel_runtime_pm_enable_interrupts(dev_priv);
-
-	intel_modeset_init_hw(&dev_priv->drm);
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (dev_priv->display.hpd_irq_setup)
-		dev_priv->display.hpd_irq_setup(dev_priv);
-	spin_unlock_irq(&dev_priv->irq_lock);
-
-	intel_display_resume(&dev_priv->drm);
-
-	intel_hpd_init(dev_priv);
-
-	drm_modeset_unlock_all(&dev_priv->drm);
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
+	mutex_unlock(&dev->mode_config.mutex);
 }
 
 static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc)
@@ -5691,15 +5766,7 @@
 
 static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv)
 {
-	unsigned int i;
-
-	for (i = 0; i < 15; i++) {
-		if (skl_cdclk_pcu_ready(dev_priv))
-			return true;
-		udelay(10);
-	}
-
-	return false;
+	return _wait_for(skl_cdclk_pcu_ready(dev_priv), 3000, 10) == 0;
 }
 
 static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco)
@@ -12114,21 +12181,11 @@
 		pipe_config->pipe_bpp = connector->base.display_info.bpc*3;
 	}
 
-	/* Clamp bpp to default limit on screens without EDID 1.4 */
-	if (connector->base.display_info.bpc == 0) {
-		int type = connector->base.connector_type;
-		int clamp_bpp = 24;
-
-		/* Fall back to 18 bpp when DP sink capability is unknown. */
-		if (type == DRM_MODE_CONNECTOR_DisplayPort ||
-		    type == DRM_MODE_CONNECTOR_eDP)
-			clamp_bpp = 18;
-
-		if (bpp > clamp_bpp) {
-			DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of %d\n",
-				      bpp, clamp_bpp);
-			pipe_config->pipe_bpp = clamp_bpp;
-		}
+	/* Clamp bpp to 8 on screens without EDID 1.4 */
+	if (connector->base.display_info.bpc == 0 && bpp > 24) {
+		DRM_DEBUG_KMS("clamping display bpp (was %d) to default limit of 24\n",
+			      bpp);
+		pipe_config->pipe_bpp = 24;
 	}
 }
 
@@ -13702,6 +13759,13 @@
 		     intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco))
 			dev_priv->display.modeset_commit_cdclk(state);
 
+		/*
+		 * SKL workaround: bspec recommends we disable the SAGV when we
+		 * have more then one pipe enabled
+		 */
+		if (IS_SKYLAKE(dev_priv) && !skl_can_enable_sagv(state))
+			skl_disable_sagv(dev_priv);
+
 		intel_modeset_verify_disabled(dev);
 	}
 
@@ -13775,6 +13839,10 @@
 		intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state);
 	}
 
+	if (IS_SKYLAKE(dev_priv) && intel_state->modeset &&
+	    skl_can_enable_sagv(state))
+		skl_enable_sagv(dev_priv);
+
 	drm_atomic_helper_commit_hw_done(state);
 
 	if (intel_state->modeset)
@@ -16174,9 +16242,10 @@
 	struct drm_atomic_state *state = dev_priv->modeset_restore_state;
 	struct drm_modeset_acquire_ctx ctx;
 	int ret;
-	bool setup = false;
 
 	dev_priv->modeset_restore_state = NULL;
+	if (state)
+		state->acquire_ctx = &ctx;
 
 	/*
 	 * This is a cludge because with real atomic modeset mode_config.mutex
@@ -16187,43 +16256,17 @@
 	mutex_lock(&dev->mode_config.mutex);
 	drm_modeset_acquire_init(&ctx, 0);
 
-retry:
-	ret = drm_modeset_lock_all_ctx(dev, &ctx);
+	while (1) {
+		ret = drm_modeset_lock_all_ctx(dev, &ctx);
+		if (ret != -EDEADLK)
+			break;
 
-	if (ret == 0 && !setup) {
-		setup = true;
-
-		intel_modeset_setup_hw_state(dev);
-		i915_redisable_vga(dev);
-	}
-
-	if (ret == 0 && state) {
-		struct drm_crtc_state *crtc_state;
-		struct drm_crtc *crtc;
-		int i;
-
-		state->acquire_ctx = &ctx;
-
-		/* ignore any reset values/BIOS leftovers in the WM registers */
-		to_intel_atomic_state(state)->skip_intermediate_wm = true;
-
-		for_each_crtc_in_state(state, crtc, crtc_state, i) {
-			/*
-			 * Force recalculation even if we restore
-			 * current state. With fast modeset this may not result
-			 * in a modeset when the state is compatible.
-			 */
-			crtc_state->mode_changed = true;
-		}
-
-		ret = drm_atomic_commit(state);
-	}
-
-	if (ret == -EDEADLK) {
 		drm_modeset_backoff(&ctx);
-		goto retry;
 	}
 
+	if (!ret)
+		ret = __intel_display_resume(dev, state);
+
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 	mutex_unlock(&dev->mode_config.mutex);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index cc937a1..ff399b9 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1716,6 +1716,9 @@
 void skl_wm_get_hw_state(struct drm_device *dev);
 void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 			  struct skl_ddb_allocation *ddb /* out */);
+bool skl_can_enable_sagv(struct drm_atomic_state *state);
+int skl_enable_sagv(struct drm_i915_private *dev_priv);
+int skl_disable_sagv(struct drm_i915_private *dev_priv);
 uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config);
 bool ilk_disable_lp_wm(struct drm_device *dev);
 int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6);
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index 47bdf9d..b9e5a63 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -554,7 +554,6 @@
 		return;
 	}
 
-	drm_encoder_cleanup(&intel_encoder->base);
 	kfree(intel_dvo);
 	kfree(intel_connector);
 }
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index 6a7ad3e..3836a1c 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -1230,12 +1230,29 @@
 	if (i915.enable_fbc >= 0)
 		return !!i915.enable_fbc;
 
+	if (!HAS_FBC(dev_priv))
+		return 0;
+
 	if (IS_BROADWELL(dev_priv))
 		return 1;
 
 	return 0;
 }
 
+static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv)
+{
+#ifdef CONFIG_INTEL_IOMMU
+	/* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
+	if (intel_iommu_gfx_mapped &&
+	    (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) {
+		DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
+		return true;
+	}
+#endif
+
+	return false;
+}
+
 /**
  * intel_fbc_init - Initialize FBC
  * @dev_priv: the i915 device
@@ -1253,6 +1270,9 @@
 	fbc->active = false;
 	fbc->work.scheduled = false;
 
+	if (need_fbc_vtd_wa(dev_priv))
+		mkwrite_device_info(dev_priv)->has_fbc = false;
+
 	i915.enable_fbc = intel_sanitize_fbc_option(dev_priv);
 	DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc);
 
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index 86b00c6..3e3632c 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -782,7 +782,7 @@
 	struct intel_fbdev *ifbdev = dev_priv->fbdev;
 	struct fb_info *info;
 
-	if (!ifbdev)
+	if (!ifbdev || !ifbdev->fb)
 		return;
 
 	info = ifbdev->helper.fbdev;
@@ -827,31 +827,28 @@
 
 void intel_fbdev_output_poll_changed(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	if (dev_priv->fbdev)
-		drm_fb_helper_hotplug_event(&dev_priv->fbdev->helper);
+	struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;
+
+	if (ifbdev && ifbdev->fb)
+		drm_fb_helper_hotplug_event(&ifbdev->helper);
 }
 
 void intel_fbdev_restore_mode(struct drm_device *dev)
 {
-	int ret;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_fbdev *ifbdev = dev_priv->fbdev;
-	struct drm_fb_helper *fb_helper;
+	struct intel_fbdev *ifbdev = to_i915(dev)->fbdev;
 
 	if (!ifbdev)
 		return;
 
 	intel_fbdev_sync(ifbdev);
+	if (!ifbdev->fb)
+		return;
 
-	fb_helper = &ifbdev->helper;
-
-	ret = drm_fb_helper_restore_fbdev_mode_unlocked(fb_helper);
-	if (ret) {
+	if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) {
 		DRM_DEBUG("failed to restore crtc mode\n");
 	} else {
-		mutex_lock(&fb_helper->dev->struct_mutex);
+		mutex_lock(&dev->struct_mutex);
 		intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT);
-		mutex_unlock(&fb_helper->dev->struct_mutex);
+		mutex_unlock(&dev->struct_mutex);
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index adca262..7acbbbf 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -1047,6 +1047,23 @@
 	return err;
 }
 
+static int intel_use_opregion_panel_type_callback(const struct dmi_system_id *id)
+{
+	DRM_INFO("Using panel type from OpRegion on %s\n", id->ident);
+	return 1;
+}
+
+static const struct dmi_system_id intel_use_opregion_panel_type[] = {
+	{
+		.callback = intel_use_opregion_panel_type_callback,
+		.ident = "Conrac GmbH IX45GM2",
+		.matches = {DMI_MATCH(DMI_SYS_VENDOR, "Conrac GmbH"),
+			    DMI_MATCH(DMI_PRODUCT_NAME, "IX45GM2"),
+		},
+	},
+	{ }
+};
+
 int
 intel_opregion_get_panel_type(struct drm_i915_private *dev_priv)
 {
@@ -1073,6 +1090,16 @@
 	}
 
 	/*
+	 * So far we know that some machined must use it, others must not use it.
+	 * There doesn't seem to be any way to determine which way to go, except
+	 * via a quirk list :(
+	 */
+	if (!dmi_check_system(intel_use_opregion_panel_type)) {
+		DRM_DEBUG_KMS("Ignoring OpRegion panel type (%d)\n", ret - 1);
+		return -ENODEV;
+	}
+
+	/*
 	 * FIXME On Dell XPS 13 9350 the OpRegion panel type (0) gives us
 	 * low vswing for eDP, whereas the VBT panel type (2) gives us normal
 	 * vswing instead. Low vswing results in some display flickers, so
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index f4f3fcc..2d24813 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2852,6 +2852,7 @@
 
 #define SKL_DDB_SIZE		896	/* in blocks */
 #define BXT_DDB_SIZE		512
+#define SKL_SAGV_BLOCK_TIME	30 /* µs */
 
 /*
  * Return the index of a plane in the SKL DDB and wm result arrays.  Primary
@@ -2875,6 +2876,153 @@
 	}
 }
 
+/*
+ * SAGV dynamically adjusts the system agent voltage and clock frequencies
+ * depending on power and performance requirements. The display engine access
+ * to system memory is blocked during the adjustment time. Because of the
+ * blocking time, having this enabled can cause full system hangs and/or pipe
+ * underruns if we don't meet all of the following requirements:
+ *
+ *  - <= 1 pipe enabled
+ *  - All planes can enable watermarks for latencies >= SAGV engine block time
+ *  - We're not using an interlaced display configuration
+ */
+int
+skl_enable_sagv(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED ||
+	    dev_priv->skl_sagv_status == I915_SKL_SAGV_ENABLED)
+		return 0;
+
+	DRM_DEBUG_KMS("Enabling the SAGV\n");
+	mutex_lock(&dev_priv->rps.hw_lock);
+
+	ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL,
+				      GEN9_SAGV_ENABLE);
+
+	/* We don't need to wait for the SAGV when enabling */
+	mutex_unlock(&dev_priv->rps.hw_lock);
+
+	/*
+	 * Some skl systems, pre-release machines in particular,
+	 * don't actually have an SAGV.
+	 */
+	if (ret == -ENXIO) {
+		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+		dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED;
+		return 0;
+	} else if (ret < 0) {
+		DRM_ERROR("Failed to enable the SAGV\n");
+		return ret;
+	}
+
+	dev_priv->skl_sagv_status = I915_SKL_SAGV_ENABLED;
+	return 0;
+}
+
+static int
+skl_do_sagv_disable(struct drm_i915_private *dev_priv)
+{
+	int ret;
+	uint32_t temp = GEN9_SAGV_DISABLE;
+
+	ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL,
+				     &temp);
+	if (ret)
+		return ret;
+	else
+		return temp & GEN9_SAGV_IS_DISABLED;
+}
+
+int
+skl_disable_sagv(struct drm_i915_private *dev_priv)
+{
+	int ret, result;
+
+	if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED ||
+	    dev_priv->skl_sagv_status == I915_SKL_SAGV_DISABLED)
+		return 0;
+
+	DRM_DEBUG_KMS("Disabling the SAGV\n");
+	mutex_lock(&dev_priv->rps.hw_lock);
+
+	/* bspec says to keep retrying for at least 1 ms */
+	ret = wait_for(result = skl_do_sagv_disable(dev_priv), 1);
+	mutex_unlock(&dev_priv->rps.hw_lock);
+
+	if (ret == -ETIMEDOUT) {
+		DRM_ERROR("Request to disable SAGV timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	/*
+	 * Some skl systems, pre-release machines in particular,
+	 * don't actually have an SAGV.
+	 */
+	if (result == -ENXIO) {
+		DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n");
+		dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED;
+		return 0;
+	} else if (result < 0) {
+		DRM_ERROR("Failed to disable the SAGV\n");
+		return result;
+	}
+
+	dev_priv->skl_sagv_status = I915_SKL_SAGV_DISABLED;
+	return 0;
+}
+
+bool skl_can_enable_sagv(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_atomic_state *intel_state = to_intel_atomic_state(state);
+	struct drm_crtc *crtc;
+	enum pipe pipe;
+	int level, plane;
+
+	/*
+	 * SKL workaround: bspec recommends we disable the SAGV when we have
+	 * more then one pipe enabled
+	 *
+	 * If there are no active CRTCs, no additional checks need be performed
+	 */
+	if (hweight32(intel_state->active_crtcs) == 0)
+		return true;
+	else if (hweight32(intel_state->active_crtcs) > 1)
+		return false;
+
+	/* Since we're now guaranteed to only have one active CRTC... */
+	pipe = ffs(intel_state->active_crtcs) - 1;
+	crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+
+	if (crtc->state->mode.flags & DRM_MODE_FLAG_INTERLACE)
+		return false;
+
+	for_each_plane(dev_priv, pipe, plane) {
+		/* Skip this plane if it's not enabled */
+		if (intel_state->wm_results.plane[pipe][plane][0] == 0)
+			continue;
+
+		/* Find the highest enabled wm level for this plane */
+		for (level = ilk_wm_max_level(dev);
+		     intel_state->wm_results.plane[pipe][plane][level] == 0; --level)
+		     { }
+
+		/*
+		 * If any of the planes on this pipe don't enable wm levels
+		 * that incur memory latencies higher then 30µs we can't enable
+		 * the SAGV
+		 */
+		if (dev_priv->wm.skl_latency[level] < SKL_SAGV_BLOCK_TIME)
+			return false;
+	}
+
+	return true;
+}
+
 static void
 skl_ddb_get_pipe_allocation_limits(struct drm_device *dev,
 				   const struct intel_crtc_state *cstate,
@@ -3107,8 +3255,6 @@
 		total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id];
 	}
 
-	WARN_ON(cstate->plane_mask && total_data_rate == 0);
-
 	return total_data_rate;
 }
 
@@ -3344,6 +3490,8 @@
 		plane_bytes_per_line *= 4;
 		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
 		plane_blocks_per_line /= 4;
+	} else if (tiling == DRM_FORMAT_MOD_NONE) {
+		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1;
 	} else {
 		plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512);
 	}
@@ -3910,9 +4058,24 @@
 	 * pretend that all pipes switched active status so that we'll
 	 * ensure a full DDB recompute.
 	 */
-	if (dev_priv->wm.distrust_bios_wm)
+	if (dev_priv->wm.distrust_bios_wm) {
+		ret = drm_modeset_lock(&dev->mode_config.connection_mutex,
+				       state->acquire_ctx);
+		if (ret)
+			return ret;
+
 		intel_state->active_pipe_changes = ~0;
 
+		/*
+		 * We usually only initialize intel_state->active_crtcs if we
+		 * we're doing a modeset; make sure this field is always
+		 * initialized during the sanitization process that happens
+		 * on the first commit too.
+		 */
+		if (!intel_state->modeset)
+			intel_state->active_crtcs = dev_priv->active_crtcs;
+	}
+
 	/*
 	 * If the modeset changes which CRTC's are active, we need to
 	 * recompute the DDB allocation for *all* active pipes, even
@@ -3941,11 +4104,33 @@
 		ret = skl_allocate_pipe_ddb(cstate, ddb);
 		if (ret)
 			return ret;
+
+		ret = drm_atomic_add_affected_planes(state, &intel_crtc->base);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
 }
 
+static void
+skl_copy_wm_for_pipe(struct skl_wm_values *dst,
+		     struct skl_wm_values *src,
+		     enum pipe pipe)
+{
+	dst->wm_linetime[pipe] = src->wm_linetime[pipe];
+	memcpy(dst->plane[pipe], src->plane[pipe],
+	       sizeof(dst->plane[pipe]));
+	memcpy(dst->plane_trans[pipe], src->plane_trans[pipe],
+	       sizeof(dst->plane_trans[pipe]));
+
+	dst->ddb.pipe[pipe] = src->ddb.pipe[pipe];
+	memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe],
+	       sizeof(dst->ddb.y_plane[pipe]));
+	memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe],
+	       sizeof(dst->ddb.plane[pipe]));
+}
+
 static int
 skl_compute_wm(struct drm_atomic_state *state)
 {
@@ -4018,8 +4203,10 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct skl_wm_values *results = &dev_priv->wm.skl_results;
+	struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw;
 	struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state);
 	struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal;
+	int pipe;
 
 	if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0)
 		return;
@@ -4031,8 +4218,12 @@
 	skl_write_wm_values(dev_priv, results);
 	skl_flush_wm_values(dev_priv, results);
 
-	/* store the new configuration */
-	dev_priv->wm.skl_hw = *results;
+	/*
+	 * Store the new configuration (but only for the pipes that have
+	 * changed; the other values weren't recomputed).
+	 */
+	for_each_pipe_masked(dev_priv, pipe, results->dirty_pipes)
+		skl_copy_wm_for_pipe(hw_vals, results, pipe);
 
 	mutex_unlock(&dev_priv->wm.wm_mutex);
 }
@@ -4892,7 +5083,8 @@
 		else
 			gen6_set_rps(dev_priv, dev_priv->rps.idle_freq);
 		dev_priv->rps.last_adj = 0;
-		I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
+		I915_WRITE(GEN6_PMINTRMSK,
+			   gen6_sanitize_rps_pm_mask(dev_priv, ~0));
 	}
 	mutex_unlock(&dev_priv->rps.hw_lock);
 
@@ -6573,9 +6765,7 @@
 
 void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv)
 {
-	if (IS_CHERRYVIEW(dev_priv))
-		return;
-	else if (IS_VALLEYVIEW(dev_priv))
+	if (IS_VALLEYVIEW(dev_priv))
 		valleyview_cleanup_gt_powersave(dev_priv);
 
 	if (!i915.enable_rc6)
@@ -7657,8 +7847,54 @@
 	}
 }
 
+static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
+{
+	uint32_t flags =
+		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
+
+	switch (flags) {
+	case GEN6_PCODE_SUCCESS:
+		return 0;
+	case GEN6_PCODE_UNIMPLEMENTED_CMD:
+	case GEN6_PCODE_ILLEGAL_CMD:
+		return -ENXIO;
+	case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+		return -EOVERFLOW;
+	case GEN6_PCODE_TIMEOUT:
+		return -ETIMEDOUT;
+	default:
+		MISSING_CASE(flags)
+		return 0;
+	}
+}
+
+static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv)
+{
+	uint32_t flags =
+		I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK;
+
+	switch (flags) {
+	case GEN6_PCODE_SUCCESS:
+		return 0;
+	case GEN6_PCODE_ILLEGAL_CMD:
+		return -ENXIO;
+	case GEN7_PCODE_TIMEOUT:
+		return -ETIMEDOUT;
+	case GEN7_PCODE_ILLEGAL_DATA:
+		return -EINVAL;
+	case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE:
+		return -EOVERFLOW;
+	default:
+		MISSING_CASE(flags);
+		return 0;
+	}
+}
+
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val)
 {
+	int status;
+
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
 	/* GEN6_PCODE_* are outside of the forcewake domain, we can
@@ -7685,12 +7921,25 @@
 	*val = I915_READ_FW(GEN6_PCODE_DATA);
 	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
+	if (INTEL_GEN(dev_priv) > 6)
+		status = gen7_check_mailbox_status(dev_priv);
+	else
+		status = gen6_check_mailbox_status(dev_priv);
+
+	if (status) {
+		DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n",
+				 status);
+		return status;
+	}
+
 	return 0;
 }
 
 int sandybridge_pcode_write(struct drm_i915_private *dev_priv,
-			       u32 mbox, u32 val)
+			    u32 mbox, u32 val)
 {
+	int status;
+
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
 	/* GEN6_PCODE_* are outside of the forcewake domain, we can
@@ -7715,6 +7964,17 @@
 
 	I915_WRITE_FW(GEN6_PCODE_DATA, 0);
 
+	if (INTEL_GEN(dev_priv) > 6)
+		status = gen7_check_mailbox_status(dev_priv);
+	else
+		status = gen6_check_mailbox_status(dev_priv);
+
+	if (status) {
+		DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n",
+				 status);
+		return status;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 2b0d1ba..cf171b4 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -255,14 +255,14 @@
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
 	uint32_t max_sleep_time = 0x1f;
-	/* Lately it was identified that depending on panel idle frame count
-	 * calculated at HW can be off by 1. So let's use what came
-	 * from VBT + 1.
-	 * There are also other cases where panel demands at least 4
-	 * but VBT is not being set. To cover these 2 cases lets use
-	 * at least 5 when VBT isn't set to be on the safest side.
+	/*
+	 * Let's respect VBT in case VBT asks a higher idle_frame value.
+	 * Let's use 6 as the minimum to cover all known cases including
+	 * the off-by-one issue that HW has in some cases. Also there are
+	 * cases where sink should be able to train
+	 * with the 5 or 6 idle patterns.
 	 */
-	uint32_t idle_frames = dev_priv->vbt.psr.idle_frames + 1;
+	uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames);
 	uint32_t val = EDP_PSR_ENABLE;
 
 	val |= max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index cca7792..1d3161b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1178,8 +1178,8 @@
 		I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) |
 					   L3_HIGH_PRIO_CREDITS(2));
 
-	/* WaInsertDummyPushConstPs:bxt */
-	if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0))
+	/* WaToEnableHwFixForPushConstHWBug:bxt */
+	if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER))
 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
@@ -1222,8 +1222,8 @@
 		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
 			   GEN8_LQSC_RO_PERF_DIS);
 
-	/* WaInsertDummyPushConstPs:kbl */
-	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0))
+	/* WaToEnableHwFixForPushConstHWBug:kbl */
+	if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER))
 		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
 				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
 
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 9f7dafc..7bf90e9 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -171,10 +171,34 @@
 	drm_fbdev_cma_hotplug_event(imxdrm->fbhelper);
 }
 
+static int imx_drm_atomic_check(struct drm_device *dev,
+				struct drm_atomic_state *state)
+{
+	int ret;
+
+	ret = drm_atomic_helper_check_modeset(dev, state);
+	if (ret)
+		return ret;
+
+	ret = drm_atomic_helper_check_planes(dev, state);
+	if (ret)
+		return ret;
+
+	/*
+	 * Check modeset again in case crtc_state->mode_changed is
+	 * updated in plane's ->atomic_check callback.
+	 */
+	ret = drm_atomic_helper_check_modeset(dev, state);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
 static const struct drm_mode_config_funcs imx_drm_mode_config_funcs = {
 	.fb_create = drm_fb_cma_create,
 	.output_poll_changed = imx_drm_output_poll_changed,
-	.atomic_check = drm_atomic_helper_check,
+	.atomic_check = imx_drm_atomic_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
 
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index 08e188b..462056e 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -76,6 +76,8 @@
 		crtc->state->event = NULL;
 	}
 	spin_unlock_irq(&crtc->dev->event_lock);
+
+	drm_crtc_vblank_off(crtc);
 }
 
 static void imx_drm_crtc_reset(struct drm_crtc *crtc)
@@ -175,6 +177,8 @@
 static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
 				  struct drm_crtc_state *old_crtc_state)
 {
+	drm_crtc_vblank_on(crtc);
+
 	spin_lock_irq(&crtc->dev->event_lock);
 	if (crtc->state->event) {
 		WARN_ON(drm_crtc_vblank_get(crtc));
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index 4ad67d0..29423e75 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -319,13 +319,14 @@
 		return -EINVAL;
 
 	/*
-	 * since we cannot touch active IDMAC channels, we do not support
-	 * resizing the enabled plane or changing its format
+	 * We support resizing active plane or changing its format by
+	 * forcing CRTC mode change and disabling-enabling plane in plane's
+	 * ->atomic_update callback.
 	 */
 	if (old_fb && (state->src_w != old_state->src_w ||
 			      state->src_h != old_state->src_h ||
 			      fb->pixel_format != old_fb->pixel_format))
-		return -EINVAL;
+		crtc_state->mode_changed = true;
 
 	eba = drm_plane_state_to_eba(state);
 
@@ -336,7 +337,7 @@
 		return -EINVAL;
 
 	if (old_fb && fb->pitches[0] != old_fb->pitches[0])
-		return -EINVAL;
+		crtc_state->mode_changed = true;
 
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_YUV420:
@@ -372,7 +373,7 @@
 			return -EINVAL;
 
 		if (old_fb && old_fb->pitches[1] != fb->pitches[1])
-			return -EINVAL;
+			crtc_state->mode_changed = true;
 	}
 
 	return 0;
@@ -392,8 +393,14 @@
 	enum ipu_color_space ics;
 
 	if (old_state->fb) {
-		ipu_plane_atomic_set_base(ipu_plane, old_state);
-		return;
+		struct drm_crtc_state *crtc_state = state->crtc->state;
+
+		if (!crtc_state->mode_changed) {
+			ipu_plane_atomic_set_base(ipu_plane, old_state);
+			return;
+		}
+
+		ipu_disable_plane(plane);
 	}
 
 	switch (ipu_plane->dp_flow) {
diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig
index 23ac804..294de45 100644
--- a/drivers/gpu/drm/mediatek/Kconfig
+++ b/drivers/gpu/drm/mediatek/Kconfig
@@ -2,6 +2,9 @@
 	tristate "DRM Support for Mediatek SoCs"
 	depends on DRM
 	depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST)
+	depends on COMMON_CLK
+	depends on HAVE_ARM_SMCCC
+	depends on OF
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
 	select DRM_MIPI_DSI
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index b4bc7f1..d0da52f 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -157,6 +157,12 @@
 	struct shrinker shrinker;
 
 	struct msm_vblank_ctrl vblank_ctrl;
+
+	/* task holding struct_mutex.. currently only used in submit path
+	 * to detect and reject faults from copy_from_user() for submit
+	 * ioctl.
+	 */
+	struct task_struct *struct_mutex_task;
 };
 
 struct msm_format {
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 6cd4af4..85f3047 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -196,11 +196,20 @@
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct drm_device *dev = obj->dev;
+	struct msm_drm_private *priv = dev->dev_private;
 	struct page **pages;
 	unsigned long pfn;
 	pgoff_t pgoff;
 	int ret;
 
+	/* This should only happen if userspace tries to pass a mmap'd
+	 * but unfaulted gem bo vaddr into submit ioctl, triggering
+	 * a page fault while struct_mutex is already held.  This is
+	 * not a valid use-case so just bail.
+	 */
+	if (priv->struct_mutex_task == current)
+		return VM_FAULT_SIGBUS;
+
 	/* Make sure we don't parallel update on a fault, nor move or remove
 	 * something from beneath our feet
 	 */
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 9766f9a..880d6a9 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -64,6 +64,14 @@
 	kfree(submit);
 }
 
+static inline unsigned long __must_check
+copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
+{
+	if (access_ok(VERIFY_READ, from, n))
+		return __copy_from_user_inatomic(to, from, n);
+	return -EFAULT;
+}
+
 static int submit_lookup_objects(struct msm_gem_submit *submit,
 		struct drm_msm_gem_submit *args, struct drm_file *file)
 {
@@ -71,6 +79,7 @@
 	int ret = 0;
 
 	spin_lock(&file->table_lock);
+	pagefault_disable();
 
 	for (i = 0; i < args->nr_bos; i++) {
 		struct drm_msm_gem_submit_bo submit_bo;
@@ -84,10 +93,15 @@
 		 */
 		submit->bos[i].flags = 0;
 
-		ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
-		if (ret) {
-			ret = -EFAULT;
-			goto out_unlock;
+		ret = copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo));
+		if (unlikely(ret)) {
+			pagefault_enable();
+			spin_unlock(&file->table_lock);
+			ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
+			if (ret)
+				goto out;
+			spin_lock(&file->table_lock);
+			pagefault_disable();
 		}
 
 		if (submit_bo.flags & ~MSM_SUBMIT_BO_FLAGS) {
@@ -127,9 +141,12 @@
 	}
 
 out_unlock:
-	submit->nr_bos = i;
+	pagefault_enable();
 	spin_unlock(&file->table_lock);
 
+out:
+	submit->nr_bos = i;
+
 	return ret;
 }
 
@@ -377,6 +394,8 @@
 	if (ret)
 		return ret;
 
+	priv->struct_mutex_task = current;
+
 	submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds);
 	if (!submit) {
 		ret = -ENOMEM;
@@ -468,6 +487,7 @@
 	if (ret)
 		msm_gem_submit_free(submit);
 out_unlock:
+	priv->struct_mutex_task = NULL;
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 7ea8aa7..6bc712f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -175,6 +175,7 @@
 	void (*fini)(struct nvkm_device *, bool suspend);
 	resource_size_t (*resource_addr)(struct nvkm_device *, unsigned bar);
 	resource_size_t (*resource_size)(struct nvkm_device *, unsigned bar);
+	bool cpu_coherent;
 };
 
 struct nvkm_device_quirk {
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index f2ad17a..dc57b62 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -225,6 +225,17 @@
 	if (!parent_pdev)
 		return false;
 
+	if (!parent_pdev->bridge_d3) {
+		/*
+		 * Parent PCI bridge is currently not power managed.
+		 * Since userspace can change these afterwards to be on
+		 * the safe side we stick with _DSM and prevent usage of
+		 * _PR3 from the bridge.
+		 */
+		pci_d3cold_disable(pdev);
+		return false;
+	}
+
 	parent_adev = ACPI_COMPANION(&parent_pdev->dev);
 	if (!parent_adev)
 		return false;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 528bdef..864323b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -209,7 +209,8 @@
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
 
-	nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
+	if (!nvxx_device(&drm->device)->func->cpu_coherent)
+		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
 
 	nvbo->page_shift = 12;
 	if (drm->client.vm) {
@@ -1151,7 +1152,7 @@
 	if (ret)
 		goto out;
 
-	ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
+	ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, new_mem);
 out:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return ret;
@@ -1179,7 +1180,7 @@
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
+	ret = ttm_bo_move_ttm(bo, true, intr, no_wait_gpu, &tmp_mem);
 	if (ret)
 		goto out;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index b1b6932..62ad030 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1614,6 +1614,7 @@
 	.fini = nvkm_device_pci_fini,
 	.resource_addr = nvkm_device_pci_resource_addr,
 	.resource_size = nvkm_device_pci_resource_size,
+	.cpu_coherent = !IS_ENABLED(CONFIG_ARM),
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 939682f..9b638bd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -245,6 +245,7 @@
 	.fini = nvkm_device_tegra_fini,
 	.resource_addr = nvkm_device_tegra_resource_addr,
 	.resource_size = nvkm_device_tegra_resource_size,
+	.cpu_coherent = false,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
index edec30f..0a7b6ed 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
@@ -37,7 +37,10 @@
 {
 	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
 	struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem;
+
+	mutex_lock(&chan->fifo->base.engine.subdev.mutex);
 	nvkm_ramht_remove(imem->ramht, cookie);
+	mutex_unlock(&chan->fifo->base.engine.subdev.mutex);
 }
 
 static int
diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c
index df26570..28c1423 100644
--- a/drivers/gpu/drm/qxl/qxl_fb.c
+++ b/drivers/gpu/drm/qxl/qxl_fb.c
@@ -73,10 +73,12 @@
 	}
 }
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 static struct fb_deferred_io qxl_defio = {
 	.delay		= QXL_DIRTY_DELAY,
 	.deferred_io	= drm_fb_helper_deferred_io,
 };
+#endif
 
 static struct fb_ops qxlfb_ops = {
 	.owner = THIS_MODULE,
@@ -313,8 +315,10 @@
 		goto out_destroy_fbi;
 	}
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	info->fbdefio = &qxl_defio;
 	fb_deferred_io_init(info);
+#endif
 
 	qdev->fbdev_info = info;
 	qdev->fbdev_qfb = &qfbdev->qfb;
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index a97abc8..1dcf390 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -627,7 +627,9 @@
 			if (radeon_crtc->ss.refdiv) {
 				radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
 				radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
-				if (rdev->family >= CHIP_RV770)
+				if (ASIC_IS_AVIVO(rdev) &&
+				    rdev->family != CHIP_RS780 &&
+				    rdev->family != CHIP_RS880)
 					radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 			}
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 6de3428..ddef0d4 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -198,16 +198,7 @@
 	atpx->is_hybrid = false;
 	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
 		printk("ATPX Hybrid Graphics\n");
-#if 1
-		/* This is a temporary hack until the D3 cold support
-		 * makes it upstream.  The ATPX power_control method seems
-		 * to still work on even if the system should be using
-		 * the new standardized hybrid D3 cold ACPI interface.
-		 */
-		atpx->functions.power_cntl = true;
-#else
 		atpx->functions.power_cntl = false;
-#endif
 		atpx->is_hybrid = true;
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index ffdad81..c2e0a1c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -263,8 +263,8 @@
 
 	rdev = radeon_get_rdev(bo->bdev);
 	ridx = radeon_copy_ring_index(rdev);
-	old_start = old_mem->start << PAGE_SHIFT;
-	new_start = new_mem->start << PAGE_SHIFT;
+	old_start = (u64)old_mem->start << PAGE_SHIFT;
+	new_start = (u64)new_mem->start << PAGE_SHIFT;
 
 	switch (old_mem->mem_type) {
 	case TTM_PL_VRAM:
@@ -346,7 +346,7 @@
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
-	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);
+	r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, new_mem);
 out_cleanup:
 	ttm_bo_mem_put(bo, &tmp_mem);
 	return r;
@@ -379,7 +379,7 @@
 	if (unlikely(r)) {
 		return r;
 	}
-	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
+	r = ttm_bo_move_ttm(bo, true, interruptible, no_wait_gpu, &tmp_mem);
 	if (unlikely(r)) {
 		goto out_cleanup;
 	}
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index e6abc09..1f78ec2 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -3015,6 +3015,12 @@
 	if (rdev->pdev->device == 0x6811 &&
 	    rdev->pdev->revision == 0x81)
 		max_mclk = 120000;
+	/* limit sclk/mclk on Jet parts for stability */
+	if (rdev->pdev->device == 0x6665 &&
+	    rdev->pdev->revision == 0xc3) {
+		max_sclk = 75000;
+		max_mclk = 80000;
+	}
 
 	if (rps->vce_active) {
 		rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
index 4de3ff0..e03004f 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
@@ -125,6 +125,7 @@
 
 	/* Link drm_bridge to encoder */
 	bridge->encoder = encoder;
+	encoder->bridge = bridge;
 
 	ret = drm_bridge_attach(rcdu->ddev, bridge);
 	if (ret) {
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 3d228ad..3dea121 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -840,6 +840,21 @@
 	.destroy = tegra_output_encoder_destroy,
 };
 
+static void tegra_dsi_unprepare(struct tegra_dsi *dsi)
+{
+	int err;
+
+	if (dsi->slave)
+		tegra_dsi_unprepare(dsi->slave);
+
+	err = tegra_mipi_disable(dsi->mipi);
+	if (err < 0)
+		dev_err(dsi->dev, "failed to disable MIPI calibration: %d\n",
+			err);
+
+	pm_runtime_put(dsi->dev);
+}
+
 static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
 {
 	struct tegra_output *output = encoder_to_output(encoder);
@@ -876,7 +891,26 @@
 
 	tegra_dsi_disable(dsi);
 
-	pm_runtime_put(dsi->dev);
+	tegra_dsi_unprepare(dsi);
+}
+
+static void tegra_dsi_prepare(struct tegra_dsi *dsi)
+{
+	int err;
+
+	pm_runtime_get_sync(dsi->dev);
+
+	err = tegra_mipi_enable(dsi->mipi);
+	if (err < 0)
+		dev_err(dsi->dev, "failed to enable MIPI calibration: %d\n",
+			err);
+
+	err = tegra_dsi_pad_calibrate(dsi);
+	if (err < 0)
+		dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
+
+	if (dsi->slave)
+		tegra_dsi_prepare(dsi->slave);
 }
 
 static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
@@ -887,13 +921,8 @@
 	struct tegra_dsi *dsi = to_dsi(output);
 	struct tegra_dsi_state *state;
 	u32 value;
-	int err;
 
-	pm_runtime_get_sync(dsi->dev);
-
-	err = tegra_dsi_pad_calibrate(dsi);
-	if (err < 0)
-		dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
+	tegra_dsi_prepare(dsi);
 
 	state = tegra_dsi_get_state(dsi);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 4054d80..42c074a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -354,7 +354,8 @@
 
 	if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
 	    !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED))
-		ret = ttm_bo_move_ttm(bo, evict, no_wait_gpu, mem);
+		ret = ttm_bo_move_ttm(bo, evict, interruptible, no_wait_gpu,
+				      mem);
 	else if (bdev->driver->move)
 		ret = bdev->driver->move(bo, evict, interruptible,
 					 no_wait_gpu, mem);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 2df602a..f157a9e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -45,7 +45,7 @@
 }
 
 int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-		    bool evict,
+		    bool evict, bool interruptible,
 		    bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
 	struct ttm_tt *ttm = bo->ttm;
@@ -53,6 +53,14 @@
 	int ret;
 
 	if (old_mem->mem_type != TTM_PL_SYSTEM) {
+		ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+
+		if (unlikely(ret != 0)) {
+			if (ret != -ERESTARTSYS)
+				pr_err("Failed to expire sync object before unbinding TTM\n");
+			return ret;
+		}
+
 		ttm_tt_unbind(ttm);
 		ttm_bo_free_old_node(bo);
 		ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM,
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index d5df555..611b6b9 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -122,7 +122,7 @@
 		return 0;
 	cmd = urb->transfer_buffer;
 
-	for (i = y; i < height ; i++) {
+	for (i = y; i < y + height ; i++) {
 		const int line_offset = fb->base.pitches[0] * i;
 		const int byte_offset = line_offset + (x * bpp);
 		const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp);
@@ -203,6 +203,7 @@
 
 	ufbdev->fb_count++;
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	if (fb_defio && (info->fbdefio == NULL)) {
 		/* enable defio at last moment if not disabled by client */
 
@@ -218,6 +219,7 @@
 		info->fbdefio = fbdefio;
 		fb_deferred_io_init(info);
 	}
+#endif
 
 	pr_notice("open /dev/fb%d user=%d fb_info=%p count=%d\n",
 		  info->node, user, info, ufbdev->fb_count);
@@ -235,12 +237,14 @@
 
 	ufbdev->fb_count--;
 
+#ifdef CONFIG_DRM_FBDEV_EMULATION
 	if ((ufbdev->fb_count == 0) && (info->fbdefio)) {
 		fb_deferred_io_cleanup(info);
 		kfree(info->fbdefio);
 		info->fbdefio = NULL;
 		info->fbops->fb_mmap = udl_fb_mmap;
 	}
+#endif
 
 	pr_warn("released /dev/fb%d user=%d count=%d\n",
 		info->node, user, ufbdev->fb_count);
diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 59adcf8..3f6704c 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -144,7 +144,7 @@
 	return &vc4->bo_cache.size_list[page_index];
 }
 
-void vc4_bo_cache_purge(struct drm_device *dev)
+static void vc4_bo_cache_purge(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 8b42d31..9ecef93 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -57,21 +57,21 @@
 	switch (args->param) {
 	case DRM_VC4_PARAM_V3D_IDENT0:
 		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-		if (ret)
+		if (ret < 0)
 			return ret;
 		args->value = V3D_READ(V3D_IDENT0);
 		pm_runtime_put(&vc4->v3d->pdev->dev);
 		break;
 	case DRM_VC4_PARAM_V3D_IDENT1:
 		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-		if (ret)
+		if (ret < 0)
 			return ret;
 		args->value = V3D_READ(V3D_IDENT1);
 		pm_runtime_put(&vc4->v3d->pdev->dev);
 		break;
 	case DRM_VC4_PARAM_V3D_IDENT2:
 		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
-		if (ret)
+		if (ret < 0)
 			return ret;
 		args->value = V3D_READ(V3D_IDENT2);
 		pm_runtime_put(&vc4->v3d->pdev->dev);
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 489e3de..428e249 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -321,6 +321,15 @@
 				struct vc4_exec_info, head);
 }
 
+static inline struct vc4_exec_info *
+vc4_last_render_job(struct vc4_dev *vc4)
+{
+	if (list_empty(&vc4->render_job_list))
+		return NULL;
+	return list_last_entry(&vc4->render_job_list,
+			       struct vc4_exec_info, head);
+}
+
 /**
  * struct vc4_texture_sample_info - saves the offsets into the UBO for texture
  * setup parameters.
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 6155e8a..b262c5c 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -534,8 +534,8 @@
 		return -EINVAL;
 	}
 
-	exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
-			   GFP_KERNEL);
+	exec->bo = drm_calloc_large(exec->bo_count,
+				    sizeof(struct drm_gem_cma_object *));
 	if (!exec->bo) {
 		DRM_ERROR("Failed to allocate validated BO pointers\n");
 		return -ENOMEM;
@@ -572,8 +572,8 @@
 	spin_unlock(&file_priv->table_lock);
 
 fail:
-	kfree(handles);
-	return 0;
+	drm_free_large(handles);
+	return ret;
 }
 
 static int
@@ -608,7 +608,7 @@
 	 * read the contents back for validation, and I think the
 	 * bo->vaddr is uncached access.
 	 */
-	temp = kmalloc(temp_size, GFP_KERNEL);
+	temp = drm_malloc_ab(temp_size, 1);
 	if (!temp) {
 		DRM_ERROR("Failed to allocate storage for copying "
 			  "in bin/render CLs.\n");
@@ -675,7 +675,7 @@
 	ret = vc4_validate_shader_recs(dev, exec);
 
 fail:
-	kfree(temp);
+	drm_free_large(temp);
 	return ret;
 }
 
@@ -688,7 +688,7 @@
 	if (exec->bo) {
 		for (i = 0; i < exec->bo_count; i++)
 			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
-		kfree(exec->bo);
+		drm_free_large(exec->bo);
 	}
 
 	while (!list_empty(&exec->unref_list)) {
@@ -942,8 +942,8 @@
 		vc4->overflow_mem = NULL;
 	}
 
-	vc4_bo_cache_destroy(dev);
-
 	if (vc4->hang_state)
 		vc4_free_hang_state(dev, vc4->hang_state);
+
+	vc4_bo_cache_destroy(dev);
 }
diff --git a/drivers/gpu/drm/vc4/vc4_irq.c b/drivers/gpu/drm/vc4/vc4_irq.c
index b0104a34..094bc6a 100644
--- a/drivers/gpu/drm/vc4/vc4_irq.c
+++ b/drivers/gpu/drm/vc4/vc4_irq.c
@@ -83,8 +83,10 @@
 
 		spin_lock_irqsave(&vc4->job_lock, irqflags);
 		current_exec = vc4_first_bin_job(vc4);
+		if (!current_exec)
+			current_exec = vc4_last_render_job(vc4);
 		if (current_exec) {
-			vc4->overflow_mem->seqno = vc4->finished_seqno + 1;
+			vc4->overflow_mem->seqno = current_exec->seqno;
 			list_add_tail(&vc4->overflow_mem->unref_head,
 				      &current_exec->unref_list);
 			vc4->overflow_mem = NULL;
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
index 46527e9..2543cf5 100644
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -309,8 +309,14 @@
 	 * of uniforms on each side.  However, this scheme is easy to
 	 * validate so it's all we allow for now.
 	 */
-
-	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
+	switch (QPU_GET_FIELD(inst, QPU_SIG)) {
+	case QPU_SIG_NONE:
+	case QPU_SIG_SCOREBOARD_UNLOCK:
+	case QPU_SIG_COLOR_LOAD:
+	case QPU_SIG_LOAD_TMU0:
+	case QPU_SIG_LOAD_TMU1:
+		break;
+	default:
 		DRM_ERROR("uniforms address change must be "
 			  "normal math\n");
 		return false;
diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c
index 52a6fd2..e00809d 100644
--- a/drivers/gpu/host1x/mipi.c
+++ b/drivers/gpu/host1x/mipi.c
@@ -242,20 +242,6 @@
 	dev->pads = args.args[0];
 	dev->device = device;
 
-	mutex_lock(&dev->mipi->lock);
-
-	if (dev->mipi->usage_count++ == 0) {
-		err = tegra_mipi_power_up(dev->mipi);
-		if (err < 0) {
-			dev_err(dev->mipi->dev,
-				"failed to power up MIPI bricks: %d\n",
-				err);
-			return ERR_PTR(err);
-		}
-	}
-
-	mutex_unlock(&dev->mipi->lock);
-
 	return dev;
 
 put:
@@ -270,30 +256,43 @@
 
 void tegra_mipi_free(struct tegra_mipi_device *device)
 {
-	int err;
-
-	mutex_lock(&device->mipi->lock);
-
-	if (--device->mipi->usage_count == 0) {
-		err = tegra_mipi_power_down(device->mipi);
-		if (err < 0) {
-			/*
-			 * Not much that can be done here, so an error message
-			 * will have to do.
-			 */
-			dev_err(device->mipi->dev,
-				"failed to power down MIPI bricks: %d\n",
-				err);
-		}
-	}
-
-	mutex_unlock(&device->mipi->lock);
-
 	platform_device_put(device->pdev);
 	kfree(device);
 }
 EXPORT_SYMBOL(tegra_mipi_free);
 
+int tegra_mipi_enable(struct tegra_mipi_device *dev)
+{
+	int err = 0;
+
+	mutex_lock(&dev->mipi->lock);
+
+	if (dev->mipi->usage_count++ == 0)
+		err = tegra_mipi_power_up(dev->mipi);
+
+	mutex_unlock(&dev->mipi->lock);
+
+	return err;
+
+}
+EXPORT_SYMBOL(tegra_mipi_enable);
+
+int tegra_mipi_disable(struct tegra_mipi_device *dev)
+{
+	int err = 0;
+
+	mutex_lock(&dev->mipi->lock);
+
+	if (--dev->mipi->usage_count == 0)
+		err = tegra_mipi_power_down(dev->mipi);
+
+	mutex_unlock(&dev->mipi->lock);
+
+	return err;
+
+}
+EXPORT_SYMBOL(tegra_mipi_disable);
+
 static int tegra_mipi_wait(struct tegra_mipi *mipi)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies(250);
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index 99ec3ff..7f8ff39 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -779,19 +779,8 @@
 	.minor		= UHID_MINOR,
 	.name		= UHID_NAME,
 };
+module_misc_device(uhid_misc);
 
-static int __init uhid_init(void)
-{
-	return misc_register(&uhid_misc);
-}
-
-static void __exit uhid_exit(void)
-{
-	misc_deregister(&uhid_misc);
-}
-
-module_init(uhid_init);
-module_exit(uhid_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>");
 MODULE_DESCRIPTION("User-space I/O driver support for HID subsystem");
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 56dd261..16f91c8 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -43,7 +43,12 @@
 {
 	struct hv_monitor_page *monitorpage;
 
-	if (channel->offermsg.monitor_allocated) {
+	/*
+	 * For channels marked as in "low latency" mode
+	 * bypass the monitor page mechanism.
+	 */
+	if ((channel->offermsg.monitor_allocated) &&
+	    (!channel->low_latency)) {
 		/* Each u32 represents 32 channels */
 		sync_set_bit(channel->offermsg.child_relid & 31,
 			(unsigned long *) vmbus_connection.send_int_page +
@@ -70,12 +75,14 @@
 {
 	struct vmbus_channel_open_channel *open_msg;
 	struct vmbus_channel_msginfo *open_info = NULL;
-	void *in, *out;
 	unsigned long flags;
 	int ret, err = 0;
-	unsigned long t;
 	struct page *page;
 
+	if (send_ringbuffer_size % PAGE_SIZE ||
+	    recv_ringbuffer_size % PAGE_SIZE)
+		return -EINVAL;
+
 	spin_lock_irqsave(&newchannel->lock, flags);
 	if (newchannel->state == CHANNEL_OPEN_STATE) {
 		newchannel->state = CHANNEL_OPENING_STATE;
@@ -95,36 +102,33 @@
 				recv_ringbuffer_size));
 
 	if (!page)
-		out = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
-					       get_order(send_ringbuffer_size +
-					       recv_ringbuffer_size));
-	else
-		out = (void *)page_address(page);
+		page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
+				   get_order(send_ringbuffer_size +
+					     recv_ringbuffer_size));
 
-	if (!out) {
+	if (!page) {
 		err = -ENOMEM;
-		goto error0;
+		goto error_set_chnstate;
 	}
 
-	in = (void *)((unsigned long)out + send_ringbuffer_size);
-
-	newchannel->ringbuffer_pages = out;
+	newchannel->ringbuffer_pages = page_address(page);
 	newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
 					   recv_ringbuffer_size) >> PAGE_SHIFT;
 
-	ret = hv_ringbuffer_init(
-		&newchannel->outbound, out, send_ringbuffer_size);
+	ret = hv_ringbuffer_init(&newchannel->outbound, page,
+				 send_ringbuffer_size >> PAGE_SHIFT);
 
 	if (ret != 0) {
 		err = ret;
-		goto error0;
+		goto error_free_pages;
 	}
 
-	ret = hv_ringbuffer_init(
-		&newchannel->inbound, in, recv_ringbuffer_size);
+	ret = hv_ringbuffer_init(&newchannel->inbound,
+				 &page[send_ringbuffer_size >> PAGE_SHIFT],
+				 recv_ringbuffer_size >> PAGE_SHIFT);
 	if (ret != 0) {
 		err = ret;
-		goto error0;
+		goto error_free_pages;
 	}
 
 
@@ -132,14 +136,14 @@
 	newchannel->ringbuffer_gpadlhandle = 0;
 
 	ret = vmbus_establish_gpadl(newchannel,
-					 newchannel->outbound.ring_buffer,
-					 send_ringbuffer_size +
-					 recv_ringbuffer_size,
-					 &newchannel->ringbuffer_gpadlhandle);
+				    page_address(page),
+				    send_ringbuffer_size +
+				    recv_ringbuffer_size,
+				    &newchannel->ringbuffer_gpadlhandle);
 
 	if (ret != 0) {
 		err = ret;
-		goto error0;
+		goto error_free_pages;
 	}
 
 	/* Create and init the channel open message */
@@ -148,7 +152,7 @@
 			   GFP_KERNEL);
 	if (!open_info) {
 		err = -ENOMEM;
-		goto error_gpadl;
+		goto error_free_gpadl;
 	}
 
 	init_completion(&open_info->waitevent);
@@ -164,7 +168,7 @@
 
 	if (userdatalen > MAX_USER_DEFINED_BYTES) {
 		err = -EINVAL;
-		goto error_gpadl;
+		goto error_free_gpadl;
 	}
 
 	if (userdatalen)
@@ -180,14 +184,10 @@
 
 	if (ret != 0) {
 		err = ret;
-		goto error1;
+		goto error_clean_msglist;
 	}
 
-	t = wait_for_completion_timeout(&open_info->waitevent, 5*HZ);
-	if (t == 0) {
-		err = -ETIMEDOUT;
-		goto error1;
-	}
+	wait_for_completion(&open_info->waitevent);
 
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&open_info->msglistentry);
@@ -195,25 +195,27 @@
 
 	if (open_info->response.open_result.status) {
 		err = -EAGAIN;
-		goto error_gpadl;
+		goto error_free_gpadl;
 	}
 
 	newchannel->state = CHANNEL_OPENED_STATE;
 	kfree(open_info);
 	return 0;
 
-error1:
+error_clean_msglist:
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&open_info->msglistentry);
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 
-error_gpadl:
+error_free_gpadl:
 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
-
-error0:
-	free_pages((unsigned long)out,
-		get_order(send_ringbuffer_size + recv_ringbuffer_size));
 	kfree(open_info);
+error_free_pages:
+	hv_ringbuffer_cleanup(&newchannel->outbound);
+	hv_ringbuffer_cleanup(&newchannel->inbound);
+	__free_pages(page,
+		     get_order(send_ringbuffer_size + recv_ringbuffer_size));
+error_set_chnstate:
 	newchannel->state = CHANNEL_OPEN_STATE;
 	return err;
 }
@@ -238,8 +240,7 @@
  * create_gpadl_header - Creates a gpadl for the specified buffer
  */
 static int create_gpadl_header(void *kbuffer, u32 size,
-					 struct vmbus_channel_msginfo **msginfo,
-					 u32 *messagecount)
+			       struct vmbus_channel_msginfo **msginfo)
 {
 	int i;
 	int pagecount;
@@ -283,7 +284,6 @@
 			gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys(
 				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
 		*msginfo = msgheader;
-		*messagecount = 1;
 
 		pfnsum = pfncount;
 		pfnleft = pagecount - pfncount;
@@ -323,7 +323,6 @@
 			}
 
 			msgbody->msgsize = msgsize;
-			(*messagecount)++;
 			gpadl_body =
 				(struct vmbus_channel_gpadl_body *)msgbody->msg;
 
@@ -352,6 +351,8 @@
 		msgheader = kzalloc(msgsize, GFP_KERNEL);
 		if (msgheader == NULL)
 			goto nomem;
+
+		INIT_LIST_HEAD(&msgheader->submsglist);
 		msgheader->msgsize = msgsize;
 
 		gpadl_header = (struct vmbus_channel_gpadl_header *)
@@ -366,7 +367,6 @@
 				kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT;
 
 		*msginfo = msgheader;
-		*messagecount = 1;
 	}
 
 	return 0;
@@ -390,8 +390,7 @@
 	struct vmbus_channel_gpadl_header *gpadlmsg;
 	struct vmbus_channel_gpadl_body *gpadl_body;
 	struct vmbus_channel_msginfo *msginfo = NULL;
-	struct vmbus_channel_msginfo *submsginfo;
-	u32 msgcount;
+	struct vmbus_channel_msginfo *submsginfo, *tmp;
 	struct list_head *curr;
 	u32 next_gpadl_handle;
 	unsigned long flags;
@@ -400,7 +399,7 @@
 	next_gpadl_handle =
 		(atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
 
-	ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount);
+	ret = create_gpadl_header(kbuffer, size, &msginfo);
 	if (ret)
 		return ret;
 
@@ -423,24 +422,21 @@
 	if (ret != 0)
 		goto cleanup;
 
-	if (msgcount > 1) {
-		list_for_each(curr, &msginfo->submsglist) {
+	list_for_each(curr, &msginfo->submsglist) {
+		submsginfo = (struct vmbus_channel_msginfo *)curr;
+		gpadl_body =
+			(struct vmbus_channel_gpadl_body *)submsginfo->msg;
 
-			submsginfo = (struct vmbus_channel_msginfo *)curr;
-			gpadl_body =
-			     (struct vmbus_channel_gpadl_body *)submsginfo->msg;
+		gpadl_body->header.msgtype =
+			CHANNELMSG_GPADL_BODY;
+		gpadl_body->gpadl = next_gpadl_handle;
 
-			gpadl_body->header.msgtype =
-				CHANNELMSG_GPADL_BODY;
-			gpadl_body->gpadl = next_gpadl_handle;
+		ret = vmbus_post_msg(gpadl_body,
+				     submsginfo->msgsize -
+				     sizeof(*submsginfo));
+		if (ret != 0)
+			goto cleanup;
 
-			ret = vmbus_post_msg(gpadl_body,
-					       submsginfo->msgsize -
-					       sizeof(*submsginfo));
-			if (ret != 0)
-				goto cleanup;
-
-		}
 	}
 	wait_for_completion(&msginfo->waitevent);
 
@@ -451,6 +447,10 @@
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&msginfo->msglistentry);
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
+	list_for_each_entry_safe(submsginfo, tmp, &msginfo->submsglist,
+				 msglistentry) {
+		kfree(submsginfo);
+	}
 
 	kfree(msginfo);
 	return ret;
@@ -512,7 +512,6 @@
 static int vmbus_close_internal(struct vmbus_channel *channel)
 {
 	struct vmbus_channel_close_channel *msg;
-	struct tasklet_struct *tasklet;
 	int ret;
 
 	/*
@@ -524,8 +523,7 @@
 	 * To resolve the race, we can serialize them by disabling the
 	 * tasklet when the latter is running here.
 	 */
-	tasklet = hv_context.event_dpc[channel->target_cpu];
-	tasklet_disable(tasklet);
+	hv_event_tasklet_disable(channel);
 
 	/*
 	 * In case a device driver's probe() fails (e.g.,
@@ -591,7 +589,7 @@
 		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
 
 out:
-	tasklet_enable(tasklet);
+	hv_event_tasklet_enable(channel);
 
 	return ret;
 }
@@ -659,7 +657,7 @@
 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
 	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs,
-				  &signal, lock);
+				  &signal, lock, channel->signal_policy);
 
 	/*
 	 * Signalling the host is conditional on many factors:
@@ -680,11 +678,6 @@
 	 * mechanism which can hurt the performance otherwise.
 	 */
 
-	if (channel->signal_policy)
-		signal = true;
-	else
-		kick_q = true;
-
 	if (((ret == 0) && kick_q && signal) ||
 	    (ret && !is_hvsock_channel(channel)))
 		vmbus_setevent(channel);
@@ -777,7 +770,7 @@
 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
 	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
-				  &signal, lock);
+				  &signal, lock, channel->signal_policy);
 
 	/*
 	 * Signalling the host is conditional on many factors:
@@ -795,11 +788,6 @@
 	 * enough condition that it should not matter.
 	 */
 
-	if (channel->signal_policy)
-		signal = true;
-	else
-		kick_q = true;
-
 	if (((ret == 0) && kick_q && signal) || (ret))
 		vmbus_setevent(channel);
 
@@ -861,7 +849,7 @@
 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
 	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
-				  &signal, lock);
+				  &signal, lock, channel->signal_policy);
 
 	if (ret == 0 && signal)
 		vmbus_setevent(channel);
@@ -926,7 +914,7 @@
 	bufferlist[2].iov_len = (packetlen_aligned - packetlen);
 
 	ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3,
-				  &signal, lock);
+				  &signal, lock, channel->signal_policy);
 
 	if (ret == 0 && signal)
 		vmbus_setevent(channel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index b6c1211..96a85cd 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -21,6 +21,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
+#include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/mm.h>
@@ -138,10 +139,32 @@
 	},
 };
 
-static u16 hv_get_dev_type(const uuid_le *guid)
+static const struct {
+	uuid_le guid;
+} vmbus_unsupported_devs[] = {
+	{ HV_AVMA1_GUID },
+	{ HV_AVMA2_GUID },
+	{ HV_RDV_GUID	},
+};
+
+static bool is_unsupported_vmbus_devs(const uuid_le *guid)
 {
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
+		if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
+			return true;
+	return false;
+}
+
+static u16 hv_get_dev_type(const struct vmbus_channel *channel)
+{
+	const uuid_le *guid = &channel->offermsg.offer.if_type;
 	u16 i;
 
+	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
+		return HV_UNKOWN;
+
 	for (i = HV_IDE; i < HV_UNKOWN; i++) {
 		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
 			return i;
@@ -251,14 +274,12 @@
  */
 static struct vmbus_channel *alloc_channel(void)
 {
-	static atomic_t chan_num = ATOMIC_INIT(0);
 	struct vmbus_channel *channel;
 
 	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
 	if (!channel)
 		return NULL;
 
-	channel->id = atomic_inc_return(&chan_num);
 	channel->acquire_ring_lock = true;
 	spin_lock_init(&channel->inbound_lock);
 	spin_lock_init(&channel->lock);
@@ -303,16 +324,32 @@
 	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
 }
 
+void hv_event_tasklet_disable(struct vmbus_channel *channel)
+{
+	struct tasklet_struct *tasklet;
+	tasklet = hv_context.event_dpc[channel->target_cpu];
+	tasklet_disable(tasklet);
+}
+
+void hv_event_tasklet_enable(struct vmbus_channel *channel)
+{
+	struct tasklet_struct *tasklet;
+	tasklet = hv_context.event_dpc[channel->target_cpu];
+	tasklet_enable(tasklet);
+
+	/* In case there is any pending event */
+	tasklet_schedule(tasklet);
+}
+
 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
 {
 	unsigned long flags;
 	struct vmbus_channel *primary_channel;
 
-	vmbus_release_relid(relid);
-
 	BUG_ON(!channel->rescind);
 	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
 
+	hv_event_tasklet_disable(channel);
 	if (channel->target_cpu != get_cpu()) {
 		put_cpu();
 		smp_call_function_single(channel->target_cpu,
@@ -321,6 +358,7 @@
 		percpu_channel_deq(channel);
 		put_cpu();
 	}
+	hv_event_tasklet_enable(channel);
 
 	if (channel->primary_channel == NULL) {
 		list_del(&channel->listentry);
@@ -338,8 +376,11 @@
 	 * We need to free the bit for init_vp_index() to work in the case
 	 * of sub-channel, when we reload drivers like hv_netvsc.
 	 */
-	cpumask_clear_cpu(channel->target_cpu,
-			  &primary_channel->alloced_cpus_in_node);
+	if (channel->affinity_policy == HV_LOCALIZED)
+		cpumask_clear_cpu(channel->target_cpu,
+				  &primary_channel->alloced_cpus_in_node);
+
+	vmbus_release_relid(relid);
 
 	free_channel(channel);
 }
@@ -405,10 +446,13 @@
 			goto err_free_chan;
 	}
 
-	dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);
+	dev_type = hv_get_dev_type(newchannel);
+	if (dev_type == HV_NIC)
+		set_channel_signal_state(newchannel, HV_SIGNAL_POLICY_EXPLICIT);
 
 	init_vp_index(newchannel, dev_type);
 
+	hv_event_tasklet_disable(newchannel);
 	if (newchannel->target_cpu != get_cpu()) {
 		put_cpu();
 		smp_call_function_single(newchannel->target_cpu,
@@ -418,6 +462,7 @@
 		percpu_channel_enq(newchannel);
 		put_cpu();
 	}
+	hv_event_tasklet_enable(newchannel);
 
 	/*
 	 * This state is used to indicate a successful open
@@ -463,12 +508,11 @@
 	return;
 
 err_deq_chan:
-	vmbus_release_relid(newchannel->offermsg.child_relid);
-
 	mutex_lock(&vmbus_connection.channel_mutex);
 	list_del(&newchannel->listentry);
 	mutex_unlock(&vmbus_connection.channel_mutex);
 
+	hv_event_tasklet_disable(newchannel);
 	if (newchannel->target_cpu != get_cpu()) {
 		put_cpu();
 		smp_call_function_single(newchannel->target_cpu,
@@ -477,6 +521,9 @@
 		percpu_channel_deq(newchannel);
 		put_cpu();
 	}
+	hv_event_tasklet_enable(newchannel);
+
+	vmbus_release_relid(newchannel->offermsg.child_relid);
 
 err_free_chan:
 	free_channel(newchannel);
@@ -522,17 +569,17 @@
 	}
 
 	/*
-	 * We distribute primary channels evenly across all the available
-	 * NUMA nodes and within the assigned NUMA node we will assign the
-	 * first available CPU to the primary channel.
-	 * The sub-channels will be assigned to the CPUs available in the
-	 * NUMA node evenly.
+	 * Based on the channel affinity policy, we will assign the NUMA
+	 * nodes.
 	 */
-	if (!primary) {
+
+	if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
 		while (true) {
 			next_node = next_numa_node_id++;
-			if (next_node == nr_node_ids)
+			if (next_node == nr_node_ids) {
 				next_node = next_numa_node_id = 0;
+				continue;
+			}
 			if (cpumask_empty(cpumask_of_node(next_node)))
 				continue;
 			break;
@@ -556,15 +603,17 @@
 
 	cur_cpu = -1;
 
-	/*
-	 * Normally Hyper-V host doesn't create more subchannels than there
-	 * are VCPUs on the node but it is possible when not all present VCPUs
-	 * on the node are initialized by guest. Clear the alloced_cpus_in_node
-	 * to start over.
-	 */
-	if (cpumask_equal(&primary->alloced_cpus_in_node,
-			  cpumask_of_node(primary->numa_node)))
-		cpumask_clear(&primary->alloced_cpus_in_node);
+	if (primary->affinity_policy == HV_LOCALIZED) {
+		/*
+		 * Normally Hyper-V host doesn't create more subchannels
+		 * than there are VCPUs on the node but it is possible when not
+		 * all present VCPUs on the node are initialized by guest.
+		 * Clear the alloced_cpus_in_node to start over.
+		 */
+		if (cpumask_equal(&primary->alloced_cpus_in_node,
+				  cpumask_of_node(primary->numa_node)))
+			cpumask_clear(&primary->alloced_cpus_in_node);
+	}
 
 	while (true) {
 		cur_cpu = cpumask_next(cur_cpu, &available_mask);
@@ -575,17 +624,24 @@
 			continue;
 		}
 
-		/*
-		 * NOTE: in the case of sub-channel, we clear the sub-channel
-		 * related bit(s) in primary->alloced_cpus_in_node in
-		 * hv_process_channel_removal(), so when we reload drivers
-		 * like hv_netvsc in SMP guest, here we're able to re-allocate
-		 * bit from primary->alloced_cpus_in_node.
-		 */
-		if (!cpumask_test_cpu(cur_cpu,
-				&primary->alloced_cpus_in_node)) {
-			cpumask_set_cpu(cur_cpu,
-					&primary->alloced_cpus_in_node);
+		if (primary->affinity_policy == HV_LOCALIZED) {
+			/*
+			 * NOTE: in the case of sub-channel, we clear the
+			 * sub-channel related bit(s) in
+			 * primary->alloced_cpus_in_node in
+			 * hv_process_channel_removal(), so when we
+			 * reload drivers like hv_netvsc in SMP guest, here
+			 * we're able to re-allocate
+			 * bit from primary->alloced_cpus_in_node.
+			 */
+			if (!cpumask_test_cpu(cur_cpu,
+					      &primary->alloced_cpus_in_node)) {
+				cpumask_set_cpu(cur_cpu,
+						&primary->alloced_cpus_in_node);
+				cpumask_set_cpu(cur_cpu, alloced_mask);
+				break;
+			}
+		} else {
 			cpumask_set_cpu(cur_cpu, alloced_mask);
 			break;
 		}
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index fcf8a02..78e6368 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -439,7 +439,7 @@
 	union hv_connection_id conn_id;
 	int ret = 0;
 	int retries = 0;
-	u32 msec = 1;
+	u32 usec = 1;
 
 	conn_id.asu32 = 0;
 	conn_id.u.id = VMBUS_MESSAGE_CONNECTION_ID;
@@ -472,9 +472,9 @@
 		}
 
 		retries++;
-		msleep(msec);
-		if (msec < 2048)
-			msec *= 2;
+		udelay(usec);
+		if (usec < 2048)
+			usec *= 2;
 	}
 	return ret;
 }
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index a1c086b..60dbd6c 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -278,7 +278,7 @@
  *
  * This routine is called normally during driver unloading or exiting.
  */
-void hv_cleanup(void)
+void hv_cleanup(bool crash)
 {
 	union hv_x64_msr_hypercall_contents hypercall_msr;
 
@@ -288,7 +288,8 @@
 	if (hv_context.hypercall_page) {
 		hypercall_msr.as_uint64 = 0;
 		wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
-		vfree(hv_context.hypercall_page);
+		if (!crash)
+			vfree(hv_context.hypercall_page);
 		hv_context.hypercall_page = NULL;
 	}
 
@@ -308,7 +309,8 @@
 
 		hypercall_msr.as_uint64 = 0;
 		wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64);
-		vfree(hv_context.tsc_page);
+		if (!crash)
+			vfree(hv_context.tsc_page);
 		hv_context.tsc_page = NULL;
 	}
 #endif
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index df35fb7..fdf8da9 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -430,16 +430,27 @@
  * currently hot added. We hot add in multiples of 128M
  * chunks; it is possible that we may not be able to bring
  * online all the pages in the region. The range
- * covered_end_pfn defines the pages that can
+ * covered_start_pfn:covered_end_pfn defines the pages that can
  * be brough online.
  */
 
 struct hv_hotadd_state {
 	struct list_head list;
 	unsigned long start_pfn;
+	unsigned long covered_start_pfn;
 	unsigned long covered_end_pfn;
 	unsigned long ha_end_pfn;
 	unsigned long end_pfn;
+	/*
+	 * A list of gaps.
+	 */
+	struct list_head gap_list;
+};
+
+struct hv_hotadd_gap {
+	struct list_head list;
+	unsigned long start_pfn;
+	unsigned long end_pfn;
 };
 
 struct balloon_state {
@@ -536,7 +547,11 @@
 	 */
 	struct task_struct *thread;
 
-	struct mutex ha_region_mutex;
+	/*
+	 * Protects ha_region_list, num_pages_onlined counter and individual
+	 * regions from ha_region_list.
+	 */
+	spinlock_t ha_lock;
 
 	/*
 	 * A list of hot-add regions.
@@ -560,18 +575,14 @@
 			      void *v)
 {
 	struct memory_notify *mem = (struct memory_notify *)v;
+	unsigned long flags;
 
 	switch (val) {
-	case MEM_GOING_ONLINE:
-		mutex_lock(&dm_device.ha_region_mutex);
-		break;
-
 	case MEM_ONLINE:
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
 		dm_device.num_pages_onlined += mem->nr_pages;
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 	case MEM_CANCEL_ONLINE:
-		if (val == MEM_ONLINE ||
-		    mutex_is_locked(&dm_device.ha_region_mutex))
-			mutex_unlock(&dm_device.ha_region_mutex);
 		if (dm_device.ha_waiting) {
 			dm_device.ha_waiting = false;
 			complete(&dm_device.ol_waitevent);
@@ -579,10 +590,11 @@
 		break;
 
 	case MEM_OFFLINE:
-		mutex_lock(&dm_device.ha_region_mutex);
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
 		dm_device.num_pages_onlined -= mem->nr_pages;
-		mutex_unlock(&dm_device.ha_region_mutex);
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 		break;
+	case MEM_GOING_ONLINE:
 	case MEM_GOING_OFFLINE:
 	case MEM_CANCEL_OFFLINE:
 		break;
@@ -595,18 +607,46 @@
 	.priority = 0
 };
 
+/* Check if the particular page is backed and can be onlined and online it. */
+static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
+{
+	unsigned long cur_start_pgp;
+	unsigned long cur_end_pgp;
+	struct hv_hotadd_gap *gap;
 
-static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size)
+	cur_start_pgp = (unsigned long)pfn_to_page(has->covered_start_pfn);
+	cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+
+	/* The page is not backed. */
+	if (((unsigned long)pg < cur_start_pgp) ||
+	    ((unsigned long)pg >= cur_end_pgp))
+		return;
+
+	/* Check for gaps. */
+	list_for_each_entry(gap, &has->gap_list, list) {
+		cur_start_pgp = (unsigned long)
+			pfn_to_page(gap->start_pfn);
+		cur_end_pgp = (unsigned long)
+			pfn_to_page(gap->end_pfn);
+		if (((unsigned long)pg >= cur_start_pgp) &&
+		    ((unsigned long)pg < cur_end_pgp)) {
+			return;
+		}
+	}
+
+	/* This frame is currently backed; online the page. */
+	__online_page_set_limits(pg);
+	__online_page_increment_counters(pg);
+	__online_page_free(pg);
+}
+
+static void hv_bring_pgs_online(struct hv_hotadd_state *has,
+				unsigned long start_pfn, unsigned long size)
 {
 	int i;
 
-	for (i = 0; i < size; i++) {
-		struct page *pg;
-		pg = pfn_to_page(start_pfn + i);
-		__online_page_set_limits(pg);
-		__online_page_increment_counters(pg);
-		__online_page_free(pg);
-	}
+	for (i = 0; i < size; i++)
+		hv_page_online_one(has, pfn_to_page(start_pfn + i));
 }
 
 static void hv_mem_hot_add(unsigned long start, unsigned long size,
@@ -618,9 +658,12 @@
 	unsigned long start_pfn;
 	unsigned long processed_pfn;
 	unsigned long total_pfn = pfn_count;
+	unsigned long flags;
 
 	for (i = 0; i < (size/HA_CHUNK); i++) {
 		start_pfn = start + (i * HA_CHUNK);
+
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
 		has->ha_end_pfn +=  HA_CHUNK;
 
 		if (total_pfn > HA_CHUNK) {
@@ -632,11 +675,11 @@
 		}
 
 		has->covered_end_pfn +=  processed_pfn;
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
 		init_completion(&dm_device.ol_waitevent);
-		dm_device.ha_waiting = true;
+		dm_device.ha_waiting = !memhp_auto_online;
 
-		mutex_unlock(&dm_device.ha_region_mutex);
 		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
 		ret = add_memory(nid, PFN_PHYS((start_pfn)),
 				(HA_CHUNK << PAGE_SHIFT));
@@ -653,20 +696,23 @@
 				 */
 				do_hot_add = false;
 			}
+			spin_lock_irqsave(&dm_device.ha_lock, flags);
 			has->ha_end_pfn -= HA_CHUNK;
 			has->covered_end_pfn -=  processed_pfn;
-			mutex_lock(&dm_device.ha_region_mutex);
+			spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 			break;
 		}
 
 		/*
-		 * Wait for the memory block to be onlined.
-		 * Since the hot add has succeeded, it is ok to
-		 * proceed even if the pages in the hot added region
-		 * have not been "onlined" within the allowed time.
+		 * Wait for the memory block to be onlined when memory onlining
+		 * is done outside of kernel (memhp_auto_online). Since the hot
+		 * add has succeeded, it is ok to proceed even if the pages in
+		 * the hot added region have not been "onlined" within the
+		 * allowed time.
 		 */
-		wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
-		mutex_lock(&dm_device.ha_region_mutex);
+		if (dm_device.ha_waiting)
+			wait_for_completion_timeout(&dm_device.ol_waitevent,
+						    5*HZ);
 		post_status(&dm_device);
 	}
 
@@ -675,47 +721,64 @@
 
 static void hv_online_page(struct page *pg)
 {
-	struct list_head *cur;
 	struct hv_hotadd_state *has;
 	unsigned long cur_start_pgp;
 	unsigned long cur_end_pgp;
+	unsigned long flags;
 
-	list_for_each(cur, &dm_device.ha_region_list) {
-		has = list_entry(cur, struct hv_hotadd_state, list);
-		cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn);
-		cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
+	spin_lock_irqsave(&dm_device.ha_lock, flags);
+	list_for_each_entry(has, &dm_device.ha_region_list, list) {
+		cur_start_pgp = (unsigned long)
+			pfn_to_page(has->start_pfn);
+		cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
 
-		if (((unsigned long)pg >= cur_start_pgp) &&
-			((unsigned long)pg < cur_end_pgp)) {
-			/*
-			 * This frame is currently backed; online the
-			 * page.
-			 */
-			__online_page_set_limits(pg);
-			__online_page_increment_counters(pg);
-			__online_page_free(pg);
-		}
+		/* The page belongs to a different HAS. */
+		if (((unsigned long)pg < cur_start_pgp) ||
+		    ((unsigned long)pg >= cur_end_pgp))
+			continue;
+
+		hv_page_online_one(has, pg);
+		break;
 	}
+	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 }
 
-static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
+static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
 {
-	struct list_head *cur;
 	struct hv_hotadd_state *has;
+	struct hv_hotadd_gap *gap;
 	unsigned long residual, new_inc;
+	int ret = 0;
+	unsigned long flags;
 
-	if (list_empty(&dm_device.ha_region_list))
-		return false;
-
-	list_for_each(cur, &dm_device.ha_region_list) {
-		has = list_entry(cur, struct hv_hotadd_state, list);
-
+	spin_lock_irqsave(&dm_device.ha_lock, flags);
+	list_for_each_entry(has, &dm_device.ha_region_list, list) {
 		/*
 		 * If the pfn range we are dealing with is not in the current
 		 * "hot add block", move on.
 		 */
 		if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
 			continue;
+
+		/*
+		 * If the current start pfn is not where the covered_end
+		 * is, create a gap and update covered_end_pfn.
+		 */
+		if (has->covered_end_pfn != start_pfn) {
+			gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
+			if (!gap) {
+				ret = -ENOMEM;
+				break;
+			}
+
+			INIT_LIST_HEAD(&gap->list);
+			gap->start_pfn = has->covered_end_pfn;
+			gap->end_pfn = start_pfn;
+			list_add_tail(&gap->list, &has->gap_list);
+
+			has->covered_end_pfn = start_pfn;
+		}
+
 		/*
 		 * If the current hot add-request extends beyond
 		 * our current limit; extend it.
@@ -732,19 +795,12 @@
 			has->end_pfn += new_inc;
 		}
 
-		/*
-		 * If the current start pfn is not where the covered_end
-		 * is, update it.
-		 */
-
-		if (has->covered_end_pfn != start_pfn)
-			has->covered_end_pfn = start_pfn;
-
-		return true;
-
+		ret = 1;
+		break;
 	}
+	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
-	return false;
+	return ret;
 }
 
 static unsigned long handle_pg_range(unsigned long pg_start,
@@ -753,17 +809,13 @@
 	unsigned long start_pfn = pg_start;
 	unsigned long pfn_cnt = pg_count;
 	unsigned long size;
-	struct list_head *cur;
 	struct hv_hotadd_state *has;
 	unsigned long pgs_ol = 0;
 	unsigned long old_covered_state;
+	unsigned long res = 0, flags;
 
-	if (list_empty(&dm_device.ha_region_list))
-		return 0;
-
-	list_for_each(cur, &dm_device.ha_region_list) {
-		has = list_entry(cur, struct hv_hotadd_state, list);
-
+	spin_lock_irqsave(&dm_device.ha_lock, flags);
+	list_for_each_entry(has, &dm_device.ha_region_list, list) {
 		/*
 		 * If the pfn range we are dealing with is not in the current
 		 * "hot add block", move on.
@@ -783,6 +835,8 @@
 			if (pgs_ol > pfn_cnt)
 				pgs_ol = pfn_cnt;
 
+			has->covered_end_pfn +=  pgs_ol;
+			pfn_cnt -= pgs_ol;
 			/*
 			 * Check if the corresponding memory block is already
 			 * online by checking its last previously backed page.
@@ -791,10 +845,8 @@
 			 */
 			if (start_pfn > has->start_pfn &&
 			    !PageReserved(pfn_to_page(start_pfn - 1)))
-				hv_bring_pgs_online(start_pfn, pgs_ol);
+				hv_bring_pgs_online(has, start_pfn, pgs_ol);
 
-			has->covered_end_pfn +=  pgs_ol;
-			pfn_cnt -= pgs_ol;
 		}
 
 		if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
@@ -813,17 +865,20 @@
 			} else {
 				pfn_cnt = size;
 			}
+			spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 			hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
+			spin_lock_irqsave(&dm_device.ha_lock, flags);
 		}
 		/*
 		 * If we managed to online any pages that were given to us,
 		 * we declare success.
 		 */
-		return has->covered_end_pfn - old_covered_state;
-
+		res = has->covered_end_pfn - old_covered_state;
+		break;
 	}
+	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
-	return 0;
+	return res;
 }
 
 static unsigned long process_hot_add(unsigned long pg_start,
@@ -832,13 +887,20 @@
 					unsigned long rg_size)
 {
 	struct hv_hotadd_state *ha_region = NULL;
+	int covered;
+	unsigned long flags;
 
 	if (pfn_cnt == 0)
 		return 0;
 
-	if (!dm_device.host_specified_ha_region)
-		if (pfn_covered(pg_start, pfn_cnt))
+	if (!dm_device.host_specified_ha_region) {
+		covered = pfn_covered(pg_start, pfn_cnt);
+		if (covered < 0)
+			return 0;
+
+		if (covered)
 			goto do_pg_range;
+	}
 
 	/*
 	 * If the host has specified a hot-add range; deal with it first.
@@ -850,12 +912,17 @@
 			return 0;
 
 		INIT_LIST_HEAD(&ha_region->list);
+		INIT_LIST_HEAD(&ha_region->gap_list);
 
-		list_add_tail(&ha_region->list, &dm_device.ha_region_list);
 		ha_region->start_pfn = rg_start;
 		ha_region->ha_end_pfn = rg_start;
+		ha_region->covered_start_pfn = pg_start;
 		ha_region->covered_end_pfn = pg_start;
 		ha_region->end_pfn = rg_start + rg_size;
+
+		spin_lock_irqsave(&dm_device.ha_lock, flags);
+		list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 	}
 
 do_pg_range:
@@ -882,7 +949,6 @@
 	resp.hdr.size = sizeof(struct dm_hot_add_response);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-	mutex_lock(&dm_device.ha_region_mutex);
 	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
 	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
 
@@ -916,7 +982,6 @@
 						rg_start, rg_sz);
 
 	dm->num_pages_added += resp.page_count;
-	mutex_unlock(&dm_device.ha_region_mutex);
 #endif
 	/*
 	 * The result field of the response structure has the
@@ -1010,7 +1075,6 @@
 static void post_status(struct hv_dynmem_device *dm)
 {
 	struct dm_status status;
-	struct sysinfo val;
 	unsigned long now = jiffies;
 	unsigned long last_post = last_post_time;
 
@@ -1022,7 +1086,6 @@
 	if (!time_after(now, (last_post_time + HZ)))
 		return;
 
-	si_meminfo(&val);
 	memset(&status, 0, sizeof(struct dm_status));
 	status.hdr.type = DM_STATUS_REPORT;
 	status.hdr.size = sizeof(struct dm_status);
@@ -1038,7 +1101,7 @@
 	 * num_pages_onlined) as committed to the host, otherwise it can try
 	 * asking us to balloon them out.
 	 */
-	status.num_avail = val.freeram;
+	status.num_avail = si_mem_available();
 	status.num_committed = vm_memory_committed() +
 		dm->num_pages_ballooned +
 		(dm->num_pages_added > dm->num_pages_onlined ?
@@ -1144,7 +1207,7 @@
 	int ret;
 	bool done = false;
 	int i;
-	struct sysinfo val;
+	long avail_pages;
 	unsigned long floor;
 
 	/* The host balloons pages in 2M granularity. */
@@ -1156,12 +1219,12 @@
 	 */
 	alloc_unit = 512;
 
-	si_meminfo(&val);
+	avail_pages = si_mem_available();
 	floor = compute_balloon_floor();
 
 	/* Refuse to balloon below the floor, keep the 2M granularity. */
-	if (val.freeram < num_pages || val.freeram - num_pages < floor) {
-		num_pages = val.freeram > floor ? (val.freeram - floor) : 0;
+	if (avail_pages < num_pages || avail_pages - num_pages < floor) {
+		num_pages = avail_pages > floor ? (avail_pages - floor) : 0;
 		num_pages -= num_pages % PAGES_IN_2M;
 	}
 
@@ -1172,7 +1235,6 @@
 		bl_resp->hdr.size = sizeof(struct dm_balloon_response);
 		bl_resp->more_pages = 1;
 
-
 		num_pages -= num_ballooned;
 		num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
 						    bl_resp, alloc_unit);
@@ -1461,7 +1523,7 @@
 	init_completion(&dm_device.host_event);
 	init_completion(&dm_device.config_event);
 	INIT_LIST_HEAD(&dm_device.ha_region_list);
-	mutex_init(&dm_device.ha_region_mutex);
+	spin_lock_init(&dm_device.ha_lock);
 	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
 	INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
 	dm_device.host_specified_ha_region = false;
@@ -1580,8 +1642,9 @@
 static int balloon_remove(struct hv_device *dev)
 {
 	struct hv_dynmem_device *dm = hv_get_drvdata(dev);
-	struct list_head *cur, *tmp;
-	struct hv_hotadd_state *has;
+	struct hv_hotadd_state *has, *tmp;
+	struct hv_hotadd_gap *gap, *tmp_gap;
+	unsigned long flags;
 
 	if (dm->num_pages_ballooned != 0)
 		pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
@@ -1596,11 +1659,16 @@
 	restore_online_page_callback(&hv_online_page);
 	unregister_memory_notifier(&hv_memory_nb);
 #endif
-	list_for_each_safe(cur, tmp, &dm->ha_region_list) {
-		has = list_entry(cur, struct hv_hotadd_state, list);
+	spin_lock_irqsave(&dm_device.ha_lock, flags);
+	list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
+		list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) {
+			list_del(&gap->list);
+			kfree(gap);
+		}
 		list_del(&has->list);
 		kfree(has);
 	}
+	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
 	return 0;
 }
diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 23c7079..8b2ba98 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -83,6 +83,12 @@
 	hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
 }
 
+static void fcopy_register_done(void)
+{
+	pr_debug("FCP: userspace daemon registered\n");
+	hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
+}
+
 static int fcopy_handle_handshake(u32 version)
 {
 	u32 our_ver = FCOPY_CURRENT_VERSION;
@@ -94,7 +100,8 @@
 		break;
 	case FCOPY_VERSION_1:
 		/* Daemon expects us to reply with our own version */
-		if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+		if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver),
+		    fcopy_register_done))
 			return -EFAULT;
 		dm_reg_value = version;
 		break;
@@ -107,8 +114,7 @@
 		 */
 		return -EINVAL;
 	}
-	pr_debug("FCP: userspace daemon ver. %d registered\n", version);
-	hv_poll_channel(fcopy_transaction.recv_channel, fcopy_poll_wrapper);
+	pr_debug("FCP: userspace daemon ver. %d connected\n", version);
 	return 0;
 }
 
@@ -161,7 +167,7 @@
 	}
 
 	fcopy_transaction.state = HVUTIL_USERSPACE_REQ;
-	rc = hvutil_transport_send(hvt, out_src, out_len);
+	rc = hvutil_transport_send(hvt, out_src, out_len, NULL);
 	if (rc) {
 		pr_debug("FCP: failed to communicate to the daemon: %d\n", rc);
 		if (cancel_delayed_work_sync(&fcopy_timeout_work)) {
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index cb1a916..5e1fdc8 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -102,6 +102,17 @@
 	hv_kvp_onchannelcallback(channel);
 }
 
+static void kvp_register_done(void)
+{
+	/*
+	 * If we're still negotiating with the host cancel the timeout
+	 * work to not poll the channel twice.
+	 */
+	pr_debug("KVP: userspace daemon registered\n");
+	cancel_delayed_work_sync(&kvp_host_handshake_work);
+	hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
+}
+
 static void
 kvp_register(int reg_value)
 {
@@ -116,7 +127,8 @@
 		kvp_msg->kvp_hdr.operation = reg_value;
 		strcpy(version, HV_DRV_VERSION);
 
-		hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg));
+		hvutil_transport_send(hvt, kvp_msg, sizeof(*kvp_msg),
+				      kvp_register_done);
 		kfree(kvp_msg);
 	}
 }
@@ -158,17 +170,10 @@
 	/*
 	 * We have a compatible daemon; complete the handshake.
 	 */
-	pr_debug("KVP: userspace daemon ver. %d registered\n",
-		 KVP_OP_REGISTER);
+	pr_debug("KVP: userspace daemon ver. %d connected\n",
+		 msg->kvp_hdr.operation);
 	kvp_register(dm_reg_value);
 
-	/*
-	 * If we're still negotiating with the host cancel the timeout
-	 * work to not poll the channel twice.
-	 */
-	cancel_delayed_work_sync(&kvp_host_handshake_work);
-	hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
-
 	return 0;
 }
 
@@ -455,7 +460,7 @@
 	}
 
 	kvp_transaction.state = HVUTIL_USERSPACE_REQ;
-	rc = hvutil_transport_send(hvt, message, sizeof(*message));
+	rc = hvutil_transport_send(hvt, message, sizeof(*message), NULL);
 	if (rc) {
 		pr_debug("KVP: failed to communicate to the daemon: %d\n", rc);
 		if (cancel_delayed_work_sync(&kvp_timeout_work)) {
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 3fba14e..a670713 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -67,11 +67,11 @@
 static __u8 *recv_buffer;
 static struct hvutil_transport *hvt;
 
-static void vss_send_op(struct work_struct *dummy);
 static void vss_timeout_func(struct work_struct *dummy);
+static void vss_handle_request(struct work_struct *dummy);
 
 static DECLARE_DELAYED_WORK(vss_timeout_work, vss_timeout_func);
-static DECLARE_WORK(vss_send_op_work, vss_send_op);
+static DECLARE_WORK(vss_handle_request_work, vss_handle_request);
 
 static void vss_poll_wrapper(void *channel)
 {
@@ -95,6 +95,12 @@
 	hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
 }
 
+static void vss_register_done(void)
+{
+	hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
+	pr_debug("VSS: userspace daemon registered\n");
+}
+
 static int vss_handle_handshake(struct hv_vss_msg *vss_msg)
 {
 	u32 our_ver = VSS_OP_REGISTER1;
@@ -105,16 +111,16 @@
 		dm_reg_value = VSS_OP_REGISTER;
 		break;
 	case VSS_OP_REGISTER1:
-		/* Daemon expects us to reply with our own version*/
-		if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+		/* Daemon expects us to reply with our own version */
+		if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver),
+					  vss_register_done))
 			return -EFAULT;
 		dm_reg_value = VSS_OP_REGISTER1;
 		break;
 	default:
 		return -EINVAL;
 	}
-	hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
-	pr_debug("VSS: userspace daemon ver. %d registered\n", dm_reg_value);
+	pr_debug("VSS: userspace daemon ver. %d connected\n", dm_reg_value);
 	return 0;
 }
 
@@ -136,6 +142,11 @@
 		return vss_handle_handshake(vss_msg);
 	} else if (vss_transaction.state == HVUTIL_USERSPACE_REQ) {
 		vss_transaction.state = HVUTIL_USERSPACE_RECV;
+
+		if (vss_msg->vss_hdr.operation == VSS_OP_HOT_BACKUP)
+			vss_transaction.msg->vss_cf.flags =
+				VSS_HBU_NO_AUTO_RECOVERY;
+
 		if (cancel_delayed_work_sync(&vss_timeout_work)) {
 			vss_respond_to_host(vss_msg->error);
 			/* Transaction is finished, reset the state. */
@@ -150,8 +161,7 @@
 	return 0;
 }
 
-
-static void vss_send_op(struct work_struct *dummy)
+static void vss_send_op(void)
 {
 	int op = vss_transaction.msg->vss_hdr.operation;
 	int rc;
@@ -168,7 +178,10 @@
 	vss_msg->vss_hdr.operation = op;
 
 	vss_transaction.state = HVUTIL_USERSPACE_REQ;
-	rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg));
+
+	schedule_delayed_work(&vss_timeout_work, VSS_USERSPACE_TIMEOUT);
+
+	rc = hvutil_transport_send(hvt, vss_msg, sizeof(*vss_msg), NULL);
 	if (rc) {
 		pr_warn("VSS: failed to communicate to the daemon: %d\n", rc);
 		if (cancel_delayed_work_sync(&vss_timeout_work)) {
@@ -182,6 +195,38 @@
 	return;
 }
 
+static void vss_handle_request(struct work_struct *dummy)
+{
+	switch (vss_transaction.msg->vss_hdr.operation) {
+	/*
+	 * Initiate a "freeze/thaw" operation in the guest.
+	 * We respond to the host once the operation is complete.
+	 *
+	 * We send the message to the user space daemon and the operation is
+	 * performed in the daemon.
+	 */
+	case VSS_OP_THAW:
+	case VSS_OP_FREEZE:
+	case VSS_OP_HOT_BACKUP:
+		if (vss_transaction.state < HVUTIL_READY) {
+			/* Userspace is not registered yet */
+			vss_respond_to_host(HV_E_FAIL);
+			return;
+		}
+		vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
+		vss_send_op();
+		return;
+	case VSS_OP_GET_DM_INFO:
+		vss_transaction.msg->dm_info.flags = 0;
+		break;
+	default:
+		break;
+	}
+
+	vss_respond_to_host(0);
+	hv_poll_channel(vss_transaction.recv_channel, vss_poll_wrapper);
+}
+
 /*
  * Send a response back to the host.
  */
@@ -266,48 +311,8 @@
 			vss_transaction.recv_req_id = requestid;
 			vss_transaction.msg = (struct hv_vss_msg *)vss_msg;
 
-			switch (vss_msg->vss_hdr.operation) {
-				/*
-				 * Initiate a "freeze/thaw"
-				 * operation in the guest.
-				 * We respond to the host once
-				 * the operation is complete.
-				 *
-				 * We send the message to the
-				 * user space daemon and the
-				 * operation is performed in
-				 * the daemon.
-				 */
-			case VSS_OP_FREEZE:
-			case VSS_OP_THAW:
-				if (vss_transaction.state < HVUTIL_READY) {
-					/* Userspace is not registered yet */
-					vss_respond_to_host(HV_E_FAIL);
-					return;
-				}
-				vss_transaction.state = HVUTIL_HOSTMSG_RECEIVED;
-				schedule_work(&vss_send_op_work);
-				schedule_delayed_work(&vss_timeout_work,
-						      VSS_USERSPACE_TIMEOUT);
-				return;
-
-			case VSS_OP_HOT_BACKUP:
-				vss_msg->vss_cf.flags =
-					 VSS_HBU_NO_AUTO_RECOVERY;
-				vss_respond_to_host(0);
-				return;
-
-			case VSS_OP_GET_DM_INFO:
-				vss_msg->dm_info.flags = 0;
-				vss_respond_to_host(0);
-				return;
-
-			default:
-				vss_respond_to_host(0);
-				return;
-
-			}
-
+			schedule_work(&vss_handle_request_work);
+			return;
 		}
 
 		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
@@ -358,6 +363,6 @@
 {
 	vss_transaction.state = HVUTIL_DEVICE_DYING;
 	cancel_delayed_work_sync(&vss_timeout_work);
-	cancel_work_sync(&vss_send_op_work);
+	cancel_work_sync(&vss_handle_request_work);
 	hvutil_transport_destroy(hvt);
 }
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index d5acaa2..4aa3cb6 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -34,22 +34,25 @@
 #define SD_MINOR	0
 #define SD_VERSION	(SD_MAJOR << 16 | SD_MINOR)
 
-#define SD_WS2008_MAJOR		1
-#define SD_WS2008_VERSION	(SD_WS2008_MAJOR << 16 | SD_MINOR)
+#define SD_MAJOR_1	1
+#define SD_VERSION_1	(SD_MAJOR_1 << 16 | SD_MINOR)
 
-#define TS_MAJOR	3
+#define TS_MAJOR	4
 #define TS_MINOR	0
 #define TS_VERSION	(TS_MAJOR << 16 | TS_MINOR)
 
-#define TS_WS2008_MAJOR		1
-#define TS_WS2008_VERSION	(TS_WS2008_MAJOR << 16 | TS_MINOR)
+#define TS_MAJOR_1	1
+#define TS_VERSION_1	(TS_MAJOR_1 << 16 | TS_MINOR)
+
+#define TS_MAJOR_3	3
+#define TS_VERSION_3	(TS_MAJOR_3 << 16 | TS_MINOR)
 
 #define HB_MAJOR	3
-#define HB_MINOR 0
+#define HB_MINOR	0
 #define HB_VERSION	(HB_MAJOR << 16 | HB_MINOR)
 
-#define HB_WS2008_MAJOR	1
-#define HB_WS2008_VERSION	(HB_WS2008_MAJOR << 16 | HB_MINOR)
+#define HB_MAJOR_1	1
+#define HB_VERSION_1	(HB_MAJOR_1 << 16 | HB_MINOR)
 
 static int sd_srv_version;
 static int ts_srv_version;
@@ -61,9 +64,14 @@
 	.util_cb = shutdown_onchannelcallback,
 };
 
+static int hv_timesync_init(struct hv_util_service *srv);
+static void hv_timesync_deinit(void);
+
 static void timesync_onchannelcallback(void *context);
 static struct hv_util_service util_timesynch = {
 	.util_cb = timesync_onchannelcallback,
+	.util_init = hv_timesync_init,
+	.util_deinit = hv_timesync_deinit,
 };
 
 static void heartbeat_onchannelcallback(void *context);
@@ -161,35 +169,43 @@
 }
 
 /*
- * Set guest time to host UTC time.
- */
-static inline void do_adj_guesttime(u64 hosttime)
-{
-	s64 host_tns;
-	struct timespec host_ts;
-
-	host_tns = (hosttime - WLTIMEDELTA) * 100;
-	host_ts = ns_to_timespec(host_tns);
-
-	do_settimeofday(&host_ts);
-}
-
-/*
  * Set the host time in a process context.
  */
 
 struct adj_time_work {
 	struct work_struct work;
 	u64	host_time;
+	u64	ref_time;
+	u8	flags;
 };
 
 static void hv_set_host_time(struct work_struct *work)
 {
 	struct adj_time_work	*wrk;
+	s64 host_tns;
+	u64 newtime;
+	struct timespec host_ts;
 
 	wrk = container_of(work, struct adj_time_work, work);
-	do_adj_guesttime(wrk->host_time);
-	kfree(wrk);
+
+	newtime = wrk->host_time;
+	if (ts_srv_version > TS_VERSION_3) {
+		/*
+		 * Some latency has been introduced since Hyper-V generated
+		 * its time sample. Take that latency into account before
+		 * using TSC reference time sample from Hyper-V.
+		 *
+		 * This sample is given by TimeSync v4 and above hosts.
+		 */
+		u64 current_tick;
+
+		rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
+		newtime += (current_tick - wrk->ref_time);
+	}
+	host_tns = (newtime - WLTIMEDELTA) * 100;
+	host_ts = ns_to_timespec(host_tns);
+
+	do_settimeofday(&host_ts);
 }
 
 /*
@@ -198,33 +214,31 @@
  * ICTIMESYNCFLAG_SYNC flag bit indicates reboot, restore events of the VM.
  * After reboot the flag ICTIMESYNCFLAG_SYNC is included in the first time
  * message after the timesync channel is opened. Since the hv_utils module is
- * loaded after hv_vmbus, the first message is usually missed. The other
- * thing is, systime is automatically set to emulated hardware clock which may
- * not be UTC time or in the same time zone. So, to override these effects, we
- * use the first 50 time samples for initial system time setting.
+ * loaded after hv_vmbus, the first message is usually missed. This bit is
+ * considered a hard request to discipline the clock.
+ *
+ * ICTIMESYNCFLAG_SAMPLE bit indicates a time sample from host. This is
+ * typically used as a hint to the guest. The guest is under no obligation
+ * to discipline the clock.
  */
-static inline void adj_guesttime(u64 hosttime, u8 flags)
+static struct adj_time_work  wrk;
+static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 flags)
 {
-	struct adj_time_work    *wrk;
-	static s32 scnt = 50;
 
-	wrk = kmalloc(sizeof(struct adj_time_work), GFP_ATOMIC);
-	if (wrk == NULL)
+	/*
+	 * This check is safe since we are executing in the
+	 * interrupt context and time synch messages arre always
+	 * delivered on the same CPU.
+	 */
+	if (work_pending(&wrk.work))
 		return;
 
-	wrk->host_time = hosttime;
-	if ((flags & ICTIMESYNCFLAG_SYNC) != 0) {
-		INIT_WORK(&wrk->work, hv_set_host_time);
-		schedule_work(&wrk->work);
-		return;
+	wrk.host_time = hosttime;
+	wrk.ref_time = reftime;
+	wrk.flags = flags;
+	if ((flags & (ICTIMESYNCFLAG_SYNC | ICTIMESYNCFLAG_SAMPLE)) != 0) {
+		schedule_work(&wrk.work);
 	}
-
-	if ((flags & ICTIMESYNCFLAG_SAMPLE) != 0 && scnt > 0) {
-		scnt--;
-		INIT_WORK(&wrk->work, hv_set_host_time);
-		schedule_work(&wrk->work);
-	} else
-		kfree(wrk);
 }
 
 /*
@@ -237,6 +251,7 @@
 	u64 requestid;
 	struct icmsg_hdr *icmsghdrp;
 	struct ictimesync_data *timedatap;
+	struct ictimesync_ref_data *refdata;
 	u8 *time_txf_buf = util_timesynch.recv_buffer;
 	struct icmsg_negotiate *negop = NULL;
 
@@ -252,11 +267,27 @@
 						time_txf_buf,
 						util_fw_version,
 						ts_srv_version);
+			pr_info("Using TimeSync version %d.%d\n",
+				ts_srv_version >> 16, ts_srv_version & 0xFFFF);
 		} else {
-			timedatap = (struct ictimesync_data *)&time_txf_buf[
-				sizeof(struct vmbuspipe_hdr) +
-				sizeof(struct icmsg_hdr)];
-			adj_guesttime(timedatap->parenttime, timedatap->flags);
+			if (ts_srv_version > TS_VERSION_3) {
+				refdata = (struct ictimesync_ref_data *)
+					&time_txf_buf[
+					sizeof(struct vmbuspipe_hdr) +
+					sizeof(struct icmsg_hdr)];
+
+				adj_guesttime(refdata->parenttime,
+						refdata->vmreferencetime,
+						refdata->flags);
+			} else {
+				timedatap = (struct ictimesync_data *)
+					&time_txf_buf[
+					sizeof(struct vmbuspipe_hdr) +
+					sizeof(struct icmsg_hdr)];
+				adj_guesttime(timedatap->parenttime,
+						0,
+						timedatap->flags);
+			}
 		}
 
 		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
@@ -350,16 +381,21 @@
 	switch (vmbus_proto_version) {
 	case (VERSION_WS2008):
 		util_fw_version = UTIL_WS2K8_FW_VERSION;
-		sd_srv_version = SD_WS2008_VERSION;
-		ts_srv_version = TS_WS2008_VERSION;
-		hb_srv_version = HB_WS2008_VERSION;
+		sd_srv_version = SD_VERSION_1;
+		ts_srv_version = TS_VERSION_1;
+		hb_srv_version = HB_VERSION_1;
 		break;
-
-	default:
+	case(VERSION_WIN10):
 		util_fw_version = UTIL_FW_VERSION;
 		sd_srv_version = SD_VERSION;
 		ts_srv_version = TS_VERSION;
 		hb_srv_version = HB_VERSION;
+		break;
+	default:
+		util_fw_version = UTIL_FW_VERSION;
+		sd_srv_version = SD_VERSION;
+		ts_srv_version = TS_VERSION_3;
+		hb_srv_version = HB_VERSION;
 	}
 
 	ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE, NULL, 0,
@@ -427,6 +463,17 @@
 	.remove =  util_remove,
 };
 
+static int hv_timesync_init(struct hv_util_service *srv)
+{
+	INIT_WORK(&wrk.work, hv_set_host_time);
+	return 0;
+}
+
+static void hv_timesync_deinit(void)
+{
+	cancel_work_sync(&wrk.work);
+}
+
 static int __init init_hyperv_utils(void)
 {
 	pr_info("Registering HyperV Utility Driver\n");
diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c
index 9a9983f..c235a95 100644
--- a/drivers/hv/hv_utils_transport.c
+++ b/drivers/hv/hv_utils_transport.c
@@ -72,6 +72,10 @@
 	hvt->outmsg = NULL;
 	hvt->outmsg_len = 0;
 
+	if (hvt->on_read)
+		hvt->on_read();
+	hvt->on_read = NULL;
+
 out_unlock:
 	mutex_unlock(&hvt->lock);
 	return ret;
@@ -219,7 +223,8 @@
 	mutex_unlock(&hvt->lock);
 }
 
-int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len)
+int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len,
+			  void (*on_read_cb)(void))
 {
 	struct cn_msg *cn_msg;
 	int ret = 0;
@@ -237,6 +242,13 @@
 		memcpy(cn_msg->data, msg, len);
 		ret = cn_netlink_send(cn_msg, 0, 0, GFP_ATOMIC);
 		kfree(cn_msg);
+		/*
+		 * We don't know when netlink messages are delivered but unlike
+		 * in CHARDEV mode we're not blocked and we can send next
+		 * messages right away.
+		 */
+		if (on_read_cb)
+			on_read_cb();
 		return ret;
 	}
 	/* HVUTIL_TRANSPORT_CHARDEV */
@@ -255,6 +267,7 @@
 	if (hvt->outmsg) {
 		memcpy(hvt->outmsg, msg, len);
 		hvt->outmsg_len = len;
+		hvt->on_read = on_read_cb;
 		wake_up_interruptible(&hvt->outmsg_q);
 	} else
 		ret = -ENOMEM;
diff --git a/drivers/hv/hv_utils_transport.h b/drivers/hv/hv_utils_transport.h
index 06254a1..d98f522 100644
--- a/drivers/hv/hv_utils_transport.h
+++ b/drivers/hv/hv_utils_transport.h
@@ -36,6 +36,7 @@
 	struct list_head list;              /* hvt_list */
 	int (*on_msg)(void *, int);         /* callback on new user message */
 	void (*on_reset)(void);             /* callback when userspace drops */
+	void (*on_read)(void);              /* callback on message read */
 	u8 *outmsg;                         /* message to the userspace */
 	int outmsg_len;                     /* its length */
 	wait_queue_head_t outmsg_q;         /* poll/read wait queue */
@@ -46,7 +47,8 @@
 					       u32 cn_idx, u32 cn_val,
 					       int (*on_msg)(void *, int),
 					       void (*on_reset)(void));
-int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len);
+int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len,
+			  void (*on_read_cb)(void));
 void hvutil_transport_destroy(struct hvutil_transport *hvt);
 
 #endif /* _HV_UTILS_TRANSPORT_H */
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 718b5c7..a5b4442 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -495,7 +495,7 @@
 
 extern int hv_init(void);
 
-extern void hv_cleanup(void);
+extern void hv_cleanup(bool crash);
 
 extern int hv_post_message(union hv_connection_id connection_id,
 			 enum hv_message_type message_type,
@@ -522,14 +522,15 @@
 /* Interface */
 
 
-int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, void *buffer,
-		   u32 buflen);
+int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
+		       struct page *pages, u32 pagecnt);
 
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);
 
 int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info,
 		    struct kvec *kv_list,
-		    u32 kv_count, bool *signal, bool lock);
+		    u32 kv_count, bool *signal, bool lock,
+		    enum hv_signal_policy policy);
 
 int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info,
 		       void *buffer, u32 buflen, u32 *buffer_actual_len,
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index fe586bf..08043da 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -27,6 +27,8 @@
 #include <linux/mm.h>
 #include <linux/hyperv.h>
 #include <linux/uio.h>
+#include <linux/vmalloc.h>
+#include <linux/slab.h>
 
 #include "hyperv_vmbus.h"
 
@@ -66,12 +68,20 @@
  *	   arrived.
  */
 
-static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi)
+static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi,
+			      enum hv_signal_policy policy)
 {
 	virt_mb();
 	if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
 		return false;
 
+	/*
+	 * When the client wants to control signaling,
+	 * we only honour the host interrupt mask.
+	 */
+	if (policy == HV_SIGNAL_POLICY_EXPLICIT)
+		return true;
+
 	/* check interrupt_mask before read_index */
 	virt_rmb();
 	/*
@@ -162,18 +172,7 @@
 	void *ring_buffer = hv_get_ring_buffer(ring_info);
 	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
 
-	u32 frag_len;
-
-	/* wrap-around detected at the src */
-	if (destlen > ring_buffer_size - start_read_offset) {
-		frag_len = ring_buffer_size - start_read_offset;
-
-		memcpy(dest, ring_buffer + start_read_offset, frag_len);
-		memcpy(dest + frag_len, ring_buffer, destlen - frag_len);
-	} else
-
-		memcpy(dest, ring_buffer + start_read_offset, destlen);
-
+	memcpy(dest, ring_buffer + start_read_offset, destlen);
 
 	start_read_offset += destlen;
 	start_read_offset %= ring_buffer_size;
@@ -194,15 +193,8 @@
 {
 	void *ring_buffer = hv_get_ring_buffer(ring_info);
 	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
-	u32 frag_len;
 
-	/* wrap-around detected! */
-	if (srclen > ring_buffer_size - start_write_offset) {
-		frag_len = ring_buffer_size - start_write_offset;
-		memcpy(ring_buffer + start_write_offset, src, frag_len);
-		memcpy(ring_buffer, src + frag_len, srclen - frag_len);
-	} else
-		memcpy(ring_buffer + start_write_offset, src, srclen);
+	memcpy(ring_buffer + start_write_offset, src, srclen);
 
 	start_write_offset += srclen;
 	start_write_offset %= ring_buffer_size;
@@ -235,22 +227,46 @@
 
 /* Initialize the ring buffer. */
 int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
-		   void *buffer, u32 buflen)
+		       struct page *pages, u32 page_cnt)
 {
-	if (sizeof(struct hv_ring_buffer) != PAGE_SIZE)
-		return -EINVAL;
+	int i;
+	struct page **pages_wraparound;
+
+	BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
 
 	memset(ring_info, 0, sizeof(struct hv_ring_buffer_info));
 
-	ring_info->ring_buffer = (struct hv_ring_buffer *)buffer;
+	/*
+	 * First page holds struct hv_ring_buffer, do wraparound mapping for
+	 * the rest.
+	 */
+	pages_wraparound = kzalloc(sizeof(struct page *) * (page_cnt * 2 - 1),
+				   GFP_KERNEL);
+	if (!pages_wraparound)
+		return -ENOMEM;
+
+	pages_wraparound[0] = pages;
+	for (i = 0; i < 2 * (page_cnt - 1); i++)
+		pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];
+
+	ring_info->ring_buffer = (struct hv_ring_buffer *)
+		vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);
+
+	kfree(pages_wraparound);
+
+
+	if (!ring_info->ring_buffer)
+		return -ENOMEM;
+
 	ring_info->ring_buffer->read_index =
 		ring_info->ring_buffer->write_index = 0;
 
 	/* Set the feature bit for enabling flow control. */
 	ring_info->ring_buffer->feature_bits.value = 1;
 
-	ring_info->ring_size = buflen;
-	ring_info->ring_datasize = buflen - sizeof(struct hv_ring_buffer);
+	ring_info->ring_size = page_cnt << PAGE_SHIFT;
+	ring_info->ring_datasize = ring_info->ring_size -
+		sizeof(struct hv_ring_buffer);
 
 	spin_lock_init(&ring_info->ring_lock);
 
@@ -260,11 +276,13 @@
 /* Cleanup the ring buffer. */
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
 {
+	vunmap(ring_info->ring_buffer);
 }
 
 /* Write to the ring buffer. */
 int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info,
-		    struct kvec *kv_list, u32 kv_count, bool *signal, bool lock)
+		    struct kvec *kv_list, u32 kv_count, bool *signal, bool lock,
+		    enum hv_signal_policy policy)
 {
 	int i = 0;
 	u32 bytes_avail_towrite;
@@ -326,7 +344,7 @@
 	if (lock)
 		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
 
-	*signal = hv_need_to_signal(old_write, outring_info);
+	*signal = hv_need_to_signal(old_write, outring_info, policy);
 	return 0;
 }
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e82f7e1..a259e18 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -105,8 +105,8 @@
 
 static const char *fb_mmio_name = "fb_range";
 static struct resource *fb_mmio;
-struct resource *hyperv_mmio;
-DEFINE_SEMAPHORE(hyperv_mmio_lock);
+static struct resource *hyperv_mmio;
+static DEFINE_SEMAPHORE(hyperv_mmio_lock);
 
 static int vmbus_exists(void)
 {
@@ -874,7 +874,7 @@
 	bus_unregister(&hv_bus);
 
 err_cleanup:
-	hv_cleanup();
+	hv_cleanup(false);
 
 	return ret;
 }
@@ -961,8 +961,8 @@
 {
 	int ret = 0;
 
-	dev_set_name(&child_device_obj->device, "vmbus_%d",
-		     child_device_obj->channel->id);
+	dev_set_name(&child_device_obj->device, "vmbus-%pUl",
+		     child_device_obj->channel->offermsg.offer.if_instance.b);
 
 	child_device_obj->device.bus = &hv_bus;
 	child_device_obj->device.parent = &hv_acpi_dev->dev;
@@ -1326,7 +1326,7 @@
 	vmbus_initiate_unload(false);
 	for_each_online_cpu(cpu)
 		smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
-	hv_cleanup();
+	hv_cleanup(false);
 };
 
 static void hv_crash_handler(struct pt_regs *regs)
@@ -1338,7 +1338,7 @@
 	 * for kdump.
 	 */
 	hv_synic_cleanup(NULL);
-	hv_cleanup();
+	hv_cleanup(true);
 };
 
 static int __init hv_acpi_init(void)
@@ -1398,7 +1398,7 @@
 						 &hyperv_panic_block);
 	}
 	bus_unregister(&hv_bus);
-	hv_cleanup();
+	hv_cleanup(false);
 	for_each_online_cpu(cpu) {
 		tasklet_kill(hv_context.event_dpc[cpu]);
 		smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index eaf2f91..45cef3d 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -969,7 +969,6 @@
 config SENSORS_LM75
 	tristate "National Semiconductor LM75 and compatibles"
 	depends on I2C
-	depends on THERMAL || !THERMAL_OF
 	select REGMAP_I2C
 	help
 	  If you say yes here you get support for one common type of
@@ -1119,6 +1118,7 @@
 config SENSORS_LM95245
 	tristate "National Semiconductor LM95245 and compatibles"
 	depends on I2C
+	select REGMAP_I2C
 	help
 	  If you say yes here you get support for LM95235 and LM95245
 	  temperature sensor chips.
@@ -1572,7 +1572,6 @@
 config SENSORS_TMP102
 	tristate "Texas Instruments TMP102"
 	depends on I2C
-	depends on THERMAL || !THERMAL_OF
 	select REGMAP_I2C
 	help
 	  If you say yes here you get support for Texas Instruments TMP102
@@ -1823,6 +1822,13 @@
 	  This driver provides support for the Ultra45 workstation environmental
 	  sensors.
 
+config SENSORS_XGENE
+	tristate "APM X-Gene SoC hardware monitoring driver"
+	depends on XGENE_SLIMPRO_MBOX || PCC
+	help
+	  If you say yes here you get support for the temperature
+	  and power sensors for APM X-Gene SoC.
+
 if ACPI
 
 comment "ACPI drivers"
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index fe87d28..aecf4ba 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -165,6 +165,7 @@
 obj-$(CONFIG_SENSORS_W83L786NG)	+= w83l786ng.o
 obj-$(CONFIG_SENSORS_WM831X)	+= wm831x-hwmon.o
 obj-$(CONFIG_SENSORS_WM8350)	+= wm8350-hwmon.o
+obj-$(CONFIG_SENSORS_XGENE)	+= xgene-hwmon.o
 
 obj-$(CONFIG_PMBUS)		+= pmbus/
 
diff --git a/drivers/hwmon/adt7411.c b/drivers/hwmon/adt7411.c
index fc1e65a..812fbc0 100644
--- a/drivers/hwmon/adt7411.c
+++ b/drivers/hwmon/adt7411.c
@@ -7,8 +7,7 @@
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  *
- *  TODO: SPI, support for external temperature sensor
- *	  use power-down mode for suspend?, interrupt handling?
+ *  TODO: SPI, use power-down mode for suspend?, interrupt handling?
  */
 
 #include <linux/kernel.h>
@@ -31,6 +30,7 @@
 #define ADT7411_REG_CFG1			0x18
 #define ADT7411_CFG1_START_MONITOR		(1 << 0)
 #define ADT7411_CFG1_RESERVED_BIT1		(1 << 1)
+#define ADT7411_CFG1_EXT_TDM			(1 << 2)
 #define ADT7411_CFG1_RESERVED_BIT3		(1 << 3)
 
 #define ADT7411_REG_CFG2			0x19
@@ -57,6 +57,7 @@
 	unsigned long next_update;
 	int vref_cached;
 	struct i2c_client *client;
+	bool use_ext_temp;
 };
 
 /*
@@ -127,11 +128,20 @@
 static ssize_t adt7411_show_temp(struct device *dev,
 			struct device_attribute *attr, char *buf)
 {
+	int nr = to_sensor_dev_attr(attr)->index;
 	struct adt7411_data *data = dev_get_drvdata(dev);
 	struct i2c_client *client = data->client;
-	int val = adt7411_read_10_bit(client, ADT7411_REG_INT_TEMP_VDD_LSB,
-			ADT7411_REG_INT_TEMP_MSB, 0);
+	int val;
+	struct {
+		u8 low;
+		u8 high;
+	} reg[2] = {
+		{ ADT7411_REG_INT_TEMP_VDD_LSB, ADT7411_REG_INT_TEMP_MSB },
+		{ ADT7411_REG_EXT_TEMP_AIN14_LSB,
+		  ADT7411_REG_EXT_TEMP_AIN1_MSB },
+	};
 
+	val = adt7411_read_10_bit(client, reg[nr].low, reg[nr].high, 0);
 	if (val < 0)
 		return val;
 
@@ -218,11 +228,13 @@
 	return ret < 0 ? ret : count;
 }
 
+
 #define ADT7411_BIT_ATTR(__name, __reg, __bit) \
 	SENSOR_DEVICE_ATTR_2(__name, S_IRUGO | S_IWUSR, adt7411_show_bit, \
 	adt7411_set_bit, __bit, __reg)
 
-static DEVICE_ATTR(temp1_input, S_IRUGO, adt7411_show_temp, NULL);
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, adt7411_show_temp, NULL, 0);
+static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, adt7411_show_temp, NULL, 1);
 static DEVICE_ATTR(in0_input, S_IRUGO, adt7411_show_vdd, NULL);
 static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, adt7411_show_input, NULL, 0);
 static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, adt7411_show_input, NULL, 1);
@@ -237,7 +249,8 @@
 static ADT7411_BIT_ATTR(adc_ref_vdd, ADT7411_REG_CFG3, ADT7411_CFG3_REF_VDD);
 
 static struct attribute *adt7411_attrs[] = {
-	&dev_attr_temp1_input.attr,
+	&sensor_dev_attr_temp1_input.dev_attr.attr,
+	&sensor_dev_attr_temp2_input.dev_attr.attr,
 	&dev_attr_in0_input.attr,
 	&sensor_dev_attr_in1_input.dev_attr.attr,
 	&sensor_dev_attr_in2_input.dev_attr.attr,
@@ -253,7 +266,27 @@
 	NULL
 };
 
-ATTRIBUTE_GROUPS(adt7411);
+static umode_t adt7411_attrs_visible(struct kobject *kobj,
+				     struct attribute *attr, int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct adt7411_data *data = dev_get_drvdata(dev);
+	bool visible = true;
+
+	if (attr == &sensor_dev_attr_temp2_input.dev_attr.attr)
+		visible = data->use_ext_temp;
+	else if (attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
+		 attr == &sensor_dev_attr_in2_input.dev_attr.attr)
+		visible = !data->use_ext_temp;
+
+	return visible ? attr->mode : 0;
+}
+
+static const struct attribute_group adt7411_group = {
+	.attrs = adt7411_attrs,
+	.is_visible = adt7411_attrs_visible,
+};
+__ATTRIBUTE_GROUPS(adt7411);
 
 static int adt7411_detect(struct i2c_client *client,
 			  struct i2c_board_info *info)
@@ -309,6 +342,8 @@
 	if (ret < 0)
 		return ret;
 
+	data->use_ext_temp = ret & ADT7411_CFG1_EXT_TDM;
+
 	/*
 	 * We must only write zero to bit 1 and only one to bit 3 according to
 	 * the datasheet.
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index f5da39a..6e60ca5 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -32,6 +32,7 @@
 #include <linux/log2.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <linux/util_macros.h>
 
 /* Addresses to scan */
 static const unsigned short normal_i2c[] = { 0x2C, 0x2E, 0x2F, I2C_CLIENT_END };
@@ -83,6 +84,7 @@
 #define ADT7470_REG_PWM_MIN_MAX_ADDR		0x6D
 #define ADT7470_REG_PWM_TEMP_MIN_BASE_ADDR	0x6E
 #define ADT7470_REG_PWM_TEMP_MIN_MAX_ADDR	0x71
+#define ADT7470_REG_CFG_2			0x74
 #define ADT7470_REG_ACOUSTICS12			0x75
 #define ADT7470_REG_ACOUSTICS34			0x76
 #define ADT7470_REG_DEVICE			0x3D
@@ -142,6 +144,11 @@
 #define FAN_PERIOD_INVALID	65535
 #define FAN_DATA_VALID(x)	((x) && (x) != FAN_PERIOD_INVALID)
 
+/* Config registers 1 and 2 include fields for selecting the PWM frequency */
+#define ADT7470_CFG_LF		0x40
+#define ADT7470_FREQ_MASK	0x70
+#define ADT7470_FREQ_SHIFT	4
+
 struct adt7470_data {
 	struct i2c_client	*client;
 	struct mutex		lock;
@@ -170,7 +177,6 @@
 	u8			pwm_auto_temp[ADT7470_PWM_COUNT];
 
 	struct task_struct	*auto_update;
-	struct completion	auto_update_stop;
 	unsigned int		auto_update_interval;
 };
 
@@ -266,12 +272,14 @@
 		mutex_lock(&data->lock);
 		adt7470_read_temperatures(client, data);
 		mutex_unlock(&data->lock);
+
+		set_current_state(TASK_INTERRUPTIBLE);
 		if (kthread_should_stop())
 			break;
-		msleep_interruptible(data->auto_update_interval);
+
+		schedule_timeout(msecs_to_jiffies(data->auto_update_interval));
 	}
 
-	complete_all(&data->auto_update_stop);
 	return 0;
 }
 
@@ -538,6 +546,28 @@
 	return sprintf(buf, "%x\n", data->alarms_mask);
 }
 
+static ssize_t set_alarm_mask(struct device *dev,
+			      struct device_attribute *devattr,
+			      const char *buf,
+			      size_t count)
+{
+	struct adt7470_data *data = dev_get_drvdata(dev);
+	long mask;
+
+	if (kstrtoul(buf, 0, &mask))
+		return -EINVAL;
+
+	if (mask & ~0xffff)
+		return -EINVAL;
+
+	mutex_lock(&data->lock);
+	data->alarms_mask = mask;
+	adt7470_write_word_data(data->client, ADT7470_REG_ALARM1_MASK, mask);
+	mutex_unlock(&data->lock);
+
+	return count;
+}
+
 static ssize_t show_fan_max(struct device *dev,
 			    struct device_attribute *devattr,
 			    char *buf)
@@ -688,6 +718,70 @@
 	return count;
 }
 
+/* These are the valid PWM frequencies to the nearest Hz */
+static const int adt7470_freq_map[] = {
+	11, 15, 22, 29, 35, 44, 59, 88, 1400, 22500
+};
+
+static ssize_t show_pwm_freq(struct device *dev,
+			     struct device_attribute *devattr, char *buf)
+{
+	struct adt7470_data *data = adt7470_update_device(dev);
+	unsigned char cfg_reg_1;
+	unsigned char cfg_reg_2;
+	int index;
+
+	mutex_lock(&data->lock);
+	cfg_reg_1 = i2c_smbus_read_byte_data(data->client, ADT7470_REG_CFG);
+	cfg_reg_2 = i2c_smbus_read_byte_data(data->client, ADT7470_REG_CFG_2);
+	mutex_unlock(&data->lock);
+
+	index = (cfg_reg_2 & ADT7470_FREQ_MASK) >> ADT7470_FREQ_SHIFT;
+	if (!(cfg_reg_1 & ADT7470_CFG_LF))
+		index += 8;
+	if (index >= ARRAY_SIZE(adt7470_freq_map))
+		index = ARRAY_SIZE(adt7470_freq_map) - 1;
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", adt7470_freq_map[index]);
+}
+
+static ssize_t set_pwm_freq(struct device *dev,
+			    struct device_attribute *devattr,
+			    const char *buf, size_t count)
+{
+	struct adt7470_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	long freq;
+	int index;
+	int low_freq = ADT7470_CFG_LF;
+	unsigned char val;
+
+	if (kstrtol(buf, 10, &freq))
+		return -EINVAL;
+
+	/* Round the user value given to the closest available frequency */
+	index = find_closest(freq, adt7470_freq_map,
+			     ARRAY_SIZE(adt7470_freq_map));
+
+	if (index >= 8) {
+		index -= 8;
+		low_freq = 0;
+	}
+
+	mutex_lock(&data->lock);
+	/* Configuration Register 1 */
+	val = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG);
+	i2c_smbus_write_byte_data(client, ADT7470_REG_CFG,
+				  (val & ~ADT7470_CFG_LF) | low_freq);
+	/* Configuration Register 2 */
+	val = i2c_smbus_read_byte_data(client, ADT7470_REG_CFG_2);
+	i2c_smbus_write_byte_data(client, ADT7470_REG_CFG_2,
+		(val & ~ADT7470_FREQ_MASK) | (index << ADT7470_FREQ_SHIFT));
+	mutex_unlock(&data->lock);
+
+	return count;
+}
+
 static ssize_t show_pwm_max(struct device *dev,
 			    struct device_attribute *devattr,
 			    char *buf)
@@ -918,7 +1012,8 @@
 		return sprintf(buf, "0\n");
 }
 
-static DEVICE_ATTR(alarm_mask, S_IRUGO, show_alarm_mask, NULL);
+static DEVICE_ATTR(alarm_mask, S_IWUSR | S_IRUGO, show_alarm_mask,
+		   set_alarm_mask);
 static DEVICE_ATTR(num_temp_sensors, S_IWUSR | S_IRUGO, show_num_temp_sensors,
 		   set_num_temp_sensors);
 static DEVICE_ATTR(auto_update_interval, S_IWUSR | S_IRUGO,
@@ -1038,6 +1133,8 @@
 static SENSOR_DEVICE_ATTR(pwm3, S_IWUSR | S_IRUGO, show_pwm, set_pwm, 2);
 static SENSOR_DEVICE_ATTR(pwm4, S_IWUSR | S_IRUGO, show_pwm, set_pwm, 3);
 
+static DEVICE_ATTR(pwm1_freq, S_IWUSR | S_IRUGO, show_pwm_freq, set_pwm_freq);
+
 static SENSOR_DEVICE_ATTR(pwm1_auto_point1_pwm, S_IWUSR | S_IRUGO,
 		    show_pwm_min, set_pwm_min, 0);
 static SENSOR_DEVICE_ATTR(pwm2_auto_point1_pwm, S_IWUSR | S_IRUGO,
@@ -1154,6 +1251,7 @@
 	&sensor_dev_attr_fan4_alarm.dev_attr.attr,
 	&sensor_dev_attr_force_pwm_max.dev_attr.attr,
 	&sensor_dev_attr_pwm1.dev_attr.attr,
+	&dev_attr_pwm1_freq.attr,
 	&sensor_dev_attr_pwm2.dev_attr.attr,
 	&sensor_dev_attr_pwm3.dev_attr.attr,
 	&sensor_dev_attr_pwm4.dev_attr.attr,
@@ -1256,7 +1354,6 @@
 	if (IS_ERR(hwmon_dev))
 		return PTR_ERR(hwmon_dev);
 
-	init_completion(&data->auto_update_stop);
 	data->auto_update = kthread_run(adt7470_update_thread, client, "%s",
 					dev_name(hwmon_dev));
 	if (IS_ERR(data->auto_update)) {
@@ -1271,7 +1368,6 @@
 	struct adt7470_data *data = i2c_get_clientdata(client);
 
 	kthread_stop(data->auto_update);
-	wait_for_completion(&data->auto_update_stop);
 	return 0;
 }
 
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index acf9c03..34704b0 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -21,6 +21,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -36,6 +37,7 @@
 #include <linux/io.h>
 #include <linux/sched.h>
 #include <linux/ctype.h>
+#include <linux/smp.h>
 
 #include <linux/i8k.h>
 
@@ -134,11 +136,11 @@
 /*
  * Call the System Management Mode BIOS. Code provided by Jonathan Buzzard.
  */
-static int i8k_smm(struct smm_regs *regs)
+static int i8k_smm_func(void *par)
 {
 	int rc;
+	struct smm_regs *regs = par;
 	int eax = regs->eax;
-	cpumask_var_t old_mask;
 
 #ifdef DEBUG
 	int ebx = regs->ebx;
@@ -149,16 +151,8 @@
 #endif
 
 	/* SMM requires CPU 0 */
-	if (!alloc_cpumask_var(&old_mask, GFP_KERNEL))
-		return -ENOMEM;
-	cpumask_copy(old_mask, &current->cpus_allowed);
-	rc = set_cpus_allowed_ptr(current, cpumask_of(0));
-	if (rc)
-		goto out;
-	if (smp_processor_id() != 0) {
-		rc = -EBUSY;
-		goto out;
-	}
+	if (smp_processor_id() != 0)
+		return -EBUSY;
 
 #if defined(CONFIG_X86_64)
 	asm volatile("pushq %%rax\n\t"
@@ -216,10 +210,6 @@
 	if (rc != 0 || (regs->eax & 0xffff) == 0xffff || regs->eax == eax)
 		rc = -EINVAL;
 
-out:
-	set_cpus_allowed_ptr(current, old_mask);
-	free_cpumask_var(old_mask);
-
 #ifdef DEBUG
 	rettime = ktime_get();
 	delta = ktime_sub(rettime, calltime);
@@ -232,6 +222,20 @@
 }
 
 /*
+ * Call the System Management Mode BIOS.
+ */
+static int i8k_smm(struct smm_regs *regs)
+{
+	int ret;
+
+	get_online_cpus();
+	ret = smp_call_on_cpu(0, i8k_smm_func, regs, true);
+	put_online_cpus();
+
+	return ret;
+}
+
+/*
  * Read the fan status.
  */
 static int i8k_get_fan_status(int fan)
diff --git a/drivers/hwmon/ftsteutates.c b/drivers/hwmon/ftsteutates.c
index 48633e5..0f0277e 100644
--- a/drivers/hwmon/ftsteutates.c
+++ b/drivers/hwmon/ftsteutates.c
@@ -36,6 +36,10 @@
 #define FTS_EVENT_STATUS_REG		0x0006
 #define FTS_GLOBAL_CONTROL_REG		0x0007
 
+#define FTS_DEVICE_DETECT_REG_1		0x0C
+#define FTS_DEVICE_DETECT_REG_2		0x0D
+#define FTS_DEVICE_DETECT_REG_3		0x0E
+
 #define FTS_SENSOR_EVENT_REG		0x0010
 
 #define FTS_FAN_EVENT_REG		0x0014
@@ -54,6 +58,8 @@
 #define FTS_NO_TEMP_SENSORS		0x10
 #define FTS_NO_VOLT_SENSORS		0x04
 
+static const unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END };
+
 static struct i2c_device_id fts_id[] = {
 	{ "ftsteutates", 0 },
 	{ }
@@ -734,6 +740,42 @@
 /*****************************************************************************/
 /* Module initialization / remove functions				     */
 /*****************************************************************************/
+static int fts_detect(struct i2c_client *client,
+		      struct i2c_board_info *info)
+{
+	int val;
+
+	/* detection works with revsion greater or equal to 0x2b */
+	val = i2c_smbus_read_byte_data(client, FTS_DEVICE_REVISION_REG);
+	if (val < 0x2b)
+		return -ENODEV;
+
+	/* Device Detect Regs must have 0x17 0x34 and 0x54 */
+	val = i2c_smbus_read_byte_data(client, FTS_DEVICE_DETECT_REG_1);
+	if (val != 0x17)
+		return -ENODEV;
+
+	val = i2c_smbus_read_byte_data(client, FTS_DEVICE_DETECT_REG_2);
+	if (val != 0x34)
+		return -ENODEV;
+
+	val = i2c_smbus_read_byte_data(client, FTS_DEVICE_DETECT_REG_3);
+	if (val != 0x54)
+		return -ENODEV;
+
+	/*
+	 * 0x10 == Baseboard Management Controller, 0x01 == Teutates
+	 * Device ID Reg needs to be 0x11
+	 */
+	val = i2c_smbus_read_byte_data(client, FTS_DEVICE_ID_REG);
+	if (val != 0x11)
+		return -ENODEV;
+
+	strlcpy(info->type, fts_id[0].name, I2C_NAME_SIZE);
+	info->flags = 0;
+	return 0;
+}
+
 static int fts_remove(struct i2c_client *client)
 {
 	struct fts_data *data = dev_get_drvdata(&client->dev);
@@ -804,12 +846,15 @@
 /* Module Details							     */
 /*****************************************************************************/
 static struct i2c_driver fts_driver = {
+	.class = I2C_CLASS_HWMON,
 	.driver = {
 		.name = "ftsteutates",
 	},
 	.id_table = fts_id,
 	.probe = fts_probe,
 	.remove = fts_remove,
+	.detect = fts_detect,
+	.address_list = normal_i2c,
 };
 
 module_i2c_driver(fts_driver);
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index a26c385..adae684 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -12,17 +12,17 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/module.h>
+#include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/kdev_t.h>
-#include <linux/idr.h>
-#include <linux/hwmon.h>
 #include <linux/gfp.h>
-#include <linux/spinlock.h>
+#include <linux/hwmon.h>
+#include <linux/idr.h>
+#include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/thermal.h>
 
 #define HWMON_ID_PREFIX "hwmon"
 #define HWMON_ID_FORMAT HWMON_ID_PREFIX "%d"
@@ -30,9 +30,35 @@
 struct hwmon_device {
 	const char *name;
 	struct device dev;
+	const struct hwmon_chip_info *chip;
+
+	struct attribute_group group;
+	const struct attribute_group **groups;
 };
+
 #define to_hwmon_device(d) container_of(d, struct hwmon_device, dev)
 
+struct hwmon_device_attribute {
+	struct device_attribute dev_attr;
+	const struct hwmon_ops *ops;
+	enum hwmon_sensor_types type;
+	u32 attr;
+	int index;
+};
+
+#define to_hwmon_attr(d) \
+	container_of(d, struct hwmon_device_attribute, dev_attr)
+
+/*
+ * Thermal zone information
+ * In addition to the reference to the hwmon device,
+ * also provides the sensor index.
+ */
+struct hwmon_thermal_data {
+	struct hwmon_device *hwdev;	/* Reference to hwmon device */
+	int index;			/* sensor index */
+};
+
 static ssize_t
 show_name(struct device *dev, struct device_attribute *attr, char *buf)
 {
@@ -80,6 +106,498 @@
 
 static DEFINE_IDA(hwmon_ida);
 
+/* Thermal zone handling */
+
+/*
+ * The complex conditional is necessary to avoid a cyclic dependency
+ * between hwmon and thermal_sys modules.
+ */
+#if IS_REACHABLE(CONFIG_THERMAL) && defined(CONFIG_THERMAL_OF) && \
+	(!defined(CONFIG_THERMAL_HWMON) || \
+	 !(defined(MODULE) && IS_MODULE(CONFIG_THERMAL)))
+static int hwmon_thermal_get_temp(void *data, int *temp)
+{
+	struct hwmon_thermal_data *tdata = data;
+	struct hwmon_device *hwdev = tdata->hwdev;
+	int ret;
+	long t;
+
+	ret = hwdev->chip->ops->read(&hwdev->dev, hwmon_temp, hwmon_temp_input,
+				     tdata->index, &t);
+	if (ret < 0)
+		return ret;
+
+	*temp = t;
+
+	return 0;
+}
+
+static struct thermal_zone_of_device_ops hwmon_thermal_ops = {
+	.get_temp = hwmon_thermal_get_temp,
+};
+
+static int hwmon_thermal_add_sensor(struct device *dev,
+				    struct hwmon_device *hwdev, int index)
+{
+	struct hwmon_thermal_data *tdata;
+
+	tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL);
+	if (!tdata)
+		return -ENOMEM;
+
+	tdata->hwdev = hwdev;
+	tdata->index = index;
+
+	devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata,
+					     &hwmon_thermal_ops);
+
+	return 0;
+}
+#else
+static int hwmon_thermal_add_sensor(struct device *dev,
+				    struct hwmon_device *hwdev, int index)
+{
+	return 0;
+}
+#endif /* IS_REACHABLE(CONFIG_THERMAL) && ... */
+
+/* sysfs attribute management */
+
+static ssize_t hwmon_attr_show(struct device *dev,
+			       struct device_attribute *devattr, char *buf)
+{
+	struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr);
+	long val;
+	int ret;
+
+	ret = hattr->ops->read(dev, hattr->type, hattr->attr, hattr->index,
+			       &val);
+	if (ret < 0)
+		return ret;
+
+	return sprintf(buf, "%ld\n", val);
+}
+
+static ssize_t hwmon_attr_store(struct device *dev,
+				struct device_attribute *devattr,
+				const char *buf, size_t count)
+{
+	struct hwmon_device_attribute *hattr = to_hwmon_attr(devattr);
+	long val;
+	int ret;
+
+	ret = kstrtol(buf, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	ret = hattr->ops->write(dev, hattr->type, hattr->attr, hattr->index,
+				val);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+
+static int hwmon_attr_base(enum hwmon_sensor_types type)
+{
+	if (type == hwmon_in)
+		return 0;
+	return 1;
+}
+
+static struct attribute *hwmon_genattr(struct device *dev,
+				       const void *drvdata,
+				       enum hwmon_sensor_types type,
+				       u32 attr,
+				       int index,
+				       const char *template,
+				       const struct hwmon_ops *ops)
+{
+	struct hwmon_device_attribute *hattr;
+	struct device_attribute *dattr;
+	struct attribute *a;
+	umode_t mode;
+	char *name;
+
+	/* The attribute is invisible if there is no template string */
+	if (!template)
+		return ERR_PTR(-ENOENT);
+
+	mode = ops->is_visible(drvdata, type, attr, index);
+	if (!mode)
+		return ERR_PTR(-ENOENT);
+
+	if ((mode & S_IRUGO) && !ops->read)
+		return ERR_PTR(-EINVAL);
+	if ((mode & S_IWUGO) && !ops->write)
+		return ERR_PTR(-EINVAL);
+
+	if (type == hwmon_chip) {
+		name = (char *)template;
+	} else {
+		name = devm_kzalloc(dev, strlen(template) + 16, GFP_KERNEL);
+		if (!name)
+			return ERR_PTR(-ENOMEM);
+		scnprintf(name, strlen(template) + 16, template,
+			  index + hwmon_attr_base(type));
+	}
+
+	hattr = devm_kzalloc(dev, sizeof(*hattr), GFP_KERNEL);
+	if (!hattr)
+		return ERR_PTR(-ENOMEM);
+
+	hattr->type = type;
+	hattr->attr = attr;
+	hattr->index = index;
+	hattr->ops = ops;
+
+	dattr = &hattr->dev_attr;
+	dattr->show = hwmon_attr_show;
+	dattr->store = hwmon_attr_store;
+
+	a = &dattr->attr;
+	sysfs_attr_init(a);
+	a->name = name;
+	a->mode = mode;
+
+	return a;
+}
+
+static const char * const hwmon_chip_attr_templates[] = {
+	[hwmon_chip_temp_reset_history] = "temp_reset_history",
+	[hwmon_chip_in_reset_history] = "in_reset_history",
+	[hwmon_chip_curr_reset_history] = "curr_reset_history",
+	[hwmon_chip_power_reset_history] = "power_reset_history",
+	[hwmon_chip_update_interval] = "update_interval",
+	[hwmon_chip_alarms] = "alarms",
+};
+
+static const char * const hwmon_temp_attr_templates[] = {
+	[hwmon_temp_input] = "temp%d_input",
+	[hwmon_temp_type] = "temp%d_type",
+	[hwmon_temp_lcrit] = "temp%d_lcrit",
+	[hwmon_temp_lcrit_hyst] = "temp%d_lcrit_hyst",
+	[hwmon_temp_min] = "temp%d_min",
+	[hwmon_temp_min_hyst] = "temp%d_min_hyst",
+	[hwmon_temp_max] = "temp%d_max",
+	[hwmon_temp_max_hyst] = "temp%d_max_hyst",
+	[hwmon_temp_crit] = "temp%d_crit",
+	[hwmon_temp_crit_hyst] = "temp%d_crit_hyst",
+	[hwmon_temp_emergency] = "temp%d_emergency",
+	[hwmon_temp_emergency_hyst] = "temp%d_emergency_hyst",
+	[hwmon_temp_alarm] = "temp%d_alarm",
+	[hwmon_temp_lcrit_alarm] = "temp%d_lcrit_alarm",
+	[hwmon_temp_min_alarm] = "temp%d_min_alarm",
+	[hwmon_temp_max_alarm] = "temp%d_max_alarm",
+	[hwmon_temp_crit_alarm] = "temp%d_crit_alarm",
+	[hwmon_temp_emergency_alarm] = "temp%d_emergency_alarm",
+	[hwmon_temp_fault] = "temp%d_fault",
+	[hwmon_temp_offset] = "temp%d_offset",
+	[hwmon_temp_label] = "temp%d_label",
+	[hwmon_temp_lowest] = "temp%d_lowest",
+	[hwmon_temp_highest] = "temp%d_highest",
+	[hwmon_temp_reset_history] = "temp%d_reset_history",
+};
+
+static const char * const hwmon_in_attr_templates[] = {
+	[hwmon_in_input] = "in%d_input",
+	[hwmon_in_min] = "in%d_min",
+	[hwmon_in_max] = "in%d_max",
+	[hwmon_in_lcrit] = "in%d_lcrit",
+	[hwmon_in_crit] = "in%d_crit",
+	[hwmon_in_average] = "in%d_average",
+	[hwmon_in_lowest] = "in%d_lowest",
+	[hwmon_in_highest] = "in%d_highest",
+	[hwmon_in_reset_history] = "in%d_reset_history",
+	[hwmon_in_label] = "in%d_label",
+	[hwmon_in_alarm] = "in%d_alarm",
+	[hwmon_in_min_alarm] = "in%d_min_alarm",
+	[hwmon_in_max_alarm] = "in%d_max_alarm",
+	[hwmon_in_lcrit_alarm] = "in%d_lcrit_alarm",
+	[hwmon_in_crit_alarm] = "in%d_crit_alarm",
+};
+
+static const char * const hwmon_curr_attr_templates[] = {
+	[hwmon_curr_input] = "curr%d_input",
+	[hwmon_curr_min] = "curr%d_min",
+	[hwmon_curr_max] = "curr%d_max",
+	[hwmon_curr_lcrit] = "curr%d_lcrit",
+	[hwmon_curr_crit] = "curr%d_crit",
+	[hwmon_curr_average] = "curr%d_average",
+	[hwmon_curr_lowest] = "curr%d_lowest",
+	[hwmon_curr_highest] = "curr%d_highest",
+	[hwmon_curr_reset_history] = "curr%d_reset_history",
+	[hwmon_curr_label] = "curr%d_label",
+	[hwmon_curr_alarm] = "curr%d_alarm",
+	[hwmon_curr_min_alarm] = "curr%d_min_alarm",
+	[hwmon_curr_max_alarm] = "curr%d_max_alarm",
+	[hwmon_curr_lcrit_alarm] = "curr%d_lcrit_alarm",
+	[hwmon_curr_crit_alarm] = "curr%d_crit_alarm",
+};
+
+static const char * const hwmon_power_attr_templates[] = {
+	[hwmon_power_average] = "power%d_average",
+	[hwmon_power_average_interval] = "power%d_average_interval",
+	[hwmon_power_average_interval_max] = "power%d_interval_max",
+	[hwmon_power_average_interval_min] = "power%d_interval_min",
+	[hwmon_power_average_highest] = "power%d_average_highest",
+	[hwmon_power_average_lowest] = "power%d_average_lowest",
+	[hwmon_power_average_max] = "power%d_average_max",
+	[hwmon_power_average_min] = "power%d_average_min",
+	[hwmon_power_input] = "power%d_input",
+	[hwmon_power_input_highest] = "power%d_input_highest",
+	[hwmon_power_input_lowest] = "power%d_input_lowest",
+	[hwmon_power_reset_history] = "power%d_reset_history",
+	[hwmon_power_accuracy] = "power%d_accuracy",
+	[hwmon_power_cap] = "power%d_cap",
+	[hwmon_power_cap_hyst] = "power%d_cap_hyst",
+	[hwmon_power_cap_max] = "power%d_cap_max",
+	[hwmon_power_cap_min] = "power%d_cap_min",
+	[hwmon_power_max] = "power%d_max",
+	[hwmon_power_crit] = "power%d_crit",
+	[hwmon_power_label] = "power%d_label",
+	[hwmon_power_alarm] = "power%d_alarm",
+	[hwmon_power_cap_alarm] = "power%d_cap_alarm",
+	[hwmon_power_max_alarm] = "power%d_max_alarm",
+	[hwmon_power_crit_alarm] = "power%d_crit_alarm",
+};
+
+static const char * const hwmon_energy_attr_templates[] = {
+	[hwmon_energy_input] = "energy%d_input",
+	[hwmon_energy_label] = "energy%d_label",
+};
+
+static const char * const hwmon_humidity_attr_templates[] = {
+	[hwmon_humidity_input] = "humidity%d_input",
+	[hwmon_humidity_label] = "humidity%d_label",
+	[hwmon_humidity_min] = "humidity%d_min",
+	[hwmon_humidity_min_hyst] = "humidity%d_min_hyst",
+	[hwmon_humidity_max] = "humidity%d_max",
+	[hwmon_humidity_max_hyst] = "humidity%d_max_hyst",
+	[hwmon_humidity_alarm] = "humidity%d_alarm",
+	[hwmon_humidity_fault] = "humidity%d_fault",
+};
+
+static const char * const hwmon_fan_attr_templates[] = {
+	[hwmon_fan_input] = "fan%d_input",
+	[hwmon_fan_label] = "fan%d_label",
+	[hwmon_fan_min] = "fan%d_min",
+	[hwmon_fan_max] = "fan%d_max",
+	[hwmon_fan_div] = "fan%d_div",
+	[hwmon_fan_pulses] = "fan%d_pulses",
+	[hwmon_fan_target] = "fan%d_target",
+	[hwmon_fan_alarm] = "fan%d_alarm",
+	[hwmon_fan_min_alarm] = "fan%d_min_alarm",
+	[hwmon_fan_max_alarm] = "fan%d_max_alarm",
+	[hwmon_fan_fault] = "fan%d_fault",
+};
+
+static const char * const hwmon_pwm_attr_templates[] = {
+	[hwmon_pwm_input] = "pwm%d",
+	[hwmon_pwm_enable] = "pwm%d_enable",
+	[hwmon_pwm_mode] = "pwm%d_mode",
+	[hwmon_pwm_freq] = "pwm%d_freq",
+};
+
+static const char * const *__templates[] = {
+	[hwmon_chip] = hwmon_chip_attr_templates,
+	[hwmon_temp] = hwmon_temp_attr_templates,
+	[hwmon_in] = hwmon_in_attr_templates,
+	[hwmon_curr] = hwmon_curr_attr_templates,
+	[hwmon_power] = hwmon_power_attr_templates,
+	[hwmon_energy] = hwmon_energy_attr_templates,
+	[hwmon_humidity] = hwmon_humidity_attr_templates,
+	[hwmon_fan] = hwmon_fan_attr_templates,
+	[hwmon_pwm] = hwmon_pwm_attr_templates,
+};
+
+static const int __templates_size[] = {
+	[hwmon_chip] = ARRAY_SIZE(hwmon_chip_attr_templates),
+	[hwmon_temp] = ARRAY_SIZE(hwmon_temp_attr_templates),
+	[hwmon_in] = ARRAY_SIZE(hwmon_in_attr_templates),
+	[hwmon_curr] = ARRAY_SIZE(hwmon_curr_attr_templates),
+	[hwmon_power] = ARRAY_SIZE(hwmon_power_attr_templates),
+	[hwmon_energy] = ARRAY_SIZE(hwmon_energy_attr_templates),
+	[hwmon_humidity] = ARRAY_SIZE(hwmon_humidity_attr_templates),
+	[hwmon_fan] = ARRAY_SIZE(hwmon_fan_attr_templates),
+	[hwmon_pwm] = ARRAY_SIZE(hwmon_pwm_attr_templates),
+};
+
+static int hwmon_num_channel_attrs(const struct hwmon_channel_info *info)
+{
+	int i, n;
+
+	for (i = n = 0; info->config[i]; i++)
+		n += hweight32(info->config[i]);
+
+	return n;
+}
+
+static int hwmon_genattrs(struct device *dev,
+			  const void *drvdata,
+			  struct attribute **attrs,
+			  const struct hwmon_ops *ops,
+			  const struct hwmon_channel_info *info)
+{
+	const char * const *templates;
+	int template_size;
+	int i, aindex = 0;
+
+	if (info->type >= ARRAY_SIZE(__templates))
+		return -EINVAL;
+
+	templates = __templates[info->type];
+	template_size = __templates_size[info->type];
+
+	for (i = 0; info->config[i]; i++) {
+		u32 attr_mask = info->config[i];
+		u32 attr;
+
+		while (attr_mask) {
+			struct attribute *a;
+
+			attr = __ffs(attr_mask);
+			attr_mask &= ~BIT(attr);
+			if (attr >= template_size)
+				return -EINVAL;
+			a = hwmon_genattr(dev, drvdata, info->type, attr, i,
+					  templates[attr], ops);
+			if (IS_ERR(a)) {
+				if (PTR_ERR(a) != -ENOENT)
+					return PTR_ERR(a);
+				continue;
+			}
+			attrs[aindex++] = a;
+		}
+	}
+	return aindex;
+}
+
+static struct attribute **
+__hwmon_create_attrs(struct device *dev, const void *drvdata,
+		     const struct hwmon_chip_info *chip)
+{
+	int ret, i, aindex = 0, nattrs = 0;
+	struct attribute **attrs;
+
+	for (i = 0; chip->info[i]; i++)
+		nattrs += hwmon_num_channel_attrs(chip->info[i]);
+
+	if (nattrs == 0)
+		return ERR_PTR(-EINVAL);
+
+	attrs = devm_kcalloc(dev, nattrs + 1, sizeof(*attrs), GFP_KERNEL);
+	if (!attrs)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; chip->info[i]; i++) {
+		ret = hwmon_genattrs(dev, drvdata, &attrs[aindex], chip->ops,
+				     chip->info[i]);
+		if (ret < 0)
+			return ERR_PTR(ret);
+		aindex += ret;
+	}
+
+	return attrs;
+}
+
+static struct device *
+__hwmon_device_register(struct device *dev, const char *name, void *drvdata,
+			const struct hwmon_chip_info *chip,
+			const struct attribute_group **groups)
+{
+	struct hwmon_device *hwdev;
+	struct device *hdev;
+	int i, j, err, id;
+
+	/* Do not accept invalid characters in hwmon name attribute */
+	if (name && (!strlen(name) || strpbrk(name, "-* \t\n")))
+		return ERR_PTR(-EINVAL);
+
+	id = ida_simple_get(&hwmon_ida, 0, 0, GFP_KERNEL);
+	if (id < 0)
+		return ERR_PTR(id);
+
+	hwdev = kzalloc(sizeof(*hwdev), GFP_KERNEL);
+	if (hwdev == NULL) {
+		err = -ENOMEM;
+		goto ida_remove;
+	}
+
+	hdev = &hwdev->dev;
+
+	if (chip && chip->ops->is_visible) {
+		struct attribute **attrs;
+		int ngroups = 2;
+
+		if (groups)
+			for (i = 0; groups[i]; i++)
+				ngroups++;
+
+		hwdev->groups = devm_kcalloc(dev, ngroups, sizeof(*groups),
+					     GFP_KERNEL);
+		if (!hwdev->groups)
+			return ERR_PTR(-ENOMEM);
+
+		attrs = __hwmon_create_attrs(dev, drvdata, chip);
+		if (IS_ERR(attrs)) {
+			err = PTR_ERR(attrs);
+			goto free_hwmon;
+		}
+
+		hwdev->group.attrs = attrs;
+		ngroups = 0;
+		hwdev->groups[ngroups++] = &hwdev->group;
+
+		if (groups) {
+			for (i = 0; groups[i]; i++)
+				hwdev->groups[ngroups++] = groups[i];
+		}
+
+		hdev->groups = hwdev->groups;
+	} else {
+		hdev->groups = groups;
+	}
+
+	hwdev->name = name;
+	hdev->class = &hwmon_class;
+	hdev->parent = dev;
+	hdev->of_node = dev ? dev->of_node : NULL;
+	hwdev->chip = chip;
+	dev_set_drvdata(hdev, drvdata);
+	dev_set_name(hdev, HWMON_ID_FORMAT, id);
+	err = device_register(hdev);
+	if (err)
+		goto free_hwmon;
+
+	if (chip && chip->ops->is_visible && chip->ops->read &&
+	    chip->info[0]->type == hwmon_chip &&
+	    (chip->info[0]->config[0] & HWMON_C_REGISTER_TZ)) {
+		const struct hwmon_channel_info **info = chip->info;
+
+		for (i = 1; info[i]; i++) {
+			if (info[i]->type != hwmon_temp)
+				continue;
+
+			for (j = 0; info[i]->config[j]; j++) {
+				if (!chip->ops->is_visible(drvdata, hwmon_temp,
+							   hwmon_temp_input, j))
+					continue;
+				if (info[i]->config[j] & HWMON_T_INPUT)
+					hwmon_thermal_add_sensor(dev, hwdev, j);
+			}
+		}
+	}
+
+	return hdev;
+
+free_hwmon:
+	kfree(hwdev);
+ida_remove:
+	ida_simple_remove(&hwmon_ida, id);
+	return ERR_PTR(err);
+}
+
 /**
  * hwmon_device_register_with_groups - register w/ hwmon
  * @dev: the parent device
@@ -97,45 +615,37 @@
 				  void *drvdata,
 				  const struct attribute_group **groups)
 {
-	struct hwmon_device *hwdev;
-	int err, id;
-
-	/* Do not accept invalid characters in hwmon name attribute */
-	if (name && (!strlen(name) || strpbrk(name, "-* \t\n")))
-		return ERR_PTR(-EINVAL);
-
-	id = ida_simple_get(&hwmon_ida, 0, 0, GFP_KERNEL);
-	if (id < 0)
-		return ERR_PTR(id);
-
-	hwdev = kzalloc(sizeof(*hwdev), GFP_KERNEL);
-	if (hwdev == NULL) {
-		err = -ENOMEM;
-		goto ida_remove;
-	}
-
-	hwdev->name = name;
-	hwdev->dev.class = &hwmon_class;
-	hwdev->dev.parent = dev;
-	hwdev->dev.groups = groups;
-	hwdev->dev.of_node = dev ? dev->of_node : NULL;
-	dev_set_drvdata(&hwdev->dev, drvdata);
-	dev_set_name(&hwdev->dev, HWMON_ID_FORMAT, id);
-	err = device_register(&hwdev->dev);
-	if (err)
-		goto free;
-
-	return &hwdev->dev;
-
-free:
-	kfree(hwdev);
-ida_remove:
-	ida_simple_remove(&hwmon_ida, id);
-	return ERR_PTR(err);
+	return __hwmon_device_register(dev, name, drvdata, NULL, groups);
 }
 EXPORT_SYMBOL_GPL(hwmon_device_register_with_groups);
 
 /**
+ * hwmon_device_register_with_info - register w/ hwmon
+ * @dev: the parent device
+ * @name: hwmon name attribute
+ * @drvdata: driver data to attach to created device
+ * @info: Pointer to hwmon chip information
+ * @groups - pointer to list of driver specific attribute groups
+ *
+ * hwmon_device_unregister() must be called when the device is no
+ * longer needed.
+ *
+ * Returns the pointer to the new device.
+ */
+struct device *
+hwmon_device_register_with_info(struct device *dev, const char *name,
+				void *drvdata,
+				const struct hwmon_chip_info *chip,
+				const struct attribute_group **groups)
+{
+	if (chip && (!chip->ops || !chip->info))
+		return ERR_PTR(-EINVAL);
+
+	return __hwmon_device_register(dev, name, drvdata, chip, groups);
+}
+EXPORT_SYMBOL_GPL(hwmon_device_register_with_info);
+
+/**
  * hwmon_device_register - register w/ hwmon
  * @dev: the device to register
  *
@@ -213,6 +723,48 @@
 }
 EXPORT_SYMBOL_GPL(devm_hwmon_device_register_with_groups);
 
+/**
+ * devm_hwmon_device_register_with_info - register w/ hwmon
+ * @dev: the parent device
+ * @name: hwmon name attribute
+ * @drvdata: driver data to attach to created device
+ * @info: Pointer to hwmon chip information
+ * @groups - pointer to list of driver specific attribute groups
+ *
+ * Returns the pointer to the new device. The new device is automatically
+ * unregistered with the parent device.
+ */
+struct device *
+devm_hwmon_device_register_with_info(struct device *dev, const char *name,
+				     void *drvdata,
+				     const struct hwmon_chip_info *chip,
+				     const struct attribute_group **groups)
+{
+	struct device **ptr, *hwdev;
+
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	ptr = devres_alloc(devm_hwmon_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	hwdev = hwmon_device_register_with_info(dev, name, drvdata, chip,
+						groups);
+	if (IS_ERR(hwdev))
+		goto error;
+
+	*ptr = hwdev;
+	devres_add(dev, ptr);
+
+	return hwdev;
+
+error:
+	devres_free(ptr);
+	return hwdev;
+}
+EXPORT_SYMBOL_GPL(devm_hwmon_device_register_with_info);
+
 static int devm_hwmon_match(struct device *dev, void *res, void *data)
 {
 	struct device **hwdev = res;
diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c
index 55b5a8f..6d2e660 100644
--- a/drivers/hwmon/ibmpowernv.c
+++ b/drivers/hwmon/ibmpowernv.c
@@ -143,13 +143,11 @@
 		if (cpuid >= 0)
 			/*
 			 * The digital thermal sensors are associated
-			 * with a core. Let's print out the range of
-			 * cpu ids corresponding to the hardware
-			 * threads of the core.
+			 * with a core.
 			 */
 			n += snprintf(sdata->label + n,
-				      sizeof(sdata->label) - n, " %d-%d",
-				      cpuid, cpuid + threads_per_core - 1);
+				      sizeof(sdata->label) - n, " %d",
+				      cpuid);
 		else
 			n += snprintf(sdata->label + n,
 				      sizeof(sdata->label) - n, " phy%d", id);
diff --git a/drivers/hwmon/iio_hwmon.c b/drivers/hwmon/iio_hwmon.c
index 8944987..f6a7667 100644
--- a/drivers/hwmon/iio_hwmon.c
+++ b/drivers/hwmon/iio_hwmon.c
@@ -73,8 +73,11 @@
 		name = dev->of_node->name;
 
 	channels = iio_channel_get_all(dev);
-	if (IS_ERR(channels))
+	if (IS_ERR(channels)) {
+		if (PTR_ERR(channels) == -ENODEV)
+			return -EPROBE_DEFER;
 		return PTR_ERR(channels);
+	}
 
 	st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL);
 	if (st == NULL) {
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index 730d840..ad82cb2 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -491,7 +491,7 @@
 struct it87_data {
 	const struct attribute_group *groups[7];
 	enum chips type;
-	u16 features;
+	u32 features;
 	u8 peci_mask;
 	u8 old_peci_mask;
 
@@ -2011,10 +2011,11 @@
 	&sensor_dev_attr_in7_beep.dev_attr.attr,	/* 39 */
 
 	&sensor_dev_attr_in8_input.dev_attr.attr,	/* 40 */
-	&sensor_dev_attr_in9_input.dev_attr.attr,	/* 41 */
-	&sensor_dev_attr_in10_input.dev_attr.attr,	/* 41 */
-	&sensor_dev_attr_in11_input.dev_attr.attr,	/* 41 */
-	&sensor_dev_attr_in12_input.dev_attr.attr,	/* 41 */
+	&sensor_dev_attr_in9_input.dev_attr.attr,
+	&sensor_dev_attr_in10_input.dev_attr.attr,
+	&sensor_dev_attr_in11_input.dev_attr.attr,
+	&sensor_dev_attr_in12_input.dev_attr.attr,
+	NULL
 };
 
 static const struct attribute_group it87_group_in = {
diff --git a/drivers/hwmon/jc42.c b/drivers/hwmon/jc42.c
index 9d5f85f..1bf22ef 100644
--- a/drivers/hwmon/jc42.c
+++ b/drivers/hwmon/jc42.c
@@ -28,7 +28,6 @@
 #include <linux/jiffies.h>
 #include <linux/i2c.h>
 #include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
@@ -254,170 +253,148 @@
 	return ret;
 }
 
-/* sysfs functions */
-
-static ssize_t show_temp(struct device *dev, struct device_attribute *devattr,
-			 char *buf)
+static int jc42_read(struct device *dev, enum hwmon_sensor_types type,
+		     u32 attr, int channel, long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct jc42_data *data = jc42_update_device(dev);
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-	return sprintf(buf, "%d\n",
-		       jc42_temp_from_reg(data->temp[attr->index]));
-}
-
-static ssize_t show_temp_hyst(struct device *dev,
-			      struct device_attribute *devattr, char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 	struct jc42_data *data = jc42_update_device(dev);
 	int temp, hyst;
 
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
-	temp = jc42_temp_from_reg(data->temp[attr->index]);
-	hyst = jc42_hysteresis[(data->config & JC42_CFG_HYST_MASK)
-			       >> JC42_CFG_HYST_SHIFT];
-	return sprintf(buf, "%d\n", temp - hyst);
+	switch (attr) {
+	case hwmon_temp_input:
+		*val = jc42_temp_from_reg(data->temp[t_input]);
+		return 0;
+	case hwmon_temp_min:
+		*val = jc42_temp_from_reg(data->temp[t_min]);
+		return 0;
+	case hwmon_temp_max:
+		*val = jc42_temp_from_reg(data->temp[t_max]);
+		return 0;
+	case hwmon_temp_crit:
+		*val = jc42_temp_from_reg(data->temp[t_crit]);
+		return 0;
+	case hwmon_temp_max_hyst:
+		temp = jc42_temp_from_reg(data->temp[t_max]);
+		hyst = jc42_hysteresis[(data->config & JC42_CFG_HYST_MASK)
+						>> JC42_CFG_HYST_SHIFT];
+		*val = temp - hyst;
+		return 0;
+	case hwmon_temp_crit_hyst:
+		temp = jc42_temp_from_reg(data->temp[t_crit]);
+		hyst = jc42_hysteresis[(data->config & JC42_CFG_HYST_MASK)
+						>> JC42_CFG_HYST_SHIFT];
+		*val = temp - hyst;
+		return 0;
+	case hwmon_temp_min_alarm:
+		*val = (data->temp[t_input] >> JC42_ALARM_MIN_BIT) & 1;
+		return 0;
+	case hwmon_temp_max_alarm:
+		*val = (data->temp[t_input] >> JC42_ALARM_MAX_BIT) & 1;
+		return 0;
+	case hwmon_temp_crit_alarm:
+		*val = (data->temp[t_input] >> JC42_ALARM_CRIT_BIT) & 1;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t set_temp(struct device *dev, struct device_attribute *devattr,
-			const char *buf, size_t count)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct jc42_data *data = dev_get_drvdata(dev);
-	int err, ret = count;
-	int nr = attr->index;
-	long val;
-
-	if (kstrtol(buf, 10, &val) < 0)
-		return -EINVAL;
-	mutex_lock(&data->update_lock);
-	data->temp[nr] = jc42_temp_to_reg(val, data->extended);
-	err = i2c_smbus_write_word_swapped(data->client, temp_regs[nr],
-					   data->temp[nr]);
-	if (err < 0)
-		ret = err;
-	mutex_unlock(&data->update_lock);
-	return ret;
-}
-
-/*
- * JC42.4 compliant chips only support four hysteresis values.
- * Pick best choice and go from there.
- */
-static ssize_t set_temp_crit_hyst(struct device *dev,
-				  struct device_attribute *attr,
-				  const char *buf, size_t count)
+static int jc42_write(struct device *dev, enum hwmon_sensor_types type,
+		      u32 attr, int channel, long val)
 {
 	struct jc42_data *data = dev_get_drvdata(dev);
-	long val;
+	struct i2c_client *client = data->client;
 	int diff, hyst;
-	int err;
-	int ret = count;
+	int ret;
 
-	if (kstrtol(buf, 10, &val) < 0)
-		return -EINVAL;
+	mutex_lock(&data->update_lock);
 
-	val = clamp_val(val, (data->extended ? JC42_TEMP_MIN_EXTENDED :
-			      JC42_TEMP_MIN) - 6000, JC42_TEMP_MAX);
-	diff = jc42_temp_from_reg(data->temp[t_crit]) - val;
-
-	hyst = 0;
-	if (diff > 0) {
-		if (diff < 2250)
-			hyst = 1;	/* 1.5 degrees C */
-		else if (diff < 4500)
-			hyst = 2;	/* 3.0 degrees C */
-		else
-			hyst = 3;	/* 6.0 degrees C */
+	switch (attr) {
+	case hwmon_temp_min:
+		data->temp[t_min] = jc42_temp_to_reg(val, data->extended);
+		ret = i2c_smbus_write_word_swapped(client, temp_regs[t_min],
+						   data->temp[t_min]);
+		break;
+	case hwmon_temp_max:
+		data->temp[t_max] = jc42_temp_to_reg(val, data->extended);
+		ret = i2c_smbus_write_word_swapped(client, temp_regs[t_max],
+						   data->temp[t_max]);
+		break;
+	case hwmon_temp_crit:
+		data->temp[t_crit] = jc42_temp_to_reg(val, data->extended);
+		ret = i2c_smbus_write_word_swapped(client, temp_regs[t_crit],
+						   data->temp[t_crit]);
+		break;
+	case hwmon_temp_crit_hyst:
+		/*
+		 * JC42.4 compliant chips only support four hysteresis values.
+		 * Pick best choice and go from there.
+		 */
+		val = clamp_val(val, (data->extended ? JC42_TEMP_MIN_EXTENDED
+						     : JC42_TEMP_MIN) - 6000,
+				JC42_TEMP_MAX);
+		diff = jc42_temp_from_reg(data->temp[t_crit]) - val;
+		hyst = 0;
+		if (diff > 0) {
+			if (diff < 2250)
+				hyst = 1;	/* 1.5 degrees C */
+			else if (diff < 4500)
+				hyst = 2;	/* 3.0 degrees C */
+			else
+				hyst = 3;	/* 6.0 degrees C */
+		}
+		data->config = (data->config & ~JC42_CFG_HYST_MASK) |
+				(hyst << JC42_CFG_HYST_SHIFT);
+		ret = i2c_smbus_write_word_swapped(data->client,
+						   JC42_REG_CONFIG,
+						   data->config);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
 	}
 
-	mutex_lock(&data->update_lock);
-	data->config = (data->config & ~JC42_CFG_HYST_MASK)
-	  | (hyst << JC42_CFG_HYST_SHIFT);
-	err = i2c_smbus_write_word_swapped(data->client, JC42_REG_CONFIG,
-					   data->config);
-	if (err < 0)
-		ret = err;
 	mutex_unlock(&data->update_lock);
+
 	return ret;
 }
 
-static ssize_t show_alarm(struct device *dev,
-			  struct device_attribute *attr, char *buf)
+static umode_t jc42_is_visible(const void *_data, enum hwmon_sensor_types type,
+			       u32 attr, int channel)
 {
-	u16 bit = to_sensor_dev_attr(attr)->index;
-	struct jc42_data *data = jc42_update_device(dev);
-	u16 val;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	val = data->temp[t_input];
-	if (bit != JC42_ALARM_CRIT_BIT && (data->config & JC42_CFG_CRIT_ONLY))
-		val = 0;
-	return sprintf(buf, "%u\n", (val >> bit) & 1);
-}
-
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, t_input);
-static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp, set_temp, t_crit);
-static SENSOR_DEVICE_ATTR(temp1_min, S_IRUGO, show_temp, set_temp, t_min);
-static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, show_temp, set_temp, t_max);
-
-static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, show_temp_hyst,
-			  set_temp_crit_hyst, t_crit);
-static SENSOR_DEVICE_ATTR(temp1_max_hyst, S_IRUGO, show_temp_hyst, NULL, t_max);
-
-static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL,
-			  JC42_ALARM_CRIT_BIT);
-static SENSOR_DEVICE_ATTR(temp1_min_alarm, S_IRUGO, show_alarm, NULL,
-			  JC42_ALARM_MIN_BIT);
-static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL,
-			  JC42_ALARM_MAX_BIT);
-
-static struct attribute *jc42_attributes[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit.dev_attr.attr,
-	&sensor_dev_attr_temp1_min.dev_attr.attr,
-	&sensor_dev_attr_temp1_max.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp1_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
-	NULL
-};
-
-static umode_t jc42_attribute_mode(struct kobject *kobj,
-				  struct attribute *attr, int index)
-{
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct jc42_data *data = dev_get_drvdata(dev);
+	const struct jc42_data *data = _data;
 	unsigned int config = data->config;
-	bool readonly;
+	umode_t mode = S_IRUGO;
 
-	if (attr == &sensor_dev_attr_temp1_crit.dev_attr.attr)
-		readonly = config & JC42_CFG_TCRIT_LOCK;
-	else if (attr == &sensor_dev_attr_temp1_min.dev_attr.attr ||
-		 attr == &sensor_dev_attr_temp1_max.dev_attr.attr)
-		readonly = config & JC42_CFG_EVENT_LOCK;
-	else if (attr == &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr)
-		readonly = config & (JC42_CFG_EVENT_LOCK | JC42_CFG_TCRIT_LOCK);
-	else
-		readonly = true;
-
-	return S_IRUGO | (readonly ? 0 : S_IWUSR);
+	switch (attr) {
+	case hwmon_temp_min:
+	case hwmon_temp_max:
+		if (!(config & JC42_CFG_EVENT_LOCK))
+			mode |= S_IWUSR;
+		break;
+	case hwmon_temp_crit:
+		if (!(config & JC42_CFG_TCRIT_LOCK))
+			mode |= S_IWUSR;
+		break;
+	case hwmon_temp_crit_hyst:
+		if (!(config & (JC42_CFG_EVENT_LOCK | JC42_CFG_TCRIT_LOCK)))
+			mode |= S_IWUSR;
+		break;
+	case hwmon_temp_input:
+	case hwmon_temp_max_hyst:
+	case hwmon_temp_min_alarm:
+	case hwmon_temp_max_alarm:
+	case hwmon_temp_crit_alarm:
+		break;
+	default:
+		mode = 0;
+		break;
+	}
+	return mode;
 }
 
-static const struct attribute_group jc42_group = {
-	.attrs = jc42_attributes,
-	.is_visible = jc42_attribute_mode,
-};
-__ATTRIBUTE_GROUPS(jc42);
-
 /* Return 0 if detection is successful, -ENODEV otherwise */
 static int jc42_detect(struct i2c_client *client, struct i2c_board_info *info)
 {
@@ -450,6 +427,34 @@
 	return -ENODEV;
 }
 
+static const u32 jc42_temp_config[] = {
+	HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | HWMON_T_CRIT |
+	HWMON_T_MAX_HYST | HWMON_T_CRIT_HYST |
+	HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM,
+	0
+};
+
+static const struct hwmon_channel_info jc42_temp = {
+	.type = hwmon_temp,
+	.config = jc42_temp_config,
+};
+
+static const struct hwmon_channel_info *jc42_info[] = {
+	&jc42_temp,
+	NULL
+};
+
+static const struct hwmon_ops jc42_hwmon_ops = {
+	.is_visible = jc42_is_visible,
+	.read = jc42_read,
+	.write = jc42_write,
+};
+
+static const struct hwmon_chip_info jc42_chip_info = {
+	.ops = &jc42_hwmon_ops,
+	.info = jc42_info,
+};
+
 static int jc42_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
@@ -482,9 +487,9 @@
 	}
 	data->config = config;
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
-							   data,
-							   jc42_groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+							 data, &jc42_chip_info,
+							 NULL);
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index 92f9d4b..eff3b24 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -28,7 +28,6 @@
 #include <linux/err.h>
 #include <linux/of.h>
 #include <linux/regmap.h>
-#include <linux/thermal.h>
 #include "lm75.h"
 
 
@@ -88,56 +87,75 @@
 	return ((temp >> (16 - resolution)) * 1000) >> (resolution - 8);
 }
 
-/* sysfs attributes for hwmon */
-
-static int lm75_read_temp(void *dev, int *temp)
+static int lm75_read(struct device *dev, enum hwmon_sensor_types type,
+		     u32 attr, int channel, long *val)
 {
 	struct lm75_data *data = dev_get_drvdata(dev);
-	unsigned int _temp;
-	int err;
+	unsigned int regval;
+	int err, reg;
 
-	err = regmap_read(data->regmap, LM75_REG_TEMP, &_temp);
-	if (err < 0)
-		return err;
+	switch (type) {
+	case hwmon_chip:
+		switch (attr) {
+		case hwmon_chip_update_interval:
+			*val = data->sample_time;
+			break;;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_input:
+			reg = LM75_REG_TEMP;
+			break;
+		case hwmon_temp_max:
+			reg = LM75_REG_MAX;
+			break;
+		case hwmon_temp_max_hyst:
+			reg = LM75_REG_HYST;
+			break;
+		default:
+			return -EINVAL;
+		}
+		err = regmap_read(data->regmap, reg, &regval);
+		if (err < 0)
+			return err;
 
-	*temp = lm75_reg_to_mc(_temp, data->resolution);
-
+		*val = lm75_reg_to_mc(regval, data->resolution);
+		break;
+	default:
+		return -EINVAL;
+	}
 	return 0;
 }
 
-static ssize_t show_temp(struct device *dev, struct device_attribute *da,
-			 char *buf)
+static int lm75_write(struct device *dev, enum hwmon_sensor_types type,
+		      u32 attr, int channel, long temp)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
 	struct lm75_data *data = dev_get_drvdata(dev);
-	unsigned int temp = 0;
-	int err;
-
-	err = regmap_read(data->regmap, attr->index, &temp);
-	if (err < 0)
-		return err;
-
-	return sprintf(buf, "%ld\n", lm75_reg_to_mc(temp, data->resolution));
-}
-
-static ssize_t set_temp(struct device *dev, struct device_attribute *da,
-			const char *buf, size_t count)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	struct lm75_data *data = dev_get_drvdata(dev);
-	long temp;
-	int error;
 	u8 resolution;
+	int reg;
 
-	error = kstrtol(buf, 10, &temp);
-	if (error)
-		return error;
+	if (type != hwmon_temp)
+		return -EINVAL;
+
+	switch (attr) {
+	case hwmon_temp_max:
+		reg = LM75_REG_MAX;
+		break;
+	case hwmon_temp_max_hyst:
+		reg = LM75_REG_HYST;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	/*
 	 * Resolution of limit registers is assumed to be the same as the
 	 * temperature input register resolution unless given explicitly.
 	 */
-	if (attr->index && data->resolution_limits)
+	if (data->resolution_limits)
 		resolution = data->resolution_limits;
 	else
 		resolution = data->resolution;
@@ -145,46 +163,78 @@
 	temp = clamp_val(temp, LM75_TEMP_MIN, LM75_TEMP_MAX);
 	temp = DIV_ROUND_CLOSEST(temp  << (resolution - 8),
 				 1000) << (16 - resolution);
-	error = regmap_write(data->regmap, attr->index, temp);
-	if (error < 0)
-		return error;
 
-	return count;
+	return regmap_write(data->regmap, reg, temp);
 }
 
-static ssize_t show_update_interval(struct device *dev,
-				    struct device_attribute *da, char *buf)
+static umode_t lm75_is_visible(const void *data, enum hwmon_sensor_types type,
+			       u32 attr, int channel)
 {
-	struct lm75_data *data = dev_get_drvdata(dev);
-
-	return sprintf(buf, "%u\n", data->sample_time);
+	switch (type) {
+	case hwmon_chip:
+		switch (attr) {
+		case hwmon_chip_update_interval:
+			return S_IRUGO;
+		}
+		break;
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_input:
+			return S_IRUGO;
+		case hwmon_temp_max:
+		case hwmon_temp_max_hyst:
+			return S_IRUGO | S_IWUSR;
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
 }
 
-static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO,
-			show_temp, set_temp, LM75_REG_MAX);
-static SENSOR_DEVICE_ATTR(temp1_max_hyst, S_IWUSR | S_IRUGO,
-			show_temp, set_temp, LM75_REG_HYST);
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, LM75_REG_TEMP);
-static DEVICE_ATTR(update_interval, S_IRUGO, show_update_interval, NULL);
-
-static struct attribute *lm75_attrs[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_max.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_hyst.dev_attr.attr,
-	&dev_attr_update_interval.attr,
-
-	NULL
-};
-ATTRIBUTE_GROUPS(lm75);
-
-static const struct thermal_zone_of_device_ops lm75_of_thermal_ops = {
-	.get_temp = lm75_read_temp,
-};
-
 /*-----------------------------------------------------------------------*/
 
 /* device probe and removal */
 
+/* chip configuration */
+
+static const u32 lm75_chip_config[] = {
+	HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL,
+	0
+};
+
+static const struct hwmon_channel_info lm75_chip = {
+	.type = hwmon_chip,
+	.config = lm75_chip_config,
+};
+
+static const u32 lm75_temp_config[] = {
+	HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MAX_HYST,
+	0
+};
+
+static const struct hwmon_channel_info lm75_temp = {
+	.type = hwmon_temp,
+	.config = lm75_temp_config,
+};
+
+static const struct hwmon_channel_info *lm75_info[] = {
+	&lm75_chip,
+	&lm75_temp,
+	NULL
+};
+
+static const struct hwmon_ops lm75_hwmon_ops = {
+	.is_visible = lm75_is_visible,
+	.read = lm75_read,
+	.write = lm75_write,
+};
+
+static const struct hwmon_chip_info lm75_chip_info = {
+	.ops = &lm75_hwmon_ops,
+	.info = lm75_info,
+};
+
 static bool lm75_is_writeable_reg(struct device *dev, unsigned int reg)
 {
 	return reg != LM75_REG_TEMP;
@@ -337,15 +387,12 @@
 
 	dev_dbg(dev, "Config %02x\n", new);
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
-							   data, lm75_groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+							 data, &lm75_chip_info,
+							 NULL);
 	if (IS_ERR(hwmon_dev))
 		return PTR_ERR(hwmon_dev);
 
-	devm_thermal_zone_of_sensor_register(hwmon_dev, 0,
-					     hwmon_dev,
-					     &lm75_of_thermal_ops);
-
 	dev_info(dev, "%s: sensor '%s'\n", dev_name(hwmon_dev), client->name);
 
 	return 0;
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 496e771..322ed92 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -89,7 +89,6 @@
 #include <linux/slab.h>
 #include <linux/jiffies.h>
 #include <linux/i2c.h>
-#include <linux/hwmon-sysfs.h>
 #include <linux/hwmon.h>
 #include <linux/err.h>
 #include <linux/mutex.h>
@@ -326,7 +325,7 @@
 		.alert_alarms = 0x7c,
 		.max_convrate = 9,
 		.reg_local_ext = TMP451_REG_R_LOCAL_TEMPL,
-	}
+	},
 };
 
 /*
@@ -365,7 +364,10 @@
 
 struct lm90_data {
 	struct i2c_client *client;
-	const struct attribute_group *groups[6];
+	u32 channel_config[4];
+	struct hwmon_channel_info temp_info;
+	const struct hwmon_channel_info *info[3];
+	struct hwmon_chip_info chip;
 	struct mutex update_lock;
 	bool valid;		/* true if register values are valid */
 	unsigned long last_updated; /* in jiffies */
@@ -489,11 +491,11 @@
  * client->update_lock must be held when calling this function (unless we are
  * in detection or initialization steps).
  */
-static void lm90_set_convrate(struct i2c_client *client, struct lm90_data *data,
-			      unsigned int interval)
+static int lm90_set_convrate(struct i2c_client *client, struct lm90_data *data,
+			     unsigned int interval)
 {
-	int i;
 	unsigned int update_interval;
+	int i, err;
 
 	/* Shift calculations to avoid rounding errors */
 	interval <<= 6;
@@ -504,8 +506,9 @@
 		if (interval >= update_interval * 3 / 4)
 			break;
 
-	i2c_smbus_write_byte_data(client, LM90_REG_W_CONVRATE, i);
+	err = i2c_smbus_write_byte_data(client, LM90_REG_W_CONVRATE, i);
 	data->update_interval = DIV_ROUND_CLOSEST(update_interval, 64);
+	return err;
 }
 
 static int lm90_update_limits(struct device *dev)
@@ -604,19 +607,17 @@
 	return 0;
 }
 
-static struct lm90_data *lm90_update_device(struct device *dev)
+static int lm90_update_device(struct device *dev)
 {
 	struct lm90_data *data = dev_get_drvdata(dev);
 	struct i2c_client *client = data->client;
 	unsigned long next_update;
-	int val = 0;
-
-	mutex_lock(&data->update_lock);
+	int val;
 
 	if (!data->valid) {
 		val = lm90_update_limits(dev);
 		if (val < 0)
-			goto error;
+			return val;
 	}
 
 	next_update = data->last_updated +
@@ -628,53 +629,55 @@
 
 		val = lm90_read_reg(client, LM90_REG_R_LOCAL_LOW);
 		if (val < 0)
-			goto error;
+			return val;
 		data->temp8[LOCAL_LOW] = val;
 
 		val = lm90_read_reg(client, LM90_REG_R_LOCAL_HIGH);
 		if (val < 0)
-			goto error;
+			return val;
 		data->temp8[LOCAL_HIGH] = val;
 
 		if (data->reg_local_ext) {
 			val = lm90_read16(client, LM90_REG_R_LOCAL_TEMP,
 					  data->reg_local_ext);
 			if (val < 0)
-				goto error;
+				return val;
 			data->temp11[LOCAL_TEMP] = val;
 		} else {
 			val = lm90_read_reg(client, LM90_REG_R_LOCAL_TEMP);
 			if (val < 0)
-				goto error;
+				return val;
 			data->temp11[LOCAL_TEMP] = val << 8;
 		}
 		val = lm90_read16(client, LM90_REG_R_REMOTE_TEMPH,
 				  LM90_REG_R_REMOTE_TEMPL);
 		if (val < 0)
-			goto error;
+			return val;
 		data->temp11[REMOTE_TEMP] = val;
 
 		val = lm90_read_reg(client, LM90_REG_R_STATUS);
 		if (val < 0)
-			goto error;
+			return val;
 		data->alarms = val;	/* lower 8 bit of alarms */
 
 		if (data->kind == max6696) {
 			val = lm90_select_remote_channel(client, data, 1);
 			if (val < 0)
-				goto error;
+				return val;
 
 			val = lm90_read16(client, LM90_REG_R_REMOTE_TEMPH,
 					  LM90_REG_R_REMOTE_TEMPL);
-			if (val < 0)
-				goto error;
+			if (val < 0) {
+				lm90_select_remote_channel(client, data, 0);
+				return val;
+			}
 			data->temp11[REMOTE2_TEMP] = val;
 
 			lm90_select_remote_channel(client, data, 0);
 
 			val = lm90_read_reg(client, MAX6696_REG_R_STATUS2);
 			if (val < 0)
-				goto error;
+				return val;
 			data->alarms |= val << 8;
 		}
 
@@ -686,7 +689,7 @@
 		    !(data->alarms & data->alert_alarms)) {
 			val = lm90_read_reg(client, LM90_REG_R_CONFIG1);
 			if (val < 0)
-				goto error;
+				return val;
 
 			if (val & 0x80) {
 				dev_dbg(&client->dev, "Re-enabling ALERT#\n");
@@ -700,13 +703,7 @@
 		data->valid = true;
 	}
 
-error:
-	mutex_unlock(&data->update_lock);
-
-	if (val < 0)
-		return ERR_PTR(val);
-
-	return data;
+	return 0;
 }
 
 /*
@@ -832,422 +829,12 @@
 	return (val + 125) / 250 * 64;
 }
 
-/*
- * Sysfs stuff
- */
-
-static ssize_t show_temp8(struct device *dev, struct device_attribute *devattr,
-			  char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct lm90_data *data = lm90_update_device(dev);
-	int temp;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	if (data->kind == adt7461 || data->kind == tmp451)
-		temp = temp_from_u8_adt7461(data, data->temp8[attr->index]);
-	else if (data->kind == max6646)
-		temp = temp_from_u8(data->temp8[attr->index]);
-	else
-		temp = temp_from_s8(data->temp8[attr->index]);
-
-	/* +16 degrees offset for temp2 for the LM99 */
-	if (data->kind == lm99 && attr->index == 3)
-		temp += 16000;
-
-	return sprintf(buf, "%d\n", temp);
-}
-
-static ssize_t set_temp8(struct device *dev, struct device_attribute *devattr,
-			 const char *buf, size_t count)
-{
-	static const u8 reg[TEMP8_REG_NUM] = {
-		LM90_REG_W_LOCAL_LOW,
-		LM90_REG_W_LOCAL_HIGH,
-		LM90_REG_W_LOCAL_CRIT,
-		LM90_REG_W_REMOTE_CRIT,
-		MAX6659_REG_W_LOCAL_EMERG,
-		MAX6659_REG_W_REMOTE_EMERG,
-		LM90_REG_W_REMOTE_CRIT,
-		MAX6659_REG_W_REMOTE_EMERG,
-	};
-
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct lm90_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	int nr = attr->index;
-	long val;
-	int err;
-
-	err = kstrtol(buf, 10, &val);
-	if (err < 0)
-		return err;
-
-	/* +16 degrees offset for temp2 for the LM99 */
-	if (data->kind == lm99 && attr->index == 3)
-		val -= 16000;
-
-	mutex_lock(&data->update_lock);
-	if (data->kind == adt7461 || data->kind == tmp451)
-		data->temp8[nr] = temp_to_u8_adt7461(data, val);
-	else if (data->kind == max6646)
-		data->temp8[nr] = temp_to_u8(val);
-	else
-		data->temp8[nr] = temp_to_s8(val);
-
-	lm90_select_remote_channel(client, data, nr >= 6);
-	i2c_smbus_write_byte_data(client, reg[nr], data->temp8[nr]);
-	lm90_select_remote_channel(client, data, 0);
-
-	mutex_unlock(&data->update_lock);
-	return count;
-}
-
-static ssize_t show_temp11(struct device *dev, struct device_attribute *devattr,
-			   char *buf)
-{
-	struct sensor_device_attribute_2 *attr = to_sensor_dev_attr_2(devattr);
-	struct lm90_data *data = lm90_update_device(dev);
-	int temp;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	if (data->kind == adt7461 || data->kind == tmp451)
-		temp = temp_from_u16_adt7461(data, data->temp11[attr->index]);
-	else if (data->kind == max6646)
-		temp = temp_from_u16(data->temp11[attr->index]);
-	else
-		temp = temp_from_s16(data->temp11[attr->index]);
-
-	/* +16 degrees offset for temp2 for the LM99 */
-	if (data->kind == lm99 &&  attr->index <= 2)
-		temp += 16000;
-
-	return sprintf(buf, "%d\n", temp);
-}
-
-static ssize_t set_temp11(struct device *dev, struct device_attribute *devattr,
-			  const char *buf, size_t count)
-{
-	struct {
-		u8 high;
-		u8 low;
-		int channel;
-	} reg[5] = {
-		{ LM90_REG_W_REMOTE_LOWH, LM90_REG_W_REMOTE_LOWL, 0 },
-		{ LM90_REG_W_REMOTE_HIGHH, LM90_REG_W_REMOTE_HIGHL, 0 },
-		{ LM90_REG_W_REMOTE_OFFSH, LM90_REG_W_REMOTE_OFFSL, 0 },
-		{ LM90_REG_W_REMOTE_LOWH, LM90_REG_W_REMOTE_LOWL, 1 },
-		{ LM90_REG_W_REMOTE_HIGHH, LM90_REG_W_REMOTE_HIGHL, 1 }
-	};
-
-	struct sensor_device_attribute_2 *attr = to_sensor_dev_attr_2(devattr);
-	struct lm90_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	int nr = attr->nr;
-	int index = attr->index;
-	long val;
-	int err;
-
-	err = kstrtol(buf, 10, &val);
-	if (err < 0)
-		return err;
-
-	/* +16 degrees offset for temp2 for the LM99 */
-	if (data->kind == lm99 && index <= 2)
-		val -= 16000;
-
-	mutex_lock(&data->update_lock);
-	if (data->kind == adt7461 || data->kind == tmp451)
-		data->temp11[index] = temp_to_u16_adt7461(data, val);
-	else if (data->kind == max6646)
-		data->temp11[index] = temp_to_u8(val) << 8;
-	else if (data->flags & LM90_HAVE_REM_LIMIT_EXT)
-		data->temp11[index] = temp_to_s16(val);
-	else
-		data->temp11[index] = temp_to_s8(val) << 8;
-
-	lm90_select_remote_channel(client, data, reg[nr].channel);
-	i2c_smbus_write_byte_data(client, reg[nr].high,
-				  data->temp11[index] >> 8);
-	if (data->flags & LM90_HAVE_REM_LIMIT_EXT)
-		i2c_smbus_write_byte_data(client, reg[nr].low,
-					  data->temp11[index] & 0xff);
-	lm90_select_remote_channel(client, data, 0);
-
-	mutex_unlock(&data->update_lock);
-	return count;
-}
-
-static ssize_t show_temphyst(struct device *dev,
-			     struct device_attribute *devattr,
-			     char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct lm90_data *data = lm90_update_device(dev);
-	int temp;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	if (data->kind == adt7461 || data->kind == tmp451)
-		temp = temp_from_u8_adt7461(data, data->temp8[attr->index]);
-	else if (data->kind == max6646)
-		temp = temp_from_u8(data->temp8[attr->index]);
-	else
-		temp = temp_from_s8(data->temp8[attr->index]);
-
-	/* +16 degrees offset for temp2 for the LM99 */
-	if (data->kind == lm99 && attr->index == 3)
-		temp += 16000;
-
-	return sprintf(buf, "%d\n", temp - temp_from_s8(data->temp_hyst));
-}
-
-static ssize_t set_temphyst(struct device *dev, struct device_attribute *dummy,
-			    const char *buf, size_t count)
-{
-	struct lm90_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	long val;
-	int err;
-	int temp;
-
-	err = kstrtol(buf, 10, &val);
-	if (err < 0)
-		return err;
-
-	mutex_lock(&data->update_lock);
-	if (data->kind == adt7461 || data->kind == tmp451)
-		temp = temp_from_u8_adt7461(data, data->temp8[LOCAL_CRIT]);
-	else if (data->kind == max6646)
-		temp = temp_from_u8(data->temp8[LOCAL_CRIT]);
-	else
-		temp = temp_from_s8(data->temp8[LOCAL_CRIT]);
-
-	data->temp_hyst = hyst_to_reg(temp - val);
-	i2c_smbus_write_byte_data(client, LM90_REG_W_TCRIT_HYST,
-				  data->temp_hyst);
-	mutex_unlock(&data->update_lock);
-	return count;
-}
-
-static ssize_t show_alarms(struct device *dev, struct device_attribute *dummy,
-			   char *buf)
-{
-	struct lm90_data *data = lm90_update_device(dev);
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	return sprintf(buf, "%d\n", data->alarms);
-}
-
-static ssize_t show_alarm(struct device *dev, struct device_attribute
-			  *devattr, char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct lm90_data *data = lm90_update_device(dev);
-	int bitnr = attr->index;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	return sprintf(buf, "%d\n", (data->alarms >> bitnr) & 1);
-}
-
-static ssize_t show_update_interval(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	struct lm90_data *data = dev_get_drvdata(dev);
-
-	return sprintf(buf, "%u\n", data->update_interval);
-}
-
-static ssize_t set_update_interval(struct device *dev,
-				   struct device_attribute *attr,
-				   const char *buf, size_t count)
-{
-	struct lm90_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	unsigned long val;
-	int err;
-
-	err = kstrtoul(buf, 10, &val);
-	if (err)
-		return err;
-
-	mutex_lock(&data->update_lock);
-	lm90_set_convrate(client, data, clamp_val(val, 0, 100000));
-	mutex_unlock(&data->update_lock);
-
-	return count;
-}
-
-static SENSOR_DEVICE_ATTR_2(temp1_input, S_IRUGO, show_temp11, NULL,
-	0, LOCAL_TEMP);
-static SENSOR_DEVICE_ATTR_2(temp2_input, S_IRUGO, show_temp11, NULL,
-	0, REMOTE_TEMP);
-static SENSOR_DEVICE_ATTR(temp1_min, S_IWUSR | S_IRUGO, show_temp8,
-	set_temp8, LOCAL_LOW);
-static SENSOR_DEVICE_ATTR_2(temp2_min, S_IWUSR | S_IRUGO, show_temp11,
-	set_temp11, 0, REMOTE_LOW);
-static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, show_temp8,
-	set_temp8, LOCAL_HIGH);
-static SENSOR_DEVICE_ATTR_2(temp2_max, S_IWUSR | S_IRUGO, show_temp11,
-	set_temp11, 1, REMOTE_HIGH);
-static SENSOR_DEVICE_ATTR(temp1_crit, S_IWUSR | S_IRUGO, show_temp8,
-	set_temp8, LOCAL_CRIT);
-static SENSOR_DEVICE_ATTR(temp2_crit, S_IWUSR | S_IRUGO, show_temp8,
-	set_temp8, REMOTE_CRIT);
-static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IWUSR | S_IRUGO, show_temphyst,
-	set_temphyst, LOCAL_CRIT);
-static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, show_temphyst, NULL,
-	REMOTE_CRIT);
-static SENSOR_DEVICE_ATTR_2(temp2_offset, S_IWUSR | S_IRUGO, show_temp11,
-	set_temp11, 2, REMOTE_OFFSET);
-
-/* Individual alarm files */
-static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL, 0);
-static SENSOR_DEVICE_ATTR(temp2_crit_alarm, S_IRUGO, show_alarm, NULL, 1);
-static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL, 2);
-static SENSOR_DEVICE_ATTR(temp2_min_alarm, S_IRUGO, show_alarm, NULL, 3);
-static SENSOR_DEVICE_ATTR(temp2_max_alarm, S_IRUGO, show_alarm, NULL, 4);
-static SENSOR_DEVICE_ATTR(temp1_min_alarm, S_IRUGO, show_alarm, NULL, 5);
-static SENSOR_DEVICE_ATTR(temp1_max_alarm, S_IRUGO, show_alarm, NULL, 6);
-/* Raw alarm file for compatibility */
-static DEVICE_ATTR(alarms, S_IRUGO, show_alarms, NULL);
-
-static DEVICE_ATTR(update_interval, S_IRUGO | S_IWUSR, show_update_interval,
-		   set_update_interval);
-
-static struct attribute *lm90_attributes[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp2_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_min.dev_attr.attr,
-	&sensor_dev_attr_temp2_min.dev_attr.attr,
-	&sensor_dev_attr_temp1_max.dev_attr.attr,
-	&sensor_dev_attr_temp2_max.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit.dev_attr.attr,
-	&sensor_dev_attr_temp2_crit.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
-
-	&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp2_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp2_fault.dev_attr.attr,
-	&sensor_dev_attr_temp2_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp2_max_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp1_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_alarm.dev_attr.attr,
-	&dev_attr_alarms.attr,
-	&dev_attr_update_interval.attr,
-	NULL
-};
-
-static const struct attribute_group lm90_group = {
-	.attrs = lm90_attributes,
-};
-
-static struct attribute *lm90_temp2_offset_attributes[] = {
-	&sensor_dev_attr_temp2_offset.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute_group lm90_temp2_offset_group = {
-	.attrs = lm90_temp2_offset_attributes,
-};
-
-/*
- * Additional attributes for devices with emergency sensors
- */
-static SENSOR_DEVICE_ATTR(temp1_emergency, S_IWUSR | S_IRUGO, show_temp8,
-	set_temp8, LOCAL_EMERG);
-static SENSOR_DEVICE_ATTR(temp2_emergency, S_IWUSR | S_IRUGO, show_temp8,
-	set_temp8, REMOTE_EMERG);
-static SENSOR_DEVICE_ATTR(temp1_emergency_hyst, S_IRUGO, show_temphyst,
-			  NULL, LOCAL_EMERG);
-static SENSOR_DEVICE_ATTR(temp2_emergency_hyst, S_IRUGO, show_temphyst,
-			  NULL, REMOTE_EMERG);
-
-static struct attribute *lm90_emergency_attributes[] = {
-	&sensor_dev_attr_temp1_emergency.dev_attr.attr,
-	&sensor_dev_attr_temp2_emergency.dev_attr.attr,
-	&sensor_dev_attr_temp1_emergency_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp2_emergency_hyst.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute_group lm90_emergency_group = {
-	.attrs = lm90_emergency_attributes,
-};
-
-static SENSOR_DEVICE_ATTR(temp1_emergency_alarm, S_IRUGO, show_alarm, NULL, 15);
-static SENSOR_DEVICE_ATTR(temp2_emergency_alarm, S_IRUGO, show_alarm, NULL, 13);
-
-static struct attribute *lm90_emergency_alarm_attributes[] = {
-	&sensor_dev_attr_temp1_emergency_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp2_emergency_alarm.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute_group lm90_emergency_alarm_group = {
-	.attrs = lm90_emergency_alarm_attributes,
-};
-
-/*
- * Additional attributes for devices with 3 temperature sensors
- */
-static SENSOR_DEVICE_ATTR_2(temp3_input, S_IRUGO, show_temp11, NULL,
-	0, REMOTE2_TEMP);
-static SENSOR_DEVICE_ATTR_2(temp3_min, S_IWUSR | S_IRUGO, show_temp11,
-	set_temp11, 3, REMOTE2_LOW);
-static SENSOR_DEVICE_ATTR_2(temp3_max, S_IWUSR | S_IRUGO, show_temp11,
-	set_temp11, 4, REMOTE2_HIGH);
-static SENSOR_DEVICE_ATTR(temp3_crit, S_IWUSR | S_IRUGO, show_temp8,
-	set_temp8, REMOTE2_CRIT);
-static SENSOR_DEVICE_ATTR(temp3_crit_hyst, S_IRUGO, show_temphyst, NULL,
-	REMOTE2_CRIT);
-static SENSOR_DEVICE_ATTR(temp3_emergency, S_IWUSR | S_IRUGO, show_temp8,
-	set_temp8, REMOTE2_EMERG);
-static SENSOR_DEVICE_ATTR(temp3_emergency_hyst, S_IRUGO, show_temphyst,
-			  NULL, REMOTE2_EMERG);
-
-static SENSOR_DEVICE_ATTR(temp3_crit_alarm, S_IRUGO, show_alarm, NULL, 9);
-static SENSOR_DEVICE_ATTR(temp3_fault, S_IRUGO, show_alarm, NULL, 10);
-static SENSOR_DEVICE_ATTR(temp3_min_alarm, S_IRUGO, show_alarm, NULL, 11);
-static SENSOR_DEVICE_ATTR(temp3_max_alarm, S_IRUGO, show_alarm, NULL, 12);
-static SENSOR_DEVICE_ATTR(temp3_emergency_alarm, S_IRUGO, show_alarm, NULL, 14);
-
-static struct attribute *lm90_temp3_attributes[] = {
-	&sensor_dev_attr_temp3_input.dev_attr.attr,
-	&sensor_dev_attr_temp3_min.dev_attr.attr,
-	&sensor_dev_attr_temp3_max.dev_attr.attr,
-	&sensor_dev_attr_temp3_crit.dev_attr.attr,
-	&sensor_dev_attr_temp3_crit_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp3_emergency.dev_attr.attr,
-	&sensor_dev_attr_temp3_emergency_hyst.dev_attr.attr,
-
-	&sensor_dev_attr_temp3_fault.dev_attr.attr,
-	&sensor_dev_attr_temp3_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp3_max_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp3_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp3_emergency_alarm.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute_group lm90_temp3_group = {
-	.attrs = lm90_temp3_attributes,
-};
-
 /* pec used for ADM1032 only */
 static ssize_t show_pec(struct device *dev, struct device_attribute *dummy,
 			char *buf)
 {
 	struct i2c_client *client = to_i2c_client(dev);
+
 	return sprintf(buf, "%d\n", !!(client->flags & I2C_CLIENT_PEC));
 }
 
@@ -1278,9 +865,431 @@
 
 static DEVICE_ATTR(pec, S_IWUSR | S_IRUGO, show_pec, set_pec);
 
-/*
- * Real code
- */
+static int lm90_get_temp11(struct lm90_data *data, int index)
+{
+	s16 temp11 = data->temp11[index];
+	int temp;
+
+	if (data->kind == adt7461 || data->kind == tmp451)
+		temp = temp_from_u16_adt7461(data, temp11);
+	else if (data->kind == max6646)
+		temp = temp_from_u16(temp11);
+	else
+		temp = temp_from_s16(temp11);
+
+	/* +16 degrees offset for temp2 for the LM99 */
+	if (data->kind == lm99 && index <= 2)
+		temp += 16000;
+
+	return temp;
+}
+
+static int lm90_set_temp11(struct lm90_data *data, int index, long val)
+{
+	static struct reg {
+		u8 high;
+		u8 low;
+	} reg[] = {
+	[REMOTE_LOW] = { LM90_REG_W_REMOTE_LOWH, LM90_REG_W_REMOTE_LOWL },
+	[REMOTE_HIGH] = { LM90_REG_W_REMOTE_HIGHH, LM90_REG_W_REMOTE_HIGHL },
+	[REMOTE_OFFSET] = { LM90_REG_W_REMOTE_OFFSH, LM90_REG_W_REMOTE_OFFSL },
+	[REMOTE2_LOW] = { LM90_REG_W_REMOTE_LOWH, LM90_REG_W_REMOTE_LOWL },
+	[REMOTE2_HIGH] = { LM90_REG_W_REMOTE_HIGHH, LM90_REG_W_REMOTE_HIGHL }
+	};
+	struct i2c_client *client = data->client;
+	struct reg *regp = &reg[index];
+	int err;
+
+	/* +16 degrees offset for temp2 for the LM99 */
+	if (data->kind == lm99 && index <= 2)
+		val -= 16000;
+
+	if (data->kind == adt7461 || data->kind == tmp451)
+		data->temp11[index] = temp_to_u16_adt7461(data, val);
+	else if (data->kind == max6646)
+		data->temp11[index] = temp_to_u8(val) << 8;
+	else if (data->flags & LM90_HAVE_REM_LIMIT_EXT)
+		data->temp11[index] = temp_to_s16(val);
+	else
+		data->temp11[index] = temp_to_s8(val) << 8;
+
+	lm90_select_remote_channel(client, data, index >= 3);
+	err = i2c_smbus_write_byte_data(client, regp->high,
+				  data->temp11[index] >> 8);
+	if (err < 0)
+		return err;
+	if (data->flags & LM90_HAVE_REM_LIMIT_EXT)
+		err = i2c_smbus_write_byte_data(client, regp->low,
+						data->temp11[index] & 0xff);
+
+	lm90_select_remote_channel(client, data, 0);
+	return err;
+}
+
+static int lm90_get_temp8(struct lm90_data *data, int index)
+{
+	s8 temp8 = data->temp8[index];
+	int temp;
+
+	if (data->kind == adt7461 || data->kind == tmp451)
+		temp = temp_from_u8_adt7461(data, temp8);
+	else if (data->kind == max6646)
+		temp = temp_from_u8(temp8);
+	else
+		temp = temp_from_s8(temp8);
+
+	/* +16 degrees offset for temp2 for the LM99 */
+	if (data->kind == lm99 && index == 3)
+		temp += 16000;
+
+	return temp;
+}
+
+static int lm90_set_temp8(struct lm90_data *data, int index, long val)
+{
+	static const u8 reg[TEMP8_REG_NUM] = {
+		LM90_REG_W_LOCAL_LOW,
+		LM90_REG_W_LOCAL_HIGH,
+		LM90_REG_W_LOCAL_CRIT,
+		LM90_REG_W_REMOTE_CRIT,
+		MAX6659_REG_W_LOCAL_EMERG,
+		MAX6659_REG_W_REMOTE_EMERG,
+		LM90_REG_W_REMOTE_CRIT,
+		MAX6659_REG_W_REMOTE_EMERG,
+	};
+	struct i2c_client *client = data->client;
+	int err;
+
+	/* +16 degrees offset for temp2 for the LM99 */
+	if (data->kind == lm99 && index == 3)
+		val -= 16000;
+
+	if (data->kind == adt7461 || data->kind == tmp451)
+		data->temp8[index] = temp_to_u8_adt7461(data, val);
+	else if (data->kind == max6646)
+		data->temp8[index] = temp_to_u8(val);
+	else
+		data->temp8[index] = temp_to_s8(val);
+
+	lm90_select_remote_channel(client, data, index >= 6);
+	err = i2c_smbus_write_byte_data(client, reg[index], data->temp8[index]);
+	lm90_select_remote_channel(client, data, 0);
+
+	return err;
+}
+
+static int lm90_get_temphyst(struct lm90_data *data, int index)
+{
+	int temp;
+
+	if (data->kind == adt7461 || data->kind == tmp451)
+		temp = temp_from_u8_adt7461(data, data->temp8[index]);
+	else if (data->kind == max6646)
+		temp = temp_from_u8(data->temp8[index]);
+	else
+		temp = temp_from_s8(data->temp8[index]);
+
+	/* +16 degrees offset for temp2 for the LM99 */
+	if (data->kind == lm99 && index == 3)
+		temp += 16000;
+
+	return temp - temp_from_s8(data->temp_hyst);
+}
+
+static int lm90_set_temphyst(struct lm90_data *data, long val)
+{
+	struct i2c_client *client = data->client;
+	int temp;
+	int err;
+
+	if (data->kind == adt7461 || data->kind == tmp451)
+		temp = temp_from_u8_adt7461(data, data->temp8[LOCAL_CRIT]);
+	else if (data->kind == max6646)
+		temp = temp_from_u8(data->temp8[LOCAL_CRIT]);
+	else
+		temp = temp_from_s8(data->temp8[LOCAL_CRIT]);
+
+	data->temp_hyst = hyst_to_reg(temp - val);
+	err = i2c_smbus_write_byte_data(client, LM90_REG_W_TCRIT_HYST,
+					data->temp_hyst);
+	return err;
+}
+
+static const u8 lm90_temp_index[3] = {
+	LOCAL_TEMP, REMOTE_TEMP, REMOTE2_TEMP
+};
+
+static const u8 lm90_temp_min_index[3] = {
+	LOCAL_LOW, REMOTE_LOW, REMOTE2_LOW
+};
+
+static const u8 lm90_temp_max_index[3] = {
+	LOCAL_HIGH, REMOTE_HIGH, REMOTE2_HIGH
+};
+
+static const u8 lm90_temp_crit_index[3] = {
+	LOCAL_CRIT, REMOTE_CRIT, REMOTE2_CRIT
+};
+
+static const u8 lm90_temp_emerg_index[3] = {
+	LOCAL_EMERG, REMOTE_EMERG, REMOTE2_EMERG
+};
+
+static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 };
+static const u8 lm90_max_alarm_bits[3] = { 0, 4, 12 };
+static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 };
+static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 };
+static const u8 lm90_fault_bits[3] = { 0, 2, 10 };
+
+static int lm90_temp_read(struct device *dev, u32 attr, int channel, long *val)
+{
+	struct lm90_data *data = dev_get_drvdata(dev);
+	int err;
+
+	mutex_lock(&data->update_lock);
+	err = lm90_update_device(dev);
+	mutex_unlock(&data->update_lock);
+	if (err)
+		return err;
+
+	switch (attr) {
+	case hwmon_temp_input:
+		*val = lm90_get_temp11(data, lm90_temp_index[channel]);
+		break;
+	case hwmon_temp_min_alarm:
+		*val = (data->alarms >> lm90_min_alarm_bits[channel]) & 1;
+		break;
+	case hwmon_temp_max_alarm:
+		*val = (data->alarms >> lm90_max_alarm_bits[channel]) & 1;
+		break;
+	case hwmon_temp_crit_alarm:
+		*val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1;
+		break;
+	case hwmon_temp_emergency_alarm:
+		*val = (data->alarms >> lm90_emergency_alarm_bits[channel]) & 1;
+		break;
+	case hwmon_temp_fault:
+		*val = (data->alarms >> lm90_fault_bits[channel]) & 1;
+		break;
+	case hwmon_temp_min:
+		if (channel == 0)
+			*val = lm90_get_temp8(data,
+					      lm90_temp_min_index[channel]);
+		else
+			*val = lm90_get_temp11(data,
+					       lm90_temp_min_index[channel]);
+		break;
+	case hwmon_temp_max:
+		if (channel == 0)
+			*val = lm90_get_temp8(data,
+					      lm90_temp_max_index[channel]);
+		else
+			*val = lm90_get_temp11(data,
+					       lm90_temp_max_index[channel]);
+		break;
+	case hwmon_temp_crit:
+		*val = lm90_get_temp8(data, lm90_temp_crit_index[channel]);
+		break;
+	case hwmon_temp_crit_hyst:
+		*val = lm90_get_temphyst(data, lm90_temp_crit_index[channel]);
+		break;
+	case hwmon_temp_emergency:
+		*val = lm90_get_temp8(data, lm90_temp_emerg_index[channel]);
+		break;
+	case hwmon_temp_emergency_hyst:
+		*val = lm90_get_temphyst(data, lm90_temp_emerg_index[channel]);
+		break;
+	case hwmon_temp_offset:
+		*val = lm90_get_temp11(data, REMOTE_OFFSET);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
+static int lm90_temp_write(struct device *dev, u32 attr, int channel, long val)
+{
+	struct lm90_data *data = dev_get_drvdata(dev);
+	int err;
+
+	mutex_lock(&data->update_lock);
+
+	err = lm90_update_device(dev);
+	if (err)
+		goto error;
+
+	switch (attr) {
+	case hwmon_temp_min:
+		if (channel == 0)
+			err = lm90_set_temp8(data,
+					      lm90_temp_min_index[channel],
+					      val);
+		else
+			err = lm90_set_temp11(data,
+					      lm90_temp_min_index[channel],
+					      val);
+		break;
+	case hwmon_temp_max:
+		if (channel == 0)
+			err = lm90_set_temp8(data,
+					     lm90_temp_max_index[channel],
+					     val);
+		else
+			err = lm90_set_temp11(data,
+					      lm90_temp_max_index[channel],
+					      val);
+		break;
+	case hwmon_temp_crit:
+		err = lm90_set_temp8(data, lm90_temp_crit_index[channel], val);
+		break;
+	case hwmon_temp_crit_hyst:
+		err = lm90_set_temphyst(data, val);
+		break;
+	case hwmon_temp_emergency:
+		err = lm90_set_temp8(data, lm90_temp_emerg_index[channel], val);
+		break;
+	case hwmon_temp_offset:
+		err = lm90_set_temp11(data, REMOTE_OFFSET, val);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+error:
+	mutex_unlock(&data->update_lock);
+
+	return err;
+}
+
+static umode_t lm90_temp_is_visible(const void *data, u32 attr, int channel)
+{
+	switch (attr) {
+	case hwmon_temp_input:
+	case hwmon_temp_min_alarm:
+	case hwmon_temp_max_alarm:
+	case hwmon_temp_crit_alarm:
+	case hwmon_temp_emergency_alarm:
+	case hwmon_temp_emergency_hyst:
+	case hwmon_temp_fault:
+		return S_IRUGO;
+	case hwmon_temp_min:
+	case hwmon_temp_max:
+	case hwmon_temp_crit:
+	case hwmon_temp_emergency:
+	case hwmon_temp_offset:
+		return S_IRUGO | S_IWUSR;
+	case hwmon_temp_crit_hyst:
+		if (channel == 0)
+			return S_IRUGO | S_IWUSR;
+		return S_IRUGO;
+	default:
+		return 0;
+	}
+}
+
+static int lm90_chip_read(struct device *dev, u32 attr, int channel, long *val)
+{
+	struct lm90_data *data = dev_get_drvdata(dev);
+	int err;
+
+	mutex_lock(&data->update_lock);
+	err = lm90_update_device(dev);
+	mutex_unlock(&data->update_lock);
+	if (err)
+		return err;
+
+	switch (attr) {
+	case hwmon_chip_update_interval:
+		*val = data->update_interval;
+		break;
+	case hwmon_chip_alarms:
+		*val = data->alarms;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int lm90_chip_write(struct device *dev, u32 attr, int channel, long val)
+{
+	struct lm90_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	int err;
+
+	mutex_lock(&data->update_lock);
+
+	err = lm90_update_device(dev);
+	if (err)
+		goto error;
+
+	switch (attr) {
+	case hwmon_chip_update_interval:
+		err = lm90_set_convrate(client, data,
+					clamp_val(val, 0, 100000));
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+error:
+	mutex_unlock(&data->update_lock);
+
+	return err;
+}
+
+static umode_t lm90_chip_is_visible(const void *data, u32 attr, int channel)
+{
+	switch (attr) {
+	case hwmon_chip_update_interval:
+		return S_IRUGO | S_IWUSR;
+	case hwmon_chip_alarms:
+		return S_IRUGO;
+	default:
+		return 0;
+	}
+}
+
+static int lm90_read(struct device *dev, enum hwmon_sensor_types type,
+		     u32 attr, int channel, long *val)
+{
+	switch (type) {
+	case hwmon_chip:
+		return lm90_chip_read(dev, attr, channel, val);
+	case hwmon_temp:
+		return lm90_temp_read(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int lm90_write(struct device *dev, enum hwmon_sensor_types type,
+		      u32 attr, int channel, long val)
+{
+	switch (type) {
+	case hwmon_chip:
+		return lm90_chip_write(dev, attr, channel, val);
+	case hwmon_temp:
+		return lm90_temp_write(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static umode_t lm90_is_visible(const void *data, enum hwmon_sensor_types type,
+			       u32 attr, int channel)
+{
+	switch (type) {
+	case hwmon_chip:
+		return lm90_chip_is_visible(data, attr, channel);
+	case hwmon_temp:
+		return lm90_temp_is_visible(data, attr, channel);
+	default:
+		return 0;
+	}
+}
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
 static int lm90_detect(struct i2c_client *client,
@@ -1617,15 +1626,32 @@
 	regulator_disable(regulator);
 }
 
+static const u32 lm90_chip_config[] = {
+	HWMON_C_REGISTER_TZ | HWMON_C_UPDATE_INTERVAL | HWMON_C_ALARMS,
+	0
+};
+
+static const struct hwmon_channel_info lm90_chip_info = {
+	.type = hwmon_chip,
+	.config = lm90_chip_config,
+};
+
+
+static const struct hwmon_ops lm90_ops = {
+	.is_visible = lm90_is_visible,
+	.read = lm90_read,
+	.write = lm90_write,
+};
+
 static int lm90_probe(struct i2c_client *client,
 		      const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
 	struct i2c_adapter *adapter = to_i2c_adapter(dev->parent);
-	struct lm90_data *data;
+	struct hwmon_channel_info *info;
 	struct regulator *regulator;
 	struct device *hwmon_dev;
-	int groups = 0;
+	struct lm90_data *data;
 	int err;
 
 	regulator = devm_regulator_get(dev, "vcc");
@@ -1665,6 +1691,49 @@
 
 	/* Set chip capabilities */
 	data->flags = lm90_params[data->kind].flags;
+
+	data->chip.ops = &lm90_ops;
+	data->chip.info = data->info;
+
+	data->info[0] = &lm90_chip_info;
+	data->info[1] = &data->temp_info;
+
+	info = &data->temp_info;
+	info->type = hwmon_temp;
+	info->config = data->channel_config;
+
+	data->channel_config[0] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX |
+		HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM |
+		HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM;
+	data->channel_config[1] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX |
+		HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM |
+		HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT;
+
+	if (data->flags & LM90_HAVE_OFFSET)
+		data->channel_config[1] |= HWMON_T_OFFSET;
+
+	if (data->flags & LM90_HAVE_EMERGENCY) {
+		data->channel_config[0] |= HWMON_T_EMERGENCY |
+			HWMON_T_EMERGENCY_HYST;
+		data->channel_config[1] |= HWMON_T_EMERGENCY |
+			HWMON_T_EMERGENCY_HYST;
+	}
+
+	if (data->flags & LM90_HAVE_EMERGENCY_ALARM) {
+		data->channel_config[0] |= HWMON_T_EMERGENCY_ALARM;
+		data->channel_config[1] |= HWMON_T_EMERGENCY_ALARM;
+	}
+
+	if (data->flags & LM90_HAVE_TEMP3) {
+		data->channel_config[2] = HWMON_T_INPUT |
+			HWMON_T_MIN | HWMON_T_MAX |
+			HWMON_T_CRIT | HWMON_T_CRIT_HYST |
+			HWMON_T_EMERGENCY | HWMON_T_EMERGENCY_HYST |
+			HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM |
+			HWMON_T_CRIT_ALARM | HWMON_T_EMERGENCY_ALARM |
+			HWMON_T_FAULT;
+	}
+
 	data->reg_local_ext = lm90_params[data->kind].reg_local_ext;
 
 	/* Set maximum conversion rate */
@@ -1677,21 +1746,10 @@
 		return err;
 	}
 
-	/* Register sysfs hooks */
-	data->groups[groups++] = &lm90_group;
-
-	if (data->flags & LM90_HAVE_OFFSET)
-		data->groups[groups++] = &lm90_temp2_offset_group;
-
-	if (data->flags & LM90_HAVE_EMERGENCY)
-		data->groups[groups++] = &lm90_emergency_group;
-
-	if (data->flags & LM90_HAVE_EMERGENCY_ALARM)
-		data->groups[groups++] = &lm90_emergency_alarm_group;
-
-	if (data->flags & LM90_HAVE_TEMP3)
-		data->groups[groups++] = &lm90_temp3_group;
-
+	/*
+	 * The 'pec' attribute is attached to the i2c device and thus created
+	 * separately.
+	 */
 	if (client->flags & I2C_CLIENT_PEC) {
 		err = device_create_file(dev, &dev_attr_pec);
 		if (err)
@@ -1701,8 +1759,9 @@
 			return err;
 	}
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
-							   data, data->groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+							 data, &data->chip,
+							 NULL);
 	if (IS_ERR(hwmon_dev))
 		return PTR_ERR(hwmon_dev);
 
diff --git a/drivers/hwmon/lm95241.c b/drivers/hwmon/lm95241.c
index cdf19ad..8c573e6 100644
--- a/drivers/hwmon/lm95241.c
+++ b/drivers/hwmon/lm95241.c
@@ -15,22 +15,17 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/i2c.h>
-#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
+#include <linux/bitops.h>
 #include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/hwmon.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/sysfs.h>
+#include <linux/slab.h>
 
 #define DEVNAME "lm95241"
 
@@ -54,26 +49,25 @@
 #define LM95241_REG_RW_REMOTE_MODEL	0x30
 
 /* LM95241 specific bitfields */
-#define CFG_STOP 0x40
-#define CFG_CR0076 0x00
-#define CFG_CR0182 0x10
-#define CFG_CR1000 0x20
-#define CFG_CR2700 0x30
-#define R1MS_SHIFT 0
-#define R2MS_SHIFT 2
-#define R1MS_MASK (0x01 << (R1MS_SHIFT))
-#define R2MS_MASK (0x01 << (R2MS_SHIFT))
-#define R1DF_SHIFT 1
-#define R2DF_SHIFT 2
-#define R1DF_MASK (0x01 << (R1DF_SHIFT))
-#define R2DF_MASK (0x01 << (R2DF_SHIFT))
-#define R1FE_MASK 0x01
-#define R2FE_MASK 0x05
-#define TT1_SHIFT 0
-#define TT2_SHIFT 4
-#define TT_OFF 0
-#define TT_ON 1
-#define TT_MASK 7
+#define CFG_STOP	BIT(6)
+#define CFG_CR0076	0x00
+#define CFG_CR0182	BIT(4)
+#define CFG_CR1000	BIT(5)
+#define CFG_CR2700	(BIT(4) | BIT(5))
+#define CFG_CRMASK	(BIT(4) | BIT(5))
+#define R1MS_MASK	BIT(0)
+#define R2MS_MASK	BIT(2)
+#define R1DF_MASK	BIT(1)
+#define R2DF_MASK	BIT(2)
+#define R1FE_MASK	BIT(0)
+#define R2FE_MASK	BIT(2)
+#define R1DM		BIT(0)
+#define R2DM		BIT(1)
+#define TT1_SHIFT	0
+#define TT2_SHIFT	4
+#define TT_OFF		0
+#define TT_ON		1
+#define TT_MASK		7
 #define NATSEMI_MAN_ID	0x01
 #define LM95231_CHIP_ID	0xA1
 #define LM95241_CHIP_ID	0xA4
@@ -91,11 +85,12 @@
 struct lm95241_data {
 	struct i2c_client *client;
 	struct mutex update_lock;
-	unsigned long last_updated, interval;	/* in jiffies */
+	unsigned long last_updated;	/* in jiffies */
+	unsigned long interval;		/* in milli-seconds */
 	char valid;		/* zero until following fields are valid */
 	/* registers values */
 	u8 temp[ARRAY_SIZE(lm95241_reg_address)];
-	u8 config, model, trutherm;
+	u8 status, config, model, trutherm;
 };
 
 /* Conversions */
@@ -118,7 +113,8 @@
 
 	mutex_lock(&data->update_lock);
 
-	if (time_after(jiffies, data->last_updated + data->interval) ||
+	if (time_after(jiffies, data->last_updated
+		       + msecs_to_jiffies(data->interval)) ||
 	    !data->valid) {
 		int i;
 
@@ -127,6 +123,9 @@
 			data->temp[i]
 			  = i2c_smbus_read_byte_data(client,
 						     lm95241_reg_address[i]);
+
+		data->status = i2c_smbus_read_byte_data(client,
+							LM95241_REG_R_STATUS);
 		data->last_updated = jiffies;
 		data->valid = 1;
 	}
@@ -136,197 +135,241 @@
 	return data;
 }
 
-/* Sysfs stuff */
-static ssize_t show_input(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	struct lm95241_data *data = lm95241_update_device(dev);
-	int index = to_sensor_dev_attr(attr)->index;
-
-	return snprintf(buf, PAGE_SIZE - 1, "%d\n",
-			index == 0 || (data->config & (1 << (index / 2))) ?
-		temp_from_reg_signed(data->temp[index], data->temp[index + 1]) :
-		temp_from_reg_unsigned(data->temp[index],
-				       data->temp[index + 1]));
-}
-
-static ssize_t show_type(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+static int lm95241_read_chip(struct device *dev, u32 attr, int channel,
+			     long *val)
 {
 	struct lm95241_data *data = dev_get_drvdata(dev);
 
-	return snprintf(buf, PAGE_SIZE - 1,
-		data->model & to_sensor_dev_attr(attr)->index ? "1\n" : "2\n");
+	switch (attr) {
+	case hwmon_chip_update_interval:
+		*val = data->interval;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t set_type(struct device *dev, struct device_attribute *attr,
-			const char *buf, size_t count)
+static int lm95241_read_temp(struct device *dev, u32 attr, int channel,
+			     long *val)
+{
+	struct lm95241_data *data = lm95241_update_device(dev);
+
+	switch (attr) {
+	case hwmon_temp_input:
+		if (!channel || (data->config & BIT(channel - 1)))
+			*val = temp_from_reg_signed(data->temp[channel * 2],
+						data->temp[channel * 2 + 1]);
+		else
+			*val = temp_from_reg_unsigned(data->temp[channel * 2],
+						data->temp[channel * 2 + 1]);
+		return 0;
+	case hwmon_temp_min:
+		if (channel == 1)
+			*val = (data->config & R1DF_MASK) ? -128000 : 0;
+		else
+			*val = (data->config & R2DF_MASK) ? -128000 : 0;
+		return 0;
+	case hwmon_temp_max:
+		if (channel == 1)
+			*val = (data->config & R1DF_MASK) ? 127875 : 255875;
+		else
+			*val = (data->config & R2DF_MASK) ? 127875 : 255875;
+		return 0;
+	case hwmon_temp_type:
+		if (channel == 1)
+			*val = (data->model & R1MS_MASK) ? 1 : 2;
+		else
+			*val = (data->model & R2MS_MASK) ? 1 : 2;
+		return 0;
+	case hwmon_temp_fault:
+		if (channel == 1)
+			*val = !!(data->status & R1DM);
+		else
+			*val = !!(data->status & R2DM);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int lm95241_read(struct device *dev, enum hwmon_sensor_types type,
+			u32 attr, int channel, long *val)
+{
+	switch (type) {
+	case hwmon_chip:
+		return lm95241_read_chip(dev, attr, channel, val);
+	case hwmon_temp:
+		return lm95241_read_temp(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int lm95241_write_chip(struct device *dev, u32 attr, int channel,
+			      long val)
+{
+	struct lm95241_data *data = dev_get_drvdata(dev);
+	int convrate;
+	u8 config;
+	int ret;
+
+	mutex_lock(&data->update_lock);
+
+	switch (attr) {
+	case hwmon_chip_update_interval:
+		config = data->config & ~CFG_CRMASK;
+		if (val < 130) {
+			convrate = 76;
+			config |= CFG_CR0076;
+		} else if (val < 590) {
+			convrate = 182;
+			config |= CFG_CR0182;
+		} else if (val < 1850) {
+			convrate = 1000;
+			config |= CFG_CR1000;
+		} else {
+			convrate = 2700;
+			config |= CFG_CR2700;
+		}
+		data->interval = convrate;
+		data->config = config;
+		ret = i2c_smbus_write_byte_data(data->client,
+						LM95241_REG_RW_CONFIG, config);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
+	}
+	mutex_unlock(&data->update_lock);
+	return ret;
+}
+
+static int lm95241_write_temp(struct device *dev, u32 attr, int channel,
+			      long val)
 {
 	struct lm95241_data *data = dev_get_drvdata(dev);
 	struct i2c_client *client = data->client;
-	unsigned long val;
-	int shift;
-	u8 mask = to_sensor_dev_attr(attr)->index;
-
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-	if (val != 1 && val != 2)
-		return -EINVAL;
-
-	shift = mask == R1MS_MASK ? TT1_SHIFT : TT2_SHIFT;
+	int ret;
 
 	mutex_lock(&data->update_lock);
 
-	data->trutherm &= ~(TT_MASK << shift);
-	if (val == 1) {
-		data->model |= mask;
-		data->trutherm |= (TT_ON << shift);
-	} else {
-		data->model &= ~mask;
-		data->trutherm |= (TT_OFF << shift);
+	switch (attr) {
+	case hwmon_temp_min:
+		if (channel == 1) {
+			if (val < 0)
+				data->config |= R1DF_MASK;
+			else
+				data->config &= ~R1DF_MASK;
+		} else {
+			if (val < 0)
+				data->config |= R2DF_MASK;
+			else
+				data->config &= ~R2DF_MASK;
+		}
+		data->valid = 0;
+		ret = i2c_smbus_write_byte_data(client, LM95241_REG_RW_CONFIG,
+						data->config);
+		break;
+	case hwmon_temp_max:
+		if (channel == 1) {
+			if (val <= 127875)
+				data->config |= R1DF_MASK;
+			else
+				data->config &= ~R1DF_MASK;
+		} else {
+			if (val <= 127875)
+				data->config |= R2DF_MASK;
+			else
+				data->config &= ~R2DF_MASK;
+		}
+		data->valid = 0;
+		ret = i2c_smbus_write_byte_data(client, LM95241_REG_RW_CONFIG,
+						data->config);
+		break;
+	case hwmon_temp_type:
+		if (val != 1 && val != 2) {
+			ret = -EINVAL;
+			break;
+		}
+		if (channel == 1) {
+			data->trutherm &= ~(TT_MASK << TT1_SHIFT);
+			if (val == 1) {
+				data->model |= R1MS_MASK;
+				data->trutherm |= (TT_ON << TT1_SHIFT);
+			} else {
+				data->model &= ~R1MS_MASK;
+				data->trutherm |= (TT_OFF << TT1_SHIFT);
+			}
+		} else {
+			data->trutherm &= ~(TT_MASK << TT2_SHIFT);
+			if (val == 1) {
+				data->model |= R2MS_MASK;
+				data->trutherm |= (TT_ON << TT2_SHIFT);
+			} else {
+				data->model &= ~R2MS_MASK;
+				data->trutherm |= (TT_OFF << TT2_SHIFT);
+			}
+		}
+		ret = i2c_smbus_write_byte_data(client,
+						LM95241_REG_RW_REMOTE_MODEL,
+						data->model);
+		if (ret < 0)
+			break;
+		ret = i2c_smbus_write_byte_data(client, LM95241_REG_RW_TRUTHERM,
+						data->trutherm);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		break;
 	}
-	data->valid = 0;
-
-	i2c_smbus_write_byte_data(client, LM95241_REG_RW_REMOTE_MODEL,
-				  data->model);
-	i2c_smbus_write_byte_data(client, LM95241_REG_RW_TRUTHERM,
-				  data->trutherm);
 
 	mutex_unlock(&data->update_lock);
 
-	return count;
+	return ret;
 }
 
-static ssize_t show_min(struct device *dev, struct device_attribute *attr,
-			char *buf)
+static int lm95241_write(struct device *dev, enum hwmon_sensor_types type,
+			 u32 attr, int channel, long val)
 {
-	struct lm95241_data *data = dev_get_drvdata(dev);
-
-	return snprintf(buf, PAGE_SIZE - 1,
-			data->config & to_sensor_dev_attr(attr)->index ?
-			"-127000\n" : "0\n");
+	switch (type) {
+	case hwmon_chip:
+		return lm95241_write_chip(dev, attr, channel, val);
+	case hwmon_temp:
+		return lm95241_write_temp(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t set_min(struct device *dev, struct device_attribute *attr,
-		       const char *buf, size_t count)
+static umode_t lm95241_is_visible(const void *data,
+				  enum hwmon_sensor_types type,
+				  u32 attr, int channel)
 {
-	struct lm95241_data *data = dev_get_drvdata(dev);
-	long val;
-
-	if (kstrtol(buf, 10, &val) < 0)
-		return -EINVAL;
-	if (val < -128000)
-		return -EINVAL;
-
-	mutex_lock(&data->update_lock);
-
-	if (val < 0)
-		data->config |= to_sensor_dev_attr(attr)->index;
-	else
-		data->config &= ~to_sensor_dev_attr(attr)->index;
-	data->valid = 0;
-
-	i2c_smbus_write_byte_data(data->client, LM95241_REG_RW_CONFIG,
-				  data->config);
-
-	mutex_unlock(&data->update_lock);
-
-	return count;
+	switch (type) {
+	case hwmon_chip:
+		switch (attr) {
+		case hwmon_chip_update_interval:
+			return S_IRUGO | S_IWUSR;
+		}
+		break;
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_input:
+			return S_IRUGO;
+		case hwmon_temp_fault:
+			return S_IRUGO;
+		case hwmon_temp_min:
+		case hwmon_temp_max:
+		case hwmon_temp_type:
+			return S_IRUGO | S_IWUSR;
+		}
+		break;
+	default:
+		break;
+	}
+	return 0;
 }
 
-static ssize_t show_max(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	struct lm95241_data *data = dev_get_drvdata(dev);
-
-	return snprintf(buf, PAGE_SIZE - 1,
-			data->config & to_sensor_dev_attr(attr)->index ?
-			"127000\n" : "255000\n");
-}
-
-static ssize_t set_max(struct device *dev, struct device_attribute *attr,
-		       const char *buf, size_t count)
-{
-	struct lm95241_data *data = dev_get_drvdata(dev);
-	long val;
-
-	if (kstrtol(buf, 10, &val) < 0)
-		return -EINVAL;
-	if (val >= 256000)
-		return -EINVAL;
-
-	mutex_lock(&data->update_lock);
-
-	if (val <= 127000)
-		data->config |= to_sensor_dev_attr(attr)->index;
-	else
-		data->config &= ~to_sensor_dev_attr(attr)->index;
-	data->valid = 0;
-
-	i2c_smbus_write_byte_data(data->client, LM95241_REG_RW_CONFIG,
-				  data->config);
-
-	mutex_unlock(&data->update_lock);
-
-	return count;
-}
-
-static ssize_t show_interval(struct device *dev, struct device_attribute *attr,
-			     char *buf)
-{
-	struct lm95241_data *data = lm95241_update_device(dev);
-
-	return snprintf(buf, PAGE_SIZE - 1, "%lu\n", 1000 * data->interval
-			/ HZ);
-}
-
-static ssize_t set_interval(struct device *dev, struct device_attribute *attr,
-			    const char *buf, size_t count)
-{
-	struct lm95241_data *data = dev_get_drvdata(dev);
-	unsigned long val;
-
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	data->interval = val * HZ / 1000;
-
-	return count;
-}
-
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_input, NULL, 0);
-static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, show_input, NULL, 2);
-static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, show_input, NULL, 4);
-static SENSOR_DEVICE_ATTR(temp2_type, S_IWUSR | S_IRUGO, show_type, set_type,
-			  R1MS_MASK);
-static SENSOR_DEVICE_ATTR(temp3_type, S_IWUSR | S_IRUGO, show_type, set_type,
-			  R2MS_MASK);
-static SENSOR_DEVICE_ATTR(temp2_min, S_IWUSR | S_IRUGO, show_min, set_min,
-			  R1DF_MASK);
-static SENSOR_DEVICE_ATTR(temp3_min, S_IWUSR | S_IRUGO, show_min, set_min,
-			  R2DF_MASK);
-static SENSOR_DEVICE_ATTR(temp2_max, S_IWUSR | S_IRUGO, show_max, set_max,
-			  R1DF_MASK);
-static SENSOR_DEVICE_ATTR(temp3_max, S_IWUSR | S_IRUGO, show_max, set_max,
-			  R2DF_MASK);
-static DEVICE_ATTR(update_interval, S_IWUSR | S_IRUGO, show_interval,
-		   set_interval);
-
-static struct attribute *lm95241_attrs[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp2_input.dev_attr.attr,
-	&sensor_dev_attr_temp3_input.dev_attr.attr,
-	&sensor_dev_attr_temp2_type.dev_attr.attr,
-	&sensor_dev_attr_temp3_type.dev_attr.attr,
-	&sensor_dev_attr_temp2_min.dev_attr.attr,
-	&sensor_dev_attr_temp3_min.dev_attr.attr,
-	&sensor_dev_attr_temp2_max.dev_attr.attr,
-	&sensor_dev_attr_temp3_max.dev_attr.attr,
-	&dev_attr_update_interval.attr,
-	NULL
-};
-ATTRIBUTE_GROUPS(lm95241);
-
 /* Return 0 if detection is successful, -ENODEV otherwise */
 static int lm95241_detect(struct i2c_client *new_client,
 			  struct i2c_board_info *info)
@@ -362,8 +405,8 @@
 static void lm95241_init_client(struct i2c_client *client,
 				struct lm95241_data *data)
 {
-	data->interval = HZ;	/* 1 sec default */
-	data->config = CFG_CR0076;
+	data->interval = 1000;
+	data->config = CFG_CR1000;
 	data->trutherm = (TT_OFF << TT1_SHIFT) | (TT_OFF << TT2_SHIFT);
 
 	i2c_smbus_write_byte_data(client, LM95241_REG_RW_CONFIG, data->config);
@@ -375,6 +418,47 @@
 				  data->model);
 }
 
+static const u32 lm95241_chip_config[] = {
+	HWMON_C_UPDATE_INTERVAL,
+	0
+};
+
+static const struct hwmon_channel_info lm95241_chip = {
+	.type = hwmon_chip,
+	.config = lm95241_chip_config,
+};
+
+static const u32 lm95241_temp_config[] = {
+	HWMON_T_INPUT,
+	HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | HWMON_T_TYPE |
+		HWMON_T_FAULT,
+	HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | HWMON_T_TYPE |
+		HWMON_T_FAULT,
+	0
+};
+
+static const struct hwmon_channel_info lm95241_temp = {
+	.type = hwmon_temp,
+	.config = lm95241_temp_config,
+};
+
+static const struct hwmon_channel_info *lm95241_info[] = {
+	&lm95241_chip,
+	&lm95241_temp,
+	NULL
+};
+
+static const struct hwmon_ops lm95241_hwmon_ops = {
+	.is_visible = lm95241_is_visible,
+	.read = lm95241_read,
+	.write = lm95241_write,
+};
+
+static const struct hwmon_chip_info lm95241_chip_info = {
+	.ops = &lm95241_hwmon_ops,
+	.info = lm95241_info,
+};
+
 static int lm95241_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
@@ -392,9 +476,10 @@
 	/* Initialize the LM95241 chip */
 	lm95241_init_client(client, data);
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
 							   data,
-							   lm95241_groups);
+							   &lm95241_chip_info,
+							   NULL);
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
@@ -420,5 +505,5 @@
 module_i2c_driver(lm95241_driver);
 
 MODULE_AUTHOR("Davide Rizzo <elpa.rizzo@gmail.com>");
-MODULE_DESCRIPTION("LM95241 sensor driver");
+MODULE_DESCRIPTION("LM95231/LM95241 sensor driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/hwmon/lm95245.c b/drivers/hwmon/lm95245.c
index e7aef45..a3bfd88 100644
--- a/drivers/hwmon/lm95245.c
+++ b/drivers/hwmon/lm95245.c
@@ -15,22 +15,16 @@
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/i2c.h>
-#include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
+#include <linux/init.h>
+#include <linux/hwmon.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/sysfs.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
 
 static const unsigned short normal_i2c[] = {
 	0x18, 0x19, 0x29, 0x4c, 0x4d, I2C_CLIENT_END };
@@ -89,6 +83,7 @@
 #define RATE_CR1000	0x02
 #define RATE_CR2500	0x03
 
+#define STATUS1_ROS		0x10
 #define STATUS1_DIODE_FAULT	0x04
 #define STATUS1_RTCRIT		0x02
 #define STATUS1_LOC		0x01
@@ -112,14 +107,9 @@
 
 /* Client data (each client gets its own) */
 struct lm95245_data {
-	struct i2c_client *client;
+	struct regmap *regmap;
 	struct mutex update_lock;
-	unsigned long last_updated;	/* in jiffies */
-	unsigned long interval;	/* in msecs */
-	bool valid;		/* zero until following fields are valid */
-	/* registers values */
-	u8 regs[ARRAY_SIZE(lm95245_reg_address)];
-	u8 config1, config2;
+	int interval;	/* in msecs */
 };
 
 /* Conversions */
@@ -135,60 +125,36 @@
 	return temp_from_reg_unsigned(val_h, val_l);
 }
 
-static struct lm95245_data *lm95245_update_device(struct device *dev)
+static int lm95245_read_conversion_rate(struct lm95245_data *data)
 {
-	struct lm95245_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
+	unsigned int rate;
+	int ret;
 
-	mutex_lock(&data->update_lock);
-
-	if (time_after(jiffies, data->last_updated
-		+ msecs_to_jiffies(data->interval)) || !data->valid) {
-		int i;
-
-		for (i = 0; i < ARRAY_SIZE(lm95245_reg_address); i++)
-			data->regs[i]
-			  = i2c_smbus_read_byte_data(client,
-						     lm95245_reg_address[i]);
-		data->last_updated = jiffies;
-		data->valid = 1;
-	}
-
-	mutex_unlock(&data->update_lock);
-
-	return data;
-}
-
-static unsigned long lm95245_read_conversion_rate(struct i2c_client *client)
-{
-	int rate;
-	unsigned long interval;
-
-	rate = i2c_smbus_read_byte_data(client, LM95245_REG_RW_CONVERS_RATE);
+	ret = regmap_read(data->regmap, LM95245_REG_RW_CONVERS_RATE, &rate);
+	if (ret < 0)
+		return ret;
 
 	switch (rate) {
 	case RATE_CR0063:
-		interval = 63;
+		data->interval = 63;
 		break;
 	case RATE_CR0364:
-		interval = 364;
+		data->interval = 364;
 		break;
 	case RATE_CR1000:
-		interval = 1000;
+		data->interval = 1000;
 		break;
 	case RATE_CR2500:
 	default:
-		interval = 2500;
+		data->interval = 2500;
 		break;
 	}
-
-	return interval;
+	return 0;
 }
 
-static unsigned long lm95245_set_conversion_rate(struct i2c_client *client,
-			unsigned long interval)
+static int lm95245_set_conversion_rate(struct lm95245_data *data, long interval)
 {
-	int rate;
+	int ret, rate;
 
 	if (interval <= 63) {
 		interval = 63;
@@ -204,220 +170,288 @@
 		rate = RATE_CR2500;
 	}
 
-	i2c_smbus_write_byte_data(client, LM95245_REG_RW_CONVERS_RATE, rate);
+	ret = regmap_write(data->regmap, LM95245_REG_RW_CONVERS_RATE, rate);
+	if (ret < 0)
+		return ret;
 
-	return interval;
+	data->interval = interval;
+	return 0;
 }
 
-/* Sysfs stuff */
-static ssize_t show_input(struct device *dev, struct device_attribute *attr,
-			  char *buf)
-{
-	struct lm95245_data *data = lm95245_update_device(dev);
-	int temp;
-	int index = to_sensor_dev_attr(attr)->index;
-
-	/*
-	 * Index 0 (Local temp) is always signed
-	 * Index 2 (Remote temp) has both signed and unsigned data
-	 * use signed calculation for remote if signed bit is set
-	 */
-	if (index == 0 || data->regs[index] & 0x80)
-		temp = temp_from_reg_signed(data->regs[index],
-			    data->regs[index + 1]);
-	else
-		temp = temp_from_reg_unsigned(data->regs[index + 2],
-			    data->regs[index + 3]);
-
-	return snprintf(buf, PAGE_SIZE - 1, "%d\n", temp);
-}
-
-static ssize_t show_limit(struct device *dev, struct device_attribute *attr,
-			 char *buf)
-{
-	struct lm95245_data *data = lm95245_update_device(dev);
-	int index = to_sensor_dev_attr(attr)->index;
-
-	return snprintf(buf, PAGE_SIZE - 1, "%d\n",
-			data->regs[index] * 1000);
-}
-
-static ssize_t set_limit(struct device *dev, struct device_attribute *attr,
-			const char *buf, size_t count)
+static int lm95245_read_temp(struct device *dev, u32 attr, int channel,
+			     long *val)
 {
 	struct lm95245_data *data = dev_get_drvdata(dev);
-	int index = to_sensor_dev_attr(attr)->index;
-	struct i2c_client *client = data->client;
-	unsigned long val;
+	struct regmap *regmap = data->regmap;
+	int ret, regl, regh, regvall, regvalh;
 
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	val /= 1000;
-
-	val = clamp_val(val, 0, (index == 6 ? 127 : 255));
-
-	mutex_lock(&data->update_lock);
-
-	data->valid = 0;
-
-	i2c_smbus_write_byte_data(client, lm95245_reg_address[index], val);
-
-	mutex_unlock(&data->update_lock);
-
-	return count;
+	switch (attr) {
+	case hwmon_temp_input:
+		regl = channel ? LM95245_REG_R_REMOTE_TEMPL_S :
+				 LM95245_REG_R_LOCAL_TEMPL_S;
+		regh = channel ? LM95245_REG_R_REMOTE_TEMPH_S :
+				 LM95245_REG_R_LOCAL_TEMPH_S;
+		ret = regmap_read(regmap, regl, &regvall);
+		if (ret < 0)
+			return ret;
+		ret = regmap_read(regmap, regh, &regvalh);
+		if (ret < 0)
+			return ret;
+		/*
+		 * Local temp is always signed.
+		 * Remote temp has both signed and unsigned data.
+		 * Use signed calculation for remote if signed bit is set
+		 * or if reported temperature is below signed limit.
+		 */
+		if (!channel || (regvalh & 0x80) || regvalh < 0x7f) {
+			*val = temp_from_reg_signed(regvalh, regvall);
+			return 0;
+		}
+		ret = regmap_read(regmap, LM95245_REG_R_REMOTE_TEMPL_U,
+				  &regvall);
+		if (ret < 0)
+			return ret;
+		ret = regmap_read(regmap, LM95245_REG_R_REMOTE_TEMPH_U,
+				  &regvalh);
+		if (ret < 0)
+			return ret;
+		*val = temp_from_reg_unsigned(regvalh, regvall);
+		return 0;
+	case hwmon_temp_max:
+		ret = regmap_read(regmap, LM95245_REG_RW_REMOTE_OS_LIMIT,
+				  &regvalh);
+		if (ret < 0)
+			return ret;
+		*val = regvalh * 1000;
+		return 0;
+	case hwmon_temp_crit:
+		regh = channel ? LM95245_REG_RW_REMOTE_TCRIT_LIMIT :
+				 LM95245_REG_RW_LOCAL_OS_TCRIT_LIMIT;
+		ret = regmap_read(regmap, regh, &regvalh);
+		if (ret < 0)
+			return ret;
+		*val = regvalh * 1000;
+		return 0;
+	case hwmon_temp_max_hyst:
+		ret = regmap_read(regmap, LM95245_REG_RW_REMOTE_OS_LIMIT,
+				  &regvalh);
+		if (ret < 0)
+			return ret;
+		ret = regmap_read(regmap, LM95245_REG_RW_COMMON_HYSTERESIS,
+				  &regvall);
+		if (ret < 0)
+			return ret;
+		*val = (regvalh - regvall) * 1000;
+		return 0;
+	case hwmon_temp_crit_hyst:
+		regh = channel ? LM95245_REG_RW_REMOTE_TCRIT_LIMIT :
+				 LM95245_REG_RW_LOCAL_OS_TCRIT_LIMIT;
+		ret = regmap_read(regmap, regh, &regvalh);
+		if (ret < 0)
+			return ret;
+		ret = regmap_read(regmap, LM95245_REG_RW_COMMON_HYSTERESIS,
+				  &regvall);
+		if (ret < 0)
+			return ret;
+		*val = (regvalh - regvall) * 1000;
+		return 0;
+	case hwmon_temp_type:
+		ret = regmap_read(regmap, LM95245_REG_RW_CONFIG2, &regvalh);
+		if (ret < 0)
+			return ret;
+		*val = (regvalh & CFG2_REMOTE_TT) ? 1 : 2;
+		return 0;
+	case hwmon_temp_offset:
+		ret = regmap_read(regmap, LM95245_REG_RW_REMOTE_OFFL,
+				  &regvall);
+		if (ret < 0)
+			return ret;
+		ret = regmap_read(regmap, LM95245_REG_RW_REMOTE_OFFH,
+				  &regvalh);
+		if (ret < 0)
+			return ret;
+		*val = temp_from_reg_signed(regvalh, regvall);
+		return 0;
+	case hwmon_temp_max_alarm:
+		ret = regmap_read(regmap, LM95245_REG_R_STATUS1, &regvalh);
+		if (ret < 0)
+			return ret;
+		*val = !!(regvalh & STATUS1_ROS);
+		return 0;
+	case hwmon_temp_crit_alarm:
+		ret = regmap_read(regmap, LM95245_REG_R_STATUS1, &regvalh);
+		if (ret < 0)
+			return ret;
+		*val = !!(regvalh & (channel ? STATUS1_RTCRIT : STATUS1_LOC));
+		return 0;
+	case hwmon_temp_fault:
+		ret = regmap_read(regmap, LM95245_REG_R_STATUS1, &regvalh);
+		if (ret < 0)
+			return ret;
+		*val = !!(regvalh & STATUS1_DIODE_FAULT);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t show_crit_hyst(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	struct lm95245_data *data = lm95245_update_device(dev);
-	int index = to_sensor_dev_attr(attr)->index;
-	int hyst = data->regs[index] - data->regs[8];
-
-	return snprintf(buf, PAGE_SIZE - 1, "%d\n", hyst * 1000);
-}
-
-static ssize_t set_crit_hyst(struct device *dev, struct device_attribute *attr,
-			const char *buf, size_t count)
+static int lm95245_write_temp(struct device *dev, u32 attr, int channel,
+			      long val)
 {
 	struct lm95245_data *data = dev_get_drvdata(dev);
-	int index = to_sensor_dev_attr(attr)->index;
-	struct i2c_client *client = data->client;
-	unsigned long val;
-	int hyst, limit;
+	struct regmap *regmap = data->regmap;
+	unsigned int regval;
+	int ret, reg;
 
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	mutex_lock(&data->update_lock);
-
-	limit = i2c_smbus_read_byte_data(client, lm95245_reg_address[index]);
-	hyst = limit - val / 1000;
-	hyst = clamp_val(hyst, 0, 31);
-	data->regs[8] = hyst;
-
-	/* shared crit hysteresis */
-	i2c_smbus_write_byte_data(client, LM95245_REG_RW_COMMON_HYSTERESIS,
-		hyst);
-
-	mutex_unlock(&data->update_lock);
-
-	return count;
+	switch (attr) {
+	case hwmon_temp_max:
+		val = clamp_val(val / 1000, 0, 255);
+		ret = regmap_write(regmap, LM95245_REG_RW_REMOTE_OS_LIMIT, val);
+		return ret;
+	case hwmon_temp_crit:
+		reg = channel ? LM95245_REG_RW_REMOTE_TCRIT_LIMIT :
+				LM95245_REG_RW_LOCAL_OS_TCRIT_LIMIT;
+		val = clamp_val(val / 1000, 0, channel ? 255 : 127);
+		ret = regmap_write(regmap, reg, val);
+		return ret;
+	case hwmon_temp_crit_hyst:
+		mutex_lock(&data->update_lock);
+		ret = regmap_read(regmap, LM95245_REG_RW_LOCAL_OS_TCRIT_LIMIT,
+				  &regval);
+		if (ret < 0) {
+			mutex_unlock(&data->update_lock);
+			return ret;
+		}
+		/* Clamp to reasonable range to prevent overflow */
+		val = clamp_val(val, -1000000, 1000000);
+		val = regval - val / 1000;
+		val = clamp_val(val, 0, 31);
+		ret = regmap_write(regmap, LM95245_REG_RW_COMMON_HYSTERESIS,
+				   val);
+		mutex_unlock(&data->update_lock);
+		return ret;
+	case hwmon_temp_offset:
+		val = clamp_val(val, -128000, 127875);
+		val = val * 256 / 1000;
+		mutex_lock(&data->update_lock);
+		ret = regmap_write(regmap, LM95245_REG_RW_REMOTE_OFFL,
+				   val & 0xe0);
+		if (ret < 0) {
+			mutex_unlock(&data->update_lock);
+			return ret;
+		}
+		ret = regmap_write(regmap, LM95245_REG_RW_REMOTE_OFFH,
+				   (val >> 8) & 0xff);
+		mutex_unlock(&data->update_lock);
+		return ret;
+	case hwmon_temp_type:
+		if (val != 1 && val != 2)
+			return -EINVAL;
+		ret = regmap_update_bits(regmap, LM95245_REG_RW_CONFIG2,
+					 CFG2_REMOTE_TT,
+					 val == 1 ? CFG2_REMOTE_TT : 0);
+		return ret;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t show_type(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+static int lm95245_read_chip(struct device *dev, u32 attr, int channel,
+			     long *val)
 {
 	struct lm95245_data *data = dev_get_drvdata(dev);
 
-	return snprintf(buf, PAGE_SIZE - 1,
-		data->config2 & CFG2_REMOTE_TT ? "1\n" : "2\n");
+	switch (attr) {
+	case hwmon_chip_update_interval:
+		*val = data->interval;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t set_type(struct device *dev, struct device_attribute *attr,
-			const char *buf, size_t count)
+static int lm95245_write_chip(struct device *dev, u32 attr, int channel,
+			      long val)
 {
 	struct lm95245_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	unsigned long val;
+	int ret;
 
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-	if (val != 1 && val != 2)
-		return -EINVAL;
-
-	mutex_lock(&data->update_lock);
-
-	if (val == 1)
-		data->config2 |= CFG2_REMOTE_TT;
-	else
-		data->config2 &= ~CFG2_REMOTE_TT;
-
-	data->valid = 0;
-
-	i2c_smbus_write_byte_data(client, LM95245_REG_RW_CONFIG2,
-				  data->config2);
-
-	mutex_unlock(&data->update_lock);
-
-	return count;
+	switch (attr) {
+	case hwmon_chip_update_interval:
+		mutex_lock(&data->update_lock);
+		ret = lm95245_set_conversion_rate(data, val);
+		mutex_unlock(&data->update_lock);
+		return ret;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t show_alarm(struct device *dev, struct device_attribute *attr,
-			 char *buf)
+static int lm95245_read(struct device *dev, enum hwmon_sensor_types type,
+			u32 attr, int channel, long *val)
 {
-	struct lm95245_data *data = lm95245_update_device(dev);
-	int index = to_sensor_dev_attr(attr)->index;
-
-	return snprintf(buf, PAGE_SIZE - 1, "%d\n",
-			!!(data->regs[9] & index));
+	switch (type) {
+	case hwmon_chip:
+		return lm95245_read_chip(dev, attr, channel, val);
+	case hwmon_temp:
+		return lm95245_read_temp(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t show_interval(struct device *dev, struct device_attribute *attr,
-			     char *buf)
+static int lm95245_write(struct device *dev, enum hwmon_sensor_types type,
+			 u32 attr, int channel, long val)
 {
-	struct lm95245_data *data = lm95245_update_device(dev);
-
-	return snprintf(buf, PAGE_SIZE - 1, "%lu\n", data->interval);
+	switch (type) {
+	case hwmon_chip:
+		return lm95245_write_chip(dev, attr, channel, val);
+	case hwmon_temp:
+		return lm95245_write_temp(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t set_interval(struct device *dev, struct device_attribute *attr,
-			    const char *buf, size_t count)
+static umode_t lm95245_temp_is_visible(const void *data, u32 attr, int channel)
 {
-	struct lm95245_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	unsigned long val;
-
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	mutex_lock(&data->update_lock);
-
-	data->interval = lm95245_set_conversion_rate(client, val);
-
-	mutex_unlock(&data->update_lock);
-
-	return count;
+	switch (attr) {
+	case hwmon_temp_input:
+	case hwmon_temp_max_alarm:
+	case hwmon_temp_max_hyst:
+	case hwmon_temp_crit_alarm:
+	case hwmon_temp_fault:
+		return S_IRUGO;
+	case hwmon_temp_type:
+	case hwmon_temp_max:
+	case hwmon_temp_crit:
+	case hwmon_temp_offset:
+		return S_IRUGO | S_IWUSR;
+	case hwmon_temp_crit_hyst:
+		return (channel == 0) ? S_IRUGO | S_IWUSR : S_IRUGO;
+	default:
+		return 0;
+	}
 }
 
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_input, NULL, 0);
-static SENSOR_DEVICE_ATTR(temp1_crit, S_IWUSR | S_IRUGO, show_limit,
-		set_limit, 6);
-static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IWUSR | S_IRUGO, show_crit_hyst,
-		set_crit_hyst, 6);
-static SENSOR_DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL,
-		STATUS1_LOC);
-
-static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, show_input, NULL, 2);
-static SENSOR_DEVICE_ATTR(temp2_crit, S_IWUSR | S_IRUGO, show_limit,
-		set_limit, 7);
-static SENSOR_DEVICE_ATTR(temp2_crit_hyst, S_IRUGO, show_crit_hyst, NULL, 7);
-static SENSOR_DEVICE_ATTR(temp2_crit_alarm, S_IRUGO, show_alarm, NULL,
-		STATUS1_RTCRIT);
-static SENSOR_DEVICE_ATTR(temp2_type, S_IWUSR | S_IRUGO, show_type,
-		set_type, 0);
-static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_alarm, NULL,
-		STATUS1_DIODE_FAULT);
-
-static DEVICE_ATTR(update_interval, S_IWUSR | S_IRUGO, show_interval,
-		set_interval);
-
-static struct attribute *lm95245_attrs[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp2_input.dev_attr.attr,
-	&sensor_dev_attr_temp2_crit.dev_attr.attr,
-	&sensor_dev_attr_temp2_crit_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp2_crit_alarm.dev_attr.attr,
-	&sensor_dev_attr_temp2_type.dev_attr.attr,
-	&sensor_dev_attr_temp2_fault.dev_attr.attr,
-	&dev_attr_update_interval.attr,
-	NULL
-};
-ATTRIBUTE_GROUPS(lm95245);
+static umode_t lm95245_is_visible(const void *data,
+				  enum hwmon_sensor_types type,
+				  u32 attr, int channel)
+{
+	switch (type) {
+	case hwmon_chip:
+		switch (attr) {
+		case hwmon_chip_update_interval:
+			return S_IRUGO | S_IWUSR;
+		default:
+			return 0;
+		}
+	case hwmon_temp:
+		return lm95245_temp_is_visible(data, attr, channel);
+	default:
+		return 0;
+	}
+}
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
 static int lm95245_detect(struct i2c_client *new_client,
@@ -453,44 +487,130 @@
 	return 0;
 }
 
-static void lm95245_init_client(struct i2c_client *client,
-				struct lm95245_data *data)
+static int lm95245_init_client(struct lm95245_data *data)
 {
-	data->interval = lm95245_read_conversion_rate(client);
+	int ret;
 
-	data->config1 = i2c_smbus_read_byte_data(client,
-		LM95245_REG_RW_CONFIG1);
-	data->config2 = i2c_smbus_read_byte_data(client,
-		LM95245_REG_RW_CONFIG2);
+	ret = lm95245_read_conversion_rate(data);
+	if (ret < 0)
+		return ret;
 
-	if (data->config1 & CFG_STOP) {
-		/* Clear the standby bit */
-		data->config1 &= ~CFG_STOP;
-		i2c_smbus_write_byte_data(client, LM95245_REG_RW_CONFIG1,
-			data->config1);
+	return regmap_update_bits(data->regmap, LM95245_REG_RW_CONFIG1,
+				  CFG_STOP, 0);
+}
+
+static bool lm95245_is_writeable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LM95245_REG_RW_CONFIG1:
+	case LM95245_REG_RW_CONVERS_RATE:
+	case LM95245_REG_W_ONE_SHOT:
+	case LM95245_REG_RW_CONFIG2:
+	case LM95245_REG_RW_REMOTE_OFFH:
+	case LM95245_REG_RW_REMOTE_OFFL:
+	case LM95245_REG_RW_REMOTE_OS_LIMIT:
+	case LM95245_REG_RW_LOCAL_OS_TCRIT_LIMIT:
+	case LM95245_REG_RW_REMOTE_TCRIT_LIMIT:
+	case LM95245_REG_RW_COMMON_HYSTERESIS:
+		return true;
+	default:
+		return false;
 	}
 }
 
+static bool lm95245_is_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LM95245_REG_R_STATUS1:
+	case LM95245_REG_R_STATUS2:
+	case LM95245_REG_R_LOCAL_TEMPH_S:
+	case LM95245_REG_R_LOCAL_TEMPL_S:
+	case LM95245_REG_R_REMOTE_TEMPH_S:
+	case LM95245_REG_R_REMOTE_TEMPL_S:
+	case LM95245_REG_R_REMOTE_TEMPH_U:
+	case LM95245_REG_R_REMOTE_TEMPL_U:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct regmap_config lm95245_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.writeable_reg = lm95245_is_writeable_reg,
+	.volatile_reg = lm95245_is_volatile_reg,
+	.cache_type = REGCACHE_RBTREE,
+	.use_single_rw = true,
+};
+
+static const u32 lm95245_chip_config[] = {
+	HWMON_C_UPDATE_INTERVAL,
+	0
+};
+
+static const struct hwmon_channel_info lm95245_chip = {
+	.type = hwmon_chip,
+	.config = lm95245_chip_config,
+};
+
+static const u32 lm95245_temp_config[] = {
+	HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_CRIT_ALARM,
+	HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MAX_HYST | HWMON_T_CRIT |
+		HWMON_T_CRIT_HYST | HWMON_T_FAULT | HWMON_T_MAX_ALARM |
+		HWMON_T_CRIT_ALARM | HWMON_T_TYPE | HWMON_T_OFFSET,
+	0
+};
+
+static const struct hwmon_channel_info lm95245_temp = {
+	.type = hwmon_temp,
+	.config = lm95245_temp_config,
+};
+
+static const struct hwmon_channel_info *lm95245_info[] = {
+	&lm95245_chip,
+	&lm95245_temp,
+	NULL
+};
+
+static const struct hwmon_ops lm95245_hwmon_ops = {
+	.is_visible = lm95245_is_visible,
+	.read = lm95245_read,
+	.write = lm95245_write,
+};
+
+static const struct hwmon_chip_info lm95245_chip_info = {
+	.ops = &lm95245_hwmon_ops,
+	.info = lm95245_info,
+};
+
 static int lm95245_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
 	struct lm95245_data *data;
 	struct device *hwmon_dev;
+	int ret;
 
 	data = devm_kzalloc(dev, sizeof(struct lm95245_data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
-	data->client = client;
+	data->regmap = devm_regmap_init_i2c(client, &lm95245_regmap_config);
+	if (IS_ERR(data->regmap))
+		return PTR_ERR(data->regmap);
+
 	mutex_init(&data->update_lock);
 
 	/* Initialize the LM95245 chip */
-	lm95245_init_client(client, data);
+	ret = lm95245_init_client(data);
+	if (ret < 0)
+		return ret;
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
-							   data,
-							   lm95245_groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+							 data,
+							 &lm95245_chip_info,
+							 NULL);
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
diff --git a/drivers/hwmon/ltc4151.c b/drivers/hwmon/ltc4151.c
index c86a184..8445c9f 100644
--- a/drivers/hwmon/ltc4151.c
+++ b/drivers/hwmon/ltc4151.c
@@ -30,6 +30,7 @@
 
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/slab.h>
@@ -52,6 +53,7 @@
 	struct mutex update_lock;
 	bool valid;
 	unsigned long last_updated; /* in jiffies */
+	unsigned int shunt; /* in micro ohms */
 
 	/* Registers */
 	u8 regs[6];
@@ -111,9 +113,9 @@
 	case LTC4151_SENSE_H:
 		/*
 		 * 20uV resolution. Convert to current as measured with
-		 * an 1 mOhm sense resistor, in mA.
+		 * a given sense resistor, in mA.
 		 */
-		val = val * 20;
+		val = val * 20 * 1000 / data->shunt;
 		break;
 	case LTC4151_VIN_H:
 		/* 25 mV per increment */
@@ -176,6 +178,7 @@
 	struct device *dev = &client->dev;
 	struct ltc4151_data *data;
 	struct device *hwmon_dev;
+	u32 shunt;
 
 	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA))
 		return -ENODEV;
@@ -184,6 +187,15 @@
 	if (!data)
 		return -ENOMEM;
 
+	if (of_property_read_u32(client->dev.of_node,
+				 "shunt-resistor-micro-ohms", &shunt))
+		shunt = 1000; /* 1 mOhm if not set via DT */
+
+	if (shunt == 0)
+		return -EINVAL;
+
+	data->shunt = shunt;
+
 	data->client = client;
 	mutex_init(&data->update_lock);
 
@@ -199,10 +211,16 @@
 };
 MODULE_DEVICE_TABLE(i2c, ltc4151_id);
 
+static const struct of_device_id ltc4151_match[] = {
+	{ .compatible = "lltc,ltc4151" },
+	{},
+};
+
 /* This is the driver that will be inserted */
 static struct i2c_driver ltc4151_driver = {
 	.driver = {
 		.name	= "ltc4151",
+		.of_match_table = of_match_ptr(ltc4151_match),
 	},
 	.probe		= ltc4151_probe,
 	.id_table	= ltc4151_id,
diff --git a/drivers/hwmon/ltc4245.c b/drivers/hwmon/ltc4245.c
index 681b5b7..4680d89 100644
--- a/drivers/hwmon/ltc4245.c
+++ b/drivers/hwmon/ltc4245.c
@@ -16,6 +16,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/bitops.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/i2c.h>
@@ -53,8 +54,6 @@
 struct ltc4245_data {
 	struct i2c_client *client;
 
-	const struct attribute_group *groups[3];
-
 	struct mutex update_lock;
 	bool valid;
 	unsigned long last_updated; /* in jiffies */
@@ -162,7 +161,7 @@
 		ltc4245_update_gpios(dev);
 
 		data->last_updated = jiffies;
-		data->valid = 1;
+		data->valid = true;
 	}
 
 	mutex_unlock(&data->update_lock);
@@ -256,214 +255,205 @@
 	return curr;
 }
 
-static ssize_t ltc4245_show_voltage(struct device *dev,
-				    struct device_attribute *da,
-				    char *buf)
+/* Map from voltage channel index to voltage register */
+
+static const s8 ltc4245_in_regs[] = {
+	LTC4245_12VIN, LTC4245_5VIN, LTC4245_3VIN, LTC4245_VEEIN,
+	LTC4245_12VOUT, LTC4245_5VOUT, LTC4245_3VOUT, LTC4245_VEEOUT,
+};
+
+/* Map from current channel index to current register */
+
+static const s8 ltc4245_curr_regs[] = {
+	LTC4245_12VSENSE, LTC4245_5VSENSE, LTC4245_3VSENSE, LTC4245_VEESENSE,
+};
+
+static int ltc4245_read_curr(struct device *dev, u32 attr, int channel,
+			     long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	const int voltage = ltc4245_get_voltage(dev, attr->index);
-
-	return snprintf(buf, PAGE_SIZE, "%d\n", voltage);
-}
-
-static ssize_t ltc4245_show_current(struct device *dev,
-				    struct device_attribute *da,
-				    char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	const unsigned int curr = ltc4245_get_current(dev, attr->index);
-
-	return snprintf(buf, PAGE_SIZE, "%u\n", curr);
-}
-
-static ssize_t ltc4245_show_power(struct device *dev,
-				  struct device_attribute *da,
-				  char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
-	const unsigned int curr = ltc4245_get_current(dev, attr->index);
-	const int output_voltage = ltc4245_get_voltage(dev, attr->index+1);
-
-	/* current in mA * voltage in mV == power in uW */
-	const unsigned int power = abs(output_voltage * curr);
-
-	return snprintf(buf, PAGE_SIZE, "%u\n", power);
-}
-
-static ssize_t ltc4245_show_alarm(struct device *dev,
-					  struct device_attribute *da,
-					  char *buf)
-{
-	struct sensor_device_attribute_2 *attr = to_sensor_dev_attr_2(da);
 	struct ltc4245_data *data = ltc4245_update_device(dev);
-	const u8 reg = data->cregs[attr->index];
-	const u32 mask = attr->nr;
 
-	return snprintf(buf, PAGE_SIZE, "%u\n", (reg & mask) ? 1 : 0);
+	switch (attr) {
+	case hwmon_curr_input:
+		*val = ltc4245_get_current(dev, ltc4245_curr_regs[channel]);
+		return 0;
+	case hwmon_curr_max_alarm:
+		*val = !!(data->cregs[LTC4245_FAULT1] & BIT(channel + 4));
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t ltc4245_show_gpio(struct device *dev,
-				 struct device_attribute *da,
-				 char *buf)
+static int ltc4245_read_in(struct device *dev, u32 attr, int channel, long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(da);
 	struct ltc4245_data *data = ltc4245_update_device(dev);
-	int val = data->gpios[attr->index];
 
-	/* handle stale GPIO's */
-	if (val < 0)
-		return val;
+	switch (attr) {
+	case hwmon_in_input:
+		if (channel < 8) {
+			*val = ltc4245_get_voltage(dev,
+						ltc4245_in_regs[channel]);
+		} else {
+			int regval = data->gpios[channel - 8];
 
-	/* Convert to millivolts and print */
-	return snprintf(buf, PAGE_SIZE, "%u\n", val * 10);
+			if (regval < 0)
+				return regval;
+			*val = regval * 10;
+		}
+		return 0;
+	case hwmon_in_min_alarm:
+		if (channel < 4)
+			*val = !!(data->cregs[LTC4245_FAULT1] & BIT(channel));
+		else
+			*val = !!(data->cregs[LTC4245_FAULT2] &
+				  BIT(channel - 4));
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-/* Construct a sensor_device_attribute structure for each register */
-
-/* Input voltages */
-static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, ltc4245_show_voltage, NULL,
-			  LTC4245_12VIN);
-static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, ltc4245_show_voltage, NULL,
-			  LTC4245_5VIN);
-static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, ltc4245_show_voltage, NULL,
-			  LTC4245_3VIN);
-static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, ltc4245_show_voltage, NULL,
-			  LTC4245_VEEIN);
-
-/* Input undervoltage alarms */
-static SENSOR_DEVICE_ATTR_2(in1_min_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 0, LTC4245_FAULT1);
-static SENSOR_DEVICE_ATTR_2(in2_min_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 1, LTC4245_FAULT1);
-static SENSOR_DEVICE_ATTR_2(in3_min_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 2, LTC4245_FAULT1);
-static SENSOR_DEVICE_ATTR_2(in4_min_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 3, LTC4245_FAULT1);
-
-/* Currents (via sense resistor) */
-static SENSOR_DEVICE_ATTR(curr1_input, S_IRUGO, ltc4245_show_current, NULL,
-			  LTC4245_12VSENSE);
-static SENSOR_DEVICE_ATTR(curr2_input, S_IRUGO, ltc4245_show_current, NULL,
-			  LTC4245_5VSENSE);
-static SENSOR_DEVICE_ATTR(curr3_input, S_IRUGO, ltc4245_show_current, NULL,
-			  LTC4245_3VSENSE);
-static SENSOR_DEVICE_ATTR(curr4_input, S_IRUGO, ltc4245_show_current, NULL,
-			  LTC4245_VEESENSE);
-
-/* Overcurrent alarms */
-static SENSOR_DEVICE_ATTR_2(curr1_max_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 4, LTC4245_FAULT1);
-static SENSOR_DEVICE_ATTR_2(curr2_max_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 5, LTC4245_FAULT1);
-static SENSOR_DEVICE_ATTR_2(curr3_max_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 6, LTC4245_FAULT1);
-static SENSOR_DEVICE_ATTR_2(curr4_max_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 7, LTC4245_FAULT1);
-
-/* Output voltages */
-static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, ltc4245_show_voltage, NULL,
-			  LTC4245_12VOUT);
-static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, ltc4245_show_voltage, NULL,
-			  LTC4245_5VOUT);
-static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, ltc4245_show_voltage, NULL,
-			  LTC4245_3VOUT);
-static SENSOR_DEVICE_ATTR(in8_input, S_IRUGO, ltc4245_show_voltage, NULL,
-			  LTC4245_VEEOUT);
-
-/* Power Bad alarms */
-static SENSOR_DEVICE_ATTR_2(in5_min_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 0, LTC4245_FAULT2);
-static SENSOR_DEVICE_ATTR_2(in6_min_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 1, LTC4245_FAULT2);
-static SENSOR_DEVICE_ATTR_2(in7_min_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 2, LTC4245_FAULT2);
-static SENSOR_DEVICE_ATTR_2(in8_min_alarm, S_IRUGO, ltc4245_show_alarm, NULL,
-			    1 << 3, LTC4245_FAULT2);
-
-/* GPIO voltages */
-static SENSOR_DEVICE_ATTR(in9_input, S_IRUGO, ltc4245_show_gpio, NULL, 0);
-static SENSOR_DEVICE_ATTR(in10_input, S_IRUGO, ltc4245_show_gpio, NULL, 1);
-static SENSOR_DEVICE_ATTR(in11_input, S_IRUGO, ltc4245_show_gpio, NULL, 2);
-
-/* Power Consumption (virtual) */
-static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO, ltc4245_show_power, NULL,
-			  LTC4245_12VSENSE);
-static SENSOR_DEVICE_ATTR(power2_input, S_IRUGO, ltc4245_show_power, NULL,
-			  LTC4245_5VSENSE);
-static SENSOR_DEVICE_ATTR(power3_input, S_IRUGO, ltc4245_show_power, NULL,
-			  LTC4245_3VSENSE);
-static SENSOR_DEVICE_ATTR(power4_input, S_IRUGO, ltc4245_show_power, NULL,
-			  LTC4245_VEESENSE);
-
-/*
- * Finally, construct an array of pointers to members of the above objects,
- * as required for sysfs_create_group()
- */
-static struct attribute *ltc4245_std_attributes[] = {
-	&sensor_dev_attr_in1_input.dev_attr.attr,
-	&sensor_dev_attr_in2_input.dev_attr.attr,
-	&sensor_dev_attr_in3_input.dev_attr.attr,
-	&sensor_dev_attr_in4_input.dev_attr.attr,
-
-	&sensor_dev_attr_in1_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_in2_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_in3_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_in4_min_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_curr1_input.dev_attr.attr,
-	&sensor_dev_attr_curr2_input.dev_attr.attr,
-	&sensor_dev_attr_curr3_input.dev_attr.attr,
-	&sensor_dev_attr_curr4_input.dev_attr.attr,
-
-	&sensor_dev_attr_curr1_max_alarm.dev_attr.attr,
-	&sensor_dev_attr_curr2_max_alarm.dev_attr.attr,
-	&sensor_dev_attr_curr3_max_alarm.dev_attr.attr,
-	&sensor_dev_attr_curr4_max_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in5_input.dev_attr.attr,
-	&sensor_dev_attr_in6_input.dev_attr.attr,
-	&sensor_dev_attr_in7_input.dev_attr.attr,
-	&sensor_dev_attr_in8_input.dev_attr.attr,
-
-	&sensor_dev_attr_in5_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_in6_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_in7_min_alarm.dev_attr.attr,
-	&sensor_dev_attr_in8_min_alarm.dev_attr.attr,
-
-	&sensor_dev_attr_in9_input.dev_attr.attr,
-
-	&sensor_dev_attr_power1_input.dev_attr.attr,
-	&sensor_dev_attr_power2_input.dev_attr.attr,
-	&sensor_dev_attr_power3_input.dev_attr.attr,
-	&sensor_dev_attr_power4_input.dev_attr.attr,
-
-	NULL,
-};
-
-static struct attribute *ltc4245_gpio_attributes[] = {
-	&sensor_dev_attr_in10_input.dev_attr.attr,
-	&sensor_dev_attr_in11_input.dev_attr.attr,
-	NULL,
-};
-
-static const struct attribute_group ltc4245_std_group = {
-	.attrs = ltc4245_std_attributes,
-};
-
-static const struct attribute_group ltc4245_gpio_group = {
-	.attrs = ltc4245_gpio_attributes,
-};
-
-static void ltc4245_sysfs_add_groups(struct ltc4245_data *data)
+static int ltc4245_read_power(struct device *dev, u32 attr, int channel,
+			      long *val)
 {
-	/* standard sysfs attributes */
-	data->groups[0] = &ltc4245_std_group;
+	unsigned long curr;
+	long voltage;
 
-	/* if we're using the extra gpio support, register it's attributes */
-	if (data->use_extra_gpios)
-		data->groups[1] = &ltc4245_gpio_group;
+	switch (attr) {
+	case hwmon_power_input:
+		(void)ltc4245_update_device(dev);
+		curr = ltc4245_get_current(dev, ltc4245_curr_regs[channel]);
+		voltage = ltc4245_get_voltage(dev, ltc4245_in_regs[channel]);
+		*val = abs(curr * voltage);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
+static int ltc4245_read(struct device *dev, enum hwmon_sensor_types type,
+			u32 attr, int channel, long *val)
+{
+
+	switch (type) {
+	case hwmon_curr:
+		return ltc4245_read_curr(dev, attr, channel, val);
+	case hwmon_power:
+		return ltc4245_read_power(dev, attr, channel, val);
+	case hwmon_in:
+		return ltc4245_read_in(dev, attr, channel - 1, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static umode_t ltc4245_is_visible(const void *_data,
+				  enum hwmon_sensor_types type,
+				  u32 attr, int channel)
+{
+	const struct ltc4245_data *data = _data;
+
+	switch (type) {
+	case hwmon_in:
+		if (channel == 0)
+			return 0;
+		switch (attr) {
+		case hwmon_in_input:
+			if (channel > 9 && !data->use_extra_gpios)
+				return 0;
+			return S_IRUGO;
+		case hwmon_in_min_alarm:
+			if (channel > 8)
+				return 0;
+			return S_IRUGO;
+		default:
+			return 0;
+		}
+	case hwmon_curr:
+		switch (attr) {
+		case hwmon_curr_input:
+		case hwmon_curr_max_alarm:
+			return S_IRUGO;
+		default:
+			return 0;
+		}
+	case hwmon_power:
+		switch (attr) {
+		case hwmon_power_input:
+			return S_IRUGO;
+		default:
+			return 0;
+		}
+	default:
+		return 0;
+	}
+}
+
+static const u32 ltc4245_in_config[] = {
+	HWMON_I_INPUT,			/* dummy, skipped in is_visible */
+	HWMON_I_INPUT | HWMON_I_MIN_ALARM,
+	HWMON_I_INPUT | HWMON_I_MIN_ALARM,
+	HWMON_I_INPUT | HWMON_I_MIN_ALARM,
+	HWMON_I_INPUT | HWMON_I_MIN_ALARM,
+	HWMON_I_INPUT | HWMON_I_MIN_ALARM,
+	HWMON_I_INPUT | HWMON_I_MIN_ALARM,
+	HWMON_I_INPUT | HWMON_I_MIN_ALARM,
+	HWMON_I_INPUT | HWMON_I_MIN_ALARM,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	0
+};
+
+static const struct hwmon_channel_info ltc4245_in = {
+	.type = hwmon_in,
+	.config = ltc4245_in_config,
+};
+
+static const u32 ltc4245_curr_config[] = {
+	HWMON_C_INPUT | HWMON_C_MAX_ALARM,
+	HWMON_C_INPUT | HWMON_C_MAX_ALARM,
+	HWMON_C_INPUT | HWMON_C_MAX_ALARM,
+	HWMON_C_INPUT | HWMON_C_MAX_ALARM,
+	0
+};
+
+static const struct hwmon_channel_info ltc4245_curr = {
+	.type = hwmon_curr,
+	.config = ltc4245_curr_config,
+};
+
+static const u32 ltc4245_power_config[] = {
+	HWMON_P_INPUT,
+	HWMON_P_INPUT,
+	HWMON_P_INPUT,
+	HWMON_P_INPUT,
+	0
+};
+
+static const struct hwmon_channel_info ltc4245_power = {
+	.type = hwmon_power,
+	.config = ltc4245_power_config,
+};
+
+static const struct hwmon_channel_info *ltc4245_info[] = {
+	&ltc4245_in,
+	&ltc4245_curr,
+	&ltc4245_power,
+	NULL
+};
+
+static const struct hwmon_ops ltc4245_hwmon_ops = {
+	.is_visible = ltc4245_is_visible,
+	.read = ltc4245_read,
+};
+
+static const struct hwmon_chip_info ltc4245_chip_info = {
+	.ops = &ltc4245_hwmon_ops,
+	.info = ltc4245_info,
+};
+
 static bool ltc4245_use_extra_gpios(struct i2c_client *client)
 {
 	struct ltc4245_platform_data *pdata = dev_get_platdata(&client->dev);
@@ -502,12 +492,10 @@
 	i2c_smbus_write_byte_data(client, LTC4245_FAULT1, 0x00);
 	i2c_smbus_write_byte_data(client, LTC4245_FAULT2, 0x00);
 
-	/* Add sysfs hooks */
-	ltc4245_sysfs_add_groups(data);
-
-	hwmon_dev = devm_hwmon_device_register_with_groups(&client->dev,
-							   client->name, data,
-							   data->groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(&client->dev,
+							 client->name, data,
+							 &ltc4245_chip_info,
+							 NULL);
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c
index 69c0ac8..bef84e0 100644
--- a/drivers/hwmon/max31790.c
+++ b/drivers/hwmon/max31790.c
@@ -17,7 +17,6 @@
 
 #include <linux/err.h>
 #include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
@@ -169,362 +168,288 @@
 	return bits;
 }
 
-static ssize_t get_fan(struct device *dev,
-		       struct device_attribute *devattr, char *buf)
+static int max31790_read_fan(struct device *dev, u32 attr, int channel,
+			     long *val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 	struct max31790_data *data = max31790_update_device(dev);
 	int sr, rpm;
 
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
-	sr = get_tach_period(data->fan_dynamics[attr->index]);
-	rpm = RPM_FROM_REG(data->tach[attr->index], sr);
-
-	return sprintf(buf, "%d\n", rpm);
+	switch (attr) {
+	case hwmon_fan_input:
+		sr = get_tach_period(data->fan_dynamics[channel]);
+		rpm = RPM_FROM_REG(data->tach[channel], sr);
+		*val = rpm;
+		return 0;
+	case hwmon_fan_target:
+		sr = get_tach_period(data->fan_dynamics[channel]);
+		rpm = RPM_FROM_REG(data->target_count[channel], sr);
+		*val = rpm;
+		return 0;
+	case hwmon_fan_fault:
+		*val = !!(data->fault_status & (1 << channel));
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t get_fan_target(struct device *dev,
-			      struct device_attribute *devattr, char *buf)
+static int max31790_write_fan(struct device *dev, u32 attr, int channel,
+			      long val)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct max31790_data *data = max31790_update_device(dev);
-	int sr, rpm;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	sr = get_tach_period(data->fan_dynamics[attr->index]);
-	rpm = RPM_FROM_REG(data->target_count[attr->index], sr);
-
-	return sprintf(buf, "%d\n", rpm);
-}
-
-static ssize_t set_fan_target(struct device *dev,
-			      struct device_attribute *devattr,
-			      const char *buf, size_t count)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
 	struct max31790_data *data = dev_get_drvdata(dev);
 	struct i2c_client *client = data->client;
+	int target_count;
+	int err = 0;
 	u8 bits;
 	int sr;
-	int target_count;
-	unsigned long rpm;
-	int err;
-
-	err = kstrtoul(buf, 10, &rpm);
-	if (err)
-		return err;
 
 	mutex_lock(&data->update_lock);
 
-	rpm = clamp_val(rpm, FAN_RPM_MIN, FAN_RPM_MAX);
-	bits = bits_for_tach_period(rpm);
-	data->fan_dynamics[attr->index] =
-			((data->fan_dynamics[attr->index]
-			  & ~MAX31790_FAN_DYN_SR_MASK)
-			 | (bits << MAX31790_FAN_DYN_SR_SHIFT));
-	err = i2c_smbus_write_byte_data(client,
-			MAX31790_REG_FAN_DYNAMICS(attr->index),
-			data->fan_dynamics[attr->index]);
+	switch (attr) {
+	case hwmon_fan_target:
+		val = clamp_val(val, FAN_RPM_MIN, FAN_RPM_MAX);
+		bits = bits_for_tach_period(val);
+		data->fan_dynamics[channel] =
+			((data->fan_dynamics[channel] &
+			  ~MAX31790_FAN_DYN_SR_MASK) |
+			 (bits << MAX31790_FAN_DYN_SR_SHIFT));
+		err = i2c_smbus_write_byte_data(client,
+					MAX31790_REG_FAN_DYNAMICS(channel),
+					data->fan_dynamics[channel]);
+		if (err < 0)
+			break;
 
-	if (err < 0) {
-		mutex_unlock(&data->update_lock);
-		return err;
-	}
+		sr = get_tach_period(data->fan_dynamics[channel]);
+		target_count = RPM_TO_REG(val, sr);
+		target_count = clamp_val(target_count, 0x1, 0x7FF);
 
-	sr = get_tach_period(data->fan_dynamics[attr->index]);
-	target_count = RPM_TO_REG(rpm, sr);
-	target_count = clamp_val(target_count, 0x1, 0x7FF);
+		data->target_count[channel] = target_count << 5;
 
-	data->target_count[attr->index] = target_count << 5;
-
-	err = i2c_smbus_write_word_swapped(client,
-			MAX31790_REG_TARGET_COUNT(attr->index),
-			data->target_count[attr->index]);
-
-	mutex_unlock(&data->update_lock);
-
-	if (err < 0)
-		return err;
-
-	return count;
-}
-
-static ssize_t get_pwm(struct device *dev,
-		       struct device_attribute *devattr, char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct max31790_data *data = max31790_update_device(dev);
-	int pwm;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	pwm = data->pwm[attr->index] >> 8;
-
-	return sprintf(buf, "%d\n", pwm);
-}
-
-static ssize_t set_pwm(struct device *dev,
-		       struct device_attribute *devattr,
-		       const char *buf, size_t count)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct max31790_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	unsigned long pwm;
-	int err;
-
-	err = kstrtoul(buf, 10, &pwm);
-	if (err)
-		return err;
-
-	if (pwm > 255)
-		return -EINVAL;
-
-	mutex_lock(&data->update_lock);
-
-	data->pwm[attr->index] = pwm << 8;
-	err = i2c_smbus_write_word_swapped(client,
-			MAX31790_REG_PWMOUT(attr->index),
-			data->pwm[attr->index]);
-
-	mutex_unlock(&data->update_lock);
-
-	if (err < 0)
-		return err;
-
-	return count;
-}
-
-static ssize_t get_pwm_enable(struct device *dev,
-			      struct device_attribute *devattr, char *buf)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct max31790_data *data = max31790_update_device(dev);
-	int mode;
-
-	if (IS_ERR(data))
-		return PTR_ERR(data);
-
-	if (data->fan_config[attr->index] & MAX31790_FAN_CFG_RPM_MODE)
-		mode = 2;
-	else if (data->fan_config[attr->index] & MAX31790_FAN_CFG_TACH_INPUT_EN)
-		mode = 1;
-	else
-		mode = 0;
-
-	return sprintf(buf, "%d\n", mode);
-}
-
-static ssize_t set_pwm_enable(struct device *dev,
-			      struct device_attribute *devattr,
-			      const char *buf, size_t count)
-{
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
-	struct max31790_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	unsigned long mode;
-	int err;
-
-	err = kstrtoul(buf, 10, &mode);
-	if (err)
-		return err;
-
-	switch (mode) {
-	case 0:
-		data->fan_config[attr->index] =
-			data->fan_config[attr->index]
-			& ~(MAX31790_FAN_CFG_TACH_INPUT_EN
-			    | MAX31790_FAN_CFG_RPM_MODE);
-		break;
-	case 1:
-		data->fan_config[attr->index] =
-			(data->fan_config[attr->index]
-			 | MAX31790_FAN_CFG_TACH_INPUT_EN)
-			& ~MAX31790_FAN_CFG_RPM_MODE;
-		break;
-	case 2:
-		data->fan_config[attr->index] =
-			data->fan_config[attr->index]
-			| MAX31790_FAN_CFG_TACH_INPUT_EN
-			| MAX31790_FAN_CFG_RPM_MODE;
+		err = i2c_smbus_write_word_swapped(client,
+					MAX31790_REG_TARGET_COUNT(channel),
+					data->target_count[channel]);
 		break;
 	default:
-		return -EINVAL;
+		err = -EOPNOTSUPP;
+		break;
 	}
 
-	mutex_lock(&data->update_lock);
-
-	err = i2c_smbus_write_byte_data(client,
-			MAX31790_REG_FAN_CONFIG(attr->index),
-			data->fan_config[attr->index]);
-
 	mutex_unlock(&data->update_lock);
 
-	if (err < 0)
-		return err;
-
-	return count;
+	return err;
 }
 
-static ssize_t get_fan_fault(struct device *dev,
-			     struct device_attribute *devattr, char *buf)
+static umode_t max31790_fan_is_visible(const void *_data, u32 attr, int channel)
 {
-	struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+	const struct max31790_data *data = _data;
+	u8 fan_config = data->fan_config[channel % NR_CHANNEL];
+
+	switch (attr) {
+	case hwmon_fan_input:
+	case hwmon_fan_fault:
+		if (channel < NR_CHANNEL ||
+		    (fan_config & MAX31790_FAN_CFG_TACH_INPUT))
+			return S_IRUGO;
+		return 0;
+	case hwmon_fan_target:
+		if (channel < NR_CHANNEL &&
+		    !(fan_config & MAX31790_FAN_CFG_TACH_INPUT))
+			return S_IRUGO | S_IWUSR;
+		return 0;
+	default:
+		return 0;
+	}
+}
+
+static int max31790_read_pwm(struct device *dev, u32 attr, int channel,
+			     long *val)
+{
 	struct max31790_data *data = max31790_update_device(dev);
-	int fault;
+	u8 fan_config = data->fan_config[channel];
 
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
-	fault = !!(data->fault_status & (1 << attr->index));
-
-	return sprintf(buf, "%d\n", fault);
+	switch (attr) {
+	case hwmon_pwm_input:
+		*val = data->pwm[channel] >> 8;
+		return 0;
+	case hwmon_pwm_enable:
+		if (fan_config & MAX31790_FAN_CFG_RPM_MODE)
+			*val = 2;
+		else if (fan_config & MAX31790_FAN_CFG_TACH_INPUT_EN)
+			*val = 1;
+		else
+			*val = 0;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, get_fan, NULL, 0);
-static SENSOR_DEVICE_ATTR(fan2_input, S_IRUGO, get_fan, NULL, 1);
-static SENSOR_DEVICE_ATTR(fan3_input, S_IRUGO, get_fan, NULL, 2);
-static SENSOR_DEVICE_ATTR(fan4_input, S_IRUGO, get_fan, NULL, 3);
-static SENSOR_DEVICE_ATTR(fan5_input, S_IRUGO, get_fan, NULL, 4);
-static SENSOR_DEVICE_ATTR(fan6_input, S_IRUGO, get_fan, NULL, 5);
+static int max31790_write_pwm(struct device *dev, u32 attr, int channel,
+			      long val)
+{
+	struct max31790_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	u8 fan_config;
+	int err = 0;
 
-static SENSOR_DEVICE_ATTR(fan1_fault, S_IRUGO, get_fan_fault, NULL, 0);
-static SENSOR_DEVICE_ATTR(fan2_fault, S_IRUGO, get_fan_fault, NULL, 1);
-static SENSOR_DEVICE_ATTR(fan3_fault, S_IRUGO, get_fan_fault, NULL, 2);
-static SENSOR_DEVICE_ATTR(fan4_fault, S_IRUGO, get_fan_fault, NULL, 3);
-static SENSOR_DEVICE_ATTR(fan5_fault, S_IRUGO, get_fan_fault, NULL, 4);
-static SENSOR_DEVICE_ATTR(fan6_fault, S_IRUGO, get_fan_fault, NULL, 5);
+	mutex_lock(&data->update_lock);
 
-static SENSOR_DEVICE_ATTR(fan7_input, S_IRUGO, get_fan, NULL, 6);
-static SENSOR_DEVICE_ATTR(fan8_input, S_IRUGO, get_fan, NULL, 7);
-static SENSOR_DEVICE_ATTR(fan9_input, S_IRUGO, get_fan, NULL, 8);
-static SENSOR_DEVICE_ATTR(fan10_input, S_IRUGO, get_fan, NULL, 9);
-static SENSOR_DEVICE_ATTR(fan11_input, S_IRUGO, get_fan, NULL, 10);
-static SENSOR_DEVICE_ATTR(fan12_input, S_IRUGO, get_fan, NULL, 11);
+	switch (attr) {
+	case hwmon_pwm_input:
+		if (val < 0 || val > 255) {
+			err = -EINVAL;
+			break;
+		}
+		data->pwm[channel] = val << 8;
+		err = i2c_smbus_write_word_swapped(client,
+						   MAX31790_REG_PWMOUT(channel),
+						   val);
+		break;
+	case hwmon_pwm_enable:
+		fan_config = data->fan_config[channel];
+		if (val == 0) {
+			fan_config &= ~(MAX31790_FAN_CFG_TACH_INPUT_EN |
+					MAX31790_FAN_CFG_RPM_MODE);
+		} else if (val == 1) {
+			fan_config = (fan_config |
+				      MAX31790_FAN_CFG_TACH_INPUT_EN) &
+				     ~MAX31790_FAN_CFG_RPM_MODE;
+		} else if (val == 2) {
+			fan_config |= MAX31790_FAN_CFG_TACH_INPUT_EN |
+				      MAX31790_FAN_CFG_RPM_MODE;
+		} else {
+			err = -EINVAL;
+			break;
+		}
+		data->fan_config[channel] = fan_config;
+		err = i2c_smbus_write_byte_data(client,
+					MAX31790_REG_FAN_CONFIG(channel),
+					fan_config);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
 
-static SENSOR_DEVICE_ATTR(fan7_fault, S_IRUGO, get_fan_fault, NULL, 6);
-static SENSOR_DEVICE_ATTR(fan8_fault, S_IRUGO, get_fan_fault, NULL, 7);
-static SENSOR_DEVICE_ATTR(fan9_fault, S_IRUGO, get_fan_fault, NULL, 8);
-static SENSOR_DEVICE_ATTR(fan10_fault, S_IRUGO, get_fan_fault, NULL, 9);
-static SENSOR_DEVICE_ATTR(fan11_fault, S_IRUGO, get_fan_fault, NULL, 10);
-static SENSOR_DEVICE_ATTR(fan12_fault, S_IRUGO, get_fan_fault, NULL, 11);
+	mutex_unlock(&data->update_lock);
 
-static SENSOR_DEVICE_ATTR(fan1_target, S_IWUSR | S_IRUGO,
-		get_fan_target, set_fan_target, 0);
-static SENSOR_DEVICE_ATTR(fan2_target, S_IWUSR | S_IRUGO,
-		get_fan_target, set_fan_target, 1);
-static SENSOR_DEVICE_ATTR(fan3_target, S_IWUSR | S_IRUGO,
-		get_fan_target, set_fan_target, 2);
-static SENSOR_DEVICE_ATTR(fan4_target, S_IWUSR | S_IRUGO,
-		get_fan_target, set_fan_target, 3);
-static SENSOR_DEVICE_ATTR(fan5_target, S_IWUSR | S_IRUGO,
-		get_fan_target, set_fan_target, 4);
-static SENSOR_DEVICE_ATTR(fan6_target, S_IWUSR | S_IRUGO,
-		get_fan_target, set_fan_target, 5);
+	return err;
+}
 
-static SENSOR_DEVICE_ATTR(pwm1, S_IWUSR | S_IRUGO, get_pwm, set_pwm, 0);
-static SENSOR_DEVICE_ATTR(pwm2, S_IWUSR | S_IRUGO, get_pwm, set_pwm, 1);
-static SENSOR_DEVICE_ATTR(pwm3, S_IWUSR | S_IRUGO, get_pwm, set_pwm, 2);
-static SENSOR_DEVICE_ATTR(pwm4, S_IWUSR | S_IRUGO, get_pwm, set_pwm, 3);
-static SENSOR_DEVICE_ATTR(pwm5, S_IWUSR | S_IRUGO, get_pwm, set_pwm, 4);
-static SENSOR_DEVICE_ATTR(pwm6, S_IWUSR | S_IRUGO, get_pwm, set_pwm, 5);
+static umode_t max31790_pwm_is_visible(const void *_data, u32 attr, int channel)
+{
+	const struct max31790_data *data = _data;
+	u8 fan_config = data->fan_config[channel];
 
-static SENSOR_DEVICE_ATTR(pwm1_enable, S_IWUSR | S_IRUGO,
-		get_pwm_enable, set_pwm_enable, 0);
-static SENSOR_DEVICE_ATTR(pwm2_enable, S_IWUSR | S_IRUGO,
-		get_pwm_enable, set_pwm_enable, 1);
-static SENSOR_DEVICE_ATTR(pwm3_enable, S_IWUSR | S_IRUGO,
-		get_pwm_enable, set_pwm_enable, 2);
-static SENSOR_DEVICE_ATTR(pwm4_enable, S_IWUSR | S_IRUGO,
-		get_pwm_enable, set_pwm_enable, 3);
-static SENSOR_DEVICE_ATTR(pwm5_enable, S_IWUSR | S_IRUGO,
-		get_pwm_enable, set_pwm_enable, 4);
-static SENSOR_DEVICE_ATTR(pwm6_enable, S_IWUSR | S_IRUGO,
-		get_pwm_enable, set_pwm_enable, 5);
+	switch (attr) {
+	case hwmon_pwm_input:
+	case hwmon_pwm_enable:
+		if (!(fan_config & MAX31790_FAN_CFG_TACH_INPUT))
+			return S_IRUGO | S_IWUSR;
+		return 0;
+	default:
+		return 0;
+	}
+}
 
-static struct attribute *max31790_attrs[] = {
-	&sensor_dev_attr_fan1_input.dev_attr.attr,
-	&sensor_dev_attr_fan2_input.dev_attr.attr,
-	&sensor_dev_attr_fan3_input.dev_attr.attr,
-	&sensor_dev_attr_fan4_input.dev_attr.attr,
-	&sensor_dev_attr_fan5_input.dev_attr.attr,
-	&sensor_dev_attr_fan6_input.dev_attr.attr,
+static int max31790_read(struct device *dev, enum hwmon_sensor_types type,
+			 u32 attr, int channel, long *val)
+{
+	switch (type) {
+	case hwmon_fan:
+		return max31790_read_fan(dev, attr, channel, val);
+	case hwmon_pwm:
+		return max31790_read_pwm(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
 
-	&sensor_dev_attr_fan1_fault.dev_attr.attr,
-	&sensor_dev_attr_fan2_fault.dev_attr.attr,
-	&sensor_dev_attr_fan3_fault.dev_attr.attr,
-	&sensor_dev_attr_fan4_fault.dev_attr.attr,
-	&sensor_dev_attr_fan5_fault.dev_attr.attr,
-	&sensor_dev_attr_fan6_fault.dev_attr.attr,
+static int max31790_write(struct device *dev, enum hwmon_sensor_types type,
+			  u32 attr, int channel, long val)
+{
+	switch (type) {
+	case hwmon_fan:
+		return max31790_write_fan(dev, attr, channel, val);
+	case hwmon_pwm:
+		return max31790_write_pwm(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
 
-	&sensor_dev_attr_fan7_input.dev_attr.attr,
-	&sensor_dev_attr_fan8_input.dev_attr.attr,
-	&sensor_dev_attr_fan9_input.dev_attr.attr,
-	&sensor_dev_attr_fan10_input.dev_attr.attr,
-	&sensor_dev_attr_fan11_input.dev_attr.attr,
-	&sensor_dev_attr_fan12_input.dev_attr.attr,
+static umode_t max31790_is_visible(const void *data,
+				   enum hwmon_sensor_types type,
+				   u32 attr, int channel)
+{
+	switch (type) {
+	case hwmon_fan:
+		return max31790_fan_is_visible(data, attr, channel);
+	case hwmon_pwm:
+		return max31790_pwm_is_visible(data, attr, channel);
+	default:
+		return 0;
+	}
+}
 
-	&sensor_dev_attr_fan7_fault.dev_attr.attr,
-	&sensor_dev_attr_fan8_fault.dev_attr.attr,
-	&sensor_dev_attr_fan9_fault.dev_attr.attr,
-	&sensor_dev_attr_fan10_fault.dev_attr.attr,
-	&sensor_dev_attr_fan11_fault.dev_attr.attr,
-	&sensor_dev_attr_fan12_fault.dev_attr.attr,
+static const u32 max31790_fan_config[] = {
+	HWMON_F_INPUT | HWMON_F_TARGET | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_TARGET | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_TARGET | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_TARGET | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_TARGET | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_TARGET | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_FAULT,
+	HWMON_F_INPUT | HWMON_F_FAULT,
+	0
+};
 
-	&sensor_dev_attr_fan1_target.dev_attr.attr,
-	&sensor_dev_attr_fan2_target.dev_attr.attr,
-	&sensor_dev_attr_fan3_target.dev_attr.attr,
-	&sensor_dev_attr_fan4_target.dev_attr.attr,
-	&sensor_dev_attr_fan5_target.dev_attr.attr,
-	&sensor_dev_attr_fan6_target.dev_attr.attr,
+static const struct hwmon_channel_info max31790_fan = {
+	.type = hwmon_fan,
+	.config = max31790_fan_config,
+};
 
-	&sensor_dev_attr_pwm1.dev_attr.attr,
-	&sensor_dev_attr_pwm2.dev_attr.attr,
-	&sensor_dev_attr_pwm3.dev_attr.attr,
-	&sensor_dev_attr_pwm4.dev_attr.attr,
-	&sensor_dev_attr_pwm5.dev_attr.attr,
-	&sensor_dev_attr_pwm6.dev_attr.attr,
+static const u32 max31790_pwm_config[] = {
+	HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+	HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+	HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+	HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+	HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+	HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+	0
+};
 
-	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
-	&sensor_dev_attr_pwm2_enable.dev_attr.attr,
-	&sensor_dev_attr_pwm3_enable.dev_attr.attr,
-	&sensor_dev_attr_pwm4_enable.dev_attr.attr,
-	&sensor_dev_attr_pwm5_enable.dev_attr.attr,
-	&sensor_dev_attr_pwm6_enable.dev_attr.attr,
+static const struct hwmon_channel_info max31790_pwm = {
+	.type = hwmon_pwm,
+	.config = max31790_pwm_config,
+};
+
+static const struct hwmon_channel_info *max31790_info[] = {
+	&max31790_fan,
+	&max31790_pwm,
 	NULL
 };
 
-static umode_t max31790_attrs_visible(struct kobject *kobj,
-				     struct attribute *a, int n)
-{
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct max31790_data *data = dev_get_drvdata(dev);
-	struct device_attribute *devattr =
-			container_of(a, struct device_attribute, attr);
-	int index = to_sensor_dev_attr(devattr)->index % NR_CHANNEL;
-	u8 fan_config;
-
-	fan_config = data->fan_config[index];
-
-	if (n >= NR_CHANNEL * 2 && n < NR_CHANNEL * 4 &&
-	    !(fan_config & MAX31790_FAN_CFG_TACH_INPUT))
-		return 0;
-	if (n >= NR_CHANNEL * 4 && (fan_config & MAX31790_FAN_CFG_TACH_INPUT))
-		return 0;
-
-	return a->mode;
-}
-
-static const struct attribute_group max31790_group = {
-	.attrs = max31790_attrs,
-	.is_visible = max31790_attrs_visible,
+static const struct hwmon_ops max31790_hwmon_ops = {
+	.is_visible = max31790_is_visible,
+	.read = max31790_read,
+	.write = max31790_write,
 };
-__ATTRIBUTE_GROUPS(max31790);
+
+static const struct hwmon_chip_info max31790_chip_info = {
+	.ops = &max31790_hwmon_ops,
+	.info = max31790_info,
+};
 
 static int max31790_init_client(struct i2c_client *client,
 				struct max31790_data *data)
@@ -575,8 +500,10 @@
 	if (err)
 		return err;
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev,
-			client->name, data, max31790_groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+							 data,
+							 &max31790_chip_info,
+							 NULL);
 
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
diff --git a/drivers/hwmon/max6650.c b/drivers/hwmon/max6650.c
index 162a520..a993b44 100644
--- a/drivers/hwmon/max6650.c
+++ b/drivers/hwmon/max6650.c
@@ -39,6 +39,7 @@
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
 #include <linux/err.h>
+#include <linux/of_device.h>
 
 /*
  * Insmod parameters
@@ -48,7 +49,7 @@
 static int fan_voltage;
 /* prescaler: Possible values are 1, 2, 4, 8, 16 or 0 for don't change */
 static int prescaler;
-/* clock: The clock frequency of the chip the driver should assume */
+/* clock: The clock frequency of the chip (max6651 can be clocked externally) */
 static int clock = 254000;
 
 module_param(fan_voltage, int, S_IRUGO);
@@ -133,6 +134,19 @@
 	MAX6650_REG_TACH3,
 };
 
+static const struct of_device_id max6650_dt_match[] = {
+	{
+		.compatible = "maxim,max6650",
+		.data = (void *)1
+	},
+	{
+		.compatible = "maxim,max6651",
+		.data = (void *)4
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, max6650_dt_match);
+
 static struct max6650_data *max6650_update_device(struct device *dev)
 {
 	struct max6650_data *data = dev_get_drvdata(dev);
@@ -171,6 +185,30 @@
 	return data;
 }
 
+/*
+ * Change the operating mode of the chip (if needed).
+ * mode is one of the MAX6650_CFG_MODE_* values.
+ */
+static int max6650_set_operating_mode(struct max6650_data *data, u8 mode)
+{
+	int result;
+	u8 config = data->config;
+
+	if (mode == (config & MAX6650_CFG_MODE_MASK))
+		return 0;
+
+	config = (config & ~MAX6650_CFG_MODE_MASK) | mode;
+
+	result = i2c_smbus_write_byte_data(data->client, MAX6650_REG_CONFIG,
+					   config);
+	if (result < 0)
+		return result;
+
+	data->config = config;
+
+	return 0;
+}
+
 static ssize_t get_fan(struct device *dev, struct device_attribute *devattr,
 		       char *buf)
 {
@@ -252,18 +290,12 @@
 	return sprintf(buf, "%d\n", rpm);
 }
 
-static ssize_t set_target(struct device *dev, struct device_attribute *devattr,
-			 const char *buf, size_t count)
+static int max6650_set_target(struct max6650_data *data, unsigned long rpm)
 {
-	struct max6650_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
 	int kscale, ktach;
-	unsigned long rpm;
-	int err;
 
-	err = kstrtoul(buf, 10, &rpm);
-	if (err)
-		return err;
+	if (rpm == 0)
+		return max6650_set_operating_mode(data, MAX6650_CFG_MODE_OFF);
 
 	rpm = clamp_val(rpm, FAN_RPM_MIN, FAN_RPM_MAX);
 
@@ -274,8 +306,6 @@
 	 *     KTACH = [(fCLK x KSCALE) / (256 x FanSpeed)] - 1
 	 */
 
-	mutex_lock(&data->update_lock);
-
 	kscale = DIV_FROM_REG(data->config);
 	ktach = ((clock * kscale) / (256 * rpm / 60)) - 1;
 	if (ktach < 0)
@@ -284,10 +314,30 @@
 		ktach = 255;
 	data->speed = ktach;
 
-	i2c_smbus_write_byte_data(client, MAX6650_REG_SPEED, data->speed);
+	return i2c_smbus_write_byte_data(data->client, MAX6650_REG_SPEED,
+					 data->speed);
+}
+
+static ssize_t set_target(struct device *dev, struct device_attribute *devattr,
+			 const char *buf, size_t count)
+{
+	struct max6650_data *data = dev_get_drvdata(dev);
+	unsigned long rpm;
+	int err;
+
+	err = kstrtoul(buf, 10, &rpm);
+	if (err)
+		return err;
+
+	mutex_lock(&data->update_lock);
+
+	err = max6650_set_target(data, rpm);
 
 	mutex_unlock(&data->update_lock);
 
+	if (err < 0)
+		return err;
+
 	return count;
 }
 
@@ -341,12 +391,11 @@
 		data->dac = 180 - (180 * pwm)/255;
 	else
 		data->dac = 76 - (76 * pwm)/255;
-
-	i2c_smbus_write_byte_data(client, MAX6650_REG_DAC, data->dac);
+	err = i2c_smbus_write_byte_data(client, MAX6650_REG_DAC, data->dac);
 
 	mutex_unlock(&data->update_lock);
 
-	return count;
+	return err < 0 ? err : count;
 }
 
 /*
@@ -355,14 +404,14 @@
  * 0 = Fan always on
  * 1 = Open loop, Voltage is set according to speed, not regulated.
  * 2 = Closed loop, RPM for all fans regulated by fan1 tachometer
+ * 3 = Fan off
  */
-
 static ssize_t get_enable(struct device *dev, struct device_attribute *devattr,
 			  char *buf)
 {
 	struct max6650_data *data = max6650_update_device(dev);
 	int mode = (data->config & MAX6650_CFG_MODE_MASK) >> 4;
-	int sysfs_modes[4] = {0, 1, 2, 1};
+	int sysfs_modes[4] = {0, 3, 2, 1};
 
 	return sprintf(buf, "%d\n", sysfs_modes[mode]);
 }
@@ -371,25 +420,25 @@
 			  const char *buf, size_t count)
 {
 	struct max6650_data *data = dev_get_drvdata(dev);
-	struct i2c_client *client = data->client;
-	int max6650_modes[3] = {0, 3, 2};
 	unsigned long mode;
 	int err;
+	const u8 max6650_modes[] = {
+		MAX6650_CFG_MODE_ON,
+		MAX6650_CFG_MODE_OPEN_LOOP,
+		MAX6650_CFG_MODE_CLOSED_LOOP,
+		MAX6650_CFG_MODE_OFF,
+		};
 
 	err = kstrtoul(buf, 10, &mode);
 	if (err)
 		return err;
 
-	if (mode > 2)
+	if (mode >= ARRAY_SIZE(max6650_modes))
 		return -EINVAL;
 
 	mutex_lock(&data->update_lock);
 
-	data->config = i2c_smbus_read_byte_data(client, MAX6650_REG_CONFIG);
-	data->config = (data->config & ~MAX6650_CFG_MODE_MASK)
-		       | (max6650_modes[mode] << 4);
-
-	i2c_smbus_write_byte_data(client, MAX6650_REG_CONFIG, data->config);
+	max6650_set_operating_mode(data, max6650_modes[mode]);
 
 	mutex_unlock(&data->update_lock);
 
@@ -566,6 +615,18 @@
 	struct device *dev = &client->dev;
 	int config;
 	int err = -EIO;
+	u32 voltage;
+	u32 prescale;
+	u32 target_rpm;
+
+	if (of_property_read_u32(dev->of_node, "maxim,fan-microvolt",
+				 &voltage))
+		voltage = fan_voltage;
+	else
+		voltage /= 1000000; /* Microvolts to volts */
+	if (of_property_read_u32(dev->of_node, "maxim,fan-prescale",
+				 &prescale))
+		prescale = prescaler;
 
 	config = i2c_smbus_read_byte_data(client, MAX6650_REG_CONFIG);
 
@@ -574,7 +635,7 @@
 		return err;
 	}
 
-	switch (fan_voltage) {
+	switch (voltage) {
 	case 0:
 		break;
 	case 5:
@@ -584,14 +645,10 @@
 		config |= MAX6650_CFG_V12;
 		break;
 	default:
-		dev_err(dev, "illegal value for fan_voltage (%d)\n",
-			fan_voltage);
+		dev_err(dev, "illegal value for fan_voltage (%d)\n", voltage);
 	}
 
-	dev_info(dev, "Fan voltage is set to %dV.\n",
-		 (config & MAX6650_CFG_V12) ? 12 : 5);
-
-	switch (prescaler) {
+	switch (prescale) {
 	case 0:
 		break;
 	case 1:
@@ -614,28 +671,13 @@
 			 | MAX6650_CFG_PRESCALER_16;
 		break;
 	default:
-		dev_err(dev, "illegal value for prescaler (%d)\n", prescaler);
+		dev_err(dev, "illegal value for prescaler (%d)\n", prescale);
 	}
 
-	dev_info(dev, "Prescaler is set to %d.\n",
+	dev_info(dev, "Fan voltage: %dV, prescaler: %d.\n",
+		 (config & MAX6650_CFG_V12) ? 12 : 5,
 		 1 << (config & MAX6650_CFG_PRESCALER_MASK));
 
-	/*
-	 * If mode is set to "full off", we change it to "open loop" and
-	 * set DAC to 255, which has the same effect. We do this because
-	 * there's no "full off" mode defined in hwmon specifications.
-	 */
-
-	if ((config & MAX6650_CFG_MODE_MASK) == MAX6650_CFG_MODE_OFF) {
-		dev_dbg(dev, "Change mode to open loop, full off.\n");
-		config = (config & ~MAX6650_CFG_MODE_MASK)
-			 | MAX6650_CFG_MODE_OPEN_LOOP;
-		if (i2c_smbus_write_byte_data(client, MAX6650_REG_DAC, 255)) {
-			dev_err(dev, "DAC write error, aborting.\n");
-			return err;
-		}
-	}
-
 	if (i2c_smbus_write_byte_data(client, MAX6650_REG_CONFIG, config)) {
 		dev_err(dev, "Config write error, aborting.\n");
 		return err;
@@ -644,6 +686,12 @@
 	data->config = config;
 	data->count = i2c_smbus_read_byte_data(client, MAX6650_REG_COUNT);
 
+	if (!of_property_read_u32(client->dev.of_node, "maxim,fan-target-rpm",
+				  &target_rpm)) {
+		max6650_set_target(data, target_rpm);
+		max6650_set_operating_mode(data, MAX6650_CFG_MODE_CLOSED_LOOP);
+	}
+
 	return 0;
 }
 
@@ -651,6 +699,8 @@
 			 const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
+	const struct of_device_id *of_id =
+		of_match_device(of_match_ptr(max6650_dt_match), dev);
 	struct max6650_data *data;
 	struct device *hwmon_dev;
 	int err;
@@ -661,7 +711,7 @@
 
 	data->client = client;
 	mutex_init(&data->update_lock);
-	data->nr_fans = id->driver_data;
+	data->nr_fans = of_id ? (int)(uintptr_t)of_id->data : id->driver_data;
 
 	/*
 	 * Initialize the max6650 chip
@@ -691,6 +741,7 @@
 static struct i2c_driver max6650_driver = {
 	.driver = {
 		.name	= "max6650",
+		.of_match_table = of_match_ptr(max6650_dt_match),
 	},
 	.probe		= max6650_probe,
 	.id_table	= max6650_id,
diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index d087a8e..ce75dd4 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -195,6 +195,8 @@
 
 #define NUM_FAN		6
 
+#define TEMP_SOURCE_VIRTUAL	0x1f
+
 /* Common and NCT6775 specific data */
 
 /* Voltage min/max registers for nr=7..14 are in bank 5 */
@@ -3940,7 +3942,7 @@
 			continue;
 
 		src = nct6775_read_value(data, data->REG_TEMP_SEL[i]) & 0x1f;
-		if (!src || (mask & (1 << src)))
+		if (!src)
 			continue;
 
 		if (src >= data->temp_label_num ||
@@ -3952,7 +3954,16 @@
 			continue;
 		}
 
-		mask |= 1 << src;
+		/*
+		 * For virtual temperature sources, the 'virtual' temperature
+		 * for each fan reflects a different temperature, and there
+		 * are no duplicates.
+		 */
+		if (src != TEMP_SOURCE_VIRTUAL) {
+			if (mask & (1 << src))
+				continue;
+			mask |= 1 << src;
+		}
 
 		/* Use fixed index for SYSTIN(1), CPUTIN(2), AUXTIN(3) */
 		if (src <= data->temp_fixed_num) {
@@ -4232,11 +4243,11 @@
 	if (err)
 		return err;
 
-	if (force_id)
+	val = (superio_inb(sioaddr, SIO_REG_DEVID) << 8) |
+		superio_inb(sioaddr, SIO_REG_DEVID + 1);
+	if (force_id && val != 0xffff)
 		val = force_id;
-	else
-		val = (superio_inb(sioaddr, SIO_REG_DEVID) << 8)
-		    | superio_inb(sioaddr, SIO_REG_DEVID + 1);
+
 	switch (val & SIO_ID_MASK) {
 	case SIO_NCT6106_ID:
 		sio_data->kind = nct6106;
diff --git a/drivers/hwmon/nct7904.c b/drivers/hwmon/nct7904.c
index 08ff89d..95a68ab 100644
--- a/drivers/hwmon/nct7904.c
+++ b/drivers/hwmon/nct7904.c
@@ -21,7 +21,6 @@
 #include <linux/i2c.h>
 #include <linux/mutex.h>
 #include <linux/hwmon.h>
-#include <linux/hwmon-sysfs.h>
 
 #define VENDOR_ID_REG		0x7A	/* Any bank */
 #define NUVOTON_ID		0x50
@@ -153,341 +152,230 @@
 	return ret;
 }
 
-/* FANIN ATTR */
-static ssize_t show_fan(struct device *dev,
-			struct device_attribute *devattr, char *buf)
+static int nct7904_read_fan(struct device *dev, u32 attr, int channel,
+			    long *val)
 {
-	int index = to_sensor_dev_attr(devattr)->index;
 	struct nct7904_data *data = dev_get_drvdata(dev);
+	unsigned int cnt, rpm;
 	int ret;
-	unsigned cnt, rpm;
 
-	ret = nct7904_read_reg16(data, BANK_0, FANIN1_HV_REG + index * 2);
-	if (ret < 0)
-		return ret;
-	cnt = ((ret & 0xff00) >> 3) | (ret & 0x1f);
-	if (cnt == 0x1fff)
-		rpm = 0;
-	else
-		rpm = 1350000 / cnt;
-	return sprintf(buf, "%u\n", rpm);
+	switch(attr) {
+	case hwmon_fan_input:
+		ret = nct7904_read_reg16(data, BANK_0,
+					 FANIN1_HV_REG + channel * 2);
+		if (ret < 0)
+			return ret;
+		cnt = ((ret & 0xff00) >> 3) | (ret & 0x1f);
+		if (cnt == 0x1fff)
+			rpm = 0;
+		else
+			rpm = 1350000 / cnt;
+		*val = rpm;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static umode_t nct7904_fanin_is_visible(struct kobject *kobj,
-					struct attribute *a, int n)
+static umode_t nct7904_fan_is_visible(const void *_data, u32 attr, int channel)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct nct7904_data *data = dev_get_drvdata(dev);
+	const struct nct7904_data *data = _data;
 
-	if (data->fanin_mask & (1 << n))
-		return a->mode;
+	if (attr == hwmon_fan_input && data->fanin_mask & (1 << channel))
+		return S_IRUGO;
 	return 0;
 }
 
-static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, show_fan, NULL, 0);
-static SENSOR_DEVICE_ATTR(fan2_input, S_IRUGO, show_fan, NULL, 1);
-static SENSOR_DEVICE_ATTR(fan3_input, S_IRUGO, show_fan, NULL, 2);
-static SENSOR_DEVICE_ATTR(fan4_input, S_IRUGO, show_fan, NULL, 3);
-static SENSOR_DEVICE_ATTR(fan5_input, S_IRUGO, show_fan, NULL, 4);
-static SENSOR_DEVICE_ATTR(fan6_input, S_IRUGO, show_fan, NULL, 5);
-static SENSOR_DEVICE_ATTR(fan7_input, S_IRUGO, show_fan, NULL, 6);
-static SENSOR_DEVICE_ATTR(fan8_input, S_IRUGO, show_fan, NULL, 7);
-static SENSOR_DEVICE_ATTR(fan9_input, S_IRUGO, show_fan, NULL, 8);
-static SENSOR_DEVICE_ATTR(fan10_input, S_IRUGO, show_fan, NULL, 9);
-static SENSOR_DEVICE_ATTR(fan11_input, S_IRUGO, show_fan, NULL, 10);
-static SENSOR_DEVICE_ATTR(fan12_input, S_IRUGO, show_fan, NULL, 11);
-
-static struct attribute *nct7904_fanin_attrs[] = {
-	&sensor_dev_attr_fan1_input.dev_attr.attr,
-	&sensor_dev_attr_fan2_input.dev_attr.attr,
-	&sensor_dev_attr_fan3_input.dev_attr.attr,
-	&sensor_dev_attr_fan4_input.dev_attr.attr,
-	&sensor_dev_attr_fan5_input.dev_attr.attr,
-	&sensor_dev_attr_fan6_input.dev_attr.attr,
-	&sensor_dev_attr_fan7_input.dev_attr.attr,
-	&sensor_dev_attr_fan8_input.dev_attr.attr,
-	&sensor_dev_attr_fan9_input.dev_attr.attr,
-	&sensor_dev_attr_fan10_input.dev_attr.attr,
-	&sensor_dev_attr_fan11_input.dev_attr.attr,
-	&sensor_dev_attr_fan12_input.dev_attr.attr,
-	NULL
+static u8 nct7904_chan_to_index[] = {
+	0,	/* Not used */
+	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+	18, 19, 20, 16
 };
 
-static const struct attribute_group nct7904_fanin_group = {
-	.attrs = nct7904_fanin_attrs,
-	.is_visible = nct7904_fanin_is_visible,
-};
-
-/* VSEN ATTR */
-static ssize_t show_voltage(struct device *dev,
-			    struct device_attribute *devattr, char *buf)
+static int nct7904_read_in(struct device *dev, u32 attr, int channel,
+			   long *val)
 {
-	int index = to_sensor_dev_attr(devattr)->index;
 	struct nct7904_data *data = dev_get_drvdata(dev);
-	int ret;
-	int volt;
+	int ret, volt, index;
 
-	ret = nct7904_read_reg16(data, BANK_0, VSEN1_HV_REG + index * 2);
-	if (ret < 0)
-		return ret;
-	volt = ((ret & 0xff00) >> 5) | (ret & 0x7);
-	if (index < 14)
-		volt *= 2; /* 0.002V scale */
-	else
-		volt *= 6; /* 0.006V scale */
+	index = nct7904_chan_to_index[channel];
 
-	return sprintf(buf, "%d\n", volt);
+	switch(attr) {
+	case hwmon_in_input:
+		ret = nct7904_read_reg16(data, BANK_0,
+					 VSEN1_HV_REG + index * 2);
+		if (ret < 0)
+			return ret;
+		volt = ((ret & 0xff00) >> 5) | (ret & 0x7);
+		if (index < 14)
+			volt *= 2; /* 0.002V scale */
+		else
+			volt *= 6; /* 0.006V scale */
+		*val = volt;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t show_ltemp(struct device *dev,
-			  struct device_attribute *devattr, char *buf)
+static umode_t nct7904_in_is_visible(const void *_data, u32 attr, int channel)
 {
-	struct nct7904_data *data = dev_get_drvdata(dev);
-	int ret;
-	int temp;
+	const struct nct7904_data *data = _data;
+	int index = nct7904_chan_to_index[channel];
 
-	ret = nct7904_read_reg16(data, BANK_0, LTD_HV_REG);
-	if (ret < 0)
-		return ret;
-	temp = ((ret & 0xff00) >> 5) | (ret & 0x7);
-	temp = sign_extend32(temp, 10) * 125;
+	if (channel > 0 && attr == hwmon_in_input &&
+	    (data->vsen_mask & BIT(index)))
+		return S_IRUGO;
 
-	return sprintf(buf, "%d\n", temp);
-}
-
-static umode_t nct7904_vsen_is_visible(struct kobject *kobj,
-				       struct attribute *a, int n)
-{
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct nct7904_data *data = dev_get_drvdata(dev);
-
-	if (data->vsen_mask & (1 << n))
-		return a->mode;
 	return 0;
 }
 
-static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, show_voltage, NULL, 0);
-static SENSOR_DEVICE_ATTR(in2_input, S_IRUGO, show_voltage, NULL, 1);
-static SENSOR_DEVICE_ATTR(in3_input, S_IRUGO, show_voltage, NULL, 2);
-static SENSOR_DEVICE_ATTR(in4_input, S_IRUGO, show_voltage, NULL, 3);
-static SENSOR_DEVICE_ATTR(in5_input, S_IRUGO, show_voltage, NULL, 4);
-static SENSOR_DEVICE_ATTR(in6_input, S_IRUGO, show_voltage, NULL, 5);
-static SENSOR_DEVICE_ATTR(in7_input, S_IRUGO, show_voltage, NULL, 6);
-static SENSOR_DEVICE_ATTR(in8_input, S_IRUGO, show_voltage, NULL, 7);
-static SENSOR_DEVICE_ATTR(in9_input, S_IRUGO, show_voltage, NULL, 8);
-static SENSOR_DEVICE_ATTR(in10_input, S_IRUGO, show_voltage, NULL, 9);
-static SENSOR_DEVICE_ATTR(in11_input, S_IRUGO, show_voltage, NULL, 10);
-static SENSOR_DEVICE_ATTR(in12_input, S_IRUGO, show_voltage, NULL, 11);
-static SENSOR_DEVICE_ATTR(in13_input, S_IRUGO, show_voltage, NULL, 12);
-static SENSOR_DEVICE_ATTR(in14_input, S_IRUGO, show_voltage, NULL, 13);
-/*
- * Next 3 voltage sensors have specific names in the Nuvoton doc
- * (3VDD, VBAT, 3VSB) but we use vacant numbers for them.
- */
-static SENSOR_DEVICE_ATTR(in15_input, S_IRUGO, show_voltage, NULL, 14);
-static SENSOR_DEVICE_ATTR(in16_input, S_IRUGO, show_voltage, NULL, 15);
-static SENSOR_DEVICE_ATTR(in20_input, S_IRUGO, show_voltage, NULL, 16);
-/* This is not a voltage, but a local temperature sensor. */
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_ltemp, NULL, 0);
-static SENSOR_DEVICE_ATTR(in17_input, S_IRUGO, show_voltage, NULL, 18);
-static SENSOR_DEVICE_ATTR(in18_input, S_IRUGO, show_voltage, NULL, 19);
-static SENSOR_DEVICE_ATTR(in19_input, S_IRUGO, show_voltage, NULL, 20);
-
-static struct attribute *nct7904_vsen_attrs[] = {
-	&sensor_dev_attr_in1_input.dev_attr.attr,
-	&sensor_dev_attr_in2_input.dev_attr.attr,
-	&sensor_dev_attr_in3_input.dev_attr.attr,
-	&sensor_dev_attr_in4_input.dev_attr.attr,
-	&sensor_dev_attr_in5_input.dev_attr.attr,
-	&sensor_dev_attr_in6_input.dev_attr.attr,
-	&sensor_dev_attr_in7_input.dev_attr.attr,
-	&sensor_dev_attr_in8_input.dev_attr.attr,
-	&sensor_dev_attr_in9_input.dev_attr.attr,
-	&sensor_dev_attr_in10_input.dev_attr.attr,
-	&sensor_dev_attr_in11_input.dev_attr.attr,
-	&sensor_dev_attr_in12_input.dev_attr.attr,
-	&sensor_dev_attr_in13_input.dev_attr.attr,
-	&sensor_dev_attr_in14_input.dev_attr.attr,
-	&sensor_dev_attr_in15_input.dev_attr.attr,
-	&sensor_dev_attr_in16_input.dev_attr.attr,
-	&sensor_dev_attr_in20_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_in17_input.dev_attr.attr,
-	&sensor_dev_attr_in18_input.dev_attr.attr,
-	&sensor_dev_attr_in19_input.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute_group nct7904_vsen_group = {
-	.attrs = nct7904_vsen_attrs,
-	.is_visible = nct7904_vsen_is_visible,
-};
-
-/* CPU_TEMP ATTR */
-static ssize_t show_tcpu(struct device *dev,
-			 struct device_attribute *devattr, char *buf)
+static int nct7904_read_temp(struct device *dev, u32 attr, int channel,
+			     long *val)
 {
-	int index = to_sensor_dev_attr(devattr)->index;
 	struct nct7904_data *data = dev_get_drvdata(dev);
-	int ret;
-	int temp;
+	int ret, temp;
 
-	ret = nct7904_read_reg16(data, BANK_0, T_CPU1_HV_REG + index * 2);
-	if (ret < 0)
-		return ret;
-
-	temp = ((ret & 0xff00) >> 5) | (ret & 0x7);
-	temp = sign_extend32(temp, 10) * 125;
-	return sprintf(buf, "%d\n", temp);
+	switch(attr) {
+	case hwmon_temp_input:
+		if (channel == 0)
+			ret = nct7904_read_reg16(data, BANK_0, LTD_HV_REG);
+		else
+			ret = nct7904_read_reg16(data, BANK_0,
+					T_CPU1_HV_REG + (channel - 1) * 2);
+		if (ret < 0)
+			return ret;
+		temp = ((ret & 0xff00) >> 5) | (ret & 0x7);
+		*val = sign_extend32(temp, 10) * 125;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static umode_t nct7904_tcpu_is_visible(struct kobject *kobj,
-				       struct attribute *a, int n)
+static umode_t nct7904_temp_is_visible(const void *_data, u32 attr, int channel)
 {
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct nct7904_data *data = dev_get_drvdata(dev);
+	const struct nct7904_data *data = _data;
 
-	if (data->tcpu_mask & (1 << n))
-		return a->mode;
+	if (attr == hwmon_temp_input) {
+		if (channel == 0) {
+			if (data->vsen_mask & BIT(17))
+				return S_IRUGO;
+		} else {
+			if (data->tcpu_mask & BIT(channel - 1))
+				return S_IRUGO;
+		}
+	}
+
 	return 0;
 }
 
-/* "temp1_input" reserved for local temp */
-static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, show_tcpu, NULL, 0);
-static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, show_tcpu, NULL, 1);
-static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, show_tcpu, NULL, 2);
-static SENSOR_DEVICE_ATTR(temp5_input, S_IRUGO, show_tcpu, NULL, 3);
-static SENSOR_DEVICE_ATTR(temp6_input, S_IRUGO, show_tcpu, NULL, 4);
-static SENSOR_DEVICE_ATTR(temp7_input, S_IRUGO, show_tcpu, NULL, 5);
-static SENSOR_DEVICE_ATTR(temp8_input, S_IRUGO, show_tcpu, NULL, 6);
-static SENSOR_DEVICE_ATTR(temp9_input, S_IRUGO, show_tcpu, NULL, 7);
-
-static struct attribute *nct7904_tcpu_attrs[] = {
-	&sensor_dev_attr_temp2_input.dev_attr.attr,
-	&sensor_dev_attr_temp3_input.dev_attr.attr,
-	&sensor_dev_attr_temp4_input.dev_attr.attr,
-	&sensor_dev_attr_temp5_input.dev_attr.attr,
-	&sensor_dev_attr_temp6_input.dev_attr.attr,
-	&sensor_dev_attr_temp7_input.dev_attr.attr,
-	&sensor_dev_attr_temp8_input.dev_attr.attr,
-	&sensor_dev_attr_temp9_input.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute_group nct7904_tcpu_group = {
-	.attrs = nct7904_tcpu_attrs,
-	.is_visible = nct7904_tcpu_is_visible,
-};
-
-/* PWM ATTR */
-static ssize_t store_pwm(struct device *dev, struct device_attribute *devattr,
-			 const char *buf, size_t count)
+static int nct7904_read_pwm(struct device *dev, u32 attr, int channel,
+			    long *val)
 {
-	int index = to_sensor_dev_attr(devattr)->index;
 	struct nct7904_data *data = dev_get_drvdata(dev);
-	unsigned long val;
 	int ret;
 
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-	if (val > 255)
-		return -EINVAL;
+	switch(attr) {
+	case hwmon_pwm_input:
+		ret = nct7904_read_reg(data, BANK_3, FANCTL1_OUT_REG + channel);
+		if (ret < 0)
+			return ret;
+		*val = ret;
+		return 0;
+	case hwmon_pwm_enable:
+		ret = nct7904_read_reg(data, BANK_3, FANCTL1_FMR_REG + channel);
+		if (ret < 0)
+			return ret;
 
-	ret = nct7904_write_reg(data, BANK_3, FANCTL1_OUT_REG + index, val);
-
-	return ret ? ret : count;
+		*val = ret ? 2 : 1;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-static ssize_t show_pwm(struct device *dev,
-			struct device_attribute *devattr, char *buf)
+static int nct7904_write_pwm(struct device *dev, u32 attr, int channel,
+			     long val)
 {
-	int index = to_sensor_dev_attr(devattr)->index;
 	struct nct7904_data *data = dev_get_drvdata(dev);
-	int val;
-
-	val = nct7904_read_reg(data, BANK_3, FANCTL1_OUT_REG + index);
-	if (val < 0)
-		return val;
-
-	return sprintf(buf, "%d\n", val);
-}
-
-static ssize_t store_enable(struct device *dev,
-			    struct device_attribute *devattr,
-			    const char *buf, size_t count)
-{
-	int index = to_sensor_dev_attr(devattr)->index;
-	struct nct7904_data *data = dev_get_drvdata(dev);
-	unsigned long val;
 	int ret;
 
-	if (kstrtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-	if (val < 1 || val > 2 || (val == 2 && !data->fan_mode[index]))
-		return -EINVAL;
-
-	ret = nct7904_write_reg(data, BANK_3, FANCTL1_FMR_REG + index,
-				val == 2 ? data->fan_mode[index] : 0);
-
-	return ret ? ret : count;
+	switch(attr) {
+	case hwmon_pwm_input:
+		if (val < 0 || val > 255)
+			return -EINVAL;
+		ret = nct7904_write_reg(data, BANK_3, FANCTL1_OUT_REG + channel,
+					val);
+		return ret;
+	case hwmon_pwm_enable:
+		if (val < 1 || val > 2 ||
+		    (val == 2 && !data->fan_mode[channel]))
+			return -EINVAL;
+		ret = nct7904_write_reg(data, BANK_3, FANCTL1_FMR_REG + channel,
+					val == 2 ? data->fan_mode[channel] : 0);
+		return ret;
+	default:
+		return -EOPNOTSUPP;
+	}
 }
 
-/* Return 1 for manual mode or 2 for SmartFan mode */
-static ssize_t show_enable(struct device *dev,
-			   struct device_attribute *devattr, char *buf)
+static umode_t nct7904_pwm_is_visible(const void *_data, u32 attr, int channel)
 {
-	int index = to_sensor_dev_attr(devattr)->index;
-	struct nct7904_data *data = dev_get_drvdata(dev);
-	int val;
-
-	val = nct7904_read_reg(data, BANK_3, FANCTL1_FMR_REG + index);
-	if (val < 0)
-		return val;
-
-	return sprintf(buf, "%d\n", val ? 2 : 1);
+	switch(attr) {
+	case hwmon_pwm_input:
+	case hwmon_pwm_enable:
+		return S_IRUGO | S_IWUSR;
+	default:
+		return 0;
+	}
 }
 
-/* 2 attributes per channel: pwm and mode */
-static SENSOR_DEVICE_ATTR(pwm1, S_IRUGO | S_IWUSR,
-			show_pwm, store_pwm, 0);
-static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR,
-			show_enable, store_enable, 0);
-static SENSOR_DEVICE_ATTR(pwm2, S_IRUGO | S_IWUSR,
-			show_pwm, store_pwm, 1);
-static SENSOR_DEVICE_ATTR(pwm2_enable, S_IRUGO | S_IWUSR,
-			show_enable, store_enable, 1);
-static SENSOR_DEVICE_ATTR(pwm3, S_IRUGO | S_IWUSR,
-			show_pwm, store_pwm, 2);
-static SENSOR_DEVICE_ATTR(pwm3_enable, S_IRUGO | S_IWUSR,
-			show_enable, store_enable, 2);
-static SENSOR_DEVICE_ATTR(pwm4, S_IRUGO | S_IWUSR,
-			show_pwm, store_pwm, 3);
-static SENSOR_DEVICE_ATTR(pwm4_enable, S_IRUGO | S_IWUSR,
-			show_enable, store_enable, 3);
+static int nct7904_read(struct device *dev, enum hwmon_sensor_types type,
+			u32 attr, int channel, long *val)
+{
+	switch (type) {
+	case hwmon_in:
+		return nct7904_read_in(dev, attr, channel, val);
+	case hwmon_fan:
+		return nct7904_read_fan(dev, attr, channel, val);
+	case hwmon_pwm:
+		return nct7904_read_pwm(dev, attr, channel, val);
+	case hwmon_temp:
+		return nct7904_read_temp(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
 
-static struct attribute *nct7904_fanctl_attrs[] = {
-	&sensor_dev_attr_pwm1.dev_attr.attr,
-	&sensor_dev_attr_pwm1_enable.dev_attr.attr,
-	&sensor_dev_attr_pwm2.dev_attr.attr,
-	&sensor_dev_attr_pwm2_enable.dev_attr.attr,
-	&sensor_dev_attr_pwm3.dev_attr.attr,
-	&sensor_dev_attr_pwm3_enable.dev_attr.attr,
-	&sensor_dev_attr_pwm4.dev_attr.attr,
-	&sensor_dev_attr_pwm4_enable.dev_attr.attr,
-	NULL
-};
+static int nct7904_write(struct device *dev, enum hwmon_sensor_types type,
+			 u32 attr, int channel, long val)
+{
+	switch (type) {
+	case hwmon_pwm:
+		return nct7904_write_pwm(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
 
-static const struct attribute_group nct7904_fanctl_group = {
-	.attrs = nct7904_fanctl_attrs,
-};
-
-static const struct attribute_group *nct7904_groups[] = {
-	&nct7904_fanin_group,
-	&nct7904_vsen_group,
-	&nct7904_tcpu_group,
-	&nct7904_fanctl_group,
-	NULL
-};
+static umode_t nct7904_is_visible(const void *data,
+				  enum hwmon_sensor_types type,
+				  u32 attr, int channel)
+{
+	switch (type) {
+	case hwmon_in:
+		return nct7904_in_is_visible(data, attr, channel);
+	case hwmon_fan:
+		return nct7904_fan_is_visible(data, attr, channel);
+	case hwmon_pwm:
+		return nct7904_pwm_is_visible(data, attr, channel);
+	case hwmon_temp:
+		return nct7904_temp_is_visible(data, attr, channel);
+	default:
+		return 0;
+	}
+}
 
 /* Return 0 if detection is successful, -ENODEV otherwise */
 static int nct7904_detect(struct i2c_client *client,
@@ -512,6 +400,103 @@
 	return 0;
 }
 
+static const u32 nct7904_in_config[] = {
+	HWMON_I_INPUT,                  /* dummy, skipped in is_visible */
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	HWMON_I_INPUT,
+	0
+};
+
+static const struct hwmon_channel_info nct7904_in = {
+	.type = hwmon_in,
+	.config = nct7904_in_config,
+};
+
+static const u32 nct7904_fan_config[] = {
+            HWMON_F_INPUT,
+            HWMON_F_INPUT,
+            HWMON_F_INPUT,
+            HWMON_F_INPUT,
+            HWMON_F_INPUT,
+            HWMON_F_INPUT,
+            HWMON_F_INPUT,
+            HWMON_F_INPUT,
+	    0
+};
+
+static const struct hwmon_channel_info nct7904_fan = {
+	.type = hwmon_fan,
+	.config = nct7904_fan_config,
+};
+
+static const u32 nct7904_pwm_config[] = {
+            HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+            HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+            HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+            HWMON_PWM_INPUT | HWMON_PWM_ENABLE,
+	    0
+};
+
+static const struct hwmon_channel_info nct7904_pwm = {
+	.type = hwmon_pwm,
+	.config = nct7904_pwm_config,
+};
+
+static const u32 nct7904_temp_config[] = {
+            HWMON_T_INPUT,
+            HWMON_T_INPUT,
+            HWMON_T_INPUT,
+            HWMON_T_INPUT,
+            HWMON_T_INPUT,
+            HWMON_T_INPUT,
+            HWMON_T_INPUT,
+            HWMON_T_INPUT,
+            HWMON_T_INPUT,
+	    0
+};
+
+static const struct hwmon_channel_info nct7904_temp = {
+	.type = hwmon_temp,
+	.config = nct7904_temp_config,
+};
+
+static const struct hwmon_channel_info *nct7904_info[] = {
+	&nct7904_in,
+	&nct7904_fan,
+	&nct7904_pwm,
+	&nct7904_temp,
+	NULL
+};
+
+static const struct hwmon_ops nct7904_hwmon_ops = {
+	.is_visible = nct7904_is_visible,
+	.read = nct7904_read,
+	.write = nct7904_write,
+};
+
+static const struct hwmon_chip_info nct7904_chip_info = {
+	.ops = &nct7904_hwmon_ops,
+	.info = nct7904_info,
+};
+
 static int nct7904_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
@@ -566,8 +551,8 @@
 	}
 
 	hwmon_dev =
-		devm_hwmon_device_register_with_groups(dev, client->name, data,
-						       nct7904_groups);
+		devm_hwmon_device_register_with_info(dev, client->name, data,
+						     &nct7904_chip_info, NULL);
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c
index 8ef7b71..c52d07c 100644
--- a/drivers/hwmon/ntc_thermistor.c
+++ b/drivers/hwmon/ntc_thermistor.c
@@ -253,12 +253,9 @@
 };
 
 struct ntc_data {
-	struct device *hwmon_dev;
 	struct ntc_thermistor_platform_data *pdata;
 	const struct ntc_compensation *comp;
-	struct device *dev;
 	int n_comp;
-	char name[PLATFORM_NAME_SIZE];
 };
 
 #if defined(CONFIG_OF) && IS_ENABLED(CONFIG_IIO)
@@ -316,22 +313,22 @@
 MODULE_DEVICE_TABLE(of, ntc_match);
 
 static struct ntc_thermistor_platform_data *
-ntc_thermistor_parse_dt(struct platform_device *pdev)
+ntc_thermistor_parse_dt(struct device *dev)
 {
 	struct iio_channel *chan;
 	enum iio_chan_type type;
-	struct device_node *np = pdev->dev.of_node;
+	struct device_node *np = dev->of_node;
 	struct ntc_thermistor_platform_data *pdata;
 	int ret;
 
 	if (!np)
 		return NULL;
 
-	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
 		return ERR_PTR(-ENOMEM);
 
-	chan = iio_channel_get(&pdev->dev, NULL);
+	chan = devm_iio_channel_get(dev, NULL);
 	if (IS_ERR(chan))
 		return ERR_CAST(chan);
 
@@ -359,22 +356,15 @@
 
 	return pdata;
 }
-static void ntc_iio_channel_release(struct ntc_thermistor_platform_data *pdata)
-{
-	if (pdata->chan)
-		iio_channel_release(pdata->chan);
-}
 #else
 static struct ntc_thermistor_platform_data *
-ntc_thermistor_parse_dt(struct platform_device *pdev)
+ntc_thermistor_parse_dt(struct device *dev)
 {
 	return NULL;
 }
 
 #define ntc_match	NULL
 
-static void ntc_iio_channel_release(struct ntc_thermistor_platform_data *pdata)
-{ }
 #endif
 
 static inline u64 div64_u64_safe(u64 dividend, u64 divisor)
@@ -516,9 +506,8 @@
 	return -EINVAL;
 }
 
-static int ntc_read_temp(void *dev, int *temp)
+static int ntc_read_temp(void *data, int *temp)
 {
-	struct ntc_data *data = dev_get_drvdata(dev);
 	int ohm;
 
 	ohm = ntc_thermistor_get_ohm(data);
@@ -530,14 +519,6 @@
 	return 0;
 }
 
-static ssize_t ntc_show_name(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct ntc_data *data = dev_get_drvdata(dev);
-
-	return sprintf(buf, "%s\n", data->name);
-}
-
 static ssize_t ntc_show_type(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -559,18 +540,13 @@
 
 static SENSOR_DEVICE_ATTR(temp1_type, S_IRUGO, ntc_show_type, NULL, 0);
 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, ntc_show_temp, NULL, 0);
-static DEVICE_ATTR(name, S_IRUGO, ntc_show_name, NULL);
 
-static struct attribute *ntc_attributes[] = {
-	&dev_attr_name.attr,
+static struct attribute *ntc_attrs[] = {
 	&sensor_dev_attr_temp1_type.dev_attr.attr,
 	&sensor_dev_attr_temp1_input.dev_attr.attr,
 	NULL,
 };
-
-static const struct attribute_group ntc_attr_group = {
-	.attrs = ntc_attributes,
-};
+ATTRIBUTE_GROUPS(ntc);
 
 static const struct thermal_zone_of_device_ops ntc_of_thermal_ops = {
 	.get_temp = ntc_read_temp,
@@ -579,33 +555,34 @@
 static int ntc_thermistor_probe(struct platform_device *pdev)
 {
 	struct thermal_zone_device *tz;
+	struct device *dev = &pdev->dev;
 	const struct of_device_id *of_id =
-			of_match_device(of_match_ptr(ntc_match), &pdev->dev);
+			of_match_device(of_match_ptr(ntc_match), dev);
 	const struct platform_device_id *pdev_id;
 	struct ntc_thermistor_platform_data *pdata;
+	struct device *hwmon_dev;
 	struct ntc_data *data;
-	int ret;
 
-	pdata = ntc_thermistor_parse_dt(pdev);
+	pdata = ntc_thermistor_parse_dt(dev);
 	if (IS_ERR(pdata))
 		return PTR_ERR(pdata);
 	else if (pdata == NULL)
-		pdata = dev_get_platdata(&pdev->dev);
+		pdata = dev_get_platdata(dev);
 
 	if (!pdata) {
-		dev_err(&pdev->dev, "No platform init data supplied.\n");
+		dev_err(dev, "No platform init data supplied.\n");
 		return -ENODEV;
 	}
 
 	/* Either one of the two is required. */
 	if (!pdata->read_uv && !pdata->read_ohm) {
-		dev_err(&pdev->dev,
+		dev_err(dev,
 			"Both read_uv and read_ohm missing. Need either one of the two.\n");
 		return -EINVAL;
 	}
 
 	if (pdata->read_uv && pdata->read_ohm) {
-		dev_warn(&pdev->dev,
+		dev_warn(dev,
 			 "Only one of read_uv and read_ohm is needed; ignoring read_uv.\n");
 		pdata->read_uv = NULL;
 	}
@@ -617,20 +594,17 @@
 				 NTC_CONNECTED_POSITIVE) ||
 				(pdata->connect != NTC_CONNECTED_POSITIVE &&
 				 pdata->connect != NTC_CONNECTED_GROUND))) {
-		dev_err(&pdev->dev,
-			"Required data to use read_uv not supplied.\n");
+		dev_err(dev, "Required data to use read_uv not supplied.\n");
 		return -EINVAL;
 	}
 
-	data = devm_kzalloc(&pdev->dev, sizeof(struct ntc_data), GFP_KERNEL);
+	data = devm_kzalloc(dev, sizeof(struct ntc_data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
 	pdev_id = of_id ? of_id->data : platform_get_device_id(pdev);
 
-	data->dev = &pdev->dev;
 	data->pdata = pdata;
-	strlcpy(data->name, pdev_id->name, sizeof(data->name));
 
 	switch (pdev_id->driver_data) {
 	case TYPE_NCPXXWB473:
@@ -654,49 +628,25 @@
 		data->n_comp = ARRAY_SIZE(ncpXXxh103);
 		break;
 	default:
-		dev_err(&pdev->dev, "Unknown device type: %lu(%s)\n",
+		dev_err(dev, "Unknown device type: %lu(%s)\n",
 				pdev_id->driver_data, pdev_id->name);
 		return -EINVAL;
 	}
 
-	platform_set_drvdata(pdev, data);
-
-	ret = sysfs_create_group(&data->dev->kobj, &ntc_attr_group);
-	if (ret) {
-		dev_err(data->dev, "unable to create sysfs files\n");
-		return ret;
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev, pdev_id->name,
+							   data, ntc_groups);
+	if (IS_ERR(hwmon_dev)) {
+		dev_err(dev, "unable to register as hwmon device.\n");
+		return PTR_ERR(hwmon_dev);
 	}
 
-	data->hwmon_dev = hwmon_device_register(data->dev);
-	if (IS_ERR(data->hwmon_dev)) {
-		dev_err(data->dev, "unable to register as hwmon device.\n");
-		ret = PTR_ERR(data->hwmon_dev);
-		goto err_after_sysfs;
-	}
+	dev_info(dev, "Thermistor type: %s successfully probed.\n",
+		 pdev_id->name);
 
-	dev_info(&pdev->dev, "Thermistor type: %s successfully probed.\n",
-								pdev_id->name);
-
-	tz = devm_thermal_zone_of_sensor_register(data->dev, 0, data->dev,
+	tz = devm_thermal_zone_of_sensor_register(dev, 0, data,
 						  &ntc_of_thermal_ops);
 	if (IS_ERR(tz))
-		dev_dbg(&pdev->dev, "Failed to register to thermal fw.\n");
-
-	return 0;
-err_after_sysfs:
-	sysfs_remove_group(&data->dev->kobj, &ntc_attr_group);
-	ntc_iio_channel_release(pdata);
-	return ret;
-}
-
-static int ntc_thermistor_remove(struct platform_device *pdev)
-{
-	struct ntc_data *data = platform_get_drvdata(pdev);
-	struct ntc_thermistor_platform_data *pdata = data->pdata;
-
-	hwmon_device_unregister(data->hwmon_dev);
-	sysfs_remove_group(&data->dev->kobj, &ntc_attr_group);
-	ntc_iio_channel_release(pdata);
+		dev_dbg(dev, "Failed to register to thermal fw.\n");
 
 	return 0;
 }
@@ -707,7 +657,6 @@
 		.of_match_table = of_match_ptr(ntc_match),
 	},
 	.probe = ntc_thermistor_probe,
-	.remove = ntc_thermistor_remove,
 	.id_table = ntc_thermistor_id,
 };
 
diff --git a/drivers/hwmon/pmbus/Kconfig b/drivers/hwmon/pmbus/Kconfig
index 054d3d8..cad1229 100644
--- a/drivers/hwmon/pmbus/Kconfig
+++ b/drivers/hwmon/pmbus/Kconfig
@@ -126,12 +126,12 @@
 	  be called tps40422.
 
 config SENSORS_UCD9000
-	tristate "TI UCD90120, UCD90124, UCD9090, UCD90910"
+	tristate "TI UCD90120, UCD90124, UCD90160, UCD9090, UCD90910"
 	default n
 	help
 	  If you say yes here you get hardware monitoring support for TI
-	  UCD90120, UCD90124, UCD9090, UCD90910 Sequencer and System Health
-	  Controllers.
+	  UCD90120, UCD90124, UCD90160, UCD9090, UCD90910, Sequencer and System
+	  Health Controllers.
 
 	  This driver can also be built as a module. If so, the module will
 	  be called ucd9000.
diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c
index 0a74991..44ca8a9 100644
--- a/drivers/hwmon/pmbus/pmbus.c
+++ b/drivers/hwmon/pmbus/pmbus.c
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/i2c.h>
+#include <linux/i2c/pmbus.h>
 #include "pmbus.h"
 
 /*
@@ -167,14 +168,26 @@
 		       const struct i2c_device_id *id)
 {
 	struct pmbus_driver_info *info;
+	struct pmbus_platform_data *pdata = NULL;
+	struct device *dev = &client->dev;
 
-	info = devm_kzalloc(&client->dev, sizeof(struct pmbus_driver_info),
-			    GFP_KERNEL);
+	info = devm_kzalloc(dev, sizeof(struct pmbus_driver_info), GFP_KERNEL);
 	if (!info)
 		return -ENOMEM;
 
+	if (!strcmp(id->name, "dps460") || !strcmp(id->name, "dps800") ||
+	    !strcmp(id->name, "sgd009")) {
+		pdata = devm_kzalloc(dev, sizeof(struct pmbus_platform_data),
+				     GFP_KERNEL);
+		if (!pdata)
+			return -ENOMEM;
+
+		pdata->flags = PMBUS_SKIP_STATUS_CHECK;
+	}
+
 	info->pages = id->driver_data;
 	info->identify = pmbus_identify;
+	dev->platform_data = pdata;
 
 	return pmbus_do_probe(client, id, info);
 }
@@ -186,6 +199,8 @@
 	{"adp4000", 1},
 	{"bmr453", 1},
 	{"bmr454", 1},
+	{"dps460", 1},
+	{"dps800", 1},
 	{"mdt040", 1},
 	{"ncp4200", 1},
 	{"ncp4208", 1},
@@ -193,6 +208,7 @@
 	{"pdt006", 1},
 	{"pdt012", 1},
 	{"pmbus", 0},
+	{"sgd009", 1},
 	{"tps40400", 1},
 	{"tps544b20", 1},
 	{"tps544b25", 1},
diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index fbb1479..3e3aa95 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -28,7 +28,7 @@
 #include <linux/i2c/pmbus.h>
 #include "pmbus.h"
 
-enum chips { ucd9000, ucd90120, ucd90124, ucd9090, ucd90910 };
+enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd9090, ucd90910 };
 
 #define UCD9000_MONITOR_CONFIG		0xd5
 #define UCD9000_NUM_PAGES		0xd6
@@ -112,6 +112,7 @@
 	{"ucd9000", ucd9000},
 	{"ucd90120", ucd90120},
 	{"ucd90124", ucd90124},
+	{"ucd90160", ucd90160},
 	{"ucd9090", ucd9090},
 	{"ucd90910", ucd90910},
 	{}
diff --git a/drivers/hwmon/scpi-hwmon.c b/drivers/hwmon/scpi-hwmon.c
index 25b44e6..559a3dc 100644
--- a/drivers/hwmon/scpi-hwmon.c
+++ b/drivers/hwmon/scpi-hwmon.c
@@ -255,7 +255,6 @@
 static struct platform_driver scpi_hwmon_platdrv = {
 	.driver = {
 		.name	= "scpi-hwmon",
-		.owner	= THIS_MODULE,
 		.of_match_table = scpi_of_match,
 	},
 	.probe		= scpi_hwmon_probe,
diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c
index 8479ac5..36bba2a 100644
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -25,7 +25,6 @@
 #include <linux/device.h>
 #include <linux/jiffies.h>
 #include <linux/regmap.h>
-#include <linux/thermal.h>
 #include <linux/of.h>
 
 #define	DRIVER_NAME "tmp102"
@@ -79,84 +78,113 @@
 	return (val * 128) / 1000;
 }
 
-static int tmp102_read_temp(void *dev, int *temp)
+static int tmp102_read(struct device *dev, enum hwmon_sensor_types type,
+		       u32 attr, int channel, long *temp)
 {
 	struct tmp102 *tmp102 = dev_get_drvdata(dev);
-	unsigned int reg;
-	int ret;
+	unsigned int regval;
+	int err, reg;
 
-	if (time_before(jiffies, tmp102->ready_time)) {
-		dev_dbg(dev, "%s: Conversion not ready yet..\n", __func__);
-		return -EAGAIN;
+	switch (attr) {
+	case hwmon_temp_input:
+		/* Is it too early to return a conversion ? */
+		if (time_before(jiffies, tmp102->ready_time)) {
+			dev_dbg(dev, "%s: Conversion not ready yet..\n", __func__);
+			return -EAGAIN;
+		}
+		reg = TMP102_TEMP_REG;
+		break;
+	case hwmon_temp_max_hyst:
+		reg = TMP102_TLOW_REG;
+		break;
+	case hwmon_temp_max:
+		reg = TMP102_THIGH_REG;
+		break;
+	default:
+		return -EOPNOTSUPP;
 	}
 
-	ret = regmap_read(tmp102->regmap, TMP102_TEMP_REG, &reg);
-	if (ret < 0)
-		return ret;
-
-	*temp = tmp102_reg_to_mC(reg);
+	err = regmap_read(tmp102->regmap, reg, &regval);
+	if (err < 0)
+		return err;
+	*temp = tmp102_reg_to_mC(regval);
 
 	return 0;
 }
 
-static ssize_t tmp102_show_temp(struct device *dev,
-				struct device_attribute *attr,
-				char *buf)
+static int tmp102_write(struct device *dev, enum hwmon_sensor_types type,
+			u32 attr, int channel, long temp)
 {
-	struct sensor_device_attribute *sda = to_sensor_dev_attr(attr);
 	struct tmp102 *tmp102 = dev_get_drvdata(dev);
-	int regaddr = sda->index;
-	unsigned int reg;
-	int err;
+	int reg;
 
-	if (regaddr == TMP102_TEMP_REG &&
-	    time_before(jiffies, tmp102->ready_time))
-		return -EAGAIN;
+	switch (attr) {
+	case hwmon_temp_max_hyst:
+		reg = TMP102_TLOW_REG;
+		break;
+	case hwmon_temp_max:
+		reg = TMP102_THIGH_REG;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
 
-	err = regmap_read(tmp102->regmap, regaddr, &reg);
-	if (err < 0)
-		return err;
-
-	return sprintf(buf, "%d\n", tmp102_reg_to_mC(reg));
+	temp = clamp_val(temp, -256000, 255000);
+	return regmap_write(tmp102->regmap, reg, tmp102_mC_to_reg(temp));
 }
 
-static ssize_t tmp102_set_temp(struct device *dev,
-			       struct device_attribute *attr,
-			       const char *buf, size_t count)
+static umode_t tmp102_is_visible(const void *data, enum hwmon_sensor_types type,
+				 u32 attr, int channel)
 {
-	struct sensor_device_attribute *sda = to_sensor_dev_attr(attr);
-	struct tmp102 *tmp102 = dev_get_drvdata(dev);
-	int reg = sda->index;
-	long val;
-	int err;
+	if (type != hwmon_temp)
+		return 0;
 
-	if (kstrtol(buf, 10, &val) < 0)
-		return -EINVAL;
-	val = clamp_val(val, -256000, 255000);
-
-	err = regmap_write(tmp102->regmap, reg, tmp102_mC_to_reg(val));
-	return err ? : count;
+	switch (attr) {
+	case hwmon_temp_input:
+		return S_IRUGO;
+	case hwmon_temp_max_hyst:
+	case hwmon_temp_max:
+		return S_IRUGO | S_IWUSR;
+	default:
+		return 0;
+	}
 }
 
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, tmp102_show_temp, NULL,
-			  TMP102_TEMP_REG);
+static u32 tmp102_chip_config[] = {
+	HWMON_C_REGISTER_TZ,
+	0
+};
 
-static SENSOR_DEVICE_ATTR(temp1_max_hyst, S_IWUSR | S_IRUGO, tmp102_show_temp,
-			  tmp102_set_temp, TMP102_TLOW_REG);
+static const struct hwmon_channel_info tmp102_chip = {
+	.type = hwmon_chip,
+	.config = tmp102_chip_config,
+};
 
-static SENSOR_DEVICE_ATTR(temp1_max, S_IWUSR | S_IRUGO, tmp102_show_temp,
-			  tmp102_set_temp, TMP102_THIGH_REG);
+static u32 tmp102_temp_config[] = {
+	HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MAX_HYST,
+	0
+};
 
-static struct attribute *tmp102_attrs[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp1_max_hyst.dev_attr.attr,
-	&sensor_dev_attr_temp1_max.dev_attr.attr,
+static const struct hwmon_channel_info tmp102_temp = {
+	.type = hwmon_temp,
+	.config = tmp102_temp_config,
+};
+
+static const struct hwmon_channel_info *tmp102_info[] = {
+	&tmp102_chip,
+	&tmp102_temp,
 	NULL
 };
-ATTRIBUTE_GROUPS(tmp102);
 
-static const struct thermal_zone_of_device_ops tmp102_of_thermal_ops = {
-	.get_temp = tmp102_read_temp,
+static const struct hwmon_ops tmp102_hwmon_ops = {
+	.is_visible = tmp102_is_visible,
+	.read = tmp102_read,
+	.write = tmp102_write,
+};
+
+static const struct hwmon_chip_info tmp102_chip_info = {
+	.ops = &tmp102_hwmon_ops,
+	.info = tmp102_info,
 };
 
 static void tmp102_restore_config(void *data)
@@ -188,7 +216,7 @@
 };
 
 static int tmp102_probe(struct i2c_client *client,
-				  const struct i2c_device_id *id)
+			const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
 	struct device *hwmon_dev;
@@ -249,16 +277,14 @@
 		tmp102->ready_time += msecs_to_jiffies(CONVERSION_TIME_MS);
 	}
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
-							   tmp102,
-							   tmp102_groups);
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+							 tmp102,
+							 &tmp102_chip_info,
+							 NULL);
 	if (IS_ERR(hwmon_dev)) {
 		dev_dbg(dev, "unable to register hwmon device\n");
 		return PTR_ERR(hwmon_dev);
 	}
-	devm_thermal_zone_of_sensor_register(hwmon_dev, 0, hwmon_dev,
-					     &tmp102_of_thermal_ops);
-
 	dev_info(dev, "initialized\n");
 
 	return 0;
diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c
index 85d48d8..bfb98b9 100644
--- a/drivers/hwmon/tmp421.c
+++ b/drivers/hwmon/tmp421.c
@@ -72,6 +72,10 @@
 struct tmp421_data {
 	struct i2c_client *client;
 	struct mutex update_lock;
+	u32 temp_config[5];
+	struct hwmon_channel_info temp_info;
+	const struct hwmon_channel_info *info[2];
+	struct hwmon_chip_info chip;
 	char valid;
 	unsigned long last_updated;
 	int channels;
@@ -125,85 +129,46 @@
 	return data;
 }
 
-static ssize_t show_temp_value(struct device *dev,
-			       struct device_attribute *devattr, char *buf)
+static int tmp421_read(struct device *dev, enum hwmon_sensor_types type,
+		       u32 attr, int channel, long *val)
 {
-	int index = to_sensor_dev_attr(devattr)->index;
-	struct tmp421_data *data = tmp421_update_device(dev);
-	int temp;
+	struct tmp421_data *tmp421 = tmp421_update_device(dev);
 
-	mutex_lock(&data->update_lock);
-	if (data->config & TMP421_CONFIG_RANGE)
-		temp = temp_from_u16(data->temp[index]);
-	else
-		temp = temp_from_s16(data->temp[index]);
-	mutex_unlock(&data->update_lock);
+	switch (attr) {
+	case hwmon_temp_input:
+		if (tmp421->config & TMP421_CONFIG_RANGE)
+			*val = temp_from_u16(tmp421->temp[channel]);
+		else
+			*val = temp_from_s16(tmp421->temp[channel]);
+		return 0;
+	case hwmon_temp_fault:
+		/*
+		 * The OPEN bit signals a fault. This is bit 0 of the temperature
+		 * register (low byte).
+		 */
+		*val = tmp421->temp[channel] & 0x01;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
 
-	return sprintf(buf, "%d\n", temp);
 }
 
-static ssize_t show_fault(struct device *dev,
-			  struct device_attribute *devattr, char *buf)
+static umode_t tmp421_is_visible(const void *data, enum hwmon_sensor_types type,
+				 u32 attr, int channel)
 {
-	int index = to_sensor_dev_attr(devattr)->index;
-	struct tmp421_data *data = tmp421_update_device(dev);
-
-	/*
-	 * The OPEN bit signals a fault. This is bit 0 of the temperature
-	 * register (low byte).
-	 */
-	if (data->temp[index] & 0x01)
-		return sprintf(buf, "1\n");
-	else
-		return sprintf(buf, "0\n");
+	switch (attr) {
+	case hwmon_temp_fault:
+		if (channel == 0)
+			return 0;
+		return S_IRUGO;
+	case hwmon_temp_input:
+		return S_IRUGO;
+	default:
+		return 0;
+	}
 }
 
-static umode_t tmp421_is_visible(struct kobject *kobj, struct attribute *a,
-				int n)
-{
-	struct device *dev = container_of(kobj, struct device, kobj);
-	struct tmp421_data *data = dev_get_drvdata(dev);
-	struct device_attribute *devattr;
-	unsigned int index;
-
-	devattr = container_of(a, struct device_attribute, attr);
-	index = to_sensor_dev_attr(devattr)->index;
-
-	if (index < data->channels)
-		return a->mode;
-
-	return 0;
-}
-
-static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp_value, NULL, 0);
-static SENSOR_DEVICE_ATTR(temp2_input, S_IRUGO, show_temp_value, NULL, 1);
-static SENSOR_DEVICE_ATTR(temp2_fault, S_IRUGO, show_fault, NULL, 1);
-static SENSOR_DEVICE_ATTR(temp3_input, S_IRUGO, show_temp_value, NULL, 2);
-static SENSOR_DEVICE_ATTR(temp3_fault, S_IRUGO, show_fault, NULL, 2);
-static SENSOR_DEVICE_ATTR(temp4_input, S_IRUGO, show_temp_value, NULL, 3);
-static SENSOR_DEVICE_ATTR(temp4_fault, S_IRUGO, show_fault, NULL, 3);
-
-static struct attribute *tmp421_attr[] = {
-	&sensor_dev_attr_temp1_input.dev_attr.attr,
-	&sensor_dev_attr_temp2_input.dev_attr.attr,
-	&sensor_dev_attr_temp2_fault.dev_attr.attr,
-	&sensor_dev_attr_temp3_input.dev_attr.attr,
-	&sensor_dev_attr_temp3_fault.dev_attr.attr,
-	&sensor_dev_attr_temp4_input.dev_attr.attr,
-	&sensor_dev_attr_temp4_fault.dev_attr.attr,
-	NULL
-};
-
-static const struct attribute_group tmp421_group = {
-	.attrs = tmp421_attr,
-	.is_visible = tmp421_is_visible,
-};
-
-static const struct attribute_group *tmp421_groups[] = {
-	&tmp421_group,
-	NULL
-};
-
 static int tmp421_init_client(struct i2c_client *client)
 {
 	int config, config_orig;
@@ -289,13 +254,18 @@
 	return 0;
 }
 
+static const struct hwmon_ops tmp421_ops = {
+	.is_visible = tmp421_is_visible,
+	.read = tmp421_read,
+};
+
 static int tmp421_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
 	struct device *hwmon_dev;
 	struct tmp421_data *data;
-	int err;
+	int i, err;
 
 	data = devm_kzalloc(dev, sizeof(struct tmp421_data), GFP_KERNEL);
 	if (!data)
@@ -309,8 +279,21 @@
 	if (err)
 		return err;
 
-	hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
-							   data, tmp421_groups);
+	for (i = 0; i < data->channels; i++)
+		data->temp_config[i] = HWMON_T_INPUT | HWMON_T_FAULT;
+
+	data->chip.ops = &tmp421_ops;
+	data->chip.info = data->info;
+
+	data->info[0] = &data->temp_info;
+
+	data->temp_info.type = hwmon_temp;
+	data->temp_info.config = data->temp_config;
+
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, client->name,
+							 data,
+							 &data->chip,
+							 NULL);
 	return PTR_ERR_OR_ZERO(hwmon_dev);
 }
 
diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
new file mode 100644
index 0000000..9c0dbb8
--- /dev/null
+++ b/drivers/hwmon/xgene-hwmon.c
@@ -0,0 +1,787 @@
+/*
+ * APM X-Gene SoC Hardware Monitoring Driver
+ *
+ * Copyright (c) 2016, Applied Micro Circuits Corporation
+ * Author: Loc Ho <lho@apm.com>
+ *         Hoan Tran <hotran@apm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * This driver provides the following features:
+ *  - Retrieve CPU total power (uW)
+ *  - Retrieve IO total power (uW)
+ *  - Retrieve SoC temperature (milli-degree C) and alarm
+ */
+#include <linux/acpi.h>
+#include <linux/dma-mapping.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/kfifo.h>
+#include <linux/mailbox_controller.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include <acpi/pcc.h>
+
+/* SLIMpro message defines */
+#define MSG_TYPE_DBG			0
+#define MSG_TYPE_ERR			7
+#define MSG_TYPE_PWRMGMT		9
+
+#define MSG_TYPE(v)			(((v) & 0xF0000000) >> 28)
+#define MSG_TYPE_SET(v)			(((v) << 28) & 0xF0000000)
+#define MSG_SUBTYPE(v)			(((v) & 0x0F000000) >> 24)
+#define MSG_SUBTYPE_SET(v)		(((v) << 24) & 0x0F000000)
+
+#define DBG_SUBTYPE_SENSOR_READ		4
+#define SENSOR_RD_MSG			0x04FFE902
+#define SENSOR_RD_EN_ADDR(a)		((a) & 0x000FFFFF)
+#define PMD_PWR_REG			0x20
+#define PMD_PWR_MW_REG			0x26
+#define SOC_PWR_REG			0x21
+#define SOC_PWR_MW_REG			0x27
+#define SOC_TEMP_REG			0x10
+
+#define TEMP_NEGATIVE_BIT		8
+#define SENSOR_INVALID_DATA		BIT(15)
+
+#define PWRMGMT_SUBTYPE_TPC		1
+#define TPC_ALARM			2
+#define TPC_GET_ALARM			3
+#define TPC_CMD(v)			(((v) & 0x00FF0000) >> 16)
+#define TPC_CMD_SET(v)			(((v) << 16) & 0x00FF0000)
+#define TPC_EN_MSG(hndl, cmd, type) \
+	(MSG_TYPE_SET(MSG_TYPE_PWRMGMT) | \
+	MSG_SUBTYPE_SET(hndl) | TPC_CMD_SET(cmd) | type)
+
+/* PCC defines */
+#define PCC_SIGNATURE_MASK		0x50424300
+#define PCCC_GENERATE_DB_INT		BIT(15)
+#define PCCS_CMD_COMPLETE		BIT(0)
+#define PCCS_SCI_DOORBEL		BIT(1)
+#define PCCS_PLATFORM_NOTIFICATION	BIT(3)
+/*
+ * Arbitrary retries in case the remote processor is slow to respond
+ * to PCC commands
+ */
+#define PCC_NUM_RETRIES			500
+
+#define ASYNC_MSG_FIFO_SIZE		16
+#define MBOX_OP_TIMEOUTMS		1000
+
+#define WATT_TO_mWATT(x)		((x) * 1000)
+#define mWATT_TO_uWATT(x)		((x) * 1000)
+#define CELSIUS_TO_mCELSIUS(x)		((x) * 1000)
+
+#define to_xgene_hwmon_dev(cl)		\
+	container_of(cl, struct xgene_hwmon_dev, mbox_client)
+
+struct slimpro_resp_msg {
+	u32 msg;
+	u32 param1;
+	u32 param2;
+} __packed;
+
+struct xgene_hwmon_dev {
+	struct device		*dev;
+	struct mbox_chan	*mbox_chan;
+	struct mbox_client	mbox_client;
+	int			mbox_idx;
+
+	spinlock_t		kfifo_lock;
+	struct mutex		rd_mutex;
+	struct completion	rd_complete;
+	int			resp_pending;
+	struct slimpro_resp_msg sync_msg;
+
+	struct work_struct	workq;
+	struct kfifo_rec_ptr_1	async_msg_fifo;
+
+	struct device		*hwmon_dev;
+	bool			temp_critical_alarm;
+
+	phys_addr_t		comm_base_addr;
+	void			*pcc_comm_addr;
+	u64			usecs_lat;
+};
+
+/*
+ * This function tests and clears a bitmask then returns its old value
+ */
+static u16 xgene_word_tst_and_clr(u16 *addr, u16 mask)
+{
+	u16 ret, val;
+
+	val = le16_to_cpu(READ_ONCE(*addr));
+	ret = val & mask;
+	val &= ~mask;
+	WRITE_ONCE(*addr, cpu_to_le16(val));
+
+	return ret;
+}
+
+static int xgene_hwmon_pcc_rd(struct xgene_hwmon_dev *ctx, u32 *msg)
+{
+	struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
+	u32 *ptr = (void *)(generic_comm_base + 1);
+	int rc, i;
+	u16 val;
+
+	mutex_lock(&ctx->rd_mutex);
+	init_completion(&ctx->rd_complete);
+	ctx->resp_pending = true;
+
+	/* Write signature for subspace */
+	WRITE_ONCE(generic_comm_base->signature,
+		   cpu_to_le32(PCC_SIGNATURE_MASK | ctx->mbox_idx));
+
+	/* Write to the shared command region */
+	WRITE_ONCE(generic_comm_base->command,
+		   cpu_to_le16(MSG_TYPE(msg[0]) | PCCC_GENERATE_DB_INT));
+
+	/* Flip CMD COMPLETE bit */
+	val = le16_to_cpu(READ_ONCE(generic_comm_base->status));
+	val &= ~PCCS_CMD_COMPLETE;
+	WRITE_ONCE(generic_comm_base->status, cpu_to_le16(val));
+
+	/* Copy the message to the PCC comm space */
+	for (i = 0; i < sizeof(struct slimpro_resp_msg) / 4; i++)
+		WRITE_ONCE(ptr[i], cpu_to_le32(msg[i]));
+
+	/* Ring the doorbell */
+	rc = mbox_send_message(ctx->mbox_chan, msg);
+	if (rc < 0) {
+		dev_err(ctx->dev, "Mailbox send error %d\n", rc);
+		goto err;
+	}
+	if (!wait_for_completion_timeout(&ctx->rd_complete,
+					 usecs_to_jiffies(ctx->usecs_lat))) {
+		dev_err(ctx->dev, "Mailbox operation timed out\n");
+		rc = -ETIMEDOUT;
+		goto err;
+	}
+
+	/* Check for error message */
+	if (MSG_TYPE(ctx->sync_msg.msg) == MSG_TYPE_ERR) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	msg[0] = ctx->sync_msg.msg;
+	msg[1] = ctx->sync_msg.param1;
+	msg[2] = ctx->sync_msg.param2;
+
+err:
+	mbox_chan_txdone(ctx->mbox_chan, 0);
+	ctx->resp_pending = false;
+	mutex_unlock(&ctx->rd_mutex);
+	return rc;
+}
+
+static int xgene_hwmon_rd(struct xgene_hwmon_dev *ctx, u32 *msg)
+{
+	int rc;
+
+	mutex_lock(&ctx->rd_mutex);
+	init_completion(&ctx->rd_complete);
+	ctx->resp_pending = true;
+
+	rc = mbox_send_message(ctx->mbox_chan, msg);
+	if (rc < 0) {
+		dev_err(ctx->dev, "Mailbox send error %d\n", rc);
+		goto err;
+	}
+
+	if (!wait_for_completion_timeout(&ctx->rd_complete,
+					 msecs_to_jiffies(MBOX_OP_TIMEOUTMS))) {
+		dev_err(ctx->dev, "Mailbox operation timed out\n");
+		rc = -ETIMEDOUT;
+		goto err;
+	}
+
+	/* Check for error message */
+	if (MSG_TYPE(ctx->sync_msg.msg) == MSG_TYPE_ERR) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	msg[0] = ctx->sync_msg.msg;
+	msg[1] = ctx->sync_msg.param1;
+	msg[2] = ctx->sync_msg.param2;
+
+err:
+	ctx->resp_pending = false;
+	mutex_unlock(&ctx->rd_mutex);
+	return rc;
+}
+
+static int xgene_hwmon_reg_map_rd(struct xgene_hwmon_dev *ctx, u32 addr,
+				  u32 *data)
+{
+	u32 msg[3];
+	int rc;
+
+	msg[0] = SENSOR_RD_MSG;
+	msg[1] = SENSOR_RD_EN_ADDR(addr);
+	msg[2] = 0;
+
+	if (acpi_disabled)
+		rc = xgene_hwmon_rd(ctx, msg);
+	else
+		rc = xgene_hwmon_pcc_rd(ctx, msg);
+
+	if (rc < 0)
+		return rc;
+
+	/*
+	 * Check if sensor data is valid.
+	 */
+	if (msg[1] & SENSOR_INVALID_DATA)
+		return -ENODATA;
+
+	*data = msg[1];
+
+	return rc;
+}
+
+static int xgene_hwmon_get_notification_msg(struct xgene_hwmon_dev *ctx,
+					    u32 *amsg)
+{
+	u32 msg[3];
+	int rc;
+
+	msg[0] = TPC_EN_MSG(PWRMGMT_SUBTYPE_TPC, TPC_GET_ALARM, 0);
+	msg[1] = 0;
+	msg[2] = 0;
+
+	rc = xgene_hwmon_pcc_rd(ctx, msg);
+	if (rc < 0)
+		return rc;
+
+	amsg[0] = msg[0];
+	amsg[1] = msg[1];
+	amsg[2] = msg[2];
+
+	return rc;
+}
+
+static int xgene_hwmon_get_cpu_pwr(struct xgene_hwmon_dev *ctx, u32 *val)
+{
+	u32 watt, mwatt;
+	int rc;
+
+	rc = xgene_hwmon_reg_map_rd(ctx, PMD_PWR_REG, &watt);
+	if (rc < 0)
+		return rc;
+
+	rc = xgene_hwmon_reg_map_rd(ctx, PMD_PWR_MW_REG, &mwatt);
+	if (rc < 0)
+		return rc;
+
+	*val = WATT_TO_mWATT(watt) + mwatt;
+	return 0;
+}
+
+static int xgene_hwmon_get_io_pwr(struct xgene_hwmon_dev *ctx, u32 *val)
+{
+	u32 watt, mwatt;
+	int rc;
+
+	rc = xgene_hwmon_reg_map_rd(ctx, SOC_PWR_REG, &watt);
+	if (rc < 0)
+		return rc;
+
+	rc = xgene_hwmon_reg_map_rd(ctx, SOC_PWR_MW_REG, &mwatt);
+	if (rc < 0)
+		return rc;
+
+	*val = WATT_TO_mWATT(watt) + mwatt;
+	return 0;
+}
+
+static int xgene_hwmon_get_temp(struct xgene_hwmon_dev *ctx, u32 *val)
+{
+	return xgene_hwmon_reg_map_rd(ctx, SOC_TEMP_REG, val);
+}
+
+/*
+ * Sensor temperature/power functions
+ */
+static ssize_t temp1_input_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct xgene_hwmon_dev *ctx = dev_get_drvdata(dev);
+	int rc, temp;
+	u32 val;
+
+	rc = xgene_hwmon_get_temp(ctx, &val);
+	if (rc < 0)
+		return rc;
+
+	temp = sign_extend32(val, TEMP_NEGATIVE_BIT);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", CELSIUS_TO_mCELSIUS(temp));
+}
+
+static ssize_t temp1_label_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "SoC Temperature\n");
+}
+
+static ssize_t temp1_critical_alarm_show(struct device *dev,
+					 struct device_attribute *devattr,
+					 char *buf)
+{
+	struct xgene_hwmon_dev *ctx = dev_get_drvdata(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", ctx->temp_critical_alarm);
+}
+
+static ssize_t power1_label_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "CPU power\n");
+}
+
+static ssize_t power2_label_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "IO power\n");
+}
+
+static ssize_t power1_input_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct xgene_hwmon_dev *ctx = dev_get_drvdata(dev);
+	u32 val;
+	int rc;
+
+	rc = xgene_hwmon_get_cpu_pwr(ctx, &val);
+	if (rc < 0)
+		return rc;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", mWATT_TO_uWATT(val));
+}
+
+static ssize_t power2_input_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct xgene_hwmon_dev *ctx = dev_get_drvdata(dev);
+	u32 val;
+	int rc;
+
+	rc = xgene_hwmon_get_io_pwr(ctx, &val);
+	if (rc < 0)
+		return rc;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", mWATT_TO_uWATT(val));
+}
+
+static DEVICE_ATTR_RO(temp1_label);
+static DEVICE_ATTR_RO(temp1_input);
+static DEVICE_ATTR_RO(temp1_critical_alarm);
+static DEVICE_ATTR_RO(power1_label);
+static DEVICE_ATTR_RO(power1_input);
+static DEVICE_ATTR_RO(power2_label);
+static DEVICE_ATTR_RO(power2_input);
+
+static struct attribute *xgene_hwmon_attrs[] = {
+	&dev_attr_temp1_label.attr,
+	&dev_attr_temp1_input.attr,
+	&dev_attr_temp1_critical_alarm.attr,
+	&dev_attr_power1_label.attr,
+	&dev_attr_power1_input.attr,
+	&dev_attr_power2_label.attr,
+	&dev_attr_power2_input.attr,
+	NULL,
+};
+
+ATTRIBUTE_GROUPS(xgene_hwmon);
+
+static int xgene_hwmon_tpc_alarm(struct xgene_hwmon_dev *ctx,
+				 struct slimpro_resp_msg *amsg)
+{
+	ctx->temp_critical_alarm = !!amsg->param2;
+	sysfs_notify(&ctx->dev->kobj, NULL, "temp1_critical_alarm");
+
+	return 0;
+}
+
+static void xgene_hwmon_process_pwrmsg(struct xgene_hwmon_dev *ctx,
+				       struct slimpro_resp_msg *amsg)
+{
+	if ((MSG_SUBTYPE(amsg->msg) == PWRMGMT_SUBTYPE_TPC) &&
+	    (TPC_CMD(amsg->msg) == TPC_ALARM))
+		xgene_hwmon_tpc_alarm(ctx, amsg);
+}
+
+/*
+ * This function is called to process async work queue
+ */
+static void xgene_hwmon_evt_work(struct work_struct *work)
+{
+	struct slimpro_resp_msg amsg;
+	struct xgene_hwmon_dev *ctx;
+	int ret;
+
+	ctx = container_of(work, struct xgene_hwmon_dev, workq);
+	while (kfifo_out_spinlocked(&ctx->async_msg_fifo, &amsg,
+				    sizeof(struct slimpro_resp_msg),
+				    &ctx->kfifo_lock)) {
+		/*
+		 * If PCC, send a consumer command to Platform to get info
+		 * If Slimpro Mailbox, get message from specific FIFO
+		 */
+		if (!acpi_disabled) {
+			ret = xgene_hwmon_get_notification_msg(ctx,
+							       (u32 *)&amsg);
+			if (ret < 0)
+				continue;
+		}
+
+		if (MSG_TYPE(amsg.msg) == MSG_TYPE_PWRMGMT)
+			xgene_hwmon_process_pwrmsg(ctx, &amsg);
+	}
+}
+
+static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
+{
+	if (IS_ERR_OR_NULL(ctx->hwmon_dev) && !ctx->resp_pending) {
+		/* Enqueue to the FIFO */
+		kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
+				    sizeof(struct slimpro_resp_msg),
+				    &ctx->kfifo_lock);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * This function is called when the SLIMpro Mailbox received a message
+ */
+static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
+{
+	struct xgene_hwmon_dev *ctx = to_xgene_hwmon_dev(cl);
+
+	/*
+	 * While the driver registers with the mailbox framework, an interrupt
+	 * can be pending before the probe function completes its
+	 * initialization. If such condition occurs, just queue up the message
+	 * as the driver is not ready for servicing the callback.
+	 */
+	if (xgene_hwmon_rx_ready(ctx, msg) < 0)
+		return;
+
+	/*
+	 * Response message format:
+	 * msg[0] is the return code of the operation
+	 * msg[1] is the first parameter word
+	 * msg[2] is the second parameter word
+	 *
+	 * As message only supports dword size, just assign it.
+	 */
+
+	/* Check for sync query */
+	if (ctx->resp_pending &&
+	    ((MSG_TYPE(((u32 *)msg)[0]) == MSG_TYPE_ERR) ||
+	     (MSG_TYPE(((u32 *)msg)[0]) == MSG_TYPE_DBG &&
+	      MSG_SUBTYPE(((u32 *)msg)[0]) == DBG_SUBTYPE_SENSOR_READ) ||
+	     (MSG_TYPE(((u32 *)msg)[0]) == MSG_TYPE_PWRMGMT &&
+	      MSG_SUBTYPE(((u32 *)msg)[0]) == PWRMGMT_SUBTYPE_TPC &&
+	      TPC_CMD(((u32 *)msg)[0]) == TPC_ALARM))) {
+		ctx->sync_msg.msg = ((u32 *)msg)[0];
+		ctx->sync_msg.param1 = ((u32 *)msg)[1];
+		ctx->sync_msg.param2 = ((u32 *)msg)[2];
+
+		/* Operation waiting for response */
+		complete(&ctx->rd_complete);
+
+		return;
+	}
+
+	/* Enqueue to the FIFO */
+	kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
+			    sizeof(struct slimpro_resp_msg), &ctx->kfifo_lock);
+	/* Schedule the bottom handler */
+	schedule_work(&ctx->workq);
+}
+
+/*
+ * This function is called when the PCC Mailbox received a message
+ */
+static void xgene_hwmon_pcc_rx_cb(struct mbox_client *cl, void *msg)
+{
+	struct xgene_hwmon_dev *ctx = to_xgene_hwmon_dev(cl);
+	struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
+	struct slimpro_resp_msg amsg;
+
+	/*
+	 * While the driver registers with the mailbox framework, an interrupt
+	 * can be pending before the probe function completes its
+	 * initialization. If such condition occurs, just queue up the message
+	 * as the driver is not ready for servicing the callback.
+	 */
+	if (xgene_hwmon_rx_ready(ctx, &amsg) < 0)
+		return;
+
+	msg = generic_comm_base + 1;
+	/* Check if platform sends interrupt */
+	if (!xgene_word_tst_and_clr(&generic_comm_base->status,
+				    PCCS_SCI_DOORBEL))
+		return;
+
+	/*
+	 * Response message format:
+	 * msg[0] is the return code of the operation
+	 * msg[1] is the first parameter word
+	 * msg[2] is the second parameter word
+	 *
+	 * As message only supports dword size, just assign it.
+	 */
+
+	/* Check for sync query */
+	if (ctx->resp_pending &&
+	    ((MSG_TYPE(((u32 *)msg)[0]) == MSG_TYPE_ERR) ||
+	     (MSG_TYPE(((u32 *)msg)[0]) == MSG_TYPE_DBG &&
+	      MSG_SUBTYPE(((u32 *)msg)[0]) == DBG_SUBTYPE_SENSOR_READ) ||
+	     (MSG_TYPE(((u32 *)msg)[0]) == MSG_TYPE_PWRMGMT &&
+	      MSG_SUBTYPE(((u32 *)msg)[0]) == PWRMGMT_SUBTYPE_TPC &&
+	      TPC_CMD(((u32 *)msg)[0]) == TPC_ALARM))) {
+		/* Check if platform completes command */
+		if (xgene_word_tst_and_clr(&generic_comm_base->status,
+					   PCCS_CMD_COMPLETE)) {
+			ctx->sync_msg.msg = ((u32 *)msg)[0];
+			ctx->sync_msg.param1 = ((u32 *)msg)[1];
+			ctx->sync_msg.param2 = ((u32 *)msg)[2];
+
+			/* Operation waiting for response */
+			complete(&ctx->rd_complete);
+
+			return;
+		}
+	}
+
+	/*
+	 * Platform notifies interrupt to OSPM.
+	 * OPSM schedules a consumer command to get this information
+	 * in a workqueue. Platform must wait until OSPM has issued
+	 * a consumer command that serves this notification.
+	 */
+
+	/* Enqueue to the FIFO */
+	kfifo_in_spinlocked(&ctx->async_msg_fifo, &amsg,
+			    sizeof(struct slimpro_resp_msg), &ctx->kfifo_lock);
+	/* Schedule the bottom handler */
+	schedule_work(&ctx->workq);
+}
+
+static void xgene_hwmon_tx_done(struct mbox_client *cl, void *msg, int ret)
+{
+	if (ret) {
+		dev_dbg(cl->dev, "TX did not complete: CMD sent:%x, ret:%d\n",
+			*(u16 *)msg, ret);
+	} else {
+		dev_dbg(cl->dev, "TX completed. CMD sent:%x, ret:%d\n",
+			*(u16 *)msg, ret);
+	}
+}
+
+static int xgene_hwmon_probe(struct platform_device *pdev)
+{
+	struct xgene_hwmon_dev *ctx;
+	struct mbox_client *cl;
+	int rc;
+
+	ctx = devm_kzalloc(&pdev->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ctx);
+	cl = &ctx->mbox_client;
+
+	spin_lock_init(&ctx->kfifo_lock);
+	mutex_init(&ctx->rd_mutex);
+
+	rc = kfifo_alloc(&ctx->async_msg_fifo,
+			 sizeof(struct slimpro_resp_msg) * ASYNC_MSG_FIFO_SIZE,
+			 GFP_KERNEL);
+	if (rc)
+		goto out_mbox_free;
+
+	INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
+
+	/* Request mailbox channel */
+	cl->dev = &pdev->dev;
+	cl->tx_done = xgene_hwmon_tx_done;
+	cl->tx_block = false;
+	cl->tx_tout = MBOX_OP_TIMEOUTMS;
+	cl->knows_txdone = false;
+	if (acpi_disabled) {
+		cl->rx_callback = xgene_hwmon_rx_cb;
+		ctx->mbox_chan = mbox_request_channel(cl, 0);
+		if (IS_ERR(ctx->mbox_chan)) {
+			dev_err(&pdev->dev,
+				"SLIMpro mailbox channel request failed\n");
+			return -ENODEV;
+		}
+	} else {
+		struct acpi_pcct_hw_reduced *cppc_ss;
+
+		if (device_property_read_u32(&pdev->dev, "pcc-channel",
+					     &ctx->mbox_idx)) {
+			dev_err(&pdev->dev, "no pcc-channel property\n");
+			return -ENODEV;
+		}
+
+		cl->rx_callback = xgene_hwmon_pcc_rx_cb;
+		ctx->mbox_chan = pcc_mbox_request_channel(cl, ctx->mbox_idx);
+		if (IS_ERR(ctx->mbox_chan)) {
+			dev_err(&pdev->dev,
+				"PPC channel request failed\n");
+			return -ENODEV;
+		}
+
+		/*
+		 * The PCC mailbox controller driver should
+		 * have parsed the PCCT (global table of all
+		 * PCC channels) and stored pointers to the
+		 * subspace communication region in con_priv.
+		 */
+		cppc_ss = ctx->mbox_chan->con_priv;
+		if (!cppc_ss) {
+			dev_err(&pdev->dev, "PPC subspace not found\n");
+			rc = -ENODEV;
+			goto out_mbox_free;
+		}
+
+		if (!ctx->mbox_chan->mbox->txdone_irq) {
+			dev_err(&pdev->dev, "PCC IRQ not supported\n");
+			rc = -ENODEV;
+			goto out_mbox_free;
+		}
+
+		/*
+		 * This is the shared communication region
+		 * for the OS and Platform to communicate over.
+		 */
+		ctx->comm_base_addr = cppc_ss->base_address;
+		if (ctx->comm_base_addr) {
+			ctx->pcc_comm_addr = memremap(ctx->comm_base_addr,
+							cppc_ss->length,
+							MEMREMAP_WB);
+		} else {
+			dev_err(&pdev->dev, "Failed to get PCC comm region\n");
+			rc = -ENODEV;
+			goto out_mbox_free;
+		}
+
+		if (!ctx->pcc_comm_addr) {
+			dev_err(&pdev->dev,
+				"Failed to ioremap PCC comm region\n");
+			rc = -ENOMEM;
+			goto out_mbox_free;
+		}
+
+		/*
+		 * cppc_ss->latency is just a Nominal value. In reality
+		 * the remote processor could be much slower to reply.
+		 * So add an arbitrary amount of wait on top of Nominal.
+		 */
+		ctx->usecs_lat = PCC_NUM_RETRIES * cppc_ss->latency;
+	}
+
+	ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
+							   "apm_xgene",
+							   ctx,
+							   xgene_hwmon_groups);
+	if (IS_ERR(ctx->hwmon_dev)) {
+		dev_err(&pdev->dev, "Failed to register HW monitor device\n");
+		rc = PTR_ERR(ctx->hwmon_dev);
+		goto out;
+	}
+
+	/*
+	 * Schedule the bottom handler if there is a pending message.
+	 */
+	schedule_work(&ctx->workq);
+
+	dev_info(&pdev->dev, "APM X-Gene SoC HW monitor driver registered\n");
+
+	return 0;
+
+out:
+	if (acpi_disabled)
+		mbox_free_channel(ctx->mbox_chan);
+	else
+		pcc_mbox_free_channel(ctx->mbox_chan);
+out_mbox_free:
+	kfifo_free(&ctx->async_msg_fifo);
+
+	return rc;
+}
+
+static int xgene_hwmon_remove(struct platform_device *pdev)
+{
+	struct xgene_hwmon_dev *ctx = platform_get_drvdata(pdev);
+
+	hwmon_device_unregister(ctx->hwmon_dev);
+	kfifo_free(&ctx->async_msg_fifo);
+	if (acpi_disabled)
+		mbox_free_channel(ctx->mbox_chan);
+	else
+		pcc_mbox_free_channel(ctx->mbox_chan);
+
+	return 0;
+}
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgene_hwmon_acpi_match[] = {
+	{"APMC0D29", 0},
+	{},
+};
+MODULE_DEVICE_TABLE(acpi, xgene_hwmon_acpi_match);
+#endif
+
+static const struct of_device_id xgene_hwmon_of_match[] = {
+	{.compatible = "apm,xgene-slimpro-hwmon"},
+	{}
+};
+MODULE_DEVICE_TABLE(of, xgene_hwmon_of_match);
+
+static struct platform_driver xgene_hwmon_driver __refdata = {
+	.probe = xgene_hwmon_probe,
+	.remove = xgene_hwmon_remove,
+	.driver = {
+		.name = "xgene-slimpro-hwmon",
+		.of_match_table = xgene_hwmon_of_match,
+		.acpi_match_table = ACPI_PTR(xgene_hwmon_acpi_match),
+	},
+};
+module_platform_driver(xgene_hwmon_driver);
+
+MODULE_DESCRIPTION("APM X-Gene SoC hardware monitor");
+MODULE_LICENSE("GPL");
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 4d20b0b..d7325c65 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -184,8 +184,7 @@
 
 	if (coresight_timeout(drvdata->base, ETB_FFCR, ETB_FFCR_BIT, 0)) {
 		dev_err(drvdata->dev,
-			"timeout observed when probing at offset %#x\n",
-			ETB_FFCR);
+		"timeout while waiting for completion of Manual Flush\n");
 	}
 
 	/* disable trace capture */
@@ -193,8 +192,7 @@
 
 	if (coresight_timeout(drvdata->base, ETB_FFSR, ETB_FFSR_BIT, 1)) {
 		dev_err(drvdata->dev,
-			"timeout observed when probing at offset %#x\n",
-			ETB_FFCR);
+			"timeout while waiting for Formatter to Stop\n");
 	}
 
 	CS_LOCK(drvdata->base);
@@ -561,7 +559,7 @@
 };
 
 #define coresight_etb10_simple_func(name, offset)                       \
-	coresight_simple_func(struct etb_drvdata, name, offset)
+	coresight_simple_func(struct etb_drvdata, NULL, name, offset)
 
 coresight_etb10_simple_func(rdp, ETB_RAM_DEPTH_REG);
 coresight_etb10_simple_func(sts, ETB_STATUS_REG);
@@ -638,7 +636,7 @@
 	struct coresight_platform_data *pdata = NULL;
 	struct etb_drvdata *drvdata;
 	struct resource *res = &adev->res;
-	struct coresight_desc *desc;
+	struct coresight_desc desc = { 0 };
 	struct device_node *np = adev->dev.of_node;
 
 	if (np) {
@@ -684,17 +682,13 @@
 		return -ENOMEM;
 	}
 
-	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
-	if (!desc)
-		return -ENOMEM;
-
-	desc->type = CORESIGHT_DEV_TYPE_SINK;
-	desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
-	desc->ops = &etb_cs_ops;
-	desc->pdata = pdata;
-	desc->dev = dev;
-	desc->groups = coresight_etb_groups;
-	drvdata->csdev = coresight_register(desc);
+	desc.type = CORESIGHT_DEV_TYPE_SINK;
+	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+	desc.ops = &etb_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_etb_groups;
+	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev))
 		return PTR_ERR(drvdata->csdev);
 
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 755125f..2cd7c71 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -27,6 +27,7 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+#include "coresight-etm-perf.h"
 #include "coresight-priv.h"
 
 static struct pmu etm_pmu;
@@ -71,14 +72,48 @@
 
 static void etm_event_read(struct perf_event *event) {}
 
-static int etm_event_init(struct perf_event *event)
+static int etm_addr_filters_alloc(struct perf_event *event)
 {
-	if (event->attr.type != etm_pmu.type)
-		return -ENOENT;
+	struct etm_filters *filters;
+	int node = event->cpu == -1 ? -1 : cpu_to_node(event->cpu);
+
+	filters = kzalloc_node(sizeof(struct etm_filters), GFP_KERNEL, node);
+	if (!filters)
+		return -ENOMEM;
+
+	if (event->parent)
+		memcpy(filters, event->parent->hw.addr_filters,
+		       sizeof(*filters));
+
+	event->hw.addr_filters = filters;
 
 	return 0;
 }
 
+static void etm_event_destroy(struct perf_event *event)
+{
+	kfree(event->hw.addr_filters);
+	event->hw.addr_filters = NULL;
+}
+
+static int etm_event_init(struct perf_event *event)
+{
+	int ret = 0;
+
+	if (event->attr.type != etm_pmu.type) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	ret = etm_addr_filters_alloc(event);
+	if (ret)
+		goto out;
+
+	event->destroy = etm_event_destroy;
+out:
+	return ret;
+}
+
 static void free_event_data(struct work_struct *work)
 {
 	int cpu;
@@ -100,7 +135,7 @@
 	}
 
 	for_each_cpu(cpu, mask) {
-		if (event_data->path[cpu])
+		if (!(IS_ERR_OR_NULL(event_data->path[cpu])))
 			coresight_release_path(event_data->path[cpu]);
 	}
 
@@ -185,7 +220,7 @@
 		 * referenced later when the path is actually needed.
 		 */
 		event_data->path[cpu] = coresight_build_path(csdev);
-		if (!event_data->path[cpu])
+		if (IS_ERR(event_data->path[cpu]))
 			goto err;
 	}
 
@@ -258,7 +293,7 @@
 	event->hw.state = 0;
 
 	/* Finally enable the tracer */
-	if (source_ops(csdev)->enable(csdev, &event->attr, CS_MODE_PERF))
+	if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
 		goto fail_end_stop;
 
 out:
@@ -291,7 +326,7 @@
 		return;
 
 	/* stop tracer */
-	source_ops(csdev)->disable(csdev);
+	source_ops(csdev)->disable(csdev, event);
 
 	/* tell the core */
 	event->hw.state = PERF_HES_STOPPED;
@@ -342,6 +377,87 @@
 	etm_event_stop(event, PERF_EF_UPDATE);
 }
 
+static int etm_addr_filters_validate(struct list_head *filters)
+{
+	bool range = false, address = false;
+	int index = 0;
+	struct perf_addr_filter *filter;
+
+	list_for_each_entry(filter, filters, entry) {
+		/*
+		 * No need to go further if there's no more
+		 * room for filters.
+		 */
+		if (++index > ETM_ADDR_CMP_MAX)
+			return -EOPNOTSUPP;
+
+		/*
+		 * As taken from the struct perf_addr_filter documentation:
+		 *	@range:	1: range, 0: address
+		 *
+		 * At this time we don't allow range and start/stop filtering
+		 * to cohabitate, they have to be mutually exclusive.
+		 */
+		if ((filter->range == 1) && address)
+			return -EOPNOTSUPP;
+
+		if ((filter->range == 0) && range)
+			return -EOPNOTSUPP;
+
+		/*
+		 * For range filtering, the second address in the address
+		 * range comparator needs to be higher than the first.
+		 * Invalid otherwise.
+		 */
+		if (filter->range && filter->size == 0)
+			return -EINVAL;
+
+		/*
+		 * Everything checks out with this filter, record what we've
+		 * received before moving on to the next one.
+		 */
+		if (filter->range)
+			range = true;
+		else
+			address = true;
+	}
+
+	return 0;
+}
+
+static void etm_addr_filters_sync(struct perf_event *event)
+{
+	struct perf_addr_filters_head *head = perf_event_addr_filters(event);
+	unsigned long start, stop, *offs = event->addr_filters_offs;
+	struct etm_filters *filters = event->hw.addr_filters;
+	struct etm_filter *etm_filter;
+	struct perf_addr_filter *filter;
+	int i = 0;
+
+	list_for_each_entry(filter, &head->list, entry) {
+		start = filter->offset + offs[i];
+		stop = start + filter->size;
+		etm_filter = &filters->etm_filter[i];
+
+		if (filter->range == 1) {
+			etm_filter->start_addr = start;
+			etm_filter->stop_addr = stop;
+			etm_filter->type = ETM_ADDR_TYPE_RANGE;
+		} else {
+			if (filter->filter == 1) {
+				etm_filter->start_addr = start;
+				etm_filter->type = ETM_ADDR_TYPE_START;
+			} else {
+				etm_filter->stop_addr = stop;
+				etm_filter->type = ETM_ADDR_TYPE_STOP;
+			}
+		}
+		i++;
+	}
+
+	filters->nr_filters = i;
+}
+
 int etm_perf_symlink(struct coresight_device *csdev, bool link)
 {
 	char entry[sizeof("cpu9999999")];
@@ -371,18 +487,21 @@
 {
 	int ret;
 
-	etm_pmu.capabilities	= PERF_PMU_CAP_EXCLUSIVE;
+	etm_pmu.capabilities		= PERF_PMU_CAP_EXCLUSIVE;
 
-	etm_pmu.attr_groups	= etm_pmu_attr_groups;
-	etm_pmu.task_ctx_nr	= perf_sw_context;
-	etm_pmu.read		= etm_event_read;
-	etm_pmu.event_init	= etm_event_init;
-	etm_pmu.setup_aux	= etm_setup_aux;
-	etm_pmu.free_aux	= etm_free_aux;
-	etm_pmu.start		= etm_event_start;
-	etm_pmu.stop		= etm_event_stop;
-	etm_pmu.add		= etm_event_add;
-	etm_pmu.del		= etm_event_del;
+	etm_pmu.attr_groups		= etm_pmu_attr_groups;
+	etm_pmu.task_ctx_nr		= perf_sw_context;
+	etm_pmu.read			= etm_event_read;
+	etm_pmu.event_init		= etm_event_init;
+	etm_pmu.setup_aux		= etm_setup_aux;
+	etm_pmu.free_aux		= etm_free_aux;
+	etm_pmu.start			= etm_event_start;
+	etm_pmu.stop			= etm_event_stop;
+	etm_pmu.add			= etm_event_add;
+	etm_pmu.del			= etm_event_del;
+	etm_pmu.addr_filters_sync	= etm_addr_filters_sync;
+	etm_pmu.addr_filters_validate	= etm_addr_filters_validate;
+	etm_pmu.nr_addr_filters		= ETM_ADDR_CMP_MAX;
 
 	ret = perf_pmu_register(&etm_pmu, CORESIGHT_ETM_PMU_NAME, -1);
 	if (ret == 0)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index 87f5a13..3ffc9fe 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -18,8 +18,42 @@
 #ifndef _CORESIGHT_ETM_PERF_H
 #define _CORESIGHT_ETM_PERF_H
 
+#include "coresight-priv.h"
+
 struct coresight_device;
 
+/*
+ * In both ETMv3 and v4 the maximum number of address comparator implentable
+ * is 8.  The actual number is implementation specific and will be checked
+ * when filters are applied.
+ */
+#define ETM_ADDR_CMP_MAX	8
+
+/**
+ * struct etm_filter - single instruction range or start/stop configuration.
+ * @start_addr:	The address to start tracing on.
+ * @stop_addr:	The address to stop tracing on.
+ * @type:	Is this a range or start/stop filter.
+ */
+struct etm_filter {
+	unsigned long start_addr;
+	unsigned long stop_addr;
+	enum etm_addr_type type;
+};
+
+/**
+ * struct etm_filters - set of filters for a session
+ * @etm_filter:	All the filters for this session.
+ * @nr_filters:	Number of filters
+ * @ssstatus:	Status of the start/stop logic.
+ */
+struct etm_filters {
+	struct etm_filter	etm_filter[ETM_ADDR_CMP_MAX];
+	unsigned int		nr_filters;
+	bool			ssstatus;
+};
+
+
 #ifdef CONFIG_CORESIGHT
 int etm_perf_symlink(struct coresight_device *csdev, bool link);
 
diff --git a/drivers/hwtracing/coresight/coresight-etm.h b/drivers/hwtracing/coresight/coresight-etm.h
index 51597cb..4a18ee4 100644
--- a/drivers/hwtracing/coresight/coresight-etm.h
+++ b/drivers/hwtracing/coresight/coresight-etm.h
@@ -259,14 +259,6 @@
 	struct etm_config		config;
 };
 
-enum etm_addr_type {
-	ETM_ADDR_TYPE_NONE,
-	ETM_ADDR_TYPE_SINGLE,
-	ETM_ADDR_TYPE_RANGE,
-	ETM_ADDR_TYPE_START,
-	ETM_ADDR_TYPE_STOP,
-};
-
 static inline void etm_writel(struct etm_drvdata *drvdata,
 			      u32 val, u32 off)
 {
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
index 02d4b62..e9b0719 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x-sysfs.c
@@ -18,6 +18,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/sysfs.h>
 #include "coresight-etm.h"
+#include "coresight-priv.h"
 
 static ssize_t nr_addr_cmp_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
@@ -1222,7 +1223,7 @@
 };
 
 #define coresight_etm3x_simple_func(name, offset)			\
-	coresight_simple_func(struct etm_drvdata, name, offset)
+	coresight_simple_func(struct etm_drvdata, NULL, name, offset)
 
 coresight_etm3x_simple_func(etmccr, ETMCCR);
 coresight_etm3x_simple_func(etmccer, ETMCCER);
diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c
index 2de4cad..3fe368b 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x.c
@@ -311,9 +311,10 @@
 #define ETM3X_SUPPORTED_OPTIONS (ETMCR_CYC_ACC | ETMCR_TIMESTAMP_EN)
 
 static int etm_parse_event_config(struct etm_drvdata *drvdata,
-				  struct perf_event_attr *attr)
+				  struct perf_event *event)
 {
 	struct etm_config *config = &drvdata->config;
+	struct perf_event_attr *attr = &event->attr;
 
 	if (!attr)
 		return -EINVAL;
@@ -459,7 +460,7 @@
 }
 
 static int etm_enable_perf(struct coresight_device *csdev,
-			   struct perf_event_attr *attr)
+			   struct perf_event *event)
 {
 	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
@@ -467,7 +468,7 @@
 		return -EINVAL;
 
 	/* Configure the tracer based on the session's specifics */
-	etm_parse_event_config(drvdata, attr);
+	etm_parse_event_config(drvdata, event);
 	/* And enable it */
 	etm_enable_hw(drvdata);
 
@@ -504,7 +505,7 @@
 }
 
 static int etm_enable(struct coresight_device *csdev,
-		      struct perf_event_attr *attr, u32 mode)
+		      struct perf_event *event, u32 mode)
 {
 	int ret;
 	u32 val;
@@ -521,7 +522,7 @@
 		ret = etm_enable_sysfs(csdev);
 		break;
 	case CS_MODE_PERF:
-		ret = etm_enable_perf(csdev, attr);
+		ret = etm_enable_perf(csdev, event);
 		break;
 	default:
 		ret = -EINVAL;
@@ -601,7 +602,8 @@
 	dev_info(drvdata->dev, "ETM tracing disabled\n");
 }
 
-static void etm_disable(struct coresight_device *csdev)
+static void etm_disable(struct coresight_device *csdev,
+			struct perf_event *event)
 {
 	u32 mode;
 	struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -756,13 +758,9 @@
 	struct coresight_platform_data *pdata = NULL;
 	struct etm_drvdata *drvdata;
 	struct resource *res = &adev->res;
-	struct coresight_desc *desc;
+	struct coresight_desc desc = { 0 };
 	struct device_node *np = adev->dev.of_node;
 
-	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
-	if (!desc)
-		return -ENOMEM;
-
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
@@ -825,13 +823,13 @@
 	etm_init_trace_id(drvdata);
 	etm_set_default(&drvdata->config);
 
-	desc->type = CORESIGHT_DEV_TYPE_SOURCE;
-	desc->subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
-	desc->ops = &etm_cs_ops;
-	desc->pdata = pdata;
-	desc->dev = dev;
-	desc->groups = coresight_etm_groups;
-	drvdata->csdev = coresight_register(desc);
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
+	desc.ops = &etm_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_etm_groups;
+	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev)) {
 		ret = PTR_ERR(drvdata->csdev);
 		goto err_arch_supported;
@@ -893,6 +891,11 @@
 		.mask	= 0x0003ffff,
 		.data	= "ETM 3.3",
 	},
+	{	/* ETM 3.5 - Cortex-A5 */
+		.id	= 0x0003b955,
+		.mask	= 0x0003ffff,
+		.data	= "ETM 3.5",
+	},
 	{	/* ETM 3.5 */
 		.id	= 0x0003b956,
 		.mask	= 0x0003ffff,
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
index 7c84308..b9b1e9c 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
@@ -18,6 +18,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/sysfs.h>
 #include "coresight-etm4x.h"
+#include "coresight-priv.h"
 
 static int etm4_set_mode_exclude(struct etmv4_drvdata *drvdata, bool exclude)
 {
@@ -2039,15 +2040,42 @@
 	NULL,
 };
 
-#define coresight_etm4x_simple_func(name, offset)			\
-	coresight_simple_func(struct etmv4_drvdata, name, offset)
+struct etmv4_reg {
+	void __iomem *addr;
+	u32 data;
+};
 
-coresight_etm4x_simple_func(trcoslsr, TRCOSLSR);
+static void do_smp_cross_read(void *data)
+{
+	struct etmv4_reg *reg = data;
+
+	reg->data = readl_relaxed(reg->addr);
+}
+
+static u32 etmv4_cross_read(const struct device *dev, u32 offset)
+{
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
+	struct etmv4_reg reg;
+
+	reg.addr = drvdata->base + offset;
+	/*
+	 * smp cross call ensures the CPU will be powered up before
+	 * accessing the ETMv4 trace core registers
+	 */
+	smp_call_function_single(drvdata->cpu, do_smp_cross_read, &reg, 1);
+	return reg.data;
+}
+
+#define coresight_etm4x_simple_func(name, offset)			\
+	coresight_simple_func(struct etmv4_drvdata, NULL, name, offset)
+
+#define coresight_etm4x_cross_read(name, offset)			\
+	coresight_simple_func(struct etmv4_drvdata, etmv4_cross_read,	\
+			      name, offset)
+
 coresight_etm4x_simple_func(trcpdcr, TRCPDCR);
 coresight_etm4x_simple_func(trcpdsr, TRCPDSR);
 coresight_etm4x_simple_func(trclsr, TRCLSR);
-coresight_etm4x_simple_func(trcconfig, TRCCONFIGR);
-coresight_etm4x_simple_func(trctraceid, TRCTRACEIDR);
 coresight_etm4x_simple_func(trcauthstatus, TRCAUTHSTATUS);
 coresight_etm4x_simple_func(trcdevid, TRCDEVID);
 coresight_etm4x_simple_func(trcdevtype, TRCDEVTYPE);
@@ -2055,6 +2083,9 @@
 coresight_etm4x_simple_func(trcpidr1, TRCPIDR1);
 coresight_etm4x_simple_func(trcpidr2, TRCPIDR2);
 coresight_etm4x_simple_func(trcpidr3, TRCPIDR3);
+coresight_etm4x_cross_read(trcoslsr, TRCOSLSR);
+coresight_etm4x_cross_read(trcconfig, TRCCONFIGR);
+coresight_etm4x_cross_read(trctraceid, TRCTRACEIDR);
 
 static struct attribute *coresight_etmv4_mgmt_attrs[] = {
 	&dev_attr_trcoslsr.attr,
@@ -2073,19 +2104,19 @@
 	NULL,
 };
 
-coresight_etm4x_simple_func(trcidr0, TRCIDR0);
-coresight_etm4x_simple_func(trcidr1, TRCIDR1);
-coresight_etm4x_simple_func(trcidr2, TRCIDR2);
-coresight_etm4x_simple_func(trcidr3, TRCIDR3);
-coresight_etm4x_simple_func(trcidr4, TRCIDR4);
-coresight_etm4x_simple_func(trcidr5, TRCIDR5);
+coresight_etm4x_cross_read(trcidr0, TRCIDR0);
+coresight_etm4x_cross_read(trcidr1, TRCIDR1);
+coresight_etm4x_cross_read(trcidr2, TRCIDR2);
+coresight_etm4x_cross_read(trcidr3, TRCIDR3);
+coresight_etm4x_cross_read(trcidr4, TRCIDR4);
+coresight_etm4x_cross_read(trcidr5, TRCIDR5);
 /* trcidr[6,7] are reserved */
-coresight_etm4x_simple_func(trcidr8, TRCIDR8);
-coresight_etm4x_simple_func(trcidr9, TRCIDR9);
-coresight_etm4x_simple_func(trcidr10, TRCIDR10);
-coresight_etm4x_simple_func(trcidr11, TRCIDR11);
-coresight_etm4x_simple_func(trcidr12, TRCIDR12);
-coresight_etm4x_simple_func(trcidr13, TRCIDR13);
+coresight_etm4x_cross_read(trcidr8, TRCIDR8);
+coresight_etm4x_cross_read(trcidr9, TRCIDR9);
+coresight_etm4x_cross_read(trcidr10, TRCIDR10);
+coresight_etm4x_cross_read(trcidr11, TRCIDR11);
+coresight_etm4x_cross_read(trcidr12, TRCIDR12);
+coresight_etm4x_cross_read(trcidr13, TRCIDR13);
 
 static struct attribute *coresight_etmv4_trcidr_attrs[] = {
 	&dev_attr_trcidr0.attr,
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index 1a5e0d1..4db8d6a 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -33,7 +33,6 @@
 #include <linux/uaccess.h>
 #include <linux/perf_event.h>
 #include <linux/pm_runtime.h>
-#include <linux/perf_event.h>
 #include <asm/sections.h>
 #include <asm/local.h>
 
@@ -46,7 +45,9 @@
 /* The number of ETMv4 currently registered */
 static int etm4_count;
 static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
-static void etm4_set_default(struct etmv4_config *config);
+static void etm4_set_default_config(struct etmv4_config *config);
+static int etm4_set_event_filters(struct etmv4_drvdata *drvdata,
+				  struct perf_event *event);
 
 static enum cpuhp_state hp_online;
 
@@ -79,22 +80,8 @@
 static int etm4_trace_id(struct coresight_device *csdev)
 {
 	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-	unsigned long flags;
-	int trace_id = -1;
 
-	if (!local_read(&drvdata->mode))
-		return drvdata->trcid;
-
-	spin_lock_irqsave(&drvdata->spinlock, flags);
-
-	CS_UNLOCK(drvdata->base);
-	trace_id = readl_relaxed(drvdata->base + TRCTRACEIDR);
-	trace_id &= ETM_TRACEID_MASK;
-	CS_LOCK(drvdata->base);
-
-	spin_unlock_irqrestore(&drvdata->spinlock, flags);
-
-	return trace_id;
+	return drvdata->trcid;
 }
 
 static void etm4_enable_hw(void *info)
@@ -113,8 +100,7 @@
 	/* wait for TRCSTATR.IDLE to go up */
 	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
 		dev_err(drvdata->dev,
-			"timeout observed when probing at offset %#x\n",
-			TRCSTATR);
+			"timeout while waiting for Idle Trace Status\n");
 
 	writel_relaxed(config->pe_sel, drvdata->base + TRCPROCSELR);
 	writel_relaxed(config->cfg, drvdata->base + TRCCONFIGR);
@@ -180,14 +166,20 @@
 	writel_relaxed(config->vmid_mask0, drvdata->base + TRCVMIDCCTLR0);
 	writel_relaxed(config->vmid_mask1, drvdata->base + TRCVMIDCCTLR1);
 
+	/*
+	 * Request to keep the trace unit powered and also
+	 * emulation of powerdown
+	 */
+	writel_relaxed(readl_relaxed(drvdata->base + TRCPDCR) | TRCPDCR_PU,
+		       drvdata->base + TRCPDCR);
+
 	/* Enable the trace unit */
 	writel_relaxed(1, drvdata->base + TRCPRGCTLR);
 
 	/* wait for TRCSTATR.IDLE to go back down to '0' */
 	if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
 		dev_err(drvdata->dev,
-			"timeout observed when probing at offset %#x\n",
-			TRCSTATR);
+			"timeout while waiting for Idle Trace Status\n");
 
 	CS_LOCK(drvdata->base);
 
@@ -195,12 +187,16 @@
 }
 
 static int etm4_parse_event_config(struct etmv4_drvdata *drvdata,
-				   struct perf_event_attr *attr)
+				   struct perf_event *event)
 {
+	int ret = 0;
 	struct etmv4_config *config = &drvdata->config;
+	struct perf_event_attr *attr = &event->attr;
 
-	if (!attr)
-		return -EINVAL;
+	if (!attr) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	/* Clear configuration from previous run */
 	memset(config, 0, sizeof(struct etmv4_config));
@@ -212,14 +208,12 @@
 		config->mode = ETM_MODE_EXCL_USER;
 
 	/* Always start from the default config */
-	etm4_set_default(config);
+	etm4_set_default_config(config);
 
-	/*
-	 * By default the tracers are configured to trace the whole address
-	 * range.  Narrow the field only if requested by user space.
-	 */
-	if (config->mode)
-		etm4_config_trace_mode(config);
+	/* Configure filters specified on the perf cmd line, if any. */
+	ret = etm4_set_event_filters(drvdata, event);
+	if (ret)
+		goto out;
 
 	/* Go from generic option to ETMv4 specifics */
 	if (attr->config & BIT(ETM_OPT_CYCACC))
@@ -227,23 +221,30 @@
 	if (attr->config & BIT(ETM_OPT_TS))
 		config->cfg |= ETMv4_MODE_TIMESTAMP;
 
-	return 0;
+out:
+	return ret;
 }
 
 static int etm4_enable_perf(struct coresight_device *csdev,
-			    struct perf_event_attr *attr)
+			    struct perf_event *event)
 {
+	int ret = 0;
 	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
-	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
-		return -EINVAL;
+	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id())) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	/* Configure the tracer based on the session's specifics */
-	etm4_parse_event_config(drvdata, attr);
+	ret = etm4_parse_event_config(drvdata, event);
+	if (ret)
+		goto out;
 	/* And enable it */
 	etm4_enable_hw(drvdata);
 
-	return 0;
+out:
+	return ret;
 }
 
 static int etm4_enable_sysfs(struct coresight_device *csdev)
@@ -274,7 +275,7 @@
 }
 
 static int etm4_enable(struct coresight_device *csdev,
-		       struct perf_event_attr *attr, u32 mode)
+		       struct perf_event *event, u32 mode)
 {
 	int ret;
 	u32 val;
@@ -291,7 +292,7 @@
 		ret = etm4_enable_sysfs(csdev);
 		break;
 	case CS_MODE_PERF:
-		ret = etm4_enable_perf(csdev, attr);
+		ret = etm4_enable_perf(csdev, event);
 		break;
 	default:
 		ret = -EINVAL;
@@ -311,6 +312,11 @@
 
 	CS_UNLOCK(drvdata->base);
 
+	/* power can be removed from the trace unit now */
+	control = readl_relaxed(drvdata->base + TRCPDCR);
+	control &= ~TRCPDCR_PU;
+	writel_relaxed(control, drvdata->base + TRCPDCR);
+
 	control = readl_relaxed(drvdata->base + TRCPRGCTLR);
 
 	/* EN, bit[0] Trace unit enable bit */
@@ -326,14 +332,28 @@
 	dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu);
 }
 
-static int etm4_disable_perf(struct coresight_device *csdev)
+static int etm4_disable_perf(struct coresight_device *csdev,
+			     struct perf_event *event)
 {
+	u32 control;
+	struct etm_filters *filters = event->hw.addr_filters;
 	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	if (WARN_ON_ONCE(drvdata->cpu != smp_processor_id()))
 		return -EINVAL;
 
 	etm4_disable_hw(drvdata);
+
+	/*
+	 * Check if the start/stop logic was active when the unit was stopped.
+	 * That way we can re-enable the start/stop logic when the process is
+	 * scheduled again.  Configuration of the start/stop logic happens in
+	 * function etm4_set_event_filters().
+	 */
+	control = readl_relaxed(drvdata->base + TRCVICTLR);
+	/* TRCVICTLR::SSSTATUS, bit[9] */
+	filters->ssstatus = (control & BIT(9));
+
 	return 0;
 }
 
@@ -362,7 +382,8 @@
 	dev_info(drvdata->dev, "ETM tracing disabled\n");
 }
 
-static void etm4_disable(struct coresight_device *csdev)
+static void etm4_disable(struct coresight_device *csdev,
+			 struct perf_event *event)
 {
 	u32 mode;
 	struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -381,7 +402,7 @@
 		etm4_disable_sysfs(csdev);
 		break;
 	case CS_MODE_PERF:
-		etm4_disable_perf(csdev);
+		etm4_disable_perf(csdev, event);
 		break;
 	}
 
@@ -564,21 +585,8 @@
 	CS_LOCK(drvdata->base);
 }
 
-static void etm4_set_default(struct etmv4_config *config)
+static void etm4_set_default_config(struct etmv4_config *config)
 {
-	if (WARN_ON_ONCE(!config))
-		return;
-
-	/*
-	 * Make default initialisation trace everything
-	 *
-	 * Select the "always true" resource selector on the
-	 * "Enablign Event" line and configure address range comparator
-	 * '0' to trace all the possible address range.  From there
-	 * configure the "include/exclude" engine to include address
-	 * range comparator '0'.
-	 */
-
 	/* disable all events tracing */
 	config->eventctrl0 = 0x0;
 	config->eventctrl1 = 0x0;
@@ -594,6 +602,108 @@
 
 	/* TRCVICTLR::EVENT = 0x01, select the always on logic */
 	config->vinst_ctrl |= BIT(0);
+}
+
+static u64 etm4_get_access_type(struct etmv4_config *config)
+{
+	u64 access_type = 0;
+
+	/*
+	 * EXLEVEL_NS, bits[15:12]
+	 * The Exception levels are:
+	 *   Bit[12] Exception level 0 - Application
+	 *   Bit[13] Exception level 1 - OS
+	 *   Bit[14] Exception level 2 - Hypervisor
+	 *   Bit[15] Never implemented
+	 *
+	 * Always stay away from hypervisor mode.
+	 */
+	access_type = ETM_EXLEVEL_NS_HYP;
+
+	if (config->mode & ETM_MODE_EXCL_KERN)
+		access_type |= ETM_EXLEVEL_NS_OS;
+
+	if (config->mode & ETM_MODE_EXCL_USER)
+		access_type |= ETM_EXLEVEL_NS_APP;
+
+	/*
+	 * EXLEVEL_S, bits[11:8], don't trace anything happening
+	 * in secure state.
+	 */
+	access_type |= (ETM_EXLEVEL_S_APP	|
+			ETM_EXLEVEL_S_OS	|
+			ETM_EXLEVEL_S_HYP);
+
+	return access_type;
+}
+
+static void etm4_set_comparator_filter(struct etmv4_config *config,
+				       u64 start, u64 stop, int comparator)
+{
+	u64 access_type = etm4_get_access_type(config);
+
+	/* First half of default address comparator */
+	config->addr_val[comparator] = start;
+	config->addr_acc[comparator] = access_type;
+	config->addr_type[comparator] = ETM_ADDR_TYPE_RANGE;
+
+	/* Second half of default address comparator */
+	config->addr_val[comparator + 1] = stop;
+	config->addr_acc[comparator + 1] = access_type;
+	config->addr_type[comparator + 1] = ETM_ADDR_TYPE_RANGE;
+
+	/*
+	 * Configure the ViewInst function to include this address range
+	 * comparator.
+	 *
+	 * @comparator is divided by two since it is the index in the
+	 * etmv4_config::addr_val array but register TRCVIIECTLR deals with
+	 * address range comparator _pairs_.
+	 *
+	 * Therefore:
+	 *	index 0 -> compatator pair 0
+	 *	index 2 -> comparator pair 1
+	 *	index 4 -> comparator pair 2
+	 *	...
+	 *	index 14 -> comparator pair 7
+	 */
+	config->viiectlr |= BIT(comparator / 2);
+}
+
+static void etm4_set_start_stop_filter(struct etmv4_config *config,
+				       u64 address, int comparator,
+				       enum etm_addr_type type)
+{
+	int shift;
+	u64 access_type = etm4_get_access_type(config);
+
+	/* Configure the comparator */
+	config->addr_val[comparator] = address;
+	config->addr_acc[comparator] = access_type;
+	config->addr_type[comparator] = type;
+
+	/*
+	 * Configure ViewInst Start-Stop control register.
+	 * Addresses configured to start tracing go from bit 0 to n-1,
+	 * while those configured to stop tracing from 16 to 16 + n-1.
+	 */
+	shift = (type == ETM_ADDR_TYPE_START ? 0 : 16);
+	config->vissctlr |= BIT(shift + comparator);
+}
+
+static void etm4_set_default_filter(struct etmv4_config *config)
+{
+	u64 start, stop;
+
+	/*
+	 * Configure address range comparator '0' to encompass all
+	 * possible addresses.
+	 */
+	start = 0x0;
+	stop = ~0x0;
+
+	etm4_set_comparator_filter(config, start, stop,
+				   ETM_DEFAULT_ADDR_COMP);
 
 	/*
 	 * TRCVICTLR::SSSTATUS == 1, the start-stop logic is
@@ -601,45 +711,158 @@
 	 */
 	config->vinst_ctrl |= BIT(9);
 
-	/*
-	 * Configure address range comparator '0' to encompass all
-	 * possible addresses.
-	 */
-
-	/* First half of default address comparator: start at address 0 */
-	config->addr_val[ETM_DEFAULT_ADDR_COMP] = 0x0;
-	/* trace instruction addresses */
-	config->addr_acc[ETM_DEFAULT_ADDR_COMP] &= ~(BIT(0) | BIT(1));
-	/* EXLEVEL_NS, bits[12:15], only trace application and kernel space */
-	config->addr_acc[ETM_DEFAULT_ADDR_COMP] |= ETM_EXLEVEL_NS_HYP;
-	/* EXLEVEL_S, bits[11:8], don't trace anything in secure state */
-	config->addr_acc[ETM_DEFAULT_ADDR_COMP] |= (ETM_EXLEVEL_S_APP |
-						    ETM_EXLEVEL_S_OS |
-						    ETM_EXLEVEL_S_HYP);
-	config->addr_type[ETM_DEFAULT_ADDR_COMP] = ETM_ADDR_TYPE_RANGE;
-
-	/*
-	 * Second half of default address comparator: go all
-	 * the way to the top.
-	*/
-	config->addr_val[ETM_DEFAULT_ADDR_COMP + 1] = ~0x0;
-	/* trace instruction addresses */
-	config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] &= ~(BIT(0) | BIT(1));
-	/* Address comparator type must be equal for both halves */
-	config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] =
-					config->addr_acc[ETM_DEFAULT_ADDR_COMP];
-	config->addr_type[ETM_DEFAULT_ADDR_COMP + 1] = ETM_ADDR_TYPE_RANGE;
-
-	/*
-	 * Configure the ViewInst function to filter on address range
-	 * comparator '0'.
-	 */
-	config->viiectlr = BIT(0);
-
-	/* no start-stop filtering for ViewInst */
+	/* No start-stop filtering for ViewInst */
 	config->vissctlr = 0x0;
 }
 
+static void etm4_set_default(struct etmv4_config *config)
+{
+	if (WARN_ON_ONCE(!config))
+		return;
+
+	/*
+	 * Make default initialisation trace everything
+	 *
+	 * Select the "always true" resource selector on the
+	 * "Enablign Event" line and configure address range comparator
+	 * '0' to trace all the possible address range.  From there
+	 * configure the "include/exclude" engine to include address
+	 * range comparator '0'.
+	 */
+	etm4_set_default_config(config);
+	etm4_set_default_filter(config);
+}
+
+static int etm4_get_next_comparator(struct etmv4_drvdata *drvdata, u32 type)
+{
+	int nr_comparator, index = 0;
+	struct etmv4_config *config = &drvdata->config;
+
+	/*
+	 * nr_addr_cmp holds the number of comparator _pair_, so time 2
+	 * for the total number of comparators.
+	 */
+	nr_comparator = drvdata->nr_addr_cmp * 2;
+
+	/* Go through the tally of comparators looking for a free one. */
+	while (index < nr_comparator) {
+		switch (type) {
+		case ETM_ADDR_TYPE_RANGE:
+			if (config->addr_type[index] == ETM_ADDR_TYPE_NONE &&
+			    config->addr_type[index + 1] == ETM_ADDR_TYPE_NONE)
+				return index;
+
+			/* Address range comparators go in pairs */
+			index += 2;
+			break;
+		case ETM_ADDR_TYPE_START:
+		case ETM_ADDR_TYPE_STOP:
+			if (config->addr_type[index] == ETM_ADDR_TYPE_NONE)
+				return index;
+
+			/* Start/stop address can have odd indexes */
+			index += 1;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	/* If we are here all the comparators have been used. */
+	return -ENOSPC;
+}
+
+static int etm4_set_event_filters(struct etmv4_drvdata *drvdata,
+				  struct perf_event *event)
+{
+	int i, comparator, ret = 0;
+	u64 address;
+	struct etmv4_config *config = &drvdata->config;
+	struct etm_filters *filters = event->hw.addr_filters;
+
+	if (!filters)
+		goto default_filter;
+
+	/* Sync events with what Perf got */
+	perf_event_addr_filters_sync(event);
+
+	/*
+	 * If there are no filters to deal with simply go ahead with
+	 * the default filter, i.e the entire address range.
+	 */
+	if (!filters->nr_filters)
+		goto default_filter;
+
+	for (i = 0; i < filters->nr_filters; i++) {
+		struct etm_filter *filter = &filters->etm_filter[i];
+		enum etm_addr_type type = filter->type;
+
+		/* See if a comparator is free. */
+		comparator = etm4_get_next_comparator(drvdata, type);
+		if (comparator < 0) {
+			ret = comparator;
+			goto out;
+		}
+
+		switch (type) {
+		case ETM_ADDR_TYPE_RANGE:
+			etm4_set_comparator_filter(config,
+						   filter->start_addr,
+						   filter->stop_addr,
+						   comparator);
+			/*
+			 * TRCVICTLR::SSSTATUS == 1, the start-stop logic is
+			 * in the started state
+			 */
+			config->vinst_ctrl |= BIT(9);
+
+			/* No start-stop filtering for ViewInst */
+			config->vissctlr = 0x0;
+			break;
+		case ETM_ADDR_TYPE_START:
+		case ETM_ADDR_TYPE_STOP:
+			/* Get the right start or stop address */
+			address = (type == ETM_ADDR_TYPE_START ?
+				   filter->start_addr :
+				   filter->stop_addr);
+
+			/* Configure comparator */
+			etm4_set_start_stop_filter(config, address,
+						   comparator, type);
+
+			/*
+			 * If filters::ssstatus == 1, trace acquisition was
+			 * started but the process was yanked away before the
+			 * the stop address was hit.  As such the start/stop
+			 * logic needs to be re-started so that tracing can
+			 * resume where it left.
+			 *
+			 * The start/stop logic status when a process is
+			 * scheduled out is checked in function
+			 * etm4_disable_perf().
+			 */
+			if (filters->ssstatus)
+				config->vinst_ctrl |= BIT(9);
+
+			/* No include/exclude filtering for ViewInst */
+			config->viiectlr = 0x0;
+			break;
+		default:
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	goto out;
+
+
+default_filter:
+	etm4_set_default_filter(config);
+
+out:
+	return ret;
+}
+
 void etm4_config_trace_mode(struct etmv4_config *config)
 {
 	u32 addr_acc, mode;
@@ -727,13 +950,9 @@
 	struct coresight_platform_data *pdata = NULL;
 	struct etmv4_drvdata *drvdata;
 	struct resource *res = &adev->res;
-	struct coresight_desc *desc;
+	struct coresight_desc desc = { 0 };
 	struct device_node *np = adev->dev.of_node;
 
-	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
-	if (!desc)
-		return -ENOMEM;
-
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
 		return -ENOMEM;
@@ -788,13 +1007,13 @@
 	etm4_init_trace_id(drvdata);
 	etm4_set_default(&drvdata->config);
 
-	desc->type = CORESIGHT_DEV_TYPE_SOURCE;
-	desc->subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
-	desc->ops = &etm4_cs_ops;
-	desc->pdata = pdata;
-	desc->dev = dev;
-	desc->groups = coresight_etmv4_groups;
-	drvdata->csdev = coresight_register(desc);
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
+	desc.ops = &etm4_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_etmv4_groups;
+	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev)) {
 		ret = PTR_ERR(drvdata->csdev);
 		goto err_arch_supported;
@@ -826,12 +1045,12 @@
 }
 
 static struct amba_id etm4_ids[] = {
-	{       /* ETM 4.0 - Qualcomm */
-		.id	= 0x0003b95d,
-		.mask	= 0x0003ffff,
+	{       /* ETM 4.0 - Cortex-A53  */
+		.id	= 0x000bb95d,
+		.mask	= 0x000fffff,
 		.data	= "ETM 4.0",
 	},
-	{       /* ETM 4.0 - Juno board */
+	{       /* ETM 4.0 - Cortex-A57 */
 		.id	= 0x000bb95e,
 		.mask	= 0x000fffff,
 		.data	= "ETM 4.0",
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index 5359c51..ba8d3f8 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -183,6 +183,9 @@
 #define TRCSTATR_IDLE_BIT		0
 #define ETM_DEFAULT_ADDR_COMP		0
 
+/* PowerDown Control Register bits */
+#define TRCPDCR_PU			BIT(3)
+
 /* secure state access levels */
 #define ETM_EXLEVEL_S_APP		BIT(8)
 #define ETM_EXLEVEL_S_OS		BIT(9)
@@ -407,14 +410,6 @@
 	ETM_CTX_CTXID_VMID,
 };
 
-enum etm_addr_type {
-	ETM_ADDR_TYPE_NONE,
-	ETM_ADDR_TYPE_SINGLE,
-	ETM_ADDR_TYPE_RANGE,
-	ETM_ADDR_TYPE_START,
-	ETM_ADDR_TYPE_STOP,
-};
-
 extern const struct attribute_group *coresight_etmv4_groups[];
 void etm4_config_trace_mode(struct etmv4_config *config);
 #endif
diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c
index 05df789..860fe6e 100644
--- a/drivers/hwtracing/coresight/coresight-funnel.c
+++ b/drivers/hwtracing/coresight/coresight-funnel.c
@@ -176,7 +176,7 @@
 	struct coresight_platform_data *pdata = NULL;
 	struct funnel_drvdata *drvdata;
 	struct resource *res = &adev->res;
-	struct coresight_desc *desc;
+	struct coresight_desc desc = { 0 };
 	struct device_node *np = adev->dev.of_node;
 
 	if (np) {
@@ -207,17 +207,13 @@
 	drvdata->base = base;
 	pm_runtime_put(&adev->dev);
 
-	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
-	if (!desc)
-		return -ENOMEM;
-
-	desc->type = CORESIGHT_DEV_TYPE_LINK;
-	desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG;
-	desc->ops = &funnel_cs_ops;
-	desc->pdata = pdata;
-	desc->dev = dev;
-	desc->groups = coresight_funnel_groups;
-	drvdata->csdev = coresight_register(desc);
+	desc.type = CORESIGHT_DEV_TYPE_LINK;
+	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG;
+	desc.ops = &funnel_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_funnel_groups;
+	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev))
 		return PTR_ERR(drvdata->csdev);
 
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index ad975c5..196a14b 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -16,6 +16,7 @@
 #include <linux/bitops.h>
 #include <linux/io.h>
 #include <linux/coresight.h>
+#include <linux/pm_runtime.h>
 
 /*
  * Coresight management registers (0xf00-0xfcc)
@@ -37,16 +38,32 @@
 #define ETM_MODE_EXCL_KERN	BIT(30)
 #define ETM_MODE_EXCL_USER	BIT(31)
 
-#define coresight_simple_func(type, name, offset)			\
+typedef u32 (*coresight_read_fn)(const struct device *, u32 offset);
+#define coresight_simple_func(type, func, name, offset)			\
 static ssize_t name##_show(struct device *_dev,				\
 			   struct device_attribute *attr, char *buf)	\
 {									\
 	type *drvdata = dev_get_drvdata(_dev->parent);			\
-	return scnprintf(buf, PAGE_SIZE, "0x%x\n",			\
-			 readl_relaxed(drvdata->base + offset));	\
+	coresight_read_fn fn = func;					\
+	u32 val;							\
+	pm_runtime_get_sync(_dev->parent);				\
+	if (fn)								\
+		val = fn(_dev->parent, offset);				\
+	else								\
+		val = readl_relaxed(drvdata->base + offset);		\
+	pm_runtime_put_sync(_dev->parent);				\
+	return scnprintf(buf, PAGE_SIZE, "0x%x\n", val);		\
 }									\
 static DEVICE_ATTR_RO(name)
 
+enum etm_addr_type {
+	ETM_ADDR_TYPE_NONE,
+	ETM_ADDR_TYPE_SINGLE,
+	ETM_ADDR_TYPE_RANGE,
+	ETM_ADDR_TYPE_START,
+	ETM_ADDR_TYPE_STOP,
+};
+
 enum cs_mode {
 	CS_MODE_DISABLED,
 	CS_MODE_SYSFS,
diff --git a/drivers/hwtracing/coresight/coresight-replicator-qcom.c b/drivers/hwtracing/coresight/coresight-replicator-qcom.c
index 700f710..0a3d15f 100644
--- a/drivers/hwtracing/coresight/coresight-replicator-qcom.c
+++ b/drivers/hwtracing/coresight/coresight-replicator-qcom.c
@@ -102,7 +102,7 @@
 	struct resource *res = &adev->res;
 	struct coresight_platform_data *pdata = NULL;
 	struct replicator_state *drvdata;
-	struct coresight_desc *desc;
+	struct coresight_desc desc = { 0 };
 	struct device_node *np = adev->dev.of_node;
 	void __iomem *base;
 
@@ -134,16 +134,12 @@
 	dev_set_drvdata(dev, drvdata);
 	pm_runtime_put(&adev->dev);
 
-	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
-	if (!desc)
-		return -ENOMEM;
-
-	desc->type = CORESIGHT_DEV_TYPE_LINK;
-	desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
-	desc->ops = &replicator_cs_ops;
-	desc->pdata = adev->dev.platform_data;
-	desc->dev = &adev->dev;
-	drvdata->csdev = coresight_register(desc);
+	desc.type = CORESIGHT_DEV_TYPE_LINK;
+	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
+	desc.ops = &replicator_cs_ops;
+	desc.pdata = adev->dev.platform_data;
+	desc.dev = &adev->dev;
+	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev))
 		return PTR_ERR(drvdata->csdev);
 
diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c
index c6982e3..3756e71 100644
--- a/drivers/hwtracing/coresight/coresight-replicator.c
+++ b/drivers/hwtracing/coresight/coresight-replicator.c
@@ -69,7 +69,7 @@
 	struct device *dev = &pdev->dev;
 	struct coresight_platform_data *pdata = NULL;
 	struct replicator_drvdata *drvdata;
-	struct coresight_desc *desc;
+	struct coresight_desc desc = { 0 };
 	struct device_node *np = pdev->dev.of_node;
 
 	if (np) {
@@ -95,18 +95,12 @@
 	pm_runtime_enable(&pdev->dev);
 	platform_set_drvdata(pdev, drvdata);
 
-	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
-	if (!desc) {
-		ret = -ENOMEM;
-		goto out_disable_pm;
-	}
-
-	desc->type = CORESIGHT_DEV_TYPE_LINK;
-	desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
-	desc->ops = &replicator_cs_ops;
-	desc->pdata = pdev->dev.platform_data;
-	desc->dev = &pdev->dev;
-	drvdata->csdev = coresight_register(desc);
+	desc.type = CORESIGHT_DEV_TYPE_LINK;
+	desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT;
+	desc.ops = &replicator_cs_ops;
+	desc.pdata = pdev->dev.platform_data;
+	desc.dev = &pdev->dev;
+	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev)) {
 		ret = PTR_ERR(drvdata->csdev);
 		goto out_disable_pm;
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index 73be58a..49e0f1b 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -105,10 +105,12 @@
 /**
  * struct channel_space - central management entity for extended ports
  * @base:		memory mapped base address where channels start.
+ * @phys:		physical base address of channel region.
  * @guaraneed:		is the channel delivery guaranteed.
  */
 struct channel_space {
 	void __iomem		*base;
+	phys_addr_t		phys;
 	unsigned long		*guaranteed;
 };
 
@@ -196,7 +198,7 @@
 }
 
 static int stm_enable(struct coresight_device *csdev,
-		      struct perf_event_attr *attr, u32 mode)
+		      struct perf_event *event, u32 mode)
 {
 	u32 val;
 	struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -258,7 +260,8 @@
 		stm_hwevent_disable_hw(drvdata);
 }
 
-static void stm_disable(struct coresight_device *csdev)
+static void stm_disable(struct coresight_device *csdev,
+			struct perf_event *event)
 {
 	struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
@@ -353,7 +356,24 @@
 	if (!drvdata || !drvdata->csdev)
 		return;
 
-	stm_disable(drvdata->csdev);
+	stm_disable(drvdata->csdev, NULL);
+}
+
+static phys_addr_t
+stm_mmio_addr(struct stm_data *stm_data, unsigned int master,
+	      unsigned int channel, unsigned int nr_chans)
+{
+	struct stm_drvdata *drvdata = container_of(stm_data,
+						   struct stm_drvdata, stm);
+	phys_addr_t addr;
+
+	addr = drvdata->chs.phys + channel * BYTES_PER_CHANNEL;
+
+	if (offset_in_page(addr) ||
+	    offset_in_page(nr_chans * BYTES_PER_CHANNEL))
+		return 0;
+
+	return addr;
 }
 
 static long stm_generic_set_options(struct stm_data *stm_data,
@@ -616,7 +636,7 @@
 static DEVICE_ATTR_RW(traceid);
 
 #define coresight_stm_simple_func(name, offset)	\
-	coresight_simple_func(struct stm_drvdata, name, offset)
+	coresight_simple_func(struct stm_drvdata, NULL, name, offset)
 
 coresight_stm_simple_func(tcsr, STMTCSR);
 coresight_stm_simple_func(tsfreqr, STMTSFREQR);
@@ -761,7 +781,9 @@
 	drvdata->stm.sw_end = 1;
 	drvdata->stm.hw_override = true;
 	drvdata->stm.sw_nchannels = drvdata->numsp;
+	drvdata->stm.sw_mmiosz = BYTES_PER_CHANNEL;
 	drvdata->stm.packet = stm_generic_packet;
+	drvdata->stm.mmio_addr = stm_mmio_addr;
 	drvdata->stm.link = stm_generic_link;
 	drvdata->stm.unlink = stm_generic_unlink;
 	drvdata->stm.set_options = stm_generic_set_options;
@@ -778,7 +800,7 @@
 	struct resource *res = &adev->res;
 	struct resource ch_res;
 	size_t res_size, bitmap_size;
-	struct coresight_desc *desc;
+	struct coresight_desc desc = { 0 };
 	struct device_node *np = adev->dev.of_node;
 
 	if (np) {
@@ -808,6 +830,7 @@
 	ret = stm_get_resource_byname(np, "stm-stimulus-base", &ch_res);
 	if (ret)
 		return ret;
+	drvdata->chs.phys = ch_res.start;
 
 	base = devm_ioremap_resource(dev, &ch_res);
 	if (IS_ERR(base))
@@ -843,19 +866,13 @@
 		return -EPROBE_DEFER;
 	}
 
-	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
-	if (!desc) {
-		ret = -ENOMEM;
-		goto stm_unregister;
-	}
-
-	desc->type = CORESIGHT_DEV_TYPE_SOURCE;
-	desc->subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
-	desc->ops = &stm_cs_ops;
-	desc->pdata = pdata;
-	desc->dev = dev;
-	desc->groups = coresight_stm_groups;
-	drvdata->csdev = coresight_register(desc);
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE;
+	desc.ops = &stm_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_stm_groups;
+	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev)) {
 		ret = PTR_ERR(drvdata->csdev);
 		goto stm_unregister;
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index 466af86..d6941ea 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -22,7 +22,7 @@
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
 
-void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
+static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
 {
 	CS_UNLOCK(drvdata->base);
 
@@ -48,6 +48,7 @@
 	int i;
 
 	bufp = drvdata->buf;
+	drvdata->len = 0;
 	while (1) {
 		for (i = 0; i < drvdata->memwidth; i++) {
 			read_data = readl_relaxed(drvdata->base + TMC_RRD);
@@ -55,6 +56,7 @@
 				return;
 			memcpy(bufp, &read_data, 4);
 			bufp += 4;
+			drvdata->len += 4;
 		}
 	}
 }
@@ -166,7 +168,7 @@
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 	/* Free memory outside the spinlock if need be */
-	if (!used && buf)
+	if (!used)
 		kfree(buf);
 
 	if (!ret)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 688be9e..886ea83 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -20,7 +20,7 @@
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
 
-void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
+static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata)
 {
 	u32 axictl;
 
@@ -64,11 +64,17 @@
 	rwp = readl_relaxed(drvdata->base + TMC_RWP);
 	val = readl_relaxed(drvdata->base + TMC_STS);
 
-	/* How much memory do we still have */
-	if (val & BIT(0))
+	/*
+	 * Adjust the buffer to point to the beginning of the trace data
+	 * and update the available trace data.
+	 */
+	if (val & TMC_STS_FULL) {
 		drvdata->buf = drvdata->vaddr + rwp - drvdata->paddr;
-	else
+		drvdata->len = drvdata->size;
+	} else {
 		drvdata->buf = drvdata->vaddr;
+		drvdata->len = rwp - drvdata->paddr;
+	}
 }
 
 static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c
index 9e02ac9..d8517d2 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.c
+++ b/drivers/hwtracing/coresight/coresight-tmc.c
@@ -38,8 +38,7 @@
 	if (coresight_timeout(drvdata->base,
 			      TMC_STS, TMC_STS_TMCREADY_BIT, 1)) {
 		dev_err(drvdata->dev,
-			"timeout observed when probing at offset %#x\n",
-			TMC_STS);
+			"timeout while waiting for TMC to be Ready\n");
 	}
 }
 
@@ -56,8 +55,7 @@
 	if (coresight_timeout(drvdata->base,
 			      TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
 		dev_err(drvdata->dev,
-			"timeout observed when probing at offset %#x\n",
-			TMC_FFCR);
+		"timeout while waiting for completion of Manual Flush\n");
 	}
 
 	tmc_wait_for_tmcready(drvdata);
@@ -140,8 +138,8 @@
 						   struct tmc_drvdata, miscdev);
 	char *bufp = drvdata->buf + *ppos;
 
-	if (*ppos + len > drvdata->size)
-		len = drvdata->size - *ppos;
+	if (*ppos + len > drvdata->len)
+		len = drvdata->len - *ppos;
 
 	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
 		if (bufp == (char *)(drvdata->vaddr + drvdata->size))
@@ -160,7 +158,7 @@
 	*ppos += len;
 
 	dev_dbg(drvdata->dev, "%s: %zu bytes copied, %d bytes left\n",
-		__func__, len, (int)(drvdata->size - *ppos));
+		__func__, len, (int)(drvdata->len - *ppos));
 	return len;
 }
 
@@ -220,7 +218,7 @@
 }
 
 #define coresight_tmc_simple_func(name, offset)			\
-	coresight_simple_func(struct tmc_drvdata, name, offset)
+	coresight_simple_func(struct tmc_drvdata, NULL, name, offset)
 
 coresight_tmc_simple_func(rsz, TMC_RSZ);
 coresight_tmc_simple_func(sts, TMC_STS);
@@ -249,8 +247,8 @@
 	NULL,
 };
 
-ssize_t trigger_cntr_show(struct device *dev,
-			  struct device_attribute *attr, char *buf)
+static ssize_t trigger_cntr_show(struct device *dev,
+				 struct device_attribute *attr, char *buf)
 {
 	struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent);
 	unsigned long val = drvdata->trigger_cntr;
@@ -304,27 +302,32 @@
 	struct coresight_platform_data *pdata = NULL;
 	struct tmc_drvdata *drvdata;
 	struct resource *res = &adev->res;
-	struct coresight_desc *desc;
+	struct coresight_desc desc = { 0 };
 	struct device_node *np = adev->dev.of_node;
 
 	if (np) {
 		pdata = of_get_coresight_platform_data(dev, np);
-		if (IS_ERR(pdata))
-			return PTR_ERR(pdata);
+		if (IS_ERR(pdata)) {
+			ret = PTR_ERR(pdata);
+			goto out;
+		}
 		adev->dev.platform_data = pdata;
 	}
 
+	ret = -ENOMEM;
 	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
-		return -ENOMEM;
+		goto out;
 
 	drvdata->dev = &adev->dev;
 	dev_set_drvdata(dev, drvdata);
 
 	/* Validity for the resource is already checked by the AMBA core */
 	base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
+	if (IS_ERR(base)) {
+		ret = PTR_ERR(base);
+		goto out;
+	}
 
 	drvdata->base = base;
 
@@ -347,33 +350,28 @@
 
 	pm_runtime_put(&adev->dev);
 
-	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
-	if (!desc) {
-		ret = -ENOMEM;
-		goto err_devm_kzalloc;
-	}
-
-	desc->pdata = pdata;
-	desc->dev = dev;
-	desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
-	desc->groups = coresight_tmc_groups;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	desc.groups = coresight_tmc_groups;
 
 	if (drvdata->config_type == TMC_CONFIG_TYPE_ETB) {
-		desc->type = CORESIGHT_DEV_TYPE_SINK;
-		desc->ops = &tmc_etb_cs_ops;
+		desc.type = CORESIGHT_DEV_TYPE_SINK;
+		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+		desc.ops = &tmc_etb_cs_ops;
 	} else if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) {
-		desc->type = CORESIGHT_DEV_TYPE_SINK;
-		desc->ops = &tmc_etr_cs_ops;
+		desc.type = CORESIGHT_DEV_TYPE_SINK;
+		desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_BUFFER;
+		desc.ops = &tmc_etr_cs_ops;
 	} else {
-		desc->type = CORESIGHT_DEV_TYPE_LINKSINK;
-		desc->subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
-		desc->ops = &tmc_etf_cs_ops;
+		desc.type = CORESIGHT_DEV_TYPE_LINKSINK;
+		desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_FIFO;
+		desc.ops = &tmc_etf_cs_ops;
 	}
 
-	drvdata->csdev = coresight_register(desc);
+	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev)) {
 		ret = PTR_ERR(drvdata->csdev);
-		goto err_devm_kzalloc;
+		goto out;
 	}
 
 	drvdata->miscdev.name = pdata->name;
@@ -381,16 +379,8 @@
 	drvdata->miscdev.fops = &tmc_fops;
 	ret = misc_register(&drvdata->miscdev);
 	if (ret)
-		goto err_misc_register;
-
-	return 0;
-
-err_misc_register:
-	coresight_unregister(drvdata->csdev);
-err_devm_kzalloc:
-	if (drvdata->config_type == TMC_CONFIG_TYPE_ETR)
-		dma_free_coherent(dev, drvdata->size,
-				drvdata->vaddr, drvdata->paddr);
+		coresight_unregister(drvdata->csdev);
+out:
 	return ret;
 }
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index 5c5fe2a..44b3ae3 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -98,7 +98,8 @@
  * @buf:	area of memory where trace data get sent.
  * @paddr:	DMA start location in RAM.
  * @vaddr:	virtual representation of @paddr.
- * @size:	@buf size.
+ * @size:	trace buffer size.
+ * @len:	size of the available trace.
  * @mode:	how this TMC is being used.
  * @config_type: TMC variant, must be of type @tmc_config_type.
  * @memwidth:	width of the memory interface databus, in bytes.
@@ -115,6 +116,7 @@
 	dma_addr_t		paddr;
 	void __iomem		*vaddr;
 	u32			size;
+	u32			len;
 	local_t			mode;
 	enum tmc_config_type	config_type;
 	enum tmc_mem_intf_width	memwidth;
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index 4e471e2..0673baf 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -119,7 +119,7 @@
 	struct coresight_platform_data *pdata = NULL;
 	struct tpiu_drvdata *drvdata;
 	struct resource *res = &adev->res;
-	struct coresight_desc *desc;
+	struct coresight_desc desc = { 0 };
 	struct device_node *np = adev->dev.of_node;
 
 	if (np) {
@@ -154,16 +154,12 @@
 
 	pm_runtime_put(&adev->dev);
 
-	desc = devm_kzalloc(dev, sizeof(*desc), GFP_KERNEL);
-	if (!desc)
-		return -ENOMEM;
-
-	desc->type = CORESIGHT_DEV_TYPE_SINK;
-	desc->subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PORT;
-	desc->ops = &tpiu_cs_ops;
-	desc->pdata = pdata;
-	desc->dev = dev;
-	drvdata->csdev = coresight_register(desc);
+	desc.type = CORESIGHT_DEV_TYPE_SINK;
+	desc.subtype.sink_subtype = CORESIGHT_DEV_SUBTYPE_SINK_PORT;
+	desc.ops = &tpiu_cs_ops;
+	desc.pdata = pdata;
+	desc.dev = dev;
+	drvdata->csdev = coresight_register(&desc);
 	if (IS_ERR(drvdata->csdev))
 		return PTR_ERR(drvdata->csdev);
 
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index d08d1ab..7bf00a0 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -257,7 +257,7 @@
 {
 	if (atomic_dec_return(csdev->refcnt) == 0) {
 		if (source_ops(csdev)->disable) {
-			source_ops(csdev)->disable(csdev);
+			source_ops(csdev)->disable(csdev, NULL);
 			csdev->enable = false;
 		}
 	}
@@ -429,7 +429,7 @@
 
 	path = kzalloc(sizeof(struct list_head), GFP_KERNEL);
 	if (!path)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(path);
 
@@ -725,7 +725,8 @@
 		/* We have found at least one orphan connection */
 		if (conn->child_dev == NULL) {
 			/* Does it match this newly added device? */
-			if (!strcmp(dev_name(&csdev->dev), conn->child_name)) {
+			if (conn->child_name &&
+			    !strcmp(dev_name(&csdev->dev), conn->child_name)) {
 				conn->child_dev = csdev;
 			} else {
 				/* This component still has an orphan */
@@ -893,7 +894,7 @@
 	int nr_refcnts = 1;
 	atomic_t *refcnts = NULL;
 	struct coresight_device *csdev;
-	struct coresight_connection *conns;
+	struct coresight_connection *conns = NULL;
 
 	csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
 	if (!csdev) {
@@ -921,16 +922,20 @@
 
 	csdev->nr_inport = desc->pdata->nr_inport;
 	csdev->nr_outport = desc->pdata->nr_outport;
-	conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
-	if (!conns) {
-		ret = -ENOMEM;
-		goto err_kzalloc_conns;
-	}
 
-	for (i = 0; i < csdev->nr_outport; i++) {
-		conns[i].outport = desc->pdata->outports[i];
-		conns[i].child_name = desc->pdata->child_names[i];
-		conns[i].child_port = desc->pdata->child_ports[i];
+	/* Initialise connections if there is at least one outport */
+	if (csdev->nr_outport) {
+		conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
+		if (!conns) {
+			ret = -ENOMEM;
+			goto err_kzalloc_conns;
+		}
+
+		for (i = 0; i < csdev->nr_outport; i++) {
+			conns[i].outport = desc->pdata->outports[i];
+			conns[i].child_name = desc->pdata->child_names[i];
+			conns[i].child_port = desc->pdata->child_ports[i];
+		}
 	}
 
 	csdev->conns = conns;
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index b68da18..629e031 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -166,7 +166,7 @@
 
 			rdev = of_coresight_get_endpoint_device(rparent);
 			if (!rdev)
-				continue;
+				return ERR_PTR(-EPROBE_DEFER);
 
 			pdata->child_names[i] = dev_name(rdev);
 			pdata->child_ports[i] = rendpoint.id;
@@ -184,6 +184,7 @@
 			break;
 		}
 	}
+	of_node_put(dn);
 
 	return pdata;
 }
diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c
index f233726..1bb97f6 100644
--- a/drivers/i2c/busses/i2c-at91.c
+++ b/drivers/i2c/busses/i2c-at91.c
@@ -38,6 +38,7 @@
 #define AT91_I2C_TIMEOUT	msecs_to_jiffies(100)	/* transfer timeout */
 #define AT91_I2C_DMA_THRESHOLD	8			/* enable DMA if transfer size is bigger than this threshold */
 #define AUTOSUSPEND_TIMEOUT		2000
+#define AT91_I2C_MAX_ALT_CMD_DATA_SIZE	256
 
 /* AT91 TWI register definitions */
 #define	AT91_TWI_CR		0x0000	/* Control Register */
@@ -141,6 +142,7 @@
 	unsigned twi_cwgr_reg;
 	struct at91_twi_pdata *pdata;
 	bool use_dma;
+	bool use_alt_cmd;
 	bool recv_len_abort;
 	u32 fifo_size;
 	struct at91_twi_dma dma;
@@ -269,7 +271,7 @@
 
 	/* send stop when last byte has been written */
 	if (--dev->buf_len == 0)
-		if (!dev->pdata->has_alt_cmd)
+		if (!dev->use_alt_cmd)
 			at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
 	dev_dbg(dev->dev, "wrote 0x%x, to go %d\n", *dev->buf, dev->buf_len);
@@ -292,7 +294,7 @@
 	 * we just have to enable TXCOMP one.
 	 */
 	at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP);
-	if (!dev->pdata->has_alt_cmd)
+	if (!dev->use_alt_cmd)
 		at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 }
 
@@ -410,7 +412,7 @@
 	}
 
 	/* send stop if second but last byte has been read */
-	if (!dev->pdata->has_alt_cmd && dev->buf_len == 1)
+	if (!dev->use_alt_cmd && dev->buf_len == 1)
 		at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP);
 
 	dev_dbg(dev->dev, "read 0x%x, to go %d\n", *dev->buf, dev->buf_len);
@@ -426,7 +428,7 @@
 	dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg[0]),
 			 dev->buf_len, DMA_FROM_DEVICE);
 
-	if (!dev->pdata->has_alt_cmd) {
+	if (!dev->use_alt_cmd) {
 		/* The last two bytes have to be read without using dma */
 		dev->buf += dev->buf_len - 2;
 		dev->buf_len = 2;
@@ -443,7 +445,7 @@
 	struct dma_chan *chan_rx = dma->chan_rx;
 	size_t buf_len;
 
-	buf_len = (dev->pdata->has_alt_cmd) ? dev->buf_len : dev->buf_len - 2;
+	buf_len = (dev->use_alt_cmd) ? dev->buf_len : dev->buf_len - 2;
 	dma->direction = DMA_FROM_DEVICE;
 
 	/* Keep in mind that we won't use dma to read the last two bytes */
@@ -651,7 +653,7 @@
 		unsigned start_flags = AT91_TWI_START;
 
 		/* if only one byte is to be read, immediately stop transfer */
-		if (!has_alt_cmd && dev->buf_len <= 1 &&
+		if (!dev->use_alt_cmd && dev->buf_len <= 1 &&
 		    !(dev->msg->flags & I2C_M_RECV_LEN))
 			start_flags |= AT91_TWI_STOP;
 		at91_twi_write(dev, AT91_TWI_CR, start_flags);
@@ -745,7 +747,7 @@
 	int ret;
 	unsigned int_addr_flag = 0;
 	struct i2c_msg *m_start = msg;
-	bool is_read, use_alt_cmd = false;
+	bool is_read;
 
 	dev_dbg(&adap->dev, "at91_xfer: processing %d messages:\n", num);
 
@@ -768,14 +770,16 @@
 		at91_twi_write(dev, AT91_TWI_IADR, internal_address);
 	}
 
+	dev->use_alt_cmd = false;
 	is_read = (m_start->flags & I2C_M_RD);
 	if (dev->pdata->has_alt_cmd) {
-		if (m_start->len > 0) {
+		if (m_start->len > 0 &&
+		    m_start->len < AT91_I2C_MAX_ALT_CMD_DATA_SIZE) {
 			at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMEN);
 			at91_twi_write(dev, AT91_TWI_ACR,
 				       AT91_TWI_ACR_DATAL(m_start->len) |
 				       ((is_read) ? AT91_TWI_ACR_DIR : 0));
-			use_alt_cmd = true;
+			dev->use_alt_cmd = true;
 		} else {
 			at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMDIS);
 		}
@@ -784,7 +788,7 @@
 	at91_twi_write(dev, AT91_TWI_MMR,
 		       (m_start->addr << 16) |
 		       int_addr_flag |
-		       ((!use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0));
+		       ((!dev->use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0));
 
 	dev->buf_len = m_start->len;
 	dev->buf = m_start->buf;
diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c
index 19c8438..95f7cac 100644
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -158,7 +158,7 @@
 
 	if (status & BIT(IS_M_START_BUSY_SHIFT)) {
 		iproc_i2c->xfer_is_done = 1;
-		complete_all(&iproc_i2c->done);
+		complete(&iproc_i2c->done);
 	}
 
 	writel(status, iproc_i2c->base + IS_OFFSET);
diff --git a/drivers/i2c/busses/i2c-bcm-kona.c b/drivers/i2c/busses/i2c-bcm-kona.c
index ac9f476..258cb9a 100644
--- a/drivers/i2c/busses/i2c-bcm-kona.c
+++ b/drivers/i2c/busses/i2c-bcm-kona.c
@@ -229,7 +229,7 @@
 		       dev->base + TXFCR_OFFSET);
 
 	writel(status & ~ISR_RESERVED_MASK, dev->base + ISR_OFFSET);
-	complete_all(&dev->done);
+	complete(&dev->done);
 
 	return IRQ_HANDLED;
 }
@@ -643,7 +643,7 @@
 			if (rc < 0) {
 				dev_err(dev->device,
 					"restart cmd failed rc = %d\n", rc);
-					goto xfer_send_stop;
+				goto xfer_send_stop;
 			}
 		}
 
diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index 3f5a4d7..385b57b 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -228,7 +228,7 @@
 		return IRQ_NONE;
 
 	brcmstb_i2c_enable_disable_irq(dev, INT_DISABLE);
-	complete_all(&dev->done);
+	complete(&dev->done);
 
 	dev_dbg(dev->device, "isr handled");
 	return IRQ_HANDLED;
diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c
index 90bbd9f..3c16a2f 100644
--- a/drivers/i2c/busses/i2c-cadence.c
+++ b/drivers/i2c/busses/i2c-cadence.c
@@ -767,7 +767,7 @@
  * depending on the scaling direction.
  *
  * Return:	NOTIFY_STOP if the rate change should be aborted, NOTIFY_OK
- *		to acknowedge the change, NOTIFY_DONE if the notification is
+ *		to acknowledge the change, NOTIFY_DONE if the notification is
  *		considered irrelevant.
  */
 static int cdns_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long
diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
index a0d95ff..2d5ff863 100644
--- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
@@ -215,7 +215,7 @@
 	msg->outsize = request_len;
 	msg->insize = response_len;
 
-	result = cros_ec_cmd_xfer(bus->ec, msg);
+	result = cros_ec_cmd_xfer_status(bus->ec, msg);
 	if (result < 0) {
 		dev_err(dev, "Error transferring EC i2c message %d\n", result);
 		goto exit;
diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c
index c6922b8..fcd973d 100644
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -367,13 +367,17 @@
 	dev_dbg(dev->dev, "Fast-mode HCNT:LCNT = %d:%d\n", hcnt, lcnt);
 
 	/* Configure SDA Hold Time if required */
-	if (dev->sda_hold_time) {
-		reg = dw_readl(dev, DW_IC_COMP_VERSION);
-		if (reg >= DW_IC_SDA_HOLD_MIN_VERS)
+	reg = dw_readl(dev, DW_IC_COMP_VERSION);
+	if (reg >= DW_IC_SDA_HOLD_MIN_VERS) {
+		if (dev->sda_hold_time) {
 			dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
-		else
-			dev_warn(dev->dev,
-				"Hardware too old to adjust SDA hold time.");
+		} else {
+			/* Keep previous hold time setting if no one set it */
+			dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD);
+		}
+	} else {
+		dev_warn(dev->dev,
+			"Hardware too old to adjust SDA hold time.\n");
 	}
 
 	/* Configure Tx/Rx FIFO threshold levels */
diff --git a/drivers/i2c/busses/i2c-eg20t.c b/drivers/i2c/busses/i2c-eg20t.c
index 137125b..5ce71ce 100644
--- a/drivers/i2c/busses/i2c-eg20t.c
+++ b/drivers/i2c/busses/i2c-eg20t.c
@@ -773,13 +773,6 @@
 	/* Set the number of I2C channel instance */
 	adap_info->ch_num = id->driver_data;
 
-	ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
-		  KBUILD_MODNAME, adap_info);
-	if (ret) {
-		pch_pci_err(pdev, "request_irq FAILED\n");
-		goto err_request_irq;
-	}
-
 	for (i = 0; i < adap_info->ch_num; i++) {
 		pch_adap = &adap_info->pch_data[i].pch_adapter;
 		adap_info->pch_i2c_suspended = false;
@@ -797,6 +790,17 @@
 
 		pch_adap->dev.of_node = pdev->dev.of_node;
 		pch_adap->dev.parent = &pdev->dev;
+	}
+
+	ret = request_irq(pdev->irq, pch_i2c_handler, IRQF_SHARED,
+		  KBUILD_MODNAME, adap_info);
+	if (ret) {
+		pch_pci_err(pdev, "request_irq FAILED\n");
+		goto err_request_irq;
+	}
+
+	for (i = 0; i < adap_info->ch_num; i++) {
+		pch_adap = &adap_info->pch_data[i].pch_adapter;
 
 		pch_i2c_init(&adap_info->pch_data[i]);
 
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 5ef9b73..26298af 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1486,7 +1486,9 @@
 		priv->features |= FEATURE_IRQ;
 		priv->features |= FEATURE_SMBUS_PEC;
 		priv->features |= FEATURE_BLOCK_BUFFER;
-		priv->features |= FEATURE_TCO;
+		/* If we have ACPI based watchdog use that instead */
+		if (!acpi_has_watchdog())
+			priv->features |= FEATURE_TCO;
 		priv->features |= FEATURE_HOST_NOTIFY;
 		break;
 
diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c
index 71d3929..76e2898 100644
--- a/drivers/i2c/busses/i2c-meson.c
+++ b/drivers/i2c/busses/i2c-meson.c
@@ -211,7 +211,7 @@
 		meson_i2c_add_token(i2c, TOKEN_STOP);
 	} else {
 		i2c->state = STATE_IDLE;
-		complete_all(&i2c->done);
+		complete(&i2c->done);
 	}
 }
 
@@ -238,7 +238,7 @@
 		dev_dbg(i2c->dev, "error bit set\n");
 		i2c->error = -ENXIO;
 		i2c->state = STATE_IDLE;
-		complete_all(&i2c->done);
+		complete(&i2c->done);
 		goto out;
 	}
 
@@ -269,7 +269,7 @@
 		break;
 	case STATE_STOP:
 		i2c->state = STATE_IDLE;
-		complete_all(&i2c->done);
+		complete(&i2c->done);
 		break;
 	case STATE_IDLE:
 		break;
diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
index dfa7a4b..ac88a52 100644
--- a/drivers/i2c/busses/i2c-ocores.c
+++ b/drivers/i2c/busses/i2c-ocores.c
@@ -379,6 +379,7 @@
 			if (!clock_frequency_present) {
 				dev_err(&pdev->dev,
 					"Missing required parameter 'opencores,ip-clock-frequency'\n");
+				clk_disable_unprepare(i2c->clk);
 				return -ENODEV;
 			}
 			i2c->ip_clock_khz = clock_frequency / 1000;
@@ -467,20 +468,21 @@
 		default:
 			dev_err(&pdev->dev, "Unsupported I/O width (%d)\n",
 				i2c->reg_io_width);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto err_clk;
 		}
 	}
 
 	ret = ocores_init(&pdev->dev, i2c);
 	if (ret)
-		return ret;
+		goto err_clk;
 
 	init_waitqueue_head(&i2c->wait);
 	ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0,
 			       pdev->name, i2c);
 	if (ret) {
 		dev_err(&pdev->dev, "Cannot claim IRQ\n");
-		return ret;
+		goto err_clk;
 	}
 
 	/* hook up driver to tree */
@@ -494,7 +496,7 @@
 	ret = i2c_add_adapter(&i2c->adap);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to add adapter\n");
-		return ret;
+		goto err_clk;
 	}
 
 	/* add in known devices to the bus */
@@ -504,6 +506,10 @@
 	}
 
 	return 0;
+
+err_clk:
+	clk_disable_unprepare(i2c->clk);
+	return ret;
 }
 
 static int ocores_i2c_remove(struct platform_device *pdev)
diff --git a/drivers/i2c/busses/i2c-qup.c b/drivers/i2c/busses/i2c-qup.c
index 501bd15..a8497cf 100644
--- a/drivers/i2c/busses/i2c-qup.c
+++ b/drivers/i2c/busses/i2c-qup.c
@@ -1599,7 +1599,8 @@
 #ifdef CONFIG_PM_SLEEP
 static int qup_i2c_suspend(struct device *device)
 {
-	qup_i2c_pm_suspend_runtime(device);
+	if (!pm_runtime_suspended(device))
+		return qup_i2c_pm_suspend_runtime(device);
 	return 0;
 }
 
diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index 52407f3..9bd849d 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -378,7 +378,7 @@
 	}
 
 	dma_addr = dma_map_single(chan->device->dev, buf, len, dir);
-	if (dma_mapping_error(dev, dma_addr)) {
+	if (dma_mapping_error(chan->device->dev, dma_addr)) {
 		dev_dbg(dev, "dma map failed, using PIO\n");
 		return;
 	}
diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c
index 2bc8b01..5c5b7ca 100644
--- a/drivers/i2c/busses/i2c-rk3x.c
+++ b/drivers/i2c/busses/i2c-rk3x.c
@@ -918,7 +918,7 @@
  * Code adapted from i2c-cadence.c.
  *
  * Return:	NOTIFY_STOP if the rate change should be aborted, NOTIFY_OK
- *		to acknowedge the change, NOTIFY_DONE if the notification is
+ *		to acknowledge the change, NOTIFY_DONE if the notification is
  *		considered irrelevant.
  */
 static int rk3x_i2c_clk_notifier_cb(struct notifier_block *nb, unsigned long
@@ -1111,6 +1111,15 @@
 	return ret < 0 ? ret : num;
 }
 
+static __maybe_unused int rk3x_i2c_resume(struct device *dev)
+{
+	struct rk3x_i2c *i2c = dev_get_drvdata(dev);
+
+	rk3x_i2c_adapt_div(i2c, clk_get_rate(i2c->clk));
+
+	return 0;
+}
+
 static u32 rk3x_i2c_func(struct i2c_adapter *adap)
 {
 	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL | I2C_FUNC_PROTOCOL_MANGLING;
@@ -1334,12 +1343,15 @@
 	return 0;
 }
 
+static SIMPLE_DEV_PM_OPS(rk3x_i2c_pm_ops, NULL, rk3x_i2c_resume);
+
 static struct platform_driver rk3x_i2c_driver = {
 	.probe   = rk3x_i2c_probe,
 	.remove  = rk3x_i2c_remove,
 	.driver  = {
 		.name  = "rk3x-i2c",
 		.of_match_table = rk3x_i2c_match,
+		.pm = &rk3x_i2c_pm_ops,
 	},
 };
 
diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index 6fb3e26..05b1eea 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c
@@ -610,7 +610,7 @@
 		return;
 
 	dma_addr = dma_map_single(chan->device->dev, pd->msg->buf, pd->msg->len, dir);
-	if (dma_mapping_error(pd->dev, dma_addr)) {
+	if (dma_mapping_error(chan->device->dev, dma_addr)) {
 		dev_dbg(pd->dev, "dma map failed, using PIO\n");
 		return;
 	}
diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c
index 8de073a..b3893f6 100644
--- a/drivers/i2c/muxes/i2c-demux-pinctrl.c
+++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c
@@ -37,8 +37,6 @@
 	struct i2c_demux_pinctrl_chan chan[];
 };
 
-static struct property status_okay = { .name = "status", .length = 3, .value = "ok" };
-
 static int i2c_demux_master_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 {
 	struct i2c_demux_pinctrl_priv *priv = adap->algo_data;
@@ -68,7 +66,7 @@
 	adap = of_find_i2c_adapter_by_node(priv->chan[new_chan].parent_np);
 	if (!adap) {
 		ret = -ENODEV;
-		goto err;
+		goto err_with_revert;
 	}
 
 	p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name);
@@ -103,8 +101,11 @@
 
  err_with_put:
 	i2c_put_adapter(adap);
+ err_with_revert:
+	of_changeset_revert(&priv->chan[new_chan].chgset);
  err:
 	dev_err(priv->dev, "failed to setup demux-adapter %d (%d)\n", new_chan, ret);
+	priv->cur_chan = -EINVAL;
 	return ret;
 }
 
@@ -190,6 +191,7 @@
 {
 	struct device_node *np = pdev->dev.of_node;
 	struct i2c_demux_pinctrl_priv *priv;
+	struct property *props;
 	int num_chan, i, j, err;
 
 	num_chan = of_count_phandle_with_args(np, "i2c-parent", NULL);
@@ -200,7 +202,10 @@
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv)
 			   + num_chan * sizeof(struct i2c_demux_pinctrl_chan), GFP_KERNEL);
-	if (!priv)
+
+	props = devm_kcalloc(&pdev->dev, num_chan, sizeof(*props), GFP_KERNEL);
+
+	if (!priv || !props)
 		return -ENOMEM;
 
 	err = of_property_read_string(np, "i2c-bus-name", &priv->bus_name);
@@ -218,8 +223,12 @@
 		}
 		priv->chan[i].parent_np = adap_np;
 
+		props[i].name = devm_kstrdup(&pdev->dev, "status", GFP_KERNEL);
+		props[i].value = devm_kstrdup(&pdev->dev, "ok", GFP_KERNEL);
+		props[i].length = 3;
+
 		of_changeset_init(&priv->chan[i].chgset);
-		of_changeset_update_property(&priv->chan[i].chgset, adap_np, &status_okay);
+		of_changeset_update_property(&priv->chan[i].chgset, adap_np, &props[i]);
 	}
 
 	priv->num_chan = num_chan;
diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c
index 528e755..3278ebf 100644
--- a/drivers/i2c/muxes/i2c-mux-pca954x.c
+++ b/drivers/i2c/muxes/i2c-mux-pca954x.c
@@ -164,7 +164,7 @@
 	/* Only select the channel if its different from the last channel */
 	if (data->last_chan != regval) {
 		ret = pca954x_reg_write(muxc->parent, client, regval);
-		data->last_chan = regval;
+		data->last_chan = ret ? 0 : regval;
 	}
 
 	return ret;
diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index 89d7820..78f148e 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -20,6 +20,8 @@
 config BMA220
     tristate "Bosch BMA220 3-Axis Accelerometer Driver"
 	depends on SPI
+	select IIO_BUFFER
+	select IIO_TRIGGERED_BUFFER
     help
       Say yes here to add support for the Bosch BMA220 triaxial
       acceleration sensor.
@@ -234,7 +236,8 @@
 config STK8BA50
 	tristate "Sensortek STK8BA50 3-Axis Accelerometer Driver"
 	depends on I2C
-	depends on IIO_TRIGGER
+	select IIO_BUFFER
+	select IIO_TRIGGERED_BUFFER
 	help
 	  Say yes here to get support for the Sensortek STK8BA50 3-axis
 	  accelerometer.
diff --git a/drivers/iio/accel/bma220_spi.c b/drivers/iio/accel/bma220_spi.c
index 1098d10..5099f29 100644
--- a/drivers/iio/accel/bma220_spi.c
+++ b/drivers/iio/accel/bma220_spi.c
@@ -253,7 +253,7 @@
 	if (ret < 0)
 		return ret;
 
-	ret = iio_triggered_buffer_setup(indio_dev, NULL,
+	ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time,
 					 bma220_trigger_handler, NULL);
 	if (ret < 0) {
 		dev_err(&spi->dev, "iio triggered buffer setup failed\n");
diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index bf17aae..59b380d 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -67,6 +67,9 @@
 #define BMC150_ACCEL_REG_PMU_BW		0x10
 #define BMC150_ACCEL_DEF_BW			125
 
+#define BMC150_ACCEL_REG_RESET			0x14
+#define BMC150_ACCEL_RESET_VAL			0xB6
+
 #define BMC150_ACCEL_REG_INT_MAP_0		0x19
 #define BMC150_ACCEL_INT_MAP_0_BIT_SLOPE	BIT(2)
 
@@ -1497,6 +1500,14 @@
 	int ret, i;
 	unsigned int val;
 
+	/*
+	 * Reset chip to get it in a known good state. A delay of 1.8ms after
+	 * reset is required according to the data sheets of supported chips.
+	 */
+	regmap_write(data->regmap, BMC150_ACCEL_REG_RESET,
+		     BMC150_ACCEL_RESET_VAL);
+	usleep_range(1800, 2500);
+
 	ret = regmap_read(data->regmap, BMC150_ACCEL_REG_CHIP_ID, &val);
 	if (ret < 0) {
 		dev_err(dev, "Error: Reading chip id\n");
diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c
index 3a9f106..9d72d4b 100644
--- a/drivers/iio/accel/kxsd9.c
+++ b/drivers/iio/accel/kxsd9.c
@@ -160,11 +160,13 @@
 		if (ret < 0)
 			goto error_ret;
 		*val = ret;
+		ret = IIO_VAL_INT;
 		break;
 	case IIO_CHAN_INFO_SCALE:
 		ret = spi_w8r8(st->us, KXSD9_READ(KXSD9_REG_CTRL_C));
 		if (ret < 0)
 			goto error_ret;
+		*val = 0;
 		*val2 = kxsd9_micro_scales[ret & KXSD9_FS_MASK];
 		ret = IIO_VAL_INT_PLUS_MICRO;
 		break;
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 1de31bd..7675772 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -389,6 +389,7 @@
 config ROCKCHIP_SARADC
 	tristate "Rockchip SARADC driver"
 	depends on ARCH_ROCKCHIP || (ARM && COMPILE_TEST)
+	depends on RESET_CONTROLLER
 	help
 	  Say yes here to build support for the SARADC found in SoCs from
 	  Rockchip.
diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c
index b616376..9704090 100644
--- a/drivers/iio/adc/ad799x.c
+++ b/drivers/iio/adc/ad799x.c
@@ -527,6 +527,7 @@
 static const struct iio_info ad7991_info = {
 	.read_raw = &ad799x_read_raw,
 	.driver_module = THIS_MODULE,
+	.update_scan_mode = ad799x_update_scan_mode,
 };
 
 static const struct iio_info ad7993_4_7_8_noirq_info = {
diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c
index 52430ba..0438c68 100644
--- a/drivers/iio/adc/at91_adc.c
+++ b/drivers/iio/adc/at91_adc.c
@@ -381,8 +381,8 @@
 		st->ts_bufferedmeasure = false;
 		input_report_key(st->ts_input, BTN_TOUCH, 0);
 		input_sync(st->ts_input);
-	} else if (status & AT91_ADC_EOC(3)) {
-		/* Conversion finished */
+	} else if (status & AT91_ADC_EOC(3) && st->ts_input) {
+		/* Conversion finished and we've a touchscreen */
 		if (st->ts_bufferedmeasure) {
 			/*
 			 * Last measurement is always discarded, since it can
diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index f9ad6c2..85d7012 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -21,6 +21,8 @@
 #include <linux/of_device.h>
 #include <linux/clk.h>
 #include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/reset.h>
 #include <linux/regulator/consumer.h>
 #include <linux/iio/iio.h>
 
@@ -53,6 +55,7 @@
 	struct clk		*clk;
 	struct completion	completion;
 	struct regulator	*vref;
+	struct reset_control	*reset;
 	const struct rockchip_saradc_data *data;
 	u16			last_val;
 };
@@ -190,6 +193,16 @@
 };
 MODULE_DEVICE_TABLE(of, rockchip_saradc_match);
 
+/**
+ * Reset SARADC Controller.
+ */
+static void rockchip_saradc_reset_controller(struct reset_control *reset)
+{
+	reset_control_assert(reset);
+	usleep_range(10, 20);
+	reset_control_deassert(reset);
+}
+
 static int rockchip_saradc_probe(struct platform_device *pdev)
 {
 	struct rockchip_saradc *info = NULL;
@@ -218,6 +231,20 @@
 	if (IS_ERR(info->regs))
 		return PTR_ERR(info->regs);
 
+	/*
+	 * The reset should be an optional property, as it should work
+	 * with old devicetrees as well
+	 */
+	info->reset = devm_reset_control_get(&pdev->dev, "saradc-apb");
+	if (IS_ERR(info->reset)) {
+		ret = PTR_ERR(info->reset);
+		if (ret != -ENOENT)
+			return ret;
+
+		dev_dbg(&pdev->dev, "no reset control found\n");
+		info->reset = NULL;
+	}
+
 	init_completion(&info->completion);
 
 	irq = platform_get_irq(pdev, 0);
@@ -252,6 +279,9 @@
 		return PTR_ERR(info->vref);
 	}
 
+	if (info->reset)
+		rockchip_saradc_reset_controller(info->reset);
+
 	/*
 	 * Use a default value for the converter clock.
 	 * This may become user-configurable in the future.
diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c
index 1ef39877..066abaf 100644
--- a/drivers/iio/adc/ti-ads1015.c
+++ b/drivers/iio/adc/ti-ads1015.c
@@ -489,7 +489,8 @@
 #ifdef CONFIG_OF
 static int ads1015_get_channels_config_of(struct i2c_client *client)
 {
-	struct ads1015_data *data = i2c_get_clientdata(client);
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+	struct ads1015_data *data = iio_priv(indio_dev);
 	struct device_node *node;
 
 	if (!client->dev.of_node ||
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 8a36875..c3cfacc 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -32,6 +32,7 @@
 
 struct tiadc_device {
 	struct ti_tscadc_dev *mfd_tscadc;
+	struct mutex fifo1_lock; /* to protect fifo access */
 	int channels;
 	u8 channel_line[8];
 	u8 channel_step[8];
@@ -359,6 +360,7 @@
 		int *val, int *val2, long mask)
 {
 	struct tiadc_device *adc_dev = iio_priv(indio_dev);
+	int ret = IIO_VAL_INT;
 	int i, map_val;
 	unsigned int fifo1count, read, stepid;
 	bool found = false;
@@ -372,13 +374,14 @@
 	if (!step_en)
 		return -EINVAL;
 
+	mutex_lock(&adc_dev->fifo1_lock);
 	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
 	while (fifo1count--)
 		tiadc_readl(adc_dev, REG_FIFO1);
 
 	am335x_tsc_se_set_once(adc_dev->mfd_tscadc, step_en);
 
-	timeout = jiffies + usecs_to_jiffies
+	timeout = jiffies + msecs_to_jiffies
 				(IDLE_TIMEOUT * adc_dev->channels);
 	/* Wait for Fifo threshold interrupt */
 	while (1) {
@@ -388,7 +391,8 @@
 
 		if (time_after(jiffies, timeout)) {
 			am335x_tsc_se_adc_done(adc_dev->mfd_tscadc);
-			return -EAGAIN;
+			ret = -EAGAIN;
+			goto err_unlock;
 		}
 	}
 	map_val = adc_dev->channel_step[chan->scan_index];
@@ -414,8 +418,11 @@
 	am335x_tsc_se_adc_done(adc_dev->mfd_tscadc);
 
 	if (found == false)
-		return -EBUSY;
-	return IIO_VAL_INT;
+		ret =  -EBUSY;
+
+err_unlock:
+	mutex_unlock(&adc_dev->fifo1_lock);
+	return ret;
 }
 
 static const struct iio_info tiadc_info = {
@@ -483,6 +490,7 @@
 
 	tiadc_step_config(indio_dev);
 	tiadc_writel(adc_dev, REG_FIFO1THR, FIFO1_THRESHOLD);
+	mutex_init(&adc_dev->fifo1_lock);
 
 	err = tiadc_channel_init(indio_dev, adc_dev->channels);
 	if (err < 0)
diff --git a/drivers/iio/chemical/atlas-ph-sensor.c b/drivers/iio/chemical/atlas-ph-sensor.c
index ae038a5..407f141 100644
--- a/drivers/iio/chemical/atlas-ph-sensor.c
+++ b/drivers/iio/chemical/atlas-ph-sensor.c
@@ -434,7 +434,7 @@
 			break;
 		case IIO_ELECTRICALCONDUCTIVITY:
 			*val = 1; /* 0.00001 */
-			*val = 100000;
+			*val2 = 100000;
 			break;
 		case IIO_CONCENTRATION:
 			*val = 0; /* 0.000000001 */
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
index e81f434..dc33c1d 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -56,8 +56,8 @@
 	{HID_USAGE_SENSOR_ALS, 0, 1, 0},
 	{HID_USAGE_SENSOR_ALS, HID_USAGE_SENSOR_UNITS_LUX, 1, 0},
 
-	{HID_USAGE_SENSOR_PRESSURE, 0, 100000, 0},
-	{HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 1, 0},
+	{HID_USAGE_SENSOR_PRESSURE, 0, 100, 0},
+	{HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 0, 1000},
 };
 
 static int pow_10(unsigned power)
diff --git a/drivers/iio/dac/stx104.c b/drivers/iio/dac/stx104.c
index 792a971..bebbd00 100644
--- a/drivers/iio/dac/stx104.c
+++ b/drivers/iio/dac/stx104.c
@@ -65,6 +65,16 @@
 	unsigned int out_state;
 };
 
+/**
+ * struct stx104_dev - STX104 device private data structure
+ * @indio_dev:	IIO device
+ * @chip:	instance of the gpio_chip
+ */
+struct stx104_dev {
+	struct iio_dev *indio_dev;
+	struct gpio_chip *chip;
+};
+
 static int stx104_read_raw(struct iio_dev *indio_dev,
 	struct iio_chan_spec const *chan, int *val, int *val2, long mask)
 {
@@ -107,6 +117,7 @@
 static int stx104_gpio_get_direction(struct gpio_chip *chip,
 	unsigned int offset)
 {
+	/* GPIO 0-3 are input only, while the rest are output only */
 	if (offset < 4)
 		return 1;
 
@@ -169,6 +180,7 @@
 	struct iio_dev *indio_dev;
 	struct stx104_iio *priv;
 	struct stx104_gpio *stx104gpio;
+	struct stx104_dev *stx104dev;
 	int err;
 
 	indio_dev = devm_iio_device_alloc(dev, sizeof(*priv));
@@ -179,6 +191,10 @@
 	if (!stx104gpio)
 		return -ENOMEM;
 
+	stx104dev = devm_kzalloc(dev, sizeof(*stx104dev), GFP_KERNEL);
+	if (!stx104dev)
+		return -ENOMEM;
+
 	if (!devm_request_region(dev, base[id], STX104_EXTENT,
 		dev_name(dev))) {
 		dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n",
@@ -199,12 +215,6 @@
 	outw(0, base[id] + 4);
 	outw(0, base[id] + 6);
 
-	err = devm_iio_device_register(dev, indio_dev);
-	if (err) {
-		dev_err(dev, "IIO device registering failed (%d)\n", err);
-		return err;
-	}
-
 	stx104gpio->chip.label = dev_name(dev);
 	stx104gpio->chip.parent = dev;
 	stx104gpio->chip.owner = THIS_MODULE;
@@ -220,7 +230,9 @@
 
 	spin_lock_init(&stx104gpio->lock);
 
-	dev_set_drvdata(dev, stx104gpio);
+	stx104dev->indio_dev = indio_dev;
+	stx104dev->chip = &stx104gpio->chip;
+	dev_set_drvdata(dev, stx104dev);
 
 	err = gpiochip_add_data(&stx104gpio->chip, stx104gpio);
 	if (err) {
@@ -228,14 +240,22 @@
 		return err;
 	}
 
+	err = iio_device_register(indio_dev);
+	if (err) {
+		dev_err(dev, "IIO device registering failed (%d)\n", err);
+		gpiochip_remove(&stx104gpio->chip);
+		return err;
+	}
+
 	return 0;
 }
 
 static int stx104_remove(struct device *dev, unsigned int id)
 {
-	struct stx104_gpio *const stx104gpio = dev_get_drvdata(dev);
+	struct stx104_dev *const stx104dev = dev_get_drvdata(dev);
 
-	gpiochip_remove(&stx104gpio->chip);
+	iio_device_unregister(stx104dev->indio_dev);
+	gpiochip_remove(stx104dev->chip);
 
 	return 0;
 }
diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig
index 738a86d..d041243 100644
--- a/drivers/iio/humidity/Kconfig
+++ b/drivers/iio/humidity/Kconfig
@@ -6,6 +6,8 @@
 config AM2315
     tristate "Aosong AM2315 relative humidity and temperature sensor"
     depends on I2C
+    select IIO_BUFFER
+    select IIO_TRIGGERED_BUFFER
     help
       If you say yes here you get support for the Aosong AM2315
       relative humidity and ambient temperature sensor.
diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c
index 3e200f6..ff96b6d 100644
--- a/drivers/iio/humidity/am2315.c
+++ b/drivers/iio/humidity/am2315.c
@@ -244,7 +244,7 @@
 	indio_dev->channels = am2315_channels;
 	indio_dev->num_channels = ARRAY_SIZE(am2315_channels);
 
-	ret = iio_triggered_buffer_setup(indio_dev, NULL,
+	ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time,
 					 am2315_trigger_handler, NULL);
 	if (ret < 0) {
 		dev_err(&client->dev, "iio triggered buffer setup failed\n");
diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c
index a03832a..e0c9c70 100644
--- a/drivers/iio/humidity/hdc100x.c
+++ b/drivers/iio/humidity/hdc100x.c
@@ -142,7 +142,7 @@
 	struct i2c_client *client = data->client;
 	int delay = data->adc_int_us[chan->address];
 	int ret;
-	int val;
+	__be16 val;
 
 	/* start measurement */
 	ret = i2c_smbus_write_byte(client, chan->address);
@@ -154,26 +154,13 @@
 	/* wait for integration time to pass */
 	usleep_range(delay, delay + 1000);
 
-	/*
-	 * i2c_smbus_read_word_data cannot() be used here due to the command
-	 * value not being understood and causes NAKs preventing any reading
-	 * from being accessed.
-	 */
-	ret = i2c_smbus_read_byte(client);
+	/* read measurement */
+	ret = i2c_master_recv(data->client, (char *)&val, sizeof(val));
 	if (ret < 0) {
-		dev_err(&client->dev, "cannot read high byte measurement");
+		dev_err(&client->dev, "cannot read sensor data\n");
 		return ret;
 	}
-	val = ret << 8;
-
-	ret = i2c_smbus_read_byte(client);
-	if (ret < 0) {
-		dev_err(&client->dev, "cannot read low byte measurement");
-		return ret;
-	}
-	val |= ret;
-
-	return val;
+	return be16_to_cpu(val);
 }
 
 static int hdc100x_get_heater_status(struct hdc100x_data *data)
@@ -272,8 +259,8 @@
 	struct iio_dev *indio_dev;
 	struct hdc100x_data *data;
 
-	if (!i2c_check_functionality(client->adapter,
-				I2C_FUNC_SMBUS_WORD_DATA | I2C_FUNC_SMBUS_BYTE))
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA |
+				     I2C_FUNC_SMBUS_BYTE | I2C_FUNC_I2C))
 		return -EOPNOTSUPP;
 
 	indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data));
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 90462fc..158aaf4 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -107,9 +107,10 @@
 {
 	struct iio_dev *indio_dev = filp->private_data;
 	struct iio_buffer *rb = indio_dev->buffer;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 	size_t datum_size;
 	size_t to_wait;
-	int ret;
+	int ret = 0;
 
 	if (!indio_dev->info)
 		return -ENODEV;
@@ -131,19 +132,29 @@
 	else
 		to_wait = min_t(size_t, n / datum_size, rb->watermark);
 
+	add_wait_queue(&rb->pollq, &wait);
 	do {
-		ret = wait_event_interruptible(rb->pollq,
-		      iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size));
-		if (ret)
-			return ret;
+		if (!indio_dev->info) {
+			ret = -ENODEV;
+			break;
+		}
 
-		if (!indio_dev->info)
-			return -ENODEV;
+		if (!iio_buffer_ready(indio_dev, rb, to_wait, n / datum_size)) {
+			if (signal_pending(current)) {
+				ret = -ERESTARTSYS;
+				break;
+			}
+
+			wait_woken(&wait, TASK_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+			continue;
+		}
 
 		ret = rb->access->read_first_n(rb, n, buf);
 		if (ret == 0 && (filp->f_flags & O_NONBLOCK))
 			ret = -EAGAIN;
-	 } while (ret == 0);
+	} while (ret == 0);
+	remove_wait_queue(&rb->pollq, &wait);
 
 	return ret;
 }
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index f914d5d..d2b8899 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -613,9 +613,8 @@
 			return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
 	case IIO_VAL_FRACTIONAL:
 		tmp = div_s64((s64)vals[0] * 1000000000LL, vals[1]);
-		vals[1] = do_div(tmp, 1000000000LL);
-		vals[0] = tmp;
-		return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
+		vals[0] = (int)div_s64_rem(tmp, 1000000000, &vals[1]);
+		return sprintf(buf, "%d.%09u\n", vals[0], abs(vals[1]));
 	case IIO_VAL_FRACTIONAL_LOG2:
 		tmp = (s64)vals[0] * 1000000000LL >> vals[1];
 		vals[1] = do_div(tmp, 1000000000LL);
diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig
index 7c566f5..3574945 100644
--- a/drivers/iio/light/Kconfig
+++ b/drivers/iio/light/Kconfig
@@ -76,7 +76,6 @@
 config BH1780
 	tristate "ROHM BH1780 ambient light sensor"
 	depends on I2C
-	depends on !SENSORS_BH1780
 	help
 	 Say Y here to build support for the ROHM BH1780GLI ambient
 	 light sensor.
@@ -238,6 +237,8 @@
 	tristate "MAX44000 Ambient and Infrared Proximity Sensor"
 	depends on I2C
 	select REGMAP_I2C
+	select IIO_BUFFER
+	select IIO_TRIGGERED_BUFFER
 	help
 	 Say Y here if you want to build support for Maxim Integrated's
 	 MAX44000 ambient and infrared proximity sensor device.
diff --git a/drivers/iio/pressure/Kconfig b/drivers/iio/pressure/Kconfig
index d130cdc..7fa65ab 100644
--- a/drivers/iio/pressure/Kconfig
+++ b/drivers/iio/pressure/Kconfig
@@ -8,8 +8,6 @@
 config BMP280
 	tristate "Bosch Sensortec BMP180/BMP280 pressure sensor I2C driver"
 	depends on (I2C || SPI_MASTER)
-	depends on !(BMP085_I2C=y || BMP085_I2C=m)
-	depends on !(BMP085_SPI=y || BMP085_SPI=m)
 	select REGMAP
 	select BMP280_I2C if (I2C)
 	select BMP280_SPI if (SPI_MASTER)
diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c
index 6943688..e5a533c 100644
--- a/drivers/iio/pressure/bmp280-core.c
+++ b/drivers/iio/pressure/bmp280-core.c
@@ -970,7 +970,7 @@
 	data->vdda = devm_regulator_get(dev, "vdda");
 	if (IS_ERR(data->vdda)) {
 		dev_err(dev, "failed to get VDDA regulator\n");
-		ret = PTR_ERR(data->vddd);
+		ret = PTR_ERR(data->vdda);
 		goto out_disable_vddd;
 	}
 	ret = regulator_enable(data->vdda);
@@ -1079,7 +1079,8 @@
 #ifdef CONFIG_PM
 static int bmp280_runtime_suspend(struct device *dev)
 {
-	struct bmp280_data *data = dev_get_drvdata(dev);
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct bmp280_data *data = iio_priv(indio_dev);
 	int ret;
 
 	ret = regulator_disable(data->vdda);
@@ -1090,7 +1091,8 @@
 
 static int bmp280_runtime_resume(struct device *dev)
 {
-	struct bmp280_data *data = dev_get_drvdata(dev);
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct bmp280_data *data = iio_priv(indio_dev);
 	int ret;
 
 	ret = regulator_enable(data->vddd);
diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c
index 2e3a70e..5656deb 100644
--- a/drivers/iio/proximity/as3935.c
+++ b/drivers/iio/proximity/as3935.c
@@ -397,7 +397,7 @@
 		return ret;
 	}
 
-	ret = iio_triggered_buffer_setup(indio_dev, NULL,
+	ret = iio_triggered_buffer_setup(indio_dev, iio_pollfunc_store_time,
 		&as3935_trigger_handler, NULL);
 
 	if (ret) {
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e6dfa1b..5f65a78 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2462,18 +2462,24 @@
 
 	if (addr->dev_addr.bound_dev_if) {
 		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
-		if (!ndev)
-			return -ENODEV;
+		if (!ndev) {
+			ret = -ENODEV;
+			goto err2;
+		}
 
 		if (ndev->flags & IFF_LOOPBACK) {
 			dev_put(ndev);
-			if (!id_priv->id.device->get_netdev)
-				return -EOPNOTSUPP;
+			if (!id_priv->id.device->get_netdev) {
+				ret = -EOPNOTSUPP;
+				goto err2;
+			}
 
 			ndev = id_priv->id.device->get_netdev(id_priv->id.device,
 							      id_priv->id.port_num);
-			if (!ndev)
-				return -ENODEV;
+			if (!ndev) {
+				ret = -ENODEV;
+				goto err2;
+			}
 		}
 
 		route->path_rec->net = &init_net;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 3a3c5d7..51c79b2 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -106,7 +106,6 @@
 	atomic_t		refcount;
 	enum mcast_group_state	state;
 	struct ib_sa_query	*query;
-	int			query_id;
 	u16			pkey_index;
 	u8			leave_state;
 	int			retries;
@@ -340,11 +339,7 @@
 				       member->multicast.comp_mask,
 				       3000, GFP_KERNEL, join_handler, group,
 				       &group->query);
-	if (ret >= 0) {
-		group->query_id = ret;
-		ret = 0;
-	}
-	return ret;
+	return (ret > 0) ? 0 : ret;
 }
 
 static int send_leave(struct mcast_group *group, u8 leave_state)
@@ -364,11 +359,7 @@
 				       IB_SA_MCMEMBER_REC_JOIN_STATE,
 				       3000, GFP_KERNEL, leave_handler,
 				       group, &group->query);
-	if (ret >= 0) {
-		group->query_id = ret;
-		ret = 0;
-	}
-	return ret;
+	return (ret > 0) ? 0 : ret;
 }
 
 static void join_group(struct mcast_group *group, struct mcast_member *member,
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index 23f38cf..afe8b28 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,6 +1,7 @@
 config INFINIBAND_CXGB4
 	tristate "Chelsio T4/T5 RDMA Driver"
 	depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n)
+	select CHELSIO_LIB
 	select GENERIC_ALLOCATOR
 	---help---
 	  This is an iWARP/RDMA driver for the Chelsio T4 and T5
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index e11cf72..fa40b685 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -1,4 +1,5 @@
 ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
 
 obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
 
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 3aca7f6..71c8867 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -49,6 +49,7 @@
 
 #include <rdma/ib_addr.h>
 
+#include <libcxgb_cm.h>
 #include "iw_cxgb4.h"
 #include "clip_tbl.h"
 
@@ -239,15 +240,13 @@
 
 static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
 {
-	struct cpl_tid_release *req;
+	u32 len = roundup(sizeof(struct cpl_tid_release), 16);
 
-	skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+	skb = get_skb(skb, len, GFP_KERNEL);
 	if (!skb)
 		return;
-	req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
-	INIT_TP_WR(req, hwtid);
-	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
-	set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
+
+	cxgb_mk_tid_release(skb, len, hwtid, 0);
 	c4iw_ofld_send(rdev, skb);
 	return;
 }
@@ -333,6 +332,8 @@
 
 	spin_lock_irqsave(&ep->com.dev->lock, flags);
 	_remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
+	if (idr_is_empty(&ep->com.dev->hwtid_idr))
+		wake_up(&ep->com.dev->wait);
 	spin_unlock_irqrestore(&ep->com.dev->lock, flags);
 }
 
@@ -464,72 +465,6 @@
 	return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
 }
 
-static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
-{
-	int i;
-
-	egress_dev = get_real_dev(egress_dev);
-	for (i = 0; i < dev->rdev.lldi.nports; i++)
-		if (dev->rdev.lldi.ports[i] == egress_dev)
-			return 1;
-	return 0;
-}
-
-static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
-				     __u8 *peer_ip, __be16 local_port,
-				     __be16 peer_port, u8 tos,
-				     __u32 sin6_scope_id)
-{
-	struct dst_entry *dst = NULL;
-
-	if (IS_ENABLED(CONFIG_IPV6)) {
-		struct flowi6 fl6;
-
-		memset(&fl6, 0, sizeof(fl6));
-		memcpy(&fl6.daddr, peer_ip, 16);
-		memcpy(&fl6.saddr, local_ip, 16);
-		if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
-			fl6.flowi6_oif = sin6_scope_id;
-		dst = ip6_route_output(&init_net, NULL, &fl6);
-		if (!dst)
-			goto out;
-		if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
-		    !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
-			dst_release(dst);
-			dst = NULL;
-		}
-	}
-
-out:
-	return dst;
-}
-
-static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
-				 __be32 peer_ip, __be16 local_port,
-				 __be16 peer_port, u8 tos)
-{
-	struct rtable *rt;
-	struct flowi4 fl4;
-	struct neighbour *n;
-
-	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
-				   peer_port, local_port, IPPROTO_TCP,
-				   tos, 0);
-	if (IS_ERR(rt))
-		return NULL;
-	n = dst_neigh_lookup(&rt->dst, &peer_ip);
-	if (!n)
-		return NULL;
-	if (!our_interface(dev, n->dev) &&
-	    !(n->dev->flags & IFF_LOOPBACK)) {
-		neigh_release(n);
-		dst_release(&rt->dst);
-		return NULL;
-	}
-	neigh_release(n);
-	return &rt->dst;
-}
-
 static void arp_failure_discard(void *handle, struct sk_buff *skb)
 {
 	pr_err(MOD "ARP failure\n");
@@ -704,58 +639,34 @@
 
 static int send_halfclose(struct c4iw_ep *ep)
 {
-	struct cpl_close_con_req *req;
 	struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
-	int wrlen = roundup(sizeof *req, 16);
+	u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
 
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	if (WARN_ON(!skb))
 		return -ENOMEM;
 
-	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
-	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
-	req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
-	memset(req, 0, wrlen);
-	INIT_TP_WR(req, ep->hwtid);
-	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
-						    ep->hwtid));
+	cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
+			      NULL, arp_failure_discard);
+
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
 static int send_abort(struct c4iw_ep *ep)
 {
-	struct cpl_abort_req *req;
-	int wrlen = roundup(sizeof *req, 16);
+	u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
 	struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
 
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
 	if (WARN_ON(!req_skb))
 		return -ENOMEM;
 
-	set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
-	t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
-	req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
-	memset(req, 0, wrlen);
-	INIT_TP_WR(req, ep->hwtid);
-	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
-	req->cmd = CPL_ABORT_SEND_RST;
+	cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
+			  ep, abort_arp_failure);
+
 	return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
 }
 
-static void best_mtu(const unsigned short *mtus, unsigned short mtu,
-		     unsigned int *idx, int use_ts, int ipv6)
-{
-	unsigned short hdr_size = (ipv6 ?
-				   sizeof(struct ipv6hdr) :
-				   sizeof(struct iphdr)) +
-				  sizeof(struct tcphdr) +
-				  (use_ts ?
-				   round_up(TCPOLEN_TIMESTAMP, 4) : 0);
-	unsigned short data_size = mtu - hdr_size;
-
-	cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
-}
-
 static int send_connect(struct c4iw_ep *ep)
 {
 	struct cpl_act_open_req *req = NULL;
@@ -768,7 +679,7 @@
 	u64 opt0;
 	u32 opt2;
 	unsigned int mtu_idx;
-	int wscale;
+	u32 wscale;
 	int win, sizev4, sizev6, wrlen;
 	struct sockaddr_in *la = (struct sockaddr_in *)
 				 &ep->com.local_addr;
@@ -815,10 +726,10 @@
 	}
 	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
 
-	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
-		 enable_tcp_timestamps,
-		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
-	wscale = compute_wscale(rcv_win);
+	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+		      enable_tcp_timestamps,
+		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+	wscale = cxgb_compute_wscale(rcv_win);
 
 	/*
 	 * Specify the largest window that will fit in opt0. The
@@ -1445,9 +1356,9 @@
 
 static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
 {
-	struct cpl_rx_data_ack *req;
 	struct sk_buff *skb;
-	int wrlen = roundup(sizeof *req, 16);
+	u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
+	u32 credit_dack;
 
 	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -1464,15 +1375,12 @@
 	if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
 		credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
 
-	req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
-	memset(req, 0, wrlen);
-	INIT_TP_WR(req, ep->hwtid);
-	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
-						    ep->hwtid));
-	req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
-				       RX_DACK_CHANGE_F |
-				       RX_DACK_MODE_V(dack_mode));
-	set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
+	credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
+		      RX_DACK_MODE_V(dack_mode);
+
+	cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
+			    credit_dack);
+
 	c4iw_ofld_send(&ep->com.dev->rdev, skb);
 	return credits;
 }
@@ -1827,8 +1735,12 @@
 				(ep->mpa_pkt + sizeof(*mpa));
 			ep->ird = ntohs(mpa_v2_params->ird) &
 				MPA_V2_IRD_ORD_MASK;
+			ep->ird = min_t(u32, ep->ird,
+					cur_max_read_depth(ep->com.dev));
 			ep->ord = ntohs(mpa_v2_params->ord) &
 				MPA_V2_IRD_ORD_MASK;
+			ep->ord = min_t(u32, ep->ord,
+					cur_max_read_depth(ep->com.dev));
 			PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird,
 			     ep->ord);
 			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
@@ -1966,7 +1878,7 @@
 	struct sk_buff *skb;
 	struct fw_ofld_connection_wr *req;
 	unsigned int mtu_idx;
-	int wscale;
+	u32 wscale;
 	struct sockaddr_in *sin;
 	int win;
 
@@ -1991,10 +1903,10 @@
 			htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
 	req->tcb.tx_max = (__force __be32) jiffies;
 	req->tcb.rcv_adv = htons(1);
-	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
-		 enable_tcp_timestamps,
-		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
-	wscale = compute_wscale(rcv_win);
+	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+		      enable_tcp_timestamps,
+		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+	wscale = cxgb_compute_wscale(rcv_win);
 
 	/*
 	 * Specify the largest window that will fit in opt0. The
@@ -2048,15 +1960,6 @@
 		status != CPL_ERR_CONN_EXIST);
 }
 
-/* Returns whether a CPL status conveys negative advice.
- */
-static int is_neg_adv(unsigned int status)
-{
-	return status == CPL_ERR_RTX_NEG_ADVICE ||
-	       status == CPL_ERR_PERSIST_NEG_ADVICE ||
-	       status == CPL_ERR_KEEPALV_NEG_ADVICE;
-}
-
 static char *neg_adv_str(unsigned int status)
 {
 	switch (status) {
@@ -2113,8 +2016,10 @@
 		}
 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
 					n, pdev, rt_tos2priority(tos));
-		if (!ep->l2t)
+		if (!ep->l2t) {
+			dev_put(pdev);
 			goto out;
+		}
 		ep->mtu = pdev->mtu;
 		ep->tx_chan = cxgb4_port_chan(pdev);
 		ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
@@ -2210,16 +2115,21 @@
 
 	/* find a route */
 	if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
-		ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
-				     raddr->sin_addr.s_addr, laddr->sin_port,
-				     raddr->sin_port, ep->com.cm_id->tos);
+		ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
+					  laddr->sin_addr.s_addr,
+					  raddr->sin_addr.s_addr,
+					  laddr->sin_port,
+					  raddr->sin_port, ep->com.cm_id->tos);
 		iptype = 4;
 		ra = (__u8 *)&raddr->sin_addr;
 	} else {
-		ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
-				      raddr6->sin6_addr.s6_addr,
-				      laddr6->sin6_port, raddr6->sin6_port, 0,
-				      raddr6->sin6_scope_id);
+		ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
+					   get_real_dev,
+					   laddr6->sin6_addr.s6_addr,
+					   raddr6->sin6_addr.s6_addr,
+					   laddr6->sin6_port,
+					   raddr6->sin6_port, 0,
+					   raddr6->sin6_scope_id);
 		iptype = 6;
 		ra = (__u8 *)&raddr6->sin6_addr;
 	}
@@ -2291,7 +2201,7 @@
 	PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
 	     status, status2errno(status));
 
-	if (is_neg_adv(status)) {
+	if (cxgb_is_neg_adv(status)) {
 		PDBG("%s Connection problems for atid %u status %u (%s)\n",
 		     __func__, atid, status, neg_adv_str(status));
 		ep->stats.connect_neg_adv++;
@@ -2418,7 +2328,7 @@
 	unsigned int mtu_idx;
 	u64 opt0;
 	u32 opt2;
-	int wscale;
+	u32 wscale;
 	struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
 	int win;
 	enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
@@ -2439,10 +2349,10 @@
 	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
 						    ep->hwtid));
 
-	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
-		 enable_tcp_timestamps && req->tcpopt.tstamp,
-		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
-	wscale = compute_wscale(rcv_win);
+	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+		      enable_tcp_timestamps && req->tcpopt.tstamp,
+		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+	wscale = cxgb_compute_wscale(rcv_win);
 
 	/*
 	 * Specify the largest window that will fit in opt0. The
@@ -2514,42 +2424,6 @@
 	return;
 }
 
-static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
-		       int *iptype, __u8 *local_ip, __u8 *peer_ip,
-		       __be16 *local_port, __be16 *peer_port)
-{
-	int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
-		      ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
-		      T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
-	int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
-		     IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
-		     T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
-	struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
-	struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
-	struct tcphdr *tcp = (struct tcphdr *)
-			     ((u8 *)(req + 1) + eth_len + ip_len);
-
-	if (ip->version == 4) {
-		PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
-		     ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
-		     ntohs(tcp->dest));
-		*iptype = 4;
-		memcpy(peer_ip, &ip->saddr, 4);
-		memcpy(local_ip, &ip->daddr, 4);
-	} else {
-		PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
-		     ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
-		     ntohs(tcp->dest));
-		*iptype = 6;
-		memcpy(peer_ip, ip6->saddr.s6_addr, 16);
-		memcpy(local_ip, ip6->daddr.s6_addr, 16);
-	}
-	*peer_port = tcp->source;
-	*local_port = tcp->dest;
-
-	return;
-}
-
 static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct c4iw_ep *child_ep = NULL, *parent_ep;
@@ -2578,8 +2452,8 @@
 		goto reject;
 	}
 
-	get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
-		   local_ip, peer_ip, &local_port, &peer_port);
+	cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
+			&iptype, local_ip, peer_ip, &local_port, &peer_port);
 
 	/* Find output route */
 	if (iptype == 4)  {
@@ -2587,18 +2461,19 @@
 		     , __func__, parent_ep, hwtid,
 		     local_ip, peer_ip, ntohs(local_port),
 		     ntohs(peer_port), peer_mss);
-		dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
-				 local_port, peer_port,
-				 tos);
+		dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+				      *(__be32 *)local_ip, *(__be32 *)peer_ip,
+				      local_port, peer_port, tos);
 	} else {
 		PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
 		     , __func__, parent_ep, hwtid,
 		     local_ip, peer_ip, ntohs(local_port),
 		     ntohs(peer_port), peer_mss);
-		dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
-				  PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
-				  ((struct sockaddr_in6 *)
-				  &parent_ep->com.local_addr)->sin6_scope_id);
+		dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+				local_ip, peer_ip, local_port, peer_port,
+				PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+				((struct sockaddr_in6 *)
+				 &parent_ep->com.local_addr)->sin6_scope_id);
 	}
 	if (!dst) {
 		printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
@@ -2831,18 +2706,18 @@
 {
 	struct cpl_abort_req_rss *req = cplhdr(skb);
 	struct c4iw_ep *ep;
-	struct cpl_abort_rpl *rpl;
 	struct sk_buff *rpl_skb;
 	struct c4iw_qp_attributes attrs;
 	int ret;
 	int release = 0;
 	unsigned int tid = GET_TID(req);
+	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
 	ep = get_ep_from_tid(dev, tid);
 	if (!ep)
 		return 0;
 
-	if (is_neg_adv(req->status)) {
+	if (cxgb_is_neg_adv(req->status)) {
 		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
 		     __func__, ep->hwtid, req->status,
 		     neg_adv_str(req->status));
@@ -2935,11 +2810,9 @@
 		release = 1;
 		goto out;
 	}
-	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
-	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
-	INIT_TP_WR(rpl, ep->hwtid);
-	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
-	rpl->cmd = CPL_ABORT_NO_RST;
+
+	cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
+
 	c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
 out:
 	if (release)
@@ -3136,7 +3009,7 @@
 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
 		if (conn_param->ord > ep->ird) {
 			if (RELAXED_IRD_NEGOTIATION) {
-				ep->ord = ep->ird;
+				conn_param->ord = ep->ird;
 			} else {
 				ep->ird = conn_param->ird;
 				ep->ord = conn_param->ord;
@@ -3371,9 +3244,11 @@
 		PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
 		     __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
 		     ra, ntohs(raddr->sin_port));
-		ep->dst = find_route(dev, laddr->sin_addr.s_addr,
-				     raddr->sin_addr.s_addr, laddr->sin_port,
-				     raddr->sin_port, cm_id->tos);
+		ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+					  laddr->sin_addr.s_addr,
+					  raddr->sin_addr.s_addr,
+					  laddr->sin_port,
+					  raddr->sin_port, cm_id->tos);
 	} else {
 		iptype = 6;
 		ra = (__u8 *)&raddr6->sin6_addr;
@@ -3392,10 +3267,12 @@
 		     __func__, laddr6->sin6_addr.s6_addr,
 		     ntohs(laddr6->sin6_port),
 		     raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
-		ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
-				      raddr6->sin6_addr.s6_addr,
-				      laddr6->sin6_port, raddr6->sin6_port, 0,
-				      raddr6->sin6_scope_id);
+		ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+					   laddr6->sin6_addr.s6_addr,
+					   raddr6->sin6_addr.s6_addr,
+					   laddr6->sin6_port,
+					   raddr6->sin6_port, 0,
+					   raddr6->sin6_scope_id);
 	}
 	if (!ep->dst) {
 		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
@@ -4037,8 +3914,9 @@
 	     ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
 	     ntohs(tcph->source), iph->tos);
 
-	dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
-			 iph->tos);
+	dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+			      iph->daddr, iph->saddr, tcph->dest,
+			      tcph->source, iph->tos);
 	if (!dst) {
 		pr_err("%s - failed to find dst entry!\n",
 		       __func__);
@@ -4313,7 +4191,7 @@
 		kfree_skb(skb);
 		return 0;
 	}
-	if (is_neg_adv(req->status)) {
+	if (cxgb_is_neg_adv(req->status)) {
 		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
 		     __func__, ep->hwtid, req->status,
 		     neg_adv_str(req->status));
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 812ab72..ac926c9 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -1016,15 +1016,15 @@
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
 	struct c4iw_cq *chp;
-	int ret;
+	int ret = 0;
 	unsigned long flag;
 
 	chp = to_c4iw_cq(ibcq);
 	spin_lock_irqsave(&chp->lock, flag);
-	ret = t4_arm_cq(&chp->cq,
-			(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+	t4_arm_cq(&chp->cq,
+		  (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED);
+	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
+		ret = t4_cq_notempty(&chp->cq);
 	spin_unlock_irqrestore(&chp->lock, flag);
-	if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS))
-		ret = 0;
 	return ret;
 }
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 071d733..93e3d27 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -872,9 +872,13 @@
 static void c4iw_dealloc(struct uld_ctx *ctx)
 {
 	c4iw_rdev_close(&ctx->dev->rdev);
+	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
 	idr_destroy(&ctx->dev->cqidr);
+	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
 	idr_destroy(&ctx->dev->qpidr);
+	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
 	idr_destroy(&ctx->dev->mmidr);
+	wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
 	idr_destroy(&ctx->dev->hwtid_idr);
 	idr_destroy(&ctx->dev->stid_idr);
 	idr_destroy(&ctx->dev->atid_idr);
@@ -992,6 +996,7 @@
 	mutex_init(&devp->rdev.stats.lock);
 	mutex_init(&devp->db_mutex);
 	INIT_LIST_HEAD(&devp->db_fc_list);
+	init_waitqueue_head(&devp->wait);
 	devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
 
 	if (c4iw_debugfs_root) {
@@ -1475,6 +1480,10 @@
 
 static struct cxgb4_uld_info c4iw_uld_info = {
 	.name = DRV_NAME,
+	.nrxq = MAX_ULD_QSETS,
+	.rxq_size = 511,
+	.ciq = true,
+	.lro = false,
 	.add = c4iw_uld_add,
 	.rx_handler = c4iw_uld_rx_handler,
 	.state_change = c4iw_uld_state_change,
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index aa47e0a..cdcf3ee 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -263,6 +263,7 @@
 	struct idr stid_idr;
 	struct list_head db_fc_list;
 	u32 avail_ird;
+	wait_queue_head_t wait;
 };
 
 static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -881,15 +882,6 @@
 	return cm_id->provider_data;
 }
 
-static inline int compute_wscale(int win)
-{
-	int wscale = 0;
-
-	while (wscale < 14 && (65535<<wscale) < win)
-		wscale++;
-	return wscale;
-}
-
 static inline int ocqp_supported(const struct cxgb4_lld_info *infop)
 {
 #if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index edb1172..6904352 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -683,7 +683,7 @@
 	return 0;
 }
 
-void _free_qp(struct kref *kref)
+static void _free_qp(struct kref *kref)
 {
 	struct c4iw_qp *qhp;
 
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 6126bbe..02173f4 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -634,6 +634,11 @@
 	return (CQE_GENBIT(cqe) == cq->gen);
 }
 
+static inline int t4_cq_notempty(struct t4_cq *cq)
+{
+	return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]);
+}
+
 static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
 {
 	int ret;
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 79575ee..a26a9a0 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -47,7 +47,7 @@
 #include <linux/topology.h>
 #include <linux/cpumask.h>
 #include <linux/module.h>
-#include <linux/cpumask.h>
+#include <linux/interrupt.h>
 
 #include "hfi.h"
 #include "affinity.h"
@@ -56,7 +56,7 @@
 
 struct hfi1_affinity_node_list node_affinity = {
 	.list = LIST_HEAD_INIT(node_affinity.list),
-	.lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
+	.lock = __MUTEX_INITIALIZER(node_affinity.lock)
 };
 
 /* Name of IRQ types, indexed by enum irq_type */
@@ -160,14 +160,14 @@
 	struct list_head *pos, *q;
 	struct hfi1_affinity_node *entry;
 
-	spin_lock(&node_affinity.lock);
+	mutex_lock(&node_affinity.lock);
 	list_for_each_safe(pos, q, &node_affinity.list) {
 		entry = list_entry(pos, struct hfi1_affinity_node,
 				   list);
 		list_del(pos);
 		kfree(entry);
 	}
-	spin_unlock(&node_affinity.lock);
+	mutex_unlock(&node_affinity.lock);
 	kfree(hfi1_per_node_cntr);
 }
 
@@ -234,9 +234,8 @@
 	if (cpumask_first(local_mask) >= nr_cpu_ids)
 		local_mask = topology_core_cpumask(0);
 
-	spin_lock(&node_affinity.lock);
+	mutex_lock(&node_affinity.lock);
 	entry = node_affinity_lookup(dd->node);
-	spin_unlock(&node_affinity.lock);
 
 	/*
 	 * If this is the first time this NUMA node's affinity is used,
@@ -247,6 +246,7 @@
 		if (!entry) {
 			dd_dev_err(dd,
 				   "Unable to allocate global affinity node\n");
+			mutex_unlock(&node_affinity.lock);
 			return -ENOMEM;
 		}
 		init_cpu_mask_set(&entry->def_intr);
@@ -303,15 +303,113 @@
 					     &entry->general_intr_mask);
 		}
 
-		spin_lock(&node_affinity.lock);
 		node_affinity_add_tail(entry);
-		spin_unlock(&node_affinity.lock);
 	}
-
+	mutex_unlock(&node_affinity.lock);
 	return 0;
 }
 
-int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+/*
+ * Function updates the irq affinity hint for msix after it has been changed
+ * by the user using the /proc/irq interface. This function only accepts
+ * one cpu in the mask.
+ */
+static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
+{
+	struct sdma_engine *sde = msix->arg;
+	struct hfi1_devdata *dd = sde->dd;
+	struct hfi1_affinity_node *entry;
+	struct cpu_mask_set *set;
+	int i, old_cpu;
+
+	if (cpu > num_online_cpus() || cpu == sde->cpu)
+		return;
+
+	mutex_lock(&node_affinity.lock);
+	entry = node_affinity_lookup(dd->node);
+	if (!entry)
+		goto unlock;
+
+	old_cpu = sde->cpu;
+	sde->cpu = cpu;
+	cpumask_clear(&msix->mask);
+	cpumask_set_cpu(cpu, &msix->mask);
+	dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
+		   msix->msix.vector, irq_type_names[msix->type],
+		   sde->this_idx, cpu);
+	irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+
+	/*
+	 * Set the new cpu in the hfi1_affinity_node and clean
+	 * the old cpu if it is not used by any other IRQ
+	 */
+	set = &entry->def_intr;
+	cpumask_set_cpu(cpu, &set->mask);
+	cpumask_set_cpu(cpu, &set->used);
+	for (i = 0; i < dd->num_msix_entries; i++) {
+		struct hfi1_msix_entry *other_msix;
+
+		other_msix = &dd->msix_entries[i];
+		if (other_msix->type != IRQ_SDMA || other_msix == msix)
+			continue;
+
+		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
+			goto unlock;
+	}
+	cpumask_clear_cpu(old_cpu, &set->mask);
+	cpumask_clear_cpu(old_cpu, &set->used);
+unlock:
+	mutex_unlock(&node_affinity.lock);
+}
+
+static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	int cpu = cpumask_first(mask);
+	struct hfi1_msix_entry *msix = container_of(notify,
+						    struct hfi1_msix_entry,
+						    notify);
+
+	/* Only one CPU configuration supported currently */
+	hfi1_update_sdma_affinity(msix, cpu);
+}
+
+static void hfi1_irq_notifier_release(struct kref *ref)
+{
+	/*
+	 * This is required by affinity notifier. We don't have anything to
+	 * free here.
+	 */
+}
+
+static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+	struct irq_affinity_notify *notify = &msix->notify;
+
+	notify->irq = msix->msix.vector;
+	notify->notify = hfi1_irq_notifier_notify;
+	notify->release = hfi1_irq_notifier_release;
+
+	if (irq_set_affinity_notifier(notify->irq, notify))
+		pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
+		       notify->irq);
+}
+
+static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+	struct irq_affinity_notify *notify = &msix->notify;
+
+	if (irq_set_affinity_notifier(notify->irq, NULL))
+		pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
+		       notify->irq);
+}
+
+/*
+ * Function sets the irq affinity for msix.
+ * It *must* be called with node_affinity.lock held.
+ */
+static int get_irq_affinity(struct hfi1_devdata *dd,
+			    struct hfi1_msix_entry *msix)
 {
 	int ret;
 	cpumask_var_t diff;
@@ -329,9 +427,7 @@
 	if (!ret)
 		return -ENOMEM;
 
-	spin_lock(&node_affinity.lock);
 	entry = node_affinity_lookup(dd->node);
-	spin_unlock(&node_affinity.lock);
 
 	switch (msix->type) {
 	case IRQ_SDMA:
@@ -361,7 +457,6 @@
 	 * finds its CPU here.
 	 */
 	if (cpu == -1 && set) {
-		spin_lock(&node_affinity.lock);
 		if (cpumask_equal(&set->mask, &set->used)) {
 			/*
 			 * We've used up all the CPUs, bump up the generation
@@ -373,17 +468,6 @@
 		cpumask_andnot(diff, &set->mask, &set->used);
 		cpu = cpumask_first(diff);
 		cpumask_set_cpu(cpu, &set->used);
-		spin_unlock(&node_affinity.lock);
-	}
-
-	switch (msix->type) {
-	case IRQ_SDMA:
-		sde->cpu = cpu;
-		break;
-	case IRQ_GENERAL:
-	case IRQ_RCVCTXT:
-	case IRQ_OTHER:
-		break;
 	}
 
 	cpumask_set_cpu(cpu, &msix->mask);
@@ -392,10 +476,25 @@
 		    extra, cpu);
 	irq_set_affinity_hint(msix->msix.vector, &msix->mask);
 
+	if (msix->type == IRQ_SDMA) {
+		sde->cpu = cpu;
+		hfi1_setup_sdma_notifier(msix);
+	}
+
 	free_cpumask_var(diff);
 	return 0;
 }
 
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+{
+	int ret;
+
+	mutex_lock(&node_affinity.lock);
+	ret = get_irq_affinity(dd, msix);
+	mutex_unlock(&node_affinity.lock);
+	return ret;
+}
+
 void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
 			   struct hfi1_msix_entry *msix)
 {
@@ -403,13 +502,13 @@
 	struct hfi1_ctxtdata *rcd;
 	struct hfi1_affinity_node *entry;
 
-	spin_lock(&node_affinity.lock);
+	mutex_lock(&node_affinity.lock);
 	entry = node_affinity_lookup(dd->node);
-	spin_unlock(&node_affinity.lock);
 
 	switch (msix->type) {
 	case IRQ_SDMA:
 		set = &entry->def_intr;
+		hfi1_cleanup_sdma_notifier(msix);
 		break;
 	case IRQ_GENERAL:
 		/* Don't do accounting for general contexts */
@@ -421,21 +520,21 @@
 			set = &entry->rcv_intr;
 		break;
 	default:
+		mutex_unlock(&node_affinity.lock);
 		return;
 	}
 
 	if (set) {
-		spin_lock(&node_affinity.lock);
 		cpumask_andnot(&set->used, &set->used, &msix->mask);
 		if (cpumask_empty(&set->used) && set->gen) {
 			set->gen--;
 			cpumask_copy(&set->used, &set->mask);
 		}
-		spin_unlock(&node_affinity.lock);
 	}
 
 	irq_set_affinity_hint(msix->msix.vector, NULL);
 	cpumask_clear(&msix->mask);
+	mutex_unlock(&node_affinity.lock);
 }
 
 /* This should be called with node_affinity.lock held */
@@ -536,7 +635,7 @@
 	if (!ret)
 		goto free_available_mask;
 
-	spin_lock(&affinity->lock);
+	mutex_lock(&affinity->lock);
 	/*
 	 * If we've used all available HW threads, clear the mask and start
 	 * overloading.
@@ -644,7 +743,8 @@
 		cpu = -1;
 	else
 		cpumask_set_cpu(cpu, &set->used);
-	spin_unlock(&affinity->lock);
+
+	mutex_unlock(&affinity->lock);
 	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
 
 	free_cpumask_var(intrs_mask);
@@ -665,49 +765,53 @@
 
 	if (cpu < 0)
 		return;
-	spin_lock(&affinity->lock);
+
+	mutex_lock(&affinity->lock);
 	cpumask_clear_cpu(cpu, &set->used);
 	hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
 	if (cpumask_empty(&set->used) && set->gen) {
 		set->gen--;
 		cpumask_copy(&set->used, &set->mask);
 	}
-	spin_unlock(&affinity->lock);
+	mutex_unlock(&affinity->lock);
 }
 
-/* Prevents concurrent reads and writes of the sdma_affinity attrib */
-static DEFINE_MUTEX(sdma_affinity_mutex);
-
 int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
 			   size_t count)
 {
 	struct hfi1_affinity_node *entry;
-	struct cpumask mask;
+	cpumask_var_t mask;
 	int ret, i;
 
-	spin_lock(&node_affinity.lock);
+	mutex_lock(&node_affinity.lock);
 	entry = node_affinity_lookup(dd->node);
-	spin_unlock(&node_affinity.lock);
 
-	if (!entry)
-		return -EINVAL;
-
-	ret = cpulist_parse(buf, &mask);
-	if (ret)
-		return ret;
-
-	if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) {
-		dd_dev_warn(dd, "Invalid CPU mask\n");
-		return -EINVAL;
+	if (!entry) {
+		ret = -EINVAL;
+		goto unlock;
 	}
 
-	mutex_lock(&sdma_affinity_mutex);
+	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+	if (!ret) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	ret = cpulist_parse(buf, mask);
+	if (ret)
+		goto out;
+
+	if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) {
+		dd_dev_warn(dd, "Invalid CPU mask\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
 	/* reset the SDMA interrupt affinity details */
 	init_cpu_mask_set(&entry->def_intr);
-	cpumask_copy(&entry->def_intr.mask, &mask);
-	/*
-	 * Reassign the affinity for each SDMA interrupt.
-	 */
+	cpumask_copy(&entry->def_intr.mask, mask);
+
+	/* Reassign the affinity for each SDMA interrupt. */
 	for (i = 0; i < dd->num_msix_entries; i++) {
 		struct hfi1_msix_entry *msix;
 
@@ -715,13 +819,15 @@
 		if (msix->type != IRQ_SDMA)
 			continue;
 
-		ret = hfi1_get_irq_affinity(dd, msix);
+		ret = get_irq_affinity(dd, msix);
 
 		if (ret)
 			break;
 	}
-
-	mutex_unlock(&sdma_affinity_mutex);
+out:
+	free_cpumask_var(mask);
+unlock:
+	mutex_unlock(&node_affinity.lock);
 	return ret ? ret : strnlen(buf, PAGE_SIZE);
 }
 
@@ -729,15 +835,15 @@
 {
 	struct hfi1_affinity_node *entry;
 
-	spin_lock(&node_affinity.lock);
+	mutex_lock(&node_affinity.lock);
 	entry = node_affinity_lookup(dd->node);
-	spin_unlock(&node_affinity.lock);
 
-	if (!entry)
+	if (!entry) {
+		mutex_unlock(&node_affinity.lock);
 		return -EINVAL;
+	}
 
-	mutex_lock(&sdma_affinity_mutex);
 	cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
-	mutex_unlock(&sdma_affinity_mutex);
+	mutex_unlock(&node_affinity.lock);
 	return strnlen(buf, PAGE_SIZE);
 }
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 8879cf7..b89ea3c 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -121,8 +121,7 @@
 	int num_core_siblings;
 	int num_online_nodes;
 	int num_online_cpus;
-	/* protect affinity node list */
-	spinlock_t lock;
+	struct mutex lock; /* protects affinity nodes */
 };
 
 int node_affinity_init(void);
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index b32638d..9bf5f23 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -971,7 +971,9 @@
 	FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
 	FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
 	FLAG_ENTRY0("Failed LNI(ConfigLT)",    FAILED_LNI_CONFIGLT),
-	FLAG_ENTRY0("Host Handshake Timeout",  HOST_HANDSHAKE_TIMEOUT)
+	FLAG_ENTRY0("Host Handshake Timeout",  HOST_HANDSHAKE_TIMEOUT),
+	FLAG_ENTRY0("External Device Request Timeout",
+		    EXTERNAL_DEVICE_REQ_TIMEOUT),
 };
 
 /*
@@ -6825,7 +6827,6 @@
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
 				     OPA_LINKDOWN_REASON_SPEED_POLICY);
 		set_link_state(ppd, HLS_DN_OFFLINE);
-		tune_serdes(ppd);
 		start_link(ppd);
 	}
 }
@@ -6998,12 +6999,10 @@
 	 * If there is no cable attached, turn the DC off. Otherwise,
 	 * start the link bring up.
 	 */
-	if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) {
+	if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd))
 		dc_shutdown(ppd->dd);
-	} else {
-		tune_serdes(ppd);
+	else
 		start_link(ppd);
-	}
 }
 
 void handle_link_bounce(struct work_struct *work)
@@ -7016,7 +7015,6 @@
 	 */
 	if (ppd->host_link_state & HLS_UP) {
 		set_link_state(ppd, HLS_DN_OFFLINE);
-		tune_serdes(ppd);
 		start_link(ppd);
 	} else {
 		dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
@@ -7531,7 +7529,6 @@
 		set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
 				     OPA_LINKDOWN_REASON_WIDTH_POLICY);
 		set_link_state(ppd, HLS_DN_OFFLINE);
-		tune_serdes(ppd);
 		start_link(ppd);
 	}
 }
@@ -9161,6 +9158,12 @@
  */
 int start_link(struct hfi1_pportdata *ppd)
 {
+	/*
+	 * Tune the SerDes to a ballpark setting for optimal signal and bit
+	 * error rate.  Needs to be done before starting the link.
+	 */
+	tune_serdes(ppd);
+
 	if (!ppd->link_enabled) {
 		dd_dev_info(ppd->dd,
 			    "%s: stopping link start because link is disabled\n",
@@ -9401,8 +9404,6 @@
 		 */
 		set_qsfp_int_n(ppd, 1);
 
-		tune_serdes(ppd);
-
 		start_link(ppd);
 	}
 
@@ -9490,6 +9491,73 @@
 	write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00);
 }
 
+/*
+ * Perform a test read on the QSFP.  Return 0 on success, -ERRNO
+ * on error.
+ */
+static int test_qsfp_read(struct hfi1_pportdata *ppd)
+{
+	int ret;
+	u8 status;
+
+	/* report success if not a QSFP */
+	if (ppd->port_type != PORT_TYPE_QSFP)
+		return 0;
+
+	/* read byte 2, the status byte */
+	ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1);
+	if (ret < 0)
+		return ret;
+	if (ret != 1)
+		return -EIO;
+
+	return 0; /* success */
+}
+
+/*
+ * Values for QSFP retry.
+ *
+ * Give up after 10s (20 x 500ms).  The overall timeout was empirically
+ * arrived at from experience on a large cluster.
+ */
+#define MAX_QSFP_RETRIES 20
+#define QSFP_RETRY_WAIT 500 /* msec */
+
+/*
+ * Try a QSFP read.  If it fails, schedule a retry for later.
+ * Called on first link activation after driver load.
+ */
+static void try_start_link(struct hfi1_pportdata *ppd)
+{
+	if (test_qsfp_read(ppd)) {
+		/* read failed */
+		if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) {
+			dd_dev_err(ppd->dd, "QSFP not responding, giving up\n");
+			return;
+		}
+		dd_dev_info(ppd->dd,
+			    "QSFP not responding, waiting and retrying %d\n",
+			    (int)ppd->qsfp_retry_count);
+		ppd->qsfp_retry_count++;
+		queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
+				   msecs_to_jiffies(QSFP_RETRY_WAIT));
+		return;
+	}
+	ppd->qsfp_retry_count = 0;
+
+	start_link(ppd);
+}
+
+/*
+ * Workqueue function to start the link after a delay.
+ */
+void handle_start_link(struct work_struct *work)
+{
+	struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata,
+						  start_link_work.work);
+	try_start_link(ppd);
+}
+
 int bringup_serdes(struct hfi1_pportdata *ppd)
 {
 	struct hfi1_devdata *dd = ppd->dd;
@@ -9525,14 +9593,8 @@
 		set_qsfp_int_n(ppd, 1);
 	}
 
-	/*
-	 * Tune the SerDes to a ballpark setting for
-	 * optimal signal and bit error rate
-	 * Needs to be done before starting the link
-	 */
-	tune_serdes(ppd);
-
-	return start_link(ppd);
+	try_start_link(ppd);
+	return 0;
 }
 
 void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
@@ -9549,6 +9611,10 @@
 	ppd->driver_link_ready = 0;
 	ppd->link_enabled = 0;
 
+	ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */
+	flush_delayed_work(&ppd->start_link_work);
+	cancel_delayed_work_sync(&ppd->start_link_work);
+
 	ppd->offline_disabled_reason =
 			HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
 	set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
@@ -9648,12 +9714,12 @@
 		hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
 }
 
-struct hfi1_message_header *hfi1_get_msgheader(
-				struct hfi1_devdata *dd, __le32 *rhf_addr)
+struct ib_header *hfi1_get_msgheader(
+	struct hfi1_devdata *dd, __le32 *rhf_addr)
 {
 	u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
 
-	return (struct hfi1_message_header *)
+	return (struct ib_header *)
 		(rhf_addr - dd->rhf_offset + offset);
 }
 
@@ -11489,10 +11555,10 @@
 	    !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
 		/* reset the tail and hdr addresses, and sequence count */
 		write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
-				rcd->rcvhdrq_phys);
+				rcd->rcvhdrq_dma);
 		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
 			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
-					rcd->rcvhdrqtailaddr_phys);
+					rcd->rcvhdrqtailaddr_dma);
 		rcd->seq_cnt = 1;
 
 		/* reset the cached receive header queue head value */
@@ -11557,9 +11623,9 @@
 		 * update with a dummy tail address and then disable
 		 * receive context.
 		 */
-		if (dd->rcvhdrtail_dummy_physaddr) {
+		if (dd->rcvhdrtail_dummy_dma) {
 			write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
-					dd->rcvhdrtail_dummy_physaddr);
+					dd->rcvhdrtail_dummy_dma);
 			/* Enabling RcvCtxtCtrl.TailUpd is intentional. */
 			rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
 		}
@@ -11570,7 +11636,7 @@
 		rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
 	if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
 		rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
-	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
+	if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
 		rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
 	if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
 		/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -11642,7 +11708,7 @@
 		 * so it doesn't contain an address that is invalid.
 		 */
 		write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
-				dd->rcvhdrtail_dummy_physaddr);
+				dd->rcvhdrtail_dummy_dma);
 }
 
 u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
@@ -12865,7 +12931,7 @@
  */
 static int set_up_context_variables(struct hfi1_devdata *dd)
 {
-	int num_kernel_contexts;
+	unsigned long num_kernel_contexts;
 	int total_contexts;
 	int ret;
 	unsigned ngroups;
@@ -12894,9 +12960,9 @@
 	 */
 	if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
 		dd_dev_err(dd,
-			   "Reducing # kernel rcv contexts to: %d, from %d\n",
+			   "Reducing # kernel rcv contexts to: %d, from %lu\n",
 			   (int)(dd->chip_send_contexts - num_vls - 1),
-			   (int)num_kernel_contexts);
+			   num_kernel_contexts);
 		num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
 	}
 	/*
@@ -13319,9 +13385,9 @@
 		/*
 		 * Give up after 1ms - maximum wait time.
 		 *
-		 * RBuf size is 148KiB.  Slowest possible is PCIe Gen1 x1 at
+		 * RBuf size is 136KiB.  Slowest possible is PCIe Gen1 x1 at
 		 * 250MB/s bandwidth.  Lower rate to 66% for overhead to get:
-		 *	148 KB / (66% * 250MB/s) = 920us
+		 *	136 KB / (66% * 250MB/s) = 844us
 		 */
 		if (count++ > 500) {
 			dd_dev_err(dd,
@@ -14500,6 +14566,11 @@
 	if (ret)
 		goto bail_cleanup;
 
+	/* call before get_platform_config(), after init_chip_resources() */
+	ret = eprom_init(dd);
+	if (ret)
+		goto bail_free_rcverr;
+
 	/* Needs to be called before hfi1_firmware_init */
 	get_platform_config(dd);
 
@@ -14620,10 +14691,6 @@
 	if (ret)
 		goto bail_free_cntrs;
 
-	ret = eprom_init(dd);
-	if (ret)
-		goto bail_free_rcverr;
-
 	goto bail;
 
 bail_free_rcverr:
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index ed11107..9234525 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -82,7 +82,7 @@
  */
 #define CM_VAU 3
 /* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */
-#define CM_GLOBAL_CREDITS 0x940
+#define CM_GLOBAL_CREDITS 0x880
 /* Number of PKey entries in the HW */
 #define MAX_PKEY_VALUES 16
 
@@ -254,12 +254,14 @@
 #define FAILED_LNI_VERIFY_CAP2		BIT(10)
 #define FAILED_LNI_CONFIGLT		BIT(11)
 #define HOST_HANDSHAKE_TIMEOUT		BIT(12)
+#define EXTERNAL_DEVICE_REQ_TIMEOUT	BIT(13)
 
 #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \
 			| FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \
 			| FAILED_LNI_VERIFY_CAP1 \
 			| FAILED_LNI_VERIFY_CAP2 \
-			| FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT)
+			| FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT \
+			| EXTERNAL_DEVICE_REQ_TIMEOUT)
 
 /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */
 #define HOST_REQ_DONE		BIT(0)
@@ -706,6 +708,7 @@
 void handle_link_down(struct work_struct *work);
 void handle_link_downgrade(struct work_struct *work);
 void handle_link_bounce(struct work_struct *work);
+void handle_start_link(struct work_struct *work);
 void handle_sma_message(struct work_struct *work);
 void reset_qsfp(struct hfi1_pportdata *ppd);
 void qsfp_event(struct work_struct *work);
@@ -1335,7 +1338,7 @@
 u64 get_all_cpu_total(u64 __percpu *cntr);
 void hfi1_start_cleanup(struct hfi1_devdata *dd);
 void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
-struct hfi1_message_header *hfi1_get_msgheader(
+struct ib_header *hfi1_get_msgheader(
 				struct hfi1_devdata *dd, __le32 *rhf_addr);
 int hfi1_init_ctxt(struct send_context *sc);
 void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index fcc9c21..da7be21bed 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -320,14 +320,6 @@
 /* RHF receive type error - bypass packet errors */
 #define RHF_RTE_BYPASS_NO_ERR		0x0
 
-/*
- * This structure contains the first field common to all protocols
- * that employ this chip.
- */
-struct hfi1_message_header {
-	__be16 lrh[4];
-};
-
 /* IB - LRH header constants */
 #define HFI1_LRH_GRH 0x0003      /* 1. word of IB LRH - next header: GRH */
 #define HFI1_LRH_BTH 0x0002      /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index dbab9d9..632ba21 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -59,6 +59,40 @@
 
 static struct dentry *hfi1_dbg_root;
 
+/* wrappers to enforce srcu in seq file */
+static ssize_t hfi1_seq_read(
+	struct file *file,
+	char __user *buf,
+	size_t size,
+	loff_t *ppos)
+{
+	struct dentry *d = file->f_path.dentry;
+	int srcu_idx;
+	ssize_t r;
+
+	r = debugfs_use_file_start(d, &srcu_idx);
+	if (likely(!r))
+		r = seq_read(file, buf, size, ppos);
+	debugfs_use_file_finish(srcu_idx);
+	return r;
+}
+
+static loff_t hfi1_seq_lseek(
+	struct file *file,
+	loff_t offset,
+	int whence)
+{
+	struct dentry *d = file->f_path.dentry;
+	int srcu_idx;
+	loff_t r;
+
+	r = debugfs_use_file_start(d, &srcu_idx);
+	if (likely(!r))
+		r = seq_lseek(file, offset, whence);
+	debugfs_use_file_finish(srcu_idx);
+	return r;
+}
+
 #define private2dd(file) (file_inode(file)->i_private)
 #define private2ppd(file) (file_inode(file)->i_private)
 
@@ -87,8 +121,8 @@
 static const struct file_operations _##name##_file_ops = { \
 	.owner   = THIS_MODULE, \
 	.open    = _##name##_open, \
-	.read    = seq_read, \
-	.llseek  = seq_lseek, \
+	.read    = hfi1_seq_read, \
+	.llseek  = hfi1_seq_lseek, \
 	.release = seq_release \
 }
 
@@ -105,11 +139,9 @@
 	DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
 
 static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
 {
 	struct hfi1_opcode_stats_perctx *opstats;
 
-	rcu_read_lock();
 	if (*pos >= ARRAY_SIZE(opstats->stats))
 		return NULL;
 	return pos;
@@ -126,9 +158,7 @@
 }
 
 static void _opcode_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
 {
-	rcu_read_unlock();
 }
 
 static int _opcode_stats_seq_show(struct seq_file *s, void *v)
@@ -223,28 +253,32 @@
 DEBUGFS_FILE_OPS(ctx_stats);
 
 static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
+	__acquires(RCU)
 {
 	struct qp_iter *iter;
 	loff_t n = *pos;
 
-	rcu_read_lock();
 	iter = qp_iter_init(s->private);
+
+	/* stop calls rcu_read_unlock */
+	rcu_read_lock();
+
 	if (!iter)
 		return NULL;
 
-	while (n--) {
+	do {
 		if (qp_iter_next(iter)) {
 			kfree(iter);
 			return NULL;
 		}
-	}
+	} while (n--);
 
 	return iter;
 }
 
 static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
 				loff_t *pos)
+	__must_hold(RCU)
 {
 	struct qp_iter *iter = iter_ptr;
 
@@ -259,7 +293,7 @@
 }
 
 static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
-__releases(RCU)
+	__releases(RCU)
 {
 	rcu_read_unlock();
 }
@@ -281,12 +315,10 @@
 DEBUGFS_FILE_OPS(qp_stats);
 
 static void *_sdes_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
 {
 	struct hfi1_ibdev *ibd;
 	struct hfi1_devdata *dd;
 
-	rcu_read_lock();
 	ibd = (struct hfi1_ibdev *)s->private;
 	dd = dd_from_dev(ibd);
 	if (!dd->per_sdma || *pos >= dd->num_sdma)
@@ -306,9 +338,7 @@
 }
 
 static void _sdes_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
 {
-	rcu_read_unlock();
 }
 
 static int _sdes_seq_show(struct seq_file *s, void *v)
@@ -335,11 +365,9 @@
 	struct hfi1_devdata *dd;
 	ssize_t rval;
 
-	rcu_read_lock();
 	dd = private2dd(file);
 	avail = hfi1_read_cntrs(dd, NULL, &counters);
 	rval =  simple_read_from_buffer(buf, count, ppos, counters, avail);
-	rcu_read_unlock();
 	return rval;
 }
 
@@ -352,11 +380,9 @@
 	struct hfi1_devdata *dd;
 	ssize_t rval;
 
-	rcu_read_lock();
 	dd = private2dd(file);
 	avail = hfi1_read_cntrs(dd, &names, NULL);
 	rval =  simple_read_from_buffer(buf, count, ppos, names, avail);
-	rcu_read_unlock();
 	return rval;
 }
 
@@ -379,11 +405,9 @@
 	struct hfi1_devdata *dd;
 	ssize_t rval;
 
-	rcu_read_lock();
 	dd = private2dd(file);
 	avail = hfi1_read_portcntrs(dd->pport, &names, NULL);
 	rval = simple_read_from_buffer(buf, count, ppos, names, avail);
-	rcu_read_unlock();
 	return rval;
 }
 
@@ -396,11 +420,9 @@
 	struct hfi1_pportdata *ppd;
 	ssize_t rval;
 
-	rcu_read_lock();
 	ppd = private2ppd(file);
 	avail = hfi1_read_portcntrs(ppd, NULL, &counters);
 	rval = simple_read_from_buffer(buf, count, ppos, counters, avail);
-	rcu_read_unlock();
 	return rval;
 }
 
@@ -430,16 +452,13 @@
 	int used;
 	int i;
 
-	rcu_read_lock();
 	ppd = private2ppd(file);
 	dd = ppd->dd;
 	size = PAGE_SIZE;
 	used = 0;
 	tmp = kmalloc(size, GFP_KERNEL);
-	if (!tmp) {
-		rcu_read_unlock();
+	if (!tmp)
 		return -ENOMEM;
-	}
 
 	scratch0 = read_csr(dd, ASIC_CFG_SCRATCH);
 	used += scnprintf(tmp + used, size - used,
@@ -466,7 +485,6 @@
 	used += scnprintf(tmp + used, size - used, "Write bits to clear\n");
 
 	ret = simple_read_from_buffer(buf, count, ppos, tmp, used);
-	rcu_read_unlock();
 	kfree(tmp);
 	return ret;
 }
@@ -482,15 +500,12 @@
 	u64 scratch0;
 	u64 clear;
 
-	rcu_read_lock();
 	ppd = private2ppd(file);
 	dd = ppd->dd;
 
 	buff = kmalloc(count + 1, GFP_KERNEL);
-	if (!buff) {
-		ret = -ENOMEM;
-		goto do_return;
-	}
+	if (!buff)
+		return -ENOMEM;
 
 	ret = copy_from_user(buff, buf, count);
 	if (ret > 0) {
@@ -523,8 +538,6 @@
 
  do_free:
 	kfree(buff);
- do_return:
-	rcu_read_unlock();
 	return ret;
 }
 
@@ -538,18 +551,14 @@
 	char *tmp;
 	int ret;
 
-	rcu_read_lock();
 	ppd = private2ppd(file);
 	tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!tmp) {
-		rcu_read_unlock();
+	if (!tmp)
 		return -ENOMEM;
-	}
 
 	ret = qsfp_dump(ppd, tmp, PAGE_SIZE);
 	if (ret > 0)
 		ret = simple_read_from_buffer(buf, count, ppos, tmp, ret);
-	rcu_read_unlock();
 	kfree(tmp);
 	return ret;
 }
@@ -565,7 +574,6 @@
 	int offset;
 	int total_written;
 
-	rcu_read_lock();
 	ppd = private2ppd(file);
 
 	/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -573,16 +581,12 @@
 	offset = *ppos & 0xffff;
 
 	/* explicitly reject invalid address 0 to catch cp and cat */
-	if (i2c_addr == 0) {
-		ret = -EINVAL;
-		goto _return;
-	}
+	if (i2c_addr == 0)
+		return -EINVAL;
 
 	buff = kmalloc(count, GFP_KERNEL);
-	if (!buff) {
-		ret = -ENOMEM;
-		goto _return;
-	}
+	if (!buff)
+		return -ENOMEM;
 
 	ret = copy_from_user(buff, buf, count);
 	if (ret > 0) {
@@ -602,8 +606,6 @@
 
  _free:
 	kfree(buff);
- _return:
-	rcu_read_unlock();
 	return ret;
 }
 
@@ -632,7 +634,6 @@
 	int offset;
 	int total_read;
 
-	rcu_read_lock();
 	ppd = private2ppd(file);
 
 	/* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */
@@ -640,16 +641,12 @@
 	offset = *ppos & 0xffff;
 
 	/* explicitly reject invalid address 0 to catch cp and cat */
-	if (i2c_addr == 0) {
-		ret = -EINVAL;
-		goto _return;
-	}
+	if (i2c_addr == 0)
+		return -EINVAL;
 
 	buff = kmalloc(count, GFP_KERNEL);
-	if (!buff) {
-		ret = -ENOMEM;
-		goto _return;
-	}
+	if (!buff)
+		return -ENOMEM;
 
 	total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count);
 	if (total_read < 0) {
@@ -669,8 +666,6 @@
 
  _free:
 	kfree(buff);
- _return:
-	rcu_read_unlock();
 	return ret;
 }
 
@@ -697,26 +692,20 @@
 	int ret;
 	int total_written;
 
-	rcu_read_lock();
-	if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
-		ret = -EINVAL;
-		goto _return;
-	}
+	if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */
+		return -EINVAL;
 
 	ppd = private2ppd(file);
 
 	buff = kmalloc(count, GFP_KERNEL);
-	if (!buff) {
-		ret = -ENOMEM;
-		goto _return;
-	}
+	if (!buff)
+		return -ENOMEM;
 
 	ret = copy_from_user(buff, buf, count);
 	if (ret > 0) {
 		ret = -EFAULT;
 		goto _free;
 	}
-
 	total_written = qsfp_write(ppd, target, *ppos, buff, count);
 	if (total_written < 0) {
 		ret = total_written;
@@ -729,8 +718,6 @@
 
  _free:
 	kfree(buff);
- _return:
-	rcu_read_unlock();
 	return ret;
 }
 
@@ -757,7 +744,6 @@
 	int ret;
 	int total_read;
 
-	rcu_read_lock();
 	if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */
 		ret = -EINVAL;
 		goto _return;
@@ -790,7 +776,6 @@
  _free:
 	kfree(buff);
  _return:
-	rcu_read_unlock();
 	return ret;
 }
 
@@ -948,6 +933,43 @@
 	DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
 };
 
+static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)
+{
+	if (*pos >= num_online_cpus())
+		return NULL;
+
+	return pos;
+}
+
+static void *_sdma_cpu_list_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	++*pos;
+	if (*pos >= num_online_cpus())
+		return NULL;
+
+	return pos;
+}
+
+static void _sdma_cpu_list_seq_stop(struct seq_file *s, void *v)
+{
+	/* nothing allocated */
+}
+
+static int _sdma_cpu_list_seq_show(struct seq_file *s, void *v)
+{
+	struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+	struct hfi1_devdata *dd = dd_from_dev(ibd);
+	loff_t *spos = v;
+	loff_t i = *spos;
+
+	sdma_seqfile_dump_cpu_list(s, dd, (unsigned long)i);
+	return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
+DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
+DEBUGFS_FILE_OPS(sdma_cpu_list);
+
 void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
 {
 	char name[sizeof("port0counters") + 1];
@@ -976,6 +998,7 @@
 	DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd);
 	DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
 	DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
+	DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
 	/* dev counter files */
 	for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
 		DEBUGFS_FILE_CREATE(cntr_ops[i].name,
@@ -1006,7 +1029,6 @@
 	debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
 out:
 	ibd->hfi1_ibdev_dbg = NULL;
-	synchronize_rcu();
 }
 
 /*
@@ -1031,9 +1053,7 @@
 };
 
 static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
 {
-	rcu_read_lock();
 	if (*pos >= ARRAY_SIZE(hfi1_statnames))
 		return NULL;
 	return pos;
@@ -1051,9 +1071,7 @@
 }
 
 static void _driver_stats_names_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
 {
-	rcu_read_unlock();
 }
 
 static int _driver_stats_names_seq_show(struct seq_file *s, void *v)
@@ -1069,9 +1087,7 @@
 DEBUGFS_FILE_OPS(driver_stats_names);
 
 static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos)
-__acquires(RCU)
 {
-	rcu_read_lock();
 	if (*pos >= ARRAY_SIZE(hfi1_statnames))
 		return NULL;
 	return pos;
@@ -1086,9 +1102,7 @@
 }
 
 static void _driver_stats_seq_stop(struct seq_file *s, void *v)
-__releases(RCU)
 {
-	rcu_read_unlock();
 }
 
 static u64 hfi1_sps_ints(void)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 8246dc7..6563e4d3 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -276,7 +276,7 @@
 static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
 		       struct hfi1_packet *packet)
 {
-	struct hfi1_message_header *rhdr = packet->hdr;
+	struct ib_header *rhdr = packet->hdr;
 	u32 rte = rhf_rcv_type_err(packet->rhf);
 	int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -288,10 +288,9 @@
 
 	if (packet->rhf & RHF_TID_ERR) {
 		/* For TIDERR and RC QPs preemptively schedule a NAK */
-		struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr;
-		struct hfi1_other_headers *ohdr = NULL;
+		struct ib_other_headers *ohdr = NULL;
 		u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
-		u16 lid  = be16_to_cpu(hdr->lrh[1]);
+		u16 lid  = be16_to_cpu(rhdr->lrh[1]);
 		u32 qp_num;
 		u32 rcv_flags = 0;
 
@@ -301,14 +300,14 @@
 
 		/* Check for GRH */
 		if (lnh == HFI1_LRH_BTH) {
-			ohdr = &hdr->u.oth;
+			ohdr = &rhdr->u.oth;
 		} else if (lnh == HFI1_LRH_GRH) {
 			u32 vtf;
 
-			ohdr = &hdr->u.l.oth;
-			if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+			ohdr = &rhdr->u.l.oth;
+			if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
 				goto drop;
-			vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+			vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
 			if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
 				goto drop;
 			rcv_flags |= HFI1_HAS_GRH;
@@ -344,7 +343,7 @@
 			case IB_QPT_RC:
 				hfi1_rc_hdrerr(
 					rcd,
-					hdr,
+					rhdr,
 					rcv_flags,
 					qp);
 				break;
@@ -452,8 +451,8 @@
 			       bool do_cnp)
 {
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	struct hfi1_ib_header *hdr = pkt->hdr;
-	struct hfi1_other_headers *ohdr = pkt->ohdr;
+	struct ib_header *hdr = pkt->hdr;
+	struct ib_other_headers *ohdr = pkt->ohdr;
 	struct ib_grh *grh = NULL;
 	u32 rqpn = 0, bth1;
 	u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
@@ -487,7 +486,7 @@
 		return;
 	}
 
-	sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf);
+	sc = hdr2sc(hdr, pkt->rhf);
 
 	bth1 = be32_to_cpu(ohdr->bth[1]);
 	if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
@@ -599,8 +598,8 @@
 		__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
 					 dd->rhf_offset;
 		struct rvt_qp *qp;
-		struct hfi1_ib_header *hdr;
-		struct hfi1_other_headers *ohdr;
+		struct ib_header *hdr;
+		struct ib_other_headers *ohdr;
 		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 		u64 rhf = rhf_to_cpu(rhf_addr);
 		u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -616,8 +615,8 @@
 		if (etype != RHF_RCV_TYPE_IB)
 			goto next;
 
-		hdr = (struct hfi1_ib_header *)
-			hfi1_get_msgheader(dd, rhf_addr);
+		hdr = hfi1_get_msgheader(dd, rhf_addr);
+
 		lnh = be16_to_cpu(hdr->lrh[0]) & 3;
 
 		if (lnh == HFI1_LRH_BTH)
@@ -888,14 +887,15 @@
 }
 
 static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
-				      struct hfi1_packet packet,
+				      struct hfi1_packet *packet,
 				      struct hfi1_devdata *dd)
 {
 	struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
-	struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd,
-							     packet.rhf_addr);
+	struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+						   packet->rhf_addr);
+	u8 etype = rhf_rcv_type(packet->rhf);
 
-	if (hdr2sc(hdr, packet.rhf) != 0xf) {
+	if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
 		int hwstate = read_logical_state(dd);
 
 		if (hwstate != LSTATE_ACTIVE) {
@@ -979,7 +979,7 @@
 			/* Auto activate link on non-SC15 packet receive */
 			if (unlikely(rcd->ppd->host_link_state ==
 				     HLS_UP_ARMED) &&
-			    set_armed_to_active(rcd, packet, dd))
+			    set_armed_to_active(rcd, &packet, dd))
 				goto bail;
 			last = process_rcv_packet(&packet, thread);
 		}
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 36b7794..e70c223 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -49,7 +49,26 @@
 #include "common.h"
 #include "eprom.h"
 
+/*
+ * The EPROM is logically divided into three partitions:
+ *	partition 0: the first 128K, visible from PCI ROM BAR
+ *	partition 1: 4K config file (sector size)
+ *	partition 2: the rest
+ */
+#define P0_SIZE (128 * 1024)
+#define P1_SIZE   (4 * 1024)
+#define P1_START P0_SIZE
+#define P2_START (P0_SIZE + P1_SIZE)
+
+/* controller page size, in bytes */
+#define EP_PAGE_SIZE 256
+#define EP_PAGE_MASK (EP_PAGE_SIZE - 1)
+#define EP_PAGE_DWORDS (EP_PAGE_SIZE / sizeof(u32))
+
+/* controller commands */
 #define CMD_SHIFT 24
+#define CMD_NOP			    (0)
+#define CMD_READ_DATA(addr)	    ((0x03 << CMD_SHIFT) | addr)
 #define CMD_RELEASE_POWERDOWN_NOID  ((0xab << CMD_SHIFT))
 
 /* controller interface speeds */
@@ -61,6 +80,90 @@
  * Double it for safety.
  */
 #define EPROM_TIMEOUT 80000 /* ms */
+
+/*
+ * Read a 256 byte (64 dword) EPROM page.
+ * All callers have verified the offset is at a page boundary.
+ */
+static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
+{
+	int i;
+
+	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
+	for (i = 0; i < EP_PAGE_DWORDS; i++)
+		result[i] = (u32)read_csr(dd, ASIC_EEP_DATA);
+	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */
+}
+
+/*
+ * Read length bytes starting at offset from the start of the EPROM.
+ */
+static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, void *dest)
+{
+	u32 buffer[EP_PAGE_DWORDS];
+	u32 end;
+	u32 start_offset;
+	u32 read_start;
+	u32 bytes;
+
+	if (len == 0)
+		return 0;
+
+	end = start + len;
+
+	/*
+	 * Make sure the read range is not outside of the controller read
+	 * command address range.  Note that '>' is correct below - the end
+	 * of the range is OK if it stops at the limit, but no higher.
+	 */
+	if (end > (1 << CMD_SHIFT))
+		return -EINVAL;
+
+	/* read the first partial page */
+	start_offset = start & EP_PAGE_MASK;
+	if (start_offset) {
+		/* partial starting page */
+
+		/* align and read the page that contains the start */
+		read_start = start & ~EP_PAGE_MASK;
+		read_page(dd, read_start, buffer);
+
+		/* the rest of the page is available data */
+		bytes = EP_PAGE_SIZE - start_offset;
+
+		if (len <= bytes) {
+			/* end is within this page */
+			memcpy(dest, (u8 *)buffer + start_offset, len);
+			return 0;
+		}
+
+		memcpy(dest, (u8 *)buffer + start_offset, bytes);
+
+		start += bytes;
+		len -= bytes;
+		dest += bytes;
+	}
+	/* start is now page aligned */
+
+	/* read whole pages */
+	while (len >= EP_PAGE_SIZE) {
+		read_page(dd, start, buffer);
+		memcpy(dest, buffer, EP_PAGE_SIZE);
+
+		start += EP_PAGE_SIZE;
+		len -= EP_PAGE_SIZE;
+		dest += EP_PAGE_SIZE;
+	}
+
+	/* read the last partial page */
+	if (len) {
+		read_page(dd, start, buffer);
+		memcpy(dest, buffer, len);
+	}
+
+	return 0;
+}
+
 /*
  * Initialize the EPROM handler.
  */
@@ -100,3 +203,85 @@
 done_asic:
 	return ret;
 }
+
+/* magic character sequence that trails an image */
+#define IMAGE_TRAIL_MAGIC "egamiAPO"
+
+/*
+ * Read all of partition 1.  The actual file is at the front.  Adjust
+ * the returned size if a trailing image magic is found.
+ */
+static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
+					  u32 *size)
+{
+	void *buffer;
+	void *p;
+	u32 length;
+	int ret;
+
+	buffer = kmalloc(P1_SIZE, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	ret = read_length(dd, P1_START, P1_SIZE, buffer);
+	if (ret) {
+		kfree(buffer);
+		return ret;
+	}
+
+	/* scan for image magic that may trail the actual data */
+	p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
+	if (p)
+		length = p - buffer;
+	else
+		length = P1_SIZE;
+
+	*data = buffer;
+	*size = length;
+	return 0;
+}
+
+/*
+ * Read the platform configuration file from the EPROM.
+ *
+ * On success, an allocated buffer containing the data and its size are
+ * returned.  It is up to the caller to free this buffer.
+ *
+ * Return value:
+ *   0	      - success
+ *   -ENXIO   - no EPROM is available
+ *   -EBUSY   - not able to acquire access to the EPROM
+ *   -ENOENT  - no recognizable file written
+ *   -ENOMEM  - buffer could not be allocated
+ */
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
+{
+	u32 directory[EP_PAGE_DWORDS]; /* aligned buffer */
+	int ret;
+
+	if (!dd->eprom_available)
+		return -ENXIO;
+
+	ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT);
+	if (ret)
+		return -EBUSY;
+
+	/* read the last page of P0 for the EPROM format magic */
+	ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
+	if (ret)
+		goto done;
+
+	/* last dword of P0 contains a magic indicator */
+	if (directory[EP_PAGE_DWORDS - 1] == 0) {
+		/* partition format */
+		ret = read_partition_platform_config(dd, data, size);
+		goto done;
+	}
+
+	/* nothing recognized */
+	ret = -ENOENT;
+
+done:
+	release_chip_resource(dd, CR_EPROM);
+	return ret;
+}
diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index d41f0b1..e774184 100644
--- a/drivers/infiniband/hw/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
@@ -45,8 +45,8 @@
  *
  */
 
-struct hfi1_cmd;
 struct hfi1_devdata;
 
 int eprom_init(struct hfi1_devdata *dd);
-int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd);
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **buf_ret,
+			       u32 *size_ret);
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 1ecbec1..677efa0 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -58,7 +58,6 @@
 #include "trace.h"
 #include "user_sdma.h"
 #include "user_exp_rcv.h"
-#include "eprom.h"
 #include "aspm.h"
 #include "mmu_rb.h"
 
@@ -183,6 +182,7 @@
 	if (fd) {
 		fd->rec_cpu_num = -1; /* no cpu affinity by default */
 		fd->mm = current->mm;
+		atomic_inc(&fd->mm->mm_count);
 	}
 
 	fp->private_data = fd;
@@ -222,7 +222,7 @@
 		ret = assign_ctxt(fp, &uinfo);
 		if (ret < 0)
 			return ret;
-		setup_ctxt(fp);
+		ret = setup_ctxt(fp);
 		if (ret)
 			return ret;
 		ret = user_init(fp);
@@ -439,9 +439,10 @@
 	struct hfi1_filedata *fd = fp->private_data;
 	struct hfi1_ctxtdata *uctxt = fd->uctxt;
 	struct hfi1_devdata *dd;
-	unsigned long flags, pfn;
+	unsigned long flags;
 	u64 token = vma->vm_pgoff << PAGE_SHIFT,
 		memaddr = 0;
+	void *memvirt = NULL;
 	u8 subctxt, mapio = 0, vmf = 0, type;
 	ssize_t memlen = 0;
 	int ret = 0;
@@ -492,7 +493,8 @@
 		 * second or third page allocated for credit returns (if number
 		 * of enabled contexts > 64 and 128 respectively).
 		 */
-		memaddr = dd->cr_base[uctxt->numa_id].pa +
+		memvirt = dd->cr_base[uctxt->numa_id].va;
+		memaddr = virt_to_phys(memvirt) +
 			(((u64)uctxt->sc->hw_free -
 			  (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
 		memlen = PAGE_SIZE;
@@ -507,8 +509,8 @@
 		mapio = 1;
 		break;
 	case RCV_HDRQ:
-		memaddr = uctxt->rcvhdrq_phys;
 		memlen = uctxt->rcvhdrq_size;
+		memvirt = uctxt->rcvhdrq;
 		break;
 	case RCV_EGRBUF: {
 		unsigned long addr;
@@ -532,14 +534,21 @@
 		vma->vm_flags &= ~VM_MAYWRITE;
 		addr = vma->vm_start;
 		for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
+			memlen = uctxt->egrbufs.buffers[i].len;
+			memvirt = uctxt->egrbufs.buffers[i].addr;
 			ret = remap_pfn_range(
 				vma, addr,
-				uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT,
-				uctxt->egrbufs.buffers[i].len,
+				/*
+				 * virt_to_pfn() does the same, but
+				 * it's not available on x86_64
+				 * when CONFIG_MMU is enabled.
+				 */
+				PFN_DOWN(__pa(memvirt)),
+				memlen,
 				vma->vm_page_prot);
 			if (ret < 0)
 				goto done;
-			addr += uctxt->egrbufs.buffers[i].len;
+			addr += memlen;
 		}
 		ret = 0;
 		goto done;
@@ -595,8 +604,8 @@
 			ret = -EPERM;
 			goto done;
 		}
-		memaddr = uctxt->rcvhdrqtailaddr_phys;
 		memlen = PAGE_SIZE;
+		memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
 		flags &= ~VM_MAYWRITE;
 		break;
 	case SUBCTXT_UREGS:
@@ -649,16 +658,24 @@
 		  "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
 		    ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
 		    vma->vm_end - vma->vm_start, vma->vm_flags);
-	pfn = (unsigned long)(memaddr >> PAGE_SHIFT);
 	if (vmf) {
-		vma->vm_pgoff = pfn;
+		vma->vm_pgoff = PFN_DOWN(memaddr);
 		vma->vm_ops = &vm_ops;
 		ret = 0;
 	} else if (mapio) {
-		ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+		ret = io_remap_pfn_range(vma, vma->vm_start,
+					 PFN_DOWN(memaddr),
+					 memlen,
 					 vma->vm_page_prot);
+	} else if (memvirt) {
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      PFN_DOWN(__pa(memvirt)),
+				      memlen,
+				      vma->vm_page_prot);
 	} else {
-		ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+		ret = remap_pfn_range(vma, vma->vm_start,
+				      PFN_DOWN(memaddr),
+				      memlen,
 				      vma->vm_page_prot);
 	}
 done:
@@ -779,6 +796,7 @@
 	mutex_unlock(&hfi1_mutex);
 	hfi1_free_ctxtdata(dd, uctxt);
 done:
+	mmdrop(fdata->mm);
 	kobject_put(&dd->kobj);
 	kfree(fdata);
 	return 0;
@@ -959,14 +977,16 @@
 	 */
 	uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
 			     uctxt->dd->node);
-	if (!uctxt->sc)
-		return -ENOMEM;
-
+	if (!uctxt->sc) {
+		ret = -ENOMEM;
+		goto ctxdata_free;
+	}
 	hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
 		  uctxt->sc->hw_context);
 	ret = sc_enable(uctxt->sc);
 	if (ret)
-		return ret;
+		goto ctxdata_free;
+
 	/*
 	 * Setup shared context resources if the user-level has requested
 	 * shared contexts and this is the 'master' process.
@@ -980,7 +1000,7 @@
 		 * send context because it will be done during file close
 		 */
 		if (ret)
-			return ret;
+			goto ctxdata_free;
 	}
 	uctxt->userversion = uinfo->userversion;
 	uctxt->flags = hfi1_cap_mask; /* save current flag state */
@@ -1000,6 +1020,11 @@
 	fd->uctxt = uctxt;
 
 	return 0;
+
+ctxdata_free:
+	dd->rcd[ctxt] = NULL;
+	hfi1_free_ctxtdata(dd, uctxt);
+	return ret;
 }
 
 static int init_subctxts(struct hfi1_ctxtdata *uctxt,
@@ -1258,7 +1283,7 @@
 					       uctxt->rcvhdrq);
 	binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
 					       fd->subctxt,
-					       uctxt->egrbufs.rcvtids[0].phys);
+					       uctxt->egrbufs.rcvtids[0].dma);
 	binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
 						 fd->subctxt, 0);
 	/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 1000e0f..7eef11b 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -64,6 +64,8 @@
 #include <linux/kthread.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
+#include <rdma/ib_hdrs.h>
+#include <linux/rhashtable.h>
 #include <rdma/rdma_vt.h>
 
 #include "chip_registers.h"
@@ -171,12 +173,12 @@
 	u32 threshold;           /* head update threshold */
 	struct eager_buffer {
 		void *addr;
-		dma_addr_t phys;
+		dma_addr_t dma;
 		ssize_t len;
 	} *buffers;
 	struct {
 		void *addr;
-		dma_addr_t phys;
+		dma_addr_t dma;
 	} *rcvtids;
 };
 
@@ -207,8 +209,8 @@
 	/* size of each of the rcvhdrq entries */
 	u16 rcvhdrqentsize;
 	/* mmap of hdrq, must fit in 44 bits */
-	dma_addr_t rcvhdrq_phys;
-	dma_addr_t rcvhdrqtailaddr_phys;
+	dma_addr_t rcvhdrq_dma;
+	dma_addr_t rcvhdrqtailaddr_dma;
 	struct ctxt_eager_bufs egrbufs;
 	/* this receive context's assigned PIO ACK send context */
 	struct send_context *sc;
@@ -350,7 +352,7 @@
 	struct hfi1_ctxtdata *rcd;
 	__le32 *rhf_addr;
 	struct rvt_qp *qp;
-	struct hfi1_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	u64 rhf;
 	u32 maxcnt;
 	u32 rhqoff;
@@ -529,6 +531,7 @@
 	void *arg;
 	char name[MAX_NAME_SIZE];
 	cpumask_t mask;
+	struct irq_affinity_notify notify;
 };
 
 /* per-SL CCA information */
@@ -605,6 +608,7 @@
 	struct work_struct freeze_work;
 	struct work_struct link_downgrade_work;
 	struct work_struct link_bounce_work;
+	struct delayed_work start_link_work;
 	/* host link state variables */
 	struct mutex hls_lock;
 	u32 host_link_state;
@@ -659,6 +663,7 @@
 	u8 linkinit_reason;
 	u8 local_tx_rate;	/* rate given to 8051 firmware */
 	u8 last_pstate;		/* info only */
+	u8 qsfp_retry_count;
 
 	/* placeholders for IB MAD packet settings */
 	u8 overrun_threshold;
@@ -1058,8 +1063,6 @@
 	u8 psxmitwait_supported;
 	/* cycle length of PS* counters in HW (in picoseconds) */
 	u16 psxmitwait_check_rate;
-	/* high volume overflow errors deferred to tasklet */
-	struct tasklet_struct error_tasklet;
 
 	/* MSI-X information */
 	struct hfi1_msix_entry *msix_entries;
@@ -1162,7 +1165,7 @@
 
 	/* receive context tail dummy address */
 	__le64 *rcvhdrtail_dummy_kvaddr;
-	dma_addr_t rcvhdrtail_dummy_physaddr;
+	dma_addr_t rcvhdrtail_dummy_dma;
 
 	bool eprom_available;	/* true if EPROM is available for this device */
 	bool aspm_supported;	/* Does HW support ASPM */
@@ -1173,6 +1176,7 @@
 	atomic_t aspm_disabled_cnt;
 
 	struct hfi1_affinity *affinity;
+	struct rhashtable sdma_rht;
 	struct kobject kobj;
 };
 
@@ -1266,15 +1270,32 @@
 void receive_interrupt_work(struct work_struct *work);
 
 /* extract service channel from header and rhf */
-static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
+static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
 {
 	return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
 	       ((!!(rhf_dc_info(rhf))) << 4);
 }
 
+#define HFI1_JKEY_WIDTH       16
+#define HFI1_JKEY_MASK        (BIT(16) - 1)
+#define HFI1_ADMIN_JKEY_RANGE 32
+
+/*
+ * J_KEYs are split and allocated in the following groups:
+ *   0 - 31    - users with administrator privileges
+ *  32 - 63    - kernel protocols using KDETH packets
+ *  64 - 65535 - all other users using KDETH packets
+ */
 static inline u16 generate_jkey(kuid_t uid)
 {
-	return from_kuid(current_user_ns(), uid) & 0xffff;
+	u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK;
+
+	if (capable(CAP_SYS_ADMIN))
+		jkey &= HFI1_ADMIN_JKEY_RANGE - 1;
+	else if (jkey < 64)
+		jkey |= BIT(HFI1_JKEY_WIDTH - 1);
+
+	return jkey;
 }
 
 /*
@@ -1584,7 +1605,7 @@
 static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
 			       bool do_cnp)
 {
-	struct hfi1_other_headers *ohdr = pkt->ohdr;
+	struct ib_other_headers *ohdr = pkt->ohdr;
 	u32 bth1;
 
 	bth1 = be32_to_cpu(ohdr->bth[1]);
@@ -1656,7 +1677,6 @@
 struct hfi1_devdata *hfi1_init_dd(struct pci_dev *,
 				  const struct pci_device_id *);
 void hfi1_free_devdata(struct hfi1_devdata *);
-void cc_state_reclaim(struct rcu_head *rcu);
 struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
 
 /* LED beaconing functions */
@@ -1788,7 +1808,7 @@
 extern unsigned int hfi1_cu;
 extern unsigned int user_credit_return_threshold;
 extern int num_user_contexts;
-extern unsigned n_krcvqs;
+extern unsigned long n_krcvqs;
 extern uint krcvqs[];
 extern int krcvqsset;
 extern uint kdeth_qp;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index a358d23..60db615 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -94,7 +94,7 @@
 MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL");
 
 /* computed based on above array */
-unsigned n_krcvqs;
+unsigned long n_krcvqs;
 
 static unsigned hfi1_rcvarr_split = 25;
 module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO);
@@ -336,6 +336,7 @@
 	}
 	return rcd;
 bail:
+	dd->rcd[ctxt] = NULL;
 	kfree(rcd->egrbufs.rcvtids);
 	kfree(rcd->egrbufs.buffers);
 	kfree(rcd);
@@ -500,6 +501,7 @@
 	INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade);
 	INIT_WORK(&ppd->sma_message_work, handle_sma_message);
 	INIT_WORK(&ppd->link_bounce_work, handle_link_bounce);
+	INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link);
 	INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work);
 	INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event);
 
@@ -708,7 +710,7 @@
 	/* allocate dummy tail memory for all receive contexts */
 	dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent(
 		&dd->pcidev->dev, sizeof(u64),
-		&dd->rcvhdrtail_dummy_physaddr,
+		&dd->rcvhdrtail_dummy_dma,
 		GFP_KERNEL);
 
 	if (!dd->rcvhdrtail_dummy_kvaddr) {
@@ -941,12 +943,12 @@
 
 	if (rcd->rcvhdrq) {
 		dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
-				  rcd->rcvhdrq, rcd->rcvhdrq_phys);
+				  rcd->rcvhdrq, rcd->rcvhdrq_dma);
 		rcd->rcvhdrq = NULL;
 		if (rcd->rcvhdrtail_kvaddr) {
 			dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
 					  (void *)rcd->rcvhdrtail_kvaddr,
-					  rcd->rcvhdrqtailaddr_phys);
+					  rcd->rcvhdrqtailaddr_dma);
 			rcd->rcvhdrtail_kvaddr = NULL;
 		}
 	}
@@ -955,11 +957,11 @@
 	kfree(rcd->egrbufs.rcvtids);
 
 	for (e = 0; e < rcd->egrbufs.alloced; e++) {
-		if (rcd->egrbufs.buffers[e].phys)
+		if (rcd->egrbufs.buffers[e].dma)
 			dma_free_coherent(&dd->pcidev->dev,
 					  rcd->egrbufs.buffers[e].len,
 					  rcd->egrbufs.buffers[e].addr,
-					  rcd->egrbufs.buffers[e].phys);
+					  rcd->egrbufs.buffers[e].dma);
 	}
 	kfree(rcd->egrbufs.buffers);
 
@@ -1333,7 +1335,7 @@
 		spin_unlock(&ppd->cc_state_lock);
 
 		if (cc_state)
-			call_rcu(&cc_state->rcu, cc_state_reclaim);
+			kfree_rcu(cc_state, rcu);
 	}
 
 	free_credit_return(dd);
@@ -1353,7 +1355,7 @@
 	if (dd->rcvhdrtail_dummy_kvaddr) {
 		dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
 				  (void *)dd->rcvhdrtail_dummy_kvaddr,
-				  dd->rcvhdrtail_dummy_physaddr);
+				  dd->rcvhdrtail_dummy_dma);
 		dd->rcvhdrtail_dummy_kvaddr = NULL;
 	}
 
@@ -1576,7 +1578,7 @@
 	u64 reg;
 
 	if (!rcd->rcvhdrq) {
-		dma_addr_t phys_hdrqtail;
+		dma_addr_t dma_hdrqtail;
 		gfp_t gfp_flags;
 
 		/*
@@ -1589,7 +1591,7 @@
 		gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
 			GFP_USER : GFP_KERNEL;
 		rcd->rcvhdrq = dma_zalloc_coherent(
-			&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
+			&dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
 			gfp_flags | __GFP_COMP);
 
 		if (!rcd->rcvhdrq) {
@@ -1601,11 +1603,11 @@
 
 		if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
 			rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
-				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
+				&dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
 				gfp_flags);
 			if (!rcd->rcvhdrtail_kvaddr)
 				goto bail_free;
-			rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
+			rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
 		}
 
 		rcd->rcvhdrq_size = amt;
@@ -1633,7 +1635,7 @@
 	 * before enabling any receive context
 	 */
 	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
-			dd->rcvhdrtail_dummy_physaddr);
+			dd->rcvhdrtail_dummy_dma);
 
 	return 0;
 
@@ -1644,7 +1646,7 @@
 	vfree(rcd->user_event_mask);
 	rcd->user_event_mask = NULL;
 	dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
-			  rcd->rcvhdrq_phys);
+			  rcd->rcvhdrq_dma);
 	rcd->rcvhdrq = NULL;
 bail:
 	return -ENOMEM;
@@ -1705,15 +1707,15 @@
 		rcd->egrbufs.buffers[idx].addr =
 			dma_zalloc_coherent(&dd->pcidev->dev,
 					    rcd->egrbufs.rcvtid_size,
-					    &rcd->egrbufs.buffers[idx].phys,
+					    &rcd->egrbufs.buffers[idx].dma,
 					    gfp_flags);
 		if (rcd->egrbufs.buffers[idx].addr) {
 			rcd->egrbufs.buffers[idx].len =
 				rcd->egrbufs.rcvtid_size;
 			rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr =
 				rcd->egrbufs.buffers[idx].addr;
-			rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].phys =
-				rcd->egrbufs.buffers[idx].phys;
+			rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma =
+				rcd->egrbufs.buffers[idx].dma;
 			rcd->egrbufs.alloced++;
 			alloced_bytes += rcd->egrbufs.rcvtid_size;
 			idx++;
@@ -1754,14 +1756,14 @@
 			for (i = 0, j = 0, offset = 0; j < idx; i++) {
 				if (i >= rcd->egrbufs.count)
 					break;
-				rcd->egrbufs.rcvtids[i].phys =
-					rcd->egrbufs.buffers[j].phys + offset;
+				rcd->egrbufs.rcvtids[i].dma =
+					rcd->egrbufs.buffers[j].dma + offset;
 				rcd->egrbufs.rcvtids[i].addr =
 					rcd->egrbufs.buffers[j].addr + offset;
 				rcd->egrbufs.alloced++;
-				if ((rcd->egrbufs.buffers[j].phys + offset +
+				if ((rcd->egrbufs.buffers[j].dma + offset +
 				     new_size) ==
-				    (rcd->egrbufs.buffers[j].phys +
+				    (rcd->egrbufs.buffers[j].dma +
 				     rcd->egrbufs.buffers[j].len)) {
 					j++;
 					offset = 0;
@@ -1813,7 +1815,7 @@
 
 	for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
 		hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
-			     rcd->egrbufs.rcvtids[idx].phys, order);
+			     rcd->egrbufs.rcvtids[idx].dma, order);
 		cond_resched();
 	}
 	goto bail;
@@ -1825,9 +1827,9 @@
 		dma_free_coherent(&dd->pcidev->dev,
 				  rcd->egrbufs.buffers[idx].len,
 				  rcd->egrbufs.buffers[idx].addr,
-				  rcd->egrbufs.buffers[idx].phys);
+				  rcd->egrbufs.buffers[idx].dma);
 		rcd->egrbufs.buffers[idx].addr = NULL;
-		rcd->egrbufs.buffers[idx].phys = 0;
+		rcd->egrbufs.buffers[idx].dma = 0;
 		rcd->egrbufs.buffers[idx].len = 0;
 	}
 bail:
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 1263abe..9487c9b 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1013,7 +1013,6 @@
 			 * offline.
 			 */
 			set_link_state(ppd, HLS_DN_OFFLINE);
-			tune_serdes(ppd);
 			start_link(ppd);
 		} else {
 			set_link_state(ppd, link_state);
@@ -1407,12 +1406,6 @@
 		if (key == okey)
 			continue;
 		/*
-		 * Don't update pkeys[2], if an HFI port without MgmtAllowed
-		 * by neighbor is a switch.
-		 */
-		if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
-			continue;
-		/*
 		 * The SM gives us the complete PKey table. We have
 		 * to ensure that we put the PKeys in the matching
 		 * slots.
@@ -1819,6 +1812,11 @@
 	u32 len = OPA_AM_CI_LEN(am) + 1;
 	int ret;
 
+	if (dd->pport->port_type != PORT_TYPE_QSFP) {
+		smp->status |= IB_SMP_INVALID_FIELD;
+		return reply((struct ib_mad_hdr *)smp);
+	}
+
 #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */
 #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1)
 #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK)
@@ -2599,7 +2597,7 @@
 	u8 lq, num_vls;
 	u8 res_lli, res_ler;
 	u64 port_mask;
-	unsigned long port_num;
+	u8 port_num;
 	unsigned long vl;
 	u32 vl_select_mask;
 	int vfi;
@@ -2633,9 +2631,9 @@
 	 */
 	port_mask = be64_to_cpu(req->port_select_mask[3]);
 	port_num = find_first_bit((unsigned long *)&port_mask,
-				  sizeof(port_mask));
+				  sizeof(port_mask) * 8);
 
-	if ((u8)port_num != port) {
+	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		return reply((struct ib_mad_hdr *)pmp);
 	}
@@ -2837,7 +2835,7 @@
 	 */
 	port_mask = be64_to_cpu(req->port_select_mask[3]);
 	port_num = find_first_bit((unsigned long *)&port_mask,
-				  sizeof(port_mask));
+				  sizeof(port_mask) * 8);
 
 	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3010,7 +3008,7 @@
 	 */
 	port_mask = be64_to_cpu(req->port_select_mask[3]);
 	port_num = find_first_bit((unsigned long *)&port_mask,
-				  sizeof(port_mask));
+				  sizeof(port_mask) * 8);
 
 	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3247,7 +3245,7 @@
 	 */
 	port_mask = be64_to_cpu(req->port_select_mask[3]);
 	port_num = find_first_bit((unsigned long *)&port_mask,
-				  sizeof(port_mask));
+				  sizeof(port_mask) * 8);
 
 	if (port_num != port) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3398,7 +3396,7 @@
 
 	spin_unlock(&ppd->cc_state_lock);
 
-	call_rcu(&old_cc_state->rcu, cc_state_reclaim);
+	kfree_rcu(old_cc_state, rcu);
 }
 
 static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
@@ -3553,13 +3551,6 @@
 	return reply((struct ib_mad_hdr *)smp);
 }
 
-void cc_state_reclaim(struct rcu_head *rcu)
-{
-	struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu);
-
-	kfree(cc_state);
-}
-
 static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
 				   struct ib_device *ibdev, u8 port,
 				   u32 *resp_len)
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index ac1bf4a..50a3a36 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -551,11 +551,11 @@
 }
 
 /*
- * Obtain the credit return addresses, kernel virtual and physical, for the
+ * Obtain the credit return addresses, kernel virtual and bus, for the
  * given sc.
  *
  * To understand this routine:
- * o va and pa are arrays of struct credit_return.  One for each physical
+ * o va and dma are arrays of struct credit_return.  One for each physical
  *   send context, per NUMA.
  * o Each send context always looks in its relative location in a struct
  *   credit_return for its credit return.
@@ -563,14 +563,14 @@
  *   with the same value.  Use the address of the first send context in the
  *   group.
  */
-static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa)
+static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma)
 {
 	u32 gc = group_context(sc->hw_context, sc->group);
 	u32 index = sc->hw_context & 0x7;
 
 	sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index];
-	*pa = (unsigned long)
-	       &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc];
+	*dma = (unsigned long)
+	       &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc];
 }
 
 /*
@@ -710,7 +710,7 @@
 {
 	struct send_context_info *sci;
 	struct send_context *sc = NULL;
-	dma_addr_t pa;
+	dma_addr_t dma;
 	unsigned long flags;
 	u64 reg;
 	u32 thresh;
@@ -763,7 +763,7 @@
 
 	sc->sw_index = sw_index;
 	sc->hw_context = hw_context;
-	cr_group_addresses(sc, &pa);
+	cr_group_addresses(sc, &dma);
 	sc->credits = sci->credits;
 
 /* PIO Send Memory Address details */
@@ -805,7 +805,7 @@
 			((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
 
 	/* set up credit return */
-	reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
+	reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
 	write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg);
 
 	/*
@@ -2064,7 +2064,7 @@
 		dd->cr_base[i].va = dma_zalloc_coherent(
 					&dd->pcidev->dev,
 					bytes,
-					&dd->cr_base[i].pa,
+					&dd->cr_base[i].dma,
 					GFP_KERNEL);
 		if (!dd->cr_base[i].va) {
 			set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2097,7 +2097,7 @@
 					  TXE_NUM_CONTEXTS *
 					  sizeof(struct credit_return),
 					  dd->cr_base[i].va,
-					  dd->cr_base[i].pa);
+					  dd->cr_base[i].dma);
 		}
 	}
 	kfree(dd->cr_base);
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 464cbd2..e709eaf 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -154,7 +154,7 @@
 /* NUMA indexed credit return array */
 struct credit_return_base {
 	struct credit_return *va;
-	dma_addr_t pa;
+	dma_addr_t dma;
 };
 
 /* send context configuration sizes (one per type) */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 8c25e1b..aa77736 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -165,9 +165,6 @@
 	preempt_enable();
 }
 
-/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
-#define USE_SHIFTS 1
-#ifdef USE_SHIFTS
 /*
  * Handle carry bytes using shifts and masks.
  *
@@ -187,37 +184,55 @@
 #define mshift(x) (8 * (x))
 
 /*
- * Read nbytes bytes from "from" and return them in the LSB bytes
- * of pbuf->carry.  Other bytes are zeroed.  Any previous value
- * pbuf->carry is lost.
+ * Jump copy - no-loop copy for < 8 bytes.
+ */
+static inline void jcopy(u8 *dest, const u8 *src, u32 n)
+{
+	switch (n) {
+	case 7:
+		*dest++ = *src++;
+		/* fall through */
+	case 6:
+		*dest++ = *src++;
+		/* fall through */
+	case 5:
+		*dest++ = *src++;
+		/* fall through */
+	case 4:
+		*dest++ = *src++;
+		/* fall through */
+	case 3:
+		*dest++ = *src++;
+		/* fall through */
+	case 2:
+		*dest++ = *src++;
+		/* fall through */
+	case 1:
+		*dest++ = *src++;
+		/* fall through */
+	}
+}
+
+/*
+ * Read nbytes from "from" and and place them in the low bytes
+ * of pbuf->carry.  Other bytes are left as-is.  Any previous
+ * value in pbuf->carry is lost.
  *
  * NOTES:
  * o do not read from from if nbytes is zero
- * o from may _not_ be u64 aligned
- * o nbytes must not span a QW boundary
+ * o from may _not_ be u64 aligned.
  */
 static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
 				  unsigned int nbytes)
 {
-	unsigned long off;
-
-	if (nbytes == 0) {
-		pbuf->carry.val64 = 0;
-	} else {
-		/* align our pointer */
-		off = (unsigned long)from & 0x7;
-		from = (void *)((unsigned long)from & ~0x7l);
-		pbuf->carry.val64 = ((*(u64 *)from)
-				<< zshift(nbytes + off))/* zero upper bytes */
-				>> zshift(nbytes);	/* place at bottom */
-	}
+	pbuf->carry.val64 = 0;
+	jcopy(&pbuf->carry.val8[0], from, nbytes);
 	pbuf->carry_bytes = nbytes;
 }
 
 /*
- * Read nbytes bytes from "from" and put them at the next significant bytes
- * of pbuf->carry.  Unused bytes are zeroed.  It is expected that the extra
- * read does not overfill carry.
+ * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
+ * It is expected that the extra read does not overfill carry.
  *
  * NOTES:
  * o from may _not_ be u64 aligned
@@ -226,55 +241,8 @@
 static inline void read_extra_bytes(struct pio_buf *pbuf,
 				    const void *from, unsigned int nbytes)
 {
-	unsigned long off = (unsigned long)from & 0x7;
-	unsigned int room, xbytes;
-
-	/* align our pointer */
-	from = (void *)((unsigned long)from & ~0x7l);
-
-	/* check count first - don't read anything if count is zero */
-	while (nbytes) {
-		/* find the number of bytes in this u64 */
-		room = 8 - off;	/* this u64 has room for this many bytes */
-		xbytes = min(room, nbytes);
-
-		/*
-		 * shift down to zero lower bytes, shift up to zero upper
-		 * bytes, shift back down to move into place
-		 */
-		pbuf->carry.val64 |= (((*(u64 *)from)
-					>> mshift(off))
-					<< zshift(xbytes))
-					>> zshift(xbytes + pbuf->carry_bytes);
-		off = 0;
-		pbuf->carry_bytes += xbytes;
-		nbytes -= xbytes;
-		from += sizeof(u64);
-	}
-}
-
-/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
-	unsigned int remaining;
-
-	if (zbytes == 0)	/* nothing to do */
-		return;
-
-	remaining = pbuf->carry_bytes - zbytes;	/* remaining bytes */
-
-	/* NOTE: zshift only guaranteed to work if remaining != 0 */
-	if (remaining)
-		pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
-					>> zshift(remaining);
-	else
-		pbuf->carry.val64 = 0;
-	pbuf->carry_bytes = remaining;
+	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
+	pbuf->carry_bytes += nbytes;
 }
 
 /*
@@ -323,126 +291,6 @@
 	return 0;
 }
 
-#else /* USE_SHIFTS */
-/*
- * Handle carry bytes using byte copies.
- *
- * NOTE: the value the unused portion of carry is left uninitialized.
- */
-
-/*
- * Jump copy - no-loop copy for < 8 bytes.
- */
-static inline void jcopy(u8 *dest, const u8 *src, u32 n)
-{
-	switch (n) {
-	case 7:
-		*dest++ = *src++;
-	case 6:
-		*dest++ = *src++;
-	case 5:
-		*dest++ = *src++;
-	case 4:
-		*dest++ = *src++;
-	case 3:
-		*dest++ = *src++;
-	case 2:
-		*dest++ = *src++;
-	case 1:
-		*dest++ = *src++;
-	}
-}
-
-/*
- * Read nbytes from "from" and and place them in the low bytes
- * of pbuf->carry.  Other bytes are left as-is.  Any previous
- * value in pbuf->carry is lost.
- *
- * NOTES:
- * o do not read from from if nbytes is zero
- * o from may _not_ be u64 aligned.
- */
-static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
-				  unsigned int nbytes)
-{
-	jcopy(&pbuf->carry.val8[0], from, nbytes);
-	pbuf->carry_bytes = nbytes;
-}
-
-/*
- * Read nbytes bytes from "from" and put them at the end of pbuf->carry.
- * It is expected that the extra read does not overfill carry.
- *
- * NOTES:
- * o from may _not_ be u64 aligned
- * o nbytes may span a QW boundary
- */
-static inline void read_extra_bytes(struct pio_buf *pbuf,
-				    const void *from, unsigned int nbytes)
-{
-	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], from, nbytes);
-	pbuf->carry_bytes += nbytes;
-}
-
-/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * We do not care about the value of unused bytes in carry, so just
- * reduce the byte count.
- *
- * NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
-	pbuf->carry_bytes -= zbytes;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the low bytes of
- * pbuf->carry.
- */
-static inline void merge_write8(
-	struct pio_buf *pbuf,
-	void *dest,
-	const void *src)
-{
-	u32 remainder = 8 - pbuf->carry_bytes;
-
-	jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
-	writeq(pbuf->carry.val64, dest);
-	jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
-}
-
-/*
- * Write a quad word using all bytes of carry.
- */
-static inline void carry8_write8(union mix carry, void *dest)
-{
-	writeq(carry.val64, dest);
-}
-
-/*
- * Write a quad word using all the valid bytes of carry.  If carry
- * has zero valid bytes, nothing is written.
- * Returns 0 on nothing written, non-zero on quad word written.
- */
-static inline int carry_write8(struct pio_buf *pbuf, void *dest)
-{
-	if (pbuf->carry_bytes) {
-		u64 zero = 0;
-
-		jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
-		      8 - pbuf->carry_bytes);
-		writeq(pbuf->carry.val64, dest);
-		return 1;
-	}
-
-	return 0;
-}
-#endif /* USE_SHIFTS */
-
 /*
  * Segmented PIO Copy - start
  *
@@ -550,8 +398,8 @@
 {
 	void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
 	void __iomem *dend;			/* 8-byte data end */
-	unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
-	unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
+	unsigned long qw_to_write = nbytes >> 3;
+	unsigned long bytes_left = nbytes & 0x7;
 
 	/* calculate 8-byte data end */
 	dend = dest + (qw_to_write * sizeof(u64));
@@ -621,16 +469,46 @@
 		dest += sizeof(u64);
 	}
 
-	/* adjust carry */
-	if (pbuf->carry_bytes < bytes_left) {
-		/* need to read more */
-		read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
-	} else {
-		/* remove invalid bytes */
-		zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
-	}
-
 	pbuf->qw_written += qw_to_write;
+
+	/* handle carry and left-over bytes */
+	if (pbuf->carry_bytes + bytes_left >= 8) {
+		unsigned long nread;
+
+		/* there is enough to fill another qw - fill carry */
+		nread = 8 - pbuf->carry_bytes;
+		read_extra_bytes(pbuf, from, nread);
+
+		/*
+		 * One more write - but need to make sure dest is correct.
+		 * Check for wrap and the possibility the write
+		 * should be in SOP space.
+		 *
+		 * The two checks immediately below cannot both be true, hence
+		 * the else. If we have wrapped, we cannot still be within the
+		 * first block. Conversely, if we are still in the first block,
+		 * we cannot have wrapped. We do the wrap check first as that
+		 * is more likely.
+		 */
+		/* adjust if we have wrapped */
+		if (dest >= pbuf->end)
+			dest -= pbuf->size;
+		/* jump to the SOP range if within the first block */
+		else if (pbuf->qw_written < PIO_BLOCK_QWS)
+			dest += SOP_DISTANCE;
+
+		/* flush out full carry */
+		carry8_write8(pbuf->carry, dest);
+		pbuf->qw_written++;
+
+		/* now adjust and read the rest of the bytes into carry */
+		bytes_left -= nread;
+		from += nread; /* from is now not aligned */
+		read_low_bytes(pbuf, from, bytes_left);
+	} else {
+		/* not enough to fill another qw, append the rest to carry */
+		read_extra_bytes(pbuf, from, bytes_left);
+	}
 }
 
 /*
@@ -771,6 +649,9 @@
 			read_extra_bytes(pbuf, from, to_fill);
 			from += to_fill;
 			nbytes -= to_fill;
+			/* may not be enough valid bytes left to align */
+			if (extra > nbytes)
+				extra = nbytes;
 
 			/* ...now write carry */
 			dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
@@ -798,6 +679,15 @@
 			read_low_bytes(pbuf, from, extra);
 			from += extra;
 			nbytes -= extra;
+			/*
+			 * If no bytes are left, return early - we are done.
+			 * NOTE: This short-circuit is *required* because
+			 * "extra" may have been reduced in size and "from"
+			 * is not aligned, as required when leaving this
+			 * if block.
+			 */
+			if (nbytes == 0)
+				return;
 		}
 
 		/* at this point, from is QW aligned */
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 965c8ae..2024331 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -47,29 +47,39 @@
 
 #include "hfi.h"
 #include "efivar.h"
+#include "eprom.h"
 
 void get_platform_config(struct hfi1_devdata *dd)
 {
 	int ret = 0;
 	unsigned long size = 0;
 	u8 *temp_platform_config = NULL;
+	u32 esize;
+
+	ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
+					 &esize);
+	if (!ret) {
+		/* success */
+		size = esize;
+		goto success;
+	}
+	/* fail, try EFI variable */
 
 	ret = read_hfi1_efi_var(dd, "configuration", &size,
 				(void **)&temp_platform_config);
-	if (ret) {
-		dd_dev_info(dd,
-			    "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
-			    __func__);
-		/* fall back to request firmware */
-		platform_config_load = 1;
-		goto bail;
-	}
+	if (!ret)
+		goto success;
 
+	dd_dev_info(dd,
+		    "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
+		    __func__);
+	/* fall back to request firmware */
+	platform_config_load = 1;
+	return;
+
+success:
 	dd->platform_config.data = temp_platform_config;
 	dd->platform_config.size = size;
-
-bail:
-	/* exit */;
 }
 
 void free_platform_config(struct hfi1_devdata *dd)
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index a5aa351..9fc75e7 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -202,8 +202,7 @@
 	write_seqlock_irqsave(&dev->iowait_lock, flags);
 	if (!list_empty(&priv->s_iowait.list)) {
 		list_del_init(&priv->s_iowait.list);
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
+		rvt_put_qp(qp);
 	}
 	write_sequnlock_irqrestore(&dev->iowait_lock, flags);
 }
@@ -450,13 +449,14 @@
  */
 void hfi1_schedule_send(struct rvt_qp *qp)
 {
+	lockdep_assert_held(&qp->s_lock);
 	if (hfi1_send_ok(qp))
 		_hfi1_schedule_send(qp);
 }
 
 /**
- * hfi1_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
+ * hfi1_get_credit - handle credit in aeth
+ * @qp: the qp
  * @aeth: the Acknowledge Extended Transport Header
  *
  * The QP s_lock should be held.
@@ -465,6 +465,7 @@
 {
 	u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
 
+	lockdep_assert_held(&qp->s_lock);
 	/*
 	 * If the credit is invalid, we can send
 	 * as many packets as we like.  Otherwise, we have to
@@ -503,8 +504,7 @@
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	/* Notify hfi1_destroy_qp() if it is waiting. */
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
+	rvt_put_qp(qp);
 }
 
 static int iowait_sleep(
@@ -544,7 +544,7 @@
 			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
 			list_add_tail(&priv->s_iowait.list, &sde->dmawait);
 			trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
-			atomic_inc(&qp->refcount);
+			rvt_get_qp(qp);
 		}
 		write_sequnlock(&dev->iowait_lock);
 		qp->s_flags &= ~RVT_S_BUSY;
@@ -656,10 +656,6 @@
 
 	iter->dev = dev;
 	iter->specials = dev->rdi.ibdev.phys_port_cnt * 2;
-	if (qp_iter_next(iter)) {
-		kfree(iter);
-		return NULL;
-	}
 
 	return iter;
 }
@@ -812,6 +808,13 @@
 		kfree(priv);
 		return ERR_PTR(-ENOMEM);
 	}
+	iowait_init(
+		&priv->s_iowait,
+		1,
+		_hfi1_do_send,
+		iowait_sleep,
+		iowait_wakeup,
+		iowait_sdma_drained);
 	setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
 	qp->s_timer.function = hfi1_rc_timeout;
 	return priv;
@@ -852,6 +855,7 @@
 
 void flush_qp_waiters(struct rvt_qp *qp)
 {
+	lockdep_assert_held(&qp->s_lock);
 	flush_iowait(qp);
 	hfi1_stop_rc_timers(qp);
 }
@@ -877,13 +881,6 @@
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
-	iowait_init(
-		&priv->s_iowait,
-		1,
-		_hfi1_do_send,
-		iowait_sleep,
-		iowait_wakeup,
-		iowait_sdma_drained);
 	priv->r_adefered = 0;
 	clear_ahg(qp);
 }
@@ -967,8 +964,7 @@
 	if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
 		qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
 		list_del_init(&priv->s_iowait.list);
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
+		rvt_put_qp(qp);
 	}
 	write_sequnlock(&dev->iowait_lock);
 
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index a207717..1869f63 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -161,7 +161,7 @@
 	bus->algo.getsda = hfi1_getsda;
 	bus->algo.getscl = hfi1_getscl;
 	bus->algo.udelay = 5;
-	bus->algo.timeout = usecs_to_jiffies(50);
+	bus->algo.timeout = usecs_to_jiffies(100000);
 	bus->algo.data = bus;
 
 	bus->adapter.owner = THIS_MODULE;
@@ -706,8 +706,8 @@
 		   u8 *data)
 {
 	struct hfi1_pportdata *ppd;
-	u32 excess_len = 0;
-	int ret = 0;
+	u32 excess_len = len;
+	int ret = 0, offset = 0;
 
 	if (port_num > dd->num_pports || port_num < 1) {
 		dd_dev_info(dd, "%s: Invalid port number %d\n",
@@ -740,6 +740,34 @@
 	}
 
 	memcpy(data, &ppd->qsfp_info.cache[addr], len);
+
+	if (addr <= QSFP_MONITOR_VAL_END &&
+	    (addr + len) >= QSFP_MONITOR_VAL_START) {
+		/* Overlap with the dynamic channel monitor range */
+		if (addr < QSFP_MONITOR_VAL_START) {
+			if (addr + len <= QSFP_MONITOR_VAL_END)
+				len = addr + len - QSFP_MONITOR_VAL_START;
+			else
+				len = QSFP_MONITOR_RANGE;
+			offset = QSFP_MONITOR_VAL_START - addr;
+			addr = QSFP_MONITOR_VAL_START;
+		} else if (addr == QSFP_MONITOR_VAL_START) {
+			offset = 0;
+			if (addr + len > QSFP_MONITOR_VAL_END)
+				len = QSFP_MONITOR_RANGE;
+		} else {
+			offset = 0;
+			if (addr + len > QSFP_MONITOR_VAL_END)
+				len = QSFP_MONITOR_VAL_END - addr + 1;
+		}
+		/* Refresh the values of the dynamic monitors from the cable */
+		ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len);
+		if (ret != len) {
+			ret = -EAGAIN;
+			goto set_zeroes;
+		}
+	}
+
 	return 0;
 
 set_zeroes:
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index 69275eb..36cf5235 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -74,6 +74,9 @@
 /* Defined fields that Intel requires of qualified cables */
 /* Byte 0 is Identifier, not checked */
 /* Byte 1 is reserved "status MSB" */
+#define QSFP_MONITOR_VAL_START 22
+#define QSFP_MONITOR_VAL_END 81
+#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1)
 #define QSFP_TX_CTRL_BYTE_OFFS 86
 #define QSFP_PWR_CTRL_BYTE_OFFS 93
 #define QSFP_CDR_CTRL_BYTE_OFFS 98
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 5da190e..8bc5013 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -55,7 +55,7 @@
 #include "trace.h"
 
 /* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
+#define OP(x) RC_OP(x)
 
 /**
  * hfi1_add_retry_timer - add/start a retry timer
@@ -68,6 +68,7 @@
 	struct ib_qp *ibqp = &qp->ibqp;
 	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
 
+	lockdep_assert_held(&qp->s_lock);
 	qp->s_flags |= RVT_S_TIMER;
 	/* 4.096 usec. * (1 << qp->timeout) */
 	qp->s_timer.expires = jiffies + qp->timeout_jiffies +
@@ -86,6 +87,7 @@
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
+	lockdep_assert_held(&qp->s_lock);
 	qp->s_flags |= RVT_S_WAIT_RNR;
 	qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
 	add_timer(&priv->s_rnr_timer);
@@ -103,6 +105,7 @@
 	struct ib_qp *ibqp = &qp->ibqp;
 	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
 
+	lockdep_assert_held(&qp->s_lock);
 	qp->s_flags |= RVT_S_TIMER;
 	/* 4.096 usec. * (1 << qp->timeout) */
 	mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
@@ -120,6 +123,7 @@
 {
 	int rval = 0;
 
+	lockdep_assert_held(&qp->s_lock);
 	/* Remove QP from retry */
 	if (qp->s_flags & RVT_S_TIMER) {
 		qp->s_flags &= ~RVT_S_TIMER;
@@ -138,6 +142,7 @@
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
+	lockdep_assert_held(&qp->s_lock);
 	/* Remove QP from all timers */
 	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
 		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
@@ -158,6 +163,7 @@
 	int rval = 0;
 	struct hfi1_qp_priv *priv = qp->priv;
 
+	lockdep_assert_held(&qp->s_lock);
 	/* Remove QP from rnr timer */
 	if (qp->s_flags & RVT_S_WAIT_RNR) {
 		qp->s_flags &= ~RVT_S_WAIT_RNR;
@@ -178,18 +184,6 @@
 	del_timer_sync(&priv->s_rnr_timer);
 }
 
-/* only opcode mask for adaptive pio */
-const u32 rc_only_opcode =
-	BIT(OP(SEND_ONLY) & 0x1f) |
-	BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
-	BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
-	BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) |
-	BIT(OP(RDMA_READ_REQUEST & 0x1f)) |
-	BIT(OP(ACKNOWLEDGE & 0x1f)) |
-	BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) |
-	BIT(OP(COMPARE_SWAP & 0x1f)) |
-	BIT(OP(FETCH_ADD & 0x1f));
-
 static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
 		       u32 psn, u32 pmtu)
 {
@@ -216,7 +210,7 @@
  * Note the QP s_lock must be held.
  */
 static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
-		       struct hfi1_other_headers *ohdr,
+		       struct ib_other_headers *ohdr,
 		       struct hfi1_pkt_state *ps)
 {
 	struct rvt_ack_entry *e;
@@ -228,6 +222,7 @@
 	u32 pmtu = qp->pmtu;
 	struct hfi1_qp_priv *priv = qp->priv;
 
+	lockdep_assert_held(&qp->s_lock);
 	/* Don't send an ACK if we aren't supposed to. */
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto bail;
@@ -299,10 +294,7 @@
 			len = 0;
 			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
 			ohdr->u.at.aeth = hfi1_compute_aeth(qp);
-			ohdr->u.at.atomic_ack_eth[0] =
-				cpu_to_be32(e->atomic_data >> 32);
-			ohdr->u.at.atomic_ack_eth[1] =
-				cpu_to_be32(e->atomic_data);
+			ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
 			hwords += sizeof(ohdr->u.at) / sizeof(u32);
 			bth2 = mask_psn(e->psn);
 			e->sent = 1;
@@ -390,7 +382,7 @@
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 	struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
-	struct hfi1_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct rvt_sge_state *ss;
 	struct rvt_swqe *wqe;
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -403,6 +395,7 @@
 	int middle = 0;
 	int delta;
 
+	lockdep_assert_held(&qp->s_lock);
 	ps->s_txreq = get_txreq(ps->dev, qp);
 	if (IS_ERR(ps->s_txreq))
 		goto bail_no_tx;
@@ -566,8 +559,9 @@
 				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 			}
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->rdma_wr.remote_addr);
+			put_ib_reth_vaddr(
+				wqe->rdma_wr.remote_addr,
+				&ohdr->u.rc.reth);
 			ohdr->u.rc.reth.rkey =
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -608,8 +602,9 @@
 				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
 			}
-			ohdr->u.rc.reth.vaddr =
-				cpu_to_be64(wqe->rdma_wr.remote_addr);
+			put_ib_reth_vaddr(
+				wqe->rdma_wr.remote_addr,
+				&ohdr->u.rc.reth);
 			ohdr->u.rc.reth.rkey =
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -640,20 +635,18 @@
 			}
 			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
-				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->atomic_wr.swap);
-				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-					wqe->atomic_wr.compare_add);
+				put_ib_ateth_swap(wqe->atomic_wr.swap,
+						  &ohdr->u.atomic_eth);
+				put_ib_ateth_compare(wqe->atomic_wr.compare_add,
+						     &ohdr->u.atomic_eth);
 			} else {
 				qp->s_state = OP(FETCH_ADD);
-				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->atomic_wr.compare_add);
-				ohdr->u.atomic_eth.compare_data = 0;
+				put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+						  &ohdr->u.atomic_eth);
+				put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
 			}
-			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-				wqe->atomic_wr.remote_addr >> 32);
-			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-				wqe->atomic_wr.remote_addr);
+			put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
+					   &ohdr->u.atomic_eth);
 			ohdr->u.atomic_eth.rkey = cpu_to_be32(
 				wqe->atomic_wr.rkey);
 			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
@@ -779,8 +772,9 @@
 		 * See restart_rc().
 		 */
 		len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
-		ohdr->u.rc.reth.vaddr =
-			cpu_to_be64(wqe->rdma_wr.remote_addr + len);
+		put_ib_reth_vaddr(
+			wqe->rdma_wr.remote_addr + len,
+			&ohdr->u.rc.reth);
 		ohdr->u.rc.reth.rkey =
 			cpu_to_be32(wqe->rdma_wr.rkey);
 		ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
@@ -841,7 +835,7 @@
  *
  * This is called from hfi1_rc_rcv() and handle_receive_interrupt().
  * Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
+ * send side QP state and send engine.
  */
 void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
 		      int is_fecn)
@@ -856,8 +850,8 @@
 	u32 vl, plen;
 	struct send_context *sc;
 	struct pio_buf *pbuf;
-	struct hfi1_ib_header hdr;
-	struct hfi1_other_headers *ohdr;
+	struct ib_header hdr;
+	struct ib_other_headers *ohdr;
 	unsigned long flags;
 
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
@@ -917,7 +911,7 @@
 	if (!pbuf) {
 		/*
 		 * We have no room to send at the moment.  Pass
-		 * responsibility for sending the ACK to the send tasklet
+		 * responsibility for sending the ACK to the send engine
 		 * so that when enough buffer space becomes available,
 		 * the ACK is sent ahead of other outgoing packets.
 		 */
@@ -932,16 +926,19 @@
 	return;
 
 queue_ack:
-	this_cpu_inc(*ibp->rvp.rc_qacks);
 	spin_lock_irqsave(&qp->s_lock, flags);
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+		goto unlock;
+	this_cpu_inc(*ibp->rvp.rc_qacks);
 	qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
 	qp->s_nak_state = qp->r_nak_state;
 	qp->s_ack_psn = qp->r_ack_psn;
 	if (is_fecn)
 		qp->s_flags |= RVT_S_ECN;
 
-	/* Schedule the send tasklet. */
+	/* Schedule the send engine. */
 	hfi1_schedule_send(qp);
+unlock:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
@@ -960,6 +957,7 @@
 	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
 	u32 opcode;
 
+	lockdep_assert_held(&qp->s_lock);
 	qp->s_cur = n;
 
 	/*
@@ -1027,7 +1025,7 @@
 	qp->s_psn = psn;
 	/*
 	 * Set RVT_S_WAIT_PSN as rc_complete() may start the timer
-	 * asynchronously before the send tasklet can get scheduled.
+	 * asynchronously before the send engine can get scheduled.
 	 * Doing it in hfi1_make_rc_req() is too late.
 	 */
 	if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
@@ -1045,6 +1043,8 @@
 	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	struct hfi1_ibport *ibp;
 
+	lockdep_assert_held(&qp->r_lock);
+	lockdep_assert_held(&qp->s_lock);
 	if (qp->s_retry == 0) {
 		if (qp->s_mig_state == IB_MIG_ARMED) {
 			hfi1_migrate_qp(qp);
@@ -1121,6 +1121,7 @@
 	struct rvt_swqe *wqe;
 	u32 n = qp->s_last;
 
+	lockdep_assert_held(&qp->s_lock);
 	/* Find the work request corresponding to the given PSN. */
 	for (;;) {
 		wqe = rvt_get_swqe_ptr(qp, n);
@@ -1141,15 +1142,16 @@
 /*
  * This should be called with the QP s_lock held and interrupts disabled.
  */
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
 {
-	struct hfi1_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct rvt_swqe *wqe;
 	struct ib_wc wc;
 	unsigned i;
 	u32 opcode;
 	u32 psn;
 
+	lockdep_assert_held(&qp->s_lock);
 	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 
@@ -1241,6 +1243,7 @@
 	struct ib_wc wc;
 	unsigned i;
 
+	lockdep_assert_held(&qp->s_lock);
 	/*
 	 * Don't decrement refcount and don't generate a
 	 * completion if the SWQE is being resent until the send
@@ -1340,6 +1343,7 @@
 	int diff;
 	unsigned long to;
 
+	lockdep_assert_held(&qp->s_lock);
 	/*
 	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
 	 * requests and implicitly NAK RDMA read and atomic requests issued
@@ -1389,7 +1393,7 @@
 				restart_rc(qp, qp->s_last_psn + 1, 0);
 				if (list_empty(&qp->rspwait)) {
 					qp->r_flags |= RVT_R_RSP_SEND;
-					atomic_inc(&qp->refcount);
+					rvt_get_qp(qp);
 					list_add_tail(&qp->rspwait,
 						      &rcd->qp_wait_list);
 				}
@@ -1555,6 +1559,7 @@
 {
 	struct rvt_swqe *wqe;
 
+	lockdep_assert_held(&qp->s_lock);
 	/* Remove QP from retry timer */
 	hfi1_stop_rc_timers(qp);
 
@@ -1573,7 +1578,7 @@
 	restart_rc(qp, qp->s_last_psn + 1, 0);
 	if (list_empty(&qp->rspwait)) {
 		qp->r_flags |= RVT_R_RSP_SEND;
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 }
@@ -1595,7 +1600,7 @@
  * Called at interrupt level.
  */
 static void rc_rcv_resp(struct hfi1_ibport *ibp,
-			struct hfi1_other_headers *ohdr,
+			struct ib_other_headers *ohdr,
 			void *data, u32 tlen, struct rvt_qp *qp,
 			u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
 			struct hfi1_ctxtdata *rcd)
@@ -1649,14 +1654,10 @@
 	case OP(ATOMIC_ACKNOWLEDGE):
 	case OP(RDMA_READ_RESPONSE_FIRST):
 		aeth = be32_to_cpu(ohdr->u.aeth);
-		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-			__be32 *p = ohdr->u.at.atomic_ack_eth;
-
-			val = ((u64)be32_to_cpu(p[0]) << 32) |
-				be32_to_cpu(p[1]);
-		} else {
+		if (opcode == OP(ATOMIC_ACKNOWLEDGE))
+			val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
+		else
 			val = 0;
-		}
 		if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
@@ -1782,7 +1783,7 @@
 {
 	if (list_empty(&qp->rspwait)) {
 		qp->r_flags |= RVT_R_RSP_NAK;
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 }
@@ -1796,8 +1797,7 @@
 		return;
 	list_del_init(&qp->rspwait);
 	qp->r_flags &= ~RVT_R_RSP_NAK;
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
+	rvt_put_qp(qp);
 }
 
 /**
@@ -1815,7 +1815,7 @@
  * Return 1 if no more processing is needed; otherwise return 0 to
  * schedule a response to be sent.
  */
-static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
+static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
 				 struct rvt_qp *qp, u32 opcode, u32 psn,
 				 int diff, struct hfi1_ctxtdata *rcd)
 {
@@ -1923,7 +1923,7 @@
 		}
 		if (len != 0) {
 			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
+			u64 vaddr = get_ib_reth_vaddr(reth);
 			int ok;
 
 			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
@@ -1946,7 +1946,7 @@
 	case OP(FETCH_ADD): {
 		/*
 		 * If we didn't find the atomic request in the ack queue
-		 * or the send tasklet is already backed up to send an
+		 * or the send engine is already backed up to send an
 		 * earlier entry, we can ignore this request.
 		 */
 		if (!e || e->opcode != (u8)opcode || old_req)
@@ -2123,13 +2123,13 @@
 void hfi1_rc_rcv(struct hfi1_packet *packet)
 {
 	struct hfi1_ctxtdata *rcd = packet->rcd;
-	struct hfi1_ib_header *hdr = packet->hdr;
+	struct ib_header *hdr = packet->hdr;
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
 	struct rvt_qp *qp = packet->qp;
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	struct hfi1_other_headers *ohdr = packet->ohdr;
+	struct ib_other_headers *ohdr = packet->ohdr;
 	u32 bth0, opcode;
 	u32 hdrsize = packet->hlen;
 	u32 psn;
@@ -2143,6 +2143,7 @@
 	int copy_last = 0;
 	u32 rkey;
 
+	lockdep_assert_held(&qp->r_lock);
 	bth0 = be32_to_cpu(ohdr->bth[0]);
 	if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
 		return;
@@ -2342,7 +2343,7 @@
 		qp->r_sge.sg_list = NULL;
 		if (qp->r_len != 0) {
 			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
+			u64 vaddr = get_ib_reth_vaddr(reth);
 			int ok;
 
 			/* Check rkey & NAK */
@@ -2397,7 +2398,7 @@
 		len = be32_to_cpu(reth->length);
 		if (len) {
 			u32 rkey = be32_to_cpu(reth->rkey);
-			u64 vaddr = be64_to_cpu(reth->vaddr);
+			u64 vaddr = get_ib_reth_vaddr(reth);
 			int ok;
 
 			/* Check rkey & NAK */
@@ -2432,7 +2433,7 @@
 		qp->r_nak_state = 0;
 		qp->r_head_ack_queue = next;
 
-		/* Schedule the send tasklet. */
+		/* Schedule the send engine. */
 		qp->s_flags |= RVT_S_RESP_PENDING;
 		hfi1_schedule_send(qp);
 
@@ -2469,8 +2470,7 @@
 			e->rdma_sge.mr = NULL;
 		}
 		ateth = &ohdr->u.atomic_eth;
-		vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
-			be32_to_cpu(ateth->vaddr[1]);
+		vaddr = get_ib_ateth_vaddr(ateth);
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
 			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
@@ -2481,11 +2481,11 @@
 			goto nack_acc_unlck;
 		/* Perform atomic OP and save result. */
 		maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
-		sdata = be64_to_cpu(ateth->swap_data);
+		sdata = get_ib_ateth_swap(ateth);
 		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
 			(u64)atomic64_add_return(sdata, maddr) - sdata :
 			(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
-				      be64_to_cpu(ateth->compare_data),
+				      get_ib_ateth_compare(ateth),
 				      sdata);
 		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
@@ -2499,7 +2499,7 @@
 		qp->r_nak_state = 0;
 		qp->r_head_ack_queue = next;
 
-		/* Schedule the send tasklet. */
+		/* Schedule the send engine. */
 		qp->s_flags |= RVT_S_RESP_PENDING;
 		hfi1_schedule_send(qp);
 
@@ -2575,12 +2575,12 @@
 
 void hfi1_rc_hdrerr(
 	struct hfi1_ctxtdata *rcd,
-	struct hfi1_ib_header *hdr,
+	struct ib_header *hdr,
 	u32 rcv_flags,
 	struct rvt_qp *qp)
 {
 	int has_grh = rcv_flags & HFI1_HAS_GRH;
-	struct hfi1_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	int diff;
 	u32 opcode;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 48d5094..a1576ae 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -262,7 +262,7 @@
  *
  * The s_lock will be acquired around the hfi1_migrate_qp() call.
  */
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
 		       int has_grh, struct rvt_qp *qp, u32 bth0)
 {
 	__be64 guid;
@@ -352,7 +352,7 @@
  *
  * This is called from hfi1_do_send() to
  * forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the tasklet, we still
+ * Note that although we are single threaded due to the send engine, we still
  * have to protect against post_send().  We don't have to worry about
  * receive interrupts since this is a connected protocol and all packets
  * will pass through here.
@@ -765,7 +765,7 @@
 	}
 }
 
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
 			  u32 bth0, u32 bth2, int middle,
 			  struct hfi1_pkt_state *ps)
 {
@@ -846,7 +846,7 @@
  * @work: contains a pointer to the QP
  *
  * Process entries in the send work queue until credit or queue is
- * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
+ * exhausted.  Only allow one CPU to send a packet per QP.
  * Otherwise, two threads could send packets out of order.
  */
 void hfi1_do_send(struct rvt_qp *qp)
@@ -909,7 +909,7 @@
 			spin_unlock_irqrestore(&qp->s_lock, ps.flags);
 			/*
 			 * If the packet cannot be sent now, return and
-			 * the send tasklet will be woken up later.
+			 * the send engine will be woken up later.
 			 */
 			if (hfi1_verbs_send(qp, &ps))
 				return;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index f9befc0..fd39bca 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -726,6 +726,34 @@
 }
 
 /**
+ * sdma_engine_get_vl() - return vl for a given sdma engine
+ * @sde: sdma engine
+ *
+ * This function returns the vl mapped to a given engine, or an error if
+ * the mapping can't be found. The mapping fields are protected by RCU.
+ */
+int sdma_engine_get_vl(struct sdma_engine *sde)
+{
+	struct hfi1_devdata *dd = sde->dd;
+	struct sdma_vl_map *m;
+	u8 vl;
+
+	if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
+		return -EINVAL;
+
+	rcu_read_lock();
+	m = rcu_dereference(dd->sdma_map);
+	if (unlikely(!m)) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	vl = m->engine_to_vl[sde->this_idx];
+	rcu_read_unlock();
+
+	return vl;
+}
+
+/**
  * sdma_select_engine_vl() - select sdma engine
  * @dd: devdata
  * @selector: a spreading factor
@@ -788,6 +816,326 @@
 	return sdma_select_engine_vl(dd, selector, vl);
 }
 
+struct sdma_rht_map_elem {
+	u32 mask;
+	u8 ctr;
+	struct sdma_engine *sde[0];
+};
+
+struct sdma_rht_node {
+	unsigned long cpu_id;
+	struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED];
+	struct rhash_head node;
+};
+
+#define NR_CPUS_HINT 192
+
+static const struct rhashtable_params sdma_rht_params = {
+	.nelem_hint = NR_CPUS_HINT,
+	.head_offset = offsetof(struct sdma_rht_node, node),
+	.key_offset = offsetof(struct sdma_rht_node, cpu_id),
+	.key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+	.max_size = NR_CPUS,
+	.min_size = 8,
+	.automatic_shrinking = true,
+};
+
+/*
+ * sdma_select_user_engine() - select sdma engine based on user setup
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * This function returns an sdma engine for a user sdma request.
+ * User defined sdma engine affinity setting is honored when applicable,
+ * otherwise system default sdma engine mapping is used. To ensure correct
+ * ordering, the mapping from <selector, vl> to sde must remain unchanged.
+ */
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+					    u32 selector, u8 vl)
+{
+	struct sdma_rht_node *rht_node;
+	struct sdma_engine *sde = NULL;
+	const struct cpumask *current_mask = tsk_cpus_allowed(current);
+	unsigned long cpu_id;
+
+	/*
+	 * To ensure that always the same sdma engine(s) will be
+	 * selected make sure the process is pinned to this CPU only.
+	 */
+	if (cpumask_weight(current_mask) != 1)
+		goto out;
+
+	cpu_id = smp_processor_id();
+	rcu_read_lock();
+	rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id,
+					  sdma_rht_params);
+
+	if (rht_node && rht_node->map[vl]) {
+		struct sdma_rht_map_elem *map = rht_node->map[vl];
+
+		sde = map->sde[selector & map->mask];
+	}
+	rcu_read_unlock();
+
+	if (sde)
+		return sde;
+
+out:
+	return sdma_select_engine_vl(dd, selector, vl);
+}
+
+static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
+{
+	int i;
+
+	for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++)
+		map->sde[map->ctr + i] = map->sde[i];
+}
+
+static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
+				 struct sdma_engine *sde)
+{
+	unsigned int i, pow;
+
+	/* only need to check the first ctr entries for a match */
+	for (i = 0; i < map->ctr; i++) {
+		if (map->sde[i] == sde) {
+			memmove(&map->sde[i], &map->sde[i + 1],
+				(map->ctr - i - 1) * sizeof(map->sde[0]));
+			map->ctr--;
+			pow = roundup_pow_of_two(map->ctr ? : 1);
+			map->mask = pow - 1;
+			sdma_populate_sde_map(map);
+			break;
+		}
+	}
+}
+
+/*
+ * Prevents concurrent reads and writes of the sdma engine cpu_mask
+ */
+static DEFINE_MUTEX(process_to_sde_mutex);
+
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+				size_t count)
+{
+	struct hfi1_devdata *dd = sde->dd;
+	cpumask_var_t mask, new_mask;
+	unsigned long cpu;
+	int ret, vl, sz;
+
+	vl = sdma_engine_get_vl(sde);
+	if (unlikely(vl < 0))
+		return -EINVAL;
+
+	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+	if (!ret)
+		return -ENOMEM;
+
+	ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
+	if (!ret) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+	ret = cpulist_parse(buf, mask);
+	if (ret)
+		goto out_free;
+
+	if (!cpumask_subset(mask, cpu_online_mask)) {
+		dd_dev_warn(sde->dd, "Invalid CPU mask\n");
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	sz = sizeof(struct sdma_rht_map_elem) +
+			(TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
+
+	mutex_lock(&process_to_sde_mutex);
+
+	for_each_cpu(cpu, mask) {
+		struct sdma_rht_node *rht_node;
+
+		/* Check if we have this already mapped */
+		if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
+			cpumask_set_cpu(cpu, new_mask);
+			continue;
+		}
+
+		rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+						  sdma_rht_params);
+		if (!rht_node) {
+			rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
+			if (!rht_node) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+			if (!rht_node->map[vl]) {
+				kfree(rht_node);
+				ret = -ENOMEM;
+				goto out;
+			}
+			rht_node->cpu_id = cpu;
+			rht_node->map[vl]->mask = 0;
+			rht_node->map[vl]->ctr = 1;
+			rht_node->map[vl]->sde[0] = sde;
+
+			ret = rhashtable_insert_fast(&dd->sdma_rht,
+						     &rht_node->node,
+						     sdma_rht_params);
+			if (ret) {
+				kfree(rht_node->map[vl]);
+				kfree(rht_node);
+				dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
+					   cpu);
+				goto out;
+			}
+
+		} else {
+			int ctr, pow;
+
+			/* Add new user mappings */
+			if (!rht_node->map[vl])
+				rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+
+			if (!rht_node->map[vl]) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			rht_node->map[vl]->ctr++;
+			ctr = rht_node->map[vl]->ctr;
+			rht_node->map[vl]->sde[ctr - 1] = sde;
+			pow = roundup_pow_of_two(ctr);
+			rht_node->map[vl]->mask = pow - 1;
+
+			/* Populate the sde map table */
+			sdma_populate_sde_map(rht_node->map[vl]);
+		}
+		cpumask_set_cpu(cpu, new_mask);
+	}
+
+	/* Clean up old mappings */
+	for_each_cpu(cpu, cpu_online_mask) {
+		struct sdma_rht_node *rht_node;
+
+		/* Don't cleanup sdes that are set in the new mask */
+		if (cpumask_test_cpu(cpu, mask))
+			continue;
+
+		rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+						  sdma_rht_params);
+		if (rht_node) {
+			bool empty = true;
+			int i;
+
+			/* Remove mappings for old sde */
+			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+				if (rht_node->map[i])
+					sdma_cleanup_sde_map(rht_node->map[i],
+							     sde);
+
+			/* Free empty hash table entries */
+			for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+				if (!rht_node->map[i])
+					continue;
+
+				if (rht_node->map[i]->ctr) {
+					empty = false;
+					break;
+				}
+			}
+
+			if (empty) {
+				ret = rhashtable_remove_fast(&dd->sdma_rht,
+							     &rht_node->node,
+							     sdma_rht_params);
+				WARN_ON(ret);
+
+				for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+					kfree(rht_node->map[i]);
+
+				kfree(rht_node);
+			}
+		}
+	}
+
+	cpumask_copy(&sde->cpu_mask, new_mask);
+out:
+	mutex_unlock(&process_to_sde_mutex);
+out_free:
+	free_cpumask_var(mask);
+	free_cpumask_var(new_mask);
+	return ret ? : strnlen(buf, PAGE_SIZE);
+}
+
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+	mutex_lock(&process_to_sde_mutex);
+	if (cpumask_empty(&sde->cpu_mask))
+		snprintf(buf, PAGE_SIZE, "%s\n", "empty");
+	else
+		cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask);
+	mutex_unlock(&process_to_sde_mutex);
+	return strnlen(buf, PAGE_SIZE);
+}
+
+static void sdma_rht_free(void *ptr, void *arg)
+{
+	struct sdma_rht_node *rht_node = ptr;
+	int i;
+
+	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+		kfree(rht_node->map[i]);
+
+	kfree(rht_node);
+}
+
+/**
+ * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings
+ * @s: seq file
+ * @dd: hfi1_devdata
+ * @cpuid: cpu id
+ *
+ * This routine dumps the process to sde mappings per cpu
+ */
+void sdma_seqfile_dump_cpu_list(struct seq_file *s,
+				struct hfi1_devdata *dd,
+				unsigned long cpuid)
+{
+	struct sdma_rht_node *rht_node;
+	int i, j;
+
+	rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+					  sdma_rht_params);
+	if (!rht_node)
+		return;
+
+	seq_printf(s, "cpu%3lu: ", cpuid);
+	for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+		if (!rht_node->map[i] || !rht_node->map[i]->ctr)
+			continue;
+
+		seq_printf(s, " vl%d: [", i);
+
+		for (j = 0; j < rht_node->map[i]->ctr; j++) {
+			if (!rht_node->map[i]->sde[j])
+				continue;
+
+			if (j > 0)
+				seq_puts(s, ",");
+
+			seq_printf(s, " sdma%2d",
+				   rht_node->map[i]->sde[j]->this_idx);
+		}
+		seq_puts(s, " ]");
+	}
+
+	seq_puts(s, "\n");
+}
+
 /*
  * Free the indicated map struct
  */
@@ -1161,6 +1509,10 @@
 	dd->num_sdma = num_engines;
 	if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
 		goto bail;
+
+	if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+		goto bail;
+
 	dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
 	return 0;
 
@@ -1252,6 +1604,7 @@
 		sdma_finalput(&sde->state);
 	}
 	sdma_clean(dd, dd->num_sdma);
+	rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
 }
 
 /*
@@ -2086,6 +2439,11 @@
  * @sde: sdma engine to use
  * @wait: wait structure to use when full (may be NULL)
  * @tx_list: list of sdma_txreqs to submit
+ * @count: pointer to a u32 which, after return will contain the total number of
+ *         sdma_txreqs removed from the tx_list. This will include sdma_txreqs
+ *         whose SDMA descriptors are submitted to the ring and the sdma_txreqs
+ *         which are added to SDMA engine flush list if the SDMA engine state is
+ *         not running.
  *
  * The call submits the list into the ring.
  *
@@ -2100,18 +2458,18 @@
  * side locking.
  *
  * Return:
- * > 0 - Success (value is number of sdma_txreq's submitted),
+ * 0 - Success,
  * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
  * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
  */
 int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
-		     struct list_head *tx_list)
+		     struct list_head *tx_list, u32 *count_out)
 {
 	struct sdma_txreq *tx, *tx_next;
 	int ret = 0;
 	unsigned long flags;
 	u16 tail = INVALID_TAIL;
-	int count = 0;
+	u32 submit_count = 0, flush_count = 0, total_count;
 
 	spin_lock_irqsave(&sde->tail_lock, flags);
 retry:
@@ -2127,33 +2485,34 @@
 		}
 		list_del_init(&tx->list);
 		tail = submit_tx(sde, tx);
-		count++;
+		submit_count++;
 		if (tail != INVALID_TAIL &&
-		    (count & SDMA_TAIL_UPDATE_THRESH) == 0) {
+		    (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
 			sdma_update_tail(sde, tail);
 			tail = INVALID_TAIL;
 		}
 	}
 update_tail:
+	total_count = submit_count + flush_count;
 	if (wait)
-		iowait_sdma_add(wait, count);
+		iowait_sdma_add(wait, total_count);
 	if (tail != INVALID_TAIL)
 		sdma_update_tail(sde, tail);
 	spin_unlock_irqrestore(&sde->tail_lock, flags);
-	return ret == 0 ? count : ret;
+	*count_out = total_count;
+	return ret;
 unlock_noconn:
 	spin_lock(&sde->flushlist_lock);
 	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
 		tx->wait = wait;
 		list_del_init(&tx->list);
-		if (wait)
-			iowait_sdma_inc(wait);
 		tx->next_descq_idx = 0;
 #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
 		tx->sn = sde->tail_sn++;
 		trace_hfi1_sdma_in_sn(sde, tx->sn);
 #endif
 		list_add_tail(&tx->list, &sde->flushlist);
+		flush_count++;
 		if (wait) {
 			wait->tx_count++;
 			wait->count += tx->num_desc;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 8f50c99..56257ea 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -413,6 +413,8 @@
 	spinlock_t flushlist_lock;
 	/* private: */
 	struct list_head flushlist;
+	struct cpumask cpu_mask;
+	struct kobject kobj;
 };
 
 int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -847,7 +849,8 @@
 		    struct sdma_txreq *tx);
 int sdma_send_txlist(struct sdma_engine *sde,
 		     struct iowait *wait,
-		     struct list_head *tx_list);
+		     struct list_head *tx_list,
+		     u32 *count);
 
 int sdma_ahg_alloc(struct sdma_engine *sde);
 void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
@@ -1058,7 +1061,15 @@
 	u32 selector,
 	u8 vl);
 
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+					    u32 selector, u8 vl);
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+				size_t count);
+int sdma_engine_get_vl(struct sdma_engine *sde);
 void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);
+void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd,
+				unsigned long cpuid);
 
 #ifdef CONFIG_SDMA_VERBOSITY
 void sdma_dumpstate(struct sdma_engine *);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 74c84c6..edba224 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -766,13 +766,95 @@
 	return ret;
 }
 
+struct sde_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct sdma_engine *sde, char *buf);
+	ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt);
+};
+
+static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct sde_attribute *sde_attr =
+		container_of(attr, struct sde_attribute, attr);
+	struct sdma_engine *sde =
+		container_of(kobj, struct sdma_engine, kobj);
+
+	if (!sde_attr->show)
+		return -EINVAL;
+
+	return sde_attr->show(sde, buf);
+}
+
+static ssize_t sde_store(struct kobject *kobj, struct attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct sde_attribute *sde_attr =
+		container_of(attr, struct sde_attribute, attr);
+	struct sdma_engine *sde =
+		container_of(kobj, struct sdma_engine, kobj);
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!sde_attr->store)
+		return -EINVAL;
+
+	return sde_attr->store(sde, buf, count);
+}
+
+static const struct sysfs_ops sde_sysfs_ops = {
+	.show = sde_show,
+	.store = sde_store,
+};
+
+static struct kobj_type sde_ktype = {
+	.sysfs_ops = &sde_sysfs_ops,
+};
+
+#define SDE_ATTR(_name, _mode, _show, _store) \
+	struct sde_attribute sde_attr_##_name = \
+		__ATTR(_name, _mode, _show, _store)
+
+static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+	return sdma_get_cpu_to_sde_map(sde, buf);
+}
+
+static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde,
+					const char *buf, size_t count)
+{
+	return sdma_set_cpu_to_sde_map(sde, buf, count);
+}
+
+static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf)
+{
+	int vl;
+
+	vl = sdma_engine_get_vl(sde);
+	if (vl < 0)
+		return vl;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", vl);
+}
+
+static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
+		sde_show_cpu_to_sde_map,
+		sde_store_cpu_to_sde_map);
+static SDE_ATTR(vl, S_IRUGO, sde_show_vl, NULL);
+
+static struct sde_attribute *sde_attribs[] = {
+	&sde_attr_cpu_list,
+	&sde_attr_vl
+};
+
 /*
  * Register and create our files in /sys/class/infiniband.
  */
 int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
 {
 	struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
-	int i, ret;
+	struct device *class_dev = &dev->dev;
+	int i, j, ret;
 
 	for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
 		ret = device_create_file(&dev->dev, hfi1_attributes[i]);
@@ -780,10 +862,29 @@
 			goto bail;
 	}
 
+	for (i = 0; i < dd->num_sdma; i++) {
+		ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
+					   &sde_ktype, &class_dev->kobj,
+					   "sdma%d", i);
+		if (ret)
+			goto bail;
+
+		for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) {
+			ret = sysfs_create_file(&dd->per_sdma[i].kobj,
+						&sde_attribs[j]->attr);
+			if (ret)
+				goto bail;
+		}
+	}
+
 	return 0;
 bail:
 	for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
 		device_remove_file(&dev->dev, hfi1_attributes[i]);
+
+	for (i = 0; i < dd->num_sdma; i++)
+		kobject_del(&dd->per_sdma[i].kobj);
+
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 4cfb137..01f525c 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -47,9 +47,9 @@
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
+u8 ibhdr_exhdr_len(struct ib_header *hdr)
 {
-	struct hfi1_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	u8 opcode;
 	u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
 
@@ -67,16 +67,11 @@
 #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
 #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
 #define IETH_PRN "ieth rkey 0x%.8x"
-#define ATOMICACKETH_PRN "origdata %lld"
-#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld"
+#define ATOMICACKETH_PRN "origdata %llx"
+#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
 
 #define OP(transport, op) IB_OPCODE_## transport ## _ ## op
 
-static u64 ib_u64_get(__be32 *p)
-{
-	return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]);
-}
-
 static const char *parse_syndrome(u8 syndrome)
 {
 	switch (syndrome >> 5) {
@@ -113,8 +108,7 @@
 	case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 	case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
 		trace_seq_printf(p, RETH_PRN " " IMM_PRN,
-				 (unsigned long long)ib_u64_get(
-				 (__be32 *)&eh->rc.reth.vaddr),
+				 get_ib_reth_vaddr(&eh->rc.reth),
 				 be32_to_cpu(eh->rc.reth.rkey),
 				 be32_to_cpu(eh->rc.reth.length),
 				 be32_to_cpu(eh->rc.imm_data));
@@ -126,8 +120,7 @@
 	case OP(RC, RDMA_WRITE_ONLY):
 	case OP(UC, RDMA_WRITE_ONLY):
 		trace_seq_printf(p, RETH_PRN,
-				 (unsigned long long)ib_u64_get(
-				 (__be32 *)&eh->rc.reth.vaddr),
+				 get_ib_reth_vaddr(&eh->rc.reth),
 				 be32_to_cpu(eh->rc.reth.rkey),
 				 be32_to_cpu(eh->rc.reth.length));
 		break;
@@ -145,20 +138,16 @@
 				 be32_to_cpu(eh->at.aeth) >> 24,
 				 parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24),
 				 be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK,
-				 (unsigned long long)
-				 ib_u64_get(eh->at.atomic_ack_eth));
+				 ib_u64_get(&eh->at.atomic_ack_eth));
 		break;
 	/* atomiceth */
 	case OP(RC, COMPARE_SWAP):
 	case OP(RC, FETCH_ADD):
 		trace_seq_printf(p, ATOMICETH_PRN,
-				 (unsigned long long)ib_u64_get(
-				 eh->atomic_eth.vaddr),
+				 get_ib_ateth_vaddr(&eh->atomic_eth),
 				 eh->atomic_eth.rkey,
-				 (unsigned long long)ib_u64_get(
-				 (__be32 *)&eh->atomic_eth.swap_data),
-				 (unsigned long long)ib_u64_get(
-				 (__be32 *)&eh->atomic_eth.compare_data));
+				 get_ib_ateth_swap(&eh->atomic_eth),
+				 get_ib_ateth_compare(&eh->atomic_eth));
 		break;
 	/* deth */
 	case OP(UD, SEND_ONLY):
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 31654bb..26ae789 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -67,9 +67,9 @@
 			     __field(u64, hw_free)
 			     __field(void __iomem *, piobase)
 			     __field(u16, rcvhdrq_cnt)
-			     __field(u64, rcvhdrq_phys)
+			     __field(u64, rcvhdrq_dma)
 			     __field(u32, eager_cnt)
-			     __field(u64, rcvegr_phys)
+			     __field(u64, rcvegr_dma)
 			     ),
 	    TP_fast_assign(DD_DEV_ASSIGN(dd);
 			   __entry->ctxt = uctxt->ctxt;
@@ -77,10 +77,9 @@
 			   __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
 			   __entry->piobase = uctxt->sc->base_addr;
 			   __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
-			   __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+			   __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
 			   __entry->eager_cnt = uctxt->egrbufs.alloced;
-			   __entry->rcvegr_phys =
-			   uctxt->egrbufs.rcvtids[0].phys;
+			   __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
 			   ),
 	    TP_printk("[%s] ctxt %u " UCTXT_FMT,
 		      __get_str(dev),
@@ -89,9 +88,9 @@
 		      __entry->hw_free,
 		      __entry->piobase,
 		      __entry->rcvhdrq_cnt,
-		      __entry->rcvhdrq_phys,
+		      __entry->rcvhdrq_dma,
 		      __entry->eager_cnt,
-		      __entry->rcvegr_phys
+		      __entry->rcvegr_dma
 		      )
 );
 
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index c3e41ae..382fcda 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -55,7 +55,7 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hfi1_ibhdrs
 
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
+u8 ibhdr_exhdr_len(struct ib_header *hdr);
 const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
 
 #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
@@ -74,7 +74,7 @@
 
 DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
 		    TP_PROTO(struct hfi1_devdata *dd,
-			     struct hfi1_ib_header *hdr),
+			     struct ib_header *hdr),
 		    TP_ARGS(dd, hdr),
 		    TP_STRUCT__entry(
 			DD_DEV_ENTRY(dd)
@@ -102,7 +102,7 @@
 			__dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
 			),
 		      TP_fast_assign(
-			struct hfi1_other_headers *ohdr;
+			struct ib_other_headers *ohdr;
 
 			DD_DEV_ASSIGN(dd);
 			/* LRH */
@@ -185,19 +185,19 @@
 );
 
 DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
-	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
 DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
-	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
 DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
-	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
 DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
-	     TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+	     TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
 	     TP_ARGS(dd, hdr));
 
 #endif /* __HFI1_TRACE_IBHDRS_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 9ba1f61..11e02b2 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -260,7 +260,7 @@
 TRACE_EVENT(snoop_capture,
 	    TP_PROTO(struct hfi1_devdata *dd,
 		     int hdr_len,
-		     struct hfi1_ib_header *hdr,
+		     struct ib_header *hdr,
 		     int data_len,
 		     void *data),
 	    TP_ARGS(dd, hdr_len, hdr, data_len, data),
@@ -279,7 +279,7 @@
 			     __dynamic_array(u8, raw_pkt, data_len)
 			     ),
 	    TP_fast_assign(
-		struct hfi1_other_headers *ohdr;
+		struct ib_other_headers *ohdr;
 
 		__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
 		if (__entry->lnh == HFI1_LRH_BTH)
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index a726d96..5e6d1ba 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -50,14 +50,7 @@
 #include "qp.h"
 
 /* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/* only opcode mask for adaptive pio */
-const u32 uc_only_opcode =
-	BIT(OP(SEND_ONLY) & 0x1f) |
-	BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
-	BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
-	BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f));
+#define OP(x) UC_OP(x)
 
 /**
  * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
@@ -70,7 +63,7 @@
 int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
-	struct hfi1_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct rvt_swqe *wqe;
 	u32 hwords = 5;
 	u32 bth0 = 0;
@@ -304,12 +297,12 @@
 void hfi1_uc_rcv(struct hfi1_packet *packet)
 {
 	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
-	struct hfi1_ib_header *hdr = packet->hdr;
+	struct ib_header *hdr = packet->hdr;
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
 	struct rvt_qp *qp = packet->qp;
-	struct hfi1_other_headers *ohdr = packet->ohdr;
+	struct ib_other_headers *ohdr = packet->ohdr;
 	u32 bth0, opcode;
 	u32 hdrsize = packet->hlen;
 	u32 psn;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index f01e8e1..97ae24b 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -271,7 +271,7 @@
 int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
-	struct hfi1_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct ib_ah_attr *ah_attr;
 	struct hfi1_pportdata *ppd;
 	struct hfi1_ibport *ibp;
@@ -510,8 +510,8 @@
 	u32 bth0, plen, vl, hwords = 5;
 	u16 lrh0;
 	u8 sl = ibp->sc_to_sl[sc5];
-	struct hfi1_ib_header hdr;
-	struct hfi1_other_headers *ohdr;
+	struct ib_header hdr;
+	struct ib_other_headers *ohdr;
 	struct pio_buf *pbuf;
 	struct send_context *ctxt = qp_to_send_context(qp, sc5);
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -559,8 +559,8 @@
 
 /*
  * opa_smp_check() - Do the regular pkey checking, and the additional
- * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26
- * ("SMA Packet Checks").
+ * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section
+ * 9.10.25 ("SMA Packet Checks").
  *
  * Note that:
  *   - Checks are done using the pkey directly from the packet's BTH,
@@ -603,23 +603,28 @@
 
 	/*
 	 * SMPs fall into one of four (disjoint) categories:
-	 * SMA request, SMA response, trap, or trap repress.
-	 * Our response depends, in part, on which type of
-	 * SMP we're processing.
+	 * SMA request, SMA response, SMA trap, or SMA trap repress.
+	 * Our response depends, in part, on which type of SMP we're
+	 * processing.
 	 *
-	 * If this is not an SMA request, or trap repress:
-	 *   - accept MAD if the port is running an SM
-	 *   - pkey == FULL_MGMT_P_KEY =>
-	 *       reply with unsupported method (i.e., just mark
-	 *       the smp's status field here, and let it be
-	 *       processed normally)
-	 *   - pkey != LIM_MGMT_P_KEY =>
-	 *       increment port recv constraint errors, drop MAD
-	 * If this is an SMA request or trap repress:
+	 * If this is an SMA response, skip the check here.
+	 *
+	 * If this is an SMA request or SMA trap repress:
 	 *   - pkey != FULL_MGMT_P_KEY =>
 	 *       increment port recv constraint errors, drop MAD
+	 *
+	 * Otherwise:
+	 *    - accept if the port is running an SM
+	 *    - drop MAD if it's an SMA trap
+	 *    - pkey == FULL_MGMT_P_KEY =>
+	 *        reply with unsupported method
+	 *    - pkey != FULL_MGMT_P_KEY =>
+	 *	  increment port recv constraint errors, drop MAD
 	 */
 	switch (smp->method) {
+	case IB_MGMT_METHOD_GET_RESP:
+	case IB_MGMT_METHOD_REPORT_RESP:
+		break;
 	case IB_MGMT_METHOD_GET:
 	case IB_MGMT_METHOD_SET:
 	case IB_MGMT_METHOD_REPORT:
@@ -629,23 +634,17 @@
 			return 1;
 		}
 		break;
-	case IB_MGMT_METHOD_SEND:
-	case IB_MGMT_METHOD_TRAP:
-	case IB_MGMT_METHOD_GET_RESP:
-	case IB_MGMT_METHOD_REPORT_RESP:
+	default:
 		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
 			return 0;
+		if (smp->method == IB_MGMT_METHOD_TRAP)
+			return 1;
 		if (pkey == FULL_MGMT_P_KEY) {
 			smp->status |= IB_SMP_UNSUP_METHOD;
 			return 0;
 		}
-		if (pkey != LIM_MGMT_P_KEY) {
-			ingress_pkey_table_fail(ppd, pkey, slid);
-			return 1;
-		}
-		break;
-	default:
-		break;
+		ingress_pkey_table_fail(ppd, pkey, slid);
+		return 1;
 	}
 	return 0;
 }
@@ -665,7 +664,7 @@
  */
 void hfi1_ud_rcv(struct hfi1_packet *packet)
 {
-	struct hfi1_other_headers *ohdr = packet->ohdr;
+	struct ib_other_headers *ohdr = packet->ohdr;
 	int opcode;
 	u32 hdrsize = packet->hlen;
 	struct ib_wc wc;
@@ -675,13 +674,13 @@
 	int mgmt_pkey_idx = -1;
 	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	struct hfi1_ib_header *hdr = packet->hdr;
+	struct ib_header *hdr = packet->hdr;
 	u32 rcv_flags = packet->rcv_flags;
 	void *data = packet->ebuf;
 	u32 tlen = packet->tlen;
 	struct rvt_qp *qp = packet->qp;
 	bool has_grh = rcv_flags & HFI1_HAS_GRH;
-	u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+	u8 sc5 = hdr2sc(hdr, packet->rhf);
 	u32 bth1;
 	u8 sl_from_sc, sl;
 	u16 slid;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 0ecf279..a761f80 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -114,6 +114,8 @@
 #define KDETH_HCRC_LOWER_SHIFT    24
 #define KDETH_HCRC_LOWER_MASK     0xff
 
+#define AHG_KDETH_INTR_SHIFT 12
+
 #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
 #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
 
@@ -546,7 +548,7 @@
 	u8 opcode, sc, vl;
 	int req_queued = 0;
 	u16 dlid;
-	u8 selector;
+	u32 selector;
 
 	if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
 		hfi1_cdbg(
@@ -751,12 +753,9 @@
 
 	dlid = be16_to_cpu(req->hdr.lrh[1]);
 	selector = dlid_to_selector(dlid);
+	selector += uctxt->ctxt + fd->subctxt;
+	req->sde = sdma_select_user_engine(dd, selector, vl);
 
-	/* Have to select the engine */
-	req->sde = sdma_select_engine_vl(dd,
-					 (u32)(uctxt->ctxt + fd->subctxt +
-					       selector),
-					 vl);
 	if (!req->sde || !sdma_running(req->sde)) {
 		ret = -ECOMM;
 		goto free_req;
@@ -892,7 +891,7 @@
 
 static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
 {
-	int ret = 0;
+	int ret = 0, count;
 	unsigned npkts = 0;
 	struct user_sdma_txreq *tx = NULL;
 	struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -1088,23 +1087,18 @@
 		npkts++;
 	}
 dosend:
-	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
-	if (list_empty(&req->txps)) {
-		req->seqsubmitted = req->seqnum;
-		if (req->seqnum == req->info.npkts) {
-			set_bit(SDMA_REQ_SEND_DONE, &req->flags);
-			/*
-			 * The txreq has already been submitted to the HW queue
-			 * so we can free the AHG entry now. Corruption will not
-			 * happen due to the sequential manner in which
-			 * descriptors are processed.
-			 */
-			if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
-				sdma_ahg_free(req->sde, req->ahg_idx);
-		}
-	} else if (ret > 0) {
-		req->seqsubmitted += ret;
-		ret = 0;
+	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+	req->seqsubmitted += count;
+	if (req->seqsubmitted == req->info.npkts) {
+		set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+		/*
+		 * The txreq has already been submitted to the HW queue
+		 * so we can free the AHG entry now. Corruption will not
+		 * happen due to the sequential manner in which
+		 * descriptors are processed.
+		 */
+		if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+			sdma_ahg_free(req->sde, req->ahg_idx);
 	}
 	return ret;
 
@@ -1480,7 +1474,8 @@
 		/* Clear KDETH.SH on last packet */
 		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
 			val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
-								INTR) >> 16);
+						     INTR) <<
+					   AHG_KDETH_INTR_SHIFT);
 			val &= cpu_to_le16(~(1U << 13));
 			AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
 		} else {
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2b35954..f2f6b5a 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -76,7 +76,7 @@
 module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
 MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
 
-unsigned int hfi1_max_cqes = 0x2FFFF;
+unsigned int hfi1_max_cqes = 0x2FFFFF;
 module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
 MODULE_PARM_DESC(max_cqes,
 		 "Maximum number of completion queue entries to support");
@@ -89,7 +89,7 @@
 module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
 
-unsigned int hfi1_max_qps = 16384;
+unsigned int hfi1_max_qps = 32768;
 module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
 MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
 
@@ -335,7 +335,7 @@
 	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST]        = 12 + 8 + 4,
 	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY]        = 12 + 8 + 4,
 	[IB_OPCODE_RC_ACKNOWLEDGE]                    = 12 + 8 + 4,
-	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE]             = 12 + 8 + 4,
+	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE]             = 12 + 8 + 4 + 8,
 	[IB_OPCODE_RC_COMPARE_SWAP]                   = 12 + 8 + 28,
 	[IB_OPCODE_RC_FETCH_ADD]                      = 12 + 8 + 28,
 	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE]      = 12 + 8 + 4,
@@ -403,6 +403,28 @@
 	[IB_OPCODE_CNP]				      = &hfi1_cnp_rcv
 };
 
+#define OPMASK 0x1f
+
+static const u32 pio_opmask[BIT(3)] = {
+	/* RC */
+	[IB_OPCODE_RC >> 5] =
+		BIT(RC_OP(SEND_ONLY) & OPMASK) |
+		BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+		BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+		BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
+		BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
+		BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
+		BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
+		BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
+		BIT(RC_OP(FETCH_ADD) & OPMASK),
+	/* UC */
+	[IB_OPCODE_UC >> 5] =
+		BIT(UC_OP(SEND_ONLY) & OPMASK) |
+		BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+		BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+		BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
+};
+
 /*
  * System image GUID.
  */
@@ -567,7 +589,7 @@
 void hfi1_ib_rcv(struct hfi1_packet *packet)
 {
 	struct hfi1_ctxtdata *rcd = packet->rcd;
-	struct hfi1_ib_header *hdr = packet->hdr;
+	struct ib_header *hdr = packet->hdr;
 	u32 tlen = packet->tlen;
 	struct hfi1_pportdata *ppd = rcd->ppd;
 	struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -719,7 +741,7 @@
 	if (tx->wqe) {
 		hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
 	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
-		struct hfi1_ib_header *hdr;
+		struct ib_header *hdr;
 
 		hdr = &tx->phdr.hdr;
 		hfi1_rc_send_complete(qp, hdr);
@@ -748,7 +770,7 @@
 			qp->s_flags |= RVT_S_WAIT_KMEM;
 			list_add_tail(&priv->s_iowait.list, &dev->memwait);
 			trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
-			atomic_inc(&qp->refcount);
+			rvt_get_qp(qp);
 		}
 		write_sequnlock(&dev->iowait_lock);
 		qp->s_flags &= ~RVT_S_BUSY;
@@ -959,7 +981,7 @@
 			was_empty = list_empty(&sc->piowait);
 			list_add_tail(&priv->s_iowait.list, &sc->piowait);
 			trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
-			atomic_inc(&qp->refcount);
+			rvt_get_qp(qp);
 			/* counting: only call wantpiobuf_intr if first user */
 			if (was_empty)
 				hfi1_sc_wantpiobuf_intr(sc, 1);
@@ -1200,7 +1222,7 @@
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	struct hfi1_qp_priv *priv = qp->priv;
-	struct hfi1_ib_header *h = &tx->phdr.hdr;
+	struct ib_header *h = &tx->phdr.hdr;
 
 	if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
 		return dd->process_pio_send;
@@ -1210,22 +1232,18 @@
 	case IB_QPT_GSI:
 	case IB_QPT_UD:
 		break;
-	case IB_QPT_RC:
-		if (piothreshold &&
-		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
-		    (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
-		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
-		    !sdma_txreq_built(&tx->txreq))
-			return dd->process_pio_send;
-		break;
 	case IB_QPT_UC:
+	case IB_QPT_RC: {
+		u8 op = get_opcode(h);
+
 		if (piothreshold &&
 		    qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
-		    (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
+		    (BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
 		    iowait_sdma_pending(&priv->s_iowait) == 0 &&
 		    !sdma_txreq_built(&tx->txreq))
 			return dd->process_pio_send;
 		break;
+	}
 	default:
 		break;
 	}
@@ -1244,8 +1262,8 @@
 {
 	struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	struct hfi1_qp_priv *priv = qp->priv;
-	struct hfi1_other_headers *ohdr;
-	struct hfi1_ib_header *hdr;
+	struct ib_other_headers *ohdr;
+	struct ib_header *hdr;
 	send_routine sr;
 	int ret;
 	u8 lnh;
@@ -1754,7 +1772,7 @@
 {
 	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
 	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-	struct hfi1_ib_header *hdr = packet->hdr;
+	struct ib_header *hdr = packet->hdr;
 	struct rvt_qp *qp = packet->qp;
 	u32 lqpn, rqpn = 0;
 	u16 rlid = 0;
@@ -1781,7 +1799,7 @@
 		return;
 	}
 
-	sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+	sc5 = hdr2sc(hdr, packet->rhf);
 	sl = ibp->sc_to_sl[sc5];
 	lqpn = qp->ibqp.qp_num;
 
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index d1b101c..1c3815d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -60,6 +60,7 @@
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
 #include <rdma/ib_mad.h>
+#include <rdma/ib_hdrs.h>
 #include <rdma/rdma_vt.h>
 #include <rdma/rdmavt_qp.h>
 #include <rdma/rdmavt_cq.h>
@@ -80,16 +81,6 @@
  */
 #define HFI1_UVERBS_ABI_VERSION       2
 
-#define IB_SEQ_NAK	(3 << 29)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK                      0x20
-#define IB_NAK_PSN_ERROR                0x60
-#define IB_NAK_INVALID_REQUEST          0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR      0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST       0x64
-
 /* IB Performance Manager status values */
 #define IB_PMA_SAMPLE_STATUS_DONE       0x00
 #define IB_PMA_SAMPLE_STATUS_STARTED    0x01
@@ -104,80 +95,16 @@
 
 #define HFI1_VENDOR_IPG		cpu_to_be16(0xFFA0)
 
-#define IB_BTH_REQ_ACK		BIT(31)
-#define IB_BTH_SOLICITED	BIT(23)
-#define IB_BTH_MIG_REQ		BIT(22)
-
-#define IB_GRH_VERSION		6
-#define IB_GRH_VERSION_MASK	0xF
-#define IB_GRH_VERSION_SHIFT	28
-#define IB_GRH_TCLASS_MASK	0xFF
-#define IB_GRH_TCLASS_SHIFT	20
-#define IB_GRH_FLOW_MASK	0xFFFFF
-#define IB_GRH_FLOW_SHIFT	0
-#define IB_GRH_NEXT_HDR		0x1B
-
 #define IB_DEFAULT_GID_PREFIX	cpu_to_be64(0xfe80000000000000ULL)
 
+#define RC_OP(x) IB_OPCODE_RC_##x
+#define UC_OP(x) IB_OPCODE_UC_##x
+
 /* flags passed by hfi1_ib_rcv() */
 enum {
 	HFI1_HAS_GRH = (1 << 0),
 };
 
-struct ib_reth {
-	__be64 vaddr;
-	__be32 rkey;
-	__be32 length;
-} __packed;
-
-struct ib_atomic_eth {
-	__be32 vaddr[2];        /* unaligned so access as 2 32-bit words */
-	__be32 rkey;
-	__be64 swap_data;
-	__be64 compare_data;
-} __packed;
-
-union ib_ehdrs {
-	struct {
-		__be32 deth[2];
-		__be32 imm_data;
-	} ud;
-	struct {
-		struct ib_reth reth;
-		__be32 imm_data;
-	} rc;
-	struct {
-		__be32 aeth;
-		__be32 atomic_ack_eth[2];
-	} at;
-	__be32 imm_data;
-	__be32 aeth;
-	__be32 ieth;
-	struct ib_atomic_eth atomic_eth;
-}  __packed;
-
-struct hfi1_other_headers {
-	__be32 bth[3];
-	union ib_ehdrs u;
-} __packed;
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data).  Only the first 56 bytes of the IB header
- * will be in the eager header buffer.  The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct hfi1_ib_header {
-	__be16 lrh[4];
-	union {
-		struct {
-			struct ib_grh grh;
-			struct hfi1_other_headers oth;
-		} l;
-		struct hfi1_other_headers oth;
-	} u;
-} __packed;
-
 struct hfi1_ahg_info {
 	u32 ahgdesc[2];
 	u16 tx_flags;
@@ -187,7 +114,7 @@
 
 struct hfi1_sdma_header {
 	__le64 pbc;
-	struct hfi1_ib_header hdr;
+	struct ib_header hdr;
 } __packed;
 
 /*
@@ -386,7 +313,7 @@
 
 void hfi1_rc_hdrerr(
 	struct hfi1_ctxtdata *rcd,
-	struct hfi1_ib_header *hdr,
+	struct ib_header *hdr,
 	u32 rcv_flags,
 	struct rvt_qp *qp);
 
@@ -400,7 +327,7 @@
 void hfi1_del_timers_sync(struct rvt_qp *qp);
 void hfi1_stop_rc_timers(struct rvt_qp *qp);
 
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
 
 void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
 
@@ -423,7 +350,7 @@
 extern const u32 rc_only_opcode;
 extern const u32 uc_only_opcode;
 
-static inline u8 get_opcode(struct hfi1_ib_header *h)
+static inline u8 get_opcode(struct ib_header *h)
 {
 	u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
 
@@ -433,13 +360,13 @@
 		return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
 }
 
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
 		       int has_grh, struct rvt_qp *qp, u32 bth0);
 
 u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
 		  struct ib_global_route *grh, u32 hwords, u32 nwords);
 
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
 			  u32 bth0, u32 bth2, int middle,
 			  struct hfi1_pkt_state *ps);
 
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index d8fb056..094ab82 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -109,7 +109,7 @@
 			qp->s_flags |= RVT_S_WAIT_TX;
 			list_add_tail(&priv->s_iowait.list, &dev->txwait);
 			trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
-			atomic_inc(&qp->refcount);
+			rvt_get_qp(qp);
 		}
 		qp->s_flags &= ~RVT_S_BUSY;
 	}
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index b738acd..8ec09e4 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -232,7 +232,7 @@
 	struct i40e_client *client;
 	struct i40iw_hw hw;
 	struct i40iw_cm_core cm_core;
-	unsigned long *mem_resources;
+	u8 *mem_resources;
 	unsigned long *allocated_qps;
 	unsigned long *allocated_cqs;
 	unsigned long *allocated_mrs;
@@ -435,8 +435,8 @@
 	*next = resource_num + 1;
 	if (*next == max_resources)
 		*next = 0;
-	spin_unlock_irqrestore(&iwdev->resource_lock, flags);
 	*req_resource_num = resource_num;
+	spin_unlock_irqrestore(&iwdev->resource_lock, flags);
 
 	return 0;
 }
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 5026dc7..7ca0638 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -535,8 +535,8 @@
 		buf += hdr_len;
 	}
 
-	if (pd_len)
-		memcpy(buf, pdata->addr, pd_len);
+	if (pdata && pdata->addr)
+		memcpy(buf, pdata->addr, pdata->size);
 
 	atomic_set(&sqbuf->refcount, 1);
 
@@ -3347,26 +3347,6 @@
 }
 
 /**
- * i40iw_loopback_nop - Send a nop
- * @qp: associated hw qp
- */
-static void i40iw_loopback_nop(struct i40iw_sc_qp *qp)
-{
-	u64 *wqe;
-	u64 header;
-
-	wqe = qp->qp_uk.sq_base->elem;
-	set_64bit_val(wqe, 0, 0);
-	set_64bit_val(wqe, 8, 0);
-	set_64bit_val(wqe, 16, 0);
-
-	header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) |
-	    LS_64(0, I40IWQPSQ_SIGCOMPL) |
-	    LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID);
-	set_64bit_val(wqe, 24, header);
-}
-
-/**
  * i40iw_qp_disconnect - free qp and close cm
  * @iwqp: associate qp for the connection
  */
@@ -3638,7 +3618,7 @@
 	} else {
 		if (iwqp->page)
 			iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
-		i40iw_loopback_nop(&iwqp->sc_qp);
+		dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0);
 	}
 
 	if (iwqp->page)
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 3ee0cad..0c92a40 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -265,6 +265,7 @@
 		info.dont_send_fin = false;
 	if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR))
 		info.reset_tcp_conn = true;
+	iwqp->hw_iwarp_state = state;
 	i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0);
 }
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 6e90813..445e230 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -100,7 +100,7 @@
 	.notifier_call = i40iw_net_event
 };
 
-static int i40iw_notifiers_registered;
+static atomic_t i40iw_notifiers_registered;
 
 /**
  * i40iw_find_i40e_handler - find a handler given a client info
@@ -1342,12 +1342,11 @@
  */
 static void i40iw_register_notifiers(void)
 {
-	if (!i40iw_notifiers_registered) {
+	if (atomic_inc_return(&i40iw_notifiers_registered) == 1) {
 		register_inetaddr_notifier(&i40iw_inetaddr_notifier);
 		register_inet6addr_notifier(&i40iw_inetaddr6_notifier);
 		register_netevent_notifier(&i40iw_net_notifier);
 	}
-	i40iw_notifiers_registered++;
 }
 
 /**
@@ -1429,8 +1428,7 @@
 			i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx);
 		/* fallthrough */
 	case INET_NOTIFIER:
-		if (i40iw_notifiers_registered > 0) {
-			i40iw_notifiers_registered--;
+		if (!atomic_dec_return(&i40iw_notifiers_registered)) {
 			unregister_netevent_notifier(&i40iw_net_notifier);
 			unregister_inetaddr_notifier(&i40iw_inetaddr_notifier);
 			unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier);
@@ -1558,6 +1556,10 @@
 	enum i40iw_status_code status;
 	struct i40iw_handler *hdl;
 
+	hdl = i40iw_find_netdev(ldev->netdev);
+	if (hdl)
+		return 0;
+
 	hdl = kzalloc(sizeof(*hdl), GFP_KERNEL);
 	if (!hdl)
 		return -ENOMEM;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 0e8db0a..6fd043b 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -673,8 +673,11 @@
 {
 	if (!mem)
 		return I40IW_ERR_PARAM;
+	/*
+	 * mem->va points to the parent of mem, so both mem and mem->va
+	 * can not be touched once mem->va is freed
+	 */
 	kfree(mem->va);
-	mem->va = NULL;
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 2360338..6329c97 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -794,7 +794,6 @@
 	return &iwqp->ibqp;
 error:
 	i40iw_free_qp_resources(iwdev, iwqp, qp_num);
-	kfree(mem);
 	return ERR_PTR(err_code);
 }
 
@@ -1926,8 +1925,7 @@
 		}
 		if (iwpbl->pbl_allocated)
 			i40iw_free_pble(iwdev->pble_rsrc, palloc);
-		kfree(iwpbl->iwmr);
-		iwpbl->iwmr = NULL;
+		kfree(iwmr);
 		return 0;
 	}
 
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index d6fc8a6..5df63da 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -576,8 +576,8 @@
 		checksum == cpu_to_be16(0xffff);
 }
 
-static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
-			   unsigned tail, struct mlx4_cqe *cqe, int is_eth)
+static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
+			    unsigned tail, struct mlx4_cqe *cqe, int is_eth)
 {
 	struct mlx4_ib_proxy_sqp_hdr *hdr;
 
@@ -600,8 +600,6 @@
 		wc->slid        = be16_to_cpu(hdr->tun.slid_mac_47_32);
 		wc->sl          = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
 	}
-
-	return 0;
 }
 
 static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries,
@@ -689,12 +687,6 @@
 	is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
 		MLX4_CQE_OPCODE_ERROR;
 
-	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
-		     is_send)) {
-		pr_warn("Completion for NOP opcode detected!\n");
-		return -EINVAL;
-	}
-
 	/* Resize CQ in progress */
 	if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) {
 		if (cq->resize_buf) {
@@ -720,12 +712,6 @@
 		 */
 		mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
 				       be32_to_cpu(cqe->vlan_my_qpn));
-		if (unlikely(!mqp)) {
-			pr_warn("CQ %06x with entry for unknown QPN %06x\n",
-			       cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
-			return -EINVAL;
-		}
-
 		*cur_qp = to_mibqp(mqp);
 	}
 
@@ -738,11 +724,6 @@
 		/* SRQ is also in the radix tree */
 		msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev,
 				       srq_num);
-		if (unlikely(!msrq)) {
-			pr_warn("CQ %06x with entry for unknown SRQN %06x\n",
-				cq->mcq.cqn, srq_num);
-			return -EINVAL;
-		}
 	}
 
 	if (is_send) {
@@ -852,9 +833,11 @@
 		if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
 			if ((*cur_qp)->mlx4_ib_qp_type &
 			    (MLX4_IB_QPT_PROXY_SMI_OWNER |
-			     MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
-				return use_tunnel_data(*cur_qp, cq, wc, tail,
-						       cqe, is_eth);
+			     MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
+				use_tunnel_data(*cur_qp, cq, wc, tail, cqe,
+						is_eth);
+				return 0;
+			}
 		}
 
 		wc->slid	   = be16_to_cpu(cqe->rlid);
@@ -891,7 +874,6 @@
 	struct mlx4_ib_qp *cur_qp = NULL;
 	unsigned long flags;
 	int npolled;
-	int err = 0;
 	struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device);
 
 	spin_lock_irqsave(&cq->lock, flags);
@@ -901,8 +883,7 @@
 	}
 
 	for (npolled = 0; npolled < num_entries; ++npolled) {
-		err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled);
-		if (err)
+		if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled))
 			break;
 	}
 
@@ -911,10 +892,7 @@
 out:
 	spin_unlock_irqrestore(&cq->lock, flags);
 
-	if (err == 0 || err == -EAGAIN)
-		return npolled;
-	else
-		return err;
+	return npolled;
 }
 
 int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 9c2e53d..0f21c3a 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1128,6 +1128,27 @@
 
 		/* Generate GUID changed event */
 		if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
+			if (mlx4_is_master(dev->dev)) {
+				union ib_gid gid;
+				int err = 0;
+
+				if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
+					err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
+				else
+					gid.global.subnet_prefix =
+						eqe->event.port_mgmt_change.params.port_info.gid_prefix;
+				if (err) {
+					pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
+						port, err);
+				} else {
+					pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
+						 port,
+						 (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
+						 be64_to_cpu(gid.global.subnet_prefix));
+					atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
+						     be64_to_cpu(gid.global.subnet_prefix));
+				}
+			}
 			mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
 			/*if master, notify all slaves*/
 			if (mlx4_is_master(dev->dev))
@@ -2202,6 +2223,8 @@
 		if (err)
 			goto demux_err;
 		dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
+		atomic64_set(&dev->sriov.demux[i].subnet_prefix,
+			     be64_to_cpu(gid.global.subnet_prefix));
 		err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
 				      &dev->sriov.sqps[i]);
 		if (err)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 2af44c2..87ba9bc 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2202,6 +2202,9 @@
 	bool per_port = !!(ibdev->dev->caps.flags2 &
 		MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
 
+	if (mlx4_is_slave(ibdev->dev))
+		return 0;
+
 	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
 		/* i == 1 means we are building port counters */
 		if (i && !per_port)
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 8f7ad07..097bfcc 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -489,7 +489,7 @@
 		if (!group->members[i])
 			leave_state |= (1 << i);
 
-	return leave_state & (group->rec.scope_join_state & 7);
+	return leave_state & (group->rec.scope_join_state & 0xf);
 }
 
 static int join_group(struct mcast_group *group, int slave, u8 join_mask)
@@ -564,8 +564,8 @@
 		} else
 			mcg_warn_group(group, "DRIVER BUG\n");
 	} else if (group->state == MCAST_LEAVE_SENT) {
-		if (group->rec.scope_join_state & 7)
-			group->rec.scope_join_state &= 0xf8;
+		if (group->rec.scope_join_state & 0xf)
+			group->rec.scope_join_state &= 0xf0;
 		group->state = MCAST_IDLE;
 		mutex_unlock(&group->lock);
 		if (release_group(group, 1))
@@ -605,7 +605,7 @@
 static int handle_join_req(struct mcast_group *group, u8 join_mask,
 			   struct mcast_req *req)
 {
-	u8 group_join_state = group->rec.scope_join_state & 7;
+	u8 group_join_state = group->rec.scope_join_state & 0xf;
 	int ref = 0;
 	u16 status;
 	struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
@@ -690,8 +690,8 @@
 			u8 cur_join_state;
 
 			resp_join_state = ((struct ib_sa_mcmember_data *)
-						group->response_sa_mad.data)->scope_join_state & 7;
-			cur_join_state = group->rec.scope_join_state & 7;
+						group->response_sa_mad.data)->scope_join_state & 0xf;
+			cur_join_state = group->rec.scope_join_state & 0xf;
 
 			if (method == IB_MGMT_METHOD_GET_RESP) {
 				/* successfull join */
@@ -710,7 +710,7 @@
 		req = list_first_entry(&group->pending_list, struct mcast_req,
 				       group_list);
 		sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
-		req_join_state = sa_data->scope_join_state & 0x7;
+		req_join_state = sa_data->scope_join_state & 0xf;
 
 		/* For a leave request, we will immediately answer the VF, and
 		 * update our internal counters. The actual leave will be sent
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7c5832e..686ab48 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -448,7 +448,7 @@
 	struct workqueue_struct *wq;
 	struct workqueue_struct *ud_wq;
 	spinlock_t ud_lock;
-	__be64 subnet_prefix;
+	atomic64_t subnet_prefix;
 	__be64 guid_cache[128];
 	struct mlx4_ib_dev *dev;
 	/* the following lock protects both mcg_table and mcg_mgid0_list */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 768085f..7fb9629 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2493,24 +2493,27 @@
 		sqp->ud_header.grh.flow_label    =
 			ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
 		sqp->ud_header.grh.hop_limit     = ah->av.ib.hop_limit;
-		if (is_eth)
+		if (is_eth) {
 			memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16);
-		else {
-		if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
-			/* When multi-function is enabled, the ib_core gid
-			 * indexes don't necessarily match the hw ones, so
-			 * we must use our own cache */
-			sqp->ud_header.grh.source_gid.global.subnet_prefix =
-				to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-						       subnet_prefix;
-			sqp->ud_header.grh.source_gid.global.interface_id =
-				to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
-					       guid_cache[ah->av.ib.gid_index];
-		} else
-			ib_get_cached_gid(ib_dev,
-					  be32_to_cpu(ah->av.ib.port_pd) >> 24,
-					  ah->av.ib.gid_index,
-					  &sqp->ud_header.grh.source_gid, NULL);
+		} else {
+			if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
+				/* When multi-function is enabled, the ib_core gid
+				 * indexes don't necessarily match the hw ones, so
+				 * we must use our own cache
+				 */
+				sqp->ud_header.grh.source_gid.global.subnet_prefix =
+					cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov.
+								    demux[sqp->qp.port - 1].
+								    subnet_prefix)));
+				sqp->ud_header.grh.source_gid.global.interface_id =
+					to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
+						       guid_cache[ah->av.ib.gid_index];
+			} else {
+				ib_get_cached_gid(ib_dev,
+						  be32_to_cpu(ah->av.ib.port_pd) >> 24,
+						  ah->av.ib.gid_index,
+						  &sqp->ud_header.grh.source_gid, NULL);
+			}
 		}
 		memcpy(sqp->ud_header.grh.destination_gid.raw,
 		       ah->av.ib.dgid, 16);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 308a358..5de9a65 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -553,12 +553,6 @@
 		 * from the table.
 		 */
 		mqp = __mlx5_qp_lookup(dev->mdev, qpn);
-		if (unlikely(!mqp)) {
-			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
-				     cq->mcq.cqn, qpn);
-			return -EINVAL;
-		}
-
 		*cur_qp = to_mibqp(mqp);
 	}
 
@@ -619,13 +613,6 @@
 		read_lock(&dev->mdev->priv.mkey_table.lock);
 		mmkey = __mlx5_mr_lookup(dev->mdev,
 					 mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
-		if (unlikely(!mmkey)) {
-			read_unlock(&dev->mdev->priv.mkey_table.lock);
-			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
-				     cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
-			return -EINVAL;
-		}
-
 		mr = to_mibmr(mmkey);
 		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
 		mr->sig->sig_err_exists = true;
@@ -676,7 +663,6 @@
 	unsigned long flags;
 	int soft_polled = 0;
 	int npolled;
-	int err = 0;
 
 	spin_lock_irqsave(&cq->lock, flags);
 	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
@@ -688,8 +674,7 @@
 		soft_polled = poll_soft_wc(cq, num_entries, wc);
 
 	for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
-		err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
-		if (err)
+		if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
 			break;
 	}
 
@@ -698,10 +683,7 @@
 out:
 	spin_unlock_irqrestore(&cq->lock, flags);
 
-	if (err == 0 || err == -EAGAIN)
-		return soft_polled + npolled;
-	else
-		return err;
+	return soft_polled + npolled;
 }
 
 int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
@@ -747,14 +729,16 @@
 
 static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 			  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
-			  int entries, struct mlx5_create_cq_mbox_in **cqb,
+			  int entries, u32 **cqb,
 			  int *cqe_size, int *index, int *inlen)
 {
 	struct mlx5_ib_create_cq ucmd;
 	size_t ucmdlen;
 	int page_shift;
+	__be64 *pas;
 	int npages;
 	int ncont;
+	void *cqc;
 	int err;
 
 	ucmdlen =
@@ -792,14 +776,20 @@
 	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
 		    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
 
-	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
+	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
 	*cqb = mlx5_vzalloc(*inlen);
 	if (!*cqb) {
 		err = -ENOMEM;
 		goto err_db;
 	}
-	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
-	(*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+
+	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);
+
+	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+	MLX5_SET(cqc, cqc, log_page_size,
+		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
 	*index = to_mucontext(context)->uuari.uars[0].index;
 
@@ -834,9 +824,10 @@
 
 static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 			    int entries, int cqe_size,
-			    struct mlx5_create_cq_mbox_in **cqb,
-			    int *index, int *inlen)
+			    u32 **cqb, int *index, int *inlen)
 {
+	__be64 *pas;
+	void *cqc;
 	int err;
 
 	err = mlx5_db_alloc(dev->mdev, &cq->db);
@@ -853,15 +844,21 @@
 
 	init_cq_buf(cq, &cq->buf);
 
-	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
+	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
 	*cqb = mlx5_vzalloc(*inlen);
 	if (!*cqb) {
 		err = -ENOMEM;
 		goto err_buf;
 	}
-	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);
 
-	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
+	mlx5_fill_page_array(&cq->buf.buf, pas);
+
+	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
+	MLX5_SET(cqc, cqc, log_page_size,
+		 cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
 	*index = dev->mdev->priv.uuari.uars[0].index;
 
 	return 0;
@@ -895,11 +892,12 @@
 {
 	int entries = attr->cqe;
 	int vector = attr->comp_vector;
-	struct mlx5_create_cq_mbox_in *cqb = NULL;
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	struct mlx5_ib_cq *cq;
 	int uninitialized_var(index);
 	int uninitialized_var(inlen);
+	u32 *cqb = NULL;
+	void *cqc;
 	int cqe_size;
 	unsigned int irqn;
 	int eqn;
@@ -945,19 +943,20 @@
 		INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
 	}
 
-	cq->cqe_size = cqe_size;
-	cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
-
-	if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
-		cqb->ctx.cqe_sz_flags |= (1 << 1);
-
-	cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
 	err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
 	if (err)
 		goto err_cqb;
 
-	cqb->ctx.c_eqn = cpu_to_be16(eqn);
-	cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);
+	cq->cqe_size = cqe_size;
+
+	cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
+	MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
+	MLX5_SET(cqc, cqc, uar_page, index);
+	MLX5_SET(cqc, cqc, c_eqn, eqn);
+	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
+	if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
+		MLX5_SET(cqc, cqc, oi, 1);
 
 	err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
 	if (err)
@@ -1088,27 +1087,15 @@
 
 int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 {
-	struct mlx5_modify_cq_mbox_in *in;
 	struct mlx5_ib_dev *dev = to_mdev(cq->device);
 	struct mlx5_ib_cq *mcq = to_mcq(cq);
 	int err;
-	u32 fsel;
 
 	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
 		return -ENOSYS;
 
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
-	if (!in)
-		return -ENOMEM;
-
-	in->cqn = cpu_to_be32(mcq->mcq.cqn);
-	fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
-	in->ctx.cq_period = cpu_to_be16(cq_period);
-	in->ctx.cq_max_count = cpu_to_be16(cq_count);
-	in->field_select = cpu_to_be32(fsel);
-	err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in));
-	kfree(in);
-
+	err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
+					     cq_period, cq_count);
 	if (err)
 		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
 
@@ -1241,9 +1228,11 @@
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
 	struct mlx5_ib_cq *cq = to_mcq(ibcq);
-	struct mlx5_modify_cq_mbox_in *in;
+	void *cqc;
+	u32 *in;
 	int err;
 	int npas;
+	__be64 *pas;
 	int page_shift;
 	int inlen;
 	int uninitialized_var(cqe_size);
@@ -1285,28 +1274,37 @@
 	if (err)
 		goto ex;
 
-	inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
+	inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
+		MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;
+
 	in = mlx5_vzalloc(inlen);
 	if (!in) {
 		err = -ENOMEM;
 		goto ex_resize;
 	}
 
+	pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
 	if (udata)
 		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
-				     in->pas, 0);
+				     pas, 0);
 	else
-		mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
+		mlx5_fill_page_array(&cq->resize_buf->buf, pas);
 
-	in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
-				       MLX5_MODIFY_CQ_MASK_PG_OFFSET |
-				       MLX5_MODIFY_CQ_MASK_PG_SIZE);
-	in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
-	in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
-	in->ctx.page_offset = 0;
-	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
-	in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
-	in->cqn = cpu_to_be32(cq->mcq.cqn);
+	MLX5_SET(modify_cq_in, in,
+		 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
+		 MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
+		 MLX5_MODIFY_CQ_MASK_PG_OFFSET |
+		 MLX5_MODIFY_CQ_MASK_PG_SIZE);
+
+	cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
+
+	MLX5_SET(cqc, cqc, log_page_size,
+		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
+	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
+
+	MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
+	MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);
 
 	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
 	if (err)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index a84bb76..551aa0e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -37,7 +37,6 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
-#include <linux/io-mapping.h>
 #if defined(CONFIG_X86)
 #include <asm/pat.h>
 #endif
@@ -233,23 +232,19 @@
 			 const union ib_gid *gid,
 			 const struct ib_gid_attr *attr)
 {
-	struct mlx5_ib_dev *dev	= to_mdev(device);
-	u32  in[MLX5_ST_SZ_DW(set_roce_address_in)];
-	u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	u32  in[MLX5_ST_SZ_DW(set_roce_address_in)]  = {0};
+	u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
 	void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
 	enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
 
 	if (ll != IB_LINK_LAYER_ETHERNET)
 		return -EINVAL;
 
-	memset(in, 0, sizeof(in));
-
 	ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
 
 	MLX5_SET(set_roce_address_in, in, roce_address_index, index);
 	MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
-
-	memset(out, 0, sizeof(out));
 	return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
 }
 
@@ -289,7 +284,9 @@
 
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
-	return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
+		return !MLX5_CAP_GEN(dev->mdev, ib_virt);
+	return 0;
 }
 
 enum {
@@ -752,8 +749,7 @@
 				     &props->active_width);
 	if (err)
 		goto out;
-	err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
-					 port);
+	err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
 	if (err)
 		goto out;
 
@@ -1429,6 +1425,13 @@
 					     dmac_47_16),
 				ib_spec->eth.val.dst_mac);
 
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+					     smac_47_16),
+				ib_spec->eth.mask.src_mac);
+		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+					     smac_47_16),
+				ib_spec->eth.val.src_mac);
+
 		if (ib_spec->eth.mask.vlan_tag) {
 			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
 				 vlan_tag, 1);
@@ -1850,6 +1853,7 @@
 					   int domain)
 {
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_ib_qp *mqp = to_mqp(qp);
 	struct mlx5_ib_flow_handler *handler = NULL;
 	struct mlx5_flow_destination *dst = NULL;
 	struct mlx5_ib_flow_prio *ft_prio;
@@ -1876,7 +1880,10 @@
 	}
 
 	dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-	dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
+	if (mqp->flags & MLX5_IB_QP_RSS)
+		dst->tir_num = mqp->rss_qp.tirn;
+	else
+		dst->tir_num = mqp->raw_packet_qp.rq.tirn;
 
 	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
 		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)  {
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 40df2cc..996b54e 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -71,7 +71,7 @@
 
 	addr = addr >> page_shift;
 	tmp = (unsigned long)addr;
-	m = find_first_bit(&tmp, sizeof(tmp));
+	m = find_first_bit(&tmp, BITS_PER_LONG);
 	skip = 1 << m;
 	mask = skip - 1;
 	i = 0;
@@ -81,7 +81,7 @@
 		for (k = 0; k < len; k++) {
 			if (!(i & mask)) {
 				tmp = (unsigned long)pfn;
-				m = min_t(unsigned long, m, find_first_bit(&tmp, sizeof(tmp)));
+				m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG));
 				skip = 1 << m;
 				mask = skip - 1;
 				base = pfn;
@@ -89,7 +89,7 @@
 			} else {
 				if (base + p != pfn) {
 					tmp = (unsigned long)p;
-					m = find_first_bit(&tmp, sizeof(tmp));
+					m = find_first_bit(&tmp, BITS_PER_LONG);
 					skip = 1 << m;
 					mask = skip - 1;
 					base = pfn;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 372385d..67cc741 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -402,6 +402,7 @@
 	/* QP uses 1 as its source QP number */
 	MLX5_IB_QP_SQPN_QP1			= 1 << 6,
 	MLX5_IB_QP_CAP_SCATTER_FCS		= 1 << 7,
+	MLX5_IB_QP_RSS				= 1 << 8,
 };
 
 struct mlx5_umr_wr {
@@ -504,7 +505,7 @@
 	int			umred;
 	int			npages;
 	struct mlx5_ib_dev     *dev;
-	struct mlx5_create_mkey_mbox_out out;
+	u32 out[MLX5_ST_SZ_DW(create_mkey_out)];
 	struct mlx5_core_sig_ctx    *sig;
 	int			live;
 	void			*descs_alloc;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4b02130..6f7e347 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -135,20 +135,10 @@
 		return;
 	}
 
-	if (mr->out.hdr.status) {
-		mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n",
-			     mr->out.hdr.status,
-			     be32_to_cpu(mr->out.hdr.syndrome));
-		kfree(mr);
-		dev->fill_delay = 1;
-		mod_timer(&dev->delay_timer, jiffies + HZ);
-		return;
-	}
-
 	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
 	key = dev->mdev->priv.mkey_key++;
 	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
-	mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
 
 	cache->last_add = jiffies;
 
@@ -170,16 +160,19 @@
 {
 	struct mlx5_mr_cache *cache = &dev->cache;
 	struct mlx5_cache_ent *ent = &cache->ent[c];
-	struct mlx5_create_mkey_mbox_in *in;
+	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_ib_mr *mr;
 	int npages = 1 << ent->order;
+	void *mkc;
+	u32 *in;
 	int err = 0;
 	int i;
 
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	in = kzalloc(inlen, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	for (i = 0; i < num; i++) {
 		if (ent->pending >= MAX_PENDING_REG_MR) {
 			err = -EAGAIN;
@@ -194,18 +187,22 @@
 		mr->order = ent->order;
 		mr->umred = 1;
 		mr->dev = dev;
-		in->seg.status = MLX5_MKEY_STATUS_FREE;
-		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
-		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
-		in->seg.log2_page_size = 12;
+
+		MLX5_SET(mkc, mkc, free, 1);
+		MLX5_SET(mkc, mkc, umr_en, 1);
+		MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+
+		MLX5_SET(mkc, mkc, qpn, 0xffffff);
+		MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
+		MLX5_SET(mkc, mkc, log_page_size, 12);
 
 		spin_lock_irq(&ent->lock);
 		ent->pending++;
 		spin_unlock_irq(&ent->lock);
-		err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in,
-					    sizeof(*in), reg_mr_callback,
-					    mr, &mr->out);
+		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
+					       in, inlen,
+					       mr->out, sizeof(mr->out),
+					       reg_mr_callback, mr);
 		if (err) {
 			spin_lock_irq(&ent->lock);
 			ent->pending--;
@@ -670,30 +667,38 @@
 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_core_dev *mdev = dev->mdev;
-	struct mlx5_create_mkey_mbox_in *in;
-	struct mlx5_mkey_seg *seg;
 	struct mlx5_ib_mr *mr;
+	void *mkc;
+	u32 *in;
 	int err;
 
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	in = kzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		goto err_free;
 	}
 
-	seg = &in->seg;
-	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
-	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
-	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-	seg->start_addr = 0;
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
-	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL,
-				    NULL);
+	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
+	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
+	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
+	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
+	MLX5_SET(mkc, mkc, lr, 1);
+
+	MLX5_SET(mkc, mkc, length64, 1);
+	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET64(mkc, mkc, start_addr, 0);
+
+	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
 	if (err)
 		goto err_in;
 
@@ -1063,9 +1068,11 @@
 				     int page_shift, int access_flags)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	struct mlx5_create_mkey_mbox_in *in;
 	struct mlx5_ib_mr *mr;
+	__be64 *pas;
+	void *mkc;
 	int inlen;
+	u32 *in;
 	int err;
 	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
 
@@ -1073,31 +1080,41 @@
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
+	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+		sizeof(*pas) * ((npages + 1) / 2) * 2;
 	in = mlx5_vzalloc(inlen);
 	if (!in) {
 		err = -ENOMEM;
 		goto err_1;
 	}
-	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
+	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+	mlx5_ib_populate_pas(dev, umem, page_shift, pas,
 			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
 
-	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
+	/* The pg_access bit allows setting the access flags
 	 * in the page list submitted with the command. */
-	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
-	in->seg.flags = convert_access(access_flags) |
-		MLX5_ACCESS_MODE_MTT;
-	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
-	in->seg.start_addr = cpu_to_be64(virt_addr);
-	in->seg.len = cpu_to_be64(length);
-	in->seg.bsfs_octo_size = 0;
-	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
-	in->seg.log2_page_size = page_shift;
-	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
-							 1 << page_shift));
-	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL,
-				    NULL, NULL);
+	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
+
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
+	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
+	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
+	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
+	MLX5_SET(mkc, mkc, lr, 1);
+
+	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
+	MLX5_SET64(mkc, mkc, len, length);
+	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
+	MLX5_SET(mkc, mkc, translations_octword_size,
+		 get_octo_len(virt_addr, length, 1 << page_shift));
+	MLX5_SET(mkc, mkc, log_page_size, page_shift);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+		 get_octo_len(virt_addr, length, 1 << page_shift));
+
+	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
 	if (err) {
 		mlx5_ib_warn(dev, "create mkey failed\n");
 		goto err_2;
@@ -1523,30 +1540,32 @@
 			       u32 max_num_sg)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	struct mlx5_create_mkey_mbox_in *in;
-	struct mlx5_ib_mr *mr;
+	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	int ndescs = ALIGN(max_num_sg, 4);
+	struct mlx5_ib_mr *mr;
+	void *mkc;
+	u32 *in;
 	int err;
 
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	in = kzalloc(inlen, GFP_KERNEL);
 	if (!in) {
 		err = -ENOMEM;
 		goto err_free;
 	}
 
-	in->seg.status = MLX5_MKEY_STATUS_FREE;
-	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
-	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+	MLX5_SET(mkc, mkc, free, 1);
+	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
 
 	if (mr_type == IB_MR_TYPE_MEM_REG) {
-		mr->access_mode = MLX5_ACCESS_MODE_MTT;
-		in->seg.log2_page_size = PAGE_SHIFT;
-
+		mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
 		err = mlx5_alloc_priv_descs(pd->device, mr,
 					    ndescs, sizeof(u64));
 		if (err)
@@ -1555,7 +1574,7 @@
 		mr->desc_size = sizeof(u64);
 		mr->max_descs = ndescs;
 	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
-		mr->access_mode = MLX5_ACCESS_MODE_KLM;
+		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
 
 		err = mlx5_alloc_priv_descs(pd->device, mr,
 					    ndescs, sizeof(struct mlx5_klm));
@@ -1566,9 +1585,8 @@
 	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
 		u32 psv_index[2];
 
-		in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
-							   MLX5_MKEY_BSF_EN);
-		in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
+		MLX5_SET(mkc, mkc, bsf_en, 1);
+		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
 		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
 		if (!mr->sig) {
 			err = -ENOMEM;
@@ -1581,7 +1599,7 @@
 		if (err)
 			goto err_free_sig;
 
-		mr->access_mode = MLX5_ACCESS_MODE_KLM;
+		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
 		mr->sig->psv_memory.psv_idx = psv_index[0];
 		mr->sig->psv_wire.psv_idx = psv_index[1];
 
@@ -1595,9 +1613,10 @@
 		goto err_free_in;
 	}
 
-	in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode;
-	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in),
-				    NULL, NULL, NULL);
+	MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
+	MLX5_SET(mkc, mkc, umr_en, 1);
+
+	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
 	if (err)
 		goto err_destroy_psv;
 
@@ -1633,8 +1652,10 @@
 			       struct ib_udata *udata)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	struct mlx5_create_mkey_mbox_in *in = NULL;
+	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	struct mlx5_ib_mw *mw = NULL;
+	u32 *in = NULL;
+	void *mkc;
 	int ndescs;
 	int err;
 	struct mlx5_ib_alloc_mw req = {};
@@ -1658,23 +1679,24 @@
 	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
 
 	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	in = kzalloc(inlen, GFP_KERNEL);
 	if (!mw || !in) {
 		err = -ENOMEM;
 		goto free;
 	}
 
-	in->seg.status = MLX5_MKEY_STATUS_FREE;
-	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
-	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
-	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM |
-		MLX5_PERM_LOCAL_READ;
-	if (type == IB_MW_TYPE_2)
-		in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
-	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
-	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in),
-				    NULL, NULL, NULL);
+	MLX5_SET(mkc, mkc, free, 1);
+	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
+	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
+	MLX5_SET(mkc, mkc, umr_en, 1);
+	MLX5_SET(mkc, mkc, lr, 1);
+	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS);
+	MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
 	if (err)
 		goto free;
 
@@ -1811,7 +1833,7 @@
 				   mr->desc_size * mr->max_descs,
 				   DMA_TO_DEVICE);
 
-	if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
 		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
 	else
 		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 0dd7d93..9529b46 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -726,7 +726,7 @@
 static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 			  struct mlx5_ib_qp *qp, struct ib_udata *udata,
 			  struct ib_qp_init_attr *attr,
-			  struct mlx5_create_qp_mbox_in **in,
+			  u32 **in,
 			  struct mlx5_ib_create_qp_resp *resp, int *inlen,
 			  struct mlx5_ib_qp_base *base)
 {
@@ -739,6 +739,8 @@
 	u32 offset = 0;
 	int uuarn;
 	int ncont = 0;
+	__be64 *pas;
+	void *qpc;
 	int err;
 
 	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@@ -795,20 +797,24 @@
 		ubuffer->umem = NULL;
 	}
 
-	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
+	*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+		 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont;
 	*in = mlx5_vzalloc(*inlen);
 	if (!*in) {
 		err = -ENOMEM;
 		goto err_umem;
 	}
-	if (ubuffer->umem)
-		mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift,
-				     (*in)->pas, 0);
-	(*in)->ctx.log_pg_sz_remote_qpn =
-		cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
-	(*in)->ctx.params2 = cpu_to_be32(offset << 6);
 
-	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
+	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
+	if (ubuffer->umem)
+		mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
+
+	qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+
+	MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET(qpc, qpc, page_offset, offset);
+
+	MLX5_SET(qpc, qpc, uar_page, uar_index);
 	resp->uuar_index = uuarn;
 	qp->uuarn = uuarn;
 
@@ -857,12 +863,13 @@
 static int create_kernel_qp(struct mlx5_ib_dev *dev,
 			    struct ib_qp_init_attr *init_attr,
 			    struct mlx5_ib_qp *qp,
-			    struct mlx5_create_qp_mbox_in **in, int *inlen,
+			    u32 **in, int *inlen,
 			    struct mlx5_ib_qp_base *base)
 {
 	enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
 	struct mlx5_uuar_info *uuari;
 	int uar_index;
+	void *qpc;
 	int uuarn;
 	int err;
 
@@ -902,25 +909,29 @@
 	}
 
 	qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
-	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages;
+	*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+		 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
 	*in = mlx5_vzalloc(*inlen);
 	if (!*in) {
 		err = -ENOMEM;
 		goto err_buf;
 	}
-	(*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index);
-	(*in)->ctx.log_pg_sz_remote_qpn =
-		cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
+
+	qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
+	MLX5_SET(qpc, qpc, uar_page, uar_index);
+	MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
 	/* Set "fast registration enabled" for all kernel QPs */
-	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
-	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
+	MLX5_SET(qpc, qpc, fre, 1);
+	MLX5_SET(qpc, qpc, rlky, 1);
 
 	if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
-		(*in)->ctx.deth_sqpn = cpu_to_be32(1);
+		MLX5_SET(qpc, qpc, deth_sqpn, 1);
 		qp->flags |= MLX5_IB_QP_SQPN_QP1;
 	}
 
-	mlx5_fill_page_array(&qp->buf, (*in)->pas);
+	mlx5_fill_page_array(&qp->buf,
+			     (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
 
 	err = mlx5_db_alloc(dev->mdev, &qp->db);
 	if (err) {
@@ -974,15 +985,15 @@
 	free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
 }
 
-static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
+static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
 {
 	if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) ||
 	    (attr->qp_type == IB_QPT_XRC_INI))
-		return cpu_to_be32(MLX5_SRQ_RQ);
+		return MLX5_SRQ_RQ;
 	else if (!qp->has_rq)
-		return cpu_to_be32(MLX5_ZERO_LEN_RQ);
+		return MLX5_ZERO_LEN_RQ;
 	else
-		return cpu_to_be32(MLX5_NON_ZERO_RQ);
+		return MLX5_NON_ZERO_RQ;
 }
 
 static int is_connected(enum ib_qp_type qp_type)
@@ -996,13 +1007,10 @@
 static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
 				    struct mlx5_ib_sq *sq, u32 tdn)
 {
-	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(tisc, tisc, transport_domain, tdn);
-
 	return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
 }
 
@@ -1191,7 +1199,7 @@
 }
 
 static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
-				struct mlx5_create_qp_mbox_in *in,
+				u32 *in,
 				struct ib_pd *pd)
 {
 	struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
@@ -1449,6 +1457,7 @@
 	kvfree(in);
 	/* qpn is reserved for that QP */
 	qp->trans_qp.base.mqp.qpn = 0;
+	qp->flags |= MLX5_IB_QP_RSS;
 	return 0;
 
 err:
@@ -1461,18 +1470,18 @@
 			    struct ib_udata *udata, struct mlx5_ib_qp *qp)
 {
 	struct mlx5_ib_resources *devr = &dev->devr;
+	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
 	struct mlx5_core_dev *mdev = dev->mdev;
-	struct mlx5_ib_qp_base *base;
 	struct mlx5_ib_create_qp_resp resp;
-	struct mlx5_create_qp_mbox_in *in;
-	struct mlx5_ib_create_qp ucmd;
 	struct mlx5_ib_cq *send_cq;
 	struct mlx5_ib_cq *recv_cq;
 	unsigned long flags;
-	int inlen = sizeof(*in);
-	int err;
 	u32 uidx = MLX5_IB_DEFAULT_UIDX;
+	struct mlx5_ib_create_qp ucmd;
+	struct mlx5_ib_qp_base *base;
 	void *qpc;
+	u32 *in;
+	int err;
 
 	base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
 	       &qp->raw_packet_qp.rq.base :
@@ -1600,7 +1609,7 @@
 		if (err)
 			return err;
 	} else {
-		in = mlx5_vzalloc(sizeof(*in));
+		in = mlx5_vzalloc(inlen);
 		if (!in)
 			return -ENOMEM;
 
@@ -1610,26 +1619,29 @@
 	if (is_sqp(init_attr->qp_type))
 		qp->port = init_attr->port_num;
 
-	in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 |
-				    MLX5_QP_PM_MIGRATED << 11);
+	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+
+	MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type));
+	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
 
 	if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR)
-		in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn);
+		MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn);
 	else
-		in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE);
+		MLX5_SET(qpc, qpc, latency_sensitive, 1);
+
 
 	if (qp->wq_sig)
-		in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG);
+		MLX5_SET(qpc, qpc, wq_signature, 1);
 
 	if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK)
-		in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST);
+		MLX5_SET(qpc, qpc, block_lb_mc, 1);
 
 	if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
-		in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER);
+		MLX5_SET(qpc, qpc, cd_master, 1);
 	if (qp->flags & MLX5_IB_QP_MANAGED_SEND)
-		in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND);
+		MLX5_SET(qpc, qpc, cd_slave_send, 1);
 	if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
-		in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV);
+		MLX5_SET(qpc, qpc, cd_slave_receive, 1);
 
 	if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
 		int rcqe_sz;
@@ -1639,71 +1651,68 @@
 		scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq);
 
 		if (rcqe_sz == 128)
-			in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE;
+			MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
 		else
-			in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE;
+			MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
 
 		if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) {
 			if (scqe_sz == 128)
-				in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE;
+				MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE);
 			else
-				in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE;
+				MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
 		}
 	}
 
 	if (qp->rq.wqe_cnt) {
-		in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4);
-		in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3;
+		MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4);
+		MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt));
 	}
 
-	in->ctx.rq_type_srqn = get_rx_type(qp, init_attr);
+	MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr));
 
 	if (qp->sq.wqe_cnt)
-		in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11);
+		MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt));
 	else
-		in->ctx.sq_crq_size |= cpu_to_be16(0x8000);
+		MLX5_SET(qpc, qpc, no_sq, 1);
 
 	/* Set default resources */
 	switch (init_attr->qp_type) {
 	case IB_QPT_XRC_TGT:
-		in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
-		in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
-		in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
-		in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn);
+		MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
+		MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn);
+		MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
+		MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn);
 		break;
 	case IB_QPT_XRC_INI:
-		in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn);
-		in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
-		in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn);
+		MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
+		MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+		MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn);
 		break;
 	default:
 		if (init_attr->srq) {
-			in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn);
-			in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn);
+			MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn);
+			MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn);
 		} else {
-			in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn);
-			in->ctx.rq_type_srqn |=
-				cpu_to_be32(to_msrq(devr->s1)->msrq.srqn);
+			MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn);
+			MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn);
 		}
 	}
 
 	if (init_attr->send_cq)
-		in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn);
+		MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn);
 
 	if (init_attr->recv_cq)
-		in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn);
+		MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn);
 
-	in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma);
+	MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
 
-	if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
-		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
-		/* 0xffffff means we ask to work with cqe version 0 */
+	/* 0xffffff means we ask to work with cqe version 0 */
+	if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1)
 		MLX5_SET(qpc, qpc, user_index, uidx);
-	}
+
 	/* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
 	if (init_attr->qp_type == IB_QPT_UD &&
 	    (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
-		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 		MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
 		qp->flags |= MLX5_IB_QP_LSO;
 	}
@@ -1860,7 +1869,6 @@
 {
 	struct mlx5_ib_cq *send_cq, *recv_cq;
 	struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
-	struct mlx5_modify_qp_mbox_in *in;
 	unsigned long flags;
 	int err;
 
@@ -1873,16 +1881,12 @@
 	       &qp->raw_packet_qp.rq.base :
 	       &qp->trans_qp.base;
 
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
-	if (!in)
-		return;
-
 	if (qp->state != IB_QPS_RESET) {
 		if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
 			mlx5_ib_qp_disable_pagefaults(qp);
 			err = mlx5_core_qp_modify(dev->mdev,
-						  MLX5_CMD_OP_2RST_QP, in, 0,
-						  &base->mqp);
+						  MLX5_CMD_OP_2RST_QP, 0,
+						  NULL, &base->mqp);
 		} else {
 			err = modify_raw_packet_qp(dev, qp,
 						   MLX5_CMD_OP_2RST_QP);
@@ -1924,8 +1928,6 @@
 				     base->mqp.qpn);
 	}
 
-	kfree(in);
-
 	if (qp->create_type == MLX5_QP_KERNEL)
 		destroy_qp_kernel(dev, qp);
 	else if (qp->create_type == MLX5_QP_USER)
@@ -2511,7 +2513,6 @@
 	struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
 	struct mlx5_ib_cq *send_cq, *recv_cq;
 	struct mlx5_qp_context *context;
-	struct mlx5_modify_qp_mbox_in *in;
 	struct mlx5_ib_pd *pd;
 	enum mlx5_qp_state mlx5_cur, mlx5_new;
 	enum mlx5_qp_optpar optpar;
@@ -2520,11 +2521,10 @@
 	int err;
 	u16 op;
 
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
-	if (!in)
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
 		return -ENOMEM;
 
-	context = &in->ctx;
 	err = to_mlx5_st(ibqp->qp_type);
 	if (err < 0) {
 		mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
@@ -2689,12 +2689,11 @@
 	op = optab[mlx5_cur][mlx5_new];
 	optpar = ib_mask_to_mlx5_opt(attr_mask);
 	optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
-	in->optparam = cpu_to_be32(optpar);
 
 	if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET)
 		err = modify_raw_packet_qp(dev, qp, op);
 	else
-		err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event,
+		err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
 					  &base->mqp);
 	if (err)
 		goto out;
@@ -2735,7 +2734,7 @@
 	}
 
 out:
-	kfree(in);
+	kfree(context);
 	return err;
 }
 
@@ -2968,7 +2967,7 @@
 
 	memset(umr, 0, sizeof(*umr));
 
-	if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
 		/* KLMs take twice the size of MTTs */
 		ndescs *= 2;
 
@@ -3111,9 +3110,9 @@
 
 	memset(seg, 0, sizeof(*seg));
 
-	if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
+	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT)
 		seg->log2_page_size = ilog2(mr->ibmr.page_size);
-	else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+	else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
 		/* KLMs take twice the size of MTTs */
 		ndescs *= 2;
 
@@ -3454,7 +3453,7 @@
 	memset(seg, 0, sizeof(*seg));
 
 	seg->flags = get_umr_flags(wr->access_flags) |
-				   MLX5_ACCESS_MODE_KLM;
+				   MLX5_MKC_ACCESS_MODE_KLMS;
 	seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
 	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
 				    MLX5_MKEY_BSF_EN | pdn);
@@ -3658,12 +3657,8 @@
 		     struct ib_send_wr *wr, unsigned *idx,
 		     int *size, int nreq)
 {
-	int err = 0;
-
-	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) {
-		err = -ENOMEM;
-		return err;
-	}
+	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
+		return -ENOMEM;
 
 	*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
 	*seg = mlx5_get_send_wqe(qp, *idx);
@@ -3679,7 +3674,7 @@
 	*seg += sizeof(**ctrl);
 	*size = sizeof(**ctrl) / 16;
 
-	return err;
+	return 0;
 }
 
 static void finish_wqe(struct mlx5_ib_qp *qp,
@@ -3758,7 +3753,7 @@
 		num_sge = wr->num_sge;
 		if (unlikely(num_sge > qp->sq.max_gs)) {
 			mlx5_ib_warn(dev, "\n");
-			err = -ENOMEM;
+			err = -EINVAL;
 			*bad_wr = wr;
 			goto out;
 		}
@@ -4320,21 +4315,24 @@
 static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 			 struct ib_qp_attr *qp_attr)
 {
-	struct mlx5_query_qp_mbox_out *outb;
+	int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
 	struct mlx5_qp_context *context;
 	int mlx5_state;
+	u32 *outb;
 	int err = 0;
 
-	outb = kzalloc(sizeof(*outb), GFP_KERNEL);
+	outb = kzalloc(outlen, GFP_KERNEL);
 	if (!outb)
 		return -ENOMEM;
 
-	context = &outb->ctx;
 	err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
-				 sizeof(*outb));
+				 outlen);
 	if (err)
 		goto out;
 
+	/* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
+	context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc);
+
 	mlx5_state = be32_to_cpu(context->flags) >> 28;
 
 	qp->state		     = to_ib_qp_state(mlx5_state);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 16740dc..67fc0b6 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1156,18 +1156,18 @@
 	attr->max_srq =
 		(rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >>
 		OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET;
-	attr->max_send_sge = ((rsp->max_write_send_sge &
+	attr->max_send_sge = ((rsp->max_recv_send_sge &
 			       OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
 			      OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
-	attr->max_recv_sge = (rsp->max_write_send_sge &
-			      OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
-	    OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT;
+	attr->max_recv_sge = (rsp->max_recv_send_sge &
+			      OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT;
 	attr->max_srq_sge = (rsp->max_srq_rqe_sge &
 			      OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >>
 	    OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET;
-	attr->max_rdma_sge = (rsp->max_write_send_sge &
-			      OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >>
-	    OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT;
+	attr->max_rdma_sge = (rsp->max_wr_rd_sge &
+			      OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK) >>
+	    OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT;
 	attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
 				OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
 	    OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 0efc966..37df448 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -554,9 +554,9 @@
 	OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK		= 0x18,
 	OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT		= 0,
 	OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK		= 0xFFFF,
-	OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT	= 16,
-	OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK		= 0xFFFF <<
-				OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT,
+	OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT	= 16,
+	OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK		= 0xFFFF <<
+				OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT,
 
 	OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT	= 0,
 	OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK	= 0xFFFF,
@@ -612,6 +612,8 @@
 	OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET		= 0,
 	OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK		= 0xFFFF <<
 				OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET,
+	OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT		= 0,
+	OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK		= 0xFFFF,
 };
 
 struct ocrdma_mbx_query_config {
@@ -619,7 +621,7 @@
 	struct ocrdma_mbx_rsp rsp;
 	u32 qp_srq_cq_ird_ord;
 	u32 max_pd_ca_ack_delay;
-	u32 max_write_send_sge;
+	u32 max_recv_send_sge;
 	u32 max_ird_ord_per_qp;
 	u32 max_shared_ird_ord;
 	u32 max_mr;
@@ -639,6 +641,8 @@
 	u32 max_wqes_rqes_per_q;
 	u32 max_cq_cqes_per_cq;
 	u32 max_srq_rqe_sge;
+	u32 max_wr_rd_sge;
+	u32 ird_pgsz_num_pages;
 };
 
 struct ocrdma_fw_ver_rsp {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index b1a3d91..0aa8547 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -125,8 +125,8 @@
 					IB_DEVICE_SYS_IMAGE_GUID |
 					IB_DEVICE_LOCAL_DMA_LKEY |
 					IB_DEVICE_MEM_MGT_EXTENSIONS;
-	attr->max_sge = dev->attr.max_send_sge;
-	attr->max_sge_rd = attr->max_sge;
+	attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge);
+	attr->max_sge_rd = dev->attr.max_rdma_sge;
 	attr->max_cq = dev->attr.max_cq;
 	attr->max_cqe = dev->attr.max_cqe;
 	attr->max_mr = dev->attr.max_mr;
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index bbf0a16..a3e21a2 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -52,6 +52,7 @@
 #include <linux/kref.h>
 #include <linux/sched.h>
 #include <linux/kthread.h>
+#include <rdma/ib_hdrs.h>
 #include <rdma/rdma_vt.h>
 
 #include "qib_common.h"
@@ -1131,7 +1132,6 @@
 extern struct qib_devdata *qib_lookup(int unit);
 extern u32 qib_cpulist_count;
 extern unsigned long *qib_cpulist;
-extern u16 qpt_mask;
 extern unsigned qib_cc_table_size;
 
 int qib_init(struct qib_devdata *, int);
diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c
index 5e75b43..5bad8e3 100644
--- a/drivers/infiniband/hw/qib/qib_debugfs.c
+++ b/drivers/infiniband/hw/qib/qib_debugfs.c
@@ -189,27 +189,32 @@
 DEBUGFS_FILE(ctx_stats)
 
 static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
+	__acquires(RCU)
 {
 	struct qib_qp_iter *iter;
 	loff_t n = *pos;
 
-	rcu_read_lock();
 	iter = qib_qp_iter_init(s->private);
+
+	/* stop calls rcu_read_unlock */
+	rcu_read_lock();
+
 	if (!iter)
 		return NULL;
 
-	while (n--) {
+	do {
 		if (qib_qp_iter_next(iter)) {
 			kfree(iter);
 			return NULL;
 		}
-	}
+	} while (n--);
 
 	return iter;
 }
 
 static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
 				   loff_t *pos)
+	__must_hold(RCU)
 {
 	struct qib_qp_iter *iter = iter_ptr;
 
@@ -224,6 +229,7 @@
 }
 
 static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
+	__releases(RCU)
 {
 	rcu_read_unlock();
 }
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 67ee643..728e0a0 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -319,8 +319,8 @@
 		ret = 1;
 	else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
 		/* For TIDERR and RC QPs premptively schedule a NAK */
-		struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
-		struct qib_other_headers *ohdr = NULL;
+		struct ib_header *hdr = (struct ib_header *)rhdr;
+		struct ib_other_headers *ohdr = NULL;
 		struct qib_ibport *ibp = &ppd->ibport_data;
 		struct qib_devdata *dd = ppd->dd;
 		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -588,8 +588,7 @@
 				qib_schedule_send(qp);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 		}
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
+		rvt_put_qp(qp);
 	}
 
 bail:
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index fcdf3791..c3edc03 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -328,26 +328,12 @@
 
 	pos = *ppos;
 
-	if (pos != 0) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (pos != 0 || count != sizeof(struct qib_flash))
+		return -EINVAL;
 
-	if (count != sizeof(struct qib_flash)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	tmp = kmalloc(count, GFP_KERNEL);
-	if (!tmp) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	if (copy_from_user(tmp, buf, count)) {
-		ret = -EFAULT;
-		goto bail_tmp;
-	}
+	tmp = memdup_user(buf, count);
+	if (IS_ERR(tmp))
+		return PTR_ERR(tmp);
 
 	dd = private2dd(file);
 	if (qib_eeprom_write(dd, pos, tmp, count)) {
@@ -361,8 +347,6 @@
 
 bail_tmp:
 	kfree(tmp);
-
-bail:
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ce40340..ded2717 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1415,7 +1415,7 @@
 	u32 *hdr;
 	u64 pbc;
 	const unsigned hdrwords = 7;
-	static struct qib_ib_header ibhdr = {
+	static struct ib_header ibhdr = {
 		.lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH),
 		.lrh[1] = IB_LID_PERMISSIVE,
 		.lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC),
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 9cc0aae1..99d31ef 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -41,14 +41,6 @@
 
 #include "qib.h"
 
-/*
- * mask field which was present in now deleted qib_qpn_table
- * is not present in rvt_qpn_table. Defining the same field
- * as qpt_mask here instead of adding the mask field to
- * rvt_qpn_table.
- */
-u16 qpt_mask;
-
 static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
 			      struct rvt_qpn_map *map, unsigned off)
 {
@@ -57,7 +49,7 @@
 
 static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
 					struct rvt_qpn_map *map, unsigned off,
-					unsigned n)
+					unsigned n, u16 qpt_mask)
 {
 	if (qpt_mask) {
 		off++;
@@ -179,6 +171,7 @@
 	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
 	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
 					      verbs_dev);
+	u16 qpt_mask = dd->qpn_mask;
 
 	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 		unsigned n;
@@ -215,7 +208,7 @@
 				goto bail;
 			}
 			offset = find_next_offset(qpt, map, offset,
-				dd->n_krcv_queues);
+				dd->n_krcv_queues, qpt_mask);
 			qpn = mk_qpn(qpt, map, offset);
 			/*
 			 * This test differs from alloc_pidmap().
@@ -573,10 +566,6 @@
 		return NULL;
 
 	iter->dev = dev;
-	if (qib_qp_iter_next(iter)) {
-		kfree(iter);
-		return NULL;
-	}
 
 	return iter;
 }
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 444028a..2097512 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -75,7 +75,7 @@
  * Note the QP s_lock must be held.
  */
 static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
-			   struct qib_other_headers *ohdr, u32 pmtu)
+			   struct ib_other_headers *ohdr, u32 pmtu)
 {
 	struct rvt_ack_entry *e;
 	u32 hwords;
@@ -154,10 +154,7 @@
 			len = 0;
 			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
 			ohdr->u.at.aeth = qib_compute_aeth(qp);
-			ohdr->u.at.atomic_ack_eth[0] =
-				cpu_to_be32(e->atomic_data >> 32);
-			ohdr->u.at.atomic_ack_eth[1] =
-				cpu_to_be32(e->atomic_data);
+			ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
 			hwords += sizeof(ohdr->u.at) / sizeof(u32);
 			bth2 = e->psn & QIB_PSN_MASK;
 			e->sent = 1;
@@ -234,7 +231,7 @@
 {
 	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
-	struct qib_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct rvt_sge_state *ss;
 	struct rvt_swqe *wqe;
 	u32 hwords;
@@ -444,20 +441,18 @@
 			}
 			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
-				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->atomic_wr.swap);
-				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-					wqe->atomic_wr.compare_add);
+				put_ib_ateth_swap(wqe->atomic_wr.swap,
+						  &ohdr->u.atomic_eth);
+				put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+						  &ohdr->u.atomic_eth);
 			} else {
 				qp->s_state = OP(FETCH_ADD);
-				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-					wqe->atomic_wr.compare_add);
-				ohdr->u.atomic_eth.compare_data = 0;
+				put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+						  &ohdr->u.atomic_eth);
+				put_ib_ateth_swap(0, &ohdr->u.atomic_eth);
 			}
-			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-				wqe->atomic_wr.remote_addr >> 32);
-			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-				wqe->atomic_wr.remote_addr);
+			put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
+					   &ohdr->u.atomic_eth);
 			ohdr->u.atomic_eth.rkey = cpu_to_be32(
 				wqe->atomic_wr.rkey);
 			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
@@ -632,8 +627,8 @@
 	u32 hwords;
 	u32 pbufn;
 	u32 __iomem *piobuf;
-	struct qib_ib_header hdr;
-	struct qib_other_headers *ohdr;
+	struct ib_header hdr;
+	struct ib_other_headers *ohdr;
 	u32 control;
 	unsigned long flags;
 
@@ -942,9 +937,9 @@
 /*
  * This should be called with the QP s_lock held and interrupts disabled.
  */
-void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr)
+void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
 {
-	struct qib_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct rvt_swqe *wqe;
 	struct ib_wc wc;
 	unsigned i;
@@ -1177,7 +1172,7 @@
 				qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 				if (list_empty(&qp->rspwait)) {
 					qp->r_flags |= RVT_R_RSP_SEND;
-					atomic_inc(&qp->refcount);
+					rvt_get_qp(qp);
 					list_add_tail(&qp->rspwait,
 						      &rcd->qp_wait_list);
 				}
@@ -1361,7 +1356,7 @@
 	qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 	if (list_empty(&qp->rspwait)) {
 		qp->r_flags |= RVT_R_RSP_SEND;
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 }
@@ -1383,7 +1378,7 @@
  * Called at interrupt level.
  */
 static void qib_rc_rcv_resp(struct qib_ibport *ibp,
-			    struct qib_other_headers *ohdr,
+			    struct ib_other_headers *ohdr,
 			    void *data, u32 tlen,
 			    struct rvt_qp *qp,
 			    u32 opcode,
@@ -1463,12 +1458,9 @@
 	case OP(ATOMIC_ACKNOWLEDGE):
 	case OP(RDMA_READ_RESPONSE_FIRST):
 		aeth = be32_to_cpu(ohdr->u.aeth);
-		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-			__be32 *p = ohdr->u.at.atomic_ack_eth;
-
-			val = ((u64) be32_to_cpu(p[0]) << 32) |
-				be32_to_cpu(p[1]);
-		} else
+		if (opcode == OP(ATOMIC_ACKNOWLEDGE))
+			val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
+		else
 			val = 0;
 		if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
@@ -1608,7 +1600,7 @@
  * Return 1 if no more processing is needed; otherwise return 0 to
  * schedule a response to be sent.
  */
-static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
+static int qib_rc_rcv_error(struct ib_other_headers *ohdr,
 			    void *data,
 			    struct rvt_qp *qp,
 			    u32 opcode,
@@ -1640,7 +1632,7 @@
 			 */
 			if (list_empty(&qp->rspwait)) {
 				qp->r_flags |= RVT_R_RSP_NAK;
-				atomic_inc(&qp->refcount);
+				rvt_get_qp(qp);
 				list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 			}
 		}
@@ -1848,11 +1840,11 @@
  * for the given QP.
  * Called at interrupt level.
  */
-void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
-	struct qib_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	u32 opcode;
 	u32 hdrsize;
 	u32 psn;
@@ -2177,8 +2169,7 @@
 			e->rdma_sge.mr = NULL;
 		}
 		ateth = &ohdr->u.atomic_eth;
-		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
-			be32_to_cpu(ateth->vaddr[1]);
+		vaddr = get_ib_ateth_vaddr(ateth);
 		if (unlikely(vaddr & (sizeof(u64) - 1)))
 			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
@@ -2189,11 +2180,11 @@
 			goto nack_acc_unlck;
 		/* Perform atomic OP and save result. */
 		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-		sdata = be64_to_cpu(ateth->swap_data);
+		sdata = get_ib_ateth_swap(ateth);
 		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
 			(u64) atomic64_add_return(sdata, maddr) - sdata :
 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-				      be64_to_cpu(ateth->compare_data),
+				      get_ib_ateth_compare(ateth),
 				      sdata);
 		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
@@ -2233,7 +2224,7 @@
 	/* Queue RNR NAK for later */
 	if (list_empty(&qp->rspwait)) {
 		qp->r_flags |= RVT_R_RSP_NAK;
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 	return;
@@ -2245,7 +2236,7 @@
 	/* Queue NAK for later */
 	if (list_empty(&qp->rspwait)) {
 		qp->r_flags |= RVT_R_RSP_NAK;
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 	return;
@@ -2259,7 +2250,7 @@
 	/* Queue NAK for later */
 	if (list_empty(&qp->rspwait)) {
 		qp->r_flags |= RVT_R_RSP_NAK;
-		atomic_inc(&qp->refcount);
+		rvt_get_qp(qp);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 	return;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index b677792..de1bde5 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -265,7 +265,7 @@
  *
  * The s_lock will be acquired around the qib_migrate_qp() call.
  */
-int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
 		      int has_grh, struct rvt_qp *qp, u32 bth0)
 {
 	__be64 guid;
@@ -680,7 +680,7 @@
 	return sizeof(struct ib_grh) / sizeof(u32);
 }
 
-void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
 			 u32 bth0, u32 bth2)
 {
 	struct qib_qp_priv *priv = qp->priv;
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 1d61bd0..5b2d483 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -48,7 +48,7 @@
 int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
 {
 	struct qib_qp_priv *priv = qp->priv;
-	struct qib_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct rvt_swqe *wqe;
 	u32 hwords;
 	u32 bth0;
@@ -236,10 +236,10 @@
  * for the given QP.
  * Called at interrupt level.
  */
-void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
-	struct qib_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	u32 opcode;
 	u32 hdrsize;
 	u32 psn;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 10d0625..f45cad1 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -245,7 +245,7 @@
 int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
 {
 	struct qib_qp_priv *priv = qp->priv;
-	struct qib_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct ib_ah_attr *ah_attr;
 	struct qib_pportdata *ppd;
 	struct qib_ibport *ibp;
@@ -435,10 +435,10 @@
  * for the given QP.
  * Called at interrupt level.
  */
-void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
-	struct qib_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	int opcode;
 	u32 hdrsize;
 	u32 pad;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index fd1dfbc..876ebb4 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -313,7 +313,7 @@
  * for the given QP.
  * Called at interrupt level.
  */
-static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 		       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
@@ -366,10 +366,10 @@
 {
 	struct qib_pportdata *ppd = rcd->ppd;
 	struct qib_ibport *ibp = &ppd->ibport_data;
-	struct qib_ib_header *hdr = rhdr;
+	struct ib_header *hdr = rhdr;
 	struct qib_devdata *dd = ppd->dd;
 	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
-	struct qib_other_headers *ohdr;
+	struct ib_other_headers *ohdr;
 	struct rvt_qp *qp;
 	u32 qp_num;
 	int lnh;
@@ -841,7 +841,7 @@
 	if (tx->wqe)
 		qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
 	else if (qp->ibqp.qp_type == IB_QPT_RC) {
-		struct qib_ib_header *hdr;
+		struct ib_header *hdr;
 
 		if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
 			hdr = &tx->align_buf->hdr;
@@ -889,7 +889,7 @@
 	return ret;
 }
 
-static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
+static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr,
 			      u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 			      u32 plen, u32 dwords)
 {
@@ -1025,7 +1025,7 @@
 	return ret;
 }
 
-static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
+static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
 			      u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 			      u32 plen, u32 dwords)
 {
@@ -1133,7 +1133,7 @@
  * Return zero if packet is sent or queued OK.
  * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
  */
-int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
 		   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
@@ -1606,8 +1606,6 @@
 	/* Only need to initialize non-zero fields. */
 	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
 
-	qpt_mask = dd->qpn_mask;
-
 	INIT_LIST_HEAD(&dev->piowait);
 	INIT_LIST_HEAD(&dev->dmawait);
 	INIT_LIST_HEAD(&dev->txwait);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 736ced6..94fd30f 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -45,6 +45,7 @@
 #include <linux/completion.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_hdrs.h>
 #include <rdma/rdma_vt.h>
 #include <rdma/rdmavt_cq.h>
 
@@ -63,16 +64,6 @@
  */
 #define QIB_UVERBS_ABI_VERSION       2
 
-#define IB_SEQ_NAK	(3 << 29)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK                      0x20
-#define IB_NAK_PSN_ERROR                0x60
-#define IB_NAK_INVALID_REQUEST          0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR      0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST       0x64
-
 /* IB Performance Manager status values */
 #define IB_PMA_SAMPLE_STATUS_DONE       0x00
 #define IB_PMA_SAMPLE_STATUS_STARTED    0x01
@@ -87,22 +78,9 @@
 
 #define QIB_VENDOR_IPG		cpu_to_be16(0xFFA0)
 
-#define IB_BTH_REQ_ACK		(1 << 31)
-#define IB_BTH_SOLICITED	(1 << 23)
-#define IB_BTH_MIG_REQ		(1 << 22)
-
 /* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
 #define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
 
-#define IB_GRH_VERSION		6
-#define IB_GRH_VERSION_MASK	0xF
-#define IB_GRH_VERSION_SHIFT	28
-#define IB_GRH_TCLASS_MASK	0xFF
-#define IB_GRH_TCLASS_SHIFT	20
-#define IB_GRH_FLOW_MASK	0xFFFFF
-#define IB_GRH_FLOW_SHIFT	0
-#define IB_GRH_NEXT_HDR		0x1B
-
 #define IB_DEFAULT_GID_PREFIX	cpu_to_be64(0xfe80000000000000ULL)
 
 /* Values for set/get portinfo VLCap OperationalVLs */
@@ -129,61 +107,9 @@
 	}
 }
 
-struct ib_reth {
-	__be64 vaddr;
-	__be32 rkey;
-	__be32 length;
-} __packed;
-
-struct ib_atomic_eth {
-	__be32 vaddr[2];        /* unaligned so access as 2 32-bit words */
-	__be32 rkey;
-	__be64 swap_data;
-	__be64 compare_data;
-} __packed;
-
-struct qib_other_headers {
-	__be32 bth[3];
-	union {
-		struct {
-			__be32 deth[2];
-			__be32 imm_data;
-		} ud;
-		struct {
-			struct ib_reth reth;
-			__be32 imm_data;
-		} rc;
-		struct {
-			__be32 aeth;
-			__be32 atomic_ack_eth[2];
-		} at;
-		__be32 imm_data;
-		__be32 aeth;
-		__be32 ieth;
-		struct ib_atomic_eth atomic_eth;
-	} u;
-} __packed;
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data).  Only the first 56 bytes of the IB header
- * will be in the eager header buffer.  The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct qib_ib_header {
-	__be16 lrh[4];
-	union {
-		struct {
-			struct ib_grh grh;
-			struct qib_other_headers oth;
-		} l;
-		struct qib_other_headers oth;
-	} u;
-} __packed;
-
 struct qib_pio_header {
 	__le32 pbc[2];
-	struct qib_ib_header hdr;
+	struct ib_header hdr;
 } __packed;
 
 /*
@@ -191,7 +117,7 @@
  * is made common.
  */
 struct qib_qp_priv {
-	struct qib_ib_header *s_hdr;    /* next packet header to send */
+	struct ib_header *s_hdr;        /* next packet header to send */
 	struct list_head iowait;        /* link for wait PIO buf */
 	atomic_t s_dma_busy;
 	struct qib_verbs_txreq *s_tx;
@@ -376,7 +302,7 @@
 
 void qib_put_txreq(struct qib_verbs_txreq *tx);
 
-int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
 		   u32 hdrwords, struct rvt_sge_state *ss, u32 len);
 
 void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
@@ -384,10 +310,10 @@
 
 void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
 
-void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
-void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
@@ -398,13 +324,13 @@
 
 void qib_rc_rnr_retry(unsigned long arg);
 
-void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr);
+void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
 
 void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
 
 int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
 
-void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 void mr_rcu_callback(struct rcu_head *list);
@@ -413,13 +339,13 @@
 
 void qib_migrate_qp(struct rvt_qp *qp);
 
-int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
 		      int has_grh, struct rvt_qp *qp, u32 bth0);
 
 u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
 		 struct ib_global_route *grh, u32 hwords, u32 nwords);
 
-void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
 			 u32 bth0, u32 bth2);
 
 void _qib_do_send(struct work_struct *work);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index c229b9f..0a89a95 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -664,7 +664,8 @@
 		return err;
 	}
 
-	if (pci_register_driver(&usnic_ib_pci_driver)) {
+	err = pci_register_driver(&usnic_ib_pci_driver);
+	if (err) {
 		usnic_err("Unable to register with PCI\n");
 		goto out_umem_fini;
 	}
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 80c4b6b..46b6497 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -294,7 +294,7 @@
 {
 	rvt_deinit_mregion(&mr->mr);
 	rvt_free_lkey(&mr->mr);
-	vfree(mr);
+	kfree(mr);
 }
 
 /**
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index bdb540f..6500c3b 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -488,60 +488,23 @@
 	spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
 	if (removed) {
 		synchronize_rcu();
-		if (atomic_dec_and_test(&qp->refcount))
-			wake_up(&qp->wait);
+		rvt_put_qp(qp);
 	}
 }
 
 /**
- * reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
+ * rvt_init_qp - initialize the QP state to the reset state
+ * @qp: the QP to init or reinit
  * @type: the QP type
- * r and s lock are required to be held by the caller
+ *
+ * This function is called from both rvt_create_qp() and
+ * rvt_reset_qp().   The difference is that the reset
+ * patch the necessary locks to protect against concurent
+ * access.
  */
-static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
-		  enum ib_qp_type type)
-	__releases(&qp->s_lock)
-	__releases(&qp->s_hlock)
-	__releases(&qp->r_lock)
-	__acquires(&qp->r_lock)
-	__acquires(&qp->s_hlock)
-	__acquires(&qp->s_lock)
+static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			enum ib_qp_type type)
 {
-	if (qp->state != IB_QPS_RESET) {
-		qp->state = IB_QPS_RESET;
-
-		/* Let drivers flush their waitlist */
-		rdi->driver_f.flush_qp_waiters(qp);
-		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
-		spin_unlock(&qp->s_lock);
-		spin_unlock(&qp->s_hlock);
-		spin_unlock_irq(&qp->r_lock);
-
-		/* Stop the send queue and the retry timer */
-		rdi->driver_f.stop_send_queue(qp);
-
-		/* Wait for things to stop */
-		rdi->driver_f.quiesce_qp(qp);
-
-		/* take qp out the hash and wait for it to be unused */
-		rvt_remove_qp(rdi, qp);
-		wait_event(qp->wait, !atomic_read(&qp->refcount));
-
-		/* grab the lock b/c it was locked at call time */
-		spin_lock_irq(&qp->r_lock);
-		spin_lock(&qp->s_hlock);
-		spin_lock(&qp->s_lock);
-
-		rvt_clear_mr_refs(qp, 1);
-	}
-
-	/*
-	 * Let the driver do any tear down it needs to for a qp
-	 * that has been reset
-	 */
-	rdi->driver_f.notify_qp_reset(qp);
-
 	qp->remote_qpn = 0;
 	qp->qkey = 0;
 	qp->qp_access_flags = 0;
@@ -587,6 +550,60 @@
 }
 
 /**
+ * rvt_reset_qp - initialize the QP state to the reset state
+ * @qp: the QP to reset
+ * @type: the QP type
+ *
+ * r_lock, s_hlock, and s_lock are required to be held by the caller
+ */
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			 enum ib_qp_type type)
+	__must_hold(&qp->s_lock)
+	__must_hold(&qp->s_hlock)
+	__must_hold(&qp->r_lock)
+{
+	lockdep_assert_held(&qp->r_lock);
+	lockdep_assert_held(&qp->s_hlock);
+	lockdep_assert_held(&qp->s_lock);
+	if (qp->state != IB_QPS_RESET) {
+		qp->state = IB_QPS_RESET;
+
+		/* Let drivers flush their waitlist */
+		rdi->driver_f.flush_qp_waiters(qp);
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
+		spin_unlock(&qp->s_lock);
+		spin_unlock(&qp->s_hlock);
+		spin_unlock_irq(&qp->r_lock);
+
+		/* Stop the send queue and the retry timer */
+		rdi->driver_f.stop_send_queue(qp);
+
+		/* Wait for things to stop */
+		rdi->driver_f.quiesce_qp(qp);
+
+		/* take qp out the hash and wait for it to be unused */
+		rvt_remove_qp(rdi, qp);
+		wait_event(qp->wait, !atomic_read(&qp->refcount));
+
+		/* grab the lock b/c it was locked at call time */
+		spin_lock_irq(&qp->r_lock);
+		spin_lock(&qp->s_hlock);
+		spin_lock(&qp->s_lock);
+
+		rvt_clear_mr_refs(qp, 1);
+		/*
+		 * Let the driver do any tear down or re-init it needs to for
+		 * a qp that has been reset
+		 */
+		rdi->driver_f.notify_qp_reset(qp);
+	}
+	rvt_init_qp(rdi, qp, type);
+	lockdep_assert_held(&qp->r_lock);
+	lockdep_assert_held(&qp->s_hlock);
+	lockdep_assert_held(&qp->s_lock);
+}
+
+/**
  * rvt_create_qp - create a queue pair for a device
  * @ibpd: the protection domain who's device we create the queue pair for
  * @init_attr: the attributes of the queue pair
@@ -766,7 +783,7 @@
 		}
 		qp->ibqp.qp_num = err;
 		qp->port_num = init_attr->port_num;
-		rvt_reset_qp(rdi, qp, init_attr->qp_type);
+		rvt_init_qp(rdi, qp, init_attr->qp_type);
 		break;
 
 	default:
@@ -873,7 +890,8 @@
 	free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
 
 bail_rq_wq:
-	vfree(qp->r_rq.wq);
+	if (!qp->ip)
+		vfree(qp->r_rq.wq);
 
 bail_driver_priv:
 	rdi->driver_f.qp_priv_free(rdi, qp);
@@ -905,6 +923,8 @@
 	int ret = 0;
 	struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
 
+	lockdep_assert_held(&qp->r_lock);
+	lockdep_assert_held(&qp->s_lock);
 	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
 		goto bail;
 
@@ -979,7 +999,7 @@
 	struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
 	unsigned long flags;
 
-	atomic_inc(&qp->refcount);
+	rvt_get_qp(qp);
 	spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
 
 	if (qp->ibqp.qp_num <= 1) {
@@ -996,7 +1016,7 @@
 }
 
 /**
- * qib_modify_qp - modify the attributes of a queue pair
+ * rvt_modify_qp - modify the attributes of a queue pair
  * @ibqp: the queue pair who's attributes we're modifying
  * @attr: the new attributes
  * @attr_mask: the mask of attributes to modify
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 55f0e8f..ddd5927 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -362,15 +362,34 @@
 		return err;
 	}
 
-	err = rxe_net_init();
+	err = rxe_net_ipv4_init();
 	if (err) {
-		pr_err("rxe: unable to init\n");
+		pr_err("rxe: unable to init ipv4 tunnel\n");
 		rxe_cache_exit();
-		return err;
+		goto exit;
 	}
+
+	err = rxe_net_ipv6_init();
+	if (err) {
+		pr_err("rxe: unable to init ipv6 tunnel\n");
+		rxe_cache_exit();
+		goto exit;
+	}
+
+	err = register_netdevice_notifier(&rxe_net_notifier);
+	if (err) {
+		pr_err("rxe: Failed to rigister netdev notifier\n");
+		goto exit;
+	}
+
 	pr_info("rxe: loaded\n");
 
 	return 0;
+
+exit:
+	rxe_release_udp_tunnel(recv_sockets.sk4);
+	rxe_release_udp_tunnel(recv_sockets.sk6);
+	return err;
 }
 
 static void __exit rxe_module_exit(void)
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 36f67de..1c59ef2 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -689,7 +689,14 @@
 					qp->req.need_retry = 1;
 					rxe_run_task(&qp->req.task, 1);
 				}
+
+				if (pkt) {
+					rxe_drop_ref(pkt->qp);
+					kfree_skb(skb);
+				}
+
 				goto exit;
+
 			} else {
 				wqe->status = IB_WC_RETRY_EXC_ERR;
 				state = COMPST_ERROR;
@@ -716,6 +723,12 @@
 		case COMPST_ERROR:
 			do_complete(qp, wqe);
 			rxe_qp_error(qp);
+
+			if (pkt) {
+				rxe_drop_ref(pkt->qp);
+				kfree_skb(skb);
+			}
+
 			goto exit;
 		}
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 0b8d2ea..eedf2f1 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -275,9 +275,10 @@
 	return sock;
 }
 
-static void rxe_release_udp_tunnel(struct socket *sk)
+void rxe_release_udp_tunnel(struct socket *sk)
 {
-	udp_tunnel_sock_release(sk);
+	if (sk)
+		udp_tunnel_sock_release(sk);
 }
 
 static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port,
@@ -658,51 +659,45 @@
 	return NOTIFY_OK;
 }
 
-static struct notifier_block rxe_net_notifier = {
+struct notifier_block rxe_net_notifier = {
 	.notifier_call = rxe_notify,
 };
 
-int rxe_net_init(void)
+int rxe_net_ipv4_init(void)
 {
-	int err;
+	spin_lock_init(&dev_list_lock);
+
+	recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
+				htons(ROCE_V2_UDP_DPORT), false);
+	if (IS_ERR(recv_sockets.sk4)) {
+		recv_sockets.sk4 = NULL;
+		pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+int rxe_net_ipv6_init(void)
+{
+#if IS_ENABLED(CONFIG_IPV6)
 
 	spin_lock_init(&dev_list_lock);
 
 	recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
-			htons(ROCE_V2_UDP_DPORT), true);
+						htons(ROCE_V2_UDP_DPORT), true);
 	if (IS_ERR(recv_sockets.sk6)) {
 		recv_sockets.sk6 = NULL;
 		pr_err("rxe: Failed to create IPv6 UDP tunnel\n");
 		return -1;
 	}
-
-	recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
-			htons(ROCE_V2_UDP_DPORT), false);
-	if (IS_ERR(recv_sockets.sk4)) {
-		rxe_release_udp_tunnel(recv_sockets.sk6);
-		recv_sockets.sk4 = NULL;
-		recv_sockets.sk6 = NULL;
-		pr_err("rxe: Failed to create IPv4 UDP tunnel\n");
-		return -1;
-	}
-
-	err = register_netdevice_notifier(&rxe_net_notifier);
-	if (err) {
-		rxe_release_udp_tunnel(recv_sockets.sk6);
-		rxe_release_udp_tunnel(recv_sockets.sk4);
-		pr_err("rxe: Failed to rigister netdev notifier\n");
-	}
-
-	return err;
+#endif
+	return 0;
 }
 
 void rxe_net_exit(void)
 {
-	if (recv_sockets.sk6)
-		rxe_release_udp_tunnel(recv_sockets.sk6);
-
-	if (recv_sockets.sk4)
-		rxe_release_udp_tunnel(recv_sockets.sk4);
-
+	rxe_release_udp_tunnel(recv_sockets.sk6);
+	rxe_release_udp_tunnel(recv_sockets.sk4);
 	unregister_netdevice_notifier(&rxe_net_notifier);
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 7b06f76..0daf7f0 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -44,10 +44,13 @@
 };
 
 extern struct rxe_recv_sockets recv_sockets;
+extern struct notifier_block rxe_net_notifier;
+void rxe_release_udp_tunnel(struct socket *sk);
 
 struct rxe_dev *rxe_net_add(struct net_device *ndev);
 
-int rxe_net_init(void);
+int rxe_net_ipv4_init(void);
+int rxe_net_ipv6_init(void);
 void rxe_net_exit(void);
 
 #endif /* RXE_NET_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 3d464c2..144d2f1 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -312,7 +312,7 @@
 		 * make a copy of the skb to post to the next qp
 		 */
 		skb_copy = (mce->qp_list.next != &mcg->qp_list) ?
-				skb_clone(skb, GFP_KERNEL) : NULL;
+				skb_clone(skb, GFP_ATOMIC) : NULL;
 
 		pkt->qp = qp;
 		rxe_add_ref(qp);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 33b2d9d..13a848a 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -511,24 +511,21 @@
 }
 
 static void update_wqe_state(struct rxe_qp *qp,
-			     struct rxe_send_wqe *wqe,
-			     struct rxe_pkt_info *pkt,
-			     enum wqe_state *prev_state)
+		struct rxe_send_wqe *wqe,
+		struct rxe_pkt_info *pkt)
 {
-	enum wqe_state prev_state_ = wqe->state;
-
 	if (pkt->mask & RXE_END_MASK) {
 		if (qp_type(qp) == IB_QPT_RC)
 			wqe->state = wqe_state_pending;
 	} else {
 		wqe->state = wqe_state_processing;
 	}
-
-	*prev_state = prev_state_;
 }
 
-static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-			 struct rxe_pkt_info *pkt, int payload)
+static void update_wqe_psn(struct rxe_qp *qp,
+			   struct rxe_send_wqe *wqe,
+			   struct rxe_pkt_info *pkt,
+			   int payload)
 {
 	/* number of packets left to send including current one */
 	int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu;
@@ -546,10 +543,35 @@
 		qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK;
 	else
 		qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK;
+}
 
+static void save_state(struct rxe_send_wqe *wqe,
+		       struct rxe_qp *qp,
+		       struct rxe_send_wqe *rollback_wqe,
+		       struct rxe_qp *rollback_qp)
+{
+	rollback_wqe->state     = wqe->state;
+	rollback_wqe->first_psn = wqe->first_psn;
+	rollback_wqe->last_psn  = wqe->last_psn;
+	rollback_qp->req.psn    = qp->req.psn;
+}
+
+static void rollback_state(struct rxe_send_wqe *wqe,
+			   struct rxe_qp *qp,
+			   struct rxe_send_wqe *rollback_wqe,
+			   struct rxe_qp *rollback_qp)
+{
+	wqe->state     = rollback_wqe->state;
+	wqe->first_psn = rollback_wqe->first_psn;
+	wqe->last_psn  = rollback_wqe->last_psn;
+	qp->req.psn    = rollback_qp->req.psn;
+}
+
+static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
+			 struct rxe_pkt_info *pkt, int payload)
+{
 	qp->req.opcode = pkt->opcode;
 
-
 	if (pkt->mask & RXE_END_MASK)
 		qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index);
 
@@ -571,7 +593,8 @@
 	int mtu;
 	int opcode;
 	int ret;
-	enum wqe_state prev_state;
+	struct rxe_qp rollback_qp;
+	struct rxe_send_wqe rollback_wqe;
 
 next_wqe:
 	if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -688,13 +711,21 @@
 		goto err;
 	}
 
-	update_wqe_state(qp, wqe, &pkt, &prev_state);
+	/*
+	 * To prevent a race on wqe access between requester and completer,
+	 * wqe members state and psn need to be set before calling
+	 * rxe_xmit_packet().
+	 * Otherwise, completer might initiate an unjustified retry flow.
+	 */
+	save_state(wqe, qp, &rollback_wqe, &rollback_qp);
+	update_wqe_state(qp, wqe, &pkt);
+	update_wqe_psn(qp, wqe, &pkt, payload);
 	ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
 	if (ret) {
 		qp->need_req_skb = 1;
 		kfree_skb(skb);
 
-		wqe->state = prev_state;
+		rollback_state(wqe, qp, &rollback_wqe, &rollback_qp);
 
 		if (ret == -EAGAIN) {
 			rxe_run_task(&qp->req.task, 1);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index ebb03b4..3e0f0f2 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -972,11 +972,13 @@
 	free_rd_atomic_resource(qp, res);
 	rxe_advance_resp_resource(qp);
 
+	memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb));
+
 	res->type = RXE_ATOMIC_MASK;
 	res->atomic.skb = skb;
-	res->first_psn = qp->resp.psn;
-	res->last_psn = qp->resp.psn;
-	res->cur_psn = qp->resp.psn;
+	res->first_psn = ack_pkt.psn;
+	res->last_psn  = ack_pkt.psn;
+	res->cur_psn   = ack_pkt.psn;
 
 	rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy);
 	if (rc) {
@@ -1116,8 +1118,7 @@
 				rc = RESPST_CLEANUP;
 				goto out;
 			}
-			bth_set_psn(SKB_TO_PKT(skb_copy),
-				    qp->resp.psn - 1);
+
 			/* Resend the result. */
 			rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
 					     pkt, skb_copy);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 4f7d9b4..9dbfcc0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -478,6 +478,7 @@
 		struct ipoib_ah *address, u32 qpn);
 void ipoib_reap_ah(struct work_struct *work);
 
+struct ipoib_path *__path_find(struct net_device *dev, void *gid);
 void ipoib_mark_paths_invalid(struct net_device *dev);
 void ipoib_flush_paths(struct net_device *dev);
 int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 951d9ab..4ad297d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1318,6 +1318,8 @@
 	}
 }
 
+#define QPN_AND_OPTIONS_OFFSET	4
+
 static void ipoib_cm_tx_start(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
@@ -1326,6 +1328,7 @@
 	struct ipoib_neigh *neigh;
 	struct ipoib_cm_tx *p;
 	unsigned long flags;
+	struct ipoib_path *path;
 	int ret;
 
 	struct ib_sa_path_rec pathrec;
@@ -1338,7 +1341,19 @@
 		p = list_entry(priv->cm.start_list.next, typeof(*p), list);
 		list_del_init(&p->list);
 		neigh = p->neigh;
+
 		qpn = IPOIB_QPN(neigh->daddr);
+		/*
+		 * As long as the search is with these 2 locks,
+		 * path existence indicates its validity.
+		 */
+		path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+		if (!path) {
+			pr_info("%s ignore not valid path %pI6\n",
+				__func__,
+				neigh->daddr + QPN_AND_OPTIONS_OFFSET);
+			goto free_neigh;
+		}
 		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
 
 		spin_unlock_irqrestore(&priv->lock, flags);
@@ -1350,6 +1365,7 @@
 		spin_lock_irqsave(&priv->lock, flags);
 
 		if (ret) {
+free_neigh:
 			neigh = p->neigh;
 			if (neigh) {
 				neigh->cm = NULL;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index dc6d241..be11d5d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1161,8 +1161,17 @@
 	}
 
 	if (level == IPOIB_FLUSH_LIGHT) {
+		int oper_up;
 		ipoib_mark_paths_invalid(dev);
+		/* Set IPoIB operation as down to prevent races between:
+		 * the flush flow which leaves MCG and on the fly joins
+		 * which can happen during that time. mcast restart task
+		 * should deal with join requests we missed.
+		 */
+		oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 		ipoib_mcast_dev_flush(dev);
+		if (oper_up)
+			set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 		ipoib_flush_ah(dev);
 	}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 74bcaa0..cc1c1b0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -485,7 +485,7 @@
 	return -EINVAL;
 }
 
-static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
+struct ipoib_path *__path_find(struct net_device *dev, void *gid)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct rb_node *n = priv->path_tree.rb_node;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index ba6be06..cae9bbc 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -403,6 +403,7 @@
 	INIT_LIST_HEAD(&isert_conn->node);
 	init_completion(&isert_conn->login_comp);
 	init_completion(&isert_conn->login_req_comp);
+	init_waitqueue_head(&isert_conn->rem_wait);
 	kref_init(&isert_conn->kref);
 	mutex_init(&isert_conn->mutex);
 	INIT_WORK(&isert_conn->release_work, isert_release_work);
@@ -448,7 +449,7 @@
 
 	isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL);
 	if (!isert_conn->login_rsp_buf) {
-		isert_err("Unable to allocate isert_conn->login_rspbuf\n");
+		ret = -ENOMEM;
 		goto out_unmap_login_req_buf;
 	}
 
@@ -578,7 +579,8 @@
 	BUG_ON(!device);
 
 	isert_free_rx_descriptors(isert_conn);
-	if (isert_conn->cm_id)
+	if (isert_conn->cm_id &&
+	    !isert_conn->dev_removed)
 		rdma_destroy_id(isert_conn->cm_id);
 
 	if (isert_conn->qp) {
@@ -593,7 +595,10 @@
 
 	isert_device_put(device);
 
-	kfree(isert_conn);
+	if (isert_conn->dev_removed)
+		wake_up_interruptible(&isert_conn->rem_wait);
+	else
+		kfree(isert_conn);
 }
 
 static void
@@ -753,6 +758,7 @@
 isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
 	struct isert_np *isert_np = cma_id->context;
+	struct isert_conn *isert_conn;
 	int ret = 0;
 
 	isert_info("%s (%d): status %d id %p np %p\n",
@@ -773,10 +779,21 @@
 		break;
 	case RDMA_CM_EVENT_ADDR_CHANGE:    /* FALLTHRU */
 	case RDMA_CM_EVENT_DISCONNECTED:   /* FALLTHRU */
-	case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:  /* FALLTHRU */
 		ret = isert_disconnected_handler(cma_id, event->event);
 		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		isert_conn = cma_id->qp->qp_context;
+		isert_conn->dev_removed = true;
+		isert_disconnected_handler(cma_id, event->event);
+		wait_event_interruptible(isert_conn->rem_wait,
+					 isert_conn->state == ISER_CONN_DOWN);
+		kfree(isert_conn);
+		/*
+		 * return non-zero from the callback to destroy
+		 * the rdma cm id
+		 */
+		return 1;
 	case RDMA_CM_EVENT_REJECTED:       /* FALLTHRU */
 	case RDMA_CM_EVENT_UNREACHABLE:    /* FALLTHRU */
 	case RDMA_CM_EVENT_CONNECT_ERROR:
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index fc791ef..c02ada5 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -158,6 +158,8 @@
 	struct work_struct	release_work;
 	bool                    logout_posted;
 	bool                    snd_w_inv;
+	wait_queue_head_t	rem_wait;
+	bool			dev_removed;
 };
 
 #define ISERT_MAX_CQ 64
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index dfa23b0..883bbfe 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -522,6 +522,11 @@
 	if (ret)
 		goto err_query_port;
 
+	snprintf(sport->port_guid, sizeof(sport->port_guid),
+		"0x%016llx%016llx",
+		be64_to_cpu(sport->gid.global.subnet_prefix),
+		be64_to_cpu(sport->gid.global.interface_id));
+
 	if (!sport->mad_agent) {
 		memset(&reg_req, 0, sizeof(reg_req));
 		reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
@@ -2548,10 +2553,6 @@
 			       sdev->device->name, i);
 			goto err_ring;
 		}
-		snprintf(sport->port_guid, sizeof(sport->port_guid),
-			"0x%016llx%016llx",
-			be64_to_cpu(sport->gid.global.subnet_prefix),
-			be64_to_cpu(sport->gid.global.interface_id));
 	}
 
 	spin_lock(&srpt_dev_lock);
diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c
index 5d11fea..f3135ae 100644
--- a/drivers/input/joydev.c
+++ b/drivers/input/joydev.c
@@ -950,6 +950,12 @@
 		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
 				INPUT_DEVICE_ID_MATCH_ABSBIT,
 		.evbit = { BIT_MASK(EV_ABS) },
+		.absbit = { BIT_MASK(ABS_Z) },
+	},
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_ABSBIT,
+		.evbit = { BIT_MASK(EV_ABS) },
 		.absbit = { BIT_MASK(ABS_WHEEL) },
 	},
 	{
diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c
index 7d61439..0c07e10 100644
--- a/drivers/input/keyboard/tegra-kbc.c
+++ b/drivers/input/keyboard/tegra-kbc.c
@@ -376,7 +376,7 @@
 	/* Reset the KBC controller to clear all previous status.*/
 	reset_control_assert(kbc->rst);
 	udelay(100);
-	reset_control_assert(kbc->rst);
+	reset_control_deassert(kbc->rst);
 	udelay(100);
 
 	tegra_kbc_config_pins(kbc);
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 65ebbd1..92595b9 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -1013,23 +1013,12 @@
 	.minor		= UINPUT_MINOR,
 	.name		= UINPUT_NAME,
 };
+module_misc_device(uinput_misc);
+
 MODULE_ALIAS_MISCDEV(UINPUT_MINOR);
 MODULE_ALIAS("devname:" UINPUT_NAME);
 
-static int __init uinput_init(void)
-{
-	return misc_register(&uinput_misc);
-}
-
-static void __exit uinput_exit(void)
-{
-	misc_deregister(&uinput_misc);
-}
-
 MODULE_AUTHOR("Aristeu Sergio Rozanski Filho");
 MODULE_DESCRIPTION("User level driver support for input subsystem");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("0.3");
-
-module_init(uinput_init);
-module_exit(uinput_exit);
diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c
index faa295e..c83bce8 100644
--- a/drivers/input/rmi4/rmi_driver.c
+++ b/drivers/input/rmi4/rmi_driver.c
@@ -553,7 +553,6 @@
 		goto free_struct_buff;
 
 	reg = find_first_bit(rdesc->presense_map, RMI_REG_DESC_PRESENSE_BITS);
-	map_offset = 0;
 	for (i = 0; i < rdesc->num_registers; i++) {
 		struct rmi_register_desc_item *item = &rdesc->registers[i];
 		int reg_size = struct_buf[offset];
@@ -576,6 +575,8 @@
 		item->reg = reg;
 		item->reg_size = reg_size;
 
+		map_offset = 0;
+
 		do {
 			for (b = 0; b < 7; b++) {
 				if (struct_buf[offset] & (0x1 << b))
diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index b4d3408..405252a 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -1305,6 +1305,7 @@
 	serio->write		= i8042_aux_write;
 	serio->start		= i8042_start;
 	serio->stop		= i8042_stop;
+	serio->ps2_cmd_mutex	= &i8042_mutex;
 	serio->port_data	= port;
 	serio->dev.parent	= &i8042_platform_device->dev;
 	if (idx < 0) {
diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index a61b215..1ce3ecb 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -1473,7 +1473,6 @@
 
 	ads784x_hwmon_unregister(spi, ts);
 
-	regulator_disable(ts->reg);
 	regulator_put(ts->reg);
 
 	if (!ts->get_pendown_state) {
diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c
index 7379fe1..f502c84 100644
--- a/drivers/input/touchscreen/silead.c
+++ b/drivers/input/touchscreen/silead.c
@@ -390,9 +390,10 @@
 		data->max_fingers = 5; /* Most devices handle up-to 5 fingers */
 	}
 
-	error = device_property_read_string(dev, "touchscreen-fw-name", &str);
+	error = device_property_read_string(dev, "firmware-name", &str);
 	if (!error)
-		snprintf(data->fw_name, sizeof(data->fw_name), "%s", str);
+		snprintf(data->fw_name, sizeof(data->fw_name),
+			 "silead/%s", str);
 	else
 		dev_dbg(dev, "Firmware file name read error. Using default.");
 }
@@ -410,14 +411,14 @@
 		if (!acpi_id)
 			return -ENODEV;
 
-		snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw",
-			acpi_id->id);
+		snprintf(data->fw_name, sizeof(data->fw_name),
+			 "silead/%s.fw", acpi_id->id);
 
 		for (i = 0; i < strlen(data->fw_name); i++)
 			data->fw_name[i] = tolower(data->fw_name[i]);
 	} else {
-		snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw",
-			id->name);
+		snprintf(data->fw_name, sizeof(data->fw_name),
+			 "silead/%s.fw", id->name);
 	}
 
 	return 0;
@@ -426,7 +427,8 @@
 static int silead_ts_set_default_fw_name(struct silead_ts_data *data,
 					 const struct i2c_device_id *id)
 {
-	snprintf(data->fw_name, sizeof(data->fw_name), "%s.fw", id->name);
+	snprintf(data->fw_name, sizeof(data->fw_name),
+		 "silead/%s.fw", id->name);
 	return 0;
 }
 #endif
@@ -464,7 +466,7 @@
 		return -ENODEV;
 
 	/* Power GPIO pin */
-	data->gpio_power = gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
+	data->gpio_power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
 	if (IS_ERR(data->gpio_power)) {
 		if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER)
 			dev_err(dev, "Shutdown GPIO request failed\n");
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 96de97a..4025291 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -940,15 +940,13 @@
  * Writes the command to the IOMMUs command buffer and informs the
  * hardware about the new command.
  */
-static int iommu_queue_command_sync(struct amd_iommu *iommu,
-				    struct iommu_cmd *cmd,
-				    bool sync)
+static int __iommu_queue_command_sync(struct amd_iommu *iommu,
+				      struct iommu_cmd *cmd,
+				      bool sync)
 {
 	u32 left, tail, head, next_tail;
-	unsigned long flags;
 
 again:
-	spin_lock_irqsave(&iommu->lock, flags);
 
 	head      = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 	tail      = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
@@ -957,15 +955,14 @@
 
 	if (left <= 2) {
 		struct iommu_cmd sync_cmd;
-		volatile u64 sem = 0;
 		int ret;
 
-		build_completion_wait(&sync_cmd, (u64)&sem);
+		iommu->cmd_sem = 0;
+
+		build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
 		copy_cmd_to_buffer(iommu, &sync_cmd, tail);
 
-		spin_unlock_irqrestore(&iommu->lock, flags);
-
-		if ((ret = wait_on_sem(&sem)) != 0)
+		if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
 			return ret;
 
 		goto again;
@@ -976,9 +973,21 @@
 	/* We need to sync now to make sure all commands are processed */
 	iommu->need_sync = sync;
 
+	return 0;
+}
+
+static int iommu_queue_command_sync(struct amd_iommu *iommu,
+				    struct iommu_cmd *cmd,
+				    bool sync)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&iommu->lock, flags);
+	ret = __iommu_queue_command_sync(iommu, cmd, sync);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
-	return 0;
+	return ret;
 }
 
 static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
@@ -993,19 +1002,29 @@
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
 	struct iommu_cmd cmd;
-	volatile u64 sem = 0;
+	unsigned long flags;
 	int ret;
 
 	if (!iommu->need_sync)
 		return 0;
 
-	build_completion_wait(&cmd, (u64)&sem);
 
-	ret = iommu_queue_command_sync(iommu, &cmd, false);
+	build_completion_wait(&cmd, (u64)&iommu->cmd_sem);
+
+	spin_lock_irqsave(&iommu->lock, flags);
+
+	iommu->cmd_sem = 0;
+
+	ret = __iommu_queue_command_sync(iommu, &cmd, false);
 	if (ret)
-		return ret;
+		goto out_unlock;
 
-	return wait_on_sem(&sem);
+	ret = wait_on_sem(&iommu->cmd_sem);
+
+out_unlock:
+	spin_unlock_irqrestore(&iommu->lock, flags);
+
+	return ret;
 }
 
 static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index caf5e38..9652848 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -524,6 +524,8 @@
 	struct irq_domain *ir_domain;
 	struct irq_domain *msi_domain;
 #endif
+
+	volatile u64 __aligned(8) cmd_sem;
 };
 
 #define ACPIHID_UID_LEN 256
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index ce80117..641e887 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -879,7 +879,7 @@
 	 * We may have concurrent producers, so we need to be careful
 	 * not to touch any of the shadow cmdq state.
 	 */
-	queue_read(cmd, Q_ENT(q, idx), q->ent_dwords);
+	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
 	dev_err(smmu->dev, "skipping command in error state:\n");
 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
@@ -890,7 +890,7 @@
 		return;
 	}
 
-	queue_write(cmd, Q_ENT(q, idx), q->ent_dwords);
+	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }
 
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -1034,6 +1034,9 @@
 		case STRTAB_STE_0_CFG_S2_TRANS:
 			ste_live = true;
 			break;
+		case STRTAB_STE_0_CFG_ABORT:
+			if (disable_bypass)
+				break;
 		default:
 			BUG(); /* STE corruption */
 		}
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 4f49fe2..2db74eb 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -686,8 +686,7 @@
 
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 {
-	int flags, ret;
-	u32 fsr, fsynr, resume;
+	u32 fsr, fsynr;
 	unsigned long iova;
 	struct iommu_domain *domain = dev;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -701,34 +700,15 @@
 	if (!(fsr & FSR_FAULT))
 		return IRQ_NONE;
 
-	if (fsr & FSR_IGN)
-		dev_err_ratelimited(smmu->dev,
-				    "Unexpected context fault (fsr 0x%x)\n",
-				    fsr);
-
 	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
-	flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
-
 	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
-	if (!report_iommu_fault(domain, smmu->dev, iova, flags)) {
-		ret = IRQ_HANDLED;
-		resume = RESUME_RETRY;
-	} else {
-		dev_err_ratelimited(smmu->dev,
-		    "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n",
-		    iova, fsynr, cfg->cbndx);
-		ret = IRQ_NONE;
-		resume = RESUME_TERMINATE;
-	}
 
-	/* Clear the faulting FSR */
+	dev_err_ratelimited(smmu->dev,
+	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
+			    fsr, iova, fsynr, cfg->cbndx);
+
 	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
-
-	/* Retry or terminate any stalled transactions */
-	if (fsr & FSR_SS)
-		writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME);
-
-	return ret;
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
@@ -837,7 +817,7 @@
 	}
 
 	/* SCTLR */
-	reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
+	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
 	if (stage1)
 		reg |= SCTLR_S1_ASIDPNE;
 #ifdef __BIG_ENDIAN
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 08a1e2f..00c8a08 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -68,7 +68,8 @@
 	if (!iovad)
 		return;
 
-	put_iova_domain(iovad);
+	if (iovad->granule)
+		put_iova_domain(iovad);
 	kfree(iovad);
 	domain->iova_cookie = NULL;
 }
@@ -151,12 +152,15 @@
 	}
 }
 
-static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
+static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
 		dma_addr_t dma_limit)
 {
+	struct iova_domain *iovad = domain->iova_cookie;
 	unsigned long shift = iova_shift(iovad);
 	unsigned long length = iova_align(iovad, size) >> shift;
 
+	if (domain->geometry.force_aperture)
+		dma_limit = min(dma_limit, domain->geometry.aperture_end);
 	/*
 	 * Enforce size-alignment to be safe - there could perhaps be an
 	 * attribute to control this per-device, or at least per-domain...
@@ -314,7 +318,7 @@
 	if (!pages)
 		return NULL;
 
-	iova = __alloc_iova(iovad, size, dev->coherent_dma_mask);
+	iova = __alloc_iova(domain, size, dev->coherent_dma_mask);
 	if (!iova)
 		goto out_free_pages;
 
@@ -386,7 +390,7 @@
 	phys_addr_t phys = page_to_phys(page) + offset;
 	size_t iova_off = iova_offset(iovad, phys);
 	size_t len = iova_align(iovad, size + iova_off);
-	struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev));
+	struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev));
 
 	if (!iova)
 		return DMA_ERROR_CODE;
@@ -538,7 +542,7 @@
 		prev = s;
 	}
 
-	iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev));
+	iova = __alloc_iova(domain, iova_len, dma_get_mask(dev));
 	if (!iova)
 		goto out_restore_sg;
 
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 8c61399..def8ca1 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -286,12 +286,14 @@
 	int prot = IOMMU_READ;
 	arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);
 
-	if (attr & ARM_V7S_PTE_AP_RDONLY)
+	if (!(attr & ARM_V7S_PTE_AP_RDONLY))
 		prot |= IOMMU_WRITE;
 	if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
 		prot |= IOMMU_MMIO;
 	else if (pte & ARM_V7S_ATTR_C)
 		prot |= IOMMU_CACHE;
+	if (pte & ARM_V7S_ATTR_XN(lvl))
+		prot |= IOMMU_NOEXEC;
 
 	return prot;
 }
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 9ed0a84..3dab13b 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -55,19 +55,19 @@
 	bool                            enable_4GB;
 };
 
-static int compare_of(struct device *dev, void *data)
+static inline int compare_of(struct device *dev, void *data)
 {
 	return dev->of_node == data;
 }
 
-static int mtk_iommu_bind(struct device *dev)
+static inline int mtk_iommu_bind(struct device *dev)
 {
 	struct mtk_iommu_data *data = dev_get_drvdata(dev);
 
 	return component_bind_all(dev, &data->smi_imu);
 }
 
-static void mtk_iommu_unbind(struct device *dev)
+static inline void mtk_iommu_unbind(struct device *dev)
 {
 	struct mtk_iommu_data *data = dev_get_drvdata(dev);
 
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index a04d491..3b44b1d 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -101,8 +101,7 @@
 		zpci_dma_exit_device(zdev);
 
 	zdev->dma_table = s390_domain->dma_table;
-	rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
-				zdev->start_dma + zdev->iommu_size - 1,
+	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
 				(u64) zdev->dma_table);
 	if (rc)
 		goto out_restore;
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 7f87289..82b0b5d 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -39,6 +39,7 @@
 	bool
 	depends on PCI
 	depends on PCI_MSI
+	select ACPI_IORT if ACPI
 
 config ARM_NVIC
 	bool
@@ -156,6 +157,13 @@
 	select GENERIC_IRQ_CHIP
 	select IRQ_DOMAIN
 
+config JCORE_AIC
+	bool "J-Core integrated AIC"
+	depends on OF && (SUPERH || COMPILE_TEST)
+	select IRQ_DOMAIN
+	help
+	  Support for the J-Core integrated AIC.
+
 config RENESAS_INTC_IRQPIN
 	bool
 	select IRQ_DOMAIN
@@ -251,6 +259,9 @@
 config MVEBU_ODMI
 	bool
 
+config MVEBU_PIC
+	bool
+
 config LS_SCFG_MSI
 	def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE
 	depends on PCI && PCI_MSI
@@ -264,3 +275,7 @@
 	select IRQ_DOMAIN
 	help
 	  Support the EZchip NPS400 global interrupt controller
+
+config STM32_EXTI
+	bool
+	select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 4c203b6..b372e79 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -40,6 +40,7 @@
 obj-$(CONFIG_IMGPDC_IRQ)		+= irq-imgpdc.o
 obj-$(CONFIG_IRQ_MIPS_CPU)		+= irq-mips-cpu.o
 obj-$(CONFIG_SIRF_IRQ)			+= irq-sirfsoc.o
+obj-$(CONFIG_JCORE_AIC)			+= irq-jcore-aic.o
 obj-$(CONFIG_RENESAS_INTC_IRQPIN)	+= irq-renesas-intc-irqpin.o
 obj-$(CONFIG_RENESAS_IRQC)		+= irq-renesas-irqc.o
 obj-$(CONFIG_VERSATILE_FPGA_IRQ)	+= irq-versatile-fpga.o
@@ -68,6 +69,8 @@
 obj-$(CONFIG_IMX_GPCV2)			+= irq-imx-gpcv2.o
 obj-$(CONFIG_PIC32_EVIC)		+= irq-pic32-evic.o
 obj-$(CONFIG_MVEBU_ODMI)		+= irq-mvebu-odmi.o
+obj-$(CONFIG_MVEBU_PIC)			+= irq-mvebu-pic.o
 obj-$(CONFIG_LS_SCFG_MSI)		+= irq-ls-scfg-msi.o
 obj-$(CONFIG_EZNPS_GIC)			+= irq-eznps.o
 obj-$(CONFIG_ARCH_ASPEED)		+= irq-aspeed-vic.o
+obj-$(CONFIG_STM32_EXTI) 		+= irq-stm32-exti.o
diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c
index 112e17c..37f952d 100644
--- a/drivers/irqchip/irq-atmel-aic.c
+++ b/drivers/irqchip/irq-atmel-aic.c
@@ -176,6 +176,7 @@
 {
 	struct irq_domain_chip_generic *dgc = d->gc;
 	struct irq_chip_generic *gc;
+	unsigned long flags;
 	unsigned smr;
 	int idx;
 	int ret;
@@ -194,11 +195,11 @@
 
 	gc = dgc->gc[idx];
 
-	irq_gc_lock(gc);
+	irq_gc_lock_irqsave(gc, flags);
 	smr = irq_reg_readl(gc, AT91_AIC_SMR(*out_hwirq));
 	aic_common_set_priority(intspec[2], &smr);
 	irq_reg_writel(gc, smr, AT91_AIC_SMR(*out_hwirq));
-	irq_gc_unlock(gc);
+	irq_gc_unlock_irqrestore(gc, flags);
 
 	return ret;
 }
diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c
index 4f0d068..2a624d8 100644
--- a/drivers/irqchip/irq-atmel-aic5.c
+++ b/drivers/irqchip/irq-atmel-aic5.c
@@ -258,6 +258,7 @@
 				 unsigned int *out_type)
 {
 	struct irq_chip_generic *bgc = irq_get_domain_generic_chip(d, 0);
+	unsigned long flags;
 	unsigned smr;
 	int ret;
 
@@ -269,12 +270,12 @@
 	if (ret)
 		return ret;
 
-	irq_gc_lock(bgc);
+	irq_gc_lock_irqsave(bgc, flags);
 	irq_reg_writel(bgc, *out_hwirq, AT91_AIC5_SSR);
 	smr = irq_reg_readl(bgc, AT91_AIC5_SMR);
 	aic_common_set_priority(intspec[2], &smr);
 	irq_reg_writel(bgc, smr, AT91_AIC5_SMR);
-	irq_gc_unlock(bgc);
+	irq_gc_unlock_irqrestore(bgc, flags);
 
 	return ret;
 }
diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c
index 4cbffba..ecafd29 100644
--- a/drivers/irqchip/irq-gic-pm.c
+++ b/drivers/irqchip/irq-gic-pm.c
@@ -64,7 +64,6 @@
 
 static int gic_get_clocks(struct device *dev, const struct gic_clk_data *data)
 {
-	struct clk *clk;
 	unsigned int i;
 	int ret;
 
@@ -76,28 +75,16 @@
 		return ret;
 
 	for (i = 0; i < data->num_clocks; i++) {
-		clk = of_clk_get_by_name(dev->of_node, data->clocks[i]);
-		if (IS_ERR(clk)) {
-			dev_err(dev, "failed to get clock %s\n",
-				data->clocks[i]);
-			ret = PTR_ERR(clk);
-			goto error;
-		}
-
-		ret = pm_clk_add_clk(dev, clk);
+		ret = of_pm_clk_add_clk(dev, data->clocks[i]);
 		if (ret) {
-			dev_err(dev, "failed to add clock at index %d\n", i);
-			clk_put(clk);
-			goto error;
+			dev_err(dev, "failed to add clock %s\n",
+				data->clocks[i]);
+			pm_clk_destroy(dev);
+			return ret;
 		}
 	}
 
 	return 0;
-
-error:
-	pm_clk_destroy(dev);
-
-	return ret;
 }
 
 static int gic_probe(struct platform_device *pdev)
diff --git a/drivers/irqchip/irq-gic-v3-its-pci-msi.c b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
index aee60ed..aee1c60 100644
--- a/drivers/irqchip/irq-gic-v3-its-pci-msi.c
+++ b/drivers/irqchip/irq-gic-v3-its-pci-msi.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi_iort.h>
 #include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
@@ -106,34 +107,91 @@
 	{},
 };
 
-static int __init its_pci_msi_init(void)
+static int __init its_pci_msi_init_one(struct fwnode_handle *handle,
+				       const char *name)
+{
+	struct irq_domain *parent;
+
+	parent = irq_find_matching_fwnode(handle, DOMAIN_BUS_NEXUS);
+	if (!parent || !msi_get_domain_info(parent)) {
+		pr_err("%s: Unable to locate ITS domain\n", name);
+		return -ENXIO;
+	}
+
+	if (!pci_msi_create_irq_domain(handle, &its_pci_msi_domain_info,
+				       parent)) {
+		pr_err("%s: Unable to create PCI domain\n", name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int __init its_pci_of_msi_init(void)
 {
 	struct device_node *np;
-	struct irq_domain *parent;
 
 	for (np = of_find_matching_node(NULL, its_device_id); np;
 	     np = of_find_matching_node(np, its_device_id)) {
 		if (!of_property_read_bool(np, "msi-controller"))
 			continue;
 
-		parent = irq_find_matching_host(np, DOMAIN_BUS_NEXUS);
-		if (!parent || !msi_get_domain_info(parent)) {
-			pr_err("%s: unable to locate ITS domain\n",
-			       np->full_name);
+		if (its_pci_msi_init_one(of_node_to_fwnode(np), np->full_name))
 			continue;
-		}
-
-		if (!pci_msi_create_irq_domain(of_node_to_fwnode(np),
-					       &its_pci_msi_domain_info,
-					       parent)) {
-			pr_err("%s: unable to create PCI domain\n",
-			       np->full_name);
-			continue;
-		}
 
 		pr_info("PCI/MSI: %s domain created\n", np->full_name);
 	}
 
 	return 0;
 }
+
+#ifdef CONFIG_ACPI
+
+static int __init
+its_pci_msi_parse_madt(struct acpi_subtable_header *header,
+		       const unsigned long end)
+{
+	struct acpi_madt_generic_translator *its_entry;
+	struct fwnode_handle *dom_handle;
+	const char *node_name;
+	int err = -ENXIO;
+
+	its_entry = (struct acpi_madt_generic_translator *)header;
+	node_name = kasprintf(GFP_KERNEL, "ITS@0x%lx",
+			      (long)its_entry->base_address);
+	dom_handle = iort_find_domain_token(its_entry->translation_id);
+	if (!dom_handle) {
+		pr_err("%s: Unable to locate ITS domain handle\n", node_name);
+		goto out;
+	}
+
+	err = its_pci_msi_init_one(dom_handle, node_name);
+	if (!err)
+		pr_info("PCI/MSI: %s domain created\n", node_name);
+
+out:
+	kfree(node_name);
+	return err;
+}
+
+static int __init its_pci_acpi_msi_init(void)
+{
+	acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR,
+			      its_pci_msi_parse_madt, 0);
+	return 0;
+}
+#else
+static int __init its_pci_acpi_msi_init(void)
+{
+	return 0;
+}
+#endif
+
+static int __init its_pci_msi_init(void)
+{
+	its_pci_of_msi_init();
+	its_pci_acpi_msi_init();
+
+	return 0;
+}
 early_initcall(its_pci_msi_init);
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 7ceaba8..35c851c 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -15,10 +15,13 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/acpi_iort.h>
 #include <linux/log2.h>
 #include <linux/mm.h>
 #include <linux/msi.h>
@@ -75,7 +78,7 @@
 	raw_spinlock_t		lock;
 	struct list_head	entry;
 	void __iomem		*base;
-	unsigned long		phys_base;
+	phys_addr_t		phys_base;
 	struct its_cmd_block	*cmd_base;
 	struct its_cmd_block	*cmd_write;
 	struct its_baser	tables[GITS_BASER_NR_REGS];
@@ -115,6 +118,7 @@
 static LIST_HEAD(its_nodes);
 static DEFINE_SPINLOCK(its_lock);
 static struct rdists *gic_rdists;
+static struct irq_domain *its_parent;
 
 #define gic_data_rdist()		(raw_cpu_ptr(gic_rdists->rdist))
 #define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
@@ -1437,6 +1441,11 @@
 		fwspec.param[0] = GIC_IRQ_TYPE_LPI;
 		fwspec.param[1] = hwirq;
 		fwspec.param[2] = IRQ_TYPE_EDGE_RISING;
+	} else if (is_fwnode_irqchip(domain->parent->fwnode)) {
+		fwspec.fwnode = domain->parent->fwnode;
+		fwspec.param_count = 2;
+		fwspec.param[0] = hwirq;
+		fwspec.param[1] = IRQ_TYPE_EDGE_RISING;
 	} else {
 		return -EINVAL;
 	}
@@ -1545,7 +1554,12 @@
 	u32 val;
 
 	val = readl_relaxed(base + GITS_CTLR);
-	if (val & GITS_CTLR_QUIESCENT)
+	/*
+	 * GIC architecture specification requires the ITS to be both
+	 * disabled and quiescent for writes to GITS_BASER<n> or
+	 * GITS_CBASER to not have UNPREDICTABLE results.
+	 */
+	if ((val & GITS_CTLR_QUIESCENT) && !(val & GITS_CTLR_ENABLE))
 		return 0;
 
 	/* Disable the generation of all interrupts to this ITS */
@@ -1609,44 +1623,59 @@
 	gic_enable_quirks(iidr, its_quirks, its);
 }
 
-static int __init its_probe(struct device_node *node,
-			    struct irq_domain *parent)
+static int its_init_domain(struct fwnode_handle *handle, struct its_node *its)
 {
-	struct resource res;
+	struct irq_domain *inner_domain;
+	struct msi_domain_info *info;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	inner_domain = irq_domain_create_tree(handle, &its_domain_ops, its);
+	if (!inner_domain) {
+		kfree(info);
+		return -ENOMEM;
+	}
+
+	inner_domain->parent = its_parent;
+	inner_domain->bus_token = DOMAIN_BUS_NEXUS;
+	info->ops = &its_msi_domain_ops;
+	info->data = its;
+	inner_domain->host_data = info;
+
+	return 0;
+}
+
+static int __init its_probe_one(struct resource *res,
+				struct fwnode_handle *handle, int numa_node)
+{
 	struct its_node *its;
 	void __iomem *its_base;
-	struct irq_domain *inner_domain;
 	u32 val;
 	u64 baser, tmp;
 	int err;
 
-	err = of_address_to_resource(node, 0, &res);
-	if (err) {
-		pr_warn("%s: no regs?\n", node->full_name);
-		return -ENXIO;
-	}
-
-	its_base = ioremap(res.start, resource_size(&res));
+	its_base = ioremap(res->start, resource_size(res));
 	if (!its_base) {
-		pr_warn("%s: unable to map registers\n", node->full_name);
+		pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start);
 		return -ENOMEM;
 	}
 
 	val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK;
 	if (val != 0x30 && val != 0x40) {
-		pr_warn("%s: no ITS detected, giving up\n", node->full_name);
+		pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start);
 		err = -ENODEV;
 		goto out_unmap;
 	}
 
 	err = its_force_quiescent(its_base);
 	if (err) {
-		pr_warn("%s: failed to quiesce, giving up\n",
-			node->full_name);
+		pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start);
 		goto out_unmap;
 	}
 
-	pr_info("ITS: %s\n", node->full_name);
+	pr_info("ITS %pR\n", res);
 
 	its = kzalloc(sizeof(*its), GFP_KERNEL);
 	if (!its) {
@@ -1658,9 +1687,9 @@
 	INIT_LIST_HEAD(&its->entry);
 	INIT_LIST_HEAD(&its->its_device_list);
 	its->base = its_base;
-	its->phys_base = res.start;
+	its->phys_base = res->start;
 	its->ite_size = ((readl_relaxed(its_base + GITS_TYPER) >> 4) & 0xf) + 1;
-	its->numa_node = of_node_to_nid(node);
+	its->numa_node = numa_node;
 
 	its->cmd_base = kzalloc(ITS_CMD_QUEUE_SZ, GFP_KERNEL);
 	if (!its->cmd_base) {
@@ -1707,28 +1736,9 @@
 	writeq_relaxed(0, its->base + GITS_CWRITER);
 	writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
 
-	if (of_property_read_bool(node, "msi-controller")) {
-		struct msi_domain_info *info;
-
-		info = kzalloc(sizeof(*info), GFP_KERNEL);
-		if (!info) {
-			err = -ENOMEM;
-			goto out_free_tables;
-		}
-
-		inner_domain = irq_domain_add_tree(node, &its_domain_ops, its);
-		if (!inner_domain) {
-			err = -ENOMEM;
-			kfree(info);
-			goto out_free_tables;
-		}
-
-		inner_domain->parent = parent;
-		inner_domain->bus_token = DOMAIN_BUS_NEXUS;
-		info->ops = &its_msi_domain_ops;
-		info->data = its;
-		inner_domain->host_data = info;
-	}
+	err = its_init_domain(handle, its);
+	if (err)
+		goto out_free_tables;
 
 	spin_lock(&its_lock);
 	list_add(&its->entry, &its_nodes);
@@ -1744,7 +1754,7 @@
 	kfree(its);
 out_unmap:
 	iounmap(its_base);
-	pr_err("ITS: failed probing %s (%d)\n", node->full_name, err);
+	pr_err("ITS@%pa: failed probing (%d)\n", &res->start, err);
 	return err;
 }
 
@@ -1772,15 +1782,91 @@
 	{},
 };
 
-int __init its_init(struct device_node *node, struct rdists *rdists,
-	     struct irq_domain *parent_domain)
+static int __init its_of_probe(struct device_node *node)
 {
 	struct device_node *np;
+	struct resource res;
 
 	for (np = of_find_matching_node(node, its_device_id); np;
 	     np = of_find_matching_node(np, its_device_id)) {
-		its_probe(np, parent_domain);
+		if (!of_property_read_bool(np, "msi-controller")) {
+			pr_warn("%s: no msi-controller property, ITS ignored\n",
+				np->full_name);
+			continue;
+		}
+
+		if (of_address_to_resource(np, 0, &res)) {
+			pr_warn("%s: no regs?\n", np->full_name);
+			continue;
+		}
+
+		its_probe_one(&res, &np->fwnode, of_node_to_nid(np));
 	}
+	return 0;
+}
+
+#ifdef CONFIG_ACPI
+
+#define ACPI_GICV3_ITS_MEM_SIZE (SZ_128K)
+
+static int __init gic_acpi_parse_madt_its(struct acpi_subtable_header *header,
+					  const unsigned long end)
+{
+	struct acpi_madt_generic_translator *its_entry;
+	struct fwnode_handle *dom_handle;
+	struct resource res;
+	int err;
+
+	its_entry = (struct acpi_madt_generic_translator *)header;
+	memset(&res, 0, sizeof(res));
+	res.start = its_entry->base_address;
+	res.end = its_entry->base_address + ACPI_GICV3_ITS_MEM_SIZE - 1;
+	res.flags = IORESOURCE_MEM;
+
+	dom_handle = irq_domain_alloc_fwnode((void *)its_entry->base_address);
+	if (!dom_handle) {
+		pr_err("ITS@%pa: Unable to allocate GICv3 ITS domain token\n",
+		       &res.start);
+		return -ENOMEM;
+	}
+
+	err = iort_register_domain_token(its_entry->translation_id, dom_handle);
+	if (err) {
+		pr_err("ITS@%pa: Unable to register GICv3 ITS domain token (ITS ID %d) to IORT\n",
+		       &res.start, its_entry->translation_id);
+		goto dom_err;
+	}
+
+	err = its_probe_one(&res, dom_handle, NUMA_NO_NODE);
+	if (!err)
+		return 0;
+
+	iort_deregister_domain_token(its_entry->translation_id);
+dom_err:
+	irq_domain_free_fwnode(dom_handle);
+	return err;
+}
+
+static void __init its_acpi_probe(void)
+{
+	acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR,
+			      gic_acpi_parse_madt_its, 0);
+}
+#else
+static void __init its_acpi_probe(void) { }
+#endif
+
+int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
+		    struct irq_domain *parent_domain)
+{
+	struct device_node *of_node;
+
+	its_parent = parent_domain;
+	of_node = to_of_node(handle);
+	if (of_node)
+		its_of_probe(of_node);
+	else
+		its_acpi_probe();
 
 	if (list_empty(&its_nodes)) {
 		pr_warn("ITS: No ITS available, not enabling LPIs\n");
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 6fc56c3..9b81bd8 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -495,6 +495,14 @@
 	/* Set priority mask register */
 	gic_write_pmr(DEFAULT_PMR_VALUE);
 
+	/*
+	 * Some firmwares hand over to the kernel with the BPR changed from
+	 * its reset value (and with a value large enough to prevent
+	 * any pre-emptive interrupts from working at all). Writing a zero
+	 * to BPR restores is reset value.
+	 */
+	gic_write_bpr1(0);
+
 	if (static_key_true(&supports_deactivate)) {
 		/* EOI drops priority only (mode 1) */
 		gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop);
@@ -548,7 +556,7 @@
 static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
 				   unsigned long cluster_id)
 {
-	int cpu = *base_cpu;
+	int next_cpu, cpu = *base_cpu;
 	unsigned long mpidr = cpu_logical_map(cpu);
 	u16 tlist = 0;
 
@@ -562,9 +570,10 @@
 
 		tlist |= 1 << (mpidr & 0xf);
 
-		cpu = cpumask_next(cpu, mask);
-		if (cpu >= nr_cpu_ids)
+		next_cpu = cpumask_next(cpu, mask);
+		if (next_cpu >= nr_cpu_ids)
 			goto out;
+		cpu = next_cpu;
 
 		mpidr = cpu_logical_map(cpu);
 
@@ -667,13 +676,20 @@
 #endif
 
 #ifdef CONFIG_CPU_PM
+/* Check whether it's single security state view */
+static bool gic_dist_security_disabled(void)
+{
+	return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS;
+}
+
 static int gic_cpu_pm_notifier(struct notifier_block *self,
 			       unsigned long cmd, void *v)
 {
 	if (cmd == CPU_PM_EXIT) {
-		gic_enable_redist(true);
+		if (gic_dist_security_disabled())
+			gic_enable_redist(true);
 		gic_cpu_sys_reg_init();
-	} else if (cmd == CPU_PM_ENTER) {
+	} else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) {
 		gic_write_grpen1(0);
 		gic_enable_redist(false);
 	}
@@ -903,7 +919,6 @@
 				 u64 redist_stride,
 				 struct fwnode_handle *handle)
 {
-	struct device_node *node;
 	u32 typer;
 	int gic_irqs;
 	int err;
@@ -944,10 +959,8 @@
 
 	set_handle_irq(gic_handle_irq);
 
-	node = to_of_node(handle);
-	if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis() &&
-	    node) /* Temp hack to prevent ITS init for ACPI */
-		its_init(node, &gic_data.rdists, gic_data.domain);
+	if (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && gic_dist_supports_lpis())
+		its_init(handle, &gic_data.rdists, gic_data.domain);
 
 	gic_smp_init();
 	gic_dist_init();
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index c2cab57..58e5b4e 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -91,7 +91,27 @@
 #endif
 };
 
-static DEFINE_RAW_SPINLOCK(irq_controller_lock);
+#ifdef CONFIG_BL_SWITCHER
+
+static DEFINE_RAW_SPINLOCK(cpu_map_lock);
+
+#define gic_lock_irqsave(f)		\
+	raw_spin_lock_irqsave(&cpu_map_lock, (f))
+#define gic_unlock_irqrestore(f)	\
+	raw_spin_unlock_irqrestore(&cpu_map_lock, (f))
+
+#define gic_lock()			raw_spin_lock(&cpu_map_lock)
+#define gic_unlock()			raw_spin_unlock(&cpu_map_lock)
+
+#else
+
+#define gic_lock_irqsave(f)		do { (void)(f); } while(0)
+#define gic_unlock_irqrestore(f)	do { (void)(f); } while(0)
+
+#define gic_lock()			do { } while(0)
+#define gic_unlock()			do { } while(0)
+
+#endif
 
 /*
  * The GIC mapping of CPU interfaces does not necessarily match
@@ -317,12 +337,12 @@
 	if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
 		return -EINVAL;
 
-	raw_spin_lock_irqsave(&irq_controller_lock, flags);
+	gic_lock_irqsave(flags);
 	mask = 0xff << shift;
 	bit = gic_cpu_map[cpu] << shift;
 	val = readl_relaxed(reg) & ~mask;
 	writel_relaxed(val | bit, reg);
-	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
+	gic_unlock_irqrestore(flags);
 
 	return IRQ_SET_MASK_OK_DONE;
 }
@@ -374,9 +394,7 @@
 
 	chained_irq_enter(chip, desc);
 
-	raw_spin_lock(&irq_controller_lock);
 	status = readl_relaxed(gic_data_cpu_base(chip_data) + GIC_CPU_INTACK);
-	raw_spin_unlock(&irq_controller_lock);
 
 	gic_irq = (status & GICC_IAR_INT_ID_MASK);
 	if (gic_irq == GICC_INT_SPURIOUS)
@@ -769,7 +787,14 @@
 	int cpu;
 	unsigned long flags, map = 0;
 
-	raw_spin_lock_irqsave(&irq_controller_lock, flags);
+	if (unlikely(nr_cpu_ids == 1)) {
+		/* Only one CPU? let's do a self-IPI... */
+		writel_relaxed(2 << 24 | irq,
+			       gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
+		return;
+	}
+
+	gic_lock_irqsave(flags);
 
 	/* Convert our logical CPU mask into a physical one. */
 	for_each_cpu(cpu, mask)
@@ -784,7 +809,7 @@
 	/* this always happens on GIC0 */
 	writel_relaxed(map << 16 | irq, gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT);
 
-	raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
+	gic_unlock_irqrestore(flags);
 }
 #endif
 
@@ -852,7 +877,7 @@
 	cur_target_mask = 0x01010101 << cur_cpu_id;
 	ror_val = (cur_cpu_id - new_cpu_id) & 31;
 
-	raw_spin_lock(&irq_controller_lock);
+	gic_lock();
 
 	/* Update the target interface for this logical CPU */
 	gic_cpu_map[cpu] = 1 << new_cpu_id;
@@ -872,7 +897,7 @@
 		}
 	}
 
-	raw_spin_unlock(&irq_controller_lock);
+	gic_unlock();
 
 	/*
 	 * Now let's migrate and clear any potential SGIs that might be
@@ -914,7 +939,7 @@
 	return gic_dist_physaddr + GIC_DIST_SOFTINT;
 }
 
-void __init gic_init_physaddr(struct device_node *node)
+static void __init gic_init_physaddr(struct device_node *node)
 {
 	struct resource res;
 	if (of_address_to_resource(node, 0, &res) == 0) {
diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c
new file mode 100644
index 0000000..84b01de
--- /dev/null
+++ b/drivers/irqchip/irq-jcore-aic.c
@@ -0,0 +1,95 @@
+/*
+ * J-Core SoC AIC driver
+ *
+ * Copyright (C) 2015-2016 Smart Energy Instruments, Inc.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define JCORE_AIC_MAX_HWIRQ	127
+#define JCORE_AIC1_MIN_HWIRQ	16
+#define JCORE_AIC2_MIN_HWIRQ	64
+
+#define JCORE_AIC1_INTPRI_REG	8
+
+static struct irq_chip jcore_aic;
+
+static int jcore_aic_irqdomain_map(struct irq_domain *d, unsigned int irq,
+				   irq_hw_number_t hwirq)
+{
+	struct irq_chip *aic = d->host_data;
+
+	irq_set_chip_and_handler(irq, aic, handle_simple_irq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops jcore_aic_irqdomain_ops = {
+	.map = jcore_aic_irqdomain_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static void noop(struct irq_data *data)
+{
+}
+
+static int __init aic_irq_of_init(struct device_node *node,
+				  struct device_node *parent)
+{
+	unsigned min_irq = JCORE_AIC2_MIN_HWIRQ;
+	unsigned dom_sz = JCORE_AIC_MAX_HWIRQ+1;
+	struct irq_domain *domain;
+
+	pr_info("Initializing J-Core AIC\n");
+
+	/* AIC1 needs priority initialization to receive interrupts. */
+	if (of_device_is_compatible(node, "jcore,aic1")) {
+		unsigned cpu;
+
+		for_each_present_cpu(cpu) {
+			void __iomem *base = of_iomap(node, cpu);
+
+			if (!base) {
+				pr_err("Unable to map AIC for cpu %u\n", cpu);
+				return -ENOMEM;
+			}
+			__raw_writel(0xffffffff, base + JCORE_AIC1_INTPRI_REG);
+			iounmap(base);
+		}
+		min_irq = JCORE_AIC1_MIN_HWIRQ;
+	}
+
+	/*
+	 * The irq chip framework requires either mask/unmask or enable/disable
+	 * function pointers to be provided, but the hardware does not have any
+	 * such mechanism; the only interrupt masking is at the cpu level and
+	 * it affects all interrupts. We provide dummy mask/unmask. The hardware
+	 * handles all interrupt control and clears pending status when the cpu
+	 * accepts the interrupt.
+	 */
+	jcore_aic.irq_mask = noop;
+	jcore_aic.irq_unmask = noop;
+	jcore_aic.name = "AIC";
+
+	domain = irq_domain_add_linear(node, dom_sz, &jcore_aic_irqdomain_ops,
+				       &jcore_aic);
+	if (!domain)
+		return -ENOMEM;
+	irq_create_strict_mappings(domain, min_irq, min_irq, dom_sz - min_irq);
+
+	return 0;
+}
+
+IRQCHIP_DECLARE(jcore_aic2, "jcore,aic2", aic_irq_of_init);
+IRQCHIP_DECLARE(jcore_aic1, "jcore,aic1", aic_irq_of_init);
diff --git a/drivers/irqchip/irq-keystone.c b/drivers/irqchip/irq-keystone.c
index deb89d6..54a5e87 100644
--- a/drivers/irqchip/irq-keystone.c
+++ b/drivers/irqchip/irq-keystone.c
@@ -109,7 +109,7 @@
 			dev_dbg(kirq->dev, "dispatch bit %d, virq %d\n",
 				src, virq);
 			if (!virq)
-				dev_warn(kirq->dev, "sporious irq detected hwirq %d, virq %d\n",
+				dev_warn(kirq->dev, "spurious irq detected hwirq %d, virq %d\n",
 					 src, virq);
 			generic_handle_irq(virq);
 		}
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index c5f33c3..c0178a1 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -371,18 +371,13 @@
 	bitmap_and(pending, pending, intrmask, gic_shared_intrs);
 	bitmap_and(pending, pending, pcpu_mask, gic_shared_intrs);
 
-	intr = find_first_bit(pending, gic_shared_intrs);
-	while (intr != gic_shared_intrs) {
+	for_each_set_bit(intr, pending, gic_shared_intrs) {
 		virq = irq_linear_revmap(gic_irq_domain,
 					 GIC_SHARED_TO_HWIRQ(intr));
 		if (chained)
 			generic_handle_irq(virq);
 		else
 			do_IRQ(virq);
-
-		/* go to next pending bit */
-		bitmap_clear(pending, intr, 1);
-		intr = find_first_bit(pending, gic_shared_intrs);
 	}
 }
 
@@ -518,18 +513,13 @@
 
 	bitmap_and(&pending, &pending, &masked, GIC_NUM_LOCAL_INTRS);
 
-	intr = find_first_bit(&pending, GIC_NUM_LOCAL_INTRS);
-	while (intr != GIC_NUM_LOCAL_INTRS) {
+	for_each_set_bit(intr, &pending, GIC_NUM_LOCAL_INTRS) {
 		virq = irq_linear_revmap(gic_irq_domain,
 					 GIC_LOCAL_TO_HWIRQ(intr));
 		if (chained)
 			generic_handle_irq(virq);
 		else
 			do_IRQ(virq);
-
-		/* go to next pending bit */
-		bitmap_clear(&pending, intr, 1);
-		intr = find_first_bit(&pending, GIC_NUM_LOCAL_INTRS);
 	}
 }
 
@@ -638,27 +628,6 @@
 	if (!gic_local_irq_is_routable(intr))
 		return -EPERM;
 
-	/*
-	 * HACK: These are all really percpu interrupts, but the rest
-	 * of the MIPS kernel code does not use the percpu IRQ API for
-	 * the CP0 timer and performance counter interrupts.
-	 */
-	switch (intr) {
-	case GIC_LOCAL_INT_TIMER:
-	case GIC_LOCAL_INT_PERFCTR:
-	case GIC_LOCAL_INT_FDC:
-		irq_set_chip_and_handler(virq,
-					 &gic_all_vpes_local_irq_controller,
-					 handle_percpu_irq);
-		break;
-	default:
-		irq_set_chip_and_handler(virq,
-					 &gic_local_irq_controller,
-					 handle_percpu_devid_irq);
-		irq_set_percpu_devid(virq);
-		break;
-	}
-
 	spin_lock_irqsave(&gic_lock, flags);
 	for (i = 0; i < gic_vpes; i++) {
 		u32 val = GIC_MAP_TO_PIN_MSK | gic_cpu_pin;
@@ -713,9 +682,6 @@
 	unsigned long flags;
 	int i;
 
-	irq_set_chip_and_handler(virq, &gic_level_irq_controller,
-				 handle_level_irq);
-
 	spin_lock_irqsave(&gic_lock, flags);
 	gic_map_to_pin(intr, gic_cpu_pin);
 	gic_map_to_vpe(intr, mips_cm_vp_id(vpe));
@@ -727,12 +693,42 @@
 	return 0;
 }
 
-static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq,
-			      irq_hw_number_t hw)
+static int gic_setup_dev_chip(struct irq_domain *d, unsigned int virq,
+			      unsigned int hwirq)
 {
-	if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS)
-		return gic_local_irq_domain_map(d, virq, hw);
-	return gic_shared_irq_domain_map(d, virq, hw, 0);
+	struct irq_chip *chip;
+	int err;
+
+	if (hwirq >= GIC_SHARED_HWIRQ_BASE) {
+		err = irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+						    &gic_level_irq_controller,
+						    NULL);
+	} else {
+		switch (GIC_HWIRQ_TO_LOCAL(hwirq)) {
+		case GIC_LOCAL_INT_TIMER:
+		case GIC_LOCAL_INT_PERFCTR:
+		case GIC_LOCAL_INT_FDC:
+			/*
+			 * HACK: These are all really percpu interrupts, but
+			 * the rest of the MIPS kernel code does not use the
+			 * percpu IRQ API for them.
+			 */
+			chip = &gic_all_vpes_local_irq_controller;
+			irq_set_handler(virq, handle_percpu_irq);
+			break;
+
+		default:
+			chip = &gic_local_irq_controller;
+			irq_set_handler(virq, handle_percpu_devid_irq);
+			irq_set_percpu_devid(virq);
+			break;
+		}
+
+		err = irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+						    chip, NULL);
+	}
+
+	return err;
 }
 
 static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
@@ -743,15 +739,12 @@
 	int cpu, ret, i;
 
 	if (spec->type == GIC_DEVICE) {
-		/* verify that it doesn't conflict with an IPI irq */
-		if (test_bit(spec->hwirq, ipi_resrv))
+		/* verify that shared irqs don't conflict with an IPI irq */
+		if ((spec->hwirq >= GIC_SHARED_HWIRQ_BASE) &&
+		    test_bit(GIC_HWIRQ_TO_SHARED(spec->hwirq), ipi_resrv))
 			return -EBUSY;
 
-		hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq);
-
-		return irq_domain_set_hwirq_and_chip(d, virq, hwirq,
-						     &gic_level_irq_controller,
-						     NULL);
+		return gic_setup_dev_chip(d, virq, spec->hwirq);
 	} else {
 		base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs);
 		if (base_hwirq == gic_shared_intrs) {
@@ -771,11 +764,13 @@
 			hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i);
 
 			ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq,
-							    &gic_edge_irq_controller,
+							    &gic_level_irq_controller,
 							    NULL);
 			if (ret)
 				goto error;
 
+			irq_set_handler(virq + i, handle_level_irq);
+
 			ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu);
 			if (ret)
 				goto error;
@@ -818,7 +813,6 @@
 }
 
 static const struct irq_domain_ops gic_irq_domain_ops = {
-	.map = gic_irq_domain_map,
 	.alloc = gic_irq_domain_alloc,
 	.free = gic_irq_domain_free,
 	.match = gic_irq_domain_match,
@@ -849,29 +843,20 @@
 	struct irq_fwspec *fwspec = arg;
 	struct gic_irq_spec spec = {
 		.type = GIC_DEVICE,
-		.hwirq = fwspec->param[1],
 	};
 	int i, ret;
-	bool is_shared = fwspec->param[0] == GIC_SHARED;
 
-	if (is_shared) {
-		ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec);
-		if (ret)
-			return ret;
-	}
+	if (fwspec->param[0] == GIC_SHARED)
+		spec.hwirq = GIC_SHARED_TO_HWIRQ(fwspec->param[1]);
+	else
+		spec.hwirq = GIC_LOCAL_TO_HWIRQ(fwspec->param[1]);
+
+	ret = irq_domain_alloc_irqs_parent(d, virq, nr_irqs, &spec);
+	if (ret)
+		return ret;
 
 	for (i = 0; i < nr_irqs; i++) {
-		irq_hw_number_t hwirq;
-
-		if (is_shared)
-			hwirq = GIC_SHARED_TO_HWIRQ(spec.hwirq + i);
-		else
-			hwirq = GIC_LOCAL_TO_HWIRQ(spec.hwirq + i);
-
-		ret = irq_domain_set_hwirq_and_chip(d, virq + i,
-						    hwirq,
-						    &gic_level_irq_controller,
-						    NULL);
+		ret = gic_setup_dev_chip(d, virq + i, spec.hwirq + i);
 		if (ret)
 			goto error;
 	}
@@ -890,10 +875,20 @@
 	return;
 }
 
+static void gic_dev_domain_activate(struct irq_domain *domain,
+				    struct irq_data *d)
+{
+	if (GIC_HWIRQ_TO_LOCAL(d->hwirq) < GIC_NUM_LOCAL_INTRS)
+		gic_local_irq_domain_map(domain, d->irq, d->hwirq);
+	else
+		gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0);
+}
+
 static struct irq_domain_ops gic_dev_domain_ops = {
 	.xlate = gic_dev_domain_xlate,
 	.alloc = gic_dev_domain_alloc,
 	.free = gic_dev_domain_free,
+	.activate = gic_dev_domain_activate,
 };
 
 static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr,
diff --git a/drivers/irqchip/irq-mvebu-pic.c b/drivers/irqchip/irq-mvebu-pic.c
new file mode 100644
index 0000000..eec6395
--- /dev/null
+++ b/drivers/irqchip/irq-mvebu-pic.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2016 Marvell
+ *
+ * Yehuda Yitschak <yehuday@marvell.com>
+ * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+
+#define PIC_CAUSE	       0x0
+#define PIC_MASK	       0x4
+
+#define PIC_MAX_IRQS		32
+#define PIC_MAX_IRQ_MASK	((1UL << PIC_MAX_IRQS) - 1)
+
+struct mvebu_pic {
+	void __iomem *base;
+	u32 parent_irq;
+	struct irq_domain *domain;
+	struct irq_chip irq_chip;
+};
+
+static void mvebu_pic_reset(struct mvebu_pic *pic)
+{
+	/* ACK and mask all interrupts */
+	writel(0, pic->base + PIC_MASK);
+	writel(PIC_MAX_IRQ_MASK, pic->base + PIC_CAUSE);
+}
+
+static void mvebu_pic_eoi_irq(struct irq_data *d)
+{
+	struct mvebu_pic *pic = irq_data_get_irq_chip_data(d);
+
+	writel(1 << d->hwirq, pic->base + PIC_CAUSE);
+}
+
+static void mvebu_pic_mask_irq(struct irq_data *d)
+{
+	struct mvebu_pic *pic = irq_data_get_irq_chip_data(d);
+	u32 reg;
+
+	reg =  readl(pic->base + PIC_MASK);
+	reg |= (1 << d->hwirq);
+	writel(reg, pic->base + PIC_MASK);
+}
+
+static void mvebu_pic_unmask_irq(struct irq_data *d)
+{
+	struct mvebu_pic *pic = irq_data_get_irq_chip_data(d);
+	u32 reg;
+
+	reg = readl(pic->base + PIC_MASK);
+	reg &= ~(1 << d->hwirq);
+	writel(reg, pic->base + PIC_MASK);
+}
+
+static int mvebu_pic_irq_map(struct irq_domain *domain, unsigned int virq,
+			     irq_hw_number_t hwirq)
+{
+	struct mvebu_pic *pic = domain->host_data;
+
+	irq_set_percpu_devid(virq);
+	irq_set_chip_data(virq, pic);
+	irq_set_chip_and_handler(virq, &pic->irq_chip,
+				 handle_percpu_devid_irq);
+	irq_set_status_flags(virq, IRQ_LEVEL);
+	irq_set_probe(virq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mvebu_pic_domain_ops = {
+	.map = mvebu_pic_irq_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+static void mvebu_pic_handle_cascade_irq(struct irq_desc *desc)
+{
+	struct mvebu_pic *pic = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned long irqmap, irqn;
+	unsigned int cascade_irq;
+
+	irqmap = readl_relaxed(pic->base + PIC_CAUSE);
+	chained_irq_enter(chip, desc);
+
+	for_each_set_bit(irqn, &irqmap, BITS_PER_LONG) {
+		cascade_irq = irq_find_mapping(pic->domain, irqn);
+		generic_handle_irq(cascade_irq);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void mvebu_pic_enable_percpu_irq(void *data)
+{
+	struct mvebu_pic *pic = data;
+
+	mvebu_pic_reset(pic);
+	enable_percpu_irq(pic->parent_irq, IRQ_TYPE_NONE);
+}
+
+static void mvebu_pic_disable_percpu_irq(void *data)
+{
+	struct mvebu_pic *pic = data;
+
+	disable_percpu_irq(pic->parent_irq);
+}
+
+static int mvebu_pic_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct mvebu_pic *pic;
+	struct irq_chip *irq_chip;
+	struct resource *res;
+
+	pic = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_pic), GFP_KERNEL);
+	if (!pic)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pic->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(pic->base))
+		return PTR_ERR(pic->base);
+
+	irq_chip = &pic->irq_chip;
+	irq_chip->name = dev_name(&pdev->dev);
+	irq_chip->irq_mask = mvebu_pic_mask_irq;
+	irq_chip->irq_unmask = mvebu_pic_unmask_irq;
+	irq_chip->irq_eoi = mvebu_pic_eoi_irq;
+
+	pic->parent_irq = irq_of_parse_and_map(node, 0);
+	if (pic->parent_irq <= 0) {
+		dev_err(&pdev->dev, "Failed to parse parent interrupt\n");
+		return -EINVAL;
+	}
+
+	pic->domain = irq_domain_add_linear(node, PIC_MAX_IRQS,
+					    &mvebu_pic_domain_ops, pic);
+	if (!pic->domain) {
+		dev_err(&pdev->dev, "Failed to allocate irq domain\n");
+		return -ENOMEM;
+	}
+
+	irq_set_chained_handler(pic->parent_irq, mvebu_pic_handle_cascade_irq);
+	irq_set_handler_data(pic->parent_irq, pic);
+
+	on_each_cpu(mvebu_pic_enable_percpu_irq, pic, 1);
+
+	platform_set_drvdata(pdev, pic);
+
+	return 0;
+}
+
+static int mvebu_pic_remove(struct platform_device *pdev)
+{
+	struct mvebu_pic *pic = platform_get_drvdata(pdev);
+
+	on_each_cpu(mvebu_pic_disable_percpu_irq, pic, 1);
+	irq_domain_remove(pic->domain);
+
+	return 0;
+}
+
+static const struct of_device_id mvebu_pic_of_match[] = {
+	{ .compatible = "marvell,armada-8k-pic", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mvebu_pic_of_match);
+
+static struct platform_driver mvebu_pic_driver = {
+	.probe  = mvebu_pic_probe,
+	.remove = mvebu_pic_remove,
+	.driver = {
+		.name = "mvebu-pic",
+		.of_match_table = mvebu_pic_of_match,
+	},
+};
+module_platform_driver(mvebu_pic_driver);
+
+MODULE_AUTHOR("Yehuda Yitschak <yehuday@marvell.com>");
+MODULE_AUTHOR("Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:mvebu_pic");
+
diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c
new file mode 100644
index 0000000..491568c
--- /dev/null
+++ b/drivers/irqchip/irq-stm32-exti.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) Maxime Coquelin 2015
+ * Author:  Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#define EXTI_IMR	0x0
+#define EXTI_EMR	0x4
+#define EXTI_RTSR	0x8
+#define EXTI_FTSR	0xc
+#define EXTI_SWIER	0x10
+#define EXTI_PR		0x14
+
+static void stm32_irq_handler(struct irq_desc *desc)
+{
+	struct irq_domain *domain = irq_desc_get_handler_data(desc);
+	struct irq_chip_generic *gc = domain->gc->gc[0];
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	unsigned long pending;
+	int n;
+
+	chained_irq_enter(chip, desc);
+
+	while ((pending = irq_reg_readl(gc, EXTI_PR))) {
+		for_each_set_bit(n, &pending, BITS_PER_LONG) {
+			generic_handle_irq(irq_find_mapping(domain, n));
+			irq_reg_writel(gc, BIT(n), EXTI_PR);
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int stm32_irq_set_type(struct irq_data *data, unsigned int type)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+	int pin = data->hwirq;
+	u32 rtsr, ftsr;
+
+	irq_gc_lock(gc);
+
+	rtsr = irq_reg_readl(gc, EXTI_RTSR);
+	ftsr = irq_reg_readl(gc, EXTI_FTSR);
+
+	switch (type) {
+	case IRQ_TYPE_EDGE_RISING:
+		rtsr |= BIT(pin);
+		ftsr &= ~BIT(pin);
+		break;
+	case IRQ_TYPE_EDGE_FALLING:
+		rtsr &= ~BIT(pin);
+		ftsr |= BIT(pin);
+		break;
+	case IRQ_TYPE_EDGE_BOTH:
+		rtsr |= BIT(pin);
+		ftsr |= BIT(pin);
+		break;
+	default:
+		irq_gc_unlock(gc);
+		return -EINVAL;
+	}
+
+	irq_reg_writel(gc, rtsr, EXTI_RTSR);
+	irq_reg_writel(gc, ftsr, EXTI_FTSR);
+
+	irq_gc_unlock(gc);
+
+	return 0;
+}
+
+static int stm32_irq_set_wake(struct irq_data *data, unsigned int on)
+{
+	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
+	int pin = data->hwirq;
+	u32 emr;
+
+	irq_gc_lock(gc);
+
+	emr = irq_reg_readl(gc, EXTI_EMR);
+	if (on)
+		emr |= BIT(pin);
+	else
+		emr &= ~BIT(pin);
+	irq_reg_writel(gc, emr, EXTI_EMR);
+
+	irq_gc_unlock(gc);
+
+	return 0;
+}
+
+static int stm32_exti_alloc(struct irq_domain *d, unsigned int virq,
+			    unsigned int nr_irqs, void *data)
+{
+	struct irq_chip_generic *gc = d->gc->gc[0];
+	struct irq_fwspec *fwspec = data;
+	irq_hw_number_t hwirq;
+
+	hwirq = fwspec->param[0];
+
+	irq_map_generic_chip(d, virq, hwirq);
+	irq_domain_set_info(d, virq, hwirq, &gc->chip_types->chip, gc,
+			    handle_simple_irq, NULL, NULL);
+
+	return 0;
+}
+
+static void stm32_exti_free(struct irq_domain *d, unsigned int virq,
+			    unsigned int nr_irqs)
+{
+	struct irq_data *data = irq_domain_get_irq_data(d, virq);
+
+	irq_domain_reset_irq_data(data);
+}
+
+struct irq_domain_ops irq_exti_domain_ops = {
+	.map	= irq_map_generic_chip,
+	.xlate	= irq_domain_xlate_onetwocell,
+	.alloc  = stm32_exti_alloc,
+	.free	= stm32_exti_free,
+};
+
+static int __init stm32_exti_init(struct device_node *node,
+				  struct device_node *parent)
+{
+	unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN;
+	int nr_irqs, nr_exti, ret, i;
+	struct irq_chip_generic *gc;
+	struct irq_domain *domain;
+	void *base;
+
+	base = of_iomap(node, 0);
+	if (!base) {
+		pr_err("%s: Unable to map registers\n", node->full_name);
+		return -ENOMEM;
+	}
+
+	/* Determine number of irqs supported */
+	writel_relaxed(~0UL, base + EXTI_RTSR);
+	nr_exti = fls(readl_relaxed(base + EXTI_RTSR));
+	writel_relaxed(0, base + EXTI_RTSR);
+
+	pr_info("%s: %d External IRQs detected\n", node->full_name, nr_exti);
+
+	domain = irq_domain_add_linear(node, nr_exti,
+				       &irq_exti_domain_ops, NULL);
+	if (!domain) {
+		pr_err("%s: Could not register interrupt domain.\n",
+				node->name);
+		ret = -ENOMEM;
+		goto out_unmap;
+	}
+
+	ret = irq_alloc_domain_generic_chips(domain, nr_exti, 1, "exti",
+					     handle_edge_irq, clr, 0, 0);
+	if (ret) {
+		pr_err("%s: Could not allocate generic interrupt chip.\n",
+			node->full_name);
+		goto out_free_domain;
+	}
+
+	gc = domain->gc->gc[0];
+	gc->reg_base                         = base;
+	gc->chip_types->type               = IRQ_TYPE_EDGE_BOTH;
+	gc->chip_types->chip.name          = gc->chip_types[0].chip.name;
+	gc->chip_types->chip.irq_ack       = irq_gc_ack_set_bit;
+	gc->chip_types->chip.irq_mask      = irq_gc_mask_clr_bit;
+	gc->chip_types->chip.irq_unmask    = irq_gc_mask_set_bit;
+	gc->chip_types->chip.irq_set_type  = stm32_irq_set_type;
+	gc->chip_types->chip.irq_set_wake  = stm32_irq_set_wake;
+	gc->chip_types->regs.ack           = EXTI_PR;
+	gc->chip_types->regs.mask          = EXTI_IMR;
+	gc->chip_types->handler            = handle_edge_irq;
+
+	nr_irqs = of_irq_count(node);
+	for (i = 0; i < nr_irqs; i++) {
+		unsigned int irq = irq_of_parse_and_map(node, i);
+
+		irq_set_handler_data(irq, domain);
+		irq_set_chained_handler(irq, stm32_irq_handler);
+	}
+
+	return 0;
+
+out_free_domain:
+	irq_domain_remove(domain);
+out_unmap:
+	iounmap(base);
+	return ret;
+}
+
+IRQCHIP_DECLARE(stm32_exti, "st,stm32-exti", stm32_exti_init);
diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c
index 292991c..e3fa1cd 100644
--- a/drivers/isdn/hardware/mISDN/avmfritz.c
+++ b/drivers/isdn/hardware/mISDN/avmfritz.c
@@ -284,7 +284,7 @@
 					  AVM_HDLC_STATUS_1));
 }
 
-void
+static void
 write_ctrl(struct bchannel *bch, int which) {
 	struct fritzcard *fc = bch->hw;
 	struct hdlc_hw *hdlc;
@@ -741,7 +741,7 @@
 	modehdlc(&fc->bch[1], -1);
 }
 
-void
+static void
 clear_pending_hdlc_ints(struct fritzcard *fc)
 {
 	u32 val;
@@ -962,7 +962,7 @@
 	return err;
 }
 
-int
+static int
 setup_fritz(struct fritzcard *fc)
 {
 	u32 val, ver;
diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 28543d7..480c2d7 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -564,19 +564,19 @@
 #define	MAX_TDM_CHAN 32
 
 
-inline void
+static inline void
 enablepcibridge(struct hfc_multi *c)
 {
 	HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x3); /* was _io before */
 }
 
-inline void
+static inline void
 disablepcibridge(struct hfc_multi *c)
 {
 	HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x2); /* was _io before */
 }
 
-inline unsigned char
+static inline unsigned char
 readpcibridge(struct hfc_multi *hc, unsigned char address)
 {
 	unsigned short cipv;
@@ -604,7 +604,7 @@
 	return data;
 }
 
-inline void
+static inline void
 writepcibridge(struct hfc_multi *hc, unsigned char address, unsigned char data)
 {
 	unsigned short cipv;
@@ -634,14 +634,14 @@
 	outl(datav, hc->pci_iobase);
 }
 
-inline void
+static inline void
 cpld_set_reg(struct hfc_multi *hc, unsigned char reg)
 {
 	/* Do data pin read low byte */
 	HFC_outb(hc, R_GPIO_OUT1, reg);
 }
 
-inline void
+static inline void
 cpld_write_reg(struct hfc_multi *hc, unsigned char reg, unsigned char val)
 {
 	cpld_set_reg(hc, reg);
@@ -653,7 +653,7 @@
 	return;
 }
 
-inline unsigned char
+static inline unsigned char
 cpld_read_reg(struct hfc_multi *hc, unsigned char reg)
 {
 	unsigned char bytein;
@@ -670,14 +670,14 @@
 	return bytein;
 }
 
-inline void
+static inline void
 vpm_write_address(struct hfc_multi *hc, unsigned short addr)
 {
 	cpld_write_reg(hc, 0, 0xff & addr);
 	cpld_write_reg(hc, 1, 0x01 & (addr >> 8));
 }
 
-inline unsigned short
+static inline unsigned short
 vpm_read_address(struct hfc_multi *c)
 {
 	unsigned short addr;
@@ -691,7 +691,7 @@
 	return addr & 0x1ff;
 }
 
-inline unsigned char
+static inline unsigned char
 vpm_in(struct hfc_multi *c, int which, unsigned short addr)
 {
 	unsigned char res;
@@ -712,7 +712,7 @@
 	return res;
 }
 
-inline void
+static inline void
 vpm_out(struct hfc_multi *c, int which, unsigned short addr,
 	unsigned char data)
 {
@@ -1024,7 +1024,7 @@
 }
 
 /* This must be called AND hc must be locked irqsave!!! */
-inline void
+static inline void
 plxsd_checksync(struct hfc_multi *hc, int rm)
 {
 	if (hc->syncronized) {
diff --git a/drivers/isdn/hardware/mISDN/mISDNipac.c b/drivers/isdn/hardware/mISDN/mISDNipac.c
index aa9b6c3..8d338ba 100644
--- a/drivers/isdn/hardware/mISDN/mISDNipac.c
+++ b/drivers/isdn/hardware/mISDN/mISDNipac.c
@@ -113,7 +113,7 @@
 	pr_debug("%s: TE newstate %x\n", isac->name, dch->state);
 }
 
-void
+static void
 isac_empty_fifo(struct isac_hw *isac, int count)
 {
 	u8 *ptr;
diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c
index 7416755..3b067ea 100644
--- a/drivers/isdn/hardware/mISDN/w6692.c
+++ b/drivers/isdn/hardware/mISDN/w6692.c
@@ -848,7 +848,7 @@
 	}
 }
 
-void initW6692(struct w6692_hw *card)
+static void initW6692(struct w6692_hw *card)
 {
 	u8	val;
 
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 9dcc9b1..7a628c6 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -584,6 +584,18 @@
 	  This driver can also be built as a module. If so the module
 	  will be called leds-sead3.
 
+config LEDS_IS31FL319X
+	tristate "LED Support for ISSI IS31FL319x I2C LED controller family"
+	depends on LEDS_CLASS && I2C && OF
+	select REGMAP_I2C
+	help
+	  This option enables support for LEDs connected to ISSI IS31FL319x
+	  fancy LED driver chips accessed via the I2C bus.
+	  Driver supports individual PWM brightness control for each channel.
+
+	  This driver can also be built as a module. If so the module will be
+	  called leds-is31fl319x.
+
 config LEDS_IS31FL32XX
 	tristate "LED support for ISSI IS31FL32XX I2C LED controller family"
 	depends on LEDS_CLASS && I2C && OF
@@ -631,6 +643,22 @@
 	  This option enabled support for the LEDs on the ARM Versatile
 	  and RealView boards. Say Y to enabled these.
 
+config LEDS_PM8058
+	tristate "LED Support for the Qualcomm PM8058 PMIC"
+	depends on MFD_PM8921_CORE
+	depends on LEDS_CLASS
+	help
+	  Choose this option if you want to use the LED drivers in
+	  the Qualcomm PM8058 PMIC.
+
+config LEDS_MLXCPLD
+	tristate "LED support for the Mellanox boards"
+	depends on X86_64 && DMI
+	depends on LEDS_CLASS
+	help
+	  This option enabled support for the LEDs on the Mellanox
+	  boards. Say Y to enabled these.
+
 comment "LED Triggers"
 source "drivers/leds/trigger/Kconfig"
 
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 0684c86..3965070 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -67,7 +67,10 @@
 obj-$(CONFIG_LEDS_KTD2692)		+= leds-ktd2692.o
 obj-$(CONFIG_LEDS_POWERNV)		+= leds-powernv.o
 obj-$(CONFIG_LEDS_SEAD3)		+= leds-sead3.o
+obj-$(CONFIG_LEDS_IS31FL319X)		+= leds-is31fl319x.o
 obj-$(CONFIG_LEDS_IS31FL32XX)		+= leds-is31fl32xx.o
+obj-$(CONFIG_LEDS_PM8058)		+= leds-pm8058.o
+obj-$(CONFIG_LEDS_MLXCPLD)		+= leds-mlxcpld.o
 
 # LED SPI Drivers
 obj-$(CONFIG_LEDS_DAC124S085)		+= leds-dac124s085.o
diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index c92702a..431123b 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -11,7 +11,7 @@
  *
  */
 
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
@@ -81,21 +81,23 @@
 	down_read(&led_cdev->trigger_lock);
 
 	if (!led_cdev->trigger)
-		len += sprintf(buf+len, "[none] ");
+		len += scnprintf(buf+len, PAGE_SIZE - len, "[none] ");
 	else
-		len += sprintf(buf+len, "none ");
+		len += scnprintf(buf+len, PAGE_SIZE - len, "none ");
 
 	list_for_each_entry(trig, &trigger_list, next_trig) {
 		if (led_cdev->trigger && !strcmp(led_cdev->trigger->name,
 							trig->name))
-			len += sprintf(buf+len, "[%s] ", trig->name);
+			len += scnprintf(buf+len, PAGE_SIZE - len, "[%s] ",
+					 trig->name);
 		else
-			len += sprintf(buf+len, "%s ", trig->name);
+			len += scnprintf(buf+len, PAGE_SIZE - len, "%s ",
+					 trig->name);
 	}
 	up_read(&led_cdev->trigger_lock);
 	up_read(&triggers_list_lock);
 
-	len += sprintf(len+buf, "\n");
+	len += scnprintf(len+buf, PAGE_SIZE - len, "\n");
 	return len;
 }
 EXPORT_SYMBOL_GPL(led_trigger_show);
@@ -108,6 +110,9 @@
 	char *envp[2];
 	const char *name;
 
+	if (!led_cdev->trigger && !trig)
+		return;
+
 	name = trig ? trig->name : "none";
 	event = kasprintf(GFP_KERNEL, "TRIGGER=%s", name);
 
@@ -136,7 +141,9 @@
 	if (event) {
 		envp[0] = event;
 		envp[1] = NULL;
-		kobject_uevent_env(&led_cdev->dev->kobj, KOBJ_CHANGE, envp);
+		if (kobject_uevent_env(&led_cdev->dev->kobj, KOBJ_CHANGE, envp))
+			dev_err(led_cdev->dev,
+				"%s: Error sending uevent\n", __func__);
 		kfree(event);
 	}
 }
@@ -357,7 +364,3 @@
 	kfree(trig);
 }
 EXPORT_SYMBOL_GPL(led_trigger_unregister_simple);
-
-MODULE_AUTHOR("Richard Purdie");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("LED Triggers Core");
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 9b991d4..d400dca 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -26,15 +26,19 @@
 	struct gpio_desc *gpiod;
 	u8 can_sleep;
 	u8 blinking;
-	int (*platform_gpio_blink_set)(struct gpio_desc *desc, int state,
-			unsigned long *delay_on, unsigned long *delay_off);
+	gpio_blink_set_t platform_gpio_blink_set;
 };
 
+static inline struct gpio_led_data *
+			cdev_to_gpio_led_data(struct led_classdev *led_cdev)
+{
+	return container_of(led_cdev, struct gpio_led_data, cdev);
+}
+
 static void gpio_led_set(struct led_classdev *led_cdev,
 	enum led_brightness value)
 {
-	struct gpio_led_data *led_dat =
-		container_of(led_cdev, struct gpio_led_data, cdev);
+	struct gpio_led_data *led_dat = cdev_to_gpio_led_data(led_cdev);
 	int level;
 
 	if (value == LED_OFF)
@@ -64,8 +68,7 @@
 static int gpio_blink_set(struct led_classdev *led_cdev,
 	unsigned long *delay_on, unsigned long *delay_off)
 {
-	struct gpio_led_data *led_dat =
-		container_of(led_cdev, struct gpio_led_data, cdev);
+	struct gpio_led_data *led_dat = cdev_to_gpio_led_data(led_cdev);
 
 	led_dat->blinking = 1;
 	return led_dat->platform_gpio_blink_set(led_dat->gpiod, GPIO_LED_BLINK,
@@ -74,8 +77,7 @@
 
 static int create_gpio_led(const struct gpio_led *template,
 	struct gpio_led_data *led_dat, struct device *parent,
-	int (*blink_set)(struct gpio_desc *, int, unsigned long *,
-			 unsigned long *))
+	gpio_blink_set_t blink_set)
 {
 	int ret, state;
 
@@ -120,10 +122,13 @@
 		led_dat->platform_gpio_blink_set = blink_set;
 		led_dat->cdev.blink_set = gpio_blink_set;
 	}
-	if (template->default_state == LEDS_GPIO_DEFSTATE_KEEP)
-		state = !!gpiod_get_value_cansleep(led_dat->gpiod);
-	else
+	if (template->default_state == LEDS_GPIO_DEFSTATE_KEEP) {
+		state = gpiod_get_value_cansleep(led_dat->gpiod);
+		if (state < 0)
+			return state;
+	} else {
 		state = (template->default_state == LEDS_GPIO_DEFSTATE_ON);
+	}
 	led_dat->cdev.brightness = state ? LED_FULL : LED_OFF;
 	if (!template->retain_state_suspended)
 		led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
@@ -134,7 +139,7 @@
 	if (ret < 0)
 		return ret;
 
-	return led_classdev_register(parent, &led_dat->cdev);
+	return devm_led_classdev_register(parent, &led_dat->cdev);
 }
 
 struct gpio_leds_priv {
@@ -154,7 +159,6 @@
 	struct fwnode_handle *child;
 	struct gpio_leds_priv *priv;
 	int count, ret;
-	struct device_node *np;
 
 	count = device_get_child_node_count(dev);
 	if (!count)
@@ -168,26 +172,22 @@
 		struct gpio_led_data *led_dat = &priv->leds[priv->num_leds];
 		struct gpio_led led = {};
 		const char *state = NULL;
+		struct device_node *np = to_of_node(child);
 
 		led.gpiod = devm_get_gpiod_from_child(dev, NULL, child);
 		if (IS_ERR(led.gpiod)) {
 			fwnode_handle_put(child);
-			ret = PTR_ERR(led.gpiod);
-			goto err;
+			return ERR_CAST(led.gpiod);
 		}
 
-		np = to_of_node(child);
-
-		if (fwnode_property_present(child, "label")) {
-			fwnode_property_read_string(child, "label", &led.name);
-		} else {
-			if (IS_ENABLED(CONFIG_OF) && !led.name && np)
-				led.name = np->name;
-			if (!led.name) {
-				ret = -EINVAL;
-				goto err;
-			}
+		ret = fwnode_property_read_string(child, "label", &led.name);
+		if (ret && IS_ENABLED(CONFIG_OF) && np)
+			led.name = np->name;
+		if (!led.name) {
+			fwnode_handle_put(child);
+			return ERR_PTR(-EINVAL);
 		}
+
 		fwnode_property_read_string(child, "linux,default-trigger",
 					    &led.default_trigger);
 
@@ -209,18 +209,13 @@
 		ret = create_gpio_led(&led, led_dat, dev, NULL);
 		if (ret < 0) {
 			fwnode_handle_put(child);
-			goto err;
+			return ERR_PTR(ret);
 		}
 		led_dat->cdev.dev->of_node = np;
 		priv->num_leds++;
 	}
 
 	return priv;
-
-err:
-	for (count = priv->num_leds - 1; count >= 0; count--)
-		led_classdev_unregister(&priv->leds[count].cdev);
-	return ERR_PTR(ret);
 }
 
 static const struct of_device_id of_gpio_leds_match[] = {
@@ -248,13 +243,8 @@
 			ret = create_gpio_led(&pdata->leds[i],
 					      &priv->leds[i],
 					      &pdev->dev, pdata->gpio_blink_set);
-			if (ret < 0) {
-				/* On failure: unwind the led creations */
-				for (i = i - 1; i >= 0; i--)
-					led_classdev_unregister(
-							&priv->leds[i].cdev);
+			if (ret < 0)
 				return ret;
-			}
 		}
 	} else {
 		priv = gpio_leds_create(pdev);
@@ -267,17 +257,6 @@
 	return 0;
 }
 
-static int gpio_led_remove(struct platform_device *pdev)
-{
-	struct gpio_leds_priv *priv = platform_get_drvdata(pdev);
-	int i;
-
-	for (i = 0; i < priv->num_leds; i++)
-		led_classdev_unregister(&priv->leds[i].cdev);
-
-	return 0;
-}
-
 static void gpio_led_shutdown(struct platform_device *pdev)
 {
 	struct gpio_leds_priv *priv = platform_get_drvdata(pdev);
@@ -292,7 +271,6 @@
 
 static struct platform_driver gpio_led_driver = {
 	.probe		= gpio_led_probe,
-	.remove		= gpio_led_remove,
 	.shutdown	= gpio_led_shutdown,
 	.driver		= {
 		.name	= "leds-gpio",
diff --git a/drivers/leds/leds-is31fl319x.c b/drivers/leds/leds-is31fl319x.c
new file mode 100644
index 0000000..f123309
--- /dev/null
+++ b/drivers/leds/leds-is31fl319x.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright 2015-16 Golden Delicious Computers
+ *
+ * Author: Nikolaus Schaller <hns@goldelico.com>
+ *
+ * This file is subject to the terms and conditions of version 2 of
+ * the GNU General Public License.  See the file COPYING in the main
+ * directory of this archive for more details.
+ *
+ * LED driver for the IS31FL319{0,1,3,6,9} to drive 1, 3, 6 or 9 light
+ * effect LEDs.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+/* register numbers */
+#define IS31FL319X_SHUTDOWN		0x00
+#define IS31FL319X_CTRL1		0x01
+#define IS31FL319X_CTRL2		0x02
+#define IS31FL319X_CONFIG1		0x03
+#define IS31FL319X_CONFIG2		0x04
+#define IS31FL319X_RAMP_MODE		0x05
+#define IS31FL319X_BREATH_MASK		0x06
+#define IS31FL319X_PWM(channel)		(0x07 + channel)
+#define IS31FL319X_DATA_UPDATE		0x10
+#define IS31FL319X_T0(channel)		(0x11 + channel)
+#define IS31FL319X_T123_1		0x1a
+#define IS31FL319X_T123_2		0x1b
+#define IS31FL319X_T123_3		0x1c
+#define IS31FL319X_T4(channel)		(0x1d + channel)
+#define IS31FL319X_TIME_UPDATE		0x26
+#define IS31FL319X_RESET		0xff
+
+#define IS31FL319X_REG_CNT		(IS31FL319X_RESET + 1)
+
+#define IS31FL319X_MAX_LEDS		9
+
+/* CS (Current Setting) in CONFIG2 register */
+#define IS31FL319X_CONFIG2_CS_SHIFT	4
+#define IS31FL319X_CONFIG2_CS_MASK	0x7
+#define IS31FL319X_CONFIG2_CS_STEP_REF	12
+
+#define IS31FL319X_CURRENT_MIN		((u32)5000)
+#define IS31FL319X_CURRENT_MAX		((u32)40000)
+#define IS31FL319X_CURRENT_STEP		((u32)5000)
+#define IS31FL319X_CURRENT_DEFAULT	((u32)20000)
+
+/* Audio gain in CONFIG2 register */
+#define IS31FL319X_AUDIO_GAIN_DB_MAX	((u32)21)
+#define IS31FL319X_AUDIO_GAIN_DB_STEP	((u32)3)
+
+/*
+ * regmap is used as a cache of chip's register space,
+ * to avoid reading back brightness values from chip,
+ * which is known to hang.
+ */
+struct is31fl319x_chip {
+	const struct is31fl319x_chipdef *cdef;
+	struct i2c_client               *client;
+	struct regmap                   *regmap;
+	struct mutex                    lock;
+	u32                             audio_gain_db;
+
+	struct is31fl319x_led {
+		struct is31fl319x_chip  *chip;
+		struct led_classdev     cdev;
+		u32                     max_microamp;
+		bool                    configured;
+	} leds[IS31FL319X_MAX_LEDS];
+};
+
+struct is31fl319x_chipdef {
+	int num_leds;
+};
+
+static const struct is31fl319x_chipdef is31fl3190_cdef = {
+	.num_leds = 1,
+};
+
+static const struct is31fl319x_chipdef is31fl3193_cdef = {
+	.num_leds = 3,
+};
+
+static const struct is31fl319x_chipdef is31fl3196_cdef = {
+	.num_leds = 6,
+};
+
+static const struct is31fl319x_chipdef is31fl3199_cdef = {
+	.num_leds = 9,
+};
+
+static const struct of_device_id of_is31fl319x_match[] = {
+	{ .compatible = "issi,is31fl3190", .data = &is31fl3190_cdef, },
+	{ .compatible = "issi,is31fl3191", .data = &is31fl3190_cdef, },
+	{ .compatible = "issi,is31fl3193", .data = &is31fl3193_cdef, },
+	{ .compatible = "issi,is31fl3196", .data = &is31fl3196_cdef, },
+	{ .compatible = "issi,is31fl3199", .data = &is31fl3199_cdef, },
+	{ .compatible = "si-en,sn3199",    .data = &is31fl3199_cdef, },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, of_is31fl319x_match);
+
+static int is31fl319x_brightness_set(struct led_classdev *cdev,
+				     enum led_brightness brightness)
+{
+	struct is31fl319x_led *led = container_of(cdev, struct is31fl319x_led,
+						  cdev);
+	struct is31fl319x_chip *is31 = led->chip;
+	int chan = led - is31->leds;
+	int ret;
+	int i;
+	u8 ctrl1 = 0, ctrl2 = 0;
+
+	dev_dbg(&is31->client->dev, "%s %d: %d\n", __func__, chan, brightness);
+
+	mutex_lock(&is31->lock);
+
+	/* update PWM register */
+	ret = regmap_write(is31->regmap, IS31FL319X_PWM(chan), brightness);
+	if (ret < 0)
+		goto out;
+
+	/* read current brightness of all PWM channels */
+	for (i = 0; i < is31->cdef->num_leds; i++) {
+		unsigned int pwm_value;
+		bool on;
+
+		/*
+		 * since neither cdev nor the chip can provide
+		 * the current setting, we read from the regmap cache
+		 */
+
+		ret = regmap_read(is31->regmap, IS31FL319X_PWM(i), &pwm_value);
+		dev_dbg(&is31->client->dev, "%s read %d: ret=%d: %d\n",
+			__func__, i, ret, pwm_value);
+		on = ret >= 0 && pwm_value > LED_OFF;
+
+		if (i < 3)
+			ctrl1 |= on << i;       /* 0..2 => bit 0..2 */
+		else if (i < 6)
+			ctrl1 |= on << (i + 1); /* 3..5 => bit 4..6 */
+		else
+			ctrl2 |= on << (i - 6); /* 6..8 => bit 0..2 */
+	}
+
+	if (ctrl1 > 0 || ctrl2 > 0) {
+		dev_dbg(&is31->client->dev, "power up %02x %02x\n",
+			ctrl1, ctrl2);
+		regmap_write(is31->regmap, IS31FL319X_CTRL1, ctrl1);
+		regmap_write(is31->regmap, IS31FL319X_CTRL2, ctrl2);
+		/* update PWMs */
+		regmap_write(is31->regmap, IS31FL319X_DATA_UPDATE, 0x00);
+		/* enable chip from shut down */
+		ret = regmap_write(is31->regmap, IS31FL319X_SHUTDOWN, 0x01);
+	} else {
+		dev_dbg(&is31->client->dev, "power down\n");
+		/* shut down (no need to clear CTRL1/2) */
+		ret = regmap_write(is31->regmap, IS31FL319X_SHUTDOWN, 0x00);
+	}
+
+out:
+	mutex_unlock(&is31->lock);
+
+	return ret;
+}
+
+static int is31fl319x_parse_child_dt(const struct device *dev,
+				     const struct device_node *child,
+				     struct is31fl319x_led *led)
+{
+	struct led_classdev *cdev = &led->cdev;
+	int ret;
+
+	if (of_property_read_string(child, "label", &cdev->name))
+		cdev->name = child->name;
+
+	ret = of_property_read_string(child, "linux,default-trigger",
+				      &cdev->default_trigger);
+	if (ret < 0 && ret != -EINVAL) /* is optional */
+		return ret;
+
+	led->max_microamp = IS31FL319X_CURRENT_DEFAULT;
+	ret = of_property_read_u32(child, "led-max-microamp",
+				   &led->max_microamp);
+	if (!ret) {
+		if (led->max_microamp < IS31FL319X_CURRENT_MIN)
+			return -EINVAL;	/* not supported */
+		led->max_microamp = min(led->max_microamp,
+					  IS31FL319X_CURRENT_MAX);
+	}
+
+	return 0;
+}
+
+static int is31fl319x_parse_dt(struct device *dev,
+			       struct is31fl319x_chip *is31)
+{
+	struct device_node *np = dev->of_node, *child;
+	const struct of_device_id *of_dev_id;
+	int count;
+	int ret;
+
+	if (!np)
+		return -ENODEV;
+
+	of_dev_id = of_match_device(of_is31fl319x_match, dev);
+	if (!of_dev_id) {
+		dev_err(dev, "Failed to match device with supported chips\n");
+		return -EINVAL;
+	}
+
+	is31->cdef = of_dev_id->data;
+
+	count = of_get_child_count(np);
+
+	dev_dbg(dev, "probe %s with %d leds defined in DT\n",
+		of_dev_id->compatible, count);
+
+	if (!count || count > is31->cdef->num_leds) {
+		dev_err(dev, "Number of leds defined must be between 1 and %u\n",
+			is31->cdef->num_leds);
+		return -ENODEV;
+	}
+
+	for_each_child_of_node(np, child) {
+		struct is31fl319x_led *led;
+		u32 reg;
+
+		ret = of_property_read_u32(child, "reg", &reg);
+		if (ret) {
+			dev_err(dev, "Failed to read led 'reg' property\n");
+			goto put_child_node;
+		}
+
+		if (reg < 1 || reg > is31->cdef->num_leds) {
+			dev_err(dev, "invalid led reg %u\n", reg);
+			ret = -EINVAL;
+			goto put_child_node;
+		}
+
+		led = &is31->leds[reg - 1];
+
+		if (led->configured) {
+			dev_err(dev, "led %u is already configured\n", reg);
+			ret = -EINVAL;
+			goto put_child_node;
+		}
+
+		ret = is31fl319x_parse_child_dt(dev, child, led);
+		if (ret) {
+			dev_err(dev, "led %u DT parsing failed\n", reg);
+			goto put_child_node;
+		}
+
+		led->configured = true;
+	}
+
+	is31->audio_gain_db = 0;
+	ret = of_property_read_u32(np, "audio-gain-db", &is31->audio_gain_db);
+	if (!ret)
+		is31->audio_gain_db = min(is31->audio_gain_db,
+					  IS31FL319X_AUDIO_GAIN_DB_MAX);
+
+	return 0;
+
+put_child_node:
+	of_node_put(child);
+	return ret;
+}
+
+static bool is31fl319x_readable_reg(struct device *dev, unsigned int reg)
+{ /* we have no readable registers */
+	return false;
+}
+
+static bool is31fl319x_volatile_reg(struct device *dev, unsigned int reg)
+{ /* volatile registers are not cached */
+	switch (reg) {
+	case IS31FL319X_DATA_UPDATE:
+	case IS31FL319X_TIME_UPDATE:
+	case IS31FL319X_RESET:
+		return true; /* always write-through */
+	default:
+		return false;
+	}
+}
+
+static const struct reg_default is31fl319x_reg_defaults[] = {
+	{ IS31FL319X_CONFIG1, 0x00},
+	{ IS31FL319X_CONFIG2, 0x00},
+	{ IS31FL319X_PWM(0), 0x00},
+	{ IS31FL319X_PWM(1), 0x00},
+	{ IS31FL319X_PWM(2), 0x00},
+	{ IS31FL319X_PWM(3), 0x00},
+	{ IS31FL319X_PWM(4), 0x00},
+	{ IS31FL319X_PWM(5), 0x00},
+	{ IS31FL319X_PWM(6), 0x00},
+	{ IS31FL319X_PWM(7), 0x00},
+	{ IS31FL319X_PWM(8), 0x00},
+};
+
+static struct regmap_config regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = IS31FL319X_REG_CNT,
+	.cache_type = REGCACHE_FLAT,
+	.readable_reg = is31fl319x_readable_reg,
+	.volatile_reg = is31fl319x_volatile_reg,
+	.reg_defaults = is31fl319x_reg_defaults,
+	.num_reg_defaults = ARRAY_SIZE(is31fl319x_reg_defaults),
+};
+
+static inline int is31fl319x_microamp_to_cs(struct device *dev, u32 microamp)
+{ /* round down to nearest supported value (range check done by caller) */
+	u32 step = microamp / IS31FL319X_CURRENT_STEP;
+
+	return ((IS31FL319X_CONFIG2_CS_STEP_REF - step) &
+		IS31FL319X_CONFIG2_CS_MASK) <<
+		IS31FL319X_CONFIG2_CS_SHIFT; /* CS encoding */
+}
+
+static inline int is31fl319x_db_to_gain(u32 dezibel)
+{ /* round down to nearest supported value (range check done by caller) */
+	return dezibel / IS31FL319X_AUDIO_GAIN_DB_STEP;
+}
+
+static int is31fl319x_probe(struct i2c_client *client,
+			    const struct i2c_device_id *id)
+{
+	struct is31fl319x_chip *is31;
+	struct device *dev = &client->dev;
+	struct i2c_adapter *adapter = to_i2c_adapter(dev->parent);
+	int err;
+	int i = 0;
+	u32 aggregated_led_microamp = IS31FL319X_CURRENT_MAX;
+
+	if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
+		return -EIO;
+
+	is31 = devm_kzalloc(&client->dev, sizeof(*is31), GFP_KERNEL);
+	if (!is31)
+		return -ENOMEM;
+
+	mutex_init(&is31->lock);
+
+	err = is31fl319x_parse_dt(&client->dev, is31);
+	if (err)
+		goto free_mutex;
+
+	is31->client = client;
+	is31->regmap = devm_regmap_init_i2c(client, &regmap_config);
+	if (IS_ERR(is31->regmap)) {
+		dev_err(&client->dev, "failed to allocate register map\n");
+		err = PTR_ERR(is31->regmap);
+		goto free_mutex;
+	}
+
+	i2c_set_clientdata(client, is31);
+
+	/* check for write-reply from chip (we can't read any registers) */
+	err = regmap_write(is31->regmap, IS31FL319X_RESET, 0x00);
+	if (err < 0) {
+		dev_err(&client->dev, "no response from chip write: err = %d\n",
+			err);
+		err = -EIO; /* does not answer */
+		goto free_mutex;
+	}
+
+	/*
+	 * Kernel conventions require per-LED led-max-microamp property.
+	 * But the chip does not allow to limit individual LEDs.
+	 * So we take minimum from all subnodes for safety of hardware.
+	 */
+	for (i = 0; i < is31->cdef->num_leds; i++)
+		if (is31->leds[i].configured &&
+		    is31->leds[i].max_microamp < aggregated_led_microamp)
+			aggregated_led_microamp = is31->leds[i].max_microamp;
+
+	regmap_write(is31->regmap, IS31FL319X_CONFIG2,
+		     is31fl319x_microamp_to_cs(dev, aggregated_led_microamp) |
+		     is31fl319x_db_to_gain(is31->audio_gain_db));
+
+	for (i = 0; i < is31->cdef->num_leds; i++) {
+		struct is31fl319x_led *led = &is31->leds[i];
+
+		if (!led->configured)
+			continue;
+
+		led->chip = is31;
+		led->cdev.brightness_set_blocking = is31fl319x_brightness_set;
+
+		err = devm_led_classdev_register(&client->dev, &led->cdev);
+		if (err < 0)
+			goto free_mutex;
+	}
+
+	return 0;
+
+free_mutex:
+	mutex_destroy(&is31->lock);
+	return err;
+}
+
+static int is31fl319x_remove(struct i2c_client *client)
+{
+	struct is31fl319x_chip *is31 = i2c_get_clientdata(client);
+
+	mutex_destroy(&is31->lock);
+	return 0;
+}
+
+/*
+ * i2c-core (and modalias) requires that id_table be properly filled,
+ * even though it is not used for DeviceTree based instantiation.
+ */
+static const struct i2c_device_id is31fl319x_id[] = {
+	{ "is31fl3190" },
+	{ "is31fl3191" },
+	{ "is31fl3193" },
+	{ "is31fl3196" },
+	{ "is31fl3199" },
+	{ "sn3199" },
+	{},
+};
+MODULE_DEVICE_TABLE(i2c, is31fl319x_id);
+
+static struct i2c_driver is31fl319x_driver = {
+	.driver   = {
+		.name           = "leds-is31fl319x",
+		.of_match_table = of_match_ptr(of_is31fl319x_match),
+	},
+	.probe    = is31fl319x_probe,
+	.remove   = is31fl319x_remove,
+	.id_table = is31fl319x_id,
+};
+
+module_i2c_driver(is31fl319x_driver);
+
+MODULE_AUTHOR("H. Nikolaus Schaller <hns@goldelico.com>");
+MODULE_AUTHOR("Andrey Utkin <andrey_utkin@fastmail.com>");
+MODULE_DESCRIPTION("IS31FL319X LED driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/leds/leds-mlxcpld.c b/drivers/leds/leds-mlxcpld.c
new file mode 100644
index 0000000..197ab9b
--- /dev/null
+++ b/drivers/leds/leds-mlxcpld.c
@@ -0,0 +1,430 @@
+/*
+ * drivers/leds/leds-mlxcpld.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/dmi.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/io.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#define MLXPLAT_CPLD_LPC_REG_BASE_ADRR     0x2500 /* LPC bus access */
+
+/* Color codes for LEDs */
+#define MLXCPLD_LED_OFFSET_HALF		0x01 /* Offset from solid: 3Hz blink */
+#define MLXCPLD_LED_OFFSET_FULL		0x02 /* Offset from solid: 6Hz blink */
+#define MLXCPLD_LED_IS_OFF		0x00 /* Off */
+#define MLXCPLD_LED_RED_STATIC_ON	0x05 /* Solid red */
+#define MLXCPLD_LED_RED_BLINK_HALF	(MLXCPLD_LED_RED_STATIC_ON + \
+					 MLXCPLD_LED_OFFSET_HALF)
+#define MLXCPLD_LED_RED_BLINK_FULL	(MLXCPLD_LED_RED_STATIC_ON + \
+					 MLXCPLD_LED_OFFSET_FULL)
+#define MLXCPLD_LED_GREEN_STATIC_ON	0x0D /* Solid green */
+#define MLXCPLD_LED_GREEN_BLINK_HALF	(MLXCPLD_LED_GREEN_STATIC_ON + \
+					 MLXCPLD_LED_OFFSET_HALF)
+#define MLXCPLD_LED_GREEN_BLINK_FULL	(MLXCPLD_LED_GREEN_STATIC_ON + \
+					 MLXCPLD_LED_OFFSET_FULL)
+#define MLXCPLD_LED_BLINK_3HZ		167 /* ~167 msec off/on */
+#define MLXCPLD_LED_BLINK_6HZ		83 /* ~83 msec off/on */
+
+/**
+ * mlxcpld_param - LED access parameters:
+ * @offset - offset for LED access in CPLD device
+ * @mask - mask for LED access in CPLD device
+ * @base_color - base color code for LED
+**/
+struct mlxcpld_param {
+	u8 offset;
+	u8 mask;
+	u8 base_color;
+};
+
+/**
+ * mlxcpld_led_priv - LED private data:
+ * @cled - LED class device instance
+ * @param - LED CPLD access parameters
+**/
+struct mlxcpld_led_priv {
+	struct led_classdev cdev;
+	struct mlxcpld_param param;
+};
+
+#define cdev_to_priv(c)		container_of(c, struct mlxcpld_led_priv, cdev)
+
+/**
+ * mlxcpld_led_profile - system LED profile (defined per system class):
+ * @offset - offset for LED access in CPLD device
+ * @mask - mask for LED access in CPLD device
+ * @base_color - base color code
+ * @brightness - default brightness setting (on/off)
+ * @name - LED name
+**/
+struct mlxcpld_led_profile {
+	u8 offset;
+	u8 mask;
+	u8 base_color;
+	enum led_brightness brightness;
+	const char *name;
+};
+
+/**
+ * mlxcpld_led_pdata - system LED private data
+ * @pdev - platform device pointer
+ * @pled - LED class device instance
+ * @profile - system configuration profile
+ * @num_led_instances - number of LED instances
+ * @lock - device access lock
+**/
+struct mlxcpld_led_pdata {
+	struct platform_device *pdev;
+	struct mlxcpld_led_priv *pled;
+	struct mlxcpld_led_profile *profile;
+	int num_led_instances;
+	spinlock_t lock;
+};
+
+static struct mlxcpld_led_pdata *mlxcpld_led;
+
+/* Default profile fit the next Mellanox systems:
+ * "msx6710", "msx6720", "msb7700", "msn2700", "msx1410",
+ * "msn2410", "msb7800", "msn2740"
+ */
+static struct mlxcpld_led_profile mlxcpld_led_default_profile[] = {
+	{
+		0x21, 0xf0, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:fan1:green",
+	},
+	{
+		0x21, 0xf0, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:fan1:red",
+	},
+	{
+		0x21, 0x0f, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:fan2:green",
+	},
+	{
+		0x21, 0x0f, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:fan2:red",
+	},
+	{
+		0x22, 0xf0, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:fan3:green",
+	},
+	{
+		0x22, 0xf0, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:fan3:red",
+	},
+	{
+		0x22, 0x0f, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:fan4:green",
+	},
+	{
+		0x22, 0x0f, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:fan4:red",
+	},
+	{
+		0x20, 0x0f, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:psu:green",
+	},
+	{
+		0x20, 0x0f, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:psu:red",
+	},
+	{
+		0x20, 0xf0, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:status:green",
+	},
+	{
+		0x20, 0xf0, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:status:red",
+	},
+};
+
+/* Profile fit the Mellanox systems based on "msn2100" */
+static struct mlxcpld_led_profile mlxcpld_led_msn2100_profile[] = {
+	{
+		0x21, 0xf0, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:fan:green",
+	},
+	{
+		0x21, 0xf0, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:fan:red",
+	},
+	{
+		0x23, 0xf0, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:psu1:green",
+	},
+	{
+		0x23, 0xf0, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:psu1:red",
+	},
+	{
+		0x23, 0x0f, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:psu2:green",
+	},
+	{
+		0x23, 0x0f, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:psu2:red",
+	},
+	{
+		0x20, 0xf0, MLXCPLD_LED_GREEN_STATIC_ON, 1,
+		"mlxcpld:status:green",
+	},
+	{
+		0x20, 0xf0, MLXCPLD_LED_RED_STATIC_ON, LED_OFF,
+		"mlxcpld:status:red",
+	},
+	{
+		0x24, 0xf0, MLXCPLD_LED_GREEN_STATIC_ON, LED_OFF,
+		"mlxcpld:uid:blue",
+	},
+};
+
+enum mlxcpld_led_platform_types {
+	MLXCPLD_LED_PLATFORM_DEFAULT,
+	MLXCPLD_LED_PLATFORM_MSN2100,
+};
+
+static const char *mlx_product_names[] = {
+	"DEFAULT",
+	"MSN2100",
+};
+
+static enum
+mlxcpld_led_platform_types mlxcpld_led_platform_check_sys_type(void)
+{
+	const char *mlx_product_name;
+	int i;
+
+	mlx_product_name = dmi_get_system_info(DMI_PRODUCT_NAME);
+	if (!mlx_product_name)
+		return MLXCPLD_LED_PLATFORM_DEFAULT;
+
+	for (i = 1;  i < ARRAY_SIZE(mlx_product_names); i++) {
+		if (strstr(mlx_product_name, mlx_product_names[i]))
+			return i;
+	}
+
+	return MLXCPLD_LED_PLATFORM_DEFAULT;
+}
+
+static void mlxcpld_led_bus_access_func(u16 base, u8 offset, u8 rw_flag,
+					u8 *data)
+{
+	u32 addr = base + offset;
+
+	if (rw_flag == 0)
+		outb(*data, addr);
+	else
+		*data = inb(addr);
+}
+
+static void mlxcpld_led_store_hw(u8 mask, u8 off, u8 vset)
+{
+	u8 nib, val;
+
+	/*
+	 * Each LED is controlled through low or high nibble of the relevant
+	 * CPLD register. Register offset is specified by off parameter.
+	 * Parameter vset provides color code: 0x0 for off, 0x5 for solid red,
+	 * 0x6 for 3Hz blink red, 0xd for solid green, 0xe for 3Hz blink
+	 * green.
+	 * Parameter mask specifies which nibble is used for specific LED: mask
+	 * 0xf0 - lower nibble is to be used (bits from 0 to 3), mask 0x0f -
+	 * higher nibble (bits from 4 to 7).
+	 */
+	spin_lock(&mlxcpld_led->lock);
+	mlxcpld_led_bus_access_func(MLXPLAT_CPLD_LPC_REG_BASE_ADRR, off, 1,
+				    &val);
+	nib = (mask == 0xf0) ? vset : (vset << 4);
+	val = (val & mask) | nib;
+	mlxcpld_led_bus_access_func(MLXPLAT_CPLD_LPC_REG_BASE_ADRR, off, 0,
+				    &val);
+	spin_unlock(&mlxcpld_led->lock);
+}
+
+static void mlxcpld_led_brightness_set(struct led_classdev *led,
+				       enum led_brightness value)
+{
+	struct mlxcpld_led_priv *pled = cdev_to_priv(led);
+
+	if (value) {
+		mlxcpld_led_store_hw(pled->param.mask, pled->param.offset,
+				     pled->param.base_color);
+		return;
+	}
+
+	mlxcpld_led_store_hw(pled->param.mask, pled->param.offset,
+			     MLXCPLD_LED_IS_OFF);
+}
+
+static int mlxcpld_led_blink_set(struct led_classdev *led,
+				 unsigned long *delay_on,
+				 unsigned long *delay_off)
+{
+	struct mlxcpld_led_priv *pled = cdev_to_priv(led);
+
+	/*
+	 * HW supports two types of blinking: full (6Hz) and half (3Hz).
+	 * For delay on/off zero default setting 3Hz is used.
+	 */
+	if (!(*delay_on == 0 && *delay_off == 0) &&
+	    !(*delay_on == MLXCPLD_LED_BLINK_3HZ &&
+	      *delay_off == MLXCPLD_LED_BLINK_3HZ) &&
+	    !(*delay_on == MLXCPLD_LED_BLINK_6HZ &&
+	      *delay_off == MLXCPLD_LED_BLINK_6HZ))
+		return -EINVAL;
+
+	if (*delay_on == MLXCPLD_LED_BLINK_6HZ)
+		mlxcpld_led_store_hw(pled->param.mask, pled->param.offset,
+				     pled->param.base_color +
+				     MLXCPLD_LED_OFFSET_FULL);
+	else
+		mlxcpld_led_store_hw(pled->param.mask, pled->param.offset,
+				     pled->param.base_color +
+				     MLXCPLD_LED_OFFSET_HALF);
+
+	return 0;
+}
+
+static int mlxcpld_led_config(struct device *dev,
+			      struct mlxcpld_led_pdata *cpld)
+{
+	int i;
+	int err;
+
+	cpld->pled = devm_kzalloc(dev, sizeof(struct mlxcpld_led_priv) *
+				  cpld->num_led_instances, GFP_KERNEL);
+	if (!cpld->pled)
+		return -ENOMEM;
+
+	for (i = 0; i < cpld->num_led_instances; i++) {
+		cpld->pled[i].cdev.name = cpld->profile[i].name;
+		cpld->pled[i].cdev.brightness = cpld->profile[i].brightness;
+		cpld->pled[i].cdev.max_brightness = 1;
+		cpld->pled[i].cdev.brightness_set = mlxcpld_led_brightness_set;
+		cpld->pled[i].cdev.blink_set = mlxcpld_led_blink_set;
+		cpld->pled[i].cdev.flags = LED_CORE_SUSPENDRESUME;
+		err = devm_led_classdev_register(dev, &cpld->pled[i].cdev);
+		if (err)
+			return err;
+
+		cpld->pled[i].param.offset = mlxcpld_led->profile[i].offset;
+		cpld->pled[i].param.mask = mlxcpld_led->profile[i].mask;
+		cpld->pled[i].param.base_color =
+					mlxcpld_led->profile[i].base_color;
+
+		if (mlxcpld_led->profile[i].brightness)
+			mlxcpld_led_brightness_set(&cpld->pled[i].cdev,
+					mlxcpld_led->profile[i].brightness);
+	}
+
+	return 0;
+}
+
+static int __init mlxcpld_led_probe(struct platform_device *pdev)
+{
+	enum mlxcpld_led_platform_types mlxcpld_led_plat =
+					mlxcpld_led_platform_check_sys_type();
+
+	mlxcpld_led = devm_kzalloc(&pdev->dev, sizeof(*mlxcpld_led),
+				   GFP_KERNEL);
+	if (!mlxcpld_led)
+		return -ENOMEM;
+
+	mlxcpld_led->pdev = pdev;
+
+	switch (mlxcpld_led_plat) {
+	case MLXCPLD_LED_PLATFORM_MSN2100:
+		mlxcpld_led->profile = mlxcpld_led_msn2100_profile;
+		mlxcpld_led->num_led_instances =
+				ARRAY_SIZE(mlxcpld_led_msn2100_profile);
+		break;
+
+	default:
+		mlxcpld_led->profile = mlxcpld_led_default_profile;
+		mlxcpld_led->num_led_instances =
+				ARRAY_SIZE(mlxcpld_led_default_profile);
+		break;
+	}
+
+	spin_lock_init(&mlxcpld_led->lock);
+
+	return mlxcpld_led_config(&pdev->dev, mlxcpld_led);
+}
+
+static struct platform_driver mlxcpld_led_driver = {
+	.driver = {
+		.name	= KBUILD_MODNAME,
+	},
+};
+
+static int __init mlxcpld_led_init(void)
+{
+	struct platform_device *pdev;
+	int err;
+
+	pdev = platform_device_register_simple(KBUILD_MODNAME, -1, NULL, 0);
+	if (IS_ERR(pdev)) {
+		pr_err("Device allocation failed\n");
+		return PTR_ERR(pdev);
+	}
+
+	err = platform_driver_probe(&mlxcpld_led_driver, mlxcpld_led_probe);
+	if (err) {
+		pr_err("Probe platform driver failed\n");
+		platform_device_unregister(pdev);
+	}
+
+	return err;
+}
+
+static void __exit mlxcpld_led_exit(void)
+{
+	platform_device_unregister(mlxcpld_led->pdev);
+	platform_driver_unregister(&mlxcpld_led_driver);
+}
+
+module_init(mlxcpld_led_init);
+module_exit(mlxcpld_led_exit);
+
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox board LED driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:leds_mlxcpld");
diff --git a/drivers/leds/leds-pm8058.c b/drivers/leds/leds-pm8058.c
new file mode 100644
index 0000000..a526743
--- /dev/null
+++ b/drivers/leds/leds-pm8058.c
@@ -0,0 +1,191 @@
+/* Copyright (c) 2010, 2011, 2016 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/regmap.h>
+
+#define PM8058_LED_TYPE_COMMON	0x00
+#define PM8058_LED_TYPE_KEYPAD	0x01
+#define PM8058_LED_TYPE_FLASH	0x02
+
+#define PM8058_LED_TYPE_COMMON_MASK	0xf8
+#define PM8058_LED_TYPE_KEYPAD_MASK	0xf0
+#define PM8058_LED_TYPE_COMMON_SHIFT	3
+#define PM8058_LED_TYPE_KEYPAD_SHIFT	4
+
+struct pm8058_led {
+	struct regmap *map;
+	u32 reg;
+	u32 ledtype;
+	struct led_classdev cdev;
+};
+
+static void pm8058_led_set(struct led_classdev *cled,
+	enum led_brightness value)
+{
+	struct pm8058_led *led;
+	int ret = 0;
+	unsigned int mask = 0;
+	unsigned int val = 0;
+
+	led = container_of(cled, struct pm8058_led, cdev);
+	switch (led->ledtype) {
+	case PM8058_LED_TYPE_COMMON:
+		mask = PM8058_LED_TYPE_COMMON_MASK;
+		val = value << PM8058_LED_TYPE_COMMON_SHIFT;
+		break;
+	case PM8058_LED_TYPE_KEYPAD:
+	case PM8058_LED_TYPE_FLASH:
+		mask = PM8058_LED_TYPE_KEYPAD_MASK;
+		val = value << PM8058_LED_TYPE_KEYPAD_SHIFT;
+		break;
+	default:
+		break;
+	}
+
+	ret = regmap_update_bits(led->map, led->reg, mask, val);
+	if (ret)
+		pr_err("Failed to set LED brightness\n");
+}
+
+static enum led_brightness pm8058_led_get(struct led_classdev *cled)
+{
+	struct pm8058_led *led;
+	int ret;
+	unsigned int val;
+
+	led = container_of(cled, struct pm8058_led, cdev);
+
+	ret = regmap_read(led->map, led->reg, &val);
+	if (ret) {
+		pr_err("Failed to get LED brightness\n");
+		return LED_OFF;
+	}
+
+	switch (led->ledtype) {
+	case PM8058_LED_TYPE_COMMON:
+		val &= PM8058_LED_TYPE_COMMON_MASK;
+		val >>= PM8058_LED_TYPE_COMMON_SHIFT;
+		break;
+	case PM8058_LED_TYPE_KEYPAD:
+	case PM8058_LED_TYPE_FLASH:
+		val &= PM8058_LED_TYPE_KEYPAD_MASK;
+		val >>= PM8058_LED_TYPE_KEYPAD_SHIFT;
+		break;
+	default:
+		val = LED_OFF;
+		break;
+	}
+
+	return val;
+}
+
+static int pm8058_led_probe(struct platform_device *pdev)
+{
+	struct pm8058_led *led;
+	struct device_node *np = pdev->dev.of_node;
+	int ret;
+	struct regmap *map;
+	const char *state;
+	enum led_brightness maxbright;
+
+	led = devm_kzalloc(&pdev->dev, sizeof(*led), GFP_KERNEL);
+	if (!led)
+		return -ENOMEM;
+
+	led->ledtype = (u32)of_device_get_match_data(&pdev->dev);
+
+	map = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!map) {
+		dev_err(&pdev->dev, "Parent regmap unavailable.\n");
+		return -ENXIO;
+	}
+	led->map = map;
+
+	ret = of_property_read_u32(np, "reg", &led->reg);
+	if (ret) {
+		dev_err(&pdev->dev, "no register offset specified\n");
+		return -EINVAL;
+	}
+
+	/* Use label else node name */
+	led->cdev.name = of_get_property(np, "label", NULL) ? : np->name;
+	led->cdev.default_trigger =
+		of_get_property(np, "linux,default-trigger", NULL);
+	led->cdev.brightness_set = pm8058_led_set;
+	led->cdev.brightness_get = pm8058_led_get;
+	if (led->ledtype == PM8058_LED_TYPE_COMMON)
+		maxbright = 31; /* 5 bits */
+	else
+		maxbright = 15; /* 4 bits */
+	led->cdev.max_brightness = maxbright;
+
+	state = of_get_property(np, "default-state", NULL);
+	if (state) {
+		if (!strcmp(state, "keep")) {
+			led->cdev.brightness = pm8058_led_get(&led->cdev);
+		} else if (!strcmp(state, "on")) {
+			led->cdev.brightness = maxbright;
+			pm8058_led_set(&led->cdev, maxbright);
+		} else {
+			led->cdev.brightness = LED_OFF;
+			pm8058_led_set(&led->cdev, LED_OFF);
+		}
+	}
+
+	if (led->ledtype == PM8058_LED_TYPE_KEYPAD ||
+	    led->ledtype == PM8058_LED_TYPE_FLASH)
+		led->cdev.flags	= LED_CORE_SUSPENDRESUME;
+
+	ret = devm_led_classdev_register(&pdev->dev, &led->cdev);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to register led \"%s\"\n",
+			led->cdev.name);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct of_device_id pm8058_leds_id_table[] = {
+	{
+		.compatible = "qcom,pm8058-led",
+		.data = (void *)PM8058_LED_TYPE_COMMON
+	},
+	{
+		.compatible = "qcom,pm8058-keypad-led",
+		.data = (void *)PM8058_LED_TYPE_KEYPAD
+	},
+	{
+		.compatible = "qcom,pm8058-flash-led",
+		.data = (void *)PM8058_LED_TYPE_FLASH
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, pm8058_leds_id_table);
+
+static struct platform_driver pm8058_led_driver = {
+	.probe		= pm8058_led_probe,
+	.driver		= {
+		.name	= "pm8058-leds",
+		.of_match_table = pm8058_leds_id_table,
+	},
+};
+module_platform_driver(pm8058_led_driver);
+
+MODULE_DESCRIPTION("PM8058 LEDs driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:pm8058-leds");
diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 9ebd2cf..c784ddcd 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -1171,27 +1171,10 @@
 	.nodename	= "lightnvm/control",
 	.fops		= &_ctl_fops,
 };
+module_misc_device(_nvm_misc);
 
 MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR);
 
-static int __init nvm_mod_init(void)
-{
-	int ret;
-
-	ret = misc_register(&_nvm_misc);
-	if (ret)
-		pr_err("nvm: misc_register failed for control device");
-
-	return ret;
-}
-
-static void __exit nvm_mod_exit(void)
-{
-	misc_deregister(&_nvm_misc);
-}
-
 MODULE_AUTHOR("Matias Bjorling <m@bjorling.me>");
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION("0.1");
-module_init(nvm_mod_init);
-module_exit(nvm_mod_exit);
diff --git a/drivers/macintosh/ams/ams-i2c.c b/drivers/macintosh/ams/ams-i2c.c
index 978eda8..8a3ba56 100644
--- a/drivers/macintosh/ams/ams-i2c.c
+++ b/drivers/macintosh/ams/ams-i2c.c
@@ -73,7 +73,6 @@
 static struct i2c_driver ams_i2c_driver = {
 	.driver = {
 		.name   = "ams",
-		.owner  = THIS_MODULE,
 	},
 	.probe          = ams_i2c_probe,
 	.remove         = ams_i2c_remove,
diff --git a/drivers/macintosh/windfarm_pm112.c b/drivers/macintosh/windfarm_pm112.c
index 3024685..96d16fc 100644
--- a/drivers/macintosh/windfarm_pm112.c
+++ b/drivers/macintosh/windfarm_pm112.c
@@ -668,7 +668,6 @@
 	.remove = wf_pm112_remove,
 	.driver = {
 		.name = "windfarm",
-		.owner	= THIS_MODULE,
 	},
 };
 
diff --git a/drivers/macintosh/windfarm_pm72.c b/drivers/macintosh/windfarm_pm72.c
index 2f506b9..e88cfb3 100644
--- a/drivers/macintosh/windfarm_pm72.c
+++ b/drivers/macintosh/windfarm_pm72.c
@@ -789,7 +789,6 @@
 	.remove	= wf_pm72_remove,
 	.driver	= {
 		.name = "windfarm",
-		.owner	= THIS_MODULE,
 	},
 };
 
diff --git a/drivers/macintosh/windfarm_rm31.c b/drivers/macintosh/windfarm_rm31.c
index 82fc86a..bdfcb8a 100644
--- a/drivers/macintosh/windfarm_rm31.c
+++ b/drivers/macintosh/windfarm_rm31.c
@@ -682,7 +682,6 @@
 	.remove	= wf_rm31_remove,
 	.driver	= {
 		.name = "windfarm",
-		.owner	= THIS_MODULE,
 	},
 };
 
diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index 97c3729..7817d40 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -127,6 +127,7 @@
 config BCM_PDC_MBOX
 	tristate "Broadcom PDC Mailbox"
 	depends on ARM64 || COMPILE_TEST
+	depends on HAS_DMA
 	default ARCH_BCM_IPROC
 	help
 	  Mailbox implementation for the Broadcom PDC ring manager,
diff --git a/drivers/mailbox/bcm-pdc-mailbox.c b/drivers/mailbox/bcm-pdc-mailbox.c
index cbe0c1e..c19dd82 100644
--- a/drivers/mailbox/bcm-pdc-mailbox.c
+++ b/drivers/mailbox/bcm-pdc-mailbox.c
@@ -469,7 +469,7 @@
  * this directory for a SPU.
  * @pdcs: PDC state structure
  */
-void pdc_setup_debugfs(struct pdc_state *pdcs)
+static void pdc_setup_debugfs(struct pdc_state *pdcs)
 {
 	char spu_stats_name[16];
 
@@ -485,7 +485,7 @@
 						  &pdc_debugfs_stats);
 }
 
-void pdc_free_debugfs(void)
+static void pdc_free_debugfs(void)
 {
 	if (debugfs_dir && simple_empty(debugfs_dir)) {
 		debugfs_remove_recursive(debugfs_dir);
@@ -1191,10 +1191,11 @@
 {
 	struct pdc_state *pdcs = chan->con_priv;
 
-	if (pdcs)
-		dev_dbg(&pdcs->pdev->dev,
-			"Shutdown mailbox channel for PDC %u", pdcs->pdc_idx);
+	if (!pdcs)
+		return;
 
+	dev_dbg(&pdcs->pdev->dev,
+		"Shutdown mailbox channel for PDC %u", pdcs->pdc_idx);
 	pdc_ring_free(pdcs);
 }
 
diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c
index 043828d..08c87fa 100644
--- a/drivers/mailbox/pcc.c
+++ b/drivers/mailbox/pcc.c
@@ -59,6 +59,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/init.h>
+#include <linux/interrupt.h>
 #include <linux/list.h>
 #include <linux/platform_device.h>
 #include <linux/mailbox_controller.h>
@@ -68,11 +69,16 @@
 #include "mailbox.h"
 
 #define MAX_PCC_SUBSPACES	256
+#define MBOX_IRQ_NAME		"pcc-mbox"
 
 static struct mbox_chan *pcc_mbox_channels;
 
 /* Array of cached virtual address for doorbell registers */
 static void __iomem **pcc_doorbell_vaddr;
+/* Array of cached virtual address for doorbell ack registers */
+static void __iomem **pcc_doorbell_ack_vaddr;
+/* Array of doorbell interrupts */
+static int *pcc_doorbell_irq;
 
 static struct mbox_controller pcc_mbox_ctrl = {};
 /**
@@ -91,79 +97,6 @@
 	return &pcc_mbox_channels[id];
 }
 
-/**
- * pcc_mbox_request_channel - PCC clients call this function to
- *		request a pointer to their PCC subspace, from which they
- *		can get the details of communicating with the remote.
- * @cl: Pointer to Mailbox client, so we know where to bind the
- *		Channel.
- * @subspace_id: The PCC Subspace index as parsed in the PCC client
- *		ACPI package. This is used to lookup the array of PCC
- *		subspaces as parsed by the PCC Mailbox controller.
- *
- * Return: Pointer to the Mailbox Channel if successful or
- *		ERR_PTR.
- */
-struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
-		int subspace_id)
-{
-	struct device *dev = pcc_mbox_ctrl.dev;
-	struct mbox_chan *chan;
-	unsigned long flags;
-
-	/*
-	 * Each PCC Subspace is a Mailbox Channel.
-	 * The PCC Clients get their PCC Subspace ID
-	 * from their own tables and pass it here.
-	 * This returns a pointer to the PCC subspace
-	 * for the Client to operate on.
-	 */
-	chan = get_pcc_channel(subspace_id);
-
-	if (IS_ERR(chan) || chan->cl) {
-		dev_err(dev, "Channel not found for idx: %d\n", subspace_id);
-		return ERR_PTR(-EBUSY);
-	}
-
-	spin_lock_irqsave(&chan->lock, flags);
-	chan->msg_free = 0;
-	chan->msg_count = 0;
-	chan->active_req = NULL;
-	chan->cl = cl;
-	init_completion(&chan->tx_complete);
-
-	if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
-		chan->txdone_method |= TXDONE_BY_ACK;
-
-	spin_unlock_irqrestore(&chan->lock, flags);
-
-	return chan;
-}
-EXPORT_SYMBOL_GPL(pcc_mbox_request_channel);
-
-/**
- * pcc_mbox_free_channel - Clients call this to free their Channel.
- *
- * @chan: Pointer to the mailbox channel as returned by
- *		pcc_mbox_request_channel()
- */
-void pcc_mbox_free_channel(struct mbox_chan *chan)
-{
-	unsigned long flags;
-
-	if (!chan || !chan->cl)
-		return;
-
-	spin_lock_irqsave(&chan->lock, flags);
-	chan->cl = NULL;
-	chan->active_req = NULL;
-	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
-		chan->txdone_method = TXDONE_BY_POLL;
-
-	spin_unlock_irqrestore(&chan->lock, flags);
-}
-EXPORT_SYMBOL_GPL(pcc_mbox_free_channel);
-
 /*
  * PCC can be used with perf critical drivers such as CPPC
  * So it makes sense to locally cache the virtual address and
@@ -225,6 +158,167 @@
 }
 
 /**
+ * pcc_map_interrupt - Map a PCC subspace GSI to a linux IRQ number
+ * @interrupt: GSI number.
+ * @flags: interrupt flags
+ *
+ * Returns: a valid linux IRQ number on success
+ *		0 or -EINVAL on failure
+ */
+static int pcc_map_interrupt(u32 interrupt, u32 flags)
+{
+	int trigger, polarity;
+
+	if (!interrupt)
+		return 0;
+
+	trigger = (flags & ACPI_PCCT_INTERRUPT_MODE) ? ACPI_EDGE_SENSITIVE
+			: ACPI_LEVEL_SENSITIVE;
+
+	polarity = (flags & ACPI_PCCT_INTERRUPT_POLARITY) ? ACPI_ACTIVE_LOW
+			: ACPI_ACTIVE_HIGH;
+
+	return acpi_register_gsi(NULL, interrupt, trigger, polarity);
+}
+
+/**
+ * pcc_mbox_irq - PCC mailbox interrupt handler
+ */
+static irqreturn_t pcc_mbox_irq(int irq, void *p)
+{
+	struct acpi_generic_address *doorbell_ack;
+	struct acpi_pcct_hw_reduced *pcct_ss;
+	struct mbox_chan *chan = p;
+	u64 doorbell_ack_preserve;
+	u64 doorbell_ack_write;
+	u64 doorbell_ack_val;
+	int ret;
+
+	pcct_ss = chan->con_priv;
+
+	mbox_chan_received_data(chan, NULL);
+
+	if (pcct_ss->header.type == ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2) {
+		struct acpi_pcct_hw_reduced_type2 *pcct2_ss = chan->con_priv;
+		u32 id = chan - pcc_mbox_channels;
+
+		doorbell_ack = &pcct2_ss->doorbell_ack_register;
+		doorbell_ack_preserve = pcct2_ss->ack_preserve_mask;
+		doorbell_ack_write = pcct2_ss->ack_write_mask;
+
+		ret = read_register(pcc_doorbell_ack_vaddr[id],
+				    &doorbell_ack_val,
+				    doorbell_ack->bit_width);
+		if (ret)
+			return IRQ_NONE;
+
+		ret = write_register(pcc_doorbell_ack_vaddr[id],
+				     (doorbell_ack_val & doorbell_ack_preserve)
+					| doorbell_ack_write,
+				     doorbell_ack->bit_width);
+		if (ret)
+			return IRQ_NONE;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * pcc_mbox_request_channel - PCC clients call this function to
+ *		request a pointer to their PCC subspace, from which they
+ *		can get the details of communicating with the remote.
+ * @cl: Pointer to Mailbox client, so we know where to bind the
+ *		Channel.
+ * @subspace_id: The PCC Subspace index as parsed in the PCC client
+ *		ACPI package. This is used to lookup the array of PCC
+ *		subspaces as parsed by the PCC Mailbox controller.
+ *
+ * Return: Pointer to the Mailbox Channel if successful or
+ *		ERR_PTR.
+ */
+struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
+		int subspace_id)
+{
+	struct device *dev = pcc_mbox_ctrl.dev;
+	struct mbox_chan *chan;
+	unsigned long flags;
+
+	/*
+	 * Each PCC Subspace is a Mailbox Channel.
+	 * The PCC Clients get their PCC Subspace ID
+	 * from their own tables and pass it here.
+	 * This returns a pointer to the PCC subspace
+	 * for the Client to operate on.
+	 */
+	chan = get_pcc_channel(subspace_id);
+
+	if (IS_ERR(chan) || chan->cl) {
+		dev_err(dev, "Channel not found for idx: %d\n", subspace_id);
+		return ERR_PTR(-EBUSY);
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->msg_free = 0;
+	chan->msg_count = 0;
+	chan->active_req = NULL;
+	chan->cl = cl;
+	init_completion(&chan->tx_complete);
+
+	if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
+		chan->txdone_method |= TXDONE_BY_ACK;
+
+	if (pcc_doorbell_irq[subspace_id] > 0) {
+		int rc;
+
+		rc = devm_request_irq(dev, pcc_doorbell_irq[subspace_id],
+				      pcc_mbox_irq, 0, MBOX_IRQ_NAME, chan);
+		if (unlikely(rc)) {
+			dev_err(dev, "failed to register PCC interrupt %d\n",
+				pcc_doorbell_irq[subspace_id]);
+			chan = ERR_PTR(rc);
+		}
+	}
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return chan;
+}
+EXPORT_SYMBOL_GPL(pcc_mbox_request_channel);
+
+/**
+ * pcc_mbox_free_channel - Clients call this to free their Channel.
+ *
+ * @chan: Pointer to the mailbox channel as returned by
+ *		pcc_mbox_request_channel()
+ */
+void pcc_mbox_free_channel(struct mbox_chan *chan)
+{
+	u32 id = chan - pcc_mbox_channels;
+	unsigned long flags;
+
+	if (!chan || !chan->cl)
+		return;
+
+	if (id >= pcc_mbox_ctrl.num_chans) {
+		pr_debug("pcc_mbox_free_channel: Invalid mbox_chan passed\n");
+		return;
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->cl = NULL;
+	chan->active_req = NULL;
+	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
+		chan->txdone_method = TXDONE_BY_POLL;
+
+	if (pcc_doorbell_irq[id] > 0)
+		devm_free_irq(chan->mbox->dev, pcc_doorbell_irq[id], chan);
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+EXPORT_SYMBOL_GPL(pcc_mbox_free_channel);
+
+
+/**
  * pcc_send_data - Called from Mailbox Controller code. Used
  *		here only to ring the channel doorbell. The PCC client
  *		specific read/write is done in the client driver in
@@ -296,8 +390,10 @@
 	if (pcc_mbox_ctrl.num_chans <= MAX_PCC_SUBSPACES) {
 		pcct_ss = (struct acpi_pcct_hw_reduced *) header;
 
-		if (pcct_ss->header.type !=
-				ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE) {
+		if ((pcct_ss->header.type !=
+				ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE)
+		    && (pcct_ss->header.type !=
+				ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2)) {
 			pr_err("Incorrect PCC Subspace type detected\n");
 			return -EINVAL;
 		}
@@ -307,6 +403,43 @@
 }
 
 /**
+ * pcc_parse_subspace_irq - Parse the PCC IRQ and PCC ACK register
+ *		There should be one entry per PCC client.
+ * @id: PCC subspace index.
+ * @pcct_ss: Pointer to the ACPI subtable header under the PCCT.
+ *
+ * Return: 0 for Success, else errno.
+ *
+ * This gets called for each entry in the PCC table.
+ */
+static int pcc_parse_subspace_irq(int id,
+				  struct acpi_pcct_hw_reduced *pcct_ss)
+{
+	pcc_doorbell_irq[id] = pcc_map_interrupt(pcct_ss->doorbell_interrupt,
+						 (u32)pcct_ss->flags);
+	if (pcc_doorbell_irq[id] <= 0) {
+		pr_err("PCC GSI %d not registered\n",
+		       pcct_ss->doorbell_interrupt);
+		return -EINVAL;
+	}
+
+	if (pcct_ss->header.type
+		== ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2) {
+		struct acpi_pcct_hw_reduced_type2 *pcct2_ss = (void *)pcct_ss;
+
+		pcc_doorbell_ack_vaddr[id] = acpi_os_ioremap(
+				pcct2_ss->doorbell_ack_register.address,
+				pcct2_ss->doorbell_ack_register.bit_width / 8);
+		if (!pcc_doorbell_ack_vaddr[id]) {
+			pr_err("Failed to ioremap PCC ACK register\n");
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/**
  * acpi_pcc_probe - Parse the ACPI tree for the PCCT.
  *
  * Return: 0 for Success, else errno.
@@ -316,7 +449,9 @@
 	acpi_size pcct_tbl_header_size;
 	struct acpi_table_header *pcct_tbl;
 	struct acpi_subtable_header *pcct_entry;
-	int count, i;
+	struct acpi_table_pcct *acpi_pcct_tbl;
+	int count, i, rc;
+	int sum = 0;
 	acpi_status status = AE_OK;
 
 	/* Search for PCCT */
@@ -333,37 +468,66 @@
 			sizeof(struct acpi_table_pcct),
 			ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE,
 			parse_pcc_subspace, MAX_PCC_SUBSPACES);
+	sum += (count > 0) ? count : 0;
 
-	if (count <= 0) {
+	count = acpi_table_parse_entries(ACPI_SIG_PCCT,
+			sizeof(struct acpi_table_pcct),
+			ACPI_PCCT_TYPE_HW_REDUCED_SUBSPACE_TYPE2,
+			parse_pcc_subspace, MAX_PCC_SUBSPACES);
+	sum += (count > 0) ? count : 0;
+
+	if (sum == 0 || sum >= MAX_PCC_SUBSPACES) {
 		pr_err("Error parsing PCC subspaces from PCCT\n");
 		return -EINVAL;
 	}
 
 	pcc_mbox_channels = kzalloc(sizeof(struct mbox_chan) *
-			count, GFP_KERNEL);
-
+			sum, GFP_KERNEL);
 	if (!pcc_mbox_channels) {
 		pr_err("Could not allocate space for PCC mbox channels\n");
 		return -ENOMEM;
 	}
 
-	pcc_doorbell_vaddr = kcalloc(count, sizeof(void *), GFP_KERNEL);
+	pcc_doorbell_vaddr = kcalloc(sum, sizeof(void *), GFP_KERNEL);
 	if (!pcc_doorbell_vaddr) {
-		kfree(pcc_mbox_channels);
-		return -ENOMEM;
+		rc = -ENOMEM;
+		goto err_free_mbox;
+	}
+
+	pcc_doorbell_ack_vaddr = kcalloc(sum, sizeof(void *), GFP_KERNEL);
+	if (!pcc_doorbell_ack_vaddr) {
+		rc = -ENOMEM;
+		goto err_free_db_vaddr;
+	}
+
+	pcc_doorbell_irq = kcalloc(sum, sizeof(int), GFP_KERNEL);
+	if (!pcc_doorbell_irq) {
+		rc = -ENOMEM;
+		goto err_free_db_ack_vaddr;
 	}
 
 	/* Point to the first PCC subspace entry */
 	pcct_entry = (struct acpi_subtable_header *) (
 		(unsigned long) pcct_tbl + sizeof(struct acpi_table_pcct));
 
-	for (i = 0; i < count; i++) {
+	acpi_pcct_tbl = (struct acpi_table_pcct *) pcct_tbl;
+	if (acpi_pcct_tbl->flags & ACPI_PCCT_DOORBELL)
+		pcc_mbox_ctrl.txdone_irq = true;
+
+	for (i = 0; i < sum; i++) {
 		struct acpi_generic_address *db_reg;
 		struct acpi_pcct_hw_reduced *pcct_ss;
 		pcc_mbox_channels[i].con_priv = pcct_entry;
 
+		pcct_ss = (struct acpi_pcct_hw_reduced *) pcct_entry;
+
+		if (pcc_mbox_ctrl.txdone_irq) {
+			rc = pcc_parse_subspace_irq(i, pcct_ss);
+			if (rc < 0)
+				goto err;
+		}
+
 		/* If doorbell is in system memory cache the virt address */
-		pcct_ss = (struct acpi_pcct_hw_reduced *)pcct_entry;
 		db_reg = &pcct_ss->doorbell_register;
 		if (db_reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY)
 			pcc_doorbell_vaddr[i] = acpi_os_ioremap(db_reg->address,
@@ -372,11 +536,21 @@
 			((unsigned long) pcct_entry + pcct_entry->length);
 	}
 
-	pcc_mbox_ctrl.num_chans = count;
+	pcc_mbox_ctrl.num_chans = sum;
 
 	pr_info("Detected %d PCC Subspaces\n", pcc_mbox_ctrl.num_chans);
 
 	return 0;
+
+err:
+	kfree(pcc_doorbell_irq);
+err_free_db_ack_vaddr:
+	kfree(pcc_doorbell_ack_vaddr);
+err_free_db_vaddr:
+	kfree(pcc_doorbell_vaddr);
+err_free_mbox:
+	kfree(pcc_mbox_channels);
+	return rc;
 }
 
 /**
diff --git a/drivers/mcb/Kconfig b/drivers/mcb/Kconfig
index e9a6976..76d9c51 100644
--- a/drivers/mcb/Kconfig
+++ b/drivers/mcb/Kconfig
@@ -28,4 +28,13 @@
 
 	   If build as a module, the module is called mcb-pci.ko
 
+config MCB_LPC
+	   tristate "LPC (non PCI) based MCB carrier"
+	   default n
+	   help
+
+	   This is a MCB carrier on a LPC or non PCI device.
+
+	   If build as a module, the module is called mcb-lpc.ko
+
 endif # MCB
diff --git a/drivers/mcb/Makefile b/drivers/mcb/Makefile
index 1ae1413..bcc7745 100644
--- a/drivers/mcb/Makefile
+++ b/drivers/mcb/Makefile
@@ -5,3 +5,4 @@
 mcb-y += mcb-parse.o
 
 obj-$(CONFIG_MCB_PCI) += mcb-pci.o
+obj-$(CONFIG_MCB_LPC) += mcb-lpc.o
diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c
index 6f2c852..921a5d2 100644
--- a/drivers/mcb/mcb-core.c
+++ b/drivers/mcb/mcb-core.c
@@ -233,6 +233,7 @@
 	dev->dev.bus = &mcb_bus_type;
 	dev->dev.parent = bus->dev.parent;
 	dev->dev.release = mcb_release_dev;
+	dev->dma_dev = bus->carrier;
 
 	device_id = dev->id;
 	dev_set_name(&dev->dev, "mcb%d-16z%03d-%d:%d:%d",
@@ -369,7 +370,6 @@
 	if (!dev)
 		return NULL;
 
-	INIT_LIST_HEAD(&dev->bus_list);
 	dev->bus = bus;
 
 	return dev;
@@ -405,20 +405,6 @@
 	return 0;
 }
 
-static int __mcb_bus_add_child(struct device *dev, void *data)
-{
-	struct mcb_device *mdev = to_mcb_device(dev);
-	struct mcb_bus *child;
-
-	BUG_ON(!mdev->is_added);
-	child = mdev->subordinate;
-
-	if (child)
-		mcb_bus_add_devices(child);
-
-	return 0;
-}
-
 /**
  * mcb_bus_add_devices() - Add devices in the bus' internal device list
  * @bus: The @mcb_bus we add the devices
@@ -428,8 +414,6 @@
 void mcb_bus_add_devices(const struct mcb_bus *bus)
 {
 	bus_for_each_dev(&mcb_bus_type, NULL, NULL, __mcb_bus_add_devices);
-	bus_for_each_dev(&mcb_bus_type, NULL, NULL, __mcb_bus_add_child);
-
 }
 EXPORT_SYMBOL_GPL(mcb_bus_add_devices);
 
diff --git a/drivers/mcb/mcb-internal.h b/drivers/mcb/mcb-internal.h
index 5254e02..d6e6933 100644
--- a/drivers/mcb/mcb-internal.h
+++ b/drivers/mcb/mcb-internal.h
@@ -112,6 +112,15 @@
 	u32 size;
 } __packed;
 
+struct chameleon_bar {
+	u32 addr;
+	u32 size;
+};
+
+#define BAR_CNT(x) ((x) & 0x07)
+#define CHAMELEON_BAR_MAX	6
+#define BAR_DESC_SIZE(x)	((x) * sizeof(struct chameleon_bar) + sizeof(__le32))
+
 int chameleon_parse_cells(struct mcb_bus *bus, phys_addr_t mapbase,
 			  void __iomem *base);
 
diff --git a/drivers/mcb/mcb-lpc.c b/drivers/mcb/mcb-lpc.c
new file mode 100644
index 0000000..d072c08
--- /dev/null
+++ b/drivers/mcb/mcb-lpc.c
@@ -0,0 +1,158 @@
+/*
+ * MEN Chameleon Bus.
+ *
+ * Copyright (C) 2014 MEN Mikroelektronik GmbH (www.men.de)
+ * Author: Andreas Werner <andreas.werner@men.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2 of the License.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/mcb.h>
+#include <linux/io.h>
+#include "mcb-internal.h"
+
+struct priv {
+	struct mcb_bus *bus;
+	struct resource *mem;
+	void __iomem *base;
+};
+
+static int mcb_lpc_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct priv *priv;
+	int ret = 0;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!priv->mem) {
+		dev_err(&pdev->dev, "No Memory resource\n");
+		return -ENODEV;
+	}
+
+	res = devm_request_mem_region(&pdev->dev, priv->mem->start,
+				      resource_size(priv->mem),
+				      KBUILD_MODNAME);
+	if (!res) {
+		dev_err(&pdev->dev, "Failed to request IO memory\n");
+		return -EBUSY;
+	}
+
+	priv->base = devm_ioremap(&pdev->dev, priv->mem->start,
+				  resource_size(priv->mem));
+	if (!priv->base) {
+		dev_err(&pdev->dev, "Cannot ioremap\n");
+		return -ENOMEM;
+	}
+
+	platform_set_drvdata(pdev, priv);
+
+	priv->bus = mcb_alloc_bus(&pdev->dev);
+	if (IS_ERR(priv->bus))
+		return PTR_ERR(priv->bus);
+
+	ret = chameleon_parse_cells(priv->bus, priv->mem->start, priv->base);
+	if (ret < 0) {
+		mcb_release_bus(priv->bus);
+		return ret;
+	}
+
+	dev_dbg(&pdev->dev, "Found %d cells\n", ret);
+
+	mcb_bus_add_devices(priv->bus);
+
+	return 0;
+
+}
+
+static int mcb_lpc_remove(struct platform_device *pdev)
+{
+	struct priv *priv = platform_get_drvdata(pdev);
+
+	mcb_release_bus(priv->bus);
+
+	return 0;
+}
+
+static struct platform_device *mcb_lpc_pdev;
+
+static int mcb_lpc_create_platform_device(const struct dmi_system_id *id)
+{
+	struct resource *res = id->driver_data;
+	int ret;
+
+	mcb_lpc_pdev = platform_device_alloc("mcb-lpc", -1);
+	if (!mcb_lpc_pdev)
+		return -ENOMEM;
+
+	ret = platform_device_add_resources(mcb_lpc_pdev, res, 1);
+	if (ret)
+		goto out_put;
+
+	ret = platform_device_add(mcb_lpc_pdev);
+	if (ret)
+		goto out_put;
+
+	return 0;
+
+out_put:
+	platform_device_put(mcb_lpc_pdev);
+	return ret;
+}
+
+static struct resource sc24_fpga_resource = {
+	.start = 0xe000e000,
+	.end = 0xe000e000 + CHAM_HEADER_SIZE,
+	.flags = IORESOURCE_MEM,
+};
+
+static struct platform_driver mcb_lpc_driver = {
+	.driver		= {
+		.name = "mcb-lpc",
+	},
+	.probe		= mcb_lpc_probe,
+	.remove		= mcb_lpc_remove,
+};
+
+static const struct dmi_system_id mcb_lpc_dmi_table[] = {
+	{
+		.ident = "SC24",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "MEN"),
+			DMI_MATCH(DMI_PRODUCT_VERSION, "14SC24"),
+		},
+		.driver_data = (void *)&sc24_fpga_resource,
+		.callback = mcb_lpc_create_platform_device,
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(dmi, mcb_lpc_dmi_table);
+
+static int __init mcb_lpc_init(void)
+{
+	if (!dmi_check_system(mcb_lpc_dmi_table))
+		return -ENODEV;
+
+	return platform_driver_register(&mcb_lpc_driver);
+}
+
+static void __exit mcb_lpc_exit(void)
+{
+	platform_device_unregister(mcb_lpc_pdev);
+	platform_driver_unregister(&mcb_lpc_driver);
+}
+
+module_init(mcb_lpc_init);
+module_exit(mcb_lpc_exit);
+
+MODULE_AUTHOR("Andreas Werner <andreas.werner@men.de>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MCB over LPC support");
diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c
index dbecbed..4ca2739 100644
--- a/drivers/mcb/mcb-parse.c
+++ b/drivers/mcb/mcb-parse.c
@@ -26,19 +26,20 @@
 }
 
 static int chameleon_parse_bdd(struct mcb_bus *bus,
-			phys_addr_t mapbase,
+			struct chameleon_bar *cb,
 			void __iomem *base)
 {
 	return 0;
 }
 
 static int chameleon_parse_gdd(struct mcb_bus *bus,
-			phys_addr_t mapbase,
-			void __iomem *base)
+			struct chameleon_bar *cb,
+			void __iomem *base, int bar_count)
 {
 	struct chameleon_gdd __iomem *gdd =
 		(struct chameleon_gdd __iomem *) base;
 	struct mcb_device *mdev;
+	u32 dev_mapbase;
 	u32 offset;
 	u32 size;
 	int ret;
@@ -61,13 +62,39 @@
 	mdev->group = GDD_GRP(reg2);
 	mdev->inst = GDD_INS(reg2);
 
+	/*
+	 * If the BAR is missing, dev_mapbase is zero, or if the
+	 * device is IO mapped we just print a warning and go on with the
+	 * next device, instead of completely stop the gdd parser
+	 */
+	if (mdev->bar > bar_count - 1) {
+		pr_info("No BAR for 16z%03d\n", mdev->id);
+		ret = 0;
+		goto err;
+	}
+
+	dev_mapbase = cb[mdev->bar].addr;
+	if (!dev_mapbase) {
+		pr_info("BAR not assigned for 16z%03d\n", mdev->id);
+		ret = 0;
+		goto err;
+	}
+
+	if (dev_mapbase & 0x01) {
+		pr_info("IO mapped Device (16z%03d) not yet supported\n",
+			mdev->id);
+		ret = 0;
+		goto err;
+	}
+
 	pr_debug("Found a 16z%03d\n", mdev->id);
 
 	mdev->irq.start = GDD_IRQ(reg1);
 	mdev->irq.end = GDD_IRQ(reg1);
 	mdev->irq.flags = IORESOURCE_IRQ;
 
-	mdev->mem.start = mapbase + offset;
+	mdev->mem.start = dev_mapbase + offset;
+
 	mdev->mem.end = mdev->mem.start + size - 1;
 	mdev->mem.flags = IORESOURCE_MEM;
 
@@ -85,13 +112,76 @@
 	return ret;
 }
 
+static void chameleon_parse_bar(void __iomem *base,
+				struct chameleon_bar *cb, int bar_count)
+{
+	char __iomem *p = base;
+	int i;
+
+	/* skip reg1 */
+	p += sizeof(__le32);
+
+	for (i = 0; i < bar_count; i++) {
+		cb[i].addr = readl(p);
+		cb[i].size = readl(p + 4);
+
+		p += sizeof(struct chameleon_bar);
+	}
+}
+
+static int chameleon_get_bar(char __iomem **base, phys_addr_t mapbase,
+			     struct chameleon_bar **cb)
+{
+	struct chameleon_bar *c;
+	int bar_count;
+	__le32 reg;
+	u32 dtype;
+
+	/*
+	 * For those devices which are not connected
+	 * to the PCI Bus (e.g. LPC) there is a bar
+	 * descriptor located directly after the
+	 * chameleon header. This header is comparable
+	 * to a PCI header.
+	 */
+	dtype = get_next_dtype(*base);
+	if (dtype == CHAMELEON_DTYPE_BAR) {
+		reg = readl(*base);
+
+		bar_count = BAR_CNT(reg);
+		if (bar_count <= 0 && bar_count > CHAMELEON_BAR_MAX)
+			return -ENODEV;
+
+		c = kcalloc(bar_count, sizeof(struct chameleon_bar),
+			    GFP_KERNEL);
+		if (!c)
+			return -ENOMEM;
+
+		chameleon_parse_bar(*base, c, bar_count);
+		*base += BAR_DESC_SIZE(bar_count);
+	} else {
+		c = kzalloc(sizeof(struct chameleon_bar), GFP_KERNEL);
+		if (!c)
+			return -ENOMEM;
+
+		bar_count = 1;
+		c->addr = mapbase;
+	}
+
+	*cb = c;
+
+	return bar_count;
+}
+
 int chameleon_parse_cells(struct mcb_bus *bus, phys_addr_t mapbase,
 			void __iomem *base)
 {
-	char __iomem *p = base;
 	struct chameleon_fpga_header *header;
-	uint32_t dtype;
+	struct chameleon_bar *cb;
+	char __iomem *p = base;
 	int num_cells = 0;
+	uint32_t dtype;
+	int bar_count;
 	int ret = 0;
 	u32 hsize;
 
@@ -108,8 +198,8 @@
 	if (header->magic != CHAMELEONV2_MAGIC) {
 		pr_err("Unsupported chameleon version 0x%x\n",
 				header->magic);
-		kfree(header);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto free_header;
 	}
 	p += hsize;
 
@@ -119,16 +209,20 @@
 	snprintf(bus->name, CHAMELEON_FILENAME_LEN + 1, "%s",
 		 header->filename);
 
+	bar_count = chameleon_get_bar(&p, mapbase, &cb);
+	if (bar_count < 0)
+		goto free_header;
+
 	for_each_chameleon_cell(dtype, p) {
 		switch (dtype) {
 		case CHAMELEON_DTYPE_GENERAL:
-			ret = chameleon_parse_gdd(bus, mapbase, p);
+			ret = chameleon_parse_gdd(bus, cb, p, bar_count);
 			if (ret < 0)
-				goto out;
+				goto free_bar;
 			p += sizeof(struct chameleon_gdd);
 			break;
 		case CHAMELEON_DTYPE_BRIDGE:
-			chameleon_parse_bdd(bus, mapbase, p);
+			chameleon_parse_bdd(bus, cb, p);
 			p += sizeof(struct chameleon_bdd);
 			break;
 		case CHAMELEON_DTYPE_END:
@@ -136,8 +230,8 @@
 		default:
 			pr_err("Invalid chameleon descriptor type 0x%x\n",
 				dtype);
-			kfree(header);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto free_bar;
 		}
 		num_cells++;
 	}
@@ -145,11 +239,15 @@
 	if (num_cells == 0)
 		num_cells = -EINVAL;
 
+	kfree(cb);
 	kfree(header);
 	return num_cells;
 
-out:
+free_bar:
+	kfree(cb);
+free_header:
 	kfree(header);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(chameleon_parse_cells);
diff --git a/drivers/mcb/mcb-pci.c b/drivers/mcb/mcb-pci.c
index b15a034..af4d2f26 100644
--- a/drivers/mcb/mcb-pci.c
+++ b/drivers/mcb/mcb-pci.c
@@ -46,6 +46,7 @@
 		dev_err(&pdev->dev, "Failed to enable PCI device\n");
 		return -ENODEV;
 	}
+	pci_set_master(pdev);
 
 	priv->mapbase = pci_resource_start(pdev, 0);
 	if (!priv->mapbase) {
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 95a4ca6..849ad44 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -760,7 +760,8 @@
 	if (!d->nr_stripes ||
 	    d->nr_stripes > INT_MAX ||
 	    d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) {
-		pr_err("nr_stripes too large");
+		pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
+			(unsigned)d->nr_stripes);
 		return -ENOMEM;
 	}
 
@@ -1820,7 +1821,7 @@
 	free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
 	if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
-	    !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
+	    !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free_inc,	free << 2, GFP_KERNEL) ||
@@ -1844,7 +1845,7 @@
 				struct block_device *bdev, struct cache *ca)
 {
 	char name[BDEVNAME_SIZE];
-	const char *err = NULL;
+	const char *err = NULL; /* must be set for any error case */
 	int ret = 0;
 
 	memcpy(&ca->sb, sb, sizeof(struct cache_sb));
@@ -1861,8 +1862,13 @@
 		ca->discard = CACHE_DISCARD(&ca->sb);
 
 	ret = cache_alloc(ca);
-	if (ret != 0)
+	if (ret != 0) {
+		if (ret == -ENOMEM)
+			err = "cache_alloc(): -ENOMEM";
+		else
+			err = "cache_alloc(): unknown error";
 		goto err;
+	}
 
 	if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) {
 		err = "error calling kobject_add";
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 6fff794..13041ee 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -2183,19 +2183,29 @@
 static ssize_t
 location_store(struct mddev *mddev, const char *buf, size_t len)
 {
+	int rv;
 
+	rv = mddev_lock(mddev);
+	if (rv)
+		return rv;
 	if (mddev->pers) {
-		if (!mddev->pers->quiesce)
-			return -EBUSY;
-		if (mddev->recovery || mddev->sync_thread)
-			return -EBUSY;
+		if (!mddev->pers->quiesce) {
+			rv = -EBUSY;
+			goto out;
+		}
+		if (mddev->recovery || mddev->sync_thread) {
+			rv = -EBUSY;
+			goto out;
+		}
 	}
 
 	if (mddev->bitmap || mddev->bitmap_info.file ||
 	    mddev->bitmap_info.offset) {
 		/* bitmap already configured.  Only option is to clear it */
-		if (strncmp(buf, "none", 4) != 0)
-			return -EBUSY;
+		if (strncmp(buf, "none", 4) != 0) {
+			rv = -EBUSY;
+			goto out;
+		}
 		if (mddev->pers) {
 			mddev->pers->quiesce(mddev, 1);
 			bitmap_destroy(mddev);
@@ -2214,21 +2224,25 @@
 			/* nothing to be done */;
 		else if (strncmp(buf, "file:", 5) == 0) {
 			/* Not supported yet */
-			return -EINVAL;
+			rv = -EINVAL;
+			goto out;
 		} else {
-			int rv;
 			if (buf[0] == '+')
 				rv = kstrtoll(buf+1, 10, &offset);
 			else
 				rv = kstrtoll(buf, 10, &offset);
 			if (rv)
-				return rv;
-			if (offset == 0)
-				return -EINVAL;
+				goto out;
+			if (offset == 0) {
+				rv = -EINVAL;
+				goto out;
+			}
 			if (mddev->bitmap_info.external == 0 &&
 			    mddev->major_version == 0 &&
-			    offset != mddev->bitmap_info.default_offset)
-				return -EINVAL;
+			    offset != mddev->bitmap_info.default_offset) {
+				rv = -EINVAL;
+				goto out;
+			}
 			mddev->bitmap_info.offset = offset;
 			if (mddev->pers) {
 				struct bitmap *bitmap;
@@ -2245,7 +2259,7 @@
 				mddev->pers->quiesce(mddev, 0);
 				if (rv) {
 					bitmap_destroy(mddev);
-					return rv;
+					goto out;
 				}
 			}
 		}
@@ -2257,6 +2271,11 @@
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 		md_wakeup_thread(mddev->thread);
 	}
+	rv = 0;
+out:
+	mddev_unlock(mddev);
+	if (rv)
+		return rv;
 	return len;
 }
 
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 6571c81..8625040 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1879,7 +1879,7 @@
 	__cache_size_refresh();
 	mutex_unlock(&dm_bufio_clients_lock);
 
-	dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache");
+	dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
 	if (!dm_bufio_wq)
 		return -ENOMEM;
 
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4e9784b..8742957 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -181,7 +181,7 @@
 	u8 key[0];
 };
 
-#define MIN_IOS        16
+#define MIN_IOS        64
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -1453,7 +1453,7 @@
 	unsigned i;
 	int err;
 
-	cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
+	cc->tfms = kzalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
 			   GFP_KERNEL);
 	if (!cc->tfms)
 		return -ENOMEM;
@@ -1924,6 +1924,13 @@
 		return DM_MAPIO_REMAPPED;
 	}
 
+	/*
+	 * Check if bio is too large, split as needed.
+	 */
+	if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
+	    bio_data_dir(bio) == WRITE)
+		dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
+
 	io = dm_per_bio_data(bio, cc->per_bio_data_size);
 	crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
 	io->ctx.req = (struct skcipher_request *)(io + 1);
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 97e446d..6a2e8dd 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -289,15 +289,13 @@
 		pb->bio_submitted = true;
 
 		/*
-		 * Map reads as normal only if corrupt_bio_byte set.
+		 * Error reads if neither corrupt_bio_byte or drop_writes are set.
+		 * Otherwise, flakey_end_io() will decide if the reads should be modified.
 		 */
 		if (bio_data_dir(bio) == READ) {
-			/* If flags were specified, only corrupt those that match. */
-			if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
-			    all_corrupt_bio_flags_match(bio, fc))
-				goto map_bio;
-			else
+			if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags))
 				return -EIO;
+			goto map_bio;
 		}
 
 		/*
@@ -334,14 +332,21 @@
 	struct flakey_c *fc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
 
-	/*
-	 * Corrupt successful READs while in down state.
-	 */
 	if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
-		if (fc->corrupt_bio_byte)
+		if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
+		    all_corrupt_bio_flags_match(bio, fc)) {
+			/*
+			 * Corrupt successful matching READs while in down state.
+			 */
 			corrupt_bio_data(bio, fc);
-		else
+
+		} else if (!test_bit(DROP_WRITES, &fc->flags)) {
+			/*
+			 * Error read during the down_interval if drop_writes
+			 * wasn't configured.
+			 */
 			return -EIO;
+		}
 	}
 
 	return error;
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 4ab6803..49e4d8d 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -259,12 +259,12 @@
 		goto out;
 	sector++;
 
-	bio = bio_alloc(GFP_KERNEL, block->vec_cnt);
+	atomic_inc(&lc->io_blocks);
+	bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
 	if (!bio) {
 		DMERR("Couldn't alloc log bio");
 		goto error;
 	}
-	atomic_inc(&lc->io_blocks);
 	bio->bi_iter.bi_size = 0;
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_bdev = lc->logdev->bdev;
@@ -282,7 +282,7 @@
 		if (ret != block->vecs[i].bv_len) {
 			atomic_inc(&lc->io_blocks);
 			submit_bio(bio);
-			bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i);
+			bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES));
 			if (!bio) {
 				DMERR("Couldn't alloc log bio");
 				goto error;
@@ -459,9 +459,9 @@
 		goto bad;
 	}
 
-	ret = -EINVAL;
 	lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write");
-	if (!lc->log_kthread) {
+	if (IS_ERR(lc->log_kthread)) {
+		ret = PTR_ERR(lc->log_kthread);
 		ti->error = "Couldn't alloc kthread";
 		dm_put_device(ti, lc->dev);
 		dm_put_device(ti, lc->logdev);
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 4ca2d1d..07fc1ad 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -291,9 +291,10 @@
 	core->nr_regions = le64_to_cpu(disk->nr_regions);
 }
 
-static int rw_header(struct log_c *lc, int rw)
+static int rw_header(struct log_c *lc, int op)
 {
-	lc->io_req.bi_op = rw;
+	lc->io_req.bi_op = op;
+	lc->io_req.bi_op_flags = 0;
 
 	return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
 }
@@ -316,7 +317,7 @@
 {
 	int r;
 
-	r = rw_header(log, READ);
+	r = rw_header(log, REQ_OP_READ);
 	if (r)
 		return r;
 
@@ -630,7 +631,7 @@
 	header_to_disk(&lc->header, lc->disk_header);
 
 	/* write the new header */
-	r = rw_header(lc, WRITE);
+	r = rw_header(lc, REQ_OP_WRITE);
 	if (!r) {
 		r = flush_header(lc);
 		if (r)
@@ -698,7 +699,7 @@
 			log_clear_bit(lc, lc->clean_bits, i);
 	}
 
-	r = rw_header(lc, WRITE);
+	r = rw_header(lc, REQ_OP_WRITE);
 	if (r)
 		fail_log_device(lc);
 	else {
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1b9795d..8abde6b 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -191,7 +191,6 @@
 #define RT_FLAG_RS_BITMAP_LOADED	2
 #define RT_FLAG_UPDATE_SBS		3
 #define RT_FLAG_RESHAPE_RS		4
-#define RT_FLAG_KEEP_RS_FROZEN		5
 
 /* Array elements of 64 bit needed for rebuild/failed disk bits */
 #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -861,6 +860,9 @@
 {
 	unsigned long min_region_size = rs->ti->len / (1 << 21);
 
+	if (rs_is_raid0(rs))
+		return 0;
+
 	if (!region_size) {
 		/*
 		 * Choose a reasonable default.	 All figures in sectors.
@@ -930,6 +932,8 @@
 			rebuild_cnt++;
 
 	switch (rs->raid_type->level) {
+	case 0:
+		break;
 	case 1:
 		if (rebuild_cnt >= rs->md.raid_disks)
 			goto too_many;
@@ -2335,6 +2339,13 @@
 		case 0:
 			break;
 		default:
+			/*
+			 * We have to keep any raid0 data/metadata device pairs or
+			 * the MD raid0 personality will fail to start the array.
+			 */
+			if (rs_is_raid0(rs))
+				continue;
+
 			dev = container_of(rdev, struct raid_dev, rdev);
 			if (dev->meta_dev)
 				dm_put_device(ti, dev->meta_dev);
@@ -2579,7 +2590,6 @@
 		} else {
 			/* Process raid1 without delta_disks */
 			mddev->raid_disks = rs->raid_disks;
-			set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
 			reshape = false;
 		}
 	} else {
@@ -2590,7 +2600,6 @@
 	if (reshape) {
 		set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags);
 		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-		set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
 	} else if (mddev->raid_disks < rs->raid_disks)
 		/* Create new superblocks and bitmaps, if any new disks */
 		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
@@ -2902,7 +2911,6 @@
 			goto bad;
 
 		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
-		set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
 		/* Takeover ain't recovery, so disable recovery */
 		rs_setup_recovery(rs, MaxSector);
 		rs_set_new(rs);
@@ -3386,21 +3394,28 @@
 {
 	struct raid_set *rs = ti->private;
 
-	if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
-		if (!rs->md.suspended)
-			mddev_suspend(&rs->md);
-		rs->md.ro = 1;
-	}
+	if (!rs->md.suspended)
+		mddev_suspend(&rs->md);
+
+	rs->md.ro = 1;
 }
 
 static void attempt_restore_of_faulty_devices(struct raid_set *rs)
 {
 	int i;
-	uint64_t failed_devices, cleared_failed_devices = 0;
+	uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS];
 	unsigned long flags;
+	bool cleared = false;
 	struct dm_raid_superblock *sb;
+	struct mddev *mddev = &rs->md;
 	struct md_rdev *r;
 
+	/* RAID personalities have to provide hot add/remove methods or we need to bail out. */
+	if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk)
+		return;
+
+	memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
+
 	for (i = 0; i < rs->md.raid_disks; i++) {
 		r = &rs->dev[i].rdev;
 		if (test_bit(Faulty, &r->flags) && r->sb_page &&
@@ -3420,7 +3435,7 @@
 			 * ourselves.
 			 */
 			if ((r->raid_disk >= 0) &&
-			    (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+			    (mddev->pers->hot_remove_disk(mddev, r) != 0))
 				/* Failed to revive this device, try next */
 				continue;
 
@@ -3430,22 +3445,30 @@
 			clear_bit(Faulty, &r->flags);
 			clear_bit(WriteErrorSeen, &r->flags);
 			clear_bit(In_sync, &r->flags);
-			if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+			if (mddev->pers->hot_add_disk(mddev, r)) {
 				r->raid_disk = -1;
 				r->saved_raid_disk = -1;
 				r->flags = flags;
 			} else {
 				r->recovery_offset = 0;
-				cleared_failed_devices |= 1 << i;
+				set_bit(i, (void *) cleared_failed_devices);
+				cleared = true;
 			}
 		}
 	}
-	if (cleared_failed_devices) {
+
+	/* If any failed devices could be cleared, update all sbs failed_devices bits */
+	if (cleared) {
+		uint64_t failed_devices[DISKS_ARRAY_ELEMS];
+
 		rdev_for_each(r, &rs->md) {
 			sb = page_address(r->sb_page);
-			failed_devices = le64_to_cpu(sb->failed_devices);
-			failed_devices &= ~cleared_failed_devices;
-			sb->failed_devices = cpu_to_le64(failed_devices);
+			sb_retrieve_failed_devices(sb, failed_devices);
+
+			for (i = 0; i < DISKS_ARRAY_ELEMS; i++)
+				failed_devices[i] &= ~cleared_failed_devices[i];
+
+			sb_update_failed_devices(sb, failed_devices);
 		}
 	}
 }
@@ -3610,26 +3633,15 @@
 		 * devices are reachable again.
 		 */
 		attempt_restore_of_faulty_devices(rs);
-	} else {
-		mddev->ro = 0;
-		mddev->in_sync = 0;
-
-		/*
-		 * When passing in flags to the ctr, we expect userspace
-		 * to reset them because they made it to the superblocks
-		 * and reload the mapping anyway.
-		 *
-		 * -> only unfreeze recovery in case of a table reload or
-		 *    we'll have a bogus recovery/reshape position
-		 *    retrieved from the superblock by the ctr because
-		 *    the ongoing recovery/reshape will change it after read.
-		 */
-		if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags))
-			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
-		if (mddev->suspended)
-			mddev_resume(mddev);
 	}
+
+	mddev->ro = 0;
+	mddev->in_sync = 0;
+
+	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+	if (mddev->suspended)
+		mddev_resume(mddev);
 }
 
 static struct target_type raid_target = {
diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c
index 4ace1da..6c25213 100644
--- a/drivers/md/dm-round-robin.c
+++ b/drivers/md/dm-round-robin.c
@@ -210,14 +210,17 @@
 	struct path_info *pi = NULL;
 	struct dm_path *current_path = NULL;
 
+	local_irq_save(flags);
 	current_path = *this_cpu_ptr(s->current_path);
 	if (current_path) {
 		percpu_counter_dec(&s->repeat_count);
-		if (percpu_counter_read_positive(&s->repeat_count) > 0)
+		if (percpu_counter_read_positive(&s->repeat_count) > 0) {
+			local_irq_restore(flags);
 			return current_path;
+		}
 	}
 
-	spin_lock_irqsave(&s->lock, flags);
+	spin_lock(&s->lock);
 	if (!list_empty(&s->valid_paths)) {
 		pi = list_entry(s->valid_paths.next, struct path_info, list);
 		list_move_tail(&pi->list, &s->valid_paths);
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 41573f1..34a840d 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -834,8 +834,10 @@
 		goto err;
 	}
 	cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
-	if (!cinfo->ack_lockres)
+	if (!cinfo->ack_lockres) {
+		ret = -ENOMEM;
 		goto err;
+	}
 	/* get sync CR lock on ACK. */
 	if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
 		pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
@@ -849,8 +851,10 @@
 	pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
 	snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
 	cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
-	if (!cinfo->bitmap_lockres)
+	if (!cinfo->bitmap_lockres) {
+		ret = -ENOMEM;
 		goto err;
+	}
 	if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
 		pr_err("Failed to get bitmap lock\n");
 		ret = -EINVAL;
@@ -858,8 +862,10 @@
 	}
 
 	cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
-	if (!cinfo->resync_lockres)
+	if (!cinfo->resync_lockres) {
+		ret = -ENOMEM;
 		goto err;
+	}
 
 	return 0;
 err:
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d646f6e..915e84d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1604,11 +1604,8 @@
 			mddev->new_chunk_sectors = mddev->chunk_sectors;
 		}
 
-		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) {
+		if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
 			set_bit(MD_HAS_JOURNAL, &mddev->flags);
-			if (mddev->recovery_cp == MaxSector)
-				set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
-		}
 	} else if (mddev->pers == NULL) {
 		/* Insist of good event counter while assembling, except for
 		 * spares (which don't need an event count) */
@@ -5851,6 +5848,9 @@
 			working++;
 			if (test_bit(In_sync, &rdev->flags))
 				insync++;
+			else if (test_bit(Journal, &rdev->flags))
+				/* TODO: add journal count to md_u.h */
+				;
 			else
 				spare++;
 		}
@@ -7610,16 +7610,12 @@
 
 int md_setup_cluster(struct mddev *mddev, int nodes)
 {
-	int err;
-
-	err = request_module("md-cluster");
-	if (err) {
-		pr_err("md-cluster module not found.\n");
-		return -ENOENT;
-	}
-
+	if (!md_cluster_ops)
+		request_module("md-cluster");
 	spin_lock(&pers_lock);
+	/* ensure module won't be unloaded */
 	if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
+		pr_err("can't find md-cluster module or get it's reference.\n");
 		spin_unlock(&pers_lock);
 		return -ENOENT;
 	}
@@ -7862,6 +7858,7 @@
 	 */
 
 	do {
+		int mddev2_minor = -1;
 		mddev->curr_resync = 2;
 
 	try_again:
@@ -7891,10 +7888,14 @@
 				prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
 				if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
 				    mddev2->curr_resync >= mddev->curr_resync) {
-					printk(KERN_INFO "md: delaying %s of %s"
-					       " until %s has finished (they"
-					       " share one or more physical units)\n",
-					       desc, mdname(mddev), mdname(mddev2));
+					if (mddev2_minor != mddev2->md_minor) {
+						mddev2_minor = mddev2->md_minor;
+						printk(KERN_INFO "md: delaying %s of %s"
+						       " until %s has finished (they"
+						       " share one or more physical units)\n",
+						       desc, mdname(mddev),
+						       mdname(mddev2));
+					}
 					mddev_put(mddev2);
 					if (signal_pending(current))
 						flush_signals(current);
@@ -8275,16 +8276,13 @@
 static void md_start_sync(struct work_struct *ws)
 {
 	struct mddev *mddev = container_of(ws, struct mddev, del_work);
-	int ret = 0;
 
 	mddev->sync_thread = md_register_thread(md_do_sync,
 						mddev,
 						"resync");
 	if (!mddev->sync_thread) {
-		if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
-			printk(KERN_ERR "%s: could not start resync"
-			       " thread...\n",
-			       mdname(mddev));
+		printk(KERN_ERR "%s: could not start resync thread...\n",
+		       mdname(mddev));
 		/* leave the spares where they are, it shouldn't hurt */
 		clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
 		clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0e4efcd..be1a9fc 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1064,6 +1064,8 @@
 	int max_sectors;
 	int sectors;
 
+	md_write_start(mddev, bio);
+
 	/*
 	 * Register the new request and wait if the reconstruction
 	 * thread has put up a bar for new requests.
@@ -1445,8 +1447,6 @@
 		return;
 	}
 
-	md_write_start(mddev, bio);
-
 	do {
 
 		/*
@@ -2465,20 +2465,21 @@
 
 	while (sect_to_write) {
 		struct bio *wbio;
+		sector_t wsector;
 		if (sectors > sect_to_write)
 			sectors = sect_to_write;
 		/* Write at 'sector' for 'sectors' */
 		wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
 		bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
-		wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
-				   choose_data_offset(r10_bio, rdev) +
-				   (sector - r10_bio->sector));
+		wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
+		wbio->bi_iter.bi_sector = wsector +
+				   choose_data_offset(r10_bio, rdev);
 		wbio->bi_bdev = rdev->bdev;
 		bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
 
 		if (submit_bio_wait(wbio) < 0)
 			/* Failure! */
-			ok = rdev_set_badblocks(rdev, sector,
+			ok = rdev_set_badblocks(rdev, wsector,
 						sectors, 0)
 				&& ok;
 
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 51f76dd..1b1ab4a 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -96,7 +96,6 @@
 	spinlock_t no_space_stripes_lock;
 
 	bool need_cache_flush;
-	bool in_teardown;
 };
 
 /*
@@ -704,31 +703,22 @@
 
 	mddev = log->rdev->mddev;
 	/*
-	 * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and
-	 * wait for this thread to finish. This thread waits for
-	 * MD_CHANGE_PENDING clear, which is supposed to be done in
-	 * md_check_recovery(). md_check_recovery() tries to get
-	 * reconfig_mutex. Since r5l_quiesce already holds the mutex,
-	 * md_check_recovery() fails, so the PENDING never get cleared. The
-	 * in_teardown check workaround this issue.
+	 * Discard could zero data, so before discard we must make sure
+	 * superblock is updated to new log tail. Updating superblock (either
+	 * directly call md_update_sb() or depend on md thread) must hold
+	 * reconfig mutex. On the other hand, raid5_quiesce is called with
+	 * reconfig_mutex hold. The first step of raid5_quiesce() is waitting
+	 * for all IO finish, hence waitting for reclaim thread, while reclaim
+	 * thread is calling this function and waitting for reconfig mutex. So
+	 * there is a deadlock. We workaround this issue with a trylock.
+	 * FIXME: we could miss discard if we can't take reconfig mutex
 	 */
-	if (!log->in_teardown) {
-		set_mask_bits(&mddev->flags, 0,
-			      BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait,
-			!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
-			log->in_teardown);
-		/*
-		 * r5l_quiesce could run after in_teardown check and hold
-		 * mutex first. Superblock might get updated twice.
-		 */
-		if (log->in_teardown)
-			md_update_sb(mddev, 1);
-	} else {
-		WARN_ON(!mddev_is_locked(mddev));
-		md_update_sb(mddev, 1);
-	}
+	set_mask_bits(&mddev->flags, 0,
+		BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
+	if (!mddev_trylock(mddev))
+		return;
+	md_update_sb(mddev, 1);
+	mddev_unlock(mddev);
 
 	/* discard IO error really doesn't matter, ignore it */
 	if (log->last_checkpoint < end) {
@@ -827,7 +817,6 @@
 	if (!log || state == 2)
 		return;
 	if (state == 0) {
-		log->in_teardown = 0;
 		/*
 		 * This is a special case for hotadd. In suspend, the array has
 		 * no journal. In resume, journal is initialized as well as the
@@ -838,11 +827,6 @@
 		log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
 					log->rdev->mddev, "reclaim");
 	} else if (state == 1) {
-		/*
-		 * at this point all stripes are finished, so io_unit is at
-		 * least in STRIPE_END state
-		 */
-		log->in_teardown = 1;
 		/* make sure r5l_write_super_and_discard_space exits */
 		mddev = log->rdev->mddev;
 		wake_up(&mddev->sb_wait);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8912407..5287e79 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -659,6 +659,7 @@
 {
 	struct stripe_head *sh;
 	int hash = stripe_hash_locks_hash(sector);
+	int inc_empty_inactive_list_flag;
 
 	pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
 
@@ -703,7 +704,12 @@
 					atomic_inc(&conf->active_stripes);
 				BUG_ON(list_empty(&sh->lru) &&
 				       !test_bit(STRIPE_EXPANDING, &sh->state));
+				inc_empty_inactive_list_flag = 0;
+				if (!list_empty(conf->inactive_list + hash))
+					inc_empty_inactive_list_flag = 1;
 				list_del_init(&sh->lru);
+				if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+					atomic_inc(&conf->empty_inactive_list_nr);
 				if (sh->group) {
 					sh->group->stripes_cnt--;
 					sh->group = NULL;
@@ -762,6 +768,7 @@
 	sector_t head_sector, tmp_sec;
 	int hash;
 	int dd_idx;
+	int inc_empty_inactive_list_flag;
 
 	/* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
 	tmp_sec = sh->sector;
@@ -779,7 +786,12 @@
 				atomic_inc(&conf->active_stripes);
 			BUG_ON(list_empty(&head->lru) &&
 			       !test_bit(STRIPE_EXPANDING, &head->state));
+			inc_empty_inactive_list_flag = 0;
+			if (!list_empty(conf->inactive_list + hash))
+				inc_empty_inactive_list_flag = 1;
 			list_del_init(&head->lru);
+			if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
+				atomic_inc(&conf->empty_inactive_list_nr);
 			if (head->group) {
 				head->group->stripes_cnt--;
 				head->group = NULL;
@@ -993,7 +1005,6 @@
 
 			set_bit(STRIPE_IO_STARTED, &sh->state);
 
-			bio_reset(bi);
 			bi->bi_bdev = rdev->bdev;
 			bio_set_op_attrs(bi, op, op_flags);
 			bi->bi_end_io = op_is_write(op)
@@ -1045,7 +1056,6 @@
 
 			set_bit(STRIPE_IO_STARTED, &sh->state);
 
-			bio_reset(rbi);
 			rbi->bi_bdev = rrdev->bdev;
 			bio_set_op_attrs(rbi, op, op_flags);
 			BUG_ON(!op_is_write(op));
@@ -1978,9 +1988,11 @@
 	put_cpu();
 }
 
-static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
+static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
+	int disks)
 {
 	struct stripe_head *sh;
+	int i;
 
 	sh = kmem_cache_zalloc(sc, gfp);
 	if (sh) {
@@ -1989,6 +2001,17 @@
 		INIT_LIST_HEAD(&sh->batch_list);
 		INIT_LIST_HEAD(&sh->lru);
 		atomic_set(&sh->count, 1);
+		for (i = 0; i < disks; i++) {
+			struct r5dev *dev = &sh->dev[i];
+
+			bio_init(&dev->req);
+			dev->req.bi_io_vec = &dev->vec;
+			dev->req.bi_max_vecs = 1;
+
+			bio_init(&dev->rreq);
+			dev->rreq.bi_io_vec = &dev->rvec;
+			dev->rreq.bi_max_vecs = 1;
+		}
 	}
 	return sh;
 }
@@ -1996,7 +2019,7 @@
 {
 	struct stripe_head *sh;
 
-	sh = alloc_stripe(conf->slab_cache, gfp);
+	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
 	if (!sh)
 		return 0;
 
@@ -2167,7 +2190,7 @@
 	mutex_lock(&conf->cache_size_mutex);
 
 	for (i = conf->max_nr_stripes; i; i--) {
-		nsh = alloc_stripe(sc, GFP_KERNEL);
+		nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
 		if (!nsh)
 			break;
 
@@ -2299,6 +2322,7 @@
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
 		bi->bi_error);
 	if (i == disks) {
+		bio_reset(bi);
 		BUG();
 		return;
 	}
@@ -2399,6 +2423,7 @@
 		}
 	}
 	rdev_dec_pending(rdev, conf->mddev);
+	bio_reset(bi);
 	clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
 	raid5_release_stripe(sh);
@@ -2436,6 +2461,7 @@
 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
 		bi->bi_error);
 	if (i == disks) {
+		bio_reset(bi);
 		BUG();
 		return;
 	}
@@ -2472,6 +2498,7 @@
 	if (sh->batch_head && bi->bi_error && !replacement)
 		set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
 
+	bio_reset(bi);
 	if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
 		clear_bit(R5_LOCKED, &sh->dev[i].flags);
 	set_bit(STRIPE_HANDLE, &sh->state);
@@ -2485,16 +2512,6 @@
 {
 	struct r5dev *dev = &sh->dev[i];
 
-	bio_init(&dev->req);
-	dev->req.bi_io_vec = &dev->vec;
-	dev->req.bi_max_vecs = 1;
-	dev->req.bi_private = sh;
-
-	bio_init(&dev->rreq);
-	dev->rreq.bi_io_vec = &dev->rvec;
-	dev->rreq.bi_max_vecs = 1;
-	dev->rreq.bi_private = sh;
-
 	dev->flags = 0;
 	dev->sector = raid5_compute_blocknr(sh, i, previous);
 }
@@ -4628,7 +4645,9 @@
 	}
 
 	if (!bio_list_empty(&s.return_bi)) {
-		if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) {
+		if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
+				(s.failed <= conf->max_degraded ||
+					conf->mddev->external == 0)) {
 			spin_lock_irq(&conf->device_lock);
 			bio_list_merge(&conf->return_bi, &s.return_bi);
 			spin_unlock_irq(&conf->device_lock);
@@ -6330,22 +6349,20 @@
 	return 0;
 }
 
+static int raid456_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+	struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
+
+	free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
+	return 0;
+}
+
 static void raid5_free_percpu(struct r5conf *conf)
 {
-	unsigned long cpu;
-
 	if (!conf->percpu)
 		return;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	unregister_cpu_notifier(&conf->cpu_notify);
-#endif
-
-	get_online_cpus();
-	for_each_possible_cpu(cpu)
-		free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
-	put_online_cpus();
-
+	cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
 	free_percpu(conf->percpu);
 }
 
@@ -6364,64 +6381,28 @@
 	kfree(conf);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
-			      void *hcpu)
+static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node)
 {
-	struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify);
-	long cpu = (long)hcpu;
+	struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node);
 	struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu);
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		if (alloc_scratch_buffer(conf, percpu)) {
-			pr_err("%s: failed memory allocation for cpu%ld\n",
-			       __func__, cpu);
-			return notifier_from_errno(-ENOMEM);
-		}
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
-		break;
-	default:
-		break;
+	if (alloc_scratch_buffer(conf, percpu)) {
+		pr_err("%s: failed memory allocation for cpu%u\n",
+		       __func__, cpu);
+		return -ENOMEM;
 	}
-	return NOTIFY_OK;
+	return 0;
 }
-#endif
 
 static int raid5_alloc_percpu(struct r5conf *conf)
 {
-	unsigned long cpu;
 	int err = 0;
 
 	conf->percpu = alloc_percpu(struct raid5_percpu);
 	if (!conf->percpu)
 		return -ENOMEM;
 
-#ifdef CONFIG_HOTPLUG_CPU
-	conf->cpu_notify.notifier_call = raid456_cpu_notify;
-	conf->cpu_notify.priority = 0;
-	err = register_cpu_notifier(&conf->cpu_notify);
-	if (err)
-		return err;
-#endif
-
-	get_online_cpus();
-	for_each_present_cpu(cpu) {
-		err = alloc_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
-		if (err) {
-			pr_err("%s: failed memory allocation for cpu%ld\n",
-			       __func__, cpu);
-			break;
-		}
-	}
-	put_online_cpus();
-
+	err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node);
 	if (!err) {
 		conf->scribble_disks = max(conf->raid_disks,
 			conf->previous_raid_disks);
@@ -6620,6 +6601,16 @@
 	}
 
 	conf->min_nr_stripes = NR_STRIPES;
+	if (mddev->reshape_position != MaxSector) {
+		int stripes = max_t(int,
+			((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4,
+			((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4);
+		conf->min_nr_stripes = max(NR_STRIPES, stripes);
+		if (conf->min_nr_stripes != NR_STRIPES)
+			printk(KERN_INFO
+				"md/raid:%s: force stripe size %d for reshape\n",
+				mdname(mddev), conf->min_nr_stripes);
+	}
 	memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
 		 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
 	atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
@@ -6826,11 +6817,14 @@
 	if (IS_ERR(conf))
 		return PTR_ERR(conf);
 
-	if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) {
-		printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n",
-		       mdname(mddev));
-		mddev->ro = 1;
-		set_disk_ro(mddev->gendisk, 1);
+	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
+		if (!journal_dev) {
+			pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
+			       mdname(mddev));
+			mddev->ro = 1;
+			set_disk_ro(mddev->gendisk, 1);
+		} else if (mddev->recovery_cp == MaxSector)
+			set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
 	}
 
 	conf->min_offset_diff = min_offset_diff;
@@ -7953,10 +7947,21 @@
 
 static int __init raid5_init(void)
 {
+	int ret;
+
 	raid5_wq = alloc_workqueue("raid5wq",
 		WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0);
 	if (!raid5_wq)
 		return -ENOMEM;
+
+	ret = cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE,
+				      "md/raid5:prepare",
+				      raid456_cpu_up_prepare,
+				      raid456_cpu_dead);
+	if (ret) {
+		destroy_workqueue(raid5_wq);
+		return ret;
+	}
 	register_md_personality(&raid6_personality);
 	register_md_personality(&raid5_personality);
 	register_md_personality(&raid4_personality);
@@ -7968,6 +7973,7 @@
 	unregister_md_personality(&raid6_personality);
 	unregister_md_personality(&raid5_personality);
 	unregister_md_personality(&raid4_personality);
+	cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE);
 	destroy_workqueue(raid5_wq);
 }
 
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 517d4b6..57ec49f 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -512,9 +512,7 @@
 	} __percpu *percpu;
 	int scribble_disks;
 	int scribble_sectors;
-#ifdef CONFIG_HOTPLUG_CPU
-	struct notifier_block	cpu_notify;
-#endif
+	struct hlist_node node;
 
 	/*
 	 * Free stripes pool
diff --git a/drivers/media/cec-edid.c b/drivers/media/cec-edid.c
index 7001824..5719b99 100644
--- a/drivers/media/cec-edid.c
+++ b/drivers/media/cec-edid.c
@@ -70,7 +70,10 @@
 				u8 tag = edid[i] >> 5;
 				u8 len = edid[i] & 0x1f;
 
-				if (tag == 3 && len >= 5 && i + len <= end)
+				if (tag == 3 && len >= 5 && i + len <= end &&
+				    edid[i + 1] == 0x03 &&
+				    edid[i + 2] == 0x0c &&
+				    edid[i + 3] == 0x00)
 					return i + 4;
 				i += len + 1;
 			} while (i < end);
diff --git a/drivers/media/dvb-frontends/rtl2832_sdr.c b/drivers/media/dvb-frontends/rtl2832_sdr.c
index 6e22af3..e038e88 100644
--- a/drivers/media/dvb-frontends/rtl2832_sdr.c
+++ b/drivers/media/dvb-frontends/rtl2832_sdr.c
@@ -392,7 +392,6 @@
 		dev_dbg(&pdev->dev, "alloc urb=%d\n", i);
 		dev->urb_list[i] = usb_alloc_urb(0, GFP_ATOMIC);
 		if (!dev->urb_list[i]) {
-			dev_dbg(&pdev->dev, "failed\n");
 			for (j = 0; j < i; j++)
 				usb_free_urb(dev->urb_list[j]);
 			return -ENOMEM;
diff --git a/drivers/media/pci/cx23885/cx23885-417.c b/drivers/media/pci/cx23885/cx23885-417.c
index efec2d1..4d080da 100644
--- a/drivers/media/pci/cx23885/cx23885-417.c
+++ b/drivers/media/pci/cx23885/cx23885-417.c
@@ -1552,6 +1552,7 @@
 	q->mem_ops = &vb2_dma_sg_memops;
 	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
 	q->lock = &dev->lock;
+	q->dev = &dev->pci->dev;
 
 	err = vb2_queue_init(q);
 	if (err < 0)
diff --git a/drivers/media/pci/saa7134/saa7134-dvb.c b/drivers/media/pci/saa7134/saa7134-dvb.c
index db987e5..59a4b5f 100644
--- a/drivers/media/pci/saa7134/saa7134-dvb.c
+++ b/drivers/media/pci/saa7134/saa7134-dvb.c
@@ -1238,6 +1238,7 @@
 	q->buf_struct_size = sizeof(struct saa7134_buf);
 	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
 	q->lock = &dev->lock;
+	q->dev = &dev->pci->dev;
 	ret = vb2_queue_init(q);
 	if (ret) {
 		vb2_dvb_dealloc_frontends(&dev->frontends);
diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c
index ca417a4..791a516 100644
--- a/drivers/media/pci/saa7134/saa7134-empress.c
+++ b/drivers/media/pci/saa7134/saa7134-empress.c
@@ -295,6 +295,7 @@
 	q->buf_struct_size = sizeof(struct saa7134_buf);
 	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
 	q->lock = &dev->lock;
+	q->dev = &dev->pci->dev;
 	err = vb2_queue_init(q);
 	if (err)
 		return err;
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index f25344b..552b635 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -169,7 +169,7 @@
 config VIDEO_MEDIATEK_VCODEC
 	tristate "Mediatek Video Codec driver"
 	depends on MTK_IOMMU || COMPILE_TEST
-	depends on VIDEO_DEV && VIDEO_V4L2
+	depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA
 	depends on ARCH_MEDIATEK || COMPILE_TEST
 	select VIDEOBUF2_DMA_CONTIG
 	select V4L2_MEM2MEM_DEV
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
index 94f0a42..3a8e695 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
@@ -23,7 +23,6 @@
 #include <media/v4l2-ioctl.h>
 #include <media/videobuf2-core.h>
 
-#include "mtk_vcodec_util.h"
 
 #define MTK_VCODEC_DRV_NAME	"mtk_vcodec_drv"
 #define MTK_VCODEC_ENC_NAME	"mtk-vcodec-enc"
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
index 3ed3f2d..2c5719a 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc.c
@@ -487,7 +487,6 @@
 	struct mtk_q_data *q_data;
 	int ret, i;
 	struct mtk_video_fmt *fmt;
-	unsigned int pitch_w_div16;
 	struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp;
 
 	vq = v4l2_m2m_get_vq(ctx->m2m_ctx, f->type);
@@ -530,15 +529,6 @@
 	q_data->coded_width = f->fmt.pix_mp.width;
 	q_data->coded_height = f->fmt.pix_mp.height;
 
-	pitch_w_div16 = DIV_ROUND_UP(q_data->visible_width, 16);
-	if (pitch_w_div16 % 8 != 0) {
-		/* Adjust returned width/height, so application could correctly
-		 * allocate hw required memory
-		 */
-		q_data->visible_height += 32;
-		vidioc_try_fmt(f, q_data->fmt);
-	}
-
 	q_data->field = f->fmt.pix_mp.field;
 	ctx->colorspace = f->fmt.pix_mp.colorspace;
 	ctx->ycbcr_enc = f->fmt.pix_mp.ycbcr_enc;
@@ -878,7 +868,8 @@
 {
 	struct mtk_vcodec_ctx *ctx = priv;
 	int ret;
-	struct vb2_buffer *dst_buf;
+	struct vb2_buffer *src_buf, *dst_buf;
+	struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2;
 	struct mtk_vcodec_mem bs_buf;
 	struct venc_done_result enc_result;
 
@@ -911,6 +902,15 @@
 		mtk_v4l2_err("venc_if_encode failed=%d", ret);
 		return -EINVAL;
 	}
+	src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx);
+	if (src_buf) {
+		src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf);
+		dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf);
+		dst_buf->timestamp = src_buf->timestamp;
+		dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode;
+	} else {
+		mtk_v4l2_err("No timestamp for the header buffer.");
+	}
 
 	ctx->state = MTK_STATE_HEADER;
 	dst_buf->planes[0].bytesused = enc_result.bs_size;
@@ -1003,7 +1003,7 @@
 	struct mtk_vcodec_mem bs_buf;
 	struct venc_done_result enc_result;
 	int ret, i;
-	struct vb2_v4l2_buffer *vb2_v4l2;
+	struct vb2_v4l2_buffer *dst_vb2_v4l2, *src_vb2_v4l2;
 
 	/* check dst_buf, dst_buf may be removed in device_run
 	 * to stored encdoe header so we need check dst_buf and
@@ -1043,9 +1043,14 @@
 	ret = venc_if_encode(ctx, VENC_START_OPT_ENCODE_FRAME,
 			     &frm_buf, &bs_buf, &enc_result);
 
-	vb2_v4l2 = container_of(dst_buf, struct vb2_v4l2_buffer, vb2_buf);
+	src_vb2_v4l2 = to_vb2_v4l2_buffer(src_buf);
+	dst_vb2_v4l2 = to_vb2_v4l2_buffer(dst_buf);
+
+	dst_buf->timestamp = src_buf->timestamp;
+	dst_vb2_v4l2->timecode = src_vb2_v4l2->timecode;
+
 	if (enc_result.is_key_frm)
-		vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME;
+		dst_vb2_v4l2->flags |= V4L2_BUF_FLAG_KEYFRAME;
 
 	if (ret) {
 		v4l2_m2m_buf_done(to_vb2_v4l2_buffer(src_buf),
@@ -1217,7 +1222,7 @@
 			0, V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE);
 	v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_PROFILE,
 			V4L2_MPEG_VIDEO_H264_PROFILE_HIGH,
-			0, V4L2_MPEG_VIDEO_H264_PROFILE_MAIN);
+			0, V4L2_MPEG_VIDEO_H264_PROFILE_HIGH);
 	v4l2_ctrl_new_std_menu(handler, ops, V4L2_CID_MPEG_VIDEO_H264_LEVEL,
 			V4L2_MPEG_VIDEO_H264_LEVEL_4_2,
 			0, V4L2_MPEG_VIDEO_H264_LEVEL_4_0);
@@ -1288,5 +1293,10 @@
 
 void mtk_vcodec_enc_release(struct mtk_vcodec_ctx *ctx)
 {
-	venc_if_deinit(ctx);
+	int ret = venc_if_deinit(ctx);
+
+	if (ret)
+		mtk_v4l2_err("venc_if_deinit failed=%d", ret);
+
+	ctx->state = MTK_STATE_FREE;
 }
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
index c7806ec..5cd2151 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c
@@ -218,11 +218,15 @@
 	mtk_v4l2_debug(1, "[%d] encoder", ctx->id);
 	mutex_lock(&dev->dev_mutex);
 
+	/*
+	 * Call v4l2_m2m_ctx_release to make sure the worker thread is not
+	 * running after venc_if_deinit.
+	 */
+	v4l2_m2m_ctx_release(ctx->m2m_ctx);
 	mtk_vcodec_enc_release(ctx);
 	v4l2_fh_del(&ctx->fh);
 	v4l2_fh_exit(&ctx->fh);
 	v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
-	v4l2_m2m_ctx_release(ctx->m2m_ctx);
 
 	list_del_init(&ctx->list);
 	dev->num_instances--;
diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h
index 33e890f..1213185 100644
--- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h
+++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_intr.h
@@ -16,7 +16,6 @@
 #define _MTK_VCODEC_INTR_H_
 
 #define MTK_INST_IRQ_RECEIVED		0x1
-#define MTK_INST_WORK_THREAD_ABORT_DONE	0x2
 
 struct mtk_vcodec_ctx;
 
diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
index 9a60052..63d4be4 100644
--- a/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
+++ b/drivers/media/platform/mtk-vcodec/venc/venc_h264_if.c
@@ -61,6 +61,8 @@
 
 /*
  * struct venc_h264_vpu_config - Structure for h264 encoder configuration
+ *                               AP-W/R : AP is writer/reader on this item
+ *                               VPU-W/R: VPU is write/reader on this item
  * @input_fourcc: input fourcc
  * @bitrate: target bitrate (in bps)
  * @pic_w: picture width. Picture size is visible stream resolution, in pixels,
@@ -94,13 +96,13 @@
 
 /*
  * struct venc_h264_vpu_buf - Structure for buffer information
- * @align: buffer alignment (in bytes)
+ *                            AP-W/R : AP is writer/reader on this item
+ *                            VPU-W/R: VPU is write/reader on this item
  * @iova: IO virtual address
  * @vpua: VPU side memory addr which is used by RC_CODE
  * @size: buffer size (in bytes)
  */
 struct venc_h264_vpu_buf {
-	u32 align;
 	u32 iova;
 	u32 vpua;
 	u32 size;
@@ -108,6 +110,8 @@
 
 /*
  * struct venc_h264_vsi - Structure for VPU driver control and info share
+ *                        AP-W/R : AP is writer/reader on this item
+ *                        VPU-W/R: VPU is write/reader on this item
  * This structure is allocated in VPU side and shared to AP side.
  * @config: h264 encoder configuration
  * @work_bufs: working buffer information in VPU side
@@ -150,12 +154,6 @@
 	struct mtk_vcodec_ctx *ctx;
 };
 
-static inline void h264_write_reg(struct venc_h264_inst *inst, u32 addr,
-				  u32 val)
-{
-	writel(val, inst->hw_base + addr);
-}
-
 static inline u32 h264_read_reg(struct venc_h264_inst *inst, u32 addr)
 {
 	return readl(inst->hw_base + addr);
@@ -214,6 +212,8 @@
 		return 40;
 	case V4L2_MPEG_VIDEO_H264_LEVEL_4_1:
 		return 41;
+	case V4L2_MPEG_VIDEO_H264_LEVEL_4_2:
+		return 42;
 	default:
 		mtk_vcodec_debug(inst, "unsupported level %d", level);
 		return 31;
diff --git a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
index 60bbcd2..6d97584 100644
--- a/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
+++ b/drivers/media/platform/mtk-vcodec/venc/venc_vp8_if.c
@@ -56,6 +56,8 @@
 
 /*
  * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration
+ *                              AP-W/R : AP is writer/reader on this item
+ *                              VPU-W/R: VPU is write/reader on this item
  * @input_fourcc: input fourcc
  * @bitrate: target bitrate (in bps)
  * @pic_w: picture width. Picture size is visible stream resolution, in pixels,
@@ -83,14 +85,14 @@
 };
 
 /*
- * struct venc_vp8_vpu_buf -Structure for buffer information
- * @align: buffer alignment (in bytes)
+ * struct venc_vp8_vpu_buf - Structure for buffer information
+ *                           AP-W/R : AP is writer/reader on this item
+ *                           VPU-W/R: VPU is write/reader on this item
  * @iova: IO virtual address
  * @vpua: VPU side memory addr which is used by RC_CODE
  * @size: buffer size (in bytes)
  */
 struct venc_vp8_vpu_buf {
-	u32 align;
 	u32 iova;
 	u32 vpua;
 	u32 size;
@@ -98,6 +100,8 @@
 
 /*
  * struct venc_vp8_vsi - Structure for VPU driver control and info share
+ *                       AP-W/R : AP is writer/reader on this item
+ *                       VPU-W/R: VPU is write/reader on this item
  * This structure is allocated in VPU side and shared to AP side.
  * @config: vp8 encoder configuration
  * @work_bufs: working buffer information in VPU side
@@ -138,12 +142,6 @@
 	struct mtk_vcodec_ctx *ctx;
 };
 
-static inline void vp8_enc_write_reg(struct venc_vp8_inst *inst, u32 addr,
-				     u32 val)
-{
-	writel(val, inst->hw_base + addr);
-}
-
 static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr)
 {
 	return readl(inst->hw_base + addr);
diff --git a/drivers/media/platform/rcar-fcp.c b/drivers/media/platform/rcar-fcp.c
index 6a7bcc3..bc50c69 100644
--- a/drivers/media/platform/rcar-fcp.c
+++ b/drivers/media/platform/rcar-fcp.c
@@ -99,10 +99,16 @@
  */
 int rcar_fcp_enable(struct rcar_fcp_device *fcp)
 {
+	int error;
+
 	if (!fcp)
 		return 0;
 
-	return pm_runtime_get_sync(fcp->dev);
+	error = pm_runtime_get_sync(fcp->dev);
+	if (error < 0)
+		return error;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(rcar_fcp_enable);
 
diff --git a/drivers/media/radio/si470x/radio-si470x-usb.c b/drivers/media/radio/si470x/radio-si470x-usb.c
index 091d793..4b132c2 100644
--- a/drivers/media/radio/si470x/radio-si470x-usb.c
+++ b/drivers/media/radio/si470x/radio-si470x-usb.c
@@ -627,7 +627,6 @@
 
 	radio->int_in_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!radio->int_in_urb) {
-		dev_info(&intf->dev, "could not allocate int_in_urb");
 		retval = -ENOMEM;
 		goto err_intbuffer;
 	}
diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c
index 65f80b8..86cc70fe25 100644
--- a/drivers/media/rc/imon.c
+++ b/drivers/media/rc/imon.c
@@ -2211,16 +2211,11 @@
 		goto exit;
 	}
 	rx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!rx_urb) {
-		dev_err(dev, "%s: usb_alloc_urb failed for IR urb", __func__);
+	if (!rx_urb)
 		goto rx_urb_alloc_failed;
-	}
 	tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!tx_urb) {
-		dev_err(dev, "%s: usb_alloc_urb failed for display urb",
-			__func__);
+	if (!tx_urb)
 		goto tx_urb_alloc_failed;
-	}
 
 	mutex_init(&ictx->lock);
 	spin_lock_init(&ictx->kc_lock);
@@ -2305,10 +2300,8 @@
 	int ret = -ENOMEM;
 
 	rx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!rx_urb) {
-		pr_err("usb_alloc_urb failed for IR urb\n");
+	if (!rx_urb)
 		goto rx_urb_alloc_failed;
-	}
 
 	mutex_lock(&ictx->lock);
 
diff --git a/drivers/media/rc/redrat3.c b/drivers/media/rc/redrat3.c
index 399f44d..ec8016d 100644
--- a/drivers/media/rc/redrat3.c
+++ b/drivers/media/rc/redrat3.c
@@ -970,10 +970,8 @@
 
 	/* set up bulk-in endpoint */
 	rr3->read_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!rr3->read_urb) {
-		dev_err(dev, "Read urb allocation failure\n");
+	if (!rr3->read_urb)
 		goto error;
-	}
 
 	rr3->ep_in = ep_in;
 	rr3->bulk_in_buf = usb_alloc_coherent(udev,
diff --git a/drivers/media/usb/airspy/airspy.c b/drivers/media/usb/airspy/airspy.c
index fe031b0..3c556ee 100644
--- a/drivers/media/usb/airspy/airspy.c
+++ b/drivers/media/usb/airspy/airspy.c
@@ -426,7 +426,6 @@
 		dev_dbg(s->dev, "alloc urb=%d\n", i);
 		s->urb_list[i] = usb_alloc_urb(0, GFP_ATOMIC);
 		if (!s->urb_list[i]) {
-			dev_dbg(s->dev, "failed\n");
 			for (j = 0; j < i; j++)
 				usb_free_urb(s->urb_list[j]);
 			return -ENOMEM;
diff --git a/drivers/media/usb/as102/as102_usb_drv.c b/drivers/media/usb/as102/as102_usb_drv.c
index 0e8030c..68c3a80 100644
--- a/drivers/media/usb/as102/as102_usb_drv.c
+++ b/drivers/media/usb/as102/as102_usb_drv.c
@@ -270,8 +270,6 @@
 
 		urb = usb_alloc_urb(0, GFP_ATOMIC);
 		if (urb == NULL) {
-			dev_dbg(&dev->bus_adap.usb_dev->dev,
-				"%s: usb_alloc_urb failed\n", __func__);
 			as102_free_usb_stream_buffer(dev);
 			return -ENOMEM;
 		}
diff --git a/drivers/media/usb/au0828/au0828-video.c b/drivers/media/usb/au0828/au0828-video.c
index 82b0269..13b8387 100644
--- a/drivers/media/usb/au0828/au0828-video.c
+++ b/drivers/media/usb/au0828/au0828-video.c
@@ -245,7 +245,6 @@
 	for (i = 0; i < dev->isoc_ctl.num_bufs; i++) {
 		urb = usb_alloc_urb(max_packets, GFP_KERNEL);
 		if (!urb) {
-			au0828_isocdbg("cannot alloc isoc_ctl.urb %i\n", i);
 			au0828_uninit_isoc(dev);
 			return -ENOMEM;
 		}
diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c
index c1aa1ab2..13620cd 100644
--- a/drivers/media/usb/cpia2/cpia2_usb.c
+++ b/drivers/media/usb/cpia2/cpia2_usb.c
@@ -662,7 +662,6 @@
 		}
 		urb = usb_alloc_urb(FRAMES_PER_DESC, GFP_KERNEL);
 		if (!urb) {
-			ERR("%s: usb_alloc_urb error!\n", __func__);
 			for (j = 0; j < i; j++)
 				usb_free_urb(cam->sbuf[j].urb);
 			return -ENOMEM;
diff --git a/drivers/media/usb/cx231xx/cx231xx-audio.c b/drivers/media/usb/cx231xx/cx231xx-audio.c
index a6a9508..4cd5fa9 100644
--- a/drivers/media/usb/cx231xx/cx231xx-audio.c
+++ b/drivers/media/usb/cx231xx/cx231xx-audio.c
@@ -293,7 +293,6 @@
 		memset(dev->adev.transfer_buffer[i], 0x80, sb_size);
 		urb = usb_alloc_urb(CX231XX_ISO_NUM_AUDIO_PACKETS, GFP_ATOMIC);
 		if (!urb) {
-			dev_err(dev->dev, "usb_alloc_urb failed!\n");
 			for (j = 0; j < i; j++) {
 				usb_free_urb(dev->adev.urb[j]);
 				kfree(dev->adev.transfer_buffer[j]);
@@ -355,7 +354,6 @@
 		memset(dev->adev.transfer_buffer[i], 0x80, sb_size);
 		urb = usb_alloc_urb(CX231XX_NUM_AUDIO_PACKETS, GFP_ATOMIC);
 		if (!urb) {
-			dev_err(dev->dev, "usb_alloc_urb failed!\n");
 			for (j = 0; j < i; j++) {
 				usb_free_urb(dev->adev.urb[j]);
 				kfree(dev->adev.transfer_buffer[j]);
diff --git a/drivers/media/usb/cx231xx/cx231xx-core.c b/drivers/media/usb/cx231xx/cx231xx-core.c
index 630f4fc..8ec05cb 100644
--- a/drivers/media/usb/cx231xx/cx231xx-core.c
+++ b/drivers/media/usb/cx231xx/cx231xx-core.c
@@ -1035,8 +1035,6 @@
 	for (i = 0; i < dev->video_mode.isoc_ctl.num_bufs; i++) {
 		urb = usb_alloc_urb(max_packets, GFP_KERNEL);
 		if (!urb) {
-			dev_err(dev->dev,
-				"cannot alloc isoc_ctl.urb %i\n", i);
 			cx231xx_uninit_isoc(dev);
 			return -ENOMEM;
 		}
@@ -1172,8 +1170,6 @@
 	for (i = 0; i < dev->video_mode.bulk_ctl.num_bufs; i++) {
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			dev_err(dev->dev,
-				"cannot alloc bulk_ctl.urb %i\n", i);
 			cx231xx_uninit_bulk(dev);
 			return -ENOMEM;
 		}
diff --git a/drivers/media/usb/cx231xx/cx231xx-vbi.c b/drivers/media/usb/cx231xx/cx231xx-vbi.c
index 15bb573..76e9019 100644
--- a/drivers/media/usb/cx231xx/cx231xx-vbi.c
+++ b/drivers/media/usb/cx231xx/cx231xx-vbi.c
@@ -442,8 +442,6 @@
 
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			dev_err(dev->dev,
-				"cannot alloc bulk_ctl.urb %i\n", i);
 			cx231xx_uninit_vbi_isoc(dev);
 			return -ENOMEM;
 		}
diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c
index bf890c3..2679797 100644
--- a/drivers/media/usb/dvb-usb/dib0700_core.c
+++ b/drivers/media/usb/dvb-usb/dib0700_core.c
@@ -783,10 +783,8 @@
 	/* Starting in firmware 1.20, the RC info is provided on a bulk pipe */
 
 	purb = usb_alloc_urb(0, GFP_KERNEL);
-	if (purb == NULL) {
-		err("rc usb alloc urb failed");
+	if (purb == NULL)
 		return -ENOMEM;
-	}
 
 	purb->transfer_buffer = kzalloc(RC_MSG_SIZE_V1_20, GFP_KERNEL);
 	if (purb->transfer_buffer == NULL) {
diff --git a/drivers/media/usb/em28xx/em28xx-audio.c b/drivers/media/usb/em28xx/em28xx-audio.c
index 49a5f95..78f3687 100644
--- a/drivers/media/usb/em28xx/em28xx-audio.c
+++ b/drivers/media/usb/em28xx/em28xx-audio.c
@@ -850,7 +850,6 @@
 
 		urb = usb_alloc_urb(npackets, GFP_ATOMIC);
 		if (!urb) {
-			em28xx_errdev("usb_alloc_urb failed!\n");
 			em28xx_audio_free_urb(dev);
 			return -ENOMEM;
 		}
diff --git a/drivers/media/usb/em28xx/em28xx-core.c b/drivers/media/usb/em28xx/em28xx-core.c
index 3745607..eebd5d7 100644
--- a/drivers/media/usb/em28xx/em28xx-core.c
+++ b/drivers/media/usb/em28xx/em28xx-core.c
@@ -934,7 +934,6 @@
 	for (i = 0; i < usb_bufs->num_bufs; i++) {
 		urb = usb_alloc_urb(usb_bufs->num_packets, GFP_KERNEL);
 		if (!urb) {
-			em28xx_err("cannot alloc usb_ctl.urb %i\n", i);
 			em28xx_uninit_usb_xfer(dev, mode);
 			return -ENOMEM;
 		}
diff --git a/drivers/media/usb/gspca/benq.c b/drivers/media/usb/gspca/benq.c
index 790baed..5fa67b7 100644
--- a/drivers/media/usb/gspca/benq.c
+++ b/drivers/media/usb/gspca/benq.c
@@ -95,10 +95,8 @@
 #define SD_NPKT 32
 	for (n = 0; n < 4; n++) {
 		urb = usb_alloc_urb(SD_NPKT, GFP_KERNEL);
-		if (!urb) {
-			pr_err("usb_alloc_urb failed\n");
+		if (!urb)
 			return -ENOMEM;
-		}
 		gspca_dev->urb[n] = urb;
 		urb->transfer_buffer = usb_alloc_coherent(gspca_dev->dev,
 						SD_PKT_SZ * SD_NPKT,
diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c
index b17bd7e..af2395a 100644
--- a/drivers/media/usb/gspca/gspca.c
+++ b/drivers/media/usb/gspca/gspca.c
@@ -795,10 +795,8 @@
 
 	for (n = 0; n < nurbs; n++) {
 		urb = usb_alloc_urb(npkt, GFP_KERNEL);
-		if (!urb) {
-			pr_err("usb_alloc_urb failed\n");
+		if (!urb)
 			return -ENOMEM;
-		}
 		gspca_dev->urb[n] = urb;
 		urb->transfer_buffer = usb_alloc_coherent(gspca_dev->dev,
 						bsize,
diff --git a/drivers/media/usb/gspca/konica.c b/drivers/media/usb/gspca/konica.c
index 0712b1b..40aaaa9 100644
--- a/drivers/media/usb/gspca/konica.c
+++ b/drivers/media/usb/gspca/konica.c
@@ -208,10 +208,8 @@
 		packet_size =
 			le16_to_cpu(alt->endpoint[i].desc.wMaxPacketSize);
 		urb = usb_alloc_urb(SD_NPKT, GFP_KERNEL);
-		if (!urb) {
-			pr_err("usb_alloc_urb failed\n");
+		if (!urb)
 			return -ENOMEM;
-		}
 		gspca_dev->urb[n] = urb;
 		urb->transfer_buffer = usb_alloc_coherent(gspca_dev->dev,
 						packet_size * SD_NPKT,
diff --git a/drivers/media/usb/hackrf/hackrf.c b/drivers/media/usb/hackrf/hackrf.c
index b1e229a..c2c8d12 100644
--- a/drivers/media/usb/hackrf/hackrf.c
+++ b/drivers/media/usb/hackrf/hackrf.c
@@ -691,7 +691,6 @@
 		dev_dbg(dev->dev, "alloc urb=%d\n", i);
 		dev->urb_list[i] = usb_alloc_urb(0, GFP_ATOMIC);
 		if (!dev->urb_list[i]) {
-			dev_dbg(dev->dev, "failed\n");
 			for (j = 0; j < i; j++)
 				usb_free_urb(dev->urb_list[j]);
 			return -ENOMEM;
diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c
index 2a3a8b4..6d43d75 100644
--- a/drivers/media/usb/hdpvr/hdpvr-video.c
+++ b/drivers/media/usb/hdpvr/hdpvr-video.c
@@ -155,10 +155,8 @@
 		buf->dev = dev;
 
 		urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!urb) {
-			v4l2_err(&dev->v4l2_dev, "cannot allocate urb\n");
+		if (!urb)
 			goto exit_urb;
-		}
 		buf->urb = urb;
 
 		mem = usb_alloc_coherent(dev->udev, dev->bulk_in_size, GFP_KERNEL,
diff --git a/drivers/media/usb/msi2500/msi2500.c b/drivers/media/usb/msi2500/msi2500.c
index e7f167d..367eb7e 100644
--- a/drivers/media/usb/msi2500/msi2500.c
+++ b/drivers/media/usb/msi2500/msi2500.c
@@ -509,7 +509,6 @@
 	for (i = 0; i < MAX_ISO_BUFS; i++) {
 		urb = usb_alloc_urb(ISO_FRAMES_PER_DESC, GFP_KERNEL);
 		if (urb == NULL) {
-			dev_err(dev->dev, "Failed to allocate urb %d\n", i);
 			msi2500_isoc_cleanup(dev);
 			return -ENOMEM;
 		}
diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c
index b51b27a..c4454c9 100644
--- a/drivers/media/usb/pwc/pwc-if.c
+++ b/drivers/media/usb/pwc/pwc-if.c
@@ -410,7 +410,6 @@
 	for (i = 0; i < MAX_ISO_BUFS; i++) {
 		urb = usb_alloc_urb(ISO_FRAMES_PER_DESC, GFP_KERNEL);
 		if (urb == NULL) {
-			PWC_ERROR("Failed to allocate urb %d\n", i);
 			pwc_isoc_cleanup(pdev);
 			return -ENOMEM;
 		}
diff --git a/drivers/media/usb/s2255/s2255drv.c b/drivers/media/usb/s2255/s2255drv.c
index 43ba71a..9458eb0 100644
--- a/drivers/media/usb/s2255/s2255drv.c
+++ b/drivers/media/usb/s2255/s2255drv.c
@@ -2113,11 +2113,8 @@
 	pipe_info->state = 1;
 	pipe_info->err_count = 0;
 	pipe_info->stream_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!pipe_info->stream_urb) {
-		dev_err(&dev->udev->dev,
-			"ReadStream: Unable to alloc URB\n");
+	if (!pipe_info->stream_urb)
 		return -ENOMEM;
-	}
 	/* transfer buffer allocated in board_init */
 	usb_fill_bulk_urb(pipe_info->stream_urb, dev->udev,
 			  pipe,
@@ -2290,10 +2287,8 @@
 	}
 
 	dev->fw_data->fw_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->fw_data->fw_urb) {
-		dev_err(&interface->dev, "out of memory!\n");
+	if (!dev->fw_data->fw_urb)
 		goto errorFWURB;
-	}
 
 	dev->fw_data->pfw_data = kzalloc(CHUNK_SIZE, GFP_KERNEL);
 	if (!dev->fw_data->pfw_data) {
diff --git a/drivers/media/usb/stk1160/stk1160-video.c b/drivers/media/usb/stk1160/stk1160-video.c
index 6ecb0b4..ce8ebbe 100644
--- a/drivers/media/usb/stk1160/stk1160-video.c
+++ b/drivers/media/usb/stk1160/stk1160-video.c
@@ -457,10 +457,8 @@
 	for (i = 0; i < num_bufs; i++) {
 
 		urb = usb_alloc_urb(max_packets, GFP_KERNEL);
-		if (!urb) {
-			stk1160_err("cannot alloc urb[%d]\n", i);
+		if (!urb)
 			goto free_i_bufs;
-		}
 		dev->isoc_ctl.urb[i] = urb;
 
 #ifndef CONFIG_DMA_NONCOHERENT
diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c
index c21c4c0..db200c9 100644
--- a/drivers/media/usb/stkwebcam/stk-webcam.c
+++ b/drivers/media/usb/stkwebcam/stk-webcam.c
@@ -452,10 +452,8 @@
 			STK_ERROR("isobuf data already allocated\n");
 		if (dev->isobufs[i].urb == NULL) {
 			urb = usb_alloc_urb(ISO_FRAMES_PER_DESC, GFP_KERNEL);
-			if (urb == NULL) {
-				STK_ERROR("Failed to allocate URB %d\n", i);
+			if (urb == NULL)
 				goto isobufs_out;
-			}
 			dev->isobufs[i].urb = urb;
 		} else {
 			STK_ERROR("Killing URB\n");
diff --git a/drivers/media/usb/tm6000/tm6000-dvb.c b/drivers/media/usb/tm6000/tm6000-dvb.c
index 095f5db..0426b21 100644
--- a/drivers/media/usb/tm6000/tm6000-dvb.c
+++ b/drivers/media/usb/tm6000/tm6000-dvb.c
@@ -129,10 +129,8 @@
 	}
 
 	dvb->bulk_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (dvb->bulk_urb == NULL) {
-		printk(KERN_ERR "tm6000: couldn't allocate urb\n");
+	if (dvb->bulk_urb == NULL)
 		return -ENOMEM;
-	}
 
 	pipe = usb_rcvbulkpipe(dev->udev, dev->bulk_in.endp->desc.bEndpointAddress
 							  & USB_ENDPOINT_NUMBER_MASK);
diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c
index fa5e8bd..dee7e7d 100644
--- a/drivers/media/usb/tm6000/tm6000-video.c
+++ b/drivers/media/usb/tm6000/tm6000-video.c
@@ -635,7 +635,6 @@
 	for (i = 0; i < dev->isoc_ctl.num_bufs; i++) {
 		urb = usb_alloc_urb(max_packets, GFP_KERNEL);
 		if (!urb) {
-			tm6000_err("cannot alloc isoc_ctl.urb %i\n", i);
 			tm6000_uninit_isoc(dev);
 			usb_free_urb(urb);
 			return -ENOMEM;
diff --git a/drivers/media/usb/usbvision/usbvision-core.c b/drivers/media/usb/usbvision/usbvision-core.c
index 52ac439..c23bf73 100644
--- a/drivers/media/usb/usbvision/usbvision-core.c
+++ b/drivers/media/usb/usbvision/usbvision-core.c
@@ -2303,11 +2303,8 @@
 		struct urb *urb;
 
 		urb = usb_alloc_urb(USBVISION_URB_FRAMES, GFP_KERNEL);
-		if (urb == NULL) {
-			dev_err(&usbvision->dev->dev,
-				"%s: usb_alloc_urb() failed\n", __func__);
+		if (urb == NULL)
 			return -ENOMEM;
-		}
 		usbvision->sbuf[buf_idx].urb = urb;
 		usbvision->sbuf[buf_idx].data =
 			usb_alloc_coherent(usbvision->dev,
diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c
index 7433ba5..cc128db 100644
--- a/drivers/media/usb/zr364xx/zr364xx.c
+++ b/drivers/media/usb/zr364xx/zr364xx.c
@@ -1045,10 +1045,8 @@
 	pipe_info->state = 1;
 	pipe_info->err_count = 0;
 	pipe_info->stream_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!pipe_info->stream_urb) {
-		dev_err(&cam->udev->dev, "ReadStream: Unable to alloc URB\n");
+	if (!pipe_info->stream_urb)
 		return -ENOMEM;
-	}
 	/* transfer buffer allocated in board_init */
 	usb_fill_bulk_urb(pipe_info->stream_urb, cam->udev,
 			  pipe,
diff --git a/drivers/memory/of_memory.c b/drivers/memory/of_memory.c
index 9daf94b..568f05e 100644
--- a/drivers/memory/of_memory.c
+++ b/drivers/memory/of_memory.c
@@ -16,6 +16,7 @@
 #include <linux/gfp.h>
 #include <memory/jedec_ddr.h>
 #include <linux/export.h>
+#include "of_memory.h"
 
 /**
  * of_get_min_tck() - extract min timing values for ddr
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index 869c83f..f00f3e7 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -2185,7 +2185,7 @@
 	return 0;
 }
 
-static int gpmc_probe_dt_children(struct platform_device *pdev)
+static void gpmc_probe_dt_children(struct platform_device *pdev)
 {
 	int ret;
 	struct device_node *child;
@@ -2200,11 +2200,11 @@
 		else
 			ret = gpmc_probe_generic_child(pdev, child);
 
-		if (ret)
-			return ret;
+		if (ret) {
+			dev_err(&pdev->dev, "failed to probe DT child '%s': %d\n",
+				child->name, ret);
+		}
 	}
-
-	return 0;
 }
 #else
 static int gpmc_probe_dt(struct platform_device *pdev)
@@ -2212,9 +2212,8 @@
 	return 0;
 }
 
-static int gpmc_probe_dt_children(struct platform_device *pdev)
+static void gpmc_probe_dt_children(struct platform_device *pdev)
 {
-	return 0;
 }
 #endif /* CONFIG_OF */
 
@@ -2369,16 +2368,10 @@
 		goto setup_irq_failed;
 	}
 
-	rc = gpmc_probe_dt_children(pdev);
-	if (rc < 0) {
-		dev_err(gpmc->dev, "failed to probe DT children\n");
-		goto dt_children_failed;
-	}
+	gpmc_probe_dt_children(pdev);
 
 	return 0;
 
-dt_children_failed:
-	gpmc_free_irq(gpmc);
 setup_irq_failed:
 	gpmc_gpio_exit(gpmc);
 gpio_init_failed:
diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
index bd3aa45..c8dee47 100644
--- a/drivers/mfd/lpc_ich.c
+++ b/drivers/mfd/lpc_ich.c
@@ -984,6 +984,10 @@
 	int ret;
 	struct resource *res;
 
+	/* If we have ACPI based watchdog use that instead */
+	if (acpi_has_watchdog())
+		return -ENODEV;
+
 	/* Setup power management base register */
 	pci_read_config_dword(dev, priv->abase, &base_addr_cfg);
 	base_addr = base_addr_cfg & 0x0000ff80;
diff --git a/drivers/mfd/tps65218.c b/drivers/mfd/tps65218.c
index 80b9dc3..ba610ad 100644
--- a/drivers/mfd/tps65218.c
+++ b/drivers/mfd/tps65218.c
@@ -219,6 +219,7 @@
 	struct tps65218 *tps;
 	const struct of_device_id *match;
 	int ret;
+	unsigned int chipid;
 
 	match = of_match_device(of_tps65218_match_table, &client->dev);
 	if (!match) {
@@ -250,6 +251,14 @@
 	if (ret < 0)
 		return ret;
 
+	ret = tps65218_reg_read(tps, TPS65218_REG_CHIPID, &chipid);
+	if (ret) {
+		dev_err(tps->dev, "Failed to read chipid: %d\n", ret);
+		return ret;
+	}
+
+	tps->rev = chipid & TPS65218_CHIPID_REV_MASK;
+
 	ret = of_platform_populate(client->dev.of_node, NULL, NULL,
 				   &client->dev);
 	if (ret < 0)
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index a216b46..64971ba 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -345,16 +345,6 @@
 	  This driver can also be built as a module.  If so, the module
 	  will be called tsl2550.
 
-config SENSORS_BH1780
-	tristate "ROHM BH1780GLI ambient light sensor"
-	depends on I2C && SYSFS
-	help
-	  If you say yes here you get support for the ROHM BH1780GLI
-	  ambient light sensor.
-
-	  This driver can also be built as a module.  If so, the module
-	  will be called bh1780gli.
-
 config SENSORS_BH1770
          tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor"
          depends on I2C
@@ -439,34 +429,6 @@
 	  line and the Linux version on the second line, but that's
 	  still useful.
 
-config BMP085
-	tristate
-	depends on SYSFS
-
-config BMP085_I2C
-	tristate "BMP085 digital pressure sensor on I2C"
-	select BMP085
-	select REGMAP_I2C
-	depends on I2C && SYSFS
-	help
-	  Say Y here if you want to support Bosch Sensortec's digital pressure
-	  sensor hooked to an I2C bus.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called bmp085-i2c.
-
-config BMP085_SPI
-	tristate "BMP085 digital pressure sensor on SPI"
-	select BMP085
-	select REGMAP_SPI
-	depends on SPI_MASTER && SYSFS
-	help
-	  Say Y here if you want to support Bosch Sensortec's digital pressure
-	  sensor hooked to an SPI bus.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called bmp085-spi.
-
 config PCH_PHUB
 	tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) PHUB"
 	select GENERIC_NET_UTILS
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 7410c6d..3198336 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -9,9 +9,6 @@
 obj-$(CONFIG_INTEL_MID_PTI)	+= pti.o
 obj-$(CONFIG_ATMEL_SSC)		+= atmel-ssc.o
 obj-$(CONFIG_ATMEL_TCLIB)	+= atmel_tclib.o
-obj-$(CONFIG_BMP085)		+= bmp085.o
-obj-$(CONFIG_BMP085_I2C)	+= bmp085-i2c.o
-obj-$(CONFIG_BMP085_SPI)	+= bmp085-spi.o
 obj-$(CONFIG_DUMMY_IRQ)		+= dummy-irq.o
 obj-$(CONFIG_ICS932S401)	+= ics932s401.o
 obj-$(CONFIG_LKDTM)		+= lkdtm.o
@@ -19,7 +16,6 @@
 obj-$(CONFIG_TIFM_7XX1)       	+= tifm_7xx1.o
 obj-$(CONFIG_PHANTOM)		+= phantom.o
 obj-$(CONFIG_QCOM_COINCELL)	+= qcom-coincell.o
-obj-$(CONFIG_SENSORS_BH1780)	+= bh1780gli.o
 obj-$(CONFIG_SENSORS_BH1770)	+= bh1770glc.o
 obj-$(CONFIG_SENSORS_APDS990X)	+= apds990x.o
 obj-$(CONFIG_SGI_IOC4)		+= ioc4.o
diff --git a/drivers/misc/bh1780gli.c b/drivers/misc/bh1780gli.c
deleted file mode 100644
index 7f90ce5..0000000
--- a/drivers/misc/bh1780gli.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * bh1780gli.c
- * ROHM Ambient Light Sensor Driver
- *
- * Copyright (C) 2010 Texas Instruments
- * Author: Hemanth V <hemanthv@ti.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-#include <linux/i2c.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-#include <linux/module.h>
-#include <linux/of.h>
-
-#define BH1780_REG_CONTROL	0x80
-#define BH1780_REG_PARTID	0x8A
-#define BH1780_REG_MANFID	0x8B
-#define BH1780_REG_DLOW	0x8C
-#define BH1780_REG_DHIGH	0x8D
-
-#define BH1780_REVMASK		(0xf)
-#define BH1780_POWMASK		(0x3)
-#define BH1780_POFF		(0x0)
-#define BH1780_PON		(0x3)
-
-/* power on settling time in ms */
-#define BH1780_PON_DELAY	2
-
-struct bh1780_data {
-	struct i2c_client *client;
-	int power_state;
-	/* lock for sysfs operations */
-	struct mutex lock;
-};
-
-static int bh1780_write(struct bh1780_data *ddata, u8 reg, u8 val, char *msg)
-{
-	int ret = i2c_smbus_write_byte_data(ddata->client, reg, val);
-	if (ret < 0)
-		dev_err(&ddata->client->dev,
-			"i2c_smbus_write_byte_data failed error %d Register (%s)\n",
-			ret, msg);
-	return ret;
-}
-
-static int bh1780_read(struct bh1780_data *ddata, u8 reg, char *msg)
-{
-	int ret = i2c_smbus_read_byte_data(ddata->client, reg);
-	if (ret < 0)
-		dev_err(&ddata->client->dev,
-			"i2c_smbus_read_byte_data failed error %d Register (%s)\n",
-			ret, msg);
-	return ret;
-}
-
-static ssize_t bh1780_show_lux(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct bh1780_data *ddata = platform_get_drvdata(pdev);
-	int lsb, msb;
-
-	lsb = bh1780_read(ddata, BH1780_REG_DLOW, "DLOW");
-	if (lsb < 0)
-		return lsb;
-
-	msb = bh1780_read(ddata, BH1780_REG_DHIGH, "DHIGH");
-	if (msb < 0)
-		return msb;
-
-	return sprintf(buf, "%d\n", (msb << 8) | lsb);
-}
-
-static ssize_t bh1780_show_power_state(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct bh1780_data *ddata = platform_get_drvdata(pdev);
-	int state;
-
-	state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL");
-	if (state < 0)
-		return state;
-
-	return sprintf(buf, "%d\n", state & BH1780_POWMASK);
-}
-
-static ssize_t bh1780_store_power_state(struct device *dev,
-					struct device_attribute *attr,
-					const char *buf, size_t count)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct bh1780_data *ddata = platform_get_drvdata(pdev);
-	unsigned long val;
-	int error;
-
-	error = kstrtoul(buf, 0, &val);
-	if (error)
-		return error;
-
-	if (val < BH1780_POFF || val > BH1780_PON)
-		return -EINVAL;
-
-	mutex_lock(&ddata->lock);
-
-	error = bh1780_write(ddata, BH1780_REG_CONTROL, val, "CONTROL");
-	if (error < 0) {
-		mutex_unlock(&ddata->lock);
-		return error;
-	}
-
-	msleep(BH1780_PON_DELAY);
-	ddata->power_state = val;
-	mutex_unlock(&ddata->lock);
-
-	return count;
-}
-
-static DEVICE_ATTR(lux, S_IRUGO, bh1780_show_lux, NULL);
-
-static DEVICE_ATTR(power_state, S_IWUSR | S_IRUGO,
-		bh1780_show_power_state, bh1780_store_power_state);
-
-static struct attribute *bh1780_attributes[] = {
-	&dev_attr_power_state.attr,
-	&dev_attr_lux.attr,
-	NULL
-};
-
-static const struct attribute_group bh1780_attr_group = {
-	.attrs = bh1780_attributes,
-};
-
-static int bh1780_probe(struct i2c_client *client,
-						const struct i2c_device_id *id)
-{
-	int ret;
-	struct bh1780_data *ddata;
-	struct i2c_adapter *adapter = to_i2c_adapter(client->dev.parent);
-
-	if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE))
-		return -EIO;
-
-	ddata = devm_kzalloc(&client->dev, sizeof(struct bh1780_data),
-			     GFP_KERNEL);
-	if (ddata == NULL)
-		return -ENOMEM;
-
-	ddata->client = client;
-	i2c_set_clientdata(client, ddata);
-
-	ret = bh1780_read(ddata, BH1780_REG_PARTID, "PART ID");
-	if (ret < 0)
-		return ret;
-
-	dev_info(&client->dev, "Ambient Light Sensor, Rev : %d\n",
-			(ret & BH1780_REVMASK));
-
-	mutex_init(&ddata->lock);
-
-	return sysfs_create_group(&client->dev.kobj, &bh1780_attr_group);
-}
-
-static int bh1780_remove(struct i2c_client *client)
-{
-	sysfs_remove_group(&client->dev.kobj, &bh1780_attr_group);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int bh1780_suspend(struct device *dev)
-{
-	struct bh1780_data *ddata;
-	int state, ret;
-	struct i2c_client *client = to_i2c_client(dev);
-
-	ddata = i2c_get_clientdata(client);
-	state = bh1780_read(ddata, BH1780_REG_CONTROL, "CONTROL");
-	if (state < 0)
-		return state;
-
-	ddata->power_state = state & BH1780_POWMASK;
-
-	ret = bh1780_write(ddata, BH1780_REG_CONTROL, BH1780_POFF,
-				"CONTROL");
-
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static int bh1780_resume(struct device *dev)
-{
-	struct bh1780_data *ddata;
-	int state, ret;
-	struct i2c_client *client = to_i2c_client(dev);
-
-	ddata = i2c_get_clientdata(client);
-	state = ddata->power_state;
-	ret = bh1780_write(ddata, BH1780_REG_CONTROL, state,
-				"CONTROL");
-
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-#endif /* CONFIG_PM_SLEEP */
-
-static SIMPLE_DEV_PM_OPS(bh1780_pm, bh1780_suspend, bh1780_resume);
-
-static const struct i2c_device_id bh1780_id[] = {
-	{ "bh1780", 0 },
-	{ },
-};
-
-MODULE_DEVICE_TABLE(i2c, bh1780_id);
-
-#ifdef CONFIG_OF
-static const struct of_device_id of_bh1780_match[] = {
-	{ .compatible = "rohm,bh1780gli", },
-	{},
-};
-
-MODULE_DEVICE_TABLE(of, of_bh1780_match);
-#endif
-
-static struct i2c_driver bh1780_driver = {
-	.probe		= bh1780_probe,
-	.remove		= bh1780_remove,
-	.id_table	= bh1780_id,
-	.driver = {
-		.name = "bh1780",
-		.pm	= &bh1780_pm,
-		.of_match_table = of_match_ptr(of_bh1780_match),
-	},
-};
-
-module_i2c_driver(bh1780_driver);
-
-MODULE_DESCRIPTION("BH1780GLI Ambient Light Sensor Driver");
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Hemanth V <hemanthv@ti.com>");
diff --git a/drivers/misc/bmp085-i2c.c b/drivers/misc/bmp085-i2c.c
deleted file mode 100644
index f35c218..0000000
--- a/drivers/misc/bmp085-i2c.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2012  Bosch Sensortec GmbH
- * Copyright (c) 2012  Unixphere AB
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/i2c.h>
-#include <linux/err.h>
-#include "bmp085.h"
-
-#define BMP085_I2C_ADDRESS	0x77
-
-static const unsigned short normal_i2c[] = { BMP085_I2C_ADDRESS,
-							I2C_CLIENT_END };
-
-static int bmp085_i2c_detect(struct i2c_client *client,
-			     struct i2c_board_info *info)
-{
-	if (client->addr != BMP085_I2C_ADDRESS)
-		return -ENODEV;
-
-	return bmp085_detect(&client->dev);
-}
-
-static int bmp085_i2c_probe(struct i2c_client *client,
-				      const struct i2c_device_id *id)
-{
-	int err;
-	struct regmap *regmap = devm_regmap_init_i2c(client,
-						     &bmp085_regmap_config);
-
-	if (IS_ERR(regmap)) {
-		err = PTR_ERR(regmap);
-		dev_err(&client->dev, "Failed to init regmap: %d\n", err);
-		return err;
-	}
-
-	return bmp085_probe(&client->dev, regmap, client->irq);
-}
-
-static int bmp085_i2c_remove(struct i2c_client *client)
-{
-	return bmp085_remove(&client->dev);
-}
-
-static const struct i2c_device_id bmp085_id[] = {
-	{ BMP085_NAME, 0 },
-	{ "bmp180", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, bmp085_id);
-
-static struct i2c_driver bmp085_i2c_driver = {
-	.driver = {
-		.name	= BMP085_NAME,
-	},
-	.id_table	= bmp085_id,
-	.probe		= bmp085_i2c_probe,
-	.remove		= bmp085_i2c_remove,
-
-	.detect		= bmp085_i2c_detect,
-	.address_list	= normal_i2c
-};
-
-module_i2c_driver(bmp085_i2c_driver);
-
-MODULE_AUTHOR("Eric Andersson <eric.andersson@unixphere.com>");
-MODULE_DESCRIPTION("BMP085 I2C bus driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/misc/bmp085-spi.c b/drivers/misc/bmp085-spi.c
deleted file mode 100644
index 17ecbf9..0000000
--- a/drivers/misc/bmp085-spi.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2012  Bosch Sensortec GmbH
- * Copyright (c) 2012  Unixphere AB
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/spi/spi.h>
-#include <linux/err.h>
-#include "bmp085.h"
-
-static int bmp085_spi_probe(struct spi_device *client)
-{
-	int err;
-	struct regmap *regmap;
-
-	client->bits_per_word = 8;
-	err = spi_setup(client);
-	if (err < 0) {
-		dev_err(&client->dev, "spi_setup failed!\n");
-		return err;
-	}
-
-	regmap = devm_regmap_init_spi(client, &bmp085_regmap_config);
-	if (IS_ERR(regmap)) {
-		err = PTR_ERR(regmap);
-		dev_err(&client->dev, "Failed to init regmap: %d\n", err);
-		return err;
-	}
-
-	return bmp085_probe(&client->dev, regmap, client->irq);
-}
-
-static int bmp085_spi_remove(struct spi_device *client)
-{
-	return bmp085_remove(&client->dev);
-}
-
-static const struct of_device_id bmp085_of_match[] = {
-	{ .compatible = "bosch,bmp085", },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, bmp085_of_match);
-
-static const struct spi_device_id bmp085_id[] = {
-	{ "bmp180", 0 },
-	{ "bmp181", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(spi, bmp085_id);
-
-static struct spi_driver bmp085_spi_driver = {
-	.driver = {
-		.name	= BMP085_NAME,
-		.of_match_table = bmp085_of_match
-	},
-	.id_table	= bmp085_id,
-	.probe		= bmp085_spi_probe,
-	.remove		= bmp085_spi_remove
-};
-
-module_spi_driver(bmp085_spi_driver);
-
-MODULE_AUTHOR("Eric Andersson <eric.andersson@unixphere.com>");
-MODULE_DESCRIPTION("BMP085 SPI bus driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/misc/bmp085.c b/drivers/misc/bmp085.c
deleted file mode 100644
index 9b313f7..0000000
--- a/drivers/misc/bmp085.c
+++ /dev/null
@@ -1,506 +0,0 @@
-/*  Copyright (c) 2010  Christoph Mair <christoph.mair@gmail.com>
- *  Copyright (c) 2012  Bosch Sensortec GmbH
- *  Copyright (c) 2012  Unixphere AB
- *
- *  This driver supports the bmp085 and bmp18x digital barometric pressure
- *  and temperature sensors from Bosch Sensortec. The datasheets
- *  are available from their website:
- *  http://www.bosch-sensortec.com/content/language1/downloads/BST-BMP085-DS000-05.pdf
- *  http://www.bosch-sensortec.com/content/language1/downloads/BST-BMP180-DS000-07.pdf
- *
- *  A pressure measurement is issued by reading from pressure0_input.
- *  The return value ranges from 30000 to 110000 pascal with a resulution
- *  of 1 pascal (0.01 millibar) which enables measurements from 9000m above
- *  to 500m below sea level.
- *
- *  The temperature can be read from temp0_input. Values range from
- *  -400 to 850 representing the ambient temperature in degree celsius
- *  multiplied by 10.The resolution is 0.1 celsius.
- *
- *  Because ambient pressure is temperature dependent, a temperature
- *  measurement will be executed automatically even if the user is reading
- *  from pressure0_input. This happens if the last temperature measurement
- *  has been executed more then one second ago.
- *
- *  To decrease RMS noise from pressure measurements, the bmp085 can
- *  autonomously calculate the average of up to eight samples. This is
- *  set up by writing to the oversampling sysfs file. Accepted values
- *  are 0, 1, 2 and 3. 2^x when x is the value written to this file
- *  specifies the number of samples used to calculate the ambient pressure.
- *  RMS noise is specified with six pascal (without averaging) and decreases
- *  down to 3 pascal when using an oversampling setting of 3.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  This program is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with this program; if not, write to the Free Software
- *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/slab.h>
-#include <linux/of.h>
-#include "bmp085.h"
-#include <linux/interrupt.h>
-#include <linux/completion.h>
-#include <linux/gpio.h>
-
-#define BMP085_CHIP_ID			0x55
-#define BMP085_CALIBRATION_DATA_START	0xAA
-#define BMP085_CALIBRATION_DATA_LENGTH	11	/* 16 bit values */
-#define BMP085_CHIP_ID_REG		0xD0
-#define BMP085_CTRL_REG			0xF4
-#define BMP085_TEMP_MEASUREMENT		0x2E
-#define BMP085_PRESSURE_MEASUREMENT	0x34
-#define BMP085_CONVERSION_REGISTER_MSB	0xF6
-#define BMP085_CONVERSION_REGISTER_LSB	0xF7
-#define BMP085_CONVERSION_REGISTER_XLSB	0xF8
-#define BMP085_TEMP_CONVERSION_TIME	5
-
-struct bmp085_calibration_data {
-	s16 AC1, AC2, AC3;
-	u16 AC4, AC5, AC6;
-	s16 B1, B2;
-	s16 MB, MC, MD;
-};
-
-struct bmp085_data {
-	struct	device *dev;
-	struct  regmap *regmap;
-	struct	mutex lock;
-	struct	bmp085_calibration_data calibration;
-	u8	oversampling_setting;
-	u32	raw_temperature;
-	u32	raw_pressure;
-	u32	temp_measurement_period;
-	unsigned long last_temp_measurement;
-	u8	chip_id;
-	s32	b6; /* calculated temperature correction coefficient */
-	int	irq;
-	struct	completion done;
-};
-
-static irqreturn_t bmp085_eoc_isr(int irq, void *devid)
-{
-	struct bmp085_data *data = devid;
-
-	complete(&data->done);
-
-	return IRQ_HANDLED;
-}
-
-static s32 bmp085_read_calibration_data(struct bmp085_data *data)
-{
-	u16 tmp[BMP085_CALIBRATION_DATA_LENGTH];
-	struct bmp085_calibration_data *cali = &(data->calibration);
-	s32 status = regmap_bulk_read(data->regmap,
-				BMP085_CALIBRATION_DATA_START, (u8 *)tmp,
-				(BMP085_CALIBRATION_DATA_LENGTH << 1));
-	if (status < 0)
-		return status;
-
-	cali->AC1 =  be16_to_cpu(tmp[0]);
-	cali->AC2 =  be16_to_cpu(tmp[1]);
-	cali->AC3 =  be16_to_cpu(tmp[2]);
-	cali->AC4 =  be16_to_cpu(tmp[3]);
-	cali->AC5 =  be16_to_cpu(tmp[4]);
-	cali->AC6 = be16_to_cpu(tmp[5]);
-	cali->B1 = be16_to_cpu(tmp[6]);
-	cali->B2 = be16_to_cpu(tmp[7]);
-	cali->MB = be16_to_cpu(tmp[8]);
-	cali->MC = be16_to_cpu(tmp[9]);
-	cali->MD = be16_to_cpu(tmp[10]);
-	return 0;
-}
-
-static s32 bmp085_update_raw_temperature(struct bmp085_data *data)
-{
-	u16 tmp;
-	s32 status;
-
-	mutex_lock(&data->lock);
-
-	init_completion(&data->done);
-
-	status = regmap_write(data->regmap, BMP085_CTRL_REG,
-			      BMP085_TEMP_MEASUREMENT);
-	if (status < 0) {
-		dev_err(data->dev,
-			"Error while requesting temperature measurement.\n");
-		goto exit;
-	}
-	wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
-					    BMP085_TEMP_CONVERSION_TIME));
-
-	status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
-				 &tmp, sizeof(tmp));
-	if (status < 0) {
-		dev_err(data->dev,
-			"Error while reading temperature measurement result\n");
-		goto exit;
-	}
-	data->raw_temperature = be16_to_cpu(tmp);
-	data->last_temp_measurement = jiffies;
-	status = 0;	/* everything ok, return 0 */
-
-exit:
-	mutex_unlock(&data->lock);
-	return status;
-}
-
-static s32 bmp085_update_raw_pressure(struct bmp085_data *data)
-{
-	u32 tmp = 0;
-	s32 status;
-
-	mutex_lock(&data->lock);
-
-	init_completion(&data->done);
-
-	status = regmap_write(data->regmap, BMP085_CTRL_REG,
-			BMP085_PRESSURE_MEASUREMENT +
-			(data->oversampling_setting << 6));
-	if (status < 0) {
-		dev_err(data->dev,
-			"Error while requesting pressure measurement.\n");
-		goto exit;
-	}
-
-	/* wait for the end of conversion */
-	wait_for_completion_timeout(&data->done, 1 + msecs_to_jiffies(
-					2+(3 << data->oversampling_setting)));
-	/* copy data into a u32 (4 bytes), but skip the first byte. */
-	status = regmap_bulk_read(data->regmap, BMP085_CONVERSION_REGISTER_MSB,
-				 ((u8 *)&tmp)+1, 3);
-	if (status < 0) {
-		dev_err(data->dev,
-			"Error while reading pressure measurement results\n");
-		goto exit;
-	}
-	data->raw_pressure = be32_to_cpu((tmp));
-	data->raw_pressure >>= (8-data->oversampling_setting);
-	status = 0;	/* everything ok, return 0 */
-
-exit:
-	mutex_unlock(&data->lock);
-	return status;
-}
-
-/*
- * This function starts the temperature measurement and returns the value
- * in tenth of a degree celsius.
- */
-static s32 bmp085_get_temperature(struct bmp085_data *data, int *temperature)
-{
-	struct bmp085_calibration_data *cali = &data->calibration;
-	long x1, x2;
-	int status;
-
-	status = bmp085_update_raw_temperature(data);
-	if (status < 0)
-		goto exit;
-
-	x1 = ((data->raw_temperature - cali->AC6) * cali->AC5) >> 15;
-	x2 = (cali->MC << 11) / (x1 + cali->MD);
-	data->b6 = x1 + x2 - 4000;
-	/* if NULL just update b6. Used for pressure only measurements */
-	if (temperature != NULL)
-		*temperature = (x1+x2+8) >> 4;
-
-exit:
-	return status;
-}
-
-/*
- * This function starts the pressure measurement and returns the value
- * in millibar. Since the pressure depends on the ambient temperature,
- * a temperature measurement is executed according to the given temperature
- * measurement period (default is 1 sec boundary). This period could vary
- * and needs to be adjusted according to the sensor environment, i.e. if big
- * temperature variations then the temperature needs to be read out often.
- */
-static s32 bmp085_get_pressure(struct bmp085_data *data, int *pressure)
-{
-	struct bmp085_calibration_data *cali = &data->calibration;
-	s32 x1, x2, x3, b3;
-	u32 b4, b7;
-	s32 p;
-	int status;
-
-	/* alt least every second force an update of the ambient temperature */
-	if ((data->last_temp_measurement == 0) ||
-	    time_is_before_jiffies(data->last_temp_measurement + 1*HZ)) {
-		status = bmp085_get_temperature(data, NULL);
-		if (status < 0)
-			return status;
-	}
-
-	status = bmp085_update_raw_pressure(data);
-	if (status < 0)
-		return status;
-
-	x1 = (data->b6 * data->b6) >> 12;
-	x1 *= cali->B2;
-	x1 >>= 11;
-
-	x2 = cali->AC2 * data->b6;
-	x2 >>= 11;
-
-	x3 = x1 + x2;
-
-	b3 = (((((s32)cali->AC1) * 4 + x3) << data->oversampling_setting) + 2);
-	b3 >>= 2;
-
-	x1 = (cali->AC3 * data->b6) >> 13;
-	x2 = (cali->B1 * ((data->b6 * data->b6) >> 12)) >> 16;
-	x3 = (x1 + x2 + 2) >> 2;
-	b4 = (cali->AC4 * (u32)(x3 + 32768)) >> 15;
-
-	b7 = ((u32)data->raw_pressure - b3) *
-					(50000 >> data->oversampling_setting);
-	p = ((b7 < 0x80000000) ? ((b7 << 1) / b4) : ((b7 / b4) * 2));
-
-	x1 = p >> 8;
-	x1 *= x1;
-	x1 = (x1 * 3038) >> 16;
-	x2 = (-7357 * p) >> 16;
-	p += (x1 + x2 + 3791) >> 4;
-
-	*pressure = p;
-
-	return 0;
-}
-
-/*
- * This function sets the chip-internal oversampling. Valid values are 0..3.
- * The chip will use 2^oversampling samples for internal averaging.
- * This influences the measurement time and the accuracy; larger values
- * increase both. The datasheet gives an overview on how measurement time,
- * accuracy and noise correlate.
- */
-static void bmp085_set_oversampling(struct bmp085_data *data,
-						unsigned char oversampling)
-{
-	if (oversampling > 3)
-		oversampling = 3;
-	data->oversampling_setting = oversampling;
-}
-
-/*
- * Returns the currently selected oversampling. Range: 0..3
- */
-static unsigned char bmp085_get_oversampling(struct bmp085_data *data)
-{
-	return data->oversampling_setting;
-}
-
-/* sysfs callbacks */
-static ssize_t set_oversampling(struct device *dev,
-				struct device_attribute *attr,
-				const char *buf, size_t count)
-{
-	struct bmp085_data *data = dev_get_drvdata(dev);
-	unsigned long oversampling;
-	int err = kstrtoul(buf, 10, &oversampling);
-
-	if (err == 0) {
-		mutex_lock(&data->lock);
-		bmp085_set_oversampling(data, oversampling);
-		mutex_unlock(&data->lock);
-		return count;
-	}
-
-	return err;
-}
-
-static ssize_t show_oversampling(struct device *dev,
-				 struct device_attribute *attr, char *buf)
-{
-	struct bmp085_data *data = dev_get_drvdata(dev);
-
-	return sprintf(buf, "%u\n", bmp085_get_oversampling(data));
-}
-static DEVICE_ATTR(oversampling, S_IWUSR | S_IRUGO,
-					show_oversampling, set_oversampling);
-
-
-static ssize_t show_temperature(struct device *dev,
-				struct device_attribute *attr, char *buf)
-{
-	int temperature;
-	int status;
-	struct bmp085_data *data = dev_get_drvdata(dev);
-
-	status = bmp085_get_temperature(data, &temperature);
-	if (status < 0)
-		return status;
-	else
-		return sprintf(buf, "%d\n", temperature);
-}
-static DEVICE_ATTR(temp0_input, S_IRUGO, show_temperature, NULL);
-
-
-static ssize_t show_pressure(struct device *dev,
-			     struct device_attribute *attr, char *buf)
-{
-	int pressure;
-	int status;
-	struct bmp085_data *data = dev_get_drvdata(dev);
-
-	status = bmp085_get_pressure(data, &pressure);
-	if (status < 0)
-		return status;
-	else
-		return sprintf(buf, "%d\n", pressure);
-}
-static DEVICE_ATTR(pressure0_input, S_IRUGO, show_pressure, NULL);
-
-
-static struct attribute *bmp085_attributes[] = {
-	&dev_attr_temp0_input.attr,
-	&dev_attr_pressure0_input.attr,
-	&dev_attr_oversampling.attr,
-	NULL
-};
-
-static const struct attribute_group bmp085_attr_group = {
-	.attrs = bmp085_attributes,
-};
-
-int bmp085_detect(struct device *dev)
-{
-	struct bmp085_data *data = dev_get_drvdata(dev);
-	unsigned int id;
-	int ret;
-
-	ret = regmap_read(data->regmap, BMP085_CHIP_ID_REG, &id);
-	if (ret < 0)
-		return ret;
-
-	if (id != data->chip_id)
-		return -ENODEV;
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bmp085_detect);
-
-static void bmp085_get_of_properties(struct bmp085_data *data)
-{
-#ifdef CONFIG_OF
-	struct device_node *np = data->dev->of_node;
-	u32 prop;
-
-	if (!np)
-		return;
-
-	if (!of_property_read_u32(np, "chip-id", &prop))
-		data->chip_id = prop & 0xff;
-
-	if (!of_property_read_u32(np, "temp-measurement-period", &prop))
-		data->temp_measurement_period = (prop/100)*HZ;
-
-	if (!of_property_read_u32(np, "default-oversampling", &prop))
-		data->oversampling_setting = prop & 0xff;
-#endif
-}
-
-static int bmp085_init_client(struct bmp085_data *data)
-{
-	int status = bmp085_read_calibration_data(data);
-
-	if (status < 0)
-		return status;
-
-	/* default settings */
-	data->chip_id = BMP085_CHIP_ID;
-	data->last_temp_measurement = 0;
-	data->temp_measurement_period = 1*HZ;
-	data->oversampling_setting = 3;
-
-	bmp085_get_of_properties(data);
-
-	mutex_init(&data->lock);
-
-	return 0;
-}
-
-struct regmap_config bmp085_regmap_config = {
-	.reg_bits = 8,
-	.val_bits = 8
-};
-EXPORT_SYMBOL_GPL(bmp085_regmap_config);
-
-int bmp085_probe(struct device *dev, struct regmap *regmap, int irq)
-{
-	struct bmp085_data *data;
-	int err = 0;
-
-	data = kzalloc(sizeof(struct bmp085_data), GFP_KERNEL);
-	if (!data) {
-		err = -ENOMEM;
-		goto exit;
-	}
-
-	dev_set_drvdata(dev, data);
-	data->dev = dev;
-	data->regmap = regmap;
-	data->irq = irq;
-
-	if (data->irq > 0) {
-		err = devm_request_irq(dev, data->irq, bmp085_eoc_isr,
-					      IRQF_TRIGGER_RISING, "bmp085",
-					      data);
-		if (err < 0)
-			goto exit_free;
-	}
-
-	/* Initialize the BMP085 chip */
-	err = bmp085_init_client(data);
-	if (err < 0)
-		goto exit_free;
-
-	err = bmp085_detect(dev);
-	if (err < 0) {
-		dev_err(dev, "%s: chip_id failed!\n", BMP085_NAME);
-		goto exit_free;
-	}
-
-	/* Register sysfs hooks */
-	err = sysfs_create_group(&dev->kobj, &bmp085_attr_group);
-	if (err)
-		goto exit_free;
-
-	dev_info(dev, "Successfully initialized %s!\n", BMP085_NAME);
-
-	return 0;
-
-exit_free:
-	kfree(data);
-exit:
-	return err;
-}
-EXPORT_SYMBOL_GPL(bmp085_probe);
-
-int bmp085_remove(struct device *dev)
-{
-	struct bmp085_data *data = dev_get_drvdata(dev);
-
-	sysfs_remove_group(&data->dev->kobj, &bmp085_attr_group);
-	kfree(data);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bmp085_remove);
-
-MODULE_AUTHOR("Christoph Mair <christoph.mair@gmail.com>");
-MODULE_DESCRIPTION("BMP085 driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/misc/bmp085.h b/drivers/misc/bmp085.h
deleted file mode 100644
index 8b8e3b1..0000000
--- a/drivers/misc/bmp085.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright (c) 2012  Bosch Sensortec GmbH
- * Copyright (c) 2012  Unixphere AB
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _BMP085_H
-#define _BMP085_H
-
-#include <linux/regmap.h>
-
-#define BMP085_NAME		"bmp085"
-
-extern struct regmap_config bmp085_regmap_config;
-
-int bmp085_probe(struct device *dev, struct regmap *regmap, int irq);
-int bmp085_remove(struct device *dev);
-int bmp085_detect(struct device *dev);
-
-#endif
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index bdee9a0..c466ee2 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -90,8 +90,7 @@
 	 */
 	mutex_lock(&afu->contexts_lock);
 	idr_preload(GFP_KERNEL);
-	i = idr_alloc(&ctx->afu->contexts_idr, ctx,
-		      ctx->afu->adapter->native->sl_ops->min_pe,
+	i = idr_alloc(&ctx->afu->contexts_idr, ctx, ctx->afu->adapter->min_pe,
 		      ctx->afu->num_procs, GFP_NOWAIT);
 	idr_preload_end();
 	mutex_unlock(&afu->contexts_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index de09053..344a0ff 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -561,7 +561,6 @@
 	u64 (*timebase_read)(struct cxl *adapter);
 	int capi_mode;
 	bool needs_reset_before_disable;
-	int min_pe;
 };
 
 struct cxl_native {
@@ -603,6 +602,7 @@
 	struct bin_attribute cxl_attr;
 	int adapter_num;
 	int user_irqs;
+	int min_pe;
 	u64 ps_size;
 	u16 psl_rev;
 	u16 base_image;
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 3bcdaee..e606fdc 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -924,7 +924,7 @@
 	return fail_psl_irq(afu, &irq_info);
 }
 
-void native_irq_wait(struct cxl_context *ctx)
+static void native_irq_wait(struct cxl_context *ctx)
 {
 	u64 dsisr;
 	int timeout = 1000;
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index d152e2d..6f0c4ac 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -379,7 +379,7 @@
 
 static int init_implementation_adapter_psl_regs(struct cxl *adapter, struct pci_dev *dev)
 {
-	u64 psl_dsnctl;
+	u64 psl_dsnctl, psl_fircntl;
 	u64 chipid;
 	u64 capp_unit_id;
 	int rc;
@@ -398,8 +398,11 @@
 	cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x20000000200ULL);
 	/* snoop write mask */
 	cxl_p1_write(adapter, CXL_PSL_SNWRALLOC, 0x00000000FFFFFFFFULL);
-	/* set fir_accum */
-	cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, 0x0800000000000000ULL);
+	/* set fir_cntl to recommended value for production env */
+	psl_fircntl = (0x2ULL << (63-3)); /* ce_report */
+	psl_fircntl |= (0x1ULL << (63-6)); /* FIR_report */
+	psl_fircntl |= 0x1ULL; /* ce_thresh */
+	cxl_p1_write(adapter, CXL_PSL_FIR_CNTL, psl_fircntl);
 	/* for debugging with trace arrays */
 	cxl_p1_write(adapter, CXL_PSL_TRACE, 0x0000FF7C00000000ULL);
 
@@ -1521,14 +1524,15 @@
 	.write_timebase_ctrl = write_timebase_ctrl_xsl,
 	.timebase_read = timebase_read_xsl,
 	.capi_mode = OPAL_PHB_CAPI_MODE_DMA,
-	.min_pe = 1, /* Workaround for Mellanox CX4 HW bug */
 };
 
 static void set_sl_ops(struct cxl *adapter, struct pci_dev *dev)
 {
 	if (dev->vendor == PCI_VENDOR_ID_MELLANOX && dev->device == 0x1013) {
+		/* Mellanox CX-4 */
 		dev_info(&adapter->dev, "Device uses an XSL\n");
 		adapter->native->sl_ops = &xsl_ops;
+		adapter->min_pe = 1; /* Workaround for CX-4 hardware bug */
 	} else {
 		dev_info(&adapter->dev, "Device uses a PSL\n");
 		adapter->native->sl_ops = &psl_ops;
diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index dee8def..3519ace 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -221,7 +221,7 @@
 	/* Setup the PHB using arch provided callback */
 	phb->ops = &cxl_pcie_pci_ops;
 	phb->cfg_addr = NULL;
-	phb->cfg_data = 0;
+	phb->cfg_data = NULL;
 	phb->private_data = afu;
 	phb->controller_ops = cxl_pci_controller_ops;
 
@@ -230,6 +230,11 @@
 	if (phb->bus == NULL)
 		return -ENXIO;
 
+	/* Set release hook on root bus */
+	pci_set_host_bridge_release(to_pci_host_bridge(phb->bus->bridge),
+				    pcibios_free_controller_deferred,
+				    (void *) phb);
+
 	/* Claim resources. This might need some rework as well depending
 	 * whether we are doing probe-only or not, like assigning unassigned
 	 * resources etc...
@@ -256,7 +261,10 @@
 	afu->phb = NULL;
 
 	pci_remove_root_bus(phb->bus);
-	pcibios_free_controller(phb);
+	/*
+	 * We don't free phb here - that's handled by
+	 * pcibios_free_controller_deferred()
+	 */
 }
 
 static bool _cxl_pci_is_vphb_device(struct pci_controller *phb)
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index 2c6c7c8..5afe4cd 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -121,9 +121,8 @@
 	 * this chip is clocked very slowly
 	 */
 	status = spi_sync(at25->spi, &m);
-	dev_dbg(&at25->spi->dev,
-		"read %Zd bytes at %d --> %d\n",
-		count, offset, (int) status);
+	dev_dbg(&at25->spi->dev, "read %zu bytes at %d --> %zd\n",
+		count, offset, status);
 
 	mutex_unlock(&at25->lock);
 	return status;
@@ -167,8 +166,7 @@
 		*cp = AT25_WREN;
 		status = spi_write(at25->spi, cp, 1);
 		if (status < 0) {
-			dev_dbg(&at25->spi->dev, "WREN --> %d\n",
-					(int) status);
+			dev_dbg(&at25->spi->dev, "WREN --> %d\n", status);
 			break;
 		}
 
@@ -196,9 +194,8 @@
 		memcpy(cp, buf, segment);
 		status = spi_write(at25->spi, bounce,
 				segment + at25->addrlen + 1);
-		dev_dbg(&at25->spi->dev,
-				"write %u bytes at %u --> %d\n",
-				segment, offset, (int) status);
+		dev_dbg(&at25->spi->dev, "write %u bytes at %u --> %d\n",
+			segment, offset, status);
 		if (status < 0)
 			break;
 
@@ -225,8 +222,7 @@
 
 		if ((sr < 0) || (sr & AT25_SR_nRDY)) {
 			dev_err(&at25->spi->dev,
-				"write %d bytes offset %d, "
-				"timeout after %u msecs\n",
+				"write %u bytes offset %u, timeout after %u msecs\n",
 				segment, offset,
 				jiffies_to_msecs(jiffies -
 					(timeout - EE_TIMEOUT)));
@@ -368,9 +364,7 @@
 		return PTR_ERR(at25->nvmem);
 
 	dev_info(&spi->dev, "%d %s %s eeprom%s, pagesize %u\n",
-		(chip.byte_len < 1024)
-			? chip.byte_len
-			: (chip.byte_len / 1024),
+		(chip.byte_len < 1024) ? chip.byte_len : (chip.byte_len / 1024),
 		(chip.byte_len < 1024) ? "Byte" : "KByte",
 		at25->chip.name,
 		(chip.flags & EE_READONLY) ? " (readonly)" : "",
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index a70b853..6c1f49a 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -1351,6 +1351,19 @@
 };
 
 /**
+ * genwqe_devnode() - Set default access mode for genwqe devices.
+ *
+ * Default mode should be rw for everybody. Do not change default
+ * device name.
+ */
+static char *genwqe_devnode(struct device *dev, umode_t *mode)
+{
+	if (mode)
+		*mode = 0666;
+	return NULL;
+}
+
+/**
  * genwqe_init_module() - Driver registration and initialization
  */
 static int __init genwqe_init_module(void)
@@ -1363,6 +1376,8 @@
 		return -ENOMEM;
 	}
 
+	class_genwqe->devnode = genwqe_devnode;
+
 	debugfs_genwqe = debugfs_create_dir(GENWQE_DEVNAME, NULL);
 	if (!debugfs_genwqe) {
 		rc = -ENOMEM;
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
index 353ee0c..ddfeefe 100644
--- a/drivers/misc/genwqe/card_ddcb.c
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -1048,8 +1048,6 @@
 			"[%s] **err: could not allocate DDCB **\n", __func__);
 		return -ENOMEM;
 	}
-	memset(queue->ddcb_vaddr, 0, queue_size);
-
 	queue->ddcb_req = kzalloc(sizeof(struct ddcb_requ *) *
 				  queue->ddcb_max, GFP_KERNEL);
 	if (!queue->ddcb_req) {
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index 222367c..8a679ec 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -220,8 +220,8 @@
 	if (get_order(size) > MAX_ORDER)
 		return NULL;
 
-	return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
-				  GFP_KERNEL);
+	return dma_zalloc_coherent(&cd->pci_dev->dev, size, dma_handle,
+				   GFP_KERNEL);
 }
 
 void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c
index d6a901c..fea8ff4 100644
--- a/drivers/misc/hpilo.c
+++ b/drivers/misc/hpilo.c
@@ -688,7 +688,8 @@
 
 static int ilo_map_device(struct pci_dev *pdev, struct ilo_hwinfo *hw)
 {
-	int error = -ENOMEM;
+	int bar;
+	unsigned long off;
 
 	/* map the memory mapped i/o registers */
 	hw->mmio_vaddr = pci_iomap(pdev, 1, 0);
@@ -698,7 +699,15 @@
 	}
 
 	/* map the adapter shared memory region */
-	hw->ram_vaddr = pci_iomap(pdev, 2, max_ccb * ILOHW_CCB_SZ);
+	if (pdev->subsystem_device == 0x00E4) {
+		bar = 5;
+		/* Last 8k is reserved for CCBs */
+		off = pci_resource_len(pdev, bar) - 0x2000;
+	} else {
+		bar = 2;
+		off = 0;
+	}
+	hw->ram_vaddr = pci_iomap_range(pdev, bar, off, max_ccb * ILOHW_CCB_SZ);
 	if (hw->ram_vaddr == NULL) {
 		dev_err(&pdev->dev, "Error mapping shared mem\n");
 		goto mmio_free;
@@ -717,7 +726,7 @@
 mmio_free:
 	pci_iounmap(pdev, hw->mmio_vaddr);
 out:
-	return error;
+	return -ENOMEM;
 }
 
 static void ilo_remove(struct pci_dev *pdev)
@@ -899,7 +908,7 @@
 	class_destroy(ilo_class);
 }
 
-MODULE_VERSION("1.4.1");
+MODULE_VERSION("1.5.0");
 MODULE_ALIAS(ILO_NAME);
 MODULE_DESCRIPTION(ILO_NAME);
 MODULE_AUTHOR("David Altobelli <david.altobelli@hpe.com>");
diff --git a/drivers/misc/lkdtm_rodata.c b/drivers/misc/lkdtm_rodata.c
index 166b1db..3564477 100644
--- a/drivers/misc/lkdtm_rodata.c
+++ b/drivers/misc/lkdtm_rodata.c
@@ -4,7 +4,7 @@
  */
 #include "lkdtm.h"
 
-void lkdtm_rodata_do_nothing(void)
+void notrace lkdtm_rodata_do_nothing(void)
 {
 	/* Does nothing. We just want an architecture agnostic "return". */
 }
diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c
index 5525a20..1dd6114 100644
--- a/drivers/misc/lkdtm_usercopy.c
+++ b/drivers/misc/lkdtm_usercopy.c
@@ -9,7 +9,15 @@
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 
-static size_t cache_size = 1024;
+/*
+ * Many of the tests here end up using const sizes, but those would
+ * normally be ignored by hardened usercopy, so force the compiler
+ * into choosing the non-const path to make sure we trigger the
+ * hardened usercopy checks by added "unconst" to all the const copies,
+ * and making sure "cache_size" isn't optimized into a const.
+ */
+static volatile size_t unconst = 0;
+static volatile size_t cache_size = 1024;
 static struct kmem_cache *bad_cache;
 
 static const unsigned char test_text[] = "This is a test.\n";
@@ -67,14 +75,14 @@
 	if (to_user) {
 		pr_info("attempting good copy_to_user of local stack\n");
 		if (copy_to_user((void __user *)user_addr, good_stack,
-				 sizeof(good_stack))) {
+				 unconst + sizeof(good_stack))) {
 			pr_warn("copy_to_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
 		pr_info("attempting bad copy_to_user of distant stack\n");
 		if (copy_to_user((void __user *)user_addr, bad_stack,
-				 sizeof(good_stack))) {
+				 unconst + sizeof(good_stack))) {
 			pr_warn("copy_to_user failed, but lacked Oops\n");
 			goto free_user;
 		}
@@ -88,14 +96,14 @@
 
 		pr_info("attempting good copy_from_user of local stack\n");
 		if (copy_from_user(good_stack, (void __user *)user_addr,
-				   sizeof(good_stack))) {
+				   unconst + sizeof(good_stack))) {
 			pr_warn("copy_from_user failed unexpectedly?!\n");
 			goto free_user;
 		}
 
 		pr_info("attempting bad copy_from_user of distant stack\n");
 		if (copy_from_user(bad_stack, (void __user *)user_addr,
-				   sizeof(good_stack))) {
+				   unconst + sizeof(good_stack))) {
 			pr_warn("copy_from_user failed, but lacked Oops\n");
 			goto free_user;
 		}
@@ -109,7 +117,7 @@
 {
 	unsigned long user_addr;
 	unsigned char *one, *two;
-	const size_t size = 1024;
+	size_t size = unconst + 1024;
 
 	one = kmalloc(size, GFP_KERNEL);
 	two = kmalloc(size, GFP_KERNEL);
@@ -285,13 +293,14 @@
 
 	pr_info("attempting good copy_to_user from kernel rodata\n");
 	if (copy_to_user((void __user *)user_addr, test_text,
-			 sizeof(test_text))) {
+			 unconst + sizeof(test_text))) {
 		pr_warn("copy_to_user failed unexpectedly?!\n");
 		goto free_user;
 	}
 
 	pr_info("attempting bad copy_to_user from kernel text\n");
-	if (copy_to_user((void __user *)user_addr, vm_mmap, PAGE_SIZE)) {
+	if (copy_to_user((void __user *)user_addr, vm_mmap,
+			 unconst + PAGE_SIZE)) {
 		pr_warn("copy_to_user failed, but lacked Oops\n");
 		goto free_user;
 	}
diff --git a/drivers/misc/mei/amthif.c b/drivers/misc/mei/amthif.c
index a039a5d..7ae89b4 100644
--- a/drivers/misc/mei/amthif.c
+++ b/drivers/misc/mei/amthif.c
@@ -47,7 +47,6 @@
 void mei_amthif_reset_params(struct mei_device *dev)
 {
 	/* reset iamthif parameters. */
-	dev->iamthif_current_cb = NULL;
 	dev->iamthif_canceled = false;
 	dev->iamthif_state = MEI_IAMTHIF_IDLE;
 	dev->iamthif_stall_timer = 0;
@@ -67,8 +66,12 @@
 	struct mei_cl *cl = &dev->iamthif_cl;
 	int ret;
 
-	if (mei_cl_is_connected(cl))
-		return 0;
+	mutex_lock(&dev->device_lock);
+
+	if (mei_cl_is_connected(cl)) {
+		ret = 0;
+		goto out;
+	}
 
 	dev->iamthif_state = MEI_IAMTHIF_IDLE;
 
@@ -77,179 +80,37 @@
 	ret = mei_cl_link(cl);
 	if (ret < 0) {
 		dev_err(dev->dev, "amthif: failed cl_link %d\n", ret);
-		return ret;
+		goto out;
 	}
 
 	ret = mei_cl_connect(cl, me_cl, NULL);
 
-	return ret;
-}
-
-/**
- * mei_amthif_read - read data from AMTHIF client
- *
- * @dev: the device structure
- * @file: pointer to file object
- * @ubuf: pointer to user data in user space
- * @length: data length to read
- * @offset: data read offset
- *
- * Locking: called under "dev->device_lock" lock
- *
- * Return:
- *  returned data length on success,
- *  zero if no data to read,
- *  negative on failure.
- */
-int mei_amthif_read(struct mei_device *dev, struct file *file,
-	       char __user *ubuf, size_t length, loff_t *offset)
-{
-	struct mei_cl *cl = file->private_data;
-	struct mei_cl_cb *cb;
-	int rets;
-	int wait_ret;
-
-	dev_dbg(dev->dev, "checking amthif data\n");
-	cb = mei_cl_read_cb(cl, file);
-
-	/* Check for if we can block or not*/
-	if (cb == NULL && file->f_flags & O_NONBLOCK)
-		return -EAGAIN;
-
-
-	dev_dbg(dev->dev, "waiting for amthif data\n");
-	while (cb == NULL) {
-		/* unlock the Mutex */
-		mutex_unlock(&dev->device_lock);
-
-		wait_ret = wait_event_interruptible(cl->rx_wait,
-					!list_empty(&cl->rd_completed) ||
-					!mei_cl_is_connected(cl));
-
-		/* Locking again the Mutex */
-		mutex_lock(&dev->device_lock);
-
-		if (wait_ret)
-			return -ERESTARTSYS;
-
-		if (!mei_cl_is_connected(cl)) {
-			rets = -EBUSY;
-			goto out;
-		}
-
-		cb = mei_cl_read_cb(cl, file);
-	}
-
-	if (cb->status) {
-		rets = cb->status;
-		dev_dbg(dev->dev, "read operation failed %d\n", rets);
-		goto free;
-	}
-
-	dev_dbg(dev->dev, "Got amthif data\n");
-	/* if the whole message will fit remove it from the list */
-	if (cb->buf_idx >= *offset && length >= (cb->buf_idx - *offset))
-		list_del_init(&cb->list);
-	else if (cb->buf_idx <= *offset) {
-		/* end of the message has been reached */
-		list_del_init(&cb->list);
-		rets = 0;
-		goto free;
-	}
-		/* else means that not full buffer will be read and do not
-		 * remove message from deletion list
-		 */
-
-	dev_dbg(dev->dev, "amthif cb->buf.size - %zu cb->buf_idx - %zu\n",
-		cb->buf.size, cb->buf_idx);
-
-	/* length is being truncated to PAGE_SIZE, however,
-	 * the buf_idx may point beyond */
-	length = min_t(size_t, length, (cb->buf_idx - *offset));
-
-	if (copy_to_user(ubuf, cb->buf.data + *offset, length)) {
-		dev_dbg(dev->dev, "failed to copy data to userland\n");
-		rets = -EFAULT;
-	} else {
-		rets = length;
-		if ((*offset + length) < cb->buf_idx) {
-			*offset += length;
-			goto out;
-		}
-	}
-free:
-	dev_dbg(dev->dev, "free amthif cb memory.\n");
-	*offset = 0;
-	mei_io_cb_free(cb);
 out:
-	return rets;
+	mutex_unlock(&dev->device_lock);
+	return ret;
 }
 
 /**
  * mei_amthif_read_start - queue message for sending read credential
  *
  * @cl: host client
- * @file: file pointer of message recipient
+ * @fp: file pointer of message recipient
  *
  * Return: 0 on success, <0 on failure.
  */
-static int mei_amthif_read_start(struct mei_cl *cl, const struct file *file)
+static int mei_amthif_read_start(struct mei_cl *cl, const struct file *fp)
 {
 	struct mei_device *dev = cl->dev;
 	struct mei_cl_cb *cb;
-	int rets;
 
-	cb = mei_io_cb_init(cl, MEI_FOP_READ, file);
-	if (!cb) {
-		rets = -ENOMEM;
-		goto err;
-	}
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, mei_cl_mtu(cl), MEI_FOP_READ, fp);
+	if (!cb)
+		return -ENOMEM;
 
-	rets = mei_io_cb_alloc_buf(cb, mei_cl_mtu(cl));
-	if (rets)
-		goto err;
-
-	list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+	cl->rx_flow_ctrl_creds++;
 
 	dev->iamthif_state = MEI_IAMTHIF_READING;
-	dev->iamthif_fp = cb->fp;
-	dev->iamthif_current_cb = cb;
-
-	return 0;
-err:
-	mei_io_cb_free(cb);
-	return rets;
-}
-
-/**
- * mei_amthif_send_cmd - send amthif command to the ME
- *
- * @cl: the host client
- * @cb: mei call back struct
- *
- * Return: 0 on success, <0 on failure.
- */
-static int mei_amthif_send_cmd(struct mei_cl *cl, struct mei_cl_cb *cb)
-{
-	struct mei_device *dev;
-	int ret;
-
-	if (!cl->dev || !cb)
-		return -ENODEV;
-
-	dev = cl->dev;
-
-	dev->iamthif_state = MEI_IAMTHIF_WRITING;
-	dev->iamthif_current_cb = cb;
-	dev->iamthif_fp = cb->fp;
-	dev->iamthif_canceled = false;
-
-	ret = mei_cl_write(cl, cb, false);
-	if (ret < 0)
-		return ret;
-
-	if (cb->completed)
-		cb->status = mei_amthif_read_start(cl, cb->fp);
+	cl->fp = cb->fp;
 
 	return 0;
 }
@@ -265,20 +126,32 @@
 {
 	struct mei_cl *cl = &dev->iamthif_cl;
 	struct mei_cl_cb *cb;
+	int ret;
 
 	dev->iamthif_canceled = false;
-	dev->iamthif_state = MEI_IAMTHIF_IDLE;
-	dev->iamthif_fp = NULL;
 
 	dev_dbg(dev->dev, "complete amthif cmd_list cb.\n");
 
 	cb = list_first_entry_or_null(&dev->amthif_cmd_list.list,
 					typeof(*cb), list);
-	if (!cb)
+	if (!cb) {
+		dev->iamthif_state = MEI_IAMTHIF_IDLE;
+		cl->fp = NULL;
 		return 0;
+	}
 
 	list_del_init(&cb->list);
-	return mei_amthif_send_cmd(cl, cb);
+	dev->iamthif_state = MEI_IAMTHIF_WRITING;
+	cl->fp = cb->fp;
+
+	ret = mei_cl_write(cl, cb, false);
+	if (ret < 0)
+		return ret;
+
+	if (cb->completed)
+		cb->status = mei_amthif_read_start(cl, cb->fp);
+
+	return 0;
 }
 
 /**
@@ -299,8 +172,7 @@
 	/*
 	 * The previous request is still in processing, queue this one.
 	 */
-	if (dev->iamthif_state > MEI_IAMTHIF_IDLE &&
-	    dev->iamthif_state < MEI_IAMTHIF_READ_COMPLETE)
+	if (dev->iamthif_state != MEI_IAMTHIF_IDLE)
 		return 0;
 
 	return mei_amthif_run_next_cmd(dev);
@@ -309,7 +181,6 @@
 /**
  * mei_amthif_poll - the amthif poll function
  *
- * @dev: the device structure
  * @file: pointer to file structure
  * @wait: pointer to poll_table structure
  *
@@ -317,26 +188,19 @@
  *
  * Locking: called under "dev->device_lock" lock
  */
-
-unsigned int mei_amthif_poll(struct mei_device *dev,
-		struct file *file, poll_table *wait)
+unsigned int mei_amthif_poll(struct file *file, poll_table *wait)
 {
+	struct mei_cl *cl = file->private_data;
+	struct mei_cl_cb *cb = mei_cl_read_cb(cl, file);
 	unsigned int mask = 0;
 
-	poll_wait(file, &dev->iamthif_cl.rx_wait, wait);
-
-	if (dev->iamthif_state == MEI_IAMTHIF_READ_COMPLETE &&
-	    dev->iamthif_fp == file) {
-
+	poll_wait(file, &cl->rx_wait, wait);
+	if (cb)
 		mask |= POLLIN | POLLRDNORM;
-		mei_amthif_run_next_cmd(dev);
-	}
 
 	return mask;
 }
 
-
-
 /**
  * mei_amthif_irq_write - write iamthif command in irq thread context.
  *
@@ -393,7 +257,6 @@
 		return 0;
 
 	dev_dbg(dev->dev, "completed amthif read.\n ");
-	dev->iamthif_current_cb = NULL;
 	dev->iamthif_stall_timer = 0;
 
 	return 0;
@@ -409,115 +272,63 @@
 {
 	struct mei_device *dev = cl->dev;
 
-	if (cb->fop_type == MEI_FOP_WRITE) {
+	dev_dbg(dev->dev, "completing amthif call back.\n");
+	switch (cb->fop_type) {
+	case MEI_FOP_WRITE:
 		if (!cb->status) {
 			dev->iamthif_stall_timer = MEI_IAMTHIF_STALL_TIMER;
+			mei_schedule_stall_timer(dev);
 			mei_io_cb_free(cb);
 			return;
 		}
-		/*
-		 * in case of error enqueue the write cb to complete read list
-		 * so it can be propagated to the reader
-		 */
-		list_add_tail(&cb->list, &cl->rd_completed);
-		wake_up_interruptible(&cl->rx_wait);
-		return;
-	}
+		dev->iamthif_state = MEI_IAMTHIF_IDLE;
+		cl->fp = NULL;
+		if (!dev->iamthif_canceled) {
+			/*
+			 * in case of error enqueue the write cb to complete
+			 * read list so it can be propagated to the reader
+			 */
+			list_add_tail(&cb->list, &cl->rd_completed);
+			wake_up_interruptible(&cl->rx_wait);
+		} else {
+			mei_io_cb_free(cb);
+		}
+		break;
+	case MEI_FOP_READ:
+		if (!dev->iamthif_canceled) {
+			list_add_tail(&cb->list, &cl->rd_completed);
+			dev_dbg(dev->dev, "amthif read completed\n");
+			wake_up_interruptible(&cl->rx_wait);
+		} else {
+			mei_io_cb_free(cb);
+		}
 
-	if (!dev->iamthif_canceled) {
-		dev->iamthif_state = MEI_IAMTHIF_READ_COMPLETE;
 		dev->iamthif_stall_timer = 0;
-		list_add_tail(&cb->list, &cl->rd_completed);
-		dev_dbg(dev->dev, "amthif read completed\n");
-	} else {
 		mei_amthif_run_next_cmd(dev);
+		break;
+	default:
+		WARN_ON(1);
 	}
-
-	dev_dbg(dev->dev, "completing amthif call back.\n");
-	wake_up_interruptible(&cl->rx_wait);
 }
 
 /**
  * mei_clear_list - removes all callbacks associated with file
  *		from mei_cb_list
  *
- * @dev: device structure.
  * @file: file structure
  * @mei_cb_list: callbacks list
  *
  * mei_clear_list is called to clear resources associated with file
  * when application calls close function or Ctrl-C was pressed
- *
- * Return: true if callback removed from the list, false otherwise
  */
-static bool mei_clear_list(struct mei_device *dev,
-		const struct file *file, struct list_head *mei_cb_list)
+static void mei_clear_list(const struct file *file,
+			   struct list_head *mei_cb_list)
 {
-	struct mei_cl *cl = &dev->iamthif_cl;
 	struct mei_cl_cb *cb, *next;
-	bool removed = false;
 
-	/* list all list member */
-	list_for_each_entry_safe(cb, next, mei_cb_list, list) {
-		/* check if list member associated with a file */
-		if (file == cb->fp) {
-			/* check if cb equal to current iamthif cb */
-			if (dev->iamthif_current_cb == cb) {
-				dev->iamthif_current_cb = NULL;
-				/* send flow control to iamthif client */
-				mei_hbm_cl_flow_control_req(dev, cl);
-			}
-			/* free all allocated buffers */
+	list_for_each_entry_safe(cb, next, mei_cb_list, list)
+		if (file == cb->fp)
 			mei_io_cb_free(cb);
-			removed = true;
-		}
-	}
-	return removed;
-}
-
-/**
- * mei_clear_lists - removes all callbacks associated with file
- *
- * @dev: device structure
- * @file: file structure
- *
- * mei_clear_lists is called to clear resources associated with file
- * when application calls close function or Ctrl-C was pressed
- *
- * Return: true if callback removed from the list, false otherwise
- */
-static bool mei_clear_lists(struct mei_device *dev, const struct file *file)
-{
-	bool removed = false;
-	struct mei_cl *cl = &dev->iamthif_cl;
-
-	/* remove callbacks associated with a file */
-	mei_clear_list(dev, file, &dev->amthif_cmd_list.list);
-	if (mei_clear_list(dev, file, &cl->rd_completed))
-		removed = true;
-
-	mei_clear_list(dev, file, &dev->ctrl_rd_list.list);
-
-	if (mei_clear_list(dev, file, &dev->ctrl_wr_list.list))
-		removed = true;
-
-	if (mei_clear_list(dev, file, &dev->write_waiting_list.list))
-		removed = true;
-
-	if (mei_clear_list(dev, file, &dev->write_list.list))
-		removed = true;
-
-	/* check if iamthif_current_cb not NULL */
-	if (dev->iamthif_current_cb && !removed) {
-		/* check file and iamthif current cb association */
-		if (dev->iamthif_current_cb->fp == file) {
-			/* remove cb */
-			mei_io_cb_free(dev->iamthif_current_cb);
-			dev->iamthif_current_cb = NULL;
-			removed = true;
-		}
-	}
-	return removed;
 }
 
 /**
@@ -530,23 +341,21 @@
 */
 int mei_amthif_release(struct mei_device *dev, struct file *file)
 {
+	struct mei_cl *cl = file->private_data;
+
 	if (dev->iamthif_open_count > 0)
 		dev->iamthif_open_count--;
 
-	if (dev->iamthif_fp == file &&
-	    dev->iamthif_state != MEI_IAMTHIF_IDLE) {
+	if (cl->fp == file && dev->iamthif_state != MEI_IAMTHIF_IDLE) {
 
 		dev_dbg(dev->dev, "amthif canceled iamthif state %d\n",
 		    dev->iamthif_state);
 		dev->iamthif_canceled = true;
-		if (dev->iamthif_state == MEI_IAMTHIF_READ_COMPLETE) {
-			dev_dbg(dev->dev, "run next amthif iamthif cb\n");
-			mei_amthif_run_next_cmd(dev);
-		}
 	}
 
-	if (mei_clear_lists(dev, file))
-		dev->iamthif_state = MEI_IAMTHIF_IDLE;
+	mei_clear_list(file, &dev->amthif_cmd_list.list);
+	mei_clear_list(file, &cl->rd_completed);
+	mei_clear_list(file, &dev->ctrl_rd_list.list);
 
 	return 0;
 }
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 1f33fea..8cac7ef 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -126,7 +126,8 @@
 		goto out;
 
 	/* wait on event only if there is no other waiter */
-	if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) {
+	/* synchronized under device mutex */
+	if (!waitqueue_active(&cl->rx_wait)) {
 
 		mutex_unlock(&bus->device_lock);
 
@@ -142,7 +143,7 @@
 		mutex_lock(&bus->device_lock);
 
 		if (!mei_cl_is_connected(cl)) {
-			rets = -EBUSY;
+			rets = -ENODEV;
 			goto out;
 		}
 	}
@@ -234,7 +235,7 @@
 	/* Prepare for the next read */
 	if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
 		mutex_lock(&bus->device_lock);
-		mei_cl_read_start(cldev->cl, 0, NULL);
+		mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
 		mutex_unlock(&bus->device_lock);
 	}
 }
@@ -324,7 +325,7 @@
 
 	if (cldev->events_mask & BIT(MEI_CL_EVENT_RX)) {
 		mutex_lock(&bus->device_lock);
-		ret = mei_cl_read_start(cldev->cl, 0, NULL);
+		ret = mei_cl_read_start(cldev->cl, mei_cl_mtu(cldev->cl), NULL);
 		mutex_unlock(&bus->device_lock);
 		if (ret && ret != -EBUSY)
 			return ret;
@@ -983,12 +984,10 @@
 		container_of(work, struct mei_device, bus_rescan_work);
 	struct mei_me_client *me_cl;
 
-	mutex_lock(&bus->device_lock);
 	me_cl = mei_me_cl_by_uuid(bus, &mei_amthif_guid);
 	if (me_cl)
 		mei_amthif_host_init(bus, me_cl);
 	mei_me_cl_put(me_cl);
-	mutex_unlock(&bus->device_lock);
 
 	mei_cl_bus_rescan(bus);
 }
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 641c1a5..6fe0235 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -358,8 +358,9 @@
  *
  * Return: mei_cl_cb pointer or NULL;
  */
-struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, enum mei_cb_file_ops type,
-				 const struct file *fp)
+static struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl,
+					enum mei_cb_file_ops type,
+					const struct file *fp)
 {
 	struct mei_cl_cb *cb;
 
@@ -420,31 +421,6 @@
 }
 
 /**
- * mei_io_cb_alloc_buf - allocate callback buffer
- *
- * @cb: io callback structure
- * @length: size of the buffer
- *
- * Return: 0 on success
- *         -EINVAL if cb is NULL
- *         -ENOMEM if allocation failed
- */
-int mei_io_cb_alloc_buf(struct mei_cl_cb *cb, size_t length)
-{
-	if (!cb)
-		return -EINVAL;
-
-	if (length == 0)
-		return 0;
-
-	cb->buf.data = kmalloc(length, GFP_KERNEL);
-	if (!cb->buf.data)
-		return -ENOMEM;
-	cb->buf.size = length;
-	return 0;
-}
-
-/**
  * mei_cl_alloc_cb - a convenient wrapper for allocating read cb
  *
  * @cl: host client
@@ -455,24 +431,59 @@
  * Return: cb on success and NULL on failure
  */
 struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
-				  enum mei_cb_file_ops type,
+				  enum mei_cb_file_ops fop_type,
 				  const struct file *fp)
 {
 	struct mei_cl_cb *cb;
 
-	cb = mei_io_cb_init(cl, type, fp);
+	cb = mei_io_cb_init(cl, fop_type, fp);
 	if (!cb)
 		return NULL;
 
-	if (mei_io_cb_alloc_buf(cb, length)) {
+	if (length == 0)
+		return cb;
+
+	cb->buf.data = kmalloc(length, GFP_KERNEL);
+	if (!cb->buf.data) {
 		mei_io_cb_free(cb);
 		return NULL;
 	}
+	cb->buf.size = length;
 
 	return cb;
 }
 
 /**
+ * mei_cl_enqueue_ctrl_wr_cb - a convenient wrapper for allocating
+ *     and enqueuing of the control commands cb
+ *
+ * @cl: host client
+ * @length: size of the buffer
+ * @type: operation type
+ * @fp: associated file pointer (might be NULL)
+ *
+ * Return: cb on success and NULL on failure
+ * Locking: called under "dev->device_lock" lock
+ */
+struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length,
+					    enum mei_cb_file_ops fop_type,
+					    const struct file *fp)
+{
+	struct mei_cl_cb *cb;
+
+	/* for RX always allocate at least client's mtu */
+	if (length)
+		length = max_t(size_t, length, mei_cl_mtu(cl));
+
+	cb = mei_cl_alloc_cb(cl, length, fop_type, fp);
+	if (!cb)
+		return NULL;
+
+	list_add_tail(&cb->list, &cl->dev->ctrl_wr_list.list);
+	return cb;
+}
+
+/**
  * mei_cl_read_cb - find this cl's callback in the read list
  *     for a specific file
  *
@@ -754,7 +765,8 @@
 	mei_io_list_flush(&dev->ctrl_rd_list, cl);
 	mei_io_list_flush(&dev->ctrl_wr_list, cl);
 	mei_cl_wake_all(cl);
-	cl->mei_flow_ctrl_creds = 0;
+	cl->rx_flow_ctrl_creds = 0;
+	cl->tx_flow_ctrl_creds = 0;
 	cl->timer_count = 0;
 
 	if (!cl->me_cl)
@@ -764,7 +776,7 @@
 		cl->me_cl->connect_count--;
 
 	if (cl->me_cl->connect_count == 0)
-		cl->me_cl->mei_flow_ctrl_creds = 0;
+		cl->me_cl->tx_flow_ctrl_creds = 0;
 
 	mei_me_cl_put(cl->me_cl);
 	cl->me_cl = NULL;
@@ -814,6 +826,7 @@
 
 	list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
 	cl->timer_count = MEI_CONNECT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
 
 	return 0;
 }
@@ -867,13 +880,11 @@
 
 	cl->state = MEI_FILE_DISCONNECTING;
 
-	cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT, NULL);
-	rets = cb ? 0 : -ENOMEM;
-	if (rets)
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT, NULL);
+	if (!cb) {
+		rets = -ENOMEM;
 		goto out;
-
-	cl_dbg(dev, cl, "add disconnect cb to control write list\n");
-	list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+	}
 
 	if (mei_hbuf_acquire(dev)) {
 		rets = mei_cl_send_disconnect(cl, cb);
@@ -1001,6 +1012,7 @@
 
 	list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
 	cl->timer_count = MEI_CONNECT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
 	return 0;
 }
 
@@ -1042,14 +1054,14 @@
  *
  * @cl: host client
  * @me_cl: me client
- * @file: pointer to file structure
+ * @fp: pointer to file structure
  *
  * Locking: called under "dev->device_lock" lock
  *
  * Return: 0 on success, <0 on failure.
  */
 int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl,
-		  const struct file *file)
+		   const struct file *fp)
 {
 	struct mei_device *dev;
 	struct mei_cl_cb *cb;
@@ -1076,12 +1088,11 @@
 		goto nortpm;
 	}
 
-	cb = mei_io_cb_init(cl, MEI_FOP_CONNECT, file);
-	rets = cb ? 0 : -ENOMEM;
-	if (rets)
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_CONNECT, fp);
+	if (!cb) {
+		rets = -ENOMEM;
 		goto out;
-
-	list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+	}
 
 	/* run hbuf acquire last so we don't have to undo */
 	if (!mei_cl_is_other_connecting(cl) && mei_hbuf_acquire(dev)) {
@@ -1159,50 +1170,42 @@
 	return ERR_PTR(ret);
 }
 
-
-
 /**
- * mei_cl_flow_ctrl_creds - checks flow_control credits for cl.
+ * mei_cl_tx_flow_ctrl_creds - checks flow_control credits for cl.
  *
  * @cl: host client
- * @fp: the file pointer associated with the pointer
  *
- * Return: 1 if mei_flow_ctrl_creds >0, 0 - otherwise.
+ * Return: 1 if tx_flow_ctrl_creds >0, 0 - otherwise.
  */
-static int mei_cl_flow_ctrl_creds(struct mei_cl *cl, const struct file *fp)
+static int mei_cl_tx_flow_ctrl_creds(struct mei_cl *cl)
 {
-	int rets;
-
 	if (WARN_ON(!cl || !cl->me_cl))
 		return -EINVAL;
 
-	if (cl->mei_flow_ctrl_creds > 0)
+	if (cl->tx_flow_ctrl_creds > 0)
 		return 1;
 
-	if (mei_cl_is_fixed_address(cl)) {
-		rets = mei_cl_read_start(cl, mei_cl_mtu(cl), fp);
-		if (rets && rets != -EBUSY)
-			return rets;
+	if (mei_cl_is_fixed_address(cl))
 		return 1;
-	}
 
 	if (mei_cl_is_single_recv_buf(cl)) {
-		if (cl->me_cl->mei_flow_ctrl_creds > 0)
+		if (cl->me_cl->tx_flow_ctrl_creds > 0)
 			return 1;
 	}
 	return 0;
 }
 
 /**
- * mei_cl_flow_ctrl_reduce - reduces flow_control.
+ * mei_cl_tx_flow_ctrl_creds_reduce - reduces transmit flow control credits
+ *   for a client
  *
- * @cl: private data of the file object
+ * @cl: host client
  *
  * Return:
  *	0 on success
  *	-EINVAL when ctrl credits are <= 0
  */
-static int mei_cl_flow_ctrl_reduce(struct mei_cl *cl)
+static int mei_cl_tx_flow_ctrl_creds_reduce(struct mei_cl *cl)
 {
 	if (WARN_ON(!cl || !cl->me_cl))
 		return -EINVAL;
@@ -1211,13 +1214,13 @@
 		return 0;
 
 	if (mei_cl_is_single_recv_buf(cl)) {
-		if (WARN_ON(cl->me_cl->mei_flow_ctrl_creds <= 0))
+		if (WARN_ON(cl->me_cl->tx_flow_ctrl_creds <= 0))
 			return -EINVAL;
-		cl->me_cl->mei_flow_ctrl_creds--;
+		cl->me_cl->tx_flow_ctrl_creds--;
 	} else {
-		if (WARN_ON(cl->mei_flow_ctrl_creds <= 0))
+		if (WARN_ON(cl->tx_flow_ctrl_creds <= 0))
 			return -EINVAL;
-		cl->mei_flow_ctrl_creds--;
+		cl->tx_flow_ctrl_creds--;
 	}
 	return 0;
 }
@@ -1292,7 +1295,7 @@
  * mei_cl_notify_request - send notification stop/start request
  *
  * @cl: host client
- * @file: associate request with file
+ * @fp: associate request with file
  * @request: 1 for start or 0 for stop
  *
  * Locking: called under "dev->device_lock" lock
@@ -1300,7 +1303,7 @@
  * Return: 0 on such and error otherwise.
  */
 int mei_cl_notify_request(struct mei_cl *cl,
-			  const struct file *file, u8 request)
+			  const struct file *fp, u8 request)
 {
 	struct mei_device *dev;
 	struct mei_cl_cb *cb;
@@ -1325,7 +1328,7 @@
 	}
 
 	fop_type = mei_cl_notify_req2fop(request);
-	cb = mei_io_cb_init(cl, fop_type, file);
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, fop_type, fp);
 	if (!cb) {
 		rets = -ENOMEM;
 		goto out;
@@ -1336,9 +1339,7 @@
 			rets = -ENODEV;
 			goto out;
 		}
-		list_add_tail(&cb->list, &dev->ctrl_rd_list.list);
-	} else {
-		list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+		list_move_tail(&cb->list, &dev->ctrl_rd_list.list);
 	}
 
 	mutex_unlock(&dev->device_lock);
@@ -1436,25 +1437,6 @@
 }
 
 /**
- * mei_cl_is_read_fc_cb - check if read cb is waiting for flow control
- *                        for given host client
- *
- * @cl: host client
- *
- * Return: true, if found at least one cb.
- */
-static bool mei_cl_is_read_fc_cb(struct mei_cl *cl)
-{
-	struct mei_device *dev = cl->dev;
-	struct mei_cl_cb *cb;
-
-	list_for_each_entry(cb, &dev->ctrl_wr_list.list, list)
-		if (cb->fop_type == MEI_FOP_READ && cb->cl == cl)
-			return true;
-	return false;
-}
-
-/**
  * mei_cl_read_start - the start read client message function.
  *
  * @cl: host client
@@ -1477,26 +1459,22 @@
 	if (!mei_cl_is_connected(cl))
 		return -ENODEV;
 
-	/* HW currently supports only one pending read */
-	if (!list_empty(&cl->rd_pending) || mei_cl_is_read_fc_cb(cl))
-		return -EBUSY;
-
 	if (!mei_me_cl_is_active(cl->me_cl)) {
 		cl_err(dev, cl, "no such me client\n");
 		return  -ENOTTY;
 	}
 
-	/* always allocate at least client max message */
-	length = max_t(size_t, length, mei_cl_mtu(cl));
-	cb = mei_cl_alloc_cb(cl, length, MEI_FOP_READ, fp);
+	if (mei_cl_is_fixed_address(cl) || cl == &dev->iamthif_cl)
+		return 0;
+
+	/* HW currently supports only one pending read */
+	if (cl->rx_flow_ctrl_creds)
+		return -EBUSY;
+
+	cb = mei_cl_enqueue_ctrl_wr_cb(cl, length, MEI_FOP_READ, fp);
 	if (!cb)
 		return -ENOMEM;
 
-	if (mei_cl_is_fixed_address(cl)) {
-		list_add_tail(&cb->list, &cl->rd_pending);
-		return 0;
-	}
-
 	rets = pm_runtime_get(dev->dev);
 	if (rets < 0 && rets != -EINPROGRESS) {
 		pm_runtime_put_noidle(dev->dev);
@@ -1504,16 +1482,15 @@
 		goto nortpm;
 	}
 
+	rets = 0;
 	if (mei_hbuf_acquire(dev)) {
 		rets = mei_hbm_cl_flow_control_req(dev, cl);
 		if (rets < 0)
 			goto out;
 
-		list_add_tail(&cb->list, &cl->rd_pending);
-	} else {
-		rets = 0;
-		list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
+		list_move_tail(&cb->list, &cl->rd_pending);
 	}
+	cl->rx_flow_ctrl_creds++;
 
 out:
 	cl_dbg(dev, cl, "rpm: autosuspend\n");
@@ -1557,7 +1534,7 @@
 
 	first_chunk = cb->buf_idx == 0;
 
-	rets = first_chunk ? mei_cl_flow_ctrl_creds(cl, cb->fp) : 1;
+	rets = first_chunk ? mei_cl_tx_flow_ctrl_creds(cl) : 1;
 	if (rets < 0)
 		return rets;
 
@@ -1605,7 +1582,7 @@
 	cb->completed = mei_hdr.msg_complete == 1;
 
 	if (first_chunk) {
-		if (mei_cl_flow_ctrl_reduce(cl))
+		if (mei_cl_tx_flow_ctrl_creds_reduce(cl))
 			return -EIO;
 	}
 
@@ -1663,7 +1640,7 @@
 	mei_hdr.msg_complete = 0;
 	mei_hdr.internal = cb->internal;
 
-	rets = mei_cl_flow_ctrl_creds(cl, cb->fp);
+	rets = mei_cl_tx_flow_ctrl_creds(cl);
 	if (rets < 0)
 		goto err;
 
@@ -1691,7 +1668,7 @@
 	if (rets)
 		goto err;
 
-	rets = mei_cl_flow_ctrl_reduce(cl);
+	rets = mei_cl_tx_flow_ctrl_creds_reduce(cl);
 	if (rets)
 		goto err;
 
@@ -1761,6 +1738,9 @@
 
 	case MEI_FOP_READ:
 		list_add_tail(&cb->list, &cl->rd_completed);
+		if (!mei_cl_is_fixed_address(cl) &&
+		    !WARN_ON(!cl->rx_flow_ctrl_creds))
+			cl->rx_flow_ctrl_creds--;
 		if (!mei_cl_bus_rx_event(cl))
 			wake_up_interruptible(&cl->rx_wait);
 		break;
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index 0d7a3a1..d2bfabe 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -82,11 +82,7 @@
 /*
  * MEI IO Functions
  */
-struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl, enum mei_cb_file_ops type,
-				 const struct file *fp);
 void mei_io_cb_free(struct mei_cl_cb *priv_cb);
-int mei_io_cb_alloc_buf(struct mei_cl_cb *cb, size_t length);
-
 
 /**
  * mei_io_list_init - Sets up a queue list.
@@ -118,6 +114,9 @@
 struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
 				  enum mei_cb_file_ops type,
 				  const struct file *fp);
+struct mei_cl_cb *mei_cl_enqueue_ctrl_wr_cb(struct mei_cl *cl, size_t length,
+					    enum mei_cb_file_ops type,
+					    const struct file *fp);
 int mei_cl_flush_queues(struct mei_cl *cl, const struct file *fp);
 
 /*
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index 085f3aa..dd7f15a 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -161,6 +161,7 @@
  * @dev: the device structure
  * @cl: client
  * @hbm_cmd: host bus message command
+ * @buf: message buffer
  * @len: buffer length
  *
  * Return: 0 on success, <0 on failure.
@@ -276,6 +277,7 @@
 
 	dev->hbm_state = MEI_HBM_STARTING;
 	dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
 	return 0;
 }
 
@@ -311,6 +313,7 @@
 	}
 	dev->hbm_state = MEI_HBM_ENUM_CLIENTS;
 	dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
 	return 0;
 }
 
@@ -339,7 +342,7 @@
 
 	me_cl->props = res->client_properties;
 	me_cl->client_id = res->me_addr;
-	me_cl->mei_flow_ctrl_creds = 0;
+	me_cl->tx_flow_ctrl_creds = 0;
 
 	mei_me_cl_add(dev, me_cl);
 
@@ -561,6 +564,7 @@
 	}
 
 	dev->init_clients_timer = MEI_CLIENTS_INIT_TIMEOUT;
+	mei_schedule_stall_timer(dev);
 
 	return 0;
 }
@@ -636,23 +640,22 @@
 }
 
 /**
- * mei_hbm_add_single_flow_creds - adds single buffer credentials.
+ * mei_hbm_add_single_tx_flow_ctrl_creds - adds single buffer credentials.
  *
  * @dev: the device structure
- * @flow: flow control.
+ * @fctrl: flow control response bus message
  *
  * Return: 0 on success, < 0 otherwise
  */
-static int mei_hbm_add_single_flow_creds(struct mei_device *dev,
-				  struct hbm_flow_control *flow)
+static int mei_hbm_add_single_tx_flow_ctrl_creds(struct mei_device *dev,
+						 struct hbm_flow_control *fctrl)
 {
 	struct mei_me_client *me_cl;
 	int rets;
 
-	me_cl = mei_me_cl_by_id(dev, flow->me_addr);
+	me_cl = mei_me_cl_by_id(dev, fctrl->me_addr);
 	if (!me_cl) {
-		dev_err(dev->dev, "no such me client %d\n",
-			flow->me_addr);
+		dev_err(dev->dev, "no such me client %d\n", fctrl->me_addr);
 		return -ENOENT;
 	}
 
@@ -661,9 +664,9 @@
 		goto out;
 	}
 
-	me_cl->mei_flow_ctrl_creds++;
+	me_cl->tx_flow_ctrl_creds++;
 	dev_dbg(dev->dev, "recv flow ctrl msg ME %d (single) creds = %d.\n",
-	    flow->me_addr, me_cl->mei_flow_ctrl_creds);
+		fctrl->me_addr, me_cl->tx_flow_ctrl_creds);
 
 	rets = 0;
 out:
@@ -675,24 +678,24 @@
  * mei_hbm_cl_flow_control_res - flow control response from me
  *
  * @dev: the device structure
- * @flow_control: flow control response bus message
+ * @fctrl: flow control response bus message
  */
-static void mei_hbm_cl_flow_control_res(struct mei_device *dev,
-					struct hbm_flow_control *flow_control)
+static void mei_hbm_cl_tx_flow_ctrl_creds_res(struct mei_device *dev,
+					       struct hbm_flow_control *fctrl)
 {
 	struct mei_cl *cl;
 
-	if (!flow_control->host_addr) {
+	if (!fctrl->host_addr) {
 		/* single receive buffer */
-		mei_hbm_add_single_flow_creds(dev, flow_control);
+		mei_hbm_add_single_tx_flow_ctrl_creds(dev, fctrl);
 		return;
 	}
 
-	cl = mei_hbm_cl_find_by_cmd(dev, flow_control);
+	cl = mei_hbm_cl_find_by_cmd(dev, fctrl);
 	if (cl) {
-		cl->mei_flow_ctrl_creds++;
+		cl->tx_flow_ctrl_creds++;
 		cl_dbg(dev, cl, "flow control creds = %d.\n",
-				cl->mei_flow_ctrl_creds);
+				cl->tx_flow_ctrl_creds);
 	}
 }
 
@@ -871,10 +874,10 @@
 		cl->state = MEI_FILE_DISCONNECTING;
 		cl->timer_count = 0;
 
-		cb = mei_io_cb_init(cl, MEI_FOP_DISCONNECT_RSP, NULL);
+		cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DISCONNECT_RSP,
+					       NULL);
 		if (!cb)
 			return -ENOMEM;
-		list_add_tail(&cb->list, &dev->ctrl_wr_list.list);
 	}
 	return 0;
 }
@@ -1022,7 +1025,7 @@
 
 	struct mei_hbm_cl_cmd *cl_cmd;
 	struct hbm_client_connect_request *disconnect_req;
-	struct hbm_flow_control *flow_control;
+	struct hbm_flow_control *fctrl;
 
 	/* read the message to our buffer */
 	BUG_ON(hdr->length >= sizeof(dev->rd_msg_buf));
@@ -1102,8 +1105,8 @@
 	case MEI_FLOW_CONTROL_CMD:
 		dev_dbg(dev->dev, "hbm: client flow control response: message received.\n");
 
-		flow_control = (struct hbm_flow_control *) mei_msg;
-		mei_hbm_cl_flow_control_res(dev, flow_control);
+		fctrl = (struct hbm_flow_control *)mei_msg;
+		mei_hbm_cl_tx_flow_ctrl_creds_res(dev, fctrl);
 		break;
 
 	case MEI_PG_ISOLATION_ENTRY_RES_CMD:
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index 0dcb854..7ad15d6 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -125,6 +125,9 @@
 #define MEI_DEV_ID_BXT_M      0x1A9A  /* Broxton M */
 #define MEI_DEV_ID_APL_I      0x5A9A  /* Apollo Lake I */
 
+#define MEI_DEV_ID_KBP        0xA2BA  /* Kaby Point */
+#define MEI_DEV_ID_KBP_2      0xA2BB  /* Kaby Point 2 */
+
 /*
  * MEI HW Section
  */
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index e2fb44c..56c2101 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -18,6 +18,7 @@
 
 #include <linux/kthread.h>
 #include <linux/interrupt.h>
+#include <linux/pm_runtime.h>
 
 #include "mei_dev.h"
 #include "hbm.h"
@@ -1063,6 +1064,8 @@
 		}
 	}
 
+	pm_runtime_set_active(dev->dev);
+
 	hcsr = mei_hcsr_read(dev);
 	/* H_RST may be found lit before reset is started,
 	 * for example if preceding reset flow hasn't completed.
@@ -1263,8 +1266,14 @@
 static bool mei_me_fw_type_sps(struct pci_dev *pdev)
 {
 	u32 reg;
-	/* Read ME FW Status check for SPS Firmware */
-	pci_read_config_dword(pdev, PCI_CFG_HFS_1, &reg);
+	unsigned int devfn;
+
+	/*
+	 * Read ME FW Status register to check for SPS Firmware
+	 * The SPS FW is only signaled in pci function 0
+	 */
+	devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+	pci_bus_read_config_dword(pdev->bus, devfn, PCI_CFG_HFS_1, &reg);
 	trace_mei_pci_cfg_read(&pdev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg);
 	/* if bits [19:16] = 15, running SPS Firmware */
 	return (reg & 0xf0000) == 0xf0000;
diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c
index 4a6c1b8..e6e5e55 100644
--- a/drivers/misc/mei/hw-txe.c
+++ b/drivers/misc/mei/hw-txe.c
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/irqreturn.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/mei.h>
 
@@ -935,6 +936,8 @@
 		return ret;
 	}
 
+	pm_runtime_set_active(dev->dev);
+
 	/* enable input ready interrupts:
 	 * SEC_IPC_HOST_INT_MASK.IPC_INPUT_READY_INT_MASK
 	 */
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index f7c8dfd..9a9c248 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -94,7 +94,7 @@
 	cancel_work_sync(&dev->reset_work);
 	cancel_work_sync(&dev->bus_rescan_work);
 
-	cancel_delayed_work(&dev->timer_work);
+	cancel_delayed_work_sync(&dev->timer_work);
 }
 EXPORT_SYMBOL_GPL(mei_cancel_work);
 
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index 3831a7b..5a4893c 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -102,26 +102,25 @@
 {
 	struct mei_device *dev = cl->dev;
 	struct mei_cl_cb *cb;
-	unsigned char *buffer = NULL;
 	size_t buf_sz;
 
 	cb = list_first_entry_or_null(&cl->rd_pending, struct mei_cl_cb, list);
 	if (!cb) {
-		cl_err(dev, cl, "pending read cb not found\n");
-		goto out;
+		if (!mei_cl_is_fixed_address(cl)) {
+			cl_err(dev, cl, "pending read cb not found\n");
+			goto discard;
+		}
+		cb = mei_cl_alloc_cb(cl, mei_cl_mtu(cl), MEI_FOP_READ, cl->fp);
+		if (!cb)
+			goto discard;
+		list_add_tail(&cb->list, &cl->rd_pending);
 	}
 
 	if (!mei_cl_is_connected(cl)) {
 		cl_dbg(dev, cl, "not connected\n");
-		cb->status = -ENODEV;
-		goto out;
-	}
-
-	if (cb->buf.size == 0 || cb->buf.data == NULL) {
-		cl_err(dev, cl, "response buffer is not allocated.\n");
 		list_move_tail(&cb->list, &complete_list->list);
-		cb->status = -ENOMEM;
-		goto out;
+		cb->status = -ENODEV;
+		goto discard;
 	}
 
 	buf_sz = mei_hdr->length + cb->buf_idx;
@@ -132,25 +131,19 @@
 
 		list_move_tail(&cb->list, &complete_list->list);
 		cb->status = -EMSGSIZE;
-		goto out;
+		goto discard;
 	}
 
 	if (cb->buf.size < buf_sz) {
 		cl_dbg(dev, cl, "message overflow. size %zu len %d idx %zu\n",
 			cb->buf.size, mei_hdr->length, cb->buf_idx);
-		buffer = krealloc(cb->buf.data, buf_sz, GFP_KERNEL);
 
-		if (!buffer) {
-			cb->status = -ENOMEM;
-			list_move_tail(&cb->list, &complete_list->list);
-			goto out;
-		}
-		cb->buf.data = buffer;
-		cb->buf.size = buf_sz;
+		list_move_tail(&cb->list, &complete_list->list);
+		cb->status = -EMSGSIZE;
+		goto discard;
 	}
 
-	buffer = cb->buf.data + cb->buf_idx;
-	mei_read_slots(dev, buffer, mei_hdr->length);
+	mei_read_slots(dev, cb->buf.data + cb->buf_idx, mei_hdr->length);
 
 	cb->buf_idx += mei_hdr->length;
 
@@ -162,10 +155,10 @@
 		pm_request_autosuspend(dev->dev);
 	}
 
-out:
-	if (!buffer)
-		mei_irq_discard_msg(dev, mei_hdr);
+	return 0;
 
+discard:
+	mei_irq_discard_msg(dev, mei_hdr);
 	return 0;
 }
 
@@ -216,6 +209,9 @@
 	int slots;
 	int ret;
 
+	if (!list_empty(&cl->rd_pending))
+		return 0;
+
 	msg_slots = mei_data2slots(sizeof(struct hbm_flow_control));
 	slots = mei_hbuf_empty_slots(dev);
 
@@ -463,6 +459,19 @@
 	mei_reset(dev);
 }
 
+#define MEI_STALL_TIMER_FREQ (2 * HZ)
+/**
+ * mei_schedule_stall_timer - re-arm stall_timer work
+ *
+ * Schedule stall timer
+ *
+ * @dev: the device structure
+ */
+void mei_schedule_stall_timer(struct mei_device *dev)
+{
+	schedule_delayed_work(&dev->timer_work, MEI_STALL_TIMER_FREQ);
+}
+
 /**
  * mei_timer - timer function.
  *
@@ -472,10 +481,9 @@
 void mei_timer(struct work_struct *work)
 {
 	struct mei_cl *cl;
-
 	struct mei_device *dev = container_of(work,
 					struct mei_device, timer_work.work);
-
+	bool reschedule_timer = false;
 
 	mutex_lock(&dev->device_lock);
 
@@ -490,6 +498,7 @@
 				mei_reset(dev);
 				goto out;
 			}
+			reschedule_timer = true;
 		}
 	}
 
@@ -504,6 +513,7 @@
 				mei_connect_timeout(cl);
 				goto out;
 			}
+			reschedule_timer = true;
 		}
 	}
 
@@ -514,19 +524,16 @@
 		if (--dev->iamthif_stall_timer == 0) {
 			dev_err(dev->dev, "timer: amthif  hanged.\n");
 			mei_reset(dev);
-			dev->iamthif_canceled = false;
-			dev->iamthif_state = MEI_IAMTHIF_IDLE;
 
-			mei_io_cb_free(dev->iamthif_current_cb);
-			dev->iamthif_current_cb = NULL;
-
-			dev->iamthif_fp = NULL;
 			mei_amthif_run_next_cmd(dev);
+			goto out;
 		}
+		reschedule_timer = true;
 	}
 
 out:
-	if (dev->dev_state != MEI_DEV_DISABLED)
-		schedule_delayed_work(&dev->timer_work, 2 * HZ);
+	if (dev->dev_state != MEI_DEV_DISABLED && reschedule_timer)
+		mei_schedule_stall_timer(dev);
+
 	mutex_unlock(&dev->device_lock);
 }
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 52635b0..fa50635 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -71,6 +71,7 @@
 		goto err_unlock;
 	}
 
+	cl->fp = file;
 	file->private_data = cl;
 
 	mutex_unlock(&dev->device_lock);
@@ -138,9 +139,8 @@
 	struct mei_cl *cl = file->private_data;
 	struct mei_device *dev;
 	struct mei_cl_cb *cb = NULL;
+	bool nonblock = !!(file->f_flags & O_NONBLOCK);
 	int rets;
-	int err;
-
 
 	if (WARN_ON(!cl || !cl->dev))
 		return -ENODEV;
@@ -164,11 +164,6 @@
 		goto out;
 	}
 
-	if (cl == &dev->iamthif_cl) {
-		rets = mei_amthif_read(dev, file, ubuf, length, offset);
-		goto out;
-	}
-
 	cb = mei_cl_read_cb(cl, file);
 	if (cb)
 		goto copy_buffer;
@@ -176,24 +171,29 @@
 	if (*offset > 0)
 		*offset = 0;
 
-	err = mei_cl_read_start(cl, length, file);
-	if (err && err != -EBUSY) {
-		cl_dbg(dev, cl, "mei start read failure status = %d\n", err);
-		rets = err;
+	rets = mei_cl_read_start(cl, length, file);
+	if (rets && rets != -EBUSY) {
+		cl_dbg(dev, cl, "mei start read failure status = %d\n", rets);
 		goto out;
 	}
 
-	if (list_empty(&cl->rd_completed) && !waitqueue_active(&cl->rx_wait)) {
-		if (file->f_flags & O_NONBLOCK) {
-			rets = -EAGAIN;
-			goto out;
-		}
+	if (nonblock) {
+		rets = -EAGAIN;
+		goto out;
+	}
 
+	if (rets == -EBUSY &&
+	    !mei_cl_enqueue_ctrl_wr_cb(cl, length, MEI_FOP_READ, file)) {
+		rets = -ENOMEM;
+		goto out;
+	}
+
+	do {
 		mutex_unlock(&dev->device_lock);
 
 		if (wait_event_interruptible(cl->rx_wait,
-				(!list_empty(&cl->rd_completed)) ||
-				(!mei_cl_is_connected(cl)))) {
+					     (!list_empty(&cl->rd_completed)) ||
+					     (!mei_cl_is_connected(cl)))) {
 
 			if (signal_pending(current))
 				return -EINTR;
@@ -202,16 +202,12 @@
 
 		mutex_lock(&dev->device_lock);
 		if (!mei_cl_is_connected(cl)) {
-			rets = -EBUSY;
+			rets = -ENODEV;
 			goto out;
 		}
-	}
 
-	cb = mei_cl_read_cb(cl, file);
-	if (!cb) {
-		rets = 0;
-		goto out;
-	}
+		cb = mei_cl_read_cb(cl, file);
+	} while (!cb);
 
 copy_buffer:
 	/* now copy the data to user space */
@@ -609,24 +605,24 @@
 		goto out;
 	}
 
-	if (cl == &dev->iamthif_cl) {
-		mask = mei_amthif_poll(dev, file, wait);
-		goto out;
-	}
-
 	if (notify_en) {
 		poll_wait(file, &cl->ev_wait, wait);
 		if (cl->notify_ev)
 			mask |= POLLPRI;
 	}
 
+	if (cl == &dev->iamthif_cl) {
+		mask |= mei_amthif_poll(file, wait);
+		goto out;
+	}
+
 	if (req_events & (POLLIN | POLLRDNORM)) {
 		poll_wait(file, &cl->rx_wait, wait);
 
 		if (!list_empty(&cl->rd_completed))
 			mask |= POLLIN | POLLRDNORM;
 		else
-			mei_cl_read_start(cl, 0, file);
+			mei_cl_read_start(cl, mei_cl_mtu(cl), file);
 	}
 
 out:
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index e5e3250..1169fd9 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -80,18 +80,13 @@
 enum iamthif_states {
 	MEI_IAMTHIF_IDLE,
 	MEI_IAMTHIF_WRITING,
-	MEI_IAMTHIF_FLOW_CONTROL,
 	MEI_IAMTHIF_READING,
-	MEI_IAMTHIF_READ_COMPLETE
 };
 
 enum mei_file_transaction_states {
 	MEI_IDLE,
 	MEI_WRITING,
 	MEI_WRITE_COMPLETE,
-	MEI_FLOW_CONTROL,
-	MEI_READING,
-	MEI_READ_COMPLETE
 };
 
 /**
@@ -146,7 +141,7 @@
  * @refcnt: struct reference count
  * @props: client properties
  * @client_id: me client id
- * @mei_flow_ctrl_creds: flow control credits
+ * @tx_flow_ctrl_creds: flow control credits
  * @connect_count: number connections to this client
  * @bus_added: added to bus
  */
@@ -155,7 +150,7 @@
 	struct kref refcnt;
 	struct mei_client_properties props;
 	u8 client_id;
-	u8 mei_flow_ctrl_creds;
+	u8 tx_flow_ctrl_creds;
 	u8 connect_count;
 	u8 bus_added;
 };
@@ -202,10 +197,11 @@
  * @ev_async: event async notification
  * @status: connection status
  * @me_cl: fw client connected
+ * @fp: file associated with client
  * @host_client_id: host id
- * @mei_flow_ctrl_creds: transmit flow credentials
+ * @tx_flow_ctrl_creds: transmit flow credentials
+ * @rx_flow_ctrl_creds: receive flow credentials
  * @timer_count:  watchdog timer for operation completion
- * @reserved: reserved for alignment
  * @notify_en: notification - enabled/disabled
  * @notify_ev: pending notification event
  * @writing_state: state of the tx
@@ -225,10 +221,11 @@
 	struct fasync_struct *ev_async;
 	int status;
 	struct mei_me_client *me_cl;
+	const struct file *fp;
 	u8 host_client_id;
-	u8 mei_flow_ctrl_creds;
+	u8 tx_flow_ctrl_creds;
+	u8 rx_flow_ctrl_creds;
 	u8 timer_count;
-	u8 reserved;
 	u8 notify_en;
 	u8 notify_ev;
 	enum mei_file_transaction_states writing_state;
@@ -400,9 +397,7 @@
  * @override_fixed_address: force allow fixed address behavior
  *
  * @amthif_cmd_list : amthif list for cmd waiting
- * @iamthif_fp : file for current amthif operation
  * @iamthif_cl  : amthif host client
- * @iamthif_current_cb : amthif current operation callback
  * @iamthif_open_count : number of opened amthif connections
  * @iamthif_stall_timer : timer to detect amthif hang
  * @iamthif_state : amthif processor state
@@ -484,10 +479,7 @@
 
 	/* amthif list for cmd waiting */
 	struct mei_cl_cb amthif_cmd_list;
-	/* driver managed amthif list for reading completed amthif cmd data */
-	const struct file *iamthif_fp;
 	struct mei_cl iamthif_cl;
-	struct mei_cl_cb *iamthif_current_cb;
 	long iamthif_open_count;
 	u32 iamthif_stall_timer;
 	enum iamthif_states iamthif_state;
@@ -556,6 +548,7 @@
  */
 
 void mei_timer(struct work_struct *work);
+void mei_schedule_stall_timer(struct mei_device *dev);
 int mei_irq_read_handler(struct mei_device *dev,
 		struct mei_cl_cb *cmpl_list, s32 *slots);
 
@@ -569,11 +562,7 @@
 
 int mei_amthif_host_init(struct mei_device *dev, struct mei_me_client *me_cl);
 
-int mei_amthif_read(struct mei_device *dev, struct file *file,
-		char __user *ubuf, size_t length, loff_t *offset);
-
-unsigned int mei_amthif_poll(struct mei_device *dev,
-		struct file *file, poll_table *wait);
+unsigned int mei_amthif_poll(struct file *file, poll_table *wait);
 
 int mei_amthif_release(struct mei_device *dev, struct file *file);
 
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 64e64da..f3ffd88 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -85,12 +85,15 @@
 
 	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT, mei_me_pch8_cfg)},
 	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_2, mei_me_pch8_cfg)},
-	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_cfg)},
-	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_cfg)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H, mei_me_pch8_sps_cfg)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_SPT_H_2, mei_me_pch8_sps_cfg)},
 
 	{MEI_PCI_DEVICE(MEI_DEV_ID_BXT_M, mei_me_pch8_cfg)},
 	{MEI_PCI_DEVICE(MEI_DEV_ID_APL_I, mei_me_pch8_cfg)},
 
+	{MEI_PCI_DEVICE(MEI_DEV_ID_KBP, mei_me_pch8_cfg)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_KBP_2, mei_me_pch8_cfg)},
+
 	/* required last entry */
 	{0, }
 };
@@ -217,8 +220,6 @@
 
 	pci_set_drvdata(pdev, dev);
 
-	schedule_delayed_work(&dev->timer_work, HZ);
-
 	/*
 	* For not wake-able HW runtime pm framework
 	* can't be used on pci device level.
@@ -400,6 +401,9 @@
 
 	dev_dbg(&pdev->dev, "rpm: me: runtime suspend ret=%d\n", ret);
 
+	if (ret && ret != -EAGAIN)
+		schedule_work(&dev->reset_work);
+
 	return ret;
 }
 
@@ -423,6 +427,9 @@
 
 	dev_dbg(&pdev->dev, "rpm: me: runtime resume ret = %d\n", ret);
 
+	if (ret)
+		schedule_work(&dev->reset_work);
+
 	return ret;
 }
 
diff --git a/drivers/misc/mei/pci-txe.c b/drivers/misc/mei/pci-txe.c
index 30cc306..58ffd30 100644
--- a/drivers/misc/mei/pci-txe.c
+++ b/drivers/misc/mei/pci-txe.c
@@ -347,6 +347,10 @@
 	dev_dbg(&pdev->dev, "rpm: txe: runtime suspend ret=%d\n", ret);
 
 	mutex_unlock(&dev->device_lock);
+
+	if (ret && ret != -EAGAIN)
+		schedule_work(&dev->reset_work);
+
 	return ret;
 }
 
@@ -372,6 +376,9 @@
 
 	dev_dbg(&pdev->dev, "rpm: txe: runtime resume ret = %d\n", ret);
 
+	if (ret)
+		schedule_work(&dev->reset_work);
+
 	return ret;
 }
 
diff --git a/drivers/misc/mic/scif/scif_dma.c b/drivers/misc/mic/scif/scif_dma.c
index cd01a0e..64d5760 100644
--- a/drivers/misc/mic/scif/scif_dma.c
+++ b/drivers/misc/mic/scif/scif_dma.c
@@ -115,7 +115,6 @@
  */
 static
 void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
-			    struct scif_endpt *ep,
 			    u64 start, u64 len)
 {
 	struct list_head *item, *tmp;
@@ -128,7 +127,6 @@
 
 	list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
 		window = list_entry(item, struct scif_window, list);
-		ep = (struct scif_endpt *)window->ep;
 		if (!len)
 			break;
 		start_va = window->va_for_temp;
@@ -146,7 +144,7 @@
 	struct scif_endpt *ep = mmn->ep;
 
 	spin_lock(&ep->rma_info.tc_lock);
-	__scif_rma_destroy_tcw(mmn, ep, start, len);
+	__scif_rma_destroy_tcw(mmn, start, len);
 	spin_unlock(&ep->rma_info.tc_lock);
 }
 
@@ -169,7 +167,7 @@
 	spin_lock(&ep->rma_info.tc_lock);
 	list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
 		mmn = list_entry(item, struct scif_mmu_notif, list);
-		__scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX);
+		__scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
 	}
 	spin_unlock(&ep->rma_info.tc_lock);
 }
diff --git a/drivers/misc/mic/scif/scif_mmap.c b/drivers/misc/mic/scif/scif_mmap.c
index 49cb8f7..9282116 100644
--- a/drivers/misc/mic/scif/scif_mmap.c
+++ b/drivers/misc/mic/scif/scif_mmap.c
@@ -552,7 +552,7 @@
 {
 	struct scif_endpt *ep;
 	struct vma_pvt *vmapvt = vma->vm_private_data;
-	int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int nr_pages = vma_pages(vma);
 	s64 offset;
 	struct scif_rma_req req;
 	struct scif_window *window = NULL;
@@ -614,7 +614,7 @@
 	struct scif_window *window = NULL;
 	struct scif_endpt *ep = (struct scif_endpt *)epd;
 	s64 start_offset = vma->vm_pgoff << PAGE_SHIFT;
-	int nr_pages = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	int nr_pages = vma_pages(vma);
 	int err;
 	struct vma_pvt *vmapvt;
 
diff --git a/drivers/misc/pch_phub.c b/drivers/misc/pch_phub.c
index 4810e03..e42bdc9 100644
--- a/drivers/misc/pch_phub.c
+++ b/drivers/misc/pch_phub.c
@@ -28,6 +28,7 @@
 #include <linux/if_ether.h>
 #include <linux/ctype.h>
 #include <linux/dmi.h>
+#include <linux/of.h>
 
 #define PHUB_STATUS 0x00		/* Status Register offset */
 #define PHUB_CONTROL 0x04		/* Control Register offset */
@@ -57,6 +58,7 @@
 
 /* CM-iTC */
 #define CLKCFG_UART_48MHZ			(1 << 16)
+#define CLKCFG_UART_25MHZ			(2 << 16)
 #define CLKCFG_BAUDDIV				(2 << 20)
 #define CLKCFG_PLL2VCO				(8 << 9)
 #define CLKCFG_UARTCLKSEL			(1 << 18)
@@ -711,6 +713,12 @@
 
 	if (id->driver_data == 1) { /* EG20T PCH */
 		const char *board_name;
+		unsigned int prefetch = 0x000affaa;
+
+		if (pdev->dev.of_node)
+			of_property_read_u32(pdev->dev.of_node,
+						  "intel,eg20t-prefetch",
+						  &prefetch);
 
 		ret = sysfs_create_file(&pdev->dev.kobj,
 					&dev_attr_pch_mac.attr);
@@ -736,11 +744,21 @@
 						CLKCFG_UART_MASK);
 
 		/* set the prefech value */
-		iowrite32(0x000affaa, chip->pch_phub_base_address + 0x14);
+		iowrite32(prefetch, chip->pch_phub_base_address + 0x14);
 		/* set the interrupt delay value */
 		iowrite32(0x25, chip->pch_phub_base_address + 0x44);
 		chip->pch_opt_rom_start_address = PCH_PHUB_ROM_START_ADDR_EG20T;
 		chip->pch_mac_start_address = PCH_PHUB_MAC_START_ADDR_EG20T;
+
+		/* quirk for MIPS Boston platform */
+		if (pdev->dev.of_node) {
+			if (of_machine_is_compatible("img,boston")) {
+				pch_phub_read_modify_write_reg(chip,
+					(unsigned int)CLKCFG_REG_OFFSET,
+					CLKCFG_UART_25MHZ,
+					CLKCFG_UART_MASK);
+			}
+		}
 	} else if (id->driver_data == 2) { /* ML7213 IOH */
 		ret = sysfs_create_bin_file(&pdev->dev.kobj, &pch_bin_attr);
 		if (ret)
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index 9ec262a..ec09010 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -381,18 +381,12 @@
 		return -EINVAL;
 	}
 
-	dg = kmalloc(send_info.len, GFP_KERNEL);
-	if (!dg) {
+	dg = memdup_user((void __user *)(uintptr_t)send_info.addr,
+			 send_info.len);
+	if (IS_ERR(dg)) {
 		vmci_ioctl_err(
 			"cannot allocate memory to dispatch datagram\n");
-		return -ENOMEM;
-	}
-
-	if (copy_from_user(dg, (void __user *)(uintptr_t)send_info.addr,
-			   send_info.len)) {
-		vmci_ioctl_err("error getting datagram\n");
-		kfree(dg);
-		return -EFAULT;
+		return PTR_ERR(dg);
 	}
 
 	if (VMCI_DG_SIZE(dg) != send_info.len) {
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 48a5dd7..2206d44 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -1726,6 +1726,7 @@
 			break;
 
 		if (req_op(next) == REQ_OP_DISCARD ||
+		    req_op(next) == REQ_OP_SECURE_ERASE ||
 		    req_op(next) == REQ_OP_FLUSH)
 			break;
 
@@ -2150,6 +2151,7 @@
 	struct mmc_card *card = md->queue.card;
 	struct mmc_host *host = card->host;
 	unsigned long flags;
+	bool req_is_special = mmc_req_is_special(req);
 
 	if (req && !mq->mqrq_prev->req)
 		/* claim host only for the first request */
@@ -2190,8 +2192,7 @@
 	}
 
 out:
-	if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) ||
-	    mmc_req_is_special(req))
+	if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) || req_is_special)
 		/*
 		 * Release host when there are no more requests
 		 * and after special request(discard, flush) is done.
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index bf14642..7080572 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -33,7 +33,8 @@
 	/*
 	 * We only like normal block requests and discards.
 	 */
-	if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD) {
+	if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD &&
+	    req_op(req) != REQ_OP_SECURE_ERASE) {
 		blk_dump_rq_flags(req, "MMC bad request");
 		return BLKPREP_KILL;
 	}
@@ -64,6 +65,8 @@
 		spin_unlock_irq(q->queue_lock);
 
 		if (req || mq->mqrq_prev->req) {
+			bool req_is_special = mmc_req_is_special(req);
+
 			set_current_state(TASK_RUNNING);
 			mq->issue_fn(mq, req);
 			cond_resched();
@@ -79,7 +82,7 @@
 			 * has been finished. Do not assign it to previous
 			 * request.
 			 */
-			if (mmc_req_is_special(req))
+			if (req_is_special)
 				mq->mqrq_cur->req = NULL;
 
 			mq->mqrq_prev->brq.mrq.data = NULL;
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index d625311..fee5e12 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -4,7 +4,9 @@
 static inline bool mmc_req_is_special(struct request *req)
 {
 	return req &&
-		(req_op(req) == REQ_OP_FLUSH || req_op(req) == REQ_OP_DISCARD);
+		(req_op(req) == REQ_OP_FLUSH ||
+		 req_op(req) == REQ_OP_DISCARD ||
+		 req_op(req) == REQ_OP_SECURE_ERASE);
 }
 
 struct request;
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 32380d5..767af20 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -1112,11 +1112,12 @@
 
 		div = (host->bus_hz != clock) ? DIV_ROUND_UP(div, 2) : 0;
 
-		dev_info(&slot->mmc->class_dev,
-			 "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ div = %d)\n",
-			 slot->id, host->bus_hz, clock,
-			 div ? ((host->bus_hz / div) >> 1) :
-			 host->bus_hz, div);
+		if (clock != slot->__clk_old || force_clkinit)
+			dev_info(&slot->mmc->class_dev,
+				 "Bus speed (slot %d) = %dHz (slot req %dHz, actual %dHZ div = %d)\n",
+				 slot->id, host->bus_hz, clock,
+				 div ? ((host->bus_hz / div) >> 1) :
+				 host->bus_hz, div);
 
 		/* disable clock */
 		mci_writel(host, CLKENA, 0);
@@ -1139,6 +1140,9 @@
 
 		/* inform CIU */
 		mci_send_cmd(slot, sdmmc_cmd_bits, 0);
+
+		/* keep the last clock value that was requested from core */
+		slot->__clk_old = clock;
 	}
 
 	host->current_speed = clock;
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 9e740bc..e8cd2de 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -249,6 +249,8 @@
  * @queue_node: List node for placing this node in the @queue list of
  *	&struct dw_mci.
  * @clock: Clock rate configured by set_ios(). Protected by host->lock.
+ * @__clk_old: The last clock value that was requested from core.
+ *	Keeping track of this helps us to avoid spamming the console.
  * @flags: Random state bits associated with the slot.
  * @id: Number of this slot.
  * @sdio_id: Number of this slot in the SDIO interrupt registers.
@@ -263,6 +265,7 @@
 	struct list_head	queue_node;
 
 	unsigned int		clock;
+	unsigned int		__clk_old;
 
 	unsigned long		flags;
 #define DW_MMC_CARD_PRESENT	0
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index f23d65e..be3c49f 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -1016,14 +1016,16 @@
 
 		/* Only reconfigure if we have a different burst size */
 		if (*bp != burst) {
-			struct dma_slave_config cfg;
-
-			cfg.src_addr = host->phys_base + OMAP_MMC_REG(host, DATA);
-			cfg.dst_addr = host->phys_base + OMAP_MMC_REG(host, DATA);
-			cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
-			cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
-			cfg.src_maxburst = burst;
-			cfg.dst_maxburst = burst;
+			struct dma_slave_config cfg = {
+				.src_addr = host->phys_base +
+					    OMAP_MMC_REG(host, DATA),
+				.dst_addr = host->phys_base +
+					    OMAP_MMC_REG(host, DATA),
+				.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES,
+				.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES,
+				.src_maxburst = burst,
+				.dst_maxburst = burst,
+			};
 
 			if (dmaengine_slave_config(c, &cfg))
 				goto use_pio;
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 24ebc9a..5f2f24a 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -1409,11 +1409,18 @@
 static int omap_hsmmc_setup_dma_transfer(struct omap_hsmmc_host *host,
 					struct mmc_request *req)
 {
-	struct dma_slave_config cfg;
 	struct dma_async_tx_descriptor *tx;
 	int ret = 0, i;
 	struct mmc_data *data = req->data;
 	struct dma_chan *chan;
+	struct dma_slave_config cfg = {
+		.src_addr = host->mapbase + OMAP_HSMMC_DATA,
+		.dst_addr = host->mapbase + OMAP_HSMMC_DATA,
+		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.src_maxburst = data->blksz / 4,
+		.dst_maxburst = data->blksz / 4,
+	};
 
 	/* Sanity check: all the SG entries must be aligned by block size. */
 	for (i = 0; i < data->sg_len; i++) {
@@ -1433,13 +1440,6 @@
 
 	chan = omap_hsmmc_get_dma_chan(host, data);
 
-	cfg.src_addr = host->mapbase + OMAP_HSMMC_DATA;
-	cfg.dst_addr = host->mapbase + OMAP_HSMMC_DATA;
-	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	cfg.src_maxburst = data->blksz / 4;
-	cfg.dst_maxburst = data->blksz / 4;
-
 	ret = dmaengine_slave_config(chan, &cfg);
 	if (ret)
 		return ret;
diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
index c95ba83..ed92ce72 100644
--- a/drivers/mmc/host/sdhci-st.c
+++ b/drivers/mmc/host/sdhci-st.c
@@ -28,6 +28,7 @@
 
 struct st_mmc_platform_data {
 	struct  reset_control *rstc;
+	struct  clk *icnclk;
 	void __iomem *top_ioaddr;
 };
 
@@ -353,7 +354,7 @@
 	struct sdhci_host *host;
 	struct st_mmc_platform_data *pdata;
 	struct sdhci_pltfm_host *pltfm_host;
-	struct clk *clk;
+	struct clk *clk, *icnclk;
 	int ret = 0;
 	u16 host_version;
 	struct resource *res;
@@ -365,6 +366,11 @@
 		return PTR_ERR(clk);
 	}
 
+	/* ICN clock isn't compulsory, but use it if it's provided. */
+	icnclk = devm_clk_get(&pdev->dev, "icn");
+	if (IS_ERR(icnclk))
+		icnclk = NULL;
+
 	rstc = devm_reset_control_get(&pdev->dev, NULL);
 	if (IS_ERR(rstc))
 		rstc = NULL;
@@ -389,6 +395,7 @@
 	}
 
 	clk_prepare_enable(clk);
+	clk_prepare_enable(icnclk);
 
 	/* Configure the FlashSS Top registers for setting eMMC TX/RX delay */
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
@@ -400,6 +407,7 @@
 	}
 
 	pltfm_host->clk = clk;
+	pdata->icnclk = icnclk;
 
 	/* Configure the Arasan HC inside the flashSS */
 	st_mmcss_cconfig(np, host);
@@ -422,6 +430,7 @@
 	return 0;
 
 err_out:
+	clk_disable_unprepare(icnclk);
 	clk_disable_unprepare(clk);
 err_of:
 	sdhci_pltfm_free(pdev);
@@ -442,6 +451,8 @@
 
 	ret = sdhci_pltfm_unregister(pdev);
 
+	clk_disable_unprepare(pdata->icnclk);
+
 	if (rstc)
 		reset_control_assert(rstc);
 
@@ -462,6 +473,7 @@
 	if (pdata->rstc)
 		reset_control_assert(pdata->rstc);
 
+	clk_disable_unprepare(pdata->icnclk);
 	clk_disable_unprepare(pltfm_host->clk);
 out:
 	return ret;
@@ -475,6 +487,7 @@
 	struct device_node *np = dev->of_node;
 
 	clk_prepare_enable(pltfm_host->clk);
+	clk_prepare_enable(pdata->icnclk);
 
 	if (pdata->rstc)
 		reset_control_deassert(pdata->rstc);
diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c
index 1e819f9..bb3e0d1 100644
--- a/drivers/mmc/host/vub300.c
+++ b/drivers/mmc/host/vub300.c
@@ -2116,13 +2116,11 @@
 	command_out_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!command_out_urb) {
 		retval = -ENOMEM;
-		dev_err(&udev->dev, "not enough memory for command_out_urb\n");
 		goto error0;
 	}
 	command_res_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!command_res_urb) {
 		retval = -ENOMEM;
-		dev_err(&udev->dev, "not enough memory for command_res_urb\n");
 		goto error1;
 	}
 	/* this also allocates memory for our VUB300 mmc host device */
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index cc07ba0..27fa8b8 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -240,6 +240,9 @@
 	unsigned long flags;
 	u32 val;
 
+	/* Reset ECC hardware */
+	davinci_nand_readl(info, NAND_4BIT_ECC1_OFFSET);
+
 	spin_lock_irqsave(&davinci_nand_lock, flags);
 
 	/* Start 4-bit ECC calculation for read/write */
diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c
index 25a4fbd..d54f666 100644
--- a/drivers/mtd/nand/mtk_ecc.c
+++ b/drivers/mtd/nand/mtk_ecc.c
@@ -366,7 +366,8 @@
 		   u8 *data, u32 bytes)
 {
 	dma_addr_t addr;
-	u32 *p, len, i;
+	u8 *p;
+	u32 len, i, val;
 	int ret = 0;
 
 	addr = dma_map_single(ecc->dev, data, bytes, DMA_TO_DEVICE);
@@ -392,11 +393,14 @@
 
 	/* Program ECC bytes to OOB: per sector oob = FDM + ECC + SPARE */
 	len = (config->strength * ECC_PARITY_BITS + 7) >> 3;
-	p = (u32 *)(data + bytes);
+	p = data + bytes;
 
 	/* write the parity bytes generated by the ECC back to the OOB region */
-	for (i = 0; i < len; i++)
-		p[i] = readl(ecc->regs + ECC_ENCPAR(i));
+	for (i = 0; i < len; i++) {
+		if ((i % 4) == 0)
+			val = readl(ecc->regs + ECC_ENCPAR(i / 4));
+		p[i] = (val >> ((i % 4) * 8)) & 0xff;
+	}
 timeout:
 
 	dma_unmap_single(ecc->dev, addr, bytes, DMA_TO_DEVICE);
diff --git a/drivers/mtd/nand/mtk_nand.c b/drivers/mtd/nand/mtk_nand.c
index ddaa2ac..5223a21 100644
--- a/drivers/mtd/nand/mtk_nand.c
+++ b/drivers/mtd/nand/mtk_nand.c
@@ -93,6 +93,9 @@
 #define		NFI_FSM_MASK		(0xf << 16)
 #define NFI_ADDRCNTR		(0x70)
 #define		CNTR_MASK		GENMASK(16, 12)
+#define		ADDRCNTR_SEC_SHIFT	(12)
+#define		ADDRCNTR_SEC(val) \
+		(((val) & CNTR_MASK) >> ADDRCNTR_SEC_SHIFT)
 #define NFI_STRADDR		(0x80)
 #define NFI_BYTELEN		(0x84)
 #define NFI_CSEL		(0x90)
@@ -699,7 +702,7 @@
 	}
 
 	ret = readl_poll_timeout_atomic(nfc->regs + NFI_ADDRCNTR, reg,
-					(reg & CNTR_MASK) >= chip->ecc.steps,
+					ADDRCNTR_SEC(reg) >= chip->ecc.steps,
 					10, MTK_TIMEOUT);
 	if (ret)
 		dev_err(dev, "hwecc write timeout\n");
@@ -902,7 +905,7 @@
 		dev_warn(nfc->dev, "read ahb/dma done timeout\n");
 
 	rc = readl_poll_timeout_atomic(nfc->regs + NFI_BYTELEN, reg,
-				       (reg & CNTR_MASK) >= sectors, 10,
+				       ADDRCNTR_SEC(reg) >= sectors, 10,
 				       MTK_TIMEOUT);
 	if (rc < 0) {
 		dev_err(nfc->dev, "subpage done timeout\n");
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 5173fad..57cbe2b 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -943,7 +943,7 @@
 	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	int stepsize = nand_chip->ecc.bytes == 9 ? 16 : 26;
 
-	if (section > nand_chip->ecc.steps)
+	if (section >= nand_chip->ecc.steps)
 		return -ERANGE;
 
 	if (!section) {
diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c
index a59361c..5513bfd9 100644
--- a/drivers/mtd/nand/omap2.c
+++ b/drivers/mtd/nand/omap2.c
@@ -2169,7 +2169,7 @@
 	return 0;
 
 return_error:
-	if (info->dma)
+	if (!IS_ERR_OR_NULL(info->dma))
 		dma_release_channel(info->dma);
 	if (nand_chip->ecc.priv) {
 		nand_bch_free(nand_chip->ecc.priv);
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 0c5415b..95c32f2 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -149,6 +149,8 @@
     tristate "IP-VLAN support"
     depends on INET
     depends on IPV6
+    depends on NETFILTER
+    depends on NET_L3_MASTER_DEV
     ---help---
       This allows one to create virtual devices off of a main interface
       and packets will be delivered based on the dest L3 (IPv6/IPv4 addr)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1f276fa..3f31ca3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -152,7 +152,7 @@
 MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; "
 			    "0 for slow, 1 for fast");
 module_param(ad_select, charp, 0);
-MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; "
+MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; "
 			    "0 for stable (default), 1 for bandwidth, "
 			    "2 for count");
 module_param(min_links, int, 0);
@@ -1341,9 +1341,10 @@
 			    slave_dev->name);
 	}
 
-	/* already enslaved */
-	if (slave_dev->flags & IFF_SLAVE) {
-		netdev_dbg(bond_dev, "Error: Device was already enslaved\n");
+	/* already in-use? */
+	if (netdev_is_rx_handler_busy(slave_dev)) {
+		netdev_err(bond_dev,
+			   "Error: Device is in use and cannot be enslaved\n");
 		return -EBUSY;
 	}
 
@@ -4627,7 +4628,7 @@
 
 	netdev_dbg(bond_dev, "Begin bond_init\n");
 
-	bond->wq = create_singlethread_workqueue(bond_dev->name);
+	bond->wq = alloc_ordered_workqueue(bond_dev->name, WQ_MEM_RECLAIM);
 	if (!bond->wq)
 		return -ENOMEM;
 
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index e21f7cc..8d6208c 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -21,6 +21,7 @@
 #include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
+#include <linux/workqueue.h>
 #include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/skb.h>
@@ -501,9 +502,8 @@
 /*
  * CAN device restart for bus-off recovery
  */
-static void can_restart(unsigned long data)
+static void can_restart(struct net_device *dev)
 {
-	struct net_device *dev = (struct net_device *)data;
 	struct can_priv *priv = netdev_priv(dev);
 	struct net_device_stats *stats = &dev->stats;
 	struct sk_buff *skb;
@@ -543,6 +543,14 @@
 		netdev_err(dev, "Error %d during restart", err);
 }
 
+static void can_restart_work(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct can_priv *priv = container_of(dwork, struct can_priv, restart_work);
+
+	can_restart(priv->dev);
+}
+
 int can_restart_now(struct net_device *dev)
 {
 	struct can_priv *priv = netdev_priv(dev);
@@ -556,8 +564,8 @@
 	if (priv->state != CAN_STATE_BUS_OFF)
 		return -EBUSY;
 
-	/* Runs as soon as possible in the timer context */
-	mod_timer(&priv->restart_timer, jiffies);
+	cancel_delayed_work_sync(&priv->restart_work);
+	can_restart(dev);
 
 	return 0;
 }
@@ -578,8 +586,8 @@
 	netif_carrier_off(dev);
 
 	if (priv->restart_ms)
-		mod_timer(&priv->restart_timer,
-			  jiffies + (priv->restart_ms * HZ) / 1000);
+		schedule_delayed_work(&priv->restart_work,
+				      msecs_to_jiffies(priv->restart_ms));
 }
 EXPORT_SYMBOL_GPL(can_bus_off);
 
@@ -688,6 +696,7 @@
 		return NULL;
 
 	priv = netdev_priv(dev);
+	priv->dev = dev;
 
 	if (echo_skb_max) {
 		priv->echo_skb_max = echo_skb_max;
@@ -697,7 +706,7 @@
 
 	priv->state = CAN_STATE_STOPPED;
 
-	init_timer(&priv->restart_timer);
+	INIT_DELAYED_WORK(&priv->restart_work, can_restart_work);
 
 	return dev;
 }
@@ -778,8 +787,6 @@
 	if (!netif_carrier_ok(dev))
 		netif_carrier_on(dev);
 
-	setup_timer(&priv->restart_timer, can_restart, (unsigned long)dev);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(open_candev);
@@ -794,7 +801,7 @@
 {
 	struct can_priv *priv = netdev_priv(dev);
 
-	del_timer_sync(&priv->restart_timer);
+	cancel_delayed_work_sync(&priv->restart_work);
 	can_flush_echo_skb(dev);
 }
 EXPORT_SYMBOL_GPL(close_candev);
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index 41c0fc9..16f7cad 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -1268,11 +1268,10 @@
 	struct flexcan_priv *priv = netdev_priv(dev);
 	int err;
 
-	err = flexcan_chip_disable(priv);
-	if (err)
-		return err;
-
 	if (netif_running(dev)) {
+		err = flexcan_chip_disable(priv);
+		if (err)
+			return err;
 		netif_stop_queue(dev);
 		netif_device_detach(dev);
 	}
@@ -1285,13 +1284,17 @@
 {
 	struct net_device *dev = dev_get_drvdata(device);
 	struct flexcan_priv *priv = netdev_priv(dev);
+	int err;
 
 	priv->can.state = CAN_STATE_ERROR_ACTIVE;
 	if (netif_running(dev)) {
 		netif_device_attach(dev);
 		netif_start_queue(dev);
+		err = flexcan_chip_enable(priv);
+		if (err)
+			return err;
 	}
-	return flexcan_chip_enable(priv);
+	return 0;
 }
 
 static SIMPLE_DEV_PM_OPS(flexcan_pm_ops, flexcan_suspend, flexcan_resume);
diff --git a/drivers/net/can/ifi_canfd/ifi_canfd.c b/drivers/net/can/ifi_canfd/ifi_canfd.c
index 2d1d22e..368bb07 100644
--- a/drivers/net/can/ifi_canfd/ifi_canfd.c
+++ b/drivers/net/can/ifi_canfd/ifi_canfd.c
@@ -81,6 +81,10 @@
 #define IFI_CANFD_TIME_SET_TIMEA_4_12_6_6	BIT(15)
 
 #define IFI_CANFD_TDELAY			0x1c
+#define IFI_CANFD_TDELAY_DEFAULT		0xb
+#define IFI_CANFD_TDELAY_MASK			0x3fff
+#define IFI_CANFD_TDELAY_ABS			BIT(14)
+#define IFI_CANFD_TDELAY_EN			BIT(15)
 
 #define IFI_CANFD_ERROR				0x20
 #define IFI_CANFD_ERROR_TX_OFFSET		0
@@ -641,7 +645,7 @@
 	struct ifi_canfd_priv *priv = netdev_priv(ndev);
 	const struct can_bittiming *bt = &priv->can.bittiming;
 	const struct can_bittiming *dbt = &priv->can.data_bittiming;
-	u16 brp, sjw, tseg1, tseg2;
+	u16 brp, sjw, tseg1, tseg2, tdc;
 
 	/* Configure bit timing */
 	brp = bt->brp - 2;
@@ -664,6 +668,11 @@
 	       (brp << IFI_CANFD_TIME_PRESCALE_OFF) |
 	       (sjw << IFI_CANFD_TIME_SJW_OFF_7_9_8_8),
 	       priv->base + IFI_CANFD_FTIME);
+
+	/* Configure transmitter delay */
+	tdc = (dbt->brp * (dbt->phase_seg1 + 1)) & IFI_CANFD_TDELAY_MASK;
+	writel(IFI_CANFD_TDELAY_EN | IFI_CANFD_TDELAY_ABS | tdc,
+	       priv->base + IFI_CANFD_TDELAY);
 }
 
 static void ifi_canfd_set_filter(struct net_device *ndev, const u32 id,
diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 71f0e79..b3d0275 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -600,7 +600,6 @@
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			netdev_err(netdev, "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -752,10 +751,8 @@
 
 	/* create a URB, and a buffer for it, and copy the data to the URB */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
+	if (!urb)
 		goto nomem;
-	}
 
 	buf = usb_alloc_coherent(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma);
 	if (!buf) {
@@ -1007,10 +1004,8 @@
 		dev->tx_contexts[i].echo_index = MAX_TX_URBS;
 
 	dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->intr_urb) {
-		dev_err(&intf->dev, "Couldn't alloc intr URB\n");
+	if (!dev->intr_urb)
 		goto cleanup_candev;
-	}
 
 	dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL);
 	if (!dev->intr_in_buffer)
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 784a900..be928ce 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -558,8 +558,6 @@
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			dev_warn(dev->udev->dev.parent,
-				 "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -730,7 +728,6 @@
 	/* create a URB, and a buffer for it, and copy the data to the URB */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
 		stats->tx_dropped++;
 		dev_kfree_skb(skb);
 		goto nourbmem;
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 6f0cbc3..77e3cc0 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -493,10 +493,8 @@
 
 	/* create a URB, and a buffer for it */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(netdev, "No memory left for URB\n");
+	if (!urb)
 		goto nomem_urb;
-	}
 
 	hf = usb_alloc_coherent(dev->udev, sizeof(*hf), GFP_ATOMIC,
 				&urb->transfer_dma);
@@ -600,11 +598,8 @@
 
 			/* alloc rx urb */
 			urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (!urb) {
-				netdev_err(netdev,
-					   "No memory left for URB\n");
+			if (!urb)
 				return -ENOMEM;
-			}
 
 			/* alloc rx buffer */
 			buf = usb_alloc_coherent(dev->udev,
diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
index 6f1f3b6..d51e0c4 100644
--- a/drivers/net/can/usb/kvaser_usb.c
+++ b/drivers/net/can/usb/kvaser_usb.c
@@ -787,10 +787,8 @@
 	int err;
 
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
+	if (!urb)
 		return -ENOMEM;
-	}
 
 	buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC);
 	if (!buf) {
@@ -1393,8 +1391,6 @@
 
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			dev_warn(dev->udev->dev.parent,
-				 "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -1670,7 +1666,6 @@
 
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
 		stats->tx_dropped++;
 		dev_kfree_skb(skb);
 		return NETDEV_TX_OK;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index bfb91d8..c06382c 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -399,7 +399,6 @@
 		/* create a URB, and a buffer for it, to receive usb messages */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			netdev_err(netdev, "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -454,7 +453,6 @@
 		/* create a URB and a buffer for it, to transmit usb messages */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			netdev_err(netdev, "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
@@ -651,10 +649,8 @@
 
 	/* first allocate a urb to handle the asynchronous steps */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(dev->netdev, "no memory left for urb\n");
+	if (!urb)
 		return -ENOMEM;
-	}
 
 	/* also allocate enough space for the commands to send */
 	buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_ATOMIC);
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index a731720..108a30e 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -623,10 +623,8 @@
 
 	/* create a URB, and a buffer for it, and copy the data to the URB */
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(netdev, "No memory left for URBs\n");
+	if (!urb)
 		goto nomem;
-	}
 
 	buf = usb_alloc_coherent(priv->udev, size, GFP_ATOMIC,
 				 &urb->transfer_dma);
@@ -748,7 +746,6 @@
 		/* create a URB, and a buffer for it */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			netdev_err(netdev, "No memory left for URBs\n");
 			err = -ENOMEM;
 			break;
 		}
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 8f45443..0659846 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -16,6 +16,7 @@
 	select FIXED_PHY
 	select BCM7XXX_PHY
 	select MDIO_BCM_UNIMAC
+	select B53
 	---help---
 	  This enables support for the Broadcom Starfighter 2 Ethernet
 	  switch chips.
@@ -24,4 +25,13 @@
 
 source "drivers/net/dsa/mv88e6xxx/Kconfig"
 
+config NET_DSA_QCA8K
+	tristate "Qualcomm Atheros QCA8K Ethernet switch family support"
+	depends on NET_DSA
+	select NET_DSA_TAG_QCA
+	select REGMAP
+	---help---
+	  This enables support for the Qualcomm Atheros QCA8K Ethernet
+	  switch chips.
+
 endmenu
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index ca1e71b..8346e4f 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_BCM_SF2)	+= bcm_sf2.o
+obj-$(CONFIG_NET_DSA_QCA8K)	+= qca8k.o
 
 obj-y				+= b53/
 obj-y				+= mv88e6xxx/
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index bda37d3..7717b19 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -167,6 +167,65 @@
 
 #define B53_MIBS_SIZE	ARRAY_SIZE(b53_mibs)
 
+static const struct b53_mib_desc b53_mibs_58xx[] = {
+	{ 8, 0x00, "TxOctets" },
+	{ 4, 0x08, "TxDropPkts" },
+	{ 4, 0x0c, "TxQPKTQ0" },
+	{ 4, 0x10, "TxBroadcastPkts" },
+	{ 4, 0x14, "TxMulticastPkts" },
+	{ 4, 0x18, "TxUnicastPKts" },
+	{ 4, 0x1c, "TxCollisions" },
+	{ 4, 0x20, "TxSingleCollision" },
+	{ 4, 0x24, "TxMultipleCollision" },
+	{ 4, 0x28, "TxDeferredCollision" },
+	{ 4, 0x2c, "TxLateCollision" },
+	{ 4, 0x30, "TxExcessiveCollision" },
+	{ 4, 0x34, "TxFrameInDisc" },
+	{ 4, 0x38, "TxPausePkts" },
+	{ 4, 0x3c, "TxQPKTQ1" },
+	{ 4, 0x40, "TxQPKTQ2" },
+	{ 4, 0x44, "TxQPKTQ3" },
+	{ 4, 0x48, "TxQPKTQ4" },
+	{ 4, 0x4c, "TxQPKTQ5" },
+	{ 8, 0x50, "RxOctets" },
+	{ 4, 0x58, "RxUndersizePkts" },
+	{ 4, 0x5c, "RxPausePkts" },
+	{ 4, 0x60, "RxPkts64Octets" },
+	{ 4, 0x64, "RxPkts65to127Octets" },
+	{ 4, 0x68, "RxPkts128to255Octets" },
+	{ 4, 0x6c, "RxPkts256to511Octets" },
+	{ 4, 0x70, "RxPkts512to1023Octets" },
+	{ 4, 0x74, "RxPkts1024toMaxPktsOctets" },
+	{ 4, 0x78, "RxOversizePkts" },
+	{ 4, 0x7c, "RxJabbers" },
+	{ 4, 0x80, "RxAlignmentErrors" },
+	{ 4, 0x84, "RxFCSErrors" },
+	{ 8, 0x88, "RxGoodOctets" },
+	{ 4, 0x90, "RxDropPkts" },
+	{ 4, 0x94, "RxUnicastPkts" },
+	{ 4, 0x98, "RxMulticastPkts" },
+	{ 4, 0x9c, "RxBroadcastPkts" },
+	{ 4, 0xa0, "RxSAChanges" },
+	{ 4, 0xa4, "RxFragments" },
+	{ 4, 0xa8, "RxJumboPkt" },
+	{ 4, 0xac, "RxSymblErr" },
+	{ 4, 0xb0, "InRangeErrCount" },
+	{ 4, 0xb4, "OutRangeErrCount" },
+	{ 4, 0xb8, "EEELpiEvent" },
+	{ 4, 0xbc, "EEELpiDuration" },
+	{ 4, 0xc0, "RxDiscard" },
+	{ 4, 0xc8, "TxQPKTQ6" },
+	{ 4, 0xcc, "TxQPKTQ7" },
+	{ 4, 0xd0, "TxPkts64Octets" },
+	{ 4, 0xd4, "TxPkts65to127Octets" },
+	{ 4, 0xd8, "TxPkts128to255Octets" },
+	{ 4, 0xdc, "TxPkts256to511Ocets" },
+	{ 4, 0xe0, "TxPkts512to1023Ocets" },
+	{ 4, 0xe4, "TxPkts1024toMaxPktOcets" },
+};
+
+#define B53_MIBS_58XX_SIZE	ARRAY_SIZE(b53_mibs_58xx)
+
 static int b53_do_vlan_op(struct b53_device *dev, u8 op)
 {
 	unsigned int i;
@@ -418,7 +477,7 @@
 
 static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	unsigned int i;
 	u16 pvlan;
 
@@ -436,7 +495,7 @@
 static int b53_enable_port(struct dsa_switch *ds, int port,
 			   struct phy_device *phy)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	unsigned int cpu_port = dev->cpu_port;
 	u16 pvlan;
 
@@ -461,7 +520,7 @@
 static void b53_disable_port(struct dsa_switch *ds, int port,
 			     struct phy_device *phy)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	u8 reg;
 
 	/* Disable Tx/Rx for the port */
@@ -570,7 +629,7 @@
 
 static int b53_phy_read16(struct dsa_switch *ds, int addr, int reg)
 {
-	struct b53_device *priv = ds_to_priv(ds);
+	struct b53_device *priv = ds->priv;
 	u16 value = 0;
 	int ret;
 
@@ -585,7 +644,7 @@
 
 static int b53_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val)
 {
-	struct b53_device *priv = ds_to_priv(ds);
+	struct b53_device *priv = ds->priv;
 
 	if (priv->ops->phy_write16)
 		return priv->ops->phy_write16(priv, addr, reg, val);
@@ -635,6 +694,8 @@
 		return b53_mibs_65;
 	else if (is63xx(dev))
 		return b53_mibs_63xx;
+	else if (is58xx(dev))
+		return b53_mibs_58xx;
 	else
 		return b53_mibs;
 }
@@ -645,13 +706,15 @@
 		return B53_MIBS_65_SIZE;
 	else if (is63xx(dev))
 		return B53_MIBS_63XX_SIZE;
+	else if (is58xx(dev))
+		return B53_MIBS_58XX_SIZE;
 	else
 		return B53_MIBS_SIZE;
 }
 
 static void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	const struct b53_mib_desc *mibs = b53_get_mib(dev);
 	unsigned int mib_size = b53_get_mib_size(dev);
 	unsigned int i;
@@ -664,7 +727,7 @@
 static void b53_get_ethtool_stats(struct dsa_switch *ds, int port,
 				  uint64_t *data)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	const struct b53_mib_desc *mibs = b53_get_mib(dev);
 	unsigned int mib_size = b53_get_mib_size(dev);
 	const struct b53_mib_desc *s;
@@ -696,19 +759,14 @@
 
 static int b53_get_sset_count(struct dsa_switch *ds)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 
 	return b53_get_mib_size(dev);
 }
 
-static int b53_set_addr(struct dsa_switch *ds, u8 *addr)
-{
-	return 0;
-}
-
 static int b53_setup(struct dsa_switch *ds)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	unsigned int port;
 	int ret;
 
@@ -739,7 +797,7 @@
 static void b53_adjust_link(struct dsa_switch *ds, int port,
 			    struct phy_device *phydev)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	u8 rgmii_ctrl = 0, reg = 0, off;
 
 	if (!phy_is_pseudo_fixed_link(phydev))
@@ -873,7 +931,7 @@
 			    const struct switchdev_obj_port_vlan *vlan,
 			    struct switchdev_trans *trans)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 
 	if ((is5325(dev) || is5365(dev)) && vlan->vid_begin == 0)
 		return -EOPNOTSUPP;
@@ -890,7 +948,7 @@
 			 const struct switchdev_obj_port_vlan *vlan,
 			 struct switchdev_trans *trans)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
 	bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
 	unsigned int cpu_port = dev->cpu_port;
@@ -924,7 +982,7 @@
 static int b53_vlan_del(struct dsa_switch *ds, int port,
 			const struct switchdev_obj_port_vlan *vlan)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
 	unsigned int cpu_port = dev->cpu_port;
 	struct b53_vlan *vl;
@@ -970,7 +1028,7 @@
 			 struct switchdev_obj_port_vlan *vlan,
 			 int (*cb)(struct switchdev_obj *obj))
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	u16 vid, vid_start = 0, pvid;
 	struct b53_vlan *vl;
 	int err = 0;
@@ -1129,7 +1187,7 @@
 			   const struct switchdev_obj_port_fdb *fdb,
 			   struct switchdev_trans *trans)
 {
-	struct b53_device *priv = ds_to_priv(ds);
+	struct b53_device *priv = ds->priv;
 
 	/* 5325 and 5365 require some more massaging, but could
 	 * be supported eventually
@@ -1144,7 +1202,7 @@
 			const struct switchdev_obj_port_fdb *fdb,
 			struct switchdev_trans *trans)
 {
-	struct b53_device *priv = ds_to_priv(ds);
+	struct b53_device *priv = ds->priv;
 
 	if (b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, true))
 		pr_err("%s: failed to add MAC address\n", __func__);
@@ -1153,7 +1211,7 @@
 static int b53_fdb_del(struct dsa_switch *ds, int port,
 		       const struct switchdev_obj_port_fdb *fdb)
 {
-	struct b53_device *priv = ds_to_priv(ds);
+	struct b53_device *priv = ds->priv;
 
 	return b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, false);
 }
@@ -1212,7 +1270,7 @@
 			struct switchdev_obj_port_fdb *fdb,
 			int (*cb)(struct switchdev_obj *obj))
 {
-	struct b53_device *priv = ds_to_priv(ds);
+	struct b53_device *priv = ds->priv;
 	struct net_device *dev = ds->ports[port].netdev;
 	struct b53_arl_entry results[2];
 	unsigned int count = 0;
@@ -1251,10 +1309,22 @@
 static int b53_br_join(struct dsa_switch *ds, int port,
 		       struct net_device *bridge)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
+	s8 cpu_port = ds->dst->cpu_port;
 	u16 pvlan, reg;
 	unsigned int i;
 
+	/* Make this port leave the all VLANs join since we will have proper
+	 * VLAN entries from now on
+	 */
+	if (is58xx(dev)) {
+		b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, &reg);
+		reg &= ~BIT(port);
+		if ((reg & BIT(cpu_port)) == BIT(cpu_port))
+			reg &= ~BIT(cpu_port);
+		b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg);
+	}
+
 	dev->ports[port].bridge_dev = bridge;
 	b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan);
 
@@ -1284,9 +1354,10 @@
 
 static void b53_br_leave(struct dsa_switch *ds, int port)
 {
-	struct b53_device *dev = ds_to_priv(ds);
+	struct b53_device *dev = ds->priv;
 	struct net_device *bridge = dev->ports[port].bridge_dev;
 	struct b53_vlan *vl = &dev->vlans[0];
+	s8 cpu_port = ds->dst->cpu_port;
 	unsigned int i;
 	u16 pvlan, reg, pvid;
 
@@ -1316,22 +1387,27 @@
 	else
 		pvid = 0;
 
-	b53_get_vlan_entry(dev, pvid, vl);
-	vl->members |= BIT(port) | BIT(dev->cpu_port);
-	vl->untag |= BIT(port) | BIT(dev->cpu_port);
-	b53_set_vlan_entry(dev, pvid, vl);
+	/* Make this port join all VLANs without VLAN entries */
+	if (is58xx(dev)) {
+		b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, &reg);
+		reg |= BIT(port);
+		if (!(reg & BIT(cpu_port)))
+			reg |= BIT(cpu_port);
+		b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg);
+	} else {
+		b53_get_vlan_entry(dev, pvid, vl);
+		vl->members |= BIT(port) | BIT(dev->cpu_port);
+		vl->untag |= BIT(port) | BIT(dev->cpu_port);
+		b53_set_vlan_entry(dev, pvid, vl);
+	}
 }
 
-static void b53_br_set_stp_state(struct dsa_switch *ds, int port,
-				 u8 state)
+static void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state)
 {
-	struct b53_device *dev = ds_to_priv(ds);
-	u8 hw_state, cur_hw_state;
+	struct b53_device *dev = ds->priv;
+	u8 hw_state;
 	u8 reg;
 
-	b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), &reg);
-	cur_hw_state = reg & PORT_CTRL_STP_STATE_MASK;
-
 	switch (state) {
 	case BR_STATE_DISABLED:
 		hw_state = PORT_CTRL_DIS_STATE;
@@ -1353,30 +1429,28 @@
 		return;
 	}
 
-	/* Fast-age ARL entries if we are moving a port from Learning or
-	 * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening
-	 * state (hw_state)
-	 */
-	if (cur_hw_state != hw_state) {
-		if (cur_hw_state >= PORT_CTRL_LEARN_STATE &&
-		    hw_state <= PORT_CTRL_LISTEN_STATE) {
-			if (b53_fast_age_port(dev, port)) {
-				dev_err(ds->dev, "fast ageing failed\n");
-				return;
-			}
-		}
-	}
-
 	b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), &reg);
 	reg &= ~PORT_CTRL_STP_STATE_MASK;
 	reg |= hw_state;
 	b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg);
 }
 
-static struct dsa_switch_driver b53_switch_ops = {
-	.tag_protocol		= DSA_TAG_PROTO_NONE,
+static void b53_br_fast_age(struct dsa_switch *ds, int port)
+{
+	struct b53_device *dev = ds->priv;
+
+	if (b53_fast_age_port(dev, port))
+		dev_err(ds->dev, "fast ageing failed\n");
+}
+
+static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds)
+{
+	return DSA_TAG_PROTO_NONE;
+}
+
+static struct dsa_switch_ops b53_switch_ops = {
+	.get_tag_protocol	= b53_get_tag_protocol,
 	.setup			= b53_setup,
-	.set_addr		= b53_set_addr,
 	.get_strings		= b53_get_strings,
 	.get_ethtool_stats	= b53_get_ethtool_stats,
 	.get_sset_count		= b53_get_sset_count,
@@ -1388,6 +1462,7 @@
 	.port_bridge_join	= b53_br_join,
 	.port_bridge_leave	= b53_br_leave,
 	.port_stp_state_set	= b53_br_set_stp_state,
+	.port_fast_age		= b53_br_fast_age,
 	.port_vlan_filtering	= b53_vlan_filtering,
 	.port_vlan_prepare	= b53_vlan_prepare,
 	.port_vlan_add		= b53_vlan_add,
@@ -1593,11 +1668,22 @@
 		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
 		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
 	},
+	{
+		.chip_id = BCM7445_DEVICE_ID,
+		.dev_name = "BCM7445",
+		.vlans	= 4096,
+		.enabled_ports = 0x1ff,
+		.arl_entries = 4,
+		.cpu_port = B53_CPU_PORT,
+		.vta_regs = B53_VTA_REGS,
+		.duplex_reg = B53_DUPLEX_STAT_GE,
+		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
+		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
+	},
 };
 
 static int b53_switch_init(struct b53_device *dev)
 {
-	struct dsa_switch *ds = dev->ds;
 	unsigned int i;
 	int ret;
 
@@ -1613,7 +1699,6 @@
 			dev->vta_regs[1] = chip->vta_regs[1];
 			dev->vta_regs[2] = chip->vta_regs[2];
 			dev->jumbo_pm_reg = chip->jumbo_pm_reg;
-			ds->drv = &b53_switch_ops;
 			dev->cpu_port = chip->cpu_port;
 			dev->num_vlans = chip->vlans;
 			dev->num_arl_entries = chip->arl_entries;
@@ -1681,7 +1766,8 @@
 	return 0;
 }
 
-struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
+struct b53_device *b53_switch_alloc(struct device *base,
+				    const struct b53_io_ops *ops,
 				    void *priv)
 {
 	struct dsa_switch *ds;
@@ -1700,6 +1786,7 @@
 	dev->ds = ds;
 	dev->priv = priv;
 	dev->ops = ops;
+	ds->ops = &b53_switch_ops;
 	mutex_init(&dev->reg_mutex);
 	mutex_init(&dev->stats_mutex);
 
diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c
index aa87c3f..477a16b 100644
--- a/drivers/net/dsa/b53/b53_mdio.c
+++ b/drivers/net/dsa/b53/b53_mdio.c
@@ -267,7 +267,7 @@
 	return mdiobus_write_nested(bus, addr, reg, value);
 }
 
-static struct b53_io_ops b53_mdio_ops = {
+static const struct b53_io_ops b53_mdio_ops = {
 	.read8 = b53_mdio_read8,
 	.read16 = b53_mdio_read16,
 	.read32 = b53_mdio_read32,
diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c
index 77ffc43..cc9e6bd 100644
--- a/drivers/net/dsa/b53/b53_mmap.c
+++ b/drivers/net/dsa/b53/b53_mmap.c
@@ -208,7 +208,7 @@
 	return 0;
 }
 
-static struct b53_io_ops b53_mmap_ops = {
+static const struct b53_io_ops b53_mmap_ops = {
 	.read8 = b53_mmap_read8,
 	.read16 = b53_mmap_read16,
 	.read32 = b53_mmap_read32,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 835a744..f192a67 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -60,6 +60,7 @@
 	BCM53018_DEVICE_ID = 0x53018,
 	BCM53019_DEVICE_ID = 0x53019,
 	BCM58XX_DEVICE_ID = 0x5800,
+	BCM7445_DEVICE_ID = 0x7445,
 };
 
 #define B53_N_PORTS	9
@@ -174,6 +175,12 @@
 		dev->chip_id == BCM53019_DEVICE_ID;
 }
 
+static inline int is58xx(struct b53_device *dev)
+{
+	return dev->chip_id == BCM58XX_DEVICE_ID ||
+		dev->chip_id == BCM7445_DEVICE_ID;
+}
+
 #define B53_CPU_PORT_25	5
 #define B53_CPU_PORT	8
 
@@ -182,7 +189,8 @@
 	return dev->cpu_port;
 }
 
-struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops,
+struct b53_device *b53_switch_alloc(struct device *base,
+				    const struct b53_io_ops *ops,
 				    void *priv);
 
 int b53_switch_detect(struct b53_device *dev);
@@ -364,7 +372,6 @@
 
 #ifdef CONFIG_BCM47XX
 
-#include <linux/version.h>
 #include <linux/bcm47xx_nvram.h>
 #include <bcm47xx_board.h>
 static inline int b53_switch_get_reset_gpio(struct b53_device *dev)
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h
index 8f12bdd..dac0af4 100644
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -258,7 +258,7 @@
  * BCM5325 and BCM5365 share most definitions below
  */
 #define B53_ARLTBL_MAC_VID_ENTRY(n)	(0x10 * (n))
-#define   ARLTBL_MAC_MASK		0xffffffffffff
+#define   ARLTBL_MAC_MASK		0xffffffffffffULL
 #define   ARLTBL_VID_S			48
 #define   ARLTBL_VID_MASK_25		0xff
 #define   ARLTBL_VID_MASK		0xfff
@@ -309,6 +309,9 @@
 /* Port VLAN mask (16 bit) IMP port is always 8, also on 5325 & co */
 #define B53_PVLAN_PORT_MASK(i)		((i) * 2)
 
+/* Join all VLANs register (16 bit) */
+#define B53_JOIN_ALL_VLAN_EN		0x50
+
 /*************************************************************************
  * 802.1Q Page Registers
  *************************************************************************/
diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c
index 2bda0b5..f89f530 100644
--- a/drivers/net/dsa/b53/b53_spi.c
+++ b/drivers/net/dsa/b53/b53_spi.c
@@ -270,7 +270,7 @@
 	return spi_write(spi, txbuf, sizeof(txbuf));
 }
 
-static struct b53_io_ops b53_spi_ops = {
+static const struct b53_io_ops b53_spi_ops = {
 	.read8 = b53_spi_read8,
 	.read16 = b53_spi_read16,
 	.read32 = b53_spi_read32,
@@ -317,8 +317,6 @@
 static struct spi_driver b53_spi_driver = {
 	.driver = {
 		.name	= "b53-switch",
-		.bus	= &spi_bus_type,
-		.owner	= THIS_MODULE,
 	},
 	.probe	= b53_spi_probe,
 	.remove	= b53_spi_remove,
diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index 3e2d4a5..8a62b6a 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -344,7 +344,7 @@
 	return ret;
 }
 
-static struct b53_io_ops b53_srab_ops = {
+static const struct b53_io_ops b53_srab_ops = {
 	.read8 = b53_srab_read8,
 	.read16 = b53_srab_read16,
 	.read32 = b53_srab_read32,
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index b2b8387..e218887 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -29,130 +29,21 @@
 #include <linux/brcmphy.h>
 #include <linux/etherdevice.h>
 #include <net/switchdev.h>
+#include <linux/platform_data/b53.h>
 
 #include "bcm_sf2.h"
 #include "bcm_sf2_regs.h"
+#include "b53/b53_priv.h"
+#include "b53/b53_regs.h"
 
-/* String, offset, and register size in bytes if different from 4 bytes */
-static const struct bcm_sf2_hw_stats bcm_sf2_mib[] = {
-	{ "TxOctets",		0x000, 8	},
-	{ "TxDropPkts",		0x020		},
-	{ "TxQPKTQ0",		0x030		},
-	{ "TxBroadcastPkts",	0x040		},
-	{ "TxMulticastPkts",	0x050		},
-	{ "TxUnicastPKts",	0x060		},
-	{ "TxCollisions",	0x070		},
-	{ "TxSingleCollision",	0x080		},
-	{ "TxMultipleCollision", 0x090		},
-	{ "TxDeferredCollision", 0x0a0		},
-	{ "TxLateCollision",	0x0b0		},
-	{ "TxExcessiveCollision", 0x0c0		},
-	{ "TxFrameInDisc",	0x0d0		},
-	{ "TxPausePkts",	0x0e0		},
-	{ "TxQPKTQ1",		0x0f0		},
-	{ "TxQPKTQ2",		0x100		},
-	{ "TxQPKTQ3",		0x110		},
-	{ "TxQPKTQ4",		0x120		},
-	{ "TxQPKTQ5",		0x130		},
-	{ "RxOctets",		0x140, 8	},
-	{ "RxUndersizePkts",	0x160		},
-	{ "RxPausePkts",	0x170		},
-	{ "RxPkts64Octets",	0x180		},
-	{ "RxPkts65to127Octets", 0x190		},
-	{ "RxPkts128to255Octets", 0x1a0		},
-	{ "RxPkts256to511Octets", 0x1b0		},
-	{ "RxPkts512to1023Octets", 0x1c0	},
-	{ "RxPkts1024toMaxPktsOctets", 0x1d0	},
-	{ "RxOversizePkts",	0x1e0		},
-	{ "RxJabbers",		0x1f0		},
-	{ "RxAlignmentErrors",	0x200		},
-	{ "RxFCSErrors",	0x210		},
-	{ "RxGoodOctets",	0x220, 8	},
-	{ "RxDropPkts",		0x240		},
-	{ "RxUnicastPkts",	0x250		},
-	{ "RxMulticastPkts",	0x260		},
-	{ "RxBroadcastPkts",	0x270		},
-	{ "RxSAChanges",	0x280		},
-	{ "RxFragments",	0x290		},
-	{ "RxJumboPkt",		0x2a0		},
-	{ "RxSymblErr",		0x2b0		},
-	{ "InRangeErrCount",	0x2c0		},
-	{ "OutRangeErrCount",	0x2d0		},
-	{ "EEELpiEvent",	0x2e0		},
-	{ "EEELpiDuration",	0x2f0		},
-	{ "RxDiscard",		0x300, 8	},
-	{ "TxQPKTQ6",		0x320		},
-	{ "TxQPKTQ7",		0x330		},
-	{ "TxPkts64Octets",	0x340		},
-	{ "TxPkts65to127Octets", 0x350		},
-	{ "TxPkts128to255Octets", 0x360		},
-	{ "TxPkts256to511Ocets", 0x370		},
-	{ "TxPkts512to1023Ocets", 0x380		},
-	{ "TxPkts1024toMaxPktOcets", 0x390	},
-};
-
-#define BCM_SF2_STATS_SIZE	ARRAY_SIZE(bcm_sf2_mib)
-
-static void bcm_sf2_sw_get_strings(struct dsa_switch *ds,
-				   int port, uint8_t *data)
+static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds)
 {
-	unsigned int i;
-
-	for (i = 0; i < BCM_SF2_STATS_SIZE; i++)
-		memcpy(data + i * ETH_GSTRING_LEN,
-		       bcm_sf2_mib[i].string, ETH_GSTRING_LEN);
-}
-
-static void bcm_sf2_sw_get_ethtool_stats(struct dsa_switch *ds,
-					 int port, uint64_t *data)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	const struct bcm_sf2_hw_stats *s;
-	unsigned int i;
-	u64 val = 0;
-	u32 offset;
-
-	mutex_lock(&priv->stats_mutex);
-
-	/* Now fetch the per-port counters */
-	for (i = 0; i < BCM_SF2_STATS_SIZE; i++) {
-		s = &bcm_sf2_mib[i];
-
-		/* Do a latched 64-bit read if needed */
-		offset = s->reg + CORE_P_MIB_OFFSET(port);
-		if (s->sizeof_stat == 8)
-			val = core_readq(priv, offset);
-		else
-			val = core_readl(priv, offset);
-
-		data[i] = (u64)val;
-	}
-
-	mutex_unlock(&priv->stats_mutex);
-}
-
-static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds)
-{
-	return BCM_SF2_STATS_SIZE;
-}
-
-static const char *bcm_sf2_sw_drv_probe(struct device *dsa_dev,
-					struct device *host_dev, int sw_addr,
-					void **_priv)
-{
-	struct bcm_sf2_priv *priv;
-
-	priv = devm_kzalloc(dsa_dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return NULL;
-	*_priv = priv;
-
-	return "Broadcom Starfighter 2";
+	return DSA_TAG_PROTO_BRCM;
 }
 
 static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	unsigned int i;
 	u32 reg;
 
@@ -172,7 +63,7 @@
 
 static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	u32 reg, val;
 
 	/* Enable the port memories */
@@ -237,7 +128,7 @@
 
 static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	u32 reg;
 
 	reg = core_readl(priv, CORE_EEE_EN_CTRL);
@@ -250,7 +141,7 @@
 
 static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	u32 reg;
 
 	reg = reg_readl(priv, REG_SPHY_CNTRL);
@@ -324,7 +215,7 @@
 static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
 			      struct phy_device *phy)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	s8 cpu_port = ds->dst[ds->index].cpu_port;
 	u32 reg;
 
@@ -365,7 +256,7 @@
 	reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
 	reg &= ~PORT_VLAN_CTRL_MASK;
 	reg |= (1 << port);
-	reg |= priv->port_sts[port].vlan_ctl_mask;
+	reg |= priv->dev->ports[port].vlan_ctl_mask;
 	core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port));
 
 	bcm_sf2_imp_vlan_setup(ds, cpu_port);
@@ -380,7 +271,7 @@
 static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
 				 struct phy_device *phy)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	u32 off, reg;
 
 	if (priv->wol_ports_mask & (1 << port))
@@ -412,7 +303,7 @@
 static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
 			    struct phy_device *phy)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_eee *p = &priv->port_sts[port].eee;
 	int ret;
 
@@ -430,7 +321,7 @@
 static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port,
 			      struct ethtool_eee *e)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_eee *p = &priv->port_sts[port].eee;
 	u32 reg;
 
@@ -445,7 +336,7 @@
 			      struct phy_device *phydev,
 			      struct ethtool_eee *e)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_eee *p = &priv->port_sts[port].eee;
 
 	p->eee_enabled = e->eee_enabled;
@@ -461,469 +352,6 @@
 	return 0;
 }
 
-static int bcm_sf2_fast_age_op(struct bcm_sf2_priv *priv)
-{
-	unsigned int timeout = 1000;
-	u32 reg;
-
-	reg = core_readl(priv, CORE_FAST_AGE_CTRL);
-	reg |= EN_AGE_PORT | EN_AGE_VLAN | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE;
-	core_writel(priv, reg, CORE_FAST_AGE_CTRL);
-
-	do {
-		reg = core_readl(priv, CORE_FAST_AGE_CTRL);
-		if (!(reg & FAST_AGE_STR_DONE))
-			break;
-
-		cpu_relax();
-	} while (timeout--);
-
-	if (!timeout)
-		return -ETIMEDOUT;
-
-	core_writel(priv, 0, CORE_FAST_AGE_CTRL);
-
-	return 0;
-}
-
-/* Fast-ageing of ARL entries for a given port, equivalent to an ARL
- * flush for that port.
- */
-static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-
-	core_writel(priv, port, CORE_FAST_AGE_PORT);
-
-	return bcm_sf2_fast_age_op(priv);
-}
-
-static int bcm_sf2_sw_fast_age_vlan(struct bcm_sf2_priv *priv, u16 vid)
-{
-	core_writel(priv, vid, CORE_FAST_AGE_VID);
-
-	return bcm_sf2_fast_age_op(priv);
-}
-
-static int bcm_sf2_vlan_op_wait(struct bcm_sf2_priv *priv)
-{
-	unsigned int timeout = 10;
-	u32 reg;
-
-	do {
-		reg = core_readl(priv, CORE_ARLA_VTBL_RWCTRL);
-		if (!(reg & ARLA_VTBL_STDN))
-			return 0;
-
-		usleep_range(1000, 2000);
-	} while (timeout--);
-
-	return -ETIMEDOUT;
-}
-
-static int bcm_sf2_vlan_op(struct bcm_sf2_priv *priv, u8 op)
-{
-	core_writel(priv, ARLA_VTBL_STDN | op, CORE_ARLA_VTBL_RWCTRL);
-
-	return bcm_sf2_vlan_op_wait(priv);
-}
-
-static void bcm_sf2_set_vlan_entry(struct bcm_sf2_priv *priv, u16 vid,
-				   struct bcm_sf2_vlan *vlan)
-{
-	int ret;
-
-	core_writel(priv, vid & VTBL_ADDR_INDEX_MASK, CORE_ARLA_VTBL_ADDR);
-	core_writel(priv, vlan->untag << UNTAG_MAP_SHIFT | vlan->members,
-		    CORE_ARLA_VTBL_ENTRY);
-
-	ret = bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_WRITE);
-	if (ret)
-		pr_err("failed to write VLAN entry\n");
-}
-
-static int bcm_sf2_get_vlan_entry(struct bcm_sf2_priv *priv, u16 vid,
-				  struct bcm_sf2_vlan *vlan)
-{
-	u32 entry;
-	int ret;
-
-	core_writel(priv, vid & VTBL_ADDR_INDEX_MASK, CORE_ARLA_VTBL_ADDR);
-
-	ret = bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_READ);
-	if (ret)
-		return ret;
-
-	entry = core_readl(priv, CORE_ARLA_VTBL_ENTRY);
-	vlan->members = entry & FWD_MAP_MASK;
-	vlan->untag = (entry >> UNTAG_MAP_SHIFT) & UNTAG_MAP_MASK;
-
-	return 0;
-}
-
-static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port,
-			      struct net_device *bridge)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	s8 cpu_port = ds->dst->cpu_port;
-	unsigned int i;
-	u32 reg, p_ctl;
-
-	/* Make this port leave the all VLANs join since we will have proper
-	 * VLAN entries from now on
-	 */
-	reg = core_readl(priv, CORE_JOIN_ALL_VLAN_EN);
-	reg &= ~BIT(port);
-	if ((reg & BIT(cpu_port)) == BIT(cpu_port))
-		reg &= ~BIT(cpu_port);
-	core_writel(priv, reg, CORE_JOIN_ALL_VLAN_EN);
-
-	priv->port_sts[port].bridge_dev = bridge;
-	p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
-
-	for (i = 0; i < priv->hw_params.num_ports; i++) {
-		if (priv->port_sts[i].bridge_dev != bridge)
-			continue;
-
-		/* Add this local port to the remote port VLAN control
-		 * membership and update the remote port bitmask
-		 */
-		reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
-		reg |= 1 << port;
-		core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
-		priv->port_sts[i].vlan_ctl_mask = reg;
-
-		p_ctl |= 1 << i;
-	}
-
-	/* Configure the local port VLAN control membership to include
-	 * remote ports and update the local port bitmask
-	 */
-	core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
-	priv->port_sts[port].vlan_ctl_mask = p_ctl;
-
-	return 0;
-}
-
-static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	struct net_device *bridge = priv->port_sts[port].bridge_dev;
-	s8 cpu_port = ds->dst->cpu_port;
-	unsigned int i;
-	u32 reg, p_ctl;
-
-	p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
-
-	for (i = 0; i < priv->hw_params.num_ports; i++) {
-		/* Don't touch the remaining ports */
-		if (priv->port_sts[i].bridge_dev != bridge)
-			continue;
-
-		reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
-		reg &= ~(1 << port);
-		core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
-		priv->port_sts[port].vlan_ctl_mask = reg;
-
-		/* Prevent self removal to preserve isolation */
-		if (port != i)
-			p_ctl &= ~(1 << i);
-	}
-
-	core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port));
-	priv->port_sts[port].vlan_ctl_mask = p_ctl;
-	priv->port_sts[port].bridge_dev = NULL;
-
-	/* Make this port join all VLANs without VLAN entries */
-	reg = core_readl(priv, CORE_JOIN_ALL_VLAN_EN);
-	reg |= BIT(port);
-	if (!(reg & BIT(cpu_port)))
-		reg |= BIT(cpu_port);
-	core_writel(priv, reg, CORE_JOIN_ALL_VLAN_EN);
-}
-
-static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port,
-					u8 state)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	u8 hw_state, cur_hw_state;
-	u32 reg;
-
-	reg = core_readl(priv, CORE_G_PCTL_PORT(port));
-	cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT);
-
-	switch (state) {
-	case BR_STATE_DISABLED:
-		hw_state = G_MISTP_DIS_STATE;
-		break;
-	case BR_STATE_LISTENING:
-		hw_state = G_MISTP_LISTEN_STATE;
-		break;
-	case BR_STATE_LEARNING:
-		hw_state = G_MISTP_LEARN_STATE;
-		break;
-	case BR_STATE_FORWARDING:
-		hw_state = G_MISTP_FWD_STATE;
-		break;
-	case BR_STATE_BLOCKING:
-		hw_state = G_MISTP_BLOCK_STATE;
-		break;
-	default:
-		pr_err("%s: invalid STP state: %d\n", __func__, state);
-		return;
-	}
-
-	/* Fast-age ARL entries if we are moving a port from Learning or
-	 * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening
-	 * state (hw_state)
-	 */
-	if (cur_hw_state != hw_state) {
-		if (cur_hw_state >= G_MISTP_LEARN_STATE &&
-		    hw_state <= G_MISTP_LISTEN_STATE) {
-			if (bcm_sf2_sw_fast_age_port(ds, port)) {
-				pr_err("%s: fast-ageing failed\n", __func__);
-				return;
-			}
-		}
-	}
-
-	reg = core_readl(priv, CORE_G_PCTL_PORT(port));
-	reg &= ~(G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT);
-	reg |= hw_state;
-	core_writel(priv, reg, CORE_G_PCTL_PORT(port));
-}
-
-/* Address Resolution Logic routines */
-static int bcm_sf2_arl_op_wait(struct bcm_sf2_priv *priv)
-{
-	unsigned int timeout = 10;
-	u32 reg;
-
-	do {
-		reg = core_readl(priv, CORE_ARLA_RWCTL);
-		if (!(reg & ARL_STRTDN))
-			return 0;
-
-		usleep_range(1000, 2000);
-	} while (timeout--);
-
-	return -ETIMEDOUT;
-}
-
-static int bcm_sf2_arl_rw_op(struct bcm_sf2_priv *priv, unsigned int op)
-{
-	u32 cmd;
-
-	if (op > ARL_RW)
-		return -EINVAL;
-
-	cmd = core_readl(priv, CORE_ARLA_RWCTL);
-	cmd &= ~IVL_SVL_SELECT;
-	cmd |= ARL_STRTDN;
-	if (op)
-		cmd |= ARL_RW;
-	else
-		cmd &= ~ARL_RW;
-	core_writel(priv, cmd, CORE_ARLA_RWCTL);
-
-	return bcm_sf2_arl_op_wait(priv);
-}
-
-static int bcm_sf2_arl_read(struct bcm_sf2_priv *priv, u64 mac,
-			    u16 vid, struct bcm_sf2_arl_entry *ent, u8 *idx,
-			    bool is_valid)
-{
-	unsigned int i;
-	int ret;
-
-	ret = bcm_sf2_arl_op_wait(priv);
-	if (ret)
-		return ret;
-
-	/* Read the 4 bins */
-	for (i = 0; i < 4; i++) {
-		u64 mac_vid;
-		u32 fwd_entry;
-
-		mac_vid = core_readq(priv, CORE_ARLA_MACVID_ENTRY(i));
-		fwd_entry = core_readl(priv, CORE_ARLA_FWD_ENTRY(i));
-		bcm_sf2_arl_to_entry(ent, mac_vid, fwd_entry);
-
-		if (ent->is_valid && is_valid) {
-			*idx = i;
-			return 0;
-		}
-
-		/* This is the MAC we just deleted */
-		if (!is_valid && (mac_vid & mac))
-			return 0;
-	}
-
-	return -ENOENT;
-}
-
-static int bcm_sf2_arl_op(struct bcm_sf2_priv *priv, int op, int port,
-			  const unsigned char *addr, u16 vid, bool is_valid)
-{
-	struct bcm_sf2_arl_entry ent;
-	u32 fwd_entry;
-	u64 mac, mac_vid = 0;
-	u8 idx = 0;
-	int ret;
-
-	/* Convert the array into a 64-bit MAC */
-	mac = bcm_sf2_mac_to_u64(addr);
-
-	/* Perform a read for the given MAC and VID */
-	core_writeq(priv, mac, CORE_ARLA_MAC);
-	core_writel(priv, vid, CORE_ARLA_VID);
-
-	/* Issue a read operation for this MAC */
-	ret = bcm_sf2_arl_rw_op(priv, 1);
-	if (ret)
-		return ret;
-
-	ret = bcm_sf2_arl_read(priv, mac, vid, &ent, &idx, is_valid);
-	/* If this is a read, just finish now */
-	if (op)
-		return ret;
-
-	/* We could not find a matching MAC, so reset to a new entry */
-	if (ret) {
-		fwd_entry = 0;
-		idx = 0;
-	}
-
-	memset(&ent, 0, sizeof(ent));
-	ent.port = port;
-	ent.is_valid = is_valid;
-	ent.vid = vid;
-	ent.is_static = true;
-	memcpy(ent.mac, addr, ETH_ALEN);
-	bcm_sf2_arl_from_entry(&mac_vid, &fwd_entry, &ent);
-
-	core_writeq(priv, mac_vid, CORE_ARLA_MACVID_ENTRY(idx));
-	core_writel(priv, fwd_entry, CORE_ARLA_FWD_ENTRY(idx));
-
-	ret = bcm_sf2_arl_rw_op(priv, 0);
-	if (ret)
-		return ret;
-
-	/* Re-read the entry to check */
-	return bcm_sf2_arl_read(priv, mac, vid, &ent, &idx, is_valid);
-}
-
-static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, int port,
-				  const struct switchdev_obj_port_fdb *fdb,
-				  struct switchdev_trans *trans)
-{
-	/* We do not need to do anything specific here yet */
-	return 0;
-}
-
-static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, int port,
-			       const struct switchdev_obj_port_fdb *fdb,
-			       struct switchdev_trans *trans)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-
-	if (bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, true))
-		pr_err("%s: failed to add MAC address\n", __func__);
-}
-
-static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, int port,
-			      const struct switchdev_obj_port_fdb *fdb)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-
-	return bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, false);
-}
-
-static int bcm_sf2_arl_search_wait(struct bcm_sf2_priv *priv)
-{
-	unsigned timeout = 1000;
-	u32 reg;
-
-	do {
-		reg = core_readl(priv, CORE_ARLA_SRCH_CTL);
-		if (!(reg & ARLA_SRCH_STDN))
-			return 0;
-
-		if (reg & ARLA_SRCH_VLID)
-			return 0;
-
-		usleep_range(1000, 2000);
-	} while (timeout--);
-
-	return -ETIMEDOUT;
-}
-
-static void bcm_sf2_arl_search_rd(struct bcm_sf2_priv *priv, u8 idx,
-				  struct bcm_sf2_arl_entry *ent)
-{
-	u64 mac_vid;
-	u32 fwd_entry;
-
-	mac_vid = core_readq(priv, CORE_ARLA_SRCH_RSLT_MACVID(idx));
-	fwd_entry = core_readl(priv, CORE_ARLA_SRCH_RSLT(idx));
-	bcm_sf2_arl_to_entry(ent, mac_vid, fwd_entry);
-}
-
-static int bcm_sf2_sw_fdb_copy(struct net_device *dev, int port,
-			       const struct bcm_sf2_arl_entry *ent,
-			       struct switchdev_obj_port_fdb *fdb,
-			       int (*cb)(struct switchdev_obj *obj))
-{
-	if (!ent->is_valid)
-		return 0;
-
-	if (port != ent->port)
-		return 0;
-
-	ether_addr_copy(fdb->addr, ent->mac);
-	fdb->vid = ent->vid;
-	fdb->ndm_state = ent->is_static ? NUD_NOARP : NUD_REACHABLE;
-
-	return cb(&fdb->obj);
-}
-
-static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int port,
-			       struct switchdev_obj_port_fdb *fdb,
-			       int (*cb)(struct switchdev_obj *obj))
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	struct net_device *dev = ds->ports[port].netdev;
-	struct bcm_sf2_arl_entry results[2];
-	unsigned int count = 0;
-	int ret;
-
-	/* Start search operation */
-	core_writel(priv, ARLA_SRCH_STDN, CORE_ARLA_SRCH_CTL);
-
-	do {
-		ret = bcm_sf2_arl_search_wait(priv);
-		if (ret)
-			return ret;
-
-		/* Read both entries, then return their values back */
-		bcm_sf2_arl_search_rd(priv, 0, &results[0]);
-		ret = bcm_sf2_sw_fdb_copy(dev, port, &results[0], fdb, cb);
-		if (ret)
-			return ret;
-
-		bcm_sf2_arl_search_rd(priv, 1, &results[1]);
-		ret = bcm_sf2_sw_fdb_copy(dev, port, &results[1], fdb, cb);
-		if (ret)
-			return ret;
-
-		if (!results[0].is_valid && !results[1].is_valid)
-			break;
-
-	} while (count++ < CORE_ARLA_NUM_ENTRIES);
-
-	return 0;
-}
-
 static int bcm_sf2_sw_indir_rw(struct bcm_sf2_priv *priv, int op, int addr,
 			       int regnum, u16 val)
 {
@@ -1036,12 +464,10 @@
 
 static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv)
 {
-	intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET);
+	intrl2_0_mask_set(priv, 0xffffffff);
 	intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR);
-	intrl2_0_writel(priv, 0, INTRL2_CPU_MASK_CLEAR);
-	intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET);
+	intrl2_1_mask_set(priv, 0xffffffff);
 	intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR);
-	intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR);
 }
 
 static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv,
@@ -1082,7 +508,7 @@
 
 static int bcm_sf2_mdio_register(struct dsa_switch *ds)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct device_node *dn;
 	static int index;
 	int err;
@@ -1146,14 +572,9 @@
 		of_node_put(priv->master_mii_dn);
 }
 
-static int bcm_sf2_sw_set_addr(struct dsa_switch *ds, u8 *addr)
-{
-	return 0;
-}
-
 static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 
 	/* The BCM7xxx PHY driver expects to find the integrated PHY revision
 	 * in bits 15:8 and the patch level in bits 7:0 which is exactly what
@@ -1166,7 +587,7 @@
 static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port,
 				   struct phy_device *phydev)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	u32 id_mode_dis = 0, port_mode;
 	const char *str = NULL;
 	u32 reg;
@@ -1246,7 +667,7 @@
 static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
 					 struct fixed_phy_status *status)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	u32 duplex, pause;
 	u32 reg;
 
@@ -1298,7 +719,7 @@
 
 static int bcm_sf2_sw_suspend(struct dsa_switch *ds)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	unsigned int port;
 
 	bcm_sf2_intr_disable(priv);
@@ -1318,7 +739,7 @@
 
 static int bcm_sf2_sw_resume(struct dsa_switch *ds)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	unsigned int port;
 	int ret;
 
@@ -1345,7 +766,7 @@
 			       struct ethtool_wolinfo *wol)
 {
 	struct net_device *p = ds->dst[ds->index].master_netdev;
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	struct ethtool_wolinfo pwol;
 
 	/* Get the parent device WoL settings */
@@ -1368,7 +789,7 @@
 			      struct ethtool_wolinfo *wol)
 {
 	struct net_device *p = ds->dst[ds->index].master_netdev;
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	s8 cpu_port = ds->dst[ds->index].cpu_port;
 	struct ethtool_wolinfo pwol;
 
@@ -1393,43 +814,32 @@
 	return p->ethtool_ops->set_wol(p, wol);
 }
 
-static void bcm_sf2_enable_vlan(struct bcm_sf2_priv *priv, bool enable)
+static int bcm_sf2_vlan_op_wait(struct bcm_sf2_priv *priv)
 {
-	u32 mgmt, vc0, vc1, vc4, vc5;
+	unsigned int timeout = 10;
+	u32 reg;
 
-	mgmt = core_readl(priv, CORE_SWMODE);
-	vc0 = core_readl(priv, CORE_VLAN_CTRL0);
-	vc1 = core_readl(priv, CORE_VLAN_CTRL1);
-	vc4 = core_readl(priv, CORE_VLAN_CTRL4);
-	vc5 = core_readl(priv, CORE_VLAN_CTRL5);
+	do {
+		reg = core_readl(priv, CORE_ARLA_VTBL_RWCTRL);
+		if (!(reg & ARLA_VTBL_STDN))
+			return 0;
 
-	mgmt &= ~SW_FWDG_MODE;
+		usleep_range(1000, 2000);
+	} while (timeout--);
 
-	if (enable) {
-		vc0 |= VLAN_EN | VLAN_LEARN_MODE_IVL;
-		vc1 |= EN_RSV_MCAST_UNTAG | EN_RSV_MCAST_FWDMAP;
-		vc4 &= ~(INGR_VID_CHK_MASK << INGR_VID_CHK_SHIFT);
-		vc4 |= INGR_VID_CHK_DROP;
-		vc5 |= DROP_VTABLE_MISS | EN_VID_FFF_FWD;
-	} else {
-		vc0 &= ~(VLAN_EN | VLAN_LEARN_MODE_IVL);
-		vc1 &= ~(EN_RSV_MCAST_UNTAG | EN_RSV_MCAST_FWDMAP);
-		vc4 &= ~(INGR_VID_CHK_MASK << INGR_VID_CHK_SHIFT);
-		vc5 &= ~(DROP_VTABLE_MISS | EN_VID_FFF_FWD);
-		vc4 |= INGR_VID_CHK_VID_VIOL_IMP;
-	}
+	return -ETIMEDOUT;
+}
 
-	core_writel(priv, vc0, CORE_VLAN_CTRL0);
-	core_writel(priv, vc1, CORE_VLAN_CTRL1);
-	core_writel(priv, 0, CORE_VLAN_CTRL3);
-	core_writel(priv, vc4, CORE_VLAN_CTRL4);
-	core_writel(priv, vc5, CORE_VLAN_CTRL5);
-	core_writel(priv, mgmt, CORE_SWMODE);
+static int bcm_sf2_vlan_op(struct bcm_sf2_priv *priv, u8 op)
+{
+	core_writel(priv, ARLA_VTBL_STDN | op, CORE_ARLA_VTBL_RWCTRL);
+
+	return bcm_sf2_vlan_op_wait(priv);
 }
 
 static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds)
 {
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	unsigned int port;
 
 	/* Clear all VLANs */
@@ -1443,162 +853,199 @@
 	}
 }
 
-static int bcm_sf2_sw_vlan_filtering(struct dsa_switch *ds, int port,
-				     bool vlan_filtering)
-{
-	return 0;
-}
-
-static int bcm_sf2_sw_vlan_prepare(struct dsa_switch *ds, int port,
-				   const struct switchdev_obj_port_vlan *vlan,
-				   struct switchdev_trans *trans)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-
-	bcm_sf2_enable_vlan(priv, true);
-
-	return 0;
-}
-
-static void bcm_sf2_sw_vlan_add(struct dsa_switch *ds, int port,
-				const struct switchdev_obj_port_vlan *vlan,
-				struct switchdev_trans *trans)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
-	bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
-	s8 cpu_port = ds->dst->cpu_port;
-	struct bcm_sf2_vlan *vl;
-	u16 vid;
-
-	for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
-		vl = &priv->vlans[vid];
-
-		bcm_sf2_get_vlan_entry(priv, vid, vl);
-
-		vl->members |= BIT(port) | BIT(cpu_port);
-		if (untagged)
-			vl->untag |= BIT(port) | BIT(cpu_port);
-		else
-			vl->untag &= ~(BIT(port) | BIT(cpu_port));
-
-		bcm_sf2_set_vlan_entry(priv, vid, vl);
-		bcm_sf2_sw_fast_age_vlan(priv, vid);
-	}
-
-	if (pvid) {
-		core_writel(priv, vlan->vid_end, CORE_DEFAULT_1Q_TAG_P(port));
-		core_writel(priv, vlan->vid_end,
-			    CORE_DEFAULT_1Q_TAG_P(cpu_port));
-		bcm_sf2_sw_fast_age_vlan(priv, vid);
-	}
-}
-
-static int bcm_sf2_sw_vlan_del(struct dsa_switch *ds, int port,
-			       const struct switchdev_obj_port_vlan *vlan)
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
-	s8 cpu_port = ds->dst->cpu_port;
-	struct bcm_sf2_vlan *vl;
-	u16 vid, pvid;
-	int ret;
-
-	pvid = core_readl(priv, CORE_DEFAULT_1Q_TAG_P(port));
-
-	for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
-		vl = &priv->vlans[vid];
-
-		ret = bcm_sf2_get_vlan_entry(priv, vid, vl);
-		if (ret)
-			return ret;
-
-		vl->members &= ~BIT(port);
-		if ((vl->members & BIT(cpu_port)) == BIT(cpu_port))
-			vl->members = 0;
-		if (pvid == vid)
-			pvid = 0;
-		if (untagged) {
-			vl->untag &= ~BIT(port);
-			if ((vl->untag & BIT(port)) == BIT(cpu_port))
-				vl->untag = 0;
-		}
-
-		bcm_sf2_set_vlan_entry(priv, vid, vl);
-		bcm_sf2_sw_fast_age_vlan(priv, vid);
-	}
-
-	core_writel(priv, pvid, CORE_DEFAULT_1Q_TAG_P(port));
-	core_writel(priv, pvid, CORE_DEFAULT_1Q_TAG_P(cpu_port));
-	bcm_sf2_sw_fast_age_vlan(priv, vid);
-
-	return 0;
-}
-
-static int bcm_sf2_sw_vlan_dump(struct dsa_switch *ds, int port,
-				struct switchdev_obj_port_vlan *vlan,
-				int (*cb)(struct switchdev_obj *obj))
-{
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	struct bcm_sf2_port_status *p = &priv->port_sts[port];
-	struct bcm_sf2_vlan *vl;
-	u16 vid, pvid;
-	int err = 0;
-
-	pvid = core_readl(priv, CORE_DEFAULT_1Q_TAG_P(port));
-
-	for (vid = 0; vid < VLAN_N_VID; vid++) {
-		vl = &priv->vlans[vid];
-
-		if (!(vl->members & BIT(port)))
-			continue;
-
-		vlan->vid_begin = vlan->vid_end = vid;
-		vlan->flags = 0;
-
-		if (vl->untag & BIT(port))
-			vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
-		if (p->pvid == vid)
-			vlan->flags |= BRIDGE_VLAN_INFO_PVID;
-
-		err = cb(&vlan->obj);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
 static int bcm_sf2_sw_setup(struct dsa_switch *ds)
 {
-	const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME;
-	struct bcm_sf2_priv *priv = ds_to_priv(ds);
-	struct device_node *dn;
-	void __iomem **base;
+	struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
 	unsigned int port;
+
+	/* Enable all valid ports and disable those unused */
+	for (port = 0; port < priv->hw_params.num_ports; port++) {
+		/* IMP port receives special treatment */
+		if ((1 << port) & ds->enabled_port_mask)
+			bcm_sf2_port_setup(ds, port, NULL);
+		else if (dsa_is_cpu_port(ds, port))
+			bcm_sf2_imp_setup(ds, port);
+		else
+			bcm_sf2_port_disable(ds, port, NULL);
+	}
+
+	bcm_sf2_sw_configure_vlan(ds);
+
+	return 0;
+}
+
+/* The SWITCH_CORE register space is managed by b53 but operates on a page +
+ * register basis so we need to translate that into an address that the
+ * bus-glue understands.
+ */
+#define SF2_PAGE_REG_MKADDR(page, reg)	((page) << 10 | (reg) << 2)
+
+static int bcm_sf2_core_read8(struct b53_device *dev, u8 page, u8 reg,
+			      u8 *val)
+{
+	struct bcm_sf2_priv *priv = dev->priv;
+
+	*val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg));
+
+	return 0;
+}
+
+static int bcm_sf2_core_read16(struct b53_device *dev, u8 page, u8 reg,
+			       u16 *val)
+{
+	struct bcm_sf2_priv *priv = dev->priv;
+
+	*val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg));
+
+	return 0;
+}
+
+static int bcm_sf2_core_read32(struct b53_device *dev, u8 page, u8 reg,
+			       u32 *val)
+{
+	struct bcm_sf2_priv *priv = dev->priv;
+
+	*val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg));
+
+	return 0;
+}
+
+static int bcm_sf2_core_read64(struct b53_device *dev, u8 page, u8 reg,
+			       u64 *val)
+{
+	struct bcm_sf2_priv *priv = dev->priv;
+
+	*val = core_readq(priv, SF2_PAGE_REG_MKADDR(page, reg));
+
+	return 0;
+}
+
+static int bcm_sf2_core_write8(struct b53_device *dev, u8 page, u8 reg,
+			       u8 value)
+{
+	struct bcm_sf2_priv *priv = dev->priv;
+
+	core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg));
+
+	return 0;
+}
+
+static int bcm_sf2_core_write16(struct b53_device *dev, u8 page, u8 reg,
+				u16 value)
+{
+	struct bcm_sf2_priv *priv = dev->priv;
+
+	core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg));
+
+	return 0;
+}
+
+static int bcm_sf2_core_write32(struct b53_device *dev, u8 page, u8 reg,
+				u32 value)
+{
+	struct bcm_sf2_priv *priv = dev->priv;
+
+	core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg));
+
+	return 0;
+}
+
+static int bcm_sf2_core_write64(struct b53_device *dev, u8 page, u8 reg,
+				u64 value)
+{
+	struct bcm_sf2_priv *priv = dev->priv;
+
+	core_writeq(priv, value, SF2_PAGE_REG_MKADDR(page, reg));
+
+	return 0;
+}
+
+static struct b53_io_ops bcm_sf2_io_ops = {
+	.read8	= bcm_sf2_core_read8,
+	.read16	= bcm_sf2_core_read16,
+	.read32	= bcm_sf2_core_read32,
+	.read48	= bcm_sf2_core_read64,
+	.read64	= bcm_sf2_core_read64,
+	.write8	= bcm_sf2_core_write8,
+	.write16 = bcm_sf2_core_write16,
+	.write32 = bcm_sf2_core_write32,
+	.write48 = bcm_sf2_core_write64,
+	.write64 = bcm_sf2_core_write64,
+};
+
+static int bcm_sf2_sw_probe(struct platform_device *pdev)
+{
+	const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME;
+	struct device_node *dn = pdev->dev.of_node;
+	struct b53_platform_data *pdata;
+	struct bcm_sf2_priv *priv;
+	struct b53_device *dev;
+	struct dsa_switch *ds;
+	void __iomem **base;
+	struct resource *r;
 	unsigned int i;
 	u32 reg, rev;
 	int ret;
 
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	dev = b53_switch_alloc(&pdev->dev, &bcm_sf2_io_ops, priv);
+	if (!dev)
+		return -ENOMEM;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	/* Auto-detection using standard registers will not work, so
+	 * provide an indication of what kind of device we are for
+	 * b53_common to work with
+	 */
+	pdata->chip_id = BCM7445_DEVICE_ID;
+	dev->pdata = pdata;
+
+	priv->dev = dev;
+	ds = dev->ds;
+
+	/* Override the parts that are non-standard wrt. normal b53 devices */
+	ds->ops->get_tag_protocol = bcm_sf2_sw_get_tag_protocol;
+	ds->ops->setup = bcm_sf2_sw_setup;
+	ds->ops->get_phy_flags = bcm_sf2_sw_get_phy_flags;
+	ds->ops->adjust_link = bcm_sf2_sw_adjust_link;
+	ds->ops->fixed_link_update = bcm_sf2_sw_fixed_link_update;
+	ds->ops->suspend = bcm_sf2_sw_suspend;
+	ds->ops->resume = bcm_sf2_sw_resume;
+	ds->ops->get_wol = bcm_sf2_sw_get_wol;
+	ds->ops->set_wol = bcm_sf2_sw_set_wol;
+	ds->ops->port_enable = bcm_sf2_port_setup;
+	ds->ops->port_disable = bcm_sf2_port_disable;
+	ds->ops->get_eee = bcm_sf2_sw_get_eee;
+	ds->ops->set_eee = bcm_sf2_sw_set_eee;
+
+	/* Avoid having DSA free our slave MDIO bus (checking for
+	 * ds->slave_mii_bus and ds->ops->phy_read being non-NULL)
+	 */
+	ds->ops->phy_read = NULL;
+
+	dev_set_drvdata(&pdev->dev, priv);
+
 	spin_lock_init(&priv->indir_lock);
 	mutex_init(&priv->stats_mutex);
 
-	/* All the interesting properties are at the parent device_node
-	 * level
-	 */
-	dn = ds->cd->of_node->parent;
-	bcm_sf2_identify_ports(priv, ds->cd->of_node);
+	bcm_sf2_identify_ports(priv, dn->child);
 
 	priv->irq0 = irq_of_parse_and_map(dn, 0);
 	priv->irq1 = irq_of_parse_and_map(dn, 1);
 
 	base = &priv->core;
 	for (i = 0; i < BCM_SF2_REGS_NUM; i++) {
-		*base = of_iomap(dn, i);
-		if (*base == NULL) {
+		r = platform_get_resource(pdev, IORESOURCE_MEM, i);
+		*base = devm_ioremap_resource(&pdev->dev, r);
+		if (IS_ERR(*base)) {
 			pr_err("unable to find register: %s\n", reg_names[i]);
-			ret = -ENOMEM;
-			goto out_unmap;
+			return PTR_ERR(*base);
 		}
 		base++;
 	}
@@ -1606,30 +1053,30 @@
 	ret = bcm_sf2_sw_rst(priv);
 	if (ret) {
 		pr_err("unable to software reset switch: %d\n", ret);
-		goto out_unmap;
+		return ret;
 	}
 
 	ret = bcm_sf2_mdio_register(ds);
 	if (ret) {
 		pr_err("failed to register MDIO bus\n");
-		goto out_unmap;
+		return ret;
 	}
 
 	/* Disable all interrupts and request them */
 	bcm_sf2_intr_disable(priv);
 
-	ret = request_irq(priv->irq0, bcm_sf2_switch_0_isr, 0,
-			  "switch_0", priv);
+	ret = devm_request_irq(&pdev->dev, priv->irq0, bcm_sf2_switch_0_isr, 0,
+			       "switch_0", priv);
 	if (ret < 0) {
 		pr_err("failed to request switch_0 IRQ\n");
 		goto out_mdio;
 	}
 
-	ret = request_irq(priv->irq1, bcm_sf2_switch_1_isr, 0,
-			  "switch_1", priv);
+	ret = devm_request_irq(&pdev->dev, priv->irq1, bcm_sf2_switch_1_isr, 0,
+			       "switch_1", priv);
 	if (ret < 0) {
 		pr_err("failed to request switch_1 IRQ\n");
-		goto out_free_irq0;
+		goto out_mdio;
 	}
 
 	/* Reset the MIB counters */
@@ -1649,19 +1096,6 @@
 				 &priv->hw_params.num_gphy))
 		priv->hw_params.num_gphy = 1;
 
-	/* Enable all valid ports and disable those unused */
-	for (port = 0; port < priv->hw_params.num_ports; port++) {
-		/* IMP port receives special treatment */
-		if ((1 << port) & ds->enabled_port_mask)
-			bcm_sf2_port_setup(ds, port, NULL);
-		else if (dsa_is_cpu_port(ds, port))
-			bcm_sf2_imp_setup(ds, port);
-		else
-			bcm_sf2_port_disable(ds, port, NULL);
-	}
-
-	bcm_sf2_sw_configure_vlan(ds);
-
 	rev = reg_readl(priv, REG_SWITCH_REVISION);
 	priv->hw_params.top_rev = (rev >> SWITCH_TOP_REV_SHIFT) &
 					SWITCH_TOP_REV_MASK;
@@ -1670,6 +1104,10 @@
 	rev = reg_readl(priv, REG_PHY_REVISION);
 	priv->hw_params.gphy_rev = rev & PHY_REVISION_MASK;
 
+	ret = b53_switch_register(dev);
+	if (ret)
+		goto out_mdio;
+
 	pr_info("Starfighter 2 top: %x.%02x, core: %x.%02x base: 0x%p, IRQs: %d, %d\n",
 		priv->hw_params.top_rev >> 8, priv->hw_params.top_rev & 0xff,
 		priv->hw_params.core_rev >> 8, priv->hw_params.core_rev & 0xff,
@@ -1677,66 +1115,60 @@
 
 	return 0;
 
-out_free_irq0:
-	free_irq(priv->irq0, priv);
 out_mdio:
 	bcm_sf2_mdio_unregister(priv);
-out_unmap:
-	base = &priv->core;
-	for (i = 0; i < BCM_SF2_REGS_NUM; i++) {
-		if (*base)
-			iounmap(*base);
-		base++;
-	}
 	return ret;
 }
 
-static struct dsa_switch_driver bcm_sf2_switch_driver = {
-	.tag_protocol		= DSA_TAG_PROTO_BRCM,
-	.probe			= bcm_sf2_sw_drv_probe,
-	.setup			= bcm_sf2_sw_setup,
-	.set_addr		= bcm_sf2_sw_set_addr,
-	.get_phy_flags		= bcm_sf2_sw_get_phy_flags,
-	.get_strings		= bcm_sf2_sw_get_strings,
-	.get_ethtool_stats	= bcm_sf2_sw_get_ethtool_stats,
-	.get_sset_count		= bcm_sf2_sw_get_sset_count,
-	.adjust_link		= bcm_sf2_sw_adjust_link,
-	.fixed_link_update	= bcm_sf2_sw_fixed_link_update,
-	.suspend		= bcm_sf2_sw_suspend,
-	.resume			= bcm_sf2_sw_resume,
-	.get_wol		= bcm_sf2_sw_get_wol,
-	.set_wol		= bcm_sf2_sw_set_wol,
-	.port_enable		= bcm_sf2_port_setup,
-	.port_disable		= bcm_sf2_port_disable,
-	.get_eee		= bcm_sf2_sw_get_eee,
-	.set_eee		= bcm_sf2_sw_set_eee,
-	.port_bridge_join	= bcm_sf2_sw_br_join,
-	.port_bridge_leave	= bcm_sf2_sw_br_leave,
-	.port_stp_state_set	= bcm_sf2_sw_br_set_stp_state,
-	.port_fdb_prepare	= bcm_sf2_sw_fdb_prepare,
-	.port_fdb_add		= bcm_sf2_sw_fdb_add,
-	.port_fdb_del		= bcm_sf2_sw_fdb_del,
-	.port_fdb_dump		= bcm_sf2_sw_fdb_dump,
-	.port_vlan_filtering	= bcm_sf2_sw_vlan_filtering,
-	.port_vlan_prepare	= bcm_sf2_sw_vlan_prepare,
-	.port_vlan_add		= bcm_sf2_sw_vlan_add,
-	.port_vlan_del		= bcm_sf2_sw_vlan_del,
-	.port_vlan_dump		= bcm_sf2_sw_vlan_dump,
-};
-
-static int __init bcm_sf2_init(void)
+static int bcm_sf2_sw_remove(struct platform_device *pdev)
 {
-	register_switch_driver(&bcm_sf2_switch_driver);
+	struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+
+	/* Disable all ports and interrupts */
+	priv->wol_ports_mask = 0;
+	bcm_sf2_sw_suspend(priv->dev->ds);
+	dsa_unregister_switch(priv->dev->ds);
+	bcm_sf2_mdio_unregister(priv);
 
 	return 0;
 }
-module_init(bcm_sf2_init);
 
-static void __exit bcm_sf2_exit(void)
+#ifdef CONFIG_PM_SLEEP
+static int bcm_sf2_suspend(struct device *dev)
 {
-	unregister_switch_driver(&bcm_sf2_switch_driver);
+	struct platform_device *pdev = to_platform_device(dev);
+	struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+
+	return dsa_switch_suspend(priv->dev->ds);
 }
-module_exit(bcm_sf2_exit);
+
+static int bcm_sf2_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct bcm_sf2_priv *priv = platform_get_drvdata(pdev);
+
+	return dsa_switch_resume(priv->dev->ds);
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(bcm_sf2_pm_ops,
+			 bcm_sf2_suspend, bcm_sf2_resume);
+
+static const struct of_device_id bcm_sf2_of_match[] = {
+	{ .compatible = "brcm,bcm7445-switch-v4.0" },
+	{ /* sentinel */ },
+};
+
+static struct platform_driver bcm_sf2_driver = {
+	.probe	= bcm_sf2_sw_probe,
+	.remove	= bcm_sf2_sw_remove,
+	.driver = {
+		.name = "brcm-sf2",
+		.of_match_table = bcm_sf2_of_match,
+		.pm = &bcm_sf2_pm_ops,
+	},
+};
+module_platform_driver(bcm_sf2_driver);
 
 MODULE_AUTHOR("Broadcom Corporation");
 MODULE_DESCRIPTION("Driver for Broadcom Starfighter 2 ethernet switch chip");
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 463bed8..4469267 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -26,6 +26,7 @@
 #include <net/dsa.h>
 
 #include "bcm_sf2_regs.h"
+#include "b53/b53_priv.h"
 
 struct bcm_sf2_hw_params {
 	u16	top_rev;
@@ -49,72 +50,8 @@
 	unsigned int link;
 
 	struct ethtool_eee eee;
-
-	u32 vlan_ctl_mask;
-	u16 pvid;
-
-	struct net_device *bridge_dev;
 };
 
-struct bcm_sf2_arl_entry {
-	u8 port;
-	u8 mac[ETH_ALEN];
-	u16 vid;
-	u8 is_valid:1;
-	u8 is_age:1;
-	u8 is_static:1;
-};
-
-struct bcm_sf2_vlan {
-	u16 members;
-	u16 untag;
-};
-
-static inline void bcm_sf2_mac_from_u64(u64 src, u8 *dst)
-{
-	unsigned int i;
-
-	for (i = 0; i < ETH_ALEN; i++)
-		dst[ETH_ALEN - 1 - i] = (src >> (8 * i)) & 0xff;
-}
-
-static inline u64 bcm_sf2_mac_to_u64(const u8 *src)
-{
-	unsigned int i;
-	u64 dst = 0;
-
-	for (i = 0; i < ETH_ALEN; i++)
-		dst |= (u64)src[ETH_ALEN - 1 - i] << (8 * i);
-
-	return dst;
-}
-
-static inline void bcm_sf2_arl_to_entry(struct bcm_sf2_arl_entry *ent,
-					u64 mac_vid, u32 fwd_entry)
-{
-	memset(ent, 0, sizeof(*ent));
-	ent->port = fwd_entry & PORTID_MASK;
-	ent->is_valid = !!(fwd_entry & ARL_VALID);
-	ent->is_age = !!(fwd_entry & ARL_AGE);
-	ent->is_static = !!(fwd_entry & ARL_STATIC);
-	bcm_sf2_mac_from_u64(mac_vid, ent->mac);
-	ent->vid = mac_vid >> VID_SHIFT;
-}
-
-static inline void bcm_sf2_arl_from_entry(u64 *mac_vid, u32 *fwd_entry,
-					  const struct bcm_sf2_arl_entry *ent)
-{
-	*mac_vid = bcm_sf2_mac_to_u64(ent->mac);
-	*mac_vid |= (u64)(ent->vid & VID_MASK) << VID_SHIFT;
-	*fwd_entry = ent->port & PORTID_MASK;
-	if (ent->is_valid)
-		*fwd_entry |= ARL_VALID;
-	if (ent->is_static)
-		*fwd_entry |= ARL_STATIC;
-	if (ent->is_age)
-		*fwd_entry |= ARL_AGE;
-}
-
 struct bcm_sf2_priv {
 	/* Base registers, keep those in order with BCM_SF2_REGS_NAME */
 	void __iomem			*core;
@@ -134,6 +71,9 @@
 	u32				irq1_stat;
 	u32				irq1_mask;
 
+	/* Backing b53_device */
+	struct b53_device		*dev;
+
 	/* Mutex protecting access to the MIB counters */
 	struct mutex			stats_mutex;
 
@@ -155,16 +95,14 @@
 	struct device_node		*master_mii_dn;
 	struct mii_bus			*slave_mii_bus;
 	struct mii_bus			*master_mii_bus;
-
-	/* Cache of programmed VLANs */
-	struct bcm_sf2_vlan		vlans[VLAN_N_VID];
 };
 
-struct bcm_sf2_hw_stats {
-	const char	*string;
-	u16		reg;
-	u8		sizeof_stat;
-};
+static inline struct bcm_sf2_priv *bcm_sf2_to_priv(struct dsa_switch *ds)
+{
+	struct b53_device *dev = ds->priv;
+
+	return dev->priv;
+}
 
 #define SF2_IO_MACRO(name) \
 static inline u32 name##_readl(struct bcm_sf2_priv *priv, u32 off)	\
@@ -205,8 +143,8 @@
 static inline void intrl2_##which##_mask_clear(struct bcm_sf2_priv *priv, \
 						u32 mask)		\
 {									\
-	intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);	\
 	priv->irq##which##_mask &= ~(mask);				\
+	intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);	\
 }									\
 static inline void intrl2_##which##_mask_set(struct bcm_sf2_priv *priv, \
 						u32 mask)		\
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 9f2a9cb..838fe37 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -115,14 +115,6 @@
 #define  RX_BCST_EN			(1 << 2)
 #define  RX_MCST_EN			(1 << 3)
 #define  RX_UCST_EN			(1 << 4)
-#define  G_MISTP_STATE_SHIFT		5
-#define  G_MISTP_NO_STP			(0 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_DIS_STATE		(1 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_BLOCK_STATE		(2 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_LISTEN_STATE		(3 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_LEARN_STATE		(4 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_FWD_STATE		(5 << G_MISTP_STATE_SHIFT)
-#define  G_MISTP_STATE_MASK		0x7
 
 #define CORE_SWMODE			0x0002c
 #define  SW_FWDG_MODE			(1 << 0)
@@ -205,75 +197,11 @@
 #define  BRCM_HDR_EN_P5			(1 << 1)
 #define  BRCM_HDR_EN_P7			(1 << 2)
 
-#define CORE_BRCM_HDR_CTRL2		0x0828
-
-#define CORE_HL_PRTC_CTRL		0x0940
-#define  ARP_EN				(1 << 0)
-#define  RARP_EN			(1 << 1)
-#define  DHCP_EN			(1 << 2)
-#define  ICMPV4_EN			(1 << 3)
-#define  ICMPV6_EN			(1 << 4)
-#define  ICMPV6_FWD_MODE		(1 << 5)
-#define  IGMP_DIP_EN			(1 << 8)
-#define  IGMP_RPTLVE_EN			(1 << 9)
-#define  IGMP_RTPLVE_FWD_MODE		(1 << 10)
-#define  IGMP_QRY_EN			(1 << 11)
-#define  IGMP_QRY_FWD_MODE		(1 << 12)
-#define  IGMP_UKN_EN			(1 << 13)
-#define  IGMP_UKN_FWD_MODE		(1 << 14)
-#define  MLD_RPTDONE_EN			(1 << 15)
-#define  MLD_RPTDONE_FWD_MODE		(1 << 16)
-#define  MLD_QRY_EN			(1 << 17)
-#define  MLD_QRY_FWD_MODE		(1 << 18)
-
 #define CORE_RST_MIB_CNT_EN		0x0950
 
 #define CORE_BRCM_HDR_RX_DIS		0x0980
 #define CORE_BRCM_HDR_TX_DIS		0x0988
 
-#define CORE_ARLA_NUM_ENTRIES		1024
-
-#define CORE_ARLA_RWCTL			0x1400
-#define  ARL_RW				(1 << 0)
-#define  IVL_SVL_SELECT			(1 << 6)
-#define  ARL_STRTDN			(1 << 7)
-
-#define CORE_ARLA_MAC			0x1408
-#define CORE_ARLA_VID			0x1420
-#define  ARLA_VIDTAB_INDX_MASK		0x1fff
-
-#define CORE_ARLA_MACVID0		0x1440
-#define  MAC_MASK			0xffffffffff
-#define  VID_SHIFT			48
-#define  VID_MASK			0xfff
-
-#define CORE_ARLA_FWD_ENTRY0		0x1460
-#define  PORTID_MASK			0x1ff
-#define  ARL_CON_SHIFT			9
-#define  ARL_CON_MASK			0x3
-#define  ARL_PRI_SHIFT			11
-#define  ARL_PRI_MASK			0x7
-#define  ARL_AGE			(1 << 14)
-#define  ARL_STATIC			(1 << 15)
-#define  ARL_VALID			(1 << 16)
-
-#define CORE_ARLA_MACVID_ENTRY(x)	(CORE_ARLA_MACVID0 + ((x) * 0x40))
-#define CORE_ARLA_FWD_ENTRY(x)		(CORE_ARLA_FWD_ENTRY0 + ((x) * 0x40))
-
-#define CORE_ARLA_SRCH_CTL		0x1540
-#define  ARLA_SRCH_VLID			(1 << 0)
-#define  IVL_SVL_SELECT			(1 << 6)
-#define  ARLA_SRCH_STDN			(1 << 7)
-
-#define CORE_ARLA_SRCH_ADR		0x1544
-#define  ARLA_SRCH_ADR_VALID		(1 << 15)
-
-#define CORE_ARLA_SRCH_RSLT_0_MACVID	0x1580
-#define CORE_ARLA_SRCH_RSLT_0		0x15a0
-
-#define CORE_ARLA_SRCH_RSLT_MACVID(x)	(CORE_ARLA_SRCH_RSLT_0_MACVID + ((x) * 0x40))
-#define CORE_ARLA_SRCH_RSLT(x)		(CORE_ARLA_SRCH_RSLT_0 + ((x) * 0x40))
-
 #define CORE_ARLA_VTBL_RWCTRL		0x1600
 #define  ARLA_VTBL_CMD_WRITE		0
 #define  ARLA_VTBL_CMD_READ		1
@@ -297,59 +225,9 @@
 #define  P_TXQ_PSM_VDD(x)		(P_TXQ_PSM_VDD_MASK << \
 					((x) * P_TXQ_PSM_VDD_SHIFT))
 
-#define	CORE_P0_MIB_OFFSET		0x8000
-#define P_MIB_SIZE			0x400
-#define CORE_P_MIB_OFFSET(x)		(CORE_P0_MIB_OFFSET + (x) * P_MIB_SIZE)
-
 #define CORE_PORT_VLAN_CTL_PORT(x)	(0xc400 + ((x) * 0x8))
 #define  PORT_VLAN_CTRL_MASK		0x1ff
 
-#define CORE_VLAN_CTRL0			0xd000
-#define  CHANGE_1P_VID_INNER		(1 << 0)
-#define  CHANGE_1P_VID_OUTER		(1 << 1)
-#define  CHANGE_1Q_VID			(1 << 3)
-#define  VLAN_LEARN_MODE_SVL		(0 << 5)
-#define  VLAN_LEARN_MODE_IVL		(3 << 5)
-#define  VLAN_EN			(1 << 7)
-
-#define CORE_VLAN_CTRL1			0xd004
-#define  EN_RSV_MCAST_FWDMAP		(1 << 2)
-#define  EN_RSV_MCAST_UNTAG		(1 << 3)
-#define  EN_IPMC_BYPASS_FWDMAP		(1 << 5)
-#define  EN_IPMC_BYPASS_UNTAG		(1 << 6)
-
-#define CORE_VLAN_CTRL2			0xd008
-#define  EN_MIIM_BYPASS_V_FWDMAP	(1 << 2)
-#define  EN_GMRP_GVRP_V_FWDMAP		(1 << 5)
-#define  EN_GMRP_GVRP_UNTAG_MAP		(1 << 6)
-
-#define CORE_VLAN_CTRL3			0xd00c
-#define  EN_DROP_NON1Q_MASK		0x1ff
-
-#define CORE_VLAN_CTRL4			0xd014
-#define  RESV_MCAST_FLOOD		(1 << 1)
-#define  EN_DOUBLE_TAG_MASK		0x3
-#define  EN_DOUBLE_TAG_SHIFT		2
-#define  EN_MGE_REV_GMRP		(1 << 4)
-#define  EN_MGE_REV_GVRP		(1 << 5)
-#define  INGR_VID_CHK_SHIFT		6
-#define  INGR_VID_CHK_MASK		0x3
-#define  INGR_VID_CHK_FWD		(0 << INGR_VID_CHK_SHIFT)
-#define  INGR_VID_CHK_DROP		(1 << INGR_VID_CHK_SHIFT)
-#define  INGR_VID_CHK_NO_CHK		(2 << INGR_VID_CHK_SHIFT)
-#define  INGR_VID_CHK_VID_VIOL_IMP	(3 << INGR_VID_CHK_SHIFT)
-
-#define CORE_VLAN_CTRL5			0xd018
-#define  EN_CPU_RX_BYP_INNER_CRCCHCK	(1 << 0)
-#define  EN_VID_FFF_FWD			(1 << 2)
-#define  DROP_VTABLE_MISS		(1 << 3)
-#define  EGRESS_DIR_FRM_BYP_TRUNK_EN	(1 << 4)
-#define  PRESV_NON1Q			(1 << 6)
-
-#define CORE_VLAN_CTRL6			0xd01c
-#define  STRICT_SFD_DETECT		(1 << 0)
-#define  DIS_ARL_BUST_LMIT		(1 << 4)
-
 #define CORE_DEFAULT_1Q_TAG_P(x)	(0xd040 + ((x) * 8))
 #define  CFI_SHIFT			12
 #define  PRI_SHIFT			13
diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index e36b408..7ce36db 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -19,7 +19,7 @@
 
 static int reg_read(struct dsa_switch *ds, int addr, int reg)
 {
-	struct mv88e6060_priv *priv = ds_to_priv(ds);
+	struct mv88e6060_priv *priv = ds->priv;
 
 	return mdiobus_read_nested(priv->bus, priv->sw_addr + addr, reg);
 }
@@ -37,7 +37,7 @@
 
 static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val)
 {
-	struct mv88e6060_priv *priv = ds_to_priv(ds);
+	struct mv88e6060_priv *priv = ds->priv;
 
 	return mdiobus_write_nested(priv->bus, priv->sw_addr + addr, reg, val);
 }
@@ -69,6 +69,11 @@
 	return NULL;
 }
 
+static enum dsa_tag_protocol mv88e6060_get_tag_protocol(struct dsa_switch *ds)
+{
+	return DSA_TAG_PROTO_TRAILER;
+}
+
 static const char *mv88e6060_drv_probe(struct device *dsa_dev,
 				       struct device *host_dev, int sw_addr,
 				       void **_priv)
@@ -247,8 +252,8 @@
 	return reg_write(ds, addr, regnum, val);
 }
 
-static struct dsa_switch_driver mv88e6060_switch_driver = {
-	.tag_protocol	= DSA_TAG_PROTO_TRAILER,
+static struct dsa_switch_ops mv88e6060_switch_ops = {
+	.get_tag_protocol = mv88e6060_get_tag_protocol,
 	.probe		= mv88e6060_drv_probe,
 	.setup		= mv88e6060_setup,
 	.set_addr	= mv88e6060_set_addr,
@@ -258,14 +263,14 @@
 
 static int __init mv88e6060_init(void)
 {
-	register_switch_driver(&mv88e6060_switch_driver);
+	register_switch_driver(&mv88e6060_switch_ops);
 	return 0;
 }
 module_init(mv88e6060_init);
 
 static void __exit mv88e6060_cleanup(void)
 {
-	unregister_switch_driver(&mv88e6060_switch_driver);
+	unregister_switch_driver(&mv88e6060_switch_ops);
 }
 module_exit(mv88e6060_cleanup);
 
diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig
index 490bc06..4866688 100644
--- a/drivers/net/dsa/mv88e6xxx/Kconfig
+++ b/drivers/net/dsa/mv88e6xxx/Kconfig
@@ -2,6 +2,18 @@
 	tristate "Marvell 88E6xxx Ethernet switch fabric support"
 	depends on NET_DSA
 	select NET_DSA_TAG_EDSA
+	select NET_DSA_TAG_DSA
 	help
 	  This driver adds support for most of the Marvell 88E6xxx models of
 	  Ethernet switch chips, except 88E6060.
+
+config NET_DSA_MV88E6XXX_GLOBAL2
+	bool "Switch Global 2 Registers support"
+	default y
+	depends on NET_DSA_MV88E6XXX
+	help
+	  This registers set at internal SMI address 0x1C provides extended
+	  features like EEPROM interface, trunking, cross-chip setup, etc.
+
+	  It is required on most chips. If the chip you compile the support for
+	  doesn't have such registers set, say N here. In doubt, say Y.
diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile
index 6e29a75..10ce820 100644
--- a/drivers/net/dsa/mv88e6xxx/Makefile
+++ b/drivers/net/dsa/mv88e6xxx/Makefile
@@ -1 +1,4 @@
-obj-$(CONFIG_NET_DSA_MV88E6XXX) += chip.o
+obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o
+mv88e6xxx-objs := chip.o
+mv88e6xxx-objs += global1.o
+mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d36aedd..883fd98 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -29,7 +29,10 @@
 #include <linux/phy.h>
 #include <net/dsa.h>
 #include <net/switchdev.h>
+
 #include "mv88e6xxx.h"
+#include "global1.h"
+#include "global2.h"
 
 static void assert_reg_lock(struct mv88e6xxx_chip *chip)
 {
@@ -95,7 +98,7 @@
 	return 0;
 }
 
-static const struct mv88e6xxx_ops mv88e6xxx_smi_single_chip_ops = {
+static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_single_chip_ops = {
 	.read = mv88e6xxx_smi_single_chip_read,
 	.write = mv88e6xxx_smi_single_chip_write,
 };
@@ -177,13 +180,12 @@
 	return 0;
 }
 
-static const struct mv88e6xxx_ops mv88e6xxx_smi_multi_chip_ops = {
+static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_multi_chip_ops = {
 	.read = mv88e6xxx_smi_multi_chip_read,
 	.write = mv88e6xxx_smi_multi_chip_write,
 };
 
-static int mv88e6xxx_read(struct mv88e6xxx_chip *chip,
-			  int addr, int reg, u16 *val)
+int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val)
 {
 	int err;
 
@@ -199,8 +201,7 @@
 	return 0;
 }
 
-static int mv88e6xxx_write(struct mv88e6xxx_chip *chip,
-			   int addr, int reg, u16 val)
+int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val)
 {
 	int err;
 
@@ -216,25 +217,144 @@
 	return 0;
 }
 
-/* Indirect write to single pointer-data register with an Update bit */
-static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
-			    u16 update)
+static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg,
+			       u16 *val)
 {
-	u16 val;
-	int i, err;
+	int addr = chip->info->port_base_addr + port;
 
-	/* Wait until the previous operation is completed */
-	for (i = 0; i < 16; ++i) {
+	return mv88e6xxx_read(chip, addr, reg, val);
+}
+
+static int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg,
+				u16 val)
+{
+	int addr = chip->info->port_base_addr + port;
+
+	return mv88e6xxx_write(chip, addr, reg, val);
+}
+
+static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy,
+			      int reg, u16 *val)
+{
+	int addr = phy; /* PHY devices addresses start at 0x0 */
+
+	if (!chip->info->ops->phy_read)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->phy_read(chip, addr, reg, val);
+}
+
+static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy,
+			       int reg, u16 val)
+{
+	int addr = phy; /* PHY devices addresses start at 0x0 */
+
+	if (!chip->info->ops->phy_write)
+		return -EOPNOTSUPP;
+
+	return chip->info->ops->phy_write(chip, addr, reg, val);
+}
+
+static int mv88e6xxx_phy_page_get(struct mv88e6xxx_chip *chip, int phy, u8 page)
+{
+	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_PHY_PAGE))
+		return -EOPNOTSUPP;
+
+	return mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page);
+}
+
+static void mv88e6xxx_phy_page_put(struct mv88e6xxx_chip *chip, int phy)
+{
+	int err;
+
+	/* Restore PHY page Copper 0x0 for access via the registered MDIO bus */
+	err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, PHY_PAGE_COPPER);
+	if (unlikely(err)) {
+		dev_err(chip->dev, "failed to restore PHY %d page Copper (%d)\n",
+			phy, err);
+	}
+}
+
+static int mv88e6xxx_phy_page_read(struct mv88e6xxx_chip *chip, int phy,
+				   u8 page, int reg, u16 *val)
+{
+	int err;
+
+	/* There is no paging for registers 22 */
+	if (reg == PHY_PAGE)
+		return -EINVAL;
+
+	err = mv88e6xxx_phy_page_get(chip, phy, page);
+	if (!err) {
+		err = mv88e6xxx_phy_read(chip, phy, reg, val);
+		mv88e6xxx_phy_page_put(chip, phy);
+	}
+
+	return err;
+}
+
+static int mv88e6xxx_phy_page_write(struct mv88e6xxx_chip *chip, int phy,
+				    u8 page, int reg, u16 val)
+{
+	int err;
+
+	/* There is no paging for registers 22 */
+	if (reg == PHY_PAGE)
+		return -EINVAL;
+
+	err = mv88e6xxx_phy_page_get(chip, phy, page);
+	if (!err) {
+		err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page);
+		mv88e6xxx_phy_page_put(chip, phy);
+	}
+
+	return err;
+}
+
+static int mv88e6xxx_serdes_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+	return mv88e6xxx_phy_page_read(chip, ADDR_SERDES, SERDES_PAGE_FIBER,
+				       reg, val);
+}
+
+static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+	return mv88e6xxx_phy_page_write(chip, ADDR_SERDES, SERDES_PAGE_FIBER,
+					reg, val);
+}
+
+int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask)
+{
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		u16 val;
+		int err;
+
 		err = mv88e6xxx_read(chip, addr, reg, &val);
 		if (err)
 			return err;
 
-		if (!(val & BIT(15)))
-			break;
+		if (!(val & mask))
+			return 0;
+
+		usleep_range(1000, 2000);
 	}
 
-	if (i == 16)
-		return -ETIMEDOUT;
+	dev_err(chip->dev, "Timeout while waiting for switch\n");
+	return -ETIMEDOUT;
+}
+
+/* Indirect write to single pointer-data register with an Update bit */
+int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update)
+{
+	u16 val;
+	int err;
+
+	/* Wait until the previous operation is completed */
+	err = mv88e6xxx_wait(chip, addr, reg, BIT(15));
+	if (err)
+		return err;
 
 	/* Set the Update bit to trigger a write operation */
 	val = BIT(15) | update;
@@ -242,63 +362,27 @@
 	return mv88e6xxx_write(chip, addr, reg, val);
 }
 
-static int _mv88e6xxx_reg_read(struct mv88e6xxx_chip *chip, int addr, int reg)
+static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip)
 {
 	u16 val;
-	int err;
+	int i, err;
 
-	err = mv88e6xxx_read(chip, addr, reg, &val);
+	err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &val);
 	if (err)
 		return err;
 
-	return val;
-}
+	err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL,
+				 val & ~GLOBAL_CONTROL_PPU_ENABLE);
+	if (err)
+		return err;
 
-static int _mv88e6xxx_reg_write(struct mv88e6xxx_chip *chip, int addr,
-				int reg, u16 val)
-{
-	return mv88e6xxx_write(chip, addr, reg, val);
-}
-
-static int mv88e6xxx_mdio_read_direct(struct mv88e6xxx_chip *chip,
-				      int addr, int regnum)
-{
-	if (addr >= 0)
-		return _mv88e6xxx_reg_read(chip, addr, regnum);
-	return 0xffff;
-}
-
-static int mv88e6xxx_mdio_write_direct(struct mv88e6xxx_chip *chip,
-				       int addr, int regnum, u16 val)
-{
-	if (addr >= 0)
-		return _mv88e6xxx_reg_write(chip, addr, regnum, val);
-	return 0;
-}
-
-static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip)
-{
-	int ret;
-	unsigned long timeout;
-
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL);
-	if (ret < 0)
-		return ret;
-
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL,
-				   ret & ~GLOBAL_CONTROL_PPU_ENABLE);
-	if (ret)
-		return ret;
-
-	timeout = jiffies + 1 * HZ;
-	while (time_before(jiffies, timeout)) {
-		ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS);
-		if (ret < 0)
-			return ret;
+	for (i = 0; i < 16; i++) {
+		err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, &val);
+		if (err)
+			return err;
 
 		usleep_range(1000, 2000);
-		if ((ret & GLOBAL_STATUS_PPU_MASK) !=
-		    GLOBAL_STATUS_PPU_POLLING)
+		if ((val & GLOBAL_STATUS_PPU_MASK) != GLOBAL_STATUS_PPU_POLLING)
 			return 0;
 	}
 
@@ -307,27 +391,25 @@
 
 static int mv88e6xxx_ppu_enable(struct mv88e6xxx_chip *chip)
 {
-	int ret, err;
-	unsigned long timeout;
+	u16 val;
+	int i, err;
 
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL);
-	if (ret < 0)
-		return ret;
-
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL,
-				   ret | GLOBAL_CONTROL_PPU_ENABLE);
+	err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &val);
 	if (err)
 		return err;
 
-	timeout = jiffies + 1 * HZ;
-	while (time_before(jiffies, timeout)) {
-		ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS);
-		if (ret < 0)
-			return ret;
+	err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL,
+				 val | GLOBAL_CONTROL_PPU_ENABLE);
+	if (err)
+		return err;
+
+	for (i = 0; i < 16; i++) {
+		err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, &val);
+		if (err)
+			return err;
 
 		usleep_range(1000, 2000);
-		if ((ret & GLOBAL_STATUS_PPU_MASK) ==
-		    GLOBAL_STATUS_PPU_POLLING)
+		if ((val & GLOBAL_STATUS_PPU_MASK) == GLOBAL_STATUS_PPU_POLLING)
 			return 0;
 	}
 
@@ -400,32 +482,37 @@
 	chip->ppu_timer.function = mv88e6xxx_ppu_reenable_timer;
 }
 
-static int mv88e6xxx_mdio_read_ppu(struct mv88e6xxx_chip *chip, int addr,
-				   int regnum)
+static void mv88e6xxx_ppu_state_destroy(struct mv88e6xxx_chip *chip)
 {
-	int ret;
-
-	ret = mv88e6xxx_ppu_access_get(chip);
-	if (ret >= 0) {
-		ret = _mv88e6xxx_reg_read(chip, addr, regnum);
-		mv88e6xxx_ppu_access_put(chip);
-	}
-
-	return ret;
+	del_timer_sync(&chip->ppu_timer);
 }
 
-static int mv88e6xxx_mdio_write_ppu(struct mv88e6xxx_chip *chip, int addr,
-				    int regnum, u16 val)
+static int mv88e6xxx_phy_ppu_read(struct mv88e6xxx_chip *chip, int addr,
+				  int reg, u16 *val)
 {
-	int ret;
+	int err;
 
-	ret = mv88e6xxx_ppu_access_get(chip);
-	if (ret >= 0) {
-		ret = _mv88e6xxx_reg_write(chip, addr, regnum, val);
+	err = mv88e6xxx_ppu_access_get(chip);
+	if (!err) {
+		err = mv88e6xxx_read(chip, addr, reg, val);
 		mv88e6xxx_ppu_access_put(chip);
 	}
 
-	return ret;
+	return err;
+}
+
+static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, int addr,
+				   int reg, u16 val)
+{
+	int err;
+
+	err = mv88e6xxx_ppu_access_get(chip);
+	if (!err) {
+		err = mv88e6xxx_write(chip, addr, reg, val);
+		mv88e6xxx_ppu_access_put(chip);
+	}
+
+	return err;
 }
 
 static bool mv88e6xxx_6065_family(struct mv88e6xxx_chip *chip)
@@ -468,21 +555,6 @@
 	return chip->info->family == MV88E6XXX_FAMILY_6352;
 }
 
-static unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip)
-{
-	return chip->info->num_databases;
-}
-
-static bool mv88e6xxx_has_fid_reg(struct mv88e6xxx_chip *chip)
-{
-	/* Does the device have dedicated FID registers for ATU and VTU ops? */
-	if (mv88e6xxx_6097_family(chip) || mv88e6xxx_6165_family(chip) ||
-	    mv88e6xxx_6351_family(chip) || mv88e6xxx_6352_family(chip))
-		return true;
-
-	return false;
-}
-
 /* We expect the switch to perform auto negotiation if there is a real
  * phy. However, in the case of a fixed link phy, we force the port
  * settings from the fixed link settings.
@@ -490,24 +562,24 @@
 static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port,
 				  struct phy_device *phydev)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	u32 reg;
-	int ret;
+	struct mv88e6xxx_chip *chip = ds->priv;
+	u16 reg;
+	int err;
 
 	if (!phy_is_pseudo_fixed_link(phydev))
 		return;
 
 	mutex_lock(&chip->reg_lock);
 
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL);
-	if (ret < 0)
+	err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg);
+	if (err)
 		goto out;
 
-	reg = ret & ~(PORT_PCS_CTRL_LINK_UP |
-		      PORT_PCS_CTRL_FORCE_LINK |
-		      PORT_PCS_CTRL_DUPLEX_FULL |
-		      PORT_PCS_CTRL_FORCE_DUPLEX |
-		      PORT_PCS_CTRL_UNFORCED);
+	reg &= ~(PORT_PCS_CTRL_LINK_UP |
+		 PORT_PCS_CTRL_FORCE_LINK |
+		 PORT_PCS_CTRL_DUPLEX_FULL |
+		 PORT_PCS_CTRL_FORCE_DUPLEX |
+		 PORT_PCS_CTRL_UNFORCED);
 
 	reg |= PORT_PCS_CTRL_FORCE_LINK;
 	if (phydev->link)
@@ -536,7 +608,7 @@
 		reg |= PORT_PCS_CTRL_DUPLEX_FULL;
 
 	if ((mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip)) &&
-	    (port >= chip->info->num_ports - 2)) {
+	    (port >= mv88e6xxx_num_ports(chip) - 2)) {
 		if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID)
 			reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK;
 		if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID)
@@ -545,7 +617,7 @@
 			reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK |
 				PORT_PCS_CTRL_RGMII_DELAY_TXCLK);
 	}
-	_mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_PCS_CTRL, reg);
+	mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg);
 
 out:
 	mutex_unlock(&chip->reg_lock);
@@ -553,12 +625,12 @@
 
 static int _mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip)
 {
-	int ret;
-	int i;
+	u16 val;
+	int i, err;
 
 	for (i = 0; i < 10; i++) {
-		ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_OP);
-		if ((ret & GLOBAL_STATS_OP_BUSY) == 0)
+		err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_OP, &val);
+		if ((val & GLOBAL_STATS_OP_BUSY) == 0)
 			return 0;
 	}
 
@@ -567,55 +639,52 @@
 
 static int _mv88e6xxx_stats_snapshot(struct mv88e6xxx_chip *chip, int port)
 {
-	int ret;
+	int err;
 
 	if (mv88e6xxx_6320_family(chip) || mv88e6xxx_6352_family(chip))
 		port = (port + 1) << 5;
 
 	/* Snapshot the hardware statistics counters for this port. */
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP,
-				   GLOBAL_STATS_OP_CAPTURE_PORT |
-				   GLOBAL_STATS_OP_HIST_RX_TX | port);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP,
+				 GLOBAL_STATS_OP_CAPTURE_PORT |
+				 GLOBAL_STATS_OP_HIST_RX_TX | port);
+	if (err)
+		return err;
 
 	/* Wait for the snapshotting to complete. */
-	ret = _mv88e6xxx_stats_wait(chip);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return _mv88e6xxx_stats_wait(chip);
 }
 
 static void _mv88e6xxx_stats_read(struct mv88e6xxx_chip *chip,
 				  int stat, u32 *val)
 {
-	u32 _val;
-	int ret;
+	u32 value;
+	u16 reg;
+	int err;
 
 	*val = 0;
 
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP,
-				   GLOBAL_STATS_OP_READ_CAPTURED |
-				   GLOBAL_STATS_OP_HIST_RX_TX | stat);
-	if (ret < 0)
+	err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP,
+				 GLOBAL_STATS_OP_READ_CAPTURED |
+				 GLOBAL_STATS_OP_HIST_RX_TX | stat);
+	if (err)
 		return;
 
-	ret = _mv88e6xxx_stats_wait(chip);
-	if (ret < 0)
+	err = _mv88e6xxx_stats_wait(chip);
+	if (err)
 		return;
 
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_COUNTER_32);
-	if (ret < 0)
+	err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_32, &reg);
+	if (err)
 		return;
 
-	_val = ret << 16;
+	value = reg << 16;
 
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_COUNTER_01);
-	if (ret < 0)
+	err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_01, &reg);
+	if (err)
 		return;
 
-	*val = _val | ret;
+	*val = value | reg;
 }
 
 static struct mv88e6xxx_hw_stat mv88e6xxx_hw_stats[] = {
@@ -705,22 +774,22 @@
 {
 	u32 low;
 	u32 high = 0;
-	int ret;
+	int err;
+	u16 reg;
 	u64 value;
 
 	switch (s->type) {
 	case PORT:
-		ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), s->reg);
-		if (ret < 0)
+		err = mv88e6xxx_port_read(chip, port, s->reg, &reg);
+		if (err)
 			return UINT64_MAX;
 
-		low = ret;
+		low = reg;
 		if (s->sizeof_stat == 4) {
-			ret = _mv88e6xxx_reg_read(chip, REG_PORT(port),
-						  s->reg + 1);
-			if (ret < 0)
+			err = mv88e6xxx_port_read(chip, port, s->reg + 1, &reg);
+			if (err)
 				return UINT64_MAX;
-			high = ret;
+			high = reg;
 		}
 		break;
 	case BANK0:
@@ -736,7 +805,7 @@
 static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port,
 				  uint8_t *data)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	struct mv88e6xxx_hw_stat *stat;
 	int i, j;
 
@@ -752,7 +821,7 @@
 
 static int mv88e6xxx_get_sset_count(struct dsa_switch *ds)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	struct mv88e6xxx_hw_stat *stat;
 	int i, j;
 
@@ -767,7 +836,7 @@
 static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port,
 					uint64_t *data)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	struct mv88e6xxx_hw_stat *stat;
 	int ret;
 	int i, j;
@@ -798,7 +867,9 @@
 static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
 			       struct ethtool_regs *regs, void *_p)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
+	int err;
+	u16 reg;
 	u16 *p = _p;
 	int i;
 
@@ -809,170 +880,106 @@
 	mutex_lock(&chip->reg_lock);
 
 	for (i = 0; i < 32; i++) {
-		int ret;
 
-		ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), i);
-		if (ret >= 0)
-			p[i] = ret;
+		err = mv88e6xxx_port_read(chip, port, i, &reg);
+		if (!err)
+			p[i] = reg;
 	}
 
 	mutex_unlock(&chip->reg_lock);
 }
 
-static int _mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int reg, int offset,
-			   u16 mask)
-{
-	unsigned long timeout = jiffies + HZ / 10;
-
-	while (time_before(jiffies, timeout)) {
-		int ret;
-
-		ret = _mv88e6xxx_reg_read(chip, reg, offset);
-		if (ret < 0)
-			return ret;
-		if (!(ret & mask))
-			return 0;
-
-		usleep_range(1000, 2000);
-	}
-	return -ETIMEDOUT;
-}
-
-static int mv88e6xxx_mdio_wait(struct mv88e6xxx_chip *chip)
-{
-	return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_OP,
-			       GLOBAL2_SMI_OP_BUSY);
-}
-
 static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip)
 {
-	return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP,
-			       GLOBAL_ATU_OP_BUSY);
-}
-
-static int mv88e6xxx_mdio_read_indirect(struct mv88e6xxx_chip *chip,
-					int addr, int regnum)
-{
-	int ret;
-
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP,
-				   GLOBAL2_SMI_OP_22_READ | (addr << 5) |
-				   regnum);
-	if (ret < 0)
-		return ret;
-
-	ret = mv88e6xxx_mdio_wait(chip);
-	if (ret < 0)
-		return ret;
-
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA);
-
-	return ret;
-}
-
-static int mv88e6xxx_mdio_write_indirect(struct mv88e6xxx_chip *chip,
-					 int addr, int regnum, u16 val)
-{
-	int ret;
-
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA, val);
-	if (ret < 0)
-		return ret;
-
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP,
-				   GLOBAL2_SMI_OP_22_WRITE | (addr << 5) |
-				   regnum);
-
-	return mv88e6xxx_mdio_wait(chip);
+	return mv88e6xxx_g1_wait(chip, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY);
 }
 
 static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port,
 			     struct ethtool_eee *e)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	int reg;
+	struct mv88e6xxx_chip *chip = ds->priv;
+	u16 reg;
+	int err;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
 		return -EOPNOTSUPP;
 
 	mutex_lock(&chip->reg_lock);
 
-	reg = mv88e6xxx_mdio_read_indirect(chip, port, 16);
-	if (reg < 0)
+	err = mv88e6xxx_phy_read(chip, port, 16, &reg);
+	if (err)
 		goto out;
 
 	e->eee_enabled = !!(reg & 0x0200);
 	e->tx_lpi_enabled = !!(reg & 0x0100);
 
-	reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS);
-	if (reg < 0)
+	err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg);
+	if (err)
 		goto out;
 
 	e->eee_active = !!(reg & PORT_STATUS_EEE);
-	reg = 0;
-
 out:
 	mutex_unlock(&chip->reg_lock);
-	return reg;
+
+	return err;
 }
 
 static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port,
 			     struct phy_device *phydev, struct ethtool_eee *e)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	int reg;
-	int ret;
+	struct mv88e6xxx_chip *chip = ds->priv;
+	u16 reg;
+	int err;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE))
 		return -EOPNOTSUPP;
 
 	mutex_lock(&chip->reg_lock);
 
-	ret = mv88e6xxx_mdio_read_indirect(chip, port, 16);
-	if (ret < 0)
+	err = mv88e6xxx_phy_read(chip, port, 16, &reg);
+	if (err)
 		goto out;
 
-	reg = ret & ~0x0300;
+	reg &= ~0x0300;
 	if (e->eee_enabled)
 		reg |= 0x0200;
 	if (e->tx_lpi_enabled)
 		reg |= 0x0100;
 
-	ret = mv88e6xxx_mdio_write_indirect(chip, port, 16, reg);
+	err = mv88e6xxx_phy_write(chip, port, 16, reg);
 out:
 	mutex_unlock(&chip->reg_lock);
 
-	return ret;
+	return err;
 }
 
 static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd)
 {
-	int ret;
+	u16 val;
+	int err;
 
-	if (mv88e6xxx_has_fid_reg(chip)) {
-		ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_FID,
-					   fid);
-		if (ret < 0)
-			return ret;
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_ATU_FID)) {
+		err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_FID, fid);
+		if (err)
+			return err;
 	} else if (mv88e6xxx_num_databases(chip) == 256) {
 		/* ATU DBNum[7:4] are located in ATU Control 15:12 */
-		ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val);
+		if (err)
+			return err;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL,
-					   (ret & 0xfff) |
-					   ((fid << 8) & 0xf000));
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL,
+					 (val & 0xfff) | ((fid << 8) & 0xf000));
+		if (err)
+			return err;
 
 		/* ATU DBNum[3:0] are located in ATU Operation 3:0 */
 		cmd |= fid & 0xf;
 	}
 
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_OP, cmd);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_OP, cmd);
+	if (err)
+		return err;
 
 	return _mv88e6xxx_atu_wait(chip);
 }
@@ -997,7 +1004,7 @@
 		data |= (entry->portv_trunkid << shift) & mask;
 	}
 
-	return _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_DATA, data);
+	return mv88e6xxx_g1_write(chip, GLOBAL_ATU_DATA, data);
 }
 
 static int _mv88e6xxx_atu_flush_move(struct mv88e6xxx_chip *chip,
@@ -1073,57 +1080,45 @@
 				 u8 state)
 {
 	struct dsa_switch *ds = chip->ds;
-	int reg, ret = 0;
+	u16 reg;
+	int err;
 	u8 oldstate;
 
-	reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL);
-	if (reg < 0)
-		return reg;
+	err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, &reg);
+	if (err)
+		return err;
 
 	oldstate = reg & PORT_CONTROL_STATE_MASK;
 
-	if (oldstate != state) {
-		/* Flush forwarding database if we're moving a port
-		 * from Learning or Forwarding state to Disabled or
-		 * Blocking or Listening state.
-		 */
-		if ((oldstate == PORT_CONTROL_STATE_LEARNING ||
-		     oldstate == PORT_CONTROL_STATE_FORWARDING) &&
-		    (state == PORT_CONTROL_STATE_DISABLED ||
-		     state == PORT_CONTROL_STATE_BLOCKING)) {
-			ret = _mv88e6xxx_atu_remove(chip, 0, port, false);
-			if (ret)
-				return ret;
-		}
+	reg &= ~PORT_CONTROL_STATE_MASK;
+	reg |= state;
 
-		reg = (reg & ~PORT_CONTROL_STATE_MASK) | state;
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL,
-					   reg);
-		if (ret)
-			return ret;
+	err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
+	if (err)
+		return err;
 
-		netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n",
-			   mv88e6xxx_port_state_names[state],
-			   mv88e6xxx_port_state_names[oldstate]);
-	}
+	netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n",
+		   mv88e6xxx_port_state_names[state],
+		   mv88e6xxx_port_state_names[oldstate]);
 
-	return ret;
+	return 0;
 }
 
 static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port)
 {
 	struct net_device *bridge = chip->ports[port].bridge_dev;
-	const u16 mask = (1 << chip->info->num_ports) - 1;
+	const u16 mask = (1 << mv88e6xxx_num_ports(chip)) - 1;
 	struct dsa_switch *ds = chip->ds;
 	u16 output_ports = 0;
-	int reg;
+	u16 reg;
+	int err;
 	int i;
 
 	/* allow CPU port or DSA link(s) to send frames to every port */
 	if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
 		output_ports = mask;
 	} else {
-		for (i = 0; i < chip->info->num_ports; ++i) {
+		for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 			/* allow sending frames to every group member */
 			if (bridge && chip->ports[i].bridge_dev == bridge)
 				output_ports |= BIT(i);
@@ -1137,20 +1132,20 @@
 	/* prevent frames from going back out of the port they came in on */
 	output_ports &= ~BIT(port);
 
-	reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN);
-	if (reg < 0)
-		return reg;
+	err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg);
+	if (err)
+		return err;
 
 	reg &= ~mask;
 	reg |= output_ports & mask;
 
-	return _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, reg);
+	return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg);
 }
 
 static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port,
 					 u8 state)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int stp_state;
 	int err;
 
@@ -1181,27 +1176,39 @@
 			   mv88e6xxx_port_state_names[stp_state]);
 }
 
+static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	int err;
+
+	mutex_lock(&chip->reg_lock);
+	err = _mv88e6xxx_atu_remove(chip, 0, port, false);
+	mutex_unlock(&chip->reg_lock);
+
+	if (err)
+		netdev_err(ds->ports[port].netdev, "failed to flush ATU\n");
+}
+
 static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port,
 				u16 *new, u16 *old)
 {
 	struct dsa_switch *ds = chip->ds;
-	u16 pvid;
-	int ret;
+	u16 pvid, reg;
+	int err;
 
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_DEFAULT_VLAN);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, &reg);
+	if (err)
+		return err;
 
-	pvid = ret & PORT_DEFAULT_VLAN_MASK;
+	pvid = reg & PORT_DEFAULT_VLAN_MASK;
 
 	if (new) {
-		ret &= ~PORT_DEFAULT_VLAN_MASK;
-		ret |= *new & PORT_DEFAULT_VLAN_MASK;
+		reg &= ~PORT_DEFAULT_VLAN_MASK;
+		reg |= *new & PORT_DEFAULT_VLAN_MASK;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_DEFAULT_VLAN, ret);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg);
+		if (err)
+			return err;
 
 		netdev_dbg(ds->ports[port].netdev,
 			   "DefaultVID %d (was %d)\n", *new, pvid);
@@ -1227,17 +1234,16 @@
 
 static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip)
 {
-	return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP,
-			       GLOBAL_VTU_OP_BUSY);
+	return mv88e6xxx_g1_wait(chip, GLOBAL_VTU_OP, GLOBAL_VTU_OP_BUSY);
 }
 
 static int _mv88e6xxx_vtu_cmd(struct mv88e6xxx_chip *chip, u16 op)
 {
-	int ret;
+	int err;
 
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_OP, op);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_OP, op);
+	if (err)
+		return err;
 
 	return _mv88e6xxx_vtu_wait(chip);
 }
@@ -1254,23 +1260,21 @@
 }
 
 static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip,
-					struct mv88e6xxx_vtu_stu_entry *entry,
+					struct mv88e6xxx_vtu_entry *entry,
 					unsigned int nibble_offset)
 {
 	u16 regs[3];
-	int i;
-	int ret;
+	int i, err;
 
 	for (i = 0; i < 3; ++i) {
-		ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL,
-					  GLOBAL_VTU_DATA_0_3 + i);
-		if (ret < 0)
-			return ret;
+		u16 *reg = &regs[i];
 
-		regs[i] = ret;
+		err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_DATA_0_3 + i, reg);
+		if (err)
+			return err;
 	}
 
-	for (i = 0; i < chip->info->num_ports; ++i) {
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 		unsigned int shift = (i % 4) * 4 + nibble_offset;
 		u16 reg = regs[i / 4];
 
@@ -1281,26 +1285,25 @@
 }
 
 static int mv88e6xxx_vtu_data_read(struct mv88e6xxx_chip *chip,
-				   struct mv88e6xxx_vtu_stu_entry *entry)
+				   struct mv88e6xxx_vtu_entry *entry)
 {
 	return _mv88e6xxx_vtu_stu_data_read(chip, entry, 0);
 }
 
 static int mv88e6xxx_stu_data_read(struct mv88e6xxx_chip *chip,
-				   struct mv88e6xxx_vtu_stu_entry *entry)
+				   struct mv88e6xxx_vtu_entry *entry)
 {
 	return _mv88e6xxx_vtu_stu_data_read(chip, entry, 2);
 }
 
 static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip,
-					 struct mv88e6xxx_vtu_stu_entry *entry,
+					 struct mv88e6xxx_vtu_entry *entry,
 					 unsigned int nibble_offset)
 {
 	u16 regs[3] = { 0 };
-	int i;
-	int ret;
+	int i, err;
 
-	for (i = 0; i < chip->info->num_ports; ++i) {
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 		unsigned int shift = (i % 4) * 4 + nibble_offset;
 		u8 data = entry->data[i];
 
@@ -1308,86 +1311,85 @@
 	}
 
 	for (i = 0; i < 3; ++i) {
-		ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL,
-					   GLOBAL_VTU_DATA_0_3 + i, regs[i]);
-		if (ret < 0)
-			return ret;
+		u16 reg = regs[i];
+
+		err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_DATA_0_3 + i, reg);
+		if (err)
+			return err;
 	}
 
 	return 0;
 }
 
 static int mv88e6xxx_vtu_data_write(struct mv88e6xxx_chip *chip,
-				    struct mv88e6xxx_vtu_stu_entry *entry)
+				    struct mv88e6xxx_vtu_entry *entry)
 {
 	return _mv88e6xxx_vtu_stu_data_write(chip, entry, 0);
 }
 
 static int mv88e6xxx_stu_data_write(struct mv88e6xxx_chip *chip,
-				    struct mv88e6xxx_vtu_stu_entry *entry)
+				    struct mv88e6xxx_vtu_entry *entry)
 {
 	return _mv88e6xxx_vtu_stu_data_write(chip, entry, 2);
 }
 
 static int _mv88e6xxx_vtu_vid_write(struct mv88e6xxx_chip *chip, u16 vid)
 {
-	return _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID,
-				    vid & GLOBAL_VTU_VID_MASK);
+	return mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID,
+				  vid & GLOBAL_VTU_VID_MASK);
 }
 
 static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip,
-				  struct mv88e6xxx_vtu_stu_entry *entry)
+				  struct mv88e6xxx_vtu_entry *entry)
 {
-	struct mv88e6xxx_vtu_stu_entry next = { 0 };
-	int ret;
+	struct mv88e6xxx_vtu_entry next = { 0 };
+	u16 val;
+	int err;
 
-	ret = _mv88e6xxx_vtu_wait(chip);
-	if (ret < 0)
-		return ret;
+	err = _mv88e6xxx_vtu_wait(chip);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_VTU_GET_NEXT);
-	if (ret < 0)
-		return ret;
+	err = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_VTU_GET_NEXT);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_VID);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_VID, &val);
+	if (err)
+		return err;
 
-	next.vid = ret & GLOBAL_VTU_VID_MASK;
-	next.valid = !!(ret & GLOBAL_VTU_VID_VALID);
+	next.vid = val & GLOBAL_VTU_VID_MASK;
+	next.valid = !!(val & GLOBAL_VTU_VID_VALID);
 
 	if (next.valid) {
-		ret = mv88e6xxx_vtu_data_read(chip, &next);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_vtu_data_read(chip, &next);
+		if (err)
+			return err;
 
-		if (mv88e6xxx_has_fid_reg(chip)) {
-			ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL,
-						  GLOBAL_VTU_FID);
-			if (ret < 0)
-				return ret;
+		if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_VTU_FID)) {
+			err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_FID, &val);
+			if (err)
+				return err;
 
-			next.fid = ret & GLOBAL_VTU_FID_MASK;
+			next.fid = val & GLOBAL_VTU_FID_MASK;
 		} else if (mv88e6xxx_num_databases(chip) == 256) {
 			/* VTU DBNum[7:4] are located in VTU Operation 11:8, and
 			 * VTU DBNum[3:0] are located in VTU Operation 3:0
 			 */
-			ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL,
-						  GLOBAL_VTU_OP);
-			if (ret < 0)
-				return ret;
+			err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_OP, &val);
+			if (err)
+				return err;
 
-			next.fid = (ret & 0xf00) >> 4;
-			next.fid |= ret & 0xf;
+			next.fid = (val & 0xf00) >> 4;
+			next.fid |= val & 0xf;
 		}
 
 		if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_STU)) {
-			ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL,
-						  GLOBAL_VTU_SID);
-			if (ret < 0)
-				return ret;
+			err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_SID, &val);
+			if (err)
+				return err;
 
-			next.sid = ret & GLOBAL_VTU_SID_MASK;
+			next.sid = val & GLOBAL_VTU_SID_MASK;
 		}
 	}
 
@@ -1399,8 +1401,8 @@
 				    struct switchdev_obj_port_vlan *vlan,
 				    int (*cb)(struct switchdev_obj *obj))
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	struct mv88e6xxx_vtu_stu_entry next;
+	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_vtu_entry next;
 	u16 pvid;
 	int err;
 
@@ -1451,38 +1453,36 @@
 }
 
 static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip,
-				    struct mv88e6xxx_vtu_stu_entry *entry)
+				    struct mv88e6xxx_vtu_entry *entry)
 {
 	u16 op = GLOBAL_VTU_OP_VTU_LOAD_PURGE;
 	u16 reg = 0;
-	int ret;
+	int err;
 
-	ret = _mv88e6xxx_vtu_wait(chip);
-	if (ret < 0)
-		return ret;
+	err = _mv88e6xxx_vtu_wait(chip);
+	if (err)
+		return err;
 
 	if (!entry->valid)
 		goto loadpurge;
 
 	/* Write port member tags */
-	ret = mv88e6xxx_vtu_data_write(chip, entry);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_vtu_data_write(chip, entry);
+	if (err)
+		return err;
 
 	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_STU)) {
 		reg = entry->sid & GLOBAL_VTU_SID_MASK;
-		ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID,
-					   reg);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, reg);
+		if (err)
+			return err;
 	}
 
-	if (mv88e6xxx_has_fid_reg(chip)) {
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_VTU_FID)) {
 		reg = entry->fid & GLOBAL_VTU_FID_MASK;
-		ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_FID,
-					   reg);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_FID, reg);
+		if (err)
+			return err;
 	} else if (mv88e6xxx_num_databases(chip) == 256) {
 		/* VTU DBNum[7:4] are located in VTU Operation 11:8, and
 		 * VTU DBNum[3:0] are located in VTU Operation 3:0
@@ -1494,48 +1494,49 @@
 	reg = GLOBAL_VTU_VID_VALID;
 loadpurge:
 	reg |= entry->vid & GLOBAL_VTU_VID_MASK;
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, reg);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, reg);
+	if (err)
+		return err;
 
 	return _mv88e6xxx_vtu_cmd(chip, op);
 }
 
 static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_chip *chip, u8 sid,
-				  struct mv88e6xxx_vtu_stu_entry *entry)
+				  struct mv88e6xxx_vtu_entry *entry)
 {
-	struct mv88e6xxx_vtu_stu_entry next = { 0 };
-	int ret;
+	struct mv88e6xxx_vtu_entry next = { 0 };
+	u16 val;
+	int err;
 
-	ret = _mv88e6xxx_vtu_wait(chip);
-	if (ret < 0)
-		return ret;
+	err = _mv88e6xxx_vtu_wait(chip);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID,
-				   sid & GLOBAL_VTU_SID_MASK);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID,
+				 sid & GLOBAL_VTU_SID_MASK);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_GET_NEXT);
-	if (ret < 0)
-		return ret;
+	err = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_GET_NEXT);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_SID);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_SID, &val);
+	if (err)
+		return err;
 
-	next.sid = ret & GLOBAL_VTU_SID_MASK;
+	next.sid = val & GLOBAL_VTU_SID_MASK;
 
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_VID);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_VID, &val);
+	if (err)
+		return err;
 
-	next.valid = !!(ret & GLOBAL_VTU_VID_VALID);
+	next.valid = !!(val & GLOBAL_VTU_VID_VALID);
 
 	if (next.valid) {
-		ret = mv88e6xxx_stu_data_read(chip, &next);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_stu_data_read(chip, &next);
+		if (err)
+			return err;
 	}
 
 	*entry = next;
@@ -1543,33 +1544,33 @@
 }
 
 static int _mv88e6xxx_stu_loadpurge(struct mv88e6xxx_chip *chip,
-				    struct mv88e6xxx_vtu_stu_entry *entry)
+				    struct mv88e6xxx_vtu_entry *entry)
 {
 	u16 reg = 0;
-	int ret;
+	int err;
 
-	ret = _mv88e6xxx_vtu_wait(chip);
-	if (ret < 0)
-		return ret;
+	err = _mv88e6xxx_vtu_wait(chip);
+	if (err)
+		return err;
 
 	if (!entry->valid)
 		goto loadpurge;
 
 	/* Write port states */
-	ret = mv88e6xxx_stu_data_write(chip, entry);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_stu_data_write(chip, entry);
+	if (err)
+		return err;
 
 	reg = GLOBAL_VTU_VID_VALID;
 loadpurge:
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, reg);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, reg);
+	if (err)
+		return err;
 
 	reg = entry->sid & GLOBAL_VTU_SID_MASK;
-	ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, reg);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, reg);
+	if (err)
+		return err;
 
 	return _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_LOAD_PURGE);
 }
@@ -1580,7 +1581,8 @@
 	struct dsa_switch *ds = chip->ds;
 	u16 upper_mask;
 	u16 fid;
-	int ret;
+	u16 reg;
+	int err;
 
 	if (mv88e6xxx_num_databases(chip) == 4096)
 		upper_mask = 0xff;
@@ -1590,37 +1592,35 @@
 		return -EOPNOTSUPP;
 
 	/* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, &reg);
+	if (err)
+		return err;
 
-	fid = (ret & PORT_BASE_VLAN_FID_3_0_MASK) >> 12;
+	fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12;
 
 	if (new) {
-		ret &= ~PORT_BASE_VLAN_FID_3_0_MASK;
-		ret |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK;
+		reg &= ~PORT_BASE_VLAN_FID_3_0_MASK;
+		reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN,
-					   ret);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg);
+		if (err)
+			return err;
 	}
 
 	/* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_1);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, &reg);
+	if (err)
+		return err;
 
-	fid |= (ret & upper_mask) << 4;
+	fid |= (reg & upper_mask) << 4;
 
 	if (new) {
-		ret &= ~upper_mask;
-		ret |= (*new >> 4) & upper_mask;
+		reg &= ~upper_mask;
+		reg |= (*new >> 4) & upper_mask;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1,
-					   ret);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg);
+		if (err)
+			return err;
 
 		netdev_dbg(ds->ports[port].netdev,
 			   "FID %d (was %d)\n", *new, fid);
@@ -1647,13 +1647,13 @@
 static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid)
 {
 	DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID);
-	struct mv88e6xxx_vtu_stu_entry vlan;
+	struct mv88e6xxx_vtu_entry vlan;
 	int i, err;
 
 	bitmap_zero(fid_bitmap, MV88E6XXX_N_FID);
 
 	/* Set every FID bit used by the (un)bridged ports */
-	for (i = 0; i < chip->info->num_ports; ++i) {
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 		err = _mv88e6xxx_port_fid_get(chip, i, fid);
 		if (err)
 			return err;
@@ -1689,10 +1689,10 @@
 }
 
 static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid,
-			      struct mv88e6xxx_vtu_stu_entry *entry)
+			      struct mv88e6xxx_vtu_entry *entry)
 {
 	struct dsa_switch *ds = chip->ds;
-	struct mv88e6xxx_vtu_stu_entry vlan = {
+	struct mv88e6xxx_vtu_entry vlan = {
 		.valid = true,
 		.vid = vid,
 	};
@@ -1703,14 +1703,14 @@
 		return err;
 
 	/* exclude all ports except the CPU and DSA ports */
-	for (i = 0; i < chip->info->num_ports; ++i)
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i)
 		vlan.data[i] = dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i)
 			? GLOBAL_VTU_DATA_MEMBER_TAG_UNMODIFIED
 			: GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER;
 
 	if (mv88e6xxx_6097_family(chip) || mv88e6xxx_6165_family(chip) ||
 	    mv88e6xxx_6351_family(chip) || mv88e6xxx_6352_family(chip)) {
-		struct mv88e6xxx_vtu_stu_entry vstp;
+		struct mv88e6xxx_vtu_entry vstp;
 
 		/* Adding a VTU entry requires a valid STU entry. As VSTP is not
 		 * implemented, only one STU entry is needed to cover all VTU
@@ -1737,7 +1737,7 @@
 }
 
 static int _mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid,
-			      struct mv88e6xxx_vtu_stu_entry *entry, bool creat)
+			      struct mv88e6xxx_vtu_entry *entry, bool creat)
 {
 	int err;
 
@@ -1768,8 +1768,8 @@
 static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
 					u16 vid_begin, u16 vid_end)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	struct mv88e6xxx_vtu_stu_entry vlan;
+	struct mv88e6xxx_chip *chip = ds->priv;
+	struct mv88e6xxx_vtu_entry vlan;
 	int i, err;
 
 	if (!vid_begin)
@@ -1792,7 +1792,7 @@
 		if (vlan.vid > vid_end)
 			break;
 
-		for (i = 0; i < chip->info->num_ports; ++i) {
+		for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 			if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i))
 				continue;
 
@@ -1829,29 +1829,29 @@
 static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port,
 					 bool vlan_filtering)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE :
 		PORT_CONTROL_2_8021Q_DISABLED;
-	int ret;
+	u16 reg;
+	int err;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU))
 		return -EOPNOTSUPP;
 
 	mutex_lock(&chip->reg_lock);
 
-	ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_2);
-	if (ret < 0)
+	err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, &reg);
+	if (err)
 		goto unlock;
 
-	old = ret & PORT_CONTROL_2_8021Q_MASK;
+	old = reg & PORT_CONTROL_2_8021Q_MASK;
 
 	if (new != old) {
-		ret &= ~PORT_CONTROL_2_8021Q_MASK;
-		ret |= new & PORT_CONTROL_2_8021Q_MASK;
+		reg &= ~PORT_CONTROL_2_8021Q_MASK;
+		reg |= new & PORT_CONTROL_2_8021Q_MASK;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_2,
-					   ret);
-		if (ret < 0)
+		err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg);
+		if (err)
 			goto unlock;
 
 		netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n",
@@ -1859,11 +1859,11 @@
 			   mv88e6xxx_port_8021q_mode_names[old]);
 	}
 
-	ret = 0;
+	err = 0;
 unlock:
 	mutex_unlock(&chip->reg_lock);
 
-	return ret;
+	return err;
 }
 
 static int
@@ -1871,7 +1871,7 @@
 			    const struct switchdev_obj_port_vlan *vlan,
 			    struct switchdev_trans *trans)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU))
@@ -1894,7 +1894,7 @@
 static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_chip *chip, int port,
 				    u16 vid, bool untagged)
 {
-	struct mv88e6xxx_vtu_stu_entry vlan;
+	struct mv88e6xxx_vtu_entry vlan;
 	int err;
 
 	err = _mv88e6xxx_vtu_get(chip, vid, &vlan, true);
@@ -1912,7 +1912,7 @@
 				    const struct switchdev_obj_port_vlan *vlan,
 				    struct switchdev_trans *trans)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
 	bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
 	u16 vid;
@@ -1939,7 +1939,7 @@
 				    int port, u16 vid)
 {
 	struct dsa_switch *ds = chip->ds;
-	struct mv88e6xxx_vtu_stu_entry vlan;
+	struct mv88e6xxx_vtu_entry vlan;
 	int i, err;
 
 	err = _mv88e6xxx_vtu_get(chip, vid, &vlan, false);
@@ -1954,7 +1954,7 @@
 
 	/* keep the VLAN unless all ports are excluded */
 	vlan.valid = false;
-	for (i = 0; i < chip->info->num_ports; ++i) {
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 		if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i))
 			continue;
 
@@ -1974,7 +1974,7 @@
 static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port,
 				   const struct switchdev_obj_port_vlan *vlan)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	u16 pvid, vid;
 	int err = 0;
 
@@ -2008,14 +2008,13 @@
 static int _mv88e6xxx_atu_mac_write(struct mv88e6xxx_chip *chip,
 				    const unsigned char *addr)
 {
-	int i, ret;
+	int i, err;
 
 	for (i = 0; i < 3; i++) {
-		ret = _mv88e6xxx_reg_write(
-			chip, REG_GLOBAL, GLOBAL_ATU_MAC_01 + i,
-			(addr[i * 2] << 8) | addr[i * 2 + 1]);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_MAC_01 + i,
+					 (addr[i * 2] << 8) | addr[i * 2 + 1]);
+		if (err)
+			return err;
 	}
 
 	return 0;
@@ -2024,15 +2023,16 @@
 static int _mv88e6xxx_atu_mac_read(struct mv88e6xxx_chip *chip,
 				   unsigned char *addr)
 {
-	int i, ret;
+	u16 val;
+	int i, err;
 
 	for (i = 0; i < 3; i++) {
-		ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL,
-					  GLOBAL_ATU_MAC_01 + i);
-		if (ret < 0)
-			return ret;
-		addr[i * 2] = ret >> 8;
-		addr[i * 2 + 1] = ret & 0xff;
+		err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_MAC_01 + i, &val);
+		if (err)
+			return err;
+
+		addr[i * 2] = val >> 8;
+		addr[i * 2 + 1] = val & 0xff;
 	}
 
 	return 0;
@@ -2058,12 +2058,48 @@
 	return _mv88e6xxx_atu_cmd(chip, entry->fid, GLOBAL_ATU_OP_LOAD_DB);
 }
 
-static int _mv88e6xxx_port_fdb_load(struct mv88e6xxx_chip *chip, int port,
-				    const unsigned char *addr, u16 vid,
-				    u8 state)
+static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid,
+				  struct mv88e6xxx_atu_entry *entry);
+
+static int mv88e6xxx_atu_get(struct mv88e6xxx_chip *chip, int fid,
+			     const u8 *addr, struct mv88e6xxx_atu_entry *entry)
 {
-	struct mv88e6xxx_atu_entry entry = { 0 };
-	struct mv88e6xxx_vtu_stu_entry vlan;
+	struct mv88e6xxx_atu_entry next;
+	int err;
+
+	eth_broadcast_addr(next.mac);
+
+	err = _mv88e6xxx_atu_mac_write(chip, next.mac);
+	if (err)
+		return err;
+
+	do {
+		err = _mv88e6xxx_atu_getnext(chip, fid, &next);
+		if (err)
+			return err;
+
+		if (next.state == GLOBAL_ATU_DATA_STATE_UNUSED)
+			break;
+
+		if (ether_addr_equal(next.mac, addr)) {
+			*entry = next;
+			return 0;
+		}
+	} while (!is_broadcast_ether_addr(next.mac));
+
+	memset(entry, 0, sizeof(*entry));
+	entry->fid = fid;
+	ether_addr_copy(entry->mac, addr);
+
+	return 0;
+}
+
+static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
+					const unsigned char *addr, u16 vid,
+					u8 state)
+{
+	struct mv88e6xxx_vtu_entry vlan;
+	struct mv88e6xxx_atu_entry entry;
 	int err;
 
 	/* Null VLAN ID corresponds to the port private database */
@@ -2074,12 +2110,18 @@
 	if (err)
 		return err;
 
-	entry.fid = vlan.fid;
-	entry.state = state;
-	ether_addr_copy(entry.mac, addr);
-	if (state != GLOBAL_ATU_DATA_STATE_UNUSED) {
-		entry.trunk = false;
-		entry.portv_trunkid = BIT(port);
+	err = mv88e6xxx_atu_get(chip, vlan.fid, addr, &entry);
+	if (err)
+		return err;
+
+	/* Purge the ATU entry only if no port is using it anymore */
+	if (state == GLOBAL_ATU_DATA_STATE_UNUSED) {
+		entry.portv_trunkid &= ~BIT(port);
+		if (!entry.portv_trunkid)
+			entry.state = GLOBAL_ATU_DATA_STATE_UNUSED;
+	} else {
+		entry.portv_trunkid |= BIT(port);
+		entry.state = state;
 	}
 
 	return _mv88e6xxx_atu_load(chip, &entry);
@@ -2099,61 +2141,59 @@
 				   const struct switchdev_obj_port_fdb *fdb,
 				   struct switchdev_trans *trans)
 {
-	int state = is_multicast_ether_addr(fdb->addr) ?
-		GLOBAL_ATU_DATA_STATE_MC_STATIC :
-		GLOBAL_ATU_DATA_STATE_UC_STATIC;
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 
 	mutex_lock(&chip->reg_lock);
-	if (_mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid, state))
-		netdev_err(ds->ports[port].netdev,
-			   "failed to load MAC address\n");
+	if (mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid,
+					 GLOBAL_ATU_DATA_STATE_UC_STATIC))
+		netdev_err(ds->ports[port].netdev, "failed to load unicast MAC address\n");
 	mutex_unlock(&chip->reg_lock);
 }
 
 static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port,
 				  const struct switchdev_obj_port_fdb *fdb)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	int ret;
+	struct mv88e6xxx_chip *chip = ds->priv;
+	int err;
 
 	mutex_lock(&chip->reg_lock);
-	ret = _mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid,
-				       GLOBAL_ATU_DATA_STATE_UNUSED);
+	err = mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid,
+					   GLOBAL_ATU_DATA_STATE_UNUSED);
 	mutex_unlock(&chip->reg_lock);
 
-	return ret;
+	return err;
 }
 
 static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid,
 				  struct mv88e6xxx_atu_entry *entry)
 {
 	struct mv88e6xxx_atu_entry next = { 0 };
-	int ret;
+	u16 val;
+	int err;
 
 	next.fid = fid;
 
-	ret = _mv88e6xxx_atu_wait(chip);
-	if (ret < 0)
-		return ret;
+	err = _mv88e6xxx_atu_wait(chip);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_atu_cmd(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB);
-	if (ret < 0)
-		return ret;
+	err = _mv88e6xxx_atu_cmd(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_atu_mac_read(chip, next.mac);
-	if (ret < 0)
-		return ret;
+	err = _mv88e6xxx_atu_mac_read(chip, next.mac);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_ATU_DATA);
-	if (ret < 0)
-		return ret;
+	err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_DATA, &val);
+	if (err)
+		return err;
 
-	next.state = ret & GLOBAL_ATU_DATA_STATE_MASK;
+	next.state = val & GLOBAL_ATU_DATA_STATE_MASK;
 	if (next.state != GLOBAL_ATU_DATA_STATE_UNUSED) {
 		unsigned int mask, shift;
 
-		if (ret & GLOBAL_ATU_DATA_TRUNK) {
+		if (val & GLOBAL_ATU_DATA_TRUNK) {
 			next.trunk = true;
 			mask = GLOBAL_ATU_DATA_TRUNK_ID_MASK;
 			shift = GLOBAL_ATU_DATA_TRUNK_ID_SHIFT;
@@ -2163,17 +2203,17 @@
 			shift = GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT;
 		}
 
-		next.portv_trunkid = (ret & mask) >> shift;
+		next.portv_trunkid = (val & mask) >> shift;
 	}
 
 	*entry = next;
 	return 0;
 }
 
-static int _mv88e6xxx_port_fdb_dump_one(struct mv88e6xxx_chip *chip,
-					u16 fid, u16 vid, int port,
-					struct switchdev_obj_port_fdb *fdb,
-					int (*cb)(struct switchdev_obj *obj))
+static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip,
+				      u16 fid, u16 vid, int port,
+				      struct switchdev_obj *obj,
+				      int (*cb)(struct switchdev_obj *obj))
 {
 	struct mv88e6xxx_atu_entry addr = {
 		.mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
@@ -2187,72 +2227,98 @@
 	do {
 		err = _mv88e6xxx_atu_getnext(chip, fid, &addr);
 		if (err)
-			break;
+			return err;
 
 		if (addr.state == GLOBAL_ATU_DATA_STATE_UNUSED)
 			break;
 
-		if (!addr.trunk && addr.portv_trunkid & BIT(port)) {
-			bool is_static = addr.state ==
-				(is_multicast_ether_addr(addr.mac) ?
-				 GLOBAL_ATU_DATA_STATE_MC_STATIC :
-				 GLOBAL_ATU_DATA_STATE_UC_STATIC);
+		if (addr.trunk || (addr.portv_trunkid & BIT(port)) == 0)
+			continue;
 
+		if (obj->id == SWITCHDEV_OBJ_ID_PORT_FDB) {
+			struct switchdev_obj_port_fdb *fdb;
+
+			if (!is_unicast_ether_addr(addr.mac))
+				continue;
+
+			fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
 			fdb->vid = vid;
 			ether_addr_copy(fdb->addr, addr.mac);
-			fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
+			if (addr.state == GLOBAL_ATU_DATA_STATE_UC_STATIC)
+				fdb->ndm_state = NUD_NOARP;
+			else
+				fdb->ndm_state = NUD_REACHABLE;
+		} else if (obj->id == SWITCHDEV_OBJ_ID_PORT_MDB) {
+			struct switchdev_obj_port_mdb *mdb;
 
-			err = cb(&fdb->obj);
-			if (err)
-				break;
+			if (!is_multicast_ether_addr(addr.mac))
+				continue;
+
+			mdb = SWITCHDEV_OBJ_PORT_MDB(obj);
+			mdb->vid = vid;
+			ether_addr_copy(mdb->addr, addr.mac);
+		} else {
+			return -EOPNOTSUPP;
 		}
+
+		err = cb(obj);
+		if (err)
+			return err;
 	} while (!is_broadcast_ether_addr(addr.mac));
 
 	return err;
 }
 
+static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port,
+				  struct switchdev_obj *obj,
+				  int (*cb)(struct switchdev_obj *obj))
+{
+	struct mv88e6xxx_vtu_entry vlan = {
+		.vid = GLOBAL_VTU_VID_MASK, /* all ones */
+	};
+	u16 fid;
+	int err;
+
+	/* Dump port's default Filtering Information Database (VLAN ID 0) */
+	err = _mv88e6xxx_port_fid_get(chip, port, &fid);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_port_db_dump_fid(chip, fid, 0, port, obj, cb);
+	if (err)
+		return err;
+
+	/* Dump VLANs' Filtering Information Databases */
+	err = _mv88e6xxx_vtu_vid_write(chip, vlan.vid);
+	if (err)
+		return err;
+
+	do {
+		err = _mv88e6xxx_vtu_getnext(chip, &vlan);
+		if (err)
+			return err;
+
+		if (!vlan.valid)
+			break;
+
+		err = mv88e6xxx_port_db_dump_fid(chip, vlan.fid, vlan.vid, port,
+						 obj, cb);
+		if (err)
+			return err;
+	} while (vlan.vid < GLOBAL_VTU_VID_MASK);
+
+	return err;
+}
+
 static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port,
 				   struct switchdev_obj_port_fdb *fdb,
 				   int (*cb)(struct switchdev_obj *obj))
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	struct mv88e6xxx_vtu_stu_entry vlan = {
-		.vid = GLOBAL_VTU_VID_MASK, /* all ones */
-	};
-	u16 fid;
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
 	mutex_lock(&chip->reg_lock);
-
-	/* Dump port's default Filtering Information Database (VLAN ID 0) */
-	err = _mv88e6xxx_port_fid_get(chip, port, &fid);
-	if (err)
-		goto unlock;
-
-	err = _mv88e6xxx_port_fdb_dump_one(chip, fid, 0, port, fdb, cb);
-	if (err)
-		goto unlock;
-
-	/* Dump VLANs' Filtering Information Databases */
-	err = _mv88e6xxx_vtu_vid_write(chip, vlan.vid);
-	if (err)
-		goto unlock;
-
-	do {
-		err = _mv88e6xxx_vtu_getnext(chip, &vlan);
-		if (err)
-			break;
-
-		if (!vlan.valid)
-			break;
-
-		err = _mv88e6xxx_port_fdb_dump_one(chip, vlan.fid, vlan.vid,
-						   port, fdb, cb);
-		if (err)
-			break;
-	} while (vlan.vid < GLOBAL_VTU_VID_MASK);
-
-unlock:
+	err = mv88e6xxx_port_db_dump(chip, port, &fdb->obj, cb);
 	mutex_unlock(&chip->reg_lock);
 
 	return err;
@@ -2261,7 +2327,7 @@
 static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
 				      struct net_device *bridge)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int i, err = 0;
 
 	mutex_lock(&chip->reg_lock);
@@ -2269,7 +2335,7 @@
 	/* Assign the bridge and remap each port's VLANTable */
 	chip->ports[port].bridge_dev = bridge;
 
-	for (i = 0; i < chip->info->num_ports; ++i) {
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) {
 		if (chip->ports[i].bridge_dev == bridge) {
 			err = _mv88e6xxx_port_based_vlan_map(chip, i);
 			if (err)
@@ -2284,7 +2350,7 @@
 
 static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	struct net_device *bridge = chip->ports[port].bridge_dev;
 	int i;
 
@@ -2293,7 +2359,7 @@
 	/* Unassign the bridge and remap each port's VLANTable */
 	chip->ports[port].bridge_dev = NULL;
 
-	for (i = 0; i < chip->info->num_ports; ++i)
+	for (i = 0; i < mv88e6xxx_num_ports(chip); ++i)
 		if (i == port || chip->ports[i].bridge_dev == bridge)
 			if (_mv88e6xxx_port_based_vlan_map(chip, i))
 				netdev_warn(ds->ports[i].netdev,
@@ -2302,57 +2368,26 @@
 	mutex_unlock(&chip->reg_lock);
 }
 
-static int _mv88e6xxx_mdio_page_write(struct mv88e6xxx_chip *chip,
-				      int port, int page, int reg, int val)
-{
-	int ret;
-
-	ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page);
-	if (ret < 0)
-		goto restore_page_0;
-
-	ret = mv88e6xxx_mdio_write_indirect(chip, port, reg, val);
-restore_page_0:
-	mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0);
-
-	return ret;
-}
-
-static int _mv88e6xxx_mdio_page_read(struct mv88e6xxx_chip *chip,
-				     int port, int page, int reg)
-{
-	int ret;
-
-	ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page);
-	if (ret < 0)
-		goto restore_page_0;
-
-	ret = mv88e6xxx_mdio_read_indirect(chip, port, reg);
-restore_page_0:
-	mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0);
-
-	return ret;
-}
-
 static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip)
 {
 	bool ppu_active = mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE);
 	u16 is_reset = (ppu_active ? 0x8800 : 0xc800);
 	struct gpio_desc *gpiod = chip->reset;
 	unsigned long timeout;
-	int ret;
+	u16 reg;
+	int err;
 	int i;
 
 	/* Set all ports to the disabled state. */
-	for (i = 0; i < chip->info->num_ports; i++) {
-		ret = _mv88e6xxx_reg_read(chip, REG_PORT(i), PORT_CONTROL);
-		if (ret < 0)
-			return ret;
+	for (i = 0; i < mv88e6xxx_num_ports(chip); i++) {
+		err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, &reg);
+		if (err)
+			return err;
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(i), PORT_CONTROL,
-					   ret & 0xfffc);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, i, PORT_CONTROL,
+					   reg & 0xfffc);
+		if (err)
+			return err;
 	}
 
 	/* Wait for transmit queues to drain. */
@@ -2371,65 +2406,53 @@
 	 * through global registers 0x18 and 0x19.
 	 */
 	if (ppu_active)
-		ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000);
+		err = mv88e6xxx_g1_write(chip, 0x04, 0xc000);
 	else
-		ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400);
-	if (ret)
-		return ret;
+		err = mv88e6xxx_g1_write(chip, 0x04, 0xc400);
+	if (err)
+		return err;
 
 	/* Wait up to one second for reset to complete. */
 	timeout = jiffies + 1 * HZ;
 	while (time_before(jiffies, timeout)) {
-		ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, 0x00);
-		if (ret < 0)
-			return ret;
+		err = mv88e6xxx_g1_read(chip, 0x00, &reg);
+		if (err)
+			return err;
 
-		if ((ret & is_reset) == is_reset)
+		if ((reg & is_reset) == is_reset)
 			break;
 		usleep_range(1000, 2000);
 	}
 	if (time_after(jiffies, timeout))
-		ret = -ETIMEDOUT;
+		err = -ETIMEDOUT;
 	else
-		ret = 0;
+		err = 0;
 
-	return ret;
+	return err;
 }
 
-static int mv88e6xxx_power_on_serdes(struct mv88e6xxx_chip *chip)
+static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip)
 {
-	int ret;
+	u16 val;
+	int err;
 
-	ret = _mv88e6xxx_mdio_page_read(chip, REG_FIBER_SERDES,
-					PAGE_FIBER_SERDES, MII_BMCR);
-	if (ret < 0)
-		return ret;
+	/* Clear Power Down bit */
+	err = mv88e6xxx_serdes_read(chip, MII_BMCR, &val);
+	if (err)
+		return err;
 
-	if (ret & BMCR_PDOWN) {
-		ret &= ~BMCR_PDOWN;
-		ret = _mv88e6xxx_mdio_page_write(chip, REG_FIBER_SERDES,
-						 PAGE_FIBER_SERDES, MII_BMCR,
-						 ret);
+	if (val & BMCR_PDOWN) {
+		val &= ~BMCR_PDOWN;
+		err = mv88e6xxx_serdes_write(chip, MII_BMCR, val);
 	}
 
-	return ret;
-}
-
-static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port,
-			       int reg, u16 *val)
-{
-	int addr = chip->info->port_base_addr + port;
-
-	if (port >= chip->info->num_ports)
-		return -EINVAL;
-
-	return mv88e6xxx_read(chip, addr, reg, val);
+	return err;
 }
 
 static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 {
 	struct dsa_switch *ds = chip->ds;
-	int ret;
+	int err;
 	u16 reg;
 
 	if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
@@ -2442,7 +2465,7 @@
 		 * and all DSA ports to their maximum bandwidth and
 		 * full duplex.
 		 */
-		reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL);
+		err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, &reg);
 		if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) {
 			reg &= ~PORT_PCS_CTRL_UNFORCED;
 			reg |= PORT_PCS_CTRL_FORCE_LINK |
@@ -2457,10 +2480,9 @@
 			reg |= PORT_PCS_CTRL_UNFORCED;
 		}
 
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_PCS_CTRL, reg);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg);
+		if (err)
+			return err;
 	}
 
 	/* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
@@ -2486,28 +2508,13 @@
 		PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP |
 		PORT_CONTROL_STATE_FORWARDING;
 	if (dsa_is_cpu_port(ds, port)) {
-		if (mv88e6xxx_6095_family(chip) || mv88e6xxx_6185_family(chip))
-			reg |= PORT_CONTROL_DSA_TAG;
-		if (mv88e6xxx_6352_family(chip) ||
-		    mv88e6xxx_6351_family(chip) ||
-		    mv88e6xxx_6165_family(chip) ||
-		    mv88e6xxx_6097_family(chip) ||
-		    mv88e6xxx_6320_family(chip)) {
+		if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA))
 			reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA |
-				PORT_CONTROL_FORWARD_UNKNOWN |
 				PORT_CONTROL_FORWARD_UNKNOWN_MC;
-		}
-
-		if (mv88e6xxx_6352_family(chip) ||
-		    mv88e6xxx_6351_family(chip) ||
-		    mv88e6xxx_6165_family(chip) ||
-		    mv88e6xxx_6097_family(chip) ||
-		    mv88e6xxx_6095_family(chip) ||
-		    mv88e6xxx_6065_family(chip) ||
-		    mv88e6xxx_6185_family(chip) ||
-		    mv88e6xxx_6320_family(chip)) {
-			reg |= PORT_CONTROL_EGRESS_ADD_TAG;
-		}
+		else
+			reg |= PORT_CONTROL_DSA_TAG;
+		reg |= PORT_CONTROL_EGRESS_ADD_TAG |
+			PORT_CONTROL_FORWARD_UNKNOWN;
 	}
 	if (dsa_is_dsa_port(ds, port)) {
 		if (mv88e6xxx_6095_family(chip) ||
@@ -2526,26 +2533,25 @@
 				PORT_CONTROL_FORWARD_UNKNOWN_MC;
 	}
 	if (reg) {
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_CONTROL, reg);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg);
+		if (err)
+			return err;
 	}
 
 	/* If this port is connected to a SerDes, make sure the SerDes is not
 	 * powered down.
 	 */
-	if (mv88e6xxx_6352_family(chip)) {
-		ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS);
-		if (ret < 0)
-			return ret;
-		ret &= PORT_STATUS_CMODE_MASK;
-		if ((ret == PORT_STATUS_CMODE_100BASE_X) ||
-		    (ret == PORT_STATUS_CMODE_1000BASE_X) ||
-		    (ret == PORT_STATUS_CMODE_SGMII)) {
-			ret = mv88e6xxx_power_on_serdes(chip);
-			if (ret < 0)
-				return ret;
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) {
+		err = mv88e6xxx_port_read(chip, port, PORT_STATUS, &reg);
+		if (err)
+			return err;
+		reg &= PORT_STATUS_CMODE_MASK;
+		if ((reg == PORT_STATUS_CMODE_100BASE_X) ||
+		    (reg == PORT_STATUS_CMODE_1000BASE_X) ||
+		    (reg == PORT_STATUS_CMODE_SGMII)) {
+			err = mv88e6xxx_serdes_power_on(chip);
+			if (err < 0)
+				return err;
 		}
 	}
 
@@ -2579,10 +2585,9 @@
 	reg |= PORT_CONTROL_2_8021Q_DISABLED;
 
 	if (reg) {
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_CONTROL_2, reg);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg);
+		if (err)
+			return err;
 	}
 
 	/* Port Association Vector: when learning source addresses
@@ -2595,16 +2600,14 @@
 	if (dsa_is_cpu_port(ds, port))
 		reg = 0;
 
-	ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_ASSOC_VECTOR,
-				   reg);
-	if (ret)
-		return ret;
+	err = mv88e6xxx_port_write(chip, port, PORT_ASSOC_VECTOR, reg);
+	if (err)
+		return err;
 
 	/* Egress rate control 2: disable egress rate control. */
-	ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL_2,
-				   0x0000);
-	if (ret)
-		return ret;
+	err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL_2, 0x0000);
+	if (err)
+		return err;
 
 	if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
 	    mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
@@ -2613,107 +2616,108 @@
 		 * be paused for by the remote end or the period of
 		 * time that this port can pause the remote end.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_PAUSE_CTRL, 0x0000);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_PAUSE_CTRL, 0x0000);
+		if (err)
+			return err;
 
 		/* Port ATU control: disable limiting the number of
 		 * address database entries that this port is allowed
 		 * to use.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_ATU_CONTROL, 0x0000);
+		err = mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL,
+					   0x0000);
 		/* Priority Override: disable DA, SA and VTU priority
 		 * override.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_PRI_OVERRIDE, 0x0000);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE,
+					   0x0000);
+		if (err)
+			return err;
 
 		/* Port Ethertype: use the Ethertype DSA Ethertype
 		 * value.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_ETH_TYPE, ETH_P_EDSA);
-		if (ret)
-			return ret;
+		if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) {
+			err = mv88e6xxx_port_write(chip, port, PORT_ETH_TYPE,
+						   ETH_P_EDSA);
+			if (err)
+				return err;
+		}
+
 		/* Tag Remap: use an identity 802.1p prio -> switch
 		 * prio mapping.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_TAG_REGMAP_0123, 0x3210);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_0123,
+					   0x3210);
+		if (err)
+			return err;
 
 		/* Tag Remap 2: use an identity 802.1p prio -> switch
 		 * prio mapping.
 		 */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_TAG_REGMAP_4567, 0x7654);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_4567,
+					   0x7654);
+		if (err)
+			return err;
 	}
 
+	/* Rate Control: disable ingress rate limiting. */
 	if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) ||
 	    mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) ||
-	    mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip) ||
 	    mv88e6xxx_6320_family(chip)) {
-		/* Rate Control: disable ingress rate limiting. */
-		ret = _mv88e6xxx_reg_write(chip, REG_PORT(port),
-					   PORT_RATE_CONTROL, 0x0001);
-		if (ret)
-			return ret;
+		err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL,
+					   0x0001);
+		if (err)
+			return err;
+	} else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) {
+		err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL,
+					   0x0000);
+		if (err)
+			return err;
 	}
 
 	/* Port Control 1: disable trunking, disable sending
 	 * learning messages to this port.
 	 */
-	ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1,
-				   0x0000);
-	if (ret)
-		return ret;
+	err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, 0x0000);
+	if (err)
+		return err;
 
 	/* Port based VLAN map: give each port the same default address
 	 * database, and allow bidirectional communication between the
 	 * CPU and DSA port(s), and the other ports.
 	 */
-	ret = _mv88e6xxx_port_fid_set(chip, port, 0);
-	if (ret)
-		return ret;
+	err = _mv88e6xxx_port_fid_set(chip, port, 0);
+	if (err)
+		return err;
 
-	ret = _mv88e6xxx_port_based_vlan_map(chip, port);
-	if (ret)
-		return ret;
+	err = _mv88e6xxx_port_based_vlan_map(chip, port);
+	if (err)
+		return err;
 
 	/* Default VLAN ID and priority: don't set a default VLAN
 	 * ID, and set the default packet priority to zero.
 	 */
-	ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_DEFAULT_VLAN,
-				   0x0000);
-	if (ret)
-		return ret;
-
-	return 0;
+	return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000);
 }
 
-static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
+int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
 {
 	int err;
 
-	err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_01,
-			      (addr[0] << 8) | addr[1]);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_01, (addr[0] << 8) | addr[1]);
 	if (err)
 		return err;
 
-	err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_23,
-			      (addr[2] << 8) | addr[3]);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]);
 	if (err)
 		return err;
 
-	return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_45,
-			       (addr[4] << 8) | addr[5]);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]);
+	if (err)
+		return err;
+
+	return 0;
 }
 
 static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip,
@@ -2732,7 +2736,7 @@
 	/* Round to nearest multiple of coeff */
 	age_time = (msecs + coeff / 2) / coeff;
 
-	err = mv88e6xxx_read(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, &val);
+	err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val);
 	if (err)
 		return err;
 
@@ -2740,13 +2744,13 @@
 	val &= ~0xff0;
 	val |= age_time << 4;
 
-	return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, val);
+	return mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, val);
 }
 
 static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds,
 				     unsigned int ageing_time)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
 	mutex_lock(&chip->reg_lock);
@@ -2771,7 +2775,7 @@
 	    mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE))
 		reg |= GLOBAL_CONTROL_PPU_ENABLE;
 
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, reg);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, reg);
 	if (err)
 		return err;
 
@@ -2781,15 +2785,14 @@
 	reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT |
 		upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT |
 		upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT;
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_MONITOR_CONTROL,
-				   reg);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_MONITOR_CONTROL, reg);
 	if (err)
 		return err;
 
 	/* Disable remote management, and set the switch's DSA device number. */
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL_2,
-				   GLOBAL_CONTROL_2_MULTIPLE_CASCADE |
-				   (ds->index & 0x1f));
+	err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL_2,
+				 GLOBAL_CONTROL_2_MULTIPLE_CASCADE |
+				 (ds->index & 0x1f));
 	if (err)
 		return err;
 
@@ -2802,8 +2805,8 @@
 	 * enable address learn messages to be sent to all message
 	 * ports.
 	 */
-	err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL,
-			      GLOBAL_ATU_CONTROL_LEARN2ALL);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL,
+				 GLOBAL_ATU_CONTROL_LEARN2ALL);
 	if (err)
 		return err;
 
@@ -2817,39 +2820,39 @@
 		return err;
 
 	/* Configure the IP ToS mapping registers. */
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_0, 0x0000);
 	if (err)
 		return err;
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_1, 0x0000);
 	if (err)
 		return err;
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_2, 0x5555);
 	if (err)
 		return err;
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_3, 0x5555);
 	if (err)
 		return err;
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_4, 0xaaaa);
 	if (err)
 		return err;
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_5, 0xaaaa);
 	if (err)
 		return err;
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_6, 0xffff);
 	if (err)
 		return err;
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_7, 0xffff);
 	if (err)
 		return err;
 
 	/* Configure the IEEE 802.1p priority mapping register. */
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_IEEE_PRI, 0xfa41);
 	if (err)
 		return err;
 
 	/* Clear the statistics counters for all ports */
-	err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP,
-				   GLOBAL_STATS_OP_FLUSH_ALL);
+	err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP,
+				 GLOBAL_STATS_OP_FLUSH_ALL);
 	if (err)
 		return err;
 
@@ -2861,277 +2864,9 @@
 	return 0;
 }
 
-static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
-					     int target, int port)
-{
-	u16 val = (target << 8) | (port & 0xf);
-
-	return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val);
-}
-
-static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip)
-{
-	int target, port;
-	int err;
-
-	/* Initialize the routing port to the 32 possible target devices */
-	for (target = 0; target < 32; ++target) {
-		port = 0xf;
-
-		if (target < DSA_MAX_SWITCHES) {
-			port = chip->ds->rtable[target];
-			if (port == DSA_RTABLE_NONE)
-				port = 0xf;
-		}
-
-		err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num,
-					 bool hask, u16 mask)
-{
-	const u16 port_mask = BIT(chip->info->num_ports) - 1;
-	u16 val = (num << 12) | (mask & port_mask);
-
-	if (hask)
-		val |= GLOBAL2_TRUNK_MASK_HASK;
-
-	return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val);
-}
-
-static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id,
-					    u16 map)
-{
-	const u16 port_mask = BIT(chip->info->num_ports) - 1;
-	u16 val = (id << 11) | (map & port_mask);
-
-	return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val);
-}
-
-static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip)
-{
-	const u16 port_mask = BIT(chip->info->num_ports) - 1;
-	int i, err;
-
-	/* Clear all eight possible Trunk Mask vectors */
-	for (i = 0; i < 8; ++i) {
-		err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask);
-		if (err)
-			return err;
-	}
-
-	/* Clear all sixteen possible Trunk ID routing vectors */
-	for (i = 0; i < 16; ++i) {
-		err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip)
-{
-	int port, err;
-
-	/* Init all Ingress Rate Limit resources of all ports */
-	for (port = 0; port < chip->info->num_ports; ++port) {
-		/* XXX newer chips (like 88E6390) have different 2-bit ops */
-		err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
-				      GLOBAL2_IRL_CMD_OP_INIT_ALL |
-				      (port << 8));
-		if (err)
-			break;
-
-		/* Wait for the operation to complete */
-		err = _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD,
-				      GLOBAL2_IRL_CMD_BUSY);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-/* Indirect write to the Switch MAC/WoL/WoF register */
-static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip,
-					 unsigned int pointer, u8 data)
-{
-	u16 val = (pointer << 8) | data;
-
-	return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val);
-}
-
-static int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
-{
-	int i, err;
-
-	for (i = 0; i < 6; i++) {
-		err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
-				  u8 data)
-{
-	u16 val = (pointer << 8) | (data & 0x7);
-
-	return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val);
-}
-
-static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
-{
-	int i, err;
-
-	/* Clear all sixteen possible Priority Override entries */
-	for (i = 0; i < 16; i++) {
-		err = mv88e6xxx_g2_pot_write(chip, i, 0);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
-{
-	return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD,
-			       GLOBAL2_EEPROM_CMD_BUSY |
-			       GLOBAL2_EEPROM_CMD_RUNNING);
-}
-
-static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
-{
-	int err;
-
-	err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd);
-	if (err)
-		return err;
-
-	return mv88e6xxx_g2_eeprom_wait(chip);
-}
-
-static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip,
-				      u8 addr, u16 *data)
-{
-	u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr;
-	int err;
-
-	err = mv88e6xxx_g2_eeprom_wait(chip);
-	if (err)
-		return err;
-
-	err = mv88e6xxx_g2_eeprom_cmd(chip, cmd);
-	if (err)
-		return err;
-
-	return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
-}
-
-static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip,
-				       u8 addr, u16 data)
-{
-	u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr;
-	int err;
-
-	err = mv88e6xxx_g2_eeprom_wait(chip);
-	if (err)
-		return err;
-
-	err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data);
-	if (err)
-		return err;
-
-	return mv88e6xxx_g2_eeprom_cmd(chip, cmd);
-}
-
-static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
-{
-	u16 reg;
-	int err;
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) {
-		/* Consider the frames with reserved multicast destination
-		 * addresses matching 01:80:c2:00:00:2x as MGMT.
-		 */
-		err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X,
-				      0xffff);
-		if (err)
-			return err;
-	}
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) {
-		/* Consider the frames with reserved multicast destination
-		 * addresses matching 01:80:c2:00:00:0x as MGMT.
-		 */
-		err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X,
-				      0xffff);
-		if (err)
-			return err;
-	}
-
-	/* Ignore removed tag data on doubly tagged packets, disable
-	 * flow control messages, force flow control priority to the
-	 * highest, and send all special multicast frames to the CPU
-	 * port at the highest priority.
-	 */
-	reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4);
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) ||
-	    mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X))
-		reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7;
-	err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg);
-	if (err)
-		return err;
-
-	/* Program the DSA routing table. */
-	err = mv88e6xxx_g2_set_device_mapping(chip);
-	if (err)
-		return err;
-
-	/* Clear all trunk masks and mapping. */
-	err = mv88e6xxx_g2_clear_trunk(chip);
-	if (err)
-		return err;
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) {
-		/* Disable ingress rate limiting by resetting all per port
-		 * ingress rate limit resources to their initial state.
-		 */
-		err = mv88e6xxx_g2_clear_irl(chip);
-			if (err)
-				return err;
-	}
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) {
-		/* Initialize Cross-chip Port VLAN Table to reset defaults */
-		err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR,
-				      GLOBAL2_PVT_ADDR_OP_INIT_ONES);
-		if (err)
-			return err;
-	}
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
-		/* Clear the priority override table. */
-		err = mv88e6xxx_g2_clear_pot(chip);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
 static int mv88e6xxx_setup(struct dsa_switch *ds)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 	int i;
 
@@ -3145,7 +2880,7 @@
 		goto unlock;
 
 	/* Setup Switch Port Registers */
-	for (i = 0; i < chip->info->num_ports; i++) {
+	for (i = 0; i < mv88e6xxx_num_ports(chip); i++) {
 		err = mv88e6xxx_setup_port(chip, i);
 		if (err)
 			goto unlock;
@@ -3171,98 +2906,48 @@
 
 static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
+	if (!chip->info->ops->set_switch_mac)
+		return -EOPNOTSUPP;
+
 	mutex_lock(&chip->reg_lock);
-
-	/* Has an indirect Switch MAC/WoL/WoF register in Global 2? */
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_SWITCH_MAC))
-		err = mv88e6xxx_g2_set_switch_mac(chip, addr);
-	else
-		err = mv88e6xxx_g1_set_switch_mac(chip, addr);
-
+	err = chip->info->ops->set_switch_mac(chip, addr);
 	mutex_unlock(&chip->reg_lock);
 
 	return err;
 }
 
-static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page,
-				    int reg)
-{
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	int ret;
-
-	mutex_lock(&chip->reg_lock);
-	ret = _mv88e6xxx_mdio_page_read(chip, port, page, reg);
-	mutex_unlock(&chip->reg_lock);
-
-	return ret;
-}
-
-static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page,
-				     int reg, int val)
-{
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
-	int ret;
-
-	mutex_lock(&chip->reg_lock);
-	ret = _mv88e6xxx_mdio_page_write(chip, port, page, reg, val);
-	mutex_unlock(&chip->reg_lock);
-
-	return ret;
-}
-
-static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port)
-{
-	if (port >= 0 && port < chip->info->num_ports)
-		return port;
-	return -EINVAL;
-}
-
-static int mv88e6xxx_mdio_read(struct mii_bus *bus, int port, int regnum)
+static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
 {
 	struct mv88e6xxx_chip *chip = bus->priv;
-	int addr = mv88e6xxx_port_to_mdio_addr(chip, port);
-	int ret;
+	u16 val;
+	int err;
 
-	if (addr < 0)
+	if (phy >= mv88e6xxx_num_ports(chip))
 		return 0xffff;
 
 	mutex_lock(&chip->reg_lock);
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
-		ret = mv88e6xxx_mdio_read_ppu(chip, addr, regnum);
-	else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY))
-		ret = mv88e6xxx_mdio_read_indirect(chip, addr, regnum);
-	else
-		ret = mv88e6xxx_mdio_read_direct(chip, addr, regnum);
-
+	err = mv88e6xxx_phy_read(chip, phy, reg, &val);
 	mutex_unlock(&chip->reg_lock);
-	return ret;
+
+	return err ? err : val;
 }
 
-static int mv88e6xxx_mdio_write(struct mii_bus *bus, int port, int regnum,
-				u16 val)
+static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
 {
 	struct mv88e6xxx_chip *chip = bus->priv;
-	int addr = mv88e6xxx_port_to_mdio_addr(chip, port);
-	int ret;
+	int err;
 
-	if (addr < 0)
+	if (phy >= mv88e6xxx_num_ports(chip))
 		return 0xffff;
 
 	mutex_lock(&chip->reg_lock);
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
-		ret = mv88e6xxx_mdio_write_ppu(chip, addr, regnum, val);
-	else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY))
-		ret = mv88e6xxx_mdio_write_indirect(chip, addr, regnum, val);
-	else
-		ret = mv88e6xxx_mdio_write_direct(chip, addr, regnum, val);
-
+	err = mv88e6xxx_phy_write(chip, phy, reg, val);
 	mutex_unlock(&chip->reg_lock);
-	return ret;
+
+	return err;
 }
 
 static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip,
@@ -3272,9 +2957,6 @@
 	struct mii_bus *bus;
 	int err;
 
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
-		mv88e6xxx_ppu_state_init(chip);
-
 	if (np)
 		chip->mdio_np = of_get_child_by_name(np, "mdio");
 
@@ -3329,69 +3011,70 @@
 
 static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
+	u16 val;
 	int ret;
-	int val;
 
 	*temp = 0;
 
 	mutex_lock(&chip->reg_lock);
 
-	ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x6);
+	ret = mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x6);
 	if (ret < 0)
 		goto error;
 
 	/* Enable temperature sensor */
-	ret = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a);
+	ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val);
 	if (ret < 0)
 		goto error;
 
-	ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret | (1 << 5));
+	ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val | (1 << 5));
 	if (ret < 0)
 		goto error;
 
 	/* Wait for temperature to stabilize */
 	usleep_range(10000, 12000);
 
-	val = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a);
-	if (val < 0) {
-		ret = val;
+	ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val);
+	if (ret < 0)
 		goto error;
-	}
 
 	/* Disable temperature sensor */
-	ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret & ~(1 << 5));
+	ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val & ~(1 << 5));
 	if (ret < 0)
 		goto error;
 
 	*temp = ((val & 0x1f) - 5) * 5;
 
 error:
-	mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x0);
+	mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x0);
 	mutex_unlock(&chip->reg_lock);
 	return ret;
 }
 
 static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
+	u16 val;
 	int ret;
 
 	*temp = 0;
 
-	ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 27);
+	mutex_lock(&chip->reg_lock);
+	ret = mv88e6xxx_phy_page_read(chip, phy, 6, 27, &val);
+	mutex_unlock(&chip->reg_lock);
 	if (ret < 0)
 		return ret;
 
-	*temp = (ret & 0xff) - 25;
+	*temp = (val & 0xff) - 25;
 
 	return 0;
 }
 
 static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP))
 		return -EOPNOTSUPP;
@@ -3404,8 +3087,9 @@
 
 static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
+	u16 val;
 	int ret;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT))
@@ -3413,36 +3097,45 @@
 
 	*temp = 0;
 
-	ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26);
+	mutex_lock(&chip->reg_lock);
+	ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val);
+	mutex_unlock(&chip->reg_lock);
 	if (ret < 0)
 		return ret;
 
-	*temp = (((ret >> 8) & 0x1f) * 5) - 25;
+	*temp = (((val >> 8) & 0x1f) * 5) - 25;
 
 	return 0;
 }
 
 static int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
-	int ret;
+	u16 val;
+	int err;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT))
 		return -EOPNOTSUPP;
 
-	ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26);
-	if (ret < 0)
-		return ret;
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val);
+	if (err)
+		goto unlock;
 	temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f);
-	return mv88e6xxx_mdio_page_write(ds, phy, 6, 26,
-					 (ret & 0xe0ff) | (temp << 8));
+	err = mv88e6xxx_phy_page_write(chip, phy, 6, 26,
+				       (val & 0xe0ff) | (temp << 8));
+unlock:
+	mutex_unlock(&chip->reg_lock);
+
+	return err;
 }
 
 static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int phy = mv88e6xxx_6320_family(chip) ? 3 : 0;
+	u16 val;
 	int ret;
 
 	if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT))
@@ -3450,11 +3143,13 @@
 
 	*alarm = false;
 
-	ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26);
+	mutex_lock(&chip->reg_lock);
+	ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val);
+	mutex_unlock(&chip->reg_lock);
 	if (ret < 0)
 		return ret;
 
-	*alarm = !!(ret & 0x40);
+	*alarm = !!(val & 0x40);
 
 	return 0;
 }
@@ -3462,74 +3157,22 @@
 
 static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 
 	return chip->eeprom_len;
 }
 
-static int mv88e6xxx_get_eeprom16(struct mv88e6xxx_chip *chip,
-				  struct ethtool_eeprom *eeprom, u8 *data)
-{
-	unsigned int offset = eeprom->offset;
-	unsigned int len = eeprom->len;
-	u16 val;
-	int err;
-
-	eeprom->len = 0;
-
-	if (offset & 1) {
-		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-		if (err)
-			return err;
-
-		*data++ = (val >> 8) & 0xff;
-
-		offset++;
-		len--;
-		eeprom->len++;
-	}
-
-	while (len >= 2) {
-		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-		if (err)
-			return err;
-
-		*data++ = val & 0xff;
-		*data++ = (val >> 8) & 0xff;
-
-		offset += 2;
-		len -= 2;
-		eeprom->len += 2;
-	}
-
-	if (len) {
-		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-		if (err)
-			return err;
-
-		*data++ = val & 0xff;
-
-		offset++;
-		len--;
-		eeprom->len++;
-	}
-
-	return 0;
-}
-
 static int mv88e6xxx_get_eeprom(struct dsa_switch *ds,
 				struct ethtool_eeprom *eeprom, u8 *data)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
+	if (!chip->info->ops->get_eeprom)
+		return -EOPNOTSUPP;
+
 	mutex_lock(&chip->reg_lock);
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16))
-		err = mv88e6xxx_get_eeprom16(chip, eeprom, data);
-	else
-		err = -EOPNOTSUPP;
-
+	err = chip->info->ops->get_eeprom(chip, eeprom, data);
 	mutex_unlock(&chip->reg_lock);
 
 	if (err)
@@ -3540,93 +3183,139 @@
 	return 0;
 }
 
-static int mv88e6xxx_set_eeprom16(struct mv88e6xxx_chip *chip,
-				  struct ethtool_eeprom *eeprom, u8 *data)
-{
-	unsigned int offset = eeprom->offset;
-	unsigned int len = eeprom->len;
-	u16 val;
-	int err;
-
-	/* Ensure the RO WriteEn bit is set */
-	err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val);
-	if (err)
-		return err;
-
-	if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN))
-		return -EROFS;
-
-	eeprom->len = 0;
-
-	if (offset & 1) {
-		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-		if (err)
-			return err;
-
-		val = (*data++ << 8) | (val & 0xff);
-
-		err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
-		if (err)
-			return err;
-
-		offset++;
-		len--;
-		eeprom->len++;
-	}
-
-	while (len >= 2) {
-		val = *data++;
-		val |= *data++ << 8;
-
-		err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
-		if (err)
-			return err;
-
-		offset += 2;
-		len -= 2;
-		eeprom->len += 2;
-	}
-
-	if (len) {
-		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
-		if (err)
-			return err;
-
-		val = (val & 0xff00) | *data++;
-
-		err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
-		if (err)
-			return err;
-
-		offset++;
-		len--;
-		eeprom->len++;
-	}
-
-	return 0;
-}
-
 static int mv88e6xxx_set_eeprom(struct dsa_switch *ds,
 				struct ethtool_eeprom *eeprom, u8 *data)
 {
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
+	if (!chip->info->ops->set_eeprom)
+		return -EOPNOTSUPP;
+
 	if (eeprom->magic != 0xc3ec4951)
 		return -EINVAL;
 
 	mutex_lock(&chip->reg_lock);
-
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16))
-		err = mv88e6xxx_set_eeprom16(chip, eeprom, data);
-	else
-		err = -EOPNOTSUPP;
-
+	err = chip->info->ops->set_eeprom(chip, eeprom, data);
 	mutex_unlock(&chip->reg_lock);
 
 	return err;
 }
 
+static const struct mv88e6xxx_ops mv88e6085_ops = {
+	.set_switch_mac = mv88e6xxx_g1_set_switch_mac,
+	.phy_read = mv88e6xxx_phy_ppu_read,
+	.phy_write = mv88e6xxx_phy_ppu_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6095_ops = {
+	.set_switch_mac = mv88e6xxx_g1_set_switch_mac,
+	.phy_read = mv88e6xxx_phy_ppu_read,
+	.phy_write = mv88e6xxx_phy_ppu_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6123_ops = {
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_read,
+	.phy_write = mv88e6xxx_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6131_ops = {
+	.set_switch_mac = mv88e6xxx_g1_set_switch_mac,
+	.phy_read = mv88e6xxx_phy_ppu_read,
+	.phy_write = mv88e6xxx_phy_ppu_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6161_ops = {
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_read,
+	.phy_write = mv88e6xxx_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6165_ops = {
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_read,
+	.phy_write = mv88e6xxx_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6171_ops = {
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6172_ops = {
+	.get_eeprom = mv88e6xxx_g2_get_eeprom16,
+	.set_eeprom = mv88e6xxx_g2_set_eeprom16,
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6175_ops = {
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6176_ops = {
+	.get_eeprom = mv88e6xxx_g2_get_eeprom16,
+	.set_eeprom = mv88e6xxx_g2_set_eeprom16,
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6185_ops = {
+	.set_switch_mac = mv88e6xxx_g1_set_switch_mac,
+	.phy_read = mv88e6xxx_phy_ppu_read,
+	.phy_write = mv88e6xxx_phy_ppu_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6240_ops = {
+	.get_eeprom = mv88e6xxx_g2_get_eeprom16,
+	.set_eeprom = mv88e6xxx_g2_set_eeprom16,
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6320_ops = {
+	.get_eeprom = mv88e6xxx_g2_get_eeprom16,
+	.set_eeprom = mv88e6xxx_g2_set_eeprom16,
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6321_ops = {
+	.get_eeprom = mv88e6xxx_g2_get_eeprom16,
+	.set_eeprom = mv88e6xxx_g2_set_eeprom16,
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6350_ops = {
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6351_ops = {
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
+static const struct mv88e6xxx_ops mv88e6352_ops = {
+	.get_eeprom = mv88e6xxx_g2_get_eeprom16,
+	.set_eeprom = mv88e6xxx_g2_set_eeprom16,
+	.set_switch_mac = mv88e6xxx_g2_set_switch_mac,
+	.phy_read = mv88e6xxx_g2_smi_phy_read,
+	.phy_write = mv88e6xxx_g2_smi_phy_write,
+};
+
 static const struct mv88e6xxx_info mv88e6xxx_table[] = {
 	[MV88E6085] = {
 		.prod_num = PORT_SWITCH_ID_PROD_NUM_6085,
@@ -3635,8 +3324,10 @@
 		.num_databases = 4096,
 		.num_ports = 10,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6097,
+		.ops = &mv88e6085_ops,
 	},
 
 	[MV88E6095] = {
@@ -3646,8 +3337,10 @@
 		.num_databases = 256,
 		.num_ports = 11,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6095,
+		.ops = &mv88e6095_ops,
 	},
 
 	[MV88E6123] = {
@@ -3657,8 +3350,10 @@
 		.num_databases = 4096,
 		.num_ports = 3,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6165,
+		.ops = &mv88e6123_ops,
 	},
 
 	[MV88E6131] = {
@@ -3668,8 +3363,10 @@
 		.num_databases = 256,
 		.num_ports = 8,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6185,
+		.ops = &mv88e6131_ops,
 	},
 
 	[MV88E6161] = {
@@ -3679,8 +3376,10 @@
 		.num_databases = 4096,
 		.num_ports = 6,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6165,
+		.ops = &mv88e6161_ops,
 	},
 
 	[MV88E6165] = {
@@ -3690,8 +3389,10 @@
 		.num_databases = 4096,
 		.num_ports = 6,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6165,
+		.ops = &mv88e6165_ops,
 	},
 
 	[MV88E6171] = {
@@ -3701,8 +3402,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6351,
+		.ops = &mv88e6171_ops,
 	},
 
 	[MV88E6172] = {
@@ -3712,8 +3415,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
+		.ops = &mv88e6172_ops,
 	},
 
 	[MV88E6175] = {
@@ -3723,8 +3428,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6351,
+		.ops = &mv88e6175_ops,
 	},
 
 	[MV88E6176] = {
@@ -3734,8 +3441,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
+		.ops = &mv88e6176_ops,
 	},
 
 	[MV88E6185] = {
@@ -3745,8 +3454,10 @@
 		.num_databases = 256,
 		.num_ports = 10,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6185,
+		.ops = &mv88e6185_ops,
 	},
 
 	[MV88E6240] = {
@@ -3756,8 +3467,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
+		.ops = &mv88e6240_ops,
 	},
 
 	[MV88E6320] = {
@@ -3767,8 +3480,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6320,
+		.ops = &mv88e6320_ops,
 	},
 
 	[MV88E6321] = {
@@ -3778,8 +3493,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6320,
+		.ops = &mv88e6321_ops,
 	},
 
 	[MV88E6350] = {
@@ -3789,8 +3506,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6351,
+		.ops = &mv88e6350_ops,
 	},
 
 	[MV88E6351] = {
@@ -3800,8 +3519,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6351,
+		.ops = &mv88e6351_ops,
 	},
 
 	[MV88E6352] = {
@@ -3811,8 +3532,10 @@
 		.num_databases = 4096,
 		.num_ports = 7,
 		.port_base_addr = 0x10,
+		.global1_addr = 0x1b,
 		.age_time_coeff = 15000,
 		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
+		.ops = &mv88e6352_ops,
 	},
 };
 
@@ -3850,6 +3573,10 @@
 	/* Update the compatible info with the probed one */
 	chip->info = info;
 
+	err = mv88e6xxx_g2_require(chip);
+	if (err)
+		return err;
+
 	dev_info(chip->dev, "switch 0x%x detected: %s, revision %u\n",
 		 chip->info->prod_num, chip->info->name, rev);
 
@@ -3871,6 +3598,18 @@
 	return chip;
 }
 
+static void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip)
+{
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
+		mv88e6xxx_ppu_state_init(chip);
+}
+
+static void mv88e6xxx_phy_destroy(struct mv88e6xxx_chip *chip)
+{
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU))
+		mv88e6xxx_ppu_state_destroy(chip);
+}
+
 static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip,
 			      struct mii_bus *bus, int sw_addr)
 {
@@ -3880,7 +3619,7 @@
 
 	if (sw_addr == 0)
 		chip->smi_ops = &mv88e6xxx_smi_single_chip_ops;
-	else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_MULTI_CHIP))
+	else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP))
 		chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops;
 	else
 		return -EINVAL;
@@ -3891,6 +3630,16 @@
 	return 0;
 }
 
+static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA))
+		return DSA_TAG_PROTO_EDSA;
+
+	return DSA_TAG_PROTO_DSA;
+}
+
 static const char *mv88e6xxx_drv_probe(struct device *dsa_dev,
 				       struct device *host_dev, int sw_addr,
 				       void **priv)
@@ -3918,6 +3667,8 @@
 	if (err)
 		goto free;
 
+	mv88e6xxx_phy_init(chip);
+
 	err = mv88e6xxx_mdio_register(chip, NULL);
 	if (err)
 		goto free;
@@ -3931,9 +3682,61 @@
 	return NULL;
 }
 
-static struct dsa_switch_driver mv88e6xxx_switch_driver = {
-	.tag_protocol		= DSA_TAG_PROTO_EDSA,
+static int mv88e6xxx_port_mdb_prepare(struct dsa_switch *ds, int port,
+				      const struct switchdev_obj_port_mdb *mdb,
+				      struct switchdev_trans *trans)
+{
+	/* We don't need any dynamic resource from the kernel (yet),
+	 * so skip the prepare phase.
+	 */
+
+	return 0;
+}
+
+static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port,
+				   const struct switchdev_obj_port_mdb *mdb,
+				   struct switchdev_trans *trans)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+
+	mutex_lock(&chip->reg_lock);
+	if (mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid,
+					 GLOBAL_ATU_DATA_STATE_MC_STATIC))
+		netdev_err(ds->ports[port].netdev, "failed to load multicast MAC address\n");
+	mutex_unlock(&chip->reg_lock);
+}
+
+static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port,
+				  const struct switchdev_obj_port_mdb *mdb)
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	int err;
+
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid,
+					   GLOBAL_ATU_DATA_STATE_UNUSED);
+	mutex_unlock(&chip->reg_lock);
+
+	return err;
+}
+
+static int mv88e6xxx_port_mdb_dump(struct dsa_switch *ds, int port,
+				   struct switchdev_obj_port_mdb *mdb,
+				   int (*cb)(struct switchdev_obj *obj))
+{
+	struct mv88e6xxx_chip *chip = ds->priv;
+	int err;
+
+	mutex_lock(&chip->reg_lock);
+	err = mv88e6xxx_port_db_dump(chip, port, &mdb->obj, cb);
+	mutex_unlock(&chip->reg_lock);
+
+	return err;
+}
+
+static struct dsa_switch_ops mv88e6xxx_switch_ops = {
 	.probe			= mv88e6xxx_drv_probe,
+	.get_tag_protocol	= mv88e6xxx_get_tag_protocol,
 	.setup			= mv88e6xxx_setup,
 	.set_addr		= mv88e6xxx_set_addr,
 	.adjust_link		= mv88e6xxx_adjust_link,
@@ -3957,6 +3760,7 @@
 	.port_bridge_join	= mv88e6xxx_port_bridge_join,
 	.port_bridge_leave	= mv88e6xxx_port_bridge_leave,
 	.port_stp_state_set	= mv88e6xxx_port_stp_state_set,
+	.port_fast_age		= mv88e6xxx_port_fast_age,
 	.port_vlan_filtering	= mv88e6xxx_port_vlan_filtering,
 	.port_vlan_prepare	= mv88e6xxx_port_vlan_prepare,
 	.port_vlan_add		= mv88e6xxx_port_vlan_add,
@@ -3966,6 +3770,10 @@
 	.port_fdb_add           = mv88e6xxx_port_fdb_add,
 	.port_fdb_del           = mv88e6xxx_port_fdb_del,
 	.port_fdb_dump          = mv88e6xxx_port_fdb_dump,
+	.port_mdb_prepare       = mv88e6xxx_port_mdb_prepare,
+	.port_mdb_add           = mv88e6xxx_port_mdb_add,
+	.port_mdb_del           = mv88e6xxx_port_mdb_del,
+	.port_mdb_dump          = mv88e6xxx_port_mdb_dump,
 };
 
 static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip,
@@ -3980,7 +3788,7 @@
 
 	ds->dev = dev;
 	ds->priv = chip;
-	ds->drv = &mv88e6xxx_switch_driver;
+	ds->ops = &mv88e6xxx_switch_ops;
 
 	dev_set_drvdata(dev, ds);
 
@@ -4019,11 +3827,13 @@
 	if (err)
 		return err;
 
+	mv88e6xxx_phy_init(chip);
+
 	chip->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_ASIS);
 	if (IS_ERR(chip->reset))
 		return PTR_ERR(chip->reset);
 
-	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16) &&
+	if (chip->info->ops->get_eeprom &&
 	    !of_property_read_u32(np, "eeprom-length", &eeprom_len))
 		chip->eeprom_len = eeprom_len;
 
@@ -4043,8 +3853,9 @@
 static void mv88e6xxx_remove(struct mdio_device *mdiodev)
 {
 	struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev);
-	struct mv88e6xxx_chip *chip = ds_to_priv(ds);
+	struct mv88e6xxx_chip *chip = ds->priv;
 
+	mv88e6xxx_phy_destroy(chip);
 	mv88e6xxx_unregister_switch(chip);
 	mv88e6xxx_mdio_unregister(chip);
 }
@@ -4070,7 +3881,7 @@
 
 static int __init mv88e6xxx_init(void)
 {
-	register_switch_driver(&mv88e6xxx_switch_driver);
+	register_switch_driver(&mv88e6xxx_switch_ops);
 	return mdio_driver_register(&mv88e6xxx_driver);
 }
 module_init(mv88e6xxx_init);
@@ -4078,7 +3889,7 @@
 static void __exit mv88e6xxx_cleanup(void)
 {
 	mdio_driver_unregister(&mv88e6xxx_driver);
-	unregister_switch_driver(&mv88e6xxx_switch_driver);
+	unregister_switch_driver(&mv88e6xxx_switch_ops);
 }
 module_exit(mv88e6xxx_cleanup);
 
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
new file mode 100644
index 0000000..d358720
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -0,0 +1,34 @@
+/*
+ * Marvell 88E6xxx Switch Global (1) Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "mv88e6xxx.h"
+#include "global1.h"
+
+int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+	int addr = chip->info->global1_addr;
+
+	return mv88e6xxx_read(chip, addr, reg, val);
+}
+
+int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+	int addr = chip->info->global1_addr;
+
+	return mv88e6xxx_write(chip, addr, reg, val);
+}
+
+int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+{
+	return mv88e6xxx_wait(chip, chip->info->global1_addr, reg, mask);
+}
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
new file mode 100644
index 0000000..62291e6
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -0,0 +1,23 @@
+/*
+ * Marvell 88E6xxx Switch Global (1) Registers support
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_GLOBAL1_H
+#define _MV88E6XXX_GLOBAL1_H
+
+#include "mv88e6xxx.h"
+
+int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val);
+int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val);
+int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask);
+
+#endif /* _MV88E6XXX_GLOBAL1_H */
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
new file mode 100644
index 0000000..cf686e7
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -0,0 +1,491 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C)
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "mv88e6xxx.h"
+#include "global2.h"
+
+#define ADDR_GLOBAL2	0x1c
+
+static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val)
+{
+	return mv88e6xxx_read(chip, ADDR_GLOBAL2, reg, val);
+}
+
+static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val)
+{
+	return mv88e6xxx_write(chip, ADDR_GLOBAL2, reg, val);
+}
+
+static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update)
+{
+	return mv88e6xxx_update(chip, ADDR_GLOBAL2, reg, update);
+}
+
+static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask)
+{
+	return mv88e6xxx_wait(chip, ADDR_GLOBAL2, reg, mask);
+}
+
+/* Offset 0x06: Device Mapping Table register */
+
+static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip,
+					     int target, int port)
+{
+	u16 val = (target << 8) | (port & 0xf);
+
+	return mv88e6xxx_g2_update(chip, GLOBAL2_DEVICE_MAPPING, val);
+}
+
+static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip)
+{
+	int target, port;
+	int err;
+
+	/* Initialize the routing port to the 32 possible target devices */
+	for (target = 0; target < 32; ++target) {
+		port = 0xf;
+
+		if (target < DSA_MAX_SWITCHES) {
+			port = chip->ds->rtable[target];
+			if (port == DSA_RTABLE_NONE)
+				port = 0xf;
+		}
+
+		err = mv88e6xxx_g2_device_mapping_write(chip, target, port);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/* Offset 0x07: Trunk Mask Table register */
+
+static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num,
+					 bool hask, u16 mask)
+{
+	const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1;
+	u16 val = (num << 12) | (mask & port_mask);
+
+	if (hask)
+		val |= GLOBAL2_TRUNK_MASK_HASK;
+
+	return mv88e6xxx_g2_update(chip, GLOBAL2_TRUNK_MASK, val);
+}
+
+/* Offset 0x08: Trunk Mapping Table register */
+
+static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id,
+					    u16 map)
+{
+	const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1;
+	u16 val = (id << 11) | (map & port_mask);
+
+	return mv88e6xxx_g2_update(chip, GLOBAL2_TRUNK_MAPPING, val);
+}
+
+static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip)
+{
+	const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1;
+	int i, err;
+
+	/* Clear all eight possible Trunk Mask vectors */
+	for (i = 0; i < 8; ++i) {
+		err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask);
+		if (err)
+			return err;
+	}
+
+	/* Clear all sixteen possible Trunk ID routing vectors */
+	for (i = 0; i < 16; ++i) {
+		err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Offset 0x09: Ingress Rate Command register
+ * Offset 0x0A: Ingress Rate Data register
+ */
+
+static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip)
+{
+	int port, err;
+
+	/* Init all Ingress Rate Limit resources of all ports */
+	for (port = 0; port < mv88e6xxx_num_ports(chip); ++port) {
+		/* XXX newer chips (like 88E6390) have different 2-bit ops */
+		err = mv88e6xxx_g2_write(chip, GLOBAL2_IRL_CMD,
+					 GLOBAL2_IRL_CMD_OP_INIT_ALL |
+					 (port << 8));
+		if (err)
+			break;
+
+		/* Wait for the operation to complete */
+		err = mv88e6xxx_g2_wait(chip, GLOBAL2_IRL_CMD,
+					GLOBAL2_IRL_CMD_BUSY);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/* Offset 0x0D: Switch MAC/WoL/WoF register */
+
+static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip,
+					 unsigned int pointer, u8 data)
+{
+	u16 val = (pointer << 8) | data;
+
+	return mv88e6xxx_g2_update(chip, GLOBAL2_SWITCH_MAC, val);
+}
+
+int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr)
+{
+	int i, err;
+
+	for (i = 0; i < 6; i++) {
+		err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/* Offset 0x0F: Priority Override Table */
+
+static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer,
+				  u8 data)
+{
+	u16 val = (pointer << 8) | (data & 0x7);
+
+	return mv88e6xxx_g2_update(chip, GLOBAL2_PRIO_OVERRIDE, val);
+}
+
+static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip)
+{
+	int i, err;
+
+	/* Clear all sixteen possible Priority Override entries */
+	for (i = 0; i < 16; i++) {
+		err = mv88e6xxx_g2_pot_write(chip, i, 0);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/* Offset 0x14: EEPROM Command
+ * Offset 0x15: EEPROM Data
+ */
+
+static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip)
+{
+	return mv88e6xxx_g2_wait(chip, GLOBAL2_EEPROM_CMD,
+				 GLOBAL2_EEPROM_CMD_BUSY |
+				 GLOBAL2_EEPROM_CMD_RUNNING);
+}
+
+static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
+{
+	int err;
+
+	err = mv88e6xxx_g2_write(chip, GLOBAL2_EEPROM_CMD, cmd);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_eeprom_wait(chip);
+}
+
+static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip,
+				      u8 addr, u16 *data)
+{
+	u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr;
+	int err;
+
+	err = mv88e6xxx_g2_eeprom_wait(chip);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_g2_eeprom_cmd(chip, cmd);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_read(chip, GLOBAL2_EEPROM_DATA, data);
+}
+
+static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip,
+				       u8 addr, u16 data)
+{
+	u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr;
+	int err;
+
+	err = mv88e6xxx_g2_eeprom_wait(chip);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_g2_write(chip, GLOBAL2_EEPROM_DATA, data);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_eeprom_cmd(chip, cmd);
+}
+
+int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
+			      struct ethtool_eeprom *eeprom, u8 *data)
+{
+	unsigned int offset = eeprom->offset;
+	unsigned int len = eeprom->len;
+	u16 val;
+	int err;
+
+	eeprom->len = 0;
+
+	if (offset & 1) {
+		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+		if (err)
+			return err;
+
+		*data++ = (val >> 8) & 0xff;
+
+		offset++;
+		len--;
+		eeprom->len++;
+	}
+
+	while (len >= 2) {
+		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+		if (err)
+			return err;
+
+		*data++ = val & 0xff;
+		*data++ = (val >> 8) & 0xff;
+
+		offset += 2;
+		len -= 2;
+		eeprom->len += 2;
+	}
+
+	if (len) {
+		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+		if (err)
+			return err;
+
+		*data++ = val & 0xff;
+
+		offset++;
+		len--;
+		eeprom->len++;
+	}
+
+	return 0;
+}
+
+int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
+			      struct ethtool_eeprom *eeprom, u8 *data)
+{
+	unsigned int offset = eeprom->offset;
+	unsigned int len = eeprom->len;
+	u16 val;
+	int err;
+
+	/* Ensure the RO WriteEn bit is set */
+	err = mv88e6xxx_g2_read(chip, GLOBAL2_EEPROM_CMD, &val);
+	if (err)
+		return err;
+
+	if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN))
+		return -EROFS;
+
+	eeprom->len = 0;
+
+	if (offset & 1) {
+		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+		if (err)
+			return err;
+
+		val = (*data++ << 8) | (val & 0xff);
+
+		err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
+		if (err)
+			return err;
+
+		offset++;
+		len--;
+		eeprom->len++;
+	}
+
+	while (len >= 2) {
+		val = *data++;
+		val |= *data++ << 8;
+
+		err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
+		if (err)
+			return err;
+
+		offset += 2;
+		len -= 2;
+		eeprom->len += 2;
+	}
+
+	if (len) {
+		err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val);
+		if (err)
+			return err;
+
+		val = (val & 0xff00) | *data++;
+
+		err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val);
+		if (err)
+			return err;
+
+		offset++;
+		len--;
+		eeprom->len++;
+	}
+
+	return 0;
+}
+
+/* Offset 0x18: SMI PHY Command Register
+ * Offset 0x19: SMI PHY Data Register
+ */
+
+static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip)
+{
+	return mv88e6xxx_g2_wait(chip, GLOBAL2_SMI_PHY_CMD,
+				 GLOBAL2_SMI_PHY_CMD_BUSY);
+}
+
+static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd)
+{
+	int err;
+
+	err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_CMD, cmd);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_smi_phy_wait(chip);
+}
+
+int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg,
+			      u16 *val)
+{
+	u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg;
+	int err;
+
+	err = mv88e6xxx_g2_smi_phy_wait(chip);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_read(chip, GLOBAL2_SMI_PHY_DATA, val);
+}
+
+int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg,
+			       u16 val)
+{
+	u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg;
+	int err;
+
+	err = mv88e6xxx_g2_smi_phy_wait(chip);
+	if (err)
+		return err;
+
+	err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_DATA, val);
+	if (err)
+		return err;
+
+	return mv88e6xxx_g2_smi_phy_cmd(chip, cmd);
+}
+
+int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
+{
+	u16 reg;
+	int err;
+
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) {
+		/* Consider the frames with reserved multicast destination
+		 * addresses matching 01:80:c2:00:00:2x as MGMT.
+		 */
+		err = mv88e6xxx_g2_write(chip, GLOBAL2_MGMT_EN_2X, 0xffff);
+		if (err)
+			return err;
+	}
+
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) {
+		/* Consider the frames with reserved multicast destination
+		 * addresses matching 01:80:c2:00:00:0x as MGMT.
+		 */
+		err = mv88e6xxx_g2_write(chip, GLOBAL2_MGMT_EN_0X, 0xffff);
+		if (err)
+			return err;
+	}
+
+	/* Ignore removed tag data on doubly tagged packets, disable
+	 * flow control messages, force flow control priority to the
+	 * highest, and send all special multicast frames to the CPU
+	 * port at the highest priority.
+	 */
+	reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4);
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) ||
+	    mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X))
+		reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7;
+	err = mv88e6xxx_g2_write(chip, GLOBAL2_SWITCH_MGMT, reg);
+	if (err)
+		return err;
+
+	/* Program the DSA routing table. */
+	err = mv88e6xxx_g2_set_device_mapping(chip);
+	if (err)
+		return err;
+
+	/* Clear all trunk masks and mapping. */
+	err = mv88e6xxx_g2_clear_trunk(chip);
+	if (err)
+		return err;
+
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) {
+		/* Disable ingress rate limiting by resetting all per port
+		 * ingress rate limit resources to their initial state.
+		 */
+		err = mv88e6xxx_g2_clear_irl(chip);
+			if (err)
+				return err;
+	}
+
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) {
+		/* Initialize Cross-chip Port VLAN Table to reset defaults */
+		err = mv88e6xxx_g2_write(chip, GLOBAL2_PVT_ADDR,
+					 GLOBAL2_PVT_ADDR_OP_INIT_ONES);
+		if (err)
+			return err;
+	}
+
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) {
+		/* Clear the priority override table. */
+		err = mv88e6xxx_g2_clear_pot(chip);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h
new file mode 100644
index 0000000..c4bb903
--- /dev/null
+++ b/drivers/net/dsa/mv88e6xxx/global2.h
@@ -0,0 +1,88 @@
+/*
+ * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C)
+ *
+ * Copyright (c) 2008 Marvell Semiconductor
+ *
+ * Copyright (c) 2016 Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _MV88E6XXX_GLOBAL2_H
+#define _MV88E6XXX_GLOBAL2_H
+
+#include "mv88e6xxx.h"
+
+#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2
+
+static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
+{
+	return 0;
+}
+
+int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg,
+			      u16 *val);
+int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg,
+			       u16 val);
+int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr);
+int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
+			      struct ethtool_eeprom *eeprom, u8 *data);
+int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
+			      struct ethtool_eeprom *eeprom, u8 *data);
+int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip);
+
+#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
+
+static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip)
+{
+	if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) {
+		dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static inline int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip,
+					    int addr, int reg, u16 *val)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip,
+					     int addr, int reg, u16 val)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip,
+					      u8 *addr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip,
+					    struct ethtool_eeprom *eeprom,
+					    u8 *data)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip,
+					    struct ethtool_eeprom *eeprom,
+					    u8 *data)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip)
+{
+	return -EOPNOTSUPP;
+}
+
+#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */
+
+#endif /* _MV88E6XXX_GLOBAL2_H */
diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
index 48d6ea7..e572121 100644
--- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
+++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h
@@ -30,11 +30,13 @@
 #define SMI_CMD_OP_45_READ_DATA_INC	((3 << 10) | SMI_CMD_BUSY)
 #define SMI_DATA		0x01
 
-/* Fiber/SERDES Registers are located at SMI address F, page 1 */
-#define REG_FIBER_SERDES	0x0f
-#define PAGE_FIBER_SERDES	0x01
+/* PHY Registers */
+#define PHY_PAGE		0x16
+#define PHY_PAGE_COPPER		0x00
 
-#define REG_PORT(p)		(0x10 + (p))
+#define ADDR_SERDES		0x0f
+#define SERDES_PAGE_FIBER	0x01
+
 #define PORT_STATUS		0x00
 #define PORT_STATUS_PAUSE_EN	BIT(15)
 #define PORT_STATUS_MY_PAUSE	BIT(14)
@@ -157,7 +159,6 @@
 #define PORT_TAG_REGMAP_0123	0x18
 #define PORT_TAG_REGMAP_4567	0x19
 
-#define REG_GLOBAL		0x1b
 #define GLOBAL_STATUS		0x00
 #define GLOBAL_STATUS_PPU_STATE BIT(15) /* 6351 and 6171 */
 /* Two bits for 6165, 6185 etc */
@@ -169,8 +170,8 @@
 #define GLOBAL_MAC_01		0x01
 #define GLOBAL_MAC_23		0x02
 #define GLOBAL_MAC_45		0x03
-#define GLOBAL_ATU_FID		0x01	/* 6097 6165 6351 6352 */
-#define GLOBAL_VTU_FID		0x02	/* 6097 6165 6351 6352 */
+#define GLOBAL_ATU_FID		0x01
+#define GLOBAL_VTU_FID		0x02
 #define GLOBAL_VTU_FID_MASK	0xfff
 #define GLOBAL_VTU_SID		0x03	/* 6097 6165 6351 6352 */
 #define GLOBAL_VTU_SID_MASK	0x3f
@@ -275,7 +276,6 @@
 #define GLOBAL_STATS_COUNTER_32	0x1e
 #define GLOBAL_STATS_COUNTER_01	0x1f
 
-#define REG_GLOBAL2		0x1c
 #define GLOBAL2_INT_SOURCE	0x00
 #define GLOBAL2_INT_MASK	0x01
 #define GLOBAL2_MGMT_EN_2X	0x02
@@ -329,17 +329,16 @@
 #define GLOBAL2_EEPROM_DATA	0x15
 #define GLOBAL2_PTP_AVB_OP	0x16
 #define GLOBAL2_PTP_AVB_DATA	0x17
-#define GLOBAL2_SMI_OP		0x18
-#define GLOBAL2_SMI_OP_BUSY		BIT(15)
-#define GLOBAL2_SMI_OP_CLAUSE_22	BIT(12)
-#define GLOBAL2_SMI_OP_22_WRITE		((1 << 10) | GLOBAL2_SMI_OP_BUSY | \
-					 GLOBAL2_SMI_OP_CLAUSE_22)
-#define GLOBAL2_SMI_OP_22_READ		((2 << 10) | GLOBAL2_SMI_OP_BUSY | \
-					 GLOBAL2_SMI_OP_CLAUSE_22)
-#define GLOBAL2_SMI_OP_45_WRITE_ADDR	((0 << 10) | GLOBAL2_SMI_OP_BUSY)
-#define GLOBAL2_SMI_OP_45_WRITE_DATA	((1 << 10) | GLOBAL2_SMI_OP_BUSY)
-#define GLOBAL2_SMI_OP_45_READ_DATA	((2 << 10) | GLOBAL2_SMI_OP_BUSY)
-#define GLOBAL2_SMI_DATA	0x19
+#define GLOBAL2_SMI_PHY_CMD			0x18
+#define GLOBAL2_SMI_PHY_CMD_BUSY		BIT(15)
+#define GLOBAL2_SMI_PHY_CMD_MODE_22		BIT(12)
+#define GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA	((0x1 << 10) | \
+						 GLOBAL2_SMI_PHY_CMD_MODE_22 | \
+						 GLOBAL2_SMI_PHY_CMD_BUSY)
+#define GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA	((0x2 << 10) | \
+						 GLOBAL2_SMI_PHY_CMD_MODE_22 | \
+						 GLOBAL2_SMI_PHY_CMD_BUSY)
+#define GLOBAL2_SMI_PHY_DATA			0x19
 #define GLOBAL2_SCRATCH_MISC	0x1a
 #define GLOBAL2_SCRATCH_BUSY		BIT(15)
 #define GLOBAL2_SCRATCH_REGISTER_SHIFT	8
@@ -384,10 +383,36 @@
 };
 
 enum mv88e6xxx_cap {
+	/* Two different tag protocols can be used by the driver. All
+	 * switches support DSA, but only later generations support
+	 * EDSA.
+	 */
+	MV88E6XXX_CAP_EDSA,
+
 	/* Energy Efficient Ethernet.
 	 */
 	MV88E6XXX_CAP_EEE,
 
+	/* Multi-chip Addressing Mode.
+	 * Some chips respond to only 2 registers of its own SMI device address
+	 * when it is non-zero, and use indirect access to internal registers.
+	 */
+	MV88E6XXX_CAP_SMI_CMD,		/* (0x00) SMI Command */
+	MV88E6XXX_CAP_SMI_DATA,		/* (0x01) SMI Data */
+
+	/* PHY Registers.
+	 */
+	MV88E6XXX_CAP_PHY_PAGE,		/* (0x16) Page Register */
+
+	/* Fiber/SERDES Registers (SMI address F).
+	 */
+	MV88E6XXX_CAP_SERDES,
+
+	/* Switch Global (1) Registers.
+	 */
+	MV88E6XXX_CAP_G1_ATU_FID,	/* (0x01) ATU FID Register */
+	MV88E6XXX_CAP_G1_VTU_FID,	/* (0x02) VTU FID Register */
+
 	/* Switch Global 2 Registers.
 	 * The device contains a second set of global 16-bit registers.
 	 */
@@ -398,16 +423,7 @@
 	MV88E6XXX_CAP_G2_IRL_DATA,	/* (0x0a) Ingress Rate Data */
 	MV88E6XXX_CAP_G2_PVT_ADDR,	/* (0x0b) Cross Chip Port VLAN Addr */
 	MV88E6XXX_CAP_G2_PVT_DATA,	/* (0x0c) Cross Chip Port VLAN Data */
-	MV88E6XXX_CAP_G2_SWITCH_MAC,	/* (0x0d) Switch MAC/WoL/WoF */
 	MV88E6XXX_CAP_G2_POT,		/* (0x0f) Priority Override Table */
-	MV88E6XXX_CAP_G2_EEPROM_CMD,	/* (0x14) EEPROM Command */
-	MV88E6XXX_CAP_G2_EEPROM_DATA,	/* (0x15) EEPROM Data */
-
-	/* Multi-chip Addressing Mode.
-	 * Some chips require an indirect SMI access when their SMI device
-	 * address is not zero. See SMI_CMD and SMI_DATA.
-	 */
-	MV88E6XXX_CAP_MULTI_CHIP,
 
 	/* PHY Polling Unit.
 	 * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING.
@@ -415,12 +431,6 @@
 	MV88E6XXX_CAP_PPU,
 	MV88E6XXX_CAP_PPU_ACTIVE,
 
-	/* SMI PHY Command and Data registers.
-	 * This requires an indirect access to PHY registers through
-	 * GLOBAL2_SMI_OP, otherwise direct access to PHY registers is done.
-	 */
-	MV88E6XXX_CAP_SMI_PHY,
-
 	/* Per VLAN Spanning Tree Unit (STU).
 	 * The Port State database, if present, is accessed through VTU
 	 * operations and dedicated SID registers. See GLOBAL_VTU_SID.
@@ -440,130 +450,148 @@
 };
 
 /* Bitmask of capabilities */
-#define MV88E6XXX_FLAG_EEE		BIT(MV88E6XXX_CAP_EEE)
-#define MV88E6XXX_FLAG_GLOBAL2		BIT(MV88E6XXX_CAP_GLOBAL2)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_2X	BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X)
-#define MV88E6XXX_FLAG_G2_MGMT_EN_0X	BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X)
-#define MV88E6XXX_FLAG_G2_IRL_CMD	BIT(MV88E6XXX_CAP_G2_IRL_CMD)
-#define MV88E6XXX_FLAG_G2_IRL_DATA	BIT(MV88E6XXX_CAP_G2_IRL_DATA)
-#define MV88E6XXX_FLAG_G2_PVT_ADDR	BIT(MV88E6XXX_CAP_G2_PVT_ADDR)
-#define MV88E6XXX_FLAG_G2_PVT_DATA	BIT(MV88E6XXX_CAP_G2_PVT_DATA)
-#define MV88E6XXX_FLAG_G2_SWITCH_MAC	BIT(MV88E6XXX_CAP_G2_SWITCH_MAC)
-#define MV88E6XXX_FLAG_G2_POT		BIT(MV88E6XXX_CAP_G2_POT)
-#define MV88E6XXX_FLAG_G2_EEPROM_CMD	BIT(MV88E6XXX_CAP_G2_EEPROM_CMD)
-#define MV88E6XXX_FLAG_G2_EEPROM_DATA	BIT(MV88E6XXX_CAP_G2_EEPROM_DATA)
-#define MV88E6XXX_FLAG_MULTI_CHIP	BIT(MV88E6XXX_CAP_MULTI_CHIP)
-#define MV88E6XXX_FLAG_PPU		BIT(MV88E6XXX_CAP_PPU)
-#define MV88E6XXX_FLAG_PPU_ACTIVE	BIT(MV88E6XXX_CAP_PPU_ACTIVE)
-#define MV88E6XXX_FLAG_SMI_PHY		BIT(MV88E6XXX_CAP_SMI_PHY)
-#define MV88E6XXX_FLAG_STU		BIT(MV88E6XXX_CAP_STU)
-#define MV88E6XXX_FLAG_TEMP		BIT(MV88E6XXX_CAP_TEMP)
-#define MV88E6XXX_FLAG_TEMP_LIMIT	BIT(MV88E6XXX_CAP_TEMP_LIMIT)
-#define MV88E6XXX_FLAG_VTU		BIT(MV88E6XXX_CAP_VTU)
+#define MV88E6XXX_FLAG_EDSA		BIT_ULL(MV88E6XXX_CAP_EDSA)
+#define MV88E6XXX_FLAG_EEE		BIT_ULL(MV88E6XXX_CAP_EEE)
 
-/* EEPROM Programming via Global2 with 16-bit data */
-#define MV88E6XXX_FLAGS_EEPROM16	\
-	(MV88E6XXX_FLAG_G2_EEPROM_CMD |	\
-	 MV88E6XXX_FLAG_G2_EEPROM_DATA)
+#define MV88E6XXX_FLAG_SMI_CMD		BIT_ULL(MV88E6XXX_CAP_SMI_CMD)
+#define MV88E6XXX_FLAG_SMI_DATA		BIT_ULL(MV88E6XXX_CAP_SMI_DATA)
+
+#define MV88E6XXX_FLAG_PHY_PAGE		BIT_ULL(MV88E6XXX_CAP_PHY_PAGE)
+
+#define MV88E6XXX_FLAG_SERDES		BIT_ULL(MV88E6XXX_CAP_SERDES)
+
+#define MV88E6XXX_FLAG_G1_ATU_FID	BIT_ULL(MV88E6XXX_CAP_G1_ATU_FID)
+#define MV88E6XXX_FLAG_G1_VTU_FID	BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID)
+
+#define MV88E6XXX_FLAG_GLOBAL2		BIT_ULL(MV88E6XXX_CAP_GLOBAL2)
+#define MV88E6XXX_FLAG_G2_MGMT_EN_2X	BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X)
+#define MV88E6XXX_FLAG_G2_MGMT_EN_0X	BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X)
+#define MV88E6XXX_FLAG_G2_IRL_CMD	BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD)
+#define MV88E6XXX_FLAG_G2_IRL_DATA	BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA)
+#define MV88E6XXX_FLAG_G2_PVT_ADDR	BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR)
+#define MV88E6XXX_FLAG_G2_PVT_DATA	BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA)
+#define MV88E6XXX_FLAG_G2_POT		BIT_ULL(MV88E6XXX_CAP_G2_POT)
+
+#define MV88E6XXX_FLAG_PPU		BIT_ULL(MV88E6XXX_CAP_PPU)
+#define MV88E6XXX_FLAG_PPU_ACTIVE	BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE)
+#define MV88E6XXX_FLAG_STU		BIT_ULL(MV88E6XXX_CAP_STU)
+#define MV88E6XXX_FLAG_TEMP		BIT_ULL(MV88E6XXX_CAP_TEMP)
+#define MV88E6XXX_FLAG_TEMP_LIMIT	BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT)
+#define MV88E6XXX_FLAG_VTU		BIT_ULL(MV88E6XXX_CAP_VTU)
 
 /* Ingress Rate Limit unit */
 #define MV88E6XXX_FLAGS_IRL		\
 	(MV88E6XXX_FLAG_G2_IRL_CMD |	\
 	 MV88E6XXX_FLAG_G2_IRL_DATA)
 
+/* Multi-chip Addressing Mode */
+#define MV88E6XXX_FLAGS_MULTI_CHIP	\
+	(MV88E6XXX_FLAG_SMI_CMD |	\
+	 MV88E6XXX_FLAG_SMI_DATA)
+
 /* Cross-chip Port VLAN Table */
 #define MV88E6XXX_FLAGS_PVT		\
 	(MV88E6XXX_FLAG_G2_PVT_ADDR |	\
 	 MV88E6XXX_FLAG_G2_PVT_DATA)
 
+/* Fiber/SERDES Registers at SMI address F, page 1 */
+#define MV88E6XXX_FLAGS_SERDES		\
+	(MV88E6XXX_FLAG_PHY_PAGE |	\
+	 MV88E6XXX_FLAG_SERDES)
+
 #define MV88E6XXX_FLAGS_FAMILY_6095	\
 	(MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU |		\
-	 MV88E6XXX_FLAG_VTU)
+	 MV88E6XXX_FLAG_VTU |		\
+	 MV88E6XXX_FLAGS_MULTI_CHIP)
 
 #define MV88E6XXX_FLAGS_FAMILY_6097	\
-	(MV88E6XXX_FLAG_GLOBAL2 |	\
+	(MV88E6XXX_FLAG_G1_ATU_FID |	\
+	 MV88E6XXX_FLAG_G1_VTU_FID |	\
+	 MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU |		\
 	 MV88E6XXX_FLAG_STU |		\
 	 MV88E6XXX_FLAG_VTU |		\
 	 MV88E6XXX_FLAGS_IRL |		\
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
 	 MV88E6XXX_FLAGS_PVT)
 
 #define MV88E6XXX_FLAGS_FAMILY_6165	\
-	(MV88E6XXX_FLAG_GLOBAL2 |	\
+	(MV88E6XXX_FLAG_G1_ATU_FID |	\
+	 MV88E6XXX_FLAG_G1_VTU_FID |	\
+	 MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_G2_SWITCH_MAC |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_STU |		\
 	 MV88E6XXX_FLAG_TEMP |		\
 	 MV88E6XXX_FLAG_VTU |		\
 	 MV88E6XXX_FLAGS_IRL |		\
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
 	 MV88E6XXX_FLAGS_PVT)
 
 #define MV88E6XXX_FLAGS_FAMILY_6185	\
 	(MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU |		\
 	 MV88E6XXX_FLAG_VTU)
 
 #define MV88E6XXX_FLAGS_FAMILY_6320	\
-	(MV88E6XXX_FLAG_EEE |		\
+	(MV88E6XXX_FLAG_EDSA |		\
+	 MV88E6XXX_FLAG_EEE |		\
 	 MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_G2_SWITCH_MAC |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU_ACTIVE |	\
-	 MV88E6XXX_FLAG_SMI_PHY |	\
 	 MV88E6XXX_FLAG_TEMP |		\
 	 MV88E6XXX_FLAG_TEMP_LIMIT |	\
 	 MV88E6XXX_FLAG_VTU |		\
-	 MV88E6XXX_FLAGS_EEPROM16 |	\
 	 MV88E6XXX_FLAGS_IRL |		\
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
 	 MV88E6XXX_FLAGS_PVT)
 
 #define MV88E6XXX_FLAGS_FAMILY_6351	\
-	(MV88E6XXX_FLAG_GLOBAL2 |	\
+	(MV88E6XXX_FLAG_EDSA |		\
+	 MV88E6XXX_FLAG_G1_ATU_FID |	\
+	 MV88E6XXX_FLAG_G1_VTU_FID |	\
+	 MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_G2_SWITCH_MAC |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU_ACTIVE |	\
-	 MV88E6XXX_FLAG_SMI_PHY |	\
 	 MV88E6XXX_FLAG_STU |		\
 	 MV88E6XXX_FLAG_TEMP |		\
 	 MV88E6XXX_FLAG_VTU |		\
 	 MV88E6XXX_FLAGS_IRL |		\
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
 	 MV88E6XXX_FLAGS_PVT)
 
 #define MV88E6XXX_FLAGS_FAMILY_6352	\
-	(MV88E6XXX_FLAG_EEE |		\
+	(MV88E6XXX_FLAG_EDSA |		\
+	 MV88E6XXX_FLAG_EEE |		\
+	 MV88E6XXX_FLAG_G1_ATU_FID |	\
+	 MV88E6XXX_FLAG_G1_VTU_FID |	\
 	 MV88E6XXX_FLAG_GLOBAL2 |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_2X |	\
 	 MV88E6XXX_FLAG_G2_MGMT_EN_0X |	\
-	 MV88E6XXX_FLAG_G2_SWITCH_MAC |	\
 	 MV88E6XXX_FLAG_G2_POT |	\
-	 MV88E6XXX_FLAG_MULTI_CHIP |	\
 	 MV88E6XXX_FLAG_PPU_ACTIVE |	\
-	 MV88E6XXX_FLAG_SMI_PHY |	\
 	 MV88E6XXX_FLAG_STU |		\
 	 MV88E6XXX_FLAG_TEMP |		\
 	 MV88E6XXX_FLAG_TEMP_LIMIT |	\
 	 MV88E6XXX_FLAG_VTU |		\
-	 MV88E6XXX_FLAGS_EEPROM16 |	\
 	 MV88E6XXX_FLAGS_IRL |		\
-	 MV88E6XXX_FLAGS_PVT)
+	 MV88E6XXX_FLAGS_MULTI_CHIP |	\
+	 MV88E6XXX_FLAGS_PVT |		\
+	 MV88E6XXX_FLAGS_SERDES)
+
+struct mv88e6xxx_ops;
 
 struct mv88e6xxx_info {
 	enum mv88e6xxx_family family;
@@ -572,8 +600,10 @@
 	unsigned int num_databases;
 	unsigned int num_ports;
 	unsigned int port_base_addr;
+	unsigned int global1_addr;
 	unsigned int age_time_coeff;
-	unsigned long flags;
+	unsigned long long flags;
+	const struct mv88e6xxx_ops *ops;
 };
 
 struct mv88e6xxx_atu_entry {
@@ -584,18 +614,15 @@
 	u8	mac[ETH_ALEN];
 };
 
-struct mv88e6xxx_vtu_stu_entry {
-	/* VTU only */
+struct mv88e6xxx_vtu_entry {
 	u16	vid;
 	u16	fid;
-
-	/* VTU and STU */
 	u8	sid;
 	bool	valid;
 	u8	data[DSA_MAX_PORTS];
 };
 
-struct mv88e6xxx_ops;
+struct mv88e6xxx_bus_ops;
 
 struct mv88e6xxx_priv_port {
 	struct net_device *bridge_dev;
@@ -616,13 +643,14 @@
 	/* The MII bus and the address on the bus that is used to
 	 * communication with the switch
 	 */
-	const struct mv88e6xxx_ops *smi_ops;
+	const struct mv88e6xxx_bus_ops *smi_ops;
 	struct mii_bus *bus;
 	int sw_addr;
 
 	/* Handles automatic disabling and re-enabling of the PHY
 	 * polling unit.
 	 */
+	const struct mv88e6xxx_bus_ops *phy_ops;
 	struct mutex		ppu_mutex;
 	int			ppu_disabled;
 	struct work_struct	ppu_work;
@@ -651,11 +679,25 @@
 	struct mii_bus *mdio_bus;
 };
 
-struct mv88e6xxx_ops {
+struct mv88e6xxx_bus_ops {
 	int (*read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
 	int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
 };
 
+struct mv88e6xxx_ops {
+	int (*get_eeprom)(struct mv88e6xxx_chip *chip,
+			  struct ethtool_eeprom *eeprom, u8 *data);
+	int (*set_eeprom)(struct mv88e6xxx_chip *chip,
+			  struct ethtool_eeprom *eeprom, u8 *data);
+
+	int (*set_switch_mac)(struct mv88e6xxx_chip *chip, u8 *addr);
+
+	int (*phy_read)(struct mv88e6xxx_chip *chip, int addr, int reg,
+			u16 *val);
+	int (*phy_write)(struct mv88e6xxx_chip *chip, int addr, int reg,
+			 u16 val);
+};
+
 enum stat_type {
 	BANK0,
 	BANK1,
@@ -675,4 +717,20 @@
 	return (chip->info->flags & flags) == flags;
 }
 
+static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip)
+{
+	return chip->info->num_databases;
+}
+
+static inline unsigned int mv88e6xxx_num_ports(struct mv88e6xxx_chip *chip)
+{
+	return chip->info->num_ports;
+}
+
+int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val);
+int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val);
+int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg,
+		     u16 update);
+int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask);
+
 #endif
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
new file mode 100644
index 0000000..b3df70d
--- /dev/null
+++ b/drivers/net/dsa/qca8k.c
@@ -0,0 +1,1040 @@
+/*
+ * Copyright (C) 2009 Felix Fietkau <nbd@nbd.name>
+ * Copyright (C) 2011-2012 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2016 John Crispin <john@phrozen.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/netdevice.h>
+#include <net/dsa.h>
+#include <net/switchdev.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/if_bridge.h>
+#include <linux/mdio.h>
+#include <linux/etherdevice.h>
+
+#include "qca8k.h"
+
+#define MIB_DESC(_s, _o, _n)	\
+	{			\
+		.size = (_s),	\
+		.offset = (_o),	\
+		.name = (_n),	\
+	}
+
+static const struct qca8k_mib_desc ar8327_mib[] = {
+	MIB_DESC(1, 0x00, "RxBroad"),
+	MIB_DESC(1, 0x04, "RxPause"),
+	MIB_DESC(1, 0x08, "RxMulti"),
+	MIB_DESC(1, 0x0c, "RxFcsErr"),
+	MIB_DESC(1, 0x10, "RxAlignErr"),
+	MIB_DESC(1, 0x14, "RxRunt"),
+	MIB_DESC(1, 0x18, "RxFragment"),
+	MIB_DESC(1, 0x1c, "Rx64Byte"),
+	MIB_DESC(1, 0x20, "Rx128Byte"),
+	MIB_DESC(1, 0x24, "Rx256Byte"),
+	MIB_DESC(1, 0x28, "Rx512Byte"),
+	MIB_DESC(1, 0x2c, "Rx1024Byte"),
+	MIB_DESC(1, 0x30, "Rx1518Byte"),
+	MIB_DESC(1, 0x34, "RxMaxByte"),
+	MIB_DESC(1, 0x38, "RxTooLong"),
+	MIB_DESC(2, 0x3c, "RxGoodByte"),
+	MIB_DESC(2, 0x44, "RxBadByte"),
+	MIB_DESC(1, 0x4c, "RxOverFlow"),
+	MIB_DESC(1, 0x50, "Filtered"),
+	MIB_DESC(1, 0x54, "TxBroad"),
+	MIB_DESC(1, 0x58, "TxPause"),
+	MIB_DESC(1, 0x5c, "TxMulti"),
+	MIB_DESC(1, 0x60, "TxUnderRun"),
+	MIB_DESC(1, 0x64, "Tx64Byte"),
+	MIB_DESC(1, 0x68, "Tx128Byte"),
+	MIB_DESC(1, 0x6c, "Tx256Byte"),
+	MIB_DESC(1, 0x70, "Tx512Byte"),
+	MIB_DESC(1, 0x74, "Tx1024Byte"),
+	MIB_DESC(1, 0x78, "Tx1518Byte"),
+	MIB_DESC(1, 0x7c, "TxMaxByte"),
+	MIB_DESC(1, 0x80, "TxOverSize"),
+	MIB_DESC(2, 0x84, "TxByte"),
+	MIB_DESC(1, 0x8c, "TxCollision"),
+	MIB_DESC(1, 0x90, "TxAbortCol"),
+	MIB_DESC(1, 0x94, "TxMultiCol"),
+	MIB_DESC(1, 0x98, "TxSingleCol"),
+	MIB_DESC(1, 0x9c, "TxExcDefer"),
+	MIB_DESC(1, 0xa0, "TxDefer"),
+	MIB_DESC(1, 0xa4, "TxLateCol"),
+};
+
+/* The 32bit switch registers are accessed indirectly. To achieve this we need
+ * to set the page of the register. Track the last page that was set to reduce
+ * mdio writes
+ */
+static u16 qca8k_current_page = 0xffff;
+
+static void
+qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page)
+{
+	regaddr >>= 1;
+	*r1 = regaddr & 0x1e;
+
+	regaddr >>= 5;
+	*r2 = regaddr & 0x7;
+
+	regaddr >>= 3;
+	*page = regaddr & 0x3ff;
+}
+
+static u32
+qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum)
+{
+	u32 val;
+	int ret;
+
+	ret = bus->read(bus, phy_id, regnum);
+	if (ret >= 0) {
+		val = ret;
+		ret = bus->read(bus, phy_id, regnum + 1);
+		val |= ret << 16;
+	}
+
+	if (ret < 0) {
+		dev_err_ratelimited(&bus->dev,
+				    "failed to read qca8k 32bit register\n");
+		return ret;
+	}
+
+	return val;
+}
+
+static void
+qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val)
+{
+	u16 lo, hi;
+	int ret;
+
+	lo = val & 0xffff;
+	hi = (u16)(val >> 16);
+
+	ret = bus->write(bus, phy_id, regnum, lo);
+	if (ret >= 0)
+		ret = bus->write(bus, phy_id, regnum + 1, hi);
+	if (ret < 0)
+		dev_err_ratelimited(&bus->dev,
+				    "failed to write qca8k 32bit register\n");
+}
+
+static void
+qca8k_set_page(struct mii_bus *bus, u16 page)
+{
+	if (page == qca8k_current_page)
+		return;
+
+	if (bus->write(bus, 0x18, 0, page) < 0)
+		dev_err_ratelimited(&bus->dev,
+				    "failed to set qca8k page\n");
+	qca8k_current_page = page;
+}
+
+static u32
+qca8k_read(struct qca8k_priv *priv, u32 reg)
+{
+	u16 r1, r2, page;
+	u32 val;
+
+	qca8k_split_addr(reg, &r1, &r2, &page);
+
+	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+
+	qca8k_set_page(priv->bus, page);
+	val = qca8k_mii_read32(priv->bus, 0x10 | r2, r1);
+
+	mutex_unlock(&priv->bus->mdio_lock);
+
+	return val;
+}
+
+static void
+qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val)
+{
+	u16 r1, r2, page;
+
+	qca8k_split_addr(reg, &r1, &r2, &page);
+
+	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+
+	qca8k_set_page(priv->bus, page);
+	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val);
+
+	mutex_unlock(&priv->bus->mdio_lock);
+}
+
+static u32
+qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 val)
+{
+	u16 r1, r2, page;
+	u32 ret;
+
+	qca8k_split_addr(reg, &r1, &r2, &page);
+
+	mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED);
+
+	qca8k_set_page(priv->bus, page);
+	ret = qca8k_mii_read32(priv->bus, 0x10 | r2, r1);
+	ret &= ~mask;
+	ret |= val;
+	qca8k_mii_write32(priv->bus, 0x10 | r2, r1, ret);
+
+	mutex_unlock(&priv->bus->mdio_lock);
+
+	return ret;
+}
+
+static void
+qca8k_reg_set(struct qca8k_priv *priv, u32 reg, u32 val)
+{
+	qca8k_rmw(priv, reg, 0, val);
+}
+
+static void
+qca8k_reg_clear(struct qca8k_priv *priv, u32 reg, u32 val)
+{
+	qca8k_rmw(priv, reg, val, 0);
+}
+
+static int
+qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ctx;
+
+	*val = qca8k_read(priv, reg);
+
+	return 0;
+}
+
+static int
+qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ctx;
+
+	qca8k_write(priv, reg, val);
+
+	return 0;
+}
+
+static const struct regmap_range qca8k_readable_ranges[] = {
+	regmap_reg_range(0x0000, 0x00e4), /* Global control */
+	regmap_reg_range(0x0100, 0x0168), /* EEE control */
+	regmap_reg_range(0x0200, 0x0270), /* Parser control */
+	regmap_reg_range(0x0400, 0x0454), /* ACL */
+	regmap_reg_range(0x0600, 0x0718), /* Lookup */
+	regmap_reg_range(0x0800, 0x0b70), /* QM */
+	regmap_reg_range(0x0c00, 0x0c80), /* PKT */
+	regmap_reg_range(0x0e00, 0x0e98), /* L3 */
+	regmap_reg_range(0x1000, 0x10ac), /* MIB - Port0 */
+	regmap_reg_range(0x1100, 0x11ac), /* MIB - Port1 */
+	regmap_reg_range(0x1200, 0x12ac), /* MIB - Port2 */
+	regmap_reg_range(0x1300, 0x13ac), /* MIB - Port3 */
+	regmap_reg_range(0x1400, 0x14ac), /* MIB - Port4 */
+	regmap_reg_range(0x1500, 0x15ac), /* MIB - Port5 */
+	regmap_reg_range(0x1600, 0x16ac), /* MIB - Port6 */
+
+};
+
+static struct regmap_access_table qca8k_readable_table = {
+	.yes_ranges = qca8k_readable_ranges,
+	.n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges),
+};
+
+static struct regmap_config qca8k_regmap_config = {
+	.reg_bits = 16,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = 0x16ac, /* end MIB - Port6 range */
+	.reg_read = qca8k_regmap_read,
+	.reg_write = qca8k_regmap_write,
+	.rd_table = &qca8k_readable_table,
+};
+
+static int
+qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask)
+{
+	unsigned long timeout;
+
+	timeout = jiffies + msecs_to_jiffies(20);
+
+	/* loop until the busy flag has cleared */
+	do {
+		u32 val = qca8k_read(priv, reg);
+		int busy = val & mask;
+
+		if (!busy)
+			break;
+		cond_resched();
+	} while (!time_after_eq(jiffies, timeout));
+
+	return time_after_eq(jiffies, timeout);
+}
+
+static void
+qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb)
+{
+	u32 reg[4];
+	int i;
+
+	/* load the ARL table into an array */
+	for (i = 0; i < 4; i++)
+		reg[i] = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4));
+
+	/* vid - 83:72 */
+	fdb->vid = (reg[2] >> QCA8K_ATU_VID_S) & QCA8K_ATU_VID_M;
+	/* aging - 67:64 */
+	fdb->aging = reg[2] & QCA8K_ATU_STATUS_M;
+	/* portmask - 54:48 */
+	fdb->port_mask = (reg[1] >> QCA8K_ATU_PORT_S) & QCA8K_ATU_PORT_M;
+	/* mac - 47:0 */
+	fdb->mac[0] = (reg[1] >> QCA8K_ATU_ADDR0_S) & 0xff;
+	fdb->mac[1] = reg[1] & 0xff;
+	fdb->mac[2] = (reg[0] >> QCA8K_ATU_ADDR2_S) & 0xff;
+	fdb->mac[3] = (reg[0] >> QCA8K_ATU_ADDR3_S) & 0xff;
+	fdb->mac[4] = (reg[0] >> QCA8K_ATU_ADDR4_S) & 0xff;
+	fdb->mac[5] = reg[0] & 0xff;
+}
+
+static void
+qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac,
+		u8 aging)
+{
+	u32 reg[3] = { 0 };
+	int i;
+
+	/* vid - 83:72 */
+	reg[2] = (vid & QCA8K_ATU_VID_M) << QCA8K_ATU_VID_S;
+	/* aging - 67:64 */
+	reg[2] |= aging & QCA8K_ATU_STATUS_M;
+	/* portmask - 54:48 */
+	reg[1] = (port_mask & QCA8K_ATU_PORT_M) << QCA8K_ATU_PORT_S;
+	/* mac - 47:0 */
+	reg[1] |= mac[0] << QCA8K_ATU_ADDR0_S;
+	reg[1] |= mac[1];
+	reg[0] |= mac[2] << QCA8K_ATU_ADDR2_S;
+	reg[0] |= mac[3] << QCA8K_ATU_ADDR3_S;
+	reg[0] |= mac[4] << QCA8K_ATU_ADDR4_S;
+	reg[0] |= mac[5];
+
+	/* load the array into the ARL table */
+	for (i = 0; i < 3; i++)
+		qca8k_write(priv, QCA8K_REG_ATU_DATA0 + (i * 4), reg[i]);
+}
+
+static int
+qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port)
+{
+	u32 reg;
+
+	/* Set the command and FDB index */
+	reg = QCA8K_ATU_FUNC_BUSY;
+	reg |= cmd;
+	if (port >= 0) {
+		reg |= QCA8K_ATU_FUNC_PORT_EN;
+		reg |= (port & QCA8K_ATU_FUNC_PORT_M) << QCA8K_ATU_FUNC_PORT_S;
+	}
+
+	/* Write the function register triggering the table access */
+	qca8k_write(priv, QCA8K_REG_ATU_FUNC, reg);
+
+	/* wait for completion */
+	if (qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY))
+		return -1;
+
+	/* Check for table full violation when adding an entry */
+	if (cmd == QCA8K_FDB_LOAD) {
+		reg = qca8k_read(priv, QCA8K_REG_ATU_FUNC);
+		if (reg & QCA8K_ATU_FUNC_FULL)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int
+qca8k_fdb_next(struct qca8k_priv *priv, struct qca8k_fdb *fdb, int port)
+{
+	int ret;
+
+	qca8k_fdb_write(priv, fdb->vid, fdb->port_mask, fdb->mac, fdb->aging);
+	ret = qca8k_fdb_access(priv, QCA8K_FDB_NEXT, port);
+	if (ret >= 0)
+		qca8k_fdb_read(priv, fdb);
+
+	return ret;
+}
+
+static int
+qca8k_fdb_add(struct qca8k_priv *priv, const u8 *mac, u16 port_mask,
+	      u16 vid, u8 aging)
+{
+	int ret;
+
+	mutex_lock(&priv->reg_mutex);
+	qca8k_fdb_write(priv, vid, port_mask, mac, aging);
+	ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1);
+	mutex_unlock(&priv->reg_mutex);
+
+	return ret;
+}
+
+static int
+qca8k_fdb_del(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, u16 vid)
+{
+	int ret;
+
+	mutex_lock(&priv->reg_mutex);
+	qca8k_fdb_write(priv, vid, port_mask, mac, 0);
+	ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1);
+	mutex_unlock(&priv->reg_mutex);
+
+	return ret;
+}
+
+static void
+qca8k_fdb_flush(struct qca8k_priv *priv)
+{
+	mutex_lock(&priv->reg_mutex);
+	qca8k_fdb_access(priv, QCA8K_FDB_FLUSH, -1);
+	mutex_unlock(&priv->reg_mutex);
+}
+
+static void
+qca8k_mib_init(struct qca8k_priv *priv)
+{
+	mutex_lock(&priv->reg_mutex);
+	qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY);
+	qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY);
+	qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP);
+	qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB);
+	mutex_unlock(&priv->reg_mutex);
+}
+
+static int
+qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode)
+{
+	u32 reg;
+
+	switch (port) {
+	case 0:
+		reg = QCA8K_REG_PORT0_PAD_CTRL;
+		break;
+	case 6:
+		reg = QCA8K_REG_PORT6_PAD_CTRL;
+		break;
+	default:
+		pr_err("Can't set PAD_CTRL on port %d\n", port);
+		return -EINVAL;
+	}
+
+	/* Configure a port to be directly connected to an external
+	 * PHY or MAC.
+	 */
+	switch (mode) {
+	case PHY_INTERFACE_MODE_RGMII:
+		qca8k_write(priv, reg,
+			    QCA8K_PORT_PAD_RGMII_EN |
+			    QCA8K_PORT_PAD_RGMII_TX_DELAY(3) |
+			    QCA8K_PORT_PAD_RGMII_RX_DELAY(3));
+
+		/* According to the datasheet, RGMII delay is enabled through
+		 * PORT5_PAD_CTRL for all ports, rather than individual port
+		 * registers
+		 */
+		qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL,
+			    QCA8K_PORT_PAD_RGMII_RX_DELAY_EN);
+		break;
+	case PHY_INTERFACE_MODE_SGMII:
+		qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN);
+		break;
+	default:
+		pr_err("xMII mode %d not supported\n", mode);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void
+qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable)
+{
+	u32 mask = QCA8K_PORT_STATUS_TXMAC;
+
+	/* Port 0 and 6 have no internal PHY */
+	if ((port > 0) && (port < 6))
+		mask |= QCA8K_PORT_STATUS_LINK_AUTO;
+
+	if (enable)
+		qca8k_reg_set(priv, QCA8K_REG_PORT_STATUS(port), mask);
+	else
+		qca8k_reg_clear(priv, QCA8K_REG_PORT_STATUS(port), mask);
+}
+
+static int
+qca8k_setup(struct dsa_switch *ds)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	int ret, i, phy_mode = -1;
+
+	/* Make sure that port 0 is the cpu port */
+	if (!dsa_is_cpu_port(ds, 0)) {
+		pr_err("port 0 is not the CPU port\n");
+		return -EINVAL;
+	}
+
+	mutex_init(&priv->reg_mutex);
+
+	/* Start by setting up the register mapping */
+	priv->regmap = devm_regmap_init(ds->dev, NULL, priv,
+					&qca8k_regmap_config);
+	if (IS_ERR(priv->regmap))
+		pr_warn("regmap initialization failed");
+
+	/* Initialize CPU port pad mode (xMII type, delays...) */
+	phy_mode = of_get_phy_mode(ds->ports[ds->dst->cpu_port].dn);
+	if (phy_mode < 0) {
+		pr_err("Can't find phy-mode for master device\n");
+		return phy_mode;
+	}
+	ret = qca8k_set_pad_ctrl(priv, QCA8K_CPU_PORT, phy_mode);
+	if (ret < 0)
+		return ret;
+
+	/* Enable CPU Port */
+	qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0,
+		      QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN);
+	qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1);
+	priv->port_sts[QCA8K_CPU_PORT].enabled = 1;
+
+	/* Enable MIB counters */
+	qca8k_mib_init(priv);
+
+	/* Enable QCA header mode on the cpu port */
+	qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(QCA8K_CPU_PORT),
+		    QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S |
+		    QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S);
+
+	/* Disable forwarding by default on all ports */
+	for (i = 0; i < QCA8K_NUM_PORTS; i++)
+		qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i),
+			  QCA8K_PORT_LOOKUP_MEMBER, 0);
+
+	/* Disable MAC by default on all user ports */
+	for (i = 1; i < QCA8K_NUM_PORTS; i++)
+		if (ds->enabled_port_mask & BIT(i))
+			qca8k_port_set_status(priv, i, 0);
+
+	/* Forward all unknown frames to CPU port for Linux processing */
+	qca8k_write(priv, QCA8K_REG_GLOBAL_FW_CTRL1,
+		    BIT(0) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S |
+		    BIT(0) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S |
+		    BIT(0) << QCA8K_GLOBAL_FW_CTRL1_MC_DP_S |
+		    BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S);
+
+	/* Setup connection between CPU port & user ports */
+	for (i = 0; i < DSA_MAX_PORTS; i++) {
+		/* CPU port gets connected to all user ports of the switch */
+		if (dsa_is_cpu_port(ds, i)) {
+			qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT),
+				  QCA8K_PORT_LOOKUP_MEMBER,
+				  ds->enabled_port_mask);
+		}
+
+		/* Invividual user ports get connected to CPU port only */
+		if (ds->enabled_port_mask & BIT(i)) {
+			int shift = 16 * (i % 2);
+
+			qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i),
+				  QCA8K_PORT_LOOKUP_MEMBER,
+				  BIT(QCA8K_CPU_PORT));
+
+			/* Enable ARP Auto-learning by default */
+			qca8k_reg_set(priv, QCA8K_PORT_LOOKUP_CTRL(i),
+				      QCA8K_PORT_LOOKUP_LEARN);
+
+			/* For port based vlans to work we need to set the
+			 * default egress vid
+			 */
+			qca8k_rmw(priv, QCA8K_EGRESS_VLAN(i),
+				  0xffff << shift, 1 << shift);
+			qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(i),
+				    QCA8K_PORT_VLAN_CVID(1) |
+				    QCA8K_PORT_VLAN_SVID(1));
+		}
+	}
+
+	/* Flush the FDB table */
+	qca8k_fdb_flush(priv);
+
+	return 0;
+}
+
+static int
+qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+	return mdiobus_read(priv->bus, phy, regnum);
+}
+
+static int
+qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+	return mdiobus_write(priv->bus, phy, regnum, val);
+}
+
+static void
+qca8k_get_strings(struct dsa_switch *ds, int port, uint8_t *data)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++)
+		strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name,
+			ETH_GSTRING_LEN);
+}
+
+static void
+qca8k_get_ethtool_stats(struct dsa_switch *ds, int port,
+			uint64_t *data)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	const struct qca8k_mib_desc *mib;
+	u32 reg, i;
+	u64 hi;
+
+	for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) {
+		mib = &ar8327_mib[i];
+		reg = QCA8K_PORT_MIB_COUNTER(port) + mib->offset;
+
+		data[i] = qca8k_read(priv, reg);
+		if (mib->size == 2) {
+			hi = qca8k_read(priv, reg + 4);
+			data[i] |= hi << 32;
+		}
+	}
+}
+
+static int
+qca8k_get_sset_count(struct dsa_switch *ds)
+{
+	return ARRAY_SIZE(ar8327_mib);
+}
+
+static void
+qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port);
+	u32 reg;
+
+	mutex_lock(&priv->reg_mutex);
+	reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL);
+	if (enable)
+		reg |= lpi_en;
+	else
+		reg &= ~lpi_en;
+	qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg);
+	mutex_unlock(&priv->reg_mutex);
+}
+
+static int
+qca8k_eee_init(struct dsa_switch *ds, int port,
+	       struct phy_device *phy)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	struct ethtool_eee *p = &priv->port_sts[port].eee;
+	int ret;
+
+	p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full);
+
+	ret = phy_init_eee(phy, 0);
+	if (ret)
+		return ret;
+
+	qca8k_eee_enable_set(ds, port, true);
+
+	return 0;
+}
+
+static int
+qca8k_set_eee(struct dsa_switch *ds, int port,
+	      struct phy_device *phydev,
+	      struct ethtool_eee *e)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	struct ethtool_eee *p = &priv->port_sts[port].eee;
+	int ret = 0;
+
+	p->eee_enabled = e->eee_enabled;
+
+	if (e->eee_enabled) {
+		p->eee_enabled = qca8k_eee_init(ds, port, phydev);
+		if (!p->eee_enabled)
+			ret = -EOPNOTSUPP;
+	}
+	qca8k_eee_enable_set(ds, port, p->eee_enabled);
+
+	return ret;
+}
+
+static int
+qca8k_get_eee(struct dsa_switch *ds, int port,
+	      struct ethtool_eee *e)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	struct ethtool_eee *p = &priv->port_sts[port].eee;
+	struct net_device *netdev = ds->ports[port].netdev;
+	int ret;
+
+	ret = phy_ethtool_get_eee(netdev->phydev, p);
+	if (!ret)
+		e->eee_active =
+			!!(p->supported & p->advertised & p->lp_advertised);
+	else
+		e->eee_active = 0;
+
+	e->eee_enabled = p->eee_enabled;
+
+	return ret;
+}
+
+static void
+qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	u32 stp_state;
+
+	switch (state) {
+	case BR_STATE_DISABLED:
+		stp_state = QCA8K_PORT_LOOKUP_STATE_DISABLED;
+		break;
+	case BR_STATE_BLOCKING:
+		stp_state = QCA8K_PORT_LOOKUP_STATE_BLOCKING;
+		break;
+	case BR_STATE_LISTENING:
+		stp_state = QCA8K_PORT_LOOKUP_STATE_LISTENING;
+		break;
+	case BR_STATE_LEARNING:
+		stp_state = QCA8K_PORT_LOOKUP_STATE_LEARNING;
+		break;
+	case BR_STATE_FORWARDING:
+	default:
+		stp_state = QCA8K_PORT_LOOKUP_STATE_FORWARD;
+		break;
+	}
+
+	qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+		  QCA8K_PORT_LOOKUP_STATE_MASK, stp_state);
+}
+
+static int
+qca8k_port_bridge_join(struct dsa_switch *ds, int port,
+		       struct net_device *bridge)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	int port_mask = BIT(QCA8K_CPU_PORT);
+	int i;
+
+	priv->port_sts[port].bridge_dev = bridge;
+
+	for (i = 1; i < QCA8K_NUM_PORTS; i++) {
+		if (priv->port_sts[i].bridge_dev != bridge)
+			continue;
+		/* Add this port to the portvlan mask of the other ports
+		 * in the bridge
+		 */
+		qca8k_reg_set(priv,
+			      QCA8K_PORT_LOOKUP_CTRL(i),
+			      BIT(port));
+		if (i != port)
+			port_mask |= BIT(i);
+	}
+	/* Add all other ports to this ports portvlan mask */
+	qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+		  QCA8K_PORT_LOOKUP_MEMBER, port_mask);
+
+	return 0;
+}
+
+static void
+qca8k_port_bridge_leave(struct dsa_switch *ds, int port)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	int i;
+
+	for (i = 1; i < QCA8K_NUM_PORTS; i++) {
+		if (priv->port_sts[i].bridge_dev !=
+		    priv->port_sts[port].bridge_dev)
+			continue;
+		/* Remove this port to the portvlan mask of the other ports
+		 * in the bridge
+		 */
+		qca8k_reg_clear(priv,
+				QCA8K_PORT_LOOKUP_CTRL(i),
+				BIT(port));
+	}
+	priv->port_sts[port].bridge_dev = NULL;
+	/* Set the cpu port to be the only one in the portvlan mask of
+	 * this port
+	 */
+	qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port),
+		  QCA8K_PORT_LOOKUP_MEMBER, BIT(QCA8K_CPU_PORT));
+}
+
+static int
+qca8k_port_enable(struct dsa_switch *ds, int port,
+		  struct phy_device *phy)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+	qca8k_port_set_status(priv, port, 1);
+	priv->port_sts[port].enabled = 1;
+
+	return 0;
+}
+
+static void
+qca8k_port_disable(struct dsa_switch *ds, int port,
+		   struct phy_device *phy)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+	qca8k_port_set_status(priv, port, 0);
+	priv->port_sts[port].enabled = 0;
+}
+
+static int
+qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr,
+		      u16 port_mask, u16 vid)
+{
+	/* Set the vid to the port vlan id if no vid is set */
+	if (!vid)
+		vid = 1;
+
+	return qca8k_fdb_add(priv, addr, port_mask, vid,
+			     QCA8K_ATU_STATUS_STATIC);
+}
+
+static int
+qca8k_port_fdb_prepare(struct dsa_switch *ds, int port,
+		       const struct switchdev_obj_port_fdb *fdb,
+		       struct switchdev_trans *trans)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+
+	/* The FDB table for static and auto learned entries is the same. We
+	 * need to reserve an entry with no port_mask set to make sure that
+	 * when port_fdb_add is called an entry is still available. Otherwise
+	 * the last free entry might have been used up by auto learning
+	 */
+	return qca8k_port_fdb_insert(priv, fdb->addr, 0, fdb->vid);
+}
+
+static void
+qca8k_port_fdb_add(struct dsa_switch *ds, int port,
+		   const struct switchdev_obj_port_fdb *fdb,
+		   struct switchdev_trans *trans)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	u16 port_mask = BIT(port);
+
+	/* Update the FDB entry adding the port_mask */
+	qca8k_port_fdb_insert(priv, fdb->addr, port_mask, fdb->vid);
+}
+
+static int
+qca8k_port_fdb_del(struct dsa_switch *ds, int port,
+		   const struct switchdev_obj_port_fdb *fdb)
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	u16 port_mask = BIT(port);
+	u16 vid = fdb->vid;
+
+	if (!vid)
+		vid = 1;
+
+	return qca8k_fdb_del(priv, fdb->addr, port_mask, vid);
+}
+
+static int
+qca8k_port_fdb_dump(struct dsa_switch *ds, int port,
+		    struct switchdev_obj_port_fdb *fdb,
+		    int (*cb)(struct switchdev_obj *obj))
+{
+	struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv;
+	struct qca8k_fdb _fdb = { 0 };
+	int cnt = QCA8K_NUM_FDB_RECORDS;
+	int ret = 0;
+
+	mutex_lock(&priv->reg_mutex);
+	while (cnt-- && !qca8k_fdb_next(priv, &_fdb, port)) {
+		if (!_fdb.aging)
+			break;
+
+		ether_addr_copy(fdb->addr, _fdb.mac);
+		fdb->vid = _fdb.vid;
+		if (_fdb.aging == QCA8K_ATU_STATUS_STATIC)
+			fdb->ndm_state = NUD_NOARP;
+		else
+			fdb->ndm_state = NUD_REACHABLE;
+
+		ret = cb(&fdb->obj);
+		if (ret)
+			break;
+	}
+	mutex_unlock(&priv->reg_mutex);
+
+	return 0;
+}
+
+static enum dsa_tag_protocol
+qca8k_get_tag_protocol(struct dsa_switch *ds)
+{
+	return DSA_TAG_PROTO_QCA;
+}
+
+static struct dsa_switch_ops qca8k_switch_ops = {
+	.get_tag_protocol	= qca8k_get_tag_protocol,
+	.setup			= qca8k_setup,
+	.get_strings		= qca8k_get_strings,
+	.phy_read		= qca8k_phy_read,
+	.phy_write		= qca8k_phy_write,
+	.get_ethtool_stats	= qca8k_get_ethtool_stats,
+	.get_sset_count		= qca8k_get_sset_count,
+	.get_eee		= qca8k_get_eee,
+	.set_eee		= qca8k_set_eee,
+	.port_enable		= qca8k_port_enable,
+	.port_disable		= qca8k_port_disable,
+	.port_stp_state_set	= qca8k_port_stp_state_set,
+	.port_bridge_join	= qca8k_port_bridge_join,
+	.port_bridge_leave	= qca8k_port_bridge_leave,
+	.port_fdb_prepare	= qca8k_port_fdb_prepare,
+	.port_fdb_add		= qca8k_port_fdb_add,
+	.port_fdb_del		= qca8k_port_fdb_del,
+	.port_fdb_dump		= qca8k_port_fdb_dump,
+};
+
+static int
+qca8k_sw_probe(struct mdio_device *mdiodev)
+{
+	struct qca8k_priv *priv;
+	u32 id;
+
+	/* allocate the private data struct so that we can probe the switches
+	 * ID register
+	 */
+	priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->bus = mdiodev->bus;
+
+	/* read the switches ID register */
+	id = qca8k_read(priv, QCA8K_REG_MASK_CTRL);
+	id >>= QCA8K_MASK_CTRL_ID_S;
+	id &= QCA8K_MASK_CTRL_ID_M;
+	if (id != QCA8K_ID_QCA8337)
+		return -ENODEV;
+
+	priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL);
+	if (!priv->ds)
+		return -ENOMEM;
+
+	priv->ds->priv = priv;
+	priv->ds->dev = &mdiodev->dev;
+	priv->ds->ops = &qca8k_switch_ops;
+	mutex_init(&priv->reg_mutex);
+	dev_set_drvdata(&mdiodev->dev, priv);
+
+	return dsa_register_switch(priv->ds, priv->ds->dev->of_node);
+}
+
+static void
+qca8k_sw_remove(struct mdio_device *mdiodev)
+{
+	struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev);
+	int i;
+
+	for (i = 0; i < QCA8K_NUM_PORTS; i++)
+		qca8k_port_set_status(priv, i, 0);
+
+	dsa_unregister_switch(priv->ds);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static void
+qca8k_set_pm(struct qca8k_priv *priv, int enable)
+{
+	int i;
+
+	for (i = 0; i < QCA8K_NUM_PORTS; i++) {
+		if (!priv->port_sts[i].enabled)
+			continue;
+
+		qca8k_port_set_status(priv, i, enable);
+	}
+}
+
+static int qca8k_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct qca8k_priv *priv = platform_get_drvdata(pdev);
+
+	qca8k_set_pm(priv, 0);
+
+	return dsa_switch_suspend(priv->ds);
+}
+
+static int qca8k_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct qca8k_priv *priv = platform_get_drvdata(pdev);
+
+	qca8k_set_pm(priv, 1);
+
+	return dsa_switch_resume(priv->ds);
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(qca8k_pm_ops,
+			 qca8k_suspend, qca8k_resume);
+
+static const struct of_device_id qca8k_of_match[] = {
+	{ .compatible = "qca,qca8337" },
+	{ /* sentinel */ },
+};
+
+static struct mdio_driver qca8kmdio_driver = {
+	.probe  = qca8k_sw_probe,
+	.remove = qca8k_sw_remove,
+	.mdiodrv.driver = {
+		.name = "qca8k",
+		.of_match_table = qca8k_of_match,
+		.pm = &qca8k_pm_ops,
+	},
+};
+
+mdio_module_driver(qca8kmdio_driver);
+
+MODULE_AUTHOR("Mathieu Olivari, John Crispin <john@phrozen.org>");
+MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:qca8k");
diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h
new file mode 100644
index 0000000..2014647
--- /dev/null
+++ b/drivers/net/dsa/qca8k.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2009 Felix Fietkau <nbd@nbd.name>
+ * Copyright (C) 2011-2012 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __QCA8K_H
+#define __QCA8K_H
+
+#include <linux/delay.h>
+#include <linux/regmap.h>
+
+#define QCA8K_NUM_PORTS					7
+
+#define PHY_ID_QCA8337					0x004dd036
+#define QCA8K_ID_QCA8337				0x13
+
+#define QCA8K_NUM_FDB_RECORDS				2048
+
+#define QCA8K_CPU_PORT					0
+
+/* Global control registers */
+#define QCA8K_REG_MASK_CTRL				0x000
+#define   QCA8K_MASK_CTRL_ID_M				0xff
+#define   QCA8K_MASK_CTRL_ID_S				8
+#define QCA8K_REG_PORT0_PAD_CTRL			0x004
+#define QCA8K_REG_PORT5_PAD_CTRL			0x008
+#define QCA8K_REG_PORT6_PAD_CTRL			0x00c
+#define   QCA8K_PORT_PAD_RGMII_EN			BIT(26)
+#define   QCA8K_PORT_PAD_RGMII_TX_DELAY(x)		\
+						((0x8 + (x & 0x3)) << 22)
+#define   QCA8K_PORT_PAD_RGMII_RX_DELAY(x)		\
+						((0x10 + (x & 0x3)) << 20)
+#define   QCA8K_PORT_PAD_RGMII_RX_DELAY_EN		BIT(24)
+#define   QCA8K_PORT_PAD_SGMII_EN			BIT(7)
+#define QCA8K_REG_MODULE_EN				0x030
+#define   QCA8K_MODULE_EN_MIB				BIT(0)
+#define QCA8K_REG_MIB					0x034
+#define   QCA8K_MIB_FLUSH				BIT(24)
+#define   QCA8K_MIB_CPU_KEEP				BIT(20)
+#define   QCA8K_MIB_BUSY				BIT(17)
+#define QCA8K_GOL_MAC_ADDR0				0x60
+#define QCA8K_GOL_MAC_ADDR1				0x64
+#define QCA8K_REG_PORT_STATUS(_i)			(0x07c + (_i) * 4)
+#define   QCA8K_PORT_STATUS_SPEED			GENMASK(2, 0)
+#define   QCA8K_PORT_STATUS_SPEED_S			0
+#define   QCA8K_PORT_STATUS_TXMAC			BIT(2)
+#define   QCA8K_PORT_STATUS_RXMAC			BIT(3)
+#define   QCA8K_PORT_STATUS_TXFLOW			BIT(4)
+#define   QCA8K_PORT_STATUS_RXFLOW			BIT(5)
+#define   QCA8K_PORT_STATUS_DUPLEX			BIT(6)
+#define   QCA8K_PORT_STATUS_LINK_UP			BIT(8)
+#define   QCA8K_PORT_STATUS_LINK_AUTO			BIT(9)
+#define   QCA8K_PORT_STATUS_LINK_PAUSE			BIT(10)
+#define QCA8K_REG_PORT_HDR_CTRL(_i)			(0x9c + (_i * 4))
+#define   QCA8K_PORT_HDR_CTRL_RX_MASK			GENMASK(3, 2)
+#define   QCA8K_PORT_HDR_CTRL_RX_S			2
+#define   QCA8K_PORT_HDR_CTRL_TX_MASK			GENMASK(1, 0)
+#define   QCA8K_PORT_HDR_CTRL_TX_S			0
+#define   QCA8K_PORT_HDR_CTRL_ALL			2
+#define   QCA8K_PORT_HDR_CTRL_MGMT			1
+#define   QCA8K_PORT_HDR_CTRL_NONE			0
+
+/* EEE control registers */
+#define QCA8K_REG_EEE_CTRL				0x100
+#define  QCA8K_REG_EEE_CTRL_LPI_EN(_i)			((_i + 1) * 2)
+
+/* ACL registers */
+#define QCA8K_REG_PORT_VLAN_CTRL0(_i)			(0x420 + (_i * 8))
+#define   QCA8K_PORT_VLAN_CVID(x)			(x << 16)
+#define   QCA8K_PORT_VLAN_SVID(x)			x
+#define QCA8K_REG_PORT_VLAN_CTRL1(_i)			(0x424 + (_i * 8))
+#define QCA8K_REG_IPV4_PRI_BASE_ADDR			0x470
+#define QCA8K_REG_IPV4_PRI_ADDR_MASK			0x474
+
+/* Lookup registers */
+#define QCA8K_REG_ATU_DATA0				0x600
+#define   QCA8K_ATU_ADDR2_S				24
+#define   QCA8K_ATU_ADDR3_S				16
+#define   QCA8K_ATU_ADDR4_S				8
+#define QCA8K_REG_ATU_DATA1				0x604
+#define   QCA8K_ATU_PORT_M				0x7f
+#define   QCA8K_ATU_PORT_S				16
+#define   QCA8K_ATU_ADDR0_S				8
+#define QCA8K_REG_ATU_DATA2				0x608
+#define   QCA8K_ATU_VID_M				0xfff
+#define   QCA8K_ATU_VID_S				8
+#define   QCA8K_ATU_STATUS_M				0xf
+#define   QCA8K_ATU_STATUS_STATIC			0xf
+#define QCA8K_REG_ATU_FUNC				0x60c
+#define   QCA8K_ATU_FUNC_BUSY				BIT(31)
+#define   QCA8K_ATU_FUNC_PORT_EN			BIT(14)
+#define   QCA8K_ATU_FUNC_MULTI_EN			BIT(13)
+#define   QCA8K_ATU_FUNC_FULL				BIT(12)
+#define   QCA8K_ATU_FUNC_PORT_M				0xf
+#define   QCA8K_ATU_FUNC_PORT_S				8
+#define QCA8K_REG_GLOBAL_FW_CTRL0			0x620
+#define   QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN		BIT(10)
+#define QCA8K_REG_GLOBAL_FW_CTRL1			0x624
+#define   QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S		24
+#define   QCA8K_GLOBAL_FW_CTRL1_BC_DP_S			16
+#define   QCA8K_GLOBAL_FW_CTRL1_MC_DP_S			8
+#define   QCA8K_GLOBAL_FW_CTRL1_UC_DP_S			0
+#define QCA8K_PORT_LOOKUP_CTRL(_i)			(0x660 + (_i) * 0xc)
+#define   QCA8K_PORT_LOOKUP_MEMBER			GENMASK(6, 0)
+#define   QCA8K_PORT_LOOKUP_STATE_MASK			GENMASK(18, 16)
+#define   QCA8K_PORT_LOOKUP_STATE_DISABLED		(0 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE_BLOCKING		(1 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE_LISTENING		(2 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE_LEARNING		(3 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE_FORWARD		(4 << 16)
+#define   QCA8K_PORT_LOOKUP_STATE			GENMASK(18, 16)
+#define   QCA8K_PORT_LOOKUP_LEARN			BIT(20)
+
+/* Pkt edit registers */
+#define QCA8K_EGRESS_VLAN(x)				(0x0c70 + (4 * (x / 2)))
+
+/* L3 registers */
+#define QCA8K_HROUTER_CONTROL				0xe00
+#define   QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_M		GENMASK(17, 16)
+#define   QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_S		16
+#define   QCA8K_HROUTER_CONTROL_ARP_AGE_MODE		1
+#define QCA8K_HROUTER_PBASED_CONTROL1			0xe08
+#define QCA8K_HROUTER_PBASED_CONTROL2			0xe0c
+#define QCA8K_HNAT_CONTROL				0xe38
+
+/* MIB registers */
+#define QCA8K_PORT_MIB_COUNTER(_i)			(0x1000 + (_i) * 0x100)
+
+/* QCA specific MII registers */
+#define MII_ATH_MMD_ADDR				0x0d
+#define MII_ATH_MMD_DATA				0x0e
+
+enum {
+	QCA8K_PORT_SPEED_10M = 0,
+	QCA8K_PORT_SPEED_100M = 1,
+	QCA8K_PORT_SPEED_1000M = 2,
+	QCA8K_PORT_SPEED_ERR = 3,
+};
+
+enum qca8k_fdb_cmd {
+	QCA8K_FDB_FLUSH	= 1,
+	QCA8K_FDB_LOAD = 2,
+	QCA8K_FDB_PURGE = 3,
+	QCA8K_FDB_NEXT = 6,
+	QCA8K_FDB_SEARCH = 7,
+};
+
+struct ar8xxx_port_status {
+	struct ethtool_eee eee;
+	struct net_device *bridge_dev;
+	int enabled;
+};
+
+struct qca8k_priv {
+	struct regmap *regmap;
+	struct mii_bus *bus;
+	struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS];
+	struct dsa_switch *ds;
+	struct mutex reg_mutex;
+};
+
+struct qca8k_mib_desc {
+	unsigned int size;
+	unsigned int offset;
+	const char *name;
+};
+
+struct qca8k_fdb {
+	u16 vid;
+	u8 port_mask;
+	u8 aging;
+	u8 mac[6];
+};
+
+#endif /* __QCA8K_H */
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 25c55ab..9133e79 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -3089,7 +3089,7 @@
 	iowrite16(new_mode, ioaddr + EL3_CMD);
 }
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 /* Setup the card so that it can receive frames with an 802.1q VLAN tag.
    Note that this must be done after each RxReset due to some backwards
    compatibility logic in the Cyclone and Tornado ASICs */
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 2ffd634..8cc7467 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -24,6 +24,7 @@
 source "drivers/net/ethernet/allwinner/Kconfig"
 source "drivers/net/ethernet/alteon/Kconfig"
 source "drivers/net/ethernet/altera/Kconfig"
+source "drivers/net/ethernet/amazon/Kconfig"
 source "drivers/net/ethernet/amd/Kconfig"
 source "drivers/net/ethernet/apm/Kconfig"
 source "drivers/net/ethernet/apple/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 1d349e9..a09423d 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -10,6 +10,7 @@
 obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/
 obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/
 obj-$(CONFIG_ALTERA_TSE) += altera/
+obj-$(CONFIG_NET_VENDOR_AMAZON) += amazon/
 obj-$(CONFIG_NET_VENDOR_AMD) += amd/
 obj-$(CONFIG_NET_XGENE) += apm/
 obj-$(CONFIG_NET_VENDOR_APPLE) += apple/
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 1d10696..8af2c88 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -66,7 +66,7 @@
  */
 #define ZEROCOPY
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define VLAN_SUPPORT
 #endif
 
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index 38eaea1..00f9ee3 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -192,8 +192,8 @@
 			goto init_error;
 
 		skb_reserve(new_skb, NET_IP_ALIGN);
-		/* Invidate the data cache of skb->data range when it is write back
-		 * cache. It will prevent overwritting the new data from DMA
+		/* Invalidate the data cache of skb->data range when it is write back
+		 * cache. It will prevent overwriting the new data from DMA
 		 */
 		blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
 					 (unsigned long)new_skb->end);
@@ -1205,7 +1205,7 @@
 	}
 	/* reserve 2 bytes for RXDWA padding */
 	skb_reserve(new_skb, NET_IP_ALIGN);
-	/* Invidate the data cache of skb->data range when it is write back
+	/* Invalidate the data cache of skb->data range when it is write back
 	 * cache. It will prevent overwritting the new data from DMA
 	 */
 	blackfin_dcache_invalidate_range((unsigned long)new_skb->head,
@@ -1599,7 +1599,7 @@
 	*(__le16 *) (&(ndev->dev_addr[4])) = cpu_to_le16((u16) bfin_read_EMAC_ADDRHI());
 
 	/* probe mac */
-	/*todo: how to proble? which is revision_register */
+	/*todo: how to probe? which is revision_register */
 	bfin_write_EMAC_ADDRLO(0x12345678);
 	if (bfin_read_EMAC_ADDRLO() != 0x12345678) {
 		dev_err(&pdev->dev, "Cannot detect Blackfin on-chip ethernet MAC controller!\n");
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index bca07c5..f8df824 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1105,27 +1105,6 @@
 	struct greth_private *greth = netdev_priv(dev);
 	greth->msg_enable = value;
 }
-static int greth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct greth_private *greth = netdev_priv(dev);
-	struct phy_device *phy = greth->phy;
-
-	if (!phy)
-		return -ENODEV;
-
-	return phy_ethtool_gset(phy, cmd);
-}
-
-static int greth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct greth_private *greth = netdev_priv(dev);
-	struct phy_device *phy = greth->phy;
-
-	if (!phy)
-		return -ENODEV;
-
-	return phy_ethtool_sset(phy, cmd);
-}
 
 static int greth_get_regs_len(struct net_device *dev)
 {
@@ -1157,12 +1136,12 @@
 static const struct ethtool_ops greth_ethtool_ops = {
 	.get_msglevel		= greth_get_msglevel,
 	.set_msglevel		= greth_set_msglevel,
-	.get_settings		= greth_get_settings,
-	.set_settings		= greth_set_settings,
 	.get_drvinfo		= greth_get_drvinfo,
 	.get_regs_len           = greth_get_regs_len,
 	.get_regs               = greth_get_regs,
 	.get_link		= ethtool_op_get_link,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
 };
 
 static struct net_device_ops greth_netdev_ops = {
@@ -1224,7 +1203,7 @@
 static void greth_link_change(struct net_device *dev)
 {
 	struct greth_private *greth = netdev_priv(dev);
-	struct phy_device *phydev = greth->phy;
+	struct phy_device *phydev = dev->phydev;
 	unsigned long flags;
 	int status_change = 0;
 	u32 ctrl;
@@ -1307,7 +1286,6 @@
 	greth->link = 0;
 	greth->speed = 0;
 	greth->duplex = -1;
-	greth->phy = phy;
 
 	return 0;
 }
@@ -1325,6 +1303,7 @@
 {
 	int ret;
 	unsigned long timeout;
+	struct net_device *ndev = greth->netdev;
 
 	greth->mdio = mdiobus_alloc();
 	if (!greth->mdio) {
@@ -1349,15 +1328,16 @@
 		goto unreg_mdio;
 	}
 
-	phy_start(greth->phy);
+	phy_start(ndev->phydev);
 
 	/* If Ethernet debug link is used make autoneg happen right away */
 	if (greth->edcl && greth_edcl == 1) {
-		phy_start_aneg(greth->phy);
+		phy_start_aneg(ndev->phydev);
 		timeout = jiffies + 6*HZ;
-		while (!phy_aneg_done(greth->phy) && time_before(jiffies, timeout)) {
+		while (!phy_aneg_done(ndev->phydev) &&
+		       time_before(jiffies, timeout)) {
 		}
-		phy_read_status(greth->phy);
+		phy_read_status(ndev->phydev);
 		greth_link_change(greth->netdev);
 	}
 
@@ -1569,8 +1549,8 @@
 
 	dma_free_coherent(&of_dev->dev, 1024, greth->tx_bd_base, greth->tx_bd_base_phys);
 
-	if (greth->phy)
-		phy_stop(greth->phy);
+	if (ndev->phydev)
+		phy_stop(ndev->phydev);
 	mdiobus_unregister(greth->mdio);
 
 	unregister_netdev(ndev);
diff --git a/drivers/net/ethernet/aeroflex/greth.h b/drivers/net/ethernet/aeroflex/greth.h
index 92dd918..9c07140 100644
--- a/drivers/net/ethernet/aeroflex/greth.h
+++ b/drivers/net/ethernet/aeroflex/greth.h
@@ -123,7 +123,6 @@
 	struct napi_struct napi;
 	spinlock_t devlock;
 
-	struct phy_device *phy;
 	struct mii_bus *mdio;
 	unsigned int link;
 	unsigned int speed;
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index c83ebae..9066838 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -2961,7 +2961,7 @@
 		sizeof(info->bus_info));
 }
 
-static struct ethtool_ops et131x_ethtool_ops = {
+static const struct ethtool_ops et131x_ethtool_ops = {
 	.get_drvinfo	= et131x_get_drvinfo,
 	.get_regs_len	= et131x_get_regs_len,
 	.get_regs	= et131x_get_regs,
diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig
new file mode 100644
index 0000000..99b30353
--- /dev/null
+++ b/drivers/net/ethernet/amazon/Kconfig
@@ -0,0 +1,27 @@
+#
+# Amazon network device configuration
+#
+
+config NET_VENDOR_AMAZON
+	bool "Amazon Devices"
+	default y
+	---help---
+	  If you have a network (Ethernet) device belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Amazon devices. If you say Y, you will be asked
+	  for your specific device in the following questions.
+
+if NET_VENDOR_AMAZON
+
+config ENA_ETHERNET
+	tristate "Elastic Network Adapter (ENA) support"
+	depends on (PCI_MSI && X86)
+	---help---
+	  This driver supports Elastic Network Adapter (ENA)"
+
+	  To compile this driver as a module, choose M here.
+	  The module will be called ena.
+
+endif #NET_VENDOR_AMAZON
diff --git a/drivers/net/ethernet/amazon/Makefile b/drivers/net/ethernet/amazon/Makefile
new file mode 100644
index 0000000..8e0b73f
--- /dev/null
+++ b/drivers/net/ethernet/amazon/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Amazon network device drivers.
+#
+
+obj-$(CONFIG_ENA_ETHERNET) += ena/
diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile
new file mode 100644
index 0000000..eaeeae06
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Elastic Network Adapter (ENA) device drivers.
+#
+
+obj-$(CONFIG_ENA_ETHERNET) += ena.o
+
+ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
new file mode 100644
index 0000000..a46e749
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -0,0 +1,973 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_ADMIN_H_
+#define _ENA_ADMIN_H_
+
+enum ena_admin_aq_opcode {
+	ENA_ADMIN_CREATE_SQ	= 1,
+
+	ENA_ADMIN_DESTROY_SQ	= 2,
+
+	ENA_ADMIN_CREATE_CQ	= 3,
+
+	ENA_ADMIN_DESTROY_CQ	= 4,
+
+	ENA_ADMIN_GET_FEATURE	= 8,
+
+	ENA_ADMIN_SET_FEATURE	= 9,
+
+	ENA_ADMIN_GET_STATS	= 11,
+};
+
+enum ena_admin_aq_completion_status {
+	ENA_ADMIN_SUCCESS			= 0,
+
+	ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE	= 1,
+
+	ENA_ADMIN_BAD_OPCODE			= 2,
+
+	ENA_ADMIN_UNSUPPORTED_OPCODE		= 3,
+
+	ENA_ADMIN_MALFORMED_REQUEST		= 4,
+
+	/* Additional status is provided in ACQ entry extended_status */
+	ENA_ADMIN_ILLEGAL_PARAMETER		= 5,
+
+	ENA_ADMIN_UNKNOWN_ERROR			= 6,
+};
+
+enum ena_admin_aq_feature_id {
+	ENA_ADMIN_DEVICE_ATTRIBUTES		= 1,
+
+	ENA_ADMIN_MAX_QUEUES_NUM		= 2,
+
+	ENA_ADMIN_RSS_HASH_FUNCTION		= 10,
+
+	ENA_ADMIN_STATELESS_OFFLOAD_CONFIG	= 11,
+
+	ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG	= 12,
+
+	ENA_ADMIN_MTU				= 14,
+
+	ENA_ADMIN_RSS_HASH_INPUT		= 18,
+
+	ENA_ADMIN_INTERRUPT_MODERATION		= 20,
+
+	ENA_ADMIN_AENQ_CONFIG			= 26,
+
+	ENA_ADMIN_LINK_CONFIG			= 27,
+
+	ENA_ADMIN_HOST_ATTR_CONFIG		= 28,
+
+	ENA_ADMIN_FEATURES_OPCODE_NUM		= 32,
+};
+
+enum ena_admin_placement_policy_type {
+	/* descriptors and headers are in host memory */
+	ENA_ADMIN_PLACEMENT_POLICY_HOST	= 1,
+
+	/* descriptors and headers are in device memory (a.k.a Low Latency
+	 * Queue)
+	 */
+	ENA_ADMIN_PLACEMENT_POLICY_DEV	= 3,
+};
+
+enum ena_admin_link_types {
+	ENA_ADMIN_LINK_SPEED_1G		= 0x1,
+
+	ENA_ADMIN_LINK_SPEED_2_HALF_G	= 0x2,
+
+	ENA_ADMIN_LINK_SPEED_5G		= 0x4,
+
+	ENA_ADMIN_LINK_SPEED_10G	= 0x8,
+
+	ENA_ADMIN_LINK_SPEED_25G	= 0x10,
+
+	ENA_ADMIN_LINK_SPEED_40G	= 0x20,
+
+	ENA_ADMIN_LINK_SPEED_50G	= 0x40,
+
+	ENA_ADMIN_LINK_SPEED_100G	= 0x80,
+
+	ENA_ADMIN_LINK_SPEED_200G	= 0x100,
+
+	ENA_ADMIN_LINK_SPEED_400G	= 0x200,
+};
+
+enum ena_admin_completion_policy_type {
+	/* completion queue entry for each sq descriptor */
+	ENA_ADMIN_COMPLETION_POLICY_DESC		= 0,
+
+	/* completion queue entry upon request in sq descriptor */
+	ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND	= 1,
+
+	/* current queue head pointer is updated in OS memory upon sq
+	 * descriptor request
+	 */
+	ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND	= 2,
+
+	/* current queue head pointer is updated in OS memory for each sq
+	 * descriptor
+	 */
+	ENA_ADMIN_COMPLETION_POLICY_HEAD		= 3,
+};
+
+/* basic stats return ena_admin_basic_stats while extanded stats return a
+ * buffer (string format) with additional statistics per queue and per
+ * device id
+ */
+enum ena_admin_get_stats_type {
+	ENA_ADMIN_GET_STATS_TYPE_BASIC		= 0,
+
+	ENA_ADMIN_GET_STATS_TYPE_EXTENDED	= 1,
+};
+
+enum ena_admin_get_stats_scope {
+	ENA_ADMIN_SPECIFIC_QUEUE	= 0,
+
+	ENA_ADMIN_ETH_TRAFFIC		= 1,
+};
+
+struct ena_admin_aq_common_desc {
+	/* 11:0 : command_id
+	 * 15:12 : reserved12
+	 */
+	u16 command_id;
+
+	/* as appears in ena_admin_aq_opcode */
+	u8 opcode;
+
+	/* 0 : phase
+	 * 1 : ctrl_data - control buffer address valid
+	 * 2 : ctrl_data_indirect - control buffer address
+	 *    points to list of pages with addresses of control
+	 *    buffers
+	 * 7:3 : reserved3
+	 */
+	u8 flags;
+};
+
+/* used in ena_admin_aq_entry. Can point directly to control data, or to a
+ * page list chunk. Used also at the end of indirect mode page list chunks,
+ * for chaining.
+ */
+struct ena_admin_ctrl_buff_info {
+	u32 length;
+
+	struct ena_common_mem_addr address;
+};
+
+struct ena_admin_sq {
+	u16 sq_idx;
+
+	/* 4:0 : reserved
+	 * 7:5 : sq_direction - 0x1 - Tx; 0x2 - Rx
+	 */
+	u8 sq_identity;
+
+	u8 reserved1;
+};
+
+struct ena_admin_aq_entry {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	union {
+		u32 inline_data_w1[3];
+
+		struct ena_admin_ctrl_buff_info control_buffer;
+	} u;
+
+	u32 inline_data_w4[12];
+};
+
+struct ena_admin_acq_common_desc {
+	/* command identifier to associate it with the aq descriptor
+	 * 11:0 : command_id
+	 * 15:12 : reserved12
+	 */
+	u16 command;
+
+	u8 status;
+
+	/* 0 : phase
+	 * 7:1 : reserved1
+	 */
+	u8 flags;
+
+	u16 extended_status;
+
+	/* serves as a hint what AQ entries can be revoked */
+	u16 sq_head_indx;
+};
+
+struct ena_admin_acq_entry {
+	struct ena_admin_acq_common_desc acq_common_descriptor;
+
+	u32 response_specific_data[14];
+};
+
+struct ena_admin_aq_create_sq_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	/* 4:0 : reserved0_w1
+	 * 7:5 : sq_direction - 0x1 - Tx, 0x2 - Rx
+	 */
+	u8 sq_identity;
+
+	u8 reserved8_w1;
+
+	/* 3:0 : placement_policy - Describing where the SQ
+	 *    descriptor ring and the SQ packet headers reside:
+	 *    0x1 - descriptors and headers are in OS memory,
+	 *    0x3 - descriptors and headers in device memory
+	 *    (a.k.a Low Latency Queue)
+	 * 6:4 : completion_policy - Describing what policy
+	 *    to use for generation completion entry (cqe) in
+	 *    the CQ associated with this SQ: 0x0 - cqe for each
+	 *    sq descriptor, 0x1 - cqe upon request in sq
+	 *    descriptor, 0x2 - current queue head pointer is
+	 *    updated in OS memory upon sq descriptor request
+	 *    0x3 - current queue head pointer is updated in OS
+	 *    memory for each sq descriptor
+	 * 7 : reserved15_w1
+	 */
+	u8 sq_caps_2;
+
+	/* 0 : is_physically_contiguous - Described if the
+	 *    queue ring memory is allocated in physical
+	 *    contiguous pages or split.
+	 * 7:1 : reserved17_w1
+	 */
+	u8 sq_caps_3;
+
+	/* associated completion queue id. This CQ must be created prior to
+	 *    SQ creation
+	 */
+	u16 cq_idx;
+
+	/* submission queue depth in entries */
+	u16 sq_depth;
+
+	/* SQ physical base address in OS memory. This field should not be
+	 * used for Low Latency queues. Has to be page aligned.
+	 */
+	struct ena_common_mem_addr sq_ba;
+
+	/* specifies queue head writeback location in OS memory. Valid if
+	 * completion_policy is set to completion_policy_head_on_demand or
+	 * completion_policy_head. Has to be cache aligned
+	 */
+	struct ena_common_mem_addr sq_head_writeback;
+
+	u32 reserved0_w7;
+
+	u32 reserved0_w8;
+};
+
+enum ena_admin_sq_direction {
+	ENA_ADMIN_SQ_DIRECTION_TX	= 1,
+
+	ENA_ADMIN_SQ_DIRECTION_RX	= 2,
+};
+
+struct ena_admin_acq_create_sq_resp_desc {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	u16 sq_idx;
+
+	u16 reserved;
+
+	/* queue doorbell address as an offset to PCIe MMIO REG BAR */
+	u32 sq_doorbell_offset;
+
+	/* low latency queue ring base address as an offset to PCIe MMIO
+	 * LLQ_MEM BAR
+	 */
+	u32 llq_descriptors_offset;
+
+	/* low latency queue headers' memory as an offset to PCIe MMIO
+	 * LLQ_MEM BAR
+	 */
+	u32 llq_headers_offset;
+};
+
+struct ena_admin_aq_destroy_sq_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	struct ena_admin_sq sq;
+};
+
+struct ena_admin_acq_destroy_sq_resp_desc {
+	struct ena_admin_acq_common_desc acq_common_desc;
+};
+
+struct ena_admin_aq_create_cq_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	/* 4:0 : reserved5
+	 * 5 : interrupt_mode_enabled - if set, cq operates
+	 *    in interrupt mode, otherwise - polling
+	 * 7:6 : reserved6
+	 */
+	u8 cq_caps_1;
+
+	/* 4:0 : cq_entry_size_words - size of CQ entry in
+	 *    32-bit words, valid values: 4, 8.
+	 * 7:5 : reserved7
+	 */
+	u8 cq_caps_2;
+
+	/* completion queue depth in # of entries. must be power of 2 */
+	u16 cq_depth;
+
+	/* msix vector assigned to this cq */
+	u32 msix_vector;
+
+	/* cq physical base address in OS memory. CQ must be physically
+	 * contiguous
+	 */
+	struct ena_common_mem_addr cq_ba;
+};
+
+struct ena_admin_acq_create_cq_resp_desc {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	u16 cq_idx;
+
+	/* actual cq depth in number of entries */
+	u16 cq_actual_depth;
+
+	u32 numa_node_register_offset;
+
+	u32 cq_head_db_register_offset;
+
+	u32 cq_interrupt_unmask_register_offset;
+};
+
+struct ena_admin_aq_destroy_cq_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	u16 cq_idx;
+
+	u16 reserved1;
+};
+
+struct ena_admin_acq_destroy_cq_resp_desc {
+	struct ena_admin_acq_common_desc acq_common_desc;
+};
+
+/* ENA AQ Get Statistics command. Extended statistics are placed in control
+ * buffer pointed by AQ entry
+ */
+struct ena_admin_aq_get_stats_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	union {
+		/* command specific inline data */
+		u32 inline_data_w1[3];
+
+		struct ena_admin_ctrl_buff_info control_buffer;
+	} u;
+
+	/* stats type as defined in enum ena_admin_get_stats_type */
+	u8 type;
+
+	/* stats scope defined in enum ena_admin_get_stats_scope */
+	u8 scope;
+
+	u16 reserved3;
+
+	/* queue id. used when scope is specific_queue */
+	u16 queue_idx;
+
+	/* device id, value 0xFFFF means mine. only privileged device can get
+	 *    stats of other device
+	 */
+	u16 device_id;
+};
+
+/* Basic Statistics Command. */
+struct ena_admin_basic_stats {
+	u32 tx_bytes_low;
+
+	u32 tx_bytes_high;
+
+	u32 tx_pkts_low;
+
+	u32 tx_pkts_high;
+
+	u32 rx_bytes_low;
+
+	u32 rx_bytes_high;
+
+	u32 rx_pkts_low;
+
+	u32 rx_pkts_high;
+
+	u32 rx_drops_low;
+
+	u32 rx_drops_high;
+};
+
+struct ena_admin_acq_get_stats_resp {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	struct ena_admin_basic_stats basic_stats;
+};
+
+struct ena_admin_get_set_feature_common_desc {
+	/* 1:0 : select - 0x1 - current value; 0x3 - default
+	 *    value
+	 * 7:3 : reserved3
+	 */
+	u8 flags;
+
+	/* as appears in ena_admin_aq_feature_id */
+	u8 feature_id;
+
+	u16 reserved16;
+};
+
+struct ena_admin_device_attr_feature_desc {
+	u32 impl_id;
+
+	u32 device_version;
+
+	/* bitmap of ena_admin_aq_feature_id */
+	u32 supported_features;
+
+	u32 reserved3;
+
+	/* Indicates how many bits are used physical address access. */
+	u32 phys_addr_width;
+
+	/* Indicates how many bits are used virtual address access. */
+	u32 virt_addr_width;
+
+	/* unicast MAC address (in Network byte order) */
+	u8 mac_addr[6];
+
+	u8 reserved7[2];
+
+	u32 max_mtu;
+};
+
+struct ena_admin_queue_feature_desc {
+	/* including LLQs */
+	u32 max_sq_num;
+
+	u32 max_sq_depth;
+
+	u32 max_cq_num;
+
+	u32 max_cq_depth;
+
+	u32 max_llq_num;
+
+	u32 max_llq_depth;
+
+	u32 max_header_size;
+
+	/* Maximum Descriptors number, including meta descriptor, allowed for
+	 *    a single Tx packet
+	 */
+	u16 max_packet_tx_descs;
+
+	/* Maximum Descriptors number allowed for a single Rx packet */
+	u16 max_packet_rx_descs;
+};
+
+struct ena_admin_set_feature_mtu_desc {
+	/* exclude L2 */
+	u32 mtu;
+};
+
+struct ena_admin_set_feature_host_attr_desc {
+	/* host OS info base address in OS memory. host info is 4KB of
+	 * physically contiguous
+	 */
+	struct ena_common_mem_addr os_info_ba;
+
+	/* host debug area base address in OS memory. debug area must be
+	 * physically contiguous
+	 */
+	struct ena_common_mem_addr debug_ba;
+
+	/* debug area size */
+	u32 debug_area_size;
+};
+
+struct ena_admin_feature_intr_moder_desc {
+	/* interrupt delay granularity in usec */
+	u16 intr_delay_resolution;
+
+	u16 reserved;
+};
+
+struct ena_admin_get_feature_link_desc {
+	/* Link speed in Mb */
+	u32 speed;
+
+	/* bit field of enum ena_admin_link types */
+	u32 supported;
+
+	/* 0 : autoneg
+	 * 1 : duplex - Full Duplex
+	 * 31:2 : reserved2
+	 */
+	u32 flags;
+};
+
+struct ena_admin_feature_aenq_desc {
+	/* bitmask for AENQ groups the device can report */
+	u32 supported_groups;
+
+	/* bitmask for AENQ groups to report */
+	u32 enabled_groups;
+};
+
+struct ena_admin_feature_offload_desc {
+	/* 0 : TX_L3_csum_ipv4
+	 * 1 : TX_L4_ipv4_csum_part - The checksum field
+	 *    should be initialized with pseudo header checksum
+	 * 2 : TX_L4_ipv4_csum_full
+	 * 3 : TX_L4_ipv6_csum_part - The checksum field
+	 *    should be initialized with pseudo header checksum
+	 * 4 : TX_L4_ipv6_csum_full
+	 * 5 : tso_ipv4
+	 * 6 : tso_ipv6
+	 * 7 : tso_ecn
+	 */
+	u32 tx;
+
+	/* Receive side supported stateless offload
+	 * 0 : RX_L3_csum_ipv4 - IPv4 checksum
+	 * 1 : RX_L4_ipv4_csum - TCP/UDP/IPv4 checksum
+	 * 2 : RX_L4_ipv6_csum - TCP/UDP/IPv6 checksum
+	 * 3 : RX_hash - Hash calculation
+	 */
+	u32 rx_supported;
+
+	u32 rx_enabled;
+};
+
+enum ena_admin_hash_functions {
+	ENA_ADMIN_TOEPLITZ	= 1,
+
+	ENA_ADMIN_CRC32		= 2,
+};
+
+struct ena_admin_feature_rss_flow_hash_control {
+	u32 keys_num;
+
+	u32 reserved;
+
+	u32 key[10];
+};
+
+struct ena_admin_feature_rss_flow_hash_function {
+	/* 7:0 : funcs - bitmask of ena_admin_hash_functions */
+	u32 supported_func;
+
+	/* 7:0 : selected_func - bitmask of
+	 *    ena_admin_hash_functions
+	 */
+	u32 selected_func;
+
+	/* initial value */
+	u32 init_val;
+};
+
+/* RSS flow hash protocols */
+enum ena_admin_flow_hash_proto {
+	ENA_ADMIN_RSS_TCP4	= 0,
+
+	ENA_ADMIN_RSS_UDP4	= 1,
+
+	ENA_ADMIN_RSS_TCP6	= 2,
+
+	ENA_ADMIN_RSS_UDP6	= 3,
+
+	ENA_ADMIN_RSS_IP4	= 4,
+
+	ENA_ADMIN_RSS_IP6	= 5,
+
+	ENA_ADMIN_RSS_IP4_FRAG	= 6,
+
+	ENA_ADMIN_RSS_NOT_IP	= 7,
+
+	ENA_ADMIN_RSS_PROTO_NUM	= 16,
+};
+
+/* RSS flow hash fields */
+enum ena_admin_flow_hash_fields {
+	/* Ethernet Dest Addr */
+	ENA_ADMIN_RSS_L2_DA	= 0,
+
+	/* Ethernet Src Addr */
+	ENA_ADMIN_RSS_L2_SA	= 1,
+
+	/* ipv4/6 Dest Addr */
+	ENA_ADMIN_RSS_L3_DA	= 2,
+
+	/* ipv4/6 Src Addr */
+	ENA_ADMIN_RSS_L3_SA	= 5,
+
+	/* tcp/udp Dest Port */
+	ENA_ADMIN_RSS_L4_DP	= 6,
+
+	/* tcp/udp Src Port */
+	ENA_ADMIN_RSS_L4_SP	= 7,
+};
+
+struct ena_admin_proto_input {
+	/* flow hash fields (bitwise according to ena_admin_flow_hash_fields) */
+	u16 fields;
+
+	u16 reserved2;
+};
+
+struct ena_admin_feature_rss_hash_control {
+	struct ena_admin_proto_input supported_fields[ENA_ADMIN_RSS_PROTO_NUM];
+
+	struct ena_admin_proto_input selected_fields[ENA_ADMIN_RSS_PROTO_NUM];
+
+	struct ena_admin_proto_input reserved2[ENA_ADMIN_RSS_PROTO_NUM];
+
+	struct ena_admin_proto_input reserved3[ENA_ADMIN_RSS_PROTO_NUM];
+};
+
+struct ena_admin_feature_rss_flow_hash_input {
+	/* supported hash input sorting
+	 * 1 : L3_sort - support swap L3 addresses if DA is
+	 *    smaller than SA
+	 * 2 : L4_sort - support swap L4 ports if DP smaller
+	 *    SP
+	 */
+	u16 supported_input_sort;
+
+	/* enabled hash input sorting
+	 * 1 : enable_L3_sort - enable swap L3 addresses if
+	 *    DA smaller than SA
+	 * 2 : enable_L4_sort - enable swap L4 ports if DP
+	 *    smaller than SP
+	 */
+	u16 enabled_input_sort;
+};
+
+enum ena_admin_os_type {
+	ENA_ADMIN_OS_LINUX	= 1,
+
+	ENA_ADMIN_OS_WIN	= 2,
+
+	ENA_ADMIN_OS_DPDK	= 3,
+
+	ENA_ADMIN_OS_FREEBSD	= 4,
+
+	ENA_ADMIN_OS_IPXE	= 5,
+};
+
+struct ena_admin_host_info {
+	/* defined in enum ena_admin_os_type */
+	u32 os_type;
+
+	/* os distribution string format */
+	u8 os_dist_str[128];
+
+	/* OS distribution numeric format */
+	u32 os_dist;
+
+	/* kernel version string format */
+	u8 kernel_ver_str[32];
+
+	/* Kernel version numeric format */
+	u32 kernel_ver;
+
+	/* 7:0 : major
+	 * 15:8 : minor
+	 * 23:16 : sub_minor
+	 */
+	u32 driver_version;
+
+	/* features bitmap */
+	u32 supported_network_features[4];
+};
+
+struct ena_admin_rss_ind_table_entry {
+	u16 cq_idx;
+
+	u16 reserved;
+};
+
+struct ena_admin_feature_rss_ind_table {
+	/* min supported table size (2^min_size) */
+	u16 min_size;
+
+	/* max supported table size (2^max_size) */
+	u16 max_size;
+
+	/* table size (2^size) */
+	u16 size;
+
+	u16 reserved;
+
+	/* index of the inline entry. 0xFFFFFFFF means invalid */
+	u32 inline_index;
+
+	/* used for updating single entry, ignored when setting the entire
+	 * table through the control buffer.
+	 */
+	struct ena_admin_rss_ind_table_entry inline_entry;
+};
+
+struct ena_admin_get_feat_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	struct ena_admin_ctrl_buff_info control_buffer;
+
+	struct ena_admin_get_set_feature_common_desc feat_common;
+
+	u32 raw[11];
+};
+
+struct ena_admin_get_feat_resp {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	union {
+		u32 raw[14];
+
+		struct ena_admin_device_attr_feature_desc dev_attr;
+
+		struct ena_admin_queue_feature_desc max_queue;
+
+		struct ena_admin_feature_aenq_desc aenq;
+
+		struct ena_admin_get_feature_link_desc link;
+
+		struct ena_admin_feature_offload_desc offload;
+
+		struct ena_admin_feature_rss_flow_hash_function flow_hash_func;
+
+		struct ena_admin_feature_rss_flow_hash_input flow_hash_input;
+
+		struct ena_admin_feature_rss_ind_table ind_table;
+
+		struct ena_admin_feature_intr_moder_desc intr_moderation;
+	} u;
+};
+
+struct ena_admin_set_feat_cmd {
+	struct ena_admin_aq_common_desc aq_common_descriptor;
+
+	struct ena_admin_ctrl_buff_info control_buffer;
+
+	struct ena_admin_get_set_feature_common_desc feat_common;
+
+	union {
+		u32 raw[11];
+
+		/* mtu size */
+		struct ena_admin_set_feature_mtu_desc mtu;
+
+		/* host attributes */
+		struct ena_admin_set_feature_host_attr_desc host_attr;
+
+		/* AENQ configuration */
+		struct ena_admin_feature_aenq_desc aenq;
+
+		/* rss flow hash function */
+		struct ena_admin_feature_rss_flow_hash_function flow_hash_func;
+
+		/* rss flow hash input */
+		struct ena_admin_feature_rss_flow_hash_input flow_hash_input;
+
+		/* rss indirection table */
+		struct ena_admin_feature_rss_ind_table ind_table;
+	} u;
+};
+
+struct ena_admin_set_feat_resp {
+	struct ena_admin_acq_common_desc acq_common_desc;
+
+	union {
+		u32 raw[14];
+	} u;
+};
+
+struct ena_admin_aenq_common_desc {
+	u16 group;
+
+	u16 syndrom;
+
+	/* 0 : phase */
+	u8 flags;
+
+	u8 reserved1[3];
+
+	u32 timestamp_low;
+
+	u32 timestamp_high;
+};
+
+/* asynchronous event notification groups */
+enum ena_admin_aenq_group {
+	ENA_ADMIN_LINK_CHANGE		= 0,
+
+	ENA_ADMIN_FATAL_ERROR		= 1,
+
+	ENA_ADMIN_WARNING		= 2,
+
+	ENA_ADMIN_NOTIFICATION		= 3,
+
+	ENA_ADMIN_KEEP_ALIVE		= 4,
+
+	ENA_ADMIN_AENQ_GROUPS_NUM	= 5,
+};
+
+enum ena_admin_aenq_notification_syndrom {
+	ENA_ADMIN_SUSPEND	= 0,
+
+	ENA_ADMIN_RESUME	= 1,
+};
+
+struct ena_admin_aenq_entry {
+	struct ena_admin_aenq_common_desc aenq_common_desc;
+
+	/* command specific inline data */
+	u32 inline_data_w4[12];
+};
+
+struct ena_admin_aenq_link_change_desc {
+	struct ena_admin_aenq_common_desc aenq_common_desc;
+
+	/* 0 : link_status */
+	u32 flags;
+};
+
+struct ena_admin_ena_mmio_req_read_less_resp {
+	u16 req_id;
+
+	u16 reg_off;
+
+	/* value is valid when poll is cleared */
+	u32 reg_val;
+};
+
+/* aq_common_desc */
+#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1)
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2
+#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2)
+
+/* sq */
+#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5)
+
+/* acq_common_desc */
+#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0)
+#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aq_create_sq_cmd */
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4)
+#define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0)
+
+/* aq_create_cq_cmd */
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
+#define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
+
+/* get_set_feature_common_desc */
+#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0)
+
+/* get_feature_link_desc */
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0)
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1
+#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1)
+
+/* feature_offload_desc */
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_SHIFT 1
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_SHIFT 2
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_SHIFT 3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3
+#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3)
+
+/* feature_rss_flow_hash_function */
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_SELECTED_FUNC_MASK GENMASK(7, 0)
+
+/* feature_rss_flow_hash_input */
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1)
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2
+#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2)
+
+/* host_info */
+#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0)
+#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8
+#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16
+#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16)
+
+/* aenq_common_desc */
+#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0)
+
+/* aenq_link_change_desc */
+#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0)
+
+#endif /*_ENA_ADMIN_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
new file mode 100644
index 0000000..3066d9c
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -0,0 +1,2666 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ena_com.h"
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* Timeout in micro-sec */
+#define ADMIN_CMD_TIMEOUT_US (1000000)
+
+#define ENA_ASYNC_QUEUE_DEPTH 4
+#define ENA_ADMIN_QUEUE_DEPTH 32
+
+#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \
+		ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \
+		| (ENA_COMMON_SPEC_VERSION_MINOR))
+
+#define ENA_CTRL_MAJOR		0
+#define ENA_CTRL_MINOR		0
+#define ENA_CTRL_SUB_MINOR	1
+
+#define MIN_ENA_CTRL_VER \
+	(((ENA_CTRL_MAJOR) << \
+	(ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \
+	((ENA_CTRL_MINOR) << \
+	(ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \
+	(ENA_CTRL_SUB_MINOR))
+
+#define ENA_DMA_ADDR_TO_UINT32_LOW(x)	((u32)((u64)(x)))
+#define ENA_DMA_ADDR_TO_UINT32_HIGH(x)	((u32)(((u64)(x)) >> 32))
+
+#define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************/
+
+enum ena_cmd_status {
+	ENA_CMD_SUBMITTED,
+	ENA_CMD_COMPLETED,
+	/* Abort - canceled by the driver */
+	ENA_CMD_ABORTED,
+};
+
+struct ena_comp_ctx {
+	struct completion wait_event;
+	struct ena_admin_acq_entry *user_cqe;
+	u32 comp_size;
+	enum ena_cmd_status status;
+	/* status from the device */
+	u8 comp_status;
+	u8 cmd_opcode;
+	bool occupied;
+};
+
+struct ena_com_stats_ctx {
+	struct ena_admin_aq_get_stats_cmd get_cmd;
+	struct ena_admin_acq_get_stats_resp get_resp;
+};
+
+static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev,
+				       struct ena_common_mem_addr *ena_addr,
+				       dma_addr_t addr)
+{
+	if ((addr & GENMASK_ULL(ena_dev->dma_addr_bits - 1, 0)) != addr) {
+		pr_err("dma address has more bits that the device supports\n");
+		return -EINVAL;
+	}
+
+	ena_addr->mem_addr_low = (u32)addr;
+	ena_addr->mem_addr_high = (u64)addr >> 32;
+
+	return 0;
+}
+
+static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue)
+{
+	struct ena_com_admin_sq *sq = &queue->sq;
+	u16 size = ADMIN_SQ_SIZE(queue->q_depth);
+
+	sq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &sq->dma_addr,
+					  GFP_KERNEL);
+
+	if (!sq->entries) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	sq->head = 0;
+	sq->tail = 0;
+	sq->phase = 1;
+
+	sq->db_addr = NULL;
+
+	return 0;
+}
+
+static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue)
+{
+	struct ena_com_admin_cq *cq = &queue->cq;
+	u16 size = ADMIN_CQ_SIZE(queue->q_depth);
+
+	cq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &cq->dma_addr,
+					  GFP_KERNEL);
+
+	if (!cq->entries) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	cq->head = 0;
+	cq->phase = 1;
+
+	return 0;
+}
+
+static int ena_com_admin_init_aenq(struct ena_com_dev *dev,
+				   struct ena_aenq_handlers *aenq_handlers)
+{
+	struct ena_com_aenq *aenq = &dev->aenq;
+	u32 addr_low, addr_high, aenq_caps;
+	u16 size;
+
+	dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH;
+	size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH);
+	aenq->entries = dma_zalloc_coherent(dev->dmadev, size, &aenq->dma_addr,
+					    GFP_KERNEL);
+
+	if (!aenq->entries) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	aenq->head = aenq->q_depth;
+	aenq->phase = 1;
+
+	addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr);
+	addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr);
+
+	writel(addr_low, dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF);
+	writel(addr_high, dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF);
+
+	aenq_caps = 0;
+	aenq_caps |= dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK;
+	aenq_caps |= (sizeof(struct ena_admin_aenq_entry)
+		      << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) &
+		     ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK;
+	writel(aenq_caps, dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF);
+
+	if (unlikely(!aenq_handlers)) {
+		pr_err("aenq handlers pointer is NULL\n");
+		return -EINVAL;
+	}
+
+	aenq->aenq_handlers = aenq_handlers;
+
+	return 0;
+}
+
+static inline void comp_ctxt_release(struct ena_com_admin_queue *queue,
+				     struct ena_comp_ctx *comp_ctx)
+{
+	comp_ctx->occupied = false;
+	atomic_dec(&queue->outstanding_cmds);
+}
+
+static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue,
+					  u16 command_id, bool capture)
+{
+	if (unlikely(command_id >= queue->q_depth)) {
+		pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n",
+		       command_id, queue->q_depth);
+		return NULL;
+	}
+
+	if (unlikely(queue->comp_ctx[command_id].occupied && capture)) {
+		pr_err("Completion context is occupied\n");
+		return NULL;
+	}
+
+	if (capture) {
+		atomic_inc(&queue->outstanding_cmds);
+		queue->comp_ctx[command_id].occupied = true;
+	}
+
+	return &queue->comp_ctx[command_id];
+}
+
+static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue,
+						       struct ena_admin_aq_entry *cmd,
+						       size_t cmd_size_in_bytes,
+						       struct ena_admin_acq_entry *comp,
+						       size_t comp_size_in_bytes)
+{
+	struct ena_comp_ctx *comp_ctx;
+	u16 tail_masked, cmd_id;
+	u16 queue_size_mask;
+	u16 cnt;
+
+	queue_size_mask = admin_queue->q_depth - 1;
+
+	tail_masked = admin_queue->sq.tail & queue_size_mask;
+
+	/* In case of queue FULL */
+	cnt = admin_queue->sq.tail - admin_queue->sq.head;
+	if (cnt >= admin_queue->q_depth) {
+		pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n",
+			 admin_queue->sq.tail, admin_queue->sq.head,
+			 admin_queue->q_depth);
+		admin_queue->stats.out_of_space++;
+		return ERR_PTR(-ENOSPC);
+	}
+
+	cmd_id = admin_queue->curr_cmd_id;
+
+	cmd->aq_common_descriptor.flags |= admin_queue->sq.phase &
+		ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK;
+
+	cmd->aq_common_descriptor.command_id |= cmd_id &
+		ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK;
+
+	comp_ctx = get_comp_ctxt(admin_queue, cmd_id, true);
+	if (unlikely(!comp_ctx))
+		return ERR_PTR(-EINVAL);
+
+	comp_ctx->status = ENA_CMD_SUBMITTED;
+	comp_ctx->comp_size = (u32)comp_size_in_bytes;
+	comp_ctx->user_cqe = comp;
+	comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode;
+
+	reinit_completion(&comp_ctx->wait_event);
+
+	memcpy(&admin_queue->sq.entries[tail_masked], cmd, cmd_size_in_bytes);
+
+	admin_queue->curr_cmd_id = (admin_queue->curr_cmd_id + 1) &
+		queue_size_mask;
+
+	admin_queue->sq.tail++;
+	admin_queue->stats.submitted_cmd++;
+
+	if (unlikely((admin_queue->sq.tail & queue_size_mask) == 0))
+		admin_queue->sq.phase = !admin_queue->sq.phase;
+
+	writel(admin_queue->sq.tail, admin_queue->sq.db_addr);
+
+	return comp_ctx;
+}
+
+static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue)
+{
+	size_t size = queue->q_depth * sizeof(struct ena_comp_ctx);
+	struct ena_comp_ctx *comp_ctx;
+	u16 i;
+
+	queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL);
+	if (unlikely(!queue->comp_ctx)) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < queue->q_depth; i++) {
+		comp_ctx = get_comp_ctxt(queue, i, false);
+		if (comp_ctx)
+			init_completion(&comp_ctx->wait_event);
+	}
+
+	return 0;
+}
+
+static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue,
+						     struct ena_admin_aq_entry *cmd,
+						     size_t cmd_size_in_bytes,
+						     struct ena_admin_acq_entry *comp,
+						     size_t comp_size_in_bytes)
+{
+	unsigned long flags;
+	struct ena_comp_ctx *comp_ctx;
+
+	spin_lock_irqsave(&admin_queue->q_lock, flags);
+	if (unlikely(!admin_queue->running_state)) {
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+		return ERR_PTR(-ENODEV);
+	}
+	comp_ctx = __ena_com_submit_admin_cmd(admin_queue, cmd,
+					      cmd_size_in_bytes,
+					      comp,
+					      comp_size_in_bytes);
+	if (unlikely(IS_ERR(comp_ctx)))
+		admin_queue->running_state = false;
+	spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+	return comp_ctx;
+}
+
+static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
+			      struct ena_com_create_io_ctx *ctx,
+			      struct ena_com_io_sq *io_sq)
+{
+	size_t size;
+	int dev_node = 0;
+
+	memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+
+	io_sq->desc_entry_size =
+		(io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
+		sizeof(struct ena_eth_io_tx_desc) :
+		sizeof(struct ena_eth_io_rx_desc);
+
+	size = io_sq->desc_entry_size * io_sq->q_depth;
+
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+		dev_node = dev_to_node(ena_dev->dmadev);
+		set_dev_node(ena_dev->dmadev, ctx->numa_node);
+		io_sq->desc_addr.virt_addr =
+			dma_zalloc_coherent(ena_dev->dmadev, size,
+					    &io_sq->desc_addr.phys_addr,
+					    GFP_KERNEL);
+		set_dev_node(ena_dev->dmadev, dev_node);
+		if (!io_sq->desc_addr.virt_addr) {
+			io_sq->desc_addr.virt_addr =
+				dma_zalloc_coherent(ena_dev->dmadev, size,
+						    &io_sq->desc_addr.phys_addr,
+						    GFP_KERNEL);
+		}
+	} else {
+		dev_node = dev_to_node(ena_dev->dmadev);
+		set_dev_node(ena_dev->dmadev, ctx->numa_node);
+		io_sq->desc_addr.virt_addr =
+			devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+		set_dev_node(ena_dev->dmadev, dev_node);
+		if (!io_sq->desc_addr.virt_addr) {
+			io_sq->desc_addr.virt_addr =
+				devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+		}
+	}
+
+	if (!io_sq->desc_addr.virt_addr) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	io_sq->tail = 0;
+	io_sq->next_to_comp = 0;
+	io_sq->phase = 1;
+
+	return 0;
+}
+
+static int ena_com_init_io_cq(struct ena_com_dev *ena_dev,
+			      struct ena_com_create_io_ctx *ctx,
+			      struct ena_com_io_cq *io_cq)
+{
+	size_t size;
+	int prev_node = 0;
+
+	memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr));
+
+	/* Use the basic completion descriptor for Rx */
+	io_cq->cdesc_entry_size_in_bytes =
+		(io_cq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ?
+		sizeof(struct ena_eth_io_tx_cdesc) :
+		sizeof(struct ena_eth_io_rx_cdesc_base);
+
+	size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
+
+	prev_node = dev_to_node(ena_dev->dmadev);
+	set_dev_node(ena_dev->dmadev, ctx->numa_node);
+	io_cq->cdesc_addr.virt_addr =
+		dma_zalloc_coherent(ena_dev->dmadev, size,
+				    &io_cq->cdesc_addr.phys_addr, GFP_KERNEL);
+	set_dev_node(ena_dev->dmadev, prev_node);
+	if (!io_cq->cdesc_addr.virt_addr) {
+		io_cq->cdesc_addr.virt_addr =
+			dma_zalloc_coherent(ena_dev->dmadev, size,
+					    &io_cq->cdesc_addr.phys_addr,
+					    GFP_KERNEL);
+	}
+
+	if (!io_cq->cdesc_addr.virt_addr) {
+		pr_err("memory allocation failed");
+		return -ENOMEM;
+	}
+
+	io_cq->phase = 1;
+	io_cq->head = 0;
+
+	return 0;
+}
+
+static void ena_com_handle_single_admin_completion(struct ena_com_admin_queue *admin_queue,
+						   struct ena_admin_acq_entry *cqe)
+{
+	struct ena_comp_ctx *comp_ctx;
+	u16 cmd_id;
+
+	cmd_id = cqe->acq_common_descriptor.command &
+		ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK;
+
+	comp_ctx = get_comp_ctxt(admin_queue, cmd_id, false);
+	if (unlikely(!comp_ctx)) {
+		pr_err("comp_ctx is NULL. Changing the admin queue running state\n");
+		admin_queue->running_state = false;
+		return;
+	}
+
+	comp_ctx->status = ENA_CMD_COMPLETED;
+	comp_ctx->comp_status = cqe->acq_common_descriptor.status;
+
+	if (comp_ctx->user_cqe)
+		memcpy(comp_ctx->user_cqe, (void *)cqe, comp_ctx->comp_size);
+
+	if (!admin_queue->polling)
+		complete(&comp_ctx->wait_event);
+}
+
+static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_queue)
+{
+	struct ena_admin_acq_entry *cqe = NULL;
+	u16 comp_num = 0;
+	u16 head_masked;
+	u8 phase;
+
+	head_masked = admin_queue->cq.head & (admin_queue->q_depth - 1);
+	phase = admin_queue->cq.phase;
+
+	cqe = &admin_queue->cq.entries[head_masked];
+
+	/* Go over all the completions */
+	while ((cqe->acq_common_descriptor.flags &
+			ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) {
+		/* Do not read the rest of the completion entry before the
+		 * phase bit was validated
+		 */
+		rmb();
+		ena_com_handle_single_admin_completion(admin_queue, cqe);
+
+		head_masked++;
+		comp_num++;
+		if (unlikely(head_masked == admin_queue->q_depth)) {
+			head_masked = 0;
+			phase = !phase;
+		}
+
+		cqe = &admin_queue->cq.entries[head_masked];
+	}
+
+	admin_queue->cq.head += comp_num;
+	admin_queue->cq.phase = phase;
+	admin_queue->sq.head += comp_num;
+	admin_queue->stats.completed_cmd += comp_num;
+}
+
+static int ena_com_comp_status_to_errno(u8 comp_status)
+{
+	if (unlikely(comp_status != 0))
+		pr_err("admin command failed[%u]\n", comp_status);
+
+	if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR))
+		return -EINVAL;
+
+	switch (comp_status) {
+	case ENA_ADMIN_SUCCESS:
+		return 0;
+	case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE:
+		return -ENOMEM;
+	case ENA_ADMIN_UNSUPPORTED_OPCODE:
+		return -EPERM;
+	case ENA_ADMIN_BAD_OPCODE:
+	case ENA_ADMIN_MALFORMED_REQUEST:
+	case ENA_ADMIN_ILLEGAL_PARAMETER:
+	case ENA_ADMIN_UNKNOWN_ERROR:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
+						     struct ena_com_admin_queue *admin_queue)
+{
+	unsigned long flags;
+	u32 start_time;
+	int ret;
+
+	start_time = ((u32)jiffies_to_usecs(jiffies));
+
+	while (comp_ctx->status == ENA_CMD_SUBMITTED) {
+		if ((((u32)jiffies_to_usecs(jiffies)) - start_time) >
+		    ADMIN_CMD_TIMEOUT_US) {
+			pr_err("Wait for completion (polling) timeout\n");
+			/* ENA didn't have any completion */
+			spin_lock_irqsave(&admin_queue->q_lock, flags);
+			admin_queue->stats.no_completion++;
+			admin_queue->running_state = false;
+			spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+			ret = -ETIME;
+			goto err;
+		}
+
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+		ena_com_handle_admin_completion(admin_queue);
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+		msleep(100);
+	}
+
+	if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) {
+		pr_err("Command was aborted\n");
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+		admin_queue->stats.aborted_cmd++;
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n",
+	     comp_ctx->status);
+
+	ret = ena_com_comp_status_to_errno(comp_ctx->comp_status);
+err:
+	comp_ctxt_release(admin_queue, comp_ctx);
+	return ret;
+}
+
+static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx,
+							struct ena_com_admin_queue *admin_queue)
+{
+	unsigned long flags;
+	int ret;
+
+	wait_for_completion_timeout(&comp_ctx->wait_event,
+				    usecs_to_jiffies(ADMIN_CMD_TIMEOUT_US));
+
+	/* In case the command wasn't completed find out the root cause.
+	 * There might be 2 kinds of errors
+	 * 1) No completion (timeout reached)
+	 * 2) There is completion but the device didn't get any msi-x interrupt.
+	 */
+	if (unlikely(comp_ctx->status == ENA_CMD_SUBMITTED)) {
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+		ena_com_handle_admin_completion(admin_queue);
+		admin_queue->stats.no_completion++;
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+
+		if (comp_ctx->status == ENA_CMD_COMPLETED)
+			pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n",
+			       comp_ctx->cmd_opcode);
+		else
+			pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n",
+			       comp_ctx->cmd_opcode, comp_ctx->status);
+
+		admin_queue->running_state = false;
+		ret = -ETIME;
+		goto err;
+	}
+
+	ret = ena_com_comp_status_to_errno(comp_ctx->comp_status);
+err:
+	comp_ctxt_release(admin_queue, comp_ctx);
+	return ret;
+}
+
+/* This method read the hardware device register through posting writes
+ * and waiting for response
+ * On timeout the function will return ENA_MMIO_READ_TIMEOUT
+ */
+static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+	volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp =
+		mmio_read->read_resp;
+	u32 mmio_read_reg, ret;
+	unsigned long flags;
+	int i;
+
+	might_sleep();
+
+	/* If readless is disabled, perform regular read */
+	if (!mmio_read->readless_supported)
+		return readl(ena_dev->reg_bar + offset);
+
+	spin_lock_irqsave(&mmio_read->lock, flags);
+	mmio_read->seq_num++;
+
+	read_resp->req_id = mmio_read->seq_num + 0xDEAD;
+	mmio_read_reg = (offset << ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) &
+			ENA_REGS_MMIO_REG_READ_REG_OFF_MASK;
+	mmio_read_reg |= mmio_read->seq_num &
+			ENA_REGS_MMIO_REG_READ_REQ_ID_MASK;
+
+	/* make sure read_resp->req_id get updated before the hw can write
+	 * there
+	 */
+	wmb();
+
+	writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+
+	for (i = 0; i < ENA_REG_READ_TIMEOUT; i++) {
+		if (read_resp->req_id == mmio_read->seq_num)
+			break;
+
+		udelay(1);
+	}
+
+	if (unlikely(i == ENA_REG_READ_TIMEOUT)) {
+		pr_err("reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n",
+		       mmio_read->seq_num, offset, read_resp->req_id,
+		       read_resp->reg_off);
+		ret = ENA_MMIO_READ_TIMEOUT;
+		goto err;
+	}
+
+	if (read_resp->reg_off != offset) {
+		pr_err("Read failure: wrong offset provided");
+		ret = ENA_MMIO_READ_TIMEOUT;
+	} else {
+		ret = read_resp->reg_val;
+	}
+err:
+	spin_unlock_irqrestore(&mmio_read->lock, flags);
+
+	return ret;
+}
+
+/* There are two types to wait for completion.
+ * Polling mode - wait until the completion is available.
+ * Async mode - wait on wait queue until the completion is ready
+ * (or the timeout expired).
+ * It is expected that the IRQ called ena_com_handle_admin_completion
+ * to mark the completions.
+ */
+static int ena_com_wait_and_process_admin_cq(struct ena_comp_ctx *comp_ctx,
+					     struct ena_com_admin_queue *admin_queue)
+{
+	if (admin_queue->polling)
+		return ena_com_wait_and_process_admin_cq_polling(comp_ctx,
+								 admin_queue);
+
+	return ena_com_wait_and_process_admin_cq_interrupts(comp_ctx,
+							    admin_queue);
+}
+
+static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev,
+				 struct ena_com_io_sq *io_sq)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_admin_aq_destroy_sq_cmd destroy_cmd;
+	struct ena_admin_acq_destroy_sq_resp_desc destroy_resp;
+	u8 direction;
+	int ret;
+
+	memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd));
+
+	if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+		direction = ENA_ADMIN_SQ_DIRECTION_TX;
+	else
+		direction = ENA_ADMIN_SQ_DIRECTION_RX;
+
+	destroy_cmd.sq.sq_identity |= (direction <<
+		ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT) &
+		ENA_ADMIN_SQ_SQ_DIRECTION_MASK;
+
+	destroy_cmd.sq.sq_idx = io_sq->idx;
+	destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_SQ;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&destroy_cmd,
+					    sizeof(destroy_cmd),
+					    (struct ena_admin_acq_entry *)&destroy_resp,
+					    sizeof(destroy_resp));
+
+	if (unlikely(ret && (ret != -ENODEV)))
+		pr_err("failed to destroy io sq error: %d\n", ret);
+
+	return ret;
+}
+
+static void ena_com_io_queue_free(struct ena_com_dev *ena_dev,
+				  struct ena_com_io_sq *io_sq,
+				  struct ena_com_io_cq *io_cq)
+{
+	size_t size;
+
+	if (io_cq->cdesc_addr.virt_addr) {
+		size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth;
+
+		dma_free_coherent(ena_dev->dmadev, size,
+				  io_cq->cdesc_addr.virt_addr,
+				  io_cq->cdesc_addr.phys_addr);
+
+		io_cq->cdesc_addr.virt_addr = NULL;
+	}
+
+	if (io_sq->desc_addr.virt_addr) {
+		size = io_sq->desc_entry_size * io_sq->q_depth;
+
+		if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+			dma_free_coherent(ena_dev->dmadev, size,
+					  io_sq->desc_addr.virt_addr,
+					  io_sq->desc_addr.phys_addr);
+		else
+			devm_kfree(ena_dev->dmadev, io_sq->desc_addr.virt_addr);
+
+		io_sq->desc_addr.virt_addr = NULL;
+	}
+}
+
+static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
+				u16 exp_state)
+{
+	u32 val, i;
+
+	for (i = 0; i < timeout; i++) {
+		val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+
+		if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) {
+			pr_err("Reg read timeout occurred\n");
+			return -ETIME;
+		}
+
+		if ((val & ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) ==
+			exp_state)
+			return 0;
+
+		/* The resolution of the timeout is 100ms */
+		msleep(100);
+	}
+
+	return -ETIME;
+}
+
+static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev,
+					       enum ena_admin_aq_feature_id feature_id)
+{
+	u32 feature_mask = 1 << feature_id;
+
+	/* Device attributes is always supported */
+	if ((feature_id != ENA_ADMIN_DEVICE_ATTRIBUTES) &&
+	    !(ena_dev->supported_features & feature_mask))
+		return false;
+
+	return true;
+}
+
+static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev,
+				  struct ena_admin_get_feat_resp *get_resp,
+				  enum ena_admin_aq_feature_id feature_id,
+				  dma_addr_t control_buf_dma_addr,
+				  u32 control_buff_size)
+{
+	struct ena_com_admin_queue *admin_queue;
+	struct ena_admin_get_feat_cmd get_cmd;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) {
+		pr_info("Feature %d isn't supported\n", feature_id);
+		return -EPERM;
+	}
+
+	memset(&get_cmd, 0x0, sizeof(get_cmd));
+	admin_queue = &ena_dev->admin_queue;
+
+	get_cmd.aq_common_descriptor.opcode = ENA_ADMIN_GET_FEATURE;
+
+	if (control_buff_size)
+		get_cmd.aq_common_descriptor.flags =
+			ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+	else
+		get_cmd.aq_common_descriptor.flags = 0;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &get_cmd.control_buffer.address,
+				   control_buf_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	get_cmd.control_buffer.length = control_buff_size;
+
+	get_cmd.feat_common.feature_id = feature_id;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)
+					    &get_cmd,
+					    sizeof(get_cmd),
+					    (struct ena_admin_acq_entry *)
+					    get_resp,
+					    sizeof(*get_resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to submit get_feature command %d error: %d\n",
+		       feature_id, ret);
+
+	return ret;
+}
+
+static int ena_com_get_feature(struct ena_com_dev *ena_dev,
+			       struct ena_admin_get_feat_resp *get_resp,
+			       enum ena_admin_aq_feature_id feature_id)
+{
+	return ena_com_get_feature_ex(ena_dev,
+				      get_resp,
+				      feature_id,
+				      0,
+				      0);
+}
+
+static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	rss->hash_key =
+		dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+				    &rss->hash_key_dma_addr, GFP_KERNEL);
+
+	if (unlikely(!rss->hash_key))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	if (rss->hash_key)
+		dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
+				  rss->hash_key, rss->hash_key_dma_addr);
+	rss->hash_key = NULL;
+}
+
+static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	rss->hash_ctrl =
+		dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+				    &rss->hash_ctrl_dma_addr, GFP_KERNEL);
+
+	if (unlikely(!rss->hash_ctrl))
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	if (rss->hash_ctrl)
+		dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl),
+				  rss->hash_ctrl, rss->hash_ctrl_dma_addr);
+	rss->hash_ctrl = NULL;
+}
+
+static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev,
+					   u16 log_size)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	size_t tbl_size;
+	int ret;
+
+	ret = ena_com_get_feature(ena_dev, &get_resp,
+				  ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
+	if (unlikely(ret))
+		return ret;
+
+	if ((get_resp.u.ind_table.min_size > log_size) ||
+	    (get_resp.u.ind_table.max_size < log_size)) {
+		pr_err("indirect table size doesn't fit. requested size: %d while min is:%d and max %d\n",
+		       1 << log_size, 1 << get_resp.u.ind_table.min_size,
+		       1 << get_resp.u.ind_table.max_size);
+		return -EINVAL;
+	}
+
+	tbl_size = (1ULL << log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	rss->rss_ind_tbl =
+		dma_zalloc_coherent(ena_dev->dmadev, tbl_size,
+				    &rss->rss_ind_tbl_dma_addr, GFP_KERNEL);
+	if (unlikely(!rss->rss_ind_tbl))
+		goto mem_err1;
+
+	tbl_size = (1ULL << log_size) * sizeof(u16);
+	rss->host_rss_ind_tbl =
+		devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL);
+	if (unlikely(!rss->host_rss_ind_tbl))
+		goto mem_err2;
+
+	rss->tbl_log_size = log_size;
+
+	return 0;
+
+mem_err2:
+	tbl_size = (1ULL << log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl,
+			  rss->rss_ind_tbl_dma_addr);
+	rss->rss_ind_tbl = NULL;
+mem_err1:
+	rss->tbl_log_size = 0;
+	return -ENOMEM;
+}
+
+static void ena_com_indirect_table_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	size_t tbl_size = (1ULL << rss->tbl_log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	if (rss->rss_ind_tbl)
+		dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl,
+				  rss->rss_ind_tbl_dma_addr);
+	rss->rss_ind_tbl = NULL;
+
+	if (rss->host_rss_ind_tbl)
+		devm_kfree(ena_dev->dmadev, rss->host_rss_ind_tbl);
+	rss->host_rss_ind_tbl = NULL;
+}
+
+static int ena_com_create_io_sq(struct ena_com_dev *ena_dev,
+				struct ena_com_io_sq *io_sq, u16 cq_idx)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_admin_aq_create_sq_cmd create_cmd;
+	struct ena_admin_acq_create_sq_resp_desc cmd_completion;
+	u8 direction;
+	int ret;
+
+	memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd));
+
+	create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ;
+
+	if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+		direction = ENA_ADMIN_SQ_DIRECTION_TX;
+	else
+		direction = ENA_ADMIN_SQ_DIRECTION_RX;
+
+	create_cmd.sq_identity |= (direction <<
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT) &
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK;
+
+	create_cmd.sq_caps_2 |= io_sq->mem_queue_type &
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK;
+
+	create_cmd.sq_caps_2 |= (ENA_ADMIN_COMPLETION_POLICY_DESC <<
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT) &
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK;
+
+	create_cmd.sq_caps_3 |=
+		ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK;
+
+	create_cmd.cq_idx = cq_idx;
+	create_cmd.sq_depth = io_sq->q_depth;
+
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) {
+		ret = ena_com_mem_addr_set(ena_dev,
+					   &create_cmd.sq_ba,
+					   io_sq->desc_addr.phys_addr);
+		if (unlikely(ret)) {
+			pr_err("memory address set failed\n");
+			return ret;
+		}
+	}
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&create_cmd,
+					    sizeof(create_cmd),
+					    (struct ena_admin_acq_entry *)&cmd_completion,
+					    sizeof(cmd_completion));
+	if (unlikely(ret)) {
+		pr_err("Failed to create IO SQ. error: %d\n", ret);
+		return ret;
+	}
+
+	io_sq->idx = cmd_completion.sq_idx;
+
+	io_sq->db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+		(uintptr_t)cmd_completion.sq_doorbell_offset);
+
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		io_sq->header_addr = (u8 __iomem *)((uintptr_t)ena_dev->mem_bar
+				+ cmd_completion.llq_headers_offset);
+
+		io_sq->desc_addr.pbuf_dev_addr =
+			(u8 __iomem *)((uintptr_t)ena_dev->mem_bar +
+			cmd_completion.llq_descriptors_offset);
+	}
+
+	pr_debug("created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth);
+
+	return ret;
+}
+
+static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_com_io_sq *io_sq;
+	u16 qid;
+	int i;
+
+	for (i = 0; i < 1 << rss->tbl_log_size; i++) {
+		qid = rss->host_rss_ind_tbl[i];
+		if (qid >= ENA_TOTAL_NUM_QUEUES)
+			return -EINVAL;
+
+		io_sq = &ena_dev->io_sq_queues[qid];
+
+		if (io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX)
+			return -EINVAL;
+
+		rss->rss_ind_tbl[i].cq_idx = io_sq->idx;
+	}
+
+	return 0;
+}
+
+static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev)
+{
+	u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 };
+	struct ena_rss *rss = &ena_dev->rss;
+	u8 idx;
+	u16 i;
+
+	for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++)
+		dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i;
+
+	for (i = 0; i < 1 << rss->tbl_log_size; i++) {
+		if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES)
+			return -EINVAL;
+		idx = (u8)rss->rss_ind_tbl[i].cq_idx;
+
+		if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES)
+			return -EINVAL;
+
+		rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx];
+	}
+
+	return 0;
+}
+
+static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev)
+{
+	size_t size;
+
+	size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS;
+
+	ena_dev->intr_moder_tbl =
+		devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL);
+	if (!ena_dev->intr_moder_tbl)
+		return -ENOMEM;
+
+	ena_com_config_default_interrupt_moderation_table(ena_dev);
+
+	return 0;
+}
+
+static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev,
+						 u16 intr_delay_resolution)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+	unsigned int i;
+
+	if (!intr_delay_resolution) {
+		pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n");
+		intr_delay_resolution = 1;
+	}
+	ena_dev->intr_delay_resolution = intr_delay_resolution;
+
+	/* update Rx */
+	for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++)
+		intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution;
+
+	/* update Tx */
+	ena_dev->intr_moder_tx_interval /= intr_delay_resolution;
+}
+
+/*****************************************************************************/
+/*******************************      API       ******************************/
+/*****************************************************************************/
+
+int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
+				  struct ena_admin_aq_entry *cmd,
+				  size_t cmd_size,
+				  struct ena_admin_acq_entry *comp,
+				  size_t comp_size)
+{
+	struct ena_comp_ctx *comp_ctx;
+	int ret;
+
+	comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size,
+					    comp, comp_size);
+	if (unlikely(IS_ERR(comp_ctx))) {
+		pr_err("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx));
+		return PTR_ERR(comp_ctx);
+	}
+
+	ret = ena_com_wait_and_process_admin_cq(comp_ctx, admin_queue);
+	if (unlikely(ret)) {
+		if (admin_queue->running_state)
+			pr_err("Failed to process command. ret = %d\n", ret);
+		else
+			pr_debug("Failed to process command. ret = %d\n", ret);
+	}
+	return ret;
+}
+
+int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
+			 struct ena_com_io_cq *io_cq)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_admin_aq_create_cq_cmd create_cmd;
+	struct ena_admin_acq_create_cq_resp_desc cmd_completion;
+	int ret;
+
+	memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd));
+
+	create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ;
+
+	create_cmd.cq_caps_2 |= (io_cq->cdesc_entry_size_in_bytes / 4) &
+		ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK;
+	create_cmd.cq_caps_1 |=
+		ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK;
+
+	create_cmd.msix_vector = io_cq->msix_vector;
+	create_cmd.cq_depth = io_cq->q_depth;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &create_cmd.cq_ba,
+				   io_cq->cdesc_addr.phys_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&create_cmd,
+					    sizeof(create_cmd),
+					    (struct ena_admin_acq_entry *)&cmd_completion,
+					    sizeof(cmd_completion));
+	if (unlikely(ret)) {
+		pr_err("Failed to create IO CQ. error: %d\n", ret);
+		return ret;
+	}
+
+	io_cq->idx = cmd_completion.cq_idx;
+
+	io_cq->unmask_reg = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+		cmd_completion.cq_interrupt_unmask_register_offset);
+
+	if (cmd_completion.cq_head_db_register_offset)
+		io_cq->cq_head_db_reg =
+			(u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+			cmd_completion.cq_head_db_register_offset);
+
+	if (cmd_completion.numa_node_register_offset)
+		io_cq->numa_node_cfg_reg =
+			(u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+			cmd_completion.numa_node_register_offset);
+
+	pr_debug("created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth);
+
+	return ret;
+}
+
+int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid,
+			    struct ena_com_io_sq **io_sq,
+			    struct ena_com_io_cq **io_cq)
+{
+	if (qid >= ENA_TOTAL_NUM_QUEUES) {
+		pr_err("Invalid queue number %d but the max is %d\n", qid,
+		       ENA_TOTAL_NUM_QUEUES);
+		return -EINVAL;
+	}
+
+	*io_sq = &ena_dev->io_sq_queues[qid];
+	*io_cq = &ena_dev->io_cq_queues[qid];
+
+	return 0;
+}
+
+void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_comp_ctx *comp_ctx;
+	u16 i;
+
+	if (!admin_queue->comp_ctx)
+		return;
+
+	for (i = 0; i < admin_queue->q_depth; i++) {
+		comp_ctx = get_comp_ctxt(admin_queue, i, false);
+		if (unlikely(!comp_ctx))
+			break;
+
+		comp_ctx->status = ENA_CMD_ABORTED;
+
+		complete(&comp_ctx->wait_event);
+	}
+}
+
+void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(&admin_queue->q_lock, flags);
+	while (atomic_read(&admin_queue->outstanding_cmds) != 0) {
+		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+		msleep(20);
+		spin_lock_irqsave(&admin_queue->q_lock, flags);
+	}
+	spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+}
+
+int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
+			  struct ena_com_io_cq *io_cq)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_admin_aq_destroy_cq_cmd destroy_cmd;
+	struct ena_admin_acq_destroy_cq_resp_desc destroy_resp;
+	int ret;
+
+	memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd));
+
+	destroy_cmd.cq_idx = io_cq->idx;
+	destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&destroy_cmd,
+					    sizeof(destroy_cmd),
+					    (struct ena_admin_acq_entry *)&destroy_resp,
+					    sizeof(destroy_resp));
+
+	if (unlikely(ret && (ret != -ENODEV)))
+		pr_err("Failed to destroy IO CQ. error: %d\n", ret);
+
+	return ret;
+}
+
+bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev)
+{
+	return ena_dev->admin_queue.running_state;
+}
+
+void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	unsigned long flags;
+
+	spin_lock_irqsave(&admin_queue->q_lock, flags);
+	ena_dev->admin_queue.running_state = state;
+	spin_unlock_irqrestore(&admin_queue->q_lock, flags);
+}
+
+void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev)
+{
+	u16 depth = ena_dev->aenq.q_depth;
+
+	WARN(ena_dev->aenq.head != depth, "Invalid AENQ state\n");
+
+	/* Init head_db to mark that all entries in the queue
+	 * are initially available
+	 */
+	writel(depth, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+}
+
+int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag)
+{
+	struct ena_com_admin_queue *admin_queue;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	struct ena_admin_get_feat_resp get_resp;
+	int ret;
+
+	ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG);
+	if (ret) {
+		pr_info("Can't get aenq configuration\n");
+		return ret;
+	}
+
+	if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) {
+		pr_warn("Trying to set unsupported aenq events. supported flag: %x asked flag: %x\n",
+			get_resp.u.aenq.supported_groups, groups_flag);
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+	admin_queue = &ena_dev->admin_queue;
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags = 0;
+	cmd.feat_common.feature_id = ENA_ADMIN_AENQ_CONFIG;
+	cmd.u.aenq.enabled_groups = groups_flag;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to config AENQ ret: %d\n", ret);
+
+	return ret;
+}
+
+int ena_com_get_dma_width(struct ena_com_dev *ena_dev)
+{
+	u32 caps = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF);
+	int width;
+
+	if (unlikely(caps == ENA_MMIO_READ_TIMEOUT)) {
+		pr_err("Reg read timeout occurred\n");
+		return -ETIME;
+	}
+
+	width = (caps & ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >>
+		ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT;
+
+	pr_debug("ENA dma width: %d\n", width);
+
+	if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) {
+		pr_err("DMA width illegal value: %d\n", width);
+		return -EINVAL;
+	}
+
+	ena_dev->dma_addr_bits = width;
+
+	return width;
+}
+
+int ena_com_validate_version(struct ena_com_dev *ena_dev)
+{
+	u32 ver;
+	u32 ctrl_ver;
+	u32 ctrl_ver_masked;
+
+	/* Make sure the ENA version and the controller version are at least
+	 * as the driver expects
+	 */
+	ver = ena_com_reg_bar_read32(ena_dev, ENA_REGS_VERSION_OFF);
+	ctrl_ver = ena_com_reg_bar_read32(ena_dev,
+					  ENA_REGS_CONTROLLER_VERSION_OFF);
+
+	if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) ||
+		     (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) {
+		pr_err("Reg read timeout occurred\n");
+		return -ETIME;
+	}
+
+	pr_info("ena device version: %d.%d\n",
+		(ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >>
+			ENA_REGS_VERSION_MAJOR_VERSION_SHIFT,
+		ver & ENA_REGS_VERSION_MINOR_VERSION_MASK);
+
+	if (ver < MIN_ENA_VER) {
+		pr_err("ENA version is lower than the minimal version the driver supports\n");
+		return -1;
+	}
+
+	pr_info("ena controller version: %d.%d.%d implementation version %d\n",
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >>
+			ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT,
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >>
+			ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT,
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK),
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >>
+			ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT);
+
+	ctrl_ver_masked =
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) |
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) |
+		(ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK);
+
+	/* Validate the ctrl version without the implementation ID */
+	if (ctrl_ver_masked < MIN_ENA_CTRL_VER) {
+		pr_err("ENA ctrl version is lower than the minimal ctrl version the driver supports\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void ena_com_admin_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_com_admin_cq *cq = &admin_queue->cq;
+	struct ena_com_admin_sq *sq = &admin_queue->sq;
+	struct ena_com_aenq *aenq = &ena_dev->aenq;
+	u16 size;
+
+	if (admin_queue->comp_ctx)
+		devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx);
+	admin_queue->comp_ctx = NULL;
+	size = ADMIN_SQ_SIZE(admin_queue->q_depth);
+	if (sq->entries)
+		dma_free_coherent(ena_dev->dmadev, size, sq->entries,
+				  sq->dma_addr);
+	sq->entries = NULL;
+
+	size = ADMIN_CQ_SIZE(admin_queue->q_depth);
+	if (cq->entries)
+		dma_free_coherent(ena_dev->dmadev, size, cq->entries,
+				  cq->dma_addr);
+	cq->entries = NULL;
+
+	size = ADMIN_AENQ_SIZE(aenq->q_depth);
+	if (ena_dev->aenq.entries)
+		dma_free_coherent(ena_dev->dmadev, size, aenq->entries,
+				  aenq->dma_addr);
+	aenq->entries = NULL;
+}
+
+void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling)
+{
+	ena_dev->admin_queue.polling = polling;
+}
+
+int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+	spin_lock_init(&mmio_read->lock);
+	mmio_read->read_resp =
+		dma_zalloc_coherent(ena_dev->dmadev,
+				    sizeof(*mmio_read->read_resp),
+				    &mmio_read->read_resp_dma_addr, GFP_KERNEL);
+	if (unlikely(!mmio_read->read_resp))
+		return -ENOMEM;
+
+	ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
+
+	mmio_read->read_resp->req_id = 0x0;
+	mmio_read->seq_num = 0x0;
+	mmio_read->readless_supported = true;
+
+	return 0;
+}
+
+void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+	mmio_read->readless_supported = readless_supported;
+}
+
+void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+
+	writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF);
+	writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF);
+
+	dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp),
+			  mmio_read->read_resp, mmio_read->read_resp_dma_addr);
+
+	mmio_read->read_resp = NULL;
+}
+
+void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read;
+	u32 addr_low, addr_high;
+
+	addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(mmio_read->read_resp_dma_addr);
+	addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(mmio_read->read_resp_dma_addr);
+
+	writel(addr_low, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF);
+	writel(addr_high, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF);
+}
+
+int ena_com_admin_init(struct ena_com_dev *ena_dev,
+		       struct ena_aenq_handlers *aenq_handlers,
+		       bool init_spinlock)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high;
+	int ret;
+
+	dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+
+	if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT)) {
+		pr_err("Reg read timeout occurred\n");
+		return -ETIME;
+	}
+
+	if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) {
+		pr_err("Device isn't ready, abort com init\n");
+		return -ENODEV;
+	}
+
+	admin_queue->q_depth = ENA_ADMIN_QUEUE_DEPTH;
+
+	admin_queue->q_dmadev = ena_dev->dmadev;
+	admin_queue->polling = false;
+	admin_queue->curr_cmd_id = 0;
+
+	atomic_set(&admin_queue->outstanding_cmds, 0);
+
+	if (init_spinlock)
+		spin_lock_init(&admin_queue->q_lock);
+
+	ret = ena_com_init_comp_ctxt(admin_queue);
+	if (ret)
+		goto error;
+
+	ret = ena_com_admin_init_sq(admin_queue);
+	if (ret)
+		goto error;
+
+	ret = ena_com_admin_init_cq(admin_queue);
+	if (ret)
+		goto error;
+
+	admin_queue->sq.db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar +
+		ENA_REGS_AQ_DB_OFF);
+
+	addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->sq.dma_addr);
+	addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->sq.dma_addr);
+
+	writel(addr_low, ena_dev->reg_bar + ENA_REGS_AQ_BASE_LO_OFF);
+	writel(addr_high, ena_dev->reg_bar + ENA_REGS_AQ_BASE_HI_OFF);
+
+	addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->cq.dma_addr);
+	addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->cq.dma_addr);
+
+	writel(addr_low, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_LO_OFF);
+	writel(addr_high, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_HI_OFF);
+
+	aq_caps = 0;
+	aq_caps |= admin_queue->q_depth & ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK;
+	aq_caps |= (sizeof(struct ena_admin_aq_entry) <<
+			ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) &
+			ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK;
+
+	acq_caps = 0;
+	acq_caps |= admin_queue->q_depth & ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK;
+	acq_caps |= (sizeof(struct ena_admin_acq_entry) <<
+		ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) &
+		ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK;
+
+	writel(aq_caps, ena_dev->reg_bar + ENA_REGS_AQ_CAPS_OFF);
+	writel(acq_caps, ena_dev->reg_bar + ENA_REGS_ACQ_CAPS_OFF);
+	ret = ena_com_admin_init_aenq(ena_dev, aenq_handlers);
+	if (ret)
+		goto error;
+
+	admin_queue->running_state = true;
+
+	return 0;
+error:
+	ena_com_admin_destroy(ena_dev);
+
+	return ret;
+}
+
+int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
+			    struct ena_com_create_io_ctx *ctx)
+{
+	struct ena_com_io_sq *io_sq;
+	struct ena_com_io_cq *io_cq;
+	int ret;
+
+	if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) {
+		pr_err("Qid (%d) is bigger than max num of queues (%d)\n",
+		       ctx->qid, ENA_TOTAL_NUM_QUEUES);
+		return -EINVAL;
+	}
+
+	io_sq = &ena_dev->io_sq_queues[ctx->qid];
+	io_cq = &ena_dev->io_cq_queues[ctx->qid];
+
+	memset(io_sq, 0x0, sizeof(struct ena_com_io_sq));
+	memset(io_cq, 0x0, sizeof(struct ena_com_io_cq));
+
+	/* Init CQ */
+	io_cq->q_depth = ctx->queue_size;
+	io_cq->direction = ctx->direction;
+	io_cq->qid = ctx->qid;
+
+	io_cq->msix_vector = ctx->msix_vector;
+
+	io_sq->q_depth = ctx->queue_size;
+	io_sq->direction = ctx->direction;
+	io_sq->qid = ctx->qid;
+
+	io_sq->mem_queue_type = ctx->mem_queue_type;
+
+	if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX)
+		/* header length is limited to 8 bits */
+		io_sq->tx_max_header_size =
+			min_t(u32, ena_dev->tx_max_header_size, SZ_256);
+
+	ret = ena_com_init_io_sq(ena_dev, ctx, io_sq);
+	if (ret)
+		goto error;
+	ret = ena_com_init_io_cq(ena_dev, ctx, io_cq);
+	if (ret)
+		goto error;
+
+	ret = ena_com_create_io_cq(ena_dev, io_cq);
+	if (ret)
+		goto error;
+
+	ret = ena_com_create_io_sq(ena_dev, io_sq, io_cq->idx);
+	if (ret)
+		goto destroy_io_cq;
+
+	return 0;
+
+destroy_io_cq:
+	ena_com_destroy_io_cq(ena_dev, io_cq);
+error:
+	ena_com_io_queue_free(ena_dev, io_sq, io_cq);
+	return ret;
+}
+
+void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid)
+{
+	struct ena_com_io_sq *io_sq;
+	struct ena_com_io_cq *io_cq;
+
+	if (qid >= ENA_TOTAL_NUM_QUEUES) {
+		pr_err("Qid (%d) is bigger than max num of queues (%d)\n", qid,
+		       ENA_TOTAL_NUM_QUEUES);
+		return;
+	}
+
+	io_sq = &ena_dev->io_sq_queues[qid];
+	io_cq = &ena_dev->io_cq_queues[qid];
+
+	ena_com_destroy_io_sq(ena_dev, io_sq);
+	ena_com_destroy_io_cq(ena_dev, io_cq);
+
+	ena_com_io_queue_free(ena_dev, io_sq, io_cq);
+}
+
+int ena_com_get_link_params(struct ena_com_dev *ena_dev,
+			    struct ena_admin_get_feat_resp *resp)
+{
+	return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG);
+}
+
+int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
+			      struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	struct ena_admin_get_feat_resp get_resp;
+	int rc;
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_DEVICE_ATTRIBUTES);
+	if (rc)
+		return rc;
+
+	memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr,
+	       sizeof(get_resp.u.dev_attr));
+	ena_dev->supported_features = get_resp.u.dev_attr.supported_features;
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_MAX_QUEUES_NUM);
+	if (rc)
+		return rc;
+
+	memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue,
+	       sizeof(get_resp.u.max_queue));
+	ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size;
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_AENQ_CONFIG);
+	if (rc)
+		return rc;
+
+	memcpy(&get_feat_ctx->aenq, &get_resp.u.aenq,
+	       sizeof(get_resp.u.aenq));
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+	if (rc)
+		return rc;
+
+	memcpy(&get_feat_ctx->offload, &get_resp.u.offload,
+	       sizeof(get_resp.u.offload));
+
+	return 0;
+}
+
+void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev)
+{
+	ena_com_handle_admin_completion(&ena_dev->admin_queue);
+}
+
+/* ena_handle_specific_aenq_event:
+ * return the handler that is relevant to the specific event group
+ */
+static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *dev,
+						     u16 group)
+{
+	struct ena_aenq_handlers *aenq_handlers = dev->aenq.aenq_handlers;
+
+	if ((group < ENA_MAX_HANDLERS) && aenq_handlers->handlers[group])
+		return aenq_handlers->handlers[group];
+
+	return aenq_handlers->unimplemented_handler;
+}
+
+/* ena_aenq_intr_handler:
+ * handles the aenq incoming events.
+ * pop events from the queue and apply the specific handler
+ */
+void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
+{
+	struct ena_admin_aenq_entry *aenq_e;
+	struct ena_admin_aenq_common_desc *aenq_common;
+	struct ena_com_aenq *aenq  = &dev->aenq;
+	ena_aenq_handler handler_cb;
+	u16 masked_head, processed = 0;
+	u8 phase;
+
+	masked_head = aenq->head & (aenq->q_depth - 1);
+	phase = aenq->phase;
+	aenq_e = &aenq->entries[masked_head]; /* Get first entry */
+	aenq_common = &aenq_e->aenq_common_desc;
+
+	/* Go over all the events */
+	while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) ==
+	       phase) {
+		pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n",
+			 aenq_common->group, aenq_common->syndrom,
+			 (u64)aenq_common->timestamp_low +
+				 ((u64)aenq_common->timestamp_high << 32));
+
+		/* Handle specific event*/
+		handler_cb = ena_com_get_specific_aenq_cb(dev,
+							  aenq_common->group);
+		handler_cb(data, aenq_e); /* call the actual event handler*/
+
+		/* Get next event entry */
+		masked_head++;
+		processed++;
+
+		if (unlikely(masked_head == aenq->q_depth)) {
+			masked_head = 0;
+			phase = !phase;
+		}
+		aenq_e = &aenq->entries[masked_head];
+		aenq_common = &aenq_e->aenq_common_desc;
+	}
+
+	aenq->head += processed;
+	aenq->phase = phase;
+
+	/* Don't update aenq doorbell if there weren't any processed events */
+	if (!processed)
+		return;
+
+	/* write the aenq doorbell after all AENQ descriptors were read */
+	mb();
+	writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+}
+
+int ena_com_dev_reset(struct ena_com_dev *ena_dev)
+{
+	u32 stat, timeout, cap, reset_val;
+	int rc;
+
+	stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
+	cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF);
+
+	if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) ||
+		     (cap == ENA_MMIO_READ_TIMEOUT))) {
+		pr_err("Reg read32 timeout occurred\n");
+		return -ETIME;
+	}
+
+	if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) {
+		pr_err("Device isn't ready, can't reset device\n");
+		return -EINVAL;
+	}
+
+	timeout = (cap & ENA_REGS_CAPS_RESET_TIMEOUT_MASK) >>
+			ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT;
+	if (timeout == 0) {
+		pr_err("Invalid timeout value\n");
+		return -EINVAL;
+	}
+
+	/* start reset */
+	reset_val = ENA_REGS_DEV_CTL_DEV_RESET_MASK;
+	writel(reset_val, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF);
+
+	/* Write again the MMIO read request address */
+	ena_com_mmio_reg_read_request_write_dev_addr(ena_dev);
+
+	rc = wait_for_reset_state(ena_dev, timeout,
+				  ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK);
+	if (rc != 0) {
+		pr_err("Reset indication didn't turn on\n");
+		return rc;
+	}
+
+	/* reset done */
+	writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF);
+	rc = wait_for_reset_state(ena_dev, timeout, 0);
+	if (rc != 0) {
+		pr_err("Reset indication didn't turn off\n");
+		return rc;
+	}
+
+	return 0;
+}
+
+static int ena_get_dev_stats(struct ena_com_dev *ena_dev,
+			     struct ena_com_stats_ctx *ctx,
+			     enum ena_admin_get_stats_type type)
+{
+	struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd;
+	struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp;
+	struct ena_com_admin_queue *admin_queue;
+	int ret;
+
+	admin_queue = &ena_dev->admin_queue;
+
+	get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS;
+	get_cmd->aq_common_descriptor.flags = 0;
+	get_cmd->type = type;
+
+	ret =  ena_com_execute_admin_command(admin_queue,
+					     (struct ena_admin_aq_entry *)get_cmd,
+					     sizeof(*get_cmd),
+					     (struct ena_admin_acq_entry *)get_resp,
+					     sizeof(*get_resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to get stats. error: %d\n", ret);
+
+	return ret;
+}
+
+int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev,
+				struct ena_admin_basic_stats *stats)
+{
+	struct ena_com_stats_ctx ctx;
+	int ret;
+
+	memset(&ctx, 0x0, sizeof(ctx));
+	ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_BASIC);
+	if (likely(ret == 0))
+		memcpy(stats, &ctx.get_resp.basic_stats,
+		       sizeof(ctx.get_resp.basic_stats));
+
+	return ret;
+}
+
+int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu)
+{
+	struct ena_com_admin_queue *admin_queue;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) {
+		pr_info("Feature %d isn't supported\n", ENA_ADMIN_MTU);
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+	admin_queue = &ena_dev->admin_queue;
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags = 0;
+	cmd.feat_common.feature_id = ENA_ADMIN_MTU;
+	cmd.u.mtu.mtu = mtu;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to set mtu %d. error: %d\n", mtu, ret);
+
+	return ret;
+}
+
+int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
+				 struct ena_admin_feature_offload_desc *offload)
+{
+	int ret;
+	struct ena_admin_get_feat_resp resp;
+
+	ret = ena_com_get_feature(ena_dev, &resp,
+				  ENA_ADMIN_STATELESS_OFFLOAD_CONFIG);
+	if (unlikely(ret)) {
+		pr_err("Failed to get offload capabilities %d\n", ret);
+		return ret;
+	}
+
+	memcpy(offload, &resp.u.offload, sizeof(resp.u.offload));
+
+	return 0;
+}
+
+int ena_com_set_hash_function(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	struct ena_admin_get_feat_resp get_resp;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev,
+						ENA_ADMIN_RSS_HASH_FUNCTION)) {
+		pr_info("Feature %d isn't supported\n",
+			ENA_ADMIN_RSS_HASH_FUNCTION);
+		return -EPERM;
+	}
+
+	/* Validate hash function is supported */
+	ret = ena_com_get_feature(ena_dev, &get_resp,
+				  ENA_ADMIN_RSS_HASH_FUNCTION);
+	if (unlikely(ret))
+		return ret;
+
+	if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) {
+		pr_err("Func hash %d isn't supported by device, abort\n",
+		       rss->hash_func);
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags =
+		ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+	cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_FUNCTION;
+	cmd.u.flow_hash_func.init_val = rss->hash_init_val;
+	cmd.u.flow_hash_func.selected_func = 1 << rss->hash_func;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.control_buffer.address,
+				   rss->hash_key_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	cmd.control_buffer.length = sizeof(*rss->hash_key);
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+	if (unlikely(ret)) {
+		pr_err("Failed to set hash function %d. error: %d\n",
+		       rss->hash_func, ret);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+			       enum ena_admin_hash_functions func,
+			       const u8 *key, u16 key_len, u32 init_val)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	struct ena_admin_feature_rss_flow_hash_control *hash_key =
+		rss->hash_key;
+	int rc;
+
+	/* Make sure size is a mult of DWs */
+	if (unlikely(key_len & 0x3))
+		return -EINVAL;
+
+	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+				    ENA_ADMIN_RSS_HASH_FUNCTION,
+				    rss->hash_key_dma_addr,
+				    sizeof(*rss->hash_key));
+	if (unlikely(rc))
+		return rc;
+
+	if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) {
+		pr_err("Flow hash function %d isn't supported\n", func);
+		return -EPERM;
+	}
+
+	switch (func) {
+	case ENA_ADMIN_TOEPLITZ:
+		if (key_len > sizeof(hash_key->key)) {
+			pr_err("key len (%hu) is bigger than the max supported (%zu)\n",
+			       key_len, sizeof(hash_key->key));
+			return -EINVAL;
+		}
+
+		memcpy(hash_key->key, key, key_len);
+		rss->hash_init_val = init_val;
+		hash_key->keys_num = key_len >> 2;
+		break;
+	case ENA_ADMIN_CRC32:
+		rss->hash_init_val = init_val;
+		break;
+	default:
+		pr_err("Invalid hash function (%d)\n", func);
+		return -EINVAL;
+	}
+
+	rc = ena_com_set_hash_function(ena_dev);
+
+	/* Restore the old function */
+	if (unlikely(rc))
+		ena_com_get_hash_function(ena_dev, NULL, NULL);
+
+	return rc;
+}
+
+int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
+			      enum ena_admin_hash_functions *func,
+			      u8 *key)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	struct ena_admin_feature_rss_flow_hash_control *hash_key =
+		rss->hash_key;
+	int rc;
+
+	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+				    ENA_ADMIN_RSS_HASH_FUNCTION,
+				    rss->hash_key_dma_addr,
+				    sizeof(*rss->hash_key));
+	if (unlikely(rc))
+		return rc;
+
+	rss->hash_func = get_resp.u.flow_hash_func.selected_func;
+	if (func)
+		*func = rss->hash_func;
+
+	if (key)
+		memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2);
+
+	return 0;
+}
+
+int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev,
+			  enum ena_admin_flow_hash_proto proto,
+			  u16 *fields)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	int rc;
+
+	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+				    ENA_ADMIN_RSS_HASH_INPUT,
+				    rss->hash_ctrl_dma_addr,
+				    sizeof(*rss->hash_ctrl));
+	if (unlikely(rc))
+		return rc;
+
+	if (fields)
+		*fields = rss->hash_ctrl->selected_fields[proto].fields;
+
+	return 0;
+}
+
+int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev,
+						ENA_ADMIN_RSS_HASH_INPUT)) {
+		pr_info("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT);
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags =
+		ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+	cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_INPUT;
+	cmd.u.flow_hash_input.enabled_input_sort =
+		ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK |
+		ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.control_buffer.address,
+				   rss->hash_ctrl_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+	cmd.control_buffer.length = sizeof(*hash_ctrl);
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+	if (unlikely(ret))
+		pr_err("Failed to set hash input. error: %d\n", ret);
+
+	return ret;
+}
+
+int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_feature_rss_hash_control *hash_ctrl =
+		rss->hash_ctrl;
+	u16 available_fields = 0;
+	int rc, i;
+
+	/* Get the supported hash input */
+	rc = ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+	if (unlikely(rc))
+		return rc;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP4].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+		ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP4].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+		ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP6].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+		ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP6].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA |
+		ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP6].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields =
+		ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA;
+
+	hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields =
+		ENA_ADMIN_RSS_L2_DA | ENA_ADMIN_RSS_L2_SA;
+
+	for (i = 0; i < ENA_ADMIN_RSS_PROTO_NUM; i++) {
+		available_fields = hash_ctrl->selected_fields[i].fields &
+				hash_ctrl->supported_fields[i].fields;
+		if (available_fields != hash_ctrl->selected_fields[i].fields) {
+			pr_err("hash control doesn't support all the desire configuration. proto %x supported %x selected %x\n",
+			       i, hash_ctrl->supported_fields[i].fields,
+			       hash_ctrl->selected_fields[i].fields);
+			return -EPERM;
+		}
+	}
+
+	rc = ena_com_set_hash_ctrl(ena_dev);
+
+	/* In case of failure, restore the old hash ctrl */
+	if (unlikely(rc))
+		ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+
+	return rc;
+}
+
+int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev,
+			   enum ena_admin_flow_hash_proto proto,
+			   u16 hash_fields)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl;
+	u16 supported_fields;
+	int rc;
+
+	if (proto >= ENA_ADMIN_RSS_PROTO_NUM) {
+		pr_err("Invalid proto num (%u)\n", proto);
+		return -EINVAL;
+	}
+
+	/* Get the ctrl table */
+	rc = ena_com_get_hash_ctrl(ena_dev, proto, NULL);
+	if (unlikely(rc))
+		return rc;
+
+	/* Make sure all the fields are supported */
+	supported_fields = hash_ctrl->supported_fields[proto].fields;
+	if ((hash_fields & supported_fields) != hash_fields) {
+		pr_err("proto %d doesn't support the required fields %x. supports only: %x\n",
+		       proto, hash_fields, supported_fields);
+	}
+
+	hash_ctrl->selected_fields[proto].fields = hash_fields;
+
+	rc = ena_com_set_hash_ctrl(ena_dev);
+
+	/* In case of failure, restore the old hash ctrl */
+	if (unlikely(rc))
+		ena_com_get_hash_ctrl(ena_dev, 0, NULL);
+
+	return 0;
+}
+
+int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev,
+				      u16 entry_idx, u16 entry_value)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+
+	if (unlikely(entry_idx >= (1 << rss->tbl_log_size)))
+		return -EINVAL;
+
+	if (unlikely((entry_value > ENA_TOTAL_NUM_QUEUES)))
+		return -EINVAL;
+
+	rss->host_rss_ind_tbl[entry_idx] = entry_value;
+
+	return 0;
+}
+
+int ena_com_indirect_table_set(struct ena_com_dev *ena_dev)
+{
+	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(
+		    ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) {
+		pr_info("Feature %d isn't supported\n",
+			ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG);
+		return -EPERM;
+	}
+
+	ret = ena_com_ind_tbl_convert_to_device(ena_dev);
+	if (ret) {
+		pr_err("Failed to convert host indirection table to device table\n");
+		return ret;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.aq_common_descriptor.flags =
+		ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK;
+	cmd.feat_common.feature_id = ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG;
+	cmd.u.ind_table.size = rss->tbl_log_size;
+	cmd.u.ind_table.inline_index = 0xFFFFFFFF;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.control_buffer.address,
+				   rss->rss_ind_tbl_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	cmd.control_buffer.length = (1ULL << rss->tbl_log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to set indirect table. error: %d\n", ret);
+
+	return ret;
+}
+
+int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl)
+{
+	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_get_feat_resp get_resp;
+	u32 tbl_size;
+	int i, rc;
+
+	tbl_size = (1ULL << rss->tbl_log_size) *
+		sizeof(struct ena_admin_rss_ind_table_entry);
+
+	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
+				    ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG,
+				    rss->rss_ind_tbl_dma_addr,
+				    tbl_size);
+	if (unlikely(rc))
+		return rc;
+
+	if (!ind_tbl)
+		return 0;
+
+	rc = ena_com_ind_tbl_convert_from_device(ena_dev);
+	if (unlikely(rc))
+		return rc;
+
+	for (i = 0; i < (1 << rss->tbl_log_size); i++)
+		ind_tbl[i] = rss->host_rss_ind_tbl[i];
+
+	return 0;
+}
+
+int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size)
+{
+	int rc;
+
+	memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss));
+
+	rc = ena_com_indirect_table_allocate(ena_dev, indr_tbl_log_size);
+	if (unlikely(rc))
+		goto err_indr_tbl;
+
+	rc = ena_com_hash_key_allocate(ena_dev);
+	if (unlikely(rc))
+		goto err_hash_key;
+
+	rc = ena_com_hash_ctrl_init(ena_dev);
+	if (unlikely(rc))
+		goto err_hash_ctrl;
+
+	return 0;
+
+err_hash_ctrl:
+	ena_com_hash_key_destroy(ena_dev);
+err_hash_key:
+	ena_com_indirect_table_destroy(ena_dev);
+err_indr_tbl:
+
+	return rc;
+}
+
+void ena_com_rss_destroy(struct ena_com_dev *ena_dev)
+{
+	ena_com_indirect_table_destroy(ena_dev);
+	ena_com_hash_key_destroy(ena_dev);
+	ena_com_hash_ctrl_destroy(ena_dev);
+
+	memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss));
+}
+
+int ena_com_allocate_host_info(struct ena_com_dev *ena_dev)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+	host_attr->host_info =
+		dma_zalloc_coherent(ena_dev->dmadev, SZ_4K,
+				    &host_attr->host_info_dma_addr, GFP_KERNEL);
+	if (unlikely(!host_attr->host_info))
+		return -ENOMEM;
+
+	return 0;
+}
+
+int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
+				u32 debug_area_size)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+	host_attr->debug_area_virt_addr =
+		dma_zalloc_coherent(ena_dev->dmadev, debug_area_size,
+				    &host_attr->debug_area_dma_addr, GFP_KERNEL);
+	if (unlikely(!host_attr->debug_area_virt_addr)) {
+		host_attr->debug_area_size = 0;
+		return -ENOMEM;
+	}
+
+	host_attr->debug_area_size = debug_area_size;
+
+	return 0;
+}
+
+void ena_com_delete_host_info(struct ena_com_dev *ena_dev)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+	if (host_attr->host_info) {
+		dma_free_coherent(ena_dev->dmadev, SZ_4K, host_attr->host_info,
+				  host_attr->host_info_dma_addr);
+		host_attr->host_info = NULL;
+	}
+}
+
+void ena_com_delete_debug_area(struct ena_com_dev *ena_dev)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+
+	if (host_attr->debug_area_virt_addr) {
+		dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size,
+				  host_attr->debug_area_virt_addr,
+				  host_attr->debug_area_dma_addr);
+		host_attr->debug_area_virt_addr = NULL;
+	}
+}
+
+int ena_com_set_host_attributes(struct ena_com_dev *ena_dev)
+{
+	struct ena_host_attribute *host_attr = &ena_dev->host_attr;
+	struct ena_com_admin_queue *admin_queue;
+	struct ena_admin_set_feat_cmd cmd;
+	struct ena_admin_set_feat_resp resp;
+
+	int ret;
+
+	if (!ena_com_check_supported_feature_id(ena_dev,
+						ENA_ADMIN_HOST_ATTR_CONFIG)) {
+		pr_warn("Set host attribute isn't supported\n");
+		return -EPERM;
+	}
+
+	memset(&cmd, 0x0, sizeof(cmd));
+	admin_queue = &ena_dev->admin_queue;
+
+	cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE;
+	cmd.feat_common.feature_id = ENA_ADMIN_HOST_ATTR_CONFIG;
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.u.host_attr.debug_ba,
+				   host_attr->debug_area_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	ret = ena_com_mem_addr_set(ena_dev,
+				   &cmd.u.host_attr.os_info_ba,
+				   host_attr->host_info_dma_addr);
+	if (unlikely(ret)) {
+		pr_err("memory address set failed\n");
+		return ret;
+	}
+
+	cmd.u.host_attr.debug_area_size = host_attr->debug_area_size;
+
+	ret = ena_com_execute_admin_command(admin_queue,
+					    (struct ena_admin_aq_entry *)&cmd,
+					    sizeof(cmd),
+					    (struct ena_admin_acq_entry *)&resp,
+					    sizeof(resp));
+
+	if (unlikely(ret))
+		pr_err("Failed to set host attributes: %d\n", ret);
+
+	return ret;
+}
+
+/* Interrupt moderation */
+bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev)
+{
+	return ena_com_check_supported_feature_id(ena_dev,
+						  ENA_ADMIN_INTERRUPT_MODERATION);
+}
+
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+						      u32 tx_coalesce_usecs)
+{
+	if (!ena_dev->intr_delay_resolution) {
+		pr_err("Illegal interrupt delay granularity value\n");
+		return -EFAULT;
+	}
+
+	ena_dev->intr_moder_tx_interval = tx_coalesce_usecs /
+		ena_dev->intr_delay_resolution;
+
+	return 0;
+}
+
+int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
+						      u32 rx_coalesce_usecs)
+{
+	if (!ena_dev->intr_delay_resolution) {
+		pr_err("Illegal interrupt delay granularity value\n");
+		return -EFAULT;
+	}
+
+	/* We use LOWEST entry of moderation table for storing
+	 * nonadaptive interrupt coalescing values
+	 */
+	ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
+		rx_coalesce_usecs / ena_dev->intr_delay_resolution;
+
+	return 0;
+}
+
+void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev)
+{
+	if (ena_dev->intr_moder_tbl)
+		devm_kfree(ena_dev->dmadev, ena_dev->intr_moder_tbl);
+	ena_dev->intr_moder_tbl = NULL;
+}
+
+int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev)
+{
+	struct ena_admin_get_feat_resp get_resp;
+	u16 delay_resolution;
+	int rc;
+
+	rc = ena_com_get_feature(ena_dev, &get_resp,
+				 ENA_ADMIN_INTERRUPT_MODERATION);
+
+	if (rc) {
+		if (rc == -EPERM) {
+			pr_info("Feature %d isn't supported\n",
+				ENA_ADMIN_INTERRUPT_MODERATION);
+			rc = 0;
+		} else {
+			pr_err("Failed to get interrupt moderation admin cmd. rc: %d\n",
+			       rc);
+		}
+
+		/* no moderation supported, disable adaptive support */
+		ena_com_disable_adaptive_moderation(ena_dev);
+		return rc;
+	}
+
+	rc = ena_com_init_interrupt_moderation_table(ena_dev);
+	if (rc)
+		goto err;
+
+	/* if moderation is supported by device we set adaptive moderation */
+	delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution;
+	ena_com_update_intr_delay_resolution(ena_dev, delay_resolution);
+	ena_com_enable_adaptive_moderation(ena_dev);
+
+	return 0;
+err:
+	ena_com_destroy_interrupt_moderation(ena_dev);
+	return rc;
+}
+
+void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+	if (!intr_moder_tbl)
+		return;
+
+	intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval =
+		ENA_INTR_LOWEST_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval =
+		ENA_INTR_LOWEST_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval =
+		ENA_INTR_LOWEST_BYTES;
+
+	intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval =
+		ENA_INTR_LOW_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval =
+		ENA_INTR_LOW_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval =
+		ENA_INTR_LOW_BYTES;
+
+	intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval =
+		ENA_INTR_MID_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval =
+		ENA_INTR_MID_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval =
+		ENA_INTR_MID_BYTES;
+
+	intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval =
+		ENA_INTR_HIGH_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval =
+		ENA_INTR_HIGH_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval =
+		ENA_INTR_HIGH_BYTES;
+
+	intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval =
+		ENA_INTR_HIGHEST_USECS;
+	intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval =
+		ENA_INTR_HIGHEST_PKTS;
+	intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval =
+		ENA_INTR_HIGHEST_BYTES;
+}
+
+unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev)
+{
+	return ena_dev->intr_moder_tx_interval;
+}
+
+unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+	if (intr_moder_tbl)
+		return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval;
+
+	return 0;
+}
+
+void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
+					enum ena_intr_moder_level level,
+					struct ena_intr_moder_entry *entry)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+	if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
+		return;
+
+	intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval;
+	if (ena_dev->intr_delay_resolution)
+		intr_moder_tbl[level].intr_moder_interval /=
+			ena_dev->intr_delay_resolution;
+	intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval;
+
+	/* use hardcoded value until ethtool supports bytecount parameter */
+	if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED)
+		intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval;
+}
+
+void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
+				       enum ena_intr_moder_level level,
+				       struct ena_intr_moder_entry *entry)
+{
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+
+	if (level >= ENA_INTR_MAX_NUM_OF_LEVELS)
+		return;
+
+	entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval;
+	if (ena_dev->intr_delay_resolution)
+		entry->intr_moder_interval *= ena_dev->intr_delay_resolution;
+	entry->pkts_per_interval =
+	intr_moder_tbl[level].pkts_per_interval;
+	entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval;
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
new file mode 100644
index 0000000..509d7b8
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -0,0 +1,1038 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_COM
+#define ENA_COM
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sizes.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/wait.h>
+
+#include "ena_common_defs.h"
+#include "ena_admin_defs.h"
+#include "ena_eth_io_defs.h"
+#include "ena_regs_defs.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define ENA_MAX_NUM_IO_QUEUES		128U
+/* We need to queues for each IO (on for Tx and one for Rx) */
+#define ENA_TOTAL_NUM_QUEUES		(2 * (ENA_MAX_NUM_IO_QUEUES))
+
+#define ENA_MAX_HANDLERS 256
+
+#define ENA_MAX_PHYS_ADDR_SIZE_BITS 48
+
+/* Unit in usec */
+#define ENA_REG_READ_TIMEOUT 200000
+
+#define ADMIN_SQ_SIZE(depth)	((depth) * sizeof(struct ena_admin_aq_entry))
+#define ADMIN_CQ_SIZE(depth)	((depth) * sizeof(struct ena_admin_acq_entry))
+#define ADMIN_AENQ_SIZE(depth)	((depth) * sizeof(struct ena_admin_aenq_entry))
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* ENA adaptive interrupt moderation settings */
+
+#define ENA_INTR_LOWEST_USECS           (0)
+#define ENA_INTR_LOWEST_PKTS            (3)
+#define ENA_INTR_LOWEST_BYTES           (2 * 1524)
+
+#define ENA_INTR_LOW_USECS              (32)
+#define ENA_INTR_LOW_PKTS               (12)
+#define ENA_INTR_LOW_BYTES              (16 * 1024)
+
+#define ENA_INTR_MID_USECS              (80)
+#define ENA_INTR_MID_PKTS               (48)
+#define ENA_INTR_MID_BYTES              (64 * 1024)
+
+#define ENA_INTR_HIGH_USECS             (128)
+#define ENA_INTR_HIGH_PKTS              (96)
+#define ENA_INTR_HIGH_BYTES             (128 * 1024)
+
+#define ENA_INTR_HIGHEST_USECS          (192)
+#define ENA_INTR_HIGHEST_PKTS           (128)
+#define ENA_INTR_HIGHEST_BYTES          (192 * 1024)
+
+#define ENA_INTR_INITIAL_TX_INTERVAL_USECS		196
+#define ENA_INTR_INITIAL_RX_INTERVAL_USECS		4
+#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT			6
+#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT			4
+#define ENA_INTR_MODER_LEVEL_STRIDE			2
+#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED		0xFFFFFF
+
+enum ena_intr_moder_level {
+	ENA_INTR_MODER_LOWEST = 0,
+	ENA_INTR_MODER_LOW,
+	ENA_INTR_MODER_MID,
+	ENA_INTR_MODER_HIGH,
+	ENA_INTR_MODER_HIGHEST,
+	ENA_INTR_MAX_NUM_OF_LEVELS,
+};
+
+struct ena_intr_moder_entry {
+	unsigned int intr_moder_interval;
+	unsigned int pkts_per_interval;
+	unsigned int bytes_per_interval;
+};
+
+enum queue_direction {
+	ENA_COM_IO_QUEUE_DIRECTION_TX,
+	ENA_COM_IO_QUEUE_DIRECTION_RX
+};
+
+struct ena_com_buf {
+	dma_addr_t paddr; /**< Buffer physical address */
+	u16 len; /**< Buffer length in bytes */
+};
+
+struct ena_com_rx_buf_info {
+	u16 len;
+	u16 req_id;
+};
+
+struct ena_com_io_desc_addr {
+	u8 __iomem *pbuf_dev_addr; /* LLQ address */
+	u8 *virt_addr;
+	dma_addr_t phys_addr;
+};
+
+struct ena_com_tx_meta {
+	u16 mss;
+	u16 l3_hdr_len;
+	u16 l3_hdr_offset;
+	u16 l4_hdr_len; /* In words */
+};
+
+struct ena_com_io_cq {
+	struct ena_com_io_desc_addr cdesc_addr;
+
+	/* Interrupt unmask register */
+	u32 __iomem *unmask_reg;
+
+	/* The completion queue head doorbell register */
+	u32 __iomem *cq_head_db_reg;
+
+	/* numa configuration register (for TPH) */
+	u32 __iomem *numa_node_cfg_reg;
+
+	/* The value to write to the above register to unmask
+	 * the interrupt of this queue
+	 */
+	u32 msix_vector;
+
+	enum queue_direction direction;
+
+	/* holds the number of cdesc of the current packet */
+	u16 cur_rx_pkt_cdesc_count;
+	/* save the firt cdesc idx of the current packet */
+	u16 cur_rx_pkt_cdesc_start_idx;
+
+	u16 q_depth;
+	/* Caller qid */
+	u16 qid;
+
+	/* Device queue index */
+	u16 idx;
+	u16 head;
+	u16 last_head_update;
+	u8 phase;
+	u8 cdesc_entry_size_in_bytes;
+
+} ____cacheline_aligned;
+
+struct ena_com_io_sq {
+	struct ena_com_io_desc_addr desc_addr;
+
+	u32 __iomem *db_addr;
+	u8 __iomem *header_addr;
+
+	enum queue_direction direction;
+	enum ena_admin_placement_policy_type mem_queue_type;
+
+	u32 msix_vector;
+	struct ena_com_tx_meta cached_tx_meta;
+
+	u16 q_depth;
+	u16 qid;
+
+	u16 idx;
+	u16 tail;
+	u16 next_to_comp;
+	u32 tx_max_header_size;
+	u8 phase;
+	u8 desc_entry_size;
+	u8 dma_addr_bits;
+} ____cacheline_aligned;
+
+struct ena_com_admin_cq {
+	struct ena_admin_acq_entry *entries;
+	dma_addr_t dma_addr;
+
+	u16 head;
+	u8 phase;
+};
+
+struct ena_com_admin_sq {
+	struct ena_admin_aq_entry *entries;
+	dma_addr_t dma_addr;
+
+	u32 __iomem *db_addr;
+
+	u16 head;
+	u16 tail;
+	u8 phase;
+
+};
+
+struct ena_com_stats_admin {
+	u32 aborted_cmd;
+	u32 submitted_cmd;
+	u32 completed_cmd;
+	u32 out_of_space;
+	u32 no_completion;
+};
+
+struct ena_com_admin_queue {
+	void *q_dmadev;
+	spinlock_t q_lock; /* spinlock for the admin queue */
+	struct ena_comp_ctx *comp_ctx;
+	u16 q_depth;
+	struct ena_com_admin_cq cq;
+	struct ena_com_admin_sq sq;
+
+	/* Indicate if the admin queue should poll for completion */
+	bool polling;
+
+	u16 curr_cmd_id;
+
+	/* Indicate that the ena was initialized and can
+	 * process new admin commands
+	 */
+	bool running_state;
+
+	/* Count the number of outstanding admin commands */
+	atomic_t outstanding_cmds;
+
+	struct ena_com_stats_admin stats;
+};
+
+struct ena_aenq_handlers;
+
+struct ena_com_aenq {
+	u16 head;
+	u8 phase;
+	struct ena_admin_aenq_entry *entries;
+	dma_addr_t dma_addr;
+	u16 q_depth;
+	struct ena_aenq_handlers *aenq_handlers;
+};
+
+struct ena_com_mmio_read {
+	struct ena_admin_ena_mmio_req_read_less_resp *read_resp;
+	dma_addr_t read_resp_dma_addr;
+	u16 seq_num;
+	bool readless_supported;
+	/* spin lock to ensure a single outstanding read */
+	spinlock_t lock;
+};
+
+struct ena_rss {
+	/* Indirect table */
+	u16 *host_rss_ind_tbl;
+	struct ena_admin_rss_ind_table_entry *rss_ind_tbl;
+	dma_addr_t rss_ind_tbl_dma_addr;
+	u16 tbl_log_size;
+
+	/* Hash key */
+	enum ena_admin_hash_functions hash_func;
+	struct ena_admin_feature_rss_flow_hash_control *hash_key;
+	dma_addr_t hash_key_dma_addr;
+	u32 hash_init_val;
+
+	/* Flow Control */
+	struct ena_admin_feature_rss_hash_control *hash_ctrl;
+	dma_addr_t hash_ctrl_dma_addr;
+
+};
+
+struct ena_host_attribute {
+	/* Debug area */
+	u8 *debug_area_virt_addr;
+	dma_addr_t debug_area_dma_addr;
+	u32 debug_area_size;
+
+	/* Host information */
+	struct ena_admin_host_info *host_info;
+	dma_addr_t host_info_dma_addr;
+};
+
+/* Each ena_dev is a PCI function. */
+struct ena_com_dev {
+	struct ena_com_admin_queue admin_queue;
+	struct ena_com_aenq aenq;
+	struct ena_com_io_cq io_cq_queues[ENA_TOTAL_NUM_QUEUES];
+	struct ena_com_io_sq io_sq_queues[ENA_TOTAL_NUM_QUEUES];
+	u8 __iomem *reg_bar;
+	void __iomem *mem_bar;
+	void *dmadev;
+
+	enum ena_admin_placement_policy_type tx_mem_queue_type;
+	u32 tx_max_header_size;
+	u16 stats_func; /* Selected function for extended statistic dump */
+	u16 stats_queue; /* Selected queue for extended statistic dump */
+
+	struct ena_com_mmio_read mmio_read;
+
+	struct ena_rss rss;
+	u32 supported_features;
+	u32 dma_addr_bits;
+
+	struct ena_host_attribute host_attr;
+	bool adaptive_coalescing;
+	u16 intr_delay_resolution;
+	u32 intr_moder_tx_interval;
+	struct ena_intr_moder_entry *intr_moder_tbl;
+};
+
+struct ena_com_dev_get_features_ctx {
+	struct ena_admin_queue_feature_desc max_queues;
+	struct ena_admin_device_attr_feature_desc dev_attr;
+	struct ena_admin_feature_aenq_desc aenq;
+	struct ena_admin_feature_offload_desc offload;
+};
+
+struct ena_com_create_io_ctx {
+	enum ena_admin_placement_policy_type mem_queue_type;
+	enum queue_direction direction;
+	int numa_node;
+	u32 msix_vector;
+	u16 queue_size;
+	u16 qid;
+};
+
+typedef void (*ena_aenq_handler)(void *data,
+	struct ena_admin_aenq_entry *aenq_e);
+
+/* Holds aenq handlers. Indexed by AENQ event group */
+struct ena_aenq_handlers {
+	ena_aenq_handler handlers[ENA_MAX_HANDLERS];
+	ena_aenq_handler unimplemented_handler;
+};
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* ena_com_mmio_reg_read_request_init - Init the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ *
+ * Initialize the register read mechanism.
+ *
+ * @note: This method must be the first stage in the initialization sequence.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ * @readless_supported: readless mode (enable/disable)
+ */
+void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev,
+				bool readless_supported);
+
+/* ena_com_mmio_reg_read_request_write_dev_addr - Write the mmio reg read return
+ * value physical address.
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev);
+
+/* ena_com_mmio_reg_read_request_destroy - Destroy the mmio reg read mechanism
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_admin_init - Init the admin and the async queues
+ * @ena_dev: ENA communication layer struct
+ * @aenq_handlers: Those handlers to be called upon event.
+ * @init_spinlock: Indicate if this method should init the admin spinlock or
+ * the spinlock was init before (for example, in a case of FLR).
+ *
+ * Initialize the admin submission and completion queues.
+ * Initialize the asynchronous events notification queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_admin_init(struct ena_com_dev *ena_dev,
+		       struct ena_aenq_handlers *aenq_handlers,
+		       bool init_spinlock);
+
+/* ena_com_admin_destroy - Destroy the admin and the async events queues.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @note: Before calling this method, the caller must validate that the device
+ * won't send any additional admin completions/aenq.
+ * To achieve that, a FLR is recommended.
+ */
+void ena_com_admin_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_dev_reset - Perform device FLR to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_dev_reset(struct ena_com_dev *ena_dev);
+
+/* ena_com_create_io_queue - Create io queue.
+ * @ena_dev: ENA communication layer struct
+ * @ctx - create context structure
+ *
+ * Create the submission and the completion queues.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_create_io_queue(struct ena_com_dev *ena_dev,
+			    struct ena_com_create_io_ctx *ctx);
+
+/* ena_com_destroy_io_queue - Destroy IO queue with the queue id - qid.
+ * @ena_dev: ENA communication layer struct
+ * @qid - the caller virtual queue id.
+ */
+void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid);
+
+/* ena_com_get_io_handlers - Return the io queue handlers
+ * @ena_dev: ENA communication layer struct
+ * @qid - the caller virtual queue id.
+ * @io_sq - IO submission queue handler
+ * @io_cq - IO completion queue handler.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid,
+			    struct ena_com_io_sq **io_sq,
+			    struct ena_com_io_cq **io_cq);
+
+/* ena_com_admin_aenq_enable - ENAble asynchronous event notifications
+ * @ena_dev: ENA communication layer struct
+ *
+ * After this method, aenq event can be received via AENQ.
+ */
+void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_admin_running_state - Set the state of the admin queue
+ * @ena_dev: ENA communication layer struct
+ *
+ * Change the state of the admin queue (enable/disable)
+ */
+void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state);
+
+/* ena_com_get_admin_running_state - Get the admin queue state
+ * @ena_dev: ENA communication layer struct
+ *
+ * Retrieve the state of the admin queue (enable/disable)
+ *
+ * @return - current polling mode (enable/disable)
+ */
+bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_admin_polling_mode - Set the admin completion queue polling mode
+ * @ena_dev: ENA communication layer struct
+ * @polling: ENAble/Disable polling mode
+ *
+ * Set the admin completion mode.
+ */
+void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling);
+
+/* ena_com_set_admin_polling_mode - Get the admin completion queue polling mode
+ * @ena_dev: ENA communication layer struct
+ *
+ * Get the admin completion mode.
+ * If polling mode is on, ena_com_execute_admin_command will perform a
+ * polling on the admin completion queue for the commands completion,
+ * otherwise it will wait on wait event.
+ *
+ * @return state
+ */
+bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev);
+
+/* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method go over the admin completion queue and wake up all the pending
+ * threads that wait on the commands wait event.
+ *
+ * @note: Should be called after MSI-X interrupt.
+ */
+void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev);
+
+/* ena_com_aenq_intr_handler - AENQ interrupt handler
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method go over the async event notification queue and call the proper
+ * aenq handler.
+ */
+void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data);
+
+/* ena_com_abort_admin_commands - Abort all the outstanding admin commands.
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method aborts all the outstanding admin commands.
+ * The caller should then call ena_com_wait_for_abort_completion to make sure
+ * all the commands were completed.
+ */
+void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev);
+
+/* ena_com_wait_for_abort_completion - Wait for admin commands abort.
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method wait until all the outstanding admin commands will be completed.
+ */
+void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev);
+
+/* ena_com_validate_version - Validate the device parameters
+ * @ena_dev: ENA communication layer struct
+ *
+ * This method validate the device parameters are the same as the saved
+ * parameters in ena_dev.
+ * This method is useful after device reset, to validate the device mac address
+ * and the device offloads are the same as before the reset.
+ *
+ * @return - 0 on success negative value otherwise.
+ */
+int ena_com_validate_version(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_link_params - Retrieve physical link parameters.
+ * @ena_dev: ENA communication layer struct
+ * @resp: Link parameters
+ *
+ * Retrieve the physical link parameters,
+ * like speed, auto-negotiation and full duplex support.
+ *
+ * @return - 0 on Success negative value otherwise.
+ */
+int ena_com_get_link_params(struct ena_com_dev *ena_dev,
+			    struct ena_admin_get_feat_resp *resp);
+
+/* ena_com_get_dma_width - Retrieve physical dma address width the device
+ * supports.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Retrieve the maximum physical address bits the device can handle.
+ *
+ * @return: > 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dma_width(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_aenq_config - Set aenq groups configurations
+ * @ena_dev: ENA communication layer struct
+ * @groups flag: bit fields flags of enum ena_admin_aenq_group.
+ *
+ * Configure which aenq event group the driver would like to receive.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag);
+
+/* ena_com_get_dev_attr_feat - Get device features
+ * @ena_dev: ENA communication layer struct
+ * @get_feat_ctx: returned context that contain the get features.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev,
+			      struct ena_com_dev_get_features_ctx *get_feat_ctx);
+
+/* ena_com_get_dev_basic_stats - Get device basic statistics
+ * @ena_dev: ENA communication layer struct
+ * @stats: stats return value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev,
+				struct ena_admin_basic_stats *stats);
+
+/* ena_com_set_dev_mtu - Configure the device mtu.
+ * @ena_dev: ENA communication layer struct
+ * @mtu: mtu value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu);
+
+/* ena_com_get_offload_settings - Retrieve the device offloads capabilities
+ * @ena_dev: ENA communication layer struct
+ * @offlad: offload return value
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_offload_settings(struct ena_com_dev *ena_dev,
+				 struct ena_admin_feature_offload_desc *offload);
+
+/* ena_com_rss_init - Init RSS
+ * @ena_dev: ENA communication layer struct
+ * @log_size: indirection log size
+ *
+ * Allocate RSS/RFS resources.
+ * The caller then can configure rss using ena_com_set_hash_function,
+ * ena_com_set_hash_ctrl and ena_com_indirect_table_set.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size);
+
+/* ena_com_rss_destroy - Destroy rss
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free all the RSS/RFS resources.
+ */
+void ena_com_rss_destroy(struct ena_com_dev *ena_dev);
+
+/* ena_com_fill_hash_function - Fill RSS hash function
+ * @ena_dev: ENA communication layer struct
+ * @func: The hash function (Toeplitz or crc)
+ * @key: Hash key (for toeplitz hash)
+ * @key_len: key length (max length 10 DW)
+ * @init_val: initial value for the hash function
+ *
+ * Fill the ena_dev resources with the desire hash function, hash key, key_len
+ * and key initial value (if needed by the hash function).
+ * To flush the key into the device the caller should call
+ * ena_com_set_hash_function.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
+			       enum ena_admin_hash_functions func,
+			       const u8 *key, u16 key_len, u32 init_val);
+
+/* ena_com_set_hash_function - Flush the hash function and it dependencies to
+ * the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the hash function and it dependencies (key, key length and
+ * initial value) if needed.
+ *
+ * @note: Prior to this method the caller should call ena_com_fill_hash_function
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_hash_function(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_hash_function - Retrieve the hash function and the hash key
+ * from the device.
+ * @ena_dev: ENA communication layer struct
+ * @func: hash function
+ * @key: hash key
+ *
+ * Retrieve the hash function and the hash key from the device.
+ *
+ * @note: If the caller called ena_com_fill_hash_function but didn't flash
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
+			      enum ena_admin_hash_functions *func,
+			      u8 *key);
+
+/* ena_com_fill_hash_ctrl - Fill RSS hash control
+ * @ena_dev: ENA communication layer struct.
+ * @proto: The protocol to configure.
+ * @hash_fields: bit mask of ena_admin_flow_hash_fields
+ *
+ * Fill the ena_dev resources with the desire hash control (the ethernet
+ * fields that take part of the hash) for a specific protocol.
+ * To flush the hash control to the device, the caller should call
+ * ena_com_set_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev,
+			   enum ena_admin_flow_hash_proto proto,
+			   u16 hash_fields);
+
+/* ena_com_set_hash_ctrl - Flush the hash control resources to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the hash control (the ethernet fields that take part of the hash)
+ *
+ * @note: Prior to this method the caller should call ena_com_fill_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_hash_ctrl - Retrieve the hash control from the device.
+ * @ena_dev: ENA communication layer struct
+ * @proto: The protocol to retrieve.
+ * @fields: bit mask of ena_admin_flow_hash_fields.
+ *
+ * Retrieve the hash control from the device.
+ *
+ * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev,
+			  enum ena_admin_flow_hash_proto proto,
+			  u16 *fields);
+
+/* ena_com_set_default_hash_ctrl - Set the hash control to a default
+ * configuration.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Fill the ena_dev resources with the default hash control configuration.
+ * To flush the hash control to the device, the caller should call
+ * ena_com_set_hash_ctrl.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev);
+
+/* ena_com_indirect_table_fill_entry - Fill a single entry in the RSS
+ * indirection table
+ * @ena_dev: ENA communication layer struct.
+ * @entry_idx - indirection table entry.
+ * @entry_value - redirection value
+ *
+ * Fill a single entry of the RSS indirection table in the ena_dev resources.
+ * To flush the indirection table to the device, the called should call
+ * ena_com_indirect_table_set.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev,
+				      u16 entry_idx, u16 entry_value);
+
+/* ena_com_indirect_table_set - Flush the indirection table to the device.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Flush the indirection hash control to the device.
+ * Prior to this method the caller should call ena_com_indirect_table_fill_entry
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_set(struct ena_com_dev *ena_dev);
+
+/* ena_com_indirect_table_get - Retrieve the indirection table from the device.
+ * @ena_dev: ENA communication layer struct
+ * @ind_tbl: indirection table
+ *
+ * Retrieve the RSS indirection table from the device.
+ *
+ * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl);
+
+/* ena_com_allocate_host_info - Allocate host info resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_allocate_host_info(struct ena_com_dev *ena_dev);
+
+/* ena_com_allocate_debug_area - Allocate debug area.
+ * @ena_dev: ENA communication layer struct
+ * @debug_area_size - debug area size.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
+				u32 debug_area_size);
+
+/* ena_com_delete_debug_area - Free the debug area resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free the allocate debug area.
+ */
+void ena_com_delete_debug_area(struct ena_com_dev *ena_dev);
+
+/* ena_com_delete_host_info - Free the host info resources.
+ * @ena_dev: ENA communication layer struct
+ *
+ * Free the allocate host info.
+ */
+void ena_com_delete_host_info(struct ena_com_dev *ena_dev);
+
+/* ena_com_set_host_attributes - Update the device with the host
+ * attributes (debug area and host info) base address.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_set_host_attributes(struct ena_com_dev *ena_dev);
+
+/* ena_com_create_io_cq - Create io completion queue.
+ * @ena_dev: ENA communication layer struct
+ * @io_cq - io completion queue handler
+
+ * Create IO completion queue.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_create_io_cq(struct ena_com_dev *ena_dev,
+			 struct ena_com_io_cq *io_cq);
+
+/* ena_com_destroy_io_cq - Destroy io completion queue.
+ * @ena_dev: ENA communication layer struct
+ * @io_cq - io completion queue handler
+
+ * Destroy IO completion queue.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
+			  struct ena_com_io_cq *io_cq);
+
+/* ena_com_execute_admin_command - Execute admin command
+ * @admin_queue: admin queue.
+ * @cmd: the admin command to execute.
+ * @cmd_size: the command size.
+ * @cmd_completion: command completion return value.
+ * @cmd_comp_size: command completion size.
+
+ * Submit an admin command and then wait until the device will return a
+ * completion.
+ * The completion will be copyed into cmd_comp.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
+				  struct ena_admin_aq_entry *cmd,
+				  size_t cmd_size,
+				  struct ena_admin_acq_entry *cmd_comp,
+				  size_t cmd_comp_size);
+
+/* ena_com_init_interrupt_moderation - Init interrupt moderation
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev);
+
+/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev);
+
+/* ena_com_interrupt_moderation_supported - Return if interrupt moderation
+ * capability is supported by the device.
+ *
+ * @return - supported or not.
+ */
+bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev);
+
+/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt
+ * moderation table back to the default parameters.
+ * @ena_dev: ENA communication layer struct
+ */
+void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev);
+
+/* ena_com_update_nonadaptive_moderation_interval_tx - Update the
+ * non-adaptive interval in Tx direction.
+ * @ena_dev: ENA communication layer struct
+ * @tx_coalesce_usecs: Interval in usec.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev,
+						      u32 tx_coalesce_usecs);
+
+/* ena_com_update_nonadaptive_moderation_interval_rx - Update the
+ * non-adaptive interval in Rx direction.
+ * @ena_dev: ENA communication layer struct
+ * @rx_coalesce_usecs: Interval in usec.
+ *
+ * @return - 0 on success, negative value on failure.
+ */
+int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev,
+						      u32 rx_coalesce_usecs);
+
+/* ena_com_get_nonadaptive_moderation_interval_tx - Retrieve the
+ * non-adaptive interval in Tx direction.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - interval in usec
+ */
+unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev);
+
+/* ena_com_get_nonadaptive_moderation_interval_rx - Retrieve the
+ * non-adaptive interval in Rx direction.
+ * @ena_dev: ENA communication layer struct
+ *
+ * @return - interval in usec
+ */
+unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev);
+
+/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt
+ * moderation table.
+ * @ena_dev: ENA communication layer struct
+ * @level: Interrupt moderation table level
+ * @entry: Entry value
+ *
+ * Update a single entry in the interrupt moderation table.
+ */
+void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev,
+					enum ena_intr_moder_level level,
+					struct ena_intr_moder_entry *entry);
+
+/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry.
+ * @ena_dev: ENA communication layer struct
+ * @level: Interrupt moderation table level
+ * @entry: Entry to fill.
+ *
+ * Initialize the entry according to the adaptive interrupt moderation table.
+ */
+void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev,
+				       enum ena_intr_moder_level level,
+				       struct ena_intr_moder_entry *entry);
+
+static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev)
+{
+	return ena_dev->adaptive_coalescing;
+}
+
+static inline void ena_com_enable_adaptive_moderation(struct ena_com_dev *ena_dev)
+{
+	ena_dev->adaptive_coalescing = true;
+}
+
+static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_dev)
+{
+	ena_dev->adaptive_coalescing = false;
+}
+
+/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay
+ * @ena_dev: ENA communication layer struct
+ * @pkts: Number of packets since the last update
+ * @bytes: Number of bytes received since the last update.
+ * @smoothed_interval: Returned interval
+ * @moder_tbl_idx: Current table level as input update new level as return
+ * value.
+ */
+static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev,
+						     unsigned int pkts,
+						     unsigned int bytes,
+						     unsigned int *smoothed_interval,
+						     unsigned int *moder_tbl_idx)
+{
+	enum ena_intr_moder_level curr_moder_idx, new_moder_idx;
+	struct ena_intr_moder_entry *curr_moder_entry;
+	struct ena_intr_moder_entry *pred_moder_entry;
+	struct ena_intr_moder_entry *new_moder_entry;
+	struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl;
+	unsigned int interval;
+
+	/* We apply adaptive moderation on Rx path only.
+	 * Tx uses static interrupt moderation.
+	 */
+	if (!pkts || !bytes)
+		/* Tx interrupt, or spurious interrupt,
+		 * in both cases we just use same delay values
+		 */
+		return;
+
+	curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx);
+	if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) {
+		pr_err("Wrong moderation index %u\n", curr_moder_idx);
+		return;
+	}
+
+	curr_moder_entry = &intr_moder_tbl[curr_moder_idx];
+	new_moder_idx = curr_moder_idx;
+
+	if (curr_moder_idx == ENA_INTR_MODER_LOWEST) {
+		if ((pkts > curr_moder_entry->pkts_per_interval) ||
+		    (bytes > curr_moder_entry->bytes_per_interval))
+			new_moder_idx =
+				(enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
+	} else {
+		pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE];
+
+		if ((pkts <= pred_moder_entry->pkts_per_interval) ||
+		    (bytes <= pred_moder_entry->bytes_per_interval))
+			new_moder_idx =
+				(enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE);
+		else if ((pkts > curr_moder_entry->pkts_per_interval) ||
+			 (bytes > curr_moder_entry->bytes_per_interval)) {
+			if (curr_moder_idx != ENA_INTR_MODER_HIGHEST)
+				new_moder_idx =
+					(enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE);
+		}
+	}
+	new_moder_entry = &intr_moder_tbl[new_moder_idx];
+
+	interval = new_moder_entry->intr_moder_interval;
+	*smoothed_interval = (
+		(interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT +
+		ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) /
+		10;
+
+	*moder_tbl_idx = new_moder_idx;
+}
+
+/* ena_com_update_intr_reg - Prepare interrupt register
+ * @intr_reg: interrupt register to update.
+ * @rx_delay_interval: Rx interval in usecs
+ * @tx_delay_interval: Tx interval in usecs
+ * @unmask: unask enable/disable
+ *
+ * Prepare interrupt update register with the supplied parameters.
+ */
+static inline void ena_com_update_intr_reg(struct ena_eth_io_intr_reg *intr_reg,
+					   u32 rx_delay_interval,
+					   u32 tx_delay_interval,
+					   bool unmask)
+{
+	intr_reg->intr_control = 0;
+	intr_reg->intr_control |= rx_delay_interval &
+		ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK;
+
+	intr_reg->intr_control |=
+		(tx_delay_interval << ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT)
+		& ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK;
+
+	if (unmask)
+		intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK;
+}
+
+#endif /* !(ENA_COM) */
diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
new file mode 100644
index 0000000..bb8d736
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_COMMON_H_
+#define _ENA_COMMON_H_
+
+#define ENA_COMMON_SPEC_VERSION_MAJOR	0 /*  */
+#define ENA_COMMON_SPEC_VERSION_MINOR	10 /*  */
+
+/* ENA operates with 48-bit memory addresses. ena_mem_addr_t */
+struct ena_common_mem_addr {
+	u32 mem_addr_low;
+
+	u16 mem_addr_high;
+
+	/* MBZ */
+	u16 reserved16;
+};
+
+#endif /*_ENA_COMMON_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
new file mode 100644
index 0000000..539c536
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ena_eth_com.h"
+
+static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc(
+	struct ena_com_io_cq *io_cq)
+{
+	struct ena_eth_io_rx_cdesc_base *cdesc;
+	u16 expected_phase, head_masked;
+	u16 desc_phase;
+
+	head_masked = io_cq->head & (io_cq->q_depth - 1);
+	expected_phase = io_cq->phase;
+
+	cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr
+			+ (head_masked * io_cq->cdesc_entry_size_in_bytes));
+
+	desc_phase = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >>
+			ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT;
+
+	if (desc_phase != expected_phase)
+		return NULL;
+
+	return cdesc;
+}
+
+static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq)
+{
+	io_cq->head++;
+
+	/* Switch phase bit in case of wrap around */
+	if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0))
+		io_cq->phase ^= 1;
+}
+
+static inline void *get_sq_desc(struct ena_com_io_sq *io_sq)
+{
+	u16 tail_masked;
+	u32 offset;
+
+	tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+
+	offset = tail_masked * io_sq->desc_entry_size;
+
+	return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset);
+}
+
+static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq)
+{
+	u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+	u32 offset = tail_masked * io_sq->desc_entry_size;
+
+	/* In case this queue isn't a LLQ */
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+		return;
+
+	memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset,
+		    io_sq->desc_addr.virt_addr + offset,
+		    io_sq->desc_entry_size);
+}
+
+static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq)
+{
+	io_sq->tail++;
+
+	/* Switch phase bit in case of wrap around */
+	if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0))
+		io_sq->phase ^= 1;
+}
+
+static inline int ena_com_write_header(struct ena_com_io_sq *io_sq,
+				       u8 *head_src, u16 header_len)
+{
+	u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1);
+	u8 __iomem *dev_head_addr =
+		io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size);
+
+	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
+		return 0;
+
+	if (unlikely(!io_sq->header_addr)) {
+		pr_err("Push buffer header ptr is NULL\n");
+		return -EINVAL;
+	}
+
+	memcpy_toio(dev_head_addr, head_src, header_len);
+
+	return 0;
+}
+
+static inline struct ena_eth_io_rx_cdesc_base *
+	ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx)
+{
+	idx &= (io_cq->q_depth - 1);
+	return (struct ena_eth_io_rx_cdesc_base *)
+		((uintptr_t)io_cq->cdesc_addr.virt_addr +
+		idx * io_cq->cdesc_entry_size_in_bytes);
+}
+
+static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq,
+					   u16 *first_cdesc_idx)
+{
+	struct ena_eth_io_rx_cdesc_base *cdesc;
+	u16 count = 0, head_masked;
+	u32 last = 0;
+
+	do {
+		cdesc = ena_com_get_next_rx_cdesc(io_cq);
+		if (!cdesc)
+			break;
+
+		ena_com_cq_inc_head(io_cq);
+		count++;
+		last = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >>
+			ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT;
+	} while (!last);
+
+	if (last) {
+		*first_cdesc_idx = io_cq->cur_rx_pkt_cdesc_start_idx;
+		count += io_cq->cur_rx_pkt_cdesc_count;
+
+		head_masked = io_cq->head & (io_cq->q_depth - 1);
+
+		io_cq->cur_rx_pkt_cdesc_count = 0;
+		io_cq->cur_rx_pkt_cdesc_start_idx = head_masked;
+
+		pr_debug("ena q_id: %d packets were completed. first desc idx %u descs# %d\n",
+			 io_cq->qid, *first_cdesc_idx, count);
+	} else {
+		io_cq->cur_rx_pkt_cdesc_count += count;
+		count = 0;
+	}
+
+	return count;
+}
+
+static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq,
+					     struct ena_com_tx_ctx *ena_tx_ctx)
+{
+	int rc;
+
+	if (ena_tx_ctx->meta_valid) {
+		rc = memcmp(&io_sq->cached_tx_meta,
+			    &ena_tx_ctx->ena_meta,
+			    sizeof(struct ena_com_tx_meta));
+
+		if (unlikely(rc != 0))
+			return true;
+	}
+
+	return false;
+}
+
+static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq,
+							 struct ena_com_tx_ctx *ena_tx_ctx)
+{
+	struct ena_eth_io_tx_meta_desc *meta_desc = NULL;
+	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
+
+	meta_desc = get_sq_desc(io_sq);
+	memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc));
+
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_DESC_MASK;
+
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK;
+
+	/* bits 0-9 of the mss */
+	meta_desc->word2 |= (ena_meta->mss <<
+		ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK;
+	/* bits 10-13 of the mss */
+	meta_desc->len_ctrl |= ((ena_meta->mss >> 10) <<
+		ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK;
+
+	/* Extended meta desc */
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK;
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+	meta_desc->len_ctrl |= (io_sq->phase <<
+		ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_PHASE_MASK;
+
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK;
+	meta_desc->word2 |= ena_meta->l3_hdr_len &
+		ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK;
+	meta_desc->word2 |= (ena_meta->l3_hdr_offset <<
+		ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK;
+
+	meta_desc->word2 |= (ena_meta->l4_hdr_len <<
+		ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) &
+		ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK;
+
+	meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK;
+
+	/* Cached the meta desc */
+	memcpy(&io_sq->cached_tx_meta, ena_meta,
+	       sizeof(struct ena_com_tx_meta));
+
+	ena_com_copy_curr_sq_desc_to_dev(io_sq);
+	ena_com_sq_update_tail(io_sq);
+}
+
+static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx,
+					struct ena_eth_io_rx_cdesc_base *cdesc)
+{
+	ena_rx_ctx->l3_proto = cdesc->status &
+		ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK;
+	ena_rx_ctx->l4_proto =
+		(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >>
+		ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT;
+	ena_rx_ctx->l3_csum_err =
+		(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >>
+		ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT;
+	ena_rx_ctx->l4_csum_err =
+		(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >>
+		ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT;
+	ena_rx_ctx->hash = cdesc->hash;
+	ena_rx_ctx->frag =
+		(cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >>
+		ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT;
+
+	pr_debug("ena_rx_ctx->l3_proto %d ena_rx_ctx->l4_proto %d\nena_rx_ctx->l3_csum_err %d ena_rx_ctx->l4_csum_err %d\nhash frag %d frag: %d cdesc_status: %x\n",
+		 ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto,
+		 ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err,
+		 ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status);
+}
+
+/*****************************************************************************/
+/*****************************     API      **********************************/
+/*****************************************************************************/
+
+int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
+		       struct ena_com_tx_ctx *ena_tx_ctx,
+		       int *nb_hw_desc)
+{
+	struct ena_eth_io_tx_desc *desc = NULL;
+	struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs;
+	void *push_header = ena_tx_ctx->push_header;
+	u16 header_len = ena_tx_ctx->header_len;
+	u16 num_bufs = ena_tx_ctx->num_bufs;
+	int total_desc, i, rc;
+	bool have_meta;
+	u64 addr_hi;
+
+	WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type");
+
+	/* num_bufs +1 for potential meta desc */
+	if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) {
+		pr_err("Not enough space in the tx queue\n");
+		return -ENOMEM;
+	}
+
+	if (unlikely(header_len > io_sq->tx_max_header_size)) {
+		pr_err("header size is too large %d max header: %d\n",
+		       header_len, io_sq->tx_max_header_size);
+		return -EINVAL;
+	}
+
+	/* start with pushing the header (if needed) */
+	rc = ena_com_write_header(io_sq, push_header, header_len);
+	if (unlikely(rc))
+		return rc;
+
+	have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq,
+			ena_tx_ctx);
+	if (have_meta)
+		ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx);
+
+	/* If the caller doesn't want send packets */
+	if (unlikely(!num_bufs && !header_len)) {
+		*nb_hw_desc = have_meta ? 0 : 1;
+		return 0;
+	}
+
+	desc = get_sq_desc(io_sq);
+	memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
+
+	/* Set first desc when we don't have meta descriptor */
+	if (!have_meta)
+		desc->len_ctrl |= ENA_ETH_IO_TX_DESC_FIRST_MASK;
+
+	desc->buff_addr_hi_hdr_sz |= (header_len <<
+		ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT) &
+		ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK;
+	desc->len_ctrl |= (io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) &
+		ENA_ETH_IO_TX_DESC_PHASE_MASK;
+
+	desc->len_ctrl |= ENA_ETH_IO_TX_DESC_COMP_REQ_MASK;
+
+	/* Bits 0-9 */
+	desc->meta_ctrl |= (ena_tx_ctx->req_id <<
+		ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) &
+		ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK;
+
+	desc->meta_ctrl |= (ena_tx_ctx->df <<
+		ENA_ETH_IO_TX_DESC_DF_SHIFT) &
+		ENA_ETH_IO_TX_DESC_DF_MASK;
+
+	/* Bits 10-15 */
+	desc->len_ctrl |= ((ena_tx_ctx->req_id >> 10) <<
+		ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT) &
+		ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK;
+
+	if (ena_tx_ctx->meta_valid) {
+		desc->meta_ctrl |= (ena_tx_ctx->tso_enable <<
+			ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT) &
+			ENA_ETH_IO_TX_DESC_TSO_EN_MASK;
+		desc->meta_ctrl |= ena_tx_ctx->l3_proto &
+			ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK;
+		desc->meta_ctrl |= (ena_tx_ctx->l4_proto <<
+			ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT) &
+			ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK;
+		desc->meta_ctrl |= (ena_tx_ctx->l3_csum_enable <<
+			ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT) &
+			ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK;
+		desc->meta_ctrl |= (ena_tx_ctx->l4_csum_enable <<
+			ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT) &
+			ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK;
+		desc->meta_ctrl |= (ena_tx_ctx->l4_csum_partial <<
+			ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT) &
+			ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK;
+	}
+
+	for (i = 0; i < num_bufs; i++) {
+		/* The first desc share the same desc as the header */
+		if (likely(i != 0)) {
+			ena_com_copy_curr_sq_desc_to_dev(io_sq);
+			ena_com_sq_update_tail(io_sq);
+
+			desc = get_sq_desc(io_sq);
+			memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc));
+
+			desc->len_ctrl |= (io_sq->phase <<
+				ENA_ETH_IO_TX_DESC_PHASE_SHIFT) &
+				ENA_ETH_IO_TX_DESC_PHASE_MASK;
+		}
+
+		desc->len_ctrl |= ena_bufs->len &
+			ENA_ETH_IO_TX_DESC_LENGTH_MASK;
+
+		addr_hi = ((ena_bufs->paddr &
+			GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
+
+		desc->buff_addr_lo = (u32)ena_bufs->paddr;
+		desc->buff_addr_hi_hdr_sz |= addr_hi &
+			ENA_ETH_IO_TX_DESC_ADDR_HI_MASK;
+		ena_bufs++;
+	}
+
+	/* set the last desc indicator */
+	desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK;
+
+	ena_com_copy_curr_sq_desc_to_dev(io_sq);
+
+	ena_com_sq_update_tail(io_sq);
+
+	total_desc = max_t(u16, num_bufs, 1);
+	total_desc += have_meta ? 1 : 0;
+
+	*nb_hw_desc = total_desc;
+	return 0;
+}
+
+int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
+		   struct ena_com_io_sq *io_sq,
+		   struct ena_com_rx_ctx *ena_rx_ctx)
+{
+	struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0];
+	struct ena_eth_io_rx_cdesc_base *cdesc = NULL;
+	u16 cdesc_idx = 0;
+	u16 nb_hw_desc;
+	u16 i;
+
+	WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
+
+	nb_hw_desc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx);
+	if (nb_hw_desc == 0) {
+		ena_rx_ctx->descs = nb_hw_desc;
+		return 0;
+	}
+
+	pr_debug("fetch rx packet: queue %d completed desc: %d\n", io_cq->qid,
+		 nb_hw_desc);
+
+	if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) {
+		pr_err("Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc,
+		       ena_rx_ctx->max_bufs);
+		return -ENOSPC;
+	}
+
+	for (i = 0; i < nb_hw_desc; i++) {
+		cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i);
+
+		ena_buf->len = cdesc->length;
+		ena_buf->req_id = cdesc->req_id;
+		ena_buf++;
+	}
+
+	/* Update SQ head ptr */
+	io_sq->next_to_comp += nb_hw_desc;
+
+	pr_debug("[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid,
+		 io_sq->next_to_comp);
+
+	/* Get rx flags from the last pkt */
+	ena_com_rx_set_flags(ena_rx_ctx, cdesc);
+
+	ena_rx_ctx->descs = nb_hw_desc;
+	return 0;
+}
+
+int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
+			       struct ena_com_buf *ena_buf,
+			       u16 req_id)
+{
+	struct ena_eth_io_rx_desc *desc;
+
+	WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
+
+	if (unlikely(ena_com_sq_empty_space(io_sq) == 0))
+		return -ENOSPC;
+
+	desc = get_sq_desc(io_sq);
+	memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc));
+
+	desc->length = ena_buf->len;
+
+	desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK;
+	desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK;
+	desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK;
+	desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK;
+
+	desc->req_id = req_id;
+
+	desc->buff_addr_lo = (u32)ena_buf->paddr;
+	desc->buff_addr_hi =
+		((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32);
+
+	ena_com_sq_update_tail(io_sq);
+
+	return 0;
+}
+
+int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)
+{
+	u8 expected_phase, cdesc_phase;
+	struct ena_eth_io_tx_cdesc *cdesc;
+	u16 masked_head;
+
+	masked_head = io_cq->head & (io_cq->q_depth - 1);
+	expected_phase = io_cq->phase;
+
+	cdesc = (struct ena_eth_io_tx_cdesc *)
+		((uintptr_t)io_cq->cdesc_addr.virt_addr +
+		(masked_head * io_cq->cdesc_entry_size_in_bytes));
+
+	/* When the current completion descriptor phase isn't the same as the
+	 * expected, it mean that the device still didn't update
+	 * this completion.
+	 */
+	cdesc_phase = cdesc->flags & ENA_ETH_IO_TX_CDESC_PHASE_MASK;
+	if (cdesc_phase != expected_phase)
+		return -EAGAIN;
+
+	ena_com_cq_inc_head(io_cq);
+
+	*req_id = cdesc->req_id;
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
new file mode 100644
index 0000000..bb53c3a
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_ETH_COM_H_
+#define ENA_ETH_COM_H_
+
+#include "ena_com.h"
+
+/* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */
+#define ENA_COMP_HEAD_THRESH 4
+
+struct ena_com_tx_ctx {
+	struct ena_com_tx_meta ena_meta;
+	struct ena_com_buf *ena_bufs;
+	/* For LLQ, header buffer - pushed to the device mem space */
+	void *push_header;
+
+	enum ena_eth_io_l3_proto_index l3_proto;
+	enum ena_eth_io_l4_proto_index l4_proto;
+	u16 num_bufs;
+	u16 req_id;
+	/* For regular queue, indicate the size of the header
+	 * For LLQ, indicate the size of the pushed buffer
+	 */
+	u16 header_len;
+
+	u8 meta_valid;
+	u8 tso_enable;
+	u8 l3_csum_enable;
+	u8 l4_csum_enable;
+	u8 l4_csum_partial;
+	u8 df; /* Don't fragment */
+};
+
+struct ena_com_rx_ctx {
+	struct ena_com_rx_buf_info *ena_bufs;
+	enum ena_eth_io_l3_proto_index l3_proto;
+	enum ena_eth_io_l4_proto_index l4_proto;
+	bool l3_csum_err;
+	bool l4_csum_err;
+	/* fragmented packet */
+	bool frag;
+	u32 hash;
+	u16 descs;
+	int max_bufs;
+};
+
+int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
+		       struct ena_com_tx_ctx *ena_tx_ctx,
+		       int *nb_hw_desc);
+
+int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
+		   struct ena_com_io_sq *io_sq,
+		   struct ena_com_rx_ctx *ena_rx_ctx);
+
+int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
+			       struct ena_com_buf *ena_buf,
+			       u16 req_id);
+
+int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id);
+
+static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq,
+				       struct ena_eth_io_intr_reg *intr_reg)
+{
+	writel(intr_reg->intr_control, io_cq->unmask_reg);
+}
+
+static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
+{
+	u16 tail, next_to_comp, cnt;
+
+	next_to_comp = io_sq->next_to_comp;
+	tail = io_sq->tail;
+	cnt = tail - next_to_comp;
+
+	return io_sq->q_depth - 1 - cnt;
+}
+
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+{
+	u16 tail;
+
+	tail = io_sq->tail;
+
+	pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
+		 io_sq->qid, tail);
+
+	writel(tail, io_sq->db_addr);
+
+	return 0;
+}
+
+static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq)
+{
+	u16 unreported_comp, head;
+	bool need_update;
+
+	head = io_cq->head;
+	unreported_comp = head - io_cq->last_head_update;
+	need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH);
+
+	if (io_cq->cq_head_db_reg && need_update) {
+		pr_debug("Write completion queue doorbell for queue %d: head: %d\n",
+			 io_cq->qid, head);
+		writel(head, io_cq->cq_head_db_reg);
+		io_cq->last_head_update = head;
+	}
+
+	return 0;
+}
+
+static inline void ena_com_update_numa_node(struct ena_com_io_cq *io_cq,
+					    u8 numa_node)
+{
+	struct ena_eth_io_numa_node_cfg_reg numa_cfg;
+
+	if (!io_cq->numa_node_cfg_reg)
+		return;
+
+	numa_cfg.numa_cfg = (numa_node & ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK)
+		| ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK;
+
+	writel(numa_cfg.numa_cfg, io_cq->numa_node_cfg_reg);
+}
+
+static inline void ena_com_comp_ack(struct ena_com_io_sq *io_sq, u16 elem)
+{
+	io_sq->next_to_comp += elem;
+}
+
+#endif /* ENA_ETH_COM_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
new file mode 100644
index 0000000..f320c58
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
@@ -0,0 +1,416 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_ETH_IO_H_
+#define _ENA_ETH_IO_H_
+
+enum ena_eth_io_l3_proto_index {
+	ENA_ETH_IO_L3_PROTO_UNKNOWN	= 0,
+
+	ENA_ETH_IO_L3_PROTO_IPV4	= 8,
+
+	ENA_ETH_IO_L3_PROTO_IPV6	= 11,
+
+	ENA_ETH_IO_L3_PROTO_FCOE	= 21,
+
+	ENA_ETH_IO_L3_PROTO_ROCE	= 22,
+};
+
+enum ena_eth_io_l4_proto_index {
+	ENA_ETH_IO_L4_PROTO_UNKNOWN		= 0,
+
+	ENA_ETH_IO_L4_PROTO_TCP			= 12,
+
+	ENA_ETH_IO_L4_PROTO_UDP			= 13,
+
+	ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE	= 23,
+};
+
+struct ena_eth_io_tx_desc {
+	/* 15:0 : length - Buffer length in bytes, must
+	 *    include any packet trailers that the ENA supposed
+	 *    to update like End-to-End CRC, Authentication GMAC
+	 *    etc. This length must not include the
+	 *    'Push_Buffer' length. This length must not include
+	 *    the 4-byte added in the end for 802.3 Ethernet FCS
+	 * 21:16 : req_id_hi - Request ID[15:10]
+	 * 22 : reserved22 - MBZ
+	 * 23 : meta_desc - MBZ
+	 * 24 : phase
+	 * 25 : reserved1 - MBZ
+	 * 26 : first - Indicates first descriptor in
+	 *    transaction
+	 * 27 : last - Indicates last descriptor in
+	 *    transaction
+	 * 28 : comp_req - Indicates whether completion
+	 *    should be posted, after packet is transmitted.
+	 *    Valid only for first descriptor
+	 * 30:29 : reserved29 - MBZ
+	 * 31 : reserved31 - MBZ
+	 */
+	u32 len_ctrl;
+
+	/* 3:0 : l3_proto_idx - L3 protocol. This field
+	 *    required when l3_csum_en,l3_csum or tso_en are set.
+	 * 4 : DF - IPv4 DF, must be 0 if packet is IPv4 and
+	 *    DF flags of the IPv4 header is 0. Otherwise must
+	 *    be set to 1
+	 * 6:5 : reserved5
+	 * 7 : tso_en - Enable TSO, For TCP only.
+	 * 12:8 : l4_proto_idx - L4 protocol. This field need
+	 *    to be set when l4_csum_en or tso_en are set.
+	 * 13 : l3_csum_en - enable IPv4 header checksum.
+	 * 14 : l4_csum_en - enable TCP/UDP checksum.
+	 * 15 : ethernet_fcs_dis - when set, the controller
+	 *    will not append the 802.3 Ethernet Frame Check
+	 *    Sequence to the packet
+	 * 16 : reserved16
+	 * 17 : l4_csum_partial - L4 partial checksum. when
+	 *    set to 0, the ENA calculates the L4 checksum,
+	 *    where the Destination Address required for the
+	 *    TCP/UDP pseudo-header is taken from the actual
+	 *    packet L3 header. when set to 1, the ENA doesn't
+	 *    calculate the sum of the pseudo-header, instead,
+	 *    the checksum field of the L4 is used instead. When
+	 *    TSO enabled, the checksum of the pseudo-header
+	 *    must not include the tcp length field. L4 partial
+	 *    checksum should be used for IPv6 packet that
+	 *    contains Routing Headers.
+	 * 20:18 : reserved18 - MBZ
+	 * 21 : reserved21 - MBZ
+	 * 31:22 : req_id_lo - Request ID[9:0]
+	 */
+	u32 meta_ctrl;
+
+	u32 buff_addr_lo;
+
+	/* address high and header size
+	 * 15:0 : addr_hi - Buffer Pointer[47:32]
+	 * 23:16 : reserved16_w2
+	 * 31:24 : header_length - Header length. For Low
+	 *    Latency Queues, this fields indicates the number
+	 *    of bytes written to the headers' memory. For
+	 *    normal queues, if packet is TCP or UDP, and longer
+	 *    than max_header_size, then this field should be
+	 *    set to the sum of L4 header offset and L4 header
+	 *    size(without options), otherwise, this field
+	 *    should be set to 0. For both modes, this field
+	 *    must not exceed the max_header_size.
+	 *    max_header_size value is reported by the Max
+	 *    Queues Feature descriptor
+	 */
+	u32 buff_addr_hi_hdr_sz;
+};
+
+struct ena_eth_io_tx_meta_desc {
+	/* 9:0 : req_id_lo - Request ID[9:0]
+	 * 11:10 : reserved10 - MBZ
+	 * 12 : reserved12 - MBZ
+	 * 13 : reserved13 - MBZ
+	 * 14 : ext_valid - if set, offset fields in Word2
+	 *    are valid Also MSS High in Word 0 and bits [31:24]
+	 *    in Word 3
+	 * 15 : reserved15
+	 * 19:16 : mss_hi
+	 * 20 : eth_meta_type - 0: Tx Metadata Descriptor, 1:
+	 *    Extended Metadata Descriptor
+	 * 21 : meta_store - Store extended metadata in queue
+	 *    cache
+	 * 22 : reserved22 - MBZ
+	 * 23 : meta_desc - MBO
+	 * 24 : phase
+	 * 25 : reserved25 - MBZ
+	 * 26 : first - Indicates first descriptor in
+	 *    transaction
+	 * 27 : last - Indicates last descriptor in
+	 *    transaction
+	 * 28 : comp_req - Indicates whether completion
+	 *    should be posted, after packet is transmitted.
+	 *    Valid only for first descriptor
+	 * 30:29 : reserved29 - MBZ
+	 * 31 : reserved31 - MBZ
+	 */
+	u32 len_ctrl;
+
+	/* 5:0 : req_id_hi
+	 * 31:6 : reserved6 - MBZ
+	 */
+	u32 word1;
+
+	/* 7:0 : l3_hdr_len
+	 * 15:8 : l3_hdr_off
+	 * 21:16 : l4_hdr_len_in_words - counts the L4 header
+	 *    length in words. there is an explicit assumption
+	 *    that L4 header appears right after L3 header and
+	 *    L4 offset is based on l3_hdr_off+l3_hdr_len
+	 * 31:22 : mss_lo
+	 */
+	u32 word2;
+
+	u32 reserved;
+};
+
+struct ena_eth_io_tx_cdesc {
+	/* Request ID[15:0] */
+	u16 req_id;
+
+	u8 status;
+
+	/* flags
+	 * 0 : phase
+	 * 7:1 : reserved1
+	 */
+	u8 flags;
+
+	u16 sub_qid;
+
+	u16 sq_head_idx;
+};
+
+struct ena_eth_io_rx_desc {
+	/* In bytes. 0 means 64KB */
+	u16 length;
+
+	/* MBZ */
+	u8 reserved2;
+
+	/* 0 : phase
+	 * 1 : reserved1 - MBZ
+	 * 2 : first - Indicates first descriptor in
+	 *    transaction
+	 * 3 : last - Indicates last descriptor in transaction
+	 * 4 : comp_req
+	 * 5 : reserved5 - MBO
+	 * 7:6 : reserved6 - MBZ
+	 */
+	u8 ctrl;
+
+	u16 req_id;
+
+	/* MBZ */
+	u16 reserved6;
+
+	u32 buff_addr_lo;
+
+	u16 buff_addr_hi;
+
+	/* MBZ */
+	u16 reserved16_w3;
+};
+
+/* 4-word format Note: all ethernet parsing information are valid only when
+ * last=1
+ */
+struct ena_eth_io_rx_cdesc_base {
+	/* 4:0 : l3_proto_idx
+	 * 6:5 : src_vlan_cnt
+	 * 7 : reserved7 - MBZ
+	 * 12:8 : l4_proto_idx
+	 * 13 : l3_csum_err - when set, either the L3
+	 *    checksum error detected, or, the controller didn't
+	 *    validate the checksum. This bit is valid only when
+	 *    l3_proto_idx indicates IPv4 packet
+	 * 14 : l4_csum_err - when set, either the L4
+	 *    checksum error detected, or, the controller didn't
+	 *    validate the checksum. This bit is valid only when
+	 *    l4_proto_idx indicates TCP/UDP packet, and,
+	 *    ipv4_frag is not set
+	 * 15 : ipv4_frag - Indicates IPv4 fragmented packet
+	 * 23:16 : reserved16
+	 * 24 : phase
+	 * 25 : l3_csum2 - second checksum engine result
+	 * 26 : first - Indicates first descriptor in
+	 *    transaction
+	 * 27 : last - Indicates last descriptor in
+	 *    transaction
+	 * 29:28 : reserved28
+	 * 30 : buffer - 0: Metadata descriptor. 1: Buffer
+	 *    Descriptor was used
+	 * 31 : reserved31
+	 */
+	u32 status;
+
+	u16 length;
+
+	u16 req_id;
+
+	/* 32-bit hash result */
+	u32 hash;
+
+	u16 sub_qid;
+
+	u16 reserved;
+};
+
+/* 8-word format */
+struct ena_eth_io_rx_cdesc_ext {
+	struct ena_eth_io_rx_cdesc_base base;
+
+	u32 buff_addr_lo;
+
+	u16 buff_addr_hi;
+
+	u16 reserved16;
+
+	u32 reserved_w6;
+
+	u32 reserved_w7;
+};
+
+struct ena_eth_io_intr_reg {
+	/* 14:0 : rx_intr_delay
+	 * 29:15 : tx_intr_delay
+	 * 30 : intr_unmask
+	 * 31 : reserved
+	 */
+	u32 intr_control;
+};
+
+struct ena_eth_io_numa_node_cfg_reg {
+	/* 7:0 : numa
+	 * 30:8 : reserved
+	 * 31 : enabled
+	 */
+	u32 numa_cfg;
+};
+
+/* tx_desc */
+#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16
+#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0)
+#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4
+#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4)
+#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7
+#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7)
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13
+#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14)
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15
+#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15)
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17
+#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17)
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22
+#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22)
+#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0)
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24
+#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24)
+
+/* tx_meta_desc */
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0)
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14
+#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14)
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16)
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20
+#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20)
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21
+#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21)
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23
+#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23)
+#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24
+#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26
+#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27
+#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27)
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28
+#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28)
+#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8
+#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8)
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16
+#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16)
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22
+#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22)
+
+/* tx_cdesc */
+#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0)
+
+/* rx_desc */
+#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0)
+#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2
+#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2)
+#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3
+#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3)
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4
+#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4)
+
+/* rx_cdesc_base */
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0)
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5
+#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13)
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14
+#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14)
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15
+#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15)
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24
+#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24)
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25
+#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25)
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26
+#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26)
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27
+#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27)
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30
+#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30)
+
+/* intr_reg */
+#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0)
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15
+#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15)
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30
+#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30)
+
+/* numa_node_cfg_reg */
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0)
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31
+#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31)
+
+#endif /*_ENA_ETH_IO_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
new file mode 100644
index 0000000..67b2338f
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -0,0 +1,895 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/pci.h>
+
+#include "ena_netdev.h"
+
+struct ena_stats {
+	char name[ETH_GSTRING_LEN];
+	int stat_offset;
+};
+
+#define ENA_STAT_ENA_COM_ENTRY(stat) { \
+	.name = #stat, \
+	.stat_offset = offsetof(struct ena_com_stats_admin, stat) \
+}
+
+#define ENA_STAT_ENTRY(stat, stat_type) { \
+	.name = #stat, \
+	.stat_offset = offsetof(struct ena_stats_##stat_type, stat) \
+}
+
+#define ENA_STAT_RX_ENTRY(stat) \
+	ENA_STAT_ENTRY(stat, rx)
+
+#define ENA_STAT_TX_ENTRY(stat) \
+	ENA_STAT_ENTRY(stat, tx)
+
+#define ENA_STAT_GLOBAL_ENTRY(stat) \
+	ENA_STAT_ENTRY(stat, dev)
+
+static const struct ena_stats ena_stats_global_strings[] = {
+	ENA_STAT_GLOBAL_ENTRY(tx_timeout),
+	ENA_STAT_GLOBAL_ENTRY(io_suspend),
+	ENA_STAT_GLOBAL_ENTRY(io_resume),
+	ENA_STAT_GLOBAL_ENTRY(wd_expired),
+	ENA_STAT_GLOBAL_ENTRY(interface_up),
+	ENA_STAT_GLOBAL_ENTRY(interface_down),
+	ENA_STAT_GLOBAL_ENTRY(admin_q_pause),
+};
+
+static const struct ena_stats ena_stats_tx_strings[] = {
+	ENA_STAT_TX_ENTRY(cnt),
+	ENA_STAT_TX_ENTRY(bytes),
+	ENA_STAT_TX_ENTRY(queue_stop),
+	ENA_STAT_TX_ENTRY(queue_wakeup),
+	ENA_STAT_TX_ENTRY(dma_mapping_err),
+	ENA_STAT_TX_ENTRY(linearize),
+	ENA_STAT_TX_ENTRY(linearize_failed),
+	ENA_STAT_TX_ENTRY(napi_comp),
+	ENA_STAT_TX_ENTRY(tx_poll),
+	ENA_STAT_TX_ENTRY(doorbells),
+	ENA_STAT_TX_ENTRY(prepare_ctx_err),
+	ENA_STAT_TX_ENTRY(missing_tx_comp),
+	ENA_STAT_TX_ENTRY(bad_req_id),
+};
+
+static const struct ena_stats ena_stats_rx_strings[] = {
+	ENA_STAT_RX_ENTRY(cnt),
+	ENA_STAT_RX_ENTRY(bytes),
+	ENA_STAT_RX_ENTRY(refil_partial),
+	ENA_STAT_RX_ENTRY(bad_csum),
+	ENA_STAT_RX_ENTRY(page_alloc_fail),
+	ENA_STAT_RX_ENTRY(skb_alloc_fail),
+	ENA_STAT_RX_ENTRY(dma_mapping_err),
+	ENA_STAT_RX_ENTRY(bad_desc_num),
+	ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
+};
+
+static const struct ena_stats ena_stats_ena_com_strings[] = {
+	ENA_STAT_ENA_COM_ENTRY(aborted_cmd),
+	ENA_STAT_ENA_COM_ENTRY(submitted_cmd),
+	ENA_STAT_ENA_COM_ENTRY(completed_cmd),
+	ENA_STAT_ENA_COM_ENTRY(out_of_space),
+	ENA_STAT_ENA_COM_ENTRY(no_completion),
+};
+
+#define ENA_STATS_ARRAY_GLOBAL	ARRAY_SIZE(ena_stats_global_strings)
+#define ENA_STATS_ARRAY_TX	ARRAY_SIZE(ena_stats_tx_strings)
+#define ENA_STATS_ARRAY_RX	ARRAY_SIZE(ena_stats_rx_strings)
+#define ENA_STATS_ARRAY_ENA_COM	ARRAY_SIZE(ena_stats_ena_com_strings)
+
+static void ena_safe_update_stat(u64 *src, u64 *dst,
+				 struct u64_stats_sync *syncp)
+{
+	unsigned int start;
+
+	do {
+		start = u64_stats_fetch_begin_irq(syncp);
+		*(dst) = *src;
+	} while (u64_stats_fetch_retry_irq(syncp, start));
+}
+
+static void ena_queue_stats(struct ena_adapter *adapter, u64 **data)
+{
+	const struct ena_stats *ena_stats;
+	struct ena_ring *ring;
+
+	u64 *ptr;
+	int i, j;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		/* Tx stats */
+		ring = &adapter->tx_ring[i];
+
+		for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
+			ena_stats = &ena_stats_tx_strings[j];
+
+			ptr = (u64 *)((uintptr_t)&ring->tx_stats +
+				(uintptr_t)ena_stats->stat_offset);
+
+			ena_safe_update_stat(ptr, (*data)++, &ring->syncp);
+		}
+
+		/* Rx stats */
+		ring = &adapter->rx_ring[i];
+
+		for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
+			ena_stats = &ena_stats_rx_strings[j];
+
+			ptr = (u64 *)((uintptr_t)&ring->rx_stats +
+				(uintptr_t)ena_stats->stat_offset);
+
+			ena_safe_update_stat(ptr, (*data)++, &ring->syncp);
+		}
+	}
+}
+
+static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data)
+{
+	const struct ena_stats *ena_stats;
+	u32 *ptr;
+	int i;
+
+	for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
+		ena_stats = &ena_stats_ena_com_strings[i];
+
+		ptr = (u32 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats +
+			(uintptr_t)ena_stats->stat_offset);
+
+		*(*data)++ = *ptr;
+	}
+}
+
+static void ena_get_ethtool_stats(struct net_device *netdev,
+				  struct ethtool_stats *stats,
+				  u64 *data)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	const struct ena_stats *ena_stats;
+	u64 *ptr;
+	int i;
+
+	for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
+		ena_stats = &ena_stats_global_strings[i];
+
+		ptr = (u64 *)((uintptr_t)&adapter->dev_stats +
+			(uintptr_t)ena_stats->stat_offset);
+
+		ena_safe_update_stat(ptr, data++, &adapter->syncp);
+	}
+
+	ena_queue_stats(adapter, &data);
+	ena_dev_admin_queue_stats(adapter, &data);
+}
+
+int ena_get_sset_count(struct net_device *netdev, int sset)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	if (sset != ETH_SS_STATS)
+		return -EOPNOTSUPP;
+
+	return  adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
+		+ ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM;
+}
+
+static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
+{
+	const struct ena_stats *ena_stats;
+	int i, j;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		/* Tx stats */
+		for (j = 0; j < ENA_STATS_ARRAY_TX; j++) {
+			ena_stats = &ena_stats_tx_strings[j];
+
+			snprintf(*data, ETH_GSTRING_LEN,
+				 "queue_%u_tx_%s", i, ena_stats->name);
+			 (*data) += ETH_GSTRING_LEN;
+		}
+		/* Rx stats */
+		for (j = 0; j < ENA_STATS_ARRAY_RX; j++) {
+			ena_stats = &ena_stats_rx_strings[j];
+
+			snprintf(*data, ETH_GSTRING_LEN,
+				 "queue_%u_rx_%s", i, ena_stats->name);
+			(*data) += ETH_GSTRING_LEN;
+		}
+	}
+}
+
+static void ena_com_dev_strings(u8 **data)
+{
+	const struct ena_stats *ena_stats;
+	int i;
+
+	for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) {
+		ena_stats = &ena_stats_ena_com_strings[i];
+
+		snprintf(*data, ETH_GSTRING_LEN,
+			 "ena_admin_q_%s", ena_stats->name);
+		(*data) += ETH_GSTRING_LEN;
+	}
+}
+
+static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	const struct ena_stats *ena_stats;
+	int i;
+
+	if (sset != ETH_SS_STATS)
+		return;
+
+	for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
+		ena_stats = &ena_stats_global_strings[i];
+
+		memcpy(data, ena_stats->name, ETH_GSTRING_LEN);
+		data += ETH_GSTRING_LEN;
+	}
+
+	ena_queue_strings(adapter, &data);
+	ena_com_dev_strings(&data);
+}
+
+static int ena_get_link_ksettings(struct net_device *netdev,
+				  struct ethtool_link_ksettings *link_ksettings)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct ena_admin_get_feature_link_desc *link;
+	struct ena_admin_get_feat_resp feat_resp;
+	int rc;
+
+	rc = ena_com_get_link_params(ena_dev, &feat_resp);
+	if (rc)
+		return rc;
+
+	link = &feat_resp.u.link;
+	link_ksettings->base.speed = link->speed;
+
+	if (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) {
+		ethtool_link_ksettings_add_link_mode(link_ksettings,
+						     supported, Autoneg);
+		ethtool_link_ksettings_add_link_mode(link_ksettings,
+						     supported, Autoneg);
+	}
+
+	link_ksettings->base.autoneg =
+		(link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) ?
+		AUTONEG_ENABLE : AUTONEG_DISABLE;
+
+	link_ksettings->base.duplex = DUPLEX_FULL;
+
+	return 0;
+}
+
+static int ena_get_coalesce(struct net_device *net_dev,
+			    struct ethtool_coalesce *coalesce)
+{
+	struct ena_adapter *adapter = netdev_priv(net_dev);
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct ena_intr_moder_entry intr_moder_entry;
+
+	if (!ena_com_interrupt_moderation_supported(ena_dev)) {
+		/* the devie doesn't support interrupt moderation */
+		return -EOPNOTSUPP;
+	}
+	coalesce->tx_coalesce_usecs =
+		ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) /
+			ena_dev->intr_delay_resolution;
+	if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) {
+		coalesce->rx_coalesce_usecs =
+			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev)
+			/ ena_dev->intr_delay_resolution;
+	} else {
+		ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry);
+		coalesce->rx_coalesce_usecs_low = intr_moder_entry.intr_moder_interval;
+		coalesce->rx_max_coalesced_frames_low = intr_moder_entry.pkts_per_interval;
+
+		ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry);
+		coalesce->rx_coalesce_usecs = intr_moder_entry.intr_moder_interval;
+		coalesce->rx_max_coalesced_frames = intr_moder_entry.pkts_per_interval;
+
+		ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry);
+		coalesce->rx_coalesce_usecs_high = intr_moder_entry.intr_moder_interval;
+		coalesce->rx_max_coalesced_frames_high = intr_moder_entry.pkts_per_interval;
+	}
+	coalesce->use_adaptive_rx_coalesce =
+		ena_com_get_adaptive_moderation_enabled(ena_dev);
+
+	return 0;
+}
+
+static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter)
+{
+	unsigned int val;
+	int i;
+
+	val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev);
+
+	for (i = 0; i < adapter->num_queues; i++)
+		adapter->tx_ring[i].smoothed_interval = val;
+}
+
+static int ena_set_coalesce(struct net_device *net_dev,
+			    struct ethtool_coalesce *coalesce)
+{
+	struct ena_adapter *adapter = netdev_priv(net_dev);
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct ena_intr_moder_entry intr_moder_entry;
+	int rc;
+
+	if (!ena_com_interrupt_moderation_supported(ena_dev)) {
+		/* the devie doesn't support interrupt moderation */
+		return -EOPNOTSUPP;
+	}
+
+	if (coalesce->rx_coalesce_usecs_irq ||
+	    coalesce->rx_max_coalesced_frames_irq ||
+	    coalesce->tx_coalesce_usecs_irq ||
+	    coalesce->tx_max_coalesced_frames ||
+	    coalesce->tx_max_coalesced_frames_irq ||
+	    coalesce->stats_block_coalesce_usecs ||
+	    coalesce->use_adaptive_tx_coalesce ||
+	    coalesce->pkt_rate_low ||
+	    coalesce->tx_coalesce_usecs_low ||
+	    coalesce->tx_max_coalesced_frames_low ||
+	    coalesce->pkt_rate_high ||
+	    coalesce->tx_coalesce_usecs_high ||
+	    coalesce->tx_max_coalesced_frames_high ||
+	    coalesce->rate_sample_interval)
+		return -EINVAL;
+
+	rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev,
+							       coalesce->tx_coalesce_usecs);
+	if (rc)
+		return rc;
+
+	ena_update_tx_rings_intr_moderation(adapter);
+
+	if (ena_com_get_adaptive_moderation_enabled(ena_dev)) {
+		if (!coalesce->use_adaptive_rx_coalesce) {
+			ena_com_disable_adaptive_moderation(ena_dev);
+			rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
+									       coalesce->rx_coalesce_usecs);
+			return rc;
+		}
+	} else { /* was in non-adaptive mode */
+		if (coalesce->use_adaptive_rx_coalesce) {
+			ena_com_enable_adaptive_moderation(ena_dev);
+		} else {
+			rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
+									       coalesce->rx_coalesce_usecs);
+			return rc;
+		}
+	}
+
+	intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_low;
+	intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_low;
+	intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
+	ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry);
+
+	intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs;
+	intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames;
+	intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
+	ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry);
+
+	intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_high;
+	intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_high;
+	intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED;
+	ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry);
+
+	return 0;
+}
+
+static u32 ena_get_msglevel(struct net_device *netdev)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->msg_enable;
+}
+
+static void ena_set_msglevel(struct net_device *netdev, u32 value)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	adapter->msg_enable = value;
+}
+
+static void ena_get_drvinfo(struct net_device *dev,
+			    struct ethtool_drvinfo *info)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+
+	strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
+	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
+	strlcpy(info->bus_info, pci_name(adapter->pdev),
+		sizeof(info->bus_info));
+}
+
+static void ena_get_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct ena_ring *tx_ring = &adapter->tx_ring[0];
+	struct ena_ring *rx_ring = &adapter->rx_ring[0];
+
+	ring->rx_max_pending = rx_ring->ring_size;
+	ring->tx_max_pending = tx_ring->ring_size;
+	ring->rx_pending = rx_ring->ring_size;
+	ring->tx_pending = tx_ring->ring_size;
+}
+
+static u32 ena_flow_hash_to_flow_type(u16 hash_fields)
+{
+	u32 data = 0;
+
+	if (hash_fields & ENA_ADMIN_RSS_L2_DA)
+		data |= RXH_L2DA;
+
+	if (hash_fields & ENA_ADMIN_RSS_L3_DA)
+		data |= RXH_IP_DST;
+
+	if (hash_fields & ENA_ADMIN_RSS_L3_SA)
+		data |= RXH_IP_SRC;
+
+	if (hash_fields & ENA_ADMIN_RSS_L4_DP)
+		data |= RXH_L4_B_2_3;
+
+	if (hash_fields & ENA_ADMIN_RSS_L4_SP)
+		data |= RXH_L4_B_0_1;
+
+	return data;
+}
+
+static u16 ena_flow_data_to_flow_hash(u32 hash_fields)
+{
+	u16 data = 0;
+
+	if (hash_fields & RXH_L2DA)
+		data |= ENA_ADMIN_RSS_L2_DA;
+
+	if (hash_fields & RXH_IP_DST)
+		data |= ENA_ADMIN_RSS_L3_DA;
+
+	if (hash_fields & RXH_IP_SRC)
+		data |= ENA_ADMIN_RSS_L3_SA;
+
+	if (hash_fields & RXH_L4_B_2_3)
+		data |= ENA_ADMIN_RSS_L4_DP;
+
+	if (hash_fields & RXH_L4_B_0_1)
+		data |= ENA_ADMIN_RSS_L4_SP;
+
+	return data;
+}
+
+static int ena_get_rss_hash(struct ena_com_dev *ena_dev,
+			    struct ethtool_rxnfc *cmd)
+{
+	enum ena_admin_flow_hash_proto proto;
+	u16 hash_fields;
+	int rc;
+
+	cmd->data = 0;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		proto = ENA_ADMIN_RSS_TCP4;
+		break;
+	case UDP_V4_FLOW:
+		proto = ENA_ADMIN_RSS_UDP4;
+		break;
+	case TCP_V6_FLOW:
+		proto = ENA_ADMIN_RSS_TCP6;
+		break;
+	case UDP_V6_FLOW:
+		proto = ENA_ADMIN_RSS_UDP6;
+		break;
+	case IPV4_FLOW:
+		proto = ENA_ADMIN_RSS_IP4;
+		break;
+	case IPV6_FLOW:
+		proto = ENA_ADMIN_RSS_IP6;
+		break;
+	case ETHER_FLOW:
+		proto = ENA_ADMIN_RSS_NOT_IP;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+
+	rc = ena_com_get_hash_ctrl(ena_dev, proto, &hash_fields);
+	if (rc) {
+		/* If device don't have permission, return unsupported */
+		if (rc == -EPERM)
+			rc = -EOPNOTSUPP;
+		return rc;
+	}
+
+	cmd->data = ena_flow_hash_to_flow_type(hash_fields);
+
+	return 0;
+}
+
+static int ena_set_rss_hash(struct ena_com_dev *ena_dev,
+			    struct ethtool_rxnfc *cmd)
+{
+	enum ena_admin_flow_hash_proto proto;
+	u16 hash_fields;
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		proto = ENA_ADMIN_RSS_TCP4;
+		break;
+	case UDP_V4_FLOW:
+		proto = ENA_ADMIN_RSS_UDP4;
+		break;
+	case TCP_V6_FLOW:
+		proto = ENA_ADMIN_RSS_TCP6;
+		break;
+	case UDP_V6_FLOW:
+		proto = ENA_ADMIN_RSS_UDP6;
+		break;
+	case IPV4_FLOW:
+		proto = ENA_ADMIN_RSS_IP4;
+		break;
+	case IPV6_FLOW:
+		proto = ENA_ADMIN_RSS_IP6;
+		break;
+	case ETHER_FLOW:
+		proto = ENA_ADMIN_RSS_NOT_IP;
+		break;
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+		return -EOPNOTSUPP;
+	default:
+		return -EINVAL;
+	}
+
+	hash_fields = ena_flow_data_to_flow_hash(cmd->data);
+
+	return ena_com_fill_hash_ctrl(ena_dev, proto, hash_fields);
+}
+
+static int ena_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int rc = 0;
+
+	switch (info->cmd) {
+	case ETHTOOL_SRXFH:
+		rc = ena_set_rss_hash(adapter->ena_dev, info);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+	case ETHTOOL_SRXCLSRLINS:
+	default:
+		netif_err(adapter, drv, netdev,
+			  "Command parameter %d is not supported\n", info->cmd);
+		rc = -EOPNOTSUPP;
+	}
+
+	return (rc == -EPERM) ? -EOPNOTSUPP : rc;
+}
+
+static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
+			 u32 *rules)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int rc = 0;
+
+	switch (info->cmd) {
+	case ETHTOOL_GRXRINGS:
+		info->data = adapter->num_queues;
+		rc = 0;
+		break;
+	case ETHTOOL_GRXFH:
+		rc = ena_get_rss_hash(adapter->ena_dev, info);
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_GRXCLSRLALL:
+	default:
+		netif_err(adapter, drv, netdev,
+			  "Command parameter %d is not supported\n", info->cmd);
+		rc = -EOPNOTSUPP;
+	}
+
+	return (rc == -EPERM) ? -EOPNOTSUPP : rc;
+}
+
+static u32 ena_get_rxfh_indir_size(struct net_device *netdev)
+{
+	return ENA_RX_RSS_TABLE_SIZE;
+}
+
+static u32 ena_get_rxfh_key_size(struct net_device *netdev)
+{
+	return ENA_HASH_KEY_SIZE;
+}
+
+static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+			u8 *hfunc)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	enum ena_admin_hash_functions ena_func;
+	u8 func;
+	int rc;
+
+	rc = ena_com_indirect_table_get(adapter->ena_dev, indir);
+	if (rc)
+		return rc;
+
+	rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key);
+	if (rc)
+		return rc;
+
+	switch (ena_func) {
+	case ENA_ADMIN_TOEPLITZ:
+		func = ETH_RSS_HASH_TOP;
+		break;
+	case ENA_ADMIN_CRC32:
+		func = ETH_RSS_HASH_XOR;
+		break;
+	default:
+		netif_err(adapter, drv, netdev,
+			  "Command parameter is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (hfunc)
+		*hfunc = func;
+
+	return rc;
+}
+
+static int ena_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key, const u8 hfunc)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	enum ena_admin_hash_functions func;
+	int rc, i;
+
+	if (indir) {
+		for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+			rc = ena_com_indirect_table_fill_entry(ena_dev,
+							       ENA_IO_RXQ_IDX(indir[i]),
+							       i);
+			if (unlikely(rc)) {
+				netif_err(adapter, drv, netdev,
+					  "Cannot fill indirect table (index is too large)\n");
+				return rc;
+			}
+		}
+
+		rc = ena_com_indirect_table_set(ena_dev);
+		if (rc) {
+			netif_err(adapter, drv, netdev,
+				  "Cannot set indirect table\n");
+			return rc == -EPERM ? -EOPNOTSUPP : rc;
+		}
+	}
+
+	switch (hfunc) {
+	case ETH_RSS_HASH_TOP:
+		func = ENA_ADMIN_TOEPLITZ;
+		break;
+	case ETH_RSS_HASH_XOR:
+		func = ENA_ADMIN_CRC32;
+		break;
+	default:
+		netif_err(adapter, drv, netdev, "Unsupported hfunc %d\n",
+			  hfunc);
+		return -EOPNOTSUPP;
+	}
+
+	if (key) {
+		rc = ena_com_fill_hash_function(ena_dev, func, key,
+						ENA_HASH_KEY_SIZE,
+						0xFFFFFFFF);
+		if (unlikely(rc)) {
+			netif_err(adapter, drv, netdev, "Cannot fill key\n");
+			return rc == -EPERM ? -EOPNOTSUPP : rc;
+		}
+	}
+
+	return 0;
+}
+
+static void ena_get_channels(struct net_device *netdev,
+			     struct ethtool_channels *channels)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	channels->max_rx = ENA_MAX_NUM_IO_QUEUES;
+	channels->max_tx = ENA_MAX_NUM_IO_QUEUES;
+	channels->max_other = 0;
+	channels->max_combined = 0;
+	channels->rx_count = adapter->num_queues;
+	channels->tx_count = adapter->num_queues;
+	channels->other_count = 0;
+	channels->combined_count = 0;
+}
+
+static int ena_get_tunable(struct net_device *netdev,
+			   const struct ethtool_tunable *tuna, void *data)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int ret = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		*(u32 *)data = adapter->rx_copybreak;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int ena_set_tunable(struct net_device *netdev,
+			   const struct ethtool_tunable *tuna,
+			   const void *data)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int ret = 0;
+	u32 len;
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		len = *(u32 *)data;
+		if (len > adapter->netdev->mtu) {
+			ret = -EINVAL;
+			break;
+		}
+		adapter->rx_copybreak = len;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static const struct ethtool_ops ena_ethtool_ops = {
+	.get_link_ksettings	= ena_get_link_ksettings,
+	.get_drvinfo		= ena_get_drvinfo,
+	.get_msglevel		= ena_get_msglevel,
+	.set_msglevel		= ena_set_msglevel,
+	.get_link		= ethtool_op_get_link,
+	.get_coalesce		= ena_get_coalesce,
+	.set_coalesce		= ena_set_coalesce,
+	.get_ringparam		= ena_get_ringparam,
+	.get_sset_count         = ena_get_sset_count,
+	.get_strings		= ena_get_strings,
+	.get_ethtool_stats      = ena_get_ethtool_stats,
+	.get_rxnfc		= ena_get_rxnfc,
+	.set_rxnfc		= ena_set_rxnfc,
+	.get_rxfh_indir_size    = ena_get_rxfh_indir_size,
+	.get_rxfh_key_size	= ena_get_rxfh_key_size,
+	.get_rxfh		= ena_get_rxfh,
+	.set_rxfh		= ena_set_rxfh,
+	.get_channels		= ena_get_channels,
+	.get_tunable		= ena_get_tunable,
+	.set_tunable		= ena_set_tunable,
+};
+
+void ena_set_ethtool_ops(struct net_device *netdev)
+{
+	netdev->ethtool_ops = &ena_ethtool_ops;
+}
+
+static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf)
+{
+	struct net_device *netdev = adapter->netdev;
+	u8 *strings_buf;
+	u64 *data_buf;
+	int strings_num;
+	int i, rc;
+
+	strings_num = ena_get_sset_count(netdev, ETH_SS_STATS);
+	if (strings_num <= 0) {
+		netif_err(adapter, drv, netdev, "Can't get stats num\n");
+		return;
+	}
+
+	strings_buf = devm_kzalloc(&adapter->pdev->dev,
+				   strings_num * ETH_GSTRING_LEN,
+				   GFP_ATOMIC);
+	if (!strings_buf) {
+		netif_err(adapter, drv, netdev,
+			  "failed to alloc strings_buf\n");
+		return;
+	}
+
+	data_buf = devm_kzalloc(&adapter->pdev->dev,
+				strings_num * sizeof(u64),
+				GFP_ATOMIC);
+	if (!data_buf) {
+		netif_err(adapter, drv, netdev,
+			  "failed to allocate data buf\n");
+		devm_kfree(&adapter->pdev->dev, strings_buf);
+		return;
+	}
+
+	ena_get_strings(netdev, ETH_SS_STATS, strings_buf);
+	ena_get_ethtool_stats(netdev, NULL, data_buf);
+
+	/* If there is a buffer, dump stats, otherwise print them to dmesg */
+	if (buf)
+		for (i = 0; i < strings_num; i++) {
+			rc = snprintf(buf, ETH_GSTRING_LEN + sizeof(u64),
+				      "%s %llu\n",
+				      strings_buf + i * ETH_GSTRING_LEN,
+				      data_buf[i]);
+			buf += rc;
+		}
+	else
+		for (i = 0; i < strings_num; i++)
+			netif_err(adapter, drv, netdev, "%s: %llu\n",
+				  strings_buf + i * ETH_GSTRING_LEN,
+				  data_buf[i]);
+
+	devm_kfree(&adapter->pdev->dev, strings_buf);
+	devm_kfree(&adapter->pdev->dev, data_buf);
+}
+
+void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf)
+{
+	if (!buf)
+		return;
+
+	ena_dump_stats_ex(adapter, buf);
+}
+
+void ena_dump_stats_to_dmesg(struct ena_adapter *adapter)
+{
+	ena_dump_stats_ex(adapter, NULL);
+}
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
new file mode 100644
index 0000000..bfeaec5
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -0,0 +1,3272 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif /* CONFIG_RFS_ACCEL */
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/numa.h>
+#include <linux/pci.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+#include <net/ip.h>
+
+#include "ena_netdev.h"
+#include "ena_pci_id_tbl.h"
+
+static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";
+
+MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
+MODULE_DESCRIPTION(DEVICE_NAME);
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_MODULE_VERSION);
+
+/* Time in jiffies before concluding the transmitter is hung. */
+#define TX_TIMEOUT  (5 * HZ)
+
+#define ENA_NAPI_BUDGET 64
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
+		NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static struct ena_aenq_handlers aenq_handlers;
+
+static struct workqueue_struct *ena_wq;
+
+MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
+
+static int ena_rss_init_default(struct ena_adapter *adapter);
+
+static void ena_tx_timeout(struct net_device *dev)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.tx_timeout++;
+	u64_stats_update_end(&adapter->syncp);
+
+	netif_err(adapter, tx_err, dev, "Transmit time out\n");
+
+	/* Change the state of the device to trigger reset */
+	set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+}
+
+static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		adapter->rx_ring[i].mtu = mtu;
+}
+
+static int ena_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+	int ret;
+
+	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
+		netif_err(adapter, drv, dev,
+			  "Invalid MTU setting. new_mtu: %d\n", new_mtu);
+
+		return -EINVAL;
+	}
+
+	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
+	if (!ret) {
+		netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
+		update_rx_ring_mtu(adapter, new_mtu);
+		dev->mtu = new_mtu;
+	} else {
+		netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
+			  new_mtu);
+	}
+
+	return ret;
+}
+
+static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
+{
+#ifdef CONFIG_RFS_ACCEL
+	u32 i;
+	int rc;
+
+	adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues);
+	if (!adapter->netdev->rx_cpu_rmap)
+		return -ENOMEM;
+	for (i = 0; i < adapter->num_queues; i++) {
+		int irq_idx = ENA_IO_IRQ_IDX(i);
+
+		rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap,
+				      adapter->msix_entries[irq_idx].vector);
+		if (rc) {
+			free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
+			adapter->netdev->rx_cpu_rmap = NULL;
+			return rc;
+		}
+	}
+#endif /* CONFIG_RFS_ACCEL */
+	return 0;
+}
+
+static void ena_init_io_rings_common(struct ena_adapter *adapter,
+				     struct ena_ring *ring, u16 qid)
+{
+	ring->qid = qid;
+	ring->pdev = adapter->pdev;
+	ring->dev = &adapter->pdev->dev;
+	ring->netdev = adapter->netdev;
+	ring->napi = &adapter->ena_napi[qid].napi;
+	ring->adapter = adapter;
+	ring->ena_dev = adapter->ena_dev;
+	ring->per_napi_packets = 0;
+	ring->per_napi_bytes = 0;
+	ring->cpu = 0;
+	u64_stats_init(&ring->syncp);
+}
+
+static void ena_init_io_rings(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev;
+	struct ena_ring *txr, *rxr;
+	int i;
+
+	ena_dev = adapter->ena_dev;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		txr = &adapter->tx_ring[i];
+		rxr = &adapter->rx_ring[i];
+
+		/* TX/RX common ring state */
+		ena_init_io_rings_common(adapter, txr, i);
+		ena_init_io_rings_common(adapter, rxr, i);
+
+		/* TX specific ring state */
+		txr->ring_size = adapter->tx_ring_size;
+		txr->tx_max_header_size = ena_dev->tx_max_header_size;
+		txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
+		txr->sgl_size = adapter->max_tx_sgl_size;
+		txr->smoothed_interval =
+			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
+
+		/* RX specific ring state */
+		rxr->ring_size = adapter->rx_ring_size;
+		rxr->rx_copybreak = adapter->rx_copybreak;
+		rxr->sgl_size = adapter->max_rx_sgl_size;
+		rxr->smoothed_interval =
+			ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+	}
+}
+
+/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
+{
+	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
+	int size, i, node;
+
+	if (tx_ring->tx_buffer_info) {
+		netif_err(adapter, ifup,
+			  adapter->netdev, "tx_buffer_info info is not NULL");
+		return -EEXIST;
+	}
+
+	size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
+	node = cpu_to_node(ena_irq->cpu);
+
+	tx_ring->tx_buffer_info = vzalloc_node(size, node);
+	if (!tx_ring->tx_buffer_info) {
+		tx_ring->tx_buffer_info = vzalloc(size);
+		if (!tx_ring->tx_buffer_info)
+			return -ENOMEM;
+	}
+
+	size = sizeof(u16) * tx_ring->ring_size;
+	tx_ring->free_tx_ids = vzalloc_node(size, node);
+	if (!tx_ring->free_tx_ids) {
+		tx_ring->free_tx_ids = vzalloc(size);
+		if (!tx_ring->free_tx_ids) {
+			vfree(tx_ring->tx_buffer_info);
+			return -ENOMEM;
+		}
+	}
+
+	/* Req id ring for TX out of order completions */
+	for (i = 0; i < tx_ring->ring_size; i++)
+		tx_ring->free_tx_ids[i] = i;
+
+	/* Reset tx statistics */
+	memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+	tx_ring->cpu = ena_irq->cpu;
+	return 0;
+}
+
+/* ena_free_tx_resources - Free I/O Tx Resources per Queue
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Free all transmit software resources
+ */
+static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
+{
+	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+
+	vfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	vfree(tx_ring->free_tx_ids);
+	tx_ring->free_tx_ids = NULL;
+}
+
+/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues
+ * @adapter: private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
+{
+	int i, rc = 0;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rc = ena_setup_tx_resources(adapter, i);
+		if (rc)
+			goto err_setup_tx;
+	}
+
+	return 0;
+
+err_setup_tx:
+
+	netif_err(adapter, ifup, adapter->netdev,
+		  "Tx queue %d: allocation failed\n", i);
+
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		ena_free_tx_resources(adapter, i);
+	return rc;
+}
+
+/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ */
+static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		ena_free_tx_resources(adapter, i);
+}
+
+/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Returns 0 on success, negative on failure
+ */
+static int ena_setup_rx_resources(struct ena_adapter *adapter,
+				  u32 qid)
+{
+	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+	struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
+	int size, node;
+
+	if (rx_ring->rx_buffer_info) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "rx_buffer_info is not NULL");
+		return -EEXIST;
+	}
+
+	/* alloc extra element so in rx path
+	 * we can always prefetch rx_info + 1
+	 */
+	size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
+	node = cpu_to_node(ena_irq->cpu);
+
+	rx_ring->rx_buffer_info = vzalloc_node(size, node);
+	if (!rx_ring->rx_buffer_info) {
+		rx_ring->rx_buffer_info = vzalloc(size);
+		if (!rx_ring->rx_buffer_info)
+			return -ENOMEM;
+	}
+
+	/* Reset rx statistics */
+	memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
+
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+	rx_ring->cpu = ena_irq->cpu;
+
+	return 0;
+}
+
+/* ena_free_rx_resources - Free I/O Rx Resources
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Free all receive software resources
+ */
+static void ena_free_rx_resources(struct ena_adapter *adapter,
+				  u32 qid)
+{
+	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+
+	vfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+}
+
+/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
+ * @adapter: board private structure
+ *
+ * Return 0 on success, negative on failure
+ */
+static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
+{
+	int i, rc = 0;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rc = ena_setup_rx_resources(adapter, i);
+		if (rc)
+			goto err_setup_rx;
+	}
+
+	return 0;
+
+err_setup_rx:
+
+	netif_err(adapter, ifup, adapter->netdev,
+		  "Rx queue %d: allocation failed\n", i);
+
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		ena_free_rx_resources(adapter, i);
+	return rc;
+}
+
+/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all receive software resources
+ */
+static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		ena_free_rx_resources(adapter, i);
+}
+
+static inline int ena_alloc_rx_page(struct ena_ring *rx_ring,
+				    struct ena_rx_buffer *rx_info, gfp_t gfp)
+{
+	struct ena_com_buf *ena_buf;
+	struct page *page;
+	dma_addr_t dma;
+
+	/* if previous allocated page is not used */
+	if (unlikely(rx_info->page))
+		return 0;
+
+	page = alloc_page(gfp);
+	if (unlikely(!page)) {
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.page_alloc_fail++;
+		u64_stats_update_end(&rx_ring->syncp);
+		return -ENOMEM;
+	}
+
+	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE,
+			   DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(rx_ring->dev, dma))) {
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.dma_mapping_err++;
+		u64_stats_update_end(&rx_ring->syncp);
+
+		__free_page(page);
+		return -EIO;
+	}
+	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+		  "alloc page %p, rx_info %p\n", page, rx_info);
+
+	rx_info->page = page;
+	rx_info->page_offset = 0;
+	ena_buf = &rx_info->ena_buf;
+	ena_buf->paddr = dma;
+	ena_buf->len = PAGE_SIZE;
+
+	return 0;
+}
+
+static void ena_free_rx_page(struct ena_ring *rx_ring,
+			     struct ena_rx_buffer *rx_info)
+{
+	struct page *page = rx_info->page;
+	struct ena_com_buf *ena_buf = &rx_info->ena_buf;
+
+	if (unlikely(!page)) {
+		netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
+			   "Trying to free unallocated buffer\n");
+		return;
+	}
+
+	dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE,
+		       DMA_FROM_DEVICE);
+
+	__free_page(page);
+	rx_info->page = NULL;
+}
+
+static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
+{
+	u16 next_to_use;
+	u32 i;
+	int rc;
+
+	next_to_use = rx_ring->next_to_use;
+
+	for (i = 0; i < num; i++) {
+		struct ena_rx_buffer *rx_info =
+			&rx_ring->rx_buffer_info[next_to_use];
+
+		rc = ena_alloc_rx_page(rx_ring, rx_info,
+				       __GFP_COLD | GFP_ATOMIC | __GFP_COMP);
+		if (unlikely(rc < 0)) {
+			netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
+				   "failed to alloc buffer for rx queue %d\n",
+				   rx_ring->qid);
+			break;
+		}
+		rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
+						&rx_info->ena_buf,
+						next_to_use);
+		if (unlikely(rc)) {
+			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
+				   "failed to add buffer for rx queue %d\n",
+				   rx_ring->qid);
+			break;
+		}
+		next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
+						   rx_ring->ring_size);
+	}
+
+	if (unlikely(i < num)) {
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.refil_partial++;
+		u64_stats_update_end(&rx_ring->syncp);
+		netdev_warn(rx_ring->netdev,
+			    "refilled rx qid %d with only %d buffers (from %d)\n",
+			    rx_ring->qid, i, num);
+	}
+
+	if (likely(i)) {
+		/* Add memory barrier to make sure the desc were written before
+		 * issue a doorbell
+		 */
+		wmb();
+		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
+	}
+
+	rx_ring->next_to_use = next_to_use;
+
+	return i;
+}
+
+static void ena_free_rx_bufs(struct ena_adapter *adapter,
+			     u32 qid)
+{
+	struct ena_ring *rx_ring = &adapter->rx_ring[qid];
+	u32 i;
+
+	for (i = 0; i < rx_ring->ring_size; i++) {
+		struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
+
+		if (rx_info->page)
+			ena_free_rx_page(rx_ring, rx_info);
+	}
+}
+
+/* ena_refill_all_rx_bufs - allocate all queues Rx buffers
+ * @adapter: board private structure
+ *
+ */
+static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
+{
+	struct ena_ring *rx_ring;
+	int i, rc, bufs_num;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rx_ring = &adapter->rx_ring[i];
+		bufs_num = rx_ring->ring_size - 1;
+		rc = ena_refill_rx_bufs(rx_ring, bufs_num);
+
+		if (unlikely(rc != bufs_num))
+			netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
+				   "refilling Queue %d failed. allocated %d buffers from: %d\n",
+				   i, rc, bufs_num);
+	}
+}
+
+static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		ena_free_rx_bufs(adapter, i);
+}
+
+/* ena_free_tx_bufs - Free Tx Buffers per Queue
+ * @tx_ring: TX ring for which buffers be freed
+ */
+static void ena_free_tx_bufs(struct ena_ring *tx_ring)
+{
+	u32 i;
+
+	for (i = 0; i < tx_ring->ring_size; i++) {
+		struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
+		struct ena_com_buf *ena_buf;
+		int nr_frags;
+		int j;
+
+		if (!tx_info->skb)
+			continue;
+
+		netdev_notice(tx_ring->netdev,
+			      "free uncompleted tx skb qid %d idx 0x%x\n",
+			      tx_ring->qid, i);
+
+		ena_buf = tx_info->bufs;
+		dma_unmap_single(tx_ring->dev,
+				 ena_buf->paddr,
+				 ena_buf->len,
+				 DMA_TO_DEVICE);
+
+		/* unmap remaining mapped pages */
+		nr_frags = tx_info->num_of_bufs - 1;
+		for (j = 0; j < nr_frags; j++) {
+			ena_buf++;
+			dma_unmap_page(tx_ring->dev,
+				       ena_buf->paddr,
+				       ena_buf->len,
+				       DMA_TO_DEVICE);
+		}
+
+		dev_kfree_skb_any(tx_info->skb);
+	}
+	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+						  tx_ring->qid));
+}
+
+static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
+{
+	struct ena_ring *tx_ring;
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		tx_ring = &adapter->tx_ring[i];
+		ena_free_tx_bufs(tx_ring);
+	}
+}
+
+static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
+{
+	u16 ena_qid;
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		ena_qid = ENA_IO_TXQ_IDX(i);
+		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
+	}
+}
+
+static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
+{
+	u16 ena_qid;
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		ena_qid = ENA_IO_RXQ_IDX(i);
+		ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
+	}
+}
+
+static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
+{
+	ena_destroy_all_tx_queues(adapter);
+	ena_destroy_all_rx_queues(adapter);
+}
+
+static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
+{
+	struct ena_tx_buffer *tx_info = NULL;
+
+	if (likely(req_id < tx_ring->ring_size)) {
+		tx_info = &tx_ring->tx_buffer_info[req_id];
+		if (likely(tx_info->skb))
+			return 0;
+	}
+
+	if (tx_info)
+		netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
+			  "tx_info doesn't have valid skb\n");
+	else
+		netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
+			  "Invalid req_id: %hu\n", req_id);
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.bad_req_id++;
+	u64_stats_update_end(&tx_ring->syncp);
+
+	/* Trigger device reset */
+	set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags);
+	return -EFAULT;
+}
+
+static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
+{
+	struct netdev_queue *txq;
+	bool above_thresh;
+	u32 tx_bytes = 0;
+	u32 total_done = 0;
+	u16 next_to_clean;
+	u16 req_id;
+	int tx_pkts = 0;
+	int rc;
+
+	next_to_clean = tx_ring->next_to_clean;
+	txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
+
+	while (tx_pkts < budget) {
+		struct ena_tx_buffer *tx_info;
+		struct sk_buff *skb;
+		struct ena_com_buf *ena_buf;
+		int i, nr_frags;
+
+		rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
+						&req_id);
+		if (rc)
+			break;
+
+		rc = validate_tx_req_id(tx_ring, req_id);
+		if (rc)
+			break;
+
+		tx_info = &tx_ring->tx_buffer_info[req_id];
+		skb = tx_info->skb;
+
+		/* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */
+		prefetch(&skb->end);
+
+		tx_info->skb = NULL;
+		tx_info->last_jiffies = 0;
+
+		if (likely(tx_info->num_of_bufs != 0)) {
+			ena_buf = tx_info->bufs;
+
+			dma_unmap_single(tx_ring->dev,
+					 dma_unmap_addr(ena_buf, paddr),
+					 dma_unmap_len(ena_buf, len),
+					 DMA_TO_DEVICE);
+
+			/* unmap remaining mapped pages */
+			nr_frags = tx_info->num_of_bufs - 1;
+			for (i = 0; i < nr_frags; i++) {
+				ena_buf++;
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(ena_buf, paddr),
+					       dma_unmap_len(ena_buf, len),
+					       DMA_TO_DEVICE);
+			}
+		}
+
+		netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+			  "tx_poll: q %d skb %p completed\n", tx_ring->qid,
+			  skb);
+
+		tx_bytes += skb->len;
+		dev_kfree_skb(skb);
+		tx_pkts++;
+		total_done += tx_info->tx_descs;
+
+		tx_ring->free_tx_ids[next_to_clean] = req_id;
+		next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+						     tx_ring->ring_size);
+	}
+
+	tx_ring->next_to_clean = next_to_clean;
+	ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
+	ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq);
+
+	netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
+
+	netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
+		  "tx_poll: q %d done. total pkts: %d\n",
+		  tx_ring->qid, tx_pkts);
+
+	/* need to make the rings circular update visible to
+	 * ena_start_xmit() before checking for netif_queue_stopped().
+	 */
+	smp_mb();
+
+	above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
+		ENA_TX_WAKEUP_THRESH;
+	if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
+		__netif_tx_lock(txq, smp_processor_id());
+		above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) >
+			ENA_TX_WAKEUP_THRESH;
+		if (netif_tx_queue_stopped(txq) && above_thresh) {
+			netif_tx_wake_queue(txq);
+			u64_stats_update_begin(&tx_ring->syncp);
+			tx_ring->tx_stats.queue_wakeup++;
+			u64_stats_update_end(&tx_ring->syncp);
+		}
+		__netif_tx_unlock(txq);
+	}
+
+	tx_ring->per_napi_bytes += tx_bytes;
+	tx_ring->per_napi_packets += tx_pkts;
+
+	return tx_pkts;
+}
+
+static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
+				  struct ena_com_rx_buf_info *ena_bufs,
+				  u32 descs,
+				  u16 *next_to_clean)
+{
+	struct sk_buff *skb;
+	struct ena_rx_buffer *rx_info =
+		&rx_ring->rx_buffer_info[*next_to_clean];
+	u32 len;
+	u32 buf = 0;
+	void *va;
+
+	len = ena_bufs[0].len;
+	if (unlikely(!rx_info->page)) {
+		netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+			  "Page is NULL\n");
+		return NULL;
+	}
+
+	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+		  "rx_info %p page %p\n",
+		  rx_info, rx_info->page);
+
+	/* save virt address of first buffer */
+	va = page_address(rx_info->page) + rx_info->page_offset;
+	prefetch(va + NET_IP_ALIGN);
+
+	if (len <= rx_ring->rx_copybreak) {
+		skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+						rx_ring->rx_copybreak);
+		if (unlikely(!skb)) {
+			u64_stats_update_begin(&rx_ring->syncp);
+			rx_ring->rx_stats.skb_alloc_fail++;
+			u64_stats_update_end(&rx_ring->syncp);
+			netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+				  "Failed to allocate skb\n");
+			return NULL;
+		}
+
+		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+			  "rx allocated small packet. len %d. data_len %d\n",
+			  skb->len, skb->data_len);
+
+		/* sync this buffer for CPU use */
+		dma_sync_single_for_cpu(rx_ring->dev,
+					dma_unmap_addr(&rx_info->ena_buf, paddr),
+					len,
+					DMA_FROM_DEVICE);
+		skb_copy_to_linear_data(skb, va, len);
+		dma_sync_single_for_device(rx_ring->dev,
+					   dma_unmap_addr(&rx_info->ena_buf, paddr),
+					   len,
+					   DMA_FROM_DEVICE);
+
+		skb_put(skb, len);
+		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+		*next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
+						     rx_ring->ring_size);
+		return skb;
+	}
+
+	skb = napi_get_frags(rx_ring->napi);
+	if (unlikely(!skb)) {
+		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+			  "Failed allocating skb\n");
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.skb_alloc_fail++;
+		u64_stats_update_end(&rx_ring->syncp);
+		return NULL;
+	}
+
+	do {
+		dma_unmap_page(rx_ring->dev,
+			       dma_unmap_addr(&rx_info->ena_buf, paddr),
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
+				rx_info->page_offset, len, PAGE_SIZE);
+
+		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+			  "rx skb updated. len %d. data_len %d\n",
+			  skb->len, skb->data_len);
+
+		rx_info->page = NULL;
+		*next_to_clean =
+			ENA_RX_RING_IDX_NEXT(*next_to_clean,
+					     rx_ring->ring_size);
+		if (likely(--descs == 0))
+			break;
+		rx_info = &rx_ring->rx_buffer_info[*next_to_clean];
+		len = ena_bufs[++buf].len;
+	} while (1);
+
+	return skb;
+}
+
+/* ena_rx_checksum - indicate in skb if hw indicated a good cksum
+ * @adapter: structure containing adapter specific data
+ * @ena_rx_ctx: received packet context/metadata
+ * @skb: skb currently being received and modified
+ */
+static inline void ena_rx_checksum(struct ena_ring *rx_ring,
+				   struct ena_com_rx_ctx *ena_rx_ctx,
+				   struct sk_buff *skb)
+{
+	/* Rx csum disabled */
+	if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
+		skb->ip_summed = CHECKSUM_NONE;
+		return;
+	}
+
+	/* For fragmented packets the checksum isn't valid */
+	if (ena_rx_ctx->frag) {
+		skb->ip_summed = CHECKSUM_NONE;
+		return;
+	}
+
+	/* if IP and error */
+	if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
+		     (ena_rx_ctx->l3_csum_err))) {
+		/* ipv4 checksum error */
+		skb->ip_summed = CHECKSUM_NONE;
+		u64_stats_update_begin(&rx_ring->syncp);
+		rx_ring->rx_stats.bad_csum++;
+		u64_stats_update_end(&rx_ring->syncp);
+		netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+			  "RX IPv4 header checksum error\n");
+		return;
+	}
+
+	/* if TCP/UDP */
+	if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
+		   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
+		if (unlikely(ena_rx_ctx->l4_csum_err)) {
+			/* TCP/UDP checksum error */
+			u64_stats_update_begin(&rx_ring->syncp);
+			rx_ring->rx_stats.bad_csum++;
+			u64_stats_update_end(&rx_ring->syncp);
+			netif_err(rx_ring->adapter, rx_err, rx_ring->netdev,
+				  "RX L4 checksum error\n");
+			skb->ip_summed = CHECKSUM_NONE;
+			return;
+		}
+
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+}
+
+static void ena_set_rx_hash(struct ena_ring *rx_ring,
+			    struct ena_com_rx_ctx *ena_rx_ctx,
+			    struct sk_buff *skb)
+{
+	enum pkt_hash_types hash_type;
+
+	if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
+		if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
+			   (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
+
+			hash_type = PKT_HASH_TYPE_L4;
+		else
+			hash_type = PKT_HASH_TYPE_NONE;
+
+		/* Override hash type if the packet is fragmented */
+		if (ena_rx_ctx->frag)
+			hash_type = PKT_HASH_TYPE_NONE;
+
+		skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
+	}
+}
+
+/* ena_clean_rx_irq - Cleanup RX irq
+ * @rx_ring: RX ring to clean
+ * @napi: napi handler
+ * @budget: how many packets driver is allowed to clean
+ *
+ * Returns the number of cleaned buffers.
+ */
+static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
+			    u32 budget)
+{
+	u16 next_to_clean = rx_ring->next_to_clean;
+	u32 res_budget, work_done;
+
+	struct ena_com_rx_ctx ena_rx_ctx;
+	struct ena_adapter *adapter;
+	struct sk_buff *skb;
+	int refill_required;
+	int refill_threshold;
+	int rc = 0;
+	int total_len = 0;
+	int rx_copybreak_pkt = 0;
+
+	netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+		  "%s qid %d\n", __func__, rx_ring->qid);
+	res_budget = budget;
+
+	do {
+		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
+		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
+		ena_rx_ctx.descs = 0;
+		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
+				    rx_ring->ena_com_io_sq,
+				    &ena_rx_ctx);
+		if (unlikely(rc))
+			goto error;
+
+		if (unlikely(ena_rx_ctx.descs == 0))
+			break;
+
+		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
+			  "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
+			  rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
+			  ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
+
+		/* allocate skb and fill it */
+		skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs,
+				 &next_to_clean);
+
+		/* exit if we failed to retrieve a buffer */
+		if (unlikely(!skb)) {
+			next_to_clean = ENA_RX_RING_IDX_ADD(next_to_clean,
+							    ena_rx_ctx.descs,
+							    rx_ring->ring_size);
+			break;
+		}
+
+		ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
+
+		ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
+
+		skb_record_rx_queue(skb, rx_ring->qid);
+
+		if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) {
+			total_len += rx_ring->ena_bufs[0].len;
+			rx_copybreak_pkt++;
+			napi_gro_receive(napi, skb);
+		} else {
+			total_len += skb->len;
+			napi_gro_frags(napi);
+		}
+
+		res_budget--;
+	} while (likely(res_budget));
+
+	work_done = budget - res_budget;
+	rx_ring->per_napi_bytes += total_len;
+	rx_ring->per_napi_packets += work_done;
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->rx_stats.bytes += total_len;
+	rx_ring->rx_stats.cnt += work_done;
+	rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
+	u64_stats_update_end(&rx_ring->syncp);
+
+	rx_ring->next_to_clean = next_to_clean;
+
+	refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq);
+	refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER;
+
+	/* Optimization, try to batch new rx buffers */
+	if (refill_required > refill_threshold) {
+		ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq);
+		ena_refill_rx_bufs(rx_ring, refill_required);
+	}
+
+	return work_done;
+
+error:
+	adapter = netdev_priv(rx_ring->netdev);
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->rx_stats.bad_desc_num++;
+	u64_stats_update_end(&rx_ring->syncp);
+
+	/* Too many desc from the device. Trigger reset */
+	set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+
+	return 0;
+}
+
+inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring,
+				       struct ena_ring *tx_ring)
+{
+	/* We apply adaptive moderation on Rx path only.
+	 * Tx uses static interrupt moderation.
+	 */
+	ena_com_calculate_interrupt_delay(rx_ring->ena_dev,
+					  rx_ring->per_napi_packets,
+					  rx_ring->per_napi_bytes,
+					  &rx_ring->smoothed_interval,
+					  &rx_ring->moder_tbl_idx);
+
+	/* Reset per napi packets/bytes */
+	tx_ring->per_napi_packets = 0;
+	tx_ring->per_napi_bytes = 0;
+	rx_ring->per_napi_packets = 0;
+	rx_ring->per_napi_bytes = 0;
+}
+
+static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+					     struct ena_ring *rx_ring)
+{
+	int cpu = get_cpu();
+	int numa_node;
+
+	/* Check only one ring since the 2 rings are running on the same cpu */
+	if (likely(tx_ring->cpu == cpu))
+		goto out;
+
+	numa_node = cpu_to_node(cpu);
+	put_cpu();
+
+	if (numa_node != NUMA_NO_NODE) {
+		ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
+		ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node);
+	}
+
+	tx_ring->cpu = cpu;
+	rx_ring->cpu = cpu;
+
+	return;
+out:
+	put_cpu();
+}
+
+static int ena_io_poll(struct napi_struct *napi, int budget)
+{
+	struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+	struct ena_ring *tx_ring, *rx_ring;
+	struct ena_eth_io_intr_reg intr_reg;
+
+	u32 tx_work_done;
+	u32 rx_work_done;
+	int tx_budget;
+	int napi_comp_call = 0;
+	int ret;
+
+	tx_ring = ena_napi->tx_ring;
+	rx_ring = ena_napi->rx_ring;
+
+	tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
+		napi_complete_done(napi, 0);
+		return 0;
+	}
+
+	tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
+	rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+
+	if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
+		napi_complete_done(napi, rx_work_done);
+
+		napi_comp_call = 1;
+		/* Tx and Rx share the same interrupt vector */
+		if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
+			ena_adjust_intr_moderation(rx_ring, tx_ring);
+
+		/* Update intr register: rx intr delay, tx intr delay and
+		 * interrupt unmask
+		 */
+		ena_com_update_intr_reg(&intr_reg,
+					rx_ring->smoothed_interval,
+					tx_ring->smoothed_interval,
+					true);
+
+		/* It is a shared MSI-X. Tx and Rx CQ have pointer to it.
+		 * So we use one of them to reach the intr reg
+		 */
+		ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+
+		ena_update_ring_numa_node(tx_ring, rx_ring);
+
+		ret = rx_work_done;
+	} else {
+		ret = budget;
+	}
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.napi_comp += napi_comp_call;
+	tx_ring->tx_stats.tx_poll++;
+	u64_stats_update_end(&tx_ring->syncp);
+
+	return ret;
+}
+
+static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)data;
+
+	ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
+
+	/* Don't call the aenq handler before probe is done */
+	if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
+		ena_com_aenq_intr_handler(adapter->ena_dev, data);
+
+	return IRQ_HANDLED;
+}
+
+/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
+ * @irq: interrupt number
+ * @data: pointer to a network interface private napi device structure
+ */
+static irqreturn_t ena_intr_msix_io(int irq, void *data)
+{
+	struct ena_napi *ena_napi = data;
+
+	napi_schedule(&ena_napi->napi);
+
+	return IRQ_HANDLED;
+}
+
+static int ena_enable_msix(struct ena_adapter *adapter, int num_queues)
+{
+	int i, msix_vecs, rc;
+
+	if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
+		netif_err(adapter, probe, adapter->netdev,
+			  "Error, MSI-X is already enabled\n");
+		return -EPERM;
+	}
+
+	/* Reserved the max msix vectors we might need */
+	msix_vecs = ENA_MAX_MSIX_VEC(num_queues);
+
+	netif_dbg(adapter, probe, adapter->netdev,
+		  "trying to enable MSI-X, vectors %d\n", msix_vecs);
+
+	adapter->msix_entries = vzalloc(msix_vecs * sizeof(struct msix_entry));
+
+	if (!adapter->msix_entries)
+		return -ENOMEM;
+
+	for (i = 0; i < msix_vecs; i++)
+		adapter->msix_entries[i].entry = i;
+
+	rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, msix_vecs);
+	if (rc != 0) {
+		netif_err(adapter, probe, adapter->netdev,
+			  "Failed to enable MSI-X, vectors %d rc %d\n",
+			  msix_vecs, rc);
+		return -ENOSPC;
+	}
+
+	netif_dbg(adapter, probe, adapter->netdev, "enable MSI-X, vectors %d\n",
+		  msix_vecs);
+
+	if (msix_vecs >= 1) {
+		if (ena_init_rx_cpu_rmap(adapter))
+			netif_warn(adapter, probe, adapter->netdev,
+				   "Failed to map IRQs to CPUs\n");
+	}
+
+	adapter->msix_vecs = msix_vecs;
+	set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
+
+	return 0;
+}
+
+static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
+{
+	u32 cpu;
+
+	snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
+		 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
+		 pci_name(adapter->pdev));
+	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
+		ena_intr_msix_mgmnt;
+	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
+	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
+		adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector;
+	cpu = cpumask_first(cpu_online_mask);
+	adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
+	cpumask_set_cpu(cpu,
+			&adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
+}
+
+static void ena_setup_io_intr(struct ena_adapter *adapter)
+{
+	struct net_device *netdev;
+	int irq_idx, i, cpu;
+
+	netdev = adapter->netdev;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		irq_idx = ENA_IO_IRQ_IDX(i);
+		cpu = i % num_online_cpus();
+
+		snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
+			 "%s-Tx-Rx-%d", netdev->name, i);
+		adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
+		adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
+		adapter->irq_tbl[irq_idx].vector =
+			adapter->msix_entries[irq_idx].vector;
+		adapter->irq_tbl[irq_idx].cpu = cpu;
+
+		cpumask_set_cpu(cpu,
+				&adapter->irq_tbl[irq_idx].affinity_hint_mask);
+	}
+}
+
+static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
+{
+	unsigned long flags = 0;
+	struct ena_irq *irq;
+	int rc;
+
+	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
+	rc = request_irq(irq->vector, irq->handler, flags, irq->name,
+			 irq->data);
+	if (rc) {
+		netif_err(adapter, probe, adapter->netdev,
+			  "failed to request admin irq\n");
+		return rc;
+	}
+
+	netif_dbg(adapter, probe, adapter->netdev,
+		  "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
+		  irq->affinity_hint_mask.bits[0], irq->vector);
+
+	irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+
+	return rc;
+}
+
+static int ena_request_io_irq(struct ena_adapter *adapter)
+{
+	unsigned long flags = 0;
+	struct ena_irq *irq;
+	int rc = 0, i, k;
+
+	if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to request I/O IRQ: MSI-X is not enabled\n");
+		return -EINVAL;
+	}
+
+	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+		irq = &adapter->irq_tbl[i];
+		rc = request_irq(irq->vector, irq->handler, flags, irq->name,
+				 irq->data);
+		if (rc) {
+			netif_err(adapter, ifup, adapter->netdev,
+				  "Failed to request I/O IRQ. index %d rc %d\n",
+				   i, rc);
+			goto err;
+		}
+
+		netif_dbg(adapter, ifup, adapter->netdev,
+			  "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
+			  i, irq->affinity_hint_mask.bits[0], irq->vector);
+
+		irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
+	}
+
+	return rc;
+
+err:
+	for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
+		irq = &adapter->irq_tbl[k];
+		free_irq(irq->vector, irq->data);
+	}
+
+	return rc;
+}
+
+static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
+{
+	struct ena_irq *irq;
+
+	irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
+	synchronize_irq(irq->vector);
+	irq_set_affinity_hint(irq->vector, NULL);
+	free_irq(irq->vector, irq->data);
+}
+
+static void ena_free_io_irq(struct ena_adapter *adapter)
+{
+	struct ena_irq *irq;
+	int i;
+
+#ifdef CONFIG_RFS_ACCEL
+	if (adapter->msix_vecs >= 1) {
+		free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap);
+		adapter->netdev->rx_cpu_rmap = NULL;
+	}
+#endif /* CONFIG_RFS_ACCEL */
+
+	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) {
+		irq = &adapter->irq_tbl[i];
+		irq_set_affinity_hint(irq->vector, NULL);
+		free_irq(irq->vector, irq->data);
+	}
+}
+
+static void ena_disable_msix(struct ena_adapter *adapter)
+{
+	if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
+		pci_disable_msix(adapter->pdev);
+
+	if (adapter->msix_entries)
+		vfree(adapter->msix_entries);
+	adapter->msix_entries = NULL;
+}
+
+static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
+{
+	int i;
+
+	if (!netif_running(adapter->netdev))
+		return;
+
+	for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++)
+		synchronize_irq(adapter->irq_tbl[i].vector);
+}
+
+static void ena_del_napi(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		netif_napi_del(&adapter->ena_napi[i].napi);
+}
+
+static void ena_init_napi(struct ena_adapter *adapter)
+{
+	struct ena_napi *napi;
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		napi = &adapter->ena_napi[i];
+
+		netif_napi_add(adapter->netdev,
+			       &adapter->ena_napi[i].napi,
+			       ena_io_poll,
+			       ENA_NAPI_BUDGET);
+		napi->rx_ring = &adapter->rx_ring[i];
+		napi->tx_ring = &adapter->tx_ring[i];
+		napi->qid = i;
+	}
+}
+
+static void ena_napi_disable_all(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		napi_disable(&adapter->ena_napi[i].napi);
+}
+
+static void ena_napi_enable_all(struct ena_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		napi_enable(&adapter->ena_napi[i].napi);
+}
+
+static void ena_restore_ethtool_params(struct ena_adapter *adapter)
+{
+	adapter->tx_usecs = 0;
+	adapter->rx_usecs = 0;
+	adapter->tx_frames = 1;
+	adapter->rx_frames = 1;
+}
+
+/* Configure the Rx forwarding */
+static int ena_rss_configure(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	int rc;
+
+	/* In case the RSS table wasn't initialized by probe */
+	if (!ena_dev->rss.tbl_log_size) {
+		rc = ena_rss_init_default(adapter);
+		if (rc && (rc != -EPERM)) {
+			netif_err(adapter, ifup, adapter->netdev,
+				  "Failed to init RSS rc: %d\n", rc);
+			return rc;
+		}
+	}
+
+	/* Set indirect table */
+	rc = ena_com_indirect_table_set(ena_dev);
+	if (unlikely(rc && rc != -EPERM))
+		return rc;
+
+	/* Configure hash function (if supported) */
+	rc = ena_com_set_hash_function(ena_dev);
+	if (unlikely(rc && (rc != -EPERM)))
+		return rc;
+
+	/* Configure hash inputs (if supported) */
+	rc = ena_com_set_hash_ctrl(ena_dev);
+	if (unlikely(rc && (rc != -EPERM)))
+		return rc;
+
+	return 0;
+}
+
+static int ena_up_complete(struct ena_adapter *adapter)
+{
+	int rc, i;
+
+	rc = ena_rss_configure(adapter);
+	if (rc)
+		return rc;
+
+	ena_init_napi(adapter);
+
+	ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
+
+	ena_refill_all_rx_bufs(adapter);
+
+	/* enable transmits */
+	netif_tx_start_all_queues(adapter->netdev);
+
+	ena_restore_ethtool_params(adapter);
+
+	ena_napi_enable_all(adapter);
+
+	/* schedule napi in case we had pending packets
+	 * from the last time we disable napi
+	 */
+	for (i = 0; i < adapter->num_queues; i++)
+		napi_schedule(&adapter->ena_napi[i].napi);
+
+	return 0;
+}
+
+static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
+{
+	struct ena_com_create_io_ctx ctx = { 0 };
+	struct ena_com_dev *ena_dev;
+	struct ena_ring *tx_ring;
+	u32 msix_vector;
+	u16 ena_qid;
+	int rc;
+
+	ena_dev = adapter->ena_dev;
+
+	tx_ring = &adapter->tx_ring[qid];
+	msix_vector = ENA_IO_IRQ_IDX(qid);
+	ena_qid = ENA_IO_TXQ_IDX(qid);
+
+	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
+	ctx.qid = ena_qid;
+	ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
+	ctx.msix_vector = msix_vector;
+	ctx.queue_size = adapter->tx_ring_size;
+	ctx.numa_node = cpu_to_node(tx_ring->cpu);
+
+	rc = ena_com_create_io_queue(ena_dev, &ctx);
+	if (rc) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to create I/O TX queue num %d rc: %d\n",
+			  qid, rc);
+		return rc;
+	}
+
+	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
+				     &tx_ring->ena_com_io_sq,
+				     &tx_ring->ena_com_io_cq);
+	if (rc) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
+			  qid, rc);
+		ena_com_destroy_io_queue(ena_dev, ena_qid);
+	}
+
+	ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
+	return rc;
+}
+
+static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	int rc, i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rc = ena_create_io_tx_queue(adapter, i);
+		if (rc)
+			goto create_err;
+	}
+
+	return 0;
+
+create_err:
+	while (i--)
+		ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
+
+	return rc;
+}
+
+static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
+{
+	struct ena_com_dev *ena_dev;
+	struct ena_com_create_io_ctx ctx = { 0 };
+	struct ena_ring *rx_ring;
+	u32 msix_vector;
+	u16 ena_qid;
+	int rc;
+
+	ena_dev = adapter->ena_dev;
+
+	rx_ring = &adapter->rx_ring[qid];
+	msix_vector = ENA_IO_IRQ_IDX(qid);
+	ena_qid = ENA_IO_RXQ_IDX(qid);
+
+	ctx.qid = ena_qid;
+	ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
+	ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+	ctx.msix_vector = msix_vector;
+	ctx.queue_size = adapter->rx_ring_size;
+	ctx.numa_node = cpu_to_node(rx_ring->cpu);
+
+	rc = ena_com_create_io_queue(ena_dev, &ctx);
+	if (rc) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to create I/O RX queue num %d rc: %d\n",
+			  qid, rc);
+		return rc;
+	}
+
+	rc = ena_com_get_io_handlers(ena_dev, ena_qid,
+				     &rx_ring->ena_com_io_sq,
+				     &rx_ring->ena_com_io_cq);
+	if (rc) {
+		netif_err(adapter, ifup, adapter->netdev,
+			  "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
+			  qid, rc);
+		ena_com_destroy_io_queue(ena_dev, ena_qid);
+	}
+
+	ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
+
+	return rc;
+}
+
+static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	int rc, i;
+
+	for (i = 0; i < adapter->num_queues; i++) {
+		rc = ena_create_io_rx_queue(adapter, i);
+		if (rc)
+			goto create_err;
+	}
+
+	return 0;
+
+create_err:
+	while (i--)
+		ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
+
+	return rc;
+}
+
+static int ena_up(struct ena_adapter *adapter)
+{
+	int rc;
+
+	netdev_dbg(adapter->netdev, "%s\n", __func__);
+
+	ena_setup_io_intr(adapter);
+
+	rc = ena_request_io_irq(adapter);
+	if (rc)
+		goto err_req_irq;
+
+	/* allocate transmit descriptors */
+	rc = ena_setup_all_tx_resources(adapter);
+	if (rc)
+		goto err_setup_tx;
+
+	/* allocate receive descriptors */
+	rc = ena_setup_all_rx_resources(adapter);
+	if (rc)
+		goto err_setup_rx;
+
+	/* Create TX queues */
+	rc = ena_create_all_io_tx_queues(adapter);
+	if (rc)
+		goto err_create_tx_queues;
+
+	/* Create RX queues */
+	rc = ena_create_all_io_rx_queues(adapter);
+	if (rc)
+		goto err_create_rx_queues;
+
+	rc = ena_up_complete(adapter);
+	if (rc)
+		goto err_up;
+
+	if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
+		netif_carrier_on(adapter->netdev);
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.interface_up++;
+	u64_stats_update_end(&adapter->syncp);
+
+	set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+	return rc;
+
+err_up:
+	ena_destroy_all_rx_queues(adapter);
+err_create_rx_queues:
+	ena_destroy_all_tx_queues(adapter);
+err_create_tx_queues:
+	ena_free_all_io_rx_resources(adapter);
+err_setup_rx:
+	ena_free_all_io_tx_resources(adapter);
+err_setup_tx:
+	ena_free_io_irq(adapter);
+err_req_irq:
+
+	return rc;
+}
+
+static void ena_down(struct ena_adapter *adapter)
+{
+	netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
+
+	clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.interface_down++;
+	u64_stats_update_end(&adapter->syncp);
+
+	/* After this point the napi handler won't enable the tx queue */
+	ena_napi_disable_all(adapter);
+	netif_carrier_off(adapter->netdev);
+	netif_tx_disable(adapter->netdev);
+
+	/* After destroy the queue there won't be any new interrupts */
+	ena_destroy_all_io_queues(adapter);
+
+	ena_disable_io_intr_sync(adapter);
+	ena_free_io_irq(adapter);
+	ena_del_napi(adapter);
+
+	ena_free_all_tx_bufs(adapter);
+	ena_free_all_rx_bufs(adapter);
+	ena_free_all_io_tx_resources(adapter);
+	ena_free_all_io_rx_resources(adapter);
+}
+
+/* ena_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * Returns 0 on success, negative value on failure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the watchdog timer is started,
+ * and the stack is notified that the interface is ready.
+ */
+static int ena_open(struct net_device *netdev)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int rc;
+
+	/* Notify the stack of the actual queue counts. */
+	rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues);
+	if (rc) {
+		netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
+		return rc;
+	}
+
+	rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues);
+	if (rc) {
+		netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
+		return rc;
+	}
+
+	rc = ena_up(adapter);
+	if (rc)
+		return rc;
+
+	return rc;
+}
+
+/* ena_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * Returns 0, this is not allowed to fail
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the drivers control, but
+ * needs to be disabled.  A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ */
+static int ena_close(struct net_device *netdev)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+
+	netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
+
+	if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		ena_down(adapter);
+
+	return 0;
+}
+
+static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb)
+{
+	u32 mss = skb_shinfo(skb)->gso_size;
+	struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
+	u8 l4_protocol = 0;
+
+	if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
+		ena_tx_ctx->l4_csum_enable = 1;
+		if (mss) {
+			ena_tx_ctx->tso_enable = 1;
+			ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
+			ena_tx_ctx->l4_csum_partial = 0;
+		} else {
+			ena_tx_ctx->tso_enable = 0;
+			ena_meta->l4_hdr_len = 0;
+			ena_tx_ctx->l4_csum_partial = 1;
+		}
+
+		switch (ip_hdr(skb)->version) {
+		case IPVERSION:
+			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
+			if (ip_hdr(skb)->frag_off & htons(IP_DF))
+				ena_tx_ctx->df = 1;
+			if (mss)
+				ena_tx_ctx->l3_csum_enable = 1;
+			l4_protocol = ip_hdr(skb)->protocol;
+			break;
+		case 6:
+			ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
+			l4_protocol = ipv6_hdr(skb)->nexthdr;
+			break;
+		default:
+			break;
+		}
+
+		if (l4_protocol == IPPROTO_TCP)
+			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
+		else
+			ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
+
+		ena_meta->mss = mss;
+		ena_meta->l3_hdr_len = skb_network_header_len(skb);
+		ena_meta->l3_hdr_offset = skb_network_offset(skb);
+		ena_tx_ctx->meta_valid = 1;
+
+	} else {
+		ena_tx_ctx->meta_valid = 0;
+	}
+}
+
+static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
+				       struct sk_buff *skb)
+{
+	int num_frags, header_len, rc;
+
+	num_frags = skb_shinfo(skb)->nr_frags;
+	header_len = skb_headlen(skb);
+
+	if (num_frags < tx_ring->sgl_size)
+		return 0;
+
+	if ((num_frags == tx_ring->sgl_size) &&
+	    (header_len < tx_ring->tx_max_header_size))
+		return 0;
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.linearize++;
+	u64_stats_update_end(&tx_ring->syncp);
+
+	rc = skb_linearize(skb);
+	if (unlikely(rc)) {
+		u64_stats_update_begin(&tx_ring->syncp);
+		tx_ring->tx_stats.linearize_failed++;
+		u64_stats_update_end(&tx_ring->syncp);
+	}
+
+	return rc;
+}
+
+/* Called with netif_tx_lock. */
+static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+	struct ena_tx_buffer *tx_info;
+	struct ena_com_tx_ctx ena_tx_ctx;
+	struct ena_ring *tx_ring;
+	struct netdev_queue *txq;
+	struct ena_com_buf *ena_buf;
+	void *push_hdr;
+	u32 len, last_frag;
+	u16 next_to_use;
+	u16 req_id;
+	u16 push_len;
+	u16 header_len;
+	dma_addr_t dma;
+	int qid, rc, nb_hw_desc;
+	int i = -1;
+
+	netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
+	/*  Determine which tx ring we will be placed on */
+	qid = skb_get_queue_mapping(skb);
+	tx_ring = &adapter->tx_ring[qid];
+	txq = netdev_get_tx_queue(dev, qid);
+
+	rc = ena_check_and_linearize_skb(tx_ring, skb);
+	if (unlikely(rc))
+		goto error_drop_packet;
+
+	skb_tx_timestamp(skb);
+	len = skb_headlen(skb);
+
+	next_to_use = tx_ring->next_to_use;
+	req_id = tx_ring->free_tx_ids[next_to_use];
+	tx_info = &tx_ring->tx_buffer_info[req_id];
+	tx_info->num_of_bufs = 0;
+
+	WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
+	ena_buf = tx_info->bufs;
+	tx_info->skb = skb;
+
+	if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		/* prepared the push buffer */
+		push_len = min_t(u32, len, tx_ring->tx_max_header_size);
+		header_len = push_len;
+		push_hdr = skb->data;
+	} else {
+		push_len = 0;
+		header_len = min_t(u32, len, tx_ring->tx_max_header_size);
+		push_hdr = NULL;
+	}
+
+	netif_dbg(adapter, tx_queued, dev,
+		  "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
+		  push_hdr, push_len);
+
+	if (len > push_len) {
+		dma = dma_map_single(tx_ring->dev, skb->data + push_len,
+				     len - push_len, DMA_TO_DEVICE);
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto error_report_dma_error;
+
+		ena_buf->paddr = dma;
+		ena_buf->len = len - push_len;
+
+		ena_buf++;
+		tx_info->num_of_bufs++;
+	}
+
+	last_frag = skb_shinfo(skb)->nr_frags;
+
+	for (i = 0; i < last_frag; i++) {
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+		len = skb_frag_size(frag);
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len,
+				       DMA_TO_DEVICE);
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto error_report_dma_error;
+
+		ena_buf->paddr = dma;
+		ena_buf->len = len;
+		ena_buf++;
+	}
+
+	tx_info->num_of_bufs += last_frag;
+
+	memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
+	ena_tx_ctx.ena_bufs = tx_info->bufs;
+	ena_tx_ctx.push_header = push_hdr;
+	ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+	ena_tx_ctx.req_id = req_id;
+	ena_tx_ctx.header_len = header_len;
+
+	/* set flags and meta data */
+	ena_tx_csum(&ena_tx_ctx, skb);
+
+	/* prepare the packet's descriptors to dma engine */
+	rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
+				&nb_hw_desc);
+
+	if (unlikely(rc)) {
+		netif_err(adapter, tx_queued, dev,
+			  "failed to prepare tx bufs\n");
+		u64_stats_update_begin(&tx_ring->syncp);
+		tx_ring->tx_stats.queue_stop++;
+		tx_ring->tx_stats.prepare_ctx_err++;
+		u64_stats_update_end(&tx_ring->syncp);
+		netif_tx_stop_queue(txq);
+		goto error_unmap_dma;
+	}
+
+	netdev_tx_sent_queue(txq, skb->len);
+
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.cnt++;
+	tx_ring->tx_stats.bytes += skb->len;
+	u64_stats_update_end(&tx_ring->syncp);
+
+	tx_info->tx_descs = nb_hw_desc;
+	tx_info->last_jiffies = jiffies;
+
+	tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
+		tx_ring->ring_size);
+
+	/* This WMB is aimed to:
+	 * 1 - perform smp barrier before reading next_to_completion
+	 * 2 - make sure the desc were written before trigger DB
+	 */
+	wmb();
+
+	/* stop the queue when no more space available, the packet can have up
+	 * to sgl_size + 2. one for the meta descriptor and one for header
+	 * (if the header is larger than tx_max_header_size).
+	 */
+	if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) <
+		     (tx_ring->sgl_size + 2))) {
+		netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
+			  __func__, qid);
+
+		netif_tx_stop_queue(txq);
+		u64_stats_update_begin(&tx_ring->syncp);
+		tx_ring->tx_stats.queue_stop++;
+		u64_stats_update_end(&tx_ring->syncp);
+
+		/* There is a rare condition where this function decide to
+		 * stop the queue but meanwhile clean_tx_irq updates
+		 * next_to_completion and terminates.
+		 * The queue will remain stopped forever.
+		 * To solve this issue this function perform rmb, check
+		 * the wakeup condition and wake up the queue if needed.
+		 */
+		smp_rmb();
+
+		if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq)
+				> ENA_TX_WAKEUP_THRESH) {
+			netif_tx_wake_queue(txq);
+			u64_stats_update_begin(&tx_ring->syncp);
+			tx_ring->tx_stats.queue_wakeup++;
+			u64_stats_update_end(&tx_ring->syncp);
+		}
+	}
+
+	if (netif_xmit_stopped(txq) || !skb->xmit_more) {
+		/* trigger the dma engine */
+		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+		u64_stats_update_begin(&tx_ring->syncp);
+		tx_ring->tx_stats.doorbells++;
+		u64_stats_update_end(&tx_ring->syncp);
+	}
+
+	return NETDEV_TX_OK;
+
+error_report_dma_error:
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.dma_mapping_err++;
+	u64_stats_update_end(&tx_ring->syncp);
+	netdev_warn(adapter->netdev, "failed to map skb\n");
+
+	tx_info->skb = NULL;
+
+error_unmap_dma:
+	if (i >= 0) {
+		/* save value of frag that failed */
+		last_frag = i;
+
+		/* start back at beginning and unmap skb */
+		tx_info->skb = NULL;
+		ena_buf = tx_info->bufs;
+		dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
+				 dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
+
+		/* unmap remaining mapped pages */
+		for (i = 0; i < last_frag; i++) {
+			ena_buf++;
+			dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
+				       dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
+		}
+	}
+
+error_drop_packet:
+
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void ena_netpoll(struct net_device *netdev)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	int i;
+
+	for (i = 0; i < adapter->num_queues; i++)
+		napi_schedule(&adapter->ena_napi[i].napi);
+}
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+
+static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
+			    void *accel_priv, select_queue_fallback_t fallback)
+{
+	u16 qid;
+	/* we suspect that this is good for in--kernel network services that
+	 * want to loop incoming skb rx to tx in normal user generated traffic,
+	 * most probably we will not get to this
+	 */
+	if (skb_rx_queue_recorded(skb))
+		qid = skb_get_rx_queue(skb);
+	else
+		qid = fallback(dev, skb);
+
+	return qid;
+}
+
+static void ena_config_host_info(struct ena_com_dev *ena_dev)
+{
+	struct ena_admin_host_info *host_info;
+	int rc;
+
+	/* Allocate only the host info */
+	rc = ena_com_allocate_host_info(ena_dev);
+	if (rc) {
+		pr_err("Cannot allocate host info\n");
+		return;
+	}
+
+	host_info = ena_dev->host_attr.host_info;
+
+	host_info->os_type = ENA_ADMIN_OS_LINUX;
+	host_info->kernel_ver = LINUX_VERSION_CODE;
+	strncpy(host_info->kernel_ver_str, utsname()->version,
+		sizeof(host_info->kernel_ver_str) - 1);
+	host_info->os_dist = 0;
+	strncpy(host_info->os_dist_str, utsname()->release,
+		sizeof(host_info->os_dist_str) - 1);
+	host_info->driver_version =
+		(DRV_MODULE_VER_MAJOR) |
+		(DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
+		(DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT);
+
+	rc = ena_com_set_host_attributes(ena_dev);
+	if (rc) {
+		if (rc == -EPERM)
+			pr_warn("Cannot set host attributes\n");
+		else
+			pr_err("Cannot set host attributes\n");
+
+		goto err;
+	}
+
+	return;
+
+err:
+	ena_com_delete_host_info(ena_dev);
+}
+
+static void ena_config_debug_area(struct ena_adapter *adapter)
+{
+	u32 debug_area_size;
+	int rc, ss_count;
+
+	ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
+	if (ss_count <= 0) {
+		netif_err(adapter, drv, adapter->netdev,
+			  "SS count is negative\n");
+		return;
+	}
+
+	/* allocate 32 bytes for each string and 64bit for the value */
+	debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
+
+	rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
+	if (rc) {
+		pr_err("Cannot allocate debug area\n");
+		return;
+	}
+
+	rc = ena_com_set_host_attributes(adapter->ena_dev);
+	if (rc) {
+		if (rc == -EPERM)
+			netif_warn(adapter, drv, adapter->netdev,
+				   "Cannot set host attributes\n");
+		else
+			netif_err(adapter, drv, adapter->netdev,
+				  "Cannot set host attributes\n");
+		goto err;
+	}
+
+	return;
+err:
+	ena_com_delete_debug_area(adapter->ena_dev);
+}
+
+static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev,
+						 struct rtnl_link_stats64 *stats)
+{
+	struct ena_adapter *adapter = netdev_priv(netdev);
+	struct ena_admin_basic_stats ena_stats;
+	int rc;
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		return NULL;
+
+	rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats);
+	if (rc)
+		return NULL;
+
+	stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) |
+		ena_stats.tx_bytes_low;
+	stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) |
+		ena_stats.rx_bytes_low;
+
+	stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) |
+		ena_stats.rx_pkts_low;
+	stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) |
+		ena_stats.tx_pkts_low;
+
+	stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) |
+		ena_stats.rx_drops_low;
+
+	stats->multicast = 0;
+	stats->collisions = 0;
+
+	stats->rx_length_errors = 0;
+	stats->rx_crc_errors = 0;
+	stats->rx_frame_errors = 0;
+	stats->rx_fifo_errors = 0;
+	stats->rx_missed_errors = 0;
+	stats->tx_window_errors = 0;
+
+	stats->rx_errors = 0;
+	stats->tx_errors = 0;
+
+	return stats;
+}
+
+static const struct net_device_ops ena_netdev_ops = {
+	.ndo_open		= ena_open,
+	.ndo_stop		= ena_close,
+	.ndo_start_xmit		= ena_start_xmit,
+	.ndo_select_queue	= ena_select_queue,
+	.ndo_get_stats64	= ena_get_stats64,
+	.ndo_tx_timeout		= ena_tx_timeout,
+	.ndo_change_mtu		= ena_change_mtu,
+	.ndo_set_mac_address	= NULL,
+	.ndo_validate_addr	= eth_validate_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= ena_netpoll,
+#endif /* CONFIG_NET_POLL_CONTROLLER */
+};
+
+static void ena_device_io_suspend(struct work_struct *work)
+{
+	struct ena_adapter *adapter =
+		container_of(work, struct ena_adapter, suspend_io_task);
+	struct net_device *netdev = adapter->netdev;
+
+	/* ena_napi_disable_all disables only the IO handling.
+	 * We are still subject to AENQ keep alive watchdog.
+	 */
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.io_suspend++;
+	u64_stats_update_begin(&adapter->syncp);
+	ena_napi_disable_all(adapter);
+	netif_tx_lock(netdev);
+	netif_device_detach(netdev);
+	netif_tx_unlock(netdev);
+}
+
+static void ena_device_io_resume(struct work_struct *work)
+{
+	struct ena_adapter *adapter =
+		container_of(work, struct ena_adapter, resume_io_task);
+	struct net_device *netdev = adapter->netdev;
+
+	u64_stats_update_begin(&adapter->syncp);
+	adapter->dev_stats.io_resume++;
+	u64_stats_update_end(&adapter->syncp);
+
+	netif_device_attach(netdev);
+	ena_napi_enable_all(adapter);
+}
+
+static int ena_device_validate_params(struct ena_adapter *adapter,
+				      struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	struct net_device *netdev = adapter->netdev;
+	int rc;
+
+	rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
+			      adapter->mac_addr);
+	if (!rc) {
+		netif_err(adapter, drv, netdev,
+			  "Error, mac address are different\n");
+		return -EINVAL;
+	}
+
+	if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) ||
+	    (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) {
+		netif_err(adapter, drv, netdev,
+			  "Error, device doesn't support enough queues\n");
+		return -EINVAL;
+	}
+
+	if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
+		netif_err(adapter, drv, netdev,
+			  "Error, device max mtu is smaller than netdev MTU\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev,
+			   struct ena_com_dev_get_features_ctx *get_feat_ctx,
+			   bool *wd_state)
+{
+	struct device *dev = &pdev->dev;
+	bool readless_supported;
+	u32 aenq_groups;
+	int dma_width;
+	int rc;
+
+	rc = ena_com_mmio_reg_read_request_init(ena_dev);
+	if (rc) {
+		dev_err(dev, "failed to init mmio read less\n");
+		return rc;
+	}
+
+	/* The PCIe configuration space revision id indicate if mmio reg
+	 * read is disabled
+	 */
+	readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
+	ena_com_set_mmio_read_mode(ena_dev, readless_supported);
+
+	rc = ena_com_dev_reset(ena_dev);
+	if (rc) {
+		dev_err(dev, "Can not reset device\n");
+		goto err_mmio_read_less;
+	}
+
+	rc = ena_com_validate_version(ena_dev);
+	if (rc) {
+		dev_err(dev, "device version is too low\n");
+		goto err_mmio_read_less;
+	}
+
+	dma_width = ena_com_get_dma_width(ena_dev);
+	if (dma_width < 0) {
+		dev_err(dev, "Invalid dma width value %d", dma_width);
+		rc = dma_width;
+		goto err_mmio_read_less;
+	}
+
+	rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+	if (rc) {
+		dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc);
+		goto err_mmio_read_less;
+	}
+
+	rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width));
+	if (rc) {
+		dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n",
+			rc);
+		goto err_mmio_read_less;
+	}
+
+	/* ENA admin level init */
+	rc = ena_com_admin_init(ena_dev, &aenq_handlers, true);
+	if (rc) {
+		dev_err(dev,
+			"Can not initialize ena admin queue with device\n");
+		goto err_mmio_read_less;
+	}
+
+	/* To enable the msix interrupts the driver needs to know the number
+	 * of queues. So the driver uses polling mode to retrieve this
+	 * information
+	 */
+	ena_com_set_admin_polling_mode(ena_dev, true);
+
+	/* Get Device Attributes*/
+	rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
+	if (rc) {
+		dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
+		goto err_admin_init;
+	}
+
+	/* Try to turn all the available aenq groups */
+	aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
+		BIT(ENA_ADMIN_FATAL_ERROR) |
+		BIT(ENA_ADMIN_WARNING) |
+		BIT(ENA_ADMIN_NOTIFICATION) |
+		BIT(ENA_ADMIN_KEEP_ALIVE);
+
+	aenq_groups &= get_feat_ctx->aenq.supported_groups;
+
+	rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
+	if (rc) {
+		dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
+		goto err_admin_init;
+	}
+
+	*wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
+
+	ena_config_host_info(ena_dev);
+
+	return 0;
+
+err_admin_init:
+	ena_com_admin_destroy(ena_dev);
+err_mmio_read_less:
+	ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+	return rc;
+}
+
+static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter,
+						    int io_vectors)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct device *dev = &adapter->pdev->dev;
+	int rc;
+
+	rc = ena_enable_msix(adapter, io_vectors);
+	if (rc) {
+		dev_err(dev, "Can not reserve msix vectors\n");
+		return rc;
+	}
+
+	ena_setup_mgmnt_intr(adapter);
+
+	rc = ena_request_mgmnt_irq(adapter);
+	if (rc) {
+		dev_err(dev, "Can not setup management interrupts\n");
+		goto err_disable_msix;
+	}
+
+	ena_com_set_admin_polling_mode(ena_dev, false);
+
+	ena_com_admin_aenq_enable(ena_dev);
+
+	return 0;
+
+err_disable_msix:
+	ena_disable_msix(adapter);
+
+	return rc;
+}
+
+static void ena_fw_reset_device(struct work_struct *work)
+{
+	struct ena_com_dev_get_features_ctx get_feat_ctx;
+	struct ena_adapter *adapter =
+		container_of(work, struct ena_adapter, reset_task);
+	struct net_device *netdev = adapter->netdev;
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct pci_dev *pdev = adapter->pdev;
+	bool dev_up, wd_state;
+	int rc;
+
+	del_timer_sync(&adapter->timer_service);
+
+	rtnl_lock();
+
+	dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+	ena_com_set_admin_running_state(ena_dev, false);
+
+	/* After calling ena_close the tx queues and the napi
+	 * are disabled so no one can interfere or touch the
+	 * data structures
+	 */
+	ena_close(netdev);
+
+	rc = ena_com_dev_reset(ena_dev);
+	if (rc) {
+		dev_err(&pdev->dev, "Device reset failed\n");
+		goto err;
+	}
+
+	ena_free_mgmnt_irq(adapter);
+
+	ena_disable_msix(adapter);
+
+	ena_com_abort_admin_commands(ena_dev);
+
+	ena_com_wait_for_abort_completion(ena_dev);
+
+	ena_com_admin_destroy(ena_dev);
+
+	ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+	/* Finish with the destroy part. Start the init part */
+
+	rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
+	if (rc) {
+		dev_err(&pdev->dev, "Can not initialize device\n");
+		goto err;
+	}
+	adapter->wd_state = wd_state;
+
+	rc = ena_device_validate_params(adapter, &get_feat_ctx);
+	if (rc) {
+		dev_err(&pdev->dev, "Validation of device parameters failed\n");
+		goto err_device_destroy;
+	}
+
+	rc = ena_enable_msix_and_set_admin_interrupts(adapter,
+						      adapter->num_queues);
+	if (rc) {
+		dev_err(&pdev->dev, "Enable MSI-X failed\n");
+		goto err_device_destroy;
+	}
+	/* If the interface was up before the reset bring it up */
+	if (dev_up) {
+		rc = ena_up(adapter);
+		if (rc) {
+			dev_err(&pdev->dev, "Failed to create I/O queues\n");
+			goto err_disable_msix;
+		}
+	}
+
+	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
+
+	rtnl_unlock();
+
+	dev_err(&pdev->dev, "Device reset completed successfully\n");
+
+	return;
+err_disable_msix:
+	ena_free_mgmnt_irq(adapter);
+	ena_disable_msix(adapter);
+err_device_destroy:
+	ena_com_admin_destroy(ena_dev);
+err:
+	rtnl_unlock();
+
+	dev_err(&pdev->dev,
+		"Reset attempt failed. Can not reset the device\n");
+}
+
+static void check_for_missing_tx_completions(struct ena_adapter *adapter)
+{
+	struct ena_tx_buffer *tx_buf;
+	unsigned long last_jiffies;
+	struct ena_ring *tx_ring;
+	int i, j, budget;
+	u32 missed_tx;
+
+	/* Make sure the driver doesn't turn the device in other process */
+	smp_rmb();
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		return;
+
+	budget = ENA_MONITORED_TX_QUEUES;
+
+	for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
+		tx_ring = &adapter->tx_ring[i];
+
+		for (j = 0; j < tx_ring->ring_size; j++) {
+			tx_buf = &tx_ring->tx_buffer_info[j];
+			last_jiffies = tx_buf->last_jiffies;
+			if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) {
+				netif_notice(adapter, tx_err, adapter->netdev,
+					     "Found a Tx that wasn't completed on time, qid %d, index %d.\n",
+					     tx_ring->qid, j);
+
+				u64_stats_update_begin(&tx_ring->syncp);
+				missed_tx = tx_ring->tx_stats.missing_tx_comp++;
+				u64_stats_update_end(&tx_ring->syncp);
+
+				/* Clear last jiffies so the lost buffer won't
+				 * be counted twice.
+				 */
+				tx_buf->last_jiffies = 0;
+
+				if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) {
+					netif_err(adapter, tx_err, adapter->netdev,
+						  "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n",
+						  missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS);
+					set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+				}
+			}
+		}
+
+		budget--;
+		if (!budget)
+			break;
+	}
+
+	adapter->last_monitored_tx_qid = i % adapter->num_queues;
+}
+
+/* Check for keep alive expiration */
+static void check_for_missing_keep_alive(struct ena_adapter *adapter)
+{
+	unsigned long keep_alive_expired;
+
+	if (!adapter->wd_state)
+		return;
+
+	keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies
+					   + ENA_DEVICE_KALIVE_TIMEOUT);
+	if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
+		netif_err(adapter, drv, adapter->netdev,
+			  "Keep alive watchdog timeout.\n");
+		u64_stats_update_begin(&adapter->syncp);
+		adapter->dev_stats.wd_expired++;
+		u64_stats_update_end(&adapter->syncp);
+		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+	}
+}
+
+static void check_for_admin_com_state(struct ena_adapter *adapter)
+{
+	if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
+		netif_err(adapter, drv, adapter->netdev,
+			  "ENA admin queue is not in running state!\n");
+		u64_stats_update_begin(&adapter->syncp);
+		adapter->dev_stats.admin_q_pause++;
+		u64_stats_update_end(&adapter->syncp);
+		set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+	}
+}
+
+static void ena_update_host_info(struct ena_admin_host_info *host_info,
+				 struct net_device *netdev)
+{
+	host_info->supported_network_features[0] =
+		netdev->features & GENMASK_ULL(31, 0);
+	host_info->supported_network_features[1] =
+		(netdev->features & GENMASK_ULL(63, 32)) >> 32;
+}
+
+static void ena_timer_service(unsigned long data)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)data;
+	u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
+	struct ena_admin_host_info *host_info =
+		adapter->ena_dev->host_attr.host_info;
+
+	check_for_missing_keep_alive(adapter);
+
+	check_for_admin_com_state(adapter);
+
+	check_for_missing_tx_completions(adapter);
+
+	if (debug_area)
+		ena_dump_stats_to_buf(adapter, debug_area);
+
+	if (host_info)
+		ena_update_host_info(host_info, adapter->netdev);
+
+	if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+		netif_err(adapter, drv, adapter->netdev,
+			  "Trigger reset is on\n");
+		ena_dump_stats_to_dmesg(adapter);
+		queue_work(ena_wq, &adapter->reset_task);
+		return;
+	}
+
+	/* Reset the timer */
+	mod_timer(&adapter->timer_service, jiffies + HZ);
+}
+
+static int ena_calc_io_queue_num(struct pci_dev *pdev,
+				 struct ena_com_dev *ena_dev,
+				 struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	int io_sq_num, io_queue_num;
+
+	/* In case of LLQ use the llq number in the get feature cmd */
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		io_sq_num = get_feat_ctx->max_queues.max_llq_num;
+
+		if (io_sq_num == 0) {
+			dev_err(&pdev->dev,
+				"Trying to use LLQ but llq_num is 0. Fall back into regular queues\n");
+
+			ena_dev->tx_mem_queue_type =
+				ENA_ADMIN_PLACEMENT_POLICY_HOST;
+			io_sq_num = get_feat_ctx->max_queues.max_sq_num;
+		}
+	} else {
+		io_sq_num = get_feat_ctx->max_queues.max_sq_num;
+	}
+
+	io_queue_num = min_t(int, num_possible_cpus(), ENA_MAX_NUM_IO_QUEUES);
+	io_queue_num = min_t(int, io_queue_num, io_sq_num);
+	io_queue_num = min_t(int, io_queue_num,
+			     get_feat_ctx->max_queues.max_cq_num);
+	/* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */
+	io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1);
+	if (unlikely(!io_queue_num)) {
+		dev_err(&pdev->dev, "The device doesn't have io queues\n");
+		return -EFAULT;
+	}
+
+	return io_queue_num;
+}
+
+static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
+			      struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	bool has_mem_bar;
+
+	has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR);
+
+	/* Enable push mode if device supports LLQ */
+	if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0))
+		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV;
+	else
+		ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
+}
+
+static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
+				 struct net_device *netdev)
+{
+	netdev_features_t dev_features = 0;
+
+	/* Set offload features */
+	if (feat->offload.tx &
+		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
+		dev_features |= NETIF_F_IP_CSUM;
+
+	if (feat->offload.tx &
+		ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
+		dev_features |= NETIF_F_IPV6_CSUM;
+
+	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
+		dev_features |= NETIF_F_TSO;
+
+	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
+		dev_features |= NETIF_F_TSO6;
+
+	if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
+		dev_features |= NETIF_F_TSO_ECN;
+
+	if (feat->offload.rx_supported &
+		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
+		dev_features |= NETIF_F_RXCSUM;
+
+	if (feat->offload.rx_supported &
+		ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
+		dev_features |= NETIF_F_RXCSUM;
+
+	netdev->features =
+		dev_features |
+		NETIF_F_SG |
+		NETIF_F_NTUPLE |
+		NETIF_F_RXHASH |
+		NETIF_F_HIGHDMA;
+
+	netdev->hw_features |= netdev->features;
+	netdev->vlan_features |= netdev->features;
+}
+
+static void ena_set_conf_feat_params(struct ena_adapter *adapter,
+				     struct ena_com_dev_get_features_ctx *feat)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	/* Copy mac address */
+	if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
+		eth_hw_addr_random(netdev);
+		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
+	} else {
+		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
+		ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
+	}
+
+	/* Set offload features */
+	ena_set_dev_offloads(feat, netdev);
+
+	adapter->max_mtu = feat->dev_attr.max_mtu;
+}
+
+static int ena_rss_init_default(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct device *dev = &adapter->pdev->dev;
+	int rc, i;
+	u32 val;
+
+	rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
+	if (unlikely(rc)) {
+		dev_err(dev, "Cannot init indirect table\n");
+		goto err_rss_init;
+	}
+
+	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+		val = ethtool_rxfh_indir_default(i, adapter->num_queues);
+		rc = ena_com_indirect_table_fill_entry(ena_dev, i,
+						       ENA_IO_RXQ_IDX(val));
+		if (unlikely(rc && (rc != -EPERM))) {
+			dev_err(dev, "Cannot fill indirect table\n");
+			goto err_fill_indir;
+		}
+	}
+
+	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
+					ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
+	if (unlikely(rc && (rc != -EPERM))) {
+		dev_err(dev, "Cannot fill hash function\n");
+		goto err_fill_indir;
+	}
+
+	rc = ena_com_set_default_hash_ctrl(ena_dev);
+	if (unlikely(rc && (rc != -EPERM))) {
+		dev_err(dev, "Cannot fill hash control\n");
+		goto err_fill_indir;
+	}
+
+	return 0;
+
+err_fill_indir:
+	ena_com_rss_destroy(ena_dev);
+err_rss_init:
+
+	return rc;
+}
+
+static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
+{
+	int release_bars;
+
+	release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
+	pci_release_selected_regions(pdev, release_bars);
+}
+
+static int ena_calc_queue_size(struct pci_dev *pdev,
+			       struct ena_com_dev *ena_dev,
+			       u16 *max_tx_sgl_size,
+			       u16 *max_rx_sgl_size,
+			       struct ena_com_dev_get_features_ctx *get_feat_ctx)
+{
+	u32 queue_size = ENA_DEFAULT_RING_SIZE;
+
+	queue_size = min_t(u32, queue_size,
+			   get_feat_ctx->max_queues.max_cq_depth);
+	queue_size = min_t(u32, queue_size,
+			   get_feat_ctx->max_queues.max_sq_depth);
+
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
+		queue_size = min_t(u32, queue_size,
+				   get_feat_ctx->max_queues.max_llq_depth);
+
+	queue_size = rounddown_pow_of_two(queue_size);
+
+	if (unlikely(!queue_size)) {
+		dev_err(&pdev->dev, "Invalid queue size\n");
+		return -EFAULT;
+	}
+
+	*max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+				 get_feat_ctx->max_queues.max_packet_tx_descs);
+	*max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
+				 get_feat_ctx->max_queues.max_packet_rx_descs);
+
+	return queue_size;
+}
+
+/* ena_probe - Device Initialization Routine
+ * @pdev: PCI device information struct
+ * @ent: entry in ena_pci_tbl
+ *
+ * Returns 0 on success, negative on failure
+ *
+ * ena_probe initializes an adapter identified by a pci_dev structure.
+ * The OS initialization, configuring of the adapter private structure,
+ * and a hardware reset occur.
+ */
+static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct ena_com_dev_get_features_ctx get_feat_ctx;
+	static int version_printed;
+	struct net_device *netdev;
+	struct ena_adapter *adapter;
+	struct ena_com_dev *ena_dev = NULL;
+	static int adapters_found;
+	int io_queue_num, bars, rc;
+	int queue_size;
+	u16 tx_sgl_size = 0;
+	u16 rx_sgl_size = 0;
+	bool wd_state;
+
+	dev_dbg(&pdev->dev, "%s\n", __func__);
+
+	if (version_printed++ == 0)
+		dev_info(&pdev->dev, "%s", version);
+
+	rc = pci_enable_device_mem(pdev);
+	if (rc) {
+		dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
+		return rc;
+	}
+
+	pci_set_master(pdev);
+
+	ena_dev = vzalloc(sizeof(*ena_dev));
+	if (!ena_dev) {
+		rc = -ENOMEM;
+		goto err_disable_device;
+	}
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
+	rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+	if (rc) {
+		dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
+			rc);
+		goto err_free_ena_dev;
+	}
+
+	ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR),
+				   pci_resource_len(pdev, ENA_REG_BAR));
+	if (!ena_dev->reg_bar) {
+		dev_err(&pdev->dev, "failed to remap regs bar\n");
+		rc = -EFAULT;
+		goto err_free_region;
+	}
+
+	ena_dev->dmadev = &pdev->dev;
+
+	rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
+	if (rc) {
+		dev_err(&pdev->dev, "ena device init failed\n");
+		if (rc == -ETIME)
+			rc = -EPROBE_DEFER;
+		goto err_free_region;
+	}
+
+	ena_set_push_mode(pdev, ena_dev, &get_feat_ctx);
+
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
+		ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR),
+					      pci_resource_len(pdev, ENA_MEM_BAR));
+		if (!ena_dev->mem_bar) {
+			rc = -EFAULT;
+			goto err_device_destroy;
+		}
+	}
+
+	/* initial Tx interrupt delay, Assumes 1 usec granularity.
+	* Updated during device initialization with the real granularity
+	*/
+	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
+	io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx);
+	queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size,
+					 &rx_sgl_size, &get_feat_ctx);
+	if ((queue_size <= 0) || (io_queue_num <= 0)) {
+		rc = -EFAULT;
+		goto err_device_destroy;
+	}
+
+	dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n",
+		 io_queue_num, queue_size);
+
+	/* dev zeroed in init_etherdev */
+	netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num);
+	if (!netdev) {
+		dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
+		rc = -ENOMEM;
+		goto err_device_destroy;
+	}
+
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	adapter = netdev_priv(netdev);
+	pci_set_drvdata(pdev, adapter);
+
+	adapter->ena_dev = ena_dev;
+	adapter->netdev = netdev;
+	adapter->pdev = pdev;
+
+	ena_set_conf_feat_params(adapter, &get_feat_ctx);
+
+	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+	adapter->tx_ring_size = queue_size;
+	adapter->rx_ring_size = queue_size;
+
+	adapter->max_tx_sgl_size = tx_sgl_size;
+	adapter->max_rx_sgl_size = rx_sgl_size;
+
+	adapter->num_queues = io_queue_num;
+	adapter->last_monitored_tx_qid = 0;
+
+	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
+	adapter->wd_state = wd_state;
+
+	snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
+
+	rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
+	if (rc) {
+		dev_err(&pdev->dev,
+			"Failed to query interrupt moderation feature\n");
+		goto err_netdev_destroy;
+	}
+	ena_init_io_rings(adapter);
+
+	netdev->netdev_ops = &ena_netdev_ops;
+	netdev->watchdog_timeo = TX_TIMEOUT;
+	ena_set_ethtool_ops(netdev);
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+
+	u64_stats_init(&adapter->syncp);
+
+	rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num);
+	if (rc) {
+		dev_err(&pdev->dev,
+			"Failed to enable and set the admin interrupts\n");
+		goto err_worker_destroy;
+	}
+	rc = ena_rss_init_default(adapter);
+	if (rc && (rc != -EPERM)) {
+		dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
+		goto err_free_msix;
+	}
+
+	ena_config_debug_area(adapter);
+
+	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
+
+	netif_carrier_off(netdev);
+
+	rc = register_netdev(netdev);
+	if (rc) {
+		dev_err(&pdev->dev, "Cannot register net device\n");
+		goto err_rss;
+	}
+
+	INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend);
+	INIT_WORK(&adapter->resume_io_task, ena_device_io_resume);
+	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
+
+	adapter->last_keep_alive_jiffies = jiffies;
+
+	init_timer(&adapter->timer_service);
+	adapter->timer_service.expires = round_jiffies(jiffies + HZ);
+	adapter->timer_service.function = ena_timer_service;
+	adapter->timer_service.data = (unsigned long)adapter;
+
+	add_timer(&adapter->timer_service);
+
+	dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n",
+		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
+		 netdev->dev_addr, io_queue_num);
+
+	set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
+
+	adapters_found++;
+
+	return 0;
+
+err_rss:
+	ena_com_delete_debug_area(ena_dev);
+	ena_com_rss_destroy(ena_dev);
+err_free_msix:
+	ena_com_dev_reset(ena_dev);
+	ena_free_mgmnt_irq(adapter);
+	ena_disable_msix(adapter);
+err_worker_destroy:
+	ena_com_destroy_interrupt_moderation(ena_dev);
+	del_timer(&adapter->timer_service);
+	cancel_work_sync(&adapter->suspend_io_task);
+	cancel_work_sync(&adapter->resume_io_task);
+err_netdev_destroy:
+	free_netdev(netdev);
+err_device_destroy:
+	ena_com_delete_host_info(ena_dev);
+	ena_com_admin_destroy(ena_dev);
+err_free_region:
+	ena_release_bars(ena_dev, pdev);
+err_free_ena_dev:
+	vfree(ena_dev);
+err_disable_device:
+	pci_disable_device(pdev);
+	return rc;
+}
+
+/*****************************************************************************/
+static int ena_sriov_configure(struct pci_dev *dev, int numvfs)
+{
+	int rc;
+
+	if (numvfs > 0) {
+		rc = pci_enable_sriov(dev, numvfs);
+		if (rc != 0) {
+			dev_err(&dev->dev,
+				"pci_enable_sriov failed to enable: %d vfs with the error: %d\n",
+				numvfs, rc);
+			return rc;
+		}
+
+		return numvfs;
+	}
+
+	if (numvfs == 0) {
+		pci_disable_sriov(dev);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/* ena_remove - Device Removal Routine
+ * @pdev: PCI device information struct
+ *
+ * ena_remove is called by the PCI subsystem to alert the driver
+ * that it should release a PCI device.
+ */
+static void ena_remove(struct pci_dev *pdev)
+{
+	struct ena_adapter *adapter = pci_get_drvdata(pdev);
+	struct ena_com_dev *ena_dev;
+	struct net_device *netdev;
+
+	if (!adapter)
+		/* This device didn't load properly and it's resources
+		 * already released, nothing to do
+		 */
+		return;
+
+	ena_dev = adapter->ena_dev;
+	netdev = adapter->netdev;
+
+#ifdef CONFIG_RFS_ACCEL
+	if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) {
+		free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+		netdev->rx_cpu_rmap = NULL;
+	}
+#endif /* CONFIG_RFS_ACCEL */
+
+	unregister_netdev(netdev);
+	del_timer_sync(&adapter->timer_service);
+
+	cancel_work_sync(&adapter->reset_task);
+
+	cancel_work_sync(&adapter->suspend_io_task);
+
+	cancel_work_sync(&adapter->resume_io_task);
+
+	ena_com_dev_reset(ena_dev);
+
+	ena_free_mgmnt_irq(adapter);
+
+	ena_disable_msix(adapter);
+
+	free_netdev(netdev);
+
+	ena_com_mmio_reg_read_request_destroy(ena_dev);
+
+	ena_com_abort_admin_commands(ena_dev);
+
+	ena_com_wait_for_abort_completion(ena_dev);
+
+	ena_com_admin_destroy(ena_dev);
+
+	ena_com_rss_destroy(ena_dev);
+
+	ena_com_delete_debug_area(ena_dev);
+
+	ena_com_delete_host_info(ena_dev);
+
+	ena_release_bars(ena_dev, pdev);
+
+	pci_disable_device(pdev);
+
+	ena_com_destroy_interrupt_moderation(ena_dev);
+
+	vfree(ena_dev);
+}
+
+static struct pci_driver ena_pci_driver = {
+	.name		= DRV_MODULE_NAME,
+	.id_table	= ena_pci_tbl,
+	.probe		= ena_probe,
+	.remove		= ena_remove,
+	.sriov_configure = ena_sriov_configure,
+};
+
+static int __init ena_init(void)
+{
+	pr_info("%s", version);
+
+	ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
+	if (!ena_wq) {
+		pr_err("Failed to create workqueue\n");
+		return -ENOMEM;
+	}
+
+	return pci_register_driver(&ena_pci_driver);
+}
+
+static void __exit ena_cleanup(void)
+{
+	pci_unregister_driver(&ena_pci_driver);
+
+	if (ena_wq) {
+		destroy_workqueue(ena_wq);
+		ena_wq = NULL;
+	}
+}
+
+/******************************************************************************
+ ******************************** AENQ Handlers *******************************
+ *****************************************************************************/
+/* ena_update_on_link_change:
+ * Notify the network interface about the change in link status
+ */
+static void ena_update_on_link_change(void *adapter_data,
+				      struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+	struct ena_admin_aenq_link_change_desc *aenq_desc =
+		(struct ena_admin_aenq_link_change_desc *)aenq_e;
+	int status = aenq_desc->flags &
+		ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
+
+	if (status) {
+		netdev_dbg(adapter->netdev, "%s\n", __func__);
+		set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
+		netif_carrier_on(adapter->netdev);
+	} else {
+		clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
+		netif_carrier_off(adapter->netdev);
+	}
+}
+
+static void ena_keep_alive_wd(void *adapter_data,
+			      struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+
+	adapter->last_keep_alive_jiffies = jiffies;
+}
+
+static void ena_notification(void *adapter_data,
+			     struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
+
+	WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
+	     "Invalid group(%x) expected %x\n",
+	     aenq_e->aenq_common_desc.group,
+	     ENA_ADMIN_NOTIFICATION);
+
+	switch (aenq_e->aenq_common_desc.syndrom) {
+	case ENA_ADMIN_SUSPEND:
+		/* Suspend just the IO queues.
+		 * We deliberately don't suspend admin so the timer and
+		 * the keep_alive events should remain.
+		 */
+		queue_work(ena_wq, &adapter->suspend_io_task);
+		break;
+	case ENA_ADMIN_RESUME:
+		queue_work(ena_wq, &adapter->resume_io_task);
+		break;
+	default:
+		netif_err(adapter, drv, adapter->netdev,
+			  "Invalid aenq notification link state %d\n",
+			  aenq_e->aenq_common_desc.syndrom);
+	}
+}
+
+/* This handler will called for unknown event group or unimplemented handlers*/
+static void unimplemented_aenq_handler(void *data,
+				       struct ena_admin_aenq_entry *aenq_e)
+{
+	struct ena_adapter *adapter = (struct ena_adapter *)data;
+
+	netif_err(adapter, drv, adapter->netdev,
+		  "Unknown event was received or event with unimplemented handler\n");
+}
+
+static struct ena_aenq_handlers aenq_handlers = {
+	.handlers = {
+		[ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
+		[ENA_ADMIN_NOTIFICATION] = ena_notification,
+		[ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
+	},
+	.unimplemented_handler = unimplemented_aenq_handler
+};
+
+module_init(ena_init);
+module_exit(ena_cleanup);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
new file mode 100644
index 0000000..69d7e9e
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -0,0 +1,324 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_H
+#define ENA_H
+
+#include <linux/bitops.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+
+#include "ena_com.h"
+#include "ena_eth_com.h"
+
+#define DRV_MODULE_VER_MAJOR	1
+#define DRV_MODULE_VER_MINOR	0
+#define DRV_MODULE_VER_SUBMINOR 2
+
+#define DRV_MODULE_NAME		"ena"
+#ifndef DRV_MODULE_VERSION
+#define DRV_MODULE_VERSION \
+	__stringify(DRV_MODULE_VER_MAJOR) "."	\
+	__stringify(DRV_MODULE_VER_MINOR) "."	\
+	__stringify(DRV_MODULE_VER_SUBMINOR)
+#endif
+
+#define DEVICE_NAME	"Elastic Network Adapter (ENA)"
+
+/* 1 for AENQ + ADMIN */
+#define ENA_MAX_MSIX_VEC(io_queues)	(1 + (io_queues))
+
+#define ENA_REG_BAR			0
+#define ENA_MEM_BAR			2
+#define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR))
+
+#define ENA_DEFAULT_RING_SIZE	(1024)
+
+#define ENA_TX_WAKEUP_THRESH		(MAX_SKB_FRAGS + 2)
+#define ENA_DEFAULT_RX_COPYBREAK	(128 - NET_IP_ALIGN)
+
+/* limit the buffer size to 600 bytes to handle MTU changes from very
+ * small to very large, in which case the number of buffers per packet
+ * could exceed ENA_PKT_MAX_BUFS
+ */
+#define ENA_DEFAULT_MIN_RX_BUFF_ALLOC_SIZE 600
+
+#define ENA_MIN_MTU		128
+
+#define ENA_NAME_MAX_LEN	20
+#define ENA_IRQNAME_SIZE	40
+
+#define ENA_PKT_MAX_BUFS	19
+
+#define ENA_RX_RSS_TABLE_LOG_SIZE  7
+#define ENA_RX_RSS_TABLE_SIZE	(1 << ENA_RX_RSS_TABLE_LOG_SIZE)
+
+#define ENA_HASH_KEY_SIZE	40
+
+/* The number of tx packet completions that will be handled each NAPI poll
+ * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER.
+ */
+#define ENA_TX_POLL_BUDGET_DIVIDER	4
+
+/* Refill Rx queue when number of available descriptors is below
+ * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER
+ */
+#define ENA_RX_REFILL_THRESH_DIVIDER	8
+
+/* Number of queues to check for missing queues per timer service */
+#define ENA_MONITORED_TX_QUEUES	4
+/* Max timeout packets before device reset */
+#define MAX_NUM_OF_TIMEOUTED_PACKETS 32
+
+#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
+
+#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
+#define ENA_RX_RING_IDX_ADD(idx, n, ring_size) \
+	(((idx) + (n)) & ((ring_size) - 1))
+
+#define ENA_IO_TXQ_IDX(q)	(2 * (q))
+#define ENA_IO_RXQ_IDX(q)	(2 * (q) + 1)
+
+#define ENA_MGMNT_IRQ_IDX		0
+#define ENA_IO_IRQ_FIRST_IDX		1
+#define ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))
+
+/* ENA device should send keep alive msg every 1 sec.
+ * We wait for 3 sec just to be on the safe side.
+ */
+#define ENA_DEVICE_KALIVE_TIMEOUT	(3 * HZ)
+
+#define ENA_MMIO_DISABLE_REG_READ	BIT(0)
+
+struct ena_irq {
+	irq_handler_t handler;
+	void *data;
+	int cpu;
+	u32 vector;
+	cpumask_t affinity_hint_mask;
+	char name[ENA_IRQNAME_SIZE];
+};
+
+struct ena_napi {
+	struct napi_struct napi ____cacheline_aligned;
+	struct ena_ring *tx_ring;
+	struct ena_ring *rx_ring;
+	u32 qid;
+};
+
+struct ena_tx_buffer {
+	struct sk_buff *skb;
+	/* num of ena desc for this specific skb
+	 * (includes data desc and metadata desc)
+	 */
+	u32 tx_descs;
+	/* num of buffers used by this skb */
+	u32 num_of_bufs;
+	/* Save the last jiffies to detect missing tx packets */
+	unsigned long last_jiffies;
+	struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
+} ____cacheline_aligned;
+
+struct ena_rx_buffer {
+	struct sk_buff *skb;
+	struct page *page;
+	u32 page_offset;
+	struct ena_com_buf ena_buf;
+} ____cacheline_aligned;
+
+struct ena_stats_tx {
+	u64 cnt;
+	u64 bytes;
+	u64 queue_stop;
+	u64 prepare_ctx_err;
+	u64 queue_wakeup;
+	u64 dma_mapping_err;
+	u64 linearize;
+	u64 linearize_failed;
+	u64 napi_comp;
+	u64 tx_poll;
+	u64 doorbells;
+	u64 missing_tx_comp;
+	u64 bad_req_id;
+};
+
+struct ena_stats_rx {
+	u64 cnt;
+	u64 bytes;
+	u64 refil_partial;
+	u64 bad_csum;
+	u64 page_alloc_fail;
+	u64 skb_alloc_fail;
+	u64 dma_mapping_err;
+	u64 bad_desc_num;
+	u64 rx_copybreak_pkt;
+};
+
+struct ena_ring {
+	/* Holds the empty requests for TX out of order completions */
+	u16 *free_tx_ids;
+	union {
+		struct ena_tx_buffer *tx_buffer_info;
+		struct ena_rx_buffer *rx_buffer_info;
+	};
+
+	/* cache ptr to avoid using the adapter */
+	struct device *dev;
+	struct pci_dev *pdev;
+	struct napi_struct *napi;
+	struct net_device *netdev;
+	struct ena_com_dev *ena_dev;
+	struct ena_adapter *adapter;
+	struct ena_com_io_cq *ena_com_io_cq;
+	struct ena_com_io_sq *ena_com_io_sq;
+
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 rx_copybreak;
+	u16 qid;
+	u16 mtu;
+	u16 sgl_size;
+
+	/* The maximum header length the device can handle */
+	u8 tx_max_header_size;
+
+	/* cpu for TPH */
+	int cpu;
+	 /* number of tx/rx_buffer_info's entries */
+	int ring_size;
+
+	enum ena_admin_placement_policy_type tx_mem_queue_type;
+
+	struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS];
+	u32  smoothed_interval;
+	u32  per_napi_packets;
+	u32  per_napi_bytes;
+	enum ena_intr_moder_level moder_tbl_idx;
+	struct u64_stats_sync syncp;
+	union {
+		struct ena_stats_tx tx_stats;
+		struct ena_stats_rx rx_stats;
+	};
+} ____cacheline_aligned;
+
+struct ena_stats_dev {
+	u64 tx_timeout;
+	u64 io_suspend;
+	u64 io_resume;
+	u64 wd_expired;
+	u64 interface_up;
+	u64 interface_down;
+	u64 admin_q_pause;
+};
+
+enum ena_flags_t {
+	ENA_FLAG_DEVICE_RUNNING,
+	ENA_FLAG_DEV_UP,
+	ENA_FLAG_LINK_UP,
+	ENA_FLAG_MSIX_ENABLED,
+	ENA_FLAG_TRIGGER_RESET
+};
+
+/* adapter specific private data structure */
+struct ena_adapter {
+	struct ena_com_dev *ena_dev;
+	/* OS defined structs */
+	struct net_device *netdev;
+	struct pci_dev *pdev;
+
+	/* rx packets that shorter that this len will be copied to the skb
+	 * header
+	 */
+	u32 rx_copybreak;
+	u32 max_mtu;
+
+	int num_queues;
+
+	struct msix_entry *msix_entries;
+	int msix_vecs;
+
+	u32 tx_usecs, rx_usecs; /* interrupt moderation */
+	u32 tx_frames, rx_frames; /* interrupt moderation */
+
+	u32 tx_ring_size;
+	u32 rx_ring_size;
+
+	u32 msg_enable;
+
+	u16 max_tx_sgl_size;
+	u16 max_rx_sgl_size;
+
+	u8 mac_addr[ETH_ALEN];
+
+	char name[ENA_NAME_MAX_LEN];
+
+	unsigned long flags;
+	/* TX */
+	struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES]
+		____cacheline_aligned_in_smp;
+
+	/* RX */
+	struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES]
+		____cacheline_aligned_in_smp;
+
+	struct ena_napi ena_napi[ENA_MAX_NUM_IO_QUEUES];
+
+	struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)];
+
+	/* timer service */
+	struct work_struct reset_task;
+	struct work_struct suspend_io_task;
+	struct work_struct resume_io_task;
+	struct timer_list timer_service;
+
+	bool wd_state;
+	unsigned long last_keep_alive_jiffies;
+
+	struct u64_stats_sync syncp;
+	struct ena_stats_dev dev_stats;
+
+	/* last queue index that was checked for uncompleted tx packets */
+	u32 last_monitored_tx_qid;
+};
+
+void ena_set_ethtool_ops(struct net_device *netdev);
+
+void ena_dump_stats_to_dmesg(struct ena_adapter *adapter);
+
+void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
+
+int ena_get_sset_count(struct net_device *netdev, int sset);
+
+#endif /* !(ENA_H) */
diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
new file mode 100644
index 0000000..f80d2a4
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2015 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ENA_PCI_ID_TBL_H_
+#define ENA_PCI_ID_TBL_H_
+
+#ifndef PCI_VENDOR_ID_AMAZON
+#define PCI_VENDOR_ID_AMAZON 0x1d0f
+#endif
+
+#ifndef PCI_DEV_ID_ENA_PF
+#define PCI_DEV_ID_ENA_PF	0x0ec2
+#endif
+
+#ifndef PCI_DEV_ID_ENA_LLQ_PF
+#define PCI_DEV_ID_ENA_LLQ_PF	0x1ec2
+#endif
+
+#ifndef PCI_DEV_ID_ENA_VF
+#define PCI_DEV_ID_ENA_VF	0xec20
+#endif
+
+#ifndef PCI_DEV_ID_ENA_LLQ_VF
+#define PCI_DEV_ID_ENA_LLQ_VF	0xec21
+#endif
+
+#define ENA_PCI_ID_TABLE_ENTRY(devid) \
+	{PCI_DEVICE(PCI_VENDOR_ID_AMAZON, devid)},
+
+static const struct pci_device_id ena_pci_tbl[] = {
+	ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_PF)
+	ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_PF)
+	ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_VF)
+	ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_VF)
+	{ }
+};
+
+#endif /* ENA_PCI_ID_TBL_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
new file mode 100644
index 0000000..26097a2
--- /dev/null
+++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _ENA_REGS_H_
+#define _ENA_REGS_H_
+
+/* ena_registers offsets */
+#define ENA_REGS_VERSION_OFF		0x0
+#define ENA_REGS_CONTROLLER_VERSION_OFF		0x4
+#define ENA_REGS_CAPS_OFF		0x8
+#define ENA_REGS_CAPS_EXT_OFF		0xc
+#define ENA_REGS_AQ_BASE_LO_OFF		0x10
+#define ENA_REGS_AQ_BASE_HI_OFF		0x14
+#define ENA_REGS_AQ_CAPS_OFF		0x18
+#define ENA_REGS_ACQ_BASE_LO_OFF		0x20
+#define ENA_REGS_ACQ_BASE_HI_OFF		0x24
+#define ENA_REGS_ACQ_CAPS_OFF		0x28
+#define ENA_REGS_AQ_DB_OFF		0x2c
+#define ENA_REGS_ACQ_TAIL_OFF		0x30
+#define ENA_REGS_AENQ_CAPS_OFF		0x34
+#define ENA_REGS_AENQ_BASE_LO_OFF		0x38
+#define ENA_REGS_AENQ_BASE_HI_OFF		0x3c
+#define ENA_REGS_AENQ_HEAD_DB_OFF		0x40
+#define ENA_REGS_AENQ_TAIL_OFF		0x44
+#define ENA_REGS_INTR_MASK_OFF		0x4c
+#define ENA_REGS_DEV_CTL_OFF		0x54
+#define ENA_REGS_DEV_STS_OFF		0x58
+#define ENA_REGS_MMIO_REG_READ_OFF		0x5c
+#define ENA_REGS_MMIO_RESP_LO_OFF		0x60
+#define ENA_REGS_MMIO_RESP_HI_OFF		0x64
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF		0x68
+
+/* version register */
+#define ENA_REGS_VERSION_MINOR_VERSION_MASK		0xff
+#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT		8
+#define ENA_REGS_VERSION_MAJOR_VERSION_MASK		0xff00
+
+/* controller_version register */
+#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK		0xff
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT		8
+#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK		0xff00
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT		16
+#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK		0xff0000
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT		24
+#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK		0xff000000
+
+/* caps register */
+#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK		0x1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT		1
+#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK		0x3e
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT		8
+#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK		0xff00
+
+/* aq_caps register */
+#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK		0xffff
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT		16
+#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK		0xffff0000
+
+/* acq_caps register */
+#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK		0xffff
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT		16
+#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK		0xffff0000
+
+/* aenq_caps register */
+#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK		0xffff
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT		16
+#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK		0xffff0000
+
+/* dev_ctl register */
+#define ENA_REGS_DEV_CTL_DEV_RESET_MASK		0x1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT		1
+#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK		0x2
+#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT		2
+#define ENA_REGS_DEV_CTL_QUIESCENT_MASK		0x4
+#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT		3
+#define ENA_REGS_DEV_CTL_IO_RESUME_MASK		0x8
+
+/* dev_sts register */
+#define ENA_REGS_DEV_STS_READY_MASK		0x1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT		1
+#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK		0x2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT		2
+#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK		0x4
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT		3
+#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK		0x8
+#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT		4
+#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK		0x10
+#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT		5
+#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK		0x20
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT		6
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK		0x40
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT		7
+#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK		0x80
+
+/* mmio_reg_read register */
+#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK		0xffff
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT		16
+#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK		0xffff0000
+
+/* rss_ind_entry_update register */
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK		0xffff
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT		16
+#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK		0xffff0000
+
+#endif /*_ENA_REGS_H_ */
diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c
index dcf2a1f..dc57f27 100644
--- a/drivers/net/ethernet/amd/7990.c
+++ b/drivers/net/ethernet/amd/7990.c
@@ -45,14 +45,14 @@
 #define WRITERDP(lp, x)	out_be16(lp->base + LANCE_RDP, (x))
 #define READRDP(lp)	in_be16(lp->base + LANCE_RDP)
 
-#if defined(CONFIG_HPLANCE) || defined(CONFIG_HPLANCE_MODULE)
+#if IS_ENABLED(CONFIG_HPLANCE)
 #include "hplance.h"
 
 #undef WRITERAP
 #undef WRITERDP
 #undef READRDP
 
-#if defined(CONFIG_MVME147_NET) || defined(CONFIG_MVME147_NET_MODULE)
+#if IS_ENABLED(CONFIG_MVME147_NET)
 
 /* Lossage Factor Nine, Mr Sulu. */
 #define WRITERAP(lp, x)	(lp->writerap(lp, x))
@@ -86,7 +86,7 @@
 }
 
 #endif
-#endif /* CONFIG_HPLANCE || CONFIG_HPLANCE_MODULE */
+#endif /* IS_ENABLED(CONFIG_HPLANCE) */
 
 /* debugging output macros, various flavours */
 /* #define TEST_HITS */
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 94960055..f92cc971 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -89,7 +89,7 @@
 #include <asm/byteorder.h>
 #include <asm/uaccess.h>
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define AMD8111E_VLAN_TAG_USED 1
 #else
 #define AMD8111E_VLAN_TAG_USED 0
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index a9b2709..7f9216d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1708,9 +1708,9 @@
 	.ndo_set_features	= xgbe_set_features,
 };
 
-struct net_device_ops *xgbe_get_netdev_ops(void)
+const struct net_device_ops *xgbe_get_netdev_ops(void)
 {
-	return (struct net_device_ops *)&xgbe_netdev_ops;
+	return &xgbe_netdev_ops;
 }
 
 static void xgbe_rx_refresh(struct xgbe_channel *channel)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 11d9f0c..4007b42 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -623,7 +623,7 @@
 	.get_ts_info = xgbe_get_ts_info,
 };
 
-struct ethtool_ops *xgbe_get_ethtool_ops(void)
+const struct ethtool_ops *xgbe_get_ethtool_ops(void)
 {
-	return (struct ethtool_ops *)&xgbe_ethtool_ops;
+	return &xgbe_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 3eee320..9de0788 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -861,9 +861,15 @@
 	pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
 	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
 
-	if (netif_running(netdev))
+	if (netif_running(netdev)) {
 		ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
 
+		/* Schedule a restart in case the link or phy state changed
+		 * while we were powered down.
+		 */
+		schedule_work(&pdata->restart_work);
+	}
+
 	DBGPR("<--xgbe_resume\n");
 
 	return ret;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 98d9d63..5dd17dc 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -956,8 +956,9 @@
 void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *);
 void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *);
 void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *);
-struct net_device_ops *xgbe_get_netdev_ops(void);
-struct ethtool_ops *xgbe_get_ethtool_ops(void);
+const struct net_device_ops *xgbe_get_netdev_ops(void);
+const struct ethtool_ops *xgbe_get_ethtool_ops(void);
+
 #ifdef CONFIG_AMD_XGBE_DCB
 const struct dcbnl_rtnl_ops *xgbe_get_dcbnl_ops(void);
 #endif
diff --git a/drivers/net/ethernet/apm/xgene/Kconfig b/drivers/net/ethernet/apm/xgene/Kconfig
index 300e3b5..afccb03 100644
--- a/drivers/net/ethernet/apm/xgene/Kconfig
+++ b/drivers/net/ethernet/apm/xgene/Kconfig
@@ -4,6 +4,7 @@
 	depends on ARCH_XGENE || COMPILE_TEST
 	select PHYLIB
 	select MDIO_XGENE
+	select GPIOLIB
 	help
 	  This is the Ethernet driver for the on-chip ethernet interface on the
 	  APM X-Gene SoC.
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
index 472c0fb..23d72af 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
@@ -32,12 +32,19 @@
 		SET_VAL(SB_HDRLEN, len);
 }
 
-static void xgene_cle_idt_to_hw(u32 dstqid, u32 fpsel,
+static void xgene_cle_idt_to_hw(struct xgene_enet_pdata *pdata,
+				u32 dstqid, u32 fpsel,
 				u32 nfpsel, u32 *idt_reg)
 {
-	*idt_reg =  SET_VAL(IDT_DSTQID, dstqid) |
-		    SET_VAL(IDT_FPSEL, fpsel) |
-		    SET_VAL(IDT_NFPSEL, nfpsel);
+	if (pdata->enet_id == XGENE_ENET1) {
+		*idt_reg = SET_VAL(IDT_DSTQID, dstqid) |
+			   SET_VAL(IDT_FPSEL1, fpsel)  |
+			   SET_VAL(IDT_NFPSEL1, nfpsel);
+	} else {
+		*idt_reg = SET_VAL(IDT_DSTQID, dstqid) |
+			   SET_VAL(IDT_FPSEL, fpsel)   |
+			   SET_VAL(IDT_NFPSEL, nfpsel);
+	}
 }
 
 static void xgene_cle_dbptr_to_hw(struct xgene_enet_pdata *pdata,
@@ -344,7 +351,7 @@
 		nfpsel = 0;
 		idt_reg = 0;
 
-		xgene_cle_idt_to_hw(dstqid, fpsel, nfpsel, &idt_reg);
+		xgene_cle_idt_to_hw(pdata, dstqid, fpsel, nfpsel, &idt_reg);
 		ret = xgene_cle_dram_wr(&pdata->cle, &idt_reg, 1, i,
 					RSS_IDT, CLE_CMD_WR);
 		if (ret)
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
index 33c5f6b..9ac9f8e 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
@@ -196,9 +196,13 @@
 #define IDT_DSTQID_POS		0
 #define IDT_DSTQID_LEN		12
 #define IDT_FPSEL_POS		12
-#define IDT_FPSEL_LEN		4
-#define IDT_NFPSEL_POS		16
-#define IDT_NFPSEL_LEN		4
+#define IDT_FPSEL_LEN		5
+#define IDT_NFPSEL_POS		17
+#define IDT_NFPSEL_LEN		5
+#define IDT_FPSEL1_POS		12
+#define IDT_FPSEL1_LEN		4
+#define IDT_NFPSEL1_POS		16
+#define IDT_NFPSEL1_LEN		4
 
 struct xgene_cle_ptree_branch {
 	bool valid;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
index 22a7b26..d372d42 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
@@ -54,55 +54,68 @@
 	sprintf(info->bus_info, "%s", pdev->name);
 }
 
-static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd)
+static int xgene_get_link_ksettings(struct net_device *ndev,
+				    struct ethtool_link_ksettings *cmd)
 {
 	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
-	struct phy_device *phydev = pdata->phy_dev;
+	struct phy_device *phydev = ndev->phydev;
+	u32 supported;
 
 	if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) {
 		if (phydev == NULL)
 			return -ENODEV;
 
-		return phy_ethtool_gset(phydev, cmd);
+		return phy_ethtool_ksettings_get(phydev, cmd);
 	} else if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) {
 		if (pdata->mdio_driver) {
 			if (!phydev)
 				return -ENODEV;
 
-			return phy_ethtool_gset(phydev, cmd);
+			return phy_ethtool_ksettings_get(phydev, cmd);
 		}
 
-		cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
-				 SUPPORTED_MII;
-		cmd->advertising = cmd->supported;
-		ethtool_cmd_speed_set(cmd, SPEED_1000);
-		cmd->duplex = DUPLEX_FULL;
-		cmd->port = PORT_MII;
-		cmd->transceiver = XCVR_INTERNAL;
-		cmd->autoneg = AUTONEG_ENABLE;
+		supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
+			SUPPORTED_MII;
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.supported,
+			supported);
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.advertising,
+			supported);
+
+		cmd->base.speed = SPEED_1000;
+		cmd->base.duplex = DUPLEX_FULL;
+		cmd->base.port = PORT_MII;
+		cmd->base.autoneg = AUTONEG_ENABLE;
 	} else {
-		cmd->supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE;
-		cmd->advertising = cmd->supported;
-		ethtool_cmd_speed_set(cmd, SPEED_10000);
-		cmd->duplex = DUPLEX_FULL;
-		cmd->port = PORT_FIBRE;
-		cmd->transceiver = XCVR_INTERNAL;
-		cmd->autoneg = AUTONEG_DISABLE;
+		supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE;
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.supported,
+			supported);
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.advertising,
+			supported);
+
+		cmd->base.speed = SPEED_10000;
+		cmd->base.duplex = DUPLEX_FULL;
+		cmd->base.port = PORT_FIBRE;
+		cmd->base.autoneg = AUTONEG_DISABLE;
 	}
 
 	return 0;
 }
 
-static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd)
+static int xgene_set_link_ksettings(struct net_device *ndev,
+				    const struct ethtool_link_ksettings *cmd)
 {
 	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
-	struct phy_device *phydev = pdata->phy_dev;
+	struct phy_device *phydev = ndev->phydev;
 
 	if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) {
 		if (!phydev)
 			return -ENODEV;
 
-		return phy_ethtool_sset(phydev, cmd);
+		return phy_ethtool_ksettings_set(phydev, cmd);
 	}
 
 	if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) {
@@ -110,7 +123,7 @@
 			if (!phydev)
 				return -ENODEV;
 
-			return phy_ethtool_sset(phydev, cmd);
+			return phy_ethtool_ksettings_set(phydev, cmd);
 		}
 	}
 
@@ -152,12 +165,12 @@
 
 static const struct ethtool_ops xgene_ethtool_ops = {
 	.get_drvinfo = xgene_get_drvinfo,
-	.get_settings = xgene_get_settings,
-	.set_settings = xgene_set_settings,
 	.get_link = ethtool_op_get_link,
 	.get_strings = xgene_get_strings,
 	.get_sset_count = xgene_get_sset_count,
-	.get_ethtool_stats = xgene_get_ethtool_stats
+	.get_ethtool_stats = xgene_get_ethtool_stats,
+	.get_link_ksettings = xgene_get_link_ksettings,
+	.set_link_ksettings = xgene_set_link_ksettings,
 };
 
 void xgene_enet_set_ethtool_ops(struct net_device *ndev)
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 37a0f46..c481f10 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -713,7 +713,7 @@
 {
 	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
 	const struct xgene_mac_ops *mac_ops = pdata->mac_ops;
-	struct phy_device *phydev = pdata->phy_dev;
+	struct phy_device *phydev = ndev->phydev;
 
 	if (phydev->link) {
 		if (pdata->phy_speed != phydev->speed) {
@@ -761,31 +761,25 @@
 	if (dev->of_node) {
 		for (i = 0 ; i < 2; i++) {
 			np = of_parse_phandle(dev->of_node, "phy-handle", i);
-			if (np)
+			phy_dev = of_phy_connect(ndev, np,
+						 &xgene_enet_adjust_link,
+						 0, pdata->phy_mode);
+			of_node_put(np);
+			if (phy_dev)
 				break;
 		}
 
-		if (!np) {
-			netdev_dbg(ndev, "No phy-handle found in DT\n");
-			return -ENODEV;
-		}
-
-		phy_dev = of_phy_connect(ndev, np, &xgene_enet_adjust_link,
-					 0, pdata->phy_mode);
-		of_node_put(np);
 		if (!phy_dev) {
 			netdev_err(ndev, "Could not connect to PHY\n");
 			return -ENODEV;
 		}
-
-		pdata->phy_dev = phy_dev;
 	} else {
 #ifdef CONFIG_ACPI
 		struct acpi_device *adev = acpi_phy_find_device(dev);
 		if (adev)
-			pdata->phy_dev =  adev->driver_data;
-
-		phy_dev = pdata->phy_dev;
+			phy_dev = adev->driver_data;
+		else
+			phy_dev = NULL;
 
 		if (!phy_dev ||
 		    phy_connect_direct(ndev, phy_dev, &xgene_enet_adjust_link,
@@ -793,6 +787,8 @@
 			netdev_err(ndev, "Could not connect to PHY\n");
 			return  -ENODEV;
 		}
+#else
+		return -ENODEV;
 #endif
 	}
 
@@ -851,8 +847,6 @@
 	if (!phy)
 		return -EIO;
 
-	pdata->phy_dev = phy;
-
 	return ret;
 }
 
@@ -892,14 +886,18 @@
 
 void xgene_enet_phy_disconnect(struct xgene_enet_pdata *pdata)
 {
-	if (pdata->phy_dev)
-		phy_disconnect(pdata->phy_dev);
+	struct net_device *ndev = pdata->ndev;
+
+	if (ndev->phydev)
+		phy_disconnect(ndev->phydev);
 }
 
 void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata)
 {
-	if (pdata->phy_dev)
-		phy_disconnect(pdata->phy_dev);
+	struct net_device *ndev = pdata->ndev;
+
+	if (ndev->phydev)
+		phy_disconnect(ndev->phydev);
 
 	mdiobus_unregister(pdata->mdio_bus);
 	mdiobus_free(pdata->mdio_bus);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
index 179a44d..8456337 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
@@ -124,6 +124,12 @@
 #define MAC_READ_REG_OFFSET		0x0c
 #define MAC_COMMAND_DONE_REG_OFFSET	0x10
 
+#define PCS_ADDR_REG_OFFSET		0x00
+#define PCS_COMMAND_REG_OFFSET		0x04
+#define PCS_WRITE_REG_OFFSET		0x08
+#define PCS_READ_REG_OFFSET		0x0c
+#define PCS_COMMAND_DONE_REG_OFFSET	0x10
+
 #define MII_MGMT_CONFIG_ADDR		0x20
 #define MII_MGMT_COMMAND_ADDR		0x24
 #define MII_MGMT_ADDRESS_ADDR		0x28
@@ -231,6 +237,8 @@
 #define TCPHDR_LEN			6
 #define IPHDR_POS			6
 #define IPHDR_LEN			6
+#define MSS_POS				20
+#define MSS_LEN				2
 #define EC_POS				22	/* Enable checksum */
 #define EC_LEN				1
 #define ET_POS				23	/* Enable TSO */
@@ -247,6 +255,11 @@
 
 #define LAST_BUFFER			(0x7800ULL << BUFDATALEN_POS)
 
+#define TSO_MSS0_POS			0
+#define TSO_MSS0_LEN			14
+#define TSO_MSS1_POS			16
+#define TSO_MSS1_LEN			14
+
 struct xgene_enet_raw_desc {
 	__le64 m0;
 	__le64 m1;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index d1d6b5e..429f18f 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -19,6 +19,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/gpio.h>
 #include "xgene_enet_main.h"
 #include "xgene_enet_hw.h"
 #include "xgene_enet_sgmac.h"
@@ -72,7 +73,6 @@
 		skb = netdev_alloc_skb_ip_align(ndev, len);
 		if (unlikely(!skb))
 			return -ENOMEM;
-		buf_pool->rx_skb[tail] = skb;
 
 		dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE);
 		if (dma_mapping_error(dev, dma_addr)) {
@@ -81,6 +81,8 @@
 			return -EINVAL;
 		}
 
+		buf_pool->rx_skb[tail] = skb;
+
 		raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) |
 					   SET_VAL(BUFDATALEN, bufdatalen) |
 					   SET_BIT(COHERENT));
@@ -102,12 +104,21 @@
 
 static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool)
 {
+	struct device *dev = ndev_to_dev(buf_pool->ndev);
+	struct xgene_enet_raw_desc16 *raw_desc;
+	dma_addr_t dma_addr;
 	int i;
 
 	/* Free up the buffers held by hardware */
 	for (i = 0; i < buf_pool->slots; i++) {
-		if (buf_pool->rx_skb[i])
+		if (buf_pool->rx_skb[i]) {
 			dev_kfree_skb_any(buf_pool->rx_skb[i]);
+
+			raw_desc = &buf_pool->raw_desc16[i];
+			dma_addr = GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1));
+			dma_unmap_single(dev, dma_addr, XGENE_ENET_MAX_MTU,
+					 DMA_FROM_DEVICE);
+		}
 	}
 }
 
@@ -126,6 +137,7 @@
 static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring,
 				    struct xgene_enet_raw_desc *raw_desc)
 {
+	struct xgene_enet_pdata *pdata = netdev_priv(cp_ring->ndev);
 	struct sk_buff *skb;
 	struct device *dev;
 	skb_frag_t *frag;
@@ -133,6 +145,7 @@
 	u16 skb_index;
 	u8 status;
 	int i, ret = 0;
+	u8 mss_index;
 
 	skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0));
 	skb = cp_ring->cp_skb[skb_index];
@@ -149,6 +162,13 @@
 			       DMA_TO_DEVICE);
 	}
 
+	if (GET_BIT(ET, le64_to_cpu(raw_desc->m3))) {
+		mss_index = GET_VAL(MSS, le64_to_cpu(raw_desc->m3));
+		spin_lock(&pdata->mss_lock);
+		pdata->mss_refcnt[mss_index]--;
+		spin_unlock(&pdata->mss_lock);
+	}
+
 	/* Checking for error */
 	status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0));
 	if (unlikely(status > 2)) {
@@ -167,15 +187,53 @@
 	return ret;
 }
 
-static u64 xgene_enet_work_msg(struct sk_buff *skb)
+static int xgene_enet_setup_mss(struct net_device *ndev, u32 mss)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	bool mss_index_found = false;
+	int mss_index;
+	int i;
+
+	spin_lock(&pdata->mss_lock);
+
+	/* Reuse the slot if MSS matches */
+	for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) {
+		if (pdata->mss[i] == mss) {
+			pdata->mss_refcnt[i]++;
+			mss_index = i;
+			mss_index_found = true;
+		}
+	}
+
+	/* Overwrite the slot with ref_count = 0 */
+	for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) {
+		if (!pdata->mss_refcnt[i]) {
+			pdata->mss_refcnt[i]++;
+			pdata->mac_ops->set_mss(pdata, mss, i);
+			pdata->mss[i] = mss;
+			mss_index = i;
+			mss_index_found = true;
+		}
+	}
+
+	spin_unlock(&pdata->mss_lock);
+
+	/* No slots with ref_count = 0 available, return busy */
+	if (!mss_index_found)
+		return -EBUSY;
+
+	return mss_index;
+}
+
+static int xgene_enet_work_msg(struct sk_buff *skb, u64 *hopinfo)
 {
 	struct net_device *ndev = skb->dev;
 	struct iphdr *iph;
 	u8 l3hlen = 0, l4hlen = 0;
 	u8 ethhdr, proto = 0, csum_enable = 0;
-	u64 hopinfo = 0;
 	u32 hdr_len, mss = 0;
 	u32 i, len, nr_frags;
+	int mss_index;
 
 	ethhdr = xgene_enet_hdr_len(skb->data);
 
@@ -215,7 +273,11 @@
 			if (!mss || ((skb->len - hdr_len) <= mss))
 				goto out;
 
-			hopinfo |= SET_BIT(ET);
+			mss_index = xgene_enet_setup_mss(ndev, mss);
+			if (unlikely(mss_index < 0))
+				return -EBUSY;
+
+			*hopinfo |= SET_BIT(ET) | SET_VAL(MSS, mss_index);
 		}
 	} else if (iph->protocol == IPPROTO_UDP) {
 		l4hlen = UDP_HDR_SIZE;
@@ -223,15 +285,15 @@
 	}
 out:
 	l3hlen = ip_hdrlen(skb) >> 2;
-	hopinfo |= SET_VAL(TCPHDR, l4hlen) |
-		  SET_VAL(IPHDR, l3hlen) |
-		  SET_VAL(ETHHDR, ethhdr) |
-		  SET_VAL(EC, csum_enable) |
-		  SET_VAL(IS, proto) |
-		  SET_BIT(IC) |
-		  SET_BIT(TYPE_ETH_WORK_MESSAGE);
+	*hopinfo |= SET_VAL(TCPHDR, l4hlen) |
+		    SET_VAL(IPHDR, l3hlen) |
+		    SET_VAL(ETHHDR, ethhdr) |
+		    SET_VAL(EC, csum_enable) |
+		    SET_VAL(IS, proto) |
+		    SET_BIT(IC) |
+		    SET_BIT(TYPE_ETH_WORK_MESSAGE);
 
-	return hopinfo;
+	return 0;
 }
 
 static u16 xgene_enet_encode_len(u16 len)
@@ -271,20 +333,22 @@
 	dma_addr_t dma_addr, pbuf_addr, *frag_dma_addr;
 	skb_frag_t *frag;
 	u16 tail = tx_ring->tail;
-	u64 hopinfo;
+	u64 hopinfo = 0;
 	u32 len, hw_len;
 	u8 ll = 0, nv = 0, idx = 0;
 	bool split = false;
 	u32 size, offset, ell_bytes = 0;
 	u32 i, fidx, nr_frags, count = 1;
+	int ret;
 
 	raw_desc = &tx_ring->raw_desc[tail];
 	tail = (tail + 1) & (tx_ring->slots - 1);
 	memset(raw_desc, 0, sizeof(struct xgene_enet_raw_desc));
 
-	hopinfo = xgene_enet_work_msg(skb);
-	if (!hopinfo)
-		return -EINVAL;
+	ret = xgene_enet_work_msg(skb, &hopinfo);
+	if (ret)
+		return ret;
+
 	raw_desc->m3 = cpu_to_le64(SET_VAL(HENQNUM, tx_ring->dst_ring_num) |
 				   hopinfo);
 
@@ -424,6 +488,9 @@
 		return NETDEV_TX_OK;
 
 	count = xgene_enet_setup_tx_desc(tx_ring, skb);
+	if (count == -EBUSY)
+		return NETDEV_TX_BUSY;
+
 	if (count <= 0) {
 		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
@@ -452,7 +519,6 @@
 			       struct xgene_enet_raw_desc *raw_desc)
 {
 	struct net_device *ndev;
-	struct xgene_enet_pdata *pdata;
 	struct device *dev;
 	struct xgene_enet_desc_ring *buf_pool;
 	u32 datalen, skb_index;
@@ -461,7 +527,6 @@
 	int ret = 0;
 
 	ndev = rx_ring->ndev;
-	pdata = netdev_priv(ndev);
 	dev = ndev_to_dev(rx_ring->ndev);
 	buf_pool = rx_ring->buf_pool;
 
@@ -739,8 +804,8 @@
 	if (ret)
 		return ret;
 
-	if (pdata->phy_dev) {
-		phy_start(pdata->phy_dev);
+	if (ndev->phydev) {
+		phy_start(ndev->phydev);
 	} else {
 		schedule_delayed_work(&pdata->link_work, PHY_POLL_LINK_OFF);
 		netif_carrier_off(ndev);
@@ -763,8 +828,8 @@
 	mac_ops->tx_disable(pdata);
 	mac_ops->rx_disable(pdata);
 
-	if (pdata->phy_dev)
-		phy_stop(pdata->phy_dev);
+	if (ndev->phydev)
+		phy_stop(ndev->phydev);
 	else
 		cancel_delayed_work_sync(&pdata->link_work);
 
@@ -1312,6 +1377,18 @@
 	return 0;
 }
 
+static void xgene_enet_gpiod_get(struct xgene_enet_pdata *pdata)
+{
+	struct device *dev = &pdata->pdev->dev;
+
+	if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII)
+		return;
+
+	pdata->sfp_rdy = gpiod_get(dev, "rxlos", GPIOD_IN);
+	if (IS_ERR(pdata->sfp_rdy))
+		pdata->sfp_rdy = gpiod_get(dev, "sfp", GPIOD_IN);
+}
+
 static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
 {
 	struct platform_device *pdev;
@@ -1401,6 +1478,8 @@
 	if (ret)
 		return ret;
 
+	xgene_enet_gpiod_get(pdata);
+
 	pdata->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pdata->clk)) {
 		/* Firmware may have set up the clock already. */
@@ -1425,6 +1504,7 @@
 	} else {
 		pdata->mcx_mac_addr = base_addr + BLOCK_AXG_MAC_OFFSET;
 		pdata->mcx_mac_csr_addr = base_addr + BLOCK_AXG_MAC_CSR_OFFSET;
+		pdata->pcs_addr = base_addr + BLOCK_PCS_OFFSET;
 	}
 	pdata->rx_buff_cnt = NUM_PKT_BUF;
 
@@ -1454,10 +1534,8 @@
 		buf_pool = pdata->rx_ring[i]->buf_pool;
 		xgene_enet_init_bufpool(buf_pool);
 		ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt);
-		if (ret) {
-			xgene_enet_delete_desc_rings(pdata);
-			return ret;
-		}
+		if (ret)
+			goto err;
 	}
 
 	dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring[0]);
@@ -1474,7 +1552,7 @@
 		ret = pdata->cle_ops->cle_init(pdata);
 		if (ret) {
 			netdev_err(ndev, "Preclass Tree init error\n");
-			return ret;
+			goto err;
 		}
 	} else {
 		pdata->port_ops->cle_bypass(pdata, dst_ring_num, buf_pool->id);
@@ -1484,6 +1562,10 @@
 	pdata->mac_ops->init(pdata);
 
 	return ret;
+
+err:
+	xgene_enet_delete_desc_rings(pdata);
+	return ret;
 }
 
 static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata)
@@ -1631,8 +1713,8 @@
 	}
 #endif
 	if (!pdata->enet_id) {
-		free_netdev(ndev);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto err;
 	}
 
 	ret = xgene_enet_get_resources(pdata);
@@ -1643,7 +1725,7 @@
 
 	if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
 		ndev->features |= NETIF_F_TSO;
-		pdata->mss = XGENE_ENET_MSS;
+		spin_lock_init(&pdata->mss_lock);
 	}
 	ndev->hw_features = ndev->features;
 
@@ -1655,7 +1737,7 @@
 
 	ret = xgene_enet_init_hw(pdata);
 	if (ret)
-		goto err_netdev;
+		goto err;
 
 	link_state = pdata->mac_ops->link_state;
 	if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
@@ -1665,21 +1747,32 @@
 			ret = xgene_enet_mdio_config(pdata);
 		else
 			INIT_DELAYED_WORK(&pdata->link_work, link_state);
+
+		if (ret)
+			goto err1;
 	}
-	if (ret)
-		goto err;
 
 	xgene_enet_napi_add(pdata);
 	ret = register_netdev(ndev);
 	if (ret) {
 		netdev_err(ndev, "Failed to register netdev\n");
-		goto err;
+		goto err2;
 	}
 
 	return 0;
 
-err_netdev:
-	unregister_netdev(ndev);
+err2:
+	/*
+	 * If necessary, free_netdev() will call netif_napi_del() and undo
+	 * the effects of xgene_enet_napi_add()'s calls to netif_napi_add().
+	 */
+
+	if (pdata->mdio_driver)
+		xgene_enet_phy_disconnect(pdata);
+	else if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
+		xgene_enet_mdio_remove(pdata);
+err1:
+	xgene_enet_delete_desc_rings(pdata);
 err:
 	free_netdev(ndev);
 	return ret;
@@ -1688,11 +1781,9 @@
 static int xgene_enet_remove(struct platform_device *pdev)
 {
 	struct xgene_enet_pdata *pdata;
-	const struct xgene_mac_ops *mac_ops;
 	struct net_device *ndev;
 
 	pdata = platform_get_drvdata(pdev);
-	mac_ops = pdata->mac_ops;
 	ndev = pdata->ndev;
 
 	rtnl_lock();
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index 217546e..0cda58f 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -47,7 +47,7 @@
 #define NUM_PKT_BUF	64
 #define NUM_BUFPOOL	32
 #define MAX_EXP_BUFFS	256
-#define XGENE_ENET_MSS	1448
+#define NUM_MSS_REG	4
 #define XGENE_MIN_ENET_FRAME_SIZE	60
 
 #define XGENE_MAX_ENET_IRQ	16
@@ -143,7 +143,7 @@
 	void (*rx_disable)(struct xgene_enet_pdata *pdata);
 	void (*set_speed)(struct xgene_enet_pdata *pdata);
 	void (*set_mac_addr)(struct xgene_enet_pdata *pdata);
-	void (*set_mss)(struct xgene_enet_pdata *pdata);
+	void (*set_mss)(struct xgene_enet_pdata *pdata, u16 mss, u8 index);
 	void (*link_state)(struct work_struct *work);
 };
 
@@ -174,7 +174,6 @@
 struct xgene_enet_pdata {
 	struct net_device *ndev;
 	struct mii_bus *mdio_bus;
-	struct phy_device *phy_dev;
 	int phy_speed;
 	struct clk *clk;
 	struct platform_device *pdev;
@@ -196,6 +195,7 @@
 	void __iomem *mcx_mac_addr;
 	void __iomem *mcx_mac_csr_addr;
 	void __iomem *base_addr;
+	void __iomem *pcs_addr;
 	void __iomem *ring_csr_addr;
 	void __iomem *ring_cmd_addr;
 	int phy_mode;
@@ -212,10 +212,13 @@
 	u8 eth_bufnum;
 	u8 bp_bufnum;
 	u16 ring_num;
-	u32 mss;
+	u32 mss[NUM_MSS_REG];
+	u32 mss_refcnt[NUM_MSS_REG];
+	spinlock_t mss_lock;  /* mss lock */
 	u8 tx_delay;
 	u8 rx_delay;
 	bool mdio_driver;
+	struct gpio_desc *sfp_rdy;
 };
 
 struct xgene_indirect_ctl {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
index 9c6ad0d..6475f38 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
@@ -18,6 +18,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/of_gpio.h>
+#include <linux/gpio.h>
 #include "xgene_enet_main.h"
 #include "xgene_enet_hw.h"
 #include "xgene_enet_xgmac.h"
@@ -84,6 +86,21 @@
 			   wr_addr);
 }
 
+static void xgene_enet_wr_pcs(struct xgene_enet_pdata *pdata,
+			      u32 wr_addr, u32 wr_data)
+{
+	void __iomem *addr, *wr, *cmd, *cmd_done;
+
+	addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET;
+	wr = pdata->pcs_addr + PCS_WRITE_REG_OFFSET;
+	cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET;
+	cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET;
+
+	if (!xgene_enet_wr_indirect(addr, wr, cmd, cmd_done, wr_addr, wr_data))
+		netdev_err(pdata->ndev, "PCS write failed, addr: %04x\n",
+			   wr_addr);
+}
+
 static void xgene_enet_rd_csr(struct xgene_enet_pdata *pdata,
 			      u32 offset, u32 *val)
 {
@@ -122,6 +139,7 @@
 
 	return true;
 }
+
 static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata,
 			      u32 rd_addr, u32 *rd_data)
 {
@@ -137,6 +155,25 @@
 			   rd_addr);
 }
 
+static bool xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata,
+			      u32 rd_addr, u32 *rd_data)
+{
+	void __iomem *addr, *rd, *cmd, *cmd_done;
+	bool success;
+
+	addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET;
+	rd = pdata->pcs_addr + PCS_READ_REG_OFFSET;
+	cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET;
+	cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET;
+
+	success = xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data);
+	if (!success)
+		netdev_err(pdata->ndev, "PCS read failed, addr: %04x\n",
+			   rd_addr);
+
+	return success;
+}
+
 static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata)
 {
 	struct net_device *ndev = pdata->ndev;
@@ -171,6 +208,17 @@
 	xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_0, 0);
 }
 
+static void xgene_pcs_reset(struct xgene_enet_pdata *pdata)
+{
+	u32 data;
+
+	if (!xgene_enet_rd_pcs(pdata, PCS_CONTROL_1, &data))
+		return;
+
+	xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data | PCS_CTRL_PCS_RST);
+	xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data & ~PCS_CTRL_PCS_RST);
+}
+
 static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata)
 {
 	u32 addr0, addr1;
@@ -184,9 +232,22 @@
 	xgene_enet_wr_mac(pdata, HSTMACADR_MSW_ADDR, addr1);
 }
 
-static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata)
+static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata,
+				u16 mss, u8 index)
 {
-	xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR, pdata->mss);
+	u8 offset;
+	u32 data;
+
+	offset = (index < 2) ? 0 : 4;
+	xgene_enet_rd_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, &data);
+
+	if (!(index & 0x1))
+		data = SET_VAL(TSO_MSS1, data >> TSO_MSS1_POS) |
+			SET_VAL(TSO_MSS0, mss);
+	else
+		data = SET_VAL(TSO_MSS1, mss) | SET_VAL(TSO_MSS0, data);
+
+	xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, data);
 }
 
 static u32 xgene_enet_link_status(struct xgene_enet_pdata *pdata)
@@ -210,18 +271,17 @@
 	xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_1, data);
 
 	xgene_xgmac_set_mac_addr(pdata);
-	xgene_xgmac_set_mss(pdata);
 
 	xgene_enet_rd_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, &data);
 	data |= CFG_RSIF_FPBUFF_TIMEOUT_EN;
 	xgene_enet_wr_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, data);
 
-	xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX);
-	xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0);
 	xgene_enet_rd_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, &data);
 	data |= BIT(12);
 	xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, data);
 	xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x82);
+	xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0);
+	xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX);
 }
 
 static void xgene_xgmac_rx_enable(struct xgene_enet_pdata *pdata)
@@ -359,14 +419,17 @@
 {
 	struct xgene_enet_pdata *pdata = container_of(to_delayed_work(work),
 					 struct xgene_enet_pdata, link_work);
+	struct gpio_desc *sfp_rdy = pdata->sfp_rdy;
 	struct net_device *ndev = pdata->ndev;
 	u32 link_status, poll_interval;
 
 	link_status = xgene_enet_link_status(pdata);
+	if (link_status && !IS_ERR(sfp_rdy) && !gpiod_get_value(sfp_rdy))
+		link_status = 0;
+
 	if (link_status) {
 		if (!netif_carrier_ok(ndev)) {
 			netif_carrier_on(ndev);
-			xgene_xgmac_init(pdata);
 			xgene_xgmac_rx_enable(pdata);
 			xgene_xgmac_tx_enable(pdata);
 			netdev_info(ndev, "Link is Up - 10Gbps\n");
@@ -380,6 +443,8 @@
 			netdev_info(ndev, "Link is Down\n");
 		}
 		poll_interval = PHY_POLL_LINK_OFF;
+
+		xgene_pcs_reset(pdata);
 	}
 
 	schedule_delayed_work(&pdata->link_work, poll_interval);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
index f1ea485..360ccbd 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
@@ -24,6 +24,7 @@
 #define X2_BLOCK_ETH_MAC_CSR_OFFSET	0x3000
 #define BLOCK_AXG_MAC_OFFSET		0x0800
 #define BLOCK_AXG_MAC_CSR_OFFSET	0x2000
+#define BLOCK_PCS_OFFSET		0x3800
 
 #define XGENET_CONFIG_REG_ADDR		0x20
 #define XGENET_SRST_ADDR		0x00
@@ -72,6 +73,9 @@
 #define XG_MCX_ICM_CONFIG0_REG_0_ADDR	0x00e0
 #define XG_MCX_ICM_CONFIG2_REG_0_ADDR	0x00e8
 
+#define PCS_CONTROL_1			0x0000
+#define PCS_CTRL_PCS_RST		BIT(15)
+
 extern const struct xgene_mac_ops xgene_xgmac_ops;
 extern const struct xgene_port_ops xgene_xgport_ops;
 
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index 4bff0f3..b0da969 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -771,8 +771,10 @@
 	priv->dev = dev;
 
 	priv->regs = devm_ioremap_resource(dev, &res_regs);
-	if (IS_ERR(priv->regs))
-		return PTR_ERR(priv->regs);
+	if (IS_ERR(priv->regs)) {
+		err = PTR_ERR(priv->regs);
+		goto out_put_node;
+	}
 
 	dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs);
 
diff --git a/drivers/net/ethernet/arc/emac_mdio.c b/drivers/net/ethernet/arc/emac_mdio.c
index 058460b..a22403c 100644
--- a/drivers/net/ethernet/arc/emac_mdio.c
+++ b/drivers/net/ethernet/arc/emac_mdio.c
@@ -104,7 +104,7 @@
  * @bus: points to the mii_bus structure
  * Description: reset the MII bus
  */
-int arc_mdio_reset(struct mii_bus *bus)
+static int arc_mdio_reset(struct mii_bus *bus)
 {
 	struct arc_emac_priv *priv = bus->priv;
 	struct arc_emac_mdio_bus_data *data = &priv->bus_data;
diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 8fc93c5..6cac919 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -76,11 +76,19 @@
 	ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG = BIT(0),
 };
 
+#define ALX_FLAG_USING_MSIX	BIT(0)
+#define ALX_FLAG_USING_MSI	BIT(1)
+
 struct alx_priv {
 	struct net_device *dev;
 
 	struct alx_hw hw;
 
+	/* msi-x vectors */
+	int num_vec;
+	struct msix_entry *msix_entries;
+	char irq_lbl[IFNAMSIZ + 8];
+
 	/* all descriptor memory */
 	struct {
 		dma_addr_t dma;
@@ -105,7 +113,7 @@
 
 	u16 msg_enable;
 
-	bool msi;
+	int flags;
 
 	/* protects hw.stats */
 	spinlock_t stats_lock;
diff --git a/drivers/net/ethernet/atheros/alx/hw.c b/drivers/net/ethernet/atheros/alx/hw.c
index 1fe35e4..6ac40b0 100644
--- a/drivers/net/ethernet/atheros/alx/hw.c
+++ b/drivers/net/ethernet/atheros/alx/hw.c
@@ -1031,6 +1031,20 @@
 	alx_write_mem32(hw, ALX_WRR, val);
 }
 
+void alx_mask_msix(struct alx_hw *hw, int index, bool mask)
+{
+	u32 reg, val;
+
+	reg = ALX_MSIX_ENTRY_BASE + index * PCI_MSIX_ENTRY_SIZE +
+		PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+	val = mask ? PCI_MSIX_ENTRY_CTRL_MASKBIT : 0;
+
+	alx_write_mem32(hw, reg, val);
+	alx_post_write(hw);
+}
+
+
 bool alx_get_phy_info(struct alx_hw *hw)
 {
 	u16  devs1, devs2;
diff --git a/drivers/net/ethernet/atheros/alx/hw.h b/drivers/net/ethernet/atheros/alx/hw.h
index f289c05..0191477 100644
--- a/drivers/net/ethernet/atheros/alx/hw.h
+++ b/drivers/net/ethernet/atheros/alx/hw.h
@@ -562,6 +562,7 @@
 void alx_set_macaddr(struct alx_hw *hw, const u8 *addr);
 bool alx_phy_configured(struct alx_hw *hw);
 void alx_configure_basic(struct alx_hw *hw);
+void alx_mask_msix(struct alx_hw *hw, int index, bool mask);
 void alx_disable_rss(struct alx_hw *hw);
 bool alx_get_phy_info(struct alx_hw *hw);
 void alx_update_hw_stats(struct alx_hw *hw);
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 6453148..c0f84b7 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -51,6 +51,9 @@
 
 const char alx_drv_name[] = "alx";
 
+static bool msix = false;
+module_param(msix, bool, 0);
+MODULE_PARM_DESC(msix, "Enable msi-x interrupt support");
 
 static void alx_free_txbuf(struct alx_priv *alx, int entry)
 {
@@ -292,32 +295,29 @@
 	napi_complete(&alx->napi);
 
 	/* enable interrupt */
-	spin_lock_irqsave(&alx->irq_lock, flags);
-	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
-	alx_write_mem32(hw, ALX_IMR, alx->int_mask);
-	spin_unlock_irqrestore(&alx->irq_lock, flags);
+	if (alx->flags & ALX_FLAG_USING_MSIX) {
+		alx_mask_msix(hw, 1, false);
+	} else {
+		spin_lock_irqsave(&alx->irq_lock, flags);
+		alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
+		alx_write_mem32(hw, ALX_IMR, alx->int_mask);
+		spin_unlock_irqrestore(&alx->irq_lock, flags);
+	}
 
 	alx_post_write(hw);
 
 	return work;
 }
 
-static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
+static bool alx_intr_handle_misc(struct alx_priv *alx, u32 intr)
 {
 	struct alx_hw *hw = &alx->hw;
-	bool write_int_mask = false;
-
-	spin_lock(&alx->irq_lock);
-
-	/* ACK interrupt */
-	alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS);
-	intr &= alx->int_mask;
 
 	if (intr & ALX_ISR_FATAL) {
 		netif_warn(alx, hw, alx->dev,
 			   "fatal interrupt 0x%x, resetting\n", intr);
 		alx_schedule_reset(alx);
-		goto out;
+		return true;
 	}
 
 	if (intr & ALX_ISR_ALERT)
@@ -329,19 +329,32 @@
 		 * is cleared, the interrupt status could be cleared.
 		 */
 		alx->int_mask &= ~ALX_ISR_PHY;
-		write_int_mask = true;
+		alx_write_mem32(hw, ALX_IMR, alx->int_mask);
 		alx_schedule_link_check(alx);
 	}
 
+	return false;
+}
+
+static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
+{
+	struct alx_hw *hw = &alx->hw;
+
+	spin_lock(&alx->irq_lock);
+
+	/* ACK interrupt */
+	alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS);
+	intr &= alx->int_mask;
+
+	if (alx_intr_handle_misc(alx, intr))
+		goto out;
+
 	if (intr & (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)) {
 		napi_schedule(&alx->napi);
 		/* mask rx/tx interrupt, enable them when napi complete */
 		alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
-		write_int_mask = true;
-	}
-
-	if (write_int_mask)
 		alx_write_mem32(hw, ALX_IMR, alx->int_mask);
+	}
 
 	alx_write_mem32(hw, ALX_ISR, 0);
 
@@ -350,6 +363,46 @@
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t alx_intr_msix_ring(int irq, void *data)
+{
+	struct alx_priv *alx = data;
+	struct alx_hw *hw = &alx->hw;
+
+	/* mask interrupt to ACK chip */
+	alx_mask_msix(hw, 1, true);
+	/* clear interrupt status */
+	alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0));
+
+	napi_schedule(&alx->napi);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t alx_intr_msix_misc(int irq, void *data)
+{
+	struct alx_priv *alx = data;
+	struct alx_hw *hw = &alx->hw;
+	u32 intr;
+
+	/* mask interrupt to ACK chip */
+	alx_mask_msix(hw, 0, true);
+
+	/* read interrupt status */
+	intr = alx_read_mem32(hw, ALX_ISR);
+	intr &= (alx->int_mask & ~ALX_ISR_ALL_QUEUES);
+
+	if (alx_intr_handle_misc(alx, intr))
+		return IRQ_HANDLED;
+
+	/* clear interrupt status */
+	alx_write_mem32(hw, ALX_ISR, intr);
+
+	/* enable interrupt again */
+	alx_mask_msix(hw, 0, false);
+
+	return IRQ_HANDLED;
+}
+
 static irqreturn_t alx_intr_msi(int irq, void *data)
 {
 	struct alx_priv *alx = data;
@@ -614,31 +667,136 @@
 static void alx_config_vector_mapping(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
+	u32 tbl = 0;
 
-	alx_write_mem32(hw, ALX_MSI_MAP_TBL1, 0);
+	if (alx->flags & ALX_FLAG_USING_MSIX) {
+		tbl |= 1 << ALX_MSI_MAP_TBL1_TXQ0_SHIFT;
+		tbl |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
+	}
+
+	alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl);
 	alx_write_mem32(hw, ALX_MSI_MAP_TBL2, 0);
 	alx_write_mem32(hw, ALX_MSI_ID_MAP, 0);
 }
 
+static bool alx_enable_msix(struct alx_priv *alx)
+{
+	int i, err, num_vec = 2;
+
+	alx->msix_entries = kcalloc(num_vec, sizeof(struct msix_entry),
+				    GFP_KERNEL);
+	if (!alx->msix_entries) {
+		netdev_warn(alx->dev, "Allocation of msix entries failed!\n");
+		return false;
+	}
+
+	for (i = 0; i < num_vec; i++)
+		alx->msix_entries[i].entry = i;
+
+	err = pci_enable_msix(alx->hw.pdev, alx->msix_entries, num_vec);
+	if (err) {
+		kfree(alx->msix_entries);
+		netdev_warn(alx->dev, "Enabling MSI-X interrupts failed!\n");
+		return false;
+	}
+
+	alx->num_vec = num_vec;
+	return true;
+}
+
+static int alx_request_msix(struct alx_priv *alx)
+{
+	struct net_device *netdev = alx->dev;
+	int i, err, vector = 0, free_vector = 0;
+
+	err = request_irq(alx->msix_entries[0].vector, alx_intr_msix_misc,
+			  0, netdev->name, alx);
+	if (err)
+		goto out_err;
+
+	vector++;
+	sprintf(alx->irq_lbl, "%s-TxRx-0", netdev->name);
+
+	err = request_irq(alx->msix_entries[vector].vector,
+			  alx_intr_msix_ring, 0, alx->irq_lbl, alx);
+		if (err)
+			goto out_free;
+
+	return 0;
+
+out_free:
+	free_irq(alx->msix_entries[free_vector++].vector, alx);
+
+	vector--;
+	for (i = 0; i < vector; i++)
+		free_irq(alx->msix_entries[free_vector++].vector, alx);
+
+out_err:
+	return err;
+}
+
+static void alx_init_intr(struct alx_priv *alx, bool msix)
+{
+	if (msix) {
+		if (alx_enable_msix(alx))
+			alx->flags |= ALX_FLAG_USING_MSIX;
+	}
+
+	if (!(alx->flags & ALX_FLAG_USING_MSIX)) {
+		alx->num_vec = 1;
+
+		if (!pci_enable_msi(alx->hw.pdev))
+			alx->flags |= ALX_FLAG_USING_MSI;
+	}
+}
+
+static void alx_disable_advanced_intr(struct alx_priv *alx)
+{
+	if (alx->flags & ALX_FLAG_USING_MSIX) {
+		kfree(alx->msix_entries);
+		pci_disable_msix(alx->hw.pdev);
+		alx->flags &= ~ALX_FLAG_USING_MSIX;
+	}
+
+	if (alx->flags & ALX_FLAG_USING_MSI) {
+		pci_disable_msi(alx->hw.pdev);
+		alx->flags &= ~ALX_FLAG_USING_MSI;
+	}
+}
+
 static void alx_irq_enable(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
+	int i;
 
 	/* level-1 interrupt switch */
 	alx_write_mem32(hw, ALX_ISR, 0);
 	alx_write_mem32(hw, ALX_IMR, alx->int_mask);
 	alx_post_write(hw);
+
+	if (alx->flags & ALX_FLAG_USING_MSIX)
+		/* enable all msix irqs */
+		for (i = 0; i < alx->num_vec; i++)
+			alx_mask_msix(hw, i, false);
 }
 
 static void alx_irq_disable(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
+	int i;
 
 	alx_write_mem32(hw, ALX_ISR, ALX_ISR_DIS);
 	alx_write_mem32(hw, ALX_IMR, 0);
 	alx_post_write(hw);
 
-	synchronize_irq(alx->hw.pdev->irq);
+	if (alx->flags & ALX_FLAG_USING_MSIX) {
+		for (i = 0; i < alx->num_vec; i++) {
+			alx_mask_msix(hw, i, true);
+			synchronize_irq(alx->msix_entries[i].vector);
+		}
+	} else {
+		synchronize_irq(alx->hw.pdev->irq);
+	}
 }
 
 static int alx_request_irq(struct alx_priv *alx)
@@ -650,9 +808,18 @@
 
 	msi_ctrl = (hw->imt >> 1) << ALX_MSI_RETRANS_TM_SHIFT;
 
-	if (!pci_enable_msi(alx->hw.pdev)) {
-		alx->msi = true;
+	if (alx->flags & ALX_FLAG_USING_MSIX) {
+		alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl);
+		err = alx_request_msix(alx);
+		if (!err)
+			goto out;
 
+		/* msix request failed, realloc resources */
+		alx_disable_advanced_intr(alx);
+		alx_init_intr(alx, false);
+	}
+
+	if (alx->flags & ALX_FLAG_USING_MSI) {
 		alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER,
 				msi_ctrl | ALX_MSI_MASK_SEL_LINE);
 		err = request_irq(pdev->irq, alx_intr_msi, 0,
@@ -660,6 +827,7 @@
 		if (!err)
 			goto out;
 		/* fall back to legacy interrupt */
+		alx->flags &= ~ALX_FLAG_USING_MSI;
 		pci_disable_msi(alx->hw.pdev);
 	}
 
@@ -669,19 +837,25 @@
 out:
 	if (!err)
 		alx_config_vector_mapping(alx);
+	else
+		netdev_err(alx->dev, "IRQ registration failed!\n");
 	return err;
 }
 
 static void alx_free_irq(struct alx_priv *alx)
 {
 	struct pci_dev *pdev = alx->hw.pdev;
+	int i;
 
-	free_irq(pdev->irq, alx);
-
-	if (alx->msi) {
-		pci_disable_msi(alx->hw.pdev);
-		alx->msi = false;
+	if (alx->flags & ALX_FLAG_USING_MSIX) {
+		/* we have only 2 vectors without multi queue support */
+		for (i = 0; i < 2; i++)
+			free_irq(alx->msix_entries[i].vector, alx);
+	} else {
+		free_irq(pdev->irq, alx);
 	}
+
+	alx_disable_advanced_intr(alx);
 }
 
 static int alx_identify_hw(struct alx_priv *alx)
@@ -847,12 +1021,14 @@
 {
 	int err;
 
+	alx_init_intr(alx, msix);
+
 	if (!resume)
 		netif_carrier_off(alx->dev);
 
 	err = alx_alloc_rings(alx);
 	if (err)
-		return err;
+		goto out_disable_adv_intr;
 
 	alx_configure(alx);
 
@@ -873,6 +1049,8 @@
 
 out_free_rings:
 	alx_free_rings(alx);
+out_disable_adv_intr:
+	alx_disable_advanced_intr(alx);
 	return err;
 }
 
@@ -993,6 +1171,18 @@
 	rtnl_unlock();
 }
 
+static int alx_tpd_req(struct sk_buff *skb)
+{
+	int num;
+
+	num = skb_shinfo(skb)->nr_frags + 1;
+	/* we need one extra descriptor for LSOv2 */
+	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+		num++;
+
+	return num;
+}
+
 static int alx_tx_csum(struct sk_buff *skb, struct alx_txd *first)
 {
 	u8 cso, css;
@@ -1012,6 +1202,45 @@
 	return 0;
 }
 
+static int alx_tso(struct sk_buff *skb, struct alx_txd *first)
+{
+	int err;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	err = skb_cow_head(skb, 0);
+	if (err < 0)
+		return err;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct iphdr *iph = ip_hdr(skb);
+
+		iph->check = 0;
+		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+							 0, IPPROTO_TCP, 0);
+		first->word1 |= 1 << TPD_IPV4_SHIFT;
+	} else if (skb_is_gso_v6(skb)) {
+		ipv6_hdr(skb)->payload_len = 0;
+		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						       &ipv6_hdr(skb)->daddr,
+						       0, IPPROTO_TCP, 0);
+		/* LSOv2: the first TPD only provides the packet length */
+		first->adrl.l.pkt_len = skb->len;
+		first->word1 |= 1 << TPD_LSO_V2_SHIFT;
+	}
+
+	first->word1 |= 1 << TPD_LSO_EN_SHIFT;
+	first->word1 |= (skb_transport_offset(skb) &
+			 TPD_L4HDROFFSET_MASK) << TPD_L4HDROFFSET_SHIFT;
+	first->word1 |= (skb_shinfo(skb)->gso_size &
+			 TPD_MSS_MASK) << TPD_MSS_SHIFT;
+	return 1;
+}
+
 static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 {
 	struct alx_tx_queue *txq = &alx->txq;
@@ -1022,6 +1251,16 @@
 	first_tpd = &txq->tpd[txq->write_idx];
 	tpd = first_tpd;
 
+	if (tpd->word1 & (1 << TPD_LSO_V2_SHIFT)) {
+		if (++txq->write_idx == alx->tx_ringsz)
+			txq->write_idx = 0;
+
+		tpd = &txq->tpd[txq->write_idx];
+		tpd->len = first_tpd->len;
+		tpd->vlan_tag = first_tpd->vlan_tag;
+		tpd->word1 = first_tpd->word1;
+	}
+
 	maplen = skb_headlen(skb);
 	dma = dma_map_single(&alx->hw.pdev->dev, skb->data, maplen,
 			     DMA_TO_DEVICE);
@@ -1082,9 +1321,9 @@
 	struct alx_priv *alx = netdev_priv(netdev);
 	struct alx_tx_queue *txq = &alx->txq;
 	struct alx_txd *first;
-	int tpdreq = skb_shinfo(skb)->nr_frags + 1;
+	int tso;
 
-	if (alx_tpd_avail(alx) < tpdreq) {
+	if (alx_tpd_avail(alx) < alx_tpd_req(skb)) {
 		netif_stop_queue(alx->dev);
 		goto drop;
 	}
@@ -1092,7 +1331,10 @@
 	first = &txq->tpd[txq->write_idx];
 	memset(first, 0, sizeof(*first));
 
-	if (alx_tx_csum(skb, first))
+	tso = alx_tso(skb, first);
+	if (tso < 0)
+		goto drop;
+	else if (!tso && alx_tx_csum(skb, first))
 		goto drop;
 
 	if (alx_map_tx_skb(alx, skb) < 0)
@@ -1172,7 +1414,10 @@
 {
 	struct alx_priv *alx = netdev_priv(netdev);
 
-	if (alx->msi)
+	if (alx->flags & ALX_FLAG_USING_MSIX) {
+		alx_intr_msix_misc(0, alx);
+		alx_intr_msix_ring(0, alx);
+	} else if (alx->flags & ALX_FLAG_USING_MSI)
 		alx_intr_msi(0, alx);
 	else
 		alx_intr_legacy(0, alx);
@@ -1351,7 +1596,10 @@
 		}
 	}
 
-	netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM;
+	netdev->hw_features = NETIF_F_SG |
+			      NETIF_F_HW_CSUM |
+			      NETIF_F_TSO |
+			      NETIF_F_TSO6;
 
 	if (alx_get_perm_macaddr(hw, hw->perm_addr)) {
 		dev_warn(&pdev->dev,
@@ -1545,6 +1793,8 @@
 	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
 	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2400),
 	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
+	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_E2500),
+	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
 	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8162),
 	  .driver_data = ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG },
 	{ PCI_VDEVICE(ATTANSIC, ALX_DEV_ID_AR8171) },
diff --git a/drivers/net/ethernet/atheros/alx/reg.h b/drivers/net/ethernet/atheros/alx/reg.h
index 0959e682..1fc2d85 100644
--- a/drivers/net/ethernet/atheros/alx/reg.h
+++ b/drivers/net/ethernet/atheros/alx/reg.h
@@ -38,6 +38,7 @@
 #define ALX_DEV_ID_AR8161				0x1091
 #define ALX_DEV_ID_E2200				0xe091
 #define ALX_DEV_ID_E2400				0xe0a1
+#define ALX_DEV_ID_E2500				0xe0b1
 #define ALX_DEV_ID_AR8162				0x1090
 #define ALX_DEV_ID_AR8171				0x10A1
 #define ALX_DEV_ID_AR8172				0x10A0
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 74f0a37..17aa33c 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -1486,7 +1486,7 @@
 	b44_enable_ints(bp);
 
 	if (bp->flags & B44_FLAG_EXTERNAL_PHY)
-		phy_start(bp->phydev);
+		phy_start(dev->phydev);
 
 	netif_start_queue(dev);
 out:
@@ -1651,7 +1651,7 @@
 	netif_stop_queue(dev);
 
 	if (bp->flags & B44_FLAG_EXTERNAL_PHY)
-		phy_stop(bp->phydev);
+		phy_stop(dev->phydev);
 
 	napi_disable(&bp->napi);
 
@@ -1832,90 +1832,100 @@
 	return r;
 }
 
-static int b44_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int b44_get_link_ksettings(struct net_device *dev,
+				  struct ethtool_link_ksettings *cmd)
 {
 	struct b44 *bp = netdev_priv(dev);
+	u32 supported, advertising;
 
 	if (bp->flags & B44_FLAG_EXTERNAL_PHY) {
-		BUG_ON(!bp->phydev);
-		return phy_ethtool_gset(bp->phydev, cmd);
+		BUG_ON(!dev->phydev);
+		return phy_ethtool_ksettings_get(dev->phydev, cmd);
 	}
 
-	cmd->supported = (SUPPORTED_Autoneg);
-	cmd->supported |= (SUPPORTED_100baseT_Half |
-			  SUPPORTED_100baseT_Full |
-			  SUPPORTED_10baseT_Half |
-			  SUPPORTED_10baseT_Full |
-			  SUPPORTED_MII);
+	supported = (SUPPORTED_Autoneg);
+	supported |= (SUPPORTED_100baseT_Half |
+		      SUPPORTED_100baseT_Full |
+		      SUPPORTED_10baseT_Half |
+		      SUPPORTED_10baseT_Full |
+		      SUPPORTED_MII);
 
-	cmd->advertising = 0;
+	advertising = 0;
 	if (bp->flags & B44_FLAG_ADV_10HALF)
-		cmd->advertising |= ADVERTISED_10baseT_Half;
+		advertising |= ADVERTISED_10baseT_Half;
 	if (bp->flags & B44_FLAG_ADV_10FULL)
-		cmd->advertising |= ADVERTISED_10baseT_Full;
+		advertising |= ADVERTISED_10baseT_Full;
 	if (bp->flags & B44_FLAG_ADV_100HALF)
-		cmd->advertising |= ADVERTISED_100baseT_Half;
+		advertising |= ADVERTISED_100baseT_Half;
 	if (bp->flags & B44_FLAG_ADV_100FULL)
-		cmd->advertising |= ADVERTISED_100baseT_Full;
-	cmd->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-	ethtool_cmd_speed_set(cmd, ((bp->flags & B44_FLAG_100_BASE_T) ?
-				    SPEED_100 : SPEED_10));
-	cmd->duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ?
+		advertising |= ADVERTISED_100baseT_Full;
+	advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+	cmd->base.speed = (bp->flags & B44_FLAG_100_BASE_T) ?
+		SPEED_100 : SPEED_10;
+	cmd->base.duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ?
 		DUPLEX_FULL : DUPLEX_HALF;
-	cmd->port = 0;
-	cmd->phy_address = bp->phy_addr;
-	cmd->transceiver = (bp->flags & B44_FLAG_EXTERNAL_PHY) ?
-		XCVR_EXTERNAL : XCVR_INTERNAL;
-	cmd->autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ?
+	cmd->base.port = 0;
+	cmd->base.phy_address = bp->phy_addr;
+	cmd->base.autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ?
 		AUTONEG_DISABLE : AUTONEG_ENABLE;
-	if (cmd->autoneg == AUTONEG_ENABLE)
-		cmd->advertising |= ADVERTISED_Autoneg;
+	if (cmd->base.autoneg == AUTONEG_ENABLE)
+		advertising |= ADVERTISED_Autoneg;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
 	if (!netif_running(dev)){
-		ethtool_cmd_speed_set(cmd, 0);
-		cmd->duplex = 0xff;
+		cmd->base.speed = 0;
+		cmd->base.duplex = 0xff;
 	}
-	cmd->maxtxpkt = 0;
-	cmd->maxrxpkt = 0;
+
 	return 0;
 }
 
-static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int b44_set_link_ksettings(struct net_device *dev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct b44 *bp = netdev_priv(dev);
 	u32 speed;
 	int ret;
+	u32 advertising;
 
 	if (bp->flags & B44_FLAG_EXTERNAL_PHY) {
-		BUG_ON(!bp->phydev);
+		BUG_ON(!dev->phydev);
 		spin_lock_irq(&bp->lock);
 		if (netif_running(dev))
 			b44_setup_phy(bp);
 
-		ret = phy_ethtool_sset(bp->phydev, cmd);
+		ret = phy_ethtool_ksettings_set(dev->phydev, cmd);
 
 		spin_unlock_irq(&bp->lock);
 
 		return ret;
 	}
 
-	speed = ethtool_cmd_speed(cmd);
+	speed = cmd->base.speed;
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
 
 	/* We do not support gigabit. */
-	if (cmd->autoneg == AUTONEG_ENABLE) {
-		if (cmd->advertising &
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		if (advertising &
 		    (ADVERTISED_1000baseT_Half |
 		     ADVERTISED_1000baseT_Full))
 			return -EINVAL;
 	} else if ((speed != SPEED_100 &&
 		    speed != SPEED_10) ||
-		   (cmd->duplex != DUPLEX_HALF &&
-		    cmd->duplex != DUPLEX_FULL)) {
+		   (cmd->base.duplex != DUPLEX_HALF &&
+		    cmd->base.duplex != DUPLEX_FULL)) {
 			return -EINVAL;
 	}
 
 	spin_lock_irq(&bp->lock);
 
-	if (cmd->autoneg == AUTONEG_ENABLE) {
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
 		bp->flags &= ~(B44_FLAG_FORCE_LINK |
 			       B44_FLAG_100_BASE_T |
 			       B44_FLAG_FULL_DUPLEX |
@@ -1923,19 +1933,19 @@
 			       B44_FLAG_ADV_10FULL |
 			       B44_FLAG_ADV_100HALF |
 			       B44_FLAG_ADV_100FULL);
-		if (cmd->advertising == 0) {
+		if (advertising == 0) {
 			bp->flags |= (B44_FLAG_ADV_10HALF |
 				      B44_FLAG_ADV_10FULL |
 				      B44_FLAG_ADV_100HALF |
 				      B44_FLAG_ADV_100FULL);
 		} else {
-			if (cmd->advertising & ADVERTISED_10baseT_Half)
+			if (advertising & ADVERTISED_10baseT_Half)
 				bp->flags |= B44_FLAG_ADV_10HALF;
-			if (cmd->advertising & ADVERTISED_10baseT_Full)
+			if (advertising & ADVERTISED_10baseT_Full)
 				bp->flags |= B44_FLAG_ADV_10FULL;
-			if (cmd->advertising & ADVERTISED_100baseT_Half)
+			if (advertising & ADVERTISED_100baseT_Half)
 				bp->flags |= B44_FLAG_ADV_100HALF;
-			if (cmd->advertising & ADVERTISED_100baseT_Full)
+			if (advertising & ADVERTISED_100baseT_Full)
 				bp->flags |= B44_FLAG_ADV_100FULL;
 		}
 	} else {
@@ -1943,7 +1953,7 @@
 		bp->flags &= ~(B44_FLAG_100_BASE_T | B44_FLAG_FULL_DUPLEX);
 		if (speed == SPEED_100)
 			bp->flags |= B44_FLAG_100_BASE_T;
-		if (cmd->duplex == DUPLEX_FULL)
+		if (cmd->base.duplex == DUPLEX_FULL)
 			bp->flags |= B44_FLAG_FULL_DUPLEX;
 	}
 
@@ -2110,8 +2120,6 @@
 
 static const struct ethtool_ops b44_ethtool_ops = {
 	.get_drvinfo		= b44_get_drvinfo,
-	.get_settings		= b44_get_settings,
-	.set_settings		= b44_set_settings,
 	.nway_reset		= b44_nway_reset,
 	.get_link		= ethtool_op_get_link,
 	.get_wol		= b44_get_wol,
@@ -2125,6 +2133,8 @@
 	.get_strings		= b44_get_strings,
 	.get_sset_count		= b44_get_sset_count,
 	.get_ethtool_stats	= b44_get_ethtool_stats,
+	.get_link_ksettings	= b44_get_link_ksettings,
+	.set_link_ksettings	= b44_set_link_ksettings,
 };
 
 static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -2137,8 +2147,8 @@
 
 	spin_lock_irq(&bp->lock);
 	if (bp->flags & B44_FLAG_EXTERNAL_PHY) {
-		BUG_ON(!bp->phydev);
-		err = phy_mii_ioctl(bp->phydev, ifr, cmd);
+		BUG_ON(!dev->phydev);
+		err = phy_mii_ioctl(dev->phydev, ifr, cmd);
 	} else {
 		err = generic_mii_ioctl(&bp->mii_if, if_mii(ifr), cmd, NULL);
 	}
@@ -2206,7 +2216,7 @@
 static void b44_adjust_link(struct net_device *dev)
 {
 	struct b44 *bp = netdev_priv(dev);
-	struct phy_device *phydev = bp->phydev;
+	struct phy_device *phydev = dev->phydev;
 	bool status_changed = 0;
 
 	BUG_ON(!phydev);
@@ -2303,7 +2313,6 @@
 			      SUPPORTED_MII);
 	phydev->advertising = phydev->supported;
 
-	bp->phydev = phydev;
 	bp->old_link = 0;
 	bp->phy_addr = phydev->mdio.addr;
 
@@ -2323,9 +2332,10 @@
 
 static void b44_unregister_phy_one(struct b44 *bp)
 {
+	struct net_device *dev = bp->dev;
 	struct mii_bus *mii_bus = bp->mii_bus;
 
-	phy_disconnect(bp->phydev);
+	phy_disconnect(dev->phydev);
 	mdiobus_unregister(mii_bus);
 	mdiobus_free(mii_bus);
 }
diff --git a/drivers/net/ethernet/broadcom/b44.h b/drivers/net/ethernet/broadcom/b44.h
index 65d88d7..89d2cf3 100644
--- a/drivers/net/ethernet/broadcom/b44.h
+++ b/drivers/net/ethernet/broadcom/b44.h
@@ -404,7 +404,6 @@
 	u32			tx_pending;
 	u8			phy_addr;
 	u8			force_copybreak;
-	struct phy_device	*phydev;
 	struct mii_bus		*mii_bus;
 	int			old_link;
 	struct mii_if_info	mii_if;
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 6c8bc5f..ae364c7 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -791,7 +791,7 @@
 	int status_changed;
 
 	priv = netdev_priv(dev);
-	phydev = priv->phydev;
+	phydev = dev->phydev;
 	status_changed = 0;
 
 	if (priv->old_link != phydev->link) {
@@ -913,7 +913,6 @@
 		priv->old_link = 0;
 		priv->old_duplex = -1;
 		priv->old_pause = -1;
-		priv->phydev = phydev;
 	}
 
 	/* mask all interrupts and request them */
@@ -1085,7 +1084,7 @@
 			 ENETDMAC_IRMASK, priv->tx_chan);
 
 	if (priv->has_phy)
-		phy_start(priv->phydev);
+		phy_start(phydev);
 	else
 		bcm_enet_adjust_link(dev);
 
@@ -1127,7 +1126,7 @@
 	free_irq(dev->irq, dev);
 
 out_phy_disconnect:
-	phy_disconnect(priv->phydev);
+	phy_disconnect(phydev);
 
 	return ret;
 }
@@ -1190,7 +1189,7 @@
 	netif_stop_queue(dev);
 	napi_disable(&priv->napi);
 	if (priv->has_phy)
-		phy_stop(priv->phydev);
+		phy_stop(dev->phydev);
 	del_timer_sync(&priv->rx_timeout);
 
 	/* mask all interrupts */
@@ -1234,10 +1233,8 @@
 	free_irq(dev->irq, dev);
 
 	/* release phy */
-	if (priv->has_phy) {
-		phy_disconnect(priv->phydev);
-		priv->phydev = NULL;
-	}
+	if (priv->has_phy)
+		phy_disconnect(dev->phydev);
 
 	return 0;
 }
@@ -1437,64 +1434,68 @@
 
 	priv = netdev_priv(dev);
 	if (priv->has_phy) {
-		if (!priv->phydev)
+		if (!dev->phydev)
 			return -ENODEV;
-		return genphy_restart_aneg(priv->phydev);
+		return genphy_restart_aneg(dev->phydev);
 	}
 
 	return -EOPNOTSUPP;
 }
 
-static int bcm_enet_get_settings(struct net_device *dev,
-				 struct ethtool_cmd *cmd)
+static int bcm_enet_get_link_ksettings(struct net_device *dev,
+				       struct ethtool_link_ksettings *cmd)
 {
 	struct bcm_enet_priv *priv;
+	u32 supported, advertising;
 
 	priv = netdev_priv(dev);
 
-	cmd->maxrxpkt = 0;
-	cmd->maxtxpkt = 0;
-
 	if (priv->has_phy) {
-		if (!priv->phydev)
+		if (!dev->phydev)
 			return -ENODEV;
-		return phy_ethtool_gset(priv->phydev, cmd);
+		return phy_ethtool_ksettings_get(dev->phydev, cmd);
 	} else {
-		cmd->autoneg = 0;
-		ethtool_cmd_speed_set(cmd, ((priv->force_speed_100)
-					    ? SPEED_100 : SPEED_10));
-		cmd->duplex = (priv->force_duplex_full) ?
+		cmd->base.autoneg = 0;
+		cmd->base.speed = (priv->force_speed_100) ?
+			SPEED_100 : SPEED_10;
+		cmd->base.duplex = (priv->force_duplex_full) ?
 			DUPLEX_FULL : DUPLEX_HALF;
-		cmd->supported = ADVERTISED_10baseT_Half  |
+		supported = ADVERTISED_10baseT_Half |
 			ADVERTISED_10baseT_Full |
 			ADVERTISED_100baseT_Half |
 			ADVERTISED_100baseT_Full;
-		cmd->advertising = 0;
-		cmd->port = PORT_MII;
-		cmd->transceiver = XCVR_EXTERNAL;
+		advertising = 0;
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.supported, supported);
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.advertising, advertising);
+		cmd->base.port = PORT_MII;
 	}
 	return 0;
 }
 
-static int bcm_enet_set_settings(struct net_device *dev,
-				 struct ethtool_cmd *cmd)
+static int bcm_enet_set_link_ksettings(struct net_device *dev,
+				       const struct ethtool_link_ksettings *cmd)
 {
 	struct bcm_enet_priv *priv;
 
 	priv = netdev_priv(dev);
 	if (priv->has_phy) {
-		if (!priv->phydev)
+		if (!dev->phydev)
 			return -ENODEV;
-		return phy_ethtool_sset(priv->phydev, cmd);
+		return phy_ethtool_ksettings_set(dev->phydev, cmd);
 	} else {
 
-		if (cmd->autoneg ||
-		    (cmd->speed != SPEED_100 && cmd->speed != SPEED_10) ||
-		    cmd->port != PORT_MII)
+		if (cmd->base.autoneg ||
+		    (cmd->base.speed != SPEED_100 &&
+		     cmd->base.speed != SPEED_10) ||
+		    cmd->base.port != PORT_MII)
 			return -EINVAL;
 
-		priv->force_speed_100 = (cmd->speed == SPEED_100) ? 1 : 0;
-		priv->force_duplex_full = (cmd->duplex == DUPLEX_FULL) ? 1 : 0;
+		priv->force_speed_100 =
+			(cmd->base.speed == SPEED_100) ? 1 : 0;
+		priv->force_duplex_full =
+			(cmd->base.duplex == DUPLEX_FULL) ? 1 : 0;
 
 		if (netif_running(dev))
 			bcm_enet_adjust_link(dev);
@@ -1588,14 +1589,14 @@
 	.get_sset_count		= bcm_enet_get_sset_count,
 	.get_ethtool_stats      = bcm_enet_get_ethtool_stats,
 	.nway_reset		= bcm_enet_nway_reset,
-	.get_settings		= bcm_enet_get_settings,
-	.set_settings		= bcm_enet_set_settings,
 	.get_drvinfo		= bcm_enet_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
 	.get_ringparam		= bcm_enet_get_ringparam,
 	.set_ringparam		= bcm_enet_set_ringparam,
 	.get_pauseparam		= bcm_enet_get_pauseparam,
 	.set_pauseparam		= bcm_enet_set_pauseparam,
+	.get_link_ksettings	= bcm_enet_get_link_ksettings,
+	.set_link_ksettings	= bcm_enet_set_link_ksettings,
 };
 
 static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
@@ -1604,9 +1605,9 @@
 
 	priv = netdev_priv(dev);
 	if (priv->has_phy) {
-		if (!priv->phydev)
+		if (!dev->phydev)
 			return -ENODEV;
-		return phy_mii_ioctl(priv->phydev, rq, cmd);
+		return phy_mii_ioctl(dev->phydev, rq, cmd);
 	} else {
 		struct mii_if_info mii;
 
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h
index f55af43..0a1b7b2 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h
@@ -290,7 +290,6 @@
 
 	/* used when a phy is connected (phylib used) */
 	struct mii_bus *mii_bus;
-	struct phy_device *phydev;
 	int old_link;
 	int old_duplex;
 	int old_pause;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index b2d3086..c3354b9 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -58,8 +58,8 @@
 static inline void intrl2_##which##_mask_clear(struct bcm_sysport_priv *priv, \
 						u32 mask)		\
 {									\
-	intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);	\
 	priv->irq##which##_mask &= ~(mask);				\
+	intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR);	\
 }									\
 static inline void intrl2_##which##_mask_set(struct bcm_sysport_priv *priv, \
 						u32 mask)		\
@@ -1692,7 +1692,7 @@
 	return 0;
 }
 
-static struct ethtool_ops bcm_sysport_ethtool_ops = {
+static const struct ethtool_ops bcm_sysport_ethtool_ops = {
 	.get_drvinfo		= bcm_sysport_get_drvinfo,
 	.get_msglevel		= bcm_sysport_get_msglvl,
 	.set_msglevel		= bcm_sysport_set_msglvl,
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 9a9745c4..c16ec3a 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -92,6 +92,7 @@
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */
 static int bgmac_probe(struct bcma_device *core)
 {
+	struct bcma_chipinfo *ci = &core->bus->chipinfo;
 	struct ssb_sprom *sprom = &core->bus->sprom;
 	struct mii_bus *mii_bus;
 	struct bgmac *bgmac;
@@ -157,9 +158,10 @@
 	dev_info(bgmac->dev, "Found PHY addr: %d%s\n", bgmac->phyaddr,
 		 bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : "");
 
-	if (!bgmac_is_bcm4707_family(core)) {
+	if (!bgmac_is_bcm4707_family(core) &&
+	    !(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) {
 		mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr);
-		if (!IS_ERR(mii_bus)) {
+		if (IS_ERR(mii_bus)) {
 			err = PTR_ERR(mii_bus);
 			goto err;
 		}
@@ -230,6 +232,21 @@
 		bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
 		bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
 		break;
+	case BCMA_CHIP_ID_BCM53573:
+		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
+		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
+		if (ci->pkg == BCMA_PKG_ID_BCM47189)
+			bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED;
+		if (core->core_unit == 0) {
+			bgmac->feature_flags |= BGMAC_FEAT_CC4_IF_SW_TYPE;
+			if (ci->pkg == BCMA_PKG_ID_BCM47189)
+				bgmac->feature_flags |=
+					BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII;
+		} else if (core->core_unit == 1) {
+			bgmac->feature_flags |= BGMAC_FEAT_IRQ_ID_OOB_6;
+			bgmac->feature_flags |= BGMAC_FEAT_CC7_IF_TYPE_RGMII;
+		}
+		break;
 	default:
 		bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
 		bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK;
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index c4751ec..6ea0e5f 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -932,7 +932,8 @@
 			et_swtype <<= 4;
 			sw_type = et_swtype;
 		} else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_EPHYRMII) {
-			sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII;
+			sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RMII |
+				  BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII;
 		} else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_RGMII) {
 			sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RGMII |
 				  BGMAC_CHIPCTL_1_SW_TYPE_RGMII;
@@ -940,6 +941,27 @@
 		bgmac_cco_ctl_maskset(bgmac, 1, ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK |
 						  BGMAC_CHIPCTL_1_SW_TYPE_MASK),
 				      sw_type);
+	} else if (bgmac->feature_flags & BGMAC_FEAT_CC4_IF_SW_TYPE) {
+		u32 sw_type = BGMAC_CHIPCTL_4_IF_TYPE_MII |
+			      BGMAC_CHIPCTL_4_SW_TYPE_EPHY;
+		u8 et_swtype = 0;
+		char buf[4];
+
+		if (bcm47xx_nvram_getenv("et_swtype", buf, sizeof(buf)) > 0) {
+			if (kstrtou8(buf, 0, &et_swtype))
+				dev_err(bgmac->dev, "Failed to parse et_swtype (%s)\n",
+					buf);
+			sw_type = (et_swtype & 0x0f) << 12;
+		} else if (bgmac->feature_flags & BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII) {
+			sw_type = BGMAC_CHIPCTL_4_IF_TYPE_RGMII |
+				  BGMAC_CHIPCTL_4_SW_TYPE_RGMII;
+		}
+		bgmac_cco_ctl_maskset(bgmac, 4, ~(BGMAC_CHIPCTL_4_IF_TYPE_MASK |
+						  BGMAC_CHIPCTL_4_SW_TYPE_MASK),
+				      sw_type);
+	} else if (bgmac->feature_flags & BGMAC_FEAT_CC7_IF_TYPE_RGMII) {
+		bgmac_cco_ctl_maskset(bgmac, 7, ~BGMAC_CHIPCTL_7_IF_TYPE_MASK,
+				      BGMAC_CHIPCTL_7_IF_TYPE_RGMII);
 	}
 
 	if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
@@ -1467,6 +1489,10 @@
 	 */
 	bgmac_clk_enable(bgmac, 0);
 
+	/* This seems to be fixing IRQ by assigning OOB #6 to the core */
+	if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6)
+		bgmac_idm_write(bgmac, BCMA_OOB_SEL_OUT_A30, 0x86);
+
 	bgmac_chip_reset(bgmac);
 
 	err = bgmac_dma_alloc(bgmac);
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 24a2502..80836b4 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -369,6 +369,21 @@
 #define BGMAC_CHIPCTL_1_SW_TYPE_RGMII		0x000000C0
 #define BGMAC_CHIPCTL_1_RXC_DLL_BYPASS		0x00010000
 
+#define BGMAC_CHIPCTL_4_IF_TYPE_MASK		0x00003000
+#define BGMAC_CHIPCTL_4_IF_TYPE_RMII		0x00000000
+#define BGMAC_CHIPCTL_4_IF_TYPE_MII		0x00001000
+#define BGMAC_CHIPCTL_4_IF_TYPE_RGMII		0x00002000
+#define BGMAC_CHIPCTL_4_SW_TYPE_MASK		0x0000C000
+#define BGMAC_CHIPCTL_4_SW_TYPE_EPHY		0x00000000
+#define BGMAC_CHIPCTL_4_SW_TYPE_EPHYMII		0x00004000
+#define BGMAC_CHIPCTL_4_SW_TYPE_EPHYRMII	0x00008000
+#define BGMAC_CHIPCTL_4_SW_TYPE_RGMII		0x0000C000
+
+#define BGMAC_CHIPCTL_7_IF_TYPE_MASK		0x000000C0
+#define BGMAC_CHIPCTL_7_IF_TYPE_RMII		0x00000000
+#define BGMAC_CHIPCTL_7_IF_TYPE_MII		0x00000040
+#define BGMAC_CHIPCTL_7_IF_TYPE_RGMII		0x00000080
+
 #define BGMAC_WEIGHT	64
 
 #define ETHER_MAX_LEN   1518
@@ -390,6 +405,10 @@
 #define BGMAC_FEAT_NO_CLR_MIB		BIT(13)
 #define BGMAC_FEAT_FORCE_SPEED_2500	BIT(14)
 #define BGMAC_FEAT_CMDCFG_SR_REV4	BIT(15)
+#define BGMAC_FEAT_IRQ_ID_OOB_6		BIT(16)
+#define BGMAC_FEAT_CC4_IF_SW_TYPE	BIT(17)
+#define BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII	BIT(18)
+#define BGMAC_FEAT_CC7_IF_TYPE_RGMII	BIT(19)
 
 struct bgmac_slot_info {
 	union {
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 8fc3f3c..27f11a5 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -50,7 +50,7 @@
 #include <linux/log2.h>
 #include <linux/aer.h>
 
-#if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
+#if IS_ENABLED(CONFIG_CNIC)
 #define BCM_CNIC 1
 #include "cnic_if.h"
 #endif
@@ -6356,10 +6356,6 @@
 	struct bnx2 *bp = netdev_priv(dev);
 	int rc;
 
-	rc = bnx2_request_firmware(bp);
-	if (rc < 0)
-		goto out;
-
 	netif_carrier_off(dev);
 
 	bnx2_disable_int(bp);
@@ -6428,7 +6424,6 @@
 	bnx2_free_irq(bp);
 	bnx2_free_mem(bp);
 	bnx2_del_napi(bp);
-	bnx2_release_firmware(bp);
 	goto out;
 }
 
@@ -8575,6 +8570,12 @@
 
 	pci_set_drvdata(pdev, dev);
 
+	rc = bnx2_request_firmware(bp);
+	if (rc < 0)
+		goto error;
+
+
+	bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
 	memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
 
 	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
@@ -8607,6 +8608,7 @@
 	return 0;
 
 error:
+	bnx2_release_firmware(bp);
 	pci_iounmap(pdev, bp->regview);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 0e68fad..243cb97 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -492,7 +492,8 @@
 int bnx2x_get_vf_config(struct net_device *dev, int vf,
 			struct ifla_vf_info *ivi);
 int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac);
-int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos);
+int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+		      __be16 vlan_proto);
 
 /* select_queue callback */
 u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb,
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 97e8925..20fe6a8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -772,6 +772,11 @@
 		(bp->common.bc_ver & 0xff00) >> 8,
 		(bp->common.bc_ver & 0xff));
 
+	if (pci_channel_offline(bp->pdev)) {
+		BNX2X_ERR("Cannot dump MCP info while in PCI error\n");
+		return;
+	}
+
 	val = REG_RD(bp, MCP_REG_MCPR_CPU_PROGRAM_COUNTER);
 	if (val == REG_RD(bp, MCP_REG_MCPR_CPU_PROGRAM_COUNTER))
 		BNX2X_ERR("%s" "MCP PC at 0x%x\n", lvl, val);
@@ -9415,10 +9420,16 @@
 	/* Release IRQs */
 	bnx2x_free_irq(bp);
 
-	/* Reset the chip */
-	rc = bnx2x_reset_hw(bp, reset_code);
-	if (rc)
-		BNX2X_ERR("HW_RESET failed\n");
+	/* Reset the chip, unless PCI function is offline. If we reach this
+	 * point following a PCI error handling, it means device is really
+	 * in a bad state and we're about to remove it, so reset the chip
+	 * is not a good idea.
+	 */
+	if (!pci_channel_offline(bp->pdev)) {
+		rc = bnx2x_reset_hw(bp, reset_code);
+		if (rc)
+			BNX2X_ERR("HW_RESET failed\n");
+	}
 
 	/* Report UNLOAD_DONE to MCP */
 	bnx2x_send_unload_done(bp, keep_link);
@@ -12552,39 +12563,62 @@
 	return 0;
 }
 
-static int bnx2x_init_mcast_macs_list(struct bnx2x *bp,
-				      struct bnx2x_mcast_ramrod_params *p)
+struct bnx2x_mcast_list_elem_group
 {
-	int mc_count = netdev_mc_count(bp->dev);
-	struct bnx2x_mcast_list_elem *mc_mac =
-		kcalloc(mc_count, sizeof(*mc_mac), GFP_ATOMIC);
-	struct netdev_hw_addr *ha;
+	struct list_head mcast_group_link;
+	struct bnx2x_mcast_list_elem mcast_elems[];
+};
 
-	if (!mc_mac)
-		return -ENOMEM;
+#define MCAST_ELEMS_PER_PG \
+	((PAGE_SIZE - sizeof(struct bnx2x_mcast_list_elem_group)) / \
+	sizeof(struct bnx2x_mcast_list_elem))
 
-	INIT_LIST_HEAD(&p->mcast_list);
+static void bnx2x_free_mcast_macs_list(struct list_head *mcast_group_list)
+{
+	struct bnx2x_mcast_list_elem_group *current_mcast_group;
 
-	netdev_for_each_mc_addr(ha, bp->dev) {
-		mc_mac->mac = bnx2x_mc_addr(ha);
-		list_add_tail(&mc_mac->link, &p->mcast_list);
-		mc_mac++;
+	while (!list_empty(mcast_group_list)) {
+		current_mcast_group = list_first_entry(mcast_group_list,
+				      struct bnx2x_mcast_list_elem_group,
+				      mcast_group_link);
+		list_del(&current_mcast_group->mcast_group_link);
+		free_page((unsigned long)current_mcast_group);
 	}
-
-	p->mcast_list_len = mc_count;
-
-	return 0;
 }
 
-static void bnx2x_free_mcast_macs_list(
-	struct bnx2x_mcast_ramrod_params *p)
+static int bnx2x_init_mcast_macs_list(struct bnx2x *bp,
+				      struct bnx2x_mcast_ramrod_params *p,
+				      struct list_head *mcast_group_list)
 {
-	struct bnx2x_mcast_list_elem *mc_mac =
-		list_first_entry(&p->mcast_list, struct bnx2x_mcast_list_elem,
-				 link);
+	struct bnx2x_mcast_list_elem *mc_mac;
+	struct netdev_hw_addr *ha;
+	struct bnx2x_mcast_list_elem_group *current_mcast_group = NULL;
+	int mc_count = netdev_mc_count(bp->dev);
+	int offset = 0;
 
-	WARN_ON(!mc_mac);
-	kfree(mc_mac);
+	INIT_LIST_HEAD(&p->mcast_list);
+	netdev_for_each_mc_addr(ha, bp->dev) {
+		if (!offset) {
+			current_mcast_group =
+				(struct bnx2x_mcast_list_elem_group *)
+				__get_free_page(GFP_ATOMIC);
+			if (!current_mcast_group) {
+				bnx2x_free_mcast_macs_list(mcast_group_list);
+				BNX2X_ERR("Failed to allocate mc MAC list\n");
+				return -ENOMEM;
+			}
+			list_add(&current_mcast_group->mcast_group_link,
+				 mcast_group_list);
+		}
+		mc_mac = &current_mcast_group->mcast_elems[offset];
+		mc_mac->mac = bnx2x_mc_addr(ha);
+		list_add_tail(&mc_mac->link, &p->mcast_list);
+		offset++;
+		if (offset == MCAST_ELEMS_PER_PG)
+			offset = 0;
+	}
+	p->mcast_list_len = mc_count;
+	return 0;
 }
 
 /**
@@ -12632,8 +12666,9 @@
 				 BNX2X_UC_LIST_MAC, &ramrod_flags);
 }
 
-static int bnx2x_set_mc_list(struct bnx2x *bp)
+static int bnx2x_set_mc_list_e1x(struct bnx2x *bp)
 {
+	LIST_HEAD(mcast_group_list);
 	struct net_device *dev = bp->dev;
 	struct bnx2x_mcast_ramrod_params rparam = {NULL};
 	int rc = 0;
@@ -12649,12 +12684,9 @@
 
 	/* then, configure a new MACs list */
 	if (netdev_mc_count(dev)) {
-		rc = bnx2x_init_mcast_macs_list(bp, &rparam);
-		if (rc) {
-			BNX2X_ERR("Failed to create multicast MACs list: %d\n",
-				  rc);
+		rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list);
+		if (rc)
 			return rc;
-		}
 
 		/* Now add the new MACs */
 		rc = bnx2x_config_mcast(bp, &rparam,
@@ -12663,7 +12695,44 @@
 			BNX2X_ERR("Failed to set a new multicast configuration: %d\n",
 				  rc);
 
-		bnx2x_free_mcast_macs_list(&rparam);
+		bnx2x_free_mcast_macs_list(&mcast_group_list);
+	}
+
+	return rc;
+}
+
+static int bnx2x_set_mc_list(struct bnx2x *bp)
+{
+	LIST_HEAD(mcast_group_list);
+	struct bnx2x_mcast_ramrod_params rparam = {NULL};
+	struct net_device *dev = bp->dev;
+	int rc = 0;
+
+	/* On older adapters, we need to flush and re-add filters */
+	if (CHIP_IS_E1x(bp))
+		return bnx2x_set_mc_list_e1x(bp);
+
+	rparam.mcast_obj = &bp->mcast_obj;
+
+	if (netdev_mc_count(dev)) {
+		rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list);
+		if (rc)
+			return rc;
+
+		/* Override the curently configured set of mc filters */
+		rc = bnx2x_config_mcast(bp, &rparam,
+					BNX2X_MCAST_CMD_SET);
+		if (rc < 0)
+			BNX2X_ERR("Failed to set a new multicast configuration: %d\n",
+				  rc);
+
+		bnx2x_free_mcast_macs_list(&mcast_group_list);
+	} else {
+		/* If no mc addresses are required, flush the configuration */
+		rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
+		if (rc)
+			BNX2X_ERR("Failed to clear multicast configuration %d\n",
+				  rc);
 	}
 
 	return rc;
@@ -13203,13 +13272,22 @@
 		NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO |
 		NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX;
 	if (!chip_is_e1x) {
-		dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL |
-				    NETIF_F_GSO_IPXIP4;
+		dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM |
+				    NETIF_F_GSO_IPXIP4 |
+				    NETIF_F_GSO_UDP_TUNNEL |
+				    NETIF_F_GSO_UDP_TUNNEL_CSUM |
+				    NETIF_F_GSO_PARTIAL;
+
 		dev->hw_enc_features =
 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
 			NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 |
 			NETIF_F_GSO_IPXIP4 |
-			NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL;
+			NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM |
+			NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM |
+			NETIF_F_GSO_PARTIAL;
+
+		dev->gso_partial_features = NETIF_F_GSO_GRE_CSUM |
+					    NETIF_F_GSO_UDP_TUNNEL_CSUM;
 	}
 
 	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index ff702a7..cea6bdc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -2600,8 +2600,29 @@
 	u8 pad[2]; /* For a natural alignment of the following buffer */
 };
 
+struct bnx2x_mcast_bin_elem {
+	struct list_head link;
+	int bin;
+	int type; /* BNX2X_MCAST_CMD_SET_{ADD, DEL} */
+};
+
+union bnx2x_mcast_elem {
+	struct bnx2x_mcast_bin_elem bin_elem;
+	struct bnx2x_mcast_mac_elem mac_elem;
+};
+
+struct bnx2x_mcast_elem_group {
+	struct list_head mcast_group_link;
+	union bnx2x_mcast_elem mcast_elems[];
+};
+
+#define MCAST_MAC_ELEMS_PER_PG \
+	((PAGE_SIZE - sizeof(struct bnx2x_mcast_elem_group)) / \
+	sizeof(union bnx2x_mcast_elem))
+
 struct bnx2x_pending_mcast_cmd {
 	struct list_head link;
+	struct list_head group_head;
 	int type; /* BNX2X_MCAST_CMD_X */
 	union {
 		struct list_head macs_head;
@@ -2609,6 +2630,11 @@
 		int next_bin; /* Needed for RESTORE flow with aprox match */
 	} data;
 
+	bool set_convert; /* in case type == BNX2X_MCAST_CMD_SET, this is set
+			   * when macs_head had been converted to a list of
+			   * bnx2x_mcast_bin_elem.
+			   */
+
 	bool done; /* set to true, when the command has been handled,
 		    * practically used in 57712 handling only, where one pending
 		    * command may be handled in a few operations. As long as for
@@ -2627,53 +2653,93 @@
 	return 0;
 }
 
+static void bnx2x_free_groups(struct list_head *mcast_group_list)
+{
+	struct bnx2x_mcast_elem_group *current_mcast_group;
+
+	while (!list_empty(mcast_group_list)) {
+		current_mcast_group = list_first_entry(mcast_group_list,
+				      struct bnx2x_mcast_elem_group,
+				      mcast_group_link);
+		list_del(&current_mcast_group->mcast_group_link);
+		free_page((unsigned long)current_mcast_group);
+	}
+}
+
 static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp,
 				   struct bnx2x_mcast_obj *o,
 				   struct bnx2x_mcast_ramrod_params *p,
 				   enum bnx2x_mcast_cmd cmd)
 {
-	int total_sz;
 	struct bnx2x_pending_mcast_cmd *new_cmd;
-	struct bnx2x_mcast_mac_elem *cur_mac = NULL;
 	struct bnx2x_mcast_list_elem *pos;
-	int macs_list_len = ((cmd == BNX2X_MCAST_CMD_ADD) ?
-			     p->mcast_list_len : 0);
+	struct bnx2x_mcast_elem_group *elem_group;
+	struct bnx2x_mcast_mac_elem *mac_elem;
+	int total_elems = 0, macs_list_len = 0, offset = 0;
+
+	/* When adding MACs we'll need to store their values */
+	if (cmd == BNX2X_MCAST_CMD_ADD || cmd == BNX2X_MCAST_CMD_SET)
+		macs_list_len = p->mcast_list_len;
 
 	/* If the command is empty ("handle pending commands only"), break */
 	if (!p->mcast_list_len)
 		return 0;
 
-	total_sz = sizeof(*new_cmd) +
-		macs_list_len * sizeof(struct bnx2x_mcast_mac_elem);
-
 	/* Add mcast is called under spin_lock, thus calling with GFP_ATOMIC */
-	new_cmd = kzalloc(total_sz, GFP_ATOMIC);
-
+	new_cmd = kzalloc(sizeof(*new_cmd), GFP_ATOMIC);
 	if (!new_cmd)
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&new_cmd->data.macs_head);
+	INIT_LIST_HEAD(&new_cmd->group_head);
+	new_cmd->type = cmd;
+	new_cmd->done = false;
+
 	DP(BNX2X_MSG_SP, "About to enqueue a new %d command. macs_list_len=%d\n",
 	   cmd, macs_list_len);
 
-	INIT_LIST_HEAD(&new_cmd->data.macs_head);
-
-	new_cmd->type = cmd;
-	new_cmd->done = false;
-
 	switch (cmd) {
 	case BNX2X_MCAST_CMD_ADD:
-		cur_mac = (struct bnx2x_mcast_mac_elem *)
-			  ((u8 *)new_cmd + sizeof(*new_cmd));
-
-		/* Push the MACs of the current command into the pending command
-		 * MACs list: FIFO
+	case BNX2X_MCAST_CMD_SET:
+		/* For a set command, we need to allocate sufficient memory for
+		 * all the bins, since we can't analyze at this point how much
+		 * memory would be required.
 		 */
-		list_for_each_entry(pos, &p->mcast_list, link) {
-			memcpy(cur_mac->mac, pos->mac, ETH_ALEN);
-			list_add_tail(&cur_mac->link, &new_cmd->data.macs_head);
-			cur_mac++;
+		total_elems = macs_list_len;
+		if (cmd == BNX2X_MCAST_CMD_SET) {
+			if (total_elems < BNX2X_MCAST_BINS_NUM)
+				total_elems = BNX2X_MCAST_BINS_NUM;
 		}
-
+		while (total_elems > 0) {
+			elem_group = (struct bnx2x_mcast_elem_group *)
+				     __get_free_page(GFP_ATOMIC | __GFP_ZERO);
+			if (!elem_group) {
+				bnx2x_free_groups(&new_cmd->group_head);
+				kfree(new_cmd);
+				return -ENOMEM;
+			}
+			total_elems -= MCAST_MAC_ELEMS_PER_PG;
+			list_add_tail(&elem_group->mcast_group_link,
+				      &new_cmd->group_head);
+		}
+		elem_group = list_first_entry(&new_cmd->group_head,
+					      struct bnx2x_mcast_elem_group,
+					      mcast_group_link);
+		list_for_each_entry(pos, &p->mcast_list, link) {
+			mac_elem = &elem_group->mcast_elems[offset].mac_elem;
+			memcpy(mac_elem->mac, pos->mac, ETH_ALEN);
+			/* Push the MACs of the current command into the pending
+			 * command MACs list: FIFO
+			 */
+			list_add_tail(&mac_elem->link,
+				      &new_cmd->data.macs_head);
+			offset++;
+			if (offset == MCAST_MAC_ELEMS_PER_PG) {
+				offset = 0;
+				elem_group = list_next_entry(elem_group,
+							     mcast_group_link);
+			}
+		}
 		break;
 
 	case BNX2X_MCAST_CMD_DEL:
@@ -2771,7 +2837,8 @@
 	u8 rx_tx_add_flag = bnx2x_mcast_get_rx_tx_flag(o);
 	int bin;
 
-	if ((cmd == BNX2X_MCAST_CMD_ADD) || (cmd == BNX2X_MCAST_CMD_RESTORE))
+	if ((cmd == BNX2X_MCAST_CMD_ADD) || (cmd == BNX2X_MCAST_CMD_RESTORE) ||
+	    (cmd == BNX2X_MCAST_CMD_SET_ADD))
 		rx_tx_add_flag |= ETH_MULTICAST_RULES_CMD_IS_ADD;
 
 	data->rules[idx].cmd_general_data |= rx_tx_add_flag;
@@ -2797,6 +2864,16 @@
 		bin = cfg_data->bin;
 		break;
 
+	case BNX2X_MCAST_CMD_SET_ADD:
+		bin = cfg_data->bin;
+		BIT_VEC64_SET_BIT(o->registry.aprox_match.vec, bin);
+		break;
+
+	case BNX2X_MCAST_CMD_SET_DEL:
+		bin = cfg_data->bin;
+		BIT_VEC64_CLEAR_BIT(o->registry.aprox_match.vec, bin);
+		break;
+
 	default:
 		BNX2X_ERR("Unknown command: %d\n", cmd);
 		return;
@@ -2932,6 +3009,110 @@
 		cmd_pos->data.next_bin++;
 }
 
+static void
+bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp,
+				       struct bnx2x_mcast_obj *o,
+				       struct bnx2x_pending_mcast_cmd *cmd_pos)
+{
+	u64 cur[BNX2X_MCAST_VEC_SZ], req[BNX2X_MCAST_VEC_SZ];
+	struct bnx2x_mcast_mac_elem *pmac_pos, *pmac_pos_n;
+	struct bnx2x_mcast_bin_elem *p_item;
+	struct bnx2x_mcast_elem_group *elem_group;
+	int cnt = 0, mac_cnt = 0, offset = 0, i;
+
+	memset(req, 0, sizeof(u64) * BNX2X_MCAST_VEC_SZ);
+	memcpy(cur, o->registry.aprox_match.vec,
+	       sizeof(u64) * BNX2X_MCAST_VEC_SZ);
+
+	/* Fill `current' with the required set of bins to configure */
+	list_for_each_entry_safe(pmac_pos, pmac_pos_n, &cmd_pos->data.macs_head,
+				 link) {
+		int bin = bnx2x_mcast_bin_from_mac(pmac_pos->mac);
+
+		DP(BNX2X_MSG_SP, "Set contains %pM mcast MAC\n",
+		   pmac_pos->mac);
+
+		BIT_VEC64_SET_BIT(req, bin);
+		list_del(&pmac_pos->link);
+		mac_cnt++;
+	}
+
+	/* We no longer have use for the MACs; Need to re-use memory for
+	 * a list that will be used to configure bins.
+	 */
+	cmd_pos->set_convert = true;
+	INIT_LIST_HEAD(&cmd_pos->data.macs_head);
+	elem_group = list_first_entry(&cmd_pos->group_head,
+				      struct bnx2x_mcast_elem_group,
+				      mcast_group_link);
+	for (i = 0; i < BNX2X_MCAST_BINS_NUM; i++) {
+		bool b_current = !!BIT_VEC64_TEST_BIT(cur, i);
+		bool b_required = !!BIT_VEC64_TEST_BIT(req, i);
+
+		if (b_current == b_required)
+			continue;
+
+		p_item = &elem_group->mcast_elems[offset].bin_elem;
+		p_item->bin = i;
+		p_item->type = b_required ? BNX2X_MCAST_CMD_SET_ADD
+					  : BNX2X_MCAST_CMD_SET_DEL;
+		list_add_tail(&p_item->link , &cmd_pos->data.macs_head);
+		cnt++;
+		offset++;
+		if (offset == MCAST_MAC_ELEMS_PER_PG) {
+			offset = 0;
+			elem_group = list_next_entry(elem_group,
+						     mcast_group_link);
+		}
+	}
+
+	/* We now definitely know how many commands are hiding here.
+	 * Also need to correct the disruption we've added to guarantee this
+	 * would be enqueued.
+	 */
+	o->total_pending_num -= (o->max_cmd_len + mac_cnt);
+	o->total_pending_num += cnt;
+
+	DP(BNX2X_MSG_SP, "o->total_pending_num=%d\n", o->total_pending_num);
+}
+
+static void
+bnx2x_mcast_hdl_pending_set_e2(struct bnx2x *bp,
+			       struct bnx2x_mcast_obj *o,
+			       struct bnx2x_pending_mcast_cmd *cmd_pos,
+			       int *cnt)
+{
+	union bnx2x_mcast_config_data cfg_data = {NULL};
+	struct bnx2x_mcast_bin_elem *p_item, *p_item_n;
+
+	/* This is actually a 2-part scheme - it starts by converting the MACs
+	 * into a list of bins to be added/removed, and correcting the numbers
+	 * on the object. this is now allowed, as we're now sure that all
+	 * previous configured requests have already applied.
+	 * The second part is actually adding rules for the newly introduced
+	 * entries [like all the rest of the hdl_pending functions].
+	 */
+	if (!cmd_pos->set_convert)
+		bnx2x_mcast_hdl_pending_set_e2_convert(bp, o, cmd_pos);
+
+	list_for_each_entry_safe(p_item, p_item_n, &cmd_pos->data.macs_head,
+				 link) {
+		cfg_data.bin = (u8)p_item->bin;
+		o->set_one_rule(bp, o, *cnt, &cfg_data, p_item->type);
+		(*cnt)++;
+
+		list_del(&p_item->link);
+
+		/* Break if we reached the maximum number of rules. */
+		if (*cnt >= o->max_cmd_len)
+			break;
+	}
+
+	/* if no more MACs to configure - we are done */
+	if (list_empty(&cmd_pos->data.macs_head))
+		cmd_pos->done = true;
+}
+
 static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp,
 				struct bnx2x_mcast_ramrod_params *p)
 {
@@ -2955,6 +3136,10 @@
 							   &cnt);
 			break;
 
+		case BNX2X_MCAST_CMD_SET:
+			bnx2x_mcast_hdl_pending_set_e2(bp, o, cmd_pos, &cnt);
+			break;
+
 		default:
 			BNX2X_ERR("Unknown command: %d\n", cmd_pos->type);
 			return -EINVAL;
@@ -2965,6 +3150,7 @@
 		 */
 		if (cmd_pos->done) {
 			list_del(&cmd_pos->link);
+			bnx2x_free_groups(&cmd_pos->group_head);
 			kfree(cmd_pos);
 		}
 
@@ -3095,6 +3281,19 @@
 		o->set_registry_size(o, reg_sz + p->mcast_list_len);
 		break;
 
+	case BNX2X_MCAST_CMD_SET:
+		/* We can only learn how many commands would actually be used
+		 * when this is being configured. So for now, simply guarantee
+		 * the command will be enqueued [to refrain from adding logic
+		 * that handles this and THEN learns it needs several ramrods].
+		 * Just like for ADD/Cont, the mcast_list_len might be an over
+		 * estimation; or even more so, since we don't take into
+		 * account the possibility of removal of existing bins.
+		 */
+		o->set_registry_size(o, reg_sz + p->mcast_list_len);
+		o->total_pending_num += o->max_cmd_len;
+		break;
+
 	default:
 		BNX2X_ERR("Unknown command: %d\n", cmd);
 		return -EINVAL;
@@ -3108,12 +3307,16 @@
 
 static void bnx2x_mcast_revert_e2(struct bnx2x *bp,
 				      struct bnx2x_mcast_ramrod_params *p,
-				      int old_num_bins)
+				  int old_num_bins,
+				  enum bnx2x_mcast_cmd cmd)
 {
 	struct bnx2x_mcast_obj *o = p->mcast_obj;
 
 	o->set_registry_size(o, old_num_bins);
 	o->total_pending_num -= p->mcast_list_len;
+
+	if (cmd == BNX2X_MCAST_CMD_SET)
+		o->total_pending_num -= o->max_cmd_len;
 }
 
 /**
@@ -3223,9 +3426,11 @@
 		bnx2x_mcast_refresh_registry_e2(bp, o);
 
 	/* If CLEAR_ONLY was requested - don't send a ramrod and clear
-	 * RAMROD_PENDING status immediately.
+	 * RAMROD_PENDING status immediately. due to the SET option, it's also
+	 * possible that after evaluating the differences there's no need for
+	 * a ramrod. In that case, we can skip it as well.
 	 */
-	if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags)) {
+	if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags) || !cnt) {
 		raw->clear_pending(raw);
 		return 0;
 	} else {
@@ -3253,6 +3458,11 @@
 				    struct bnx2x_mcast_ramrod_params *p,
 				    enum bnx2x_mcast_cmd cmd)
 {
+	if (cmd == BNX2X_MCAST_CMD_SET) {
+		BNX2X_ERR("Can't use `set' command on e1h!\n");
+		return -EINVAL;
+	}
+
 	/* Mark, that there is a work to do */
 	if ((cmd == BNX2X_MCAST_CMD_DEL) || (cmd == BNX2X_MCAST_CMD_RESTORE))
 		p->mcast_list_len = 1;
@@ -3262,7 +3472,8 @@
 
 static void bnx2x_mcast_revert_e1h(struct bnx2x *bp,
 				       struct bnx2x_mcast_ramrod_params *p,
-				       int old_num_bins)
+				       int old_num_bins,
+				       enum bnx2x_mcast_cmd cmd)
 {
 	/* Do nothing */
 }
@@ -3372,6 +3583,11 @@
 	struct bnx2x_mcast_obj *o = p->mcast_obj;
 	int reg_sz = o->get_registry_size(o);
 
+	if (cmd == BNX2X_MCAST_CMD_SET) {
+		BNX2X_ERR("Can't use `set' command on e1!\n");
+		return -EINVAL;
+	}
+
 	switch (cmd) {
 	/* DEL command deletes all currently configured MACs */
 	case BNX2X_MCAST_CMD_DEL:
@@ -3422,7 +3638,8 @@
 
 static void bnx2x_mcast_revert_e1(struct bnx2x *bp,
 				      struct bnx2x_mcast_ramrod_params *p,
-				      int old_num_macs)
+				   int old_num_macs,
+				   enum bnx2x_mcast_cmd cmd)
 {
 	struct bnx2x_mcast_obj *o = p->mcast_obj;
 
@@ -3572,6 +3789,7 @@
 	}
 
 	list_del(&cmd_pos->link);
+	bnx2x_free_groups(&cmd_pos->group_head);
 	kfree(cmd_pos);
 
 	return cnt;
@@ -3816,7 +4034,7 @@
 	r->clear_pending(r);
 
 error_exit1:
-	o->revert(bp, p, old_reg_size);
+	o->revert(bp, p, old_reg_size, cmd);
 
 	return rc;
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
index 4048fc5..0bf2fd4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
@@ -536,6 +536,15 @@
 	BNX2X_MCAST_CMD_CONT,
 	BNX2X_MCAST_CMD_DEL,
 	BNX2X_MCAST_CMD_RESTORE,
+
+	/* Following this, multicast configuration should equal to approx
+	 * the set of MACs provided [i.e., remove all else].
+	 * The two sub-commands are used internally to decide whether a given
+	 * bin is to be added or removed
+	 */
+	BNX2X_MCAST_CMD_SET,
+	BNX2X_MCAST_CMD_SET_ADD,
+	BNX2X_MCAST_CMD_SET_DEL,
 };
 
 struct bnx2x_mcast_obj {
@@ -635,7 +644,8 @@
 	 */
 	void (*revert)(struct bnx2x *bp,
 		       struct bnx2x_mcast_ramrod_params *p,
-		       int old_num_bins);
+		       int old_num_bins,
+		       enum bnx2x_mcast_cmd cmd);
 
 	int (*get_registry_size)(struct bnx2x_mcast_obj *o);
 	void (*set_registry_size)(struct bnx2x_mcast_obj *o, int n);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 632daff..3f77d08 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -573,17 +573,6 @@
 		}
 	}
 
-	/* clear existing mcasts */
-	mcast.mcast_list_len = vf->mcast_list_len;
-	vf->mcast_list_len = mc_num;
-	rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_DEL);
-	if (rc) {
-		BNX2X_ERR("Failed to remove multicasts\n");
-		kfree(mc);
-		return rc;
-	}
-
-	/* update mcast list on the ramrod params */
 	if (mc_num) {
 		INIT_LIST_HEAD(&mcast.mcast_list);
 		for (i = 0; i < mc_num; i++) {
@@ -594,12 +583,18 @@
 
 		/* add new mcasts */
 		mcast.mcast_list_len = mc_num;
-		rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_ADD);
+		rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_SET);
 		if (rc)
-			BNX2X_ERR("Faled to add multicasts\n");
-		kfree(mc);
+			BNX2X_ERR("Faled to set multicasts\n");
+	} else {
+		/* clear existing mcasts */
+		rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_DEL);
+		if (rc)
+			BNX2X_ERR("Failed to remove multicasts\n");
 	}
 
+	kfree(mc);
+
 	return rc;
 }
 
@@ -1583,7 +1578,6 @@
 		 *  It needs to be initialized here so that it can be safely
 		 *  handled by a subsequent FLR flow.
 		 */
-		vf->mcast_list_len = 0;
 		bnx2x_init_mcast_obj(bp, &vf->mcast_obj, 0xFF,
 				     0xFF, 0xFF, 0xFF,
 				     bnx2x_vf_sp(bp, vf, mcast_rdata),
@@ -2527,7 +2521,8 @@
 	for_each_vf(bp, vfidx) {
 		bulletin = BP_VF_BULLETIN(bp, vfidx);
 		if (bulletin->valid_bitmap & (1 << VLAN_VALID))
-			bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0);
+			bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0,
+					  htons(ETH_P_8021Q));
 	}
 }
 
@@ -2787,7 +2782,8 @@
 	return 0;
 }
 
-int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos)
+int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos,
+		      __be16 vlan_proto)
 {
 	struct pf_vf_bulletin_content *bulletin = NULL;
 	struct bnx2x *bp = netdev_priv(dev);
@@ -2802,6 +2798,9 @@
 		return -EINVAL;
 	}
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	DP(BNX2X_MSG_IOV, "configuring VF %d with VLAN %d qos %d\n",
 	   vfidx, vlan, 0);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 670a581..7a6d406 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -195,7 +195,6 @@
 	int leading_rss;
 
 	/* MCAST object */
-	int mcast_list_len;
 	struct bnx2x_mcast_obj		mcast_obj;
 
 	/* RSS configuration object */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2cf7910..a9f9f37 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -32,6 +32,7 @@
 #include <linux/mii.h>
 #include <linux/if.h>
 #include <linux/if_vlan.h>
+#include <linux/rtc.h>
 #include <net/ip.h>
 #include <net/tcp.h>
 #include <net/udp.h>
@@ -93,50 +94,49 @@
 	BCM57404_NPAR,
 	BCM57406_NPAR,
 	BCM57407_SFP,
+	BCM57407_NPAR,
 	BCM57414_NPAR,
 	BCM57416_NPAR,
-	BCM57304_VF,
-	BCM57404_VF,
-	BCM57414_VF,
-	BCM57314_VF,
+	NETXTREME_E_VF,
+	NETXTREME_C_VF,
 };
 
 /* indexed by enum above */
 static const struct {
 	char *name;
 } board_info[] = {
-	{ "Broadcom BCM57301 NetXtreme-C Single-port 10Gb Ethernet" },
-	{ "Broadcom BCM57302 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" },
-	{ "Broadcom BCM57304 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" },
+	{ "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet" },
+	{ "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet" },
+	{ "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
 	{ "Broadcom BCM57417 NetXtreme-E Ethernet Partition" },
-	{ "Broadcom BCM58700 Nitro 4-port 1Gb/2.5Gb/10Gb Ethernet" },
-	{ "Broadcom BCM57311 NetXtreme-C Single-port 10Gb Ethernet" },
-	{ "Broadcom BCM57312 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" },
-	{ "Broadcom BCM57402 NetXtreme-E Dual-port 10Gb Ethernet" },
-	{ "Broadcom BCM57404 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" },
-	{ "Broadcom BCM57406 NetXtreme-E Dual-port 10GBase-T Ethernet" },
+	{ "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet" },
+	{ "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet" },
+	{ "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet" },
+	{ "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet" },
+	{ "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet" },
+	{ "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet" },
 	{ "Broadcom BCM57402 NetXtreme-E Ethernet Partition" },
-	{ "Broadcom BCM57407 NetXtreme-E Dual-port 10GBase-T Ethernet" },
-	{ "Broadcom BCM57412 NetXtreme-E Dual-port 10Gb Ethernet" },
-	{ "Broadcom BCM57414 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" },
-	{ "Broadcom BCM57416 NetXtreme-E Dual-port 10GBase-T Ethernet" },
-	{ "Broadcom BCM57417 NetXtreme-E Dual-port 10GBase-T Ethernet" },
+	{ "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet" },
+	{ "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet" },
+	{ "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet" },
+	{ "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet" },
+	{ "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet" },
 	{ "Broadcom BCM57412 NetXtreme-E Ethernet Partition" },
-	{ "Broadcom BCM57314 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" },
-	{ "Broadcom BCM57417 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" },
-	{ "Broadcom BCM57416 NetXtreme-E Dual-port 10Gb Ethernet" },
+	{ "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" },
+	{ "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet" },
+	{ "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet" },
 	{ "Broadcom BCM57404 NetXtreme-E Ethernet Partition" },
 	{ "Broadcom BCM57406 NetXtreme-E Ethernet Partition" },
-	{ "Broadcom BCM57407 NetXtreme-E Dual-port 25Gb Ethernet" },
+	{ "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet" },
+	{ "Broadcom BCM57407 NetXtreme-E Ethernet Partition" },
 	{ "Broadcom BCM57414 NetXtreme-E Ethernet Partition" },
 	{ "Broadcom BCM57416 NetXtreme-E Ethernet Partition" },
-	{ "Broadcom BCM57304 NetXtreme-C Ethernet Virtual Function" },
-	{ "Broadcom BCM57404 NetXtreme-E Ethernet Virtual Function" },
-	{ "Broadcom BCM57414 NetXtreme-E Ethernet Virtual Function" },
-	{ "Broadcom BCM57314 NetXtreme-E Ethernet Virtual Function" },
+	{ "Broadcom NetXtreme-E Ethernet Virtual Function" },
+	{ "Broadcom NetXtreme-C Ethernet Virtual Function" },
 };
 
 static const struct pci_device_id bnxt_pci_tbl[] = {
+	{ PCI_VDEVICE(BROADCOM, 0x16c0), .driver_data = BCM57417_NPAR },
 	{ PCI_VDEVICE(BROADCOM, 0x16c8), .driver_data = BCM57301 },
 	{ PCI_VDEVICE(BROADCOM, 0x16c9), .driver_data = BCM57302 },
 	{ PCI_VDEVICE(BROADCOM, 0x16ca), .driver_data = BCM57304 },
@@ -160,13 +160,19 @@
 	{ PCI_VDEVICE(BROADCOM, 0x16e7), .driver_data = BCM57404_NPAR },
 	{ PCI_VDEVICE(BROADCOM, 0x16e8), .driver_data = BCM57406_NPAR },
 	{ PCI_VDEVICE(BROADCOM, 0x16e9), .driver_data = BCM57407_SFP },
+	{ PCI_VDEVICE(BROADCOM, 0x16ea), .driver_data = BCM57407_NPAR },
+	{ PCI_VDEVICE(BROADCOM, 0x16eb), .driver_data = BCM57412_NPAR },
 	{ PCI_VDEVICE(BROADCOM, 0x16ec), .driver_data = BCM57414_NPAR },
+	{ PCI_VDEVICE(BROADCOM, 0x16ed), .driver_data = BCM57414_NPAR },
 	{ PCI_VDEVICE(BROADCOM, 0x16ee), .driver_data = BCM57416_NPAR },
+	{ PCI_VDEVICE(BROADCOM, 0x16ef), .driver_data = BCM57416_NPAR },
 #ifdef CONFIG_BNXT_SRIOV
-	{ PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = BCM57304_VF },
-	{ PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = BCM57404_VF },
-	{ PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = BCM57414_VF },
-	{ PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = BCM57314_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x16c1), .driver_data = NETXTREME_E_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = NETXTREME_C_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = NETXTREME_E_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF },
+	{ PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF },
 #endif
 	{ 0 }
 };
@@ -189,8 +195,7 @@
 
 static bool bnxt_vf_pciid(enum board_idx idx)
 {
-	return (idx == BCM57304_VF || idx == BCM57404_VF ||
-		idx == BCM57314_VF || idx == BCM57414_VF);
+	return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF);
 }
 
 #define DB_CP_REARM_FLAGS	(DB_KEY_CP | DB_IDX_VALID)
@@ -353,8 +358,8 @@
 		push_len = (length + sizeof(*tx_push) + 7) / 8;
 		if (push_len > 16) {
 			__iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16);
-			__iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
-					 push_len - 16);
+			__iowrite32_copy(txr->tx_doorbell + 4, tx_push_buf + 1,
+					 (push_len - 16) << 1);
 		} else {
 			__iowrite64_copy(txr->tx_doorbell, tx_push_buf,
 					 push_len);
@@ -3419,10 +3424,10 @@
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1);
 	if (set_rss) {
-		vnic->hash_type = BNXT_RSS_HASH_TYPE_FLAG_IPV4 |
-				 BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4 |
-				 BNXT_RSS_HASH_TYPE_FLAG_IPV6 |
-				 BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6;
+		vnic->hash_type = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 |
+				  VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 |
+				  VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 |
+				  VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
 
 		req.hash_type = cpu_to_le32(vnic->hash_type);
 
@@ -4156,6 +4161,11 @@
 	if (rc)
 		goto hwrm_func_qcaps_exit;
 
+	bp->tx_push_thresh = 0;
+	if (resp->flags &
+	    cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED))
+		bp->tx_push_thresh = BNXT_TX_PUSH_THRESH;
+
 	if (BNXT_PF(bp)) {
 		struct bnxt_pf_info *pf = &bp->pf;
 
@@ -4187,12 +4197,6 @@
 		struct bnxt_vf_info *vf = &bp->vf;
 
 		vf->fw_fid = le16_to_cpu(resp->fid);
-		memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN);
-		if (is_valid_ether_addr(vf->mac_addr))
-			/* overwrite netdev dev_adr with admin VF MAC */
-			memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN);
-		else
-			random_ether_addr(bp->dev->dev_addr);
 
 		vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx);
 		vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings);
@@ -4204,14 +4208,21 @@
 		vf->max_l2_ctxs = le16_to_cpu(resp->max_l2_ctxs);
 		vf->max_vnics = le16_to_cpu(resp->max_vnics);
 		vf->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx);
+
+		memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN);
+		mutex_unlock(&bp->hwrm_cmd_lock);
+
+		if (is_valid_ether_addr(vf->mac_addr)) {
+			/* overwrite netdev dev_adr with admin VF MAC */
+			memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN);
+		} else {
+			random_ether_addr(bp->dev->dev_addr);
+			rc = bnxt_approve_mac(bp, bp->dev->dev_addr);
+		}
+		return rc;
 #endif
 	}
 
-	bp->tx_push_thresh = 0;
-	if (resp->flags &
-	    cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED))
-		bp->tx_push_thresh = BNXT_TX_PUSH_THRESH;
-
 hwrm_func_qcaps_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
@@ -4249,6 +4260,9 @@
 	if (bp->max_tc > BNXT_MAX_QUEUE)
 		bp->max_tc = BNXT_MAX_QUEUE;
 
+	if (resp->queue_cfg_info & QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG)
+		bp->max_tc = 1;
+
 	qptr = &resp->queue_id0;
 	for (i = 0; i < bp->max_tc; i++) {
 		bp->q_info[i].queue_id = *qptr++;
@@ -4307,6 +4321,31 @@
 	return rc;
 }
 
+int bnxt_hwrm_fw_set_time(struct bnxt *bp)
+{
+#if IS_ENABLED(CONFIG_RTC_LIB)
+	struct hwrm_fw_set_time_input req = {0};
+	struct rtc_time tm;
+	struct timeval tv;
+
+	if (bp->hwrm_spec_code < 0x10400)
+		return -EOPNOTSUPP;
+
+	do_gettimeofday(&tv);
+	rtc_time_to_tm(tv.tv_sec, &tm);
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1);
+	req.year = cpu_to_le16(1900 + tm.tm_year);
+	req.month = 1 + tm.tm_mon;
+	req.day = tm.tm_mday;
+	req.hour = tm.tm_hour;
+	req.minute = tm.tm_min;
+	req.second = tm.tm_sec;
+	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+#else
+	return -EOPNOTSUPP;
+#endif
+}
+
 static int bnxt_hwrm_port_qstats(struct bnxt *bp)
 {
 	int rc;
@@ -6804,6 +6843,8 @@
 	if (rc)
 		goto init_err;
 
+	bnxt_hwrm_fw_set_time(bp);
+
 	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
 			   NETIF_F_TSO | NETIF_F_TSO6 |
 			   NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 23e04a6..51b164a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -11,10 +11,10 @@
 #define BNXT_H
 
 #define DRV_MODULE_NAME		"bnxt_en"
-#define DRV_MODULE_VERSION	"1.3.0"
+#define DRV_MODULE_VERSION	"1.5.0"
 
 #define DRV_VER_MAJ	1
-#define DRV_VER_MIN	3
+#define DRV_VER_MIN	5
 #define DRV_VER_UPD	0
 
 struct tx_bd {
@@ -106,11 +106,11 @@
 	 #define CMP_TYPE_REMOTE_DRIVER_REQ			 34
 	 #define CMP_TYPE_REMOTE_DRIVER_RESP			 36
 	 #define CMP_TYPE_ERROR_STATUS				 48
-	 #define CMPL_BASE_TYPE_STAT_EJECT			 (0x1aUL << 0)
-	 #define CMPL_BASE_TYPE_HWRM_DONE			 (0x20UL << 0)
-	 #define CMPL_BASE_TYPE_HWRM_FWD_REQ			 (0x22UL << 0)
-	 #define CMPL_BASE_TYPE_HWRM_FWD_RESP			 (0x24UL << 0)
-	 #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT		 (0x2eUL << 0)
+	 #define CMPL_BASE_TYPE_STAT_EJECT			 0x1aUL
+	 #define CMPL_BASE_TYPE_HWRM_DONE			 0x20UL
+	 #define CMPL_BASE_TYPE_HWRM_FWD_REQ			 0x22UL
+	 #define CMPL_BASE_TYPE_HWRM_FWD_RESP			 0x24UL
+	 #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT		 0x2eUL
 
 	#define TX_CMP_FLAGS_ERROR				(1 << 6)
 	#define TX_CMP_FLAGS_PUSH				(1 << 7)
@@ -389,11 +389,6 @@
 
 #define INVALID_HW_RING_ID	((u16)-1)
 
-#define BNXT_RSS_HASH_TYPE_FLAG_IPV4		0x01
-#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4	0x02
-#define BNXT_RSS_HASH_TYPE_FLAG_IPV6		0x04
-#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6	0x08
-
 /* The hardware supports certain page sizes.  Use the supported page sizes
  * to allocate the rings.
  */
@@ -418,7 +413,7 @@
 
 #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT)
 
-#define BNXT_MIN_PKT_SIZE	45
+#define BNXT_MIN_PKT_SIZE	52
 
 #define BNXT_NUM_TESTS(bp)	0
 
@@ -1225,6 +1220,7 @@
 int bnxt_hwrm_func_qcaps(struct bnxt *);
 int bnxt_hwrm_set_pause(struct bnxt *);
 int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool);
+int bnxt_hwrm_fw_set_time(struct bnxt *);
 int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_close_nic(struct bnxt *, bool, bool);
 int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index b83e174..a7e04ff 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -21,6 +21,8 @@
 #include "bnxt_nvm_defs.h"	/* NVRAM content constant and structure defs */
 #include "bnxt_fw_hdr.h"	/* Firmware hdr constant and structure defs */
 #define FLASH_NVRAM_TIMEOUT	((HWRM_CMD_TIMEOUT) * 100)
+#define FLASH_PACKAGE_TIMEOUT	((HWRM_CMD_TIMEOUT) * 200)
+#define INSTALL_PACKAGE_TIMEOUT	((HWRM_CMD_TIMEOUT) * 200)
 
 static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen);
 
@@ -346,7 +348,7 @@
 	int max_rx_rings, max_tx_rings, tcs;
 
 	bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, true);
-	channel->max_combined = max_rx_rings;
+	channel->max_combined = max_t(int, max_rx_rings, max_tx_rings);
 
 	if (bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, false)) {
 		max_rx_rings = 0;
@@ -404,8 +406,8 @@
 	if (tcs > 1)
 		max_tx_rings /= tcs;
 
-	if (sh && (channel->combined_count > max_rx_rings ||
-		   channel->combined_count > max_tx_rings))
+	if (sh &&
+	    channel->combined_count > max_t(int, max_rx_rings, max_tx_rings))
 		return -ENOMEM;
 
 	if (!sh && (channel->rx_count > max_rx_rings ||
@@ -428,8 +430,10 @@
 
 	if (sh) {
 		bp->flags |= BNXT_FLAG_SHARED_RINGS;
-		bp->rx_nr_rings = channel->combined_count;
-		bp->tx_nr_rings_per_tc = channel->combined_count;
+		bp->rx_nr_rings = min_t(int, channel->combined_count,
+					max_rx_rings);
+		bp->tx_nr_rings_per_tc = min_t(int, channel->combined_count,
+					       max_tx_rings);
 	} else {
 		bp->flags &= ~BNXT_FLAG_SHARED_RINGS;
 		bp->rx_nr_rings = channel->rx_count;
@@ -1028,6 +1032,10 @@
 	return bp->link_info.link_up;
 }
 
+static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
+				u16 ext, u16 *index, u32 *item_length,
+				u32 *data_length);
+
 static int bnxt_flash_nvram(struct net_device *dev,
 			    u16 dir_type,
 			    u16 dir_ordinal,
@@ -1179,7 +1187,6 @@
 			   (unsigned long)calculated_crc);
 		return -EINVAL;
 	}
-	/* TODO: Validate digital signature (RSA-encrypted SHA-256 hash) here */
 	rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
 			      0, 0, fw_data, fw_size);
 	if (rc == 0)	/* Firmware update successful */
@@ -1188,6 +1195,57 @@
 	return rc;
 }
 
+static int bnxt_flash_microcode(struct net_device *dev,
+				u16 dir_type,
+				const u8 *fw_data,
+				size_t fw_size)
+{
+	struct bnxt_ucode_trailer *trailer;
+	u32 calculated_crc;
+	u32 stored_crc;
+	int rc = 0;
+
+	if (fw_size < sizeof(struct bnxt_ucode_trailer)) {
+		netdev_err(dev, "Invalid microcode file size: %u\n",
+			   (unsigned int)fw_size);
+		return -EINVAL;
+	}
+	trailer = (struct bnxt_ucode_trailer *)(fw_data + (fw_size -
+						sizeof(*trailer)));
+	if (trailer->sig != cpu_to_le32(BNXT_UCODE_TRAILER_SIGNATURE)) {
+		netdev_err(dev, "Invalid microcode trailer signature: %08X\n",
+			   le32_to_cpu(trailer->sig));
+		return -EINVAL;
+	}
+	if (le16_to_cpu(trailer->dir_type) != dir_type) {
+		netdev_err(dev, "Expected microcode type: %d, read: %d\n",
+			   dir_type, le16_to_cpu(trailer->dir_type));
+		return -EINVAL;
+	}
+	if (le16_to_cpu(trailer->trailer_length) <
+		sizeof(struct bnxt_ucode_trailer)) {
+		netdev_err(dev, "Invalid microcode trailer length: %d\n",
+			   le16_to_cpu(trailer->trailer_length));
+		return -EINVAL;
+	}
+
+	/* Confirm the CRC32 checksum of the file: */
+	stored_crc = le32_to_cpu(*(__le32 *)(fw_data + fw_size -
+					     sizeof(stored_crc)));
+	calculated_crc = ~crc32(~0, fw_data, fw_size - sizeof(stored_crc));
+	if (calculated_crc != stored_crc) {
+		netdev_err(dev,
+			   "CRC32 (%08lX) does not match calculated: %08lX\n",
+			   (unsigned long)stored_crc,
+			   (unsigned long)calculated_crc);
+		return -EINVAL;
+	}
+	rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
+			      0, 0, fw_data, fw_size);
+
+	return rc;
+}
+
 static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type)
 {
 	switch (dir_type) {
@@ -1206,7 +1264,7 @@
 	return false;
 }
 
-static bool bnxt_dir_type_is_unprotected_exec_format(u16 dir_type)
+static bool bnxt_dir_type_is_other_exec_format(u16 dir_type)
 {
 	switch (dir_type) {
 	case BNX_DIR_TYPE_AVS:
@@ -1227,7 +1285,7 @@
 static bool bnxt_dir_type_is_executable(u16 dir_type)
 {
 	return bnxt_dir_type_is_ape_bin_format(dir_type) ||
-		bnxt_dir_type_is_unprotected_exec_format(dir_type);
+		bnxt_dir_type_is_other_exec_format(dir_type);
 }
 
 static int bnxt_flash_firmware_from_file(struct net_device *dev,
@@ -1237,10 +1295,6 @@
 	const struct firmware  *fw;
 	int			rc;
 
-	if (dir_type != BNX_DIR_TYPE_UPDATE &&
-	    bnxt_dir_type_is_executable(dir_type) == false)
-		return -EINVAL;
-
 	rc = request_firmware(&fw, filename, &dev->dev);
 	if (rc != 0) {
 		netdev_err(dev, "Error %d requesting firmware file: %s\n",
@@ -1249,6 +1303,8 @@
 	}
 	if (bnxt_dir_type_is_ape_bin_format(dir_type) == true)
 		rc = bnxt_flash_firmware(dev, dir_type, fw->data, fw->size);
+	else if (bnxt_dir_type_is_other_exec_format(dir_type) == true)
+		rc = bnxt_flash_microcode(dev, dir_type, fw->data, fw->size);
 	else
 		rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
 				      0, 0, fw->data, fw->size);
@@ -1257,10 +1313,83 @@
 }
 
 static int bnxt_flash_package_from_file(struct net_device *dev,
-					char *filename)
+					char *filename, u32 install_type)
 {
-	netdev_err(dev, "packages are not yet supported\n");
-	return -EINVAL;
+	struct bnxt *bp = netdev_priv(dev);
+	struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_nvm_install_update_input install = {0};
+	const struct firmware *fw;
+	u32 item_len;
+	u16 index;
+	int rc;
+
+	bnxt_hwrm_fw_set_time(bp);
+
+	if (bnxt_find_nvram_item(dev, BNX_DIR_TYPE_UPDATE,
+				 BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE,
+				 &index, &item_len, NULL) != 0) {
+		netdev_err(dev, "PKG update area not created in nvram\n");
+		return -ENOBUFS;
+	}
+
+	rc = request_firmware(&fw, filename, &dev->dev);
+	if (rc != 0) {
+		netdev_err(dev, "PKG error %d requesting file: %s\n",
+			   rc, filename);
+		return rc;
+	}
+
+	if (fw->size > item_len) {
+		netdev_err(dev, "PKG insufficient update area in nvram: %lu",
+			   (unsigned long)fw->size);
+		rc = -EFBIG;
+	} else {
+		dma_addr_t dma_handle;
+		u8 *kmem;
+		struct hwrm_nvm_modify_input modify = {0};
+
+		bnxt_hwrm_cmd_hdr_init(bp, &modify, HWRM_NVM_MODIFY, -1, -1);
+
+		modify.dir_idx = cpu_to_le16(index);
+		modify.len = cpu_to_le32(fw->size);
+
+		kmem = dma_alloc_coherent(&bp->pdev->dev, fw->size,
+					  &dma_handle, GFP_KERNEL);
+		if (!kmem) {
+			netdev_err(dev,
+				   "dma_alloc_coherent failure, length = %u\n",
+				   (unsigned int)fw->size);
+			rc = -ENOMEM;
+		} else {
+			memcpy(kmem, fw->data, fw->size);
+			modify.host_src_addr = cpu_to_le64(dma_handle);
+
+			rc = hwrm_send_message(bp, &modify, sizeof(modify),
+					       FLASH_PACKAGE_TIMEOUT);
+			dma_free_coherent(&bp->pdev->dev, fw->size, kmem,
+					  dma_handle);
+		}
+	}
+	release_firmware(fw);
+	if (rc)
+		return rc;
+
+	if ((install_type & 0xffff) == 0)
+		install_type >>= 16;
+	bnxt_hwrm_cmd_hdr_init(bp, &install, HWRM_NVM_INSTALL_UPDATE, -1, -1);
+	install.install_type = cpu_to_le32(install_type);
+
+	rc = hwrm_send_message(bp, &install, sizeof(install),
+			       INSTALL_PACKAGE_TIMEOUT);
+	if (rc)
+		return -EOPNOTSUPP;
+
+	if (resp->result) {
+		netdev_err(dev, "PKG install error = %d, problem_item = %d\n",
+			   (s8)resp->result, (int)resp->problem_item);
+		return -ENOPKG;
+	}
+	return 0;
 }
 
 static int bnxt_flash_device(struct net_device *dev,
@@ -1271,8 +1400,10 @@
 		return -EINVAL;
 	}
 
-	if (flash->region == ETHTOOL_FLASH_ALL_REGIONS)
-		return bnxt_flash_package_from_file(dev, flash->data);
+	if (flash->region == ETHTOOL_FLASH_ALL_REGIONS ||
+	    flash->region > 0xffff)
+		return bnxt_flash_package_from_file(dev, flash->data,
+						    flash->region);
 
 	return bnxt_flash_firmware_from_file(dev, flash->region, flash->data);
 }
@@ -1516,7 +1647,7 @@
 
 	/* Create or re-write an NVM item: */
 	if (bnxt_dir_type_is_executable(type) == true)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 	ext = eeprom->magic & 0xffff;
 	ordinal = eeprom->offset >> 16;
 	attr = eeprom->offset & 0xffff;
@@ -1718,6 +1849,25 @@
 	return rc;
 }
 
+static int bnxt_nway_reset(struct net_device *dev)
+{
+	int rc = 0;
+
+	struct bnxt *bp = netdev_priv(dev);
+	struct bnxt_link_info *link_info = &bp->link_info;
+
+	if (!BNXT_SINGLE_PF(bp))
+		return -EOPNOTSUPP;
+
+	if (!(link_info->autoneg & BNXT_AUTONEG_SPEED))
+		return -EINVAL;
+
+	if (netif_running(dev))
+		rc = bnxt_hwrm_set_link_setting(bp, true, false);
+
+	return rc;
+}
+
 const struct ethtool_ops bnxt_ethtool_ops = {
 	.get_link_ksettings	= bnxt_get_link_ksettings,
 	.set_link_ksettings	= bnxt_set_link_ksettings,
@@ -1750,4 +1900,5 @@
 	.set_eee		= bnxt_set_eee,
 	.get_module_info	= bnxt_get_module_info,
 	.get_module_eeprom	= bnxt_get_module_eeprom,
+	.nway_reset		= bnxt_nway_reset
 };
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h
index 82bf44a..cad30dd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h
@@ -11,6 +11,7 @@
 #define __BNXT_FW_HDR_H__
 
 #define BNXT_FIRMWARE_BIN_SIGNATURE     0x1a4d4342	/* "BCM"+0x1a */
+#define BNXT_UCODE_TRAILER_SIGNATURE	0x726c7254	/* "Trlr" */
 
 enum SUPPORTED_FAMILY {
 	DEVICE_5702_3_4_FAMILY,		/* 0  - Denali, Vinson, K2 */
@@ -85,7 +86,7 @@
 
 struct bnxt_fw_header {
 	__le32 signature;	/* constains the constant value of
-				 * BNXT_Firmware_Bin_Signatures
+				 * BNXT_FIRMWARE_BIN_SIGNATURE
 				 */
 	u8 flags;		/* reserved for ChiMP use */
 	u8 code_type;		/* enum SUPPORTED_CODE */
@@ -102,4 +103,17 @@
 	u8 major_ver;
 };
 
+/* Microcode and pre-boot software/firmware trailer: */
+struct bnxt_ucode_trailer {
+	u8 rsa_sig[256];
+	__le16 flags;
+	u8 version_format;
+	u8 version_length;
+	u8 version[16];
+	__le16 dir_type;
+	__le16 trailer_length;
+	__le32 sig;		/* BNXT_UCODE_TRAILER_SIGNATURE */
+	__le32 chksum;		/* CRC-32 */
+};
+
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 517567f..04a96cc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -39,7 +39,7 @@
 	__le16 type;
 	#define EJECT_CMPL_TYPE_MASK				    0x3fUL
 	#define EJECT_CMPL_TYPE_SFT				    0
-	#define EJECT_CMPL_TYPE_STAT_EJECT			   (0x1aUL << 0)
+	#define EJECT_CMPL_TYPE_STAT_EJECT			   0x1aUL
 	__le16 len;
 	__le32 opaque;
 	__le32 v;
@@ -52,7 +52,7 @@
 	__le16 type;
 	#define HWRM_CMPL_TYPE_MASK				    0x3fUL
 	#define HWRM_CMPL_TYPE_SFT				    0
-	#define HWRM_CMPL_TYPE_HWRM_DONE			   (0x20UL << 0)
+	#define HWRM_CMPL_TYPE_HWRM_DONE			   0x20UL
 	__le16 sequence_id;
 	__le32 unused_1;
 	__le32 v;
@@ -65,7 +65,7 @@
 	__le16 req_len_type;
 	#define HWRM_FWD_REQ_CMPL_TYPE_MASK			    0x3fUL
 	#define HWRM_FWD_REQ_CMPL_TYPE_SFT			    0
-	#define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ		   (0x22UL << 0)
+	#define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ		   0x22UL
 	#define HWRM_FWD_REQ_CMPL_REQ_LEN_MASK			    0xffc0UL
 	#define HWRM_FWD_REQ_CMPL_REQ_LEN_SFT			    6
 	__le16 source_id;
@@ -81,7 +81,7 @@
 	__le16 type;
 	#define HWRM_FWD_RESP_CMPL_TYPE_MASK			    0x3fUL
 	#define HWRM_FWD_RESP_CMPL_TYPE_SFT			    0
-	#define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP		   (0x24UL << 0)
+	#define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP		   0x24UL
 	__le16 source_id;
 	__le16 resp_len;
 	__le16 unused_1;
@@ -96,25 +96,26 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_TYPE_MASK		    0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_TYPE_SFT			    0
-	#define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT       (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT       0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE    (0x1UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE  (0x2UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE  (0x3UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE (0x7UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD   (0x10UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD     (0x11UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD     (0x20UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD       (0x21UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR		   (0x30UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE      (0x33UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR	   (0xffUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE 0x0UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE    0x1UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE  0x2UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE  0x3UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE 0x7UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD   0x10UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD     0x11UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD     0x20UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD       0x21UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR		   0x30UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE      0x33UL
+	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR	   0xffUL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_V			    0x1UL
@@ -130,9 +131,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_MASK 0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_SFT  0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE 0x0UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_V	    0x1UL
@@ -156,9 +157,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_MASK    0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_SFT     0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE (0x1UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE 0x1UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_V	    0x1UL
@@ -176,9 +177,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_MASK  0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_SFT   0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE (0x2UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE 0x2UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_V	    0x1UL
@@ -200,8 +201,7 @@
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_40GB (0x190UL << 1)
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_50GB (0x1f4UL << 1)
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB (0x3e8UL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB (0xffffUL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST    HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST    HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0000UL
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_SFT 16
 };
@@ -211,9 +211,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_MASK  0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_SFT   0
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE (0x3UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_V	    0x1UL
@@ -231,9 +231,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_MASK 0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_V      0x1UL
@@ -258,9 +258,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_MASK 0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_V 0x1UL
@@ -278,9 +278,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_MASK 0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_V      0x1UL
@@ -300,9 +300,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_MASK   0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_SFT    0
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_V	    0x1UL
@@ -320,9 +320,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_MASK     0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_SFT      0
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD 0x11UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_V		    0x1UL
@@ -340,9 +340,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_MASK     0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_SFT      0
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD 0x20UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_V		    0x1UL
@@ -362,9 +362,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_MASK       0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_SFT	    0
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD (0x21UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD 0x21UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_V		    0x1UL
@@ -384,9 +384,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_MASK		    0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_SFT		    0
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR      (0x30UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR      0x30UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_V			    0x1UL
@@ -404,9 +404,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_MASK 0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_SFT  0
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_V	    0x1UL
@@ -424,9 +424,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_MASK 0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_V   0x1UL
@@ -443,9 +443,9 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_MASK      0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_SFT       0
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE (0x33UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE 0x33UL
 	__le32 event_data2;
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_V		    0x1UL
@@ -465,15 +465,15 @@
 	__le16 type;
 	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_MASK	    0x3fUL
 	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_SFT	    0
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR (0xffUL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR 0xffUL
 	__le32 event_data2;
 	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_MASK 0xffUL
 	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING (0x0UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL (0x1UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL (0x2UL << 0)
+	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING 0x0UL
+	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL 0x1UL
+	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL 0x2UL
 	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_LAST    HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL
 	u8 opaque_v;
 	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_V		    0x1UL
@@ -485,12 +485,12 @@
 	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL
 };
 
-/* HW Resource Manager Specification 1.3.0 */
+/* HW Resource Manager Specification 1.5.1 */
 #define HWRM_VERSION_MAJOR	1
-#define HWRM_VERSION_MINOR	3
-#define HWRM_VERSION_UPDATE	0
+#define HWRM_VERSION_MINOR	5
+#define HWRM_VERSION_UPDATE	1
 
-#define HWRM_VERSION_STR	"1.3.0"
+#define HWRM_VERSION_STR	"1.5.1"
 /*
  * Following is the signature for HWRM message field that indicates not
  * applicable (All F's). Need to cast it the size of the field if needed.
@@ -556,8 +556,8 @@
 	#define HWRM_QUEUE_QPORTCFG				   (0x30UL)
 	#define HWRM_QUEUE_QCFG				   (0x31UL)
 	#define HWRM_QUEUE_CFG					   (0x32UL)
-	#define HWRM_QUEUE_BUFFERS_QCFG			   (0x33UL)
-	#define HWRM_QUEUE_BUFFERS_CFG				   (0x34UL)
+	#define RESERVED2					   (0x33UL)
+	#define RESERVED3					   (0x34UL)
 	#define HWRM_QUEUE_PFCENABLE_QCFG			   (0x35UL)
 	#define HWRM_QUEUE_PFCENABLE_CFG			   (0x36UL)
 	#define HWRM_QUEUE_PRI2COS_QCFG			   (0x37UL)
@@ -574,6 +574,7 @@
 	#define HWRM_VNIC_RSS_QCFG				   (0x47UL)
 	#define HWRM_VNIC_PLCMODES_CFG				   (0x48UL)
 	#define HWRM_VNIC_PLCMODES_QCFG			   (0x49UL)
+	#define HWRM_VNIC_QCAPS				   (0x4aUL)
 	#define HWRM_RING_ALLOC				   (0x50UL)
 	#define HWRM_RING_FREE					   (0x51UL)
 	#define HWRM_RING_CMPL_RING_QAGGINT_PARAMS		   (0x52UL)
@@ -581,13 +582,15 @@
 	#define HWRM_RING_RESET				   (0x5eUL)
 	#define HWRM_RING_GRP_ALLOC				   (0x60UL)
 	#define HWRM_RING_GRP_FREE				   (0x61UL)
+	#define RESERVED5					   (0x64UL)
+	#define RESERVED6					   (0x65UL)
 	#define HWRM_VNIC_RSS_COS_LB_CTX_ALLOC			   (0x70UL)
 	#define HWRM_VNIC_RSS_COS_LB_CTX_FREE			   (0x71UL)
 	#define HWRM_CFA_L2_FILTER_ALLOC			   (0x90UL)
 	#define HWRM_CFA_L2_FILTER_FREE			   (0x91UL)
 	#define HWRM_CFA_L2_FILTER_CFG				   (0x92UL)
 	#define HWRM_CFA_L2_SET_RX_MASK			   (0x93UL)
-	#define RESERVED3					   (0x94UL)
+	#define RESERVED4					   (0x94UL)
 	#define HWRM_CFA_TUNNEL_FILTER_ALLOC			   (0x95UL)
 	#define HWRM_CFA_TUNNEL_FILTER_FREE			   (0x96UL)
 	#define HWRM_CFA_ENCAP_RECORD_ALLOC			   (0x97UL)
@@ -607,6 +610,8 @@
 	#define HWRM_STAT_CTX_CLR_STATS			   (0xb3UL)
 	#define HWRM_FW_RESET					   (0xc0UL)
 	#define HWRM_FW_QSTATUS				   (0xc1UL)
+	#define HWRM_FW_SET_TIME				   (0xc8UL)
+	#define HWRM_FW_GET_TIME				   (0xc9UL)
 	#define HWRM_EXEC_FWD_RESP				   (0xd0UL)
 	#define HWRM_REJECT_FWD_RESP				   (0xd1UL)
 	#define HWRM_FWD_RESP					   (0xd2UL)
@@ -615,11 +620,13 @@
 	#define HWRM_WOL_FILTER_ALLOC				   (0xf0UL)
 	#define HWRM_WOL_FILTER_FREE				   (0xf1UL)
 	#define HWRM_WOL_FILTER_QCFG				   (0xf2UL)
+	#define HWRM_WOL_REASON_QCFG				   (0xf3UL)
 	#define HWRM_DBG_READ_DIRECT				   (0xff10UL)
 	#define HWRM_DBG_READ_INDIRECT				   (0xff11UL)
 	#define HWRM_DBG_WRITE_DIRECT				   (0xff12UL)
 	#define HWRM_DBG_WRITE_INDIRECT			   (0xff13UL)
 	#define HWRM_DBG_DUMP					   (0xff14UL)
+	#define HWRM_NVM_INSTALL_UPDATE			   (0xfff3UL)
 	#define HWRM_NVM_MODIFY				   (0xfff4UL)
 	#define HWRM_NVM_VERIFY_UPDATE				   (0xfff5UL)
 	#define HWRM_NVM_GET_DEV_INFO				   (0xfff6UL)
@@ -824,7 +831,9 @@
 	u8 netctrl_fw_min;
 	u8 netctrl_fw_bld;
 	u8 netctrl_fw_rsvd;
-	__le32 reserved1;
+	__le32 dev_caps_cfg;
+	#define VER_GET_RESP_DEV_CAPS_CFG_SECURE_FW_UPD_SUPPORTED  0x1UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_FW_DCBX_AGENT_SUPPORTED  0x2UL
 	u8 roce_fw_maj;
 	u8 roce_fw_min;
 	u8 roce_fw_bld;
@@ -839,9 +848,9 @@
 	u8 chip_metal;
 	u8 chip_bond_id;
 	u8 chip_platform_type;
-	#define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC		   (0x0UL << 0)
-	#define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA		   (0x1UL << 0)
-	#define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM	   (0x2UL << 0)
+	#define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC		   0x0UL
+	#define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA		   0x1UL
+	#define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM	   0x2UL
 	__le16 max_req_win_len;
 	__le16 max_resp_len;
 	__le16 def_req_timeout;
@@ -863,10 +872,10 @@
 	#define FUNC_RESET_REQ_ENABLES_VF_ID_VALID		    0x1UL
 	__le16 vf_id;
 	u8 func_reset_level;
-	#define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL	   (0x0UL << 0)
-	#define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME	   (0x1UL << 0)
-	#define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN     (0x2UL << 0)
-	#define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF	   (0x3UL << 0)
+	#define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL	   0x0UL
+	#define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME	   0x1UL
+	#define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN     0x2UL
+	#define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF	   0x3UL
 	u8 unused_0;
 };
 
@@ -1028,6 +1037,10 @@
 	#define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED	    0x10UL
 	#define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED       0x20UL
 	#define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED	    0x40UL
+	#define FUNC_QCAPS_RESP_FLAGS_TX_RING_RL_SUPPORTED	    0x80UL
+	#define FUNC_QCAPS_RESP_FLAGS_TX_BW_CFG_SUPPORTED	    0x100UL
+	#define FUNC_QCAPS_RESP_FLAGS_VF_TX_RING_RL_SUPPORTED      0x200UL
+	#define FUNC_QCAPS_RESP_FLAGS_VF_BW_CFG_SUPPORTED	    0x400UL
 	u8 mac_address[6];
 	__le16 max_rsscos_ctx;
 	__le16 max_cmpl_rings;
@@ -1047,9 +1060,8 @@
 	__le32 max_mcast_filters;
 	__le32 max_flow_id;
 	__le32 max_hw_ring_grps;
+	__le16 max_sp_tx_rings;
 	u8 unused_0;
-	u8 unused_1;
-	u8 unused_2;
 	u8 valid;
 };
 
@@ -1077,6 +1089,7 @@
 	__le16 flags;
 	#define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED      0x1UL
 	#define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED	    0x2UL
+	#define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED	    0x4UL
 	u8 mac_address[6];
 	__le16 pci_id;
 	__le16 alloc_rsscos_ctx;
@@ -1089,29 +1102,46 @@
 	__le16 mru;
 	__le16 stat_ctx_id;
 	u8 port_partition_type;
-	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF		   (0x0UL << 0)
-	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS	   (0x1UL << 0)
-	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0	   (0x2UL << 0)
-	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5	   (0x3UL << 0)
-	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0	   (0x4UL << 0)
-	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN	   (0xffUL << 0)
+	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF		   0x0UL
+	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS	   0x1UL
+	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0	   0x2UL
+	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5	   0x3UL
+	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0	   0x4UL
+	#define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN	   0xffUL
 	u8 unused_0;
 	__le16 dflt_vnic_id;
 	u8 unused_1;
 	u8 unused_2;
 	__le32 min_bw;
+	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK		    0xfffffffUL
+	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT		    0
+	#define FUNC_QCFG_RESP_MIN_BW_RSVD			    0x10000000UL
+	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MASK	    0xe0000000UL
+	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_SFT	    29
+	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MBPS	   (0x0UL << 29)
+	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_PERCENT1_100  (0x1UL << 29)
+	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID       (0x7UL << 29)
+	#define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_LAST    FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 max_bw;
+	#define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_MASK		    0xfffffffUL
+	#define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_SFT		    0
+	#define FUNC_QCFG_RESP_MAX_BW_RSVD			    0x10000000UL
+	#define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MASK	    0xe0000000UL
+	#define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_SFT	    29
+	#define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MBPS	   (0x0UL << 29)
+	#define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_PERCENT1_100  (0x1UL << 29)
+	#define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID       (0x7UL << 29)
+	#define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_LAST    FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 evb_mode;
-	#define FUNC_QCFG_RESP_EVB_MODE_NO_EVB			   (0x0UL << 0)
-	#define FUNC_QCFG_RESP_EVB_MODE_VEB			   (0x1UL << 0)
-	#define FUNC_QCFG_RESP_EVB_MODE_VEPA			   (0x2UL << 0)
+	#define FUNC_QCFG_RESP_EVB_MODE_NO_EVB			   0x0UL
+	#define FUNC_QCFG_RESP_EVB_MODE_VEB			   0x1UL
+	#define FUNC_QCFG_RESP_EVB_MODE_VEPA			   0x2UL
 	u8 unused_3;
-	__le16 unused_4;
+	__le16 alloc_vfs;
 	__le32 alloc_mcast_filters;
 	__le32 alloc_hw_ring_grps;
-	u8 unused_5;
-	u8 unused_6;
-	u8 unused_7;
+	__le16 alloc_sp_tx_rings;
+	u8 unused_4;
 	u8 valid;
 };
 
@@ -1171,18 +1201,36 @@
 	__le16 dflt_vlan;
 	__be32 dflt_ip_addr[4];
 	__le32 min_bw;
+	#define FUNC_CFG_REQ_MIN_BW_BW_VALUE_MASK		    0xfffffffUL
+	#define FUNC_CFG_REQ_MIN_BW_BW_VALUE_SFT		    0
+	#define FUNC_CFG_REQ_MIN_BW_RSVD			    0x10000000UL
+	#define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MASK		    0xe0000000UL
+	#define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_SFT		    29
+	#define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MBPS		   (0x0UL << 29)
+	#define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_PERCENT1_100    (0x1UL << 29)
+	#define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID	   (0x7UL << 29)
+	#define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_LAST    FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 max_bw;
+	#define FUNC_CFG_REQ_MAX_BW_BW_VALUE_MASK		    0xfffffffUL
+	#define FUNC_CFG_REQ_MAX_BW_BW_VALUE_SFT		    0
+	#define FUNC_CFG_REQ_MAX_BW_RSVD			    0x10000000UL
+	#define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MASK		    0xe0000000UL
+	#define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_SFT		    29
+	#define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MBPS		   (0x0UL << 29)
+	#define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100    (0x1UL << 29)
+	#define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID	   (0x7UL << 29)
+	#define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_LAST    FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID
 	__le16 async_event_cr;
 	u8 vlan_antispoof_mode;
-	#define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK	   (0x0UL << 0)
-	#define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN    (0x1UL << 0)
-	#define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE (0x2UL << 0)
-	#define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN (0x3UL << 0)
+	#define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK	   0x0UL
+	#define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN    0x1UL
+	#define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE 0x2UL
+	#define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN 0x3UL
 	u8 allowed_vlan_pris;
 	u8 evb_mode;
-	#define FUNC_CFG_REQ_EVB_MODE_NO_EVB			   (0x0UL << 0)
-	#define FUNC_CFG_REQ_EVB_MODE_VEB			   (0x1UL << 0)
-	#define FUNC_CFG_REQ_EVB_MODE_VEPA			   (0x2UL << 0)
+	#define FUNC_CFG_REQ_EVB_MODE_NO_EVB			   0x0UL
+	#define FUNC_CFG_REQ_EVB_MODE_VEB			   0x1UL
+	#define FUNC_CFG_REQ_EVB_MODE_VEPA			   0x2UL
 	u8 unused_2;
 	__le16 num_mcast_filters;
 };
@@ -1341,16 +1389,16 @@
 	#define FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD		    0x8UL
 	#define FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD	    0x10UL
 	__le16 os_type;
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN		   (0x0UL << 0)
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER		   (0x1UL << 0)
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS		   (0xeUL << 0)
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS		   (0x12UL << 0)
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS		   (0x1dUL << 0)
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX		   (0x24UL << 0)
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD		   (0x2aUL << 0)
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI			   (0x68UL << 0)
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864		   (0x73UL << 0)
-	#define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2		   (0x74UL << 0)
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN		   0x0UL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER		   0x1UL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS		   0xeUL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS		   0x12UL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS		   0x1dUL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX		   0x24UL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD		   0x2aUL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI			   0x68UL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864		   0x73UL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2		   0x74UL
 	u8 ver_maj;
 	u8 ver_min;
 	u8 ver_upd;
@@ -1415,13 +1463,13 @@
 	__le16 vf_id;
 	__le16 req_buf_num_pages;
 	__le16 req_buf_page_size;
-	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B	   (0x4UL << 0)
-	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K		   (0xcUL << 0)
-	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K		   (0xdUL << 0)
-	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K	   (0x10UL << 0)
-	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M		   (0x15UL << 0)
-	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M		   (0x16UL << 0)
-	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G		   (0x1eUL << 0)
+	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B	   0x4UL
+	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K		   0xcUL
+	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K		   0xdUL
+	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K	   0x10UL
+	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M		   0x15UL
+	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M		   0x16UL
+	#define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G		   0x1eUL
 	__le16 req_buf_len;
 	__le16 resp_buf_len;
 	u8 unused_0;
@@ -1473,16 +1521,16 @@
 	__le16 seq_id;
 	__le16 resp_len;
 	__le16 os_type;
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN		   (0x0UL << 0)
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER		   (0x1UL << 0)
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS		   (0xeUL << 0)
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS		   (0x12UL << 0)
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS		   (0x1dUL << 0)
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX		   (0x24UL << 0)
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD		   (0x2aUL << 0)
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI		   (0x68UL << 0)
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864		   (0x73UL << 0)
-	#define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2		   (0x74UL << 0)
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN		   0x0UL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER		   0x1UL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS		   0xeUL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS		   0x12UL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS		   0x1dUL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX		   0x24UL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD		   0x2aUL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI		   0x68UL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864		   0x73UL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2		   0x74UL
 	u8 ver_maj;
 	u8 ver_min;
 	u8 ver_upd;
@@ -1528,44 +1576,44 @@
 	#define PORT_PHY_CFG_REQ_ENABLES_TX_LPI_TIMER		    0x400UL
 	__le16 port_id;
 	__le16 force_link_speed;
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB	   (0x1UL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB		   (0xaUL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB		   (0x14UL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB	   (0x19UL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB		   (0x64UL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB		   (0xc8UL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB		   (0xfaUL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB		   (0x190UL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB		   (0x1f4UL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB	   (0x3e8UL << 0)
-	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB		   (0xffffUL << 0)
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB	   0x1UL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB		   0xaUL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB		   0x14UL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB	   0x19UL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB		   0x64UL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB		   0xc8UL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB		   0xfaUL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB		   0x190UL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB		   0x1f4UL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB	   0x3e8UL
+	#define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB		   0xffffUL
 	u8 auto_mode;
-	#define PORT_PHY_CFG_REQ_AUTO_MODE_NONE		   (0x0UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS		   (0x1UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED		   (0x2UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW	   (0x3UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK		   (0x4UL << 0)
+	#define PORT_PHY_CFG_REQ_AUTO_MODE_NONE		   0x0UL
+	#define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS		   0x1UL
+	#define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED		   0x2UL
+	#define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW	   0x3UL
+	#define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK		   0x4UL
 	u8 auto_duplex;
-	#define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF		   (0x0UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL		   (0x1UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH		   (0x2UL << 0)
+	#define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF		   0x0UL
+	#define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL		   0x1UL
+	#define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH		   0x2UL
 	u8 auto_pause;
 	#define PORT_PHY_CFG_REQ_AUTO_PAUSE_TX			    0x1UL
 	#define PORT_PHY_CFG_REQ_AUTO_PAUSE_RX			    0x2UL
 	#define PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE	    0x4UL
 	u8 unused_0;
 	__le16 auto_link_speed;
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB		   (0x1UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB		   (0xaUL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB		   (0x14UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB		   (0x19UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB		   (0x64UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB		   (0xc8UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB		   (0xfaUL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB		   (0x190UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB		   (0x1f4UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB		   (0x3e8UL << 0)
-	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB		   (0xffffUL << 0)
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB		   0x1UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB		   0xaUL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB		   0x14UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB		   0x19UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB		   0x64UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB		   0xc8UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB		   0xfaUL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB		   0x190UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB		   0x1f4UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB		   0x3e8UL
+	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB		   0xffffUL
 	__le16 auto_link_speed_mask;
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MBHD      0x1UL
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MB	    0x2UL
@@ -1582,12 +1630,12 @@
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MBHD       0x1000UL
 	#define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MB	    0x2000UL
 	u8 wirespeed;
-	#define PORT_PHY_CFG_REQ_WIRESPEED_OFF			   (0x0UL << 0)
-	#define PORT_PHY_CFG_REQ_WIRESPEED_ON			   (0x1UL << 0)
+	#define PORT_PHY_CFG_REQ_WIRESPEED_OFF			   0x0UL
+	#define PORT_PHY_CFG_REQ_WIRESPEED_ON			   0x1UL
 	u8 lpbk;
-	#define PORT_PHY_CFG_REQ_LPBK_NONE			   (0x0UL << 0)
-	#define PORT_PHY_CFG_REQ_LPBK_LOCAL			   (0x1UL << 0)
-	#define PORT_PHY_CFG_REQ_LPBK_REMOTE			   (0x2UL << 0)
+	#define PORT_PHY_CFG_REQ_LPBK_NONE			   0x0UL
+	#define PORT_PHY_CFG_REQ_LPBK_LOCAL			   0x1UL
+	#define PORT_PHY_CFG_REQ_LPBK_REMOTE			   0x2UL
 	u8 force_pause;
 	#define PORT_PHY_CFG_REQ_FORCE_PAUSE_TX		    0x1UL
 	#define PORT_PHY_CFG_REQ_FORCE_PAUSE_RX		    0x2UL
@@ -1641,25 +1689,25 @@
 	__le16 seq_id;
 	__le16 resp_len;
 	u8 link;
-	#define PORT_PHY_QCFG_RESP_LINK_NO_LINK		   (0x0UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SIGNAL			   (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_LINK			   (0x2UL << 0)
+	#define PORT_PHY_QCFG_RESP_LINK_NO_LINK		   0x0UL
+	#define PORT_PHY_QCFG_RESP_LINK_SIGNAL			   0x1UL
+	#define PORT_PHY_QCFG_RESP_LINK_LINK			   0x2UL
 	u8 unused_0;
 	__le16 link_speed;
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB		   (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB		   (0xaUL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB		   (0x14UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB		   (0x19UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB		   (0x64UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB		   (0xc8UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB		   (0xfaUL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB		   (0x190UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB		   (0x1f4UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB		   (0x3e8UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB		   (0xffffUL << 0)
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB		   0x1UL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB		   0xaUL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB		   0x14UL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB		   0x19UL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB		   0x64UL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB		   0xc8UL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB		   0xfaUL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB		   0x190UL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB		   0x1f4UL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB		   0x3e8UL
+	#define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB		   0xffffUL
 	u8 duplex;
-	#define PORT_PHY_QCFG_RESP_DUPLEX_HALF			   (0x0UL << 0)
-	#define PORT_PHY_QCFG_RESP_DUPLEX_FULL			   (0x1UL << 0)
+	#define PORT_PHY_QCFG_RESP_DUPLEX_HALF			   0x0UL
+	#define PORT_PHY_QCFG_RESP_DUPLEX_FULL			   0x1UL
 	u8 pause;
 	#define PORT_PHY_QCFG_RESP_PAUSE_TX			    0x1UL
 	#define PORT_PHY_QCFG_RESP_PAUSE_RX			    0x2UL
@@ -1679,39 +1727,39 @@
 	#define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MBHD	    0x1000UL
 	#define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MB		    0x2000UL
 	__le16 force_link_speed;
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB	   (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB	   (0xaUL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB	   (0x14UL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB	   (0x19UL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB	   (0x64UL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB	   (0xc8UL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB	   (0xfaUL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB	   (0x190UL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB	   (0x1f4UL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB	   (0x3e8UL << 0)
-	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB	   (0xffffUL << 0)
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB	   0x1UL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB	   0xaUL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB	   0x14UL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB	   0x19UL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB	   0x64UL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB	   0xc8UL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB	   0xfaUL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB	   0x190UL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB	   0x1f4UL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB	   0x3e8UL
+	#define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB	   0xffffUL
 	u8 auto_mode;
-	#define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE		   (0x0UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS	   (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED		   (0x2UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW	   (0x3UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK	   (0x4UL << 0)
+	#define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE		   0x0UL
+	#define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS	   0x1UL
+	#define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED		   0x2UL
+	#define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW	   0x3UL
+	#define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK	   0x4UL
 	u8 auto_pause;
 	#define PORT_PHY_QCFG_RESP_AUTO_PAUSE_TX		    0x1UL
 	#define PORT_PHY_QCFG_RESP_AUTO_PAUSE_RX		    0x2UL
 	#define PORT_PHY_QCFG_RESP_AUTO_PAUSE_AUTONEG_PAUSE	    0x4UL
 	__le16 auto_link_speed;
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB	   (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB		   (0xaUL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB		   (0x14UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB	   (0x19UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB	   (0x64UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB	   (0xc8UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB	   (0xfaUL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB	   (0x190UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB	   (0x1f4UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB	   (0x3e8UL << 0)
-	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB	   (0xffffUL << 0)
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB	   0x1UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB		   0xaUL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB		   0x14UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB	   0x19UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB	   0x64UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB	   0xc8UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB	   0xfaUL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB	   0x190UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB	   0x1f4UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB	   0x3e8UL
+	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB	   0xffffUL
 	__le16 auto_link_speed_mask;
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MBHD    0x1UL
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MB      0x2UL
@@ -1728,46 +1776,46 @@
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MBHD     0x1000UL
 	#define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MB       0x2000UL
 	u8 wirespeed;
-	#define PORT_PHY_QCFG_RESP_WIRESPEED_OFF		   (0x0UL << 0)
-	#define PORT_PHY_QCFG_RESP_WIRESPEED_ON		   (0x1UL << 0)
+	#define PORT_PHY_QCFG_RESP_WIRESPEED_OFF		   0x0UL
+	#define PORT_PHY_QCFG_RESP_WIRESPEED_ON		   0x1UL
 	u8 lpbk;
-	#define PORT_PHY_QCFG_RESP_LPBK_NONE			   (0x0UL << 0)
-	#define PORT_PHY_QCFG_RESP_LPBK_LOCAL			   (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_LPBK_REMOTE			   (0x2UL << 0)
+	#define PORT_PHY_QCFG_RESP_LPBK_NONE			   0x0UL
+	#define PORT_PHY_QCFG_RESP_LPBK_LOCAL			   0x1UL
+	#define PORT_PHY_QCFG_RESP_LPBK_REMOTE			   0x2UL
 	u8 force_pause;
 	#define PORT_PHY_QCFG_RESP_FORCE_PAUSE_TX		    0x1UL
 	#define PORT_PHY_QCFG_RESP_FORCE_PAUSE_RX		    0x2UL
 	u8 module_status;
-	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE		   (0x0UL << 0)
-	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX	   (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG       (0x2UL << 0)
-	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN	   (0x3UL << 0)
-	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED      (0x4UL << 0)
-	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE    (0xffUL << 0)
+	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE		   0x0UL
+	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX	   0x1UL
+	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG       0x2UL
+	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN	   0x3UL
+	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED      0x4UL
+	#define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE    0xffUL
 	__le32 preemphasis;
 	u8 phy_maj;
 	u8 phy_min;
 	u8 phy_bld;
 	u8 phy_type;
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN		   (0x0UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR		   (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4		   (0x2UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR		   (0x3UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR		   (0x4UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2		   (0x5UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX		   (0x6UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR		   (0x7UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET		   (0x8UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE		   (0x9UL << 0)
-	#define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY	   (0xaUL << 0)
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN		   0x0UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR		   0x1UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4		   0x2UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR		   0x3UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR		   0x4UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2		   0x5UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX		   0x6UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR		   0x7UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET		   0x8UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE		   0x9UL
+	#define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY	   0xaUL
 	u8 media_type;
-	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN		   (0x0UL << 0)
-	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP		   (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC		   (0x2UL << 0)
-	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE		   (0x3UL << 0)
+	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN		   0x0UL
+	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP		   0x1UL
+	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC		   0x2UL
+	#define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE		   0x3UL
 	u8 xcvr_pkg_type;
-	#define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL    (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL    (0x2UL << 0)
+	#define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL    0x1UL
+	#define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL    0x2UL
 	u8 eee_config_phy_addr;
 	#define PORT_PHY_QCFG_RESP_PHY_ADDR_MASK		    0x1fUL
 	#define PORT_PHY_QCFG_RESP_PHY_ADDR_SFT		    0
@@ -1796,11 +1844,11 @@
 	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MBHD  0x1000UL
 	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MB    0x2000UL
 	u8 link_partner_adv_auto_mode;
-	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE (0x0UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS (0x1UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED (0x2UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0)
-	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK (0x4UL << 0)
+	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE 0x0UL
+	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS 0x1UL
+	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED 0x2UL
+	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW 0x3UL
+	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK 0x4UL
 	u8 link_partner_adv_pause;
 	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_TX       0x1UL
 	#define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_RX       0x2UL
@@ -1859,7 +1907,7 @@
 	__le64 resp_addr;
 	__le32 flags;
 	#define PORT_MAC_CFG_REQ_FLAGS_MATCH_LINK		    0x1UL
-	#define PORT_MAC_CFG_REQ_FLAGS_COS_ASSIGNMENT_ENABLE       0x2UL
+	#define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_ENABLE	    0x2UL
 	#define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_ENABLE       0x4UL
 	#define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_ENABLE	    0x8UL
 	#define PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_ENABLE    0x10UL
@@ -1868,28 +1916,50 @@
 	#define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE   0x80UL
 	#define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_ENABLE		    0x100UL
 	#define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_DISABLE		    0x200UL
+	#define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_DISABLE	    0x400UL
+	#define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_DISABLE      0x800UL
+	#define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_DISABLE	    0x1000UL
 	__le32 enables;
 	#define PORT_MAC_CFG_REQ_ENABLES_IPG			    0x1UL
 	#define PORT_MAC_CFG_REQ_ENABLES_LPBK			    0x2UL
-	#define PORT_MAC_CFG_REQ_ENABLES_IVLAN_PRI2COS_MAP_PRI     0x4UL
-	#define PORT_MAC_CFG_REQ_ENABLES_LCOS_MAP_PRI		    0x8UL
+	#define PORT_MAC_CFG_REQ_ENABLES_VLAN_PRI2COS_MAP_PRI      0x4UL
+	#define PORT_MAC_CFG_REQ_ENABLES_RESERVED1		    0x8UL
 	#define PORT_MAC_CFG_REQ_ENABLES_TUNNEL_PRI2COS_MAP_PRI    0x10UL
 	#define PORT_MAC_CFG_REQ_ENABLES_DSCP2COS_MAP_PRI	    0x20UL
 	#define PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE 0x40UL
 	#define PORT_MAC_CFG_REQ_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE 0x80UL
+	#define PORT_MAC_CFG_REQ_ENABLES_COS_FIELD_CFG		    0x100UL
 	__le16 port_id;
 	u8 ipg;
 	u8 lpbk;
-	#define PORT_MAC_CFG_REQ_LPBK_NONE			   (0x0UL << 0)
-	#define PORT_MAC_CFG_REQ_LPBK_LOCAL			   (0x1UL << 0)
-	#define PORT_MAC_CFG_REQ_LPBK_REMOTE			   (0x2UL << 0)
-	u8 ivlan_pri2cos_map_pri;
-	u8 lcos_map_pri;
+	#define PORT_MAC_CFG_REQ_LPBK_NONE			   0x0UL
+	#define PORT_MAC_CFG_REQ_LPBK_LOCAL			   0x1UL
+	#define PORT_MAC_CFG_REQ_LPBK_REMOTE			   0x2UL
+	u8 vlan_pri2cos_map_pri;
+	u8 reserved1;
 	u8 tunnel_pri2cos_map_pri;
 	u8 dscp2pri_map_pri;
 	__le16 rx_ts_capture_ptp_msg_type;
 	__le16 tx_ts_capture_ptp_msg_type;
-	__le32 unused_0;
+	u8 cos_field_cfg;
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_RSVD1		    0x1UL
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_MASK   0x6UL
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_SFT    1
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST (0x0UL << 1)
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER (0x1UL << 1)
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST (0x2UL << 1)
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 1)
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_LAST    PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK 0x18UL
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT  3
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST (0x0UL << 3)
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER (0x1UL << 3)
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST (0x2UL << 3)
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 3)
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST    PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_MASK    0xe0UL
+	#define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT     5
+	u8 unused_0[3];
 };
 
 /* Output (16 bytes) */
@@ -1902,9 +1972,9 @@
 	__le16 mtu;
 	u8 ipg;
 	u8 lpbk;
-	#define PORT_MAC_CFG_RESP_LPBK_NONE			   (0x0UL << 0)
-	#define PORT_MAC_CFG_RESP_LPBK_LOCAL			   (0x1UL << 0)
-	#define PORT_MAC_CFG_RESP_LPBK_REMOTE			   (0x2UL << 0)
+	#define PORT_MAC_CFG_RESP_LPBK_NONE			   0x0UL
+	#define PORT_MAC_CFG_RESP_LPBK_LOCAL			   0x1UL
+	#define PORT_MAC_CFG_RESP_LPBK_REMOTE			   0x2UL
 	u8 unused_0;
 	u8 valid;
 };
@@ -2163,8 +2233,8 @@
 	__le64 resp_addr;
 	__le32 flags;
 	#define QUEUE_QPORTCFG_REQ_FLAGS_PATH			    0x1UL
-	#define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX		   (0x0UL << 0)
-	#define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX		   (0x1UL << 0)
+	#define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX		   0x0UL
+	#define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX		   0x1UL
 	#define QUEUE_QPORTCFG_REQ_FLAGS_PATH_LAST    QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX
 	__le16 port_id;
 	__le16 unused_0;
@@ -2179,50 +2249,51 @@
 	u8 max_configurable_queues;
 	u8 max_configurable_lossless_queues;
 	u8 queue_cfg_allowed;
-	u8 queue_buffers_cfg_allowed;
+	u8 queue_cfg_info;
+	#define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG	    0x1UL
 	u8 queue_pfcenable_cfg_allowed;
 	u8 queue_pri2cos_cfg_allowed;
 	u8 queue_cos2bw_cfg_allowed;
 	u8 queue_id0;
 	u8 queue_id0_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY (0x0UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY 0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS 0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN 0xffUL
 	u8 queue_id1;
 	u8 queue_id1_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY (0x0UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY 0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS 0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN 0xffUL
 	u8 queue_id2;
 	u8 queue_id2_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY (0x0UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY 0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS 0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN 0xffUL
 	u8 queue_id3;
 	u8 queue_id3_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY (0x0UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY 0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS 0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN 0xffUL
 	u8 queue_id4;
 	u8 queue_id4_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY (0x0UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY 0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS 0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN 0xffUL
 	u8 queue_id5;
 	u8 queue_id5_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY (0x0UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY 0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS 0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN 0xffUL
 	u8 queue_id6;
 	u8 queue_id6_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY (0x0UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY 0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS 0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN 0xffUL
 	u8 queue_id7;
 	u8 queue_id7_service_profile;
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY (0x0UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS (0x1UL << 0)
-	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN (0xffUL << 0)
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY 0x0UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS 0x1UL
+	#define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN 0xffUL
 	u8 valid;
 };
 
@@ -2235,19 +2306,21 @@
 	__le16 target_id;
 	__le64 resp_addr;
 	__le32 flags;
-	#define QUEUE_CFG_REQ_FLAGS_PATH			    0x1UL
-	#define QUEUE_CFG_REQ_FLAGS_PATH_TX			   (0x0UL << 0)
-	#define QUEUE_CFG_REQ_FLAGS_PATH_RX			   (0x1UL << 0)
-	#define QUEUE_CFG_REQ_FLAGS_PATH_LAST    QUEUE_CFG_REQ_FLAGS_PATH_RX
+	#define QUEUE_CFG_REQ_FLAGS_PATH_MASK			    0x3UL
+	#define QUEUE_CFG_REQ_FLAGS_PATH_SFT			    0
+	#define QUEUE_CFG_REQ_FLAGS_PATH_TX			   0x0UL
+	#define QUEUE_CFG_REQ_FLAGS_PATH_RX			   0x1UL
+	#define QUEUE_CFG_REQ_FLAGS_PATH_BIDIR			   0x2UL
+	#define QUEUE_CFG_REQ_FLAGS_PATH_LAST    QUEUE_CFG_REQ_FLAGS_PATH_BIDIR
 	__le32 enables;
 	#define QUEUE_CFG_REQ_ENABLES_DFLT_LEN			    0x1UL
 	#define QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE		    0x2UL
 	__le32 queue_id;
 	__le32 dflt_len;
 	u8 service_profile;
-	#define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY		   (0x0UL << 0)
-	#define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS		   (0x1UL << 0)
-	#define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN		   (0xffUL << 0)
+	#define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY		   0x0UL
+	#define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS		   0x1UL
+	#define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN		   0xffUL
 	u8 unused_0[7];
 };
 
@@ -2264,50 +2337,6 @@
 	u8 valid;
 };
 
-/* hwrm_queue_buffers_cfg */
-/* Input (56 bytes) */
-struct hwrm_queue_buffers_cfg_input {
-	__le16 req_type;
-	__le16 cmpl_ring;
-	__le16 seq_id;
-	__le16 target_id;
-	__le64 resp_addr;
-	__le32 flags;
-	#define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH		    0x1UL
-	#define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_TX		   (0x0UL << 0)
-	#define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX		   (0x1UL << 0)
-	#define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_LAST    QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX
-	__le32 enables;
-	#define QUEUE_BUFFERS_CFG_REQ_ENABLES_RESERVED		    0x1UL
-	#define QUEUE_BUFFERS_CFG_REQ_ENABLES_SHARED		    0x2UL
-	#define QUEUE_BUFFERS_CFG_REQ_ENABLES_XOFF		    0x4UL
-	#define QUEUE_BUFFERS_CFG_REQ_ENABLES_XON		    0x8UL
-	#define QUEUE_BUFFERS_CFG_REQ_ENABLES_FULL		    0x10UL
-	#define QUEUE_BUFFERS_CFG_REQ_ENABLES_NOTFULL		    0x20UL
-	#define QUEUE_BUFFERS_CFG_REQ_ENABLES_MAX		    0x40UL
-	__le32 queue_id;
-	__le32 reserved;
-	__le32 shared;
-	__le32 xoff;
-	__le32 xon;
-	__le32 full;
-	__le32 notfull;
-	__le32 max;
-};
-
-/* Output (16 bytes) */
-struct hwrm_queue_buffers_cfg_output {
-	__le16 error_code;
-	__le16 req_type;
-	__le16 seq_id;
-	__le16 resp_len;
-	__le32 unused_0;
-	u8 unused_1;
-	u8 unused_2;
-	u8 unused_3;
-	u8 valid;
-};
-
 /* hwrm_queue_pfcenable_cfg */
 /* Input (24 bytes) */
 struct hwrm_queue_pfcenable_cfg_input {
@@ -2351,12 +2380,22 @@
 	__le16 target_id;
 	__le64 resp_addr;
 	__le32 flags;
-	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH		    0x1UL
+	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_MASK		    0x3UL
+	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_SFT		    0
 	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_TX		   (0x0UL << 0)
 	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX		   (0x1UL << 0)
-	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST    QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX
-	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN		    0x2UL
+	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR		   (0x2UL << 0)
+	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST    QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR
+	#define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN		    0x4UL
 	__le32 enables;
+	#define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID    0x1UL
+	#define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI1_COS_QUEUE_ID    0x2UL
+	#define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI2_COS_QUEUE_ID    0x4UL
+	#define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI3_COS_QUEUE_ID    0x8UL
+	#define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI4_COS_QUEUE_ID    0x10UL
+	#define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI5_COS_QUEUE_ID    0x20UL
+	#define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI6_COS_QUEUE_ID    0x40UL
+	#define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI7_COS_QUEUE_ID    0x80UL
 	u8 port_id;
 	u8 pri0_cos_queue_id;
 	u8 pri1_cos_queue_id;
@@ -2404,82 +2443,226 @@
 	u8 queue_id0;
 	u8 unused_0;
 	__le32 queue_id0_min_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 queue_id0_max_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 queue_id0_tsa_assign;
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP      (0x0UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS     (0x1UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP      0x0UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS     0x1UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST 0xffUL
 	u8 queue_id0_pri_lvl;
 	u8 queue_id0_bw_weight;
 	u8 queue_id1;
 	__le32 queue_id1_min_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 queue_id1_max_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 queue_id1_tsa_assign;
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP      (0x0UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS     (0x1UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP      0x0UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS     0x1UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST 0xffUL
 	u8 queue_id1_pri_lvl;
 	u8 queue_id1_bw_weight;
 	u8 queue_id2;
 	__le32 queue_id2_min_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 queue_id2_max_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 queue_id2_tsa_assign;
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP      (0x0UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS     (0x1UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP      0x0UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS     0x1UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST 0xffUL
 	u8 queue_id2_pri_lvl;
 	u8 queue_id2_bw_weight;
 	u8 queue_id3;
 	__le32 queue_id3_min_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 queue_id3_max_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 queue_id3_tsa_assign;
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP      (0x0UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS     (0x1UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP      0x0UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS     0x1UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST 0xffUL
 	u8 queue_id3_pri_lvl;
 	u8 queue_id3_bw_weight;
 	u8 queue_id4;
 	__le32 queue_id4_min_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 queue_id4_max_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 queue_id4_tsa_assign;
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP      (0x0UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS     (0x1UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP      0x0UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS     0x1UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST 0xffUL
 	u8 queue_id4_pri_lvl;
 	u8 queue_id4_bw_weight;
 	u8 queue_id5;
 	__le32 queue_id5_min_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 queue_id5_max_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 queue_id5_tsa_assign;
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP      (0x0UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS     (0x1UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP      0x0UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS     0x1UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST 0xffUL
 	u8 queue_id5_pri_lvl;
 	u8 queue_id5_bw_weight;
 	u8 queue_id6;
 	__le32 queue_id6_min_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 queue_id6_max_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 queue_id6_tsa_assign;
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP      (0x0UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS     (0x1UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP      0x0UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS     0x1UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST 0xffUL
 	u8 queue_id6_pri_lvl;
 	u8 queue_id6_bw_weight;
 	u8 queue_id7;
 	__le32 queue_id7_min_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID
 	__le32 queue_id7_max_bw;
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_RSVD	    0x10000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 queue_id7_tsa_assign;
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP      (0x0UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS     (0x1UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0)
-	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0)
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP      0x0UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS     0x1UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST 0xffUL
 	u8 queue_id7_pri_lvl;
 	u8 queue_id7_bw_weight;
 	u8 unused_1[5];
@@ -2563,6 +2746,7 @@
 	#define VNIC_CFG_REQ_FLAGS_BD_STALL_MODE		    0x4UL
 	#define VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE		    0x8UL
 	#define VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE		    0x10UL
+	#define VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE		    0x20UL
 	__le32 enables;
 	#define VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP		    0x1UL
 	#define VNIC_CFG_REQ_ENABLES_RSS_RULE			    0x2UL
@@ -2615,18 +2799,18 @@
 	#define VNIC_TPA_CFG_REQ_ENABLES_MIN_AGG_LEN		    0x8UL
 	__le16 vnic_id;
 	__le16 max_agg_segs;
-	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1		   (0x0UL << 0)
-	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2		   (0x1UL << 0)
-	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4		   (0x2UL << 0)
-	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8		   (0x3UL << 0)
-	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX		   (0x1fUL << 0)
+	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1		   0x0UL
+	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2		   0x1UL
+	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4		   0x2UL
+	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8		   0x3UL
+	#define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX		   0x1fUL
 	__le16 max_aggs;
-	#define VNIC_TPA_CFG_REQ_MAX_AGGS_1			   (0x0UL << 0)
-	#define VNIC_TPA_CFG_REQ_MAX_AGGS_2			   (0x1UL << 0)
-	#define VNIC_TPA_CFG_REQ_MAX_AGGS_4			   (0x2UL << 0)
-	#define VNIC_TPA_CFG_REQ_MAX_AGGS_8			   (0x3UL << 0)
-	#define VNIC_TPA_CFG_REQ_MAX_AGGS_16			   (0x4UL << 0)
-	#define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX			   (0x7UL << 0)
+	#define VNIC_TPA_CFG_REQ_MAX_AGGS_1			   0x0UL
+	#define VNIC_TPA_CFG_REQ_MAX_AGGS_2			   0x1UL
+	#define VNIC_TPA_CFG_REQ_MAX_AGGS_4			   0x2UL
+	#define VNIC_TPA_CFG_REQ_MAX_AGGS_8			   0x3UL
+	#define VNIC_TPA_CFG_REQ_MAX_AGGS_16			   0x4UL
+	#define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX			   0x7UL
 	u8 unused_0;
 	u8 unused_1;
 	__le32 max_agg_timer;
@@ -2780,15 +2964,15 @@
 	__le64 resp_addr;
 	__le32 enables;
 	#define RING_ALLOC_REQ_ENABLES_RESERVED1		    0x1UL
-	#define RING_ALLOC_REQ_ENABLES_RESERVED2		    0x2UL
+	#define RING_ALLOC_REQ_ENABLES_RING_ARB_CFG		    0x2UL
 	#define RING_ALLOC_REQ_ENABLES_RESERVED3		    0x4UL
 	#define RING_ALLOC_REQ_ENABLES_STAT_CTX_ID_VALID	    0x8UL
 	#define RING_ALLOC_REQ_ENABLES_RESERVED4		    0x10UL
 	#define RING_ALLOC_REQ_ENABLES_MAX_BW_VALID		    0x20UL
 	u8 ring_type;
-	#define RING_ALLOC_REQ_RING_TYPE_CMPL			   (0x0UL << 0)
-	#define RING_ALLOC_REQ_RING_TYPE_TX			   (0x1UL << 0)
-	#define RING_ALLOC_REQ_RING_TYPE_RX			   (0x2UL << 0)
+	#define RING_ALLOC_REQ_RING_TYPE_CMPL			   0x0UL
+	#define RING_ALLOC_REQ_RING_TYPE_TX			   0x1UL
+	#define RING_ALLOC_REQ_RING_TYPE_RX			   0x2UL
 	u8 unused_0;
 	__le16 unused_1;
 	__le64 page_tbl_addr;
@@ -2804,18 +2988,36 @@
 	u8 unused_4;
 	u8 unused_5;
 	__le32 reserved1;
-	__le16 reserved2;
+	__le16 ring_arb_cfg;
+	#define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_MASK	    0xfUL
+	#define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SFT	    0
+	#define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SP	   (0x1UL << 0)
+	#define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ	   (0x2UL << 0)
+	#define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_LAST    RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ
+	#define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_MASK		    0xf0UL
+	#define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_SFT		    4
+	#define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_MASK  0xff00UL
+	#define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_SFT   8
 	u8 unused_6;
 	u8 unused_7;
 	__le32 reserved3;
 	__le32 stat_ctx_id;
 	__le32 reserved4;
 	__le32 max_bw;
+	#define RING_ALLOC_REQ_MAX_BW_BW_VALUE_MASK		    0xfffffffUL
+	#define RING_ALLOC_REQ_MAX_BW_BW_VALUE_SFT		    0
+	#define RING_ALLOC_REQ_MAX_BW_RSVD			    0x10000000UL
+	#define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MASK	    0xe0000000UL
+	#define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_SFT	    29
+	#define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MBPS	   (0x0UL << 29)
+	#define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100  (0x1UL << 29)
+	#define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID       (0x7UL << 29)
+	#define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_LAST    RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID
 	u8 int_mode;
-	#define RING_ALLOC_REQ_INT_MODE_LEGACY			   (0x0UL << 0)
-	#define RING_ALLOC_REQ_INT_MODE_RSVD			   (0x1UL << 0)
-	#define RING_ALLOC_REQ_INT_MODE_MSIX			   (0x2UL << 0)
-	#define RING_ALLOC_REQ_INT_MODE_POLL			   (0x3UL << 0)
+	#define RING_ALLOC_REQ_INT_MODE_LEGACY			   0x0UL
+	#define RING_ALLOC_REQ_INT_MODE_RSVD			   0x1UL
+	#define RING_ALLOC_REQ_INT_MODE_MSIX			   0x2UL
+	#define RING_ALLOC_REQ_INT_MODE_POLL			   0x3UL
 	u8 unused_8[3];
 };
 
@@ -2842,9 +3044,9 @@
 	__le16 target_id;
 	__le64 resp_addr;
 	u8 ring_type;
-	#define RING_FREE_REQ_RING_TYPE_CMPL			   (0x0UL << 0)
-	#define RING_FREE_REQ_RING_TYPE_TX			   (0x1UL << 0)
-	#define RING_FREE_REQ_RING_TYPE_RX			   (0x2UL << 0)
+	#define RING_FREE_REQ_RING_TYPE_CMPL			   0x0UL
+	#define RING_FREE_REQ_RING_TYPE_TX			   0x1UL
+	#define RING_FREE_REQ_RING_TYPE_RX			   0x2UL
 	u8 unused_0;
 	__le16 ring_id;
 	__le32 unused_1;
@@ -2942,9 +3144,9 @@
 	__le16 target_id;
 	__le64 resp_addr;
 	u8 ring_type;
-	#define RING_RESET_REQ_RING_TYPE_CMPL			   (0x0UL << 0)
-	#define RING_RESET_REQ_RING_TYPE_TX			   (0x1UL << 0)
-	#define RING_RESET_REQ_RING_TYPE_RX			   (0x2UL << 0)
+	#define RING_RESET_REQ_RING_TYPE_CMPL			   0x0UL
+	#define RING_RESET_REQ_RING_TYPE_TX			   0x1UL
+	#define RING_RESET_REQ_RING_TYPE_RX			   0x2UL
 	u8 unused_0;
 	__le16 ring_id;
 	__le32 unused_1;
@@ -3068,36 +3270,36 @@
 	__le16 t_l2_ivlan;
 	__le16 t_l2_ivlan_mask;
 	u8 src_type;
-	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT		   (0x0UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF		   (0x1UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF		   (0x2UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC		   (0x3UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG		   (0x4UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE		   (0x5UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO		   (0x6UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG		   (0x7UL << 0)
+	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT		   0x0UL
+	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF		   0x1UL
+	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF		   0x2UL
+	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC		   0x3UL
+	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG		   0x4UL
+	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE		   0x5UL
+	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO		   0x6UL
+	#define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG		   0x7UL
 	u8 unused_6;
 	__le32 src_id;
 	u8 tunnel_type;
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL     (0x0UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN	   (0x1UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE	   (0x2UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE	   (0x3UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP	   (0x4UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE	   (0x5UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS	   (0x6UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT	   (0x7UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE	   (0x8UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL     (0xffUL << 0)
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL     0x0UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN	   0x1UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE	   0x2UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE	   0x3UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP	   0x4UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE	   0x5UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS	   0x6UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT	   0x7UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE	   0x8UL
+	#define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL     0xffUL
 	u8 unused_7;
 	__le16 dst_id;
 	__le16 mirror_vnic_id;
 	u8 pri_hint;
-	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER	   (0x0UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER     (0x1UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER     (0x2UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX		   (0x3UL << 0)
-	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN		   (0x4UL << 0)
+	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER	   0x0UL
+	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER     0x1UL
+	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER     0x2UL
+	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX		   0x3UL
+	#define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN		   0x4UL
 	u8 unused_8;
 	__le32 unused_9;
 	__le64 l2_filter_id_hint;
@@ -3246,16 +3448,16 @@
 	u8 l3_addr_type;
 	u8 t_l3_addr_type;
 	u8 tunnel_type;
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0)
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN     (0x1UL << 0)
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE     (0x2UL << 0)
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE     (0x3UL << 0)
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP      (0x4UL << 0)
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE    (0x5UL << 0)
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      (0x6UL << 0)
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       (0x7UL << 0)
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     (0x8UL << 0)
-	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0)
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN     0x1UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE     0x2UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE     0x3UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP      0x4UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE    0x5UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      0x6UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       0x7UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     0x8UL
+	#define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
 	u8 unused_0;
 	__le32 vni;
 	__le32 dst_vnic_id;
@@ -3311,14 +3513,14 @@
 	__le32 flags;
 	#define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_LOOPBACK	    0x1UL
 	u8 encap_type;
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN       (0x1UL << 0)
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE       (0x2UL << 0)
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE       (0x3UL << 0)
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP	   (0x4UL << 0)
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE      (0x5UL << 0)
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS	   (0x6UL << 0)
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN	   (0x7UL << 0)
-	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE       (0x8UL << 0)
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN       0x1UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE       0x2UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE       0x3UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP	   0x4UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE      0x5UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS	   0x6UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN	   0x7UL
+	#define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE       0x8UL
 	u8 unused_0;
 	__le16 unused_1;
 	__le32 encap_data[16];
@@ -3397,32 +3599,32 @@
 	u8 src_macaddr[6];
 	__be16 ethertype;
 	u8 ip_addr_type;
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN  (0x0UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4     (0x4UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6     (0x6UL << 0)
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN  0x0UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4     0x4UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6     0x6UL
 	u8 ip_protocol;
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN   (0x0UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP       (0x6UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP       (0x11UL << 0)
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN   0x0UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP       0x6UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP       0x11UL
 	__le16 dst_id;
 	__le16 mirror_vnic_id;
 	u8 tunnel_type;
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN     (0x1UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE     (0x2UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE     (0x3UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP      (0x4UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE    (0x5UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      (0x6UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       (0x7UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     (0x8UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0)
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN     0x1UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE     0x2UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE     0x3UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP      0x4UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE    0x5UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS      0x6UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT       0x7UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE     0x8UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL
 	u8 pri_hint;
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER    (0x0UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE	   (0x1UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW	   (0x2UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST      (0x3UL << 0)
-	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST       (0x4UL << 0)
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER    0x0UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE	   0x1UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW	   0x2UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST      0x3UL
+	#define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST       0x4UL
 	__be32 src_ipaddr[4];
 	__be32 src_ipaddr_mask[4];
 	__be32 dst_ipaddr[4];
@@ -3511,8 +3713,8 @@
 	__le16 target_id;
 	__le64 resp_addr;
 	u8 tunnel_type;
-	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN       (0x1UL << 0)
-	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE      (0x5UL << 0)
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN       0x1UL
+	#define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE      0x5UL
 	u8 unused_0[7];
 };
 
@@ -3539,8 +3741,8 @@
 	__le16 target_id;
 	__le64 resp_addr;
 	u8 tunnel_type;
-	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN       (0x1UL << 0)
-	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE      (0x5UL << 0)
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN       0x1UL
+	#define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE      0x5UL
 	u8 unused_0;
 	__be16 tunnel_dst_port_val;
 	__le32 unused_1;
@@ -3570,8 +3772,8 @@
 	__le16 target_id;
 	__le64 resp_addr;
 	u8 tunnel_type;
-	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN	   (0x1UL << 0)
-	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE       (0x5UL << 0)
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN	   0x1UL
+	#define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE       0x5UL
 	u8 unused_0;
 	__le16 tunnel_dst_port_id;
 	__le32 unused_1;
@@ -3720,15 +3922,15 @@
 	__le16 target_id;
 	__le64 resp_addr;
 	u8 embedded_proc_type;
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT		   (0x0UL << 0)
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT		   (0x1UL << 0)
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL	   (0x2UL << 0)
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE		   (0x3UL << 0)
-	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD		   (0x4UL << 0)
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT		   0x0UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT		   0x1UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL	   0x2UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE		   0x3UL
+	#define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD		   0x4UL
 	u8 selfrst_status;
-	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE	   (0x0UL << 0)
-	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP	   (0x1UL << 0)
-	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST	   (0x2UL << 0)
+	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE	   0x0UL
+	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP	   0x1UL
+	#define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST	   0x2UL
 	__le16 unused_0[3];
 };
 
@@ -3739,9 +3941,9 @@
 	__le16 seq_id;
 	__le16 resp_len;
 	u8 selfrst_status;
-	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE	   (0x0UL << 0)
-	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP	   (0x1UL << 0)
-	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST       (0x2UL << 0)
+	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE	   0x0UL
+	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP	   0x1UL
+	#define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST       0x2UL
 	u8 unused_0;
 	__le16 unused_1;
 	u8 unused_2;
@@ -3759,11 +3961,11 @@
 	__le16 target_id;
 	__le64 resp_addr;
 	u8 embedded_proc_type;
-	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT		   (0x0UL << 0)
-	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT		   (0x1UL << 0)
-	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL	   (0x2UL << 0)
-	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE		   (0x3UL << 0)
-	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD		   (0x4UL << 0)
+	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT		   0x0UL
+	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT		   0x1UL
+	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL	   0x2UL
+	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE		   0x3UL
+	#define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD		   0x4UL
 	u8 unused_0[7];
 };
 
@@ -3774,9 +3976,9 @@
 	__le16 seq_id;
 	__le16 resp_len;
 	u8 selfrst_status;
-	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE	   (0x0UL << 0)
-	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP	   (0x1UL << 0)
-	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST     (0x2UL << 0)
+	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE	   0x0UL
+	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP	   0x1UL
+	#define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST     0x2UL
 	u8 unused_0;
 	__le16 unused_1;
 	u8 unused_2;
@@ -3785,6 +3987,42 @@
 	u8 valid;
 };
 
+/* hwrm_fw_set_time */
+/* Input (32 bytes) */
+struct hwrm_fw_set_time_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le16 year;
+	#define FW_SET_TIME_REQ_YEAR_UNKNOWN			   0x0UL
+	u8 month;
+	u8 day;
+	u8 hour;
+	u8 minute;
+	u8 second;
+	u8 unused_0;
+	__le16 millisecond;
+	__le16 zone;
+	#define FW_SET_TIME_REQ_ZONE_UTC			   0x0UL
+	#define FW_SET_TIME_REQ_ZONE_UNKNOWN			   0xffffUL
+	__le32 unused_1;
+};
+
+/* Output (16 bytes) */
+struct hwrm_fw_set_time_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
 /* hwrm_exec_fwd_resp */
 /* Input (128 bytes) */
 struct hwrm_exec_fwd_resp_input {
@@ -3921,32 +4159,6 @@
 	u8 valid;
 };
 
-/* hwrm_nvm_raw_write_blk */
-/* Input (32 bytes) */
-struct hwrm_nvm_raw_write_blk_input {
-	__le16 req_type;
-	__le16 cmpl_ring;
-	__le16 seq_id;
-	__le16 target_id;
-	__le64 resp_addr;
-	__le64 host_src_addr;
-	__le32 dest_addr;
-	__le32 len;
-};
-
-/* Output (16 bytes) */
-struct hwrm_nvm_raw_write_blk_output {
-	__le16 error_code;
-	__le16 req_type;
-	__le16 seq_id;
-	__le16 resp_len;
-	__le32 unused_0;
-	u8 unused_1;
-	u8 unused_2;
-	u8 unused_3;
-	u8 valid;
-};
-
 /* hwrm_nvm_read */
 /* Input (40 bytes) */
 struct hwrm_nvm_read_input {
@@ -4132,9 +4344,9 @@
 	u8 opt_ordinal;
 	#define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_MASK	    0x3UL
 	#define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_SFT		    0
-	#define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ		   (0x0UL << 0)
-	#define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE		   (0x1UL << 0)
-	#define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT		   (0x2UL << 0)
+	#define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ		   0x0UL
+	#define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE		   0x1UL
+	#define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT		   0x2UL
 	u8 unused_1[3];
 };
 
@@ -4266,4 +4478,41 @@
 	u8 valid;
 };
 
+/* hwrm_nvm_install_update */
+/* Input (24 bytes) */
+struct hwrm_nvm_install_update_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le32 install_type;
+	#define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_NORMAL	   0x0UL
+	#define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_ALL	   0xffffffffUL
+	__le32 unused_0;
+};
+
+/* Output (24 bytes) */
+struct hwrm_nvm_install_update_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le64 installed_items;
+	u8 result;
+	#define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS		   0x0UL
+	u8 problem_item;
+	#define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE	   0x0UL
+	#define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE      0xffUL
+	u8 reset_required;
+	#define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_NONE       0x0UL
+	#define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_PCI	   0x1UL
+	#define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_POWER      0x2UL
+	u8 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 50d2007..ec6cd18 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -19,6 +19,45 @@
 #include "bnxt_ethtool.h"
 
 #ifdef CONFIG_BNXT_SRIOV
+static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp,
+					  struct bnxt_vf_info *vf, u16 event_id)
+{
+	struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_fwd_async_event_cmpl_input req = {0};
+	struct hwrm_async_event_cmpl *async_cmpl;
+	int rc = 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1);
+	if (vf)
+		req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid);
+	else
+		/* broadcast this async event to all VFs */
+		req.encap_async_event_target_id = cpu_to_le16(0xffff);
+	async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl;
+	async_cmpl->type =
+		cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT);
+	async_cmpl->event_id = cpu_to_le16(event_id);
+
+	mutex_lock(&bp->hwrm_cmd_lock);
+	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+
+	if (rc) {
+		netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n",
+			   rc);
+		goto fwd_async_event_cmpl_exit;
+	}
+
+	if (resp->error_code) {
+		netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n",
+			   resp->error_code);
+		rc = -1;
+	}
+
+fwd_async_event_cmpl_exit:
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;
+}
+
 static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id)
 {
 	if (!test_bit(BNXT_STATE_OPEN, &bp->state)) {
@@ -135,7 +174,8 @@
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
-int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos)
+int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos,
+		     __be16 vlan_proto)
 {
 	struct hwrm_func_cfg_input req = {0};
 	struct bnxt *bp = netdev_priv(dev);
@@ -146,6 +186,9 @@
 	if (bp->hwrm_spec_code < 0x10201)
 		return -ENOTSUPP;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	rc = bnxt_vf_ndo_prep(bp, vf_id);
 	if (rc)
 		return rc;
@@ -243,8 +286,9 @@
 		rc = -EINVAL;
 		break;
 	}
-	/* CHIMP TODO: send msg to VF to update new link state */
-
+	if (vf->flags & (BNXT_VF_LINK_UP | BNXT_VF_LINK_FORCED))
+		rc = bnxt_hwrm_fwd_async_event_cmpl(bp, vf,
+			HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE);
 	return rc;
 }
 
@@ -525,46 +569,6 @@
 	return rc;
 }
 
-static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp,
-					  struct bnxt_vf_info *vf,
-					  u16 event_id)
-{
-	int rc = 0;
-	struct hwrm_fwd_async_event_cmpl_input req = {0};
-	struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr;
-	struct hwrm_async_event_cmpl *async_cmpl;
-
-	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1);
-	if (vf)
-		req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid);
-	else
-		/* broadcast this async event to all VFs */
-		req.encap_async_event_target_id = cpu_to_le16(0xffff);
-	async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl;
-	async_cmpl->type =
-		cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT);
-	async_cmpl->event_id = cpu_to_le16(event_id);
-
-	mutex_lock(&bp->hwrm_cmd_lock);
-	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-
-	if (rc) {
-		netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n",
-			   rc);
-		goto fwd_async_event_cmpl_exit;
-	}
-
-	if (resp->error_code) {
-		netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n",
-			   resp->error_code);
-		rc = -1;
-	}
-
-fwd_async_event_cmpl_exit:
-	mutex_unlock(&bp->hwrm_cmd_lock);
-	return rc;
-}
-
 void bnxt_sriov_disable(struct bnxt *bp)
 {
 	u16 num_vfs = pci_num_vf(bp->pdev);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index 0392670..1ab72e4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -12,7 +12,7 @@
 
 int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *);
 int bnxt_set_vf_mac(struct net_device *, int, u8 *);
-int bnxt_set_vf_vlan(struct net_device *, int, u16, u8);
+int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
 int bnxt_set_vf_bw(struct net_device *, int, int, int);
 int bnxt_set_vf_link_state(struct net_device *, int, int);
 int bnxt_set_vf_spoofchk(struct net_device *, int, bool);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 8d4f849..4464bc5 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -450,28 +450,32 @@
 			genet_dma_ring_regs[r]);
 }
 
-static int bcmgenet_get_settings(struct net_device *dev,
-				 struct ethtool_cmd *cmd)
+static int bcmgenet_get_link_ksettings(struct net_device *dev,
+				       struct ethtool_link_ksettings *cmd)
 {
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!dev->phydev)
+	if (!priv->phydev)
 		return -ENODEV;
 
-	return phy_ethtool_gset(dev->phydev, cmd);
+	return phy_ethtool_ksettings_get(priv->phydev, cmd);
 }
 
-static int bcmgenet_set_settings(struct net_device *dev,
-				 struct ethtool_cmd *cmd)
+static int bcmgenet_set_link_ksettings(struct net_device *dev,
+				       const struct ethtool_link_ksettings *cmd)
 {
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!dev->phydev)
+	if (!priv->phydev)
 		return -ENODEV;
 
-	return phy_ethtool_sset(dev->phydev, cmd);
+	return phy_ethtool_ksettings_set(priv->phydev, cmd);
 }
 
 static int bcmgenet_set_rx_csum(struct net_device *dev,
@@ -937,7 +941,7 @@
 	e->eee_active = p->eee_active;
 	e->tx_lpi_timer = bcmgenet_umac_readl(priv, UMAC_EEE_LPI_TIMER);
 
-	return phy_ethtool_get_eee(dev->phydev, e);
+	return phy_ethtool_get_eee(priv->phydev, e);
 }
 
 static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
@@ -954,7 +958,7 @@
 	if (!p->eee_enabled) {
 		bcmgenet_eee_enable_set(dev, false);
 	} else {
-		ret = phy_init_eee(dev->phydev, 0);
+		ret = phy_init_eee(priv->phydev, 0);
 		if (ret) {
 			netif_err(priv, hw, dev, "EEE initialization failed\n");
 			return ret;
@@ -964,21 +968,21 @@
 		bcmgenet_eee_enable_set(dev, true);
 	}
 
-	return phy_ethtool_set_eee(dev->phydev, e);
+	return phy_ethtool_set_eee(priv->phydev, e);
 }
 
 static int bcmgenet_nway_reset(struct net_device *dev)
 {
-	return genphy_restart_aneg(dev->phydev);
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+
+	return genphy_restart_aneg(priv->phydev);
 }
 
 /* standard ethtool support functions. */
-static struct ethtool_ops bcmgenet_ethtool_ops = {
+static const struct ethtool_ops bcmgenet_ethtool_ops = {
 	.get_strings		= bcmgenet_get_strings,
 	.get_sset_count		= bcmgenet_get_sset_count,
 	.get_ethtool_stats	= bcmgenet_get_ethtool_stats,
-	.get_settings		= bcmgenet_get_settings,
-	.set_settings		= bcmgenet_set_settings,
 	.get_drvinfo		= bcmgenet_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
 	.get_msglevel		= bcmgenet_get_msglevel,
@@ -990,19 +994,20 @@
 	.nway_reset		= bcmgenet_nway_reset,
 	.get_coalesce		= bcmgenet_get_coalesce,
 	.set_coalesce		= bcmgenet_set_coalesce,
+	.get_link_ksettings	= bcmgenet_get_link_ksettings,
+	.set_link_ksettings	= bcmgenet_set_link_ksettings,
 };
 
 /* Power down the unimac, based on mode. */
 static int bcmgenet_power_down(struct bcmgenet_priv *priv,
 				enum bcmgenet_power_mode mode)
 {
-	struct net_device *ndev = priv->dev;
 	int ret = 0;
 	u32 reg;
 
 	switch (mode) {
 	case GENET_POWER_CABLE_SENSE:
-		phy_detach(ndev->phydev);
+		phy_detach(priv->phydev);
 		break;
 
 	case GENET_POWER_WOL_MAGIC:
@@ -1063,6 +1068,7 @@
 /* ioctl handle special commands that are not present in ethtool. */
 static int bcmgenet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
+	struct bcmgenet_priv *priv = netdev_priv(dev);
 	int val = 0;
 
 	if (!netif_running(dev))
@@ -1072,10 +1078,10 @@
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-		if (!dev->phydev)
+		if (!priv->phydev)
 			val = -ENODEV;
 		else
-			val = phy_mii_ioctl(dev->phydev, rq, cmd);
+			val = phy_mii_ioctl(priv->phydev, rq, cmd);
 		break;
 
 	default:
@@ -2458,7 +2464,6 @@
 {
 	struct bcmgenet_priv *priv = container_of(
 			work, struct bcmgenet_priv, bcmgenet_irq_work);
-	struct net_device *ndev = priv->dev;
 
 	netif_dbg(priv, intr, priv->dev, "%s\n", __func__);
 
@@ -2471,7 +2476,7 @@
 
 	/* Link UP/DOWN event */
 	if (priv->irq0_stat & UMAC_IRQ_LINK_EVENT) {
-		phy_mac_interrupt(ndev->phydev,
+		phy_mac_interrupt(priv->phydev,
 				  !!(priv->irq0_stat & UMAC_IRQ_LINK_UP));
 		priv->irq0_stat &= ~UMAC_IRQ_LINK_EVENT;
 	}
@@ -2664,128 +2669,6 @@
 	bcmgenet_tdma_writel(priv, reg, DMA_CTRL);
 }
 
-static bool bcmgenet_hfb_is_filter_enabled(struct bcmgenet_priv *priv,
-					   u32 f_index)
-{
-	u32 offset;
-	u32 reg;
-
-	offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
-	reg = bcmgenet_hfb_reg_readl(priv, offset);
-	return !!(reg & (1 << (f_index % 32)));
-}
-
-static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index)
-{
-	u32 offset;
-	u32 reg;
-
-	offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
-	reg = bcmgenet_hfb_reg_readl(priv, offset);
-	reg |= (1 << (f_index % 32));
-	bcmgenet_hfb_reg_writel(priv, reg, offset);
-}
-
-static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv,
-						     u32 f_index, u32 rx_queue)
-{
-	u32 offset;
-	u32 reg;
-
-	offset = f_index / 8;
-	reg = bcmgenet_rdma_readl(priv, DMA_INDEX2RING_0 + offset);
-	reg &= ~(0xF << (4 * (f_index % 8)));
-	reg |= ((rx_queue & 0xF) << (4 * (f_index % 8)));
-	bcmgenet_rdma_writel(priv, reg, DMA_INDEX2RING_0 + offset);
-}
-
-static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv,
-					   u32 f_index, u32 f_length)
-{
-	u32 offset;
-	u32 reg;
-
-	offset = HFB_FLT_LEN_V3PLUS +
-		 ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4) *
-		 sizeof(u32);
-	reg = bcmgenet_hfb_reg_readl(priv, offset);
-	reg &= ~(0xFF << (8 * (f_index % 4)));
-	reg |= ((f_length & 0xFF) << (8 * (f_index % 4)));
-	bcmgenet_hfb_reg_writel(priv, reg, offset);
-}
-
-static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv)
-{
-	u32 f_index;
-
-	for (f_index = 0; f_index < priv->hw_params->hfb_filter_cnt; f_index++)
-		if (!bcmgenet_hfb_is_filter_enabled(priv, f_index))
-			return f_index;
-
-	return -ENOMEM;
-}
-
-/* bcmgenet_hfb_add_filter
- *
- * Add new filter to Hardware Filter Block to match and direct Rx traffic to
- * desired Rx queue.
- *
- * f_data is an array of unsigned 32-bit integers where each 32-bit integer
- * provides filter data for 2 bytes (4 nibbles) of Rx frame:
- *
- * bits 31:20 - unused
- * bit  19    - nibble 0 match enable
- * bit  18    - nibble 1 match enable
- * bit  17    - nibble 2 match enable
- * bit  16    - nibble 3 match enable
- * bits 15:12 - nibble 0 data
- * bits 11:8  - nibble 1 data
- * bits 7:4   - nibble 2 data
- * bits 3:0   - nibble 3 data
- *
- * Example:
- * In order to match:
- * - Ethernet frame type = 0x0800 (IP)
- * - IP version field = 4
- * - IP protocol field = 0x11 (UDP)
- *
- * The following filter is needed:
- * u32 hfb_filter_ipv4_udp[] = {
- *   Rx frame offset 0x00: 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- *   Rx frame offset 0x08: 0x00000000, 0x00000000, 0x000F0800, 0x00084000,
- *   Rx frame offset 0x10: 0x00000000, 0x00000000, 0x00000000, 0x00030011,
- * };
- *
- * To add the filter to HFB and direct the traffic to Rx queue 0, call:
- * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp,
- *                         ARRAY_SIZE(hfb_filter_ipv4_udp), 0);
- */
-int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data,
-			    u32 f_length, u32 rx_queue)
-{
-	int f_index;
-	u32 i;
-
-	f_index = bcmgenet_hfb_find_unused_filter(priv);
-	if (f_index < 0)
-		return -ENOMEM;
-
-	if (f_length > priv->hw_params->hfb_filter_size)
-		return -EINVAL;
-
-	for (i = 0; i < f_length; i++)
-		bcmgenet_hfb_writel(priv, f_data[i],
-			(f_index * priv->hw_params->hfb_filter_size + i) *
-			sizeof(u32));
-
-	bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length);
-	bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue);
-	bcmgenet_hfb_enable_filter(priv, f_index);
-	bcmgenet_hfb_reg_writel(priv, 0x1, HFB_CTRL);
-
-	return 0;
-}
-
 /* bcmgenet_hfb_clear
  *
  * Clear Hardware Filter Block and disable all filtering.
@@ -2833,7 +2716,7 @@
 	/* Monitor link interrupts now */
 	bcmgenet_link_intr_enable(priv);
 
-	phy_start(dev->phydev);
+	phy_start(priv->phydev);
 }
 
 static int bcmgenet_open(struct net_device *dev)
@@ -2932,7 +2815,7 @@
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 
 	netif_tx_stop_all_queues(dev);
-	phy_stop(dev->phydev);
+	phy_stop(priv->phydev);
 	bcmgenet_intr_disable(priv);
 	bcmgenet_disable_rx_napi(priv);
 	bcmgenet_disable_tx_napi(priv);
@@ -2958,7 +2841,7 @@
 	bcmgenet_netif_stop(dev);
 
 	/* Really kill the PHY state machine and disconnect from it */
-	phy_disconnect(dev->phydev);
+	phy_disconnect(priv->phydev);
 
 	/* Disable MAC receive */
 	umac_enable_set(priv, CMD_RX_EN, false);
@@ -3517,7 +3400,7 @@
 
 	bcmgenet_netif_stop(dev);
 
-	phy_suspend(dev->phydev);
+	phy_suspend(priv->phydev);
 
 	netif_device_detach(dev);
 
@@ -3581,7 +3464,7 @@
 	if (priv->wolopts)
 		clk_disable_unprepare(priv->clk_wol);
 
-	phy_init_hw(dev->phydev);
+	phy_init_hw(priv->phydev);
 	/* Speed settings must be restored */
 	bcmgenet_mii_config(priv->dev);
 
@@ -3614,7 +3497,7 @@
 
 	netif_device_attach(dev);
 
-	phy_resume(dev->phydev);
+	phy_resume(priv->phydev);
 
 	if (priv->eee.eee_enabled)
 		bcmgenet_eee_enable_set(dev, true);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 0f0868c..1e2dc34 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -597,6 +597,7 @@
 
 	/* MDIO bus variables */
 	wait_queue_head_t wq;
+	struct phy_device *phydev;
 	bool internal_phy;
 	struct device_node *phy_dn;
 	struct device_node *mdio_dn;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index e907acd..457c3bc 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -86,7 +86,7 @@
 void bcmgenet_mii_setup(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = dev->phydev;
+	struct phy_device *phydev = priv->phydev;
 	u32 reg, cmd_bits = 0;
 	bool status_changed = false;
 
@@ -183,9 +183,9 @@
 	if (GENET_IS_V4(priv))
 		return;
 
-	if (dev->phydev) {
-		phy_init_hw(dev->phydev);
-		phy_start_aneg(dev->phydev);
+	if (priv->phydev) {
+		phy_init_hw(priv->phydev);
+		phy_start_aneg(priv->phydev);
 	}
 }
 
@@ -236,7 +236,6 @@
 
 static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)
 {
-	struct net_device *ndev = priv->dev;
 	u32 reg;
 
 	/* Speed settings are set in bcmgenet_mii_setup() */
@@ -245,14 +244,14 @@
 	bcmgenet_sys_writel(priv, reg, SYS_PORT_CTRL);
 
 	if (priv->hw_params->flags & GENET_HAS_MOCA_LINK_DET)
-		fixed_phy_set_link_update(ndev->phydev,
+		fixed_phy_set_link_update(priv->phydev,
 					  bcmgenet_fixed_phy_link_update);
 }
 
 int bcmgenet_mii_config(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = dev->phydev;
+	struct phy_device *phydev = priv->phydev;
 	struct device *kdev = &priv->pdev->dev;
 	const char *phy_name = NULL;
 	u32 id_mode_dis = 0;
@@ -303,7 +302,7 @@
 		 * capabilities, use that knowledge to also configure the
 		 * Reverse MII interface correctly.
 		 */
-		if ((phydev->supported & PHY_BASIC_FEATURES) ==
+		if ((priv->phydev->supported & PHY_BASIC_FEATURES) ==
 				PHY_BASIC_FEATURES)
 			port_ctrl = PORT_MODE_EXT_RVMII_25;
 		else
@@ -372,7 +371,7 @@
 			return -ENODEV;
 		}
 	} else {
-		phydev = dev->phydev;
+		phydev = priv->phydev;
 		phydev->dev_flags = phy_flags;
 
 		ret = phy_connect_direct(dev, phydev, bcmgenet_mii_setup,
@@ -383,6 +382,8 @@
 		}
 	}
 
+	priv->phydev = phydev;
+
 	/* Configure port multiplexer based on what the probed PHY device since
 	 * reading the 'max-speed' property determines the maximum supported
 	 * PHY speed which is needed for bcmgenet_mii_config() to configure
@@ -390,7 +391,7 @@
 	 */
 	ret = bcmgenet_mii_config(dev);
 	if (ret) {
-		phy_disconnect(phydev);
+		phy_disconnect(priv->phydev);
 		return ret;
 	}
 
@@ -400,7 +401,7 @@
 	 * Ethernet MAC ISRs
 	 */
 	if (priv->internal_phy)
-		phydev->irq = PHY_IGNORE_INTERRUPT;
+		priv->phydev->irq = PHY_IGNORE_INTERRUPT;
 
 	return 0;
 }
@@ -605,6 +606,7 @@
 
 	}
 
+	priv->phydev = phydev;
 	priv->phy_interface = pd->phy_interface;
 
 	return 0;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index ff300f7..a927a73 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12079,95 +12079,107 @@
 	return ret;
 }
 
-static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int tg3_get_link_ksettings(struct net_device *dev,
+				  struct ethtool_link_ksettings *cmd)
 {
 	struct tg3 *tp = netdev_priv(dev);
+	u32 supported, advertising;
 
 	if (tg3_flag(tp, USE_PHYLIB)) {
 		struct phy_device *phydev;
 		if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED))
 			return -EAGAIN;
 		phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
-		return phy_ethtool_gset(phydev, cmd);
+		return phy_ethtool_ksettings_get(phydev, cmd);
 	}
 
-	cmd->supported = (SUPPORTED_Autoneg);
+	supported = (SUPPORTED_Autoneg);
 
 	if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY))
-		cmd->supported |= (SUPPORTED_1000baseT_Half |
-				   SUPPORTED_1000baseT_Full);
+		supported |= (SUPPORTED_1000baseT_Half |
+			      SUPPORTED_1000baseT_Full);
 
 	if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) {
-		cmd->supported |= (SUPPORTED_100baseT_Half |
-				  SUPPORTED_100baseT_Full |
-				  SUPPORTED_10baseT_Half |
-				  SUPPORTED_10baseT_Full |
-				  SUPPORTED_TP);
-		cmd->port = PORT_TP;
+		supported |= (SUPPORTED_100baseT_Half |
+			      SUPPORTED_100baseT_Full |
+			      SUPPORTED_10baseT_Half |
+			      SUPPORTED_10baseT_Full |
+			      SUPPORTED_TP);
+		cmd->base.port = PORT_TP;
 	} else {
-		cmd->supported |= SUPPORTED_FIBRE;
-		cmd->port = PORT_FIBRE;
+		supported |= SUPPORTED_FIBRE;
+		cmd->base.port = PORT_FIBRE;
 	}
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
 
-	cmd->advertising = tp->link_config.advertising;
+	advertising = tp->link_config.advertising;
 	if (tg3_flag(tp, PAUSE_AUTONEG)) {
 		if (tp->link_config.flowctrl & FLOW_CTRL_RX) {
 			if (tp->link_config.flowctrl & FLOW_CTRL_TX) {
-				cmd->advertising |= ADVERTISED_Pause;
+				advertising |= ADVERTISED_Pause;
 			} else {
-				cmd->advertising |= ADVERTISED_Pause |
-						    ADVERTISED_Asym_Pause;
+				advertising |= ADVERTISED_Pause |
+					ADVERTISED_Asym_Pause;
 			}
 		} else if (tp->link_config.flowctrl & FLOW_CTRL_TX) {
-			cmd->advertising |= ADVERTISED_Asym_Pause;
+			advertising |= ADVERTISED_Asym_Pause;
 		}
 	}
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
 	if (netif_running(dev) && tp->link_up) {
-		ethtool_cmd_speed_set(cmd, tp->link_config.active_speed);
-		cmd->duplex = tp->link_config.active_duplex;
-		cmd->lp_advertising = tp->link_config.rmt_adv;
+		cmd->base.speed = tp->link_config.active_speed;
+		cmd->base.duplex = tp->link_config.active_duplex;
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.lp_advertising,
+			tp->link_config.rmt_adv);
+
 		if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) {
 			if (tp->phy_flags & TG3_PHYFLG_MDIX_STATE)
-				cmd->eth_tp_mdix = ETH_TP_MDI_X;
+				cmd->base.eth_tp_mdix = ETH_TP_MDI_X;
 			else
-				cmd->eth_tp_mdix = ETH_TP_MDI;
+				cmd->base.eth_tp_mdix = ETH_TP_MDI;
 		}
 	} else {
-		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
-		cmd->duplex = DUPLEX_UNKNOWN;
-		cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
 	}
-	cmd->phy_address = tp->phy_addr;
-	cmd->transceiver = XCVR_INTERNAL;
-	cmd->autoneg = tp->link_config.autoneg;
-	cmd->maxtxpkt = 0;
-	cmd->maxrxpkt = 0;
+	cmd->base.phy_address = tp->phy_addr;
+	cmd->base.autoneg = tp->link_config.autoneg;
 	return 0;
 }
 
-static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int tg3_set_link_ksettings(struct net_device *dev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct tg3 *tp = netdev_priv(dev);
-	u32 speed = ethtool_cmd_speed(cmd);
+	u32 speed = cmd->base.speed;
+	u32 advertising;
 
 	if (tg3_flag(tp, USE_PHYLIB)) {
 		struct phy_device *phydev;
 		if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED))
 			return -EAGAIN;
 		phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
-		return phy_ethtool_sset(phydev, cmd);
+		return phy_ethtool_ksettings_set(phydev, cmd);
 	}
 
-	if (cmd->autoneg != AUTONEG_ENABLE &&
-	    cmd->autoneg != AUTONEG_DISABLE)
+	if (cmd->base.autoneg != AUTONEG_ENABLE &&
+	    cmd->base.autoneg != AUTONEG_DISABLE)
 		return -EINVAL;
 
-	if (cmd->autoneg == AUTONEG_DISABLE &&
-	    cmd->duplex != DUPLEX_FULL &&
-	    cmd->duplex != DUPLEX_HALF)
+	if (cmd->base.autoneg == AUTONEG_DISABLE &&
+	    cmd->base.duplex != DUPLEX_FULL &&
+	    cmd->base.duplex != DUPLEX_HALF)
 		return -EINVAL;
 
-	if (cmd->autoneg == AUTONEG_ENABLE) {
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
 		u32 mask = ADVERTISED_Autoneg |
 			   ADVERTISED_Pause |
 			   ADVERTISED_Asym_Pause;
@@ -12185,7 +12197,7 @@
 		else
 			mask |= ADVERTISED_FIBRE;
 
-		if (cmd->advertising & ~mask)
+		if (advertising & ~mask)
 			return -EINVAL;
 
 		mask &= (ADVERTISED_1000baseT_Half |
@@ -12195,13 +12207,13 @@
 			 ADVERTISED_10baseT_Half |
 			 ADVERTISED_10baseT_Full);
 
-		cmd->advertising &= mask;
+		advertising &= mask;
 	} else {
 		if (tp->phy_flags & TG3_PHYFLG_ANY_SERDES) {
 			if (speed != SPEED_1000)
 				return -EINVAL;
 
-			if (cmd->duplex != DUPLEX_FULL)
+			if (cmd->base.duplex != DUPLEX_FULL)
 				return -EINVAL;
 		} else {
 			if (speed != SPEED_100 &&
@@ -12212,16 +12224,16 @@
 
 	tg3_full_lock(tp, 0);
 
-	tp->link_config.autoneg = cmd->autoneg;
-	if (cmd->autoneg == AUTONEG_ENABLE) {
-		tp->link_config.advertising = (cmd->advertising |
+	tp->link_config.autoneg = cmd->base.autoneg;
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		tp->link_config.advertising = (advertising |
 					      ADVERTISED_Autoneg);
 		tp->link_config.speed = SPEED_UNKNOWN;
 		tp->link_config.duplex = DUPLEX_UNKNOWN;
 	} else {
 		tp->link_config.advertising = 0;
 		tp->link_config.speed = speed;
-		tp->link_config.duplex = cmd->duplex;
+		tp->link_config.duplex = cmd->base.duplex;
 	}
 
 	tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED;
@@ -12552,10 +12564,6 @@
 				info->data = TG3_RSS_MAX_NUM_QS;
 		}
 
-		/* The first interrupt vector only
-		 * handles link interrupts.
-		 */
-		info->data -= 1;
 		return 0;
 
 	default:
@@ -14014,7 +14022,9 @@
 	}
 
 	if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) ||
+	    (!ec->rx_coalesce_usecs) ||
 	    (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) ||
+	    (!ec->tx_coalesce_usecs) ||
 	    (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) ||
 	    (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) ||
 	    (ec->rx_coalesce_usecs_irq > max_rxcoal_tick_int) ||
@@ -14025,16 +14035,6 @@
 	    (ec->stats_block_coalesce_usecs < min_stat_coal_ticks))
 		return -EINVAL;
 
-	/* No rx interrupts will be generated if both are zero */
-	if ((ec->rx_coalesce_usecs == 0) &&
-	    (ec->rx_max_coalesced_frames == 0))
-		return -EINVAL;
-
-	/* No tx interrupts will be generated if both are zero */
-	if ((ec->tx_coalesce_usecs == 0) &&
-	    (ec->tx_max_coalesced_frames == 0))
-		return -EINVAL;
-
 	/* Only copy relevant parameters, ignore all others. */
 	tp->coal.rx_coalesce_usecs = ec->rx_coalesce_usecs;
 	tp->coal.tx_coalesce_usecs = ec->tx_coalesce_usecs;
@@ -14106,8 +14106,6 @@
 }
 
 static const struct ethtool_ops tg3_ethtool_ops = {
-	.get_settings		= tg3_get_settings,
-	.set_settings		= tg3_set_settings,
 	.get_drvinfo		= tg3_get_drvinfo,
 	.get_regs_len		= tg3_get_regs_len,
 	.get_regs		= tg3_get_regs,
@@ -14140,6 +14138,8 @@
 	.get_ts_info		= tg3_get_ts_info,
 	.get_eee		= tg3_get_eee,
 	.set_eee		= tg3_set_eee,
+	.get_link_ksettings	= tg3_get_link_ksettings,
+	.set_link_ksettings	= tg3_set_link_ksettings,
 };
 
 static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
@@ -18134,14 +18134,14 @@
 
 	rtnl_lock();
 
-	/* We needn't recover from permanent error */
-	if (state == pci_channel_io_frozen)
-		tp->pcierr_recovery = true;
-
 	/* We probably don't have netdev yet */
 	if (!netdev || !netif_running(netdev))
 		goto done;
 
+	/* We needn't recover from permanent error */
+	if (state == pci_channel_io_frozen)
+		tp->pcierr_recovery = true;
+
 	tg3_phy_stop(tp);
 
 	tg3_netif_stop(tp);
@@ -18238,7 +18238,7 @@
 
 	rtnl_lock();
 
-	if (!netif_running(netdev))
+	if (!netdev || !netif_running(netdev))
 		goto done;
 
 	tg3_full_lock(tp, 0);
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index 771cc26..f9df4b5a 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -54,9 +54,7 @@
  * Global variables
  */
 static u32 bnad_rxqs_per_cq = 2;
-static u32 bna_id;
-static struct mutex bnad_list_mutex;
-static LIST_HEAD(bnad_list);
+static atomic_t bna_id;
 static const u8 bnad_bcast_addr[] __aligned(2) =
 	{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 
@@ -76,23 +74,6 @@
 	(_res_info)->res_u.mem_info.len = (_size);		\
 } while (0)
 
-static void
-bnad_add_to_list(struct bnad *bnad)
-{
-	mutex_lock(&bnad_list_mutex);
-	list_add_tail(&bnad->list_entry, &bnad_list);
-	bnad->id = bna_id++;
-	mutex_unlock(&bnad_list_mutex);
-}
-
-static void
-bnad_remove_from_list(struct bnad *bnad)
-{
-	mutex_lock(&bnad_list_mutex);
-	list_del(&bnad->list_entry);
-	mutex_unlock(&bnad_list_mutex);
-}
-
 /*
  * Reinitialize completions in CQ, once Rx is taken down
  */
@@ -3573,14 +3554,12 @@
 {
 	spin_lock_init(&bnad->bna_lock);
 	mutex_init(&bnad->conf_mutex);
-	mutex_init(&bnad_list_mutex);
 }
 
 static void
 bnad_lock_uninit(struct bnad *bnad)
 {
 	mutex_destroy(&bnad->conf_mutex);
-	mutex_destroy(&bnad_list_mutex);
 }
 
 /* PCI Initialization */
@@ -3653,7 +3632,7 @@
 	}
 	bnad = netdev_priv(netdev);
 	bnad_lock_init(bnad);
-	bnad_add_to_list(bnad);
+	bnad->id = atomic_inc_return(&bna_id) - 1;
 
 	mutex_lock(&bnad->conf_mutex);
 	/*
@@ -3807,7 +3786,6 @@
 	bnad_pci_uninit(pdev);
 unlock_mutex:
 	mutex_unlock(&bnad->conf_mutex);
-	bnad_remove_from_list(bnad);
 	bnad_lock_uninit(bnad);
 	free_netdev(netdev);
 	return err;
@@ -3845,7 +3823,6 @@
 	bnad_disable_msix(bnad);
 	bnad_pci_uninit(pdev);
 	mutex_unlock(&bnad->conf_mutex);
-	bnad_remove_from_list(bnad);
 	bnad_lock_uninit(bnad);
 	/* Remove the debugfs node for this bnad */
 	kfree(bnad->regdata);
diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h
index f4ed816..46f7b84 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.h
+++ b/drivers/net/ethernet/brocade/bna/bnad.h
@@ -288,7 +288,6 @@
 struct bnad {
 	struct net_device	*netdev;
 	u32			id;
-	struct list_head	list_entry;
 
 	/* Data path */
 	struct bnad_tx_info tx_info[BNAD_MAX_TX];
diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index 0e4fdc3..31f61a7 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -31,15 +31,10 @@
 #define BNAD_NUM_TXF_COUNTERS 12
 #define BNAD_NUM_RXF_COUNTERS 10
 #define BNAD_NUM_CQ_COUNTERS (3 + 5)
-#define BNAD_NUM_RXQ_COUNTERS 6
+#define BNAD_NUM_RXQ_COUNTERS 7
 #define BNAD_NUM_TXQ_COUNTERS 5
 
-#define BNAD_ETHTOOL_STATS_NUM						\
-	(sizeof(struct rtnl_link_stats64) / sizeof(u64) +	\
-	sizeof(struct bnad_drv_stats) / sizeof(u64) +		\
-	offsetof(struct bfi_enet_stats, rxf_stats[0]) / sizeof(u64))
-
-static const char *bnad_net_stats_strings[BNAD_ETHTOOL_STATS_NUM] = {
+static const char *bnad_net_stats_strings[] = {
 	"rx_packets",
 	"tx_packets",
 	"rx_bytes",
@@ -50,22 +45,10 @@
 	"tx_dropped",
 	"multicast",
 	"collisions",
-
 	"rx_length_errors",
-	"rx_over_errors",
 	"rx_crc_errors",
 	"rx_frame_errors",
-	"rx_fifo_errors",
-	"rx_missed_errors",
-
-	"tx_aborted_errors",
-	"tx_carrier_errors",
 	"tx_fifo_errors",
-	"tx_heartbeat_errors",
-	"tx_window_errors",
-
-	"rx_compressed",
-	"tx_compressed",
 
 	"netif_queue_stop",
 	"netif_queue_wakeup",
@@ -254,6 +237,8 @@
 	"fc_tx_fid_parity_errors",
 };
 
+#define BNAD_ETHTOOL_STATS_NUM	ARRAY_SIZE(bnad_net_stats_strings)
+
 static int
 bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
 {
@@ -658,6 +643,8 @@
 				string += ETH_GSTRING_LEN;
 				sprintf(string, "rxq%d_allocbuf_failed", q_num);
 				string += ETH_GSTRING_LEN;
+				sprintf(string, "rxq%d_mapbuf_failed", q_num);
+				string += ETH_GSTRING_LEN;
 				sprintf(string, "rxq%d_producer_index", q_num);
 				string += ETH_GSTRING_LEN;
 				sprintf(string, "rxq%d_consumer_index", q_num);
@@ -678,6 +665,9 @@
 					sprintf(string, "rxq%d_allocbuf_failed",
 								q_num);
 					string += ETH_GSTRING_LEN;
+					sprintf(string, "rxq%d_mapbuf_failed",
+						q_num);
+					string += ETH_GSTRING_LEN;
 					sprintf(string, "rxq%d_producer_index",
 								q_num);
 					string += ETH_GSTRING_LEN;
@@ -854,9 +844,9 @@
 		       u64 *buf)
 {
 	struct bnad *bnad = netdev_priv(netdev);
-	int i, j, bi;
+	int i, j, bi = 0;
 	unsigned long flags;
-	struct rtnl_link_stats64 *net_stats64;
+	struct rtnl_link_stats64 net_stats64;
 	u64 *stats64;
 	u32 bmap;
 
@@ -871,14 +861,25 @@
 	 * under the same lock
 	 */
 	spin_lock_irqsave(&bnad->bna_lock, flags);
-	bi = 0;
-	memset(buf, 0, stats->n_stats * sizeof(u64));
 
-	net_stats64 = (struct rtnl_link_stats64 *)buf;
-	bnad_netdev_qstats_fill(bnad, net_stats64);
-	bnad_netdev_hwstats_fill(bnad, net_stats64);
+	memset(&net_stats64, 0, sizeof(net_stats64));
+	bnad_netdev_qstats_fill(bnad, &net_stats64);
+	bnad_netdev_hwstats_fill(bnad, &net_stats64);
 
-	bi = sizeof(*net_stats64) / sizeof(u64);
+	buf[bi++] = net_stats64.rx_packets;
+	buf[bi++] = net_stats64.tx_packets;
+	buf[bi++] = net_stats64.rx_bytes;
+	buf[bi++] = net_stats64.tx_bytes;
+	buf[bi++] = net_stats64.rx_errors;
+	buf[bi++] = net_stats64.tx_errors;
+	buf[bi++] = net_stats64.rx_dropped;
+	buf[bi++] = net_stats64.tx_dropped;
+	buf[bi++] = net_stats64.multicast;
+	buf[bi++] = net_stats64.collisions;
+	buf[bi++] = net_stats64.rx_length_errors;
+	buf[bi++] = net_stats64.rx_crc_errors;
+	buf[bi++] = net_stats64.rx_frame_errors;
+	buf[bi++] = net_stats64.tx_fifo_errors;
 
 	/* Get netif_queue_stopped from stack */
 	bnad->stats.drv_stats.netif_queue_stopped = netif_queue_stopped(netdev);
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 89c0cfa..63144bb 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -541,6 +541,14 @@
 	}
 }
 
+static inline void macb_set_addr(struct macb_dma_desc *desc, dma_addr_t addr)
+{
+	desc->addr = (u32)addr;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	desc->addrh = (u32)(addr >> 32);
+#endif
+}
+
 static void macb_tx_error_task(struct work_struct *work)
 {
 	struct macb_queue	*queue = container_of(work, struct macb_queue,
@@ -621,14 +629,17 @@
 
 	/* Set end of TX queue */
 	desc = macb_tx_desc(queue, 0);
-	desc->addr = 0;
+	macb_set_addr(desc, 0);
 	desc->ctrl = MACB_BIT(TX_USED);
 
 	/* Make descriptor updates visible to hardware */
 	wmb();
 
 	/* Reinitialize the TX desc queue */
-	queue_writel(queue, TBQP, queue->tx_ring_dma);
+	queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
+#endif
 	/* Make TX ring reflect state of hardware */
 	queue->tx_head = 0;
 	queue->tx_tail = 0;
@@ -750,7 +761,7 @@
 
 			if (entry == RX_RING_SIZE - 1)
 				paddr |= MACB_BIT(RX_WRAP);
-			bp->rx_ring[entry].addr = paddr;
+			macb_set_addr(&(bp->rx_ring[entry]), paddr);
 			bp->rx_ring[entry].ctrl = 0;
 
 			/* properly align Ethernet header */
@@ -798,7 +809,9 @@
 	int			count = 0;
 
 	while (count < budget) {
-		u32 addr, ctrl;
+		u32 ctrl;
+		dma_addr_t addr;
+		bool rxused;
 
 		entry = macb_rx_ring_wrap(bp->rx_tail);
 		desc = &bp->rx_ring[entry];
@@ -806,10 +819,14 @@
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
 
-		addr = desc->addr;
+		rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false;
+		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		addr |= ((u64)(desc->addrh) << 32);
+#endif
 		ctrl = desc->ctrl;
 
-		if (!(addr & MACB_BIT(RX_USED)))
+		if (!rxused)
 			break;
 
 		bp->rx_tail++;
@@ -835,7 +852,6 @@
 		netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len);
 
 		skb_put(skb, len);
-		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr));
 		dma_unmap_single(&bp->pdev->dev, addr,
 				 bp->rx_buffer_size, DMA_FROM_DEVICE);
 
@@ -1299,7 +1315,7 @@
 			ctrl |= MACB_BIT(TX_WRAP);
 
 		/* Set TX buffer descriptor */
-		desc->addr = tx_skb->mapping;
+		macb_set_addr(desc, tx_skb->mapping);
 		/* desc->addr must be visible to hardware before clearing
 		 * 'TX_USED' bit in desc->ctrl.
 		 */
@@ -1323,6 +1339,24 @@
 	return 0;
 }
 
+static inline int macb_clear_csum(struct sk_buff *skb)
+{
+	/* no change for packets without checksum offloading */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	/* make sure we can modify the header */
+	if (unlikely(skb_cow_head(skb, 0)))
+		return -1;
+
+	/* initialize checksum field
+	 * This is required - at least for Zynq, which otherwise calculates
+	 * wrong UDP header checksums for UDP packets with UDP data len <=2
+	 */
+	*(__sum16 *)(skb_checksum_start(skb) + skb->csum_offset) = 0;
+	return 0;
+}
+
 static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	u16 queue_index = skb_get_queue_mapping(skb);
@@ -1362,6 +1396,11 @@
 		return NETDEV_TX_BUSY;
 	}
 
+	if (macb_clear_csum(skb)) {
+		dev_kfree_skb_any(skb);
+		goto unlock;
+	}
+
 	/* Map socket buffer for DMA transfer */
 	if (!macb_tx_map(bp, queue, skb)) {
 		dev_kfree_skb_any(skb);
@@ -1422,6 +1461,9 @@
 
 		desc = &bp->rx_ring[i];
 		addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		addr |= ((u64)(desc->addrh) << 32);
+#endif
 		dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size,
 				 DMA_FROM_DEVICE);
 		dev_kfree_skb_any(skb);
@@ -1547,7 +1589,7 @@
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		for (i = 0; i < TX_RING_SIZE; i++) {
-			queue->tx_ring[i].addr = 0;
+			macb_set_addr(&(queue->tx_ring[i]), 0);
 			queue->tx_ring[i].ctrl = MACB_BIT(TX_USED);
 		}
 		queue->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
@@ -1694,6 +1736,10 @@
 			dmacfg |= GEM_BIT(TXCOEN);
 		else
 			dmacfg &= ~GEM_BIT(TXCOEN);
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		dmacfg |= GEM_BIT(ADDR64);
+#endif
 		netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n",
 			   dmacfg);
 		gem_writel(bp, DMACFG, dmacfg);
@@ -1739,9 +1785,15 @@
 	macb_configure_dma(bp);
 
 	/* Initialize TX and RX buffers */
-	macb_writel(bp, RBQP, bp->rx_ring_dma);
+	macb_writel(bp, RBQP, (u32)(bp->rx_ring_dma));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	macb_writel(bp, RBQPH, (u32)(bp->rx_ring_dma >> 32));
+#endif
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue_writel(queue, TBQP, queue->tx_ring_dma);
+		queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32));
+#endif
 
 		/* Enable interrupts */
 		queue_writel(queue, IER,
@@ -2303,7 +2355,8 @@
 }
 
 static int macb_clk_init(struct platform_device *pdev, struct clk **pclk,
-			 struct clk **hclk, struct clk **tx_clk)
+			 struct clk **hclk, struct clk **tx_clk,
+			 struct clk **rx_clk)
 {
 	int err;
 
@@ -2325,6 +2378,10 @@
 	if (IS_ERR(*tx_clk))
 		*tx_clk = NULL;
 
+	*rx_clk = devm_clk_get(&pdev->dev, "rx_clk");
+	if (IS_ERR(*rx_clk))
+		*rx_clk = NULL;
+
 	err = clk_prepare_enable(*pclk);
 	if (err) {
 		dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err);
@@ -2343,8 +2400,17 @@
 		goto err_disable_hclk;
 	}
 
+	err = clk_prepare_enable(*rx_clk);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err);
+		goto err_disable_txclk;
+	}
+
 	return 0;
 
+err_disable_txclk:
+	clk_disable_unprepare(*tx_clk);
+
 err_disable_hclk:
 	clk_disable_unprepare(*hclk);
 
@@ -2379,6 +2445,9 @@
 			queue->IDR  = GEM_IDR(hw_q - 1);
 			queue->IMR  = GEM_IMR(hw_q - 1);
 			queue->TBQP = GEM_TBQP(hw_q - 1);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+			queue->TBQPH = GEM_TBQPH(hw_q -1);
+#endif
 		} else {
 			/* queue0 uses legacy registers */
 			queue->ISR  = MACB_ISR;
@@ -2386,6 +2455,9 @@
 			queue->IDR  = MACB_IDR;
 			queue->IMR  = MACB_IMR;
 			queue->TBQP = MACB_TBQP;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+			queue->TBQPH = MACB_TBQPH;
+#endif
 		}
 
 		/* get irq: here we use the linux queue index, not the hardware
@@ -2728,12 +2800,14 @@
 };
 
 static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk,
-			      struct clk **hclk, struct clk **tx_clk)
+			      struct clk **hclk, struct clk **tx_clk,
+			      struct clk **rx_clk)
 {
 	int err;
 
 	*hclk = NULL;
 	*tx_clk = NULL;
+	*rx_clk = NULL;
 
 	*pclk = devm_clk_get(&pdev->dev, "ether_clk");
 	if (IS_ERR(*pclk))
@@ -2857,13 +2931,13 @@
 static int macb_probe(struct platform_device *pdev)
 {
 	int (*clk_init)(struct platform_device *, struct clk **,
-			struct clk **, struct clk **)
+			struct clk **, struct clk **,  struct clk **)
 					      = macb_clk_init;
 	int (*init)(struct platform_device *) = macb_init;
 	struct device_node *np = pdev->dev.of_node;
 	struct device_node *phy_node;
 	const struct macb_config *macb_config = NULL;
-	struct clk *pclk, *hclk = NULL, *tx_clk = NULL;
+	struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL;
 	unsigned int queue_mask, num_queues;
 	struct macb_platform_data *pdata;
 	bool native_io;
@@ -2891,7 +2965,7 @@
 		}
 	}
 
-	err = clk_init(pdev, &pclk, &hclk, &tx_clk);
+	err = clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk);
 	if (err)
 		return err;
 
@@ -2927,6 +3001,7 @@
 	bp->pclk = pclk;
 	bp->hclk = hclk;
 	bp->tx_clk = tx_clk;
+	bp->rx_clk = rx_clk;
 	if (macb_config)
 		bp->jumbo_max_len = macb_config->jumbo_max_len;
 
@@ -2935,6 +3010,11 @@
 		bp->wol |= MACB_WOL_HAS_MAGIC_PACKET;
 	device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET);
 
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	if (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1)) > GEM_DBW32)
+		dma_set_mask(&pdev->dev, DMA_BIT_MASK(44));
+#endif
+
 	spin_lock_init(&bp->lock);
 
 	/* setup capabilities */
@@ -2945,7 +3025,7 @@
 	dev->irq = platform_get_irq(pdev, 0);
 	if (dev->irq < 0) {
 		err = dev->irq;
-		goto err_disable_clocks;
+		goto err_out_free_netdev;
 	}
 
 	mac = of_get_mac_address(np);
@@ -3020,6 +3100,7 @@
 	clk_disable_unprepare(tx_clk);
 	clk_disable_unprepare(hclk);
 	clk_disable_unprepare(pclk);
+	clk_disable_unprepare(rx_clk);
 
 	return err;
 }
@@ -3046,6 +3127,7 @@
 		clk_disable_unprepare(bp->tx_clk);
 		clk_disable_unprepare(bp->hclk);
 		clk_disable_unprepare(bp->pclk);
+		clk_disable_unprepare(bp->rx_clk);
 		free_netdev(dev);
 	}
 
@@ -3069,6 +3151,7 @@
 		clk_disable_unprepare(bp->tx_clk);
 		clk_disable_unprepare(bp->hclk);
 		clk_disable_unprepare(bp->pclk);
+		clk_disable_unprepare(bp->rx_clk);
 	}
 
 	return 0;
@@ -3088,6 +3171,7 @@
 		clk_prepare_enable(bp->pclk);
 		clk_prepare_enable(bp->hclk);
 		clk_prepare_enable(bp->tx_clk);
+		clk_prepare_enable(bp->rx_clk);
 	}
 
 	netif_device_attach(netdev);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 36893d8..8bed4b5 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -66,6 +66,8 @@
 #define MACB_USRIO		0x00c0
 #define MACB_WOL		0x00c4
 #define MACB_MID		0x00fc
+#define MACB_TBQPH		0x04C8
+#define MACB_RBQPH		0x04D4
 
 /* GEM register offsets. */
 #define GEM_NCFGR		0x0004 /* Network Config */
@@ -139,6 +141,7 @@
 
 #define GEM_ISR(hw_q)		(0x0400 + ((hw_q) << 2))
 #define GEM_TBQP(hw_q)		(0x0440 + ((hw_q) << 2))
+#define GEM_TBQPH(hw_q)		(0x04C8)
 #define GEM_RBQP(hw_q)		(0x0480 + ((hw_q) << 2))
 #define GEM_IER(hw_q)		(0x0600 + ((hw_q) << 2))
 #define GEM_IDR(hw_q)		(0x0620 + ((hw_q) << 2))
@@ -249,6 +252,8 @@
 #define GEM_RXBS_SIZE		8
 #define GEM_DDRP_OFFSET		24 /* disc_when_no_ahb */
 #define GEM_DDRP_SIZE		1
+#define GEM_ADDR64_OFFSET	30 /* Address bus width - 64b or 32b */
+#define GEM_ADDR64_SIZE		1
 
 
 /* Bitfields in NSR */
@@ -403,11 +408,11 @@
 #define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII	0x00000004
 #define MACB_CAPS_NO_GIGABIT_HALF		0x00000008
 #define MACB_CAPS_USRIO_DISABLED		0x00000010
+#define MACB_CAPS_JUMBO				0x00000020
 #define MACB_CAPS_FIFO_MODE			0x10000000
 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE	0x20000000
 #define MACB_CAPS_SG_DISABLED			0x40000000
 #define MACB_CAPS_MACB_IS_GEM			0x80000000
-#define MACB_CAPS_JUMBO				0x00000010
 
 /* Bit manipulation macros */
 #define MACB_BIT(name)					\
@@ -474,6 +479,10 @@
 struct macb_dma_desc {
 	u32	addr;
 	u32	ctrl;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	u32     addrh;
+	u32     resvd;
+#endif
 };
 
 /* DMA descriptor bitfields */
@@ -763,7 +772,8 @@
 	u32			caps;
 	unsigned int		dma_burst_length;
 	int	(*clk_init)(struct platform_device *pdev, struct clk **pclk,
-			    struct clk **hclk, struct clk **tx_clk);
+			    struct clk **hclk, struct clk **tx_clk,
+			    struct clk **rx_clk);
 	int	(*init)(struct platform_device *pdev);
 	int	jumbo_max_len;
 };
@@ -777,6 +787,7 @@
 	unsigned int		IDR;
 	unsigned int		IMR;
 	unsigned int		TBQP;
+	unsigned int		TBQPH;
 
 	unsigned int		tx_head, tx_tail;
 	struct macb_dma_desc	*tx_ring;
@@ -809,6 +820,7 @@
 	struct clk		*pclk;
 	struct clk		*hclk;
 	struct clk		*tx_clk;
+	struct clk		*rx_clk;
 	struct net_device	*dev;
 	struct napi_struct	napi;
 	struct net_device_stats	stats;
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index 0ef232d..92f411c 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -36,10 +36,20 @@
 	depends on 64BIT
 	select PHYLIB
 	select MDIO_THUNDER
+	select THUNDER_NIC_RGX
 	---help---
 	  This driver supports programming and controlling of MAC
 	  interface from NIC physical function driver.
 
+config	THUNDER_NIC_RGX
+	tristate "Thunder MAC interface driver (RGX)"
+	depends on 64BIT
+	select PHYLIB
+	select MDIO_THUNDER
+	---help---
+	  This driver supports configuring XCV block of RGX interface
+	  present on CN81XX chip.
+
 config LIQUIDIO
 	tristate "Cavium LiquidIO support"
 	depends on 64BIT
@@ -48,7 +58,7 @@
 	select LIBCRC32C
 	---help---
 	  This driver supports Cavium LiquidIO Intelligent Server Adapters
-	  based on CN66XX and CN68XX chips.
+	  based on CN66XX, CN68XX and CN23XX chips.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called liquidio.  This is recommended.
diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile
index 2f36680..5a27b2a 100644
--- a/drivers/net/ethernet/cavium/liquidio/Makefile
+++ b/drivers/net/ethernet/cavium/liquidio/Makefile
@@ -3,14 +3,16 @@
 #
 obj-$(CONFIG_LIQUIDIO) += liquidio.o
 
-liquidio-objs := lio_main.o  \
-	      lio_ethtool.o      \
-	      request_manager.o  \
-	      response_manager.o \
-	      octeon_device.o    \
-	      cn66xx_device.o    \
-	      cn68xx_device.o    \
-	      octeon_mem_ops.o   \
-	      octeon_droq.o      \
-	      octeon_console.o   \
-	      octeon_nic.o
+liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \
+			lio_core.o         \
+			request_manager.o  \
+			response_manager.o \
+			octeon_device.o    \
+			cn66xx_device.o    \
+			cn68xx_device.o    \
+			cn23xx_pf_device.o \
+			octeon_mem_ops.o   \
+			octeon_droq.o      \
+			octeon_nic.o
+
+liquidio-objs := lio_main.o octeon_console.o $(liquidio-y)
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
new file mode 100644
index 0000000..bddb198
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
@@ -0,0 +1,1237 @@
+/**********************************************************************
+* Author: Cavium, Inc.
+*
+* Contact: support@cavium.com
+*          Please include "LiquidIO" in the subject.
+*
+* Copyright (c) 2003-2015 Cavium, Inc.
+*
+* This file is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License, Version 2, as
+* published by the Free Software Foundation.
+*
+* This file is distributed in the hope that it will be useful, but
+* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+* NONINFRINGEMENT.  See the GNU General Public License for more
+* details.
+*
+* This file may also be available under a different license from Cavium.
+* Contact Cavium, Inc. for more information
+**********************************************************************/
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "cn23xx_pf_device.h"
+#include "octeon_main.h"
+
+#define RESET_NOTDONE 0
+#define RESET_DONE 1
+
+/* Change the value of SLI Packet Input Jabber Register to allow
+ * VXLAN TSO packets which can be 64424 bytes, exceeding the
+ * MAX_GSO_SIZE we supplied to the kernel
+ */
+#define CN23XX_INPUT_JABBER 64600
+
+#define LIOLUT_RING_DISTRIBUTION 9
+const int liolut_num_vfs_to_rings_per_vf[LIOLUT_RING_DISTRIBUTION] = {
+	0, 8, 4, 2, 2, 2, 1, 1, 1
+};
+
+void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct)
+{
+	int i = 0;
+	u32 regval = 0;
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+
+	/*In cn23xx_soft_reset*/
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%llx\n",
+		"CN23XX_WIN_WR_MASK_REG", CVM_CAST64(CN23XX_WIN_WR_MASK_REG),
+		CVM_CAST64(octeon_read_csr64(oct, CN23XX_WIN_WR_MASK_REG)));
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"CN23XX_SLI_SCRATCH1", CVM_CAST64(CN23XX_SLI_SCRATCH1),
+		CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1)));
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"CN23XX_RST_SOFT_RST", CN23XX_RST_SOFT_RST,
+		lio_pci_readq(oct, CN23XX_RST_SOFT_RST));
+
+	/*In cn23xx_set_dpi_regs*/
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"CN23XX_DPI_DMA_CONTROL", CN23XX_DPI_DMA_CONTROL,
+		lio_pci_readq(oct, CN23XX_DPI_DMA_CONTROL));
+
+	for (i = 0; i < 6; i++) {
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_DPI_DMA_ENG_ENB", i,
+			CN23XX_DPI_DMA_ENG_ENB(i),
+			lio_pci_readq(oct, CN23XX_DPI_DMA_ENG_ENB(i)));
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_DPI_DMA_ENG_BUF", i,
+			CN23XX_DPI_DMA_ENG_BUF(i),
+			lio_pci_readq(oct, CN23XX_DPI_DMA_ENG_BUF(i)));
+	}
+
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", "CN23XX_DPI_CTL",
+		CN23XX_DPI_CTL, lio_pci_readq(oct, CN23XX_DPI_CTL));
+
+	/*In cn23xx_setup_pcie_mps and cn23xx_setup_pcie_mrrs */
+	pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"CN23XX_CONFIG_PCIE_DEVCTL",
+		CVM_CAST64(CN23XX_CONFIG_PCIE_DEVCTL), CVM_CAST64(regval));
+
+	dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+		"CN23XX_DPI_SLI_PRTX_CFG", oct->pcie_port,
+		CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port),
+		lio_pci_readq(oct, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port)));
+
+	/*In cn23xx_specific_regs_setup */
+	dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+		"CN23XX_SLI_S2M_PORTX_CTL", oct->pcie_port,
+		CVM_CAST64(CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port)),
+		CVM_CAST64(octeon_read_csr64(
+			oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port))));
+
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"CN23XX_SLI_RING_RST", CVM_CAST64(CN23XX_SLI_PKT_IOQ_RING_RST),
+		(u64)octeon_read_csr64(oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+
+	/*In cn23xx_setup_global_mac_regs*/
+	for (i = 0; i < CN23XX_MAX_MACS; i++) {
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_PKT_MAC_RINFO64", i,
+			CVM_CAST64(CN23XX_SLI_PKT_MAC_RINFO64(i, oct->pf_num)),
+			CVM_CAST64(octeon_read_csr64
+				(oct, CN23XX_SLI_PKT_MAC_RINFO64
+					(i, oct->pf_num))));
+	}
+
+	/*In cn23xx_setup_global_input_regs*/
+	for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_IQ_PKT_CONTROL64", i,
+			CVM_CAST64(CN23XX_SLI_IQ_PKT_CONTROL64(i)),
+			CVM_CAST64(octeon_read_csr64
+				(oct, CN23XX_SLI_IQ_PKT_CONTROL64(i))));
+	}
+
+	/*In cn23xx_setup_global_output_regs*/
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"CN23XX_SLI_OQ_WMARK", CVM_CAST64(CN23XX_SLI_OQ_WMARK),
+		CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_OQ_WMARK)));
+
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_OQ_PKT_CONTROL", i,
+			CVM_CAST64(CN23XX_SLI_OQ_PKT_CONTROL(i)),
+			CVM_CAST64(octeon_read_csr(
+				oct, CN23XX_SLI_OQ_PKT_CONTROL(i))));
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_OQ_PKT_INT_LEVELS", i,
+			CVM_CAST64(CN23XX_SLI_OQ_PKT_INT_LEVELS(i)),
+			CVM_CAST64(octeon_read_csr64(
+				oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(i))));
+	}
+
+	/*In cn23xx_enable_interrupt and cn23xx_disable_interrupt*/
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"cn23xx->intr_enb_reg64",
+		CVM_CAST64((long)(cn23xx->intr_enb_reg64)),
+		CVM_CAST64(readq(cn23xx->intr_enb_reg64)));
+
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"cn23xx->intr_sum_reg64",
+		CVM_CAST64((long)(cn23xx->intr_sum_reg64)),
+		CVM_CAST64(readq(cn23xx->intr_sum_reg64)));
+
+	/*In cn23xx_setup_iq_regs*/
+	for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_IQ_BASE_ADDR64", i,
+			CVM_CAST64(CN23XX_SLI_IQ_BASE_ADDR64(i)),
+			CVM_CAST64(octeon_read_csr64(
+				oct, CN23XX_SLI_IQ_BASE_ADDR64(i))));
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_IQ_SIZE", i,
+			CVM_CAST64(CN23XX_SLI_IQ_SIZE(i)),
+			CVM_CAST64(octeon_read_csr
+				(oct, CN23XX_SLI_IQ_SIZE(i))));
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_IQ_DOORBELL", i,
+			CVM_CAST64(CN23XX_SLI_IQ_DOORBELL(i)),
+			CVM_CAST64(octeon_read_csr64(
+				oct, CN23XX_SLI_IQ_DOORBELL(i))));
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_IQ_INSTR_COUNT64", i,
+			CVM_CAST64(CN23XX_SLI_IQ_INSTR_COUNT64(i)),
+			CVM_CAST64(octeon_read_csr64(
+				oct, CN23XX_SLI_IQ_INSTR_COUNT64(i))));
+	}
+
+	/*In cn23xx_setup_oq_regs*/
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_OQ_BASE_ADDR64", i,
+			CVM_CAST64(CN23XX_SLI_OQ_BASE_ADDR64(i)),
+			CVM_CAST64(octeon_read_csr64(
+				oct, CN23XX_SLI_OQ_BASE_ADDR64(i))));
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_OQ_SIZE", i,
+			CVM_CAST64(CN23XX_SLI_OQ_SIZE(i)),
+			CVM_CAST64(octeon_read_csr
+				(oct, CN23XX_SLI_OQ_SIZE(i))));
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_OQ_BUFF_INFO_SIZE", i,
+			CVM_CAST64(CN23XX_SLI_OQ_BUFF_INFO_SIZE(i)),
+			CVM_CAST64(octeon_read_csr(
+				oct, CN23XX_SLI_OQ_BUFF_INFO_SIZE(i))));
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_OQ_PKTS_SENT", i,
+			CVM_CAST64(CN23XX_SLI_OQ_PKTS_SENT(i)),
+			CVM_CAST64(octeon_read_csr64(
+				oct, CN23XX_SLI_OQ_PKTS_SENT(i))));
+		dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n",
+			"CN23XX_SLI_OQ_PKTS_CREDIT", i,
+			CVM_CAST64(CN23XX_SLI_OQ_PKTS_CREDIT(i)),
+			CVM_CAST64(octeon_read_csr64(
+				oct, CN23XX_SLI_OQ_PKTS_CREDIT(i))));
+	}
+
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"CN23XX_SLI_PKT_TIME_INT",
+		CVM_CAST64(CN23XX_SLI_PKT_TIME_INT),
+		CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_TIME_INT)));
+	dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n",
+		"CN23XX_SLI_PKT_CNT_INT",
+		CVM_CAST64(CN23XX_SLI_PKT_CNT_INT),
+		CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_CNT_INT)));
+}
+
+static int cn23xx_pf_soft_reset(struct octeon_device *oct)
+{
+	octeon_write_csr64(oct, CN23XX_WIN_WR_MASK_REG, 0xFF);
+
+	dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: BIST enabled for CN23XX soft reset\n",
+		oct->octeon_id);
+
+	octeon_write_csr64(oct, CN23XX_SLI_SCRATCH1, 0x1234ULL);
+
+	/* Initiate chip-wide soft reset */
+	lio_pci_readq(oct, CN23XX_RST_SOFT_RST);
+	lio_pci_writeq(oct, 1, CN23XX_RST_SOFT_RST);
+
+	/* Wait for 100ms as Octeon resets. */
+	mdelay(100);
+
+	if (octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1) == 0x1234ULL) {
+		dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Soft reset failed\n",
+			oct->octeon_id);
+		return 1;
+	}
+
+	dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: Reset completed\n",
+		oct->octeon_id);
+
+	/* restore the  reset value*/
+	octeon_write_csr64(oct, CN23XX_WIN_WR_MASK_REG, 0xFF);
+
+	return 0;
+}
+
+static void cn23xx_enable_error_reporting(struct octeon_device *oct)
+{
+	u32 regval;
+	u32 uncorrectable_err_mask, corrtable_err_status;
+
+	pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
+	if (regval & CN23XX_CONFIG_PCIE_DEVCTL_MASK) {
+		uncorrectable_err_mask = 0;
+		corrtable_err_status = 0;
+		pci_read_config_dword(oct->pci_dev,
+				      CN23XX_CONFIG_PCIE_UNCORRECT_ERR_MASK,
+				      &uncorrectable_err_mask);
+		pci_read_config_dword(oct->pci_dev,
+				      CN23XX_CONFIG_PCIE_CORRECT_ERR_STATUS,
+				      &corrtable_err_status);
+		dev_err(&oct->pci_dev->dev, "PCI-E Fatal error detected;\n"
+				 "\tdev_ctl_status_reg = 0x%08x\n"
+				 "\tuncorrectable_error_mask_reg = 0x%08x\n"
+				 "\tcorrectable_error_status_reg = 0x%08x\n",
+			    regval, uncorrectable_err_mask,
+			    corrtable_err_status);
+	}
+
+	regval |= 0xf; /* Enable Link error reporting */
+
+	dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: Enabling PCI-E error reporting..\n",
+		oct->octeon_id);
+	pci_write_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, regval);
+}
+
+static u32 cn23xx_coprocessor_clock(struct octeon_device *oct)
+{
+	/* Bits 29:24 of RST_BOOT[PNR_MUL] holds the ref.clock MULTIPLIER
+	 * for SLI.
+	 */
+
+	/* TBD: get the info in Hand-shake */
+	return (((lio_pci_readq(oct, CN23XX_RST_BOOT) >> 24) & 0x3f) * 50);
+}
+
+u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us)
+{
+	/* This gives the SLI clock per microsec */
+	u32 oqticks_per_us = cn23xx_coprocessor_clock(oct);
+
+	oct->pfvf_hsword.coproc_tics_per_us = oqticks_per_us;
+
+	/* This gives the clock cycles per millisecond */
+	oqticks_per_us *= 1000;
+
+	/* This gives the oq ticks (1024 core clock cycles) per millisecond */
+	oqticks_per_us /= 1024;
+
+	/* time_intr is in microseconds. The next 2 steps gives the oq ticks
+	 *  corressponding to time_intr.
+	 */
+	oqticks_per_us *= time_intr_in_us;
+	oqticks_per_us /= 1000;
+
+	return oqticks_per_us;
+}
+
+static void cn23xx_setup_global_mac_regs(struct octeon_device *oct)
+{
+	u64 reg_val;
+	u16 mac_no = oct->pcie_port;
+	u16 pf_num = oct->pf_num;
+
+	/* programming SRN and TRS for each MAC(0..3)  */
+
+	dev_dbg(&oct->pci_dev->dev, "%s:Using pcie port %d\n",
+		__func__, mac_no);
+	/* By default, mapping all 64 IOQs to  a single MACs */
+
+	reg_val =
+	    octeon_read_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num));
+
+	if (oct->rev_id == OCTEON_CN23XX_REV_1_1) {
+		/* setting SRN <6:0>  */
+		reg_val = pf_num * CN23XX_MAX_RINGS_PER_PF_PASS_1_1;
+	} else {
+		/* setting SRN <6:0>  */
+		reg_val = pf_num * CN23XX_MAX_RINGS_PER_PF;
+	}
+
+	/* setting TRS <23:16> */
+	reg_val = reg_val |
+		  (oct->sriov_info.trs << CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS);
+	/* write these settings to MAC register */
+	octeon_write_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num),
+			   reg_val);
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_PKT_MAC(%d)_PF(%d)_RINFO : 0x%016llx\n",
+		mac_no, pf_num, (u64)octeon_read_csr64
+		(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num)));
+}
+
+static int cn23xx_reset_io_queues(struct octeon_device *oct)
+{
+	int ret_val = 0;
+	u64 d64;
+	u32 q_no, srn, ern;
+	u32 loop = 1000;
+
+	srn = oct->sriov_info.pf_srn;
+	ern = srn + oct->sriov_info.num_pf_rings;
+
+	/*As per HRM reg description, s/w cant write 0 to ENB. */
+	/*to make the queue off, need to set the RST bit. */
+
+	/* Reset the Enable bit for all the 64 IQs.  */
+	for (q_no = srn; q_no < ern; q_no++) {
+		/* set RST bit to 1. This bit applies to both IQ and OQ */
+		d64 = octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+		d64 = d64 | CN23XX_PKT_INPUT_CTL_RST;
+		octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), d64);
+	}
+
+	/*wait until the RST bit is clear or the RST and quite bits are set*/
+	for (q_no = srn; q_no < ern; q_no++) {
+		u64 reg_val = octeon_read_csr64(oct,
+					CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+		while ((READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) &&
+		       !(READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_QUIET) &&
+		       loop--) {
+			WRITE_ONCE(reg_val, octeon_read_csr64(
+			    oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)));
+		}
+		if (!loop) {
+			dev_err(&oct->pci_dev->dev,
+				"clearing the reset reg failed or setting the quiet reg failed for qno: %u\n",
+				q_no);
+			return -1;
+		}
+		WRITE_ONCE(reg_val, READ_ONCE(reg_val) &
+			~CN23XX_PKT_INPUT_CTL_RST);
+		octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+				   READ_ONCE(reg_val));
+
+		WRITE_ONCE(reg_val, octeon_read_csr64(
+			   oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)));
+		if (READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) {
+			dev_err(&oct->pci_dev->dev,
+				"clearing the reset failed for qno: %u\n",
+				q_no);
+			ret_val = -1;
+		}
+	}
+
+	return ret_val;
+}
+
+static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct)
+{
+	u32 q_no, ern, srn;
+	u64 pf_num;
+	u64 intr_threshold, reg_val;
+	struct octeon_instr_queue *iq;
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+
+	pf_num = oct->pf_num;
+
+	srn = oct->sriov_info.pf_srn;
+	ern = srn + oct->sriov_info.num_pf_rings;
+
+	if (cn23xx_reset_io_queues(oct))
+		return -1;
+
+	/** Set the MAC_NUM and PVF_NUM in IQ_PKT_CONTROL reg
+	* for all queues.Only PF can set these bits.
+	* bits 29:30 indicate the MAC num.
+	* bits 32:47 indicate the PVF num.
+	*/
+	for (q_no = 0; q_no < ern; q_no++) {
+		reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS;
+		reg_val |= pf_num << CN23XX_PKT_INPUT_CTL_PF_NUM_POS;
+
+		octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+				   reg_val);
+	}
+
+	/* Select ES, RO, NS, RDSIZE,DPTR Fomat#0 for
+	 * pf queues
+	 */
+	for (q_no = srn; q_no < ern; q_no++) {
+		void __iomem *inst_cnt_reg;
+
+		iq = oct->instr_queue[q_no];
+		if (iq)
+			inst_cnt_reg = iq->inst_cnt_reg;
+		else
+			inst_cnt_reg = (u8 *)oct->mmio[0].hw_addr +
+				       CN23XX_SLI_IQ_INSTR_COUNT64(q_no);
+
+		reg_val =
+		    octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+
+		reg_val |= CN23XX_PKT_INPUT_CTL_MASK;
+
+		octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+				   reg_val);
+
+		/* Set WMARK level for triggering PI_INT */
+		/* intr_threshold = CN23XX_DEF_IQ_INTR_THRESHOLD & */
+		intr_threshold = CFG_GET_IQ_INTR_PKT(cn23xx->conf) &
+				 CN23XX_PKT_IN_DONE_WMARK_MASK;
+
+		writeq((readq(inst_cnt_reg) &
+			~(CN23XX_PKT_IN_DONE_WMARK_MASK <<
+			  CN23XX_PKT_IN_DONE_WMARK_BIT_POS)) |
+		       (intr_threshold << CN23XX_PKT_IN_DONE_WMARK_BIT_POS),
+		       inst_cnt_reg);
+	}
+	return 0;
+}
+
+static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct)
+{
+	u32 reg_val;
+	u32 q_no, ern, srn;
+	u64 time_threshold;
+
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+
+	srn = oct->sriov_info.pf_srn;
+	ern = srn + oct->sriov_info.num_pf_rings;
+
+	if (CFG_GET_IS_SLI_BP_ON(cn23xx->conf)) {
+		octeon_write_csr64(oct, CN23XX_SLI_OQ_WMARK, 32);
+	} else {
+		/** Set Output queue watermark to 0 to disable backpressure */
+		octeon_write_csr64(oct, CN23XX_SLI_OQ_WMARK, 0);
+	}
+
+	for (q_no = srn; q_no < ern; q_no++) {
+		reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no));
+
+		/* set IPTR & DPTR */
+		reg_val |=
+		    (CN23XX_PKT_OUTPUT_CTL_IPTR | CN23XX_PKT_OUTPUT_CTL_DPTR);
+
+		/* reset BMODE */
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_BMODE);
+
+		/* No Relaxed Ordering, No Snoop, 64-bit Byte swap
+		 * for Output Queue ScatterList
+		 * reset ROR_P, NSR_P
+		 */
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR_P);
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR_P);
+
+#ifdef __LITTLE_ENDIAN_BITFIELD
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ES_P);
+#else
+		reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES_P);
+#endif
+		/* No Relaxed Ordering, No Snoop, 64-bit Byte swap
+		 * for Output Queue Data
+		 * reset ROR, NSR
+		 */
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR);
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR);
+		/* set the ES bit */
+		reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES);
+
+		/* write all the selected settings */
+		octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no), reg_val);
+
+		/* Enabling these interrupt in oct->fn_list.enable_interrupt()
+		 * routine which called after IOQ init.
+		 * Set up interrupt packet and time thresholds
+		 * for all the OQs
+		 */
+		time_threshold = cn23xx_pf_get_oq_ticks(
+		    oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf));
+
+		octeon_write_csr64(oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no),
+				   (CFG_GET_OQ_INTR_PKT(cn23xx->conf) |
+				    (time_threshold << 32)));
+	}
+
+	/** Setting the water mark level for pko back pressure **/
+	writeq(0x40, (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_WMARK);
+
+	/** Disabling setting OQs in reset when ring has no dorebells
+	  * enabling this will cause of head of line blocking
+	  */
+	/* Do it only for pass1.1. and pass1.2 */
+	if ((oct->rev_id == OCTEON_CN23XX_REV_1_0) ||
+	    (oct->rev_id == OCTEON_CN23XX_REV_1_1))
+		writeq(readq((u8 *)oct->mmio[0].hw_addr +
+				     CN23XX_SLI_GBL_CONTROL) | 0x2,
+		       (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_GBL_CONTROL);
+
+	/** Enable channel-level backpressure */
+	if (oct->pf_num)
+		writeq(0xffffffffffffffffULL,
+		       (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OUT_BP_EN2_W1S);
+	else
+		writeq(0xffffffffffffffffULL,
+		       (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OUT_BP_EN_W1S);
+}
+
+static int cn23xx_setup_pf_device_regs(struct octeon_device *oct)
+{
+	cn23xx_enable_error_reporting(oct);
+
+	/* program the MAC(0..3)_RINFO before setting up input/output regs */
+	cn23xx_setup_global_mac_regs(oct);
+
+	if (cn23xx_pf_setup_global_input_regs(oct))
+		return -1;
+
+	cn23xx_pf_setup_global_output_regs(oct);
+
+	/* Default error timeout value should be 0x200000 to avoid host hang
+	 * when reads invalid register
+	 */
+	octeon_write_csr64(oct, CN23XX_SLI_WINDOW_CTL,
+			   CN23XX_SLI_WINDOW_CTL_DEFAULT);
+
+	/* set SLI_PKT_IN_JABBER to handle large VXLAN packets */
+	octeon_write_csr64(oct, CN23XX_SLI_PKT_IN_JABBER, CN23XX_INPUT_JABBER);
+	return 0;
+}
+
+static void cn23xx_setup_iq_regs(struct octeon_device *oct, u32 iq_no)
+{
+	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
+	u64 pkt_in_done;
+
+	iq_no += oct->sriov_info.pf_srn;
+
+	/* Write the start of the input queue's ring and its size  */
+	octeon_write_csr64(oct, CN23XX_SLI_IQ_BASE_ADDR64(iq_no),
+			   iq->base_addr_dma);
+	octeon_write_csr(oct, CN23XX_SLI_IQ_SIZE(iq_no), iq->max_count);
+
+	/* Remember the doorbell & instruction count register addr
+	 * for this queue
+	 */
+	iq->doorbell_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_IQ_DOORBELL(iq_no);
+	iq->inst_cnt_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_IQ_INSTR_COUNT64(iq_no);
+	dev_dbg(&oct->pci_dev->dev, "InstQ[%d]:dbell reg @ 0x%p instcnt_reg @ 0x%p\n",
+		iq_no, iq->doorbell_reg, iq->inst_cnt_reg);
+
+	/* Store the current instruction counter (used in flush_iq
+	 * calculation)
+	 */
+	pkt_in_done = readq(iq->inst_cnt_reg);
+
+	if (oct->msix_on) {
+		/* Set CINT_ENB to enable IQ interrupt   */
+		writeq((pkt_in_done | CN23XX_INTR_CINT_ENB),
+		       iq->inst_cnt_reg);
+	} else {
+		/* Clear the count by writing back what we read, but don't
+		 * enable interrupts
+		 */
+		writeq(pkt_in_done, iq->inst_cnt_reg);
+	}
+
+	iq->reset_instr_cnt = 0;
+}
+
+static void cn23xx_setup_oq_regs(struct octeon_device *oct, u32 oq_no)
+{
+	u32 reg_val;
+	struct octeon_droq *droq = oct->droq[oq_no];
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+	u64 time_threshold;
+	u64 cnt_threshold;
+
+	oq_no += oct->sriov_info.pf_srn;
+
+	octeon_write_csr64(oct, CN23XX_SLI_OQ_BASE_ADDR64(oq_no),
+			   droq->desc_ring_dma);
+	octeon_write_csr(oct, CN23XX_SLI_OQ_SIZE(oq_no), droq->max_count);
+
+	octeon_write_csr(oct, CN23XX_SLI_OQ_BUFF_INFO_SIZE(oq_no),
+			 (droq->buffer_size | (OCT_RH_SIZE << 16)));
+
+	/* Get the mapped address of the pkt_sent and pkts_credit regs */
+	droq->pkts_sent_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_PKTS_SENT(oq_no);
+	droq->pkts_credit_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_PKTS_CREDIT(oq_no);
+
+	if (!oct->msix_on) {
+		/* Enable this output queue to generate Packet Timer Interrupt
+		 */
+		reg_val =
+		    octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no));
+		reg_val |= CN23XX_PKT_OUTPUT_CTL_TENB;
+		octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no),
+				 reg_val);
+
+		/* Enable this output queue to generate Packet Count Interrupt
+		 */
+		reg_val =
+		    octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no));
+		reg_val |= CN23XX_PKT_OUTPUT_CTL_CENB;
+		octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no),
+				 reg_val);
+	} else {
+		time_threshold = cn23xx_pf_get_oq_ticks(
+		    oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf));
+		cnt_threshold = (u32)CFG_GET_OQ_INTR_PKT(cn23xx->conf);
+
+		octeon_write_csr64(
+		    oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(oq_no),
+		    ((time_threshold << 32 | cnt_threshold)));
+	}
+}
+
+static int cn23xx_enable_io_queues(struct octeon_device *oct)
+{
+	u64 reg_val;
+	u32 srn, ern, q_no;
+	u32 loop = 1000;
+
+	srn = oct->sriov_info.pf_srn;
+	ern = srn + oct->num_iqs;
+
+	for (q_no = srn; q_no < ern; q_no++) {
+		/* set the corresponding IQ IS_64B bit */
+		if (oct->io_qmask.iq64B & BIT_ULL(q_no - srn)) {
+			reg_val = octeon_read_csr64(
+			    oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+			reg_val = reg_val | CN23XX_PKT_INPUT_CTL_IS_64B;
+			octeon_write_csr64(
+			    oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val);
+		}
+
+		/* set the corresponding IQ ENB bit */
+		if (oct->io_qmask.iq & BIT_ULL(q_no - srn)) {
+			/* IOQs are in reset by default in PEM2 mode,
+			 * clearing reset bit
+			 */
+			reg_val = octeon_read_csr64(
+			    oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+
+			if (reg_val & CN23XX_PKT_INPUT_CTL_RST) {
+				while ((reg_val & CN23XX_PKT_INPUT_CTL_RST) &&
+				       !(reg_val &
+					 CN23XX_PKT_INPUT_CTL_QUIET) &&
+				       loop--) {
+					reg_val = octeon_read_csr64(
+					    oct,
+					    CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+				}
+				if (!loop) {
+					dev_err(&oct->pci_dev->dev,
+						"clearing the reset reg failed or setting the quiet reg failed for qno: %u\n",
+						q_no);
+					return -1;
+				}
+				reg_val = reg_val & ~CN23XX_PKT_INPUT_CTL_RST;
+				octeon_write_csr64(
+				    oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+				    reg_val);
+
+				reg_val = octeon_read_csr64(
+				    oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+				if (reg_val & CN23XX_PKT_INPUT_CTL_RST) {
+					dev_err(&oct->pci_dev->dev,
+						"clearing the reset failed for qno: %u\n",
+						q_no);
+					return -1;
+				}
+			}
+			reg_val = octeon_read_csr64(
+			    oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no));
+			reg_val = reg_val | CN23XX_PKT_INPUT_CTL_RING_ENB;
+			octeon_write_csr64(
+			    oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val);
+		}
+	}
+	for (q_no = srn; q_no < ern; q_no++) {
+		u32 reg_val;
+		/* set the corresponding OQ ENB bit */
+		if (oct->io_qmask.oq & BIT_ULL(q_no - srn)) {
+			reg_val = octeon_read_csr(
+			    oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no));
+			reg_val = reg_val | CN23XX_PKT_OUTPUT_CTL_RING_ENB;
+			octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no),
+					 reg_val);
+		}
+	}
+	return 0;
+}
+
+static void cn23xx_disable_io_queues(struct octeon_device *oct)
+{
+	int q_no, loop;
+	u64 d64;
+	u32 d32;
+	u32 srn, ern;
+
+	srn = oct->sriov_info.pf_srn;
+	ern = srn + oct->num_iqs;
+
+	/*** Disable Input Queues. ***/
+	for (q_no = srn; q_no < ern; q_no++) {
+		loop = HZ;
+
+		/* start the Reset for a particular ring */
+		WRITE_ONCE(d64, octeon_read_csr64(
+			   oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)));
+		WRITE_ONCE(d64, READ_ONCE(d64) &
+					(~(CN23XX_PKT_INPUT_CTL_RING_ENB)));
+		WRITE_ONCE(d64, READ_ONCE(d64) | CN23XX_PKT_INPUT_CTL_RST);
+		octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+				   READ_ONCE(d64));
+
+		/* Wait until hardware indicates that the particular IQ
+		 * is out of reset.
+		 */
+		WRITE_ONCE(d64, octeon_read_csr64(
+					oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+		while (!(READ_ONCE(d64) & BIT_ULL(q_no)) && loop--) {
+			WRITE_ONCE(d64, octeon_read_csr64(
+					oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+			schedule_timeout_uninterruptible(1);
+		}
+
+		/* Reset the doorbell register for this Input Queue. */
+		octeon_write_csr(oct, CN23XX_SLI_IQ_DOORBELL(q_no), 0xFFFFFFFF);
+		while (octeon_read_csr64(oct, CN23XX_SLI_IQ_DOORBELL(q_no)) &&
+		       loop--) {
+			schedule_timeout_uninterruptible(1);
+		}
+	}
+
+	/*** Disable Output Queues. ***/
+	for (q_no = srn; q_no < ern; q_no++) {
+		loop = HZ;
+
+		/* Wait until hardware indicates that the particular IQ
+		 * is out of reset.It given that SLI_PKT_RING_RST is
+		 * common for both IQs and OQs
+		 */
+		WRITE_ONCE(d64, octeon_read_csr64(
+					oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+		while (!(READ_ONCE(d64) & BIT_ULL(q_no)) && loop--) {
+			WRITE_ONCE(d64, octeon_read_csr64(
+					oct, CN23XX_SLI_PKT_IOQ_RING_RST));
+			schedule_timeout_uninterruptible(1);
+		}
+
+		/* Reset the doorbell register for this Output Queue. */
+		octeon_write_csr(oct, CN23XX_SLI_OQ_PKTS_CREDIT(q_no),
+				 0xFFFFFFFF);
+		while (octeon_read_csr64(oct,
+					 CN23XX_SLI_OQ_PKTS_CREDIT(q_no)) &&
+		       loop--) {
+			schedule_timeout_uninterruptible(1);
+		}
+
+		/* clear the SLI_PKT(0..63)_CNTS[CNT] reg value */
+		WRITE_ONCE(d32, octeon_read_csr(
+					oct, CN23XX_SLI_OQ_PKTS_SENT(q_no)));
+		octeon_write_csr(oct, CN23XX_SLI_OQ_PKTS_SENT(q_no),
+				 READ_ONCE(d32));
+	}
+}
+
+static u64 cn23xx_pf_msix_interrupt_handler(void *dev)
+{
+	struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev;
+	struct octeon_device *oct = ioq_vector->oct_dev;
+	u64 pkts_sent;
+	u64 ret = 0;
+	struct octeon_droq *droq = oct->droq[ioq_vector->droq_index];
+
+	dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct);
+
+	if (!droq) {
+		dev_err(&oct->pci_dev->dev, "23XX bringup FIXME: oct pfnum:%d ioq_vector->ioq_num :%d droq is NULL\n",
+			oct->pf_num, ioq_vector->ioq_num);
+		return 0;
+	}
+
+	pkts_sent = readq(droq->pkts_sent_reg);
+
+	/* If our device has interrupted, then proceed. Also check
+	 * for all f's if interrupt was triggered on an error
+	 * and the PCI read fails.
+	 */
+	if (!pkts_sent || (pkts_sent == 0xFFFFFFFFFFFFFFFFULL))
+		return ret;
+
+	/* Write count reg in sli_pkt_cnts to clear these int.*/
+	if ((pkts_sent & CN23XX_INTR_PO_INT) ||
+	    (pkts_sent & CN23XX_INTR_PI_INT)) {
+		if (pkts_sent & CN23XX_INTR_PO_INT)
+			ret |= MSIX_PO_INT;
+	}
+
+	if (pkts_sent & CN23XX_INTR_PI_INT)
+		/* We will clear the count when we update the read_index. */
+		ret |= MSIX_PI_INT;
+
+	/* Never need to handle msix mbox intr for pf. They arrive on the last
+	 * msix
+	 */
+	return ret;
+}
+
+static irqreturn_t cn23xx_interrupt_handler(void *dev)
+{
+	struct octeon_device *oct = (struct octeon_device *)dev;
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+	u64 intr64;
+
+	dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct);
+	intr64 = readq(cn23xx->intr_sum_reg64);
+
+	oct->int_status = 0;
+
+	if (intr64 & CN23XX_INTR_ERR)
+		dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Error Intr: 0x%016llx\n",
+			oct->octeon_id, CVM_CAST64(intr64));
+
+	if (oct->msix_on != LIO_FLAG_MSIX_ENABLED) {
+		if (intr64 & CN23XX_INTR_PKT_DATA)
+			oct->int_status |= OCT_DEV_INTR_PKT_DATA;
+	}
+
+	if (intr64 & (CN23XX_INTR_DMA0_FORCE))
+		oct->int_status |= OCT_DEV_INTR_DMA0_FORCE;
+	if (intr64 & (CN23XX_INTR_DMA1_FORCE))
+		oct->int_status |= OCT_DEV_INTR_DMA1_FORCE;
+
+	/* Clear the current interrupts */
+	writeq(intr64, cn23xx->intr_sum_reg64);
+
+	return IRQ_HANDLED;
+}
+
+static void cn23xx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr,
+				  u32 idx, int valid)
+{
+	u64 bar1;
+	u64 reg_adr;
+
+	if (!valid) {
+		reg_adr = lio_pci_readq(
+			oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+		WRITE_ONCE(bar1, reg_adr);
+		lio_pci_writeq(oct, (READ_ONCE(bar1) & 0xFFFFFFFEULL),
+			       CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+		reg_adr = lio_pci_readq(
+			oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+		WRITE_ONCE(bar1, reg_adr);
+		return;
+	}
+
+	/*  The PEM(0..3)_BAR1_INDEX(0..15)[ADDR_IDX]<23:4> stores
+	 *  bits <41:22> of the Core Addr
+	 */
+	lio_pci_writeq(oct, (((core_addr >> 22) << 4) | PCI_BAR1_MASK),
+		       CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+
+	WRITE_ONCE(bar1, lio_pci_readq(
+		   oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)));
+}
+
+static void cn23xx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask)
+{
+	lio_pci_writeq(oct, mask,
+		       CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+}
+
+static u32 cn23xx_bar1_idx_read(struct octeon_device *oct, u32 idx)
+{
+	return (u32)lio_pci_readq(
+	    oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx));
+}
+
+/* always call with lock held */
+static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq)
+{
+	u32 new_idx;
+	u32 last_done;
+	u32 pkt_in_done = readl(iq->inst_cnt_reg);
+
+	last_done = pkt_in_done - iq->pkt_in_done;
+	iq->pkt_in_done = pkt_in_done;
+
+	/* Modulo of the new index with the IQ size will give us
+	 * the new index.  The iq->reset_instr_cnt is always zero for
+	 * cn23xx, so no extra adjustments are needed.
+	 */
+	new_idx = (iq->octeon_read_index +
+		   (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) %
+		  iq->max_count;
+
+	return new_idx;
+}
+
+static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag)
+{
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+	u64 intr_val = 0;
+
+	/*  Divide the single write to multiple writes based on the flag. */
+	/* Enable Interrupt */
+	if (intr_flag == OCTEON_ALL_INTR) {
+		writeq(cn23xx->intr_mask64, cn23xx->intr_enb_reg64);
+	} else if (intr_flag & OCTEON_OUTPUT_INTR) {
+		intr_val = readq(cn23xx->intr_enb_reg64);
+		intr_val |= CN23XX_INTR_PKT_DATA;
+		writeq(intr_val, cn23xx->intr_enb_reg64);
+	}
+}
+
+static void cn23xx_disable_pf_interrupt(struct octeon_device *oct, u8 intr_flag)
+{
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+	u64 intr_val = 0;
+
+	/* Disable Interrupts */
+	if (intr_flag == OCTEON_ALL_INTR) {
+		writeq(0, cn23xx->intr_enb_reg64);
+	} else if (intr_flag & OCTEON_OUTPUT_INTR) {
+		intr_val = readq(cn23xx->intr_enb_reg64);
+		intr_val &= ~CN23XX_INTR_PKT_DATA;
+		writeq(intr_val, cn23xx->intr_enb_reg64);
+	}
+}
+
+static void cn23xx_get_pcie_qlmport(struct octeon_device *oct)
+{
+	oct->pcie_port = (octeon_read_csr(oct, CN23XX_SLI_MAC_NUMBER)) & 0xff;
+
+	dev_dbg(&oct->pci_dev->dev, "OCTEON: CN23xx uses PCIE Port %d\n",
+		oct->pcie_port);
+}
+
+static void cn23xx_get_pf_num(struct octeon_device *oct)
+{
+	u32 fdl_bit = 0;
+
+	/** Read Function Dependency Link reg to get the function number */
+	pci_read_config_dword(oct->pci_dev, CN23XX_PCIE_SRIOV_FDL, &fdl_bit);
+	oct->pf_num = ((fdl_bit >> CN23XX_PCIE_SRIOV_FDL_BIT_POS) &
+		       CN23XX_PCIE_SRIOV_FDL_MASK);
+}
+
+static void cn23xx_setup_reg_address(struct octeon_device *oct)
+{
+	u8 __iomem *bar0_pciaddr = oct->mmio[0].hw_addr;
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+
+	oct->reg_list.pci_win_wr_addr_hi =
+	    (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR_HI);
+	oct->reg_list.pci_win_wr_addr_lo =
+	    (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR_LO);
+	oct->reg_list.pci_win_wr_addr =
+	    (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR64);
+
+	oct->reg_list.pci_win_rd_addr_hi =
+	    (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR_HI);
+	oct->reg_list.pci_win_rd_addr_lo =
+	    (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR_LO);
+	oct->reg_list.pci_win_rd_addr =
+	    (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR64);
+
+	oct->reg_list.pci_win_wr_data_hi =
+	    (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA_HI);
+	oct->reg_list.pci_win_wr_data_lo =
+	    (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA_LO);
+	oct->reg_list.pci_win_wr_data =
+	    (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA64);
+
+	oct->reg_list.pci_win_rd_data_hi =
+	    (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA_HI);
+	oct->reg_list.pci_win_rd_data_lo =
+	    (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA_LO);
+	oct->reg_list.pci_win_rd_data =
+	    (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA64);
+
+	cn23xx_get_pcie_qlmport(oct);
+
+	cn23xx->intr_mask64 = CN23XX_INTR_MASK;
+	if (!oct->msix_on)
+		cn23xx->intr_mask64 |= CN23XX_INTR_PKT_TIME;
+	if (oct->rev_id >= OCTEON_CN23XX_REV_1_1)
+		cn23xx->intr_mask64 |= CN23XX_INTR_VF_MBOX;
+
+	cn23xx->intr_sum_reg64 =
+	    bar0_pciaddr +
+	    CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num);
+	cn23xx->intr_enb_reg64 =
+	    bar0_pciaddr +
+	    CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num);
+}
+
+static int cn23xx_sriov_config(struct octeon_device *oct)
+{
+	u32 total_rings;
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+	/* num_vfs is already filled for us */
+	u32 pf_srn, num_pf_rings;
+
+	cn23xx->conf =
+	    (struct octeon_config *)oct_get_config_info(oct, LIO_23XX);
+	switch (oct->rev_id) {
+	case OCTEON_CN23XX_REV_1_0:
+		total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_0;
+		break;
+	case OCTEON_CN23XX_REV_1_1:
+		total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_1;
+		break;
+	default:
+		total_rings = CN23XX_MAX_RINGS_PER_PF;
+		break;
+	}
+	if (!oct->sriov_info.num_pf_rings) {
+		if (total_rings > num_present_cpus())
+			num_pf_rings = num_present_cpus();
+		else
+			num_pf_rings = total_rings;
+	} else {
+		num_pf_rings = oct->sriov_info.num_pf_rings;
+
+		if (num_pf_rings > total_rings) {
+			dev_warn(&oct->pci_dev->dev,
+				 "num_queues_per_pf requested %u is more than available rings. Reducing to %u\n",
+				 num_pf_rings, total_rings);
+			num_pf_rings = total_rings;
+		}
+	}
+
+	total_rings = num_pf_rings;
+	/* the first ring of the pf */
+	pf_srn = total_rings - num_pf_rings;
+
+	oct->sriov_info.trs = total_rings;
+	oct->sriov_info.pf_srn = pf_srn;
+	oct->sriov_info.num_pf_rings = num_pf_rings;
+	dev_dbg(&oct->pci_dev->dev, "trs:%d pf_srn:%d num_pf_rings:%d\n",
+		oct->sriov_info.trs, oct->sriov_info.pf_srn,
+		oct->sriov_info.num_pf_rings);
+	return 0;
+}
+
+int setup_cn23xx_octeon_pf_device(struct octeon_device *oct)
+{
+	if (octeon_map_pci_barx(oct, 0, 0))
+		return 1;
+
+	if (octeon_map_pci_barx(oct, 1, MAX_BAR1_IOREMAP_SIZE)) {
+		dev_err(&oct->pci_dev->dev, "%s CN23XX BAR1 map failed\n",
+			__func__);
+		octeon_unmap_pci_barx(oct, 0);
+		return 1;
+	}
+
+	cn23xx_get_pf_num(oct);
+
+	if (cn23xx_sriov_config(oct)) {
+		octeon_unmap_pci_barx(oct, 0);
+		octeon_unmap_pci_barx(oct, 1);
+		return 1;
+	}
+
+	octeon_write_csr64(oct, CN23XX_SLI_MAC_CREDIT_CNT, 0x3F802080802080ULL);
+
+	oct->fn_list.setup_iq_regs = cn23xx_setup_iq_regs;
+	oct->fn_list.setup_oq_regs = cn23xx_setup_oq_regs;
+	oct->fn_list.process_interrupt_regs = cn23xx_interrupt_handler;
+	oct->fn_list.msix_interrupt_handler = cn23xx_pf_msix_interrupt_handler;
+
+	oct->fn_list.soft_reset = cn23xx_pf_soft_reset;
+	oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs;
+	oct->fn_list.update_iq_read_idx = cn23xx_update_read_index;
+
+	oct->fn_list.bar1_idx_setup = cn23xx_bar1_idx_setup;
+	oct->fn_list.bar1_idx_write = cn23xx_bar1_idx_write;
+	oct->fn_list.bar1_idx_read = cn23xx_bar1_idx_read;
+
+	oct->fn_list.enable_interrupt = cn23xx_enable_pf_interrupt;
+	oct->fn_list.disable_interrupt = cn23xx_disable_pf_interrupt;
+
+	oct->fn_list.enable_io_queues = cn23xx_enable_io_queues;
+	oct->fn_list.disable_io_queues = cn23xx_disable_io_queues;
+
+	cn23xx_setup_reg_address(oct);
+
+	oct->coproc_clock_rate = 1000000ULL * cn23xx_coprocessor_clock(oct);
+
+	return 0;
+}
+
+int validate_cn23xx_pf_config_info(struct octeon_device *oct,
+				   struct octeon_config *conf23xx)
+{
+	if (CFG_GET_IQ_MAX_Q(conf23xx) > CN23XX_MAX_INPUT_QUEUES) {
+		dev_err(&oct->pci_dev->dev, "%s: Num IQ (%d) exceeds Max (%d)\n",
+			__func__, CFG_GET_IQ_MAX_Q(conf23xx),
+			CN23XX_MAX_INPUT_QUEUES);
+		return 1;
+	}
+
+	if (CFG_GET_OQ_MAX_Q(conf23xx) > CN23XX_MAX_OUTPUT_QUEUES) {
+		dev_err(&oct->pci_dev->dev, "%s: Num OQ (%d) exceeds Max (%d)\n",
+			__func__, CFG_GET_OQ_MAX_Q(conf23xx),
+			CN23XX_MAX_OUTPUT_QUEUES);
+		return 1;
+	}
+
+	if (CFG_GET_IQ_INSTR_TYPE(conf23xx) != OCTEON_32BYTE_INSTR &&
+	    CFG_GET_IQ_INSTR_TYPE(conf23xx) != OCTEON_64BYTE_INSTR) {
+		dev_err(&oct->pci_dev->dev, "%s: Invalid instr type for IQ\n",
+			__func__);
+		return 1;
+	}
+
+	if (!(CFG_GET_OQ_INFO_PTR(conf23xx)) ||
+	    !(CFG_GET_OQ_REFILL_THRESHOLD(conf23xx))) {
+		dev_err(&oct->pci_dev->dev, "%s: Invalid parameter for OQ\n",
+			__func__);
+		return 1;
+	}
+
+	if (!(CFG_GET_OQ_INTR_TIME(conf23xx))) {
+		dev_err(&oct->pci_dev->dev, "%s: Invalid parameter for OQ\n",
+			__func__);
+		return 1;
+	}
+
+	return 0;
+}
+
+void cn23xx_dump_iq_regs(struct octeon_device *oct)
+{
+	u32 regval, q_no;
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_IQ_DOORBELL_0 [0x%x]: 0x%016llx\n",
+		CN23XX_SLI_IQ_DOORBELL(0),
+		CVM_CAST64(octeon_read_csr64
+			(oct, CN23XX_SLI_IQ_DOORBELL(0))));
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_IQ_BASEADDR_0 [0x%x]: 0x%016llx\n",
+		CN23XX_SLI_IQ_BASE_ADDR64(0),
+		CVM_CAST64(octeon_read_csr64
+			(oct, CN23XX_SLI_IQ_BASE_ADDR64(0))));
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_IQ_FIFO_RSIZE_0 [0x%x]: 0x%016llx\n",
+		CN23XX_SLI_IQ_SIZE(0),
+		CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_IQ_SIZE(0))));
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_CTL_STATUS [0x%x]: 0x%016llx\n",
+		CN23XX_SLI_CTL_STATUS,
+		CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_CTL_STATUS)));
+
+	for (q_no = 0; q_no < CN23XX_MAX_INPUT_QUEUES; q_no++) {
+		dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n",
+			q_no, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
+			CVM_CAST64(octeon_read_csr64
+				(oct,
+					CN23XX_SLI_IQ_PKT_CONTROL64(q_no))));
+	}
+
+	pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
+	dev_dbg(&oct->pci_dev->dev, "Config DevCtl [0x%x]: 0x%08x\n",
+		CN23XX_CONFIG_PCIE_DEVCTL, regval);
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_PRT[%d]_CFG [0x%llx]: 0x%016llx\n",
+		oct->pcie_port, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port),
+		CVM_CAST64(lio_pci_readq(
+			oct, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port))));
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_S2M_PORT[%d]_CTL [0x%x]: 0x%016llx\n",
+		oct->pcie_port, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port),
+		CVM_CAST64(octeon_read_csr64(
+			oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port))));
+}
+
+int cn23xx_fw_loaded(struct octeon_device *oct)
+{
+	u64 val;
+
+	val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1);
+	return (val >> 1) & 1ULL;
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
new file mode 100644
index 0000000..21b5c90
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
@@ -0,0 +1,59 @@
+/**********************************************************************
+* Author: Cavium, Inc.
+*
+* Contact: support@cavium.com
+*          Please include "LiquidIO" in the subject.
+*
+* Copyright (c) 2003-2015 Cavium, Inc.
+*
+* This file is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License, Version 2, as
+* published by the Free Software Foundation.
+*
+* This file is distributed in the hope that it will be useful, but
+* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+* NONINFRINGEMENT.  See the GNU General Public License for more
+* details.
+*
+* This file may also be available under a different license from Cavium.
+* Contact Cavium, Inc. for more information
+**********************************************************************/
+
+/*! \file  cn23xx_device.h
+ * \brief Host Driver: Routines that perform CN23XX specific operations.
+*/
+
+#ifndef __CN23XX_PF_DEVICE_H__
+#define __CN23XX_PF_DEVICE_H__
+
+#include "cn23xx_pf_regs.h"
+
+/* Register address and configuration for a CN23XX devices.
+ * If device specific changes need to be made then add a struct to include
+ * device specific fields as shown in the commented section
+ */
+struct octeon_cn23xx_pf {
+	/** PCI interrupt summary register */
+	u8 __iomem *intr_sum_reg64;
+
+	/** PCI interrupt enable register */
+	u8 __iomem *intr_enb_reg64;
+
+	/** The PCI interrupt mask used by interrupt handler */
+	u64 intr_mask64;
+
+	struct octeon_config *conf;
+};
+
+int setup_cn23xx_octeon_pf_device(struct octeon_device *oct);
+
+int validate_cn23xx_pf_config_info(struct octeon_device *oct,
+				   struct octeon_config *conf23xx);
+
+u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
+
+void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct);
+
+int cn23xx_fw_loaded(struct octeon_device *oct);
+#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
new file mode 100644
index 0000000..03d79d9
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
@@ -0,0 +1,604 @@
+/**********************************************************************
+* Author: Cavium, Inc.
+*
+* Contact: support@cavium.com
+*          Please include "LiquidIO" in the subject.
+*
+* Copyright (c) 2003-2015 Cavium, Inc.
+*
+* This file is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License, Version 2, as
+* published by the Free Software Foundation.
+*
+* This file is distributed in the hope that it will be useful, but
+* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+* NONINFRINGEMENT.  See the GNU General Public License for more
+* details.
+*
+* This file may also be available under a different license from Cavium.
+* Contact Cavium, Inc. for more information
+**********************************************************************/
+
+/*! \file cn23xx_regs.h
+ * \brief Host Driver: Register Address and Register Mask values for
+ * Octeon CN23XX devices.
+*/
+
+#ifndef __CN23XX_PF_REGS_H__
+#define __CN23XX_PF_REGS_H__
+
+#define     CN23XX_CONFIG_VENDOR_ID	0x00
+#define     CN23XX_CONFIG_DEVICE_ID	0x02
+
+#define     CN23XX_CONFIG_XPANSION_BAR             0x38
+
+#define     CN23XX_CONFIG_MSIX_CAP		   0x50
+#define     CN23XX_CONFIG_MSIX_LMSI		   0x54
+#define     CN23XX_CONFIG_MSIX_UMSI		   0x58
+#define     CN23XX_CONFIG_MSIX_MSIMD		   0x5C
+#define     CN23XX_CONFIG_MSIX_MSIMM		   0x60
+#define     CN23XX_CONFIG_MSIX_MSIMP		   0x64
+
+#define     CN23XX_CONFIG_PCIE_CAP                 0x70
+#define     CN23XX_CONFIG_PCIE_DEVCAP              0x74
+#define     CN23XX_CONFIG_PCIE_DEVCTL              0x78
+#define     CN23XX_CONFIG_PCIE_LINKCAP             0x7C
+#define     CN23XX_CONFIG_PCIE_LINKCTL             0x80
+#define     CN23XX_CONFIG_PCIE_SLOTCAP             0x84
+#define     CN23XX_CONFIG_PCIE_SLOTCTL             0x88
+#define     CN23XX_CONFIG_PCIE_DEVCTL2             0x98
+#define     CN23XX_CONFIG_PCIE_LINKCTL2            0xA0
+#define     CN23XX_CONFIG_PCIE_UNCORRECT_ERR_MASK  0x108
+#define     CN23XX_CONFIG_PCIE_CORRECT_ERR_STATUS  0x110
+#define     CN23XX_CONFIG_PCIE_DEVCTL_MASK         0x00040000
+
+#define     CN23XX_PCIE_SRIOV_FDL		   0x188
+#define     CN23XX_PCIE_SRIOV_FDL_BIT_POS	   0x10
+#define     CN23XX_PCIE_SRIOV_FDL_MASK		   0xFF
+
+#define     CN23XX_CONFIG_PCIE_FLTMSK              0x720
+
+#define     CN23XX_CONFIG_SRIOV_VFDEVID            0x190
+
+#define     CN23XX_CONFIG_SRIOV_BAR_START	   0x19C
+#define     CN23XX_CONFIG_SRIOV_BARX(i)		\
+		(CN23XX_CONFIG_SRIOV_BAR_START + (i * 4))
+#define     CN23XX_CONFIG_SRIOV_BAR_PF		   0x08
+#define     CN23XX_CONFIG_SRIOV_BAR_64BIT	   0x04
+#define     CN23XX_CONFIG_SRIOV_BAR_IO		   0x01
+
+/* ##############  BAR0 Registers ################ */
+
+#define    CN23XX_SLI_CTL_PORT_START               0x286E0
+#define    CN23XX_PORT_OFFSET                      0x10
+
+#define    CN23XX_SLI_CTL_PORT(p)                  \
+		(CN23XX_SLI_CTL_PORT_START + ((p) * CN23XX_PORT_OFFSET))
+
+/* 2 scatch registers (64-bit)  */
+#define    CN23XX_SLI_WINDOW_CTL                   0x282E0
+#define    CN23XX_SLI_SCRATCH1                     0x283C0
+#define    CN23XX_SLI_SCRATCH2                     0x283D0
+#define    CN23XX_SLI_WINDOW_CTL_DEFAULT           0x200000ULL
+
+/* 1 registers (64-bit)  - SLI_CTL_STATUS */
+#define    CN23XX_SLI_CTL_STATUS                   0x28570
+
+/* SLI Packet Input Jabber Register (64 bit register)
+ * <31:0> for Byte count for limiting sizes of packet sizes
+ * that are allowed for sli packet inbound packets.
+ * the default value is 0xFA00(=64000).
+ */
+#define    CN23XX_SLI_PKT_IN_JABBER                0x29170
+/* The input jabber is used to determine the TSO max size.
+ * Due to H/W limitation, this need to be reduced to 60000
+ * in order to to H/W TSO and avoid the WQE malfarmation
+ * PKO_BUG_24989_WQE_LEN
+ */
+#define    CN23XX_DEFAULT_INPUT_JABBER             0xEA60 /*60000*/
+
+#define    CN23XX_WIN_WR_ADDR_LO                   0x20000
+#define    CN23XX_WIN_WR_ADDR_HI                   0x20004
+#define    CN23XX_WIN_WR_ADDR64                    CN23XX_WIN_WR_ADDR_LO
+
+#define    CN23XX_WIN_RD_ADDR_LO                   0x20010
+#define    CN23XX_WIN_RD_ADDR_HI                   0x20014
+#define    CN23XX_WIN_RD_ADDR64                    CN23XX_WIN_RD_ADDR_LO
+
+#define    CN23XX_WIN_WR_DATA_LO                   0x20020
+#define    CN23XX_WIN_WR_DATA_HI                   0x20024
+#define    CN23XX_WIN_WR_DATA64                    CN23XX_WIN_WR_DATA_LO
+
+#define    CN23XX_WIN_RD_DATA_LO                   0x20040
+#define    CN23XX_WIN_RD_DATA_HI                   0x20044
+#define    CN23XX_WIN_RD_DATA64                    CN23XX_WIN_RD_DATA_LO
+
+#define    CN23XX_WIN_WR_MASK_LO                   0x20030
+#define    CN23XX_WIN_WR_MASK_HI                   0x20034
+#define    CN23XX_WIN_WR_MASK_REG                  CN23XX_WIN_WR_MASK_LO
+#define    CN23XX_SLI_MAC_CREDIT_CNT               0x23D70
+
+/* 4 registers (64-bit) for mapping IOQs to MACs(PEMs)-
+ * SLI_PKT_MAC(0..3)_PF(0..1)_RINFO
+ */
+#define    CN23XX_SLI_PKT_MAC_RINFO_START64       0x29030
+
+/*1 register (64-bit) to determine whether IOQs are in reset. */
+#define    CN23XX_SLI_PKT_IOQ_RING_RST            0x291E0
+
+/* Each Input Queue register is at a 16-byte Offset in BAR0 */
+#define    CN23XX_IQ_OFFSET                       0x20000
+
+#define    CN23XX_MAC_RINFO_OFFSET                0x20
+#define    CN23XX_PF_RINFO_OFFSET                 0x10
+
+#define CN23XX_SLI_PKT_MAC_RINFO64(mac, pf)		\
+		(CN23XX_SLI_PKT_MAC_RINFO_START64 +     \
+		 ((mac) * CN23XX_MAC_RINFO_OFFSET) +	\
+		 ((pf) * CN23XX_PF_RINFO_OFFSET))
+
+/** mask for total rings, setting TRS to base */
+#define    CN23XX_PKT_MAC_CTL_RINFO_TRS               BIT_ULL(16)
+/** mask for starting ring number: setting SRN <6:0> = 0x7F */
+#define    CN23XX_PKT_MAC_CTL_RINFO_SRN               (0x7F)
+
+/* Starting bit of the TRS field in CN23XX_SLI_PKT_MAC_RINFO64 register */
+#define    CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS     16
+/* Starting bit of SRN field in CN23XX_SLI_PKT_MAC_RINFO64 register */
+#define    CN23XX_PKT_MAC_CTL_RINFO_SRN_BIT_POS     0
+/* Starting bit of RPVF field in CN23XX_SLI_PKT_MAC_RINFO64 register */
+#define    CN23XX_PKT_MAC_CTL_RINFO_RPVF_BIT_POS     32
+/* Starting bit of NVFS field in CN23XX_SLI_PKT_MAC_RINFO64 register */
+#define    CN23XX_PKT_MAC_CTL_RINFO_NVFS_BIT_POS     48
+
+/*###################### REQUEST QUEUE #########################*/
+
+/* 64 registers for Input Queue Instr Count - SLI_PKT_IN_DONE0_CNTS */
+#define    CN23XX_SLI_IQ_INSTR_COUNT_START64     0x10040
+
+/* 64 registers for Input Queues Start Addr - SLI_PKT0_INSTR_BADDR */
+#define    CN23XX_SLI_IQ_BASE_ADDR_START64       0x10010
+
+/* 64 registers for Input Doorbell - SLI_PKT0_INSTR_BAOFF_DBELL */
+#define    CN23XX_SLI_IQ_DOORBELL_START          0x10020
+
+/* 64 registers for Input Queue size - SLI_PKT0_INSTR_FIFO_RSIZE */
+#define    CN23XX_SLI_IQ_SIZE_START              0x10030
+
+/* 64 registers (64-bit) - ES, RO, NS, Arbitration for Input Queue Data &
+ * gather list fetches. SLI_PKT(0..63)_INPUT_CONTROL.
+ */
+#define    CN23XX_SLI_IQ_PKT_CONTROL_START64    0x10000
+
+/*------- Request Queue Macros ---------*/
+#define    CN23XX_SLI_IQ_PKT_CONTROL64(iq)          \
+		(CN23XX_SLI_IQ_PKT_CONTROL_START64 + ((iq) * CN23XX_IQ_OFFSET))
+
+#define    CN23XX_SLI_IQ_BASE_ADDR64(iq)          \
+		(CN23XX_SLI_IQ_BASE_ADDR_START64 + ((iq) * CN23XX_IQ_OFFSET))
+
+#define    CN23XX_SLI_IQ_SIZE(iq)                 \
+		(CN23XX_SLI_IQ_SIZE_START + ((iq) * CN23XX_IQ_OFFSET))
+
+#define    CN23XX_SLI_IQ_DOORBELL(iq)             \
+		(CN23XX_SLI_IQ_DOORBELL_START + ((iq) * CN23XX_IQ_OFFSET))
+
+#define    CN23XX_SLI_IQ_INSTR_COUNT64(iq)          \
+		(CN23XX_SLI_IQ_INSTR_COUNT_START64 + ((iq) * CN23XX_IQ_OFFSET))
+
+/*------------------ Masks ----------------*/
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM                  BIT_ULL(32)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM                 BIT(29)
+/* Number of instructions to be read in one MAC read request.
+ * setting to Max value(4)
+ */
+#define    CN23XX_PKT_INPUT_CTL_RDSIZE                  (3 << 25)
+#define    CN23XX_PKT_INPUT_CTL_IS_64B                  BIT(24)
+#define    CN23XX_PKT_INPUT_CTL_RST                     BIT(23)
+#define    CN23XX_PKT_INPUT_CTL_QUIET                   BIT(28)
+#define    CN23XX_PKT_INPUT_CTL_RING_ENB                BIT(22)
+#define    CN23XX_PKT_INPUT_CTL_DATA_NS                 BIT(8)
+#define    CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP        BIT(6)
+#define    CN23XX_PKT_INPUT_CTL_DATA_RO                 BIT(5)
+#define    CN23XX_PKT_INPUT_CTL_USE_CSR                 BIT(4)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_NS               BIT(3)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP      (2)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_RO               (1)
+
+/** Rings per Virtual Function **/
+#define    CN23XX_PKT_INPUT_CTL_RPVF_MASK               (0x3F)
+#define    CN23XX_PKT_INPUT_CTL_RPVF_POS                (48)
+/** These bits[47:44] select the Physical function number within the MAC */
+#define    CN23XX_PKT_INPUT_CTL_PF_NUM_MASK             (0x7)
+#define    CN23XX_PKT_INPUT_CTL_PF_NUM_POS              (45)
+/** These bits[43:32] select the function number within the PF */
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM_MASK             (0x1FFF)
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM_POS              (32)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM_MASK            (0x3)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM_POS             (29)
+#define    CN23XX_PKT_IN_DONE_WMARK_MASK                (0xFFFFULL)
+#define    CN23XX_PKT_IN_DONE_WMARK_BIT_POS             (32)
+#define    CN23XX_PKT_IN_DONE_CNT_MASK                  (0x00000000FFFFFFFFULL)
+
+#ifdef __LITTLE_ENDIAN_BITFIELD
+#define    CN23XX_PKT_INPUT_CTL_MASK				\
+		(CN23XX_PKT_INPUT_CTL_RDSIZE		|	\
+		 CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP	|	\
+		 CN23XX_PKT_INPUT_CTL_USE_CSR)
+#else
+#define    CN23XX_PKT_INPUT_CTL_MASK				\
+		(CN23XX_PKT_INPUT_CTL_RDSIZE		|	\
+		 CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP	|	\
+		 CN23XX_PKT_INPUT_CTL_USE_CSR		|	\
+		 CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP)
+#endif
+
+/** Masks for SLI_PKT_IN_DONE(0..63)_CNTS Register */
+#define    CN23XX_IN_DONE_CNTS_PI_INT               BIT_ULL(62)
+#define    CN23XX_IN_DONE_CNTS_CINT_ENB             BIT_ULL(48)
+
+/*############################ OUTPUT QUEUE #########################*/
+
+/* 64 registers for Output queue control - SLI_PKT(0..63)_OUTPUT_CONTROL */
+#define    CN23XX_SLI_OQ_PKT_CONTROL_START       0x10050
+
+/* 64 registers for Output queue buffer and info size - SLI_PKT0_OUT_SIZE */
+#define    CN23XX_SLI_OQ0_BUFF_INFO_SIZE         0x10060
+
+/* 64 registers for Output Queue Start Addr - SLI_PKT0_SLIST_BADDR */
+#define    CN23XX_SLI_OQ_BASE_ADDR_START64       0x10070
+
+/* 64 registers for Output Queue Packet Credits - SLI_PKT0_SLIST_BAOFF_DBELL */
+#define    CN23XX_SLI_OQ_PKT_CREDITS_START       0x10080
+
+/* 64 registers for Output Queue size - SLI_PKT0_SLIST_FIFO_RSIZE */
+#define    CN23XX_SLI_OQ_SIZE_START              0x10090
+
+/* 64 registers for Output Queue Packet Count - SLI_PKT0_CNTS */
+#define    CN23XX_SLI_OQ_PKT_SENT_START          0x100B0
+
+/* 64 registers for Output Queue INT Levels - SLI_PKT0_INT_LEVELS */
+#define    CN23XX_SLI_OQ_PKT_INT_LEVELS_START64   0x100A0
+
+/* Each Output Queue register is at a 16-byte Offset in BAR0 */
+#define    CN23XX_OQ_OFFSET                      0x20000
+
+/* 1 (64-bit register) for Output Queue backpressure across all rings. */
+#define    CN23XX_SLI_OQ_WMARK                   0x29180
+
+/* Global pkt control register */
+#define    CN23XX_SLI_GBL_CONTROL                0x29210
+
+/* Backpressure enable register for PF0  */
+#define    CN23XX_SLI_OUT_BP_EN_W1S              0x29260
+
+/* Backpressure enable register for PF1  */
+#define    CN23XX_SLI_OUT_BP_EN2_W1S             0x29270
+
+/* Backpressure disable register for PF0  */
+#define    CN23XX_SLI_OUT_BP_EN_W1C              0x29280
+
+/* Backpressure disable register for PF1  */
+#define    CN23XX_SLI_OUT_BP_EN2_W1C             0x29290
+
+/*------- Output Queue Macros ---------*/
+
+#define    CN23XX_SLI_OQ_PKT_CONTROL(oq)          \
+		(CN23XX_SLI_OQ_PKT_CONTROL_START + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_BASE_ADDR64(oq)          \
+		(CN23XX_SLI_OQ_BASE_ADDR_START64 + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_SIZE(oq)                 \
+		(CN23XX_SLI_OQ_SIZE_START + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_BUFF_INFO_SIZE(oq)                 \
+		(CN23XX_SLI_OQ0_BUFF_INFO_SIZE + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_PKTS_SENT(oq)            \
+		(CN23XX_SLI_OQ_PKT_SENT_START + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_PKTS_CREDIT(oq)          \
+		(CN23XX_SLI_OQ_PKT_CREDITS_START + ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_PKT_INT_LEVELS(oq)		\
+		(CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 +	\
+		 ((oq) * CN23XX_OQ_OFFSET))
+
+/*Macro's for accessing CNT and TIME separately from INT_LEVELS*/
+#define    CN23XX_SLI_OQ_PKT_INT_LEVELS_CNT(oq)		\
+		(CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \
+		 ((oq) * CN23XX_OQ_OFFSET))
+
+#define    CN23XX_SLI_OQ_PKT_INT_LEVELS_TIME(oq)	\
+		(CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 +	\
+		 ((oq) * CN23XX_OQ_OFFSET) + 4)
+
+/*------------------ Masks ----------------*/
+#define    CN23XX_PKT_OUTPUT_CTL_TENB                  BIT(13)
+#define    CN23XX_PKT_OUTPUT_CTL_CENB                  BIT(12)
+#define    CN23XX_PKT_OUTPUT_CTL_IPTR                  BIT(11)
+#define    CN23XX_PKT_OUTPUT_CTL_ES                    BIT(9)
+#define    CN23XX_PKT_OUTPUT_CTL_NSR                   BIT(8)
+#define    CN23XX_PKT_OUTPUT_CTL_ROR                   BIT(7)
+#define    CN23XX_PKT_OUTPUT_CTL_DPTR                  BIT(6)
+#define    CN23XX_PKT_OUTPUT_CTL_BMODE                 BIT(5)
+#define    CN23XX_PKT_OUTPUT_CTL_ES_P                  BIT(3)
+#define    CN23XX_PKT_OUTPUT_CTL_NSR_P                 BIT(2)
+#define    CN23XX_PKT_OUTPUT_CTL_ROR_P                 BIT(1)
+#define    CN23XX_PKT_OUTPUT_CTL_RING_ENB              BIT(0)
+
+/*######################### Mailbox Reg Macros ########################*/
+#define    CN23XX_SLI_PKT_MBOX_INT_START             0x10210
+#define    CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START       0x10200
+#define    CN23XX_SLI_MAC_PF_MBOX_INT_START          0x27380
+
+#define    CN23XX_SLI_MBOX_OFFSET		     0x20000
+#define    CN23XX_SLI_MBOX_SIG_IDX_OFFSET	     0x8
+
+#define    CN23XX_SLI_PKT_MBOX_INT(q)          \
+		(CN23XX_SLI_PKT_MBOX_INT_START + ((q) * CN23XX_SLI_MBOX_OFFSET))
+
+#define    CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q, idx)		\
+		(CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START +		\
+		 ((q) * CN23XX_SLI_MBOX_OFFSET +		\
+		  (idx) * CN23XX_SLI_MBOX_SIG_IDX_OFFSET))
+
+#define    CN23XX_SLI_MAC_PF_MBOX_INT(mac, pf)		\
+		(CN23XX_SLI_MAC_PF_MBOX_INT_START +	\
+		 ((mac) * CN23XX_MAC_INT_OFFSET +	\
+		  (pf) * CN23XX_PF_INT_OFFSET))
+
+/*######################### DMA Counters #########################*/
+
+/* 2 registers (64-bit) - DMA Count - 1 for each DMA counter 0/1. */
+#define    CN23XX_DMA_CNT_START                   0x28400
+
+/* 2 registers (64-bit) - DMA Timer 0/1, contains DMA timer values */
+/* SLI_DMA_0_TIM */
+#define    CN23XX_DMA_TIM_START                   0x28420
+
+/* 2 registers (64-bit) - DMA count & Time Interrupt threshold -
+ * SLI_DMA_0_INT_LEVEL
+ */
+#define    CN23XX_DMA_INT_LEVEL_START             0x283E0
+
+/* Each DMA register is at a 16-byte Offset in BAR0 */
+#define    CN23XX_DMA_OFFSET                      0x10
+
+/*---------- DMA Counter Macros ---------*/
+#define    CN23XX_DMA_CNT(dq)                      \
+		(CN23XX_DMA_CNT_START + ((dq) * CN23XX_DMA_OFFSET))
+
+#define    CN23XX_DMA_INT_LEVEL(dq)                \
+		(CN23XX_DMA_INT_LEVEL_START + ((dq) * CN23XX_DMA_OFFSET))
+
+#define    CN23XX_DMA_PKT_INT_LEVEL(dq)            \
+		(CN23XX_DMA_INT_LEVEL_START + ((dq) * CN23XX_DMA_OFFSET))
+
+#define    CN23XX_DMA_TIME_INT_LEVEL(dq)           \
+		(CN23XX_DMA_INT_LEVEL_START + 4 + ((dq) * CN23XX_DMA_OFFSET))
+
+#define    CN23XX_DMA_TIM(dq)                     \
+		(CN23XX_DMA_TIM_START + ((dq) * CN23XX_DMA_OFFSET))
+
+/*######################## MSIX TABLE #########################*/
+
+#define	CN23XX_MSIX_TABLE_ADDR_START		0x0
+#define	CN23XX_MSIX_TABLE_DATA_START		0x8
+
+#define	CN23XX_MSIX_TABLE_SIZE			0x10
+#define	CN23XX_MSIX_TABLE_ENTRIES		0x41
+
+#define CN23XX_MSIX_ENTRY_VECTOR_CTL	BIT_ULL(32)
+
+#define	CN23XX_MSIX_TABLE_ADDR(idx)		\
+	(CN23XX_MSIX_TABLE_ADDR_START + ((idx) * CN23XX_MSIX_TABLE_SIZE))
+
+#define	CN23XX_MSIX_TABLE_DATA(idx)		\
+	(CN23XX_MSIX_TABLE_DATA_START + ((idx) * CN23XX_MSIX_TABLE_SIZE))
+
+/*######################## INTERRUPTS #########################*/
+#define CN23XX_MAC_INT_OFFSET   0x20
+#define CN23XX_PF_INT_OFFSET    0x10
+
+/* 1 register (64-bit) for Interrupt Summary */
+#define    CN23XX_SLI_INT_SUM64            0x27000
+
+/* 4 registers (64-bit) for Interrupt Enable for each Port */
+#define    CN23XX_SLI_INT_ENB64            0x27080
+
+#define    CN23XX_SLI_MAC_PF_INT_SUM64(mac, pf)			\
+		(CN23XX_SLI_INT_SUM64 +				\
+		 ((mac) * CN23XX_MAC_INT_OFFSET) +		\
+		 ((pf) * CN23XX_PF_INT_OFFSET))
+
+#define    CN23XX_SLI_MAC_PF_INT_ENB64(mac, pf)		\
+		(CN23XX_SLI_INT_ENB64 +			\
+		 ((mac) * CN23XX_MAC_INT_OFFSET) +	\
+		 ((pf) * CN23XX_PF_INT_OFFSET))
+
+/* 1 register (64-bit) to indicate which Output Queue reached pkt threshold */
+#define    CN23XX_SLI_PKT_CNT_INT                0x29130
+
+/* 1 register (64-bit) to indicate which Output Queue reached time threshold */
+#define    CN23XX_SLI_PKT_TIME_INT               0x29140
+
+/*------------------ Interrupt Masks ----------------*/
+
+#define    CN23XX_INTR_PO_INT			BIT_ULL(63)
+#define    CN23XX_INTR_PI_INT			BIT_ULL(62)
+#define    CN23XX_INTR_MBOX_INT			BIT_ULL(61)
+#define    CN23XX_INTR_RESEND			BIT_ULL(60)
+
+#define    CN23XX_INTR_CINT_ENB                 BIT_ULL(48)
+#define    CN23XX_INTR_MBOX_ENB                 BIT(0)
+
+#define    CN23XX_INTR_RML_TIMEOUT_ERR           (1)
+
+#define    CN23XX_INTR_MIO_INT                   BIT(1)
+
+#define    CN23XX_INTR_RESERVED1                 (3 << 2)
+
+#define    CN23XX_INTR_PKT_COUNT                 BIT(4)
+#define    CN23XX_INTR_PKT_TIME                  BIT(5)
+
+#define    CN23XX_INTR_RESERVED2                 (3 << 6)
+
+#define    CN23XX_INTR_M0UPB0_ERR                BIT(8)
+#define    CN23XX_INTR_M0UPWI_ERR                BIT(9)
+#define    CN23XX_INTR_M0UNB0_ERR                BIT(10)
+#define    CN23XX_INTR_M0UNWI_ERR                BIT(11)
+
+#define    CN23XX_INTR_RESERVED3                 (0xFFFFFULL << 12)
+
+#define    CN23XX_INTR_DMA0_FORCE                BIT_ULL(32)
+#define    CN23XX_INTR_DMA1_FORCE                BIT_ULL(33)
+
+#define    CN23XX_INTR_DMA0_COUNT                BIT_ULL(34)
+#define    CN23XX_INTR_DMA1_COUNT                BIT_ULL(35)
+
+#define    CN23XX_INTR_DMA0_TIME                 BIT_ULL(36)
+#define    CN23XX_INTR_DMA1_TIME                 BIT_ULL(37)
+
+#define    CN23XX_INTR_RESERVED4                 (0x7FFFFULL << 38)
+
+#define    CN23XX_INTR_VF_MBOX                   BIT_ULL(57)
+#define    CN23XX_INTR_DMAVF_ERR                 BIT_ULL(58)
+#define    CN23XX_INTR_DMAPF_ERR                 BIT_ULL(59)
+
+#define    CN23XX_INTR_PKTVF_ERR                 BIT_ULL(60)
+#define    CN23XX_INTR_PKTPF_ERR                 BIT_ULL(61)
+#define    CN23XX_INTR_PPVF_ERR                  BIT_ULL(62)
+#define    CN23XX_INTR_PPPF_ERR                  BIT_ULL(63)
+
+#define    CN23XX_INTR_DMA0_DATA                 (CN23XX_INTR_DMA0_TIME)
+#define    CN23XX_INTR_DMA1_DATA                 (CN23XX_INTR_DMA1_TIME)
+
+#define    CN23XX_INTR_DMA_DATA                  \
+		(CN23XX_INTR_DMA0_DATA | CN23XX_INTR_DMA1_DATA)
+
+/* By fault only TIME based */
+#define    CN23XX_INTR_PKT_DATA                  (CN23XX_INTR_PKT_TIME)
+/* For both COUNT and TIME based */
+/* #define    CN23XX_INTR_PKT_DATA                  \
+ * (CN23XX_INTR_PKT_COUNT | CN23XX_INTR_PKT_TIME)
+ */
+
+/* Sum of interrupts for all PCI-Express Data Interrupts */
+#define    CN23XX_INTR_PCIE_DATA                 \
+		(CN23XX_INTR_DMA_DATA | CN23XX_INTR_PKT_DAT)
+
+/* Sum of interrupts for error events */
+#define    CN23XX_INTR_ERR			\
+		(CN23XX_INTR_M0UPB0_ERR	|	\
+		 CN23XX_INTR_M0UPWI_ERR	|	\
+		 CN23XX_INTR_M0UNB0_ERR	|	\
+		 CN23XX_INTR_M0UNWI_ERR	|	\
+		 CN23XX_INTR_DMAVF_ERR	|	\
+		 CN23XX_INTR_DMAPF_ERR	|	\
+		 CN23XX_INTR_PKTPF_ERR	|	\
+		 CN23XX_INTR_PPPF_ERR	|	\
+		 CN23XX_INTR_PPVF_ERR)
+
+/* Programmed Mask for Interrupt Sum */
+#define    CN23XX_INTR_MASK			\
+		(CN23XX_INTR_DMA_DATA	|	\
+		 CN23XX_INTR_DMA0_FORCE	|	\
+		 CN23XX_INTR_DMA1_FORCE	|	\
+		 CN23XX_INTR_MIO_INT	|	\
+		 CN23XX_INTR_ERR)
+
+/* 4 Registers (64 - bit) */
+#define    CN23XX_SLI_S2M_PORT_CTL_START         0x23D80
+#define    CN23XX_SLI_S2M_PORTX_CTL(port)	\
+		(CN23XX_SLI_S2M_PORT_CTL_START + (port * 0x10))
+
+#define    CN23XX_SLI_MAC_NUMBER                 0x20050
+
+/** PEM(0..3)_BAR1_INDEX(0..15)address is defined as
+ *  addr = (0x00011800C0000100  |port <<24 |idx <<3 )
+ *  Here, port is PEM(0..3) & idx is INDEX(0..15)
+ */
+#define    CN23XX_PEM_BAR1_INDEX_START             0x00011800C0000100ULL
+#define    CN23XX_PEM_OFFSET                       24
+#define    CN23XX_BAR1_INDEX_OFFSET                3
+
+#define    CN23XX_PEM_BAR1_INDEX_REG(port, idx)		\
+		(CN23XX_PEM_BAR1_INDEX_START + ((port) << CN23XX_PEM_OFFSET) + \
+		 ((idx) << CN23XX_BAR1_INDEX_OFFSET))
+
+/*############################ DPI #########################*/
+
+/* 1 register (64-bit) - provides DMA Enable */
+#define    CN23XX_DPI_CTL                 0x0001df0000000040ULL
+
+/* 1 register (64-bit) - Controls the DMA IO Operation */
+#define    CN23XX_DPI_DMA_CONTROL         0x0001df0000000048ULL
+
+/* 1 register (64-bit) - Provides DMA Instr'n Queue Enable  */
+#define    CN23XX_DPI_REQ_GBL_ENB         0x0001df0000000050ULL
+
+/* 1 register (64-bit) - DPI_REQ_ERR_RSP
+ * Indicates which Instr'n Queue received error response from the IO sub-system
+ */
+#define    CN23XX_DPI_REQ_ERR_RSP         0x0001df0000000058ULL
+
+/* 1 register (64-bit) - DPI_REQ_ERR_RST
+ * Indicates which Instr'n Queue dropped an Instr'n
+ */
+#define    CN23XX_DPI_REQ_ERR_RST         0x0001df0000000060ULL
+
+/* 6 register (64-bit) - DPI_DMA_ENG(0..5)_EN
+ * Provides DMA Engine Queue Enable
+ */
+#define    CN23XX_DPI_DMA_ENG0_ENB        0x0001df0000000080ULL
+#define    CN23XX_DPI_DMA_ENG_ENB(eng) (CN23XX_DPI_DMA_ENG0_ENB + (eng * 8))
+
+/* 8 register (64-bit) - DPI_DMA(0..7)_REQQ_CTL
+ * Provides control bits for transaction on 8 Queues
+ */
+#define    CN23XX_DPI_DMA_REQQ0_CTL       0x0001df0000000180ULL
+#define    CN23XX_DPI_DMA_REQQ_CTL(q_no)	\
+		(CN23XX_DPI_DMA_REQQ0_CTL + (q_no * 8))
+
+/* 6 register (64-bit) - DPI_ENG(0..5)_BUF
+ * Provides DMA Engine FIFO (Queue) Size
+ */
+#define    CN23XX_DPI_DMA_ENG0_BUF        0x0001df0000000880ULL
+#define    CN23XX_DPI_DMA_ENG_BUF(eng)   \
+		(CN23XX_DPI_DMA_ENG0_BUF + (eng * 8))
+
+/* 4 Registers (64-bit) */
+#define    CN23XX_DPI_SLI_PRT_CFG_START   0x0001df0000000900ULL
+#define    CN23XX_DPI_SLI_PRTX_CFG(port)        \
+		(CN23XX_DPI_SLI_PRT_CFG_START + (port * 0x8))
+
+/* Masks for DPI_DMA_CONTROL Register */
+#define    CN23XX_DPI_DMA_COMMIT_MODE     BIT_ULL(58)
+#define    CN23XX_DPI_DMA_PKT_EN          BIT_ULL(56)
+#define    CN23XX_DPI_DMA_ENB             (0x0FULL << 48)
+/* Set the DMA Control, to update packet count not byte count sent by DMA,
+ * when we use Interrupt Coalescing (CA mode)
+ */
+#define    CN23XX_DPI_DMA_O_ADD1          BIT(19)
+/*selecting 64-bit Byte Swap Mode */
+#define    CN23XX_DPI_DMA_O_ES            BIT(15)
+#define    CN23XX_DPI_DMA_O_MODE          BIT(14)
+
+#define    CN23XX_DPI_DMA_CTL_MASK			\
+		(CN23XX_DPI_DMA_COMMIT_MODE	|	\
+		 CN23XX_DPI_DMA_PKT_EN		|	\
+		 CN23XX_DPI_DMA_O_ES		|	\
+		 CN23XX_DPI_DMA_O_MODE)
+
+/*############################ RST #########################*/
+
+#define    CN23XX_RST_BOOT            0x0001180006001600ULL
+#define    CN23XX_RST_SOFT_RST        0x0001180006001680ULL
+
+#define    CN23XX_LMC0_RESET_CTL               0x0001180088000180ULL
+#define    CN23XX_LMC0_RESET_CTL_DDR3RST_MASK  0x0000000000000001ULL
+
+#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
index c03d370..e779af8 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
@@ -338,7 +338,7 @@
 	octeon_write_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB, intr);
 }
 
-void lio_cn6xxx_enable_io_queues(struct octeon_device *oct)
+int lio_cn6xxx_enable_io_queues(struct octeon_device *oct)
 {
 	u32 mask;
 
@@ -353,6 +353,8 @@
 	mask = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB);
 	mask |= oct->io_qmask.oq;
 	octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, mask);
+
+	return 0;
 }
 
 void lio_cn6xxx_disable_io_queues(struct octeon_device *oct)
@@ -418,36 +420,6 @@
 		octeon_write_csr(oct, CN6XXX_SLI_PKT_TIME_INT, d32);
 }
 
-void lio_cn6xxx_reinit_regs(struct octeon_device *oct)
-{
-	int i;
-
-	for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-		if (!(oct->io_qmask.iq & (1ULL << i)))
-			continue;
-		oct->fn_list.setup_iq_regs(oct, i);
-	}
-
-	for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-		if (!(oct->io_qmask.oq & (1ULL << i)))
-			continue;
-		oct->fn_list.setup_oq_regs(oct, i);
-	}
-
-	oct->fn_list.setup_device_regs(oct);
-
-	oct->fn_list.enable_interrupt(oct->chip);
-
-	oct->fn_list.enable_io_queues(oct);
-
-	/* for (i = 0; i < oct->num_oqs; i++) { */
-	for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-		if (!(oct->io_qmask.oq & (1ULL << i)))
-			continue;
-		writel(oct->droq[i]->max_count, oct->droq[i]->pkts_credit_reg);
-	}
-}
-
 void
 lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct,
 			  u64 core_addr,
@@ -507,18 +479,20 @@
 	return new_idx;
 }
 
-void lio_cn6xxx_enable_interrupt(void *chip)
+void lio_cn6xxx_enable_interrupt(struct octeon_device *oct,
+				 u8 unused __attribute__((unused)))
 {
-	struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)chip;
+	struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)oct->chip;
 	u64 mask = cn6xxx->intr_mask64 | CN6XXX_INTR_DMA0_FORCE;
 
 	/* Enable Interrupt */
 	writeq(mask, cn6xxx->intr_enb_reg64);
 }
 
-void lio_cn6xxx_disable_interrupt(void *chip)
+void lio_cn6xxx_disable_interrupt(struct octeon_device *oct,
+				  u8 unused __attribute__((unused)))
 {
-	struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)chip;
+	struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)oct->chip;
 
 	/* Disable Interrupts */
 	writeq(0, cn6xxx->intr_enb_reg64);
@@ -714,7 +688,6 @@
 
 	oct->fn_list.soft_reset = lio_cn6xxx_soft_reset;
 	oct->fn_list.setup_device_regs = lio_cn6xxx_setup_device_regs;
-	oct->fn_list.reinit_regs = lio_cn6xxx_reinit_regs;
 	oct->fn_list.update_iq_read_idx = lio_cn6xxx_update_read_index;
 
 	oct->fn_list.bar1_idx_setup = lio_cn6xxx_bar1_idx_setup;
diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
index 28c4722..a40a913 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
@@ -80,18 +80,17 @@
 void lio_cn6xxx_setup_global_output_regs(struct octeon_device *oct);
 void lio_cn6xxx_setup_iq_regs(struct octeon_device *oct, u32 iq_no);
 void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no);
-void lio_cn6xxx_enable_io_queues(struct octeon_device *oct);
+int lio_cn6xxx_enable_io_queues(struct octeon_device *oct);
 void lio_cn6xxx_disable_io_queues(struct octeon_device *oct);
 irqreturn_t lio_cn6xxx_process_interrupt_regs(void *dev);
-void lio_cn6xxx_reinit_regs(struct octeon_device *oct);
 void lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr,
 			       u32 idx, int valid);
 void lio_cn6xxx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask);
 u32 lio_cn6xxx_bar1_idx_read(struct octeon_device *oct, u32 idx);
 u32
 lio_cn6xxx_update_read_index(struct octeon_instr_queue *iq);
-void lio_cn6xxx_enable_interrupt(void *chip);
-void lio_cn6xxx_disable_interrupt(void *chip);
+void lio_cn6xxx_enable_interrupt(struct octeon_device *oct, u8 unused);
+void lio_cn6xxx_disable_interrupt(struct octeon_device *oct, u8 unused);
 void cn6xxx_get_pcie_qlmport(struct octeon_device *oct);
 void lio_cn6xxx_setup_reg_address(struct octeon_device *oct, void *chip,
 				  struct octeon_reg_list *reg_list);
diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
index 29755bc6..dbf3566 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
@@ -148,7 +148,6 @@
 	oct->fn_list.process_interrupt_regs = lio_cn6xxx_process_interrupt_regs;
 	oct->fn_list.soft_reset = lio_cn68xx_soft_reset;
 	oct->fn_list.setup_device_regs = lio_cn68xx_setup_device_regs;
-	oct->fn_list.reinit_regs = lio_cn6xxx_reinit_regs;
 	oct->fn_list.update_iq_read_idx = lio_cn6xxx_update_read_index;
 
 	oct->fn_list.bar1_idx_setup = lio_cn6xxx_bar1_idx_setup;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
new file mode 100644
index 0000000..201eddb
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -0,0 +1,266 @@
+/**********************************************************************
+* Author: Cavium, Inc.
+*
+* Contact: support@cavium.com
+*          Please include "LiquidIO" in the subject.
+*
+* Copyright (c) 2003-2015 Cavium, Inc.
+*
+* This file is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License, Version 2, as
+* published by the Free Software Foundation.
+*
+* This file is distributed in the hope that it will be useful, but
+* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+* NONINFRINGEMENT.  See the GNU General Public License for more
+* details.
+*
+* This file may also be available under a different license from Cavium.
+* Contact Cavium, Inc. for more information
+**********************************************************************/
+#include <linux/pci.h>
+#include <linux/if_vlan.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "octeon_nic.h"
+#include "octeon_main.h"
+#include "octeon_network.h"
+
+int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = cmd;
+	nctrl.ncmd.s.param1 = param1;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = 100;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n",
+			ret);
+	}
+	return ret;
+}
+
+void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl,
+					unsigned int bytes_compl)
+{
+	struct netdev_queue *netdev_queue = txq;
+
+	netdev_tx_completed_queue(netdev_queue, pkts_compl, bytes_compl);
+}
+
+void octeon_update_tx_completion_counters(void *buf, int reqtype,
+					  unsigned int *pkts_compl,
+					  unsigned int *bytes_compl)
+{
+	struct octnet_buf_free_info *finfo;
+	struct sk_buff *skb = NULL;
+	struct octeon_soft_command *sc;
+
+	switch (reqtype) {
+	case REQTYPE_NORESP_NET:
+	case REQTYPE_NORESP_NET_SG:
+		finfo = buf;
+		skb = finfo->skb;
+		break;
+
+	case REQTYPE_RESP_NET_SG:
+	case REQTYPE_RESP_NET:
+		sc = buf;
+		skb = sc->callback_arg;
+		break;
+
+	default:
+		return;
+	}
+
+	(*pkts_compl)++;
+/*TODO, Use some other pound define to suggest
+ * the fact that iqs are not tied to netdevs
+ * and can take traffic from different netdevs
+ * hence bql reporting is done per packet
+ * than in bulk. Usage of NO_NAPI in txq completion is
+ * a little confusing
+ */
+	*bytes_compl += skb->len;
+}
+
+void octeon_report_sent_bytes_to_bql(void *buf, int reqtype)
+{
+	struct octnet_buf_free_info *finfo;
+	struct sk_buff *skb;
+	struct octeon_soft_command *sc;
+	struct netdev_queue *txq;
+
+	switch (reqtype) {
+	case REQTYPE_NORESP_NET:
+	case REQTYPE_NORESP_NET_SG:
+		finfo = buf;
+		skb = finfo->skb;
+		break;
+
+	case REQTYPE_RESP_NET_SG:
+	case REQTYPE_RESP_NET:
+		sc = buf;
+		skb = sc->callback_arg;
+		break;
+
+	default:
+		return;
+	}
+
+	txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb));
+	netdev_tx_sent_queue(txq, skb->len);
+}
+
+void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
+{
+	struct octnic_ctrl_pkt *nctrl = (struct octnic_ctrl_pkt *)nctrl_ptr;
+	struct net_device *netdev = (struct net_device *)nctrl->netpndev;
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	u8 *mac;
+
+	switch (nctrl->ncmd.s.cmd) {
+	case OCTNET_CMD_CHANGE_DEVFLAGS:
+	case OCTNET_CMD_SET_MULTI_LIST:
+		break;
+
+	case OCTNET_CMD_CHANGE_MACADDR:
+		mac = ((u8 *)&nctrl->udd[0]) + 2;
+		netif_info(lio, probe, lio->netdev,
+			   "MACAddr changed to %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n",
+			   mac[0], mac[1],
+			   mac[2], mac[3],
+			   mac[4], mac[5]);
+		break;
+
+	case OCTNET_CMD_CHANGE_MTU:
+		/* If command is successful, change the MTU. */
+		netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
+			   netdev->mtu, nctrl->ncmd.s.param1);
+		dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
+			 netdev->name, netdev->mtu,
+			 nctrl->ncmd.s.param1);
+		netdev->mtu = nctrl->ncmd.s.param1;
+		queue_delayed_work(lio->link_status_wq.wq,
+				   &lio->link_status_wq.wk.work, 0);
+		break;
+
+	case OCTNET_CMD_GPIO_ACCESS:
+		netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
+
+		break;
+
+	case OCTNET_CMD_ID_ACTIVE:
+		netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
+
+		break;
+
+	case OCTNET_CMD_LRO_ENABLE:
+		dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name);
+		break;
+
+	case OCTNET_CMD_LRO_DISABLE:
+		dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n",
+			 netdev->name);
+		break;
+
+	case OCTNET_CMD_VERBOSE_ENABLE:
+		dev_info(&oct->pci_dev->dev, "%s Firmware debug enabled\n",
+			 netdev->name);
+		break;
+
+	case OCTNET_CMD_VERBOSE_DISABLE:
+		dev_info(&oct->pci_dev->dev, "%s Firmware debug disabled\n",
+			 netdev->name);
+		break;
+
+	case OCTNET_CMD_ENABLE_VLAN_FILTER:
+		dev_info(&oct->pci_dev->dev, "%s VLAN filter enabled\n",
+			 netdev->name);
+		break;
+
+	case OCTNET_CMD_ADD_VLAN_FILTER:
+		dev_info(&oct->pci_dev->dev, "%s VLAN filter %d added\n",
+			 netdev->name, nctrl->ncmd.s.param1);
+		break;
+
+	case OCTNET_CMD_DEL_VLAN_FILTER:
+		dev_info(&oct->pci_dev->dev, "%s VLAN filter %d removed\n",
+			 netdev->name, nctrl->ncmd.s.param1);
+		break;
+
+	case OCTNET_CMD_SET_SETTINGS:
+		dev_info(&oct->pci_dev->dev, "%s settings changed\n",
+			 netdev->name);
+
+		break;
+
+	/* Case to handle "OCTNET_CMD_TNL_RX_CSUM_CTL"
+	 * Command passed by NIC driver
+	 */
+	case OCTNET_CMD_TNL_RX_CSUM_CTL:
+		if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) {
+			netif_info(lio, probe, lio->netdev,
+				   "RX Checksum Offload Enabled\n");
+		} else if (nctrl->ncmd.s.param1 ==
+			   OCTNET_CMD_RXCSUM_DISABLE) {
+			netif_info(lio, probe, lio->netdev,
+				   "RX Checksum Offload Disabled\n");
+		}
+		break;
+
+		/* Case to handle "OCTNET_CMD_TNL_TX_CSUM_CTL"
+		 * Command passed by NIC driver
+		 */
+	case OCTNET_CMD_TNL_TX_CSUM_CTL:
+		if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) {
+			netif_info(lio, probe, lio->netdev,
+				   "TX Checksum Offload Enabled\n");
+		} else if (nctrl->ncmd.s.param1 ==
+			   OCTNET_CMD_TXCSUM_DISABLE) {
+			netif_info(lio, probe, lio->netdev,
+				   "TX Checksum Offload Disabled\n");
+		}
+		break;
+
+		/* Case to handle "OCTNET_CMD_VXLAN_PORT_CONFIG"
+		 * Command passed by NIC driver
+		 */
+	case OCTNET_CMD_VXLAN_PORT_CONFIG:
+		if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) {
+			netif_info(lio, probe, lio->netdev,
+				   "VxLAN Destination UDP PORT:%d ADDED\n",
+				   nctrl->ncmd.s.param1);
+		} else if (nctrl->ncmd.s.more ==
+			   OCTNET_CMD_VXLAN_PORT_DEL) {
+			netif_info(lio, probe, lio->netdev,
+				   "VxLAN Destination UDP PORT:%d DELETED\n",
+				   nctrl->ncmd.s.param1);
+		}
+		break;
+
+	case OCTNET_CMD_SET_FLOW_CTL:
+		netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n");
+		break;
+
+	default:
+		dev_err(&oct->pci_dev->dev, "%s Unknown cmd %d\n", __func__,
+			nctrl->ncmd.s.cmd);
+	}
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index 289eb89..f163e0a 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -32,6 +32,7 @@
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
+#include "cn23xx_pf_device.h"
 
 static int octnet_get_link_stats(struct net_device *netdev);
 
@@ -75,6 +76,7 @@
 
 #define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0]))
 #define OCT_ETHTOOL_REGDUMP_LEN  4096
+#define OCT_ETHTOOL_REGDUMP_LEN_23XX  (4096 * 11)
 #define OCT_ETHTOOL_REGSVER  1
 
 /* statistics of PF */
@@ -188,6 +190,10 @@
 	"buffer_alloc_failure",
 };
 
+/* LiquidIO driver private flags */
+static const char oct_priv_flags_strings[][ETH_GSTRING_LEN] = {
+};
+
 #define OCTNIC_NCMD_AUTONEG_ON  0x1
 #define OCTNIC_NCMD_PHY_ON      0x2
 
@@ -259,6 +265,13 @@
 		max_tx = CFG_GET_IQ_MAX_Q(conf6x);
 		rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx);
 		tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx);
+	} else if (OCTEON_CN23XX_PF(oct)) {
+		struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+
+		max_rx = CFG_GET_OQ_MAX_Q(conf23);
+		max_tx = CFG_GET_IQ_MAX_Q(conf23);
+		rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf23, lio->ifidx);
+		tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf23, lio->ifidx);
 	}
 
 	channel->max_rx = max_rx;
@@ -290,18 +303,16 @@
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct_dev = lio->oct_dev;
 	struct octeon_board_info *board_info;
-	int len;
 
-	if (eeprom->offset != 0)
+	if (eeprom->offset)
 		return -EINVAL;
 
 	eeprom->magic = oct_dev->pci_dev->vendor;
 	board_info = (struct octeon_board_info *)(&oct_dev->boardinfo);
-	len =
-		sprintf((char *)bytes,
-			"boardname:%s serialnum:%s maj:%lld min:%lld\n",
-			board_info->name, board_info->serial_number,
-			board_info->major, board_info->minor);
+	sprintf((char *)bytes,
+		"boardname:%s serialnum:%s maj:%lld min:%lld\n",
+		board_info->name, board_info->serial_number,
+		board_info->major, board_info->minor);
 
 	return 0;
 }
@@ -333,6 +344,32 @@
 	return 0;
 }
 
+static int octnet_id_active(struct net_device *netdev, int val)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_ID_ACTIVE;
+	nctrl.ncmd.s.param1 = val;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = 100;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /* Callback for when mdio command response arrives
  */
 static void octnet_mdio_resp_callback(struct octeon_device *oct,
@@ -406,7 +443,7 @@
 		dev_err(&oct_dev->pci_dev->dev,
 			"octnet_mdio45_access instruction failed status: %x\n",
 			retval);
-		retval =  -EBUSY;
+		retval = -EBUSY;
 	} else {
 		/* Sleep on a wait queue till the cond flag indicates that the
 		 * response arrived
@@ -476,6 +513,11 @@
 						   &value);
 			if (ret)
 				return ret;
+		} else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
+			octnet_id_active(netdev, LED_IDENTIFICATION_ON);
+
+			/* returns 0 since updates are asynchronous */
+			return 0;
 		} else {
 			return -EINVAL;
 		}
@@ -521,7 +563,10 @@
 						   &lio->phy_beacon_val);
 			if (ret)
 				return ret;
+		} else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
+			octnet_id_active(netdev, LED_IDENTIFICATION_OFF);
 
+			return 0;
 		} else {
 			return -EINVAL;
 		}
@@ -550,6 +595,13 @@
 		rx_max_pending = CN6XXX_MAX_OQ_DESCRIPTORS;
 		rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf6x, lio->ifidx);
 		tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx);
+	} else if (OCTEON_CN23XX_PF(oct)) {
+		struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+
+		tx_max_pending = CN23XX_MAX_IQ_DESCRIPTORS;
+		rx_max_pending = CN23XX_MAX_OQ_DESCRIPTORS;
+		rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf23, lio->ifidx);
+		tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf23, lio->ifidx);
 	}
 
 	if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) {
@@ -610,6 +662,69 @@
 	pause->rx_pause = oct->rx_pause;
 }
 
+static int
+lio_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
+{
+	/* Notes: Not supporting any auto negotiation in these
+	 * drivers.
+	 */
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	struct oct_link_info *linfo = &lio->linfo;
+
+	int ret = 0;
+
+	if (oct->chip_id != OCTEON_CN23XX_PF_VID)
+		return -EINVAL;
+
+	if (linfo->link.s.duplex == 0) {
+		/*no flow control for half duplex*/
+		if (pause->rx_pause || pause->tx_pause)
+			return -EINVAL;
+	}
+
+	/*do not support autoneg of link flow control*/
+	if (pause->autoneg == AUTONEG_ENABLE)
+		return -EINVAL;
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_FLOW_CTL;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = 100;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	if (pause->rx_pause) {
+		/*enable rx pause*/
+		nctrl.ncmd.s.param1 = 1;
+	} else {
+		/*disable rx pause*/
+		nctrl.ncmd.s.param1 = 0;
+	}
+
+	if (pause->tx_pause) {
+		/*enable tx pause*/
+		nctrl.ncmd.s.param2 = 1;
+	} else {
+		/*disable tx pause*/
+		nctrl.ncmd.s.param2 = 0;
+	}
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "Failed to set pause parameter\n");
+		return -EINVAL;
+	}
+
+	oct->rx_pause = pause->rx_pause;
+	oct->tx_pause = pause->tx_pause;
+
+	return 0;
+}
+
 static void
 lio_get_ethtool_stats(struct net_device *netdev,
 		      struct ethtool_stats *stats  __attribute__((unused)),
@@ -877,6 +992,27 @@
 	}
 }
 
+static void lio_get_priv_flags_strings(struct lio *lio, u8 *data)
+{
+	struct octeon_device *oct_dev = lio->oct_dev;
+	int i;
+
+	switch (oct_dev->chip_id) {
+	case OCTEON_CN23XX_PF_VID:
+		for (i = 0; i < ARRAY_SIZE(oct_priv_flags_strings); i++) {
+			sprintf(data, "%s", oct_priv_flags_strings[i]);
+			data += ETH_GSTRING_LEN;
+		}
+		break;
+	case OCTEON_CN68XX:
+	case OCTEON_CN66XX:
+		break;
+	default:
+		netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n");
+		break;
+	}
+}
+
 static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct lio *lio = GET_LIO(netdev);
@@ -916,12 +1052,31 @@
 		}
 		break;
 
+	case ETH_SS_PRIV_FLAGS:
+		lio_get_priv_flags_strings(lio, data);
+		break;
 	default:
 		netif_info(lio, drv, lio->netdev, "Unknown Stringset !!\n");
 		break;
 	}
 }
 
+static int lio_get_priv_flags_ss_count(struct lio *lio)
+{
+	struct octeon_device *oct_dev = lio->oct_dev;
+
+	switch (oct_dev->chip_id) {
+	case OCTEON_CN23XX_PF_VID:
+		return ARRAY_SIZE(oct_priv_flags_strings);
+	case OCTEON_CN68XX:
+	case OCTEON_CN66XX:
+		return -EOPNOTSUPP;
+	default:
+		netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n");
+		return -EOPNOTSUPP;
+	}
+}
+
 static int lio_get_sset_count(struct net_device *netdev, int sset)
 {
 	struct lio *lio = GET_LIO(netdev);
@@ -932,6 +1087,8 @@
 		return (ARRAY_SIZE(oct_stats_strings) +
 			ARRAY_SIZE(oct_iq_stats_strings) * oct_dev->num_iqs +
 			ARRAY_SIZE(oct_droq_stats_strings) * oct_dev->num_oqs);
+	case ETH_SS_PRIV_FLAGS:
+		return lio_get_priv_flags_ss_count(lio);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -948,6 +1105,16 @@
 	intrmod_cfg = &oct->intrmod;
 
 	switch (oct->chip_id) {
+	case OCTEON_CN23XX_PF_VID:
+		if (!intrmod_cfg->rx_enable) {
+			intr_coal->rx_coalesce_usecs = intrmod_cfg->rx_usecs;
+			intr_coal->rx_max_coalesced_frames =
+				intrmod_cfg->rx_frames;
+		}
+		if (!intrmod_cfg->tx_enable)
+			intr_coal->tx_max_coalesced_frames =
+				intrmod_cfg->tx_frames;
+		break;
 	case OCTEON_CN68XX:
 	case OCTEON_CN66XX: {
 		struct octeon_cn6xxx *cn6xxx =
@@ -983,7 +1150,15 @@
 		intr_coal->rx_coalesce_usecs_low =
 			intrmod_cfg->rx_mintmr_trigger;
 		intr_coal->rx_max_coalesced_frames_low =
-			intrmod_cfg->rx_mincnt_trigger;
+		    intrmod_cfg->rx_mincnt_trigger;
+	}
+	if (OCTEON_CN23XX_PF(oct) &&
+	    (intrmod_cfg->tx_enable)) {
+		intr_coal->use_adaptive_tx_coalesce = intrmod_cfg->tx_enable;
+		intr_coal->tx_max_coalesced_frames_high =
+		    intrmod_cfg->tx_maxcnt_trigger;
+		intr_coal->tx_max_coalesced_frames_low =
+		    intrmod_cfg->tx_mincnt_trigger;
 	}
 	return 0;
 }
@@ -1060,11 +1235,11 @@
 octnet_nic_stats_callback(struct octeon_device *oct_dev,
 			  u32 status, void *ptr)
 {
-	struct octeon_soft_command  *sc = (struct octeon_soft_command  *)ptr;
-	struct oct_nic_stats_resp *resp = (struct oct_nic_stats_resp *)
-		sc->virtrptr;
-	struct oct_nic_stats_ctrl *ctrl = (struct oct_nic_stats_ctrl *)
-		sc->ctxptr;
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr;
+	struct oct_nic_stats_resp *resp =
+	    (struct oct_nic_stats_resp *)sc->virtrptr;
+	struct oct_nic_stats_ctrl *ctrl =
+	    (struct oct_nic_stats_ctrl *)sc->ctxptr;
 	struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire;
 	struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost;
 
@@ -1314,14 +1489,35 @@
 		CFG_SET_OQ_INTR_PKT(cn6xxx->conf, rx_max_coalesced_frames);
 		break;
 	}
+	case OCTEON_CN23XX_PF_VID: {
+		int q_no;
+
+		if (!intr_coal->rx_max_coalesced_frames)
+			rx_max_coalesced_frames = oct->intrmod.rx_frames;
+		else
+			rx_max_coalesced_frames =
+			    intr_coal->rx_max_coalesced_frames;
+		for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+			q_no += oct->sriov_info.pf_srn;
+			octeon_write_csr64(
+			    oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no),
+			    (octeon_read_csr64(
+				 oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no)) &
+			     (0x3fffff00000000UL)) |
+				rx_max_coalesced_frames);
+			/*consider setting resend bit*/
+		}
+		oct->intrmod.rx_frames = rx_max_coalesced_frames;
+		break;
+	}
 	default:
 		return -EINVAL;
 	}
 	return 0;
 }
 
-static int oct_cfg_rx_intrtime(struct lio *lio, struct ethtool_coalesce
-			       *intr_coal)
+static int oct_cfg_rx_intrtime(struct lio *lio,
+			       struct ethtool_coalesce *intr_coal)
 {
 	struct octeon_device *oct = lio->oct_dev;
 	u32 time_threshold, rx_coalesce_usecs;
@@ -1346,6 +1542,27 @@
 		CFG_SET_OQ_INTR_TIME(cn6xxx->conf, rx_coalesce_usecs);
 		break;
 	}
+	case OCTEON_CN23XX_PF_VID: {
+		u64 time_threshold;
+		int q_no;
+
+		if (!intr_coal->rx_coalesce_usecs)
+			rx_coalesce_usecs = oct->intrmod.rx_usecs;
+		else
+			rx_coalesce_usecs = intr_coal->rx_coalesce_usecs;
+		time_threshold =
+		    cn23xx_pf_get_oq_ticks(oct, (u32)rx_coalesce_usecs);
+		for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+			q_no += oct->sriov_info.pf_srn;
+			octeon_write_csr64(oct,
+					   CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no),
+					   (oct->intrmod.rx_frames |
+					    (time_threshold << 32)));
+			/*consider writing to resend bit here*/
+		}
+		oct->intrmod.rx_usecs = rx_coalesce_usecs;
+		break;
+	}
 	default:
 		return -EINVAL;
 	}
@@ -1358,12 +1575,37 @@
 		   __attribute__((unused)))
 {
 	struct octeon_device *oct = lio->oct_dev;
+	u32 iq_intr_pkt;
+	void __iomem *inst_cnt_reg;
+	u64 val;
 
 	/* Config Cnt based interrupt values */
 	switch (oct->chip_id) {
 	case OCTEON_CN68XX:
 	case OCTEON_CN66XX:
 		break;
+	case OCTEON_CN23XX_PF_VID: {
+		int q_no;
+
+		if (!intr_coal->tx_max_coalesced_frames)
+			iq_intr_pkt = CN23XX_DEF_IQ_INTR_THRESHOLD &
+				      CN23XX_PKT_IN_DONE_WMARK_MASK;
+		else
+			iq_intr_pkt = intr_coal->tx_max_coalesced_frames &
+				      CN23XX_PKT_IN_DONE_WMARK_MASK;
+		for (q_no = 0; q_no < oct->num_iqs; q_no++) {
+			inst_cnt_reg = (oct->instr_queue[q_no])->inst_cnt_reg;
+			val = readq(inst_cnt_reg);
+			/*clear wmark and count.dont want to write count back*/
+			val = (val & 0xFFFF000000000000ULL) |
+			      ((u64)iq_intr_pkt
+			       << CN23XX_PKT_IN_DONE_WMARK_BIT_POS);
+			writeq(val, inst_cnt_reg);
+			/*consider setting resend bit*/
+		}
+		oct->intrmod.tx_frames = iq_intr_pkt;
+		break;
+	}
 	default:
 		return -EINVAL;
 	}
@@ -1399,6 +1641,8 @@
 			return -EINVAL;
 		}
 		break;
+	case OCTEON_CN23XX_PF_VID:
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1541,9 +1785,237 @@
 }
 
 /* Return register dump len. */
-static int lio_get_regs_len(struct net_device *dev __attribute__((unused)))
+static int lio_get_regs_len(struct net_device *dev)
 {
-	return OCT_ETHTOOL_REGDUMP_LEN;
+	struct lio *lio = GET_LIO(dev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	switch (oct->chip_id) {
+	case OCTEON_CN23XX_PF_VID:
+		return OCT_ETHTOOL_REGDUMP_LEN_23XX;
+	default:
+		return OCT_ETHTOOL_REGDUMP_LEN;
+	}
+}
+
+static int cn23xx_read_csr_reg(char *s, struct octeon_device *oct)
+{
+	u32 reg;
+	u8 pf_num = oct->pf_num;
+	int len = 0;
+	int i;
+
+	/* PCI  Window Registers */
+
+	len += sprintf(s + len, "\n\t Octeon CSR Registers\n\n");
+
+	/*0x29030 or 0x29040*/
+	reg = CN23XX_SLI_PKT_MAC_RINFO64(oct->pcie_port, oct->pf_num);
+	len += sprintf(s + len,
+		       "\n[%08x] (SLI_PKT_MAC%d_PF%d_RINFO): %016llx\n",
+		       reg, oct->pcie_port, oct->pf_num,
+		       (u64)octeon_read_csr64(oct, reg));
+
+	/*0x27080 or 0x27090*/
+	reg = CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num);
+	len +=
+	    sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_ENB): %016llx\n",
+		    reg, oct->pcie_port, oct->pf_num,
+		    (u64)octeon_read_csr64(oct, reg));
+
+	/*0x27000 or 0x27010*/
+	reg = CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num);
+	len +=
+	    sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_SUM): %016llx\n",
+		    reg, oct->pcie_port, oct->pf_num,
+		    (u64)octeon_read_csr64(oct, reg));
+
+	/*0x29120*/
+	reg = 0x29120;
+	len += sprintf(s + len, "\n[%08x] (SLI_PKT_MEM_CTL): %016llx\n", reg,
+		       (u64)octeon_read_csr64(oct, reg));
+
+	/*0x27300*/
+	reg = 0x27300 + oct->pcie_port * CN23XX_MAC_INT_OFFSET +
+	      (oct->pf_num) * CN23XX_PF_INT_OFFSET;
+	len += sprintf(
+	    s + len, "\n[%08x] (SLI_MAC%d_PF%d_PKT_VF_INT): %016llx\n", reg,
+	    oct->pcie_port, oct->pf_num, (u64)octeon_read_csr64(oct, reg));
+
+	/*0x27200*/
+	reg = 0x27200 + oct->pcie_port * CN23XX_MAC_INT_OFFSET +
+	      (oct->pf_num) * CN23XX_PF_INT_OFFSET;
+	len += sprintf(s + len,
+		       "\n[%08x] (SLI_MAC%d_PF%d_PP_VF_INT): %016llx\n",
+		       reg, oct->pcie_port, oct->pf_num,
+		       (u64)octeon_read_csr64(oct, reg));
+
+	/*29130*/
+	reg = CN23XX_SLI_PKT_CNT_INT;
+	len += sprintf(s + len, "\n[%08x] (SLI_PKT_CNT_INT): %016llx\n", reg,
+		       (u64)octeon_read_csr64(oct, reg));
+
+	/*0x29140*/
+	reg = CN23XX_SLI_PKT_TIME_INT;
+	len += sprintf(s + len, "\n[%08x] (SLI_PKT_TIME_INT): %016llx\n", reg,
+		       (u64)octeon_read_csr64(oct, reg));
+
+	/*0x29160*/
+	reg = 0x29160;
+	len += sprintf(s + len, "\n[%08x] (SLI_PKT_INT): %016llx\n", reg,
+		       (u64)octeon_read_csr64(oct, reg));
+
+	/*0x29180*/
+	reg = CN23XX_SLI_OQ_WMARK;
+	len += sprintf(s + len, "\n[%08x] (SLI_PKT_OUTPUT_WMARK): %016llx\n",
+		       reg, (u64)octeon_read_csr64(oct, reg));
+
+	/*0x291E0*/
+	reg = CN23XX_SLI_PKT_IOQ_RING_RST;
+	len += sprintf(s + len, "\n[%08x] (SLI_PKT_RING_RST): %016llx\n", reg,
+		       (u64)octeon_read_csr64(oct, reg));
+
+	/*0x29210*/
+	reg = CN23XX_SLI_GBL_CONTROL;
+	len += sprintf(s + len,
+		       "\n[%08x] (SLI_PKT_GBL_CONTROL): %016llx\n", reg,
+		       (u64)octeon_read_csr64(oct, reg));
+
+	/*0x29220*/
+	reg = 0x29220;
+	len += sprintf(s + len, "\n[%08x] (SLI_PKT_BIST_STATUS): %016llx\n",
+		       reg, (u64)octeon_read_csr64(oct, reg));
+
+	/*PF only*/
+	if (pf_num == 0) {
+		/*0x29260*/
+		reg = CN23XX_SLI_OUT_BP_EN_W1S;
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT_OUT_BP_EN_W1S):  %016llx\n",
+			       reg, (u64)octeon_read_csr64(oct, reg));
+	} else if (pf_num == 1) {
+		/*0x29270*/
+		reg = CN23XX_SLI_OUT_BP_EN2_W1S;
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT_OUT_BP_EN2_W1S): %016llx\n",
+			       reg, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		reg = CN23XX_SLI_OQ_BUFF_INFO_SIZE(i);
+		len +=
+		    sprintf(s + len, "\n[%08x] (SLI_PKT%d_OUT_SIZE): %016llx\n",
+			    reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	/*0x10040*/
+	for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+		reg = CN23XX_SLI_IQ_INSTR_COUNT64(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	/*0x10080*/
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		reg = CN23XX_SLI_OQ_PKTS_CREDIT(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_SLIST_BAOFF_DBELL): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	/*0x10090*/
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		reg = CN23XX_SLI_OQ_SIZE(i);
+		len += sprintf(
+		    s + len, "\n[%08x] (SLI_PKT%d_SLIST_FIFO_RSIZE): %016llx\n",
+		    reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	/*0x10050*/
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		reg = CN23XX_SLI_OQ_PKT_CONTROL(i);
+		len += sprintf(
+			s + len,
+			"\n[%08x] (SLI_PKT%d__OUTPUT_CONTROL): %016llx\n",
+			reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	/*0x10070*/
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		reg = CN23XX_SLI_OQ_BASE_ADDR64(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_SLIST_BADDR): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	/*0x100a0*/
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		reg = CN23XX_SLI_OQ_PKT_INT_LEVELS(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_INT_LEVELS): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	/*0x100b0*/
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		reg = CN23XX_SLI_OQ_PKTS_SENT(i);
+		len += sprintf(s + len, "\n[%08x] (SLI_PKT%d_CNTS): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	/*0x100c0*/
+	for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) {
+		reg = 0x100c0 + i * CN23XX_OQ_OFFSET;
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_ERROR_INFO): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+
+		/*0x10000*/
+		for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+			reg = CN23XX_SLI_IQ_PKT_CONTROL64(i);
+			len += sprintf(
+				s + len,
+				"\n[%08x] (SLI_PKT%d_INPUT_CONTROL): %016llx\n",
+				reg, i, (u64)octeon_read_csr64(oct, reg));
+		}
+
+		/*0x10010*/
+		for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+			reg = CN23XX_SLI_IQ_BASE_ADDR64(i);
+			len += sprintf(
+			    s + len,
+			    "\n[%08x] (SLI_PKT%d_INSTR_BADDR): %016llx\n", reg,
+			    i, (u64)octeon_read_csr64(oct, reg));
+		}
+
+		/*0x10020*/
+		for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+			reg = CN23XX_SLI_IQ_DOORBELL(i);
+			len += sprintf(
+			    s + len,
+			    "\n[%08x] (SLI_PKT%d_INSTR_BAOFF_DBELL): %016llx\n",
+			    reg, i, (u64)octeon_read_csr64(oct, reg));
+		}
+
+		/*0x10030*/
+		for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) {
+			reg = CN23XX_SLI_IQ_SIZE(i);
+			len += sprintf(
+			    s + len,
+			    "\n[%08x] (SLI_PKT%d_INSTR_FIFO_RSIZE): %016llx\n",
+			    reg, i, (u64)octeon_read_csr64(oct, reg));
+		}
+
+		/*0x10040*/
+		for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++)
+			reg = CN23XX_SLI_IQ_INSTR_COUNT64(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	return len;
 }
 
 static int cn6xxx_read_csr_reg(char *s, struct octeon_device *oct)
@@ -1688,6 +2160,10 @@
 	regs->version = OCT_ETHTOOL_REGSVER;
 
 	switch (oct->chip_id) {
+	case OCTEON_CN23XX_PF_VID:
+		memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN_23XX);
+		len += cn23xx_read_csr_reg(regbuf + len, oct);
+		break;
 	case OCTEON_CN68XX:
 	case OCTEON_CN66XX:
 		memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN);
@@ -1729,6 +2205,7 @@
 	.get_strings		= lio_get_strings,
 	.get_ethtool_stats	= lio_get_ethtool_stats,
 	.get_pauseparam		= lio_get_pauseparam,
+	.set_pauseparam		= lio_set_pauseparam,
 	.get_regs_len		= lio_get_regs_len,
 	.get_regs		= lio_get_regs,
 	.get_msglevel		= lio_get_msglevel,
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 20d6942..afc6f9d 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -21,11 +21,10 @@
 **********************************************************************/
 #include <linux/version.h>
 #include <linux/pci.h>
-#include <linux/net_tstamp.h>
-#include <linux/if_vlan.h>
 #include <linux/firmware.h>
 #include <linux/ptp_clock_kernel.h>
 #include <net/vxlan.h>
+#include <linux/kthread.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
@@ -37,6 +36,7 @@
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
 #include "cn68xx_device.h"
+#include "cn23xx_pf_device.h"
 #include "liquidio_image.h"
 
 MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
@@ -52,11 +52,6 @@
 MODULE_PARM_DESC(ddr_timeout,
 		 "Number of milliseconds to wait for DDR initialization. 0 waits for ddr_timeout to be set to non-zero value before starting to check");
 
-static u32 console_bitmask;
-module_param(console_bitmask, int, 0644);
-MODULE_PARM_DESC(console_bitmask,
-		 "Bitmask indicating which consoles have debug output redirected to syslog.");
-
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
 #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
@@ -102,6 +97,14 @@
 	u64 status;
 };
 
+struct liquidio_rx_ctl_context {
+	int octeon_id;
+
+	wait_queue_head_t wc;
+
+	int cond;
+};
+
 struct oct_link_status_resp {
 	u64 rh;
 	struct oct_link_info link_info;
@@ -139,7 +142,8 @@
 #define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
 
 #define OCTNIC_GSO_MAX_HEADER_SIZE 128
-#define OCTNIC_GSO_MAX_SIZE (GSO_MAX_SIZE - OCTNIC_GSO_MAX_HEADER_SIZE)
+#define OCTNIC_GSO_MAX_SIZE                                                    \
+	(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
 
 /** Structure of a node in list of gather components maintained by
  * NIC driver for each network device.
@@ -162,27 +166,6 @@
 	u64 sg_dma_ptr;
 };
 
-/** This structure is used by NIC driver to store information required
- * to free the sk_buff when the packet has been fetched by Octeon.
- * Bytes offset below assume worst-case of a 64-bit system.
- */
-struct octnet_buf_free_info {
-	/** Bytes 1-8.  Pointer to network device private structure. */
-	struct lio *lio;
-
-	/** Bytes 9-16.  Pointer to sk_buff. */
-	struct sk_buff *skb;
-
-	/** Bytes 17-24.  Pointer to gather list. */
-	struct octnic_gather *g;
-
-	/** Bytes 25-32. Physical address of skb->data or gather list. */
-	u64 dptr;
-
-	/** Bytes 33-47. Piggybacked soft command, if any */
-	struct octeon_soft_command *sc;
-};
-
 struct handshake {
 	struct completion init;
 	struct completion started;
@@ -198,6 +181,7 @@
 };
 
 static int octeon_device_init(struct octeon_device *);
+static int liquidio_stop(struct net_device *netdev);
 static void liquidio_remove(struct pci_dev *pdev);
 static int liquidio_probe(struct pci_dev *pdev,
 			  const struct pci_device_id *ent);
@@ -219,6 +203,20 @@
 			continue;
 		reschedule |= octeon_droq_process_packets(oct, oct->droq[q_no],
 							  MAX_PACKET_BUDGET);
+		lio_enable_irq(oct->droq[q_no], NULL);
+
+		if (OCTEON_CN23XX_PF(oct) && oct->msix_on) {
+			/* set time and cnt interrupt thresholds for this DROQ
+			 * for NAPI
+			 */
+			int adjusted_q_no = q_no + oct->sriov_info.pf_srn;
+
+			octeon_write_csr64(
+			    oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(adjusted_q_no),
+			    0x5700000040ULL);
+			octeon_write_csr64(
+			    oct, CN23XX_SLI_OQ_PKTS_SENT(adjusted_q_no), 0);
+		}
 	}
 
 	if (reschedule)
@@ -252,76 +250,6 @@
 	return pkt_cnt;
 }
 
-void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl,
-					unsigned int bytes_compl)
-{
-	struct netdev_queue *netdev_queue = txq;
-
-	netdev_tx_completed_queue(netdev_queue, pkts_compl, bytes_compl);
-}
-
-void octeon_update_tx_completion_counters(void *buf, int reqtype,
-					  unsigned int *pkts_compl,
-					  unsigned int *bytes_compl)
-{
-	struct octnet_buf_free_info *finfo;
-	struct sk_buff *skb = NULL;
-	struct octeon_soft_command *sc;
-
-	switch (reqtype) {
-	case REQTYPE_NORESP_NET:
-	case REQTYPE_NORESP_NET_SG:
-		finfo = buf;
-		skb = finfo->skb;
-		break;
-
-	case REQTYPE_RESP_NET_SG:
-	case REQTYPE_RESP_NET:
-		sc = buf;
-		skb = sc->callback_arg;
-		break;
-
-	default:
-		return;
-	}
-
-	(*pkts_compl)++;
-	*bytes_compl += skb->len;
-}
-
-void octeon_report_sent_bytes_to_bql(void *buf, int reqtype)
-{
-	struct octnet_buf_free_info *finfo;
-	struct sk_buff *skb;
-	struct octeon_soft_command *sc;
-	struct netdev_queue *txq;
-
-	switch (reqtype) {
-	case REQTYPE_NORESP_NET:
-	case REQTYPE_NORESP_NET_SG:
-		finfo = buf;
-		skb = finfo->skb;
-		break;
-
-	case REQTYPE_RESP_NET_SG:
-	case REQTYPE_RESP_NET:
-		sc = buf;
-		skb = sc->callback_arg;
-		break;
-
-	default:
-		return;
-	}
-
-	txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb));
-	netdev_tx_sent_queue(txq, skb->len);
-}
-
-int octeon_console_debug_enabled(u32 console)
-{
-	return (console_bitmask >> (console)) & 0x1;
-}
-
 /**
  * \brief Forces all IO queues off on a given device
  * @param oct Pointer to Octeon device
@@ -441,7 +369,7 @@
 	pci_disable_device(oct->pci_dev);
 
 	/* Disable interrupts  */
-	oct->fn_list.disable_interrupt(oct->chip);
+	oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
 
 	pcierror_quiesce_device(oct);
 
@@ -570,6 +498,9 @@
 	{       /* 66xx */
 		PCI_VENDOR_ID_CAVIUM, 0x92, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0
 	},
+	{       /* 23xx pf */
+		PCI_VENDOR_ID_CAVIUM, 0x9702, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0
+	},
 	{
 		0, 0, 0, 0, 0, 0, 0
 	}
@@ -587,7 +518,6 @@
 	.suspend	= liquidio_suspend,
 	.resume		= liquidio_resume,
 #endif
-
 };
 
 /**
@@ -936,6 +866,52 @@
 }
 
 /**
+ * \brief Routine to notify MTU change
+ * @param work work_struct data structure
+ */
+static void octnet_link_status_change(struct work_struct *work)
+{
+	struct cavium_wk *wk = (struct cavium_wk *)work;
+	struct lio *lio = (struct lio *)wk->ctxptr;
+
+	rtnl_lock();
+	call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+	rtnl_unlock();
+}
+
+/**
+ * \brief Sets up the mtu status change work
+ * @param netdev network device
+ */
+static inline int setup_link_status_change_wq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	lio->link_status_wq.wq = alloc_workqueue("link-status",
+						 WQ_MEM_RECLAIM, 0);
+	if (!lio->link_status_wq.wq) {
+		dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n");
+		return -1;
+	}
+	INIT_DELAYED_WORK(&lio->link_status_wq.wk.work,
+			  octnet_link_status_change);
+	lio->link_status_wq.wk.ctxptr = lio;
+
+	return 0;
+}
+
+static inline void cleanup_link_status_change_wq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+
+	if (lio->link_status_wq.wq) {
+		cancel_delayed_work_sync(&lio->link_status_wq.wk.work);
+		destroy_workqueue(lio->link_status_wq.wq);
+	}
+}
+
+/**
  * \brief Update link status
  * @param netdev network device
  * @param ls link status structure
@@ -973,8 +949,6 @@
 	struct lio *lio;
 	struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
 
-	/*octeon_update_iq_read_idx(oct, iq);*/
-
 	netdev = oct->props[iq->ifidx].netdev;
 
 	/* This is needed because the first IQ does not have
@@ -1002,12 +976,32 @@
 	}
 }
 
+static
+int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
+{
+	struct octeon_device *oct = droq->oct_dev;
+	struct octeon_device_priv *oct_priv =
+	    (struct octeon_device_priv *)oct->priv;
+
+	if (droq->ops.poll_mode) {
+		droq->ops.napi_fn(droq);
+	} else {
+		if (ret & MSIX_PO_INT) {
+			tasklet_schedule(&oct_priv->droq_tasklet);
+			return 1;
+		}
+		/* this will be flushed periodically by check iq db */
+		if (ret & MSIX_PI_INT)
+			return 0;
+	}
+	return 0;
+}
+
 /**
  * \brief Droq packet processor sceduler
  * @param oct octeon device
  */
-static
-void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
+static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
 {
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
@@ -1032,19 +1026,36 @@
 	}
 }
 
+static irqreturn_t
+liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev)
+{
+	u64 ret;
+	struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev;
+	struct octeon_device *oct = ioq_vector->oct_dev;
+	struct octeon_droq *droq = oct->droq[ioq_vector->droq_index];
+
+	ret = oct->fn_list.msix_interrupt_handler(ioq_vector);
+
+	if ((ret & MSIX_PO_INT) || (ret & MSIX_PI_INT))
+		liquidio_schedule_msix_droq_pkt_handler(droq, ret);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * \brief Interrupt handler for octeon
  * @param irq unused
  * @param dev octeon device
  */
 static
-irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev)
+irqreturn_t liquidio_legacy_intr_handler(int irq __attribute__((unused)),
+					 void *dev)
 {
 	struct octeon_device *oct = (struct octeon_device *)dev;
 	irqreturn_t ret;
 
 	/* Disable our interrupts for the duration of ISR */
-	oct->fn_list.disable_interrupt(oct->chip);
+	oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
 
 	ret = oct->fn_list.process_interrupt_regs(oct);
 
@@ -1053,7 +1064,7 @@
 
 	/* Re-enable our interrupts  */
 	if (!(atomic_read(&oct->status) == OCT_DEV_IN_RESET))
-		oct->fn_list.enable_interrupt(oct->chip);
+		oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);
 
 	return ret;
 }
@@ -1067,22 +1078,204 @@
 static int octeon_setup_interrupt(struct octeon_device *oct)
 {
 	int irqret, err;
+	struct msix_entry *msix_entries;
+	int i;
+	int num_ioq_vectors;
+	int num_alloc_ioq_vectors;
 
-	err = pci_enable_msi(oct->pci_dev);
-	if (err)
-		dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n",
-			 err);
-	else
-		oct->flags |= LIO_FLAG_MSI_ENABLED;
+	if (OCTEON_CN23XX_PF(oct) && oct->msix_on) {
+		oct->num_msix_irqs = oct->sriov_info.num_pf_rings;
+		/* one non ioq interrupt for handling sli_mac_pf_int_sum */
+		oct->num_msix_irqs += 1;
 
-	irqret = request_irq(oct->pci_dev->irq, liquidio_intr_handler,
-			     IRQF_SHARED, "octeon", oct);
-	if (irqret) {
-		if (oct->flags & LIO_FLAG_MSI_ENABLED)
-			pci_disable_msi(oct->pci_dev);
-		dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n",
-			irqret);
-		return 1;
+		oct->msix_entries = kcalloc(
+		    oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL);
+		if (!oct->msix_entries)
+			return 1;
+
+		msix_entries = (struct msix_entry *)oct->msix_entries;
+		/*Assumption is that pf msix vectors start from pf srn to pf to
+		 * trs and not from 0. if not change this code
+		 */
+		for (i = 0; i < oct->num_msix_irqs - 1; i++)
+			msix_entries[i].entry = oct->sriov_info.pf_srn + i;
+		msix_entries[oct->num_msix_irqs - 1].entry =
+		    oct->sriov_info.trs;
+		num_alloc_ioq_vectors = pci_enable_msix_range(
+						oct->pci_dev, msix_entries,
+						oct->num_msix_irqs,
+						oct->num_msix_irqs);
+		if (num_alloc_ioq_vectors < 0) {
+			dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n");
+			kfree(oct->msix_entries);
+			oct->msix_entries = NULL;
+			return 1;
+		}
+		dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n");
+
+		num_ioq_vectors = oct->num_msix_irqs;
+
+		/** For PF, there is one non-ioq interrupt handler */
+		num_ioq_vectors -= 1;
+		irqret = request_irq(msix_entries[num_ioq_vectors].vector,
+				     liquidio_legacy_intr_handler, 0, "octeon",
+				     oct);
+		if (irqret) {
+			dev_err(&oct->pci_dev->dev,
+				"OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
+				irqret);
+			pci_disable_msix(oct->pci_dev);
+			kfree(oct->msix_entries);
+			oct->msix_entries = NULL;
+			return 1;
+		}
+
+		for (i = 0; i < num_ioq_vectors; i++) {
+			irqret = request_irq(msix_entries[i].vector,
+					     liquidio_msix_intr_handler, 0,
+					     "octeon", &oct->ioq_vector[i]);
+			if (irqret) {
+				dev_err(&oct->pci_dev->dev,
+					"OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
+					irqret);
+				/** Freeing the non-ioq irq vector here . */
+				free_irq(msix_entries[num_ioq_vectors].vector,
+					 oct);
+
+				while (i) {
+					i--;
+					/** clearing affinity mask. */
+					irq_set_affinity_hint(
+						msix_entries[i].vector, NULL);
+					free_irq(msix_entries[i].vector,
+						 &oct->ioq_vector[i]);
+				}
+				pci_disable_msix(oct->pci_dev);
+				kfree(oct->msix_entries);
+				oct->msix_entries = NULL;
+				return 1;
+			}
+			oct->ioq_vector[i].vector = msix_entries[i].vector;
+			/* assign the cpu mask for this msix interrupt vector */
+			irq_set_affinity_hint(
+					msix_entries[i].vector,
+					(&oct->ioq_vector[i].affinity_mask));
+		}
+		dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: MSI-X enabled\n",
+			oct->octeon_id);
+	} else {
+		err = pci_enable_msi(oct->pci_dev);
+		if (err)
+			dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n",
+				 err);
+		else
+			oct->flags |= LIO_FLAG_MSI_ENABLED;
+
+		irqret = request_irq(oct->pci_dev->irq,
+				     liquidio_legacy_intr_handler, IRQF_SHARED,
+				     "octeon", oct);
+		if (irqret) {
+			if (oct->flags & LIO_FLAG_MSI_ENABLED)
+				pci_disable_msi(oct->pci_dev);
+			dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n",
+				irqret);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static int liquidio_watchdog(void *param)
+{
+	u64 wdog;
+	u16 mask_of_stuck_cores = 0;
+	u16 mask_of_crashed_cores = 0;
+	int core_num;
+	u8 core_is_stuck[LIO_MAX_CORES];
+	u8 core_crashed[LIO_MAX_CORES];
+	struct octeon_device *oct = param;
+
+	memset(core_is_stuck, 0, sizeof(core_is_stuck));
+	memset(core_crashed, 0, sizeof(core_crashed));
+
+	while (!kthread_should_stop()) {
+		mask_of_crashed_cores =
+		    (u16)octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2);
+
+		for (core_num = 0; core_num < LIO_MAX_CORES; core_num++) {
+			if (!core_is_stuck[core_num]) {
+				wdog = lio_pci_readq(oct, CIU3_WDOG(core_num));
+
+				/* look at watchdog state field */
+				wdog &= CIU3_WDOG_MASK;
+				if (wdog) {
+					/* this watchdog timer has expired */
+					core_is_stuck[core_num] =
+						LIO_MONITOR_WDOG_EXPIRE;
+					mask_of_stuck_cores |= (1 << core_num);
+				}
+			}
+
+			if (!core_crashed[core_num])
+				core_crashed[core_num] =
+				    (mask_of_crashed_cores >> core_num) & 1;
+		}
+
+		if (mask_of_stuck_cores) {
+			for (core_num = 0; core_num < LIO_MAX_CORES;
+			     core_num++) {
+				if (core_is_stuck[core_num] == 1) {
+					dev_err(&oct->pci_dev->dev,
+						"ERROR: Octeon core %d is stuck!\n",
+						core_num);
+					/* 2 means we have printk'd  an error
+					 * so no need to repeat the same printk
+					 */
+					core_is_stuck[core_num] =
+						LIO_MONITOR_CORE_STUCK_MSGD;
+				}
+			}
+		}
+
+		if (mask_of_crashed_cores) {
+			for (core_num = 0; core_num < LIO_MAX_CORES;
+			     core_num++) {
+				if (core_crashed[core_num] == 1) {
+					dev_err(&oct->pci_dev->dev,
+						"ERROR: Octeon core %d crashed!  See oct-fwdump for details.\n",
+						core_num);
+					/* 2 means we have printk'd  an error
+					 * so no need to repeat the same printk
+					 */
+					core_crashed[core_num] =
+						LIO_MONITOR_CORE_STUCK_MSGD;
+				}
+			}
+		}
+#ifdef CONFIG_MODULE_UNLOAD
+		if (mask_of_stuck_cores || mask_of_crashed_cores) {
+			/* make module refcount=0 so that rmmod will work */
+			long refcount;
+
+			refcount = module_refcount(THIS_MODULE);
+
+			while (refcount > 0) {
+				module_put(THIS_MODULE);
+				refcount = module_refcount(THIS_MODULE);
+			}
+
+			/* compensate for and withstand an unlikely (but still
+			 * possible) race condition
+			 */
+			while (refcount < 0) {
+				try_module_get(THIS_MODULE);
+				refcount = module_refcount(THIS_MODULE);
+			}
+		}
+#endif
+		/* sleep for two seconds */
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(2 * HZ);
 	}
 
 	return 0;
@@ -1107,6 +1300,9 @@
 		return -ENOMEM;
 	}
 
+	if (pdev->device == OCTEON_CN23XX_PF_VID)
+		oct_dev->msix_on = LIO_FLAG_MSIX_ENABLED;
+
 	dev_info(&pdev->dev, "Initializing device %x:%x.\n",
 		 (u32)pdev->vendor, (u32)pdev->device);
 
@@ -1130,6 +1326,30 @@
 		return -ENOMEM;
 	}
 
+	if (OCTEON_CN23XX_PF(oct_dev)) {
+		u64 scratch1;
+		u8 bus, device, function;
+
+		scratch1 = octeon_read_csr64(oct_dev, CN23XX_SLI_SCRATCH1);
+		if (!(scratch1 & 4ULL)) {
+			/* Bit 2 of SLI_SCRATCH_1 is a flag that indicates that
+			 * the lio watchdog kernel thread is running for this
+			 * NIC.  Each NIC gets one watchdog kernel thread.
+			 */
+			scratch1 |= 4ULL;
+			octeon_write_csr64(oct_dev, CN23XX_SLI_SCRATCH1,
+					   scratch1);
+
+			bus = pdev->bus->number;
+			device = PCI_SLOT(pdev->devfn);
+			function = PCI_FUNC(pdev->devfn);
+			oct_dev->watchdog_task = kthread_create(
+			    liquidio_watchdog, oct_dev,
+			    "liowd/%02hhx:%02hhx.%hhx", bus, device, function);
+			wake_up_process(oct_dev->watchdog_task);
+		}
+	}
+
 	oct_dev->rx_pause = 1;
 	oct_dev->tx_pause = 1;
 
@@ -1146,6 +1366,7 @@
 static void octeon_destroy_resources(struct octeon_device *oct)
 {
 	int i;
+	struct msix_entry *msix_entries;
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 
@@ -1190,21 +1411,40 @@
 			dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
 
 		/* Disable interrupts  */
-		oct->fn_list.disable_interrupt(oct->chip);
+		oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
 
-		/* Release the interrupt line */
-		free_irq(oct->pci_dev->irq, oct);
+		if (oct->msix_on) {
+			msix_entries = (struct msix_entry *)oct->msix_entries;
+			for (i = 0; i < oct->num_msix_irqs - 1; i++) {
+				/* clear the affinity_cpumask */
+				irq_set_affinity_hint(msix_entries[i].vector,
+						      NULL);
+				free_irq(msix_entries[i].vector,
+					 &oct->ioq_vector[i]);
+			}
+			/* non-iov vector's argument is oct struct */
+			free_irq(msix_entries[i].vector, oct);
 
-		if (oct->flags & LIO_FLAG_MSI_ENABLED)
-			pci_disable_msi(oct->pci_dev);
+			pci_disable_msix(oct->pci_dev);
+			kfree(oct->msix_entries);
+			oct->msix_entries = NULL;
+		} else {
+			/* Release the interrupt line */
+			free_irq(oct->pci_dev->irq, oct);
 
-		/* fallthrough */
+			if (oct->flags & LIO_FLAG_MSI_ENABLED)
+				pci_disable_msi(oct->pci_dev);
+		}
+
+		if (OCTEON_CN23XX_PF(oct))
+			octeon_free_ioq_vector(oct);
+	/* fallthrough */
 	case OCT_DEV_IN_RESET:
 	case OCT_DEV_DROQ_INIT_DONE:
 		/*atomic_set(&oct->status, OCT_DEV_DROQ_INIT_DONE);*/
 		mdelay(100);
 		for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-			if (!(oct->io_qmask.oq & (1ULL << i)))
+			if (!(oct->io_qmask.oq & BIT_ULL(i)))
 				continue;
 			octeon_delete_droq(oct, i);
 		}
@@ -1226,16 +1466,15 @@
 		octeon_delete_response_list(oct);
 
 		/* fallthrough */
-	case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
-		octeon_free_sc_buffer_pool(oct);
-
-		/* fallthrough */
 	case OCT_DEV_INSTR_QUEUE_INIT_DONE:
 		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-			if (!(oct->io_qmask.iq & (1ULL << i)))
+			if (!(oct->io_qmask.iq & BIT_ULL(i)))
 				continue;
 			octeon_delete_instr_queue(oct, i);
 		}
+		/* fallthrough */
+	case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
+		octeon_free_sc_buffer_pool(oct);
 
 		/* fallthrough */
 	case OCT_DEV_DISPATCH_INIT_DONE:
@@ -1244,9 +1483,9 @@
 
 		/* fallthrough */
 	case OCT_DEV_PCI_MAP_DONE:
-
 		/* Soft reset the octeon device before exiting */
-		oct->fn_list.soft_reset(oct);
+		if ((!OCTEON_CN23XX_PF(oct)) || !oct->octeon_id)
+			oct->fn_list.soft_reset(oct);
 
 		octeon_unmap_pci_barx(oct, 0);
 		octeon_unmap_pci_barx(oct, 1);
@@ -1264,23 +1503,89 @@
 }
 
 /**
+ * \brief Callback for rx ctrl
+ * @param status status of request
+ * @param buf pointer to resp structure
+ */
+static void rx_ctl_callback(struct octeon_device *oct,
+			    u32 status,
+			    void *buf)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+	struct liquidio_rx_ctl_context *ctx;
+
+	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+	oct = lio_get_device(ctx->octeon_id);
+	if (status)
+		dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n",
+			CVM_CAST64(status));
+	WRITE_ONCE(ctx->cond, 1);
+
+	/* This barrier is required to be sure that the response has been
+	 * written fully before waking up the handler
+	 */
+	wmb();
+
+	wake_up_interruptible(&ctx->wc);
+}
+
+/**
  * \brief Send Rx control command
  * @param lio per-network private data
  * @param start_stop whether to start or stop
  */
 static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
 {
-	struct octnic_ctrl_pkt nctrl;
+	struct octeon_soft_command *sc;
+	struct liquidio_rx_ctl_context *ctx;
+	union octnet_cmd *ncmd;
+	int ctx_size = sizeof(struct liquidio_rx_ctl_context);
+	struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
+	int retval;
 
-	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+	if (oct->props[lio->ifidx].rx_on == start_stop)
+		return;
 
-	nctrl.ncmd.s.cmd = OCTNET_CMD_RX_CTL;
-	nctrl.ncmd.s.param1 = start_stop;
-	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.netpndev = (u64)lio->netdev;
+	sc = (struct octeon_soft_command *)
+		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+					  16, ctx_size);
 
-	if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl) < 0)
+	ncmd = (union octnet_cmd *)sc->virtdptr;
+	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+	WRITE_ONCE(ctx->cond, 0);
+	ctx->octeon_id = lio_get_device_id(oct);
+	init_waitqueue_head(&ctx->wc);
+
+	ncmd->u64 = 0;
+	ncmd->s.cmd = OCTNET_CMD_RX_CTL;
+	ncmd->s.param1 = start_stop;
+
+	octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_CMD, 0, 0, 0);
+
+	sc->callback = rx_ctl_callback;
+	sc->callback_arg = sc;
+	sc->wait_time = 5000;
+
+	retval = octeon_send_soft_command(oct, sc);
+	if (retval == IQ_SEND_FAILED) {
 		netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
+	} else {
+		/* Sleep on a wait queue till the cond flag indicates that the
+		 * response arrived or timed-out.
+		 */
+		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR)
+			return;
+		oct->props[lio->ifidx].rx_on = start_stop;
+	}
+
+	octeon_free_soft_command(oct, sc);
 }
 
 /**
@@ -1307,21 +1612,24 @@
 
 	dev_dbg(&oct->pci_dev->dev, "NIC device cleanup\n");
 
-	send_rx_ctrl_cmd(lio, 0);
-
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING)
-		txqs_stop(netdev);
+		liquidio_stop(netdev);
 
 	if (oct->props[lio->ifidx].napi_enabled == 1) {
 		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
 			napi_disable(napi);
 
 		oct->props[lio->ifidx].napi_enabled = 0;
+
+		if (OCTEON_CN23XX_PF(oct))
+			oct->droq[0]->ops.poll_mode = 0;
 	}
 
 	if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED)
 		unregister_netdev(netdev);
 
+	cleanup_link_status_change_wq(netdev);
+
 	delete_glists(lio);
 
 	free_netdev(netdev);
@@ -1374,6 +1682,9 @@
 
 	dev_dbg(&oct_dev->pci_dev->dev, "Stopping device\n");
 
+	if (oct_dev->watchdog_task)
+		kthread_stop(oct_dev->watchdog_task);
+
 	if (oct_dev->app_mode && (oct_dev->app_mode == CVM_DRV_NIC_APP))
 		liquidio_stop_nic_module(oct_dev);
 
@@ -1417,6 +1728,12 @@
 		s = "CN66XX";
 		break;
 
+	case OCTEON_CN23XX_PCIID_PF:
+		oct->chip_id = OCTEON_CN23XX_PF_VID;
+		ret = setup_cn23xx_octeon_pf_device(oct);
+		s = "CN23XX";
+		break;
+
 	default:
 		s = "?";
 		dev_err(&oct->pci_dev->dev, "Unknown device found (dev_id: %x)\n",
@@ -1867,7 +2184,7 @@
 	struct liquidio_if_cfg_context *ctx;
 
 	resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
-	ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+	ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
 
 	oct = lio_get_device(ctx->octeon_id);
 	if (resp->status)
@@ -2060,11 +2377,14 @@
  */
 static void liquidio_napi_drv_callback(void *arg)
 {
+	struct octeon_device *oct;
 	struct octeon_droq *droq = arg;
 	int this_cpu = smp_processor_id();
 
-	if (droq->cpu_id == this_cpu) {
-		napi_schedule(&droq->napi);
+	oct = droq->oct_dev;
+
+	if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) {
+		napi_schedule_irqoff(&droq->napi);
 	} else {
 		struct call_single_data *csd = &droq->csd;
 
@@ -2173,17 +2493,15 @@
 						   lio->ifidx), NULL);
 		if (retval) {
 			dev_err(&octeon_dev->pci_dev->dev,
-				" %s : Runtime DROQ(RxQ) creation failed.\n",
+				"%s : Runtime DROQ(RxQ) creation failed.\n",
 				__func__);
 			return 1;
 		}
 
 		droq = octeon_dev->droq[q_no];
 		napi = &droq->napi;
-		dev_dbg(&octeon_dev->pci_dev->dev,
-			"netif_napi_add netdev:%llx oct:%llx\n",
-			(u64)netdev,
-			(u64)octeon_dev);
+		dev_dbg(&octeon_dev->pci_dev->dev, "netif_napi_add netdev:%llx oct:%llx pf_num:%d\n",
+			(u64)netdev, (u64)octeon_dev, octeon_dev->pf_num);
 		netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
 
 		/* designate a CPU for this droq */
@@ -2195,6 +2513,14 @@
 		octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
 	}
 
+	if (OCTEON_CN23XX_PF(octeon_dev)) {
+		/* 23XX PF can receive control messages (via the first PF-owned
+		 * droq) from the firmware even if the ethX interface is down,
+		 * so that's why poll_mode must be off for the first droq.
+		 */
+		octeon_dev->droq[0]->ops.poll_mode = 0;
+	}
+
 	/* set up IQs. */
 	for (q = 0; q < lio->linfo.num_txpciq; q++) {
 		num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(octeon_get_conf
@@ -2235,7 +2561,7 @@
  * \brief Sets up the txq poll check
  * @param netdev network device
  */
-static inline void setup_tx_poll_fn(struct net_device *netdev)
+static inline int setup_tx_poll_fn(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
@@ -2244,21 +2570,24 @@
 						WQ_MEM_RECLAIM, 0);
 	if (!lio->txq_status_wq.wq) {
 		dev_err(&oct->pci_dev->dev, "unable to create cavium txq status wq\n");
-		return;
+		return -1;
 	}
 	INIT_DELAYED_WORK(&lio->txq_status_wq.wk.work,
 			  octnet_poll_check_txq_status);
 	lio->txq_status_wq.wk.ctxptr = lio;
 	queue_delayed_work(lio->txq_status_wq.wq,
 			   &lio->txq_status_wq.wk.work, msecs_to_jiffies(1));
+	return 0;
 }
 
 static inline void cleanup_tx_poll_fn(struct net_device *netdev)
 {
 	struct lio *lio = GET_LIO(netdev);
 
-	cancel_delayed_work_sync(&lio->txq_status_wq.wk.work);
-	destroy_workqueue(lio->txq_status_wq.wq);
+	if (lio->txq_status_wq.wq) {
+		cancel_delayed_work_sync(&lio->txq_status_wq.wk.work);
+		destroy_workqueue(lio->txq_status_wq.wq);
+	}
 }
 
 /**
@@ -2276,24 +2605,34 @@
 			napi_enable(napi);
 
 		oct->props[lio->ifidx].napi_enabled = 1;
+
+		if (OCTEON_CN23XX_PF(oct))
+			oct->droq[0]->ops.poll_mode = 1;
 	}
 
 	oct_ptp_open(netdev);
 
 	ifstate_set(lio, LIO_IFSTATE_RUNNING);
 
-	setup_tx_poll_fn(netdev);
-
-	start_txq(netdev);
+	/* Ready for link status updates */
+	lio->intf_open = 1;
 
 	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 
+	if (OCTEON_CN23XX_PF(oct)) {
+		if (!oct->msix_on)
+			if (setup_tx_poll_fn(netdev))
+				return -1;
+	} else {
+		if (setup_tx_poll_fn(netdev))
+			return -1;
+	}
+
+	start_txq(netdev);
+
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
 
-	/* Ready for link status updates */
-	lio->intf_open = 1;
-
 	dev_info(&oct->pci_dev->dev, "%s interface is opened\n",
 		 netdev->name);
 
@@ -2328,7 +2667,12 @@
 	/* Now it should be safe to tell Octeon that nic interface is down. */
 	send_rx_ctrl_cmd(lio, 0);
 
-	cleanup_tx_poll_fn(netdev);
+	if (OCTEON_CN23XX_PF(oct)) {
+		if (!oct->msix_on)
+			cleanup_tx_poll_fn(netdev);
+	} else {
+		cleanup_tx_poll_fn(netdev);
+	}
 
 	if (lio->ptp_clock) {
 		ptp_clock_unregister(lio->ptp_clock);
@@ -2340,143 +2684,6 @@
 	return 0;
 }
 
-void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
-{
-	struct octnic_ctrl_pkt *nctrl = (struct octnic_ctrl_pkt *)nctrl_ptr;
-	struct net_device *netdev = (struct net_device *)nctrl->netpndev;
-	struct lio *lio = GET_LIO(netdev);
-	struct octeon_device *oct = lio->oct_dev;
-	u8 *mac;
-
-	switch (nctrl->ncmd.s.cmd) {
-	case OCTNET_CMD_CHANGE_DEVFLAGS:
-	case OCTNET_CMD_SET_MULTI_LIST:
-		break;
-
-	case OCTNET_CMD_CHANGE_MACADDR:
-		mac = ((u8 *)&nctrl->udd[0]) + 2;
-		netif_info(lio, probe, lio->netdev,
-			   "%s %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n",
-			   "MACAddr changed to", mac[0], mac[1],
-			   mac[2], mac[3], mac[4], mac[5]);
-		break;
-
-	case OCTNET_CMD_CHANGE_MTU:
-		/* If command is successful, change the MTU. */
-		netif_info(lio, probe, lio->netdev, " MTU Changed from %d to %d\n",
-			   netdev->mtu, nctrl->ncmd.s.param1);
-		dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
-			 netdev->name, netdev->mtu,
-			 nctrl->ncmd.s.param1);
-		rtnl_lock();
-		netdev->mtu = nctrl->ncmd.s.param1;
-		call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev);
-		rtnl_unlock();
-		break;
-
-	case OCTNET_CMD_GPIO_ACCESS:
-		netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n");
-
-		break;
-
-	case OCTNET_CMD_LRO_ENABLE:
-		dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name);
-		break;
-
-	case OCTNET_CMD_LRO_DISABLE:
-		dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n",
-			 netdev->name);
-		break;
-
-	case OCTNET_CMD_VERBOSE_ENABLE:
-		dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name);
-		break;
-
-	case OCTNET_CMD_VERBOSE_DISABLE:
-		dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n",
-			 netdev->name);
-		break;
-
-	case OCTNET_CMD_ENABLE_VLAN_FILTER:
-		dev_info(&oct->pci_dev->dev, "%s VLAN filter enabled\n",
-			 netdev->name);
-		break;
-
-	case OCTNET_CMD_ADD_VLAN_FILTER:
-		dev_info(&oct->pci_dev->dev, "%s VLAN filter %d added\n",
-			 netdev->name, nctrl->ncmd.s.param1);
-		break;
-
-	case OCTNET_CMD_DEL_VLAN_FILTER:
-		dev_info(&oct->pci_dev->dev, "%s VLAN filter %d removed\n",
-			 netdev->name, nctrl->ncmd.s.param1);
-		break;
-
-	case OCTNET_CMD_SET_SETTINGS:
-		dev_info(&oct->pci_dev->dev, "%s settings changed\n",
-			 netdev->name);
-
-		break;
-		/* Case to handle "OCTNET_CMD_TNL_RX_CSUM_CTL"
-		 * Command passed by NIC driver
-		 */
-	case OCTNET_CMD_TNL_RX_CSUM_CTL:
-		if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) {
-			netif_info(lio, probe, lio->netdev,
-				   "%s RX Checksum Offload Enabled\n",
-				   netdev->name);
-		} else if (nctrl->ncmd.s.param1 ==
-			   OCTNET_CMD_RXCSUM_DISABLE) {
-			netif_info(lio, probe, lio->netdev,
-				   "%s RX Checksum Offload Disabled\n",
-				   netdev->name);
-		}
-		break;
-
-		/* Case to handle "OCTNET_CMD_TNL_TX_CSUM_CTL"
-		 * Command passed by NIC driver
-		 */
-	case OCTNET_CMD_TNL_TX_CSUM_CTL:
-		if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) {
-			netif_info(lio, probe, lio->netdev,
-				   "%s TX Checksum Offload Enabled\n",
-				   netdev->name);
-		} else if (nctrl->ncmd.s.param1 ==
-			   OCTNET_CMD_TXCSUM_DISABLE) {
-			netif_info(lio, probe, lio->netdev,
-				   "%s TX Checksum Offload Disabled\n",
-				   netdev->name);
-		}
-		break;
-
-		/* Case to handle "OCTNET_CMD_VXLAN_PORT_CONFIG"
-		 * Command passed by NIC driver
-		 */
-	case OCTNET_CMD_VXLAN_PORT_CONFIG:
-		if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) {
-			netif_info(lio, probe, lio->netdev,
-				   "%s VxLAN Destination UDP PORT:%d ADDED\n",
-				   netdev->name,
-				   nctrl->ncmd.s.param1);
-		} else if (nctrl->ncmd.s.more ==
-			   OCTNET_CMD_VXLAN_PORT_DEL) {
-			netif_info(lio, probe, lio->netdev,
-				   "%s VxLAN Destination UDP PORT:%d DELETED\n",
-				   netdev->name,
-				   nctrl->ncmd.s.param1);
-		}
-		break;
-
-	case OCTNET_CMD_SET_FLOW_CTL:
-		netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n");
-		break;
-
-	default:
-		dev_err(&oct->pci_dev->dev, "%s Unknown cmd %d\n", __func__,
-			nctrl->ncmd.s.cmd);
-	}
-}
-
 /**
  * \brief Converts a mask based on net device flags
  * @param netdev network device
@@ -2817,8 +3024,7 @@
  */
 static inline int send_nic_timestamp_pkt(struct octeon_device *oct,
 					 struct octnic_data_pkt *ndata,
-					 struct octnet_buf_free_info *finfo,
-					 int xmit_more)
+					 struct octnet_buf_free_info *finfo)
 {
 	int retval;
 	struct octeon_soft_command *sc;
@@ -2846,9 +3052,15 @@
 	sc->callback_arg = finfo->skb;
 	sc->iq_no = ndata->q_no;
 
-	len = (u32)((struct octeon_instr_ih2 *)(&sc->cmd.cmd2.ih2))->dlengsz;
+	if (OCTEON_CN23XX_PF(oct))
+		len = (u32)((struct octeon_instr_ih3 *)
+			    (&sc->cmd.cmd3.ih3))->dlengsz;
+	else
+		len = (u32)((struct octeon_instr_ih2 *)
+			    (&sc->cmd.cmd2.ih2))->dlengsz;
 
-	ring_doorbell = !xmit_more;
+	ring_doorbell = 1;
+
 	retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd,
 				     sc, len, ndata->reqtype);
 
@@ -2881,7 +3093,7 @@
 	union tx_info *tx_info;
 	int status = 0;
 	int q_idx = 0, iq_no = 0;
-	int xmit_more, j;
+	int j;
 	u64 dptr = 0;
 	u32 tag = 0;
 
@@ -2980,7 +3192,10 @@
 			return NETDEV_TX_BUSY;
 		}
 
-		ndata.cmd.cmd2.dptr = dptr;
+		if (OCTEON_CN23XX_PF(oct))
+			ndata.cmd.cmd3.dptr = dptr;
+		else
+			ndata.cmd.cmd2.dptr = dptr;
 		finfo->dptr = dptr;
 		ndata.reqtype = REQTYPE_NORESP_NET;
 
@@ -3055,15 +3270,23 @@
 					   g->sg_size, DMA_TO_DEVICE);
 		dptr = g->sg_dma_ptr;
 
-		ndata.cmd.cmd2.dptr = dptr;
+		if (OCTEON_CN23XX_PF(oct))
+			ndata.cmd.cmd3.dptr = dptr;
+		else
+			ndata.cmd.cmd2.dptr = dptr;
 		finfo->dptr = dptr;
 		finfo->g = g;
 
 		ndata.reqtype = REQTYPE_NORESP_NET_SG;
 	}
 
-	irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh;
-	tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0];
+	if (OCTEON_CN23XX_PF(oct)) {
+		irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh;
+		tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0];
+	} else {
+		irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh;
+		tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0];
+	}
 
 	if (skb_shinfo(skb)->gso_size) {
 		tx_info->s.gso_size = skb_shinfo(skb)->gso_size;
@@ -3077,12 +3300,10 @@
 		irh->vlan = skb_vlan_tag_get(skb) & 0xfff;
 	}
 
-	xmit_more = skb->xmit_more;
-
 	if (unlikely(cmdsetup.s.timestamp))
-		status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more);
+		status = send_nic_timestamp_pkt(oct, &ndata, finfo);
 	else
-		status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more);
+		status = octnet_send_nic_data_pkt(oct, &ndata);
 	if (status == IQ_SEND_FAILED)
 		goto lio_xmit_failed;
 
@@ -3190,8 +3411,8 @@
  *                              OCTNET_CMD_RXCSUM_DISABLE
  * @returns                     SUCCESS or FAILURE
  */
-int liquidio_set_rxcsum_command(struct net_device *netdev, int command,
-				u8 rx_cmd)
+static int liquidio_set_rxcsum_command(struct net_device *netdev, int command,
+				       u8 rx_cmd)
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
@@ -3249,31 +3470,6 @@
 	return ret;
 }
 
-int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1)
-{
-	struct lio *lio = GET_LIO(netdev);
-	struct octeon_device *oct = lio->oct_dev;
-	struct octnic_ctrl_pkt nctrl;
-	int ret = 0;
-
-	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
-	nctrl.ncmd.u64 = 0;
-	nctrl.ncmd.s.cmd = cmd;
-	nctrl.ncmd.s.param1 = param1;
-	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 100;
-	nctrl.netpndev = (u64)netdev;
-	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
-	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
-		dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n",
-			ret);
-	}
-	return ret;
-}
-
 /** \brief Net device fix features
  * @param netdev  pointer to network device
  * @param request features requested
@@ -3492,8 +3688,9 @@
 	union oct_nic_if_cfg if_cfg;
 	unsigned int base_queue;
 	unsigned int gmx_port_id;
-	u32 resp_size, ctx_size;
+	u32 resp_size, ctx_size, data_size;
 	u32 ifidx_or_pfnum;
+	struct lio_version *vdata;
 
 	/* This is to handle link status changes */
 	octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC,
@@ -3515,21 +3712,37 @@
 	for (i = 0; i < octeon_dev->ifcount; i++) {
 		resp_size = sizeof(struct liquidio_if_cfg_resp);
 		ctx_size = sizeof(struct liquidio_if_cfg_context);
+		data_size = sizeof(struct lio_version);
 		sc = (struct octeon_soft_command *)
-			octeon_alloc_soft_command(octeon_dev, 0,
+			octeon_alloc_soft_command(octeon_dev, data_size,
 						  resp_size, ctx_size);
 		resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
 		ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+		vdata = (struct lio_version *)sc->virtdptr;
 
-		num_iqueues =
-			CFG_GET_NUM_TXQS_NIC_IF(octeon_get_conf(octeon_dev), i);
-		num_oqueues =
-			CFG_GET_NUM_RXQS_NIC_IF(octeon_get_conf(octeon_dev), i);
-		base_queue =
-			CFG_GET_BASE_QUE_NIC_IF(octeon_get_conf(octeon_dev), i);
-		gmx_port_id =
-			CFG_GET_GMXID_NIC_IF(octeon_get_conf(octeon_dev), i);
-		ifidx_or_pfnum = i;
+		*((u64 *)vdata) = 0;
+		vdata->major = cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION);
+		vdata->minor = cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION);
+		vdata->micro = cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION);
+
+		if (OCTEON_CN23XX_PF(octeon_dev)) {
+			num_iqueues = octeon_dev->sriov_info.num_pf_rings;
+			num_oqueues = octeon_dev->sriov_info.num_pf_rings;
+			base_queue = octeon_dev->sriov_info.pf_srn;
+
+			gmx_port_id = octeon_dev->pf_num;
+			ifidx_or_pfnum = octeon_dev->pf_num;
+		} else {
+			num_iqueues = CFG_GET_NUM_TXQS_NIC_IF(
+						octeon_get_conf(octeon_dev), i);
+			num_oqueues = CFG_GET_NUM_RXQS_NIC_IF(
+						octeon_get_conf(octeon_dev), i);
+			base_queue = CFG_GET_BASE_QUE_NIC_IF(
+						octeon_get_conf(octeon_dev), i);
+			gmx_port_id = CFG_GET_GMXID_NIC_IF(
+						octeon_get_conf(octeon_dev), i);
+			ifidx_or_pfnum = i;
+		}
 
 		dev_dbg(&octeon_dev->pci_dev->dev,
 			"requesting config for interface %d, iqs %d, oqs %d\n",
@@ -3566,7 +3779,11 @@
 		/* Sleep on a wait queue till the cond flag indicates that the
 		 * response arrived or timed-out.
 		 */
-		sleep_cond(&ctx->wc, &ctx->cond);
+		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
+			dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n");
+			goto setup_nic_wait_intr;
+		}
+
 		retval = resp->status;
 		if (retval) {
 			dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n");
@@ -3633,12 +3850,16 @@
 
 		lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
 
-		lio->dev_capability = NETIF_F_HIGHDMA
-				| NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
-				| NETIF_F_SG | NETIF_F_RXCSUM
-				| NETIF_F_GRO
-				| NETIF_F_TSO | NETIF_F_TSO6
-				| NETIF_F_LRO;
+		if (OCTEON_CN23XX_PF(octeon_dev) ||
+		    OCTEON_CN6XXX(octeon_dev)) {
+			lio->dev_capability = NETIF_F_HIGHDMA
+					      | NETIF_F_IP_CSUM
+					      | NETIF_F_IPV6_CSUM
+					      | NETIF_F_SG | NETIF_F_RXCSUM
+					      | NETIF_F_GRO
+					      | NETIF_F_TSO | NETIF_F_TSO6
+					      | NETIF_F_LRO;
+		}
 		netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE);
 
 		/*  Copy of transmit encapsulation capabilities:
@@ -3713,7 +3934,10 @@
 
 		/* Register ethtool support */
 		liquidio_set_ethtool_ops(netdev);
-		octeon_dev->priv_flags = 0x0;
+		if (lio->oct_dev->chip_id == OCTEON_CN23XX_PF_VID)
+			octeon_dev->priv_flags = OCT_PRIV_FLAG_DEFAULT;
+		else
+			octeon_dev->priv_flags = 0x0;
 
 		if (netdev->features & NETIF_F_LRO)
 			liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
@@ -3725,6 +3949,9 @@
 			liquidio_set_feature(netdev,
 					     OCTNET_CMD_VERBOSE_ENABLE, 0);
 
+		if (setup_link_status_change_wq(netdev))
+			goto setup_nic_dev_fail;
+
 		/* Register the network device with the OS */
 		if (register_netdev(netdev)) {
 			dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
@@ -3760,6 +3987,8 @@
 
 	octeon_free_soft_command(octeon_dev, sc);
 
+setup_nic_wait_intr:
+
 	while (i--) {
 		dev_err(&octeon_dev->pci_dev->dev,
 			"NIC ifidx:%d Setup failed\n", i);
@@ -3789,8 +4018,7 @@
 	/* run port_config command for each port */
 	oct->ifcount = num_nic_ports;
 
-	memset(oct->props, 0,
-	       sizeof(struct octdev_props) * num_nic_ports);
+	memset(oct->props, 0, sizeof(struct octdev_props) * num_nic_ports);
 
 	for (i = 0; i < MAX_OCTEON_LINKS; i++)
 		oct->props[i].gmxport = -1;
@@ -3806,7 +4034,7 @@
 	/* Initialize interrupt moderation params */
 	intrmod_cfg = &((struct octeon_device *)oct)->intrmod;
 	intrmod_cfg->rx_enable = 1;
-	intrmod_cfg->check_intrvl =   LIO_INTRMOD_CHECK_INTERVAL;
+	intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL;
 	intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR;
 	intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR;
 	intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER;
@@ -3818,6 +4046,7 @@
 	intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER;
 	intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct));
 	intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct));
+	intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct));
 	dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n");
 
 	return retval;
@@ -3880,6 +4109,7 @@
 static int octeon_device_init(struct octeon_device *octeon_dev)
 {
 	int j, ret;
+	int fw_loaded = 0;
 	char bootcmd[] = "\n";
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)octeon_dev->priv;
@@ -3901,9 +4131,23 @@
 
 	octeon_dev->app_mode = CVM_DRV_INVALID_APP;
 
-	/* Do a soft reset of the Octeon device. */
-	if (octeon_dev->fn_list.soft_reset(octeon_dev))
+	if (OCTEON_CN23XX_PF(octeon_dev)) {
+		if (!cn23xx_fw_loaded(octeon_dev)) {
+			fw_loaded = 0;
+			/* Do a soft reset of the Octeon device. */
+			if (octeon_dev->fn_list.soft_reset(octeon_dev))
+				return 1;
+			/* things might have changed */
+			if (!cn23xx_fw_loaded(octeon_dev))
+				fw_loaded = 0;
+			else
+				fw_loaded = 1;
+		} else {
+			fw_loaded = 1;
+		}
+	} else if (octeon_dev->fn_list.soft_reset(octeon_dev)) {
 		return 1;
+	}
 
 	/* Initialize the dispatch mechanism used to push packets arriving on
 	 * Octeon Output queues.
@@ -3925,6 +4169,22 @@
 
 	octeon_set_io_queues_off(octeon_dev);
 
+	if (OCTEON_CN23XX_PF(octeon_dev)) {
+		ret = octeon_dev->fn_list.setup_device_regs(octeon_dev);
+		if (ret) {
+			dev_err(&octeon_dev->pci_dev->dev, "OCTEON: Failed to configure device registers\n");
+			return ret;
+		}
+	}
+
+	/* Initialize soft command buffer pool
+	 */
+	if (octeon_setup_sc_buffer_pool(octeon_dev)) {
+		dev_err(&octeon_dev->pci_dev->dev, "sc buffer pool allocation failed\n");
+		return 1;
+	}
+	atomic_set(&octeon_dev->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE);
+
 	/*  Setup the data structures that manage this Octeon's Input queues. */
 	if (octeon_setup_instr_queues(octeon_dev)) {
 		dev_err(&octeon_dev->pci_dev->dev,
@@ -3936,14 +4196,6 @@
 	}
 	atomic_set(&octeon_dev->status, OCT_DEV_INSTR_QUEUE_INIT_DONE);
 
-	/* Initialize soft command buffer pool
-	 */
-	if (octeon_setup_sc_buffer_pool(octeon_dev)) {
-		dev_err(&octeon_dev->pci_dev->dev, "sc buffer pool allocation failed\n");
-		return 1;
-	}
-	atomic_set(&octeon_dev->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE);
-
 	/* Initialize lists to manage the requests of different types that
 	 * arrive from user & kernel applications for this octeon device.
 	 */
@@ -3963,15 +4215,23 @@
 
 	atomic_set(&octeon_dev->status, OCT_DEV_DROQ_INIT_DONE);
 
-	/* The input and output queue registers were setup earlier (the queues
-	 * were not enabled). Any additional registers that need to be
-	 * programmed should be done now.
-	 */
-	ret = octeon_dev->fn_list.setup_device_regs(octeon_dev);
-	if (ret) {
-		dev_err(&octeon_dev->pci_dev->dev,
-			"Failed to configure device registers\n");
-		return ret;
+	if (OCTEON_CN23XX_PF(octeon_dev)) {
+		if (octeon_allocate_ioq_vector(octeon_dev)) {
+			dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
+			return 1;
+		}
+
+	} else {
+		/* The input and output queue registers were setup earlier (the
+		 * queues were not enabled). Any additional registers
+		 * that need to be programmed should be done now.
+		 */
+		ret = octeon_dev->fn_list.setup_device_regs(octeon_dev);
+		if (ret) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"Failed to configure device registers\n");
+			return ret;
+		}
 	}
 
 	/* Initialize the tasklet that handles output queue packet processing.*/
@@ -3985,63 +4245,76 @@
 		return 1;
 
 	/* Enable Octeon device interrupts */
-	octeon_dev->fn_list.enable_interrupt(octeon_dev->chip);
+	octeon_dev->fn_list.enable_interrupt(octeon_dev, OCTEON_ALL_INTR);
 
 	/* Enable the input and output queues for this Octeon device */
-	octeon_dev->fn_list.enable_io_queues(octeon_dev);
+	ret = octeon_dev->fn_list.enable_io_queues(octeon_dev);
+	if (ret) {
+		dev_err(&octeon_dev->pci_dev->dev, "Failed to enable input/output queues");
+		return ret;
+	}
 
 	atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE);
 
-	dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n");
+	if ((!OCTEON_CN23XX_PF(octeon_dev)) || !fw_loaded) {
+		dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n");
+		if (!ddr_timeout) {
+			dev_info(&octeon_dev->pci_dev->dev,
+				 "WAITING. Set ddr_timeout to non-zero value to proceed with initialization.\n");
+		}
 
-	if (ddr_timeout == 0)
-		dev_info(&octeon_dev->pci_dev->dev, "WAITING. Set ddr_timeout to non-zero value to proceed with initialization.\n");
+		schedule_timeout_uninterruptible(HZ * LIO_RESET_SECS);
 
-	schedule_timeout_uninterruptible(HZ * LIO_RESET_SECS);
-
-	/* Wait for the octeon to initialize DDR after the soft-reset. */
-	while (ddr_timeout == 0) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (schedule_timeout(HZ / 10)) {
-			/* user probably pressed Control-C */
+		/* Wait for the octeon to initialize DDR after the soft-reset.*/
+		while (!ddr_timeout) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (schedule_timeout(HZ / 10)) {
+				/* user probably pressed Control-C */
+				return 1;
+			}
+		}
+		ret = octeon_wait_for_ddr_init(octeon_dev, &ddr_timeout);
+		if (ret) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"DDR not initialized. Please confirm that board is configured to boot from Flash, ret: %d\n",
+				ret);
 			return 1;
 		}
-	}
-	ret = octeon_wait_for_ddr_init(octeon_dev, &ddr_timeout);
-	if (ret) {
-		dev_err(&octeon_dev->pci_dev->dev,
-			"DDR not initialized. Please confirm that board is configured to boot from Flash, ret: %d\n",
-			ret);
-		return 1;
-	}
 
-	if (octeon_wait_for_bootloader(octeon_dev, 1000) != 0) {
-		dev_err(&octeon_dev->pci_dev->dev, "Board not responding\n");
-		return 1;
-	}
+		if (octeon_wait_for_bootloader(octeon_dev, 1000)) {
+			dev_err(&octeon_dev->pci_dev->dev, "Board not responding\n");
+			return 1;
+		}
 
-	/* Divert uboot to take commands from host instead. */
-	ret = octeon_console_send_cmd(octeon_dev, bootcmd, 50);
+		/* Divert uboot to take commands from host instead. */
+		ret = octeon_console_send_cmd(octeon_dev, bootcmd, 50);
 
-	dev_dbg(&octeon_dev->pci_dev->dev, "Initializing consoles\n");
-	ret = octeon_init_consoles(octeon_dev);
-	if (ret) {
-		dev_err(&octeon_dev->pci_dev->dev, "Could not access board consoles\n");
-		return 1;
-	}
-	ret = octeon_add_console(octeon_dev, 0);
-	if (ret) {
-		dev_err(&octeon_dev->pci_dev->dev, "Could not access board console\n");
-		return 1;
-	}
+		dev_dbg(&octeon_dev->pci_dev->dev, "Initializing consoles\n");
+		ret = octeon_init_consoles(octeon_dev);
+		if (ret) {
+			dev_err(&octeon_dev->pci_dev->dev, "Could not access board consoles\n");
+			return 1;
+		}
+		ret = octeon_add_console(octeon_dev, 0);
+		if (ret) {
+			dev_err(&octeon_dev->pci_dev->dev, "Could not access board console\n");
+			return 1;
+		}
 
-	atomic_set(&octeon_dev->status, OCT_DEV_CONSOLE_INIT_DONE);
+		atomic_set(&octeon_dev->status, OCT_DEV_CONSOLE_INIT_DONE);
 
-	dev_dbg(&octeon_dev->pci_dev->dev, "Loading firmware\n");
-	ret = load_firmware(octeon_dev);
-	if (ret) {
-		dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n");
-		return 1;
+		dev_dbg(&octeon_dev->pci_dev->dev, "Loading firmware\n");
+		ret = load_firmware(octeon_dev);
+		if (ret) {
+			dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n");
+			return 1;
+		}
+		/* set bit 1 of SLI_SCRATCH_1 to indicate that firmware is
+		 * loaded
+		 */
+		if (OCTEON_CN23XX_PF(octeon_dev))
+			octeon_write_csr64(octeon_dev, CN23XX_SLI_SCRATCH1,
+					   2ULL);
 	}
 
 	handshake[octeon_dev->octeon_id].init_ok = 1;
@@ -4057,7 +4330,6 @@
 		       octeon_dev->droq[j]->pkts_credit_reg);
 
 	/* Packets can start arriving on the output queues from this point. */
-
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 199a8b9..0d990ac 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -30,10 +30,24 @@
 
 #include "octeon_config.h"
 
-#define LIQUIDIO_BASE_VERSION   "1.4"
-#define LIQUIDIO_MICRO_VERSION  ".1"
 #define LIQUIDIO_PACKAGE ""
-#define LIQUIDIO_VERSION  "1.4.1"
+#define LIQUIDIO_BASE_MAJOR_VERSION 1
+#define LIQUIDIO_BASE_MINOR_VERSION 4
+#define LIQUIDIO_BASE_MICRO_VERSION 1
+#define LIQUIDIO_BASE_VERSION   __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \
+				__stringify(LIQUIDIO_BASE_MINOR_VERSION)
+#define LIQUIDIO_MICRO_VERSION  "." __stringify(LIQUIDIO_BASE_MICRO_VERSION)
+#define LIQUIDIO_VERSION        LIQUIDIO_PACKAGE \
+				__stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \
+				__stringify(LIQUIDIO_BASE_MINOR_VERSION) \
+				"." __stringify(LIQUIDIO_BASE_MICRO_VERSION)
+
+struct lio_version {
+	u16  major;
+	u16  minor;
+	u16  micro;
+	u16  reserved;
+};
 
 #define CONTROL_IQ 0
 /** Tag types used by Octeon cores in its work. */
@@ -218,6 +232,9 @@
 #define   OCTNET_CMD_ADD_VLAN_FILTER  0x17
 #define   OCTNET_CMD_DEL_VLAN_FILTER  0x18
 #define   OCTNET_CMD_VXLAN_PORT_CONFIG 0x19
+
+#define   OCTNET_CMD_ID_ACTIVE         0x1a
+
 #define   OCTNET_CMD_VXLAN_PORT_ADD    0x0
 #define   OCTNET_CMD_VXLAN_PORT_DEL    0x1
 #define   OCTNET_CMD_RXCSUM_ENABLE     0x0
@@ -296,6 +313,13 @@
 
 #define   OCTNET_CMD_SIZE     (sizeof(union octnet_cmd))
 
+/*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+#define LIO_SOFTCMDRESP_IH2       40
+#define LIO_SOFTCMDRESP_IH3       (40 + 8)
+
+#define LIO_PCICMD_O2             24
+#define LIO_PCICMD_O3             (24 + 8)
+
 /* Instruction Header(DPI) - for OCTEON-III models */
 struct  octeon_instr_ih3 {
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -814,6 +838,8 @@
 #define VITESSE_PHY_GPIO_DRIVEOFF 0x4
 #define VITESSE_PHY_GPIO_HIGH     0x2
 #define VITESSE_PHY_GPIO_LOW      0x3
+#define LED_IDENTIFICATION_ON     0x1
+#define LED_IDENTIFICATION_OFF    0x0
 
 struct oct_mdio_cmd {
 	u64 op;
@@ -832,7 +858,7 @@
 /* intrmod: max. packets to trigger interrupt */
 #define LIO_INTRMOD_RXMAXCNT_TRIGGER	384
 /* intrmod: min. packets to trigger interrupt */
-#define LIO_INTRMOD_RXMINCNT_TRIGGER	1
+#define LIO_INTRMOD_RXMINCNT_TRIGGER	0
 /* intrmod: max. time to trigger interrupt */
 #define LIO_INTRMOD_RXMAXTMR_TRIGGER	128
 /* 66xx:intrmod: min. time to trigger interrupt
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
index b3396e3..c765568 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -64,6 +64,34 @@
 #define   DEFAULT_NUM_NIC_PORTS_68XX   4
 #define   DEFAULT_NUM_NIC_PORTS_68XX_210NV  2
 
+/* CN23xx  IQ configuration macros */
+#define   CN23XX_MAX_RINGS_PER_PF_PASS_1_0 12
+#define   CN23XX_MAX_RINGS_PER_PF_PASS_1_1 32
+#define   CN23XX_MAX_RINGS_PER_PF          64
+
+#define   CN23XX_MAX_INPUT_QUEUES	CN23XX_MAX_RINGS_PER_PF
+#define   CN23XX_MAX_IQ_DESCRIPTORS	2048
+#define   CN23XX_DB_MIN                 1
+#define   CN23XX_DB_MAX                 8
+#define   CN23XX_DB_TIMEOUT             1
+
+#define   CN23XX_MAX_OUTPUT_QUEUES	CN23XX_MAX_RINGS_PER_PF
+#define   CN23XX_MAX_OQ_DESCRIPTORS	2048
+#define   CN23XX_OQ_BUF_SIZE		1536
+#define   CN23XX_OQ_PKTSPER_INTR	128
+/*#define CAVIUM_ONLY_CN23XX_RX_PERF*/
+#define   CN23XX_OQ_REFIL_THRESHOLD	128
+
+#define   CN23XX_OQ_INTR_PKT		64
+#define   CN23XX_OQ_INTR_TIME		100
+#define   DEFAULT_NUM_NIC_PORTS_23XX	1
+
+#define   CN23XX_CFG_IO_QUEUES		CN23XX_MAX_RINGS_PER_PF
+/* PEMs count */
+#define   CN23XX_MAX_MACS		4
+
+#define   CN23XX_DEF_IQ_INTR_THRESHOLD	32
+#define   CN23XX_DEF_IQ_INTR_BYTE_THRESHOLD   (64 * 1024)
 /* common OCTEON configuration macros */
 #define   CN6XXX_CFG_IO_QUEUES         32
 #define   OCTEON_32BYTE_INSTR          32
@@ -92,6 +120,9 @@
 #define CFG_GET_IQ_DB_MIN(cfg)                   ((cfg)->iq.db_min)
 #define CFG_GET_IQ_DB_TIMEOUT(cfg)               ((cfg)->iq.db_timeout)
 
+#define CFG_GET_IQ_INTR_PKT(cfg)                 ((cfg)->iq.iq_intr_pkt)
+#define CFG_SET_IQ_INTR_PKT(cfg, val)            (cfg)->iq.iq_intr_pkt = val
+
 #define CFG_GET_OQ_MAX_Q(cfg)                    ((cfg)->oq.max_oqs)
 #define CFG_GET_OQ_INFO_PTR(cfg)                 ((cfg)->oq.info_ptr)
 #define CFG_GET_OQ_PKTS_PER_INTR(cfg)            ((cfg)->oq.pkts_per_intr)
@@ -140,19 +171,24 @@
 enum lio_card_type {
 	LIO_210SV = 0, /* Two port, 66xx */
 	LIO_210NV,     /* Two port, 68xx */
-	LIO_410NV      /* Four port, 68xx */
+	LIO_410NV,     /* Four port, 68xx */
+	LIO_23XX       /* 23xx */
 };
 
 #define LIO_210SV_NAME "210sv"
 #define LIO_210NV_NAME "210nv"
 #define LIO_410NV_NAME "410nv"
+#define LIO_23XX_NAME  "23xx"
 
 /** Structure to define the configuration attributes for each Input queue.
  *  Applicable to all Octeon processors
  **/
 struct octeon_iq_config {
 #ifdef __BIG_ENDIAN_BITFIELD
-	u64 reserved:32;
+	u64 reserved:16;
+
+	/** Tx interrupt packets. Applicable to 23xx only */
+	u64 iq_intr_pkt:16;
 
 	/** Minimum ticks to wait before checking for pending instructions. */
 	u64 db_timeout:16;
@@ -192,7 +228,10 @@
 	/** Minimum ticks to wait before checking for pending instructions. */
 	u64 db_timeout:16;
 
-	u64 reserved:32;
+	/** Tx interrupt packets. Applicable to 23xx only */
+	u64 iq_intr_pkt:16;
+
+	u64 reserved:16;
 #endif
 };
 
@@ -416,11 +455,15 @@
 #define DISPATCH_LIST_SIZE                      BIT(OPCODE_MASK_BITS)
 
 /* Maximum number of Octeon Instruction (command) queues */
-#define MAX_OCTEON_INSTR_QUEUES(oct)         CN6XXX_MAX_INPUT_QUEUES
-/* Maximum number of Octeon Output queues */
-#define MAX_OCTEON_OUTPUT_QUEUES(oct)         CN6XXX_MAX_OUTPUT_QUEUES
+#define MAX_OCTEON_INSTR_QUEUES(oct)		\
+		(OCTEON_CN23XX_PF(oct) ? CN23XX_MAX_INPUT_QUEUES : \
+					CN6XXX_MAX_INPUT_QUEUES)
 
-#define MAX_POSSIBLE_OCTEON_INSTR_QUEUES       CN6XXX_MAX_INPUT_QUEUES
-#define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES      CN6XXX_MAX_OUTPUT_QUEUES
+/* Maximum number of Octeon Instruction (command) queues */
+#define MAX_OCTEON_OUTPUT_QUEUES(oct)		\
+		(OCTEON_CN23XX_PF(oct) ? CN23XX_MAX_OUTPUT_QUEUES : \
+					CN6XXX_MAX_OUTPUT_QUEUES)
 
+#define MAX_POSSIBLE_OCTEON_INSTR_QUEUES	CN23XX_MAX_INPUT_QUEUES
+#define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES	CN23XX_MAX_OUTPUT_QUEUES
 #endif /* __OCTEON_CONFIG_H__  */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
index bbb50ea..01a50f3 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
@@ -25,12 +25,13 @@
  */
 #include <linux/pci.h>
 #include <linux/netdevice.h>
+#include <linux/crc32.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
 #include "response_manager.h"
 #include "octeon_device.h"
-#include "octeon_main.h"
+#include "liquidio_image.h"
 #include "octeon_mem_ops.h"
 
 static void octeon_remote_lock(void);
@@ -40,6 +41,10 @@
 					     u32 flags);
 static int octeon_console_read(struct octeon_device *oct, u32 console_num,
 			       char *buffer, u32 buf_size);
+static u32 console_bitmask;
+module_param(console_bitmask, int, 0644);
+MODULE_PARM_DESC(console_bitmask,
+		 "Bitmask indicating which consoles have debug output redirected to syslog.");
 
 #define MIN(a, b) min((a), (b))
 #define CAST_ULL(v) ((u64)(v))
@@ -177,6 +182,15 @@
 	__cvmx_bootmem_desc_get(oct, addr,                               \
 		offsetof(struct cvmx_bootmem_named_block_desc, field),   \
 		SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field))
+/**
+ * \brief determines if a given console has debug enabled.
+ * @param console console to check
+ * @returns  1 = enabled. 0 otherwise
+ */
+static int octeon_console_debug_enabled(u32 console)
+{
+	return (console_bitmask >> (console)) & 0x1;
+}
 
 /**
  * This function is the implementation of the get macros defined
@@ -709,3 +723,104 @@
 
 	return bytes_to_read;
 }
+
+#define FBUF_SIZE	(4 * 1024 * 1024)
+u8 fbuf[FBUF_SIZE];
+
+int octeon_download_firmware(struct octeon_device *oct, const u8 *data,
+			     size_t size)
+{
+	int ret = 0;
+	u8 *p = fbuf;
+	u32 crc32_result;
+	u64 load_addr;
+	u32 image_len;
+	struct octeon_firmware_file_header *h;
+	u32 i, rem;
+
+	if (size < sizeof(struct octeon_firmware_file_header)) {
+		dev_err(&oct->pci_dev->dev, "Firmware file too small (%d < %d).\n",
+			(u32)size,
+			(u32)sizeof(struct octeon_firmware_file_header));
+		return -EINVAL;
+	}
+
+	h = (struct octeon_firmware_file_header *)data;
+
+	if (be32_to_cpu(h->magic) != LIO_NIC_MAGIC) {
+		dev_err(&oct->pci_dev->dev, "Unrecognized firmware file.\n");
+		return -EINVAL;
+	}
+
+	crc32_result = crc32((unsigned int)~0, data,
+			     sizeof(struct octeon_firmware_file_header) -
+			     sizeof(u32)) ^ ~0U;
+	if (crc32_result != be32_to_cpu(h->crc32)) {
+		dev_err(&oct->pci_dev->dev, "Firmware CRC mismatch (0x%08x != 0x%08x).\n",
+			crc32_result, be32_to_cpu(h->crc32));
+		return -EINVAL;
+	}
+
+	if (strncmp(LIQUIDIO_PACKAGE, h->version, strlen(LIQUIDIO_PACKAGE))) {
+		dev_err(&oct->pci_dev->dev, "Unmatched firmware package type. Expected %s, got %s.\n",
+			LIQUIDIO_PACKAGE, h->version);
+		return -EINVAL;
+	}
+
+	if (memcmp(LIQUIDIO_BASE_VERSION, h->version + strlen(LIQUIDIO_PACKAGE),
+		   strlen(LIQUIDIO_BASE_VERSION))) {
+		dev_err(&oct->pci_dev->dev, "Unmatched firmware version. Expected %s.x, got %s.\n",
+			LIQUIDIO_BASE_VERSION,
+			h->version + strlen(LIQUIDIO_PACKAGE));
+		return -EINVAL;
+	}
+
+	if (be32_to_cpu(h->num_images) > LIO_MAX_IMAGES) {
+		dev_err(&oct->pci_dev->dev, "Too many images in firmware file (%d).\n",
+			be32_to_cpu(h->num_images));
+		return -EINVAL;
+	}
+
+	dev_info(&oct->pci_dev->dev, "Firmware version: %s\n", h->version);
+	snprintf(oct->fw_info.liquidio_firmware_version, 32, "LIQUIDIO: %s",
+		 h->version);
+
+	data += sizeof(struct octeon_firmware_file_header);
+
+	dev_info(&oct->pci_dev->dev, "%s: Loading %d images\n", __func__,
+		 be32_to_cpu(h->num_images));
+	/* load all images */
+	for (i = 0; i < be32_to_cpu(h->num_images); i++) {
+		load_addr = be64_to_cpu(h->desc[i].addr);
+		image_len = be32_to_cpu(h->desc[i].len);
+
+		dev_info(&oct->pci_dev->dev, "Loading firmware %d at %llx\n",
+			 image_len, load_addr);
+
+		/* Write in 4MB chunks*/
+		rem = image_len;
+
+		while (rem) {
+			if (rem < FBUF_SIZE)
+				size = rem;
+			else
+				size = FBUF_SIZE;
+
+			memcpy(p, data, size);
+
+			/* download the image */
+			octeon_pci_write_core_mem(oct, load_addr, p, (u32)size);
+
+			data += size;
+			rem -= (u32)size;
+			load_addr += size;
+		}
+	}
+	dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n",
+		 h->bootcmd);
+
+	/* Invoke the bootcmd */
+	ret = octeon_console_send_cmd(oct, h->bootcmd, 50);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 0eb504a..586b688 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -20,7 +20,6 @@
 * Contact Cavium, Inc. for more information
 **********************************************************************/
 #include <linux/pci.h>
-#include <linux/crc32.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
 #include "liquidio_common.h"
@@ -32,8 +31,7 @@
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
-#include "liquidio_image.h"
-#include "octeon_mem_ops.h"
+#include "cn23xx_pf_device.h"
 
 /** Default configuration
  *  for CN66XX OCTEON Models.
@@ -420,6 +418,108 @@
 	,
 };
 
+static struct octeon_config default_cn23xx_conf = {
+	.card_type                              = LIO_23XX,
+	.card_name                              = LIO_23XX_NAME,
+	/** IQ attributes */
+	.iq = {
+		.max_iqs		= CN23XX_CFG_IO_QUEUES,
+		.pending_list_size	= (CN23XX_MAX_IQ_DESCRIPTORS *
+					   CN23XX_CFG_IO_QUEUES),
+		.instr_type		= OCTEON_64BYTE_INSTR,
+		.db_min			= CN23XX_DB_MIN,
+		.db_timeout		= CN23XX_DB_TIMEOUT,
+		.iq_intr_pkt		= CN23XX_DEF_IQ_INTR_THRESHOLD,
+	},
+
+	/** OQ attributes */
+	.oq = {
+		.max_oqs		= CN23XX_CFG_IO_QUEUES,
+		.info_ptr		= OCTEON_OQ_INFOPTR_MODE,
+		.pkts_per_intr	= CN23XX_OQ_PKTSPER_INTR,
+		.refill_threshold	= CN23XX_OQ_REFIL_THRESHOLD,
+		.oq_intr_pkt	= CN23XX_OQ_INTR_PKT,
+		.oq_intr_time	= CN23XX_OQ_INTR_TIME,
+	},
+
+	.num_nic_ports				= DEFAULT_NUM_NIC_PORTS_23XX,
+	.num_def_rx_descs			= CN23XX_MAX_OQ_DESCRIPTORS,
+	.num_def_tx_descs			= CN23XX_MAX_IQ_DESCRIPTORS,
+	.def_rx_buf_size			= CN23XX_OQ_BUF_SIZE,
+
+	/* For ethernet interface 0:  Port cfg Attributes */
+	.nic_if_cfg[0] = {
+		/* Max Txqs: Half for each of the two ports :max_iq/2 */
+		.max_txqs			= MAX_TXQS_PER_INTF,
+
+		/* Actual configured value. Range could be: 1...max_txqs */
+		.num_txqs			= DEF_TXQS_PER_INTF,
+
+		/* Max Rxqs: Half for each of the two ports :max_oq/2  */
+		.max_rxqs			= MAX_RXQS_PER_INTF,
+
+		/* Actual configured value. Range could be: 1...max_rxqs */
+		.num_rxqs			= DEF_RXQS_PER_INTF,
+
+		/* Num of desc for rx rings */
+		.num_rx_descs			= CN23XX_MAX_OQ_DESCRIPTORS,
+
+		/* Num of desc for tx rings */
+		.num_tx_descs			= CN23XX_MAX_IQ_DESCRIPTORS,
+
+		/* SKB size, We need not change buf size even for Jumbo frames.
+		 * Octeon can send jumbo frames in 4 consecutive descriptors,
+		 */
+		.rx_buf_size			= CN23XX_OQ_BUF_SIZE,
+
+		.base_queue			= BASE_QUEUE_NOT_REQUESTED,
+
+		.gmx_port_id			= 0,
+	},
+
+	.nic_if_cfg[1] = {
+		/* Max Txqs: Half for each of the two ports :max_iq/2 */
+		.max_txqs			= MAX_TXQS_PER_INTF,
+
+		/* Actual configured value. Range could be: 1...max_txqs */
+		.num_txqs			= DEF_TXQS_PER_INTF,
+
+		/* Max Rxqs: Half for each of the two ports :max_oq/2  */
+		.max_rxqs			= MAX_RXQS_PER_INTF,
+
+		/* Actual configured value. Range could be: 1...max_rxqs */
+		.num_rxqs			= DEF_RXQS_PER_INTF,
+
+		/* Num of desc for rx rings */
+		.num_rx_descs			= CN23XX_MAX_OQ_DESCRIPTORS,
+
+		/* Num of desc for tx rings */
+		.num_tx_descs			= CN23XX_MAX_IQ_DESCRIPTORS,
+
+		/* SKB size, We need not change buf size even for Jumbo frames.
+		 * Octeon can send jumbo frames in 4 consecutive descriptors,
+		 */
+		.rx_buf_size			= CN23XX_OQ_BUF_SIZE,
+
+		.base_queue			= BASE_QUEUE_NOT_REQUESTED,
+
+		.gmx_port_id			= 1,
+	},
+
+	.misc					= {
+		/* Host driver link query interval */
+		.oct_link_query_interval	= 100,
+
+		/* Octeon link query interval */
+		.host_link_query_interval	= 500,
+
+		.enable_sli_oq_bp		= 0,
+
+		/* Control queue group */
+		.ctrlq_grp			= 1,
+	}
+};
+
 enum {
 	OCTEON_CONFIG_TYPE_DEFAULT = 0,
 	NUM_OCTEON_CONFS,
@@ -487,6 +587,8 @@
 		} else if ((oct->chip_id == OCTEON_CN68XX) &&
 			   (card_type == LIO_410NV)) {
 			ret =  (void *)&default_cn68xx_conf;
+		} else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
+			ret =  (void *)&default_cn23xx_conf;
 		}
 		break;
 	default:
@@ -501,7 +603,8 @@
 	case OCTEON_CN66XX:
 	case OCTEON_CN68XX:
 		return lio_validate_cn6xxx_config_info(oct, conf);
-
+	case OCTEON_CN23XX_PF_VID:
+		return 0;
 	default:
 		break;
 	}
@@ -541,107 +644,6 @@
 	return oct_dev_app_str[CVM_DRV_INVALID_APP - CVM_DRV_APP_START];
 }
 
-u8 fbuf[4 * 1024 * 1024];
-
-int octeon_download_firmware(struct octeon_device *oct, const u8 *data,
-			     size_t size)
-{
-	int ret = 0;
-	u8 *p = fbuf;
-	u32 crc32_result;
-	u64 load_addr;
-	u32 image_len;
-	struct octeon_firmware_file_header *h;
-	u32 i, rem, base_len = strlen(LIQUIDIO_BASE_VERSION);
-	char *base;
-
-	if (size < sizeof(struct octeon_firmware_file_header)) {
-		dev_err(&oct->pci_dev->dev, "Firmware file too small (%d < %d).\n",
-			(u32)size,
-			(u32)sizeof(struct octeon_firmware_file_header));
-		return -EINVAL;
-	}
-
-	h = (struct octeon_firmware_file_header *)data;
-
-	if (be32_to_cpu(h->magic) != LIO_NIC_MAGIC) {
-		dev_err(&oct->pci_dev->dev, "Unrecognized firmware file.\n");
-		return -EINVAL;
-	}
-
-	crc32_result = crc32((unsigned int)~0, data,
-			     sizeof(struct octeon_firmware_file_header) -
-			     sizeof(u32)) ^ ~0U;
-	if (crc32_result != be32_to_cpu(h->crc32)) {
-		dev_err(&oct->pci_dev->dev, "Firmware CRC mismatch (0x%08x != 0x%08x).\n",
-			crc32_result, be32_to_cpu(h->crc32));
-		return -EINVAL;
-	}
-
-	if (strncmp(LIQUIDIO_PACKAGE, h->version, strlen(LIQUIDIO_PACKAGE))) {
-		dev_err(&oct->pci_dev->dev, "Unmatched firmware package type. Expected %s, got %s.\n",
-			LIQUIDIO_PACKAGE, h->version);
-		return -EINVAL;
-	}
-
-	base = h->version + strlen(LIQUIDIO_PACKAGE);
-	ret = memcmp(LIQUIDIO_BASE_VERSION, base, base_len);
-	if (ret) {
-		dev_err(&oct->pci_dev->dev, "Unmatched firmware version. Expected %s.x, got %s.\n",
-			LIQUIDIO_BASE_VERSION, base);
-		return -EINVAL;
-	}
-
-	if (be32_to_cpu(h->num_images) > LIO_MAX_IMAGES) {
-		dev_err(&oct->pci_dev->dev, "Too many images in firmware file (%d).\n",
-			be32_to_cpu(h->num_images));
-		return -EINVAL;
-	}
-
-	dev_info(&oct->pci_dev->dev, "Firmware version: %s\n", h->version);
-	snprintf(oct->fw_info.liquidio_firmware_version, 32, "LIQUIDIO: %s",
-		 h->version);
-
-	data += sizeof(struct octeon_firmware_file_header);
-
-	dev_info(&oct->pci_dev->dev, "%s: Loading %d images\n", __func__,
-		 be32_to_cpu(h->num_images));
-	/* load all images */
-	for (i = 0; i < be32_to_cpu(h->num_images); i++) {
-		load_addr = be64_to_cpu(h->desc[i].addr);
-		image_len = be32_to_cpu(h->desc[i].len);
-
-		dev_info(&oct->pci_dev->dev, "Loading firmware %d at %llx\n",
-			 image_len, load_addr);
-
-		/* Write in 4MB chunks*/
-		rem = image_len;
-
-		while (rem) {
-			if (rem < (4 * 1024 * 1024))
-				size = rem;
-			else
-				size = 4 * 1024 * 1024;
-
-			memcpy(p, data, size);
-
-			/* download the image */
-			octeon_pci_write_core_mem(oct, load_addr, p, (u32)size);
-
-			data += size;
-			rem -= (u32)size;
-			load_addr += size;
-		}
-	}
-	dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n",
-		 h->bootcmd);
-
-	/* Invoke the bootcmd */
-	ret = octeon_console_send_cmd(oct, h->bootcmd, 50);
-
-	return 0;
-}
-
 void octeon_free_device_mem(struct octeon_device *oct)
 {
 	int i;
@@ -676,6 +678,9 @@
 		configsize = sizeof(struct octeon_cn6xxx);
 		break;
 
+	case OCTEON_CN23XX_PF_VID:
+		configsize = sizeof(struct octeon_cn23xx_pf);
+		break;
 	default:
 		pr_err("%s: Unknown PCI Device: 0x%x\n",
 		       __func__,
@@ -741,6 +746,45 @@
 	return oct;
 }
 
+int
+octeon_allocate_ioq_vector(struct octeon_device  *oct)
+{
+	int i, num_ioqs = 0;
+	struct octeon_ioq_vector *ioq_vector;
+	int cpu_num;
+	int size;
+
+	if (OCTEON_CN23XX_PF(oct))
+		num_ioqs = oct->sriov_info.num_pf_rings;
+	size = sizeof(struct octeon_ioq_vector) * num_ioqs;
+
+	oct->ioq_vector = vmalloc(size);
+	if (!oct->ioq_vector)
+		return 1;
+	memset(oct->ioq_vector, 0, size);
+	for (i = 0; i < num_ioqs; i++) {
+		ioq_vector		= &oct->ioq_vector[i];
+		ioq_vector->oct_dev	= oct;
+		ioq_vector->iq_index	= i;
+		ioq_vector->droq_index	= i;
+
+		cpu_num = i % num_online_cpus();
+		cpumask_set_cpu(cpu_num, &ioq_vector->affinity_mask);
+
+		if (oct->chip_id == OCTEON_CN23XX_PF_VID)
+			ioq_vector->ioq_num	= i + oct->sriov_info.pf_srn;
+		else
+			ioq_vector->ioq_num	= i;
+	}
+	return 0;
+}
+
+void
+octeon_free_ioq_vector(struct octeon_device *oct)
+{
+	vfree(oct->ioq_vector);
+}
+
 /* this function is only for setting up the first queue */
 int octeon_setup_instr_queues(struct octeon_device *oct)
 {
@@ -749,10 +793,12 @@
 	union oct_txpciq txpciq;
 	int numa_node = cpu_to_node(iq_no % num_online_cpus());
 
-	/* this causes queue 0 to be default queue */
 	if (OCTEON_CN6XXX(oct))
 		num_descs =
 			CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
+	else if (OCTEON_CN23XX_PF(oct))
+		num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn23xx_pf,
+								conf));
 
 	oct->num_iqs = 0;
 
@@ -769,6 +815,7 @@
 	oct->instr_queue[0]->ifidx = 0;
 	txpciq.u64 = 0;
 	txpciq.s.q_no = iq_no;
+	txpciq.s.pkind = oct->pfvf_hsword.pkind;
 	txpciq.s.use_qpg = 0;
 	txpciq.s.qpg = 0;
 	if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
@@ -788,14 +835,17 @@
 	u32 oq_no = 0;
 	int numa_node = cpu_to_node(oq_no % num_online_cpus());
 
-	/* this causes queue 0 to be default queue */
 	if (OCTEON_CN6XXX(oct)) {
 		num_descs =
 			CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
 		desc_size =
 			CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn6xxx, conf));
+	} else if (OCTEON_CN23XX_PF(oct)) {
+		num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn23xx_pf,
+								conf));
+		desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn23xx_pf,
+							       conf));
 	}
-
 	oct->num_oqs = 0;
 	oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node);
 	if (!oct->droq[0])
@@ -812,10 +862,10 @@
 
 void octeon_set_io_queues_off(struct octeon_device *oct)
 {
-	/* Disable the i/p and o/p queues for this Octeon. */
-
-	octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0);
-	octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0);
+	if (OCTEON_CN6XXX(oct)) {
+		octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0);
+		octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0);
+	}
 }
 
 void octeon_set_droq_pkt_op(struct octeon_device *oct,
@@ -825,14 +875,16 @@
 	u32 reg_val = 0;
 
 	/* Disable the i/p and o/p queues for this Octeon. */
-	reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB);
+	if (OCTEON_CN6XXX(oct)) {
+		reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB);
 
-	if (enable)
-		reg_val = reg_val | (1 << q_no);
-	else
-		reg_val = reg_val & (~(1 << q_no));
+		if (enable)
+			reg_val = reg_val | (1 << q_no);
+		else
+			reg_val = reg_val & (~(1 << q_no));
 
-	octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val);
+		octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val);
+	}
 }
 
 int octeon_init_dispatch_list(struct octeon_device *oct)
@@ -1019,6 +1071,9 @@
 	if (OCTEON_CN6XXX(oct))
 		num_nic_ports =
 			CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn6xxx, conf));
+	else if (OCTEON_CN23XX_PF(oct))
+		num_nic_ports =
+			CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn23xx_pf, conf));
 
 	if (atomic_read(&oct->status) >= OCT_DEV_RUNNING) {
 		dev_err(&oct->pci_dev->dev, "Received CORE OK when device state is 0x%x\n",
@@ -1046,6 +1101,12 @@
 	}
 	oct->fw_info.app_cap_flags = recv_pkt->rh.r_core_drv_init.app_cap_flags;
 	oct->fw_info.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode;
+	oct->pfvf_hsword.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode;
+
+	oct->pfvf_hsword.pkind = recv_pkt->rh.r_core_drv_init.pkind;
+
+	for (i = 0; i < oct->num_iqs; i++)
+		oct->instr_queue[i]->txpciq.s.pkind = oct->pfvf_hsword.pkind;
 
 	atomic_set(&oct->status, OCT_DEV_CORE_OK);
 
@@ -1108,8 +1169,10 @@
 	if (OCTEON_CN6XXX(oct)) {
 		default_oct_conf =
 			(struct octeon_config *)(CHIP_FIELD(oct, cn6xxx, conf));
+	} else if (OCTEON_CN23XX_PF(oct)) {
+		default_oct_conf = (struct octeon_config *)
+			(CHIP_FIELD(oct, cn23xx_pf, conf));
 	}
-
 	return default_oct_conf;
 }
 
@@ -1141,7 +1204,9 @@
 	 * So write MSB first
 	 */
 	addrhi = (addr >> 32);
-	if ((oct->chip_id == OCTEON_CN66XX) || (oct->chip_id == OCTEON_CN68XX))
+	if ((oct->chip_id == OCTEON_CN66XX) ||
+	    (oct->chip_id == OCTEON_CN68XX) ||
+	    (oct->chip_id == OCTEON_CN23XX_PF_VID))
 		addrhi |= 0x00060000;
 	writel(addrhi, oct->reg_list.pci_win_rd_addr_hi);
 
@@ -1185,8 +1250,15 @@
 	u64 lmc0_reset_ctl;
 
 	/* Check to make sure a DDR interface is enabled */
-	lmc0_reset_ctl = lio_pci_readq(oct, CN6XXX_LMC0_RESET_CTL);
-	access_okay = (lmc0_reset_ctl & CN6XXX_LMC0_RESET_CTL_DDR3RST_MASK);
+	if (OCTEON_CN23XX_PF(oct)) {
+		lmc0_reset_ctl = lio_pci_readq(oct, CN23XX_LMC0_RESET_CTL);
+		access_okay =
+			(lmc0_reset_ctl & CN23XX_LMC0_RESET_CTL_DDR3RST_MASK);
+	} else {
+		lmc0_reset_ctl = lio_pci_readq(oct, CN6XXX_LMC0_RESET_CTL);
+		access_okay =
+			(lmc0_reset_ctl & CN6XXX_LMC0_RESET_CTL_DDR3RST_MASK);
+	}
 
 	return access_okay ? 0 : 1;
 }
@@ -1226,3 +1298,39 @@
 			return octeon_dev->octeon_id;
 	return -1;
 }
+
+void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
+{
+	u64 instr_cnt;
+	struct octeon_device *oct = NULL;
+
+	/* the whole thing needs to be atomic, ideally */
+	if (droq) {
+		spin_lock_bh(&droq->lock);
+		writel(droq->pkt_count, droq->pkts_sent_reg);
+		droq->pkt_count = 0;
+		spin_unlock_bh(&droq->lock);
+		oct = droq->oct_dev;
+	}
+	if (iq) {
+		spin_lock_bh(&iq->lock);
+		writel(iq->pkt_in_done, iq->inst_cnt_reg);
+		iq->pkt_in_done = 0;
+		spin_unlock_bh(&iq->lock);
+		oct = iq->oct_dev;
+	}
+	/*write resend. Writing RESEND in SLI_PKTX_CNTS should be enough
+	 *to trigger tx interrupts as well, if they are pending.
+	 */
+	if (oct && OCTEON_CN23XX_PF(oct)) {
+		if (droq)
+			writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg);
+		/*we race with firmrware here. read and write the IN_DONE_CNTS*/
+		else if (iq) {
+			instr_cnt =  readq(iq->inst_cnt_reg);
+			writeq(((instr_cnt & 0xFFFFFFFF00000000ULL) |
+				CN23XX_INTR_RESEND),
+			       iq->inst_cnt_reg);
+		}
+	}
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 01edfb4..da15c2a 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -30,13 +30,19 @@
 /** PCI VendorId Device Id */
 #define  OCTEON_CN68XX_PCIID          0x91177d
 #define  OCTEON_CN66XX_PCIID          0x92177d
-
+#define  OCTEON_CN23XX_PCIID_PF       0x9702177d
 /** Driver identifies chips by these Ids, created by clubbing together
  *  DeviceId+RevisionId; Where Revision Id is not used to distinguish
  *  between chips, a value of 0 is used for revision id.
  */
 #define  OCTEON_CN68XX                0x0091
 #define  OCTEON_CN66XX                0x0092
+#define  OCTEON_CN23XX_PF_VID         0x9702
+
+/**RevisionId for the chips */
+#define  OCTEON_CN23XX_REV_1_0        0x00
+#define  OCTEON_CN23XX_REV_1_1        0x01
+#define  OCTEON_CN23XX_REV_2_0        0x80
 
 /** Endian-swap modes supported by Octeon. */
 enum octeon_pci_swap_mode {
@@ -46,6 +52,9 @@
 	OCTEON_PCI_32BIT_LW_SWAP = 3
 };
 
+#define  OCTEON_OUTPUT_INTR   (2)
+#define  OCTEON_ALL_INTR      0xff
+
 /*---------------   PCI BAR1 index registers -------------*/
 
 /* BAR1 Mask */
@@ -198,9 +207,9 @@
 	void (*setup_oq_regs)(struct octeon_device *, u32);
 
 	irqreturn_t (*process_interrupt_regs)(void *);
+	u64 (*msix_interrupt_handler)(void *);
 	int (*soft_reset)(struct octeon_device *);
 	int (*setup_device_regs)(struct octeon_device *);
-	void (*reinit_regs)(struct octeon_device *);
 	void (*bar1_idx_setup)(struct octeon_device *, u64, u32, int);
 	void (*bar1_idx_write)(struct octeon_device *, u32, u32);
 	u32 (*bar1_idx_read)(struct octeon_device *, u32);
@@ -209,10 +218,10 @@
 	void (*enable_oq_pkt_time_intr)(struct octeon_device *, u32);
 	void (*disable_oq_pkt_time_intr)(struct octeon_device *, u32);
 
-	void (*enable_interrupt)(void *);
-	void (*disable_interrupt)(void *);
+	void (*enable_interrupt)(struct octeon_device *, u8);
+	void (*disable_interrupt)(struct octeon_device *, u8);
 
-	void (*enable_io_queues)(struct octeon_device *);
+	int (*enable_io_queues)(struct octeon_device *);
 	void (*disable_io_queues)(struct octeon_device *);
 };
 
@@ -266,11 +275,72 @@
 	/* Each interface in the Octeon device has a network
 	 * device pointer (used for OS specific calls).
 	 */
+	int    rx_on;
 	int    napi_enabled;
 	int    gmxport;
 	struct net_device *netdev;
 };
 
+#define LIO_FLAG_MSIX_ENABLED	0x1
+#define MSIX_PO_INT		0x1
+#define MSIX_PI_INT		0x2
+
+struct octeon_pf_vf_hs_word {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+	/** PKIND value assigned for the DPI interface */
+	u64        pkind : 8;
+
+	/** OCTEON core clock multiplier   */
+	u64        core_tics_per_us : 16;
+
+	/** OCTEON coprocessor clock multiplier  */
+	u64        coproc_tics_per_us : 16;
+
+	/** app that currently running on OCTEON  */
+	u64        app_mode : 8;
+
+	/** RESERVED */
+	u64 reserved : 16;
+
+#else
+
+	/** RESERVED */
+	u64 reserved : 16;
+
+	/** app that currently running on OCTEON  */
+	u64        app_mode : 8;
+
+	/** OCTEON coprocessor clock multiplier  */
+	u64        coproc_tics_per_us : 16;
+
+	/** OCTEON core clock multiplier   */
+	u64        core_tics_per_us : 16;
+
+	/** PKIND value assigned for the DPI interface */
+	u64        pkind : 8;
+#endif
+};
+
+struct octeon_sriov_info {
+	/* Actual rings left for PF device */
+	u32	num_pf_rings;
+
+	/* SRN of PF usable IO queues   */
+	u32	pf_srn;
+	/* total pf rings */
+	u32	trs;
+
+};
+
+struct octeon_ioq_vector {
+	struct octeon_device   *oct_dev;
+	int		        iq_index;
+	int		        droq_index;
+	int			vector;
+	struct cpumask		affinity_mask;
+	u32			ioq_num;
+};
+
 /** The Octeon device.
  *  Each Octeon device has this structure to represent all its
  *  components.
@@ -296,7 +366,7 @@
 	/** Octeon Chip type. */
 	u16 chip_id;
 	u16 rev_id;
-
+	u16 pf_num;
 	/** This device's id - set by the driver. */
 	u32 octeon_id;
 
@@ -305,7 +375,6 @@
 
 	u16 flags;
 #define LIO_FLAG_MSI_ENABLED                  (u32)(1 << 1)
-#define LIO_FLAG_MSIX_ENABLED                 (u32)(1 << 2)
 
 	/** The state of this device */
 	atomic_t status;
@@ -395,6 +464,19 @@
 
 	void *priv;
 
+	int num_msix_irqs;
+
+	void *msix_entries;
+
+	struct octeon_sriov_info sriov_info;
+
+	struct octeon_pf_vf_hs_word pfvf_hsword;
+
+	int msix_on;
+
+	/** IOq information of it's corresponding MSI-X interrupt. */
+	struct octeon_ioq_vector    *ioq_vector;
+
 	int rx_pause;
 	int tx_pause;
 
@@ -402,12 +484,15 @@
 
 	/* private flags to control driver-specific features through ethtool */
 	u32 priv_flags;
+
+	void *watchdog_task;
 };
 
 #define  OCT_DRV_ONLINE 1
 #define  OCT_DRV_OFFLINE 2
 #define  OCTEON_CN6XXX(oct)           ((oct->chip_id == OCTEON_CN66XX) || \
 				       (oct->chip_id == OCTEON_CN68XX))
+#define  OCTEON_CN23XX_PF(oct)        (oct->chip_id == OCTEON_CN23XX_PF_VID)
 #define CHIP_FIELD(oct, TYPE, field)             \
 	(((struct octeon_ ## TYPE  *)(oct->chip))->field)
 
@@ -661,13 +746,24 @@
  */
 struct octeon_config *octeon_get_conf(struct octeon_device *oct);
 
+void octeon_free_ioq_vector(struct octeon_device *oct);
+int octeon_allocate_ioq_vector(struct octeon_device  *oct);
+void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq);
+
 /* LiquidIO driver pivate flags */
 enum {
 	OCT_PRIV_FLAG_TX_BYTES = 0, /* Tx interrupts by pending byte count */
 };
 
-static inline void lio_set_priv_flag(struct octeon_device *octdev, u32 flag,
-				     u32 val)
+#define OCT_PRIV_FLAG_DEFAULT 0x0
+
+static inline u32 lio_get_priv_flag(struct octeon_device *octdev, u32 flag)
+{
+	return !!(octdev->priv_flags & (0x1 << flag));
+}
+
+static inline void lio_set_priv_flag(struct octeon_device *octdev,
+				     u32 flag, u32 val)
 {
 	if (val)
 		octdev->priv_flags |= (0x1 << flag);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index e0afe4c..f60e532 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -31,6 +31,7 @@
 #include "octeon_network.h"
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
+#include "cn23xx_pf_device.h"
 
 #define     CVM_MIN(d1, d2)           (((d1) < (d2)) ? (d1) : (d2))
 #define     CVM_MAX(d1, d2)           (((d1) > (d2)) ? (d1) : (d2))
@@ -92,22 +93,25 @@
 	return fn_arg;
 }
 
-/** Check for packets on Droq. This function should be called with
- * lock held.
+/** Check for packets on Droq. This function should be called with lock held.
  *  @param  droq - Droq on which count is checked.
  *  @return Returns packet count.
  */
 u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq)
 {
 	u32 pkt_count = 0;
+	u32 last_count;
 
 	pkt_count = readl(droq->pkts_sent_reg);
-	if (pkt_count) {
-		atomic_add(pkt_count, &droq->pkts_pending);
-		writel(pkt_count, droq->pkts_sent_reg);
-	}
 
-	return pkt_count;
+	last_count = pkt_count - droq->pkt_count;
+	droq->pkt_count = pkt_count;
+
+	/* we shall write to cnts  at napi irq enable or end of droq tasklet */
+	if (last_count)
+		atomic_add(last_count, &droq->pkts_pending);
+
+	return last_count;
 }
 
 static void octeon_droq_compute_max_packet_bufs(struct octeon_droq *droq)
@@ -259,6 +263,11 @@
 		c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x);
 		c_refill_threshold =
 			(u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x);
+	} else if (OCTEON_CN23XX_PF(oct)) {
+		struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+
+		c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
+		c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
 	} else {
 		return 1;
 	}
@@ -564,7 +573,7 @@
 			(unsigned int)rh->r.opcode,
 			(unsigned int)rh->r.subcode);
 		droq->stats.dropped_nodispatch++;
-	}                       /* else (dispatch_fn ... */
+	}
 
 	return cnt;
 }
@@ -735,16 +744,20 @@
 	u32 pkt_count = 0, pkts_processed = 0;
 	struct list_head *tmp, *tmp2;
 
+	/* Grab the droq lock */
+	spin_lock(&droq->lock);
+
+	octeon_droq_check_hw_for_pkts(droq);
 	pkt_count = atomic_read(&droq->pkts_pending);
-	if (!pkt_count)
+
+	if (!pkt_count) {
+		spin_unlock(&droq->lock);
 		return 0;
+	}
 
 	if (pkt_count > budget)
 		pkt_count = budget;
 
-	/* Grab the droq lock */
-	spin_lock(&droq->lock);
-
 	pkts_processed = octeon_droq_fast_process_packets(oct, droq, pkt_count);
 
 	atomic_sub(pkts_processed, &droq->pkts_pending);
@@ -789,6 +802,8 @@
 	spin_lock(&droq->lock);
 
 	while (total_pkts_processed < budget) {
+		octeon_droq_check_hw_for_pkts(droq);
+
 		pkts_available =
 			CVM_MIN((budget - total_pkts_processed),
 				(u32)(atomic_read(&droq->pkts_pending)));
@@ -803,8 +818,6 @@
 		atomic_sub(pkts_processed, &droq->pkts_pending);
 
 		total_pkts_processed += pkts_processed;
-
-		octeon_droq_check_hw_for_pkts(droq);
 	}
 
 	spin_unlock(&droq->lock);
@@ -874,8 +887,11 @@
 			return 0;
 		}
 		break;
+		case OCTEON_CN23XX_PF_VID: {
+			lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
 		}
-
+		break;
+		}
 		return 0;
 	}
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index 5a6fb91..5be002d 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -261,6 +261,8 @@
 
 	u32 q_no;
 
+	u32 pkt_count;
+
 	struct octeon_droq_ops ops;
 
 	struct octeon_device *oct_dev;
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index ff4b1d6..e4d426b 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -88,6 +88,8 @@
 	/** A spinlock to protect while posting on the ring.  */
 	spinlock_t post_lock;
 
+	u32 pkt_in_done;
+
 	/** A spinlock to protect access to the input ring.*/
 	spinlock_t iq_flush_running_lock;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index bc14e4c..366298f 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -38,12 +38,26 @@
 
 #define DRV_NAME "LiquidIO"
 
-/**
- * \brief determines if a given console has debug enabled.
- * @param console console to check
- * @returns  1 = enabled. 0 otherwise
+/** This structure is used by NIC driver to store information required
+ * to free the sk_buff when the packet has been fetched by Octeon.
+ * Bytes offset below assume worst-case of a 64-bit system.
  */
-int octeon_console_debug_enabled(u32 console);
+struct octnet_buf_free_info {
+	/** Bytes 1-8.  Pointer to network device private structure. */
+	struct lio *lio;
+
+	/** Bytes 9-16.  Pointer to sk_buff. */
+	struct sk_buff *skb;
+
+	/** Bytes 17-24.  Pointer to gather list. */
+	struct octnic_gather *g;
+
+	/** Bytes 25-32. Physical address of skb->data or gather list. */
+	u64 dptr;
+
+	/** Bytes 33-47. Piggybacked soft command, if any */
+	struct octeon_soft_command *sc;
+};
 
 /* BQL-related functions */
 void octeon_report_sent_bytes_to_bql(void *buf, int reqtype);
@@ -167,22 +181,26 @@
 #define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \
 		free_pages(orig_ptr, get_order(size))
 
-static inline void
+static inline int
 sleep_cond(wait_queue_head_t *wait_queue, int *condition)
 {
+	int errno = 0;
 	wait_queue_t we;
 
 	init_waitqueue_entry(&we, current);
 	add_wait_queue(wait_queue, &we);
 	while (!(READ_ONCE(*condition))) {
 		set_current_state(TASK_INTERRUPTIBLE);
-		if (signal_pending(current))
+		if (signal_pending(current)) {
+			errno = -EINTR;
 			goto out;
+		}
 		schedule();
 	}
 out:
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(wait_queue, &we);
+	return errno;
 }
 
 static inline void
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
index 95a4bbe..0dc081a 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
@@ -19,7 +19,6 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium, Inc. for more information
  **********************************************************************/
-#include <linux/pci.h>
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index fb820dc..e5d1deb 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -26,8 +26,6 @@
 
 #ifndef __OCTEON_NETWORK_H__
 #define __OCTEON_NETWORK_H__
-#include <linux/version.h>
-#include <linux/dma-mapping.h>
 #include <linux/ptp_clock_kernel.h>
 
 #define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE)
@@ -124,11 +122,21 @@
 
 	/* work queue for  txq status */
 	struct cavium_wq	txq_status_wq;
+
+	/* work queue for  link status */
+	struct cavium_wq	link_status_wq;
+
 };
 
 #define LIO_SIZE         (sizeof(struct lio))
 #define GET_LIO(netdev)  ((struct lio *)netdev_priv(netdev))
 
+#define CIU3_WDOG(c)                 (0x1010000020000ULL + (c << 3))
+#define CIU3_WDOG_MASK               12ULL
+#define LIO_MONITOR_WDOG_EXPIRE      1
+#define LIO_MONITOR_CORE_STUCK_MSGD  2
+#define LIO_MAX_CORES                12
+
 /**
  * \brief Enable or disable feature
  * @param netdev    pointer to network device
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
index 166727b..40ac1fe 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
@@ -19,7 +19,6 @@
  * This file may also be available under a different license from Cavium.
  * Contact Cavium, Inc. for more information
  **********************************************************************/
-#include <linux/interrupt.h>
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
@@ -36,6 +35,7 @@
 			       u32		       rdatasize)
 {
 	struct octeon_soft_command *sc;
+	struct octeon_instr_ih3  *ih3;
 	struct octeon_instr_ih2  *ih2;
 	struct octeon_instr_irh *irh;
 	struct octeon_instr_rdp *rdp;
@@ -52,10 +52,19 @@
 	/* Add in the response related fields. Opcode and Param are already
 	 * there.
 	 */
-	ih2      = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
-	rdp     = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
-	irh     = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
-	ih2->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+	if (OCTEON_CN23XX_PF(oct)) {
+		ih3      = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
+		rdp     = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
+		irh     = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+		/*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+		ih3->fsz = LIO_SOFTCMDRESP_IH3;
+	} else {
+		ih2      = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+		rdp     = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+		irh     = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+		/* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */
+		ih2->fsz = LIO_SOFTCMDRESP_IH2;
+	}
 
 	irh->rflag = 1; /* a response is required */
 
@@ -64,7 +73,10 @@
 
 	*sc->status_word = COMPLETION_WORD_INIT;
 
-	sc->cmd.cmd2.rptr =  sc->dmarptr;
+	if (OCTEON_CN23XX_PF(oct))
+		sc->cmd.cmd3.rptr =  sc->dmarptr;
+	else
+		sc->cmd.cmd2.rptr =  sc->dmarptr;
 
 	sc->wait_time = 1000;
 	sc->timeout = jiffies + sc->wait_time;
@@ -73,12 +85,9 @@
 }
 
 int octnet_send_nic_data_pkt(struct octeon_device *oct,
-			     struct octnic_data_pkt *ndata,
-			     u32 xmit_more)
+			     struct octnic_data_pkt *ndata)
 {
-	int ring_doorbell;
-
-	ring_doorbell = !xmit_more;
+	int ring_doorbell = 1;
 
 	return octeon_send_command(oct, ndata->q_no, ring_doorbell, &ndata->cmd,
 				   ndata->buf, ndata->datasize,
@@ -183,8 +192,8 @@
 	retval = octeon_send_soft_command(oct, sc);
 	if (retval == IQ_SEND_FAILED) {
 		octeon_free_soft_command(oct, sc);
-		dev_err(&oct->pci_dev->dev, "%s soft command:%d send failed status: %x\n",
-			__func__, nctrl->ncmd.s.cmd, retval);
+		dev_err(&oct->pci_dev->dev, "%s pf_num:%d soft command:%d send failed status: %x\n",
+			__func__, oct->pf_num, nctrl->ncmd.s.cmd, retval);
 		spin_unlock_bh(&oct->cmd_resp_wqlock);
 		return -1;
 	}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
index b71a2bb..4b8da67b 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
@@ -138,7 +138,7 @@
 	/* assume that rflag is cleared so therefore front data will only have
 	 * irh and ossp[0], ossp[1] for a total of 32 bytes
 	 */
-	ih2->fsz = 24;
+	ih2->fsz = LIO_PCICMD_O2;
 
 	ih2->tagtype = ORDERED_TAG;
 	ih2->grp = DEFAULT_POW_GRP;
@@ -196,7 +196,7 @@
 	 */
 	ih3->pkind       = oct->instr_queue[setup->s.iq_no]->txpciq.s.pkind;
 	/*PKI IH*/
-	ih3->fsz = 24 + 8;
+	ih3->fsz = LIO_PCICMD_O3;
 
 	if (!setup->s.gather) {
 		ih3->dlengsz = setup->s.u.datasize;
@@ -278,7 +278,7 @@
  * queue should be stopped, and IQ_SEND_OK if it sent okay.
  */
 int octnet_send_nic_data_pkt(struct octeon_device *oct,
-			     struct octnic_data_pkt *ndata, u32 xmit_more);
+			     struct octnic_data_pkt *ndata);
 
 /** Send a NIC control packet to the device
  * @param oct - octeon device pointer
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index d32492f1..90866bb 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -30,6 +30,7 @@
 #include "octeon_main.h"
 #include "octeon_network.h"
 #include "cn66xx_device.h"
+#include "cn23xx_pf_device.h"
 
 #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
 	(octeon_dev_ptr->instr_queue[iq_no]->stats.field += count)
@@ -71,7 +72,8 @@
 
 	if (OCTEON_CN6XXX(oct))
 		conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf)));
-
+	else if (OCTEON_CN23XX_PF(oct))
+		conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn23xx_pf, conf)));
 	if (!conf) {
 		dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
 			oct->chip_id);
@@ -88,6 +90,7 @@
 	q_size = (u32)conf->instr_type * num_descs;
 
 	iq = oct->instr_queue[iq_no];
+
 	iq->oct_dev = oct;
 
 	set_dev_node(&oct->pci_dev->dev, numa_node);
@@ -181,6 +184,9 @@
 	if (OCTEON_CN6XXX(oct))
 		desc_size =
 		    CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf));
+	else if (OCTEON_CN23XX_PF(oct))
+		desc_size =
+		    CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn23xx_pf, conf));
 
 	vfree(iq->request_list);
 
@@ -383,7 +389,12 @@
 		case REQTYPE_SOFT_COMMAND:
 			sc = buf;
 
-			irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+			if (OCTEON_CN23XX_PF(oct))
+				irh = (struct octeon_instr_irh *)
+					&sc->cmd.cmd3.irh;
+			else
+				irh = (struct octeon_instr_irh *)
+					&sc->cmd.cmd2.irh;
 			if (irh->rflag) {
 				/* We're expecting a response from Octeon.
 				 * It's up to lio_process_ordered_list() to
@@ -499,6 +510,7 @@
 
 	if (!oct)
 		return;
+
 	iq = oct->instr_queue[iq_no];
 	if (!iq)
 		return;
@@ -514,6 +526,8 @@
 
 	/* Flush the instruction queue */
 	octeon_flush_iq(oct, iq, 1, 0);
+
+	lio_enable_irq(NULL, iq);
 }
 
 /* Called by the Poll thread at regular intervals to check the instruction
@@ -580,6 +594,8 @@
 {
 	struct octeon_config *oct_cfg;
 	struct octeon_instr_ih2 *ih2;
+	struct octeon_instr_ih3 *ih3;
+	struct octeon_instr_pki_ih3 *pki_ih3;
 	struct octeon_instr_irh *irh;
 	struct octeon_instr_rdp *rdp;
 
@@ -588,36 +604,88 @@
 
 	oct_cfg = octeon_get_conf(oct);
 
-	ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
-	ih2->tagtype = ATOMIC_TAG;
-	ih2->tag     = LIO_CONTROL;
-	ih2->raw     = 1;
-	ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);
+	if (OCTEON_CN23XX_PF(oct)) {
+		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
 
-	if (sc->datasize) {
-		ih2->dlengsz = sc->datasize;
-		ih2->rs = 1;
-	}
+		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
 
-	irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
-	irh->opcode    = opcode;
-	irh->subcode   = subcode;
+		pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3;
 
-	/* opcode/subcode specific parameters (ossp) */
-	irh->ossp       = irh_ossp;
-	sc->cmd.cmd2.ossp[0] = ossp0;
-	sc->cmd.cmd2.ossp[1] = ossp1;
+		pki_ih3->w           = 1;
+		pki_ih3->raw         = 1;
+		pki_ih3->utag        = 1;
+		pki_ih3->uqpg        =
+			oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg;
+		pki_ih3->utt         = 1;
+		pki_ih3->tag     = LIO_CONTROL;
+		pki_ih3->tagtype = ATOMIC_TAG;
+		pki_ih3->qpg         =
+			oct->instr_queue[sc->iq_no]->txpciq.s.qpg;
+		pki_ih3->pm          = 0x7;
+		pki_ih3->sl          = 8;
 
-	if (sc->rdatasize) {
-		rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
-		rdp->pcie_port = oct->pcie_port;
-		rdp->rlen      = sc->rdatasize;
+		if (sc->datasize)
+			ih3->dlengsz = sc->datasize;
 
-		irh->rflag =  1;
-		ih2->fsz   = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
+		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+		irh->opcode    = opcode;
+		irh->subcode   = subcode;
+
+		/* opcode/subcode specific parameters (ossp) */
+		irh->ossp       = irh_ossp;
+		sc->cmd.cmd3.ossp[0] = ossp0;
+		sc->cmd.cmd3.ossp[1] = ossp1;
+
+		if (sc->rdatasize) {
+			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
+			rdp->pcie_port = oct->pcie_port;
+			rdp->rlen      = sc->rdatasize;
+
+			irh->rflag =  1;
+			/*PKI IH3*/
+			/* pki_ih3 irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */
+			ih3->fsz    = LIO_SOFTCMDRESP_IH3;
+		} else {
+			irh->rflag =  0;
+			/*PKI IH3*/
+			/* pki_h3 + irh + ossp[0] + ossp[1] = 32 bytes */
+			ih3->fsz    = LIO_PCICMD_O3;
+		}
+
 	} else {
-		irh->rflag =  0;
-		ih2->fsz   = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */
+		ih2          = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+		ih2->tagtype = ATOMIC_TAG;
+		ih2->tag     = LIO_CONTROL;
+		ih2->raw     = 1;
+		ih2->grp     = CFG_GET_CTRL_Q_GRP(oct_cfg);
+
+		if (sc->datasize) {
+			ih2->dlengsz = sc->datasize;
+			ih2->rs = 1;
+		}
+
+		irh            = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+		irh->opcode    = opcode;
+		irh->subcode   = subcode;
+
+		/* opcode/subcode specific parameters (ossp) */
+		irh->ossp       = irh_ossp;
+		sc->cmd.cmd2.ossp[0] = ossp0;
+		sc->cmd.cmd2.ossp[1] = ossp1;
+
+		if (sc->rdatasize) {
+			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+			rdp->pcie_port = oct->pcie_port;
+			rdp->rlen      = sc->rdatasize;
+
+			irh->rflag =  1;
+			/* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */
+			ih2->fsz   = LIO_SOFTCMDRESP_IH2;
+		} else {
+			irh->rflag =  0;
+			/* irh + ossp[0] + ossp[1] = 24 bytes */
+			ih2->fsz   = LIO_PCICMD_O2;
+		}
 	}
 }
 
@@ -625,23 +693,39 @@
 			     struct octeon_soft_command *sc)
 {
 	struct octeon_instr_ih2 *ih2;
+	struct octeon_instr_ih3 *ih3;
 	struct octeon_instr_irh *irh;
 	u32 len;
 
-	ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
-	if (ih2->dlengsz) {
-		WARN_ON(!sc->dmadptr);
-		sc->cmd.cmd2.dptr = sc->dmadptr;
+	if (OCTEON_CN23XX_PF(oct)) {
+		ih3 =  (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
+		if (ih3->dlengsz) {
+			WARN_ON(!sc->dmadptr);
+			sc->cmd.cmd3.dptr = sc->dmadptr;
+		}
+		irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh;
+		if (irh->rflag) {
+			WARN_ON(!sc->dmarptr);
+			WARN_ON(!sc->status_word);
+			*sc->status_word = COMPLETION_WORD_INIT;
+			sc->cmd.cmd3.rptr = sc->dmarptr;
+		}
+		len = (u32)ih3->dlengsz;
+	} else {
+		ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2;
+		if (ih2->dlengsz) {
+			WARN_ON(!sc->dmadptr);
+			sc->cmd.cmd2.dptr = sc->dmadptr;
+		}
+		irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
+		if (irh->rflag) {
+			WARN_ON(!sc->dmarptr);
+			WARN_ON(!sc->status_word);
+			*sc->status_word = COMPLETION_WORD_INIT;
+			sc->cmd.cmd2.rptr = sc->dmarptr;
+		}
+		len = (u32)ih2->dlengsz;
 	}
-	irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh;
-	if (irh->rflag) {
-		WARN_ON(!sc->dmarptr);
-		WARN_ON(!sc->status_word);
-		*sc->status_word = COMPLETION_WORD_INIT;
-
-		sc->cmd.cmd2.rptr = sc->dmarptr;
-	}
-	len = (u32)ih2->dlengsz;
 
 	if (sc->wait_time)
 		sc->timeout = jiffies + sc->wait_time;
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index 709049e..be52178d 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -91,8 +91,13 @@
 
 		sc = (struct octeon_soft_command *)ordered_sc_list->
 		    head.next;
-		rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
-		rptr = sc->cmd.cmd2.rptr;
+		if (OCTEON_CN23XX_PF(octeon_dev)) {
+			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
+			rptr = sc->cmd.cmd3.rptr;
+		} else {
+			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp;
+			rptr = sc->cmd.cmd2.rptr;
+		}
 
 		status = OCTEON_REQUEST_PENDING;
 
diff --git a/drivers/net/ethernet/cavium/thunder/Makefile b/drivers/net/ethernet/cavium/thunder/Makefile
index 5c4615c..6b4d4ad 100644
--- a/drivers/net/ethernet/cavium/thunder/Makefile
+++ b/drivers/net/ethernet/cavium/thunder/Makefile
@@ -2,6 +2,7 @@
 # Makefile for Cavium's Thunder ethernet device
 #
 
+obj-$(CONFIG_THUNDER_NIC_RGX) += thunder_xcv.o
 obj-$(CONFIG_THUNDER_NIC_BGX) += thunder_bgx.o
 obj-$(CONFIG_THUNDER_NIC_PF) += nicpf.o
 obj-$(CONFIG_THUNDER_NIC_VF) += nicvf.o
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 83025bb..3042610 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -20,6 +20,17 @@
 #define	PCI_DEVICE_ID_THUNDER_NIC_VF		0xA034
 #define	PCI_DEVICE_ID_THUNDER_BGX		0xA026
 
+/* Subsystem device IDs */
+#define PCI_SUBSYS_DEVID_88XX_NIC_PF		0xA11E
+#define PCI_SUBSYS_DEVID_81XX_NIC_PF		0xA21E
+#define PCI_SUBSYS_DEVID_83XX_NIC_PF		0xA31E
+
+#define PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF	0xA11E
+#define PCI_SUBSYS_DEVID_88XX_NIC_VF		0xA134
+#define PCI_SUBSYS_DEVID_81XX_NIC_VF		0xA234
+#define PCI_SUBSYS_DEVID_83XX_NIC_VF		0xA334
+
+
 /* PCI BAR nos */
 #define	PCI_CFG_REG_BAR_NUM		0
 #define	PCI_MSIX_REG_BAR_NUM		4
@@ -41,40 +52,8 @@
 /* Max pkinds */
 #define	NIC_MAX_PKIND			16
 
-/* Rx Channels */
-/* Receive channel configuration in TNS bypass mode
- * Below is configuration in TNS bypass mode
- * BGX0-LMAC0-CHAN0 - VNIC CHAN0
- * BGX0-LMAC1-CHAN0 - VNIC CHAN16
- * ...
- * BGX1-LMAC0-CHAN0 - VNIC CHAN128
- * ...
- * BGX1-LMAC3-CHAN0 - VNIC CHAN174
- */
-#define	NIC_INTF_COUNT			2  /* Interfaces btw VNIC and TNS/BGX */
-#define	NIC_CHANS_PER_INF		128
-#define	NIC_MAX_CHANS			(NIC_INTF_COUNT * NIC_CHANS_PER_INF)
-#define	NIC_CPI_COUNT			2048 /* No of channel parse indices */
-
-/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */
-#define NIC_MAX_BGX			MAX_BGX_PER_CN88XX
-#define	NIC_CPI_PER_BGX			(NIC_CPI_COUNT / NIC_MAX_BGX)
-#define	NIC_MAX_CPI_PER_LMAC		64 /* Max when CPI_ALG is IP diffserv */
-#define	NIC_RSSI_PER_BGX		(NIC_RSSI_COUNT / NIC_MAX_BGX)
-
-/* Tx scheduling */
-#define	NIC_MAX_TL4			1024
-#define	NIC_MAX_TL4_SHAPERS		256 /* 1 shaper for 4 TL4s */
-#define	NIC_MAX_TL3			256
-#define	NIC_MAX_TL3_SHAPERS		64  /* 1 shaper for 4 TL3s */
-#define	NIC_MAX_TL2			64
-#define	NIC_MAX_TL2_SHAPERS		2  /* 1 shaper for 32 TL2s */
-#define	NIC_MAX_TL1			2
-
-/* TNS bypass mode */
-#define	NIC_TL2_PER_BGX			32
-#define	NIC_TL4_PER_BGX			(NIC_MAX_TL4 / NIC_MAX_BGX)
-#define	NIC_TL4_PER_LMAC		(NIC_MAX_TL4 / NIC_CHANS_PER_INF)
+/* Max when CPI_ALG is IP diffserv */
+#define	NIC_MAX_CPI_PER_LMAC		64
 
 /* NIC VF Interrupts */
 #define	NICVF_INTR_CQ			0
@@ -148,7 +127,6 @@
 	struct	napi_struct napi;
 };
 
-#define	NIC_RSSI_COUNT			4096 /* Total no of RSS indices */
 #define NIC_MAX_RSS_HASH_BITS		8
 #define NIC_MAX_RSS_IDR_TBL_SIZE	(1 << NIC_MAX_RSS_HASH_BITS)
 #define RSS_HASH_KEY_SIZE		5 /* 320 bit key */
@@ -273,12 +251,14 @@
 	struct net_device	*netdev;
 	struct pci_dev		*pdev;
 	void __iomem		*reg_base;
+#define	MAX_QUEUES_PER_QSET			8
 	struct queue_set	*qs;
 	struct nicvf_cq_poll	*napi[8];
 	u8			vf_id;
 	u8			sqs_id;
 	bool                    sqs_mode;
 	bool			hw_tso;
+	bool			t88;
 
 	/* Receive buffer alloc */
 	u32			rb_page_offset;
@@ -325,7 +305,7 @@
 	bool			msix_enabled;
 	u8			num_vec;
 	struct msix_entry	msix_entries[NIC_VF_MSIX_VECTORS];
-	char			irq_name[NIC_VF_MSIX_VECTORS][20];
+	char			irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15];
 	bool			irq_allocated[NIC_VF_MSIX_VECTORS];
 	cpumask_var_t		affinity_mask[NIC_VF_MSIX_VECTORS];
 
@@ -368,6 +348,7 @@
 #define	NIC_MBOX_MSG_PNICVF_PTR		0x14	/* Get primary qset nicvf ptr */
 #define	NIC_MBOX_MSG_SNICVF_PTR		0x15	/* Send sqet nicvf ptr to PVF */
 #define	NIC_MBOX_MSG_LOOPBACK		0x16	/* Set interface in loopback */
+#define	NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17	/* Reset statistics counters */
 #define	NIC_MBOX_MSG_CFG_DONE		0xF0	/* VF configuration done */
 #define	NIC_MBOX_MSG_SHUTDOWN		0xF1	/* VF is being shutdown */
 
@@ -484,6 +465,31 @@
 	bool  enable;
 };
 
+/* Reset statistics counters */
+struct reset_stat_cfg {
+	u8    msg;
+	/* Bitmap to select NIC_PF_VNIC(vf_id)_RX_STAT(0..13) */
+	u16   rx_stat_mask;
+	/* Bitmap to select NIC_PF_VNIC(vf_id)_TX_STAT(0..4) */
+	u8    tx_stat_mask;
+	/* Bitmap to select NIC_PF_QS(0..127)_RQ(0..7)_STAT(0..1)
+	 * bit14, bit15 NIC_PF_QS(vf_id)_RQ7_STAT(0..1)
+	 * bit12, bit13 NIC_PF_QS(vf_id)_RQ6_STAT(0..1)
+	 * ..
+	 * bit2, bit3 NIC_PF_QS(vf_id)_RQ1_STAT(0..1)
+	 * bit0, bit1 NIC_PF_QS(vf_id)_RQ0_STAT(0..1)
+	 */
+	u16   rq_stat_mask;
+	/* Bitmap to select NIC_PF_QS(0..127)_SQ(0..7)_STAT(0..1)
+	 * bit14, bit15 NIC_PF_QS(vf_id)_SQ7_STAT(0..1)
+	 * bit12, bit13 NIC_PF_QS(vf_id)_SQ6_STAT(0..1)
+	 * ..
+	 * bit2, bit3 NIC_PF_QS(vf_id)_SQ1_STAT(0..1)
+	 * bit0, bit1 NIC_PF_QS(vf_id)_SQ0_STAT(0..1)
+	 */
+	u16   sq_stat_mask;
+};
+
 /* 128 bit shared memory between PF and each VF */
 union nic_mbx {
 	struct { u8 msg; }	msg;
@@ -501,6 +507,7 @@
 	struct sqs_alloc        sqs_alloc;
 	struct nicvf_ptr	nicvf;
 	struct set_loopback	lbk;
+	struct reset_stat_cfg	reset_stat;
 };
 
 #define NIC_NODE_ID_MASK	0x03
@@ -514,7 +521,14 @@
 
 static inline bool pass1_silicon(struct pci_dev *pdev)
 {
-	return pdev->revision < 8;
+	return (pdev->revision < 8) &&
+		(pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
+}
+
+static inline bool pass2_silicon(struct pci_dev *pdev)
+{
+	return (pdev->revision >= 8) &&
+		(pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF);
 }
 
 int nicvf_set_real_num_queues(struct net_device *netdev,
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 16ed203..2bbf4cb 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -20,8 +20,25 @@
 #define DRV_NAME	"thunder-nic"
 #define DRV_VERSION	"1.0"
 
+struct hw_info {
+	u8		bgx_cnt;
+	u8		chans_per_lmac;
+	u8		chans_per_bgx; /* Rx/Tx chans */
+	u8		chans_per_rgx;
+	u8		chans_per_lbk;
+	u16		cpi_cnt;
+	u16		rssi_cnt;
+	u16		rss_ind_tbl_size;
+	u16		tl4_cnt;
+	u16		tl3_cnt;
+	u8		tl2_cnt;
+	u8		tl1_cnt;
+	bool		tl1_per_bgx; /* TL1 per BGX or per LMAC */
+};
+
 struct nicpf {
 	struct pci_dev		*pdev;
+	struct hw_info          *hw;
 	u8			node;
 	unsigned int		flags;
 	u8			num_vf_en;      /* No of VF enabled */
@@ -36,22 +53,22 @@
 #define	NIC_SET_VF_LMAC_MAP(bgx, lmac)	(((bgx & 0xF) << 4) | (lmac & 0xF))
 #define	NIC_GET_BGX_FROM_VF_LMAC_MAP(map)	((map >> 4) & 0xF)
 #define	NIC_GET_LMAC_FROM_VF_LMAC_MAP(map)	(map & 0xF)
-	u8			vf_lmac_map[MAX_LMAC];
+	u8			*vf_lmac_map;
 	struct delayed_work     dwork;
 	struct workqueue_struct *check_link;
-	u8			link[MAX_LMAC];
-	u8			duplex[MAX_LMAC];
-	u32			speed[MAX_LMAC];
+	u8			*link;
+	u8			*duplex;
+	u32			*speed;
 	u16			cpi_base[MAX_NUM_VFS_SUPPORTED];
 	u16			rssi_base[MAX_NUM_VFS_SUPPORTED];
-	u16			rss_ind_tbl_size;
 	bool			mbx_lock[MAX_NUM_VFS_SUPPORTED];
 
 	/* MSI-X */
 	bool			msix_enabled;
 	u8			num_vec;
-	struct msix_entry	msix_entries[NIC_PF_MSIX_VECTORS];
+	struct msix_entry	*msix_entries;
 	bool			irq_allocated[NIC_PF_MSIX_VECTORS];
+	char			irq_name[NIC_PF_MSIX_VECTORS][20];
 };
 
 /* Supported devices */
@@ -89,9 +106,22 @@
 /* PF -> VF mailbox communication APIs */
 static void nic_enable_mbx_intr(struct nicpf *nic)
 {
-	/* Enable mailbox interrupt for all 128 VFs */
-	nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull);
-	nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull);
+	int vf_cnt = pci_sriov_get_totalvfs(nic->pdev);
+
+#define INTR_MASK(vfs) ((vfs < 64) ? (BIT_ULL(vfs) - 1) : (~0ull))
+
+	/* Clear it, to avoid spurious interrupts (if any) */
+	nic_reg_write(nic, NIC_PF_MAILBOX_INT, INTR_MASK(vf_cnt));
+
+	/* Enable mailbox interrupt for all VFs */
+	nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, INTR_MASK(vf_cnt));
+	/* One mailbox intr enable reg per 64 VFs */
+	if (vf_cnt > 64) {
+		nic_reg_write(nic, NIC_PF_MAILBOX_INT + sizeof(u64),
+			      INTR_MASK(vf_cnt - 64));
+		nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64),
+			      INTR_MASK(vf_cnt - 64));
+	}
 }
 
 static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg)
@@ -144,7 +174,7 @@
 
 	mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE;
 
-	if (vf < MAX_LMAC) {
+	if (vf < nic->num_vf_en) {
 		bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
 		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
 
@@ -155,7 +185,7 @@
 	mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false;
 	mbx.nic_cfg.node_id = nic->node;
 
-	mbx.nic_cfg.loopback_supported = vf < MAX_LMAC;
+	mbx.nic_cfg.loopback_supported = vf < nic->num_vf_en;
 
 	nic_send_msg_to_vf(nic, vf, &mbx);
 }
@@ -248,14 +278,27 @@
 /* Set minimum transmit packet size */
 static void nic_set_tx_pkt_pad(struct nicpf *nic, int size)
 {
-	int lmac;
+	int lmac, max_lmac;
+	u16 sdevid;
 	u64 lmac_cfg;
 
-	/* Max value that can be set is 60 */
-	if (size > 60)
-		size = 60;
+	/* There is a issue in HW where-in while sending GSO sized
+	 * pkts as part of TSO, if pkt len falls below this size
+	 * NIC will zero PAD packet and also updates IP total length.
+	 * Hence set this value to lessthan min pkt size of MAC+IP+TCP
+	 * headers, BGX will do the padding to transmit 64 byte pkt.
+	 */
+	if (size > 52)
+		size = 52;
 
-	for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) {
+	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+	/* 81xx's RGX has only one LMAC */
+	if (sdevid == PCI_SUBSYS_DEVID_81XX_NIC_PF)
+		max_lmac = ((nic->hw->bgx_cnt - 1) * MAX_LMAC_PER_BGX) + 1;
+	else
+		max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX;
+
+	for (lmac = 0; lmac < max_lmac; lmac++) {
 		lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3));
 		lmac_cfg &= ~(0xF << 2);
 		lmac_cfg |= ((size / 4) << 2);
@@ -275,7 +318,7 @@
 
 	nic->num_vf_en = 0;
 
-	for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) {
+	for (bgx = 0; bgx < nic->hw->bgx_cnt; bgx++) {
 		if (!(bgx_map & (1 << bgx)))
 			continue;
 		lmac_cnt = bgx_get_lmac_count(nic->node, bgx);
@@ -295,28 +338,125 @@
 			nic_reg_write(nic,
 				      NIC_PF_LMAC_0_7_CREDIT + (lmac * 8),
 				      lmac_credit);
+
+		/* On CN81XX there are only 8 VFs but max possible no of
+		 * interfaces are 9.
+		 */
+		if (nic->num_vf_en >= pci_sriov_get_totalvfs(nic->pdev)) {
+			nic->num_vf_en = pci_sriov_get_totalvfs(nic->pdev);
+			break;
+		}
 	}
 }
 
+static void nic_free_lmacmem(struct nicpf *nic)
+{
+	kfree(nic->vf_lmac_map);
+	kfree(nic->link);
+	kfree(nic->duplex);
+	kfree(nic->speed);
+}
+
+static int nic_get_hw_info(struct nicpf *nic)
+{
+	u8 max_lmac;
+	u16 sdevid;
+	struct hw_info *hw = nic->hw;
+
+	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+
+	switch (sdevid) {
+	case PCI_SUBSYS_DEVID_88XX_NIC_PF:
+		hw->bgx_cnt = MAX_BGX_PER_CN88XX;
+		hw->chans_per_lmac = 16;
+		hw->chans_per_bgx = 128;
+		hw->cpi_cnt = 2048;
+		hw->rssi_cnt = 4096;
+		hw->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
+		hw->tl3_cnt = 256;
+		hw->tl2_cnt = 64;
+		hw->tl1_cnt = 2;
+		hw->tl1_per_bgx = true;
+		break;
+	case PCI_SUBSYS_DEVID_81XX_NIC_PF:
+		hw->bgx_cnt = MAX_BGX_PER_CN81XX;
+		hw->chans_per_lmac = 8;
+		hw->chans_per_bgx = 32;
+		hw->chans_per_rgx = 8;
+		hw->chans_per_lbk = 24;
+		hw->cpi_cnt = 512;
+		hw->rssi_cnt = 256;
+		hw->rss_ind_tbl_size = 32; /* Max RSSI / Max interfaces */
+		hw->tl3_cnt = 64;
+		hw->tl2_cnt = 16;
+		hw->tl1_cnt = 10;
+		hw->tl1_per_bgx = false;
+		break;
+	case PCI_SUBSYS_DEVID_83XX_NIC_PF:
+		hw->bgx_cnt = MAX_BGX_PER_CN83XX;
+		hw->chans_per_lmac = 8;
+		hw->chans_per_bgx = 32;
+		hw->chans_per_lbk = 64;
+		hw->cpi_cnt = 2048;
+		hw->rssi_cnt = 1024;
+		hw->rss_ind_tbl_size = 64; /* Max RSSI / Max interfaces */
+		hw->tl3_cnt = 256;
+		hw->tl2_cnt = 64;
+		hw->tl1_cnt = 18;
+		hw->tl1_per_bgx = false;
+		break;
+	}
+	hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev);
+
+	/* Allocate memory for LMAC tracking elements */
+	max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX;
+	nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
+	if (!nic->vf_lmac_map)
+		goto error;
+	nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
+	if (!nic->link)
+		goto error;
+	nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL);
+	if (!nic->duplex)
+		goto error;
+	nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL);
+	if (!nic->speed)
+		goto error;
+	return 0;
+
+error:
+	nic_free_lmacmem(nic);
+	return -ENOMEM;
+}
+
 #define BGX0_BLOCK 8
 #define BGX1_BLOCK 9
 
-static void nic_init_hw(struct nicpf *nic)
+static int nic_init_hw(struct nicpf *nic)
 {
-	int i;
+	int i, err;
 	u64 cqm_cfg;
 
+	/* Get HW capability info */
+	err = nic_get_hw_info(nic);
+	if (err)
+		return err;
+
 	/* Enable NIC HW block */
 	nic_reg_write(nic, NIC_PF_CFG, 0x3);
 
 	/* Enable backpressure */
 	nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03);
 
-	/* Disable TNS mode on both interfaces */
-	nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
-		      (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
-	nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
-		      (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
+	/* TNS and TNS bypass modes are present only on 88xx */
+	if (nic->pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF) {
+		/* Disable TNS mode on both interfaces */
+		nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG,
+			      (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK);
+		nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8),
+			      (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK);
+	}
+
 	nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG,
 		      (1ULL << 63) | BGX0_BLOCK);
 	nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8),
@@ -346,11 +486,14 @@
 	cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG);
 	if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL)
 		nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL);
+
+	return 0;
 }
 
 /* Channel parse index configuration */
 static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg)
 {
+	struct hw_info *hw = nic->hw;
 	u32 vnic, bgx, lmac, chan;
 	u32 padd, cpi_count = 0;
 	u64 cpi_base, cpi, rssi_base, rssi;
@@ -360,9 +503,9 @@
 	bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
 	lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]);
 
-	chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
-	cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX);
-	rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX);
+	chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
+	cpi_base = vnic * NIC_MAX_CPI_PER_LMAC;
+	rssi_base = vnic * hw->rss_ind_tbl_size;
 
 	/* Rx channel configuration */
 	nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3),
@@ -434,7 +577,7 @@
 	msg = (u64 *)&mbx;
 
 	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
-	mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size;
+	mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size;
 	nic_send_msg_to_vf(nic, vf, &mbx);
 }
 
@@ -481,7 +624,7 @@
 /* 4 level transmit side scheduler configutation
  * for TNS bypass mode
  *
- * Sample configuration for SQ0
+ * Sample configuration for SQ0 on 88xx
  * VNIC0-SQ0 -> TL4(0)   -> TL3[0]   -> TL2[0]  -> TL1[0] -> BGX0
  * VNIC1-SQ0 -> TL4(8)   -> TL3[2]   -> TL2[0]  -> TL1[0] -> BGX0
  * VNIC2-SQ0 -> TL4(16)  -> TL3[4]   -> TL2[1]  -> TL1[0] -> BGX0
@@ -494,6 +637,7 @@
 static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic,
 			       struct sq_cfg_msg *sq)
 {
+	struct hw_info *hw = nic->hw;
 	u32 bgx, lmac, chan;
 	u32 tl2, tl3, tl4;
 	u32 rr_quantum;
@@ -512,21 +656,28 @@
 	/* 24 bytes for FCS, IPG and preamble */
 	rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4);
 
-	if (!sq->sqs_mode) {
-		tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX);
-	} else {
-		for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
-			if (nic->vf_sqs[pqs_vnic][svf] == vnic)
-				break;
+	/* For 88xx 0-511 TL4 transmits via BGX0 and
+	 * 512-1023 TL4s transmit via BGX1.
+	 */
+	if (hw->tl1_per_bgx) {
+		tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt);
+		if (!sq->sqs_mode) {
+			tl4 += (lmac * MAX_QUEUES_PER_QSET);
+		} else {
+			for (svf = 0; svf < MAX_SQS_PER_VF; svf++) {
+				if (nic->vf_sqs[pqs_vnic][svf] == vnic)
+					break;
+			}
+			tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET);
+			tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF);
+			tl4 += (svf * MAX_QUEUES_PER_QSET);
 		}
-		tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC);
-		tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF);
-		tl4 += (svf * NIC_TL4_PER_LMAC);
-		tl4 += (bgx * NIC_TL4_PER_BGX);
+	} else {
+		tl4 = (vnic * MAX_QUEUES_PER_QSET);
 	}
 	tl4 += sq_idx;
 
-	tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3);
+	tl3 = tl4 / (hw->tl4_cnt / hw->tl3_cnt);
 	nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 |
 		      ((u64)vnic << NIC_QS_ID_SHIFT) |
 		      ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4);
@@ -534,8 +685,19 @@
 		      ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum);
 
 	nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum);
-	chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF);
-	nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
+
+	/* On 88xx 0-127 channels are for BGX0 and
+	 * 127-255 channels for BGX1.
+	 *
+	 * On 81xx/83xx TL3_CHAN reg should be configured with channel
+	 * within LMAC i.e 0-7 and not the actual channel number like on 88xx
+	 */
+	chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx);
+	if (hw->tl1_per_bgx)
+		nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan);
+	else
+		nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), 0);
+
 	/* Enable backpressure on the channel */
 	nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1);
 
@@ -544,6 +706,16 @@
 	nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum);
 	/* No priorities as of now */
 	nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00);
+
+	/* Unlike 88xx where TL2s 0-31 transmits to TL1 '0' and rest to TL1 '1'
+	 * on 81xx/83xx TL2 needs to be configured to transmit to one of the
+	 * possible LMACs.
+	 *
+	 * This register doesn't exist on 88xx.
+	 */
+	if (!hw->tl1_per_bgx)
+		nic_reg_write(nic, NIC_PF_TL2_LMAC | (tl2 << 3),
+			      lmac + (bgx * MAX_LMAC_PER_BGX));
 }
 
 /* Send primary nicvf pointer to secondary QS's VF */
@@ -615,7 +787,7 @@
 {
 	int bgx_idx, lmac_idx;
 
-	if (lbk->vf_id > MAX_LMAC)
+	if (lbk->vf_id >= nic->num_vf_en)
 		return -1;
 
 	bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]);
@@ -626,6 +798,67 @@
 	return 0;
 }
 
+/* Reset statistics counters */
+static int nic_reset_stat_counters(struct nicpf *nic,
+				   int vf, struct reset_stat_cfg *cfg)
+{
+	int i, stat, qnum;
+	u64 reg_addr;
+
+	for (i = 0; i < RX_STATS_ENUM_LAST; i++) {
+		if (cfg->rx_stat_mask & BIT(i)) {
+			reg_addr = NIC_PF_VNIC_0_127_RX_STAT_0_13 |
+				   (vf << NIC_QS_ID_SHIFT) |
+				   (i << 3);
+			nic_reg_write(nic, reg_addr, 0);
+		}
+	}
+
+	for (i = 0; i < TX_STATS_ENUM_LAST; i++) {
+		if (cfg->tx_stat_mask & BIT(i)) {
+			reg_addr = NIC_PF_VNIC_0_127_TX_STAT_0_4 |
+				   (vf << NIC_QS_ID_SHIFT) |
+				   (i << 3);
+			nic_reg_write(nic, reg_addr, 0);
+		}
+	}
+
+	for (i = 0; i <= 15; i++) {
+		qnum = i >> 1;
+		stat = i & 1 ? 1 : 0;
+		reg_addr = (vf << NIC_QS_ID_SHIFT) |
+			   (qnum << NIC_Q_NUM_SHIFT) | (stat << 3);
+		if (cfg->rq_stat_mask & BIT(i)) {
+			reg_addr |= NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1;
+			nic_reg_write(nic, reg_addr, 0);
+		}
+		if (cfg->sq_stat_mask & BIT(i)) {
+			reg_addr |= NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1;
+			nic_reg_write(nic, reg_addr, 0);
+		}
+	}
+	return 0;
+}
+
+static void nic_enable_tunnel_parsing(struct nicpf *nic, int vf)
+{
+	u64 prot_def = (IPV6_PROT << 32) | (IPV4_PROT << 16) | ET_PROT;
+	u64 vxlan_prot_def = (IPV6_PROT_DEF << 32) |
+			      (IPV4_PROT_DEF) << 16 | ET_PROT_DEF;
+
+	/* Configure tunnel parsing parameters */
+	nic_reg_write(nic, NIC_PF_RX_GENEVE_DEF,
+		      (1ULL << 63 | UDP_GENEVE_PORT_NUM));
+	nic_reg_write(nic, NIC_PF_RX_GENEVE_PROT_DEF,
+		      ((7ULL << 61) | prot_def));
+	nic_reg_write(nic, NIC_PF_RX_NVGRE_PROT_DEF,
+		      ((7ULL << 61) | prot_def));
+	nic_reg_write(nic, NIC_PF_RX_VXLAN_DEF_0_1,
+		      ((1ULL << 63) | UDP_VXLAN_PORT_NUM));
+	nic_reg_write(nic, NIC_PF_RX_VXLAN_PROT_DEF,
+		      ((0xfULL << 60) | vxlan_prot_def));
+}
+
 static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
 {
 	int bgx, lmac;
@@ -664,18 +897,17 @@
 		mbx_addr += sizeof(u64);
 	}
 
-	dev_dbg(&nic->pdev->dev, "%s: Mailbox msg %d from VF%d\n",
+	dev_dbg(&nic->pdev->dev, "%s: Mailbox msg 0x%02x from VF%d\n",
 		__func__, mbx.msg.msg, vf);
 	switch (mbx.msg.msg) {
 	case NIC_MBOX_MSG_READY:
 		nic_mbx_send_ready(nic, vf);
-		if (vf < MAX_LMAC) {
+		if (vf < nic->num_vf_en) {
 			nic->link[vf] = 0;
 			nic->duplex[vf] = 0;
 			nic->speed[vf] = 0;
 		}
-		ret = 1;
-		break;
+		goto unlock;
 	case NIC_MBOX_MSG_QS_CFG:
 		reg_addr = NIC_PF_QSET_0_127_CFG |
 			   (mbx.qs.num << NIC_QS_ID_SHIFT);
@@ -693,6 +925,15 @@
 			   (mbx.rq.qs_num << NIC_QS_ID_SHIFT) |
 			   (mbx.rq.rq_num << NIC_Q_NUM_SHIFT);
 		nic_reg_write(nic, reg_addr, mbx.rq.cfg);
+		/* Enable CQE_RX2_S extension in CQE_RX descriptor.
+		 * This gets appended by default on 81xx/83xx chips,
+		 * for consistency enabling the same on 88xx pass2
+		 * where this is introduced.
+		 */
+		if (pass2_silicon(nic->pdev))
+			nic_reg_write(nic, NIC_PF_RX_CFG, 0x01);
+		if (!pass1_silicon(nic->pdev))
+			nic_enable_tunnel_parsing(nic, vf);
 		break;
 	case NIC_MBOX_MSG_RQ_BP_CFG:
 		reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG |
@@ -717,8 +958,10 @@
 		nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq);
 		break;
 	case NIC_MBOX_MSG_SET_MAC:
-		if (vf >= nic->num_vf_en)
+		if (vf >= nic->num_vf_en) {
+			ret = -1; /* NACK */
 			break;
+		}
 		lmac = mbx.mac.vf_id;
 		bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
 		lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]);
@@ -767,25 +1010,38 @@
 	case NIC_MBOX_MSG_LOOPBACK:
 		ret = nic_config_loopback(nic, &mbx.lbk);
 		break;
+	case NIC_MBOX_MSG_RESET_STAT_COUNTER:
+		ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat);
+		break;
 	default:
 		dev_err(&nic->pdev->dev,
 			"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
 		break;
 	}
 
-	if (!ret)
+	if (!ret) {
 		nic_mbx_send_ack(nic, vf);
-	else if (mbx.msg.msg != NIC_MBOX_MSG_READY)
+	} else if (mbx.msg.msg != NIC_MBOX_MSG_READY) {
+		dev_err(&nic->pdev->dev, "NACK for MBOX 0x%02x from VF %d\n",
+			mbx.msg.msg, vf);
 		nic_mbx_send_nack(nic, vf);
+	}
 unlock:
 	nic->mbx_lock[vf] = false;
 }
 
-static void nic_mbx_intr_handler (struct nicpf *nic, int mbx)
+static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq)
 {
+	struct nicpf *nic = (struct nicpf *)nic_irq;
+	int mbx;
 	u64 intr;
 	u8  vf, vf_per_mbx_reg = 64;
 
+	if (irq == nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector)
+		mbx = 0;
+	else
+		mbx = 1;
+
 	intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3));
 	dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr);
 	for (vf = 0; vf < vf_per_mbx_reg; vf++) {
@@ -797,23 +1053,6 @@
 			nic_clear_mbx_intr(nic, vf, mbx);
 		}
 	}
-}
-
-static irqreturn_t nic_mbx0_intr_handler (int irq, void *nic_irq)
-{
-	struct nicpf *nic = (struct nicpf *)nic_irq;
-
-	nic_mbx_intr_handler(nic, 0);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t nic_mbx1_intr_handler (int irq, void *nic_irq)
-{
-	struct nicpf *nic = (struct nicpf *)nic_irq;
-
-	nic_mbx_intr_handler(nic, 1);
-
 	return IRQ_HANDLED;
 }
 
@@ -821,7 +1060,13 @@
 {
 	int i, ret;
 
-	nic->num_vec = NIC_PF_MSIX_VECTORS;
+	nic->num_vec = pci_msix_vec_count(nic->pdev);
+
+	nic->msix_entries = kmalloc_array(nic->num_vec,
+					  sizeof(struct msix_entry),
+					  GFP_KERNEL);
+	if (!nic->msix_entries)
+		return -ENOMEM;
 
 	for (i = 0; i < nic->num_vec; i++)
 		nic->msix_entries[i].entry = i;
@@ -829,8 +1074,9 @@
 	ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec);
 	if (ret) {
 		dev_err(&nic->pdev->dev,
-			"Request for #%d msix vectors failed\n",
-			   nic->num_vec);
+			"Request for #%d msix vectors failed, returned %d\n",
+			   nic->num_vec, ret);
+		kfree(nic->msix_entries);
 		return ret;
 	}
 
@@ -842,6 +1088,7 @@
 {
 	if (nic->msix_enabled) {
 		pci_disable_msix(nic->pdev);
+		kfree(nic->msix_entries);
 		nic->msix_enabled = 0;
 		nic->num_vec = 0;
 	}
@@ -860,27 +1107,26 @@
 
 static int nic_register_interrupts(struct nicpf *nic)
 {
-	int ret;
+	int i, ret;
 
 	/* Enable MSI-X */
 	ret = nic_enable_msix(nic);
 	if (ret)
 		return ret;
 
-	/* Register mailbox interrupt handlers */
-	ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector,
-			  nic_mbx0_intr_handler, 0, "NIC Mbox0", nic);
-	if (ret)
-		goto fail;
+	/* Register mailbox interrupt handler */
+	for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) {
+		sprintf(nic->irq_name[i],
+			"NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0));
 
-	nic->irq_allocated[NIC_PF_INTR_ID_MBOX0] = true;
+		ret = request_irq(nic->msix_entries[i].vector,
+				  nic_mbx_intr_handler, 0,
+				  nic->irq_name[i], nic);
+		if (ret)
+			goto fail;
 
-	ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX1].vector,
-			  nic_mbx1_intr_handler, 0, "NIC Mbox1", nic);
-	if (ret)
-		goto fail;
-
-	nic->irq_allocated[NIC_PF_INTR_ID_MBOX1] = true;
+		nic->irq_allocated[i] = true;
+	}
 
 	/* Enable mailbox interrupt */
 	nic_enable_mbx_intr(nic);
@@ -889,6 +1135,7 @@
 fail:
 	dev_err(&nic->pdev->dev, "Request irq failed\n");
 	nic_free_all_interrupts(nic);
+	nic_disable_msix(nic);
 	return ret;
 }
 
@@ -903,6 +1150,12 @@
 	int pos, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE;
 	u16 total_vf;
 
+	/* Secondary Qsets are needed only if CPU count is
+	 * morethan MAX_QUEUES_PER_QSET.
+	 */
+	if (num_online_cpus() <= MAX_QUEUES_PER_QSET)
+		return 0;
+
 	/* Check if its a multi-node environment */
 	if (nr_node_ids > 1)
 		sqs_per_vf = MAX_SQS_PER_VF;
@@ -1008,6 +1261,12 @@
 	if (!nic)
 		return -ENOMEM;
 
+	nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL);
+	if (!nic->hw) {
+		devm_kfree(dev, nic);
+		return -ENOMEM;
+	}
+
 	pci_set_drvdata(pdev, nic);
 
 	nic->pdev = pdev;
@@ -1047,13 +1306,12 @@
 
 	nic->node = nic_get_node_id(pdev);
 
-	nic_set_lmac_vf_mapping(nic);
-
 	/* Initialize hardware */
-	nic_init_hw(nic);
+	err = nic_init_hw(nic);
+	if (err)
+		goto err_release_regions;
 
-	/* Set RSS TBL size for each VF */
-	nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE;
+	nic_set_lmac_vf_mapping(nic);
 
 	/* Register interrupts */
 	err = nic_register_interrupts(nic);
@@ -1086,6 +1344,9 @@
 err_release_regions:
 	pci_release_regions(pdev);
 err_disable_device:
+	nic_free_lmacmem(nic);
+	devm_kfree(dev, nic->hw);
+	devm_kfree(dev, nic);
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 	return err;
@@ -1106,6 +1367,11 @@
 
 	nic_unregister_interrupts(nic);
 	pci_release_regions(pdev);
+
+	nic_free_lmacmem(nic);
+	devm_kfree(&pdev->dev, nic->hw);
+	devm_kfree(&pdev->dev, nic);
+
 	pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 }
diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h
index afb10e3..edf779f 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_reg.h
+++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h
@@ -36,6 +36,20 @@
 #define   NIC_PF_MAILBOX_ENA_W1C		(0x0450)
 #define   NIC_PF_MAILBOX_ENA_W1S		(0x0470)
 #define   NIC_PF_RX_ETYPE_0_7			(0x0500)
+#define   NIC_PF_RX_GENEVE_DEF			(0x0580)
+#define    UDP_GENEVE_PORT_NUM				0x17C1ULL
+#define   NIC_PF_RX_GENEVE_PROT_DEF		(0x0588)
+#define    IPV6_PROT					0x86DDULL
+#define    IPV4_PROT					0x800ULL
+#define    ET_PROT					0x6558ULL
+#define   NIC_PF_RX_NVGRE_PROT_DEF		(0x0598)
+#define   NIC_PF_RX_VXLAN_DEF_0_1		(0x05A0)
+#define    UDP_VXLAN_PORT_NUM				0x12B5
+#define   NIC_PF_RX_VXLAN_PROT_DEF		(0x05B0)
+#define    IPV6_PROT_DEF				0x2ULL
+#define    IPV4_PROT_DEF				0x1ULL
+#define    ET_PROT_DEF					0x3ULL
+#define   NIC_PF_RX_CFG				(0x05D0)
 #define   NIC_PF_PKIND_0_15_CFG			(0x0600)
 #define   NIC_PF_ECC0_FLIP0			(0x1000)
 #define   NIC_PF_ECC1_FLIP0			(0x1008)
@@ -103,6 +117,7 @@
 #define   NIC_PF_SW_SYNC_RX_DONE		(0x490008)
 #define   NIC_PF_TL2_0_63_CFG			(0x500000)
 #define   NIC_PF_TL2_0_63_PRI			(0x520000)
+#define   NIC_PF_TL2_LMAC			(0x540000)
 #define   NIC_PF_TL2_0_63_SH_STATUS		(0x580000)
 #define   NIC_PF_TL3A_0_63_CFG			(0x5F0000)
 #define   NIC_PF_TL3_0_255_CFG			(0x600000)
@@ -170,7 +185,6 @@
 #define   NIC_QSET_SQ_0_7_DOOR			(0x010838)
 #define   NIC_QSET_SQ_0_7_STATUS		(0x010840)
 #define   NIC_QSET_SQ_0_7_DEBUG			(0x010848)
-#define   NIC_QSET_SQ_0_7_CNM_CHG		(0x010860)
 #define   NIC_QSET_SQ_0_7_STAT_0_1		(0x010900)
 
 #define   NIC_QSET_RBDR_0_1_CFG			(0x010C00)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index d2d8ef2..ad4fddb 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -382,7 +382,10 @@
 		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DOOR, q);
 		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS, q);
 		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DEBUG, q);
-		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CNM_CHG, q);
+		/* Padding, was NIC_QSET_SQ_0_7_CNM_CHG, which
+		 * produces bus errors when read
+		 */
+		p[i++] = 0;
 		p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1, q);
 		reg_offset = NIC_QSET_SQ_0_7_STAT_0_1 | (1 << 3);
 		p[i++] = nicvf_queue_reg_read(nic, reg_offset, q);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index a19e73f..45a13f7 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -29,10 +29,20 @@
 static const struct pci_device_id nicvf_id_table[] = {
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
 			 PCI_DEVICE_ID_THUNDER_NIC_VF,
-			 PCI_VENDOR_ID_CAVIUM, 0xA134) },
+			 PCI_VENDOR_ID_CAVIUM,
+			 PCI_SUBSYS_DEVID_88XX_NIC_VF) },
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
 			 PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF,
-			 PCI_VENDOR_ID_CAVIUM, 0xA11E) },
+			 PCI_VENDOR_ID_CAVIUM,
+			 PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) },
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+			 PCI_DEVICE_ID_THUNDER_NIC_VF,
+			 PCI_VENDOR_ID_CAVIUM,
+			 PCI_SUBSYS_DEVID_81XX_NIC_VF) },
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM,
+			 PCI_DEVICE_ID_THUNDER_NIC_VF,
+			 PCI_VENDOR_ID_CAVIUM,
+			 PCI_SUBSYS_DEVID_83XX_NIC_VF) },
 	{ 0, }  /* end of table */
 };
 
@@ -134,15 +144,19 @@
 
 	/* Wait for previous message to be acked, timeout 2sec */
 	while (!nic->pf_acked) {
-		if (nic->pf_nacked)
+		if (nic->pf_nacked) {
+			netdev_err(nic->netdev,
+				   "PF NACK to mbox msg 0x%02x from VF%d\n",
+				   (mbx->msg.msg & 0xFF), nic->vf_id);
 			return -EINVAL;
+		}
 		msleep(sleep);
 		if (nic->pf_acked)
 			break;
 		timeout -= sleep;
 		if (!timeout) {
 			netdev_err(nic->netdev,
-				   "PF didn't ack to mbox msg %d from VF%d\n",
+				   "PF didn't ACK to mbox msg 0x%02x from VF%d\n",
 				   (mbx->msg.msg & 0xFF), nic->vf_id);
 			return -EBUSY;
 		}
@@ -352,13 +366,7 @@
 
 	rss->enable = true;
 
-	/* Using the HW reset value for now */
-	rss->key[0] = 0xFEED0BADFEED0BADULL;
-	rss->key[1] = 0xFEED0BADFEED0BADULL;
-	rss->key[2] = 0xFEED0BADFEED0BADULL;
-	rss->key[3] = 0xFEED0BADFEED0BADULL;
-	rss->key[4] = 0xFEED0BADFEED0BADULL;
-
+	netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64));
 	nicvf_set_rss_key(nic);
 
 	rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
@@ -507,12 +515,15 @@
 
 static void nicvf_snd_pkt_handler(struct net_device *netdev,
 				  struct cmp_queue *cq,
-				  struct cqe_send_t *cqe_tx, int cqe_type)
+				  struct cqe_send_t *cqe_tx,
+				  int cqe_type, int budget,
+				  unsigned int *tx_pkts, unsigned int *tx_bytes)
 {
 	struct sk_buff *skb = NULL;
 	struct nicvf *nic = netdev_priv(netdev);
 	struct snd_queue *sq;
 	struct sq_hdr_subdesc *hdr;
+	struct sq_hdr_subdesc *tso_sqe;
 
 	sq = &nic->qs->sq[cqe_tx->sq_idx];
 
@@ -527,17 +538,23 @@
 
 	nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
 	skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
-	/* For TSO offloaded packets only one SQE will have a valid SKB */
 	if (skb) {
+		/* Check for dummy descriptor used for HW TSO offload on 88xx */
+		if (hdr->dont_send) {
+			/* Get actual TSO descriptors and free them */
+			tso_sqe =
+			 (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2);
+			nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1);
+		}
 		nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
 		prefetch(skb);
-		dev_consume_skb_any(skb);
+		(*tx_pkts)++;
+		*tx_bytes += skb->len;
+		napi_consume_skb(skb, budget);
 		sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL;
 	} else {
-		/* In case of HW TSO, HW sends a CQE for each segment of a TSO
-		 * packet instead of a single CQE for the whole TSO packet
-		 * transmitted. Each of this CQE points to the same SQE, so
-		 * avoid freeing same SQE multiple times.
+		/* In case of SW TSO on 88xx, only last segment will have
+		 * a SKB attached, so just free SQEs here.
 		 */
 		if (!nic->hw_tso)
 			nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1);
@@ -648,6 +665,7 @@
 	struct cmp_queue *cq = &qs->cq[cq_idx];
 	struct cqe_rx_t *cq_desc;
 	struct netdev_queue *txq;
+	unsigned int tx_pkts = 0, tx_bytes = 0;
 
 	spin_lock_bh(&cq->lock);
 loop:
@@ -686,7 +704,8 @@
 		break;
 		case CQE_TYPE_SEND:
 			nicvf_snd_pkt_handler(netdev, cq,
-					      (void *)cq_desc, CQE_TYPE_SEND);
+					      (void *)cq_desc, CQE_TYPE_SEND,
+					      budget, &tx_pkts, &tx_bytes);
 			tx_done++;
 		break;
 		case CQE_TYPE_INVALID:
@@ -715,6 +734,9 @@
 		netdev = nic->pnicvf->netdev;
 		txq = netdev_get_tx_queue(netdev,
 					  nicvf_netdev_qidx(nic, cq_idx));
+		if (tx_pkts)
+			netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
+
 		nic = nic->pnicvf;
 		if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
 			netif_tx_start_queue(txq);
@@ -928,16 +950,19 @@
 	int vector;
 
 	for_each_cq_irq(irq)
-		sprintf(nic->irq_name[irq], "NICVF%d CQ%d",
-			nic->vf_id, irq);
+		sprintf(nic->irq_name[irq], "%s-rxtx-%d",
+			nic->pnicvf->netdev->name,
+			nicvf_netdev_qidx(nic, irq));
 
 	for_each_sq_irq(irq)
-		sprintf(nic->irq_name[irq], "NICVF%d SQ%d",
-			nic->vf_id, irq - NICVF_INTR_ID_SQ);
+		sprintf(nic->irq_name[irq], "%s-sq-%d",
+			nic->pnicvf->netdev->name,
+			nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ));
 
 	for_each_rbdr_irq(irq)
-		sprintf(nic->irq_name[irq], "NICVF%d RBDR%d",
-			nic->vf_id, irq - NICVF_INTR_ID_RBDR);
+		sprintf(nic->irq_name[irq], "%s-rbdr-%d",
+			nic->pnicvf->netdev->name,
+			nic->sqs_mode ? (nic->sqs_id + 1) : 0);
 
 	/* Register CQ interrupts */
 	for (irq = 0; irq < nic->qs->cq_cnt; irq++) {
@@ -961,8 +986,9 @@
 	}
 
 	/* Register QS error interrupt */
-	sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR],
-		"NICVF%d Qset error", nic->vf_id);
+	sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d",
+		nic->pnicvf->netdev->name,
+		nic->sqs_mode ? (nic->sqs_id + 1) : 0);
 	irq = NICVF_INTR_ID_QS_ERR;
 	ret = request_irq(nic->msix_entries[irq].vector,
 			  nicvf_qs_err_intr_handler,
@@ -1141,6 +1167,9 @@
 
 	netif_tx_disable(netdev);
 
+	for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
+		netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));
+
 	/* Free resources */
 	nicvf_config_data_transfer(nic, false);
 
@@ -1191,7 +1220,7 @@
 	}
 
 	/* Check if we got MAC address from PF or else generate a radom MAC */
-	if (is_zero_ether_addr(netdev->dev_addr)) {
+	if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) {
 		eth_hw_addr_random(netdev);
 		nicvf_hw_set_mac_addr(nic, netdev);
 	}
@@ -1502,6 +1531,7 @@
 	struct net_device *netdev;
 	struct nicvf *nic;
 	int    err, qcount;
+	u16    sdevid;
 
 	err = pci_enable_device(pdev);
 	if (err) {
@@ -1527,14 +1557,13 @@
 		goto err_release_regions;
 	}
 
-	qcount = MAX_CMP_QUEUES_PER_QS;
+	qcount = netif_get_num_default_rss_queues();
 
 	/* Restrict multiqset support only for host bound VFs */
 	if (pdev->is_virtfn) {
 		/* Set max number of queues per VF */
-		qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS);
-		qcount = min(qcount,
-			     (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
+		qcount = min_t(int, num_online_cpus(),
+			       (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS);
 	}
 
 	netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount);
@@ -1575,6 +1604,10 @@
 	if (!pass1_silicon(nic->pdev))
 		nic->hw_tso = true;
 
+	pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid);
+	if (sdevid == 0xA134)
+		nic->t88 = true;
+
 	/* Check if this VF is in QS only mode */
 	if (nic->sqs_mode)
 		return 0;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 0ff8e60..a4fc501 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -479,6 +479,16 @@
 					      NIC_QSET_RQ_GEN_CFG, 0, rq_cfg);
 }
 
+static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
+{
+	union nic_mbx mbx = {};
+
+	/* Reset all RXQ's stats */
+	mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
+	mbx.reset_stat.rq_stat_mask = 0xFFFF;
+	nicvf_send_msg_to_pf(nic, &mbx);
+}
+
 /* Configures receive queue */
 static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 				   int qidx, bool enable)
@@ -762,10 +772,10 @@
 	nic->qs = qs;
 
 	/* Set count of each queue */
-	qs->rbdr_cnt = RBDR_CNT;
-	qs->rq_cnt = RCV_QUEUE_CNT;
-	qs->sq_cnt = SND_QUEUE_CNT;
-	qs->cq_cnt = CMP_QUEUE_CNT;
+	qs->rbdr_cnt = DEFAULT_RBDR_CNT;
+	qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus());
+	qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus());
+	qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt);
 
 	/* Set queue lengths */
 	qs->rbdr_len = RCV_BUF_COUNT;
@@ -812,6 +822,11 @@
 		nicvf_free_resources(nic);
 	}
 
+	/* Reset RXQ's stats.
+	 * SQ's stats will get reset automatically once SQ is reset.
+	 */
+	nicvf_reset_rcv_queue_stats(nic);
+
 	return 0;
 }
 
@@ -938,6 +953,8 @@
 	return num_edescs + sh->gso_segs;
 }
 
+#define POST_CQE_DESC_COUNT 2
+
 /* Get the number of SQ descriptors needed to xmit this skb */
 static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb)
 {
@@ -948,6 +965,10 @@
 		return subdesc_cnt;
 	}
 
+	/* Dummy descriptors to get TSO pkt completion notification */
+	if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size)
+		subdesc_cnt += POST_CQE_DESC_COUNT;
+
 	if (skb_shinfo(skb)->nr_frags)
 		subdesc_cnt += skb_shinfo(skb)->nr_frags;
 
@@ -965,14 +986,21 @@
 	struct sq_hdr_subdesc *hdr;
 
 	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
-	sq->skbuff[qentry] = (u64)skb;
-
 	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
 	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
-	/* Enable notification via CQE after processing SQE */
-	hdr->post_cqe = 1;
-	/* No of subdescriptors following this */
-	hdr->subdesc_cnt = subdesc_cnt;
+
+	if (nic->t88 && nic->hw_tso && skb_shinfo(skb)->gso_size) {
+		/* post_cqe = 0, to avoid HW posting a CQE for every TSO
+		 * segment transmitted on 88xx.
+		 */
+		hdr->subdesc_cnt = subdesc_cnt - POST_CQE_DESC_COUNT;
+	} else {
+		sq->skbuff[qentry] = (u64)skb;
+		/* Enable notification via CQE after processing SQE */
+		hdr->post_cqe = 1;
+		/* No of subdescriptors following this */
+		hdr->subdesc_cnt = subdesc_cnt;
+	}
 	hdr->tot_len = len;
 
 	/* Offload checksum calculation to HW */
@@ -1023,6 +1051,55 @@
 	gather->addr = data;
 }
 
+/* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO
+ * packet so that a CQE is posted as a notifation for transmission of
+ * TSO packet.
+ */
+static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry,
+					    int tso_sqe, struct sk_buff *skb)
+{
+	struct sq_imm_subdesc *imm;
+	struct sq_hdr_subdesc *hdr;
+
+	sq->skbuff[qentry] = (u64)skb;
+
+	hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry);
+	memset(hdr, 0, SND_QUEUE_DESC_SIZE);
+	hdr->subdesc_type = SQ_DESC_TYPE_HEADER;
+	/* Enable notification via CQE after processing SQE */
+	hdr->post_cqe = 1;
+	/* There is no packet to transmit here */
+	hdr->dont_send = 1;
+	hdr->subdesc_cnt = POST_CQE_DESC_COUNT - 1;
+	hdr->tot_len = 1;
+	/* Actual TSO header SQE index, needed for cleanup */
+	hdr->rsvd2 = tso_sqe;
+
+	qentry = nicvf_get_nxt_sqentry(sq, qentry);
+	imm = (struct sq_imm_subdesc *)GET_SQ_DESC(sq, qentry);
+	memset(imm, 0, SND_QUEUE_DESC_SIZE);
+	imm->subdesc_type = SQ_DESC_TYPE_IMMEDIATE;
+	imm->len = 1;
+}
+
+static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb,
+				     int sq_num, int desc_cnt)
+{
+	struct netdev_queue *txq;
+
+	txq = netdev_get_tx_queue(nic->pnicvf->netdev,
+				  skb_get_queue_mapping(skb));
+
+	netdev_tx_sent_queue(txq, skb->len);
+
+	/* make sure all memory stores are done before ringing doorbell */
+	smp_wmb();
+
+	/* Inform HW to xmit all TSO segments */
+	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
+			      sq_num, desc_cnt);
+}
+
 /* Segment a TSO packet into 'gso_size' segments and append
  * them to SQ for transfer
  */
@@ -1082,12 +1159,8 @@
 	/* Save SKB in the last segment for freeing */
 	sq->skbuff[hdr_qentry] = (u64)skb;
 
-	/* make sure all memory stores are done before ringing doorbell */
-	smp_wmb();
+	nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);
 
-	/* Inform HW to xmit all TSO segments */
-	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
-			      sq_num, desc_cnt);
 	nic->drv_stats.tx_tso++;
 	return 1;
 }
@@ -1096,7 +1169,7 @@
 int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
 {
 	int i, size;
-	int subdesc_cnt;
+	int subdesc_cnt, tso_sqe = 0;
 	int sq_num, qentry;
 	struct queue_set *qs;
 	struct snd_queue *sq;
@@ -1131,6 +1204,7 @@
 	/* Add SQ header subdesc */
 	nicvf_sq_add_hdr_subdesc(nic, sq, qentry, subdesc_cnt - 1,
 				 skb, skb->len);
+	tso_sqe = qentry;
 
 	/* Add SQ gather subdescs */
 	qentry = nicvf_get_nxt_sqentry(sq, qentry);
@@ -1154,12 +1228,13 @@
 	}
 
 doorbell:
-	/* make sure all memory stores are done before ringing doorbell */
-	smp_wmb();
+	if (nic->t88 && skb_shinfo(skb)->gso_size) {
+		qentry = nicvf_get_nxt_sqentry(sq, qentry);
+		nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb);
+	}
 
-	/* Inform HW to xmit new packet */
-	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR,
-			      sq_num, subdesc_cnt);
+	nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt);
+
 	return 1;
 
 append_fail:
@@ -1184,13 +1259,23 @@
 	int frag;
 	int payload_len = 0;
 	struct sk_buff *skb = NULL;
-	struct sk_buff *skb_frag = NULL;
-	struct sk_buff *prev_frag = NULL;
+	struct page *page;
+	int offset;
 	u16 *rb_lens = NULL;
 	u64 *rb_ptrs = NULL;
 
 	rb_lens = (void *)cqe_rx + (3 * sizeof(u64));
-	rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
+	/* Except 88xx pass1 on all other chips CQE_RX2_S is added to
+	 * CQE_RX at word6, hence buffer pointers move by word
+	 *
+	 * Use existing 'hw_tso' flag which will be set for all chips
+	 * except 88xx pass1 instead of a additional cache line
+	 * access (or miss) by using pci dev's revision.
+	 */
+	if (!nic->hw_tso)
+		rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64));
+	else
+		rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64));
 
 	netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n",
 		   __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz);
@@ -1208,22 +1293,10 @@
 			skb_put(skb, payload_len);
 		} else {
 			/* Add fragments */
-			skb_frag = nicvf_rb_ptr_to_skb(nic, *rb_ptrs,
-						       payload_len);
-			if (!skb_frag) {
-				dev_kfree_skb(skb);
-				return NULL;
-			}
-
-			if (!skb_shinfo(skb)->frag_list)
-				skb_shinfo(skb)->frag_list = skb_frag;
-			else
-				prev_frag->next = skb_frag;
-
-			prev_frag = skb_frag;
-			skb->len += payload_len;
-			skb->data_len += payload_len;
-			skb_frag->len = payload_len;
+			page = virt_to_page(phys_to_virt(*rb_ptrs));
+			offset = phys_to_virt(*rb_ptrs) - page_address(page);
+			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+					offset, payload_len, RCV_FRAG_LEN);
 		}
 		/* Next buffer pointer */
 		rb_ptrs++;
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 6673e11..869f338 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -57,10 +57,7 @@
 #define CMP_QUEUE_SIZE6		6ULL /* 64K entries */
 
 /* Default queue count per QS, its lengths and threshold values */
-#define RBDR_CNT		1
-#define RCV_QUEUE_CNT		8
-#define SND_QUEUE_CNT		8
-#define CMP_QUEUE_CNT		8 /* Max of RCV and SND qcount */
+#define DEFAULT_RBDR_CNT	1
 
 #define SND_QSIZE		SND_QUEUE_SIZE2
 #define SND_QUEUE_LEN		(1ULL << (SND_QSIZE + 10))
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 63a39ac..8bbaedb 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -28,6 +28,9 @@
 	struct bgx		*bgx;
 	int			dmac;
 	u8			mac[ETH_ALEN];
+	u8                      lmac_type;
+	u8                      lane_to_sds;
+	bool                    use_training;
 	bool			link_up;
 	int			lmacid; /* ID within BGX */
 	int			lmacid_bd; /* ID on board */
@@ -43,14 +46,13 @@
 
 struct bgx {
 	u8			bgx_id;
-	u8			qlm_mode;
 	struct	lmac		lmac[MAX_LMAC_PER_BGX];
 	int			lmac_count;
-	int                     lmac_type;
-	int                     lane_to_sds;
-	int			use_training;
+	u8			max_lmac;
 	void __iomem		*reg_base;
 	struct pci_dev		*pdev;
+	bool                    is_dlm;
+	bool                    is_rgx;
 };
 
 static struct bgx *bgx_vnic[MAX_BGX_THUNDER];
@@ -61,6 +63,7 @@
 /* Supported devices */
 static const struct pci_device_id bgx_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_BGX) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_RGX) },
 	{ 0, }  /* end of table */
 };
 
@@ -124,8 +127,8 @@
 	int i;
 	unsigned map = 0;
 
-	for (i = 0; i < MAX_BGX_PER_CN88XX; i++) {
-		if (bgx_vnic[(node * MAX_BGX_PER_CN88XX) + i])
+	for (i = 0; i < MAX_BGX_PER_NODE; i++) {
+		if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i])
 			map |= (1 << i);
 	}
 
@@ -138,7 +141,7 @@
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (bgx)
 		return bgx->lmac_count;
 
@@ -153,7 +156,7 @@
 	struct bgx *bgx;
 	struct lmac *lmac;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (!bgx)
 		return;
 
@@ -166,7 +169,7 @@
 
 const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 
 	if (bgx)
 		return bgx->lmac[lmacid].mac;
@@ -177,7 +180,7 @@
 
 void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 
 	if (!bgx)
 		return;
@@ -188,11 +191,13 @@
 
 void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 {
-	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
+	struct lmac *lmac;
 	u64 cfg;
 
 	if (!bgx)
 		return;
+	lmac = &bgx->lmac[lmacid];
 
 	cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG);
 	if (enable)
@@ -200,6 +205,9 @@
 	else
 		cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN);
 	bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg);
+
+	if (bgx->is_rgx)
+		xcv_setup_link(enable ? lmac->link_up : 0, lmac->last_speed);
 }
 EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
 
@@ -266,9 +274,12 @@
 
 	port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG);
 
-	/* renable lmac */
+	/* Re-enable lmac */
 	cmr_cfg |= CMR_EN;
 	bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg);
+
+	if (bgx->is_rgx && (cmr_cfg & (CMR_PKT_RX_EN | CMR_PKT_TX_EN)))
+		xcv_setup_link(lmac->link_up, lmac->last_speed);
 }
 
 static void bgx_lmac_handler(struct net_device *netdev)
@@ -314,7 +325,7 @@
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (!bgx)
 		return 0;
 
@@ -328,7 +339,7 @@
 {
 	struct bgx *bgx;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (!bgx)
 		return 0;
 
@@ -356,7 +367,7 @@
 	struct lmac *lmac;
 	u64    cfg;
 
-	bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx];
 	if (!bgx)
 		return;
 
@@ -379,8 +390,9 @@
 }
 EXPORT_SYMBOL(bgx_lmac_internal_loopback);
 
-static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid)
+static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac)
 {
+	int lmacid = lmac->lmacid;
 	u64 cfg;
 
 	bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_THRESH, 0x30);
@@ -409,18 +421,29 @@
 	cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN);
 	bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg);
 
-	if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS,
-			 PCS_MRX_STATUS_AN_CPT, false)) {
-		dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n");
-		return -1;
+	if (lmac->lmac_type == BGX_MODE_QSGMII) {
+		/* Disable disparity check for QSGMII */
+		cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL);
+		cfg &= ~PCS_MISC_CTL_DISP_EN;
+		bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL, cfg);
+		return 0;
+	}
+
+	if (lmac->lmac_type == BGX_MODE_SGMII) {
+		if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS,
+				 PCS_MRX_STATUS_AN_CPT, false)) {
+			dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n");
+			return -1;
+		}
 	}
 
 	return 0;
 }
 
-static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type)
+static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac)
 {
 	u64 cfg;
+	int lmacid = lmac->lmacid;
 
 	/* Reset SPU */
 	bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET);
@@ -436,12 +459,14 @@
 
 	bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER);
 	/* Set interleaved running disparity for RXAUI */
-	if (bgx->lmac_type != BGX_MODE_RXAUI)
-		bgx_reg_modify(bgx, lmacid,
-			       BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS);
-	else
+	if (lmac->lmac_type == BGX_MODE_RXAUI)
 		bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL,
-			       SPU_MISC_CTL_RX_DIS | SPU_MISC_CTL_INTLV_RDISP);
+			       SPU_MISC_CTL_INTLV_RDISP);
+
+	/* Clear receive packet disable */
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
+	cfg &= ~SPU_MISC_CTL_RX_DIS;
+	bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
 
 	/* clear all interrupts */
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_INT);
@@ -451,7 +476,7 @@
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
 	bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg);
 
-	if (bgx->use_training) {
+	if (lmac->use_training) {
 		bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LP_CUP, 0x00);
 		bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_CUP, 0x00);
 		bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_REP, 0x00);
@@ -474,9 +499,9 @@
 	bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_CONTROL, cfg);
 
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_ADV);
-	if (bgx->lmac_type == BGX_MODE_10G_KR)
+	if (lmac->lmac_type == BGX_MODE_10G_KR)
 		cfg |= (1 << 23);
-	else if (bgx->lmac_type == BGX_MODE_40G_KR)
+	else if (lmac->lmac_type == BGX_MODE_40G_KR)
 		cfg |= (1 << 24);
 	else
 		cfg &= ~((1 << 23) | (1 << 24));
@@ -511,11 +536,10 @@
 {
 	struct bgx *bgx = lmac->bgx;
 	int lmacid = lmac->lmacid;
-	int lmac_type = bgx->lmac_type;
+	int lmac_type = lmac->lmac_type;
 	u64 cfg;
 
-	bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS);
-	if (bgx->use_training) {
+	if (lmac->use_training) {
 		cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
 		if (!(cfg & (1ull << 13))) {
 			cfg = (1ull << 13) | (1ull << 14);
@@ -556,7 +580,7 @@
 			       BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT);
 	if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) {
 		dev_err(&bgx->pdev->dev, "Receive fault, retry training\n");
-		if (bgx->use_training) {
+		if (lmac->use_training) {
 			cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT);
 			if (!(cfg & (1ull << 13))) {
 				cfg = (1ull << 13) | (1ull << 14);
@@ -584,11 +608,6 @@
 		return -1;
 	}
 
-	/* Clear receive packet disable */
-	cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL);
-	cfg &= ~SPU_MISC_CTL_RX_DIS;
-	bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg);
-
 	/* Check for MAC RX faults */
 	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL);
 	/* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */
@@ -599,7 +618,7 @@
 	/* Rx local/remote fault seen.
 	 * Do lmac reinit to see if condition recovers
 	 */
-	bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type);
+	bgx_lmac_xaui_init(bgx, lmac);
 
 	return -1;
 }
@@ -623,7 +642,7 @@
 	if ((spu_link & SPU_STATUS1_RCV_LNK) &&
 	    !(smu_link & SMU_RX_CTL_STATUS)) {
 		lmac->link_up = 1;
-		if (lmac->bgx->lmac_type == BGX_MODE_XLAUI)
+		if (lmac->lmac_type == BGX_MODE_XLAUI)
 			lmac->last_speed = 40000;
 		else
 			lmac->last_speed = 10000;
@@ -649,6 +668,16 @@
 	queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2);
 }
 
+static int phy_interface_mode(u8 lmac_type)
+{
+	if (lmac_type == BGX_MODE_QSGMII)
+		return PHY_INTERFACE_MODE_QSGMII;
+	if (lmac_type == BGX_MODE_RGMII)
+		return PHY_INTERFACE_MODE_RGMII;
+
+	return PHY_INTERFACE_MODE_SGMII;
+}
+
 static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid)
 {
 	struct lmac *lmac;
@@ -657,13 +686,15 @@
 	lmac = &bgx->lmac[lmacid];
 	lmac->bgx = bgx;
 
-	if (bgx->lmac_type == BGX_MODE_SGMII) {
+	if ((lmac->lmac_type == BGX_MODE_SGMII) ||
+	    (lmac->lmac_type == BGX_MODE_QSGMII) ||
+	    (lmac->lmac_type == BGX_MODE_RGMII)) {
 		lmac->is_sgmii = 1;
-		if (bgx_lmac_sgmii_init(bgx, lmacid))
+		if (bgx_lmac_sgmii_init(bgx, lmac))
 			return -1;
 	} else {
 		lmac->is_sgmii = 0;
-		if (bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type))
+		if (bgx_lmac_xaui_init(bgx, lmac))
 			return -1;
 	}
 
@@ -685,10 +716,10 @@
 	/* Restore default cfg, incase low level firmware changed it */
 	bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03);
 
-	if ((bgx->lmac_type != BGX_MODE_XFI) &&
-	    (bgx->lmac_type != BGX_MODE_XLAUI) &&
-	    (bgx->lmac_type != BGX_MODE_40G_KR) &&
-	    (bgx->lmac_type != BGX_MODE_10G_KR)) {
+	if ((lmac->lmac_type != BGX_MODE_XFI) &&
+	    (lmac->lmac_type != BGX_MODE_XLAUI) &&
+	    (lmac->lmac_type != BGX_MODE_40G_KR) &&
+	    (lmac->lmac_type != BGX_MODE_10G_KR)) {
 		if (!lmac->phydev)
 			return -ENODEV;
 
@@ -696,7 +727,7 @@
 
 		if (phy_connect_direct(&lmac->netdev, lmac->phydev,
 				       bgx_lmac_handler,
-				       PHY_INTERFACE_MODE_SGMII))
+				       phy_interface_mode(lmac->lmac_type)))
 			return -ENODEV;
 
 		phy_start_aneg(lmac->phydev);
@@ -753,76 +784,19 @@
 
 	bgx_flush_dmac_addrs(bgx, lmacid);
 
-	if ((bgx->lmac_type != BGX_MODE_XFI) &&
-	    (bgx->lmac_type != BGX_MODE_XLAUI) &&
-	    (bgx->lmac_type != BGX_MODE_40G_KR) &&
-	    (bgx->lmac_type != BGX_MODE_10G_KR) && lmac->phydev)
+	if ((lmac->lmac_type != BGX_MODE_XFI) &&
+	    (lmac->lmac_type != BGX_MODE_XLAUI) &&
+	    (lmac->lmac_type != BGX_MODE_40G_KR) &&
+	    (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev)
 		phy_disconnect(lmac->phydev);
 
 	lmac->phydev = NULL;
 }
 
-static void bgx_set_num_ports(struct bgx *bgx)
-{
-	u64 lmac_count;
-
-	switch (bgx->qlm_mode) {
-	case QLM_MODE_SGMII:
-		bgx->lmac_count = 4;
-		bgx->lmac_type = BGX_MODE_SGMII;
-		bgx->lane_to_sds = 0;
-		break;
-	case QLM_MODE_XAUI_1X4:
-		bgx->lmac_count = 1;
-		bgx->lmac_type = BGX_MODE_XAUI;
-		bgx->lane_to_sds = 0xE4;
-			break;
-	case QLM_MODE_RXAUI_2X2:
-		bgx->lmac_count = 2;
-		bgx->lmac_type = BGX_MODE_RXAUI;
-		bgx->lane_to_sds = 0xE4;
-			break;
-	case QLM_MODE_XFI_4X1:
-		bgx->lmac_count = 4;
-		bgx->lmac_type = BGX_MODE_XFI;
-		bgx->lane_to_sds = 0;
-		break;
-	case QLM_MODE_XLAUI_1X4:
-		bgx->lmac_count = 1;
-		bgx->lmac_type = BGX_MODE_XLAUI;
-		bgx->lane_to_sds = 0xE4;
-		break;
-	case QLM_MODE_10G_KR_4X1:
-		bgx->lmac_count = 4;
-		bgx->lmac_type = BGX_MODE_10G_KR;
-		bgx->lane_to_sds = 0;
-		bgx->use_training = 1;
-		break;
-	case QLM_MODE_40G_KR4_1X4:
-		bgx->lmac_count = 1;
-		bgx->lmac_type = BGX_MODE_40G_KR;
-		bgx->lane_to_sds = 0xE4;
-		bgx->use_training = 1;
-		break;
-	default:
-		bgx->lmac_count = 0;
-		break;
-	}
-
-	/* Check if low level firmware has programmed LMAC count
-	 * based on board type, if yes consider that otherwise
-	 * the default static values
-	 */
-	lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7;
-	if (lmac_count != 4)
-		bgx->lmac_count = lmac_count;
-}
-
 static void bgx_init_hw(struct bgx *bgx)
 {
 	int i;
-
-	bgx_set_num_ports(bgx);
+	struct lmac *lmac;
 
 	bgx_reg_modify(bgx, 0, BGX_CMR_GLOBAL_CFG, CMR_GLOBAL_CFG_FCS_STRIP);
 	if (bgx_reg_read(bgx, 0, BGX_CMR_BIST_STATUS))
@@ -830,17 +804,9 @@
 
 	/* Set lmac type and lane2serdes mapping */
 	for (i = 0; i < bgx->lmac_count; i++) {
-		if (bgx->lmac_type == BGX_MODE_RXAUI) {
-			if (i)
-				bgx->lane_to_sds = 0x0e;
-			else
-				bgx->lane_to_sds = 0x04;
-			bgx_reg_write(bgx, i, BGX_CMRX_CFG,
-				      (bgx->lmac_type << 8) | bgx->lane_to_sds);
-			continue;
-		}
+		lmac = &bgx->lmac[i];
 		bgx_reg_write(bgx, i, BGX_CMRX_CFG,
-			      (bgx->lmac_type << 8) | (bgx->lane_to_sds + i));
+			      (lmac->lmac_type << 8) | lmac->lane_to_sds);
 		bgx->lmac[i].lmacid_bd = lmac_count;
 		lmac_count++;
 	}
@@ -863,55 +829,212 @@
 		bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00);
 }
 
-static void bgx_get_qlm_mode(struct bgx *bgx)
+static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac)
+{
+	return (u8)(bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG) & 0xFF);
+}
+
+static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid)
 {
 	struct device *dev = &bgx->pdev->dev;
-	int lmac_type;
-	int train_en;
+	struct lmac *lmac;
+	char str[20];
+	u8 dlm;
 
-	/* Read LMAC0 type to figure out QLM mode
-	 * This is configured by low level firmware
-	 */
-	lmac_type = bgx_reg_read(bgx, 0, BGX_CMRX_CFG);
-	lmac_type = (lmac_type >> 8) & 0x07;
+	if (lmacid > bgx->max_lmac)
+		return;
 
-	train_en = bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) &
-				SPU_PMD_CRTL_TRAIN_EN;
+	lmac = &bgx->lmac[lmacid];
+	dlm = (lmacid / 2) + (bgx->bgx_id * 2);
+	if (!bgx->is_dlm)
+		sprintf(str, "BGX%d QLM mode", bgx->bgx_id);
+	else
+		sprintf(str, "BGX%d DLM%d mode", bgx->bgx_id, dlm);
 
-	switch (lmac_type) {
+	switch (lmac->lmac_type) {
 	case BGX_MODE_SGMII:
-		bgx->qlm_mode = QLM_MODE_SGMII;
-		dev_info(dev, "BGX%d QLM mode: SGMII\n", bgx->bgx_id);
+		dev_info(dev, "%s: SGMII\n", (char *)str);
 		break;
 	case BGX_MODE_XAUI:
-		bgx->qlm_mode = QLM_MODE_XAUI_1X4;
-		dev_info(dev, "BGX%d QLM mode: XAUI\n", bgx->bgx_id);
+		dev_info(dev, "%s: XAUI\n", (char *)str);
 		break;
 	case BGX_MODE_RXAUI:
-		bgx->qlm_mode = QLM_MODE_RXAUI_2X2;
-		dev_info(dev, "BGX%d QLM mode: RXAUI\n", bgx->bgx_id);
+		dev_info(dev, "%s: RXAUI\n", (char *)str);
 		break;
 	case BGX_MODE_XFI:
-		if (!train_en) {
-			bgx->qlm_mode = QLM_MODE_XFI_4X1;
-			dev_info(dev, "BGX%d QLM mode: XFI\n", bgx->bgx_id);
-		} else {
-			bgx->qlm_mode = QLM_MODE_10G_KR_4X1;
-			dev_info(dev, "BGX%d QLM mode: 10G_KR\n", bgx->bgx_id);
-		}
+		if (!lmac->use_training)
+			dev_info(dev, "%s: XFI\n", (char *)str);
+		else
+			dev_info(dev, "%s: 10G_KR\n", (char *)str);
 		break;
 	case BGX_MODE_XLAUI:
-		if (!train_en) {
-			bgx->qlm_mode = QLM_MODE_XLAUI_1X4;
-			dev_info(dev, "BGX%d QLM mode: XLAUI\n", bgx->bgx_id);
-		} else {
-			bgx->qlm_mode = QLM_MODE_40G_KR4_1X4;
-			dev_info(dev, "BGX%d QLM mode: 40G_KR4\n", bgx->bgx_id);
-		}
+		if (!lmac->use_training)
+			dev_info(dev, "%s: XLAUI\n", (char *)str);
+		else
+			dev_info(dev, "%s: 40G_KR4\n", (char *)str);
+		break;
+	case BGX_MODE_QSGMII:
+		if ((lmacid == 0) &&
+		    (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid))
+			return;
+		if ((lmacid == 2) &&
+		    (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid))
+			return;
+		dev_info(dev, "%s: QSGMII\n", (char *)str);
+		break;
+	case BGX_MODE_RGMII:
+		dev_info(dev, "%s: RGMII\n", (char *)str);
+		break;
+	case BGX_MODE_INVALID:
+		/* Nothing to do */
+		break;
+	}
+}
+
+static void lmac_set_lane2sds(struct bgx *bgx, struct lmac *lmac)
+{
+	switch (lmac->lmac_type) {
+	case BGX_MODE_SGMII:
+	case BGX_MODE_XFI:
+		lmac->lane_to_sds = lmac->lmacid;
+		break;
+	case BGX_MODE_XAUI:
+	case BGX_MODE_XLAUI:
+	case BGX_MODE_RGMII:
+		lmac->lane_to_sds = 0xE4;
+		break;
+	case BGX_MODE_RXAUI:
+		lmac->lane_to_sds = (lmac->lmacid) ? 0xE : 0x4;
+		break;
+	case BGX_MODE_QSGMII:
+		/* There is no way to determine if DLM0/2 is QSGMII or
+		 * DLM1/3 is configured to QSGMII as bootloader will
+		 * configure all LMACs, so take whatever is configured
+		 * by low level firmware.
+		 */
+		lmac->lane_to_sds = bgx_get_lane2sds_cfg(bgx, lmac);
 		break;
 	default:
-		bgx->qlm_mode = QLM_MODE_SGMII;
-		dev_info(dev, "BGX%d QLM default mode: SGMII\n", bgx->bgx_id);
+		lmac->lane_to_sds = 0;
+		break;
+	}
+}
+
+static void lmac_set_training(struct bgx *bgx, struct lmac *lmac, int lmacid)
+{
+	if ((lmac->lmac_type != BGX_MODE_10G_KR) &&
+	    (lmac->lmac_type != BGX_MODE_40G_KR)) {
+		lmac->use_training = 0;
+		return;
+	}
+
+	lmac->use_training = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL) &
+							SPU_PMD_CRTL_TRAIN_EN;
+}
+
+static void bgx_set_lmac_config(struct bgx *bgx, u8 idx)
+{
+	struct lmac *lmac;
+	struct lmac *olmac;
+	u64 cmr_cfg;
+	u8 lmac_type;
+	u8 lane_to_sds;
+
+	lmac = &bgx->lmac[idx];
+
+	if (!bgx->is_dlm || bgx->is_rgx) {
+		/* Read LMAC0 type to figure out QLM mode
+		 * This is configured by low level firmware
+		 */
+		cmr_cfg = bgx_reg_read(bgx, 0, BGX_CMRX_CFG);
+		lmac->lmac_type = (cmr_cfg >> 8) & 0x07;
+		if (bgx->is_rgx)
+			lmac->lmac_type = BGX_MODE_RGMII;
+		lmac_set_training(bgx, lmac, 0);
+		lmac_set_lane2sds(bgx, lmac);
+		return;
+	}
+
+	/* On 81xx BGX can be split across 2 DLMs
+	 * firmware programs lmac_type of LMAC0 and LMAC2
+	 */
+	if ((idx == 0) || (idx == 2)) {
+		cmr_cfg = bgx_reg_read(bgx, idx, BGX_CMRX_CFG);
+		lmac_type = (u8)((cmr_cfg >> 8) & 0x07);
+		lane_to_sds = (u8)(cmr_cfg & 0xFF);
+		/* Check if config is not reset value */
+		if ((lmac_type == 0) && (lane_to_sds == 0xE4))
+			lmac->lmac_type = BGX_MODE_INVALID;
+		else
+			lmac->lmac_type = lmac_type;
+		lmac_set_training(bgx, lmac, lmac->lmacid);
+		lmac_set_lane2sds(bgx, lmac);
+
+		/* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */
+		olmac = &bgx->lmac[idx + 1];
+		olmac->lmac_type = lmac->lmac_type;
+		lmac_set_training(bgx, olmac, olmac->lmacid);
+		lmac_set_lane2sds(bgx, olmac);
+	}
+}
+
+static bool is_dlm0_in_bgx_mode(struct bgx *bgx)
+{
+	struct lmac *lmac;
+
+	if (!bgx->is_dlm)
+		return true;
+
+	lmac = &bgx->lmac[0];
+	if (lmac->lmac_type == BGX_MODE_INVALID)
+		return false;
+
+	return true;
+}
+
+static void bgx_get_qlm_mode(struct bgx *bgx)
+{
+	struct lmac *lmac;
+	struct lmac *lmac01;
+	struct lmac *lmac23;
+	u8  idx;
+
+	/* Init all LMAC's type to invalid */
+	for (idx = 0; idx < bgx->max_lmac; idx++) {
+		lmac = &bgx->lmac[idx];
+		lmac->lmacid = idx;
+		lmac->lmac_type = BGX_MODE_INVALID;
+		lmac->use_training = false;
+	}
+
+	/* It is assumed that low level firmware sets this value */
+	bgx->lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7;
+	if (bgx->lmac_count > bgx->max_lmac)
+		bgx->lmac_count = bgx->max_lmac;
+
+	for (idx = 0; idx < bgx->max_lmac; idx++)
+		bgx_set_lmac_config(bgx, idx);
+
+	if (!bgx->is_dlm || bgx->is_rgx) {
+		bgx_print_qlm_mode(bgx, 0);
+		return;
+	}
+
+	if (bgx->lmac_count) {
+		bgx_print_qlm_mode(bgx, 0);
+		bgx_print_qlm_mode(bgx, 2);
+	}
+
+	/* If DLM0 is not in BGX mode then LMAC0/1 have
+	 * to be configured with serdes lanes of DLM1
+	 */
+	if (is_dlm0_in_bgx_mode(bgx) || (bgx->lmac_count > 2))
+		return;
+	for (idx = 0; idx < bgx->lmac_count; idx++) {
+		lmac01 = &bgx->lmac[idx];
+		lmac23 = &bgx->lmac[idx + 2];
+		lmac01->lmac_type = lmac23->lmac_type;
+		lmac01->lane_to_sds = lmac23->lane_to_sds;
 	}
 }
 
@@ -1042,7 +1165,7 @@
 		}
 
 		lmac++;
-		if (lmac == MAX_LMAC_PER_BGX) {
+		if (lmac == bgx->max_lmac) {
 			of_node_put(node);
 			break;
 		}
@@ -1087,6 +1210,7 @@
 	struct device *dev = &pdev->dev;
 	struct bgx *bgx = NULL;
 	u8 lmac;
+	u16 sdevid;
 
 	bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL);
 	if (!bgx)
@@ -1115,10 +1239,30 @@
 		err = -ENOMEM;
 		goto err_release_regions;
 	}
-	bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1;
-	bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_CN88XX;
 
-	bgx_vnic[bgx->bgx_id] = bgx;
+	pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid);
+	if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) {
+		bgx->bgx_id =
+		    (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1;
+		bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE;
+		bgx->max_lmac = MAX_LMAC_PER_BGX;
+		bgx_vnic[bgx->bgx_id] = bgx;
+	} else {
+		bgx->is_rgx = true;
+		bgx->max_lmac = 1;
+		bgx->bgx_id = MAX_BGX_PER_CN81XX - 1;
+		bgx_vnic[bgx->bgx_id] = bgx;
+		xcv_init_hw();
+	}
+
+	/* On 81xx all are DLMs and on 83xx there are 3 BGX QLMs and one
+	 * BGX i.e BGX2 can be split across 2 DLMs.
+	 */
+	pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid);
+	if ((sdevid == PCI_SUBSYS_DEVID_81XX_BGX) ||
+	    ((sdevid == PCI_SUBSYS_DEVID_83XX_BGX) && (bgx->bgx_id == 2)))
+		bgx->is_dlm = true;
+
 	bgx_get_qlm_mode(bgx);
 
 	err = bgx_init_phy(bgx);
@@ -1133,6 +1277,8 @@
 		if (err) {
 			dev_err(dev, "BGX%d failed to enable lmac%d\n",
 				bgx->bgx_id, lmac);
+			while (lmac)
+				bgx_lmac_disable(bgx, --lmac);
 			goto err_enable;
 		}
 	}
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 42010d2..d59c71e 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -9,8 +9,20 @@
 #ifndef THUNDER_BGX_H
 #define THUNDER_BGX_H
 
-#define    MAX_BGX_THUNDER			8 /* Max 4 nodes, 2 per node */
+/* PCI device ID */
+#define	PCI_DEVICE_ID_THUNDER_BGX		0xA026
+#define	PCI_DEVICE_ID_THUNDER_RGX		0xA054
+
+/* Subsystem device IDs */
+#define PCI_SUBSYS_DEVID_88XX_BGX		0xA126
+#define PCI_SUBSYS_DEVID_81XX_BGX		0xA226
+#define PCI_SUBSYS_DEVID_83XX_BGX		0xA326
+
+#define    MAX_BGX_THUNDER			8 /* Max 2 nodes, 4 per node */
 #define    MAX_BGX_PER_CN88XX			2
+#define    MAX_BGX_PER_CN81XX			3 /* 2 BGXs + 1 RGX */
+#define    MAX_BGX_PER_CN83XX			4
+#define    MAX_BGX_PER_NODE			4
 #define    MAX_LMAC_PER_BGX			4
 #define    MAX_BGX_CHANS_PER_LMAC		16
 #define    MAX_DMAC_PER_LMAC			8
@@ -18,8 +30,6 @@
 
 #define    MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE	2
 
-#define    MAX_LMAC	(MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX)
-
 /* Registers */
 #define BGX_CMRX_CFG			0x00
 #define  CMR_PKT_TX_EN				BIT_ULL(13)
@@ -136,6 +146,7 @@
 #define BGX_GMP_PCS_ANX_AN_RESULTS	0x30020
 #define BGX_GMP_PCS_SGM_AN_ADV		0x30068
 #define BGX_GMP_PCS_MISCX_CTL		0x30078
+#define  PCS_MISC_CTL_DISP_EN			BIT_ULL(13)
 #define  PCS_MISC_CTL_GMX_ENO			BIT_ULL(11)
 #define  PCS_MISC_CTL_SAMP_PT_MASK	0x7Full
 #define BGX_GMP_GMI_PRTX_CFG		0x38020
@@ -194,6 +205,9 @@
 void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status);
 void bgx_lmac_internal_loopback(int node, int bgx_idx,
 				int lmac_idx, bool enable);
+void xcv_init_hw(void);
+void xcv_setup_link(bool link_up, int link_speed);
+
 u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx);
 u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx);
 #define BGX_RX_STATS_COUNT 11
@@ -213,16 +227,9 @@
 	BGX_MODE_XLAUI = 4, /* 4 lanes, 10.3125 Gbaud */
 	BGX_MODE_10G_KR = 3,/* 1 lane, 10.3125 Gbaud */
 	BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */
-};
-
-enum qlm_mode {
-	QLM_MODE_SGMII,         /* SGMII, each lane independent */
-	QLM_MODE_XAUI_1X4,      /* 1 XAUI or DXAUI, 4 lanes */
-	QLM_MODE_RXAUI_2X2,     /* 2 RXAUI, 2 lanes each */
-	QLM_MODE_XFI_4X1,       /* 4 XFI, 1 lane each */
-	QLM_MODE_XLAUI_1X4,     /* 1 XLAUI, 4 lanes each */
-	QLM_MODE_10G_KR_4X1,    /* 4 10GBASE-KR, 1 lane each */
-	QLM_MODE_40G_KR4_1X4,   /* 1 40GBASE-KR4, 4 lanes each */
+	BGX_MODE_RGMII = 5,
+	BGX_MODE_QSGMII = 6,
+	BGX_MODE_INVALID = 7,
 };
 
 #endif /* THUNDER_BGX_H */
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
new file mode 100644
index 0000000..67befed
--- /dev/null
+++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2016 Cavium, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+
+#include "nic.h"
+#include "thunder_bgx.h"
+
+#define DRV_NAME	"thunder-xcv"
+#define DRV_VERSION	"1.0"
+
+/* Register offsets */
+#define XCV_RESET		0x00
+#define   PORT_EN		BIT_ULL(63)
+#define   CLK_RESET		BIT_ULL(15)
+#define   DLL_RESET		BIT_ULL(11)
+#define   COMP_EN		BIT_ULL(7)
+#define   TX_PKT_RESET		BIT_ULL(3)
+#define   TX_DATA_RESET		BIT_ULL(2)
+#define   RX_PKT_RESET		BIT_ULL(1)
+#define   RX_DATA_RESET		BIT_ULL(0)
+#define XCV_DLL_CTL		0x10
+#define   CLKRX_BYP		BIT_ULL(23)
+#define   CLKTX_BYP		BIT_ULL(15)
+#define XCV_COMP_CTL		0x20
+#define   DRV_BYP		BIT_ULL(63)
+#define XCV_CTL			0x30
+#define XCV_INT			0x40
+#define XCV_INT_W1S		0x48
+#define XCV_INT_ENA_W1C		0x50
+#define XCV_INT_ENA_W1S		0x58
+#define XCV_INBND_STATUS	0x80
+#define XCV_BATCH_CRD_RET	0x100
+
+struct xcv {
+	void __iomem		*reg_base;
+	struct pci_dev		*pdev;
+};
+
+static struct xcv *xcv;
+
+/* Supported devices */
+static const struct pci_device_id xcv_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xA056) },
+	{ 0, }  /* end of table */
+};
+
+MODULE_AUTHOR("Cavium Inc");
+MODULE_DESCRIPTION("Cavium Thunder RGX/XCV Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION(DRV_VERSION);
+MODULE_DEVICE_TABLE(pci, xcv_id_table);
+
+void xcv_init_hw(void)
+{
+	u64  cfg;
+
+	/* Take DLL out of reset */
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg &= ~DLL_RESET;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+	/* Take clock tree out of reset */
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg &= ~CLK_RESET;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+	/* Wait for DLL to lock */
+	msleep(1);
+
+	/* Configure DLL - enable or bypass
+	 * TX no bypass, RX bypass
+	 */
+	cfg = readq_relaxed(xcv->reg_base + XCV_DLL_CTL);
+	cfg &= ~0xFF03;
+	cfg |= CLKRX_BYP;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_DLL_CTL);
+
+	/* Enable compensation controller and force the
+	 * write to be visible to HW by readig back.
+	 */
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg |= COMP_EN;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+	readq_relaxed(xcv->reg_base + XCV_RESET);
+	/* Wait for compensation state machine to lock */
+	msleep(10);
+
+	/* enable the XCV block */
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg |= PORT_EN;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+	cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+	cfg |= CLK_RESET;
+	writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+}
+EXPORT_SYMBOL(xcv_init_hw);
+
+void xcv_setup_link(bool link_up, int link_speed)
+{
+	u64  cfg;
+	int speed = 2;
+
+	if (!xcv) {
+		dev_err(&xcv->pdev->dev,
+			"XCV init not done, probe may have failed\n");
+		return;
+	}
+
+	if (link_speed == 100)
+		speed = 1;
+	else if (link_speed == 10)
+		speed = 0;
+
+	if (link_up) {
+		/* set operating speed */
+		cfg = readq_relaxed(xcv->reg_base + XCV_CTL);
+		cfg &= ~0x03;
+		cfg |= speed;
+		writeq_relaxed(cfg, xcv->reg_base + XCV_CTL);
+
+		/* Reset datapaths */
+		cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+		cfg |= TX_DATA_RESET | RX_DATA_RESET;
+		writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+		/* Enable the packet flow */
+		cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+		cfg |= TX_PKT_RESET | RX_PKT_RESET;
+		writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+
+		/* Return credits to RGX */
+		writeq_relaxed(0x01, xcv->reg_base + XCV_BATCH_CRD_RET);
+	} else {
+		/* Disable packet flow */
+		cfg = readq_relaxed(xcv->reg_base + XCV_RESET);
+		cfg &= ~(TX_PKT_RESET | RX_PKT_RESET);
+		writeq_relaxed(cfg, xcv->reg_base + XCV_RESET);
+		readq_relaxed(xcv->reg_base + XCV_RESET);
+	}
+}
+EXPORT_SYMBOL(xcv_setup_link);
+
+static int xcv_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	int err;
+	struct device *dev = &pdev->dev;
+
+	xcv = devm_kzalloc(dev, sizeof(struct xcv), GFP_KERNEL);
+	if (!xcv)
+		return -ENOMEM;
+	xcv->pdev = pdev;
+
+	pci_set_drvdata(pdev, xcv);
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Failed to enable PCI device\n");
+		goto err_kfree;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(dev, "PCI request regions failed 0x%x\n", err);
+		goto err_disable_device;
+	}
+
+	/* MAP configuration registers */
+	xcv->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
+	if (!xcv->reg_base) {
+		dev_err(dev, "XCV: Cannot map CSR memory space, aborting\n");
+		err = -ENOMEM;
+		goto err_release_regions;
+	}
+
+	return 0;
+
+err_release_regions:
+	pci_release_regions(pdev);
+err_disable_device:
+	pci_disable_device(pdev);
+err_kfree:
+	devm_kfree(dev, xcv);
+	xcv = NULL;
+	return err;
+}
+
+static void xcv_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+
+	if (xcv) {
+		devm_kfree(dev, xcv);
+		xcv = NULL;
+	}
+
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver xcv_driver = {
+	.name = DRV_NAME,
+	.id_table = xcv_id_table,
+	.probe = xcv_probe,
+	.remove = xcv_remove,
+};
+
+static int __init xcv_init_module(void)
+{
+	pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION);
+
+	return pci_register_driver(&xcv_driver);
+}
+
+static void __exit xcv_cleanup_module(void)
+{
+	pci_unregister_driver(&xcv_driver);
+}
+
+module_init(xcv_init_module);
+module_exit(xcv_cleanup_module);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index ace0ab9..c6b71f6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
 
-cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o
+cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o
 cxgb4-$(CONFIG_CHELSIO_T4_DCB) +=  cxgb4_dcb.o
 cxgb4-$(CONFIG_CHELSIO_T4_FCOE) +=  cxgb4_fcoe.o
 cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 2e2aa9f..28e653e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -53,6 +53,8 @@
 #include "cxgb4_uld.h"
 
 #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
+extern struct list_head adapter_list;
+extern struct mutex uld_mutex;
 
 enum {
 	MAX_NPORTS	= 4,     /* max # of ports */
@@ -338,12 +340,14 @@
 	enum chip_type chip;               /* chip code */
 	struct arch_specific_params arch;  /* chip specific params */
 	unsigned char offload;
+	unsigned char crypto;		/* HW capability for crypto */
 
 	unsigned char bypass;
 
 	unsigned int ofldq_wr_cred;
 	bool ulptx_memwrite_dsgl;          /* use of T5 DSGL allowed */
 
+	unsigned int nsched_cls;          /* number of traffic classes */
 	unsigned int max_ordird_qp;       /* Max read depth per RDMA QP */
 	unsigned int max_ird_adapter;     /* Max read depth per adapter */
 };
@@ -403,7 +407,6 @@
 	struct fw_hdr fw_hdr;
 };
 
-
 struct trace_params {
 	u32 data[TRACE_LEN / 4];
 	u32 mask[TRACE_LEN / 4];
@@ -419,8 +422,8 @@
 	unsigned short supported;        /* link capabilities */
 	unsigned short advertising;      /* advertised capabilities */
 	unsigned short lp_advertising;   /* peer advertised capabilities */
-	unsigned short requested_speed;  /* speed user has requested */
-	unsigned short speed;            /* actual link speed */
+	unsigned int   requested_speed;  /* speed user has requested */
+	unsigned int   speed;            /* actual link speed */
 	unsigned char  requested_fc;     /* flow control user has requested */
 	unsigned char  fc;               /* actual link flow control */
 	unsigned char  autoneg;          /* autonegotiating? */
@@ -434,11 +437,6 @@
 	MAX_ETH_QSETS = 32,           /* # of Ethernet Tx/Rx queue sets */
 	MAX_OFLD_QSETS = 16,          /* # of offload Tx, iscsi Rx queue sets */
 	MAX_CTRL_QUEUES = NCHAN,      /* # of control Tx queues */
-	MAX_RDMA_QUEUES = NCHAN,      /* # of streaming RDMA Rx queues */
-	MAX_RDMA_CIQS = 32,        /* # of  RDMA concentrator IQs */
-
-	/* # of streaming iSCSIT Rx queues */
-	MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS,
 };
 
 enum {
@@ -455,8 +453,7 @@
 enum {
 	INGQ_EXTRAS = 2,        /* firmware event queue and */
 				/*   forwarded interrupts */
-	MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES +
-		   MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS,
+	MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS,
 };
 
 struct adapter;
@@ -493,6 +490,7 @@
 #endif /* CONFIG_CHELSIO_T4_FCOE */
 	bool rxtstamp;  /* Enable TS */
 	struct hwtstamp_config tstamp_config;
+	struct sched_table *sched_tbl;
 };
 
 struct dentry;
@@ -510,6 +508,10 @@
 	FW_OFLD_CONN       = (1 << 9),
 };
 
+enum {
+	ULP_CRYPTO_LOOKASIDE = 1 << 0,
+};
+
 struct rx_sw_desc;
 
 struct sge_fl {                     /* SGE free-buffer queue state */
@@ -680,17 +682,24 @@
 	u8 full;                    /* the Tx ring is full */
 } ____cacheline_aligned_in_smp;
 
+struct sge_uld_rxq_info {
+	char name[IFNAMSIZ];	/* name of ULD driver */
+	struct sge_ofld_rxq *uldrxq; /* Rxq's for ULD */
+	u16 *msix_tbl;		/* msix_tbl for uld */
+	u16 *rspq_id;		/* response queue id's of rxq */
+	u16 nrxq;		/* # of ingress uld queues */
+	u16 nciq;		/* # of completion queues */
+	u8 uld;			/* uld type */
+};
+
 struct sge {
 	struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
 	struct sge_ofld_txq ofldtxq[MAX_OFLD_QSETS];
 	struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES];
 
 	struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
-	struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS];
-	struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES];
-	struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES];
-	struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS];
 	struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
+	struct sge_uld_rxq_info **uld_rxq_info;
 
 	struct sge_rspq intrq ____cacheline_aligned_in_smp;
 	spinlock_t intrq_lock;
@@ -698,14 +707,8 @@
 	u16 max_ethqsets;           /* # of available Ethernet queue sets */
 	u16 ethqsets;               /* # of active Ethernet queue sets */
 	u16 ethtxq_rover;           /* Tx queue to clean up next */
-	u16 iscsiqsets;              /* # of active iSCSI queue sets */
-	u16 niscsitq;               /* # of available iSCST Rx queues */
-	u16 rdmaqs;                 /* # of available RDMA Rx queues */
-	u16 rdmaciqs;               /* # of available RDMA concentrator IQs */
-	u16 iscsi_rxq[MAX_OFLD_QSETS];
-	u16 iscsit_rxq[MAX_ISCSIT_QUEUES];
-	u16 rdma_rxq[MAX_RDMA_QUEUES];
-	u16 rdma_ciq[MAX_RDMA_CIQS];
+	u16 ofldqsets;              /* # of active ofld queue sets */
+	u16 nqs_per_uld;	    /* # of Rx queues per ULD */
 	u16 timer_val[SGE_NTIMERS];
 	u8 counter_val[SGE_NCOUNTERS];
 	u32 fl_pg_order;            /* large page allocation size */
@@ -729,10 +732,7 @@
 };
 
 #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
-#define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++)
-#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++)
-#define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++)
-#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++)
+#define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++)
 
 struct l2t_data;
 
@@ -757,6 +757,23 @@
 	u8 addr[ETH_ALEN];
 };
 
+struct uld_msix_bmap {
+	unsigned long *msix_bmap;
+	unsigned int mapsize;
+	spinlock_t lock; /* lock for acquiring bitmap */
+};
+
+struct uld_msix_info {
+	unsigned short vec;
+	char desc[IFNAMSIZ + 10];
+	unsigned int idx;
+};
+
+struct vf_info {
+	unsigned char vf_mac_addr[ETH_ALEN];
+	bool pf_set_mac;
+};
+
 struct adapter {
 	void __iomem *regs;
 	void __iomem *bar2;
@@ -767,6 +784,7 @@
 	unsigned int mbox;
 	unsigned int pf;
 	unsigned int flags;
+	unsigned int adap_idx;
 	enum chip_type chip;
 
 	int msg_enable;
@@ -779,6 +797,9 @@
 		unsigned short vec;
 		char desc[IFNAMSIZ + 10];
 	} msix_info[MAX_INGQ + 1];
+	struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
+	struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
+	int msi_idx;
 
 	struct doorbell_stats db_stats;
 	struct sge sge;
@@ -786,6 +807,9 @@
 	struct net_device *port[MAX_NPORTS];
 	u8 chan_map[NCHAN];                   /* channel -> port map */
 
+	struct vf_info *vfinfo;
+	u8 num_vfs;
+
 	u32 filter_mode;
 	unsigned int l2t_start;
 	unsigned int l2t_end;
@@ -793,7 +817,10 @@
 	unsigned int clipt_start;
 	unsigned int clipt_end;
 	struct clip_tbl *clipt;
+	struct cxgb4_uld_info *uld;
 	void *uld_handle[CXGB4_ULD_MAX];
+	unsigned int num_uld;
+	unsigned int num_ofld_uld;
 	struct list_head list_node;
 	struct list_head rcu_node;
 	struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
@@ -813,6 +840,8 @@
 #define T4_OS_LOG_MBOX_CMDS 256
 	struct mbox_cmd_log *mbox_log;
 
+	struct mutex uld_mutex;
+
 	struct dentry *debugfs_root;
 	bool use_bd;     /* Use SGE Back Door intfc for reading SGE Contexts */
 	bool trace_rss;	/* 1 implies that different RSS flit per filter is
@@ -822,6 +851,58 @@
 
 	spinlock_t stats_lock;
 	spinlock_t win0_lock ____cacheline_aligned_in_smp;
+
+	/* TC u32 offload */
+	struct cxgb4_tc_u32_table *tc_u32;
+};
+
+/* Support for "sched-class" command to allow a TX Scheduling Class to be
+ * programmed with various parameters.
+ */
+struct ch_sched_params {
+	s8   type;                     /* packet or flow */
+	union {
+		struct {
+			s8   level;    /* scheduler hierarchy level */
+			s8   mode;     /* per-class or per-flow */
+			s8   rateunit; /* bit or packet rate */
+			s8   ratemode; /* %port relative or kbps absolute */
+			s8   channel;  /* scheduler channel [0..N] */
+			s8   class;    /* scheduler class [0..N] */
+			s32  minrate;  /* minimum rate */
+			s32  maxrate;  /* maximum rate */
+			s16  weight;   /* percent weight */
+			s16  pktsize;  /* average packet size */
+		} params;
+	} u;
+};
+
+enum {
+	SCHED_CLASS_TYPE_PACKET = 0,    /* class type */
+};
+
+enum {
+	SCHED_CLASS_LEVEL_CL_RL = 0,    /* class rate limiter */
+};
+
+enum {
+	SCHED_CLASS_MODE_CLASS = 0,     /* per-class scheduling */
+};
+
+enum {
+	SCHED_CLASS_RATEUNIT_BITS = 0,  /* bit rate scheduling */
+};
+
+enum {
+	SCHED_CLASS_RATEMODE_ABS = 1,   /* Kb/s */
+};
+
+/* Support for "sched_queue" command to allow one or more NIC TX Queues
+ * to be bound to a TX Scheduling Class.
+ */
+struct ch_sched_queue {
+	s8   queue;    /* queue index */
+	s8   class;    /* class index */
 };
 
 /* Defined bit width of user definable filter tuples
@@ -947,11 +1028,47 @@
 	VLAN_REWRITE
 };
 
+/* Host shadow copy of ingress filter entry.  This is in host native format
+ * and doesn't match the ordering or bit order, etc. of the hardware of the
+ * firmware command.  The use of bit-field structure elements is purely to
+ * remind ourselves of the field size limitations and save memory in the case
+ * where the filter table is large.
+ */
+struct filter_entry {
+	/* Administrative fields for filter. */
+	u32 valid:1;            /* filter allocated and valid */
+	u32 locked:1;           /* filter is administratively locked */
+
+	u32 pending:1;          /* filter action is pending firmware reply */
+	u32 smtidx:8;           /* Source MAC Table index for smac */
+	struct filter_ctx *ctx; /* Caller's completion hook */
+	struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
+	struct net_device *dev; /* Associated net device */
+	u32 tid;                /* This will store the actual tid */
+
+	/* The filter itself.  Most of this is a straight copy of information
+	 * provided by the extended ioctl().  Some fields are translated to
+	 * internal forms -- for instance the Ingress Queue ID passed in from
+	 * the ioctl() is translated into the Absolute Ingress Queue ID.
+	 */
+	struct ch_filter_specification fs;
+};
+
 static inline int is_offload(const struct adapter *adap)
 {
 	return adap->params.offload;
 }
 
+static inline int is_pci_uld(const struct adapter *adap)
+{
+	return adap->params.crypto;
+}
+
+static inline int is_uld(const struct adapter *adap)
+{
+	return (adap->params.offload || adap->params.crypto);
+}
+
 static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
 {
 	return readl(adap->regs + reg_addr);
@@ -1178,6 +1295,8 @@
 int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 			  struct net_device *dev, unsigned int iqid,
 			  unsigned int cmplqid);
+int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
+			unsigned int cmplqid);
 int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 			  struct net_device *dev, unsigned int iqid);
 irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
@@ -1185,8 +1304,6 @@
 void t4_sge_start(struct adapter *adap);
 void t4_sge_stop(struct adapter *adap);
 int cxgb_busy_poll(struct napi_struct *napi);
-int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
-			       unsigned int cnt);
 void cxgb4_set_ethtool_ops(struct net_device *netdev);
 int cxgb4_write_rss(const struct port_info *pi, const u16 *queues);
 extern int dbfifo_int_thresh;
@@ -1289,6 +1406,18 @@
 	return a & 0x3f;
 }
 
+int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
+			       unsigned int cnt);
+static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
+			     unsigned int us, unsigned int cnt,
+			     unsigned int size, unsigned int iqe_size)
+{
+	q->adap = adap;
+	cxgb4_set_rspq_intr_params(q, us, cnt);
+	q->iqe_len = iqe_size;
+	q->size = size;
+}
+
 void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
 		       unsigned int data_reg, const u32 *vals,
 		       unsigned int nregs, unsigned int start_idx);
@@ -1514,6 +1643,9 @@
 			 int filter_index, int *enabled);
 int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
 			 u32 addr, u32 val);
+int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
+		    int rateunit, int ratemode, int channel, int class,
+		    int minrate, int maxrate, int weight, int pktsize);
 void t4_sge_decode_idma_state(struct adapter *adapter, int state);
 void t4_free_mem(void *addr);
 void t4_idma_monitor_init(struct adapter *adapter,
@@ -1521,4 +1653,11 @@
 void t4_idma_monitor(struct adapter *adapter,
 		     struct sge_idma_monitor_state *idma,
 		     int hz, int ticks);
+int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+		      unsigned int naddr, u8 *addr);
+void t4_uld_mem_free(struct adapter *adap);
+int t4_uld_mem_alloc(struct adapter *adap);
+void t4_uld_clean_up(struct adapter *adap);
+void t4_register_netevent_notifier(void);
+void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 91fb508..20455d0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2432,17 +2432,11 @@
 {
 	struct adapter *adap = seq->private;
 	int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
-	int iscsi_entries = DIV_ROUND_UP(adap->sge.iscsiqsets, 4);
-	int iscsit_entries = DIV_ROUND_UP(adap->sge.niscsitq, 4);
-	int rdma_entries = DIV_ROUND_UP(adap->sge.rdmaqs, 4);
-	int ciq_entries = DIV_ROUND_UP(adap->sge.rdmaciqs, 4);
+	int ofld_entries = DIV_ROUND_UP(adap->sge.ofldqsets, 4);
 	int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
 	int i, r = (uintptr_t)v - 1;
-	int iscsi_idx = r - eth_entries;
-	int iscsit_idx = iscsi_idx - iscsi_entries;
-	int rdma_idx = iscsit_idx - iscsit_entries;
-	int ciq_idx = rdma_idx - rdma_entries;
-	int ctrl_idx =  ciq_idx - ciq_entries;
+	int ofld_idx = r - eth_entries;
+	int ctrl_idx =  ofld_idx - ofld_entries;
 	int fq_idx =  ctrl_idx - ctrl_entries;
 
 	if (r)
@@ -2518,119 +2512,17 @@
 		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
-	} else if (iscsi_idx < iscsi_entries) {
-		const struct sge_ofld_rxq *rx =
-			&adap->sge.iscsirxq[iscsi_idx * 4];
+	} else if (ofld_idx < ofld_entries) {
 		const struct sge_ofld_txq *tx =
-			&adap->sge.ofldtxq[iscsi_idx * 4];
-		int n = min(4, adap->sge.iscsiqsets - 4 * iscsi_idx);
+			&adap->sge.ofldtxq[ofld_idx * 4];
+		int n = min(4, adap->sge.ofldqsets - 4 * ofld_idx);
 
-		S("QType:", "iSCSI");
+		S("QType:", "OFLD-Txq");
 		T("TxQ ID:", q.cntxt_id);
 		T("TxQ size:", q.size);
 		T("TxQ inuse:", q.in_use);
 		T("TxQ CIDX:", q.cidx);
 		T("TxQ PIDX:", q.pidx);
-		R("RspQ ID:", rspq.abs_id);
-		R("RspQ size:", rspq.size);
-		R("RspQE size:", rspq.iqe_len);
-		R("RspQ CIDX:", rspq.cidx);
-		R("RspQ Gen:", rspq.gen);
-		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
-		S3("u", "Intr pktcnt:",
-		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
-		R("FL ID:", fl.cntxt_id);
-		R("FL size:", fl.size - 8);
-		R("FL pend:", fl.pend_cred);
-		R("FL avail:", fl.avail);
-		R("FL PIDX:", fl.pidx);
-		R("FL CIDX:", fl.cidx);
-		RL("RxPackets:", stats.pkts);
-		RL("RxImmPkts:", stats.imm);
-		RL("RxNoMem:", stats.nomem);
-		RL("FLAllocErr:", fl.alloc_failed);
-		RL("FLLrgAlcErr:", fl.large_alloc_failed);
-		RL("FLMapErr:", fl.mapping_err);
-		RL("FLLow:", fl.low);
-		RL("FLStarving:", fl.starving);
-
-	} else if (iscsit_idx < iscsit_entries) {
-		const struct sge_ofld_rxq *rx =
-			&adap->sge.iscsitrxq[iscsit_idx * 4];
-		int n = min(4, adap->sge.niscsitq - 4 * iscsit_idx);
-
-		S("QType:", "iSCSIT");
-		R("RspQ ID:", rspq.abs_id);
-		R("RspQ size:", rspq.size);
-		R("RspQE size:", rspq.iqe_len);
-		R("RspQ CIDX:", rspq.cidx);
-		R("RspQ Gen:", rspq.gen);
-		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
-		S3("u", "Intr pktcnt:",
-		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
-		R("FL ID:", fl.cntxt_id);
-		R("FL size:", fl.size - 8);
-		R("FL pend:", fl.pend_cred);
-		R("FL avail:", fl.avail);
-		R("FL PIDX:", fl.pidx);
-		R("FL CIDX:", fl.cidx);
-		RL("RxPackets:", stats.pkts);
-		RL("RxImmPkts:", stats.imm);
-		RL("RxNoMem:", stats.nomem);
-		RL("FLAllocErr:", fl.alloc_failed);
-		RL("FLLrgAlcErr:", fl.large_alloc_failed);
-		RL("FLMapErr:", fl.mapping_err);
-		RL("FLLow:", fl.low);
-		RL("FLStarving:", fl.starving);
-
-	} else if (rdma_idx < rdma_entries) {
-		const struct sge_ofld_rxq *rx =
-				&adap->sge.rdmarxq[rdma_idx * 4];
-		int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx);
-
-		S("QType:", "RDMA-CPL");
-		S("Interface:",
-		  rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
-		R("RspQ ID:", rspq.abs_id);
-		R("RspQ size:", rspq.size);
-		R("RspQE size:", rspq.iqe_len);
-		R("RspQ CIDX:", rspq.cidx);
-		R("RspQ Gen:", rspq.gen);
-		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
-		S3("u", "Intr pktcnt:",
-		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
-		R("FL ID:", fl.cntxt_id);
-		R("FL size:", fl.size - 8);
-		R("FL pend:", fl.pend_cred);
-		R("FL avail:", fl.avail);
-		R("FL PIDX:", fl.pidx);
-		R("FL CIDX:", fl.cidx);
-		RL("RxPackets:", stats.pkts);
-		RL("RxImmPkts:", stats.imm);
-		RL("RxNoMem:", stats.nomem);
-		RL("FLAllocErr:", fl.alloc_failed);
-		RL("FLLrgAlcErr:", fl.large_alloc_failed);
-		RL("FLMapErr:", fl.mapping_err);
-		RL("FLLow:", fl.low);
-		RL("FLStarving:", fl.starving);
-
-	} else if (ciq_idx < ciq_entries) {
-		const struct sge_ofld_rxq *rx = &adap->sge.rdmaciq[ciq_idx * 4];
-		int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx);
-
-		S("QType:", "RDMA-CIQ");
-		S("Interface:",
-		  rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
-		R("RspQ ID:", rspq.abs_id);
-		R("RspQ size:", rspq.size);
-		R("RspQE size:", rspq.iqe_len);
-		R("RspQ CIDX:", rspq.cidx);
-		R("RspQ Gen:", rspq.gen);
-		S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq));
-		S3("u", "Intr pktcnt:",
-		   adap->sge.counter_val[rx[i].rspq.pktcnt_idx]);
-		RL("RxAN:", stats.an);
-		RL("RxNoMem:", stats.nomem);
 
 	} else if (ctrl_idx < ctrl_entries) {
 		const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4];
@@ -2672,10 +2564,7 @@
 static int sge_queue_entries(const struct adapter *adap)
 {
 	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
-	       DIV_ROUND_UP(adap->sge.iscsiqsets, 4) +
-	       DIV_ROUND_UP(adap->sge.niscsitq, 4) +
-	       DIV_ROUND_UP(adap->sge.rdmaqs, 4) +
-	       DIV_ROUND_UP(adap->sge.rdmaciqs, 4) +
+	       DIV_ROUND_UP(adap->sge.ofldqsets, 4) +
 	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
 }
 
@@ -2859,12 +2748,6 @@
 				 size_mb << 20);
 }
 
-static int blocked_fl_open(struct inode *inode, struct file *file)
-{
-	file->private_data = inode->i_private;
-	return 0;
-}
-
 static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf,
 			       size_t count, loff_t *ppos)
 {
@@ -2908,7 +2791,7 @@
 
 static const struct file_operations blocked_fl_fops = {
 	.owner   = THIS_MODULE,
-	.open    = blocked_fl_open,
+	.open    = simple_open,
 	.read    = blocked_fl_read,
 	.write   = blocked_fl_write,
 	.llseek  = generic_file_llseek,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
new file mode 100644
index 0000000..1073673
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -0,0 +1,721 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cxgb4.h"
+#include "t4_regs.h"
+#include "l2t.h"
+#include "t4fw_api.h"
+#include "cxgb4_filter.h"
+
+static inline bool is_field_set(u32 val, u32 mask)
+{
+	return val || mask;
+}
+
+static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask)
+{
+	return !(conf & conf_mask) && is_field_set(val, mask);
+}
+
+/* Validate filter spec against configuration done on the card. */
+static int validate_filter(struct net_device *dev,
+			   struct ch_filter_specification *fs)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	u32 fconf, iconf;
+
+	/* Check for unconfigured fields being used. */
+	fconf = adapter->params.tp.vlan_pri_map;
+	iconf = adapter->params.tp.ingress_config;
+
+	if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) ||
+	    unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) ||
+	    unsupported(fconf, TOS_F, fs->val.tos, fs->mask.tos) ||
+	    unsupported(fconf, ETHERTYPE_F, fs->val.ethtype,
+			fs->mask.ethtype) ||
+	    unsupported(fconf, MACMATCH_F, fs->val.macidx, fs->mask.macidx) ||
+	    unsupported(fconf, MPSHITTYPE_F, fs->val.matchtype,
+			fs->mask.matchtype) ||
+	    unsupported(fconf, FRAGMENTATION_F, fs->val.frag, fs->mask.frag) ||
+	    unsupported(fconf, PROTOCOL_F, fs->val.proto, fs->mask.proto) ||
+	    unsupported(fconf, VNIC_ID_F, fs->val.pfvf_vld,
+			fs->mask.pfvf_vld) ||
+	    unsupported(fconf, VNIC_ID_F, fs->val.ovlan_vld,
+			fs->mask.ovlan_vld) ||
+	    unsupported(fconf, VLAN_F, fs->val.ivlan_vld, fs->mask.ivlan_vld))
+		return -EOPNOTSUPP;
+
+	/* T4 inconveniently uses the same FT_VNIC_ID_W bits for both the Outer
+	 * VLAN Tag and PF/VF/VFvld fields based on VNIC_F being set
+	 * in TP_INGRESS_CONFIG.  Hense the somewhat crazy checks
+	 * below.  Additionally, since the T4 firmware interface also
+	 * carries that overlap, we need to translate any PF/VF
+	 * specification into that internal format below.
+	 */
+	if (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) &&
+	    is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld))
+		return -EOPNOTSUPP;
+	if (unsupported(iconf, VNIC_F, fs->val.pfvf_vld, fs->mask.pfvf_vld) ||
+	    (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) &&
+	     (iconf & VNIC_F)))
+		return -EOPNOTSUPP;
+	if (fs->val.pf > 0x7 || fs->val.vf > 0x7f)
+		return -ERANGE;
+	fs->mask.pf &= 0x7;
+	fs->mask.vf &= 0x7f;
+
+	/* If the user is requesting that the filter action loop
+	 * matching packets back out one of our ports, make sure that
+	 * the egress port is in range.
+	 */
+	if (fs->action == FILTER_SWITCH &&
+	    fs->eport >= adapter->params.nports)
+		return -ERANGE;
+
+	/* Don't allow various trivially obvious bogus out-of-range values... */
+	if (fs->val.iport >= adapter->params.nports)
+		return -ERANGE;
+
+	/* T4 doesn't support removing VLAN Tags for loop back filters. */
+	if (is_t4(adapter->params.chip) &&
+	    fs->action == FILTER_SWITCH &&
+	    (fs->newvlan == VLAN_REMOVE ||
+	     fs->newvlan == VLAN_REWRITE))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+static int get_filter_steerq(struct net_device *dev,
+			     struct ch_filter_specification *fs)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	int iq;
+
+	/* If the user has requested steering matching Ingress Packets
+	 * to a specific Queue Set, we need to make sure it's in range
+	 * for the port and map that into the Absolute Queue ID of the
+	 * Queue Set's Response Queue.
+	 */
+	if (!fs->dirsteer) {
+		if (fs->iq)
+			return -EINVAL;
+		iq = 0;
+	} else {
+		struct port_info *pi = netdev_priv(dev);
+
+		/* If the iq id is greater than the number of qsets,
+		 * then assume it is an absolute qid.
+		 */
+		if (fs->iq < pi->nqsets)
+			iq = adapter->sge.ethrxq[pi->first_qset +
+						 fs->iq].rspq.abs_id;
+		else
+			iq = fs->iq;
+	}
+
+	return iq;
+}
+
+static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family)
+{
+	spin_lock_bh(&t->ftid_lock);
+
+	if (test_bit(fidx, t->ftid_bmap)) {
+		spin_unlock_bh(&t->ftid_lock);
+		return -EBUSY;
+	}
+
+	if (family == PF_INET)
+		__set_bit(fidx, t->ftid_bmap);
+	else
+		bitmap_allocate_region(t->ftid_bmap, fidx, 2);
+
+	spin_unlock_bh(&t->ftid_lock);
+	return 0;
+}
+
+static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family)
+{
+	spin_lock_bh(&t->ftid_lock);
+	if (family == PF_INET)
+		__clear_bit(fidx, t->ftid_bmap);
+	else
+		bitmap_release_region(t->ftid_bmap, fidx, 2);
+	spin_unlock_bh(&t->ftid_lock);
+}
+
+/* Delete the filter at a specified index. */
+static int del_filter_wr(struct adapter *adapter, int fidx)
+{
+	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
+	struct fw_filter_wr *fwr;
+	struct sk_buff *skb;
+	unsigned int len;
+
+	len = sizeof(*fwr);
+
+	skb = alloc_skb(len, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	fwr = (struct fw_filter_wr *)__skb_put(skb, len);
+	t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id);
+
+	/* Mark the filter as "pending" and ship off the Filter Work Request.
+	 * When we get the Work Request Reply we'll clear the pending status.
+	 */
+	f->pending = 1;
+	t4_mgmt_tx(adapter, skb);
+	return 0;
+}
+
+/* Send a Work Request to write the filter at a specified index.  We construct
+ * a Firmware Filter Work Request to have the work done and put the indicated
+ * filter into "pending" mode which will prevent any further actions against
+ * it till we get a reply from the firmware on the completion status of the
+ * request.
+ */
+int set_filter_wr(struct adapter *adapter, int fidx)
+{
+	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
+	struct fw_filter_wr *fwr;
+	struct sk_buff *skb;
+
+	skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	/* If the new filter requires loopback Destination MAC and/or VLAN
+	 * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
+	 * the filter.
+	 */
+	if (f->fs.newdmac || f->fs.newvlan) {
+		/* allocate L2T entry for new filter */
+		f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
+						f->fs.eport, f->fs.dmac);
+		if (!f->l2t) {
+			kfree_skb(skb);
+			return -ENOMEM;
+		}
+	}
+
+	fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
+	memset(fwr, 0, sizeof(*fwr));
+
+	/* It would be nice to put most of the following in t4_hw.c but most
+	 * of the work is translating the cxgbtool ch_filter_specification
+	 * into the Work Request and the definition of that structure is
+	 * currently in cxgbtool.h which isn't appropriate to pull into the
+	 * common code.  We may eventually try to come up with a more neutral
+	 * filter specification structure but for now it's easiest to simply
+	 * put this fairly direct code in line ...
+	 */
+	fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
+	fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16));
+	fwr->tid_to_iq =
+		htonl(FW_FILTER_WR_TID_V(f->tid) |
+		      FW_FILTER_WR_RQTYPE_V(f->fs.type) |
+		      FW_FILTER_WR_NOREPLY_V(0) |
+		      FW_FILTER_WR_IQ_V(f->fs.iq));
+	fwr->del_filter_to_l2tix =
+		htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
+		      FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
+		      FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
+		      FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
+		      FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
+		      FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
+		      FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
+		      FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
+		      FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
+					     f->fs.newvlan == VLAN_REWRITE) |
+		      FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
+					    f->fs.newvlan == VLAN_REWRITE) |
+		      FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
+		      FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
+		      FW_FILTER_WR_PRIO_V(f->fs.prio) |
+		      FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
+	fwr->ethtype = htons(f->fs.val.ethtype);
+	fwr->ethtypem = htons(f->fs.mask.ethtype);
+	fwr->frag_to_ovlan_vldm =
+		(FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
+		 FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
+		 FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
+		 FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
+		 FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
+		 FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
+	fwr->smac_sel = 0;
+	fwr->rx_chan_rx_rpl_iq =
+		htons(FW_FILTER_WR_RX_CHAN_V(0) |
+		      FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
+	fwr->maci_to_matchtypem =
+		htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
+		      FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
+		      FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
+		      FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
+		      FW_FILTER_WR_PORT_V(f->fs.val.iport) |
+		      FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
+		      FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
+		      FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
+	fwr->ptcl = f->fs.val.proto;
+	fwr->ptclm = f->fs.mask.proto;
+	fwr->ttyp = f->fs.val.tos;
+	fwr->ttypm = f->fs.mask.tos;
+	fwr->ivlan = htons(f->fs.val.ivlan);
+	fwr->ivlanm = htons(f->fs.mask.ivlan);
+	fwr->ovlan = htons(f->fs.val.ovlan);
+	fwr->ovlanm = htons(f->fs.mask.ovlan);
+	memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
+	memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
+	memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
+	memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
+	fwr->lp = htons(f->fs.val.lport);
+	fwr->lpm = htons(f->fs.mask.lport);
+	fwr->fp = htons(f->fs.val.fport);
+	fwr->fpm = htons(f->fs.mask.fport);
+	if (f->fs.newsmac)
+		memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
+
+	/* Mark the filter as "pending" and ship off the Filter Work Request.
+	 * When we get the Work Request Reply we'll clear the pending status.
+	 */
+	f->pending = 1;
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
+	t4_ofld_send(adapter, skb);
+	return 0;
+}
+
+/* Return an error number if the indicated filter isn't writable ... */
+int writable_filter(struct filter_entry *f)
+{
+	if (f->locked)
+		return -EPERM;
+	if (f->pending)
+		return -EBUSY;
+
+	return 0;
+}
+
+/* Delete the filter at the specified index (if valid).  The checks for all
+ * the common problems with doing this like the filter being locked, currently
+ * pending in another operation, etc.
+ */
+int delete_filter(struct adapter *adapter, unsigned int fidx)
+{
+	struct filter_entry *f;
+	int ret;
+
+	if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
+		return -EINVAL;
+
+	f = &adapter->tids.ftid_tab[fidx];
+	ret = writable_filter(f);
+	if (ret)
+		return ret;
+	if (f->valid)
+		return del_filter_wr(adapter, fidx);
+
+	return 0;
+}
+
+/* Clear a filter and release any of its resources that we own.  This also
+ * clears the filter's "pending" status.
+ */
+void clear_filter(struct adapter *adap, struct filter_entry *f)
+{
+	/* If the new or old filter have loopback rewriteing rules then we'll
+	 * need to free any existing Layer Two Table (L2T) entries of the old
+	 * filter rule.  The firmware will handle freeing up any Source MAC
+	 * Table (SMT) entries used for rewriting Source MAC Addresses in
+	 * loopback rules.
+	 */
+	if (f->l2t)
+		cxgb4_l2t_release(f->l2t);
+
+	/* The zeroing of the filter rule below clears the filter valid,
+	 * pending, locked flags, l2t pointer, etc. so it's all we need for
+	 * this operation.
+	 */
+	memset(f, 0, sizeof(*f));
+}
+
+void clear_all_filters(struct adapter *adapter)
+{
+	unsigned int i;
+
+	if (adapter->tids.ftid_tab) {
+		struct filter_entry *f = &adapter->tids.ftid_tab[0];
+		unsigned int max_ftid = adapter->tids.nftids +
+					adapter->tids.nsftids;
+
+		for (i = 0; i < max_ftid; i++, f++)
+			if (f->valid || f->pending)
+				clear_filter(adapter, f);
+	}
+}
+
+/* Fill up default masks for set match fields. */
+static void fill_default_mask(struct ch_filter_specification *fs)
+{
+	unsigned int lip = 0, lip_mask = 0;
+	unsigned int fip = 0, fip_mask = 0;
+	unsigned int i;
+
+	if (fs->val.iport && !fs->mask.iport)
+		fs->mask.iport |= ~0;
+	if (fs->val.fcoe && !fs->mask.fcoe)
+		fs->mask.fcoe |= ~0;
+	if (fs->val.matchtype && !fs->mask.matchtype)
+		fs->mask.matchtype |= ~0;
+	if (fs->val.macidx && !fs->mask.macidx)
+		fs->mask.macidx |= ~0;
+	if (fs->val.ethtype && !fs->mask.ethtype)
+		fs->mask.ethtype |= ~0;
+	if (fs->val.ivlan && !fs->mask.ivlan)
+		fs->mask.ivlan |= ~0;
+	if (fs->val.ovlan && !fs->mask.ovlan)
+		fs->mask.ovlan |= ~0;
+	if (fs->val.frag && !fs->mask.frag)
+		fs->mask.frag |= ~0;
+	if (fs->val.tos && !fs->mask.tos)
+		fs->mask.tos |= ~0;
+	if (fs->val.proto && !fs->mask.proto)
+		fs->mask.proto |= ~0;
+
+	for (i = 0; i < ARRAY_SIZE(fs->val.lip); i++) {
+		lip |= fs->val.lip[i];
+		lip_mask |= fs->mask.lip[i];
+		fip |= fs->val.fip[i];
+		fip_mask |= fs->mask.fip[i];
+	}
+
+	if (lip && !lip_mask)
+		memset(fs->mask.lip, ~0, sizeof(fs->mask.lip));
+
+	if (fip && !fip_mask)
+		memset(fs->mask.fip, ~0, sizeof(fs->mask.lip));
+
+	if (fs->val.lport && !fs->mask.lport)
+		fs->mask.lport = ~0;
+	if (fs->val.fport && !fs->mask.fport)
+		fs->mask.fport = ~0;
+}
+
+/* Check a Chelsio Filter Request for validity, convert it into our internal
+ * format and send it to the hardware.  Return 0 on success, an error number
+ * otherwise.  We attach any provided filter operation context to the internal
+ * filter specification in order to facilitate signaling completion of the
+ * operation.
+ */
+int __cxgb4_set_filter(struct net_device *dev, int filter_id,
+		       struct ch_filter_specification *fs,
+		       struct filter_ctx *ctx)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	unsigned int max_fidx, fidx;
+	struct filter_entry *f;
+	u32 iconf;
+	int iq, ret;
+
+	max_fidx = adapter->tids.nftids;
+	if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
+	    filter_id >= max_fidx)
+		return -E2BIG;
+
+	fill_default_mask(fs);
+
+	ret = validate_filter(dev, fs);
+	if (ret)
+		return ret;
+
+	iq = get_filter_steerq(dev, fs);
+	if (iq < 0)
+		return iq;
+
+	/* IPv6 filters occupy four slots and must be aligned on
+	 * four-slot boundaries.  IPv4 filters only occupy a single
+	 * slot and have no alignment requirements but writing a new
+	 * IPv4 filter into the middle of an existing IPv6 filter
+	 * requires clearing the old IPv6 filter and hence we prevent
+	 * insertion.
+	 */
+	if (fs->type == 0) { /* IPv4 */
+		/* If our IPv4 filter isn't being written to a
+		 * multiple of four filter index and there's an IPv6
+		 * filter at the multiple of 4 base slot, then we
+		 * prevent insertion.
+		 */
+		fidx = filter_id & ~0x3;
+		if (fidx != filter_id &&
+		    adapter->tids.ftid_tab[fidx].fs.type) {
+			f = &adapter->tids.ftid_tab[fidx];
+			if (f->valid) {
+				dev_err(adapter->pdev_dev,
+					"Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n",
+					fidx, fidx + 3);
+				return -EINVAL;
+			}
+		}
+	} else { /* IPv6 */
+		/* Ensure that the IPv6 filter is aligned on a
+		 * multiple of 4 boundary.
+		 */
+		if (filter_id & 0x3) {
+			dev_err(adapter->pdev_dev,
+				"Invalid location. IPv6 must be aligned on a 4-slot boundary\n");
+			return -EINVAL;
+		}
+
+		/* Check all except the base overlapping IPv4 filter slots. */
+		for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) {
+			f = &adapter->tids.ftid_tab[fidx];
+			if (f->valid) {
+				dev_err(adapter->pdev_dev,
+					"Invalid location.  IPv6 requires 4 slots and an IPv4 filter exists at %u\n",
+					fidx);
+				return -EINVAL;
+			}
+		}
+	}
+
+	/* Check to make sure that provided filter index is not
+	 * already in use by someone else
+	 */
+	f = &adapter->tids.ftid_tab[filter_id];
+	if (f->valid)
+		return -EBUSY;
+
+	fidx = filter_id + adapter->tids.ftid_base;
+	ret = cxgb4_set_ftid(&adapter->tids, filter_id,
+			     fs->type ? PF_INET6 : PF_INET);
+	if (ret)
+		return ret;
+
+	/* Check to make sure the filter requested is writable ... */
+	ret = writable_filter(f);
+	if (ret) {
+		/* Clear the bits we have set above */
+		cxgb4_clear_ftid(&adapter->tids, filter_id,
+				 fs->type ? PF_INET6 : PF_INET);
+		return ret;
+	}
+
+	/* Clear out any old resources being used by the filter before
+	 * we start constructing the new filter.
+	 */
+	if (f->valid)
+		clear_filter(adapter, f);
+
+	/* Convert the filter specification into our internal format.
+	 * We copy the PF/VF specification into the Outer VLAN field
+	 * here so the rest of the code -- including the interface to
+	 * the firmware -- doesn't have to constantly do these checks.
+	 */
+	f->fs = *fs;
+	f->fs.iq = iq;
+	f->dev = dev;
+
+	iconf = adapter->params.tp.ingress_config;
+	if (iconf & VNIC_F) {
+		f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf;
+		f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf;
+		f->fs.val.ovlan_vld = fs->val.pfvf_vld;
+		f->fs.mask.ovlan_vld = fs->mask.pfvf_vld;
+	}
+
+	/* Attempt to set the filter.  If we don't succeed, we clear
+	 * it and return the failure.
+	 */
+	f->ctx = ctx;
+	f->tid = fidx; /* Save the actual tid */
+	ret = set_filter_wr(adapter, filter_id);
+	if (ret) {
+		cxgb4_clear_ftid(&adapter->tids, filter_id,
+				 fs->type ? PF_INET6 : PF_INET);
+		clear_filter(adapter, f);
+	}
+
+	return ret;
+}
+
+/* Check a delete filter request for validity and send it to the hardware.
+ * Return 0 on success, an error number otherwise.  We attach any provided
+ * filter operation context to the internal filter specification in order to
+ * facilitate signaling completion of the operation.
+ */
+int __cxgb4_del_filter(struct net_device *dev, int filter_id,
+		       struct filter_ctx *ctx)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	struct filter_entry *f;
+	unsigned int max_fidx;
+	int ret;
+
+	max_fidx = adapter->tids.nftids;
+	if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
+	    filter_id >= max_fidx)
+		return -E2BIG;
+
+	f = &adapter->tids.ftid_tab[filter_id];
+	ret = writable_filter(f);
+	if (ret)
+		return ret;
+
+	if (f->valid) {
+		f->ctx = ctx;
+		cxgb4_clear_ftid(&adapter->tids, filter_id,
+				 f->fs.type ? PF_INET6 : PF_INET);
+		return del_filter_wr(adapter, filter_id);
+	}
+
+	/* If the caller has passed in a Completion Context then we need to
+	 * mark it as a successful completion so they don't stall waiting
+	 * for it.
+	 */
+	if (ctx) {
+		ctx->result = 0;
+		complete(&ctx->completion);
+	}
+	return ret;
+}
+
+int cxgb4_set_filter(struct net_device *dev, int filter_id,
+		     struct ch_filter_specification *fs)
+{
+	struct filter_ctx ctx;
+	int ret;
+
+	init_completion(&ctx.completion);
+
+	ret = __cxgb4_set_filter(dev, filter_id, fs, &ctx);
+	if (ret)
+		goto out;
+
+	/* Wait for reply */
+	ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ);
+	if (!ret)
+		return -ETIMEDOUT;
+
+	ret = ctx.result;
+out:
+	return ret;
+}
+
+int cxgb4_del_filter(struct net_device *dev, int filter_id)
+{
+	struct filter_ctx ctx;
+	int ret;
+
+	init_completion(&ctx.completion);
+
+	ret = __cxgb4_del_filter(dev, filter_id, &ctx);
+	if (ret)
+		goto out;
+
+	/* Wait for reply */
+	ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ);
+	if (!ret)
+		return -ETIMEDOUT;
+
+	ret = ctx.result;
+out:
+	return ret;
+}
+
+/* Handle a filter write/deletion reply. */
+void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
+{
+	unsigned int tid = GET_TID(rpl);
+	struct filter_entry *f = NULL;
+	unsigned int max_fidx;
+	int idx;
+
+	max_fidx = adap->tids.nftids + adap->tids.nsftids;
+	/* Get the corresponding filter entry for this tid */
+	if (adap->tids.ftid_tab) {
+		/* Check this in normal filter region */
+		idx = tid - adap->tids.ftid_base;
+		if (idx >= max_fidx)
+			return;
+		f = &adap->tids.ftid_tab[idx];
+		if (f->tid != tid)
+			return;
+	}
+
+	/* We found the filter entry for this tid */
+	if (f) {
+		unsigned int ret = TCB_COOKIE_G(rpl->cookie);
+		struct filter_ctx *ctx;
+
+		/* Pull off any filter operation context attached to the
+		 * filter.
+		 */
+		ctx = f->ctx;
+		f->ctx = NULL;
+
+		if (ret == FW_FILTER_WR_FLT_DELETED) {
+			/* Clear the filter when we get confirmation from the
+			 * hardware that the filter has been deleted.
+			 */
+			clear_filter(adap, f);
+			if (ctx)
+				ctx->result = 0;
+		} else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
+			dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
+				idx);
+			clear_filter(adap, f);
+			if (ctx)
+				ctx->result = -ENOMEM;
+		} else if (ret == FW_FILTER_WR_FLT_ADDED) {
+			f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
+			f->pending = 0;  /* asynchronous setup completed */
+			f->valid = 1;
+			if (ctx) {
+				ctx->result = 0;
+				ctx->tid = idx;
+			}
+		} else {
+			/* Something went wrong.  Issue a warning about the
+			 * problem and clear everything out.
+			 */
+			dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
+				idx, ret);
+			clear_filter(adap, f);
+			if (ctx)
+				ctx->result = -EINVAL;
+		}
+		if (ctx)
+			complete(&ctx->completion);
+	}
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
new file mode 100644
index 0000000..23742cb
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
@@ -0,0 +1,48 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_FILTER_H
+#define __CXGB4_FILTER_H
+
+#include "t4_msg.h"
+
+void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl);
+void clear_filter(struct adapter *adap, struct filter_entry *f);
+
+int set_filter_wr(struct adapter *adapter, int fidx);
+int delete_filter(struct adapter *adapter, unsigned int fidx);
+
+int writable_filter(struct filter_entry *f);
+void clear_all_filters(struct adapter *adapter);
+#endif /* __CXGB4_FILTER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index c45de49..cf147ca 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -67,6 +67,7 @@
 #include <linux/crash_dump.h>
 
 #include "cxgb4.h"
+#include "cxgb4_filter.h"
 #include "t4_regs.h"
 #include "t4_values.h"
 #include "t4_msg.h"
@@ -76,6 +77,8 @@
 #include "cxgb4_debugfs.h"
 #include "clip_tbl.h"
 #include "l2t.h"
+#include "sched.h"
+#include "cxgb4_tc_u32.h"
 
 char cxgb4_driver_name[] = KBUILD_MODNAME;
 
@@ -86,30 +89,6 @@
 const char cxgb4_driver_version[] = DRV_VERSION;
 #define DRV_DESC "Chelsio T4/T5/T6 Network Driver"
 
-/* Host shadow copy of ingress filter entry.  This is in host native format
- * and doesn't match the ordering or bit order, etc. of the hardware of the
- * firmware command.  The use of bit-field structure elements is purely to
- * remind ourselves of the field size limitations and save memory in the case
- * where the filter table is large.
- */
-struct filter_entry {
-	/* Administrative fields for filter.
-	 */
-	u32 valid:1;            /* filter allocated and valid */
-	u32 locked:1;           /* filter is administratively locked */
-
-	u32 pending:1;          /* filter action is pending firmware reply */
-	u32 smtidx:8;           /* Source MAC Table index for smac */
-	struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
-
-	/* The filter itself.  Most of this is a straight copy of information
-	 * provided by the extended ioctl().  Some fields are translated to
-	 * internal forms -- for instance the Ingress Queue ID passed in from
-	 * the ioctl() is translated into the Absolute Ingress Queue ID.
-	 */
-	struct ch_filter_specification fs;
-};
-
 #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
 			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
 			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
@@ -223,13 +202,8 @@
 
 static struct dentry *cxgb4_debugfs_root;
 
-static LIST_HEAD(adapter_list);
-static DEFINE_MUTEX(uld_mutex);
-/* Adapter list to be accessed from atomic context */
-static LIST_HEAD(adap_rcu_list);
-static DEFINE_SPINLOCK(adap_rcu_lock);
-static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
-static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" };
+LIST_HEAD(adapter_list);
+DEFINE_MUTEX(uld_mutex);
 
 static void link_report(struct net_device *dev)
 {
@@ -303,11 +277,9 @@
 			txq->dcb_prio = value;
 	}
 }
-#endif /* CONFIG_CHELSIO_T4_DCB */
 
-int cxgb4_dcb_enabled(const struct net_device *dev)
+static int cxgb4_dcb_enabled(const struct net_device *dev)
 {
-#ifdef CONFIG_CHELSIO_T4_DCB
 	struct port_info *pi = netdev_priv(dev);
 
 	if (!pi->dcb.enabled)
@@ -315,11 +287,8 @@
 
 	return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) ||
 		(pi->dcb.state == CXGB4_DCB_STATE_HOST));
-#else
-	return 0;
-#endif
 }
-EXPORT_SYMBOL(cxgb4_dcb_enabled);
+#endif /* CONFIG_CHELSIO_T4_DCB */
 
 void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
 {
@@ -531,66 +500,6 @@
 }
 #endif /* CONFIG_CHELSIO_T4_DCB */
 
-/* Clear a filter and release any of its resources that we own.  This also
- * clears the filter's "pending" status.
- */
-static void clear_filter(struct adapter *adap, struct filter_entry *f)
-{
-	/* If the new or old filter have loopback rewriteing rules then we'll
-	 * need to free any existing Layer Two Table (L2T) entries of the old
-	 * filter rule.  The firmware will handle freeing up any Source MAC
-	 * Table (SMT) entries used for rewriting Source MAC Addresses in
-	 * loopback rules.
-	 */
-	if (f->l2t)
-		cxgb4_l2t_release(f->l2t);
-
-	/* The zeroing of the filter rule below clears the filter valid,
-	 * pending, locked flags, l2t pointer, etc. so it's all we need for
-	 * this operation.
-	 */
-	memset(f, 0, sizeof(*f));
-}
-
-/* Handle a filter write/deletion reply.
- */
-static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
-{
-	unsigned int idx = GET_TID(rpl);
-	unsigned int nidx = idx - adap->tids.ftid_base;
-	unsigned int ret;
-	struct filter_entry *f;
-
-	if (idx >= adap->tids.ftid_base && nidx <
-	   (adap->tids.nftids + adap->tids.nsftids)) {
-		idx = nidx;
-		ret = TCB_COOKIE_G(rpl->cookie);
-		f = &adap->tids.ftid_tab[idx];
-
-		if (ret == FW_FILTER_WR_FLT_DELETED) {
-			/* Clear the filter when we get confirmation from the
-			 * hardware that the filter has been deleted.
-			 */
-			clear_filter(adap, f);
-		} else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
-			dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
-				idx);
-			clear_filter(adap, f);
-		} else if (ret == FW_FILTER_WR_FLT_ADDED) {
-			f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
-			f->pending = 0;  /* asynchronous setup completed */
-			f->valid = 1;
-		} else {
-			/* Something went wrong.  Issue a warning about the
-			 * problem and clear everything out.
-			 */
-			dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
-				idx, ret);
-			clear_filter(adap, f);
-		}
-	}
-}
-
 /* Response queue handler for the FW event queue.
  */
 static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
@@ -677,56 +586,6 @@
 	return 0;
 }
 
-/* Flush the aggregated lro sessions */
-static void uldrx_flush_handler(struct sge_rspq *q)
-{
-	if (ulds[q->uld].lro_flush)
-		ulds[q->uld].lro_flush(&q->lro_mgr);
-}
-
-/**
- *	uldrx_handler - response queue handler for ULD queues
- *	@q: the response queue that received the packet
- *	@rsp: the response queue descriptor holding the offload message
- *	@gl: the gather list of packet fragments
- *
- *	Deliver an ingress offload packet to a ULD.  All processing is done by
- *	the ULD, we just maintain statistics.
- */
-static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
-			 const struct pkt_gl *gl)
-{
-	struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
-	int ret;
-
-	/* FW can send CPLs encapsulated in a CPL_FW4_MSG.
-	 */
-	if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG &&
-	    ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
-		rsp += 2;
-
-	if (q->flush_handler)
-		ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld],
-						  rsp, gl, &q->lro_mgr,
-						  &q->napi);
-	else
-		ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld],
-					      rsp, gl);
-
-	if (ret) {
-		rxq->stats.nomem++;
-		return -1;
-	}
-
-	if (gl == NULL)
-		rxq->stats.imm++;
-	else if (gl == CXGB4_MSG_AN)
-		rxq->stats.an++;
-	else
-		rxq->stats.pkts++;
-	return 0;
-}
-
 static void disable_msi(struct adapter *adapter)
 {
 	if (adapter->flags & USING_MSIX) {
@@ -778,30 +637,12 @@
 			snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
 				 d->name, i);
 	}
-
-	/* offload queues */
-	for_each_iscsirxq(&adap->sge, i)
-		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d",
-			 adap->port[0]->name, i);
-
-	for_each_iscsitrxq(&adap->sge, i)
-		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d",
-			 adap->port[0]->name, i);
-
-	for_each_rdmarxq(&adap->sge, i)
-		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
-			 adap->port[0]->name, i);
-
-	for_each_rdmaciq(&adap->sge, i)
-		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d",
-			 adap->port[0]->name, i);
 }
 
 static int request_msix_queue_irqs(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
-	int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
-	int iscsitqidx = 0;
+	int err, ethqidx;
 	int msi_index = 2;
 
 	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
@@ -818,57 +659,9 @@
 			goto unwind;
 		msi_index++;
 	}
-	for_each_iscsirxq(s, iscsiqidx) {
-		err = request_irq(adap->msix_info[msi_index].vec,
-				  t4_sge_intr_msix, 0,
-				  adap->msix_info[msi_index].desc,
-				  &s->iscsirxq[iscsiqidx].rspq);
-		if (err)
-			goto unwind;
-		msi_index++;
-	}
-	for_each_iscsitrxq(s, iscsitqidx) {
-		err = request_irq(adap->msix_info[msi_index].vec,
-				  t4_sge_intr_msix, 0,
-				  adap->msix_info[msi_index].desc,
-				  &s->iscsitrxq[iscsitqidx].rspq);
-		if (err)
-			goto unwind;
-		msi_index++;
-	}
-	for_each_rdmarxq(s, rdmaqidx) {
-		err = request_irq(adap->msix_info[msi_index].vec,
-				  t4_sge_intr_msix, 0,
-				  adap->msix_info[msi_index].desc,
-				  &s->rdmarxq[rdmaqidx].rspq);
-		if (err)
-			goto unwind;
-		msi_index++;
-	}
-	for_each_rdmaciq(s, rdmaciqqidx) {
-		err = request_irq(adap->msix_info[msi_index].vec,
-				  t4_sge_intr_msix, 0,
-				  adap->msix_info[msi_index].desc,
-				  &s->rdmaciq[rdmaciqqidx].rspq);
-		if (err)
-			goto unwind;
-		msi_index++;
-	}
 	return 0;
 
 unwind:
-	while (--rdmaciqqidx >= 0)
-		free_irq(adap->msix_info[--msi_index].vec,
-			 &s->rdmaciq[rdmaciqqidx].rspq);
-	while (--rdmaqidx >= 0)
-		free_irq(adap->msix_info[--msi_index].vec,
-			 &s->rdmarxq[rdmaqidx].rspq);
-	while (--iscsitqidx >= 0)
-		free_irq(adap->msix_info[--msi_index].vec,
-			 &s->iscsitrxq[iscsitqidx].rspq);
-	while (--iscsiqidx >= 0)
-		free_irq(adap->msix_info[--msi_index].vec,
-			 &s->iscsirxq[iscsiqidx].rspq);
 	while (--ethqidx >= 0)
 		free_irq(adap->msix_info[--msi_index].vec,
 			 &s->ethrxq[ethqidx].rspq);
@@ -884,16 +677,6 @@
 	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
 	for_each_ethrxq(s, i)
 		free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq);
-	for_each_iscsirxq(s, i)
-		free_irq(adap->msix_info[msi_index++].vec,
-			 &s->iscsirxq[i].rspq);
-	for_each_iscsitrxq(s, i)
-		free_irq(adap->msix_info[msi_index++].vec,
-			 &s->iscsitrxq[i].rspq);
-	for_each_rdmarxq(s, i)
-		free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
-	for_each_rdmaciq(s, i)
-		free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq);
 }
 
 /**
@@ -1032,28 +815,30 @@
 	}
 }
 
-static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
-			   unsigned int nq, unsigned int per_chan, int msi_idx,
-			   u16 *ids, bool lro)
-{
-	int i, err;
 
-	for (i = 0; i < nq; i++, q++) {
-		if (msi_idx > 0)
-			msi_idx++;
-		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
-				       adap->port[i / per_chan],
-				       msi_idx, q->fl.size ? &q->fl : NULL,
-				       uldrx_handler,
-				       lro ? uldrx_flush_handler : NULL,
-				       0);
+static int setup_fw_sge_queues(struct adapter *adap)
+{
+	struct sge *s = &adap->sge;
+	int err = 0;
+
+	bitmap_zero(s->starving_fl, s->egr_sz);
+	bitmap_zero(s->txq_maperr, s->egr_sz);
+
+	if (adap->flags & USING_MSIX)
+		adap->msi_idx = 1;         /* vector 0 is for non-queue interrupts */
+	else {
+		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
+				       NULL, NULL, NULL, -1);
 		if (err)
 			return err;
-		memset(&q->stats, 0, sizeof(q->stats));
-		if (ids)
-			ids[i] = q->rspq.abs_id;
+		adap->msi_idx = -((int)s->intrq.abs_id + 1);
 	}
-	return 0;
+
+	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
+			       adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
+	if (err)
+		t4_free_sge_resources(adap);
+	return err;
 }
 
 /**
@@ -1066,41 +851,10 @@
  */
 static int setup_sge_queues(struct adapter *adap)
 {
-	int err, msi_idx, i, j;
+	int err, i, j;
 	struct sge *s = &adap->sge;
-
-	bitmap_zero(s->starving_fl, s->egr_sz);
-	bitmap_zero(s->txq_maperr, s->egr_sz);
-
-	if (adap->flags & USING_MSIX)
-		msi_idx = 1;         /* vector 0 is for non-queue interrupts */
-	else {
-		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
-				       NULL, NULL, NULL, -1);
-		if (err)
-			return err;
-		msi_idx = -((int)s->intrq.abs_id + 1);
-	}
-
-	/* NOTE: If you add/delete any Ingress/Egress Queue allocations in here,
-	 * don't forget to update the following which need to be
-	 * synchronized to and changes here.
-	 *
-	 * 1. The calculations of MAX_INGQ in cxgb4.h.
-	 *
-	 * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs
-	 *    to accommodate any new/deleted Ingress Queues
-	 *    which need MSI-X Vectors.
-	 *
-	 * 3. Update sge_qinfo_show() to include information on the
-	 *    new/deleted queues.
-	 */
-	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
-			       msi_idx, NULL, fwevtq_handler, NULL, -1);
-	if (err) {
-freeout:	t4_free_sge_resources(adap);
-		return err;
-	}
+	struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
+	unsigned int cmplqid = 0;
 
 	for_each_port(adap, i) {
 		struct net_device *dev = adap->port[i];
@@ -1109,10 +863,10 @@
 		struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
 
 		for (j = 0; j < pi->nqsets; j++, q++) {
-			if (msi_idx > 0)
-				msi_idx++;
+			if (adap->msi_idx > 0)
+				adap->msi_idx++;
 			err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
-					       msi_idx, &q->fl,
+					       adap->msi_idx, &q->fl,
 					       t4_ethrx_handler,
 					       NULL,
 					       t4_get_mps_bg_map(adap,
@@ -1131,8 +885,8 @@
 		}
 	}
 
-	j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */
-	for_each_iscsirxq(s, i) {
+	j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */
+	for_each_ofldtxq(s, i) {
 		err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i],
 					    adap->port[i / j],
 					    s->fw_evtq.cntxt_id);
@@ -1140,30 +894,15 @@
 			goto freeout;
 	}
 
-#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \
-	err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids, lro); \
-	if (err) \
-		goto freeout; \
-	if (msi_idx > 0) \
-		msi_idx += nq; \
-} while (0)
-
-	ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false);
-	ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true);
-	ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false);
-	j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */
-	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false);
-
-#undef ALLOC_OFLD_RXQS
-
 	for_each_port(adap, i) {
-		/*
-		 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
+		/* Note that cmplqid below is 0 if we don't
 		 * have RDMA queues, and that's the right value.
 		 */
+		if (rxq_info)
+			cmplqid	= rxq_info->uldrxq[i].rspq.cntxt_id;
+
 		err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
-					    s->fw_evtq.cntxt_id,
-					    s->rdmarxq[i].rspq.cntxt_id);
+					    s->fw_evtq.cntxt_id, cmplqid);
 		if (err)
 			goto freeout;
 	}
@@ -1174,6 +913,9 @@
 		     RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) |
 		     QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id));
 	return 0;
+freeout:
+	t4_free_sge_resources(adap);
+	return err;
 }
 
 /*
@@ -1197,151 +939,6 @@
 	kvfree(addr);
 }
 
-/* Send a Work Request to write the filter at a specified index.  We construct
- * a Firmware Filter Work Request to have the work done and put the indicated
- * filter into "pending" mode which will prevent any further actions against
- * it till we get a reply from the firmware on the completion status of the
- * request.
- */
-static int set_filter_wr(struct adapter *adapter, int fidx)
-{
-	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
-	struct sk_buff *skb;
-	struct fw_filter_wr *fwr;
-	unsigned int ftid;
-
-	skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
-	if (!skb)
-		return -ENOMEM;
-
-	/* If the new filter requires loopback Destination MAC and/or VLAN
-	 * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
-	 * the filter.
-	 */
-	if (f->fs.newdmac || f->fs.newvlan) {
-		/* allocate L2T entry for new filter */
-		f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
-						f->fs.eport, f->fs.dmac);
-		if (f->l2t == NULL) {
-			kfree_skb(skb);
-			return -ENOMEM;
-		}
-	}
-
-	ftid = adapter->tids.ftid_base + fidx;
-
-	fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
-	memset(fwr, 0, sizeof(*fwr));
-
-	/* It would be nice to put most of the following in t4_hw.c but most
-	 * of the work is translating the cxgbtool ch_filter_specification
-	 * into the Work Request and the definition of that structure is
-	 * currently in cxgbtool.h which isn't appropriate to pull into the
-	 * common code.  We may eventually try to come up with a more neutral
-	 * filter specification structure but for now it's easiest to simply
-	 * put this fairly direct code in line ...
-	 */
-	fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
-	fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16));
-	fwr->tid_to_iq =
-		htonl(FW_FILTER_WR_TID_V(ftid) |
-		      FW_FILTER_WR_RQTYPE_V(f->fs.type) |
-		      FW_FILTER_WR_NOREPLY_V(0) |
-		      FW_FILTER_WR_IQ_V(f->fs.iq));
-	fwr->del_filter_to_l2tix =
-		htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
-		      FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
-		      FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
-		      FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
-		      FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
-		      FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
-		      FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
-		      FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
-		      FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
-					     f->fs.newvlan == VLAN_REWRITE) |
-		      FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
-					    f->fs.newvlan == VLAN_REWRITE) |
-		      FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
-		      FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
-		      FW_FILTER_WR_PRIO_V(f->fs.prio) |
-		      FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
-	fwr->ethtype = htons(f->fs.val.ethtype);
-	fwr->ethtypem = htons(f->fs.mask.ethtype);
-	fwr->frag_to_ovlan_vldm =
-		(FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
-		 FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
-		 FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
-		 FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
-		 FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
-		 FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
-	fwr->smac_sel = 0;
-	fwr->rx_chan_rx_rpl_iq =
-		htons(FW_FILTER_WR_RX_CHAN_V(0) |
-		      FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
-	fwr->maci_to_matchtypem =
-		htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
-		      FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
-		      FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
-		      FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
-		      FW_FILTER_WR_PORT_V(f->fs.val.iport) |
-		      FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
-		      FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
-		      FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
-	fwr->ptcl = f->fs.val.proto;
-	fwr->ptclm = f->fs.mask.proto;
-	fwr->ttyp = f->fs.val.tos;
-	fwr->ttypm = f->fs.mask.tos;
-	fwr->ivlan = htons(f->fs.val.ivlan);
-	fwr->ivlanm = htons(f->fs.mask.ivlan);
-	fwr->ovlan = htons(f->fs.val.ovlan);
-	fwr->ovlanm = htons(f->fs.mask.ovlan);
-	memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
-	memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
-	memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
-	memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
-	fwr->lp = htons(f->fs.val.lport);
-	fwr->lpm = htons(f->fs.mask.lport);
-	fwr->fp = htons(f->fs.val.fport);
-	fwr->fpm = htons(f->fs.mask.fport);
-	if (f->fs.newsmac)
-		memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
-
-	/* Mark the filter as "pending" and ship off the Filter Work Request.
-	 * When we get the Work Request Reply we'll clear the pending status.
-	 */
-	f->pending = 1;
-	set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
-	t4_ofld_send(adapter, skb);
-	return 0;
-}
-
-/* Delete the filter at a specified index.
- */
-static int del_filter_wr(struct adapter *adapter, int fidx)
-{
-	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
-	struct sk_buff *skb;
-	struct fw_filter_wr *fwr;
-	unsigned int len, ftid;
-
-	len = sizeof(*fwr);
-	ftid = adapter->tids.ftid_base + fidx;
-
-	skb = alloc_skb(len, GFP_KERNEL);
-	if (!skb)
-		return -ENOMEM;
-
-	fwr = (struct fw_filter_wr *)__skb_put(skb, len);
-	t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);
-
-	/* Mark the filter as "pending" and ship off the Filter Work Request.
-	 * When we get the Work Request Reply we'll clear the pending status.
-	 */
-	f->pending = 1;
-	t4_mgmt_tx(adapter, skb);
-	return 0;
-}
-
 static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
 			     void *accel_priv, select_queue_fallback_t fallback)
 {
@@ -1723,19 +1320,22 @@
  */
 static int tid_init(struct tid_info *t)
 {
-	size_t size;
-	unsigned int stid_bmap_size;
-	unsigned int natids = t->natids;
 	struct adapter *adap = container_of(t, struct adapter, tids);
+	unsigned int max_ftids = t->nftids + t->nsftids;
+	unsigned int natids = t->natids;
+	unsigned int stid_bmap_size;
+	unsigned int ftid_bmap_size;
+	size_t size;
 
 	stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
+	ftid_bmap_size = BITS_TO_LONGS(t->nftids);
 	size = t->ntids * sizeof(*t->tid_tab) +
 	       natids * sizeof(*t->atid_tab) +
 	       t->nstids * sizeof(*t->stid_tab) +
 	       t->nsftids * sizeof(*t->stid_tab) +
 	       stid_bmap_size * sizeof(long) +
-	       t->nftids * sizeof(*t->ftid_tab) +
-	       t->nsftids * sizeof(*t->ftid_tab);
+	       max_ftids * sizeof(*t->ftid_tab) +
+	       ftid_bmap_size * sizeof(long);
 
 	t->tid_tab = t4_alloc_mem(size);
 	if (!t->tid_tab)
@@ -1745,8 +1345,10 @@
 	t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
 	t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
 	t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
+	t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
 	spin_lock_init(&t->stid_lock);
 	spin_lock_init(&t->atid_lock);
+	spin_lock_init(&t->ftid_lock);
 
 	t->stids_in_use = 0;
 	t->sftids_in_use = 0;
@@ -1761,12 +1363,16 @@
 			t->atid_tab[natids - 1].next = &t->atid_tab[natids];
 		t->afree = t->atid_tab;
 	}
-	bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
-	/* Reserve stid 0 for T4/T5 adapters */
-	if (!t->stid_base &&
-	    (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5))
-		__set_bit(0, t->stid_bmap);
 
+	if (is_offload(adap)) {
+		bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
+		/* Reserve stid 0 for T4/T5 adapters */
+		if (!t->stid_base &&
+		    CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+			__set_bit(0, t->stid_bmap);
+	}
+
+	bitmap_zero(t->ftid_bmap, t->nftids);
 	return 0;
 }
 
@@ -2316,7 +1922,7 @@
 
 	for_each_ethrxq(&adap->sge, i)
 		disable_txq_db(&adap->sge.ethtxq[i].q);
-	for_each_iscsirxq(&adap->sge, i)
+	for_each_ofldtxq(&adap->sge, i)
 		disable_txq_db(&adap->sge.ofldtxq[i].q);
 	for_each_port(adap, i)
 		disable_txq_db(&adap->sge.ctrlq[i].q);
@@ -2328,7 +1934,7 @@
 
 	for_each_ethrxq(&adap->sge, i)
 		enable_txq_db(adap, &adap->sge.ethtxq[i].q);
-	for_each_iscsirxq(&adap->sge, i)
+	for_each_ofldtxq(&adap->sge, i)
 		enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
 	for_each_port(adap, i)
 		enable_txq_db(adap, &adap->sge.ctrlq[i].q);
@@ -2336,9 +1942,10 @@
 
 static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
 {
-	if (adap->uld_handle[CXGB4_ULD_RDMA])
-		ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
-				cmd);
+	enum cxgb4_uld type = CXGB4_ULD_RDMA;
+
+	if (adap->uld && adap->uld[type].handle)
+		adap->uld[type].control(adap->uld[type].handle, cmd);
 }
 
 static void process_db_full(struct work_struct *work)
@@ -2392,13 +1999,14 @@
 	if (ret)
 		CH_WARN(adap, "DB drop recovery failed.\n");
 }
+
 static void recover_all_queues(struct adapter *adap)
 {
 	int i;
 
 	for_each_ethrxq(&adap->sge, i)
 		sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
-	for_each_iscsirxq(&adap->sge, i)
+	for_each_ofldtxq(&adap->sge, i)
 		sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
 	for_each_port(adap, i)
 		sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
@@ -2463,94 +2071,12 @@
 	queue_work(adap->workq, &adap->db_drop_task);
 }
 
-static void uld_attach(struct adapter *adap, unsigned int uld)
+void t4_register_netevent_notifier(void)
 {
-	void *handle;
-	struct cxgb4_lld_info lli;
-	unsigned short i;
-
-	lli.pdev = adap->pdev;
-	lli.pf = adap->pf;
-	lli.l2t = adap->l2t;
-	lli.tids = &adap->tids;
-	lli.ports = adap->port;
-	lli.vr = &adap->vres;
-	lli.mtus = adap->params.mtus;
-	if (uld == CXGB4_ULD_RDMA) {
-		lli.rxq_ids = adap->sge.rdma_rxq;
-		lli.ciq_ids = adap->sge.rdma_ciq;
-		lli.nrxq = adap->sge.rdmaqs;
-		lli.nciq = adap->sge.rdmaciqs;
-	} else if (uld == CXGB4_ULD_ISCSI) {
-		lli.rxq_ids = adap->sge.iscsi_rxq;
-		lli.nrxq = adap->sge.iscsiqsets;
-	} else if (uld == CXGB4_ULD_ISCSIT) {
-		lli.rxq_ids = adap->sge.iscsit_rxq;
-		lli.nrxq = adap->sge.niscsitq;
-	}
-	lli.ntxq = adap->sge.iscsiqsets;
-	lli.nchan = adap->params.nports;
-	lli.nports = adap->params.nports;
-	lli.wr_cred = adap->params.ofldq_wr_cred;
-	lli.adapter_type = adap->params.chip;
-	lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
-	lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
-	lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
-	lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
-	lli.iscsi_ppm = &adap->iscsi_ppm;
-	lli.cclk_ps = 1000000000 / adap->params.vpd.cclk;
-	lli.udb_density = 1 << adap->params.sge.eq_qpp;
-	lli.ucq_density = 1 << adap->params.sge.iq_qpp;
-	lli.filt_mode = adap->params.tp.vlan_pri_map;
-	/* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
-	for (i = 0; i < NCHAN; i++)
-		lli.tx_modq[i] = i;
-	lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A);
-	lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A);
-	lli.fw_vers = adap->params.fw_vers;
-	lli.dbfifo_int_thresh = dbfifo_int_thresh;
-	lli.sge_ingpadboundary = adap->sge.fl_align;
-	lli.sge_egrstatuspagesize = adap->sge.stat_len;
-	lli.sge_pktshift = adap->sge.pktshift;
-	lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
-	lli.max_ordird_qp = adap->params.max_ordird_qp;
-	lli.max_ird_adapter = adap->params.max_ird_adapter;
-	lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
-	lli.nodeid = dev_to_node(adap->pdev_dev);
-
-	handle = ulds[uld].add(&lli);
-	if (IS_ERR(handle)) {
-		dev_warn(adap->pdev_dev,
-			 "could not attach to the %s driver, error %ld\n",
-			 uld_str[uld], PTR_ERR(handle));
-		return;
-	}
-
-	adap->uld_handle[uld] = handle;
-
 	if (!netevent_registered) {
 		register_netevent_notifier(&cxgb4_netevent_nb);
 		netevent_registered = true;
 	}
-
-	if (adap->flags & FULL_INIT_DONE)
-		ulds[uld].state_change(handle, CXGB4_STATE_UP);
-}
-
-static void attach_ulds(struct adapter *adap)
-{
-	unsigned int i;
-
-	spin_lock(&adap_rcu_lock);
-	list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list);
-	spin_unlock(&adap_rcu_lock);
-
-	mutex_lock(&uld_mutex);
-	list_add_tail(&adap->list_node, &adapter_list);
-	for (i = 0; i < CXGB4_ULD_MAX; i++)
-		if (ulds[i].add)
-			uld_attach(adap, i);
-	mutex_unlock(&uld_mutex);
 }
 
 static void detach_ulds(struct adapter *adap)
@@ -2560,20 +2086,16 @@
 	mutex_lock(&uld_mutex);
 	list_del(&adap->list_node);
 	for (i = 0; i < CXGB4_ULD_MAX; i++)
-		if (adap->uld_handle[i]) {
-			ulds[i].state_change(adap->uld_handle[i],
+		if (adap->uld && adap->uld[i].handle) {
+			adap->uld[i].state_change(adap->uld[i].handle,
 					     CXGB4_STATE_DETACH);
-			adap->uld_handle[i] = NULL;
+			adap->uld[i].handle = NULL;
 		}
 	if (netevent_registered && list_empty(&adapter_list)) {
 		unregister_netevent_notifier(&cxgb4_netevent_nb);
 		netevent_registered = false;
 	}
 	mutex_unlock(&uld_mutex);
-
-	spin_lock(&adap_rcu_lock);
-	list_del_rcu(&adap->rcu_node);
-	spin_unlock(&adap_rcu_lock);
 }
 
 static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
@@ -2582,61 +2104,12 @@
 
 	mutex_lock(&uld_mutex);
 	for (i = 0; i < CXGB4_ULD_MAX; i++)
-		if (adap->uld_handle[i])
-			ulds[i].state_change(adap->uld_handle[i], new_state);
+		if (adap->uld && adap->uld[i].handle)
+			adap->uld[i].state_change(adap->uld[i].handle,
+						  new_state);
 	mutex_unlock(&uld_mutex);
 }
 
-/**
- *	cxgb4_register_uld - register an upper-layer driver
- *	@type: the ULD type
- *	@p: the ULD methods
- *
- *	Registers an upper-layer driver with this driver and notifies the ULD
- *	about any presently available devices that support its type.  Returns
- *	%-EBUSY if a ULD of the same type is already registered.
- */
-int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
-{
-	int ret = 0;
-	struct adapter *adap;
-
-	if (type >= CXGB4_ULD_MAX)
-		return -EINVAL;
-	mutex_lock(&uld_mutex);
-	if (ulds[type].add) {
-		ret = -EBUSY;
-		goto out;
-	}
-	ulds[type] = *p;
-	list_for_each_entry(adap, &adapter_list, list_node)
-		uld_attach(adap, type);
-out:	mutex_unlock(&uld_mutex);
-	return ret;
-}
-EXPORT_SYMBOL(cxgb4_register_uld);
-
-/**
- *	cxgb4_unregister_uld - unregister an upper-layer driver
- *	@type: the ULD type
- *
- *	Unregisters an existing upper-layer driver.
- */
-int cxgb4_unregister_uld(enum cxgb4_uld type)
-{
-	struct adapter *adap;
-
-	if (type >= CXGB4_ULD_MAX)
-		return -EINVAL;
-	mutex_lock(&uld_mutex);
-	list_for_each_entry(adap, &adapter_list, list_node)
-		adap->uld_handle[type] = NULL;
-	ulds[type].add = NULL;
-	mutex_unlock(&uld_mutex);
-	return 0;
-}
-EXPORT_SYMBOL(cxgb4_unregister_uld);
-
 #if IS_ENABLED(CONFIG_IPV6)
 static int cxgb4_inet6addr_handler(struct notifier_block *this,
 				   unsigned long event, void *data)
@@ -2741,7 +2214,6 @@
 				  adap->msix_info[0].desc, adap);
 		if (err)
 			goto irq_err;
-
 		err = request_msix_queue_irqs(adap);
 		if (err) {
 			free_irq(adap->msix_info[0].vec, adap);
@@ -2819,40 +2291,6 @@
 	return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
 }
 
-/* Return an error number if the indicated filter isn't writable ...
- */
-static int writable_filter(struct filter_entry *f)
-{
-	if (f->locked)
-		return -EPERM;
-	if (f->pending)
-		return -EBUSY;
-
-	return 0;
-}
-
-/* Delete the filter at the specified index (if valid).  The checks for all
- * the common problems with doing this like the filter being locked, currently
- * pending in another operation, etc.
- */
-static int delete_filter(struct adapter *adapter, unsigned int fidx)
-{
-	struct filter_entry *f;
-	int ret;
-
-	if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
-		return -EINVAL;
-
-	f = &adapter->tids.ftid_tab[fidx];
-	ret = writable_filter(f);
-	if (ret)
-		return ret;
-	if (f->valid)
-		return del_filter_wr(adapter, fidx);
-
-	return 0;
-}
-
 int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
 		__be32 sip, __be16 sport, __be16 vlan,
 		unsigned int queue, unsigned char port, unsigned char mask)
@@ -2922,7 +2360,6 @@
 int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
 		unsigned int queue, bool ipv6)
 {
-	int ret;
 	struct filter_entry *f;
 	struct adapter *adap;
 
@@ -2936,11 +2373,7 @@
 	/* Unlock the filter */
 	f->locked = 0;
 
-	ret = delete_filter(adap, stid);
-	if (ret)
-		return ret;
-
-	return 0;
+	return delete_filter(adap, stid);
 }
 EXPORT_SYMBOL(cxgb4_remove_server_filter);
 
@@ -3078,6 +2511,85 @@
 	return ret;
 }
 
+#ifdef CONFIG_PCI_IOV
+static int dummy_open(struct net_device *dev)
+{
+	/* Turn carrier off since we don't have to transmit anything on this
+	 * interface.
+	 */
+	netif_carrier_off(dev);
+	return 0;
+}
+
+/* Fill MAC address that will be assigned by the FW */
+static void fill_vf_station_mac_addr(struct adapter *adap)
+{
+	unsigned int i;
+	u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN];
+	int err;
+	u8 *na;
+	u16 a, b;
+
+	err = t4_get_raw_vpd_params(adap, &adap->params.vpd);
+	if (!err) {
+		na = adap->params.vpd.na;
+		for (i = 0; i < ETH_ALEN; i++)
+			hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 +
+				      hex2val(na[2 * i + 1]));
+		a = (hw_addr[0] << 8) | hw_addr[1];
+		b = (hw_addr[1] << 8) | hw_addr[2];
+		a ^= b;
+		a |= 0x0200;    /* locally assigned Ethernet MAC address */
+		a &= ~0x0100;   /* not a multicast Ethernet MAC address */
+		macaddr[0] = a >> 8;
+		macaddr[1] = a & 0xff;
+
+		for (i = 2; i < 5; i++)
+			macaddr[i] = hw_addr[i + 1];
+
+		for (i = 0; i < adap->num_vfs; i++) {
+			macaddr[5] = adap->pf * 16 + i;
+			ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr);
+		}
+	}
+}
+
+static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	int ret;
+
+	/* verify MAC addr is valid */
+	if (!is_valid_ether_addr(mac)) {
+		dev_err(pi->adapter->pdev_dev,
+			"Invalid Ethernet address %pM for VF %d\n",
+			mac, vf);
+		return -EINVAL;
+	}
+
+	dev_info(pi->adapter->pdev_dev,
+		 "Setting MAC %pM on VF %d\n", mac, vf);
+	ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
+	if (!ret)
+		ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac);
+	return ret;
+}
+
+static int cxgb_get_vf_config(struct net_device *dev,
+			      int vf, struct ifla_vf_info *ivi)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+
+	if (vf >= adap->num_vfs)
+		return -EINVAL;
+	ivi->vf = vf;
+	ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr);
+	return 0;
+}
+#endif
+
 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
 {
 	int ret;
@@ -3114,6 +2626,116 @@
 }
 #endif
 
+static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
+{
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
+	struct sched_class *e;
+	struct ch_sched_params p;
+	struct ch_sched_queue qe;
+	u32 req_rate;
+	int err = 0;
+
+	if (!can_sched(dev))
+		return -ENOTSUPP;
+
+	if (index < 0 || index > pi->nqsets - 1)
+		return -EINVAL;
+
+	if (!(adap->flags & FULL_INIT_DONE)) {
+		dev_err(adap->pdev_dev,
+			"Failed to rate limit on queue %d. Link Down?\n",
+			index);
+		return -EINVAL;
+	}
+
+	/* Convert from Mbps to Kbps */
+	req_rate = rate << 10;
+
+	/* Max rate is 10 Gbps */
+	if (req_rate >= SCHED_MAX_RATE_KBPS) {
+		dev_err(adap->pdev_dev,
+			"Invalid rate %u Mbps, Max rate is %u Gbps\n",
+			rate, SCHED_MAX_RATE_KBPS);
+		return -ERANGE;
+	}
+
+	/* First unbind the queue from any existing class */
+	memset(&qe, 0, sizeof(qe));
+	qe.queue = index;
+	qe.class = SCHED_CLS_NONE;
+
+	err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE);
+	if (err) {
+		dev_err(adap->pdev_dev,
+			"Unbinding Queue %d on port %d fail. Err: %d\n",
+			index, pi->port_id, err);
+		return err;
+	}
+
+	/* Queue already unbound */
+	if (!req_rate)
+		return 0;
+
+	/* Fetch any available unused or matching scheduling class */
+	memset(&p, 0, sizeof(p));
+	p.type = SCHED_CLASS_TYPE_PACKET;
+	p.u.params.level    = SCHED_CLASS_LEVEL_CL_RL;
+	p.u.params.mode     = SCHED_CLASS_MODE_CLASS;
+	p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
+	p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
+	p.u.params.channel  = pi->tx_chan;
+	p.u.params.class    = SCHED_CLS_NONE;
+	p.u.params.minrate  = 0;
+	p.u.params.maxrate  = req_rate;
+	p.u.params.weight   = 0;
+	p.u.params.pktsize  = dev->mtu;
+
+	e = cxgb4_sched_class_alloc(dev, &p);
+	if (!e)
+		return -ENOMEM;
+
+	/* Bind the queue to a scheduling class */
+	memset(&qe, 0, sizeof(qe));
+	qe.queue = index;
+	qe.class = e->idx;
+
+	err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE);
+	if (err)
+		dev_err(adap->pdev_dev,
+			"Queue rate limiting failed. Err: %d\n", err);
+	return err;
+}
+
+static int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+			 struct tc_to_netdev *tc)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct adapter *adap = netdev2adap(dev);
+
+	if (!(adap->flags & FULL_INIT_DONE)) {
+		dev_err(adap->pdev_dev,
+			"Failed to setup tc on port %d. Link Down?\n",
+			pi->port_id);
+		return -EINVAL;
+	}
+
+	if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
+	    tc->type == TC_SETUP_CLSU32) {
+		switch (tc->cls_u32->command) {
+		case TC_CLSU32_NEW_KNODE:
+		case TC_CLSU32_REPLACE_KNODE:
+			return cxgb4_config_knode(dev, proto, tc->cls_u32);
+		case TC_CLSU32_DELETE_KNODE:
+			return cxgb4_delete_knode(dev, proto, tc->cls_u32);
+		default:
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return -EOPNOTSUPP;
+}
+
 static const struct net_device_ops cxgb4_netdev_ops = {
 	.ndo_open             = cxgb_open,
 	.ndo_stop             = cxgb_close,
@@ -3136,7 +2758,31 @@
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll        = cxgb_busy_poll,
 #endif
+	.ndo_set_tx_maxrate   = cxgb_set_tx_maxrate,
+	.ndo_setup_tc         = cxgb_setup_tc,
+};
 
+#ifdef CONFIG_PCI_IOV
+static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
+	.ndo_open             = dummy_open,
+	.ndo_set_vf_mac       = cxgb_set_vf_mac,
+	.ndo_get_vf_config    = cxgb_get_vf_config,
+};
+#endif
+
+static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+	struct adapter *adapter = netdev2adap(dev);
+
+	strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver));
+	strlcpy(info->version, cxgb4_driver_version,
+		sizeof(info->version));
+	strlcpy(info->bus_info, pci_name(adapter->pdev),
+		sizeof(info->bus_info));
+}
+
+static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
+	.get_drvinfo       = get_drvinfo,
 };
 
 void t4_fatal_err(struct adapter *adap)
@@ -3979,6 +3625,12 @@
 	adap->clipt_start = val[0];
 	adap->clipt_end = val[1];
 
+	/* We don't yet have a PARAMs calls to retrieve the number of Traffic
+	 * Classes supported by the hardware/firmware so we hard code it here
+	 * for now.
+	 */
+	adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
+
 	/* query params related to active filter region */
 	params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
 	params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
@@ -4067,6 +3719,7 @@
 		adap->params.ofldq_wr_cred = val[5];
 
 		adap->params.offload = 1;
+		adap->num_ofld_uld += 1;
 	}
 	if (caps_cmd.rdmacaps) {
 		params[0] = FW_PARAM_PFVF(STAG_START);
@@ -4119,6 +3772,7 @@
 			 "max_ordird_qp %d max_ird_adapter %d\n",
 			 adap->params.max_ordird_qp,
 			 adap->params.max_ird_adapter);
+		adap->num_ofld_uld += 2;
 	}
 	if (caps_cmd.iscsicaps) {
 		params[0] = FW_PARAM_PFVF(ISCSI_START);
@@ -4129,6 +3783,13 @@
 			goto bye;
 		adap->vres.iscsi.start = val[0];
 		adap->vres.iscsi.size = val[1] - val[0] + 1;
+		/* LIO target and cxgb4i initiaitor */
+		adap->num_ofld_uld += 2;
+	}
+	if (caps_cmd.cryptocaps) {
+		/* Should query params here...TODO */
+		adap->params.crypto |= ULP_CRYPTO_LOOKASIDE;
+		adap->num_uld += 1;
 	}
 #undef FW_PARAM_PFVF
 #undef FW_PARAM_DEV
@@ -4305,20 +3966,17 @@
 	.resume         = eeh_resume,
 };
 
+/* Return true if the Link Configuration supports "High Speeds" (those greater
+ * than 1Gb/s).
+ */
 static inline bool is_x_10g_port(const struct link_config *lc)
 {
-	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
-	       (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
-}
+	unsigned int speeds, high_speeds;
 
-static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
-			     unsigned int us, unsigned int cnt,
-			     unsigned int size, unsigned int iqe_size)
-{
-	q->adap = adap;
-	cxgb4_set_rspq_intr_params(q, us, cnt);
-	q->iqe_len = iqe_size;
-	q->size = size;
+	speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
+	high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
+
+	return high_speeds != 0;
 }
 
 /*
@@ -4333,7 +3991,16 @@
 #ifndef CONFIG_CHELSIO_T4_DCB
 	int q10g = 0;
 #endif
-	int ciq_size;
+
+	/* Reduce memory usage in kdump environment, disable all offload.
+	 */
+	if (is_kdump_kernel()) {
+		adap->params.offload = 0;
+		adap->params.crypto = 0;
+	} else if (is_uld(adap) && t4_uld_mem_alloc(adap)) {
+		adap->params.offload = 0;
+		adap->params.crypto = 0;
+	}
 
 	for_each_port(adap, i)
 		n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
@@ -4365,11 +4032,6 @@
 	if (q10g > netif_get_num_default_rss_queues())
 		q10g = netif_get_num_default_rss_queues();
 
-	/* Reduce memory usage in kdump environment, disable all offload.
-	 */
-	if (is_kdump_kernel())
-		adap->params.offload = 0;
-
 	for_each_port(adap, i) {
 		struct port_info *pi = adap2pinfo(adap, i);
 
@@ -4382,33 +4044,18 @@
 	s->ethqsets = qidx;
 	s->max_ethqsets = qidx;   /* MSI-X may lower it later */
 
-	if (is_offload(adap)) {
+	if (is_uld(adap)) {
 		/*
 		 * For offload we use 1 queue/channel if all ports are up to 1G,
 		 * otherwise we divide all available queues amongst the channels
 		 * capped by the number of available cores.
 		 */
 		if (n10g) {
-			i = min_t(int, ARRAY_SIZE(s->iscsirxq),
-				  num_online_cpus());
-			s->iscsiqsets = roundup(i, adap->params.nports);
-		} else
-			s->iscsiqsets = adap->params.nports;
-		/* For RDMA one Rx queue per channel suffices */
-		s->rdmaqs = adap->params.nports;
-		/* Try and allow at least 1 CIQ per cpu rounding down
-		 * to the number of ports, with a minimum of 1 per port.
-		 * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port.
-		 * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port.
-		 * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port.
-		 */
-		s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus());
-		s->rdmaciqs = (s->rdmaciqs / adap->params.nports) *
-				adap->params.nports;
-		s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports);
-
-		if (!is_t4(adap->params.chip))
-			s->niscsitq = s->iscsiqsets;
+			i = num_online_cpus();
+			s->ofldqsets = roundup(i, adap->params.nports);
+		} else {
+			s->ofldqsets = adap->params.nports;
+		}
 	}
 
 	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
@@ -4427,47 +4074,8 @@
 	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
 		s->ofldtxq[i].q.size = 1024;
 
-	for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) {
-		struct sge_ofld_rxq *r = &s->iscsirxq[i];
-
-		init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
-		r->rspq.uld = CXGB4_ULD_ISCSI;
-		r->fl.size = 72;
-	}
-
-	if (!is_t4(adap->params.chip)) {
-		for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) {
-			struct sge_ofld_rxq *r = &s->iscsitrxq[i];
-
-			init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
-			r->rspq.uld = CXGB4_ULD_ISCSIT;
-			r->fl.size = 72;
-		}
-	}
-
-	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
-		struct sge_ofld_rxq *r = &s->rdmarxq[i];
-
-		init_rspq(adap, &r->rspq, 5, 1, 511, 64);
-		r->rspq.uld = CXGB4_ULD_RDMA;
-		r->fl.size = 72;
-	}
-
-	ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
-	if (ciq_size > SGE_MAX_IQ_SIZE) {
-		CH_WARN(adap, "CIQ size too small for available IQs\n");
-		ciq_size = SGE_MAX_IQ_SIZE;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) {
-		struct sge_ofld_rxq *r = &s->rdmaciq[i];
-
-		init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
-		r->rspq.uld = CXGB4_ULD_RDMA;
-	}
-
 	init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
-	init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64);
+	init_rspq(adap, &s->intrq, 0, 1, 512, 64);
 }
 
 /*
@@ -4498,42 +4106,90 @@
 	}
 }
 
+static int get_msix_info(struct adapter *adap)
+{
+	struct uld_msix_info *msix_info;
+	unsigned int max_ingq = 0;
+
+	if (is_offload(adap))
+		max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld;
+	if (is_pci_uld(adap))
+		max_ingq += MAX_OFLD_QSETS * adap->num_uld;
+
+	if (!max_ingq)
+		goto out;
+
+	msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL);
+	if (!msix_info)
+		return -ENOMEM;
+
+	adap->msix_bmap_ulds.msix_bmap = kcalloc(BITS_TO_LONGS(max_ingq),
+						 sizeof(long), GFP_KERNEL);
+	if (!adap->msix_bmap_ulds.msix_bmap) {
+		kfree(msix_info);
+		return -ENOMEM;
+	}
+	spin_lock_init(&adap->msix_bmap_ulds.lock);
+	adap->msix_info_ulds = msix_info;
+out:
+	return 0;
+}
+
+static void free_msix_info(struct adapter *adap)
+{
+	if (!(adap->num_uld && adap->num_ofld_uld))
+		return;
+
+	kfree(adap->msix_info_ulds);
+	kfree(adap->msix_bmap_ulds.msix_bmap);
+}
+
 /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
 #define EXTRA_VECS 2
 
 static int enable_msix(struct adapter *adap)
 {
-	int ofld_need = 0;
-	int i, want, need, allocated;
+	int ofld_need = 0, uld_need = 0;
+	int i, j, want, need, allocated;
 	struct sge *s = &adap->sge;
 	unsigned int nchan = adap->params.nports;
 	struct msix_entry *entries;
+	int max_ingq = MAX_INGQ;
 
-	entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1),
+	if (is_pci_uld(adap))
+		max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
+	if (is_offload(adap))
+		max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld);
+	entries = kmalloc(sizeof(*entries) * (max_ingq + 1),
 			  GFP_KERNEL);
 	if (!entries)
 		return -ENOMEM;
 
-	for (i = 0; i < MAX_INGQ + 1; ++i)
+	/* map for msix */
+	if (get_msix_info(adap)) {
+		adap->params.offload = 0;
+		adap->params.crypto = 0;
+	}
+
+	for (i = 0; i < max_ingq + 1; ++i)
 		entries[i].entry = i;
 
 	want = s->max_ethqsets + EXTRA_VECS;
 	if (is_offload(adap)) {
-		want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets	+
-			s->niscsitq;
-		/* need nchan for each possible ULD */
-		if (is_t4(adap->params.chip))
-			ofld_need = 3 * nchan;
-		else
-			ofld_need = 4 * nchan;
+		want += adap->num_ofld_uld * s->ofldqsets;
+		ofld_need = adap->num_ofld_uld * nchan;
+	}
+	if (is_pci_uld(adap)) {
+		want += adap->num_uld * s->ofldqsets;
+		uld_need = adap->num_uld * nchan;
 	}
 #ifdef CONFIG_CHELSIO_T4_DCB
 	/* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
 	 * each port.
 	 */
-	need = 8 * adap->params.nports + EXTRA_VECS + ofld_need;
+	need = 8 * adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
 #else
-	need = adap->params.nports + EXTRA_VECS + ofld_need;
+	need = adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
 #endif
 	allocated = pci_enable_msix_range(adap->pdev, entries, need, want);
 	if (allocated < 0) {
@@ -4547,33 +4203,31 @@
 	 * Every group gets its minimum requirement and NIC gets top
 	 * priority for leftovers.
 	 */
-	i = allocated - EXTRA_VECS - ofld_need;
+	i = allocated - EXTRA_VECS - ofld_need - uld_need;
 	if (i < s->max_ethqsets) {
 		s->max_ethqsets = i;
 		if (i < s->ethqsets)
 			reduce_ethqs(adap, i);
 	}
-	if (is_offload(adap)) {
-		if (allocated < want) {
-			s->rdmaqs = nchan;
-			s->rdmaciqs = nchan;
-
-			if (!is_t4(adap->params.chip))
-				s->niscsitq = nchan;
-		}
-
-		/* leftovers go to OFLD */
-		i = allocated - EXTRA_VECS - s->max_ethqsets -
-		    s->rdmaqs - s->rdmaciqs - s->niscsitq;
-		s->iscsiqsets = (i / nchan) * nchan;  /* round down */
-
+	if (is_uld(adap)) {
+		if (allocated < want)
+			s->nqs_per_uld = nchan;
+		else
+			s->nqs_per_uld = s->ofldqsets;
 	}
-	for (i = 0; i < allocated; ++i)
+
+	for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i)
 		adap->msix_info[i].vec = entries[i].vector;
+	if (is_uld(adap)) {
+		for (j = 0 ; i < allocated; ++i, j++) {
+			adap->msix_info_ulds[j].vec = entries[i].vector;
+			adap->msix_info_ulds[j].idx = i;
+		}
+		adap->msix_bmap_ulds.mapsize = j;
+	}
 	dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, "
-		 "nic %d iscsi %d rdma cpl %d rdma ciq %d\n",
-		 allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs,
-		 s->rdmaciqs);
+		 "nic %d per uld %d\n",
+		 allocated, s->max_ethqsets, s->nqs_per_uld);
 
 	kfree(entries);
 	return 0;
@@ -4756,8 +4410,12 @@
 		bufp += sprintf(bufp, "1000/");
 	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
 		bufp += sprintf(bufp, "10G/");
+	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G)
+		bufp += sprintf(bufp, "25G/");
 	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
 		bufp += sprintf(bufp, "40G/");
+	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G)
+		bufp += sprintf(bufp, "100G/");
 	if (bufp != buf)
 		--bufp;
 	sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
@@ -4783,7 +4441,9 @@
 	unsigned int i;
 
 	t4_free_mem(adapter->l2t);
+	t4_cleanup_sched(adapter);
 	t4_free_mem(adapter->tids.tid_tab);
+	cxgb4_cleanup_tc_u32(adapter);
 	kfree(adapter->sge.egr_map);
 	kfree(adapter->sge.ingr_map);
 	kfree(adapter->sge.starving_fl);
@@ -4834,21 +4494,59 @@
 }
 
 #ifdef CONFIG_PCI_IOV
+static void dummy_setup(struct net_device *dev)
+{
+	dev->type = ARPHRD_NONE;
+	dev->mtu = 0;
+	dev->hard_header_len = 0;
+	dev->addr_len = 0;
+	dev->tx_queue_len = 0;
+	dev->flags |= IFF_NOARP;
+	dev->priv_flags |= IFF_NO_QUEUE;
+
+	/* Initialize the device structure. */
+	dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
+	dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
+	dev->destructor = free_netdev;
+}
+
+static int config_mgmt_dev(struct pci_dev *pdev)
+{
+	struct adapter *adap = pci_get_drvdata(pdev);
+	struct net_device *netdev;
+	struct port_info *pi;
+	char name[IFNAMSIZ];
+	int err;
+
+	snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf);
+	netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, dummy_setup);
+	if (!netdev)
+		return -ENOMEM;
+
+	pi = netdev_priv(netdev);
+	pi->adapter = adap;
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	adap->port[0] = netdev;
+
+	err = register_netdev(adap->port[0]);
+	if (err) {
+		pr_info("Unable to register VF mgmt netdev %s\n", name);
+		free_netdev(adap->port[0]);
+		adap->port[0] = NULL;
+		return err;
+	}
+	return 0;
+}
+
 static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
 {
+	struct adapter *adap = pci_get_drvdata(pdev);
 	int err = 0;
 	int current_vfs = pci_num_vf(pdev);
 	u32 pcie_fw;
-	void __iomem *regs;
 
-	regs = pci_ioremap_bar(pdev, 0);
-	if (!regs) {
-		dev_err(&pdev->dev, "cannot map device registers\n");
-		return -ENOMEM;
-	}
-
-	pcie_fw = readl(regs + PCIE_FW_A);
-	iounmap(regs);
+	pcie_fw = readl(adap->regs + PCIE_FW_A);
 	/* Check if cxgb4 is the MASTER and fw is initialized */
 	if (!(pcie_fw & PCIE_FW_INIT_F) ||
 	    !(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
@@ -4875,6 +4573,14 @@
 	 */
 	if (!num_vfs) {
 		pci_disable_sriov(pdev);
+		if (adap->port[0]) {
+			unregister_netdev(adap->port[0]);
+			adap->port[0] = NULL;
+		}
+		/* free VF resources */
+		kfree(adap->vfinfo);
+		adap->vfinfo = NULL;
+		adap->num_vfs = 0;
 		return num_vfs;
 	}
 
@@ -4882,7 +4588,17 @@
 		err = pci_enable_sriov(pdev, num_vfs);
 		if (err)
 			return err;
+
+		adap->num_vfs = num_vfs;
+		err = config_mgmt_dev(pdev);
+		if (err)
+			return err;
 	}
+
+	adap->vfinfo = kcalloc(adap->num_vfs,
+			       sizeof(struct vf_info), GFP_KERNEL);
+	if (adap->vfinfo)
+		fill_vf_station_mac_addr(adap);
 	return num_vfs;
 }
 #endif
@@ -4893,9 +4609,11 @@
 	struct port_info *pi;
 	bool highdma = false;
 	struct adapter *adapter = NULL;
+	struct net_device *netdev;
 	void __iomem *regs;
 	u32 whoami, pl_rev;
 	enum chip_type chip;
+	static int adap_idx = 1;
 
 	printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
 
@@ -4930,7 +4648,9 @@
 	func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ?
 		SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
 	if (func != ent->driver_data) {
+#ifndef CONFIG_PCI_IOV
 		iounmap(regs);
+#endif
 		pci_disable_device(pdev);
 		pci_save_state(pdev);        /* to restore SR-IOV later */
 		goto sriov;
@@ -4962,6 +4682,7 @@
 		err = -ENOMEM;
 		goto out_unmap_bar0;
 	}
+	adap_idx++;
 
 	adapter->workq = create_singlethread_workqueue("cxgb4");
 	if (!adapter->workq) {
@@ -5048,8 +4769,6 @@
 			      T6_STATMODE_V(0)));
 
 	for_each_port(adapter, i) {
-		struct net_device *netdev;
-
 		netdev = alloc_etherdev_mq(sizeof(struct port_info),
 					   MAX_ETH_QSETS);
 		if (!netdev) {
@@ -5069,7 +4788,8 @@
 		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			NETIF_F_RXCSUM | NETIF_F_RXHASH |
-			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+			NETIF_F_HW_TC;
 		if (highdma)
 			netdev->hw_features |= NETIF_F_HIGHDMA;
 		netdev->features |= netdev->hw_features;
@@ -5143,10 +4863,26 @@
 		}
 	}
 #endif
-	if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
+
+	for_each_port(adapter, i) {
+		pi = adap2pinfo(adapter, i);
+		pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls);
+		if (!pi->sched_tbl)
+			dev_warn(&pdev->dev,
+				 "could not activate scheduling on port %d\n",
+				 i);
+	}
+
+	if (tid_init(&adapter->tids) < 0) {
 		dev_warn(&pdev->dev, "could not allocate TID table, "
 			 "continuing\n");
 		adapter->params.offload = 0;
+	} else {
+		adapter->tc_u32 = cxgb4_init_tc_u32(adapter,
+						    CXGB4_MAX_LINK_HANDLE);
+		if (!adapter->tc_u32)
+			dev_warn(&pdev->dev,
+				 "could not offload tc u32, continuing\n");
 	}
 
 	if (is_offload(adapter)) {
@@ -5168,8 +4904,11 @@
 	/* See what interrupts we'll be using */
 	if (msi > 1 && enable_msix(adapter) == 0)
 		adapter->flags |= USING_MSIX;
-	else if (msi > 0 && pci_enable_msi(pdev) == 0)
+	else if (msi > 0 && pci_enable_msi(pdev) == 0) {
 		adapter->flags |= USING_MSI;
+		if (msi > 1)
+			free_msix_info(adapter);
+	}
 
 	/* check for PCI Express bandwidth capabiltites */
 	cxgb4_check_pcie_caps(adapter);
@@ -5213,10 +4952,15 @@
 	/* PCIe EEH recovery on powerpc platforms needs fundamental reset */
 	pdev->needs_freset = 1;
 
-	if (is_offload(adapter))
-		attach_ulds(adapter);
+	if (is_uld(adapter)) {
+		mutex_lock(&uld_mutex);
+		list_add_tail(&adapter->list_node, &adapter_list);
+		mutex_unlock(&uld_mutex);
+	}
 
 	print_adapter_info(adapter);
+	setup_fw_sge_queues(adapter);
+	return 0;
 
 sriov:
 #ifdef CONFIG_PCI_IOV
@@ -5230,11 +4974,48 @@
 				 "instantiated %u virtual functions\n",
 				 num_vf[func]);
 	}
-#endif
+
+	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+	if (!adapter) {
+		err = -ENOMEM;
+		goto free_pci_region;
+	}
+
+	adapter->pdev = pdev;
+	adapter->pdev_dev = &pdev->dev;
+	adapter->name = pci_name(pdev);
+	adapter->mbox = func;
+	adapter->pf = func;
+	adapter->regs = regs;
+	adapter->adap_idx = adap_idx;
+	adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
+				    (sizeof(struct mbox_cmd) *
+				     T4_OS_LOG_MBOX_CMDS),
+				    GFP_KERNEL);
+	if (!adapter->mbox_log) {
+		err = -ENOMEM;
+		goto free_adapter;
+	}
+	pci_set_drvdata(pdev, adapter);
 	return 0;
 
+ free_adapter:
+	kfree(adapter);
+ free_pci_region:
+	iounmap(regs);
+	pci_disable_sriov(pdev);
+	pci_release_regions(pdev);
+	return err;
+#else
+	return 0;
+#endif
+
  out_free_dev:
 	free_some_resources(adapter);
+	if (adapter->flags & USING_MSIX)
+		free_msix_info(adapter);
+	if (adapter->num_uld || adapter->num_ofld_uld)
+		t4_uld_mem_free(adapter);
  out_unmap_bar:
 	if (!is_t4(adapter->params.chip))
 		iounmap(adapter->bar2);
@@ -5258,12 +5039,12 @@
 {
 	struct adapter *adapter = pci_get_drvdata(pdev);
 
-#ifdef CONFIG_PCI_IOV
-	pci_disable_sriov(pdev);
+	if (!adapter) {
+		pci_release_regions(pdev);
+		return;
+	}
 
-#endif
-
-	if (adapter) {
+	if (adapter->pf == 4) {
 		int i;
 
 		/* Tear down per-adapter Work Queue first since it can contain
@@ -5271,7 +5052,7 @@
 		 */
 		destroy_workqueue(adapter->workq);
 
-		if (is_offload(adapter))
+		if (is_uld(adapter))
 			detach_ulds(adapter);
 
 		disable_interrupts(adapter);
@@ -5285,17 +5066,15 @@
 		/* If we allocated filters, free up state associated with any
 		 * valid filters ...
 		 */
-		if (adapter->tids.ftid_tab) {
-			struct filter_entry *f = &adapter->tids.ftid_tab[0];
-			for (i = 0; i < (adapter->tids.nftids +
-					adapter->tids.nsftids); i++, f++)
-				if (f->valid)
-					clear_filter(adapter, f);
-		}
+		clear_all_filters(adapter);
 
 		if (adapter->flags & FULL_INIT_DONE)
 			cxgb_down(adapter);
 
+		if (adapter->flags & USING_MSIX)
+			free_msix_info(adapter);
+		if (adapter->num_uld || adapter->num_ofld_uld)
+			t4_uld_mem_free(adapter);
 		free_some_resources(adapter);
 #if IS_ENABLED(CONFIG_IPV6)
 		t4_cleanup_clip_tbl(adapter);
@@ -5312,8 +5091,64 @@
 		kfree(adapter->mbox_log);
 		synchronize_rcu();
 		kfree(adapter);
-	} else
+	}
+#ifdef CONFIG_PCI_IOV
+	else {
+		if (adapter->port[0])
+			unregister_netdev(adapter->port[0]);
+		iounmap(adapter->regs);
+		kfree(adapter->vfinfo);
+		kfree(adapter);
+		pci_disable_sriov(pdev);
 		pci_release_regions(pdev);
+	}
+#endif
+}
+
+/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
+ * delivery.  This is essentially a stripped down version of the PCI remove()
+ * function where we do the minimal amount of work necessary to shutdown any
+ * further activity.
+ */
+static void shutdown_one(struct pci_dev *pdev)
+{
+	struct adapter *adapter = pci_get_drvdata(pdev);
+
+	/* As with remove_one() above (see extended comment), we only want do
+	 * do cleanup on PCI Devices which went all the way through init_one()
+	 * ...
+	 */
+	if (!adapter) {
+		pci_release_regions(pdev);
+		return;
+	}
+
+	if (adapter->pf == 4) {
+		int i;
+
+		for_each_port(adapter, i)
+			if (adapter->port[i]->reg_state == NETREG_REGISTERED)
+				cxgb_close(adapter->port[i]);
+
+		t4_uld_clean_up(adapter);
+		disable_interrupts(adapter);
+		disable_msi(adapter);
+
+		t4_sge_stop(adapter);
+		if (adapter->flags & FW_OK)
+			t4_fw_bye(adapter, adapter->mbox);
+	}
+#ifdef CONFIG_PCI_IOV
+	else {
+		if (adapter->port[0])
+			unregister_netdev(adapter->port[0]);
+		iounmap(adapter->regs);
+		kfree(adapter->vfinfo);
+		kfree(adapter);
+		pci_disable_sriov(pdev);
+		pci_release_regions(pdev);
+	}
+#endif
 }
 
 static struct pci_driver cxgb4_driver = {
@@ -5321,7 +5156,7 @@
 	.id_table = cxgb4_pci_tbl,
 	.probe    = init_one,
 	.remove   = remove_one,
-	.shutdown = remove_one,
+	.shutdown = shutdown_one,
 #ifdef CONFIG_PCI_IOV
 	.sriov_configure = cxgb4_iov_configure,
 #endif
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
new file mode 100644
index 0000000..49d2deb
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -0,0 +1,483 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include "cxgb4.h"
+#include "cxgb4_tc_u32_parse.h"
+#include "cxgb4_tc_u32.h"
+
+/* Fill ch_filter_specification with parsed match value/mask pair. */
+static int fill_match_fields(struct adapter *adap,
+			     struct ch_filter_specification *fs,
+			     struct tc_cls_u32_offload *cls,
+			     const struct cxgb4_match_field *entry,
+			     bool next_header)
+{
+	unsigned int i, j;
+	u32 val, mask;
+	int off, err;
+	bool found;
+
+	for (i = 0; i < cls->knode.sel->nkeys; i++) {
+		off = cls->knode.sel->keys[i].off;
+		val = cls->knode.sel->keys[i].val;
+		mask = cls->knode.sel->keys[i].mask;
+
+		if (next_header) {
+			/* For next headers, parse only keys with offmask */
+			if (!cls->knode.sel->keys[i].offmask)
+				continue;
+		} else {
+			/* For the remaining, parse only keys without offmask */
+			if (cls->knode.sel->keys[i].offmask)
+				continue;
+		}
+
+		found = false;
+
+		for (j = 0; entry[j].val; j++) {
+			if (off == entry[j].off) {
+				found = true;
+				err = entry[j].val(fs, val, mask);
+				if (err)
+					return err;
+				break;
+			}
+		}
+
+		if (!found)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Fill ch_filter_specification with parsed action. */
+static int fill_action_fields(struct adapter *adap,
+			      struct ch_filter_specification *fs,
+			      struct tc_cls_u32_offload *cls)
+{
+	unsigned int num_actions = 0;
+	const struct tc_action *a;
+	struct tcf_exts *exts;
+	LIST_HEAD(actions);
+
+	exts = cls->knode.exts;
+	if (tc_no_actions(exts))
+		return -EINVAL;
+
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		/* Don't allow more than one action per rule. */
+		if (num_actions)
+			return -EINVAL;
+
+		/* Drop in hardware. */
+		if (is_tcf_gact_shot(a)) {
+			fs->action = FILTER_DROP;
+			num_actions++;
+			continue;
+		}
+
+		/* Re-direct to specified port in hardware. */
+		if (is_tcf_mirred_redirect(a)) {
+			struct net_device *n_dev;
+			unsigned int i, index;
+			bool found = false;
+
+			index = tcf_mirred_ifindex(a);
+			for_each_port(adap, i) {
+				n_dev = adap->port[i];
+				if (index == n_dev->ifindex) {
+					fs->action = FILTER_SWITCH;
+					fs->eport = i;
+					found = true;
+					break;
+				}
+			}
+
+			/* Interface doesn't belong to any port of
+			 * the underlying hardware.
+			 */
+			if (!found)
+				return -EINVAL;
+
+			num_actions++;
+			continue;
+		}
+
+		/* Un-supported action. */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
+		       struct tc_cls_u32_offload *cls)
+{
+	const struct cxgb4_match_field *start, *link_start = NULL;
+	struct adapter *adapter = netdev2adap(dev);
+	struct ch_filter_specification fs;
+	struct cxgb4_tc_u32_table *t;
+	struct cxgb4_link *link;
+	unsigned int filter_id;
+	u32 uhtid, link_uhtid;
+	bool is_ipv6 = false;
+	int ret;
+
+	if (!can_tc_u32_offload(dev))
+		return -EOPNOTSUPP;
+
+	if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6))
+		return -EOPNOTSUPP;
+
+	/* Fetch the location to insert the filter. */
+	filter_id = cls->knode.handle & 0xFFFFF;
+
+	if (filter_id > adapter->tids.nftids) {
+		dev_err(adapter->pdev_dev,
+			"Location %d out of range for insertion. Max: %d\n",
+			filter_id, adapter->tids.nftids);
+		return -ERANGE;
+	}
+
+	t = adapter->tc_u32;
+	uhtid = TC_U32_USERHTID(cls->knode.handle);
+	link_uhtid = TC_U32_USERHTID(cls->knode.link_handle);
+
+	/* Ensure that uhtid is either root u32 (i.e. 0x800)
+	 * or a a valid linked bucket.
+	 */
+	if (uhtid != 0x800 && uhtid >= t->size)
+		return -EINVAL;
+
+	/* Ensure link handle uhtid is sane, if specified. */
+	if (link_uhtid >= t->size)
+		return -EINVAL;
+
+	memset(&fs, 0, sizeof(fs));
+
+	if (protocol == htons(ETH_P_IPV6)) {
+		start = cxgb4_ipv6_fields;
+		is_ipv6 = true;
+	} else {
+		start = cxgb4_ipv4_fields;
+		is_ipv6 = false;
+	}
+
+	if (uhtid != 0x800) {
+		/* Link must exist from root node before insertion. */
+		if (!t->table[uhtid - 1].link_handle)
+			return -EINVAL;
+
+		/* Link must have a valid supported next header. */
+		link_start = t->table[uhtid - 1].match_field;
+		if (!link_start)
+			return -EINVAL;
+	}
+
+	/* Parse links and record them for subsequent jumps to valid
+	 * next headers.
+	 */
+	if (link_uhtid) {
+		const struct cxgb4_next_header *next;
+		bool found = false;
+		unsigned int i, j;
+		u32 val, mask;
+		int off;
+
+		if (t->table[link_uhtid - 1].link_handle) {
+			dev_err(adapter->pdev_dev,
+				"Link handle exists for: 0x%x\n",
+				link_uhtid);
+			return -EINVAL;
+		}
+
+		next = is_ipv6 ? cxgb4_ipv6_jumps : cxgb4_ipv4_jumps;
+
+		/* Try to find matches that allow jumps to next header. */
+		for (i = 0; next[i].jump; i++) {
+			if (next[i].offoff != cls->knode.sel->offoff ||
+			    next[i].shift != cls->knode.sel->offshift ||
+			    next[i].mask != cls->knode.sel->offmask ||
+			    next[i].offset != cls->knode.sel->off)
+				continue;
+
+			/* Found a possible candidate.  Find a key that
+			 * matches the corresponding offset, value, and
+			 * mask to jump to next header.
+			 */
+			for (j = 0; j < cls->knode.sel->nkeys; j++) {
+				off = cls->knode.sel->keys[j].off;
+				val = cls->knode.sel->keys[j].val;
+				mask = cls->knode.sel->keys[j].mask;
+
+				if (next[i].match_off == off &&
+				    next[i].match_val == val &&
+				    next[i].match_mask == mask) {
+					found = true;
+					break;
+				}
+			}
+
+			if (!found)
+				continue; /* Try next candidate. */
+
+			/* Candidate to jump to next header found.
+			 * Translate all keys to internal specification
+			 * and store them in jump table. This spec is copied
+			 * later to set the actual filters.
+			 */
+			ret = fill_match_fields(adapter, &fs, cls,
+						start, false);
+			if (ret)
+				goto out;
+
+			link = &t->table[link_uhtid - 1];
+			link->match_field = next[i].jump;
+			link->link_handle = cls->knode.handle;
+			memcpy(&link->fs, &fs, sizeof(fs));
+			break;
+		}
+
+		/* No candidate found to jump to next header. */
+		if (!found)
+			return -EINVAL;
+
+		return 0;
+	}
+
+	/* Fill ch_filter_specification match fields to be shipped to hardware.
+	 * Copy the linked spec (if any) first.  And then update the spec as
+	 * needed.
+	 */
+	if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) {
+		/* Copy linked ch_filter_specification */
+		memcpy(&fs, &t->table[uhtid - 1].fs, sizeof(fs));
+		ret = fill_match_fields(adapter, &fs, cls,
+					link_start, true);
+		if (ret)
+			goto out;
+	}
+
+	ret = fill_match_fields(adapter, &fs, cls, start, false);
+	if (ret)
+		goto out;
+
+	/* Fill ch_filter_specification action fields to be shipped to
+	 * hardware.
+	 */
+	ret = fill_action_fields(adapter, &fs, cls);
+	if (ret)
+		goto out;
+
+	/* The filter spec has been completely built from the info
+	 * provided from u32.  We now set some default fields in the
+	 * spec for sanity.
+	 */
+
+	/* Match only packets coming from the ingress port where this
+	 * filter will be created.
+	 */
+	fs.val.iport = netdev2pinfo(dev)->port_id;
+	fs.mask.iport = ~0;
+
+	/* Enable filter hit counts. */
+	fs.hitcnts = 1;
+
+	/* Set type of filter - IPv6 or IPv4 */
+	fs.type = is_ipv6 ? 1 : 0;
+
+	/* Set the filter */
+	ret = cxgb4_set_filter(dev, filter_id, &fs);
+	if (ret)
+		goto out;
+
+	/* If this is a linked bucket, then set the corresponding
+	 * entry in the bitmap to mark it as belonging to this linked
+	 * bucket.
+	 */
+	if (uhtid != 0x800 && t->table[uhtid - 1].link_handle)
+		set_bit(filter_id, t->table[uhtid - 1].tid_map);
+
+out:
+	return ret;
+}
+
+int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
+		       struct tc_cls_u32_offload *cls)
+{
+	struct adapter *adapter = netdev2adap(dev);
+	unsigned int filter_id, max_tids, i, j;
+	struct cxgb4_link *link = NULL;
+	struct cxgb4_tc_u32_table *t;
+	u32 handle, uhtid;
+	int ret;
+
+	if (!can_tc_u32_offload(dev))
+		return -EOPNOTSUPP;
+
+	/* Fetch the location to delete the filter. */
+	filter_id = cls->knode.handle & 0xFFFFF;
+
+	if (filter_id > adapter->tids.nftids) {
+		dev_err(adapter->pdev_dev,
+			"Location %d out of range for deletion. Max: %d\n",
+			filter_id, adapter->tids.nftids);
+		return -ERANGE;
+	}
+
+	t = adapter->tc_u32;
+	handle = cls->knode.handle;
+	uhtid = TC_U32_USERHTID(cls->knode.handle);
+
+	/* Ensure that uhtid is either root u32 (i.e. 0x800)
+	 * or a a valid linked bucket.
+	 */
+	if (uhtid != 0x800 && uhtid >= t->size)
+		return -EINVAL;
+
+	/* Delete the specified filter */
+	if (uhtid != 0x800) {
+		link = &t->table[uhtid - 1];
+		if (!link->link_handle)
+			return -EINVAL;
+
+		if (!test_bit(filter_id, link->tid_map))
+			return -EINVAL;
+	}
+
+	ret = cxgb4_del_filter(dev, filter_id);
+	if (ret)
+		goto out;
+
+	if (link)
+		clear_bit(filter_id, link->tid_map);
+
+	/* If a link is being deleted, then delete all filters
+	 * associated with the link.
+	 */
+	max_tids = adapter->tids.nftids;
+	for (i = 0; i < t->size; i++) {
+		link = &t->table[i];
+
+		if (link->link_handle == handle) {
+			for (j = 0; j < max_tids; j++) {
+				if (!test_bit(j, link->tid_map))
+					continue;
+
+				ret = __cxgb4_del_filter(dev, j, NULL);
+				if (ret)
+					goto out;
+
+				clear_bit(j, link->tid_map);
+			}
+
+			/* Clear the link state */
+			link->match_field = NULL;
+			link->link_handle = 0;
+			memset(&link->fs, 0, sizeof(link->fs));
+			break;
+		}
+	}
+
+out:
+	return ret;
+}
+
+void cxgb4_cleanup_tc_u32(struct adapter *adap)
+{
+	struct cxgb4_tc_u32_table *t;
+	unsigned int i;
+
+	if (!adap->tc_u32)
+		return;
+
+	/* Free up all allocated memory. */
+	t = adap->tc_u32;
+	for (i = 0; i < t->size; i++) {
+		struct cxgb4_link *link = &t->table[i];
+
+		t4_free_mem(link->tid_map);
+	}
+	t4_free_mem(adap->tc_u32);
+}
+
+struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap,
+					     unsigned int size)
+{
+	struct cxgb4_tc_u32_table *t;
+	unsigned int i;
+
+	if (!size)
+		return NULL;
+
+	t = t4_alloc_mem(sizeof(*t) +
+			 (size * sizeof(struct cxgb4_link)));
+	if (!t)
+		return NULL;
+
+	t->size = size;
+
+	for (i = 0; i < t->size; i++) {
+		struct cxgb4_link *link = &t->table[i];
+		unsigned int bmap_size;
+		unsigned int max_tids;
+
+		max_tids = adap->tids.nftids;
+		bmap_size = BITS_TO_LONGS(max_tids);
+		link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size);
+		if (!link->tid_map)
+			goto out_no_mem;
+		bitmap_zero(link->tid_map, max_tids);
+	}
+
+	return t;
+
+out_no_mem:
+	for (i = 0; i < t->size; i++) {
+		struct cxgb4_link *link = &t->table[i];
+
+		if (link->tid_map)
+			t4_free_mem(link->tid_map);
+	}
+
+	if (t)
+		t4_free_mem(t);
+
+	return NULL;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
new file mode 100644
index 0000000..6bdc885
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_TC_U32_H
+#define __CXGB4_TC_U32_H
+
+#include <net/pkt_cls.h>
+
+#define CXGB4_MAX_LINK_HANDLE 32
+
+static inline bool can_tc_u32_offload(struct net_device *dev)
+{
+	struct adapter *adap = netdev2adap(dev);
+
+	return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false;
+}
+
+int cxgb4_config_knode(struct net_device *dev, __be16 protocol,
+		       struct tc_cls_u32_offload *cls);
+int cxgb4_delete_knode(struct net_device *dev, __be16 protocol,
+		       struct tc_cls_u32_offload *cls);
+
+void cxgb4_cleanup_tc_u32(struct adapter *adapter);
+struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap,
+					     unsigned int size);
+#endif /* __CXGB4_TC_U32_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h
new file mode 100644
index 0000000..a4b99ed
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h
@@ -0,0 +1,294 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_TC_U32_PARSE_H
+#define __CXGB4_TC_U32_PARSE_H
+
+struct cxgb4_match_field {
+	int off; /* Offset from the beginning of the header to match */
+	/* Fill the value/mask pair in the spec if matched */
+	int (*val)(struct ch_filter_specification *f, u32 val, u32 mask);
+};
+
+/* IPv4 match fields */
+static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f,
+				      u32 val, u32 mask)
+{
+	f->val.tos  = (ntohl(val)  >> 16) & 0x000000FF;
+	f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF;
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f,
+				       u32 val, u32 mask)
+{
+	u32 mask_val;
+	u8 frag_val;
+
+	frag_val = (ntohl(val) >> 13) & 0x00000007;
+	mask_val = ntohl(mask) & 0x0000FFFF;
+
+	if (frag_val == 0x1 && mask_val != 0x3FFF) { /* MF set */
+		f->val.frag = 1;
+		f->mask.frag = 1;
+	} else if (frag_val == 0x2 && mask_val != 0x3FFF) { /* DF set */
+		f->val.frag = 0;
+		f->mask.frag = 1;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f,
+					u32 val, u32 mask)
+{
+	f->val.proto  = (ntohl(val)  >> 16) & 0x000000FF;
+	f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF;
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f,
+					 u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[0],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[0], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f,
+					 u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[0],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[0], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static const struct cxgb4_match_field cxgb4_ipv4_fields[] = {
+	{ .off = 0,  .val = cxgb4_fill_ipv4_tos },
+	{ .off = 4,  .val = cxgb4_fill_ipv4_frag },
+	{ .off = 8,  .val = cxgb4_fill_ipv4_proto },
+	{ .off = 12, .val = cxgb4_fill_ipv4_src_ip },
+	{ .off = 16, .val = cxgb4_fill_ipv4_dst_ip },
+	{ .val = NULL }
+};
+
+/* IPv6 match fields */
+static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f,
+				      u32 val, u32 mask)
+{
+	f->val.tos  = (ntohl(val)  >> 20) & 0x000000FF;
+	f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF;
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f,
+					u32 val, u32 mask)
+{
+	f->val.proto  = (ntohl(val)  >> 8) & 0x000000FF;
+	f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF;
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[0],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[0], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[4],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[4], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[8],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[8], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.fip[12],  &val,  sizeof(u32));
+	memcpy(&f->mask.fip[12], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[0],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[0], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[4],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[4], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[8],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[8], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static inline int cxgb4_fill_ipv6_dst_ip3(struct ch_filter_specification *f,
+					  u32 val, u32 mask)
+{
+	memcpy(&f->val.lip[12],  &val,  sizeof(u32));
+	memcpy(&f->mask.lip[12], &mask, sizeof(u32));
+
+	return 0;
+}
+
+static const struct cxgb4_match_field cxgb4_ipv6_fields[] = {
+	{ .off = 0,  .val = cxgb4_fill_ipv6_tos },
+	{ .off = 4,  .val = cxgb4_fill_ipv6_proto },
+	{ .off = 8,  .val = cxgb4_fill_ipv6_src_ip0 },
+	{ .off = 12, .val = cxgb4_fill_ipv6_src_ip1 },
+	{ .off = 16, .val = cxgb4_fill_ipv6_src_ip2 },
+	{ .off = 20, .val = cxgb4_fill_ipv6_src_ip3 },
+	{ .off = 24, .val = cxgb4_fill_ipv6_dst_ip0 },
+	{ .off = 28, .val = cxgb4_fill_ipv6_dst_ip1 },
+	{ .off = 32, .val = cxgb4_fill_ipv6_dst_ip2 },
+	{ .off = 36, .val = cxgb4_fill_ipv6_dst_ip3 },
+	{ .val = NULL }
+};
+
+/* TCP/UDP match */
+static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f,
+				      u32 val, u32 mask)
+{
+	f->val.fport  = ntohl(val)  >> 16;
+	f->mask.fport = ntohl(mask) >> 16;
+	f->val.lport  = ntohl(val)  & 0x0000FFFF;
+	f->mask.lport = ntohl(mask) & 0x0000FFFF;
+
+	return 0;
+};
+
+static const struct cxgb4_match_field cxgb4_tcp_fields[] = {
+	{ .off = 0, .val = cxgb4_fill_l4_ports },
+	{ .val = NULL }
+};
+
+static const struct cxgb4_match_field cxgb4_udp_fields[] = {
+	{ .off = 0, .val = cxgb4_fill_l4_ports },
+	{ .val = NULL }
+};
+
+struct cxgb4_next_header {
+	unsigned int offset; /* Offset to next header */
+	/* offset, shift, and mask added to offset above
+	 * to get to next header.  Useful when using a header
+	 * field's value to jump to next header such as IHL field
+	 * in IPv4 header.
+	 */
+	unsigned int offoff;
+	u32 shift;
+	u32 mask;
+	/* match criteria to make this jump */
+	unsigned int match_off;
+	u32 match_val;
+	u32 match_mask;
+	/* location of jump to make */
+	const struct cxgb4_match_field *jump;
+};
+
+/* Accept a rule with a jump to transport layer header based on IHL field in
+ * IPv4 header.
+ */
+static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = {
+	{ .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF,
+	  .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00,
+	  .jump = cxgb4_tcp_fields },
+	{ .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF,
+	  .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00,
+	  .jump = cxgb4_udp_fields },
+	{ .jump = NULL }
+};
+
+/* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header
+ * to get to transport layer header.
+ */
+static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = {
+	{ .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0,
+	  .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000,
+	  .jump = cxgb4_tcp_fields },
+	{ .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0,
+	  .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000,
+	  .jump = cxgb4_udp_fields },
+	{ .jump = NULL }
+};
+
+struct cxgb4_link {
+	const struct cxgb4_match_field *match_field;  /* Next header */
+	struct ch_filter_specification fs; /* Match spec associated with link */
+	u32 link_handle;         /* Knode handle associated with the link */
+	unsigned long *tid_map;  /* Bitmap for filter tids */
+};
+
+struct cxgb4_tc_u32_table {
+	unsigned int size;          /* number of entries in table */
+	struct cxgb4_link table[0]; /* Jump table */
+};
+#endif /* __CXGB4_TC_U32_PARSE_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
new file mode 100644
index 0000000..b4b2d20
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -0,0 +1,696 @@
+/*
+ * cxgb4_uld.c:Chelsio Upper Layer Driver Interface for T4/T5/T6 SGE management
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *  Written by: Atul Gupta (atul.gupta@chelsio.com)
+ *  Written by: Hariprasad Shenai (hariprasad@chelsio.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/pci.h>
+
+#include "cxgb4.h"
+#include "cxgb4_uld.h"
+#include "t4_regs.h"
+#include "t4fw_api.h"
+#include "t4_msg.h"
+
+#define for_each_uldrxq(m, i) for (i = 0; i < ((m)->nrxq + (m)->nciq); i++)
+
+static int get_msix_idx_from_bmap(struct adapter *adap)
+{
+	struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds;
+	unsigned long flags;
+	unsigned int msix_idx;
+
+	spin_lock_irqsave(&bmap->lock, flags);
+	msix_idx = find_first_zero_bit(bmap->msix_bmap, bmap->mapsize);
+	if (msix_idx < bmap->mapsize) {
+		__set_bit(msix_idx, bmap->msix_bmap);
+	} else {
+		spin_unlock_irqrestore(&bmap->lock, flags);
+		return -ENOSPC;
+	}
+
+	spin_unlock_irqrestore(&bmap->lock, flags);
+	return msix_idx;
+}
+
+static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx)
+{
+	struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bmap->lock, flags);
+	 __clear_bit(msix_idx, bmap->msix_bmap);
+	spin_unlock_irqrestore(&bmap->lock, flags);
+}
+
+/* Flush the aggregated lro sessions */
+static void uldrx_flush_handler(struct sge_rspq *q)
+{
+	struct adapter *adap = q->adap;
+
+	if (adap->uld[q->uld].lro_flush)
+		adap->uld[q->uld].lro_flush(&q->lro_mgr);
+}
+
+/**
+ *	uldrx_handler - response queue handler for ULD queues
+ *	@q: the response queue that received the packet
+ *	@rsp: the response queue descriptor holding the offload message
+ *	@gl: the gather list of packet fragments
+ *
+ *	Deliver an ingress offload packet to a ULD.  All processing is done by
+ *	the ULD, we just maintain statistics.
+ */
+static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
+			 const struct pkt_gl *gl)
+{
+	struct adapter *adap = q->adap;
+	struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
+	int ret;
+
+	/* FW can send CPLs encapsulated in a CPL_FW4_MSG */
+	if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG &&
+	    ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
+		rsp += 2;
+
+	if (q->flush_handler)
+		ret = adap->uld[q->uld].lro_rx_handler(adap->uld[q->uld].handle,
+				rsp, gl, &q->lro_mgr,
+				&q->napi);
+	else
+		ret = adap->uld[q->uld].rx_handler(adap->uld[q->uld].handle,
+				rsp, gl);
+
+	if (ret) {
+		rxq->stats.nomem++;
+		return -1;
+	}
+
+	if (!gl)
+		rxq->stats.imm++;
+	else if (gl == CXGB4_MSG_AN)
+		rxq->stats.an++;
+	else
+		rxq->stats.pkts++;
+	return 0;
+}
+
+static int alloc_uld_rxqs(struct adapter *adap,
+			  struct sge_uld_rxq_info *rxq_info,
+			  unsigned int nq, unsigned int offset, bool lro)
+{
+	struct sge *s = &adap->sge;
+	struct sge_ofld_rxq *q = rxq_info->uldrxq + offset;
+	unsigned short *ids = rxq_info->rspq_id + offset;
+	unsigned int per_chan = nq / adap->params.nports;
+	unsigned int bmap_idx = 0;
+	int i, err, msi_idx;
+
+	if (adap->flags & USING_MSIX)
+		msi_idx = 1;
+	else
+		msi_idx = -((int)s->intrq.abs_id + 1);
+
+	for (i = 0; i < nq; i++, q++) {
+		if (msi_idx >= 0) {
+			bmap_idx = get_msix_idx_from_bmap(adap);
+			msi_idx = adap->msix_info_ulds[bmap_idx].idx;
+		}
+		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
+				       adap->port[i / per_chan],
+				       msi_idx,
+				       q->fl.size ? &q->fl : NULL,
+				       uldrx_handler,
+				       lro ? uldrx_flush_handler : NULL,
+				       0);
+		if (err)
+			goto freeout;
+		if (msi_idx >= 0)
+			rxq_info->msix_tbl[i + offset] = bmap_idx;
+		memset(&q->stats, 0, sizeof(q->stats));
+		if (ids)
+			ids[i] = q->rspq.abs_id;
+	}
+	return 0;
+freeout:
+	q = rxq_info->uldrxq + offset;
+	for ( ; i; i--, q++) {
+		if (q->rspq.desc)
+			free_rspq_fl(adap, &q->rspq,
+				     q->fl.size ? &q->fl : NULL);
+	}
+
+	/* We need to free rxq also in case of ciq allocation failure */
+	if (offset) {
+		q = rxq_info->uldrxq + offset;
+		for ( ; i; i--, q++) {
+			if (q->rspq.desc)
+				free_rspq_fl(adap, &q->rspq,
+					     q->fl.size ? &q->fl : NULL);
+		}
+	}
+	return err;
+}
+
+static int
+setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro)
+{
+	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	int i, ret = 0;
+
+	if (adap->flags & USING_MSIX) {
+		rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq),
+					     sizeof(unsigned short),
+					     GFP_KERNEL);
+		if (!rxq_info->msix_tbl)
+			return -ENOMEM;
+	}
+
+	ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) &&
+		 !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq,
+				 rxq_info->nrxq, lro));
+
+	/* Tell uP to route control queue completions to rdma rspq */
+	if (adap->flags & FULL_INIT_DONE &&
+	    !ret && uld_type == CXGB4_ULD_RDMA) {
+		struct sge *s = &adap->sge;
+		unsigned int cmplqid;
+		u32 param, cmdop;
+
+		cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL;
+		for_each_port(adap, i) {
+			cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id;
+			param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+				 FW_PARAMS_PARAM_X_V(cmdop) |
+				 FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id));
+			ret = t4_set_params(adap, adap->mbox, adap->pf,
+					    0, 1, &param, &cmplqid);
+		}
+	}
+	return ret;
+}
+
+static void t4_free_uld_rxqs(struct adapter *adap, int n,
+			     struct sge_ofld_rxq *q)
+{
+	for ( ; n; n--, q++) {
+		if (q->rspq.desc)
+			free_rspq_fl(adap, &q->rspq,
+				     q->fl.size ? &q->fl : NULL);
+	}
+}
+
+static void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type)
+{
+	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+
+	if (adap->flags & FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) {
+		struct sge *s = &adap->sge;
+		u32 param, cmdop, cmplqid = 0;
+		int i;
+
+		cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL;
+		for_each_port(adap, i) {
+			param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+				 FW_PARAMS_PARAM_X_V(cmdop) |
+				 FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id));
+			t4_set_params(adap, adap->mbox, adap->pf,
+				      0, 1, &param, &cmplqid);
+		}
+	}
+
+	if (rxq_info->nciq)
+		t4_free_uld_rxqs(adap, rxq_info->nciq,
+				 rxq_info->uldrxq + rxq_info->nrxq);
+	t4_free_uld_rxqs(adap, rxq_info->nrxq, rxq_info->uldrxq);
+	if (adap->flags & USING_MSIX)
+		kfree(rxq_info->msix_tbl);
+}
+
+static int cfg_queues_uld(struct adapter *adap, unsigned int uld_type,
+			  const struct cxgb4_uld_info *uld_info)
+{
+	struct sge *s = &adap->sge;
+	struct sge_uld_rxq_info *rxq_info;
+	int i, nrxq, ciq_size;
+
+	rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL);
+	if (!rxq_info)
+		return -ENOMEM;
+
+	if (adap->flags & USING_MSIX && uld_info->nrxq > s->nqs_per_uld) {
+		i = s->nqs_per_uld;
+		rxq_info->nrxq = roundup(i, adap->params.nports);
+	} else {
+		i = min_t(int, uld_info->nrxq,
+			  num_online_cpus());
+		rxq_info->nrxq = roundup(i, adap->params.nports);
+	}
+	if (!uld_info->ciq) {
+		rxq_info->nciq = 0;
+	} else  {
+		if (adap->flags & USING_MSIX)
+			rxq_info->nciq = min_t(int, s->nqs_per_uld,
+					       num_online_cpus());
+		else
+			rxq_info->nciq = min_t(int, MAX_OFLD_QSETS,
+					       num_online_cpus());
+		rxq_info->nciq = ((rxq_info->nciq / adap->params.nports) *
+				  adap->params.nports);
+		rxq_info->nciq = max_t(int, rxq_info->nciq,
+				       adap->params.nports);
+	}
+
+	nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */
+	rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq),
+				   GFP_KERNEL);
+	if (!rxq_info->uldrxq) {
+		kfree(rxq_info);
+		return -ENOMEM;
+	}
+
+	rxq_info->rspq_id = kcalloc(nrxq, sizeof(unsigned short), GFP_KERNEL);
+	if (!rxq_info->rspq_id) {
+		kfree(rxq_info->uldrxq);
+		kfree(rxq_info);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < rxq_info->nrxq; i++) {
+		struct sge_ofld_rxq *r = &rxq_info->uldrxq[i];
+
+		init_rspq(adap, &r->rspq, 5, 1, uld_info->rxq_size, 64);
+		r->rspq.uld = uld_type;
+		r->fl.size = 72;
+	}
+
+	ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
+	if (ciq_size > SGE_MAX_IQ_SIZE) {
+		dev_warn(adap->pdev_dev, "CIQ size too small for available IQs\n");
+		ciq_size = SGE_MAX_IQ_SIZE;
+	}
+
+	for (i = rxq_info->nrxq; i < nrxq; i++) {
+		struct sge_ofld_rxq *r = &rxq_info->uldrxq[i];
+
+		init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
+		r->rspq.uld = uld_type;
+	}
+
+	memcpy(rxq_info->name, uld_info->name, IFNAMSIZ);
+	adap->sge.uld_rxq_info[uld_type] = rxq_info;
+
+	return 0;
+}
+
+static void free_queues_uld(struct adapter *adap, unsigned int uld_type)
+{
+	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+
+	kfree(rxq_info->rspq_id);
+	kfree(rxq_info->uldrxq);
+	kfree(rxq_info);
+}
+
+static int
+request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
+{
+	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	int err = 0;
+	unsigned int idx, bmap_idx;
+
+	for_each_uldrxq(rxq_info, idx) {
+		bmap_idx = rxq_info->msix_tbl[idx];
+		err = request_irq(adap->msix_info_ulds[bmap_idx].vec,
+				  t4_sge_intr_msix, 0,
+				  adap->msix_info_ulds[bmap_idx].desc,
+				  &rxq_info->uldrxq[idx].rspq);
+		if (err)
+			goto unwind;
+	}
+	return 0;
+unwind:
+	while (idx-- > 0) {
+		bmap_idx = rxq_info->msix_tbl[idx];
+		free_msix_idx_in_bmap(adap, bmap_idx);
+		free_irq(adap->msix_info_ulds[bmap_idx].vec,
+			 &rxq_info->uldrxq[idx].rspq);
+	}
+	return err;
+}
+
+static void
+free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type)
+{
+	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	unsigned int idx, bmap_idx;
+
+	for_each_uldrxq(rxq_info, idx) {
+		bmap_idx = rxq_info->msix_tbl[idx];
+
+		free_msix_idx_in_bmap(adap, bmap_idx);
+		free_irq(adap->msix_info_ulds[bmap_idx].vec,
+			 &rxq_info->uldrxq[idx].rspq);
+	}
+}
+
+static void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type)
+{
+	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	int n = sizeof(adap->msix_info_ulds[0].desc);
+	unsigned int idx, bmap_idx;
+
+	for_each_uldrxq(rxq_info, idx) {
+		bmap_idx = rxq_info->msix_tbl[idx];
+
+		snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d",
+			 adap->port[0]->name, rxq_info->name, idx);
+	}
+}
+
+static void enable_rx(struct adapter *adap, struct sge_rspq *q)
+{
+	if (!q)
+		return;
+
+	if (q->handler) {
+		cxgb_busy_poll_init_lock(q);
+		napi_enable(&q->napi);
+	}
+	/* 0-increment GTS to start the timer and enable interrupts */
+	t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
+		     SEINTARM_V(q->intr_params) |
+		     INGRESSQID_V(q->cntxt_id));
+}
+
+static void quiesce_rx(struct adapter *adap, struct sge_rspq *q)
+{
+	if (q && q->handler) {
+		napi_disable(&q->napi);
+		local_bh_disable();
+		while (!cxgb_poll_lock_napi(q))
+			mdelay(1);
+		local_bh_enable();
+	}
+}
+
+static void enable_rx_uld(struct adapter *adap, unsigned int uld_type)
+{
+	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	int idx;
+
+	for_each_uldrxq(rxq_info, idx)
+		enable_rx(adap, &rxq_info->uldrxq[idx].rspq);
+}
+
+static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
+{
+	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+	int idx;
+
+	for_each_uldrxq(rxq_info, idx)
+		quiesce_rx(adap, &rxq_info->uldrxq[idx].rspq);
+}
+
+static void uld_queue_init(struct adapter *adap, unsigned int uld_type,
+			   struct cxgb4_lld_info *lli)
+{
+	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
+
+	lli->rxq_ids = rxq_info->rspq_id;
+	lli->nrxq = rxq_info->nrxq;
+	lli->ciq_ids = rxq_info->rspq_id + rxq_info->nrxq;
+	lli->nciq = rxq_info->nciq;
+}
+
+int t4_uld_mem_alloc(struct adapter *adap)
+{
+	struct sge *s = &adap->sge;
+
+	adap->uld = kcalloc(CXGB4_ULD_MAX, sizeof(*adap->uld), GFP_KERNEL);
+	if (!adap->uld)
+		return -ENOMEM;
+
+	s->uld_rxq_info = kzalloc(CXGB4_ULD_MAX *
+				  sizeof(struct sge_uld_rxq_info *),
+				  GFP_KERNEL);
+	if (!s->uld_rxq_info)
+		goto err_uld;
+
+	return 0;
+err_uld:
+	kfree(adap->uld);
+	return -ENOMEM;
+}
+
+void t4_uld_mem_free(struct adapter *adap)
+{
+	struct sge *s = &adap->sge;
+
+	kfree(s->uld_rxq_info);
+	kfree(adap->uld);
+}
+
+void t4_uld_clean_up(struct adapter *adap)
+{
+	struct sge_uld_rxq_info *rxq_info;
+	unsigned int i;
+
+	if (!adap->uld)
+		return;
+	for (i = 0; i < CXGB4_ULD_MAX; i++) {
+		if (!adap->uld[i].handle)
+			continue;
+		rxq_info = adap->sge.uld_rxq_info[i];
+		if (adap->flags & FULL_INIT_DONE)
+			quiesce_rx_uld(adap, i);
+		if (adap->flags & USING_MSIX)
+			free_msix_queue_irqs_uld(adap, i);
+		free_sge_queues_uld(adap, i);
+		free_queues_uld(adap, i);
+	}
+}
+
+static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
+{
+	int i;
+
+	lld->pdev = adap->pdev;
+	lld->pf = adap->pf;
+	lld->l2t = adap->l2t;
+	lld->tids = &adap->tids;
+	lld->ports = adap->port;
+	lld->vr = &adap->vres;
+	lld->mtus = adap->params.mtus;
+	lld->ntxq = adap->sge.ofldqsets;
+	lld->nchan = adap->params.nports;
+	lld->nports = adap->params.nports;
+	lld->wr_cred = adap->params.ofldq_wr_cred;
+	lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
+	lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
+	lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
+	lld->iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
+	lld->iscsi_ppm = &adap->iscsi_ppm;
+	lld->adapter_type = adap->params.chip;
+	lld->cclk_ps = 1000000000 / adap->params.vpd.cclk;
+	lld->udb_density = 1 << adap->params.sge.eq_qpp;
+	lld->ucq_density = 1 << adap->params.sge.iq_qpp;
+	lld->filt_mode = adap->params.tp.vlan_pri_map;
+	/* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
+	for (i = 0; i < NCHAN; i++)
+		lld->tx_modq[i] = i;
+	lld->gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A);
+	lld->db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A);
+	lld->fw_vers = adap->params.fw_vers;
+	lld->dbfifo_int_thresh = dbfifo_int_thresh;
+	lld->sge_ingpadboundary = adap->sge.fl_align;
+	lld->sge_egrstatuspagesize = adap->sge.stat_len;
+	lld->sge_pktshift = adap->sge.pktshift;
+	lld->enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
+	lld->max_ordird_qp = adap->params.max_ordird_qp;
+	lld->max_ird_adapter = adap->params.max_ird_adapter;
+	lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
+	lld->nodeid = dev_to_node(adap->pdev_dev);
+}
+
+static void uld_attach(struct adapter *adap, unsigned int uld)
+{
+	void *handle;
+	struct cxgb4_lld_info lli;
+
+	uld_init(adap, &lli);
+	uld_queue_init(adap, uld, &lli);
+
+	handle = adap->uld[uld].add(&lli);
+	if (IS_ERR(handle)) {
+		dev_warn(adap->pdev_dev,
+			 "could not attach to the %s driver, error %ld\n",
+			 adap->uld[uld].name, PTR_ERR(handle));
+		return;
+	}
+
+	adap->uld[uld].handle = handle;
+	t4_register_netevent_notifier();
+
+	if (adap->flags & FULL_INIT_DONE)
+		adap->uld[uld].state_change(handle, CXGB4_STATE_UP);
+}
+
+/**
+ *	cxgb4_register_uld - register an upper-layer driver
+ *	@type: the ULD type
+ *	@p: the ULD methods
+ *
+ *	Registers an upper-layer driver with this driver and notifies the ULD
+ *	about any presently available devices that support its type.  Returns
+ *	%-EBUSY if a ULD of the same type is already registered.
+ */
+int cxgb4_register_uld(enum cxgb4_uld type,
+		       const struct cxgb4_uld_info *p)
+{
+	int ret = 0;
+	unsigned int adap_idx = 0;
+	struct adapter *adap;
+
+	if (type >= CXGB4_ULD_MAX)
+		return -EINVAL;
+
+	mutex_lock(&uld_mutex);
+	list_for_each_entry(adap, &adapter_list, list_node) {
+		if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
+		    (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
+			continue;
+		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
+			continue;
+		ret = cfg_queues_uld(adap, type, p);
+		if (ret)
+			goto out;
+		ret = setup_sge_queues_uld(adap, type, p->lro);
+		if (ret)
+			goto free_queues;
+		if (adap->flags & USING_MSIX) {
+			name_msix_vecs_uld(adap, type);
+			ret = request_msix_queue_irqs_uld(adap, type);
+			if (ret)
+				goto free_rxq;
+		}
+		if (adap->flags & FULL_INIT_DONE)
+			enable_rx_uld(adap, type);
+		if (adap->uld[type].add) {
+			ret = -EBUSY;
+			goto free_irq;
+		}
+		adap->uld[type] = *p;
+		uld_attach(adap, type);
+		adap_idx++;
+	}
+	mutex_unlock(&uld_mutex);
+	return 0;
+
+free_irq:
+	if (adap->flags & FULL_INIT_DONE)
+		quiesce_rx_uld(adap, type);
+	if (adap->flags & USING_MSIX)
+		free_msix_queue_irqs_uld(adap, type);
+free_rxq:
+	free_sge_queues_uld(adap, type);
+free_queues:
+	free_queues_uld(adap, type);
+out:
+
+	list_for_each_entry(adap, &adapter_list, list_node) {
+		if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
+		    (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
+			continue;
+		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
+			continue;
+		if (!adap_idx)
+			break;
+		adap->uld[type].handle = NULL;
+		adap->uld[type].add = NULL;
+		if (adap->flags & FULL_INIT_DONE)
+			quiesce_rx_uld(adap, type);
+		if (adap->flags & USING_MSIX)
+			free_msix_queue_irqs_uld(adap, type);
+		free_sge_queues_uld(adap, type);
+		free_queues_uld(adap, type);
+		adap_idx--;
+	}
+	mutex_unlock(&uld_mutex);
+	return ret;
+}
+EXPORT_SYMBOL(cxgb4_register_uld);
+
+/**
+ *	cxgb4_unregister_uld - unregister an upper-layer driver
+ *	@type: the ULD type
+ *
+ *	Unregisters an existing upper-layer driver.
+ */
+int cxgb4_unregister_uld(enum cxgb4_uld type)
+{
+	struct adapter *adap;
+
+	if (type >= CXGB4_ULD_MAX)
+		return -EINVAL;
+
+	mutex_lock(&uld_mutex);
+	list_for_each_entry(adap, &adapter_list, list_node) {
+		if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
+		    (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
+			continue;
+		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
+			continue;
+		adap->uld[type].handle = NULL;
+		adap->uld[type].add = NULL;
+		if (adap->flags & FULL_INIT_DONE)
+			quiesce_rx_uld(adap, type);
+		if (adap->flags & USING_MSIX)
+			free_msix_queue_irqs_uld(adap, type);
+		free_sge_queues_uld(adap, type);
+		free_queues_uld(adap, type);
+	}
+	mutex_unlock(&uld_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(cxgb4_unregister_uld);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index f3c58aa..47bd14f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -32,8 +32,8 @@
  * SOFTWARE.
  */
 
-#ifndef __CXGB4_OFLD_H
-#define __CXGB4_OFLD_H
+#ifndef __CXGB4_ULD_H
+#define __CXGB4_ULD_H
 
 #include <linux/cache.h>
 #include <linux/spinlock.h>
@@ -42,6 +42,8 @@
 #include <linux/atomic.h>
 #include "cxgb4.h"
 
+#define MAX_ULD_QSETS 16
+
 /* CPL message priority levels */
 enum {
 	CPL_PRIORITY_DATA     = 0,  /* data messages */
@@ -104,6 +106,7 @@
 	unsigned int atid_base;
 
 	struct filter_entry *ftid_tab;
+	unsigned long *ftid_bmap;
 	unsigned int nftids;
 	unsigned int ftid_base;
 	unsigned int aftid_base;
@@ -124,6 +127,8 @@
 	atomic_t tids_in_use;
 	/* TIDs in the HASH */
 	atomic_t hash_tids_in_use;
+	/* lock for setting/clearing filter bitmap */
+	spinlock_t ftid_lock;
 };
 
 static inline void *lookup_tid(const struct tid_info *t, unsigned int tid)
@@ -183,15 +188,38 @@
 int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
 			       unsigned int queue, bool ipv6);
 
+/* Filter operation context to allow callers of cxgb4_set_filter() and
+ * cxgb4_del_filter() to wait for an asynchronous completion.
+ */
+struct filter_ctx {
+	struct completion completion;	/* completion rendezvous */
+	void *closure;			/* caller's opaque information */
+	int result;			/* result of operation */
+	u32 tid;			/* to store tid */
+};
+
+struct ch_filter_specification;
+
+int __cxgb4_set_filter(struct net_device *dev, int filter_id,
+		       struct ch_filter_specification *fs,
+		       struct filter_ctx *ctx);
+int __cxgb4_del_filter(struct net_device *dev, int filter_id,
+		       struct filter_ctx *ctx);
+int cxgb4_set_filter(struct net_device *dev, int filter_id,
+		     struct ch_filter_specification *fs);
+int cxgb4_del_filter(struct net_device *dev, int filter_id);
+
 static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue)
 {
 	skb_set_queue_mapping(skb, (queue << 1) | prio);
 }
 
 enum cxgb4_uld {
+	CXGB4_ULD_INIT,
 	CXGB4_ULD_RDMA,
 	CXGB4_ULD_ISCSI,
 	CXGB4_ULD_ISCSIT,
+	CXGB4_ULD_CRYPTO,
 	CXGB4_ULD_MAX
 };
 
@@ -284,6 +312,11 @@
 
 struct cxgb4_uld_info {
 	const char *name;
+	void *handle;
+	unsigned int nrxq;
+	unsigned int rxq_size;
+	bool ciq;
+	bool lro;
 	void *(*add)(const struct cxgb4_lld_info *p);
 	int (*rx_handler)(void *handle, const __be64 *rsp,
 			  const struct pkt_gl *gl);
@@ -330,4 +363,4 @@
 			 u64 *pbar2_qoffset,
 			 unsigned int *pbar2_qid);
 
-#endif  /* !__CXGB4_OFLD_H */
+#endif  /* !__CXGB4_ULD_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
new file mode 100644
index 0000000..539de76
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c
@@ -0,0 +1,556 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+
+#include "cxgb4.h"
+#include "sched.h"
+
+/* Spinlock must be held by caller */
+static int t4_sched_class_fw_cmd(struct port_info *pi,
+				 struct ch_sched_params *p,
+				 enum sched_fw_ops op)
+{
+	struct adapter *adap = pi->adapter;
+	struct sched_table *s = pi->sched_tbl;
+	struct sched_class *e;
+	int err = 0;
+
+	e = &s->tab[p->u.params.class];
+	switch (op) {
+	case SCHED_FW_OP_ADD:
+		err = t4_sched_params(adap, p->type,
+				      p->u.params.level, p->u.params.mode,
+				      p->u.params.rateunit,
+				      p->u.params.ratemode,
+				      p->u.params.channel, e->idx,
+				      p->u.params.minrate, p->u.params.maxrate,
+				      p->u.params.weight, p->u.params.pktsize);
+		break;
+	default:
+		err = -ENOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+/* Spinlock must be held by caller */
+static int t4_sched_bind_unbind_op(struct port_info *pi, void *arg,
+				   enum sched_bind_type type, bool bind)
+{
+	struct adapter *adap = pi->adapter;
+	u32 fw_mnem, fw_class, fw_param;
+	unsigned int pf = adap->pf;
+	unsigned int vf = 0;
+	int err = 0;
+
+	switch (type) {
+	case SCHED_QUEUE: {
+		struct sched_queue_entry *qe;
+
+		qe = (struct sched_queue_entry *)arg;
+
+		/* Create a template for the FW_PARAMS_CMD mnemonic and
+		 * value (TX Scheduling Class in this case).
+		 */
+		fw_mnem = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+			   FW_PARAMS_PARAM_X_V(
+				   FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH));
+		fw_class = bind ? qe->param.class : FW_SCHED_CLS_NONE;
+		fw_param = (fw_mnem | FW_PARAMS_PARAM_YZ_V(qe->cntxt_id));
+
+		pf = adap->pf;
+		vf = 0;
+		break;
+	}
+	default:
+		err = -ENOTSUPP;
+		goto out;
+	}
+
+	err = t4_set_params(adap, adap->mbox, pf, vf, 1, &fw_param, &fw_class);
+
+out:
+	return err;
+}
+
+static struct sched_class *t4_sched_queue_lookup(struct port_info *pi,
+						 const unsigned int qid,
+						 int *index)
+{
+	struct sched_table *s = pi->sched_tbl;
+	struct sched_class *e, *end;
+	struct sched_class *found = NULL;
+	int i;
+
+	/* Look for a class with matching bound queue parameters */
+	end = &s->tab[s->sched_size];
+	for (e = &s->tab[0]; e != end; ++e) {
+		struct sched_queue_entry *qe;
+
+		i = 0;
+		if (e->state == SCHED_STATE_UNUSED)
+			continue;
+
+		list_for_each_entry(qe, &e->queue_list, list) {
+			if (qe->cntxt_id == qid) {
+				found = e;
+				if (index)
+					*index = i;
+				break;
+			}
+			i++;
+		}
+
+		if (found)
+			break;
+	}
+
+	return found;
+}
+
+static int t4_sched_queue_unbind(struct port_info *pi, struct ch_sched_queue *p)
+{
+	struct adapter *adap = pi->adapter;
+	struct sched_class *e;
+	struct sched_queue_entry *qe = NULL;
+	struct sge_eth_txq *txq;
+	unsigned int qid;
+	int index = -1;
+	int err = 0;
+
+	if (p->queue < 0 || p->queue >= pi->nqsets)
+		return -ERANGE;
+
+	txq = &adap->sge.ethtxq[pi->first_qset + p->queue];
+	qid = txq->q.cntxt_id;
+
+	/* Find the existing class that the queue is bound to */
+	e = t4_sched_queue_lookup(pi, qid, &index);
+	if (e && index >= 0) {
+		int i = 0;
+
+		spin_lock(&e->lock);
+		list_for_each_entry(qe, &e->queue_list, list) {
+			if (i == index)
+				break;
+			i++;
+		}
+		err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE,
+					      false);
+		if (err) {
+			spin_unlock(&e->lock);
+			goto out;
+		}
+
+		list_del(&qe->list);
+		t4_free_mem(qe);
+		if (atomic_dec_and_test(&e->refcnt)) {
+			e->state = SCHED_STATE_UNUSED;
+			memset(&e->info, 0, sizeof(e->info));
+		}
+		spin_unlock(&e->lock);
+	}
+out:
+	return err;
+}
+
+static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p)
+{
+	struct adapter *adap = pi->adapter;
+	struct sched_table *s = pi->sched_tbl;
+	struct sched_class *e;
+	struct sched_queue_entry *qe = NULL;
+	struct sge_eth_txq *txq;
+	unsigned int qid;
+	int err = 0;
+
+	if (p->queue < 0 || p->queue >= pi->nqsets)
+		return -ERANGE;
+
+	qe = t4_alloc_mem(sizeof(struct sched_queue_entry));
+	if (!qe)
+		return -ENOMEM;
+
+	txq = &adap->sge.ethtxq[pi->first_qset + p->queue];
+	qid = txq->q.cntxt_id;
+
+	/* Unbind queue from any existing class */
+	err = t4_sched_queue_unbind(pi, p);
+	if (err)
+		goto out;
+
+	/* Bind queue to specified class */
+	memset(qe, 0, sizeof(*qe));
+	qe->cntxt_id = qid;
+	memcpy(&qe->param, p, sizeof(qe->param));
+
+	e = &s->tab[qe->param.class];
+	spin_lock(&e->lock);
+	err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE, true);
+	if (err) {
+		t4_free_mem(qe);
+		spin_unlock(&e->lock);
+		goto out;
+	}
+
+	list_add_tail(&qe->list, &e->queue_list);
+	atomic_inc(&e->refcnt);
+	spin_unlock(&e->lock);
+out:
+	return err;
+}
+
+static void t4_sched_class_unbind_all(struct port_info *pi,
+				      struct sched_class *e,
+				      enum sched_bind_type type)
+{
+	if (!e)
+		return;
+
+	switch (type) {
+	case SCHED_QUEUE: {
+		struct sched_queue_entry *qe;
+
+		list_for_each_entry(qe, &e->queue_list, list)
+			t4_sched_queue_unbind(pi, &qe->param);
+		break;
+	}
+	default:
+		break;
+	}
+}
+
+static int t4_sched_class_bind_unbind_op(struct port_info *pi, void *arg,
+					 enum sched_bind_type type, bool bind)
+{
+	int err = 0;
+
+	if (!arg)
+		return -EINVAL;
+
+	switch (type) {
+	case SCHED_QUEUE: {
+		struct ch_sched_queue *qe = (struct ch_sched_queue *)arg;
+
+		if (bind)
+			err = t4_sched_queue_bind(pi, qe);
+		else
+			err = t4_sched_queue_unbind(pi, qe);
+		break;
+	}
+	default:
+		err = -ENOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+/**
+ * cxgb4_sched_class_bind - Bind an entity to a scheduling class
+ * @dev: net_device pointer
+ * @arg: Entity opaque data
+ * @type: Entity type (Queue)
+ *
+ * Binds an entity (queue) to a scheduling class.  If the entity
+ * is bound to another class, it will be unbound from the other class
+ * and bound to the class specified in @arg.
+ */
+int cxgb4_sched_class_bind(struct net_device *dev, void *arg,
+			   enum sched_bind_type type)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct sched_table *s;
+	int err = 0;
+	u8 class_id;
+
+	if (!can_sched(dev))
+		return -ENOTSUPP;
+
+	if (!arg)
+		return -EINVAL;
+
+	switch (type) {
+	case SCHED_QUEUE: {
+		struct ch_sched_queue *qe = (struct ch_sched_queue *)arg;
+
+		class_id = qe->class;
+		break;
+	}
+	default:
+		return -ENOTSUPP;
+	}
+
+	if (!valid_class_id(dev, class_id))
+		return -EINVAL;
+
+	if (class_id == SCHED_CLS_NONE)
+		return -ENOTSUPP;
+
+	s = pi->sched_tbl;
+	write_lock(&s->rw_lock);
+	err = t4_sched_class_bind_unbind_op(pi, arg, type, true);
+	write_unlock(&s->rw_lock);
+
+	return err;
+}
+
+/**
+ * cxgb4_sched_class_unbind - Unbind an entity from a scheduling class
+ * @dev: net_device pointer
+ * @arg: Entity opaque data
+ * @type: Entity type (Queue)
+ *
+ * Unbinds an entity (queue) from a scheduling class.
+ */
+int cxgb4_sched_class_unbind(struct net_device *dev, void *arg,
+			     enum sched_bind_type type)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	struct sched_table *s;
+	int err = 0;
+	u8 class_id;
+
+	if (!can_sched(dev))
+		return -ENOTSUPP;
+
+	if (!arg)
+		return -EINVAL;
+
+	switch (type) {
+	case SCHED_QUEUE: {
+		struct ch_sched_queue *qe = (struct ch_sched_queue *)arg;
+
+		class_id = qe->class;
+		break;
+	}
+	default:
+		return -ENOTSUPP;
+	}
+
+	if (!valid_class_id(dev, class_id))
+		return -EINVAL;
+
+	s = pi->sched_tbl;
+	write_lock(&s->rw_lock);
+	err = t4_sched_class_bind_unbind_op(pi, arg, type, false);
+	write_unlock(&s->rw_lock);
+
+	return err;
+}
+
+/* If @p is NULL, fetch any available unused class */
+static struct sched_class *t4_sched_class_lookup(struct port_info *pi,
+						const struct ch_sched_params *p)
+{
+	struct sched_table *s = pi->sched_tbl;
+	struct sched_class *e, *end;
+	struct sched_class *found = NULL;
+
+	if (!p) {
+		/* Get any available unused class */
+		end = &s->tab[s->sched_size];
+		for (e = &s->tab[0]; e != end; ++e) {
+			if (e->state == SCHED_STATE_UNUSED) {
+				found = e;
+				break;
+			}
+		}
+	} else {
+		/* Look for a class with matching scheduling parameters */
+		struct ch_sched_params info;
+		struct ch_sched_params tp;
+
+		memset(&info, 0, sizeof(info));
+		memset(&tp, 0, sizeof(tp));
+
+		memcpy(&tp, p, sizeof(tp));
+		/* Don't try to match class parameter */
+		tp.u.params.class = SCHED_CLS_NONE;
+
+		end = &s->tab[s->sched_size];
+		for (e = &s->tab[0]; e != end; ++e) {
+			if (e->state == SCHED_STATE_UNUSED)
+				continue;
+
+			memset(&info, 0, sizeof(info));
+			memcpy(&info, &e->info, sizeof(info));
+			/* Don't try to match class parameter */
+			info.u.params.class = SCHED_CLS_NONE;
+
+			if ((info.type == tp.type) &&
+			    (!memcmp(&info.u.params, &tp.u.params,
+				     sizeof(info.u.params)))) {
+				found = e;
+				break;
+			}
+		}
+	}
+
+	return found;
+}
+
+static struct sched_class *t4_sched_class_alloc(struct port_info *pi,
+						struct ch_sched_params *p)
+{
+	struct sched_table *s = pi->sched_tbl;
+	struct sched_class *e;
+	u8 class_id;
+	int err;
+
+	if (!p)
+		return NULL;
+
+	class_id = p->u.params.class;
+
+	/* Only accept search for existing class with matching params
+	 * or allocation of new class with specified params
+	 */
+	if (class_id != SCHED_CLS_NONE)
+		return NULL;
+
+	write_lock(&s->rw_lock);
+	/* See if there's an exisiting class with same
+	 * requested sched params
+	 */
+	e = t4_sched_class_lookup(pi, p);
+	if (!e) {
+		struct ch_sched_params np;
+
+		/* Fetch any available unused class */
+		e = t4_sched_class_lookup(pi, NULL);
+		if (!e)
+			goto out;
+
+		memset(&np, 0, sizeof(np));
+		memcpy(&np, p, sizeof(np));
+		np.u.params.class = e->idx;
+
+		spin_lock(&e->lock);
+		/* New class */
+		err = t4_sched_class_fw_cmd(pi, &np, SCHED_FW_OP_ADD);
+		if (err) {
+			spin_unlock(&e->lock);
+			e = NULL;
+			goto out;
+		}
+		memcpy(&e->info, &np, sizeof(e->info));
+		atomic_set(&e->refcnt, 0);
+		e->state = SCHED_STATE_ACTIVE;
+		spin_unlock(&e->lock);
+	}
+
+out:
+	write_unlock(&s->rw_lock);
+	return e;
+}
+
+/**
+ * cxgb4_sched_class_alloc - allocate a scheduling class
+ * @dev: net_device pointer
+ * @p: new scheduling class to create.
+ *
+ * Returns pointer to the scheduling class created.  If @p is NULL, then
+ * it allocates and returns any available unused scheduling class. If a
+ * scheduling class with matching @p is found, then the matching class is
+ * returned.
+ */
+struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev,
+					    struct ch_sched_params *p)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+	u8 class_id;
+
+	if (!can_sched(dev))
+		return NULL;
+
+	class_id = p->u.params.class;
+	if (!valid_class_id(dev, class_id))
+		return NULL;
+
+	return t4_sched_class_alloc(pi, p);
+}
+
+static void t4_sched_class_free(struct port_info *pi, struct sched_class *e)
+{
+	t4_sched_class_unbind_all(pi, e, SCHED_QUEUE);
+}
+
+struct sched_table *t4_init_sched(unsigned int sched_size)
+{
+	struct sched_table *s;
+	unsigned int i;
+
+	s = t4_alloc_mem(sizeof(*s) + sched_size * sizeof(struct sched_class));
+	if (!s)
+		return NULL;
+
+	s->sched_size = sched_size;
+	rwlock_init(&s->rw_lock);
+
+	for (i = 0; i < s->sched_size; i++) {
+		memset(&s->tab[i], 0, sizeof(struct sched_class));
+		s->tab[i].idx = i;
+		s->tab[i].state = SCHED_STATE_UNUSED;
+		INIT_LIST_HEAD(&s->tab[i].queue_list);
+		spin_lock_init(&s->tab[i].lock);
+		atomic_set(&s->tab[i].refcnt, 0);
+	}
+	return s;
+}
+
+void t4_cleanup_sched(struct adapter *adap)
+{
+	struct sched_table *s;
+	unsigned int i;
+
+	for_each_port(adap, i) {
+		struct port_info *pi = netdev2pinfo(adap->port[i]);
+
+		s = pi->sched_tbl;
+		for (i = 0; i < s->sched_size; i++) {
+			struct sched_class *e;
+
+			write_lock(&s->rw_lock);
+			e = &s->tab[i];
+			if (e->state == SCHED_STATE_ACTIVE)
+				t4_sched_class_free(pi, e);
+			write_unlock(&s->rw_lock);
+		}
+		t4_free_mem(s);
+	}
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h
new file mode 100644
index 0000000..77b2b3f
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h
@@ -0,0 +1,110 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_SCHED_H
+#define __CXGB4_SCHED_H
+
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+
+#define SCHED_CLS_NONE 0xff
+
+#define FW_SCHED_CLS_NONE 0xffffffff
+
+/* Max rate that can be set to a scheduling class is 10 Gbps */
+#define SCHED_MAX_RATE_KBPS 10000000U
+
+enum {
+	SCHED_STATE_ACTIVE,
+	SCHED_STATE_UNUSED,
+};
+
+enum sched_fw_ops {
+	SCHED_FW_OP_ADD,
+};
+
+enum sched_bind_type {
+	SCHED_QUEUE,
+};
+
+struct sched_queue_entry {
+	struct list_head list;
+	unsigned int cntxt_id;
+	struct ch_sched_queue param;
+};
+
+struct sched_class {
+	u8 state;
+	u8 idx;
+	struct ch_sched_params info;
+	struct list_head queue_list;
+	spinlock_t lock; /* Per class lock */
+	atomic_t refcnt;
+};
+
+struct sched_table {      /* per port scheduling table */
+	u8 sched_size;
+	rwlock_t rw_lock; /* Table lock */
+	struct sched_class tab[0];
+};
+
+static inline bool can_sched(struct net_device *dev)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+
+	return !pi->sched_tbl ? false : true;
+}
+
+static inline bool valid_class_id(struct net_device *dev, u8 class_id)
+{
+	struct port_info *pi = netdev2pinfo(dev);
+
+	if ((class_id > pi->sched_tbl->sched_size - 1) &&
+	    (class_id != SCHED_CLS_NONE))
+		return false;
+
+	return true;
+}
+
+int cxgb4_sched_class_bind(struct net_device *dev, void *arg,
+			   enum sched_bind_type type);
+int cxgb4_sched_class_unbind(struct net_device *dev, void *arg,
+			     enum sched_bind_type type);
+
+struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev,
+					    struct ch_sched_params *p);
+
+struct sched_table *t4_init_sched(unsigned int size);
+void t4_cleanup_sched(struct adapter *adap);
+#endif  /* __CXGB4_SCHED_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ad3552d..1e74fd6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2860,6 +2860,18 @@
 	return 0;
 }
 
+int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
+			unsigned int cmplqid)
+{
+	u32 param, val;
+
+	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL) |
+		 FW_PARAMS_PARAM_YZ_V(eqid));
+	val = cmplqid;
+	return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
+}
+
 int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 			  struct net_device *dev, unsigned int iqid)
 {
@@ -2928,8 +2940,8 @@
 	q->desc = NULL;
 }
 
-static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
-			 struct sge_fl *fl)
+void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
+		  struct sge_fl *fl)
 {
 	struct sge *s = &adap->sge;
 	unsigned int fl_id = fl ? fl->cntxt_id : 0xffff;
@@ -3014,12 +3026,6 @@
 		}
 	}
 
-	/* clean up RDMA and iSCSI Rx queues */
-	t4_free_ofld_rxqs(adap, adap->sge.iscsiqsets, adap->sge.iscsirxq);
-	t4_free_ofld_rxqs(adap, adap->sge.niscsitq, adap->sge.iscsitrxq);
-	t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq);
-	t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq);
-
 	/* clean up offload Tx queues */
 	for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) {
 		struct sge_ofld_txq *q = &adap->sge.ofldtxq[i];
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index dc92c80..20dec85 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -2729,7 +2729,7 @@
 
 out:
 	vfree(vpd);
-	return ret;
+	return ret < 0 ? ret : 0;
 }
 
 /**
@@ -3627,7 +3627,8 @@
 }
 
 #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
-		     FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
+		     FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
+		     FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
 		     FW_PORT_CAP_ANEG)
 
 /**
@@ -7196,8 +7197,12 @@
 		speed = 1000;
 	else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
 		speed = 10000;
+	else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+		speed = 25000;
 	else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
 		speed = 40000;
+	else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+		speed = 100000;
 
 	lc = &pi->link_cfg;
 
@@ -8264,3 +8269,73 @@
 		t4_sge_decode_idma_state(adapter, idma->idma_state[i]);
 	}
 }
+
+/**
+ *	t4_set_vf_mac - Set MAC address for the specified VF
+ *	@adapter: The adapter
+ *	@vf: one of the VFs instantiated by the specified PF
+ *	@naddr: the number of MAC addresses
+ *	@addr: the MAC address(es) to be set to the specified VF
+ */
+int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+		      unsigned int naddr, u8 *addr)
+{
+	struct fw_acl_mac_cmd cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+				    FW_CMD_REQUEST_F |
+				    FW_CMD_WRITE_F |
+				    FW_ACL_MAC_CMD_PFN_V(adapter->pf) |
+				    FW_ACL_MAC_CMD_VFN_V(vf));
+
+	/* Note: Do not enable the ACL */
+	cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+	cmd.nmac = naddr;
+
+	switch (adapter->pf) {
+	case 3:
+		memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3));
+		break;
+	case 2:
+		memcpy(cmd.macaddr2, addr, sizeof(cmd.macaddr2));
+		break;
+	case 1:
+		memcpy(cmd.macaddr1, addr, sizeof(cmd.macaddr1));
+		break;
+	case 0:
+		memcpy(cmd.macaddr0, addr, sizeof(cmd.macaddr0));
+		break;
+	}
+
+	return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd);
+}
+
+int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
+		    int rateunit, int ratemode, int channel, int class,
+		    int minrate, int maxrate, int weight, int pktsize)
+{
+	struct fw_sched_cmd cmd;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_SCHED_CMD) |
+				      FW_CMD_REQUEST_F |
+				      FW_CMD_WRITE_F);
+	cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
+
+	cmd.u.params.sc = FW_SCHED_SC_PARAMS;
+	cmd.u.params.type = type;
+	cmd.u.params.level = level;
+	cmd.u.params.mode = mode;
+	cmd.u.params.ch = channel;
+	cmd.u.params.cl = class;
+	cmd.u.params.unit = rateunit;
+	cmd.u.params.rate = ratemode;
+	cmd.u.params.min = cpu_to_be32(minrate);
+	cmd.u.params.max = cpu_to_be32(maxrate);
+	cmd.u.params.weight = cpu_to_be16(weight);
+	cmd.u.params.pktsize = cpu_to_be16(pktsize);
+
+	return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd),
+			       NULL, 1);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index e0ebe13..fba3b2a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -61,6 +61,7 @@
 	CPL_ABORT_REQ_RSS     = 0x2B,
 	CPL_ABORT_RPL_RSS     = 0x2D,
 
+	CPL_RX_PHYS_ADDR      = 0x30,
 	CPL_CLOSE_CON_RPL     = 0x32,
 	CPL_ISCSI_HDR         = 0x33,
 	CPL_RDMA_CQE          = 0x35,
@@ -83,6 +84,10 @@
 	CPL_PASS_OPEN_REQ6    = 0x81,
 	CPL_ACT_OPEN_REQ6     = 0x83,
 
+	CPL_TX_TLS_PDU     =    0x88,
+	CPL_TX_SEC_PDU        = 0x8A,
+	CPL_TX_TLS_ACK        = 0x8B,
+
 	CPL_RDMA_TERMINATE    = 0xA2,
 	CPL_RDMA_WRITE        = 0xA4,
 	CPL_SGE_EGR_UPDATE    = 0xA5,
@@ -94,6 +99,8 @@
 	CPL_FW4_PLD           = 0xC1,
 	CPL_FW4_ACK           = 0xC3,
 
+	CPL_RX_PHYS_DSGL      = 0xD0,
+
 	CPL_FW6_MSG           = 0xE0,
 	CPL_FW6_PLD           = 0xE1,
 	CPL_TX_PKT_LSO        = 0xED,
@@ -1362,6 +1369,15 @@
 	__be32 len;
 };
 
+struct ulp_txpkt {
+	__be32 cmd_dest;
+	__be32 len;
+};
+
+#define ULPTX_CMD_S    24
+#define ULPTX_CMD_M    0xFF
+#define ULPTX_CMD_V(x) ((x) << ULPTX_CMD_S)
+
 #define ULPTX_NSGE_S    0
 #define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S)
 
@@ -1369,6 +1385,22 @@
 #define ULPTX_MORE_V(x)	((x) << ULPTX_MORE_S)
 #define ULPTX_MORE_F	ULPTX_MORE_V(1U)
 
+#define ULP_TXPKT_DEST_S    16
+#define ULP_TXPKT_DEST_M    0x3
+#define ULP_TXPKT_DEST_V(x) ((x) << ULP_TXPKT_DEST_S)
+
+#define ULP_TXPKT_FID_S     4
+#define ULP_TXPKT_FID_M     0x7ff
+#define ULP_TXPKT_FID_V(x)  ((x) << ULP_TXPKT_FID_S)
+
+#define ULP_TXPKT_RO_S      3
+#define ULP_TXPKT_RO_V(x) ((x) << ULP_TXPKT_RO_S)
+#define ULP_TXPKT_RO_F ULP_TXPKT_RO_V(1U)
+
+#define ULP_TX_SC_MORE_S 23
+#define ULP_TX_SC_MORE_V(x) ((x) << ULP_TX_SC_MORE_S)
+#define ULP_TX_SC_MORE_F  ULP_TX_SC_MORE_V(1U)
+
 struct ulp_mem_io {
 	WR_HDR;
 	__be32 cmd;
@@ -1406,4 +1438,409 @@
 #define ULP_MEMIO_DATA_LEN_S    0
 #define ULP_MEMIO_DATA_LEN_V(x) ((x) << ULP_MEMIO_DATA_LEN_S)
 
+#define ULPTX_NSGE_S    0
+#define ULPTX_NSGE_M    0xFFFF
+#define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S)
+#define ULPTX_NSGE_G(x) (((x) >> ULPTX_NSGE_S) & ULPTX_NSGE_M)
+
+struct ulptx_sc_memrd {
+	__be32 cmd_to_len;
+	__be32 addr;
+};
+
+#define ULP_TXPKT_DATAMODIFY_S       23
+#define ULP_TXPKT_DATAMODIFY_M       0x1
+#define ULP_TXPKT_DATAMODIFY_V(x)    ((x) << ULP_TXPKT_DATAMODIFY_S)
+#define ULP_TXPKT_DATAMODIFY_G(x)    \
+	(((x) >> ULP_TXPKT_DATAMODIFY_S) & ULP_TXPKT_DATAMODIFY__M)
+#define ULP_TXPKT_DATAMODIFY_F       ULP_TXPKT_DATAMODIFY_V(1U)
+
+#define ULP_TXPKT_CHANNELID_S        22
+#define ULP_TXPKT_CHANNELID_M        0x1
+#define ULP_TXPKT_CHANNELID_V(x)     ((x) << ULP_TXPKT_CHANNELID_S)
+#define ULP_TXPKT_CHANNELID_G(x)     \
+	(((x) >> ULP_TXPKT_CHANNELID_S) & ULP_TXPKT_CHANNELID_M)
+#define ULP_TXPKT_CHANNELID_F        ULP_TXPKT_CHANNELID_V(1U)
+
+#define SCMD_SEQ_NO_CTRL_S      29
+#define SCMD_SEQ_NO_CTRL_M      0x3
+#define SCMD_SEQ_NO_CTRL_V(x)   ((x) << SCMD_SEQ_NO_CTRL_S)
+#define SCMD_SEQ_NO_CTRL_G(x)   \
+	(((x) >> SCMD_SEQ_NO_CTRL_S) & SCMD_SEQ_NO_CTRL_M)
+
+/* StsFieldPrsnt- Status field at the end of the TLS PDU */
+#define SCMD_STATUS_PRESENT_S   28
+#define SCMD_STATUS_PRESENT_M   0x1
+#define SCMD_STATUS_PRESENT_V(x)    ((x) << SCMD_STATUS_PRESENT_S)
+#define SCMD_STATUS_PRESENT_G(x)    \
+	(((x) >> SCMD_STATUS_PRESENT_S) & SCMD_STATUS_PRESENT_M)
+#define SCMD_STATUS_PRESENT_F   SCMD_STATUS_PRESENT_V(1U)
+
+/* ProtoVersion - Protocol Version 0: 1.2, 1:1.1, 2:DTLS, 3:Generic,
+ * 3-15: Reserved.
+ */
+#define SCMD_PROTO_VERSION_S    24
+#define SCMD_PROTO_VERSION_M    0xf
+#define SCMD_PROTO_VERSION_V(x) ((x) << SCMD_PROTO_VERSION_S)
+#define SCMD_PROTO_VERSION_G(x) \
+	(((x) >> SCMD_PROTO_VERSION_S) & SCMD_PROTO_VERSION_M)
+
+/* EncDecCtrl - Encryption/Decryption Control. 0: Encrypt, 1: Decrypt */
+#define SCMD_ENC_DEC_CTRL_S     23
+#define SCMD_ENC_DEC_CTRL_M     0x1
+#define SCMD_ENC_DEC_CTRL_V(x)  ((x) << SCMD_ENC_DEC_CTRL_S)
+#define SCMD_ENC_DEC_CTRL_G(x)  \
+	(((x) >> SCMD_ENC_DEC_CTRL_S) & SCMD_ENC_DEC_CTRL_M)
+#define SCMD_ENC_DEC_CTRL_F SCMD_ENC_DEC_CTRL_V(1U)
+
+/* CipherAuthSeqCtrl - Cipher Authentication Sequence Control. */
+#define SCMD_CIPH_AUTH_SEQ_CTRL_S       22
+#define SCMD_CIPH_AUTH_SEQ_CTRL_M       0x1
+#define SCMD_CIPH_AUTH_SEQ_CTRL_V(x)    \
+	((x) << SCMD_CIPH_AUTH_SEQ_CTRL_S)
+#define SCMD_CIPH_AUTH_SEQ_CTRL_G(x)    \
+	(((x) >> SCMD_CIPH_AUTH_SEQ_CTRL_S) & SCMD_CIPH_AUTH_SEQ_CTRL_M)
+#define SCMD_CIPH_AUTH_SEQ_CTRL_F   SCMD_CIPH_AUTH_SEQ_CTRL_V(1U)
+
+/* CiphMode -  Cipher Mode. 0: NOP, 1:AES-CBC, 2:AES-GCM, 3:AES-CTR,
+ * 4:Generic-AES, 5-15: Reserved.
+ */
+#define SCMD_CIPH_MODE_S    18
+#define SCMD_CIPH_MODE_M    0xf
+#define SCMD_CIPH_MODE_V(x) ((x) << SCMD_CIPH_MODE_S)
+#define SCMD_CIPH_MODE_G(x) \
+	(((x) >> SCMD_CIPH_MODE_S) & SCMD_CIPH_MODE_M)
+
+/* AuthMode - Auth Mode. 0: NOP, 1:SHA1, 2:SHA2-224, 3:SHA2-256
+ * 4-15: Reserved
+ */
+#define SCMD_AUTH_MODE_S    14
+#define SCMD_AUTH_MODE_M    0xf
+#define SCMD_AUTH_MODE_V(x) ((x) << SCMD_AUTH_MODE_S)
+#define SCMD_AUTH_MODE_G(x) \
+	(((x) >> SCMD_AUTH_MODE_S) & SCMD_AUTH_MODE_M)
+
+/* HmacCtrl - HMAC Control. 0:NOP, 1:No truncation, 2:Support HMAC Truncation
+ * per RFC 4366, 3:IPSec 96 bits, 4-7:Reserved
+ */
+#define SCMD_HMAC_CTRL_S    11
+#define SCMD_HMAC_CTRL_M    0x7
+#define SCMD_HMAC_CTRL_V(x) ((x) << SCMD_HMAC_CTRL_S)
+#define SCMD_HMAC_CTRL_G(x) \
+	(((x) >> SCMD_HMAC_CTRL_S) & SCMD_HMAC_CTRL_M)
+
+/* IvSize - IV size in units of 2 bytes */
+#define SCMD_IV_SIZE_S  7
+#define SCMD_IV_SIZE_M  0xf
+#define SCMD_IV_SIZE_V(x)   ((x) << SCMD_IV_SIZE_S)
+#define SCMD_IV_SIZE_G(x)   \
+	(((x) >> SCMD_IV_SIZE_S) & SCMD_IV_SIZE_M)
+
+/* NumIVs - Number of IVs */
+#define SCMD_NUM_IVS_S  0
+#define SCMD_NUM_IVS_M  0x7f
+#define SCMD_NUM_IVS_V(x)   ((x) << SCMD_NUM_IVS_S)
+#define SCMD_NUM_IVS_G(x)   \
+	(((x) >> SCMD_NUM_IVS_S) & SCMD_NUM_IVS_M)
+
+/* EnbDbgId - If this is enabled upper 20 (63:44) bits if SeqNumber
+ * (below) are used as Cid (connection id for debug status), these
+ * bits are padded to zero for forming the 64 bit
+ * sequence number for TLS
+ */
+#define SCMD_ENB_DBGID_S  31
+#define SCMD_ENB_DBGID_M  0x1
+#define SCMD_ENB_DBGID_V(x)   ((x) << SCMD_ENB_DBGID_S)
+#define SCMD_ENB_DBGID_G(x)   \
+	(((x) >> SCMD_ENB_DBGID_S) & SCMD_ENB_DBGID_M)
+
+/* IV generation in SW. */
+#define SCMD_IV_GEN_CTRL_S      30
+#define SCMD_IV_GEN_CTRL_M      0x1
+#define SCMD_IV_GEN_CTRL_V(x)   ((x) << SCMD_IV_GEN_CTRL_S)
+#define SCMD_IV_GEN_CTRL_G(x)   \
+	(((x) >> SCMD_IV_GEN_CTRL_S) & SCMD_IV_GEN_CTRL_M)
+#define SCMD_IV_GEN_CTRL_F  SCMD_IV_GEN_CTRL_V(1U)
+
+/* More frags */
+#define SCMD_MORE_FRAGS_S   20
+#define SCMD_MORE_FRAGS_M   0x1
+#define SCMD_MORE_FRAGS_V(x)    ((x) << SCMD_MORE_FRAGS_S)
+#define SCMD_MORE_FRAGS_G(x)    (((x) >> SCMD_MORE_FRAGS_S) & SCMD_MORE_FRAGS_M)
+
+/*last frag */
+#define SCMD_LAST_FRAG_S    19
+#define SCMD_LAST_FRAG_M    0x1
+#define SCMD_LAST_FRAG_V(x) ((x) << SCMD_LAST_FRAG_S)
+#define SCMD_LAST_FRAG_G(x) (((x) >> SCMD_LAST_FRAG_S) & SCMD_LAST_FRAG_M)
+
+/* TlsCompPdu */
+#define SCMD_TLS_COMPPDU_S    18
+#define SCMD_TLS_COMPPDU_M    0x1
+#define SCMD_TLS_COMPPDU_V(x) ((x) << SCMD_TLS_COMPPDU_S)
+#define SCMD_TLS_COMPPDU_G(x) (((x) >> SCMD_TLS_COMPPDU_S) & SCMD_TLS_COMPPDU_M)
+
+/* KeyCntxtInline - Key context inline after the scmd  OR PayloadOnly*/
+#define SCMD_KEY_CTX_INLINE_S   17
+#define SCMD_KEY_CTX_INLINE_M   0x1
+#define SCMD_KEY_CTX_INLINE_V(x)    ((x) << SCMD_KEY_CTX_INLINE_S)
+#define SCMD_KEY_CTX_INLINE_G(x)    \
+	(((x) >> SCMD_KEY_CTX_INLINE_S) & SCMD_KEY_CTX_INLINE_M)
+#define SCMD_KEY_CTX_INLINE_F   SCMD_KEY_CTX_INLINE_V(1U)
+
+/* TLSFragEnable - 0: Host created TLS PDUs, 1: TLS Framgmentation in ASIC */
+#define SCMD_TLS_FRAG_ENABLE_S  16
+#define SCMD_TLS_FRAG_ENABLE_M  0x1
+#define SCMD_TLS_FRAG_ENABLE_V(x)   ((x) << SCMD_TLS_FRAG_ENABLE_S)
+#define SCMD_TLS_FRAG_ENABLE_G(x)   \
+	(((x) >> SCMD_TLS_FRAG_ENABLE_S) & SCMD_TLS_FRAG_ENABLE_M)
+#define SCMD_TLS_FRAG_ENABLE_F  SCMD_TLS_FRAG_ENABLE_V(1U)
+
+/* MacOnly - Only send the MAC and discard PDU. This is valid for hash only
+ * modes, in this case TLS_TX  will drop the PDU and only
+ * send back the MAC bytes.
+ */
+#define SCMD_MAC_ONLY_S 15
+#define SCMD_MAC_ONLY_M 0x1
+#define SCMD_MAC_ONLY_V(x)  ((x) << SCMD_MAC_ONLY_S)
+#define SCMD_MAC_ONLY_G(x)  \
+	(((x) >> SCMD_MAC_ONLY_S) & SCMD_MAC_ONLY_M)
+#define SCMD_MAC_ONLY_F SCMD_MAC_ONLY_V(1U)
+
+/* AadIVDrop - Drop the AAD and IV fields. Useful in protocols
+ * which have complex AAD and IV formations Eg:AES-CCM
+ */
+#define SCMD_AADIVDROP_S 14
+#define SCMD_AADIVDROP_M 0x1
+#define SCMD_AADIVDROP_V(x)  ((x) << SCMD_AADIVDROP_S)
+#define SCMD_AADIVDROP_G(x)  \
+	(((x) >> SCMD_AADIVDROP_S) & SCMD_AADIVDROP_M)
+#define SCMD_AADIVDROP_F SCMD_AADIVDROP_V(1U)
+
+/* HdrLength - Length of all headers excluding TLS header
+ * present before start of crypto PDU/payload.
+ */
+#define SCMD_HDR_LEN_S  0
+#define SCMD_HDR_LEN_M  0x3fff
+#define SCMD_HDR_LEN_V(x)   ((x) << SCMD_HDR_LEN_S)
+#define SCMD_HDR_LEN_G(x)   \
+	(((x) >> SCMD_HDR_LEN_S) & SCMD_HDR_LEN_M)
+
+struct cpl_tx_sec_pdu {
+	__be32 op_ivinsrtofst;
+	__be32 pldlen;
+	__be32 aadstart_cipherstop_hi;
+	__be32 cipherstop_lo_authinsert;
+	__be32 seqno_numivs;
+	__be32 ivgen_hdrlen;
+	__be64 scmd1;
+};
+
+#define CPL_TX_SEC_PDU_OPCODE_S     24
+#define CPL_TX_SEC_PDU_OPCODE_M     0xff
+#define CPL_TX_SEC_PDU_OPCODE_V(x)  ((x) << CPL_TX_SEC_PDU_OPCODE_S)
+#define CPL_TX_SEC_PDU_OPCODE_G(x)  \
+	(((x) >> CPL_TX_SEC_PDU_OPCODE_S) & CPL_TX_SEC_PDU_OPCODE_M)
+
+/* RX Channel Id */
+#define CPL_TX_SEC_PDU_RXCHID_S  22
+#define CPL_TX_SEC_PDU_RXCHID_M  0x1
+#define CPL_TX_SEC_PDU_RXCHID_V(x)   ((x) << CPL_TX_SEC_PDU_RXCHID_S)
+#define CPL_TX_SEC_PDU_RXCHID_G(x)   \
+	(((x) >> CPL_TX_SEC_PDU_RXCHID_S) & CPL_TX_SEC_PDU_RXCHID_M)
+#define CPL_TX_SEC_PDU_RXCHID_F  CPL_TX_SEC_PDU_RXCHID_V(1U)
+
+/* Ack Follows */
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_S  21
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_M  0x1
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_V(x)   ((x) << CPL_TX_SEC_PDU_ACKFOLLOWS_S)
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_G(x)   \
+	(((x) >> CPL_TX_SEC_PDU_ACKFOLLOWS_S) & CPL_TX_SEC_PDU_ACKFOLLOWS_M)
+#define CPL_TX_SEC_PDU_ACKFOLLOWS_F  CPL_TX_SEC_PDU_ACKFOLLOWS_V(1U)
+
+/* Loopback bit in cpl_tx_sec_pdu */
+#define CPL_TX_SEC_PDU_ULPTXLPBK_S  20
+#define CPL_TX_SEC_PDU_ULPTXLPBK_M  0x1
+#define CPL_TX_SEC_PDU_ULPTXLPBK_V(x)   ((x) << CPL_TX_SEC_PDU_ULPTXLPBK_S)
+#define CPL_TX_SEC_PDU_ULPTXLPBK_G(x)   \
+	(((x) >> CPL_TX_SEC_PDU_ULPTXLPBK_S) & CPL_TX_SEC_PDU_ULPTXLPBK_M)
+#define CPL_TX_SEC_PDU_ULPTXLPBK_F  CPL_TX_SEC_PDU_ULPTXLPBK_V(1U)
+
+/* Length of cpl header encapsulated */
+#define CPL_TX_SEC_PDU_CPLLEN_S     16
+#define CPL_TX_SEC_PDU_CPLLEN_M     0xf
+#define CPL_TX_SEC_PDU_CPLLEN_V(x)  ((x) << CPL_TX_SEC_PDU_CPLLEN_S)
+#define CPL_TX_SEC_PDU_CPLLEN_G(x)  \
+	(((x) >> CPL_TX_SEC_PDU_CPLLEN_S) & CPL_TX_SEC_PDU_CPLLEN_M)
+
+/* PlaceHolder */
+#define CPL_TX_SEC_PDU_PLACEHOLDER_S    10
+#define CPL_TX_SEC_PDU_PLACEHOLDER_M    0x1
+#define CPL_TX_SEC_PDU_PLACEHOLDER_V(x) ((x) << CPL_TX_SEC_PDU_PLACEHOLDER_S)
+#define CPL_TX_SEC_PDU_PLACEHOLDER_G(x) \
+	(((x) >> CPL_TX_SEC_PDU_PLACEHOLDER_S) & \
+	 CPL_TX_SEC_PDU_PLACEHOLDER_M)
+
+/* IvInsrtOffset: Insertion location for IV */
+#define CPL_TX_SEC_PDU_IVINSRTOFST_S    0
+#define CPL_TX_SEC_PDU_IVINSRTOFST_M    0x3ff
+#define CPL_TX_SEC_PDU_IVINSRTOFST_V(x) ((x) << CPL_TX_SEC_PDU_IVINSRTOFST_S)
+#define CPL_TX_SEC_PDU_IVINSRTOFST_G(x) \
+	(((x) >> CPL_TX_SEC_PDU_IVINSRTOFST_S) & \
+	 CPL_TX_SEC_PDU_IVINSRTOFST_M)
+
+/* AadStartOffset: Offset in bytes for AAD start from
+ * the first byte following the pkt headers (0-255 bytes)
+ */
+#define CPL_TX_SEC_PDU_AADSTART_S   24
+#define CPL_TX_SEC_PDU_AADSTART_M   0xff
+#define CPL_TX_SEC_PDU_AADSTART_V(x)    ((x) << CPL_TX_SEC_PDU_AADSTART_S)
+#define CPL_TX_SEC_PDU_AADSTART_G(x)    \
+	(((x) >> CPL_TX_SEC_PDU_AADSTART_S) & \
+	 CPL_TX_SEC_PDU_AADSTART_M)
+
+/* AadStopOffset: offset in bytes for AAD stop/end from the first byte following
+ * the pkt headers (0-511 bytes)
+ */
+#define CPL_TX_SEC_PDU_AADSTOP_S    15
+#define CPL_TX_SEC_PDU_AADSTOP_M    0x1ff
+#define CPL_TX_SEC_PDU_AADSTOP_V(x) ((x) << CPL_TX_SEC_PDU_AADSTOP_S)
+#define CPL_TX_SEC_PDU_AADSTOP_G(x) \
+	(((x) >> CPL_TX_SEC_PDU_AADSTOP_S) & CPL_TX_SEC_PDU_AADSTOP_M)
+
+/* CipherStartOffset: offset in bytes for encryption/decryption start from the
+ * first byte following the pkt headers (0-1023 bytes)
+ */
+#define CPL_TX_SEC_PDU_CIPHERSTART_S    5
+#define CPL_TX_SEC_PDU_CIPHERSTART_M    0x3ff
+#define CPL_TX_SEC_PDU_CIPHERSTART_V(x) ((x) << CPL_TX_SEC_PDU_CIPHERSTART_S)
+#define CPL_TX_SEC_PDU_CIPHERSTART_G(x) \
+	(((x) >> CPL_TX_SEC_PDU_CIPHERSTART_S) & \
+	 CPL_TX_SEC_PDU_CIPHERSTART_M)
+
+/* CipherStopOffset: offset in bytes for encryption/decryption end
+ * from end of the payload of this command (0-511 bytes)
+ */
+#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_S      0
+#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_M      0x1f
+#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_V(x)   \
+	((x) << CPL_TX_SEC_PDU_CIPHERSTOP_HI_S)
+#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_G(x)   \
+	(((x) >> CPL_TX_SEC_PDU_CIPHERSTOP_HI_S) & \
+	 CPL_TX_SEC_PDU_CIPHERSTOP_HI_M)
+
+#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_S      28
+#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_M      0xf
+#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_V(x)   \
+	((x) << CPL_TX_SEC_PDU_CIPHERSTOP_LO_S)
+#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_G(x)   \
+	(((x) >> CPL_TX_SEC_PDU_CIPHERSTOP_LO_S) & \
+	 CPL_TX_SEC_PDU_CIPHERSTOP_LO_M)
+
+/* AuthStartOffset: offset in bytes for authentication start from
+ * the first byte following the pkt headers (0-1023)
+ */
+#define CPL_TX_SEC_PDU_AUTHSTART_S  18
+#define CPL_TX_SEC_PDU_AUTHSTART_M  0x3ff
+#define CPL_TX_SEC_PDU_AUTHSTART_V(x)   ((x) << CPL_TX_SEC_PDU_AUTHSTART_S)
+#define CPL_TX_SEC_PDU_AUTHSTART_G(x)   \
+	(((x) >> CPL_TX_SEC_PDU_AUTHSTART_S) & \
+	 CPL_TX_SEC_PDU_AUTHSTART_M)
+
+/* AuthStopOffset: offset in bytes for authentication
+ * end from end of the payload of this command (0-511 Bytes)
+ */
+#define CPL_TX_SEC_PDU_AUTHSTOP_S   9
+#define CPL_TX_SEC_PDU_AUTHSTOP_M   0x1ff
+#define CPL_TX_SEC_PDU_AUTHSTOP_V(x)    ((x) << CPL_TX_SEC_PDU_AUTHSTOP_S)
+#define CPL_TX_SEC_PDU_AUTHSTOP_G(x)    \
+	(((x) >> CPL_TX_SEC_PDU_AUTHSTOP_S) & \
+	 CPL_TX_SEC_PDU_AUTHSTOP_M)
+
+/* AuthInsrtOffset: offset in bytes for authentication insertion
+ * from end of the payload of this command (0-511 bytes)
+ */
+#define CPL_TX_SEC_PDU_AUTHINSERT_S 0
+#define CPL_TX_SEC_PDU_AUTHINSERT_M 0x1ff
+#define CPL_TX_SEC_PDU_AUTHINSERT_V(x)  ((x) << CPL_TX_SEC_PDU_AUTHINSERT_S)
+#define CPL_TX_SEC_PDU_AUTHINSERT_G(x)  \
+	(((x) >> CPL_TX_SEC_PDU_AUTHINSERT_S) & \
+	 CPL_TX_SEC_PDU_AUTHINSERT_M)
+
+struct cpl_rx_phys_dsgl {
+	__be32 op_to_tid;
+	__be32 pcirlxorder_to_noofsgentr;
+	struct rss_header rss_hdr_int;
+};
+
+#define CPL_RX_PHYS_DSGL_OPCODE_S       24
+#define CPL_RX_PHYS_DSGL_OPCODE_M       0xff
+#define CPL_RX_PHYS_DSGL_OPCODE_V(x)    ((x) << CPL_RX_PHYS_DSGL_OPCODE_S)
+#define CPL_RX_PHYS_DSGL_OPCODE_G(x)    \
+	(((x) >> CPL_RX_PHYS_DSGL_OPCODE_S) & CPL_RX_PHYS_DSGL_OPCODE_M)
+
+#define CPL_RX_PHYS_DSGL_ISRDMA_S       23
+#define CPL_RX_PHYS_DSGL_ISRDMA_M       0x1
+#define CPL_RX_PHYS_DSGL_ISRDMA_V(x)    ((x) << CPL_RX_PHYS_DSGL_ISRDMA_S)
+#define CPL_RX_PHYS_DSGL_ISRDMA_G(x)    \
+	(((x) >> CPL_RX_PHYS_DSGL_ISRDMA_S) & CPL_RX_PHYS_DSGL_ISRDMA_M)
+#define CPL_RX_PHYS_DSGL_ISRDMA_F       CPL_RX_PHYS_DSGL_ISRDMA_V(1U)
+
+#define CPL_RX_PHYS_DSGL_RSVD1_S        20
+#define CPL_RX_PHYS_DSGL_RSVD1_M        0x7
+#define CPL_RX_PHYS_DSGL_RSVD1_V(x)     ((x) << CPL_RX_PHYS_DSGL_RSVD1_S)
+#define CPL_RX_PHYS_DSGL_RSVD1_G(x)     \
+	(((x) >> CPL_RX_PHYS_DSGL_RSVD1_S) & \
+	 CPL_RX_PHYS_DSGL_RSVD1_M)
+
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_S          31
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_M          0x1
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_V(x)       \
+	((x) << CPL_RX_PHYS_DSGL_PCIRLXORDER_S)
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_G(x)       \
+	(((x) >> CPL_RX_PHYS_DSGL_PCIRLXORDER_S) & \
+	 CPL_RX_PHYS_DSGL_PCIRLXORDER_M)
+#define CPL_RX_PHYS_DSGL_PCIRLXORDER_F  CPL_RX_PHYS_DSGL_PCIRLXORDER_V(1U)
+
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_S           30
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_M           0x1
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_V(x)        \
+	((x) << CPL_RX_PHYS_DSGL_PCINOSNOOP_S)
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_G(x)        \
+	(((x) >> CPL_RX_PHYS_DSGL_PCINOSNOOP_S) & \
+	 CPL_RX_PHYS_DSGL_PCINOSNOOP_M)
+
+#define CPL_RX_PHYS_DSGL_PCINOSNOOP_F   CPL_RX_PHYS_DSGL_PCINOSNOOP_V(1U)
+
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_S          29
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_M          0x1
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_V(x)       \
+	((x) << CPL_RX_PHYS_DSGL_PCITPHNTENB_S)
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_G(x)       \
+	(((x) >> CPL_RX_PHYS_DSGL_PCITPHNTENB_S) & \
+	 CPL_RX_PHYS_DSGL_PCITPHNTENB_M)
+#define CPL_RX_PHYS_DSGL_PCITPHNTENB_F  CPL_RX_PHYS_DSGL_PCITPHNTENB_V(1U)
+
+#define CPL_RX_PHYS_DSGL_PCITPHNT_S     27
+#define CPL_RX_PHYS_DSGL_PCITPHNT_M     0x3
+#define CPL_RX_PHYS_DSGL_PCITPHNT_V(x)  ((x) << CPL_RX_PHYS_DSGL_PCITPHNT_S)
+#define CPL_RX_PHYS_DSGL_PCITPHNT_G(x)  \
+	(((x) >> CPL_RX_PHYS_DSGL_PCITPHNT_S) & \
+	 CPL_RX_PHYS_DSGL_PCITPHNT_M)
+
+#define CPL_RX_PHYS_DSGL_DCAID_S        16
+#define CPL_RX_PHYS_DSGL_DCAID_M        0x7ff
+#define CPL_RX_PHYS_DSGL_DCAID_V(x)     ((x) << CPL_RX_PHYS_DSGL_DCAID_S)
+#define CPL_RX_PHYS_DSGL_DCAID_G(x)     \
+	(((x) >> CPL_RX_PHYS_DSGL_DCAID_S) & \
+	 CPL_RX_PHYS_DSGL_DCAID_M)
+
+#define CPL_RX_PHYS_DSGL_NOOFSGENTR_S           0
+#define CPL_RX_PHYS_DSGL_NOOFSGENTR_M           0xffff
+#define CPL_RX_PHYS_DSGL_NOOFSGENTR_V(x)        \
+	((x) << CPL_RX_PHYS_DSGL_NOOFSGENTR_S)
+#define CPL_RX_PHYS_DSGL_NOOFSGENTR_G(x)        \
+	(((x) >> CPL_RX_PHYS_DSGL_NOOFSGENTR_S) & \
+	 CPL_RX_PHYS_DSGL_NOOFSGENTR_M)
+
 #endif  /* __T4_MSG_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index a89b307..4b58b32 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1,7 +1,7 @@
 /*
  * This file is part of the Chelsio T4 Ethernet driver for Linux.
  *
- * Copyright (c) 2009-2014 Chelsio Communications, Inc. All rights reserved.
+ * Copyright (c) 2009-2016 Chelsio Communications, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -102,6 +102,7 @@
 	FW_RI_FR_NSMR_WR               = 0x19,
 	FW_RI_INV_LSTAG_WR             = 0x1a,
 	FW_ISCSI_TX_DATA_WR	       = 0x45,
+	FW_CRYPTO_LOOKASIDE_WR         = 0X6d,
 	FW_LASTC2E_WR                  = 0x70
 };
 
@@ -680,6 +681,7 @@
 	FW_RSS_IND_TBL_CMD             = 0x20,
 	FW_RSS_GLB_CONFIG_CMD          = 0x22,
 	FW_RSS_VI_CONFIG_CMD           = 0x23,
+	FW_SCHED_CMD                   = 0x24,
 	FW_DEVLOG_CMD                  = 0x25,
 	FW_CLIP_CMD                    = 0x28,
 	FW_LASTC2E_CMD                 = 0x40,
@@ -1060,7 +1062,7 @@
 	__be16 niccaps;
 	__be16 ofldcaps;
 	__be16 rdmacaps;
-	__be16 r4;
+	__be16 cryptocaps;
 	__be16 iscsicaps;
 	__be16 fcoecaps;
 	__be32 cfcsum;
@@ -2265,6 +2267,12 @@
 	FW_PORT_CAP_802_3_ASM_DIR	= 0x8000,
 };
 
+#define FW_PORT_CAP_SPEED_S     0
+#define FW_PORT_CAP_SPEED_M     0x3f
+#define FW_PORT_CAP_SPEED_V(x)  ((x) << FW_PORT_CAP_SPEED_S)
+#define FW_PORT_CAP_SPEED_G(x) \
+	(((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M)
+
 enum fw_port_mdi {
 	FW_PORT_CAP_MDI_UNCHANGED,
 	FW_PORT_CAP_MDI_AUTO,
@@ -2961,6 +2969,41 @@
 #define FW_RSS_VI_CONFIG_CMD_UDPEN_V(x)	((x) << FW_RSS_VI_CONFIG_CMD_UDPEN_S)
 #define FW_RSS_VI_CONFIG_CMD_UDPEN_F	FW_RSS_VI_CONFIG_CMD_UDPEN_V(1U)
 
+enum fw_sched_sc {
+	FW_SCHED_SC_PARAMS		= 1,
+};
+
+struct fw_sched_cmd {
+	__be32 op_to_write;
+	__be32 retval_len16;
+	union fw_sched {
+		struct fw_sched_config {
+			__u8   sc;
+			__u8   type;
+			__u8   minmaxen;
+			__u8   r3[5];
+			__u8   nclasses[4];
+			__be32 r4;
+		} config;
+		struct fw_sched_params {
+			__u8   sc;
+			__u8   type;
+			__u8   level;
+			__u8   mode;
+			__u8   unit;
+			__u8   rate;
+			__u8   ch;
+			__u8   cl;
+			__be32 min;
+			__be32 max;
+			__be16 weight;
+			__be16 pktsize;
+			__be16 burstsize;
+			__be16 r4;
+		} params;
+	} u;
+};
+
 struct fw_clip_cmd {
 	__be32 op_to_write;
 	__be32 alloc_to_len16;
@@ -3249,4 +3292,127 @@
 #define PCIE_FW_PF_DEVLOG_MEMTYPE_G(x) \
 	(((x) >> PCIE_FW_PF_DEVLOG_MEMTYPE_S) & PCIE_FW_PF_DEVLOG_MEMTYPE_M)
 
+#define MAX_IMM_OFLD_TX_DATA_WR_LEN (0xff + sizeof(struct fw_ofld_tx_data_wr))
+
+struct fw_crypto_lookaside_wr {
+	__be32 op_to_cctx_size;
+	__be32 len16_pkd;
+	__be32 session_id;
+	__be32 rx_chid_to_rx_q_id;
+	__be32 key_addr;
+	__be32 pld_size_hash_size;
+	__be64 cookie;
+};
+
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_S 24
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_OPCODE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_OPCODE_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_OPCODE_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_S 23
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_M 0x1
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_COMPL_S)
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_COMPL_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_COMPL_M)
+#define FW_CRYPTO_LOOKASIDE_WR_COMPL_F FW_CRYPTO_LOOKASIDE_WR_COMPL_V(1U)
+
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S 15
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S)
+#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S 5
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S)
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S 0
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M 0x1f
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_S 0
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_LEN16_S)
+#define FW_CRYPTO_LOOKASIDE_WR_LEN16_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_LEN16_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_LEN16_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S 29
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S)
+#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_S  27
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_M  0x3
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_LCB_S)
+#define FW_CRYPTO_LOOKASIDE_WR_LCB_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_LCB_S) & FW_CRYPTO_LOOKASIDE_WR_LCB_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_S 25
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_PHASH_S)
+#define FW_CRYPTO_LOOKASIDE_WR_PHASH_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_PHASH_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_PHASH_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_IV_S   23
+#define FW_CRYPTO_LOOKASIDE_WR_IV_M   0x3
+#define FW_CRYPTO_LOOKASIDE_WR_IV_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_IV_S)
+#define FW_CRYPTO_LOOKASIDE_WR_IV_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_IV_S) & FW_CRYPTO_LOOKASIDE_WR_IV_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_S 10
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_M 0x3
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_TX_CH_S)
+#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_TX_CH_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_TX_CH_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S 0
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M 0x3ff
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S)
+#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S 24
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M 0xff
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M)
+
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S 17
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M 0x7f
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(x) \
+	((x) << FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S)
+#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_G(x) \
+	(((x) >> FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S) & \
+	 FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M)
+
 #endif /* _T4FW_INTERFACE_H_ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index e116bb8..100b2cc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2378,7 +2378,7 @@
 	 */
 	pmask_nports = hweight32(adapter->params.vfres.pmask);
 	if (pmask_nports < adapter->params.nports) {
-		dev_warn(adapter->pdev_dev, "only using %d of %d provissioned"
+		dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
 			 " virtual interfaces; limited by Port Access Rights"
 			 " mask %#x\n", pmask_nports, adapter->params.nports,
 			 adapter->params.vfres.pmask);
@@ -2777,6 +2777,7 @@
 	struct adapter *adapter;
 	struct port_info *pi;
 	struct net_device *netdev;
+	unsigned int pf;
 
 	/*
 	 * Print our driver banner the first time we're called to initialize a
@@ -2903,8 +2904,11 @@
 	 * Allocate our "adapter ports" and stitch everything together.
 	 */
 	pmask = adapter->params.vfres.pmask;
+	pf = t4vf_get_pf_from_vf(adapter);
 	for_each_port(adapter, pidx) {
 		int port_id, viid;
+		u8 mac[ETH_ALEN];
+		unsigned int naddr = 1;
 
 		/*
 		 * We simplistically allocate our virtual interfaces
@@ -2975,6 +2979,26 @@
 				pidx);
 			goto err_free_dev;
 		}
+
+		err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac);
+		if (err) {
+			dev_err(&pdev->dev,
+				"unable to determine MAC ACL address, "
+				"continuing anyway.. (status %d)\n", err);
+		} else if (naddr && adapter->params.vfres.nvi == 1) {
+			struct sockaddr addr;
+
+			ether_addr_copy(addr.sa_data, mac);
+			err = cxgb4vf_set_mac_addr(netdev, &addr);
+			if (err) {
+				dev_err(&pdev->dev,
+					"unable to set MAC address %pM\n",
+					mac);
+				goto err_free_dev;
+			}
+			dev_info(&pdev->dev,
+				 "Using assigned MAC ACL: %pM\n", mac);
+		}
 	}
 
 	/* See what interrupts we'll be using.  If we've been configured to
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index c8fd4f8..f3ed9ce 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -1648,14 +1648,15 @@
 
 	if (csum_ok && !pkt->err_vec &&
 	    (be32_to_cpu(pkt->l2info) & (RXF_UDP_F | RXF_TCP_F))) {
-		if (!pkt->ip_frag)
+		if (!pkt->ip_frag) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
-		else {
+			rxq->stats.rx_cso++;
+		} else if (pkt->l2info & htonl(RXF_IP_F)) {
 			__sum16 c = (__force __sum16)pkt->csum;
 			skb->csum = csum_unfold(c);
 			skb->ip_summed = CHECKSUM_COMPLETE;
+			rxq->stats.rx_cso++;
 		}
-		rxq->stats.rx_cso++;
 	} else
 		skb_checksum_none_assert(skb);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index 8ee5414..b3903fe 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -108,8 +108,8 @@
 	unsigned int   supported;        /* link capabilities */
 	unsigned int   advertising;      /* advertised capabilities */
 	unsigned short lp_advertising;   /* peer advertised capabilities */
-	unsigned short requested_speed;  /* speed user has requested */
-	unsigned short speed;            /* actual link speed */
+	unsigned int   requested_speed;  /* speed user has requested */
+	unsigned int   speed;            /* actual link speed */
 	unsigned char  requested_fc;     /* flow control user has requested */
 	unsigned char  fc;               /* actual link flow control */
 	unsigned char  autoneg;          /* autonegotiating? */
@@ -271,10 +271,17 @@
 	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
 }
 
+/* Return true if the Link Configuration supports "High Speeds" (those greater
+ * than 1Gb/s).
+ */
 static inline bool is_x_10g_port(const struct link_config *lc)
 {
-	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
-		(lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
+	unsigned int speeds, high_speeds;
+
+	speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
+	high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
+
+	return high_speeds != 0;
 }
 
 static inline unsigned int core_ticks_per_usec(const struct adapter *adapter)
@@ -347,6 +354,7 @@
 			u64 *pbar2_qoffset,
 			unsigned int *pbar2_qid);
 
+unsigned int t4vf_get_pf_from_vf(struct adapter *);
 int t4vf_get_sge_params(struct adapter *);
 int t4vf_get_vpd_params(struct adapter *);
 int t4vf_get_dev_params(struct adapter *);
@@ -381,5 +389,7 @@
 
 int t4vf_handle_fw_rpl(struct adapter *, const __be64 *);
 int t4vf_prep_adapter(struct adapter *);
+int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+			unsigned int *naddr, u8 *addr);
 
 #endif /* __T4VF_COMMON_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 427bfa7..e98248f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -314,8 +314,9 @@
 }
 
 #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
-		     FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
-		     FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG)
+		     FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
+		     FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
+		     FW_PORT_CAP_ANEG)
 
 /**
  *	init_link_config - initialize a link's SW state
@@ -639,6 +640,15 @@
 	return 0;
 }
 
+unsigned int t4vf_get_pf_from_vf(struct adapter *adapter)
+{
+	u32 whoami;
+
+	whoami = t4_read_reg(adapter, T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
+	return (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+			SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami));
+}
+
 /**
  *	t4vf_get_sge_params - retrieve adapter Scatter gather Engine parameters
  *	@adapter: the adapter
@@ -716,7 +726,6 @@
 	 * read.
 	 */
 	if (!is_t4(adapter->params.chip)) {
-		u32 whoami;
 		unsigned int pf, s_hps, s_qpp;
 
 		params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_REG) |
@@ -740,11 +749,7 @@
 		 * register we just read. Do it once here so other code in
 		 * the driver can just use it.
 		 */
-		whoami = t4_read_reg(adapter,
-				     T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
-		pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
-			SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
-
+		pf = t4vf_get_pf_from_vf(adapter);
 		s_hps = (HOSTPAGESIZEPF0_S +
 			 (HOSTPAGESIZEPF1_S - HOSTPAGESIZEPF0_S) * pf);
 		sge_params->sge_vf_hps =
@@ -1712,8 +1717,12 @@
 			speed = 1000;
 		else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
 			speed = 10000;
+		else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
+			speed = 25000;
 		else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
 			speed = 40000;
+		else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
+			speed = 100000;
 
 		/*
 		 * Scan all of our "ports" (Virtual Interfaces) looking for
@@ -1807,3 +1816,50 @@
 
 	return 0;
 }
+
+/**
+ *	t4vf_get_vf_mac_acl - Get the MAC address to be set to
+ *			      the VI of this VF.
+ *	@adapter: The adapter
+ *	@pf: The pf associated with vf
+ *	@naddr: the number of ACL MAC addresses returned in addr
+ *	@addr: Placeholder for MAC addresses
+ *
+ *	Find the MAC address to be set to the VF's VI. The requested MAC address
+ *	is from the host OS via callback in the PF driver.
+ */
+int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+			unsigned int *naddr, u8 *addr)
+{
+	struct fw_acl_mac_cmd cmd;
+	int ret;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+				    FW_CMD_REQUEST_F |
+				    FW_CMD_READ_F);
+	cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+	ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd);
+	if (ret)
+		return ret;
+
+	if (cmd.nmac < *naddr)
+		*naddr = cmd.nmac;
+
+	switch (pf) {
+	case 3:
+		memcpy(addr, cmd.macaddr3, sizeof(cmd.macaddr3));
+		break;
+	case 2:
+		memcpy(addr, cmd.macaddr2, sizeof(cmd.macaddr2));
+		break;
+	case 1:
+		memcpy(addr, cmd.macaddr1, sizeof(cmd.macaddr1));
+		break;
+	case 0:
+		memcpy(addr, cmd.macaddr0, sizeof(cmd.macaddr0));
+		break;
+	}
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/chelsio/libcxgb/Makefile b/drivers/net/ethernet/chelsio/libcxgb/Makefile
index 2362230..2534e30 100644
--- a/drivers/net/ethernet/chelsio/libcxgb/Makefile
+++ b/drivers/net/ethernet/chelsio/libcxgb/Makefile
@@ -1,3 +1,5 @@
+ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4
+
 obj-$(CONFIG_CHELSIO_LIB) += libcxgb.o
 
-libcxgb-y := libcxgb_ppm.o
+libcxgb-y := libcxgb_ppm.o libcxgb_cm.o
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
new file mode 100644
index 0000000..0f0de5b
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/tcp.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
+
+#include "libcxgb_cm.h"
+
+void
+cxgb_get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
+		int *iptype, __u8 *local_ip, __u8 *peer_ip,
+		__be16 *local_port, __be16 *peer_port)
+{
+	int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+		      ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+		      T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+	int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+		     IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+		     T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+	struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
+	struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
+	struct tcphdr *tcp = (struct tcphdr *)
+			     ((u8 *)(req + 1) + eth_len + ip_len);
+
+	if (ip->version == 4) {
+		pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n",
+			 __func__, ntohl(ip->saddr), ntohl(ip->daddr),
+			 ntohs(tcp->source), ntohs(tcp->dest));
+		*iptype = 4;
+		memcpy(peer_ip, &ip->saddr, 4);
+		memcpy(local_ip, &ip->daddr, 4);
+	} else {
+		pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n",
+			 __func__, ip6->saddr.s6_addr, ip6->daddr.s6_addr,
+			 ntohs(tcp->source), ntohs(tcp->dest));
+		*iptype = 6;
+		memcpy(peer_ip, ip6->saddr.s6_addr, 16);
+		memcpy(local_ip, ip6->daddr.s6_addr, 16);
+	}
+	*peer_port = tcp->source;
+	*local_port = tcp->dest;
+}
+EXPORT_SYMBOL(cxgb_get_4tuple);
+
+static bool
+cxgb_our_interface(struct cxgb4_lld_info *lldi,
+		   struct net_device *(*get_real_dev)(struct net_device *),
+		   struct net_device *egress_dev)
+{
+	int i;
+
+	egress_dev = get_real_dev(egress_dev);
+	for (i = 0; i < lldi->nports; i++)
+		if (lldi->ports[i] == egress_dev)
+			return true;
+	return false;
+}
+
+struct dst_entry *
+cxgb_find_route(struct cxgb4_lld_info *lldi,
+		struct net_device *(*get_real_dev)(struct net_device *),
+		__be32 local_ip, __be32 peer_ip, __be16 local_port,
+		__be16 peer_port, u8 tos)
+{
+	struct rtable *rt;
+	struct flowi4 fl4;
+	struct neighbour *n;
+
+	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
+				   peer_port, local_port, IPPROTO_TCP,
+				   tos, 0);
+	if (IS_ERR(rt))
+		return NULL;
+	n = dst_neigh_lookup(&rt->dst, &peer_ip);
+	if (!n)
+		return NULL;
+	if (!cxgb_our_interface(lldi, get_real_dev, n->dev) &&
+	    !(n->dev->flags & IFF_LOOPBACK)) {
+		neigh_release(n);
+		dst_release(&rt->dst);
+		return NULL;
+	}
+	neigh_release(n);
+	return &rt->dst;
+}
+EXPORT_SYMBOL(cxgb_find_route);
+
+struct dst_entry *
+cxgb_find_route6(struct cxgb4_lld_info *lldi,
+		 struct net_device *(*get_real_dev)(struct net_device *),
+		 __u8 *local_ip, __u8 *peer_ip, __be16 local_port,
+		 __be16 peer_port, u8 tos, __u32 sin6_scope_id)
+{
+	struct dst_entry *dst = NULL;
+
+	if (IS_ENABLED(CONFIG_IPV6)) {
+		struct flowi6 fl6;
+
+		memset(&fl6, 0, sizeof(fl6));
+		memcpy(&fl6.daddr, peer_ip, 16);
+		memcpy(&fl6.saddr, local_ip, 16);
+		if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
+			fl6.flowi6_oif = sin6_scope_id;
+		dst = ip6_route_output(&init_net, NULL, &fl6);
+		if (!dst)
+			goto out;
+		if (!cxgb_our_interface(lldi, get_real_dev,
+					ip6_dst_idev(dst)->dev) &&
+		    !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
+			dst_release(dst);
+			dst = NULL;
+		}
+	}
+
+out:
+	return dst;
+}
+EXPORT_SYMBOL(cxgb_find_route6);
diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
new file mode 100644
index 0000000..515b94f
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIBCXGB_CM_H__
+#define __LIBCXGB_CM_H__
+
+
+#include <net/tcp.h>
+
+#include <cxgb4.h>
+#include <t4_msg.h>
+#include <l2t.h>
+
+void
+cxgb_get_4tuple(struct cpl_pass_accept_req *, enum chip_type,
+		int *, __u8 *, __u8 *, __be16 *, __be16 *);
+struct dst_entry *
+cxgb_find_route(struct cxgb4_lld_info *,
+		struct net_device *(*)(struct net_device *),
+		__be32, __be32, __be16,	__be16, u8);
+struct dst_entry *
+cxgb_find_route6(struct cxgb4_lld_info *,
+		 struct net_device *(*)(struct net_device *),
+		 __u8 *, __u8 *, __be16, __be16, u8, __u32);
+
+/* Returns whether a CPL status conveys negative advice.
+ */
+static inline bool cxgb_is_neg_adv(unsigned int status)
+{
+	return status == CPL_ERR_RTX_NEG_ADVICE ||
+	       status == CPL_ERR_PERSIST_NEG_ADVICE ||
+	       status == CPL_ERR_KEEPALV_NEG_ADVICE;
+}
+
+static inline void
+cxgb_best_mtu(const unsigned short *mtus, unsigned short mtu,
+	      unsigned int *idx, int use_ts, int ipv6)
+{
+	unsigned short hdr_size = (ipv6 ?
+				   sizeof(struct ipv6hdr) :
+				   sizeof(struct iphdr)) +
+				  sizeof(struct tcphdr) +
+				  (use_ts ?
+				   round_up(TCPOLEN_TIMESTAMP, 4) : 0);
+	unsigned short data_size = mtu - hdr_size;
+
+	cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+}
+
+static inline u32 cxgb_compute_wscale(u32 win)
+{
+	u32 wscale = 0;
+
+	while (wscale < 14 && (65535 << wscale) < win)
+		wscale++;
+	return wscale;
+}
+
+static inline void
+cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
+{
+	struct cpl_tid_release *req;
+
+	req = (struct cpl_tid_release *)__skb_put(skb, len);
+	memset(req, 0, len);
+
+	INIT_TP_WR(req, tid);
+	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
+	set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
+}
+
+static inline void
+cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
+		      void *handle, arp_err_handler_t handler)
+{
+	struct cpl_close_con_req *req;
+
+	req = (struct cpl_close_con_req *)__skb_put(skb, len);
+	memset(req, 0, len);
+
+	INIT_TP_WR(req, tid);
+	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
+	set_wr_txq(skb, CPL_PRIORITY_DATA, chan);
+	t4_set_arp_err_handler(skb, handle, handler);
+}
+
+static inline void
+cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
+		  void *handle, arp_err_handler_t handler)
+{
+	struct cpl_abort_req *req;
+
+	req = (struct cpl_abort_req *)__skb_put(skb, len);
+	memset(req, 0, len);
+
+	INIT_TP_WR(req, tid);
+	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, tid));
+	req->cmd = CPL_ABORT_SEND_RST;
+	set_wr_txq(skb, CPL_PRIORITY_DATA, chan);
+	t4_set_arp_err_handler(skb, handle, handler);
+}
+
+static inline void
+cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan)
+{
+	struct cpl_abort_rpl *rpl;
+
+	rpl = (struct cpl_abort_rpl *)__skb_put(skb, len);
+	memset(rpl, 0, len);
+
+	INIT_TP_WR(rpl, tid);
+	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
+	rpl->cmd = CPL_ABORT_NO_RST;
+	set_wr_txq(skb, CPL_PRIORITY_DATA, chan);
+}
+
+static inline void
+cxgb_mk_rx_data_ack(struct sk_buff *skb, u32 len, u32 tid, u16 chan,
+		    u32 credit_dack)
+{
+	struct cpl_rx_data_ack *req;
+
+	req = (struct cpl_rx_data_ack *)__skb_put(skb, len);
+	memset(req, 0, len);
+
+	INIT_TP_WR(req, tid);
+	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid));
+	req->credit_dack = cpu_to_be32(credit_dack);
+	set_wr_txq(skb, CPL_PRIORITY_ACK, chan);
+}
+#endif
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 1471e16..f45385f 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1299,6 +1299,7 @@
 dm9000_open(struct net_device *dev)
 {
 	struct board_info *db = netdev_priv(dev);
+	unsigned int irq_flags = irq_get_trigger_type(dev->irq);
 
 	if (netif_msg_ifup(db))
 		dev_dbg(db->dev, "enabling %s\n", dev->name);
@@ -1306,9 +1307,11 @@
 	/* If there is no IRQ type specified, tell the user that this is a
 	 * problem
 	 */
-	if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE)
+	if (irq_flags == IRQF_TRIGGER_NONE)
 		dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n");
 
+	irq_flags |= IRQF_SHARED;
+
 	/* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */
 	iow(db, DM9000_GPR, 0);	/* REG_1F bit0 activate phyxcer */
 	mdelay(1); /* delay needs by DM9000B */
@@ -1316,8 +1319,7 @@
 	/* Initialize DM9000 board */
 	dm9000_init_dm9000(dev);
 
-	if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED,
-			dev->name, dev))
+	if (request_irq(dev->irq, dm9000_interrupt, irq_flags, dev->name, dev))
 		return -EAGAIN;
 	/* Now that we have an interrupt handler hooked up we can unmask
 	 * our interrupts
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index f0e9e2e..6620fc8 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -1966,7 +1966,7 @@
     } else if (lp->setup_f == HASH_PERF) {   /* Hash Filtering */
 	netdev_for_each_mc_addr(ha, dev) {
 		crc = ether_crc_le(ETH_ALEN, ha->addr);
-		hashcode = crc & HASH_BITS;  /* hashcode is 9 LSb of CRC */
+		hashcode = crc & DE4X5_HASH_BITS;  /* hashcode is 9 LSb of CRC */
 
 		byte = hashcode >> 3;        /* bit[3-8] -> byte in filter */
 		bit = 1 << (hashcode & 0x07);/* bit[0-2] -> bit in byte */
@@ -5043,7 +5043,7 @@
 	    *(pa + i) = dev->dev_addr[i];                 /* Host address */
 	    if (i & 0x01) pa += 2;
 	}
-	*(lp->setup_frame + (HASH_TABLE_LEN >> 3) - 3) = 0x80;
+	*(lp->setup_frame + (DE4X5_HASH_TABLE_LEN >> 3) - 3) = 0x80;
     } else {
 	for (i=0; i<ETH_ALEN; i++) { /* Host address */
 	    *(pa + (i&1)) = dev->dev_addr[i];
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.h b/drivers/net/ethernet/dec/tulip/de4x5.h
index ec756eb..1bfdc9b 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.h
+++ b/drivers/net/ethernet/dec/tulip/de4x5.h
@@ -860,8 +860,8 @@
 #define PCI  0
 #define EISA 1
 
-#define HASH_TABLE_LEN   512       /* Bits */
-#define HASH_BITS        0x01ff    /* 9 LS bits */
+#define DE4X5_HASH_TABLE_LEN   512       /* Bits */
+#define DE4X5_HASH_BITS        0x01ff    /* 9 LS bits */
 
 #define SETUP_FRAME_LEN  192       /* Bytes */
 #define IMPERF_PA_OFFSET 156       /* Bytes */
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index 58c6338..79d8009 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -867,7 +867,7 @@
 
 	/* Initialize other registers. */
 	__set_mac_addr(dev);
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 	iowrite16(dev->mtu + 18, ioaddr + MaxFrameSize);
 #else
 	iowrite16(dev->mtu + 14, ioaddr + MaxFrameSize);
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 4555e04..6cfa63a 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -37,7 +37,7 @@
 #include "be_hw.h"
 #include "be_roce.h"
 
-#define DRV_VER			"11.0.0.0"
+#define DRV_VER			"11.1.0.0"
 #define DRV_NAME		"be2net"
 #define BE_NAME			"Emulex BladeEngine2"
 #define BE3_NAME		"Emulex BladeEngine3"
@@ -399,13 +399,13 @@
 #define BE_FLAGS_PHY_MISCONFIGURED		BIT(10)
 #define BE_FLAGS_ERR_DETECTION_SCHEDULED	BIT(11)
 #define BE_FLAGS_OS2BMC				BIT(12)
+#define BE_FLAGS_TRY_RECOVERY			BIT(13)
 
 #define BE_UC_PMAC_COUNT			30
 #define BE_VF_UC_PMAC_COUNT			2
 
 #define MAX_ERR_RECOVERY_RETRY_COUNT		3
 #define ERR_DETECTION_DELAY			1000
-#define ERR_RECOVERY_RETRY_DELAY		30000
 
 /* Ethtool set_dump flags */
 #define LANCER_INITIATE_FW_DUMP			0x1
@@ -508,6 +508,70 @@
 	u16 lso_mss;	/* MSS for LSO */
 };
 
+struct be_eth_addr {
+	unsigned char mac[ETH_ALEN];
+};
+
+#define BE_SEC	1000			/* in msec */
+#define BE_MIN	(60 * BE_SEC)		/* in msec */
+#define BE_HOUR	(60 * BE_MIN)		/* in msec */
+
+#define ERR_RECOVERY_MAX_RETRY_COUNT		3
+#define ERR_RECOVERY_DETECTION_DELAY		BE_SEC
+#define ERR_RECOVERY_RETRY_DELAY		(30 * BE_SEC)
+
+/* UE-detection-duration in BEx/Skyhawk:
+ * All PFs must wait for this duration after they detect UE before reading
+ * SLIPORT_SEMAPHORE register. At the end of this duration, the Firmware
+ * guarantees that the SLIPORT_SEMAPHORE register is updated to indicate
+ * if the UE is recoverable.
+ */
+#define ERR_RECOVERY_UE_DETECT_DURATION			BE_SEC
+
+/* Initial idle time (in msec) to elapse after driver load,
+ * before UE recovery is allowed.
+ */
+#define ERR_IDLE_HR			24
+#define ERR_RECOVERY_IDLE_TIME		(ERR_IDLE_HR * BE_HOUR)
+
+/* Time interval (in msec) after which UE recovery can be repeated */
+#define ERR_INTERVAL_HR			72
+#define ERR_RECOVERY_INTERVAL		(ERR_INTERVAL_HR * BE_HOUR)
+
+/* BEx/SH UE recovery state machine */
+enum {
+	ERR_RECOVERY_ST_NONE = 0,		/* No Recovery */
+	ERR_RECOVERY_ST_DETECT = 1,		/* UE detection duration */
+	ERR_RECOVERY_ST_RESET = 2,		/* Reset Phase (PF0 only) */
+	ERR_RECOVERY_ST_PRE_POLL = 3,		/* Pre-Poll Phase (all PFs) */
+	ERR_RECOVERY_ST_REINIT = 4		/* Re-initialize Phase */
+};
+
+struct be_error_recovery {
+	/* Lancer error recovery variables */
+	u8 recovery_retries;
+
+	/* BEx/Skyhawk error recovery variables */
+	u8 recovery_state;
+	u16 ue_to_reset_time;		/* Time after UE, to soft reset
+					 * the chip - PF0 only
+					 */
+	u16 ue_to_poll_time;		/* Time after UE, to Restart Polling
+					 * of SLIPORT_SEMAPHORE reg
+					 */
+	u16 last_err_code;
+	bool recovery_supported;
+	unsigned long probe_time;
+	unsigned long last_recovery_time;
+
+	/* Common to both Lancer & BEx/SH error recovery */
+	u32 resched_delay;
+	struct delayed_work err_detection_work;
+};
+
+/* Ethtool priv_flags */
+#define	BE_DISABLE_TPE_RECOVERY	0x1
+
 struct be_adapter {
 	struct pci_dev *pdev;
 	struct net_device *netdev;
@@ -523,7 +587,7 @@
 	struct be_dma_mem mbox_mem_alloced;
 
 	struct be_mcc_obj mcc_obj;
-	spinlock_t mcc_lock;	/* For serializing mcc cmds to BE card */
+	struct mutex mcc_lock;	/* For serializing mcc cmds to BE card */
 	spinlock_t mcc_cq_lock;
 
 	u16 cfg_num_rx_irqs;		/* configured via set-channels */
@@ -556,7 +620,6 @@
 	struct delayed_work work;
 	u16 work_counter;
 
-	struct delayed_work be_err_detection_work;
 	u8 recovery_retries;
 	u8 err_flags;
 	bool pcicfg_mapped;	/* pcicfg obtained via pci_iomap() */
@@ -570,9 +633,15 @@
 	int if_handle;		/* Used to configure filtering */
 	u32 if_flags;		/* Interface filtering flags */
 	u32 *pmac_id;		/* MAC addr handle used by BE card */
+	struct be_eth_addr *uc_list;/* list of uc-addrs programmed (not perm) */
 	u32 uc_macs;		/* Count of secondary UC MAC programmed */
+	struct be_eth_addr *mc_list;/* list of mcast addrs programmed */
+	u32 mc_count;
 	unsigned long vids[BITS_TO_LONGS(VLAN_N_VID)];
 	u16 vlans_added;
+	bool update_uc_list;
+	bool update_mc_list;
+	struct mutex rx_filter_lock;/* For protecting vids[] & mc/uc_list[] */
 
 	u32 beacon_state;	/* for set_phys_id */
 
@@ -624,6 +693,18 @@
 	u32 fat_dump_len;
 	u16 serial_num[CNTL_SERIAL_NUM_WORDS];
 	u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */
+	u8 dev_mac[ETH_ALEN];
+	u32 priv_flags; /* ethtool get/set_priv_flags() */
+	struct be_error_recovery error_recovery;
+};
+
+/* Used for defered FW config cmds. Add fields to this struct as reqd */
+struct be_cmd_work {
+	struct work_struct work;
+	struct be_adapter *adapter;
+	union {
+		__be16 vxlan_port;
+	} info;
 };
 
 #define be_physfn(adapter)		(!adapter->virtfn)
@@ -848,6 +929,9 @@
 	return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4;
 }
 
+#define be_error_recovering(adapter)	\
+		(adapter->flags & BE_FLAGS_TRY_RECOVERY)
+
 #define BE_ERROR_EEH		1
 #define BE_ERROR_UE		BIT(1)
 #define BE_ERROR_FW		BIT(2)
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 2cc1175..9cffe48 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -92,6 +92,11 @@
 		CMD_SUBSYSTEM_COMMON,
 		BE_PRIV_DEVCFG | BE_PRIV_VHADM
 	},
+	{
+		OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES,
+		CMD_SUBSYSTEM_COMMON,
+		BE_PRIV_DEVCFG
+	}
 };
 
 static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode, u8 subsystem)
@@ -571,7 +576,7 @@
 /* Wait till no more pending mcc requests are present */
 static int be_mcc_wait_compl(struct be_adapter *adapter)
 {
-#define mcc_timeout		120000 /* 12s timeout */
+#define mcc_timeout		12000 /* 12s timeout */
 	int i, status = 0;
 	struct be_mcc_obj *mcc_obj = &adapter->mcc_obj;
 
@@ -585,7 +590,7 @@
 
 		if (atomic_read(&mcc_obj->q.used) == 0)
 			break;
-		udelay(100);
+		usleep_range(500, 1000);
 	}
 	if (i == mcc_timeout) {
 		dev_err(&adapter->pdev->dev, "FW not responding\n");
@@ -705,7 +710,7 @@
 	return 0;
 }
 
-static u16 be_POST_stage_get(struct be_adapter *adapter)
+u16 be_POST_stage_get(struct be_adapter *adapter)
 {
 	u32 sem;
 
@@ -863,7 +868,7 @@
 static int be_cmd_lock(struct be_adapter *adapter)
 {
 	if (use_mcc(adapter)) {
-		spin_lock_bh(&adapter->mcc_lock);
+		mutex_lock(&adapter->mcc_lock);
 		return 0;
 	} else {
 		return mutex_lock_interruptible(&adapter->mbox_lock);
@@ -874,7 +879,7 @@
 static void be_cmd_unlock(struct be_adapter *adapter)
 {
 	if (use_mcc(adapter))
-		spin_unlock_bh(&adapter->mcc_lock);
+		return mutex_unlock(&adapter->mcc_lock);
 	else
 		return mutex_unlock(&adapter->mbox_lock);
 }
@@ -1044,7 +1049,7 @@
 	struct be_cmd_req_mac_query *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1073,7 +1078,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1085,7 +1090,7 @@
 	struct be_cmd_req_pmac_add *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1110,7 +1115,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	 if (status == MCC_STATUS_UNAUTHORIZED_REQUEST)
 		status = -EPERM;
@@ -1128,7 +1133,7 @@
 	if (pmac_id == -1)
 		return 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1148,7 +1153,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1411,7 +1416,7 @@
 	struct be_dma_mem *q_mem = &rxq->dma_mem;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1441,7 +1446,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1505,7 +1510,7 @@
 	struct be_cmd_req_q_destroy *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1522,7 +1527,7 @@
 	q->created = false;
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1590,7 +1595,7 @@
 	struct be_cmd_req_hdr *hdr;
 	int status = 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1618,7 +1623,7 @@
 	adapter->stats_cmd_sent = true;
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1634,7 +1639,7 @@
 			    CMD_SUBSYSTEM_ETH))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1657,7 +1662,7 @@
 	adapter->stats_cmd_sent = true;
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1694,7 +1699,7 @@
 	struct be_cmd_req_link_status *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	if (link_status)
 		*link_status = LINK_DOWN;
@@ -1733,7 +1738,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1744,7 +1749,7 @@
 	struct be_cmd_req_get_cntl_addnl_attribs *req;
 	int status = 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1759,7 +1764,7 @@
 
 	status = be_mcc_notify(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1808,7 +1813,7 @@
 	if (!get_fat_cmd.va)
 		return -ENOMEM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	while (total_size) {
 		buf_size = min(total_size, (u32)60*1024);
@@ -1848,7 +1853,7 @@
 err:
 	dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size,
 			  get_fat_cmd.va, get_fat_cmd.dma);
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1859,7 +1864,7 @@
 	struct be_cmd_req_get_fw_version *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1882,7 +1887,7 @@
 			sizeof(adapter->fw_on_flash));
 	}
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1896,7 +1901,7 @@
 	struct be_cmd_req_modify_eq_delay *req;
 	int status = 0, i;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1919,7 +1924,7 @@
 
 	status = be_mcc_notify(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1946,7 +1951,7 @@
 	struct be_cmd_req_vlan_config *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1968,7 +1973,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -1979,7 +1984,7 @@
 	struct be_cmd_req_rx_filter *req = mem->va;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -1996,8 +2001,7 @@
 	req->if_flags = (value == ON) ? req->if_flags_mask : 0;
 
 	if (flags & BE_IF_FLAGS_MULTICAST) {
-		struct netdev_hw_addr *ha;
-		int i = 0;
+		int i;
 
 		/* Reset mcast promisc mode if already set by setting mask
 		 * and not setting flags field
@@ -2005,14 +2009,15 @@
 		req->if_flags_mask |=
 			cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS &
 				    be_if_cap_flags(adapter));
-		req->mcast_num = cpu_to_le32(netdev_mc_count(adapter->netdev));
-		netdev_for_each_mc_addr(ha, adapter->netdev)
-			memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN);
+		req->mcast_num = cpu_to_le32(adapter->mc_count);
+		for (i = 0; i < adapter->mc_count; i++)
+			ether_addr_copy(req->mcast_mac[i].byte,
+					adapter->mc_list[i].mac);
 	}
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2043,7 +2048,7 @@
 			    CMD_SUBSYSTEM_COMMON))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2063,7 +2068,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	if (base_status(status) == MCC_STATUS_FEATURE_NOT_SUPPORTED)
 		return  -EOPNOTSUPP;
@@ -2082,7 +2087,7 @@
 			    CMD_SUBSYSTEM_COMMON))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2105,7 +2110,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2186,7 +2191,7 @@
 	if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
 		return 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2211,7 +2216,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2223,7 +2228,7 @@
 	struct be_cmd_req_enable_disable_beacon *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2244,7 +2249,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2255,7 +2260,7 @@
 	struct be_cmd_req_get_beacon_state *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2279,7 +2284,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2303,7 +2308,7 @@
 		return -ENOMEM;
 	}
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2325,7 +2330,7 @@
 		memcpy(data, resp->page_data, PAGE_DATA_LEN);
 	}
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 	return status;
 }
@@ -2342,7 +2347,7 @@
 	void *ctxt = NULL;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 	adapter->flash_status = 0;
 
 	wrb = wrb_from_mccq(adapter);
@@ -2384,7 +2389,7 @@
 	if (status)
 		goto err_unlock;
 
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
 					 msecs_to_jiffies(60000)))
@@ -2403,7 +2408,7 @@
 	return status;
 
 err_unlock:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2457,7 +2462,7 @@
 	struct be_mcc_wrb *wrb;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2475,7 +2480,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2488,7 +2493,7 @@
 	struct lancer_cmd_resp_read_object *resp;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2522,7 +2527,7 @@
 	}
 
 err_unlock:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2534,7 +2539,7 @@
 	struct be_cmd_write_flashrom *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 	adapter->flash_status = 0;
 
 	wrb = wrb_from_mccq(adapter);
@@ -2559,7 +2564,7 @@
 	if (status)
 		goto err_unlock;
 
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
 					 msecs_to_jiffies(40000)))
@@ -2570,7 +2575,7 @@
 	return status;
 
 err_unlock:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -2581,7 +2586,7 @@
 	struct be_mcc_wrb *wrb;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -2608,7 +2613,7 @@
 		memcpy(flashed_crc, req->crc, 4);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3192,7 +3197,7 @@
 	struct be_cmd_req_acpi_wol_magic_config *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3209,7 +3214,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3224,7 +3229,7 @@
 			    CMD_SUBSYSTEM_LOWLEVEL))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3247,7 +3252,7 @@
 	if (status)
 		goto err_unlock;
 
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
 					 msecs_to_jiffies(SET_LB_MODE_TIMEOUT)))
@@ -3256,7 +3261,7 @@
 	return status;
 
 err_unlock:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3273,7 +3278,7 @@
 			    CMD_SUBSYSTEM_LOWLEVEL))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3299,7 +3304,7 @@
 	if (status)
 		goto err;
 
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 
 	wait_for_completion(&adapter->et_cmd_compl);
 	resp = embedded_payload(wrb);
@@ -3307,7 +3312,7 @@
 
 	return status;
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3323,7 +3328,7 @@
 			    CMD_SUBSYSTEM_LOWLEVEL))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3357,7 +3362,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3368,7 +3373,7 @@
 	struct be_cmd_req_seeprom_read *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3384,7 +3389,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3399,7 +3404,7 @@
 			    CMD_SUBSYSTEM_COMMON))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3444,7 +3449,7 @@
 	}
 	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3454,7 +3459,7 @@
 	struct be_cmd_req_set_qos *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3474,7 +3479,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3581,7 +3586,7 @@
 	struct be_cmd_req_get_fn_privileges *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3613,7 +3618,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3625,7 +3630,7 @@
 	struct be_cmd_req_set_fn_privileges *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3645,7 +3650,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3677,7 +3682,7 @@
 		return -ENOMEM;
 	}
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3741,7 +3746,7 @@
 	}
 
 out:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size,
 			  get_mac_list_cmd.va, get_mac_list_cmd.dma);
 	return status;
@@ -3801,7 +3806,7 @@
 	if (!cmd.va)
 		return -ENOMEM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3823,7 +3828,7 @@
 
 err:
 	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3859,7 +3864,7 @@
 			    CMD_SUBSYSTEM_COMMON))
 		return -EPERM;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3900,7 +3905,7 @@
 	status = be_mcc_notify_wait(adapter);
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -3914,7 +3919,7 @@
 	int status;
 	u16 vid;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -3961,7 +3966,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4127,6 +4132,10 @@
 	struct be_cmd_req_get_ext_fat_caps *req;
 	int status;
 
+	if (!be_cmd_allowed(adapter, OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES,
+			    CMD_SUBSYSTEM_COMMON))
+		return -EPERM;
+
 	if (mutex_lock_interruptible(&adapter->mbox_lock))
 		return -1;
 
@@ -4138,7 +4147,7 @@
 
 	req = cmd->va;
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			       OPCODE_COMMON_GET_EXT_FAT_CAPABILITES,
+			       OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES,
 			       cmd->size, wrb, cmd);
 	req->parameter_type = cpu_to_le32(1);
 
@@ -4156,7 +4165,7 @@
 	struct be_cmd_req_set_ext_fat_caps *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4167,12 +4176,12 @@
 	req = cmd->va;
 	memcpy(&req->set_params, configs, sizeof(struct be_fat_conf_params));
 	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
-			       OPCODE_COMMON_SET_EXT_FAT_CAPABILITES,
+			       OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES,
 			       cmd->size, wrb, cmd);
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4373,7 +4382,7 @@
 }
 
 /* This routine returns a list of all the NIC PF_nums in the adapter */
-u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums)
+static u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums)
 {
 	struct be_res_desc_hdr *hdr = (struct be_res_desc_hdr *)buf;
 	struct be_pcie_res_desc *pcie = NULL;
@@ -4525,7 +4534,7 @@
 }
 
 /* Mark all fields invalid */
-void be_reset_nic_desc(struct be_nic_res_desc *nic)
+static void be_reset_nic_desc(struct be_nic_res_desc *nic)
 {
 	memset(nic, 0, sizeof(*nic));
 	nic->unicast_mac_count = 0xFFFF;
@@ -4650,7 +4659,7 @@
 	if (iface == 0xFFFFFFFF)
 		return -1;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4667,7 +4676,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4701,7 +4710,7 @@
 	struct be_cmd_resp_get_iface_list *resp;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4722,7 +4731,7 @@
 	}
 
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4816,7 +4825,7 @@
 	if (BEx_chip(adapter))
 		return 0;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4834,7 +4843,7 @@
 	req->enable = 1;
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4898,14 +4907,15 @@
 	return status;
 }
 
-int __be_cmd_set_logical_link_config(struct be_adapter *adapter,
-				     int link_state, int version, u8 domain)
+static int
+__be_cmd_set_logical_link_config(struct be_adapter *adapter,
+				 int link_state, int version, u8 domain)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_set_ll_link *req;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4931,7 +4941,7 @@
 
 	status = be_mcc_notify_wait(adapter);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 
@@ -4954,6 +4964,57 @@
 							  1, domain);
 	return status;
 }
+
+int be_cmd_set_features(struct be_adapter *adapter)
+{
+	struct be_cmd_resp_set_features *resp;
+	struct be_cmd_req_set_features *req;
+	struct be_mcc_wrb *wrb;
+	int status;
+
+	if (mutex_lock_interruptible(&adapter->mcc_lock))
+		return -1;
+
+	wrb = wrb_from_mccq(adapter);
+	if (!wrb) {
+		status = -EBUSY;
+		goto err;
+	}
+
+	req = embedded_payload(wrb);
+
+	be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
+			       OPCODE_COMMON_SET_FEATURES,
+			       sizeof(*req), wrb, NULL);
+
+	req->features = cpu_to_le32(BE_FEATURE_UE_RECOVERY);
+	req->parameter_len = cpu_to_le32(sizeof(struct be_req_ue_recovery));
+	req->parameter.req.uer = cpu_to_le32(BE_UE_RECOVERY_UER_MASK);
+
+	status = be_mcc_notify_wait(adapter);
+	if (status)
+		goto err;
+
+	resp = embedded_payload(wrb);
+
+	adapter->error_recovery.ue_to_poll_time =
+		le16_to_cpu(resp->parameter.resp.ue2rp);
+	adapter->error_recovery.ue_to_reset_time =
+		le16_to_cpu(resp->parameter.resp.ue2sr);
+	adapter->error_recovery.recovery_supported = true;
+err:
+	/* Checking "MCC_STATUS_INVALID_LENGTH" for SKH as FW
+	 * returns this error in older firmware versions
+	 */
+	if (base_status(status) == MCC_STATUS_ILLEGAL_REQUEST ||
+	    base_status(status) == MCC_STATUS_INVALID_LENGTH)
+		dev_info(&adapter->pdev->dev,
+			 "Adapter does not support HW error recovery\n");
+
+	mutex_unlock(&adapter->mcc_lock);
+	return status;
+}
+
 int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload,
 		    int wrb_payload_size, u16 *cmd_status, u16 *ext_status)
 {
@@ -4964,7 +5025,7 @@
 	struct be_cmd_resp_hdr *resp;
 	int status;
 
-	spin_lock_bh(&adapter->mcc_lock);
+	mutex_lock(&adapter->mcc_lock);
 
 	wrb = wrb_from_mccq(adapter);
 	if (!wrb) {
@@ -4987,7 +5048,7 @@
 	memcpy(wrb_payload, resp, sizeof(*resp) + resp->response_length);
 	be_dws_le_to_cpu(wrb_payload, sizeof(*resp) + resp->response_length);
 err:
-	spin_unlock_bh(&adapter->mcc_lock);
+	mutex_unlock(&adapter->mcc_lock);
 	return status;
 }
 EXPORT_SYMBOL(be_roce_mcc_cmd);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 0d6be22..1bd82bc 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -58,7 +58,8 @@
 	MCC_STATUS_INSUFFICIENT_BUFFER = 4,
 	MCC_STATUS_UNAUTHORIZED_REQUEST = 5,
 	MCC_STATUS_NOT_SUPPORTED = 66,
-	MCC_STATUS_FEATURE_NOT_SUPPORTED = 68
+	MCC_STATUS_FEATURE_NOT_SUPPORTED = 68,
+	MCC_STATUS_INVALID_LENGTH = 116
 };
 
 /* Additional status */
@@ -294,8 +295,8 @@
 #define OPCODE_COMMON_GET_PHY_DETAILS			102
 #define OPCODE_COMMON_SET_DRIVER_FUNCTION_CAP		103
 #define OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES	121
-#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITES		125
-#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITES		126
+#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES		125
+#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES		126
 #define OPCODE_COMMON_GET_MAC_LIST			147
 #define OPCODE_COMMON_SET_MAC_LIST			148
 #define OPCODE_COMMON_GET_HSW_CONFIG			152
@@ -308,6 +309,7 @@
 #define OPCODE_COMMON_READ_OBJECT			171
 #define OPCODE_COMMON_WRITE_OBJECT			172
 #define OPCODE_COMMON_DELETE_OBJECT			174
+#define OPCODE_COMMON_SET_FEATURES			191
 #define OPCODE_COMMON_MANAGE_IFACE_FILTERS		193
 #define OPCODE_COMMON_GET_IFACE_LIST			194
 #define OPCODE_COMMON_ENABLE_DISABLE_VF			196
@@ -2315,6 +2317,41 @@
 	struct be_if_desc if_desc;
 };
 
+/************** Set Features *******************/
+#define	BE_FEATURE_UE_RECOVERY		0x10
+#define	BE_UE_RECOVERY_UER_MASK		0x1
+
+struct be_req_ue_recovery {
+	u32	uer;
+	u32	rsvd;
+};
+
+struct be_cmd_req_set_features {
+	struct be_cmd_req_hdr hdr;
+	u32 features;
+	u32 parameter_len;
+	union {
+		struct be_req_ue_recovery req;
+		u32 rsvd[2];
+	} parameter;
+};
+
+struct be_resp_ue_recovery {
+	u32 uer;
+	u16 ue2rp;
+	u16 ue2sr;
+};
+
+struct be_cmd_resp_set_features {
+	struct be_cmd_resp_hdr hdr;
+	u32 features;
+	u32 parameter_len;
+	union {
+		struct be_resp_ue_recovery resp;
+		u32 rsvd[2];
+	} parameter;
+};
+
 /*************** Set logical link ********************/
 #define PLINK_ENABLE            BIT(0)
 #define PLINK_TRACK             BIT(8)
@@ -2343,6 +2380,7 @@
 	u32 cap_control_flags;
 } __packed;
 
+u16 be_POST_stage_get(struct be_adapter *adapter);
 int be_pci_fnum_get(struct be_adapter *adapter);
 int be_fw_wait_ready(struct be_adapter *adapter);
 int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
@@ -2470,3 +2508,4 @@
 int be_cmd_set_sriov_config(struct be_adapter *adapter,
 			    struct be_resources res, u16 num_vfs,
 			    struct be_resources *vft_res);
+int be_cmd_set_features(struct be_adapter *adapter);
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 50e7be5..0a48a31 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -421,6 +421,10 @@
 	}
 }
 
+static const char be_priv_flags[][ETH_GSTRING_LEN] = {
+	"disable-tpe-recovery"
+};
+
 static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset,
 				uint8_t *data)
 {
@@ -454,6 +458,10 @@
 			data += ETH_GSTRING_LEN;
 		}
 		break;
+	case ETH_SS_PRIV_FLAGS:
+		for (i = 0; i < ARRAY_SIZE(be_priv_flags); i++)
+			strcpy(data + i * ETH_GSTRING_LEN, be_priv_flags[i]);
+		break;
 	}
 }
 
@@ -468,6 +476,8 @@
 		return ETHTOOL_STATS_NUM +
 			adapter->num_rx_qs * ETHTOOL_RXSTATS_NUM +
 			adapter->num_tx_qs * ETHTOOL_TXSTATS_NUM;
+	case ETH_SS_PRIV_FLAGS:
+		return ARRAY_SIZE(be_priv_flags);
 	default:
 		return -EINVAL;
 	}
@@ -1360,6 +1370,34 @@
 	return be_cmd_status(status);
 }
 
+static u32 be_get_priv_flags(struct net_device *netdev)
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+
+	return adapter->priv_flags;
+}
+
+static int be_set_priv_flags(struct net_device *netdev, u32 flags)
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+	bool tpe_old = !!(adapter->priv_flags & BE_DISABLE_TPE_RECOVERY);
+	bool tpe_new = !!(flags & BE_DISABLE_TPE_RECOVERY);
+
+	if (tpe_old != tpe_new) {
+		if (tpe_new) {
+			adapter->priv_flags |= BE_DISABLE_TPE_RECOVERY;
+			dev_info(&adapter->pdev->dev,
+				 "HW error recovery is disabled\n");
+		} else {
+			adapter->priv_flags &= ~BE_DISABLE_TPE_RECOVERY;
+			dev_info(&adapter->pdev->dev,
+				 "HW error recovery is enabled\n");
+		}
+	}
+
+	return 0;
+}
+
 const struct ethtool_ops be_ethtool_ops = {
 	.get_settings = be_get_settings,
 	.get_drvinfo = be_get_drvinfo,
@@ -1373,6 +1411,8 @@
 	.get_ringparam = be_get_ringparam,
 	.get_pauseparam = be_get_pauseparam,
 	.set_pauseparam = be_set_pauseparam,
+	.set_priv_flags = be_set_priv_flags,
+	.get_priv_flags = be_get_priv_flags,
 	.get_strings = be_get_stat_strings,
 	.set_phys_id = be_set_phys_id,
 	.set_dump = be_set_dump,
diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h
index c684bb3..92942c8 100644
--- a/drivers/net/ethernet/emulex/benet/be_hw.h
+++ b/drivers/net/ethernet/emulex/benet/be_hw.h
@@ -32,18 +32,23 @@
 #define MPU_EP_CONTROL 		0
 
 /********** MPU semphore: used for SH & BE  *************/
+#define SLIPORT_SOFTRESET_OFFSET		0x5c	/* CSR BAR offset */
 #define SLIPORT_SEMAPHORE_OFFSET_BEx		0xac  /* CSR BAR offset */
 #define SLIPORT_SEMAPHORE_OFFSET_SH		0x94  /* PCI-CFG offset */
 #define POST_STAGE_MASK				0x0000FFFF
 #define POST_ERR_MASK				0x1
 #define POST_ERR_SHIFT				31
+#define POST_ERR_RECOVERY_CODE_MASK		0xFFF
+
+/* Soft Reset register masks */
+#define SLIPORT_SOFTRESET_SR_MASK		0x00000080	/* SR bit */
 
 /* MPU semphore POST stage values */
 #define POST_STAGE_AWAITING_HOST_RDY 	0x1 /* FW awaiting goahead from host */
 #define POST_STAGE_HOST_RDY 		0x2 /* Host has given go-ahed to FW */
 #define POST_STAGE_BE_RESET		0x3 /* Host wants to reset chip */
 #define POST_STAGE_ARMFW_RDY		0xc000	/* FW is done with POST */
-
+#define POST_STAGE_RECOVERABLE_ERR	0xE000	/* Recoverable err detected */
 
 /* Lancer SLIPORT registers */
 #define SLIPORT_STATUS_OFFSET		0x404
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 874c753..dcb930a 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -41,6 +41,11 @@
 module_param(rx_frag_size, ushort, S_IRUGO);
 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
 
+/* Per-module error detection/recovery workq shared across all functions.
+ * Each function schedules its own work request on this shared workq.
+ */
+static struct workqueue_struct *be_err_recovery_workq;
+
 static const struct pci_device_id be_dev_ids[] = {
 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
 	{ PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
@@ -53,6 +58,10 @@
 	{ 0 }
 };
 MODULE_DEVICE_TABLE(pci, be_dev_ids);
+
+/* Workqueue used by all functions for defering cmd calls to the adapter */
+static struct workqueue_struct *be_wq;
+
 /* UE Status Low CSR */
 static const char * const ue_status_low_desc[] = {
 	"CEV",
@@ -260,6 +269,38 @@
 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
 }
 
+static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
+{
+	int i;
+
+	/* Check if mac has already been added as part of uc-list */
+	for (i = 0; i < adapter->uc_macs; i++) {
+		if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN],
+				     mac)) {
+			/* mac already added, skip addition */
+			adapter->pmac_id[0] = adapter->pmac_id[i + 1];
+			return 0;
+		}
+	}
+
+	return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
+			       &adapter->pmac_id[0], 0);
+}
+
+static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
+{
+	int i;
+
+	/* Skip deletion if the programmed mac is
+	 * being used in uc-list
+	 */
+	for (i = 0; i < adapter->uc_macs; i++) {
+		if (adapter->pmac_id[i + 1] == pmac_id)
+			return;
+	}
+	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
+}
+
 static int be_mac_addr_set(struct net_device *netdev, void *p)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
@@ -267,7 +308,7 @@
 	struct sockaddr *addr = p;
 	int status;
 	u8 mac[ETH_ALEN];
-	u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0;
+	u32 old_pmac_id = adapter->pmac_id[0];
 
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
@@ -275,7 +316,7 @@
 	/* Proceed further only if, User provided MAC is different
 	 * from active MAC
 	 */
-	if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
+	if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
 		return 0;
 
 	/* if device is not running, copy MAC to netdev->dev_addr */
@@ -288,23 +329,22 @@
 	 * FILTMGMT privilege. This failure is OK, only if the PF programmed
 	 * the MAC for the VF.
 	 */
-	status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data,
-				 adapter->if_handle, &adapter->pmac_id[0], 0);
+	mutex_lock(&adapter->rx_filter_lock);
+	status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
 	if (!status) {
-		curr_pmac_id = adapter->pmac_id[0];
 
 		/* Delete the old programmed MAC. This call may fail if the
 		 * old MAC was already deleted by the PF driver.
 		 */
 		if (adapter->pmac_id[0] != old_pmac_id)
-			be_cmd_pmac_del(adapter, adapter->if_handle,
-					old_pmac_id, 0);
+			be_dev_mac_del(adapter, old_pmac_id);
 	}
 
+	mutex_unlock(&adapter->rx_filter_lock);
 	/* Decide if the new MAC is successfully activated only after
 	 * querying the FW
 	 */
-	status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac,
+	status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
 				       adapter->if_handle, true, 0);
 	if (status)
 		goto err;
@@ -317,6 +357,7 @@
 		goto err;
 	}
 done:
+	ether_addr_copy(adapter->dev_mac, addr->sa_data);
 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 	return 0;
@@ -1420,13 +1461,18 @@
 	u16 num = 0, i = 0;
 	int status = 0;
 
-	/* No need to further configure vids if in promiscuous mode */
-	if (be_in_all_promisc(adapter))
+	/* No need to change the VLAN state if the I/F is in promiscuous */
+	if (adapter->netdev->flags & IFF_PROMISC)
 		return 0;
 
 	if (adapter->vlans_added > be_max_vlans(adapter))
 		return be_set_vlan_promisc(adapter);
 
+	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
+		status = be_clear_vlan_promisc(adapter);
+		if (status)
+			return status;
+	}
 	/* Construct VLAN Table to give to HW */
 	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
 		vids[num++] = cpu_to_le16(i);
@@ -1439,8 +1485,6 @@
 		    addl_status(status) ==
 				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
 			return be_set_vlan_promisc(adapter);
-	} else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
-		status = be_clear_vlan_promisc(adapter);
 	}
 	return status;
 }
@@ -1450,46 +1494,45 @@
 	struct be_adapter *adapter = netdev_priv(netdev);
 	int status = 0;
 
+	mutex_lock(&adapter->rx_filter_lock);
+
 	/* Packets with VID 0 are always received by Lancer by default */
 	if (lancer_chip(adapter) && vid == 0)
-		return status;
+		goto done;
 
 	if (test_bit(vid, adapter->vids))
-		return status;
+		goto done;
 
 	set_bit(vid, adapter->vids);
 	adapter->vlans_added++;
 
 	status = be_vid_config(adapter);
-	if (status) {
-		adapter->vlans_added--;
-		clear_bit(vid, adapter->vids);
-	}
-
+done:
+	mutex_unlock(&adapter->rx_filter_lock);
 	return status;
 }
 
 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
+	int status = 0;
+
+	mutex_lock(&adapter->rx_filter_lock);
 
 	/* Packets with VID 0 are always received by Lancer by default */
 	if (lancer_chip(adapter) && vid == 0)
-		return 0;
+		goto done;
 
 	if (!test_bit(vid, adapter->vids))
-		return 0;
+		goto done;
 
 	clear_bit(vid, adapter->vids);
 	adapter->vlans_added--;
 
-	return be_vid_config(adapter);
-}
-
-static void be_clear_all_promisc(struct be_adapter *adapter)
-{
-	be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF);
-	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
+	status = be_vid_config(adapter);
+done:
+	mutex_unlock(&adapter->rx_filter_lock);
+	return status;
 }
 
 static void be_set_all_promisc(struct be_adapter *adapter)
@@ -1510,75 +1553,226 @@
 		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
 }
 
-static void be_set_mc_list(struct be_adapter *adapter)
+static void be_set_uc_promisc(struct be_adapter *adapter)
 {
 	int status;
 
-	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
+	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
+		return;
+
+	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
 	if (!status)
-		adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
-	else
+		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
+}
+
+static void be_clear_uc_promisc(struct be_adapter *adapter)
+{
+	int status;
+
+	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
+		return;
+
+	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
+	if (!status)
+		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
+}
+
+/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
+ * We use a single callback function for both sync and unsync. We really don't
+ * add/remove addresses through this callback. But, we use it to detect changes
+ * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
+ */
+static int be_uc_list_update(struct net_device *netdev,
+			     const unsigned char *addr)
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+
+	adapter->update_uc_list = true;
+	return 0;
+}
+
+static int be_mc_list_update(struct net_device *netdev,
+			     const unsigned char *addr)
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+
+	adapter->update_mc_list = true;
+	return 0;
+}
+
+static void be_set_mc_list(struct be_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct netdev_hw_addr *ha;
+	bool mc_promisc = false;
+	int status;
+
+	netif_addr_lock_bh(netdev);
+	__dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
+
+	if (netdev->flags & IFF_PROMISC) {
+		adapter->update_mc_list = false;
+	} else if (netdev->flags & IFF_ALLMULTI ||
+		   netdev_mc_count(netdev) > be_max_mc(adapter)) {
+		/* Enable multicast promisc if num configured exceeds
+		 * what we support
+		 */
+		mc_promisc = true;
+		adapter->update_mc_list = false;
+	} else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
+		/* Update mc-list unconditionally if the iface was previously
+		 * in mc-promisc mode and now is out of that mode.
+		 */
+		adapter->update_mc_list = true;
+	}
+
+	if (adapter->update_mc_list) {
+		int i = 0;
+
+		/* cache the mc-list in adapter */
+		netdev_for_each_mc_addr(ha, netdev) {
+			ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
+			i++;
+		}
+		adapter->mc_count = netdev_mc_count(netdev);
+	}
+	netif_addr_unlock_bh(netdev);
+
+	if (mc_promisc) {
 		be_set_mc_promisc(adapter);
+	} else if (adapter->update_mc_list) {
+		status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
+		if (!status)
+			adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
+		else
+			be_set_mc_promisc(adapter);
+
+		adapter->update_mc_list = false;
+	}
+}
+
+static void be_clear_mc_list(struct be_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+
+	__dev_mc_unsync(netdev, NULL);
+	be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
+	adapter->mc_count = 0;
+}
+
+static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
+{
+	if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
+			     adapter->dev_mac)) {
+		adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
+		return 0;
+	}
+
+	return be_cmd_pmac_add(adapter,
+			       (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN],
+			       adapter->if_handle,
+			       &adapter->pmac_id[uc_idx + 1], 0);
+}
+
+static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
+{
+	if (pmac_id == adapter->pmac_id[0])
+		return;
+
+	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
 }
 
 static void be_set_uc_list(struct be_adapter *adapter)
 {
+	struct net_device *netdev = adapter->netdev;
 	struct netdev_hw_addr *ha;
-	int i = 1; /* First slot is claimed by the Primary MAC */
+	bool uc_promisc = false;
+	int curr_uc_macs = 0, i;
 
-	for (; adapter->uc_macs > 0; adapter->uc_macs--, i++)
-		be_cmd_pmac_del(adapter, adapter->if_handle,
-				adapter->pmac_id[i], 0);
+	netif_addr_lock_bh(netdev);
+	__dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
 
-	if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) {
-		be_set_all_promisc(adapter);
-		return;
+	if (netdev->flags & IFF_PROMISC) {
+		adapter->update_uc_list = false;
+	} else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
+		uc_promisc = true;
+		adapter->update_uc_list = false;
+	}  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
+		/* Update uc-list unconditionally if the iface was previously
+		 * in uc-promisc mode and now is out of that mode.
+		 */
+		adapter->update_uc_list = true;
 	}
 
-	netdev_for_each_uc_addr(ha, adapter->netdev) {
-		adapter->uc_macs++; /* First slot is for Primary MAC */
-		be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle,
-				&adapter->pmac_id[adapter->uc_macs], 0);
+	if (adapter->update_uc_list) {
+		i = 1; /* First slot is claimed by the Primary MAC */
+
+		/* cache the uc-list in adapter array */
+		netdev_for_each_uc_addr(ha, netdev) {
+			ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
+			i++;
+		}
+		curr_uc_macs = netdev_uc_count(netdev);
+	}
+	netif_addr_unlock_bh(netdev);
+
+	if (uc_promisc) {
+		be_set_uc_promisc(adapter);
+	} else if (adapter->update_uc_list) {
+		be_clear_uc_promisc(adapter);
+
+		for (i = 0; i < adapter->uc_macs; i++)
+			be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
+
+		for (i = 0; i < curr_uc_macs; i++)
+			be_uc_mac_add(adapter, i);
+		adapter->uc_macs = curr_uc_macs;
+		adapter->update_uc_list = false;
 	}
 }
 
 static void be_clear_uc_list(struct be_adapter *adapter)
 {
+	struct net_device *netdev = adapter->netdev;
 	int i;
 
-	for (i = 1; i < (adapter->uc_macs + 1); i++)
-		be_cmd_pmac_del(adapter, adapter->if_handle,
-				adapter->pmac_id[i], 0);
+	__dev_uc_unsync(netdev, NULL);
+	for (i = 0; i < adapter->uc_macs; i++)
+		be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
+
 	adapter->uc_macs = 0;
 }
 
-static void be_set_rx_mode(struct net_device *netdev)
+static void __be_set_rx_mode(struct be_adapter *adapter)
 {
-	struct be_adapter *adapter = netdev_priv(netdev);
+	struct net_device *netdev = adapter->netdev;
+
+	mutex_lock(&adapter->rx_filter_lock);
 
 	if (netdev->flags & IFF_PROMISC) {
-		be_set_all_promisc(adapter);
-		return;
+		if (!be_in_all_promisc(adapter))
+			be_set_all_promisc(adapter);
+	} else if (be_in_all_promisc(adapter)) {
+		/* We need to re-program the vlan-list or clear
+		 * vlan-promisc mode (if needed) when the interface
+		 * comes out of promisc mode.
+		 */
+		be_vid_config(adapter);
 	}
 
-	/* Interface was previously in promiscuous mode; disable it */
-	if (be_in_all_promisc(adapter)) {
-		be_clear_all_promisc(adapter);
-		if (adapter->vlans_added)
-			be_vid_config(adapter);
-	}
-
-	/* Enable multicast promisc if num configured exceeds what we support */
-	if (netdev->flags & IFF_ALLMULTI ||
-	    netdev_mc_count(netdev) > be_max_mc(adapter)) {
-		be_set_mc_promisc(adapter);
-		return;
-	}
-
-	if (netdev_uc_count(netdev) != adapter->uc_macs)
-		be_set_uc_list(adapter);
-
+	be_set_uc_list(adapter);
 	be_set_mc_list(adapter);
+
+	mutex_unlock(&adapter->rx_filter_lock);
+}
+
+static void be_work_set_rx_mode(struct work_struct *work)
+{
+	struct be_cmd_work *cmd_work =
+				container_of(work, struct be_cmd_work, work);
+
+	__be_set_rx_mode(cmd_work->adapter);
+	kfree(cmd_work);
 }
 
 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
@@ -1701,7 +1895,8 @@
 	return 0;
 }
 
-static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
+static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+			  __be16 vlan_proto)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
@@ -1713,6 +1908,9 @@
 	if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
 		return -EINVAL;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	if (vlan || qos) {
 		vlan |= qos << VLAN_PRIO_SHIFT;
 		status = be_set_vf_tvt(adapter, vf, vlan);
@@ -3220,9 +3418,7 @@
 		 */
 
 		if (ue_lo || ue_hi) {
-			dev_err(dev,
-				"Unrecoverable Error detected in the adapter");
-			dev_err(dev, "Please reboot server to recover");
+			dev_err(dev, "Error detected in the adapter");
 			if (skyhawk_chip(adapter))
 				be_set_error(adapter, BE_ERROR_UE);
 
@@ -3425,10 +3621,9 @@
 
 static void be_disable_if_filters(struct be_adapter *adapter)
 {
-	be_cmd_pmac_del(adapter, adapter->if_handle,
-			adapter->pmac_id[0], 0);
-
+	be_dev_mac_del(adapter, adapter->pmac_id[0]);
 	be_clear_uc_list(adapter);
+	be_clear_mc_list(adapter);
 
 	/* The IFACE flags are enabled in the open path and cleared
 	 * in the close path. When a VF gets detached from the host and
@@ -3462,6 +3657,11 @@
 	if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
 		return 0;
 
+	/* Before attempting cleanup ensure all the pending cmds in the
+	 * config_wq have finished execution
+	 */
+	flush_workqueue(be_wq);
+
 	be_disable_if_filters(adapter);
 
 	if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
@@ -3576,17 +3776,16 @@
 
 	/* For BE3 VFs, the PF programs the initial MAC address */
 	if (!(BEx_chip(adapter) && be_virtfn(adapter))) {
-		status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr,
-					 adapter->if_handle,
-					 &adapter->pmac_id[0], 0);
+		status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
 		if (status)
 			return status;
+		ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
 	}
 
 	if (adapter->vlans_added)
 		be_vid_config(adapter);
 
-	be_set_rx_mode(adapter->netdev);
+	__be_set_rx_mode(adapter);
 
 	return 0;
 }
@@ -3759,8 +3958,13 @@
 
 static void be_cancel_err_detection(struct be_adapter *adapter)
 {
+	struct be_error_recovery *err_rec = &adapter->error_recovery;
+
+	if (!be_err_recovery_workq)
+		return;
+
 	if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
-		cancel_delayed_work_sync(&adapter->be_err_detection_work);
+		cancel_delayed_work_sync(&err_rec->err_detection_work);
 		adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
 	}
 }
@@ -3860,6 +4064,20 @@
 		vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
 }
 
+static void be_if_destroy(struct be_adapter *adapter)
+{
+	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
+
+	kfree(adapter->pmac_id);
+	adapter->pmac_id = NULL;
+
+	kfree(adapter->mc_list);
+	adapter->mc_list = NULL;
+
+	kfree(adapter->uc_list);
+	adapter->uc_list = NULL;
+}
+
 static int be_clear(struct be_adapter *adapter)
 {
 	struct pci_dev *pdev = adapter->pdev;
@@ -3867,6 +4085,8 @@
 
 	be_cancel_worker(adapter);
 
+	flush_workqueue(be_wq);
+
 	if (sriov_enabled(adapter))
 		be_vf_clear(adapter);
 
@@ -3884,10 +4104,8 @@
 	}
 
 	be_disable_vxlan_offloads(adapter);
-	kfree(adapter->pmac_id);
-	adapter->pmac_id = NULL;
 
-	be_cmd_if_destroy(adapter, adapter->if_handle,  0);
+	be_if_destroy(adapter);
 
 	be_clear_queues(adapter);
 
@@ -4151,7 +4369,7 @@
  * for distribution between the VFs. This self-imposed limit will determine the
  * no: of VFs for which RSS can be enabled.
  */
-void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
+static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
 {
 	struct be_port_resources port_res = {0};
 	u8 rss_tables_on_port;
@@ -4341,14 +4559,29 @@
 
 static void be_schedule_worker(struct be_adapter *adapter)
 {
-	schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
+	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
 	adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
 }
 
+static void be_destroy_err_recovery_workq(void)
+{
+	if (!be_err_recovery_workq)
+		return;
+
+	flush_workqueue(be_err_recovery_workq);
+	destroy_workqueue(be_err_recovery_workq);
+	be_err_recovery_workq = NULL;
+}
+
 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
 {
-	schedule_delayed_work(&adapter->be_err_detection_work,
-			      msecs_to_jiffies(delay));
+	struct be_error_recovery *err_rec = &adapter->error_recovery;
+
+	if (!be_err_recovery_workq)
+		return;
+
+	queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
+			   msecs_to_jiffies(delay));
 	adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
 }
 
@@ -4393,6 +4626,22 @@
 	u32 cap_flags = be_if_cap_flags(adapter);
 	int status;
 
+	/* alloc required memory for other filtering fields */
+	adapter->pmac_id = kcalloc(be_max_uc(adapter),
+				   sizeof(*adapter->pmac_id), GFP_KERNEL);
+	if (!adapter->pmac_id)
+		return -ENOMEM;
+
+	adapter->mc_list = kcalloc(be_max_mc(adapter),
+				   sizeof(*adapter->mc_list), GFP_KERNEL);
+	if (!adapter->mc_list)
+		return -ENOMEM;
+
+	adapter->uc_list = kcalloc(be_max_uc(adapter),
+				   sizeof(*adapter->uc_list), GFP_KERNEL);
+	if (!adapter->uc_list)
+		return -ENOMEM;
+
 	if (adapter->cfg_num_rx_irqs == 1)
 		cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
 
@@ -4401,7 +4650,10 @@
 	status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
 				  &adapter->if_handle, 0);
 
-	return status;
+	if (status)
+		return status;
+
+	return 0;
 }
 
 int be_update_queues(struct be_adapter *adapter)
@@ -4458,10 +4710,15 @@
 	return fw_major;
 }
 
-/* If any VFs are already enabled don't FLR the PF */
+/* If it is error recovery, FLR the PF
+ * Else if any VFs are already enabled don't FLR the PF
+ */
 static bool be_reset_required(struct be_adapter *adapter)
 {
-	return pci_num_vf(adapter->pdev) ? false : true;
+	if (be_error_recovering(adapter))
+		return true;
+	else
+		return pci_num_vf(adapter->pdev) == 0;
 }
 
 /* Wait for the FW to be ready and perform the required initialization */
@@ -4473,6 +4730,9 @@
 	if (status)
 		return status;
 
+	/* FW is now ready; clear errors to allow cmds/doorbell */
+	be_clear_error(adapter, BE_CLEAR_ALL);
+
 	if (be_reset_required(adapter)) {
 		status = be_cmd_reset_function(adapter);
 		if (status)
@@ -4480,9 +4740,6 @@
 
 		/* Wait for interrupts to quiesce after an FLR */
 		msleep(100);
-
-		/* We can clear all errors when function reset succeeds */
-		be_clear_error(adapter, BE_CLEAR_ALL);
 	}
 
 	/* Tell FW we're ready to fire cmds */
@@ -4530,11 +4787,6 @@
 	if (status)
 		goto err;
 
-	adapter->pmac_id = kcalloc(be_max_uc(adapter),
-				   sizeof(*adapter->pmac_id), GFP_KERNEL);
-	if (!adapter->pmac_id)
-		return -ENOMEM;
-
 	status = be_msix_enable(adapter);
 	if (status)
 		goto err;
@@ -4595,6 +4847,9 @@
 	if (!status && be_pause_supported(adapter))
 		adapter->phy.fc_autoneg = 1;
 
+	if (be_physfn(adapter) && !lancer_chip(adapter))
+		be_cmd_set_features(adapter);
+
 	be_schedule_worker(adapter);
 	adapter->flags |= BE_FLAGS_SETUP_DONE;
 	return 0;
@@ -4728,6 +4983,23 @@
 				       0, 0, nlflags, filter_mask, NULL);
 }
 
+static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
+					 void (*func)(struct work_struct *))
+{
+	struct be_cmd_work *work;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work) {
+		dev_err(&adapter->pdev->dev,
+			"be_work memory allocation failed\n");
+		return NULL;
+	}
+
+	INIT_WORK(&work->work, func);
+	work->adapter = adapter;
+	return work;
+}
+
 /* VxLAN offload Notes:
  *
  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
@@ -4742,23 +5014,19 @@
  * adds more than one port, disable offloads and don't re-enable them again
  * until after all the tunnels are removed.
  */
-static void be_add_vxlan_port(struct net_device *netdev,
-			      struct udp_tunnel_info *ti)
+static void be_work_add_vxlan_port(struct work_struct *work)
 {
-	struct be_adapter *adapter = netdev_priv(netdev);
+	struct be_cmd_work *cmd_work =
+				container_of(work, struct be_cmd_work, work);
+	struct be_adapter *adapter = cmd_work->adapter;
+	struct net_device *netdev = adapter->netdev;
 	struct device *dev = &adapter->pdev->dev;
-	__be16 port = ti->port;
+	__be16 port = cmd_work->info.vxlan_port;
 	int status;
 
-	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-		return;
-
-	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
-		return;
-
 	if (adapter->vxlan_port == port && adapter->vxlan_port_count) {
 		adapter->vxlan_port_aliases++;
-		return;
+		goto done;
 	}
 
 	if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
@@ -4770,7 +5038,7 @@
 	}
 
 	if (adapter->vxlan_port_count++ >= 1)
-		return;
+		goto done;
 
 	status = be_cmd_manage_iface(adapter, adapter->if_handle,
 				     OP_CONVERT_NORMAL_TO_TUNNEL);
@@ -4795,29 +5063,26 @@
 
 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
 		 be16_to_cpu(port));
-	return;
+	goto done;
 err:
 	be_disable_vxlan_offloads(adapter);
+done:
+	kfree(cmd_work);
 }
 
-static void be_del_vxlan_port(struct net_device *netdev,
-			      struct udp_tunnel_info *ti)
+static void be_work_del_vxlan_port(struct work_struct *work)
 {
-	struct be_adapter *adapter = netdev_priv(netdev);
-	__be16 port = ti->port;
-
-	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-		return;
-
-	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
-		return;
+	struct be_cmd_work *cmd_work =
+				container_of(work, struct be_cmd_work, work);
+	struct be_adapter *adapter = cmd_work->adapter;
+	__be16 port = cmd_work->info.vxlan_port;
 
 	if (adapter->vxlan_port != port)
 		goto done;
 
 	if (adapter->vxlan_port_aliases) {
 		adapter->vxlan_port_aliases--;
-		return;
+		goto out;
 	}
 
 	be_disable_vxlan_offloads(adapter);
@@ -4827,6 +5092,40 @@
 		 be16_to_cpu(port));
 done:
 	adapter->vxlan_port_count--;
+out:
+	kfree(cmd_work);
+}
+
+static void be_cfg_vxlan_port(struct net_device *netdev,
+			      struct udp_tunnel_info *ti,
+			      void (*func)(struct work_struct *))
+{
+	struct be_adapter *adapter = netdev_priv(netdev);
+	struct be_cmd_work *cmd_work;
+
+	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+		return;
+
+	if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
+		return;
+
+	cmd_work = be_alloc_work(adapter, func);
+	if (cmd_work) {
+		cmd_work->info.vxlan_port = ti->port;
+		queue_work(be_wq, &cmd_work->work);
+	}
+}
+
+static void be_del_vxlan_port(struct net_device *netdev,
+			      struct udp_tunnel_info *ti)
+{
+	be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
+}
+
+static void be_add_vxlan_port(struct net_device *netdev,
+			      struct udp_tunnel_info *ti)
+{
+	be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
 }
 
 static netdev_features_t be_features_check(struct sk_buff *skb,
@@ -4891,6 +5190,16 @@
 	return 0;
 }
 
+static void be_set_rx_mode(struct net_device *dev)
+{
+	struct be_adapter *adapter = netdev_priv(dev);
+	struct be_cmd_work *work;
+
+	work = be_alloc_work(adapter, be_work_set_rx_mode);
+	if (work)
+		queue_work(be_wq, &work->work);
+}
+
 static const struct net_device_ops be_netdev_ops = {
 	.ndo_open		= be_open,
 	.ndo_stop		= be_close,
@@ -4984,13 +5293,145 @@
 	return 0;
 }
 
+static void be_soft_reset(struct be_adapter *adapter)
+{
+	u32 val;
+
+	dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
+	val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
+	val |= SLIPORT_SOFTRESET_SR_MASK;
+	iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
+}
+
+static bool be_err_is_recoverable(struct be_adapter *adapter)
+{
+	struct be_error_recovery *err_rec = &adapter->error_recovery;
+	unsigned long initial_idle_time =
+		msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
+	unsigned long recovery_interval =
+		msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
+	u16 ue_err_code;
+	u32 val;
+
+	val = be_POST_stage_get(adapter);
+	if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
+		return false;
+	ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
+	if (ue_err_code == 0)
+		return false;
+
+	dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
+		ue_err_code);
+
+	if (jiffies - err_rec->probe_time <= initial_idle_time) {
+		dev_err(&adapter->pdev->dev,
+			"Cannot recover within %lu sec from driver load\n",
+			jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
+		return false;
+	}
+
+	if (err_rec->last_recovery_time &&
+	    (jiffies - err_rec->last_recovery_time <= recovery_interval)) {
+		dev_err(&adapter->pdev->dev,
+			"Cannot recover within %lu sec from last recovery\n",
+			jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
+		return false;
+	}
+
+	if (ue_err_code == err_rec->last_err_code) {
+		dev_err(&adapter->pdev->dev,
+			"Cannot recover from a consecutive TPE error\n");
+		return false;
+	}
+
+	err_rec->last_recovery_time = jiffies;
+	err_rec->last_err_code = ue_err_code;
+	return true;
+}
+
+static int be_tpe_recover(struct be_adapter *adapter)
+{
+	struct be_error_recovery *err_rec = &adapter->error_recovery;
+	int status = -EAGAIN;
+	u32 val;
+
+	switch (err_rec->recovery_state) {
+	case ERR_RECOVERY_ST_NONE:
+		err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
+		err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
+		break;
+
+	case ERR_RECOVERY_ST_DETECT:
+		val = be_POST_stage_get(adapter);
+		if ((val & POST_STAGE_RECOVERABLE_ERR) !=
+		    POST_STAGE_RECOVERABLE_ERR) {
+			dev_err(&adapter->pdev->dev,
+				"Unrecoverable HW error detected: 0x%x\n", val);
+			status = -EINVAL;
+			err_rec->resched_delay = 0;
+			break;
+		}
+
+		dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
+
+		/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
+		 * milliseconds before it checks for final error status in
+		 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
+		 * If it does, then PF0 initiates a Soft Reset.
+		 */
+		if (adapter->pf_num == 0) {
+			err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
+			err_rec->resched_delay = err_rec->ue_to_reset_time -
+					ERR_RECOVERY_UE_DETECT_DURATION;
+			break;
+		}
+
+		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
+		err_rec->resched_delay = err_rec->ue_to_poll_time -
+					ERR_RECOVERY_UE_DETECT_DURATION;
+		break;
+
+	case ERR_RECOVERY_ST_RESET:
+		if (!be_err_is_recoverable(adapter)) {
+			dev_err(&adapter->pdev->dev,
+				"Failed to meet recovery criteria\n");
+			status = -EIO;
+			err_rec->resched_delay = 0;
+			break;
+		}
+		be_soft_reset(adapter);
+		err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
+		err_rec->resched_delay = err_rec->ue_to_poll_time -
+					err_rec->ue_to_reset_time;
+		break;
+
+	case ERR_RECOVERY_ST_PRE_POLL:
+		err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
+		err_rec->resched_delay = 0;
+		status = 0;			/* done */
+		break;
+
+	default:
+		status = -EINVAL;
+		err_rec->resched_delay = 0;
+		break;
+	}
+
+	return status;
+}
+
 static int be_err_recover(struct be_adapter *adapter)
 {
 	int status;
 
-	/* Error recovery is supported only Lancer as of now */
-	if (!lancer_chip(adapter))
-		return -EIO;
+	if (!lancer_chip(adapter)) {
+		if (!adapter->error_recovery.recovery_supported ||
+		    adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
+			return -EIO;
+		status = be_tpe_recover(adapter);
+		if (status)
+			goto err;
+	}
 
 	/* Wait for adapter to reach quiescent state before
 	 * destroying queues
@@ -4999,59 +5440,74 @@
 	if (status)
 		goto err;
 
+	adapter->flags |= BE_FLAGS_TRY_RECOVERY;
+
 	be_cleanup(adapter);
 
 	status = be_resume(adapter);
 	if (status)
 		goto err;
 
-	return 0;
+	adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
+
 err:
 	return status;
 }
 
 static void be_err_detection_task(struct work_struct *work)
 {
+	struct be_error_recovery *err_rec =
+			container_of(work, struct be_error_recovery,
+				     err_detection_work.work);
 	struct be_adapter *adapter =
-				container_of(work, struct be_adapter,
-					     be_err_detection_work.work);
+			container_of(err_rec, struct be_adapter,
+				     error_recovery);
+	u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
 	struct device *dev = &adapter->pdev->dev;
 	int recovery_status;
-	int delay = ERR_DETECTION_DELAY;
 
 	be_detect_error(adapter);
-
-	if (be_check_error(adapter, BE_ERROR_HW))
-		recovery_status = be_err_recover(adapter);
-	else
+	if (!be_check_error(adapter, BE_ERROR_HW))
 		goto reschedule_task;
 
+	recovery_status = be_err_recover(adapter);
 	if (!recovery_status) {
-		adapter->recovery_retries = 0;
+		err_rec->recovery_retries = 0;
+		err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
 		dev_info(dev, "Adapter recovery successful\n");
 		goto reschedule_task;
-	} else if (be_virtfn(adapter)) {
+	} else if (!lancer_chip(adapter) && err_rec->resched_delay) {
+		/* BEx/SH recovery state machine */
+		if (adapter->pf_num == 0 &&
+		    err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
+			dev_err(&adapter->pdev->dev,
+				"Adapter recovery in progress\n");
+		resched_delay = err_rec->resched_delay;
+		goto reschedule_task;
+	} else if (lancer_chip(adapter) && be_virtfn(adapter)) {
 		/* For VFs, check if PF have allocated resources
 		 * every second.
 		 */
 		dev_err(dev, "Re-trying adapter recovery\n");
 		goto reschedule_task;
-	} else if (adapter->recovery_retries++ <
-		   MAX_ERR_RECOVERY_RETRY_COUNT) {
+	} else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
+		   ERR_RECOVERY_MAX_RETRY_COUNT) {
 		/* In case of another error during recovery, it takes 30 sec
 		 * for adapter to come out of error. Retry error recovery after
 		 * this time interval.
 		 */
 		dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
-		delay = ERR_RECOVERY_RETRY_DELAY;
+		resched_delay = ERR_RECOVERY_RETRY_DELAY;
 		goto reschedule_task;
 	} else {
 		dev_err(dev, "Adapter recovery failed\n");
+		dev_err(dev, "Please reboot server to recover\n");
 	}
 
 	return;
+
 reschedule_task:
-	be_schedule_err_detection(adapter, delay);
+	be_schedule_err_detection(adapter, resched_delay);
 }
 
 static void be_log_sfp_info(struct be_adapter *adapter)
@@ -5116,7 +5572,7 @@
 
 reschedule:
 	adapter->work_counter++;
-	schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000));
+	queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
 }
 
 static void be_unmap_pci_bars(struct be_adapter *adapter)
@@ -5256,14 +5712,18 @@
 	}
 
 	mutex_init(&adapter->mbox_lock);
-	spin_lock_init(&adapter->mcc_lock);
+	mutex_init(&adapter->mcc_lock);
+	mutex_init(&adapter->rx_filter_lock);
 	spin_lock_init(&adapter->mcc_cq_lock);
 	init_completion(&adapter->et_cmd_compl);
 
 	pci_save_state(adapter->pdev);
 
 	INIT_DELAYED_WORK(&adapter->work, be_worker);
-	INIT_DELAYED_WORK(&adapter->be_err_detection_work,
+
+	adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
+	adapter->error_recovery.resched_delay = 0;
+	INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
 			  be_err_detection_task);
 
 	adapter->rx_fc = true;
@@ -5298,6 +5758,9 @@
 
 	be_clear(adapter);
 
+	if (!pci_vfs_assigned(adapter->pdev))
+		be_cmd_reset_function(adapter);
+
 	/* tell fw we're done with firing cmds */
 	be_cmd_fw_clean(adapter);
 
@@ -5454,6 +5917,7 @@
 	be_roce_dev_add(adapter);
 
 	be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
+	adapter->error_recovery.probe_time = jiffies;
 
 	/* On Die temperature not supported for VF. */
 	if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
@@ -5699,6 +6163,8 @@
 
 static int __init be_init_module(void)
 {
+	int status;
+
 	if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
 	    rx_frag_size != 2048) {
 		printk(KERN_WARNING DRV_NAME
@@ -5712,12 +6178,33 @@
 		pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
 	}
 
-	return pci_register_driver(&be_driver);
+	be_wq = create_singlethread_workqueue("be_wq");
+	if (!be_wq) {
+		pr_warn(DRV_NAME "workqueue creation failed\n");
+		return -1;
+	}
+
+	be_err_recovery_workq =
+		create_singlethread_workqueue("be_err_recover");
+	if (!be_err_recovery_workq)
+		pr_warn(DRV_NAME "Could not create error recovery workqueue\n");
+
+	status = pci_register_driver(&be_driver);
+	if (status) {
+		destroy_workqueue(be_wq);
+		be_destroy_err_recovery_workq();
+	}
+	return status;
 }
 module_init(be_init_module);
 
 static void __exit be_exit_module(void)
 {
 	pci_unregister_driver(&be_driver);
+
+	be_destroy_err_recovery_workq();
+
+	if (be_wq)
+		destroy_workqueue(be_wq);
 }
 module_exit(be_exit_module);
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 36361f8..2625872 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -60,6 +60,8 @@
 	struct ftgmac100_descs *descs;
 	dma_addr_t descs_dma_addr;
 
+	struct page *rx_pages[RX_QUEUE_ENTRIES];
+
 	unsigned int rx_pointer;
 	unsigned int tx_clean_pointer;
 	unsigned int tx_pointer;
@@ -77,6 +79,9 @@
 	int int_mask_all;
 	bool use_ncsi;
 	bool enabled;
+
+	u32 rxdes0_edorr_mask;
+	u32 txdes0_edotr_mask;
 };
 
 static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv,
@@ -257,10 +262,11 @@
 	return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_RXPKT_RDY);
 }
 
-static void ftgmac100_rxdes_set_dma_own(struct ftgmac100_rxdes *rxdes)
+static void ftgmac100_rxdes_set_dma_own(const struct ftgmac100 *priv,
+					struct ftgmac100_rxdes *rxdes)
 {
 	/* clear status bits */
-	rxdes->rxdes0 &= cpu_to_le32(FTGMAC100_RXDES0_EDORR);
+	rxdes->rxdes0 &= cpu_to_le32(priv->rxdes0_edorr_mask);
 }
 
 static bool ftgmac100_rxdes_rx_error(struct ftgmac100_rxdes *rxdes)
@@ -298,9 +304,10 @@
 	return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_MULTICAST);
 }
 
-static void ftgmac100_rxdes_set_end_of_ring(struct ftgmac100_rxdes *rxdes)
+static void ftgmac100_rxdes_set_end_of_ring(const struct ftgmac100 *priv,
+					    struct ftgmac100_rxdes *rxdes)
 {
-	rxdes->rxdes0 |= cpu_to_le32(FTGMAC100_RXDES0_EDORR);
+	rxdes->rxdes0 |= cpu_to_le32(priv->rxdes0_edorr_mask);
 }
 
 static void ftgmac100_rxdes_set_dma_addr(struct ftgmac100_rxdes *rxdes,
@@ -341,18 +348,27 @@
 	return rxdes->rxdes1 & cpu_to_le32(FTGMAC100_RXDES1_IP_CHKSUM_ERR);
 }
 
+static inline struct page **ftgmac100_rxdes_page_slot(struct ftgmac100 *priv,
+						      struct ftgmac100_rxdes *rxdes)
+{
+	return &priv->rx_pages[rxdes - priv->descs->rxdes];
+}
+
 /*
  * rxdes2 is not used by hardware. We use it to keep track of page.
  * Since hardware does not touch it, we can skip cpu_to_le32()/le32_to_cpu().
  */
-static void ftgmac100_rxdes_set_page(struct ftgmac100_rxdes *rxdes, struct page *page)
+static void ftgmac100_rxdes_set_page(struct ftgmac100 *priv,
+				     struct ftgmac100_rxdes *rxdes,
+				     struct page *page)
 {
-	rxdes->rxdes2 = (unsigned int)page;
+	*ftgmac100_rxdes_page_slot(priv, rxdes) = page;
 }
 
-static struct page *ftgmac100_rxdes_get_page(struct ftgmac100_rxdes *rxdes)
+static struct page *ftgmac100_rxdes_get_page(struct ftgmac100 *priv,
+					     struct ftgmac100_rxdes *rxdes)
 {
-	return (struct page *)rxdes->rxdes2;
+	return *ftgmac100_rxdes_page_slot(priv, rxdes);
 }
 
 /******************************************************************************
@@ -382,7 +398,7 @@
 		if (ftgmac100_rxdes_first_segment(rxdes))
 			return rxdes;
 
-		ftgmac100_rxdes_set_dma_own(rxdes);
+		ftgmac100_rxdes_set_dma_own(priv, rxdes);
 		ftgmac100_rx_pointer_advance(priv);
 		rxdes = ftgmac100_current_rxdes(priv);
 	}
@@ -453,7 +469,7 @@
 		if (ftgmac100_rxdes_last_segment(rxdes))
 			done = true;
 
-		ftgmac100_rxdes_set_dma_own(rxdes);
+		ftgmac100_rxdes_set_dma_own(priv, rxdes);
 		ftgmac100_rx_pointer_advance(priv);
 		rxdes = ftgmac100_current_rxdes(priv);
 	} while (!done && ftgmac100_rxdes_packet_ready(rxdes));
@@ -501,7 +517,7 @@
 
 	do {
 		dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes);
-		struct page *page = ftgmac100_rxdes_get_page(rxdes);
+		struct page *page = ftgmac100_rxdes_get_page(priv, rxdes);
 		unsigned int size;
 
 		dma_unmap_page(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE);
@@ -545,10 +561,11 @@
 /******************************************************************************
  * internal functions (transmit descriptor)
  *****************************************************************************/
-static void ftgmac100_txdes_reset(struct ftgmac100_txdes *txdes)
+static void ftgmac100_txdes_reset(const struct ftgmac100 *priv,
+				  struct ftgmac100_txdes *txdes)
 {
 	/* clear all except end of ring bit */
-	txdes->txdes0 &= cpu_to_le32(FTGMAC100_TXDES0_EDOTR);
+	txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask);
 	txdes->txdes1 = 0;
 	txdes->txdes2 = 0;
 	txdes->txdes3 = 0;
@@ -569,9 +586,10 @@
 	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN);
 }
 
-static void ftgmac100_txdes_set_end_of_ring(struct ftgmac100_txdes *txdes)
+static void ftgmac100_txdes_set_end_of_ring(const struct ftgmac100 *priv,
+					    struct ftgmac100_txdes *txdes)
 {
-	txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_EDOTR);
+	txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask);
 }
 
 static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes)
@@ -690,7 +708,7 @@
 
 	dev_kfree_skb(skb);
 
-	ftgmac100_txdes_reset(txdes);
+	ftgmac100_txdes_reset(priv, txdes);
 
 	ftgmac100_tx_clean_pointer_advance(priv);
 
@@ -779,9 +797,9 @@
 		return -ENOMEM;
 	}
 
-	ftgmac100_rxdes_set_page(rxdes, page);
+	ftgmac100_rxdes_set_page(priv, rxdes, page);
 	ftgmac100_rxdes_set_dma_addr(rxdes, map);
-	ftgmac100_rxdes_set_dma_own(rxdes);
+	ftgmac100_rxdes_set_dma_own(priv, rxdes);
 	return 0;
 }
 
@@ -791,7 +809,7 @@
 
 	for (i = 0; i < RX_QUEUE_ENTRIES; i++) {
 		struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i];
-		struct page *page = ftgmac100_rxdes_get_page(rxdes);
+		struct page *page = ftgmac100_rxdes_get_page(priv, rxdes);
 		dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes);
 
 		if (!page)
@@ -828,7 +846,8 @@
 		return -ENOMEM;
 
 	/* initialize RX ring */
-	ftgmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]);
+	ftgmac100_rxdes_set_end_of_ring(priv,
+					&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]);
 
 	for (i = 0; i < RX_QUEUE_ENTRIES; i++) {
 		struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i];
@@ -838,7 +857,8 @@
 	}
 
 	/* initialize TX ring */
-	ftgmac100_txdes_set_end_of_ring(&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]);
+	ftgmac100_txdes_set_end_of_ring(priv,
+					&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]);
 	return 0;
 
 err:
@@ -1055,14 +1075,12 @@
 	}
 
 	if (status & priv->int_mask_all & (FTGMAC100_INT_NO_RXBUF |
-			FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR |
-			FTGMAC100_INT_PHYSTS_CHG)) {
+			FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR)) {
 		if (net_ratelimit())
-			netdev_info(netdev, "[ISR] = 0x%x: %s%s%s%s\n", status,
+			netdev_info(netdev, "[ISR] = 0x%x: %s%s%s\n", status,
 				    status & FTGMAC100_INT_NO_RXBUF ? "NO_RXBUF " : "",
 				    status & FTGMAC100_INT_RPKT_LOST ? "RPKT_LOST " : "",
-				    status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "",
-				    status & FTGMAC100_INT_PHYSTS_CHG ? "PHYSTS_CHG" : "");
+				    status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "");
 
 		if (status & FTGMAC100_INT_NO_RXBUF) {
 			/* RX buffer unavailable */
@@ -1092,6 +1110,7 @@
 static int ftgmac100_open(struct net_device *netdev)
 {
 	struct ftgmac100 *priv = netdev_priv(netdev);
+	unsigned int status;
 	int err;
 
 	err = ftgmac100_alloc_buffers(priv);
@@ -1117,6 +1136,11 @@
 
 	ftgmac100_init_hw(priv);
 	ftgmac100_start_hw(priv, priv->use_ncsi ? 100 : 10);
+
+	/* Clear stale interrupts */
+	status = ioread32(priv->base + FTGMAC100_OFFSET_ISR);
+	iowrite32(status, priv->base + FTGMAC100_OFFSET_ISR);
+
 	if (netdev->phydev)
 		phy_start(netdev->phydev);
 	else if (priv->use_ncsi)
@@ -1166,6 +1190,8 @@
 	napi_disable(&priv->napi);
 	if (netdev->phydev)
 		phy_stop(netdev->phydev);
+	else if (priv->use_ncsi)
+		ncsi_stop_dev(priv->ndev);
 
 	ftgmac100_stop_hw(priv);
 	free_irq(priv->irq, netdev);
@@ -1226,12 +1252,21 @@
 	struct ftgmac100 *priv = netdev_priv(netdev);
 	struct platform_device *pdev = to_platform_device(priv->dev);
 	int i, err = 0;
+	u32 reg;
 
 	/* initialize mdio bus */
 	priv->mii_bus = mdiobus_alloc();
 	if (!priv->mii_bus)
 		return -EIO;
 
+	if (of_machine_is_compatible("aspeed,ast2400") ||
+	    of_machine_is_compatible("aspeed,ast2500")) {
+		/* This driver supports the old MDIO interface */
+		reg = ioread32(priv->base + FTGMAC100_OFFSET_REVR);
+		reg &= ~FTGMAC100_REVR_NEW_MDIO_INTERFACE;
+		iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR);
+	};
+
 	priv->mii_bus->name = "ftgmac100_mdio";
 	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d",
 		 pdev->name, pdev->id);
@@ -1355,9 +1390,18 @@
 			      FTGMAC100_INT_XPKT_ETH |
 			      FTGMAC100_INT_XPKT_LOST |
 			      FTGMAC100_INT_AHB_ERR |
-			      FTGMAC100_INT_PHYSTS_CHG |
 			      FTGMAC100_INT_RPKT_BUF |
 			      FTGMAC100_INT_NO_RXBUF);
+
+	if (of_machine_is_compatible("aspeed,ast2400") ||
+	    of_machine_is_compatible("aspeed,ast2500")) {
+		priv->rxdes0_edorr_mask = BIT(30);
+		priv->txdes0_edotr_mask = BIT(30);
+	} else {
+		priv->rxdes0_edorr_mask = BIT(15);
+		priv->txdes0_edotr_mask = BIT(15);
+	}
+
 	if (pdev->dev.of_node &&
 	    of_get_property(pdev->dev.of_node, "use-ncsi", NULL)) {
 		if (!IS_ENABLED(CONFIG_NET_NCSI)) {
@@ -1367,7 +1411,6 @@
 
 		dev_info(&pdev->dev, "Using NCSI interface\n");
 		priv->use_ncsi = true;
-		priv->int_mask_all &= ~FTGMAC100_INT_PHYSTS_CHG;
 		priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler);
 		if (!priv->ndev)
 			goto err_ncsi_dev;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h
index 13408d4..a7ce0ac 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.h
+++ b/drivers/net/ethernet/faraday/ftgmac100.h
@@ -134,6 +134,11 @@
 #define FTGMAC100_DMAFIFOS_TXDMA_REQ		(1 << 31)
 
 /*
+ * Feature Register
+ */
+#define FTGMAC100_REVR_NEW_MDIO_INTERFACE	BIT(31)
+
+/*
  * Receive buffer size register
  */
 #define FTGMAC100_RBSR_SIZE(x)		((x) & 0x3fff)
@@ -152,6 +157,7 @@
 #define FTGMAC100_MACCR_FULLDUP		(1 << 8)
 #define FTGMAC100_MACCR_GIGA_MODE	(1 << 9)
 #define FTGMAC100_MACCR_CRC_APD		(1 << 10)
+#define FTGMAC100_MACCR_PHY_LINK_LEVEL	(1 << 11)
 #define FTGMAC100_MACCR_RX_RUNT		(1 << 12)
 #define FTGMAC100_MACCR_JUMBO_LF	(1 << 13)
 #define FTGMAC100_MACCR_RX_ALL		(1 << 14)
@@ -189,7 +195,6 @@
 } __attribute__ ((aligned(16)));
 
 #define FTGMAC100_TXDES0_TXBUF_SIZE(x)	((x) & 0x3fff)
-#define FTGMAC100_TXDES0_EDOTR		(1 << 15)
 #define FTGMAC100_TXDES0_CRC_ERR	(1 << 19)
 #define FTGMAC100_TXDES0_LTS		(1 << 28)
 #define FTGMAC100_TXDES0_FTS		(1 << 29)
@@ -215,7 +220,6 @@
 } __attribute__ ((aligned(16)));
 
 #define FTGMAC100_RXDES0_VDBC		0x3fff
-#define FTGMAC100_RXDES0_EDORR		(1 << 15)
 #define FTGMAC100_RXDES0_MULTICAST	(1 << 16)
 #define FTGMAC100_RXDES0_BROADCAST	(1 << 17)
 #define FTGMAC100_RXDES0_RX_ERR		(1 << 18)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 01f7e81..48a033e 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -89,10 +89,10 @@
 		.driver_data = 0,
 	}, {
 		.name = "imx25-fec",
-		.driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_HAS_RACC,
+		.driver_data = FEC_QUIRK_USE_GASKET,
 	}, {
 		.name = "imx27-fec",
-		.driver_data = FEC_QUIRK_HAS_RACC,
+		.driver_data = 0,
 	}, {
 		.name = "imx28-fec",
 		.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME |
@@ -180,6 +180,7 @@
 /* FEC receive acceleration */
 #define FEC_RACC_IPDIS		(1 << 1)
 #define FEC_RACC_PRODIS		(1 << 2)
+#define FEC_RACC_SHIFT16	BIT(7)
 #define FEC_RACC_OPTIONS	(FEC_RACC_IPDIS | FEC_RACC_PRODIS)
 
 /*
@@ -912,13 +913,11 @@
 	 * enet-mac reset will reset mac address registers too,
 	 * so need to reconfigure it.
 	 */
-	if (fep->quirks & FEC_QUIRK_ENET_MAC) {
-		memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN);
-		writel((__force u32)cpu_to_be32(temp_mac[0]),
-		       fep->hwp + FEC_ADDR_LOW);
-		writel((__force u32)cpu_to_be32(temp_mac[1]),
-		       fep->hwp + FEC_ADDR_HIGH);
-	}
+	memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN);
+	writel((__force u32)cpu_to_be32(temp_mac[0]),
+	       fep->hwp + FEC_ADDR_LOW);
+	writel((__force u32)cpu_to_be32(temp_mac[1]),
+	       fep->hwp + FEC_ADDR_HIGH);
 
 	/* Clear any outstanding interrupt. */
 	writel(0xffffffff, fep->hwp + FEC_IEVENT);
@@ -945,9 +944,11 @@
 
 #if !defined(CONFIG_M5272)
 	if (fep->quirks & FEC_QUIRK_HAS_RACC) {
-		/* set RX checksum */
 		val = readl(fep->hwp + FEC_RACC);
+		/* align IP header */
+		val |= FEC_RACC_SHIFT16;
 		if (fep->csum_flags & FLAG_RX_CSUM_ENABLED)
+			/* set RX checksum */
 			val |= FEC_RACC_OPTIONS;
 		else
 			val &= ~FEC_RACC_OPTIONS;
@@ -1428,6 +1429,12 @@
 		prefetch(skb->data - NET_IP_ALIGN);
 		skb_put(skb, pkt_len - 4);
 		data = skb->data;
+
+#if !defined(CONFIG_M5272)
+		if (fep->quirks & FEC_QUIRK_HAS_RACC)
+			data = skb_pull_inline(skb, 2);
+#endif
+
 		if (!is_copybreak && need_swap)
 			swap_buffer(data, pkt_len);
 
@@ -2887,7 +2894,7 @@
  * this kind of feature?).
  */
 
-#define HASH_BITS	6		/* #bits in hash */
+#define FEC_HASH_BITS	6		/* #bits in hash */
 #define CRC32_POLY	0xEDB88320
 
 static void set_multicast_list(struct net_device *ndev)
@@ -2935,10 +2942,10 @@
 			}
 		}
 
-		/* only upper 6 bits (HASH_BITS) are used
+		/* only upper 6 bits (FEC_HASH_BITS) are used
 		 * which point to specific bit in he hash registers
 		 */
-		hash = (crc >> (32 - HASH_BITS)) & 0x3f;
+		hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f;
 
 		if (hash > 31) {
 			tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
diff --git a/drivers/net/ethernet/freescale/fman/fman_mac.h b/drivers/net/ethernet/freescale/fman/fman_mac.h
index 8ddeedb..ddf0260 100644
--- a/drivers/net/ethernet/freescale/fman/fman_mac.h
+++ b/drivers/net/ethernet/freescale/fman/fman_mac.h
@@ -192,7 +192,7 @@
 	/* A handle to the FM object this port related to */
 	void *fm;
 	/* MDIO exceptions interrupt source - not valid for all
-	 * MACs; MUST be set to 'NO_IRQ' for MACs that don't have
+	 * MACs; MUST be set to 0 for MACs that don't have
 	 * mdio-irq, or for polling
 	 */
 	void *dev_id; /* device cookie used by the exception cbs */
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 61fd486..dc120c1 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -60,6 +60,9 @@
 MODULE_PARM_DESC(fs_enet_debug,
 		 "Freescale bitmapped debugging message enable value");
 
+#define RX_RING_SIZE	32
+#define TX_RING_SIZE	64
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void fs_enet_netpoll(struct net_device *dev);
 #endif
@@ -79,8 +82,8 @@
 		skb_reserve(skb, align - off);
 }
 
-/* NAPI receive function */
-static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
+/* NAPI function */
+static int fs_enet_napi(struct napi_struct *napi, int budget)
 {
 	struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, napi);
 	struct net_device *dev = fep->ndev;
@@ -90,147 +93,17 @@
 	int received = 0;
 	u16 pkt_len, sc;
 	int curidx;
-
-	if (budget <= 0)
-		return received;
-
-	/*
-	 * First, grab all of the stats for the incoming packet.
-	 * These get messed up if we get called due to a busy condition.
-	 */
-	bdp = fep->cur_rx;
-
-	/* clear RX status bits for napi*/
-	(*fep->ops->napi_clear_rx_event)(dev);
-
-	while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0) {
-		curidx = bdp - fep->rx_bd_base;
-
-		/*
-		 * Since we have allocated space to hold a complete frame,
-		 * the last indicator should be set.
-		 */
-		if ((sc & BD_ENET_RX_LAST) == 0)
-			dev_warn(fep->dev, "rcv is not +last\n");
-
-		/*
-		 * Check for errors.
-		 */
-		if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_CL |
-			  BD_ENET_RX_NO | BD_ENET_RX_CR | BD_ENET_RX_OV)) {
-			fep->stats.rx_errors++;
-			/* Frame too long or too short. */
-			if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
-				fep->stats.rx_length_errors++;
-			/* Frame alignment */
-			if (sc & (BD_ENET_RX_NO | BD_ENET_RX_CL))
-				fep->stats.rx_frame_errors++;
-			/* CRC Error */
-			if (sc & BD_ENET_RX_CR)
-				fep->stats.rx_crc_errors++;
-			/* FIFO overrun */
-			if (sc & BD_ENET_RX_OV)
-				fep->stats.rx_crc_errors++;
-
-			skb = fep->rx_skbuff[curidx];
-
-			dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp),
-				L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
-				DMA_FROM_DEVICE);
-
-			skbn = skb;
-
-		} else {
-			skb = fep->rx_skbuff[curidx];
-
-			dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp),
-				L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
-				DMA_FROM_DEVICE);
-
-			/*
-			 * Process the incoming frame.
-			 */
-			fep->stats.rx_packets++;
-			pkt_len = CBDR_DATLEN(bdp) - 4;	/* remove CRC */
-			fep->stats.rx_bytes += pkt_len + 4;
-
-			if (pkt_len <= fpi->rx_copybreak) {
-				/* +2 to make IP header L1 cache aligned */
-				skbn = netdev_alloc_skb(dev, pkt_len + 2);
-				if (skbn != NULL) {
-					skb_reserve(skbn, 2);	/* align IP header */
-					skb_copy_from_linear_data(skb,
-						      skbn->data, pkt_len);
-					swap(skb, skbn);
-				}
-			} else {
-				skbn = netdev_alloc_skb(dev, ENET_RX_FRSIZE);
-
-				if (skbn)
-					skb_align(skbn, ENET_RX_ALIGN);
-			}
-
-			if (skbn != NULL) {
-				skb_put(skb, pkt_len);	/* Make room */
-				skb->protocol = eth_type_trans(skb, dev);
-				received++;
-				netif_receive_skb(skb);
-			} else {
-				fep->stats.rx_dropped++;
-				skbn = skb;
-			}
-		}
-
-		fep->rx_skbuff[curidx] = skbn;
-		CBDW_BUFADDR(bdp, dma_map_single(fep->dev, skbn->data,
-			     L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
-			     DMA_FROM_DEVICE));
-		CBDW_DATLEN(bdp, 0);
-		CBDW_SC(bdp, (sc & ~BD_ENET_RX_STATS) | BD_ENET_RX_EMPTY);
-
-		/*
-		 * Update BD pointer to next entry.
-		 */
-		if ((sc & BD_ENET_RX_WRAP) == 0)
-			bdp++;
-		else
-			bdp = fep->rx_bd_base;
-
-		(*fep->ops->rx_bd_done)(dev);
-
-		if (received >= budget)
-			break;
-	}
-
-	fep->cur_rx = bdp;
-
-	if (received < budget) {
-		/* done */
-		napi_complete(napi);
-		(*fep->ops->napi_enable_rx)(dev);
-	}
-	return received;
-}
-
-static int fs_enet_tx_napi(struct napi_struct *napi, int budget)
-{
-	struct fs_enet_private *fep = container_of(napi, struct fs_enet_private,
-						   napi_tx);
-	struct net_device *dev = fep->ndev;
-	cbd_t __iomem *bdp;
-	struct sk_buff *skb;
 	int dirtyidx, do_wake, do_restart;
-	u16 sc;
-	int has_tx_work = 0;
+	int tx_left = TX_RING_SIZE;
 
 	spin_lock(&fep->tx_lock);
 	bdp = fep->dirty_tx;
 
-	/* clear TX status bits for napi*/
-	(*fep->ops->napi_clear_tx_event)(dev);
+	/* clear status bits for napi*/
+	(*fep->ops->napi_clear_event)(dev);
 
 	do_wake = do_restart = 0;
-	while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0) {
+	while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0 && tx_left) {
 		dirtyidx = bdp - fep->tx_bd_base;
 
 		if (fep->tx_free == fep->tx_ring)
@@ -302,9 +175,9 @@
 		 * Since we have freed up a buffer, the ring is no longer
 		 * full.
 		 */
-		if (++fep->tx_free >= MAX_SKB_FRAGS)
+		if (++fep->tx_free == MAX_SKB_FRAGS)
 			do_wake = 1;
-		has_tx_work = 1;
+		tx_left--;
 	}
 
 	fep->dirty_tx = bdp;
@@ -312,19 +185,129 @@
 	if (do_restart)
 		(*fep->ops->tx_restart)(dev);
 
-	if (!has_tx_work) {
-		napi_complete(napi);
-		(*fep->ops->napi_enable_tx)(dev);
-	}
-
 	spin_unlock(&fep->tx_lock);
 
 	if (do_wake)
 		netif_wake_queue(dev);
 
-	if (has_tx_work)
-		return budget;
-	return 0;
+	/*
+	 * First, grab all of the stats for the incoming packet.
+	 * These get messed up if we get called due to a busy condition.
+	 */
+	bdp = fep->cur_rx;
+
+	while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0 &&
+	       received < budget) {
+		curidx = bdp - fep->rx_bd_base;
+
+		/*
+		 * Since we have allocated space to hold a complete frame,
+		 * the last indicator should be set.
+		 */
+		if ((sc & BD_ENET_RX_LAST) == 0)
+			dev_warn(fep->dev, "rcv is not +last\n");
+
+		/*
+		 * Check for errors.
+		 */
+		if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_CL |
+			  BD_ENET_RX_NO | BD_ENET_RX_CR | BD_ENET_RX_OV)) {
+			fep->stats.rx_errors++;
+			/* Frame too long or too short. */
+			if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
+				fep->stats.rx_length_errors++;
+			/* Frame alignment */
+			if (sc & (BD_ENET_RX_NO | BD_ENET_RX_CL))
+				fep->stats.rx_frame_errors++;
+			/* CRC Error */
+			if (sc & BD_ENET_RX_CR)
+				fep->stats.rx_crc_errors++;
+			/* FIFO overrun */
+			if (sc & BD_ENET_RX_OV)
+				fep->stats.rx_crc_errors++;
+
+			skbn = fep->rx_skbuff[curidx];
+		} else {
+			skb = fep->rx_skbuff[curidx];
+
+			/*
+			 * Process the incoming frame.
+			 */
+			fep->stats.rx_packets++;
+			pkt_len = CBDR_DATLEN(bdp) - 4;	/* remove CRC */
+			fep->stats.rx_bytes += pkt_len + 4;
+
+			if (pkt_len <= fpi->rx_copybreak) {
+				/* +2 to make IP header L1 cache aligned */
+				skbn = netdev_alloc_skb(dev, pkt_len + 2);
+				if (skbn != NULL) {
+					skb_reserve(skbn, 2);	/* align IP header */
+					skb_copy_from_linear_data(skb,
+						      skbn->data, pkt_len);
+					swap(skb, skbn);
+					dma_sync_single_for_cpu(fep->dev,
+						CBDR_BUFADDR(bdp),
+						L1_CACHE_ALIGN(pkt_len),
+						DMA_FROM_DEVICE);
+				}
+			} else {
+				skbn = netdev_alloc_skb(dev, ENET_RX_FRSIZE);
+
+				if (skbn) {
+					dma_addr_t dma;
+
+					skb_align(skbn, ENET_RX_ALIGN);
+
+					dma_unmap_single(fep->dev,
+						CBDR_BUFADDR(bdp),
+						L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
+						DMA_FROM_DEVICE);
+
+					dma = dma_map_single(fep->dev,
+						skbn->data,
+						L1_CACHE_ALIGN(PKT_MAXBUF_SIZE),
+						DMA_FROM_DEVICE);
+					CBDW_BUFADDR(bdp, dma);
+				}
+			}
+
+			if (skbn != NULL) {
+				skb_put(skb, pkt_len);	/* Make room */
+				skb->protocol = eth_type_trans(skb, dev);
+				received++;
+				netif_receive_skb(skb);
+			} else {
+				fep->stats.rx_dropped++;
+				skbn = skb;
+			}
+		}
+
+		fep->rx_skbuff[curidx] = skbn;
+		CBDW_DATLEN(bdp, 0);
+		CBDW_SC(bdp, (sc & ~BD_ENET_RX_STATS) | BD_ENET_RX_EMPTY);
+
+		/*
+		 * Update BD pointer to next entry.
+		 */
+		if ((sc & BD_ENET_RX_WRAP) == 0)
+			bdp++;
+		else
+			bdp = fep->rx_bd_base;
+
+		(*fep->ops->rx_bd_done)(dev);
+	}
+
+	fep->cur_rx = bdp;
+
+	if (received < budget && tx_left) {
+		/* done */
+		napi_complete(napi);
+		(*fep->ops->napi_enable)(dev);
+
+		return received;
+	}
+
+	return budget;
 }
 
 /*
@@ -350,18 +333,18 @@
 		nr++;
 
 		int_clr_events = int_events;
-		int_clr_events &= ~fep->ev_napi_rx;
+		int_clr_events &= ~fep->ev_napi;
 
 		(*fep->ops->clear_int_events)(dev, int_clr_events);
 
 		if (int_events & fep->ev_err)
 			(*fep->ops->ev_error)(dev, int_events);
 
-		if (int_events & fep->ev_rx) {
+		if (int_events & fep->ev) {
 			napi_ok = napi_schedule_prep(&fep->napi);
 
-			(*fep->ops->napi_disable_rx)(dev);
-			(*fep->ops->clear_int_events)(dev, fep->ev_napi_rx);
+			(*fep->ops->napi_disable)(dev);
+			(*fep->ops->clear_int_events)(dev, fep->ev_napi);
 
 			/* NOTE: it is possible for FCCs in NAPI mode    */
 			/* to submit a spurious interrupt while in poll  */
@@ -369,17 +352,6 @@
 				__napi_schedule(&fep->napi);
 		}
 
-		if (int_events & fep->ev_tx) {
-			napi_ok = napi_schedule_prep(&fep->napi_tx);
-
-			(*fep->ops->napi_disable_tx)(dev);
-			(*fep->ops->clear_int_events)(dev, fep->ev_napi_tx);
-
-			/* NOTE: it is possible for FCCs in NAPI mode    */
-			/* to submit a spurious interrupt while in poll  */
-			if (napi_ok)
-				__napi_schedule(&fep->napi_tx);
-		}
 	}
 
 	handled = nr > 0;
@@ -659,7 +631,8 @@
 	}
 
 	phy_start(dev->phydev);
-	wake = fep->tx_free && !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY);
+	wake = fep->tx_free >= MAX_SKB_FRAGS &&
+	       !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY);
 	spin_unlock_irqrestore(&fep->lock, flags);
 
 	if (wake)
@@ -751,11 +724,10 @@
 	int err;
 
 	/* to initialize the fep->cur_rx,... */
-	/* not doing this, will cause a crash in fs_enet_rx_napi */
+	/* not doing this, will cause a crash in fs_enet_napi */
 	fs_init_bds(fep->ndev);
 
 	napi_enable(&fep->napi);
-	napi_enable(&fep->napi_tx);
 
 	/* Install our interrupt handler. */
 	r = request_irq(fep->interrupt, fs_enet_interrupt, IRQF_SHARED,
@@ -763,7 +735,6 @@
 	if (r != 0) {
 		dev_err(fep->dev, "Could not allocate FS_ENET IRQ!");
 		napi_disable(&fep->napi);
-		napi_disable(&fep->napi_tx);
 		return -EINVAL;
 	}
 
@@ -771,7 +742,6 @@
 	if (err) {
 		free_irq(fep->interrupt, dev);
 		napi_disable(&fep->napi);
-		napi_disable(&fep->napi_tx);
 		return err;
 	}
 	phy_start(dev->phydev);
@@ -789,7 +759,6 @@
 	netif_stop_queue(dev);
 	netif_carrier_off(dev);
 	napi_disable(&fep->napi);
-	napi_disable(&fep->napi_tx);
 	phy_stop(dev->phydev);
 
 	spin_lock_irqsave(&fep->lock, flags);
@@ -861,6 +830,44 @@
 	fep->msg_enable = value;
 }
 
+static int fs_get_tunable(struct net_device *dev,
+			  const struct ethtool_tunable *tuna, void *data)
+{
+	struct fs_enet_private *fep = netdev_priv(dev);
+	struct fs_platform_info *fpi = fep->fpi;
+	int ret = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		*(u32 *)data = fpi->rx_copybreak;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int fs_set_tunable(struct net_device *dev,
+			  const struct ethtool_tunable *tuna, const void *data)
+{
+	struct fs_enet_private *fep = netdev_priv(dev);
+	struct fs_platform_info *fpi = fep->fpi;
+	int ret = 0;
+
+	switch (tuna->id) {
+	case ETHTOOL_RX_COPYBREAK:
+		fpi->rx_copybreak = *(u32 *)data;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
 static const struct ethtool_ops fs_ethtool_ops = {
 	.get_drvinfo = fs_get_drvinfo,
 	.get_regs_len = fs_get_regs_len,
@@ -872,6 +879,8 @@
 	.get_ts_info = ethtool_op_get_ts_info,
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+	.get_tunable = fs_get_tunable,
+	.set_tunable = fs_set_tunable,
 };
 
 static int fs_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
@@ -939,8 +948,8 @@
 		fpi->cp_command = *data;
 	}
 
-	fpi->rx_ring = 32;
-	fpi->tx_ring = 64;
+	fpi->rx_ring = RX_RING_SIZE;
+	fpi->tx_ring = TX_RING_SIZE;
 	fpi->rx_copybreak = 240;
 	fpi->napi_weight = 17;
 	fpi->phy_node = of_parse_phandle(ofdev->dev.of_node, "phy-handle", 0);
@@ -1024,8 +1033,7 @@
 
 	ndev->netdev_ops = &fs_enet_netdev_ops;
 	ndev->watchdog_timeo = 2 * HZ;
-	netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight);
-	netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2);
+	netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight);
 
 	ndev->ethtool_ops = &fs_ethtool_ops;
 
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
index e29f54a..fee24c8 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
@@ -81,12 +81,9 @@
 	void (*adjust_link)(struct net_device *dev);
 	void (*restart)(struct net_device *dev);
 	void (*stop)(struct net_device *dev);
-	void (*napi_clear_rx_event)(struct net_device *dev);
-	void (*napi_enable_rx)(struct net_device *dev);
-	void (*napi_disable_rx)(struct net_device *dev);
-	void (*napi_clear_tx_event)(struct net_device *dev);
-	void (*napi_enable_tx)(struct net_device *dev);
-	void (*napi_disable_tx)(struct net_device *dev);
+	void (*napi_clear_event)(struct net_device *dev);
+	void (*napi_enable)(struct net_device *dev);
+	void (*napi_disable)(struct net_device *dev);
 	void (*rx_bd_done)(struct net_device *dev);
 	void (*tx_kickstart)(struct net_device *dev);
 	u32 (*get_int_events)(struct net_device *dev);
@@ -122,7 +119,6 @@
 
 struct fs_enet_private {
 	struct napi_struct napi;
-	struct napi_struct napi_tx;
 	struct device *dev;	/* pointer back to the device (must be initialized first) */
 	struct net_device *ndev;
 	spinlock_t lock;	/* during all ops except TX pckt processing */
@@ -152,10 +148,8 @@
 	int oldduplex, oldspeed, oldlink;	/* current settings */
 
 	/* event masks */
-	u32 ev_napi_rx;		/* mask of NAPI rx events */
-	u32 ev_napi_tx;		/* mask of NAPI rx events */
-	u32 ev_rx;		/* rx event mask          */
-	u32 ev_tx;		/* tx event mask          */
+	u32 ev_napi;		/* mask of NAPI events */
+	u32 ev;			/* event mask          */
 	u32 ev_err;		/* error event mask       */
 
 	u16 bd_rx_empty;	/* mask of BD rx empty	  */
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
index d71761a..120c758 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c
@@ -90,7 +90,7 @@
 	int ret = -EINVAL;
 
 	fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0);
-	if (fep->interrupt == NO_IRQ)
+	if (!fep->interrupt)
 		goto out;
 
 	fep->fcc.fccp = of_iomap(ofdev->dev.of_node, 0);
@@ -124,10 +124,8 @@
 	return ret;
 }
 
-#define FCC_NAPI_RX_EVENT_MSK	(FCC_ENET_RXF | FCC_ENET_RXB)
-#define FCC_NAPI_TX_EVENT_MSK	(FCC_ENET_TXB)
-#define FCC_RX_EVENT		(FCC_ENET_RXF)
-#define FCC_TX_EVENT		(FCC_ENET_TXB)
+#define FCC_NAPI_EVENT_MSK	(FCC_ENET_RXF | FCC_ENET_RXB | FCC_ENET_TXB)
+#define FCC_EVENT		(FCC_ENET_RXF | FCC_ENET_TXB)
 #define FCC_ERR_EVENT_MSK	(FCC_ENET_TXE)
 
 static int setup_data(struct net_device *dev)
@@ -137,10 +135,8 @@
 	if (do_pd_setup(fep) != 0)
 		return -EINVAL;
 
-	fep->ev_napi_rx = FCC_NAPI_RX_EVENT_MSK;
-	fep->ev_napi_tx = FCC_NAPI_TX_EVENT_MSK;
-	fep->ev_rx = FCC_RX_EVENT;
-	fep->ev_tx = FCC_TX_EVENT;
+	fep->ev_napi = FCC_NAPI_EVENT_MSK;
+	fep->ev = FCC_EVENT;
 	fep->ev_err = FCC_ERR_EVENT_MSK;
 
 	return 0;
@@ -424,52 +420,28 @@
 	fs_cleanup_bds(dev);
 }
 
-static void napi_clear_rx_event(struct net_device *dev)
+static void napi_clear_event_fs(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	fcc_t __iomem *fccp = fep->fcc.fccp;
 
-	W16(fccp, fcc_fcce, FCC_NAPI_RX_EVENT_MSK);
+	W16(fccp, fcc_fcce, FCC_NAPI_EVENT_MSK);
 }
 
-static void napi_enable_rx(struct net_device *dev)
+static void napi_enable_fs(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	fcc_t __iomem *fccp = fep->fcc.fccp;
 
-	S16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK);
+	S16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK);
 }
 
-static void napi_disable_rx(struct net_device *dev)
+static void napi_disable_fs(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	fcc_t __iomem *fccp = fep->fcc.fccp;
 
-	C16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK);
-}
-
-static void napi_clear_tx_event(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	fcc_t __iomem *fccp = fep->fcc.fccp;
-
-	W16(fccp, fcc_fcce, FCC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_enable_tx(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	fcc_t __iomem *fccp = fep->fcc.fccp;
-
-	S16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_disable_tx(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	fcc_t __iomem *fccp = fep->fcc.fccp;
-
-	C16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK);
+	C16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK);
 }
 
 static void rx_bd_done(struct net_device *dev)
@@ -595,12 +567,9 @@
 	.set_multicast_list	= set_multicast_list,
 	.restart		= restart,
 	.stop			= stop,
-	.napi_clear_rx_event	= napi_clear_rx_event,
-	.napi_enable_rx		= napi_enable_rx,
-	.napi_disable_rx	= napi_disable_rx,
-	.napi_clear_tx_event	= napi_clear_tx_event,
-	.napi_enable_tx		= napi_enable_tx,
-	.napi_disable_tx	= napi_disable_tx,
+	.napi_clear_event	= napi_clear_event_fs,
+	.napi_enable		= napi_enable_fs,
+	.napi_disable		= napi_disable_fs,
 	.rx_bd_done		= rx_bd_done,
 	.tx_kickstart		= tx_kickstart,
 	.get_int_events		= get_int_events,
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
index 35a318e..777beff 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c
@@ -99,7 +99,7 @@
 	struct platform_device *ofdev = to_platform_device(fep->dev);
 
 	fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0);
-	if (fep->interrupt == NO_IRQ)
+	if (!fep->interrupt)
 		return -EINVAL;
 
 	fep->fec.fecp = of_iomap(ofdev->dev.of_node, 0);
@@ -109,10 +109,8 @@
 	return 0;
 }
 
-#define FEC_NAPI_RX_EVENT_MSK	(FEC_ENET_RXF | FEC_ENET_RXB)
-#define FEC_NAPI_TX_EVENT_MSK	(FEC_ENET_TXF)
-#define FEC_RX_EVENT		(FEC_ENET_RXF)
-#define FEC_TX_EVENT		(FEC_ENET_TXF)
+#define FEC_NAPI_EVENT_MSK	(FEC_ENET_RXF | FEC_ENET_RXB | FEC_ENET_TXF)
+#define FEC_EVENT		(FEC_ENET_RXF | FEC_ENET_TXF)
 #define FEC_ERR_EVENT_MSK	(FEC_ENET_HBERR | FEC_ENET_BABR | \
 				 FEC_ENET_BABT | FEC_ENET_EBERR)
 
@@ -126,10 +124,8 @@
 	fep->fec.hthi = 0;
 	fep->fec.htlo = 0;
 
-	fep->ev_napi_rx = FEC_NAPI_RX_EVENT_MSK;
-	fep->ev_napi_tx = FEC_NAPI_TX_EVENT_MSK;
-	fep->ev_rx = FEC_RX_EVENT;
-	fep->ev_tx = FEC_TX_EVENT;
+	fep->ev_napi = FEC_NAPI_EVENT_MSK;
+	fep->ev = FEC_EVENT;
 	fep->ev_err = FEC_ERR_EVENT_MSK;
 
 	return 0;
@@ -396,52 +392,28 @@
 	}
 }
 
-static void napi_clear_rx_event(struct net_device *dev)
+static void napi_clear_event_fs(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	struct fec __iomem *fecp = fep->fec.fecp;
 
-	FW(fecp, ievent, FEC_NAPI_RX_EVENT_MSK);
+	FW(fecp, ievent, FEC_NAPI_EVENT_MSK);
 }
 
-static void napi_enable_rx(struct net_device *dev)
+static void napi_enable_fs(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	struct fec __iomem *fecp = fep->fec.fecp;
 
-	FS(fecp, imask, FEC_NAPI_RX_EVENT_MSK);
+	FS(fecp, imask, FEC_NAPI_EVENT_MSK);
 }
 
-static void napi_disable_rx(struct net_device *dev)
+static void napi_disable_fs(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	struct fec __iomem *fecp = fep->fec.fecp;
 
-	FC(fecp, imask, FEC_NAPI_RX_EVENT_MSK);
-}
-
-static void napi_clear_tx_event(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	struct fec __iomem *fecp = fep->fec.fecp;
-
-	FW(fecp, ievent, FEC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_enable_tx(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	struct fec __iomem *fecp = fep->fec.fecp;
-
-	FS(fecp, imask, FEC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_disable_tx(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	struct fec __iomem *fecp = fep->fec.fecp;
-
-	FC(fecp, imask, FEC_NAPI_TX_EVENT_MSK);
+	FC(fecp, imask, FEC_NAPI_EVENT_MSK);
 }
 
 static void rx_bd_done(struct net_device *dev)
@@ -513,12 +485,9 @@
 	.set_multicast_list	= set_multicast_list,
 	.restart		= restart,
 	.stop			= stop,
-	.napi_clear_rx_event	= napi_clear_rx_event,
-	.napi_enable_rx		= napi_enable_rx,
-	.napi_disable_rx	= napi_disable_rx,
-	.napi_clear_tx_event	= napi_clear_tx_event,
-	.napi_enable_tx		= napi_enable_tx,
-	.napi_disable_tx	= napi_disable_tx,
+	.napi_clear_event	= napi_clear_event_fs,
+	.napi_enable		= napi_enable_fs,
+	.napi_disable		= napi_disable_fs,
 	.rx_bd_done		= rx_bd_done,
 	.tx_kickstart		= tx_kickstart,
 	.get_int_events		= get_int_events,
diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
index e8b9c33..15abd37 100644
--- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
+++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c
@@ -99,7 +99,7 @@
 	struct platform_device *ofdev = to_platform_device(fep->dev);
 
 	fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0);
-	if (fep->interrupt == NO_IRQ)
+	if (!fep->interrupt)
 		return -EINVAL;
 
 	fep->scc.sccp = of_iomap(ofdev->dev.of_node, 0);
@@ -115,10 +115,8 @@
 	return 0;
 }
 
-#define SCC_NAPI_RX_EVENT_MSK	(SCCE_ENET_RXF | SCCE_ENET_RXB)
-#define SCC_NAPI_TX_EVENT_MSK	(SCCE_ENET_TXB)
-#define SCC_RX_EVENT		(SCCE_ENET_RXF)
-#define SCC_TX_EVENT		(SCCE_ENET_TXB)
+#define SCC_NAPI_EVENT_MSK	(SCCE_ENET_RXF | SCCE_ENET_RXB | SCCE_ENET_TXB)
+#define SCC_EVENT		(SCCE_ENET_RXF | SCCE_ENET_TXB)
 #define SCC_ERR_EVENT_MSK	(SCCE_ENET_TXE | SCCE_ENET_BSY)
 
 static int setup_data(struct net_device *dev)
@@ -130,10 +128,8 @@
 	fep->scc.hthi = 0;
 	fep->scc.htlo = 0;
 
-	fep->ev_napi_rx = SCC_NAPI_RX_EVENT_MSK;
-	fep->ev_napi_tx = SCC_NAPI_TX_EVENT_MSK;
-	fep->ev_rx = SCC_RX_EVENT;
-	fep->ev_tx = SCC_TX_EVENT | SCCE_ENET_TXE;
+	fep->ev_napi = SCC_NAPI_EVENT_MSK;
+	fep->ev = SCC_EVENT | SCCE_ENET_TXE;
 	fep->ev_err = SCC_ERR_EVENT_MSK;
 
 	return 0;
@@ -379,52 +375,28 @@
 	fs_cleanup_bds(dev);
 }
 
-static void napi_clear_rx_event(struct net_device *dev)
+static void napi_clear_event_fs(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	scc_t __iomem *sccp = fep->scc.sccp;
 
-	W16(sccp, scc_scce, SCC_NAPI_RX_EVENT_MSK);
+	W16(sccp, scc_scce, SCC_NAPI_EVENT_MSK);
 }
 
-static void napi_enable_rx(struct net_device *dev)
+static void napi_enable_fs(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	scc_t __iomem *sccp = fep->scc.sccp;
 
-	S16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK);
+	S16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK);
 }
 
-static void napi_disable_rx(struct net_device *dev)
+static void napi_disable_fs(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
 	scc_t __iomem *sccp = fep->scc.sccp;
 
-	C16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK);
-}
-
-static void napi_clear_tx_event(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	scc_t __iomem *sccp = fep->scc.sccp;
-
-	W16(sccp, scc_scce, SCC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_enable_tx(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	scc_t __iomem *sccp = fep->scc.sccp;
-
-	S16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK);
-}
-
-static void napi_disable_tx(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	scc_t __iomem *sccp = fep->scc.sccp;
-
-	C16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK);
+	C16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK);
 }
 
 static void rx_bd_done(struct net_device *dev)
@@ -497,12 +469,9 @@
 	.set_multicast_list	= set_multicast_list,
 	.restart		= restart,
 	.stop			= stop,
-	.napi_clear_rx_event	= napi_clear_rx_event,
-	.napi_enable_rx		= napi_enable_rx,
-	.napi_disable_rx	= napi_disable_rx,
-	.napi_clear_tx_event	= napi_clear_tx_event,
-	.napi_enable_tx		= napi_enable_tx,
-	.napi_disable_tx	= napi_disable_tx,
+	.napi_clear_event	= napi_clear_event_fs,
+	.napi_enable		= napi_enable_fs,
+	.napi_disable		= napi_disable_fs,
 	.rx_bd_done		= rx_bd_done,
 	.tx_kickstart		= tx_kickstart,
 	.get_int_events		= get_int_events,
diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
index f3c63dc..446c7b3 100644
--- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c
+++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c
@@ -195,7 +195,7 @@
 	return 0;
 }
 
-#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
+#if IS_ENABLED(CONFIG_GIANFAR)
 /*
  * Return the TBIPA address, starting from the address
  * of the mapped GFAR MDIO registers (struct gfar)
@@ -228,7 +228,7 @@
 }
 #endif
 
-#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
+#if IS_ENABLED(CONFIG_UCC_GETH)
 /*
  * Return the TBIPAR address for a QE MDIO node, starting from the address
  * of the mapped MII registers (struct fsl_pq_mii)
@@ -306,7 +306,7 @@
 #endif
 
 static const struct of_device_id fsl_pq_mdio_match[] = {
-#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE)
+#if IS_ENABLED(CONFIG_GIANFAR)
 	{
 		.compatible = "fsl,gianfar-tbi",
 		.data = &(struct fsl_pq_mdio_data) {
@@ -344,7 +344,7 @@
 		},
 	},
 #endif
-#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE)
+#if IS_ENABLED(CONFIG_UCC_GETH)
 	{
 		.compatible = "fsl,ucc-mdio",
 		.data = &(struct fsl_pq_mdio_data) {
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index d20935d..4b4f5bc 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2922,17 +2922,25 @@
 {
 	unsigned int size = lstatus & BD_LENGTH_MASK;
 	struct page *page = rxb->page;
+	bool last = !!(lstatus & BD_LFLAG(RXBD_LAST));
 
 	/* Remove the FCS from the packet length */
-	if (likely(lstatus & BD_LFLAG(RXBD_LAST)))
+	if (last)
 		size -= ETH_FCS_LEN;
 
-	if (likely(first))
+	if (likely(first)) {
 		skb_put(skb, size);
-	else
-		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
-				rxb->page_offset + RXBUF_ALIGNMENT,
-				size, GFAR_RXB_TRUESIZE);
+	} else {
+		/* the last fragments' length contains the full frame length */
+		if (last)
+			size -= skb->len;
+
+		/* in case the last fragment consisted only of the FCS */
+		if (size > 0)
+			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+					rxb->page_offset + RXBUF_ALIGNMENT,
+					size, GFAR_RXB_TRUESIZE);
+	}
 
 	/* try reuse page */
 	if (unlikely(page_count(page) != 1))
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 373fd09..6e8a9c8 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -100,7 +100,8 @@
 #define DEFAULT_RX_LFC_THR  16
 #define DEFAULT_LFC_PTVVAL  4
 
-#define GFAR_RXB_SIZE 1536
+/* prevent fragmenation by HW in DSA environments */
+#define GFAR_RXB_SIZE roundup(1536 + 8, 64)
 #define GFAR_SKBFRAG_SIZE (RXBUF_ALIGNMENT + GFAR_RXB_SIZE \
 			  + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 #define GFAR_RXB_TRUESIZE 2048
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 5bf1ade..186ef8f 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3756,7 +3756,7 @@
 			return -EINVAL;
 		}
 		if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) {
-			pr_err("invalid rx-clock propperty\n");
+			pr_err("invalid rx-clock property\n");
 			return -EINVAL;
 		}
 		ug_info->uf_info.rx_clock = *prop;
diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 7b8fe86..e03b30c 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -271,11 +271,8 @@
 		goto err_ioremap;
 	}
 
-	if (of_get_property(pdev->dev.of_node,
-			    "little-endian", NULL))
-		priv->is_little_endian = true;
-	else
-		priv->is_little_endian = false;
+	priv->is_little_endian = of_property_read_bool(pdev->dev.of_node,
+						       "little-endian");
 
 	ret = of_mdiobus_register(bus, np);
 	if (ret) {
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index 0c4afe9..3977889 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -600,7 +600,7 @@
 	return IRQ_HANDLED;
 }
 
-enum hrtimer_restart tx_done(struct hrtimer *hrtimer)
+static enum hrtimer_restart tx_done(struct hrtimer *hrtimer)
 {
 	struct hip04_priv *priv;
 
@@ -755,13 +755,13 @@
 	strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
 }
 
-static struct ethtool_ops hip04_ethtool_ops = {
+static const struct ethtool_ops hip04_ethtool_ops = {
 	.get_coalesce		= hip04_get_coalesce,
 	.set_coalesce		= hip04_set_coalesce,
 	.get_drvinfo		= hip04_get_drvinfo,
 };
 
-static struct net_device_ops hip04_netdev_ops = {
+static const struct net_device_ops hip04_netdev_ops = {
 	.ndo_open		= hip04_mac_open,
 	.ndo_stop		= hip04_mac_stop,
 	.ndo_get_stats		= hip04_get_stats,
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index b5d7ad0..ced1859 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -699,7 +699,7 @@
 	return phy_mii_ioctl(dev->phydev, ifreq, cmd);
 }
 
-static struct ethtool_ops hisi_femac_ethtools_ops = {
+static const struct ethtool_ops hisi_femac_ethtools_ops = {
 	.get_link		= ethtool_op_get_link,
 	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
 	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
@@ -940,8 +940,8 @@
 }
 
 #ifdef CONFIG_PM
-int hisi_femac_drv_suspend(struct platform_device *pdev,
-			   pm_message_t state)
+static int hisi_femac_drv_suspend(struct platform_device *pdev,
+				  pm_message_t state)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct hisi_femac_priv *priv = netdev_priv(ndev);
@@ -957,7 +957,7 @@
 	return 0;
 }
 
-int hisi_femac_drv_resume(struct platform_device *pdev)
+static int hisi_femac_drv_resume(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct hisi_femac_priv *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index 275618bb..e69a6be 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -750,7 +750,7 @@
 	.ndo_set_mac_address	= hix5hd2_net_set_mac_address,
 };
 
-static struct ethtool_ops hix5hd2_ethtools_ops = {
+static const struct ethtool_ops hix5hd2_ethtools_ops = {
 	.get_link		= ethtool_op_get_link,
 	.get_link_ksettings     = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings     = phy_ethtool_set_link_ksettings,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 1235c7f..1e1eb92 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -17,7 +17,7 @@
 	{"gmac_rx_octets_total_ok", MAC_STATS_FIELD_OFF(rx_good_bytes)},
 	{"gmac_rx_octets_bad", MAC_STATS_FIELD_OFF(rx_bad_bytes)},
 	{"gmac_rx_uc_pkts", MAC_STATS_FIELD_OFF(rx_uc_pkts)},
-	{"gamc_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
+	{"gmac_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)},
 	{"gmac_rx_bc_pkts", MAC_STATS_FIELD_OFF(rx_bc_pkts)},
 	{"gmac_rx_pkts_64octets", MAC_STATS_FIELD_OFF(rx_64bytes)},
 	{"gmac_rx_pkts_65to127", MAC_STATS_FIELD_OFF(rx_65to127)},
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 5c8afe1..a834774 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -684,8 +684,7 @@
 	if (!phy || IS_ERR(phy))
 		return -EIO;
 
-	if (mdio->irq)
-		phy->irq = mdio->irq[addr];
+	phy->irq = mdio->irq[addr];
 
 	/* All data is now stored in the phy struct;
 	 * register it
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index afb5daa..eb448df 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -18,6 +18,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/vmalloc.h>
 
@@ -115,10 +116,8 @@
 
 			dsaf_dev->sc_base = devm_ioremap_resource(&pdev->dev,
 								  res);
-			if (IS_ERR(dsaf_dev->sc_base)) {
-				dev_err(dsaf_dev->dev, "subctrl can not map!\n");
+			if (IS_ERR(dsaf_dev->sc_base))
 				return PTR_ERR(dsaf_dev->sc_base);
-			}
 
 			res = platform_get_resource(pdev, IORESOURCE_MEM,
 						    res_idx++);
@@ -129,10 +128,8 @@
 
 			dsaf_dev->sds_base = devm_ioremap_resource(&pdev->dev,
 								   res);
-			if (IS_ERR(dsaf_dev->sds_base)) {
-				dev_err(dsaf_dev->dev, "serdes-ctrl can not map!\n");
+			if (IS_ERR(dsaf_dev->sds_base))
 				return PTR_ERR(dsaf_dev->sds_base);
-			}
 		} else {
 			dsaf_dev->sub_ctrl = syscon;
 		}
@@ -147,10 +144,8 @@
 		}
 	}
 	dsaf_dev->ppe_base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(dsaf_dev->ppe_base)) {
-		dev_err(dsaf_dev->dev, "ppe-base resource can not map!\n");
+	if (IS_ERR(dsaf_dev->ppe_base))
 		return PTR_ERR(dsaf_dev->ppe_base);
-	}
 	dsaf_dev->ppe_paddr = res->start;
 
 	if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
@@ -166,10 +161,8 @@
 			}
 		}
 		dsaf_dev->io_base = devm_ioremap_resource(&pdev->dev, res);
-		if (IS_ERR(dsaf_dev->io_base)) {
-			dev_err(dsaf_dev->dev, "dsaf-base resource can not map!\n");
+		if (IS_ERR(dsaf_dev->io_base))
 			return PTR_ERR(dsaf_dev->io_base);
-		}
 	}
 
 	ret = device_property_read_u32(dsaf_dev->dev, "desc-num", &desc_num);
@@ -2781,6 +2774,89 @@
 
 module_platform_driver(g_dsaf_driver);
 
+/**
+ * hns_dsaf_roce_reset - reset dsaf and roce
+ * @dsaf_fwnode: Pointer to framework node for the dasf
+ * @enable: false - request reset , true - drop reset
+ * retuen 0 - success , negative -fail
+ */
+int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable)
+{
+	struct dsaf_device *dsaf_dev;
+	struct platform_device *pdev;
+	u32 mp;
+	u32 sl;
+	u32 credit;
+	int i;
+	const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
+		{DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0},
+		{DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0},
+		{DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0},
+		{DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0},
+		{DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1},
+		{DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1},
+		{DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1},
+		{DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1},
+	};
+	const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = {
+		{DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0},
+		{DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1},
+		{DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2},
+		{DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3},
+		{DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0},
+		{DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1},
+		{DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2},
+		{DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3},
+	};
+
+	if (!is_of_node(dsaf_fwnode)) {
+		pr_err("hisi_dsaf: Only support DT node!\n");
+		return -EINVAL;
+	}
+	pdev = of_find_device_by_node(to_of_node(dsaf_fwnode));
+	dsaf_dev = dev_get_drvdata(&pdev->dev);
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver)) {
+		dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n",
+			dsaf_dev->ae_dev.name);
+		return -ENODEV;
+	}
+
+	if (!enable) {
+		/* Reset rocee-channels in dsaf and rocee */
+		hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, false);
+		hns_dsaf_roce_srst(dsaf_dev, false);
+	} else {
+		/* Configure dsaf tx roce correspond to port map and sl map */
+		mp = dsaf_read_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG);
+		for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++)
+			dsaf_set_field(mp, 7 << i * 3, i * 3,
+				       port_map[i][DSAF_ROCE_6PORT_MODE]);
+		dsaf_set_field(mp, 3 << i * 3, i * 3, 0);
+		dsaf_write_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG, mp);
+
+		sl = dsaf_read_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG);
+		for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++)
+			dsaf_set_field(sl, 3 << i * 2, i * 2,
+				       sl_map[i][DSAF_ROCE_6PORT_MODE]);
+		dsaf_write_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG, sl);
+
+		/* De-reset rocee-channels in dsaf and rocee */
+		hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, true);
+		msleep(SRST_TIME_INTERVAL);
+		hns_dsaf_roce_srst(dsaf_dev, true);
+
+		/* Eanble dsaf channel rocee credit */
+		credit = dsaf_read_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG);
+		dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 0);
+		dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit);
+
+		dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 1);
+		dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(hns_dsaf_roce_reset);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
 MODULE_DESCRIPTION("HNS DSAF driver");
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index 1daf018..f3681d5 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -43,6 +43,32 @@
 #define DSAF_PRIO_NR	8
 #define DSAF_REG_PER_ZONE	3
 
+#define DSAF_ROCE_CREDIT_CHN	8
+#define DSAF_ROCE_CHAN_MODE	3
+
+enum dsaf_roce_port_mode {
+	DSAF_ROCE_6PORT_MODE,
+	DSAF_ROCE_4PORT_MODE,
+	DSAF_ROCE_2PORT_MODE,
+	DSAF_ROCE_CHAN_MODE_NUM,
+};
+
+enum dsaf_roce_port_num {
+	DSAF_ROCE_PORT_0,
+	DSAF_ROCE_PORT_1,
+	DSAF_ROCE_PORT_2,
+	DSAF_ROCE_PORT_3,
+	DSAF_ROCE_PORT_4,
+	DSAF_ROCE_PORT_5,
+};
+
+enum dsaf_roce_qos_sl {
+	DSAF_ROCE_SL_0,
+	DSAF_ROCE_SL_1,
+	DSAF_ROCE_SL_2,
+	DSAF_ROCE_SL_3,
+};
+
 #define DSAF_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset))))
 #define HNS_DSAF_IS_DEBUG(dev) (dev->dsaf_mode == DSAF_MODE_DISABLE_SP)
 
@@ -419,6 +445,10 @@
 
 void hns_dsaf_fix_mac_mode(struct hns_mac_cb *mac_cb);
 
+void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable);
+
+void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable);
+
 int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev);
 void hns_dsaf_ae_uninit(struct dsaf_device *dsaf_dev);
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 611b67b..36b9f79 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -231,6 +231,42 @@
 	dsaf_write_sub(dsaf_dev, reg_addr, reg_val);
 }
 
+/**
+ * hns_dsaf_srst_chns - reset dsaf channels
+ * @dsaf_dev: dsaf device struct pointer
+ * @msk: xbar channels mask value:
+ * bit0-5 for xge0-5
+ * bit6-11 for ppe0-5
+ * bit12-17 for roce0-5
+ * bit18-19 for com/dfx
+ * @enable: false - request reset , true - drop reset
+ */
+void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable)
+{
+	u32 reg_addr;
+
+	if (!enable)
+		reg_addr = DSAF_SUB_SC_DSAF_RESET_REQ_REG;
+	else
+		reg_addr = DSAF_SUB_SC_DSAF_RESET_DREQ_REG;
+
+	dsaf_write_sub(dsaf_dev, reg_addr, msk);
+}
+
+void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable)
+{
+	if (!enable) {
+		dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_RESET_REQ_REG, 1);
+	} else {
+		dsaf_write_sub(dsaf_dev,
+			       DSAF_SUB_SC_ROCEE_CLK_DIS_REG, 1);
+		dsaf_write_sub(dsaf_dev,
+			       DSAF_SUB_SC_ROCEE_RESET_DREQ_REG, 1);
+		msleep(20);
+		dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_CLK_EN_REG, 1);
+	}
+}
+
 static void
 hns_dsaf_xge_core_srst_by_port_acpi(struct dsaf_device *dsaf_dev,
 				    u32 port, bool dereset)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index ff8b6a4..6ea8722 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -328,9 +328,10 @@
 static void hns_ppe_uninit_hw(struct hns_ppe_cb *ppe_cb)
 {
 	u32 port;
-	struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev;
 
 	if (ppe_cb->ppe_common_cb) {
+		struct dsaf_device *dsaf_dev = ppe_cb->ppe_common_cb->dsaf_dev;
+
 		port = ppe_cb->index;
 		dsaf_dev->misc_op->ppe_srst(dsaf_dev, port, 0);
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index 235f744..13c16ab 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -77,6 +77,12 @@
 #define DSAF_SUB_SC_PPE_RESET_DREQ_REG			0xA4C
 #define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG		0xA88
 #define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG		0xA8C
+#define DSAF_SUB_SC_DSAF_RESET_REQ_REG			0xAA8
+#define DSAF_SUB_SC_ROCEE_RESET_REQ_REG			0xA50
+#define DSAF_SUB_SC_DSAF_RESET_DREQ_REG			0xAAC
+#define DSAF_SUB_SC_ROCEE_CLK_DIS_REG			0x32C
+#define DSAF_SUB_SC_ROCEE_RESET_DREQ_REG		0xA54
+#define DSAF_SUB_SC_ROCEE_CLK_EN_REG			0x328
 #define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG		0x2060
 #define DSAF_SUB_SC_TCAM_MBIST_EN_REG			0x2300
 #define DSAF_SUB_SC_DSAF_CLK_ST_REG			0x5300
@@ -133,6 +139,8 @@
 #define DSAF_ROCEE_INT_STS_0_REG	0x200
 #define DSAFV2_SERDES_LBK_0_REG         0x220
 #define DSAF_PAUSE_CFG_REG		0x240
+#define DSAF_ROCE_PORT_MAP_REG		0x2A0
+#define DSAF_ROCE_SL_MAP_REG		0x2A4
 #define DSAF_PPE_QID_CFG_0_REG		0x300
 #define DSAF_SW_PORT_TYPE_0_REG		0x320
 #define DSAF_STP_PORT_TYPE_0_REG	0x340
@@ -178,6 +186,7 @@
 #define DSAF_SBM_BP_CFG_2_XGE_REG_0_REG		0x200C
 #define DSAF_SBM_BP_CFG_2_PPE_REG_0_REG		0x230C
 #define DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG	0x260C
+#define DSAF_SBM_ROCEE_CFG_REG_REG		0x2380
 #define DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG	0x238C
 #define DSAF_SBM_FREE_CNT_0_0_REG		0x2010
 #define DSAF_SBM_FREE_CNT_1_0_REG		0x2014
@@ -796,6 +805,9 @@
 #define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S 9
 #define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9)
 
+#define DSAF_CHNS_MASK			0x3f000
+#define DSAF_SBM_ROCEE_CFG_CRD_EN_B	2
+#define SRST_TIME_INTERVAL		20
 #define DSAFV2_SBM_CFG2_ROCEE_SET_BUF_NUM_S 0
 #define DSAFV2_SBM_CFG2_ROCEE_SET_BUF_NUM_M (((1ULL << 8) - 1) << 0)
 #define DSAFV2_SBM_CFG2_ROCEE_RESET_BUF_NUM_S 8
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index d7e1f8c..059aaed 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -994,10 +994,10 @@
 	struct hnae_handle *h = priv->ae_handle;
 	int state = 1;
 
-	if (priv->phy) {
+	if (ndev->phydev) {
 		h->dev->ops->adjust_link(h, ndev->phydev->speed,
 					 ndev->phydev->duplex);
-		state = priv->phy->link;
+		state = ndev->phydev->link;
 	}
 	state = state && h->dev->ops->get_status(h);
 
@@ -1022,7 +1022,6 @@
  */
 int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h)
 {
-	struct hns_nic_priv *priv = netdev_priv(ndev);
 	struct phy_device *phy_dev = h->phy_dev;
 	int ret;
 
@@ -1046,8 +1045,6 @@
 	if (h->phy_if == PHY_INTERFACE_MODE_XGMII)
 		phy_dev->autoneg = false;
 
-	priv->phy = phy_dev;
-
 	return 0;
 }
 
@@ -1224,8 +1221,8 @@
 	if (ret)
 		goto out_start_err;
 
-	if (priv->phy)
-		phy_start(priv->phy);
+	if (ndev->phydev)
+		phy_start(ndev->phydev);
 
 	clear_bit(NIC_STATE_DOWN, &priv->state);
 	(void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ);
@@ -1259,8 +1256,8 @@
 	netif_tx_disable(ndev);
 	priv->link = 0;
 
-	if (priv->phy)
-		phy_stop(priv->phy);
+	if (ndev->phydev)
+		phy_stop(ndev->phydev);
 
 	ops = priv->ae_handle->dev->ops;
 
@@ -1359,8 +1356,7 @@
 static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr,
 			    int cmd)
 {
-	struct hns_nic_priv *priv = netdev_priv(netdev);
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = netdev->phydev;
 
 	if (!netif_running(netdev))
 		return -EINVAL;
@@ -2017,9 +2013,8 @@
 		hns_nic_uninit_ring_data(priv);
 	priv->ring_data = NULL;
 
-	if (priv->phy)
-		phy_disconnect(priv->phy);
-	priv->phy = NULL;
+	if (ndev->phydev)
+		phy_disconnect(ndev->phydev);
 
 	if (!IS_ERR_OR_NULL(priv->ae_handle))
 		hnae_put_handle(priv->ae_handle);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
index 44bb301..5b412de 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h
@@ -59,7 +59,6 @@
 	u32 port_id;
 	int phy_mode;
 	int phy_led_val;
-	struct phy_device *phy;
 	struct net_device *netdev;
 	struct device *dev;
 	struct hnae_handle *ae_handle;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index ab33487..47e59bb 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -48,9 +48,9 @@
 
 	h = priv->ae_handle;
 
-	if (priv->phy) {
-		if (!genphy_read_status(priv->phy))
-			link_stat = priv->phy->link;
+	if (net_dev->phydev) {
+		if (!genphy_read_status(net_dev->phydev))
+			link_stat = net_dev->phydev->link;
 		else
 			link_stat = 0;
 	}
@@ -64,15 +64,14 @@
 }
 
 static void hns_get_mdix_mode(struct net_device *net_dev,
-			      struct ethtool_cmd *cmd)
+			      struct ethtool_link_ksettings *cmd)
 {
 	int mdix_ctrl, mdix, retval, is_resolved;
-	struct hns_nic_priv *priv = netdev_priv(net_dev);
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = net_dev->phydev;
 
 	if (!phy_dev || !phy_dev->mdio.bus) {
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
-		cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
 		return;
 	}
 
@@ -89,35 +88,35 @@
 
 	switch (mdix_ctrl) {
 	case 0x0:
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI;
 		break;
 	case 0x1:
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_X;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X;
 		break;
 	case 0x3:
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO;
 		break;
 	default:
-		cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
+		cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID;
 		break;
 	}
 
 	if (!is_resolved)
-		cmd->eth_tp_mdix = ETH_TP_MDI_INVALID;
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID;
 	else if (mdix)
-		cmd->eth_tp_mdix = ETH_TP_MDI_X;
+		cmd->base.eth_tp_mdix = ETH_TP_MDI_X;
 	else
-		cmd->eth_tp_mdix = ETH_TP_MDI;
+		cmd->base.eth_tp_mdix = ETH_TP_MDI;
 }
 
 /**
- *hns_nic_get_settings - implement ethtool get settings
+ *hns_nic_get_link_ksettings - implement ethtool get link ksettings
  *@net_dev: net_device
- *@cmd: ethtool_cmd
+ *@cmd: ethtool_link_ksettings
  *retuen 0 - success , negative --fail
  */
-static int hns_nic_get_settings(struct net_device *net_dev,
-				struct ethtool_cmd *cmd)
+static int hns_nic_get_link_ksettings(struct net_device *net_dev,
+				      struct ethtool_link_ksettings *cmd)
 {
 	struct hns_nic_priv *priv = netdev_priv(net_dev);
 	struct hnae_handle *h;
@@ -125,6 +124,7 @@
 	int ret;
 	u8 duplex;
 	u16 speed;
+	u32 supported, advertising;
 
 	if (!priv || !priv->ae_handle)
 		return -ESRCH;
@@ -139,38 +139,43 @@
 		return -EINVAL;
 	}
 
-	/* When there is no phy, autoneg is off. */
-	cmd->autoneg = false;
-	ethtool_cmd_speed_set(cmd, speed);
-	cmd->duplex = duplex;
+	ethtool_convert_link_mode_to_legacy_u32(&supported,
+						cmd->link_modes.supported);
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
 
-	if (priv->phy)
-		(void)phy_ethtool_gset(priv->phy, cmd);
+	/* When there is no phy, autoneg is off. */
+	cmd->base.autoneg = false;
+	cmd->base.cmd = speed;
+	cmd->base.duplex = duplex;
+
+	if (net_dev->phydev)
+		(void)phy_ethtool_ksettings_get(net_dev->phydev, cmd);
 
 	link_stat = hns_nic_get_link(net_dev);
 	if (!link_stat) {
-		ethtool_cmd_speed_set(cmd, (u32)SPEED_UNKNOWN);
-		cmd->duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = (u32)SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 
-	if (cmd->autoneg)
-		cmd->advertising |= ADVERTISED_Autoneg;
+	if (cmd->base.autoneg)
+		advertising |= ADVERTISED_Autoneg;
 
-	cmd->supported |= h->if_support;
+	supported |= h->if_support;
 	if (h->phy_if == PHY_INTERFACE_MODE_SGMII) {
-		cmd->supported |= SUPPORTED_TP;
-		cmd->advertising |= ADVERTISED_1000baseT_Full;
+		supported |= SUPPORTED_TP;
+		advertising |= ADVERTISED_1000baseT_Full;
 	} else if (h->phy_if == PHY_INTERFACE_MODE_XGMII) {
-		cmd->supported |= SUPPORTED_FIBRE;
-		cmd->advertising |= ADVERTISED_10000baseKR_Full;
+		supported |= SUPPORTED_FIBRE;
+		advertising |= ADVERTISED_10000baseKR_Full;
 	}
 
 	switch (h->media_type) {
 	case HNAE_MEDIA_TYPE_FIBER:
-		cmd->port = PORT_FIBRE;
+		cmd->base.port = PORT_FIBRE;
 		break;
 	case HNAE_MEDIA_TYPE_COPPER:
-		cmd->port = PORT_TP;
+		cmd->base.port = PORT_TP;
 		break;
 	case HNAE_MEDIA_TYPE_UNKNOWN:
 	default:
@@ -178,23 +183,27 @@
 	}
 
 	if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG))
-		cmd->supported |= SUPPORTED_Pause;
+		supported |= SUPPORTED_Pause;
 
-	cmd->transceiver = XCVR_EXTERNAL;
-	cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
+	cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22;
 	hns_get_mdix_mode(net_dev, cmd);
 
 	return 0;
 }
 
 /**
- *hns_nic_set_settings - implement ethtool set settings
+ *hns_nic_set_link_settings - implement ethtool set link ksettings
  *@net_dev: net_device
- *@cmd: ethtool_cmd
+ *@cmd: ethtool_link_ksettings
  *retuen 0 - success , negative --fail
  */
-static int hns_nic_set_settings(struct net_device *net_dev,
-				struct ethtool_cmd *cmd)
+static int hns_nic_set_link_ksettings(struct net_device *net_dev,
+				      const struct ethtool_link_ksettings *cmd)
 {
 	struct hns_nic_priv *priv = netdev_priv(net_dev);
 	struct hnae_handle *h;
@@ -208,24 +217,25 @@
 		return -ENODEV;
 
 	h = priv->ae_handle;
-	speed = ethtool_cmd_speed(cmd);
+	speed = cmd->base.speed;
 
 	if (h->phy_if == PHY_INTERFACE_MODE_XGMII) {
-		if (cmd->autoneg == AUTONEG_ENABLE || speed != SPEED_10000 ||
-		    cmd->duplex != DUPLEX_FULL)
+		if (cmd->base.autoneg == AUTONEG_ENABLE ||
+		    speed != SPEED_10000 ||
+		    cmd->base.duplex != DUPLEX_FULL)
 			return -EINVAL;
 	} else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) {
-		if (!priv->phy && cmd->autoneg == AUTONEG_ENABLE)
+		if (!net_dev->phydev && cmd->base.autoneg == AUTONEG_ENABLE)
 			return -EINVAL;
 
-		if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF)
+		if (speed == SPEED_1000 && cmd->base.duplex == DUPLEX_HALF)
 			return -EINVAL;
-		if (priv->phy)
-			return phy_ethtool_sset(priv->phy, cmd);
+		if (net_dev->phydev)
+			return phy_ethtool_ksettings_set(net_dev->phydev, cmd);
 
 		if ((speed != SPEED_10 && speed != SPEED_100 &&
-		     speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF &&
-		     cmd->duplex != DUPLEX_FULL))
+		     speed != SPEED_1000) || (cmd->base.duplex != DUPLEX_HALF &&
+		     cmd->base.duplex != DUPLEX_FULL))
 			return -EINVAL;
 	} else {
 		netdev_err(net_dev, "Not supported!");
@@ -233,7 +243,7 @@
 	}
 
 	if (h->dev->ops->adjust_link) {
-		h->dev->ops->adjust_link(h, (int)speed, cmd->duplex);
+		h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex);
 		return 0;
 	}
 
@@ -305,7 +315,7 @@
 {
 	int ret = 0;
 	struct hns_nic_priv *priv = netdev_priv(ndev);
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = ndev->phydev;
 	struct hnae_handle *h = priv->ae_handle;
 
 	switch (loop) {
@@ -910,7 +920,7 @@
 		memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES],
 		       ETH_GSTRING_LEN);
 		buff += ETH_GSTRING_LEN;
-		if ((priv->phy) && (!priv->phy->is_c45))
+		if ((netdev->phydev) && (!netdev->phydev->is_c45))
 			memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY],
 			       ETH_GSTRING_LEN);
 
@@ -996,7 +1006,7 @@
 		if (priv->ae_handle->phy_if == PHY_INTERFACE_MODE_XGMII)
 			cnt--;
 
-		if ((!priv->phy) || (priv->phy->is_c45))
+		if ((!netdev->phydev) || (netdev->phydev->is_c45))
 			cnt--;
 
 		return cnt;
@@ -1015,8 +1025,7 @@
 int hns_phy_led_set(struct net_device *netdev, int value)
 {
 	int retval;
-	struct hns_nic_priv *priv = netdev_priv(netdev);
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = netdev->phydev;
 
 	retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED);
 	retval |= phy_write(phy_dev, HNS_LED_FC_REG, value);
@@ -1039,7 +1048,7 @@
 {
 	struct hns_nic_priv *priv = netdev_priv(netdev);
 	struct hnae_handle *h = priv->ae_handle;
-	struct phy_device *phy_dev = priv->phy;
+	struct phy_device *phy_dev = netdev->phydev;
 	int ret;
 
 	if (phy_dev)
@@ -1159,8 +1168,7 @@
 static int hns_nic_nway_reset(struct net_device *netdev)
 {
 	int ret = 0;
-	struct hns_nic_priv *priv = netdev_priv(netdev);
-	struct phy_device *phy = priv->phy;
+	struct phy_device *phy = netdev->phydev;
 
 	if (netif_running(netdev)) {
 		if (phy)
@@ -1264,11 +1272,9 @@
 	return 0;
 }
 
-static struct ethtool_ops hns_ethtool_ops = {
+static const struct ethtool_ops hns_ethtool_ops = {
 	.get_drvinfo = hns_nic_get_drvinfo,
 	.get_link  = hns_nic_get_link,
-	.get_settings  = hns_nic_get_settings,
-	.set_settings  = hns_nic_set_settings,
 	.get_ringparam = hns_get_ringparam,
 	.get_pauseparam = hns_get_pauseparam,
 	.set_pauseparam = hns_set_pauseparam,
@@ -1288,6 +1294,8 @@
 	.get_rxfh = hns_get_rss,
 	.set_rxfh = hns_set_rss,
 	.get_rxnfc = hns_get_rxnfc,
+	.get_link_ksettings  = hns_nic_get_link_ksettings,
+	.set_link_ksettings  = hns_nic_set_link_ksettings,
 };
 
 void hns_ethtool_set_ops(struct net_device *ndev)
diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c
index befb4ac..ce235b7 100644
--- a/drivers/net/ethernet/i825xx/82596.c
+++ b/drivers/net/ethernet/i825xx/82596.c
@@ -89,10 +89,10 @@
 #define DEB(x,y)	if (i596_debug & (x)) y
 
 
-#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_MVME16x_NET_MODULE)
+#if IS_ENABLED(CONFIG_MVME16x_NET)
 #define ENABLE_MVME16x_NET
 #endif
-#if defined(CONFIG_BVME6000_NET) || defined(CONFIG_BVME6000_NET_MODULE)
+#if IS_ENABLED(CONFIG_BVME6000_NET)
 #define ENABLE_BVME6000_NET
 #endif
 
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 4c9771d..8f13919 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -977,7 +977,37 @@
 		dev->mcast_pending = 1;
 		return;
 	}
+
+	mutex_lock(&dev->link_lock);
 	__emac_set_multicast_list(dev);
+	mutex_unlock(&dev->link_lock);
+}
+
+static int emac_set_mac_address(struct net_device *ndev, void *sa)
+{
+	struct emac_instance *dev = netdev_priv(ndev);
+	struct sockaddr *addr = sa;
+	struct emac_regs __iomem *p = dev->emacp;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+	       return -EADDRNOTAVAIL;
+
+	mutex_lock(&dev->link_lock);
+
+	memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len);
+
+	emac_rx_disable(dev);
+	emac_tx_disable(dev);
+	out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]);
+	out_be32(&p->ialr, (ndev->dev_addr[2] << 24) |
+		(ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) |
+		ndev->dev_addr[5]);
+	emac_tx_enable(dev);
+	emac_rx_enable(dev);
+
+	mutex_unlock(&dev->link_lock);
+
+	return 0;
 }
 
 static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
@@ -2686,7 +2716,7 @@
 	.ndo_do_ioctl		= emac_ioctl,
 	.ndo_tx_timeout		= emac_tx_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_mac_address	= emac_set_mac_address,
 	.ndo_start_xmit		= emac_start_xmit,
 	.ndo_change_mtu		= eth_change_mtu,
 };
@@ -2699,7 +2729,7 @@
 	.ndo_do_ioctl		= emac_ioctl,
 	.ndo_tx_timeout		= emac_tx_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_set_mac_address	= emac_set_mac_address,
 	.ndo_start_xmit		= emac_start_xmit_sg,
 	.ndo_change_mtu		= emac_change_mtu,
 };
@@ -2750,7 +2780,7 @@
 	/* Get interrupts. EMAC irq is mandatory, WOL irq is optional */
 	dev->emac_irq = irq_of_parse_and_map(np, 0);
 	dev->wol_irq = irq_of_parse_and_map(np, 1);
-	if (dev->emac_irq == NO_IRQ) {
+	if (!dev->emac_irq) {
 		printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name);
 		goto err_free;
 	}
@@ -2913,9 +2943,9 @@
  err_reg_unmap:
 	iounmap(dev->emacp);
  err_irq_unmap:
-	if (dev->wol_irq != NO_IRQ)
+	if (dev->wol_irq)
 		irq_dispose_mapping(dev->wol_irq);
-	if (dev->emac_irq != NO_IRQ)
+	if (dev->emac_irq)
 		irq_dispose_mapping(dev->emac_irq);
  err_free:
 	free_netdev(ndev);
@@ -2957,9 +2987,9 @@
 	emac_dbg_unregister(dev);
 	iounmap(dev->emacp);
 
-	if (dev->wol_irq != NO_IRQ)
+	if (dev->wol_irq)
 		irq_dispose_mapping(dev->wol_irq);
-	if (dev->emac_irq != NO_IRQ)
+	if (dev->emac_irq)
 		irq_dispose_mapping(dev->emac_irq);
 
 	free_netdev(dev->ndev);
diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c
index fdb5cdb..aaf6fec 100644
--- a/drivers/net/ethernet/ibm/emac/mal.c
+++ b/drivers/net/ethernet/ibm/emac/mal.c
@@ -597,9 +597,8 @@
 		mal->rxde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 4);
 	}
 
-	if (mal->txeob_irq == NO_IRQ || mal->rxeob_irq == NO_IRQ ||
-	    mal->serr_irq == NO_IRQ || mal->txde_irq == NO_IRQ ||
-	    mal->rxde_irq == NO_IRQ) {
+	if (!mal->txeob_irq || !mal->rxeob_irq || !mal->serr_irq ||
+	    !mal->txde_irq  || !mal->rxde_irq) {
 		printk(KERN_ERR
 		       "mal%d: failed to map interrupts !\n", index);
 		err = -ENODEV;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 88f3c85..bfe17d9 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -203,7 +203,8 @@
 	struct device *dev = &adapter->vdev->dev;
 
 	dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
-	send_request_unmap(adapter, ltb->map_id);
+	if (!adapter->failover)
+		send_request_unmap(adapter, ltb->map_id);
 }
 
 static int alloc_rx_pool(struct ibmvnic_adapter *adapter,
@@ -522,7 +523,8 @@
 	for (i = 0; i < adapter->req_rx_queues; i++)
 		napi_disable(&adapter->napi[i]);
 
-	netif_tx_stop_all_queues(netdev);
+	if (!adapter->failover)
+		netif_tx_stop_all_queues(netdev);
 
 	if (adapter->bounce_buffer) {
 		if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) {
@@ -1422,7 +1424,7 @@
 		scrq = adapter->tx_scrq[i];
 		scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
 
-		if (scrq->irq == NO_IRQ) {
+		if (!scrq->irq) {
 			rc = -EINVAL;
 			dev_err(dev, "Error mapping irq\n");
 			goto req_tx_irq_failed;
@@ -1442,7 +1444,7 @@
 	for (i = 0; i < adapter->req_rx_queues; i++) {
 		scrq = adapter->rx_scrq[i];
 		scrq->irq = irq_create_mapping(NULL, scrq->hw_irq);
-		if (scrq->irq == NO_IRQ) {
+		if (!scrq->irq) {
 			rc = -EINVAL;
 			dev_err(dev, "Error mapping irq\n");
 			goto req_rx_irq_failed;
@@ -2777,12 +2779,6 @@
 	}
 }
 
-static int ibmvnic_fw_comp_open(struct inode *inode, struct file *file)
-{
-	file->private_data = inode->i_private;
-	return 0;
-}
-
 static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len,
 			  loff_t *ppos)
 {
@@ -2834,7 +2830,7 @@
 
 static const struct file_operations trace_ops = {
 	.owner		= THIS_MODULE,
-	.open		= ibmvnic_fw_comp_open,
+	.open		= simple_open,
 	.read		= trace_read,
 };
 
@@ -2884,7 +2880,7 @@
 
 static const struct file_operations paused_ops = {
 	.owner		= THIS_MODULE,
-	.open		= ibmvnic_fw_comp_open,
+	.open		= simple_open,
 	.read		= paused_read,
 	.write		= paused_write,
 };
@@ -2932,7 +2928,7 @@
 
 static const struct file_operations tracing_ops = {
 	.owner		= THIS_MODULE,
-	.open		= ibmvnic_fw_comp_open,
+	.open		= simple_open,
 	.read		= tracing_read,
 	.write		= tracing_write,
 };
@@ -2985,7 +2981,7 @@
 
 static const struct file_operations error_level_ops = {
 	.owner		= THIS_MODULE,
-	.open		= ibmvnic_fw_comp_open,
+	.open		= simple_open,
 	.read		= error_level_read,
 	.write		= error_level_write,
 };
@@ -3036,7 +3032,7 @@
 
 static const struct file_operations trace_level_ops = {
 	.owner		= THIS_MODULE,
-	.open		= ibmvnic_fw_comp_open,
+	.open		= simple_open,
 	.read		= trace_level_read,
 	.write		= trace_level_write,
 };
@@ -3089,7 +3085,7 @@
 
 static const struct file_operations trace_size_ops = {
 	.owner		= THIS_MODULE,
-	.open		= ibmvnic_fw_comp_open,
+	.open		= simple_open,
 	.read		= trace_buff_size_read,
 	.write		= trace_buff_size_write,
 };
@@ -3280,6 +3276,10 @@
 			rc = ibmvnic_send_crq_init(adapter);
 			if (rc)
 				dev_err(dev, "Error sending init rc=%ld\n", rc);
+		} else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) {
+			dev_info(dev, "Backing device failover detected\n");
+			netif_carrier_off(netdev);
+			adapter->failover = true;
 		} else {
 			/* The adapter lost the connection */
 			dev_err(dev, "Virtual Adapter failed (rc=%d)\n",
@@ -3615,8 +3615,18 @@
 	struct device *dev = &adapter->vdev->dev;
 	struct net_device *netdev = adapter->netdev;
 	unsigned long timeout = msecs_to_jiffies(30000);
+	bool restart = false;
 	int rc;
 
+	if (adapter->failover) {
+		release_sub_crqs(adapter);
+		if (netif_running(netdev)) {
+			netif_tx_disable(netdev);
+			ibmvnic_close(netdev);
+			restart = true;
+		}
+	}
+
 	send_version_xchg(adapter);
 	reinit_completion(&adapter->init_done);
 	if (!wait_for_completion_timeout(&adapter->init_done, timeout)) {
@@ -3645,6 +3655,17 @@
 
 	netdev->real_num_tx_queues = adapter->req_tx_queues;
 
+	if (adapter->failover) {
+		adapter->failover = false;
+		if (restart) {
+			rc = ibmvnic_open(netdev);
+			if (rc)
+				goto restart_failed;
+		}
+		netif_carrier_on(netdev);
+		return;
+	}
+
 	rc = register_netdev(netdev);
 	if (rc) {
 		dev_err(dev,
@@ -3655,6 +3676,8 @@
 
 	return;
 
+restart_failed:
+	dev_err(dev, "Failed to restart ibmvnic, rc=%d\n", rc);
 register_failed:
 	release_sub_crqs(adapter);
 task_failed:
@@ -3692,6 +3715,7 @@
 	dev_set_drvdata(&dev->dev, netdev);
 	adapter->vdev = dev;
 	adapter->netdev = netdev;
+	adapter->failover = false;
 
 	ether_addr_copy(adapter->mac_addr, mac_addr_p);
 	ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
@@ -3721,6 +3745,7 @@
 	if (dma_mapping_error(&dev->dev, adapter->stats_token)) {
 		if (!firmware_has_feature(FW_FEATURE_CMO))
 			dev_err(&dev->dev, "Couldn't map stats buffer\n");
+		rc = -ENOMEM;
 		goto free_crq;
 	}
 
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index e82898f..bfc84c7 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -830,6 +830,7 @@
 	IBMVNIC_CRQ_INIT                 = 0x01,
 	IBMVNIC_CRQ_INIT_COMPLETE        = 0x02,
 	IBMVNIC_PARTITION_MIGRATED       = 0x06,
+	IBMVNIC_DEVICE_FAILOVER          = 0x08,
 };
 
 struct ibmvnic_crq_queue {
@@ -1047,4 +1048,5 @@
 	u8 map_id;
 
 	struct work_struct vnic_crq_init;
+	bool failover;
 };
diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c
index 7fd4d54..6b03c85 100644
--- a/drivers/net/ethernet/intel/e1000e/82571.c
+++ b/drivers/net/ethernet/intel/e1000e/82571.c
@@ -2032,7 +2032,8 @@
 				  | FLAG2_DISABLE_ASPM_L0S
 				  | FLAG2_DISABLE_ASPM_L1
 				  | FLAG2_NO_DISABLE_RX
-				  | FLAG2_DMA_BURST,
+				  | FLAG2_DMA_BURST
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
 	.pba			= 32,
 	.max_hw_frame_size	= DEFAULT_JUMBO,
 	.get_variants		= e1000_get_variants_82571,
@@ -2053,7 +2054,8 @@
 				  | FLAG_HAS_CTRLEXT_ON_LOAD,
 	.flags2			= FLAG2_DISABLE_ASPM_L0S
 				  | FLAG2_DISABLE_ASPM_L1
-				  | FLAG2_NO_DISABLE_RX,
+				  | FLAG2_NO_DISABLE_RX
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
 	.pba			= 32,
 	.max_hw_frame_size	= DEFAULT_JUMBO,
 	.get_variants		= e1000_get_variants_82571,
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index ef96cd1..879cca4 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -452,6 +452,7 @@
 #define FLAG2_PCIM2PCI_ARBITER_WA         BIT(11)
 #define FLAG2_DFLT_CRC_STRIPPING          BIT(12)
 #define FLAG2_CHECK_RX_HWTSTAMP           BIT(13)
+#define FLAG2_CHECK_SYSTIM_OVERFLOW       BIT(14)
 
 #define E1000_RX_DESC_PS(R, i)	    \
 	(&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 3e11322..f3aaca7 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -5885,7 +5885,8 @@
 				  | FLAG_HAS_JUMBO_FRAMES
 				  | FLAG_APME_IN_WUC,
 	.flags2			= FLAG2_HAS_PHY_STATS
-				  | FLAG2_HAS_EEE,
+				  | FLAG2_HAS_EEE
+				  | FLAG2_CHECK_SYSTIM_OVERFLOW,
 	.pba			= 26,
 	.max_hw_frame_size	= 9022,
 	.get_variants		= e1000_get_variants_ich8lan,
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 02f4439..7017281 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4303,6 +4303,42 @@
 }
 
 /**
+ * e1000e_sanitize_systim - sanitize raw cycle counter reads
+ * @hw: pointer to the HW structure
+ * @systim: cycle_t value read, sanitized and returned
+ *
+ * Errata for 82574/82583 possible bad bits read from SYSTIMH/L:
+ * check to see that the time is incrementing at a reasonable
+ * rate and is a multiple of incvalue.
+ **/
+static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim)
+{
+	u64 time_delta, rem, temp;
+	cycle_t systim_next;
+	u32 incvalue;
+	int i;
+
+	incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
+	for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
+		/* latch SYSTIMH on read of SYSTIML */
+		systim_next = (cycle_t)er32(SYSTIML);
+		systim_next |= (cycle_t)er32(SYSTIMH) << 32;
+
+		time_delta = systim_next - systim;
+		temp = time_delta;
+		/* VMWare users have seen incvalue of zero, don't div / 0 */
+		rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
+
+		systim = systim_next;
+
+		if ((time_delta < E1000_82574_SYSTIM_EPSILON) && (rem == 0))
+			break;
+	}
+
+	return systim;
+}
+
+/**
  * e1000e_cyclecounter_read - read raw cycle counter (used by time counter)
  * @cc: cyclecounter structure
  **/
@@ -4312,7 +4348,7 @@
 						     cc);
 	struct e1000_hw *hw = &adapter->hw;
 	u32 systimel, systimeh;
-	cycle_t systim, systim_next;
+	cycle_t systim;
 	/* SYSTIMH latching upon SYSTIML read does not work well.
 	 * This means that if SYSTIML overflows after we read it but before
 	 * we read SYSTIMH, the value of SYSTIMH has been incremented and we
@@ -4335,33 +4371,9 @@
 	systim = (cycle_t)systimel;
 	systim |= (cycle_t)systimeh << 32;
 
-	if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) {
-		u64 time_delta, rem, temp;
-		u32 incvalue;
-		int i;
+	if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW)
+		systim = e1000e_sanitize_systim(hw, systim);
 
-		/* errata for 82574/82583 possible bad bits read from SYSTIMH/L
-		 * check to see that the time is incrementing at a reasonable
-		 * rate and is a multiple of incvalue
-		 */
-		incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK;
-		for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) {
-			/* latch SYSTIMH on read of SYSTIML */
-			systim_next = (cycle_t)er32(SYSTIML);
-			systim_next |= (cycle_t)er32(SYSTIMH) << 32;
-
-			time_delta = systim_next - systim;
-			temp = time_delta;
-			/* VMWare users have seen incvalue of zero, don't div / 0 */
-			rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0);
-
-			systim = systim_next;
-
-			if ((time_delta < E1000_82574_SYSTIM_EPSILON) &&
-			    (rem == 0))
-				break;
-		}
-	}
 	return systim;
 }
 
diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c
index 2e1b17a..ad03763 100644
--- a/drivers/net/ethernet/intel/e1000e/ptp.c
+++ b/drivers/net/ethernet/intel/e1000e/ptp.c
@@ -334,7 +334,7 @@
 	if (IS_ERR(adapter->ptp_clock)) {
 		adapter->ptp_clock = NULL;
 		e_err("ptp_clock_register failed\n");
-	} else {
+	} else if (adapter->ptp_clock) {
 		e_info("registered PHC clock\n");
 	}
 }
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index c4cf08d..4d19e46 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -240,9 +240,7 @@
 	struct fm10k_vf_info	vf_info[0];
 };
 
-#define fm10k_vxlan_port_for_each(vp, intfc) \
-	list_for_each_entry(vp, &(intfc)->vxlan_port, list)
-struct fm10k_vxlan_port {
+struct fm10k_udp_port {
 	struct list_head	list;
 	sa_family_t		sa_family;
 	__be16			port;
@@ -335,8 +333,9 @@
 	u32 reta[FM10K_RETA_SIZE];
 	u32 rssrk[FM10K_RSSRK_SIZE];
 
-	/* VXLAN port tracking information */
+	/* UDP encapsulation port tracking information */
 	struct list_head vxlan_port;
+	struct list_head geneve_port;
 
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *dbg_intfc;
@@ -458,7 +457,7 @@
 netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb,
 				  struct fm10k_ring *tx_ring);
 void fm10k_tx_timeout_reset(struct fm10k_intfc *interface);
-u64 fm10k_get_tx_pending(struct fm10k_ring *ring);
+u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw);
 bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring);
 void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count);
 
@@ -496,7 +495,6 @@
 
 /* Ethtool */
 void fm10k_set_ethtool_ops(struct net_device *dev);
-u32 fm10k_get_reta_size(struct net_device *netdev);
 void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir);
 
 /* IOV */
@@ -509,7 +507,7 @@
 s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
 int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
 int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
-			  int vf_idx, u16 vid, u8 qos);
+			  int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
 int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate,
 			int unused);
 int fm10k_ndo_get_vf_config(struct net_device *netdev,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index d6baaea..dd95ac4 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -207,6 +207,9 @@
 	/* clear tx_ready to prevent any false hits for reset */
 	hw->mac.tx_ready = false;
 
+	if (FM10K_REMOVED(hw->hw_addr))
+		return 0;
+
 	/* clear the enable bit for all rings */
 	for (i = 0; i < q_cnt; i++) {
 		reg = fm10k_read_reg(hw, FM10K_TXDCTL(i));
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
index 50f71e9..d51f9c7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h
@@ -34,7 +34,7 @@
 /* write operations, indexed using DWORDS */
 #define fm10k_write_reg(hw, reg, val) \
 do { \
-	u32 __iomem *hw_addr = ACCESS_ONCE((hw)->hw_addr); \
+	u32 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
 	if (!FM10K_REMOVED(hw_addr)) \
 		writel((val), &hw_addr[(reg)]); \
 } while (0)
@@ -42,7 +42,7 @@
 /* Switch register write operations, index using DWORDS */
 #define fm10k_write_sw_reg(hw, reg, val) \
 do { \
-	u32 __iomem *sw_addr = ACCESS_ONCE((hw)->sw_addr); \
+	u32 __iomem *sw_addr = READ_ONCE((hw)->sw_addr); \
 	if (!FM10K_REMOVED(sw_addr)) \
 		writel((val), &sw_addr[(reg)]); \
 } while (0)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index c04cbe9..5241e08 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -966,7 +966,7 @@
 	return 0;
 }
 
-u32 fm10k_get_reta_size(struct net_device __always_unused *netdev)
+static u32 fm10k_get_reta_size(struct net_device __always_unused *netdev)
 {
 	return FM10K_RETA_SIZE * FM10K_RETA_ENTRIES_PER_REG;
 }
@@ -1182,6 +1182,7 @@
 	.set_rxfh		= fm10k_set_rssh,
 	.get_channels		= fm10k_get_channels,
 	.set_channels		= fm10k_set_channels,
+	.get_ts_info		= ethtool_op_get_ts_info,
 };
 
 void fm10k_set_ethtool_ops(struct net_device *dev)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index 47f0743..5f4dac0 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -51,7 +51,7 @@
 	int i;
 
 	/* if there is no iov_data then there is no mailbox to process */
-	if (!ACCESS_ONCE(interface->iov_data))
+	if (!READ_ONCE(interface->iov_data))
 		return 0;
 
 	rcu_read_lock();
@@ -99,7 +99,7 @@
 	int i;
 
 	/* if there is no iov_data then there is no mailbox to process */
-	if (!ACCESS_ONCE(interface->iov_data))
+	if (!READ_ONCE(interface->iov_data))
 		return 0;
 
 	rcu_read_lock();
@@ -445,7 +445,7 @@
 }
 
 int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
-			  u8 qos)
+			  u8 qos, __be16 vlan_proto)
 {
 	struct fm10k_intfc *interface = netdev_priv(netdev);
 	struct fm10k_iov_data *iov_data = interface->iov_data;
@@ -460,6 +460,10 @@
 	if (qos || (vid > (VLAN_VID_MASK - 1)))
 		return -EINVAL;
 
+	/* VF VLAN Protocol part to default is unsupported */
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	vf_info = &iov_data->vf_info[vf_idx];
 
 	/* exit if there is nothing to do */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index e9767b6..5de9378 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -56,7 +56,8 @@
 	pr_info("%s\n", fm10k_copyright);
 
 	/* create driver workqueue */
-	fm10k_workqueue = alloc_workqueue("fm10k", WQ_MEM_RECLAIM, 0);
+	fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+					  fm10k_driver_name);
 
 	fm10k_dbg_init();
 
@@ -651,11 +652,11 @@
 static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb)
 {
 	struct fm10k_intfc *interface = netdev_priv(skb->dev);
-	struct fm10k_vxlan_port *vxlan_port;
+	struct fm10k_udp_port *vxlan_port;
 
 	/* we can only offload a vxlan if we recognize it as such */
 	vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
-					      struct fm10k_vxlan_port, list);
+					      struct fm10k_udp_port, list);
 
 	if (!vxlan_port)
 		return NULL;
@@ -1128,13 +1129,24 @@
 	return ring->stats.packets;
 }
 
-u64 fm10k_get_tx_pending(struct fm10k_ring *ring)
+/**
+ * fm10k_get_tx_pending - how many Tx descriptors not processed
+ * @ring: the ring structure
+ * @in_sw: is tx_pending being checked in SW or in HW?
+ */
+u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw)
 {
 	struct fm10k_intfc *interface = ring->q_vector->interface;
 	struct fm10k_hw *hw = &interface->hw;
+	u32 head, tail;
 
-	u32 head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx));
-	u32 tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx));
+	if (likely(in_sw)) {
+		head = ring->next_to_clean;
+		tail = ring->next_to_use;
+	} else {
+		head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx));
+		tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx));
+	}
 
 	return ((head <= tail) ? tail : tail + ring->count) - head;
 }
@@ -1143,7 +1155,7 @@
 {
 	u32 tx_done = fm10k_get_tx_completed(tx_ring);
 	u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
-	u32 tx_pending = fm10k_get_tx_pending(tx_ring);
+	u32 tx_pending = fm10k_get_tx_pending(tx_ring, true);
 
 	clear_check_for_tx_hang(tx_ring);
 
@@ -1397,7 +1409,7 @@
 	 * that the calculation will never get below a 1. The bit shift
 	 * accounts for changes in the ITR due to PCIe link speed.
 	 */
-	itr_round = ACCESS_ONCE(ring_container->itr_scale) + 8;
+	itr_round = READ_ONCE(ring_container->itr_scale) + 8;
 	avg_wire_size += BIT(itr_round) - 1;
 	avg_wire_size >>= itr_round;
 
@@ -1473,7 +1485,7 @@
 	/* re-enable the q_vector */
 	fm10k_qv_enable(q_vector);
 
-	return 0;
+	return min(work_done, budget - 1);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 20a5bbe3..0562938 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -384,129 +384,171 @@
 }
 
 /**
- * fm10k_del_vxlan_port_all
+ * fm10k_free_udp_port_info
  * @interface: board private structure
  *
- * This function frees the entire vxlan_port list
+ * This function frees both geneve_port and vxlan_port structures
  **/
-static void fm10k_del_vxlan_port_all(struct fm10k_intfc *interface)
+static void fm10k_free_udp_port_info(struct fm10k_intfc *interface)
 {
-	struct fm10k_vxlan_port *vxlan_port;
+	struct fm10k_udp_port *port;
 
-	/* flush all entries from list */
-	vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
-					      struct fm10k_vxlan_port, list);
-	while (vxlan_port) {
-		list_del(&vxlan_port->list);
-		kfree(vxlan_port);
-		vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
-						      struct fm10k_vxlan_port,
-						      list);
+	/* flush all entries from vxlan list */
+	port = list_first_entry_or_null(&interface->vxlan_port,
+					struct fm10k_udp_port, list);
+	while (port) {
+		list_del(&port->list);
+		kfree(port);
+		port = list_first_entry_or_null(&interface->vxlan_port,
+						struct fm10k_udp_port,
+						list);
+	}
+
+	/* flush all entries from geneve list */
+	port = list_first_entry_or_null(&interface->geneve_port,
+					struct fm10k_udp_port, list);
+	while (port) {
+		list_del(&port->list);
+		kfree(port);
+		port = list_first_entry_or_null(&interface->vxlan_port,
+						struct fm10k_udp_port,
+						list);
 	}
 }
 
 /**
- * fm10k_restore_vxlan_port
+ * fm10k_restore_udp_port_info
  * @interface: board private structure
  *
- * This function restores the value in the tunnel_cfg register after reset
+ * This function restores the value in the tunnel_cfg register(s) after reset
  **/
-static void fm10k_restore_vxlan_port(struct fm10k_intfc *interface)
+static void fm10k_restore_udp_port_info(struct fm10k_intfc *interface)
 {
 	struct fm10k_hw *hw = &interface->hw;
-	struct fm10k_vxlan_port *vxlan_port;
+	struct fm10k_udp_port *port;
 
 	/* only the PF supports configuring tunnels */
 	if (hw->mac.type != fm10k_mac_pf)
 		return;
 
-	vxlan_port = list_first_entry_or_null(&interface->vxlan_port,
-					      struct fm10k_vxlan_port, list);
+	port = list_first_entry_or_null(&interface->vxlan_port,
+					struct fm10k_udp_port, list);
 
 	/* restore tunnel configuration register */
 	fm10k_write_reg(hw, FM10K_TUNNEL_CFG,
-			(vxlan_port ? ntohs(vxlan_port->port) : 0) |
+			(port ? ntohs(port->port) : 0) |
 			(ETH_P_TEB << FM10K_TUNNEL_CFG_NVGRE_SHIFT));
+
+	port = list_first_entry_or_null(&interface->geneve_port,
+					struct fm10k_udp_port, list);
+
+	/* restore Geneve tunnel configuration register */
+	fm10k_write_reg(hw, FM10K_TUNNEL_CFG_GENEVE,
+			(port ? ntohs(port->port) : 0));
+}
+
+static struct fm10k_udp_port *
+fm10k_remove_tunnel_port(struct list_head *ports,
+			 struct udp_tunnel_info *ti)
+{
+	struct fm10k_udp_port *port;
+
+	list_for_each_entry(port, ports, list) {
+		if ((port->port == ti->port) &&
+		    (port->sa_family == ti->sa_family)) {
+			list_del(&port->list);
+			return port;
+		}
+	}
+
+	return NULL;
+}
+
+static void fm10k_insert_tunnel_port(struct list_head *ports,
+				     struct udp_tunnel_info *ti)
+{
+	struct fm10k_udp_port *port;
+
+	/* remove existing port entry from the list so that the newest items
+	 * are always at the tail of the list.
+	 */
+	port = fm10k_remove_tunnel_port(ports, ti);
+	if (!port) {
+		port = kmalloc(sizeof(*port), GFP_ATOMIC);
+		if  (!port)
+			return;
+		port->port = ti->port;
+		port->sa_family = ti->sa_family;
+	}
+
+	list_add_tail(&port->list, ports);
 }
 
 /**
- * fm10k_add_vxlan_port
+ * fm10k_udp_tunnel_add
  * @netdev: network interface device structure
  * @ti: Tunnel endpoint information
  *
- * This function is called when a new VXLAN interface has added a new port
- * number to the range that is currently in use for VXLAN.  The new port
- * number is always added to the tail so that the port number list should
- * match the order in which the ports were allocated.  The head of the list
- * is always used as the VXLAN port number for offloads.
+ * This function is called when a new UDP tunnel port has been added.
+ * Due to hardware restrictions, only one port per type can be offloaded at
+ * once.
  **/
-static void fm10k_add_vxlan_port(struct net_device *dev,
+static void fm10k_udp_tunnel_add(struct net_device *dev,
 				 struct udp_tunnel_info *ti)
 {
 	struct fm10k_intfc *interface = netdev_priv(dev);
-	struct fm10k_vxlan_port *vxlan_port;
 
-	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-		return;
 	/* only the PF supports configuring tunnels */
 	if (interface->hw.mac.type != fm10k_mac_pf)
 		return;
 
-	/* existing ports are pulled out so our new entry is always last */
-	fm10k_vxlan_port_for_each(vxlan_port, interface) {
-		if ((vxlan_port->port == ti->port) &&
-		    (vxlan_port->sa_family == ti->sa_family)) {
-			list_del(&vxlan_port->list);
-			goto insert_tail;
-		}
+	switch (ti->type) {
+	case UDP_TUNNEL_TYPE_VXLAN:
+		fm10k_insert_tunnel_port(&interface->vxlan_port, ti);
+		break;
+	case UDP_TUNNEL_TYPE_GENEVE:
+		fm10k_insert_tunnel_port(&interface->geneve_port, ti);
+		break;
+	default:
+		return;
 	}
 
-	/* allocate memory to track ports */
-	vxlan_port = kmalloc(sizeof(*vxlan_port), GFP_ATOMIC);
-	if (!vxlan_port)
-		return;
-	vxlan_port->port = ti->port;
-	vxlan_port->sa_family = ti->sa_family;
-
-insert_tail:
-	/* add new port value to list */
-	list_add_tail(&vxlan_port->list, &interface->vxlan_port);
-
-	fm10k_restore_vxlan_port(interface);
+	fm10k_restore_udp_port_info(interface);
 }
 
 /**
- * fm10k_del_vxlan_port
+ * fm10k_udp_tunnel_del
  * @netdev: network interface device structure
  * @ti: Tunnel endpoint information
  *
- * This function is called when a new VXLAN interface has freed a port
- * number from the range that is currently in use for VXLAN.  The freed
- * port is removed from the list and the new head is used to determine
- * the port number for offloads.
+ * This function is called when a new UDP tunnel port is deleted. The freed
+ * port will be removed from the list, then we reprogram the offloaded port
+ * based on the head of the list.
  **/
-static void fm10k_del_vxlan_port(struct net_device *dev,
+static void fm10k_udp_tunnel_del(struct net_device *dev,
 				 struct udp_tunnel_info *ti)
 {
 	struct fm10k_intfc *interface = netdev_priv(dev);
-	struct fm10k_vxlan_port *vxlan_port;
+	struct fm10k_udp_port *port = NULL;
 
-	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-		return;
 	if (interface->hw.mac.type != fm10k_mac_pf)
 		return;
 
-	/* find the port in the list and free it */
-	fm10k_vxlan_port_for_each(vxlan_port, interface) {
-		if ((vxlan_port->port == ti->port) &&
-		    (vxlan_port->sa_family == ti->sa_family)) {
-			list_del(&vxlan_port->list);
-			kfree(vxlan_port);
-			break;
-		}
+	switch (ti->type) {
+	case UDP_TUNNEL_TYPE_VXLAN:
+		port = fm10k_remove_tunnel_port(&interface->vxlan_port, ti);
+		break;
+	case UDP_TUNNEL_TYPE_GENEVE:
+		port = fm10k_remove_tunnel_port(&interface->geneve_port, ti);
+		break;
+	default:
+		return;
 	}
 
-	fm10k_restore_vxlan_port(interface);
+	/* if we did remove a port we need to free its memory */
+	kfree(port);
+
+	fm10k_restore_udp_port_info(interface);
 }
 
 /**
@@ -555,7 +597,6 @@
 	if (err)
 		goto err_set_queues;
 
-	/* update VXLAN port configuration */
 	udp_tunnel_get_rx_info(netdev);
 
 	fm10k_up(interface);
@@ -591,7 +632,7 @@
 
 	fm10k_qv_free_irq(interface);
 
-	fm10k_del_vxlan_port_all(interface);
+	fm10k_free_udp_port_info(interface);
 
 	fm10k_free_all_tx_resources(interface);
 	fm10k_free_all_rx_resources(interface);
@@ -1055,7 +1096,7 @@
 	interface->xcast_mode = xcast_mode;
 
 	/* Restore tunnel configuration */
-	fm10k_restore_vxlan_port(interface);
+	fm10k_restore_udp_port_info(interface);
 }
 
 void fm10k_reset_rx_state(struct fm10k_intfc *interface)
@@ -1098,7 +1139,7 @@
 	rcu_read_lock();
 
 	for (i = 0; i < interface->num_rx_queues; i++) {
-		ring = ACCESS_ONCE(interface->rx_ring[i]);
+		ring = READ_ONCE(interface->rx_ring[i]);
 
 		if (!ring)
 			continue;
@@ -1114,7 +1155,7 @@
 	}
 
 	for (i = 0; i < interface->num_tx_queues; i++) {
-		ring = ACCESS_ONCE(interface->tx_ring[i]);
+		ring = READ_ONCE(interface->tx_ring[i]);
 
 		if (!ring)
 			continue;
@@ -1299,7 +1340,7 @@
 static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
 {
 	struct fm10k_intfc *interface = netdev_priv(dev);
-	struct fm10k_l2_accel *l2_accel = ACCESS_ONCE(interface->l2_accel);
+	struct fm10k_l2_accel *l2_accel = READ_ONCE(interface->l2_accel);
 	struct fm10k_dglort_cfg dglort = { 0 };
 	struct fm10k_hw *hw = &interface->hw;
 	struct net_device *sdev = priv;
@@ -1375,8 +1416,8 @@
 	.ndo_set_vf_vlan	= fm10k_ndo_set_vf_vlan,
 	.ndo_set_vf_rate	= fm10k_ndo_set_vf_bw,
 	.ndo_get_vf_config	= fm10k_ndo_get_vf_config,
-	.ndo_udp_tunnel_add	= fm10k_add_vxlan_port,
-	.ndo_udp_tunnel_del	= fm10k_del_vxlan_port,
+	.ndo_udp_tunnel_add	= fm10k_udp_tunnel_add,
+	.ndo_udp_tunnel_del	= fm10k_udp_tunnel_del,
 	.ndo_dfwd_add_station	= fm10k_dfwd_add_station,
 	.ndo_dfwd_del_station	= fm10k_dfwd_del_station,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 774a565..b1a2f84 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -62,7 +62,7 @@
 
 u32 fm10k_read_reg(struct fm10k_hw *hw, int reg)
 {
-	u32 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr);
+	u32 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
 	u32 value = 0;
 
 	if (FM10K_REMOVED(hw_addr))
@@ -133,7 +133,7 @@
 	/* check the real address space to see if we've recovered */
 	hw_addr = READ_ONCE(interface->uc_addr);
 	value = readl(hw_addr);
-	if ((~value)) {
+	if (~value) {
 		interface->hw.hw_addr = interface->uc_addr;
 		netif_device_attach(netdev);
 		interface->flags |= FM10K_FLAG_RESET_REQUESTED;
@@ -734,15 +734,15 @@
 	u64 rdba = ring->dma;
 	struct fm10k_hw *hw = &interface->hw;
 	u32 size = ring->count * sizeof(union fm10k_rx_desc);
-	u32 rxqctl = FM10K_RXQCTL_ENABLE | FM10K_RXQCTL_PF;
-	u32 rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
+	u32 rxqctl, rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY;
 	u32 srrctl = FM10K_SRRCTL_BUFFER_CHAINING_EN;
 	u32 rxint = FM10K_INT_MAP_DISABLE;
 	u8 rx_pause = interface->rx_pause;
 	u8 reg_idx = ring->reg_idx;
 
 	/* disable queue to avoid issues while updating state */
-	fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), 0);
+	rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx));
+	rxqctl &= ~FM10K_RXQCTL_ENABLE;
 	fm10k_write_flush(hw);
 
 	/* possible poll here to verify ring resources have been cleaned */
@@ -797,6 +797,8 @@
 	fm10k_write_reg(hw, FM10K_RXINT(reg_idx), rxint);
 
 	/* enable queue */
+	rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx));
+	rxqctl |= FM10K_RXQCTL_ENABLE;
 	fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl);
 
 	/* place buffers on ring for receive data */
@@ -1699,7 +1701,7 @@
 
 		/* start checking at the last ring to have pending Tx */
 		for (; i < interface->num_tx_queues; i++)
-			if (fm10k_get_tx_pending(interface->tx_ring[i]))
+			if (fm10k_get_tx_pending(interface->tx_ring[i], false))
 				break;
 
 		/* if all the queues are drained, we can break now */
@@ -1835,8 +1837,9 @@
 	interface->tx_itr = FM10K_TX_ITR_DEFAULT;
 	interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT;
 
-	/* initialize vxlan_port list */
+	/* initialize udp port lists */
 	INIT_LIST_HEAD(&interface->vxlan_port);
+	INIT_LIST_HEAD(&interface->geneve_port);
 
 	netdev_rss_key_fill(rss_key, sizeof(rss_key));
 	memcpy(interface->rssrk, rss_key, sizeof(rss_key));
@@ -1950,9 +1953,18 @@
 	struct fm10k_intfc *interface;
 	int err;
 
+	if (pdev->error_state != pci_channel_io_normal) {
+		dev_err(&pdev->dev,
+			"PCI device still in an error state. Unable to load...\n");
+		return -EIO;
+	}
+
 	err = pci_enable_device_mem(pdev);
-	if (err)
+	if (err) {
+		dev_err(&pdev->dev,
+			"PCI enable device failed: %d\n", err);
 		return err;
+	}
 
 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
 	if (err)
@@ -2275,7 +2287,7 @@
 {
 	pci_ers_result_t result;
 
-	if (pci_enable_device_mem(pdev)) {
+	if (pci_reenable_device(pdev)) {
 		dev_err(&pdev->dev,
 			"Cannot re-enable PCI device after reset.\n");
 		result = PCI_ERS_RESULT_DISCONNECT;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 682299d..23fb319 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -867,10 +867,6 @@
 	vf_q_idx = fm10k_vf_queue_index(hw, vf_idx);
 	qmap_idx = qmap_stride * vf_idx;
 
-	/* MAP Tx queue back to 0 temporarily, and disable it */
-	fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0);
-	fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0);
-
 	/* Determine correct default VLAN ID. The FM10K_VLAN_OVERRIDE bit is
 	 * used here to indicate to the VF that it will not have privilege to
 	 * write VLAN_TABLE. All policy is enforced on the PF but this allows
@@ -886,9 +882,35 @@
 	fm10k_tlv_attr_put_mac_vlan(msg, FM10K_MAC_VLAN_MSG_DEFAULT_MAC,
 				    vf_info->mac, vf_vid);
 
-	/* load onto outgoing mailbox, ignore any errors on enqueue */
-	if (vf_info->mbx.ops.enqueue_tx)
-		vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg);
+	/* Configure Queue control register with new VLAN ID. The TXQCTL
+	 * register is RO from the VF, so the PF must do this even in the
+	 * case of notifying the VF of a new VID via the mailbox.
+	 */
+	txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) &
+		 FM10K_TXQCTL_VID_MASK;
+	txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
+		  FM10K_TXQCTL_VF | vf_idx;
+
+	for (i = 0; i < queues_per_pool; i++)
+		fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl);
+
+	/* try loading a message onto outgoing mailbox first */
+	if (vf_info->mbx.ops.enqueue_tx) {
+		err = vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg);
+		if (err != FM10K_MBX_ERR_NO_MBX)
+			return err;
+		err = 0;
+	}
+
+	/* If we aren't connected to a mailbox, this is most likely because
+	 * the VF driver is not running. It should thus be safe to re-map
+	 * queues and use the registers to pass the MAC address so that the VF
+	 * driver gets correct information during its initialization.
+	 */
+
+	/* MAP Tx queue back to 0 temporarily, and disable it */
+	fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0);
+	fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0);
 
 	/* verify ring has disabled before modifying base address registers */
 	txdctl = fm10k_read_reg(hw, FM10K_TXDCTL(vf_q_idx));
@@ -927,16 +949,6 @@
 						   FM10K_TDLEN_ITR_SCALE_SHIFT);
 
 err_out:
-	/* configure Queue control register */
-	txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) &
-		 FM10K_TXQCTL_VID_MASK;
-	txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) |
-		  FM10K_TXQCTL_VF | vf_idx;
-
-	/* assign VLAN ID */
-	for (i = 0; i < queues_per_pool; i++)
-		fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl);
-
 	/* restore the queue back to VF ownership */
 	fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), vf_q_idx);
 	return err;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
index f4e75c4..6bb16c1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h
@@ -154,6 +154,7 @@
 #define FM10K_DGLORTDEC_INNERRSS_ENABLE		0x08000000
 #define FM10K_TUNNEL_CFG	0x0040
 #define FM10K_TUNNEL_CFG_NVGRE_SHIFT		16
+#define FM10K_TUNNEL_CFG_GENEVE	0x0041
 #define FM10K_SWPRI_MAP(_n)	((_n) + 0x0050)
 #define FM10K_SWPRI_MAX		16
 #define FM10K_RSSRK(_n, _m)	(((_n) * 0x10) + (_m) + 0x0800)
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 2a88291..2030d7c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -65,76 +65,72 @@
 #include "i40e_dcb.h"
 
 /* Useful i40e defaults */
-#define I40E_MAX_VEB          16
+#define I40E_MAX_VEB			16
 
-#define I40E_MAX_NUM_DESCRIPTORS      4096
-#define I40E_MAX_CSR_SPACE (4 * 1024 * 1024 - 64 * 1024)
-#define I40E_DEFAULT_NUM_DESCRIPTORS  512
-#define I40E_REQ_DESCRIPTOR_MULTIPLE  32
-#define I40E_MIN_NUM_DESCRIPTORS      64
-#define I40E_MIN_MSIX                 2
-#define I40E_DEFAULT_NUM_VMDQ_VSI     8 /* max 256 VSIs */
-#define I40E_MIN_VSI_ALLOC            51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */
+#define I40E_MAX_NUM_DESCRIPTORS	4096
+#define I40E_MAX_CSR_SPACE		(4 * 1024 * 1024 - 64 * 1024)
+#define I40E_DEFAULT_NUM_DESCRIPTORS	512
+#define I40E_REQ_DESCRIPTOR_MULTIPLE	32
+#define I40E_MIN_NUM_DESCRIPTORS	64
+#define I40E_MIN_MSIX			2
+#define I40E_DEFAULT_NUM_VMDQ_VSI	8 /* max 256 VSIs */
+#define I40E_MIN_VSI_ALLOC		83 /* LAN, ATR, FCOE, 64 VF */
 /* max 16 qps */
 #define i40e_default_queues_per_vmdq(pf) \
 		(((pf)->flags & I40E_FLAG_RSS_AQ_CAPABLE) ? 4 : 1)
-#define I40E_DEFAULT_QUEUES_PER_VF    4
-#define I40E_DEFAULT_QUEUES_PER_TC    1 /* should be a power of 2 */
+#define I40E_DEFAULT_QUEUES_PER_VF	4
+#define I40E_DEFAULT_QUEUES_PER_TC	1 /* should be a power of 2 */
 #define i40e_pf_get_max_q_per_tc(pf) \
 		(((pf)->flags & I40E_FLAG_128_QP_RSS_CAPABLE) ? 128 : 64)
-#define I40E_FDIR_RING                0
-#define I40E_FDIR_RING_COUNT          32
+#define I40E_FDIR_RING			0
+#define I40E_FDIR_RING_COUNT		32
 #ifdef I40E_FCOE
-#define I40E_DEFAULT_FCOE             8 /* default number of QPs for FCoE */
-#define I40E_MINIMUM_FCOE             1 /* minimum number of QPs for FCoE */
+#define I40E_DEFAULT_FCOE		8 /* default number of QPs for FCoE */
+#define I40E_MINIMUM_FCOE		1 /* minimum number of QPs for FCoE */
 #endif /* I40E_FCOE */
-#define I40E_MAX_AQ_BUF_SIZE          4096
-#define I40E_AQ_LEN                   256
-#define I40E_AQ_WORK_LIMIT            66 /* max number of VFs + a little */
-#define I40E_MAX_USER_PRIORITY        8
-#define I40E_DEFAULT_MSG_ENABLE       4
-#define I40E_QUEUE_WAIT_RETRY_LIMIT   10
-#define I40E_INT_NAME_STR_LEN        (IFNAMSIZ + 16)
+#define I40E_MAX_AQ_BUF_SIZE		4096
+#define I40E_AQ_LEN			256
+#define I40E_AQ_WORK_LIMIT		66 /* max number of VFs + a little */
+#define I40E_MAX_USER_PRIORITY		8
+#define I40E_DEFAULT_MSG_ENABLE		4
+#define I40E_QUEUE_WAIT_RETRY_LIMIT	10
+#define I40E_INT_NAME_STR_LEN		(IFNAMSIZ + 16)
 
 /* Ethtool Private Flags */
-#define	I40E_PRIV_FLAGS_MFP_FLAG		BIT(0)
-#define	I40E_PRIV_FLAGS_LINKPOLL_FLAG		BIT(1)
+#define I40E_PRIV_FLAGS_MFP_FLAG		BIT(0)
+#define I40E_PRIV_FLAGS_LINKPOLL_FLAG		BIT(1)
 #define I40E_PRIV_FLAGS_FD_ATR			BIT(2)
 #define I40E_PRIV_FLAGS_VEB_STATS		BIT(3)
 #define I40E_PRIV_FLAGS_HW_ATR_EVICT		BIT(4)
 #define I40E_PRIV_FLAGS_TRUE_PROMISC_SUPPORT	BIT(5)
 
-#define I40E_NVM_VERSION_LO_SHIFT  0
-#define I40E_NVM_VERSION_LO_MASK   (0xff << I40E_NVM_VERSION_LO_SHIFT)
-#define I40E_NVM_VERSION_HI_SHIFT  12
-#define I40E_NVM_VERSION_HI_MASK   (0xf << I40E_NVM_VERSION_HI_SHIFT)
-#define I40E_OEM_VER_BUILD_MASK    0xffff
-#define I40E_OEM_VER_PATCH_MASK    0xff
-#define I40E_OEM_VER_BUILD_SHIFT   8
-#define I40E_OEM_VER_SHIFT         24
+#define I40E_NVM_VERSION_LO_SHIFT	0
+#define I40E_NVM_VERSION_LO_MASK	(0xff << I40E_NVM_VERSION_LO_SHIFT)
+#define I40E_NVM_VERSION_HI_SHIFT	12
+#define I40E_NVM_VERSION_HI_MASK	(0xf << I40E_NVM_VERSION_HI_SHIFT)
+#define I40E_OEM_VER_BUILD_MASK		0xffff
+#define I40E_OEM_VER_PATCH_MASK		0xff
+#define I40E_OEM_VER_BUILD_SHIFT	8
+#define I40E_OEM_VER_SHIFT		24
 #define I40E_PHY_DEBUG_ALL \
 	(I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW | \
 	I40E_AQ_PHY_DEBUG_DISABLE_ALL_LINK_FW)
 
 /* The values in here are decimal coded as hex as is the case in the NVM map*/
-#define I40E_CURRENT_NVM_VERSION_HI 0x2
-#define I40E_CURRENT_NVM_VERSION_LO 0x40
+#define I40E_CURRENT_NVM_VERSION_HI	0x2
+#define I40E_CURRENT_NVM_VERSION_LO	0x40
 
-/* magic for getting defines into strings */
-#define STRINGIFY(foo)  #foo
-#define XSTRINGIFY(bar) STRINGIFY(bar)
-
-#define I40E_RX_DESC(R, i)			\
+#define I40E_RX_DESC(R, i)	\
 	(&(((union i40e_32byte_rx_desc *)((R)->desc))[i]))
-#define I40E_TX_DESC(R, i)			\
+#define I40E_TX_DESC(R, i)	\
 	(&(((struct i40e_tx_desc *)((R)->desc))[i]))
-#define I40E_TX_CTXTDESC(R, i)			\
+#define I40E_TX_CTXTDESC(R, i)	\
 	(&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
-#define I40E_TX_FDIRDESC(R, i)			\
+#define I40E_TX_FDIRDESC(R, i)	\
 	(&(((struct i40e_filter_program_desc *)((R)->desc))[i]))
 
 /* default to trying for four seconds */
-#define I40E_TRY_LINK_TIMEOUT (4 * HZ)
+#define I40E_TRY_LINK_TIMEOUT	(4 * HZ)
 
 /**
  * i40e_is_mac_710 - Return true if MAC is X710/XL710
@@ -199,9 +195,9 @@
 #define I40E_FDIR_BUFFER_HEAD_ROOM	32
 #define I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR (I40E_FDIR_BUFFER_HEAD_ROOM * 4)
 
-#define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4)
-#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4)
-#define I40E_VF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4)
+#define I40E_HKEY_ARRAY_SIZE	((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4)
+#define I40E_HLUT_ARRAY_SIZE	((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4)
+#define I40E_VF_HLUT_ARRAY_SIZE	((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4)
 
 enum i40e_fd_stat_idx {
 	I40E_FD_STAT_ATR,
@@ -387,8 +383,8 @@
 	struct mutex switch_mutex;
 	u16 lan_vsi;       /* our default LAN VSI */
 	u16 lan_veb;       /* initial relay, if exists */
-#define I40E_NO_VEB   0xffff
-#define I40E_NO_VSI   0xffff
+#define I40E_NO_VEB	0xffff
+#define I40E_NO_VSI	0xffff
 	u16 next_vsi;      /* Next unallocated VSI - 0-based! */
 	struct i40e_vsi **vsi;
 	struct i40e_veb *veb[I40E_MAX_VEB];
@@ -423,8 +419,8 @@
 	 */
 	u16 dcbx_cap;
 
-	u32	fcoe_hmc_filt_num;
-	u32	fcoe_hmc_cntx_num;
+	u32 fcoe_hmc_filt_num;
+	u32 fcoe_hmc_cntx_num;
 	struct i40e_filter_control_settings filter_settings;
 
 	struct ptp_clock *ptp_clock;
@@ -470,10 +466,10 @@
 struct i40e_veb {
 	struct i40e_pf *pf;
 	u16 idx;
-	u16 veb_idx;           /* index of VEB parent */
+	u16 veb_idx;		/* index of VEB parent */
 	u16 seid;
 	u16 uplink_seid;
-	u16 stats_idx;           /* index of VEB parent */
+	u16 stats_idx;		/* index of VEB parent */
 	u8  enabled_tc;
 	u16 bridge_mode;	/* Bridge Mode (VEB/VEPA) */
 	u16 flags;
@@ -534,12 +530,13 @@
 	u32  promisc_threshold;
 
 	u16 work_limit;
-	u16 int_rate_limit;  /* value in usecs */
+	u16 int_rate_limit;	/* value in usecs */
 
-	u16 rss_table_size; /* HW RSS table size */
-	u16 rss_size;       /* Allocated RSS queues */
-	u8  *rss_hkey_user; /* User configured hash keys */
-	u8  *rss_lut_user;  /* User configured lookup table entries */
+	u16 rss_table_size;	/* HW RSS table size */
+	u16 rss_size;		/* Allocated RSS queues */
+	u8  *rss_hkey_user;	/* User configured hash keys */
+	u8  *rss_lut_user;	/* User configured lookup table entries */
+
 
 	u16 max_frame;
 	u16 rx_buf_len;
@@ -550,14 +547,14 @@
 	int base_vector;
 	bool irqs_ready;
 
-	u16 seid;            /* HW index of this VSI (absolute index) */
-	u16 id;              /* VSI number */
+	u16 seid;		/* HW index of this VSI (absolute index) */
+	u16 id;			/* VSI number */
 	u16 uplink_seid;
 
-	u16 base_queue;      /* vsi's first queue in hw array */
-	u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */
-	u16 req_queue_pairs; /* User requested queue pairs */
-	u16 num_queue_pairs; /* Used tx and rx pairs */
+	u16 base_queue;		/* vsi's first queue in hw array */
+	u16 alloc_queue_pairs;	/* Allocated Tx/Rx queues */
+	u16 req_queue_pairs;	/* User requested queue pairs */
+	u16 num_queue_pairs;	/* Used tx and rx pairs */
 	u16 num_desc;
 	enum i40e_vsi_type type;  /* VSI type, e.g., LAN, FCoE, etc */
 	s16 vf_id;		/* Virtual function ID for SRIOV VSIs */
@@ -576,19 +573,16 @@
 	/* TC BW limit max quanta within VSI */
 	u8  bw_ets_max_quanta[I40E_MAX_TRAFFIC_CLASS];
 
-	struct i40e_pf *back;  /* Backreference to associated PF */
-	u16 idx;               /* index in pf->vsi[] */
-	u16 veb_idx;           /* index of VEB parent */
-	struct kobject *kobj;  /* sysfs object */
-	bool current_isup;     /* Sync 'link up' logging */
+	struct i40e_pf *back;	/* Backreference to associated PF */
+	u16 idx;		/* index in pf->vsi[] */
+	u16 veb_idx;		/* index of VEB parent */
+	struct kobject *kobj;	/* sysfs object */
+	bool current_isup;	/* Sync 'link up' logging */
 
 	void *priv;	/* client driver data reference. */
 
 	/* VSI specific handlers */
 	irqreturn_t (*irq_handler)(int irq, void *data);
-
-	/* current rxnfc data */
-	struct ethtool_rxnfc rxnfc; /* current rss hash opts */
 } ____cacheline_internodealigned_in_smp;
 
 struct i40e_netdev_priv {
@@ -707,6 +701,8 @@
 void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags);
 int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
+void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
+		       u16 rss_table_size, u16 rss_size);
 struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id);
 void i40e_update_stats(struct i40e_vsi *vsi);
 void i40e_update_eth_stats(struct i40e_vsi *vsi);
@@ -714,8 +710,6 @@
 int i40e_fetch_switch_configuration(struct i40e_pf *pf,
 				    bool printconfig);
 
-int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
-			     struct i40e_pf *pf, bool add);
 int i40e_add_del_fdir(struct i40e_vsi *vsi,
 		      struct i40e_fdir_filter *input, bool add);
 void i40e_fdir_check_and_reenable(struct i40e_pf *pf);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 11cf1a5..67e396b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -204,6 +204,9 @@
 	i40e_aqc_opc_suspend_port_tx				= 0x041B,
 	i40e_aqc_opc_resume_port_tx				= 0x041C,
 	i40e_aqc_opc_configure_partition_bw			= 0x041D,
+	/* hmc */
+	i40e_aqc_opc_query_hmc_resource_profile	= 0x0500,
+	i40e_aqc_opc_set_hmc_resource_profile	= 0x0501,
 
 	/* phy commands*/
 	i40e_aqc_opc_get_phy_abilities		= 0x0600,
@@ -450,13 +453,15 @@
 /* Set ARP Proxy command / response (indirect 0x0104) */
 struct i40e_aqc_arp_proxy_data {
 	__le16	command_flags;
-#define I40E_AQ_ARP_INIT_IPV4	0x0008
-#define I40E_AQ_ARP_UNSUP_CTL	0x0010
-#define I40E_AQ_ARP_ENA		0x0020
-#define I40E_AQ_ARP_ADD_IPV4	0x0040
-#define I40E_AQ_ARP_DEL_IPV4	0x0080
+#define I40E_AQ_ARP_INIT_IPV4	0x0800
+#define I40E_AQ_ARP_UNSUP_CTL	0x1000
+#define I40E_AQ_ARP_ENA		0x2000
+#define I40E_AQ_ARP_ADD_IPV4	0x4000
+#define I40E_AQ_ARP_DEL_IPV4	0x8000
 	__le16	table_id;
-	__le32	pfpm_proxyfc;
+	__le32	enabled_offloads;
+#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE	0x00000020
+#define I40E_AQ_ARP_OFFLOAD_ENABLE		0x00000800
 	__le32	ip_addr;
 	u8	mac_addr[6];
 	u8	reserved[2];
@@ -471,17 +476,19 @@
 	__le16	table_idx_ipv6_0;
 	__le16	table_idx_ipv6_1;
 	__le16	control;
-#define I40E_AQ_NS_PROXY_ADD_0		0x0100
-#define I40E_AQ_NS_PROXY_DEL_0		0x0200
-#define I40E_AQ_NS_PROXY_ADD_1		0x0400
-#define I40E_AQ_NS_PROXY_DEL_1		0x0800
-#define I40E_AQ_NS_PROXY_ADD_IPV6_0	0x1000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_0	0x2000
-#define I40E_AQ_NS_PROXY_ADD_IPV6_1	0x4000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_1	0x8000
-#define I40E_AQ_NS_PROXY_COMMAND_SEQ	0x0001
-#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL	0x0002
-#define I40E_AQ_NS_PROXY_INIT_MAC_TBL	0x0004
+#define I40E_AQ_NS_PROXY_ADD_0		0x0001
+#define I40E_AQ_NS_PROXY_DEL_0		0x0002
+#define I40E_AQ_NS_PROXY_ADD_1		0x0004
+#define I40E_AQ_NS_PROXY_DEL_1		0x0008
+#define I40E_AQ_NS_PROXY_ADD_IPV6_0	0x0010
+#define I40E_AQ_NS_PROXY_DEL_IPV6_0	0x0020
+#define I40E_AQ_NS_PROXY_ADD_IPV6_1	0x0040
+#define I40E_AQ_NS_PROXY_DEL_IPV6_1	0x0080
+#define I40E_AQ_NS_PROXY_COMMAND_SEQ	0x0100
+#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL	0x0200
+#define I40E_AQ_NS_PROXY_INIT_MAC_TBL	0x0400
+#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE	0x0800
+#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE	0x1000
 	u8	mac_addr_0[6];
 	u8	mac_addr_1[6];
 	u8	local_mac_addr[6];
@@ -1582,6 +1589,24 @@
 
 I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
 
+/* Get and set the active HMC resource profile and status.
+ * (direct 0x0500) and (direct 0x0501)
+ */
+struct i40e_aq_get_set_hmc_resource_profile {
+	u8	pm_profile;
+	u8	pe_vf_enabled;
+	u8	reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile);
+
+enum i40e_aq_hmc_profile {
+	/* I40E_HMC_PROFILE_NO_CHANGE	= 0, reserved */
+	I40E_HMC_PROFILE_DEFAULT	= 1,
+	I40E_HMC_PROFILE_FAVOR_VF	= 2,
+	I40E_HMC_PROFILE_EQUAL		= 3,
+};
+
 /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */
 
 /* set in param0 for get phy abilities to report qualified modules */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index e1370c5..250db0b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -148,6 +148,11 @@
 					"Cannot locate client instance virtual channel receive routine\n");
 				continue;
 			}
+			if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+				      &cdev->state)) {
+				dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort virtchnl_receive\n");
+				continue;
+			}
 			cdev->client->ops->virtchnl_receive(&cdev->lan_info,
 							    cdev->client,
 							    vf_id, msg, len);
@@ -181,6 +186,11 @@
 					"Cannot locate client instance l2_param_change routine\n");
 				continue;
 			}
+			if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+				      &cdev->state)) {
+				dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n");
+				continue;
+			}
 			cdev->lan_info.params = params;
 			cdev->client->ops->l2_param_change(&cdev->lan_info,
 							   cdev->client,
@@ -199,6 +209,7 @@
 void i40e_notify_client_of_netdev_open(struct i40e_vsi *vsi)
 {
 	struct i40e_client_instance *cdev;
+	int ret = 0;
 
 	if (!vsi)
 		return;
@@ -211,7 +222,14 @@
 					"Cannot locate client instance open routine\n");
 				continue;
 			}
-			cdev->client->ops->open(&cdev->lan_info, cdev->client);
+			if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+				       &cdev->state))) {
+				ret = cdev->client->ops->open(&cdev->lan_info,
+							      cdev->client);
+				if (!ret)
+					set_bit(__I40E_CLIENT_INSTANCE_OPENED,
+						&cdev->state);
+			}
 		}
 	}
 	mutex_unlock(&i40e_client_instance_mutex);
@@ -298,6 +316,11 @@
 					"Cannot locate client instance VF reset routine\n");
 				continue;
 			}
+			if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+				      &cdev->state)) {
+				dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-reset\n");
+				continue;
+			}
 			cdev->client->ops->vf_reset(&cdev->lan_info,
 						    cdev->client, vf_id);
 		}
@@ -328,6 +351,11 @@
 					"Cannot locate client instance VF enable routine\n");
 				continue;
 			}
+			if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+				      &cdev->state)) {
+				dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-enable\n");
+				continue;
+			}
 			cdev->client->ops->vf_enable(&cdev->lan_info,
 						     cdev->client, num_vfs);
 		}
@@ -362,6 +390,11 @@
 					"Cannot locate client instance VF capability routine\n");
 				continue;
 			}
+			if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+				      &cdev->state)) {
+				dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-capable\n");
+				continue;
+			}
 			capable = cdev->client->ops->vf_capable(&cdev->lan_info,
 								cdev->client,
 								vf_id);
@@ -407,12 +440,14 @@
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
  * @client: pointer to a client struct in the client list.
+ * @existing: if there was already an existing instance
  *
- * Returns cdev ptr on success, NULL on failure
+ * Returns cdev ptr on success or if already exists, NULL on failure
  **/
 static
 struct i40e_client_instance *i40e_client_add_instance(struct i40e_pf *pf,
-						      struct i40e_client *client)
+						     struct i40e_client *client,
+						     bool *existing)
 {
 	struct i40e_client_instance *cdev;
 	struct netdev_hw_addr *mac = NULL;
@@ -421,7 +456,7 @@
 	mutex_lock(&i40e_client_instance_mutex);
 	list_for_each_entry(cdev, &i40e_client_instances, list) {
 		if ((cdev->lan_info.pf == pf) && (cdev->client == client)) {
-			cdev = NULL;
+			*existing = true;
 			goto out;
 		}
 	}
@@ -505,6 +540,7 @@
 {
 	struct i40e_client_instance *cdev;
 	struct i40e_client *client;
+	bool existing = false;
 	int ret = 0;
 
 	if (!(pf->flags & I40E_FLAG_SERVICE_CLIENT_REQUESTED))
@@ -528,19 +564,27 @@
 			/* check if L2 VSI is up, if not we are not ready */
 			if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
 				continue;
+		} else {
+			dev_warn(&pf->pdev->dev, "This client %s is being instanciated at probe\n",
+				 client->name);
 		}
 
 		/* Add the client instance to the instance list */
-		cdev = i40e_client_add_instance(pf, client);
+		cdev = i40e_client_add_instance(pf, client, &existing);
 		if (!cdev)
 			continue;
 
-		/* Also up the ref_cnt of no. of instances of this client */
-		atomic_inc(&client->ref_cnt);
-		dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
-			 client->name, pf->hw.pf_id,
-			 pf->hw.bus.device, pf->hw.bus.func);
+		if (!existing) {
+			/* Also up the ref_cnt for no. of instances of this
+			 * client.
+			 */
+			atomic_inc(&client->ref_cnt);
+			dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
+				 client->name, pf->hw.pf_id,
+				 pf->hw.bus.device, pf->hw.bus.func);
+		}
 
+		mutex_lock(&i40e_client_instance_mutex);
 		/* Send an Open request to the client */
 		atomic_inc(&cdev->ref_cnt);
 		if (client->ops && client->ops->open)
@@ -550,10 +594,12 @@
 			set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
 		} else {
 			/* remove client instance */
+			mutex_unlock(&i40e_client_instance_mutex);
 			i40e_client_del_instance(pf, client);
 			atomic_dec(&client->ref_cnt);
 			continue;
 		}
+		mutex_unlock(&i40e_client_instance_mutex);
 	}
 	mutex_unlock(&i40e_client_mutex);
 }
@@ -588,7 +634,8 @@
 		 pf->hw.pf_id, pf->hw.bus.device, pf->hw.bus.func);
 
 	/* Since in some cases register may have happened before a device gets
-	 * added, we can schedule a subtask to go initiate the clients.
+	 * added, we can schedule a subtask to go initiate the clients if
+	 * they can be launched at probe time.
 	 */
 	pf->flags |= I40E_FLAG_SERVICE_CLIENT_REQUESTED;
 	i40e_service_event_schedule(pf);
@@ -635,7 +682,7 @@
 static int i40e_client_release(struct i40e_client *client)
 {
 	struct i40e_client_instance *cdev, *tmp;
-	struct i40e_pf *pf = NULL;
+	struct i40e_pf *pf;
 	int ret = 0;
 
 	LIST_HEAD(cdevs_tmp);
@@ -645,12 +692,12 @@
 		if (strncmp(cdev->client->name, client->name,
 			    I40E_CLIENT_STR_LENGTH))
 			continue;
+		pf = (struct i40e_pf *)cdev->lan_info.pf;
 		if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
 			if (atomic_read(&cdev->ref_cnt) > 0) {
 				ret = I40E_ERR_NOT_READY;
 				goto out;
 			}
-			pf = (struct i40e_pf *)cdev->lan_info.pf;
 			if (client->ops && client->ops->close)
 				client->ops->close(&cdev->lan_info, client,
 						   false);
@@ -662,8 +709,7 @@
 				 client->name, pf->hw.pf_id);
 		}
 		/* delete the client instance from the list */
-		list_del(&cdev->list);
-		list_add(&cdev->list, &cdevs_tmp);
+		list_move(&cdev->list, &cdevs_tmp);
 		atomic_dec(&client->ref_cnt);
 		dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n",
 			 client->name);
@@ -792,7 +838,8 @@
 			wr32(hw, I40E_PFINT_AEQCTL, reg);
 		}
 	}
-
+	/* Mitigate sync problems with iwarp VF driver */
+	i40e_flush(hw);
 	return 0;
 err:
 	kfree(ldev->qvlist_info);
@@ -990,7 +1037,6 @@
 	if (!i40e_client_is_registered(client)) {
 		pr_info("i40e: Client %s has not been registered\n",
 			client->name);
-		mutex_unlock(&i40e_client_mutex);
 		ret = -ENODEV;
 		goto out;
 	}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
index a4601d9..38a6c36 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -36,9 +36,9 @@
 #define I40E_CLIENT_VERSION_MINOR 01
 #define I40E_CLIENT_VERSION_BUILD 00
 #define I40E_CLIENT_VERSION_STR     \
-	XSTRINGIFY(I40E_CLIENT_VERSION_MAJOR) "." \
-	XSTRINGIFY(I40E_CLIENT_VERSION_MINOR) "." \
-	XSTRINGIFY(I40E_CLIENT_VERSION_BUILD)
+	__stringify(I40E_CLIENT_VERSION_MAJOR) "." \
+	__stringify(I40E_CLIENT_VERSION_MINOR) "." \
+	__stringify(I40E_CLIENT_VERSION_BUILD)
 
 struct i40e_client_version {
 	u8 major;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 05cf9a7..0c1875b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1054,6 +1054,7 @@
 			struct i40e_dcbx_config *r_cfg =
 						&pf->hw.remote_dcbx_config;
 			int i, ret;
+			u32 switch_id;
 
 			bw_data = kzalloc(sizeof(
 				    struct i40e_aqc_query_port_ets_config_resp),
@@ -1063,8 +1064,12 @@
 				goto command_write_done;
 			}
 
+			vsi = pf->vsi[pf->lan_vsi];
+			switch_id =
+				vsi->info.switch_id & I40E_AQ_VSI_SW_ID_MASK;
+
 			ret = i40e_aq_query_port_ets_config(&pf->hw,
-							    pf->mac_seid,
+							    switch_id,
 							    bw_data, NULL);
 			if (ret) {
 				dev_info(&pf->pdev->dev,
@@ -1425,84 +1430,6 @@
 		buff = NULL;
 		kfree(desc);
 		desc = NULL;
-	} else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) ||
-		   (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) {
-		struct i40e_fdir_filter fd_data;
-		u16 packet_len, i, j = 0;
-		char *asc_packet;
-		u8 *raw_packet;
-		bool add = false;
-		int ret;
-
-		if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED))
-			goto command_write_done;
-
-		if (strncmp(cmd_buf, "add", 3) == 0)
-			add = true;
-
-		if (add && (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED))
-			goto command_write_done;
-
-		asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE,
-				     GFP_KERNEL);
-		if (!asc_packet)
-			goto command_write_done;
-
-		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE,
-				     GFP_KERNEL);
-
-		if (!raw_packet) {
-			kfree(asc_packet);
-			asc_packet = NULL;
-			goto command_write_done;
-		}
-
-		cnt = sscanf(&cmd_buf[13],
-			     "%hx %2hhx %2hhx %hx %2hhx %2hhx %hx %x %hd %511s",
-			     &fd_data.q_index,
-			     &fd_data.flex_off, &fd_data.pctype,
-			     &fd_data.dest_vsi, &fd_data.dest_ctl,
-			     &fd_data.fd_status, &fd_data.cnt_index,
-			     &fd_data.fd_id, &packet_len, asc_packet);
-		if (cnt != 10) {
-			dev_info(&pf->pdev->dev,
-				 "program fd_filter: bad command string, cnt=%d\n",
-				 cnt);
-			kfree(asc_packet);
-			asc_packet = NULL;
-			kfree(raw_packet);
-			goto command_write_done;
-		}
-
-		/* fix packet length if user entered 0 */
-		if (packet_len == 0)
-			packet_len = I40E_FDIR_MAX_RAW_PACKET_SIZE;
-
-		/* make sure to check the max as well */
-		packet_len = min_t(u16,
-				   packet_len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
-
-		for (i = 0; i < packet_len; i++) {
-			cnt = sscanf(&asc_packet[j], "%2hhx ", &raw_packet[i]);
-			if (!cnt)
-				break;
-			j += 3;
-		}
-		dev_info(&pf->pdev->dev, "FD raw packet dump\n");
-		print_hex_dump(KERN_INFO, "FD raw packet: ",
-			       DUMP_PREFIX_OFFSET, 16, 1,
-			       raw_packet, packet_len, true);
-		ret = i40e_program_fdir_filter(&fd_data, raw_packet, pf, add);
-		if (!ret) {
-			dev_info(&pf->pdev->dev, "Filter command send Status : Success\n");
-		} else {
-			dev_info(&pf->pdev->dev,
-				 "Filter command send failed %d\n", ret);
-		}
-		kfree(raw_packet);
-		raw_packet = NULL;
-		kfree(asc_packet);
-		asc_packet = NULL;
 	} else if (strncmp(cmd_buf, "fd current cnt", 14) == 0) {
 		dev_info(&pf->pdev->dev, "FD current total filter count for this interface: %d\n",
 			 i40e_get_current_fd_count(pf));
@@ -1727,8 +1654,6 @@
 		dev_info(&pf->pdev->dev, "  globr\n");
 		dev_info(&pf->pdev->dev, "  send aq_cmd <flags> <opcode> <datalen> <retval> <cookie_h> <cookie_l> <param0> <param1> <param2> <param3>\n");
 		dev_info(&pf->pdev->dev, "  send indirect aq_cmd <flags> <opcode> <datalen> <retval> <cookie_h> <cookie_l> <param0> <param1> <param2> <param3> <buffer_len>\n");
-		dev_info(&pf->pdev->dev, "  add fd_filter <dest q_index> <flex_off> <pctype> <dest_vsi> <dest_ctl> <fd_status> <cnt_index> <fd_id> <packet_len> <packet>\n");
-		dev_info(&pf->pdev->dev, "  rem fd_filter <dest q_index> <flex_off> <pctype> <dest_vsi> <dest_ctl> <fd_status> <cnt_index> <fd_id> <packet_len> <packet>\n");
 		dev_info(&pf->pdev->dev, "  fd current cnt");
 		dev_info(&pf->pdev->dev, "  lldp start\n");
 		dev_info(&pf->pdev->dev, "  lldp stop\n");
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index c912e04..92bc884 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1560,13 +1560,13 @@
 		}
 #endif
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_packets", i);
+			snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_bytes", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
+			snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
 		if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1)
@@ -1581,16 +1581,16 @@
 			}
 			for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
 				snprintf(p, ETH_GSTRING_LEN,
-					 "veb.tc_%u_tx_packets", i);
+					 "veb.tc_%d_tx_packets", i);
 				p += ETH_GSTRING_LEN;
 				snprintf(p, ETH_GSTRING_LEN,
-					 "veb.tc_%u_tx_bytes", i);
+					 "veb.tc_%d_tx_bytes", i);
 				p += ETH_GSTRING_LEN;
 				snprintf(p, ETH_GSTRING_LEN,
-					 "veb.tc_%u_rx_packets", i);
+					 "veb.tc_%d_rx_packets", i);
 				p += ETH_GSTRING_LEN;
 				snprintf(p, ETH_GSTRING_LEN,
-					 "veb.tc_%u_rx_bytes", i);
+					 "veb.tc_%d_rx_bytes", i);
 				p += ETH_GSTRING_LEN;
 			}
 		}
@@ -1601,23 +1601,23 @@
 		}
 		for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx_priority_%u_xon", i);
+				 "port.tx_priority_%d_xon", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx_priority_%u_xoff", i);
+				 "port.tx_priority_%d_xoff", i);
 			p += ETH_GSTRING_LEN;
 		}
 		for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx_priority_%u_xon", i);
+				 "port.rx_priority_%d_xon", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx_priority_%u_xoff", i);
+				 "port.rx_priority_%d_xoff", i);
 			p += ETH_GSTRING_LEN;
 		}
 		for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx_priority_%u_xon_2_xoff", i);
+				 "port.rx_priority_%d_xon_2_xoff", i);
 			p += ETH_GSTRING_LEN;
 		}
 		/* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */
@@ -1970,11 +1970,22 @@
  * 125us (8000 interrupts per second) == ITR(62)
  */
 
+/**
+ * __i40e_get_coalesce - get per-queue coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ * @queue: which queue to pick
+ *
+ * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs
+ * are per queue. If queue is <0 then we default to queue 0 as the
+ * representative value.
+ **/
 static int __i40e_get_coalesce(struct net_device *netdev,
 			       struct ethtool_coalesce *ec,
 			       int queue)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_ring *rx_ring, *tx_ring;
 	struct i40e_vsi *vsi = np->vsi;
 
 	ec->tx_max_coalesced_frames_irq = vsi->work_limit;
@@ -1989,14 +2000,18 @@
 		return -EINVAL;
 	}
 
-	if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting))
+	rx_ring = vsi->rx_rings[queue];
+	tx_ring = vsi->tx_rings[queue];
+
+	if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
 		ec->use_adaptive_rx_coalesce = 1;
 
-	if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting))
+	if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
 		ec->use_adaptive_tx_coalesce = 1;
 
-	ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-	ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+
 
 	/* we use the _usecs_high to store/set the interrupt rate limit
 	 * that the hardware supports, that almost but not quite
@@ -2010,18 +2025,44 @@
 	return 0;
 }
 
+/**
+ * i40e_get_coalesce - get a netdev's coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ *
+ * Gets the coalesce settings for a particular netdev. Note that if user has
+ * modified per-queue settings, this only guarantees to represent queue 0. See
+ * __i40e_get_coalesce for more details.
+ **/
 static int i40e_get_coalesce(struct net_device *netdev,
 			     struct ethtool_coalesce *ec)
 {
 	return __i40e_get_coalesce(netdev, ec, -1);
 }
 
+/**
+ * i40e_get_per_queue_coalesce - gets coalesce settings for particular queue
+ * @netdev: netdev structure
+ * @ec: ethtool's coalesce settings
+ * @queue: the particular queue to read
+ *
+ * Will read a specific queue's coalesce settings
+ **/
 static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
 				       struct ethtool_coalesce *ec)
 {
 	return __i40e_get_coalesce(netdev, ec, queue);
 }
 
+/**
+ * i40e_set_itr_per_queue - set ITR values for specific queue
+ * @vsi: the VSI to set values for
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to modify
+ *
+ * Change the ITR settings for a specific queue.
+ **/
+
 static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
 				   struct ethtool_coalesce *ec,
 				   int queue)
@@ -2060,6 +2101,14 @@
 	i40e_flush(hw);
 }
 
+/**
+ * __i40e_set_coalesce - set coalesce settings for particular queue
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the coalesce settings for a particular queue.
+ **/
 static int __i40e_set_coalesce(struct net_device *netdev,
 			       struct ethtool_coalesce *ec,
 			       int queue)
@@ -2120,12 +2169,27 @@
 	return 0;
 }
 
+/**
+ * i40e_set_coalesce - set coalesce settings for every queue on the netdev
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ *
+ * This will set each queue to the same coalesce settings.
+ **/
 static int i40e_set_coalesce(struct net_device *netdev,
 			     struct ethtool_coalesce *ec)
 {
 	return __i40e_set_coalesce(netdev, ec, -1);
 }
 
+/**
+ * i40e_set_per_queue_coalesce - set specific queue's coalesce settings
+ * @netdev: the netdev to change
+ * @ec: ethtool's coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the specified queue's coalesce settings.
+ **/
 static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue,
 				       struct ethtool_coalesce *ec)
 {
@@ -2141,41 +2205,72 @@
  **/
 static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
 {
+	struct i40e_hw *hw = &pf->hw;
+	u8 flow_pctype = 0;
+	u64 i_set = 0;
+
 	cmd->data = 0;
 
-	if (pf->vsi[pf->lan_vsi]->rxnfc.data != 0) {
-		cmd->data = pf->vsi[pf->lan_vsi]->rxnfc.data;
-		cmd->flow_type = pf->vsi[pf->lan_vsi]->rxnfc.flow_type;
-		return 0;
-	}
-	/* Report default options for RSS on i40e */
 	switch (cmd->flow_type) {
 	case TCP_V4_FLOW:
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+		break;
 	case UDP_V4_FLOW:
-		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
-	/* fall through to add IP fields */
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+		break;
+	case TCP_V6_FLOW:
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
+		break;
+	case UDP_V6_FLOW:
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+		break;
 	case SCTP_V4_FLOW:
 	case AH_ESP_V4_FLOW:
 	case AH_V4_FLOW:
 	case ESP_V4_FLOW:
 	case IPV4_FLOW:
-		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
-		break;
-	case TCP_V6_FLOW:
-	case UDP_V6_FLOW:
-		cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
-	/* fall through to add IP fields */
 	case SCTP_V6_FLOW:
 	case AH_ESP_V6_FLOW:
 	case AH_V6_FLOW:
 	case ESP_V6_FLOW:
 	case IPV6_FLOW:
+		/* Default is src/dest for IP, no matter the L4 hashing */
 		cmd->data |= RXH_IP_SRC | RXH_IP_DST;
 		break;
 	default:
 		return -EINVAL;
 	}
 
+	/* Read flow based hash input set register */
+	if (flow_pctype) {
+		i_set = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0,
+					      flow_pctype)) |
+			((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1,
+					       flow_pctype)) << 32);
+	}
+
+	/* Process bits of hash input set */
+	if (i_set) {
+		if (i_set & I40E_L4_SRC_MASK)
+			cmd->data |= RXH_L4_B_0_1;
+		if (i_set & I40E_L4_DST_MASK)
+			cmd->data |= RXH_L4_B_2_3;
+
+		if (cmd->flow_type == TCP_V4_FLOW ||
+		    cmd->flow_type == UDP_V4_FLOW) {
+			if (i_set & I40E_L3_SRC_MASK)
+				cmd->data |= RXH_IP_SRC;
+			if (i_set & I40E_L3_DST_MASK)
+				cmd->data |= RXH_IP_DST;
+		} else if (cmd->flow_type == TCP_V6_FLOW ||
+			  cmd->flow_type == UDP_V6_FLOW) {
+			if (i_set & I40E_L3_V6_SRC_MASK)
+				cmd->data |= RXH_IP_SRC;
+			if (i_set & I40E_L3_V6_DST_MASK)
+				cmd->data |= RXH_IP_DST;
+		}
+	}
+
 	return 0;
 }
 
@@ -2318,6 +2413,51 @@
 }
 
 /**
+ * i40e_get_rss_hash_bits - Read RSS Hash bits from register
+ * @nfc: pointer to user request
+ * @i_setc bits currently set
+ *
+ * Returns value of bits to be set per user request
+ **/
+static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc)
+{
+	u64 i_set = i_setc;
+	u64 src_l3 = 0, dst_l3 = 0;
+
+	if (nfc->data & RXH_L4_B_0_1)
+		i_set |= I40E_L4_SRC_MASK;
+	else
+		i_set &= ~I40E_L4_SRC_MASK;
+	if (nfc->data & RXH_L4_B_2_3)
+		i_set |= I40E_L4_DST_MASK;
+	else
+		i_set &= ~I40E_L4_DST_MASK;
+
+	if (nfc->flow_type == TCP_V6_FLOW || nfc->flow_type == UDP_V6_FLOW) {
+		src_l3 = I40E_L3_V6_SRC_MASK;
+		dst_l3 = I40E_L3_V6_DST_MASK;
+	} else if (nfc->flow_type == TCP_V4_FLOW ||
+		  nfc->flow_type == UDP_V4_FLOW) {
+		src_l3 = I40E_L3_SRC_MASK;
+		dst_l3 = I40E_L3_DST_MASK;
+	} else {
+		/* Any other flow type are not supported here */
+		return i_set;
+	}
+
+	if (nfc->data & RXH_IP_SRC)
+		i_set |= src_l3;
+	else
+		i_set &= ~src_l3;
+	if (nfc->data & RXH_IP_DST)
+		i_set |= dst_l3;
+	else
+		i_set &= ~dst_l3;
+
+	return i_set;
+}
+
+/**
  * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash
  * @pf: pointer to the physical function struct
  * @cmd: ethtool rxnfc command
@@ -2329,6 +2469,8 @@
 	struct i40e_hw *hw = &pf->hw;
 	u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) |
 		   ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32);
+	u8 flow_pctype = 0;
+	u64 i_set, i_setc;
 
 	/* RSS does not support anything other than hashing
 	 * to queues on src and dst IPs and ports
@@ -2337,75 +2479,39 @@
 			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
 		return -EINVAL;
 
-	/* We need at least the IP SRC and DEST fields for hashing */
-	if (!(nfc->data & RXH_IP_SRC) ||
-	    !(nfc->data & RXH_IP_DST))
-		return -EINVAL;
-
 	switch (nfc->flow_type) {
 	case TCP_V4_FLOW:
-		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
-		case 0:
-			return -EINVAL;
-		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-			if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
-				hena |=
-			   BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
-
-			hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
-			break;
-		default:
-			return -EINVAL;
-		}
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+		if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+			hena |=
+			  BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
 		break;
 	case TCP_V6_FLOW:
-		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
-		case 0:
-			return -EINVAL;
-		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-			if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
-				hena |=
-			   BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
-
-			hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
-			break;
-		default:
-			return -EINVAL;
-		}
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP;
+		if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+			hena |=
+			  BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK);
+		if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+			hena |=
+			  BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK);
 		break;
 	case UDP_V4_FLOW:
-		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
-		case 0:
-			return -EINVAL;
-		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-			if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
-				hena |=
-			    BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
-			    BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
+		if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+			hena |=
+			  BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
+			  BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP);
 
-			hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
-				 BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4));
-			break;
-		default:
-			return -EINVAL;
-		}
+		hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4);
 		break;
 	case UDP_V6_FLOW:
-		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
-		case 0:
-			return -EINVAL;
-		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
-			if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
-				hena |=
-			    BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
-			    BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
+		flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP;
+		if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE)
+			hena |=
+			  BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
+			  BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP);
 
-			hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
-				 BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6));
-			break;
-		default:
-			return -EINVAL;
-		}
+		hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6);
 		break;
 	case AH_ESP_V4_FLOW:
 	case AH_V4_FLOW:
@@ -2437,13 +2543,23 @@
 		return -EINVAL;
 	}
 
+	if (flow_pctype) {
+		i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0,
+					       flow_pctype)) |
+			((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1,
+					       flow_pctype)) << 32);
+		i_set = i40e_get_rss_hash_bits(nfc, i_setc);
+		i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_pctype),
+				  (u32)i_set);
+		i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_pctype),
+				  (u32)(i_set >> 32));
+		hena |= BIT_ULL(flow_pctype);
+	}
+
 	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena);
 	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
 	i40e_flush(hw);
 
-	/* Save setting for future output/update */
-	pf->vsi[pf->lan_vsi]->rxnfc = *nfc;
-
 	return 0;
 }
 
@@ -2744,11 +2860,15 @@
 static int i40e_set_channels(struct net_device *dev,
 			      struct ethtool_channels *ch)
 {
+	const u8 drop = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET;
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	unsigned int count = ch->combined_count;
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
+	struct i40e_fdir_filter *rule;
+	struct hlist_node *node2;
 	int new_count;
+	int err = 0;
 
 	/* We do not support setting channels for any other VSI at present */
 	if (vsi->type != I40E_VSI_MAIN)
@@ -2766,6 +2886,26 @@
 	if (count > i40e_max_channels(vsi))
 		return -EINVAL;
 
+	/* verify that the number of channels does not invalidate any current
+	 * flow director rules
+	 */
+	hlist_for_each_entry_safe(rule, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		if (rule->dest_ctl != drop && count <= rule->q_index) {
+			dev_warn(&pf->pdev->dev,
+				 "Existing user defined filter %d assigns flow to queue %d\n",
+				 rule->fd_id, rule->q_index);
+			err = -EINVAL;
+		}
+	}
+
+	if (err) {
+		dev_err(&pf->pdev->dev,
+			"Existing filter rules must be deleted to reduce combined channel count to %d\n",
+			count);
+		return err;
+	}
+
 	/* update feature limits from largest to smallest supported values */
 	/* TODO: Flow director limit, DCB etc */
 
@@ -2846,15 +2986,13 @@
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_pf *pf = vsi->back;
 	u8 *seed = NULL;
 	u16 i;
 
 	if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
 		return -EOPNOTSUPP;
 
-	if (!indir)
-		return 0;
-
 	if (key) {
 		if (!vsi->rss_hkey_user) {
 			vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE,
@@ -2872,8 +3010,12 @@
 	}
 
 	/* Each 32 bits pointed by 'indir' is stored with a lut entry */
-	for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++)
-		vsi->rss_lut_user[i] = (u8)(indir[i]);
+	if (indir)
+		for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++)
+			vsi->rss_lut_user[i] = (u8)(indir[i]);
+	else
+		i40e_fill_rss_lut(pf, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE,
+				  vsi->rss_size);
 
 	return i40e_config_rss(vsi, seed, vsi->rss_lut_user,
 			       I40E_HLUT_ARRAY_SIZE);
@@ -2943,6 +3085,9 @@
 	} else {
 		pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
 		pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
+
+		/* flush current ATR settings */
+		set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
 	}
 
 	if ((flags & I40E_PRIV_FLAGS_VEB_STATS) &&
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 81c99e1..ac1faee 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -41,7 +41,7 @@
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 11
+#define DRV_VERSION_BUILD 16
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@ -57,8 +57,6 @@
 static int i40e_setup_misc_vector(struct i40e_pf *pf);
 static void i40e_determine_queue_usage(struct i40e_pf *pf);
 static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
-static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
-			      u16 rss_table_size, u16 rss_size);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf);
 static int i40e_veb_get_bw_info(struct i40e_veb *veb);
 
@@ -527,6 +525,7 @@
 			pf->veb[i]->stat_offsets_loaded = false;
 		}
 	}
+	pf->hw_csum_rx_error = 0;
 }
 
 /**
@@ -1316,7 +1315,7 @@
 	element.vlan_tag = 0;
 	/* ...and some firmware does it this way. */
 	element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
-			I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
+			I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
 	i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL);
 }
 
@@ -1909,7 +1908,7 @@
 			ether_addr_copy(del_list[num_del].mac_addr, f->macaddr);
 			if (f->vlan == I40E_VLAN_ANY) {
 				del_list[num_del].vlan_tag = 0;
-				cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
+				cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
 			} else {
 				del_list[num_del].vlan_tag =
 					cpu_to_le16((u16)(f->vlan));
@@ -4554,23 +4553,38 @@
  **/
 static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
 {
+	int i, tc_unused = 0;
 	u8 num_tc = 0;
-	int i;
+	u8 ret = 0;
 
 	/* Scan the ETS Config Priority Table to find
 	 * traffic class enabled for a given priority
-	 * and use the traffic class index to get the
-	 * number of traffic classes enabled
+	 * and create a bitmask of enabled TCs
 	 */
-	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
-		if (dcbcfg->etscfg.prioritytable[i] > num_tc)
-			num_tc = dcbcfg->etscfg.prioritytable[i];
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+		num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]);
+
+	/* Now scan the bitmask to check for
+	 * contiguous TCs starting with TC0
+	 */
+	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+		if (num_tc & BIT(i)) {
+			if (!tc_unused) {
+				ret++;
+			} else {
+				pr_err("Non-contiguous TC - Disabling DCB\n");
+				return 1;
+			}
+		} else {
+			tc_unused = 1;
+		}
 	}
 
-	/* Traffic class index starts from zero so
-	 * increment to return the actual count
-	 */
-	return num_tc + 1;
+	/* There is always at least TC0 */
+	if (!ret)
+		ret = 1;
+
+	return ret;
 }
 
 /**
@@ -4601,7 +4615,7 @@
 static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
 {
 	struct i40e_hw *hw = &pf->hw;
-	u8 i, enabled_tc;
+	u8 i, enabled_tc = 1;
 	u8 num_tc = 0;
 	struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
 
@@ -4619,8 +4633,6 @@
 	else
 		return 1; /* Only TC0 */
 
-	/* At least have TC0 */
-	enabled_tc = (enabled_tc ? enabled_tc : 0x1);
 	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
 		if (enabled_tc & BIT(i))
 			num_tc++;
@@ -5098,9 +5110,13 @@
 				       DCB_CAP_DCBX_VER_IEEE;
 
 			pf->flags |= I40E_FLAG_DCB_CAPABLE;
-			/* Enable DCB tagging only when more than one TC */
+			/* Enable DCB tagging only when more than one TC
+			 * or explicitly disable if only one TC
+			 */
 			if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
 				pf->flags |= I40E_FLAG_DCB_ENABLED;
+			else
+				pf->flags &= ~I40E_FLAG_DCB_ENABLED;
 			dev_dbg(&pf->pdev->dev,
 				"DCBX offload is supported for this PF.\n");
 		}
@@ -5226,7 +5242,7 @@
 		/* reset fd counters */
 		pf->fd_add_err = pf->fd_atr_cnt = 0;
 		if (pf->fd_tcp_rule > 0) {
-			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+			pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
 			if (I40E_DEBUG_FD & pf->hw.debug_mask)
 				dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 exist\n");
 			pf->fd_tcp_rule = 0;
@@ -5416,7 +5432,6 @@
 	wr32(&pf->hw, I40E_GLLAN_TSOMSK_L, be32_to_cpu(TCP_FLAG_CWR) >> 16);
 
 	udp_tunnel_get_rx_info(netdev);
-	i40e_notify_client_of_netdev_open(vsi);
 
 	return 0;
 }
@@ -5702,7 +5717,7 @@
 	u8 type;
 
 	/* Not DCB capable or capability disabled */
-	if (!(pf->flags & I40E_FLAG_DCB_CAPABLE))
+	if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
 		return ret;
 
 	/* Ignore if event is not for Nearest Bridge */
@@ -5924,13 +5939,17 @@
 				dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
 		}
 	}
-	/* Wait for some more space to be available to turn on ATR */
+
+	/* Wait for some more space to be available to turn on ATR. We also
+	 * must check that no existing ntuple rules for TCP are in effect
+	 */
 	if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM * 2)) {
 		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
-		    (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) {
+		    (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED) &&
+		    (pf->fd_tcp_rule == 0)) {
 			pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
 			if (I40E_DEBUG_FD & pf->hw.debug_mask)
-				dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table now\n");
+				dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
 		}
 	}
 
@@ -5961,9 +5980,6 @@
 	int fd_room;
 	int reg;
 
-	if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED)))
-		return;
-
 	if (!time_after(jiffies, pf->fd_flush_timestamp +
 				 (I40E_MIN_FD_FLUSH_INTERVAL * HZ)))
 		return;
@@ -5983,7 +5999,7 @@
 	}
 
 	pf->fd_flush_timestamp = jiffies;
-	pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+	pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
 	/* flush all filters */
 	wr32(&pf->hw, I40E_PFQF_CTL_1,
 	     I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
@@ -6003,7 +6019,7 @@
 		/* replay sideband filters */
 		i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]);
 		if (!disable_atr)
-			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
+			pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
 		clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
 			dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
@@ -6037,9 +6053,6 @@
 	if (test_bit(__I40E_DOWN, &pf->state))
 		return;
 
-	if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED)))
-		return;
-
 	if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
 		i40e_fdir_flush_and_replay(pf);
 
@@ -7139,9 +7152,9 @@
 			pf->pending_udp_bitmap &= ~BIT_ULL(i);
 			port = pf->udp_ports[i].index;
 			if (port)
-				ret = i40e_aq_add_udp_tunnel(hw, ntohs(port),
-						     pf->udp_ports[i].type,
-						     NULL, NULL);
+				ret = i40e_aq_add_udp_tunnel(hw, port,
+							pf->udp_ports[i].type,
+							NULL, NULL);
 			else
 				ret = i40e_aq_del_udp_tunnel(hw, i, NULL);
 
@@ -7628,7 +7641,6 @@
 			vectors_left--;
 		} else {
 			pf->num_fdsb_msix = 0;
-			pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
 		}
 	}
 
@@ -7648,6 +7660,8 @@
 #endif
 	/* can we reserve enough for iWARP? */
 	if (pf->flags & I40E_FLAG_IWARP_ENABLED) {
+		iwarp_requested = pf->num_iwarp_msix;
+
 		if (!vectors_left)
 			pf->num_iwarp_msix = 0;
 		else if (vectors_left < pf->num_iwarp_msix)
@@ -7661,18 +7675,23 @@
 		int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps;
 		int vmdq_vecs = min_t(int, vectors_left, vmdq_vecs_wanted);
 
-		/* if we're short on vectors for what's desired, we limit
-		 * the queues per vmdq.  If this is still more than are
-		 * available, the user will need to change the number of
-		 * queues/vectors used by the PF later with the ethtool
-		 * channels command
-		 */
-		if (vmdq_vecs < vmdq_vecs_wanted)
-			pf->num_vmdq_qps = 1;
-		pf->num_vmdq_msix = pf->num_vmdq_qps;
+		if (!vectors_left) {
+			pf->num_vmdq_msix = 0;
+			pf->num_vmdq_qps = 0;
+		} else {
+			/* if we're short on vectors for what's desired, we limit
+			 * the queues per vmdq.  If this is still more than are
+			 * available, the user will need to change the number of
+			 * queues/vectors used by the PF later with the ethtool
+			 * channels command
+			 */
+			if (vmdq_vecs < vmdq_vecs_wanted)
+				pf->num_vmdq_qps = 1;
+			pf->num_vmdq_msix = pf->num_vmdq_qps;
 
-		v_budget += vmdq_vecs;
-		vectors_left -= vmdq_vecs;
+			v_budget += vmdq_vecs;
+			vectors_left -= vmdq_vecs;
+		}
 	}
 
 	pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
@@ -7684,21 +7703,6 @@
 		pf->msix_entries[i].entry = i;
 	v_actual = i40e_reserve_msix_vectors(pf, v_budget);
 
-	if (v_actual != v_budget) {
-		/* If we have limited resources, we will start with no vectors
-		 * for the special features and then allocate vectors to some
-		 * of these features based on the policy and at the end disable
-		 * the features that did not get any vectors.
-		 */
-		iwarp_requested = pf->num_iwarp_msix;
-		pf->num_iwarp_msix = 0;
-#ifdef I40E_FCOE
-		pf->num_fcoe_qps = 0;
-		pf->num_fcoe_msix = 0;
-#endif
-		pf->num_vmdq_msix = 0;
-	}
-
 	if (v_actual < I40E_MIN_MSIX) {
 		pf->flags &= ~I40E_FLAG_MSIX_ENABLED;
 		kfree(pf->msix_entries);
@@ -7712,9 +7716,16 @@
 		pf->num_lan_qps = 1;
 		pf->num_lan_msix = 1;
 
-	} else if (v_actual != v_budget) {
+	} else if (!vectors_left) {
+		/* If we have limited resources, we will start with no vectors
+		 * for the special features and then allocate vectors to some
+		 * of these features based on the policy and at the end disable
+		 * the features that did not get any vectors.
+		 */
 		int vec;
 
+		dev_info(&pf->pdev->dev,
+			 "MSI-X vector limit reached, attempting to redistribute vectors\n");
 		/* reserve the misc vector */
 		vec = v_actual - 1;
 
@@ -7722,7 +7733,10 @@
 		pf->num_vmdq_msix = 1;    /* force VMDqs to only one vector */
 		pf->num_vmdq_vsis = 1;
 		pf->num_vmdq_qps = 1;
-		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+#ifdef I40E_FCOE
+		pf->num_fcoe_qps = 0;
+		pf->num_fcoe_msix = 0;
+#endif
 
 		/* partition out the remaining vectors */
 		switch (vec) {
@@ -7754,9 +7768,14 @@
 				pf->num_vmdq_vsis = min_t(int, (vec / 2),
 						  I40E_DEFAULT_NUM_VMDQ_VSI);
 			}
+			if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
+				pf->num_fdsb_msix = 1;
+				vec--;
+			}
 			pf->num_lan_msix = min_t(int,
 			       (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)),
 							      pf->num_lan_msix);
+			pf->num_lan_qps = pf->num_lan_msix;
 #ifdef I40E_FCOE
 			/* give one vector to FCoE */
 			if (pf->flags & I40E_FLAG_FCOE_ENABLED) {
@@ -7768,6 +7787,11 @@
 		}
 	}
 
+	if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
+	    (pf->num_fdsb_msix == 0)) {
+		dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n");
+		pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
+	}
 	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
 	    (pf->num_vmdq_msix == 0)) {
 		dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n");
@@ -7786,6 +7810,13 @@
 		pf->flags &= ~I40E_FLAG_FCOE_ENABLED;
 	}
 #endif
+	i40e_debug(&pf->hw, I40E_DEBUG_INIT,
+		   "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n",
+		   pf->num_lan_msix,
+		   pf->num_vmdq_msix * pf->num_vmdq_vsis,
+		   pf->num_fdsb_msix,
+		   pf->num_iwarp_msix);
+
 	return v_actual;
 }
 
@@ -7882,6 +7913,7 @@
 #endif
 				       I40E_FLAG_RSS_ENABLED	|
 				       I40E_FLAG_DCB_CAPABLE	|
+				       I40E_FLAG_DCB_ENABLED	|
 				       I40E_FLAG_SRIOV_ENABLED	|
 				       I40E_FLAG_FD_SB_ENABLED	|
 				       I40E_FLAG_FD_ATR_ENABLED	|
@@ -7971,72 +8003,34 @@
 static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
 			      u8 *lut, u16 lut_size)
 {
-	struct i40e_aqc_get_set_rss_key_data rss_key;
 	struct i40e_pf *pf = vsi->back;
 	struct i40e_hw *hw = &pf->hw;
-	bool pf_lut = false;
-	u8 *rss_lut;
-	int ret, i;
+	int ret = 0;
 
-	memcpy(&rss_key, seed, sizeof(rss_key));
-
-	rss_lut = kzalloc(pf->rss_table_size, GFP_KERNEL);
-	if (!rss_lut)
-		return -ENOMEM;
-
-	/* Populate the LUT with max no. of queues in round robin fashion */
-	for (i = 0; i < vsi->rss_table_size; i++)
-		rss_lut[i] = i % vsi->rss_size;
-
-	ret = i40e_aq_set_rss_key(hw, vsi->id, &rss_key);
-	if (ret) {
-		dev_info(&pf->pdev->dev,
-			 "Cannot set RSS key, err %s aq_err %s\n",
-			 i40e_stat_str(&pf->hw, ret),
-			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-		goto config_rss_aq_out;
+	if (seed) {
+		struct i40e_aqc_get_set_rss_key_data *seed_dw =
+			(struct i40e_aqc_get_set_rss_key_data *)seed;
+		ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Cannot set RSS key, err %s aq_err %s\n",
+				 i40e_stat_str(hw, ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
+		}
 	}
+	if (lut) {
+		bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
 
-	if (vsi->type == I40E_VSI_MAIN)
-		pf_lut = true;
-
-	ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, rss_lut,
-				  vsi->rss_table_size);
-	if (ret)
-		dev_info(&pf->pdev->dev,
-			 "Cannot set RSS lut, err %s aq_err %s\n",
-			 i40e_stat_str(&pf->hw, ret),
-			 i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-
-config_rss_aq_out:
-	kfree(rss_lut);
-	return ret;
-}
-
-/**
- * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
- * @vsi: VSI structure
- **/
-static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
-{
-	u8 seed[I40E_HKEY_ARRAY_SIZE];
-	struct i40e_pf *pf = vsi->back;
-	u8 *lut;
-	int ret;
-
-	if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE))
-		return 0;
-
-	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
-	if (!lut)
-		return -ENOMEM;
-
-	i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
-	netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
-	vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs);
-	ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
-	kfree(lut);
-
+		ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Cannot set RSS lut, err %s aq_err %s\n",
+				 i40e_stat_str(hw, ret),
+				 i40e_aq_str(hw, hw->aq.asq_last_status));
+			return ret;
+		}
+	}
 	return ret;
 }
 
@@ -8087,6 +8081,46 @@
 }
 
 /**
+ * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
+ * @vsi: VSI structure
+ **/
+static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
+{
+	u8 seed[I40E_HKEY_ARRAY_SIZE];
+	struct i40e_pf *pf = vsi->back;
+	u8 *lut;
+	int ret;
+
+	if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE))
+		return 0;
+
+	if (!vsi->rss_size)
+		vsi->rss_size = min_t(int, pf->alloc_rss_size,
+				      vsi->num_queue_pairs);
+	if (!vsi->rss_size)
+		return -EINVAL;
+
+	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+	if (!lut)
+		return -ENOMEM;
+	/* Use the user configured hash keys and lookup table if there is one,
+	 * otherwise use default
+	 */
+	if (vsi->rss_lut_user)
+		memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+	else
+		i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+	if (vsi->rss_hkey_user)
+		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+	else
+		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+	ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
+	kfree(lut);
+
+	return ret;
+}
+
+/**
  * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
  * @vsi: Pointer to vsi structure
  * @seed: RSS hash seed
@@ -8224,8 +8258,8 @@
  * @rss_table_size: Lookup table size
  * @rss_size: Range of queue number for hashing
  */
-static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
-			      u16 rss_table_size, u16 rss_size)
+void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
+		       u16 rss_table_size, u16 rss_size)
 {
 	u16 i;
 
@@ -8266,6 +8300,8 @@
 	if (!vsi->rss_size)
 		vsi->rss_size = min_t(int, pf->alloc_rss_size,
 				      vsi->num_queue_pairs);
+	if (!vsi->rss_size)
+		return -EINVAL;
 
 	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
 	if (!lut)
@@ -8590,7 +8626,6 @@
 			     I40E_FLAG_WB_ON_ITR_CAPABLE |
 			     I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE |
 			     I40E_FLAG_NO_PCI_LINK_CHECK |
-			     I40E_FLAG_100M_SGMII_CAPABLE |
 			     I40E_FLAG_USE_SET_LLDP_MIB |
 			     I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
 	} else if ((pf->hw.aq.api_maj_ver > 1) ||
@@ -8665,18 +8700,40 @@
 		/* reset fd counters */
 		pf->fd_add_err = pf->fd_atr_cnt = pf->fd_tcp_rule = 0;
 		pf->fdir_pf_active_filters = 0;
-		pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
-		if (I40E_DEBUG_FD & pf->hw.debug_mask)
-			dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
 		/* if ATR was auto disabled it can be re-enabled. */
 		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
-		    (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
+		    (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) {
 			pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+			if (I40E_DEBUG_FD & pf->hw.debug_mask)
+				dev_info(&pf->pdev->dev, "ATR re-enabled.\n");
+		}
 	}
 	return need_reset;
 }
 
 /**
+ * i40e_clear_rss_lut - clear the rx hash lookup table
+ * @vsi: the VSI being configured
+ **/
+static void i40e_clear_rss_lut(struct i40e_vsi *vsi)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct i40e_hw *hw = &pf->hw;
+	u16 vf_id = vsi->vf_id;
+	u8 i;
+
+	if (vsi->type == I40E_VSI_MAIN) {
+		for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
+			wr32(hw, I40E_PFQF_HLUT(i), 0);
+	} else if (vsi->type == I40E_VSI_SRIOV) {
+		for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
+			i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0);
+	} else {
+		dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
+	}
+}
+
+/**
  * i40e_set_features - set the netdev feature flags
  * @netdev: ptr to the netdev being adjusted
  * @features: the feature set that the stack is suggesting
@@ -8689,6 +8746,12 @@
 	struct i40e_pf *pf = vsi->back;
 	bool need_reset;
 
+	if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH))
+		i40e_pf_config_rss(pf);
+	else if (!(features & NETIF_F_RXHASH) &&
+		 netdev->features & NETIF_F_RXHASH)
+		i40e_clear_rss_lut(vsi);
+
 	if (features & NETIF_F_HW_VLAN_CTAG_RX)
 		i40e_vlan_stripping_enable(vsi);
 	else
@@ -10488,6 +10551,7 @@
 			       I40E_FLAG_FD_SB_ENABLED	|
 			       I40E_FLAG_FD_ATR_ENABLED	|
 			       I40E_FLAG_DCB_CAPABLE	|
+			       I40E_FLAG_DCB_ENABLED	|
 			       I40E_FLAG_SRIOV_ENABLED	|
 			       I40E_FLAG_VMDQ_ENABLED);
 	} else if (!(pf->flags & (I40E_FLAG_RSS_ENABLED |
@@ -10511,7 +10575,8 @@
 		/* Not enough queues for all TCs */
 		if ((pf->flags & I40E_FLAG_DCB_CAPABLE) &&
 		    (queues_left < I40E_MAX_TRAFFIC_CLASS)) {
-			pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+			pf->flags &= ~(I40E_FLAG_DCB_CAPABLE |
+					I40E_FLAG_DCB_ENABLED);
 			dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
 		}
 		pf->num_lan_qps = max_t(int, pf->rss_size_max,
@@ -10908,7 +10973,7 @@
 	err = i40e_init_pf_dcb(pf);
 	if (err) {
 		dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err);
-		pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+		pf->flags &= ~(I40E_FLAG_DCB_CAPABLE & I40E_FLAG_DCB_ENABLED);
 		/* Continue without DCB enabled */
 	}
 #endif /* CONFIG_I40E_DCB */
@@ -11288,11 +11353,7 @@
 	}
 
 	/* shutdown the adminq */
-	ret_code = i40e_shutdown_adminq(hw);
-	if (ret_code)
-		dev_warn(&pdev->dev,
-			 "Failed to destroy the Admin Queue resources: %d\n",
-			 ret_code);
+	i40e_shutdown_adminq(hw);
 
 	/* destroy the locks only once, here */
 	mutex_destroy(&hw->aq.arq_mutex);
@@ -11339,6 +11400,12 @@
 
 	dev_info(&pdev->dev, "%s: error %d\n", __func__, error);
 
+	if (!pf) {
+		dev_info(&pdev->dev,
+			 "Cannot recover - error happened during device probe\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
 	/* shutdown all operations */
 	if (!test_bit(__I40E_SUSPENDED, &pf->state)) {
 		rtnl_lock();
@@ -11561,7 +11628,8 @@
 	 * it can't be any worse than using the system workqueue which
 	 * was already single threaded
 	 */
-	i40e_wq = create_singlethread_workqueue(i40e_driver_name);
+	i40e_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
+				  i40e_driver_name);
 	if (!i40e_wq) {
 		pr_err("%s: Failed to create workqueue\n", i40e_driver_name);
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index ed39cba..f1fecea 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -669,7 +669,7 @@
 		pf->ptp_clock = NULL;
 		dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n",
 			__func__);
-	} else {
+	} else if (pf->ptp_clock) {
 		struct timespec64 ts;
 		u32 regval;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index df7ecc9..6287bf6 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -40,6 +40,69 @@
 }
 
 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
+/**
+ * i40e_fdir - Generate a Flow Director descriptor based on fdata
+ * @tx_ring: Tx ring to send buffer on
+ * @fdata: Flow director filter data
+ * @add: Indicate if we are adding a rule or deleting one
+ *
+ **/
+static void i40e_fdir(struct i40e_ring *tx_ring,
+		      struct i40e_fdir_filter *fdata, bool add)
+{
+	struct i40e_filter_program_desc *fdir_desc;
+	struct i40e_pf *pf = tx_ring->vsi->back;
+	u32 flex_ptype, dtype_cmd;
+	u16 i;
+
+	/* grab the next descriptor */
+	i = tx_ring->next_to_use;
+	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK &
+		     (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT);
+
+	flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK &
+		      (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);
+
+	flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
+		      (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
+
+	/* Use LAN VSI Id if not programmed by user */
+	flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
+		      ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
+		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);
+
+	dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
+
+	dtype_cmd |= add ?
+		     I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
+		     I40E_TXD_FLTR_QW1_PCMD_SHIFT :
+		     I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
+		     I40E_TXD_FLTR_QW1_PCMD_SHIFT;
+
+	dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK &
+		     (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT);
+
+	dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK &
+		     (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT);
+
+	if (fdata->cnt_index) {
+		dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
+		dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK &
+			     ((u32)fdata->cnt_index <<
+			      I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT);
+	}
+
+	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
+	fdir_desc->rsvd = cpu_to_le32(0);
+	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
+	fdir_desc->fd_id = cpu_to_le32(fdata->fd_id);
+}
+
 #define I40E_FD_CLEAN_DELAY 10
 /**
  * i40e_program_fdir_filter - Program a Flow Director filter
@@ -48,14 +111,13 @@
  * @pf: The PF pointer
  * @add: True for add/update, False for remove
  **/
-int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
-			     struct i40e_pf *pf, bool add)
+static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
+				    u8 *raw_packet, struct i40e_pf *pf,
+				    bool add)
 {
-	struct i40e_filter_program_desc *fdir_desc;
 	struct i40e_tx_buffer *tx_buf, *first;
 	struct i40e_tx_desc *tx_desc;
 	struct i40e_ring *tx_ring;
-	unsigned int fpt, dcc;
 	struct i40e_vsi *vsi;
 	struct device *dev;
 	dma_addr_t dma;
@@ -92,56 +154,8 @@
 
 	/* grab the next descriptor */
 	i = tx_ring->next_to_use;
-	fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
 	first = &tx_ring->tx_bi[i];
-	memset(first, 0, sizeof(struct i40e_tx_buffer));
-
-	tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
-
-	fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
-	      I40E_TXD_FLTR_QW0_QINDEX_MASK;
-
-	fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
-	       I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
-
-	fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
-	       I40E_TXD_FLTR_QW0_PCTYPE_MASK;
-
-	/* Use LAN VSI Id if not programmed by user */
-	if (fdir_data->dest_vsi == 0)
-		fpt |= (pf->vsi[pf->lan_vsi]->id) <<
-		       I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
-	else
-		fpt |= ((u32)fdir_data->dest_vsi <<
-			I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
-		       I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
-
-	dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
-
-	if (add)
-		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
-		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
-	else
-		dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
-		       I40E_TXD_FLTR_QW1_PCMD_SHIFT;
-
-	dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
-	       I40E_TXD_FLTR_QW1_DEST_MASK;
-
-	dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
-	       I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
-
-	if (fdir_data->cnt_index != 0) {
-		dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
-		dcc |= ((u32)fdir_data->cnt_index <<
-			I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
-			I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
-	}
-
-	fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
-	fdir_desc->rsvd = cpu_to_le32(0);
-	fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
-	fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
+	i40e_fdir(tx_ring, fdir_data, add);
 
 	/* Now program a dummy descriptor */
 	i = tx_ring->next_to_use;
@@ -282,18 +296,18 @@
 
 	if (add) {
 		pf->fd_tcp_rule++;
-		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
-			if (I40E_DEBUG_FD & pf->hw.debug_mask)
-				dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
-			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
-		}
+		if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+		    I40E_DEBUG_FD & pf->hw.debug_mask)
+			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
+		pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
 	} else {
 		pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
 				  (pf->fd_tcp_rule - 1) : 0;
 		if (pf->fd_tcp_rule == 0) {
-			pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
-			if (I40E_DEBUG_FD & pf->hw.debug_mask)
+			if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
+			    I40E_DEBUG_FD & pf->hw.debug_mask)
 				dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
+			pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED;
 		}
 	}
 
@@ -532,7 +546,10 @@
 					    struct i40e_tx_buffer *tx_buffer)
 {
 	if (tx_buffer->skb) {
-		dev_kfree_skb_any(tx_buffer->skb);
+		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+			kfree(tx_buffer->raw_buf);
+		else
+			dev_kfree_skb_any(tx_buffer->skb);
 		if (dma_unmap_len(tx_buffer, len))
 			dma_unmap_single(ring->dev,
 					 dma_unmap_addr(tx_buffer, dma),
@@ -545,9 +562,6 @@
 			       DMA_TO_DEVICE);
 	}
 
-	if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
-		kfree(tx_buffer->raw_buf);
-
 	tx_buffer->next_to_watch = NULL;
 	tx_buffer->skb = NULL;
 	dma_unmap_len_set(tx_buffer, len, 0);
@@ -584,8 +598,7 @@
 		return;
 
 	/* cleanup Tx queue statistics */
-	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
-						  tx_ring->queue_index));
+	netdev_tx_reset_queue(txring_txq(tx_ring));
 }
 
 /**
@@ -754,8 +767,8 @@
 			tx_ring->arm_wb = true;
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
-						      tx_ring->queue_index),
+	/* notify netdev of completed buffers */
+	netdev_tx_completed_queue(txring_txq(tx_ring),
 				  total_packets, total_bytes);
 
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
@@ -1864,6 +1877,15 @@
 
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_PFINT_DYN_CTLN
+static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx)
+{
+	return !!(vsi->rx_rings[idx]->rx_itr_setting);
+}
+
+static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx)
+{
+	return !!(vsi->tx_rings[idx]->tx_itr_setting);
+}
 
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -1879,6 +1901,7 @@
 	u32 rxval, txval;
 	int vector;
 	int idx = q_vector->v_idx;
+	int rx_itr_setting, tx_itr_setting;
 
 	vector = (q_vector->v_idx + vsi->base_vector);
 
@@ -1887,18 +1910,21 @@
 	 */
 	rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
 
+	rx_itr_setting = get_rx_itr_enabled(vsi, idx);
+	tx_itr_setting = get_tx_itr_enabled(vsi, idx);
+
 	if (q_vector->itr_countdown > 0 ||
-	    (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) &&
-	     !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) {
+	    (!ITR_IS_DYNAMIC(rx_itr_setting) &&
+	     !ITR_IS_DYNAMIC(tx_itr_setting))) {
 		goto enable_int;
 	}
 
-	if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) {
+	if (ITR_IS_DYNAMIC(tx_itr_setting)) {
 		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
 		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
 	}
 
-	if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) {
+	if (ITR_IS_DYNAMIC(tx_itr_setting)) {
 		tx = i40e_set_new_dynamic_itr(&q_vector->tx);
 		txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
 	}
@@ -2621,9 +2647,7 @@
 		return false;
 
 	/* We need to walk through the list and validate that each group
-	 * of 6 fragments totals at least gso_size.  However we don't need
-	 * to perform such validation on the last 6 since the last 6 cannot
-	 * inherit any data from a descriptor after them.
+	 * of 6 fragments totals at least gso_size.
 	 */
 	nr_frags -= I40E_MAX_BUFFER_TXD - 2;
 	frag = &skb_shinfo(skb)->frags[0];
@@ -2654,8 +2678,7 @@
 		if (sum < 0)
 			return true;
 
-		/* use pre-decrement to avoid processing last fragment */
-		if (!--nr_frags)
+		if (!nr_frags--)
 			break;
 
 		sum -= skb_frag_size(stale++);
@@ -2787,9 +2810,7 @@
 
 	tx_ring->next_to_use = i;
 
-	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
-						 tx_ring->queue_index),
-						 first->bytecount);
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
 	/* Algorithm to optimize tail and RS bit setting:
@@ -2814,13 +2835,11 @@
 	 * trigger a force WB.
 	 */
 	if (skb->xmit_more  &&
-	    !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-						    tx_ring->queue_index))) {
+	    !netif_xmit_stopped(txring_txq(tx_ring))) {
 		tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
 		tail_bump = false;
 	} else if (!skb->xmit_more &&
-		   !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-						       tx_ring->queue_index)) &&
+		   !netif_xmit_stopped(txring_txq(tx_ring)) &&
 		   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
 		   (tx_ring->packet_stride < WB_STRIDE) &&
 		   (desc_count < WB_STRIDE)) {
@@ -2840,10 +2859,9 @@
 						  I40E_TXD_QW1_CMD_SHIFT);
 
 	/* notify HW of packet */
-	if (!tail_bump)
+	if (!tail_bump) {
 		prefetchw(tx_desc + 1);
-
-	if (tail_bump) {
+	} else {
 		/* Force memory writes to complete before letting h/w
 		 * know there are new descriptors to fetch.  (Only
 		 * applicable for weak-ordered memory model archs,
@@ -2852,7 +2870,6 @@
 		wmb();
 		writel(i, tx_ring->tail);
 	}
-
 	return;
 
 dma_error:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index b78c810..5088405 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -463,4 +463,13 @@
 	return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) &&
 	       (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER);
 }
+
+/**
+ * txring_txq - Find the netdev Tx ring based on the i40e Tx ring
+ * @ring: Tx ring to find the netdev equivalent of
+ **/
+static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
 #endif /* _I40E_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
index c92a3bd..f861d31 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
@@ -163,6 +163,7 @@
 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING	0x00020000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
+#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM	0X00100000
 
 struct i40e_virtchnl_vf_resource {
 	u16 num_vsis;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 6fcbf76..54b8ee2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -502,8 +502,16 @@
 	u32 qtx_ctl;
 	int ret = 0;
 
+	if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+		ret = -ENOENT;
+		goto error_context;
+	}
 	pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
 	vsi = i40e_find_vsi_from_id(pf, vsi_id);
+	if (!vsi) {
+		ret = -ENOENT;
+		goto error_context;
+	}
 
 	/* clear the context structure first */
 	memset(&tx_ctx, 0, sizeof(struct i40e_hmc_obj_txq));
@@ -991,7 +999,10 @@
 		i40e_enable_vf_mappings(vf);
 		set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
 		clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states);
-		i40e_notify_client_of_vf_reset(pf, abs_vf_id);
+		/* Do not notify the client during VF init */
+		if (vf->pf->num_alloc_vfs)
+			i40e_notify_client_of_vf_reset(pf, abs_vf_id);
+		vf->num_vlan = 0;
 	}
 	/* tell the VF the reset is done */
 	wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE);
@@ -1089,7 +1100,6 @@
 			goto err_iov;
 		}
 	}
-	i40e_notify_client_of_vf_enable(pf, num_alloc_vfs);
 	/* allocate memory */
 	vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
 	if (!vfs) {
@@ -1113,6 +1123,8 @@
 	}
 	pf->num_alloc_vfs = num_alloc_vfs;
 
+	i40e_notify_client_of_vf_enable(pf, num_alloc_vfs);
+
 err_alloc:
 	if (ret)
 		i40e_free_vfs(pf);
@@ -1472,7 +1484,8 @@
 
 	vsi = i40e_find_vsi_from_id(pf, info->vsi_id);
 	if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
-	    !i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+	    !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) ||
+	    !vsi) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
@@ -2213,8 +2226,8 @@
 error_param:
 	/* send the response to the VF */
 	return i40e_vc_send_resp_to_vf(vf,
-			       config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP :
-			       I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP,
+			       config ? I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP :
+			       I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP,
 			       aq_ret);
 }
 
@@ -2314,6 +2327,7 @@
 	/* send the response back to the VF */
 	aq_ret = i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS,
 					aq_ret, (u8 *)vrh, len);
+	kfree(vrh);
 	return aq_ret;
 }
 
@@ -2742,11 +2756,12 @@
  * @vf_id: VF identifier
  * @vlan_id: mac address
  * @qos: priority setting
+ * @vlan_proto: vlan protocol
  *
  * program VF vlan id and/or qos
  **/
-int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
-			      int vf_id, u16 vlan_id, u8 qos)
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
+			      u16 vlan_id, u8 qos, __be16 vlan_proto)
 {
 	u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT);
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
@@ -2769,6 +2784,12 @@
 		goto error_pvid;
 	}
 
+	if (vlan_proto != htons(ETH_P_8021Q)) {
+		dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n");
+		ret = -EPROTONOSUPPORT;
+		goto error_pvid;
+	}
+
 	vf = &(pf->vf[vf_id]);
 	vsi = pf->vsi[vf->lan_vsi_idx];
 	if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
@@ -2995,6 +3016,7 @@
 	else
 		ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
 	ivi->spoofchk = vf->spoofchk;
+	ivi->trusted = vf->trusted;
 	ret = 0;
 
 error_param:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 8751741..4012d06 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -129,8 +129,8 @@
 
 /* VF configuration related iplink handlers */
 int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
-int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
-			      int vf_id, u16 vlan_id, u8 qos);
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
+			      u16 vlan_id, u8 qos, __be16 vlan_proto);
 int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
 		       int max_tx_rate);
 int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 3114dcf..40b0eaf 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -204,6 +204,9 @@
 	i40e_aqc_opc_suspend_port_tx				= 0x041B,
 	i40e_aqc_opc_resume_port_tx				= 0x041C,
 	i40e_aqc_opc_configure_partition_bw			= 0x041D,
+	/* hmc */
+	i40e_aqc_opc_query_hmc_resource_profile	= 0x0500,
+	i40e_aqc_opc_set_hmc_resource_profile	= 0x0501,
 
 	/* phy commands*/
 	i40e_aqc_opc_get_phy_abilities		= 0x0600,
@@ -447,13 +450,15 @@
 /* Set ARP Proxy command / response (indirect 0x0104) */
 struct i40e_aqc_arp_proxy_data {
 	__le16	command_flags;
-#define I40E_AQ_ARP_INIT_IPV4	0x0008
-#define I40E_AQ_ARP_UNSUP_CTL	0x0010
-#define I40E_AQ_ARP_ENA		0x0020
-#define I40E_AQ_ARP_ADD_IPV4	0x0040
-#define I40E_AQ_ARP_DEL_IPV4	0x0080
+#define I40E_AQ_ARP_INIT_IPV4	0x0800
+#define I40E_AQ_ARP_UNSUP_CTL	0x1000
+#define I40E_AQ_ARP_ENA		0x2000
+#define I40E_AQ_ARP_ADD_IPV4	0x4000
+#define I40E_AQ_ARP_DEL_IPV4	0x8000
 	__le16	table_id;
-	__le32	pfpm_proxyfc;
+	__le32	enabled_offloads;
+#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE	0x00000020
+#define I40E_AQ_ARP_OFFLOAD_ENABLE		0x00000800
 	__le32	ip_addr;
 	u8	mac_addr[6];
 	u8	reserved[2];
@@ -468,17 +473,19 @@
 	__le16	table_idx_ipv6_0;
 	__le16	table_idx_ipv6_1;
 	__le16	control;
-#define I40E_AQ_NS_PROXY_ADD_0		0x0100
-#define I40E_AQ_NS_PROXY_DEL_0		0x0200
-#define I40E_AQ_NS_PROXY_ADD_1		0x0400
-#define I40E_AQ_NS_PROXY_DEL_1		0x0800
-#define I40E_AQ_NS_PROXY_ADD_IPV6_0	0x1000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_0	0x2000
-#define I40E_AQ_NS_PROXY_ADD_IPV6_1	0x4000
-#define I40E_AQ_NS_PROXY_DEL_IPV6_1	0x8000
-#define I40E_AQ_NS_PROXY_COMMAND_SEQ	0x0001
-#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL	0x0002
-#define I40E_AQ_NS_PROXY_INIT_MAC_TBL	0x0004
+#define I40E_AQ_NS_PROXY_ADD_0		0x0001
+#define I40E_AQ_NS_PROXY_DEL_0		0x0002
+#define I40E_AQ_NS_PROXY_ADD_1		0x0004
+#define I40E_AQ_NS_PROXY_DEL_1		0x0008
+#define I40E_AQ_NS_PROXY_ADD_IPV6_0	0x0010
+#define I40E_AQ_NS_PROXY_DEL_IPV6_0	0x0020
+#define I40E_AQ_NS_PROXY_ADD_IPV6_1	0x0040
+#define I40E_AQ_NS_PROXY_DEL_IPV6_1	0x0080
+#define I40E_AQ_NS_PROXY_COMMAND_SEQ	0x0100
+#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL	0x0200
+#define I40E_AQ_NS_PROXY_INIT_MAC_TBL	0x0400
+#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE	0x0800
+#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE	0x1000
 	u8	mac_addr_0[6];
 	u8	mac_addr_1[6];
 	u8	local_mac_addr[6];
@@ -1579,6 +1586,24 @@
 
 I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data);
 
+/* Get and set the active HMC resource profile and status.
+ * (direct 0x0500) and (direct 0x0501)
+ */
+struct i40e_aq_get_set_hmc_resource_profile {
+	u8	pm_profile;
+	u8	pe_vf_enabled;
+	u8	reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile);
+
+enum i40e_aq_hmc_profile {
+	/* I40E_HMC_PROFILE_NO_CHANGE	= 0, reserved */
+	I40E_HMC_PROFILE_DEFAULT	= 1,
+	I40E_HMC_PROFILE_FAVOR_VF	= 2,
+	I40E_HMC_PROFILE_EQUAL		= 3,
+};
+
 /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */
 
 /* set in param0 for get phy abilities to report qualified modules */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 4db0c03..7953c13 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -302,7 +302,6 @@
 		   void *buffer, u16 buf_len)
 {
 	struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc;
-	u16 len = le16_to_cpu(aq_desc->datalen);
 	u8 *buf = (u8 *)buffer;
 	u16 i = 0;
 
@@ -326,6 +325,8 @@
 		   le32_to_cpu(aq_desc->params.external.addr_low));
 
 	if ((buffer != NULL) && (aq_desc->datalen != 0)) {
+		u16 len = le16_to_cpu(aq_desc->datalen);
+
 		i40e_debug(hw, mask, "AQ CMD Buffer:\n");
 		if (buf_len < len)
 			len = buf_len;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index a579193..75f2a2c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -51,7 +51,10 @@
 					    struct i40e_tx_buffer *tx_buffer)
 {
 	if (tx_buffer->skb) {
-		dev_kfree_skb_any(tx_buffer->skb);
+		if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
+			kfree(tx_buffer->raw_buf);
+		else
+			dev_kfree_skb_any(tx_buffer->skb);
 		if (dma_unmap_len(tx_buffer, len))
 			dma_unmap_single(ring->dev,
 					 dma_unmap_addr(tx_buffer, dma),
@@ -64,9 +67,6 @@
 			       DMA_TO_DEVICE);
 	}
 
-	if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
-		kfree(tx_buffer->raw_buf);
-
 	tx_buffer->next_to_watch = NULL;
 	tx_buffer->skb = NULL;
 	dma_unmap_len_set(tx_buffer, len, 0);
@@ -103,8 +103,7 @@
 		return;
 
 	/* cleanup Tx queue statistics */
-	netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
-						  tx_ring->queue_index));
+	netdev_tx_reset_queue(txring_txq(tx_ring));
 }
 
 /**
@@ -273,8 +272,8 @@
 			tx_ring->arm_wb = true;
 	}
 
-	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
-						      tx_ring->queue_index),
+	/* notify netdev of completed buffers */
+	netdev_tx_completed_queue(txring_txq(tx_ring),
 				  total_packets, total_bytes);
 
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
@@ -1312,6 +1311,19 @@
 
 /* a small macro to shorten up some long lines */
 #define INTREG I40E_VFINT_DYN_CTLN1
+static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx)
+{
+	struct i40evf_adapter *adapter = vsi->back;
+
+	return !!(adapter->rx_rings[idx].rx_itr_setting);
+}
+
+static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx)
+{
+	struct i40evf_adapter *adapter = vsi->back;
+
+	return !!(adapter->tx_rings[idx].tx_itr_setting);
+}
 
 /**
  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
@@ -1326,6 +1338,8 @@
 	bool rx = false, tx = false;
 	u32 rxval, txval;
 	int vector;
+	int idx = q_vector->v_idx;
+	int rx_itr_setting, tx_itr_setting;
 
 	vector = (q_vector->v_idx + vsi->base_vector);
 
@@ -1334,18 +1348,21 @@
 	 */
 	rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
 
+	rx_itr_setting = get_rx_itr_enabled(vsi, idx);
+	tx_itr_setting = get_tx_itr_enabled(vsi, idx);
+
 	if (q_vector->itr_countdown > 0 ||
-	    (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
-	     !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
+	    (!ITR_IS_DYNAMIC(rx_itr_setting) &&
+	     !ITR_IS_DYNAMIC(tx_itr_setting))) {
 		goto enable_int;
 	}
 
-	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
+	if (ITR_IS_DYNAMIC(rx_itr_setting)) {
 		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
 		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
 	}
 
-	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
+	if (ITR_IS_DYNAMIC(tx_itr_setting)) {
 		tx = i40e_set_new_dynamic_itr(&q_vector->tx);
 		txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
 	}
@@ -1832,9 +1849,7 @@
 		return false;
 
 	/* We need to walk through the list and validate that each group
-	 * of 6 fragments totals at least gso_size.  However we don't need
-	 * to perform such validation on the last 6 since the last 6 cannot
-	 * inherit any data from a descriptor after them.
+	 * of 6 fragments totals at least gso_size.
 	 */
 	nr_frags -= I40E_MAX_BUFFER_TXD - 2;
 	frag = &skb_shinfo(skb)->frags[0];
@@ -1865,8 +1880,7 @@
 		if (sum < 0)
 			return true;
 
-		/* use pre-decrement to avoid processing last fragment */
-		if (!--nr_frags)
+		if (!nr_frags--)
 			break;
 
 		sum -= skb_frag_size(stale++);
@@ -2015,9 +2029,7 @@
 
 	tx_ring->next_to_use = i;
 
-	netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
-						 tx_ring->queue_index),
-						 first->bytecount);
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
 	/* Algorithm to optimize tail and RS bit setting:
@@ -2042,13 +2054,11 @@
 	 * trigger a force WB.
 	 */
 	if (skb->xmit_more  &&
-	    !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-						    tx_ring->queue_index))) {
+	    !netif_xmit_stopped(txring_txq(tx_ring))) {
 		tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
 		tail_bump = false;
 	} else if (!skb->xmit_more &&
-		   !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
-						       tx_ring->queue_index)) &&
+		   !netif_xmit_stopped(txring_txq(tx_ring)) &&
 		   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
 		   (tx_ring->packet_stride < WB_STRIDE) &&
 		   (desc_count < WB_STRIDE)) {
@@ -2068,10 +2078,9 @@
 						  I40E_TXD_QW1_CMD_SHIFT);
 
 	/* notify HW of packet */
-	if (!tail_bump)
+	if (!tail_bump) {
 		prefetchw(tx_desc + 1);
-
-	if (tail_bump) {
+	} else {
 		/* Force memory writes to complete before letting h/w
 		 * know there are new descriptors to fetch.  (Only
 		 * applicable for weak-ordered memory model archs,
@@ -2080,7 +2089,6 @@
 		wmb();
 		writel(i, tx_ring->tail);
 	}
-
 	return;
 
 dma_error:
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 0112277..abcdeca 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -287,6 +287,14 @@
 	u8 dcb_tc;			/* Traffic class of ring */
 	u8 __iomem *tail;
 
+	/* high bit set means dynamic, use accessors routines to read/write.
+	 * hardware only supports 2us resolution for the ITR registers.
+	 * these values always store the USER setting, and must be converted
+	 * before programming to a register.
+	 */
+	u16 rx_itr_setting;
+	u16 tx_itr_setting;
+
 	u16 count;			/* Number of descriptors */
 	u16 reg_idx;			/* HW register index of the ring */
 	u16 rx_buf_len;
@@ -445,4 +453,13 @@
 	return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) &&
 	       (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER);
 }
+
+/**
+ * txring_txq - Find the netdev Tx ring based on the i40e Tx ring
+ * @ring: Tx ring to find the netdev equivalent of
+ **/
+static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
 #endif /* _I40E_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
index f04ce6c..bd691ad 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
@@ -160,6 +160,7 @@
 #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING	0x00020000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
+#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM	0X00100000
 
 struct i40e_virtchnl_vf_resource {
 	u16 num_vsis;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 76ed97d..c5fd724 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -59,32 +59,25 @@
 	unsigned long state;
 	int base_vector;
 	u16 work_limit;
-	/* high bit set means dynamic, use accessor routines to read/write.
-	 * hardware only supports 2us resolution for the ITR registers.
-	 * these values always store the USER setting, and must be converted
-	 * before programming to a register.
-	 */
-	u16 rx_itr_setting;
-	u16 tx_itr_setting;
 	u16 qs_handle;
 };
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define I40EVF_RX_BUFFER_WRITE	16	/* Must be power of 2 */
-#define I40EVF_DEFAULT_TXD   512
-#define I40EVF_DEFAULT_RXD   512
-#define I40EVF_MAX_TXD       4096
-#define I40EVF_MIN_TXD       64
-#define I40EVF_MAX_RXD       4096
-#define I40EVF_MIN_RXD       64
-#define I40EVF_REQ_DESCRIPTOR_MULTIPLE  32
+#define I40EVF_DEFAULT_TXD	512
+#define I40EVF_DEFAULT_RXD	512
+#define I40EVF_MAX_TXD		4096
+#define I40EVF_MIN_TXD		64
+#define I40EVF_MAX_RXD		4096
+#define I40EVF_MIN_RXD		64
+#define I40EVF_REQ_DESCRIPTOR_MULTIPLE	32
 
 /* Supported Rx Buffer Sizes */
-#define I40EVF_RXBUFFER_2048  2048
-#define I40EVF_MAX_RXBUFFER   16384  /* largest size for single descriptor */
-#define I40EVF_MAX_AQ_BUF_SIZE    4096
-#define I40EVF_AQ_LEN             32
-#define I40EVF_AQ_MAX_ERR         20 /* times to try before resetting AQ */
+#define I40EVF_RXBUFFER_2048	2048
+#define I40EVF_MAX_RXBUFFER	16384  /* largest size for single descriptor */
+#define I40EVF_MAX_AQ_BUF_SIZE	4096
+#define I40EVF_AQ_LEN		32
+#define I40EVF_AQ_MAX_ERR	20 /* times to try before resetting AQ */
 
 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)
 
@@ -111,7 +104,7 @@
 	u8 num_ringpairs;	/* total number of ring pairs in vector */
 #define ITR_COUNTDOWN_START 100
 	u8 itr_countdown;	/* when 0 or 1 update ITR */
-	int v_idx;	  /* vector index in list */
+	int v_idx;	/* vector index in list */
 	char name[IFNAMSIZ + 9];
 	bool arm_wb_state;
 	cpumask_var_t affinity_mask;
@@ -129,11 +122,11 @@
 	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
 	(R)->next_to_clean - (R)->next_to_use - 1)
 
-#define I40EVF_RX_DESC_ADV(R, i)	    \
+#define I40EVF_RX_DESC_ADV(R, i)	\
 	(&(((union i40e_adv_rx_desc *)((R).desc))[i]))
-#define I40EVF_TX_DESC_ADV(R, i)	    \
+#define I40EVF_TX_DESC_ADV(R, i)	\
 	(&(((union i40e_adv_tx_desc *)((R).desc))[i]))
-#define I40EVF_TX_CTXTDESC_ADV(R, i)	    \
+#define I40EVF_TX_CTXTDESC_ADV(R, i)	\
 	(&(((struct i40e_adv_tx_context_desc *)((R).desc))[i]))
 
 #define OTHER_VECTOR 1
@@ -204,22 +197,25 @@
 	struct msix_entry *msix_entries;
 
 	u32 flags;
-#define I40EVF_FLAG_RX_CSUM_ENABLED              BIT(0)
-#define I40EVF_FLAG_IMIR_ENABLED                 BIT(5)
-#define I40EVF_FLAG_MQ_CAPABLE                   BIT(6)
-#define I40EVF_FLAG_NEED_LINK_UPDATE             BIT(7)
-#define I40EVF_FLAG_PF_COMMS_FAILED              BIT(8)
-#define I40EVF_FLAG_RESET_PENDING                BIT(9)
-#define I40EVF_FLAG_RESET_NEEDED                 BIT(10)
+#define I40EVF_FLAG_RX_CSUM_ENABLED		BIT(0)
+#define I40EVF_FLAG_IN_NETPOLL			BIT(4)
+#define I40EVF_FLAG_IMIR_ENABLED		BIT(5)
+#define I40EVF_FLAG_MQ_CAPABLE			BIT(6)
+#define I40EVF_FLAG_NEED_LINK_UPDATE		BIT(7)
+#define I40EVF_FLAG_PF_COMMS_FAILED		BIT(8)
+#define I40EVF_FLAG_RESET_PENDING		BIT(9)
+#define I40EVF_FLAG_RESET_NEEDED		BIT(10)
 #define I40EVF_FLAG_WB_ON_ITR_CAPABLE		BIT(11)
 #define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE	BIT(12)
 #define I40EVF_FLAG_ADDR_SET_BY_PF		BIT(13)
+#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED	BIT(14)
 #define I40EVF_FLAG_PROMISC_ON			BIT(15)
 #define I40EVF_FLAG_ALLMULTI_ON			BIT(16)
 /* duplicates for common code */
-#define I40E_FLAG_FDIR_ATR_ENABLED		 0
-#define I40E_FLAG_DCB_ENABLED			 0
-#define I40E_FLAG_RX_CSUM_ENABLED                I40EVF_FLAG_RX_CSUM_ENABLED
+#define I40E_FLAG_FDIR_ATR_ENABLED		0
+#define I40E_FLAG_DCB_ENABLED			0
+#define I40E_FLAG_IN_NETPOLL			I40EVF_FLAG_IN_NETPOLL
+#define I40E_FLAG_RX_CSUM_ENABLED		I40EVF_FLAG_RX_CSUM_ENABLED
 #define I40E_FLAG_WB_ON_ITR_CAPABLE		I40EVF_FLAG_WB_ON_ITR_CAPABLE
 #define I40E_FLAG_OUTER_UDP_CSUM_CAPABLE	I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE
 	/* flags for admin queue service task */
@@ -233,7 +229,7 @@
 #define I40EVF_FLAG_AQ_CONFIGURE_QUEUES		BIT(6)
 #define I40EVF_FLAG_AQ_MAP_VECTORS		BIT(7)
 #define I40EVF_FLAG_AQ_HANDLE_RESET		BIT(8)
-#define I40EVF_FLAG_AQ_CONFIGURE_RSS		BIT(9)	/* direct AQ config */
+#define I40EVF_FLAG_AQ_CONFIGURE_RSS		BIT(9) /* direct AQ config */
 #define I40EVF_FLAG_AQ_GET_CONFIG		BIT(10)
 /* Newer style, RSS done by the PF so we can ignore hardware vagaries. */
 #define I40EVF_FLAG_AQ_GET_HENA			BIT(11)
@@ -258,6 +254,7 @@
 	struct work_struct watchdog_task;
 	bool netdev_registered;
 	bool link_up;
+	enum i40e_aq_link_speed link_speed;
 	enum i40e_virtchnl_ops current_op;
 #define CLIENT_ENABLED(_a) ((_a)->vf_res ? \
 			    (_a)->vf_res->vf_offload_flags & \
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index c9c202f6..a994015 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -74,13 +74,33 @@
 static int i40evf_get_settings(struct net_device *netdev,
 			       struct ethtool_cmd *ecmd)
 {
-	/* In the future the VF will be able to query the PF for
-	 * some information - for now use a dummy value
-	 */
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+
 	ecmd->supported = 0;
 	ecmd->autoneg = AUTONEG_DISABLE;
 	ecmd->transceiver = XCVR_DUMMY1;
 	ecmd->port = PORT_NONE;
+	/* Set speed and duplex */
+	switch (adapter->link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		ethtool_cmd_speed_set(ecmd, SPEED_40000);
+		break;
+	case I40E_LINK_SPEED_20GB:
+		ethtool_cmd_speed_set(ecmd, SPEED_20000);
+		break;
+	case I40E_LINK_SPEED_10GB:
+		ethtool_cmd_speed_set(ecmd, SPEED_10000);
+		break;
+	case I40E_LINK_SPEED_1GB:
+		ethtool_cmd_speed_set(ecmd, SPEED_1000);
+		break;
+	case I40E_LINK_SPEED_100MB:
+		ethtool_cmd_speed_set(ecmd, SPEED_100);
+		break;
+	default:
+		break;
+	}
+	ecmd->duplex = DUPLEX_FULL;
 
 	return 0;
 }
@@ -276,31 +296,174 @@
 }
 
 /**
+ * __i40evf_get_coalesce - get per-queue coalesce settings
+ * @netdev: the netdev to check
+ * @ec: ethtool coalesce data structure
+ * @queue: which queue to pick
+ *
+ * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs
+ * are per queue. If queue is <0 then we default to queue 0 as the
+ * representative value.
+ **/
+static int __i40evf_get_coalesce(struct net_device *netdev,
+				 struct ethtool_coalesce *ec,
+				 int queue)
+{
+	struct i40evf_adapter *adapter = netdev_priv(netdev);
+	struct i40e_vsi *vsi = &adapter->vsi;
+	struct i40e_ring *rx_ring, *tx_ring;
+
+	ec->tx_max_coalesced_frames = vsi->work_limit;
+	ec->rx_max_coalesced_frames = vsi->work_limit;
+
+	/* Rx and Tx usecs per queue value. If user doesn't specify the
+	 * queue, return queue 0's value to represent.
+	 */
+	if (queue < 0)
+		queue = 0;
+	else if (queue >= adapter->num_active_queues)
+		return -EINVAL;
+
+	rx_ring = &adapter->rx_rings[queue];
+	tx_ring = &adapter->tx_rings[queue];
+
+	if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting))
+		ec->use_adaptive_rx_coalesce = 1;
+
+	if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting))
+		ec->use_adaptive_tx_coalesce = 1;
+
+	ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
+	ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+
+	return 0;
+}
+
+/**
  * i40evf_get_coalesce - Get interrupt coalescing settings
  * @netdev: network interface device structure
  * @ec: ethtool coalesce structure
  *
  * Returns current coalescing settings. This is referred to elsewhere in the
  * driver as Interrupt Throttle Rate, as this is how the hardware describes
- * this functionality.
+ * this functionality. Note that if per-queue settings have been modified this
+ * only represents the settings of queue 0.
  **/
 static int i40evf_get_coalesce(struct net_device *netdev,
 			       struct ethtool_coalesce *ec)
 {
+	return __i40evf_get_coalesce(netdev, ec, -1);
+}
+
+/**
+ * i40evf_get_per_queue_coalesce - get coalesce values for specific queue
+ * @netdev: netdev to read
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to read
+ *
+ * Read specific queue's coalesce settings.
+ **/
+static int i40evf_get_per_queue_coalesce(struct net_device *netdev,
+					 u32 queue,
+					 struct ethtool_coalesce *ec)
+{
+	return __i40evf_get_coalesce(netdev, ec, queue);
+}
+
+/**
+ * i40evf_set_itr_per_queue - set ITR values for specific queue
+ * @vsi: the VSI to set values for
+ * @ec: coalesce settings from ethtool
+ * @queue: the queue to modify
+ *
+ * Change the ITR settings for a specific queue.
+ **/
+static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter,
+				     struct ethtool_coalesce *ec,
+				     int queue)
+{
+	struct i40e_vsi *vsi = &adapter->vsi;
+	struct i40e_hw *hw = &adapter->hw;
+	struct i40e_q_vector *q_vector;
+	u16 vector;
+
+	adapter->rx_rings[queue].rx_itr_setting = ec->rx_coalesce_usecs;
+	adapter->tx_rings[queue].tx_itr_setting = ec->tx_coalesce_usecs;
+
+	if (ec->use_adaptive_rx_coalesce)
+		adapter->rx_rings[queue].rx_itr_setting |= I40E_ITR_DYNAMIC;
+	else
+		adapter->rx_rings[queue].rx_itr_setting &= ~I40E_ITR_DYNAMIC;
+
+	if (ec->use_adaptive_tx_coalesce)
+		adapter->tx_rings[queue].tx_itr_setting |= I40E_ITR_DYNAMIC;
+	else
+		adapter->tx_rings[queue].tx_itr_setting &= ~I40E_ITR_DYNAMIC;
+
+	q_vector = adapter->rx_rings[queue].q_vector;
+	q_vector->rx.itr = ITR_TO_REG(adapter->rx_rings[queue].rx_itr_setting);
+	vector = vsi->base_vector + q_vector->v_idx;
+	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr);
+
+	q_vector = adapter->tx_rings[queue].q_vector;
+	q_vector->tx.itr = ITR_TO_REG(adapter->tx_rings[queue].tx_itr_setting);
+	vector = vsi->base_vector + q_vector->v_idx;
+	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr);
+
+	i40e_flush(hw);
+}
+
+/**
+ * __i40evf_set_coalesce - set coalesce settings for particular queue
+ * @netdev: the netdev to change
+ * @ec: ethtool coalesce settings
+ * @queue: the queue to change
+ *
+ * Sets the coalesce settings for a particular queue.
+ **/
+static int __i40evf_set_coalesce(struct net_device *netdev,
+				 struct ethtool_coalesce *ec,
+				 int queue)
+{
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 	struct i40e_vsi *vsi = &adapter->vsi;
+	int i;
 
-	ec->tx_max_coalesced_frames = vsi->work_limit;
-	ec->rx_max_coalesced_frames = vsi->work_limit;
+	if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
+		vsi->work_limit = ec->tx_max_coalesced_frames_irq;
 
-	if (ITR_IS_DYNAMIC(vsi->rx_itr_setting))
-		ec->use_adaptive_rx_coalesce = 1;
+	if (ec->rx_coalesce_usecs == 0) {
+		if (ec->use_adaptive_rx_coalesce)
+			netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
+	} else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
+		   (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+		netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n");
+		return -EINVAL;
+	}
 
-	if (ITR_IS_DYNAMIC(vsi->tx_itr_setting))
-		ec->use_adaptive_tx_coalesce = 1;
+	else
+	if (ec->tx_coalesce_usecs == 0) {
+		if (ec->use_adaptive_tx_coalesce)
+			netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n");
+	} else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
+		   (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) {
+		netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n");
+		return -EINVAL;
+	}
 
-	ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC;
-	ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC;
+	/* Rx and Tx usecs has per queue value. If user doesn't specify the
+	 * queue, apply to all queues.
+	 */
+	if (queue < 0) {
+		for (i = 0; i < adapter->num_active_queues; i++)
+			i40evf_set_itr_per_queue(adapter, ec, i);
+	} else if (queue < adapter->num_active_queues) {
+		i40evf_set_itr_per_queue(adapter, ec, queue);
+	} else {
+		netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
+			   adapter->num_active_queues - 1);
+		return -EINVAL;
+	}
 
 	return 0;
 }
@@ -310,56 +473,27 @@
  * @netdev: network interface device structure
  * @ec: ethtool coalesce structure
  *
- * Change current coalescing settings.
+ * Change current coalescing settings for every queue.
  **/
 static int i40evf_set_coalesce(struct net_device *netdev,
 			       struct ethtool_coalesce *ec)
 {
-	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	struct i40e_hw *hw = &adapter->hw;
-	struct i40e_vsi *vsi = &adapter->vsi;
-	struct i40e_q_vector *q_vector;
-	int i;
+	return __i40evf_set_coalesce(netdev, ec, -1);
+}
 
-	if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
-		vsi->work_limit = ec->tx_max_coalesced_frames_irq;
-
-	if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
-	    (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
-		vsi->rx_itr_setting = ec->rx_coalesce_usecs;
-
-	else
-		return -EINVAL;
-
-	if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) &&
-	    (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1)))
-		vsi->tx_itr_setting = ec->tx_coalesce_usecs;
-	else if (ec->use_adaptive_tx_coalesce)
-		vsi->tx_itr_setting = (I40E_ITR_DYNAMIC |
-				       ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
-	else
-		return -EINVAL;
-
-	if (ec->use_adaptive_rx_coalesce)
-		vsi->rx_itr_setting |= I40E_ITR_DYNAMIC;
-	else
-		vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC;
-
-	if (ec->use_adaptive_tx_coalesce)
-		vsi->tx_itr_setting |= I40E_ITR_DYNAMIC;
-	else
-		vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC;
-
-	for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) {
-		q_vector = &adapter->q_vectors[i];
-		q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
-		wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr);
-		q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
-		wr32(hw, I40E_VFINT_ITRN1(1, i), q_vector->tx.itr);
-		i40e_flush(hw);
-	}
-
-	return 0;
+/**
+ * i40evf_set_per_queue_coalesce - set specific queue's coalesce settings
+ * @netdev: the netdev to change
+ * @ec: ethtool's coalesce settings
+ * @queue: the queue to modify
+ *
+ * Modifies a specific queue's coalesce settings.
+ */
+static int i40evf_set_per_queue_coalesce(struct net_device *netdev,
+					 u32 queue,
+					 struct ethtool_coalesce *ec)
+{
+	return __i40evf_set_coalesce(netdev, ec, queue);
 }
 
 /**
@@ -513,6 +647,8 @@
 	.set_msglevel		= i40evf_set_msglevel,
 	.get_coalesce		= i40evf_get_coalesce,
 	.set_coalesce		= i40evf_set_coalesce,
+	.get_per_queue_coalesce = i40evf_get_per_queue_coalesce,
+	.set_per_queue_coalesce = i40evf_set_per_queue_coalesce,
 	.get_rxnfc		= i40evf_get_rxnfc,
 	.get_rxfh_indir_size	= i40evf_get_rxfh_indir_size,
 	.get_rxfh		= i40evf_get_rxfh,
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 600fb9c..1437281 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -38,7 +38,7 @@
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 11
+#define DRV_VERSION_BUILD 16
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD) \
@@ -370,6 +370,7 @@
 {
 	struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx];
 	struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx];
+	struct i40e_hw *hw = &adapter->hw;
 
 	rx_ring->q_vector = q_vector;
 	rx_ring->next = q_vector->rx.ring;
@@ -377,7 +378,10 @@
 	q_vector->rx.ring = rx_ring;
 	q_vector->rx.count++;
 	q_vector->rx.latency_range = I40E_LOW_LATENCY;
+	q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting);
+	q_vector->ring_mask |= BIT(r_idx);
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr);
 }
 
 /**
@@ -391,6 +395,7 @@
 {
 	struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx];
 	struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx];
+	struct i40e_hw *hw = &adapter->hw;
 
 	tx_ring->q_vector = q_vector;
 	tx_ring->next = q_vector->tx.ring;
@@ -398,9 +403,10 @@
 	q_vector->tx.ring = tx_ring;
 	q_vector->tx.count++;
 	q_vector->tx.latency_range = I40E_LOW_LATENCY;
+	q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting);
 	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	q_vector->num_ringpairs++;
-	q_vector->ring_mask |= BIT(t_idx);
+	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr);
 }
 
 /**
@@ -1007,7 +1013,7 @@
  * i40evf_up_complete - Finish the last steps of bringing up a connection
  * @adapter: board private structure
  **/
-static int i40evf_up_complete(struct i40evf_adapter *adapter)
+static void i40evf_up_complete(struct i40evf_adapter *adapter)
 {
 	adapter->state = __I40EVF_RUNNING;
 	clear_bit(__I40E_DOWN, &adapter->vsi.state);
@@ -1016,7 +1022,6 @@
 
 	adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_QUEUES;
 	mod_timer_pending(&adapter->watchdog_timer, jiffies + 1);
-	return 0;
 }
 
 /**
@@ -1037,6 +1042,7 @@
 
 	netif_carrier_off(netdev);
 	netif_tx_disable(netdev);
+	adapter->link_up = false;
 	i40evf_napi_disable_all(adapter);
 	i40evf_irq_disable(adapter);
 
@@ -1154,6 +1160,7 @@
 		tx_ring->netdev = adapter->netdev;
 		tx_ring->dev = &adapter->pdev->dev;
 		tx_ring->count = adapter->tx_desc_count;
+		tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF);
 		if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE)
 			tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
 
@@ -1162,6 +1169,7 @@
 		rx_ring->netdev = adapter->netdev;
 		rx_ring->dev = &adapter->pdev->dev;
 		rx_ring->count = adapter->rx_desc_count;
+		rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF);
 	}
 
 	return 0;
@@ -1420,7 +1428,9 @@
 {
 	int err;
 
+	rtnl_lock();
 	err = i40evf_set_interrupt_capability(adapter);
+	rtnl_unlock();
 	if (err) {
 		dev_err(&adapter->pdev->dev,
 			"Unable to setup interrupt capabilities\n");
@@ -1729,6 +1739,7 @@
 			set_bit(__I40E_DOWN, &adapter->vsi.state);
 			netif_carrier_off(netdev);
 			netif_tx_disable(netdev);
+			adapter->link_up = false;
 			i40evf_napi_disable_all(adapter);
 			i40evf_irq_disable(adapter);
 			i40evf_free_traffic_irqs(adapter);
@@ -1767,6 +1778,7 @@
 	if (netif_running(adapter->netdev)) {
 		netif_carrier_off(netdev);
 		netif_tx_stop_all_queues(netdev);
+		adapter->link_up = false;
 		i40evf_napi_disable_all(adapter);
 	}
 	i40evf_irq_disable(adapter);
@@ -1781,8 +1793,7 @@
 	i40evf_free_all_tx_resources(adapter);
 
 	/* kill and reinit the admin queue */
-	if (i40evf_shutdown_adminq(hw))
-		dev_warn(&adapter->pdev->dev, "Failed to shut down adminq\n");
+	i40evf_shutdown_adminq(hw);
 	adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
 	err = i40evf_init_adminq(hw);
 	if (err)
@@ -1802,6 +1813,8 @@
 	}
 	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
 	adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER;
+	/* Open RDMA Client again */
+	adapter->aq_required |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED;
 	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
 	i40evf_misc_irq_enable(adapter);
 
@@ -1820,9 +1833,7 @@
 
 		i40evf_configure(adapter);
 
-		err = i40evf_up_complete(adapter);
-		if (err)
-			goto reset_err;
+		i40evf_up_complete(adapter);
 
 		i40evf_irq_enable(adapter, true);
 	} else {
@@ -2052,9 +2063,7 @@
 	i40evf_add_filter(adapter, adapter->hw.mac.addr);
 	i40evf_configure(adapter);
 
-	err = i40evf_up_complete(adapter);
-	if (err)
-		goto err_req_irq;
+	i40evf_up_complete(adapter);
 
 	i40evf_irq_enable(adapter, true);
 
@@ -2268,10 +2277,6 @@
 	adapter->vsi.back = adapter;
 	adapter->vsi.base_vector = 1;
 	adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK;
-	adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC |
-				       ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
-	adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC |
-				       ITR_REG_TO_USEC(I40E_ITR_TX_DEF));
 	vsi->netdev = adapter->netdev;
 	vsi->qs_handle = adapter->vsi_res->qset_handle;
 	if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) {
@@ -2453,6 +2458,7 @@
 		goto err_sw_init;
 
 	netif_carrier_off(netdev);
+	adapter->link_up = false;
 
 	if (!adapter->netdev_registered) {
 		err = register_netdev(netdev);
@@ -2831,7 +2837,8 @@
 
 	pr_info("%s\n", i40evf_copyright);
 
-	i40evf_wq = create_singlethread_workqueue(i40evf_driver_name);
+	i40evf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1,
+				    i40evf_driver_name);
 	if (!i40evf_wq) {
 		pr_err("%s: Failed to create workqueue\n", i40evf_driver_name);
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index d76c221..ddf478d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -817,6 +817,45 @@
 }
 
 /**
+ * i40evf_print_link_message - print link up or down
+ * @adapter: adapter structure
+ *
+ * Log a message telling the world of our wonderous link status
+ */
+static void i40evf_print_link_message(struct i40evf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	char *speed = "Unknown ";
+
+	if (!adapter->link_up) {
+		netdev_info(netdev, "NIC Link is Down\n");
+		return;
+	}
+
+	switch (adapter->link_speed) {
+	case I40E_LINK_SPEED_40GB:
+		speed = "40 G";
+		break;
+	case I40E_LINK_SPEED_20GB:
+		speed = "20 G";
+		break;
+	case I40E_LINK_SPEED_10GB:
+		speed = "10 G";
+		break;
+	case I40E_LINK_SPEED_1GB:
+		speed = "1000 M";
+		break;
+	case I40E_LINK_SPEED_100MB:
+		speed = "100 M";
+		break;
+	default:
+		break;
+	}
+
+	netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed);
+}
+
+/**
  * i40evf_request_reset
  * @adapter: adapter structure
  *
@@ -853,16 +892,20 @@
 			(struct i40e_virtchnl_pf_event *)msg;
 		switch (vpe->event) {
 		case I40E_VIRTCHNL_EVENT_LINK_CHANGE:
-			adapter->link_up =
-				vpe->event_data.link_event.link_status;
-			if (adapter->link_up && !netif_carrier_ok(netdev)) {
-				dev_info(&adapter->pdev->dev, "NIC Link is Up\n");
-				netif_carrier_on(netdev);
-				netif_tx_wake_all_queues(netdev);
-			} else if (!adapter->link_up) {
-				dev_info(&adapter->pdev->dev, "NIC Link is Down\n");
-				netif_carrier_off(netdev);
-				netif_tx_stop_all_queues(netdev);
+			adapter->link_speed =
+				vpe->event_data.link_event.link_speed;
+			if (adapter->link_up !=
+			    vpe->event_data.link_event.link_status) {
+				adapter->link_up =
+					vpe->event_data.link_event.link_status;
+				if (adapter->link_up) {
+					netif_tx_start_all_queues(netdev);
+					netif_carrier_on(netdev);
+				} else {
+					netif_tx_stop_all_queues(netdev);
+					netif_carrier_off(netdev);
+				}
+				i40evf_print_link_message(adapter);
 			}
 			break;
 		case I40E_VIRTCHNL_EVENT_RESET_IMPENDING:
@@ -937,8 +980,6 @@
 	case I40E_VIRTCHNL_OP_ENABLE_QUEUES:
 		/* enable transmits */
 		i40evf_irq_enable(adapter, true);
-		netif_tx_start_all_queues(adapter->netdev);
-		netif_carrier_on(adapter->netdev);
 		break;
 	case I40E_VIRTCHNL_OP_DISABLE_QUEUES:
 		i40evf_free_all_tx_resources(adapter);
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h
index 199ff98..acf0605 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.h
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.h
@@ -188,6 +188,11 @@
 /* ETQF register bit definitions */
 #define E1000_ETQF_FILTER_ENABLE   BIT(26)
 #define E1000_ETQF_1588            BIT(30)
+#define E1000_ETQF_IMM_INT         BIT(29)
+#define E1000_ETQF_QUEUE_ENABLE    BIT(31)
+#define E1000_ETQF_QUEUE_SHIFT     16
+#define E1000_ETQF_QUEUE_MASK      0x00070000
+#define E1000_ETQF_ETYPE_MASK      0x0000FFFF
 
 /* FTQF register bit definitions */
 #define E1000_FTQF_VF_BP               0x00008000
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 2997c44..2688180 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -1024,4 +1024,8 @@
 #define E1000_RTTBCNRC_RF_INT_MASK	\
 	(E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT)
 
+#define E1000_VLAPQF_QUEUE_SEL(_n, q_idx) (q_idx << ((_n) * 4))
+#define E1000_VLAPQF_P_VALID(_n)	(0x1 << (3 + (_n) * 4))
+#define E1000_VLAPQF_QUEUE_MASK	0x03
+
 #endif
diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h
index 21d9d02..d84afdd 100644
--- a/drivers/net/ethernet/intel/igb/e1000_regs.h
+++ b/drivers/net/ethernet/intel/igb/e1000_regs.h
@@ -309,6 +309,7 @@
 					(0x054E0 + ((_i - 16) * 8)))
 #define E1000_RAH(_i)  (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \
 					(0x054E4 + ((_i - 16) * 8)))
+#define E1000_VLAPQF	0x055B0  /* VLAN Priority Queue Filter VLAPQF */
 #define E1000_IP4AT_REG(_i)     (0x05840 + ((_i) * 8))
 #define E1000_IP6AT_REG(_i)     (0x05880 + ((_i) * 4))
 #define E1000_WUPM_REG(_i)      (0x05A00 + ((_i) * 4))
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 5387b3a..d11093d 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -350,11 +350,49 @@
 	};
 #endif
 
+/* The number of L2 ether-type filter registers, Index 3 is reserved
+ * for PTP 1588 timestamp
+ */
+#define MAX_ETYPE_FILTER	(4 - 1)
+/* ETQF filter list: one static filter per filter consumer. This is
+ * to avoid filter collisions later. Add new filters here!!
+ *
+ * Current filters:		Filter 3
+ */
+#define IGB_ETQF_FILTER_1588	3
+
 #define IGB_N_EXTTS	2
 #define IGB_N_PEROUT	2
 #define IGB_N_SDP	4
 #define IGB_RETA_SIZE	128
 
+enum igb_filter_match_flags {
+	IGB_FILTER_FLAG_ETHER_TYPE = 0x1,
+	IGB_FILTER_FLAG_VLAN_TCI   = 0x2,
+};
+
+#define IGB_MAX_RXNFC_FILTERS 16
+
+/* RX network flow classification data structure */
+struct igb_nfc_input {
+	/* Byte layout in order, all values with MSB first:
+	 * match_flags - 1 byte
+	 * etype - 2 bytes
+	 * vlan_tci - 2 bytes
+	 */
+	u8 match_flags;
+	__be16 etype;
+	__be16 vlan_tci;
+};
+
+struct igb_nfc_filter {
+	struct hlist_node nfc_node;
+	struct igb_nfc_input filter;
+	u16 etype_reg_index;
+	u16 sw_idx;
+	u16 action;
+};
+
 /* board specific private data structure */
 struct igb_adapter {
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
@@ -451,6 +489,7 @@
 	struct timecounter tc;
 	u32 tx_hwtstamp_timeouts;
 	u32 rx_hwtstamp_cleared;
+	bool pps_sys_wrap_on;
 
 	struct ptp_pin_desc sdp_config[IGB_N_SDP];
 	struct {
@@ -473,6 +512,13 @@
 	int copper_tries;
 	struct e1000_info ei;
 	u16 eee_advert;
+
+	/* RX network flow classification support */
+	struct hlist_head nfc_filter_list;
+	unsigned int nfc_filter_count;
+	/* lock for RX network flow classification filter */
+	spinlock_t nfc_lock;
+	bool etype_bitmap[MAX_ETYPE_FILTER];
 };
 
 /* flags controlling PTP/1588 function */
@@ -599,4 +645,9 @@
 	return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
 }
 
+int igb_add_filter(struct igb_adapter *adapter,
+		   struct igb_nfc_filter *input);
+int igb_erase_filter(struct igb_adapter *adapter,
+		     struct igb_nfc_filter *input);
+
 #endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 64e91c5..737b664 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2431,6 +2431,63 @@
 	}
 }
 
+#define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
+static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter,
+				     struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
+	struct igb_nfc_filter *rule = NULL;
+
+	/* report total rule count */
+	cmd->data = IGB_MAX_RXNFC_FILTERS;
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		if (fsp->location <= rule->sw_idx)
+			break;
+	}
+
+	if (!rule || fsp->location != rule->sw_idx)
+		return -EINVAL;
+
+	if (rule->filter.match_flags) {
+		fsp->flow_type = ETHER_FLOW;
+		fsp->ring_cookie = rule->action;
+		if (rule->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) {
+			fsp->h_u.ether_spec.h_proto = rule->filter.etype;
+			fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK;
+		}
+		if (rule->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) {
+			fsp->flow_type |= FLOW_EXT;
+			fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
+			fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
+		}
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int igb_get_ethtool_nfc_all(struct igb_adapter *adapter,
+				   struct ethtool_rxnfc *cmd,
+				   u32 *rule_locs)
+{
+	struct igb_nfc_filter *rule;
+	int cnt = 0;
+
+	/* report total rule count */
+	cmd->data = IGB_MAX_RXNFC_FILTERS;
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+		rule_locs[cnt] = rule->sw_idx;
+		cnt++;
+	}
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
 static int igb_get_rss_hash_opts(struct igb_adapter *adapter,
 				 struct ethtool_rxnfc *cmd)
 {
@@ -2484,6 +2541,16 @@
 		cmd->data = adapter->num_rx_queues;
 		ret = 0;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = adapter->nfc_filter_count;
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = igb_get_ethtool_nfc_entry(adapter, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = igb_get_ethtool_nfc_all(adapter, cmd, rule_locs);
+		break;
 	case ETHTOOL_GRXFH:
 		ret = igb_get_rss_hash_opts(adapter, cmd);
 		break;
@@ -2598,6 +2665,279 @@
 	return 0;
 }
 
+static int igb_rxnfc_write_etype_filter(struct igb_adapter *adapter,
+					struct igb_nfc_filter *input)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u8 i;
+	u32 etqf;
+	u16 etype;
+
+	/* find an empty etype filter register */
+	for (i = 0; i < MAX_ETYPE_FILTER; ++i) {
+		if (!adapter->etype_bitmap[i])
+			break;
+	}
+	if (i == MAX_ETYPE_FILTER) {
+		dev_err(&adapter->pdev->dev, "ethtool -N: etype filters are all used.\n");
+		return -EINVAL;
+	}
+
+	adapter->etype_bitmap[i] = true;
+
+	etqf = rd32(E1000_ETQF(i));
+	etype = ntohs(input->filter.etype & ETHER_TYPE_FULL_MASK);
+
+	etqf |= E1000_ETQF_FILTER_ENABLE;
+	etqf &= ~E1000_ETQF_ETYPE_MASK;
+	etqf |= (etype & E1000_ETQF_ETYPE_MASK);
+
+	etqf &= ~E1000_ETQF_QUEUE_MASK;
+	etqf |= ((input->action << E1000_ETQF_QUEUE_SHIFT)
+		& E1000_ETQF_QUEUE_MASK);
+	etqf |= E1000_ETQF_QUEUE_ENABLE;
+
+	wr32(E1000_ETQF(i), etqf);
+
+	input->etype_reg_index = i;
+
+	return 0;
+}
+
+static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter,
+					    struct igb_nfc_filter *input)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u8 vlan_priority;
+	u16 queue_index;
+	u32 vlapqf;
+
+	vlapqf = rd32(E1000_VLAPQF);
+	vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK)
+				>> VLAN_PRIO_SHIFT;
+	queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK;
+
+	/* check whether this vlan prio is already set */
+	if ((vlapqf & E1000_VLAPQF_P_VALID(vlan_priority)) &&
+	    (queue_index != input->action)) {
+		dev_err(&adapter->pdev->dev, "ethtool rxnfc set vlan prio filter failed.\n");
+		return -EEXIST;
+	}
+
+	vlapqf |= E1000_VLAPQF_P_VALID(vlan_priority);
+	vlapqf |= E1000_VLAPQF_QUEUE_SEL(vlan_priority, input->action);
+
+	wr32(E1000_VLAPQF, vlapqf);
+
+	return 0;
+}
+
+int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
+{
+	int err = -EINVAL;
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) {
+		err = igb_rxnfc_write_etype_filter(adapter, input);
+		if (err)
+			return err;
+	}
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI)
+		err = igb_rxnfc_write_vlan_prio_filter(adapter, input);
+
+	return err;
+}
+
+static void igb_clear_etype_filter_regs(struct igb_adapter *adapter,
+					u16 reg_index)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u32 etqf = rd32(E1000_ETQF(reg_index));
+
+	etqf &= ~E1000_ETQF_QUEUE_ENABLE;
+	etqf &= ~E1000_ETQF_QUEUE_MASK;
+	etqf &= ~E1000_ETQF_FILTER_ENABLE;
+
+	wr32(E1000_ETQF(reg_index), etqf);
+
+	adapter->etype_bitmap[reg_index] = false;
+}
+
+static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter,
+				       u16 vlan_tci)
+{
+	struct e1000_hw *hw = &adapter->hw;
+	u8 vlan_priority;
+	u32 vlapqf;
+
+	vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+
+	vlapqf = rd32(E1000_VLAPQF);
+	vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority);
+	vlapqf &= ~E1000_VLAPQF_QUEUE_SEL(vlan_priority,
+						E1000_VLAPQF_QUEUE_MASK);
+
+	wr32(E1000_VLAPQF, vlapqf);
+}
+
+int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input)
+{
+	if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE)
+		igb_clear_etype_filter_regs(adapter,
+					    input->etype_reg_index);
+
+	if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI)
+		igb_clear_vlan_prio_filter(adapter,
+					   ntohs(input->filter.vlan_tci));
+
+	return 0;
+}
+
+static int igb_update_ethtool_nfc_entry(struct igb_adapter *adapter,
+					struct igb_nfc_filter *input,
+					u16 sw_idx)
+{
+	struct igb_nfc_filter *rule, *parent;
+	int err = -EINVAL;
+
+	parent = NULL;
+	rule = NULL;
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		/* hash found, or no matching entry */
+		if (rule->sw_idx >= sw_idx)
+			break;
+		parent = rule;
+	}
+
+	/* if there is an old rule occupying our place remove it */
+	if (rule && (rule->sw_idx == sw_idx)) {
+		if (!input)
+			err = igb_erase_filter(adapter, rule);
+
+		hlist_del(&rule->nfc_node);
+		kfree(rule);
+		adapter->nfc_filter_count--;
+	}
+
+	/* If no input this was a delete, err should be 0 if a rule was
+	 * successfully found and removed from the list else -EINVAL
+	 */
+	if (!input)
+		return err;
+
+	/* initialize node */
+	INIT_HLIST_NODE(&input->nfc_node);
+
+	/* add filter to the list */
+	if (parent)
+		hlist_add_behind(&parent->nfc_node, &input->nfc_node);
+	else
+		hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list);
+
+	/* update counts */
+	adapter->nfc_filter_count++;
+
+	return 0;
+}
+
+static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter,
+				     struct ethtool_rxnfc *cmd)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct igb_nfc_filter *input, *rule;
+	int err = 0;
+
+	if (!(netdev->hw_features & NETIF_F_NTUPLE))
+		return -EOPNOTSUPP;
+
+	/* Don't allow programming if the action is a queue greater than
+	 * the number of online Rx queues.
+	 */
+	if ((fsp->ring_cookie == RX_CLS_FLOW_DISC) ||
+	    (fsp->ring_cookie >= adapter->num_rx_queues)) {
+		dev_err(&adapter->pdev->dev, "ethtool -N: The specified action is invalid\n");
+		return -EINVAL;
+	}
+
+	/* Don't allow indexes to exist outside of available space */
+	if (fsp->location >= IGB_MAX_RXNFC_FILTERS) {
+		dev_err(&adapter->pdev->dev, "Location out of range\n");
+		return -EINVAL;
+	}
+
+	if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW)
+		return -EINVAL;
+
+	if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK &&
+	    fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK))
+		return -EINVAL;
+
+	input = kzalloc(sizeof(*input), GFP_KERNEL);
+	if (!input)
+		return -ENOMEM;
+
+	if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) {
+		input->filter.etype = fsp->h_u.ether_spec.h_proto;
+		input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE;
+	}
+
+	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
+		if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
+			err = -EINVAL;
+			goto err_out;
+		}
+		input->filter.vlan_tci = fsp->h_ext.vlan_tci;
+		input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI;
+	}
+
+	input->action = fsp->ring_cookie;
+	input->sw_idx = fsp->location;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+		if (!memcmp(&input->filter, &rule->filter,
+			    sizeof(input->filter))) {
+			err = -EEXIST;
+			dev_err(&adapter->pdev->dev,
+				"ethtool: this filter is already set\n");
+			goto err_out_w_lock;
+		}
+	}
+
+	err = igb_add_filter(adapter, input);
+	if (err)
+		goto err_out_w_lock;
+
+	igb_update_ethtool_nfc_entry(adapter, input, input->sw_idx);
+
+	spin_unlock(&adapter->nfc_lock);
+	return 0;
+
+err_out_w_lock:
+	spin_unlock(&adapter->nfc_lock);
+err_out:
+	kfree(input);
+	return err;
+}
+
+static int igb_del_ethtool_nfc_entry(struct igb_adapter *adapter,
+				     struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	int err;
+
+	spin_lock(&adapter->nfc_lock);
+	err = igb_update_ethtool_nfc_entry(adapter, NULL, fsp->location);
+	spin_unlock(&adapter->nfc_lock);
+
+	return err;
+}
+
 static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 {
 	struct igb_adapter *adapter = netdev_priv(dev);
@@ -2607,6 +2947,11 @@
 	case ETHTOOL_SRXFH:
 		ret = igb_set_rss_hash_opt(adapter, cmd);
 		break;
+	case ETHTOOL_SRXCLSRLINS:
+		ret = igb_add_ethtool_nfc_entry(adapter, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		ret = igb_del_ethtool_nfc_entry(adapter, cmd);
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 942a89f..edc9a6a 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -58,7 +58,7 @@
 #include "igb.h"
 
 #define MAJ 5
-#define MIN 3
+#define MIN 4
 #define BUILD 0
 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
 __stringify(BUILD) "-k"
@@ -169,13 +169,15 @@
 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
-			       int vf, u16 vlan, u8 qos);
+			       int vf, u16 vlan, u8 qos, __be16 vlan_proto);
 static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
 				   bool setting);
 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
 				 struct ifla_vf_info *ivi);
 static void igb_check_vf_rate_limit(struct igb_adapter *);
+static void igb_nfc_filter_exit(struct igb_adapter *adapter);
+static void igb_nfc_filter_restore(struct igb_adapter *adapter);
 
 #ifdef CONFIG_PCI_IOV
 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
@@ -1611,6 +1613,7 @@
 	igb_setup_mrqc(adapter);
 	igb_setup_rctl(adapter);
 
+	igb_nfc_filter_restore(adapter);
 	igb_configure_tx(adapter);
 	igb_configure_rx(adapter);
 
@@ -2059,6 +2062,21 @@
 	if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
 		return 0;
 
+	if (!(features & NETIF_F_NTUPLE)) {
+		struct hlist_node *node2;
+		struct igb_nfc_filter *rule;
+
+		spin_lock(&adapter->nfc_lock);
+		hlist_for_each_entry_safe(rule, node2,
+					  &adapter->nfc_filter_list, nfc_node) {
+			igb_erase_filter(adapter, rule);
+			hlist_del(&rule->nfc_node);
+			kfree(rule);
+		}
+		spin_unlock(&adapter->nfc_lock);
+		adapter->nfc_filter_count = 0;
+	}
+
 	netdev->features = features;
 
 	if (netif_running(netdev))
@@ -3053,6 +3071,7 @@
 				  VLAN_HLEN;
 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
+	spin_lock_init(&adapter->nfc_lock);
 	spin_lock_init(&adapter->stats64_lock);
 #ifdef CONFIG_PCI_IOV
 	switch (hw->mac.type) {
@@ -3240,6 +3259,8 @@
 	igb_down(adapter);
 	igb_free_irq(adapter);
 
+	igb_nfc_filter_exit(adapter);
+
 	igb_free_all_tx_resources(adapter);
 	igb_free_all_rx_resources(adapter);
 
@@ -6201,14 +6222,17 @@
 	return 0;
 }
 
-static int igb_ndo_set_vf_vlan(struct net_device *netdev,
-			       int vf, u16 vlan, u8 qos)
+static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf,
+			       u16 vlan, u8 qos, __be16 vlan_proto)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 
 	if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
 		return -EINVAL;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) :
 			       igb_disable_port_vlan(adapter, vf);
 }
@@ -8306,4 +8330,28 @@
 
 	return err;
 }
+
+static void igb_nfc_filter_exit(struct igb_adapter *adapter)
+{
+	struct igb_nfc_filter *rule;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
+		igb_erase_filter(adapter, rule);
+
+	spin_unlock(&adapter->nfc_lock);
+}
+
+static void igb_nfc_filter_restore(struct igb_adapter *adapter)
+{
+	struct igb_nfc_filter *rule;
+
+	spin_lock(&adapter->nfc_lock);
+
+	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
+		igb_add_filter(adapter, rule);
+
+	spin_unlock(&adapter->nfc_lock);
+}
 /* igb_main.c */
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index e61b647..a7895c4 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -591,6 +591,7 @@
 			tsim |= TSINTR_SYS_WRAP;
 		else
 			tsim &= ~TSINTR_SYS_WRAP;
+		igb->pps_sys_wrap_on = !!on;
 		wr32(E1000_TSIM, tsim);
 		spin_unlock_irqrestore(&igb->tmreg_lock, flags);
 		return 0;
@@ -744,7 +745,8 @@
 		}
 	}
 
-	shhwtstamps.hwtstamp = ktime_sub_ns(shhwtstamps.hwtstamp, adjust);
+	shhwtstamps.hwtstamp =
+		ktime_add_ns(shhwtstamps.hwtstamp, adjust);
 
 	skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps);
 	dev_kfree_skb_any(adapter->ptp_tx_skb);
@@ -767,13 +769,32 @@
 			 struct sk_buff *skb)
 {
 	__le64 *regval = (__le64 *)va;
+	struct igb_adapter *adapter = q_vector->adapter;
+	int adjust = 0;
 
 	/* The timestamp is recorded in little endian format.
 	 * DWORD: 0        1        2        3
 	 * Field: Reserved Reserved SYSTIML  SYSTIMH
 	 */
-	igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb),
+	igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
 				   le64_to_cpu(regval[1]));
+
+	/* adjust timestamp for the RX latency based on link speed */
+	if (adapter->hw.mac.type == e1000_i210) {
+		switch (adapter->link_speed) {
+		case SPEED_10:
+			adjust = IGB_I210_RX_LATENCY_10;
+			break;
+		case SPEED_100:
+			adjust = IGB_I210_RX_LATENCY_100;
+			break;
+		case SPEED_1000:
+			adjust = IGB_I210_RX_LATENCY_1000;
+			break;
+		}
+	}
+	skb_hwtstamps(skb)->hwtstamp =
+		ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
 }
 
 /**
@@ -825,7 +846,7 @@
 		}
 	}
 	skb_hwtstamps(skb)->hwtstamp =
-		ktime_add_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+		ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
 
 	/* Update the last_rx_timestamp timer in order to enable watchdog check
 	 * for error case of latched timestamp on a dropped packet.
@@ -978,12 +999,12 @@
 
 	/* define ethertype filter for timestamped packets */
 	if (is_l2)
-		wr32(E1000_ETQF(3),
+		wr32(E1000_ETQF(IGB_ETQF_FILTER_1588),
 		     (E1000_ETQF_FILTER_ENABLE | /* enable filter */
 		      E1000_ETQF_1588 | /* enable timestamping */
 		      ETH_P_1588));     /* 1588 eth protocol type */
 	else
-		wr32(E1000_ETQF(3), 0);
+		wr32(E1000_ETQF(IGB_ETQF_FILTER_1588), 0);
 
 	/* L4 Queue Filter[3]: filter by destination port and protocol */
 	if (is_l4) {
@@ -1139,7 +1160,7 @@
 	if (IS_ERR(adapter->ptp_clock)) {
 		adapter->ptp_clock = NULL;
 		dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
-	} else {
+	} else if (adapter->ptp_clock) {
 		dev_info(&adapter->pdev->dev, "added PHC on %s\n",
 			 adapter->netdev->name);
 		adapter->ptp_flags |= IGB_PTP_ENABLED;
@@ -1215,7 +1236,9 @@
 	case e1000_i211:
 		wr32(E1000_TSAUXC, 0x0);
 		wr32(E1000_TSSDP, 0x0);
-		wr32(E1000_TSIM, TSYNC_INTERRUPTS);
+		wr32(E1000_TSIM,
+		     TSYNC_INTERRUPTS |
+		     (adapter->pps_sys_wrap_on ? TSINTR_SYS_WRAP : 0));
 		wr32(E1000_IMS, E1000_IMS_TS);
 		break;
 	default:
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index b0778ba..12bb877 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -47,7 +47,7 @@
 
 #include "igbvf.h"
 
-#define DRV_VERSION "2.0.2-k"
+#define DRV_VERSION "2.4.0-k"
 char igbvf_driver_name[] = "igbvf";
 const char igbvf_driver_version[] = DRV_VERSION;
 static const char igbvf_driver_string[] =
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 9475ff9..b06e32d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -45,10 +45,10 @@
 #include "ixgbe_type.h"
 #include "ixgbe_common.h"
 #include "ixgbe_dcb.h"
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 #define IXGBE_FCOE
 #include "ixgbe_fcoe.h"
-#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
+#endif /* IS_ENABLED(CONFIG_FCOE) */
 #ifdef CONFIG_IXGBE_DCA
 #include <linux/dca.h>
 #endif
@@ -645,6 +645,7 @@
 #define IXGBE_FLAG_RX_HWTSTAMP_ENABLED		BIT(25)
 #define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER	BIT(26)
 #define IXGBE_FLAG_DCB_CAPABLE			BIT(27)
+#define IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE	BIT(28)
 
 	u32 flags2;
 #define IXGBE_FLAG2_RSC_CAPABLE			BIT(0)
@@ -653,13 +654,12 @@
 #define IXGBE_FLAG2_TEMP_SENSOR_EVENT		BIT(3)
 #define IXGBE_FLAG2_SEARCH_FOR_SFP		BIT(4)
 #define IXGBE_FLAG2_SFP_NEEDS_RESET		BIT(5)
-#define IXGBE_FLAG2_RESET_REQUESTED		BIT(6)
 #define IXGBE_FLAG2_FDIR_REQUIRES_REINIT	BIT(7)
 #define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP		BIT(8)
 #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP		BIT(9)
 #define IXGBE_FLAG2_PTP_PPS_ENABLED		BIT(10)
 #define IXGBE_FLAG2_PHY_INTERRUPT		BIT(11)
-#define IXGBE_FLAG2_VXLAN_REREG_NEEDED		BIT(12)
+#define IXGBE_FLAG2_UDP_TUN_REREG_NEEDED	BIT(12)
 #define IXGBE_FLAG2_VLAN_PROMISC		BIT(13)
 
 	/* Tx fast path data */
@@ -673,6 +673,7 @@
 
 	/* Port number used to identify VXLAN traffic */
 	__be16 vxlan_port;
+	__be16 geneve_port;
 
 	/* TX */
 	struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
@@ -840,6 +841,7 @@
 	__IXGBE_IN_SFP_INIT,
 	__IXGBE_PTP_RUNNING,
 	__IXGBE_PTP_TX_IN_PROGRESS,
+	__IXGBE_RESET_REQUESTED,
 };
 
 struct ixgbe_cb {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index b4217f3..77d3039 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -99,6 +99,7 @@
 		case IXGBE_DEV_ID_X550T:
 		case IXGBE_DEV_ID_X550T1:
 		case IXGBE_DEV_ID_X550EM_X_10G_T:
+		case IXGBE_DEV_ID_X550EM_A_10G_T:
 			supported = true;
 			break;
 		default:
@@ -2958,8 +2959,10 @@
 	}
 
 	/* was that the last pool using this rar? */
-	if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0)
+	if (mpsar_lo == 0 && mpsar_hi == 0 &&
+	    rar != 0 && rar != hw->mac.san_mac_rar_index)
 		hw->mac.ops.clear_rar(hw, rar);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 0d7209e..f49f803 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -193,7 +193,9 @@
 	if (supported_link & IXGBE_LINK_SPEED_10GB_FULL)
 		ecmd->supported |= ixgbe_get_supported_10gtypes(hw);
 	if (supported_link & IXGBE_LINK_SPEED_1GB_FULL)
-		ecmd->supported |= SUPPORTED_1000baseT_Full;
+		ecmd->supported |= (ixgbe_isbackplane(hw->phy.media_type)) ?
+				   SUPPORTED_1000baseKX_Full :
+				   SUPPORTED_1000baseT_Full;
 	if (supported_link & IXGBE_LINK_SPEED_100_FULL)
 		ecmd->supported |= ixgbe_isbackplane(hw->phy.media_type) ?
 				   SUPPORTED_1000baseKX_Full :
@@ -311,6 +313,25 @@
 		break;
 	}
 
+	/* Indicate pause support */
+	ecmd->supported |= SUPPORTED_Pause;
+
+	switch (hw->fc.requested_mode) {
+	case ixgbe_fc_full:
+		ecmd->advertising |= ADVERTISED_Pause;
+		break;
+	case ixgbe_fc_rx_pause:
+		ecmd->advertising |= ADVERTISED_Pause |
+				     ADVERTISED_Asym_Pause;
+		break;
+	case ixgbe_fc_tx_pause:
+		ecmd->advertising |= ADVERTISED_Asym_Pause;
+		break;
+	default:
+		ecmd->advertising &= ~(ADVERTISED_Pause |
+				       ADVERTISED_Asym_Pause);
+	}
+
 	if (netif_carrier_ok(netdev)) {
 		switch (adapter->link_speed) {
 		case IXGBE_LINK_SPEED_10GB_FULL:
@@ -2926,9 +2947,13 @@
 static void ixgbe_get_reta(struct ixgbe_adapter *adapter, u32 *indir)
 {
 	int i, reta_size = ixgbe_rss_indir_tbl_entries(adapter);
+	u16 rss_m = adapter->ring_feature[RING_F_RSS].mask;
+
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		rss_m = adapter->ring_feature[RING_F_RSS].indices - 1;
 
 	for (i = 0; i < reta_size; i++)
-		indir[i] = adapter->rss_indir_tbl[i];
+		indir[i] = adapter->rss_indir_tbl[i] & rss_m;
 }
 
 static int ixgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
@@ -3039,8 +3064,8 @@
 		/* We only support one q_vector without MSI-X */
 		max_combined = 1;
 	} else if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
-		/* SR-IOV currently only allows one queue on the PF */
-		max_combined = 1;
+		/* Limit value based on the queue mask */
+		max_combined = adapter->ring_feature[RING_F_RSS].mask + 1;
 	} else if (tcs > 1) {
 		/* For DCB report channels per traffic class */
 		if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index bcdc884..15ab337 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -515,15 +515,16 @@
 	vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i);
 
 	/* 64 pool mode with 2 queues per pool */
-	if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) {
+	if ((vmdq_i > 32) || (vmdq_i > 16 && pools)) {
 		vmdq_m = IXGBE_82599_VMDQ_2Q_MASK;
 		rss_m = IXGBE_RSS_2Q_MASK;
 		rss_i = min_t(u16, rss_i, 2);
-	/* 32 pool mode with 4 queues per pool */
+	/* 32 pool mode with up to 4 queues per pool */
 	} else {
 		vmdq_m = IXGBE_82599_VMDQ_4Q_MASK;
 		rss_m = IXGBE_RSS_4Q_MASK;
-		rss_i = 4;
+		/* We can support 4, 2, or 1 queues */
+		rss_i = (rss_i > 3) ? 4 : (rss_i > 1) ? 2 : 1;
 	}
 
 #ifdef IXGBE_FCOE
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 5418c69a..a244d9a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -137,6 +137,7 @@
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N), board_x550em_a },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII), board_x550em_a },
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L), board_x550em_a },
+	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_10G_T), board_x550em_a},
 	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP), board_x550em_a },
 	/* required last entry */
 	{0, }
@@ -1103,7 +1104,7 @@
 
 	/* Do the reset outside of interrupt context */
 	if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
-		adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED;
+		set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
 		e_warn(drv, "initiating reset due to tx timeout\n");
 		ixgbe_service_event_schedule(adapter);
 	}
@@ -1495,7 +1496,6 @@
 				     struct sk_buff *skb)
 {
 	__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
-	__le16 hdr_info = rx_desc->wb.lower.lo_dword.hs_rss.hdr_info;
 	bool encap_pkt = false;
 
 	skb_checksum_none_assert(skb);
@@ -1504,8 +1504,8 @@
 	if (!(ring->netdev->features & NETIF_F_RXCSUM))
 		return;
 
-	if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) &&
-	    (hdr_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_TUNNEL >> 16))) {
+	/* check for VXLAN and Geneve packets */
+	if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) {
 		encap_pkt = true;
 		skb->encapsulation = 1;
 	}
@@ -2777,7 +2777,7 @@
 		}
 		if (eicr & IXGBE_EICR_ECC) {
 			e_info(link, "Received ECC Err, initiating reset\n");
-			adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED;
+			set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
 			ixgbe_service_event_schedule(adapter);
 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
 		}
@@ -3007,7 +3007,7 @@
 	case ixgbe_mac_x550em_a:
 		if (eicr & IXGBE_EICR_ECC) {
 			e_info(link, "Received ECC Err, initiating reset\n");
-			adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED;
+			set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
 			ixgbe_service_event_schedule(adapter);
 			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
 		}
@@ -3224,7 +3224,7 @@
 		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
 	} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
 	if (!wait_loop)
-		e_err(drv, "Could not enable Tx Queue %d\n", reg_idx);
+		hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx);
 }
 
 static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter)
@@ -3248,7 +3248,8 @@
 			mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ;
 		else if (tcs > 1)
 			mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ;
-		else if (adapter->ring_feature[RING_F_RSS].indices == 4)
+		else if (adapter->ring_feature[RING_F_VMDQ].mask ==
+			 IXGBE_82599_VMDQ_4Q_MASK)
 			mtqc |= IXGBE_MTQC_32VF;
 		else
 			mtqc |= IXGBE_MTQC_64VF;
@@ -3475,12 +3476,12 @@
 	u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
 	u16 rss_i = adapter->ring_feature[RING_F_RSS].indices;
 
-	/* Program table for at least 2 queues w/ SR-IOV so that VFs can
+	/* Program table for at least 4 queues w/ SR-IOV so that VFs can
 	 * make full use of any rings they may have.  We will use the
 	 * PSRTYPE register to control how many rings we use within the PF.
 	 */
-	if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 2))
-		rss_i = 2;
+	if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 4))
+		rss_i = 4;
 
 	/* Fill out hash function seeds */
 	for (i = 0; i < 10; i++)
@@ -3544,7 +3545,8 @@
 				mrqc = IXGBE_MRQC_VMDQRT8TCEN;	/* 8 TCs */
 			else if (tcs > 1)
 				mrqc = IXGBE_MRQC_VMDQRT4TCEN;	/* 4 TCs */
-			else if (adapter->ring_feature[RING_F_RSS].indices == 4)
+			else if (adapter->ring_feature[RING_F_VMDQ].mask ==
+				 IXGBE_82599_VMDQ_4Q_MASK)
 				mrqc = IXGBE_MRQC_VMDQRSS32EN;
 			else
 				mrqc = IXGBE_MRQC_VMDQRSS64EN;
@@ -3922,6 +3924,9 @@
 	rfctl &= ~IXGBE_RFCTL_RSC_DIS;
 	if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED))
 		rfctl |= IXGBE_RFCTL_RSC_DIS;
+
+	/* disable NFS filtering */
+	rfctl |= (IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS);
 	IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl);
 
 	/* Program registers for the distribution of queues */
@@ -4100,24 +4105,22 @@
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 vlnctrl, i;
 
-	switch (hw->mac.type) {
-	case ixgbe_mac_82599EB:
-	case ixgbe_mac_X540:
-	case ixgbe_mac_X550:
-	case ixgbe_mac_X550EM_x:
-	case ixgbe_mac_x550em_a:
-	default:
-		if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
-			break;
-		/* fall through */
-	case ixgbe_mac_82598EB:
-		/* legacy case, we can just disable VLAN filtering */
-		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-		vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN);
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+
+	if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) {
+	/* For VMDq and SR-IOV we must leave VLAN filtering enabled */
+		vlnctrl |= IXGBE_VLNCTRL_VFE;
+		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+	} else {
+		vlnctrl &= ~IXGBE_VLNCTRL_VFE;
 		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
 		return;
 	}
 
+	/* Nothing to do for 82598 */
+	if (hw->mac.type == ixgbe_mac_82598EB)
+		return;
+
 	/* We are already in VLAN promisc, nothing to do */
 	if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)
 		return;
@@ -4191,23 +4194,14 @@
 	struct ixgbe_hw *hw = &adapter->hw;
 	u32 vlnctrl, i;
 
-	switch (hw->mac.type) {
-	case ixgbe_mac_82599EB:
-	case ixgbe_mac_X540:
-	case ixgbe_mac_X550:
-	case ixgbe_mac_X550EM_x:
-	case ixgbe_mac_x550em_a:
-	default:
-		if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED)
-			break;
-		/* fall through */
-	case ixgbe_mac_82598EB:
-		vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
-		vlnctrl &= ~IXGBE_VLNCTRL_CFIEN;
-		vlnctrl |= IXGBE_VLNCTRL_VFE;
-		IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+	/* Set VLAN filtering to enabled */
+	vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
+	vlnctrl |= IXGBE_VLNCTRL_VFE;
+	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl);
+
+	if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) ||
+	    hw->mac.type == ixgbe_mac_82598EB)
 		return;
-	}
 
 	/* We are not in VLAN promisc, nothing to do */
 	if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
@@ -4580,18 +4574,23 @@
 	}
 }
 
-static void ixgbe_clear_vxlan_port(struct ixgbe_adapter *adapter)
+static void ixgbe_clear_udp_tunnel_port(struct ixgbe_adapter *adapter, u32 mask)
 {
-	switch (adapter->hw.mac.type) {
-	case ixgbe_mac_X550:
-	case ixgbe_mac_X550EM_x:
-	case ixgbe_mac_x550em_a:
-		IXGBE_WRITE_REG(&adapter->hw, IXGBE_VXLANCTRL, 0);
+	struct ixgbe_hw *hw = &adapter->hw;
+	u32 vxlanctrl;
+
+	if (!(adapter->flags & (IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE |
+				IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)))
+		return;
+
+	vxlanctrl = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) && ~mask;
+	IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, vxlanctrl);
+
+	if (mask & IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK)
 		adapter->vxlan_port = 0;
-		break;
-	default:
-		break;
-	}
+
+	if (mask & IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK)
+		adapter->geneve_port = 0;
 }
 
 #ifdef CONFIG_IXGBE_DCB
@@ -5494,8 +5493,8 @@
 
 	ixgbe_napi_disable_all(adapter);
 
-	adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT |
-			     IXGBE_FLAG2_RESET_REQUESTED);
+	clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
+	adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT;
 	adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE;
 
 	del_timer_sync(&adapter->service_timer);
@@ -5705,8 +5704,10 @@
 		if (fwsm & IXGBE_FWSM_TS_ENABLED)
 			adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE;
 		break;
-	case ixgbe_mac_X550EM_x:
 	case ixgbe_mac_x550em_a:
+		adapter->flags |= IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE;
+	/* fall through */
+	case ixgbe_mac_X550EM_x:
 #ifdef CONFIG_IXGBE_DCB
 		adapter->flags &= ~IXGBE_FLAG_DCB_CAPABLE;
 #endif
@@ -6138,7 +6139,7 @@
 
 	ixgbe_up_complete(adapter);
 
-	ixgbe_clear_vxlan_port(adapter);
+	ixgbe_clear_udp_tunnel_port(adapter, IXGBE_VXLANCTRL_ALL_UDPPORT_MASK);
 	udp_tunnel_get_rx_info(netdev);
 
 	return 0;
@@ -6915,7 +6916,7 @@
 			 * (Do the reset outside of interrupt context).
 			 */
 			e_warn(drv, "initiating reset to clear Tx work after link loss\n");
-			adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED;
+			set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
 		}
 	}
 }
@@ -7181,11 +7182,9 @@
 
 static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter)
 {
-	if (!(adapter->flags2 & IXGBE_FLAG2_RESET_REQUESTED))
+	if (!test_and_clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state))
 		return;
 
-	adapter->flags2 &= ~IXGBE_FLAG2_RESET_REQUESTED;
-
 	/* If we're already down, removing or resetting, just bail */
 	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
 	    test_bit(__IXGBE_REMOVING, &adapter->state) ||
@@ -7219,9 +7218,9 @@
 		ixgbe_service_event_complete(adapter);
 		return;
 	}
-	if (adapter->flags2 & IXGBE_FLAG2_VXLAN_REREG_NEEDED) {
+	if (adapter->flags2 & IXGBE_FLAG2_UDP_TUN_REREG_NEEDED) {
 		rtnl_lock();
-		adapter->flags2 &= ~IXGBE_FLAG2_VXLAN_REREG_NEEDED;
+		adapter->flags2 &= ~IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
 		udp_tunnel_get_rx_info(adapter->netdev);
 		rtnl_unlock();
 	}
@@ -7661,6 +7660,10 @@
 		if (adapter->vxlan_port &&
 		    udp_hdr(skb)->dest == adapter->vxlan_port)
 			hdr.network = skb_inner_network_header(skb);
+
+		if (adapter->geneve_port &&
+		    udp_hdr(skb)->dest == adapter->geneve_port)
+			hdr.network = skb_inner_network_header(skb);
 	}
 
 	/* Currently only IPv4/IPv6 with TCP is supported */
@@ -8390,12 +8393,14 @@
 			    struct tcf_exts *exts, u64 *action, u8 *queue)
 {
 	const struct tc_action *a;
+	LIST_HEAD(actions);
 	int err;
 
 	if (tc_no_actions(exts))
 		return -EINVAL;
 
-	tc_for_each_action(a, exts) {
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
 
 		/* Drop action */
 		if (is_tcf_gact_shot(a)) {
@@ -8794,10 +8799,23 @@
 	netdev->features = features;
 
 	if ((adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) {
-		if (features & NETIF_F_RXCSUM)
-			adapter->flags2 |= IXGBE_FLAG2_VXLAN_REREG_NEEDED;
-		else
-			ixgbe_clear_vxlan_port(adapter);
+		if (features & NETIF_F_RXCSUM) {
+			adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
+		} else {
+			u32 port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK;
+
+			ixgbe_clear_udp_tunnel_port(adapter, port_mask);
+		}
+	}
+
+	if ((adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) {
+		if (features & NETIF_F_RXCSUM) {
+			adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
+		} else {
+			u32 port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK;
+
+			ixgbe_clear_udp_tunnel_port(adapter, port_mask);
+		}
 	}
 
 	if (need_reset)
@@ -8810,67 +8828,115 @@
 }
 
 /**
- * ixgbe_add_vxlan_port - Get notifications about VXLAN ports that come up
+ * ixgbe_add_udp_tunnel_port - Get notifications about adding UDP tunnel ports
  * @dev: The port's netdev
  * @ti: Tunnel endpoint information
  **/
-static void ixgbe_add_vxlan_port(struct net_device *dev,
-				 struct udp_tunnel_info *ti)
+static void ixgbe_add_udp_tunnel_port(struct net_device *dev,
+				      struct udp_tunnel_info *ti)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_hw *hw = &adapter->hw;
 	__be16 port = ti->port;
-
-	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
-		return;
+	u32 port_shift = 0;
+	u32 reg;
 
 	if (ti->sa_family != AF_INET)
 		return;
 
-	if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
-		return;
+	switch (ti->type) {
+	case UDP_TUNNEL_TYPE_VXLAN:
+		if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
+			return;
 
-	if (adapter->vxlan_port == port)
-		return;
+		if (adapter->vxlan_port == port)
+			return;
 
-	if (adapter->vxlan_port) {
-		netdev_info(dev,
-			    "Hit Max num of VXLAN ports, not adding port %d\n",
-			    ntohs(port));
+		if (adapter->vxlan_port) {
+			netdev_info(dev,
+				    "VXLAN port %d set, not adding port %d\n",
+				    ntohs(adapter->vxlan_port),
+				    ntohs(port));
+			return;
+		}
+
+		adapter->vxlan_port = port;
+		break;
+	case UDP_TUNNEL_TYPE_GENEVE:
+		if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE))
+			return;
+
+		if (adapter->geneve_port == port)
+			return;
+
+		if (adapter->geneve_port) {
+			netdev_info(dev,
+				    "GENEVE port %d set, not adding port %d\n",
+				    ntohs(adapter->geneve_port),
+				    ntohs(port));
+			return;
+		}
+
+		port_shift = IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT;
+		adapter->geneve_port = port;
+		break;
+	default:
 		return;
 	}
 
-	adapter->vxlan_port = port;
-	IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, ntohs(port));
+	reg = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) | ntohs(port) << port_shift;
+	IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, reg);
 }
 
 /**
- * ixgbe_del_vxlan_port - Get notifications about VXLAN ports that go away
+ * ixgbe_del_udp_tunnel_port - Get notifications about removing UDP tunnel ports
  * @dev: The port's netdev
  * @ti: Tunnel endpoint information
  **/
-static void ixgbe_del_vxlan_port(struct net_device *dev,
-				 struct udp_tunnel_info *ti)
+static void ixgbe_del_udp_tunnel_port(struct net_device *dev,
+				      struct udp_tunnel_info *ti)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	u32 port_mask;
 
-	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
+	if (ti->type != UDP_TUNNEL_TYPE_VXLAN &&
+	    ti->type != UDP_TUNNEL_TYPE_GENEVE)
 		return;
 
 	if (ti->sa_family != AF_INET)
 		return;
 
-	if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
-		return;
+	switch (ti->type) {
+	case UDP_TUNNEL_TYPE_VXLAN:
+		if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE))
+			return;
 
-	if (adapter->vxlan_port != ti->port) {
-		netdev_info(dev, "Port %d was not found, not deleting\n",
-			    ntohs(ti->port));
+		if (adapter->vxlan_port != ti->port) {
+			netdev_info(dev, "VXLAN port %d not found\n",
+				    ntohs(ti->port));
+			return;
+		}
+
+		port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK;
+		break;
+	case UDP_TUNNEL_TYPE_GENEVE:
+		if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE))
+			return;
+
+		if (adapter->geneve_port != ti->port) {
+			netdev_info(dev, "GENEVE port %d not found\n",
+				    ntohs(ti->port));
+			return;
+		}
+
+		port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK;
+		break;
+	default:
 		return;
 	}
 
-	ixgbe_clear_vxlan_port(adapter);
-	adapter->flags2 |= IXGBE_FLAG2_VXLAN_REREG_NEEDED;
+	ixgbe_clear_udp_tunnel_port(adapter, port_mask);
+	adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED;
 }
 
 static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
@@ -9184,8 +9250,8 @@
 	.ndo_bridge_getlink	= ixgbe_ndo_bridge_getlink,
 	.ndo_dfwd_add_station	= ixgbe_fwd_add,
 	.ndo_dfwd_del_station	= ixgbe_fwd_del,
-	.ndo_udp_tunnel_add	= ixgbe_add_vxlan_port,
-	.ndo_udp_tunnel_del	= ixgbe_del_vxlan_port,
+	.ndo_udp_tunnel_add	= ixgbe_add_udp_tunnel_port,
+	.ndo_udp_tunnel_del	= ixgbe_del_udp_tunnel_port,
 	.ndo_features_check	= ixgbe_features_check,
 };
 
@@ -9517,6 +9583,7 @@
 
 	/* copy netdev features into list of user selectable features */
 	netdev->hw_features |= netdev->features |
+			       NETIF_F_HW_VLAN_CTAG_FILTER |
 			       NETIF_F_HW_VLAN_CTAG_RX |
 			       NETIF_F_HW_VLAN_CTAG_TX |
 			       NETIF_F_RXALL |
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index db0731e..021ab9b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -346,8 +346,8 @@
 				return 0;
 			}
 		}
-		/* clear value if nothing found */
-		hw->phy.mdio.prtad = 0;
+		/* indicate no PHY found */
+		hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
 		return IXGBE_ERR_PHY_ADDR_INVALID;
 	}
 	return 0;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index e5431bf..a922776 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -1254,7 +1254,7 @@
 		adapter->ptp_clock = NULL;
 		e_dev_err("ptp_clock_register failed\n");
 		return err;
-	} else
+	} else if (adapter->ptp_clock)
 		e_dev_info("registered PHC device on %s\n", netdev->name);
 
 	/* set default timestamp mode to disabled here. We do this in
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 8618599..7e5d985 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -329,13 +329,15 @@
 	for (i = 0; i < adapter->num_vfs; i++)
 		ixgbe_vf_configuration(dev, (i | 0x10000000));
 
+	/* reset before enabling SRIOV to avoid mailbox issues */
+	ixgbe_sriov_reinit(adapter);
+
 	err = pci_enable_sriov(dev, num_vfs);
 	if (err) {
 		e_dev_warn("Failed to enable PCI sriov: %d\n", err);
 		return err;
 	}
 	ixgbe_get_vfs(adapter);
-	ixgbe_sriov_reinit(adapter);
 
 	return num_vfs;
 #else
@@ -1354,13 +1356,16 @@
 	return err;
 }
 
-int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
+int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
+			  u8 qos, __be16 vlan_proto)
 {
 	int err = 0;
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
 	if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7))
 		return -EINVAL;
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
 	if (vlan || qos) {
 		/* Check if there is already a port VLAN set, if so
 		 * we have to delete the old one first before we
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index 47e65e2..0c7977d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -43,7 +43,7 @@
 void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
 int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
 int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
-			   u8 qos);
+			   u8 qos, __be16 vlan_proto);
 int ixgbe_link_mbps(struct ixgbe_adapter *adapter);
 int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
 			int max_tx_rate);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 1248a99..31d82e3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -90,6 +90,7 @@
 #define IXGBE_DEV_ID_X550EM_A_SFP_N	0x15C4
 #define IXGBE_DEV_ID_X550EM_A_SGMII	0x15C6
 #define IXGBE_DEV_ID_X550EM_A_SGMII_L	0x15C7
+#define IXGBE_DEV_ID_X550EM_A_10G_T	0x15C8
 #define IXGBE_DEV_ID_X550EM_A_SFP	0x15CE
 
 /* VF Device IDs */
@@ -487,6 +488,13 @@
 #define IXGBE_FHFT_EXT(_n)	(0x09800 + ((_n) * 0x100)) /* Ext Flexible Host
 							    * Filter Table */
 
+/* masks for accessing VXLAN and GENEVE UDP ports */
+#define IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK     0x0000ffff /* VXLAN port */
+#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK    0xffff0000 /* GENEVE port */
+#define IXGBE_VXLANCTRL_ALL_UDPPORT_MASK       0xffffffff /* GENEVE/VXLAN */
+
+#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT   16
+
 #define IXGBE_FLEXIBLE_FILTER_COUNT_MAX         4
 #define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX     2
 
@@ -1823,6 +1831,9 @@
 #define IXGBE_LED_IVRT(_i)       IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i)
 #define IXGBE_LED_BLINK(_i)      IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i)
 #define IXGBE_LED_MODE_MASK(_i)  IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i)
+#define IXGBE_X557_LED_MANUAL_SET_MASK	BIT(8)
+#define IXGBE_X557_MAX_LED_INDEX	3
+#define IXGBE_X557_LED_PROVISIONING	0xC430
 
 /* LED modes */
 #define IXGBE_LED_LINK_UP       0x0
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 4716ca4..7e6b926 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -295,6 +295,12 @@
 	case IXGBE_DEV_ID_X550EM_A_KR_L:
 		hw->phy.type = ixgbe_phy_x550em_kr;
 		break;
+	case IXGBE_DEV_ID_X550EM_A_10G_T:
+		if (hw->bus.lan_id)
+			hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM;
+		else
+			hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
+		/* Fallthrough */
 	case IXGBE_DEV_ID_X550EM_X_1G_T:
 	case IXGBE_DEV_ID_X550EM_X_10G_T:
 		return ixgbe_identify_phy_generic(hw);
@@ -1453,7 +1459,7 @@
 	/* Configure internal PHY for KR/KX. */
 	ixgbe_setup_kr_speed_x550em(hw, speed);
 
-	if (!hw->phy.mdio.prtad || hw->phy.mdio.prtad == 0xFFFF)
+	if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE)
 		return IXGBE_ERR_PHY_ADDR_INVALID;
 
 	/* Get external PHY device id */
@@ -2114,6 +2120,50 @@
 	return ixgbe_enable_lasi_ext_t_x550em(hw);
 }
 
+/**
+ *  ixgbe_led_on_t_x550em - Turns on the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @led_idx: led number to turn on
+ **/
+static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+{
+	u16 phy_data;
+
+	if (led_idx >= IXGBE_X557_MAX_LED_INDEX)
+		return IXGBE_ERR_PARAM;
+
+	/* To turn on the LED, set mode to ON. */
+	hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+			     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+	phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK;
+	hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+			      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+
+	return 0;
+}
+
+/**
+ *  ixgbe_led_off_t_x550em - Turns off the software controllable LEDs.
+ *  @hw: pointer to hardware structure
+ *  @led_idx: led number to turn off
+ **/
+static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
+{
+	u16 phy_data;
+
+	if (led_idx >= IXGBE_X557_MAX_LED_INDEX)
+		return IXGBE_ERR_PARAM;
+
+	/* To turn on the LED, set mode to ON. */
+	hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+			     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+	phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK;
+	hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
+			      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+
+	return 0;
+}
+
 /** ixgbe_get_lcd_x550em - Determine lowest common denominator
  *  @hw: pointer to hardware structure
  *  @lcd_speed: pointer to lowest common link speed
@@ -2344,18 +2394,12 @@
 	/* If X552 (X550EM_a) and MDIO is connected to external PHY, then set
 	 * PHY address. This register field was has only been used for X552.
 	 */
-	if (!hw->phy.nw_mng_if_sel) {
-		if (hw->mac.type == ixgbe_mac_x550em_a) {
-			struct ixgbe_adapter *adapter = hw->back;
-
-			e_warn(drv, "nw_mng_if_sel not set\n");
-		}
-		return;
+	if (hw->mac.type == ixgbe_mac_x550em_a &&
+	    hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) {
+		hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel &
+				      IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
+				     IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
 	}
-
-	hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel &
-			      IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
-			     IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
 }
 
 /** ixgbe_init_phy_ops_X550em - PHY/SFP specific init
@@ -2456,6 +2500,7 @@
 		break;
 	case IXGBE_DEV_ID_X550EM_X_1G_T:
 	case IXGBE_DEV_ID_X550EM_X_10G_T:
+	case IXGBE_DEV_ID_X550EM_A_10G_T:
 		media_type = ixgbe_media_type_copper;
 		break;
 	default:
@@ -2514,6 +2559,9 @@
 
 	switch (hw->device_id) {
 	case IXGBE_DEV_ID_X550EM_X_10G_T:
+	case IXGBE_DEV_ID_X550EM_A_SGMII:
+	case IXGBE_DEV_ID_X550EM_A_SGMII_L:
+	case IXGBE_DEV_ID_X550EM_A_10G_T:
 	case IXGBE_DEV_ID_X550EM_A_SFP:
 		/* Config MDIO clock speed before the first MDIO PHY access */
 		hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
@@ -2853,8 +2901,6 @@
 	.write_analog_reg8		= NULL, \
 	.set_rxpba			= &ixgbe_set_rxpba_generic, \
 	.check_link			= &ixgbe_check_mac_link_generic, \
-	.led_on				= &ixgbe_led_on_generic, \
-	.led_off			= &ixgbe_led_off_generic, \
 	.blink_led_start		= &ixgbe_blink_led_start_X540, \
 	.blink_led_stop			= &ixgbe_blink_led_stop_X540, \
 	.set_rar			= &ixgbe_set_rar_generic, \
@@ -2886,6 +2932,8 @@
 
 static const struct ixgbe_mac_operations mac_ops_X550 = {
 	X550_COMMON_MAC
+	.led_on			= ixgbe_led_on_generic,
+	.led_off		= ixgbe_led_off_generic,
 	.reset_hw		= &ixgbe_reset_hw_X540,
 	.get_media_type		= &ixgbe_get_media_type_X540,
 	.get_san_mac_addr	= &ixgbe_get_san_mac_addr_generic,
@@ -2904,6 +2952,8 @@
 
 static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
 	X550_COMMON_MAC
+	.led_on			= ixgbe_led_on_t_x550em,
+	.led_off		= ixgbe_led_off_t_x550em,
 	.reset_hw		= &ixgbe_reset_hw_X550em,
 	.get_media_type		= &ixgbe_get_media_type_X550em,
 	.get_san_mac_addr	= NULL,
@@ -2922,6 +2972,8 @@
 
 static struct ixgbe_mac_operations mac_ops_x550em_a = {
 	X550_COMMON_MAC
+	.led_on			= ixgbe_led_on_t_x550em,
+	.led_off		= ixgbe_led_off_t_x550em,
 	.reset_hw		= ixgbe_reset_hw_X550em,
 	.get_media_type		= ixgbe_get_media_type_X550em,
 	.get_san_mac_addr	= NULL,
@@ -2997,6 +3049,8 @@
 	.identify		= &ixgbe_identify_phy_x550em,
 	.read_reg		= &ixgbe_read_phy_reg_x550a,
 	.write_reg		= &ixgbe_write_phy_reg_x550a,
+	.read_reg_mdi		= &ixgbe_read_phy_reg_mdi,
+	.write_reg_mdi		= &ixgbe_write_phy_reg_mdi,
 };
 
 static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = {
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index be52f59..5639fbe 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -502,12 +502,9 @@
 void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter);
 void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter);
 
-#ifdef DEBUG
-char *ixgbevf_get_hw_dev_name(struct ixgbe_hw *hw);
-#define hw_dbg(hw, format, arg...) \
-	printk(KERN_DEBUG "%s: " format, ixgbevf_get_hw_dev_name(hw), ##arg)
-#else
-#define hw_dbg(hw, format, arg...) do {} while (0)
-#endif
+#define ixgbevf_hw_to_netdev(hw) \
+	(((struct ixgbevf_adapter *)(hw)->back)->netdev)
 
+#define hw_dbg(hw, format, arg...) \
+	netdev_dbg(ixgbevf_hw_to_netdev(hw), format, ## arg)
 #endif /* _IXGBEVF_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index d9d6616..7eaac32 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1612,7 +1612,7 @@
 		txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(reg_idx));
 	}  while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE));
 	if (!wait_loop)
-		pr_err("Could not enable Tx Queue %d\n", reg_idx);
+		hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx);
 }
 
 /**
@@ -1810,8 +1810,10 @@
 	if (hw->mac.type >= ixgbe_mac_X550_vf)
 		ixgbevf_setup_vfmrqc(adapter);
 
+	spin_lock_bh(&adapter->mbx_lock);
 	/* notify the PF of our intent to use this size of frame */
 	ret = hw->mac.ops.set_rlpml(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN);
+	spin_unlock_bh(&adapter->mbx_lock);
 	if (ret)
 		dev_err(&adapter->pdev->dev,
 			"Failed to set MTU at %d\n", netdev->mtu);
@@ -2993,6 +2995,7 @@
  **/
 int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring)
 {
+	struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev);
 	int size;
 
 	size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count;
@@ -3757,8 +3760,10 @@
 	if ((new_mtu < 68) || (max_frame > max_possible_frame))
 		return -EINVAL;
 
+	spin_lock_bh(&adapter->mbx_lock);
 	/* notify the PF of our intent to use this size of frame */
 	ret = hw->mac.ops.set_rlpml(hw, max_frame);
+	spin_unlock_bh(&adapter->mbx_lock);
 	if (ret)
 		return -EINVAL;
 
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index a52f70e..d46ba1d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -284,7 +284,8 @@
 	if (addr)
 		ether_addr_copy(msg_addr, addr);
 
-	ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3);
+	ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					     sizeof(msgbuf) / sizeof(u32));
 	if (!ret_val) {
 		msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
@@ -441,7 +442,8 @@
 	msgbuf[0] = IXGBE_VF_SET_MAC_ADDR;
 	ether_addr_copy(msg_addr, addr);
 
-	ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+	ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					     sizeof(msgbuf) / sizeof(u32));
 
 	msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
@@ -551,7 +553,8 @@
 	msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE;
 	msgbuf[1] = xcast_mode;
 
-	err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+	err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					 sizeof(msgbuf) / sizeof(u32));
 	if (err)
 		return err;
 
@@ -588,7 +591,8 @@
 	/* Setting the 8 bit field MSG INFO to TRUE indicates "add" */
 	msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT;
 
-	err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+	err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					 sizeof(msgbuf) / sizeof(u32));
 	if (err)
 		goto mbx_err;
 
@@ -791,7 +795,8 @@
 	msgbuf[0] = IXGBE_VF_SET_LPE;
 	msgbuf[1] = max_size;
 
-	ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
+	ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf,
+					     sizeof(msgbuf) / sizeof(u32));
 	if (ret_val)
 		return ret_val;
 	if ((msgbuf[0] & IXGBE_VF_SET_LPE) &&
@@ -837,7 +842,8 @@
 	msg[1] = api;
 	msg[2] = 0;
 
-	err = ixgbevf_write_msg_read_ack(hw, msg, msg, 3);
+	err = ixgbevf_write_msg_read_ack(hw, msg, msg,
+					 sizeof(msg) / sizeof(u32));
 	if (!err) {
 		msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
@@ -887,7 +893,8 @@
 	msg[0] = IXGBE_VF_GET_QUEUE;
 	msg[1] = msg[2] = msg[3] = msg[4] = 0;
 
-	err = ixgbevf_write_msg_read_ack(hw, msg, msg, 5);
+	err = ixgbevf_write_msg_read_ack(hw, msg, msg,
+					 sizeof(msg) / sizeof(u32));
 	if (!err) {
 		msg[0] &= ~IXGBE_VT_MSGTYPE_CTS;
 
diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c
index 8982c88..a0d1b08 100644
--- a/drivers/net/ethernet/marvell/mvmdio.c
+++ b/drivers/net/ethernet/marvell/mvmdio.c
@@ -211,8 +211,7 @@
 	dev->regs = devm_ioremap(&pdev->dev, r->start, resource_size(r));
 	if (!dev->regs) {
 		dev_err(&pdev->dev, "Unable to remap SMI register\n");
-		ret = -ENODEV;
-		goto out_mdio;
+		return -ENODEV;
 	}
 
 	init_waitqueue_head(&dev->smi_busy_wait);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index d41c28d..5cb07c2 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -382,7 +382,8 @@
 	struct mvneta_rx_queue *rxqs;
 	struct mvneta_tx_queue *txqs;
 	struct net_device *dev;
-	struct notifier_block cpu_notifier;
+	struct hlist_node node_online;
+	struct hlist_node node_dead;
 	int rxq_def;
 	/* Protect the access to the percpu interrupt registers,
 	 * ensuring that the configuration remains coherent.
@@ -399,7 +400,6 @@
 	u16 rx_ring_size;
 
 	struct mii_bus *mii_bus;
-	struct phy_device *phy_dev;
 	phy_interface_t phy_interface;
 	struct device_node *phy_node;
 	unsigned int link;
@@ -574,6 +574,7 @@
 	int next_desc_to_proc;
 };
 
+static enum cpuhp_state online_hpstate;
 /* The hardware supports eight (8) rx queues, but we are only allowing
  * the first one to be used. Therefore, let's just allocate one queue.
  */
@@ -635,8 +636,9 @@
 }
 
 /* Get System Network Statistics */
-struct rtnl_link_stats64 *mvneta_get_stats64(struct net_device *dev,
-					     struct rtnl_link_stats64 *stats)
+static struct rtnl_link_stats64 *
+mvneta_get_stats64(struct net_device *dev,
+		   struct rtnl_link_stats64 *stats)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 	unsigned int start;
@@ -2651,6 +2653,7 @@
 	u32 cause_rx_tx;
 	int rx_queue;
 	struct mvneta_port *pp = netdev_priv(napi->dev);
+	struct net_device *ndev = pp->dev;
 	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 
 	if (!netif_running(pp->dev)) {
@@ -2668,7 +2671,7 @@
 				(MVNETA_CAUSE_PHY_STATUS_CHANGE |
 				 MVNETA_CAUSE_LINK_CHANGE |
 				 MVNETA_CAUSE_PSC_SYNC_CHANGE))) {
-			mvneta_fixed_link_update(pp, pp->phy_dev);
+			mvneta_fixed_link_update(pp, ndev->phydev);
 		}
 	}
 
@@ -2963,6 +2966,7 @@
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
 	int cpu;
+	struct net_device *ndev = pp->dev;
 
 	mvneta_max_rx_size_set(pp, pp->pkt_size);
 	mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
@@ -2985,15 +2989,16 @@
 		    MVNETA_CAUSE_LINK_CHANGE |
 		    MVNETA_CAUSE_PSC_SYNC_CHANGE);
 
-	phy_start(pp->phy_dev);
+	phy_start(ndev->phydev);
 	netif_tx_start_all_queues(pp->dev);
 }
 
 static void mvneta_stop_dev(struct mvneta_port *pp)
 {
 	unsigned int cpu;
+	struct net_device *ndev = pp->dev;
 
-	phy_stop(pp->phy_dev);
+	phy_stop(ndev->phydev);
 
 	for_each_online_cpu(cpu) {
 		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
@@ -3166,7 +3171,7 @@
 static void mvneta_adjust_link(struct net_device *ndev)
 {
 	struct mvneta_port *pp = netdev_priv(ndev);
-	struct phy_device *phydev = pp->phy_dev;
+	struct phy_device *phydev = ndev->phydev;
 	int status_change = 0;
 
 	if (phydev->link) {
@@ -3244,7 +3249,6 @@
 	phy_dev->supported &= PHY_GBIT_FEATURES;
 	phy_dev->advertising = phy_dev->supported;
 
-	pp->phy_dev = phy_dev;
 	pp->link    = 0;
 	pp->duplex  = 0;
 	pp->speed   = 0;
@@ -3254,8 +3258,9 @@
 
 static void mvneta_mdio_remove(struct mvneta_port *pp)
 {
-	phy_disconnect(pp->phy_dev);
-	pp->phy_dev = NULL;
+	struct net_device *ndev = pp->dev;
+
+	phy_disconnect(ndev->phydev);
 }
 
 /* Electing a CPU must be done in an atomic way: it should be done
@@ -3311,101 +3316,104 @@
 	}
 };
 
-static int mvneta_percpu_notifier(struct notifier_block *nfb,
-				  unsigned long action, void *hcpu)
+static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
-	struct mvneta_port *pp = container_of(nfb, struct mvneta_port,
-					      cpu_notifier);
-	int cpu = (unsigned long)hcpu, other_cpu;
+	int other_cpu;
+	struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+						  node_online);
 	struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		spin_lock(&pp->lock);
-		/* Configuring the driver for a new CPU while the
-		 * driver is stopping is racy, so just avoid it.
-		 */
-		if (pp->is_stopped) {
-			spin_unlock(&pp->lock);
-			break;
+
+	spin_lock(&pp->lock);
+	/*
+	 * Configuring the driver for a new CPU while the driver is
+	 * stopping is racy, so just avoid it.
+	 */
+	if (pp->is_stopped) {
+		spin_unlock(&pp->lock);
+		return 0;
+	}
+	netif_tx_stop_all_queues(pp->dev);
+
+	/*
+	 * We have to synchronise on tha napi of each CPU except the one
+	 * just being woken up
+	 */
+	for_each_online_cpu(other_cpu) {
+		if (other_cpu != cpu) {
+			struct mvneta_pcpu_port *other_port =
+				per_cpu_ptr(pp->ports, other_cpu);
+
+			napi_synchronize(&other_port->napi);
 		}
-		netif_tx_stop_all_queues(pp->dev);
-
-		/* We have to synchronise on tha napi of each CPU
-		 * except the one just being waked up
-		 */
-		for_each_online_cpu(other_cpu) {
-			if (other_cpu != cpu) {
-				struct mvneta_pcpu_port *other_port =
-					per_cpu_ptr(pp->ports, other_cpu);
-
-				napi_synchronize(&other_port->napi);
-			}
-		}
-
-		/* Mask all ethernet port interrupts */
-		on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
-		napi_enable(&port->napi);
-
-
-		/* Enable per-CPU interrupts on the CPU that is
-		 * brought up.
-		 */
-		mvneta_percpu_enable(pp);
-
-		/* Enable per-CPU interrupt on the one CPU we care
-		 * about.
-		 */
-		mvneta_percpu_elect(pp);
-
-		/* Unmask all ethernet port interrupts */
-		on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
-		mvreg_write(pp, MVNETA_INTR_MISC_MASK,
-			MVNETA_CAUSE_PHY_STATUS_CHANGE |
-			MVNETA_CAUSE_LINK_CHANGE |
-			MVNETA_CAUSE_PSC_SYNC_CHANGE);
-		netif_tx_start_all_queues(pp->dev);
-		spin_unlock(&pp->lock);
-		break;
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-		netif_tx_stop_all_queues(pp->dev);
-		/* Thanks to this lock we are sure that any pending
-		 * cpu election is done
-		 */
-		spin_lock(&pp->lock);
-		/* Mask all ethernet port interrupts */
-		on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
-		spin_unlock(&pp->lock);
-
-		napi_synchronize(&port->napi);
-		napi_disable(&port->napi);
-		/* Disable per-CPU interrupts on the CPU that is
-		 * brought down.
-		 */
-		mvneta_percpu_disable(pp);
-
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		/* Check if a new CPU must be elected now this on is down */
-		spin_lock(&pp->lock);
-		mvneta_percpu_elect(pp);
-		spin_unlock(&pp->lock);
-		/* Unmask all ethernet port interrupts */
-		on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
-		mvreg_write(pp, MVNETA_INTR_MISC_MASK,
-			MVNETA_CAUSE_PHY_STATUS_CHANGE |
-			MVNETA_CAUSE_LINK_CHANGE |
-			MVNETA_CAUSE_PSC_SYNC_CHANGE);
-		netif_tx_start_all_queues(pp->dev);
-		break;
 	}
 
-	return NOTIFY_OK;
+	/* Mask all ethernet port interrupts */
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+	napi_enable(&port->napi);
+
+	/*
+	 * Enable per-CPU interrupts on the CPU that is
+	 * brought up.
+	 */
+	mvneta_percpu_enable(pp);
+
+	/*
+	 * Enable per-CPU interrupt on the one CPU we care
+	 * about.
+	 */
+	mvneta_percpu_elect(pp);
+
+	/* Unmask all ethernet port interrupts */
+	on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK,
+		    MVNETA_CAUSE_PHY_STATUS_CHANGE |
+		    MVNETA_CAUSE_LINK_CHANGE |
+		    MVNETA_CAUSE_PSC_SYNC_CHANGE);
+	netif_tx_start_all_queues(pp->dev);
+	spin_unlock(&pp->lock);
+	return 0;
+}
+
+static int mvneta_cpu_down_prepare(unsigned int cpu, struct hlist_node *node)
+{
+	struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+						  node_online);
+	struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+
+	/*
+	 * Thanks to this lock we are sure that any pending cpu election is
+	 * done.
+	 */
+	spin_lock(&pp->lock);
+	/* Mask all ethernet port interrupts */
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+	spin_unlock(&pp->lock);
+
+	napi_synchronize(&port->napi);
+	napi_disable(&port->napi);
+	/* Disable per-CPU interrupts on the CPU that is brought down. */
+	mvneta_percpu_disable(pp);
+	return 0;
+}
+
+static int mvneta_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+	struct mvneta_port *pp = hlist_entry_safe(node, struct mvneta_port,
+						  node_dead);
+
+	/* Check if a new CPU must be elected now this on is down */
+	spin_lock(&pp->lock);
+	mvneta_percpu_elect(pp);
+	spin_unlock(&pp->lock);
+	/* Unmask all ethernet port interrupts */
+	on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK,
+		    MVNETA_CAUSE_PHY_STATUS_CHANGE |
+		    MVNETA_CAUSE_LINK_CHANGE |
+		    MVNETA_CAUSE_PSC_SYNC_CHANGE);
+	netif_tx_start_all_queues(pp->dev);
+	return 0;
 }
 
 static int mvneta_open(struct net_device *dev)
@@ -3442,7 +3450,15 @@
 	/* Register a CPU notifier to handle the case where our CPU
 	 * might be taken offline.
 	 */
-	register_cpu_notifier(&pp->cpu_notifier);
+	ret = cpuhp_state_add_instance_nocalls(online_hpstate,
+					       &pp->node_online);
+	if (ret)
+		goto err_free_irq;
+
+	ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+					       &pp->node_dead);
+	if (ret)
+		goto err_free_online_hp;
 
 	/* In default link is down */
 	netif_carrier_off(pp->dev);
@@ -3450,15 +3466,19 @@
 	ret = mvneta_mdio_probe(pp);
 	if (ret < 0) {
 		netdev_err(dev, "cannot probe MDIO bus\n");
-		goto err_free_irq;
+		goto err_free_dead_hp;
 	}
 
 	mvneta_start_dev(pp);
 
 	return 0;
 
+err_free_dead_hp:
+	cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+					    &pp->node_dead);
+err_free_online_hp:
+	cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
 err_free_irq:
-	unregister_cpu_notifier(&pp->cpu_notifier);
 	on_each_cpu(mvneta_percpu_disable, pp, true);
 	free_percpu_irq(pp->dev->irq, pp->ports);
 err_cleanup_txqs:
@@ -3484,7 +3504,10 @@
 
 	mvneta_stop_dev(pp);
 	mvneta_mdio_remove(pp);
-	unregister_cpu_notifier(&pp->cpu_notifier);
+
+	cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
+	cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+					    &pp->node_dead);
 	on_each_cpu(mvneta_percpu_disable, pp, true);
 	free_percpu_irq(dev->irq, pp->ports);
 	mvneta_cleanup_rxqs(pp);
@@ -3495,42 +3518,31 @@
 
 static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct mvneta_port *pp = netdev_priv(dev);
-
-	if (!pp->phy_dev)
+	if (!dev->phydev)
 		return -ENOTSUPP;
 
-	return phy_mii_ioctl(pp->phy_dev, ifr, cmd);
+	return phy_mii_ioctl(dev->phydev, ifr, cmd);
 }
 
 /* Ethtool methods */
 
-/* Get settings (phy address, speed) for ethtools */
-int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+/* Set link ksettings (phy address, speed) for ethtools */
+static int
+mvneta_ethtool_set_link_ksettings(struct net_device *ndev,
+				  const struct ethtool_link_ksettings *cmd)
 {
-	struct mvneta_port *pp = netdev_priv(dev);
-
-	if (!pp->phy_dev)
-		return -ENODEV;
-
-	return phy_ethtool_gset(pp->phy_dev, cmd);
-}
-
-/* Set settings (phy address, speed) for ethtools */
-int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct mvneta_port *pp = netdev_priv(dev);
-	struct phy_device *phydev = pp->phy_dev;
+	struct mvneta_port *pp = netdev_priv(ndev);
+	struct phy_device *phydev = ndev->phydev;
 
 	if (!phydev)
 		return -ENODEV;
 
-	if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
+	if ((cmd->base.autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
 		u32 val;
 
-		mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE);
+		mvneta_set_autoneg(pp, cmd->base.autoneg == AUTONEG_ENABLE);
 
-		if (cmd->autoneg == AUTONEG_DISABLE) {
+		if (cmd->base.autoneg == AUTONEG_DISABLE) {
 			val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
 			val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED |
 				 MVNETA_GMAC_CONFIG_GMII_SPEED |
@@ -3547,17 +3559,17 @@
 			mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
 		}
 
-		pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE);
+		pp->use_inband_status = (cmd->base.autoneg == AUTONEG_ENABLE);
 		netdev_info(pp->dev, "autoneg status set to %i\n",
 			    pp->use_inband_status);
 
-		if (netif_running(dev)) {
+		if (netif_running(ndev)) {
 			mvneta_port_down(pp);
 			mvneta_port_up(pp);
 		}
 	}
 
-	return phy_ethtool_sset(pp->phy_dev, cmd);
+	return phy_ethtool_ksettings_set(ndev->phydev, cmd);
 }
 
 /* Set interrupt coalescing for ethtools */
@@ -3821,8 +3833,6 @@
 
 const struct ethtool_ops mvneta_eth_tool_ops = {
 	.get_link       = ethtool_op_get_link,
-	.get_settings   = mvneta_ethtool_get_settings,
-	.set_settings   = mvneta_ethtool_set_settings,
 	.set_coalesce   = mvneta_ethtool_set_coalesce,
 	.get_coalesce   = mvneta_ethtool_get_coalesce,
 	.get_drvinfo    = mvneta_ethtool_get_drvinfo,
@@ -3835,6 +3845,8 @@
 	.get_rxnfc	= mvneta_ethtool_get_rxnfc,
 	.get_rxfh	= mvneta_ethtool_get_rxfh,
 	.set_rxfh	= mvneta_ethtool_set_rxfh,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = mvneta_ethtool_set_link_ksettings,
 };
 
 /* Initialize hw */
@@ -4024,7 +4036,6 @@
 	err = of_property_read_string(dn, "managed", &managed);
 	pp->use_inband_status = (err == 0 &&
 				 strcmp(managed, "in-band-status") == 0);
-	pp->cpu_notifier.notifier_call = mvneta_percpu_notifier;
 
 	pp->rxq_def = rxq_def;
 
@@ -4227,7 +4238,42 @@
 	},
 };
 
-module_platform_driver(mvneta_driver);
+static int __init mvneta_driver_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/mvmeta:online",
+				      mvneta_cpu_online,
+				      mvneta_cpu_down_prepare);
+	if (ret < 0)
+		goto out;
+	online_hpstate = ret;
+	ret = cpuhp_setup_state_multi(CPUHP_NET_MVNETA_DEAD, "net/mvneta:dead",
+				      NULL, mvneta_cpu_dead);
+	if (ret)
+		goto err_dead;
+
+	ret = platform_driver_register(&mvneta_driver);
+	if (ret)
+		goto err;
+	return 0;
+
+err:
+	cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD);
+err_dead:
+	cpuhp_remove_multi_state(online_hpstate);
+out:
+	return ret;
+}
+module_init(mvneta_driver_init);
+
+static void __exit mvneta_driver_exit(void)
+{
+	platform_driver_unregister(&mvneta_driver);
+	cpuhp_remove_multi_state(CPUHP_NET_MVNETA_DEAD);
+	cpuhp_remove_multi_state(online_hpstate);
+}
+module_exit(mvneta_driver_exit);
 
 MODULE_DESCRIPTION("Marvell NETA Ethernet Driver - www.marvell.com");
 MODULE_AUTHOR("Rami Rosen <rosenr@marvell.com>, Thomas Petazzoni <thomas.petazzoni@free-electrons.com>");
diff --git a/drivers/net/ethernet/marvell/mvneta_bm.h b/drivers/net/ethernet/marvell/mvneta_bm.h
index e74fd44..a32de43 100644
--- a/drivers/net/ethernet/marvell/mvneta_bm.h
+++ b/drivers/net/ethernet/marvell/mvneta_bm.h
@@ -133,7 +133,7 @@
 void *mvneta_frag_alloc(unsigned int frag_size);
 void mvneta_frag_free(unsigned int frag_size, void *data);
 
-#if defined(CONFIG_MVNETA_BM) || defined(CONFIG_MVNETA_BM_MODULE)
+#if IS_ENABLED(CONFIG_MVNETA_BM)
 void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
 			    struct mvneta_bm_pool *bm_pool, u8 port_map);
 void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 467138b4..f05ea56 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -3070,7 +3070,7 @@
 			goto done;
 	}
 
-	napi_complete(napi);
+	napi_complete_done(napi, work_done);
 	sky2_read32(hw, B0_Y2_SP_LISR);
 done:
 
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b57ae3a..ad4ab97 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -18,6 +18,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <linux/clk.h>
+#include <linux/pm_runtime.h>
 #include <linux/if_vlan.h>
 #include <linux/reset.h>
 #include <linux/tcp.h>
@@ -50,6 +51,10 @@
 	MTK_ETHTOOL_STAT(rx_flow_control_packets),
 };
 
+static const char * const mtk_clks_source_name[] = {
+	"ethif", "esw", "gp1", "gp2", "trgpll"
+};
+
 void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
 {
 	__raw_writel(val, eth->base + reg);
@@ -130,6 +135,33 @@
 	return _mtk_mdio_read(eth, phy_addr, phy_reg);
 }
 
+static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
+{
+	u32 val;
+	int ret;
+
+	val = (speed == SPEED_1000) ?
+		INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100;
+	mtk_w32(eth, val, INTF_MODE);
+
+	regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0,
+			   ETHSYS_TRGMII_CLK_SEL362_5,
+			   ETHSYS_TRGMII_CLK_SEL362_5);
+
+	val = (speed == SPEED_1000) ? 250000000 : 500000000;
+	ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val);
+	if (ret)
+		dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret);
+
+	val = (speed == SPEED_1000) ?
+		RCK_CTRL_RGMII_1000 : RCK_CTRL_RGMII_10_100;
+	mtk_w32(eth, val, TRGMII_RCK_CTRL);
+
+	val = (speed == SPEED_1000) ?
+		TCK_CTRL_RGMII_1000 : TCK_CTRL_RGMII_10_100;
+	mtk_w32(eth, val, TRGMII_TCK_CTRL);
+}
+
 static void mtk_phy_link_adjust(struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
@@ -140,7 +172,10 @@
 		  MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN |
 		  MAC_MCR_BACKPR_EN;
 
-	switch (mac->phy_dev->speed) {
+	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+		return;
+
+	switch (dev->phydev->speed) {
 	case SPEED_1000:
 		mcr |= MAC_MCR_SPEED_1000;
 		break;
@@ -149,20 +184,23 @@
 		break;
 	};
 
-	if (mac->phy_dev->link)
+	if (mac->id == 0 && !mac->trgmii)
+		mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed);
+
+	if (dev->phydev->link)
 		mcr |= MAC_MCR_FORCE_LINK;
 
-	if (mac->phy_dev->duplex) {
+	if (dev->phydev->duplex) {
 		mcr |= MAC_MCR_FORCE_DPX;
 
-		if (mac->phy_dev->pause)
+		if (dev->phydev->pause)
 			rmt_adv = LPA_PAUSE_CAP;
-		if (mac->phy_dev->asym_pause)
+		if (dev->phydev->asym_pause)
 			rmt_adv |= LPA_PAUSE_ASYM;
 
-		if (mac->phy_dev->advertising & ADVERTISED_Pause)
+		if (dev->phydev->advertising & ADVERTISED_Pause)
 			lcl_adv |= ADVERTISE_PAUSE_CAP;
-		if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause)
+		if (dev->phydev->advertising & ADVERTISED_Asym_Pause)
 			lcl_adv |= ADVERTISE_PAUSE_ASYM;
 
 		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
@@ -179,7 +217,7 @@
 
 	mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
 
-	if (mac->phy_dev->link)
+	if (dev->phydev->link)
 		netif_carrier_on(dev);
 	else
 		netif_carrier_off(dev);
@@ -188,17 +226,9 @@
 static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac,
 				struct device_node *phy_node)
 {
-	const __be32 *_addr = NULL;
 	struct phy_device *phydev;
-	int phy_mode, addr;
+	int phy_mode;
 
-	_addr = of_get_property(phy_node, "reg", NULL);
-
-	if (!_addr || (be32_to_cpu(*_addr) >= 0x20)) {
-		pr_err("%s: invalid phy address\n", phy_node->name);
-		return -EINVAL;
-	}
-	addr = be32_to_cpu(*_addr);
 	phy_mode = of_get_phy_mode(phy_node);
 	if (phy_mode < 0) {
 		dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode);
@@ -217,17 +247,17 @@
 		 mac->id, phydev_name(phydev), phydev->phy_id,
 		 phydev->drv->name);
 
-	mac->phy_dev = phydev;
-
 	return 0;
 }
 
-static int mtk_phy_connect(struct mtk_mac *mac)
+static int mtk_phy_connect(struct net_device *dev)
 {
-	struct mtk_eth *eth = mac->hw;
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth;
 	struct device_node *np;
-	u32 val, ge_mode;
+	u32 val;
 
+	eth = mac->hw;
 	np = of_parse_phandle(mac->of_node, "phy-handle", 0);
 	if (!np && of_phy_is_fixed_link(mac->of_node))
 		if (!of_phy_register_fixed_link(mac->of_node))
@@ -236,46 +266,67 @@
 		return -ENODEV;
 
 	switch (of_get_phy_mode(np)) {
+	case PHY_INTERFACE_MODE_TRGMII:
+		mac->trgmii = true;
 	case PHY_INTERFACE_MODE_RGMII_TXID:
 	case PHY_INTERFACE_MODE_RGMII_RXID:
 	case PHY_INTERFACE_MODE_RGMII_ID:
 	case PHY_INTERFACE_MODE_RGMII:
-		ge_mode = 0;
+		mac->ge_mode = 0;
 		break;
 	case PHY_INTERFACE_MODE_MII:
-		ge_mode = 1;
+		mac->ge_mode = 1;
+		break;
+	case PHY_INTERFACE_MODE_REVMII:
+		mac->ge_mode = 2;
 		break;
 	case PHY_INTERFACE_MODE_RMII:
-		ge_mode = 2;
+		if (!mac->id)
+			goto err_phy;
+		mac->ge_mode = 3;
 		break;
 	default:
-		dev_err(eth->dev, "invalid phy_mode\n");
-		return -1;
+		goto err_phy;
 	}
 
 	/* put the gmac into the right mode */
 	regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
 	val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
-	val |= SYSCFG0_GE_MODE(ge_mode, mac->id);
+	val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id);
 	regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
 
-	mtk_phy_connect_node(eth, mac, np);
-	mac->phy_dev->autoneg = AUTONEG_ENABLE;
-	mac->phy_dev->speed = 0;
-	mac->phy_dev->duplex = 0;
-	mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
+	/* couple phydev to net_device */
+	if (mtk_phy_connect_node(eth, mac, np))
+		goto err_phy;
+
+	dev->phydev->autoneg = AUTONEG_ENABLE;
+	dev->phydev->speed = 0;
+	dev->phydev->duplex = 0;
+
+	if (of_phy_is_fixed_link(mac->of_node))
+		dev->phydev->supported |=
+		SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
+	dev->phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
 				   SUPPORTED_Asym_Pause;
-	mac->phy_dev->advertising = mac->phy_dev->supported |
+	dev->phydev->advertising = dev->phydev->supported |
 				    ADVERTISED_Autoneg;
-	phy_start_aneg(mac->phy_dev);
+	phy_start_aneg(dev->phydev);
+
+	of_node_put(np);
 
 	return 0;
+
+err_phy:
+	of_node_put(np);
+	dev_err(eth->dev, "%s: invalid phy\n", __func__);
+	return -EINVAL;
 }
 
 static int mtk_mdio_init(struct mtk_eth *eth)
 {
 	struct device_node *mii_np;
-	int err;
+	int ret;
 
 	mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus");
 	if (!mii_np) {
@@ -284,13 +335,13 @@
 	}
 
 	if (!of_device_is_available(mii_np)) {
-		err = 0;
+		ret = -ENODEV;
 		goto err_put_node;
 	}
 
-	eth->mii_bus = mdiobus_alloc();
+	eth->mii_bus = devm_mdiobus_alloc(eth->dev);
 	if (!eth->mii_bus) {
-		err = -ENOMEM;
+		ret = -ENOMEM;
 		goto err_put_node;
 	}
 
@@ -301,19 +352,11 @@
 	eth->mii_bus->parent = eth->dev;
 
 	snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%s", mii_np->name);
-	err = of_mdiobus_register(eth->mii_bus, mii_np);
-	if (err)
-		goto err_free_bus;
-
-	return 0;
-
-err_free_bus:
-	mdiobus_free(eth->mii_bus);
+	ret = of_mdiobus_register(eth->mii_bus, mii_np);
 
 err_put_node:
 	of_node_put(mii_np);
-	eth->mii_bus = NULL;
-	return err;
+	return ret;
 }
 
 static void mtk_mdio_cleanup(struct mtk_eth *eth)
@@ -322,29 +365,29 @@
 		return;
 
 	mdiobus_unregister(eth->mii_bus);
-	of_node_put(eth->mii_bus->dev.of_node);
-	mdiobus_free(eth->mii_bus);
 }
 
-static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
+static inline void mtk_irq_disable(struct mtk_eth *eth,
+				   unsigned reg, u32 mask)
 {
 	unsigned long flags;
 	u32 val;
 
 	spin_lock_irqsave(&eth->irq_lock, flags);
-	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
-	mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
+	val = mtk_r32(eth, reg);
+	mtk_w32(eth, val & ~mask, reg);
 	spin_unlock_irqrestore(&eth->irq_lock, flags);
 }
 
-static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask)
+static inline void mtk_irq_enable(struct mtk_eth *eth,
+				  unsigned reg, u32 mask)
 {
 	unsigned long flags;
 	u32 val;
 
 	spin_lock_irqsave(&eth->irq_lock, flags);
-	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
-	mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
+	val = mtk_r32(eth, reg);
+	mtk_w32(eth, val | mask, reg);
 	spin_unlock_irqrestore(&eth->irq_lock, flags);
 }
 
@@ -353,18 +396,20 @@
 	int ret = eth_mac_addr(dev, p);
 	struct mtk_mac *mac = netdev_priv(dev);
 	const char *macaddr = dev->dev_addr;
-	unsigned long flags;
 
 	if (ret)
 		return ret;
 
-	spin_lock_irqsave(&mac->hw->page_lock, flags);
+	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+		return -EBUSY;
+
+	spin_lock_bh(&mac->hw->page_lock);
 	mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
 		MTK_GDMA_MAC_ADRH(mac->id));
 	mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
 		(macaddr[4] << 8) | macaddr[5],
 		MTK_GDMA_MAC_ADRL(mac->id));
-	spin_unlock_irqrestore(&mac->hw->page_lock, flags);
+	spin_unlock_bh(&mac->hw->page_lock);
 
 	return 0;
 }
@@ -542,15 +587,15 @@
 	return &ring->buf[idx];
 }
 
-static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf)
+static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf)
 {
 	if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
-		dma_unmap_single(dev,
+		dma_unmap_single(eth->dev,
 				 dma_unmap_addr(tx_buf, dma_addr0),
 				 dma_unmap_len(tx_buf, dma_len0),
 				 DMA_TO_DEVICE);
 	} else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
-		dma_unmap_page(dev,
+		dma_unmap_page(eth->dev,
 			       dma_unmap_addr(tx_buf, dma_addr0),
 			       dma_unmap_len(tx_buf, dma_len0),
 			       DMA_TO_DEVICE);
@@ -572,14 +617,15 @@
 	dma_addr_t mapped_addr;
 	unsigned int nr_frags;
 	int i, n_desc = 1;
-	u32 txd4 = 0;
+	u32 txd4 = 0, fport;
 
 	itxd = ring->next_free;
 	if (itxd == ring->last_free)
 		return -ENOMEM;
 
 	/* set the forward port */
-	txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT;
+	fport = (mac->id + 1) << TX_DMA_FPORT_SHIFT;
+	txd4 |= fport;
 
 	tx_buf = mtk_desc_to_tx_buf(ring, itxd);
 	memset(tx_buf, 0, sizeof(*tx_buf));
@@ -595,9 +641,9 @@
 	if (skb_vlan_tag_present(skb))
 		txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb);
 
-	mapped_addr = dma_map_single(&dev->dev, skb->data,
+	mapped_addr = dma_map_single(eth->dev, skb->data,
 				     skb_headlen(skb), DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+	if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
 		return -ENOMEM;
 
 	WRITE_ONCE(itxd->txd1, mapped_addr);
@@ -623,10 +669,10 @@
 
 			n_desc++;
 			frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
-			mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset,
+			mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
 						       frag_map_size,
 						       DMA_TO_DEVICE);
-			if (unlikely(dma_mapping_error(&dev->dev, mapped_addr)))
+			if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
 				goto err_dma;
 
 			if (i == nr_frags - 1 &&
@@ -637,7 +683,7 @@
 			WRITE_ONCE(txd->txd3, (TX_DMA_SWC |
 					       TX_DMA_PLEN0(frag_map_size) |
 					       last_frag * TX_DMA_LS0));
-			WRITE_ONCE(txd->txd4, 0);
+			WRITE_ONCE(txd->txd4, fport);
 
 			tx_buf->skb = (struct sk_buff *)MTK_DMA_DUMMY_DESC;
 			tx_buf = mtk_desc_to_tx_buf(ring, txd);
@@ -679,7 +725,7 @@
 		tx_buf = mtk_desc_to_tx_buf(ring, itxd);
 
 		/* unmap dma */
-		mtk_tx_unmap(&dev->dev, tx_buf);
+		mtk_tx_unmap(eth, tx_buf);
 
 		itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU;
 		itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2);
@@ -748,7 +794,6 @@
 	struct mtk_eth *eth = mac->hw;
 	struct mtk_tx_ring *ring = &eth->tx_ring;
 	struct net_device_stats *stats = &dev->stats;
-	unsigned long flags;
 	bool gso = false;
 	int tx_num;
 
@@ -756,14 +801,17 @@
 	 * however we have 2 queues running on the same ring so we need to lock
 	 * the ring access
 	 */
-	spin_lock_irqsave(&eth->page_lock, flags);
+	spin_lock(&eth->page_lock);
+
+	if (unlikely(test_bit(MTK_RESETTING, &eth->state)))
+		goto drop;
 
 	tx_num = mtk_cal_txd_req(skb);
 	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
 		mtk_stop_queue(eth);
 		netif_err(eth, tx_queued, dev,
 			  "Tx Ring full when queue awake!\n");
-		spin_unlock_irqrestore(&eth->page_lock, flags);
+		spin_unlock(&eth->page_lock);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -788,22 +836,62 @@
 	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
 		mtk_stop_queue(eth);
 
-	spin_unlock_irqrestore(&eth->page_lock, flags);
+	spin_unlock(&eth->page_lock);
 
 	return NETDEV_TX_OK;
 
 drop:
-	spin_unlock_irqrestore(&eth->page_lock, flags);
+	spin_unlock(&eth->page_lock);
 	stats->tx_dropped++;
 	dev_kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
 
+static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth)
+{
+	int i;
+	struct mtk_rx_ring *ring;
+	int idx;
+
+	if (!eth->hwlro)
+		return &eth->rx_ring[0];
+
+	for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
+		ring = &eth->rx_ring[i];
+		idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
+		if (ring->dma[idx].rxd2 & RX_DMA_DONE) {
+			ring->calc_idx_update = true;
+			return ring;
+		}
+	}
+
+	return NULL;
+}
+
+static void mtk_update_rx_cpu_idx(struct mtk_eth *eth)
+{
+	struct mtk_rx_ring *ring;
+	int i;
+
+	if (!eth->hwlro) {
+		ring = &eth->rx_ring[0];
+		mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+	} else {
+		for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
+			ring = &eth->rx_ring[i];
+			if (ring->calc_idx_update) {
+				ring->calc_idx_update = false;
+				mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+			}
+		}
+	}
+}
+
 static int mtk_poll_rx(struct napi_struct *napi, int budget,
 		       struct mtk_eth *eth)
 {
-	struct mtk_rx_ring *ring = &eth->rx_ring;
-	int idx = ring->calc_idx;
+	struct mtk_rx_ring *ring;
+	int idx;
 	struct sk_buff *skb;
 	u8 *data, *new_data;
 	struct mtk_rx_dma *rxd, trxd;
@@ -815,7 +903,11 @@
 		dma_addr_t dma_addr;
 		int mac = 0;
 
-		idx = NEXT_RX_DESP_IDX(idx);
+		ring = mtk_get_rx_ring(eth);
+		if (unlikely(!ring))
+			goto rx_done;
+
+		idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
 		rxd = &ring->dma[idx];
 		data = ring->data[idx];
 
@@ -830,17 +922,20 @@
 
 		netdev = eth->netdev[mac];
 
+		if (unlikely(test_bit(MTK_RESETTING, &eth->state)))
+			goto release_desc;
+
 		/* alloc new buffer */
 		new_data = napi_alloc_frag(ring->frag_size);
 		if (unlikely(!new_data)) {
 			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
-		dma_addr = dma_map_single(&eth->netdev[mac]->dev,
+		dma_addr = dma_map_single(eth->dev,
 					  new_data + NET_SKB_PAD,
 					  ring->buf_size,
 					  DMA_FROM_DEVICE);
-		if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
+		if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
 			skb_free_frag(new_data);
 			netdev->stats.rx_dropped++;
 			goto release_desc;
@@ -849,13 +944,13 @@
 		/* receive data */
 		skb = build_skb(data, ring->frag_size);
 		if (unlikely(!skb)) {
-			put_page(virt_to_head_page(new_data));
+			skb_free_frag(new_data);
 			netdev->stats.rx_dropped++;
 			goto release_desc;
 		}
 		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 
-		dma_unmap_single(&netdev->dev, trxd.rxd1,
+		dma_unmap_single(eth->dev, trxd.rxd1,
 				 ring->buf_size, DMA_FROM_DEVICE);
 		pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
 		skb->dev = netdev;
@@ -879,17 +974,19 @@
 		rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
 
 		ring->calc_idx = idx;
+
+		done++;
+	}
+
+rx_done:
+	if (done) {
 		/* make sure that all changes to the dma ring are flushed before
 		 * we continue
 		 */
 		wmb();
-		mtk_w32(eth, ring->calc_idx, MTK_QRX_CRX_IDX0);
-		done++;
+		mtk_update_rx_cpu_idx(eth);
 	}
 
-	if (done < budget)
-		mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
-
 	return done;
 }
 
@@ -937,7 +1034,7 @@
 			done[mac]++;
 			budget--;
 		}
-		mtk_tx_unmap(eth->dev, tx_buf);
+		mtk_tx_unmap(eth, tx_buf);
 
 		ring->last_free = desc;
 		atomic_inc(&ring->free_count);
@@ -998,7 +1095,7 @@
 		return budget;
 
 	napi_complete(napi);
-	mtk_irq_enable(eth, MTK_TX_DONE_INT);
+	mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
 
 	return tx_done;
 }
@@ -1008,30 +1105,33 @@
 	struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
 	u32 status, mask;
 	int rx_done = 0;
+	int remain_budget = budget;
 
 	mtk_handle_status_irq(eth);
-	mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
-	rx_done = mtk_poll_rx(napi, budget, eth);
+
+poll_again:
+	mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
+	rx_done = mtk_poll_rx(napi, remain_budget, eth);
 
 	if (unlikely(netif_msg_intr(eth))) {
-		status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
-		mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
+		status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
+		mask = mtk_r32(eth, MTK_PDMA_INT_MASK);
 		dev_info(eth->dev,
 			 "done rx %d, intr 0x%08x/0x%x\n",
 			 rx_done, status, mask);
 	}
-
-	if (rx_done == budget)
+	if (rx_done == remain_budget)
 		return budget;
 
-	status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
-	if (status & MTK_RX_DONE_INT)
-		return budget;
-
+	status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
+	if (status & MTK_RX_DONE_INT) {
+		remain_budget -= rx_done;
+		goto poll_again;
+	}
 	napi_complete(napi);
-	mtk_irq_enable(eth, MTK_RX_DONE_INT);
+	mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
 
-	return rx_done;
+	return rx_done + budget - remain_budget;
 }
 
 static int mtk_tx_alloc(struct mtk_eth *eth)
@@ -1078,6 +1178,7 @@
 	mtk_w32(eth,
 		ring->phys + ((MTK_DMA_SIZE - 1) * sz),
 		MTK_QTX_DRX_PTR);
+	mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
 
 	return 0;
 
@@ -1092,7 +1193,7 @@
 
 	if (ring->buf) {
 		for (i = 0; i < MTK_DMA_SIZE; i++)
-			mtk_tx_unmap(eth->dev, &ring->buf[i]);
+			mtk_tx_unmap(eth, &ring->buf[i]);
 		kfree(ring->buf);
 		ring->buf = NULL;
 	}
@@ -1106,32 +1207,41 @@
 	}
 }
 
-static int mtk_rx_alloc(struct mtk_eth *eth)
+static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
 {
-	struct mtk_rx_ring *ring = &eth->rx_ring;
+	struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
+	int rx_data_len, rx_dma_size;
 	int i;
 
-	ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN);
+	if (rx_flag == MTK_RX_FLAGS_HWLRO) {
+		rx_data_len = MTK_MAX_LRO_RX_LENGTH;
+		rx_dma_size = MTK_HW_LRO_DMA_SIZE;
+	} else {
+		rx_data_len = ETH_DATA_LEN;
+		rx_dma_size = MTK_DMA_SIZE;
+	}
+
+	ring->frag_size = mtk_max_frag_size(rx_data_len);
 	ring->buf_size = mtk_max_buf_size(ring->frag_size);
-	ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data),
+	ring->data = kcalloc(rx_dma_size, sizeof(*ring->data),
 			     GFP_KERNEL);
 	if (!ring->data)
 		return -ENOMEM;
 
-	for (i = 0; i < MTK_DMA_SIZE; i++) {
+	for (i = 0; i < rx_dma_size; i++) {
 		ring->data[i] = netdev_alloc_frag(ring->frag_size);
 		if (!ring->data[i])
 			return -ENOMEM;
 	}
 
 	ring->dma = dma_alloc_coherent(eth->dev,
-				       MTK_DMA_SIZE * sizeof(*ring->dma),
+				       rx_dma_size * sizeof(*ring->dma),
 				       &ring->phys,
 				       GFP_ATOMIC | __GFP_ZERO);
 	if (!ring->dma)
 		return -ENOMEM;
 
-	for (i = 0; i < MTK_DMA_SIZE; i++) {
+	for (i = 0; i < rx_dma_size; i++) {
 		dma_addr_t dma_addr = dma_map_single(eth->dev,
 				ring->data[i] + NET_SKB_PAD,
 				ring->buf_size,
@@ -1142,28 +1252,30 @@
 
 		ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
 	}
-	ring->calc_idx = MTK_DMA_SIZE - 1;
+	ring->dma_size = rx_dma_size;
+	ring->calc_idx_update = false;
+	ring->calc_idx = rx_dma_size - 1;
+	ring->crx_idx_reg = MTK_PRX_CRX_IDX_CFG(ring_no);
 	/* make sure that all changes to the dma ring are flushed before we
 	 * continue
 	 */
 	wmb();
 
-	mtk_w32(eth, eth->rx_ring.phys, MTK_QRX_BASE_PTR0);
-	mtk_w32(eth, MTK_DMA_SIZE, MTK_QRX_MAX_CNT0);
-	mtk_w32(eth, eth->rx_ring.calc_idx, MTK_QRX_CRX_IDX0);
-	mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX);
-	mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
+	mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no));
+	mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no));
+	mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+	mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX);
 
 	return 0;
 }
 
-static void mtk_rx_clean(struct mtk_eth *eth)
+static void mtk_rx_clean(struct mtk_eth *eth, int ring_no)
 {
-	struct mtk_rx_ring *ring = &eth->rx_ring;
+	struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
 	int i;
 
 	if (ring->data && ring->dma) {
-		for (i = 0; i < MTK_DMA_SIZE; i++) {
+		for (i = 0; i < ring->dma_size; i++) {
 			if (!ring->data[i])
 				continue;
 			if (!ring->dma[i].rxd1)
@@ -1180,13 +1292,275 @@
 
 	if (ring->dma) {
 		dma_free_coherent(eth->dev,
-				  MTK_DMA_SIZE * sizeof(*ring->dma),
+				  ring->dma_size * sizeof(*ring->dma),
 				  ring->dma,
 				  ring->phys);
 		ring->dma = NULL;
 	}
 }
 
+static int mtk_hwlro_rx_init(struct mtk_eth *eth)
+{
+	int i;
+	u32 ring_ctrl_dw1 = 0, ring_ctrl_dw2 = 0, ring_ctrl_dw3 = 0;
+	u32 lro_ctrl_dw0 = 0, lro_ctrl_dw3 = 0;
+
+	/* set LRO rings to auto-learn modes */
+	ring_ctrl_dw2 |= MTK_RING_AUTO_LERAN_MODE;
+
+	/* validate LRO ring */
+	ring_ctrl_dw2 |= MTK_RING_VLD;
+
+	/* set AGE timer (unit: 20us) */
+	ring_ctrl_dw2 |= MTK_RING_AGE_TIME_H;
+	ring_ctrl_dw1 |= MTK_RING_AGE_TIME_L;
+
+	/* set max AGG timer (unit: 20us) */
+	ring_ctrl_dw2 |= MTK_RING_MAX_AGG_TIME;
+
+	/* set max LRO AGG count */
+	ring_ctrl_dw2 |= MTK_RING_MAX_AGG_CNT_L;
+	ring_ctrl_dw3 |= MTK_RING_MAX_AGG_CNT_H;
+
+	for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) {
+		mtk_w32(eth, ring_ctrl_dw1, MTK_LRO_CTRL_DW1_CFG(i));
+		mtk_w32(eth, ring_ctrl_dw2, MTK_LRO_CTRL_DW2_CFG(i));
+		mtk_w32(eth, ring_ctrl_dw3, MTK_LRO_CTRL_DW3_CFG(i));
+	}
+
+	/* IPv4 checksum update enable */
+	lro_ctrl_dw0 |= MTK_L3_CKS_UPD_EN;
+
+	/* switch priority comparison to packet count mode */
+	lro_ctrl_dw0 |= MTK_LRO_ALT_PKT_CNT_MODE;
+
+	/* bandwidth threshold setting */
+	mtk_w32(eth, MTK_HW_LRO_BW_THRE, MTK_PDMA_LRO_CTRL_DW2);
+
+	/* auto-learn score delta setting */
+	mtk_w32(eth, MTK_HW_LRO_REPLACE_DELTA, MTK_PDMA_LRO_ALT_SCORE_DELTA);
+
+	/* set refresh timer for altering flows to 1 sec. (unit: 20us) */
+	mtk_w32(eth, (MTK_HW_LRO_TIMER_UNIT << 16) | MTK_HW_LRO_REFRESH_TIME,
+		MTK_PDMA_LRO_ALT_REFRESH_TIMER);
+
+	/* set HW LRO mode & the max aggregation count for rx packets */
+	lro_ctrl_dw3 |= MTK_ADMA_MODE | (MTK_HW_LRO_MAX_AGG_CNT & 0xff);
+
+	/* the minimal remaining room of SDL0 in RXD for lro aggregation */
+	lro_ctrl_dw3 |= MTK_LRO_MIN_RXD_SDL;
+
+	/* enable HW LRO */
+	lro_ctrl_dw0 |= MTK_LRO_EN;
+
+	mtk_w32(eth, lro_ctrl_dw3, MTK_PDMA_LRO_CTRL_DW3);
+	mtk_w32(eth, lro_ctrl_dw0, MTK_PDMA_LRO_CTRL_DW0);
+
+	return 0;
+}
+
+static void mtk_hwlro_rx_uninit(struct mtk_eth *eth)
+{
+	int i;
+	u32 val;
+
+	/* relinquish lro rings, flush aggregated packets */
+	mtk_w32(eth, MTK_LRO_RING_RELINQUISH_REQ, MTK_PDMA_LRO_CTRL_DW0);
+
+	/* wait for relinquishments done */
+	for (i = 0; i < 10; i++) {
+		val = mtk_r32(eth, MTK_PDMA_LRO_CTRL_DW0);
+		if (val & MTK_LRO_RING_RELINQUISH_DONE) {
+			msleep(20);
+			continue;
+		}
+		break;
+	}
+
+	/* invalidate lro rings */
+	for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
+		mtk_w32(eth, 0, MTK_LRO_CTRL_DW2_CFG(i));
+
+	/* disable HW LRO */
+	mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0);
+}
+
+static void mtk_hwlro_val_ipaddr(struct mtk_eth *eth, int idx, __be32 ip)
+{
+	u32 reg_val;
+
+	reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx));
+
+	/* invalidate the IP setting */
+	mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx));
+
+	mtk_w32(eth, ip, MTK_LRO_DIP_DW0_CFG(idx));
+
+	/* validate the IP setting */
+	mtk_w32(eth, (reg_val | MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx));
+}
+
+static void mtk_hwlro_inval_ipaddr(struct mtk_eth *eth, int idx)
+{
+	u32 reg_val;
+
+	reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx));
+
+	/* invalidate the IP setting */
+	mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx));
+
+	mtk_w32(eth, 0, MTK_LRO_DIP_DW0_CFG(idx));
+}
+
+static int mtk_hwlro_get_ip_cnt(struct mtk_mac *mac)
+{
+	int cnt = 0;
+	int i;
+
+	for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) {
+		if (mac->hwlro_ip[i])
+			cnt++;
+	}
+
+	return cnt;
+}
+
+static int mtk_hwlro_add_ipaddr(struct net_device *dev,
+				struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+	int hwlro_idx;
+
+	if ((fsp->flow_type != TCP_V4_FLOW) ||
+	    (!fsp->h_u.tcp_ip4_spec.ip4dst) ||
+	    (fsp->location > 1))
+		return -EINVAL;
+
+	mac->hwlro_ip[fsp->location] = htonl(fsp->h_u.tcp_ip4_spec.ip4dst);
+	hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location;
+
+	mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac);
+
+	mtk_hwlro_val_ipaddr(eth, hwlro_idx, mac->hwlro_ip[fsp->location]);
+
+	return 0;
+}
+
+static int mtk_hwlro_del_ipaddr(struct net_device *dev,
+				struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+	int hwlro_idx;
+
+	if (fsp->location > 1)
+		return -EINVAL;
+
+	mac->hwlro_ip[fsp->location] = 0;
+	hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location;
+
+	mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac);
+
+	mtk_hwlro_inval_ipaddr(eth, hwlro_idx);
+
+	return 0;
+}
+
+static void mtk_hwlro_netdev_disable(struct net_device *dev)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_eth *eth = mac->hw;
+	int i, hwlro_idx;
+
+	for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) {
+		mac->hwlro_ip[i] = 0;
+		hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + i;
+
+		mtk_hwlro_inval_ipaddr(eth, hwlro_idx);
+	}
+
+	mac->hwlro_ip_cnt = 0;
+}
+
+static int mtk_hwlro_get_fdir_entry(struct net_device *dev,
+				    struct ethtool_rxnfc *cmd)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	/* only tcp dst ipv4 is meaningful, others are meaningless */
+	fsp->flow_type = TCP_V4_FLOW;
+	fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]);
+	fsp->m_u.tcp_ip4_spec.ip4dst = 0;
+
+	fsp->h_u.tcp_ip4_spec.ip4src = 0;
+	fsp->m_u.tcp_ip4_spec.ip4src = 0xffffffff;
+	fsp->h_u.tcp_ip4_spec.psrc = 0;
+	fsp->m_u.tcp_ip4_spec.psrc = 0xffff;
+	fsp->h_u.tcp_ip4_spec.pdst = 0;
+	fsp->m_u.tcp_ip4_spec.pdst = 0xffff;
+	fsp->h_u.tcp_ip4_spec.tos = 0;
+	fsp->m_u.tcp_ip4_spec.tos = 0xff;
+
+	return 0;
+}
+
+static int mtk_hwlro_get_fdir_all(struct net_device *dev,
+				  struct ethtool_rxnfc *cmd,
+				  u32 *rule_locs)
+{
+	struct mtk_mac *mac = netdev_priv(dev);
+	int cnt = 0;
+	int i;
+
+	for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) {
+		if (mac->hwlro_ip[i]) {
+			rule_locs[cnt] = i;
+			cnt++;
+		}
+	}
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
+static netdev_features_t mtk_fix_features(struct net_device *dev,
+					  netdev_features_t features)
+{
+	if (!(features & NETIF_F_LRO)) {
+		struct mtk_mac *mac = netdev_priv(dev);
+		int ip_cnt = mtk_hwlro_get_ip_cnt(mac);
+
+		if (ip_cnt) {
+			netdev_info(dev, "RX flow is programmed, LRO should keep on\n");
+
+			features |= NETIF_F_LRO;
+		}
+	}
+
+	return features;
+}
+
+static int mtk_set_features(struct net_device *dev, netdev_features_t features)
+{
+	int err = 0;
+
+	if (!((dev->features ^ features) & NETIF_F_LRO))
+		return 0;
+
+	if (!(features & NETIF_F_LRO))
+		mtk_hwlro_netdev_disable(dev);
+
+	return err;
+}
+
 /* wait for DMA to finish whatever it is doing before we start using it again */
 static int mtk_dma_busy_wait(struct mtk_eth *eth)
 {
@@ -1207,6 +1581,7 @@
 static int mtk_dma_init(struct mtk_eth *eth)
 {
 	int err;
+	u32 i;
 
 	if (mtk_dma_busy_wait(eth))
 		return -EBUSY;
@@ -1222,10 +1597,21 @@
 	if (err)
 		return err;
 
-	err = mtk_rx_alloc(eth);
+	err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL);
 	if (err)
 		return err;
 
+	if (eth->hwlro) {
+		for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) {
+			err = mtk_rx_alloc(eth, i, MTK_RX_FLAGS_HWLRO);
+			if (err)
+				return err;
+		}
+		err = mtk_hwlro_rx_init(eth);
+		if (err)
+			return err;
+	}
+
 	/* Enable random early drop and set drop threshold automatically */
 	mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN,
 		MTK_QDMA_FC_THRES);
@@ -1250,7 +1636,14 @@
 		eth->phy_scratch_ring = 0;
 	}
 	mtk_tx_clean(eth);
-	mtk_rx_clean(eth);
+	mtk_rx_clean(eth, 0);
+
+	if (eth->hwlro) {
+		mtk_hwlro_rx_uninit(eth);
+		for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
+			mtk_rx_clean(eth, i);
+	}
+
 	kfree(eth->scratch_head);
 }
 
@@ -1271,7 +1664,7 @@
 
 	if (likely(napi_schedule_prep(&eth->rx_napi))) {
 		__napi_schedule(&eth->rx_napi);
-		mtk_irq_disable(eth, MTK_RX_DONE_INT);
+		mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
 	}
 
 	return IRQ_HANDLED;
@@ -1283,7 +1676,7 @@
 
 	if (likely(napi_schedule_prep(&eth->tx_napi))) {
 		__napi_schedule(&eth->tx_napi);
-		mtk_irq_disable(eth, MTK_TX_DONE_INT);
+		mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
 	}
 
 	return IRQ_HANDLED;
@@ -1294,11 +1687,12 @@
 {
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
-	u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT;
 
-	mtk_irq_disable(eth, int_mask);
+	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
 	mtk_handle_irq_rx(eth->irq[2], dev);
-	mtk_irq_enable(eth, int_mask);
+	mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+	mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
 }
 #endif
 
@@ -1313,11 +1707,15 @@
 	}
 
 	mtk_w32(eth,
-		MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
-		MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
-		MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
+		MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
+		MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO,
 		MTK_QDMA_GLO_CFG);
 
+	mtk_w32(eth,
+		MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
+		MTK_RX_BT_32DWORDS | MTK_MULTI_EN,
+		MTK_PDMA_GLO_CFG);
+
 	return 0;
 }
 
@@ -1335,11 +1733,12 @@
 
 		napi_enable(&eth->tx_napi);
 		napi_enable(&eth->rx_napi);
-		mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+		mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+		mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
 	}
 	atomic_inc(&eth->dma_refcnt);
 
-	phy_start(mac->phy_dev);
+	phy_start(dev->phydev);
 	netif_start_queue(dev);
 
 	return 0;
@@ -1347,16 +1746,15 @@
 
 static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg)
 {
-	unsigned long flags;
 	u32 val;
 	int i;
 
 	/* stop the dma engine */
-	spin_lock_irqsave(&eth->page_lock, flags);
+	spin_lock_bh(&eth->page_lock);
 	val = mtk_r32(eth, glo_cfg);
 	mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN),
 		glo_cfg);
-	spin_unlock_irqrestore(&eth->page_lock, flags);
+	spin_unlock_bh(&eth->page_lock);
 
 	/* wait for dma stop */
 	for (i = 0; i < 10; i++) {
@@ -1375,32 +1773,63 @@
 	struct mtk_eth *eth = mac->hw;
 
 	netif_tx_disable(dev);
-	phy_stop(mac->phy_dev);
+	phy_stop(dev->phydev);
 
 	/* only shutdown DMA if this is the last user */
 	if (!atomic_dec_and_test(&eth->dma_refcnt))
 		return 0;
 
-	mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
 	napi_disable(&eth->tx_napi);
 	napi_disable(&eth->rx_napi);
 
 	mtk_stop_dma(eth, MTK_QDMA_GLO_CFG);
+	mtk_stop_dma(eth, MTK_PDMA_GLO_CFG);
 
 	mtk_dma_free(eth);
 
 	return 0;
 }
 
-static int __init mtk_hw_init(struct mtk_eth *eth)
+static void ethsys_reset(struct mtk_eth *eth, u32 reset_bits)
 {
-	int err, i;
+	regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL,
+			   reset_bits,
+			   reset_bits);
 
-	/* reset the frame engine */
-	reset_control_assert(eth->rstc);
-	usleep_range(10, 20);
-	reset_control_deassert(eth->rstc);
-	usleep_range(10, 20);
+	usleep_range(1000, 1100);
+	regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL,
+			   reset_bits,
+			   ~reset_bits);
+	mdelay(10);
+}
+
+static int mtk_hw_init(struct mtk_eth *eth)
+{
+	int i, val;
+
+	if (test_and_set_bit(MTK_HW_INIT, &eth->state))
+		return 0;
+
+	pm_runtime_enable(eth->dev);
+	pm_runtime_get_sync(eth->dev);
+
+	clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]);
+	clk_prepare_enable(eth->clks[MTK_CLK_ESW]);
+	clk_prepare_enable(eth->clks[MTK_CLK_GP1]);
+	clk_prepare_enable(eth->clks[MTK_CLK_GP2]);
+	ethsys_reset(eth, RSTCTRL_FE);
+	ethsys_reset(eth, RSTCTRL_PPE);
+
+	regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->mac[i])
+			continue;
+		val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, eth->mac[i]->id);
+		val |= SYSCFG0_GE_MODE(eth->mac[i]->ge_mode, eth->mac[i]->id);
+	}
+	regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
 
 	/* Set GE2 driving and slew rate */
 	regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00);
@@ -1420,22 +1849,11 @@
 	/* Enable RX VLan Offloading */
 	mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
 
-	err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
-			       dev_name(eth->dev), eth);
-	if (err)
-		return err;
-	err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
-			       dev_name(eth->dev), eth);
-	if (err)
-		return err;
-
-	err = mtk_mdio_init(eth);
-	if (err)
-		return err;
-
 	/* disable delay and normal interrupt */
 	mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
-	mtk_irq_disable(eth, ~0);
+	mtk_w32(eth, 0, MTK_PDMA_DELAY_INT);
+	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
+	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
 	mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
 	mtk_w32(eth, 0, MTK_RST_GL);
 
@@ -1449,9 +1867,8 @@
 	for (i = 0; i < 2; i++) {
 		u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
 
-		/* setup the forward port to send frame to QDMA */
+		/* setup the forward port to send frame to PDMA */
 		val &= ~0xffff;
-		val |= 0x5555;
 
 		/* Enable RX checksum */
 		val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN;
@@ -1463,6 +1880,22 @@
 	return 0;
 }
 
+static int mtk_hw_deinit(struct mtk_eth *eth)
+{
+	if (!test_and_clear_bit(MTK_HW_INIT, &eth->state))
+		return 0;
+
+	clk_disable_unprepare(eth->clks[MTK_CLK_GP2]);
+	clk_disable_unprepare(eth->clks[MTK_CLK_GP1]);
+	clk_disable_unprepare(eth->clks[MTK_CLK_ESW]);
+	clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]);
+
+	pm_runtime_put_sync(eth->dev);
+	pm_runtime_disable(eth->dev);
+
+	return 0;
+}
+
 static int __init mtk_init(struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
@@ -1481,7 +1914,7 @@
 		dev->addr_assign_type = NET_ADDR_RANDOM;
 	}
 
-	return mtk_phy_connect(mac);
+	return mtk_phy_connect(dev);
 }
 
 static void mtk_uninit(struct net_device *dev)
@@ -1489,22 +1922,18 @@
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
 
-	phy_disconnect(mac->phy_dev);
-	mtk_mdio_cleanup(eth);
-	mtk_irq_disable(eth, ~0);
-	free_irq(eth->irq[1], dev);
-	free_irq(eth->irq[2], dev);
+	phy_disconnect(dev->phydev);
+	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
+	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
 }
 
 static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct mtk_mac *mac = netdev_priv(dev);
-
 	switch (cmd) {
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-		return phy_mii_ioctl(mac->phy_dev, ifr, cmd);
+		return phy_mii_ioctl(dev->phydev, ifr, cmd);
 	default:
 		break;
 	}
@@ -1520,6 +1949,12 @@
 
 	rtnl_lock();
 
+	dev_dbg(eth->dev, "[%s][%d] reset\n", __func__, __LINE__);
+
+	while (test_and_set_bit_lock(MTK_RESETTING, &eth->state))
+		cpu_relax();
+
+	dev_dbg(eth->dev, "[%s][%d] mtk_stop starts\n", __func__, __LINE__);
 	/* stop all devices to make sure that dma is properly shut down */
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
 		if (!eth->netdev[i])
@@ -1527,6 +1962,27 @@
 		mtk_stop(eth->netdev[i]);
 		__set_bit(i, &restart);
 	}
+	dev_dbg(eth->dev, "[%s][%d] mtk_stop ends\n", __func__, __LINE__);
+
+	/* restart underlying hardware such as power, clock, pin mux
+	 * and the connected phy
+	 */
+	mtk_hw_deinit(eth);
+
+	if (eth->dev->pins)
+		pinctrl_select_state(eth->dev->pins->p,
+				     eth->dev->pins->default_state);
+	mtk_hw_init(eth);
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->mac[i] ||
+		    of_phy_is_fixed_link(eth->mac[i]->of_node))
+			continue;
+		err = phy_init_hw(eth->netdev[i]->phydev);
+		if (err)
+			dev_err(eth->dev, "%s: PHY init failed.\n",
+				eth->netdev[i]->name);
+	}
 
 	/* restart DMA and enable IRQs */
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
@@ -1539,51 +1995,69 @@
 			dev_close(eth->netdev[i]);
 		}
 	}
+
+	dev_dbg(eth->dev, "[%s][%d] reset done\n", __func__, __LINE__);
+
+	clear_bit_unlock(MTK_RESETTING, &eth->state);
+
 	rtnl_unlock();
 }
 
-static int mtk_cleanup(struct mtk_eth *eth)
+static int mtk_free_dev(struct mtk_eth *eth)
 {
 	int i;
 
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
 		if (!eth->netdev[i])
 			continue;
-
-		unregister_netdev(eth->netdev[i]);
 		free_netdev(eth->netdev[i]);
 	}
+
+	return 0;
+}
+
+static int mtk_unreg_dev(struct mtk_eth *eth)
+{
+	int i;
+
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		unregister_netdev(eth->netdev[i]);
+	}
+
+	return 0;
+}
+
+static int mtk_cleanup(struct mtk_eth *eth)
+{
+	mtk_unreg_dev(eth);
+	mtk_free_dev(eth);
 	cancel_work_sync(&eth->pending_work);
 
 	return 0;
 }
 
-static int mtk_get_settings(struct net_device *dev,
-			    struct ethtool_cmd *cmd)
+static int mtk_get_link_ksettings(struct net_device *ndev,
+				  struct ethtool_link_ksettings *cmd)
 {
-	struct mtk_mac *mac = netdev_priv(dev);
-	int err;
+	struct mtk_mac *mac = netdev_priv(ndev);
 
-	err = phy_read_status(mac->phy_dev);
-	if (err)
-		return -ENODEV;
+	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+		return -EBUSY;
 
-	return phy_ethtool_gset(mac->phy_dev, cmd);
+	return phy_ethtool_ksettings_get(ndev->phydev, cmd);
 }
 
-static int mtk_set_settings(struct net_device *dev,
-			    struct ethtool_cmd *cmd)
+static int mtk_set_link_ksettings(struct net_device *ndev,
+				  const struct ethtool_link_ksettings *cmd)
 {
-	struct mtk_mac *mac = netdev_priv(dev);
+	struct mtk_mac *mac = netdev_priv(ndev);
 
-	if (cmd->phy_address != mac->phy_dev->mdio.addr) {
-		mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus,
-					       cmd->phy_address);
-		if (!mac->phy_dev)
-			return -ENODEV;
-	}
+	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+		return -EBUSY;
 
-	return phy_ethtool_sset(mac->phy_dev, cmd);
+	return phy_ethtool_ksettings_set(ndev->phydev, cmd);
 }
 
 static void mtk_get_drvinfo(struct net_device *dev,
@@ -1614,7 +2088,10 @@
 {
 	struct mtk_mac *mac = netdev_priv(dev);
 
-	return genphy_restart_aneg(mac->phy_dev);
+	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+		return -EBUSY;
+
+	return genphy_restart_aneg(dev->phydev);
 }
 
 static u32 mtk_get_link(struct net_device *dev)
@@ -1622,11 +2099,14 @@
 	struct mtk_mac *mac = netdev_priv(dev);
 	int err;
 
-	err = genphy_update_link(mac->phy_dev);
+	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+		return -EBUSY;
+
+	err = genphy_update_link(dev->phydev);
 	if (err)
 		return ethtool_op_get_link(dev);
 
-	return mac->phy_dev->link;
+	return dev->phydev->link;
 }
 
 static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data)
@@ -1662,6 +2142,9 @@
 	unsigned int start;
 	int i;
 
+	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+		return;
+
 	if (netif_running(dev) && netif_device_present(dev)) {
 		if (spin_trylock(&hwstats->stats_lock)) {
 			mtk_stats_update_mac(mac);
@@ -1669,8 +2152,9 @@
 		}
 	}
 
+	data_src = (u64 *)hwstats;
+
 	do {
-		data_src = (u64*)hwstats;
 		data_dst = data;
 		start = u64_stats_fetch_begin_irq(&hwstats->syncp);
 
@@ -1679,9 +2163,65 @@
 	} while (u64_stats_fetch_retry_irq(&hwstats->syncp, start));
 }
 
-static struct ethtool_ops mtk_ethtool_ops = {
-	.get_settings		= mtk_get_settings,
-	.set_settings		= mtk_set_settings,
+static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			 u32 *rule_locs)
+{
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		if (dev->features & NETIF_F_LRO) {
+			cmd->data = MTK_MAX_RX_RING_NUM;
+			ret = 0;
+		}
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		if (dev->features & NETIF_F_LRO) {
+			struct mtk_mac *mac = netdev_priv(dev);
+
+			cmd->rule_cnt = mac->hwlro_ip_cnt;
+			ret = 0;
+		}
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		if (dev->features & NETIF_F_LRO)
+			ret = mtk_hwlro_get_fdir_entry(dev, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		if (dev->features & NETIF_F_LRO)
+			ret = mtk_hwlro_get_fdir_all(dev, cmd,
+						     rule_locs);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	int ret = -EOPNOTSUPP;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		if (dev->features & NETIF_F_LRO)
+			ret = mtk_hwlro_add_ipaddr(dev, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		if (dev->features & NETIF_F_LRO)
+			ret = mtk_hwlro_del_ipaddr(dev, cmd);
+		break;
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+static const struct ethtool_ops mtk_ethtool_ops = {
+	.get_link_ksettings	= mtk_get_link_ksettings,
+	.set_link_ksettings	= mtk_set_link_ksettings,
 	.get_drvinfo		= mtk_get_drvinfo,
 	.get_msglevel		= mtk_get_msglevel,
 	.set_msglevel		= mtk_set_msglevel,
@@ -1690,6 +2230,8 @@
 	.get_strings		= mtk_get_strings,
 	.get_sset_count		= mtk_get_sset_count,
 	.get_ethtool_stats	= mtk_get_ethtool_stats,
+	.get_rxnfc		= mtk_get_rxnfc,
+	.set_rxnfc              = mtk_set_rxnfc,
 };
 
 static const struct net_device_ops mtk_netdev_ops = {
@@ -1704,6 +2246,8 @@
 	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_tx_timeout		= mtk_tx_timeout,
 	.ndo_get_stats64        = mtk_get_stats64,
+	.ndo_fix_features	= mtk_fix_features,
+	.ndo_set_features	= mtk_set_features,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= mtk_poll_controller,
 #endif
@@ -1742,6 +2286,9 @@
 	mac->hw = eth;
 	mac->of_node = np;
 
+	memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip));
+	mac->hwlro_ip_cnt = 0;
+
 	mac->hw_stats = devm_kzalloc(eth->dev,
 				     sizeof(*mac->hw_stats),
 				     GFP_KERNEL);
@@ -1751,27 +2298,24 @@
 		goto free_netdev;
 	}
 	spin_lock_init(&mac->hw_stats->stats_lock);
+	u64_stats_init(&mac->hw_stats->syncp);
 	mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET;
 
 	SET_NETDEV_DEV(eth->netdev[id], eth->dev);
 	eth->netdev[id]->watchdog_timeo = 5 * HZ;
 	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
 	eth->netdev[id]->base_addr = (unsigned long)eth->base;
+
+	eth->netdev[id]->hw_features = MTK_HW_FEATURES;
+	if (eth->hwlro)
+		eth->netdev[id]->hw_features |= NETIF_F_LRO;
+
 	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
 		~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
 	eth->netdev[id]->features |= MTK_HW_FEATURES;
 	eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops;
 
-	err = register_netdev(eth->netdev[id]);
-	if (err) {
-		dev_err(eth->dev, "error bringing up device\n");
-		goto free_netdev;
-	}
 	eth->netdev[id]->irq = eth->irq[0];
-	netif_info(eth, probe, eth->netdev[id],
-		   "mediatek frame engine at 0x%08lx, irq %d\n",
-		   eth->netdev[id]->base_addr, eth->irq[0]);
-
 	return 0;
 
 free_netdev:
@@ -1796,6 +2340,7 @@
 	if (!eth)
 		return -ENOMEM;
 
+	eth->dev = &pdev->dev;
 	eth->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(eth->base))
 		return PTR_ERR(eth->base);
@@ -1817,11 +2362,7 @@
 		return PTR_ERR(eth->pctl);
 	}
 
-	eth->rstc = devm_reset_control_get(&pdev->dev, "eth");
-	if (IS_ERR(eth->rstc)) {
-		dev_err(&pdev->dev, "no eth reset found\n");
-		return PTR_ERR(eth->rstc);
-	}
+	eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro");
 
 	for (i = 0; i < 3; i++) {
 		eth->irq[i] = platform_get_irq(pdev, i);
@@ -1830,21 +2371,16 @@
 			return -ENXIO;
 		}
 	}
+	for (i = 0; i < ARRAY_SIZE(eth->clks); i++) {
+		eth->clks[i] = devm_clk_get(eth->dev,
+					    mtk_clks_source_name[i]);
+		if (IS_ERR(eth->clks[i])) {
+			if (PTR_ERR(eth->clks[i]) == -EPROBE_DEFER)
+				return -EPROBE_DEFER;
+			return -ENODEV;
+		}
+	}
 
-	eth->clk_ethif = devm_clk_get(&pdev->dev, "ethif");
-	eth->clk_esw = devm_clk_get(&pdev->dev, "esw");
-	eth->clk_gp1 = devm_clk_get(&pdev->dev, "gp1");
-	eth->clk_gp2 = devm_clk_get(&pdev->dev, "gp2");
-	if (IS_ERR(eth->clk_esw) || IS_ERR(eth->clk_gp1) ||
-	    IS_ERR(eth->clk_gp2) || IS_ERR(eth->clk_ethif))
-		return -ENODEV;
-
-	clk_prepare_enable(eth->clk_ethif);
-	clk_prepare_enable(eth->clk_esw);
-	clk_prepare_enable(eth->clk_gp1);
-	clk_prepare_enable(eth->clk_gp2);
-
-	eth->dev = &pdev->dev;
 	eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE);
 	INIT_WORK(&eth->pending_work, mtk_pending_work);
 
@@ -1862,7 +2398,35 @@
 
 		err = mtk_add_mac(eth, mac_np);
 		if (err)
-			goto err_free_dev;
+			goto err_deinit_hw;
+	}
+
+	err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
+			       dev_name(eth->dev), eth);
+	if (err)
+		goto err_free_dev;
+
+	err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
+			       dev_name(eth->dev), eth);
+	if (err)
+		goto err_free_dev;
+
+	err = mtk_mdio_init(eth);
+	if (err)
+		goto err_free_dev;
+
+	for (i = 0; i < MTK_MAX_DEVS; i++) {
+		if (!eth->netdev[i])
+			continue;
+
+		err = register_netdev(eth->netdev[i]);
+		if (err) {
+			dev_err(eth->dev, "error bringing up device\n");
+			goto err_deinit_mdio;
+		} else
+			netif_info(eth, probe, eth->netdev[i],
+				   "mediatek frame engine at 0x%08lx, irq %d\n",
+				   eth->netdev[i]->base_addr, eth->irq[0]);
 	}
 
 	/* we run 2 devices on the same DMA ring so we need a dummy device
@@ -1878,24 +2442,34 @@
 
 	return 0;
 
+err_deinit_mdio:
+	mtk_mdio_cleanup(eth);
 err_free_dev:
-	mtk_cleanup(eth);
+	mtk_free_dev(eth);
+err_deinit_hw:
+	mtk_hw_deinit(eth);
+
 	return err;
 }
 
 static int mtk_remove(struct platform_device *pdev)
 {
 	struct mtk_eth *eth = platform_get_drvdata(pdev);
+	int i;
 
-	clk_disable_unprepare(eth->clk_ethif);
-	clk_disable_unprepare(eth->clk_esw);
-	clk_disable_unprepare(eth->clk_gp1);
-	clk_disable_unprepare(eth->clk_gp2);
+	/* stop all devices to make sure that dma is properly shut down */
+	for (i = 0; i < MTK_MAC_COUNT; i++) {
+		if (!eth->netdev[i])
+			continue;
+		mtk_stop(eth->netdev[i]);
+	}
+
+	mtk_hw_deinit(eth);
 
 	netif_napi_del(&eth->tx_napi);
 	netif_napi_del(&eth->rx_napi);
 	mtk_cleanup(eth);
-	platform_set_drvdata(pdev, NULL);
+	mtk_mdio_cleanup(eth);
 
 	return 0;
 }
@@ -1904,6 +2478,7 @@
 	{ .compatible = "mediatek,mt7623-eth" },
 	{},
 };
+MODULE_DEVICE_TABLE(of, of_mtk_match);
 
 static struct platform_driver mtk_driver = {
 	.probe = mtk_probe,
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index f82e3ac..3003195 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -39,7 +39,21 @@
 				 NETIF_F_SG | NETIF_F_TSO | \
 				 NETIF_F_TSO6 | \
 				 NETIF_F_IPV6_CSUM)
-#define NEXT_RX_DESP_IDX(X)	(((X) + 1) & (MTK_DMA_SIZE - 1))
+#define NEXT_RX_DESP_IDX(X, Y)	(((X) + 1) & ((Y) - 1))
+
+#define MTK_MAX_RX_RING_NUM	4
+#define MTK_HW_LRO_DMA_SIZE	8
+
+#define	MTK_MAX_LRO_RX_LENGTH		(4096 * 3)
+#define	MTK_MAX_LRO_IP_CNT		2
+#define	MTK_HW_LRO_TIMER_UNIT		1	/* 20 us */
+#define	MTK_HW_LRO_REFRESH_TIME		50000	/* 1 sec. */
+#define	MTK_HW_LRO_AGG_TIME		10	/* 200us */
+#define	MTK_HW_LRO_AGE_TIME		50	/* 1ms */
+#define	MTK_HW_LRO_MAX_AGG_CNT		64
+#define	MTK_HW_LRO_BW_THRE		3000
+#define	MTK_HW_LRO_REPLACE_DELTA	1000
+#define	MTK_HW_LRO_SDL_REMAIN_ROOM	1522
 
 /* Frame Engine Global Reset Register */
 #define MTK_RST_GL		0x04
@@ -50,6 +64,9 @@
 #define MTK_GDM1_AF		BIT(28)
 #define MTK_GDM2_AF		BIT(29)
 
+/* PDMA HW LRO Alter Flow Timer Register */
+#define MTK_PDMA_LRO_ALT_REFRESH_TIMER	0x1c
+
 /* Frame Engine Interrupt Grouping Register */
 #define MTK_FE_INT_GRP		0x20
 
@@ -68,10 +85,77 @@
 /* Unicast Filter MAC Address Register - High */
 #define MTK_GDMA_MAC_ADRH(x)	(0x50C + (x * 0x1000))
 
+/* PDMA RX Base Pointer Register */
+#define MTK_PRX_BASE_PTR0	0x900
+#define MTK_PRX_BASE_PTR_CFG(x)	(MTK_PRX_BASE_PTR0 + (x * 0x10))
+
+/* PDMA RX Maximum Count Register */
+#define MTK_PRX_MAX_CNT0	0x904
+#define MTK_PRX_MAX_CNT_CFG(x)	(MTK_PRX_MAX_CNT0 + (x * 0x10))
+
+/* PDMA RX CPU Pointer Register */
+#define MTK_PRX_CRX_IDX0	0x908
+#define MTK_PRX_CRX_IDX_CFG(x)	(MTK_PRX_CRX_IDX0 + (x * 0x10))
+
+/* PDMA HW LRO Control Registers */
+#define MTK_PDMA_LRO_CTRL_DW0	0x980
+#define MTK_LRO_EN			BIT(0)
+#define MTK_L3_CKS_UPD_EN		BIT(7)
+#define MTK_LRO_ALT_PKT_CNT_MODE	BIT(21)
+#define MTK_LRO_RING_RELINQUISH_REQ	(0x7 << 26)
+#define MTK_LRO_RING_RELINQUISH_DONE	(0x7 << 29)
+
+#define MTK_PDMA_LRO_CTRL_DW1	0x984
+#define MTK_PDMA_LRO_CTRL_DW2	0x988
+#define MTK_PDMA_LRO_CTRL_DW3	0x98c
+#define MTK_ADMA_MODE		BIT(15)
+#define MTK_LRO_MIN_RXD_SDL	(MTK_HW_LRO_SDL_REMAIN_ROOM << 16)
+
+/* PDMA Global Configuration Register */
+#define MTK_PDMA_GLO_CFG	0xa04
+#define MTK_MULTI_EN		BIT(10)
+
+/* PDMA Reset Index Register */
+#define MTK_PDMA_RST_IDX	0xa08
+#define MTK_PST_DRX_IDX0	BIT(16)
+#define MTK_PST_DRX_IDX_CFG(x)	(MTK_PST_DRX_IDX0 << (x))
+
+/* PDMA Delay Interrupt Register */
+#define MTK_PDMA_DELAY_INT	0xa0c
+
+/* PDMA Interrupt Status Register */
+#define MTK_PDMA_INT_STATUS	0xa20
+
+/* PDMA Interrupt Mask Register */
+#define MTK_PDMA_INT_MASK	0xa28
+
+/* PDMA HW LRO Alter Flow Delta Register */
+#define MTK_PDMA_LRO_ALT_SCORE_DELTA	0xa4c
+
 /* PDMA Interrupt grouping registers */
 #define MTK_PDMA_INT_GRP1	0xa50
 #define MTK_PDMA_INT_GRP2	0xa54
 
+/* PDMA HW LRO IP Setting Registers */
+#define MTK_LRO_RX_RING0_DIP_DW0	0xb04
+#define MTK_LRO_DIP_DW0_CFG(x)		(MTK_LRO_RX_RING0_DIP_DW0 + (x * 0x40))
+#define MTK_RING_MYIP_VLD		BIT(9)
+
+/* PDMA HW LRO Ring Control Registers */
+#define MTK_LRO_RX_RING0_CTRL_DW1	0xb28
+#define MTK_LRO_RX_RING0_CTRL_DW2	0xb2c
+#define MTK_LRO_RX_RING0_CTRL_DW3	0xb30
+#define MTK_LRO_CTRL_DW1_CFG(x)		(MTK_LRO_RX_RING0_CTRL_DW1 + (x * 0x40))
+#define MTK_LRO_CTRL_DW2_CFG(x)		(MTK_LRO_RX_RING0_CTRL_DW2 + (x * 0x40))
+#define MTK_LRO_CTRL_DW3_CFG(x)		(MTK_LRO_RX_RING0_CTRL_DW3 + (x * 0x40))
+#define MTK_RING_AGE_TIME_L		((MTK_HW_LRO_AGE_TIME & 0x3ff) << 22)
+#define MTK_RING_AGE_TIME_H		((MTK_HW_LRO_AGE_TIME >> 10) & 0x3f)
+#define MTK_RING_AUTO_LERAN_MODE	(3 << 6)
+#define MTK_RING_VLD			BIT(8)
+#define MTK_RING_MAX_AGG_TIME		((MTK_HW_LRO_AGG_TIME & 0xffff) << 10)
+#define MTK_RING_MAX_AGG_CNT_L		((MTK_HW_LRO_MAX_AGG_CNT & 0x3f) << 26)
+#define MTK_RING_MAX_AGG_CNT_H		((MTK_HW_LRO_MAX_AGG_CNT >> 6) & 0x3)
+
 /* QDMA TX Queue Configuration Registers */
 #define MTK_QTX_CFG(x)		(0x1800 + (x * 0x10))
 #define QDMA_RES_THRES		4
@@ -106,7 +190,6 @@
 
 /* QDMA Reset Index Register */
 #define MTK_QDMA_RST_IDX	0x1A08
-#define MTK_PST_DRX_IDX0	BIT(16)
 
 /* QDMA Delay Interrupt Register */
 #define MTK_QDMA_DELAY_INT	0x1A0C
@@ -119,13 +202,16 @@
 
 /* QDMA Interrupt Status Register */
 #define MTK_QMTK_INT_STATUS	0x1A18
+#define MTK_RX_DONE_INT3	BIT(19)
+#define MTK_RX_DONE_INT2	BIT(18)
 #define MTK_RX_DONE_INT1	BIT(17)
 #define MTK_RX_DONE_INT0	BIT(16)
 #define MTK_TX_DONE_INT3	BIT(3)
 #define MTK_TX_DONE_INT2	BIT(2)
 #define MTK_TX_DONE_INT1	BIT(1)
 #define MTK_TX_DONE_INT0	BIT(0)
-#define MTK_RX_DONE_INT		(MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1)
+#define MTK_RX_DONE_INT		(MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1 | \
+				 MTK_RX_DONE_INT2 | MTK_RX_DONE_INT3)
 #define MTK_TX_DONE_INT		(MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \
 				 MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3)
 
@@ -227,6 +313,30 @@
 				 MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \
 				 MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK)
 
+/* TRGMII RXC control register */
+#define TRGMII_RCK_CTRL		0x10300
+#define DQSI0(x)		((x << 0) & GENMASK(6, 0))
+#define DQSI1(x)		((x << 8) & GENMASK(14, 8))
+#define RXCTL_DMWTLAT(x)	((x << 16) & GENMASK(18, 16))
+#define RXC_DQSISEL		BIT(30)
+#define RCK_CTRL_RGMII_1000	(RXC_DQSISEL | RXCTL_DMWTLAT(2) | DQSI1(16))
+#define RCK_CTRL_RGMII_10_100	RXCTL_DMWTLAT(2)
+
+/* TRGMII RXC control register */
+#define TRGMII_TCK_CTRL		0x10340
+#define TXCTL_DMWTLAT(x)	((x << 16) & GENMASK(18, 16))
+#define TXC_INV			BIT(30)
+#define TCK_CTRL_RGMII_1000	TXCTL_DMWTLAT(2)
+#define TCK_CTRL_RGMII_10_100	(TXC_INV | TXCTL_DMWTLAT(2))
+
+/* TRGMII Interface mode register */
+#define INTF_MODE		0x10390
+#define TRGMII_INTF_DIS		BIT(0)
+#define TRGMII_MODE		BIT(1)
+#define TRGMII_CENTRAL_ALIGNED	BIT(2)
+#define INTF_MODE_RGMII_1000    (TRGMII_MODE | TRGMII_CENTRAL_ALIGNED)
+#define INTF_MODE_RGMII_10_100  0
+
 /* GPIO port control registers for GMAC 2*/
 #define GPIO_OD33_CTRL8		0x4c0
 #define GPIO_BIAS_CTRL		0xed0
@@ -237,6 +347,15 @@
 #define SYSCFG0_GE_MASK		0x3
 #define SYSCFG0_GE_MODE(x, y)	(x << (12 + (y * 2)))
 
+/* ethernet subsystem clock register */
+#define ETHSYS_CLKCFG0		0x2c
+#define ETHSYS_TRGMII_CLK_SEL362_5	BIT(11)
+
+/* ethernet reset control register */
+#define ETHSYS_RSTCTRL		0x34
+#define RSTCTRL_FE		BIT(6)
+#define RSTCTRL_PPE		BIT(31)
+
 struct mtk_rx_dma {
 	unsigned int rxd1;
 	unsigned int rxd2;
@@ -290,6 +409,23 @@
 	MTK_TX_FLAGS_PAGE0	= 0x02,
 };
 
+/* This enum allows us to identify how the clock is defined on the array of the
+ * clock in the order
+ */
+enum mtk_clks_map {
+	MTK_CLK_ETHIF,
+	MTK_CLK_ESW,
+	MTK_CLK_GP1,
+	MTK_CLK_GP2,
+	MTK_CLK_TRGPLL,
+	MTK_CLK_MAX
+};
+
+enum mtk_dev_state {
+	MTK_HW_INIT,
+	MTK_RESETTING
+};
+
 /* struct mtk_tx_buf -	This struct holds the pointers to the memory pointed at
  *			by the TX descriptor	s
  * @skb:		The SKB pointer of the packet being sent
@@ -327,6 +463,12 @@
 	atomic_t free_count;
 };
 
+/* PDMA rx ring mode */
+enum mtk_rx_flags {
+	MTK_RX_FLAGS_NORMAL = 0,
+	MTK_RX_FLAGS_HWLRO,
+};
+
 /* struct mtk_rx_ring -	This struct holds info describing a RX ring
  * @dma:		The descriptor ring
  * @data:		The memory pointed at by the ring
@@ -341,7 +483,10 @@
 	dma_addr_t phys;
 	u16 frag_size;
 	u16 buf_size;
+	u16 dma_size;
+	bool calc_idx_update;
 	u16 calc_idx;
+	u32 crx_idx_reg;
 };
 
 /* currently no SoC has more than 2 macs */
@@ -370,18 +515,15 @@
  * @scratch_ring:	Newer SoCs need memory for a second HW managed TX ring
  * @phy_scratch_ring:	physical address of scratch_ring
  * @scratch_head:	The scratch memory that scratch_ring points to.
- * @clk_ethif:		The ethif clock
- * @clk_esw:		The switch clock
- * @clk_gp1:		The gmac1 clock
- * @clk_gp2:		The gmac2 clock
+ * @clks:		clock array for all clocks required
  * @mii_bus:		If there is a bus we need to create an instance for it
  * @pending_work:	The workqueue used to reset the dma ring
+ * @state               Initialization and runtime state of the device.
  */
 
 struct mtk_eth {
 	struct device			*dev;
 	void __iomem			*base;
-	struct reset_control		*rstc;
 	spinlock_t			page_lock;
 	spinlock_t			irq_lock;
 	struct net_device		dummy_dev;
@@ -392,36 +534,41 @@
 	unsigned long			sysclk;
 	struct regmap			*ethsys;
 	struct regmap			*pctl;
+	bool				hwlro;
 	atomic_t			dma_refcnt;
 	struct mtk_tx_ring		tx_ring;
-	struct mtk_rx_ring		rx_ring;
+	struct mtk_rx_ring		rx_ring[MTK_MAX_RX_RING_NUM];
 	struct napi_struct		tx_napi;
 	struct napi_struct		rx_napi;
 	struct mtk_tx_dma		*scratch_ring;
 	dma_addr_t			phy_scratch_ring;
 	void				*scratch_head;
-	struct clk			*clk_ethif;
-	struct clk			*clk_esw;
-	struct clk			*clk_gp1;
-	struct clk			*clk_gp2;
+	struct clk			*clks[MTK_CLK_MAX];
+
 	struct mii_bus			*mii_bus;
 	struct work_struct		pending_work;
+	unsigned long			state;
 };
 
 /* struct mtk_mac -	the structure that holds the info about the MACs of the
  *			SoC
  * @id:			The number of the MAC
+ * @ge_mode:            Interface mode kept for setup restoring
  * @of_node:		Our devicetree node
  * @hw:			Backpointer to our main datastruture
  * @hw_stats:		Packet statistics counter
- * @phy_dev:		The attached PHY if available
+ * @trgmii		Indicate if the MAC uses TRGMII connected to internal
+			switch
  */
 struct mtk_mac {
 	int				id;
+	int				ge_mode;
 	struct device_node		*of_node;
 	struct mtk_eth			*hw;
 	struct mtk_hw_stats		*hw_stats;
-	struct phy_device		*phy_dev;
+	__be32				hwlro_ip[MTK_MAX_LRO_IP_CNT];
+	int				hwlro_ip_cnt;
+	bool				trgmii;
 };
 
 /* the struct describing the SoC. these are declared in the soc_xyz.c files */
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index f04a423..b1cef7a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -785,17 +785,23 @@
 		return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO);
 
 	if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) {
+		int ret;
+
 		if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)
 			return mlx4_internal_err_ret_value(dev, op,
 							  op_modifier);
+		down_read(&mlx4_priv(dev)->cmd.switch_sem);
 		if (mlx4_priv(dev)->cmd.use_events)
-			return mlx4_cmd_wait(dev, in_param, out_param,
-					     out_is_imm, in_modifier,
-					     op_modifier, op, timeout);
+			ret = mlx4_cmd_wait(dev, in_param, out_param,
+					    out_is_imm, in_modifier,
+					    op_modifier, op, timeout);
 		else
-			return mlx4_cmd_poll(dev, in_param, out_param,
-					     out_is_imm, in_modifier,
-					     op_modifier, op, timeout);
+			ret = mlx4_cmd_poll(dev, in_param, out_param,
+					    out_is_imm, in_modifier,
+					    op_modifier, op, timeout);
+
+		up_read(&mlx4_priv(dev)->cmd.switch_sem);
+		return ret;
 	}
 	return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm,
 			      in_modifier, op_modifier, op, timeout);
@@ -1845,6 +1851,7 @@
 
 	if (vp_oper->state.default_vlan == vp_admin->default_vlan &&
 	    vp_oper->state.default_qos == vp_admin->default_qos &&
+	    vp_oper->state.vlan_proto == vp_admin->vlan_proto &&
 	    vp_oper->state.link_state == vp_admin->link_state &&
 	    vp_oper->state.qos_vport == vp_admin->qos_vport)
 		return 0;
@@ -1903,6 +1910,7 @@
 
 	vp_oper->state.default_vlan = vp_admin->default_vlan;
 	vp_oper->state.default_qos = vp_admin->default_qos;
+	vp_oper->state.vlan_proto = vp_admin->vlan_proto;
 	vp_oper->state.link_state = vp_admin->link_state;
 	vp_oper->state.qos_vport = vp_admin->qos_vport;
 
@@ -1916,6 +1924,7 @@
 	work->qos_vport = vp_oper->state.qos_vport;
 	work->vlan_id = vp_oper->state.default_vlan;
 	work->vlan_ix = vp_oper->vlan_idx;
+	work->vlan_proto = vp_oper->state.vlan_proto;
 	work->priv = priv;
 	INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler);
 	queue_work(priv->mfunc.master.comm_wq, &work->work);
@@ -1986,6 +1995,8 @@
 	int port, err;
 	struct mlx4_vport_state *vp_admin;
 	struct mlx4_vport_oper_state *vp_oper;
+	struct mlx4_slave_state *slave_state =
+		&priv->mfunc.master.slave_state[slave];
 	struct mlx4_active_ports actv_ports = mlx4_get_active_ports(
 			&priv->dev, slave);
 	int min_port = find_first_bit(actv_ports.ports,
@@ -2000,12 +2011,26 @@
 			priv->mfunc.master.vf_admin[slave].enable_smi[port];
 		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 		vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
-		vp_oper->state = *vp_admin;
+		if (vp_admin->vlan_proto != htons(ETH_P_8021AD) ||
+		    slave_state->vst_qinq_supported) {
+			vp_oper->state.vlan_proto   = vp_admin->vlan_proto;
+			vp_oper->state.default_vlan = vp_admin->default_vlan;
+			vp_oper->state.default_qos  = vp_admin->default_qos;
+		}
+		vp_oper->state.link_state = vp_admin->link_state;
+		vp_oper->state.mac        = vp_admin->mac;
+		vp_oper->state.spoofchk   = vp_admin->spoofchk;
+		vp_oper->state.tx_rate    = vp_admin->tx_rate;
+		vp_oper->state.qos_vport  = vp_admin->qos_vport;
+		vp_oper->state.guid       = vp_admin->guid;
+
 		if (MLX4_VGT != vp_admin->default_vlan) {
 			err = __mlx4_register_vlan(&priv->dev, port,
 						   vp_admin->default_vlan, &(vp_oper->vlan_idx));
 			if (err) {
 				vp_oper->vlan_idx = NO_INDX;
+				vp_oper->state.default_vlan = MLX4_VGT;
+				vp_oper->state.vlan_proto = htons(ETH_P_8021Q);
 				mlx4_warn(&priv->dev,
 					  "No vlan resources slave %d, port %d\n",
 					  slave, port);
@@ -2086,6 +2111,7 @@
 		mlx4_warn(dev, "Received reset from slave:%d\n", slave);
 		slave_state[slave].active = false;
 		slave_state[slave].old_vlan_api = false;
+		slave_state[slave].vst_qinq_supported = false;
 		mlx4_master_deactivate_admin_state(priv, slave);
 		for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) {
 				slave_state[slave].event_eq[i].eqn = -1;
@@ -2353,6 +2379,7 @@
 			vf_oper = &priv->mfunc.master.vf_oper[i];
 			s_state = &priv->mfunc.master.slave_state[i];
 			s_state->last_cmd = MLX4_COMM_CMD_RESET;
+			s_state->vst_qinq_supported = false;
 			mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]);
 			for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j)
 				s_state->event_eq[j].eqn = -1;
@@ -2382,6 +2409,8 @@
 				admin_vport->qos_vport =
 						MLX4_VPP_DEFAULT_VPORT;
 				oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
+				admin_vport->vlan_proto = htons(ETH_P_8021Q);
+				oper_vport->vlan_proto = htons(ETH_P_8021Q);
 				vf_oper->vport[port].vlan_idx = NO_INDX;
 				vf_oper->vport[port].mac_idx = NO_INDX;
 				mlx4_set_random_admin_guid(dev, i, port);
@@ -2454,6 +2483,7 @@
 	int flags = 0;
 
 	if (!priv->cmd.initialized) {
+		init_rwsem(&priv->cmd.switch_sem);
 		mutex_init(&priv->cmd.slave_cmd_mutex);
 		sema_init(&priv->cmd.poll_sem, 1);
 		priv->cmd.use_events = 0;
@@ -2583,6 +2613,7 @@
 	if (!priv->cmd.context)
 		return -ENOMEM;
 
+	down_write(&priv->cmd.switch_sem);
 	for (i = 0; i < priv->cmd.max_cmds; ++i) {
 		priv->cmd.context[i].token = i;
 		priv->cmd.context[i].next  = i + 1;
@@ -2606,6 +2637,7 @@
 
 	down(&priv->cmd.poll_sem);
 	priv->cmd.use_events = 1;
+	up_write(&priv->cmd.switch_sem);
 
 	return err;
 }
@@ -2618,6 +2650,7 @@
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int i;
 
+	down_write(&priv->cmd.switch_sem);
 	priv->cmd.use_events = 0;
 
 	for (i = 0; i < priv->cmd.max_cmds; ++i)
@@ -2626,6 +2659,7 @@
 	kfree(priv->cmd.context);
 
 	up(&priv->cmd.poll_sem);
+	up_write(&priv->cmd.switch_sem);
 }
 
 struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
@@ -2937,10 +2971,13 @@
 EXPORT_SYMBOL_GPL(mlx4_set_vf_mac);
 
 
-int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos)
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos,
+		     __be16 proto)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_vport_state *vf_admin;
+	struct mlx4_slave_state *slave_state;
+	struct mlx4_vport_oper_state *vf_oper;
 	int slave;
 
 	if ((!mlx4_is_master(dev)) ||
@@ -2950,12 +2987,31 @@
 	if ((vlan > 4095) || (qos > 7))
 		return -EINVAL;
 
+	if (proto == htons(ETH_P_8021AD) &&
+	    !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP))
+		return -EPROTONOSUPPORT;
+
+	if (proto != htons(ETH_P_8021Q) &&
+	    proto != htons(ETH_P_8021AD))
+		return -EINVAL;
+
+	if ((proto == htons(ETH_P_8021AD)) &&
+	    ((vlan == 0) || (vlan == MLX4_VGT)))
+		return -EINVAL;
+
 	slave = mlx4_get_slave_indx(dev, vf);
 	if (slave < 0)
 		return -EINVAL;
 
+	slave_state = &priv->mfunc.master.slave_state[slave];
+	if ((proto == htons(ETH_P_8021AD)) && (slave_state->active) &&
+	    (!slave_state->vst_qinq_supported)) {
+		mlx4_err(dev, "vf %d does not support VST QinQ mode\n", vf);
+		return -EPROTONOSUPPORT;
+	}
 	port = mlx4_slaves_closest_port(dev, slave, port);
 	vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+	vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 
 	if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos))
 		return -EPERM;
@@ -2965,6 +3021,7 @@
 	else
 		vf_admin->default_vlan = vlan;
 	vf_admin->default_qos = qos;
+	vf_admin->vlan_proto = proto;
 
 	/* If rate was configured prior to VST, we saved the configured rate
 	 * in vf_admin->rate and now, if priority supported we enforce the QoS
@@ -2973,7 +3030,12 @@
 	    vf_admin->tx_rate)
 		vf_admin->qos_vport = slave;
 
-	if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
+	/* Try to activate new vf state without restart,
+	 * this option is not supported while moving to VST QinQ mode.
+	 */
+	if ((proto == htons(ETH_P_8021AD) &&
+	     vf_oper->state.vlan_proto != proto) ||
+	    mlx4_master_immediate_activate_vlan_qos(priv, slave, port))
 		mlx4_info(dev,
 			  "updating vf %d port %d config will take effect on next VF restart\n",
 			  vf, port);
@@ -3117,6 +3179,7 @@
 
 	ivf->vlan		= s_info->default_vlan;
 	ivf->qos		= s_info->default_qos;
+	ivf->vlan_proto		= s_info->vlan_proto;
 
 	if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info))
 		ivf->max_tx_rate = s_info->tx_rate;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index 1494997..08fc5fc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -298,7 +298,7 @@
 	if (IS_ERR(mdev->ptp_clock)) {
 		mdev->ptp_clock = NULL;
 		mlx4_err(mdev, "ptp_clock_register failed\n");
-	} else {
+	} else if (mdev->ptp_clock) {
 		mlx4_info(mdev, "registered PHC clock\n");
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
index 99c6bbd..b04760a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_dcb_nl.c
@@ -94,7 +94,7 @@
 		*cap = true;
 		break;
 	case DCB_CAP_ATTR_DCBX:
-		*cap = priv->cee_params.dcbx_cap;
+		*cap = priv->dcbx_cap;
 		break;
 	case DCB_CAP_ATTR_PFC_TCS:
 		*cap = 1 <<  mlx4_max_tc(priv->mdev->dev);
@@ -111,14 +111,14 @@
 {
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 
-	return priv->cee_params.dcb_cfg.pfc_state;
+	return priv->cee_config.pfc_state;
 }
 
 static void mlx4_en_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
 {
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 
-	priv->cee_params.dcb_cfg.pfc_state = state;
+	priv->cee_config.pfc_state = state;
 }
 
 static void mlx4_en_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
@@ -126,7 +126,7 @@
 {
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 
-	*setting = priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc;
+	*setting = priv->cee_config.dcb_pfc[priority];
 }
 
 static void mlx4_en_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
@@ -134,8 +134,8 @@
 {
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 
-	priv->cee_params.dcb_cfg.tc_config[priority].dcb_pfc = setting;
-	priv->cee_params.dcb_cfg.pfc_state = true;
+	priv->cee_config.dcb_pfc[priority] = setting;
+	priv->cee_config.pfc_state = true;
 }
 
 static int mlx4_en_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
@@ -157,13 +157,11 @@
 {
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 	struct mlx4_en_dev *mdev = priv->mdev;
-	struct mlx4_en_cee_config *dcb_cfg = &priv->cee_params.dcb_cfg;
-	int err = 0;
 
-	if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE))
-		return -EINVAL;
+	if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+		return 1;
 
-	if (dcb_cfg->pfc_state) {
+	if (priv->cee_config.pfc_state) {
 		int tc;
 
 		priv->prof->rx_pause = 0;
@@ -171,7 +169,7 @@
 		for (tc = 0; tc < CEE_DCBX_MAX_PRIO; tc++) {
 			u8 tc_mask = 1 << tc;
 
-			switch (dcb_cfg->tc_config[tc].dcb_pfc) {
+			switch (priv->cee_config.dcb_pfc[tc]) {
 			case pfc_disabled:
 				priv->prof->tx_ppp &= ~tc_mask;
 				priv->prof->rx_ppp &= ~tc_mask;
@@ -199,15 +197,17 @@
 		en_dbg(DRV, priv, "Set pfc off\n");
 	}
 
-	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
-				    priv->rx_skb_size + ETH_FCS_LEN,
-				    priv->prof->tx_pause,
-				    priv->prof->tx_ppp,
-				    priv->prof->rx_pause,
-				    priv->prof->rx_ppp);
-	if (err)
+	if (mlx4_SET_PORT_general(mdev->dev, priv->port,
+				  priv->rx_skb_size + ETH_FCS_LEN,
+				  priv->prof->tx_pause,
+				  priv->prof->tx_ppp,
+				  priv->prof->rx_pause,
+				  priv->prof->rx_ppp)) {
 		en_err(priv, "Failed setting pause params\n");
-	return err;
+		return 1;
+	}
+
+	return 0;
 }
 
 static u8 mlx4_en_dcbnl_get_state(struct net_device *dev)
@@ -225,7 +225,7 @@
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	int num_tcs = 0;
 
-	if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+	if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
 		return 1;
 
 	if (!!(state) == !!(priv->flags & MLX4_EN_FLAG_DCB_ENABLED))
@@ -238,7 +238,10 @@
 		priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
 	}
 
-	return mlx4_en_setup_tc(dev, num_tcs);
+	if (mlx4_en_setup_tc(dev, num_tcs))
+		return 1;
+
+	return 0;
 }
 
 /* On success returns a non-zero 802.1p user priority bitmap
@@ -252,7 +255,7 @@
 				.selector = idtype,
 				.protocol = id,
 			     };
-	if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+	if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
 		return 0;
 
 	return dcb_getapp(netdev, &app);
@@ -264,7 +267,7 @@
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 	struct dcb_app app;
 
-	if (!(priv->cee_params.dcbx_cap & DCB_CAP_DCBX_VER_CEE))
+	if (!(priv->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
 		return -EINVAL;
 
 	memset(&app, 0, sizeof(struct dcb_app));
@@ -433,7 +436,7 @@
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 
-	return priv->cee_params.dcbx_cap;
+	return priv->dcbx_cap;
 }
 
 static u8 mlx4_en_dcbnl_setdcbx(struct net_device *dev, u8 mode)
@@ -442,7 +445,7 @@
 	struct ieee_ets ets = {0};
 	struct ieee_pfc pfc = {0};
 
-	if (mode == priv->cee_params.dcbx_cap)
+	if (mode == priv->dcbx_cap)
 		return 0;
 
 	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
@@ -451,7 +454,7 @@
 	    !(mode & DCB_CAP_DCBX_HOST))
 		goto err;
 
-	priv->cee_params.dcbx_cap = mode;
+	priv->dcbx_cap = mode;
 
 	ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
 	pfc.pfc_cap = IEEE_8021QAZ_MAX_TCS;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 4198e9b..7e703be 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -71,10 +71,11 @@
 #ifdef CONFIG_MLX4_EN_DCB
 	if (!mlx4_is_slave(priv->mdev->dev)) {
 		if (up) {
-			priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
+			if (priv->dcbx_cap)
+				priv->flags |= MLX4_EN_FLAG_DCB_ENABLED;
 		} else {
 			priv->flags &= ~MLX4_EN_FLAG_DCB_ENABLED;
-			priv->cee_params.dcb_cfg.pfc_state = false;
+			priv->cee_config.pfc_state = false;
 		}
 	}
 #endif /* CONFIG_MLX4_EN_DCB */
@@ -2399,12 +2400,14 @@
 	return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64);
 }
 
-static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
+static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+			       __be16 vlan_proto)
 {
 	struct mlx4_en_priv *en_priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = en_priv->mdev;
 
-	return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos);
+	return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos,
+				vlan_proto);
 }
 
 static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
@@ -2642,12 +2645,16 @@
 			if (IS_ERR(prog))
 				return PTR_ERR(prog);
 		}
+		mutex_lock(&mdev->state_lock);
 		for (i = 0; i < priv->rx_ring_num; i++) {
-			/* This xchg is paired with READ_ONCE in the fastpath */
-			old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
+			old_prog = rcu_dereference_protected(
+					priv->rx_ring[i]->xdp_prog,
+					lockdep_is_held(&mdev->state_lock));
+			rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
 			if (old_prog)
 				bpf_prog_put(old_prog);
 		}
+		mutex_unlock(&mdev->state_lock);
 		return 0;
 	}
 
@@ -2680,7 +2687,10 @@
 							priv->xdp_ring_num);
 
 	for (i = 0; i < priv->rx_ring_num; i++) {
-		old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog);
+		old_prog = rcu_dereference_protected(
+					priv->rx_ring[i]->xdp_prog,
+					lockdep_is_held(&mdev->state_lock));
+		rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog);
 		if (old_prog)
 			bpf_prog_put(old_prog);
 	}
@@ -3048,9 +3058,6 @@
 	struct mlx4_en_priv *priv;
 	int i;
 	int err;
-#ifdef CONFIG_MLX4_EN_DCB
-	struct tc_configuration *tc;
-#endif
 
 	dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv),
 				 MAX_TX_RINGS, MAX_RX_RINGS);
@@ -3117,16 +3124,13 @@
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 #ifdef CONFIG_MLX4_EN_DCB
 	if (!mlx4_is_slave(priv->mdev->dev)) {
-		priv->cee_params.dcbx_cap = DCB_CAP_DCBX_VER_CEE |
-					    DCB_CAP_DCBX_HOST |
-					    DCB_CAP_DCBX_VER_IEEE;
+		priv->dcbx_cap = DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_HOST |
+			DCB_CAP_DCBX_VER_IEEE;
 		priv->flags |= MLX4_EN_DCB_ENABLED;
-		priv->cee_params.dcb_cfg.pfc_state = false;
+		priv->cee_config.pfc_state = false;
 
-		for (i = 0; i < MLX4_EN_NUM_UP; i++) {
-			tc = &priv->cee_params.dcb_cfg.tc_config[i];
-			tc->dcb_pfc = pfc_disabled;
-		}
+		for (i = 0; i < MLX4_EN_NUM_UP; i++)
+			priv->cee_config.dcb_pfc[i] = pfc_disabled;
 
 		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) {
 			dev->dcbnl_ops = &mlx4_en_dcbnl_ops;
@@ -3222,6 +3226,7 @@
 	}
 
 	if (mlx4_is_slave(mdev->dev)) {
+		bool vlan_offload_disabled;
 		int phv;
 
 		err = get_phv_bit(mdev->dev, port, &phv);
@@ -3229,6 +3234,18 @@
 			dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX;
 			priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV;
 		}
+		err = mlx4_get_is_vlan_offload_disabled(mdev->dev, port,
+							&vlan_offload_disabled);
+		if (!err && vlan_offload_disabled) {
+			dev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+					      NETIF_F_HW_VLAN_CTAG_RX |
+					      NETIF_F_HW_VLAN_STAG_TX |
+					      NETIF_F_HW_VLAN_STAG_RX);
+			dev->features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+					   NETIF_F_HW_VLAN_CTAG_RX |
+					   NETIF_F_HW_VLAN_STAG_TX |
+					   NETIF_F_HW_VLAN_STAG_RX);
+		}
 	} else {
 		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN &&
 		    !(mdev->dev->caps.flags2 &
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 2040dad..f2e8bed 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -72,7 +72,7 @@
 	}
 	dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
 			   frag_info->dma_dir);
-	if (dma_mapping_error(priv->ddev, dma)) {
+	if (unlikely(dma_mapping_error(priv->ddev, dma))) {
 		put_page(page);
 		return -ENOMEM;
 	}
@@ -108,7 +108,8 @@
 		    ring_alloc[i].page_size)
 			continue;
 
-		if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp))
+		if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
+					      frag_info, gfp)))
 			goto out;
 	}
 
@@ -537,7 +538,9 @@
 	struct mlx4_en_rx_ring *ring = *pring;
 	struct bpf_prog *old_prog;
 
-	old_prog = READ_ONCE(ring->xdp_prog);
+	old_prog = rcu_dereference_protected(
+					ring->xdp_prog,
+					lockdep_is_held(&mdev->state_lock));
 	if (old_prog)
 		bpf_prog_put(old_prog);
 	mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
@@ -583,7 +586,7 @@
 		frag_info = &priv->frag_info[nr];
 		if (length <= frag_info->frag_prefix_size)
 			break;
-		if (!frags[nr].page)
+		if (unlikely(!frags[nr].page))
 			goto fail;
 
 		dma = be64_to_cpu(rx_desc->data[nr].addr);
@@ -623,7 +626,7 @@
 	dma_addr_t dma;
 
 	skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN);
-	if (!skb) {
+	if (unlikely(!skb)) {
 		en_dbg(RX_ERR, priv, "Failed allocating skb\n");
 		return NULL;
 	}
@@ -734,7 +737,8 @@
 {
 	__wsum csum_pseudo_hdr = 0;
 
-	if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS)
+	if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT ||
+		     ipv6h->nexthdr == IPPROTO_HOPOPTS))
 		return -1;
 	hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr));
 
@@ -767,7 +771,7 @@
 		get_fixed_ipv4_csum(hw_checksum, skb, hdr);
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
-		if (get_fixed_ipv6_csum(hw_checksum, skb, hdr))
+		if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr)))
 			return -1;
 #endif
 	return 0;
@@ -794,13 +798,15 @@
 	u64 timestamp;
 	bool l2_tunnel;
 
-	if (!priv->port_up)
+	if (unlikely(!priv->port_up))
 		return 0;
 
-	if (budget <= 0)
+	if (unlikely(budget <= 0))
 		return polled;
 
-	xdp_prog = READ_ONCE(ring->xdp_prog);
+	/* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(ring->xdp_prog);
 	doorbell_pending = 0;
 	tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
 
@@ -858,15 +864,11 @@
 				/* Drop the packet, since HW loopback-ed it */
 				mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
 				bucket = &priv->mac_hash[mac_hash];
-				rcu_read_lock();
 				hlist_for_each_entry_rcu(entry, bucket, hlist) {
 					if (ether_addr_equal_64bits(entry->mac,
-								    ethh->h_source)) {
-						rcu_read_unlock();
+								    ethh->h_source))
 						goto next;
-					}
 				}
-				rcu_read_unlock();
 			}
 		}
 
@@ -902,16 +904,17 @@
 			case XDP_PASS:
 				break;
 			case XDP_TX:
-				if (!mlx4_en_xmit_frame(frags, dev,
+				if (likely(!mlx4_en_xmit_frame(frags, dev,
 							length, tx_index,
-							&doorbell_pending))
+							&doorbell_pending)))
 					goto consumed;
-				break;
+				goto xdp_drop; /* Drop on xmit failure */
 			default:
 				bpf_warn_invalid_xdp_action(act);
 			case XDP_ABORTED:
 			case XDP_DROP:
-				if (mlx4_en_rx_recycle(ring, frags))
+xdp_drop:
+				if (likely(mlx4_en_rx_recycle(ring, frags)))
 					goto consumed;
 				goto next;
 			}
@@ -1015,12 +1018,12 @@
 
 		/* GRO not possible, complete processing here */
 		skb = mlx4_en_rx_skb(priv, rx_desc, frags, length);
-		if (!skb) {
+		if (unlikely(!skb)) {
 			ring->dropped++;
 			goto next;
 		}
 
-                if (unlikely(priv->validate_loopback)) {
+		if (unlikely(priv->validate_loopback)) {
 			validate_loopback(priv, skb);
 			goto next;
 		}
@@ -1077,6 +1080,7 @@
 	}
 
 out:
+	rcu_read_unlock();
 	if (doorbell_pending)
 		mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 9df87ca..e2509bb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -818,7 +818,7 @@
 	real_size = get_real_size(skb, shinfo, dev, &lso_header_size,
 				  &inline_ok, &fragptr);
 	if (unlikely(!real_size))
-		goto tx_drop;
+		goto tx_drop_count;
 
 	/* Align descriptor to TXBB size */
 	desc_size = ALIGN(real_size, TXBB_SIZE);
@@ -826,7 +826,7 @@
 	if (unlikely(nr_txbb > MAX_DESC_TXBBS)) {
 		if (netif_msg_tx_err(priv))
 			en_warn(priv, "Oversized header or SG list\n");
-		goto tx_drop;
+		goto tx_drop_count;
 	}
 
 	bf_ok = ring->bf_enabled;
@@ -1071,9 +1071,10 @@
 			       PCI_DMA_TODEVICE);
 	}
 
+tx_drop_count:
+	ring->tx_dropped++;
 tx_drop:
 	dev_kfree_skb_any(skb);
-	ring->tx_dropped++;
 	return NETDEV_TX_OK;
 }
 
@@ -1106,7 +1107,7 @@
 		goto tx_drop;
 
 	if (mlx4_en_is_tx_ring_full(ring))
-		goto tx_drop;
+		goto tx_drop_count;
 
 	/* fetch ring->cons far ahead before needing it to avoid stall */
 	ring_cons = READ_ONCE(ring->cons);
@@ -1176,7 +1177,8 @@
 
 	return NETDEV_TX_OK;
 
-tx_drop:
+tx_drop_count:
 	ring->tx_dropped++;
+tx_drop:
 	return NETDEV_TX_BUSY;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index f613977..cf8f8a7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1305,8 +1305,8 @@
 	return 0;
 
 err_out_unmap:
-	while (i >= 0)
-		mlx4_free_eq(dev, &priv->eq_table.eq[i--]);
+	while (i > 0)
+		mlx4_free_eq(dev, &priv->eq_table.eq[--i]);
 #ifdef CONFIG_RFS_ACCEL
 	for (i = 1; i <= dev->caps.num_ports; i++) {
 		if (mlx4_priv(dev)->port[i].rmap) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index d728704..f9cbc67 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -158,7 +158,8 @@
 		[31] = "Modifying loopback source checks using UPDATE_QP support",
 		[32] = "Loopback source checks support",
 		[33] = "RoCEv2 support",
-		[34] = "DMFS Sniffer support (UC & MC)"
+		[34] = "DMFS Sniffer support (UC & MC)",
+		[35] = "QinQ VST mode support",
 	};
 	int i;
 
@@ -248,6 +249,72 @@
 	return err;
 }
 
+static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port)
+{
+	struct mlx4_vport_oper_state *vp_oper;
+	struct mlx4_vport_state *vp_admin;
+	int err;
+
+	vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+	vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+
+	if (vp_admin->default_vlan != vp_oper->state.default_vlan) {
+		err = __mlx4_register_vlan(&priv->dev, port,
+					   vp_admin->default_vlan,
+					   &vp_oper->vlan_idx);
+		if (err) {
+			vp_oper->vlan_idx = NO_INDX;
+			mlx4_warn(&priv->dev,
+				  "No vlan resources slave %d, port %d\n",
+				  slave, port);
+			return err;
+		}
+		mlx4_dbg(&priv->dev, "alloc vlan %d idx  %d slave %d port %d\n",
+			 (int)(vp_oper->state.default_vlan),
+			 vp_oper->vlan_idx, slave, port);
+	}
+	vp_oper->state.vlan_proto   = vp_admin->vlan_proto;
+	vp_oper->state.default_vlan = vp_admin->default_vlan;
+	vp_oper->state.default_qos  = vp_admin->default_qos;
+
+	return 0;
+}
+
+static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port)
+{
+	struct mlx4_vport_oper_state *vp_oper;
+	struct mlx4_slave_state *slave_state;
+	struct mlx4_vport_state *vp_admin;
+	int err;
+
+	vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+	vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
+	slave_state = &priv->mfunc.master.slave_state[slave];
+
+	if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) ||
+	    (!slave_state->active))
+		return 0;
+
+	if (vp_oper->state.vlan_proto == vp_admin->vlan_proto &&
+	    vp_oper->state.default_vlan == vp_admin->default_vlan &&
+	    vp_oper->state.default_qos == vp_admin->default_qos)
+		return 0;
+
+	if (!slave_state->vst_qinq_supported) {
+		/* Warn and revert the request to set vst QinQ mode */
+		vp_admin->vlan_proto   = vp_oper->state.vlan_proto;
+		vp_admin->default_vlan = vp_oper->state.default_vlan;
+		vp_admin->default_qos  = vp_oper->state.default_qos;
+
+		mlx4_warn(&priv->dev,
+			  "Slave %d does not support VST QinQ mode\n", slave);
+		return 0;
+	}
+
+	err = mlx4_activate_vst_qinq(priv, slave, port);
+	return err;
+}
+
 int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_vhcr *vhcr,
 				struct mlx4_cmd_mailbox *inbox,
@@ -311,14 +378,18 @@
 #define QUERY_FUNC_CAP_VF_ENABLE_QP0		0x08
 
 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
-#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31)
 #define QUERY_FUNC_CAP_PHV_BIT			0x40
+#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE	0x20
+
+#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ	BIT(30)
+#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31)
 
 	if (vhcr->op_modifier == 1) {
 		struct mlx4_active_ports actv_ports =
 			mlx4_get_active_ports(dev, slave);
 		int converted_port = mlx4_slave_convert_port(
 				dev, slave, vhcr->in_modifier);
+		struct mlx4_vport_oper_state *vp_oper;
 
 		if (converted_port < 0)
 			return -EINVAL;
@@ -357,15 +428,24 @@
 		MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
 			 QUERY_FUNC_CAP_PHYS_PORT_ID);
 
-		if (dev->caps.phv_bit[port]) {
-			field = QUERY_FUNC_CAP_PHV_BIT;
-			MLX4_PUT(outbox->buf, field,
-				 QUERY_FUNC_CAP_FLAGS0_OFFSET);
-		}
+		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
+		err = mlx4_handle_vst_qinq(priv, slave, port);
+		if (err)
+			return err;
+
+		field = 0;
+		if (dev->caps.phv_bit[port])
+			field |= QUERY_FUNC_CAP_PHV_BIT;
+		if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD))
+			field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE;
+		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET);
 
 	} else if (vhcr->op_modifier == 0) {
 		struct mlx4_active_ports actv_ports =
 			mlx4_get_active_ports(dev, slave);
+		struct mlx4_slave_state *slave_state =
+			&priv->mfunc.master.slave_state[slave];
+
 		/* enable rdma and ethernet interfaces, new quota locations,
 		 * and reserved lkey
 		 */
@@ -439,6 +519,10 @@
 
 		size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00);
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET);
+
+		if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ)
+			slave_state->vst_qinq_supported = true;
+
 	} else
 		err = -EINVAL;
 
@@ -454,10 +538,12 @@
 	u32			size, qkey;
 	int			err = 0, quotas = 0;
 	u32                     in_modifier;
+	u32			slave_caps;
 
 	op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
-	in_modifier = op_modifier ? gen_or_port :
+	slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ |
 		QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS;
+	in_modifier = op_modifier ? gen_or_port : slave_caps;
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
@@ -612,8 +698,7 @@
 		MLX4_GET(func_cap->phys_port_id, outbox,
 			 QUERY_FUNC_CAP_PHYS_PORT_ID);
 
-	MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
-	func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT);
+	MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET);
 
 	/* All other resources are allocated by the master, but we still report
 	 * 'num' and 'reserved' capabilities as follows:
@@ -690,6 +775,7 @@
 #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET	0x52
 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET		0x55
 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET	0x56
+#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET	0x5D
 #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET		0x61
 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET		0x62
 #define QUERY_DEV_CAP_MAX_MCG_OFFSET		0x63
@@ -767,12 +853,8 @@
 	dev_cap->max_eqs = 1 << (field & 0xf);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
 	dev_cap->reserved_mtts = 1 << (field >> 4);
-	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET);
-	dev_cap->max_mrw_sz = 1 << field;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET);
 	dev_cap->reserved_mrws = 1 << (field & 0xf);
-	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET);
-	dev_cap->max_mtt_seg = 1 << (field & 0x3f);
 	MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET);
 	dev_cap->num_sys_eqs = size & 0xfff;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET);
@@ -857,6 +939,9 @@
 	MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET);
 	dev_cap->max_sq_desc_sz = size;
 
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET);
+	if (field & 0x1)
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET);
 	dev_cap->max_qp_per_mcg = 1 << field;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET);
@@ -2914,7 +2999,7 @@
 	memset(&func_cap, 0, sizeof(func_cap));
 	err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap);
 	if (!err)
-		*phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT;
+		*phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT;
 	return err;
 }
 EXPORT_SYMBOL(get_phv_bit);
@@ -2938,6 +3023,22 @@
 }
 EXPORT_SYMBOL(set_phv_bit);
 
+int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
+				      bool *vlan_offload_disabled)
+{
+	struct mlx4_func_cap func_cap;
+	int err;
+
+	memset(&func_cap, 0, sizeof(func_cap));
+	err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap);
+	if (!err)
+		*vlan_offload_disabled =
+			!!(func_cap.flags0 &
+			   QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled);
+
 void mlx4_replace_zero_macs(struct mlx4_dev *dev)
 {
 	int i;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index cdbd76f..5343a05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -80,9 +80,7 @@
 	int max_eqs;
 	int num_sys_eqs;
 	int reserved_mtts;
-	int max_mrw_sz;
 	int reserved_mrws;
-	int max_mtt_seg;
 	int max_requester_per_qp;
 	int max_responder_per_qp;
 	int max_rdma_global;
@@ -152,7 +150,7 @@
 	u32	qp1_proxy_qpn;
 	u32	reserved_lkey;
 	u8	physical_port;
-	u8	port_flags;
+	u8	flags0;
 	u8	flags1;
 	u64	phys_port_id;
 	u32	extra_flags;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 75dd2e3..7183ac4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2970,6 +2970,7 @@
 		mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
 		device_remove_file(&info->dev->persist->pdev->dev,
 				   &info->port_attr);
+		devlink_port_unregister(&info->devlink_port);
 		info->port = -1;
 	}
 
@@ -2984,6 +2985,8 @@
 	device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
 	device_remove_file(&info->dev->persist->pdev->dev,
 			   &info->port_mtu_attr);
+	devlink_port_unregister(&info->devlink_port);
+
 #ifdef CONFIG_RFS_ACCEL
 	free_irq_cpu_rmap(info->rmap);
 	info->rmap = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index c9d7fc51..e4878f3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -46,6 +46,7 @@
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
 #include <net/devlink.h>
+#include <linux/rwsem.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/driver.h>
@@ -482,6 +483,7 @@
 	u8 init_port_mask;
 	bool active;
 	bool old_vlan_api;
+	bool vst_qinq_supported;
 	u8 function;
 	dma_addr_t vhcr_dma;
 	u16 mtu[MLX4_MAX_PORTS + 1];
@@ -507,6 +509,7 @@
 	u64 mac;
 	u16 default_vlan;
 	u8  default_qos;
+	__be16 vlan_proto;
 	u32 tx_rate;
 	bool spoofchk;
 	u32 link_state;
@@ -627,6 +630,7 @@
 	struct mutex		slave_cmd_mutex;
 	struct semaphore	poll_sem;
 	struct semaphore	event_sem;
+	struct rw_semaphore	switch_sem;
 	int			max_cmds;
 	spinlock_t		context_lock;
 	int			free_head;
@@ -655,6 +659,7 @@
 	u8                      qos_vport;
 	u16			vlan_id;
 	u16			orig_vlan_id;
+	__be16			vlan_proto;
 };
 
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 2c2913d..a3528dd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -340,7 +340,7 @@
 	u8  fcs_del;
 	void *buf;
 	void *rx_info;
-	struct bpf_prog *xdp_prog;
+	struct bpf_prog __rcu *xdp_prog;
 	struct mlx4_en_page_cache page_cache;
 	unsigned long bytes;
 	unsigned long packets;
@@ -482,20 +482,10 @@
 	pfc_enabled_rx
 };
 
-struct tc_configuration {
-	enum dcb_pfc_type  dcb_pfc;
-};
-
 struct mlx4_en_cee_config {
 	bool	pfc_state;
-	struct	tc_configuration tc_config[MLX4_EN_NUM_UP];
+	enum	dcb_pfc_type dcb_pfc[MLX4_EN_NUM_UP];
 };
-
-struct mlx4_en_cee_params {
-	u8 dcbx_cap;
-	struct mlx4_en_cee_config dcb_cfg;
-};
-
 #endif
 
 struct ethtool_flow_id {
@@ -624,7 +614,8 @@
 	struct ieee_ets ets;
 	u16 maxrate[IEEE_8021QAZ_MAX_TCS];
 	enum dcbnl_cndd_states cndd_state[IEEE_8021QAZ_MAX_TCS];
-	struct mlx4_en_cee_params cee_params;
+	struct mlx4_en_cee_config cee_config;
+	u8 dcbx_cap;
 #endif
 #ifdef CONFIG_RFS_ACCEL
 	spinlock_t filters_lock;
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index 3d2095e..c5b2064 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -52,7 +52,7 @@
 
 #define MLX4_FLAG_V_IGNORE_FCS_MASK		0x2
 #define MLX4_IGNORE_FCS_MASK			0x1
-#define MLNX4_TX_MAX_NUMBER			8
+#define MLX4_TC_MAX_NUMBER			8
 
 void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table)
 {
@@ -2022,7 +2022,7 @@
 	u8 num_tc = dev->caps.max_tc_eth;
 
 	if (!num_tc)
-		num_tc = MLNX4_TX_MAX_NUMBER;
+		num_tc = MLX4_TC_MAX_NUMBER;
 
 	return num_tc;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 8b81114..84d7857 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -790,10 +790,22 @@
 				MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED |
 				MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
 		} else if (0 != vp_oper->state.default_vlan) {
-			qpc->pri_path.vlan_control |=
-				MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
-				MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
-				MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+			if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) {
+				/* vst QinQ should block untagged on TX,
+				 * but cvlan is in payload and phv is set so
+				 * hw see it as untagged. Block tagged instead.
+				 */
+				qpc->pri_path.vlan_control |=
+					MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
+					MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+					MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+					MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+			} else { /* vst 802.1Q */
+				qpc->pri_path.vlan_control |=
+					MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+					MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+					MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+			}
 		} else { /* priority tagged */
 			qpc->pri_path.vlan_control |=
 				MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
@@ -802,7 +814,11 @@
 
 		qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN;
 		qpc->pri_path.vlan_index = vp_oper->vlan_idx;
-		qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN;
+		qpc->pri_path.fl |= MLX4_FL_ETH_HIDE_CQE_VLAN;
+		if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD))
+			qpc->pri_path.fl |= MLX4_FL_SV;
+		else
+			qpc->pri_path.fl |= MLX4_FL_CV;
 		qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
 		qpc->pri_path.sched_queue &= 0xC7;
 		qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3;
@@ -5238,6 +5254,7 @@
 	u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) |
 		       (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) |
 		       (1ULL << MLX4_UPD_QP_PATH_MASK_CV) |
+		       (1ULL << MLX4_UPD_QP_PATH_MASK_SV) |
 		       (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) |
 		       (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) |
 		       (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) |
@@ -5266,7 +5283,12 @@
 	else if (!work->vlan_id)
 		vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
 			MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
-	else
+	else if (work->vlan_proto == htons(ETH_P_8021AD))
+		vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
+			MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
+			MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
+			MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
+	else  /* vst 802.1Q */
 		vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
 			MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
 			MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
@@ -5311,7 +5333,11 @@
 				upd_context->qp_context.pri_path.fvl_rx =
 					qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN;
 				upd_context->qp_context.pri_path.fl =
-					qp->pri_path_fl | MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN;
+					qp->pri_path_fl | MLX4_FL_ETH_HIDE_CQE_VLAN;
+				if (work->vlan_proto == htons(ETH_P_8021AD))
+					upd_context->qp_context.pri_path.fl |= MLX4_FL_SV;
+				else
+					upd_context->qp_context.pri_path.fl |= MLX4_FL_CV;
 				upd_context->qp_context.pri_path.feup =
 					qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN;
 				upd_context->qp_context.pri_path.sched_queue =
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 6714662..f44d089 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -45,15 +45,12 @@
 	struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
 	struct mlx4_srq *srq;
 
-	spin_lock(&srq_table->lock);
-
+	rcu_read_lock();
 	srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
+	rcu_read_unlock();
 	if (srq)
 		atomic_inc(&srq->refcount);
-
-	spin_unlock(&srq_table->lock);
-
-	if (!srq) {
+	else {
 		mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
 		return;
 	}
@@ -301,12 +298,11 @@
 {
 	struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;
 	struct mlx4_srq *srq;
-	unsigned long flags;
 
-	spin_lock_irqsave(&srq_table->lock, flags);
+	rcu_read_lock();
 	srq = radix_tree_lookup(&srq_table->tree,
 				srqn & (dev->caps.num_srqs - 1));
-	spin_unlock_irqrestore(&srq_table->lock, flags);
+	rcu_read_unlock();
 
 	return srq;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 05cc1ef..0343725 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -3,7 +3,7 @@
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
-		fs_counters.o rl.o
+		fs_counters.o rl.o lag.o dev.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
 		en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d6e2a1c..1e639f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -143,13 +143,14 @@
 	return cmd->cmd_buf + (idx << cmd->log_stride);
 }
 
-static u8 xor8_buf(void *buf, int len)
+static u8 xor8_buf(void *buf, size_t offset, int len)
 {
 	u8 *ptr = buf;
 	u8 sum = 0;
 	int i;
+	int end = len + offset;
 
-	for (i = 0; i < len; i++)
+	for (i = offset; i < end; i++)
 		sum ^= ptr[i];
 
 	return sum;
@@ -157,41 +158,49 @@
 
 static int verify_block_sig(struct mlx5_cmd_prot_block *block)
 {
-	if (xor8_buf(block->rsvd0, sizeof(*block) - sizeof(block->data) - 1) != 0xff)
+	size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+	int xor_len = sizeof(*block) - sizeof(block->data) - 1;
+
+	if (xor8_buf(block, rsvd0_off, xor_len) != 0xff)
 		return -EINVAL;
 
-	if (xor8_buf(block, sizeof(*block)) != 0xff)
+	if (xor8_buf(block, 0, sizeof(*block)) != 0xff)
 		return -EINVAL;
 
 	return 0;
 }
 
-static void calc_block_sig(struct mlx5_cmd_prot_block *block, u8 token,
-			   int csum)
+static void calc_block_sig(struct mlx5_cmd_prot_block *block)
 {
-	block->token = token;
-	if (csum) {
-		block->ctrl_sig = ~xor8_buf(block->rsvd0, sizeof(*block) -
-					    sizeof(block->data) - 2);
-		block->sig = ~xor8_buf(block, sizeof(*block) - 1);
-	}
+	int ctrl_xor_len = sizeof(*block) - sizeof(block->data) - 2;
+	size_t rsvd0_off = offsetof(struct mlx5_cmd_prot_block, rsvd0);
+
+	block->ctrl_sig = ~xor8_buf(block, rsvd0_off, ctrl_xor_len);
+	block->sig = ~xor8_buf(block, 0, sizeof(*block) - 1);
 }
 
-static void calc_chain_sig(struct mlx5_cmd_msg *msg, u8 token, int csum)
+static void calc_chain_sig(struct mlx5_cmd_msg *msg)
 {
 	struct mlx5_cmd_mailbox *next = msg->next;
+	int size = msg->len;
+	int blen = size - min_t(int, sizeof(msg->first.data), size);
+	int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
+		/ MLX5_CMD_DATA_BLOCK_SIZE;
+	int i = 0;
 
-	while (next) {
-		calc_block_sig(next->buf, token, csum);
+	for (i = 0; i < n && next; i++)  {
+		calc_block_sig(next->buf);
 		next = next->next;
 	}
 }
 
 static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
 {
-	ent->lay->sig = ~xor8_buf(ent->lay, sizeof(*ent->lay));
-	calc_chain_sig(ent->in, ent->token, csum);
-	calc_chain_sig(ent->out, ent->token, csum);
+	ent->lay->sig = ~xor8_buf(ent->lay, 0,  sizeof(*ent->lay));
+	if (csum) {
+		calc_chain_sig(ent->in);
+		calc_chain_sig(ent->out);
+	}
 }
 
 static void poll_timeout(struct mlx5_cmd_work_ent *ent)
@@ -222,12 +231,17 @@
 	struct mlx5_cmd_mailbox *next = ent->out->next;
 	int err;
 	u8 sig;
+	int size = ent->out->len;
+	int blen = size - min_t(int, sizeof(ent->out->first.data), size);
+	int n = (blen + MLX5_CMD_DATA_BLOCK_SIZE - 1)
+		/ MLX5_CMD_DATA_BLOCK_SIZE;
+	int i = 0;
 
-	sig = xor8_buf(ent->lay, sizeof(*ent->lay));
+	sig = xor8_buf(ent->lay, 0, sizeof(*ent->lay));
 	if (sig != 0xff)
 		return -EINVAL;
 
-	while (next) {
+	for (i = 0; i < n && next; i++) {
 		err = verify_block_sig(next->buf);
 		if (err)
 			return err;
@@ -280,11 +294,13 @@
 	case MLX5_CMD_OP_DEALLOC_Q_COUNTER:
 	case MLX5_CMD_OP_DEALLOC_PD:
 	case MLX5_CMD_OP_DEALLOC_UAR:
-	case MLX5_CMD_OP_DETTACH_FROM_MCG:
+	case MLX5_CMD_OP_DETACH_FROM_MCG:
 	case MLX5_CMD_OP_DEALLOC_XRCD:
 	case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN:
 	case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT:
 	case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_DESTROY_LAG:
+	case MLX5_CMD_OP_DESTROY_VPORT_LAG:
 	case MLX5_CMD_OP_DESTROY_TIR:
 	case MLX5_CMD_OP_DESTROY_SQ:
 	case MLX5_CMD_OP_DESTROY_RQ:
@@ -301,6 +317,7 @@
 	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
+	case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -375,6 +392,10 @@
 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
 	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
 	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_CREATE_LAG:
+	case MLX5_CMD_OP_MODIFY_LAG:
+	case MLX5_CMD_OP_QUERY_LAG:
+	case MLX5_CMD_OP_CREATE_VPORT_LAG:
 	case MLX5_CMD_OP_CREATE_TIR:
 	case MLX5_CMD_OP_MODIFY_TIR:
 	case MLX5_CMD_OP_QUERY_TIR:
@@ -402,6 +423,7 @@
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -490,7 +512,7 @@
 	MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION);
 	MLX5_COMMAND_STR_CASE(ACCESS_REG);
 	MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG);
-	MLX5_COMMAND_STR_CASE(DETTACH_FROM_MCG);
+	MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG);
 	MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG);
 	MLX5_COMMAND_STR_CASE(MAD_IFC);
 	MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX);
@@ -512,6 +534,12 @@
 	MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY);
 	MLX5_COMMAND_STR_CASE(SET_WOL_ROL);
 	MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL);
+	MLX5_COMMAND_STR_CASE(CREATE_LAG);
+	MLX5_COMMAND_STR_CASE(MODIFY_LAG);
+	MLX5_COMMAND_STR_CASE(QUERY_LAG);
+	MLX5_COMMAND_STR_CASE(DESTROY_LAG);
+	MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG);
+	MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG);
 	MLX5_COMMAND_STR_CASE(CREATE_TIR);
 	MLX5_COMMAND_STR_CASE(MODIFY_TIR);
 	MLX5_COMMAND_STR_CASE(DESTROY_TIR);
@@ -550,15 +578,130 @@
 	MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
+	MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
+	MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
 	default: return "unknown command opcode";
 	}
 }
 
+static const char *cmd_status_str(u8 status)
+{
+	switch (status) {
+	case MLX5_CMD_STAT_OK:
+		return "OK";
+	case MLX5_CMD_STAT_INT_ERR:
+		return "internal error";
+	case MLX5_CMD_STAT_BAD_OP_ERR:
+		return "bad operation";
+	case MLX5_CMD_STAT_BAD_PARAM_ERR:
+		return "bad parameter";
+	case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
+		return "bad system state";
+	case MLX5_CMD_STAT_BAD_RES_ERR:
+		return "bad resource";
+	case MLX5_CMD_STAT_RES_BUSY:
+		return "resource busy";
+	case MLX5_CMD_STAT_LIM_ERR:
+		return "limits exceeded";
+	case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
+		return "bad resource state";
+	case MLX5_CMD_STAT_IX_ERR:
+		return "bad index";
+	case MLX5_CMD_STAT_NO_RES_ERR:
+		return "no resources";
+	case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
+		return "bad input length";
+	case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
+		return "bad output length";
+	case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
+		return "bad QP state";
+	case MLX5_CMD_STAT_BAD_PKT_ERR:
+		return "bad packet (discarded)";
+	case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
+		return "bad size too many outstanding CQEs";
+	default:
+		return "unknown status";
+	}
+}
+
+static int cmd_status_to_err(u8 status)
+{
+	switch (status) {
+	case MLX5_CMD_STAT_OK:				return 0;
+	case MLX5_CMD_STAT_INT_ERR:			return -EIO;
+	case MLX5_CMD_STAT_BAD_OP_ERR:			return -EINVAL;
+	case MLX5_CMD_STAT_BAD_PARAM_ERR:		return -EINVAL;
+	case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:		return -EIO;
+	case MLX5_CMD_STAT_BAD_RES_ERR:			return -EINVAL;
+	case MLX5_CMD_STAT_RES_BUSY:			return -EBUSY;
+	case MLX5_CMD_STAT_LIM_ERR:			return -ENOMEM;
+	case MLX5_CMD_STAT_BAD_RES_STATE_ERR:		return -EINVAL;
+	case MLX5_CMD_STAT_IX_ERR:			return -EINVAL;
+	case MLX5_CMD_STAT_NO_RES_ERR:			return -EAGAIN;
+	case MLX5_CMD_STAT_BAD_INP_LEN_ERR:		return -EIO;
+	case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:		return -EIO;
+	case MLX5_CMD_STAT_BAD_QP_STATE_ERR:		return -EINVAL;
+	case MLX5_CMD_STAT_BAD_PKT_ERR:			return -EINVAL;
+	case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:	return -EINVAL;
+	default:					return -EIO;
+	}
+}
+
+struct mlx5_ifc_mbox_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_mbox_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+};
+
+void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome)
+{
+	*status = MLX5_GET(mbox_out, out, status);
+	*syndrome = MLX5_GET(mbox_out, out, syndrome);
+}
+
+static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out)
+{
+	u32 syndrome;
+	u8  status;
+	u16 opcode;
+	u16 op_mod;
+
+	mlx5_cmd_mbox_status(out, &status, &syndrome);
+	if (!status)
+		return 0;
+
+	opcode = MLX5_GET(mbox_in, in, opcode);
+	op_mod = MLX5_GET(mbox_in, in, op_mod);
+
+	mlx5_core_err(dev,
+		      "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n",
+		      mlx5_command_str(opcode),
+		      opcode, op_mod,
+		      cmd_status_str(status),
+		      status,
+		      syndrome);
+
+	return cmd_status_to_err(status);
+}
+
 static void dump_command(struct mlx5_core_dev *dev,
 			 struct mlx5_cmd_work_ent *ent, int input)
 {
-	u16 op = be16_to_cpu(((struct mlx5_inbox_hdr *)(ent->lay->in))->opcode);
 	struct mlx5_cmd_msg *msg = input ? ent->in : ent->out;
+	u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode);
 	struct mlx5_cmd_mailbox *next = msg->next;
 	int data_only;
 	u32 offset = 0;
@@ -608,9 +751,7 @@
 
 static u16 msg_to_opcode(struct mlx5_cmd_msg *in)
 {
-	struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data);
-
-	return be16_to_cpu(hdr->opcode);
+	return MLX5_GET(mbox_in, in->first.data, opcode);
 }
 
 static void cb_timeout_handler(struct work_struct *work)
@@ -656,7 +797,6 @@
 		spin_unlock_irqrestore(&cmd->alloc_lock, flags);
 	}
 
-	ent->token = alloc_token(cmd);
 	cmd->ent_arr[ent->idx] = ent;
 	lay = get_inst(cmd, ent->idx);
 	ent->lay = lay;
@@ -749,16 +889,6 @@
 	return err;
 }
 
-static __be32 *get_synd_ptr(struct mlx5_outbox_hdr *out)
-{
-	return &out->syndrome;
-}
-
-static u8 *get_status_ptr(struct mlx5_outbox_hdr *out)
-{
-	return &out->status;
-}
-
 /*  Notes:
  *    1. Callback functions may not sleep
  *    2. page queue commands do not support asynchrous completion
@@ -766,7 +896,8 @@
 static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 			   struct mlx5_cmd_msg *out, void *uout, int uout_size,
 			   mlx5_cmd_cbk_t callback,
-			   void *context, int page_queue, u8 *status)
+			   void *context, int page_queue, u8 *status,
+			   u8 token)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
 	struct mlx5_cmd_work_ent *ent;
@@ -783,6 +914,8 @@
 	if (IS_ERR(ent))
 		return PTR_ERR(ent);
 
+	ent->token = token;
+
 	if (!callback)
 		init_completion(&ent->done);
 
@@ -804,7 +937,7 @@
 		goto out_free;
 
 	ds = ent->ts2 - ent->ts1;
-	op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode);
+	op = MLX5_GET(mbox_in, in->first.data, opcode);
 	if (op < ARRAY_SIZE(cmd->stats)) {
 		stats = &cmd->stats[op];
 		spin_lock_irq(&stats->lock);
@@ -854,7 +987,8 @@
 	.write	= dbg_write,
 };
 
-static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size)
+static int mlx5_copy_to_msg(struct mlx5_cmd_msg *to, void *from, int size,
+			    u8 token)
 {
 	struct mlx5_cmd_prot_block *block;
 	struct mlx5_cmd_mailbox *next;
@@ -880,6 +1014,7 @@
 		memcpy(block->data, from, copy);
 		from += copy;
 		size -= copy;
+		block->token = token;
 		next = next->next;
 	}
 
@@ -949,7 +1084,8 @@
 }
 
 static struct mlx5_cmd_msg *mlx5_alloc_cmd_msg(struct mlx5_core_dev *dev,
-					       gfp_t flags, int size)
+					       gfp_t flags, int size,
+					       u8 token)
 {
 	struct mlx5_cmd_mailbox *tmp, *head = NULL;
 	struct mlx5_cmd_prot_block *block;
@@ -978,6 +1114,7 @@
 		tmp->next = head;
 		block->next = cpu_to_be64(tmp->next ? tmp->next->dma : 0);
 		block->block_num = cpu_to_be32(n - i - 1);
+		block->token = token;
 		head = tmp;
 	}
 	msg->next = head;
@@ -1015,7 +1152,6 @@
 	struct mlx5_core_dev *dev = filp->private_data;
 	struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
 	void *ptr;
-	int err;
 
 	if (*pos != 0)
 		return -EINVAL;
@@ -1023,25 +1159,15 @@
 	kfree(dbg->in_msg);
 	dbg->in_msg = NULL;
 	dbg->inlen = 0;
-
-	ptr = kzalloc(count, GFP_KERNEL);
-	if (!ptr)
-		return -ENOMEM;
-
-	if (copy_from_user(ptr, buf, count)) {
-		err = -EFAULT;
-		goto out;
-	}
+	ptr = memdup_user(buf, count);
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
 	dbg->in_msg = ptr;
 	dbg->inlen = count;
 
 	*pos = count;
 
 	return count;
-
-out:
-	kfree(ptr);
-	return err;
 }
 
 static ssize_t data_read(struct file *filp, char __user *buf, size_t count,
@@ -1301,11 +1427,16 @@
 				callback = ent->callback;
 				context = ent->context;
 				err = ent->ret;
-				if (!err)
+				if (!err) {
 					err = mlx5_copy_from_msg(ent->uout,
 								 ent->out,
 								 ent->uout_size);
 
+					err = err ? err : mlx5_cmd_check(dev,
+									ent->in->first.data,
+									ent->uout);
+				}
+
 				mlx5_free_cmd_msg(dev, ent->out);
 				free_msg(dev, ent->in);
 
@@ -1352,19 +1483,14 @@
 	}
 
 	if (IS_ERR(msg))
-		msg = mlx5_alloc_cmd_msg(dev, gfp, in_size);
+		msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
 
 	return msg;
 }
 
-static u16 opcode_from_in(struct mlx5_inbox_hdr *in)
+static int is_manage_pages(void *in)
 {
-	return be16_to_cpu(in->opcode);
-}
-
-static int is_manage_pages(struct mlx5_inbox_hdr *in)
-{
-	return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES;
+	return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES;
 }
 
 static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
@@ -1377,12 +1503,15 @@
 	int err;
 	u8 status = 0;
 	u32 drv_synd;
+	u8 token;
 
 	if (pci_channel_offline(dev->pdev) ||
 	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-		err = mlx5_internal_err_ret_value(dev, opcode_from_in(in), &drv_synd, &status);
-		*get_synd_ptr(out) = cpu_to_be32(drv_synd);
-		*get_status_ptr(out) = status;
+		u16 opcode = MLX5_GET(mbox_in, in, opcode);
+
+		err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
+		MLX5_SET(mbox_out, out, status, status);
+		MLX5_SET(mbox_out, out, syndrome, drv_synd);
 		return err;
 	}
 
@@ -1395,20 +1524,22 @@
 		return err;
 	}
 
-	err = mlx5_copy_to_msg(inb, in, in_size);
+	token = alloc_token(&dev->cmd);
+
+	err = mlx5_copy_to_msg(inb, in, in_size, token);
 	if (err) {
 		mlx5_core_warn(dev, "err %d\n", err);
 		goto out_in;
 	}
 
-	outb = mlx5_alloc_cmd_msg(dev, gfp, out_size);
+	outb = mlx5_alloc_cmd_msg(dev, gfp, out_size, token);
 	if (IS_ERR(outb)) {
 		err = PTR_ERR(outb);
 		goto out_in;
 	}
 
 	err = mlx5_cmd_invoke(dev, inb, outb, out, out_size, callback, context,
-			      pages_queue, &status);
+			      pages_queue, &status, token);
 	if (err)
 		goto out_out;
 
@@ -1434,7 +1565,10 @@
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size)
 {
-	return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
+	int err;
+
+	err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL);
+	return err ? : mlx5_cmd_check(dev, in, out);
 }
 EXPORT_SYMBOL(mlx5_cmd_exec);
 
@@ -1476,7 +1610,7 @@
 	INIT_LIST_HEAD(&cmd->cache.med.head);
 
 	for (i = 0; i < NUM_LONG_LISTS; i++) {
-		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE);
+		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0);
 		if (IS_ERR(msg)) {
 			err = PTR_ERR(msg);
 			goto ex_err;
@@ -1486,7 +1620,7 @@
 	}
 
 	for (i = 0; i < NUM_MED_LISTS; i++) {
-		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE);
+		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0);
 		if (IS_ERR(msg)) {
 			err = PTR_ERR(msg);
 			goto ex_err;
@@ -1671,96 +1805,3 @@
 	pci_pool_destroy(cmd->pool);
 }
 EXPORT_SYMBOL(mlx5_cmd_cleanup);
-
-static const char *cmd_status_str(u8 status)
-{
-	switch (status) {
-	case MLX5_CMD_STAT_OK:
-		return "OK";
-	case MLX5_CMD_STAT_INT_ERR:
-		return "internal error";
-	case MLX5_CMD_STAT_BAD_OP_ERR:
-		return "bad operation";
-	case MLX5_CMD_STAT_BAD_PARAM_ERR:
-		return "bad parameter";
-	case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:
-		return "bad system state";
-	case MLX5_CMD_STAT_BAD_RES_ERR:
-		return "bad resource";
-	case MLX5_CMD_STAT_RES_BUSY:
-		return "resource busy";
-	case MLX5_CMD_STAT_LIM_ERR:
-		return "limits exceeded";
-	case MLX5_CMD_STAT_BAD_RES_STATE_ERR:
-		return "bad resource state";
-	case MLX5_CMD_STAT_IX_ERR:
-		return "bad index";
-	case MLX5_CMD_STAT_NO_RES_ERR:
-		return "no resources";
-	case MLX5_CMD_STAT_BAD_INP_LEN_ERR:
-		return "bad input length";
-	case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:
-		return "bad output length";
-	case MLX5_CMD_STAT_BAD_QP_STATE_ERR:
-		return "bad QP state";
-	case MLX5_CMD_STAT_BAD_PKT_ERR:
-		return "bad packet (discarded)";
-	case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:
-		return "bad size too many outstanding CQEs";
-	default:
-		return "unknown status";
-	}
-}
-
-static int cmd_status_to_err(u8 status)
-{
-	switch (status) {
-	case MLX5_CMD_STAT_OK:				return 0;
-	case MLX5_CMD_STAT_INT_ERR:			return -EIO;
-	case MLX5_CMD_STAT_BAD_OP_ERR:			return -EINVAL;
-	case MLX5_CMD_STAT_BAD_PARAM_ERR:		return -EINVAL;
-	case MLX5_CMD_STAT_BAD_SYS_STATE_ERR:		return -EIO;
-	case MLX5_CMD_STAT_BAD_RES_ERR:			return -EINVAL;
-	case MLX5_CMD_STAT_RES_BUSY:			return -EBUSY;
-	case MLX5_CMD_STAT_LIM_ERR:			return -ENOMEM;
-	case MLX5_CMD_STAT_BAD_RES_STATE_ERR:		return -EINVAL;
-	case MLX5_CMD_STAT_IX_ERR:			return -EINVAL;
-	case MLX5_CMD_STAT_NO_RES_ERR:			return -EAGAIN;
-	case MLX5_CMD_STAT_BAD_INP_LEN_ERR:		return -EIO;
-	case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR:		return -EIO;
-	case MLX5_CMD_STAT_BAD_QP_STATE_ERR:		return -EINVAL;
-	case MLX5_CMD_STAT_BAD_PKT_ERR:			return -EINVAL;
-	case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR:	return -EINVAL;
-	default:					return -EIO;
-	}
-}
-
-/* this will be available till all the commands use set/get macros */
-int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr)
-{
-	if (!hdr->status)
-		return 0;
-
-	pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
-		cmd_status_str(hdr->status), hdr->status,
-		be32_to_cpu(hdr->syndrome));
-
-	return cmd_status_to_err(hdr->status);
-}
-
-int mlx5_cmd_status_to_err_v2(void *ptr)
-{
-	u32	syndrome;
-	u8	status;
-
-	status = be32_to_cpu(*(__be32 *)ptr) >> 24;
-	if (!status)
-		return 0;
-
-	syndrome = be32_to_cpu(*(__be32 *)(ptr + 4));
-
-	pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n",
-		cmd_status_str(status), status, syndrome);
-
-	return cmd_status_to_err(status);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 873a631..32d4af9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -134,33 +134,29 @@
 		complete(&cq->free);
 }
 
-
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-			struct mlx5_create_cq_mbox_in *in, int inlen)
+			u32 *in, int inlen)
 {
-	int err;
 	struct mlx5_cq_table *table = &dev->priv.cq_table;
-	struct mlx5_create_cq_mbox_out out;
-	struct mlx5_destroy_cq_mbox_in din;
-	struct mlx5_destroy_cq_mbox_out dout;
+	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+	u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
+	u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
 	int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context),
 			   c_eqn);
 	struct mlx5_eq *eq;
+	int err;
 
 	eq = mlx5_eqn2eq(dev, eqn);
 	if (IS_ERR(eq))
 		return PTR_ERR(eq);
 
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ);
-	memset(&out, 0, sizeof(out));
-	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	memset(out, 0, sizeof(out));
+	MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ);
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (err)
 		return err;
 
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
-	cq->cqn = be32_to_cpu(out.cqn) & 0xffffff;
+	cq->cqn = MLX5_GET(create_cq_out, out, cqn);
 	cq->cons_index = 0;
 	cq->arm_sn     = 0;
 	atomic_set(&cq->refcount, 1);
@@ -186,10 +182,11 @@
 	return 0;
 
 err_cmd:
-	memset(&din, 0, sizeof(din));
-	memset(&dout, 0, sizeof(dout));
-	din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
-	mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout));
+	memset(din, 0, sizeof(din));
+	memset(dout, 0, sizeof(dout));
+	MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+	MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
+	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
 	return err;
 }
 EXPORT_SYMBOL(mlx5_core_create_cq);
@@ -197,8 +194,8 @@
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 {
 	struct mlx5_cq_table *table = &dev->priv.cq_table;
-	struct mlx5_destroy_cq_mbox_in in;
-	struct mlx5_destroy_cq_mbox_out out;
+	u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
 	struct mlx5_core_cq *tmp;
 	int err;
 
@@ -214,17 +211,12 @@
 		return -EINVAL;
 	}
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ);
-	in.cqn = cpu_to_be32(cq->cqn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
+	MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
 
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
 	synchronize_irq(cq->irqn);
 
 	mlx5_debug_cq_remove(dev, cq);
@@ -237,44 +229,23 @@
 EXPORT_SYMBOL(mlx5_core_destroy_cq);
 
 int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-		       struct mlx5_query_cq_mbox_out *out)
+		       u32 *out, int outlen)
 {
-	struct mlx5_query_cq_mbox_in in;
-	int err;
+	u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(out, 0, sizeof(*out));
-
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_CQ);
-	in.cqn = cpu_to_be32(cq->cqn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out));
-	if (err)
-		return err;
-
-	if (out->hdr.status)
-		return mlx5_cmd_status_to_err(&out->hdr);
-
-	return err;
+	MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ);
+	MLX5_SET(query_cq_in, in, cqn, cq->cqn);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_cq);
 
-
 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-			struct mlx5_modify_cq_mbox_in *in, int in_sz)
+			u32 *in, int inlen)
 {
-	struct mlx5_modify_cq_mbox_out out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
 
-	memset(&out, 0, sizeof(out));
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ);
-	err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
-	return 0;
+	MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
+	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_cq);
 
@@ -283,18 +254,20 @@
 				   u16 cq_period,
 				   u16 cq_max_count)
 {
-	struct mlx5_modify_cq_mbox_in in;
+	u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0};
+	void *cqc;
 
-	memset(&in, 0, sizeof(in));
+	MLX5_SET(modify_cq_in, in, cqn, cq->cqn);
+	cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);
+	MLX5_SET(cqc, cqc, cq_period, cq_period);
+	MLX5_SET(cqc, cqc, cq_max_count, cq_max_count);
+	MLX5_SET(modify_cq_in, in,
+		 modify_field_select_resize_field_select.modify_field_select.modify_field_select,
+		 MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
 
-	in.cqn              = cpu_to_be32(cq->cqn);
-	in.ctx.cq_period    = cpu_to_be16(cq_period);
-	in.ctx.cq_max_count = cpu_to_be16(cq_max_count);
-	in.field_select     = cpu_to_be32(MLX5_CQ_MODIFY_PERIOD |
-					  MLX5_CQ_MODIFY_COUNT);
-
-	return mlx5_core_modify_cq(dev, cq, &in, sizeof(in));
+	return mlx5_core_modify_cq(dev, cq, in, sizeof(in));
 }
+EXPORT_SYMBOL(mlx5_core_modify_cq_moderation);
 
 int mlx5_init_cq_table(struct mlx5_core_dev *dev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 5210d92..e94a953 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -277,24 +277,28 @@
 static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
 			 int index, int *is_str)
 {
-	struct mlx5_query_qp_mbox_out *out;
+	int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
 	struct mlx5_qp_context *ctx;
 	u64 param = 0;
+	u32 *out;
 	int err;
 	int no_sq;
 
-	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	out = kzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return param;
 
-	err = mlx5_core_qp_query(dev, qp, out, sizeof(*out));
+	err = mlx5_core_qp_query(dev, qp, out, outlen);
 	if (err) {
-		mlx5_core_warn(dev, "failed to query qp\n");
+		mlx5_core_warn(dev, "failed to query qp err=%d\n", err);
 		goto out;
 	}
 
 	*is_str = 0;
-	ctx = &out->ctx;
+
+	/* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
+	ctx = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, out, qpc);
+
 	switch (index) {
 	case QP_PID:
 		param = qp->pid;
@@ -358,32 +362,32 @@
 static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 			 int index)
 {
-	struct mlx5_query_eq_mbox_out *out;
-	struct mlx5_eq_context *ctx;
+	int outlen = MLX5_ST_SZ_BYTES(query_eq_out);
 	u64 param = 0;
+	void *ctx;
+	u32 *out;
 	int err;
 
-	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	out = kzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return param;
 
-	ctx = &out->ctx;
-
-	err = mlx5_core_eq_query(dev, eq, out, sizeof(*out));
+	err = mlx5_core_eq_query(dev, eq, out, outlen);
 	if (err) {
 		mlx5_core_warn(dev, "failed to query eq\n");
 		goto out;
 	}
+	ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry);
 
 	switch (index) {
 	case EQ_NUM_EQES:
-		param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+		param = 1 << MLX5_GET(eqc, ctx, log_eq_size);
 		break;
 	case EQ_INTR:
-		param = ctx->intr;
+		param = MLX5_GET(eqc, ctx, intr);
 		break;
 	case EQ_LOG_PG_SZ:
-		param = (ctx->log_page_size & 0x1f) + 12;
+		param = MLX5_GET(eqc, ctx, log_page_size) + 12;
 		break;
 	}
 
@@ -395,37 +399,37 @@
 static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			 int index)
 {
-	struct mlx5_query_cq_mbox_out *out;
-	struct mlx5_cq_context *ctx;
+	int outlen = MLX5_ST_SZ_BYTES(query_cq_out);
 	u64 param = 0;
+	void *ctx;
+	u32 *out;
 	int err;
 
-	out = kzalloc(sizeof(*out), GFP_KERNEL);
+	out = mlx5_vzalloc(outlen);
 	if (!out)
 		return param;
 
-	ctx = &out->ctx;
-
-	err = mlx5_core_query_cq(dev, cq, out);
+	err = mlx5_core_query_cq(dev, cq, out, outlen);
 	if (err) {
 		mlx5_core_warn(dev, "failed to query cq\n");
 		goto out;
 	}
+	ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context);
 
 	switch (index) {
 	case CQ_PID:
 		param = cq->pid;
 		break;
 	case CQ_NUM_CQES:
-		param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f);
+		param = 1 << MLX5_GET(cqc, ctx, log_cq_size);
 		break;
 	case CQ_LOG_PG_SZ:
-		param = (ctx->log_pg_sz & 0x1f) + 12;
+		param = MLX5_GET(cqc, ctx, log_page_size);
 		break;
 	}
 
 out:
-	kfree(out);
+	kvfree(out);
 	return param;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
new file mode 100644
index 0000000..a9dbc28
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mlx5/driver.h>
+#include "mlx5_core.h"
+
+static LIST_HEAD(intf_list);
+static LIST_HEAD(mlx5_dev_list);
+/* intf dev list mutex */
+static DEFINE_MUTEX(mlx5_intf_mutex);
+
+struct mlx5_device_context {
+	struct list_head	list;
+	struct mlx5_interface  *intf;
+	void		       *context;
+	unsigned long		state;
+};
+
+enum {
+	MLX5_INTERFACE_ADDED,
+	MLX5_INTERFACE_ATTACHED,
+};
+
+void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+	struct mlx5_device_context *dev_ctx;
+	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+	if (!mlx5_lag_intf_add(intf, priv))
+		return;
+
+	dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
+	if (!dev_ctx)
+		return;
+
+	dev_ctx->intf = intf;
+	dev_ctx->context = intf->add(dev);
+	set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+	if (intf->attach)
+		set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+
+	if (dev_ctx->context) {
+		spin_lock_irq(&priv->ctx_lock);
+		list_add_tail(&dev_ctx->list, &priv->ctx_list);
+		spin_unlock_irq(&priv->ctx_lock);
+	} else {
+		kfree(dev_ctx);
+	}
+}
+
+static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
+						   struct mlx5_priv *priv)
+{
+	struct mlx5_device_context *dev_ctx;
+
+	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+		if (dev_ctx->intf == intf)
+			return dev_ctx;
+	return NULL;
+}
+
+void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+	struct mlx5_device_context *dev_ctx;
+	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+	dev_ctx = mlx5_get_device(intf, priv);
+	if (!dev_ctx)
+		return;
+
+	spin_lock_irq(&priv->ctx_lock);
+	list_del(&dev_ctx->list);
+	spin_unlock_irq(&priv->ctx_lock);
+
+	if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+		intf->remove(dev, dev_ctx->context);
+
+	kfree(dev_ctx);
+}
+
+static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+	struct mlx5_device_context *dev_ctx;
+	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+	dev_ctx = mlx5_get_device(intf, priv);
+	if (!dev_ctx)
+		return;
+
+	if (intf->attach) {
+		if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
+			return;
+		intf->attach(dev, dev_ctx->context);
+		set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+	} else {
+		if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+			return;
+		dev_ctx->context = intf->add(dev);
+		set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+	}
+}
+
+void mlx5_attach_device(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_interface *intf;
+
+	mutex_lock(&mlx5_intf_mutex);
+	list_for_each_entry(intf, &intf_list, list)
+		mlx5_attach_interface(intf, priv);
+	mutex_unlock(&mlx5_intf_mutex);
+}
+
+static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+	struct mlx5_device_context *dev_ctx;
+	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
+
+	dev_ctx = mlx5_get_device(intf, priv);
+	if (!dev_ctx)
+		return;
+
+	if (intf->detach) {
+		if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
+			return;
+		intf->detach(dev, dev_ctx->context);
+		clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
+	} else {
+		if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
+			return;
+		intf->remove(dev, dev_ctx->context);
+		clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
+	}
+}
+
+void mlx5_detach_device(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_interface *intf;
+
+	mutex_lock(&mlx5_intf_mutex);
+	list_for_each_entry(intf, &intf_list, list)
+		mlx5_detach_interface(intf, priv);
+	mutex_unlock(&mlx5_intf_mutex);
+}
+
+bool mlx5_device_registered(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv;
+	bool found = false;
+
+	mutex_lock(&mlx5_intf_mutex);
+	list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+		if (priv == &dev->priv)
+			found = true;
+	mutex_unlock(&mlx5_intf_mutex);
+
+	return found;
+}
+
+int mlx5_register_device(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_interface *intf;
+
+	mutex_lock(&mlx5_intf_mutex);
+	list_add_tail(&priv->dev_list, &mlx5_dev_list);
+	list_for_each_entry(intf, &intf_list, list)
+		mlx5_add_device(intf, priv);
+	mutex_unlock(&mlx5_intf_mutex);
+
+	return 0;
+}
+
+void mlx5_unregister_device(struct mlx5_core_dev *dev)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_interface *intf;
+
+	mutex_lock(&mlx5_intf_mutex);
+	list_for_each_entry(intf, &intf_list, list)
+		mlx5_remove_device(intf, priv);
+	list_del(&priv->dev_list);
+	mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_register_interface(struct mlx5_interface *intf)
+{
+	struct mlx5_priv *priv;
+
+	if (!intf->add || !intf->remove)
+		return -EINVAL;
+
+	mutex_lock(&mlx5_intf_mutex);
+	list_add_tail(&intf->list, &intf_list);
+	list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+		mlx5_add_device(intf, priv);
+	mutex_unlock(&mlx5_intf_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_register_interface);
+
+void mlx5_unregister_interface(struct mlx5_interface *intf)
+{
+	struct mlx5_priv *priv;
+
+	mutex_lock(&mlx5_intf_mutex);
+	list_for_each_entry(priv, &mlx5_dev_list, dev_list)
+		mlx5_remove_device(intf, priv);
+	list_del(&intf->list);
+	mutex_unlock(&mlx5_intf_mutex);
+}
+EXPORT_SYMBOL(mlx5_unregister_interface);
+
+void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
+{
+	struct mlx5_priv *priv = &mdev->priv;
+	struct mlx5_device_context *dev_ctx;
+	unsigned long flags;
+	void *result = NULL;
+
+	spin_lock_irqsave(&priv->ctx_lock, flags);
+
+	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
+		if ((dev_ctx->intf->protocol == protocol) &&
+		    dev_ctx->intf->get_dev) {
+			result = dev_ctx->intf->get_dev(dev_ctx->context);
+			break;
+		}
+
+	spin_unlock_irqrestore(&priv->ctx_lock, flags);
+
+	return result;
+}
+EXPORT_SYMBOL(mlx5_get_protocol_dev);
+
+/* Must be called with intf_mutex held */
+void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+{
+	struct mlx5_interface *intf;
+
+	list_for_each_entry(intf, &intf_list, list)
+		if (intf->protocol == protocol) {
+			mlx5_add_device(intf, &dev->priv);
+			break;
+		}
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol)
+{
+	struct mlx5_interface *intf;
+
+	list_for_each_entry(intf, &intf_list, list)
+		if (intf->protocol == protocol) {
+			mlx5_remove_device(intf, &dev->priv);
+			break;
+		}
+}
+
+static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev)
+{
+	return (u16)((dev->pdev->bus->number << 8) |
+		     PCI_SLOT(dev->pdev->devfn));
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+{
+	u16 pci_id = mlx5_gen_pci_id(dev);
+	struct mlx5_core_dev *res = NULL;
+	struct mlx5_core_dev *tmp_dev;
+	struct mlx5_priv *priv;
+
+	list_for_each_entry(priv, &mlx5_dev_list, dev_list) {
+		tmp_dev = container_of(priv, struct mlx5_core_dev, priv);
+		if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) {
+			res = tmp_dev;
+			break;
+		}
+	}
+
+	return res;
+}
+
+void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
+		     unsigned long param)
+{
+	struct mlx5_priv *priv = &dev->priv;
+	struct mlx5_device_context *dev_ctx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->ctx_lock, flags);
+
+	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
+		if (dev_ctx->intf->event)
+			dev_ctx->intf->event(dev, dev_ctx->context, event, param);
+
+	spin_unlock_irqrestore(&priv->ctx_lock, flags);
+}
+
+void mlx5_dev_list_lock(void)
+{
+	mutex_lock(&mlx5_intf_mutex);
+}
+
+void mlx5_dev_list_unlock(void)
+{
+	mutex_unlock(&mlx5_intf_mutex);
+}
+
+int mlx5_dev_list_trylock(void)
+{
+	return mutex_trylock(&mlx5_intf_mutex);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 1b495ef..460363b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -62,19 +62,25 @@
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE                0xd
 
 #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW            0x1
-#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x4
+#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x3
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW            0x6
 
+#define MLX5_RX_HEADROOM NET_SKB_PAD
+
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE		6  /* >= 6, HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS	8  /* >= 6, HW restriction */
-#define MLX5_MPWRQ_LOG_WQE_SZ			17
+#define MLX5_MPWRQ_LOG_WQE_SZ			18
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
 				    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
 #define MLX5_MPWRQ_PAGES_PER_WQE		BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
 #define MLX5_MPWRQ_STRIDES_PER_PAGE		(MLX5_MPWRQ_NUM_STRIDES >> \
 						 MLX5_MPWRQ_WQE_PAGE_ORDER)
-#define MLX5_CHANNEL_MAX_NUM_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8) * \
-				   BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW))
+
+#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
+#define MLX5E_REQUIRED_MTTS(rqs, wqes)\
+	(rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX)
+
 #define MLX5_UMR_ALIGN				(2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(128)
 
@@ -95,6 +101,18 @@
 #define MLX5E_UPDATE_STATS_INTERVAL    200 /* msecs */
 #define MLX5E_SQ_BF_BUDGET             16
 
+#define MLX5E_ICOSQ_MAX_WQEBBS \
+	(DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB))
+
+#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN)
+#define MLX5E_XDP_IHS_DS_COUNT \
+	DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS)
+#define MLX5E_XDP_TX_DS_COUNT \
+	(MLX5E_XDP_IHS_DS_COUNT + \
+	 (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */)
+#define MLX5E_XDP_TX_WQEBBS \
+	DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS)
+
 #define MLX5E_NUM_MAIN_GROUPS 9
 
 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
@@ -219,9 +237,8 @@
 };
 
 enum {
-	MLX5E_RQ_STATE_POST_WQES_ENABLE,
+	MLX5E_RQ_STATE_FLUSH,
 	MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
-	MLX5E_RQ_STATE_FLUSH_TIMEOUT,
 	MLX5E_RQ_STATE_AM,
 };
 
@@ -284,20 +301,44 @@
 	u8					tired;
 };
 
+/* a single cache unit is capable to serve one napi call (for non-striding rq)
+ * or a MPWQE (for striding rq).
+ */
+#define MLX5E_CACHE_UNIT	(MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \
+				 MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT)
+#define MLX5E_CACHE_SIZE	(2 * roundup_pow_of_two(MLX5E_CACHE_UNIT))
+struct mlx5e_page_cache {
+	u32 head;
+	u32 tail;
+	struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE];
+};
+
 struct mlx5e_rq {
 	/* data path */
 	struct mlx5_wq_ll      wq;
-	u32                    wqe_sz;
-	struct sk_buff       **skb;
-	struct mlx5e_mpw_info *wqe_info;
+
+	union {
+		struct mlx5e_dma_info *dma_info;
+		struct {
+			struct mlx5e_mpw_info *info;
+			void                  *mtt_no_align;
+			u32                    mtt_offset;
+		} mpwqe;
+	};
+	struct {
+		u8             page_order;
+		u32            wqe_sz;    /* wqe data buffer size */
+		u8             map_dir;   /* dma map direction */
+	} buff;
 	__be32                 mkey_be;
-	__be32                 umr_mkey_be;
 
 	struct device         *pdev;
 	struct net_device     *netdev;
 	struct mlx5e_tstamp   *tstamp;
 	struct mlx5e_rq_stats  stats;
 	struct mlx5e_cq        cq;
+	struct mlx5e_page_cache page_cache;
+
 	mlx5e_fp_handle_rx_cqe handle_rx_cqe;
 	mlx5e_fp_alloc_wqe     alloc_wqe;
 	mlx5e_fp_dealloc_wqe   dealloc_wqe;
@@ -306,6 +347,7 @@
 	int                    ix;
 
 	struct mlx5e_rx_am     am; /* Adaptive Moderation */
+	struct bpf_prog       *xdp_prog;
 
 	/* control */
 	struct mlx5_wq_ctrl    wq_ctrl;
@@ -319,32 +361,15 @@
 
 struct mlx5e_umr_dma_info {
 	__be64                *mtt;
-	__be64                *mtt_no_align;
 	dma_addr_t             mtt_addr;
-	struct mlx5e_dma_info *dma_info;
+	struct mlx5e_dma_info  dma_info[MLX5_MPWRQ_PAGES_PER_WQE];
+	struct mlx5e_umr_wqe   wqe;
 };
 
 struct mlx5e_mpw_info {
-	union {
-		struct mlx5e_dma_info     dma_info;
-		struct mlx5e_umr_dma_info umr;
-	};
+	struct mlx5e_umr_dma_info umr;
 	u16 consumed_strides;
 	u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE];
-
-	void (*dma_pre_sync)(struct device *pdev,
-			     struct mlx5e_mpw_info *wi,
-			     u32 wqe_offset, u32 len);
-	void (*add_skb_frag)(struct mlx5e_rq *rq,
-			     struct sk_buff *skb,
-			     struct mlx5e_mpw_info *wi,
-			     u32 page_idx, u32 frag_offset, u32 len);
-	void (*copy_skb_header)(struct device *pdev,
-				struct sk_buff *skb,
-				struct mlx5e_mpw_info *wi,
-				u32 page_idx, u32 offset,
-				u32 headlen);
-	void (*free_wqe)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
 };
 
 struct mlx5e_tx_wqe_info {
@@ -365,16 +390,21 @@
 };
 
 enum {
-	MLX5E_SQ_STATE_WAKE_TXQ_ENABLE,
+	MLX5E_SQ_STATE_FLUSH,
 	MLX5E_SQ_STATE_BF_ENABLE,
-	MLX5E_SQ_STATE_TX_TIMEOUT,
 };
 
-struct mlx5e_ico_wqe_info {
+struct mlx5e_sq_wqe_info {
 	u8  opcode;
 	u8  num_wqebbs;
 };
 
+enum mlx5e_sq_type {
+	MLX5E_SQ_TXQ,
+	MLX5E_SQ_ICO,
+	MLX5E_SQ_XDP
+};
+
 struct mlx5e_sq {
 	/* data path */
 
@@ -392,10 +422,20 @@
 
 	struct mlx5e_cq            cq;
 
-	/* pointers to per packet info: write@xmit, read@completion */
-	struct sk_buff           **skb;
-	struct mlx5e_sq_dma       *dma_fifo;
-	struct mlx5e_tx_wqe_info  *wqe_info;
+	/* pointers to per tx element info: write@xmit, read@completion */
+	union {
+		struct {
+			struct sk_buff           **skb;
+			struct mlx5e_sq_dma       *dma_fifo;
+			struct mlx5e_tx_wqe_info  *wqe_info;
+		} txq;
+		struct mlx5e_sq_wqe_info *ico_wqe;
+		struct {
+			struct mlx5e_sq_wqe_info  *wqe_info;
+			struct mlx5e_dma_info     *di;
+			bool                       doorbell;
+		} xdp;
+	} db;
 
 	/* read only */
 	struct mlx5_wq_cyc         wq;
@@ -417,8 +457,8 @@
 	struct mlx5_uar            uar;
 	struct mlx5e_channel      *channel;
 	int                        tc;
-	struct mlx5e_ico_wqe_info *ico_wqe_info;
 	u32                        rate_limit;
+	u8                         type;
 } ____cacheline_aligned_in_smp;
 
 static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n)
@@ -434,8 +474,10 @@
 struct mlx5e_channel {
 	/* data path */
 	struct mlx5e_rq            rq;
+	struct mlx5e_sq            xdp_sq;
 	struct mlx5e_sq            sq[MLX5E_MAX_NUM_TC];
 	struct mlx5e_sq            icosq;   /* internal control operations */
+	bool                       xdp;
 	struct napi_struct         napi;
 	struct device             *pdev;
 	struct net_device         *netdev;
@@ -617,6 +659,7 @@
 	/* priv data path fields - start */
 	struct mlx5e_sq            **txq_to_sq_map;
 	int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC];
+	struct bpf_prog *xdp_prog;
 	/* priv data path fields - end */
 
 	unsigned long              state;
@@ -651,40 +694,6 @@
 	void                      *ppriv;
 };
 
-enum mlx5e_link_mode {
-	MLX5E_1000BASE_CX_SGMII	 = 0,
-	MLX5E_1000BASE_KX	 = 1,
-	MLX5E_10GBASE_CX4	 = 2,
-	MLX5E_10GBASE_KX4	 = 3,
-	MLX5E_10GBASE_KR	 = 4,
-	MLX5E_20GBASE_KR2	 = 5,
-	MLX5E_40GBASE_CR4	 = 6,
-	MLX5E_40GBASE_KR4	 = 7,
-	MLX5E_56GBASE_R4	 = 8,
-	MLX5E_10GBASE_CR	 = 12,
-	MLX5E_10GBASE_SR	 = 13,
-	MLX5E_10GBASE_ER	 = 14,
-	MLX5E_40GBASE_SR4	 = 15,
-	MLX5E_40GBASE_LR4	 = 16,
-	MLX5E_50GBASE_SR2	 = 18,
-	MLX5E_100GBASE_CR4	 = 20,
-	MLX5E_100GBASE_SR4	 = 21,
-	MLX5E_100GBASE_KR4	 = 22,
-	MLX5E_100GBASE_LR4	 = 23,
-	MLX5E_100BASE_TX	 = 24,
-	MLX5E_1000BASE_T	 = 25,
-	MLX5E_10GBASE_T		 = 26,
-	MLX5E_25GBASE_CR	 = 27,
-	MLX5E_25GBASE_KR	 = 28,
-	MLX5E_25GBASE_SR	 = 29,
-	MLX5E_50GBASE_CR2	 = 30,
-	MLX5E_50GBASE_KR2	 = 31,
-	MLX5E_LINK_MODES_NUMBER,
-};
-
-#define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
-
-
 void mlx5e_build_ptys2ethtool_map(void);
 
 void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw);
@@ -697,31 +706,19 @@
 int mlx5e_napi_poll(struct napi_struct *napi, int budget);
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
-void mlx5e_free_tx_descs(struct mlx5e_sq *sq);
-void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
+void mlx5e_free_sq_descs(struct mlx5e_sq *sq);
 
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+			bool recycle);
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
 int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
-int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
+int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,	u16 ix);
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix);
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix);
-void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq);
-void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq,
-				    struct mlx5_cqe64 *cqe,
-				    u16 byte_cnt,
-				    struct mlx5e_mpw_info *wi,
-				    struct sk_buff *skb);
-void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
-					struct mlx5_cqe64 *cqe,
-					u16 byte_cnt,
-					struct mlx5e_mpw_info *wi,
-					struct sk_buff *skb);
-void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
-				struct mlx5e_mpw_info *wi);
-void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
-				    struct mlx5e_mpw_info *wi);
+void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq);
+void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi);
 struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);
 
 void mlx5e_rx_am(struct mlx5e_rq *rq);
@@ -808,17 +805,18 @@
 	mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc);
 }
 
+static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
+{
+	return rq->mpwqe.mtt_offset +
+		wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
+}
+
 static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
 {
 	return min_t(int, mdev->priv.eq_table.num_comp_vectors,
 		     MLX5E_MAX_NUM_CHANNELS);
 }
 
-static inline int mlx5e_get_mtt_octw(int npages)
-{
-	return ALIGN(npages, 8) / 2;
-}
-
 extern const struct ethtool_ops mlx5e_ethtool_ops;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
@@ -871,6 +869,7 @@
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
 int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
@@ -881,9 +880,12 @@
 int mlx5e_close(struct net_device *netdev);
 int mlx5e_open(struct net_device *netdev);
 void mlx5e_update_stats_work(struct work_struct *work);
-void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
-			  const struct mlx5e_profile *profile, void *ppriv);
+struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+				       const struct mlx5e_profile *profile,
+				       void *ppriv);
 void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
+int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
+void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
 struct rtnl_link_stats64 *
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 847a8f3..13dc388 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -273,7 +273,7 @@
 
 	tstamp->ptp = ptp_clock_register(&tstamp->ptp_info,
 					 &priv->mdev->pdev->dev);
-	if (IS_ERR_OR_NULL(tstamp->ptp)) {
+	if (IS_ERR(tstamp->ptp)) {
 		mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n",
 			       PTR_ERR(tstamp->ptp));
 		tstamp->ptp = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 673043c..029e856 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -60,24 +60,27 @@
 static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
 			     struct mlx5_core_mkey *mkey)
 {
-	struct mlx5_create_mkey_mbox_in *in;
+	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	void *mkc;
+	u32 *in;
 	int err;
 
-	in = mlx5_vzalloc(sizeof(*in));
+	in = mlx5_vzalloc(inlen);
 	if (!in)
 		return -ENOMEM;
 
-	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
-			MLX5_PERM_LOCAL_READ  |
-			MLX5_ACCESS_MODE_PA;
-	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
-	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA);
+	MLX5_SET(mkc, mkc, lw, 1);
+	MLX5_SET(mkc, mkc, lr, 1);
 
-	err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL,
-				    NULL);
+	MLX5_SET(mkc, mkc, pd, pdn);
+	MLX5_SET(mkc, mkc, length64, 1);
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
 
 	kvfree(in);
-
 	return err;
 }
 
@@ -139,7 +142,7 @@
 	struct mlx5e_tir *tir;
 	void *in;
 	int inlen;
-	int err;
+	int err = 0;
 
 	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
 	in = mlx5_vzalloc(inlen);
@@ -151,10 +154,11 @@
 	list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
 		err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen);
 		if (err)
-			return err;
+			goto out;
 	}
 
+out:
 	kvfree(in);
 
-	return 0;
+	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index caa9a3c..762af16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -127,29 +127,40 @@
 	return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
 }
 
-static int mlx5e_dbcnl_validate_ets(struct ieee_ets *ets)
+static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
+				    struct ieee_ets *ets)
 {
 	int bw_sum = 0;
 	int i;
 
 	/* Validate Priority */
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-		if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY)
+		if (ets->prio_tc[i] >= MLX5E_MAX_PRIORITY) {
+			netdev_err(netdev,
+				   "Failed to validate ETS: priority value greater than max(%d)\n",
+				    MLX5E_MAX_PRIORITY);
 			return -EINVAL;
+		}
 	}
 
 	/* Validate Bandwidth Sum */
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
 		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
-			if (!ets->tc_tx_bw[i])
+			if (!ets->tc_tx_bw[i]) {
+				netdev_err(netdev,
+					   "Failed to validate ETS: BW 0 is illegal\n");
 				return -EINVAL;
+			}
 
 			bw_sum += ets->tc_tx_bw[i];
 		}
 	}
 
-	if (bw_sum != 0 && bw_sum != 100)
+	if (bw_sum != 0 && bw_sum != 100) {
+		netdev_err(netdev,
+			   "Failed to validate ETS: BW sum is illegal\n");
 		return -EINVAL;
+	}
 	return 0;
 }
 
@@ -159,7 +170,7 @@
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	int err;
 
-	err = mlx5e_dbcnl_validate_ets(ets);
+	err = mlx5e_dbcnl_validate_ets(netdev, ets);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 4a3757e..27ff401 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -331,7 +331,7 @@
 	if (mlx5e_query_global_pause_combined(priv)) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) {
 			data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[0],
-							  pport_per_prio_pfc_stats_desc, 0);
+							  pport_per_prio_pfc_stats_desc, i);
 		}
 	}
 
@@ -352,15 +352,61 @@
 								   sq_stats_desc, j);
 }
 
+static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type,
+				    int num_wqe)
+{
+	int packets_per_wqe;
+	int stride_size;
+	int num_strides;
+	int wqe_size;
+
+	if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+		return num_wqe;
+
+	stride_size = 1 << priv->params.mpwqe_log_stride_sz;
+	num_strides = 1 << priv->params.mpwqe_log_num_strides;
+	wqe_size = stride_size * num_strides;
+
+	packets_per_wqe = wqe_size /
+			  ALIGN(ETH_DATA_LEN, stride_size);
+	return (1 << (order_base_2(num_wqe * packets_per_wqe) - 1));
+}
+
+static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type,
+				    int num_packets)
+{
+	int packets_per_wqe;
+	int stride_size;
+	int num_strides;
+	int wqe_size;
+	int num_wqes;
+
+	if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
+		return num_packets;
+
+	stride_size = 1 << priv->params.mpwqe_log_stride_sz;
+	num_strides = 1 << priv->params.mpwqe_log_num_strides;
+	wqe_size = stride_size * num_strides;
+
+	num_packets = (1 << order_base_2(num_packets));
+
+	packets_per_wqe = wqe_size /
+			  ALIGN(ETH_DATA_LEN, stride_size);
+	num_wqes = DIV_ROUND_UP(num_packets, packets_per_wqe);
+	return 1 << (order_base_2(num_wqes));
+}
+
 static void mlx5e_get_ringparam(struct net_device *dev,
 				struct ethtool_ringparam *param)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	int rq_wq_type = priv->params.rq_wq_type;
 
-	param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type);
+	param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+							 1 << mlx5_max_log_rq_size(rq_wq_type));
 	param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
-	param->rx_pending     = 1 << priv->params.log_rq_size;
+	param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+						     1 << priv->params.log_rq_size);
 	param->tx_pending     = 1 << priv->params.log_sq_size;
 }
 
@@ -370,9 +416,13 @@
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	bool was_opened;
 	int rq_wq_type = priv->params.rq_wq_type;
+	u32 rx_pending_wqes;
+	u32 min_rq_size;
+	u32 max_rq_size;
 	u16 min_rx_wqes;
 	u8 log_rq_size;
 	u8 log_sq_size;
+	u32 num_mtts;
 	int err = 0;
 
 	if (param->rx_jumbo_pending) {
@@ -385,18 +435,36 @@
 			    __func__);
 		return -EINVAL;
 	}
-	if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) {
+
+	min_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+					       1 << mlx5_min_log_rq_size(rq_wq_type));
+	max_rq_size = mlx5e_rx_wqes_to_packets(priv, rq_wq_type,
+					       1 << mlx5_max_log_rq_size(rq_wq_type));
+	rx_pending_wqes = mlx5e_packets_to_rx_wqes(priv, rq_wq_type,
+						   param->rx_pending);
+
+	if (param->rx_pending < min_rq_size) {
 		netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n",
 			    __func__, param->rx_pending,
-			    1 << mlx5_min_log_rq_size(rq_wq_type));
+			    min_rq_size);
 		return -EINVAL;
 	}
-	if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) {
+	if (param->rx_pending > max_rq_size) {
 		netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n",
 			    __func__, param->rx_pending,
-			    1 << mlx5_max_log_rq_size(rq_wq_type));
+			    max_rq_size);
 		return -EINVAL;
 	}
+
+	num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels,
+				       rx_pending_wqes);
+	if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+	    !MLX5E_VALID_NUM_MTTS(num_mtts)) {
+		netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n",
+			    __func__, param->rx_pending);
+		return -EINVAL;
+	}
+
 	if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
 		netdev_info(dev, "%s: tx_pending (%d) < min (%d)\n",
 			    __func__, param->tx_pending,
@@ -410,9 +478,9 @@
 		return -EINVAL;
 	}
 
-	log_rq_size = order_base_2(param->rx_pending);
+	log_rq_size = order_base_2(rx_pending_wqes);
 	log_sq_size = order_base_2(param->tx_pending);
-	min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending);
+	min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes);
 
 	if (log_rq_size == priv->params.log_rq_size &&
 	    log_sq_size == priv->params.log_sq_size &&
@@ -454,6 +522,7 @@
 	unsigned int count = ch->combined_count;
 	bool arfs_enabled;
 	bool was_opened;
+	u32 num_mtts;
 	int err = 0;
 
 	if (!count) {
@@ -472,6 +541,14 @@
 		return -EINVAL;
 	}
 
+	num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size));
+	if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
+	    !MLX5E_VALID_NUM_MTTS(num_mtts)) {
+		netdev_info(dev, "%s: rx count (%d) request can't be satisfied, try to reduce.\n",
+			    __func__, count);
+		return -EINVAL;
+	}
+
 	if (priv->params.num_channels == count)
 		return 0;
 
@@ -582,9 +659,10 @@
 static void ptys2ethtool_supported_link(unsigned long *supported_modes,
 					u32 eth_proto_cap)
 {
+	unsigned long proto_cap = eth_proto_cap;
 	int proto;
 
-	for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER)
+	for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
 		bitmap_or(supported_modes, supported_modes,
 			  ptys2ethtool_table[proto].supported,
 			  __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -593,9 +671,10 @@
 static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
 				    u32 eth_proto_cap)
 {
+	unsigned long proto_cap = eth_proto_cap;
 	int proto;
 
-	for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER)
+	for_each_set_bit(proto, &proto_cap, MLX5E_LINK_MODES_NUMBER)
 		bitmap_or(advertising_modes, advertising_modes,
 			  ptys2ethtool_table[proto].advertised,
 			  __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -726,7 +805,7 @@
 {
 	struct mlx5e_priv *priv    = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
-	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
 	u32 eth_proto_cap;
 	u32 eth_proto_admin;
 	u32 eth_proto_lp;
@@ -736,7 +815,6 @@
 	int err;
 
 	err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
-
 	if (err) {
 		netdev_err(netdev, "%s: query port ptys failed: %d\n",
 			   __func__, err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 1587a9f..36fbc6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -294,6 +294,36 @@
 	return 0;
 }
 
+static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
+{
+	int i;
+
+	mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+	for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
+		mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
+	}
+
+	if (priv->fs.vlan.filter_disabled &&
+	    !(priv->netdev->flags & IFF_PROMISC))
+		mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
+}
+
+static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
+{
+	int i;
+
+	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
+
+	for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) {
+		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i);
+	}
+
+	if (priv->fs.vlan.filter_disabled &&
+	    !(priv->netdev->flags & IFF_PROMISC))
+		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0);
+}
+
 #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \
 	for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \
 		hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist)
@@ -1024,14 +1054,10 @@
 	if (err)
 		goto err_free_g;
 
-	err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
-	if (err)
-		goto err_destroy_vlan_flow_groups;
+	mlx5e_add_vlan_rules(priv);
 
 	return 0;
 
-err_destroy_vlan_flow_groups:
-	mlx5e_destroy_groups(ft);
 err_free_g:
 	kfree(ft->g);
 err_destroy_vlan_table:
@@ -1043,6 +1069,7 @@
 
 static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv)
 {
+	mlx5e_del_vlan_rules(priv);
 	mlx5e_destroy_flow_table(&priv->fs.vlan.ft);
 }
 
@@ -1100,7 +1127,6 @@
 
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv)
 {
-	mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0);
 	mlx5e_destroy_vlan_table(priv);
 	mlx5e_destroy_l2_table(priv);
 	mlx5e_destroy_ttc_table(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 870bea3..7eaf380 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -34,18 +34,12 @@
 #include <net/pkt_cls.h>
 #include <linux/mlx5/fs.h>
 #include <net/vxlan.h>
+#include <linux/bpf.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
 #include "vxlan.h"
 
-enum {
-	MLX5_EN_QP_FLUSH_TIMEOUT_MS	= 5000,
-	MLX5_EN_QP_FLUSH_MSLEEP_QUANT	= 20,
-	MLX5_EN_QP_FLUSH_MAX_ITER	= MLX5_EN_QP_FLUSH_TIMEOUT_MS /
-					  MLX5_EN_QP_FLUSH_MSLEEP_QUANT,
-};
-
 struct mlx5e_rq_param {
 	u32			rqc[MLX5_ST_SZ_DW(rqc)];
 	struct mlx5_wq_param	wq;
@@ -57,7 +51,7 @@
 	struct mlx5_wq_param       wq;
 	u16                        max_inline;
 	u8                         min_inline_mode;
-	bool                       icosq;
+	enum mlx5e_sq_type         type;
 };
 
 struct mlx5e_cq_param {
@@ -70,12 +64,55 @@
 struct mlx5e_channel_param {
 	struct mlx5e_rq_param      rq;
 	struct mlx5e_sq_param      sq;
+	struct mlx5e_sq_param      xdp_sq;
 	struct mlx5e_sq_param      icosq;
 	struct mlx5e_cq_param      rx_cq;
 	struct mlx5e_cq_param      tx_cq;
 	struct mlx5e_cq_param      icosq_cq;
 };
 
+static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
+{
+	return MLX5_CAP_GEN(mdev, striding_rq) &&
+		MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
+		MLX5_CAP_ETH(mdev, reg_umr_sq);
+}
+
+static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type)
+{
+	priv->params.rq_wq_type = rq_type;
+	switch (priv->params.rq_wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+		priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ?
+			MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
+			MLX5_MPWRQ_LOG_STRIDE_SIZE;
+		priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
+			priv->params.mpwqe_log_stride_sz;
+		break;
+	default: /* MLX5_WQ_TYPE_LINKED_LIST */
+		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
+	}
+	priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
+					       BIT(priv->params.log_rq_size));
+
+	mlx5_core_info(priv->mdev,
+		       "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+		       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+		       BIT(priv->params.log_rq_size),
+		       BIT(priv->params.mpwqe_log_stride_sz),
+		       priv->params.rx_cqe_compress_admin);
+}
+
+static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv)
+{
+	u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) &&
+		    !priv->xdp_prog ?
+		    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
+		    MLX5_WQ_TYPE_LINKED_LIST;
+	mlx5e_set_rq_type_params(priv, rq_type);
+}
+
 static void mlx5e_update_carrier(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -143,12 +180,18 @@
 		s->rx_csum_none	+= rq_stats->csum_none;
 		s->rx_csum_complete += rq_stats->csum_complete;
 		s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
+		s->rx_xdp_drop += rq_stats->xdp_drop;
+		s->rx_xdp_tx += rq_stats->xdp_tx;
+		s->rx_xdp_tx_full += rq_stats->xdp_tx_full;
 		s->rx_wqe_err   += rq_stats->wqe_err;
 		s->rx_mpwqe_filler += rq_stats->mpwqe_filler;
-		s->rx_mpwqe_frag   += rq_stats->mpwqe_frag;
 		s->rx_buff_alloc_err += rq_stats->buff_alloc_err;
 		s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
 		s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
+		s->rx_cache_reuse += rq_stats->cache_reuse;
+		s->rx_cache_full  += rq_stats->cache_full;
+		s->rx_cache_empty += rq_stats->cache_empty;
+		s->rx_cache_busy  += rq_stats->cache_busy;
 
 		for (j = 0; j < priv->params.num_tc; j++) {
 			sq_stats = &priv->channel[i]->sq[j].stats;
@@ -162,6 +205,7 @@
 			s->tx_queue_stopped	+= sq_stats->stopped;
 			s->tx_queue_wake	+= sq_stats->wake;
 			s->tx_queue_dropped	+= sq_stats->dropped;
+			s->tx_xmit_more		+= sq_stats->xmit_more;
 			s->tx_csum_partial_inner += sq_stats->csum_partial_inner;
 			tx_offload_none		+= sq_stats->csum_none;
 		}
@@ -180,18 +224,15 @@
 {
 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
 	u32 *out = (u32 *)priv->stats.vport.query_vport_out;
-	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
 	struct mlx5_core_dev *mdev = priv->mdev;
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(query_vport_counter_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
 	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
 
 	memset(out, 0, outlen);
-
 	mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
 }
 
@@ -304,6 +345,117 @@
 #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN))
 #define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN))
 
+static inline int mlx5e_get_wqe_mtt_sz(void)
+{
+	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+	 * To avoid copying garbage after the mtt array, we allocate
+	 * a little more.
+	 */
+	return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64),
+		     MLX5_UMR_MTT_ALIGNMENT);
+}
+
+static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq,
+				       struct mlx5e_umr_wqe *wqe, u16 ix)
+{
+	struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
+	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
+	struct mlx5_wqe_data_seg      *dseg = &wqe->data;
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
+	u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
+	u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix);
+
+	cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
+				      ds_cnt);
+	cseg->fm_ce_se  = MLX5_WQE_CTRL_CQ_UPDATE;
+	cseg->imm       = rq->mkey_be;
+
+	ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN;
+	ucseg->klm_octowords =
+		cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE));
+	ucseg->bsf_octowords =
+		cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset));
+	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+
+	dseg->lkey = sq->mkey_be;
+	dseg->addr = cpu_to_be64(wi->umr.mtt_addr);
+}
+
+static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
+				     struct mlx5e_channel *c)
+{
+	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
+	int mtt_sz = mlx5e_get_wqe_mtt_sz();
+	int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
+	int i;
+
+	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
+				      GFP_KERNEL, cpu_to_node(c->cpu));
+	if (!rq->mpwqe.info)
+		goto err_out;
+
+	/* We allocate more than mtt_sz as we will align the pointer */
+	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+					cpu_to_node(c->cpu));
+	if (unlikely(!rq->mpwqe.mtt_no_align))
+		goto err_free_wqe_info;
+
+	for (i = 0; i < wq_sz; i++) {
+		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
+
+		wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc,
+					MLX5_UMR_ALIGN);
+		wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz,
+						  PCI_DMA_TODEVICE);
+		if (unlikely(dma_mapping_error(c->pdev, wi->umr.mtt_addr)))
+			goto err_unmap_mtts;
+
+		mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i);
+	}
+
+	return 0;
+
+err_unmap_mtts:
+	while (--i >= 0) {
+		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
+
+		dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz,
+				 PCI_DMA_TODEVICE);
+	}
+	kfree(rq->mpwqe.mtt_no_align);
+err_free_wqe_info:
+	kfree(rq->mpwqe.info);
+
+err_out:
+	return -ENOMEM;
+}
+
+static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
+{
+	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
+	int mtt_sz = mlx5e_get_wqe_mtt_sz();
+	int i;
+
+	for (i = 0; i < wq_sz; i++) {
+		struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i];
+
+		dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz,
+				 PCI_DMA_TODEVICE);
+	}
+	kfree(rq->mpwqe.mtt_no_align);
+	kfree(rq->mpwqe.info);
+}
+
+static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+{
+	struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+
+	if (rep && rep->vport != FDB_UPLINK_VPORT)
+		return true;
+
+	return false;
+}
+
 static int mlx5e_create_rq(struct mlx5e_channel *c,
 			   struct mlx5e_rq_param *param,
 			   struct mlx5e_rq *rq)
@@ -313,6 +465,8 @@
 	void *rqc = param->rqc;
 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
 	u32 byte_count;
+	u32 frag_sz;
+	int npages;
 	int wq_sz;
 	int err;
 	int i;
@@ -328,51 +482,6 @@
 
 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
 
-	switch (priv->params.rq_wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info),
-					    GFP_KERNEL, cpu_to_node(c->cpu));
-		if (!rq->wqe_info) {
-			err = -ENOMEM;
-			goto err_rq_wq_destroy;
-		}
-		rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
-		rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
-		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
-
-		rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
-		rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
-		rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
-		byte_count = rq->wqe_sz;
-		break;
-	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL,
-				       cpu_to_node(c->cpu));
-		if (!rq->skb) {
-			err = -ENOMEM;
-			goto err_rq_wq_destroy;
-		}
-		rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
-		rq->alloc_wqe = mlx5e_alloc_rx_wqe;
-		rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
-
-		rq->wqe_sz = (priv->params.lro_en) ?
-				priv->params.lro_wqe_sz :
-				MLX5E_SW2HW_MTU(priv->netdev->mtu);
-		rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz);
-		byte_count = rq->wqe_sz;
-		byte_count |= MLX5_HW_START_PADDING;
-	}
-
-	for (i = 0; i < wq_sz; i++) {
-		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
-
-		wqe->data.byte_count = cpu_to_be32(byte_count);
-	}
-
-	INIT_WORK(&rq->am.work, mlx5e_rx_am_work);
-	rq->am.mode = priv->params.rx_cq_period_mode;
-
 	rq->wq_type = priv->params.rq_wq_type;
 	rq->pdev    = c->pdev;
 	rq->netdev  = c->netdev;
@@ -380,8 +489,85 @@
 	rq->channel = c;
 	rq->ix      = c->ix;
 	rq->priv    = c->priv;
-	rq->mkey_be = c->mkey_be;
-	rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
+	rq->xdp_prog = priv->xdp_prog;
+
+	rq->buff.map_dir = DMA_FROM_DEVICE;
+	if (rq->xdp_prog)
+		rq->buff.map_dir = DMA_BIDIRECTIONAL;
+
+	switch (priv->params.rq_wq_type) {
+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
+		if (mlx5e_is_vf_vport_rep(priv)) {
+			err = -EINVAL;
+			goto err_rq_wq_destroy;
+		}
+
+		rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
+		rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
+		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
+
+		rq->mpwqe.mtt_offset = c->ix *
+			MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size));
+
+		rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
+		rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
+
+		rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
+		byte_count = rq->buff.wqe_sz;
+		rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
+		err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+		if (err)
+			goto err_rq_wq_destroy;
+		break;
+	default: /* MLX5_WQ_TYPE_LINKED_LIST */
+		rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info),
+					    GFP_KERNEL, cpu_to_node(c->cpu));
+		if (!rq->dma_info) {
+			err = -ENOMEM;
+			goto err_rq_wq_destroy;
+		}
+
+		if (mlx5e_is_vf_vport_rep(priv))
+			rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep;
+		else
+			rq->handle_rx_cqe = mlx5e_handle_rx_cqe;
+
+		rq->alloc_wqe = mlx5e_alloc_rx_wqe;
+		rq->dealloc_wqe = mlx5e_dealloc_rx_wqe;
+
+		rq->buff.wqe_sz = (priv->params.lro_en) ?
+				priv->params.lro_wqe_sz :
+				MLX5E_SW2HW_MTU(priv->netdev->mtu);
+		byte_count = rq->buff.wqe_sz;
+
+		/* calc the required page order */
+		frag_sz = MLX5_RX_HEADROOM +
+			  byte_count /* packet data */ +
+			  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+		frag_sz = SKB_DATA_ALIGN(frag_sz);
+
+		npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE);
+		rq->buff.page_order = order_base_2(npages);
+
+		byte_count |= MLX5_HW_START_PADDING;
+		rq->mkey_be = c->mkey_be;
+	}
+
+	for (i = 0; i < wq_sz; i++) {
+		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
+
+		wqe->data.byte_count = cpu_to_be32(byte_count);
+		wqe->data.lkey = rq->mkey_be;
+	}
+
+	INIT_WORK(&rq->am.work, mlx5e_rx_am_work);
+	rq->am.mode = priv->params.rx_cq_period_mode;
+
+	rq->page_cache.head = 0;
+	rq->page_cache.tail = 0;
+
+	if (rq->xdp_prog)
+		bpf_prog_add(rq->xdp_prog, 1);
 
 	return 0;
 
@@ -393,14 +579,25 @@
 
 static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
 {
+	int i;
+
+	if (rq->xdp_prog)
+		bpf_prog_put(rq->xdp_prog);
+
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		kfree(rq->wqe_info);
+		mlx5e_rq_free_mpwqe_info(rq);
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		kfree(rq->skb);
+		kfree(rq->dma_info);
 	}
 
+	for (i = rq->page_cache.head; i != rq->page_cache.tail;
+	     i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
+		struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i];
+
+		mlx5e_page_release(rq, dma_info, false);
+	}
 	mlx5_wq_destroy(&rq->wq_ctrl);
 }
 
@@ -428,7 +625,6 @@
 
 	MLX5_SET(rqc,  rqc, cqn,		rq->cq.mcq.cqn);
 	MLX5_SET(rqc,  rqc, state,		MLX5_RQC_STATE_RST);
-	MLX5_SET(rqc,  rqc, flush_in_error_en,	1);
 	MLX5_SET(rqc,  rqc, vsd, priv->params.vlan_strip_disable);
 	MLX5_SET(wq,   wq,  log_wq_pg_sz,	rq->wq_ctrl.buf.page_shift -
 						MLX5_ADAPTER_PAGE_SHIFT);
@@ -492,7 +688,8 @@
 	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
 
 	MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
-	MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_RQ_BITMASK_VSD);
+	MLX5_SET64(modify_rq_in, in, modify_bitmask,
+		   MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
 	MLX5_SET(rqc, rqc, vsd, vsd);
 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
 
@@ -525,6 +722,27 @@
 	return -ETIMEDOUT;
 }
 
+static void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
+{
+	struct mlx5_wq_ll *wq = &rq->wq;
+	struct mlx5e_rx_wqe *wqe;
+	__be16 wqe_ix_be;
+	u16 wqe_ix;
+
+	/* UMR WQE (if in progress) is always at wq->head */
+	if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
+		mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
+
+	while (!mlx5_wq_ll_is_empty(wq)) {
+		wqe_ix_be = *wq->tail_next;
+		wqe_ix    = be16_to_cpu(wqe_ix_be);
+		wqe       = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
+		rq->dealloc_wqe(rq, wqe_ix);
+		mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
+			       &wqe->next.next_wqe_index);
+	}
+}
+
 static int mlx5e_open_rq(struct mlx5e_channel *c,
 			 struct mlx5e_rq_param *param,
 			 struct mlx5e_rq *rq)
@@ -548,10 +766,8 @@
 	if (param->am_enabled)
 		set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
 
-	set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
-
-	sq->ico_wqe_info[pi].opcode     = MLX5_OPCODE_NOP;
-	sq->ico_wqe_info[pi].num_wqebbs = 1;
+	sq->db.ico_wqe[pi].opcode     = MLX5_OPCODE_NOP;
+	sq->db.ico_wqe[pi].num_wqebbs = 1;
 	mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */
 
 	return 0;
@@ -566,23 +782,8 @@
 
 static void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
-	int tout = 0;
-	int err;
-
-	clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
+	set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state);
 	napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
-
-	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
-	while (!mlx5_wq_ll_is_empty(&rq->wq) && !err &&
-	       tout++ < MLX5_EN_QP_FLUSH_MAX_ITER)
-		msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
-
-	if (err || tout == MLX5_EN_QP_FLUSH_MAX_ITER)
-		set_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state);
-
-	/* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */
-	napi_synchronize(&rq->channel->napi);
-
 	cancel_work_sync(&rq->am.work);
 
 	mlx5e_disable_rq(rq);
@@ -590,26 +791,65 @@
 	mlx5e_destroy_rq(rq);
 }
 
-static void mlx5e_free_sq_db(struct mlx5e_sq *sq)
+static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq)
 {
-	kfree(sq->wqe_info);
-	kfree(sq->dma_fifo);
-	kfree(sq->skb);
+	kfree(sq->db.xdp.di);
+	kfree(sq->db.xdp.wqe_info);
 }
 
-static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa)
+static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa)
+{
+	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+
+	sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz,
+				     GFP_KERNEL, numa);
+	sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz,
+					   GFP_KERNEL, numa);
+	if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) {
+		mlx5e_free_sq_xdp_db(sq);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq)
+{
+	kfree(sq->db.ico_wqe);
+}
+
+static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa)
+{
+	u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+
+	sq->db.ico_wqe = kzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz,
+				      GFP_KERNEL, numa);
+	if (!sq->db.ico_wqe)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq)
+{
+	kfree(sq->db.txq.wqe_info);
+	kfree(sq->db.txq.dma_fifo);
+	kfree(sq->db.txq.skb);
+}
+
+static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa)
 {
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
 	int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
 
-	sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa);
-	sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL,
-				    numa);
-	sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL,
-				    numa);
-
-	if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) {
-		mlx5e_free_sq_db(sq);
+	sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb),
+				      GFP_KERNEL, numa);
+	sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo),
+					   GFP_KERNEL, numa);
+	sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info),
+					   GFP_KERNEL, numa);
+	if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) {
+		mlx5e_free_sq_txq_db(sq);
 		return -ENOMEM;
 	}
 
@@ -618,6 +858,46 @@
 	return 0;
 }
 
+static void mlx5e_free_sq_db(struct mlx5e_sq *sq)
+{
+	switch (sq->type) {
+	case MLX5E_SQ_TXQ:
+		mlx5e_free_sq_txq_db(sq);
+		break;
+	case MLX5E_SQ_ICO:
+		mlx5e_free_sq_ico_db(sq);
+		break;
+	case MLX5E_SQ_XDP:
+		mlx5e_free_sq_xdp_db(sq);
+		break;
+	}
+}
+
+static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa)
+{
+	switch (sq->type) {
+	case MLX5E_SQ_TXQ:
+		return mlx5e_alloc_sq_txq_db(sq, numa);
+	case MLX5E_SQ_ICO:
+		return mlx5e_alloc_sq_ico_db(sq, numa);
+	case MLX5E_SQ_XDP:
+		return mlx5e_alloc_sq_xdp_db(sq, numa);
+	}
+
+	return 0;
+}
+
+static int mlx5e_sq_get_max_wqebbs(u8 sq_type)
+{
+	switch (sq_type) {
+	case MLX5E_SQ_ICO:
+		return MLX5E_ICOSQ_MAX_WQEBBS;
+	case MLX5E_SQ_XDP:
+		return MLX5E_XDP_TX_WQEBBS;
+	}
+	return MLX5_SEND_WQE_MAX_WQEBBS;
+}
+
 static int mlx5e_create_sq(struct mlx5e_channel *c,
 			   int tc,
 			   struct mlx5e_sq_param *param,
@@ -630,6 +910,13 @@
 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
 	int err;
 
+	sq->type      = param->type;
+	sq->pdev      = c->pdev;
+	sq->tstamp    = &priv->tstamp;
+	sq->mkey_be   = c->mkey_be;
+	sq->channel   = c;
+	sq->tc        = tc;
+
 	err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf));
 	if (err)
 		return err;
@@ -658,18 +945,7 @@
 	if (err)
 		goto err_sq_wq_destroy;
 
-	if (param->icosq) {
-		u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
-
-		sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) *
-						wq_sz,
-						GFP_KERNEL,
-						cpu_to_node(c->cpu));
-		if (!sq->ico_wqe_info) {
-			err = -ENOMEM;
-			goto err_free_sq_db;
-		}
-	} else {
+	if (sq->type == MLX5E_SQ_TXQ) {
 		int txq_ix;
 
 		txq_ix = c->ix + tc * priv->params.num_channels;
@@ -677,19 +953,11 @@
 		priv->txq_to_sq_map[txq_ix] = sq;
 	}
 
-	sq->pdev      = c->pdev;
-	sq->tstamp    = &priv->tstamp;
-	sq->mkey_be   = c->mkey_be;
-	sq->channel   = c;
-	sq->tc        = tc;
-	sq->edge      = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS;
+	sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type);
 	sq->bf_budget = MLX5E_SQ_BF_BUDGET;
 
 	return 0;
 
-err_free_sq_db:
-	mlx5e_free_sq_db(sq);
-
 err_sq_wq_destroy:
 	mlx5_wq_destroy(&sq->wq_ctrl);
 
@@ -704,7 +972,6 @@
 	struct mlx5e_channel *c = sq->channel;
 	struct mlx5e_priv *priv = c->priv;
 
-	kfree(sq->ico_wqe_info);
 	mlx5e_free_sq_db(sq);
 	mlx5_wq_destroy(&sq->wq_ctrl);
 	mlx5_unmap_free_uar(priv->mdev, &sq->uar);
@@ -733,11 +1000,12 @@
 
 	memcpy(sqc, param->sqc, sizeof(param->sqc));
 
-	MLX5_SET(sqc,  sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]);
+	MLX5_SET(sqc,  sqc, tis_num_0, param->type == MLX5E_SQ_ICO ?
+				       0 : priv->tisn[sq->tc]);
 	MLX5_SET(sqc,  sqc, cqn,		sq->cq.mcq.cqn);
 	MLX5_SET(sqc,  sqc, min_wqe_inline_mode, sq->min_inline_mode);
 	MLX5_SET(sqc,  sqc, state,		MLX5_SQC_STATE_RST);
-	MLX5_SET(sqc,  sqc, tis_lst_sz,		param->icosq ? 0 : 1);
+	MLX5_SET(sqc,  sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1);
 	MLX5_SET(sqc,  sqc, flush_in_error_en,	1);
 
 	MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
@@ -821,7 +1089,6 @@
 		goto err_disable_sq;
 
 	if (sq->txq) {
-		set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
 		netdev_tx_reset_queue(sq->txq);
 		netif_tx_start_queue(sq->txq);
 	}
@@ -845,38 +1112,22 @@
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
-	int tout = 0;
-	int err;
-
-	if (sq->txq) {
-		clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state);
-		/* prevent netif_tx_wake_queue */
-		napi_synchronize(&sq->channel->napi);
-		netif_tx_disable_queue(sq->txq);
-
-		/* ensure hw is notified of all pending wqes */
-		if (mlx5e_sq_has_room_for(sq, 1))
-			mlx5e_send_nop(sq, true);
-
-		err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY,
-				      MLX5_SQC_STATE_ERR, false, 0);
-		if (err)
-			set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
-	}
-
-	/* wait till sq is empty, unless a TX timeout occurred on this SQ */
-	while (sq->cc != sq->pc &&
-	       !test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)) {
-		msleep(MLX5_EN_QP_FLUSH_MSLEEP_QUANT);
-		if (tout++ > MLX5_EN_QP_FLUSH_MAX_ITER)
-			set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
-	}
-
-	/* avoid destroying sq before mlx5e_poll_tx_cq() is done with it */
+	set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
+	/* prevent netif_tx_wake_queue */
 	napi_synchronize(&sq->channel->napi);
 
-	mlx5e_free_tx_descs(sq);
+	if (sq->txq) {
+		netif_tx_disable_queue(sq->txq);
+
+		/* last doorbell out, godspeed .. */
+		if (mlx5e_sq_has_room_for(sq, 1)) {
+			sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL;
+			mlx5e_send_nop(sq, true);
+		}
+	}
+
 	mlx5e_disable_sq(sq);
+	mlx5e_free_sq_descs(sq);
 	mlx5e_destroy_sq(sq);
 }
 
@@ -1237,14 +1488,31 @@
 		}
 	}
 
+	if (priv->xdp_prog) {
+		/* XDP SQ CQ params are same as normal TXQ sq CQ params */
+		err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq,
+				    priv->params.tx_cq_moderation);
+		if (err)
+			goto err_close_sqs;
+
+		err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq);
+		if (err) {
+			mlx5e_close_cq(&c->xdp_sq.cq);
+			goto err_close_sqs;
+		}
+	}
+
+	c->xdp = !!priv->xdp_prog;
 	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
 	if (err)
-		goto err_close_sqs;
+		goto err_close_xdp_sq;
 
 	netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix);
 	*cp = c;
 
 	return 0;
+err_close_xdp_sq:
+	mlx5e_close_sq(&c->xdp_sq);
 
 err_close_sqs:
 	mlx5e_close_sqs(c);
@@ -1273,9 +1541,13 @@
 static void mlx5e_close_channel(struct mlx5e_channel *c)
 {
 	mlx5e_close_rq(&c->rq);
+	if (c->xdp)
+		mlx5e_close_sq(&c->xdp_sq);
 	mlx5e_close_sqs(c);
 	mlx5e_close_sq(&c->icosq);
 	napi_disable(&c->napi);
+	if (c->xdp)
+		mlx5e_close_cq(&c->xdp_sq.cq);
 	mlx5e_close_cq(&c->rq.cq);
 	mlx5e_close_tx_cqs(c);
 	mlx5e_close_cq(&c->icosq.cq);
@@ -1349,6 +1621,7 @@
 
 	param->max_inline = priv->params.tx_max_inline;
 	param->min_inline_mode = priv->params.tx_min_inline_mode;
+	param->type = MLX5E_SQ_TXQ;
 }
 
 static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
@@ -1422,7 +1695,22 @@
 	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
 	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
 
-	param->icosq = true;
+	param->type = MLX5E_SQ_ICO;
+}
+
+static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
+				    struct mlx5e_sq_param *param)
+{
+	void *sqc = param->sqc;
+	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
+
+	mlx5e_build_sq_param_common(priv, param);
+	MLX5_SET(wq, wq, log_wq_sz,     priv->params.log_sq_size);
+
+	param->max_inline = priv->params.tx_max_inline;
+	/* FOR XDP SQs will support only L2 inline mode */
+	param->min_inline_mode = MLX5_INLINE_MODE_NONE;
+	param->type = MLX5E_SQ_XDP;
 }
 
 static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam)
@@ -1431,6 +1719,7 @@
 
 	mlx5e_build_rq_param(priv, &cparam->rq);
 	mlx5e_build_sq_param(priv, &cparam->sq);
+	mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq);
 	mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz);
 	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
 	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
@@ -1826,10 +2115,6 @@
 	netif_set_real_num_tx_queues(netdev, num_txqs);
 	netif_set_real_num_rx_queues(netdev, priv->params.num_channels);
 
-	err = mlx5e_set_dev_port_mtu(netdev);
-	if (err)
-		goto err_clear_state_opened_flag;
-
 	err = mlx5e_open_channels(priv);
 	if (err) {
 		netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n",
@@ -1908,6 +2193,9 @@
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	int err;
 
+	if (!netif_device_present(netdev))
+		return -ENODEV;
+
 	mutex_lock(&priv->state_lock);
 	err = mlx5e_close_locked(netdev);
 	mutex_unlock(&priv->state_lock);
@@ -2022,14 +2310,15 @@
 static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
-	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
+	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(tisc, tisc, prio, tc << 1);
 	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
 
+	if (mlx5_lag_is_lacp_owner(mdev))
+		MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
+
 	return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]);
 }
 
@@ -2573,6 +2862,7 @@
 	u16 max_mtu;
 	u16 min_mtu;
 	int err = 0;
+	bool reset;
 
 	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
 
@@ -2588,13 +2878,18 @@
 
 	mutex_lock(&priv->state_lock);
 
+	reset = !priv->params.lro_en &&
+		(priv->params.rq_wq_type !=
+		 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
+
 	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
-	if (was_opened)
+	if (was_opened && reset)
 		mlx5e_close_locked(netdev);
 
 	netdev->mtu = new_mtu;
+	mlx5e_set_dev_port_mtu(netdev);
 
-	if (was_opened)
+	if (was_opened && reset)
 		err = mlx5e_open_locked(netdev);
 
 	mutex_unlock(&priv->state_lock);
@@ -2622,11 +2917,15 @@
 	return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
 }
 
-static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos)
+static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+			     __be16 vlan_proto)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
 					   vlan, qos);
 }
@@ -2794,7 +3093,7 @@
 		if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
 			continue;
 		sched_work = true;
-		set_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state);
+		set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
 		netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n",
 			   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
 	}
@@ -2803,6 +3102,106 @@
 		schedule_work(&priv->tx_timeout_work);
 }
 
+static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct bpf_prog *old_prog;
+	int err = 0;
+	bool reset, was_opened;
+	int i;
+
+	mutex_lock(&priv->state_lock);
+
+	if ((netdev->features & NETIF_F_LRO) && prog) {
+		netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+	/* no need for full reset when exchanging programs */
+	reset = (!priv->xdp_prog || !prog);
+
+	if (was_opened && reset)
+		mlx5e_close_locked(netdev);
+
+	/* exchange programs */
+	old_prog = xchg(&priv->xdp_prog, prog);
+	if (prog)
+		bpf_prog_add(prog, 1);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	if (reset) /* change RQ type according to priv->xdp_prog */
+		mlx5e_set_rq_priv_params(priv);
+
+	if (was_opened && reset)
+		mlx5e_open_locked(netdev);
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
+		goto unlock;
+
+	/* exchanging programs w/o reset, we update ref counts on behalf
+	 * of the channels RQs here.
+	 */
+	bpf_prog_add(prog, priv->params.num_channels);
+	for (i = 0; i < priv->params.num_channels; i++) {
+		struct mlx5e_channel *c = priv->channel[i];
+
+		set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);
+		napi_synchronize(&c->napi);
+		/* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
+
+		old_prog = xchg(&c->rq.xdp_prog, prog);
+
+		clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);
+		/* napi_schedule in case we have missed anything */
+		set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags);
+		napi_schedule(&c->napi);
+
+		if (old_prog)
+			bpf_prog_put(old_prog);
+	}
+
+unlock:
+	mutex_unlock(&priv->state_lock);
+	return err;
+}
+
+static bool mlx5e_xdp_attached(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	return !!priv->xdp_prog;
+}
+
+static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return mlx5e_xdp_set(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = mlx5e_xdp_attached(dev);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* Fake "interrupt" called by netpoll (eg netconsole) to send skbs without
+ * reenabling interrupts.
+ */
+static void mlx5e_netpoll(struct net_device *dev)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	int i;
+
+	for (i = 0; i < priv->params.num_channels; i++)
+		napi_schedule(&priv->channel[i]->napi);
+}
+#endif
+
 static const struct net_device_ops mlx5e_netdev_ops_basic = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
@@ -2822,6 +3221,10 @@
 	.ndo_rx_flow_steer	 = mlx5e_rx_flow_steer,
 #endif
 	.ndo_tx_timeout          = mlx5e_tx_timeout,
+	.ndo_xdp		 = mlx5e_xdp,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller     = mlx5e_netpoll,
+#endif
 };
 
 static const struct net_device_ops mlx5e_netdev_ops_sriov = {
@@ -2853,6 +3256,10 @@
 	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
 	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
 	.ndo_tx_timeout          = mlx5e_tx_timeout,
+	.ndo_xdp		 = mlx5e_xdp,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller     = mlx5e_netpoll,
+#endif
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -2927,13 +3334,6 @@
 		indirection_rqt[i] = i % num_channels;
 }
 
-static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
-{
-	return MLX5_CAP_GEN(mdev, striding_rq) &&
-		MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
-		MLX5_CAP_ETH(mdev, reg_umr_sq);
-}
-
 static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
 {
 	enum pcie_link_width width;
@@ -3013,11 +3413,13 @@
 					 MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
 					 MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
 
-	priv->params.log_sq_size           =
-		MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
-	priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ?
-		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
-		MLX5_WQ_TYPE_LINKED_LIST;
+	priv->mdev                         = mdev;
+	priv->netdev                       = netdev;
+	priv->params.num_channels          = profile->max_nch(mdev);
+	priv->profile                      = profile;
+	priv->ppriv                        = ppriv;
+
+	priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 
 	/* set CQE compression */
 	priv->params.rx_cqe_compress_admin = false;
@@ -3030,33 +3432,11 @@
 		priv->params.rx_cqe_compress_admin =
 			cqe_compress_heuristic(link_speed, pci_bw);
 	}
-
 	priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin;
 
-	switch (priv->params.rq_wq_type) {
-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-		priv->params.mpwqe_log_stride_sz =
-			priv->params.rx_cqe_compress ?
-			MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
-			MLX5_MPWRQ_LOG_STRIDE_SIZE;
-		priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
-			priv->params.mpwqe_log_stride_sz;
+	mlx5e_set_rq_priv_params(priv);
+	if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
 		priv->params.lro_en = true;
-		break;
-	default: /* MLX5_WQ_TYPE_LINKED_LIST */
-		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
-	}
-
-	mlx5_core_info(mdev,
-		       "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
-		       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
-		       BIT(priv->params.log_rq_size),
-		       BIT(priv->params.mpwqe_log_stride_sz),
-		       priv->params.rx_cqe_compress_admin);
-
-	priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
-					    BIT(priv->params.log_rq_size));
 
 	priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
 	mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode);
@@ -3076,19 +3456,16 @@
 	mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt,
 				      MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev));
 
-	priv->params.lro_wqe_sz            =
-		MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
+	priv->params.lro_wqe_sz =
+		MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ -
+		/* Extra room needed for build_skb */
+		MLX5_RX_HEADROOM -
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	/* Initialize pflags */
 	MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER,
 			    priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 
-	priv->mdev                         = mdev;
-	priv->netdev                       = netdev;
-	priv->params.num_channels          = profile->max_nch(mdev);
-	priv->profile                      = profile;
-	priv->ppriv                        = ppriv;
-
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_ets_init(priv);
 #endif
@@ -3228,35 +3605,37 @@
 static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_create_mkey_mbox_in *in;
-	struct mlx5_mkey_seg *mkc;
-	int inlen = sizeof(*in);
-	u64 npages =
-		priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS;
+	u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev),
+					 BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW));
+	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	void *mkc;
+	u32 *in;
 	int err;
 
 	in = mlx5_vzalloc(inlen);
 	if (!in)
 		return -ENOMEM;
 
-	mkc = &in->seg;
-	mkc->status = MLX5_MKEY_STATUS_FREE;
-	mkc->flags = MLX5_PERM_UMR_EN |
-		     MLX5_PERM_LOCAL_READ |
-		     MLX5_PERM_LOCAL_WRITE |
-		     MLX5_ACCESS_MODE_MTT;
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
-	mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
-	mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn);
-	mkc->len = cpu_to_be64(npages << PAGE_SHIFT);
-	mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages));
-	mkc->log2_page_size = PAGE_SHIFT;
+	npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages);
 
-	err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL,
-				    NULL, NULL);
+	MLX5_SET(mkc, mkc, free, 1);
+	MLX5_SET(mkc, mkc, umr_en, 1);
+	MLX5_SET(mkc, mkc, lw, 1);
+	MLX5_SET(mkc, mkc, lr, 1);
+	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+	MLX5_SET64(mkc, mkc, len, npages << PAGE_SHIFT);
+	MLX5_SET(mkc, mkc, translations_octword_size,
+		 MLX5_MTT_OCTW(npages));
+	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+
+	err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen);
 
 	kvfree(in);
-
 	return err;
 }
 
@@ -3375,6 +3754,8 @@
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
 	struct mlx5_eswitch_rep rep;
 
+	mlx5_lag_add(mdev, netdev);
+
 	if (mlx5e_vxlan_allowed(mdev)) {
 		rtnl_lock();
 		udp_tunnel_get_rx_info(netdev);
@@ -3385,11 +3766,12 @@
 	queue_work(priv->wq, &priv->set_rx_mode_work);
 
 	if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
+		mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id);
 		rep.load = mlx5e_nic_rep_load;
 		rep.unload = mlx5e_nic_rep_unload;
-		rep.vport = 0;
+		rep.vport = FDB_UPLINK_VPORT;
 		rep.priv_data = priv;
-		mlx5_eswitch_register_vport_rep(esw, &rep);
+		mlx5_eswitch_register_vport_rep(esw, 0, &rep);
 	}
 }
 
@@ -3397,6 +3779,7 @@
 {
 	queue_work(priv->wq, &priv->set_rx_mode_work);
 	mlx5e_disable_async_events(priv);
+	mlx5_lag_remove(priv->mdev);
 }
 
 static const struct mlx5e_profile mlx5e_nic_profile = {
@@ -3413,13 +3796,13 @@
 	.max_tc		   = MLX5E_MAX_NUM_TC,
 };
 
-void *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
-			  const struct mlx5e_profile *profile, void *ppriv)
+struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
+				       const struct mlx5e_profile *profile,
+				       void *ppriv)
 {
+	int nch = profile->max_nch(mdev);
 	struct net_device *netdev;
 	struct mlx5e_priv *priv;
-	int nch = profile->max_nch(mdev);
-	int err;
 
 	netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
 				    nch * profile->max_tc,
@@ -3437,12 +3820,31 @@
 
 	priv->wq = create_singlethread_workqueue("mlx5e");
 	if (!priv->wq)
-		goto err_free_netdev;
+		goto err_cleanup_nic;
+
+	return netdev;
+
+err_cleanup_nic:
+	profile->cleanup(priv);
+	free_netdev(netdev);
+
+	return NULL;
+}
+
+int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
+{
+	const struct mlx5e_profile *profile;
+	struct mlx5e_priv *priv;
+	int err;
+
+	priv = netdev_priv(netdev);
+	profile = priv->profile;
+	clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
 
 	err = mlx5e_create_umr_mkey(priv);
 	if (err) {
 		mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
-		goto err_destroy_wq;
+		goto out;
 	}
 
 	err = profile->init_tx(priv);
@@ -3463,20 +3865,18 @@
 
 	mlx5e_init_l2_addr(priv);
 
-	err = register_netdev(netdev);
-	if (err) {
-		mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
-		goto err_dealloc_q_counters;
-	}
+	mlx5e_set_dev_port_mtu(netdev);
 
 	if (profile->enable)
 		profile->enable(priv);
 
-	return priv;
+	rtnl_lock();
+	if (netif_running(netdev))
+		mlx5e_open(netdev);
+	netif_device_attach(netdev);
+	rtnl_unlock();
 
-err_dealloc_q_counters:
-	mlx5e_destroy_q_counter(priv);
-	profile->cleanup_rx(priv);
+	return 0;
 
 err_close_drop_rq:
 	mlx5e_close_drop_rq(priv);
@@ -3487,13 +3887,8 @@
 err_destroy_umr_mkey:
 	mlx5_core_destroy_mkey(mdev, &priv->umr_mkey);
 
-err_destroy_wq:
-	destroy_workqueue(priv->wq);
-
-err_free_netdev:
-	free_netdev(netdev);
-
-	return NULL;
+out:
+	return err;
 }
 
 static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev)
@@ -3501,61 +3896,40 @@
 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
 	int total_vfs = MLX5_TOTAL_VPORTS(mdev);
 	int vport;
+	u8 mac[ETH_ALEN];
 
 	if (!MLX5_CAP_GEN(mdev, vport_group_manager))
 		return;
 
+	mlx5_query_nic_vport_mac_address(mdev, 0, mac);
+
 	for (vport = 1; vport < total_vfs; vport++) {
 		struct mlx5_eswitch_rep rep;
 
 		rep.load = mlx5e_vport_rep_load;
 		rep.unload = mlx5e_vport_rep_unload;
 		rep.vport = vport;
-		mlx5_eswitch_register_vport_rep(esw, &rep);
+		ether_addr_copy(rep.hw_id, mac);
+		mlx5_eswitch_register_vport_rep(esw, vport, &rep);
 	}
 }
 
-static void *mlx5e_add(struct mlx5_core_dev *mdev)
+void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
 {
-	struct mlx5_eswitch *esw = mdev->priv.eswitch;
-	void *ppriv = NULL;
-	void *ret;
-
-	if (mlx5e_check_required_hca_cap(mdev))
-		return NULL;
-
-	if (mlx5e_create_mdev_resources(mdev))
-		return NULL;
-
-	mlx5e_register_vport_rep(mdev);
-
-	if (MLX5_CAP_GEN(mdev, vport_group_manager))
-		ppriv = &esw->offloads.vport_reps[0];
-
-	ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
-	if (!ret) {
-		mlx5e_destroy_mdev_resources(mdev);
-		return NULL;
-	}
-	return ret;
-}
-
-void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
-{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
 	const struct mlx5e_profile *profile = priv->profile;
-	struct net_device *netdev = priv->netdev;
 
 	set_bit(MLX5E_STATE_DESTROYING, &priv->state);
 	if (profile->disable)
 		profile->disable(priv);
 
 	flush_workqueue(priv->wq);
-	if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) {
-		netif_device_detach(netdev);
+
+	rtnl_lock();
+	if (netif_running(netdev))
 		mlx5e_close(netdev);
-	} else {
-		unregister_netdev(netdev);
-	}
+	netif_device_detach(netdev);
+	rtnl_unlock();
 
 	mlx5e_destroy_q_counter(priv);
 	profile->cleanup_rx(priv);
@@ -3563,12 +3937,109 @@
 	profile->cleanup_tx(priv);
 	mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
 	cancel_delayed_work_sync(&priv->update_stats_work);
+}
+
+/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying
+ * hardware contexts and to connect it to the current netdev.
+ */
+static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv)
+{
+	struct mlx5e_priv *priv = vpriv;
+	struct net_device *netdev = priv->netdev;
+	int err;
+
+	if (netif_device_present(netdev))
+		return 0;
+
+	err = mlx5e_create_mdev_resources(mdev);
+	if (err)
+		return err;
+
+	err = mlx5e_attach_netdev(mdev, netdev);
+	if (err) {
+		mlx5e_destroy_mdev_resources(mdev);
+		return err;
+	}
+
+	return 0;
+}
+
+static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv)
+{
+	struct mlx5e_priv *priv = vpriv;
+	struct net_device *netdev = priv->netdev;
+
+	if (!netif_device_present(netdev))
+		return;
+
+	mlx5e_detach_netdev(mdev, netdev);
+	mlx5e_destroy_mdev_resources(mdev);
+}
+
+static void *mlx5e_add(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_eswitch *esw = mdev->priv.eswitch;
+	int total_vfs = MLX5_TOTAL_VPORTS(mdev);
+	void *ppriv = NULL;
+	void *priv;
+	int vport;
+	int err;
+	struct net_device *netdev;
+
+	err = mlx5e_check_required_hca_cap(mdev);
+	if (err)
+		return NULL;
+
+	mlx5e_register_vport_rep(mdev);
+
+	if (MLX5_CAP_GEN(mdev, vport_group_manager))
+		ppriv = &esw->offloads.vport_reps[0];
+
+	netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv);
+	if (!netdev) {
+		mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
+		goto err_unregister_reps;
+	}
+
+	priv = netdev_priv(netdev);
+
+	err = mlx5e_attach(mdev, priv);
+	if (err) {
+		mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err);
+		goto err_destroy_netdev;
+	}
+
+	err = register_netdev(netdev);
+	if (err) {
+		mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
+		goto err_detach;
+	}
+
+	return priv;
+
+err_detach:
+	mlx5e_detach(mdev, priv);
+
+err_destroy_netdev:
+	mlx5e_destroy_netdev(mdev, priv);
+
+err_unregister_reps:
+	for (vport = 1; vport < total_vfs; vport++)
+		mlx5_eswitch_unregister_vport_rep(esw, vport);
+
+	return NULL;
+}
+
+void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv)
+{
+	const struct mlx5e_profile *profile = priv->profile;
+	struct net_device *netdev = priv->netdev;
+
+	unregister_netdev(netdev);
 	destroy_workqueue(priv->wq);
 	if (profile->cleanup)
 		profile->cleanup(priv);
-
-	if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state))
-		free_netdev(netdev);
+	free_netdev(netdev);
 }
 
 static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
@@ -3578,12 +4049,11 @@
 	struct mlx5e_priv *priv = vpriv;
 	int vport;
 
-	mlx5e_destroy_netdev(mdev, priv);
-
 	for (vport = 1; vport < total_vfs; vport++)
 		mlx5_eswitch_unregister_vport_rep(esw, vport);
 
-	mlx5e_destroy_mdev_resources(mdev);
+	mlx5e_detach(mdev, vpriv);
+	mlx5e_destroy_netdev(mdev, priv);
 }
 
 static void *mlx5e_get_netdev(void *vpriv)
@@ -3596,6 +4066,8 @@
 static struct mlx5_interface mlx5e_interface = {
 	.add       = mlx5e_add,
 	.remove    = mlx5e_remove,
+	.attach    = mlx5e_attach,
+	.detach    = mlx5e_detach,
 	.event     = mlx5e_async_event,
 	.protocol  = MLX5_INTERFACE_PROTOCOL_ETH,
 	.get_dev   = mlx5e_get_netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 1c7d8b8..3c97da1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -135,17 +135,16 @@
 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	u8 mac[ETH_ALEN];
 
 	if (esw->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
 	switch (attr->id) {
 	case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
-		mlx5_query_nic_vport_mac_address(priv->mdev, 0, mac);
 		attr->u.ppid.id_len = ETH_ALEN;
-		memcpy(&attr->u.ppid.id, &mac, ETH_ALEN);
+		ether_addr_copy(attr->u.ppid.id, rep->hw_id);
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -414,19 +413,50 @@
 int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
 			 struct mlx5_eswitch_rep *rep)
 {
-	rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
-	if (!rep->priv_data) {
-		pr_warn("Failed to create representor for vport %d\n",
+	struct net_device *netdev;
+	int err;
+
+	netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep);
+	if (!netdev) {
+		pr_warn("Failed to create representor netdev for vport %d\n",
 			rep->vport);
 		return -EINVAL;
 	}
+
+	rep->priv_data = netdev_priv(netdev);
+
+	err = mlx5e_attach_netdev(esw->dev, netdev);
+	if (err) {
+		pr_warn("Failed to attach representor netdev for vport %d\n",
+			rep->vport);
+		goto err_destroy_netdev;
+	}
+
+	err = register_netdev(netdev);
+	if (err) {
+		pr_warn("Failed to register representor netdev for vport %d\n",
+			rep->vport);
+		goto err_detach_netdev;
+	}
+
 	return 0;
+
+err_detach_netdev:
+	mlx5e_detach_netdev(esw->dev, netdev);
+
+err_destroy_netdev:
+	mlx5e_destroy_netdev(esw->dev, rep->priv_data);
+
+	return err;
+
 }
 
 void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
 			    struct mlx5_eswitch_rep *rep)
 {
 	struct mlx5e_priv *priv = rep->priv_data;
+	struct net_device *netdev = priv->netdev;
 
+	mlx5e_detach_netdev(esw->dev, netdev);
 	mlx5e_destroy_netdev(esw->dev, priv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 9f2a16a..c6de6fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -36,6 +36,7 @@
 #include <net/busy_poll.h>
 #include "en.h"
 #include "en_tc.h"
+#include "eswitch.h"
 
 static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
 {
@@ -179,51 +180,99 @@
 	mutex_unlock(&priv->state_lock);
 }
 
-int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
-{
-	struct sk_buff *skb;
-	dma_addr_t dma_addr;
+#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT)
 
-	skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz);
-	if (unlikely(!skb))
+static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq,
+				      struct mlx5e_dma_info *dma_info)
+{
+	struct mlx5e_page_cache *cache = &rq->page_cache;
+	u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1);
+
+	if (tail_next == cache->head) {
+		rq->stats.cache_full++;
+		return false;
+	}
+
+	cache->page_cache[cache->tail] = *dma_info;
+	cache->tail = tail_next;
+	return true;
+}
+
+static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq,
+				      struct mlx5e_dma_info *dma_info)
+{
+	struct mlx5e_page_cache *cache = &rq->page_cache;
+
+	if (unlikely(cache->head == cache->tail)) {
+		rq->stats.cache_empty++;
+		return false;
+	}
+
+	if (page_ref_count(cache->page_cache[cache->head].page) != 1) {
+		rq->stats.cache_busy++;
+		return false;
+	}
+
+	*dma_info = cache->page_cache[cache->head];
+	cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1);
+	rq->stats.cache_reuse++;
+
+	dma_sync_single_for_device(rq->pdev, dma_info->addr,
+				   RQ_PAGE_SIZE(rq),
+				   DMA_FROM_DEVICE);
+	return true;
+}
+
+static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq,
+					  struct mlx5e_dma_info *dma_info)
+{
+	struct page *page;
+
+	if (mlx5e_rx_cache_get(rq, dma_info))
+		return 0;
+
+	page = dev_alloc_pages(rq->buff.page_order);
+	if (unlikely(!page))
 		return -ENOMEM;
 
-	dma_addr = dma_map_single(rq->pdev,
-				  /* hw start padding */
-				  skb->data,
-				  /* hw end padding */
-				  rq->wqe_sz,
-				  DMA_FROM_DEVICE);
-
-	if (unlikely(dma_mapping_error(rq->pdev, dma_addr)))
-		goto err_free_skb;
-
-	*((dma_addr_t *)skb->cb) = dma_addr;
-	wqe->data.addr = cpu_to_be64(dma_addr);
-	wqe->data.lkey = rq->mkey_be;
-
-	rq->skb[ix] = skb;
+	dma_info->page = page;
+	dma_info->addr = dma_map_page(rq->pdev, page, 0,
+				      RQ_PAGE_SIZE(rq), rq->buff.map_dir);
+	if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) {
+		put_page(page);
+		return -ENOMEM;
+	}
 
 	return 0;
+}
 
-err_free_skb:
-	dev_kfree_skb(skb);
+void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info,
+			bool recycle)
+{
+	if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info))
+		return;
 
-	return -ENOMEM;
+	dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq),
+		       rq->buff.map_dir);
+	put_page(dma_info->page);
+}
+
+int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
+{
+	struct mlx5e_dma_info *di = &rq->dma_info[ix];
+
+	if (unlikely(mlx5e_page_alloc_mapped(rq, di)))
+		return -ENOMEM;
+
+	wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM);
+	return 0;
 }
 
 void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
 {
-	struct sk_buff *skb = rq->skb[ix];
+	struct mlx5e_dma_info *di = &rq->dma_info[ix];
 
-	if (skb) {
-		rq->skb[ix] = NULL;
-		dma_unmap_single(rq->pdev,
-				 *((dma_addr_t *)skb->cb),
-				 rq->wqe_sz,
-				 DMA_FROM_DEVICE);
-		dev_kfree_skb(skb);
-	}
+	mlx5e_page_release(rq, di, true);
 }
 
 static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
@@ -231,44 +280,11 @@
 	return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
 }
 
-static inline void
-mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev,
-				struct mlx5e_mpw_info *wi,
-				u32 wqe_offset, u32 len)
-{
-	dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset,
-				len, DMA_FROM_DEVICE);
-}
-
-static inline void
-mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev,
-				    struct mlx5e_mpw_info *wi,
-				    u32 wqe_offset, u32 len)
-{
-	/* No dma pre sync for fragmented MPWQE */
-}
-
-static inline void
-mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq,
-				struct sk_buff *skb,
-				struct mlx5e_mpw_info *wi,
-				u32 page_idx, u32 frag_offset,
-				u32 len)
-{
-	unsigned int truesize =	ALIGN(len, rq->mpwqe_stride_sz);
-
-	wi->skbs_frags[page_idx]++;
-	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-			&wi->dma_info.page[page_idx], frag_offset,
-			len, truesize);
-}
-
-static inline void
-mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq,
-				    struct sk_buff *skb,
-				    struct mlx5e_mpw_info *wi,
-				    u32 page_idx, u32 frag_offset,
-				    u32 len)
+static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq,
+					    struct sk_buff *skb,
+					    struct mlx5e_mpw_info *wi,
+					    u32 page_idx, u32 frag_offset,
+					    u32 len)
 {
 	unsigned int truesize =	ALIGN(len, rq->mpwqe_stride_sz);
 
@@ -282,24 +298,11 @@
 }
 
 static inline void
-mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev,
-				   struct sk_buff *skb,
-				   struct mlx5e_mpw_info *wi,
-				   u32 page_idx, u32 offset,
-				   u32 headlen)
-{
-	struct page *page = &wi->dma_info.page[page_idx];
-
-	skb_copy_to_linear_data(skb, page_address(page) + offset,
-				ALIGN(headlen, sizeof(long)));
-}
-
-static inline void
-mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
-				       struct sk_buff *skb,
-				       struct mlx5e_mpw_info *wi,
-				       u32 page_idx, u32 offset,
-				       u32 headlen)
+mlx5e_copy_skb_header_mpwqe(struct device *pdev,
+			    struct sk_buff *skb,
+			    struct mlx5e_mpw_info *wi,
+			    u32 page_idx, u32 offset,
+			    u32 headlen)
 {
 	u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset);
 	struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx];
@@ -324,46 +327,9 @@
 	}
 }
 
-static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix)
+static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
 {
-	return rq_ix * MLX5_CHANNEL_MAX_NUM_MTTS +
-		wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
-}
-
-static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
-				struct mlx5e_sq *sq,
-				struct mlx5e_umr_wqe *wqe,
-				u16 ix)
-{
-	struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
-	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
-	struct mlx5_wqe_data_seg      *dseg = &wqe->data;
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
-	u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS);
-	u16 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix);
-
-	memset(wqe, 0, sizeof(*wqe));
-	cseg->opmod_idx_opcode =
-		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
-			    MLX5_OPCODE_UMR);
-	cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
-				      ds_cnt);
-	cseg->fm_ce_se  = MLX5_WQE_CTRL_CQ_UPDATE;
-	cseg->imm       = rq->umr_mkey_be;
-
-	ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN;
-	ucseg->klm_octowords =
-		cpu_to_be16(mlx5e_get_mtt_octw(MLX5_MPWRQ_PAGES_PER_WQE));
-	ucseg->bsf_octowords =
-		cpu_to_be16(mlx5e_get_mtt_octw(umr_wqe_mtt_offset));
-	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
-
-	dseg->lkey = sq->mkey_be;
-	dseg->addr = cpu_to_be64(wi->umr.mtt_addr);
-}
-
-static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
-{
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
 	struct mlx5e_sq *sq = &rq->channel->icosq;
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *wqe;
@@ -372,142 +338,86 @@
 
 	/* fill sq edge with nops to avoid wqe wrap around */
 	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
-		sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP;
-		sq->ico_wqe_info[pi].num_wqebbs = 1;
+		sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
+		sq->db.ico_wqe[pi].num_wqebbs = 1;
 		mlx5e_send_nop(sq, true);
 	}
 
 	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-	mlx5e_build_umr_wqe(rq, sq, wqe, ix);
-	sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR;
-	sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs;
+	memcpy(wqe, &wi->umr.wqe, sizeof(*wqe));
+	wqe->ctrl.opmod_idx_opcode =
+		cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
+			    MLX5_OPCODE_UMR);
+
+	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
+	sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs;
 	sq->pc += num_wqebbs;
 	mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
 }
 
-static inline int mlx5e_get_wqe_mtt_sz(void)
+static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq,
+				    struct mlx5e_rx_wqe *wqe,
+				    u16 ix)
 {
-	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
-	 * To avoid copying garbage after the mtt array, we allocate
-	 * a little more.
-	 */
-	return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64),
-		     MLX5_UMR_MTT_ALIGNMENT);
-}
-
-static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq,
-				    struct mlx5e_mpw_info *wi,
-				    int i)
-{
-	struct page *page;
-
-	page = dev_alloc_page();
-	if (unlikely(!page))
-		return -ENOMEM;
-
-	wi->umr.dma_info[i].page = page;
-	wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE,
-						PCI_DMA_FROMDEVICE);
-	if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) {
-		put_page(page);
-		return -ENOMEM;
-	}
-	wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR);
-
-	return 0;
-}
-
-static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
-					   struct mlx5e_rx_wqe *wqe,
-					   u16 ix)
-{
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
-	int mtt_sz = mlx5e_get_wqe_mtt_sz();
-	u32 dma_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix) << PAGE_SHIFT;
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
+	u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT;
+	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
+	int err;
 	int i;
 
-	wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) *
-				   MLX5_MPWRQ_PAGES_PER_WQE,
-				   GFP_ATOMIC);
-	if (unlikely(!wi->umr.dma_info))
-		goto err_out;
-
-	/* We allocate more than mtt_sz as we will align the pointer */
-	wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1,
-				       GFP_ATOMIC);
-	if (unlikely(!wi->umr.mtt_no_align))
-		goto err_free_umr;
-
-	wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN);
-	wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz,
-					  PCI_DMA_TODEVICE);
-	if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr)))
-		goto err_free_mtt;
-
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-		if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i)))
+		struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
+
+		err = mlx5e_page_alloc_mapped(rq, dma_info);
+		if (unlikely(err))
 			goto err_unmap;
-		page_ref_add(wi->umr.dma_info[i].page,
-			     mlx5e_mpwqe_strides_per_page(rq));
+		wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR);
+		page_ref_add(dma_info->page, pg_strides);
 		wi->skbs_frags[i] = 0;
 	}
 
 	wi->consumed_strides = 0;
-	wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe;
-	wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe;
-	wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe;
-	wi->free_wqe     = mlx5e_free_rx_fragmented_mpwqe;
-	wqe->data.lkey = rq->umr_mkey_be;
 	wqe->data.addr = cpu_to_be64(dma_offset);
 
 	return 0;
 
 err_unmap:
 	while (--i >= 0) {
-		dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
-			       PCI_DMA_FROMDEVICE);
-		page_ref_sub(wi->umr.dma_info[i].page,
-			     mlx5e_mpwqe_strides_per_page(rq));
-		put_page(wi->umr.dma_info[i].page);
+		struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
+
+		page_ref_sub(dma_info->page, pg_strides);
+		mlx5e_page_release(rq, dma_info, true);
 	}
-	dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE);
 
-err_free_mtt:
-	kfree(wi->umr.mtt_no_align);
-
-err_free_umr:
-	kfree(wi->umr.dma_info);
-
-err_out:
-	return -ENOMEM;
+	return err;
 }
 
-void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
-				    struct mlx5e_mpw_info *wi)
+void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 {
-	int mtt_sz = mlx5e_get_wqe_mtt_sz();
+	int pg_strides = mlx5e_mpwqe_strides_per_page(rq);
 	int i;
 
 	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-		dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
-			       PCI_DMA_FROMDEVICE);
-		page_ref_sub(wi->umr.dma_info[i].page,
-			mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]);
-		put_page(wi->umr.dma_info[i].page);
+		struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i];
+
+		page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]);
+		mlx5e_page_release(rq, dma_info, true);
 	}
-	dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE);
-	kfree(wi->umr.mtt_no_align);
-	kfree(wi->umr.dma_info);
 }
 
-void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq)
+void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
 {
 	struct mlx5_wq_ll *wq = &rq->wq;
 	struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head);
 
 	clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
+
+	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) {
+		mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
+		return;
+	}
+
 	mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index));
-	rq->stats.mpwqe_frag++;
 
 	/* ensure wqes are visible to device before updating doorbell record */
 	dma_wmb();
@@ -515,106 +425,28 @@
 	mlx5_wq_ll_update_db_record(wq);
 }
 
-static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq,
-				       struct mlx5e_rx_wqe *wqe,
-				       u16 ix)
-{
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
-	gfp_t gfp_mask;
-	int i;
-
-	gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC;
-	wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
-					     MLX5_MPWRQ_WQE_PAGE_ORDER);
-	if (unlikely(!wi->dma_info.page))
-		return -ENOMEM;
-
-	wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0,
-					 rq->wqe_sz, PCI_DMA_FROMDEVICE);
-	if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) {
-		put_page(wi->dma_info.page);
-		return -ENOMEM;
-	}
-
-	/* We split the high-order page into order-0 ones and manage their
-	 * reference counter to minimize the memory held by small skb fragments
-	 */
-	split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER);
-	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-		page_ref_add(&wi->dma_info.page[i],
-			     mlx5e_mpwqe_strides_per_page(rq));
-		wi->skbs_frags[i] = 0;
-	}
-
-	wi->consumed_strides = 0;
-	wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe;
-	wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe;
-	wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe;
-	wi->free_wqe     = mlx5e_free_rx_linear_mpwqe;
-	wqe->data.lkey = rq->mkey_be;
-	wqe->data.addr = cpu_to_be64(wi->dma_info.addr);
-
-	return 0;
-}
-
-void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
-				struct mlx5e_mpw_info *wi)
-{
-	int i;
-
-	dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz,
-		       PCI_DMA_FROMDEVICE);
-	for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-		page_ref_sub(&wi->dma_info.page[i],
-			mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]);
-		put_page(&wi->dma_info.page[i]);
-	}
-}
-
 int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
 {
 	int err;
 
-	err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix);
-	if (unlikely(err)) {
-		err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix);
-		if (unlikely(err))
-			return err;
-		set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
-		mlx5e_post_umr_wqe(rq, ix);
-		return -EBUSY;
-	}
-
-	return 0;
+	err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix);
+	if (unlikely(err))
+		return err;
+	set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
+	mlx5e_post_umr_wqe(rq, ix);
+	return -EBUSY;
 }
 
 void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[ix];
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
 
-	wi->free_wqe(rq, wi);
-}
-
-void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
-{
-	struct mlx5_wq_ll *wq = &rq->wq;
-	struct mlx5e_rx_wqe *wqe;
-	__be16 wqe_ix_be;
-	u16 wqe_ix;
-
-	while (!mlx5_wq_ll_is_empty(wq)) {
-		wqe_ix_be = *wq->tail_next;
-		wqe_ix    = be16_to_cpu(wqe_ix_be);
-		wqe       = mlx5_wq_ll_get_wqe(&rq->wq, wqe_ix);
-		rq->dealloc_wqe(rq, wqe_ix);
-		mlx5_wq_ll_pop(&rq->wq, wqe_ix_be,
-			       &wqe->next.next_wqe_index);
-	}
+	mlx5e_free_rx_mpwqe(rq, wi);
 }
 
 #define RQ_CANNOT_POST(rq) \
-		(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \
-		 test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
+	(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \
+	 test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
 
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
 {
@@ -628,9 +460,10 @@
 		int err;
 
 		err = rq->alloc_wqe(rq, wqe, wq->head);
+		if (err == -EBUSY)
+			return true;
 		if (unlikely(err)) {
-			if (err != -EBUSY)
-				rq->stats.buff_alloc_err++;
+			rq->stats.buff_alloc_err++;
 			break;
 		}
 
@@ -648,24 +481,32 @@
 static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
 				 u32 cqe_bcnt)
 {
-	struct ethhdr	*eth	= (struct ethhdr *)(skb->data);
-	struct iphdr	*ipv4	= (struct iphdr *)(skb->data + ETH_HLEN);
-	struct ipv6hdr	*ipv6	= (struct ipv6hdr *)(skb->data + ETH_HLEN);
+	struct ethhdr	*eth = (struct ethhdr *)(skb->data);
+	struct iphdr	*ipv4;
+	struct ipv6hdr	*ipv6;
 	struct tcphdr	*tcp;
+	int network_depth = 0;
+	__be16 proto;
+	u16 tot_len;
 
 	u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe);
 	int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA  == l4_hdr_type) ||
 		       (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type));
 
-	u16 tot_len = cqe_bcnt - ETH_HLEN;
+	skb->mac_len = ETH_HLEN;
+	proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth);
 
-	if (eth->h_proto == htons(ETH_P_IP)) {
-		tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
+	ipv4 = (struct iphdr *)(skb->data + network_depth);
+	ipv6 = (struct ipv6hdr *)(skb->data + network_depth);
+	tot_len = cqe_bcnt - network_depth;
+
+	if (proto == htons(ETH_P_IP)) {
+		tcp = (struct tcphdr *)(skb->data + network_depth +
 					sizeof(struct iphdr));
 		ipv6 = NULL;
 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 	} else {
-		tcp = (struct tcphdr *)(skb->data + ETH_HLEN +
+		tcp = (struct tcphdr *)(skb->data + network_depth +
 					sizeof(struct ipv6hdr));
 		ipv4 = NULL;
 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
@@ -789,12 +630,186 @@
 	rq->stats.packets++;
 	rq->stats.bytes += cqe_bcnt;
 	mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb);
-	napi_gro_receive(rq->cq.napi, skb);
+}
+
+static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	struct mlx5e_tx_wqe *wqe;
+	u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */
+
+	wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+
+	wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+	mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0);
+}
+
+static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq,
+					struct mlx5e_dma_info *di,
+					unsigned int data_offset,
+					int len)
+{
+	struct mlx5e_sq          *sq   = &rq->channel->xdp_sq;
+	struct mlx5_wq_cyc       *wq   = &sq->wq;
+	u16                      pi    = sq->pc & wq->sz_m1;
+	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+	struct mlx5e_sq_wqe_info *wi   = &sq->db.xdp.wqe_info[pi];
+
+	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
+	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
+	struct mlx5_wqe_data_seg *dseg;
+
+	dma_addr_t dma_addr  = di->addr + data_offset + MLX5E_XDP_MIN_INLINE;
+	unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE;
+	void *data           = page_address(di->page) + data_offset;
+
+	if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) {
+		if (sq->db.xdp.doorbell) {
+			/* SQ is full, ring doorbell */
+			mlx5e_xmit_xdp_doorbell(sq);
+			sq->db.xdp.doorbell = false;
+		}
+		rq->stats.xdp_tx_full++;
+		mlx5e_page_release(rq, di, true);
+		return;
+	}
+
+	dma_sync_single_for_device(sq->pdev, dma_addr, dma_len,
+				   PCI_DMA_TODEVICE);
+
+	memset(wqe, 0, sizeof(*wqe));
+
+	/* copy the inline part */
+	memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE);
+	eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE);
+
+	dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1);
+
+	/* write the dma part */
+	dseg->addr       = cpu_to_be64(dma_addr);
+	dseg->byte_count = cpu_to_be32(dma_len);
+	dseg->lkey       = sq->mkey_be;
+
+	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND);
+	cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT);
+
+	sq->db.xdp.di[pi] = *di;
+	wi->opcode     = MLX5_OPCODE_SEND;
+	wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS;
+	sq->pc += MLX5E_XDP_TX_WQEBBS;
+
+	sq->db.xdp.doorbell = true;
+	rq->stats.xdp_tx++;
+}
+
+/* returns true if packet was consumed by xdp */
+static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq,
+				    const struct bpf_prog *prog,
+				    struct mlx5e_dma_info *di,
+				    void *data, u16 len)
+{
+	struct xdp_buff xdp;
+	u32 act;
+
+	if (!prog)
+		return false;
+
+	xdp.data = data;
+	xdp.data_end = xdp.data + len;
+	act = bpf_prog_run_xdp(prog, &xdp);
+	switch (act) {
+	case XDP_PASS:
+		return false;
+	case XDP_TX:
+		mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len);
+		return true;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	case XDP_ABORTED:
+	case XDP_DROP:
+		rq->stats.xdp_drop++;
+		mlx5e_page_release(rq, di, true);
+		return true;
+	}
+}
+
+static inline
+struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
+			     u16 wqe_counter, u32 cqe_bcnt)
+{
+	struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog);
+	struct mlx5e_dma_info *di;
+	struct sk_buff *skb;
+	void *va, *data;
+
+	di             = &rq->dma_info[wqe_counter];
+	va             = page_address(di->page);
+	data           = va + MLX5_RX_HEADROOM;
+
+	dma_sync_single_range_for_cpu(rq->pdev,
+				      di->addr,
+				      MLX5_RX_HEADROOM,
+				      rq->buff.wqe_sz,
+				      DMA_FROM_DEVICE);
+	prefetch(data);
+
+	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
+		rq->stats.wqe_err++;
+		mlx5e_page_release(rq, di, true);
+		return NULL;
+	}
+
+	if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt))
+		return NULL; /* page/packet was consumed by XDP */
+
+	skb = build_skb(va, RQ_PAGE_SIZE(rq));
+	if (unlikely(!skb)) {
+		rq->stats.buff_alloc_err++;
+		mlx5e_page_release(rq, di, true);
+		return NULL;
+	}
+
+	/* queue up for recycling ..*/
+	page_ref_inc(di->page);
+	mlx5e_page_release(rq, di, true);
+
+	skb_reserve(skb, MLX5_RX_HEADROOM);
+	skb_put(skb, cqe_bcnt);
+
+	return skb;
 }
 
 void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
 	struct mlx5e_rx_wqe *wqe;
+	__be16 wqe_counter_be;
+	struct sk_buff *skb;
+	u16 wqe_counter;
+	u32 cqe_bcnt;
+
+	wqe_counter_be = cqe->wqe_counter;
+	wqe_counter    = be16_to_cpu(wqe_counter_be);
+	wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
+	cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);
+
+	skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
+	if (!skb)
+		goto wq_ll_pop;
+
+	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+	napi_gro_receive(rq->cq.napi, skb);
+
+wq_ll_pop:
+	mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
+		       &wqe->next.next_wqe_index);
+}
+
+void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+	struct net_device *netdev = rq->netdev;
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	struct mlx5e_rx_wqe *wqe;
 	struct sk_buff *skb;
 	__be16 wqe_counter_be;
 	u16 wqe_counter;
@@ -803,26 +818,19 @@
 	wqe_counter_be = cqe->wqe_counter;
 	wqe_counter    = be16_to_cpu(wqe_counter_be);
 	wqe            = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter);
-	skb            = rq->skb[wqe_counter];
-	prefetch(skb->data);
-	rq->skb[wqe_counter] = NULL;
+	cqe_bcnt       = be32_to_cpu(cqe->byte_cnt);
 
-	dma_unmap_single(rq->pdev,
-			 *((dma_addr_t *)skb->cb),
-			 rq->wqe_sz,
-			 DMA_FROM_DEVICE);
-
-	if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) {
-		rq->stats.wqe_err++;
-		dev_kfree_skb(skb);
+	skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt);
+	if (!skb)
 		goto wq_ll_pop;
-	}
-
-	cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
-	skb_put(skb, cqe_bcnt);
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 
+	if (rep->vlan && skb_vlan_tag_present(skb))
+		skb_vlan_pop(skb);
+
+	napi_gro_receive(rq->cq.napi, skb);
+
 wq_ll_pop:
 	mlx5_wq_ll_pop(&rq->wq, wqe_counter_be,
 		       &wqe->next.next_wqe_index);
@@ -834,7 +842,6 @@
 					   u32 cqe_bcnt,
 					   struct sk_buff *skb)
 {
-	u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz);
 	u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
 	u32 wqe_offset     = stride_ix * rq->mpwqe_stride_sz;
 	u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
@@ -848,21 +855,20 @@
 		page_idx++;
 		frag_offset -= PAGE_SIZE;
 	}
-	wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes);
 
 	while (byte_cnt) {
 		u32 pg_consumed_bytes =
 			min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
 
-		wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset,
-				 pg_consumed_bytes);
+		mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset,
+					 pg_consumed_bytes);
 		byte_cnt -= pg_consumed_bytes;
 		frag_offset = 0;
 		page_idx++;
 	}
 	/* copy header */
-	wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset,
-			    headlen);
+	mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx,
+				    head_offset, headlen);
 	/* skb linear part was allocated with headlen and aligned to long */
 	skb->tail += headlen;
 	skb->len  += headlen;
@@ -872,7 +878,7 @@
 {
 	u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
 	u16 wqe_id         = be16_to_cpu(cqe->wqe_id);
-	struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id];
+	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id];
 	struct mlx5e_rx_wqe  *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id);
 	struct sk_buff *skb;
 	u16 cqe_bcnt;
@@ -902,21 +908,23 @@
 
 	mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb);
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+	napi_gro_receive(rq->cq.napi, skb);
 
 mpwrq_cqe_out:
 	if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
 		return;
 
-	wi->free_wqe(rq, wi);
+	mlx5e_free_rx_mpwqe(rq, wi);
 	mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index);
 }
 
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
 	struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
+	struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq;
 	int work_done = 0;
 
-	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH_TIMEOUT, &rq->state)))
+	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state)))
 		return 0;
 
 	if (cq->decmprs_left)
@@ -940,6 +948,11 @@
 		rq->handle_rx_cqe(rq, cqe);
 	}
 
+	if (xdp_sq->db.xdp.doorbell) {
+		mlx5e_xmit_xdp_doorbell(xdp_sq);
+		xdp_sq->db.xdp.doorbell = false;
+	}
+
 	mlx5_cqwq_update_db_record(&cq->wq);
 
 	/* ensure cq space is freed before enabling more cqes */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 7b9d8a9..57452fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -65,17 +65,24 @@
 	u64 rx_csum_none;
 	u64 rx_csum_complete;
 	u64 rx_csum_unnecessary_inner;
+	u64 rx_xdp_drop;
+	u64 rx_xdp_tx;
+	u64 rx_xdp_tx_full;
 	u64 tx_csum_partial;
 	u64 tx_csum_partial_inner;
 	u64 tx_queue_stopped;
 	u64 tx_queue_wake;
 	u64 tx_queue_dropped;
+	u64 tx_xmit_more;
 	u64 rx_wqe_err;
 	u64 rx_mpwqe_filler;
-	u64 rx_mpwqe_frag;
 	u64 rx_buff_alloc_err;
 	u64 rx_cqe_compress_blks;
 	u64 rx_cqe_compress_pkts;
+	u64 rx_cache_reuse;
+	u64 rx_cache_full;
+	u64 rx_cache_empty;
+	u64 rx_cache_busy;
 
 	/* Special handling counters */
 	u64 link_down_events_phy;
@@ -96,17 +103,24 @@
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
+	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) },
 };
 
@@ -270,12 +284,18 @@
 	u64 csum_none;
 	u64 lro_packets;
 	u64 lro_bytes;
+	u64 xdp_drop;
+	u64 xdp_tx;
+	u64 xdp_tx_full;
 	u64 wqe_err;
 	u64 mpwqe_filler;
-	u64 mpwqe_frag;
 	u64 buff_alloc_err;
 	u64 cqe_compress_blks;
 	u64 cqe_compress_pkts;
+	u64 cache_reuse;
+	u64 cache_full;
+	u64 cache_empty;
+	u64 cache_busy;
 };
 
 static const struct counter_desc rq_stats_desc[] = {
@@ -284,20 +304,27 @@
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
+	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
 };
 
 struct mlx5e_sq_stats {
 	/* commonly accessed in data path */
 	u64 packets;
 	u64 bytes;
+	u64 xmit_more;
 	u64 tso_packets;
 	u64 tso_bytes;
 	u64 tso_inner_packets;
@@ -324,6 +351,7 @@
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) },
 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) },
+	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, xmit_more) },
 };
 
 #define NUM_SW_COUNTERS			ARRAY_SIZE(sw_stats_desc)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0f19b01..ce8c54d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -39,6 +39,7 @@
 #include <linux/rhashtable.h>
 #include <net/switchdev.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
@@ -47,6 +48,7 @@
 	struct rhash_head	node;
 	u64			cookie;
 	struct mlx5_flow_rule	*rule;
+	struct mlx5_esw_flow_attr *attr;
 };
 
 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
@@ -114,27 +116,30 @@
 
 static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 						    struct mlx5_flow_spec *spec,
-						    u32 action, u32 dst_vport)
+						    struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct mlx5_eswitch_rep *rep = priv->ppriv;
-	u32 src_vport;
+	int err;
 
-	if (rep->vport) /* set source vport for the flow */
-		src_vport = rep->vport;
-	else
-		src_vport = FDB_UPLINK_VPORT;
+	err = mlx5_eswitch_add_vlan_action(esw, attr);
+	if (err)
+		return ERR_PTR(err);
 
-	return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport);
+	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 }
 
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
-			      struct mlx5_flow_rule *rule)
+			      struct mlx5_flow_rule *rule,
+			      struct mlx5_esw_flow_attr *attr)
 {
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_fc *counter = NULL;
 
 	counter = mlx5_flow_rule_counter(rule);
 
+	if (esw && esw->mode == SRIOV_OFFLOADS)
+		mlx5_eswitch_del_vlan_action(esw, attr);
+
 	mlx5_del_flow_rule(rule);
 
 	mlx5_fc_destroy(priv->mdev, counter);
@@ -159,6 +164,7 @@
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
 	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_PORTS))) {
@@ -170,7 +176,7 @@
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
 		struct flow_dissector_key_control *key =
 			skb_flow_dissector_target(f->dissector,
-						  FLOW_DISSECTOR_KEY_BASIC,
+						  FLOW_DISSECTOR_KEY_CONTROL,
 						  f->key);
 		addr_type = key->addr_type;
 	}
@@ -222,6 +228,24 @@
 				key->src);
 	}
 
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_dissector_key_vlan *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_VLAN,
+						  f->key);
+		struct flow_dissector_key_vlan *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_VLAN,
+						  f->mask);
+		if (mask->vlan_id) {
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1);
+
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);
+		}
+	}
+
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
 		struct flow_dissector_key_ipv4_addrs *key =
 			skb_flow_dissector_target(f->dissector,
@@ -318,6 +342,7 @@
 				u32 *action, u32 *flow_tag)
 {
 	const struct tc_action *a;
+	LIST_HEAD(actions);
 
 	if (tc_no_actions(exts))
 		return -EINVAL;
@@ -325,7 +350,8 @@
 	*flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
 	*action = 0;
 
-	tc_for_each_action(a, exts) {
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
 		/* Only support a single action per rule */
 		if (*action)
 			return -EINVAL;
@@ -359,23 +385,22 @@
 }
 
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-				u32 *action, u32 *dest_vport)
+				struct mlx5_esw_flow_attr *attr)
 {
 	const struct tc_action *a;
+	LIST_HEAD(actions);
 
 	if (tc_no_actions(exts))
 		return -EINVAL;
 
-	*action = 0;
+	memset(attr, 0, sizeof(*attr));
+	attr->in_rep = priv->ppriv;
 
-	tc_for_each_action(a, exts) {
-		/* Only support a single action per rule */
-		if (*action)
-			return -EINVAL;
-
+	tcf_exts_to_list(exts, &actions);
+	list_for_each_entry(a, &actions, list) {
 		if (is_tcf_gact_shot(a)) {
-			*action = MLX5_FLOW_CONTEXT_ACTION_DROP |
-				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
+			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
+					MLX5_FLOW_CONTEXT_ACTION_COUNT;
 			continue;
 		}
 
@@ -383,7 +408,6 @@
 			int ifindex = tcf_mirred_ifindex(a);
 			struct net_device *out_dev;
 			struct mlx5e_priv *out_priv;
-			struct mlx5_eswitch_rep *out_rep;
 
 			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
 
@@ -393,13 +417,22 @@
 				return -EINVAL;
 			}
 
+			attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 			out_priv = netdev_priv(out_dev);
-			out_rep  = out_priv->ppriv;
-			if (out_rep->vport == 0)
-				*dest_vport = FDB_UPLINK_VPORT;
-			else
-				*dest_vport = out_rep->vport;
-			*action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+			attr->out_rep = out_priv->ppriv;
+			continue;
+		}
+
+		if (is_tcf_vlan(a)) {
+			if (tcf_vlan_action(a) == VLAN_F_POP) {
+				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+			} else if (tcf_vlan_action(a) == VLAN_F_PUSH) {
+				if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
+					return -EOPNOTSUPP;
+
+				attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+				attr->vlan = tcf_vlan_push_vid(a);
+			}
 			continue;
 		}
 
@@ -413,18 +446,29 @@
 {
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
 	int err = 0;
-	u32 flow_tag, action, dest_vport = 0;
+	bool fdb_flow = false;
+	u32 flow_tag, action;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5_flow_spec *spec;
 	struct mlx5_flow_rule *old = NULL;
+	struct mlx5_esw_flow_attr *old_attr = NULL;
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
+	if (esw && esw->mode == SRIOV_OFFLOADS)
+		fdb_flow = true;
+
 	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
 				      tc->ht_params);
-	if (flow)
+	if (flow) {
 		old = flow->rule;
-	else
-		flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+		old_attr = flow->attr;
+	} else {
+		if (fdb_flow)
+			flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr),
+				       GFP_KERNEL);
+		else
+			flow = kzalloc(sizeof(*flow), GFP_KERNEL);
+	}
 
 	spec = mlx5_vzalloc(sizeof(*spec));
 	if (!spec || !flow) {
@@ -438,11 +482,12 @@
 	if (err < 0)
 		goto err_free;
 
-	if (esw && esw->mode == SRIOV_OFFLOADS) {
-		err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport);
+	if (fdb_flow) {
+		flow->attr  = (struct mlx5_esw_flow_attr *)(flow + 1);
+		err = parse_tc_fdb_actions(priv, f->exts, flow->attr);
 		if (err < 0)
 			goto err_free;
-		flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport);
+		flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr);
 	} else {
 		err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag);
 		if (err < 0)
@@ -461,7 +506,7 @@
 		goto err_del_rule;
 
 	if (old)
-		mlx5e_tc_del_flow(priv, old);
+		mlx5e_tc_del_flow(priv, old, old_attr);
 
 	goto out;
 
@@ -489,7 +534,7 @@
 
 	rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
 
-	mlx5e_tc_del_flow(priv, flow->rule);
+	mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
 
 	kfree(flow);
 
@@ -503,6 +548,7 @@
 	struct mlx5e_tc_flow *flow;
 	struct tc_action *a;
 	struct mlx5_fc *counter;
+	LIST_HEAD(actions);
 	u64 bytes;
 	u64 packets;
 	u64 lastuse;
@@ -518,7 +564,8 @@
 
 	mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
 
-	tc_for_each_action(a, f->exts)
+	tcf_exts_to_list(f->exts, &actions);
+	list_for_each_entry(a, &actions, list)
 		tcf_action_stats_update(a, bytes, packets, lastuse);
 
 	return 0;
@@ -544,7 +591,7 @@
 	struct mlx5e_tc_flow *flow = ptr;
 	struct mlx5e_priv *priv = arg;
 
-	mlx5e_tc_del_flow(priv, flow->rule);
+	mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
 	kfree(flow);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index e073bf59..70a7173 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -52,7 +52,6 @@
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
 	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | 0x01);
 
-	sq->skb[pi] = NULL;
 	sq->pc++;
 	sq->stats.nop++;
 
@@ -82,15 +81,17 @@
 				  u32 size,
 				  enum mlx5e_dma_map_type map_type)
 {
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr;
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size;
-	sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type;
+	u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask;
+
+	sq->db.txq.dma_fifo[i].addr = addr;
+	sq->db.txq.dma_fifo[i].size = size;
+	sq->db.txq.dma_fifo[i].type = map_type;
 	sq->dma_fifo_pc++;
 }
 
 static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i)
 {
-	return &sq->dma_fifo[i & sq->dma_fifo_mask];
+	return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask];
 }
 
 static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma)
@@ -221,7 +222,7 @@
 
 	u16 pi = sq->pc & wq->sz_m1;
 	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-	struct mlx5e_tx_wqe_info *wi   = &sq->wqe_info[pi];
+	struct mlx5e_tx_wqe_info *wi   = &sq->db.txq.wqe_info[pi];
 
 	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
 	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
@@ -341,7 +342,7 @@
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
 	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 
-	sq->skb[pi] = skb;
+	sq->db.txq.skb[pi] = skb;
 
 	wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
 	sq->pc += wi->num_wqebbs;
@@ -356,6 +357,7 @@
 		sq->stats.stopped++;
 	}
 
+	sq->stats.xmit_more += skb->xmit_more;
 	if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) {
 		int bf_sz = 0;
 
@@ -367,8 +369,10 @@
 	}
 
 	/* fill sq edge with nops to avoid wqe wrap around */
-	while ((sq->pc & wq->sz_m1) > sq->edge)
+	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
+		sq->db.txq.skb[pi] = NULL;
 		mlx5e_send_nop(sq, false);
+	}
 
 	if (bf)
 		sq->bf_budget--;
@@ -394,35 +398,6 @@
 	return mlx5e_sq_xmit(sq, skb);
 }
 
-void mlx5e_free_tx_descs(struct mlx5e_sq *sq)
-{
-	struct mlx5e_tx_wqe_info *wi;
-	struct sk_buff *skb;
-	u16 ci;
-	int i;
-
-	while (sq->cc != sq->pc) {
-		ci = sq->cc & sq->wq.sz_m1;
-		skb = sq->skb[ci];
-		wi = &sq->wqe_info[ci];
-
-		if (!skb) { /* nop */
-			sq->cc++;
-			continue;
-		}
-
-		for (i = 0; i < wi->num_dma; i++) {
-			struct mlx5e_sq_dma *dma =
-				mlx5e_dma_get(sq, sq->dma_fifo_cc++);
-
-			mlx5e_tx_dma_unmap(sq->pdev, dma);
-		}
-
-		dev_kfree_skb_any(skb);
-		sq->cc += wi->num_wqebbs;
-	}
-}
-
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
 	struct mlx5e_sq *sq;
@@ -434,7 +409,7 @@
 
 	sq = container_of(cq, struct mlx5e_sq, cq);
 
-	if (unlikely(test_bit(MLX5E_SQ_STATE_TX_TIMEOUT, &sq->state)))
+	if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
 		return false;
 
 	npkts = 0;
@@ -470,8 +445,8 @@
 			last_wqe = (sqcc == wqe_counter);
 
 			ci = sqcc & sq->wq.sz_m1;
-			skb = sq->skb[ci];
-			wi = &sq->wqe_info[ci];
+			skb = sq->db.txq.skb[ci];
+			wi = &sq->db.txq.wqe_info[ci];
 
 			if (unlikely(!skb)) { /* nop */
 				sqcc++;
@@ -512,11 +487,73 @@
 	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
 
 	if (netif_tx_queue_stopped(sq->txq) &&
-	    mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM) &&
-	    likely(test_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state))) {
-				netif_tx_wake_queue(sq->txq);
-				sq->stats.wake++;
+	    mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) {
+		netif_tx_wake_queue(sq->txq);
+		sq->stats.wake++;
 	}
 
 	return (i == MLX5E_TX_CQ_POLL_BUDGET);
 }
+
+static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq)
+{
+	struct mlx5e_tx_wqe_info *wi;
+	struct sk_buff *skb;
+	u16 ci;
+	int i;
+
+	while (sq->cc != sq->pc) {
+		ci = sq->cc & sq->wq.sz_m1;
+		skb = sq->db.txq.skb[ci];
+		wi = &sq->db.txq.wqe_info[ci];
+
+		if (!skb) { /* nop */
+			sq->cc++;
+			continue;
+		}
+
+		for (i = 0; i < wi->num_dma; i++) {
+			struct mlx5e_sq_dma *dma =
+				mlx5e_dma_get(sq, sq->dma_fifo_cc++);
+
+			mlx5e_tx_dma_unmap(sq->pdev, dma);
+		}
+
+		dev_kfree_skb_any(skb);
+		sq->cc += wi->num_wqebbs;
+	}
+}
+
+static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq)
+{
+	struct mlx5e_sq_wqe_info *wi;
+	struct mlx5e_dma_info *di;
+	u16 ci;
+
+	while (sq->cc != sq->pc) {
+		ci = sq->cc & sq->wq.sz_m1;
+		di = &sq->db.xdp.di[ci];
+		wi = &sq->db.xdp.wqe_info[ci];
+
+		if (wi->opcode == MLX5_OPCODE_NOP) {
+			sq->cc++;
+			continue;
+		}
+
+		sq->cc += wi->num_wqebbs;
+
+		mlx5e_page_release(&sq->channel->rq, di, false);
+	}
+}
+
+void mlx5e_free_sq_descs(struct mlx5e_sq *sq)
+{
+	switch (sq->type) {
+	case MLX5E_SQ_TXQ:
+		mlx5e_free_txq_sq_descs(sq);
+		break;
+	case MLX5E_SQ_XDP:
+		mlx5e_free_xdp_sq_descs(sq);
+		break;
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 64ae2e8..5703f19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -51,16 +51,18 @@
 
 static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 {
+	struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq);
 	struct mlx5_wq_cyc *wq;
 	struct mlx5_cqe64 *cqe;
-	struct mlx5e_sq *sq;
 	u16 sqcc;
 
+	if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
+		return;
+
 	cqe = mlx5e_get_cqe(cq);
 	if (likely(!cqe))
 		return;
 
-	sq = container_of(cq, struct mlx5e_sq, cq);
 	wq = &sq->wq;
 
 	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
@@ -70,7 +72,7 @@
 
 	do {
 		u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1;
-		struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci];
+		struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci];
 
 		mlx5_cqwq_pop(&cq->wq);
 		sqcc += icowi->num_wqebbs;
@@ -85,7 +87,7 @@
 		case MLX5_OPCODE_NOP:
 			break;
 		case MLX5_OPCODE_UMR:
-			mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq);
+			mlx5e_post_rx_mpwqe(&sq->channel->rq);
 			break;
 		default:
 			WARN_ONCE(true,
@@ -103,6 +105,66 @@
 	sq->cc = sqcc;
 }
 
+static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq)
+{
+	struct mlx5e_sq *sq;
+	u16 sqcc;
+	int i;
+
+	sq = container_of(cq, struct mlx5e_sq, cq);
+
+	if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
+		return false;
+
+	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
+	 * otherwise a cq overrun may occur
+	 */
+	sqcc = sq->cc;
+
+	for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) {
+		struct mlx5_cqe64 *cqe;
+		u16 wqe_counter;
+		bool last_wqe;
+
+		cqe = mlx5e_get_cqe(cq);
+		if (!cqe)
+			break;
+
+		mlx5_cqwq_pop(&cq->wq);
+
+		wqe_counter = be16_to_cpu(cqe->wqe_counter);
+
+		do {
+			struct mlx5e_sq_wqe_info *wi;
+			struct mlx5e_dma_info *di;
+			u16 ci;
+
+			last_wqe = (sqcc == wqe_counter);
+
+			ci = sqcc & sq->wq.sz_m1;
+			di = &sq->db.xdp.di[ci];
+			wi = &sq->db.xdp.wqe_info[ci];
+
+			if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) {
+				sqcc++;
+				continue;
+			}
+
+			sqcc += wi->num_wqebbs;
+			/* Recycle RX page */
+			mlx5e_page_release(&sq->channel->rq, di, true);
+		} while (!last_wqe);
+	}
+
+	mlx5_cqwq_update_db_record(&cq->wq);
+
+	/* ensure cq space is freed before enabling more cqes */
+	wmb();
+
+	sq->cc = sqcc;
+	return (i == MLX5E_TX_CQ_POLL_BUDGET);
+}
+
 int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
@@ -119,6 +181,9 @@
 	work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget);
 	busy |= work_done == budget;
 
+	if (c->xdp)
+		busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq);
+
 	mlx5e_poll_ico_cq(&c->icosq.cq);
 
 	busy |= mlx5e_post_rx_wqes(&c->rq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 0e30602..aaca090 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -86,23 +86,12 @@
 
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
-	struct mlx5_destroy_eq_mbox_in in;
-	struct mlx5_destroy_eq_mbox_out out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)]   = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ);
-	in.eqn = eqn;
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (!err)
-		goto ex;
-
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-
-ex:
-	return err;
+	MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
+	MLX5_SET(destroy_eq_in, in, eq_number, eqn);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
@@ -351,11 +340,13 @@
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 		       int nent, u64 mask, const char *name, struct mlx5_uar *uar)
 {
+	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
 	struct mlx5_priv *priv = &dev->priv;
-	struct mlx5_create_eq_mbox_in *in;
-	struct mlx5_create_eq_mbox_out out;
-	int err;
+	__be64 *pas;
+	void *eqc;
 	int inlen;
+	u32 *in;
+	int err;
 
 	eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
 	eq->cons_index = 0;
@@ -365,35 +356,36 @@
 
 	init_eq_buf(eq);
 
-	inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages;
+	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
+		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
+
 	in = mlx5_vzalloc(inlen);
 	if (!in) {
 		err = -ENOMEM;
 		goto err_buf;
 	}
-	memset(&out, 0, sizeof(out));
 
-	mlx5_fill_page_array(&eq->buf, in->pas);
+	pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
+	mlx5_fill_page_array(&eq->buf, pas);
 
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ);
-	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index);
-	in->ctx.intr = vecidx;
-	in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
-	in->events_mask = cpu_to_be64(mask);
+	MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
+	MLX5_SET64(create_eq_in, in, event_bitmask, mask);
 
-	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
+	MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
+	MLX5_SET(eqc, eqc, uar_page, uar->index);
+	MLX5_SET(eqc, eqc, intr, vecidx);
+	MLX5_SET(eqc, eqc, log_page_size,
+		 eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (err)
 		goto err_in;
 
-	if (out.hdr.status) {
-		err = mlx5_cmd_status_to_err(&out.hdr);
-		goto err_in;
-	}
-
 	snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
 		 name, pci_name(dev->pdev));
 
-	eq->eqn = out.eq_number;
+	eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
 	eq->irqn = priv->msix_arr[vecidx].vector;
 	eq->dev = dev;
 	eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
@@ -547,22 +539,12 @@
 }
 
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-		       struct mlx5_query_eq_mbox_out *out, int outlen)
+		       u32 *out, int outlen)
 {
-	struct mlx5_query_eq_mbox_in in;
-	int err;
+	u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(out, 0, outlen);
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ);
-	in.eqn = eq->eqn;
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
-	if (err)
-		return err;
-
-	if (out->hdr.status)
-		err = mlx5_cmd_status_to_err(&out->hdr);
-
-	return err;
+	MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+	MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_eq_query);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index f6d6677..abbf2c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -81,19 +81,12 @@
 			    MC_ADDR_CHANGE | \
 			    PROMISC_CHANGE)
 
-int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
-void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
-
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
 					u32 events_mask)
 {
-	int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)];
-	int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+	int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]   = {0};
+	int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
 	void *nic_vport_ctx;
-	int err;
-
-	memset(out, 0, sizeof(out));
-	memset(in, 0, sizeof(in));
 
 	MLX5_SET(modify_nic_vport_context_in, in,
 		 opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
@@ -116,113 +109,44 @@
 		MLX5_SET(nic_vport_context, nic_vport_ctx,
 			 event_on_promisc_change, 1);
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (err)
-		goto ex;
-	err = mlx5_cmd_status_to_err_v2(out);
-	if (err)
-		goto ex;
-	return 0;
-ex:
-	return err;
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 /* E-Switch vport context HW commands */
-static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport,
-				       u32 *out, int outlen)
-{
-	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)];
-
-	memset(in, 0, sizeof(in));
-
-	MLX5_SET(query_nic_vport_context_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
-
-	MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
-
-	return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
-}
-
-static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
-				 u16 *vlan, u8 *qos)
-{
-	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)];
-	int err;
-	bool cvlan_strip;
-	bool cvlan_insert;
-
-	memset(out, 0, sizeof(out));
-
-	*vlan = 0;
-	*qos = 0;
-
-	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
-	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
-		return -ENOTSUPP;
-
-	err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out));
-	if (err)
-		goto out;
-
-	cvlan_strip = MLX5_GET(query_esw_vport_context_out, out,
-			       esw_vport_context.vport_cvlan_strip);
-
-	cvlan_insert = MLX5_GET(query_esw_vport_context_out, out,
-				esw_vport_context.vport_cvlan_insert);
-
-	if (cvlan_strip || cvlan_insert) {
-		*vlan = MLX5_GET(query_esw_vport_context_out, out,
-				 esw_vport_context.cvlan_id);
-		*qos = MLX5_GET(query_esw_vport_context_out, out,
-				esw_vport_context.cvlan_pcp);
-	}
-
-	esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n",
-		  vport, *vlan, *qos);
-out:
-	return err;
-}
-
 static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
 					void *in, int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)];
-
-	memset(out, 0, sizeof(out));
-
-	MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
-	if (vport)
-		MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+	u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};
 
 	MLX5_SET(modify_esw_vport_context_in, in, opcode,
 		 MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
-
-	return mlx5_cmd_exec_check_status(dev, in, inlen,
-					  out, sizeof(out));
+	MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
+	if (vport)
+		MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
+	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
 static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
-				  u16 vlan, u8 qos, bool set)
+				  u16 vlan, u8 qos, u8 set_flags)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
 
 	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
 	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
 		return -ENOTSUPP;
 
-	esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n",
-		  vport, vlan, qos, set);
+	esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n",
+		  vport, vlan, qos, set_flags);
 
-	if (set) {
+	if (set_flags & SET_VLAN_STRIP)
 		MLX5_SET(modify_esw_vport_context_in, in,
 			 esw_vport_context.vport_cvlan_strip, 1);
+
+	if (set_flags & SET_VLAN_INSERT) {
 		/* insert only if no vlan in packet */
 		MLX5_SET(modify_esw_vport_context_in, in,
 			 esw_vport_context.vport_cvlan_insert, 1);
+
 		MLX5_SET(modify_esw_vport_context_in, in,
 			 esw_vport_context.cvlan_pcp, qos);
 		MLX5_SET(modify_esw_vport_context_in, in,
@@ -241,13 +165,10 @@
 static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
 				  u8 *mac, u8 vlan_valid, u16 vlan)
 {
-	u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)];
-	u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)];
+	u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
 	u8 *in_mac_addr;
 
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
 	MLX5_SET(set_l2_table_entry_in, in, opcode,
 		 MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
 	MLX5_SET(set_l2_table_entry_in, in, table_index, index);
@@ -257,23 +178,18 @@
 	in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
 	ether_addr_copy(&in_mac_addr[2], mac);
 
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-					  out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
 {
-	u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)];
-	u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
 
 	MLX5_SET(delete_l2_table_entry_in, in, opcode,
 		 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
 	MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-					  out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix)
@@ -340,7 +256,7 @@
 
 	spec = mlx5_vzalloc(sizeof(*spec));
 	if (!spec) {
-		pr_warn("FDB: Failed to alloc match parameters\n");
+		esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
 		return NULL;
 	}
 	dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
@@ -374,8 +290,8 @@
 				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 				   0, &dest);
 	if (IS_ERR(flow_rule)) {
-		pr_warn(
-			"FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
+		esw_warn(esw->dev,
+			 "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
 			 dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
 		flow_rule = NULL;
 	}
@@ -955,7 +871,7 @@
 	esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n",
 		  vport_num, promisc_all, promisc_mc);
 
-	if (!vport->trusted || !vport->enabled) {
+	if (!vport->info.trusted || !vport->enabled) {
 		promisc_uc = 0;
 		promisc_mc = 0;
 		promisc_all = 0;
@@ -1291,30 +1207,20 @@
 				    struct mlx5_vport *vport)
 {
 	struct mlx5_flow_spec *spec;
-	u8 smac[ETH_ALEN];
 	int err = 0;
 	u8 *smac_v;
 
-	if (vport->spoofchk) {
-		err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac);
-		if (err) {
-			esw_warn(esw->dev,
-				 "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n",
-				 vport->vport, err);
-			return err;
-		}
+	if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
+		mlx5_core_warn(esw->dev,
+			       "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
+			       vport->vport);
+		return -EPERM;
 
-		if (!is_valid_ether_addr(smac)) {
-			mlx5_core_warn(esw->dev,
-				       "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
-				       vport->vport);
-			return -EPERM;
-		}
 	}
 
 	esw_vport_cleanup_ingress_rules(esw, vport);
 
-	if (!vport->vlan && !vport->qos && !vport->spoofchk) {
+	if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
 		esw_vport_disable_ingress_acl(esw, vport);
 		return 0;
 	}
@@ -1323,7 +1229,7 @@
 
 	esw_debug(esw->dev,
 		  "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
-		  vport->vport, vport->vlan, vport->qos);
+		  vport->vport, vport->info.vlan, vport->info.qos);
 
 	spec = mlx5_vzalloc(sizeof(*spec));
 	if (!spec) {
@@ -1333,16 +1239,16 @@
 		goto out;
 	}
 
-	if (vport->vlan || vport->qos)
+	if (vport->info.vlan || vport->info.qos)
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
 
-	if (vport->spoofchk) {
+	if (vport->info.spoofchk) {
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
 		smac_v = MLX5_ADDR_OF(fte_match_param,
 				      spec->match_value,
 				      outer_headers.smac_47_16);
-		ether_addr_copy(smac_v, smac);
+		ether_addr_copy(smac_v, vport->info.mac);
 	}
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
@@ -1352,8 +1258,9 @@
 				   0, NULL);
 	if (IS_ERR(vport->ingress.allow_rule)) {
 		err = PTR_ERR(vport->ingress.allow_rule);
-		pr_warn("vport[%d] configure ingress allow rule, err(%d)\n",
-			vport->vport, err);
+		esw_warn(esw->dev,
+			 "vport[%d] configure ingress allow rule, err(%d)\n",
+			 vport->vport, err);
 		vport->ingress.allow_rule = NULL;
 		goto out;
 	}
@@ -1365,8 +1272,9 @@
 				   0, NULL);
 	if (IS_ERR(vport->ingress.drop_rule)) {
 		err = PTR_ERR(vport->ingress.drop_rule);
-		pr_warn("vport[%d] configure ingress drop rule, err(%d)\n",
-			vport->vport, err);
+		esw_warn(esw->dev,
+			 "vport[%d] configure ingress drop rule, err(%d)\n",
+			 vport->vport, err);
 		vport->ingress.drop_rule = NULL;
 		goto out;
 	}
@@ -1386,7 +1294,7 @@
 
 	esw_vport_cleanup_egress_rules(esw, vport);
 
-	if (!vport->vlan && !vport->qos) {
+	if (!vport->info.vlan && !vport->info.qos) {
 		esw_vport_disable_egress_acl(esw, vport);
 		return 0;
 	}
@@ -1395,7 +1303,7 @@
 
 	esw_debug(esw->dev,
 		  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
-		  vport->vport, vport->vlan, vport->qos);
+		  vport->vport, vport->info.vlan, vport->info.qos);
 
 	spec = mlx5_vzalloc(sizeof(*spec));
 	if (!spec) {
@@ -1409,7 +1317,7 @@
 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
 	MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag);
 	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
-	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
 	vport->egress.allowed_vlan =
@@ -1418,8 +1326,9 @@
 				   0, NULL);
 	if (IS_ERR(vport->egress.allowed_vlan)) {
 		err = PTR_ERR(vport->egress.allowed_vlan);
-		pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
-			vport->vport, err);
+		esw_warn(esw->dev,
+			 "vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
+			 vport->vport, err);
 		vport->egress.allowed_vlan = NULL;
 		goto out;
 	}
@@ -1432,8 +1341,9 @@
 				   0, NULL);
 	if (IS_ERR(vport->egress.drop_rule)) {
 		err = PTR_ERR(vport->egress.drop_rule);
-		pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n",
-			vport->vport, err);
+		esw_warn(esw->dev,
+			 "vport[%d] configure egress drop rule failed, err(%d)\n",
+			 vport->vport, err);
 		vport->egress.drop_rule = NULL;
 	}
 out:
@@ -1441,6 +1351,41 @@
 	return err;
 }
 
+static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
+{
+	((u8 *)node_guid)[7] = mac[0];
+	((u8 *)node_guid)[6] = mac[1];
+	((u8 *)node_guid)[5] = mac[2];
+	((u8 *)node_guid)[4] = 0xff;
+	((u8 *)node_guid)[3] = 0xfe;
+	((u8 *)node_guid)[2] = mac[3];
+	((u8 *)node_guid)[1] = mac[4];
+	((u8 *)node_guid)[0] = mac[5];
+}
+
+static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
+				 struct mlx5_vport *vport)
+{
+	int vport_num = vport->vport;
+
+	if (!vport_num)
+		return;
+
+	mlx5_modify_vport_admin_state(esw->dev,
+				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+				      vport_num,
+				      vport->info.link_state);
+	mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
+	mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+	modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
+			       (vport->info.vlan || vport->info.qos));
+
+	/* Only legacy mode needs ACLs */
+	if (esw->mode == SRIOV_LEGACY) {
+		esw_vport_ingress_config(esw, vport);
+		esw_vport_egress_config(esw, vport);
+	}
+}
 static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
 			     int enable_events)
 {
@@ -1451,22 +1396,17 @@
 
 	esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
 
-	if (vport_num) { /* Only VFs need ACLs for VST and spoofchk filtering */
-		esw_vport_ingress_config(esw, vport);
-		esw_vport_egress_config(esw, vport);
-	}
-
-	mlx5_modify_vport_admin_state(esw->dev,
-				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-				      vport_num,
-				      MLX5_ESW_VPORT_ADMIN_STATE_AUTO);
+	/* Restore old vport configuration */
+	esw_apply_vport_conf(esw, vport);
 
 	/* Sync with current vport context */
 	vport->enabled_events = enable_events;
 	vport->enabled = true;
 
 	/* only PF is trusted by default */
-	vport->trusted = (vport_num) ? false : true;
+	if (!vport_num)
+		vport->info.trusted = true;
+
 	esw_vport_change_handle_locked(vport);
 
 	esw->enabled_vports++;
@@ -1486,11 +1426,6 @@
 	vport->enabled = false;
 
 	synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC));
-
-	mlx5_modify_vport_admin_state(esw->dev,
-				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-				      vport_num,
-				      MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
 	/* Wait for current already scheduled events to complete */
 	flush_workqueue(esw->work_queue);
 	/* Disable events from this vport */
@@ -1502,7 +1437,12 @@
 	 */
 	esw_vport_change_handle_locked(vport);
 	vport->enabled_events = 0;
-	if (vport_num) {
+
+	if (vport_num && esw->mode == SRIOV_LEGACY) {
+		mlx5_modify_vport_admin_state(esw->dev,
+					      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+					      vport_num,
+					      MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
 		esw_vport_disable_egress_acl(esw, vport);
 		esw_vport_disable_ingress_acl(esw, vport);
 	}
@@ -1553,6 +1493,7 @@
 
 abort:
 	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+	esw->mode = SRIOV_NONE;
 	return err;
 }
 
@@ -1588,6 +1529,25 @@
 	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
 }
 
+void mlx5_eswitch_attach(struct mlx5_eswitch *esw)
+{
+	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return;
+
+	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+	/* VF Vports will be enabled when SRIOV is enabled */
+}
+
+void mlx5_eswitch_detach(struct mlx5_eswitch *esw)
+{
+	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return;
+
+	esw_disable_vport(esw, 0);
+}
+
 int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 {
 	int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
@@ -1655,6 +1615,7 @@
 		struct mlx5_vport *vport = &esw->vports[vport_num];
 
 		vport->vport = vport_num;
+		vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
 		vport->dev = dev;
 		INIT_WORK(&vport->vport_change_handler,
 			  esw_vport_change_handler);
@@ -1665,8 +1626,6 @@
 	esw->mode = SRIOV_NONE;
 
 	dev->priv.eswitch = esw;
-	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
-	/* VF Vports will be enabled when SRIOV is enabled */
 	return 0;
 abort:
 	if (esw->work_queue)
@@ -1685,7 +1644,6 @@
 		return;
 
 	esw_info(esw->dev, "cleanup\n");
-	esw_disable_vport(esw, 0);
 
 	esw->dev->priv.eswitch = NULL;
 	destroy_workqueue(esw->work_queue);
@@ -1718,18 +1676,6 @@
 	(esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
 #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
 
-static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
-{
-	((u8 *)node_guid)[7] = mac[0];
-	((u8 *)node_guid)[6] = mac[1];
-	((u8 *)node_guid)[5] = mac[2];
-	((u8 *)node_guid)[4] = 0xff;
-	((u8 *)node_guid)[3] = 0xfe;
-	((u8 *)node_guid)[2] = mac[3];
-	((u8 *)node_guid)[1] = mac[4];
-	((u8 *)node_guid)[0] = mac[5];
-}
-
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 			       int vport, u8 mac[ETH_ALEN])
 {
@@ -1742,13 +1688,15 @@
 	if (!LEGAL_VPORT(esw, vport))
 		return -EINVAL;
 
+	mutex_lock(&esw->state_lock);
 	evport = &esw->vports[vport];
 
-	if (evport->spoofchk && !is_valid_ether_addr(mac)) {
+	if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {
 		mlx5_core_warn(esw->dev,
 			       "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",
 			       vport);
-		return -EPERM;
+		err = -EPERM;
+		goto unlock;
 	}
 
 	err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
@@ -1756,7 +1704,7 @@
 		mlx5_core_warn(esw->dev,
 			       "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
 			       vport, err);
-		return err;
+		goto unlock;
 	}
 
 	node_guid_gen_from_mac(&node_guid, mac);
@@ -1766,9 +1714,12 @@
 			       "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
 			       vport, err);
 
-	mutex_lock(&esw->state_lock);
-	if (evport->enabled)
+	ether_addr_copy(evport->info.mac, mac);
+	evport->info.node_guid = node_guid;
+	if (evport->enabled && esw->mode == SRIOV_LEGACY)
 		err = esw_vport_ingress_config(esw, evport);
+
+unlock:
 	mutex_unlock(&esw->state_lock);
 	return err;
 }
@@ -1776,22 +1727,38 @@
 int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
 				 int vport, int link_state)
 {
+	struct mlx5_vport *evport;
+	int err = 0;
+
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
 	if (!LEGAL_VPORT(esw, vport))
 		return -EINVAL;
 
-	return mlx5_modify_vport_admin_state(esw->dev,
-					     MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-					     vport, link_state);
+	mutex_lock(&esw->state_lock);
+	evport = &esw->vports[vport];
+
+	err = mlx5_modify_vport_admin_state(esw->dev,
+					    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+					    vport, link_state);
+	if (err) {
+		mlx5_core_warn(esw->dev,
+			       "Failed to set vport %d link state, err = %d",
+			       vport, err);
+		goto unlock;
+	}
+
+	evport->info.link_state = link_state;
+
+unlock:
+	mutex_unlock(&esw->state_lock);
+	return 0;
 }
 
 int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 				  int vport, struct ifla_vf_info *ivi)
 {
 	struct mlx5_vport *evport;
-	u16 vlan;
-	u8 qos;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
@@ -1803,54 +1770,61 @@
 	memset(ivi, 0, sizeof(*ivi));
 	ivi->vf = vport - 1;
 
-	mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac);
-	ivi->linkstate = mlx5_query_vport_admin_state(esw->dev,
-						      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
-						      vport);
-	query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos);
-	ivi->vlan = vlan;
-	ivi->qos = qos;
-	ivi->spoofchk = evport->spoofchk;
+	mutex_lock(&esw->state_lock);
+	ether_addr_copy(ivi->mac, evport->info.mac);
+	ivi->linkstate = evport->info.link_state;
+	ivi->vlan = evport->info.vlan;
+	ivi->qos = evport->info.qos;
+	ivi->spoofchk = evport->info.spoofchk;
+	ivi->trusted = evport->info.trusted;
+	mutex_unlock(&esw->state_lock);
 
 	return 0;
 }
 
-int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
-				int vport, u16 vlan, u8 qos)
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+				  int vport, u16 vlan, u8 qos, u8 set_flags)
 {
 	struct mlx5_vport *evport;
 	int err = 0;
-	int set = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
 	if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7))
 		return -EINVAL;
 
-	if (vlan || qos)
-		set = 1;
-
+	mutex_lock(&esw->state_lock);
 	evport = &esw->vports[vport];
 
-	err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set);
+	err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
 	if (err)
-		return err;
+		goto unlock;
 
-	mutex_lock(&esw->state_lock);
-	evport->vlan = vlan;
-	evport->qos = qos;
-	if (evport->enabled) {
+	evport->info.vlan = vlan;
+	evport->info.qos = qos;
+	if (evport->enabled && esw->mode == SRIOV_LEGACY) {
 		err = esw_vport_ingress_config(esw, evport);
 		if (err)
-			goto out;
+			goto unlock;
 		err = esw_vport_egress_config(esw, evport);
 	}
 
-out:
+unlock:
 	mutex_unlock(&esw->state_lock);
 	return err;
 }
 
+int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+				int vport, u16 vlan, u8 qos)
+{
+	u8 set_flags = 0;
+
+	if (vlan || qos)
+		set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
+
+	return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags);
+}
+
 int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
 				    int vport, bool spoofchk)
 {
@@ -1863,15 +1837,14 @@
 	if (!LEGAL_VPORT(esw, vport))
 		return -EINVAL;
 
-	evport = &esw->vports[vport];
-
 	mutex_lock(&esw->state_lock);
-	pschk = evport->spoofchk;
-	evport->spoofchk = spoofchk;
-	if (evport->enabled)
+	evport = &esw->vports[vport];
+	pschk = evport->info.spoofchk;
+	evport->info.spoofchk = spoofchk;
+	if (evport->enabled && esw->mode == SRIOV_LEGACY)
 		err = esw_vport_ingress_config(esw, evport);
 	if (err)
-		evport->spoofchk = pschk;
+		evport->info.spoofchk = pschk;
 	mutex_unlock(&esw->state_lock);
 
 	return err;
@@ -1887,10 +1860,9 @@
 	if (!LEGAL_VPORT(esw, vport))
 		return -EINVAL;
 
-	evport = &esw->vports[vport];
-
 	mutex_lock(&esw->state_lock);
-	evport->trusted = setting;
+	evport = &esw->vports[vport];
+	evport->info.trusted = setting;
 	if (evport->enabled)
 		esw_vport_change_handle_locked(evport);
 	mutex_unlock(&esw->state_lock);
@@ -1903,7 +1875,7 @@
 				 struct ifla_vf_stats *vf_stats)
 {
 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
-	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
 	int err = 0;
 	u32 *out;
 
@@ -1916,8 +1888,6 @@
 	if (!out)
 		return -ENOMEM;
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(query_vport_counter_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index c0b0560..2e2938e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -109,6 +109,16 @@
 	struct mlx5_flow_rule  *drop_rule;
 };
 
+struct mlx5_vport_info {
+	u8                      mac[ETH_ALEN];
+	u16                     vlan;
+	u8                      qos;
+	u64                     node_guid;
+	int                     link_state;
+	bool                    spoofchk;
+	bool                    trusted;
+};
+
 struct mlx5_vport {
 	struct mlx5_core_dev    *dev;
 	int                     vport;
@@ -121,10 +131,8 @@
 	struct vport_ingress    ingress;
 	struct vport_egress     egress;
 
-	u16                     vlan;
-	u8                      qos;
-	bool                    spoofchk;
-	bool                    trusted;
+	struct mlx5_vport_info  info;
+
 	bool                    enabled;
 	u16                     enabled_events;
 };
@@ -149,6 +157,7 @@
 			struct mlx5_flow_group *send_to_vport_grp;
 			struct mlx5_flow_group *miss_grp;
 			struct mlx5_flow_rule  *miss_rule;
+			int vlan_push_pop_refcount;
 		} offloads;
 	};
 };
@@ -170,9 +179,13 @@
 	void		       (*unload)(struct mlx5_eswitch *esw,
 					 struct mlx5_eswitch_rep *rep);
 	u16		       vport;
-	struct mlx5_flow_rule *vport_rx_rule;
+	u8		       hw_id[ETH_ALEN];
 	void		      *priv_data;
+
+	struct mlx5_flow_rule *vport_rx_rule;
 	struct list_head       vport_sqs_list;
+	u16		       vlan;
+	u32		       vlan_refcount;
 	bool		       valid;
 };
 
@@ -200,9 +213,14 @@
 	int                     mode;
 };
 
+void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
+int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
+
 /* E-Switch API */
 int mlx5_eswitch_init(struct mlx5_core_dev *dev);
 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw);
+void mlx5_eswitch_attach(struct mlx5_eswitch *esw);
+void mlx5_eswitch_detach(struct mlx5_eswitch *esw);
 void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe);
 int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode);
 void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw);
@@ -223,14 +241,32 @@
 				 struct ifla_vf_stats *vf_stats);
 
 struct mlx5_flow_spec;
+struct mlx5_esw_flow_attr;
 
 struct mlx5_flow_rule *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_spec *spec,
-				u32 action, u32 src_vport, u32 dst_vport);
+				struct mlx5_esw_flow_attr *attr);
 struct mlx5_flow_rule *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
 
+enum {
+	SET_VLAN_STRIP	= BIT(0),
+	SET_VLAN_INSERT	= BIT(1)
+};
+
+#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  0x40
+#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80
+
+struct mlx5_esw_flow_attr {
+	struct mlx5_eswitch_rep *in_rep;
+	struct mlx5_eswitch_rep *out_rep;
+
+	int	action;
+	u16	vlan;
+	bool	vlan_handled;
+};
+
 int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep,
 				 u16 *sqns_array, int sqns_num);
@@ -240,9 +276,17 @@
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
+				     int vport_index,
 				     struct mlx5_eswitch_rep *rep);
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-				       int vport);
+				       int vport_index);
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+				 struct mlx5_esw_flow_attr *attr);
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+				 struct mlx5_esw_flow_attr *attr);
+int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
+				  int vport, u16 vlan, u8 qos, u8 set_flags);
 
 #define MLX5_DEBUG_ESWITCH_MASK BIT(3)
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a357e8e..c55ad8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -46,19 +46,22 @@
 struct mlx5_flow_rule *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_spec *spec,
-				u32 action, u32 src_vport, u32 dst_vport)
+				struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_flow_destination dest = { 0 };
 	struct mlx5_fc *counter = NULL;
 	struct mlx5_flow_rule *rule;
 	void *misc;
+	int action;
 
 	if (esw->mode != SRIOV_OFFLOADS)
 		return ERR_PTR(-EOPNOTSUPP);
 
+	action = attr->action;
+
 	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
 		dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-		dest.vport_num = dst_vport;
+		dest.vport_num = attr->out_rep->vport;
 		action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		counter = mlx5_fc_create(esw->dev, true);
@@ -69,7 +72,7 @@
 	}
 
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
-	MLX5_SET(fte_match_set_misc, misc, source_port, src_vport);
+	MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport);
 
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
 	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
@@ -86,6 +89,186 @@
 	return rule;
 }
 
+static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
+{
+	struct mlx5_eswitch_rep *rep;
+	int vf_vport, err = 0;
+
+	esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none");
+	for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) {
+		rep = &esw->offloads.vport_reps[vf_vport];
+		if (!rep->valid)
+			continue;
+
+		err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val);
+		if (err)
+			goto out;
+	}
+
+out:
+	return err;
+}
+
+static struct mlx5_eswitch_rep *
+esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop)
+{
+	struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL;
+
+	in_rep  = attr->in_rep;
+	out_rep = attr->out_rep;
+
+	if (push)
+		vport = in_rep;
+	else if (pop)
+		vport = out_rep;
+	else
+		vport = in_rep;
+
+	return vport;
+}
+
+static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr,
+				     bool push, bool pop, bool fwd)
+{
+	struct mlx5_eswitch_rep *in_rep, *out_rep;
+
+	if ((push || pop) && !fwd)
+		goto out_notsupp;
+
+	in_rep  = attr->in_rep;
+	out_rep = attr->out_rep;
+
+	if (push && in_rep->vport == FDB_UPLINK_VPORT)
+		goto out_notsupp;
+
+	if (pop && out_rep->vport == FDB_UPLINK_VPORT)
+		goto out_notsupp;
+
+	/* vport has vlan push configured, can't offload VF --> wire rules w.o it */
+	if (!push && !pop && fwd)
+		if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT)
+			goto out_notsupp;
+
+	/* protects against (1) setting rules with different vlans to push and
+	 * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0)
+	 */
+	if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan))
+		goto out_notsupp;
+
+	return 0;
+
+out_notsupp:
+	return -ENOTSUPP;
+}
+
+int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
+				 struct mlx5_esw_flow_attr *attr)
+{
+	struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+	struct mlx5_eswitch_rep *vport = NULL;
+	bool push, pop, fwd;
+	int err = 0;
+
+	push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+	pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+	fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+
+	err = esw_add_vlan_action_check(attr, push, pop, fwd);
+	if (err)
+		return err;
+
+	attr->vlan_handled = false;
+
+	vport = esw_vlan_action_get_vport(attr, push, pop);
+
+	if (!push && !pop && fwd) {
+		/* tracks VF --> wire rules without vlan push action */
+		if (attr->out_rep->vport == FDB_UPLINK_VPORT) {
+			vport->vlan_refcount++;
+			attr->vlan_handled = true;
+		}
+
+		return 0;
+	}
+
+	if (!push && !pop)
+		return 0;
+
+	if (!(offloads->vlan_push_pop_refcount)) {
+		/* it's the 1st vlan rule, apply global vlan pop policy */
+		err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP);
+		if (err)
+			goto out;
+	}
+	offloads->vlan_push_pop_refcount++;
+
+	if (push) {
+		if (vport->vlan_refcount)
+			goto skip_set_push;
+
+		err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0,
+						    SET_VLAN_INSERT | SET_VLAN_STRIP);
+		if (err)
+			goto out;
+		vport->vlan = attr->vlan;
+skip_set_push:
+		vport->vlan_refcount++;
+	}
+out:
+	if (!err)
+		attr->vlan_handled = true;
+	return err;
+}
+
+int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
+				 struct mlx5_esw_flow_attr *attr)
+{
+	struct offloads_fdb *offloads = &esw->fdb_table.offloads;
+	struct mlx5_eswitch_rep *vport = NULL;
+	bool push, pop, fwd;
+	int err = 0;
+
+	if (!attr->vlan_handled)
+		return 0;
+
+	push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
+	pop  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+	fwd  = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+
+	vport = esw_vlan_action_get_vport(attr, push, pop);
+
+	if (!push && !pop && fwd) {
+		/* tracks VF --> wire rules without vlan push action */
+		if (attr->out_rep->vport == FDB_UPLINK_VPORT)
+			vport->vlan_refcount--;
+
+		return 0;
+	}
+
+	if (push) {
+		vport->vlan_refcount--;
+		if (vport->vlan_refcount)
+			goto skip_unset_push;
+
+		vport->vlan = 0;
+		err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport,
+						    0, 0, SET_VLAN_STRIP);
+		if (err)
+			goto out;
+	}
+
+skip_unset_push:
+	offloads->vlan_push_pop_refcount--;
+	if (offloads->vlan_push_pop_refcount)
+		return 0;
+
+	/* no more vlan rules, stop global vlan pop policy */
+	err = esw_set_global_vlan_pop(esw, 0);
+
+out:
+	return err;
+}
+
 static struct mlx5_flow_rule *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
 {
@@ -113,7 +296,7 @@
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest.vport_num = vport;
 
-	flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
+	flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
 				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 				       0, &dest);
 	if (IS_ERR(flow_rule))
@@ -144,16 +327,12 @@
 {
 	struct mlx5_flow_rule *flow_rule;
 	struct mlx5_esw_sq *esw_sq;
-	int vport;
 	int err;
 	int i;
 
 	if (esw->mode != SRIOV_OFFLOADS)
 		return 0;
 
-	vport = rep->vport == 0 ?
-		FDB_UPLINK_VPORT : rep->vport;
-
 	for (i = 0; i < sqns_num; i++) {
 		esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL);
 		if (!esw_sq) {
@@ -163,7 +342,7 @@
 
 		/* Add re-inject rule to the PF/representor sqs */
 		flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
-								vport,
+								rep->vport,
 								sqns_array[i]);
 		if (IS_ERR(flow_rule)) {
 			err = PTR_ERR(flow_rule);
@@ -446,7 +625,7 @@
 
 static int esw_offloads_start(struct mlx5_eswitch *esw)
 {
-	int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+	int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
 
 	if (esw->mode != SRIOV_LEGACY) {
 		esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n");
@@ -455,8 +634,12 @@
 
 	mlx5_eswitch_disable_sriov(esw);
 	err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
-	if (err)
-		esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err);
+	if (err) {
+		esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err);
+		err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
+		if (err1)
+			esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err);
+	}
 	return err;
 }
 
@@ -508,12 +691,16 @@
 
 static int esw_offloads_stop(struct mlx5_eswitch *esw)
 {
-	int err, num_vfs = esw->dev->priv.sriov.num_vfs;
+	int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
 
 	mlx5_eswitch_disable_sriov(esw);
 	err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
-	if (err)
-		esw_warn(esw->dev, "Failed set eswitch legacy mode. err %d\n", err);
+	if (err) {
+		esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err);
+		err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
+		if (err1)
+			esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err);
+	}
 
 	return err;
 }
@@ -535,7 +722,7 @@
 	esw_destroy_offloads_fdb_table(esw);
 }
 
-static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
+static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
 {
 	switch (mode) {
 	case DEVLINK_ESWITCH_MODE_LEGACY:
@@ -551,6 +738,22 @@
 	return 0;
 }
 
+static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
+{
+	switch (mlx5_mode) {
+	case SRIOV_LEGACY:
+		*mode = DEVLINK_ESWITCH_MODE_LEGACY;
+		break;
+	case SRIOV_OFFLOADS:
+		*mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
 	struct mlx5_core_dev *dev;
@@ -566,7 +769,7 @@
 	if (cur_mlx5_mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	if (mlx5_esw_mode_from_devlink(mode, &mlx5_mode))
+	if (esw_mode_from_devlink(mode, &mlx5_mode))
 		return -EINVAL;
 
 	if (cur_mlx5_mode == mlx5_mode)
@@ -592,33 +795,40 @@
 	if (dev->priv.eswitch->mode == SRIOV_NONE)
 		return -EOPNOTSUPP;
 
-	*mode = dev->priv.eswitch->mode;
-
-	return 0;
+	return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
 
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
-				     struct mlx5_eswitch_rep *rep)
-{
-	struct mlx5_esw_offload *offloads = &esw->offloads;
-
-	memcpy(&offloads->vport_reps[rep->vport], rep,
-	       sizeof(struct mlx5_eswitch_rep));
-
-	INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list);
-	offloads->vport_reps[rep->vport].valid = true;
-}
-
-void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
-				       int vport)
+				     int vport_index,
+				     struct mlx5_eswitch_rep *__rep)
 {
 	struct mlx5_esw_offload *offloads = &esw->offloads;
 	struct mlx5_eswitch_rep *rep;
 
-	rep = &offloads->vport_reps[vport];
+	rep = &offloads->vport_reps[vport_index];
 
-	if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled)
+	memset(rep, 0, sizeof(*rep));
+
+	rep->load   = __rep->load;
+	rep->unload = __rep->unload;
+	rep->vport  = __rep->vport;
+	rep->priv_data = __rep->priv_data;
+	ether_addr_copy(rep->hw_id, __rep->hw_id);
+
+	INIT_LIST_HEAD(&rep->vport_sqs_list);
+	rep->valid = true;
+}
+
+void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
+				       int vport_index)
+{
+	struct mlx5_esw_offload *offloads = &esw->offloads;
+	struct mlx5_eswitch_rep *rep;
+
+	rep = &offloads->vport_reps[vport_index];
+
+	if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled)
 		rep->unload(esw, rep);
 
-	offloads->vport_reps[vport].valid = false;
+	rep->valid = false;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 9134010..113c323 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -41,10 +41,8 @@
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
 			    struct mlx5_flow_table *ft)
 {
-	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)];
-	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
 
 	MLX5_SET(set_flow_table_root_in, in, opcode,
 		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
@@ -55,30 +53,23 @@
 		MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
 	}
 
-	memset(out, 0, sizeof(out));
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					  sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 			       u16 vport,
+			       enum fs_flow_table_op_mod op_mod,
 			       enum fs_flow_table_type type, unsigned int level,
 			       unsigned int log_size, struct mlx5_flow_table
 			       *next_ft, unsigned int *table_id)
 {
-	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)];
-	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)];
+	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
 	int err;
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(create_flow_table_in, in, opcode,
 		 MLX5_CMD_OP_CREATE_FLOW_TABLE);
 
-	if (next_ft) {
-		MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
-		MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
-	}
 	MLX5_SET(create_flow_table_in, in, table_type, type);
 	MLX5_SET(create_flow_table_in, in, level, level);
 	MLX5_SET(create_flow_table_in, in, log_size, log_size);
@@ -87,10 +78,23 @@
 		MLX5_SET(create_flow_table_in, in, other_vport, 1);
 	}
 
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					 sizeof(out));
+	switch (op_mod) {
+	case FS_FT_OP_MOD_NORMAL:
+		if (next_ft) {
+			MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
+			MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
+		}
+		break;
 
+	case FS_FT_OP_MOD_LAG_DEMUX:
+		MLX5_SET(create_flow_table_in, in, op_mod, 0x1);
+		if (next_ft)
+			MLX5_SET(create_flow_table_in, in, lag_master_next_table_id,
+				 next_ft->id);
+		break;
+	}
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (!err)
 		*table_id = MLX5_GET(create_flow_table_out, out,
 				     table_id);
@@ -100,11 +104,8 @@
 int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
 				struct mlx5_flow_table *ft)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
 
 	MLX5_SET(destroy_flow_table_in, in, opcode,
 		 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
@@ -115,39 +116,49 @@
 		MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
 	}
 
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					  sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
 			       struct mlx5_flow_table *ft,
 			       struct mlx5_flow_table *next_ft)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)];
-	u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
 
 	MLX5_SET(modify_flow_table_in, in, opcode,
 		 MLX5_CMD_OP_MODIFY_FLOW_TABLE);
 	MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
 	MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
-	if (ft->vport) {
-		MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport);
-		MLX5_SET(modify_flow_table_in, in, other_vport, 1);
-	}
-	MLX5_SET(modify_flow_table_in, in, modify_field_select,
-		 MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
-	if (next_ft) {
-		MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
-		MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id);
+
+	if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) {
+		MLX5_SET(modify_flow_table_in, in, modify_field_select,
+			 MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID);
+		if (next_ft) {
+			MLX5_SET(modify_flow_table_in, in,
+				 lag_master_next_table_id, next_ft->id);
+		} else {
+			MLX5_SET(modify_flow_table_in, in,
+				 lag_master_next_table_id, 0);
+		}
 	} else {
-		MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+		if (ft->vport) {
+			MLX5_SET(modify_flow_table_in, in, vport_number,
+				 ft->vport);
+			MLX5_SET(modify_flow_table_in, in, other_vport, 1);
+		}
+		MLX5_SET(modify_flow_table_in, in, modify_field_select,
+			 MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
+		if (next_ft) {
+			MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
+			MLX5_SET(modify_flow_table_in, in, table_miss_id,
+				 next_ft->id);
+		} else {
+			MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+		}
 	}
 
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					  sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
@@ -155,12 +166,10 @@
 			       u32 *in,
 			       unsigned int *group_id)
 {
+	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)];
 	int err;
 
-	memset(out, 0, sizeof(out));
-
 	MLX5_SET(create_flow_group_in, in, opcode,
 		 MLX5_CMD_OP_CREATE_FLOW_GROUP);
 	MLX5_SET(create_flow_group_in, in, table_type, ft->type);
@@ -170,13 +179,10 @@
 		MLX5_SET(create_flow_group_in, in, other_vport, 1);
 	}
 
-	err = mlx5_cmd_exec_check_status(dev, in,
-					 inlen, out,
-					 sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (!err)
 		*group_id = MLX5_GET(create_flow_group_out, out,
 				     group_id);
-
 	return err;
 }
 
@@ -184,11 +190,8 @@
 				struct mlx5_flow_table *ft,
 				unsigned int group_id)
 {
-	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)];
-	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]   = {0};
 
 	MLX5_SET(destroy_flow_group_in, in, opcode,
 		 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
@@ -200,8 +203,7 @@
 		MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
 	}
 
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					  sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
@@ -212,7 +214,7 @@
 {
 	unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
 		fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
-	u32 out[MLX5_ST_SZ_DW(set_fte_out)];
+	u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
 	struct mlx5_flow_rule *dst;
 	void *in_flow_context;
 	void *in_match_value;
@@ -290,11 +292,8 @@
 			 list_size);
 	}
 
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
-					 sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	kvfree(in);
-
 	return err;
 }
 
@@ -303,7 +302,7 @@
 			unsigned group_id,
 			struct fs_fte *fte)
 {
-	return	mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
+	return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
 }
 
 int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
@@ -327,12 +326,8 @@
 			struct mlx5_flow_table *ft,
 			unsigned int index)
 {
-	u32 out[MLX5_ST_SZ_DW(delete_fte_out)];
-	u32 in[MLX5_ST_SZ_DW(delete_fte_in)];
-	int err;
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(delete_fte_in)]   = {0};
 
 	MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
 	MLX5_SET(delete_fte_in, in, table_type, ft->type);
@@ -343,74 +338,55 @@
 		MLX5_SET(delete_fte_in, in, other_vport, 1);
 	}
 
-	err =  mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
-
-	return err;
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id)
 {
-	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)];
-	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)];
+	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
 	int err;
 
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
 	MLX5_SET(alloc_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
 
-	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					 sizeof(out));
-	if (err)
-		return err;
-
-	*id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
-
-	return 0;
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+	return err;
 }
 
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)];
-	u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0};
 
 	MLX5_SET(dealloc_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
 	MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
-
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					  sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
 		      u64 *packets, u64 *bytes)
 {
 	u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
-		MLX5_ST_SZ_BYTES(traffic_counter)];
-	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
+		MLX5_ST_SZ_BYTES(traffic_counter)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
 	void *stats;
 	int err = 0;
 
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
 	MLX5_SET(query_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
 	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
 	MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
-
-	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
 
 	stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
 	*packets = MLX5_GET64(traffic_counter, stats, packets);
 	*bytes = MLX5_GET64(traffic_counter, stats, octets);
-
 	return 0;
 }
 
@@ -425,11 +401,11 @@
 mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
 {
 	struct mlx5_cmd_fc_bulk *b;
-	int outlen = sizeof(*b) +
+	int outlen =
 		MLX5_ST_SZ_BYTES(query_flow_counter_out) +
 		MLX5_ST_SZ_BYTES(traffic_counter) * num;
 
-	b = kzalloc(outlen, GFP_KERNEL);
+	b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
 	if (!b)
 		return NULL;
 
@@ -448,18 +424,14 @@
 int
 mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
 {
-	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
 
 	MLX5_SET(query_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
 	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
 	MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
 	MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
-
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-					  b->out, b->outlen);
+	return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen);
 }
 
 void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
@@ -480,3 +452,51 @@
 	*packets = MLX5_GET64(traffic_counter, stats, packets);
 	*bytes = MLX5_GET64(traffic_counter, stats, octets);
 }
+
+#define MAX_ENCAP_SIZE (128)
+
+int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+			 int header_type,
+			 size_t size,
+			 void *encap_header,
+			 u32 *encap_id)
+{
+	u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
+	u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) +
+	      (MAX_ENCAP_SIZE / sizeof(u32))];
+	void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in,
+					     encap_header);
+	void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in,
+				    encap_header);
+	int inlen = header - (void *)in  + size;
+	int err;
+
+	if (size > MAX_ENCAP_SIZE)
+		return -EINVAL;
+
+	memset(in, 0, inlen);
+	MLX5_SET(alloc_encap_header_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
+	MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
+	MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
+	memcpy(header, encap_header, size);
+
+	memset(out, 0, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+
+	*encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+	return err;
+}
+
+void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id)
+{
+	u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
+	u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+
+	memset(in, 0, sizeof(in));
+	MLX5_SET(dealloc_encap_header_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+	MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 158844c..c5bc468 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -35,6 +35,7 @@
 
 int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 			       u16 vport,
+			       enum fs_flow_table_op_mod op_mod,
 			       enum fs_flow_table_type type, unsigned int level,
 			       unsigned int log_size, struct mlx5_flow_table
 			       *next_ft, unsigned int *table_id);
@@ -88,4 +89,11 @@
 			  struct mlx5_cmd_fc_bulk *b, u16 id,
 			  u64 *packets, u64 *bytes);
 
+int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+			 int header_type,
+			 size_t size,
+			 void *encap_header,
+			 u32 *encap_id);
+void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id);
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 75bb8c8..5da2cc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -80,7 +80,7 @@
 			   LEFTOVERS_NUM_PRIOS)
 
 #define ETHTOOL_PRIO_NUM_LEVELS 1
-#define ETHTOOL_NUM_PRIOS 10
+#define ETHTOOL_NUM_PRIOS 11
 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
 /* Vlan, mac, ttc, aRFS */
 #define KERNEL_NIC_PRIO_NUM_LEVELS 4
@@ -96,6 +96,10 @@
 #define OFFLOADS_NUM_PRIOS 1
 #define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1)
 
+#define LAG_PRIO_NUM_LEVELS 1
+#define LAG_NUM_PRIOS 1
+#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1)
+
 struct node_caps {
 	size_t	arr_sz;
 	long	*caps;
@@ -111,12 +115,16 @@
 	int num_levels;
 } root_fs = {
 	.type = FS_TYPE_NAMESPACE,
-	.ar_size = 6,
+	.ar_size = 7,
 	.children = (struct init_tree_node[]) {
 		ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0,
 			 FS_CHAINING_CAPS,
 			 ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
 						  BY_PASS_PRIO_NUM_LEVELS))),
+		ADD_PRIO(0, LAG_MIN_LEVEL, 0,
+			 FS_CHAINING_CAPS,
+			 ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS,
+						  LAG_PRIO_NUM_LEVELS))),
 		ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {},
 			 ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))),
 		ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0,
@@ -345,7 +353,7 @@
 
 	err = mlx5_cmd_destroy_flow_table(dev, ft);
 	if (err)
-		pr_warn("flow steering can't destroy ft\n");
+		mlx5_core_warn(dev, "flow steering can't destroy ft\n");
 	fs_get_obj(prio, ft->node.parent);
 	prio->num_ft--;
 }
@@ -364,7 +372,7 @@
 
 	match_value = mlx5_vzalloc(match_len);
 	if (!match_value) {
-		pr_warn("failed to allocate inbox\n");
+		mlx5_core_warn(dev, "failed to allocate inbox\n");
 		return;
 	}
 
@@ -387,8 +395,9 @@
 					  modify_mask,
 					  fte);
 		if (err)
-			pr_warn("%s can't del rule fg id=%d fte_index=%d\n",
-				__func__, fg->id, fte->index);
+			mlx5_core_warn(dev,
+				       "%s can't del rule fg id=%d fte_index=%d\n",
+				       __func__, fg->id, fte->index);
 	}
 	kvfree(match_value);
 }
@@ -409,8 +418,9 @@
 	err = mlx5_cmd_delete_fte(dev, ft,
 				  fte->index);
 	if (err)
-		pr_warn("flow steering can't delete fte in index %d of flow group id %d\n",
-			fte->index, fg->id);
+		mlx5_core_warn(dev,
+			       "flow steering can't delete fte in index %d of flow group id %d\n",
+			       fte->index, fg->id);
 
 	fte->status = 0;
 	fg->num_ftes--;
@@ -427,8 +437,8 @@
 	dev = get_dev(&ft->node);
 
 	if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
-		pr_warn("flow steering can't destroy fg %d of ft %d\n",
-			fg->id, ft->id);
+		mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
+			       fg->id, ft->id);
 }
 
 static struct fs_fte *alloc_fte(u8 action,
@@ -475,7 +485,8 @@
 }
 
 static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
-						enum fs_flow_table_type table_type)
+						enum fs_flow_table_type table_type,
+						enum fs_flow_table_op_mod op_mod)
 {
 	struct mlx5_flow_table *ft;
 
@@ -485,6 +496,7 @@
 
 	ft->level = level;
 	ft->node.type = FS_TYPE_FLOW_TABLE;
+	ft->op_mod = op_mod;
 	ft->type = table_type;
 	ft->vport = vport;
 	ft->max_fte = max_fte;
@@ -722,6 +734,7 @@
 }
 
 static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
+							enum fs_flow_table_op_mod op_mod,
 							u16 vport, int prio,
 							int max_fte, u32 level)
 {
@@ -754,18 +767,19 @@
 	level += fs_prio->start_level;
 	ft = alloc_flow_table(level,
 			      vport,
-			      roundup_pow_of_two(max_fte),
-			      root->table_type);
+			      max_fte ? roundup_pow_of_two(max_fte) : 0,
+			      root->table_type,
+			      op_mod);
 	if (!ft) {
 		err = -ENOMEM;
 		goto unlock_root;
 	}
 
 	tree_init_node(&ft->node, 1, del_flow_table);
-	log_table_sz = ilog2(ft->max_fte);
+	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
 	next_ft = find_next_chained_ft(fs_prio);
-	err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->type, ft->level,
-					 log_table_sz, next_ft, &ft->id);
+	err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
+					 ft->level, log_table_sz, next_ft, &ft->id);
 	if (err)
 		goto free_ft;
 
@@ -792,16 +806,27 @@
 					       int prio, int max_fte,
 					       u32 level)
 {
-	return __mlx5_create_flow_table(ns, 0, prio, max_fte, level);
+	return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio,
+					max_fte, level);
 }
 
 struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
 						     int prio, int max_fte,
 						     u32 level, u16 vport)
 {
-	return __mlx5_create_flow_table(ns, vport, prio, max_fte, level);
+	return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio,
+					max_fte, level);
 }
 
+struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
+					       struct mlx5_flow_namespace *ns,
+					       int prio, u32 level)
+{
+	return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0,
+					level);
+}
+EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
+
 struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
 							    int prio,
 							    int num_flow_table_entries,
@@ -1379,6 +1404,7 @@
 
 	switch (type) {
 	case MLX5_FLOW_NAMESPACE_BYPASS:
+	case MLX5_FLOW_NAMESPACE_LAG:
 	case MLX5_FLOW_NAMESPACE_OFFLOADS:
 	case MLX5_FLOW_NAMESPACE_ETHTOOL:
 	case MLX5_FLOW_NAMESPACE_KERNEL:
@@ -1401,6 +1427,16 @@
 			return &steering->esw_ingress_root_ns->ns;
 		else
 			return NULL;
+	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
+		if (steering->sniffer_rx_root_ns)
+			return &steering->sniffer_rx_root_ns->ns;
+		else
+			return NULL;
+	case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
+		if (steering->sniffer_tx_root_ns)
+			return &steering->sniffer_tx_root_ns->ns;
+		else
+			return NULL;
 	default:
 		return NULL;
 	}
@@ -1700,10 +1736,46 @@
 	cleanup_root_ns(steering->esw_egress_root_ns);
 	cleanup_root_ns(steering->esw_ingress_root_ns);
 	cleanup_root_ns(steering->fdb_root_ns);
+	cleanup_root_ns(steering->sniffer_rx_root_ns);
+	cleanup_root_ns(steering->sniffer_tx_root_ns);
 	mlx5_cleanup_fc_stats(dev);
 	kfree(steering);
 }
 
+static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering)
+{
+	struct fs_prio *prio;
+
+	steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX);
+	if (!steering->sniffer_tx_root_ns)
+		return -ENOMEM;
+
+	/* Create single prio */
+	prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1);
+	if (IS_ERR(prio)) {
+		cleanup_root_ns(steering->sniffer_tx_root_ns);
+		return PTR_ERR(prio);
+	}
+	return 0;
+}
+
+static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
+{
+	struct fs_prio *prio;
+
+	steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX);
+	if (!steering->sniffer_rx_root_ns)
+		return -ENOMEM;
+
+	/* Create single prio */
+	prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1);
+	if (IS_ERR(prio)) {
+		cleanup_root_ns(steering->sniffer_rx_root_ns);
+		return PTR_ERR(prio);
+	}
+	return 0;
+}
+
 static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 {
 	struct fs_prio *prio;
@@ -1800,6 +1872,18 @@
 		}
 	}
 
+	if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) {
+		err = init_sniffer_rx_root_ns(steering);
+		if (err)
+			goto err;
+	}
+
+	if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) {
+		err = init_sniffer_tx_root_ns(steering);
+		if (err)
+			goto err;
+	}
+
 	return 0;
 err:
 	mlx5_cleanup_fs(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 9cffb6a..71ff03b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -49,6 +49,13 @@
 	FS_FT_ESW_EGRESS_ACL  = 0x2,
 	FS_FT_ESW_INGRESS_ACL = 0x3,
 	FS_FT_FDB             = 0X4,
+	FS_FT_SNIFFER_RX	= 0X5,
+	FS_FT_SNIFFER_TX	= 0X6,
+};
+
+enum fs_flow_table_op_mod {
+	FS_FT_OP_MOD_NORMAL,
+	FS_FT_OP_MOD_LAG_DEMUX,
 };
 
 enum fs_fte_status {
@@ -61,6 +68,8 @@
 	struct mlx5_flow_root_namespace *fdb_root_ns;
 	struct mlx5_flow_root_namespace *esw_egress_root_ns;
 	struct mlx5_flow_root_namespace *esw_ingress_root_ns;
+	struct mlx5_flow_root_namespace	*sniffer_tx_root_ns;
+	struct mlx5_flow_root_namespace	*sniffer_rx_root_ns;
 };
 
 struct fs_node {
@@ -93,6 +102,7 @@
 	unsigned int			max_fte;
 	unsigned int			level;
 	enum fs_flow_table_type		type;
+	enum fs_flow_table_op_mod	op_mod;
 	struct {
 		bool			active;
 		unsigned int		required_groups;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index c2877e9..3a9195b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -126,12 +126,21 @@
 	for (node = &first->node; node; node = rb_next(node)) {
 		struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
 		struct mlx5_fc_cache *c = &counter->cache;
+		u64 packets;
+		u64 bytes;
 
 		if (counter->id > last_id)
 			break;
 
 		mlx5_cmd_fc_bulk_get(dev, b,
-				     counter->id, &c->packets, &c->bytes);
+				     counter->id, &packets, &bytes);
+
+		if (c->packets == packets)
+			continue;
+
+		c->packets = packets;
+		c->bytes = bytes;
+		c->lastuse = jiffies;
 	}
 
 out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 77fc1aa..5718aad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -38,13 +38,10 @@
 static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out,
 				  int outlen)
 {
-	u32 in[MLX5_ST_SZ_DW(query_adapter_in)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {0};
 
 	MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
-
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 
 int mlx5_query_board_id(struct mlx5_core_dev *dev)
@@ -162,38 +159,18 @@
 
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
 {
-	struct mlx5_cmd_init_hca_mbox_in in;
-	struct mlx5_cmd_init_hca_mbox_out out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(init_hca_in)]   = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-
-	return err;
+	MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
 {
-	struct mlx5_cmd_teardown_hca_mbox_in in;
-	struct mlx5_cmd_teardown_hca_mbox_out out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(teardown_hca_in)]   = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-
-	return err;
+	MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
new file mode 100644
index 0000000..5595724
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_core.h"
+
+enum {
+	MLX5_LAG_FLAG_BONDED = 1 << 0,
+};
+
+struct lag_func {
+	struct mlx5_core_dev *dev;
+	struct net_device    *netdev;
+};
+
+/* Used for collection of netdev event info. */
+struct lag_tracker {
+	enum   netdev_lag_tx_type           tx_type;
+	struct netdev_lag_lower_state_info  netdev_state[MLX5_MAX_PORTS];
+	bool is_bonded;
+};
+
+/* LAG data of a ConnectX card.
+ * It serves both its phys functions.
+ */
+struct mlx5_lag {
+	u8                        flags;
+	u8                        v2p_map[MLX5_MAX_PORTS];
+	struct lag_func           pf[MLX5_MAX_PORTS];
+	struct lag_tracker        tracker;
+	struct delayed_work       bond_work;
+	struct notifier_block     nb;
+};
+
+/* General purpose, use for short periods of time.
+ * Beware of lock dependencies (preferably, no locks should be acquired
+ * under it).
+ */
+static DEFINE_MUTEX(lag_mutex);
+
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
+			       u8 remap_port2)
+{
+	u32   in[MLX5_ST_SZ_DW(create_lag_in)]   = {0};
+	u32   out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
+	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
+
+	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
+
+	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
+	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
+			       u8 remap_port2)
+{
+	u32   in[MLX5_ST_SZ_DW(modify_lag_in)]   = {0};
+	u32   out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
+	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
+
+	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
+	MLX5_SET(modify_lag_in, in, field_select, 0x1);
+
+	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
+	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
+{
+	u32  in[MLX5_ST_SZ_DW(destroy_lag_in)]  = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};
+
+	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
+{
+	u32  in[MLX5_ST_SZ_DW(create_vport_lag_in)]  = {0};
+	u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};
+
+	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
+
+int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
+{
+	u32  in[MLX5_ST_SZ_DW(destroy_vport_lag_in)]  = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};
+
+	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
+
+static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
+{
+	return dev->priv.lag;
+}
+
+static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+				       struct net_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (ldev->pf[i].netdev == ndev)
+			return i;
+
+	return -1;
+}
+
+static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
+}
+
+static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
+					   u8 *port1, u8 *port2)
+{
+	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+		if (tracker->netdev_state[0].tx_enabled) {
+			*port1 = 1;
+			*port2 = 1;
+		} else {
+			*port1 = 2;
+			*port2 = 2;
+		}
+	} else {
+		*port1 = 1;
+		*port2 = 2;
+		if (!tracker->netdev_state[0].link_up)
+			*port1 = 2;
+		else if (!tracker->netdev_state[1].link_up)
+			*port2 = 1;
+	}
+}
+
+static void mlx5_activate_lag(struct mlx5_lag *ldev,
+			      struct lag_tracker *tracker)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	int err;
+
+	ldev->flags |= MLX5_LAG_FLAG_BONDED;
+
+	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
+				       &ldev->v2p_map[1]);
+
+	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
+	if (err)
+		mlx5_core_err(dev0,
+			      "Failed to create LAG (%d)\n",
+			      err);
+}
+
+static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	int err;
+
+	ldev->flags &= ~MLX5_LAG_FLAG_BONDED;
+
+	err = mlx5_cmd_destroy_lag(dev0);
+	if (err)
+		mlx5_core_err(dev0,
+			      "Failed to destroy LAG (%d)\n",
+			      err);
+}
+
+static void mlx5_do_bond(struct mlx5_lag *ldev)
+{
+	struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
+	struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
+	struct lag_tracker tracker;
+	u8 v2p_port1, v2p_port2;
+	int i, err;
+
+	if (!dev0 || !dev1)
+		return;
+
+	mutex_lock(&lag_mutex);
+	tracker = ldev->tracker;
+	mutex_unlock(&lag_mutex);
+
+	if (tracker.is_bonded && !mlx5_lag_is_bonded(ldev)) {
+		if (mlx5_sriov_is_enabled(dev0) ||
+		    mlx5_sriov_is_enabled(dev1)) {
+			mlx5_core_warn(dev0, "LAG is not supported with SRIOV");
+			return;
+		}
+
+		for (i = 0; i < MLX5_MAX_PORTS; i++)
+			mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
+						    MLX5_INTERFACE_PROTOCOL_IB);
+
+		mlx5_activate_lag(ldev, &tracker);
+
+		mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+		mlx5_nic_vport_enable_roce(dev1);
+	} else if (tracker.is_bonded && mlx5_lag_is_bonded(ldev)) {
+		mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
+					       &v2p_port2);
+
+		if ((v2p_port1 != ldev->v2p_map[0]) ||
+		    (v2p_port2 != ldev->v2p_map[1])) {
+			ldev->v2p_map[0] = v2p_port1;
+			ldev->v2p_map[1] = v2p_port2;
+
+			err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+			if (err)
+				mlx5_core_err(dev0,
+					      "Failed to modify LAG (%d)\n",
+					      err);
+		}
+	} else if (!tracker.is_bonded && mlx5_lag_is_bonded(ldev)) {
+		mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
+		mlx5_nic_vport_disable_roce(dev1);
+
+		mlx5_deactivate_lag(ldev);
+
+		for (i = 0; i < MLX5_MAX_PORTS; i++)
+			if (ldev->pf[i].dev)
+				mlx5_add_dev_by_protocol(ldev->pf[i].dev,
+							 MLX5_INTERFACE_PROTOCOL_IB);
+	}
+}
+
+static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
+{
+	schedule_delayed_work(&ldev->bond_work, delay);
+}
+
+static void mlx5_do_bond_work(struct work_struct *work)
+{
+	struct delayed_work *delayed_work = to_delayed_work(work);
+	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
+					     bond_work);
+	int status;
+
+	status = mlx5_dev_list_trylock();
+	if (!status) {
+		/* 1 sec delay. */
+		mlx5_queue_bond_work(ldev, HZ);
+		return;
+	}
+
+	mlx5_do_bond(ldev);
+	mlx5_dev_list_unlock();
+}
+
+static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
+					 struct lag_tracker *tracker,
+					 struct net_device *ndev,
+					 struct netdev_notifier_changeupper_info *info)
+{
+	struct net_device *upper = info->upper_dev, *ndev_tmp;
+	struct netdev_lag_upper_info *lag_upper_info;
+	bool is_bonded;
+	int bond_status = 0;
+	int num_slaves = 0;
+	int idx;
+
+	if (!netif_is_lag_master(upper))
+		return 0;
+
+	lag_upper_info = info->upper_info;
+
+	/* The event may still be of interest if the slave does not belong to
+	 * us, but is enslaved to a master which has one or more of our netdevs
+	 * as slaves (e.g., if a new slave is added to a master that bonds two
+	 * of our netdevs, we should unbond).
+	 */
+	rcu_read_lock();
+	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
+		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
+		if (idx > -1)
+			bond_status |= (1 << idx);
+
+		num_slaves++;
+	}
+	rcu_read_unlock();
+
+	/* None of this lagdev's netdevs are slaves of this master. */
+	if (!(bond_status & 0x3))
+		return 0;
+
+	if (lag_upper_info)
+		tracker->tx_type = lag_upper_info->tx_type;
+
+	/* Determine bonding status:
+	 * A device is considered bonded if both its physical ports are slaves
+	 * of the same lag master, and only them.
+	 * Lag mode must be activebackup or hash.
+	 */
+	is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
+		    (bond_status == 0x3) &&
+		    ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
+		     (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));
+
+	if (tracker->is_bonded != is_bonded) {
+		tracker->is_bonded = is_bonded;
+		return 1;
+	}
+
+	return 0;
+}
+
+static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
+					      struct lag_tracker *tracker,
+					      struct net_device *ndev,
+					      struct netdev_notifier_changelowerstate_info *info)
+{
+	struct netdev_lag_lower_state_info *lag_lower_info;
+	int idx;
+
+	if (!netif_is_lag_port(ndev))
+		return 0;
+
+	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
+	if (idx == -1)
+		return 0;
+
+	/* This information is used to determine virtual to physical
+	 * port mapping.
+	 */
+	lag_lower_info = info->lower_state_info;
+	if (!lag_lower_info)
+		return 0;
+
+	tracker->netdev_state[idx] = *lag_lower_info;
+
+	return 1;
+}
+
+static int mlx5_lag_netdev_event(struct notifier_block *this,
+				 unsigned long event, void *ptr)
+{
+	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+	struct lag_tracker tracker;
+	struct mlx5_lag *ldev;
+	int changed = 0;
+
+	if (!net_eq(dev_net(ndev), &init_net))
+		return NOTIFY_DONE;
+
+	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
+		return NOTIFY_DONE;
+
+	ldev    = container_of(this, struct mlx5_lag, nb);
+	tracker = ldev->tracker;
+
+	switch (event) {
+	case NETDEV_CHANGEUPPER:
+		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
+							ptr);
+		break;
+	case NETDEV_CHANGELOWERSTATE:
+		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
+							     ndev, ptr);
+		break;
+	}
+
+	mutex_lock(&lag_mutex);
+	ldev->tracker = tracker;
+	mutex_unlock(&lag_mutex);
+
+	if (changed)
+		mlx5_queue_bond_work(ldev, 0);
+
+	return NOTIFY_DONE;
+}
+
+static struct mlx5_lag *mlx5_lag_dev_alloc(void)
+{
+	struct mlx5_lag *ldev;
+
+	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
+	if (!ldev)
+		return NULL;
+
+	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+
+	return ldev;
+}
+
+static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
+{
+	kfree(ldev);
+}
+
+static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
+				struct mlx5_core_dev *dev,
+				struct net_device *netdev)
+{
+	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
+
+	if (fn >= MLX5_MAX_PORTS)
+		return;
+
+	mutex_lock(&lag_mutex);
+	ldev->pf[fn].dev    = dev;
+	ldev->pf[fn].netdev = netdev;
+	ldev->tracker.netdev_state[fn].link_up = 0;
+	ldev->tracker.netdev_state[fn].tx_enabled = 0;
+
+	dev->priv.lag = ldev;
+	mutex_unlock(&lag_mutex);
+}
+
+static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
+				   struct mlx5_core_dev *dev)
+{
+	int i;
+
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (ldev->pf[i].dev == dev)
+			break;
+
+	if (i == MLX5_MAX_PORTS)
+		return;
+
+	mutex_lock(&lag_mutex);
+	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
+
+	dev->priv.lag = NULL;
+	mutex_unlock(&lag_mutex);
+}
+
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
+{
+	struct mlx5_lag *ldev = NULL;
+	struct mlx5_core_dev *tmp_dev;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+	    !MLX5_CAP_GEN(dev, lag_master) ||
+	    (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
+		return;
+
+	tmp_dev = mlx5_get_next_phys_dev(dev);
+	if (tmp_dev)
+		ldev = tmp_dev->priv.lag;
+
+	if (!ldev) {
+		ldev = mlx5_lag_dev_alloc();
+		if (!ldev) {
+			mlx5_core_err(dev, "Failed to alloc lag dev\n");
+			return;
+		}
+	}
+
+	mlx5_lag_dev_add_pf(ldev, dev, netdev);
+
+	if (!ldev->nb.notifier_call) {
+		ldev->nb.notifier_call = mlx5_lag_netdev_event;
+		if (register_netdevice_notifier(&ldev->nb)) {
+			ldev->nb.notifier_call = NULL;
+			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
+		}
+	}
+}
+
+/* Must be called with intf_mutex held */
+void mlx5_lag_remove(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	int i;
+
+	ldev = mlx5_lag_dev_get(dev);
+	if (!ldev)
+		return;
+
+	if (mlx5_lag_is_bonded(ldev))
+		mlx5_deactivate_lag(ldev);
+
+	mlx5_lag_dev_remove_pf(ldev, dev);
+
+	for (i = 0; i < MLX5_MAX_PORTS; i++)
+		if (ldev->pf[i].dev)
+			break;
+
+	if (i == MLX5_MAX_PORTS) {
+		if (ldev->nb.notifier_call)
+			unregister_netdevice_notifier(&ldev->nb);
+		cancel_delayed_work_sync(&ldev->bond_work);
+		mlx5_lag_dev_free(ldev);
+	}
+}
+
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+	res  = ldev && mlx5_lag_is_bonded(ldev);
+	mutex_unlock(&lag_mutex);
+
+	return res;
+}
+EXPORT_SYMBOL(mlx5_lag_is_active);
+
+struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
+{
+	struct net_device *ndev = NULL;
+	struct mlx5_lag *ldev;
+
+	mutex_lock(&lag_mutex);
+	ldev = mlx5_lag_dev_get(dev);
+
+	if (!(ldev && mlx5_lag_is_bonded(ldev)))
+		goto unlock;
+
+	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
+		ndev = ldev->tracker.netdev_state[0].tx_enabled ?
+		       ldev->pf[0].netdev : ldev->pf[1].netdev;
+	} else {
+		ndev = ldev->pf[0].netdev;
+	}
+	if (ndev)
+		dev_hold(ndev);
+
+unlock:
+	mutex_unlock(&lag_mutex);
+
+	return ndev;
+}
+EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
+
+bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
+{
+	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
+						 priv);
+	struct mlx5_lag *ldev;
+
+	if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
+		return true;
+
+	ldev = mlx5_lag_dev_get(dev);
+	if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
+		return true;
+
+	/* If bonded, we do not add an IB device for PF1. */
+	return false;
+}
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
index 1368dac..3a3b000 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c
@@ -39,36 +39,33 @@
 int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
 		      u16 opmod, u8 port)
 {
-	struct mlx5_mad_ifc_mbox_in *in = NULL;
-	struct mlx5_mad_ifc_mbox_out *out = NULL;
-	int err;
+	int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
+	int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in);
+	int err = -ENOMEM;
+	void *data;
+	void *resp;
+	u32 *out;
+	u32 *in;
 
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
-	if (!in)
-		return -ENOMEM;
-
-	out = kzalloc(sizeof(*out), GFP_KERNEL);
-	if (!out) {
-		err = -ENOMEM;
+	in = kzalloc(inlen, GFP_KERNEL);
+	out = kzalloc(outlen, GFP_KERNEL);
+	if (!in || !out)
 		goto out;
-	}
 
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC);
-	in->hdr.opmod = cpu_to_be16(opmod);
-	in->port = port;
+	MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
+	MLX5_SET(mad_ifc_in, in, op_mod, opmod);
+	MLX5_SET(mad_ifc_in, in, port, port);
 
-	memcpy(in->data, inb, sizeof(in->data));
+	data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
+	memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
 
-	err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
 	if (err)
 		goto out;
 
-	if (out->hdr.status) {
-		err = mlx5_cmd_status_to_err(&out->hdr);
-		goto out;
-	}
-
-	memcpy(outb, out->data, sizeof(out->data));
+	resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet);
+	memcpy(outb, resp,
+	       MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet));
 
 out:
 	kfree(out);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 4f491d4..d9c3c70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -72,16 +72,6 @@
 module_param_named(prof_sel, prof_sel, int, 0444);
 MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
 
-static LIST_HEAD(intf_list);
-static LIST_HEAD(dev_list);
-static DEFINE_MUTEX(intf_mutex);
-
-struct mlx5_device_context {
-	struct list_head	list;
-	struct mlx5_interface  *intf;
-	void		       *context;
-};
-
 enum {
 	MLX5_ATOMIC_REQ_MODE_BE = 0x0,
 	MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
@@ -324,7 +314,7 @@
 				MLX5_DEV_CAP_FLAG_DCT,
 };
 
-static u16 to_fw_pkey_sz(u32 size)
+static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
 {
 	switch (size) {
 	case 128:
@@ -340,7 +330,7 @@
 	case 4096:
 		return 5;
 	default:
-		pr_warn("invalid pkey table size %d\n", size);
+		mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
 		return 0;
 	}
 }
@@ -363,10 +353,6 @@
 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
-	if (err)
-		goto query_ex;
-
-	err = mlx5_cmd_status_to_err_v2(out);
 	if (err) {
 		mlx5_core_warn(dev,
 			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
@@ -409,20 +395,11 @@
 
 static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
 {
-	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)];
-	int err;
-
-	memset(out, 0, sizeof(out));
+	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
 
 	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
 	MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
-	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
-	if (err)
-		return err;
-
-	err = mlx5_cmd_status_to_err_v2(out);
-
-	return err;
+	return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
 }
 
 static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
@@ -490,7 +467,7 @@
 		      128);
 	/* we limit the size of the pkey table to 128 entries for now */
 	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
-		 to_fw_pkey_sz(128));
+		 to_fw_pkey_sz(dev, 128));
 
 	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
 		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
@@ -528,37 +505,22 @@
 
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
 {
-	u32 out[MLX5_ST_SZ_DW(enable_hca_out)];
-	u32 in[MLX5_ST_SZ_DW(enable_hca_in)];
-	int err;
+	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {0};
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
 	MLX5_SET(enable_hca_in, in, function_id, func_id);
-	memset(out, 0, sizeof(out));
-
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	return mlx5_cmd_status_to_err_v2(out);
+	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 }
 
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
 {
-	u32 out[MLX5_ST_SZ_DW(disable_hca_out)];
-	u32 in[MLX5_ST_SZ_DW(disable_hca_in)];
-	int err;
+	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {0};
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
 	MLX5_SET(disable_hca_in, in, function_id, func_id);
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (err)
-		return err;
-
-	return mlx5_cmd_status_to_err_v2(out);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev)
@@ -758,44 +720,40 @@
 
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
-	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)];
-	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)];
-	u32 set_in[MLX5_ST_SZ_DW(set_issi_in)];
-	u32 set_out[MLX5_ST_SZ_DW(set_issi_out)];
-	int err;
+	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]   = {0};
+	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
 	u32 sup_issi;
-
-	memset(query_in, 0, sizeof(query_in));
-	memset(query_out, 0, sizeof(query_out));
+	int err;
 
 	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
-
-	err = mlx5_cmd_exec_check_status(dev, query_in, sizeof(query_in),
-					 query_out, sizeof(query_out));
+	err = mlx5_cmd_exec(dev, query_in, sizeof(query_in),
+			    query_out, sizeof(query_out));
 	if (err) {
-		if (((struct mlx5_outbox_hdr *)query_out)->status ==
-		    MLX5_CMD_STAT_BAD_OP_ERR) {
+		u32 syndrome;
+		u8 status;
+
+		mlx5_cmd_mbox_status(query_out, &status, &syndrome);
+		if (status == MLX5_CMD_STAT_BAD_OP_ERR) {
 			pr_debug("Only ISSI 0 is supported\n");
 			return 0;
 		}
 
-		pr_err("failed to query ISSI\n");
+		pr_err("failed to query ISSI err(%d)\n", err);
 		return err;
 	}
 
 	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
 
 	if (sup_issi & (1 << 1)) {
-		memset(set_in, 0, sizeof(set_in));
-		memset(set_out, 0, sizeof(set_out));
+		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]   = {0};
+		u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};
 
 		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
 		MLX5_SET(set_issi_in, set_in, current_issi, 1);
-
-		err = mlx5_cmd_exec_check_status(dev, set_in, sizeof(set_in),
-						 set_out, sizeof(set_out));
+		err = mlx5_cmd_exec(dev, set_in, sizeof(set_in),
+				    set_out, sizeof(set_out));
 		if (err) {
-			pr_err("failed to set ISSI=1\n");
+			pr_err("failed to set ISSI=1 err(%d)\n", err);
 			return err;
 		}
 
@@ -809,120 +767,6 @@
 	return -ENOTSUPP;
 }
 
-static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
-{
-	struct mlx5_device_context *dev_ctx;
-	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-
-	dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL);
-	if (!dev_ctx)
-		return;
-
-	dev_ctx->intf    = intf;
-	dev_ctx->context = intf->add(dev);
-
-	if (dev_ctx->context) {
-		spin_lock_irq(&priv->ctx_lock);
-		list_add_tail(&dev_ctx->list, &priv->ctx_list);
-		spin_unlock_irq(&priv->ctx_lock);
-	} else {
-		kfree(dev_ctx);
-	}
-}
-
-static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
-{
-	struct mlx5_device_context *dev_ctx;
-	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
-
-	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-		if (dev_ctx->intf == intf) {
-			spin_lock_irq(&priv->ctx_lock);
-			list_del(&dev_ctx->list);
-			spin_unlock_irq(&priv->ctx_lock);
-
-			intf->remove(dev, dev_ctx->context);
-			kfree(dev_ctx);
-			return;
-		}
-}
-
-static int mlx5_register_device(struct mlx5_core_dev *dev)
-{
-	struct mlx5_priv *priv = &dev->priv;
-	struct mlx5_interface *intf;
-
-	mutex_lock(&intf_mutex);
-	list_add_tail(&priv->dev_list, &dev_list);
-	list_for_each_entry(intf, &intf_list, list)
-		mlx5_add_device(intf, priv);
-	mutex_unlock(&intf_mutex);
-
-	return 0;
-}
-
-static void mlx5_unregister_device(struct mlx5_core_dev *dev)
-{
-	struct mlx5_priv *priv = &dev->priv;
-	struct mlx5_interface *intf;
-
-	mutex_lock(&intf_mutex);
-	list_for_each_entry(intf, &intf_list, list)
-		mlx5_remove_device(intf, priv);
-	list_del(&priv->dev_list);
-	mutex_unlock(&intf_mutex);
-}
-
-int mlx5_register_interface(struct mlx5_interface *intf)
-{
-	struct mlx5_priv *priv;
-
-	if (!intf->add || !intf->remove)
-		return -EINVAL;
-
-	mutex_lock(&intf_mutex);
-	list_add_tail(&intf->list, &intf_list);
-	list_for_each_entry(priv, &dev_list, dev_list)
-		mlx5_add_device(intf, priv);
-	mutex_unlock(&intf_mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL(mlx5_register_interface);
-
-void mlx5_unregister_interface(struct mlx5_interface *intf)
-{
-	struct mlx5_priv *priv;
-
-	mutex_lock(&intf_mutex);
-	list_for_each_entry(priv, &dev_list, dev_list)
-		mlx5_remove_device(intf, priv);
-	list_del(&intf->list);
-	mutex_unlock(&intf_mutex);
-}
-EXPORT_SYMBOL(mlx5_unregister_interface);
-
-void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
-{
-	struct mlx5_priv *priv = &mdev->priv;
-	struct mlx5_device_context *dev_ctx;
-	unsigned long flags;
-	void *result = NULL;
-
-	spin_lock_irqsave(&priv->ctx_lock, flags);
-
-	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
-		if ((dev_ctx->intf->protocol == protocol) &&
-		    dev_ctx->intf->get_dev) {
-			result = dev_ctx->intf->get_dev(dev_ctx->context);
-			break;
-		}
-
-	spin_unlock_irqrestore(&priv->ctx_lock, flags);
-
-	return result;
-}
-EXPORT_SYMBOL(mlx5_get_protocol_dev);
 
 static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
 {
@@ -995,8 +839,102 @@
 	debugfs_remove(priv->dbg_root);
 }
 
-#define MLX5_IB_MOD "mlx5_ib"
-static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+{
+	struct pci_dev *pdev = dev->pdev;
+	int err;
+
+	err = mlx5_query_hca_caps(dev);
+	if (err) {
+		dev_err(&pdev->dev, "query hca failed\n");
+		goto out;
+	}
+
+	err = mlx5_query_board_id(dev);
+	if (err) {
+		dev_err(&pdev->dev, "query board id failed\n");
+		goto out;
+	}
+
+	err = mlx5_eq_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize eq\n");
+		goto out;
+	}
+
+	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
+
+	err = mlx5_init_cq_table(dev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize cq table\n");
+		goto err_eq_cleanup;
+	}
+
+	mlx5_init_qp_table(dev);
+
+	mlx5_init_srq_table(dev);
+
+	mlx5_init_mkey_table(dev);
+
+	err = mlx5_init_rl_table(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init rate limiting\n");
+		goto err_tables_cleanup;
+	}
+
+#ifdef CONFIG_MLX5_CORE_EN
+	err = mlx5_eswitch_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init eswitch %d\n", err);
+		goto err_rl_cleanup;
+	}
+#endif
+
+	err = mlx5_sriov_init(dev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init sriov %d\n", err);
+		goto err_eswitch_cleanup;
+	}
+
+	return 0;
+
+err_eswitch_cleanup:
+#ifdef CONFIG_MLX5_CORE_EN
+	mlx5_eswitch_cleanup(dev->priv.eswitch);
+
+err_rl_cleanup:
+#endif
+	mlx5_cleanup_rl_table(dev);
+
+err_tables_cleanup:
+	mlx5_cleanup_mkey_table(dev);
+	mlx5_cleanup_srq_table(dev);
+	mlx5_cleanup_qp_table(dev);
+	mlx5_cleanup_cq_table(dev);
+
+err_eq_cleanup:
+	mlx5_eq_cleanup(dev);
+
+out:
+	return err;
+}
+
+static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
+{
+	mlx5_sriov_cleanup(dev);
+#ifdef CONFIG_MLX5_CORE_EN
+	mlx5_eswitch_cleanup(dev->priv.eswitch);
+#endif
+	mlx5_cleanup_rl_table(dev);
+	mlx5_cleanup_mkey_table(dev);
+	mlx5_cleanup_srq_table(dev);
+	mlx5_cleanup_qp_table(dev);
+	mlx5_cleanup_cq_table(dev);
+	mlx5_eq_cleanup(dev);
+}
+
+static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
+			 bool boot)
 {
 	struct pci_dev *pdev = dev->pdev;
 	int err;
@@ -1029,12 +967,10 @@
 		goto out_err;
 	}
 
-	mlx5_pagealloc_init(dev);
-
 	err = mlx5_core_enable_hca(dev, 0);
 	if (err) {
 		dev_err(&pdev->dev, "enable hca failed\n");
-		goto err_pagealloc_cleanup;
+		goto err_cmd_cleanup;
 	}
 
 	err = mlx5_core_set_issi(dev);
@@ -1087,34 +1023,21 @@
 
 	mlx5_start_health_poll(dev);
 
-	err = mlx5_query_hca_caps(dev);
-	if (err) {
-		dev_err(&pdev->dev, "query hca failed\n");
-		goto err_stop_poll;
-	}
-
-	err = mlx5_query_board_id(dev);
-	if (err) {
-		dev_err(&pdev->dev, "query board id failed\n");
+	if (boot && mlx5_init_once(dev, priv)) {
+		dev_err(&pdev->dev, "sw objs init failed\n");
 		goto err_stop_poll;
 	}
 
 	err = mlx5_enable_msix(dev);
 	if (err) {
 		dev_err(&pdev->dev, "enable msix failed\n");
-		goto err_stop_poll;
-	}
-
-	err = mlx5_eq_init(dev);
-	if (err) {
-		dev_err(&pdev->dev, "failed to initialize eq\n");
-		goto disable_msix;
+		goto err_cleanup_once;
 	}
 
 	err = mlx5_alloc_uuars(dev, &priv->uuari);
 	if (err) {
 		dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
-		goto err_eq_cleanup;
+		goto err_disable_msix;
 	}
 
 	err = mlx5_start_eqs(dev);
@@ -1130,15 +1053,10 @@
 	}
 
 	err = mlx5_irq_set_affinity_hints(dev);
-	if (err)
+	if (err) {
 		dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
-
-	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
-
-	mlx5_init_cq_table(dev);
-	mlx5_init_qp_table(dev);
-	mlx5_init_srq_table(dev);
-	mlx5_init_mkey_table(dev);
+		goto err_affinity_hints;
+	}
 
 	err = mlx5_init_fs(dev);
 	if (err) {
@@ -1146,36 +1064,26 @@
 		goto err_fs;
 	}
 
-	err = mlx5_init_rl_table(dev);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to init rate limiting\n");
-		goto err_rl;
-	}
-
 #ifdef CONFIG_MLX5_CORE_EN
-	err = mlx5_eswitch_init(dev);
-	if (err) {
-		dev_err(&pdev->dev, "eswitch init failed %d\n", err);
-		goto err_reg_dev;
-	}
+	mlx5_eswitch_attach(dev->priv.eswitch);
 #endif
 
-	err = mlx5_sriov_init(dev);
+	err = mlx5_sriov_attach(dev);
 	if (err) {
 		dev_err(&pdev->dev, "sriov init failed %d\n", err);
 		goto err_sriov;
 	}
 
-	err = mlx5_register_device(dev);
-	if (err) {
-		dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
-		goto err_reg_dev;
+	if (mlx5_device_registered(dev)) {
+		mlx5_attach_device(dev);
+	} else {
+		err = mlx5_register_device(dev);
+		if (err) {
+			dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
+			goto err_reg_dev;
+		}
 	}
 
-	err = request_module_nowait(MLX5_IB_MOD);
-	if (err)
-		pr_info("failed request module on %s\n", MLX5_IB_MOD);
-
 	clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
 	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
 out:
@@ -1183,23 +1091,19 @@
 
 	return 0;
 
-err_sriov:
-	if (mlx5_sriov_cleanup(dev))
-		dev_err(&dev->pdev->dev, "sriov cleanup failed\n");
-
-#ifdef CONFIG_MLX5_CORE_EN
-	mlx5_eswitch_cleanup(dev->priv.eswitch);
-#endif
 err_reg_dev:
-	mlx5_cleanup_rl_table(dev);
-err_rl:
+	mlx5_sriov_detach(dev);
+
+err_sriov:
+#ifdef CONFIG_MLX5_CORE_EN
+	mlx5_eswitch_detach(dev->priv.eswitch);
+#endif
 	mlx5_cleanup_fs(dev);
+
 err_fs:
-	mlx5_cleanup_mkey_table(dev);
-	mlx5_cleanup_srq_table(dev);
-	mlx5_cleanup_qp_table(dev);
-	mlx5_cleanup_cq_table(dev);
 	mlx5_irq_clear_affinity_hints(dev);
+
+err_affinity_hints:
 	free_comp_eqs(dev);
 
 err_stop_eqs:
@@ -1208,12 +1112,13 @@
 err_free_uar:
 	mlx5_free_uuars(dev, &priv->uuari);
 
-err_eq_cleanup:
-	mlx5_eq_cleanup(dev);
-
-disable_msix:
+err_disable_msix:
 	mlx5_disable_msix(dev);
 
+err_cleanup_once:
+	if (boot)
+		mlx5_cleanup_once(dev);
+
 err_stop_poll:
 	mlx5_stop_health_poll(dev);
 	if (mlx5_cmd_teardown_hca(dev)) {
@@ -1230,8 +1135,7 @@
 err_disable_hca:
 	mlx5_core_disable_hca(dev, 0);
 
-err_pagealloc_cleanup:
-	mlx5_pagealloc_cleanup(dev);
+err_cmd_cleanup:
 	mlx5_cmd_cleanup(dev);
 
 out_err:
@@ -1241,40 +1145,35 @@
 	return err;
 }
 
-static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
+static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
+			   bool cleanup)
 {
 	int err = 0;
 
-	err = mlx5_sriov_cleanup(dev);
-	if (err) {
-		dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n",
-			 __func__);
-		return err;
-	}
-
 	mutex_lock(&dev->intf_state_mutex);
 	if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
 		dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n",
 			 __func__);
+		if (cleanup)
+			mlx5_cleanup_once(dev);
 		goto out;
 	}
-	mlx5_unregister_device(dev);
-#ifdef CONFIG_MLX5_CORE_EN
-	mlx5_eswitch_cleanup(dev->priv.eswitch);
-#endif
 
-	mlx5_cleanup_rl_table(dev);
+	if (mlx5_device_registered(dev))
+		mlx5_detach_device(dev);
+
+	mlx5_sriov_detach(dev);
+#ifdef CONFIG_MLX5_CORE_EN
+	mlx5_eswitch_detach(dev->priv.eswitch);
+#endif
 	mlx5_cleanup_fs(dev);
-	mlx5_cleanup_mkey_table(dev);
-	mlx5_cleanup_srq_table(dev);
-	mlx5_cleanup_qp_table(dev);
-	mlx5_cleanup_cq_table(dev);
 	mlx5_irq_clear_affinity_hints(dev);
 	free_comp_eqs(dev);
 	mlx5_stop_eqs(dev);
 	mlx5_free_uuars(dev, &priv->uuari);
-	mlx5_eq_cleanup(dev);
 	mlx5_disable_msix(dev);
+	if (cleanup)
+		mlx5_cleanup_once(dev);
 	mlx5_stop_health_poll(dev);
 	err = mlx5_cmd_teardown_hca(dev);
 	if (err) {
@@ -1284,7 +1183,6 @@
 	mlx5_pagealloc_stop(dev);
 	mlx5_reclaim_startup_pages(dev);
 	mlx5_core_disable_hca(dev, 0);
-	mlx5_pagealloc_cleanup(dev);
 	mlx5_cmd_cleanup(dev);
 
 out:
@@ -1294,22 +1192,6 @@
 	return err;
 }
 
-void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
-		     unsigned long param)
-{
-	struct mlx5_priv *priv = &dev->priv;
-	struct mlx5_device_context *dev_ctx;
-	unsigned long flags;
-
-	spin_lock_irqsave(&priv->ctx_lock, flags);
-
-	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-		if (dev_ctx->intf->event)
-			dev_ctx->intf->event(dev, dev_ctx->context, event, param);
-
-	spin_unlock_irqrestore(&priv->ctx_lock, flags);
-}
-
 struct mlx5_core_event_handler {
 	void (*event)(struct mlx5_core_dev *dev,
 		      enum mlx5_dev_event event,
@@ -1323,6 +1205,7 @@
 #endif
 };
 
+#define MLX5_IB_MOD "mlx5_ib"
 static int init_one(struct pci_dev *pdev,
 		    const struct pci_device_id *id)
 {
@@ -1344,8 +1227,9 @@
 	pci_set_drvdata(pdev, dev);
 
 	if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) {
-		pr_warn("selected profile out of range, selecting default (%d)\n",
-			MLX5_DEFAULT_PROF);
+		mlx5_core_warn(dev,
+			       "selected profile out of range, selecting default (%d)\n",
+			       MLX5_DEFAULT_PROF);
 		prof_sel = MLX5_DEFAULT_PROF;
 	}
 	dev->profile = &profile[prof_sel];
@@ -1368,12 +1252,18 @@
 		goto close_pci;
 	}
 
-	err = mlx5_load_one(dev, priv);
+	mlx5_pagealloc_init(dev);
+
+	err = mlx5_load_one(dev, priv, true);
 	if (err) {
 		dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err);
 		goto clean_health;
 	}
 
+	err = request_module_nowait(MLX5_IB_MOD);
+	if (err)
+		pr_info("failed request module on %s\n", MLX5_IB_MOD);
+
 	err = devlink_register(devlink, &pdev->dev);
 	if (err)
 		goto clean_load;
@@ -1381,8 +1271,9 @@
 	return 0;
 
 clean_load:
-	mlx5_unload_one(dev, priv);
+	mlx5_unload_one(dev, priv, true);
 clean_health:
+	mlx5_pagealloc_cleanup(dev);
 	mlx5_health_cleanup(dev);
 close_pci:
 	mlx5_pci_close(dev, priv);
@@ -1400,11 +1291,15 @@
 	struct mlx5_priv *priv = &dev->priv;
 
 	devlink_unregister(devlink);
-	if (mlx5_unload_one(dev, priv)) {
+	mlx5_unregister_device(dev);
+
+	if (mlx5_unload_one(dev, priv, true)) {
 		dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
 		mlx5_health_cleanup(dev);
 		return;
 	}
+
+	mlx5_pagealloc_cleanup(dev);
 	mlx5_health_cleanup(dev);
 	mlx5_pci_close(dev, priv);
 	pci_set_drvdata(pdev, NULL);
@@ -1419,37 +1314,13 @@
 
 	dev_info(&pdev->dev, "%s was called\n", __func__);
 	mlx5_enter_error_state(dev);
-	mlx5_unload_one(dev, priv);
+	mlx5_unload_one(dev, priv, false);
+	pci_save_state(pdev);
 	mlx5_pci_disable_device(dev);
 	return state == pci_channel_io_perm_failure ?
 		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
 }
 
-static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
-{
-	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
-	int err = 0;
-
-	dev_info(&pdev->dev, "%s was called\n", __func__);
-
-	err = mlx5_pci_enable_device(dev);
-	if (err) {
-		dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
-			, __func__, err);
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-	pci_set_master(pdev);
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-
-	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
-}
-
-void mlx5_disable_device(struct mlx5_core_dev *dev)
-{
-	mlx5_pci_err_detected(dev->pdev, 0);
-}
-
 /* wait for the device to show vital signs by waiting
  * for the health counter to start counting.
  */
@@ -1477,6 +1348,36 @@
 	return -ETIMEDOUT;
 }
 
+static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
+{
+	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
+	int err;
+
+	dev_info(&pdev->dev, "%s was called\n", __func__);
+
+	err = mlx5_pci_enable_device(dev);
+	if (err) {
+		dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
+			, __func__, err);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	pci_set_master(pdev);
+	pci_restore_state(pdev);
+
+	if (wait_vital(pdev)) {
+		dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+void mlx5_disable_device(struct mlx5_core_dev *dev)
+{
+	mlx5_pci_err_detected(dev->pdev, 0);
+}
+
 static void mlx5_pci_resume(struct pci_dev *pdev)
 {
 	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
@@ -1485,14 +1386,7 @@
 
 	dev_info(&pdev->dev, "%s was called\n", __func__);
 
-	pci_save_state(pdev);
-	err = wait_vital(pdev);
-	if (err) {
-		dev_err(&pdev->dev, "%s: wait_vital timed out\n", __func__);
-		return;
-	}
-
-	err = mlx5_load_one(dev, priv);
+	err = mlx5_load_one(dev, priv, false);
 	if (err)
 		dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
 			, __func__, err);
@@ -1514,7 +1408,7 @@
 	dev_info(&pdev->dev, "Shutdown was called\n");
 	/* Notify mlx5 clients that the kernel is being shut down */
 	set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state);
-	mlx5_unload_one(dev, priv);
+	mlx5_unload_one(dev, priv, false);
 	mlx5_pci_disable_device(dev);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
index d5a0c2d..ba2b09c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -37,70 +37,30 @@
 #include <rdma/ib_verbs.h>
 #include "mlx5_core.h"
 
-struct mlx5_attach_mcg_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			qpn;
-	__be32			rsvd;
-	u8			gid[16];
-};
-
-struct mlx5_attach_mcg_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvf[8];
-};
-
-struct mlx5_detach_mcg_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			qpn;
-	__be32			rsvd;
-	u8			gid[16];
-};
-
-struct mlx5_detach_mcg_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvf[8];
-};
-
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
 {
-	struct mlx5_attach_mcg_mbox_in in;
-	struct mlx5_attach_mcg_mbox_out out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)]   = {0};
+	void *gid;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG);
-	memcpy(in.gid, mgid, sizeof(*mgid));
-	in.qpn = cpu_to_be32(qpn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-
-	return err;
+	MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
+	MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
+	gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
+	memcpy(gid, mgid, sizeof(*mgid));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_attach_mcg);
 
 int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
 {
-	struct mlx5_detach_mcg_mbox_in in;
-	struct mlx5_detach_mcg_mbox_out out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)]   = {0};
+	void *gid;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETTACH_FROM_MCG);
-	memcpy(in.gid, mgid, sizeof(*mgid));
-	in.qpn = cpu_to_be32(qpn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-
-	return err;
+	MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+	MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
+	gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
+	memcpy(gid, mgid, sizeof(*mgid));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_detach_mcg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 2f86ec6..3d0cfb9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -58,8 +58,8 @@
 } while (0)
 
 #define mlx5_core_err(__dev, format, ...)				\
-	dev_err(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format,	\
-	       (__dev)->priv.name, __func__, __LINE__, current->pid,	\
+	dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,	\
+		__func__, __LINE__, current->pid,	\
 	       ##__VA_ARGS__)
 
 #define mlx5_core_warn(__dev, format, ...)				\
@@ -75,19 +75,6 @@
 	MLX5_CMD_TIME, /* print command execution time */
 };
 
-static inline int mlx5_cmd_exec_check_status(struct mlx5_core_dev *dev, u32 *in,
-					     int in_size, u32 *out,
-					     int out_size)
-{
-	int err;
-
-	err = mlx5_cmd_exec(dev, in, in_size, out, out_size);
-	if (err)
-		return err;
-
-	return mlx5_cmd_status_to_err((struct mlx5_outbox_hdr *)out);
-}
-
 int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
 int mlx5_query_board_id(struct mlx5_core_dev *dev);
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
@@ -96,7 +83,12 @@
 		     unsigned long param);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
+int mlx5_sriov_init(struct mlx5_core_dev *dev);
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
+int mlx5_sriov_attach(struct mlx5_core_dev *dev);
+void mlx5_sriov_detach(struct mlx5_core_dev *dev);
 int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
+bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
@@ -105,7 +97,38 @@
 struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn);
 void mlx5_cq_tasklet_cb(unsigned long data);
 
+void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev);
+void mlx5_lag_remove(struct mlx5_core_dev *dev);
+
+void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
+void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv);
+void mlx5_attach_device(struct mlx5_core_dev *dev);
+void mlx5_detach_device(struct mlx5_core_dev *dev);
+bool mlx5_device_registered(struct mlx5_core_dev *dev);
+int mlx5_register_device(struct mlx5_core_dev *dev);
+void mlx5_unregister_device(struct mlx5_core_dev *dev);
+void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
+void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol);
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
+void mlx5_dev_list_lock(void);
+void mlx5_dev_list_unlock(void);
+int mlx5_dev_list_trylock(void);
+
+bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
+
 void mlx5e_init(void);
 void mlx5e_cleanup(void);
 
+static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev)
+{
+	/* LACP owner conditions:
+	 * 1) Function is physical.
+	 * 2) LAG is supported by FW.
+	 * 3) LAG is managed by driver (currently the only option).
+	 */
+	return  MLX5_CAP_GEN(dev, vport_group_manager) &&
+		   (MLX5_CAP_GEN(dev, num_lag_ports) > 1) &&
+		    MLX5_CAP_GEN(dev, lag_master);
+}
+
 #endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 77a72939..b9736f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -49,48 +49,43 @@
 {
 }
 
-int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
-			  struct mlx5_core_mkey *mkey,
-			  struct mlx5_create_mkey_mbox_in *in, int inlen,
-			  mlx5_cmd_cbk_t callback, void *context,
-			  struct mlx5_create_mkey_mbox_out *out)
+int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
+			     struct mlx5_core_mkey *mkey,
+			     u32 *in, int inlen,
+			     u32 *out, int outlen,
+			     mlx5_cmd_cbk_t callback, void *context)
 {
 	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-	struct mlx5_create_mkey_mbox_out lout;
+	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+	u32 mkey_index;
+	void *mkc;
 	int err;
 	u8 key;
 
-	memset(&lout, 0, sizeof(lout));
 	spin_lock_irq(&dev->priv.mkey_lock);
 	key = dev->priv.mkey_key++;
 	spin_unlock_irq(&dev->priv.mkey_lock);
-	in->seg.qpn_mkey7_0 |= cpu_to_be32(key);
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY);
-	if (callback) {
-		err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out),
-				       callback, context);
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
+	MLX5_SET(mkc, mkc, mkey_7_0, key);
+
+	if (callback)
+		return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen,
+					callback, context);
+
+	err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout));
+	if (err)
 		return err;
-	} else {
-		err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout));
-	}
 
-	if (err) {
-		mlx5_core_dbg(dev, "cmd exec failed %d\n", err);
-		return err;
-	}
-
-	if (lout.hdr.status) {
-		mlx5_core_dbg(dev, "status %d\n", lout.hdr.status);
-		return mlx5_cmd_status_to_err(&lout.hdr);
-	}
-
-	mkey->iova = be64_to_cpu(in->seg.start_addr);
-	mkey->size = be64_to_cpu(in->seg.len);
-	mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
-	mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff;
+	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
+	mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
+	mkey->size = MLX5_GET64(mkc, mkc, len);
+	mkey->key = mlx5_idx_to_mkey(mkey_index) | key;
+	mkey->pd = MLX5_GET(mkc, mkc, pd);
 
 	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
-		      be32_to_cpu(lout.mkey), key, mkey->key);
+		      mkey_index, key, mkey->key);
 
 	/* connect to mkey tree */
 	write_lock_irq(&table->lock);
@@ -104,20 +99,25 @@
 
 	return err;
 }
+EXPORT_SYMBOL(mlx5_core_create_mkey_cb);
+
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
+			  struct mlx5_core_mkey *mkey,
+			  u32 *in, int inlen)
+{
+	return mlx5_core_create_mkey_cb(dev, mkey, in, inlen,
+					NULL, 0, NULL, NULL);
+}
 EXPORT_SYMBOL(mlx5_core_create_mkey);
 
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
 			   struct mlx5_core_mkey *mkey)
 {
 	struct mlx5_mkey_table *table = &dev->priv.mkey_table;
-	struct mlx5_destroy_mkey_mbox_in in;
-	struct mlx5_destroy_mkey_mbox_out out;
+	u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)]   = {0};
 	struct mlx5_core_mkey *deleted_mkey;
 	unsigned long flags;
-	int err;
-
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
 
 	write_lock_irqsave(&table->lock, flags);
 	deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key));
@@ -128,94 +128,71 @@
 		return -ENOENT;
 	}
 
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY);
-	in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key));
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
-	return err;
+	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_mkey);
 
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
-			 struct mlx5_query_mkey_mbox_out *out, int outlen)
+			 u32 *out, int outlen)
 {
-	struct mlx5_query_mkey_mbox_in in;
-	int err;
+	u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0};
 
-	memset(&in, 0, sizeof(in));
 	memset(out, 0, outlen);
-
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY);
-	in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key));
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
-	if (err)
-		return err;
-
-	if (out->hdr.status)
-		return mlx5_cmd_status_to_err(&out->hdr);
-
-	return err;
+	MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY);
+	MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_mkey);
 
 int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
 			     u32 *mkey)
 {
-	struct mlx5_query_special_ctxs_mbox_in in;
-	struct mlx5_query_special_ctxs_mbox_out out;
+	u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {0};
 	int err;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
-	*mkey = be32_to_cpu(out.dump_fill_mkey);
-
+	MLX5_SET(query_special_contexts_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*mkey = MLX5_GET(query_special_contexts_out, out,
+				 dump_fill_mkey);
 	return err;
 }
 EXPORT_SYMBOL(mlx5_core_dump_fill_mkey);
 
+static inline u32 mlx5_get_psv(u32 *out, int psv_index)
+{
+	switch (psv_index) {
+	case 1: return MLX5_GET(create_psv_out, out, psv1_index);
+	case 2: return MLX5_GET(create_psv_out, out, psv2_index);
+	case 3: return MLX5_GET(create_psv_out, out, psv3_index);
+	default: return MLX5_GET(create_psv_out, out, psv0_index);
+	}
+}
+
 int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
 			 int npsvs, u32 *sig_index)
 {
-	struct mlx5_allocate_psv_in in;
-	struct mlx5_allocate_psv_out out;
+	u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(create_psv_in)]   = {0};
 	int i, err;
 
 	if (npsvs > MLX5_MAX_PSVS)
 		return -EINVAL;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
+	MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV);
+	MLX5_SET(create_psv_in, in, pd, pdn);
+	MLX5_SET(create_psv_in, in, num_psv, npsvs);
 
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_PSV);
-	in.npsv_pd = cpu_to_be32((npsvs << 28) | pdn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err) {
-		mlx5_core_err(dev, "cmd exec failed %d\n", err);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
 		return err;
-	}
-
-	if (out.hdr.status) {
-		mlx5_core_err(dev, "create_psv bad status %d\n",
-			      out.hdr.status);
-		return mlx5_cmd_status_to_err(&out.hdr);
-	}
 
 	for (i = 0; i < npsvs; i++)
-		sig_index[i] = be32_to_cpu(out.psv_idx[i]) & 0xffffff;
+		sig_index[i] = mlx5_get_psv(out, i);
 
 	return err;
 }
@@ -223,29 +200,11 @@
 
 int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
 {
-	struct mlx5_destroy_psv_in in;
-	struct mlx5_destroy_psv_out out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_psv_in)]   = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-
-	in.psv_number = cpu_to_be32(psv_num);
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_PSV);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err) {
-		mlx5_core_err(dev, "destroy_psv cmd exec failed %d\n", err);
-		goto out;
-	}
-
-	if (out.hdr.status) {
-		mlx5_core_err(dev, "destroy_psv bad status %d\n",
-			      out.hdr.status);
-		err = mlx5_cmd_status_to_err(&out.hdr);
-		goto out;
-	}
-
-out:
-	return err;
+	MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV);
+	MLX5_SET(destroy_psv_in, in, psvn, psv_num);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_psv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 32dea35..d458515 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -44,12 +44,6 @@
 	MLX5_PAGES_TAKE		= 2
 };
 
-enum {
-	MLX5_BOOT_PAGES		= 1,
-	MLX5_INIT_PAGES		= 2,
-	MLX5_POST_INIT_PAGES	= 3
-};
-
 struct mlx5_pages_req {
 	struct mlx5_core_dev *dev;
 	u16	func_id;
@@ -67,33 +61,6 @@
 	unsigned		free_count;
 };
 
-struct mlx5_query_pages_inbox {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_query_pages_outbox {
-	struct mlx5_outbox_hdr	hdr;
-	__be16			rsvd;
-	__be16			func_id;
-	__be32			num_pages;
-};
-
-struct mlx5_manage_pages_inbox {
-	struct mlx5_inbox_hdr	hdr;
-	__be16			rsvd;
-	__be16			func_id;
-	__be32			num_entries;
-	__be64			pas[0];
-};
-
-struct mlx5_manage_pages_outbox {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			num_entries;
-	u8			rsvd[4];
-	__be64			pas[0];
-};
-
 enum {
 	MAX_RECLAIM_TIME_MSECS	= 5000,
 	MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
@@ -167,24 +134,21 @@
 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 				s32 *npages, int boot)
 {
-	struct mlx5_query_pages_inbox	in;
-	struct mlx5_query_pages_outbox	out;
+	u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_pages_in)]   = {0};
 	int err;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
-	in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES);
+	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
+	MLX5_SET(query_pages_in, in, op_mod, boot ?
+		 MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
+		 MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
 
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
 
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
-	*npages = be32_to_cpu(out.num_pages);
-	*func_id = be16_to_cpu(out.func_id);
+	*npages = MLX5_GET(query_pages_out, out, num_pages);
+	*func_id = MLX5_GET(query_pages_out, out, function_id);
 
 	return err;
 }
@@ -280,46 +244,37 @@
 
 static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
 {
-	struct mlx5_manage_pages_inbox *in;
-	struct mlx5_manage_pages_outbox out;
+	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(manage_pages_in)]   = {0};
 	int err;
 
-	in = kzalloc(sizeof(*in), GFP_KERNEL);
-	if (!in)
-		return;
+	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
+	MLX5_SET(manage_pages_in, in, function_id, func_id);
 
-	memset(&out, 0, sizeof(out));
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
-	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
-	in->func_id = cpu_to_be16(func_id);
-	err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
-	if (!err)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
-		mlx5_core_warn(dev, "page notify failed\n");
-
-	kfree(in);
+		mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
+			       func_id, err);
 }
 
 static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 		      int notify_fail)
 {
-	struct mlx5_manage_pages_inbox *in;
-	struct mlx5_manage_pages_outbox out;
-	int inlen;
+	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
+	int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
 	u64 addr;
 	int err;
+	u32 *in;
 	int i;
 
-	inlen = sizeof(*in) + npages * sizeof(in->pas[0]);
+	inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
 	in = mlx5_vzalloc(inlen);
 	if (!in) {
 		err = -ENOMEM;
 		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
 		goto out_free;
 	}
-	memset(&out, 0, sizeof(out));
 
 	for (i = 0; i < npages; i++) {
 retry:
@@ -332,27 +287,21 @@
 
 			goto retry;
 		}
-		in->pas[i] = cpu_to_be64(addr);
+		MLX5_SET64(manage_pages_in, in, pas[i], addr);
 	}
 
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
-	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
-	in->func_id = cpu_to_be16(func_id);
-	in->num_entries = cpu_to_be32(npages);
-	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
+	MLX5_SET(manage_pages_in, in, function_id, func_id);
+	MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (err) {
 		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
 			       func_id, npages, err);
 		goto out_4k;
 	}
 
-	err = mlx5_cmd_status_to_err(&out.hdr);
-	if (err) {
-		mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n",
-			       func_id, npages, out.hdr.status);
-		goto out_4k;
-	}
-
 	dev->priv.fw_pages += npages;
 	if (func_id)
 		dev->priv.vfs_pages += npages;
@@ -364,7 +313,7 @@
 
 out_4k:
 	for (i--; i >= 0; i--)
-		free_4k(dev, be64_to_cpu(in->pas[i]));
+		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
 out_free:
 	kvfree(in);
 	if (notify_fail)
@@ -373,64 +322,67 @@
 }
 
 static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
-			     struct mlx5_manage_pages_inbox *in, int in_size,
-			     struct mlx5_manage_pages_outbox *out, int out_size)
+			     u32 *in, int in_size, u32 *out, int out_size)
 {
 	struct fw_page *fwp;
 	struct rb_node *p;
+	u32 func_id;
 	u32 npages;
 	u32 i = 0;
 
 	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
-		return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size,
-						  (u32 *)out, out_size);
+		return mlx5_cmd_exec(dev, in, in_size, out, out_size);
 
-	npages = be32_to_cpu(in->num_entries);
+	/* No hard feelings, we want our pages back! */
+	npages = MLX5_GET(manage_pages_in, in, input_num_entries);
+	func_id = MLX5_GET(manage_pages_in, in, function_id);
 
 	p = rb_first(&dev->priv.page_root);
 	while (p && i < npages) {
 		fwp = rb_entry(p, struct fw_page, rb_node);
-		out->pas[i] = cpu_to_be64(fwp->addr);
 		p = rb_next(p);
+		if (fwp->func_id != func_id)
+			continue;
+
+		MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr);
 		i++;
 	}
 
-	out->num_entries = cpu_to_be32(i);
+	MLX5_SET(manage_pages_out, out, output_num_entries, i);
 	return 0;
 }
 
 static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 			 int *nclaimed)
 {
-	struct mlx5_manage_pages_inbox   in;
-	struct mlx5_manage_pages_outbox *out;
+	int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
+	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
 	int num_claimed;
-	int outlen;
-	u64 addr;
+	u32 *out;
 	int err;
 	int i;
 
 	if (nclaimed)
 		*nclaimed = 0;
 
-	memset(&in, 0, sizeof(in));
-	outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
+	outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
 	out = mlx5_vzalloc(outlen);
 	if (!out)
 		return -ENOMEM;
 
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
-	in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
-	in.func_id = cpu_to_be16(func_id);
-	in.num_entries = cpu_to_be32(npages);
+	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
+	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
+	MLX5_SET(manage_pages_in, in, function_id, func_id);
+	MLX5_SET(manage_pages_in, in, input_num_entries, npages);
+
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
-	err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen);
+	err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
 	if (err) {
 		mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
 		goto out_free;
 	}
 
-	num_claimed = be32_to_cpu(out->num_entries);
+	num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
 	if (num_claimed > npages) {
 		mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n",
 			       num_claimed, npages);
@@ -438,10 +390,9 @@
 		goto out_free;
 	}
 
-	for (i = 0; i < num_claimed; i++) {
-		addr = be64_to_cpu(out->pas[i]);
-		free_4k(dev, addr);
-	}
+	for (i = 0; i < num_claimed; i++)
+		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
+
 
 	if (nclaimed)
 		*nclaimed = num_claimed;
@@ -518,8 +469,8 @@
 	int ret;
 
 	ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
-	       sizeof(struct mlx5_manage_pages_outbox)) /
-	       FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]);
+	       MLX5_ST_SZ_BYTES(manage_pages_out)) /
+	       MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
 
 	return ret;
 }
@@ -594,6 +545,12 @@
 	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
 	int prev_vfs_pages = dev->priv.vfs_pages;
 
+	/* In case of internal error we will free the pages manually later */
+	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		mlx5_core_warn(dev, "Skipping wait for vf pages stage");
+		return 0;
+	}
+
 	mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
 		      dev->priv.name);
 	while (dev->priv.vfs_pages) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
index f2d3aee..bd830d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -36,66 +36,27 @@
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
-struct mlx5_alloc_pd_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_alloc_pd_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			pdn;
-	u8			rsvd[4];
-};
-
-struct mlx5_dealloc_pd_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			pdn;
-	u8			rsvd[4];
-};
-
-struct mlx5_dealloc_pd_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
 {
-	struct mlx5_alloc_pd_mbox_in	in;
-	struct mlx5_alloc_pd_mbox_out	out;
+	u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(alloc_pd_in)]   = {0};
 	int err;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
-	*pdn = be32_to_cpu(out.pdn) & 0xffffff;
+	MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*pdn = MLX5_GET(alloc_pd_out, out, pd);
 	return err;
 }
 EXPORT_SYMBOL(mlx5_core_alloc_pd);
 
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
 {
-	struct mlx5_dealloc_pd_mbox_in	in;
-	struct mlx5_dealloc_pd_mbox_out	out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)]   = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD);
-	in.pdn = cpu_to_be32(pdn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
-	return err;
+	MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
+	MLX5_SET(dealloc_pd_in, in, pd, pdn);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_dealloc_pd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 752c081..34e7184 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -38,45 +38,42 @@
 
 int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 			 int size_in, void *data_out, int size_out,
-			 u16 reg_num, int arg, int write)
+			 u16 reg_id, int arg, int write)
 {
-	struct mlx5_access_reg_mbox_in *in = NULL;
-	struct mlx5_access_reg_mbox_out *out = NULL;
+	int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out;
+	int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in;
 	int err = -ENOMEM;
+	u32 *out = NULL;
+	u32 *in = NULL;
+	void *data;
 
-	in = mlx5_vzalloc(sizeof(*in) + size_in);
-	if (!in)
-		return -ENOMEM;
+	in = mlx5_vzalloc(inlen);
+	out = mlx5_vzalloc(outlen);
+	if (!in || !out)
+		goto out;
 
-	out = mlx5_vzalloc(sizeof(*out) + size_out);
-	if (!out)
-		goto ex1;
+	data = MLX5_ADDR_OF(access_register_in, in, register_data);
+	memcpy(data, data_in, size_in);
 
-	memcpy(in->data, data_in, size_in);
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG);
-	in->hdr.opmod = cpu_to_be16(!write);
-	in->arg = cpu_to_be32(arg);
-	in->register_id = cpu_to_be16(reg_num);
-	err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
-			    sizeof(*out) + size_out);
+	MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG);
+	MLX5_SET(access_register_in, in, op_mod, !write);
+	MLX5_SET(access_register_in, in, argument, arg);
+	MLX5_SET(access_register_in, in, register_id, reg_id);
+
+	err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
 	if (err)
-		goto ex2;
+		goto out;
 
-	if (out->hdr.status)
-		err = mlx5_cmd_status_to_err(&out->hdr);
+	data = MLX5_ADDR_OF(access_register_out, out, register_data);
+	memcpy(data_out, data, size_out);
 
-	if (!err)
-		memcpy(data_out, out->data, size_out);
-
-ex2:
+out:
 	kvfree(out);
-ex1:
 	kvfree(in);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_access_reg);
 
-
 struct mlx5_reg_pcap {
 	u8			rsvd0;
 	u8			port_num;
@@ -104,12 +101,10 @@
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
 			 int ptys_size, int proto_mask, u8 local_port)
 {
-	u32 in[MLX5_ST_SZ_DW(ptys_reg)];
+	u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0};
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(ptys_reg, in, local_port, local_port);
 	MLX5_SET(ptys_reg, in, proto_mask, proto_mask);
-
 	return mlx5_core_access_reg(dev, in, sizeof(in), ptys,
 				    ptys_size, MLX5_REG_PTYS, 0, 0);
 }
@@ -117,13 +112,11 @@
 
 int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration)
 {
+	u32 in[MLX5_ST_SZ_DW(mlcr_reg)]  = {0};
 	u32 out[MLX5_ST_SZ_DW(mlcr_reg)];
-	u32 in[MLX5_ST_SZ_DW(mlcr_reg)];
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(mlcr_reg, in, local_port, 1);
 	MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration);
-
 	return mlx5_core_access_reg(dev, in, sizeof(in), out,
 				    sizeof(out), MLX5_REG_MLCR, 0, 1);
 }
@@ -182,25 +175,39 @@
 }
 EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper);
 
-int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev,
-			       u8 *proto_oper, int proto_mask,
-			       u8 local_port)
+int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
+				   u32 *proto_oper, u8 local_port)
 {
 	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
 	int err;
 
-	err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, local_port);
+	err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN,
+				   local_port);
 	if (err)
 		return err;
 
-	if (proto_mask == MLX5_PTYS_EN)
-		*proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
-	else
-		*proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
+	*proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper);
+EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper);
+
+int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
+				  u8 *proto_oper, u8 local_port)
+{
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+	int err;
+
+	err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB,
+				   local_port);
+	if (err)
+		return err;
+
+	*proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper);
+
+	return 0;
+}
+EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper);
 
 int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
 		       u32 proto_admin, int proto_mask)
@@ -246,15 +253,12 @@
 int mlx5_set_port_admin_status(struct mlx5_core_dev *dev,
 			       enum mlx5_port_status status)
 {
-	u32 in[MLX5_ST_SZ_DW(paos_reg)];
+	u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(paos_reg)];
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(paos_reg, in, local_port, 1);
 	MLX5_SET(paos_reg, in, admin_status, status);
 	MLX5_SET(paos_reg, in, ase, 1);
-
 	return mlx5_core_access_reg(dev, in, sizeof(in), out,
 				    sizeof(out), MLX5_REG_PAOS, 0, 1);
 }
@@ -263,19 +267,15 @@
 int mlx5_query_port_admin_status(struct mlx5_core_dev *dev,
 				 enum mlx5_port_status *status)
 {
-	u32 in[MLX5_ST_SZ_DW(paos_reg)];
+	u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(paos_reg)];
 	int err;
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(paos_reg, in, local_port, 1);
-
 	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
 				   sizeof(out), MLX5_REG_PAOS, 0, 0);
 	if (err)
 		return err;
-
 	*status = MLX5_GET(paos_reg, out, admin_status);
 	return 0;
 }
@@ -284,13 +284,10 @@
 static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu,
 				u16 *max_mtu, u16 *oper_mtu, u8 port)
 {
-	u32 in[MLX5_ST_SZ_DW(pmtu_reg)];
+	u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(pmtu_reg, in, local_port, port);
-
 	mlx5_core_access_reg(dev, in, sizeof(in), out,
 			     sizeof(out), MLX5_REG_PMTU, 0, 0);
 
@@ -304,14 +301,11 @@
 
 int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port)
 {
-	u32 in[MLX5_ST_SZ_DW(pmtu_reg)];
+	u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pmtu_reg)];
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(pmtu_reg, in, admin_mtu, mtu);
 	MLX5_SET(pmtu_reg, in, local_port, port);
-
 	return mlx5_core_access_reg(dev, in, sizeof(in), out,
 				   sizeof(out), MLX5_REG_PMTU, 0, 1);
 }
@@ -333,15 +327,12 @@
 
 static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num)
 {
+	u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pmlp_reg)];
-	u32 in[MLX5_ST_SZ_DW(pmlp_reg)];
 	int module_mapping;
 	int err;
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(pmlp_reg, in, local_port, 1);
-
 	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
 				   MLX5_REG_PMLP, 0, 0);
 	if (err)
@@ -410,11 +401,9 @@
 static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc,
 				int pvlc_size,  u8 local_port)
 {
-	u32 in[MLX5_ST_SZ_DW(pvlc_reg)];
+	u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0};
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(pvlc_reg, in, local_port, local_port);
-
 	return mlx5_core_access_reg(dev, in, sizeof(in), pvlc,
 				    pvlc_size, MLX5_REG_PVLC, 0, 0);
 }
@@ -460,10 +449,9 @@
 
 int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause)
 {
-	u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(pfcc_reg, in, local_port, 1);
 	MLX5_SET(pfcc_reg, in, pptx, tx_pause);
 	MLX5_SET(pfcc_reg, in, pprx, rx_pause);
@@ -476,13 +464,11 @@
 int mlx5_query_port_pause(struct mlx5_core_dev *dev,
 			  u32 *rx_pause, u32 *tx_pause)
 {
-	u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 	int err;
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(pfcc_reg, in, local_port, 1);
-
 	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
 				   sizeof(out), MLX5_REG_PFCC, 0, 0);
 	if (err)
@@ -500,10 +486,9 @@
 
 int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx)
 {
-	u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(pfcc_reg, in, local_port, 1);
 	MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx);
 	MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx);
@@ -517,13 +502,11 @@
 
 int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx)
 {
-	u32 in[MLX5_ST_SZ_DW(pfcc_reg)];
+	u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(pfcc_reg)];
 	int err;
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(pfcc_reg, in, local_port, 1);
-
 	err = mlx5_core_access_reg(dev, in, sizeof(in), out,
 				   sizeof(out), MLX5_REG_PFCC, 0, 0);
 	if (err)
@@ -567,12 +550,11 @@
 
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
 {
-	u32 in[MLX5_ST_SZ_DW(qtct_reg)];
+	u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
 	u32 out[MLX5_ST_SZ_DW(qtct_reg)];
 	int err;
 	int i;
 
-	memset(in, 0, sizeof(in));
 	for (i = 0; i < 8; i++) {
 		if (prio_tc[i] > mlx5_max_tc(mdev))
 			return -EINVAL;
@@ -617,11 +599,9 @@
 
 int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group)
 {
-	u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+	u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
 	int i;
 
-	memset(in, 0, sizeof(in));
-
 	for (i = 0; i <= mlx5_max_tc(mdev); i++) {
 		MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1);
 		MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]);
@@ -633,11 +613,9 @@
 
 int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
 {
-	u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+	u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
 	int i;
 
-	memset(in, 0, sizeof(in));
-
 	for (i = 0; i <= mlx5_max_tc(mdev); i++) {
 		MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1);
 		MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]);
@@ -651,12 +629,10 @@
 				    u8 *max_bw_value,
 				    u8 *max_bw_units)
 {
-	u32 in[MLX5_ST_SZ_DW(qetc_reg)];
+	u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
 	void *ets_tcn_conf;
 	int i;
 
-	memset(in, 0, sizeof(in));
-
 	MLX5_SET(qetc_reg, in, port_number, 1);
 
 	for (i = 0; i <= mlx5_max_tc(mdev); i++) {
@@ -701,35 +677,24 @@
 
 int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode)
 {
-	u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)];
-	u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0};
 
 	MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL);
 	MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1);
 	MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode);
-
-	return mlx5_cmd_exec_check_status(mdev, in, sizeof(in),
-					  out, sizeof(out));
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_wol);
 
 int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
 {
-	u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)];
-	u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)];
+	u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {0};
 	int err;
 
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
 	MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL);
-
-	err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in),
-					 out, sizeof(out));
-
+	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 	if (!err)
 		*wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode);
 
@@ -740,11 +705,9 @@
 static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out,
 				  int outlen)
 {
-	u32 in[MLX5_ST_SZ_DW(pcmr_reg)];
+	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(pcmr_reg, in, local_port, 1);
-
 	return mlx5_core_access_reg(mdev, in, sizeof(in), out,
 				    outlen, MLX5_REG_PCMR, 0, 0);
 }
@@ -759,12 +722,10 @@
 
 int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable)
 {
-	u32 in[MLX5_ST_SZ_DW(pcmr_reg)];
+	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0};
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(pcmr_reg, in, local_port, 1);
 	MLX5_SET(pcmr_reg, in, fcs_chk, enable);
-
 	return mlx5_set_ports_check(mdev, in, sizeof(in));
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index b82d658..d0a4005 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -271,30 +271,20 @@
 
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
-			struct mlx5_create_qp_mbox_in *in,
-			int inlen)
+			u32 *in, int inlen)
 {
-	struct mlx5_create_qp_mbox_out out;
-	struct mlx5_destroy_qp_mbox_in din;
-	struct mlx5_destroy_qp_mbox_out dout;
+	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {0};
+	u32 dout[MLX5_ST_SZ_DW(destroy_qp_out)];
+	u32 din[MLX5_ST_SZ_DW(destroy_qp_in)];
 	int err;
 
-	memset(&out, 0, sizeof(out));
-	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
+	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
 
-	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
-	if (err) {
-		mlx5_core_warn(dev, "ret %d\n", err);
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+	if (err)
 		return err;
-	}
 
-	if (out.hdr.status) {
-		mlx5_core_warn(dev, "current num of QPs 0x%x\n",
-			       atomic_read(&dev->num_qps));
-		return mlx5_cmd_status_to_err(&out.hdr);
-	}
-
-	qp->qpn = be32_to_cpu(out.qpn) & 0xffffff;
+	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
 	mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
 
 	err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
@@ -311,12 +301,11 @@
 	return 0;
 
 err_cmd:
-	memset(&din, 0, sizeof(din));
-	memset(&dout, 0, sizeof(dout));
-	din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
-	din.qpn = cpu_to_be32(qp->qpn);
-	mlx5_cmd_exec(dev, &din, sizeof(din), &out, sizeof(dout));
-
+	memset(din, 0, sizeof(din));
+	memset(dout, 0, sizeof(dout));
+	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
@@ -324,45 +313,145 @@
 int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 			 struct mlx5_core_qp *qp)
 {
-	struct mlx5_destroy_qp_mbox_in in;
-	struct mlx5_destroy_qp_mbox_out out;
+	u32 out[MLX5_ST_SZ_DW(destroy_qp_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)]   = {0};
 	int err;
 
 	mlx5_debug_qp_remove(dev, qp);
 
 	destroy_qprqsq_common(dev, qp);
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP);
-	in.qpn = cpu_to_be32(qp->qpn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
 
-	if (out.hdr.status)
-		return mlx5_cmd_status_to_err(&out.hdr);
-
 	atomic_dec(&dev->num_qps);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
 
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
-			struct mlx5_modify_qp_mbox_in *in, int sqd_event,
+struct mbox_info {
+	u32 *in;
+	u32 *out;
+	int inlen;
+	int outlen;
+};
+
+static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen)
+{
+	mbox->inlen  = inlen;
+	mbox->outlen = outlen;
+	mbox->in = kzalloc(mbox->inlen, GFP_KERNEL);
+	mbox->out = kzalloc(mbox->outlen, GFP_KERNEL);
+	if (!mbox->in || !mbox->out) {
+		kfree(mbox->in);
+		kfree(mbox->out);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void mbox_free(struct mbox_info *mbox)
+{
+	kfree(mbox->in);
+	kfree(mbox->out);
+}
+
+static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
+				u32 opt_param_mask, void *qpc,
+				struct mbox_info *mbox)
+{
+	mbox->out = NULL;
+	mbox->in = NULL;
+
+#define MBOX_ALLOC(mbox, typ)  \
+	mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
+
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \
+	MLX5_SET(typ##_in, in, opcode, _opcode); \
+	MLX5_SET(typ##_in, in, qpn, _qpn)
+
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \
+	MOD_QP_IN_SET(typ, in, _opcode, _qpn); \
+	MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
+	memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc))
+
+	switch (opcode) {
+	/* 2RST & 2ERR */
+	case MLX5_CMD_OP_2RST_QP:
+		if (MBOX_ALLOC(mbox, qp_2rst))
+			return -ENOMEM;
+		MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn);
+		break;
+	case MLX5_CMD_OP_2ERR_QP:
+		if (MBOX_ALLOC(mbox, qp_2err))
+			return -ENOMEM;
+		MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn);
+		break;
+
+	/* MODIFY with QPC */
+	case MLX5_CMD_OP_RST2INIT_QP:
+		if (MBOX_ALLOC(mbox, rst2init_qp))
+			return -ENOMEM;
+		 MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
+				   opt_param_mask, qpc);
+		 break;
+	case MLX5_CMD_OP_INIT2RTR_QP:
+		if (MBOX_ALLOC(mbox, init2rtr_qp))
+			return -ENOMEM;
+		 MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
+				   opt_param_mask, qpc);
+		 break;
+	case MLX5_CMD_OP_RTR2RTS_QP:
+		if (MBOX_ALLOC(mbox, rtr2rts_qp))
+			return -ENOMEM;
+		 MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
+				   opt_param_mask, qpc);
+		 break;
+	case MLX5_CMD_OP_RTS2RTS_QP:
+		if (MBOX_ALLOC(mbox, rts2rts_qp))
+			return -ENOMEM;
+		MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
+				  opt_param_mask, qpc);
+		break;
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+		if (MBOX_ALLOC(mbox, sqerr2rts_qp))
+			return -ENOMEM;
+		MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
+				  opt_param_mask, qpc);
+		break;
+	case MLX5_CMD_OP_INIT2INIT_QP:
+		if (MBOX_ALLOC(mbox, init2init_qp))
+			return -ENOMEM;
+		MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
+				  opt_param_mask, qpc);
+		break;
+	default:
+		mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
+			      opcode, qpn);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
+			u32 opt_param_mask, void *qpc,
 			struct mlx5_core_qp *qp)
 {
-	struct mlx5_modify_qp_mbox_out out;
-	int err = 0;
+	struct mbox_info mbox;
+	int err;
 
-	memset(&out, 0, sizeof(out));
-	in->hdr.opcode = cpu_to_be16(operation);
-	in->qpn = cpu_to_be32(qp->qpn);
-	err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out));
+	err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
+				   opt_param_mask, qpc, &mbox);
 	if (err)
 		return err;
 
-	return mlx5_cmd_status_to_err(&out.hdr);
+	err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen);
+	mbox_free(&mbox);
+	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_qp_modify);
 
@@ -382,66 +471,38 @@
 }
 
 int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
-		       struct mlx5_query_qp_mbox_out *out, int outlen)
+		       u32 *out, int outlen)
 {
-	struct mlx5_query_qp_mbox_in in;
-	int err;
+	u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(out, 0, outlen);
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_QP);
-	in.qpn = cpu_to_be32(qp->qpn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
-	if (err)
-		return err;
-
-	if (out->hdr.status)
-		return mlx5_cmd_status_to_err(&out->hdr);
-
-	return err;
+	MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
+	MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
 
 int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
 {
-	struct mlx5_alloc_xrcd_mbox_in in;
-	struct mlx5_alloc_xrcd_mbox_out out;
+	u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)]   = {0};
 	int err;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_XRCD);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-	else
-		*xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff;
-
+	MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc);
 
 int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
 {
-	struct mlx5_dealloc_xrcd_mbox_in in;
-	struct mlx5_dealloc_xrcd_mbox_out out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)]   = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_XRCD);
-	in.xrcdn = cpu_to_be32(xrcdn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
-
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-
-	return err;
+	MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+	MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
 
@@ -449,28 +510,23 @@
 int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
 				u8 flags, int error)
 {
-	struct mlx5_page_fault_resume_mbox_in in;
-	struct mlx5_page_fault_resume_mbox_out out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)]   = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_PAGE_FAULT_RESUME);
-	in.hdr.opmod = 0;
-	flags &= (MLX5_PAGE_FAULT_RESUME_REQUESTOR |
-		  MLX5_PAGE_FAULT_RESUME_WRITE	   |
-		  MLX5_PAGE_FAULT_RESUME_RDMA);
-	flags |= (error ? MLX5_PAGE_FAULT_RESUME_ERROR : 0);
-	in.flags_qpn = cpu_to_be32((qpn & MLX5_QPN_MASK) |
-				   (flags << MLX5_QPN_BITS));
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		return err;
+	MLX5_SET(page_fault_resume_in, in, opcode,
+		 MLX5_CMD_OP_PAGE_FAULT_RESUME);
+	MLX5_SET(page_fault_resume_in, in, qpn, qpn);
 
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
+	if (flags & MLX5_PAGE_FAULT_RESUME_REQUESTOR)
+		MLX5_SET(page_fault_resume_in, in, req_res, 1);
+	if (flags & MLX5_PAGE_FAULT_RESUME_WRITE)
+		MLX5_SET(page_fault_resume_in, in, read_write, 1);
+	if (flags & MLX5_PAGE_FAULT_RESUME_RDMA)
+		MLX5_SET(page_fault_resume_in, in, rdma, 1);
+	if (error)
+		MLX5_SET(page_fault_resume_in, in, error, 1);
 
-	return err;
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
 #endif
@@ -541,15 +597,12 @@
 
 int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id)
 {
-	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)];
-	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)];
+	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
 	int err;
 
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
 	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
-	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (!err)
 		*counter_id = MLX5_GET(alloc_q_counter_out, out,
 				       counter_set_id);
@@ -559,31 +612,25 @@
 
 int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)];
-	u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)] = {0};
 
 	MLX5_SET(dealloc_q_counter_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
 	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id);
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					  sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter);
 
 int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
 			      int reset, void *out, int out_size)
 {
-	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0};
 
 	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
 	MLX5_SET(query_q_counter_in, in, clear, reset);
 	MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id);
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_size);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index c07c28b..104902a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -63,19 +63,14 @@
 static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev,
 				   u32 rate, u16 index)
 {
-	u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)];
-	u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0};
 
 	MLX5_SET(set_rate_limit_in, in, opcode,
 		 MLX5_CMD_OP_SET_RATE_LIMIT);
 	MLX5_SET(set_rate_limit_in, in, rate_limit_index, index);
 	MLX5_SET(set_rate_limit_in, in, rate_limit, rate);
-
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-					  out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index b380a6b..e086277 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -37,198 +37,200 @@
 #include "eswitch.h"
 #endif
 
-static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs)
+bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+
+	return !!sriov->num_vfs;
+}
+
+static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
 {
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 	int err;
 	int vf;
 
-	for (vf = 1; vf <= num_vfs; vf++) {
-		err = mlx5_core_enable_hca(dev, vf);
-		if (err) {
-			mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1);
-		} else {
-			sriov->vfs_ctx[vf - 1].enabled = 1;
-			mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1);
-		}
+	if (sriov->enabled_vfs) {
+		mlx5_core_warn(dev,
+			       "failed to enable SRIOV on device, already enabled with %d vfs\n",
+			       sriov->enabled_vfs);
+		return -EBUSY;
 	}
-}
 
-static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs)
-{
-	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-	int vf;
-
-	for (vf = 1; vf <= num_vfs; vf++) {
-		if (sriov->vfs_ctx[vf - 1].enabled) {
-			if (mlx5_core_disable_hca(dev, vf))
-				mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1);
-			else
-				sriov->vfs_ctx[vf - 1].enabled = 0;
-		}
-	}
-}
-
-static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs)
-{
-	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
-	int err;
-
-	if (pci_num_vf(pdev))
-		pci_disable_sriov(pdev);
-
-	enable_vfs(dev, num_vfs);
-
-	err = pci_enable_sriov(pdev, num_vfs);
+#ifdef CONFIG_MLX5_CORE_EN
+	err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
 	if (err) {
-		dev_warn(&pdev->dev, "enable sriov failed %d\n", err);
-		goto ex;
+		mlx5_core_warn(dev,
+			       "failed to enable eswitch SRIOV (%d)\n", err);
+		return err;
+	}
+#endif
+
+	for (vf = 0; vf < num_vfs; vf++) {
+		err = mlx5_core_enable_hca(dev, vf + 1);
+		if (err) {
+			mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
+			continue;
+		}
+		sriov->vfs_ctx[vf].enabled = 1;
+		sriov->enabled_vfs++;
+		mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf);
+
 	}
 
 	return 0;
+}
 
-ex:
-	disable_vfs(dev, num_vfs);
+static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+	int err;
+	int vf;
+
+	if (!sriov->enabled_vfs)
+		return;
+
+	for (vf = 0; vf < sriov->num_vfs; vf++) {
+		if (!sriov->vfs_ctx[vf].enabled)
+			continue;
+		err = mlx5_core_disable_hca(dev, vf + 1);
+		if (err) {
+			mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
+			continue;
+		}
+		sriov->vfs_ctx[vf].enabled = 0;
+		sriov->enabled_vfs--;
+	}
+
+#ifdef CONFIG_MLX5_CORE_EN
+	mlx5_eswitch_disable_sriov(dev->priv.eswitch);
+#endif
+
+	if (mlx5_wait_for_vf_pages(dev))
+		mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");
+}
+
+static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs)
+{
+	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
+	int err = 0;
+
+	if (pci_num_vf(pdev)) {
+		mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n");
+		return -EBUSY;
+	}
+
+	err = pci_enable_sriov(pdev, num_vfs);
+	if (err)
+		mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err);
+
 	return err;
 }
 
-static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs)
+static void mlx5_pci_disable_sriov(struct pci_dev *pdev)
+{
+	pci_disable_sriov(pdev);
+}
+
+static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-	int err;
+	int err = 0;
 
-	kfree(sriov->vfs_ctx);
-	sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC);
-	if (!sriov->vfs_ctx)
-		return -ENOMEM;
-
-	sriov->enabled_vfs = num_vfs;
-	err = mlx5_core_create_vfs(pdev, num_vfs);
+	err = mlx5_device_enable_sriov(dev, num_vfs);
 	if (err) {
-		kfree(sriov->vfs_ctx);
-		sriov->vfs_ctx = NULL;
+		mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err);
 		return err;
 	}
 
+	err = mlx5_pci_enable_sriov(pdev, num_vfs);
+	if (err) {
+		mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err);
+		mlx5_device_disable_sriov(dev);
+		return err;
+	}
+
+	sriov->num_vfs = num_vfs;
+
 	return 0;
 }
 
-static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs)
+static void mlx5_sriov_disable(struct pci_dev *pdev)
 {
+	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 
-	sriov->num_vfs = num_vfs;
-}
-
-static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev)
-{
-	struct mlx5_core_sriov *sriov;
-
-	sriov = &dev->priv.sriov;
-	disable_vfs(dev, sriov->num_vfs);
-
-	if (mlx5_wait_for_vf_pages(dev))
-		mlx5_core_warn(dev, "timeout claiming VFs pages\n");
-
+	mlx5_pci_disable_sriov(pdev);
+	mlx5_device_disable_sriov(dev);
 	sriov->num_vfs = 0;
 }
 
 int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
-	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-	int err;
+	int err = 0;
 
 	mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs);
 	if (!mlx5_core_is_pf(dev))
 		return -EPERM;
 
-	mlx5_core_cleanup_vfs(dev);
-
-	if (!num_vfs) {
-#ifdef CONFIG_MLX5_CORE_EN
-		mlx5_eswitch_disable_sriov(dev->priv.eswitch);
-#endif
-		kfree(sriov->vfs_ctx);
-		sriov->vfs_ctx = NULL;
-		if (!pci_vfs_assigned(pdev))
-			pci_disable_sriov(pdev);
-		else
-			pr_info("unloading PF driver while leaving orphan VFs\n");
-		return 0;
+	if (num_vfs && mlx5_lag_is_active(dev)) {
+		mlx5_core_warn(dev, "can't turn sriov on while LAG is active");
+		return -EINVAL;
 	}
 
-	err = mlx5_core_sriov_enable(pdev, num_vfs);
-	if (err) {
-		dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err);
-		return err;
-	}
+	if (num_vfs)
+		err = mlx5_sriov_enable(pdev, num_vfs);
+	else
+		mlx5_sriov_disable(pdev);
 
-	mlx5_core_init_vfs(dev, num_vfs);
-#ifdef CONFIG_MLX5_CORE_EN
-	mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
-#endif
-
-	return num_vfs;
+	return err ? err : num_vfs;
 }
 
-static int sync_required(struct pci_dev *pdev)
+int mlx5_sriov_attach(struct mlx5_core_dev *dev)
 {
-	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
-	int cur_vfs = pci_num_vf(pdev);
 
-	if (cur_vfs != sriov->num_vfs) {
-		pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs);
-		return 1;
-	}
+	if (!mlx5_core_is_pf(dev) || !sriov->num_vfs)
+		return 0;
 
-	return 0;
+	/* If sriov VFs exist in PCI level, enable them in device level */
+	return mlx5_device_enable_sriov(dev, sriov->num_vfs);
+}
+
+void mlx5_sriov_detach(struct mlx5_core_dev *dev)
+{
+	if (!mlx5_core_is_pf(dev))
+		return;
+
+	mlx5_device_disable_sriov(dev);
 }
 
 int mlx5_sriov_init(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 	struct pci_dev *pdev = dev->pdev;
-	int cur_vfs;
+	int total_vfs;
 
 	if (!mlx5_core_is_pf(dev))
 		return 0;
 
-	if (!sync_required(dev->pdev))
-		return 0;
-
-	cur_vfs = pci_num_vf(pdev);
-	sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
+	total_vfs = pci_sriov_get_totalvfs(pdev);
+	sriov->num_vfs = pci_num_vf(pdev);
+	sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL);
 	if (!sriov->vfs_ctx)
 		return -ENOMEM;
 
-	sriov->enabled_vfs = cur_vfs;
-
-	mlx5_core_init_vfs(dev, cur_vfs);
-#ifdef CONFIG_MLX5_CORE_EN
-	if (cur_vfs)
-		mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs,
-					  SRIOV_LEGACY);
-#endif
-
-	enable_vfs(dev, cur_vfs);
-
 	return 0;
 }
 
-int mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
+void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
 {
-	struct pci_dev *pdev = dev->pdev;
-	int err;
+	struct mlx5_core_sriov *sriov = &dev->priv.sriov;
 
 	if (!mlx5_core_is_pf(dev))
-		return 0;
+		return;
 
-	err = mlx5_core_sriov_configure(pdev, 0);
-	if (err)
-		return err;
-
-	return 0;
+	kfree(sriov->vfs_ctx);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index c07f4d0..30996300 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -175,8 +175,8 @@
 	MLX5_SET(create_srq_in, create_in, opcode,
 		 MLX5_CMD_OP_CREATE_SRQ);
 
-	err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out,
-					 sizeof(create_out));
+	err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+			    sizeof(create_out));
 	kvfree(create_in);
 	if (!err)
 		srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
@@ -194,8 +194,8 @@
 		 MLX5_CMD_OP_DESTROY_SRQ);
 	MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
 
-	return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
-					  srq_out, sizeof(srq_out));
+	return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+			     srq_out, sizeof(srq_out));
 }
 
 static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
@@ -209,8 +209,8 @@
 	MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn);
 	MLX5_SET(arm_xrc_srq_in, srq_in, lwm,      lwm);
 
-	return  mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
-					   srq_out, sizeof(srq_out));
+	return  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+			      srq_out, sizeof(srq_out));
 }
 
 static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
@@ -228,9 +228,8 @@
 	MLX5_SET(query_srq_in, srq_in, opcode,
 		 MLX5_CMD_OP_QUERY_SRQ);
 	MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
-	err =  mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in),
-					  srq_out,
-					  MLX5_ST_SZ_BYTES(query_srq_out));
+	err =  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
+			     srq_out, MLX5_ST_SZ_BYTES(query_srq_out));
 	if (err)
 		goto out;
 
@@ -272,8 +271,8 @@
 		 MLX5_CMD_OP_CREATE_XRC_SRQ);
 
 	memset(create_out, 0, sizeof(create_out));
-	err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out,
-					 sizeof(create_out));
+	err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
+			    sizeof(create_out));
 	if (err)
 		goto out;
 
@@ -286,36 +285,30 @@
 static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
 			       struct mlx5_core_srq *srq)
 {
-	u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)];
-	u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)];
-
-	memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
-	memset(xrcsrq_out, 0, sizeof(xrcsrq_out));
+	u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
+	u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
 
 	MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
 		 MLX5_CMD_OP_DESTROY_XRC_SRQ);
 	MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
 
-	return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in),
-					  xrcsrq_out, sizeof(xrcsrq_out));
+	return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+			     xrcsrq_out, sizeof(xrcsrq_out));
 }
 
 static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
 			   struct mlx5_core_srq *srq, u16 lwm)
 {
-	u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)];
-	u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)];
-
-	memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
-	memset(xrcsrq_out, 0, sizeof(xrcsrq_out));
+	u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
+	u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
 
 	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
 	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod,   MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
 	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
 	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm,      lwm);
 
-	return  mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in),
-					   xrcsrq_out, sizeof(xrcsrq_out));
+	return  mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
+			      xrcsrq_out, sizeof(xrcsrq_out));
 }
 
 static int query_xrc_srq_cmd(struct mlx5_core_dev *dev,
@@ -335,9 +328,9 @@
 	MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
 		 MLX5_CMD_OP_QUERY_XRC_SRQ);
 	MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
-	err =  mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in),
-					  xrcsrq_out,
-					  MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+
+	err =  mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out,
+			     MLX5_ST_SZ_BYTES(query_xrc_srq_out));
 	if (err)
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index 28274a6..a00ff49 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -36,17 +36,14 @@
 
 int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
 {
-	u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)];
-	u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)];
+	u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
 	int err;
 
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
-
 	MLX5_SET(alloc_transport_domain_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
 
-	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (!err)
 		*tdn = MLX5_GET(alloc_transport_domain_out, out,
 				transport_domain);
@@ -57,29 +54,23 @@
 
 void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)];
-	u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
 
 	MLX5_SET(dealloc_transport_domain_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
 	MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
-
-	mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
 
 int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_rq_out)];
+	u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {0};
 	int err;
 
 	MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ);
-
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (!err)
 		*rqn = MLX5_GET(create_rq_out, out, rqn);
 
@@ -95,21 +86,18 @@
 	MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ);
 
 	memset(out, 0, sizeof(out));
-	return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_rq);
 
 void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_rq_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_rq_out)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {0};
 
 	MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
 	MLX5_SET(destroy_rq_in, in, rqn, rqn);
-
-	mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_rq);
 
@@ -121,19 +109,17 @@
 	MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
 	MLX5_SET(query_rq_in, in, rqn, rqn);
 
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_rq);
 
 int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_sq_out)];
+	u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {0};
 	int err;
 
 	MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
-
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (!err)
 		*sqn = MLX5_GET(create_sq_out, out, sqn);
 
@@ -142,27 +128,22 @@
 
 int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_sq_out)];
+	u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0};
 
 	MLX5_SET(modify_sq_in, in, sqn, sqn);
 	MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ);
-
-	memset(out, 0, sizeof(out));
-	return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_sq);
 
 void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_sq_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_sq_out)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {0};
 
 	MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
 	MLX5_SET(destroy_sq_in, in, sqn, sqn);
-
-	mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
@@ -172,21 +153,20 @@
 
 	MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
 	MLX5_SET(query_sq_in, in, sqn, sqn);
-
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_sq);
 
 int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *tirn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_tir_out)];
+	u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {0};
 	int err;
 
 	MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
 
 	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (!err)
 		*tirn = MLX5_GET(create_tir_out, out, tirn);
 
@@ -197,39 +177,32 @@
 int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
 			 int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_tir_out)];
+	u32 out[MLX5_ST_SZ_DW(modify_tir_out)] = {0};
 
 	MLX5_SET(modify_tir_in, in, tirn, tirn);
 	MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR);
-
-	memset(out, 0, sizeof(out));
-	return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
 void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_tir_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_tir_out)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(destroy_tir_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0};
 
 	MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
 	MLX5_SET(destroy_tir_in, in, tirn, tirn);
-
-	mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_tir);
 
 int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *tisn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_tis_out)];
+	u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {0};
 	int err;
 
 	MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
-
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (!err)
 		*tisn = MLX5_GET(create_tis_out, out, tisn);
 
@@ -245,34 +218,29 @@
 	MLX5_SET(modify_tis_in, in, tisn, tisn);
 	MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
 
-	return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_tis);
 
 void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_tis_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_tis_out)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(destroy_tis_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
 
 	MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
 	MLX5_SET(destroy_tis_in, in, tisn, tisn);
-
-	mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_tis);
 
 int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *rmpn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_rmp_out)];
+	u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0};
 	int err;
 
 	MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP);
-
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (!err)
 		*rmpn = MLX5_GET(create_rmp_out, out, rmpn);
 
@@ -281,38 +249,31 @@
 
 int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_rmp_out)];
+	u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0};
 
 	MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
-
-	memset(out, 0, sizeof(out));
-	return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
 int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0};
 
 	MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
 	MLX5_SET(destroy_rmp_in, in, rmpn, rmpn);
-
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
+	return mlx5_cmd_exec(dev, in, sizeof(in), out,
 					  sizeof(out));
 }
 
 int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out)
 {
-	u32 in[MLX5_ST_SZ_DW(query_rmp_in)];
+	u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0};
 	int outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP);
 	MLX5_SET(query_rmp_in, in, rmpn,   rmpn);
-
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 
 int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm)
@@ -347,13 +308,11 @@
 int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			  u32 *xsrqn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
+	u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0};
 	int err;
 
 	MLX5_SET(create_xrc_srq_in, in, opcode,     MLX5_CMD_OP_CREATE_XRC_SRQ);
-
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (!err)
 		*xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn);
 
@@ -362,33 +321,25 @@
 
 int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
 
 	MLX5_SET(destroy_xrc_srq_in, in, opcode,   MLX5_CMD_OP_DESTROY_XRC_SRQ);
 	MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn);
-
-	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					  sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out)
 {
-	u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
+	u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {0};
 	void *srqc;
 	void *xrc_srqc;
 	int err;
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(query_xrc_srq_in, in, opcode,   MLX5_CMD_OP_QUERY_XRC_SRQ);
 	MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn);
-
-	err =  mlx5_cmd_exec_check_status(dev, in, sizeof(in),
-					  out,
-					  MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out,
+			    MLX5_ST_SZ_BYTES(query_xrc_srq_out));
 	if (!err) {
 		xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out,
 					xrc_srq_context_entry);
@@ -401,32 +352,25 @@
 
 int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm)
 {
-	u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)];
-	u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
 
 	MLX5_SET(arm_xrc_srq_in, in, opcode,   MLX5_CMD_OP_ARM_XRC_SRQ);
 	MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn);
 	MLX5_SET(arm_xrc_srq_in, in, lwm,      lwm);
 	MLX5_SET(arm_xrc_srq_in, in, op_mod,
 		 MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
-
-	return  mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
-					   sizeof(out));
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *rqtn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
+	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
 	int err;
 
 	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
-
-	memset(out, 0, sizeof(out));
-	err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 	if (!err)
 		*rqtn = MLX5_GET(create_rqt_out, out, rqtn);
 
@@ -437,25 +381,20 @@
 int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
 			 int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_rqt_out)];
+	u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {0};
 
 	MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
 	MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
-
-	memset(out, 0, sizeof(out));
-	return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
 void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
-	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
 
 	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
 	MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
-
-	mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_destroy_rqt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 5ff8af4..ab0b896 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -42,73 +42,28 @@
 	NUM_LOW_LAT_UUARS	= 4,
 };
 
-
-struct mlx5_alloc_uar_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_alloc_uar_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			uarn;
-	u8			rsvd[4];
-};
-
-struct mlx5_free_uar_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			uarn;
-	u8			rsvd[4];
-};
-
-struct mlx5_free_uar_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 {
-	struct mlx5_alloc_uar_mbox_in	in;
-	struct mlx5_alloc_uar_mbox_out	out;
+	u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(alloc_uar_in)]   = {0};
 	int err;
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		goto ex;
-
-	if (out.hdr.status) {
-		err = mlx5_cmd_status_to_err(&out.hdr);
-		goto ex;
-	}
-
-	*uarn = be32_to_cpu(out.uarn) & 0xffffff;
-
-ex:
+	MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)
+		*uarn = MLX5_GET(alloc_uar_out, out, uar);
 	return err;
 }
 EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
 
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
 {
-	struct mlx5_free_uar_mbox_in	in;
-	struct mlx5_free_uar_mbox_out	out;
-	int err;
+	u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)]   = {0};
 
-	memset(&in, 0, sizeof(in));
-	memset(&out, 0, sizeof(out));
-	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR);
-	in.uarn = cpu_to_be32(uarn);
-	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-	if (err)
-		goto ex;
-
-	if (out.hdr.status)
-		err = mlx5_cmd_status_to_err(&out.hdr);
-
-ex:
-	return err;
+	MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+	MLX5_SET(dealloc_uar_in, in, uar, uarn);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_cmd_free_uar);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 21365d0..525f17a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -39,10 +39,7 @@
 static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
 				   u16 vport, u32 *out, int outlen)
 {
-	int err;
-	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0};
 
 	MLX5_SET(query_vport_state_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_VPORT_STATE);
@@ -51,11 +48,7 @@
 	if (vport)
 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
 
-	err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
-	if (err)
-		mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n");
-
-	return err;
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
 }
 
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
@@ -81,58 +74,43 @@
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 state)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)];
-	u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)];
-	int err;
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
 
 	MLX5_SET(modify_vport_state_in, in, opcode,
 		 MLX5_CMD_OP_MODIFY_VPORT_STATE);
 	MLX5_SET(modify_vport_state_in, in, op_mod, opmod);
 	MLX5_SET(modify_vport_state_in, in, vport_number, vport);
-
 	if (vport)
 		MLX5_SET(modify_vport_state_in, in, other_vport, 1);
-
 	MLX5_SET(modify_vport_state_in, in, admin_state, state);
 
-	err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out,
-					 sizeof(out));
-	if (err)
-		mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n");
-
-	return err;
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
 EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state);
 
 static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
 					u32 *out, int outlen)
 {
-	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
-
-	memset(in, 0, sizeof(in));
+	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
 
 	MLX5_SET(query_nic_vport_context_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
-
 	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
 	if (vport)
 		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
-	return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
 }
 
 static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
 					 int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+	u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
 
 	MLX5_SET(modify_nic_vport_context_in, in, opcode,
 		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
-
-	memset(out, 0, sizeof(out));
-	return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 }
 
 void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
@@ -147,6 +125,26 @@
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
 
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+				     u16 vport, u8 min_inline)
+{
+	u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
+	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+	void *nic_vport_ctx;
+
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 field_select.min_inline, 1);
+	MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+	MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
+
+	nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
+				     in, nic_vport_context);
+	MLX5_SET(nic_vport_context, nic_vport_ctx,
+		 min_wqe_inline_mode, min_inline);
+
+	return mlx5_modify_nic_vport_context(mdev, in, inlen);
+}
+
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 *addr)
 {
@@ -254,7 +252,7 @@
 				  u8 addr_list[][ETH_ALEN],
 				  int *list_size)
 {
-	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)];
+	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
 	void *nic_vport_ctx;
 	int max_list_size;
 	int req_list_size;
@@ -278,7 +276,6 @@
 	out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
 			req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
 
-	memset(in, 0, sizeof(in));
 	out = kzalloc(out_sz, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
@@ -291,7 +288,7 @@
 	if (vport)
 		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
-	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
 	if (err)
 		goto out;
 
@@ -361,7 +358,7 @@
 		ether_addr_copy(curr_mac, addr_list[i]);
 	}
 
-	err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
 	kfree(in);
 	return err;
 }
@@ -406,7 +403,7 @@
 	if (vport)
 		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
-	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
 	if (err)
 		goto out;
 
@@ -473,7 +470,7 @@
 		MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]);
 	}
 
-	err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out));
+	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
 	kfree(in);
 	return err;
 }
@@ -631,10 +628,6 @@
 	if (err)
 		goto out;
 
-	err = mlx5_cmd_status_to_err_v2(out);
-	if (err)
-		goto out;
-
 	tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out);
 	gid->global.subnet_prefix = tmp->global.subnet_prefix;
 	gid->global.interface_id = tmp->global.interface_id;
@@ -700,10 +693,6 @@
 	if (err)
 		goto out;
 
-	err = mlx5_cmd_status_to_err_v2(out);
-	if (err)
-		goto out;
-
 	pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey);
 	for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey))
 		*pkey = MLX5_GET_PR(pkey, pkarr, pkey);
@@ -721,7 +710,7 @@
 				 struct mlx5_hca_vport_context *rep)
 {
 	int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
-	int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)];
+	int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {0};
 	int is_group_manager;
 	void *out;
 	void *ctx;
@@ -729,7 +718,6 @@
 
 	is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
 
-	memset(in, 0, sizeof(in));
 	out = kzalloc(out_sz, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
@@ -752,9 +740,6 @@
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out,  out_sz);
 	if (err)
 		goto ex;
-	err = mlx5_cmd_status_to_err_v2(out);
-	if (err)
-		goto ex;
 
 	ctx = MLX5_ADDR_OF(query_hca_vport_context_out, out, hca_vport_context);
 	rep->field_select = MLX5_GET_PR(hca_vport_context, ctx, field_select);
@@ -969,10 +954,6 @@
 		MLX5_SET(query_vport_counter_in, in, port_num, port_num);
 
 	err = mlx5_cmd_exec(dev, in, in_sz, out,  out_sz);
-	if (err)
-		goto free;
-	err = mlx5_cmd_status_to_err_v2(out);
-
 free:
 	kvfree(in);
 	return err;
@@ -1035,11 +1016,6 @@
 	MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter);
 	MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter);
 	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
-	if (err)
-		goto ex;
-
-	err = mlx5_cmd_status_to_err_v2(out);
-
 ex:
 	kfree(in);
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
index e25a73ed..07a9ba6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c
@@ -46,41 +46,24 @@
 
 static int mlx5e_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 {
-	struct mlx5_outbox_hdr *hdr;
-	int err;
-
-	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)];
-	u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
 
 	MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
 		 MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
 	MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-
-	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
-	if (err)
-		return err;
-
-	hdr = (struct mlx5_outbox_hdr *)out;
-	return hdr->status ? -ENOMEM : 0;
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
 
 static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 {
-	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)];
-	u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)];
-
-	memset(in, 0, sizeof(in));
-	memset(out, 0, sizeof(out));
+	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
 
 	MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
 		 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
 	MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-
-	return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out,
-					  sizeof(out));
+	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
 }
 
 struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 068ee65a..aa33d58 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1100,10 +1100,15 @@
 		goto err_alloc_stats;
 	}
 
-	if (mlxsw_driver->profile->used_max_lag &&
-	    mlxsw_driver->profile->used_max_port_per_lag) {
-		alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag *
-			     mlxsw_driver->profile->max_port_per_lag;
+	err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
+			      &mlxsw_core->resources);
+	if (err)
+		goto err_bus_init;
+
+	if (mlxsw_core->resources.max_lag_valid &&
+	    mlxsw_core->resources.max_ports_in_lag_valid) {
+		alloc_size = sizeof(u8) * mlxsw_core->resources.max_lag *
+			mlxsw_core->resources.max_ports_in_lag;
 		mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL);
 		if (!mlxsw_core->lag.mapping) {
 			err = -ENOMEM;
@@ -1111,11 +1116,6 @@
 		}
 	}
 
-	err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
-			      &mlxsw_core->resources);
-	if (err)
-		goto err_bus_init;
-
 	err = mlxsw_emad_init(mlxsw_core);
 	if (err)
 		goto err_emad_init;
@@ -1146,10 +1146,10 @@
 err_devlink_register:
 	mlxsw_emad_fini(mlxsw_core);
 err_emad_init:
-	mlxsw_bus->fini(bus_priv);
-err_bus_init:
 	kfree(mlxsw_core->lag.mapping);
 err_alloc_lag_mapping:
+	mlxsw_bus->fini(bus_priv);
+err_bus_init:
 	free_percpu(mlxsw_core->pcpu_stats);
 err_alloc_stats:
 	devlink_free(devlink);
@@ -1615,7 +1615,7 @@
 static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core,
 					u16 lag_id, u8 port_index)
 {
-	return mlxsw_core->driver->profile->max_port_per_lag * lag_id +
+	return mlxsw_core->resources.max_ports_in_lag * lag_id +
 	       port_index;
 }
 
@@ -1644,7 +1644,7 @@
 {
 	int i;
 
-	for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) {
+	for (i = 0; i < mlxsw_core->resources.max_ports_in_lag; i++) {
 		int index = mlxsw_core_lag_mapping_index(mlxsw_core,
 							 lag_id, i);
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index d3476ea..c4f550b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -87,6 +87,7 @@
 	void (*func)(struct sk_buff *skb, u8 local_port, void *priv);
 	u8 local_port;
 	u16 trap_id;
+	enum mlxsw_reg_hpkt_action action;
 };
 
 struct mlxsw_event_listener {
@@ -178,8 +179,6 @@
 
 struct mlxsw_config_profile {
 	u16	used_max_vepa_channels:1,
-		used_max_lag:1,
-		used_max_port_per_lag:1,
 		used_max_mid:1,
 		used_max_pgt:1,
 		used_max_system_port:1,
@@ -191,10 +190,9 @@
 		used_max_pkey:1,
 		used_ar_sec:1,
 		used_adaptive_routing_group_cap:1,
-		used_kvd_sizes:1;
+		used_kvd_split_data:1; /* indicate for the kvd's values */
+
 	u8	max_vepa_channels;
-	u16	max_lag;
-	u16	max_port_per_lag;
 	u16	max_mid;
 	u16	max_pgt;
 	u16	max_system_port;
@@ -213,8 +211,9 @@
 	u16	adaptive_routing_group_cap;
 	u8	arn;
 	u32	kvd_linear_size;
-	u32	kvd_hash_single_size;
-	u32	kvd_hash_double_size;
+	u16	kvd_hash_granularity;
+	u8	kvd_hash_single_parts;
+	u8	kvd_hash_double_parts;
 	u8	resource_query_enable;
 	struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
 };
@@ -268,8 +267,35 @@
 };
 
 struct mlxsw_resources {
-	u8	max_span_valid:1;
+	u32	max_span_valid:1,
+		max_lag_valid:1,
+		max_ports_in_lag_valid:1,
+		kvd_size_valid:1,
+		kvd_single_min_size_valid:1,
+		kvd_double_min_size_valid:1,
+		max_virtual_routers_valid:1,
+		max_system_ports_valid:1,
+		max_vlan_groups_valid:1,
+		max_regions_valid:1,
+		max_rif_valid:1;
 	u8      max_span;
+	u8	max_lag;
+	u8	max_ports_in_lag;
+	u32	kvd_size;
+	u32	kvd_single_min_size;
+	u32	kvd_double_min_size;
+	u16     max_virtual_routers;
+	u16	max_system_ports;
+	u16	max_vlan_groups;
+	u16	max_regions;
+	u16	max_rif;
+
+	/* Internal resources.
+	 * Determined by the SW, not queried from the HW.
+	 */
+	u32	kvd_single_size;
+	u32	kvd_double_size;
+	u32	kvd_linear_size;
 };
 
 struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 1d1360c..e742bd4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1156,6 +1156,16 @@
 
 #define MLXSW_RESOURCES_TABLE_END_ID 0xffff
 #define MLXSW_MAX_SPAN_ID 0x2420
+#define MLXSW_MAX_LAG_ID 0x2520
+#define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521
+#define MLXSW_KVD_SIZE_ID 0x1001
+#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002
+#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003
+#define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01
+#define MLXSW_MAX_SYSTEM_PORT_ID 0x2502
+#define MLXSW_MAX_VLAN_GROUPS_ID 0x2906
+#define MLXSW_MAX_REGIONS_ID 0x2901
+#define MLXSW_MAX_RIF_ID 0x2C02
 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100
 #define MLXSW_RESOURCES_PER_QUERY 32
 
@@ -1167,6 +1177,46 @@
 		resources->max_span = val;
 		resources->max_span_valid = 1;
 		break;
+	case MLXSW_MAX_LAG_ID:
+		resources->max_lag = val;
+		resources->max_lag_valid = 1;
+		break;
+	case MLXSW_MAX_PORTS_IN_LAG_ID:
+		resources->max_ports_in_lag = val;
+		resources->max_ports_in_lag_valid = 1;
+		break;
+	case MLXSW_KVD_SIZE_ID:
+		resources->kvd_size = val;
+		resources->kvd_size_valid = 1;
+		break;
+	case MLXSW_KVD_SINGLE_MIN_SIZE_ID:
+		resources->kvd_single_min_size = val;
+		resources->kvd_single_min_size_valid = 1;
+		break;
+	case MLXSW_KVD_DOUBLE_MIN_SIZE_ID:
+		resources->kvd_double_min_size = val;
+		resources->kvd_double_min_size_valid = 1;
+		break;
+	case MLXSW_MAX_VIRTUAL_ROUTERS_ID:
+		resources->max_virtual_routers = val;
+		resources->max_virtual_routers_valid = 1;
+		break;
+	case MLXSW_MAX_SYSTEM_PORT_ID:
+		resources->max_system_ports = val;
+		resources->max_system_ports_valid = 1;
+		break;
+	case MLXSW_MAX_VLAN_GROUPS_ID:
+		resources->max_vlan_groups = val;
+		resources->max_vlan_groups_valid = 1;
+		break;
+	case MLXSW_MAX_REGIONS_ID:
+		resources->max_regions = val;
+		resources->max_regions_valid = 1;
+		break;
+	case MLXSW_MAX_RIF_ID:
+		resources->max_rif = val;
+		resources->max_rif_valid = 1;
+		break;
 	default:
 		break;
 	}
@@ -1209,10 +1259,52 @@
 	return -EIO;
 }
 
+static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile,
+					   struct mlxsw_resources *resources)
+{
+	u32 singles_size, doubles_size, linear_size;
+
+	if (!resources->kvd_single_min_size_valid ||
+	    !resources->kvd_double_min_size_valid ||
+	    !profile->used_kvd_split_data)
+		return -EIO;
+
+	linear_size = profile->kvd_linear_size;
+
+	/* The hash part is what left of the kvd without the
+	 * linear part. It is split to the single size and
+	 * double size by the parts ratio from the profile.
+	 * Both sizes must be a multiplications of the
+	 * granularity from the profile.
+	 */
+	doubles_size = (resources->kvd_size - linear_size);
+	doubles_size *= profile->kvd_hash_double_parts;
+	doubles_size /= (profile->kvd_hash_double_parts +
+			 profile->kvd_hash_single_parts);
+	doubles_size /= profile->kvd_hash_granularity;
+	doubles_size *= profile->kvd_hash_granularity;
+	singles_size = resources->kvd_size - doubles_size -
+		       linear_size;
+
+	/* Check results are legal. */
+	if (singles_size < resources->kvd_single_min_size ||
+	    doubles_size < resources->kvd_double_min_size ||
+	    resources->kvd_size < linear_size)
+		return -EIO;
+
+	resources->kvd_single_size = singles_size;
+	resources->kvd_double_size = doubles_size;
+	resources->kvd_linear_size = linear_size;
+
+	return 0;
+}
+
 static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
-				    const struct mlxsw_config_profile *profile)
+				    const struct mlxsw_config_profile *profile,
+				    struct mlxsw_resources *resources)
 {
 	int i;
+	int err;
 
 	mlxsw_cmd_mbox_zero(mbox);
 
@@ -1222,18 +1314,6 @@
 		mlxsw_cmd_mbox_config_profile_max_vepa_channels_set(
 			mbox, profile->max_vepa_channels);
 	}
-	if (profile->used_max_lag) {
-		mlxsw_cmd_mbox_config_profile_set_max_lag_set(
-			mbox, 1);
-		mlxsw_cmd_mbox_config_profile_max_lag_set(
-			mbox, profile->max_lag);
-	}
-	if (profile->used_max_port_per_lag) {
-		mlxsw_cmd_mbox_config_profile_set_max_port_per_lag_set(
-			mbox, 1);
-		mlxsw_cmd_mbox_config_profile_max_port_per_lag_set(
-			mbox, profile->max_port_per_lag);
-	}
 	if (profile->used_max_mid) {
 		mlxsw_cmd_mbox_config_profile_set_max_mid_set(
 			mbox, 1);
@@ -1310,19 +1390,22 @@
 		mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
 			mbox, profile->adaptive_routing_group_cap);
 	}
-	if (profile->used_kvd_sizes) {
-		mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(
-			mbox, 1);
-		mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(
-			mbox, profile->kvd_linear_size);
-		mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(
-			mbox, 1);
-		mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(
-			mbox, profile->kvd_hash_single_size);
+	if (resources->kvd_size_valid) {
+		err = mlxsw_pci_profile_get_kvd_sizes(profile, resources);
+		if (err)
+			return err;
+
+		mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1);
+		mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox,
+						resources->kvd_linear_size);
+		mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox,
+									   1);
+		mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox,
+						resources->kvd_single_size);
 		mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set(
-			mbox, 1);
-		mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(
-			mbox, profile->kvd_hash_double_size);
+								mbox, 1);
+		mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox,
+						resources->kvd_double_size);
 	}
 
 	for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++)
@@ -1524,7 +1607,7 @@
 	if (err)
 		goto err_query_resources;
 
-	err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile);
+	err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, resources);
 	if (err)
 		goto err_config_profile;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h
index f33b997..af371a8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/port.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/port.h
@@ -56,6 +56,7 @@
 #define MLXSW_PORT_PHY_BITS_MASK	(MLXSW_PORT_MAX_PHY_PORTS - 1)
 
 #define MLXSW_PORT_CPU_PORT		0x0
+#define MLXSW_PORT_ROUTER_PORT		(MLXSW_PORT_MAX_PHY_PORTS + 2)
 
 #define MLXSW_PORT_DONT_CARE		(MLXSW_PORT_MAX_PORTS)
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 7ca9201..6460c72 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -591,6 +591,12 @@
  */
 MLXSW_ITEM32(reg, sfn, swid, 0x00, 24, 8);
 
+/* reg_sfn_end
+ * Forces the current session to end.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, sfn, end, 0x04, 20, 1);
+
 /* reg_sfn_num_rec
  * Request: Number of learned notifications and aged-out notification
  * records requested.
@@ -605,6 +611,7 @@
 {
 	MLXSW_REG_ZERO(sfn, payload);
 	mlxsw_reg_sfn_swid_set(payload, 0);
+	mlxsw_reg_sfn_end_set(payload, 1);
 	mlxsw_reg_sfn_num_rec_set(payload, MLXSW_REG_SFN_REC_MAX_COUNT);
 }
 
@@ -1385,7 +1392,7 @@
 {
 	MLXSW_REG_ZERO(slcr, payload);
 	mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL);
-	mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR);
+	mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC);
 	mlxsw_reg_slcr_lag_hash_set(payload, lag_hash);
 }
 
@@ -2131,6 +2138,18 @@
  */
 MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3);
 
+enum {
+	MLXSW_REG_PTYS_AN_STATUS_NA,
+	MLXSW_REG_PTYS_AN_STATUS_OK,
+	MLXSW_REG_PTYS_AN_STATUS_FAIL,
+};
+
+/* reg_ptys_an_status
+ * Autonegotiation status.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4);
+
 #define MLXSW_REG_PTYS_ETH_SPEED_SGMII			BIT(0)
 #define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX		BIT(1)
 #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4		BIT(2)
@@ -2145,6 +2164,7 @@
 #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR		BIT(14)
 #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4		BIT(15)
 #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4	BIT(16)
+#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2		BIT(18)
 #define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4		BIT(19)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4		BIT(20)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4		BIT(21)
@@ -2177,6 +2197,13 @@
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32);
 
+/* reg_ptys_eth_proto_lp_advertise
+ * The protocols that were advertised by the link partner during
+ * autonegotiation.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
+
 static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port,
 				       u32 proto_admin)
 {
@@ -3383,6 +3410,15 @@
  */
 MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
 
+/* reg_ritr_lb_en
+ * Loop-back filter enable for unicast packets.
+ * If the flag is set then loop-back filter for unicast packets is
+ * implemented on the RIF. Multicast packets are always subject to
+ * loop-back filtering.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1);
+
 /* reg_ritr_virtual_router
  * Virtual router ID associated with the router interface.
  * Access: RW
@@ -3484,6 +3520,7 @@
 	mlxsw_reg_ritr_op_set(payload, op);
 	mlxsw_reg_ritr_rif_set(payload, rif);
 	mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
+	mlxsw_reg_ritr_lb_en_set(payload, 1);
 	mlxsw_reg_ritr_mtu_set(payload, mtu);
 	mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac);
 }
@@ -4000,6 +4037,7 @@
 {
 	MLXSW_REG_ZERO(ralue, payload);
 	mlxsw_reg_ralue_protocol_set(payload, protocol);
+	mlxsw_reg_ralue_op_set(payload, op);
 	mlxsw_reg_ralue_virtual_router_set(payload, virtual_router);
 	mlxsw_reg_ralue_prefix_len_set(payload, prefix_len);
 	mlxsw_reg_ralue_entry_type_set(payload,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index c3e6150..1ec0a4c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -56,6 +56,7 @@
 #include <generated/utsrelease.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/netevent.h>
 
 #include "spectrum.h"
 #include "core.h"
@@ -247,7 +248,8 @@
 	span_entry->used = false;
 }
 
-struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port)
+static struct mlxsw_sp_span_entry *
+mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port)
 {
 	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
 	int i;
@@ -261,7 +263,8 @@
 	return NULL;
 }
 
-struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
+static struct mlxsw_sp_span_entry
+*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port)
 {
 	struct mlxsw_sp_span_entry *span_entry;
 
@@ -363,7 +366,8 @@
 	}
 
 	/* bind the port to the SPAN entry */
-	mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, true, pa_id);
+	mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
+			    (enum mlxsw_reg_mpar_i_e) type, true, pa_id);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
 	if (err)
 		goto err_mpar_reg_write;
@@ -404,7 +408,8 @@
 		return;
 
 	/* remove the inspected port */
-	mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, false, pa_id);
+	mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
+			    (enum mlxsw_reg_mpar_i_e) type, false, pa_id);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
 
 	/* remove the SBIB buffer if it was egress SPAN */
@@ -555,8 +560,9 @@
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl);
 }
 
-static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
-					  u16 vid, bool learn_enable)
+int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				     u16 vid_begin, u16 vid_end,
+				     bool learn_enable)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char *spvmlr_pl;
@@ -565,13 +571,20 @@
 	spvmlr_pl = kmalloc(MLXSW_REG_SPVMLR_LEN, GFP_KERNEL);
 	if (!spvmlr_pl)
 		return -ENOMEM;
-	mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid, vid,
-			      learn_enable);
+	mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid_begin,
+			      vid_end, learn_enable);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvmlr), spvmlr_pl);
 	kfree(spvmlr_pl);
 	return err;
 }
 
+static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+					  u16 vid, bool learn_enable)
+{
+	return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid,
+						learn_enable);
+}
+
 static int
 mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port)
 {
@@ -810,9 +823,9 @@
 	return err;
 }
 
-static struct rtnl_link_stats64 *
-mlxsw_sp_port_get_stats64(struct net_device *dev,
-			  struct rtnl_link_stats64 *stats)
+static int
+mlxsw_sp_port_get_sw_stats64(const struct net_device *dev,
+			     struct rtnl_link_stats64 *stats)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp_port_pcpu_stats *p;
@@ -839,6 +852,107 @@
 		tx_dropped	+= p->tx_dropped;
 	}
 	stats->tx_dropped	= tx_dropped;
+	return 0;
+}
+
+static bool mlxsw_sp_port_has_offload_stats(int attr_id)
+{
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		return true;
+	}
+
+	return false;
+}
+
+static int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev,
+					   void *sp)
+{
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		return mlxsw_sp_port_get_sw_stats64(dev, sp);
+	}
+
+	return -EINVAL;
+}
+
+static int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp,
+				       int prio, char *ppcnt_pl)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+
+	mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio);
+	return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl);
+}
+
+static int mlxsw_sp_port_get_hw_stats(struct net_device *dev,
+				      struct rtnl_link_stats64 *stats)
+{
+	char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
+	int err;
+
+	err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT,
+					  0, ppcnt_pl);
+	if (err)
+		goto out;
+
+	stats->tx_packets =
+		mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl);
+	stats->rx_packets =
+		mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl);
+	stats->tx_bytes =
+		mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl);
+	stats->rx_bytes =
+		mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl);
+	stats->multicast =
+		mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl);
+
+	stats->rx_crc_errors =
+		mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl);
+	stats->rx_frame_errors =
+		mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl);
+
+	stats->rx_length_errors = (
+		mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl) +
+		mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl) +
+		mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl));
+
+	stats->rx_errors = (stats->rx_crc_errors +
+		stats->rx_frame_errors + stats->rx_length_errors);
+
+out:
+	return err;
+}
+
+static void update_stats_cache(struct work_struct *work)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port =
+		container_of(work, struct mlxsw_sp_port,
+			     hw_stats.update_dw.work);
+
+	if (!netif_carrier_ok(mlxsw_sp_port->dev))
+		goto out;
+
+	mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev,
+				   mlxsw_sp_port->hw_stats.cache);
+
+out:
+	mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw,
+			       MLXSW_HW_STATS_UPDATE_TIME);
+}
+
+/* Return the stats from a cache that is updated periodically,
+ * as this function might get called in an atomic context.
+ */
+static struct rtnl_link_stats64 *
+mlxsw_sp_port_get_stats64(struct net_device *dev,
+			  struct rtnl_link_stats64 *stats)
+{
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+
+	memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats));
+
 	return stats;
 }
 
@@ -942,8 +1056,8 @@
 	kfree(mlxsw_sp_vport);
 }
 
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
-			  u16 vid)
+static int mlxsw_sp_port_add_vid(struct net_device *dev,
+				 __be16 __always_unused proto, u16 vid)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp_port *mlxsw_sp_vport;
@@ -956,16 +1070,12 @@
 	if (!vid)
 		return 0;
 
-	if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) {
-		netdev_warn(dev, "VID=%d already configured\n", vid);
+	if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid))
 		return 0;
-	}
 
 	mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid);
-	if (!mlxsw_sp_vport) {
-		netdev_err(dev, "Failed to create vPort for VID=%d\n", vid);
+	if (!mlxsw_sp_vport)
 		return -ENOMEM;
-	}
 
 	/* When adding the first VLAN interface on a bridged port we need to
 	 * transition all the active 802.1Q bridge VLANs to use explicit
@@ -973,30 +1083,17 @@
 	 */
 	if (list_is_singular(&mlxsw_sp_port->vports_list)) {
 		err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port);
-		if (err) {
-			netdev_err(dev, "Failed to set to Virtual mode\n");
+		if (err)
 			goto err_port_vp_mode_trans;
-		}
-	}
-
-	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false);
-	if (err) {
-		netdev_err(dev, "Failed to disable learning for VID=%d\n", vid);
-		goto err_port_vid_learning_set;
 	}
 
 	err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged);
-	if (err) {
-		netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
-			   vid);
+	if (err)
 		goto err_port_add_vid;
-	}
 
 	return 0;
 
 err_port_add_vid:
-	mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
-err_port_vid_learning_set:
 	if (list_is_singular(&mlxsw_sp_port->vports_list))
 		mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
 err_port_vp_mode_trans:
@@ -1010,7 +1107,6 @@
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp_port *mlxsw_sp_vport;
 	struct mlxsw_sp_fid *f;
-	int err;
 
 	/* VLAN 0 is removed from HW filter when device goes down, but
 	 * it is reserved in our case, so simply return.
@@ -1019,23 +1115,10 @@
 		return 0;
 
 	mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
-	if (!mlxsw_sp_vport) {
-		netdev_warn(dev, "VID=%d does not exist\n", vid);
+	if (WARN_ON(!mlxsw_sp_vport))
 		return 0;
-	}
 
-	err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
-	if (err) {
-		netdev_err(dev, "Failed to set VLAN membership for VID=%d\n",
-			   vid);
-		return err;
-	}
-
-	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true);
-	if (err) {
-		netdev_err(dev, "Failed to enable learning for VID=%d\n", vid);
-		return err;
-	}
+	mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false);
 
 	/* Drop FID reference. If this was the last reference the
 	 * resources will be freed.
@@ -1048,13 +1131,8 @@
 	 * transition all active 802.1Q bridge VLANs to use VID to FID
 	 * mappings and set port's mode to VLAN mode.
 	 */
-	if (list_is_singular(&mlxsw_sp_port->vports_list)) {
-		err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
-		if (err) {
-			netdev_err(dev, "Failed to set to VLAN mode\n");
-			return err;
-		}
-	}
+	if (list_is_singular(&mlxsw_sp_port->vports_list))
+		mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port);
 
 	mlxsw_sp_port_vport_destroy(mlxsw_sp_vport);
 
@@ -1149,6 +1227,7 @@
 					  bool ingress)
 {
 	const struct tc_action *a;
+	LIST_HEAD(actions);
 	int err;
 
 	if (!tc_single_action(cls->exts)) {
@@ -1156,7 +1235,8 @@
 		return -ENOTSUPP;
 	}
 
-	tc_for_each_action(a, cls->exts) {
+	tcf_exts_to_list(cls->exts, &actions);
+	list_for_each_entry(a, &actions, list) {
 		if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL))
 			return -ENOTSUPP;
 
@@ -1234,6 +1314,8 @@
 	.ndo_set_mac_address	= mlxsw_sp_port_set_mac_address,
 	.ndo_change_mtu		= mlxsw_sp_port_change_mtu,
 	.ndo_get_stats64	= mlxsw_sp_port_get_stats64,
+	.ndo_has_offload_stats	= mlxsw_sp_port_has_offload_stats,
+	.ndo_get_offload_stats	= mlxsw_sp_port_get_offload_stats,
 	.ndo_vlan_rx_add_vid	= mlxsw_sp_port_add_vid,
 	.ndo_vlan_rx_kill_vid	= mlxsw_sp_port_kill_vid,
 	.ndo_neigh_construct	= mlxsw_sp_router_neigh_construct,
@@ -1572,8 +1654,6 @@
 				      enum mlxsw_reg_ppcnt_grp grp, int prio,
 				      u64 *data, int data_index)
 {
-	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_port_hw_stats *hw_stats;
 	char ppcnt_pl[MLXSW_REG_PPCNT_LEN];
 	int i, len;
@@ -1582,10 +1662,9 @@
 	err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp);
 	if (err)
 		return;
-	mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio);
-	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl);
+	mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl);
 	for (i = 0; i < len; i++)
-		data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0;
+		data[data_index + i] = hw_stats[i].getter(ppcnt_pl);
 }
 
 static void mlxsw_sp_port_get_stats(struct net_device *dev,
@@ -1624,112 +1703,149 @@
 }
 
 struct mlxsw_sp_port_link_mode {
+	enum ethtool_link_mode_bit_indices mask_ethtool;
 	u32 mask;
-	u32 supported;
-	u32 advertised;
 	u32 speed;
 };
 
 static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = {
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100BASE_T,
-		.supported	= SUPPORTED_100baseT_Full,
-		.advertised	= ADVERTISED_100baseT_Full,
-		.speed		= 100,
-	},
-	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX,
-		.speed		= 100,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+		.speed		= SPEED_100,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_SGMII |
 				  MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX,
-		.supported	= SUPPORTED_1000baseKX_Full,
-		.advertised	= ADVERTISED_1000baseKX_Full,
-		.speed		= 1000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+		.speed		= SPEED_1000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T,
-		.supported	= SUPPORTED_10000baseT_Full,
-		.advertised	= ADVERTISED_10000baseT_Full,
-		.speed		= 10000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+		.speed		= SPEED_10000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 |
 				  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4,
-		.supported	= SUPPORTED_10000baseKX4_Full,
-		.advertised	= ADVERTISED_10000baseKX4_Full,
-		.speed		= 10000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT,
+		.speed		= SPEED_10000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
 				  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
 				  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
 				  MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR,
-		.supported	= SUPPORTED_10000baseKR_Full,
-		.advertised	= ADVERTISED_10000baseKR_Full,
-		.speed		= 10000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+		.speed		= SPEED_10000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2,
-		.supported	= SUPPORTED_20000baseKR2_Full,
-		.advertised	= ADVERTISED_20000baseKR2_Full,
-		.speed		= 20000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT,
+		.speed		= SPEED_20000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4,
-		.supported	= SUPPORTED_40000baseCR4_Full,
-		.advertised	= ADVERTISED_40000baseCR4_Full,
-		.speed		= 40000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+		.speed		= SPEED_40000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4,
-		.supported	= SUPPORTED_40000baseKR4_Full,
-		.advertised	= ADVERTISED_40000baseKR4_Full,
-		.speed		= 40000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+		.speed		= SPEED_40000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4,
-		.supported	= SUPPORTED_40000baseSR4_Full,
-		.advertised	= ADVERTISED_40000baseSR4_Full,
-		.speed		= 40000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT,
+		.speed		= SPEED_40000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4,
-		.supported	= SUPPORTED_40000baseLR4_Full,
-		.advertised	= ADVERTISED_40000baseLR4_Full,
-		.speed		= 40000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT,
+		.speed		= SPEED_40000,
 	},
 	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR |
-				  MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR |
-				  MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
-		.speed		= 25000,
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+		.speed		= SPEED_25000,
 	},
 	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 |
-				  MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2,
-		.speed		= 50000,
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+		.speed		= SPEED_25000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+		.speed		= SPEED_25000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_25000baseSR_Full_BIT,
+		.speed		= SPEED_25000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT,
+		.speed		= SPEED_50000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT,
+		.speed		= SPEED_50000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+		.speed		= SPEED_50000,
 	},
 	{
 		.mask		= MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
-		.supported	= SUPPORTED_56000baseKR4_Full,
-		.advertised	= ADVERTISED_56000baseKR4_Full,
-		.speed		= 56000,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT,
+		.speed		= SPEED_56000,
 	},
 	{
-		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
-				  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
-		.speed		= 100000,
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT,
+		.speed		= SPEED_56000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT,
+		.speed		= SPEED_56000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT,
+		.speed		= SPEED_56000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+		.speed		= SPEED_100000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT,
+		.speed		= SPEED_100000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+		.speed		= SPEED_100000,
+	},
+	{
+		.mask		= MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
+		.mask_ethtool	= ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
+		.speed		= SPEED_100000,
 	},
 };
 
 #define MLXSW_SP_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp_port_link_mode)
 
-static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto)
+static void
+mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto,
+				  struct ethtool_link_ksettings *cmd)
 {
 	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR |
 			      MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR |
@@ -1737,43 +1853,29 @@
 			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 |
 			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
 			      MLXSW_REG_PTYS_ETH_SPEED_SGMII))
-		return SUPPORTED_FIBRE;
+		ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
 
 	if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR |
 			      MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 |
 			      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 |
 			      MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
 			      MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX))
-		return SUPPORTED_Backplane;
-	return 0;
+		ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane);
 }
 
-static u32 mlxsw_sp_from_ptys_supported_link(u32 ptys_eth_proto)
+static void mlxsw_sp_from_ptys_link(u32 ptys_eth_proto, unsigned long *mode)
 {
-	u32 modes = 0;
 	int i;
 
 	for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
 		if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask)
-			modes |= mlxsw_sp_port_link_mode[i].supported;
+			__set_bit(mlxsw_sp_port_link_mode[i].mask_ethtool,
+				  mode);
 	}
-	return modes;
-}
-
-static u32 mlxsw_sp_from_ptys_advert_link(u32 ptys_eth_proto)
-{
-	u32 modes = 0;
-	int i;
-
-	for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
-		if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask)
-			modes |= mlxsw_sp_port_link_mode[i].advertised;
-	}
-	return modes;
 }
 
 static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto,
-					    struct ethtool_cmd *cmd)
+					    struct ethtool_link_ksettings *cmd)
 {
 	u32 speed = SPEED_UNKNOWN;
 	u8 duplex = DUPLEX_UNKNOWN;
@@ -1790,8 +1892,8 @@
 		}
 	}
 out:
-	ethtool_cmd_speed_set(cmd, speed);
-	cmd->duplex = duplex;
+	cmd->base.speed = speed;
+	cmd->base.duplex = duplex;
 }
 
 static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto)
@@ -1816,49 +1918,15 @@
 	return PORT_OTHER;
 }
 
-static int mlxsw_sp_port_get_settings(struct net_device *dev,
-				      struct ethtool_cmd *cmd)
-{
-	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	char ptys_pl[MLXSW_REG_PTYS_LEN];
-	u32 eth_proto_cap;
-	u32 eth_proto_admin;
-	u32 eth_proto_oper;
-	int err;
-
-	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
-	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
-	if (err) {
-		netdev_err(dev, "Failed to get proto");
-		return err;
-	}
-	mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap,
-			      &eth_proto_admin, &eth_proto_oper);
-
-	cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) |
-			 mlxsw_sp_from_ptys_supported_link(eth_proto_cap) |
-			 SUPPORTED_Pause | SUPPORTED_Asym_Pause |
-			 SUPPORTED_Autoneg;
-	cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin);
-	mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev),
-					eth_proto_oper, cmd);
-
-	eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
-	cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper);
-	cmd->lp_advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_oper);
-
-	cmd->transceiver = XCVR_INTERNAL;
-	return 0;
-}
-
-static u32 mlxsw_sp_to_ptys_advert_link(u32 advertising)
+static u32
+mlxsw_sp_to_ptys_advert_link(const struct ethtool_link_ksettings *cmd)
 {
 	u32 ptys_proto = 0;
 	int i;
 
 	for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) {
-		if (advertising & mlxsw_sp_port_link_mode[i].advertised)
+		if (test_bit(mlxsw_sp_port_link_mode[i].mask_ethtool,
+			     cmd->link_modes.advertising))
 			ptys_proto |= mlxsw_sp_port_link_mode[i].mask;
 	}
 	return ptys_proto;
@@ -1888,61 +1956,113 @@
 	return ptys_proto;
 }
 
-static int mlxsw_sp_port_set_settings(struct net_device *dev,
-				      struct ethtool_cmd *cmd)
+static void mlxsw_sp_port_get_link_supported(u32 eth_proto_cap,
+					     struct ethtool_link_ksettings *cmd)
+{
+	ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg);
+	ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+
+	mlxsw_sp_from_ptys_supported_port(eth_proto_cap, cmd);
+	mlxsw_sp_from_ptys_link(eth_proto_cap, cmd->link_modes.supported);
+}
+
+static void mlxsw_sp_port_get_link_advertise(u32 eth_proto_admin, bool autoneg,
+					     struct ethtool_link_ksettings *cmd)
+{
+	if (!autoneg)
+		return;
+
+	ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg);
+	mlxsw_sp_from_ptys_link(eth_proto_admin, cmd->link_modes.advertising);
+}
+
+static void
+mlxsw_sp_port_get_link_lp_advertise(u32 eth_proto_lp, u8 autoneg_status,
+				    struct ethtool_link_ksettings *cmd)
+{
+	if (autoneg_status != MLXSW_REG_PTYS_AN_STATUS_OK || !eth_proto_lp)
+		return;
+
+	ethtool_link_ksettings_add_link_mode(cmd, lp_advertising, Autoneg);
+	mlxsw_sp_from_ptys_link(eth_proto_lp, cmd->link_modes.lp_advertising);
+}
+
+static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
+					    struct ethtool_link_ksettings *cmd)
+{
+	u32 eth_proto_cap, eth_proto_admin, eth_proto_oper, eth_proto_lp;
+	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char ptys_pl[MLXSW_REG_PTYS_LEN];
+	u8 autoneg_status;
+	bool autoneg;
+	int err;
+
+	autoneg = mlxsw_sp_port->link.autoneg;
+	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
+	if (err)
+		return err;
+	mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
+			      &eth_proto_oper);
+
+	mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd);
+
+	mlxsw_sp_port_get_link_advertise(eth_proto_admin, autoneg, cmd);
+
+	eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl);
+	autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl);
+	mlxsw_sp_port_get_link_lp_advertise(eth_proto_lp, autoneg_status, cmd);
+
+	cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE;
+	cmd->base.port = mlxsw_sp_port_connector_port(eth_proto_oper);
+	mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper,
+					cmd);
+
+	return 0;
+}
+
+static int
+mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
+				 const struct ethtool_link_ksettings *cmd)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char ptys_pl[MLXSW_REG_PTYS_LEN];
-	u32 speed;
-	u32 eth_proto_new;
-	u32 eth_proto_cap;
-	u32 eth_proto_admin;
+	u32 eth_proto_cap, eth_proto_new;
+	bool autoneg;
 	int err;
 
-	speed = ethtool_cmd_speed(cmd);
-
-	eth_proto_new = cmd->autoneg == AUTONEG_ENABLE ?
-		mlxsw_sp_to_ptys_advert_link(cmd->advertising) :
-		mlxsw_sp_to_ptys_speed(speed);
-
 	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
-	if (err) {
-		netdev_err(dev, "Failed to get proto");
+	if (err)
 		return err;
-	}
-	mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin, NULL);
+	mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, NULL, NULL);
+
+	autoneg = cmd->base.autoneg == AUTONEG_ENABLE;
+	eth_proto_new = autoneg ?
+		mlxsw_sp_to_ptys_advert_link(cmd) :
+		mlxsw_sp_to_ptys_speed(cmd->base.speed);
 
 	eth_proto_new = eth_proto_new & eth_proto_cap;
 	if (!eth_proto_new) {
-		netdev_err(dev, "Not supported proto admin requested");
+		netdev_err(dev, "No supported speed requested\n");
 		return -EINVAL;
 	}
-	if (eth_proto_new == eth_proto_admin)
-		return 0;
 
 	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, eth_proto_new);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
-	if (err) {
-		netdev_err(dev, "Failed to set proto admin");
+	if (err)
 		return err;
-	}
 
 	if (!netif_running(dev))
 		return 0;
 
-	err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
-	if (err) {
-		netdev_err(dev, "Failed to set admin status");
-		return err;
-	}
+	mlxsw_sp_port->link.autoneg = autoneg;
 
-	err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
-	if (err) {
-		netdev_err(dev, "Failed to set admin status");
-		return err;
-	}
+	mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false);
+	mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true);
 
 	return 0;
 }
@@ -1956,8 +2076,8 @@
 	.set_phys_id		= mlxsw_sp_port_set_phys_id,
 	.get_ethtool_stats	= mlxsw_sp_port_get_stats,
 	.get_sset_count		= mlxsw_sp_port_get_sset_count,
-	.get_settings		= mlxsw_sp_port_get_settings,
-	.set_settings		= mlxsw_sp_port_set_settings,
+	.get_link_ksettings	= mlxsw_sp_port_get_link_ksettings,
+	.set_link_ksettings	= mlxsw_sp_port_set_link_ksettings,
 };
 
 static int
@@ -2076,6 +2196,18 @@
 	return 0;
 }
 
+static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	mlxsw_sp_port->pvid = 1;
+
+	return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
+static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
+}
+
 static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 				bool split, u8 module, u8 width, u8 lane)
 {
@@ -2095,6 +2227,7 @@
 	mlxsw_sp_port->mapping.module = module;
 	mlxsw_sp_port->mapping.width = width;
 	mlxsw_sp_port->mapping.lane = lane;
+	mlxsw_sp_port->link.autoneg = 1;
 	bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE);
 	mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL);
 	if (!mlxsw_sp_port->active_vlans) {
@@ -2116,9 +2249,26 @@
 		goto err_alloc_stats;
 	}
 
+	mlxsw_sp_port->hw_stats.cache =
+		kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL);
+
+	if (!mlxsw_sp_port->hw_stats.cache) {
+		err = -ENOMEM;
+		goto err_alloc_hw_stats;
+	}
+	INIT_DELAYED_WORK(&mlxsw_sp_port->hw_stats.update_dw,
+			  &update_stats_cache);
+
 	dev->netdev_ops = &mlxsw_sp_port_netdev_ops;
 	dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops;
 
+	err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n",
+			mlxsw_sp_port->local_port);
+		goto err_port_swid_set;
+	}
+
 	err = mlxsw_sp_port_dev_addr_init(mlxsw_sp_port);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Unable to init port mac address\n",
@@ -2135,7 +2285,7 @@
 	/* Each packet needs to have a Tx header (metadata) on top all other
 	 * headers.
 	 */
-	dev->hard_header_len += MLXSW_TXHDR_LEN;
+	dev->needed_headroom = MLXSW_TXHDR_LEN;
 
 	err = mlxsw_sp_port_system_port_mapping_set(mlxsw_sp_port);
 	if (err) {
@@ -2144,13 +2294,6 @@
 		goto err_port_system_port_mapping_set;
 	}
 
-	err = mlxsw_sp_port_swid_set(mlxsw_sp_port, 0);
-	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set SWID\n",
-			mlxsw_sp_port->local_port);
-		goto err_port_swid_set;
-	}
-
 	err = mlxsw_sp_port_speed_by_width_set(mlxsw_sp_port, width);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to enable speeds\n",
@@ -2191,7 +2334,15 @@
 		goto err_port_dcb_init;
 	}
 
+	err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n",
+			mlxsw_sp_port->local_port);
+		goto err_port_pvid_vport_create;
+	}
+
 	mlxsw_sp_port_switchdev_init(mlxsw_sp_port);
+	mlxsw_sp->ports[local_port] = mlxsw_sp_port;
 	err = register_netdev(dev);
 	if (err) {
 		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n",
@@ -2208,27 +2359,29 @@
 		goto err_core_port_init;
 	}
 
-	err = mlxsw_sp_port_vlan_init(mlxsw_sp_port);
-	if (err)
-		goto err_port_vlan_init;
-
-	mlxsw_sp->ports[local_port] = mlxsw_sp_port;
+	mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0);
 	return 0;
 
-err_port_vlan_init:
-	mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
 err_core_port_init:
 	unregister_netdev(dev);
 err_register_netdev:
+	mlxsw_sp->ports[local_port] = NULL;
+	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+	mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
+err_port_pvid_vport_create:
+	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
 err_port_dcb_init:
 err_port_ets_init:
 err_port_buffers_init:
 err_port_admin_status_set:
 err_port_mtu_set:
 err_port_speed_by_width_set:
-err_port_swid_set:
 err_port_system_port_mapping_set:
 err_dev_addr_init:
+	mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
+	kfree(mlxsw_sp_port->hw_stats.cache);
+err_alloc_hw_stats:
 	free_percpu(mlxsw_sp_port->pcpu_stats);
 err_alloc_stats:
 	kfree(mlxsw_sp_port->untagged_vlans);
@@ -2245,15 +2398,17 @@
 
 	if (!mlxsw_sp_port)
 		return;
-	mlxsw_sp->ports[local_port] = NULL;
+	cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw);
 	mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
 	unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
-	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
-	mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
+	mlxsw_sp->ports[local_port] = NULL;
 	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
+	mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port);
+	mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
 	mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
 	mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port);
 	free_percpu(mlxsw_sp_port->pcpu_stats);
+	kfree(mlxsw_sp_port->hw_stats.cache);
 	kfree(mlxsw_sp_port->untagged_vlans);
 	kfree(mlxsw_sp_port->active_vlans);
 	WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list));
@@ -2577,103 +2732,47 @@
 	netif_receive_skb(skb);
 }
 
+static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
+					   void *priv)
+{
+	skb->offload_fwd_mark = 1;
+	return mlxsw_sp_rx_listener_func(skb, local_port, priv);
+}
+
+#define MLXSW_SP_RXL(_func, _trap_id, _action)			\
+	{							\
+		.func = _func,					\
+		.local_port = MLXSW_PORT_DONT_CARE,		\
+		.trap_id = MLXSW_TRAP_ID_##_trap_id,		\
+		.action = MLXSW_REG_HPKT_ACTION_##_action,	\
+	}
+
 static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_FDB_MC,
-	},
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC, TRAP_TO_CPU),
 	/* Traps for specific L2 packet types, not trapped as FDB MC */
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_STP,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_LACP,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_EAPOL,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_LLDP,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_MMRP,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_MVRP,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_RPVST,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_DHCP,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_QUERY,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_ARPBC,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_ARPUC,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IP2ME,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_RTR_INGRESS0,
-	},
-	{
-		.func = mlxsw_sp_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4,
-	},
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, DHCP, MIRROR_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, IGMP_QUERY, MIRROR_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPBC, MIRROR_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPUC, MIRROR_TO_CPU),
+	/* L3 traps */
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, OSPF, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0, TRAP_TO_CPU),
+	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4, TRAP_TO_CPU),
 };
 
 static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
@@ -2700,7 +2799,7 @@
 		if (err)
 			goto err_rx_listener_register;
 
-		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
+		mlxsw_reg_hpkt_pack(hpkt_pl, mlxsw_sp_rx_listener[i].action,
 				    mlxsw_sp_rx_listener[i].trap_id);
 		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
 		if (err)
@@ -2788,7 +2887,9 @@
 
 static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
 {
+	struct mlxsw_resources *resources;
 	char slcr_pl[MLXSW_REG_SLCR_LEN];
+	int err;
 
 	mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
 				     MLXSW_REG_SLCR_LAG_HASH_DMAC |
@@ -2799,7 +2900,26 @@
 				     MLXSW_REG_SLCR_LAG_HASH_SPORT |
 				     MLXSW_REG_SLCR_LAG_HASH_DPORT |
 				     MLXSW_REG_SLCR_LAG_HASH_IPPROTO);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl);
+	if (err)
+		return err;
+
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	if (!(resources->max_lag_valid && resources->max_ports_in_lag_valid))
+		return -EIO;
+
+	mlxsw_sp->lags = kcalloc(resources->max_lag,
+				 sizeof(struct mlxsw_sp_upper),
+				 GFP_KERNEL);
+	if (!mlxsw_sp->lags)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	kfree(mlxsw_sp->lags);
 }
 
 static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
@@ -2883,6 +3003,7 @@
 err_router_init:
 	mlxsw_sp_switchdev_fini(mlxsw_sp);
 err_switchdev_init:
+	mlxsw_sp_lag_fini(mlxsw_sp);
 err_lag_init:
 	mlxsw_sp_buffers_fini(mlxsw_sp);
 err_buffers_init:
@@ -2896,38 +3017,26 @@
 static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
-	int i;
 
 	mlxsw_sp_ports_remove(mlxsw_sp);
 	mlxsw_sp_span_fini(mlxsw_sp);
 	mlxsw_sp_router_fini(mlxsw_sp);
 	mlxsw_sp_switchdev_fini(mlxsw_sp);
+	mlxsw_sp_lag_fini(mlxsw_sp);
 	mlxsw_sp_buffers_fini(mlxsw_sp);
 	mlxsw_sp_traps_fini(mlxsw_sp);
 	mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
 	WARN_ON(!list_empty(&mlxsw_sp->vfids.list));
 	WARN_ON(!list_empty(&mlxsw_sp->fids));
-	for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
-		WARN_ON_ONCE(mlxsw_sp->rifs[i]);
 }
 
 static struct mlxsw_config_profile mlxsw_sp_config_profile = {
 	.used_max_vepa_channels		= 1,
 	.max_vepa_channels		= 0,
-	.used_max_lag			= 1,
-	.max_lag			= MLXSW_SP_LAG_MAX,
-	.used_max_port_per_lag		= 1,
-	.max_port_per_lag		= MLXSW_SP_PORT_PER_LAG_MAX,
 	.used_max_mid			= 1,
 	.max_mid			= MLXSW_SP_MID_MAX,
 	.used_max_pgt			= 1,
 	.max_pgt			= 0,
-	.used_max_system_port		= 1,
-	.max_system_port		= 64,
-	.used_max_vlan_groups		= 1,
-	.max_vlan_groups		= 127,
-	.used_max_regions		= 1,
-	.max_regions			= 400,
 	.used_flood_tables		= 1,
 	.used_flood_mode		= 1,
 	.flood_mode			= 3,
@@ -2939,10 +3048,11 @@
 	.max_ib_mc			= 0,
 	.used_max_pkey			= 1,
 	.max_pkey			= 0,
-	.used_kvd_sizes			= 1,
+	.used_kvd_split_data		= 1,
+	.kvd_hash_granularity		= MLXSW_SP_KVD_GRANULARITY,
+	.kvd_hash_single_parts		= 2,
+	.kvd_hash_double_parts		= 1,
 	.kvd_linear_size		= MLXSW_SP_KVD_LINEAR_SIZE,
-	.kvd_hash_single_size		= MLXSW_SP_KVD_HASH_SINGLE_SIZE,
-	.kvd_hash_double_size		= MLXSW_SP_KVD_HASH_DOUBLE_SIZE,
 	.swid_config			= {
 		{
 			.used_type	= 1,
@@ -3059,13 +3169,15 @@
 
 static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
 {
+	struct mlxsw_resources *resources;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	for (i = 0; i < resources->max_rif; i++)
 		if (!mlxsw_sp->rifs[i])
 			return i;
 
-	return MLXSW_SP_RIF_MAX;
+	return MLXSW_SP_INVALID_RIF;
 }
 
 static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
@@ -3145,7 +3257,7 @@
 	int err;
 
 	rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
-	if (rif == MLXSW_SP_RIF_MAX)
+	if (rif == MLXSW_SP_INVALID_RIF)
 		return ERR_PTR(-ERANGE);
 
 	err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true);
@@ -3311,6 +3423,39 @@
 	return mlxsw_sp_fid_find(mlxsw_sp, fid);
 }
 
+static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
+{
+	return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
+	       MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
+}
+
+static u16 mlxsw_sp_flood_table_index_get(u16 fid)
+{
+	return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
+}
+
+static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
+					  bool set)
+{
+	enum mlxsw_flood_table_type table_type;
+	char *sftr_pl;
+	u16 index;
+	int err;
+
+	sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
+	if (!sftr_pl)
+		return -ENOMEM;
+
+	table_type = mlxsw_sp_flood_table_type_get(fid);
+	index = mlxsw_sp_flood_table_index_get(fid);
+	mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, index, table_type,
+			    1, MLXSW_PORT_ROUTER_PORT, set);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
+
+	kfree(sftr_pl);
+	return err;
+}
+
 static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
 {
 	if (mlxsw_sp_fid_is_vfid(fid))
@@ -3344,12 +3489,16 @@
 	int err;
 
 	rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
-	if (rif == MLXSW_SP_RIF_MAX)
+	if (rif == MLXSW_SP_INVALID_RIF)
 		return -ERANGE;
 
+	err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
+	if (err)
+		return err;
+
 	err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true);
 	if (err)
-		return err;
+		goto err_rif_bridge_op;
 
 	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
 	if (err)
@@ -3372,6 +3521,8 @@
 	mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
 err_rif_fdb_op:
 	mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
+err_rif_bridge_op:
+	mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
 	return err;
 }
 
@@ -3391,6 +3542,8 @@
 
 	mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
 
+	mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
+
 	netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif);
 }
 
@@ -3543,12 +3696,14 @@
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u8 local_port = mlxsw_sp_port->local_port;
 	u16 lag_id = mlxsw_sp_port->lag_id;
+	struct mlxsw_resources *resources;
 	int i, count = 0;
 
 	if (!mlxsw_sp_port->lagged)
 		return true;
 
-	for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) {
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	for (i = 0; i < resources->max_ports_in_lag; i++) {
 		struct mlxsw_sp_port *lag_port;
 
 		lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i);
@@ -3754,11 +3909,13 @@
 				  struct net_device *lag_dev,
 				  u16 *p_lag_id)
 {
+	struct mlxsw_resources *resources;
 	struct mlxsw_sp_upper *lag;
 	int free_lag_id = -1;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_LAG_MAX; i++) {
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	for (i = 0; i < resources->max_lag; i++) {
 		lag = mlxsw_sp_lag_get(mlxsw_sp, i);
 		if (lag->ref_count) {
 			if (lag->dev == lag_dev) {
@@ -3792,9 +3949,11 @@
 static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
 				       u16 lag_id, u8 *p_port_index)
 {
+	struct mlxsw_resources *resources;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) {
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	for (i = 0; i < resources->max_ports_in_lag; i++) {
 		if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) {
 			*p_port_index = i;
 			return 0;
@@ -4487,18 +4646,26 @@
 	.priority = 10,	/* Must be called before FIB notifier block */
 };
 
+static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
+	.notifier_call = mlxsw_sp_router_netevent_event,
+};
+
 static int __init mlxsw_sp_module_init(void)
 {
 	int err;
 
 	register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
 	register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+	register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+
 	err = mlxsw_core_driver_register(&mlxsw_sp_driver);
 	if (err)
 		goto err_core_driver_register;
 	return 0;
 
 err_core_driver_register:
+	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
+	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 	unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
 	return err;
 }
@@ -4506,6 +4673,7 @@
 static void __exit mlxsw_sp_module_exit(void)
 {
 	mlxsw_core_driver_unregister(&mlxsw_sp_driver);
+	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
 	unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f69aa37..9b22863 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -45,7 +45,7 @@
 #include <linux/list.h>
 #include <linux/dcbnl.h>
 #include <linux/in6.h>
-#include <net/switchdev.h>
+#include <linux/notifier.h>
 
 #include "port.h"
 #include "core.h"
@@ -54,10 +54,7 @@
 #define MLXSW_SP_VFID_MAX 6656	/* Bridged VLAN interfaces */
 
 #define MLXSW_SP_RFID_BASE 15360
-#define MLXSW_SP_RIF_MAX 800
-
-#define MLXSW_SP_LAG_MAX 64
-#define MLXSW_SP_PORT_PER_LAG_MAX 16
+#define MLXSW_SP_INVALID_RIF 0xffff
 
 #define MLXSW_SP_MID_MAX 7000
 
@@ -67,8 +64,6 @@
 #define MLXSW_SP_LPM_TREE_MAX 22
 #define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN)
 
-#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256
-
 #define MLXSW_SP_PORT_BASE_SPEED 25000	/* Mb/s */
 
 #define MLXSW_SP_BYTES_PER_CELL 96
@@ -77,8 +72,7 @@
 #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL)
 
 #define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */
-#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */
-#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */
+#define MLXSW_SP_KVD_GRANULARITY 128
 
 /* Maximum delay buffer needed in case of PAUSE frames, in cells.
  * Assumes 100m cable and maximum MTU.
@@ -253,7 +247,7 @@
 
 struct mlxsw_sp_router {
 	struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT];
-	struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX];
+	struct mlxsw_sp_vr *vrs;
 	struct rhashtable neigh_ht;
 	struct {
 		struct delayed_work dw;
@@ -263,6 +257,7 @@
 #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
 	struct list_head nexthop_group_list;
 	struct list_head nexthop_neighs_list;
+	bool aborted;
 };
 
 struct mlxsw_sp {
@@ -275,7 +270,7 @@
 		DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX);
 	} br_mids;
 	struct list_head fids;	/* VLAN-aware bridge FIDs */
-	struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX];
+	struct mlxsw_sp_rif **rifs;
 	struct mlxsw_sp_port **ports;
 	struct mlxsw_core *core;
 	const struct mlxsw_bus_info *bus_info;
@@ -290,7 +285,7 @@
 #define MLXSW_SP_DEFAULT_AGEING_TIME 300
 	u32 ageing_time;
 	struct mlxsw_sp_upper master_bridge;
-	struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
+	struct mlxsw_sp_upper *lags;
 	u8 port_to_module[MLXSW_PORT_MAX_PORTS];
 	struct mlxsw_sp_sb sb;
 	struct mlxsw_sp_router router;
@@ -302,6 +297,7 @@
 		struct mlxsw_sp_span_entry *entries;
 		int entries_count;
 	} span;
+	struct notifier_block fib_nb;
 };
 
 static inline struct mlxsw_sp_upper *
@@ -341,7 +337,8 @@
 	} vport;
 	struct {
 		u8 tx_pause:1,
-		   rx_pause:1;
+		   rx_pause:1,
+		   autoneg:1;
 	} link;
 	struct {
 		struct ieee_ets *ets;
@@ -360,6 +357,11 @@
 	struct list_head vports_list;
 	/* TC handles */
 	struct list_head mall_tc_list;
+	struct {
+		#define MLXSW_HW_STATS_UPDATE_TIME HZ
+		struct rtnl_link_stats64 *cache;
+		struct delayed_work update_dw;
+	} hw_stats;
 };
 
 struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev);
@@ -477,9 +479,12 @@
 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
 			 const struct net_device *dev)
 {
+	struct mlxsw_resources *resources;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+
+	for (i = 0; i < resources->max_rif; i++)
 		if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
 			return mlxsw_sp->rifs[i];
 
@@ -536,8 +541,6 @@
 				 u16 vid);
 int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin,
 			   u16 vid_end, bool is_member, bool untagged);
-int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto,
-			  u16 vid);
 int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid,
 			     bool set);
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port);
@@ -560,6 +563,9 @@
 int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				  enum mlxsw_reg_qeec_hr hr, u8 index,
 				  u8 next_index, u32 maxrate);
+int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				     u16 vid_begin, u16 vid_end,
+				     bool learn_enable);
 
 #ifdef CONFIG_MLXSW_SPECTRUM_DCB
 
@@ -580,15 +586,12 @@
 
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
-int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			     const struct switchdev_obj_ipv4_fib *fib4,
-			     struct switchdev_trans *trans);
-int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
-			     const struct switchdev_obj_ipv4_fib *fib4);
 int mlxsw_sp_router_neigh_construct(struct net_device *dev,
 				    struct neighbour *n);
 void mlxsw_sp_router_neigh_destroy(struct net_device *dev,
 				   struct neighbour *n);
+int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
+				   unsigned long event, void *ptr);
 
 int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count);
 void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 074cdda..bcaed8a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -330,7 +330,7 @@
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
-	MLXSW_SP_CPU_PORT_SB_CM,
+	MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0),
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
 	MLXSW_SP_CPU_PORT_SB_CM,
@@ -595,9 +595,9 @@
 	enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
 	struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
 
-	pool_info->pool_type = dir;
+	pool_info->pool_type = (enum devlink_sb_pool_type) dir;
 	pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size);
-	pool_info->threshold_type = pr->mode;
+	pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode;
 	return 0;
 }
 
@@ -608,9 +608,10 @@
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	u8 pool = pool_get(pool_index);
 	enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
-	enum mlxsw_reg_sbpr_mode mode = threshold_type;
 	u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size);
+	enum mlxsw_reg_sbpr_mode mode;
 
+	mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
 	return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size);
 }
 
@@ -696,13 +697,13 @@
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u8 local_port = mlxsw_sp_port->local_port;
 	u8 pg_buff = tc_index;
-	enum mlxsw_reg_sbxx_dir dir = pool_type;
+	enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
 	struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port,
 						       pg_buff, dir);
 
 	*p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir,
 						 cm->max_buff);
-	*p_pool_index = pool_index_get(cm->pool, pool_type);
+	*p_pool_index = pool_index_get(cm->pool, dir);
 	return 0;
 }
 
@@ -716,23 +717,19 @@
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u8 local_port = mlxsw_sp_port->local_port;
 	u8 pg_buff = tc_index;
-	enum mlxsw_reg_sbxx_dir dir = pool_type;
-	u8 pool = pool_index;
+	enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
+	u8 pool = pool_get(pool_index);
 	u32 max_buff;
 	int err;
 
+	if (dir != dir_get(pool_index))
+		return -EINVAL;
+
 	err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
 				       threshold, &max_buff);
 	if (err)
 		return err;
 
-	if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS) {
-		if (pool < MLXSW_SP_SB_POOL_COUNT)
-			return -EINVAL;
-		pool -= MLXSW_SP_SB_POOL_COUNT;
-	} else if (pool >= MLXSW_SP_SB_POOL_COUNT) {
-		return -EINVAL;
-	}
 	return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir,
 				    0, max_buff, pool);
 }
@@ -947,7 +944,7 @@
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u8 local_port = mlxsw_sp_port->local_port;
 	u8 pg_buff = tc_index;
-	enum mlxsw_reg_sbxx_dir dir = pool_type;
+	enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
 	struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port,
 						       pg_buff, dir);
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index 01cfb75..b6ed7f7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -341,6 +341,8 @@
 	char pfcc_pl[MLXSW_REG_PFCC_LEN];
 
 	mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port);
+	mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause);
+	mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause);
 	mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en);
 
 	return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc),
@@ -351,17 +353,17 @@
 				      struct ieee_pfc *pfc)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
+	bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port);
 	int err;
 
-	if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) &&
-	    pfc->pfc_en) {
+	if (pause_en && pfc->pfc_en) {
 		netdev_err(dev, "PAUSE frames already enabled on port\n");
 		return -EINVAL;
 	}
 
 	err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
 					   mlxsw_sp_port->dcb.ets->prio_tc,
-					   false, pfc);
+					   pause_en, pfc);
 	if (err) {
 		netdev_err(dev, "Failed to configure port's headroom for PFC\n");
 		return err;
@@ -380,7 +382,7 @@
 
 err_port_pfc_set:
 	__mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu,
-				     mlxsw_sp_port->dcb.ets->prio_tc, false,
+				     mlxsw_sp_port->dcb.ets->prio_tc, pause_en,
 				     mlxsw_sp_port->dcb.pfc);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 81418d6..78fc557d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -43,6 +43,7 @@
 #include <net/netevent.h>
 #include <net/neighbour.h>
 #include <net/arp.h>
+#include <net/ip_fib.h>
 
 #include "spectrum.h"
 #include "core.h"
@@ -107,6 +108,7 @@
 }
 
 struct mlxsw_sp_fib_key {
+	struct net_device *dev;
 	unsigned char addr[sizeof(struct in6_addr)];
 	unsigned char prefix_len;
 };
@@ -121,17 +123,20 @@
 
 struct mlxsw_sp_fib_entry {
 	struct rhash_head ht_node;
+	struct list_head list;
 	struct mlxsw_sp_fib_key key;
 	enum mlxsw_sp_fib_entry_type type;
-	u8 added:1;
+	unsigned int ref_count;
 	u16 rif; /* used for action local */
 	struct mlxsw_sp_vr *vr;
+	struct fib_info *fi;
 	struct list_head nexthop_group_node;
 	struct mlxsw_sp_nexthop_group *nh_group;
 };
 
 struct mlxsw_sp_fib {
 	struct rhashtable ht;
+	struct list_head entry_list;
 	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
 	struct mlxsw_sp_prefix_usage prefix_usage;
 };
@@ -153,6 +158,7 @@
 				     mlxsw_sp_fib_ht_params);
 	if (err)
 		return err;
+	list_add_tail(&fib_entry->list, &fib->entry_list);
 	if (fib->prefix_ref_count[prefix_len]++ == 0)
 		mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
 	return 0;
@@ -165,19 +171,22 @@
 
 	if (--fib->prefix_ref_count[prefix_len] == 0)
 		mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
+	list_del(&fib_entry->list);
 	rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node,
 			       mlxsw_sp_fib_ht_params);
 }
 
 static struct mlxsw_sp_fib_entry *
 mlxsw_sp_fib_entry_create(struct mlxsw_sp_fib *fib, const void *addr,
-			  size_t addr_len, unsigned char prefix_len)
+			  size_t addr_len, unsigned char prefix_len,
+			  struct net_device *dev)
 {
 	struct mlxsw_sp_fib_entry *fib_entry;
 
 	fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
 	if (!fib_entry)
 		return NULL;
+	fib_entry->key.dev = dev;
 	memcpy(fib_entry->key.addr, addr, addr_len);
 	fib_entry->key.prefix_len = prefix_len;
 	return fib_entry;
@@ -190,10 +199,13 @@
 
 static struct mlxsw_sp_fib_entry *
 mlxsw_sp_fib_entry_lookup(struct mlxsw_sp_fib *fib, const void *addr,
-			  size_t addr_len, unsigned char prefix_len)
+			  size_t addr_len, unsigned char prefix_len,
+			  struct net_device *dev)
 {
-	struct mlxsw_sp_fib_key key = {{ 0 } };
+	struct mlxsw_sp_fib_key key;
 
+	memset(&key, 0, sizeof(key));
+	key.dev = dev;
 	memcpy(key.addr, addr, addr_len);
 	key.prefix_len = prefix_len;
 	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
@@ -210,6 +222,7 @@
 	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
 	if (err)
 		goto err_rhashtable_init;
+	INIT_LIST_HEAD(&fib->entry_list);
 	return fib;
 
 err_rhashtable_init:
@@ -246,7 +259,9 @@
 {
 	char ralta_pl[MLXSW_REG_RALTA_LEN];
 
-	mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id);
+	mlxsw_reg_ralta_pack(ralta_pl, true,
+			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+			     lpm_tree->id);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
 }
 
@@ -255,7 +270,9 @@
 {
 	char ralta_pl[MLXSW_REG_RALTA_LEN];
 
-	mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id);
+	mlxsw_reg_ralta_pack(ralta_pl, false,
+			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
+			     lpm_tree->id);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
 }
 
@@ -362,10 +379,12 @@
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
 {
+	struct mlxsw_resources *resources;
 	struct mlxsw_sp_vr *vr;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	for (i = 0; i < resources->max_virtual_routers; i++) {
 		vr = &mlxsw_sp->router.vrs[i];
 		if (!vr->used)
 			return vr;
@@ -378,7 +397,9 @@
 {
 	char raltb_pl[MLXSW_REG_RALTB_LEN];
 
-	mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id);
+	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
+			     (enum mlxsw_reg_ralxx_protocol) vr->proto,
+			     vr->lpm_tree->id);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
 }
 
@@ -388,7 +409,8 @@
 	char raltb_pl[MLXSW_REG_RALTB_LEN];
 
 	/* Bind to tree 0 which is default */
-	mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0);
+	mlxsw_reg_raltb_pack(raltb_pl, vr->id,
+			     (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
 }
 
@@ -404,11 +426,14 @@
 					    u32 tb_id,
 					    enum mlxsw_sp_l3proto proto)
 {
+	struct mlxsw_resources *resources;
 	struct mlxsw_sp_vr *vr;
 	int i;
 
 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
-	for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	for (i = 0; i < resources->max_virtual_routers; i++) {
 		vr = &mlxsw_sp->router.vrs[i];
 		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
 			return vr;
@@ -542,15 +567,33 @@
 					   &vr->fib->prefix_usage);
 }
 
-static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
+static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
 {
+	struct mlxsw_resources *resources;
 	struct mlxsw_sp_vr *vr;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	if (!resources->max_virtual_routers_valid)
+		return -EIO;
+
+	mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers,
+				       sizeof(struct mlxsw_sp_vr),
+				       GFP_KERNEL);
+	if (!mlxsw_sp->router.vrs)
+		return -ENOMEM;
+
+	for (i = 0; i < resources->max_virtual_routers; i++) {
 		vr = &mlxsw_sp->router.vrs[i];
 		vr->id = i;
 	}
+
+	return 0;
+}
+
+static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	kfree(mlxsw_sp->router.vrs);
 }
 
 struct mlxsw_sp_neigh_key {
@@ -657,7 +700,7 @@
 		return 0;
 	}
 
-	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+	r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
 	if (WARN_ON(!r))
 		return -EINVAL;
 
@@ -938,8 +981,8 @@
 	mlxsw_sp_port_dev_put(mlxsw_sp_port);
 }
 
-static int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
-					  unsigned long event, void *ptr)
+int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
+				   unsigned long event, void *ptr)
 {
 	struct mlxsw_sp_neigh_entry *neigh_entry;
 	struct mlxsw_sp_port *mlxsw_sp_port;
@@ -1009,10 +1052,6 @@
 	return NOTIFY_DONE;
 }
 
-static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
-	.notifier_call = mlxsw_sp_router_netevent_event,
-};
-
 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
 {
 	int err;
@@ -1027,10 +1066,6 @@
 	 */
 	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
 
-	err = register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
-	if (err)
-		goto err_register_netevent_notifier;
-
 	/* Create the delayed works for the activity_update */
 	INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
 			  mlxsw_sp_router_neighs_update_work);
@@ -1039,17 +1074,12 @@
 	mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
 	mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
 	return 0;
-
-err_register_netevent_notifier:
-	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
-	return err;
 }
 
 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
 	cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
-	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 	rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
 }
 
@@ -1088,9 +1118,10 @@
 {
 	char raleu_pl[MLXSW_REG_RALEU_LEN];
 
-	mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id,
-			     adj_index, ecmp_size,
-			     new_adj_index, new_ecmp_size);
+	mlxsw_reg_raleu_pack(raleu_pl,
+			     (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
+			     adj_index, ecmp_size, new_adj_index,
+			     new_ecmp_size);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
 }
 
@@ -1496,43 +1527,6 @@
 	mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
 }
 
-static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
-{
-	char rgcr_pl[MLXSW_REG_RGCR_LEN];
-
-	mlxsw_reg_rgcr_pack(rgcr_pl, true);
-	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
-}
-
-static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
-{
-	char rgcr_pl[MLXSW_REG_RGCR_LEN];
-
-	mlxsw_reg_rgcr_pack(rgcr_pl, false);
-	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
-}
-
-int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
-{
-	int err;
-
-	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
-	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
-	err = __mlxsw_sp_router_init(mlxsw_sp);
-	if (err)
-		return err;
-	mlxsw_sp_lpm_init(mlxsw_sp);
-	mlxsw_sp_vrs_init(mlxsw_sp);
-	return mlxsw_sp_neigh_init(mlxsw_sp);
-}
-
-void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
-{
-	mlxsw_sp_neigh_fini(mlxsw_sp);
-	__mlxsw_sp_router_fini(mlxsw_sp);
-}
-
 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
 					 struct mlxsw_sp_fib_entry *fib_entry,
 					 enum mlxsw_reg_ralue_op op)
@@ -1558,8 +1552,9 @@
 		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
 	}
 
-	mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
-			      fib_entry->key.prefix_len, *p_dip);
+	mlxsw_reg_ralue_pack4(ralue_pl,
+			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
+			      vr->id, fib_entry->key.prefix_len, *p_dip);
 	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
 					adjacency_index, ecmp_size);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
@@ -1573,8 +1568,9 @@
 	u32 *p_dip = (u32 *) fib_entry->key.addr;
 	struct mlxsw_sp_vr *vr = fib_entry->vr;
 
-	mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
-			      fib_entry->key.prefix_len, *p_dip);
+	mlxsw_reg_ralue_pack4(ralue_pl,
+			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
+			      vr->id, fib_entry->key.prefix_len, *p_dip);
 	mlxsw_reg_ralue_act_local_pack(ralue_pl,
 				       MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0,
 				       fib_entry->rif);
@@ -1589,8 +1585,9 @@
 	u32 *p_dip = (u32 *) fib_entry->key.addr;
 	struct mlxsw_sp_vr *vr = fib_entry->vr;
 
-	mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id,
-			      fib_entry->key.prefix_len, *p_dip);
+	mlxsw_reg_ralue_pack4(ralue_pl,
+			      (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
+			      vr->id, fib_entry->key.prefix_len, *p_dip);
 	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
 }
@@ -1626,11 +1623,8 @@
 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_fib_entry *fib_entry)
 {
-	enum mlxsw_reg_ralue_op op;
-
-	op = !fib_entry->added ? MLXSW_REG_RALUE_OP_WRITE_WRITE :
-				 MLXSW_REG_RALUE_OP_WRITE_UPDATE;
-	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
+	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
+				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
 }
 
 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
@@ -1640,128 +1634,180 @@
 				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
 }
 
-struct mlxsw_sp_router_fib4_add_info {
-	struct switchdev_trans_item tritem;
-	struct mlxsw_sp *mlxsw_sp;
-	struct mlxsw_sp_fib_entry *fib_entry;
-};
-
-static void mlxsw_sp_router_fib4_add_info_destroy(void const *data)
-{
-	const struct mlxsw_sp_router_fib4_add_info *info = data;
-	struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry;
-	struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp;
-
-	mlxsw_sp_fib_entry_destroy(fib_entry);
-	mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr);
-	kfree(info);
-}
-
 static int
 mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp,
-				const struct switchdev_obj_ipv4_fib *fib4,
+				const struct fib_entry_notifier_info *fen_info,
 				struct mlxsw_sp_fib_entry *fib_entry)
 {
-	struct fib_info *fi = fib4->fi;
+	struct fib_info *fi = fen_info->fi;
+	struct mlxsw_sp_rif *r = NULL;
+	int nhsel;
+	int err;
 
-	if (fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) {
+	if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) {
 		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
 		return 0;
 	}
-	if (fib4->type != RTN_UNICAST)
+	if (fen_info->type != RTN_UNICAST)
 		return -EINVAL;
 
-	if (fi->fib_scope != RT_SCOPE_UNIVERSE) {
-		struct mlxsw_sp_rif *r;
+	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
+		const struct fib_nh *nh = &fi->fib_nh[nhsel];
 
-		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
-		r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fi->fib_dev);
-		if (!r)
-			return -EINVAL;
-		fib_entry->rif = r->rif;
+		if (!nh->nh_dev)
+			continue;
+		r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev);
+		if (!r) {
+			/* In case router interface is not found for
+			 * at least one of the nexthops, that means
+			 * the nexthop points to some device unrelated
+			 * to us. Set trap and pass the packets for
+			 * this prefix to kernel.
+			 */
+			break;
+		}
+	}
+
+	if (!r) {
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
 		return 0;
 	}
-	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
-	return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
+
+	if (fi->fib_scope != RT_SCOPE_UNIVERSE) {
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
+		fib_entry->rif = r->rif;
+	} else {
+		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
+		err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi);
+		if (err)
+			return err;
+	}
+	fib_info_offload_inc(fen_info->fi);
+	return 0;
 }
 
 static void
 mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_fib_entry *fib_entry)
 {
-	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
-		return;
-	mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
+	if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
+		fib_info_offload_dec(fib_entry->fi);
+	if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE)
+		mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
 }
 
-static int
-mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port,
-				 const struct switchdev_obj_ipv4_fib *fib4,
-				 struct switchdev_trans *trans)
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp,
+		       const struct fib_entry_notifier_info *fen_info)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	struct mlxsw_sp_router_fib4_add_info *info;
 	struct mlxsw_sp_fib_entry *fib_entry;
+	struct fib_info *fi = fen_info->fi;
 	struct mlxsw_sp_vr *vr;
 	int err;
 
-	vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id,
+	vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
 			     MLXSW_SP_L3_PROTO_IPV4);
 	if (IS_ERR(vr))
-		return PTR_ERR(vr);
+		return ERR_CAST(vr);
 
-	fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst,
-					      sizeof(fib4->dst), fib4->dst_len);
+	fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
+					      sizeof(fen_info->dst),
+					      fen_info->dst_len, fi->fib_dev);
+	if (fib_entry) {
+		/* Already exists, just take a reference */
+		fib_entry->ref_count++;
+		return fib_entry;
+	}
+	fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst,
+					      sizeof(fen_info->dst),
+					      fen_info->dst_len, fi->fib_dev);
 	if (!fib_entry) {
 		err = -ENOMEM;
 		goto err_fib_entry_create;
 	}
 	fib_entry->vr = vr;
+	fib_entry->fi = fi;
+	fib_entry->ref_count = 1;
 
-	err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry);
+	err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry);
 	if (err)
 		goto err_fib4_entry_init;
 
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
-	if (!info) {
-		err = -ENOMEM;
-		goto err_alloc_info;
-	}
-	info->mlxsw_sp = mlxsw_sp;
-	info->fib_entry = fib_entry;
-	switchdev_trans_item_enqueue(trans, info,
-				     mlxsw_sp_router_fib4_add_info_destroy,
-				     &info->tritem);
-	return 0;
+	return fib_entry;
 
-err_alloc_info:
-	mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
 err_fib4_entry_init:
 	mlxsw_sp_fib_entry_destroy(fib_entry);
 err_fib_entry_create:
 	mlxsw_sp_vr_put(mlxsw_sp, vr);
-	return err;
+
+	return ERR_PTR(err);
 }
 
-static int
-mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port,
-				const struct switchdev_obj_ipv4_fib *fib4,
-				struct switchdev_trans *trans)
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp,
+			const struct fib_entry_notifier_info *fen_info)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	struct mlxsw_sp_router_fib4_add_info *info;
+	struct mlxsw_sp_vr *vr;
+
+	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id,
+			      MLXSW_SP_L3_PROTO_IPV4);
+	if (!vr)
+		return NULL;
+
+	return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst,
+					 sizeof(fen_info->dst),
+					 fen_info->dst_len,
+					 fen_info->fi->fib_dev);
+}
+
+static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp,
+				   struct mlxsw_sp_fib_entry *fib_entry)
+{
+	struct mlxsw_sp_vr *vr = fib_entry->vr;
+
+	if (--fib_entry->ref_count == 0) {
+		mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
+		mlxsw_sp_fib_entry_destroy(fib_entry);
+	}
+	mlxsw_sp_vr_put(mlxsw_sp, vr);
+}
+
+static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp,
+				       struct mlxsw_sp_fib_entry *fib_entry)
+{
+	unsigned int last_ref_count;
+
+	do {
+		last_ref_count = fib_entry->ref_count;
+		mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
+	} while (last_ref_count != 1);
+}
+
+static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
+				    struct fib_entry_notifier_info *fen_info)
+{
 	struct mlxsw_sp_fib_entry *fib_entry;
 	struct mlxsw_sp_vr *vr;
 	int err;
 
-	info = switchdev_trans_item_dequeue(trans);
-	fib_entry = info->fib_entry;
-	kfree(info);
+	if (mlxsw_sp->router.aborted)
+		return 0;
+
+	fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info);
+	if (IS_ERR(fib_entry)) {
+		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n");
+		return PTR_ERR(fib_entry);
+	}
+
+	if (fib_entry->ref_count != 1)
+		return 0;
 
 	vr = fib_entry->vr;
-	err = mlxsw_sp_fib_entry_insert(fib_entry->vr->fib, fib_entry);
-	if (err)
+	err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry);
+	if (err) {
+		dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n");
 		goto err_fib_entry_insert;
+	}
 	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
 	if (err)
 		goto err_fib_entry_add;
@@ -1770,45 +1816,198 @@
 err_fib_entry_add:
 	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
 err_fib_entry_insert:
-	mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
-	mlxsw_sp_fib_entry_destroy(fib_entry);
-	mlxsw_sp_vr_put(mlxsw_sp, vr);
+	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
 	return err;
 }
 
-int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			     const struct switchdev_obj_ipv4_fib *fib4,
-			     struct switchdev_trans *trans)
+static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
+				    struct fib_entry_notifier_info *fen_info)
 {
-	if (switchdev_trans_ph_prepare(trans))
-		return mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port,
-							fib4, trans);
-	return mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port,
-					       fib4, trans);
-}
-
-int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port,
-			     const struct switchdev_obj_ipv4_fib *fib4)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_fib_entry *fib_entry;
-	struct mlxsw_sp_vr *vr;
 
-	vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4);
-	if (!vr) {
-		dev_warn(mlxsw_sp->bus_info->dev, "Failed to find virtual router for FIB4 entry being removed.\n");
-		return -ENOENT;
-	}
-	fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst,
-					      sizeof(fib4->dst), fib4->dst_len);
+	if (mlxsw_sp->router.aborted)
+		return 0;
+
+	fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info);
 	if (!fib_entry) {
 		dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n");
 		return -ENOENT;
 	}
-	mlxsw_sp_fib_entry_del(mlxsw_sp_port->mlxsw_sp, fib_entry);
-	mlxsw_sp_fib_entry_remove(vr->fib, fib_entry);
-	mlxsw_sp_router_fib4_entry_fini(mlxsw_sp, fib_entry);
-	mlxsw_sp_fib_entry_destroy(fib_entry);
-	mlxsw_sp_vr_put(mlxsw_sp, vr);
+
+	if (fib_entry->ref_count == 1) {
+		mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
+		mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, fib_entry);
+	}
+
+	mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry);
 	return 0;
 }
+
+static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
+{
+	char ralta_pl[MLXSW_REG_RALTA_LEN];
+	char ralst_pl[MLXSW_REG_RALST_LEN];
+	char raltb_pl[MLXSW_REG_RALTB_LEN];
+	char ralue_pl[MLXSW_REG_RALUE_LEN];
+	int err;
+
+	mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
+			     MLXSW_SP_LPM_TREE_MIN);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, 0);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
+	if (err)
+		return err;
+
+	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
+			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
+	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
+static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_resources *resources;
+	struct mlxsw_sp_fib_entry *fib_entry;
+	struct mlxsw_sp_fib_entry *tmp;
+	struct mlxsw_sp_vr *vr;
+	int i;
+	int err;
+
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	for (i = 0; i < resources->max_virtual_routers; i++) {
+		vr = &mlxsw_sp->router.vrs[i];
+		if (!vr->used)
+			continue;
+
+		list_for_each_entry_safe(fib_entry, tmp,
+					 &vr->fib->entry_list, list) {
+			bool do_break = &tmp->list == &vr->fib->entry_list;
+
+			mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
+			mlxsw_sp_fib_entry_remove(fib_entry->vr->fib,
+						  fib_entry);
+			mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry);
+			if (do_break)
+				break;
+		}
+	}
+	mlxsw_sp->router.aborted = true;
+	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
+	if (err)
+		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
+}
+
+static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_resources *resources;
+	char rgcr_pl[MLXSW_REG_RGCR_LEN];
+	int err;
+
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	if (!resources->max_rif_valid)
+		return -EIO;
+
+	mlxsw_sp->rifs = kcalloc(resources->max_rif,
+				 sizeof(struct mlxsw_sp_rif *), GFP_KERNEL);
+	if (!mlxsw_sp->rifs)
+		return -ENOMEM;
+
+	mlxsw_reg_rgcr_pack(rgcr_pl, true);
+	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif);
+	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+	if (err)
+		goto err_rgcr_fail;
+
+	return 0;
+
+err_rgcr_fail:
+	kfree(mlxsw_sp->rifs);
+	return err;
+}
+
+static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	struct mlxsw_resources *resources;
+	char rgcr_pl[MLXSW_REG_RGCR_LEN];
+	int i;
+
+	mlxsw_reg_rgcr_pack(rgcr_pl, false);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	for (i = 0; i < resources->max_rif; i++)
+		WARN_ON_ONCE(mlxsw_sp->rifs[i]);
+
+	kfree(mlxsw_sp->rifs);
+}
+
+static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
+				     unsigned long event, void *ptr)
+{
+	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
+	struct fib_entry_notifier_info *fen_info = ptr;
+	int err;
+
+	switch (event) {
+	case FIB_EVENT_ENTRY_ADD:
+		err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info);
+		if (err)
+			mlxsw_sp_router_fib4_abort(mlxsw_sp);
+		break;
+	case FIB_EVENT_ENTRY_DEL:
+		mlxsw_sp_router_fib4_del(mlxsw_sp, fen_info);
+		break;
+	case FIB_EVENT_RULE_ADD: /* fall through */
+	case FIB_EVENT_RULE_DEL:
+		mlxsw_sp_router_fib4_abort(mlxsw_sp);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
+{
+	int err;
+
+	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
+	INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list);
+	err = __mlxsw_sp_router_init(mlxsw_sp);
+	if (err)
+		return err;
+
+	mlxsw_sp_lpm_init(mlxsw_sp);
+	err = mlxsw_sp_vrs_init(mlxsw_sp);
+	if (err)
+		goto err_vrs_init;
+
+	err =  mlxsw_sp_neigh_init(mlxsw_sp);
+	if (err)
+		goto err_neigh_init;
+
+	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
+	register_fib_notifier(&mlxsw_sp->fib_nb);
+	return 0;
+
+err_neigh_init:
+	mlxsw_sp_vrs_fini(mlxsw_sp);
+err_vrs_init:
+	__mlxsw_sp_router_fini(mlxsw_sp);
+	return err;
+}
+
+void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	unregister_fib_notifier(&mlxsw_sp->fib_nb);
+	mlxsw_sp_neigh_fini(mlxsw_sp);
+	mlxsw_sp_vrs_fini(mlxsw_sp);
+	__mlxsw_sp_router_fini(mlxsw_sp);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index a1ad5e6..5e00c79 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -167,8 +167,8 @@
 }
 
 static int __mlxsw_sp_port_flood_set(struct mlxsw_sp_port *mlxsw_sp_port,
-				     u16 idx_begin, u16 idx_end, bool set,
-				     bool only_uc)
+				     u16 idx_begin, u16 idx_end, bool uc_set,
+				     bool bm_set)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u16 local_port = mlxsw_sp_port->local_port;
@@ -187,28 +187,22 @@
 		return -ENOMEM;
 
 	mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin,
-			    table_type, range, local_port, set);
+			    table_type, range, local_port, uc_set);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
 	if (err)
 		goto buffer_out;
 
-	/* Flooding control allows one to decide whether a given port will
-	 * flood unicast traffic for which there is no FDB entry.
-	 */
-	if (only_uc)
-		goto buffer_out;
-
 	mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BM, idx_begin,
-			    table_type, range, local_port, set);
+			    table_type, range, local_port, bm_set);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
 	if (err)
 		goto err_flood_bm_set;
-	else
-		goto buffer_out;
+
+	goto buffer_out;
 
 err_flood_bm_set:
 	mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_UC, idx_begin,
-			    table_type, range, local_port, !set);
+			    table_type, range, local_port, !uc_set);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
 buffer_out:
 	kfree(sftr_pl);
@@ -257,16 +251,43 @@
 	 * the start of the vFIDs range.
 	 */
 	vfid = mlxsw_sp_fid_to_vfid(fid);
-	return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set,
-					 false);
+	return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, set);
+}
+
+static int mlxsw_sp_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+				      bool set)
+{
+	u16 vid;
+	int err;
+
+	if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) {
+		vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port);
+
+		return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid,
+							set);
+	}
+
+	for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) {
+		err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid,
+						       set);
+		if (err)
+			goto err_port_vid_learning_set;
+	}
+
+	return 0;
+
+err_port_vid_learning_set:
+	for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
+		__mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, !set);
+	return err;
 }
 
 static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
 					   struct switchdev_trans *trans,
 					   unsigned long brport_flags)
 {
+	unsigned long learning = mlxsw_sp_port->learning ? BR_LEARNING : 0;
 	unsigned long uc_flood = mlxsw_sp_port->uc_flood ? BR_FLOOD : 0;
-	bool set;
 	int err;
 
 	if (!mlxsw_sp_port->bridged)
@@ -276,17 +297,30 @@
 		return 0;
 
 	if ((uc_flood ^ brport_flags) & BR_FLOOD) {
-		set = mlxsw_sp_port->uc_flood ? false : true;
-		err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, set);
+		err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port,
+						 !mlxsw_sp_port->uc_flood);
 		if (err)
 			return err;
 	}
 
+	if ((learning ^ brport_flags) & BR_LEARNING) {
+		err = mlxsw_sp_port_learning_set(mlxsw_sp_port,
+						 !mlxsw_sp_port->learning);
+		if (err)
+			goto err_port_learning_set;
+	}
+
 	mlxsw_sp_port->uc_flood = brport_flags & BR_FLOOD ? 1 : 0;
 	mlxsw_sp_port->learning = brport_flags & BR_LEARNING ? 1 : 0;
 	mlxsw_sp_port->learning_sync = brport_flags & BR_LEARNING_SYNC ? 1 : 0;
 
 	return 0;
+
+err_port_learning_set:
+	if ((uc_flood ^ brport_flags) & BR_FLOOD)
+		mlxsw_sp_port_uc_flood_set(mlxsw_sp_port,
+					   mlxsw_sp_port->uc_flood);
+	return err;
 }
 
 static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time)
@@ -450,6 +484,8 @@
 
 	kfree(f);
 
+	mlxsw_sp_fid_map(mlxsw_sp, fid, false);
+
 	mlxsw_sp_fid_op(mlxsw_sp, fid, false);
 }
 
@@ -458,6 +494,9 @@
 {
 	struct mlxsw_sp_fid *f;
 
+	if (test_bit(fid, mlxsw_sp_port->active_vlans))
+		return 0;
+
 	f = mlxsw_sp_fid_find(mlxsw_sp_port->mlxsw_sp, fid);
 	if (!f) {
 		f = mlxsw_sp_fid_create(mlxsw_sp_port->mlxsw_sp, fid);
@@ -515,7 +554,7 @@
 	}
 
 	err = __mlxsw_sp_port_flood_set(mlxsw_sp_port, fid_begin, fid_end,
-					true, false);
+					mlxsw_sp_port->uc_flood, true);
 	if (err)
 		goto err_port_flood_set;
 
@@ -633,6 +672,27 @@
 	return 0;
 }
 
+static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port,
+					  u16 vid_begin, u16 vid_end,
+					  bool learn_enable)
+{
+	u16 vid, vid_e;
+	int err;
+
+	for (vid = vid_begin; vid <= vid_end;
+	     vid += MLXSW_REG_SPVMLR_REC_MAX_COUNT) {
+		vid_e = min((u16) (vid + MLXSW_REG_SPVMLR_REC_MAX_COUNT - 1),
+			    vid_end);
+
+		err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid,
+						       vid_e, learn_enable);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port,
 				     u16 vid_begin, u16 vid_end,
 				     bool flag_untagged, bool flag_pvid)
@@ -673,6 +733,14 @@
 		}
 	}
 
+	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end,
+					     mlxsw_sp_port->learning);
+	if (err) {
+		netdev_err(dev, "Failed to set learning for VIDs %d-%d\n",
+			   vid_begin, vid_end);
+		goto err_port_vid_learning_set;
+	}
+
 	/* Changing activity bits only if HW operation succeded */
 	for (vid = vid_begin; vid <= vid_end; vid++) {
 		set_bit(vid, mlxsw_sp_port->active_vlans);
@@ -695,6 +763,9 @@
 err_port_stp_state_set:
 	for (vid = vid_begin; vid <= vid_end; vid++)
 		clear_bit(vid, mlxsw_sp_port->active_vlans);
+	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end,
+				       false);
+err_port_vid_learning_set:
 	if (old_pvid != mlxsw_sp_port->pvid)
 		mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid);
 err_port_pvid_set:
@@ -973,11 +1044,6 @@
 					      SWITCHDEV_OBJ_PORT_VLAN(obj),
 					      trans);
 		break;
-	case SWITCHDEV_OBJ_ID_IPV4_FIB:
-		err = mlxsw_sp_router_fib4_add(mlxsw_sp_port,
-					       SWITCHDEV_OBJ_IPV4_FIB(obj),
-					       trans);
-		break;
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
 		err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port,
 						   SWITCHDEV_OBJ_PORT_FDB(obj),
@@ -997,38 +1063,25 @@
 }
 
 static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
-				     u16 vid_begin, u16 vid_end, bool init)
+				     u16 vid_begin, u16 vid_end)
 {
-	struct net_device *dev = mlxsw_sp_port->dev;
 	u16 vid, pvid;
-	int err;
 
-	if (!init && !mlxsw_sp_port->bridged)
+	if (!mlxsw_sp_port->bridged)
 		return -EINVAL;
 
-	err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end,
-					false, false);
-	if (err) {
-		netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin,
-			   vid_end);
-		return err;
-	}
-
-	if (init)
-		goto out;
+	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end,
+				       false);
 
 	pvid = mlxsw_sp_port->pvid;
-	if (pvid >= vid_begin && pvid <= vid_end) {
-		err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
-		if (err) {
-			netdev_err(dev, "Unable to del PVID %d\n", pvid);
-			return err;
-		}
-	}
+	if (pvid >= vid_begin && pvid <= vid_end)
+		mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0);
+
+	__mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false,
+				  false);
 
 	mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end);
 
-out:
 	/* Changing activity bits only if HW operation succeded */
 	for (vid = vid_begin; vid <= vid_end; vid++)
 		clear_bit(vid, mlxsw_sp_port->active_vlans);
@@ -1039,8 +1092,8 @@
 static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
 				   const struct switchdev_obj_port_vlan *vlan)
 {
-	return __mlxsw_sp_port_vlans_del(mlxsw_sp_port,
-					 vlan->vid_begin, vlan->vid_end, false);
+	return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin,
+					 vlan->vid_end);
 }
 
 void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port)
@@ -1048,7 +1101,7 @@
 	u16 vid;
 
 	for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID)
-		__mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false);
+		__mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid);
 }
 
 static int
@@ -1123,10 +1176,6 @@
 		err = mlxsw_sp_port_vlans_del(mlxsw_sp_port,
 					      SWITCHDEV_OBJ_PORT_VLAN(obj));
 		break;
-	case SWITCHDEV_OBJ_ID_IPV4_FIB:
-		err = mlxsw_sp_router_fib4_del(mlxsw_sp_port,
-					       SWITCHDEV_OBJ_IPV4_FIB(obj));
-		break;
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
 		err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port,
 						   SWITCHDEV_OBJ_PORT_FDB(obj));
@@ -1147,9 +1196,11 @@
 						   u16 lag_id)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_resources *resources;
 	int i;
 
-	for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) {
+	resources = mlxsw_core_resources_get(mlxsw_sp->core);
+	for (i = 0; i < resources->max_ports_in_lag; i++) {
 		mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i);
 		if (mlxsw_sp_port)
 			return mlxsw_sp_port;
@@ -1368,8 +1419,6 @@
 		vid = fid;
 	}
 
-	adding = adding && mlxsw_sp_port->learning;
-
 do_fdb_op:
 	err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid,
 				      adding, true);
@@ -1431,8 +1480,6 @@
 		vid = fid;
 	}
 
-	adding = adding && mlxsw_sp_port->learning;
-
 do_fdb_op:
 	err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid,
 					  adding, true);
@@ -1498,20 +1545,18 @@
 	mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work);
 
 	rtnl_lock();
-	do {
-		mlxsw_reg_sfn_pack(sfn_pl);
-		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
-		if (err) {
-			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n");
-			break;
-		}
-		num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl);
-		for (i = 0; i < num_rec; i++)
-			mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
+	mlxsw_reg_sfn_pack(sfn_pl);
+	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl);
+	if (err) {
+		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n");
+		goto out;
+	}
+	num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl);
+	for (i = 0; i < num_rec; i++)
+		mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i);
 
-	} while (num_rec);
+out:
 	rtnl_unlock();
-
 	kfree(sfn_pl);
 	mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp);
 }
@@ -1546,32 +1591,6 @@
 	mlxsw_sp_fdb_fini(mlxsw_sp);
 }
 
-int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-	struct net_device *dev = mlxsw_sp_port->dev;
-	int err;
-
-	/* Allow only untagged packets to ingress and tag them internally
-	 * with VID 1.
-	 */
-	mlxsw_sp_port->pvid = 1;
-	err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1,
-					true);
-	if (err) {
-		netdev_err(dev, "Unable to init VLANs\n");
-		return err;
-	}
-
-	/* Add implicit VLAN interface in the device, so that untagged
-	 * packets will be classified to the default vFID.
-	 */
-	err = mlxsw_sp_port_add_vid(dev, 0, 1);
-	if (err)
-		netdev_err(dev, "Failed to configure default vFID\n");
-
-	return err;
-}
-
 void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port)
 {
 	mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 377daa4..c0c23e2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -997,7 +997,7 @@
 	/* Each packet needs to have a Tx header (metadata) on top all other
 	 * headers.
 	 */
-	dev->hard_header_len += MLXSW_TXHDR_LEN;
+	dev->needed_headroom = MLXSW_TXHDR_LEN;
 
 	err = mlxsw_sx_port_module_check(mlxsw_sx_port, &usable);
 	if (err) {
@@ -1512,10 +1512,6 @@
 static struct mlxsw_config_profile mlxsw_sx_config_profile = {
 	.used_max_vepa_channels		= 1,
 	.max_vepa_channels		= 0,
-	.used_max_lag			= 1,
-	.max_lag			= 64,
-	.used_max_port_per_lag		= 1,
-	.max_port_per_lag		= 16,
 	.used_max_mid			= 1,
 	.max_mid			= 7000,
 	.used_max_pgt			= 1,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 470d769..ed8e301 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -56,6 +56,10 @@
 	MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
 	MLXSW_TRAP_ID_ARPBC = 0x50,
 	MLXSW_TRAP_ID_ARPUC = 0x51,
+	MLXSW_TRAP_ID_MTUERROR = 0x52,
+	MLXSW_TRAP_ID_TTLERROR = 0x53,
+	MLXSW_TRAP_ID_LBERROR = 0x54,
+	MLXSW_TRAP_ID_OSPF = 0x55,
 	MLXSW_TRAP_ID_IP2ME = 0x5F,
 	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
 	MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index eb807b0..569ade6 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -134,7 +134,7 @@
 
 /* tunables */
 #define RX_BUF_SIZE	1500	/* 8192 */
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define NS83820_VLAN_ACCEL_SUPPORT
 #endif
 
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 6817881..0efb2ba 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -3,6 +3,13 @@
 nfp_netvf-objs := \
 	    nfp_net_common.o \
 	    nfp_net_ethtool.o \
+	    nfp_net_offload.o \
 	    nfp_netvf_main.o
 
+ifeq ($(CONFIG_BPF_SYSCALL),y)
+nfp_netvf-objs += \
+	    nfp_bpf_verifier.o \
+	    nfp_bpf_jit.o
+endif
+
 nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
new file mode 100644
index 0000000..22484b6
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_ASM_H__
+#define __NFP_ASM_H__ 1
+
+#include "nfp_bpf.h"
+
+#define REG_NONE	0
+
+#define RE_REG_NO_DST	0x020
+#define RE_REG_IMM	0x020
+#define RE_REG_IMM_encode(x)					\
+	(RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1))
+#define RE_REG_IMM_MAX	 0x07fULL
+#define RE_REG_XFR	0x080
+
+#define UR_REG_XFR	0x180
+#define UR_REG_NN	0x280
+#define UR_REG_NO_DST	0x300
+#define UR_REG_IMM	UR_REG_NO_DST
+#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x))
+#define UR_REG_IMM_MAX	 0x0ffULL
+
+#define OP_BR_BASE	0x0d800000020ULL
+#define OP_BR_BASE_MASK	0x0f8000c3ce0ULL
+#define OP_BR_MASK	0x0000000001fULL
+#define OP_BR_EV_PIP	0x00000000300ULL
+#define OP_BR_CSS	0x0000003c000ULL
+#define OP_BR_DEFBR	0x00000300000ULL
+#define OP_BR_ADDR_LO	0x007ffc00000ULL
+#define OP_BR_ADDR_HI	0x10000000000ULL
+
+#define nfp_is_br(_insn)				\
+	(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
+
+enum br_mask {
+	BR_BEQ = 0x00,
+	BR_BNE = 0x01,
+	BR_BHS = 0x04,
+	BR_BLO = 0x05,
+	BR_BGE = 0x08,
+	BR_UNC = 0x18,
+};
+
+enum br_ev_pip {
+	BR_EV_PIP_UNCOND = 0,
+	BR_EV_PIP_COND = 1,
+};
+
+enum br_ctx_signal_state {
+	BR_CSS_NONE = 2,
+};
+
+#define OP_BBYTE_BASE	0x0c800000000ULL
+#define OP_BB_A_SRC	0x000000000ffULL
+#define OP_BB_BYTE	0x00000000300ULL
+#define OP_BB_B_SRC	0x0000003fc00ULL
+#define OP_BB_I8	0x00000040000ULL
+#define OP_BB_EQ	0x00000080000ULL
+#define OP_BB_DEFBR	0x00000300000ULL
+#define OP_BB_ADDR_LO	0x007ffc00000ULL
+#define OP_BB_ADDR_HI	0x10000000000ULL
+
+#define OP_BALU_BASE	0x0e800000000ULL
+#define OP_BA_A_SRC	0x000000003ffULL
+#define OP_BA_B_SRC	0x000000ffc00ULL
+#define OP_BA_DEFBR	0x00000300000ULL
+#define OP_BA_ADDR_HI	0x0007fc00000ULL
+
+#define OP_IMMED_A_SRC	0x000000003ffULL
+#define OP_IMMED_B_SRC	0x000000ffc00ULL
+#define OP_IMMED_IMM	0x0000ff00000ULL
+#define OP_IMMED_WIDTH	0x00060000000ULL
+#define OP_IMMED_INV	0x00080000000ULL
+#define OP_IMMED_SHIFT	0x00600000000ULL
+#define OP_IMMED_BASE	0x0f000000000ULL
+#define OP_IMMED_WR_AB	0x20000000000ULL
+
+enum immed_width {
+	IMMED_WIDTH_ALL = 0,
+	IMMED_WIDTH_BYTE = 1,
+	IMMED_WIDTH_WORD = 2,
+};
+
+enum immed_shift {
+	IMMED_SHIFT_0B = 0,
+	IMMED_SHIFT_1B = 1,
+	IMMED_SHIFT_2B = 2,
+};
+
+#define OP_SHF_BASE	0x08000000000ULL
+#define OP_SHF_A_SRC	0x000000000ffULL
+#define OP_SHF_SC	0x00000000300ULL
+#define OP_SHF_B_SRC	0x0000003fc00ULL
+#define OP_SHF_I8	0x00000040000ULL
+#define OP_SHF_SW	0x00000080000ULL
+#define OP_SHF_DST	0x0000ff00000ULL
+#define OP_SHF_SHIFT	0x001f0000000ULL
+#define OP_SHF_OP	0x00e00000000ULL
+#define OP_SHF_DST_AB	0x01000000000ULL
+#define OP_SHF_WR_AB	0x20000000000ULL
+
+enum shf_op {
+	SHF_OP_NONE = 0,
+	SHF_OP_AND = 2,
+	SHF_OP_OR = 5,
+};
+
+enum shf_sc {
+	SHF_SC_R_ROT = 0,
+	SHF_SC_R_SHF = 1,
+	SHF_SC_L_SHF = 2,
+	SHF_SC_R_DSHF = 3,
+};
+
+#define OP_ALU_A_SRC	0x000000003ffULL
+#define OP_ALU_B_SRC	0x000000ffc00ULL
+#define OP_ALU_DST	0x0003ff00000ULL
+#define OP_ALU_SW	0x00040000000ULL
+#define OP_ALU_OP	0x00f80000000ULL
+#define OP_ALU_DST_AB	0x01000000000ULL
+#define OP_ALU_BASE	0x0a000000000ULL
+#define OP_ALU_WR_AB	0x20000000000ULL
+
+enum alu_op {
+	ALU_OP_NONE	= 0x00,
+	ALU_OP_ADD	= 0x01,
+	ALU_OP_NEG	= 0x04,
+	ALU_OP_AND	= 0x08,
+	ALU_OP_SUB_C	= 0x0d,
+	ALU_OP_ADD_C	= 0x11,
+	ALU_OP_OR	= 0x14,
+	ALU_OP_SUB	= 0x15,
+	ALU_OP_XOR	= 0x18,
+};
+
+enum alu_dst_ab {
+	ALU_DST_A = 0,
+	ALU_DST_B = 1,
+};
+
+#define OP_LDF_BASE	0x0c000000000ULL
+#define OP_LDF_A_SRC	0x000000000ffULL
+#define OP_LDF_SC	0x00000000300ULL
+#define OP_LDF_B_SRC	0x0000003fc00ULL
+#define OP_LDF_I8	0x00000040000ULL
+#define OP_LDF_SW	0x00000080000ULL
+#define OP_LDF_ZF	0x00000100000ULL
+#define OP_LDF_BMASK	0x0000f000000ULL
+#define OP_LDF_SHF	0x001f0000000ULL
+#define OP_LDF_WR_AB	0x20000000000ULL
+
+#define OP_CMD_A_SRC	 0x000000000ffULL
+#define OP_CMD_CTX	 0x00000000300ULL
+#define OP_CMD_B_SRC	 0x0000003fc00ULL
+#define OP_CMD_TOKEN	 0x000000c0000ULL
+#define OP_CMD_XFER	 0x00001f00000ULL
+#define OP_CMD_CNT	 0x0000e000000ULL
+#define OP_CMD_SIG	 0x000f0000000ULL
+#define OP_CMD_TGT_CMD	 0x07f00000000ULL
+#define OP_CMD_MODE	0x1c0000000000ULL
+
+struct cmd_tgt_act {
+	u8 token;
+	u8 tgt_cmd;
+};
+
+enum cmd_tgt_map {
+	CMD_TGT_READ8,
+	CMD_TGT_WRITE8,
+	CMD_TGT_READ_LE,
+	CMD_TGT_READ_SWAP_LE,
+	__CMD_TGT_MAP_SIZE,
+};
+
+enum cmd_mode {
+	CMD_MODE_40b_AB	= 0,
+	CMD_MODE_40b_BA	= 1,
+	CMD_MODE_32b	= 4,
+};
+
+enum cmd_ctx_swap {
+	CMD_CTX_SWAP = 0,
+	CMD_CTX_NO_SWAP = 3,
+};
+
+#define OP_LCSR_BASE	0x0fc00000000ULL
+#define OP_LCSR_A_SRC	0x000000003ffULL
+#define OP_LCSR_B_SRC	0x000000ffc00ULL
+#define OP_LCSR_WRITE	0x00000200000ULL
+#define OP_LCSR_ADDR	0x001ffc00000ULL
+
+enum lcsr_wr_src {
+	LCSR_WR_AREG,
+	LCSR_WR_BREG,
+	LCSR_WR_IMM,
+};
+
+#define OP_CARB_BASE	0x0e000000000ULL
+#define OP_CARB_OR	0x00000010000ULL
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
new file mode 100644
index 0000000..87aa8a3
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __NFP_BPF_H__
+#define __NFP_BPF_H__ 1
+
+#include <linux/bitfield.h>
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/types.h>
+
+#define FIELD_FIT(mask, val)  (!((((u64)val) << __bf_shf(mask)) & ~(mask)))
+
+/* For branch fixup logic use up-most byte of branch instruction as scratch
+ * area.  Remember to clear this before sending instructions to HW!
+ */
+#define OP_BR_SPECIAL	0xff00000000000000ULL
+
+enum br_special {
+	OP_BR_NORMAL = 0,
+	OP_BR_GO_OUT,
+	OP_BR_GO_ABORT,
+};
+
+enum static_regs {
+	STATIC_REG_PKT		= 1,
+#define REG_PKT_BANK	ALU_DST_A
+	STATIC_REG_IMM		= 2, /* Bank AB */
+};
+
+enum nfp_bpf_action_type {
+	NN_ACT_TC_DROP,
+	NN_ACT_TC_REDIR,
+	NN_ACT_DIRECT,
+};
+
+/* Software register representation, hardware encoding in asm.h */
+#define NN_REG_TYPE	GENMASK(31, 24)
+#define NN_REG_VAL	GENMASK(7, 0)
+
+enum nfp_bpf_reg_type {
+	NN_REG_GPR_A =	BIT(0),
+	NN_REG_GPR_B =	BIT(1),
+	NN_REG_NNR =	BIT(2),
+	NN_REG_XFER =	BIT(3),
+	NN_REG_IMM =	BIT(4),
+	NN_REG_NONE =	BIT(5),
+};
+
+#define NN_REG_GPR_BOTH	(NN_REG_GPR_A | NN_REG_GPR_B)
+
+#define reg_both(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH))
+#define reg_a(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A))
+#define reg_b(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B))
+#define reg_nnr(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR))
+#define reg_xfer(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER))
+#define reg_imm(x)	((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM))
+#define reg_none()	(FIELD_PREP(NN_REG_TYPE, NN_REG_NONE))
+
+#define pkt_reg(np)	reg_a((np)->regs_per_thread - STATIC_REG_PKT)
+#define imm_a(np)	reg_a((np)->regs_per_thread - STATIC_REG_IMM)
+#define imm_b(np)	reg_b((np)->regs_per_thread - STATIC_REG_IMM)
+#define imm_both(np)	reg_both((np)->regs_per_thread - STATIC_REG_IMM)
+
+#define NFP_BPF_ABI_FLAGS	reg_nnr(0)
+#define   NFP_BPF_ABI_FLAG_MARK	1
+#define NFP_BPF_ABI_MARK	reg_nnr(1)
+#define NFP_BPF_ABI_PKT		reg_nnr(2)
+#define NFP_BPF_ABI_LEN		reg_nnr(3)
+
+struct nfp_prog;
+struct nfp_insn_meta;
+typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
+
+#define nfp_prog_first_meta(nfp_prog)					\
+	list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
+#define nfp_prog_last_meta(nfp_prog)					\
+	list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l)
+#define nfp_meta_next(meta)	list_next_entry(meta, l)
+#define nfp_meta_prev(meta)	list_prev_entry(meta, l)
+
+/**
+ * struct nfp_insn_meta - BPF instruction wrapper
+ * @insn: BPF instruction
+ * @off: index of first generated machine instruction (in nfp_prog.prog)
+ * @n: eBPF instruction number
+ * @skip: skip this instruction (optimized out)
+ * @double_cb: callback for second part of the instruction
+ * @l: link on nfp_prog->insns list
+ */
+struct nfp_insn_meta {
+	struct bpf_insn insn;
+	unsigned int off;
+	unsigned short n;
+	bool skip;
+	instr_cb_t double_cb;
+
+	struct list_head l;
+};
+
+#define BPF_SIZE_MASK	0x18
+
+static inline u8 mbpf_class(const struct nfp_insn_meta *meta)
+{
+	return BPF_CLASS(meta->insn.code);
+}
+
+static inline u8 mbpf_src(const struct nfp_insn_meta *meta)
+{
+	return BPF_SRC(meta->insn.code);
+}
+
+static inline u8 mbpf_op(const struct nfp_insn_meta *meta)
+{
+	return BPF_OP(meta->insn.code);
+}
+
+static inline u8 mbpf_mode(const struct nfp_insn_meta *meta)
+{
+	return BPF_MODE(meta->insn.code);
+}
+
+/**
+ * struct nfp_prog - nfp BPF program
+ * @prog: machine code
+ * @prog_len: number of valid instructions in @prog array
+ * @__prog_alloc_len: alloc size of @prog array
+ * @act: BPF program/action type (TC DA, TC with action, XDP etc.)
+ * @num_regs: number of registers used by this program
+ * @regs_per_thread: number of basic registers allocated per thread
+ * @start_off: address of the first instruction in the memory
+ * @tgt_out: jump target for normal exit
+ * @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
+ * @tgt_done: jump target to get the next packet
+ * @n_translated: number of successfully translated instructions (for errors)
+ * @error: error code if something went wrong
+ * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
+ */
+struct nfp_prog {
+	u64 *prog;
+	unsigned int prog_len;
+	unsigned int __prog_alloc_len;
+
+	enum nfp_bpf_action_type act;
+
+	unsigned int num_regs;
+	unsigned int regs_per_thread;
+
+	unsigned int start_off;
+	unsigned int tgt_out;
+	unsigned int tgt_abort;
+	unsigned int tgt_done;
+
+	unsigned int n_translated;
+	int error;
+
+	struct list_head insns;
+};
+
+struct nfp_bpf_result {
+	unsigned int n_instr;
+	bool dense_mode;
+};
+
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act,
+	    unsigned int prog_start, unsigned int prog_done,
+	    unsigned int prog_sz, struct nfp_bpf_result *res);
+
+int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog);
+
+#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
new file mode 100644
index 0000000..f8df530
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
@@ -0,0 +1,1813 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt)	"NFP net bpf: " fmt
+
+#include <linux/kernel.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/pkt_cls.h>
+#include <linux/unistd.h>
+
+#include "nfp_asm.h"
+#include "nfp_bpf.h"
+
+/* --- NFP prog --- */
+/* Foreach "multiple" entries macros provide pos and next<n> pointers.
+ * It's safe to modify the next pointers (but not pos).
+ */
+#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
+	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
+	     next = list_next_entry(pos, l);			\
+	     &(nfp_prog)->insns != &pos->l &&			\
+	     &(nfp_prog)->insns != &next->l;			\
+	     pos = nfp_meta_next(pos),				\
+	     next = nfp_meta_next(pos))
+
+#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
+	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
+	     next = list_next_entry(pos, l),			\
+	     next2 = list_next_entry(next, l);			\
+	     &(nfp_prog)->insns != &pos->l &&			\
+	     &(nfp_prog)->insns != &next->l &&			\
+	     &(nfp_prog)->insns != &next2->l;			\
+	     pos = nfp_meta_next(pos),				\
+	     next = nfp_meta_next(pos),				\
+	     next2 = nfp_meta_next(next))
+
+static bool
+nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return meta->l.next != &nfp_prog->insns;
+}
+
+static bool
+nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return meta->l.prev != &nfp_prog->insns;
+}
+
+static void nfp_prog_free(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta, *tmp;
+
+	list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) {
+		list_del(&meta->l);
+		kfree(meta);
+	}
+	kfree(nfp_prog);
+}
+
+static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
+{
+	if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) {
+		nfp_prog->error = -ENOSPC;
+		return;
+	}
+
+	nfp_prog->prog[nfp_prog->prog_len] = insn;
+	nfp_prog->prog_len++;
+}
+
+static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
+{
+	return nfp_prog->start_off + nfp_prog->prog_len;
+}
+
+static unsigned int
+nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
+{
+	return offset - nfp_prog->start_off;
+}
+
+/* --- SW reg --- */
+struct nfp_insn_ur_regs {
+	enum alu_dst_ab dst_ab;
+	u16 dst;
+	u16 areg, breg;
+	bool swap;
+	bool wr_both;
+};
+
+struct nfp_insn_re_regs {
+	enum alu_dst_ab dst_ab;
+	u8 dst;
+	u8 areg, breg;
+	bool swap;
+	bool wr_both;
+	bool i8;
+};
+
+static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst)
+{
+	u16 val = FIELD_GET(NN_REG_VAL, swreg);
+
+	switch (FIELD_GET(NN_REG_TYPE, swreg)) {
+	case NN_REG_GPR_A:
+	case NN_REG_GPR_B:
+	case NN_REG_GPR_BOTH:
+		return val;
+	case NN_REG_NNR:
+		return UR_REG_NN | val;
+	case NN_REG_XFER:
+		return UR_REG_XFR | val;
+	case NN_REG_IMM:
+		if (val & ~0xff) {
+			pr_err("immediate too large\n");
+			return 0;
+		}
+		return UR_REG_IMM_encode(val);
+	case NN_REG_NONE:
+		return is_dst ? UR_REG_NO_DST : REG_NONE;
+	default:
+		pr_err("unrecognized reg encoding %08x\n", swreg);
+		return 0;
+	}
+}
+
+static int
+swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg)
+{
+	memset(reg, 0, sizeof(*reg));
+
+	/* Decode destination */
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
+		reg->dst_ab = ALU_DST_B;
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
+		reg->wr_both = true;
+	reg->dst = nfp_swreg_to_unreg(dst, true);
+
+	/* Decode source operands */
+	if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
+	    FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
+		reg->areg = nfp_swreg_to_unreg(rreg, false);
+		reg->breg = nfp_swreg_to_unreg(lreg, false);
+		reg->swap = true;
+	} else {
+		reg->areg = nfp_swreg_to_unreg(lreg, false);
+		reg->breg = nfp_swreg_to_unreg(rreg, false);
+	}
+
+	return 0;
+}
+
+static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8)
+{
+	u16 val = FIELD_GET(NN_REG_VAL, swreg);
+
+	switch (FIELD_GET(NN_REG_TYPE, swreg)) {
+	case NN_REG_GPR_A:
+	case NN_REG_GPR_B:
+	case NN_REG_GPR_BOTH:
+		return val;
+	case NN_REG_XFER:
+		return RE_REG_XFR | val;
+	case NN_REG_IMM:
+		if (val & ~(0x7f | has_imm8 << 7)) {
+			pr_err("immediate too large\n");
+			return 0;
+		}
+		*i8 = val & 0x80;
+		return RE_REG_IMM_encode(val & 0x7f);
+	case NN_REG_NONE:
+		return is_dst ? RE_REG_NO_DST : REG_NONE;
+	default:
+		pr_err("unrecognized reg encoding\n");
+		return 0;
+	}
+}
+
+static int
+swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg,
+		    bool has_imm8)
+{
+	memset(reg, 0, sizeof(*reg));
+
+	/* Decode destination */
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
+		reg->dst_ab = ALU_DST_B;
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
+		reg->wr_both = true;
+	reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
+
+	/* Decode source operands */
+	if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
+		return -EFAULT;
+
+	if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
+	    FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
+		reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+		reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+		reg->swap = true;
+	} else {
+		reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+		reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+	}
+
+	return 0;
+}
+
+/* --- Emitters --- */
+static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
+	[CMD_TGT_WRITE8] =		{ 0x00, 0x42 },
+	[CMD_TGT_READ8] =		{ 0x01, 0x43 },
+	[CMD_TGT_READ_LE] =		{ 0x01, 0x40 },
+	[CMD_TGT_READ_SWAP_LE] =	{ 0x03, 0x40 },
+};
+
+static void
+__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
+	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync)
+{
+	enum cmd_ctx_swap ctx;
+	u64 insn;
+
+	if (sync)
+		ctx = CMD_CTX_SWAP;
+	else
+		ctx = CMD_CTX_NO_SWAP;
+
+	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
+		FIELD_PREP(OP_CMD_CTX, ctx) |
+		FIELD_PREP(OP_CMD_B_SRC, breg) |
+		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
+		FIELD_PREP(OP_CMD_XFER, xfer) |
+		FIELD_PREP(OP_CMD_CNT, size) |
+		FIELD_PREP(OP_CMD_SIG, sync) |
+		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
+		FIELD_PREP(OP_CMD_MODE, mode);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
+	 u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+	if (reg.swap) {
+		pr_err("cmd can't swap arguments\n");
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync);
+}
+
+static void
+__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
+	  enum br_ctx_signal_state css, u16 addr, u8 defer)
+{
+	u16 addr_lo, addr_hi;
+	u64 insn;
+
+	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
+	addr_hi = addr != addr_lo;
+
+	insn = OP_BR_BASE |
+		FIELD_PREP(OP_BR_MASK, mask) |
+		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
+		FIELD_PREP(OP_BR_CSS, css) |
+		FIELD_PREP(OP_BR_DEFBR, defer) |
+		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
+		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer)
+{
+	if (defer > 2) {
+		pr_err("BUG: branch defer out of bounds %d\n", defer);
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+	__emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer);
+}
+
+static void
+emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
+{
+	__emit_br(nfp_prog, mask,
+		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
+		  BR_CSS_NONE, addr, defer);
+}
+
+static void
+__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8,
+	       u8 byte, bool equal, u16 addr, u8 defer)
+{
+	u16 addr_lo, addr_hi;
+	u64 insn;
+
+	addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO));
+	addr_hi = addr != addr_lo;
+
+	insn = OP_BBYTE_BASE |
+		FIELD_PREP(OP_BB_A_SRC, areg) |
+		FIELD_PREP(OP_BB_BYTE, byte) |
+		FIELD_PREP(OP_BB_B_SRC, breg) |
+		FIELD_PREP(OP_BB_I8, imm8) |
+		FIELD_PREP(OP_BB_EQ, equal) |
+		FIELD_PREP(OP_BB_DEFBR, defer) |
+		FIELD_PREP(OP_BB_ADDR_LO, addr_lo) |
+		FIELD_PREP(OP_BB_ADDR_HI, addr_hi);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_br_byte_neq(struct nfp_prog *nfp_prog,
+		 u32 dst, u8 imm, u8 byte, u16 addr, u8 defer)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), &reg, true);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr,
+		       defer);
+}
+
+static void
+__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
+	     enum immed_width width, bool invert,
+	     enum immed_shift shift, bool wr_both)
+{
+	u64 insn;
+
+	insn = OP_IMMED_BASE |
+		FIELD_PREP(OP_IMMED_A_SRC, areg) |
+		FIELD_PREP(OP_IMMED_B_SRC, breg) |
+		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
+		FIELD_PREP(OP_IMMED_WIDTH, width) |
+		FIELD_PREP(OP_IMMED_INV, invert) |
+		FIELD_PREP(OP_IMMED_SHIFT, shift) |
+		FIELD_PREP(OP_IMMED_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm,
+	   enum immed_width width, bool invert, enum immed_shift shift)
+{
+	struct nfp_insn_ur_regs reg;
+	int err;
+
+	if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) {
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width,
+		     invert, shift, reg.wr_both);
+}
+
+static void
+__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
+	   enum shf_sc sc, u8 shift,
+	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both)
+{
+	u64 insn;
+
+	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	if (sc == SHF_SC_L_SHF)
+		shift = 32 - shift;
+
+	insn = OP_SHF_BASE |
+		FIELD_PREP(OP_SHF_A_SRC, areg) |
+		FIELD_PREP(OP_SHF_SC, sc) |
+		FIELD_PREP(OP_SHF_B_SRC, breg) |
+		FIELD_PREP(OP_SHF_I8, i8) |
+		FIELD_PREP(OP_SHF_SW, sw) |
+		FIELD_PREP(OP_SHF_DST, dst) |
+		FIELD_PREP(OP_SHF_SHIFT, shift) |
+		FIELD_PREP(OP_SHF_OP, op) |
+		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
+		FIELD_PREP(OP_SHF_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg,
+	 enum shf_sc sc, u8 shift)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
+		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both);
+}
+
+static void
+__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
+	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both)
+{
+	u64 insn;
+
+	insn = OP_ALU_BASE |
+		FIELD_PREP(OP_ALU_A_SRC, areg) |
+		FIELD_PREP(OP_ALU_B_SRC, breg) |
+		FIELD_PREP(OP_ALU_DST, dst) |
+		FIELD_PREP(OP_ALU_SW, swap) |
+		FIELD_PREP(OP_ALU_OP, op) |
+		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
+		FIELD_PREP(OP_ALU_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg)
+{
+	struct nfp_insn_ur_regs reg;
+	int err;
+
+	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
+		   reg.areg, op, reg.breg, reg.swap, reg.wr_both);
+}
+
+static void
+__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
+		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
+		bool zero, bool swap, bool wr_both)
+{
+	u64 insn;
+
+	insn = OP_LDF_BASE |
+		FIELD_PREP(OP_LDF_A_SRC, areg) |
+		FIELD_PREP(OP_LDF_SC, sc) |
+		FIELD_PREP(OP_LDF_B_SRC, breg) |
+		FIELD_PREP(OP_LDF_I8, imm8) |
+		FIELD_PREP(OP_LDF_SW, swap) |
+		FIELD_PREP(OP_LDF_ZF, zero) |
+		FIELD_PREP(OP_LDF_BMASK, bmask) |
+		FIELD_PREP(OP_LDF_SHF, shift) |
+		FIELD_PREP(OP_LDF_WR_AB, wr_both);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift,
+		  u32 dst, u8 bmask, u32 src, bool zero)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	err = swreg_to_restricted(reg_none(), dst, src, &reg, true);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
+			reg.i8, zero, reg.swap, reg.wr_both);
+}
+
+static void
+emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src,
+	      enum shf_sc sc, u8 shift)
+{
+	emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false);
+}
+
+/* --- Wrappers --- */
+static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
+{
+	if (!(imm & 0xffff0000)) {
+		*val = imm;
+		*shift = IMMED_SHIFT_0B;
+	} else if (!(imm & 0xff0000ff)) {
+		*val = imm >> 8;
+		*shift = IMMED_SHIFT_1B;
+	} else if (!(imm & 0x0000ffff)) {
+		*val = imm >> 16;
+		*shift = IMMED_SHIFT_2B;
+	} else {
+		return false;
+	}
+
+	return true;
+}
+
+static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm)
+{
+	enum immed_shift shift;
+	u16 val;
+
+	if (pack_immed(imm, &val, &shift)) {
+		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
+	} else if (pack_immed(~imm, &val, &shift)) {
+		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
+	} else {
+		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
+			   false, IMMED_SHIFT_0B);
+		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
+			   false, IMMED_SHIFT_2B);
+	}
+}
+
+/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
+ * If the @imm is small enough encode it directly in operand and return
+ * otherwise load @imm to a spare register and return its encoding.
+ */
+static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+{
+	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
+		return reg_imm(imm);
+
+	wrp_immed(nfp_prog, tmp_reg, imm);
+	return tmp_reg;
+}
+
+/* re_load_imm_any() - encode immediate or use tmp register (restricted)
+ * If the @imm is small enough encode it directly in operand and return
+ * otherwise load @imm to a spare register and return its encoding.
+ */
+static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+{
+	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
+		return reg_imm(imm);
+
+	wrp_immed(nfp_prog, tmp_reg, imm);
+	return tmp_reg;
+}
+
+static void
+wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask,
+	       enum br_special special)
+{
+	emit_br(nfp_prog, mask, 0, 0);
+
+	nfp_prog->prog[nfp_prog->prog_len - 1] |=
+		FIELD_PREP(OP_BR_SPECIAL, special);
+}
+
+static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
+{
+	emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src));
+}
+
+static int
+construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset,
+		      u16 src, bool src_valid, u8 size)
+{
+	unsigned int i;
+	u16 shift, sz;
+	u32 tmp_reg;
+
+	/* We load the value from the address indicated in @offset and then
+	 * shift out the data we don't need.  Note: this is big endian!
+	 */
+	sz = size < 4 ? 4 : size;
+	shift = size < 4 ? 4 - size : 0;
+
+	if (src_valid) {
+		/* Calculate the true offset (src_reg + imm) */
+		tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+		emit_alu(nfp_prog, imm_both(nfp_prog),
+			 reg_a(src), ALU_OP_ADD, tmp_reg);
+		/* Check packet length (size guaranteed to fit b/c it's u8) */
+		emit_alu(nfp_prog, imm_a(nfp_prog),
+			 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
+		emit_alu(nfp_prog, reg_none(),
+			 NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog));
+		wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+		/* Load data */
+		emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+			 pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true);
+	} else {
+		/* Check packet length */
+		tmp_reg = ur_load_imm_any(nfp_prog, offset + size,
+					  imm_a(nfp_prog));
+		emit_alu(nfp_prog, reg_none(),
+			 NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg);
+		wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+		/* Load data */
+		tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+		emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+			 pkt_reg(nfp_prog), tmp_reg, sz - 1, true);
+	}
+
+	i = 0;
+	if (shift)
+		emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE,
+			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
+	else
+		for (; i * 4 < size; i++)
+			emit_alu(nfp_prog, reg_both(i),
+				 reg_none(), ALU_OP_NONE, reg_xfer(i));
+
+	if (i < 2)
+		wrp_immed(nfp_prog, reg_both(1), 0);
+
+	return 0;
+}
+
+static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
+{
+	return construct_data_ind_ld(nfp_prog, offset, 0, false, size);
+}
+
+static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src)
+{
+	emit_alu(nfp_prog, NFP_BPF_ABI_MARK,
+		 reg_none(), ALU_OP_NONE, reg_b(src));
+	emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS,
+		 NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK));
+
+	return 0;
+}
+
+static void
+wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
+{
+	u32 tmp_reg;
+
+	if (alu_op == ALU_OP_AND) {
+		if (!imm)
+			wrp_immed(nfp_prog, reg_both(dst), 0);
+		if (!imm || !~imm)
+			return;
+	}
+	if (alu_op == ALU_OP_OR) {
+		if (!~imm)
+			wrp_immed(nfp_prog, reg_both(dst), ~0U);
+		if (!imm || !~imm)
+			return;
+	}
+	if (alu_op == ALU_OP_XOR) {
+		if (!~imm)
+			emit_alu(nfp_prog, reg_both(dst), reg_none(),
+				 ALU_OP_NEG, reg_b(dst));
+		if (!imm || !~imm)
+			return;
+	}
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
+}
+
+static int
+wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op, bool skip)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+
+	if (skip) {
+		meta->skip = true;
+		return 0;
+	}
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
+
+	return 0;
+}
+
+static int
+wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op)
+{
+	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
+
+	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
+	emit_alu(nfp_prog, reg_both(dst + 1),
+		 reg_a(dst + 1), alu_op, reg_b(src + 1));
+
+	return 0;
+}
+
+static int
+wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op, bool skip)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (skip) {
+		meta->skip = true;
+		return 0;
+	}
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int
+wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	      enum alu_op alu_op)
+{
+	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
+
+	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static void
+wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
+		 enum br_mask br_mask, u16 off)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
+	emit_br(nfp_prog, br_mask, off, 0);
+}
+
+static int
+wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	     enum alu_op alu_op, enum br_mask br_mask)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
+			 insn->src_reg * 2, br_mask, insn->off);
+	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
+			 insn->src_reg * 2 + 1, br_mask, insn->off);
+
+	return 0;
+}
+
+static int
+wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	    enum br_mask br_mask, bool swap)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u8 reg = insn->dst_reg * 2;
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+	if (!swap)
+		emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg);
+	else
+		emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg));
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+	if (!swap)
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg);
+	else
+		emit_alu(nfp_prog, reg_none(),
+			 tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1));
+
+	emit_br(nfp_prog, br_mask, insn->off, 0);
+
+	return 0;
+}
+
+static int
+wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+	    enum br_mask br_mask, bool swap)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (swap) {
+		areg ^= breg;
+		breg ^= areg;
+		areg ^= breg;
+	}
+
+	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
+	emit_br(nfp_prog, br_mask, insn->off, 0);
+
+	return 0;
+}
+
+/* --- Callbacks --- */
+static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1);
+
+	return 0;
+}
+
+static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	u64 imm = meta->insn.imm; /* sign extend */
+
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
+
+	return 0;
+}
+
+static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
+}
+
+static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
+}
+
+static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
+}
+
+static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+}
+
+static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
+}
+
+static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+}
+
+static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
+		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
+		 reg_b(insn->src_reg * 2));
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
+		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
+		 reg_b(insn->src_reg * 2 + 1));
+
+	return 0;
+}
+
+static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
+
+	return 0;
+}
+
+static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
+		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
+		 reg_b(insn->src_reg * 2));
+	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
+		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
+		 reg_b(insn->src_reg * 2 + 1));
+
+	return 0;
+}
+
+static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
+	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
+
+	return 0;
+}
+
+static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->imm != 32)
+		return 1; /* TODO */
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0);
+
+	return 0;
+}
+
+static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->imm != 32)
+		return 1; /* TODO */
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	wrp_reg_mov(nfp_prog, insn->dst_reg * 2,  insn->src_reg * 2);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
+}
+
+static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm);
+}
+
+static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
+}
+
+static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
+}
+
+static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
+}
+
+static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
+}
+
+static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
+}
+
+static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm);
+}
+
+static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
+}
+
+static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm);
+}
+
+static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (!insn->imm)
+		return 1; /* TODO: zero shift means indirect */
+
+	emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
+		 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
+		 SHF_SC_L_SHF, insn->imm);
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1),
+		  meta->insn.imm);
+
+	return 0;
+}
+
+static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	meta->double_cb = imm_ld8_part2;
+	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
+
+	return 0;
+}
+
+static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
+}
+
+static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
+}
+
+static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
+}
+
+static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+				     meta->insn.src_reg * 2, true, 1);
+}
+
+static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+				     meta->insn.src_reg * 2, true, 2);
+}
+
+static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
+				     meta->insn.src_reg * 2, true, 4);
+}
+
+static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (meta->insn.off == offsetof(struct sk_buff, len))
+		emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+			 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN);
+	else
+		return -ENOTSUPP;
+
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+	return 0;
+}
+
+static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (meta->insn.off == offsetof(struct sk_buff, mark))
+		return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2);
+
+	return -ENOTSUPP;
+}
+
+static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (meta->insn.off < 0) /* TODO */
+		return -ENOTSUPP;
+	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
+
+	return 0;
+}
+
+static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1);
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (imm & ~0U) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+		emit_alu(nfp_prog, imm_a(nfp_prog),
+			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+		or1 = imm_a(nfp_prog);
+	}
+
+	if (imm >> 32) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+		emit_alu(nfp_prog, imm_b(nfp_prog),
+			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
+		or2 = imm_b(nfp_prog);
+	}
+
+	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
+	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+	return 0;
+}
+
+static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
+}
+
+static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
+}
+
+static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (!imm) {
+		meta->skip = true;
+		return 0;
+	}
+
+	if (imm & ~0U) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
+		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+	}
+
+	if (imm >> 32) {
+		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+		emit_alu(nfp_prog, reg_none(),
+			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
+		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+	}
+
+	return 0;
+}
+
+static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 imm = insn->imm; /* sign extend */
+	u32 tmp_reg;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	if (!imm) {
+		emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
+			 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
+		emit_br(nfp_prog, BR_BNE, insn->off, 0);
+	}
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
+	emit_br(nfp_prog, BR_BNE, insn->off, 0);
+
+	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
+	emit_alu(nfp_prog, reg_none(),
+		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
+	emit_br(nfp_prog, BR_BNE, insn->off, 0);
+
+	return 0;
+}
+
+static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+
+	if (insn->off < 0) /* TODO */
+		return -ENOTSUPP;
+
+	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
+		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
+	emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
+		 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
+	emit_alu(nfp_prog, reg_none(),
+		 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
+	emit_br(nfp_prog, BR_BEQ, insn->off, 0);
+
+	return 0;
+}
+
+static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
+}
+
+static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
+}
+
+static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
+}
+
+static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
+}
+
+static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT);
+
+	return 0;
+}
+
+static const instr_cb_t instr_cb[256] = {
+	[BPF_ALU64 | BPF_MOV | BPF_X] =	mov_reg64,
+	[BPF_ALU64 | BPF_MOV | BPF_K] =	mov_imm64,
+	[BPF_ALU64 | BPF_XOR | BPF_X] =	xor_reg64,
+	[BPF_ALU64 | BPF_XOR | BPF_K] =	xor_imm64,
+	[BPF_ALU64 | BPF_AND | BPF_X] =	and_reg64,
+	[BPF_ALU64 | BPF_AND | BPF_K] =	and_imm64,
+	[BPF_ALU64 | BPF_OR | BPF_X] =	or_reg64,
+	[BPF_ALU64 | BPF_OR | BPF_K] =	or_imm64,
+	[BPF_ALU64 | BPF_ADD | BPF_X] =	add_reg64,
+	[BPF_ALU64 | BPF_ADD | BPF_K] =	add_imm64,
+	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
+	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
+	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
+	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
+	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
+	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
+	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
+	[BPF_ALU | BPF_XOR | BPF_K] =	xor_imm,
+	[BPF_ALU | BPF_AND | BPF_X] =	and_reg,
+	[BPF_ALU | BPF_AND | BPF_K] =	and_imm,
+	[BPF_ALU | BPF_OR | BPF_X] =	or_reg,
+	[BPF_ALU | BPF_OR | BPF_K] =	or_imm,
+	[BPF_ALU | BPF_ADD | BPF_X] =	add_reg,
+	[BPF_ALU | BPF_ADD | BPF_K] =	add_imm,
+	[BPF_ALU | BPF_SUB | BPF_X] =	sub_reg,
+	[BPF_ALU | BPF_SUB | BPF_K] =	sub_imm,
+	[BPF_ALU | BPF_LSH | BPF_K] =	shl_imm,
+	[BPF_LD | BPF_IMM | BPF_DW] =	imm_ld8,
+	[BPF_LD | BPF_ABS | BPF_B] =	data_ld1,
+	[BPF_LD | BPF_ABS | BPF_H] =	data_ld2,
+	[BPF_LD | BPF_ABS | BPF_W] =	data_ld4,
+	[BPF_LD | BPF_IND | BPF_B] =	data_ind_ld1,
+	[BPF_LD | BPF_IND | BPF_H] =	data_ind_ld2,
+	[BPF_LD | BPF_IND | BPF_W] =	data_ind_ld4,
+	[BPF_LDX | BPF_MEM | BPF_W] =	mem_ldx4,
+	[BPF_STX | BPF_MEM | BPF_W] =	mem_stx4,
+	[BPF_JMP | BPF_JA | BPF_K] =	jump,
+	[BPF_JMP | BPF_JEQ | BPF_K] =	jeq_imm,
+	[BPF_JMP | BPF_JGT | BPF_K] =	jgt_imm,
+	[BPF_JMP | BPF_JGE | BPF_K] =	jge_imm,
+	[BPF_JMP | BPF_JSET | BPF_K] =	jset_imm,
+	[BPF_JMP | BPF_JNE | BPF_K] =	jne_imm,
+	[BPF_JMP | BPF_JEQ | BPF_X] =	jeq_reg,
+	[BPF_JMP | BPF_JGT | BPF_X] =	jgt_reg,
+	[BPF_JMP | BPF_JGE | BPF_X] =	jge_reg,
+	[BPF_JMP | BPF_JSET | BPF_X] =	jset_reg,
+	[BPF_JMP | BPF_JNE | BPF_X] =	jne_reg,
+	[BPF_JMP | BPF_EXIT] =		goto_out,
+};
+
+/* --- Misc code --- */
+static void br_set_offset(u64 *instr, u16 offset)
+{
+	u16 addr_lo, addr_hi;
+
+	addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
+	addr_hi = offset != addr_lo;
+	*instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO);
+	*instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
+	*instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo);
+}
+
+/* --- Assembler logic --- */
+static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta, *next;
+	u32 off, br_idx;
+	u32 idx;
+
+	nfp_for_each_insn_walk2(nfp_prog, meta, next) {
+		if (meta->skip)
+			continue;
+		if (BPF_CLASS(meta->insn.code) != BPF_JMP)
+			continue;
+
+		br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1;
+		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
+			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
+			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
+			return -ELOOP;
+		}
+		/* Leave special branches for later */
+		if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]))
+			continue;
+
+		/* Find the target offset in assembler realm */
+		off = meta->insn.off;
+		if (!off) {
+			pr_err("Fixup found zero offset!!\n");
+			return -ELOOP;
+		}
+
+		while (off && nfp_meta_has_next(nfp_prog, next)) {
+			next = nfp_meta_next(next);
+			off--;
+		}
+		if (off) {
+			pr_err("Fixup found too large jump!! %d\n", off);
+			return -ELOOP;
+		}
+
+		if (next->skip) {
+			pr_err("Branch landing on removed instruction!!\n");
+			return -ELOOP;
+		}
+
+		for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off);
+		     idx <= br_idx; idx++) {
+			if (!nfp_is_br(nfp_prog->prog[idx]))
+				continue;
+			br_set_offset(&nfp_prog->prog[idx], next->off);
+		}
+	}
+
+	/* Fixup 'goto out's separately, they can be scattered around */
+	for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) {
+		enum br_special special;
+
+		if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE)
+			continue;
+
+		special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]);
+		switch (special) {
+		case OP_BR_NORMAL:
+			break;
+		case OP_BR_GO_OUT:
+			br_set_offset(&nfp_prog->prog[br_idx],
+				      nfp_prog->tgt_out);
+			break;
+		case OP_BR_GO_ABORT:
+			br_set_offset(&nfp_prog->prog[br_idx],
+				      nfp_prog->tgt_abort);
+			break;
+		}
+
+		nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL;
+	}
+
+	return 0;
+}
+
+static void nfp_intro(struct nfp_prog *nfp_prog)
+{
+	emit_alu(nfp_prog, pkt_reg(nfp_prog),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
+}
+
+static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog)
+{
+	const u8 act2code[] = {
+		[NN_ACT_TC_DROP]  = 0x22,
+		[NN_ACT_TC_REDIR] = 0x24
+	};
+	/* Target for aborts */
+	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+	wrp_immed(nfp_prog, reg_both(0), 0);
+
+	/* Target for normal exits */
+	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+	/* Legacy TC mode:
+	 *   0        0x11 -> pass,  count as stat0
+	 *  -1  drop  0x22 -> drop,  count as stat1
+	 *     redir  0x24 -> redir, count as stat1
+	 *  ife mark  0x21 -> pass,  count as stat1
+	 *  ife + tx  0x24 -> redir, count as stat1
+	 */
+	emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2);
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
+
+	emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]),
+		      SHF_SC_L_SHF, 16);
+}
+
+static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
+{
+	/* TC direct-action mode:
+	 *   0,1   ok        NOT SUPPORTED[1]
+	 *   2   drop  0x22 -> drop,  count as stat1
+	 *   4,5 nuke  0x02 -> drop
+	 *   7  redir  0x44 -> redir, count as stat2
+	 *   * unspec  0x11 -> pass,  count as stat0
+	 *
+	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
+	 *     the exact decision made.  We are forced to support UNSPEC
+	 *     to handle aborts so that's the only one we handle for passing
+	 *     packets up the stack.
+	 */
+	/* Target for aborts */
+	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+
+	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
+
+	/* Target for normal exits */
+	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+
+	/* if R0 > 7 jump to abort */
+	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
+	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+
+	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
+	wrp_immed(nfp_prog, reg_b(3), 0x41001211);
+
+	emit_shf(nfp_prog, reg_a(1),
+		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
+
+	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+	emit_shf(nfp_prog, reg_a(2),
+		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
+
+	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+	emit_shf(nfp_prog, reg_b(2),
+		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
+
+	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+	emit_shf(nfp_prog, reg_b(2),
+		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
+}
+
+static void nfp_outro(struct nfp_prog *nfp_prog)
+{
+	switch (nfp_prog->act) {
+	case NN_ACT_DIRECT:
+		nfp_outro_tc_da(nfp_prog);
+		break;
+	case NN_ACT_TC_DROP:
+	case NN_ACT_TC_REDIR:
+		nfp_outro_tc_legacy(nfp_prog);
+		break;
+	}
+}
+
+static int nfp_translate(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta;
+	int err;
+
+	nfp_intro(nfp_prog);
+	if (nfp_prog->error)
+		return nfp_prog->error;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		instr_cb_t cb = instr_cb[meta->insn.code];
+
+		meta->off = nfp_prog_current_offset(nfp_prog);
+
+		if (meta->skip) {
+			nfp_prog->n_translated++;
+			continue;
+		}
+
+		if (nfp_meta_has_prev(nfp_prog, meta) &&
+		    nfp_meta_prev(meta)->double_cb)
+			cb = nfp_meta_prev(meta)->double_cb;
+		if (!cb)
+			return -ENOENT;
+		err = cb(nfp_prog, meta);
+		if (err)
+			return err;
+
+		nfp_prog->n_translated++;
+	}
+
+	nfp_outro(nfp_prog);
+	if (nfp_prog->error)
+		return nfp_prog->error;
+
+	return nfp_fixup_branches(nfp_prog);
+}
+
+static int
+nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
+		 unsigned int cnt)
+{
+	unsigned int i;
+
+	for (i = 0; i < cnt; i++) {
+		struct nfp_insn_meta *meta;
+
+		meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+		if (!meta)
+			return -ENOMEM;
+
+		meta->insn = prog[i];
+		meta->n = i;
+
+		list_add_tail(&meta->l, &nfp_prog->insns);
+	}
+
+	return 0;
+}
+
+/* --- Optimizations --- */
+static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		struct bpf_insn insn = meta->insn;
+
+		/* Programs converted from cBPF start with register xoring */
+		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
+		    insn.src_reg == insn.dst_reg)
+			continue;
+
+		/* Programs start with R6 = R1 but we ignore the skb pointer */
+		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
+		    insn.src_reg == 1 && insn.dst_reg == 6)
+			meta->skip = true;
+
+		/* Return as soon as something doesn't match */
+		if (!meta->skip)
+			return;
+	}
+}
+
+/* Try to rename registers so that program uses only low ones */
+static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog)
+{
+	bool reg_used[MAX_BPF_REG] = {};
+	u8 tgt_reg[MAX_BPF_REG] = {};
+	struct nfp_insn_meta *meta;
+	unsigned int i, j;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		if (meta->skip)
+			continue;
+
+		reg_used[meta->insn.src_reg] = true;
+		reg_used[meta->insn.dst_reg] = true;
+	}
+
+	for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) {
+		if (!reg_used[i])
+			continue;
+
+		tgt_reg[i] = j++;
+	}
+	nfp_prog->num_regs = j;
+
+	list_for_each_entry(meta, &nfp_prog->insns, l) {
+		meta->insn.src_reg = tgt_reg[meta->insn.src_reg];
+		meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg];
+	}
+
+	return 0;
+}
+
+/* Remove masking after load since our load guarantees this is not needed */
+static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta1, *meta2;
+	const s32 exp_mask[] = {
+		[BPF_B] = 0x000000ffU,
+		[BPF_H] = 0x0000ffffU,
+		[BPF_W] = 0xffffffffU,
+	};
+
+	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
+		struct bpf_insn insn, next;
+
+		insn = meta1->insn;
+		next = meta2->insn;
+
+		if (BPF_CLASS(insn.code) != BPF_LD)
+			continue;
+		if (BPF_MODE(insn.code) != BPF_ABS &&
+		    BPF_MODE(insn.code) != BPF_IND)
+			continue;
+
+		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
+			continue;
+
+		if (!exp_mask[BPF_SIZE(insn.code)])
+			continue;
+		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
+			continue;
+
+		if (next.src_reg || next.dst_reg)
+			continue;
+
+		meta2->skip = true;
+	}
+}
+
+static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
+{
+	struct nfp_insn_meta *meta1, *meta2, *meta3;
+
+	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
+		struct bpf_insn insn, next1, next2;
+
+		insn = meta1->insn;
+		next1 = meta2->insn;
+		next2 = meta3->insn;
+
+		if (BPF_CLASS(insn.code) != BPF_LD)
+			continue;
+		if (BPF_MODE(insn.code) != BPF_ABS &&
+		    BPF_MODE(insn.code) != BPF_IND)
+			continue;
+		if (BPF_SIZE(insn.code) != BPF_W)
+			continue;
+
+		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
+		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
+		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
+		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
+			continue;
+
+		if (next1.src_reg || next1.dst_reg ||
+		    next2.src_reg || next2.dst_reg)
+			continue;
+
+		if (next1.imm != 0x20 || next2.imm != 0x20)
+			continue;
+
+		meta2->skip = true;
+		meta3->skip = true;
+	}
+}
+
+static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
+{
+	int ret;
+
+	nfp_bpf_opt_reg_init(nfp_prog);
+
+	ret = nfp_bpf_opt_reg_rename(nfp_prog);
+	if (ret)
+		return ret;
+
+	nfp_bpf_opt_ld_mask(nfp_prog);
+	nfp_bpf_opt_ld_shift(nfp_prog);
+
+	return 0;
+}
+
+/**
+ * nfp_bpf_jit() - translate BPF code into NFP assembly
+ * @filter:	kernel BPF filter struct
+ * @prog_mem:	memory to store assembler instructions
+ * @act:	action attached to this eBPF program
+ * @prog_start:	offset of the first instruction when loaded
+ * @prog_done:	where to jump on exit
+ * @prog_sz:	size of @prog_mem in instructions
+ * @res:	achieved parameters of translation results
+ */
+int
+nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem,
+	    enum nfp_bpf_action_type act,
+	    unsigned int prog_start, unsigned int prog_done,
+	    unsigned int prog_sz, struct nfp_bpf_result *res)
+{
+	struct nfp_prog *nfp_prog;
+	int ret;
+
+	nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL);
+	if (!nfp_prog)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&nfp_prog->insns);
+	nfp_prog->act = act;
+	nfp_prog->start_off = prog_start;
+	nfp_prog->tgt_done = prog_done;
+
+	ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len);
+	if (ret)
+		goto out;
+
+	ret = nfp_prog_verify(nfp_prog, filter);
+	if (ret)
+		goto out;
+
+	ret = nfp_bpf_optimize(nfp_prog);
+	if (ret)
+		goto out;
+
+	if (nfp_prog->num_regs <= 7)
+		nfp_prog->regs_per_thread = 16;
+	else
+		nfp_prog->regs_per_thread = 32;
+
+	nfp_prog->prog = prog_mem;
+	nfp_prog->__prog_alloc_len = prog_sz;
+
+	ret = nfp_translate(nfp_prog);
+	if (ret) {
+		pr_err("Translation failed with error %d (translated: %u)\n",
+		       ret, nfp_prog->n_translated);
+		ret = -EINVAL;
+	}
+
+	res->n_instr = nfp_prog->prog_len;
+	res->dense_mode = nfp_prog->num_regs <= 7;
+out:
+	nfp_prog_free(nfp_prog);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
new file mode 100644
index 0000000..144cae8
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt)	"NFP net bpf: " fmt
+
+#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
+#include <linux/kernel.h>
+#include <linux/pkt_cls.h>
+
+#include "nfp_bpf.h"
+
+/* Analyzer/verifier definitions */
+struct nfp_bpf_analyzer_priv {
+	struct nfp_prog *prog;
+	struct nfp_insn_meta *meta;
+};
+
+static struct nfp_insn_meta *
+nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+		  unsigned int insn_idx, unsigned int n_insns)
+{
+	unsigned int forward, backward, i;
+
+	backward = meta->n - insn_idx;
+	forward = insn_idx - meta->n;
+
+	if (min(forward, backward) > n_insns - insn_idx - 1) {
+		backward = n_insns - insn_idx - 1;
+		meta = nfp_prog_last_meta(nfp_prog);
+	}
+	if (min(forward, backward) > insn_idx && backward > insn_idx) {
+		forward = insn_idx;
+		meta = nfp_prog_first_meta(nfp_prog);
+	}
+
+	if (forward < backward)
+		for (i = 0; i < forward; i++)
+			meta = nfp_meta_next(meta);
+	else
+		for (i = 0; i < backward; i++)
+			meta = nfp_meta_prev(meta);
+
+	return meta;
+}
+
+static int
+nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
+		   const struct bpf_verifier_env *env)
+{
+	const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
+
+	if (reg0->type != CONST_IMM) {
+		pr_info("unsupported exit state: %d, imm: %llx\n",
+			reg0->type, reg0->imm);
+		return -EINVAL;
+	}
+
+	if (nfp_prog->act != NN_ACT_DIRECT &&
+	    reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) {
+		pr_info("unsupported exit state: %d, imm: %llx\n",
+			reg0->type, reg0->imm);
+		return -EINVAL;
+	}
+
+	if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT &&
+	    reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN &&
+	    reg0->imm != TC_ACT_QUEUED) {
+		pr_info("unsupported exit state: %d, imm: %llx\n",
+			reg0->type, reg0->imm);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
+		      const struct bpf_verifier_env *env, u8 reg)
+{
+	if (env->cur_state.regs[reg].type != PTR_TO_CTX)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int
+nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
+{
+	struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv;
+	struct nfp_insn_meta *meta = priv->meta;
+
+	meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len);
+	priv->meta = meta;
+
+	if (meta->insn.src_reg == BPF_REG_10 ||
+	    meta->insn.dst_reg == BPF_REG_10) {
+		pr_err("stack not yet supported\n");
+		return -EINVAL;
+	}
+	if (meta->insn.src_reg >= MAX_BPF_REG ||
+	    meta->insn.dst_reg >= MAX_BPF_REG) {
+		pr_err("program uses extended registers - jit hardening?\n");
+		return -EINVAL;
+	}
+
+	if (meta->insn.code == (BPF_JMP | BPF_EXIT))
+		return nfp_bpf_check_exit(priv->prog, env);
+
+	if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
+		return nfp_bpf_check_ctx_ptr(priv->prog, env,
+					     meta->insn.src_reg);
+	if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
+		return nfp_bpf_check_ctx_ptr(priv->prog, env,
+					     meta->insn.dst_reg);
+
+	return 0;
+}
+
+static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = {
+	.insn_hook = nfp_verify_insn,
+};
+
+int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog)
+{
+	struct nfp_bpf_analyzer_priv *priv;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->prog = nfp_prog;
+	priv->meta = nfp_prog_first_meta(nfp_prog);
+
+	ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv);
+
+	kfree(priv);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 6906356..ed824e1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -62,6 +62,9 @@
 /* Max time to wait for NFP to respond on updates (in seconds) */
 #define NFP_NET_POLL_TIMEOUT	5
 
+/* Interval for reading offloaded filter stats */
+#define NFP_NET_STAT_POLL_IVL	msecs_to_jiffies(100)
+
 /* Bar allocation */
 #define NFP_NET_CTRL_BAR	0
 #define NFP_NET_Q0_BAR		2
@@ -220,7 +223,7 @@
 #define PCIE_DESC_RX_I_TCP_CSUM_OK	cpu_to_le16(BIT(11))
 #define PCIE_DESC_RX_I_UDP_CSUM		cpu_to_le16(BIT(10))
 #define PCIE_DESC_RX_I_UDP_CSUM_OK	cpu_to_le16(BIT(9))
-#define PCIE_DESC_RX_SPARE		cpu_to_le16(BIT(8))
+#define PCIE_DESC_RX_BPF		cpu_to_le16(BIT(8))
 #define PCIE_DESC_RX_EOP		cpu_to_le16(BIT(7))
 #define PCIE_DESC_RX_IP4_CSUM		cpu_to_le16(BIT(6))
 #define PCIE_DESC_RX_IP4_CSUM_OK	cpu_to_le16(BIT(5))
@@ -266,6 +269,8 @@
 	};
 };
 
+#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0)
+
 struct nfp_net_rx_hash {
 	__be32 hash_type;
 	__be32 hash;
@@ -405,6 +410,11 @@
 	       fw_ver->minor == minor;
 }
 
+struct nfp_stat_pair {
+	u64 pkts;
+	u64 bytes;
+};
+
 /**
  * struct nfp_net - NFP network device structure
  * @pdev:               Backpointer to PCI device
@@ -413,6 +423,7 @@
  * @is_vf:              Is the driver attached to a VF?
  * @is_nfp3200:         Is the driver for a NFP-3200 card?
  * @fw_loaded:          Is the firmware loaded?
+ * @bpf_offload_skip_sw:  Offloaded BPF program will not be rerun by cls_bpf
  * @ctrl:               Local copy of the control register/word.
  * @fl_bufsz:           Currently configured size of the freelist buffers
  * @rx_offset:		Offset in the RX buffers where packet data starts
@@ -427,6 +438,11 @@
  * @rss_cfg:            RSS configuration
  * @rss_key:            RSS secret key
  * @rss_itbl:           RSS indirection table
+ * @rx_filter:		Filter offload statistics - dropped packets/bytes
+ * @rx_filter_prev:	Filter offload statistics - values from previous update
+ * @rx_filter_change:	Jiffies when statistics last changed
+ * @rx_filter_stats_timer:  Timer for polling filter offload statistics
+ * @rx_filter_lock:	Lock protecting timer state changes (teardown)
  * @max_tx_rings:       Maximum number of TX rings supported by the Firmware
  * @max_rx_rings:       Maximum number of RX rings supported by the Firmware
  * @num_tx_rings:       Currently configured number of TX rings
@@ -473,6 +489,7 @@
 	unsigned is_vf:1;
 	unsigned is_nfp3200:1;
 	unsigned fw_loaded:1;
+	unsigned bpf_offload_skip_sw:1;
 
 	u32 ctrl;
 	u32 fl_bufsz;
@@ -502,6 +519,11 @@
 	u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ];
 	u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ];
 
+	struct nfp_stat_pair rx_filter, rx_filter_prev;
+	unsigned long rx_filter_change;
+	struct timer_list rx_filter_stats_timer;
+	spinlock_t rx_filter_lock;
+
 	int max_tx_rings;
 	int max_rx_rings;
 
@@ -561,12 +583,28 @@
 /* Functions to read/write from/to a BAR
  * Performs any endian conversion necessary.
  */
+static inline u16 nn_readb(struct nfp_net *nn, int off)
+{
+	return readb(nn->ctrl_bar + off);
+}
+
 static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
 {
 	writeb(val, nn->ctrl_bar + off);
 }
 
-/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */
+/* NFP-3200 can't handle 16-bit accesses too well */
+static inline u16 nn_readw(struct nfp_net *nn, int off)
+{
+	WARN_ON_ONCE(nn->is_nfp3200);
+	return readw(nn->ctrl_bar + off);
+}
+
+static inline void nn_writew(struct nfp_net *nn, int off, u16 val)
+{
+	WARN_ON_ONCE(nn->is_nfp3200);
+	writew(val, nn->ctrl_bar + off);
+}
 
 static inline u32 nn_readl(struct nfp_net *nn, int off)
 {
@@ -757,4 +795,9 @@
 }
 #endif /* CONFIG_NFP_NET_DEBUG */
 
+void nfp_net_filter_stats_timer(unsigned long data);
+int
+nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
+		    struct tc_cls_bpf_offload *cls_bpf);
+
 #endif /* _NFP_NET_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 88678c1..aee3fd2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -41,7 +41,6 @@
  *          Chris Telfer <chris.telfer@netronome.com>
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -61,6 +60,7 @@
 
 #include <linux/ktime.h>
 
+#include <net/pkt_cls.h>
 #include <net/vxlan.h>
 
 #include "nfp_net_ctrl.h"
@@ -1293,36 +1293,70 @@
 	}
 }
 
-/**
- * nfp_net_set_hash() - Set SKB hash data
- * @netdev: adapter's net_device structure
- * @skb:   SKB to set the hash data on
- * @rxd:   RX descriptor
- *
- * The RSS hash and hash-type are pre-pended to the packet data.
- * Extract and decode it and set the skb fields.
- */
 static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
-			     struct nfp_net_rx_desc *rxd)
+			     unsigned int type, __be32 *hash)
+{
+	if (!(netdev->features & NETIF_F_RXHASH))
+		return;
+
+	switch (type) {
+	case NFP_NET_RSS_IPV4:
+	case NFP_NET_RSS_IPV6:
+	case NFP_NET_RSS_IPV6_EX:
+		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
+		break;
+	default:
+		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
+		break;
+	}
+}
+
+static void
+nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
+		      struct nfp_net_rx_desc *rxd)
 {
 	struct nfp_net_rx_hash *rx_hash;
 
-	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) ||
-	    !(netdev->features & NETIF_F_RXHASH))
+	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
 		return;
 
 	rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
 
-	switch (be32_to_cpu(rx_hash->hash_type)) {
-	case NFP_NET_RSS_IPV4:
-	case NFP_NET_RSS_IPV6:
-	case NFP_NET_RSS_IPV6_EX:
-		skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3);
-		break;
-	default:
-		skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4);
-		break;
+	nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
+			 &rx_hash->hash);
+}
+
+static void *
+nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
+		   int meta_len)
+{
+	u8 *data = skb->data - meta_len;
+	u32 meta_info;
+
+	meta_info = get_unaligned_be32(data);
+	data += 4;
+
+	while (meta_info) {
+		switch (meta_info & NFP_NET_META_FIELD_MASK) {
+		case NFP_NET_META_HASH:
+			meta_info >>= NFP_NET_META_FIELD_SIZE;
+			nfp_net_set_hash(netdev, skb,
+					 meta_info & NFP_NET_META_FIELD_MASK,
+					 (__be32 *)data);
+			data += 4;
+			break;
+		case NFP_NET_META_MARK:
+			skb->mark = get_unaligned_be32(data);
+			data += 4;
+			break;
+		default:
+			return NULL;
+		}
+
+		meta_info >>= NFP_NET_META_FIELD_SIZE;
 	}
+
+	return data;
 }
 
 /**
@@ -1439,18 +1473,29 @@
 			skb_reserve(skb, nn->rx_offset);
 		skb_put(skb, data_len - meta_len);
 
-		nfp_net_set_hash(nn->netdev, skb, rxd);
-
-		/* Pad small frames to minimum */
-		if (skb_put_padto(skb, 60))
-			break;
-
 		/* Stats update */
 		u64_stats_update_begin(&r_vec->rx_sync);
 		r_vec->rx_pkts++;
 		r_vec->rx_bytes += skb->len;
 		u64_stats_update_end(&r_vec->rx_sync);
 
+		if (nn->fw_ver.major <= 3) {
+			nfp_net_set_hash_desc(nn->netdev, skb, rxd);
+		} else if (meta_len) {
+			void *end;
+
+			end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
+			if (unlikely(end != skb->data)) {
+				u64_stats_update_begin(&r_vec->rx_sync);
+				r_vec->rx_drops++;
+				u64_stats_update_end(&r_vec->rx_sync);
+
+				dev_kfree_skb_any(skb);
+				nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
+				continue;
+			}
+		}
+
 		skb_record_rx_queue(skb, rx_ring->idx);
 		skb->protocol = eth_type_trans(skb, nn->netdev);
 
@@ -2049,12 +2094,16 @@
 
 	nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
 			       GFP_KERNEL);
-	if (!nn->rx_rings)
+	if (!nn->rx_rings) {
+		err = -ENOMEM;
 		goto err_free_lsc;
+	}
 	nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings),
 			       GFP_KERNEL);
-	if (!nn->tx_rings)
+	if (!nn->tx_rings) {
+		err = -ENOMEM;
 		goto err_free_rx_rings;
+	}
 
 	for (r = 0; r < nn->num_r_vecs; r++) {
 		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
@@ -2387,6 +2436,31 @@
 	return stats;
 }
 
+static bool nfp_net_ebpf_capable(struct nfp_net *nn)
+{
+	if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
+	    nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
+		return true;
+	return false;
+}
+
+static int
+nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+		 struct tc_to_netdev *tc)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+		return -ENOTSUPP;
+	if (proto != htons(ETH_P_ALL))
+		return -ENOTSUPP;
+
+	if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
+		return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
+
+	return -EINVAL;
+}
+
 static int nfp_net_set_features(struct net_device *netdev,
 				netdev_features_t features)
 {
@@ -2441,6 +2515,11 @@
 			new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
 	}
 
+	if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+		nn_err(nn, "Cannot disable HW TC offload while in use\n");
+		return -EBUSY;
+	}
+
 	nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
 	       netdev->features, features, changed);
 
@@ -2590,6 +2669,7 @@
 	.ndo_stop		= nfp_net_netdev_close,
 	.ndo_start_xmit		= nfp_net_tx,
 	.ndo_get_stats64	= nfp_net_stat64,
+	.ndo_setup_tc		= nfp_net_setup_tc,
 	.ndo_tx_timeout		= nfp_net_tx_timeout,
 	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
 	.ndo_change_mtu		= nfp_net_change_mtu,
@@ -2615,7 +2695,7 @@
 		nn->fw_ver.resv, nn->fw_ver.class,
 		nn->fw_ver.major, nn->fw_ver.minor,
 		nn->max_mtu);
-	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
 		nn->cap,
 		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
 		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
@@ -2632,7 +2712,8 @@
 		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
 		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
 		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
-		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "");
+		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "",
+		nfp_net_ebpf_capable(nn)            ? "BPF "	  : "");
 }
 
 /**
@@ -2675,10 +2756,13 @@
 	nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
 
 	spin_lock_init(&nn->reconfig_lock);
+	spin_lock_init(&nn->rx_filter_lock);
 	spin_lock_init(&nn->link_status_lock);
 
 	setup_timer(&nn->reconfig_timer,
 		    nfp_net_reconfig_timer, (unsigned long)nn);
+	setup_timer(&nn->rx_filter_stats_timer,
+		    nfp_net_filter_stats_timer, (unsigned long)nn);
 
 	return nn;
 }
@@ -2800,6 +2884,9 @@
 
 	netdev->features = netdev->hw_features;
 
+	if (nfp_net_ebpf_capable(nn))
+		netdev->hw_features |= NETIF_F_HW_TC;
+
 	/* Advertise but disable TSO by default. */
 	netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index ad6c4e3..93b10b4 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -66,6 +66,13 @@
 #define NFP_NET_LSO_MAX_HDR_SZ		255
 
 /**
+ * Prepend field types
+ */
+#define NFP_NET_META_FIELD_SIZE		4
+#define NFP_NET_META_HASH		1 /* next field carries hash type */
+#define NFP_NET_META_MARK		2
+
+/**
  * Hash type pre-pended when a RSS hash was computed
  */
 #define NFP_NET_RSS_NONE                0
@@ -123,6 +130,7 @@
 #define   NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */
 #define   NFP_NET_CFG_CTRL_VXLAN	  (0x1 << 24) /* VXLAN tunnel support */
 #define   NFP_NET_CFG_CTRL_NVGRE	  (0x1 << 25) /* NVGRE tunnel support */
+#define   NFP_NET_CFG_CTRL_BPF		  (0x1 << 27) /* BPF offload capable */
 #define NFP_NET_CFG_UPDATE              0x0004
 #define   NFP_NET_CFG_UPDATE_GEN          (0x1 <<  0) /* General update */
 #define   NFP_NET_CFG_UPDATE_RING         (0x1 <<  1) /* Ring config change */
@@ -134,6 +142,7 @@
 #define   NFP_NET_CFG_UPDATE_RESET        (0x1 <<  7) /* Update due to FLR */
 #define   NFP_NET_CFG_UPDATE_IRQMOD       (0x1 <<  8) /* IRQ mod change */
 #define   NFP_NET_CFG_UPDATE_VXLAN	  (0x1 <<  9) /* VXLAN port change */
+#define   NFP_NET_CFG_UPDATE_BPF	  (0x1 << 10) /* BPF program load */
 #define   NFP_NET_CFG_UPDATE_ERR          (0x1 << 31) /* A error occurred */
 #define NFP_NET_CFG_TXRS_ENABLE         0x0008
 #define NFP_NET_CFG_RXRS_ENABLE         0x0010
@@ -196,10 +205,37 @@
 #define NFP_NET_CFG_VXLAN_SZ		  0x0008
 
 /**
- * 64B reserved for future use (0x0080 - 0x00c0)
+ * NFP6000 - BPF section
+ * @NFP_NET_CFG_BPF_ABI:	BPF ABI version
+ * @NFP_NET_CFG_BPF_CAP:	BPF capabilities
+ * @NFP_NET_CFG_BPF_MAX_LEN:	Maximum size of JITed BPF code in bytes
+ * @NFP_NET_CFG_BPF_START:	Offset at which BPF will be loaded
+ * @NFP_NET_CFG_BPF_DONE:	Offset to jump to on exit
+ * @NFP_NET_CFG_BPF_STACK_SZ:	Total size of stack area in 64B chunks
+ * @NFP_NET_CFG_BPF_INL_MTU:	Packet data split offset in 64B chunks
+ * @NFP_NET_CFG_BPF_SIZE:	Size of the JITed BPF code in instructions
+ * @NFP_NET_CFG_BPF_ADDR:	DMA address of the buffer with JITed BPF code
  */
-#define NFP_NET_CFG_RESERVED            0x0080
-#define NFP_NET_CFG_RESERVED_SZ         0x0040
+#define NFP_NET_CFG_BPF_ABI		0x0080
+#define   NFP_NET_BPF_ABI		1
+#define NFP_NET_CFG_BPF_CAP		0x0081
+#define   NFP_NET_BPF_CAP_RELO		(1 << 0) /* seamless reload */
+#define NFP_NET_CFG_BPF_MAX_LEN		0x0082
+#define NFP_NET_CFG_BPF_START		0x0084
+#define NFP_NET_CFG_BPF_DONE		0x0086
+#define NFP_NET_CFG_BPF_STACK_SZ	0x0088
+#define NFP_NET_CFG_BPF_INL_MTU		0x0089
+#define NFP_NET_CFG_BPF_SIZE		0x008e
+#define NFP_NET_CFG_BPF_ADDR		0x0090
+#define   NFP_NET_CFG_BPF_CFG_8CTX	(1 << 0) /* 8ctx mode */
+#define   NFP_NET_CFG_BPF_CFG_MASK	7ULL
+#define   NFP_NET_CFG_BPF_ADDR_MASK	(~NFP_NET_CFG_BPF_CFG_MASK)
+
+/**
+ * 40B reserved for future use (0x0098 - 0x00c0)
+ */
+#define NFP_NET_CFG_RESERVED            0x0098
+#define NFP_NET_CFG_RESERVED_SZ         0x0028
 
 /**
  * RSS configuration (0x0100 - 0x01ac):
@@ -303,6 +339,15 @@
 #define NFP_NET_CFG_STATS_TX_MC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x80)
 #define NFP_NET_CFG_STATS_TX_BC_FRAMES  (NFP_NET_CFG_STATS_BASE + 0x88)
 
+#define NFP_NET_CFG_STATS_APP0_FRAMES	(NFP_NET_CFG_STATS_BASE + 0x90)
+#define NFP_NET_CFG_STATS_APP0_BYTES	(NFP_NET_CFG_STATS_BASE + 0x98)
+#define NFP_NET_CFG_STATS_APP1_FRAMES	(NFP_NET_CFG_STATS_BASE + 0xa0)
+#define NFP_NET_CFG_STATS_APP1_BYTES	(NFP_NET_CFG_STATS_BASE + 0xa8)
+#define NFP_NET_CFG_STATS_APP2_FRAMES	(NFP_NET_CFG_STATS_BASE + 0xb0)
+#define NFP_NET_CFG_STATS_APP2_BYTES	(NFP_NET_CFG_STATS_BASE + 0xb8)
+#define NFP_NET_CFG_STATS_APP3_FRAMES	(NFP_NET_CFG_STATS_BASE + 0xc0)
+#define NFP_NET_CFG_STATS_APP3_BYTES	(NFP_NET_CFG_STATS_BASE + 0xc8)
+
 /**
  * Per ring stats (0x1000 - 0x1800)
  * options, 64bit per entry
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 7d7933d0..3418f22 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -40,7 +40,6 @@
  *          Brad Petrus <brad.petrus@netronome.com>
  */
 
-#include <linux/version.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
@@ -107,6 +106,18 @@
 	{"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)},
 	{"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)},
 	{"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)},
+
+	{"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)},
+	{"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)},
+	/* see comments in outro functions in nfp_bpf_jit.c to find out
+	 * how different BPF modes use app-specific counters
+	 */
+	{"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)},
+	{"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)},
+	{"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)},
+	{"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)},
+	{"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)},
+	{"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)},
 };
 
 #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
new file mode 100644
index 0000000..8acfb63
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2016 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * nfp_net_offload.c
+ * Netronome network device driver: TC offload functions for PF and VF
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_mirred.h>
+
+#include "nfp_bpf.h"
+#include "nfp_net_ctrl.h"
+#include "nfp_net.h"
+
+void nfp_net_filter_stats_timer(unsigned long data)
+{
+	struct nfp_net *nn = (void *)data;
+	struct nfp_stat_pair latest;
+
+	spin_lock_bh(&nn->rx_filter_lock);
+
+	if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+		mod_timer(&nn->rx_filter_stats_timer,
+			  jiffies + NFP_NET_STAT_POLL_IVL);
+
+	spin_unlock_bh(&nn->rx_filter_lock);
+
+	latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
+	latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
+
+	if (latest.pkts != nn->rx_filter.pkts)
+		nn->rx_filter_change = jiffies;
+
+	nn->rx_filter = latest;
+}
+
+static void nfp_net_bpf_stats_reset(struct nfp_net *nn)
+{
+	nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES);
+	nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES);
+	nn->rx_filter_prev = nn->rx_filter;
+	nn->rx_filter_change = jiffies;
+}
+
+static int
+nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+{
+	struct tc_action *a;
+	LIST_HEAD(actions);
+	u64 bytes, pkts;
+
+	pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts;
+	bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes;
+	bytes -= pkts * ETH_HLEN;
+
+	nn->rx_filter_prev = nn->rx_filter;
+
+	preempt_disable();
+
+	tcf_exts_to_list(cls_bpf->exts, &actions);
+	list_for_each_entry(a, &actions, list)
+		tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change);
+
+	preempt_enable();
+
+	return 0;
+}
+
+static int
+nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
+{
+	const struct tc_action *a;
+	LIST_HEAD(actions);
+
+	/* TC direct action */
+	if (cls_bpf->exts_integrated) {
+		if (tc_no_actions(cls_bpf->exts))
+			return NN_ACT_DIRECT;
+
+		return -ENOTSUPP;
+	}
+
+	/* TC legacy mode */
+	if (!tc_single_action(cls_bpf->exts))
+		return -ENOTSUPP;
+
+	tcf_exts_to_list(cls_bpf->exts, &actions);
+	list_for_each_entry(a, &actions, list) {
+		if (is_tcf_gact_shot(a))
+			return NN_ACT_TC_DROP;
+
+		if (is_tcf_mirred_redirect(a) &&
+		    tcf_mirred_ifindex(a) == nn->netdev->ifindex)
+			return NN_ACT_TC_REDIR;
+	}
+
+	return -ENOTSUPP;
+}
+
+static int
+nfp_net_bpf_offload_prepare(struct nfp_net *nn,
+			    struct tc_cls_bpf_offload *cls_bpf,
+			    struct nfp_bpf_result *res,
+			    void **code, dma_addr_t *dma_addr, u16 max_instr)
+{
+	unsigned int code_sz = max_instr * sizeof(u64);
+	enum nfp_bpf_action_type act;
+	u16 start_off, done_off;
+	unsigned int max_mtu;
+	int ret;
+
+	if (!IS_ENABLED(CONFIG_BPF_SYSCALL))
+		return -ENOTSUPP;
+
+	ret = nfp_net_bpf_get_act(nn, cls_bpf);
+	if (ret < 0)
+		return ret;
+	act = ret;
+
+	max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32;
+	if (max_mtu < nn->netdev->mtu) {
+		nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n");
+		return -ENOTSUPP;
+	}
+
+	start_off = nn_readw(nn, NFP_NET_CFG_BPF_START);
+	done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE);
+
+	*code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr,
+				    GFP_KERNEL);
+	if (!*code)
+		return -ENOMEM;
+
+	ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off,
+			  max_instr, res);
+	if (ret)
+		goto out;
+
+	return 0;
+
+out:
+	dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr);
+	return ret;
+}
+
+static void
+nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags,
+			   void *code, dma_addr_t dma_addr,
+			   unsigned int code_sz, unsigned int n_instr,
+			   bool dense_mode)
+{
+	u64 bpf_addr = dma_addr;
+	int err;
+
+	nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW);
+
+	if (dense_mode)
+		bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX;
+
+	nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr);
+	nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr);
+
+	/* Load up the JITed code */
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF);
+	if (err)
+		nn_err(nn, "FW command error while loading BPF: %d\n", err);
+
+	/* Enable passing packets through BPF function */
+	nn->ctrl |= NFP_NET_CFG_CTRL_BPF;
+	nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
+	if (err)
+		nn_err(nn, "FW command error while enabling BPF: %d\n", err);
+
+	dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr);
+
+	nfp_net_bpf_stats_reset(nn);
+	mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL);
+}
+
+static int nfp_net_bpf_stop(struct nfp_net *nn)
+{
+	if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF))
+		return 0;
+
+	spin_lock_bh(&nn->rx_filter_lock);
+	nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF;
+	spin_unlock_bh(&nn->rx_filter_lock);
+	nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl);
+
+	del_timer_sync(&nn->rx_filter_stats_timer);
+	nn->bpf_offload_skip_sw = 0;
+
+	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
+}
+
+int
+nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
+		    struct tc_cls_bpf_offload *cls_bpf)
+{
+	struct nfp_bpf_result res;
+	dma_addr_t dma_addr;
+	u16 max_instr;
+	void *code;
+	int err;
+
+	max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN);
+
+	switch (cls_bpf->command) {
+	case TC_CLSBPF_REPLACE:
+		/* There is nothing stopping us from implementing seamless
+		 * replace but the simple method of loading I adopted in
+		 * the firmware does not handle atomic replace (i.e. we have to
+		 * stop the BPF offload and re-enable it).  Leaking-in a few
+		 * frames which didn't have BPF applied in the hardware should
+		 * be fine if software fallback is available, though.
+		 */
+		if (nn->bpf_offload_skip_sw)
+			return -EBUSY;
+
+		err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
+						  &dma_addr, max_instr);
+		if (err)
+			return err;
+
+		nfp_net_bpf_stop(nn);
+		nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
+					   dma_addr, max_instr * sizeof(u64),
+					   res.n_instr, res.dense_mode);
+		return 0;
+
+	case TC_CLSBPF_ADD:
+		if (nn->ctrl & NFP_NET_CFG_CTRL_BPF)
+			return -EBUSY;
+
+		err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code,
+						  &dma_addr, max_instr);
+		if (err)
+			return err;
+
+		nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code,
+					   dma_addr, max_instr * sizeof(u64),
+					   res.n_instr, res.dense_mode);
+		return 0;
+
+	case TC_CLSBPF_DESTROY:
+		return nfp_net_bpf_stop(nn);
+
+	case TC_CLSBPF_STATS:
+		return nfp_net_bpf_stats_update(nn, cls_bpf);
+
+	default:
+		return -ENOTSUPP;
+	}
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
index 37abef0..2800bbf 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -38,7 +38,6 @@
  *         Rolf Neugebauer <rolf.neugebauer@netronome.com>
  */
 
-#include <linux/version.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -134,7 +133,7 @@
 	}
 
 	nfp_net_get_fw_version(&fw_ver, ctrl_bar);
-	if (fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) {
+	if (fw_ver.resv || fw_ver.class != NFP_NET_CFG_VERSION_CLASS_GENERIC) {
 		dev_err(&pdev->dev, "Unknown Firmware ABI %d.%d.%d.%d\n",
 			fw_ver.resv, fw_ver.class, fw_ver.major, fw_ver.minor);
 		err = -EINVAL;
@@ -142,16 +141,14 @@
 	}
 
 	/* Determine stride */
-	if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 0) ||
-	    nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1) ||
-	    nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0x12, 0x48)) {
+	if (nfp_net_fw_ver_eq(&fw_ver, 0, 0, 0, 1)) {
 		stride = 2;
 		tx_bar_no = NFP_NET_Q0_BAR;
 		rx_bar_no = NFP_NET_Q1_BAR;
 		dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n");
 	} else {
 		switch (fw_ver.major) {
-		case 1 ... 3:
+		case 1 ... 4:
 			if (is_nfp3200) {
 				stride = 2;
 				tx_bar_no = NFP_NET_Q0_BAR;
diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c
index 87b7b81..712d8bc 100644
--- a/drivers/net/ethernet/nuvoton/w90p910_ether.c
+++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c
@@ -751,7 +751,7 @@
 				dev_err(&pdev->dev, "rx crc err\n");
 				ether->stats.rx_crc_errors++;
 			} else if (status & RXDS_ALIE) {
-				dev_err(&pdev->dev, "rx aligment err\n");
+				dev_err(&pdev->dev, "rx alignment err\n");
 				ether->stats.rx_frame_errors++;
 			} else if (status & RXDS_PTLE) {
 				dev_err(&pdev->dev, "rx longer err\n");
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 4d4ecba..8e13ec8 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -475,14 +475,6 @@
 	mac[5] = tmp >> 8;
 }
 
-static void __lpc_eth_clock_enable(struct netdata_local *pldat, bool enable)
-{
-	if (enable)
-		clk_prepare_enable(pldat->clk);
-	else
-		clk_disable_unprepare(pldat->clk);
-}
-
 static void __lpc_params_setup(struct netdata_local *pldat)
 {
 	u32 tmp;
@@ -1056,7 +1048,7 @@
 	writel(0, LPC_ENET_MAC2(pldat->net_base));
 	spin_unlock_irqrestore(&pldat->lock, flags);
 
-	__lpc_eth_clock_enable(pldat, false);
+	clk_disable_unprepare(pldat->clk);
 
 	return 0;
 }
@@ -1197,11 +1189,14 @@
 static int lpc_eth_open(struct net_device *ndev)
 {
 	struct netdata_local *pldat = netdev_priv(ndev);
+	int ret;
 
 	if (netif_msg_ifup(pldat))
 		dev_dbg(&pldat->pdev->dev, "enabling %s\n", ndev->name);
 
-	__lpc_eth_clock_enable(pldat, true);
+	ret = clk_prepare_enable(pldat->clk);
+	if (ret)
+		return ret;
 
 	/* Suspended PHY makes LPC ethernet core block, so resume now */
 	phy_resume(ndev->phydev);
@@ -1320,7 +1315,9 @@
 	}
 
 	/* Enable network clock */
-	__lpc_eth_clock_enable(pldat, true);
+	ret = clk_prepare_enable(pldat->clk);
+	if (ret)
+		goto err_out_clk_put;
 
 	/* Map IO space */
 	pldat->net_base = ioremap(res->start, resource_size(res));
@@ -1454,6 +1451,7 @@
 	iounmap(pldat->net_base);
 err_out_disable_clocks:
 	clk_disable_unprepare(pldat->clk);
+err_out_clk_put:
 	clk_put(pldat->clk);
 err_out_free_dev:
 	free_netdev(ndev);
diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 6ba4840..0df1391f9 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -88,6 +88,9 @@
 	---help---
 	  This enables the support for ...
 
+config QED_LL2
+	bool
+
 config QED_SRIOV
 	bool "QLogic QED 25/40/100Gb SR-IOV support"
 	depends on QED && PCI_IOV
@@ -104,4 +107,15 @@
 	---help---
 	  This enables the support for ...
 
+config INFINIBAND_QEDR
+	tristate "QLogic qede RoCE sources [debug]"
+	depends on QEDE && 64BIT
+	select QED_LL2
+	default n
+	---help---
+	  This provides a temporary node that allows the compilation
+	  and logical testing of the InfiniBand over Ethernet support
+	  for QLogic QED. This would be replaced by the 'real' option
+	  once the QEDR driver is added [+relocated].
+
 endif # NET_VENDOR_QLOGIC
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index d1f157e..cda0af7 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -2,5 +2,7 @@
 
 qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
 	 qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \
-	 qed_selftest.o qed_dcbx.o
+	 qed_selftest.o qed_dcbx.o qed_debug.o
 qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
+qed-$(CONFIG_QED_LL2) += qed_ll2.o
+qed-$(CONFIG_INFINIBAND_QEDR) += qed_roce.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 35e5377..653bb57 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -23,10 +23,11 @@
 #include <linux/zlib.h>
 #include <linux/hashtable.h>
 #include <linux/qed/qed_if.h>
+#include "qed_debug.h"
 #include "qed_hsi.h"
 
 extern const struct qed_common_ops qed_common_ops_pass;
-#define DRV_MODULE_VERSION "8.7.1.20"
+#define DRV_MODULE_VERSION "8.10.9.20"
 
 #define MAX_HWFNS_PER_DEVICE    (4)
 #define NAME_SIZE 16
@@ -34,6 +35,9 @@
 
 #define QED_WFQ_UNIT	100
 
+#define QED_WID_SIZE            (1024)
+#define QED_PF_DEMS_SIZE        (4)
+
 /* cau states */
 enum qed_coalescing_mode {
 	QED_COAL_MODE_DISABLE,
@@ -42,11 +46,21 @@
 
 struct qed_eth_cb_ops;
 struct qed_dev_info;
+union qed_mcp_protocol_stats;
+enum qed_mcp_protocol_type;
 
 /* helpers */
 static inline u32 qed_db_addr(u32 cid, u32 DEMS)
 {
 	u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) |
+		      (cid * QED_PF_DEMS_SIZE);
+
+	return db_addr;
+}
+
+static inline u32 qed_db_addr_vf(u32 cid, u32 DEMS)
+{
+	u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) |
 		      FIELD_VALUE(DB_LEGACY_ADDR_ICID, cid);
 
 	return db_addr;
@@ -69,6 +83,7 @@
 struct qed_sb_attn_info;
 struct qed_cxt_mngr;
 struct qed_sb_sp_info;
+struct qed_ll2_info;
 struct qed_mcp_info;
 
 struct qed_rt_data {
@@ -148,13 +163,17 @@
 	QED_RL,
 	QED_MAC,
 	QED_VLAN,
+	QED_RDMA_CNQ_RAM,
 	QED_ILT,
+	QED_LL2_QUEUE,
+	QED_RDMA_STATS_QUEUE,
 	QED_MAX_RESC,
 };
 
 enum QED_FEATURE {
 	QED_PF_L2_QUE,
 	QED_VF,
+	QED_RDMA_CNQ,
 	QED_MAX_FEATURES,
 };
 
@@ -357,6 +376,9 @@
 	struct qed_sb_attn_info		*p_sb_attn;
 
 	/* Protocol related */
+	bool				using_ll2;
+	struct qed_ll2_info		*p_ll2_info;
+	struct qed_rdma_info		*p_rdma_info;
 	struct qed_pf_params		pf_params;
 
 	bool b_rdma_enabled_in_prs;
@@ -393,6 +415,19 @@
 	/* Buffer for unzipping firmware data */
 	void				*unzip_buf;
 
+	struct dbg_tools_data		dbg_info;
+
+	/* PWM region specific data */
+	u32				dpi_size;
+	u32				dpi_count;
+
+	/* This is used to calculate the doorbell address */
+	u32 dpi_start_offset;
+
+	/* If one of the following is set then EDPM shouldn't be used */
+	u8 dcbx_no_edpm;
+	u8 db_bar_no_edpm;
+
 	struct qed_simd_fp_handler	simd_proto_handler[64];
 
 #ifdef CONFIG_QED_SRIOV
@@ -402,6 +437,7 @@
 #endif
 
 	struct z_stream_s		*stream;
+	struct qed_roce_ll2_info	*ll2;
 };
 
 struct pci_params {
@@ -426,6 +462,21 @@
 	bool			fp_initialized;
 	u8			fp_msix_base;
 	u8			fp_msix_cnt;
+	u8			rdma_msix_base;
+	u8			rdma_msix_cnt;
+};
+
+struct qed_dbg_feature {
+	struct dentry *dentry;
+	u8 *dump_buf;
+	u32 buf_size;
+	u32 dumped_dwords;
+};
+
+struct qed_dbg_params {
+	struct qed_dbg_feature features[DBG_FEATURE_NUM];
+	u8 engine_for_debug;
+	bool print_data;
 };
 
 struct qed_dev {
@@ -442,6 +493,8 @@
 				 CHIP_REV_IS_A0(dev))
 #define QED_IS_BB_B0(dev)       (QED_IS_BB(dev) && \
 				 CHIP_REV_IS_B0(dev))
+#define QED_IS_AH(dev)  ((dev)->type == QED_DEV_TYPE_AH)
+#define QED_IS_K2(dev)  QED_IS_AH(dev)
 
 #define QED_GET_TYPE(dev)       (QED_IS_BB_A0(dev) ? CHIP_BB_A0 : \
 				 QED_IS_BB_B0(dev) ? CHIP_BB_B0 : CHIP_K2)
@@ -517,7 +570,6 @@
 
 	bool				b_is_vf;
 	u32				drv_type;
-
 	struct qed_eth_stats		*reset_stats;
 	struct qed_fw_data		*fw_data;
 
@@ -542,7 +594,18 @@
 	} protocol_ops;
 	void				*ops_cookie;
 
+	struct qed_dbg_params		dbg_params;
+
+#ifdef CONFIG_QED_LL2
+	struct qed_cb_ll2_info		*ll2;
+	u8				ll2_mac_address[ETH_ALEN];
+#endif
+
 	const struct firmware		*firmware;
+
+	u32 rdma_max_sge;
+	u32 rdma_max_inline;
+	u32 rdma_max_srq_sge;
 };
 
 #define NUM_OF_VFS(dev)         MAX_NUM_VFS_BB
@@ -561,9 +624,18 @@
 static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
 					u32 concrete_fid)
 {
+	u8 vfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_VFID);
 	u8 pfid = GET_FIELD(concrete_fid, PXP_CONCRETE_FID_PFID);
+	u8 vf_valid = GET_FIELD(concrete_fid,
+				PXP_CONCRETE_FID_VFVALID);
+	u8 sw_fid;
 
-	return pfid;
+	if (vf_valid)
+		sw_fid = vfid + MAX_NUM_PFS;
+	else
+		sw_fid = pfid;
+
+	return sw_fid;
 }
 
 #define PURE_LB_TC 8
@@ -597,7 +669,9 @@
 u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
 		   u32 input_len, u8 *input_buf,
 		   u32 max_size, u8 *unzip_buf);
-
+void qed_get_protocol_stats(struct qed_dev *cdev,
+			    enum qed_mcp_protocol_type type,
+			    union qed_mcp_protocol_stats *stats);
 int qed_slowpath_irq_req(struct qed_hwfn *hwfn);
 
 #endif /* _QED_H */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 1c35f37..82370a1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -48,7 +48,13 @@
 #define TM_ELEM_SIZE    4
 
 /* ILT constants */
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+/* For RoCE we configure to 64K to cover for RoCE max tasks 256K purpose. */
+#define ILT_DEFAULT_HW_P_SIZE		4
+#else
 #define ILT_DEFAULT_HW_P_SIZE		3
+#endif
+
 #define ILT_PAGE_IN_BYTES(hw_p_size)	(1U << ((hw_p_size) + 12))
 #define ILT_CFG_REG(cli, reg)	PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET
 
@@ -377,9 +383,8 @@
 	}
 }
 
-u32 qed_cxt_get_proto_cid_count(struct qed_hwfn		*p_hwfn,
-				enum protocol_type	type,
-				u32			*vf_cid)
+u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn,
+				enum protocol_type type, u32 *vf_cid)
 {
 	if (vf_cid)
 		*vf_cid = p_hwfn->p_cxt_mngr->conn_cfg[type].cids_per_vf;
@@ -405,10 +410,10 @@
 	return cnt;
 }
 
-static void
-qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn,
-			    enum protocol_type proto,
-			    u8 seg, u8 seg_type, u32 count, bool has_fl)
+static void qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn,
+					enum protocol_type proto,
+					u8 seg,
+					u8 seg_type, u32 count, bool has_fl)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	struct qed_tid_seg *p_seg = &p_mngr->conn_cfg[proto].tid_seg[seg];
@@ -420,8 +425,7 @@
 
 static void qed_ilt_cli_blk_fill(struct qed_ilt_client_cfg *p_cli,
 				 struct qed_ilt_cli_blk *p_blk,
-				 u32 start_line, u32 total_size,
-				 u32 elem_size)
+				 u32 start_line, u32 total_size, u32 elem_size)
 {
 	u32 ilt_size = ILT_PAGE_IN_BYTES(p_cli->p_size.val);
 
@@ -448,8 +452,7 @@
 		p_cli->first.val = *p_line;
 
 	p_cli->active = true;
-	*p_line += DIV_ROUND_UP(p_blk->total_size,
-				p_blk->real_size_in_page);
+	*p_line += DIV_ROUND_UP(p_blk->total_size, p_blk->real_size_in_page);
 	p_cli->last.val = *p_line - 1;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_ILT,
@@ -795,10 +798,9 @@
 	p_mngr->t2_num_pages = DIV_ROUND_UP(total_size, psz);
 
 	/* allocate t2 */
-	p_mngr->t2 = kzalloc(p_mngr->t2_num_pages * sizeof(struct qed_dma_mem),
+	p_mngr->t2 = kcalloc(p_mngr->t2_num_pages, sizeof(struct qed_dma_mem),
 			     GFP_KERNEL);
 	if (!p_mngr->t2) {
-		DP_NOTICE(p_hwfn, "Failed to allocate t2 table\n");
 		rc = -ENOMEM;
 		goto t2_fail;
 	}
@@ -926,12 +928,9 @@
 		void *p_virt;
 		u32 size;
 
-		size = min_t(u32, sz_left,
-			     p_blk->real_size_in_page);
+		size = min_t(u32, sz_left, p_blk->real_size_in_page);
 		p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-					    size,
-					    &p_phys,
-					    GFP_KERNEL);
+					    size, &p_phys, GFP_KERNEL);
 		if (!p_virt)
 			return -ENOMEM;
 		memset(p_virt, 0, size);
@@ -963,7 +962,6 @@
 	p_mngr->ilt_shadow = kcalloc(size, sizeof(struct qed_dma_mem),
 				     GFP_KERNEL);
 	if (!p_mngr->ilt_shadow) {
-		DP_NOTICE(p_hwfn, "Failed to allocate ilt shadow table\n");
 		rc = -ENOMEM;
 		goto ilt_shadow_fail;
 	}
@@ -976,7 +974,7 @@
 		for (j = 0; j < ILT_CLI_PF_BLOCKS; j++) {
 			p_blk = &clients[i].pf_blks[j];
 			rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, 0);
-			if (rc != 0)
+			if (rc)
 				goto ilt_shadow_fail;
 		}
 		for (k = 0; k < p_mngr->vf_count; k++) {
@@ -985,7 +983,7 @@
 
 				p_blk = &clients[i].vf_blks[j];
 				rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, lines);
-				if (rc != 0)
+				if (rc)
 					goto ilt_shadow_fail;
 			}
 		}
@@ -1056,10 +1054,8 @@
 	u32 i;
 
 	p_mngr = kzalloc(sizeof(*p_mngr), GFP_KERNEL);
-	if (!p_mngr) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_cxt_mngr'\n");
+	if (!p_mngr)
 		return -ENOMEM;
-	}
 
 	/* Initialize ILT client registers */
 	clients = p_mngr->clients;
@@ -1111,24 +1107,18 @@
 
 	/* Allocate the ILT shadow table */
 	rc = qed_ilt_shadow_alloc(p_hwfn);
-	if (rc) {
-		DP_NOTICE(p_hwfn, "Failed to allocate ilt memory\n");
+	if (rc)
 		goto tables_alloc_fail;
-	}
 
 	/* Allocate the T2  table */
 	rc = qed_cxt_src_t2_alloc(p_hwfn);
-	if (rc) {
-		DP_NOTICE(p_hwfn, "Failed to allocate T2 memory\n");
+	if (rc)
 		goto tables_alloc_fail;
-	}
 
 	/* Allocate and initialize the acquired cids bitmaps */
 	rc = qed_cid_map_alloc(p_hwfn);
-	if (rc) {
-		DP_NOTICE(p_hwfn, "Failed to allocate cid maps\n");
+	if (rc)
 		goto tables_alloc_fail;
-	}
 
 	return 0;
 
@@ -1672,7 +1662,7 @@
 		     p_hwfn->rel_pf_id * NUM_TASK_PF_SEGMENTS + i);
 
 		STORE_RT_REG_AGG(p_hwfn, rt_reg, cfg_word);
-		active_seg_mask |= (tm_iids.pf_tids[i] ? (1 << i) : 0);
+		active_seg_mask |= (tm_iids.pf_tids[i] ? BIT(i) : 0);
 
 		tm_offset += tm_iids.pf_tids[i];
 	}
@@ -1702,8 +1692,7 @@
 }
 
 int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn,
-			enum protocol_type type,
-			u32 *p_cid)
+			enum protocol_type type, u32 *p_cid)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	u32 rel_cid;
@@ -1717,8 +1706,7 @@
 				      p_mngr->acquired[type].max_count);
 
 	if (rel_cid >= p_mngr->acquired[type].max_count) {
-		DP_NOTICE(p_hwfn, "no CID available for protocol %d\n",
-			  type);
+		DP_NOTICE(p_hwfn, "no CID available for protocol %d\n", type);
 		return -EINVAL;
 	}
 
@@ -1730,8 +1718,7 @@
 }
 
 static bool qed_cxt_test_cid_acquired(struct qed_hwfn *p_hwfn,
-				      u32 cid,
-				      enum protocol_type *p_type)
+				      u32 cid, enum protocol_type *p_type)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	struct qed_cid_acquired_map *p_map;
@@ -1763,8 +1750,7 @@
 	return true;
 }
 
-void qed_cxt_release_cid(struct qed_hwfn *p_hwfn,
-			 u32 cid)
+void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	enum protocol_type type;
@@ -1781,8 +1767,7 @@
 	__clear_bit(rel_cid, p_mngr->acquired[type].cid_map);
 }
 
-int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn,
-			 struct qed_cxt_info *p_info)
+int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info)
 {
 	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
 	u32 conn_cxt_size, hw_p_size, cxts_per_p, line;
@@ -1860,6 +1845,8 @@
 	/* Set the number of required CORE connections */
 	u32 core_cids = 1; /* SPQ */
 
+	if (p_hwfn->using_ll2)
+		core_cids += 4;
 	qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_CORE, core_cids, 0);
 
 	switch (p_hwfn->hw_info.personality) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
index c6f6f2e..2b8bdaa 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
@@ -170,6 +170,13 @@
  */
 void qed_cxt_release_cid(struct qed_hwfn *p_hwfn,
 			 u32 cid);
+int qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
+			      enum qed_cxt_elem_type elem_type, u32 iid);
+u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn,
+				enum protocol_type type);
+u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn,
+				enum protocol_type type);
+int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto);
 
 #define QED_CTX_WORKING_MEM 0
 #define QED_CTX_FL_MEM 1
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index d0dc28f..130da1c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -19,6 +19,7 @@
 #include "qed_dcbx.h"
 #include "qed_hsi.h"
 #include "qed_sp.h"
+#include "qed_sriov.h"
 #ifdef CONFIG_DCB
 #include <linux/qed/qed_eth_if.h>
 #endif
@@ -52,40 +53,94 @@
 		  DCBX_APP_SF_ETHTYPE);
 }
 
+static bool qed_dcbx_ieee_app_ethtype(u32 app_info_bitmap)
+{
+	u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+	/* Old MFW */
+	if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+		return qed_dcbx_app_ethtype(app_info_bitmap);
+
+	return !!(mfw_val == DCBX_APP_SF_IEEE_ETHTYPE);
+}
+
 static bool qed_dcbx_app_port(u32 app_info_bitmap)
 {
 	return !!(QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) ==
 		  DCBX_APP_SF_PORT);
 }
 
-static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_ieee_app_port(u32 app_info_bitmap, u8 type)
 {
-	return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-		  proto_id == QED_ETH_TYPE_DEFAULT);
+	u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE);
+
+	/* Old MFW */
+	if (mfw_val == DCBX_APP_SF_IEEE_RESERVED)
+		return qed_dcbx_app_port(app_info_bitmap);
+
+	return !!(mfw_val == type || mfw_val == DCBX_APP_SF_IEEE_TCP_UDP_PORT);
 }
 
-static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-	return !!(qed_dcbx_app_port(app_info_bitmap) &&
-		  proto_id == QED_TCP_PORT_ISCSI);
+	bool ethtype;
+
+	if (ieee)
+		ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+	else
+		ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+	return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT));
 }
 
-static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-	return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-		  proto_id == QED_ETH_TYPE_FCOE);
+	bool port;
+
+	if (ieee)
+		port = qed_dcbx_ieee_app_port(app_info_bitmap,
+					      DCBX_APP_SF_IEEE_TCP_PORT);
+	else
+		port = qed_dcbx_app_port(app_info_bitmap);
+
+	return !!(port && (proto_id == QED_TCP_PORT_ISCSI));
 }
 
-static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-	return !!(qed_dcbx_app_ethtype(app_info_bitmap) &&
-		  proto_id == QED_ETH_TYPE_ROCE);
+	bool ethtype;
+
+	if (ieee)
+		ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+	else
+		ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+	return !!(ethtype && (proto_id == QED_ETH_TYPE_FCOE));
 }
 
-static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id)
+static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
 {
-	return !!(qed_dcbx_app_port(app_info_bitmap) &&
-		  proto_id == QED_UDP_PORT_TYPE_ROCE_V2);
+	bool ethtype;
+
+	if (ieee)
+		ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap);
+	else
+		ethtype = qed_dcbx_app_ethtype(app_info_bitmap);
+
+	return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE));
+}
+
+static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee)
+{
+	bool port;
+
+	if (ieee)
+		port = qed_dcbx_ieee_app_port(app_info_bitmap,
+					      DCBX_APP_SF_IEEE_UDP_PORT);
+	else
+		port = qed_dcbx_app_port(app_info_bitmap);
+
+	return !!(port && (proto_id == QED_UDP_PORT_TYPE_ROCE_V2));
 }
 
 static void
@@ -164,17 +219,17 @@
 static bool
 qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn,
 			       u32 app_prio_bitmap,
-			       u16 id, enum dcbx_protocol_type *type)
+			       u16 id, enum dcbx_protocol_type *type, bool ieee)
 {
-	if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id)) {
+	if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_FCOE;
-	} else if (qed_dcbx_roce_tlv(app_prio_bitmap, id)) {
+	} else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ROCE;
-	} else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) {
+	} else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ISCSI;
-	} else if (qed_dcbx_default_tlv(app_prio_bitmap, id)) {
+	} else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ETH;
-	} else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) {
+	} else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id, ieee)) {
 		*type = DCBX_PROTOCOL_ROCE_V2;
 	} else {
 		*type = DCBX_MAX_PROTOCOL_TYPE;
@@ -194,17 +249,18 @@
 qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn,
 		     struct qed_dcbx_results *p_data,
 		     struct dcbx_app_priority_entry *p_tbl,
-		     u32 pri_tc_tbl, int count, bool dcbx_enabled)
+		     u32 pri_tc_tbl, int count, u8 dcbx_version)
 {
 	u8 tc, priority_map;
 	enum dcbx_protocol_type type;
+	bool enable, ieee;
 	u16 protocol_id;
 	int priority;
-	bool enable;
 	int i;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_DCB, "Num APP entries = %d\n", count);
 
+	ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE);
 	/* Parse APP TLV */
 	for (i = 0; i < count; i++) {
 		protocol_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
@@ -219,7 +275,7 @@
 
 		tc = QED_DCBX_PRIO2TC(pri_tc_tbl, priority);
 		if (qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
-						   protocol_id, &type)) {
+						   protocol_id, &type, ieee)) {
 			/* ETH always have the enable bit reset, as it gets
 			 * vlan information per packet. For other protocols,
 			 * should be set according to the dcbx_enabled
@@ -275,15 +331,12 @@
 	struct dcbx_ets_feature *p_ets;
 	struct qed_hw_info *p_info;
 	u32 pri_tc_tbl, flags;
-	bool dcbx_enabled;
+	u8 dcbx_version;
 	int num_entries;
 	int rc = 0;
 
-	/* If DCBx version is non zero, then negotiation was
-	 * successfuly performed
-	 */
 	flags = p_hwfn->p_dcbx_info->operational.flags;
-	dcbx_enabled = !!QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
+	dcbx_version = QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION);
 
 	p_app = &p_hwfn->p_dcbx_info->operational.features.app;
 	p_tbl = p_app->app_pri_tbl;
@@ -295,13 +348,13 @@
 	num_entries = QED_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES);
 
 	rc = qed_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl,
-				  num_entries, dcbx_enabled);
+				  num_entries, dcbx_version);
 	if (rc)
 		return rc;
 
 	p_info->num_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS);
 	data.pf_id = p_hwfn->rel_pf_id;
-	data.dcbx_enabled = dcbx_enabled;
+	data.dcbx_enabled = !!dcbx_version;
 
 	qed_dcbx_dp_protocol(p_hwfn, &data);
 
@@ -400,7 +453,7 @@
 qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn,
 		      struct dcbx_app_priority_feature *p_app,
 		      struct dcbx_app_priority_entry *p_tbl,
-		      struct qed_dcbx_params *p_params)
+		      struct qed_dcbx_params *p_params, bool ieee)
 {
 	struct qed_app_entry *entry;
 	u8 pri_map;
@@ -414,15 +467,46 @@
 						      DCBX_APP_NUM_ENTRIES);
 	for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
 		entry = &p_params->app_entry[i];
-		entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
-						     DCBX_APP_SF));
+		if (ieee) {
+			u8 sf_ieee;
+			u32 val;
+
+			sf_ieee = QED_MFW_GET_FIELD(p_tbl[i].entry,
+						    DCBX_APP_SF_IEEE);
+			switch (sf_ieee) {
+			case DCBX_APP_SF_IEEE_RESERVED:
+				/* Old MFW */
+				val = QED_MFW_GET_FIELD(p_tbl[i].entry,
+							DCBX_APP_SF);
+				entry->sf_ieee = val ?
+				    QED_DCBX_SF_IEEE_TCP_UDP_PORT :
+				    QED_DCBX_SF_IEEE_ETHTYPE;
+				break;
+			case DCBX_APP_SF_IEEE_ETHTYPE:
+				entry->sf_ieee = QED_DCBX_SF_IEEE_ETHTYPE;
+				break;
+			case DCBX_APP_SF_IEEE_TCP_PORT:
+				entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_PORT;
+				break;
+			case DCBX_APP_SF_IEEE_UDP_PORT:
+				entry->sf_ieee = QED_DCBX_SF_IEEE_UDP_PORT;
+				break;
+			case DCBX_APP_SF_IEEE_TCP_UDP_PORT:
+				entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_UDP_PORT;
+				break;
+			}
+		} else {
+			entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry,
+							     DCBX_APP_SF));
+		}
+
 		pri_map = QED_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP);
 		entry->prio = ffs(pri_map) - 1;
 		entry->proto_id = QED_MFW_GET_FIELD(p_tbl[i].entry,
 						    DCBX_APP_PROTOCOL_ID);
 		qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry,
 					       entry->proto_id,
-					       &entry->proto_type);
+					       &entry->proto_type, ieee);
 	}
 
 	DP_VERBOSE(p_hwfn, QED_MSG_DCB,
@@ -483,7 +567,7 @@
 	bw_map[1] = be32_to_cpu(p_ets->tc_bw_tbl[1]);
 	tsa_map[0] = be32_to_cpu(p_ets->tc_tsa_tbl[0]);
 	tsa_map[1] = be32_to_cpu(p_ets->tc_tsa_tbl[1]);
-	pri_map = be32_to_cpu(p_ets->pri_tc_tbl[0]);
+	pri_map = p_ets->pri_tc_tbl[0];
 	for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++) {
 		p_params->ets_tc_bw_tbl[i] = ((u8 *)bw_map)[i];
 		p_params->ets_tc_tsa_tbl[i] = ((u8 *)tsa_map)[i];
@@ -500,9 +584,9 @@
 			   struct dcbx_app_priority_feature *p_app,
 			   struct dcbx_app_priority_entry *p_tbl,
 			   struct dcbx_ets_feature *p_ets,
-			   u32 pfc, struct qed_dcbx_params *p_params)
+			   u32 pfc, struct qed_dcbx_params *p_params, bool ieee)
 {
-	qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params);
+	qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params, ieee);
 	qed_dcbx_get_ets_data(p_hwfn, p_ets, p_params);
 	qed_dcbx_get_pfc_data(p_hwfn, pfc, p_params);
 }
@@ -516,7 +600,7 @@
 	p_feat = &p_hwfn->p_dcbx_info->local_admin.features;
 	qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
 				   p_feat->app.app_pri_tbl, &p_feat->ets,
-				   p_feat->pfc, &params->local.params);
+				   p_feat->pfc, &params->local.params, false);
 	params->local.valid = true;
 }
 
@@ -529,7 +613,7 @@
 	p_feat = &p_hwfn->p_dcbx_info->remote.features;
 	qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
 				   p_feat->app.app_pri_tbl, &p_feat->ets,
-				   p_feat->pfc, &params->remote.params);
+				   p_feat->pfc, &params->remote.params, false);
 	params->remote.valid = true;
 }
 
@@ -574,7 +658,8 @@
 
 	qed_dcbx_get_common_params(p_hwfn, &p_feat->app,
 				   p_feat->app.app_pri_tbl, &p_feat->ets,
-				   p_feat->pfc, &params->operational.params);
+				   p_feat->pfc, &params->operational.params,
+				   p_operational->ieee);
 	qed_dcbx_get_priority_info(p_hwfn, &p_operational->app_prio, p_results);
 	err = QED_MFW_GET_FIELD(p_feat->app.flags, DCBX_APP_ERROR);
 	p_operational->err = err;
@@ -790,11 +875,8 @@
 	int rc = 0;
 
 	p_hwfn->p_dcbx_info = kzalloc(sizeof(*p_hwfn->p_dcbx_info), GFP_KERNEL);
-	if (!p_hwfn->p_dcbx_info) {
-		DP_NOTICE(p_hwfn,
-			  "Failed to allocate 'struct qed_dcbx_info'\n");
+	if (!p_hwfn->p_dcbx_info)
 		rc = -ENOMEM;
-	}
 
 	return rc;
 }
@@ -861,6 +943,9 @@
 	struct qed_ptt *p_ptt;
 	int rc;
 
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
 	p_ptt = qed_ptt_acquire(p_hwfn);
 	if (!p_ptt)
 		return -EBUSY;
@@ -900,6 +985,7 @@
 		if (p_params->pfc.prio[i])
 			pfc_map |= BIT(i);
 
+	*pfc &= ~DCBX_PFC_PRI_EN_BITMAP_MASK;
 	*pfc |= (pfc_map << DCBX_PFC_PRI_EN_BITMAP_SHIFT);
 
 	DP_VERBOSE(p_hwfn, QED_MSG_DCB, "pfc = 0x%x\n", *pfc);
@@ -944,7 +1030,6 @@
 		val = (((u32)p_params->ets_pri_tc_tbl[i]) << ((7 - i) * 4));
 		p_ets->pri_tc_tbl[0] |= val;
 	}
-	p_ets->pri_tc_tbl[0] = cpu_to_be32(p_ets->pri_tc_tbl[0]);
 	for (i = 0; i < 2; i++) {
 		p_ets->tc_bw_tbl[i] = cpu_to_be32(p_ets->tc_bw_tbl[i]);
 		p_ets->tc_tsa_tbl[i] = cpu_to_be32(p_ets->tc_tsa_tbl[i]);
@@ -954,7 +1039,7 @@
 static void
 qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn,
 		      struct dcbx_app_priority_feature *p_app,
-		      struct qed_dcbx_params *p_params)
+		      struct qed_dcbx_params *p_params, bool ieee)
 {
 	u32 *entry;
 	int i;
@@ -975,12 +1060,45 @@
 
 	for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) {
 		entry = &p_app->app_pri_tbl[i].entry;
-		*entry &= ~DCBX_APP_SF_MASK;
-		if (p_params->app_entry[i].ethtype)
-			*entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
-				   DCBX_APP_SF_SHIFT);
-		else
-			*entry |= ((u32)DCBX_APP_SF_PORT << DCBX_APP_SF_SHIFT);
+		*entry = 0;
+		if (ieee) {
+			*entry &= ~(DCBX_APP_SF_IEEE_MASK | DCBX_APP_SF_MASK);
+			switch (p_params->app_entry[i].sf_ieee) {
+			case QED_DCBX_SF_IEEE_ETHTYPE:
+				*entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE <<
+					   DCBX_APP_SF_IEEE_SHIFT);
+				*entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
+					   DCBX_APP_SF_SHIFT);
+				break;
+			case QED_DCBX_SF_IEEE_TCP_PORT:
+				*entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT <<
+					   DCBX_APP_SF_IEEE_SHIFT);
+				*entry |= ((u32)DCBX_APP_SF_PORT <<
+					   DCBX_APP_SF_SHIFT);
+				break;
+			case QED_DCBX_SF_IEEE_UDP_PORT:
+				*entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT <<
+					   DCBX_APP_SF_IEEE_SHIFT);
+				*entry |= ((u32)DCBX_APP_SF_PORT <<
+					   DCBX_APP_SF_SHIFT);
+				break;
+			case QED_DCBX_SF_IEEE_TCP_UDP_PORT:
+				*entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT <<
+					   DCBX_APP_SF_IEEE_SHIFT);
+				*entry |= ((u32)DCBX_APP_SF_PORT <<
+					   DCBX_APP_SF_SHIFT);
+				break;
+			}
+		} else {
+			*entry &= ~DCBX_APP_SF_MASK;
+			if (p_params->app_entry[i].ethtype)
+				*entry |= ((u32)DCBX_APP_SF_ETHTYPE <<
+					   DCBX_APP_SF_SHIFT);
+			else
+				*entry |= ((u32)DCBX_APP_SF_PORT <<
+					   DCBX_APP_SF_SHIFT);
+		}
+
 		*entry &= ~DCBX_APP_PROTOCOL_ID_MASK;
 		*entry |= ((u32)p_params->app_entry[i].proto_id <<
 			   DCBX_APP_PROTOCOL_ID_SHIFT);
@@ -995,15 +1113,19 @@
 			  struct dcbx_local_params *local_admin,
 			  struct qed_dcbx_set *params)
 {
+	bool ieee = false;
+
 	local_admin->flags = 0;
 	memcpy(&local_admin->features,
 	       &p_hwfn->p_dcbx_info->operational.features,
 	       sizeof(local_admin->features));
 
-	if (params->enabled)
+	if (params->enabled) {
 		local_admin->config = params->ver_num;
-	else
+		ieee = !!(params->ver_num & DCBX_CONFIG_VERSION_IEEE);
+	} else {
 		local_admin->config = DCBX_CONFIG_VERSION_DISABLED;
+	}
 
 	if (params->override_flags & QED_DCBX_OVERRIDE_PFC_CFG)
 		qed_dcbx_set_pfc_data(p_hwfn, &local_admin->features.pfc,
@@ -1015,7 +1137,7 @@
 
 	if (params->override_flags & QED_DCBX_OVERRIDE_APP_CFG)
 		qed_dcbx_set_app_data(p_hwfn, &local_admin->features.app,
-				      &params->config.params);
+				      &params->config.params, ieee);
 }
 
 int qed_dcbx_config_params(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
@@ -1064,11 +1186,9 @@
 		return 0;
 	}
 
-	dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL);
-	if (!dcbx_info) {
-		DP_ERR(p_hwfn, "Failed to allocate struct qed_dcbx_info\n");
+	dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL);
+	if (!dcbx_info)
 		return -ENOMEM;
-	}
 
 	rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB);
 	if (rc) {
@@ -1101,11 +1221,9 @@
 {
 	struct qed_dcbx_get *dcbx_info;
 
-	dcbx_info = kmalloc(sizeof(*dcbx_info), GFP_KERNEL);
-	if (!dcbx_info) {
-		DP_ERR(hwfn->cdev, "Failed to allocate memory for dcbx_info\n");
+	dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL);
+	if (!dcbx_info)
 		return NULL;
-	}
 
 	if (qed_dcbx_query_params(hwfn, dcbx_info, type)) {
 		kfree(dcbx_info);
@@ -1596,8 +1714,10 @@
 		if ((entry->ethtype == ethtype) && (entry->proto_id == idval))
 			break;
 		/* First empty slot */
-		if (!entry->proto_id)
+		if (!entry->proto_id) {
+			dcbx_set.config.params.num_app_entries++;
 			break;
+		}
 	}
 
 	if (i == QED_DCBX_MAX_APP_PROTOCOL) {
@@ -1855,6 +1975,7 @@
 
 	if (!dcbx_info->operational.ieee) {
 		DP_INFO(hwfn, "DCBX is not enabled/operational in IEEE mode\n");
+		kfree(dcbx_info);
 		return -EINVAL;
 	}
 
@@ -2023,17 +2144,19 @@
 	return rc;
 }
 
-int qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets)
+static int
+qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets)
 {
 	return qed_dcbnl_get_ieee_ets(cdev, ets, true);
 }
 
-int qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc)
+static int
+qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc)
 {
 	return qed_dcbnl_get_ieee_pfc(cdev, pfc, true);
 }
 
-int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app)
+static int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app)
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_dcbx_get *dcbx_info;
@@ -2077,7 +2200,7 @@
 	return 0;
 }
 
-int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
+static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_dcbx_get *dcbx_info;
@@ -2117,8 +2240,10 @@
 		    (entry->proto_id == app->protocol))
 			break;
 		/* First empty slot */
-		if (!entry->proto_id)
+		if (!entry->proto_id) {
+			dcbx_set.config.params.num_app_entries++;
 			break;
+		}
 	}
 
 	if (i == QED_DCBX_MAX_APP_PROTOCOL) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
new file mode 100644
index 0000000..88e7d5b
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -0,0 +1,6898 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/crc32.h>
+#include "qed.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_mcp.h"
+#include "qed_reg_addr.h"
+
+/* Chip IDs enum */
+enum chip_ids {
+	CHIP_RESERVED,
+	CHIP_BB_B0,
+	CHIP_K2,
+	MAX_CHIP_IDS
+};
+
+/* Memory groups enum */
+enum mem_groups {
+	MEM_GROUP_PXP_MEM,
+	MEM_GROUP_DMAE_MEM,
+	MEM_GROUP_CM_MEM,
+	MEM_GROUP_QM_MEM,
+	MEM_GROUP_TM_MEM,
+	MEM_GROUP_BRB_RAM,
+	MEM_GROUP_BRB_MEM,
+	MEM_GROUP_PRS_MEM,
+	MEM_GROUP_SDM_MEM,
+	MEM_GROUP_PBUF,
+	MEM_GROUP_IOR,
+	MEM_GROUP_RAM,
+	MEM_GROUP_BTB_RAM,
+	MEM_GROUP_RDIF_CTX,
+	MEM_GROUP_TDIF_CTX,
+	MEM_GROUP_CONN_CFC_MEM,
+	MEM_GROUP_TASK_CFC_MEM,
+	MEM_GROUP_CAU_PI,
+	MEM_GROUP_CAU_MEM,
+	MEM_GROUP_PXP_ILT,
+	MEM_GROUP_MULD_MEM,
+	MEM_GROUP_BTB_MEM,
+	MEM_GROUP_IGU_MEM,
+	MEM_GROUP_IGU_MSIX,
+	MEM_GROUP_CAU_SB,
+	MEM_GROUP_BMB_RAM,
+	MEM_GROUP_BMB_MEM,
+	MEM_GROUPS_NUM
+};
+
+/* Memory groups names */
+static const char * const s_mem_group_names[] = {
+	"PXP_MEM",
+	"DMAE_MEM",
+	"CM_MEM",
+	"QM_MEM",
+	"TM_MEM",
+	"BRB_RAM",
+	"BRB_MEM",
+	"PRS_MEM",
+	"SDM_MEM",
+	"PBUF",
+	"IOR",
+	"RAM",
+	"BTB_RAM",
+	"RDIF_CTX",
+	"TDIF_CTX",
+	"CONN_CFC_MEM",
+	"TASK_CFC_MEM",
+	"CAU_PI",
+	"CAU_MEM",
+	"PXP_ILT",
+	"MULD_MEM",
+	"BTB_MEM",
+	"IGU_MEM",
+	"IGU_MSIX",
+	"CAU_SB",
+	"BMB_RAM",
+	"BMB_MEM",
+};
+
+/* Idle check conditions */
+static u32 cond4(const u32 *r, const u32 *imm)
+{
+	return ((r[0] & imm[0]) != imm[1]) && ((r[1] & imm[2]) != imm[3]);
+}
+
+static u32 cond6(const u32 *r, const u32 *imm)
+{
+	return ((r[0] >> imm[0]) & imm[1]) != imm[2];
+}
+
+static u32 cond5(const u32 *r, const u32 *imm)
+{
+	return (r[0] & imm[0]) != imm[1];
+}
+
+static u32 cond8(const u32 *r, const u32 *imm)
+{
+	return ((r[0] & imm[0]) >> imm[1]) !=
+	    (((r[0] & imm[2]) >> imm[3]) | ((r[1] & imm[4]) << imm[5]));
+}
+
+static u32 cond9(const u32 *r, const u32 *imm)
+{
+	return ((r[0] & imm[0]) >> imm[1]) != (r[0] & imm[2]);
+}
+
+static u32 cond1(const u32 *r, const u32 *imm)
+{
+	return (r[0] & ~imm[0]) != imm[1];
+}
+
+static u32 cond0(const u32 *r, const u32 *imm)
+{
+	return r[0] != imm[0];
+}
+
+static u32 cond10(const u32 *r, const u32 *imm)
+{
+	return r[0] != r[1] && r[2] == imm[0];
+}
+
+static u32 cond11(const u32 *r, const u32 *imm)
+{
+	return r[0] != r[1] && r[2] > imm[0];
+}
+
+static u32 cond3(const u32 *r, const u32 *imm)
+{
+	return r[0] != r[1];
+}
+
+static u32 cond12(const u32 *r, const u32 *imm)
+{
+	return r[0] & imm[0];
+}
+
+static u32 cond7(const u32 *r, const u32 *imm)
+{
+	return r[0] < (r[1] - imm[0]);
+}
+
+static u32 cond2(const u32 *r, const u32 *imm)
+{
+	return r[0] > imm[0];
+}
+
+/* Array of Idle Check conditions */
+static u32(*cond_arr[]) (const u32 *r, const u32 *imm) = {
+	cond0,
+	cond1,
+	cond2,
+	cond3,
+	cond4,
+	cond5,
+	cond6,
+	cond7,
+	cond8,
+	cond9,
+	cond10,
+	cond11,
+	cond12,
+};
+
+/******************************* Data Types **********************************/
+
+enum platform_ids {
+	PLATFORM_ASIC,
+	PLATFORM_RESERVED,
+	PLATFORM_RESERVED2,
+	PLATFORM_RESERVED3,
+	MAX_PLATFORM_IDS
+};
+
+struct dbg_array {
+	const u32 *ptr;
+	u32 size_in_dwords;
+};
+
+/* Chip constant definitions */
+struct chip_defs {
+	const char *name;
+	struct {
+		u8 num_ports;
+		u8 num_pfs;
+	} per_platform[MAX_PLATFORM_IDS];
+};
+
+/* Platform constant definitions */
+struct platform_defs {
+	const char *name;
+	u32 delay_factor;
+};
+
+/* Storm constant definitions */
+struct storm_defs {
+	char letter;
+	enum block_id block_id;
+	enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS];
+	bool has_vfc;
+	u32 sem_fast_mem_addr;
+	u32 sem_frame_mode_addr;
+	u32 sem_slow_enable_addr;
+	u32 sem_slow_mode_addr;
+	u32 sem_slow_mode1_conf_addr;
+	u32 sem_sync_dbg_empty_addr;
+	u32 sem_slow_dbg_empty_addr;
+	u32 cm_ctx_wr_addr;
+	u32 cm_conn_ag_ctx_lid_size; /* In quad-regs */
+	u32 cm_conn_ag_ctx_rd_addr;
+	u32 cm_conn_st_ctx_lid_size; /* In quad-regs */
+	u32 cm_conn_st_ctx_rd_addr;
+	u32 cm_task_ag_ctx_lid_size; /* In quad-regs */
+	u32 cm_task_ag_ctx_rd_addr;
+	u32 cm_task_st_ctx_lid_size; /* In quad-regs */
+	u32 cm_task_st_ctx_rd_addr;
+};
+
+/* Block constant definitions */
+struct block_defs {
+	const char *name;
+	bool has_dbg_bus[MAX_CHIP_IDS];
+	bool associated_to_storm;
+	u32 storm_id; /* Valid only if associated_to_storm is true */
+	enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS];
+	u32 dbg_select_addr;
+	u32 dbg_cycle_enable_addr;
+	u32 dbg_shift_addr;
+	u32 dbg_force_valid_addr;
+	u32 dbg_force_frame_addr;
+	bool has_reset_bit;
+	bool unreset; /* If true, the block is taken out of reset before dump */
+	enum dbg_reset_regs reset_reg;
+	u8 reset_bit_offset; /* Bit offset in reset register */
+};
+
+/* Reset register definitions */
+struct reset_reg_defs {
+	u32 addr;
+	u32 unreset_val;
+	bool exists[MAX_CHIP_IDS];
+};
+
+struct grc_param_defs {
+	u32 default_val[MAX_CHIP_IDS];
+	u32 min;
+	u32 max;
+	bool is_preset;
+	u32 exclude_all_preset_val;
+	u32 crash_preset_val;
+};
+
+struct rss_mem_defs {
+	const char *mem_name;
+	const char *type_name;
+	u32 addr; /* In 128b units */
+	u32 num_entries[MAX_CHIP_IDS];
+	u32 entry_width[MAX_CHIP_IDS]; /* In bits */
+};
+
+struct vfc_ram_defs {
+	const char *mem_name;
+	const char *type_name;
+	u32 base_row;
+	u32 num_rows;
+};
+
+struct big_ram_defs {
+	const char *instance_name;
+	enum mem_groups mem_group_id;
+	enum mem_groups ram_mem_group_id;
+	enum dbg_grc_params grc_param;
+	u32 addr_reg_addr;
+	u32 data_reg_addr;
+	u32 num_of_blocks[MAX_CHIP_IDS];
+};
+
+struct phy_defs {
+	const char *phy_name;
+	u32 base_addr;
+	u32 tbus_addr_lo_addr;
+	u32 tbus_addr_hi_addr;
+	u32 tbus_data_lo_addr;
+	u32 tbus_data_hi_addr;
+};
+
+/******************************** Constants **********************************/
+
+#define MAX_LCIDS			320
+#define MAX_LTIDS			320
+#define NUM_IOR_SETS			2
+#define IORS_PER_SET			176
+#define IOR_SET_OFFSET(set_id)		((set_id) * 256)
+#define BYTES_IN_DWORD			sizeof(u32)
+
+/* In the macros below, size and offset are specified in bits */
+#define CEIL_DWORDS(size)		DIV_ROUND_UP(size, 32)
+#define FIELD_BIT_OFFSET(type, field)	type ## _ ## field ## _ ## OFFSET
+#define FIELD_BIT_SIZE(type, field)	type ## _ ## field ## _ ## SIZE
+#define FIELD_DWORD_OFFSET(type, field) \
+	 (int)(FIELD_BIT_OFFSET(type, field) / 32)
+#define FIELD_DWORD_SHIFT(type, field)	(FIELD_BIT_OFFSET(type, field) % 32)
+#define FIELD_BIT_MASK(type, field) \
+	(((1 << FIELD_BIT_SIZE(type, field)) - 1) << \
+	 FIELD_DWORD_SHIFT(type, field))
+#define SET_VAR_FIELD(var, type, field, val) \
+	do { \
+		var[FIELD_DWORD_OFFSET(type, field)] &=	\
+		(~FIELD_BIT_MASK(type, field));	\
+		var[FIELD_DWORD_OFFSET(type, field)] |= \
+		(val) << FIELD_DWORD_SHIFT(type, field); \
+	} while (0)
+#define ARR_REG_WR(dev, ptt, addr, arr, arr_size) \
+	do { \
+		for (i = 0; i < (arr_size); i++) \
+			qed_wr(dev, ptt, addr,	(arr)[i]); \
+	} while (0)
+#define ARR_REG_RD(dev, ptt, addr, arr, arr_size) \
+	do { \
+		for (i = 0; i < (arr_size); i++) \
+			(arr)[i] = qed_rd(dev, ptt, addr); \
+	} while (0)
+
+#define DWORDS_TO_BYTES(dwords)		((dwords) * BYTES_IN_DWORD)
+#define BYTES_TO_DWORDS(bytes)		((bytes) / BYTES_IN_DWORD)
+#define RAM_LINES_TO_DWORDS(lines)	((lines) * 2)
+#define RAM_LINES_TO_BYTES(lines) \
+	DWORDS_TO_BYTES(RAM_LINES_TO_DWORDS(lines))
+#define REG_DUMP_LEN_SHIFT		24
+#define MEM_DUMP_ENTRY_SIZE_DWORDS \
+	BYTES_TO_DWORDS(sizeof(struct dbg_dump_mem))
+#define IDLE_CHK_RULE_SIZE_DWORDS \
+	BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_rule))
+#define IDLE_CHK_RESULT_HDR_DWORDS \
+	BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_hdr))
+#define IDLE_CHK_RESULT_REG_HDR_DWORDS \
+	BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_reg_hdr))
+#define IDLE_CHK_MAX_ENTRIES_SIZE	32
+
+/* The sizes and offsets below are specified in bits */
+#define VFC_CAM_CMD_STRUCT_SIZE		64
+#define VFC_CAM_CMD_ROW_OFFSET		48
+#define VFC_CAM_CMD_ROW_SIZE		9
+#define VFC_CAM_ADDR_STRUCT_SIZE	16
+#define VFC_CAM_ADDR_OP_OFFSET		0
+#define VFC_CAM_ADDR_OP_SIZE		4
+#define VFC_CAM_RESP_STRUCT_SIZE	256
+#define VFC_RAM_ADDR_STRUCT_SIZE	16
+#define VFC_RAM_ADDR_OP_OFFSET		0
+#define VFC_RAM_ADDR_OP_SIZE		2
+#define VFC_RAM_ADDR_ROW_OFFSET		2
+#define VFC_RAM_ADDR_ROW_SIZE		10
+#define VFC_RAM_RESP_STRUCT_SIZE	256
+#define VFC_CAM_CMD_DWORDS		CEIL_DWORDS(VFC_CAM_CMD_STRUCT_SIZE)
+#define VFC_CAM_ADDR_DWORDS		CEIL_DWORDS(VFC_CAM_ADDR_STRUCT_SIZE)
+#define VFC_CAM_RESP_DWORDS		CEIL_DWORDS(VFC_CAM_RESP_STRUCT_SIZE)
+#define VFC_RAM_CMD_DWORDS		VFC_CAM_CMD_DWORDS
+#define VFC_RAM_ADDR_DWORDS		CEIL_DWORDS(VFC_RAM_ADDR_STRUCT_SIZE)
+#define VFC_RAM_RESP_DWORDS		CEIL_DWORDS(VFC_RAM_RESP_STRUCT_SIZE)
+#define NUM_VFC_RAM_TYPES		4
+#define VFC_CAM_NUM_ROWS		512
+#define VFC_OPCODE_CAM_RD		14
+#define VFC_OPCODE_RAM_RD		0
+#define NUM_RSS_MEM_TYPES		5
+#define NUM_BIG_RAM_TYPES		3
+#define BIG_RAM_BLOCK_SIZE_BYTES	128
+#define BIG_RAM_BLOCK_SIZE_DWORDS \
+	BYTES_TO_DWORDS(BIG_RAM_BLOCK_SIZE_BYTES)
+#define NUM_PHY_TBUS_ADDRESSES		2048
+#define PHY_DUMP_SIZE_DWORDS		(NUM_PHY_TBUS_ADDRESSES / 2)
+#define RESET_REG_UNRESET_OFFSET	4
+#define STALL_DELAY_MS			500
+#define STATIC_DEBUG_LINE_DWORDS	9
+#define NUM_DBG_BUS_LINES		256
+#define NUM_COMMON_GLOBAL_PARAMS	8
+#define FW_IMG_MAIN			1
+#define REG_FIFO_DEPTH_ELEMENTS		32
+#define REG_FIFO_ELEMENT_DWORDS		2
+#define REG_FIFO_DEPTH_DWORDS \
+	(REG_FIFO_ELEMENT_DWORDS * REG_FIFO_DEPTH_ELEMENTS)
+#define IGU_FIFO_DEPTH_ELEMENTS		64
+#define IGU_FIFO_ELEMENT_DWORDS		4
+#define IGU_FIFO_DEPTH_DWORDS \
+	(IGU_FIFO_ELEMENT_DWORDS * IGU_FIFO_DEPTH_ELEMENTS)
+#define PROTECTION_OVERRIDE_DEPTH_ELEMENTS	20
+#define PROTECTION_OVERRIDE_ELEMENT_DWORDS	2
+#define PROTECTION_OVERRIDE_DEPTH_DWORDS \
+	(PROTECTION_OVERRIDE_DEPTH_ELEMENTS * \
+	 PROTECTION_OVERRIDE_ELEMENT_DWORDS)
+#define MCP_SPAD_TRACE_OFFSIZE_ADDR \
+	(MCP_REG_SCRATCH + \
+	 offsetof(struct static_init, sections[SPAD_SECTION_TRACE]))
+#define MCP_TRACE_META_IMAGE_SIGNATURE  0x669955aa
+#define EMPTY_FW_VERSION_STR		"???_???_???_???"
+#define EMPTY_FW_IMAGE_STR		"???????????????"
+
+/***************************** Constant Arrays *******************************/
+
+/* Debug arrays */
+static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} };
+
+/* Chip constant definitions array */
+static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = {
+	{ "reserved", { {0, 0}, {0, 0}, {0, 0}, {0, 0} } },
+	{ "bb_b0",
+	  { {MAX_NUM_PORTS_BB, MAX_NUM_PFS_BB}, {0, 0}, {0, 0}, {0, 0} } },
+	{ "k2", { {MAX_NUM_PORTS_K2, MAX_NUM_PFS_K2}, {0, 0}, {0, 0}, {0, 0} } }
+};
+
+/* Storm constant definitions array */
+static struct storm_defs s_storm_defs[] = {
+	/* Tstorm */
+	{'T', BLOCK_TSEM,
+	 {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT,
+	  DBG_BUS_CLIENT_RBCT}, true,
+	 TSEM_REG_FAST_MEMORY,
+	 TSEM_REG_DBG_FRAME_MODE, TSEM_REG_SLOW_DBG_ACTIVE,
+	 TSEM_REG_SLOW_DBG_MODE, TSEM_REG_DBG_MODE1_CFG,
+	 TSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY,
+	 TCM_REG_CTX_RBC_ACCS,
+	 4, TCM_REG_AGG_CON_CTX,
+	 16, TCM_REG_SM_CON_CTX,
+	 2, TCM_REG_AGG_TASK_CTX,
+	 4, TCM_REG_SM_TASK_CTX},
+	/* Mstorm */
+	{'M', BLOCK_MSEM,
+	 {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT,
+	  DBG_BUS_CLIENT_RBCM}, false,
+	 MSEM_REG_FAST_MEMORY,
+	 MSEM_REG_DBG_FRAME_MODE, MSEM_REG_SLOW_DBG_ACTIVE,
+	 MSEM_REG_SLOW_DBG_MODE, MSEM_REG_DBG_MODE1_CFG,
+	 MSEM_REG_SYNC_DBG_EMPTY, MSEM_REG_SLOW_DBG_EMPTY,
+	 MCM_REG_CTX_RBC_ACCS,
+	 1, MCM_REG_AGG_CON_CTX,
+	 10, MCM_REG_SM_CON_CTX,
+	 2, MCM_REG_AGG_TASK_CTX,
+	 7, MCM_REG_SM_TASK_CTX},
+	/* Ustorm */
+	{'U', BLOCK_USEM,
+	 {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU,
+	  DBG_BUS_CLIENT_RBCU}, false,
+	 USEM_REG_FAST_MEMORY,
+	 USEM_REG_DBG_FRAME_MODE, USEM_REG_SLOW_DBG_ACTIVE,
+	 USEM_REG_SLOW_DBG_MODE, USEM_REG_DBG_MODE1_CFG,
+	 USEM_REG_SYNC_DBG_EMPTY, USEM_REG_SLOW_DBG_EMPTY,
+	 UCM_REG_CTX_RBC_ACCS,
+	 2, UCM_REG_AGG_CON_CTX,
+	 13, UCM_REG_SM_CON_CTX,
+	 3, UCM_REG_AGG_TASK_CTX,
+	 3, UCM_REG_SM_TASK_CTX},
+	/* Xstorm */
+	{'X', BLOCK_XSEM,
+	 {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX,
+	  DBG_BUS_CLIENT_RBCX}, false,
+	 XSEM_REG_FAST_MEMORY,
+	 XSEM_REG_DBG_FRAME_MODE, XSEM_REG_SLOW_DBG_ACTIVE,
+	 XSEM_REG_SLOW_DBG_MODE, XSEM_REG_DBG_MODE1_CFG,
+	 XSEM_REG_SYNC_DBG_EMPTY, XSEM_REG_SLOW_DBG_EMPTY,
+	 XCM_REG_CTX_RBC_ACCS,
+	 9, XCM_REG_AGG_CON_CTX,
+	 15, XCM_REG_SM_CON_CTX,
+	 0, 0,
+	 0, 0},
+	/* Ystorm */
+	{'Y', BLOCK_YSEM,
+	 {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX,
+	  DBG_BUS_CLIENT_RBCY}, false,
+	 YSEM_REG_FAST_MEMORY,
+	 YSEM_REG_DBG_FRAME_MODE, YSEM_REG_SLOW_DBG_ACTIVE,
+	 YSEM_REG_SLOW_DBG_MODE, YSEM_REG_DBG_MODE1_CFG,
+	 YSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY,
+	 YCM_REG_CTX_RBC_ACCS,
+	 2, YCM_REG_AGG_CON_CTX,
+	 3, YCM_REG_SM_CON_CTX,
+	 2, YCM_REG_AGG_TASK_CTX,
+	 12, YCM_REG_SM_TASK_CTX},
+	/* Pstorm */
+	{'P', BLOCK_PSEM,
+	 {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS,
+	  DBG_BUS_CLIENT_RBCS}, true,
+	 PSEM_REG_FAST_MEMORY,
+	 PSEM_REG_DBG_FRAME_MODE, PSEM_REG_SLOW_DBG_ACTIVE,
+	 PSEM_REG_SLOW_DBG_MODE, PSEM_REG_DBG_MODE1_CFG,
+	 PSEM_REG_SYNC_DBG_EMPTY, PSEM_REG_SLOW_DBG_EMPTY,
+	 PCM_REG_CTX_RBC_ACCS,
+	 0, 0,
+	 10, PCM_REG_SM_CON_CTX,
+	 0, 0,
+	 0, 0}
+};
+
+/* Block definitions array */
+static struct block_defs block_grc_defs = {
+	"grc", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN},
+	GRC_REG_DBG_SELECT, GRC_REG_DBG_DWORD_ENABLE,
+	GRC_REG_DBG_SHIFT, GRC_REG_DBG_FORCE_VALID,
+	GRC_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISC_PL_UA, 1
+};
+
+static struct block_defs block_miscs_defs = {
+	"miscs", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_misc_defs = {
+	"misc", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_dbu_defs = {
+	"dbu", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_pglue_b_defs = {
+	"pglue_b", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH},
+	PGLUE_B_REG_DBG_SELECT, PGLUE_B_REG_DBG_DWORD_ENABLE,
+	PGLUE_B_REG_DBG_SHIFT, PGLUE_B_REG_DBG_FORCE_VALID,
+	PGLUE_B_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISCS_PL_HV, 1
+};
+
+static struct block_defs block_cnig_defs = {
+	"cnig", {false, false, true}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
+	CNIG_REG_DBG_SELECT_K2, CNIG_REG_DBG_DWORD_ENABLE_K2,
+	CNIG_REG_DBG_SHIFT_K2, CNIG_REG_DBG_FORCE_VALID_K2,
+	CNIG_REG_DBG_FORCE_FRAME_K2,
+	true, false, DBG_RESET_REG_MISCS_PL_HV, 0
+};
+
+static struct block_defs block_cpmu_defs = {
+	"cpmu", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	true, false, DBG_RESET_REG_MISCS_PL_HV, 8
+};
+
+static struct block_defs block_ncsi_defs = {
+	"ncsi", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ},
+	NCSI_REG_DBG_SELECT, NCSI_REG_DBG_DWORD_ENABLE,
+	NCSI_REG_DBG_SHIFT, NCSI_REG_DBG_FORCE_VALID,
+	NCSI_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISCS_PL_HV, 5
+};
+
+static struct block_defs block_opte_defs = {
+	"opte", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	true, false, DBG_RESET_REG_MISCS_PL_HV, 4
+};
+
+static struct block_defs block_bmb_defs = {
+	"bmb", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCB},
+	BMB_REG_DBG_SELECT, BMB_REG_DBG_DWORD_ENABLE,
+	BMB_REG_DBG_SHIFT, BMB_REG_DBG_FORCE_VALID,
+	BMB_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISCS_PL_UA, 7
+};
+
+static struct block_defs block_pcie_defs = {
+	"pcie", {false, false, true}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
+	PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE,
+	PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID,
+	PCIE_REG_DBG_COMMON_FORCE_FRAME,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_mcp_defs = {
+	"mcp", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_mcp2_defs = {
+	"mcp2", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ},
+	MCP2_REG_DBG_SELECT, MCP2_REG_DBG_DWORD_ENABLE,
+	MCP2_REG_DBG_SHIFT, MCP2_REG_DBG_FORCE_VALID,
+	MCP2_REG_DBG_FORCE_FRAME,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_pswhst_defs = {
+	"pswhst", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	PSWHST_REG_DBG_SELECT, PSWHST_REG_DBG_DWORD_ENABLE,
+	PSWHST_REG_DBG_SHIFT, PSWHST_REG_DBG_FORCE_VALID,
+	PSWHST_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISC_PL_HV, 0
+};
+
+static struct block_defs block_pswhst2_defs = {
+	"pswhst2", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	PSWHST2_REG_DBG_SELECT, PSWHST2_REG_DBG_DWORD_ENABLE,
+	PSWHST2_REG_DBG_SHIFT, PSWHST2_REG_DBG_FORCE_VALID,
+	PSWHST2_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISC_PL_HV, 0
+};
+
+static struct block_defs block_pswrd_defs = {
+	"pswrd", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	PSWRD_REG_DBG_SELECT, PSWRD_REG_DBG_DWORD_ENABLE,
+	PSWRD_REG_DBG_SHIFT, PSWRD_REG_DBG_FORCE_VALID,
+	PSWRD_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISC_PL_HV, 2
+};
+
+static struct block_defs block_pswrd2_defs = {
+	"pswrd2", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	PSWRD2_REG_DBG_SELECT, PSWRD2_REG_DBG_DWORD_ENABLE,
+	PSWRD2_REG_DBG_SHIFT, PSWRD2_REG_DBG_FORCE_VALID,
+	PSWRD2_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISC_PL_HV, 2
+};
+
+static struct block_defs block_pswwr_defs = {
+	"pswwr", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	PSWWR_REG_DBG_SELECT, PSWWR_REG_DBG_DWORD_ENABLE,
+	PSWWR_REG_DBG_SHIFT, PSWWR_REG_DBG_FORCE_VALID,
+	PSWWR_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISC_PL_HV, 3
+};
+
+static struct block_defs block_pswwr2_defs = {
+	"pswwr2", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	true, false, DBG_RESET_REG_MISC_PL_HV, 3
+};
+
+static struct block_defs block_pswrq_defs = {
+	"pswrq", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	PSWRQ_REG_DBG_SELECT, PSWRQ_REG_DBG_DWORD_ENABLE,
+	PSWRQ_REG_DBG_SHIFT, PSWRQ_REG_DBG_FORCE_VALID,
+	PSWRQ_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISC_PL_HV, 1
+};
+
+static struct block_defs block_pswrq2_defs = {
+	"pswrq2", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	PSWRQ2_REG_DBG_SELECT, PSWRQ2_REG_DBG_DWORD_ENABLE,
+	PSWRQ2_REG_DBG_SHIFT, PSWRQ2_REG_DBG_FORCE_VALID,
+	PSWRQ2_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISC_PL_HV, 1
+};
+
+static struct block_defs block_pglcs_defs = {
+	"pglcs", {false, false, true}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
+	PGLCS_REG_DBG_SELECT, PGLCS_REG_DBG_DWORD_ENABLE,
+	PGLCS_REG_DBG_SHIFT, PGLCS_REG_DBG_FORCE_VALID,
+	PGLCS_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISCS_PL_HV, 2
+};
+
+static struct block_defs block_ptu_defs = {
+	"ptu", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	PTU_REG_DBG_SELECT, PTU_REG_DBG_DWORD_ENABLE,
+	PTU_REG_DBG_SHIFT, PTU_REG_DBG_FORCE_VALID,
+	PTU_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 20
+};
+
+static struct block_defs block_dmae_defs = {
+	"dmae", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	DMAE_REG_DBG_SELECT, DMAE_REG_DBG_DWORD_ENABLE,
+	DMAE_REG_DBG_SHIFT, DMAE_REG_DBG_FORCE_VALID,
+	DMAE_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 28
+};
+
+static struct block_defs block_tcm_defs = {
+	"tcm", {true, true, true}, true, DBG_TSTORM_ID,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+	TCM_REG_DBG_SELECT, TCM_REG_DBG_DWORD_ENABLE,
+	TCM_REG_DBG_SHIFT, TCM_REG_DBG_FORCE_VALID,
+	TCM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 5
+};
+
+static struct block_defs block_mcm_defs = {
+	"mcm", {true, true, true}, true, DBG_MSTORM_ID,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+	MCM_REG_DBG_SELECT, MCM_REG_DBG_DWORD_ENABLE,
+	MCM_REG_DBG_SHIFT, MCM_REG_DBG_FORCE_VALID,
+	MCM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 3
+};
+
+static struct block_defs block_ucm_defs = {
+	"ucm", {true, true, true}, true, DBG_USTORM_ID,
+	{DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+	UCM_REG_DBG_SELECT, UCM_REG_DBG_DWORD_ENABLE,
+	UCM_REG_DBG_SHIFT, UCM_REG_DBG_FORCE_VALID,
+	UCM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 8
+};
+
+static struct block_defs block_xcm_defs = {
+	"xcm", {true, true, true}, true, DBG_XSTORM_ID,
+	{DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+	XCM_REG_DBG_SELECT, XCM_REG_DBG_DWORD_ENABLE,
+	XCM_REG_DBG_SHIFT, XCM_REG_DBG_FORCE_VALID,
+	XCM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 19
+};
+
+static struct block_defs block_ycm_defs = {
+	"ycm", {true, true, true}, true, DBG_YSTORM_ID,
+	{DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+	YCM_REG_DBG_SELECT, YCM_REG_DBG_DWORD_ENABLE,
+	YCM_REG_DBG_SHIFT, YCM_REG_DBG_FORCE_VALID,
+	YCM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 5
+};
+
+static struct block_defs block_pcm_defs = {
+	"pcm", {true, true, true}, true, DBG_PSTORM_ID,
+	{DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+	PCM_REG_DBG_SELECT, PCM_REG_DBG_DWORD_ENABLE,
+	PCM_REG_DBG_SHIFT, PCM_REG_DBG_FORCE_VALID,
+	PCM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 4
+};
+
+static struct block_defs block_qm_defs = {
+	"qm", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCQ},
+	QM_REG_DBG_SELECT, QM_REG_DBG_DWORD_ENABLE,
+	QM_REG_DBG_SHIFT, QM_REG_DBG_FORCE_VALID,
+	QM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 16
+};
+
+static struct block_defs block_tm_defs = {
+	"tm", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+	TM_REG_DBG_SELECT, TM_REG_DBG_DWORD_ENABLE,
+	TM_REG_DBG_SHIFT, TM_REG_DBG_FORCE_VALID,
+	TM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 17
+};
+
+static struct block_defs block_dorq_defs = {
+	"dorq", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+	DORQ_REG_DBG_SELECT, DORQ_REG_DBG_DWORD_ENABLE,
+	DORQ_REG_DBG_SHIFT, DORQ_REG_DBG_FORCE_VALID,
+	DORQ_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 18
+};
+
+static struct block_defs block_brb_defs = {
+	"brb", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR},
+	BRB_REG_DBG_SELECT, BRB_REG_DBG_DWORD_ENABLE,
+	BRB_REG_DBG_SHIFT, BRB_REG_DBG_FORCE_VALID,
+	BRB_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 0
+};
+
+static struct block_defs block_src_defs = {
+	"src", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+	SRC_REG_DBG_SELECT, SRC_REG_DBG_DWORD_ENABLE,
+	SRC_REG_DBG_SHIFT, SRC_REG_DBG_FORCE_VALID,
+	SRC_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 2
+};
+
+static struct block_defs block_prs_defs = {
+	"prs", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR},
+	PRS_REG_DBG_SELECT, PRS_REG_DBG_DWORD_ENABLE,
+	PRS_REG_DBG_SHIFT, PRS_REG_DBG_FORCE_VALID,
+	PRS_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 1
+};
+
+static struct block_defs block_tsdm_defs = {
+	"tsdm", {true, true, true}, true, DBG_TSTORM_ID,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+	TSDM_REG_DBG_SELECT, TSDM_REG_DBG_DWORD_ENABLE,
+	TSDM_REG_DBG_SHIFT, TSDM_REG_DBG_FORCE_VALID,
+	TSDM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 3
+};
+
+static struct block_defs block_msdm_defs = {
+	"msdm", {true, true, true}, true, DBG_MSTORM_ID,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+	MSDM_REG_DBG_SELECT, MSDM_REG_DBG_DWORD_ENABLE,
+	MSDM_REG_DBG_SHIFT, MSDM_REG_DBG_FORCE_VALID,
+	MSDM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 6
+};
+
+static struct block_defs block_usdm_defs = {
+	"usdm", {true, true, true}, true, DBG_USTORM_ID,
+	{DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+	USDM_REG_DBG_SELECT, USDM_REG_DBG_DWORD_ENABLE,
+	USDM_REG_DBG_SHIFT, USDM_REG_DBG_FORCE_VALID,
+	USDM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 7
+};
+
+static struct block_defs block_xsdm_defs = {
+	"xsdm", {true, true, true}, true, DBG_XSTORM_ID,
+	{DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+	XSDM_REG_DBG_SELECT, XSDM_REG_DBG_DWORD_ENABLE,
+	XSDM_REG_DBG_SHIFT, XSDM_REG_DBG_FORCE_VALID,
+	XSDM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 20
+};
+
+static struct block_defs block_ysdm_defs = {
+	"ysdm", {true, true, true}, true, DBG_YSTORM_ID,
+	{DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+	YSDM_REG_DBG_SELECT, YSDM_REG_DBG_DWORD_ENABLE,
+	YSDM_REG_DBG_SHIFT, YSDM_REG_DBG_FORCE_VALID,
+	YSDM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 8
+};
+
+static struct block_defs block_psdm_defs = {
+	"psdm", {true, true, true}, true, DBG_PSTORM_ID,
+	{DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+	PSDM_REG_DBG_SELECT, PSDM_REG_DBG_DWORD_ENABLE,
+	PSDM_REG_DBG_SHIFT, PSDM_REG_DBG_FORCE_VALID,
+	PSDM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 7
+};
+
+static struct block_defs block_tsem_defs = {
+	"tsem", {true, true, true}, true, DBG_TSTORM_ID,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+	TSEM_REG_DBG_SELECT, TSEM_REG_DBG_DWORD_ENABLE,
+	TSEM_REG_DBG_SHIFT, TSEM_REG_DBG_FORCE_VALID,
+	TSEM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 4
+};
+
+static struct block_defs block_msem_defs = {
+	"msem", {true, true, true}, true, DBG_MSTORM_ID,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+	MSEM_REG_DBG_SELECT, MSEM_REG_DBG_DWORD_ENABLE,
+	MSEM_REG_DBG_SHIFT, MSEM_REG_DBG_FORCE_VALID,
+	MSEM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 9
+};
+
+static struct block_defs block_usem_defs = {
+	"usem", {true, true, true}, true, DBG_USTORM_ID,
+	{DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+	USEM_REG_DBG_SELECT, USEM_REG_DBG_DWORD_ENABLE,
+	USEM_REG_DBG_SHIFT, USEM_REG_DBG_FORCE_VALID,
+	USEM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 9
+};
+
+static struct block_defs block_xsem_defs = {
+	"xsem", {true, true, true}, true, DBG_XSTORM_ID,
+	{DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+	XSEM_REG_DBG_SELECT, XSEM_REG_DBG_DWORD_ENABLE,
+	XSEM_REG_DBG_SHIFT, XSEM_REG_DBG_FORCE_VALID,
+	XSEM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 21
+};
+
+static struct block_defs block_ysem_defs = {
+	"ysem", {true, true, true}, true, DBG_YSTORM_ID,
+	{DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY},
+	YSEM_REG_DBG_SELECT, YSEM_REG_DBG_DWORD_ENABLE,
+	YSEM_REG_DBG_SHIFT, YSEM_REG_DBG_FORCE_VALID,
+	YSEM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 11
+};
+
+static struct block_defs block_psem_defs = {
+	"psem", {true, true, true}, true, DBG_PSTORM_ID,
+	{DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+	PSEM_REG_DBG_SELECT, PSEM_REG_DBG_DWORD_ENABLE,
+	PSEM_REG_DBG_SHIFT, PSEM_REG_DBG_FORCE_VALID,
+	PSEM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 10
+};
+
+static struct block_defs block_rss_defs = {
+	"rss", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT},
+	RSS_REG_DBG_SELECT, RSS_REG_DBG_DWORD_ENABLE,
+	RSS_REG_DBG_SHIFT, RSS_REG_DBG_FORCE_VALID,
+	RSS_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 18
+};
+
+static struct block_defs block_tmld_defs = {
+	"tmld", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+	TMLD_REG_DBG_SELECT, TMLD_REG_DBG_DWORD_ENABLE,
+	TMLD_REG_DBG_SHIFT, TMLD_REG_DBG_FORCE_VALID,
+	TMLD_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 13
+};
+
+static struct block_defs block_muld_defs = {
+	"muld", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+	MULD_REG_DBG_SELECT, MULD_REG_DBG_DWORD_ENABLE,
+	MULD_REG_DBG_SHIFT, MULD_REG_DBG_FORCE_VALID,
+	MULD_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 14
+};
+
+static struct block_defs block_yuld_defs = {
+	"yuld", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU},
+	YULD_REG_DBG_SELECT, YULD_REG_DBG_DWORD_ENABLE,
+	YULD_REG_DBG_SHIFT, YULD_REG_DBG_FORCE_VALID,
+	YULD_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 15
+};
+
+static struct block_defs block_xyld_defs = {
+	"xyld", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX},
+	XYLD_REG_DBG_SELECT, XYLD_REG_DBG_DWORD_ENABLE,
+	XYLD_REG_DBG_SHIFT, XYLD_REG_DBG_FORCE_VALID,
+	XYLD_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 12
+};
+
+static struct block_defs block_prm_defs = {
+	"prm", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+	PRM_REG_DBG_SELECT, PRM_REG_DBG_DWORD_ENABLE,
+	PRM_REG_DBG_SHIFT, PRM_REG_DBG_FORCE_VALID,
+	PRM_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 21
+};
+
+static struct block_defs block_pbf_pb1_defs = {
+	"pbf_pb1", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
+	PBF_PB1_REG_DBG_SELECT, PBF_PB1_REG_DBG_DWORD_ENABLE,
+	PBF_PB1_REG_DBG_SHIFT, PBF_PB1_REG_DBG_FORCE_VALID,
+	PBF_PB1_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1,
+	11
+};
+
+static struct block_defs block_pbf_pb2_defs = {
+	"pbf_pb2", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
+	PBF_PB2_REG_DBG_SELECT, PBF_PB2_REG_DBG_DWORD_ENABLE,
+	PBF_PB2_REG_DBG_SHIFT, PBF_PB2_REG_DBG_FORCE_VALID,
+	PBF_PB2_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1,
+	12
+};
+
+static struct block_defs block_rpb_defs = {
+	"rpb", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+	RPB_REG_DBG_SELECT, RPB_REG_DBG_DWORD_ENABLE,
+	RPB_REG_DBG_SHIFT, RPB_REG_DBG_FORCE_VALID,
+	RPB_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 13
+};
+
+static struct block_defs block_btb_defs = {
+	"btb", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCV},
+	BTB_REG_DBG_SELECT, BTB_REG_DBG_DWORD_ENABLE,
+	BTB_REG_DBG_SHIFT, BTB_REG_DBG_FORCE_VALID,
+	BTB_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 10
+};
+
+static struct block_defs block_pbf_defs = {
+	"pbf", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV},
+	PBF_REG_DBG_SELECT, PBF_REG_DBG_DWORD_ENABLE,
+	PBF_REG_DBG_SHIFT, PBF_REG_DBG_FORCE_VALID,
+	PBF_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 15
+};
+
+static struct block_defs block_rdif_defs = {
+	"rdif", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM},
+	RDIF_REG_DBG_SELECT, RDIF_REG_DBG_DWORD_ENABLE,
+	RDIF_REG_DBG_SHIFT, RDIF_REG_DBG_FORCE_VALID,
+	RDIF_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 16
+};
+
+static struct block_defs block_tdif_defs = {
+	"tdif", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS},
+	TDIF_REG_DBG_SELECT, TDIF_REG_DBG_DWORD_ENABLE,
+	TDIF_REG_DBG_SHIFT, TDIF_REG_DBG_FORCE_VALID,
+	TDIF_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 17
+};
+
+static struct block_defs block_cdu_defs = {
+	"cdu", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+	CDU_REG_DBG_SELECT, CDU_REG_DBG_DWORD_ENABLE,
+	CDU_REG_DBG_SHIFT, CDU_REG_DBG_FORCE_VALID,
+	CDU_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 23
+};
+
+static struct block_defs block_ccfc_defs = {
+	"ccfc", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+	CCFC_REG_DBG_SELECT, CCFC_REG_DBG_DWORD_ENABLE,
+	CCFC_REG_DBG_SHIFT, CCFC_REG_DBG_FORCE_VALID,
+	CCFC_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 24
+};
+
+static struct block_defs block_tcfc_defs = {
+	"tcfc", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF},
+	TCFC_REG_DBG_SELECT, TCFC_REG_DBG_DWORD_ENABLE,
+	TCFC_REG_DBG_SHIFT, TCFC_REG_DBG_FORCE_VALID,
+	TCFC_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 25
+};
+
+static struct block_defs block_igu_defs = {
+	"igu", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	IGU_REG_DBG_SELECT, IGU_REG_DBG_DWORD_ENABLE,
+	IGU_REG_DBG_SHIFT, IGU_REG_DBG_FORCE_VALID,
+	IGU_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 27
+};
+
+static struct block_defs block_cau_defs = {
+	"cau", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP},
+	CAU_REG_DBG_SELECT, CAU_REG_DBG_DWORD_ENABLE,
+	CAU_REG_DBG_SHIFT, CAU_REG_DBG_FORCE_VALID,
+	CAU_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 19
+};
+
+static struct block_defs block_umac_defs = {
+	"umac", {false, false, true}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
+	UMAC_REG_DBG_SELECT, UMAC_REG_DBG_DWORD_ENABLE,
+	UMAC_REG_DBG_SHIFT, UMAC_REG_DBG_FORCE_VALID,
+	UMAC_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISCS_PL_HV, 6
+};
+
+static struct block_defs block_xmac_defs = {
+	"xmac", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_dbg_defs = {
+	"dbg", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 3
+};
+
+static struct block_defs block_nig_defs = {
+	"nig", {true, true, true}, false, 0,
+	{DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN},
+	NIG_REG_DBG_SELECT, NIG_REG_DBG_DWORD_ENABLE,
+	NIG_REG_DBG_SHIFT, NIG_REG_DBG_FORCE_VALID,
+	NIG_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 0
+};
+
+static struct block_defs block_wol_defs = {
+	"wol", {false, false, true}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ},
+	WOL_REG_DBG_SELECT, WOL_REG_DBG_DWORD_ENABLE,
+	WOL_REG_DBG_SHIFT, WOL_REG_DBG_FORCE_VALID,
+	WOL_REG_DBG_FORCE_FRAME,
+	true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 7
+};
+
+static struct block_defs block_bmbn_defs = {
+	"bmbn", {false, false, true}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCB},
+	BMBN_REG_DBG_SELECT, BMBN_REG_DBG_DWORD_ENABLE,
+	BMBN_REG_DBG_SHIFT, BMBN_REG_DBG_FORCE_VALID,
+	BMBN_REG_DBG_FORCE_FRAME,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_ipc_defs = {
+	"ipc", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	true, false, DBG_RESET_REG_MISCS_PL_UA, 8
+};
+
+static struct block_defs block_nwm_defs = {
+	"nwm", {false, false, true}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW},
+	NWM_REG_DBG_SELECT, NWM_REG_DBG_DWORD_ENABLE,
+	NWM_REG_DBG_SHIFT, NWM_REG_DBG_FORCE_VALID,
+	NWM_REG_DBG_FORCE_FRAME,
+	true, false, DBG_RESET_REG_MISCS_PL_HV_2, 0
+};
+
+static struct block_defs block_nws_defs = {
+	"nws", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	true, false, DBG_RESET_REG_MISCS_PL_HV, 12
+};
+
+static struct block_defs block_ms_defs = {
+	"ms", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	true, false, DBG_RESET_REG_MISCS_PL_HV, 13
+};
+
+static struct block_defs block_phy_pcie_defs = {
+	"phy_pcie", {false, false, true}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH},
+	PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE,
+	PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID,
+	PCIE_REG_DBG_COMMON_FORCE_FRAME,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_led_defs = {
+	"led", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	true, true, DBG_RESET_REG_MISCS_PL_HV, 14
+};
+
+static struct block_defs block_misc_aeu_defs = {
+	"misc_aeu", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs block_bar0_map_defs = {
+	"bar0_map", {false, false, false}, false, 0,
+	{MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS},
+	0, 0, 0, 0, 0,
+	false, false, MAX_DBG_RESET_REGS, 0
+};
+
+static struct block_defs *s_block_defs[MAX_BLOCK_ID] = {
+	&block_grc_defs,
+	&block_miscs_defs,
+	&block_misc_defs,
+	&block_dbu_defs,
+	&block_pglue_b_defs,
+	&block_cnig_defs,
+	&block_cpmu_defs,
+	&block_ncsi_defs,
+	&block_opte_defs,
+	&block_bmb_defs,
+	&block_pcie_defs,
+	&block_mcp_defs,
+	&block_mcp2_defs,
+	&block_pswhst_defs,
+	&block_pswhst2_defs,
+	&block_pswrd_defs,
+	&block_pswrd2_defs,
+	&block_pswwr_defs,
+	&block_pswwr2_defs,
+	&block_pswrq_defs,
+	&block_pswrq2_defs,
+	&block_pglcs_defs,
+	&block_dmae_defs,
+	&block_ptu_defs,
+	&block_tcm_defs,
+	&block_mcm_defs,
+	&block_ucm_defs,
+	&block_xcm_defs,
+	&block_ycm_defs,
+	&block_pcm_defs,
+	&block_qm_defs,
+	&block_tm_defs,
+	&block_dorq_defs,
+	&block_brb_defs,
+	&block_src_defs,
+	&block_prs_defs,
+	&block_tsdm_defs,
+	&block_msdm_defs,
+	&block_usdm_defs,
+	&block_xsdm_defs,
+	&block_ysdm_defs,
+	&block_psdm_defs,
+	&block_tsem_defs,
+	&block_msem_defs,
+	&block_usem_defs,
+	&block_xsem_defs,
+	&block_ysem_defs,
+	&block_psem_defs,
+	&block_rss_defs,
+	&block_tmld_defs,
+	&block_muld_defs,
+	&block_yuld_defs,
+	&block_xyld_defs,
+	&block_prm_defs,
+	&block_pbf_pb1_defs,
+	&block_pbf_pb2_defs,
+	&block_rpb_defs,
+	&block_btb_defs,
+	&block_pbf_defs,
+	&block_rdif_defs,
+	&block_tdif_defs,
+	&block_cdu_defs,
+	&block_ccfc_defs,
+	&block_tcfc_defs,
+	&block_igu_defs,
+	&block_cau_defs,
+	&block_umac_defs,
+	&block_xmac_defs,
+	&block_dbg_defs,
+	&block_nig_defs,
+	&block_wol_defs,
+	&block_bmbn_defs,
+	&block_ipc_defs,
+	&block_nwm_defs,
+	&block_nws_defs,
+	&block_ms_defs,
+	&block_phy_pcie_defs,
+	&block_led_defs,
+	&block_misc_aeu_defs,
+	&block_bar0_map_defs,
+};
+
+static struct platform_defs s_platform_defs[] = {
+	{"asic", 1},
+	{"reserved", 0},
+	{"reserved2", 0},
+	{"reserved3", 0}
+};
+
+static struct grc_param_defs s_grc_param_defs[] = {
+	{{1, 1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_TSTORM */
+	{{1, 1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_MSTORM */
+	{{1, 1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_USTORM */
+	{{1, 1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_XSTORM */
+	{{1, 1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_YSTORM */
+	{{1, 1, 1}, 0, 1, false, 1, 1},	/* DBG_GRC_PARAM_DUMP_PSTORM */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_REGS */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_RAM */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_PBUF */
+	{{0, 0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_IOR */
+	{{0, 0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_VFC */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_CM_CTX */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_ILT */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_RSS */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_CAU */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_QM */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_MCP */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_RESERVED */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_CFC */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_IGU */
+	{{0, 0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_BRB */
+	{{0, 0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_BTB */
+	{{0, 0, 0}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_BMB */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_NIG */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_MULD */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_PRS */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_DMAE */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_TM */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_SDM */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_DIF */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_STATIC */
+	{{0, 0, 0}, 0, 1, false, 0, 0},	/* DBG_GRC_PARAM_UNSTALL */
+	{{MAX_LCIDS, MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, MAX_LCIDS,
+	 MAX_LCIDS},			/* DBG_GRC_PARAM_NUM_LCIDS */
+	{{MAX_LTIDS, MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, MAX_LTIDS,
+	 MAX_LTIDS},			/* DBG_GRC_PARAM_NUM_LTIDS */
+	{{0, 0, 0}, 0, 1, true, 0, 0},	/* DBG_GRC_PARAM_EXCLUDE_ALL */
+	{{0, 0, 0}, 0, 1, true, 0, 0},	/* DBG_GRC_PARAM_CRASH */
+	{{0, 0, 0}, 0, 1, false, 1, 0},	/* DBG_GRC_PARAM_PARITY_SAFE */
+	{{1, 1, 1}, 0, 1, false, 0, 1},	/* DBG_GRC_PARAM_DUMP_CM */
+	{{1, 1, 1}, 0, 1, false, 0, 1}	/* DBG_GRC_PARAM_DUMP_PHY */
+};
+
+static struct rss_mem_defs s_rss_mem_defs[] = {
+	{ "rss_mem_cid", "rss_cid", 0,
+	  {256, 256, 320},
+	  {32, 32, 32} },
+	{ "rss_mem_key_msb", "rss_key", 1024,
+	  {128, 128, 208},
+	  {256, 256, 256} },
+	{ "rss_mem_key_lsb", "rss_key", 2048,
+	  {128, 128, 208},
+	  {64, 64, 64} },
+	{ "rss_mem_info", "rss_info", 3072,
+	  {128, 128, 208},
+	  {16, 16, 16} },
+	{ "rss_mem_ind", "rss_ind", 4096,
+	  {(128 * 128), (128 * 128), (128 * 208)},
+	  {16, 16, 16} }
+};
+
+static struct vfc_ram_defs s_vfc_ram_defs[] = {
+	{"vfc_ram_tt1", "vfc_ram", 0, 512},
+	{"vfc_ram_mtt2", "vfc_ram", 512, 128},
+	{"vfc_ram_stt2", "vfc_ram", 640, 32},
+	{"vfc_ram_ro_vect", "vfc_ram", 672, 32}
+};
+
+static struct big_ram_defs s_big_ram_defs[] = {
+	{ "BRB", MEM_GROUP_BRB_MEM, MEM_GROUP_BRB_RAM, DBG_GRC_PARAM_DUMP_BRB,
+	  BRB_REG_BIG_RAM_ADDRESS, BRB_REG_BIG_RAM_DATA,
+	  {4800, 4800, 5632} },
+	{ "BTB", MEM_GROUP_BTB_MEM, MEM_GROUP_BTB_RAM, DBG_GRC_PARAM_DUMP_BTB,
+	  BTB_REG_BIG_RAM_ADDRESS, BTB_REG_BIG_RAM_DATA,
+	  {2880, 2880, 3680} },
+	{ "BMB", MEM_GROUP_BMB_MEM, MEM_GROUP_BMB_RAM, DBG_GRC_PARAM_DUMP_BMB,
+	  BMB_REG_BIG_RAM_ADDRESS, BMB_REG_BIG_RAM_DATA,
+	  {1152, 1152, 1152} }
+};
+
+static struct reset_reg_defs s_reset_regs_defs[] = {
+	{ MISCS_REG_RESET_PL_UA, 0x0,
+	  {true, true, true} },		/* DBG_RESET_REG_MISCS_PL_UA */
+	{ MISCS_REG_RESET_PL_HV, 0x0,
+	  {true, true, true} },		/* DBG_RESET_REG_MISCS_PL_HV */
+	{ MISCS_REG_RESET_PL_HV_2, 0x0,
+	  {false, false, true} },	/* DBG_RESET_REG_MISCS_PL_HV_2 */
+	{ MISC_REG_RESET_PL_UA, 0x0,
+	  {true, true, true} },		/* DBG_RESET_REG_MISC_PL_UA */
+	{ MISC_REG_RESET_PL_HV, 0x0,
+	  {true, true, true} },		/* DBG_RESET_REG_MISC_PL_HV */
+	{ MISC_REG_RESET_PL_PDA_VMAIN_1, 0x4404040,
+	  {true, true, true} },		/* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */
+	{ MISC_REG_RESET_PL_PDA_VMAIN_2, 0x7c00007,
+	  {true, true, true} },		/* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */
+	{ MISC_REG_RESET_PL_PDA_VAUX, 0x2,
+	  {true, true, true} },		/* DBG_RESET_REG_MISC_PL_PDA_VAUX */
+};
+
+static struct phy_defs s_phy_defs[] = {
+	{"nw_phy", NWS_REG_NWS_CMU, PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0,
+	 PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8,
+	 PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0,
+	 PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8},
+	{"sgmii_phy", MS_REG_MS_CMU, PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132,
+	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133,
+	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130,
+	 PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131},
+	{"pcie_phy0", PHY_PCIE_REG_PHY0, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131},
+	{"pcie_phy1", PHY_PCIE_REG_PHY1, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130,
+	 PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131},
+};
+
+/**************************** Private Functions ******************************/
+
+/* Reads and returns a single dword from the specified unaligned buffer */
+static u32 qed_read_unaligned_dword(u8 *buf)
+{
+	u32 dword;
+
+	memcpy((u8 *)&dword, buf, sizeof(dword));
+	return dword;
+}
+
+/* Initializes debug data for the specified device */
+static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+	if (dev_data->initialized)
+		return DBG_STATUS_OK;
+
+	if (QED_IS_K2(p_hwfn->cdev)) {
+		dev_data->chip_id = CHIP_K2;
+		dev_data->mode_enable[MODE_K2] = 1;
+	} else if (QED_IS_BB_B0(p_hwfn->cdev)) {
+		dev_data->chip_id = CHIP_BB_B0;
+		dev_data->mode_enable[MODE_BB_B0] = 1;
+	} else {
+		return DBG_STATUS_UNKNOWN_CHIP;
+	}
+
+	dev_data->platform_id = PLATFORM_ASIC;
+	dev_data->mode_enable[MODE_ASIC] = 1;
+	dev_data->initialized = true;
+	return DBG_STATUS_OK;
+}
+
+/* Reads the FW info structure for the specified Storm from the chip,
+ * and writes it to the specified fw_info pointer.
+ */
+static void qed_read_fw_info(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt,
+			     u8 storm_id, struct fw_info *fw_info)
+{
+	/* Read first the address that points to fw_info location.
+	 * The address is located in the last line of the Storm RAM.
+	 */
+	u32 addr = s_storm_defs[storm_id].sem_fast_mem_addr +
+		   SEM_FAST_REG_INT_RAM +
+		   DWORDS_TO_BYTES(SEM_FAST_REG_INT_RAM_SIZE) -
+		   sizeof(struct fw_info_location);
+	struct fw_info_location fw_info_location;
+	u32 *dest = (u32 *)&fw_info_location;
+	u32 i;
+
+	memset(&fw_info_location, 0, sizeof(fw_info_location));
+	memset(fw_info, 0, sizeof(*fw_info));
+	for (i = 0; i < BYTES_TO_DWORDS(sizeof(fw_info_location));
+	     i++, addr += BYTES_IN_DWORD)
+		dest[i] = qed_rd(p_hwfn, p_ptt, addr);
+	if (fw_info_location.size > 0 && fw_info_location.size <=
+	    sizeof(*fw_info)) {
+		/* Read FW version info from Storm RAM */
+		addr = fw_info_location.grc_addr;
+		dest = (u32 *)fw_info;
+		for (i = 0; i < BYTES_TO_DWORDS(fw_info_location.size);
+		     i++, addr += BYTES_IN_DWORD)
+			dest[i] = qed_rd(p_hwfn, p_ptt, addr);
+	}
+}
+
+/* Dumps the specified string to the specified buffer. Returns the dumped size
+ * in bytes (actual length + 1 for the null character termination).
+ */
+static u32 qed_dump_str(char *dump_buf, bool dump, const char *str)
+{
+	if (dump)
+		strcpy(dump_buf, str);
+	return (u32)strlen(str) + 1;
+}
+
+/* Dumps zeros to align the specified buffer to dwords. Returns the dumped size
+ * in bytes.
+ */
+static u32 qed_dump_align(char *dump_buf, bool dump, u32 byte_offset)
+{
+	u8 offset_in_dword = (u8)(byte_offset & 0x3), align_size;
+
+	align_size = offset_in_dword ? BYTES_IN_DWORD - offset_in_dword : 0;
+
+	if (dump && align_size)
+		memset(dump_buf, 0, align_size);
+	return align_size;
+}
+
+/* Writes the specified string param to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_str_param(u32 *dump_buf,
+			      bool dump,
+			      const char *param_name, const char *param_val)
+{
+	char *char_buf = (char *)dump_buf;
+	u32 offset = 0;
+
+	/* Dump param name */
+	offset += qed_dump_str(char_buf + offset, dump, param_name);
+
+	/* Indicate a string param value */
+	if (dump)
+		*(char_buf + offset) = 1;
+	offset++;
+
+	/* Dump param value */
+	offset += qed_dump_str(char_buf + offset, dump, param_val);
+
+	/* Align buffer to next dword */
+	offset += qed_dump_align(char_buf + offset, dump, offset);
+	return BYTES_TO_DWORDS(offset);
+}
+
+/* Writes the specified numeric param to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_num_param(u32 *dump_buf,
+			      bool dump, const char *param_name, u32 param_val)
+{
+	char *char_buf = (char *)dump_buf;
+	u32 offset = 0;
+
+	/* Dump param name */
+	offset += qed_dump_str(char_buf + offset, dump, param_name);
+
+	/* Indicate a numeric param value */
+	if (dump)
+		*(char_buf + offset) = 0;
+	offset++;
+
+	/* Align buffer to next dword */
+	offset += qed_dump_align(char_buf + offset, dump, offset);
+
+	/* Dump param value (and change offset from bytes to dwords) */
+	offset = BYTES_TO_DWORDS(offset);
+	if (dump)
+		*(dump_buf + offset) = param_val;
+	offset++;
+	return offset;
+}
+
+/* Reads the FW version and writes it as a param to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_fw_ver_param(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt,
+				 u32 *dump_buf, bool dump)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	char fw_ver_str[16] = EMPTY_FW_VERSION_STR;
+	char fw_img_str[16] = EMPTY_FW_IMAGE_STR;
+	struct fw_info fw_info = { {0}, {0} };
+	int printed_chars;
+	u32 offset = 0;
+
+	if (dump) {
+		/* Read FW image/version from PRAM in a non-reset SEMI */
+		bool found = false;
+		u8 storm_id;
+
+		for (storm_id = 0; storm_id < MAX_DBG_STORMS && !found;
+		     storm_id++) {
+			/* Read FW version/image  */
+			if (!dev_data->block_in_reset
+			    [s_storm_defs[storm_id].block_id]) {
+				/* read FW info for the current Storm */
+				qed_read_fw_info(p_hwfn,
+						 p_ptt, storm_id, &fw_info);
+
+				/* Create FW version/image strings */
+				printed_chars =
+				    snprintf(fw_ver_str,
+					     sizeof(fw_ver_str),
+					     "%d_%d_%d_%d",
+					     fw_info.ver.num.major,
+					     fw_info.ver.num.minor,
+					     fw_info.ver.num.rev,
+					     fw_info.ver.num.eng);
+				if (printed_chars < 0 || printed_chars >=
+				    sizeof(fw_ver_str))
+					DP_NOTICE(p_hwfn,
+						  "Unexpected debug error: invalid FW version string\n");
+				switch (fw_info.ver.image_id) {
+				case FW_IMG_MAIN:
+					strcpy(fw_img_str, "main");
+					break;
+				default:
+					strcpy(fw_img_str, "unknown");
+					break;
+				}
+
+				found = true;
+			}
+		}
+	}
+
+	/* Dump FW version, image and timestamp */
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "fw-version", fw_ver_str);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "fw-image", fw_img_str);
+	offset += qed_dump_num_param(dump_buf + offset,
+				     dump,
+				     "fw-timestamp", fw_info.ver.timestamp);
+	return offset;
+}
+
+/* Reads the MFW version and writes it as a param to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  u32 *dump_buf, bool dump)
+{
+	char mfw_ver_str[16] = EMPTY_FW_VERSION_STR;
+
+	if (dump) {
+		u32 global_section_offsize, global_section_addr, mfw_ver;
+		u32 public_data_addr, global_section_offsize_addr;
+		int printed_chars;
+
+		/* Find MCP public data GRC address.
+		 * Needs to be ORed with MCP_REG_SCRATCH due to a HW bug.
+		 */
+		public_data_addr = qed_rd(p_hwfn, p_ptt,
+					  MISC_REG_SHARED_MEM_ADDR) |
+					  MCP_REG_SCRATCH;
+
+		/* Find MCP public global section offset */
+		global_section_offsize_addr = public_data_addr +
+					      offsetof(struct mcp_public_data,
+						       sections) +
+					      sizeof(offsize_t) * PUBLIC_GLOBAL;
+		global_section_offsize = qed_rd(p_hwfn, p_ptt,
+						global_section_offsize_addr);
+		global_section_addr = MCP_REG_SCRATCH +
+				      (global_section_offsize &
+				       OFFSIZE_OFFSET_MASK) * 4;
+
+		/* Read MFW version from MCP public global section */
+		mfw_ver = qed_rd(p_hwfn, p_ptt,
+				 global_section_addr +
+				 offsetof(struct public_global, mfw_ver));
+
+		/* Dump MFW version param */
+		printed_chars = snprintf(mfw_ver_str, sizeof(mfw_ver_str),
+					 "%d_%d_%d_%d",
+					 (u8) (mfw_ver >> 24),
+					 (u8) (mfw_ver >> 16),
+					 (u8) (mfw_ver >> 8),
+					 (u8) mfw_ver);
+		if (printed_chars < 0 || printed_chars >= sizeof(mfw_ver_str))
+			DP_NOTICE(p_hwfn,
+				  "Unexpected debug error: invalid MFW version string\n");
+	}
+
+	return qed_dump_str_param(dump_buf, dump, "mfw-version", mfw_ver_str);
+}
+
+/* Writes a section header to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_section_hdr(u32 *dump_buf,
+				bool dump, const char *name, u32 num_params)
+{
+	return qed_dump_num_param(dump_buf, dump, name, num_params);
+}
+
+/* Writes the common global params to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn,
+					 struct qed_ptt *p_ptt,
+					 u32 *dump_buf,
+					 bool dump,
+					 u8 num_specific_global_params)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 offset = 0;
+
+	/* Find platform string and dump global params section header */
+	offset += qed_dump_section_hdr(dump_buf + offset,
+				       dump,
+				       "global_params",
+				       NUM_COMMON_GLOBAL_PARAMS +
+				       num_specific_global_params);
+
+	/* Store params */
+	offset += qed_dump_fw_ver_param(p_hwfn, p_ptt, dump_buf + offset, dump);
+	offset += qed_dump_mfw_ver_param(p_hwfn,
+					 p_ptt, dump_buf + offset, dump);
+	offset += qed_dump_num_param(dump_buf + offset,
+				     dump, "tools-version", TOOLS_VERSION);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump,
+				     "chip",
+				     s_chip_defs[dev_data->chip_id].name);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump,
+				     "platform",
+				     s_platform_defs[dev_data->platform_id].
+				     name);
+	offset +=
+	    qed_dump_num_param(dump_buf + offset, dump, "pci-func",
+			       p_hwfn->abs_pf_id);
+	return offset;
+}
+
+/* Writes the last section to the specified buffer at the given offset.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_dump_last_section(u32 *dump_buf, u32 offset, bool dump)
+{
+	u32 start_offset = offset, crc = ~0;
+
+	/* Dump CRC section header */
+	offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0);
+
+	/* Calculate CRC32 and add it to the dword following the "last" section.
+	 */
+	if (dump)
+		*(dump_buf + offset) = ~crc32(crc, (u8 *)dump_buf,
+					      DWORDS_TO_BYTES(offset));
+	offset++;
+	return offset - start_offset;
+}
+
+/* Update blocks reset state  */
+static void qed_update_blocks_reset_state(struct qed_hwfn *p_hwfn,
+					  struct qed_ptt *p_ptt)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 reg_val[MAX_DBG_RESET_REGS] = { 0 };
+	u32 i;
+
+	/* Read reset registers */
+	for (i = 0; i < MAX_DBG_RESET_REGS; i++)
+		if (s_reset_regs_defs[i].exists[dev_data->chip_id])
+			reg_val[i] = qed_rd(p_hwfn,
+					    p_ptt, s_reset_regs_defs[i].addr);
+
+	/* Check if blocks are in reset */
+	for (i = 0; i < MAX_BLOCK_ID; i++)
+		dev_data->block_in_reset[i] =
+		    s_block_defs[i]->has_reset_bit &&
+		    !(reg_val[s_block_defs[i]->reset_reg] &
+		      BIT(s_block_defs[i]->reset_bit_offset));
+}
+
+/* Enable / disable the Debug block */
+static void qed_bus_enable_dbg_block(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt, bool enable)
+{
+	qed_wr(p_hwfn, p_ptt, DBG_REG_DBG_BLOCK_ON, enable ? 1 : 0);
+}
+
+/* Resets the Debug block */
+static void qed_bus_reset_dbg_block(struct qed_hwfn *p_hwfn,
+				    struct qed_ptt *p_ptt)
+{
+	u32 dbg_reset_reg_addr, old_reset_reg_val, new_reset_reg_val;
+
+	dbg_reset_reg_addr =
+		s_reset_regs_defs[s_block_defs[BLOCK_DBG]->reset_reg].addr;
+	old_reset_reg_val = qed_rd(p_hwfn, p_ptt, dbg_reset_reg_addr);
+	new_reset_reg_val = old_reset_reg_val &
+			    ~BIT(s_block_defs[BLOCK_DBG]->reset_bit_offset);
+
+	qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, new_reset_reg_val);
+	qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, old_reset_reg_val);
+}
+
+static void qed_bus_set_framing_mode(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt,
+				     enum dbg_bus_frame_modes mode)
+{
+	qed_wr(p_hwfn, p_ptt, DBG_REG_FRAMING_MODE, (u8)mode);
+}
+
+/* Enable / disable Debug Bus clients according to the specified mask.
+ * (1 = enable, 0 = disable)
+ */
+static void qed_bus_enable_clients(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt, u32 client_mask)
+{
+	qed_wr(p_hwfn, p_ptt, DBG_REG_CLIENT_ENABLE, client_mask);
+}
+
+static bool qed_is_mode_match(struct qed_hwfn *p_hwfn, u16 *modes_buf_offset)
+{
+	const u32 *ptr = s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr;
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u8 tree_val = ((u8 *)ptr)[(*modes_buf_offset)++];
+	bool arg1, arg2;
+
+	switch (tree_val) {
+	case INIT_MODE_OP_NOT:
+		return !qed_is_mode_match(p_hwfn, modes_buf_offset);
+	case INIT_MODE_OP_OR:
+	case INIT_MODE_OP_AND:
+		arg1 = qed_is_mode_match(p_hwfn, modes_buf_offset);
+		arg2 = qed_is_mode_match(p_hwfn, modes_buf_offset);
+		return (tree_val == INIT_MODE_OP_OR) ? (arg1 ||
+							arg2) : (arg1 && arg2);
+	default:
+		return dev_data->mode_enable[tree_val - MAX_INIT_MODE_OPS] > 0;
+	}
+}
+
+/* Returns the value of the specified GRC param */
+static u32 qed_grc_get_param(struct qed_hwfn *p_hwfn,
+			     enum dbg_grc_params grc_param)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+	return dev_data->grc.param_val[grc_param];
+}
+
+/* Clear all GRC params */
+static void qed_dbg_grc_clear_params(struct qed_hwfn *p_hwfn)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 i;
+
+	for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
+		dev_data->grc.param_set_by_user[i] = 0;
+}
+
+/* Assign default GRC param values */
+static void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 i;
+
+	for (i = 0; i < MAX_DBG_GRC_PARAMS; i++)
+		if (!dev_data->grc.param_set_by_user[i])
+			dev_data->grc.param_val[i] =
+			    s_grc_param_defs[i].default_val[dev_data->chip_id];
+}
+
+/* Returns true if the specified entity (indicated by GRC param) should be
+ * included in the dump, false otherwise.
+ */
+static bool qed_grc_is_included(struct qed_hwfn *p_hwfn,
+				enum dbg_grc_params grc_param)
+{
+	return qed_grc_get_param(p_hwfn, grc_param) > 0;
+}
+
+/* Returns true of the specified Storm should be included in the dump, false
+ * otherwise.
+ */
+static bool qed_grc_is_storm_included(struct qed_hwfn *p_hwfn,
+				      enum dbg_storms storm)
+{
+	return qed_grc_get_param(p_hwfn, (enum dbg_grc_params)storm) > 0;
+}
+
+/* Returns true if the specified memory should be included in the dump, false
+ * otherwise.
+ */
+static bool qed_grc_is_mem_included(struct qed_hwfn *p_hwfn,
+				    enum block_id block_id, u8 mem_group_id)
+{
+	u8 i;
+
+	/* Check Storm match */
+	if (s_block_defs[block_id]->associated_to_storm &&
+	    !qed_grc_is_storm_included(p_hwfn,
+			(enum dbg_storms)s_block_defs[block_id]->storm_id))
+		return false;
+
+	for (i = 0; i < NUM_BIG_RAM_TYPES; i++)
+		if (mem_group_id == s_big_ram_defs[i].mem_group_id ||
+		    mem_group_id == s_big_ram_defs[i].ram_mem_group_id)
+			return qed_grc_is_included(p_hwfn,
+						   s_big_ram_defs[i].grc_param);
+	if (mem_group_id == MEM_GROUP_PXP_ILT || mem_group_id ==
+	    MEM_GROUP_PXP_MEM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PXP);
+	if (mem_group_id == MEM_GROUP_RAM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RAM);
+	if (mem_group_id == MEM_GROUP_PBUF)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PBUF);
+	if (mem_group_id == MEM_GROUP_CAU_MEM ||
+	    mem_group_id == MEM_GROUP_CAU_SB ||
+	    mem_group_id == MEM_GROUP_CAU_PI)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CAU);
+	if (mem_group_id == MEM_GROUP_QM_MEM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_QM);
+	if (mem_group_id == MEM_GROUP_CONN_CFC_MEM ||
+	    mem_group_id == MEM_GROUP_TASK_CFC_MEM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CFC);
+	if (mem_group_id == MEM_GROUP_IGU_MEM || mem_group_id ==
+	    MEM_GROUP_IGU_MSIX)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IGU);
+	if (mem_group_id == MEM_GROUP_MULD_MEM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MULD);
+	if (mem_group_id == MEM_GROUP_PRS_MEM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PRS);
+	if (mem_group_id == MEM_GROUP_DMAE_MEM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DMAE);
+	if (mem_group_id == MEM_GROUP_TM_MEM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_TM);
+	if (mem_group_id == MEM_GROUP_SDM_MEM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_SDM);
+	if (mem_group_id == MEM_GROUP_TDIF_CTX || mem_group_id ==
+	    MEM_GROUP_RDIF_CTX)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DIF);
+	if (mem_group_id == MEM_GROUP_CM_MEM)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM);
+	if (mem_group_id == MEM_GROUP_IOR)
+		return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR);
+
+	return true;
+}
+
+/* Stalls all Storms */
+static void qed_grc_stall_storms(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt, bool stall)
+{
+	u8 reg_val = stall ? 1 : 0;
+	u8 storm_id;
+
+	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+		if (qed_grc_is_storm_included(p_hwfn,
+					      (enum dbg_storms)storm_id)) {
+			u32 reg_addr =
+			    s_storm_defs[storm_id].sem_fast_mem_addr +
+			    SEM_FAST_REG_STALL_0;
+
+			qed_wr(p_hwfn, p_ptt, reg_addr, reg_val);
+		}
+	}
+
+	msleep(STALL_DELAY_MS);
+}
+
+/* Takes all blocks out of reset */
+static void qed_grc_unreset_blocks(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 reg_val[MAX_DBG_RESET_REGS] = { 0 };
+	u32 i;
+
+	/* Fill reset regs values */
+	for (i = 0; i < MAX_BLOCK_ID; i++)
+		if (s_block_defs[i]->has_reset_bit && s_block_defs[i]->unreset)
+			reg_val[s_block_defs[i]->reset_reg] |=
+			    BIT(s_block_defs[i]->reset_bit_offset);
+
+	/* Write reset registers */
+	for (i = 0; i < MAX_DBG_RESET_REGS; i++) {
+		if (s_reset_regs_defs[i].exists[dev_data->chip_id]) {
+			reg_val[i] |= s_reset_regs_defs[i].unreset_val;
+			if (reg_val[i])
+				qed_wr(p_hwfn,
+				       p_ptt,
+				       s_reset_regs_defs[i].addr +
+				       RESET_REG_UNRESET_OFFSET, reg_val[i]);
+		}
+	}
+}
+
+/* Returns the attention name offsets of the specified block */
+static const struct dbg_attn_block_type_data *
+qed_get_block_attn_data(enum block_id block_id, enum dbg_attn_type attn_type)
+{
+	const struct dbg_attn_block *base_attn_block_arr =
+		(const struct dbg_attn_block *)
+		s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr;
+
+	return &base_attn_block_arr[block_id].per_type_data[attn_type];
+}
+
+/* Returns the attention registers of the specified block */
+static const struct dbg_attn_reg *
+qed_get_block_attn_regs(enum block_id block_id, enum dbg_attn_type attn_type,
+			u8 *num_attn_regs)
+{
+	const struct dbg_attn_block_type_data *block_type_data =
+		qed_get_block_attn_data(block_id, attn_type);
+
+	*num_attn_regs = block_type_data->num_regs;
+	return &((const struct dbg_attn_reg *)
+		 s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)[block_type_data->
+							  regs_offset];
+}
+
+/* For each block, clear the status of all parities */
+static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u8 reg_idx, num_attn_regs;
+	u32 block_id;
+
+	for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
+		const struct dbg_attn_reg *attn_reg_arr;
+
+		if (dev_data->block_in_reset[block_id])
+			continue;
+
+		attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id,
+						       ATTN_TYPE_PARITY,
+						       &num_attn_regs);
+		for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) {
+			const struct dbg_attn_reg *reg_data =
+				&attn_reg_arr[reg_idx];
+
+			/* Check mode */
+			bool eval_mode = GET_FIELD(reg_data->mode.data,
+						   DBG_MODE_HDR_EVAL_MODE) > 0;
+			u16 modes_buf_offset =
+				GET_FIELD(reg_data->mode.data,
+					  DBG_MODE_HDR_MODES_BUF_OFFSET);
+
+			if (!eval_mode ||
+			    qed_is_mode_match(p_hwfn, &modes_buf_offset))
+				/* Mode match - read parity status read-clear
+				 * register.
+				 */
+				qed_rd(p_hwfn, p_ptt,
+				       DWORDS_TO_BYTES(reg_data->
+						       sts_clr_address));
+		}
+	}
+}
+
+/* Dumps GRC registers section header. Returns the dumped size in dwords.
+ * The following parameters are dumped:
+ * - 'count' = num_dumped_entries
+ * - 'split' = split_type
+ * - 'id'i = split_id (dumped only if split_id >= 0)
+ * - 'param_name' = param_val (user param, dumped only if param_name != NULL and
+ *	param_val != NULL)
+ */
+static u32 qed_grc_dump_regs_hdr(u32 *dump_buf,
+				 bool dump,
+				 u32 num_reg_entries,
+				 const char *split_type,
+				 int split_id,
+				 const char *param_name, const char *param_val)
+{
+	u8 num_params = 2 + (split_id >= 0 ? 1 : 0) + (param_name ? 1 : 0);
+	u32 offset = 0;
+
+	offset += qed_dump_section_hdr(dump_buf + offset,
+				       dump, "grc_regs", num_params);
+	offset += qed_dump_num_param(dump_buf + offset,
+				     dump, "count", num_reg_entries);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "split", split_type);
+	if (split_id >= 0)
+		offset += qed_dump_num_param(dump_buf + offset,
+					     dump, "id", split_id);
+	if (param_name && param_val)
+		offset += qed_dump_str_param(dump_buf + offset,
+					     dump, param_name, param_val);
+	return offset;
+}
+
+/* Dumps GRC register/memory. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_reg_entry(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt, u32 *dump_buf,
+				  bool dump, u32 addr, u32 len)
+{
+	u32 offset = 0, i;
+
+	if (dump) {
+		*(dump_buf + offset++) = addr | (len << REG_DUMP_LEN_SHIFT);
+		for (i = 0; i < len; i++, addr++, offset++)
+			*(dump_buf + offset) = qed_rd(p_hwfn,
+						      p_ptt,
+						      DWORDS_TO_BYTES(addr));
+	} else {
+		offset += len + 1;
+	}
+
+	return offset;
+}
+
+/* Dumps GRC registers entries. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt,
+				     struct dbg_array input_regs_arr,
+				     u32 *dump_buf,
+				     bool dump,
+				     bool block_enable[MAX_BLOCK_ID],
+				     u32 *num_dumped_reg_entries)
+{
+	u32 i, offset = 0, input_offset = 0;
+	bool mode_match = true;
+
+	*num_dumped_reg_entries = 0;
+	while (input_offset < input_regs_arr.size_in_dwords) {
+		const struct dbg_dump_cond_hdr *cond_hdr =
+		    (const struct dbg_dump_cond_hdr *)
+		    &input_regs_arr.ptr[input_offset++];
+		bool eval_mode = GET_FIELD(cond_hdr->mode.data,
+					   DBG_MODE_HDR_EVAL_MODE) > 0;
+
+		/* Check mode/block */
+		if (eval_mode) {
+			u16 modes_buf_offset =
+				GET_FIELD(cond_hdr->mode.data,
+					  DBG_MODE_HDR_MODES_BUF_OFFSET);
+			mode_match = qed_is_mode_match(p_hwfn,
+						       &modes_buf_offset);
+		}
+
+		if (mode_match && block_enable[cond_hdr->block_id]) {
+			for (i = 0; i < cond_hdr->data_size;
+			     i++, input_offset++) {
+				const struct dbg_dump_reg *reg =
+				    (const struct dbg_dump_reg *)
+				    &input_regs_arr.ptr[input_offset];
+
+				offset +=
+					qed_grc_dump_reg_entry(p_hwfn, p_ptt,
+						    dump_buf + offset, dump,
+						    GET_FIELD(reg->data,
+							DBG_DUMP_REG_ADDRESS),
+						    GET_FIELD(reg->data,
+							DBG_DUMP_REG_LENGTH));
+				(*num_dumped_reg_entries)++;
+			}
+		} else {
+			input_offset += cond_hdr->data_size;
+		}
+	}
+
+	return offset;
+}
+
+/* Dumps GRC registers entries. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_split_data(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   struct dbg_array input_regs_arr,
+				   u32 *dump_buf,
+				   bool dump,
+				   bool block_enable[MAX_BLOCK_ID],
+				   const char *split_type_name,
+				   u32 split_id,
+				   const char *param_name,
+				   const char *param_val)
+{
+	u32 num_dumped_reg_entries, offset;
+
+	/* Calculate register dump header size (and skip it for now) */
+	offset = qed_grc_dump_regs_hdr(dump_buf,
+				       false,
+				       0,
+				       split_type_name,
+				       split_id, param_name, param_val);
+
+	/* Dump registers */
+	offset += qed_grc_dump_regs_entries(p_hwfn,
+					    p_ptt,
+					    input_regs_arr,
+					    dump_buf + offset,
+					    dump,
+					    block_enable,
+					    &num_dumped_reg_entries);
+
+	/* Write register dump header */
+	if (dump && num_dumped_reg_entries > 0)
+		qed_grc_dump_regs_hdr(dump_buf,
+				      dump,
+				      num_dumped_reg_entries,
+				      split_type_name,
+				      split_id, param_name, param_val);
+
+	return num_dumped_reg_entries > 0 ? offset : 0;
+}
+
+/* Dumps registers according to the input registers array.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  u32 *dump_buf,
+				  bool dump,
+				  bool block_enable[MAX_BLOCK_ID],
+				  const char *param_name, const char *param_val)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 offset = 0, input_offset = 0;
+	u8 port_id, pf_id;
+
+	if (dump)
+		DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, "Dumping registers...\n");
+	while (input_offset <
+	       s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].size_in_dwords) {
+		const struct dbg_dump_split_hdr *split_hdr =
+			(const struct dbg_dump_split_hdr *)
+			&s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset++];
+		u8 split_type_id = GET_FIELD(split_hdr->hdr,
+					     DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID);
+		u32 split_data_size = GET_FIELD(split_hdr->hdr,
+						DBG_DUMP_SPLIT_HDR_DATA_SIZE);
+		struct dbg_array curr_input_regs_arr = {
+			&s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset],
+			split_data_size};
+
+		switch (split_type_id) {
+		case SPLIT_TYPE_NONE:
+		case SPLIT_TYPE_VF:
+			offset += qed_grc_dump_split_data(p_hwfn,
+							  p_ptt,
+							  curr_input_regs_arr,
+							  dump_buf + offset,
+							  dump,
+							  block_enable,
+							  "eng",
+							  (u32)(-1),
+							  param_name,
+							  param_val);
+			break;
+		case SPLIT_TYPE_PORT:
+			for (port_id = 0;
+			     port_id <
+			     s_chip_defs[dev_data->chip_id].
+			     per_platform[dev_data->platform_id].num_ports;
+			     port_id++) {
+				if (dump)
+					qed_port_pretend(p_hwfn, p_ptt,
+							 port_id);
+				offset +=
+				    qed_grc_dump_split_data(p_hwfn, p_ptt,
+							    curr_input_regs_arr,
+							    dump_buf + offset,
+							    dump, block_enable,
+							    "port", port_id,
+							    param_name,
+							    param_val);
+			}
+			break;
+		case SPLIT_TYPE_PF:
+		case SPLIT_TYPE_PORT_PF:
+			for (pf_id = 0;
+			     pf_id <
+			     s_chip_defs[dev_data->chip_id].
+			     per_platform[dev_data->platform_id].num_pfs;
+			     pf_id++) {
+				if (dump)
+					qed_fid_pretend(p_hwfn, p_ptt, pf_id);
+				offset += qed_grc_dump_split_data(p_hwfn,
+							p_ptt,
+							curr_input_regs_arr,
+							dump_buf + offset,
+							dump, block_enable,
+							"pf", pf_id, param_name,
+							param_val);
+			}
+			break;
+		default:
+			break;
+		}
+
+		input_offset += split_data_size;
+	}
+
+	/* Pretend to original PF */
+	if (dump)
+		qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
+	return offset;
+}
+
+/* Dump reset registers. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_reset_regs(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   u32 *dump_buf, bool dump)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 i, offset = 0, num_regs = 0;
+
+	/* Calculate header size */
+	offset += qed_grc_dump_regs_hdr(dump_buf,
+					false, 0, "eng", -1, NULL, NULL);
+
+	/* Write reset registers */
+	for (i = 0; i < MAX_DBG_RESET_REGS; i++) {
+		if (s_reset_regs_defs[i].exists[dev_data->chip_id]) {
+			offset += qed_grc_dump_reg_entry(p_hwfn,
+							 p_ptt,
+							 dump_buf + offset,
+							 dump,
+							 BYTES_TO_DWORDS
+							 (s_reset_regs_defs
+							  [i].addr), 1);
+			num_regs++;
+		}
+	}
+
+	/* Write header */
+	if (dump)
+		qed_grc_dump_regs_hdr(dump_buf,
+				      true, num_regs, "eng", -1, NULL, NULL);
+	return offset;
+}
+
+/* Dump registers that are modified during GRC Dump and therefore must be dumped
+ * first. Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
+				      u32 *dump_buf, bool dump)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 offset = 0, num_reg_entries = 0, block_id;
+	u8 storm_id, reg_idx, num_attn_regs;
+
+	/* Calculate header size */
+	offset += qed_grc_dump_regs_hdr(dump_buf,
+					false, 0, "eng", -1, NULL, NULL);
+
+	/* Write parity registers */
+	for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
+		const struct dbg_attn_reg *attn_reg_arr;
+
+		if (dev_data->block_in_reset[block_id] && dump)
+			continue;
+
+		attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id,
+						       ATTN_TYPE_PARITY,
+						       &num_attn_regs);
+		for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) {
+			const struct dbg_attn_reg *reg_data =
+				&attn_reg_arr[reg_idx];
+			u16 modes_buf_offset;
+			bool eval_mode;
+
+			/* Check mode */
+			eval_mode = GET_FIELD(reg_data->mode.data,
+					      DBG_MODE_HDR_EVAL_MODE) > 0;
+			modes_buf_offset =
+				GET_FIELD(reg_data->mode.data,
+					  DBG_MODE_HDR_MODES_BUF_OFFSET);
+			if (!eval_mode ||
+			    qed_is_mode_match(p_hwfn, &modes_buf_offset)) {
+				/* Mode match - read and dump registers */
+				offset += qed_grc_dump_reg_entry(p_hwfn,
+							p_ptt,
+							dump_buf + offset,
+							dump,
+							reg_data->mask_address,
+							1);
+				offset += qed_grc_dump_reg_entry(p_hwfn,
+						p_ptt,
+						dump_buf + offset,
+						dump,
+						GET_FIELD(reg_data->data,
+						    DBG_ATTN_REG_STS_ADDRESS),
+						1);
+				num_reg_entries += 2;
+			}
+		}
+	}
+
+	/* Write storm stall status registers */
+	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+		if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id] &&
+		    dump)
+			continue;
+
+		offset += qed_grc_dump_reg_entry(p_hwfn,
+					p_ptt,
+					dump_buf + offset,
+					dump,
+					BYTES_TO_DWORDS(s_storm_defs[storm_id].
+							sem_fast_mem_addr +
+							SEM_FAST_REG_STALLED),
+					1);
+		num_reg_entries++;
+	}
+
+	/* Write header */
+	if (dump)
+		qed_grc_dump_regs_hdr(dump_buf,
+				      true,
+				      num_reg_entries, "eng", -1, NULL, NULL);
+	return offset;
+}
+
+/* Dumps a GRC memory header (section and params).
+ * The following parameters are dumped:
+ * name - name is dumped only if it's not NULL.
+ * addr - byte_addr is dumped only if name is NULL.
+ * len - dword_len is always dumped.
+ * width - bit_width is dumped if it's not zero.
+ * packed - packed=1 is dumped if it's not false.
+ * mem_group - mem_group is always dumped.
+ * is_storm - true only if the memory is related to a Storm.
+ * storm_letter - storm letter (valid only if is_storm is true).
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn,
+				u32 *dump_buf,
+				bool dump,
+				const char *name,
+				u32 byte_addr,
+				u32 dword_len,
+				u32 bit_width,
+				bool packed,
+				const char *mem_group,
+				bool is_storm, char storm_letter)
+{
+	u8 num_params = 3;
+	u32 offset = 0;
+	char buf[64];
+
+	if (!dword_len)
+		DP_NOTICE(p_hwfn,
+			  "Unexpected GRC Dump error: dumped memory size must be non-zero\n");
+	if (bit_width)
+		num_params++;
+	if (packed)
+		num_params++;
+
+	/* Dump section header */
+	offset += qed_dump_section_hdr(dump_buf + offset,
+				       dump, "grc_mem", num_params);
+	if (name) {
+		/* Dump name */
+		if (is_storm) {
+			strcpy(buf, "?STORM_");
+			buf[0] = storm_letter;
+			strcpy(buf + strlen(buf), name);
+		} else {
+			strcpy(buf, name);
+		}
+
+		offset += qed_dump_str_param(dump_buf + offset,
+					     dump, "name", buf);
+		if (dump)
+			DP_VERBOSE(p_hwfn,
+				   QED_MSG_DEBUG,
+				   "Dumping %d registers from %s...\n",
+				   dword_len, buf);
+	} else {
+		/* Dump address */
+		offset += qed_dump_num_param(dump_buf + offset,
+					     dump, "addr", byte_addr);
+		if (dump && dword_len > 64)
+			DP_VERBOSE(p_hwfn,
+				   QED_MSG_DEBUG,
+				   "Dumping %d registers from address 0x%x...\n",
+				   dword_len, byte_addr);
+	}
+
+	/* Dump len */
+	offset += qed_dump_num_param(dump_buf + offset, dump, "len", dword_len);
+
+	/* Dump bit width */
+	if (bit_width)
+		offset += qed_dump_num_param(dump_buf + offset,
+					     dump, "width", bit_width);
+
+	/* Dump packed */
+	if (packed)
+		offset += qed_dump_num_param(dump_buf + offset,
+					     dump, "packed", 1);
+
+	/* Dump reg type */
+	if (is_storm) {
+		strcpy(buf, "?STORM_");
+		buf[0] = storm_letter;
+		strcpy(buf + strlen(buf), mem_group);
+	} else {
+		strcpy(buf, mem_group);
+	}
+
+	offset += qed_dump_str_param(dump_buf + offset, dump, "type", buf);
+	return offset;
+}
+
+/* Dumps a single GRC memory. If name is NULL, the memory is stored by address.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt,
+			    u32 *dump_buf,
+			    bool dump,
+			    const char *name,
+			    u32 byte_addr,
+			    u32 dword_len,
+			    u32 bit_width,
+			    bool packed,
+			    const char *mem_group,
+			    bool is_storm, char storm_letter)
+{
+	u32 offset = 0;
+
+	offset += qed_grc_dump_mem_hdr(p_hwfn,
+				       dump_buf + offset,
+				       dump,
+				       name,
+				       byte_addr,
+				       dword_len,
+				       bit_width,
+				       packed,
+				       mem_group, is_storm, storm_letter);
+	if (dump) {
+		u32 i;
+
+		for (i = 0; i < dword_len;
+		     i++, byte_addr += BYTES_IN_DWORD, offset++)
+			*(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, byte_addr);
+	} else {
+		offset += dword_len;
+	}
+
+	return offset;
+}
+
+/* Dumps GRC memories entries. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn,
+				    struct qed_ptt *p_ptt,
+				    struct dbg_array input_mems_arr,
+				    u32 *dump_buf, bool dump)
+{
+	u32 i, offset = 0, input_offset = 0;
+	bool mode_match = true;
+
+	while (input_offset < input_mems_arr.size_in_dwords) {
+		const struct dbg_dump_cond_hdr *cond_hdr;
+		u32 num_entries;
+		bool eval_mode;
+
+		cond_hdr = (const struct dbg_dump_cond_hdr *)
+			   &input_mems_arr.ptr[input_offset++];
+		eval_mode = GET_FIELD(cond_hdr->mode.data,
+				      DBG_MODE_HDR_EVAL_MODE) > 0;
+
+		/* Check required mode */
+		if (eval_mode) {
+			u16 modes_buf_offset =
+				GET_FIELD(cond_hdr->mode.data,
+					  DBG_MODE_HDR_MODES_BUF_OFFSET);
+
+			mode_match = qed_is_mode_match(p_hwfn,
+						       &modes_buf_offset);
+		}
+
+		if (!mode_match) {
+			input_offset += cond_hdr->data_size;
+			continue;
+		}
+
+		num_entries = cond_hdr->data_size / MEM_DUMP_ENTRY_SIZE_DWORDS;
+		for (i = 0; i < num_entries;
+		     i++, input_offset += MEM_DUMP_ENTRY_SIZE_DWORDS) {
+			const struct dbg_dump_mem *mem =
+				(const struct dbg_dump_mem *)
+				&input_mems_arr.ptr[input_offset];
+			u8 mem_group_id;
+
+			mem_group_id = GET_FIELD(mem->dword0,
+						 DBG_DUMP_MEM_MEM_GROUP_ID);
+			if (mem_group_id >= MEM_GROUPS_NUM) {
+				DP_NOTICE(p_hwfn, "Invalid mem_group_id\n");
+				return 0;
+			}
+
+			if (qed_grc_is_mem_included(p_hwfn,
+					(enum block_id)cond_hdr->block_id,
+					mem_group_id)) {
+				u32 mem_byte_addr =
+					DWORDS_TO_BYTES(GET_FIELD(mem->dword0,
+							DBG_DUMP_MEM_ADDRESS));
+				u32 mem_len = GET_FIELD(mem->dword1,
+							DBG_DUMP_MEM_LENGTH);
+				char storm_letter = 'a';
+				bool is_storm = false;
+
+				/* Update memory length for CCFC/TCFC memories
+				 * according to number of LCIDs/LTIDs.
+				 */
+				if (mem_group_id == MEM_GROUP_CONN_CFC_MEM)
+					mem_len = qed_grc_get_param(p_hwfn,
+							DBG_GRC_PARAM_NUM_LCIDS)
+							* (mem_len / MAX_LCIDS);
+				else if (mem_group_id == MEM_GROUP_TASK_CFC_MEM)
+					mem_len = qed_grc_get_param(p_hwfn,
+							DBG_GRC_PARAM_NUM_LTIDS)
+							* (mem_len / MAX_LTIDS);
+
+				/* If memory is associated with Storm, update
+				 * Storm details.
+				 */
+				if (s_block_defs[cond_hdr->block_id]->
+							associated_to_storm) {
+					is_storm = true;
+					storm_letter =
+						s_storm_defs[s_block_defs[
+						cond_hdr->block_id]->
+						storm_id].letter;
+				}
+
+				/* Dump memory */
+				offset += qed_grc_dump_mem(p_hwfn, p_ptt,
+						dump_buf + offset, dump, NULL,
+						mem_byte_addr, mem_len, 0,
+						false,
+						s_mem_group_names[mem_group_id],
+						is_storm, storm_letter);
+				}
+			}
+	}
+
+	return offset;
+}
+
+/* Dumps GRC memories according to the input array dump_mem.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_memories(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt,
+				 u32 *dump_buf, bool dump)
+{
+	u32 offset = 0, input_offset = 0;
+
+	while (input_offset <
+	       s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].size_in_dwords) {
+		const struct dbg_dump_split_hdr *split_hdr =
+			(const struct dbg_dump_split_hdr *)
+			&s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset++];
+		u8 split_type_id = GET_FIELD(split_hdr->hdr,
+					     DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID);
+		u32 split_data_size = GET_FIELD(split_hdr->hdr,
+						DBG_DUMP_SPLIT_HDR_DATA_SIZE);
+		struct dbg_array curr_input_mems_arr = {
+			&s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset],
+			split_data_size};
+
+		switch (split_type_id) {
+		case SPLIT_TYPE_NONE:
+			offset += qed_grc_dump_mem_entries(p_hwfn,
+							   p_ptt,
+							   curr_input_mems_arr,
+							   dump_buf + offset,
+							   dump);
+			break;
+		default:
+			DP_NOTICE(p_hwfn,
+				  "Dumping split memories is currently not supported\n");
+			break;
+		}
+
+		input_offset += split_data_size;
+	}
+
+	return offset;
+}
+
+/* Dumps GRC context data for the specified Storm.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt,
+				 u32 *dump_buf,
+				 bool dump,
+				 const char *name,
+				 u32 num_lids,
+				 u32 lid_size,
+				 u32 rd_reg_addr,
+				 u8 storm_id)
+{
+	u32 i, lid, total_size;
+	u32 offset = 0;
+
+	if (!lid_size)
+		return 0;
+	lid_size *= BYTES_IN_DWORD;
+	total_size = num_lids * lid_size;
+	offset += qed_grc_dump_mem_hdr(p_hwfn,
+				       dump_buf + offset,
+				       dump,
+				       name,
+				       0,
+				       total_size,
+				       lid_size * 32,
+				       false,
+				       name,
+				       true, s_storm_defs[storm_id].letter);
+
+	/* Dump context data */
+	if (dump) {
+		for (lid = 0; lid < num_lids; lid++) {
+			for (i = 0; i < lid_size; i++, offset++) {
+				qed_wr(p_hwfn,
+				       p_ptt,
+				       s_storm_defs[storm_id].cm_ctx_wr_addr,
+				       BIT(9) | lid);
+				*(dump_buf + offset) = qed_rd(p_hwfn,
+							      p_ptt,
+							      rd_reg_addr);
+			}
+		}
+	} else {
+		offset += total_size;
+	}
+
+	return offset;
+}
+
+/* Dumps GRC contexts. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+	u32 offset = 0;
+	u8 storm_id;
+
+	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+		if (!qed_grc_is_storm_included(p_hwfn,
+					       (enum dbg_storms)storm_id))
+			continue;
+
+		/* Dump Conn AG context size */
+		offset +=
+			qed_grc_dump_ctx_data(p_hwfn,
+					      p_ptt,
+					      dump_buf + offset,
+					      dump,
+					      "CONN_AG_CTX",
+					      qed_grc_get_param(p_hwfn,
+						    DBG_GRC_PARAM_NUM_LCIDS),
+					      s_storm_defs[storm_id].
+						    cm_conn_ag_ctx_lid_size,
+					      s_storm_defs[storm_id].
+						    cm_conn_ag_ctx_rd_addr,
+					      storm_id);
+
+		/* Dump Conn ST context size */
+		offset +=
+			qed_grc_dump_ctx_data(p_hwfn,
+					      p_ptt,
+					      dump_buf + offset,
+					      dump,
+					      "CONN_ST_CTX",
+					      qed_grc_get_param(p_hwfn,
+						    DBG_GRC_PARAM_NUM_LCIDS),
+					      s_storm_defs[storm_id].
+						    cm_conn_st_ctx_lid_size,
+					      s_storm_defs[storm_id].
+						    cm_conn_st_ctx_rd_addr,
+					      storm_id);
+
+		/* Dump Task AG context size */
+		offset +=
+			qed_grc_dump_ctx_data(p_hwfn,
+					      p_ptt,
+					      dump_buf + offset,
+					      dump,
+					      "TASK_AG_CTX",
+					      qed_grc_get_param(p_hwfn,
+						    DBG_GRC_PARAM_NUM_LTIDS),
+					      s_storm_defs[storm_id].
+						    cm_task_ag_ctx_lid_size,
+					      s_storm_defs[storm_id].
+						    cm_task_ag_ctx_rd_addr,
+					      storm_id);
+
+		/* Dump Task ST context size */
+		offset +=
+			qed_grc_dump_ctx_data(p_hwfn,
+					      p_ptt,
+					      dump_buf + offset,
+					      dump,
+					      "TASK_ST_CTX",
+					      qed_grc_get_param(p_hwfn,
+						    DBG_GRC_PARAM_NUM_LTIDS),
+					      s_storm_defs[storm_id].
+						    cm_task_st_ctx_lid_size,
+					      s_storm_defs[storm_id].
+						    cm_task_st_ctx_rd_addr,
+					      storm_id);
+	}
+
+	return offset;
+}
+
+/* Dumps GRC IORs data. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+	char buf[10] = "IOR_SET_?";
+	u8 storm_id, set_id;
+	u32 offset = 0;
+
+	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+		if (qed_grc_is_storm_included(p_hwfn,
+					      (enum dbg_storms)storm_id)) {
+			for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) {
+				u32 addr =
+				    s_storm_defs[storm_id].sem_fast_mem_addr +
+				    SEM_FAST_REG_STORM_REG_FILE +
+				    DWORDS_TO_BYTES(IOR_SET_OFFSET(set_id));
+
+				buf[strlen(buf) - 1] = '0' + set_id;
+				offset += qed_grc_dump_mem(p_hwfn,
+							   p_ptt,
+							   dump_buf + offset,
+							   dump,
+							   buf,
+							   addr,
+							   IORS_PER_SET,
+							   32,
+							   false,
+							   "ior",
+							   true,
+							   s_storm_defs
+							   [storm_id].letter);
+			}
+		}
+	}
+
+	return offset;
+}
+
+/* Dump VFC CAM. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt,
+				u32 *dump_buf, bool dump, u8 storm_id)
+{
+	u32 total_size = VFC_CAM_NUM_ROWS * VFC_CAM_RESP_DWORDS;
+	u32 cam_addr[VFC_CAM_ADDR_DWORDS] = { 0 };
+	u32 cam_cmd[VFC_CAM_CMD_DWORDS] = { 0 };
+	u32 offset = 0;
+	u32 row, i;
+
+	offset += qed_grc_dump_mem_hdr(p_hwfn,
+				       dump_buf + offset,
+				       dump,
+				       "vfc_cam",
+				       0,
+				       total_size,
+				       256,
+				       false,
+				       "vfc_cam",
+				       true, s_storm_defs[storm_id].letter);
+	if (dump) {
+		/* Prepare CAM address */
+		SET_VAR_FIELD(cam_addr, VFC_CAM_ADDR, OP, VFC_OPCODE_CAM_RD);
+		for (row = 0; row < VFC_CAM_NUM_ROWS;
+		     row++, offset += VFC_CAM_RESP_DWORDS) {
+			/* Write VFC CAM command */
+			SET_VAR_FIELD(cam_cmd, VFC_CAM_CMD, ROW, row);
+			ARR_REG_WR(p_hwfn,
+				   p_ptt,
+				   s_storm_defs[storm_id].sem_fast_mem_addr +
+				   SEM_FAST_REG_VFC_DATA_WR,
+				   cam_cmd, VFC_CAM_CMD_DWORDS);
+
+			/* Write VFC CAM address */
+			ARR_REG_WR(p_hwfn,
+				   p_ptt,
+				   s_storm_defs[storm_id].sem_fast_mem_addr +
+				   SEM_FAST_REG_VFC_ADDR,
+				   cam_addr, VFC_CAM_ADDR_DWORDS);
+
+			/* Read VFC CAM read response */
+			ARR_REG_RD(p_hwfn,
+				   p_ptt,
+				   s_storm_defs[storm_id].sem_fast_mem_addr +
+				   SEM_FAST_REG_VFC_DATA_RD,
+				   dump_buf + offset, VFC_CAM_RESP_DWORDS);
+		}
+	} else {
+		offset += total_size;
+	}
+
+	return offset;
+}
+
+/* Dump VFC RAM. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt,
+				u32 *dump_buf,
+				bool dump,
+				u8 storm_id, struct vfc_ram_defs *ram_defs)
+{
+	u32 total_size = ram_defs->num_rows * VFC_RAM_RESP_DWORDS;
+	u32 ram_addr[VFC_RAM_ADDR_DWORDS] = { 0 };
+	u32 ram_cmd[VFC_RAM_CMD_DWORDS] = { 0 };
+	u32 offset = 0;
+	u32 row, i;
+
+	offset += qed_grc_dump_mem_hdr(p_hwfn,
+				       dump_buf + offset,
+				       dump,
+				       ram_defs->mem_name,
+				       0,
+				       total_size,
+				       256,
+				       false,
+				       ram_defs->type_name,
+				       true, s_storm_defs[storm_id].letter);
+
+	/* Prepare RAM address */
+	SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, OP, VFC_OPCODE_RAM_RD);
+
+	if (!dump)
+		return offset + total_size;
+
+	for (row = ram_defs->base_row;
+	     row < ram_defs->base_row + ram_defs->num_rows;
+	     row++, offset += VFC_RAM_RESP_DWORDS) {
+		/* Write VFC RAM command */
+		ARR_REG_WR(p_hwfn,
+			   p_ptt,
+			   s_storm_defs[storm_id].sem_fast_mem_addr +
+			   SEM_FAST_REG_VFC_DATA_WR,
+			   ram_cmd, VFC_RAM_CMD_DWORDS);
+
+		/* Write VFC RAM address */
+		SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, ROW, row);
+		ARR_REG_WR(p_hwfn,
+			   p_ptt,
+			   s_storm_defs[storm_id].sem_fast_mem_addr +
+			   SEM_FAST_REG_VFC_ADDR,
+			   ram_addr, VFC_RAM_ADDR_DWORDS);
+
+		/* Read VFC RAM read response */
+		ARR_REG_RD(p_hwfn,
+			   p_ptt,
+			   s_storm_defs[storm_id].sem_fast_mem_addr +
+			   SEM_FAST_REG_VFC_DATA_RD,
+			   dump_buf + offset, VFC_RAM_RESP_DWORDS);
+	}
+
+	return offset;
+}
+
+/* Dumps GRC VFC data. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_vfc(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u8 storm_id, i;
+	u32 offset = 0;
+
+	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+		if (qed_grc_is_storm_included(p_hwfn,
+					      (enum dbg_storms)storm_id) &&
+		    s_storm_defs[storm_id].has_vfc &&
+		    (storm_id != DBG_PSTORM_ID ||
+		     dev_data->platform_id == PLATFORM_ASIC)) {
+			/* Read CAM */
+			offset += qed_grc_dump_vfc_cam(p_hwfn,
+						       p_ptt,
+						       dump_buf + offset,
+						       dump, storm_id);
+
+			/* Read RAM */
+			for (i = 0; i < NUM_VFC_RAM_TYPES; i++)
+				offset += qed_grc_dump_vfc_ram(p_hwfn,
+							       p_ptt,
+							       dump_buf +
+							       offset,
+							       dump,
+							       storm_id,
+							       &s_vfc_ram_defs
+							       [i]);
+		}
+	}
+
+	return offset;
+}
+
+/* Dumps GRC RSS data. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_rss(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 offset = 0;
+	u8 rss_mem_id;
+
+	for (rss_mem_id = 0; rss_mem_id < NUM_RSS_MEM_TYPES; rss_mem_id++) {
+		struct rss_mem_defs *rss_defs = &s_rss_mem_defs[rss_mem_id];
+		u32 num_entries = rss_defs->num_entries[dev_data->chip_id];
+		u32 entry_width = rss_defs->entry_width[dev_data->chip_id];
+		u32 total_size = (num_entries * entry_width) / 32;
+		bool packed = (entry_width == 16);
+		u32 addr = rss_defs->addr;
+		u32 i, j;
+
+		offset += qed_grc_dump_mem_hdr(p_hwfn,
+					       dump_buf + offset,
+					       dump,
+					       rss_defs->mem_name,
+					       addr,
+					       total_size,
+					       entry_width,
+					       packed,
+					       rss_defs->type_name, false, 0);
+
+		if (!dump) {
+			offset += total_size;
+			continue;
+		}
+
+		/* Dump RSS data */
+		for (i = 0; i < BYTES_TO_DWORDS(total_size); i++, addr++) {
+			qed_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_ADDR, addr);
+			for (j = 0; j < BYTES_IN_DWORD; j++, offset++)
+				*(dump_buf + offset) =
+					qed_rd(p_hwfn, p_ptt,
+					       RSS_REG_RSS_RAM_DATA +
+					       DWORDS_TO_BYTES(j));
+		}
+	}
+
+	return offset;
+}
+
+/* Dumps GRC Big RAM. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt,
+				u32 *dump_buf, bool dump, u8 big_ram_id)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	char mem_name[12] = "???_BIG_RAM";
+	char type_name[8] = "???_RAM";
+	u32 ram_size, total_blocks;
+	u32 offset = 0, i, j;
+
+	total_blocks =
+		s_big_ram_defs[big_ram_id].num_of_blocks[dev_data->chip_id];
+	ram_size = total_blocks * BIG_RAM_BLOCK_SIZE_DWORDS;
+
+	strncpy(type_name, s_big_ram_defs[big_ram_id].instance_name,
+		strlen(s_big_ram_defs[big_ram_id].instance_name));
+	strncpy(mem_name, s_big_ram_defs[big_ram_id].instance_name,
+		strlen(s_big_ram_defs[big_ram_id].instance_name));
+
+	/* Dump memory header */
+	offset += qed_grc_dump_mem_hdr(p_hwfn,
+				       dump_buf + offset,
+				       dump,
+				       mem_name,
+				       0,
+				       ram_size,
+				       BIG_RAM_BLOCK_SIZE_BYTES * 8,
+				       false, type_name, false, 0);
+
+	if (!dump)
+		return offset + ram_size;
+
+	/* Read and dump Big RAM data */
+	for (i = 0; i < total_blocks / 2; i++) {
+		qed_wr(p_hwfn, p_ptt, s_big_ram_defs[big_ram_id].addr_reg_addr,
+		       i);
+		for (j = 0; j < 2 * BIG_RAM_BLOCK_SIZE_DWORDS; j++, offset++)
+			*(dump_buf + offset) = qed_rd(p_hwfn, p_ptt,
+						s_big_ram_defs[big_ram_id].
+							data_reg_addr +
+						DWORDS_TO_BYTES(j));
+	}
+
+	return offset;
+}
+
+static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+	bool block_enable[MAX_BLOCK_ID] = { 0 };
+	bool halted = false;
+	u32 offset = 0;
+
+	/* Halt MCP */
+	if (dump) {
+		halted = !qed_mcp_halt(p_hwfn, p_ptt);
+		if (!halted)
+			DP_NOTICE(p_hwfn, "MCP halt failed!\n");
+	}
+
+	/* Dump MCP scratchpad */
+	offset += qed_grc_dump_mem(p_hwfn,
+				   p_ptt,
+				   dump_buf + offset,
+				   dump,
+				   NULL,
+				   MCP_REG_SCRATCH,
+				   MCP_REG_SCRATCH_SIZE,
+				   0, false, "MCP", false, 0);
+
+	/* Dump MCP cpu_reg_file */
+	offset += qed_grc_dump_mem(p_hwfn,
+				   p_ptt,
+				   dump_buf + offset,
+				   dump,
+				   NULL,
+				   MCP_REG_CPU_REG_FILE,
+				   MCP_REG_CPU_REG_FILE_SIZE,
+				   0, false, "MCP", false, 0);
+
+	/* Dump MCP registers */
+	block_enable[BLOCK_MCP] = true;
+	offset += qed_grc_dump_registers(p_hwfn,
+					 p_ptt,
+					 dump_buf + offset,
+					 dump, block_enable, "block", "MCP");
+
+	/* Dump required non-MCP registers */
+	offset += qed_grc_dump_regs_hdr(dump_buf + offset,
+					dump, 1, "eng", -1, "block", "MCP");
+	offset += qed_grc_dump_reg_entry(p_hwfn,
+					 p_ptt,
+					 dump_buf + offset,
+					 dump,
+					 BYTES_TO_DWORDS
+					 (MISC_REG_SHARED_MEM_ADDR), 1);
+
+	/* Release MCP */
+	if (halted && qed_mcp_resume(p_hwfn, p_ptt))
+		DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n");
+	return offset;
+}
+
+/* Dumps the tbus indirect memory for all PHYs. */
+static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+	u32 offset = 0, tbus_lo_offset, tbus_hi_offset;
+	char mem_name[32];
+	u8 phy_id;
+
+	for (phy_id = 0; phy_id < ARRAY_SIZE(s_phy_defs); phy_id++) {
+		struct phy_defs *phy_defs = &s_phy_defs[phy_id];
+		int printed_chars;
+
+		printed_chars = snprintf(mem_name, sizeof(mem_name), "tbus_%s",
+					 phy_defs->phy_name);
+		if (printed_chars < 0 || printed_chars >= sizeof(mem_name))
+			DP_NOTICE(p_hwfn,
+				  "Unexpected debug error: invalid PHY memory name\n");
+		offset += qed_grc_dump_mem_hdr(p_hwfn,
+					       dump_buf + offset,
+					       dump,
+					       mem_name,
+					       0,
+					       PHY_DUMP_SIZE_DWORDS,
+					       16, true, mem_name, false, 0);
+		if (dump) {
+			u32 addr_lo_addr = phy_defs->base_addr +
+					   phy_defs->tbus_addr_lo_addr;
+			u32 addr_hi_addr = phy_defs->base_addr +
+					   phy_defs->tbus_addr_hi_addr;
+			u32 data_lo_addr = phy_defs->base_addr +
+					   phy_defs->tbus_data_lo_addr;
+			u32 data_hi_addr = phy_defs->base_addr +
+					   phy_defs->tbus_data_hi_addr;
+			u8 *bytes_buf = (u8 *)(dump_buf + offset);
+
+			for (tbus_hi_offset = 0;
+			     tbus_hi_offset < (NUM_PHY_TBUS_ADDRESSES >> 8);
+			     tbus_hi_offset++) {
+				qed_wr(p_hwfn,
+				       p_ptt, addr_hi_addr, tbus_hi_offset);
+				for (tbus_lo_offset = 0; tbus_lo_offset < 256;
+				     tbus_lo_offset++) {
+					qed_wr(p_hwfn,
+					       p_ptt,
+					       addr_lo_addr, tbus_lo_offset);
+					*(bytes_buf++) =
+						(u8)qed_rd(p_hwfn, p_ptt,
+							   data_lo_addr);
+					*(bytes_buf++) =
+						(u8)qed_rd(p_hwfn, p_ptt,
+							   data_hi_addr);
+				}
+			}
+		}
+
+		offset += PHY_DUMP_SIZE_DWORDS;
+	}
+
+	return offset;
+}
+
+static void qed_config_dbg_line(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt,
+				enum block_id block_id,
+				u8 line_id,
+				u8 cycle_en,
+				u8 right_shift, u8 force_valid, u8 force_frame)
+{
+	struct block_defs *p_block_defs = s_block_defs[block_id];
+
+	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_select_addr, line_id);
+	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_cycle_enable_addr, cycle_en);
+	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_shift_addr, right_shift);
+	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_valid_addr, force_valid);
+	qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_frame_addr, force_frame);
+}
+
+/* Dumps Static Debug data. Returns the dumped size in dwords. */
+static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt,
+				     u32 *dump_buf, bool dump)
+{
+	u32 block_dwords = NUM_DBG_BUS_LINES * STATIC_DEBUG_LINE_DWORDS;
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 offset = 0, block_id, line_id, addr, i;
+	struct block_defs *p_block_defs;
+
+	if (dump) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_DEBUG, "Dumping static debug data...\n");
+
+		/* Disable all blocks debug output */
+		for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
+			p_block_defs = s_block_defs[block_id];
+
+			if (p_block_defs->has_dbg_bus[dev_data->chip_id])
+				qed_wr(p_hwfn, p_ptt,
+				       p_block_defs->dbg_cycle_enable_addr, 0);
+		}
+
+		qed_bus_reset_dbg_block(p_hwfn, p_ptt);
+		qed_bus_set_framing_mode(p_hwfn,
+					 p_ptt, DBG_BUS_FRAME_MODE_8HW_0ST);
+		qed_wr(p_hwfn,
+		       p_ptt, DBG_REG_DEBUG_TARGET, DBG_BUS_TARGET_ID_INT_BUF);
+		qed_wr(p_hwfn, p_ptt, DBG_REG_FULL_MODE, 1);
+		qed_bus_enable_dbg_block(p_hwfn, p_ptt, true);
+	}
+
+	/* Dump all static debug lines for each relevant block */
+	for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) {
+		p_block_defs = s_block_defs[block_id];
+
+		if (!p_block_defs->has_dbg_bus[dev_data->chip_id])
+			continue;
+
+		/* Dump static section params */
+		offset += qed_grc_dump_mem_hdr(p_hwfn,
+					       dump_buf + offset,
+					       dump,
+					       p_block_defs->name, 0,
+					       block_dwords, 32, false,
+					       "STATIC", false, 0);
+
+		if (dump && !dev_data->block_in_reset[block_id]) {
+			u8 dbg_client_id =
+				p_block_defs->dbg_client_id[dev_data->chip_id];
+
+			/* Enable block's client */
+			qed_bus_enable_clients(p_hwfn, p_ptt,
+					       BIT(dbg_client_id));
+
+			for (line_id = 0; line_id < NUM_DBG_BUS_LINES;
+			     line_id++) {
+				/* Configure debug line ID */
+				qed_config_dbg_line(p_hwfn,
+						    p_ptt,
+						    (enum block_id)block_id,
+						    (u8)line_id,
+						    0xf, 0, 0, 0);
+
+				/* Read debug line info */
+				for (i = 0, addr = DBG_REG_CALENDAR_OUT_DATA;
+				     i < STATIC_DEBUG_LINE_DWORDS;
+				     i++, offset++, addr += BYTES_IN_DWORD)
+					dump_buf[offset] = qed_rd(p_hwfn, p_ptt,
+								  addr);
+			}
+
+			/* Disable block's client and debug output */
+			qed_bus_enable_clients(p_hwfn, p_ptt, 0);
+			qed_wr(p_hwfn, p_ptt,
+			       p_block_defs->dbg_cycle_enable_addr, 0);
+		} else {
+			/* All lines are invalid - dump zeros */
+			if (dump)
+				memset(dump_buf + offset, 0,
+				       DWORDS_TO_BYTES(block_dwords));
+			offset += block_dwords;
+		}
+	}
+
+	if (dump) {
+		qed_bus_enable_dbg_block(p_hwfn, p_ptt, false);
+		qed_bus_enable_clients(p_hwfn, p_ptt, 0);
+	}
+
+	return offset;
+}
+
+/* Performs GRC Dump to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn,
+				    struct qed_ptt *p_ptt,
+				    u32 *dump_buf,
+				    bool dump, u32 *num_dumped_dwords)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	bool parities_masked = false;
+	u8 i, port_mode = 0;
+	u32 offset = 0;
+
+	/* Check if emulation platform */
+	*num_dumped_dwords = 0;
+
+	/* Fill GRC parameters that were not set by the user with their default
+	 * value.
+	 */
+	qed_dbg_grc_set_params_default(p_hwfn);
+
+	/* Find port mode */
+	if (dump) {
+		switch (qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE)) {
+		case 0:
+			port_mode = 1;
+			break;
+		case 1:
+			port_mode = 2;
+			break;
+		case 2:
+			port_mode = 4;
+			break;
+		}
+	}
+
+	/* Update reset state */
+	if (dump)
+		qed_update_blocks_reset_state(p_hwfn, p_ptt);
+
+	/* Dump global params */
+	offset += qed_dump_common_global_params(p_hwfn,
+						p_ptt,
+						dump_buf + offset, dump, 4);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "dump-type", "grc-dump");
+	offset += qed_dump_num_param(dump_buf + offset,
+				     dump,
+				     "num-lcids",
+				     qed_grc_get_param(p_hwfn,
+						DBG_GRC_PARAM_NUM_LCIDS));
+	offset += qed_dump_num_param(dump_buf + offset,
+				     dump,
+				     "num-ltids",
+				     qed_grc_get_param(p_hwfn,
+						DBG_GRC_PARAM_NUM_LTIDS));
+	offset += qed_dump_num_param(dump_buf + offset,
+				     dump, "num-ports", port_mode);
+
+	/* Dump reset registers (dumped before taking blocks out of reset ) */
+	if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS))
+		offset += qed_grc_dump_reset_regs(p_hwfn,
+						  p_ptt,
+						  dump_buf + offset, dump);
+
+	/* Take all blocks out of reset (using reset registers) */
+	if (dump) {
+		qed_grc_unreset_blocks(p_hwfn, p_ptt);
+		qed_update_blocks_reset_state(p_hwfn, p_ptt);
+	}
+
+	/* Disable all parities using MFW command */
+	if (dump) {
+		parities_masked = !qed_mcp_mask_parities(p_hwfn, p_ptt, 1);
+		if (!parities_masked) {
+			if (qed_grc_get_param
+			    (p_hwfn, DBG_GRC_PARAM_PARITY_SAFE))
+				return DBG_STATUS_MCP_COULD_NOT_MASK_PRTY;
+			else
+				DP_NOTICE(p_hwfn,
+					  "Failed to mask parities using MFW\n");
+		}
+	}
+
+	/* Dump modified registers (dumped before modifying them) */
+	if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS))
+		offset += qed_grc_dump_modified_regs(p_hwfn,
+						     p_ptt,
+						     dump_buf + offset, dump);
+
+	/* Stall storms */
+	if (dump &&
+	    (qed_grc_is_included(p_hwfn,
+				 DBG_GRC_PARAM_DUMP_IOR) ||
+	     qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC)))
+		qed_grc_stall_storms(p_hwfn, p_ptt, true);
+
+	/* Dump all regs  */
+	if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) {
+		/* Dump all blocks except MCP */
+		bool block_enable[MAX_BLOCK_ID];
+
+		for (i = 0; i < MAX_BLOCK_ID; i++)
+			block_enable[i] = true;
+		block_enable[BLOCK_MCP] = false;
+		offset += qed_grc_dump_registers(p_hwfn,
+						 p_ptt,
+						 dump_buf +
+						 offset,
+						 dump,
+						 block_enable, NULL, NULL);
+	}
+
+	/* Dump memories */
+	offset += qed_grc_dump_memories(p_hwfn, p_ptt, dump_buf + offset, dump);
+
+	/* Dump MCP */
+	if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MCP))
+		offset += qed_grc_dump_mcp(p_hwfn,
+					   p_ptt, dump_buf + offset, dump);
+
+	/* Dump context */
+	if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM_CTX))
+		offset += qed_grc_dump_ctx(p_hwfn,
+					   p_ptt, dump_buf + offset, dump);
+
+	/* Dump RSS memories */
+	if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RSS))
+		offset += qed_grc_dump_rss(p_hwfn,
+					   p_ptt, dump_buf + offset, dump);
+
+	/* Dump Big RAM */
+	for (i = 0; i < NUM_BIG_RAM_TYPES; i++)
+		if (qed_grc_is_included(p_hwfn, s_big_ram_defs[i].grc_param))
+			offset += qed_grc_dump_big_ram(p_hwfn,
+						       p_ptt,
+						       dump_buf + offset,
+						       dump, i);
+
+	/* Dump IORs */
+	if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR))
+		offset += qed_grc_dump_iors(p_hwfn,
+					    p_ptt, dump_buf + offset, dump);
+
+	/* Dump VFC */
+	if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC))
+		offset += qed_grc_dump_vfc(p_hwfn,
+					   p_ptt, dump_buf + offset, dump);
+
+	/* Dump PHY tbus */
+	if (qed_grc_is_included(p_hwfn,
+				DBG_GRC_PARAM_DUMP_PHY) && dev_data->chip_id ==
+	    CHIP_K2 && dev_data->platform_id == PLATFORM_ASIC)
+		offset += qed_grc_dump_phy(p_hwfn,
+					   p_ptt, dump_buf + offset, dump);
+
+	/* Dump static debug data  */
+	if (qed_grc_is_included(p_hwfn,
+				DBG_GRC_PARAM_DUMP_STATIC) &&
+	    dev_data->bus.state == DBG_BUS_STATE_IDLE)
+		offset += qed_grc_dump_static_debug(p_hwfn,
+						    p_ptt,
+						    dump_buf + offset, dump);
+
+	/* Dump last section */
+	offset += qed_dump_last_section(dump_buf, offset, dump);
+	if (dump) {
+		/* Unstall storms */
+		if (qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_UNSTALL))
+			qed_grc_stall_storms(p_hwfn, p_ptt, false);
+
+		/* Clear parity status */
+		qed_grc_clear_all_prty(p_hwfn, p_ptt);
+
+		/* Enable all parities using MFW command */
+		if (parities_masked)
+			qed_mcp_mask_parities(p_hwfn, p_ptt, 0);
+	}
+
+	*num_dumped_dwords = offset;
+
+	return DBG_STATUS_OK;
+}
+
+/* Writes the specified failing Idle Check rule to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt,
+				     u32 *
+				     dump_buf,
+				     bool dump,
+				     u16 rule_id,
+				     const struct dbg_idle_chk_rule *rule,
+				     u16 fail_entry_id, u32 *cond_reg_values)
+{
+	const union dbg_idle_chk_reg *regs = &((const union dbg_idle_chk_reg *)
+					       s_dbg_arrays
+					       [BIN_BUF_DBG_IDLE_CHK_REGS].
+					       ptr)[rule->reg_offset];
+	const struct dbg_idle_chk_cond_reg *cond_regs = &regs[0].cond_reg;
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	struct dbg_idle_chk_result_hdr *hdr =
+		(struct dbg_idle_chk_result_hdr *)dump_buf;
+	const struct dbg_idle_chk_info_reg *info_regs =
+		&regs[rule->num_cond_regs].info_reg;
+	u32 next_reg_offset = 0, i, offset = 0;
+	u8 reg_id;
+
+	/* Dump rule data */
+	if (dump) {
+		memset(hdr, 0, sizeof(*hdr));
+		hdr->rule_id = rule_id;
+		hdr->mem_entry_id = fail_entry_id;
+		hdr->severity = rule->severity;
+		hdr->num_dumped_cond_regs = rule->num_cond_regs;
+	}
+
+	offset += IDLE_CHK_RESULT_HDR_DWORDS;
+
+	/* Dump condition register values */
+	for (reg_id = 0; reg_id < rule->num_cond_regs; reg_id++) {
+		const struct dbg_idle_chk_cond_reg *reg = &cond_regs[reg_id];
+
+		/* Write register header */
+		if (dump) {
+			struct dbg_idle_chk_result_reg_hdr *reg_hdr =
+			    (struct dbg_idle_chk_result_reg_hdr *)(dump_buf
+								   + offset);
+			offset += IDLE_CHK_RESULT_REG_HDR_DWORDS;
+			memset(reg_hdr, 0,
+			       sizeof(struct dbg_idle_chk_result_reg_hdr));
+			reg_hdr->start_entry = reg->start_entry;
+			reg_hdr->size = reg->entry_size;
+			SET_FIELD(reg_hdr->data,
+				  DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM,
+				  reg->num_entries > 1 || reg->start_entry > 0
+				  ? 1 : 0);
+			SET_FIELD(reg_hdr->data,
+				  DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, reg_id);
+
+			/* Write register values */
+			for (i = 0; i < reg_hdr->size;
+			     i++, next_reg_offset++, offset++)
+				dump_buf[offset] =
+				    cond_reg_values[next_reg_offset];
+		} else {
+			offset += IDLE_CHK_RESULT_REG_HDR_DWORDS +
+			    reg->entry_size;
+		}
+	}
+
+	/* Dump info register values */
+	for (reg_id = 0; reg_id < rule->num_info_regs; reg_id++) {
+		const struct dbg_idle_chk_info_reg *reg = &info_regs[reg_id];
+		u32 block_id;
+
+		if (!dump) {
+			offset += IDLE_CHK_RESULT_REG_HDR_DWORDS + reg->size;
+			continue;
+		}
+
+		/* Check if register's block is in reset */
+		block_id = GET_FIELD(reg->data, DBG_IDLE_CHK_INFO_REG_BLOCK_ID);
+		if (block_id >= MAX_BLOCK_ID) {
+			DP_NOTICE(p_hwfn, "Invalid block_id\n");
+			return 0;
+		}
+
+		if (!dev_data->block_in_reset[block_id]) {
+			bool eval_mode = GET_FIELD(reg->mode.data,
+						   DBG_MODE_HDR_EVAL_MODE) > 0;
+			bool mode_match = true;
+
+			/* Check mode */
+			if (eval_mode) {
+				u16 modes_buf_offset =
+					GET_FIELD(reg->mode.data,
+						DBG_MODE_HDR_MODES_BUF_OFFSET);
+				mode_match =
+					qed_is_mode_match(p_hwfn,
+							  &modes_buf_offset);
+			}
+
+			if (mode_match) {
+				u32 grc_addr =
+					DWORDS_TO_BYTES(GET_FIELD(reg->data,
+						DBG_IDLE_CHK_INFO_REG_ADDRESS));
+
+				/* Write register header */
+				struct dbg_idle_chk_result_reg_hdr *reg_hdr =
+					(struct dbg_idle_chk_result_reg_hdr *)
+					(dump_buf + offset);
+
+				offset += IDLE_CHK_RESULT_REG_HDR_DWORDS;
+				hdr->num_dumped_info_regs++;
+				memset(reg_hdr, 0, sizeof(*reg_hdr));
+				reg_hdr->size = reg->size;
+				SET_FIELD(reg_hdr->data,
+					DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID,
+					rule->num_cond_regs + reg_id);
+
+				/* Write register values */
+				for (i = 0; i < reg->size;
+				     i++, offset++, grc_addr += 4)
+					dump_buf[offset] =
+						qed_rd(p_hwfn, p_ptt, grc_addr);
+				}
+			}
+	}
+
+	return offset;
+}
+
+/* Dumps idle check rule entries. Returns the dumped size in dwords. */
+static u32
+qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			       u32 *dump_buf, bool dump,
+			       const struct dbg_idle_chk_rule *input_rules,
+			       u32 num_input_rules, u32 *num_failing_rules)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	u32 cond_reg_values[IDLE_CHK_MAX_ENTRIES_SIZE];
+	u32 i, j, offset = 0;
+	u16 entry_id;
+	u8 reg_id;
+
+	*num_failing_rules = 0;
+	for (i = 0; i < num_input_rules; i++) {
+		const struct dbg_idle_chk_cond_reg *cond_regs;
+		const struct dbg_idle_chk_rule *rule;
+		const union dbg_idle_chk_reg *regs;
+		u16 num_reg_entries = 1;
+		bool check_rule = true;
+		const u32 *imm_values;
+
+		rule = &input_rules[i];
+		regs = &((const union dbg_idle_chk_reg *)
+			 s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr)
+			[rule->reg_offset];
+		cond_regs = &regs[0].cond_reg;
+		imm_values = &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr
+			     [rule->imm_offset];
+
+		/* Check if all condition register blocks are out of reset, and
+		 * find maximal number of entries (all condition registers that
+		 * are memories must have the same size, which is > 1).
+		 */
+		for (reg_id = 0; reg_id < rule->num_cond_regs && check_rule;
+		     reg_id++) {
+			u32 block_id = GET_FIELD(cond_regs[reg_id].data,
+						DBG_IDLE_CHK_COND_REG_BLOCK_ID);
+
+			if (block_id >= MAX_BLOCK_ID) {
+				DP_NOTICE(p_hwfn, "Invalid block_id\n");
+				return 0;
+			}
+
+			check_rule = !dev_data->block_in_reset[block_id];
+			if (cond_regs[reg_id].num_entries > num_reg_entries)
+				num_reg_entries = cond_regs[reg_id].num_entries;
+		}
+
+		if (!check_rule && dump)
+			continue;
+
+		/* Go over all register entries (number of entries is the same
+		 * for all condition registers).
+		 */
+		for (entry_id = 0; entry_id < num_reg_entries; entry_id++) {
+			/* Read current entry of all condition registers */
+			if (dump) {
+				u32 next_reg_offset = 0;
+
+				for (reg_id = 0;
+				     reg_id < rule->num_cond_regs;
+				     reg_id++) {
+					const struct dbg_idle_chk_cond_reg
+						*reg = &cond_regs[reg_id];
+
+					/* Find GRC address (if it's a memory,
+					 * the address of the specific entry is
+					 * calculated).
+					 */
+					u32 grc_addr =
+					   DWORDS_TO_BYTES(
+						GET_FIELD(reg->data,
+						    DBG_IDLE_CHK_COND_REG_ADDRESS));
+
+					if (reg->num_entries > 1 ||
+					    reg->start_entry > 0) {
+						u32 padded_entry_size =
+							reg->entry_size > 1 ?
+							roundup_pow_of_two
+							(reg->entry_size) : 1;
+
+						grc_addr +=
+							DWORDS_TO_BYTES(
+								(reg->start_entry +
+								entry_id)
+								* padded_entry_size);
+					}
+
+					/* Read registers */
+					if (next_reg_offset + reg->entry_size >=
+					    IDLE_CHK_MAX_ENTRIES_SIZE) {
+						DP_NOTICE(p_hwfn,
+							  "idle check registers entry is too large\n");
+						return 0;
+					}
+
+					for (j = 0; j < reg->entry_size;
+					     j++, next_reg_offset++,
+					     grc_addr += 4)
+					     cond_reg_values[next_reg_offset] =
+						qed_rd(p_hwfn, p_ptt, grc_addr);
+				}
+			}
+
+			/* Call rule's condition function - a return value of
+			 * true indicates failure.
+			 */
+			if ((*cond_arr[rule->cond_id])(cond_reg_values,
+						       imm_values) || !dump) {
+				offset +=
+					qed_idle_chk_dump_failure(p_hwfn,
+							p_ptt,
+							dump_buf + offset,
+							dump,
+							rule->rule_id,
+							rule,
+							entry_id,
+							cond_reg_values);
+				(*num_failing_rules)++;
+				break;
+			}
+		}
+	}
+
+	return offset;
+}
+
+/* Performs Idle Check Dump to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+	u32 offset = 0, input_offset = 0, num_failing_rules = 0;
+	u32 num_failing_rules_offset;
+
+	/* Dump global params */
+	offset += qed_dump_common_global_params(p_hwfn,
+						p_ptt,
+						dump_buf + offset, dump, 1);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "dump-type", "idle-chk");
+
+	/* Dump idle check section header with a single parameter */
+	offset += qed_dump_section_hdr(dump_buf + offset, dump, "idle_chk", 1);
+	num_failing_rules_offset = offset;
+	offset += qed_dump_num_param(dump_buf + offset, dump, "num_rules", 0);
+	while (input_offset <
+	       s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].size_in_dwords) {
+		const struct dbg_idle_chk_cond_hdr *cond_hdr =
+			(const struct dbg_idle_chk_cond_hdr *)
+			&s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr
+			[input_offset++];
+		bool eval_mode = GET_FIELD(cond_hdr->mode.data,
+					   DBG_MODE_HDR_EVAL_MODE) > 0;
+		bool mode_match = true;
+
+		/* Check mode */
+		if (eval_mode) {
+			u16 modes_buf_offset =
+				GET_FIELD(cond_hdr->mode.data,
+					  DBG_MODE_HDR_MODES_BUF_OFFSET);
+
+			mode_match = qed_is_mode_match(p_hwfn,
+						       &modes_buf_offset);
+		}
+
+		if (mode_match) {
+			u32 curr_failing_rules;
+
+			offset +=
+			    qed_idle_chk_dump_rule_entries(p_hwfn,
+				p_ptt,
+				dump_buf + offset,
+				dump,
+				(const struct dbg_idle_chk_rule *)
+				&s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].
+				ptr[input_offset],
+				cond_hdr->data_size / IDLE_CHK_RULE_SIZE_DWORDS,
+				&curr_failing_rules);
+			num_failing_rules += curr_failing_rules;
+		}
+
+		input_offset += cond_hdr->data_size;
+	}
+
+	/* Overwrite num_rules parameter */
+	if (dump)
+		qed_dump_num_param(dump_buf + num_failing_rules_offset,
+				   dump, "num_rules", num_failing_rules);
+
+	return offset;
+}
+
+/* Finds the meta data image in NVRAM. */
+static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn,
+					    struct qed_ptt *p_ptt,
+					    u32 image_type,
+					    u32 *nvram_offset_bytes,
+					    u32 *nvram_size_bytes)
+{
+	u32 ret_mcp_resp, ret_mcp_param, ret_txn_size;
+	struct mcp_file_att file_att;
+
+	/* Call NVRAM get file command */
+	if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_GET_FILE_ATT,
+			       image_type, &ret_mcp_resp, &ret_mcp_param,
+			       &ret_txn_size, (u32 *)&file_att) != 0)
+		return DBG_STATUS_NVRAM_GET_IMAGE_FAILED;
+
+	/* Check response */
+	if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK)
+		return DBG_STATUS_NVRAM_GET_IMAGE_FAILED;
+
+	/* Update return values */
+	*nvram_offset_bytes = file_att.nvm_start_addr;
+	*nvram_size_bytes = file_att.len;
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_DEBUG,
+		   "find_nvram_image: found NVRAM image of type %d in NVRAM offset %d bytes with size %d bytes\n",
+		   image_type, *nvram_offset_bytes, *nvram_size_bytes);
+
+	/* Check alignment */
+	if (*nvram_size_bytes & 0x3)
+		return DBG_STATUS_NON_ALIGNED_NVRAM_IMAGE;
+	return DBG_STATUS_OK;
+}
+
+static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
+				      u32 nvram_offset_bytes,
+				      u32 nvram_size_bytes, u32 *ret_buf)
+{
+	u32 ret_mcp_resp, ret_mcp_param, ret_read_size;
+	u32 bytes_to_copy, read_offset = 0;
+	s32 bytes_left = nvram_size_bytes;
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_DEBUG,
+		   "nvram_read: reading image of size %d bytes from NVRAM\n",
+		   nvram_size_bytes);
+	do {
+		bytes_to_copy =
+		    (bytes_left >
+		     MCP_DRV_NVM_BUF_LEN) ? MCP_DRV_NVM_BUF_LEN : bytes_left;
+
+		/* Call NVRAM read command */
+		if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+				       DRV_MSG_CODE_NVM_READ_NVRAM,
+				       (nvram_offset_bytes +
+					read_offset) |
+				       (bytes_to_copy <<
+					DRV_MB_PARAM_NVM_LEN_SHIFT),
+				       &ret_mcp_resp, &ret_mcp_param,
+				       &ret_read_size,
+				       (u32 *)((u8 *)ret_buf +
+					       read_offset)) != 0)
+			return DBG_STATUS_NVRAM_READ_FAILED;
+
+		/* Check response */
+		if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK)
+			return DBG_STATUS_NVRAM_READ_FAILED;
+
+		/* Update read offset */
+		read_offset += ret_read_size;
+		bytes_left -= ret_read_size;
+	} while (bytes_left > 0);
+
+	return DBG_STATUS_OK;
+}
+
+/* Get info on the MCP Trace data in the scratchpad:
+ * - trace_data_grc_addr - the GRC address of the trace data
+ * - trace_data_size_bytes - the size in bytes of the MCP Trace data (without
+ *	the header)
+ */
+static enum dbg_status qed_mcp_trace_get_data_info(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 *trace_data_grc_addr,
+						   u32 *trace_data_size_bytes)
+{
+	/* Read MCP trace section offsize structure from MCP scratchpad */
+	u32 spad_trace_offsize = qed_rd(p_hwfn,
+					p_ptt,
+					MCP_SPAD_TRACE_OFFSIZE_ADDR);
+	u32 signature;
+
+	/* Extract MCP trace section GRC address from offsize structure (within
+	 * scratchpad).
+	 */
+	*trace_data_grc_addr =
+		MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize);
+
+	/* Read signature from MCP trace section */
+	signature = qed_rd(p_hwfn, p_ptt,
+			   *trace_data_grc_addr +
+			   offsetof(struct mcp_trace, signature));
+	if (signature != MFW_TRACE_SIGNATURE)
+		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+	/* Read trace size from MCP trace section */
+	*trace_data_size_bytes = qed_rd(p_hwfn,
+					p_ptt,
+					*trace_data_grc_addr +
+					offsetof(struct mcp_trace, size));
+	return DBG_STATUS_OK;
+}
+
+/* Reads MCP trace meta data image from NVRAM.
+ * - running_bundle_id (OUT) - the running bundle ID (invalid when loaded from
+ *	file)
+ * - trace_meta_offset_bytes (OUT) - the NVRAM offset in bytes in which the MCP
+ *	Trace meta data starts (invalid when loaded from file)
+ * - trace_meta_size_bytes (OUT) - the size in bytes of the MCP Trace meta data
+ */
+static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 trace_data_size_bytes,
+						   u32 *running_bundle_id,
+						   u32 *trace_meta_offset_bytes,
+						   u32 *trace_meta_size_bytes)
+{
+	/* Read MCP trace section offsize structure from MCP scratchpad */
+	u32 spad_trace_offsize = qed_rd(p_hwfn,
+					p_ptt,
+					MCP_SPAD_TRACE_OFFSIZE_ADDR);
+
+	/* Find running bundle ID */
+	u32 running_mfw_addr =
+		MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize) +
+		QED_SECTION_SIZE(spad_trace_offsize) + trace_data_size_bytes;
+	enum dbg_status status;
+	u32 nvram_image_type;
+
+	*running_bundle_id = qed_rd(p_hwfn, p_ptt, running_mfw_addr);
+	if (*running_bundle_id > 1)
+		return DBG_STATUS_INVALID_NVRAM_BUNDLE;
+
+	/* Find image in NVRAM */
+	nvram_image_type =
+	    (*running_bundle_id ==
+	     DIR_ID_1) ? NVM_TYPE_MFW_TRACE1 : NVM_TYPE_MFW_TRACE2;
+	status = qed_find_nvram_image(p_hwfn,
+				      p_ptt,
+				      nvram_image_type,
+				      trace_meta_offset_bytes,
+				      trace_meta_size_bytes);
+
+	return status;
+}
+
+/* Reads the MCP Trace data from the specified GRC address into the specified
+ * buffer.
+ */
+static void qed_mcp_trace_read_data(struct qed_hwfn *p_hwfn,
+				    struct qed_ptt *p_ptt,
+				    u32 grc_addr, u32 size_in_dwords, u32 *buf)
+{
+	u32 i;
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_DEBUG,
+		   "mcp_trace_read_data: reading trace data of size %d dwords from GRC address 0x%x\n",
+		   size_in_dwords, grc_addr);
+	for (i = 0; i < size_in_dwords; i++, grc_addr += BYTES_IN_DWORD)
+		buf[i] = qed_rd(p_hwfn, p_ptt, grc_addr);
+}
+
+/* Reads the MCP Trace meta data (from NVRAM or buffer) into the specified
+ * buffer.
+ */
+static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn,
+					       struct qed_ptt *p_ptt,
+					       u32 nvram_offset_in_bytes,
+					       u32 size_in_bytes, u32 *buf)
+{
+	u8 *byte_buf = (u8 *)buf;
+	u8 modules_num, i;
+	u32 signature;
+
+	/* Read meta data from NVRAM */
+	enum dbg_status status = qed_nvram_read(p_hwfn,
+						p_ptt,
+						nvram_offset_in_bytes,
+						size_in_bytes,
+						buf);
+
+	if (status != DBG_STATUS_OK)
+		return status;
+
+	/* Extract and check first signature */
+	signature = qed_read_unaligned_dword(byte_buf);
+	byte_buf += sizeof(u32);
+	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+	/* Extract number of modules */
+	modules_num = *(byte_buf++);
+
+	/* Skip all modules */
+	for (i = 0; i < modules_num; i++) {
+		u8 module_len = *(byte_buf++);
+
+		byte_buf += module_len;
+	}
+
+	/* Extract and check second signature */
+	signature = qed_read_unaligned_dword(byte_buf);
+	byte_buf += sizeof(u32);
+	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+	return DBG_STATUS_OK;
+}
+
+/* Dump MCP Trace */
+enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   u32 *dump_buf,
+				   bool dump, u32 *num_dumped_dwords)
+{
+	u32 trace_data_grc_addr, trace_data_size_bytes, trace_data_size_dwords;
+	u32 trace_meta_size_dwords, running_bundle_id, offset = 0;
+	u32 trace_meta_offset_bytes, trace_meta_size_bytes;
+	enum dbg_status status;
+	int halted = 0;
+
+	*num_dumped_dwords = 0;
+
+	/* Get trace data info */
+	status = qed_mcp_trace_get_data_info(p_hwfn,
+					     p_ptt,
+					     &trace_data_grc_addr,
+					     &trace_data_size_bytes);
+	if (status != DBG_STATUS_OK)
+		return status;
+
+	/* Dump global params */
+	offset += qed_dump_common_global_params(p_hwfn,
+						p_ptt,
+						dump_buf + offset, dump, 1);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "dump-type", "mcp-trace");
+
+	/* Halt MCP while reading from scratchpad so the read data will be
+	 * consistent if halt fails, MCP trace is taken anyway, with a small
+	 * risk that it may be corrupt.
+	 */
+	if (dump) {
+		halted = !qed_mcp_halt(p_hwfn, p_ptt);
+		if (!halted)
+			DP_NOTICE(p_hwfn, "MCP halt failed!\n");
+	}
+
+	/* Find trace data size */
+	trace_data_size_dwords =
+		DIV_ROUND_UP(trace_data_size_bytes + sizeof(struct mcp_trace),
+			     BYTES_IN_DWORD);
+
+	/* Dump trace data section header and param */
+	offset += qed_dump_section_hdr(dump_buf + offset,
+				       dump, "mcp_trace_data", 1);
+	offset += qed_dump_num_param(dump_buf + offset,
+				     dump, "size", trace_data_size_dwords);
+
+	/* Read trace data from scratchpad into dump buffer */
+	if (dump)
+		qed_mcp_trace_read_data(p_hwfn,
+					p_ptt,
+					trace_data_grc_addr,
+					trace_data_size_dwords,
+					dump_buf + offset);
+	offset += trace_data_size_dwords;
+
+	/* Resume MCP (only if halt succeeded) */
+	if (halted && qed_mcp_resume(p_hwfn, p_ptt) != 0)
+		DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n");
+
+	/* Dump trace meta section header */
+	offset += qed_dump_section_hdr(dump_buf + offset,
+				       dump, "mcp_trace_meta", 1);
+
+	/* Read trace meta info */
+	status = qed_mcp_trace_get_meta_info(p_hwfn,
+					     p_ptt,
+					     trace_data_size_bytes,
+					     &running_bundle_id,
+					     &trace_meta_offset_bytes,
+					     &trace_meta_size_bytes);
+	if (status != DBG_STATUS_OK)
+		return status;
+
+	/* Dump trace meta size param (trace_meta_size_bytes is always
+	 * dword-aligned).
+	 */
+	trace_meta_size_dwords = BYTES_TO_DWORDS(trace_meta_size_bytes);
+	offset += qed_dump_num_param(dump_buf + offset,	dump, "size",
+				     trace_meta_size_dwords);
+
+	/* Read trace meta image into dump buffer */
+	if (dump) {
+		status = qed_mcp_trace_read_meta(p_hwfn,
+						p_ptt,
+						trace_meta_offset_bytes,
+						trace_meta_size_bytes,
+						dump_buf + offset);
+		if (status != DBG_STATUS_OK)
+			return status;
+	}
+
+	offset += trace_meta_size_dwords;
+
+	*num_dumped_dwords = offset;
+
+	return DBG_STATUS_OK;
+}
+
+/* Dump GRC FIFO */
+enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  u32 *dump_buf,
+				  bool dump, u32 *num_dumped_dwords)
+{
+	u32 offset = 0, dwords_read, size_param_offset;
+	bool fifo_has_data;
+
+	*num_dumped_dwords = 0;
+
+	/* Dump global params */
+	offset += qed_dump_common_global_params(p_hwfn,
+						p_ptt,
+						dump_buf + offset, dump, 1);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "dump-type", "reg-fifo");
+
+	/* Dump fifo data section header and param. The size param is 0 for now,
+	 * and is overwritten after reading the FIFO.
+	 */
+	offset += qed_dump_section_hdr(dump_buf + offset,
+				       dump, "reg_fifo_data", 1);
+	size_param_offset = offset;
+	offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0);
+
+	if (!dump) {
+		/* FIFO max size is REG_FIFO_DEPTH_DWORDS. There is no way to
+		 * test how much data is available, except for reading it.
+		 */
+		offset += REG_FIFO_DEPTH_DWORDS;
+		*num_dumped_dwords = offset;
+		return DBG_STATUS_OK;
+	}
+
+	fifo_has_data = qed_rd(p_hwfn, p_ptt,
+			       GRC_REG_TRACE_FIFO_VALID_DATA) > 0;
+
+	/* Pull available data from fifo. Use DMAE since this is widebus memory
+	 * and must be accessed atomically. Test for dwords_read not passing
+	 * buffer size since more entries could be added to the buffer as we are
+	 * emptying it.
+	 */
+	for (dwords_read = 0;
+	     fifo_has_data && dwords_read < REG_FIFO_DEPTH_DWORDS;
+	     dwords_read += REG_FIFO_ELEMENT_DWORDS, offset +=
+	     REG_FIFO_ELEMENT_DWORDS) {
+		if (qed_dmae_grc2host(p_hwfn, p_ptt, GRC_REG_TRACE_FIFO,
+				      (u64)(uintptr_t)(&dump_buf[offset]),
+				      REG_FIFO_ELEMENT_DWORDS, 0))
+			return DBG_STATUS_DMAE_FAILED;
+		fifo_has_data = qed_rd(p_hwfn, p_ptt,
+				       GRC_REG_TRACE_FIFO_VALID_DATA) > 0;
+	}
+
+	qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
+			   dwords_read);
+
+	*num_dumped_dwords = offset;
+	return DBG_STATUS_OK;
+}
+
+/* Dump IGU FIFO */
+enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  u32 *dump_buf,
+				  bool dump, u32 *num_dumped_dwords)
+{
+	u32 offset = 0, dwords_read, size_param_offset;
+	bool fifo_has_data;
+
+	*num_dumped_dwords = 0;
+
+	/* Dump global params */
+	offset += qed_dump_common_global_params(p_hwfn,
+						p_ptt,
+						dump_buf + offset, dump, 1);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "dump-type", "igu-fifo");
+
+	/* Dump fifo data section header and param. The size param is 0 for now,
+	 * and is overwritten after reading the FIFO.
+	 */
+	offset += qed_dump_section_hdr(dump_buf + offset,
+				       dump, "igu_fifo_data", 1);
+	size_param_offset = offset;
+	offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0);
+
+	if (!dump) {
+		/* FIFO max size is IGU_FIFO_DEPTH_DWORDS. There is no way to
+		 * test how much data is available, except for reading it.
+		 */
+		offset += IGU_FIFO_DEPTH_DWORDS;
+		*num_dumped_dwords = offset;
+		return DBG_STATUS_OK;
+	}
+
+	fifo_has_data = qed_rd(p_hwfn, p_ptt,
+			       IGU_REG_ERROR_HANDLING_DATA_VALID) > 0;
+
+	/* Pull available data from fifo. Use DMAE since this is widebus memory
+	 * and must be accessed atomically. Test for dwords_read not passing
+	 * buffer size since more entries could be added to the buffer as we are
+	 * emptying it.
+	 */
+	for (dwords_read = 0;
+	     fifo_has_data && dwords_read < IGU_FIFO_DEPTH_DWORDS;
+	     dwords_read += IGU_FIFO_ELEMENT_DWORDS, offset +=
+	     IGU_FIFO_ELEMENT_DWORDS) {
+		if (qed_dmae_grc2host(p_hwfn, p_ptt,
+				      IGU_REG_ERROR_HANDLING_MEMORY,
+				      (u64)(uintptr_t)(&dump_buf[offset]),
+				      IGU_FIFO_ELEMENT_DWORDS, 0))
+			return DBG_STATUS_DMAE_FAILED;
+		fifo_has_data =	qed_rd(p_hwfn, p_ptt,
+				       IGU_REG_ERROR_HANDLING_DATA_VALID) > 0;
+	}
+
+	qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
+			   dwords_read);
+
+	*num_dumped_dwords = offset;
+	return DBG_STATUS_OK;
+}
+
+/* Protection Override dump */
+enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn,
+					     struct qed_ptt *p_ptt,
+					     u32 *dump_buf,
+					     bool dump, u32 *num_dumped_dwords)
+{
+	u32 offset = 0, size_param_offset, override_window_dwords;
+
+	*num_dumped_dwords = 0;
+
+	/* Dump global params */
+	offset += qed_dump_common_global_params(p_hwfn,
+						p_ptt,
+						dump_buf + offset, dump, 1);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "dump-type", "protection-override");
+
+	/* Dump data section header and param. The size param is 0 for now, and
+	 * is overwritten after reading the data.
+	 */
+	offset += qed_dump_section_hdr(dump_buf + offset,
+				       dump, "protection_override_data", 1);
+	size_param_offset = offset;
+	offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0);
+
+	if (!dump) {
+		offset += PROTECTION_OVERRIDE_DEPTH_DWORDS;
+		*num_dumped_dwords = offset;
+		return DBG_STATUS_OK;
+	}
+
+	/* Add override window info to buffer */
+	override_window_dwords =
+		qed_rd(p_hwfn, p_ptt,
+		       GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) *
+		       PROTECTION_OVERRIDE_ELEMENT_DWORDS;
+	if (qed_dmae_grc2host(p_hwfn, p_ptt,
+			      GRC_REG_PROTECTION_OVERRIDE_WINDOW,
+			      (u64)(uintptr_t)(dump_buf + offset),
+			      override_window_dwords, 0))
+		return DBG_STATUS_DMAE_FAILED;
+	offset += override_window_dwords;
+	qed_dump_num_param(dump_buf + size_param_offset, dump, "size",
+			   override_window_dwords);
+
+	*num_dumped_dwords = offset;
+	return DBG_STATUS_OK;
+}
+
+/* Performs FW Asserts Dump to the specified buffer.
+ * Returns the dumped size in dwords.
+ */
+static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+			       struct qed_ptt *p_ptt, u32 *dump_buf, bool dump)
+{
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+	char storm_letter_str[2] = "?";
+	struct fw_info fw_info;
+	u32 offset = 0, i;
+	u8 storm_id;
+
+	/* Dump global params */
+	offset += qed_dump_common_global_params(p_hwfn,
+						p_ptt,
+						dump_buf + offset, dump, 1);
+	offset += qed_dump_str_param(dump_buf + offset,
+				     dump, "dump-type", "fw-asserts");
+	for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) {
+		u32 fw_asserts_section_addr, next_list_idx_addr, next_list_idx,
+			last_list_idx, element_addr;
+
+		if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id])
+			continue;
+
+		/* Read FW info for the current Storm */
+		qed_read_fw_info(p_hwfn, p_ptt, storm_id, &fw_info);
+
+		/* Dump FW Asserts section header and params */
+		storm_letter_str[0] = s_storm_defs[storm_id].letter;
+		offset += qed_dump_section_hdr(dump_buf + offset, dump,
+					       "fw_asserts", 2);
+		offset += qed_dump_str_param(dump_buf + offset, dump, "storm",
+					     storm_letter_str);
+		offset += qed_dump_num_param(dump_buf + offset, dump, "size",
+					     fw_info.fw_asserts_section.
+					     list_element_dword_size);
+
+		if (!dump) {
+			offset += fw_info.fw_asserts_section.
+				  list_element_dword_size;
+			continue;
+		}
+
+		/* Read and dump FW Asserts data */
+		fw_asserts_section_addr =
+			s_storm_defs[storm_id].sem_fast_mem_addr +
+			SEM_FAST_REG_INT_RAM +
+			RAM_LINES_TO_BYTES(fw_info.fw_asserts_section.
+					   section_ram_line_offset);
+		next_list_idx_addr =
+			fw_asserts_section_addr +
+			DWORDS_TO_BYTES(fw_info.fw_asserts_section.
+					list_next_index_dword_offset);
+		next_list_idx = qed_rd(p_hwfn, p_ptt, next_list_idx_addr);
+		last_list_idx = (next_list_idx > 0
+				 ? next_list_idx
+				 : fw_info.fw_asserts_section.list_num_elements)
+				- 1;
+		element_addr =
+			fw_asserts_section_addr +
+			DWORDS_TO_BYTES(fw_info.fw_asserts_section.
+					list_dword_offset) +
+			last_list_idx *
+			DWORDS_TO_BYTES(fw_info.fw_asserts_section.
+					list_element_dword_size);
+		for (i = 0;
+		     i < fw_info.fw_asserts_section.list_element_dword_size;
+		     i++, offset++, element_addr += BYTES_IN_DWORD)
+			dump_buf[offset] = qed_rd(p_hwfn, p_ptt, element_addr);
+	}
+
+	/* Dump last section */
+	offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0);
+	return offset;
+}
+
+/***************************** Public Functions *******************************/
+
+enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr)
+{
+	/* Convert binary data to debug arrays */
+	u32 num_of_buffers = *(u32 *)bin_ptr;
+	struct bin_buffer_hdr *buf_array;
+	u8 buf_id;
+
+	buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1);
+
+	for (buf_id = 0; buf_id < num_of_buffers; buf_id++) {
+		s_dbg_arrays[buf_id].ptr =
+		    (u32 *)(bin_ptr + buf_array[buf_id].offset);
+		s_dbg_arrays[buf_id].size_in_dwords =
+		    BYTES_TO_DWORDS(buf_array[buf_id].length);
+	}
+
+	return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+					      struct qed_ptt *p_ptt,
+					      u32 *buf_size)
+{
+	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+	*buf_size = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr ||
+	    !s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr ||
+	    !s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr ||
+	    !s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr ||
+	    !s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)
+		return DBG_STATUS_DBG_ARRAY_NOT_SET;
+	return qed_grc_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt,
+				 u32 *dump_buf,
+				 u32 buf_size_in_dwords,
+				 u32 *num_dumped_dwords)
+{
+	u32 needed_buf_size_in_dwords;
+	enum dbg_status status;
+
+	status = qed_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt,
+					       &needed_buf_size_in_dwords);
+
+	*num_dumped_dwords = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (buf_size_in_dwords < needed_buf_size_in_dwords)
+		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+	/* GRC Dump */
+	status = qed_grc_dump(p_hwfn, p_ptt, dump_buf, true, num_dumped_dwords);
+
+	/* Clear all GRC params */
+	qed_dbg_grc_clear_params(p_hwfn);
+	return status;
+}
+
+enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 *buf_size)
+{
+	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
+
+	*buf_size = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr ||
+	    !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr ||
+	    !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr ||
+	    !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr)
+		return DBG_STATUS_DBG_ARRAY_NOT_SET;
+	if (!dev_data->idle_chk.buf_size_set) {
+		dev_data->idle_chk.buf_size = qed_idle_chk_dump(p_hwfn,
+								p_ptt,
+								NULL, false);
+		dev_data->idle_chk.buf_size_set = true;
+	}
+
+	*buf_size = dev_data->idle_chk.buf_size;
+	return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
+				      u32 *dump_buf,
+				      u32 buf_size_in_dwords,
+				      u32 *num_dumped_dwords)
+{
+	u32 needed_buf_size_in_dwords;
+	enum dbg_status status;
+
+	status = qed_dbg_idle_chk_get_dump_buf_size(p_hwfn, p_ptt,
+						    &needed_buf_size_in_dwords);
+
+	*num_dumped_dwords = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (buf_size_in_dwords < needed_buf_size_in_dwords)
+		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+	/* Update reset state */
+	qed_update_blocks_reset_state(p_hwfn, p_ptt);
+
+	/* Idle Check Dump */
+	*num_dumped_dwords = qed_idle_chk_dump(p_hwfn, p_ptt, dump_buf, true);
+	return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						    struct qed_ptt *p_ptt,
+						    u32 *buf_size)
+{
+	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+	*buf_size = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	return qed_mcp_trace_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+				       struct qed_ptt *p_ptt,
+				       u32 *dump_buf,
+				       u32 buf_size_in_dwords,
+				       u32 *num_dumped_dwords)
+{
+	u32 needed_buf_size_in_dwords;
+	enum dbg_status status;
+
+	status = qed_dbg_mcp_trace_get_dump_buf_size(p_hwfn, p_ptt,
+						&needed_buf_size_in_dwords);
+
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (buf_size_in_dwords < needed_buf_size_in_dwords)
+		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+	/* Update reset state */
+	qed_update_blocks_reset_state(p_hwfn, p_ptt);
+
+	/* Perform dump */
+	return qed_mcp_trace_dump(p_hwfn,
+				  p_ptt, dump_buf, true, num_dumped_dwords);
+}
+
+enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 *buf_size)
+{
+	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+	*buf_size = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	return qed_reg_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
+				      u32 *dump_buf,
+				      u32 buf_size_in_dwords,
+				      u32 *num_dumped_dwords)
+{
+	u32 needed_buf_size_in_dwords;
+	enum dbg_status status;
+
+	status = qed_dbg_reg_fifo_get_dump_buf_size(p_hwfn, p_ptt,
+						    &needed_buf_size_in_dwords);
+
+	*num_dumped_dwords = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (buf_size_in_dwords < needed_buf_size_in_dwords)
+		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+	/* Update reset state */
+	qed_update_blocks_reset_state(p_hwfn, p_ptt);
+	return qed_reg_fifo_dump(p_hwfn,
+				 p_ptt, dump_buf, true, num_dumped_dwords);
+}
+
+enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 *buf_size)
+{
+	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+	*buf_size = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	return qed_igu_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
+				      u32 *dump_buf,
+				      u32 buf_size_in_dwords,
+				      u32 *num_dumped_dwords)
+{
+	u32 needed_buf_size_in_dwords;
+	enum dbg_status status;
+
+	status = qed_dbg_igu_fifo_get_dump_buf_size(p_hwfn, p_ptt,
+						    &needed_buf_size_in_dwords);
+
+	*num_dumped_dwords = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (buf_size_in_dwords < needed_buf_size_in_dwords)
+		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+	/* Update reset state */
+	qed_update_blocks_reset_state(p_hwfn, p_ptt);
+	return qed_igu_fifo_dump(p_hwfn,
+				 p_ptt, dump_buf, true, num_dumped_dwords);
+}
+
+enum dbg_status
+qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+					      struct qed_ptt *p_ptt,
+					      u32 *buf_size)
+{
+	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+	*buf_size = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	return qed_protection_override_dump(p_hwfn,
+					    p_ptt, NULL, false, buf_size);
+}
+
+enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
+						 struct qed_ptt *p_ptt,
+						 u32 *dump_buf,
+						 u32 buf_size_in_dwords,
+						 u32 *num_dumped_dwords)
+{
+	u32 needed_buf_size_in_dwords;
+	enum dbg_status status;
+
+	status = qed_dbg_protection_override_get_dump_buf_size(p_hwfn, p_ptt,
+						&needed_buf_size_in_dwords);
+
+	*num_dumped_dwords = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (buf_size_in_dwords < needed_buf_size_in_dwords)
+		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+	/* Update reset state */
+	qed_update_blocks_reset_state(p_hwfn, p_ptt);
+	return qed_protection_override_dump(p_hwfn,
+					    p_ptt,
+					    dump_buf, true, num_dumped_dwords);
+}
+
+enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						     struct qed_ptt *p_ptt,
+						     u32 *buf_size)
+{
+	enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt);
+
+	*buf_size = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+
+	/* Update reset state */
+	qed_update_blocks_reset_state(p_hwfn, p_ptt);
+	*buf_size = qed_fw_asserts_dump(p_hwfn, p_ptt, NULL, false);
+	return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt,
+					u32 *dump_buf,
+					u32 buf_size_in_dwords,
+					u32 *num_dumped_dwords)
+{
+	u32 needed_buf_size_in_dwords;
+	enum dbg_status status;
+
+	status = qed_dbg_fw_asserts_get_dump_buf_size(p_hwfn, p_ptt,
+						&needed_buf_size_in_dwords);
+
+	*num_dumped_dwords = 0;
+	if (status != DBG_STATUS_OK)
+		return status;
+	if (buf_size_in_dwords < needed_buf_size_in_dwords)
+		return DBG_STATUS_DUMP_BUF_TOO_SMALL;
+
+	*num_dumped_dwords = qed_fw_asserts_dump(p_hwfn, p_ptt, dump_buf, true);
+	return DBG_STATUS_OK;
+}
+
+/******************************* Data Types **********************************/
+
+struct mcp_trace_format {
+	u32 data;
+#define MCP_TRACE_FORMAT_MODULE_MASK	0x0000ffff
+#define MCP_TRACE_FORMAT_MODULE_SHIFT	0
+#define MCP_TRACE_FORMAT_LEVEL_MASK	0x00030000
+#define MCP_TRACE_FORMAT_LEVEL_SHIFT	16
+#define MCP_TRACE_FORMAT_P1_SIZE_MASK	0x000c0000
+#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT	18
+#define MCP_TRACE_FORMAT_P2_SIZE_MASK	0x00300000
+#define MCP_TRACE_FORMAT_P2_SIZE_SHIFT	20
+#define MCP_TRACE_FORMAT_P3_SIZE_MASK	0x00c00000
+#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT	22
+#define MCP_TRACE_FORMAT_LEN_MASK	0xff000000
+#define MCP_TRACE_FORMAT_LEN_SHIFT	24
+	char *format_str;
+};
+
+struct mcp_trace_meta {
+	u32 modules_num;
+	char **modules;
+	u32 formats_num;
+	struct mcp_trace_format *formats;
+};
+
+/* Reg fifo element */
+struct reg_fifo_element {
+	u64 data;
+#define REG_FIFO_ELEMENT_ADDRESS_SHIFT		0
+#define REG_FIFO_ELEMENT_ADDRESS_MASK		0x7fffff
+#define REG_FIFO_ELEMENT_ACCESS_SHIFT		23
+#define REG_FIFO_ELEMENT_ACCESS_MASK		0x1
+#define REG_FIFO_ELEMENT_PF_SHIFT		24
+#define REG_FIFO_ELEMENT_PF_MASK		0xf
+#define REG_FIFO_ELEMENT_VF_SHIFT		28
+#define REG_FIFO_ELEMENT_VF_MASK		0xff
+#define REG_FIFO_ELEMENT_PORT_SHIFT		36
+#define REG_FIFO_ELEMENT_PORT_MASK		0x3
+#define REG_FIFO_ELEMENT_PRIVILEGE_SHIFT	38
+#define REG_FIFO_ELEMENT_PRIVILEGE_MASK		0x3
+#define REG_FIFO_ELEMENT_PROTECTION_SHIFT	40
+#define REG_FIFO_ELEMENT_PROTECTION_MASK	0x7
+#define REG_FIFO_ELEMENT_MASTER_SHIFT		43
+#define REG_FIFO_ELEMENT_MASTER_MASK		0xf
+#define REG_FIFO_ELEMENT_ERROR_SHIFT		47
+#define REG_FIFO_ELEMENT_ERROR_MASK		0x1f
+};
+
+/* IGU fifo element */
+struct igu_fifo_element {
+	u32 dword0;
+#define IGU_FIFO_ELEMENT_DWORD0_FID_SHIFT		0
+#define IGU_FIFO_ELEMENT_DWORD0_FID_MASK		0xff
+#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_SHIFT		8
+#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_MASK		0x1
+#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_SHIFT		9
+#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_MASK		0xf
+#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_SHIFT		13
+#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_MASK		0xf
+#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_SHIFT		17
+#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_MASK		0x7fff
+	u32 dword1;
+	u32 dword2;
+#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_SHIFT	0
+#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_MASK		0x1
+#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_SHIFT		1
+#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_MASK		0xffffffff
+	u32 reserved;
+};
+
+struct igu_fifo_wr_data {
+	u32 data;
+#define IGU_FIFO_WR_DATA_PROD_CONS_SHIFT		0
+#define IGU_FIFO_WR_DATA_PROD_CONS_MASK			0xffffff
+#define IGU_FIFO_WR_DATA_UPDATE_FLAG_SHIFT		24
+#define IGU_FIFO_WR_DATA_UPDATE_FLAG_MASK		0x1
+#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_SHIFT	25
+#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_MASK		0x3
+#define IGU_FIFO_WR_DATA_SEGMENT_SHIFT			27
+#define IGU_FIFO_WR_DATA_SEGMENT_MASK			0x1
+#define IGU_FIFO_WR_DATA_TIMER_MASK_SHIFT		28
+#define IGU_FIFO_WR_DATA_TIMER_MASK_MASK		0x1
+#define IGU_FIFO_WR_DATA_CMD_TYPE_SHIFT			31
+#define IGU_FIFO_WR_DATA_CMD_TYPE_MASK			0x1
+};
+
+struct igu_fifo_cleanup_wr_data {
+	u32 data;
+#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_SHIFT		0
+#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_MASK		0x7ffffff
+#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_SHIFT	27
+#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_MASK	0x1
+#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_SHIFT	28
+#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_MASK	0x7
+#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_SHIFT		31
+#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_MASK		0x1
+};
+
+/* Protection override element */
+struct protection_override_element {
+	u64 data;
+#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_SHIFT		0
+#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_MASK		0x7fffff
+#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_SHIFT		23
+#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_MASK		0xffffff
+#define PROTECTION_OVERRIDE_ELEMENT_READ_SHIFT			47
+#define PROTECTION_OVERRIDE_ELEMENT_READ_MASK			0x1
+#define PROTECTION_OVERRIDE_ELEMENT_WRITE_SHIFT			48
+#define PROTECTION_OVERRIDE_ELEMENT_WRITE_MASK			0x1
+#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_SHIFT	49
+#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_MASK	0x7
+#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_SHIFT	52
+#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_MASK	0x7
+};
+
+enum igu_fifo_sources {
+	IGU_SRC_PXP0,
+	IGU_SRC_PXP1,
+	IGU_SRC_PXP2,
+	IGU_SRC_PXP3,
+	IGU_SRC_PXP4,
+	IGU_SRC_PXP5,
+	IGU_SRC_PXP6,
+	IGU_SRC_PXP7,
+	IGU_SRC_CAU,
+	IGU_SRC_ATTN,
+	IGU_SRC_GRC
+};
+
+enum igu_fifo_addr_types {
+	IGU_ADDR_TYPE_MSIX_MEM,
+	IGU_ADDR_TYPE_WRITE_PBA,
+	IGU_ADDR_TYPE_WRITE_INT_ACK,
+	IGU_ADDR_TYPE_WRITE_ATTN_BITS,
+	IGU_ADDR_TYPE_READ_INT,
+	IGU_ADDR_TYPE_WRITE_PROD_UPDATE,
+	IGU_ADDR_TYPE_RESERVED
+};
+
+struct igu_fifo_addr_data {
+	u16 start_addr;
+	u16 end_addr;
+	char *desc;
+	char *vf_desc;
+	enum igu_fifo_addr_types type;
+};
+
+/******************************** Constants **********************************/
+
+#define MAX_MSG_LEN				1024
+#define MCP_TRACE_MAX_MODULE_LEN		8
+#define MCP_TRACE_FORMAT_MAX_PARAMS		3
+#define MCP_TRACE_FORMAT_PARAM_WIDTH \
+	(MCP_TRACE_FORMAT_P2_SIZE_SHIFT - MCP_TRACE_FORMAT_P1_SIZE_SHIFT)
+#define REG_FIFO_ELEMENT_ADDR_FACTOR		4
+#define REG_FIFO_ELEMENT_IS_PF_VF_VAL		127
+#define PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR	4
+
+/********************************* Macros ************************************/
+
+#define BYTES_TO_DWORDS(bytes)			((bytes) / BYTES_IN_DWORD)
+
+/***************************** Constant Arrays *******************************/
+
+/* Status string array */
+static const char * const s_status_str[] = {
+	"Operation completed successfully",
+	"Debug application version wasn't set",
+	"Unsupported debug application version",
+	"The debug block wasn't reset since the last recording",
+	"Invalid arguments",
+	"The debug output was already set",
+	"Invalid PCI buffer size",
+	"PCI buffer allocation failed",
+	"A PCI buffer wasn't allocated",
+	"Too many inputs were enabled. Enabled less inputs, or set 'unifyInputs' to true",
+	"GRC/Timestamp input overlap in cycle dword 0",
+	"Cannot record Storm data since the entire recording cycle is used by HW",
+	"The Storm was already enabled",
+	"The specified Storm wasn't enabled",
+	"The block was already enabled",
+	"The specified block wasn't enabled",
+	"No input was enabled for recording",
+	"Filters and triggers are not allowed when recording in 64b units",
+	"The filter was already enabled",
+	"The trigger was already enabled",
+	"The trigger wasn't enabled",
+	"A constraint can be added only after a filter was enabled or a trigger state was added",
+	"Cannot add more than 3 trigger states",
+	"Cannot add more than 4 constraints per filter or trigger state",
+	"The recording wasn't started",
+	"A trigger was configured, but it didn't trigger",
+	"No data was recorded",
+	"Dump buffer is too small",
+	"Dumped data is not aligned to chunks",
+	"Unknown chip",
+	"Failed allocating virtual memory",
+	"The input block is in reset",
+	"Invalid MCP trace signature found in NVRAM",
+	"Invalid bundle ID found in NVRAM",
+	"Failed getting NVRAM image",
+	"NVRAM image is not dword-aligned",
+	"Failed reading from NVRAM",
+	"Idle check parsing failed",
+	"MCP Trace data is corrupt",
+	"Dump doesn't contain meta data - it must be provided in an image file",
+	"Failed to halt MCP",
+	"Failed to resume MCP after halt",
+	"DMAE transaction failed",
+	"Failed to empty SEMI sync FIFO",
+	"IGU FIFO data is corrupt",
+	"MCP failed to mask parities",
+	"FW Asserts parsing failed",
+	"GRC FIFO data is corrupt",
+	"Protection Override data is corrupt",
+	"Debug arrays were not set (when using binary files, dbg_set_bin_ptr must be called)",
+	"When a block is filtered, no other blocks can be recorded unless inputs are unified (due to a HW bug)"
+};
+
+/* Idle check severity names array */
+static const char * const s_idle_chk_severity_str[] = {
+	"Error",
+	"Error if no traffic",
+	"Warning"
+};
+
+/* MCP Trace level names array */
+static const char * const s_mcp_trace_level_str[] = {
+	"ERROR",
+	"TRACE",
+	"DEBUG"
+};
+
+/* Parsing strings */
+static const char * const s_access_strs[] = {
+	"read",
+	"write"
+};
+
+static const char * const s_privilege_strs[] = {
+	"VF",
+	"PDA",
+	"HV",
+	"UA"
+};
+
+static const char * const s_protection_strs[] = {
+	"(default)",
+	"(default)",
+	"(default)",
+	"(default)",
+	"override VF",
+	"override PDA",
+	"override HV",
+	"override UA"
+};
+
+static const char * const s_master_strs[] = {
+	"???",
+	"pxp",
+	"mcp",
+	"msdm",
+	"psdm",
+	"ysdm",
+	"usdm",
+	"tsdm",
+	"xsdm",
+	"dbu",
+	"dmae",
+	"???",
+	"???",
+	"???",
+	"???",
+	"???"
+};
+
+static const char * const s_reg_fifo_error_strs[] = {
+	"grc timeout",
+	"address doesn't belong to any block",
+	"reserved address in block or write to read-only address",
+	"privilege/protection mismatch",
+	"path isolation error"
+};
+
+static const char * const s_igu_fifo_source_strs[] = {
+	"TSTORM",
+	"MSTORM",
+	"USTORM",
+	"XSTORM",
+	"YSTORM",
+	"PSTORM",
+	"PCIE",
+	"NIG_QM_PBF",
+	"CAU",
+	"ATTN",
+	"GRC",
+};
+
+static const char * const s_igu_fifo_error_strs[] = {
+	"no error",
+	"length error",
+	"function disabled",
+	"VF sent command to attnetion address",
+	"host sent prod update command",
+	"read of during interrupt register while in MIMD mode",
+	"access to PXP BAR reserved address",
+	"producer update command to attention index",
+	"unknown error",
+	"SB index not valid",
+	"SB relative index and FID not found",
+	"FID not match",
+	"command with error flag asserted (PCI error or CAU discard)",
+	"VF sent cleanup and RF cleanup is disabled",
+	"cleanup command on type bigger than 4"
+};
+
+/* IGU FIFO address data */
+static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = {
+	{0x0, 0x101, "MSI-X Memory", NULL, IGU_ADDR_TYPE_MSIX_MEM},
+	{0x102, 0x1ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED},
+	{0x200, 0x200, "Write PBA[0:63]", NULL, IGU_ADDR_TYPE_WRITE_PBA},
+	{0x201, 0x201, "Write PBA[64:127]", "reserved",
+	 IGU_ADDR_TYPE_WRITE_PBA},
+	{0x202, 0x202, "Write PBA[128]", "reserved", IGU_ADDR_TYPE_WRITE_PBA},
+	{0x203, 0x3ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED},
+	{0x400, 0x5ef, "Write interrupt acknowledgment", NULL,
+	 IGU_ADDR_TYPE_WRITE_INT_ACK},
+	{0x5f0, 0x5f0, "Attention bits update", NULL,
+	 IGU_ADDR_TYPE_WRITE_ATTN_BITS},
+	{0x5f1, 0x5f1, "Attention bits set", NULL,
+	 IGU_ADDR_TYPE_WRITE_ATTN_BITS},
+	{0x5f2, 0x5f2, "Attention bits clear", NULL,
+	 IGU_ADDR_TYPE_WRITE_ATTN_BITS},
+	{0x5f3, 0x5f3, "Read interrupt 0:63 with mask", NULL,
+	 IGU_ADDR_TYPE_READ_INT},
+	{0x5f4, 0x5f4, "Read interrupt 0:31 with mask", NULL,
+	 IGU_ADDR_TYPE_READ_INT},
+	{0x5f5, 0x5f5, "Read interrupt 32:63 with mask", NULL,
+	 IGU_ADDR_TYPE_READ_INT},
+	{0x5f6, 0x5f6, "Read interrupt 0:63 without mask", NULL,
+	 IGU_ADDR_TYPE_READ_INT},
+	{0x5f7, 0x5ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED},
+	{0x600, 0x7ff, "Producer update", NULL, IGU_ADDR_TYPE_WRITE_PROD_UPDATE}
+};
+
+/******************************** Variables **********************************/
+
+/* MCP Trace meta data - used in case the dump doesn't contain the meta data
+ * (e.g. due to no NVRAM access).
+ */
+static struct dbg_array s_mcp_trace_meta = { NULL, 0 };
+
+/* Temporary buffer, used for print size calculations */
+static char s_temp_buf[MAX_MSG_LEN];
+
+/***************************** Public Functions *******************************/
+
+enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr)
+{
+	/* Convert binary data to debug arrays */
+	u32 num_of_buffers = *(u32 *)bin_ptr;
+	struct bin_buffer_hdr *buf_array;
+	u8 buf_id;
+
+	buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1);
+
+	for (buf_id = 0; buf_id < num_of_buffers; buf_id++) {
+		s_dbg_arrays[buf_id].ptr =
+		    (u32 *)(bin_ptr + buf_array[buf_id].offset);
+		s_dbg_arrays[buf_id].size_in_dwords =
+		    BYTES_TO_DWORDS(buf_array[buf_id].length);
+	}
+
+	return DBG_STATUS_OK;
+}
+
+static u32 qed_cyclic_add(u32 a, u32 b, u32 size)
+{
+	return (a + b) % size;
+}
+
+static u32 qed_cyclic_sub(u32 a, u32 b, u32 size)
+{
+	return (size + a - b) % size;
+}
+
+/* Reads the specified number of bytes from the specified cyclic buffer (up to 4
+ * bytes) and returns them as a dword value. the specified buffer offset is
+ * updated.
+ */
+static u32 qed_read_from_cyclic_buf(void *buf,
+				    u32 *offset,
+				    u32 buf_size, u8 num_bytes_to_read)
+{
+	u8 *bytes_buf = (u8 *)buf;
+	u8 *val_ptr;
+	u32 val = 0;
+	u8 i;
+
+	val_ptr = (u8 *)&val;
+
+	for (i = 0; i < num_bytes_to_read; i++) {
+		val_ptr[i] = bytes_buf[*offset];
+		*offset = qed_cyclic_add(*offset, 1, buf_size);
+	}
+
+	return val;
+}
+
+/* Reads and returns the next byte from the specified buffer.
+ * The specified buffer offset is updated.
+ */
+static u8 qed_read_byte_from_buf(void *buf, u32 *offset)
+{
+	return ((u8 *)buf)[(*offset)++];
+}
+
+/* Reads and returns the next dword from the specified buffer.
+ * The specified buffer offset is updated.
+ */
+static u32 qed_read_dword_from_buf(void *buf, u32 *offset)
+{
+	u32 dword_val = *(u32 *)&((u8 *)buf)[*offset];
+
+	*offset += 4;
+	return dword_val;
+}
+
+/* Reads the next string from the specified buffer, and copies it to the
+ * specified pointer. The specified buffer offset is updated.
+ */
+static void qed_read_str_from_buf(void *buf, u32 *offset, u32 size, char *dest)
+{
+	const char *source_str = &((const char *)buf)[*offset];
+
+	strncpy(dest, source_str, size);
+	dest[size - 1] = '\0';
+	*offset += size;
+}
+
+/* Returns a pointer to the specified offset (in bytes) of the specified buffer.
+ * If the specified buffer in NULL, a temporary buffer pointer is returned.
+ */
+static char *qed_get_buf_ptr(void *buf, u32 offset)
+{
+	return buf ? (char *)buf + offset : s_temp_buf;
+}
+
+/* Reads a param from the specified buffer. Returns the number of dwords read.
+ * If the returned str_param is NULL, the param is numeric and its value is
+ * returned in num_param.
+ * Otheriwise, the param is a string and its pointer is returned in str_param.
+ */
+static u32 qed_read_param(u32 *dump_buf,
+			  const char **param_name,
+			  const char **param_str_val, u32 *param_num_val)
+{
+	char *char_buf = (char *)dump_buf;
+	u32 offset = 0; /* In bytes */
+
+	/* Extract param name */
+	*param_name = char_buf;
+	offset += strlen(*param_name) + 1;
+
+	/* Check param type */
+	if (*(char_buf + offset++)) {
+		/* String param */
+		*param_str_val = char_buf + offset;
+		offset += strlen(*param_str_val) + 1;
+		if (offset & 0x3)
+			offset += (4 - (offset & 0x3));
+	} else {
+		/* Numeric param */
+		*param_str_val = NULL;
+		if (offset & 0x3)
+			offset += (4 - (offset & 0x3));
+		*param_num_val = *(u32 *)(char_buf + offset);
+		offset += 4;
+	}
+
+	return offset / 4;
+}
+
+/* Reads a section header from the specified buffer.
+ * Returns the number of dwords read.
+ */
+static u32 qed_read_section_hdr(u32 *dump_buf,
+				const char **section_name,
+				u32 *num_section_params)
+{
+	const char *param_str_val;
+
+	return qed_read_param(dump_buf,
+			      section_name, &param_str_val, num_section_params);
+}
+
+/* Reads section params from the specified buffer and prints them to the results
+ * buffer. Returns the number of dwords read.
+ */
+static u32 qed_print_section_params(u32 *dump_buf,
+				    u32 num_section_params,
+				    char *results_buf, u32 *num_chars_printed)
+{
+	u32 i, dump_offset = 0, results_offset = 0;
+
+	for (i = 0; i < num_section_params; i++) {
+		const char *param_name;
+		const char *param_str_val;
+		u32 param_num_val = 0;
+
+		dump_offset += qed_read_param(dump_buf + dump_offset,
+					      &param_name,
+					      &param_str_val, &param_num_val);
+		if (param_str_val)
+			/* String param */
+			results_offset +=
+				sprintf(qed_get_buf_ptr(results_buf,
+							results_offset),
+					"%s: %s\n", param_name, param_str_val);
+		else if (strcmp(param_name, "fw-timestamp"))
+			/* Numeric param */
+			results_offset +=
+				sprintf(qed_get_buf_ptr(results_buf,
+							results_offset),
+					"%s: %d\n", param_name, param_num_val);
+	}
+
+	results_offset +=
+	    sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+	*num_chars_printed = results_offset;
+	return dump_offset;
+}
+
+const char *qed_dbg_get_status_str(enum dbg_status status)
+{
+	return (status <
+		MAX_DBG_STATUS) ? s_status_str[status] : "Invalid debug status";
+}
+
+/* Parses the idle check rules and returns the number of characters printed.
+ * In case of parsing error, returns 0.
+ */
+static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn,
+					 u32 *dump_buf,
+					 u32 *dump_buf_end,
+					 u32 num_rules,
+					 bool print_fw_idle_chk,
+					 char *results_buf,
+					 u32 *num_errors, u32 *num_warnings)
+{
+	u32 rule_idx, results_offset = 0; /* Offset in results_buf in bytes */
+	u16 i, j;
+
+	*num_errors = 0;
+	*num_warnings = 0;
+
+	/* Go over dumped results */
+	for (rule_idx = 0; rule_idx < num_rules && dump_buf < dump_buf_end;
+	     rule_idx++) {
+		const struct dbg_idle_chk_rule_parsing_data *rule_parsing_data;
+		struct dbg_idle_chk_result_hdr *hdr;
+		const char *parsing_str;
+		u32 parsing_str_offset;
+		const char *lsi_msg;
+		u8 curr_reg_id = 0;
+		bool has_fw_msg;
+
+		hdr = (struct dbg_idle_chk_result_hdr *)dump_buf;
+		rule_parsing_data =
+			(const struct dbg_idle_chk_rule_parsing_data *)
+			&s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].
+			ptr[hdr->rule_id];
+		parsing_str_offset =
+			GET_FIELD(rule_parsing_data->data,
+				  DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET);
+		has_fw_msg =
+			GET_FIELD(rule_parsing_data->data,
+				DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG) > 0;
+		parsing_str = &((const char *)
+				s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr)
+				[parsing_str_offset];
+		lsi_msg = parsing_str;
+
+		if (hdr->severity >= MAX_DBG_IDLE_CHK_SEVERITY_TYPES)
+			return 0;
+
+		/* Skip rule header */
+		dump_buf += (sizeof(struct dbg_idle_chk_result_hdr) / 4);
+
+		/* Update errors/warnings count */
+		if (hdr->severity == IDLE_CHK_SEVERITY_ERROR ||
+		    hdr->severity == IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC)
+			(*num_errors)++;
+		else
+			(*num_warnings)++;
+
+		/* Print rule severity */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset), "%s: ",
+			    s_idle_chk_severity_str[hdr->severity]);
+
+		/* Print rule message */
+		if (has_fw_msg)
+			parsing_str += strlen(parsing_str) + 1;
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset), "%s.",
+			    has_fw_msg &&
+			    print_fw_idle_chk ? parsing_str : lsi_msg);
+		parsing_str += strlen(parsing_str) + 1;
+
+		/* Print register values */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset), " Registers:");
+		for (i = 0;
+		     i < hdr->num_dumped_cond_regs + hdr->num_dumped_info_regs;
+		     i++) {
+			struct dbg_idle_chk_result_reg_hdr *reg_hdr
+			    = (struct dbg_idle_chk_result_reg_hdr *)
+			    dump_buf;
+			bool is_mem =
+				GET_FIELD(reg_hdr->data,
+					  DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM);
+			u8 reg_id =
+				GET_FIELD(reg_hdr->data,
+					  DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID);
+
+			/* Skip reg header */
+			dump_buf +=
+			    (sizeof(struct dbg_idle_chk_result_reg_hdr) / 4);
+
+			/* Skip register names until the required reg_id is
+			 * reached.
+			 */
+			for (; reg_id > curr_reg_id;
+			     curr_reg_id++,
+			     parsing_str += strlen(parsing_str) + 1);
+
+			results_offset +=
+			    sprintf(qed_get_buf_ptr(results_buf,
+						    results_offset), " %s",
+				    parsing_str);
+			if (i < hdr->num_dumped_cond_regs && is_mem)
+				results_offset +=
+				    sprintf(qed_get_buf_ptr(results_buf,
+							    results_offset),
+					    "[%d]", hdr->mem_entry_id +
+					    reg_hdr->start_entry);
+			results_offset +=
+			    sprintf(qed_get_buf_ptr(results_buf,
+						    results_offset), "=");
+			for (j = 0; j < reg_hdr->size; j++, dump_buf++) {
+				results_offset +=
+				    sprintf(qed_get_buf_ptr(results_buf,
+							    results_offset),
+					    "0x%x", *dump_buf);
+				if (j < reg_hdr->size - 1)
+					results_offset +=
+					    sprintf(qed_get_buf_ptr
+						    (results_buf,
+						     results_offset), ",");
+			}
+		}
+
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+	}
+
+	/* Check if end of dump buffer was exceeded */
+	if (dump_buf > dump_buf_end)
+		return 0;
+	return results_offset;
+}
+
+/* Parses an idle check dump buffer.
+ * If result_buf is not NULL, the idle check results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn,
+					       u32 *dump_buf,
+					       u32 num_dumped_dwords,
+					       char *results_buf,
+					       u32 *parsed_results_bytes,
+					       u32 *num_errors,
+					       u32 *num_warnings)
+{
+	const char *section_name, *param_name, *param_str_val;
+	u32 *dump_buf_end = dump_buf + num_dumped_dwords;
+	u32 num_section_params = 0, num_rules;
+	u32 results_offset = 0;	/* Offset in results_buf in bytes */
+
+	*parsed_results_bytes = 0;
+	*num_errors = 0;
+	*num_warnings = 0;
+	if (!s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr ||
+	    !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr)
+		return DBG_STATUS_DBG_ARRAY_NOT_SET;
+
+	/* Read global_params section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "global_params"))
+		return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+	/* Print global params */
+	dump_buf += qed_print_section_params(dump_buf,
+					     num_section_params,
+					     results_buf, &results_offset);
+
+	/* Read idle_chk section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "idle_chk") || num_section_params != 1)
+		return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &num_rules);
+	if (strcmp(param_name, "num_rules") != 0)
+		return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+	if (num_rules) {
+		u32 rules_print_size;
+
+		/* Print FW output */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "FW_IDLE_CHECK:\n");
+		rules_print_size =
+			qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf,
+						      dump_buf_end, num_rules,
+						      true,
+						      results_buf ?
+						      results_buf +
+						      results_offset : NULL,
+						      num_errors, num_warnings);
+		results_offset += rules_print_size;
+		if (rules_print_size == 0)
+			return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+
+		/* Print LSI output */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "\nLSI_IDLE_CHECK:\n");
+		rules_print_size =
+			qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf,
+						      dump_buf_end, num_rules,
+						      false,
+						      results_buf ?
+						      results_buf +
+						      results_offset : NULL,
+						      num_errors, num_warnings);
+		results_offset += rules_print_size;
+		if (rules_print_size == 0)
+			return DBG_STATUS_IDLE_CHK_PARSE_FAILED;
+	}
+
+	/* Print errors/warnings count */
+	if (*num_errors) {
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "\nIdle Check failed!!! (with %d errors and %d warnings)\n",
+			    *num_errors, *num_warnings);
+	} else if (*num_warnings) {
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "\nIdle Check completed successfuly (with %d warnings)\n",
+			    *num_warnings);
+	} else {
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "\nIdle Check completed successfuly\n");
+	}
+
+	/* Add 1 for string NULL termination */
+	*parsed_results_bytes = results_offset + 1;
+	return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size)
+{
+	u32 num_errors, num_warnings;
+
+	return qed_parse_idle_chk_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       NULL,
+				       results_buf_size,
+				       &num_errors, &num_warnings);
+}
+
+enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf,
+					   u32 *num_errors, u32 *num_warnings)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_idle_chk_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       results_buf,
+				       &parsed_buf_size,
+				       num_errors, num_warnings);
+}
+
+/* Frees the specified MCP Trace meta data */
+static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn,
+				    struct mcp_trace_meta *meta)
+{
+	u32 i;
+
+	/* Release modules */
+	if (meta->modules) {
+		for (i = 0; i < meta->modules_num; i++)
+			kfree(meta->modules[i]);
+		kfree(meta->modules);
+	}
+
+	/* Release formats */
+	if (meta->formats) {
+		for (i = 0; i < meta->formats_num; i++)
+			kfree(meta->formats[i].format_str);
+		kfree(meta->formats);
+	}
+}
+
+/* Allocates and fills MCP Trace meta data based on the specified meta data
+ * dump buffer.
+ * Returns debug status code.
+ */
+static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
+						const u32 *meta_buf,
+						struct mcp_trace_meta *meta)
+{
+	u8 *meta_buf_bytes = (u8 *)meta_buf;
+	u32 offset = 0, signature, i;
+
+	memset(meta, 0, sizeof(*meta));
+
+	/* Read first signature */
+	signature = qed_read_dword_from_buf(meta_buf_bytes, &offset);
+	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+	/* Read number of modules and allocate memory for all the modules
+	 * pointers.
+	 */
+	meta->modules_num = qed_read_byte_from_buf(meta_buf_bytes, &offset);
+	meta->modules = kzalloc(meta->modules_num * sizeof(char *), GFP_KERNEL);
+	if (!meta->modules)
+		return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+	/* Allocate and read all module strings */
+	for (i = 0; i < meta->modules_num; i++) {
+		u8 module_len = qed_read_byte_from_buf(meta_buf_bytes, &offset);
+
+		*(meta->modules + i) = kzalloc(module_len, GFP_KERNEL);
+		if (!(*(meta->modules + i))) {
+			/* Update number of modules to be released */
+			meta->modules_num = i ? i - 1 : 0;
+			return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+		}
+
+		qed_read_str_from_buf(meta_buf_bytes, &offset, module_len,
+				      *(meta->modules + i));
+		if (module_len > MCP_TRACE_MAX_MODULE_LEN)
+			(*(meta->modules + i))[MCP_TRACE_MAX_MODULE_LEN] = '\0';
+	}
+
+	/* Read second signature */
+	signature = qed_read_dword_from_buf(meta_buf_bytes, &offset);
+	if (signature != MCP_TRACE_META_IMAGE_SIGNATURE)
+		return DBG_STATUS_INVALID_TRACE_SIGNATURE;
+
+	/* Read number of formats and allocate memory for all formats */
+	meta->formats_num = qed_read_dword_from_buf(meta_buf_bytes, &offset);
+	meta->formats = kzalloc(meta->formats_num *
+				sizeof(struct mcp_trace_format),
+				GFP_KERNEL);
+	if (!meta->formats)
+		return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+	/* Allocate and read all strings */
+	for (i = 0; i < meta->formats_num; i++) {
+		struct mcp_trace_format *format_ptr = &meta->formats[i];
+		u8 format_len;
+
+		format_ptr->data = qed_read_dword_from_buf(meta_buf_bytes,
+							   &offset);
+		format_len =
+		    (format_ptr->data &
+		     MCP_TRACE_FORMAT_LEN_MASK) >> MCP_TRACE_FORMAT_LEN_SHIFT;
+		format_ptr->format_str = kzalloc(format_len, GFP_KERNEL);
+		if (!format_ptr->format_str) {
+			/* Update number of modules to be released */
+			meta->formats_num = i ? i - 1 : 0;
+			return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+		}
+
+		qed_read_str_from_buf(meta_buf_bytes,
+				      &offset,
+				      format_len, format_ptr->format_str);
+	}
+
+	return DBG_STATUS_OK;
+}
+
+/* Parses an MCP Trace dump buffer.
+ * If result_buf is not NULL, the MCP Trace results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+						u32 *dump_buf,
+						u32 num_dumped_dwords,
+						char *results_buf,
+						u32 *parsed_results_bytes)
+{
+	u32 results_offset = 0, param_mask, param_shift, param_num_val;
+	u32 num_section_params, offset, end_offset, bytes_left;
+	const char *section_name, *param_name, *param_str_val;
+	u32 trace_data_dwords, trace_meta_dwords;
+	struct mcp_trace_meta meta;
+	struct mcp_trace *trace;
+	enum dbg_status status;
+	const u32 *meta_buf;
+	u8 *trace_buf;
+
+	*parsed_results_bytes = 0;
+
+	/* Read global_params section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "global_params"))
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
+	/* Print global params */
+	dump_buf += qed_print_section_params(dump_buf,
+					     num_section_params,
+					     results_buf, &results_offset);
+
+	/* Read trace_data section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "mcp_trace_data") || num_section_params != 1)
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &param_num_val);
+	if (strcmp(param_name, "size"))
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+	trace_data_dwords = param_num_val;
+
+	/* Prepare trace info */
+	trace = (struct mcp_trace *)dump_buf;
+	trace_buf = (u8 *)dump_buf + sizeof(struct mcp_trace);
+	offset = trace->trace_oldest;
+	end_offset = trace->trace_prod;
+	bytes_left = qed_cyclic_sub(end_offset, offset, trace->size);
+	dump_buf += trace_data_dwords;
+
+	/* Read meta_data section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "mcp_trace_meta"))
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &param_num_val);
+	if (strcmp(param_name, "size") != 0)
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+	trace_meta_dwords = param_num_val;
+
+	/* Choose meta data buffer */
+	if (!trace_meta_dwords) {
+		/* Dump doesn't include meta data */
+		if (!s_mcp_trace_meta.ptr)
+			return DBG_STATUS_MCP_TRACE_NO_META;
+		meta_buf = s_mcp_trace_meta.ptr;
+	} else {
+		/* Dump includes meta data */
+		meta_buf = dump_buf;
+	}
+
+	/* Allocate meta data memory */
+	status = qed_mcp_trace_alloc_meta(p_hwfn, meta_buf, &meta);
+	if (status != DBG_STATUS_OK)
+		goto free_mem;
+
+	/* Ignore the level and modules masks - just print everything that is
+	 * already in the buffer.
+	 */
+	while (bytes_left) {
+		struct mcp_trace_format *format_ptr;
+		u8 format_level, format_module;
+		u32 params[3] = { 0, 0, 0 };
+		u32 header, format_idx, i;
+
+		if (bytes_left < MFW_TRACE_ENTRY_SIZE) {
+			status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+			goto free_mem;
+		}
+
+		header = qed_read_from_cyclic_buf(trace_buf,
+						  &offset,
+						  trace->size,
+						  MFW_TRACE_ENTRY_SIZE);
+		bytes_left -= MFW_TRACE_ENTRY_SIZE;
+		format_idx = header & MFW_TRACE_EVENTID_MASK;
+
+		/* Skip message if its  index doesn't exist in the meta data */
+		if (format_idx > meta.formats_num) {
+			u8 format_size =
+			    (u8)((header &
+				  MFW_TRACE_PRM_SIZE_MASK) >>
+				 MFW_TRACE_PRM_SIZE_SHIFT);
+
+			if (bytes_left < format_size) {
+				status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+				goto free_mem;
+			}
+
+			offset = qed_cyclic_add(offset,
+						format_size, trace->size);
+			bytes_left -= format_size;
+			continue;
+		}
+
+		format_ptr = &meta.formats[format_idx];
+		for (i = 0,
+		     param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK, param_shift =
+		     MCP_TRACE_FORMAT_P1_SIZE_SHIFT;
+		     i < MCP_TRACE_FORMAT_MAX_PARAMS;
+		     i++, param_mask <<= MCP_TRACE_FORMAT_PARAM_WIDTH,
+		     param_shift += MCP_TRACE_FORMAT_PARAM_WIDTH) {
+			/* Extract param size (0..3) */
+			u8 param_size =
+			    (u8)((format_ptr->data &
+				  param_mask) >> param_shift);
+
+			/* If the param size is zero, there are no other
+			 * parameters.
+			 */
+			if (!param_size)
+				break;
+
+			/* Size is encoded using 2 bits, where 3 is used to
+			 * encode 4.
+			 */
+			if (param_size == 3)
+				param_size = 4;
+			if (bytes_left < param_size) {
+				status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+				goto free_mem;
+			}
+
+			params[i] = qed_read_from_cyclic_buf(trace_buf,
+							     &offset,
+							     trace->size,
+							     param_size);
+			bytes_left -= param_size;
+		}
+
+		format_level =
+		    (u8)((format_ptr->data &
+			  MCP_TRACE_FORMAT_LEVEL_MASK) >>
+			  MCP_TRACE_FORMAT_LEVEL_SHIFT);
+		format_module =
+		    (u8)((format_ptr->data &
+			  MCP_TRACE_FORMAT_MODULE_MASK) >>
+			 MCP_TRACE_FORMAT_MODULE_SHIFT);
+		if (format_level >= ARRAY_SIZE(s_mcp_trace_level_str)) {
+			status = DBG_STATUS_MCP_TRACE_BAD_DATA;
+			goto free_mem;
+		}
+
+		/* Print current message to results buffer */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset), "%s %-8s: ",
+			    s_mcp_trace_level_str[format_level],
+			    meta.modules[format_module]);
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    format_ptr->format_str, params[0], params[1],
+			    params[2]);
+	}
+
+free_mem:
+	*parsed_results_bytes = results_offset + 1;
+	qed_mcp_trace_free_meta(p_hwfn, &meta);
+	return status;
+}
+
+enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
+						   u32 *dump_buf,
+						   u32 num_dumped_dwords,
+						   u32 *results_buf_size)
+{
+	return qed_parse_mcp_trace_dump(p_hwfn,
+					dump_buf,
+					num_dumped_dwords,
+					NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
+					    u32 *dump_buf,
+					    u32 num_dumped_dwords,
+					    char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_mcp_trace_dump(p_hwfn,
+					dump_buf,
+					num_dumped_dwords,
+					results_buf, &parsed_buf_size);
+}
+
+/* Parses a Reg FIFO dump buffer.
+ * If result_buf is not NULL, the Reg FIFO results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+					       u32 *dump_buf,
+					       u32 num_dumped_dwords,
+					       char *results_buf,
+					       u32 *parsed_results_bytes)
+{
+	u32 results_offset = 0, param_num_val, num_section_params, num_elements;
+	const char *section_name, *param_name, *param_str_val;
+	struct reg_fifo_element *elements;
+	u8 i, j, err_val, vf_val;
+	char vf_str[4];
+
+	/* Read global_params section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "global_params"))
+		return DBG_STATUS_REG_FIFO_BAD_DATA;
+
+	/* Print global params */
+	dump_buf += qed_print_section_params(dump_buf,
+					     num_section_params,
+					     results_buf, &results_offset);
+
+	/* Read reg_fifo_data section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "reg_fifo_data"))
+		return DBG_STATUS_REG_FIFO_BAD_DATA;
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &param_num_val);
+	if (strcmp(param_name, "size"))
+		return DBG_STATUS_REG_FIFO_BAD_DATA;
+	if (param_num_val % REG_FIFO_ELEMENT_DWORDS)
+		return DBG_STATUS_REG_FIFO_BAD_DATA;
+	num_elements = param_num_val / REG_FIFO_ELEMENT_DWORDS;
+	elements = (struct reg_fifo_element *)dump_buf;
+
+	/* Decode elements */
+	for (i = 0; i < num_elements; i++) {
+		bool err_printed = false;
+
+		/* Discover if element belongs to a VF or a PF */
+		vf_val = GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_VF);
+		if (vf_val == REG_FIFO_ELEMENT_IS_PF_VF_VAL)
+			sprintf(vf_str, "%s", "N/A");
+		else
+			sprintf(vf_str, "%d", vf_val);
+
+		/* Add parsed element to parsed buffer */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "raw: 0x%016llx, address: 0x%07llx, access: %-5s, pf: %2lld, vf: %s, port: %lld, privilege: %-3s, protection: %-12s, master: %-4s, errors: ",
+			    elements[i].data,
+			    GET_FIELD(elements[i].data,
+				      REG_FIFO_ELEMENT_ADDRESS) *
+				      REG_FIFO_ELEMENT_ADDR_FACTOR,
+				      s_access_strs[GET_FIELD(elements[i].data,
+						    REG_FIFO_ELEMENT_ACCESS)],
+			    GET_FIELD(elements[i].data,
+				      REG_FIFO_ELEMENT_PF), vf_str,
+			    GET_FIELD(elements[i].data,
+				      REG_FIFO_ELEMENT_PORT),
+				      s_privilege_strs[GET_FIELD(elements[i].
+				      data,
+				      REG_FIFO_ELEMENT_PRIVILEGE)],
+			    s_protection_strs[GET_FIELD(elements[i].data,
+						REG_FIFO_ELEMENT_PROTECTION)],
+			    s_master_strs[GET_FIELD(elements[i].data,
+						REG_FIFO_ELEMENT_MASTER)]);
+
+		/* Print errors */
+		for (j = 0,
+		     err_val = GET_FIELD(elements[i].data,
+					 REG_FIFO_ELEMENT_ERROR);
+		     j < ARRAY_SIZE(s_reg_fifo_error_strs);
+		     j++, err_val >>= 1) {
+			if (!(err_val & 0x1))
+				continue;
+			if (err_printed)
+				results_offset +=
+					sprintf(qed_get_buf_ptr(results_buf,
+								results_offset),
+						", ");
+			results_offset +=
+				sprintf(qed_get_buf_ptr(results_buf,
+							results_offset), "%s",
+					s_reg_fifo_error_strs[j]);
+			err_printed = true;
+		}
+
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n");
+	}
+
+	results_offset += sprintf(qed_get_buf_ptr(results_buf,
+						  results_offset),
+				  "fifo contained %d elements", num_elements);
+
+	/* Add 1 for string NULL termination */
+	*parsed_results_bytes = results_offset + 1;
+	return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size)
+{
+	return qed_parse_reg_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_reg_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       results_buf, &parsed_buf_size);
+}
+
+/* Parses an IGU FIFO dump buffer.
+ * If result_buf is not NULL, the IGU FIFO results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+					       u32 *dump_buf,
+					       u32 num_dumped_dwords,
+					       char *results_buf,
+					       u32 *parsed_results_bytes)
+{
+	u32 results_offset = 0, param_num_val, num_section_params, num_elements;
+	const char *section_name, *param_name, *param_str_val;
+	struct igu_fifo_element *elements;
+	char parsed_addr_data[32];
+	char parsed_wr_data[256];
+	u8 i, j;
+
+	/* Read global_params section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "global_params"))
+		return DBG_STATUS_IGU_FIFO_BAD_DATA;
+
+	/* Print global params */
+	dump_buf += qed_print_section_params(dump_buf,
+					     num_section_params,
+					     results_buf, &results_offset);
+
+	/* Read igu_fifo_data section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "igu_fifo_data"))
+		return DBG_STATUS_IGU_FIFO_BAD_DATA;
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &param_num_val);
+	if (strcmp(param_name, "size"))
+		return DBG_STATUS_IGU_FIFO_BAD_DATA;
+	if (param_num_val % IGU_FIFO_ELEMENT_DWORDS)
+		return DBG_STATUS_IGU_FIFO_BAD_DATA;
+	num_elements = param_num_val / IGU_FIFO_ELEMENT_DWORDS;
+	elements = (struct igu_fifo_element *)dump_buf;
+
+	/* Decode elements */
+	for (i = 0; i < num_elements; i++) {
+		/* dword12 (dword index 1 and 2) contains bits 32..95 of the
+		 * FIFO element.
+		 */
+		u64 dword12 =
+		    ((u64)elements[i].dword2 << 32) | elements[i].dword1;
+		bool is_wr_cmd = GET_FIELD(dword12,
+					   IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD);
+		bool is_pf = GET_FIELD(elements[i].dword0,
+				       IGU_FIFO_ELEMENT_DWORD0_IS_PF);
+		u16 cmd_addr = GET_FIELD(elements[i].dword0,
+					 IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR);
+		u8 source = GET_FIELD(elements[i].dword0,
+				      IGU_FIFO_ELEMENT_DWORD0_SOURCE);
+		u8 err_type = GET_FIELD(elements[i].dword0,
+					IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE);
+		const struct igu_fifo_addr_data *addr_data = NULL;
+
+		if (source >= ARRAY_SIZE(s_igu_fifo_source_strs))
+			return DBG_STATUS_IGU_FIFO_BAD_DATA;
+		if (err_type >= ARRAY_SIZE(s_igu_fifo_error_strs))
+			return DBG_STATUS_IGU_FIFO_BAD_DATA;
+
+		/* Find address data */
+		for (j = 0; j < ARRAY_SIZE(s_igu_fifo_addr_data) && !addr_data;
+		     j++)
+			if (cmd_addr >= s_igu_fifo_addr_data[j].start_addr &&
+			    cmd_addr <= s_igu_fifo_addr_data[j].end_addr)
+				addr_data = &s_igu_fifo_addr_data[j];
+		if (!addr_data)
+			return DBG_STATUS_IGU_FIFO_BAD_DATA;
+
+		/* Prepare parsed address data */
+		switch (addr_data->type) {
+		case IGU_ADDR_TYPE_MSIX_MEM:
+			sprintf(parsed_addr_data,
+				" vector_num=0x%x", cmd_addr / 2);
+			break;
+		case IGU_ADDR_TYPE_WRITE_INT_ACK:
+		case IGU_ADDR_TYPE_WRITE_PROD_UPDATE:
+			sprintf(parsed_addr_data,
+				" SB=0x%x", cmd_addr - addr_data->start_addr);
+			break;
+		default:
+			parsed_addr_data[0] = '\0';
+		}
+
+		/* Prepare parsed write data */
+		if (is_wr_cmd) {
+			u32 wr_data = GET_FIELD(dword12,
+					IGU_FIFO_ELEMENT_DWORD12_WR_DATA);
+			u32 prod_cons = GET_FIELD(wr_data,
+						  IGU_FIFO_WR_DATA_PROD_CONS);
+			u8 is_cleanup = GET_FIELD(wr_data,
+						  IGU_FIFO_WR_DATA_CMD_TYPE);
+
+			if (source == IGU_SRC_ATTN) {
+				sprintf(parsed_wr_data,
+					"prod: 0x%x, ", prod_cons);
+			} else {
+				if (is_cleanup) {
+					u8 cleanup_val = GET_FIELD(wr_data,
+								   IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL);
+					u8 cleanup_type = GET_FIELD(wr_data,
+								    IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE);
+
+					sprintf(parsed_wr_data,
+						"cmd_type: cleanup, cleanup_val: %s, cleanup_type: %d, ",
+						cleanup_val ? "set" : "clear",
+						cleanup_type);
+				} else {
+					u8 update_flag = GET_FIELD(wr_data,
+								   IGU_FIFO_WR_DATA_UPDATE_FLAG);
+					u8 en_dis_int_for_sb =
+					    GET_FIELD(wr_data,
+						      IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB);
+					u8 segment = GET_FIELD(wr_data,
+							       IGU_FIFO_WR_DATA_SEGMENT);
+					u8 timer_mask = GET_FIELD(wr_data,
+								  IGU_FIFO_WR_DATA_TIMER_MASK);
+
+					sprintf(parsed_wr_data,
+						"cmd_type: prod/cons update, prod/cons: 0x%x, update_flag: %s, en_dis_int_for_sb: %s, segment: %s, timer_mask=%d, ",
+						prod_cons,
+						update_flag ? "update" : "nop",
+						en_dis_int_for_sb
+						? (en_dis_int_for_sb ==
+						   1 ? "disable" : "nop") :
+						"enable",
+						segment ? "attn" : "regular",
+						timer_mask);
+				}
+			}
+		} else {
+			parsed_wr_data[0] = '\0';
+		}
+
+		/* Add parsed element to parsed buffer */
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "raw: 0x%01x%08x%08x, %s: %d, source: %s, type: %s, cmd_addr: 0x%x (%s%s), %serror: %s\n",
+			    elements[i].dword2, elements[i].dword1,
+			    elements[i].dword0,
+			    is_pf ? "pf" : "vf",
+			    GET_FIELD(elements[i].dword0,
+				      IGU_FIFO_ELEMENT_DWORD0_FID),
+			    s_igu_fifo_source_strs[source],
+			    is_wr_cmd ? "wr" : "rd", cmd_addr,
+			    (!is_pf && addr_data->vf_desc)
+			    ? addr_data->vf_desc : addr_data->desc,
+			    parsed_addr_data, parsed_wr_data,
+			    s_igu_fifo_error_strs[err_type]);
+	}
+
+	results_offset += sprintf(qed_get_buf_ptr(results_buf,
+						  results_offset),
+				  "fifo contained %d elements", num_elements);
+
+	/* Add 1 for string NULL termination */
+	*parsed_results_bytes = results_offset + 1;
+	return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size)
+{
+	return qed_parse_igu_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_igu_fifo_dump(p_hwfn,
+				       dump_buf,
+				       num_dumped_dwords,
+				       results_buf, &parsed_buf_size);
+}
+
+static enum dbg_status
+qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn,
+				   u32 *dump_buf,
+				   u32 num_dumped_dwords,
+				   char *results_buf,
+				   u32 *parsed_results_bytes)
+{
+	u32 results_offset = 0, param_num_val, num_section_params, num_elements;
+	const char *section_name, *param_name, *param_str_val;
+	struct protection_override_element *elements;
+	u8 i;
+
+	/* Read global_params section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "global_params"))
+		return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
+
+	/* Print global params */
+	dump_buf += qed_print_section_params(dump_buf,
+					     num_section_params,
+					     results_buf, &results_offset);
+
+	/* Read protection_override_data section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "protection_override_data"))
+		return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
+	dump_buf += qed_read_param(dump_buf,
+				   &param_name, &param_str_val, &param_num_val);
+	if (strcmp(param_name, "size"))
+		return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
+	if (param_num_val % PROTECTION_OVERRIDE_ELEMENT_DWORDS != 0)
+		return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA;
+	num_elements = param_num_val / PROTECTION_OVERRIDE_ELEMENT_DWORDS;
+	elements = (struct protection_override_element *)dump_buf;
+
+	/* Decode elements */
+	for (i = 0; i < num_elements; i++) {
+		u32 address = GET_FIELD(elements[i].data,
+					PROTECTION_OVERRIDE_ELEMENT_ADDRESS) *
+					PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR;
+
+		results_offset +=
+		    sprintf(qed_get_buf_ptr(results_buf,
+					    results_offset),
+			    "window %2d, address: 0x%07x, size: %7lld regs, read: %lld, write: %lld, read protection: %-12s, write protection: %-12s\n",
+			    i, address,
+			    GET_FIELD(elements[i].data,
+				      PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE),
+			    GET_FIELD(elements[i].data,
+				      PROTECTION_OVERRIDE_ELEMENT_READ),
+			    GET_FIELD(elements[i].data,
+				      PROTECTION_OVERRIDE_ELEMENT_WRITE),
+			    s_protection_strs[GET_FIELD(elements[i].data,
+				PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION)],
+			    s_protection_strs[GET_FIELD(elements[i].data,
+				PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION)]);
+	}
+
+	results_offset += sprintf(qed_get_buf_ptr(results_buf,
+						  results_offset),
+				  "protection override contained %d elements",
+				  num_elements);
+
+	/* Add 1 for string NULL termination */
+	*parsed_results_bytes = results_offset + 1;
+	return DBG_STATUS_OK;
+}
+
+enum dbg_status
+qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
+					     u32 *dump_buf,
+					     u32 num_dumped_dwords,
+					     u32 *results_buf_size)
+{
+	return qed_parse_protection_override_dump(p_hwfn,
+						  dump_buf,
+						  num_dumped_dwords,
+						  NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
+						      u32 *dump_buf,
+						      u32 num_dumped_dwords,
+						      char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_protection_override_dump(p_hwfn,
+						  dump_buf,
+						  num_dumped_dwords,
+						  results_buf,
+						  &parsed_buf_size);
+}
+
+/* Parses a FW Asserts dump buffer.
+ * If result_buf is not NULL, the FW Asserts results are printed to it.
+ * In any case, the required results buffer size is assigned to
+ * parsed_results_bytes.
+ * The parsing status is returned.
+ */
+static enum dbg_status qed_parse_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+						 u32 *dump_buf,
+						 u32 num_dumped_dwords,
+						 char *results_buf,
+						 u32 *parsed_results_bytes)
+{
+	u32 results_offset = 0, num_section_params, param_num_val, i;
+	const char *param_name, *param_str_val, *section_name;
+	bool last_section_found = false;
+
+	*parsed_results_bytes = 0;
+
+	/* Read global_params section */
+	dump_buf += qed_read_section_hdr(dump_buf,
+					 &section_name, &num_section_params);
+	if (strcmp(section_name, "global_params"))
+		return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+
+	/* Print global params */
+	dump_buf += qed_print_section_params(dump_buf,
+					     num_section_params,
+					     results_buf, &results_offset);
+	while (!last_section_found) {
+		const char *storm_letter = NULL;
+		u32 storm_dump_size = 0;
+
+		dump_buf += qed_read_section_hdr(dump_buf,
+						 &section_name,
+						 &num_section_params);
+		if (!strcmp(section_name, "last")) {
+			last_section_found = true;
+			continue;
+		} else if (strcmp(section_name, "fw_asserts")) {
+			return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+		}
+
+		/* Extract params */
+		for (i = 0; i < num_section_params; i++) {
+			dump_buf += qed_read_param(dump_buf,
+						   &param_name,
+						   &param_str_val,
+						   &param_num_val);
+			if (!strcmp(param_name, "storm"))
+				storm_letter = param_str_val;
+			else if (!strcmp(param_name, "size"))
+				storm_dump_size = param_num_val;
+			else
+				return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+		}
+
+		if (!storm_letter || !storm_dump_size)
+			return DBG_STATUS_FW_ASSERTS_PARSE_FAILED;
+
+		/* Print data */
+		results_offset += sprintf(qed_get_buf_ptr(results_buf,
+							  results_offset),
+					  "\n%sSTORM_ASSERT: size=%d\n",
+					  storm_letter, storm_dump_size);
+		for (i = 0; i < storm_dump_size; i++, dump_buf++)
+			results_offset +=
+			    sprintf(qed_get_buf_ptr(results_buf,
+						    results_offset),
+				    "%08x\n", *dump_buf);
+	}
+
+	/* Add 1 for string NULL termination */
+	*parsed_results_bytes = results_offset + 1;
+	return DBG_STATUS_OK;
+}
+
+enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
+						    u32 *dump_buf,
+						    u32 num_dumped_dwords,
+						    u32 *results_buf_size)
+{
+	return qed_parse_fw_asserts_dump(p_hwfn,
+					 dump_buf,
+					 num_dumped_dwords,
+					 NULL, results_buf_size);
+}
+
+enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
+					     u32 *dump_buf,
+					     u32 num_dumped_dwords,
+					     char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_fw_asserts_dump(p_hwfn,
+					 dump_buf,
+					 num_dumped_dwords,
+					 results_buf, &parsed_buf_size);
+}
+
+/* Wrapper for unifying the idle_chk and mcp_trace api */
+enum dbg_status qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn,
+						   u32 *dump_buf,
+						   u32 num_dumped_dwords,
+						   char *results_buf)
+{
+	u32 num_errors, num_warnnings;
+
+	return qed_print_idle_chk_results(p_hwfn, dump_buf, num_dumped_dwords,
+					  results_buf, &num_errors,
+					  &num_warnnings);
+}
+
+/* Feature meta data lookup table */
+static struct {
+	char *name;
+	enum dbg_status (*get_size)(struct qed_hwfn *p_hwfn,
+				    struct qed_ptt *p_ptt, u32 *size);
+	enum dbg_status (*perform_dump)(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt, u32 *dump_buf,
+					u32 buf_size, u32 *dumped_dwords);
+	enum dbg_status (*print_results)(struct qed_hwfn *p_hwfn,
+					 u32 *dump_buf, u32 num_dumped_dwords,
+					 char *results_buf);
+	enum dbg_status (*results_buf_size)(struct qed_hwfn *p_hwfn,
+					    u32 *dump_buf,
+					    u32 num_dumped_dwords,
+					    u32 *results_buf_size);
+} qed_features_lookup[] = {
+	{
+	"grc", qed_dbg_grc_get_dump_buf_size,
+		    qed_dbg_grc_dump, NULL, NULL}, {
+	"idle_chk",
+		    qed_dbg_idle_chk_get_dump_buf_size,
+		    qed_dbg_idle_chk_dump,
+		    qed_print_idle_chk_results_wrapper,
+		    qed_get_idle_chk_results_buf_size}, {
+	"mcp_trace",
+		    qed_dbg_mcp_trace_get_dump_buf_size,
+		    qed_dbg_mcp_trace_dump, qed_print_mcp_trace_results,
+		    qed_get_mcp_trace_results_buf_size}, {
+	"reg_fifo",
+		    qed_dbg_reg_fifo_get_dump_buf_size,
+		    qed_dbg_reg_fifo_dump, qed_print_reg_fifo_results,
+		    qed_get_reg_fifo_results_buf_size}, {
+	"igu_fifo",
+		    qed_dbg_igu_fifo_get_dump_buf_size,
+		    qed_dbg_igu_fifo_dump, qed_print_igu_fifo_results,
+		    qed_get_igu_fifo_results_buf_size}, {
+	"protection_override",
+		    qed_dbg_protection_override_get_dump_buf_size,
+		    qed_dbg_protection_override_dump,
+		    qed_print_protection_override_results,
+		    qed_get_protection_override_results_buf_size}, {
+	"fw_asserts",
+		    qed_dbg_fw_asserts_get_dump_buf_size,
+		    qed_dbg_fw_asserts_dump,
+		    qed_print_fw_asserts_results,
+		    qed_get_fw_asserts_results_buf_size},};
+
+static void qed_dbg_print_feature(u8 *p_text_buf, u32 text_size)
+{
+	u32 i, precision = 80;
+
+	if (!p_text_buf)
+		return;
+
+	pr_notice("\n%.*s", precision, p_text_buf);
+	for (i = precision; i < text_size; i += precision)
+		pr_cont("%.*s", precision, p_text_buf + i);
+	pr_cont("\n");
+}
+
+#define QED_RESULTS_BUF_MIN_SIZE 16
+/* Generic function for decoding debug feature info */
+enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
+			       enum qed_dbg_features feature_idx)
+{
+	struct qed_dbg_feature *feature =
+	    &p_hwfn->cdev->dbg_params.features[feature_idx];
+	u32 text_size_bytes, null_char_pos, i;
+	enum dbg_status rc;
+	char *text_buf;
+
+	/* Check if feature supports formatting capability */
+	if (!qed_features_lookup[feature_idx].results_buf_size)
+		return DBG_STATUS_OK;
+
+	/* Obtain size of formatted output */
+	rc = qed_features_lookup[feature_idx].
+		results_buf_size(p_hwfn, (u32 *)feature->dump_buf,
+				 feature->dumped_dwords, &text_size_bytes);
+	if (rc != DBG_STATUS_OK)
+		return rc;
+
+	/* Make sure that the allocated size is a multiple of dword (4 bytes) */
+	null_char_pos = text_size_bytes - 1;
+	text_size_bytes = (text_size_bytes + 3) & ~0x3;
+
+	if (text_size_bytes < QED_RESULTS_BUF_MIN_SIZE) {
+		DP_NOTICE(p_hwfn->cdev,
+			  "formatted size of feature was too small %d. Aborting\n",
+			  text_size_bytes);
+		return DBG_STATUS_INVALID_ARGS;
+	}
+
+	/* Allocate temp text buf */
+	text_buf = vzalloc(text_size_bytes);
+	if (!text_buf)
+		return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+	/* Decode feature opcodes to string on temp buf */
+	rc = qed_features_lookup[feature_idx].
+		print_results(p_hwfn, (u32 *)feature->dump_buf,
+			      feature->dumped_dwords, text_buf);
+	if (rc != DBG_STATUS_OK) {
+		vfree(text_buf);
+		return rc;
+	}
+
+	/* Replace the original null character with a '\n' character.
+	 * The bytes that were added as a result of the dword alignment are also
+	 * padded with '\n' characters.
+	 */
+	for (i = null_char_pos; i < text_size_bytes; i++)
+		text_buf[i] = '\n';
+
+	/* Dump printable feature to log */
+	if (p_hwfn->cdev->dbg_params.print_data)
+		qed_dbg_print_feature(text_buf, text_size_bytes);
+
+	/* Free the old dump_buf and point the dump_buf to the newly allocagted
+	 * and formatted text buffer.
+	 */
+	vfree(feature->dump_buf);
+	feature->dump_buf = text_buf;
+	feature->buf_size = text_size_bytes;
+	feature->dumped_dwords = text_size_bytes / 4;
+	return rc;
+}
+
+/* Generic function for performing the dump of a debug feature. */
+enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+			     enum qed_dbg_features feature_idx)
+{
+	struct qed_dbg_feature *feature =
+	    &p_hwfn->cdev->dbg_params.features[feature_idx];
+	u32 buf_size_dwords;
+	enum dbg_status rc;
+
+	DP_NOTICE(p_hwfn->cdev, "Collecting a debug feature [\"%s\"]\n",
+		  qed_features_lookup[feature_idx].name);
+
+	/* Dump_buf was already allocated need to free (this can happen if dump
+	 * was called but file was never read).
+	 * We can't use the buffer as is since size may have changed.
+	 */
+	if (feature->dump_buf) {
+		vfree(feature->dump_buf);
+		feature->dump_buf = NULL;
+	}
+
+	/* Get buffer size from hsi, allocate accordingly, and perform the
+	 * dump.
+	 */
+	rc = qed_features_lookup[feature_idx].get_size(p_hwfn, p_ptt,
+						       &buf_size_dwords);
+	if (rc != DBG_STATUS_OK)
+		return rc;
+	feature->buf_size = buf_size_dwords * sizeof(u32);
+	feature->dump_buf = vmalloc(feature->buf_size);
+	if (!feature->dump_buf)
+		return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+	rc = qed_features_lookup[feature_idx].
+		perform_dump(p_hwfn, p_ptt, (u32 *)feature->dump_buf,
+			     feature->buf_size / sizeof(u32),
+			     &feature->dumped_dwords);
+
+	/* If mcp is stuck we get DBG_STATUS_NVRAM_GET_IMAGE_FAILED error.
+	 * In this case the buffer holds valid binary data, but we wont able
+	 * to parse it (since parsing relies on data in NVRAM which is only
+	 * accessible when MFW is responsive). skip the formatting but return
+	 * success so that binary data is provided.
+	 */
+	if (rc == DBG_STATUS_NVRAM_GET_IMAGE_FAILED)
+		return DBG_STATUS_OK;
+
+	if (rc != DBG_STATUS_OK)
+		return rc;
+
+	/* Format output */
+	rc = format_feature(p_hwfn, feature_idx);
+	return rc;
+}
+
+int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes)
+{
+	return qed_dbg_feature(cdev, buffer, DBG_FEATURE_GRC, num_dumped_bytes);
+}
+
+int qed_dbg_grc_size(struct qed_dev *cdev)
+{
+	return qed_dbg_feature_size(cdev, DBG_FEATURE_GRC);
+}
+
+int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes)
+{
+	return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IDLE_CHK,
+			       num_dumped_bytes);
+}
+
+int qed_dbg_idle_chk_size(struct qed_dev *cdev)
+{
+	return qed_dbg_feature_size(cdev, DBG_FEATURE_IDLE_CHK);
+}
+
+int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes)
+{
+	return qed_dbg_feature(cdev, buffer, DBG_FEATURE_REG_FIFO,
+			       num_dumped_bytes);
+}
+
+int qed_dbg_reg_fifo_size(struct qed_dev *cdev)
+{
+	return qed_dbg_feature_size(cdev, DBG_FEATURE_REG_FIFO);
+}
+
+int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes)
+{
+	return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IGU_FIFO,
+			       num_dumped_bytes);
+}
+
+int qed_dbg_igu_fifo_size(struct qed_dev *cdev)
+{
+	return qed_dbg_feature_size(cdev, DBG_FEATURE_IGU_FIFO);
+}
+
+int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer,
+				u32 *num_dumped_bytes)
+{
+	return qed_dbg_feature(cdev, buffer, DBG_FEATURE_PROTECTION_OVERRIDE,
+			       num_dumped_bytes);
+}
+
+int qed_dbg_protection_override_size(struct qed_dev *cdev)
+{
+	return qed_dbg_feature_size(cdev, DBG_FEATURE_PROTECTION_OVERRIDE);
+}
+
+int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer,
+		       u32 *num_dumped_bytes)
+{
+	return qed_dbg_feature(cdev, buffer, DBG_FEATURE_FW_ASSERTS,
+			       num_dumped_bytes);
+}
+
+int qed_dbg_fw_asserts_size(struct qed_dev *cdev)
+{
+	return qed_dbg_feature_size(cdev, DBG_FEATURE_FW_ASSERTS);
+}
+
+int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer,
+		      u32 *num_dumped_bytes)
+{
+	return qed_dbg_feature(cdev, buffer, DBG_FEATURE_MCP_TRACE,
+			       num_dumped_bytes);
+}
+
+int qed_dbg_mcp_trace_size(struct qed_dev *cdev)
+{
+	return qed_dbg_feature_size(cdev, DBG_FEATURE_MCP_TRACE);
+}
+
+/* Defines the amount of bytes allocated for recording the length of debugfs
+ * feature buffer.
+ */
+#define REGDUMP_HEADER_SIZE			sizeof(u32)
+#define REGDUMP_HEADER_FEATURE_SHIFT		24
+#define REGDUMP_HEADER_ENGINE_SHIFT		31
+#define REGDUMP_HEADER_OMIT_ENGINE_SHIFT	30
+enum debug_print_features {
+	OLD_MODE = 0,
+	IDLE_CHK = 1,
+	GRC_DUMP = 2,
+	MCP_TRACE = 3,
+	REG_FIFO = 4,
+	PROTECTION_OVERRIDE = 5,
+	IGU_FIFO = 6,
+	PHY = 7,
+	FW_ASSERTS = 8,
+};
+
+static u32 qed_calc_regdump_header(enum debug_print_features feature,
+				   int engine, u32 feature_size, u8 omit_engine)
+{
+	/* Insert the engine, feature and mode inside the header and combine it
+	 * with feature size.
+	 */
+	return feature_size | (feature << REGDUMP_HEADER_FEATURE_SHIFT) |
+	       (omit_engine << REGDUMP_HEADER_OMIT_ENGINE_SHIFT) |
+	       (engine << REGDUMP_HEADER_ENGINE_SHIFT);
+}
+
+int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
+{
+	u8 cur_engine, omit_engine = 0, org_engine;
+	u32 offset = 0, feature_size;
+	int rc;
+
+	if (cdev->num_hwfns == 1)
+		omit_engine = 1;
+
+	org_engine = qed_get_debug_engine(cdev);
+	for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) {
+		/* Collect idle_chks and grcDump for each hw function */
+		DP_VERBOSE(cdev, QED_MSG_DEBUG,
+			   "obtaining idle_chk and grcdump for current engine\n");
+		qed_set_debug_engine(cdev, cur_engine);
+
+		/* First idle_chk */
+		rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset +
+				      REGDUMP_HEADER_SIZE, &feature_size);
+		if (!rc) {
+			*(u32 *)((u8 *)buffer + offset) =
+			    qed_calc_regdump_header(IDLE_CHK, cur_engine,
+						    feature_size, omit_engine);
+			offset += (feature_size + REGDUMP_HEADER_SIZE);
+		} else {
+			DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc);
+		}
+
+		/* Second idle_chk */
+		rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset +
+				      REGDUMP_HEADER_SIZE, &feature_size);
+		if (!rc) {
+			*(u32 *)((u8 *)buffer + offset) =
+			    qed_calc_regdump_header(IDLE_CHK, cur_engine,
+						    feature_size, omit_engine);
+			offset += (feature_size + REGDUMP_HEADER_SIZE);
+		} else {
+			DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc);
+		}
+
+		/* reg_fifo dump */
+		rc = qed_dbg_reg_fifo(cdev, (u8 *)buffer + offset +
+				      REGDUMP_HEADER_SIZE, &feature_size);
+		if (!rc) {
+			*(u32 *)((u8 *)buffer + offset) =
+			    qed_calc_regdump_header(REG_FIFO, cur_engine,
+						    feature_size, omit_engine);
+			offset += (feature_size + REGDUMP_HEADER_SIZE);
+		} else {
+			DP_ERR(cdev, "qed_dbg_reg_fifo failed. rc = %d\n", rc);
+		}
+
+		/* igu_fifo dump */
+		rc = qed_dbg_igu_fifo(cdev, (u8 *)buffer + offset +
+				      REGDUMP_HEADER_SIZE, &feature_size);
+		if (!rc) {
+			*(u32 *)((u8 *)buffer + offset) =
+			    qed_calc_regdump_header(IGU_FIFO, cur_engine,
+						    feature_size, omit_engine);
+			offset += (feature_size + REGDUMP_HEADER_SIZE);
+		} else {
+			DP_ERR(cdev, "qed_dbg_igu_fifo failed. rc = %d", rc);
+		}
+
+		/* protection_override dump */
+		rc = qed_dbg_protection_override(cdev, (u8 *)buffer + offset +
+						 REGDUMP_HEADER_SIZE,
+						 &feature_size);
+		if (!rc) {
+			*(u32 *)((u8 *)buffer + offset) =
+			    qed_calc_regdump_header(PROTECTION_OVERRIDE,
+						    cur_engine,
+						    feature_size, omit_engine);
+			offset += (feature_size + REGDUMP_HEADER_SIZE);
+		} else {
+			DP_ERR(cdev,
+			       "qed_dbg_protection_override failed. rc = %d\n",
+			       rc);
+		}
+
+		/* fw_asserts dump */
+		rc = qed_dbg_fw_asserts(cdev, (u8 *)buffer + offset +
+					REGDUMP_HEADER_SIZE, &feature_size);
+		if (!rc) {
+			*(u32 *)((u8 *)buffer + offset) =
+			    qed_calc_regdump_header(FW_ASSERTS, cur_engine,
+						    feature_size, omit_engine);
+			offset += (feature_size + REGDUMP_HEADER_SIZE);
+		} else {
+			DP_ERR(cdev, "qed_dbg_fw_asserts failed. rc = %d\n",
+			       rc);
+		}
+
+		/* GRC dump - must be last because when mcp stuck it will
+		 * clutter idle_chk, reg_fifo, ...
+		 */
+		rc = qed_dbg_grc(cdev, (u8 *)buffer + offset +
+				 REGDUMP_HEADER_SIZE, &feature_size);
+		if (!rc) {
+			*(u32 *)((u8 *)buffer + offset) =
+			    qed_calc_regdump_header(GRC_DUMP, cur_engine,
+						    feature_size, omit_engine);
+			offset += (feature_size + REGDUMP_HEADER_SIZE);
+		} else {
+			DP_ERR(cdev, "qed_dbg_grc failed. rc = %d", rc);
+		}
+	}
+
+	/* mcp_trace */
+	rc = qed_dbg_mcp_trace(cdev, (u8 *)buffer + offset +
+			       REGDUMP_HEADER_SIZE, &feature_size);
+	if (!rc) {
+		*(u32 *)((u8 *)buffer + offset) =
+		    qed_calc_regdump_header(MCP_TRACE, cur_engine,
+					    feature_size, omit_engine);
+		offset += (feature_size + REGDUMP_HEADER_SIZE);
+	} else {
+		DP_ERR(cdev, "qed_dbg_mcp_trace failed. rc = %d\n", rc);
+	}
+
+	qed_set_debug_engine(cdev, org_engine);
+
+	return 0;
+}
+
+int qed_dbg_all_data_size(struct qed_dev *cdev)
+{
+	u8 cur_engine, org_engine;
+	u32 regs_len = 0;
+
+	org_engine = qed_get_debug_engine(cdev);
+	for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) {
+		/* Engine specific */
+		DP_VERBOSE(cdev, QED_MSG_DEBUG,
+			   "calculating idle_chk and grcdump register length for current engine\n");
+		qed_set_debug_engine(cdev, cur_engine);
+		regs_len += REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) +
+			    REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) +
+			    REGDUMP_HEADER_SIZE + qed_dbg_grc_size(cdev) +
+			    REGDUMP_HEADER_SIZE + qed_dbg_reg_fifo_size(cdev) +
+			    REGDUMP_HEADER_SIZE + qed_dbg_igu_fifo_size(cdev) +
+			    REGDUMP_HEADER_SIZE +
+			    qed_dbg_protection_override_size(cdev) +
+			    REGDUMP_HEADER_SIZE + qed_dbg_fw_asserts_size(cdev);
+	}
+
+	/* Engine common */
+	regs_len += REGDUMP_HEADER_SIZE + qed_dbg_mcp_trace_size(cdev);
+	qed_set_debug_engine(cdev, org_engine);
+
+	return regs_len;
+}
+
+int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
+		    enum qed_dbg_features feature, u32 *num_dumped_bytes)
+{
+	struct qed_hwfn *p_hwfn =
+		&cdev->hwfns[cdev->dbg_params.engine_for_debug];
+	struct qed_dbg_feature *qed_feature =
+		&cdev->dbg_params.features[feature];
+	enum dbg_status dbg_rc;
+	struct qed_ptt *p_ptt;
+	int rc = 0;
+
+	/* Acquire ptt */
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EINVAL;
+
+	/* Get dump */
+	dbg_rc = qed_dbg_dump(p_hwfn, p_ptt, feature);
+	if (dbg_rc != DBG_STATUS_OK) {
+		DP_VERBOSE(cdev, QED_MSG_DEBUG, "%s\n",
+			   qed_dbg_get_status_str(dbg_rc));
+		*num_dumped_bytes = 0;
+		rc = -EINVAL;
+		goto out;
+	}
+
+	DP_VERBOSE(cdev, QED_MSG_DEBUG,
+		   "copying debugfs feature to external buffer\n");
+	memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size);
+	*num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords *
+			    4;
+
+out:
+	qed_ptt_release(p_hwfn, p_ptt);
+	return rc;
+}
+
+int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
+{
+	struct qed_hwfn *p_hwfn =
+		&cdev->hwfns[cdev->dbg_params.engine_for_debug];
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	struct qed_dbg_feature *qed_feature =
+		&cdev->dbg_params.features[feature];
+	u32 buf_size_dwords;
+	enum dbg_status rc;
+
+	if (!p_ptt)
+		return -EINVAL;
+
+	rc = qed_features_lookup[feature].get_size(p_hwfn, p_ptt,
+						   &buf_size_dwords);
+	if (rc != DBG_STATUS_OK)
+		buf_size_dwords = 0;
+
+	qed_ptt_release(p_hwfn, p_ptt);
+	qed_feature->buf_size = buf_size_dwords * sizeof(u32);
+	return qed_feature->buf_size;
+}
+
+u8 qed_get_debug_engine(struct qed_dev *cdev)
+{
+	return cdev->dbg_params.engine_for_debug;
+}
+
+void qed_set_debug_engine(struct qed_dev *cdev, int engine_number)
+{
+	DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n",
+		   engine_number);
+	cdev->dbg_params.engine_for_debug = engine_number;
+}
+
+void qed_dbg_pf_init(struct qed_dev *cdev)
+{
+	const u8 *dbg_values;
+
+	/* Debug values are after init values.
+	 * The offset is the first dword of the file.
+	 */
+	dbg_values = cdev->firmware->data + *(u32 *)cdev->firmware->data;
+	qed_dbg_set_bin_ptr((u8 *)dbg_values);
+	qed_dbg_user_set_bin_ptr((u8 *)dbg_values);
+}
+
+void qed_dbg_pf_exit(struct qed_dev *cdev)
+{
+	struct qed_dbg_feature *feature = NULL;
+	enum qed_dbg_features feature_idx;
+
+	/* Debug features' buffers may be allocated if debug feature was used
+	 * but dump wasn't called.
+	 */
+	for (feature_idx = 0; feature_idx < DBG_FEATURE_NUM; feature_idx++) {
+		feature = &cdev->dbg_params.features[feature_idx];
+		if (feature->dump_buf) {
+			vfree(feature->dump_buf);
+			feature->dump_buf = NULL;
+		}
+	}
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.h b/drivers/net/ethernet/qlogic/qed/qed_debug.h
new file mode 100644
index 0000000..f872d73
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.h
@@ -0,0 +1,54 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_DEBUGFS_H
+#define _QED_DEBUGFS_H
+
+enum qed_dbg_features {
+	DBG_FEATURE_GRC,
+	DBG_FEATURE_IDLE_CHK,
+	DBG_FEATURE_MCP_TRACE,
+	DBG_FEATURE_REG_FIFO,
+	DBG_FEATURE_IGU_FIFO,
+	DBG_FEATURE_PROTECTION_OVERRIDE,
+	DBG_FEATURE_FW_ASSERTS,
+	DBG_FEATURE_NUM
+};
+
+int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes);
+int qed_dbg_grc_size(struct qed_dev *cdev);
+int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer,
+		     u32 *num_dumped_bytes);
+int qed_dbg_idle_chk_size(struct qed_dev *cdev);
+int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer,
+		     u32 *num_dumped_bytes);
+int qed_dbg_reg_fifo_size(struct qed_dev *cdev);
+int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer,
+		     u32 *num_dumped_bytes);
+int qed_dbg_igu_fifo_size(struct qed_dev *cdev);
+int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer,
+				u32 *num_dumped_bytes);
+int qed_dbg_protection_override_size(struct qed_dev *cdev);
+int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer,
+		       u32 *num_dumped_bytes);
+int qed_dbg_fw_asserts_size(struct qed_dev *cdev);
+int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer,
+		      u32 *num_dumped_bytes);
+int qed_dbg_mcp_trace_size(struct qed_dev *cdev);
+int qed_dbg_all_data(struct qed_dev *cdev, void *buffer);
+int qed_dbg_all_data_size(struct qed_dev *cdev);
+u8 qed_get_debug_engine(struct qed_dev *cdev);
+void qed_set_debug_engine(struct qed_dev *cdev, int engine_number);
+int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
+		    enum qed_dbg_features feature, u32 *num_dumped_bytes);
+int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature);
+
+void qed_dbg_pf_init(struct qed_dev *cdev);
+void qed_dbg_pf_exit(struct qed_dev *cdev);
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 0e4f4a9..754f6a9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -29,14 +29,18 @@
 #include "qed_hw.h"
 #include "qed_init_ops.h"
 #include "qed_int.h"
+#include "qed_ll2.h"
 #include "qed_mcp.h"
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
 #include "qed_sriov.h"
 #include "qed_vf.h"
+#include "qed_roce.h"
 
-static spinlock_t qm_lock;
-static bool qm_lock_init = false;
+static DEFINE_SPINLOCK(qm_lock);
+
+#define QED_MIN_DPIS            (4)
+#define QED_MIN_PWM_REGION      (QED_WID_SIZE * QED_MIN_DPIS)
 
 /* API common to all protocols */
 enum BAR_ID {
@@ -44,8 +48,7 @@
 	BAR_ID_1        /* Used for doorbells */
 };
 
-static u32 qed_hw_bar_size(struct qed_hwfn	*p_hwfn,
-			   enum BAR_ID		bar_id)
+static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id)
 {
 	u32 bar_reg = (bar_id == BAR_ID_0 ?
 		       PGLUE_B_REG_PF_BAR0_SIZE : PGLUE_B_REG_PF_BAR1_SIZE);
@@ -70,8 +73,7 @@
 	}
 }
 
-void qed_init_dp(struct qed_dev *cdev,
-		 u32 dp_module, u8 dp_level)
+void qed_init_dp(struct qed_dev *cdev, u32 dp_module, u8 dp_level)
 {
 	u32 i;
 
@@ -150,6 +152,9 @@
 		qed_eq_free(p_hwfn, p_hwfn->p_eq);
 		qed_consq_free(p_hwfn, p_hwfn->p_consq);
 		qed_int_free(p_hwfn);
+#ifdef CONFIG_QED_LL2
+		qed_ll2_free(p_hwfn, p_hwfn->p_ll2_info);
+#endif
 		qed_iov_free(p_hwfn);
 		qed_dmae_info_free(p_hwfn);
 		qed_dcbx_info_free(p_hwfn, p_hwfn->p_dcbx_info);
@@ -343,7 +348,6 @@
 	return 0;
 
 alloc_err:
-	DP_NOTICE(p_hwfn, "Failed to allocate memory for QM params\n");
 	qed_qm_info_free(p_hwfn);
 	return -ENOMEM;
 }
@@ -407,6 +411,9 @@
 
 int qed_resc_alloc(struct qed_dev *cdev)
 {
+#ifdef CONFIG_QED_LL2
+	struct qed_ll2_info *p_ll2_info;
+#endif
 	struct qed_consq *p_consq;
 	struct qed_eq *p_eq;
 	int i, rc = 0;
@@ -427,18 +434,12 @@
 				     RESC_NUM(p_hwfn, QED_L2_QUEUE);
 
 		p_hwfn->p_tx_cids = kzalloc(tx_size, GFP_KERNEL);
-		if (!p_hwfn->p_tx_cids) {
-			DP_NOTICE(p_hwfn,
-				  "Failed to allocate memory for Tx Cids\n");
+		if (!p_hwfn->p_tx_cids)
 			goto alloc_no_mem;
-		}
 
 		p_hwfn->p_rx_cids = kzalloc(rx_size, GFP_KERNEL);
-		if (!p_hwfn->p_rx_cids) {
-			DP_NOTICE(p_hwfn,
-				  "Failed to allocate memory for Rx Cids\n");
+		if (!p_hwfn->p_rx_cids)
 			goto alloc_no_mem;
-		}
 	}
 
 	for_each_hwfn(cdev, i) {
@@ -523,29 +524,29 @@
 			goto alloc_no_mem;
 		p_hwfn->p_consq = p_consq;
 
+#ifdef CONFIG_QED_LL2
+		if (p_hwfn->using_ll2) {
+			p_ll2_info = qed_ll2_alloc(p_hwfn);
+			if (!p_ll2_info)
+				goto alloc_no_mem;
+			p_hwfn->p_ll2_info = p_ll2_info;
+		}
+#endif
+
 		/* DMA info initialization */
 		rc = qed_dmae_info_alloc(p_hwfn);
-		if (rc) {
-			DP_NOTICE(p_hwfn,
-				  "Failed to allocate memory for dmae_info structure\n");
+		if (rc)
 			goto alloc_err;
-		}
 
 		/* DCBX initialization */
 		rc = qed_dcbx_info_alloc(p_hwfn);
-		if (rc) {
-			DP_NOTICE(p_hwfn,
-				  "Failed to allocate memory for dcbx structure\n");
+		if (rc)
 			goto alloc_err;
-		}
 	}
 
 	cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL);
-	if (!cdev->reset_stats) {
-		DP_NOTICE(cdev, "Failed to allocate reset statistics\n");
-		rc = -ENOMEM;
-		goto alloc_err;
-	}
+	if (!cdev->reset_stats)
+		goto alloc_no_mem;
 
 	return 0;
 
@@ -580,6 +581,10 @@
 		qed_int_setup(p_hwfn, p_hwfn->p_main_ptt);
 
 		qed_iov_setup(p_hwfn, p_hwfn->p_main_ptt);
+#ifdef CONFIG_QED_LL2
+		if (p_hwfn->using_ll2)
+			qed_ll2_setup(p_hwfn, p_hwfn->p_ll2_info);
+#endif
 	}
 }
 
@@ -605,9 +610,8 @@
 
 	/* Make sure notification is not set before initiating final cleanup */
 	if (REG_RD(p_hwfn, addr)) {
-		DP_NOTICE(
-			p_hwfn,
-			"Unexpected; Found final cleanup notification before initiating final cleanup\n");
+		DP_NOTICE(p_hwfn,
+			  "Unexpected; Found final cleanup notification before initiating final cleanup\n");
 		REG_WR(p_hwfn, addr, 0);
 	}
 
@@ -701,17 +705,14 @@
 				continue;
 
 			qed_init_cau_sb_entry(p_hwfn, &sb_entry,
-					      p_block->function_id,
-					      0, 0);
-			STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2,
-					 sb_entry);
+					      p_block->function_id, 0, 0);
+			STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2, sb_entry);
 		}
 	}
 }
 
 static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
-			      struct qed_ptt *p_ptt,
-			      int hw_mode)
+			      struct qed_ptt *p_ptt, int hw_mode)
 {
 	struct qed_qm_info *qm_info = &p_hwfn->qm_info;
 	struct qed_qm_common_rt_init_params params;
@@ -759,7 +760,7 @@
 	qed_port_unpretend(p_hwfn, p_ptt);
 
 	rc = qed_init_run(p_hwfn, p_ptt, PHASE_ENGINE, ANY_PHASE_ID, hw_mode);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	qed_wr(p_hwfn, p_ptt, PSWRQ2_REG_L2P_VALIDATE_VFID, 0);
@@ -780,6 +781,9 @@
 		concrete_fid = qed_vfid_to_concrete(p_hwfn, vf_id);
 		qed_fid_pretend(p_hwfn, p_ptt, (u16) concrete_fid);
 		qed_wr(p_hwfn, p_ptt, CCFC_REG_STRONG_ENABLE_VF, 0x1);
+		qed_wr(p_hwfn, p_ptt, CCFC_REG_WEAK_ENABLE_VF, 0x0);
+		qed_wr(p_hwfn, p_ptt, TCFC_REG_STRONG_ENABLE_VF, 0x1);
+		qed_wr(p_hwfn, p_ptt, TCFC_REG_WEAK_ENABLE_VF, 0x0);
 	}
 	/* pretend to original PF */
 	qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
@@ -787,38 +791,141 @@
 	return rc;
 }
 
-static int qed_hw_init_port(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    int hw_mode)
+static int
+qed_hw_init_dpi_size(struct qed_hwfn *p_hwfn,
+		     struct qed_ptt *p_ptt, u32 pwm_region_size, u32 n_cpus)
 {
+	u32 dpi_page_size_1, dpi_page_size_2, dpi_page_size;
+	u32 dpi_bit_shift, dpi_count;
+	u32 min_dpis;
+
+	/* Calculate DPI size */
+	dpi_page_size_1 = QED_WID_SIZE * n_cpus;
+	dpi_page_size_2 = max_t(u32, QED_WID_SIZE, PAGE_SIZE);
+	dpi_page_size = max_t(u32, dpi_page_size_1, dpi_page_size_2);
+	dpi_page_size = roundup_pow_of_two(dpi_page_size);
+	dpi_bit_shift = ilog2(dpi_page_size / 4096);
+
+	dpi_count = pwm_region_size / dpi_page_size;
+
+	min_dpis = p_hwfn->pf_params.rdma_pf_params.min_dpis;
+	min_dpis = max_t(u32, QED_MIN_DPIS, min_dpis);
+
+	p_hwfn->dpi_size = dpi_page_size;
+	p_hwfn->dpi_count = dpi_count;
+
+	qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPI_BIT_SHIFT, dpi_bit_shift);
+
+	if (dpi_count < min_dpis)
+		return -EINVAL;
+
+	return 0;
+}
+
+enum QED_ROCE_EDPM_MODE {
+	QED_ROCE_EDPM_MODE_ENABLE = 0,
+	QED_ROCE_EDPM_MODE_FORCE_ON = 1,
+	QED_ROCE_EDPM_MODE_DISABLE = 2,
+};
+
+static int
+qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 pwm_regsize, norm_regsize;
+	u32 non_pwm_conn, min_addr_reg1;
+	u32 db_bar_size, n_cpus;
+	u32 roce_edpm_mode;
+	u32 pf_dems_shift;
 	int rc = 0;
+	u8 cond;
 
-	rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode);
-	if (rc != 0)
-		return rc;
+	db_bar_size = qed_hw_bar_size(p_hwfn, BAR_ID_1);
+	if (p_hwfn->cdev->num_hwfns > 1)
+		db_bar_size /= 2;
 
-	if (hw_mode & (1 << MODE_MF_SI)) {
-		u8 pf_id = 0;
+	/* Calculate doorbell regions */
+	non_pwm_conn = qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_CORE) +
+		       qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_CORE,
+						   NULL) +
+		       qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH,
+						   NULL);
+	norm_regsize = roundup(QED_PF_DEMS_SIZE * non_pwm_conn, 4096);
+	min_addr_reg1 = norm_regsize / 4096;
+	pwm_regsize = db_bar_size - norm_regsize;
 
-		if (!qed_hw_init_first_eth(p_hwfn, p_ptt, &pf_id)) {
-			DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP,
-				   "PF[%08x] is first eth on engine\n", pf_id);
-
-			/* We should have configured BIT for ppfid, i.e., the
-			 * relative function number in the port. But there's a
-			 * bug in LLH in BB where the ppfid is actually engine
-			 * based, so we need to take this into account.
-			 */
-			qed_wr(p_hwfn, p_ptt,
-			       NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR, 1 << pf_id);
-		}
-
-		/* Take the protocol-based hit vector if there is a hit,
-		 * otherwise take the other vector.
-		 */
-		qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_CLS_TYPE_DUALMODE, 0x2);
+	/* Check that the normal and PWM sizes are valid */
+	if (db_bar_size < norm_regsize) {
+		DP_ERR(p_hwfn->cdev,
+		       "Doorbell BAR size 0x%x is too small (normal region is 0x%0x )\n",
+		       db_bar_size, norm_regsize);
+		return -EINVAL;
 	}
-	return rc;
+
+	if (pwm_regsize < QED_MIN_PWM_REGION) {
+		DP_ERR(p_hwfn->cdev,
+		       "PWM region size 0x%0x is too small. Should be at least 0x%0x (Doorbell BAR size is 0x%x and normal region size is 0x%0x)\n",
+		       pwm_regsize,
+		       QED_MIN_PWM_REGION, db_bar_size, norm_regsize);
+		return -EINVAL;
+	}
+
+	/* Calculate number of DPIs */
+	roce_edpm_mode = p_hwfn->pf_params.rdma_pf_params.roce_edpm_mode;
+	if ((roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE) ||
+	    ((roce_edpm_mode == QED_ROCE_EDPM_MODE_FORCE_ON))) {
+		/* Either EDPM is mandatory, or we are attempting to allocate a
+		 * WID per CPU.
+		 */
+		n_cpus = num_active_cpus();
+		rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus);
+	}
+
+	cond = (rc && (roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE)) ||
+	       (roce_edpm_mode == QED_ROCE_EDPM_MODE_DISABLE);
+	if (cond || p_hwfn->dcbx_no_edpm) {
+		/* Either EDPM is disabled from user configuration, or it is
+		 * disabled via DCBx, or it is not mandatory and we failed to
+		 * allocated a WID per CPU.
+		 */
+		n_cpus = 1;
+		rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus);
+
+		if (cond)
+			qed_rdma_dpm_bar(p_hwfn, p_ptt);
+	}
+
+	DP_INFO(p_hwfn,
+		"doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s\n",
+		norm_regsize,
+		pwm_regsize,
+		p_hwfn->dpi_size,
+		p_hwfn->dpi_count,
+		((p_hwfn->dcbx_no_edpm) || (p_hwfn->db_bar_no_edpm)) ?
+		"disabled" : "enabled");
+
+	if (rc) {
+		DP_ERR(p_hwfn,
+		       "Failed to allocate enough DPIs. Allocated %d but the current minimum is %d.\n",
+		       p_hwfn->dpi_count,
+		       p_hwfn->pf_params.rdma_pf_params.min_dpis);
+		return -EINVAL;
+	}
+
+	p_hwfn->dpi_start_offset = norm_regsize;
+
+	/* DEMS size is configured log2 of DWORDs, hence the division by 4 */
+	pf_dems_shift = ilog2(QED_PF_DEMS_SIZE / 4);
+	qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_ICID_BIT_SHIFT_NORM, pf_dems_shift);
+	qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_MIN_ADDR_REG1, min_addr_reg1);
+
+	return 0;
+}
+
+static int qed_hw_init_port(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt, int hw_mode)
+{
+	return qed_init_run(p_hwfn, p_ptt, PHASE_PORT,
+			    p_hwfn->port_id, hw_mode);
 }
 
 static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
@@ -848,7 +955,7 @@
 	qed_int_igu_init_rt(p_hwfn);
 
 	/* Set VLAN in NIG if needed */
-	if (hw_mode & (1 << MODE_MF_SD)) {
+	if (hw_mode & BIT(MODE_MF_SD)) {
 		DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "Configuring LLH_FUNC_TAG\n");
 		STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET, 1);
 		STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET,
@@ -856,7 +963,7 @@
 	}
 
 	/* Enable classification by MAC if needed */
-	if (hw_mode & (1 << MODE_MF_SI)) {
+	if (hw_mode & BIT(MODE_MF_SI)) {
 		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
 			   "Configuring TAGMAC_CLS_TYPE\n");
 		STORE_RT_REG(p_hwfn,
@@ -871,7 +978,7 @@
 
 	/* Cleanup chip from previous driver if such remains exist */
 	rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	/* PF Init sequence */
@@ -887,20 +994,9 @@
 	/* Pure runtime initializations - directly to the HW  */
 	qed_int_igu_init_pure_rt(p_hwfn, p_ptt, true, true);
 
-	if (hw_mode & (1 << MODE_MF_SI)) {
-		u8 pf_id = 0;
-		u32 val = 0;
-
-		if (!qed_hw_init_first_eth(p_hwfn, p_ptt, &pf_id)) {
-			if (p_hwfn->rel_pf_id == pf_id) {
-				DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP,
-					   "PF[%d] is first ETH on engine\n",
-					   pf_id);
-				val = 1;
-			}
-			qed_wr(p_hwfn, p_ptt, PRS_REG_MSG_INFO, val);
-		}
-	}
+	rc = qed_hw_init_pf_doorbell_bar(p_hwfn, p_ptt);
+	if (rc)
+		return rc;
 
 	if (b_hw_start) {
 		/* enable interrupts */
@@ -950,8 +1046,7 @@
 	/* Read shadow of current MFW mailbox */
 	qed_mcp_read_mb(p_hwfn, p_main_ptt);
 	memcpy(p_hwfn->mcp_info->mfw_mb_shadow,
-	       p_hwfn->mcp_info->mfw_mb_cur,
-	       p_hwfn->mcp_info->mfw_mb_length);
+	       p_hwfn->mcp_info->mfw_mb_cur, p_hwfn->mcp_info->mfw_mb_length);
 }
 
 int qed_hw_init(struct qed_dev *cdev,
@@ -971,7 +1066,7 @@
 
 	if (IS_PF(cdev)) {
 		rc = qed_init_fw_data(cdev, bin_fw_data);
-		if (rc != 0)
+		if (rc)
 			return rc;
 	}
 
@@ -988,8 +1083,7 @@
 
 		qed_calc_hw_mode(p_hwfn);
 
-		rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt,
-				      &load_code);
+		rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt, &load_code);
 		if (rc) {
 			DP_NOTICE(p_hwfn, "Failed sending LOAD_REQ command\n");
 			return rc;
@@ -1004,11 +1098,6 @@
 		p_hwfn->first_on_engine = (load_code ==
 					   FW_MSG_CODE_DRV_LOAD_ENGINE);
 
-		if (!qm_lock_init) {
-			spin_lock_init(&qm_lock);
-			qm_lock_init = true;
-		}
-
 		switch (load_code) {
 		case FW_MSG_CODE_DRV_LOAD_ENGINE:
 			rc = qed_hw_init_common(p_hwfn, p_hwfn->p_main_ptt,
@@ -1071,9 +1160,8 @@
 }
 
 #define QED_HW_STOP_RETRY_LIMIT (10)
-static inline void qed_hw_timers_stop(struct qed_dev *cdev,
-				      struct qed_hwfn *p_hwfn,
-				      struct qed_ptt *p_ptt)
+static void qed_hw_timers_stop(struct qed_dev *cdev,
+			       struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	int i;
 
@@ -1084,8 +1172,7 @@
 	for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) {
 		if ((!qed_rd(p_hwfn, p_ptt,
 			     TM_REG_PF_SCAN_ACTIVE_CONN)) &&
-		    (!qed_rd(p_hwfn, p_ptt,
-			     TM_REG_PF_SCAN_ACTIVE_TASK)))
+		    (!qed_rd(p_hwfn, p_ptt, TM_REG_PF_SCAN_ACTIVE_TASK)))
 			break;
 
 		/* Dependent on number of connection/tasks, possibly
@@ -1190,8 +1277,7 @@
 		}
 
 		DP_VERBOSE(p_hwfn,
-			   NETIF_MSG_IFDOWN,
-			   "Shutting down the fastpath\n");
+			   NETIF_MSG_IFDOWN, "Shutting down the fastpath\n");
 
 		qed_wr(p_hwfn, p_ptt,
 		       NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x1);
@@ -1219,14 +1305,13 @@
 	       NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0);
 }
 
-static int qed_reg_assert(struct qed_hwfn *hwfn,
-			  struct qed_ptt *ptt, u32 reg,
-			  bool expected)
+static int qed_reg_assert(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u32 reg, bool expected)
 {
-	u32 assert_val = qed_rd(hwfn, ptt, reg);
+	u32 assert_val = qed_rd(p_hwfn, p_ptt, reg);
 
 	if (assert_val != expected) {
-		DP_NOTICE(hwfn, "Value at address 0x%x != 0x%08x\n",
+		DP_NOTICE(p_hwfn, "Value at address 0x%08x != 0x%08x\n",
 			  reg, expected);
 		return -EINVAL;
 	}
@@ -1306,8 +1391,7 @@
 
 	/* Clean Previous errors if such exist */
 	qed_wr(p_hwfn, p_hwfn->p_main_ptt,
-	       PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR,
-	       1 << p_hwfn->abs_pf_id);
+	       PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id);
 
 	/* enable internal target-read */
 	qed_wr(p_hwfn, p_hwfn->p_main_ptt,
@@ -1317,7 +1401,8 @@
 static void get_function_id(struct qed_hwfn *p_hwfn)
 {
 	/* ME Register */
-	p_hwfn->hw_info.opaque_fid = (u16)REG_RD(p_hwfn, PXP_PF_ME_OPAQUE_ADDR);
+	p_hwfn->hw_info.opaque_fid = (u16) REG_RD(p_hwfn,
+						  PXP_PF_ME_OPAQUE_ADDR);
 
 	p_hwfn->hw_info.concrete_fid = REG_RD(p_hwfn, PXP_PF_ME_CONCRETE_ADDR);
 
@@ -1326,6 +1411,10 @@
 				      PXP_CONCRETE_FID_PFID);
 	p_hwfn->port_id = GET_FIELD(p_hwfn->hw_info.concrete_fid,
 				    PXP_CONCRETE_FID_PORT);
+
+	DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE,
+		   "Read ME register: Concrete 0x%08x Opaque 0x%04x\n",
+		   p_hwfn->hw_info.concrete_fid, p_hwfn->hw_info.opaque_fid);
 }
 
 static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
@@ -1333,6 +1422,19 @@
 	u32 *feat_num = p_hwfn->hw_info.feat_num;
 	int num_features = 1;
 
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+	/* Roce CNQ each requires: 1 status block + 1 CNQ. We divide the
+	 * status blocks equally between L2 / RoCE but with consideration as
+	 * to how many l2 queues / cnqs we have
+	 */
+	if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) {
+		num_features++;
+
+		feat_num[QED_RDMA_CNQ] =
+			min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features,
+			      RESC_NUM(p_hwfn, QED_RDMA_CNQ_RAM));
+	}
+#endif
 	feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) /
 						num_features,
 					RESC_NUM(p_hwfn, QED_L2_QUEUE));
@@ -1373,6 +1475,10 @@
 	resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) /
 			     num_funcs;
 	resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs;
+	resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs;
+	resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs;
+	resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB /
+					 num_funcs;
 
 	for (i = 0; i < QED_MAX_RESC; i++)
 		resc_start[i] = resc_num[i] * enabled_func_idx;
@@ -1396,7 +1502,8 @@
 		   "RL = %d start = %d\n"
 		   "MAC = %d start = %d\n"
 		   "VLAN = %d start = %d\n"
-		   "ILT = %d start = %d\n",
+		   "ILT = %d start = %d\n"
+		   "LL2_QUEUE = %d start = %d\n",
 		   p_hwfn->hw_info.resc_num[QED_SB],
 		   p_hwfn->hw_info.resc_start[QED_SB],
 		   p_hwfn->hw_info.resc_num[QED_L2_QUEUE],
@@ -1412,13 +1519,14 @@
 		   p_hwfn->hw_info.resc_num[QED_VLAN],
 		   p_hwfn->hw_info.resc_start[QED_VLAN],
 		   p_hwfn->hw_info.resc_num[QED_ILT],
-		   p_hwfn->hw_info.resc_start[QED_ILT]);
+		   p_hwfn->hw_info.resc_start[QED_ILT],
+		   RESC_NUM(p_hwfn, QED_LL2_QUEUE),
+		   RESC_START(p_hwfn, QED_LL2_QUEUE));
 
 	return 0;
 }
 
-static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt)
+static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg;
 	u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities;
@@ -1472,8 +1580,7 @@
 		p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_1X25G;
 		break;
 	default:
-		DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n",
-			  core_cfg);
+		DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n", core_cfg);
 		break;
 	}
 
@@ -1484,11 +1591,11 @@
 	link_temp = qed_rd(p_hwfn, p_ptt,
 			   port_cfg_addr +
 			   offsetof(struct nvm_cfg1_port, speed_cap_mask));
-	link->speed.advertised_speeds =
-		link_temp & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK;
+	link_temp &= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK;
+	link->speed.advertised_speeds = link_temp;
 
-	p_hwfn->mcp_info->link_capabilities.speed_capabilities =
-						link->speed.advertised_speeds;
+	link_temp = link->speed.advertised_speeds;
+	p_hwfn->mcp_info->link_capabilities.speed_capabilities = link_temp;
 
 	link_temp = qed_rd(p_hwfn, p_ptt,
 			   port_cfg_addr +
@@ -1517,8 +1624,7 @@
 		link->speed.forced_speed = 100000;
 		break;
 	default:
-		DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n",
-			  link_temp);
+		DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", link_temp);
 	}
 
 	link_temp &= NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK;
@@ -1628,10 +1734,10 @@
 
 	DP_VERBOSE(p_hwfn,
 		   NETIF_MSG_PROBE,
-		   "PF [rel_id %d, abs_id %d] within the %d enabled functions on the engine\n",
+		   "PF [rel_id %d, abs_id %d] occupies index %d within the %d enabled functions on the engine\n",
 		   p_hwfn->rel_pf_id,
 		   p_hwfn->abs_pf_id,
-		   p_hwfn->num_funcs_on_engine);
+		   p_hwfn->enabled_func_idx, p_hwfn->num_funcs_on_engine);
 }
 
 static int
@@ -1703,10 +1809,9 @@
 	u32 tmp;
 
 	/* Read Vendor Id / Device Id */
-	pci_read_config_word(cdev->pdev, PCI_VENDOR_ID,
-			     &cdev->vendor_id);
-	pci_read_config_word(cdev->pdev, PCI_DEVICE_ID,
-			     &cdev->device_id);
+	pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, &cdev->vendor_id);
+	pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, &cdev->device_id);
+
 	cdev->chip_num = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt,
 				     MISCS_REG_CHIP_NUM);
 	cdev->chip_rev = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt,
@@ -1771,10 +1876,8 @@
 
 	/* Allocate PTT pool */
 	rc = qed_ptt_pool_alloc(p_hwfn);
-	if (rc) {
-		DP_NOTICE(p_hwfn, "Failed to prepare hwfn's hw\n");
+	if (rc)
 		goto err0;
-	}
 
 	/* Allocate the main PTT */
 	p_hwfn->p_main_ptt = qed_get_reserved_ptt(p_hwfn, RESERVED_PTT_MAIN);
@@ -1782,7 +1885,7 @@
 	/* First hwfn learns basic information, e.g., number of hwfns */
 	if (!p_hwfn->my_id) {
 		rc = qed_get_dev_info(p_hwfn->cdev);
-		if (rc != 0)
+		if (rc)
 			goto err1;
 	}
 
@@ -1804,10 +1907,8 @@
 
 	/* Allocate the init RT array and initialize the init-ops engine */
 	rc = qed_init_alloc(p_hwfn);
-	if (rc) {
-		DP_NOTICE(p_hwfn, "Failed to allocate the init array\n");
+	if (rc)
 		goto err2;
-	}
 
 	return rc;
 err2:
@@ -2015,10 +2116,8 @@
 		p_virt = dma_alloc_coherent(&cdev->pdev->dev,
 					    QED_CHAIN_PAGE_SIZE,
 					    &p_phys, GFP_KERNEL);
-		if (!p_virt) {
-			DP_NOTICE(cdev, "Failed to allocate chain memory\n");
+		if (!p_virt)
 			return -ENOMEM;
-		}
 
 		if (i == 0) {
 			qed_chain_init_mem(p_chain, p_virt, p_phys);
@@ -2048,10 +2147,8 @@
 
 	p_virt = dma_alloc_coherent(&cdev->pdev->dev,
 				    QED_CHAIN_PAGE_SIZE, &p_phys, GFP_KERNEL);
-	if (!p_virt) {
-		DP_NOTICE(cdev, "Failed to allocate chain memory\n");
+	if (!p_virt)
 		return -ENOMEM;
-	}
 
 	qed_chain_init_mem(p_chain, p_virt, p_phys);
 	qed_chain_reset(p_chain);
@@ -2068,13 +2165,9 @@
 	void *p_virt = NULL;
 
 	size = page_cnt * sizeof(*pp_virt_addr_tbl);
-	pp_virt_addr_tbl = vmalloc(size);
-	if (!pp_virt_addr_tbl) {
-		DP_NOTICE(cdev,
-			  "Failed to allocate memory for the chain virtual addresses table\n");
+	pp_virt_addr_tbl = vzalloc(size);
+	if (!pp_virt_addr_tbl)
 		return -ENOMEM;
-	}
-	memset(pp_virt_addr_tbl, 0, size);
 
 	/* The allocation of the PBL table is done with its full size, since it
 	 * is expected to be successive.
@@ -2087,19 +2180,15 @@
 					size, &p_pbl_phys, GFP_KERNEL);
 	qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys,
 			       pp_virt_addr_tbl);
-	if (!p_pbl_virt) {
-		DP_NOTICE(cdev, "Failed to allocate chain pbl memory\n");
+	if (!p_pbl_virt)
 		return -ENOMEM;
-	}
 
 	for (i = 0; i < page_cnt; i++) {
 		p_virt = dma_alloc_coherent(&cdev->pdev->dev,
 					    QED_CHAIN_PAGE_SIZE,
 					    &p_phys, GFP_KERNEL);
-		if (!p_virt) {
-			DP_NOTICE(cdev, "Failed to allocate chain memory\n");
+		if (!p_virt)
 			return -ENOMEM;
-		}
 
 		if (i == 0) {
 			qed_chain_init_mem(p_chain, p_virt, p_phys);
@@ -2134,7 +2223,8 @@
 	rc = qed_chain_alloc_sanity_check(cdev, cnt_type, elem_size, page_cnt);
 	if (rc) {
 		DP_NOTICE(cdev,
-			  "Cannot allocate a chain with the given arguments:\n"
+			  "Cannot allocate a chain with the given arguments:\n");
+		DP_NOTICE(cdev,
 			  "[use_mode %d, mode %d, cnt_type %d, num_elems %d, elem_size %zu]\n",
 			  intended_use, mode, cnt_type, num_elems, elem_size);
 		return rc;
@@ -2183,8 +2273,7 @@
 	return 0;
 }
 
-int qed_fw_vport(struct qed_hwfn *p_hwfn,
-		 u8 src_id, u8 *dst_id)
+int qed_fw_vport(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id)
 {
 	if (src_id >= RESC_NUM(p_hwfn, QED_VPORT)) {
 		u8 min, max;
@@ -2203,8 +2292,7 @@
 	return 0;
 }
 
-int qed_fw_rss_eng(struct qed_hwfn *p_hwfn,
-		   u8 src_id, u8 *dst_id)
+int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id)
 {
 	if (src_id >= RESC_NUM(p_hwfn, QED_RSS_ENG)) {
 		u8 min, max;
@@ -2223,6 +2311,98 @@
 	return 0;
 }
 
+static void qed_llh_mac_to_filter(u32 *p_high, u32 *p_low,
+				  u8 *p_filter)
+{
+	*p_high = p_filter[1] | (p_filter[0] << 8);
+	*p_low = p_filter[5] | (p_filter[4] << 8) |
+		 (p_filter[3] << 16) | (p_filter[2] << 24);
+}
+
+int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn,
+			   struct qed_ptt *p_ptt, u8 *p_filter)
+{
+	u32 high = 0, low = 0, en;
+	int i;
+
+	if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+		return 0;
+
+	qed_llh_mac_to_filter(&high, &low, p_filter);
+
+	/* Find a free entry and utilize it */
+	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+		en = qed_rd(p_hwfn, p_ptt,
+			    NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32));
+		if (en)
+			continue;
+		qed_wr(p_hwfn, p_ptt,
+		       NIG_REG_LLH_FUNC_FILTER_VALUE +
+		       2 * i * sizeof(u32), low);
+		qed_wr(p_hwfn, p_ptt,
+		       NIG_REG_LLH_FUNC_FILTER_VALUE +
+		       (2 * i + 1) * sizeof(u32), high);
+		qed_wr(p_hwfn, p_ptt,
+		       NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0);
+		qed_wr(p_hwfn, p_ptt,
+		       NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
+		       i * sizeof(u32), 0);
+		qed_wr(p_hwfn, p_ptt,
+		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1);
+		break;
+	}
+	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
+		DP_NOTICE(p_hwfn,
+			  "Failed to find an empty LLH filter to utilize\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
+		   "mac: %pM is added at %d\n",
+		   p_filter, i);
+
+	return 0;
+}
+
+void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn,
+			       struct qed_ptt *p_ptt, u8 *p_filter)
+{
+	u32 high = 0, low = 0;
+	int i;
+
+	if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
+		return;
+
+	qed_llh_mac_to_filter(&high, &low, p_filter);
+
+	/* Find the entry and clean it */
+	for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+		if (qed_rd(p_hwfn, p_ptt,
+			   NIG_REG_LLH_FUNC_FILTER_VALUE +
+			   2 * i * sizeof(u32)) != low)
+			continue;
+		if (qed_rd(p_hwfn, p_ptt,
+			   NIG_REG_LLH_FUNC_FILTER_VALUE +
+			   (2 * i + 1) * sizeof(u32)) != high)
+			continue;
+
+		qed_wr(p_hwfn, p_ptt,
+		       NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0);
+		qed_wr(p_hwfn, p_ptt,
+		       NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0);
+		qed_wr(p_hwfn, p_ptt,
+		       NIG_REG_LLH_FUNC_FILTER_VALUE +
+		       (2 * i + 1) * sizeof(u32), 0);
+
+		DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
+			   "mac: %pM is removed from %d\n",
+			   p_filter, i);
+		break;
+	}
+	if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
+		DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n");
+}
+
 static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			    u32 hw_addr, void *p_eth_qzone,
 			    size_t eth_qzone_size, u8 timeset)
@@ -2386,8 +2566,7 @@
  * 3. total_req_min_rate [all vports min rate sum] shouldn't exceed min_pf_rate.
  */
 static int qed_init_wfq_param(struct qed_hwfn *p_hwfn,
-			      u16 vport_id, u32 req_rate,
-			      u32 min_pf_rate)
+			      u16 vport_id, u32 req_rate, u32 min_pf_rate)
 {
 	u32 total_req_min_rate = 0, total_left_rate = 0, left_rate_per_vp = 0;
 	int non_requested_count = 0, req_count = 0, i, num_vports;
@@ -2471,7 +2650,7 @@
 
 	rc = qed_init_wfq_param(p_hwfn, vp_id, rate, p_link->min_pf_rate);
 
-	if (rc == 0)
+	if (!rc)
 		qed_configure_wfq_for_all_vports(p_hwfn, p_ptt,
 						 p_link->min_pf_rate);
 	else
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index 343bb03..b6711c1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -310,6 +310,26 @@
 		   u8 *dst_id);
 
 /**
+ * @brief qed_llh_add_mac_filter - configures a MAC filter in llh
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_filter - MAC to add
+ */
+int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn,
+			   struct qed_ptt *p_ptt, u8 *p_filter);
+
+/**
+ * @brief qed_llh_remove_mac_filter - removes a MAC filter from llh
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_filter - MAC to remove
+ */
+void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn,
+			       struct qed_ptt *p_ptt, u8 *p_filter);
+
+/**
  * *@brief Cleanup of previous driver remains prior to load
  *
  * @param p_hwfn
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 5927840..72eee29 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -536,6 +536,247 @@
 	struct regpair ustorm_st_padding[2];
 };
 
+enum core_error_handle {
+	LL2_DROP_PACKET,
+	LL2_DO_NOTHING,
+	LL2_ASSERT,
+	MAX_CORE_ERROR_HANDLE
+};
+
+enum core_event_opcode {
+	CORE_EVENT_TX_QUEUE_START,
+	CORE_EVENT_TX_QUEUE_STOP,
+	CORE_EVENT_RX_QUEUE_START,
+	CORE_EVENT_RX_QUEUE_STOP,
+	MAX_CORE_EVENT_OPCODE
+};
+
+enum core_l4_pseudo_checksum_mode {
+	CORE_L4_PSEUDO_CSUM_CORRECT_LENGTH,
+	CORE_L4_PSEUDO_CSUM_ZERO_LENGTH,
+	MAX_CORE_L4_PSEUDO_CHECKSUM_MODE
+};
+
+struct core_ll2_port_stats {
+	struct regpair gsi_invalid_hdr;
+	struct regpair gsi_invalid_pkt_length;
+	struct regpair gsi_unsupported_pkt_typ;
+	struct regpair gsi_crcchksm_error;
+};
+
+struct core_ll2_pstorm_per_queue_stat {
+	struct regpair sent_ucast_bytes;
+	struct regpair sent_mcast_bytes;
+	struct regpair sent_bcast_bytes;
+	struct regpair sent_ucast_pkts;
+	struct regpair sent_mcast_pkts;
+	struct regpair sent_bcast_pkts;
+};
+
+struct core_ll2_rx_prod {
+	__le16 bd_prod;
+	__le16 cqe_prod;
+	__le32 reserved;
+};
+
+struct core_ll2_tstorm_per_queue_stat {
+	struct regpair packet_too_big_discard;
+	struct regpair no_buff_discard;
+};
+
+struct core_ll2_ustorm_per_queue_stat {
+	struct regpair rcv_ucast_bytes;
+	struct regpair rcv_mcast_bytes;
+	struct regpair rcv_bcast_bytes;
+	struct regpair rcv_ucast_pkts;
+	struct regpair rcv_mcast_pkts;
+	struct regpair rcv_bcast_pkts;
+};
+
+enum core_ramrod_cmd_id {
+	CORE_RAMROD_UNUSED,
+	CORE_RAMROD_RX_QUEUE_START,
+	CORE_RAMROD_TX_QUEUE_START,
+	CORE_RAMROD_RX_QUEUE_STOP,
+	CORE_RAMROD_TX_QUEUE_STOP,
+	MAX_CORE_RAMROD_CMD_ID
+};
+
+enum core_roce_flavor_type {
+	CORE_ROCE,
+	CORE_RROCE,
+	MAX_CORE_ROCE_FLAVOR_TYPE
+};
+
+struct core_rx_action_on_error {
+	u8 error_type;
+#define CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG_MASK	0x3
+#define CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG_SHIFT 0
+#define CORE_RX_ACTION_ON_ERROR_NO_BUFF_MASK	0x3
+#define CORE_RX_ACTION_ON_ERROR_NO_BUFF_SHIFT	2
+#define CORE_RX_ACTION_ON_ERROR_RESERVED_MASK	0xF
+#define CORE_RX_ACTION_ON_ERROR_RESERVED_SHIFT	4
+};
+
+struct core_rx_bd {
+	struct regpair addr;
+	__le16 reserved[4];
+};
+
+struct core_rx_bd_with_buff_len {
+	struct regpair addr;
+	__le16 buff_length;
+	__le16 reserved[3];
+};
+
+union core_rx_bd_union {
+	struct core_rx_bd rx_bd;
+	struct core_rx_bd_with_buff_len rx_bd_with_len;
+};
+
+struct core_rx_cqe_opaque_data {
+	__le32 data[2];
+};
+
+enum core_rx_cqe_type {
+	CORE_RX_CQE_ILLIGAL_TYPE,
+	CORE_RX_CQE_TYPE_REGULAR,
+	CORE_RX_CQE_TYPE_GSI_OFFLOAD,
+	CORE_RX_CQE_TYPE_SLOW_PATH,
+	MAX_CORE_RX_CQE_TYPE
+};
+
+struct core_rx_fast_path_cqe {
+	u8 type;
+	u8 placement_offset;
+	struct parsing_and_err_flags parse_flags;
+	__le16 packet_length;
+	__le16 vlan;
+	struct core_rx_cqe_opaque_data opaque_data;
+	__le32 reserved[4];
+};
+
+struct core_rx_gsi_offload_cqe {
+	u8 type;
+	u8 data_length_error;
+	struct parsing_and_err_flags parse_flags;
+	__le16 data_length;
+	__le16 vlan;
+	__le32 src_mac_addrhi;
+	__le16 src_mac_addrlo;
+	u8 reserved1[2];
+	__le32 gid_dst[4];
+};
+
+struct core_rx_slow_path_cqe {
+	u8 type;
+	u8 ramrod_cmd_id;
+	__le16 echo;
+	__le32 reserved1[7];
+};
+
+union core_rx_cqe_union {
+	struct core_rx_fast_path_cqe rx_cqe_fp;
+	struct core_rx_gsi_offload_cqe rx_cqe_gsi;
+	struct core_rx_slow_path_cqe rx_cqe_sp;
+};
+
+struct core_rx_start_ramrod_data {
+	struct regpair bd_base;
+	struct regpair cqe_pbl_addr;
+	__le16 mtu;
+	__le16 sb_id;
+	u8 sb_index;
+	u8 complete_cqe_flg;
+	u8 complete_event_flg;
+	u8 drop_ttl0_flg;
+	__le16 num_of_pbl_pages;
+	u8 inner_vlan_removal_en;
+	u8 queue_id;
+	u8 main_func_queue;
+	u8 mf_si_bcast_accept_all;
+	u8 mf_si_mcast_accept_all;
+	struct core_rx_action_on_error action_on_error;
+	u8 gsi_offload_flag;
+	u8 reserved[7];
+};
+
+struct core_rx_stop_ramrod_data {
+	u8 complete_cqe_flg;
+	u8 complete_event_flg;
+	u8 queue_id;
+	u8 reserved1;
+	__le16 reserved2[2];
+};
+
+struct core_tx_bd_flags {
+	u8 as_bitfield;
+#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_MASK	0x1
+#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_SHIFT	0
+#define CORE_TX_BD_FLAGS_VLAN_INSERTION_MASK	0x1
+#define CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT	1
+#define CORE_TX_BD_FLAGS_START_BD_MASK	0x1
+#define CORE_TX_BD_FLAGS_START_BD_SHIFT	2
+#define CORE_TX_BD_FLAGS_IP_CSUM_MASK	0x1
+#define CORE_TX_BD_FLAGS_IP_CSUM_SHIFT	3
+#define CORE_TX_BD_FLAGS_L4_CSUM_MASK	0x1
+#define CORE_TX_BD_FLAGS_L4_CSUM_SHIFT	4
+#define CORE_TX_BD_FLAGS_IPV6_EXT_MASK	0x1
+#define CORE_TX_BD_FLAGS_IPV6_EXT_SHIFT	5
+#define CORE_TX_BD_FLAGS_L4_PROTOCOL_MASK	0x1
+#define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT	6
+#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK	0x1
+#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7
+#define CORE_TX_BD_FLAGS_ROCE_FLAV_MASK		0x1
+#define CORE_TX_BD_FLAGS_ROCE_FLAV_SHIFT	12
+
+};
+
+struct core_tx_bd {
+	struct regpair addr;
+	__le16 nbytes;
+	__le16 nw_vlan_or_lb_echo;
+	u8 bitfield0;
+#define CORE_TX_BD_NBDS_MASK	0xF
+#define CORE_TX_BD_NBDS_SHIFT	0
+#define CORE_TX_BD_ROCE_FLAV_MASK	0x1
+#define CORE_TX_BD_ROCE_FLAV_SHIFT	4
+#define CORE_TX_BD_RESERVED0_MASK	0x7
+#define CORE_TX_BD_RESERVED0_SHIFT	5
+	struct core_tx_bd_flags bd_flags;
+	__le16 bitfield1;
+#define CORE_TX_BD_L4_HDR_OFFSET_W_MASK	0x3FFF
+#define CORE_TX_BD_L4_HDR_OFFSET_W_SHIFT 0
+#define CORE_TX_BD_TX_DST_MASK	0x1
+#define CORE_TX_BD_TX_DST_SHIFT	14
+#define CORE_TX_BD_RESERVED1_MASK	0x1
+#define CORE_TX_BD_RESERVED1_SHIFT	15
+};
+
+enum core_tx_dest {
+	CORE_TX_DEST_NW,
+	CORE_TX_DEST_LB,
+	MAX_CORE_TX_DEST
+};
+
+struct core_tx_start_ramrod_data {
+	struct regpair pbl_base_addr;
+	__le16 mtu;
+	__le16 sb_id;
+	u8 sb_index;
+	u8 stats_en;
+	u8 stats_id;
+	u8 conn_type;
+	__le16 pbl_size;
+	__le16 qm_pq_id;
+	u8 gsi_offload_flag;
+	u8 resrved[3];
+};
+
+struct core_tx_stop_ramrod_data {
+	__le32 reserved0[2];
+};
+
 struct eth_mstorm_per_pf_stat {
 	struct regpair gre_discard_pkts;
 	struct regpair vxlan_discard_pkts;
@@ -636,9 +877,33 @@
 };
 
 /* Mstorm non-triggering VF zone */
+enum malicious_vf_error_id {
+	MALICIOUS_VF_NO_ERROR,
+	VF_PF_CHANNEL_NOT_READY,
+	VF_ZONE_MSG_NOT_VALID,
+	VF_ZONE_FUNC_NOT_ENABLED,
+	ETH_PACKET_TOO_SMALL,
+	ETH_ILLEGAL_VLAN_MODE,
+	ETH_MTU_VIOLATION,
+	ETH_ILLEGAL_INBAND_TAGS,
+	ETH_VLAN_INSERT_AND_INBAND_VLAN,
+	ETH_ILLEGAL_NBDS,
+	ETH_FIRST_BD_WO_SOP,
+	ETH_INSUFFICIENT_BDS,
+	ETH_ILLEGAL_LSO_HDR_NBDS,
+	ETH_ILLEGAL_LSO_MSS,
+	ETH_ZERO_SIZE_BD,
+	ETH_ILLEGAL_LSO_HDR_LEN,
+	ETH_INSUFFICIENT_PAYLOAD,
+	ETH_EDPM_OUT_OF_SYNC,
+	ETH_TUNN_IPV6_EXT_NBD_ERR,
+	ETH_CONTROL_PACKET_VIOLATION,
+	MAX_MALICIOUS_VF_ERROR_ID
+};
+
 struct mstorm_non_trigger_vf_zone {
 	struct eth_mstorm_per_queue_stat eth_queue_stat;
-	struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_NUM_RX_QUEUES_PER_VF];
+	struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD];
 };
 
 /* Mstorm VF zone */
@@ -705,13 +970,17 @@
 
 struct protocol_dcb_data {
 	u8 dcb_enable_flag;
+	u8 reserved_a;
 	u8 dcb_priority;
 	u8 dcb_tc;
-	u8 reserved;
+	u8 reserved_b;
+	u8 reserved0;
 };
 
 struct pf_update_tunnel_config {
 	u8 update_rx_pf_clss;
+	u8 update_rx_def_ucast_clss;
+	u8 update_rx_def_non_ucast_clss;
 	u8 update_tx_pf_clss;
 	u8 set_vxlan_udp_port_flg;
 	u8 set_geneve_udp_port_flg;
@@ -727,7 +996,7 @@
 	u8 tunnel_clss_ipgre;
 	__le16 vxlan_udp_port;
 	__le16 geneve_udp_port;
-	__le16 reserved[3];
+	__le16 reserved[2];
 };
 
 struct pf_update_ramrod_data {
@@ -736,16 +1005,17 @@
 	u8 update_fcoe_dcb_data_flag;
 	u8 update_iscsi_dcb_data_flag;
 	u8 update_roce_dcb_data_flag;
+	u8 update_rroce_dcb_data_flag;
 	u8 update_iwarp_dcb_data_flag;
 	u8 update_mf_vlan_flag;
-	u8 reserved;
 	struct protocol_dcb_data eth_dcb_data;
 	struct protocol_dcb_data fcoe_dcb_data;
 	struct protocol_dcb_data iscsi_dcb_data;
 	struct protocol_dcb_data roce_dcb_data;
+	struct protocol_dcb_data rroce_dcb_data;
 	struct protocol_dcb_data iwarp_dcb_data;
 	__le16 mf_vlan;
-	__le16 reserved2;
+	__le16 reserved;
 	struct pf_update_tunnel_config tunnel_config;
 };
 
@@ -766,10 +1036,14 @@
 	MAX_PROTOCOL_VERSION_ARRAY_KEY
 };
 
-/* Pstorm non-triggering VF zone */
+struct rdma_sent_stats {
+	struct regpair sent_bytes;
+	struct regpair sent_pkts;
+};
+
 struct pstorm_non_trigger_vf_zone {
 	struct eth_pstorm_per_queue_stat eth_queue_stat;
-	struct regpair reserved[2];
+	struct rdma_sent_stats rdma_stats;
 };
 
 /* Pstorm VF zone */
@@ -786,7 +1060,11 @@
 	__le16 echo;
 };
 
-/* Slowpath Element (SPQE) */
+struct rdma_rcv_stats {
+	struct regpair rcv_bytes;
+	struct regpair rcv_pkts;
+};
+
 struct slow_path_element {
 	struct ramrod_header hdr;
 	struct regpair data_ptr;
@@ -794,7 +1072,7 @@
 
 /* Tstorm non-triggering VF zone */
 struct tstorm_non_trigger_vf_zone {
-	struct regpair reserved[2];
+	struct rdma_rcv_stats rdma_stats;
 };
 
 struct tstorm_per_port_stat {
@@ -802,9 +1080,14 @@
 	struct regpair mac_error_discard;
 	struct regpair mftag_filter_discard;
 	struct regpair eth_mac_filter_discard;
-	struct regpair reserved[5];
+	struct regpair ll2_mac_filter_discard;
+	struct regpair ll2_conn_disabled_discard;
+	struct regpair iscsi_irregular_pkt;
+	struct regpair reserved;
+	struct regpair roce_irregular_pkt;
 	struct regpair eth_irregular_pkt;
-	struct regpair reserved1[2];
+	struct regpair reserved1;
+	struct regpair preroce_irregular_pkt;
 	struct regpair eth_gre_tunn_filter_discard;
 	struct regpair eth_vxlan_tunn_filter_discard;
 	struct regpair eth_geneve_tunn_filter_discard;
@@ -870,7 +1153,13 @@
 	__le32 reserved2;
 };
 
-/* Attentions status block */
+enum vf_zone_size_mode {
+	VF_ZONE_SIZE_MODE_DEFAULT,
+	VF_ZONE_SIZE_MODE_DOUBLE,
+	VF_ZONE_SIZE_MODE_QUAD,
+	MAX_VF_ZONE_SIZE_MODE
+};
+
 struct atten_status_block {
 	__le32 atten_bits;
 	__le32 atten_ack;
@@ -1442,13 +1731,6 @@
 	MAX_BIN_DBG_BUFFER_TYPE
 };
 
-/* Chip IDs */
-enum chip_ids {
-	CHIP_RESERVED,
-	CHIP_BB_B0,
-	CHIP_RESERVED2,
-	MAX_CHIP_IDS
-};
 
 /* Attention bit mapping */
 struct dbg_attn_bit_mapping {
@@ -1527,6 +1809,371 @@
 	MAX_DBG_ATTN_TYPE
 };
 
+/* condition header for registers dump */
+struct dbg_dump_cond_hdr {
+	struct dbg_mode_hdr mode; /* Mode header */
+	u8 block_id; /* block ID */
+	u8 data_size; /* size in dwords of the data following this header */
+};
+
+/* memory data for registers dump */
+struct dbg_dump_mem {
+	__le32 dword0;
+#define DBG_DUMP_MEM_ADDRESS_MASK       0xFFFFFF
+#define DBG_DUMP_MEM_ADDRESS_SHIFT      0
+#define DBG_DUMP_MEM_MEM_GROUP_ID_MASK  0xFF
+#define DBG_DUMP_MEM_MEM_GROUP_ID_SHIFT 24
+	__le32 dword1;
+#define DBG_DUMP_MEM_LENGTH_MASK        0xFFFFFF
+#define DBG_DUMP_MEM_LENGTH_SHIFT       0
+#define DBG_DUMP_MEM_RESERVED_MASK      0xFF
+#define DBG_DUMP_MEM_RESERVED_SHIFT     24
+};
+
+/* register data for registers dump */
+struct dbg_dump_reg {
+	__le32 data;
+#define DBG_DUMP_REG_ADDRESS_MASK  0xFFFFFF /* register address (in dwords) */
+#define DBG_DUMP_REG_ADDRESS_SHIFT 0
+#define DBG_DUMP_REG_LENGTH_MASK   0xFF /* register size (in dwords) */
+#define DBG_DUMP_REG_LENGTH_SHIFT  24
+};
+
+/* split header for registers dump */
+struct dbg_dump_split_hdr {
+	__le32 hdr;
+#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_MASK      0xFFFFFF
+#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_SHIFT     0
+#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_MASK  0xFF
+#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_SHIFT 24
+};
+
+/* condition header for idle check */
+struct dbg_idle_chk_cond_hdr {
+	struct dbg_mode_hdr mode; /* Mode header */
+	__le16 data_size; /* size in dwords of the data following this header */
+};
+
+/* Idle Check condition register */
+struct dbg_idle_chk_cond_reg {
+	__le32 data;
+#define DBG_IDLE_CHK_COND_REG_ADDRESS_MASK   0xFFFFFF
+#define DBG_IDLE_CHK_COND_REG_ADDRESS_SHIFT  0
+#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_MASK  0xFF
+#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_SHIFT 24
+	__le16 num_entries; /* number of registers entries to check */
+	u8 entry_size; /* size of registers entry (in dwords) */
+	u8 start_entry; /* index of the first entry to check */
+};
+
+/* Idle Check info register */
+struct dbg_idle_chk_info_reg {
+	__le32 data;
+#define DBG_IDLE_CHK_INFO_REG_ADDRESS_MASK   0xFFFFFF
+#define DBG_IDLE_CHK_INFO_REG_ADDRESS_SHIFT  0
+#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_MASK  0xFF
+#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_SHIFT 24
+	__le16 size; /* register size in dwords */
+	struct dbg_mode_hdr mode; /* Mode header */
+};
+
+/* Idle Check register */
+union dbg_idle_chk_reg {
+	struct dbg_idle_chk_cond_reg cond_reg; /* condition register */
+	struct dbg_idle_chk_info_reg info_reg; /* info register */
+};
+
+/* Idle Check result header */
+struct dbg_idle_chk_result_hdr {
+	__le16 rule_id; /* Failing rule index */
+	__le16 mem_entry_id; /* Failing memory entry index */
+	u8 num_dumped_cond_regs; /* number of dumped condition registers */
+	u8 num_dumped_info_regs; /* number of dumped condition registers */
+	u8 severity; /* from dbg_idle_chk_severity_types enum */
+	u8 reserved;
+};
+
+/* Idle Check result register header */
+struct dbg_idle_chk_result_reg_hdr {
+	u8 data;
+#define DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_MASK  0x1
+#define DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_SHIFT 0
+#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_MASK  0x7F
+#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_SHIFT 1
+	u8 start_entry; /* index of the first checked entry */
+	__le16 size; /* register size in dwords */
+};
+
+/* Idle Check rule */
+struct dbg_idle_chk_rule {
+	__le16 rule_id; /* Idle Check rule ID */
+	u8 severity; /* value from dbg_idle_chk_severity_types enum */
+	u8 cond_id; /* Condition ID */
+	u8 num_cond_regs; /* number of condition registers */
+	u8 num_info_regs; /* number of info registers */
+	u8 num_imms; /* number of immediates in the condition */
+	u8 reserved1;
+	__le16 reg_offset; /* offset of this rules registers in the idle check
+			    * register array (in dbg_idle_chk_reg units).
+			    */
+	__le16 imm_offset; /* offset of this rules immediate values in the
+			    * immediate values array (in dwords).
+			    */
+};
+
+/* Idle Check rule parsing data */
+struct dbg_idle_chk_rule_parsing_data {
+	__le32 data;
+#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_MASK  0x1
+#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_SHIFT 0
+#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_MASK  0x7FFFFFFF
+#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_SHIFT 1
+};
+
+/* idle check severity types */
+enum dbg_idle_chk_severity_types {
+	/* idle check failure should cause an error */
+	IDLE_CHK_SEVERITY_ERROR,
+	/* idle check failure should cause an error only if theres no traffic */
+	IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC,
+	/* idle check failure should cause a warning */
+	IDLE_CHK_SEVERITY_WARNING,
+	MAX_DBG_IDLE_CHK_SEVERITY_TYPES
+};
+
+/* Debug Bus block data */
+struct dbg_bus_block_data {
+	u8 enabled; /* Indicates if the block is enabled for recording (0/1) */
+	u8 hw_id; /* HW ID associated with the block */
+	u8 line_num; /* Debug line number to select */
+	u8 right_shift; /* Number of units to  right the debug data (0-3) */
+	u8 cycle_en; /* 4-bit value: bit i set -> unit i is enabled. */
+	u8 force_valid; /* 4-bit value: bit i set -> unit i is forced valid. */
+	u8 force_frame; /* 4-bit value: bit i set -> unit i frame bit is forced.
+			 */
+	u8 reserved;
+};
+
+/* Debug Bus Clients */
+enum dbg_bus_clients {
+	DBG_BUS_CLIENT_RBCN,
+	DBG_BUS_CLIENT_RBCP,
+	DBG_BUS_CLIENT_RBCR,
+	DBG_BUS_CLIENT_RBCT,
+	DBG_BUS_CLIENT_RBCU,
+	DBG_BUS_CLIENT_RBCF,
+	DBG_BUS_CLIENT_RBCX,
+	DBG_BUS_CLIENT_RBCS,
+	DBG_BUS_CLIENT_RBCH,
+	DBG_BUS_CLIENT_RBCZ,
+	DBG_BUS_CLIENT_OTHER_ENGINE,
+	DBG_BUS_CLIENT_TIMESTAMP,
+	DBG_BUS_CLIENT_CPU,
+	DBG_BUS_CLIENT_RBCY,
+	DBG_BUS_CLIENT_RBCQ,
+	DBG_BUS_CLIENT_RBCM,
+	DBG_BUS_CLIENT_RBCB,
+	DBG_BUS_CLIENT_RBCW,
+	DBG_BUS_CLIENT_RBCV,
+	MAX_DBG_BUS_CLIENTS
+};
+
+/* Debug Bus memory address */
+struct dbg_bus_mem_addr {
+	__le32 lo;
+	__le32 hi;
+};
+
+/* Debug Bus PCI buffer data */
+struct dbg_bus_pci_buf_data {
+	struct dbg_bus_mem_addr phys_addr; /* PCI buffer physical address */
+	struct dbg_bus_mem_addr virt_addr; /* PCI buffer virtual address */
+	__le32 size; /* PCI buffer size in bytes */
+};
+
+/* Debug Bus Storm EID range filter params */
+struct dbg_bus_storm_eid_range_params {
+	u8 min; /* Minimal event ID to filter on */
+	u8 max; /* Maximal event ID to filter on */
+};
+
+/* Debug Bus Storm EID mask filter params */
+struct dbg_bus_storm_eid_mask_params {
+	u8 val; /* Event ID value */
+	u8 mask; /* Event ID mask. 1s in the mask = dont care bits. */
+};
+
+/* Debug Bus Storm EID filter params */
+union dbg_bus_storm_eid_params {
+	struct dbg_bus_storm_eid_range_params range;
+	struct dbg_bus_storm_eid_mask_params mask;
+};
+
+/* Debug Bus Storm data */
+struct dbg_bus_storm_data {
+	u8 fast_enabled;
+	u8 fast_mode;
+	u8 slow_enabled;
+	u8 slow_mode;
+	u8 hw_id;
+	u8 eid_filter_en;
+	u8 eid_range_not_mask;
+	u8 cid_filter_en;
+	union dbg_bus_storm_eid_params eid_filter_params;
+	__le16 reserved;
+	__le32 cid;
+};
+
+/* Debug Bus data */
+struct dbg_bus_data {
+	__le32 app_version; /* The tools version number of the application */
+	u8 state; /* The current debug bus state */
+	u8 hw_dwords; /* HW dwords per cycle */
+	u8 next_hw_id; /* Next HW ID to be associated with an input */
+	u8 num_enabled_blocks; /* Number of blocks enabled for recording */
+	u8 num_enabled_storms; /* Number of Storms enabled for recording */
+	u8 target; /* Output target */
+	u8 next_trigger_state; /* ID of next trigger state to be added */
+	u8 next_constraint_id; /* ID of next filter/trigger constraint to be
+				* added.
+				*/
+	u8 one_shot_en; /* Indicates if one-shot mode is enabled (0/1) */
+	u8 grc_input_en; /* Indicates if GRC recording is enabled (0/1) */
+	u8 timestamp_input_en; /* Indicates if timestamp recording is enabled
+				* (0/1).
+				*/
+	u8 filter_en; /* Indicates if the recording filter is enabled (0/1) */
+	u8 trigger_en; /* Indicates if the recording trigger is enabled (0/1) */
+	u8 adding_filter; /* If true, the next added constraint belong to the
+			   * filter. Otherwise, it belongs to the last added
+			   * trigger state. Valid only if either filter or
+			   * triggers are enabled.
+			   */
+	u8 filter_pre_trigger; /* Indicates if the recording filter should be
+				* applied before the trigger. Valid only if both
+				* filter and trigger are enabled (0/1).
+				*/
+	u8 filter_post_trigger; /* Indicates if the recording filter should be
+				 * applied after the trigger. Valid only if both
+				 * filter and trigger are enabled (0/1).
+				 */
+	u8 unify_inputs; /* If true, all inputs are associated with HW ID 0.
+			  * Otherwise, each input is assigned a different HW ID
+			  * (0/1).
+			  */
+	u8 rcv_from_other_engine; /* Indicates if the other engine sends it NW
+				   * recording to this engine (0/1).
+				   */
+	struct dbg_bus_pci_buf_data pci_buf; /* Debug Bus PCI buffer data. Valid
+					      * only when the target is
+					      * DBG_BUS_TARGET_ID_PCI.
+					      */
+	__le16 reserved;
+	struct dbg_bus_block_data blocks[80];/* Debug Bus data for each block */
+	struct dbg_bus_storm_data storms[6]; /* Debug Bus data for each block */
+};
+
+/* Debug bus frame modes */
+enum dbg_bus_frame_modes {
+	DBG_BUS_FRAME_MODE_0HW_4ST = 0, /* 0 HW dwords, 4 Storm dwords */
+	DBG_BUS_FRAME_MODE_4HW_0ST = 3, /* 4 HW dwords, 0 Storm dwords */
+	DBG_BUS_FRAME_MODE_8HW_0ST = 4, /* 8 HW dwords, 0 Storm dwords */
+	MAX_DBG_BUS_FRAME_MODES
+};
+
+/* Debug bus states */
+enum dbg_bus_states {
+	DBG_BUS_STATE_IDLE, /* debug bus idle state (not recording) */
+	DBG_BUS_STATE_READY, /* debug bus is ready for configuration and
+			      * recording.
+			      */
+	DBG_BUS_STATE_RECORDING, /* debug bus is currently recording */
+	DBG_BUS_STATE_STOPPED, /* debug bus recording has stopped */
+	MAX_DBG_BUS_STATES
+};
+
+/* Debug bus target IDs */
+enum dbg_bus_targets {
+	/* records debug bus to DBG block internal buffer */
+	DBG_BUS_TARGET_ID_INT_BUF,
+	/* records debug bus to the NW */
+	DBG_BUS_TARGET_ID_NIG,
+	/* records debug bus to a PCI buffer */
+	DBG_BUS_TARGET_ID_PCI,
+	MAX_DBG_BUS_TARGETS
+};
+
+/* GRC Dump data */
+struct dbg_grc_data {
+	__le32 param_val[40]; /* Value of each GRC parameter. Array size must
+			       * match the enum dbg_grc_params.
+			       */
+	u8 param_set_by_user[40]; /* Indicates for each GRC parameter if it was
+				   * set by the user (0/1). Array size must
+				   * match the enum dbg_grc_params.
+				   */
+};
+
+/* Debug GRC params */
+enum dbg_grc_params {
+	DBG_GRC_PARAM_DUMP_TSTORM, /* dump Tstorm memories (0/1) */
+	DBG_GRC_PARAM_DUMP_MSTORM, /* dump Mstorm memories (0/1) */
+	DBG_GRC_PARAM_DUMP_USTORM, /* dump Ustorm memories (0/1) */
+	DBG_GRC_PARAM_DUMP_XSTORM, /* dump Xstorm memories (0/1) */
+	DBG_GRC_PARAM_DUMP_YSTORM, /* dump Ystorm memories (0/1) */
+	DBG_GRC_PARAM_DUMP_PSTORM, /* dump Pstorm memories (0/1) */
+	DBG_GRC_PARAM_DUMP_REGS, /* dump non-memory registers (0/1) */
+	DBG_GRC_PARAM_DUMP_RAM, /* dump Storm internal RAMs (0/1) */
+	DBG_GRC_PARAM_DUMP_PBUF, /* dump Storm passive buffer (0/1) */
+	DBG_GRC_PARAM_DUMP_IOR, /* dump Storm IORs (0/1) */
+	DBG_GRC_PARAM_DUMP_VFC, /* dump VFC memories (0/1) */
+	DBG_GRC_PARAM_DUMP_CM_CTX, /* dump CM contexts (0/1) */
+	DBG_GRC_PARAM_DUMP_PXP, /* dump PXP memories (0/1) */
+	DBG_GRC_PARAM_DUMP_RSS, /* dump RSS memories (0/1) */
+	DBG_GRC_PARAM_DUMP_CAU, /* dump CAU memories (0/1) */
+	DBG_GRC_PARAM_DUMP_QM, /* dump QM memories (0/1) */
+	DBG_GRC_PARAM_DUMP_MCP, /* dump MCP memories (0/1) */
+	DBG_GRC_PARAM_RESERVED, /* reserved */
+	DBG_GRC_PARAM_DUMP_CFC, /* dump CFC memories (0/1) */
+	DBG_GRC_PARAM_DUMP_IGU, /* dump IGU memories (0/1) */
+	DBG_GRC_PARAM_DUMP_BRB, /* dump BRB memories (0/1) */
+	DBG_GRC_PARAM_DUMP_BTB, /* dump BTB memories (0/1) */
+	DBG_GRC_PARAM_DUMP_BMB, /* dump BMB memories (0/1) */
+	DBG_GRC_PARAM_DUMP_NIG, /* dump NIG memories (0/1) */
+	DBG_GRC_PARAM_DUMP_MULD, /* dump MULD memories (0/1) */
+	DBG_GRC_PARAM_DUMP_PRS, /* dump PRS memories (0/1) */
+	DBG_GRC_PARAM_DUMP_DMAE, /* dump PRS memories (0/1) */
+	DBG_GRC_PARAM_DUMP_TM, /* dump TM (timers) memories (0/1) */
+	DBG_GRC_PARAM_DUMP_SDM, /* dump SDM memories (0/1) */
+	DBG_GRC_PARAM_DUMP_DIF, /* dump DIF memories (0/1) */
+	DBG_GRC_PARAM_DUMP_STATIC, /* dump static debug data (0/1) */
+	DBG_GRC_PARAM_UNSTALL, /* un-stall Storms after dump (0/1) */
+	DBG_GRC_PARAM_NUM_LCIDS, /* number of LCIDs (0..320) */
+	DBG_GRC_PARAM_NUM_LTIDS, /* number of LTIDs (0..320) */
+	/* preset: exclude all memories from dump (1 only) */
+	DBG_GRC_PARAM_EXCLUDE_ALL,
+	/* preset: include memories for crash dump (1 only) */
+	DBG_GRC_PARAM_CRASH,
+	/* perform dump only if MFW is responding (0/1) */
+	DBG_GRC_PARAM_PARITY_SAFE,
+	DBG_GRC_PARAM_DUMP_CM, /* dump CM memories (0/1) */
+	DBG_GRC_PARAM_DUMP_PHY, /* dump PHY memories (0/1) */
+	MAX_DBG_GRC_PARAMS
+};
+
+/* Debug reset registers */
+enum dbg_reset_regs {
+	DBG_RESET_REG_MISCS_PL_UA,
+	DBG_RESET_REG_MISCS_PL_HV,
+	DBG_RESET_REG_MISCS_PL_HV_2,
+	DBG_RESET_REG_MISC_PL_UA,
+	DBG_RESET_REG_MISC_PL_HV,
+	DBG_RESET_REG_MISC_PL_PDA_VMAIN_1,
+	DBG_RESET_REG_MISC_PL_PDA_VMAIN_2,
+	DBG_RESET_REG_MISC_PL_PDA_VAUX,
+	MAX_DBG_RESET_REGS
+};
+
 /* Debug status codes */
 enum dbg_status {
 	DBG_STATUS_OK,
@@ -1579,9 +2226,45 @@
 	DBG_STATUS_REG_FIFO_BAD_DATA,
 	DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA,
 	DBG_STATUS_DBG_ARRAY_NOT_SET,
+	DBG_STATUS_MULTI_BLOCKS_WITH_FILTER,
 	MAX_DBG_STATUS
 };
 
+/* Debug Storms IDs */
+enum dbg_storms {
+	DBG_TSTORM_ID,
+	DBG_MSTORM_ID,
+	DBG_USTORM_ID,
+	DBG_XSTORM_ID,
+	DBG_YSTORM_ID,
+	DBG_PSTORM_ID,
+	MAX_DBG_STORMS
+};
+
+/* Idle Check data */
+struct idle_chk_data {
+	__le32 buf_size; /* Idle check buffer size in dwords */
+	u8 buf_size_set; /* Indicates if the idle check buffer size was set
+			  * (0/1).
+			  */
+	u8 reserved1;
+	__le16 reserved2;
+};
+
+/* Debug Tools data (per HW function) */
+struct dbg_tools_data {
+	struct dbg_grc_data grc; /* GRC Dump data */
+	struct dbg_bus_data bus; /* Debug Bus data */
+	struct idle_chk_data idle_chk; /* Idle Check data */
+	u8 mode_enable[40]; /* Indicates if a mode is enabled (0/1) */
+	u8 block_in_reset[80]; /* Indicates if a block is in reset state (0/1).
+				*/
+	u8 chip_id; /* Chip ID (from enum chip_ids) */
+	u8 platform_id; /* Platform ID (from enum platform_ids) */
+	u8 initialized; /* Indicates if the data was initialized */
+	u8 reserved;
+};
+
 /********************************/
 /* HSI Init Functions constants */
 /********************************/
@@ -1589,7 +2272,41 @@
 /* Number of VLAN priorities */
 #define NUM_OF_VLAN_PRIORITIES	8
 
-/* QM per-port init parameters */
+struct init_brb_ram_req {
+	__le32 guranteed_per_tc;
+	__le32 headroom_per_tc;
+	__le32 min_pkt_size;
+	__le32 max_ports_per_engine;
+	u8 num_active_tcs[MAX_NUM_PORTS];
+};
+
+struct init_ets_tc_req {
+	u8 use_sp;
+	u8 use_wfq;
+	__le16 weight;
+};
+
+struct init_ets_req {
+	__le32 mtu;
+	struct init_ets_tc_req tc_req[NUM_OF_TCS];
+};
+
+struct init_nig_lb_rl_req {
+	__le16 lb_mac_rate;
+	__le16 lb_rate;
+	__le32 mtu;
+	__le16 tc_rate[NUM_OF_PHYS_TCS];
+};
+
+struct init_nig_pri_tc_map_entry {
+	u8 tc_id;
+	u8 valid;
+};
+
+struct init_nig_pri_tc_map_req {
+	struct init_nig_pri_tc_map_entry pri[NUM_OF_VLAN_PRIORITIES];
+};
+
 struct init_qm_port_params {
 	u8 active;
 	u8 active_phys_tcs;
@@ -1619,7 +2336,7 @@
 
 /* Width of GRC address in bits (addresses are specified in dwords) */
 #define GRC_ADDR_BITS	23
-#define MAX_GRC_ADDR	((1 << GRC_ADDR_BITS) - 1)
+#define MAX_GRC_ADDR	(BIT(GRC_ADDR_BITS) - 1)
 
 /* indicates an init that should be applied to any phase ID */
 #define ANY_PHASE_ID	0xffff
@@ -1627,15 +2344,50 @@
 /* Max size in dwords of a zipped array */
 #define MAX_ZIPPED_SIZE	8192
 
+struct fw_asserts_ram_section {
+	__le16 section_ram_line_offset;
+	__le16 section_ram_line_size;
+	u8 list_dword_offset;
+	u8 list_element_dword_size;
+	u8 list_num_elements;
+	u8 list_next_index_dword_offset;
+};
+
+struct fw_ver_num {
+	u8 major; /* Firmware major version number */
+	u8 minor; /* Firmware minor version number */
+	u8 rev; /* Firmware revision version number */
+	u8 eng; /* Firmware engineering version number (for bootleg versions) */
+};
+
+struct fw_ver_info {
+	__le16 tools_ver; /* Tools version number */
+	u8 image_id; /* FW image ID (e.g. main) */
+	u8 reserved1;
+	struct fw_ver_num num; /* FW version number */
+	__le32 timestamp; /* FW Timestamp in unix time  (sec. since 1970) */
+	__le32 reserved2;
+};
+
+struct fw_info {
+	struct fw_ver_info ver;
+	struct fw_asserts_ram_section fw_asserts_section;
+};
+
+struct fw_info_location {
+	__le32 grc_addr;
+	__le32 size;
+};
+
 enum init_modes {
 	MODE_RESERVED,
 	MODE_BB_B0,
-	MODE_RESERVED2,
+	MODE_K2,
 	MODE_ASIC,
+	MODE_RESERVED2,
 	MODE_RESERVED3,
 	MODE_RESERVED4,
 	MODE_RESERVED5,
-	MODE_RESERVED6,
 	MODE_SF,
 	MODE_MF_SD,
 	MODE_MF_SI,
@@ -1644,7 +2396,7 @@
 	MODE_PORTS_PER_ENG_4,
 	MODE_100G,
 	MODE_40G,
-	MODE_RESERVED7,
+	MODE_RESERVED6,
 	MAX_INIT_MODES
 };
 
@@ -1674,11 +2426,11 @@
 
 /* binary init buffer types */
 enum bin_init_buffer_type {
-	BIN_BUF_FW_VER_INFO,
+	BIN_BUF_INIT_FW_VER_INFO,
 	BIN_BUF_INIT_CMD,
 	BIN_BUF_INIT_VAL,
 	BIN_BUF_INIT_MODE_TREE,
-	BIN_BUF_IRO,
+	BIN_BUF_INIT_IRO,
 	MAX_BIN_INIT_BUFFER_TYPE
 };
 
@@ -1902,8 +2654,276 @@
 	__le16 size;
 };
 
+/***************************** Public Functions *******************************/
 /**
- * @brief qed_dbg_print_attn - Prints attention registers values in the specified results struct.
+ * @brief qed_dbg_set_bin_ptr - Sets a pointer to the binary data with debug
+ *	arrays.
+ *
+ * @param bin_ptr - a pointer to the binary data with debug arrays.
+ */
+enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr);
+/**
+ * @brief qed_dbg_grc_get_dump_buf_size - Returns the required buffer size for
+ *	GRC Dump.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for the GRC Dump
+ *	data.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+					      struct qed_ptt *p_ptt,
+					      u32 *buf_size);
+/**
+ * @brief qed_dbg_grc_dump - Dumps GRC data into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the collected GRC data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- the specified dump buffer is too small
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt,
+				 u32 *dump_buf,
+				 u32 buf_size_in_dwords,
+				 u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_idle_chk_get_dump_buf_size - Returns the required buffer size
+ *	for idle check results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for the idle check
+ *	data.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 *buf_size);
+/**
+ * @brief qed_dbg_idle_chk_dump - Performs idle check and writes the results
+ *	into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the idle check data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- the specified buffer is too small
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
+				      u32 *dump_buf,
+				      u32 buf_size_in_dwords,
+				      u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_mcp_trace_get_dump_buf_size - Returns the required buffer size
+ *	for mcp trace results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for mcp trace data.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- the trace data in MCP scratchpad contain an invalid signature
+ *	- the bundle ID in NVRAM is invalid
+ *	- the trace meta data cannot be found (in NVRAM or image file)
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						    struct qed_ptt *p_ptt,
+						    u32 *buf_size);
+/**
+ * @brief qed_dbg_mcp_trace_dump - Performs mcp trace and writes the results
+ *	into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the mcp trace data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- the specified buffer is too small
+ *	- the trace data in MCP scratchpad contain an invalid signature
+ *	- the bundle ID in NVRAM is invalid
+ *	- the trace meta data cannot be found (in NVRAM or image file)
+ *	- the trace meta data cannot be read (from NVRAM or image file)
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
+				       struct qed_ptt *p_ptt,
+				       u32 *dump_buf,
+				       u32 buf_size_in_dwords,
+				       u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_reg_fifo_get_dump_buf_size - Returns the required buffer size
+ *	for grc trace fifo results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for reg fifo data.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 *buf_size);
+/**
+ * @brief qed_dbg_reg_fifo_dump - Reads the reg fifo and writes the results into
+ *	the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the reg fifo data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- the specified buffer is too small
+ *	- DMAE transaction failed
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
+				      u32 *dump_buf,
+				      u32 buf_size_in_dwords,
+				      u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_igu_fifo_get_dump_buf_size - Returns the required buffer size
+ *	for the IGU fifo results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for the IGU fifo
+ *	data.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						   struct qed_ptt *p_ptt,
+						   u32 *buf_size);
+/**
+ * @brief qed_dbg_igu_fifo_dump - Reads the IGU fifo and writes the results into
+ *	the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the IGU fifo data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- the specified buffer is too small
+ *	- DMAE transaction failed
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt,
+				      u32 *dump_buf,
+				      u32 buf_size_in_dwords,
+				      u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_protection_override_get_dump_buf_size - Returns the required
+ *	buffer size for protection override window results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for protection
+ *	override data.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status
+qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+					      struct qed_ptt *p_ptt,
+					      u32 *buf_size);
+/**
+ * @brief qed_dbg_protection_override_dump - Reads protection override window
+ *	entries and writes the results into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the protection override data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- the specified buffer is too small
+ *	- DMAE transaction failed
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
+						 struct qed_ptt *p_ptt,
+						 u32 *dump_buf,
+						 u32 buf_size_in_dwords,
+						 u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_fw_asserts_get_dump_buf_size - Returns the required buffer
+ *	size for FW Asserts results.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param buf_size - OUT: required buffer size (in dwords) for FW Asserts data.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn,
+						     struct qed_ptt *p_ptt,
+						     u32 *buf_size);
+/**
+ * @brief qed_dbg_fw_asserts_dump - Reads the FW Asserts and writes the results
+ *	into the specified buffer.
+ *
+ * @param p_hwfn - HW device data
+ * @param p_ptt - Ptt window used for writing the registers.
+ * @param dump_buf - Pointer to write the FW Asserts data into.
+ * @param buf_size_in_dwords - Size of the specified buffer in dwords.
+ * @param num_dumped_dwords - OUT: number of dumped dwords.
+ *
+ * @return error if one of the following holds:
+ *	- the version wasn't set
+ *	- the specified buffer is too small
+ * Otherwise, returns ok.
+ */
+enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt,
+					u32 *dump_buf,
+					u32 buf_size_in_dwords,
+					u32 *num_dumped_dwords);
+/**
+ * @brief qed_dbg_print_attn - Prints attention registers values in the
+ *	specified results struct.
  *
  * @param p_hwfn
  * @param results - Pointer to the attention read results
@@ -1915,47 +2935,241 @@
 enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn,
 				   struct dbg_attn_block_result *results);
 
+/******************************** Constants **********************************/
+
 #define MAX_NAME_LEN	16
 
+/***************************** Public Functions *******************************/
+/**
+ * @brief qed_dbg_user_set_bin_ptr - Sets a pointer to the binary data with
+ *	debug arrays.
+ *
+ * @param bin_ptr - a pointer to the binary data with debug arrays.
+ */
+enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr);
+/**
+ * @brief qed_dbg_get_status_str - Returns a string for the specified status.
+ *
+ * @param status - a debug status code.
+ *
+ * @return a string for the specified status
+ */
+const char *qed_dbg_get_status_str(enum dbg_status status);
+/**
+ * @brief qed_get_idle_chk_results_buf_size - Returns the required buffer size
+ *	for idle check results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - idle check dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *	results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32  num_dumped_dwords,
+						  u32 *results_buf_size);
+/**
+ * @brief qed_print_idle_chk_results - Prints idle check results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - idle check dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the idle check results.
+ * @param num_errors - OUT: number of errors found in idle check.
+ * @param num_warnings - OUT: number of warnings found in idle check.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf,
+					   u32 *num_errors,
+					   u32 *num_warnings);
+/**
+ * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size
+ *	for MCP Trace results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - MCP Trace dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *	results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
+						   u32 *dump_buf,
+						   u32 num_dumped_dwords,
+						   u32 *results_buf_size);
+/**
+ * @brief qed_print_mcp_trace_results - Prints MCP Trace results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - mcp trace dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the mcp trace results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
+					    u32 *dump_buf,
+					    u32 num_dumped_dwords,
+					    char *results_buf);
+/**
+ * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size
+ *	for reg_fifo results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - reg fifo dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *	results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size);
+/**
+ * @brief qed_print_reg_fifo_results - Prints reg fifo results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - reg fifo dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the reg fifo results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf);
+/**
+ * @brief qed_get_igu_fifo_results_buf_size - Returns the required buffer size
+ *	for igu_fifo results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - IGU fifo dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *	results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
+						  u32 *dump_buf,
+						  u32 num_dumped_dwords,
+						  u32 *results_buf_size);
+/**
+ * @brief qed_print_igu_fifo_results - Prints IGU fifo results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - IGU fifo dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the IGU fifo results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
+					   u32 *dump_buf,
+					   u32 num_dumped_dwords,
+					   char *results_buf);
+/**
+ * @brief qed_get_protection_override_results_buf_size - Returns the required
+ *	buffer size for protection override results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - protection override dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *	results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status
+qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
+					     u32 *dump_buf,
+					     u32 num_dumped_dwords,
+					     u32 *results_buf_size);
+/**
+ * @brief qed_print_protection_override_results - Prints protection override
+ *	results.
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - protection override dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the reg fifo results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
+						      u32 *dump_buf,
+						      u32 num_dumped_dwords,
+						      char *results_buf);
+/**
+ * @brief qed_get_fw_asserts_results_buf_size - Returns the required buffer size
+ *	for FW Asserts results (in bytes).
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - FW Asserts dump buffer.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
+ *	results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
+						    u32 *dump_buf,
+						    u32 num_dumped_dwords,
+						    u32 *results_buf_size);
+/**
+ * @brief qed_print_fw_asserts_results - Prints FW Asserts results
+ *
+ * @param p_hwfn - HW device data
+ * @param dump_buf - FW Asserts dump buffer, starting from the header.
+ * @param num_dumped_dwords - number of dwords that were dumped.
+ * @param results_buf - buffer for printing the FW Asserts results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
+					     u32 *dump_buf,
+					     u32 num_dumped_dwords,
+					     char *results_buf);
 /* Win 2 */
-#define GTT_BAR0_MAP_REG_IGU_CMD \
-	0x00f000UL
+#define GTT_BAR0_MAP_REG_IGU_CMD	0x00f000UL
 
 /* Win 3 */
-#define GTT_BAR0_MAP_REG_TSDM_RAM \
-	0x010000UL
+#define GTT_BAR0_MAP_REG_TSDM_RAM	0x010000UL
 
 /* Win 4 */
-#define GTT_BAR0_MAP_REG_MSDM_RAM \
-	0x011000UL
+#define GTT_BAR0_MAP_REG_MSDM_RAM	0x011000UL
 
 /* Win 5 */
-#define GTT_BAR0_MAP_REG_MSDM_RAM_1024 \
-	0x012000UL
+#define GTT_BAR0_MAP_REG_MSDM_RAM_1024	0x012000UL
 
 /* Win 6 */
-#define GTT_BAR0_MAP_REG_USDM_RAM \
-	0x013000UL
+#define GTT_BAR0_MAP_REG_USDM_RAM	0x013000UL
 
 /* Win 7 */
-#define GTT_BAR0_MAP_REG_USDM_RAM_1024 \
-	0x014000UL
+#define GTT_BAR0_MAP_REG_USDM_RAM_1024	0x014000UL
 
 /* Win 8 */
-#define GTT_BAR0_MAP_REG_USDM_RAM_2048 \
-	0x015000UL
+#define GTT_BAR0_MAP_REG_USDM_RAM_2048	0x015000UL
 
 /* Win 9 */
-#define GTT_BAR0_MAP_REG_XSDM_RAM \
-	0x016000UL
+#define GTT_BAR0_MAP_REG_XSDM_RAM	0x016000UL
 
 /* Win 10 */
-#define GTT_BAR0_MAP_REG_YSDM_RAM \
-	0x017000UL
+#define GTT_BAR0_MAP_REG_YSDM_RAM	0x017000UL
 
 /* Win 11 */
-#define GTT_BAR0_MAP_REG_PSDM_RAM \
-	0x018000UL
+#define GTT_BAR0_MAP_REG_PSDM_RAM	0x018000UL
 
 /**
  * @brief qed_qm_pf_mem_size - prepare QM ILT sizes
@@ -2003,7 +3217,7 @@
 	u16 num_vf_pqs;
 	u8 start_vport;
 	u8 num_vports;
-	u8 pf_wfq;
+	u16 pf_wfq;
 	u32 pf_rl;
 	struct init_qm_pq_params *pq_params;
 	struct init_qm_vport_params *vport_params;
@@ -2138,6 +3352,9 @@
 #define	TSTORM_PORT_STAT_OFFSET(port_id) \
 	(IRO[1].base + ((port_id) * IRO[1].m1))
 #define	TSTORM_PORT_STAT_SIZE				(IRO[1].size)
+#define TSTORM_LL2_PORT_STAT_OFFSET(port_id) \
+	(IRO[2].base + ((port_id) * IRO[2].m1))
+#define TSTORM_LL2_PORT_STAT_SIZE			(IRO[2].size)
 #define	USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) \
 	(IRO[3].base + ((vf_id) * IRO[3].m1))
 #define	USTORM_VF_PF_CHANNEL_READY_SIZE			(IRO[3].size)
@@ -2153,42 +3370,90 @@
 #define	USTORM_COMMON_QUEUE_CONS_OFFSET(queue_zone_id) \
 	(IRO[7].base + ((queue_zone_id) * IRO[7].m1))
 #define	USTORM_COMMON_QUEUE_CONS_SIZE			(IRO[7].size)
+#define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) \
+	(IRO[14].base +	((core_rx_queue_id) * IRO[14].m1))
+#define TSTORM_LL2_RX_PRODS_SIZE			(IRO[14].size)
+#define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \
+	(IRO[15].base + ((core_rx_queue_id) * IRO[15].m1))
+#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE		(IRO[15].size)
+#define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \
+	(IRO[16].base +	((core_rx_queue_id) * IRO[16].m1))
+#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE		(IRO[16].size)
+#define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_tx_stats_id) \
+	(IRO[17].base +	((core_tx_stats_id) * IRO[17].m1))
+#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE	(IRO[17].	size)
 #define	MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \
 	(IRO[18].base + ((stat_counter_id) * IRO[18].m1))
 #define	MSTORM_QUEUE_STAT_SIZE				(IRO[18].size)
 #define	MSTORM_ETH_PF_PRODS_OFFSET(queue_id) \
 	(IRO[19].base + ((queue_id) * IRO[19].m1))
 #define	MSTORM_ETH_PF_PRODS_SIZE			(IRO[19].size)
-#define	MSTORM_TPA_TIMEOUT_US_OFFSET			(IRO[20].base)
-#define	MSTORM_TPA_TIMEOUT_US_SIZE			(IRO[20].size)
+#define MSTORM_ETH_VF_PRODS_OFFSET(vf_id, vf_queue_id) \
+	(IRO[20].base +	((vf_id) * IRO[20].m1) + ((vf_queue_id) * IRO[20].m2))
+#define MSTORM_ETH_VF_PRODS_SIZE			(IRO[20].size)
+#define	MSTORM_TPA_TIMEOUT_US_OFFSET			(IRO[21].base)
+#define	MSTORM_TPA_TIMEOUT_US_SIZE			(IRO[21].size)
 #define	MSTORM_ETH_PF_STAT_OFFSET(pf_id) \
-	(IRO[21].base + ((pf_id) * IRO[21].m1))
+	(IRO[22].base + ((pf_id) * IRO[22].m1))
 #define	MSTORM_ETH_PF_STAT_SIZE				(IRO[21].size)
 #define	USTORM_QUEUE_STAT_OFFSET(stat_counter_id) \
-	(IRO[22].base + ((stat_counter_id) * IRO[22].m1))
-#define	USTORM_QUEUE_STAT_SIZE				(IRO[22].size)
+	(IRO[23].base + ((stat_counter_id) * IRO[23].m1))
+#define	USTORM_QUEUE_STAT_SIZE				(IRO[23].size)
 #define	USTORM_ETH_PF_STAT_OFFSET(pf_id) \
-	(IRO[23].base + ((pf_id) * IRO[23].m1))
-#define	USTORM_ETH_PF_STAT_SIZE				(IRO[23].size)
+	(IRO[24].base + ((pf_id) * IRO[24].m1))
+#define	USTORM_ETH_PF_STAT_SIZE				(IRO[24].size)
 #define	PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \
-	(IRO[24].base + ((stat_counter_id) * IRO[24].m1))
-#define	PSTORM_QUEUE_STAT_SIZE				(IRO[24].size)
+	(IRO[25].base + ((stat_counter_id) * IRO[25].m1))
+#define	PSTORM_QUEUE_STAT_SIZE				(IRO[25].size)
 #define	PSTORM_ETH_PF_STAT_OFFSET(pf_id) \
-	(IRO[25].base + ((pf_id) * IRO[25].m1))
-#define	PSTORM_ETH_PF_STAT_SIZE				(IRO[25].size)
+	(IRO[26].base + ((pf_id) * IRO[26].m1))
+#define	PSTORM_ETH_PF_STAT_SIZE				(IRO[26].size)
 #define	PSTORM_CTL_FRAME_ETHTYPE_OFFSET(ethtype) \
-	(IRO[26].base + ((ethtype) * IRO[26].m1))
-#define	PSTORM_CTL_FRAME_ETHTYPE_SIZE			(IRO[26].size)
-#define	TSTORM_ETH_PRS_INPUT_OFFSET			(IRO[27].base)
-#define	TSTORM_ETH_PRS_INPUT_SIZE			(IRO[27].size)
+	(IRO[27].base + ((ethtype) * IRO[27].m1))
+#define	PSTORM_CTL_FRAME_ETHTYPE_SIZE			(IRO[27].size)
+#define	TSTORM_ETH_PRS_INPUT_OFFSET			(IRO[28].base)
+#define	TSTORM_ETH_PRS_INPUT_SIZE			(IRO[28].size)
 #define	ETH_RX_RATE_LIMIT_OFFSET(pf_id) \
-	(IRO[28].base + ((pf_id) * IRO[28].m1))
-#define	ETH_RX_RATE_LIMIT_SIZE				(IRO[28].size)
+	(IRO[29].base + ((pf_id) * IRO[29].m1))
+#define	ETH_RX_RATE_LIMIT_SIZE				(IRO[29].size)
 #define	XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \
-	(IRO[29].base + ((queue_id) * IRO[29].m1))
-#define	XSTORM_ETH_QUEUE_ZONE_SIZE			(IRO[29].size)
+	(IRO[30].base + ((queue_id) * IRO[30].m1))
+#define	XSTORM_ETH_QUEUE_ZONE_SIZE			(IRO[30].size)
+#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) \
+	(IRO[34].base +	((cmdq_queue_id) * IRO[34].m1))
+#define TSTORM_SCSI_CMDQ_CONS_SIZE				(IRO[34].size)
+#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \
+	(IRO[35].base +	((func_id) * IRO[35].m1) + ((bdq_id) * IRO[35].m2))
+#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE				(IRO[35].size)
+#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \
+	(IRO[36].base +	((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2))
+#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE				(IRO[36].size)
+#define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
+	(IRO[37].base +	((pf_id) * IRO[37].m1))
+#define TSTORM_ISCSI_RX_STATS_SIZE				(IRO[37].size)
+#define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
+	(IRO[38].base +	((pf_id) * IRO[38].m1))
+#define MSTORM_ISCSI_RX_STATS_SIZE				(IRO[38].size)
+#define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
+	(IRO[39].base +	((pf_id) * IRO[39].m1))
+#define USTORM_ISCSI_RX_STATS_SIZE				(IRO[39].size)
+#define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
+	(IRO[40].base +	((pf_id) * IRO[40].m1))
+#define XSTORM_ISCSI_TX_STATS_SIZE				(IRO[40].size)
+#define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
+	(IRO[41].base +	((pf_id) * IRO[41].m1))
+#define YSTORM_ISCSI_TX_STATS_SIZE				(IRO[41].size)
+#define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
+	(IRO[42].base +	((pf_id) * IRO[42].m1))
+#define PSTORM_ISCSI_TX_STATS_SIZE				(IRO[42].size)
+#define PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \
+	(IRO[45].base +	((rdma_stat_counter_id) * IRO[45].m1))
+#define PSTORM_RDMA_QUEUE_STAT_SIZE				(IRO[45].size)
+#define TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \
+	(IRO[46].base +	((rdma_stat_counter_id) * IRO[46].m1))
+#define TSTORM_RDMA_QUEUE_STAT_SIZE				(IRO[46].size)
 
-static const struct iro iro_arr[46] = {
+static const struct iro iro_arr[47] = {
 	{0x0, 0x0, 0x0, 0x0, 0x8},
 	{0x4cb0, 0x78, 0x0, 0x0, 0x78},
 	{0x6318, 0x20, 0x0, 0x0, 0x20},
@@ -2201,20 +3466,21 @@
 	{0x3df0, 0x0, 0x0, 0x0, 0x78},
 	{0x29b0, 0x0, 0x0, 0x0, 0x78},
 	{0x4c38, 0x0, 0x0, 0x0, 0x78},
-	{0x4a48, 0x0, 0x0, 0x0, 0x78},
+	{0x4990, 0x0, 0x0, 0x0, 0x78},
 	{0x7e48, 0x0, 0x0, 0x0, 0x78},
 	{0xa28, 0x8, 0x0, 0x0, 0x8},
 	{0x60f8, 0x10, 0x0, 0x0, 0x10},
 	{0xb820, 0x30, 0x0, 0x0, 0x30},
 	{0x95b8, 0x30, 0x0, 0x0, 0x30},
-	{0x4c18, 0x80, 0x0, 0x0, 0x40},
+	{0x4b60, 0x80, 0x0, 0x0, 0x40},
 	{0x1f8, 0x4, 0x0, 0x0, 0x4},
-	{0xc9a8, 0x0, 0x0, 0x0, 0x4},
-	{0x4c58, 0x80, 0x0, 0x0, 0x20},
+	{0x53a0, 0x80, 0x4, 0x0, 0x4},
+	{0xc8f0, 0x0, 0x0, 0x0, 0x4},
+	{0x4ba0, 0x80, 0x0, 0x0, 0x20},
 	{0x8050, 0x40, 0x0, 0x0, 0x30},
 	{0xe770, 0x60, 0x0, 0x0, 0x60},
 	{0x2b48, 0x80, 0x0, 0x0, 0x38},
-	{0xdf88, 0x78, 0x0, 0x0, 0x78},
+	{0xf188, 0x78, 0x0, 0x0, 0x78},
 	{0x1f8, 0x4, 0x0, 0x0, 0x4},
 	{0xacf0, 0x0, 0x0, 0x0, 0xf0},
 	{0xade0, 0x8, 0x0, 0x0, 0x8},
@@ -2226,455 +3492,457 @@
 	{0x200, 0x10, 0x8, 0x0, 0x8},
 	{0xb78, 0x10, 0x8, 0x0, 0x2},
 	{0xd888, 0x38, 0x0, 0x0, 0x24},
-	{0x12120, 0x10, 0x0, 0x0, 0x8},
-	{0x11b20, 0x38, 0x0, 0x0, 0x18},
+	{0x12c38, 0x10, 0x0, 0x0, 0x8},
+	{0x11aa0, 0x38, 0x0, 0x0, 0x18},
 	{0xa8c0, 0x30, 0x0, 0x0, 0x10},
 	{0x86f8, 0x28, 0x0, 0x0, 0x18},
-	{0xeff8, 0x10, 0x0, 0x0, 0x10},
+	{0x101f8, 0x10, 0x0, 0x0, 0x10},
 	{0xdd08, 0x48, 0x0, 0x0, 0x38},
-	{0xf460, 0x20, 0x0, 0x0, 0x20},
+	{0x10660, 0x20, 0x0, 0x0, 0x20},
 	{0x2b80, 0x80, 0x0, 0x0, 0x10},
 	{0x5000, 0x10, 0x0, 0x0, 0x10},
 };
 
 /* Runtime array offsets */
-#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET 0
-#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET 1
-#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET 2
-#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET 3
-#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET 4
-#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET 5
-#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET 6
-#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET 7
-#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET 8
-#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET 9
-#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET 10
-#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET 11
-#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET 12
-#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET 13
-#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14
-#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15
-#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16
-#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17
-#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 18
-#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 19
-#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 20
-#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 21
-#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 22
-#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 23
-#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 24
-#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761
-#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736
-#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761
-#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736
-#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1497
-#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736
-#define CAU_REG_PI_MEMORY_RT_OFFSET 2233
-#define CAU_REG_PI_MEMORY_RT_SIZE 4416
-#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6649
-#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6650
-#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6651
-#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6652
-#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6653
-#define PRS_REG_SEARCH_TCP_RT_OFFSET 6654
-#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6655
-#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6656
-#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6657
-#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6658
-#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6659
-#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6660
-#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6661
-#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6662
-#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6663
-#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6664
-#define SRC_REG_FIRSTFREE_RT_OFFSET 6665
-#define SRC_REG_FIRSTFREE_RT_SIZE 2
-#define SRC_REG_LASTFREE_RT_OFFSET 6667
-#define SRC_REG_LASTFREE_RT_SIZE 2
-#define SRC_REG_COUNTFREE_RT_OFFSET 6669
-#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6670
-#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6671
-#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6672
-#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673
-#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674
-#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6675
-#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 6676
-#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6677
-#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6678
-#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6679
-#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6680
-#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6681
-#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6682
-#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6683
-#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6684
-#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6685
-#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6686
-#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6687
-#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6688
-#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6689
-#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6690
-#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6691
-#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6692
-#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6693
-#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6694
-#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6695
-#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6696
-#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6697
-#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6698
-#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6699
-#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6700
-#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6701
-#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6702
-#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6703
-#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6704
-#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000
-#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28704
-#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28705
-#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28706
-#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28707
-#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28708
-#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28709
-#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28710
-#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28711
-#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28712
-#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28713
-#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28714
-#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416
-#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29130
-#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512
-#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29642
-#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29643
-#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29644
-#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29645
-#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29646
-#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29647
-#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29648
-#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29649
-#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29650
-#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29651
-#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29652
-#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29653
-#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29654
-#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29655
-#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29656
-#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29657
-#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29658
-#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29659
-#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29660
-#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29661
-#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29662
-#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29663
-#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29664
-#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29665
-#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29666
-#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29667
-#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29668
-#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29669
-#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29670
-#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29671
-#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29672
-#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29673
-#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29674
-#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29675
-#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29676
-#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29677
-#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29678
-#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29679
-#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29680
-#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29681
-#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29682
-#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29683
-#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29684
-#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29685
-#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29686
-#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29687
-#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29688
-#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29689
-#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29690
-#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29691
-#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29692
-#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29693
-#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29694
-#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29695
-#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29696
-#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29697
-#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29698
-#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29699
-#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29700
-#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29701
-#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29702
-#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29703
-#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29704
-#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29705
-#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29706
-#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29707
-#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29708
-#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29709
-#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128
-#define QM_REG_VOQCRDLINE_RT_OFFSET 29837
-#define QM_REG_VOQCRDLINE_RT_SIZE 20
-#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29857
-#define QM_REG_VOQINITCRDLINE_RT_SIZE 20
-#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29877
-#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29878
-#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29879
-#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29880
-#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29881
-#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29882
-#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29883
-#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29884
-#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29885
-#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29886
-#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29887
-#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29888
-#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29889
-#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29890
-#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29891
-#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29892
-#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29893
-#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29894
-#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29895
-#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29896
-#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29897
-#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29898
-#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29899
-#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29900
-#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29901
-#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29902
-#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29903
-#define QM_REG_PQTX2PF_0_RT_OFFSET 29904
-#define QM_REG_PQTX2PF_1_RT_OFFSET 29905
-#define QM_REG_PQTX2PF_2_RT_OFFSET 29906
-#define QM_REG_PQTX2PF_3_RT_OFFSET 29907
-#define QM_REG_PQTX2PF_4_RT_OFFSET 29908
-#define QM_REG_PQTX2PF_5_RT_OFFSET 29909
-#define QM_REG_PQTX2PF_6_RT_OFFSET 29910
-#define QM_REG_PQTX2PF_7_RT_OFFSET 29911
-#define QM_REG_PQTX2PF_8_RT_OFFSET 29912
-#define QM_REG_PQTX2PF_9_RT_OFFSET 29913
-#define QM_REG_PQTX2PF_10_RT_OFFSET 29914
-#define QM_REG_PQTX2PF_11_RT_OFFSET 29915
-#define QM_REG_PQTX2PF_12_RT_OFFSET 29916
-#define QM_REG_PQTX2PF_13_RT_OFFSET 29917
-#define QM_REG_PQTX2PF_14_RT_OFFSET 29918
-#define QM_REG_PQTX2PF_15_RT_OFFSET 29919
-#define QM_REG_PQTX2PF_16_RT_OFFSET 29920
-#define QM_REG_PQTX2PF_17_RT_OFFSET 29921
-#define QM_REG_PQTX2PF_18_RT_OFFSET 29922
-#define QM_REG_PQTX2PF_19_RT_OFFSET 29923
-#define QM_REG_PQTX2PF_20_RT_OFFSET 29924
-#define QM_REG_PQTX2PF_21_RT_OFFSET 29925
-#define QM_REG_PQTX2PF_22_RT_OFFSET 29926
-#define QM_REG_PQTX2PF_23_RT_OFFSET 29927
-#define QM_REG_PQTX2PF_24_RT_OFFSET 29928
-#define QM_REG_PQTX2PF_25_RT_OFFSET 29929
-#define QM_REG_PQTX2PF_26_RT_OFFSET 29930
-#define QM_REG_PQTX2PF_27_RT_OFFSET 29931
-#define QM_REG_PQTX2PF_28_RT_OFFSET 29932
-#define QM_REG_PQTX2PF_29_RT_OFFSET 29933
-#define QM_REG_PQTX2PF_30_RT_OFFSET 29934
-#define QM_REG_PQTX2PF_31_RT_OFFSET 29935
-#define QM_REG_PQTX2PF_32_RT_OFFSET 29936
-#define QM_REG_PQTX2PF_33_RT_OFFSET 29937
-#define QM_REG_PQTX2PF_34_RT_OFFSET 29938
-#define QM_REG_PQTX2PF_35_RT_OFFSET 29939
-#define QM_REG_PQTX2PF_36_RT_OFFSET 29940
-#define QM_REG_PQTX2PF_37_RT_OFFSET 29941
-#define QM_REG_PQTX2PF_38_RT_OFFSET 29942
-#define QM_REG_PQTX2PF_39_RT_OFFSET 29943
-#define QM_REG_PQTX2PF_40_RT_OFFSET 29944
-#define QM_REG_PQTX2PF_41_RT_OFFSET 29945
-#define QM_REG_PQTX2PF_42_RT_OFFSET 29946
-#define QM_REG_PQTX2PF_43_RT_OFFSET 29947
-#define QM_REG_PQTX2PF_44_RT_OFFSET 29948
-#define QM_REG_PQTX2PF_45_RT_OFFSET 29949
-#define QM_REG_PQTX2PF_46_RT_OFFSET 29950
-#define QM_REG_PQTX2PF_47_RT_OFFSET 29951
-#define QM_REG_PQTX2PF_48_RT_OFFSET 29952
-#define QM_REG_PQTX2PF_49_RT_OFFSET 29953
-#define QM_REG_PQTX2PF_50_RT_OFFSET 29954
-#define QM_REG_PQTX2PF_51_RT_OFFSET 29955
-#define QM_REG_PQTX2PF_52_RT_OFFSET 29956
-#define QM_REG_PQTX2PF_53_RT_OFFSET 29957
-#define QM_REG_PQTX2PF_54_RT_OFFSET 29958
-#define QM_REG_PQTX2PF_55_RT_OFFSET 29959
-#define QM_REG_PQTX2PF_56_RT_OFFSET 29960
-#define QM_REG_PQTX2PF_57_RT_OFFSET 29961
-#define QM_REG_PQTX2PF_58_RT_OFFSET 29962
-#define QM_REG_PQTX2PF_59_RT_OFFSET 29963
-#define QM_REG_PQTX2PF_60_RT_OFFSET 29964
-#define QM_REG_PQTX2PF_61_RT_OFFSET 29965
-#define QM_REG_PQTX2PF_62_RT_OFFSET 29966
-#define QM_REG_PQTX2PF_63_RT_OFFSET 29967
-#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29968
-#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29969
-#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29970
-#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29971
-#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29972
-#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29973
-#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29974
-#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29975
-#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29976
-#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29977
-#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29978
-#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29979
-#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29980
-#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29981
-#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29982
-#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29983
-#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29984
-#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29985
-#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29986
-#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29987
-#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29988
-#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29989
-#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29990
-#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29991
-#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29992
-#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29993
-#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29994
-#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29995
-#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29996
-#define QM_REG_RLGLBLINCVAL_RT_SIZE 256
-#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30252
-#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256
-#define QM_REG_RLGLBLCRD_RT_OFFSET 30508
-#define QM_REG_RLGLBLCRD_RT_SIZE 256
-#define QM_REG_RLGLBLENABLE_RT_OFFSET 30764
-#define QM_REG_RLPFPERIOD_RT_OFFSET 30765
-#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30766
-#define QM_REG_RLPFINCVAL_RT_OFFSET 30767
-#define QM_REG_RLPFINCVAL_RT_SIZE 16
-#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30783
-#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16
-#define QM_REG_RLPFCRD_RT_OFFSET 30799
-#define QM_REG_RLPFCRD_RT_SIZE 16
-#define QM_REG_RLPFENABLE_RT_OFFSET 30815
-#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30816
-#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30817
-#define QM_REG_WFQPFWEIGHT_RT_SIZE 16
-#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30833
-#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16
-#define QM_REG_WFQPFCRD_RT_OFFSET 30849
-#define QM_REG_WFQPFCRD_RT_SIZE 160
-#define QM_REG_WFQPFENABLE_RT_OFFSET 31009
-#define QM_REG_WFQVPENABLE_RT_OFFSET 31010
-#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31011
-#define QM_REG_BASEADDRTXPQ_RT_SIZE 512
-#define QM_REG_TXPQMAP_RT_OFFSET 31523
-#define QM_REG_TXPQMAP_RT_SIZE 512
-#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32035
-#define QM_REG_WFQVPWEIGHT_RT_SIZE 512
-#define QM_REG_WFQVPCRD_RT_OFFSET 32547
-#define QM_REG_WFQVPCRD_RT_SIZE 512
-#define QM_REG_WFQVPMAP_RT_OFFSET 33059
-#define QM_REG_WFQVPMAP_RT_SIZE 512
-#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 33571
-#define QM_REG_WFQPFCRD_MSB_RT_SIZE 160
-#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 33731
-#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 33732
-#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 33733
-#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 33734
-#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 33735
-#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 33736
-#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 33737
-#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 33738
-#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4
-#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 33742
-#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4
-#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 33746
-#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4
-#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 33750
-#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 33751
-#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32
-#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 33783
-#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16
-#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 33799
-#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16
-#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 33815
-#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16
-#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 33831
-#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16
-#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 33847
-#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET 33848
-#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 33849
-#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 33850
-#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 33851
-#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 33852
-#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 33853
-#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 33854
-#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 33855
-#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 33856
-#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 33857
-#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 33858
-#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 33859
-#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 33860
-#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 33861
-#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 33862
-#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 33863
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 33864
-#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 33865
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 33866
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 33867
-#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 33868
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 33869
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 33870
-#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 33871
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 33872
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 33873
-#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 33874
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 33875
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 33876
-#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 33877
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 33878
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 33879
-#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 33880
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 33881
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 33882
-#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 33883
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 33884
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 33885
-#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 33886
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 33887
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 33888
-#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 33889
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 33890
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 33891
-#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 33892
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 33893
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 33894
-#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 33895
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 33896
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 33897
-#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 33898
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 33899
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 33900
-#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 33901
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 33902
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 33903
-#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 33904
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 33905
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 33906
-#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 33907
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 33908
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 33909
-#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 33910
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 33911
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 33912
-#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 33913
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 33914
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 33915
-#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 33916
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 33917
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 33918
-#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 33919
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 33920
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 33921
-#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 33922
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 33923
-#define XCM_REG_CON_PHY_Q3_RT_OFFSET 33924
+#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET	0
+#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET	1
+#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET	2
+#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET	3
+#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET	4
+#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET	5
+#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET	6
+#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET	7
+#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET	8
+#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET	9
+#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET	10
+#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET	11
+#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET	12
+#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET	13
+#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET	14
+#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET	15
+#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET	16
+#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET	17
+#define IGU_REG_PF_CONFIGURATION_RT_OFFSET	18
+#define IGU_REG_VF_CONFIGURATION_RT_OFFSET	19
+#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET	20
+#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET	21
+#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET	22
+#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET	23
+#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET	24
+#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET	761
+#define CAU_REG_SB_VAR_MEMORY_RT_SIZE	736
+#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET	761
+#define CAU_REG_SB_VAR_MEMORY_RT_SIZE	736
+#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET	1497
+#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE	736
+#define CAU_REG_PI_MEMORY_RT_OFFSET	2233
+#define CAU_REG_PI_MEMORY_RT_SIZE	4416
+#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET	6649
+#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET	6650
+#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET	6651
+#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET	6652
+#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET	6653
+#define PRS_REG_SEARCH_TCP_RT_OFFSET	6654
+#define PRS_REG_SEARCH_FCOE_RT_OFFSET	6655
+#define PRS_REG_SEARCH_ROCE_RT_OFFSET	6656
+#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET	6657
+#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET	6658
+#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET	6659
+#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET	6660
+#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET	6661
+#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET	6662
+#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET	6663
+#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET	6664
+#define SRC_REG_FIRSTFREE_RT_OFFSET	6665
+#define SRC_REG_FIRSTFREE_RT_SIZE	2
+#define SRC_REG_LASTFREE_RT_OFFSET	6667
+#define SRC_REG_LASTFREE_RT_SIZE	2
+#define SRC_REG_COUNTFREE_RT_OFFSET	6669
+#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET	6670
+#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET	6671
+#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET	6672
+#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET	6673
+#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET	6674
+#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET	6675
+#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET	6676
+#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET	6677
+#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET	6678
+#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET	6679
+#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET	6680
+#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET	6681
+#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET	6682
+#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET	6683
+#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET	6684
+#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET	6685
+#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET	6686
+#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET	6687
+#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET	6688
+#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET	6689
+#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET	6690
+#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET	6691
+#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET	6692
+#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET	6693
+#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET	6694
+#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET	6695
+#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET	6696
+#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET	6697
+#define PSWRQ2_REG_VF_BASE_RT_OFFSET	6698
+#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET	6699
+#define PSWRQ2_REG_WR_MBS0_RT_OFFSET	6700
+#define PSWRQ2_REG_RD_MBS0_RT_OFFSET	6701
+#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET	6702
+#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET	6703
+#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET	6704
+#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE	22000
+#define PGLUE_REG_B_VF_BASE_RT_OFFSET	28704
+#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET	28705
+#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET	28706
+#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET	28707
+#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET	28708
+#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET	28709
+#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET	28710
+#define TM_REG_VF_ENABLE_CONN_RT_OFFSET	28711
+#define TM_REG_PF_ENABLE_CONN_RT_OFFSET	28712
+#define TM_REG_PF_ENABLE_TASK_RT_OFFSET	28713
+#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET	28714
+#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET	28715
+#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET	28716
+#define TM_REG_CONFIG_CONN_MEM_RT_SIZE	416
+#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET	29132
+#define TM_REG_CONFIG_TASK_MEM_RT_SIZE	512
+#define QM_REG_MAXPQSIZE_0_RT_OFFSET	29644
+#define QM_REG_MAXPQSIZE_1_RT_OFFSET	29645
+#define QM_REG_MAXPQSIZE_2_RT_OFFSET	29646
+#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET	29647
+#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET	29648
+#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET	29649
+#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET	29650
+#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET	29651
+#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET	29652
+#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET	29653
+#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET	29654
+#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET	29655
+#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET	29656
+#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET	29657
+#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET	29658
+#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET	29659
+#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET	29660
+#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET	29661
+#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET	29662
+#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET	29663
+#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET	29664
+#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET	29665
+#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET	29666
+#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET	29667
+#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET	29668
+#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET	29669
+#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET	29670
+#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET	29671
+#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET	29672
+#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET	29673
+#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET	29674
+#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET	29675
+#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET	29676
+#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET	29677
+#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET	29678
+#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET	29679
+#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET	29680
+#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET	29681
+#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET	29682
+#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET	29683
+#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET	29684
+#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET	29685
+#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET	29686
+#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET	29687
+#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET	29688
+#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET	29689
+#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET	29690
+#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET	29691
+#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET	29692
+#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET	29693
+#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET	29694
+#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET	29695
+#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET	29696
+#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET	29697
+#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET	29698
+#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET	29699
+#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET	29700
+#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET	29701
+#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET	29702
+#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET	29703
+#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET	29704
+#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET	29705
+#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET	29706
+#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET	29707
+#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET	29708
+#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET	29709
+#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET	29710
+#define QM_REG_BASEADDROTHERPQ_RT_OFFSET	29711
+#define QM_REG_BASEADDROTHERPQ_RT_SIZE	128
+#define QM_REG_VOQCRDLINE_RT_OFFSET	29839
+#define QM_REG_VOQCRDLINE_RT_SIZE	20
+#define QM_REG_VOQINITCRDLINE_RT_OFFSET	29859
+#define QM_REG_VOQINITCRDLINE_RT_SIZE	20
+#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET	29879
+#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET	29880
+#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET	29881
+#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET	29882
+#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET	29883
+#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET	29884
+#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET	29885
+#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET	29886
+#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET	29887
+#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET	29888
+#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET	29889
+#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET	29890
+#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET	29891
+#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET	29892
+#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET	29893
+#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET	29894
+#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET	29895
+#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET	29896
+#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET	29897
+#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET	29898
+#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET	29899
+#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET	29900
+#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET	29901
+#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET	29902
+#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET	29903
+#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET	29904
+#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET	29905
+#define QM_REG_PQTX2PF_0_RT_OFFSET	29906
+#define QM_REG_PQTX2PF_1_RT_OFFSET	29907
+#define QM_REG_PQTX2PF_2_RT_OFFSET	29908
+#define QM_REG_PQTX2PF_3_RT_OFFSET	29909
+#define QM_REG_PQTX2PF_4_RT_OFFSET	29910
+#define QM_REG_PQTX2PF_5_RT_OFFSET	29911
+#define QM_REG_PQTX2PF_6_RT_OFFSET	29912
+#define QM_REG_PQTX2PF_7_RT_OFFSET	29913
+#define QM_REG_PQTX2PF_8_RT_OFFSET	29914
+#define QM_REG_PQTX2PF_9_RT_OFFSET	29915
+#define QM_REG_PQTX2PF_10_RT_OFFSET	29916
+#define QM_REG_PQTX2PF_11_RT_OFFSET	29917
+#define QM_REG_PQTX2PF_12_RT_OFFSET	29918
+#define QM_REG_PQTX2PF_13_RT_OFFSET	29919
+#define QM_REG_PQTX2PF_14_RT_OFFSET	29920
+#define QM_REG_PQTX2PF_15_RT_OFFSET	29921
+#define QM_REG_PQTX2PF_16_RT_OFFSET	29922
+#define QM_REG_PQTX2PF_17_RT_OFFSET	29923
+#define QM_REG_PQTX2PF_18_RT_OFFSET	29924
+#define QM_REG_PQTX2PF_19_RT_OFFSET	29925
+#define QM_REG_PQTX2PF_20_RT_OFFSET	29926
+#define QM_REG_PQTX2PF_21_RT_OFFSET	29927
+#define QM_REG_PQTX2PF_22_RT_OFFSET	29928
+#define QM_REG_PQTX2PF_23_RT_OFFSET	29929
+#define QM_REG_PQTX2PF_24_RT_OFFSET	29930
+#define QM_REG_PQTX2PF_25_RT_OFFSET	29931
+#define QM_REG_PQTX2PF_26_RT_OFFSET	29932
+#define QM_REG_PQTX2PF_27_RT_OFFSET	29933
+#define QM_REG_PQTX2PF_28_RT_OFFSET	29934
+#define QM_REG_PQTX2PF_29_RT_OFFSET	29935
+#define QM_REG_PQTX2PF_30_RT_OFFSET	29936
+#define QM_REG_PQTX2PF_31_RT_OFFSET	29937
+#define QM_REG_PQTX2PF_32_RT_OFFSET	29938
+#define QM_REG_PQTX2PF_33_RT_OFFSET	29939
+#define QM_REG_PQTX2PF_34_RT_OFFSET	29940
+#define QM_REG_PQTX2PF_35_RT_OFFSET	29941
+#define QM_REG_PQTX2PF_36_RT_OFFSET	29942
+#define QM_REG_PQTX2PF_37_RT_OFFSET	29943
+#define QM_REG_PQTX2PF_38_RT_OFFSET	29944
+#define QM_REG_PQTX2PF_39_RT_OFFSET	29945
+#define QM_REG_PQTX2PF_40_RT_OFFSET	29946
+#define QM_REG_PQTX2PF_41_RT_OFFSET	29947
+#define QM_REG_PQTX2PF_42_RT_OFFSET	29948
+#define QM_REG_PQTX2PF_43_RT_OFFSET	29949
+#define QM_REG_PQTX2PF_44_RT_OFFSET	29950
+#define QM_REG_PQTX2PF_45_RT_OFFSET	29951
+#define QM_REG_PQTX2PF_46_RT_OFFSET	29952
+#define QM_REG_PQTX2PF_47_RT_OFFSET	29953
+#define QM_REG_PQTX2PF_48_RT_OFFSET	29954
+#define QM_REG_PQTX2PF_49_RT_OFFSET	29955
+#define QM_REG_PQTX2PF_50_RT_OFFSET	29956
+#define QM_REG_PQTX2PF_51_RT_OFFSET	29957
+#define QM_REG_PQTX2PF_52_RT_OFFSET	29958
+#define QM_REG_PQTX2PF_53_RT_OFFSET	29959
+#define QM_REG_PQTX2PF_54_RT_OFFSET	29960
+#define QM_REG_PQTX2PF_55_RT_OFFSET	29961
+#define QM_REG_PQTX2PF_56_RT_OFFSET	29962
+#define QM_REG_PQTX2PF_57_RT_OFFSET	29963
+#define QM_REG_PQTX2PF_58_RT_OFFSET	29964
+#define QM_REG_PQTX2PF_59_RT_OFFSET	29965
+#define QM_REG_PQTX2PF_60_RT_OFFSET	29966
+#define QM_REG_PQTX2PF_61_RT_OFFSET	29967
+#define QM_REG_PQTX2PF_62_RT_OFFSET	29968
+#define QM_REG_PQTX2PF_63_RT_OFFSET	29969
+#define QM_REG_PQOTHER2PF_0_RT_OFFSET	29970
+#define QM_REG_PQOTHER2PF_1_RT_OFFSET	29971
+#define QM_REG_PQOTHER2PF_2_RT_OFFSET	29972
+#define QM_REG_PQOTHER2PF_3_RT_OFFSET	29973
+#define QM_REG_PQOTHER2PF_4_RT_OFFSET	29974
+#define QM_REG_PQOTHER2PF_5_RT_OFFSET	29975
+#define QM_REG_PQOTHER2PF_6_RT_OFFSET	29976
+#define QM_REG_PQOTHER2PF_7_RT_OFFSET	29977
+#define QM_REG_PQOTHER2PF_8_RT_OFFSET	29978
+#define QM_REG_PQOTHER2PF_9_RT_OFFSET	29979
+#define QM_REG_PQOTHER2PF_10_RT_OFFSET	29980
+#define QM_REG_PQOTHER2PF_11_RT_OFFSET	29981
+#define QM_REG_PQOTHER2PF_12_RT_OFFSET	29982
+#define QM_REG_PQOTHER2PF_13_RT_OFFSET	29983
+#define QM_REG_PQOTHER2PF_14_RT_OFFSET	29984
+#define QM_REG_PQOTHER2PF_15_RT_OFFSET	29985
+#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET	29986
+#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET	29987
+#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET	29988
+#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET	29989
+#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET	29990
+#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET	29991
+#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET	29992
+#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET	29993
+#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET	29994
+#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET	29995
+#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET	29996
+#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET	29997
+#define QM_REG_RLGLBLINCVAL_RT_OFFSET	29998
+#define QM_REG_RLGLBLINCVAL_RT_SIZE	256
+#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET	30254
+#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE	256
+#define QM_REG_RLGLBLCRD_RT_OFFSET	30510
+#define QM_REG_RLGLBLCRD_RT_SIZE	256
+#define QM_REG_RLGLBLENABLE_RT_OFFSET	30766
+#define QM_REG_RLPFPERIOD_RT_OFFSET	30767
+#define QM_REG_RLPFPERIODTIMER_RT_OFFSET	30768
+#define QM_REG_RLPFINCVAL_RT_OFFSET	30769
+#define QM_REG_RLPFINCVAL_RT_SIZE	16
+#define QM_REG_RLPFUPPERBOUND_RT_OFFSET	30785
+#define QM_REG_RLPFUPPERBOUND_RT_SIZE	16
+#define QM_REG_RLPFCRD_RT_OFFSET	30801
+#define QM_REG_RLPFCRD_RT_SIZE	16
+#define QM_REG_RLPFENABLE_RT_OFFSET	30817
+#define QM_REG_RLPFVOQENABLE_RT_OFFSET	30818
+#define QM_REG_WFQPFWEIGHT_RT_OFFSET	30819
+#define QM_REG_WFQPFWEIGHT_RT_SIZE	16
+#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET	30835
+#define QM_REG_WFQPFUPPERBOUND_RT_SIZE	16
+#define QM_REG_WFQPFCRD_RT_OFFSET	30851
+#define QM_REG_WFQPFCRD_RT_SIZE	160
+#define QM_REG_WFQPFENABLE_RT_OFFSET	31011
+#define QM_REG_WFQVPENABLE_RT_OFFSET	31012
+#define QM_REG_BASEADDRTXPQ_RT_OFFSET	31013
+#define QM_REG_BASEADDRTXPQ_RT_SIZE	512
+#define QM_REG_TXPQMAP_RT_OFFSET	31525
+#define QM_REG_TXPQMAP_RT_SIZE	512
+#define QM_REG_WFQVPWEIGHT_RT_OFFSET	32037
+#define QM_REG_WFQVPWEIGHT_RT_SIZE	512
+#define QM_REG_WFQVPCRD_RT_OFFSET	32549
+#define QM_REG_WFQVPCRD_RT_SIZE	512
+#define QM_REG_WFQVPMAP_RT_OFFSET	33061
+#define QM_REG_WFQVPMAP_RT_SIZE	512
+#define QM_REG_WFQPFCRD_MSB_RT_OFFSET	33573
+#define QM_REG_WFQPFCRD_MSB_RT_SIZE	160
+#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET	33733
+#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET	33734
+#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET	33735
+#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET	33736
+#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET	33737
+#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET	33738
+#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET	33739
+#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET	33740
+#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE	4
+#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET	33744
+#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE	4
+#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET	33748
+#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE	4
+#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET	33752
+#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET	33753
+#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE	32
+#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET	33785
+#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE	16
+#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET	33801
+#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE	16
+#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET	33817
+#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE	16
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET	33833
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE	16
+#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET	33849
+#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET	33850
+#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET	33851
+#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET	33852
+#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET	33853
+#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET	33854
+#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET	33855
+#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET	33856
+#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET	33857
+#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET	33858
+#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET	33859
+#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET	33860
+#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET	33861
+#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET	33862
+#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET	33863
+#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET	33864
+#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET	33865
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET	33866
+#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET	33867
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET	33868
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET	33869
+#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET	33870
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET	33871
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET	33872
+#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET	33873
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET	33874
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET	33875
+#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET	33876
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET	33877
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET	33878
+#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET	33879
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET	33880
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET	33881
+#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET	33882
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET	33883
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET	33884
+#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET	33885
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET	33886
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET	33887
+#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET	33888
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET	33889
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET	33890
+#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET	33891
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET	33892
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET	33893
+#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET	33894
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET	33895
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET	33896
+#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET	33897
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET	33898
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET	33899
+#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET	33900
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET	33901
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET	33902
+#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET	33903
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET	33904
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET	33905
+#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET	33906
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET	33907
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET	33908
+#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET	33909
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET	33910
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET	33911
+#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET	33912
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET	33913
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET	33914
+#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET	33915
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET	33916
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET	33917
+#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET	33918
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET	33919
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET	33920
+#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET	33921
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET	33922
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET	33923
+#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET	33924
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET	33925
+#define XCM_REG_CON_PHY_Q3_RT_OFFSET	33926
 
-#define RUNTIME_ARRAY_SIZE 33925
+#define RUNTIME_ARRAY_SIZE 33927
 
 /* The eth storm context for the Tstorm */
 struct tstorm_eth_conn_st_ctx {
@@ -3201,7 +4469,31 @@
 	struct mstorm_eth_conn_st_ctx mstorm_st_context;
 };
 
-/* opcodes for the event ring */
+enum eth_error_code {
+	ETH_OK = 0x00,
+	ETH_FILTERS_MAC_ADD_FAIL_FULL,
+	ETH_FILTERS_MAC_ADD_FAIL_FULL_MTT2,
+	ETH_FILTERS_MAC_ADD_FAIL_DUP_MTT2,
+	ETH_FILTERS_MAC_ADD_FAIL_DUP_STT2,
+	ETH_FILTERS_MAC_DEL_FAIL_NOF,
+	ETH_FILTERS_MAC_DEL_FAIL_NOF_MTT2,
+	ETH_FILTERS_MAC_DEL_FAIL_NOF_STT2,
+	ETH_FILTERS_MAC_ADD_FAIL_ZERO_MAC,
+	ETH_FILTERS_VLAN_ADD_FAIL_FULL,
+	ETH_FILTERS_VLAN_ADD_FAIL_DUP,
+	ETH_FILTERS_VLAN_DEL_FAIL_NOF,
+	ETH_FILTERS_VLAN_DEL_FAIL_NOF_TT1,
+	ETH_FILTERS_PAIR_ADD_FAIL_DUP,
+	ETH_FILTERS_PAIR_ADD_FAIL_FULL,
+	ETH_FILTERS_PAIR_ADD_FAIL_FULL_MAC,
+	ETH_FILTERS_PAIR_DEL_FAIL_NOF,
+	ETH_FILTERS_PAIR_DEL_FAIL_NOF_TT1,
+	ETH_FILTERS_PAIR_ADD_FAIL_ZERO_MAC,
+	ETH_FILTERS_VNI_ADD_FAIL_FULL,
+	ETH_FILTERS_VNI_ADD_FAIL_DUP,
+	MAX_ETH_ERROR_CODE
+};
+
 enum eth_event_opcode {
 	ETH_EVENT_UNUSED,
 	ETH_EVENT_VPORT_START,
@@ -3269,7 +4561,13 @@
 	MAX_ETH_FILTER_TYPE
 };
 
-/* Ethernet Ramrod Command IDs */
+enum eth_ipv4_frag_type {
+	ETH_IPV4_NOT_FRAG,
+	ETH_IPV4_FIRST_FRAG,
+	ETH_IPV4_NON_FIRST_FRAG,
+	MAX_ETH_IPV4_FRAG_TYPE
+};
+
 enum eth_ramrod_cmd_id {
 	ETH_RAMROD_UNUSED,
 	ETH_RAMROD_VPORT_START,
@@ -3451,8 +4749,8 @@
 	u8 toggle_val;
 
 	u8 vf_rx_prod_index;
-
-	u8 reserved[6];
+	u8 vf_rx_prod_use_zone_a;
+	u8 reserved[5];
 	__le16 reserved1;
 	struct regpair cqe_pbl_addr;
 	struct regpair bd_base;
@@ -3526,10 +4824,11 @@
 	__le16 pxp_st_index;
 	__le16 comp_agg_size;
 	__le16 queue_zone_id;
-	__le16 test_dup_count;
+	__le16 reserved2;
 	__le16 pbl_size;
 	__le16 tx_queue_id;
-
+	__le16 same_as_last_id;
+	__le16 reserved[3];
 	struct regpair pbl_base_addr;
 	struct regpair bd_cons_address;
 };
@@ -4926,8 +6225,8 @@
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG_SHIFT             5
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN_MASK  0x1
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN_SHIFT 6
-#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED0_MASK            0x1
-#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED0_SHIFT           7
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN_MASK	0x1
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN_SHIFT	7
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_MASK                  0x7
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_SHIFT                 8
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_MASK    0x1F
@@ -4988,6 +6287,10 @@
 	MAX_ROCE_EVENT_OPCODE
 };
 
+struct roce_init_func_ramrod_data {
+	struct rdma_init_func_ramrod_data rdma;
+};
+
 struct roce_modify_qp_req_ramrod_data {
 	__le16 flags;
 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG_MASK      0x1
@@ -6639,6 +7942,35 @@
 	__le32 reg2;
 	__le32 reg3;
 };
+
+#define MFW_TRACE_SIGNATURE     0x25071946
+
+/* The trace in the buffer */
+#define MFW_TRACE_EVENTID_MASK          0x00ffff
+#define MFW_TRACE_PRM_SIZE_MASK         0x0f0000
+#define MFW_TRACE_PRM_SIZE_SHIFT        16
+#define MFW_TRACE_ENTRY_SIZE            3
+
+struct mcp_trace {
+	u32 signature;		/* Help to identify that the trace is valid */
+	u32 size;		/* the size of the trace buffer in bytes */
+	u32 curr_level;		/* 2 - all will be written to the buffer
+				 * 1 - debug trace will not be written
+				 * 0 - just errors will be written to the buffer
+				 */
+	u32 modules_mask[2];	/* a bit per module, 1 means write it, 0 means
+				 * mask it.
+				 */
+
+	/* Warning: the following pointers are assumed to be 32bits as they are
+	 * used only in the MFW.
+	 */
+	u32 trace_prod; /* The next trace will be written to this offset */
+	u32 trace_oldest; /* The oldest valid trace starts at this offset
+			   * (usually very close after the current producer).
+			   */
+};
+
 #define VF_MAX_STATIC 192
 
 #define MCP_GLOB_PATH_MAX	2
@@ -6646,6 +7978,7 @@
 #define MCP_GLOB_PORT_MAX	4
 #define MCP_GLOB_FUNC_MAX	16
 
+typedef u32 offsize_t;		/* In DWORDS !!! */
 /* Offset from the beginning of the MCP scratchpad */
 #define OFFSIZE_OFFSET_SHIFT	0
 #define OFFSIZE_OFFSET_MASK	0x0000ffff
@@ -6850,6 +8183,14 @@
 #define DCBX_APP_SF_SHIFT		8
 #define DCBX_APP_SF_ETHTYPE		0
 #define DCBX_APP_SF_PORT		1
+#define DCBX_APP_SF_IEEE_MASK		0x0000f000
+#define DCBX_APP_SF_IEEE_SHIFT		12
+#define DCBX_APP_SF_IEEE_RESERVED	0
+#define DCBX_APP_SF_IEEE_ETHTYPE	1
+#define DCBX_APP_SF_IEEE_TCP_PORT	2
+#define DCBX_APP_SF_IEEE_UDP_PORT	3
+#define DCBX_APP_SF_IEEE_TCP_UDP_PORT	4
+
 #define DCBX_APP_PROTOCOL_ID_MASK	0xffff0000
 #define DCBX_APP_PROTOCOL_ID_SHIFT	16
 };
@@ -7228,8 +8569,19 @@
 #define DRV_MSG_CODE_NIG_DRAIN			0x30000000
 #define DRV_MSG_CODE_VF_DISABLED_DONE		0xc0000000
 #define DRV_MSG_CODE_CFG_VF_MSIX		0xc0010000
+#define DRV_MSG_CODE_NVM_GET_FILE_ATT		0x00030000
+#define DRV_MSG_CODE_NVM_READ_NVRAM		0x00050000
 #define DRV_MSG_CODE_MCP_RESET			0x00090000
 #define DRV_MSG_CODE_SET_VERSION		0x000f0000
+#define DRV_MSG_CODE_MCP_HALT                   0x00100000
+
+#define DRV_MSG_CODE_GET_STATS                  0x00130000
+#define DRV_MSG_CODE_STATS_TYPE_LAN             1
+#define DRV_MSG_CODE_STATS_TYPE_FCOE            2
+#define DRV_MSG_CODE_STATS_TYPE_ISCSI           3
+#define DRV_MSG_CODE_STATS_TYPE_RDMA            4
+
+#define DRV_MSG_CODE_MASK_PARITIES              0x001a0000
 
 #define DRV_MSG_CODE_BIST_TEST			0x001e0000
 #define DRV_MSG_CODE_SET_LED_MODE		0x00200000
@@ -7240,6 +8592,9 @@
 #define DRV_MB_PARAM_UNLOAD_WOL_MCP		0x00000001
 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK		0x000000FF
 #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT		3
+
+#define DRV_MB_PARAM_NVM_LEN_SHIFT		24
+
 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT	0
 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_MASK	0x000000FF
 #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT	8
@@ -7277,6 +8632,8 @@
 #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION		0x20130000
 #define FW_MSG_CODE_DRV_UNLOAD_DONE		0x21100000
 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE	0xb0010000
+
+#define FW_MSG_CODE_NVM_OK			0x00010000
 #define FW_MSG_CODE_OK				0x00160000
 
 #define FW_MSG_SEQ_NUMBER_MASK			0x0000ffff
@@ -7307,10 +8664,10 @@
 	MFW_DRV_MSG_RESERVED4,
 	MFW_DRV_MSG_BW_UPDATE,
 	MFW_DRV_MSG_BW_UPDATE5,
-	MFW_DRV_MSG_BW_UPDATE6,
-	MFW_DRV_MSG_BW_UPDATE7,
-	MFW_DRV_MSG_BW_UPDATE8,
-	MFW_DRV_MSG_BW_UPDATE9,
+	MFW_DRV_MSG_GET_LAN_STATS,
+	MFW_DRV_MSG_GET_FCOE_STATS,
+	MFW_DRV_MSG_GET_ISCSI_STATS,
+	MFW_DRV_MSG_GET_RDMA_STATS,
 	MFW_DRV_MSG_BW_UPDATE10,
 	MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
 	MFW_DRV_MSG_BW_UPDATE11,
@@ -7513,4 +8870,101 @@
 	struct nvm_cfg1_port port[MCP_GLOB_PORT_MAX];
 	struct nvm_cfg1_func func[MCP_GLOB_FUNC_MAX];
 };
+
+enum spad_sections {
+	SPAD_SECTION_TRACE,
+	SPAD_SECTION_NVM_CFG,
+	SPAD_SECTION_PUBLIC,
+	SPAD_SECTION_PRIVATE,
+	SPAD_SECTION_MAX
+};
+
+#define MCP_TRACE_SIZE          2048	/* 2kb */
+
+/* This section is located at a fixed location in the beginning of the
+ * scratchpad, to ensure that the MCP trace is not run over during MFW upgrade.
+ * All the rest of data has a floating location which differs from version to
+ * version, and is pointed by the mcp_meta_data below.
+ * Moreover, the spad_layout section is part of the MFW firmware, and is loaded
+ * with it from nvram in order to clear this portion.
+ */
+struct static_init {
+	u32 num_sections;
+	offsize_t sections[SPAD_SECTION_MAX];
+#define SECTION(_sec_) (*((offsize_t *)(STRUCT_OFFSET(sections[_sec_]))))
+
+	struct mcp_trace trace;
+#define MCP_TRACE_P ((struct mcp_trace *)(STRUCT_OFFSET(trace)))
+	u8 trace_buffer[MCP_TRACE_SIZE];
+#define MCP_TRACE_BUF ((u8 *)(STRUCT_OFFSET(trace_buffer)))
+	/* running_mfw has the same definition as in nvm_map.h.
+	 * This bit indicate both the running dir, and the running bundle.
+	 * It is set once when the LIM is loaded.
+	 */
+	u32 running_mfw;
+#define RUNNING_MFW (*((u32 *)(STRUCT_OFFSET(running_mfw))))
+	u32 build_time;
+#define MFW_BUILD_TIME (*((u32 *)(STRUCT_OFFSET(build_time))))
+	u32 reset_type;
+#define RESET_TYPE (*((u32 *)(STRUCT_OFFSET(reset_type))))
+	u32 mfw_secure_mode;
+#define MFW_SECURE_MODE (*((u32 *)(STRUCT_OFFSET(mfw_secure_mode))))
+	u16 pme_status_pf_bitmap;
+#define PME_STATUS_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_status_pf_bitmap))))
+	u16 pme_enable_pf_bitmap;
+#define PME_ENABLE_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_enable_pf_bitmap))))
+	u32 mim_nvm_addr;
+	u32 mim_start_addr;
+	u32 ah_pcie_link_params;
+#define AH_PCIE_LINK_PARAMS_LINK_SPEED_MASK     (0x000000ff)
+#define AH_PCIE_LINK_PARAMS_LINK_SPEED_SHIFT    (0)
+#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_MASK     (0x0000ff00)
+#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_SHIFT    (8)
+#define AH_PCIE_LINK_PARAMS_ASPM_MODE_MASK      (0x00ff0000)
+#define AH_PCIE_LINK_PARAMS_ASPM_MODE_SHIFT     (16)
+#define AH_PCIE_LINK_PARAMS_ASPM_CAP_MASK       (0xff000000)
+#define AH_PCIE_LINK_PARAMS_ASPM_CAP_SHIFT      (24)
+#define AH_PCIE_LINK_PARAMS (*((u32 *)(STRUCT_OFFSET(ah_pcie_link_params))))
+
+	u32 rsrv_persist[5];	/* Persist reserved for MFW upgrades */
+};
+
+enum nvm_image_type {
+	NVM_TYPE_TIM1 = 0x01,
+	NVM_TYPE_TIM2 = 0x02,
+	NVM_TYPE_MIM1 = 0x03,
+	NVM_TYPE_MIM2 = 0x04,
+	NVM_TYPE_MBA = 0x05,
+	NVM_TYPE_MODULES_PN = 0x06,
+	NVM_TYPE_VPD = 0x07,
+	NVM_TYPE_MFW_TRACE1 = 0x08,
+	NVM_TYPE_MFW_TRACE2 = 0x09,
+	NVM_TYPE_NVM_CFG1 = 0x0a,
+	NVM_TYPE_L2B = 0x0b,
+	NVM_TYPE_DIR1 = 0x0c,
+	NVM_TYPE_EAGLE_FW1 = 0x0d,
+	NVM_TYPE_FALCON_FW1 = 0x0e,
+	NVM_TYPE_PCIE_FW1 = 0x0f,
+	NVM_TYPE_HW_SET = 0x10,
+	NVM_TYPE_LIM = 0x11,
+	NVM_TYPE_AVS_FW1 = 0x12,
+	NVM_TYPE_DIR2 = 0x13,
+	NVM_TYPE_CCM = 0x14,
+	NVM_TYPE_EAGLE_FW2 = 0x15,
+	NVM_TYPE_FALCON_FW2 = 0x16,
+	NVM_TYPE_PCIE_FW2 = 0x17,
+	NVM_TYPE_AVS_FW2 = 0x18,
+	NVM_TYPE_INIT_HW = 0x19,
+	NVM_TYPE_DEFAULT_CFG = 0x1a,
+	NVM_TYPE_MDUMP = 0x1b,
+	NVM_TYPE_META = 0x1c,
+	NVM_TYPE_ISCSI_CFG = 0x1d,
+	NVM_TYPE_FCOE_CFG = 0x1f,
+	NVM_TYPE_ETH_PHY_FW1 = 0x20,
+	NVM_TYPE_ETH_PHY_FW2 = 0x21,
+	NVM_TYPE_MAX,
+};
+
+#define DIR_ID_1    (0)
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index e178853..6e4fae9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -44,8 +44,7 @@
 
 int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn)
 {
-	struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool),
-					      GFP_KERNEL);
+	struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool), GFP_KERNEL);
 	int i;
 
 	if (!p_pool)
@@ -113,16 +112,14 @@
 	return NULL;
 }
 
-void qed_ptt_release(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt)
+void qed_ptt_release(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
 	list_add(&p_ptt->list_entry, &p_hwfn->p_ptt_pool->free_list);
 	spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
 }
 
-u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn,
-			struct qed_ptt *p_ptt)
+u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	/* The HW is using DWORDS and we need to translate it to Bytes */
 	return le32_to_cpu(p_ptt->pxp.offset) << 2;
@@ -141,8 +138,7 @@
 }
 
 void qed_ptt_set_win(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt,
-		     u32 new_hw_addr)
+		     struct qed_ptt *p_ptt, u32 new_hw_addr)
 {
 	u32 prev_hw_addr;
 
@@ -166,8 +162,7 @@
 }
 
 static u32 qed_set_ptt(struct qed_hwfn *p_hwfn,
-		       struct qed_ptt *p_ptt,
-		       u32 hw_addr)
+		       struct qed_ptt *p_ptt, u32 hw_addr)
 {
 	u32 win_hw_addr = qed_ptt_get_hw_addr(p_hwfn, p_ptt);
 	u32 offset;
@@ -224,10 +219,7 @@
 
 static void qed_memcpy_hw(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt,
-			  void *addr,
-			  u32 hw_addr,
-			  size_t n,
-			  bool to_device)
+			  void *addr, u32 hw_addr, size_t n, bool to_device)
 {
 	u32 dw_count, *host_addr, hw_offset;
 	size_t quota, done = 0;
@@ -259,8 +251,7 @@
 }
 
 void qed_memcpy_from(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt,
-		     void *dest, u32 hw_addr, size_t n)
+		     struct qed_ptt *p_ptt, void *dest, u32 hw_addr, size_t n)
 {
 	DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
 		   "hw_addr 0x%x, dest %p hw_addr 0x%x, size %lu\n",
@@ -270,8 +261,7 @@
 }
 
 void qed_memcpy_to(struct qed_hwfn *p_hwfn,
-		   struct qed_ptt *p_ptt,
-		   u32 hw_addr, void *src, size_t n)
+		   struct qed_ptt *p_ptt, u32 hw_addr, void *src, size_t n)
 {
 	DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
 		   "hw_addr 0x%x, hw_addr 0x%x, src %p size %lu\n",
@@ -280,9 +270,7 @@
 	qed_memcpy_hw(p_hwfn, p_ptt, src, hw_addr, n, true);
 }
 
-void qed_fid_pretend(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt,
-		     u16 fid)
+void qed_fid_pretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 fid)
 {
 	u16 control = 0;
 
@@ -309,8 +297,7 @@
 }
 
 void qed_port_pretend(struct qed_hwfn *p_hwfn,
-		      struct qed_ptt *p_ptt,
-		      u8 port_id)
+		      struct qed_ptt *p_ptt, u8 port_id)
 {
 	u16 control = 0;
 
@@ -326,8 +313,7 @@
 	       *(u32 *)&p_ptt->pxp.pretend);
 }
 
-void qed_port_unpretend(struct qed_hwfn *p_hwfn,
-			struct qed_ptt *p_ptt)
+void qed_port_unpretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u16 control = 0;
 
@@ -429,28 +415,27 @@
 	return DMAE_REG_GO_C0 + (idx << 2);
 }
 
-static int
-qed_dmae_post_command(struct qed_hwfn *p_hwfn,
-		      struct qed_ptt *p_ptt)
+static int qed_dmae_post_command(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt)
 {
-	struct dmae_cmd *command = p_hwfn->dmae_info.p_dmae_cmd;
+	struct dmae_cmd *p_command = p_hwfn->dmae_info.p_dmae_cmd;
 	u8 idx_cmd = p_hwfn->dmae_info.channel, i;
 	int qed_status = 0;
 
 	/* verify address is not NULL */
-	if ((((command->dst_addr_lo == 0) && (command->dst_addr_hi == 0)) ||
-	     ((command->src_addr_lo == 0) && (command->src_addr_hi == 0)))) {
+	if ((((!p_command->dst_addr_lo) && (!p_command->dst_addr_hi)) ||
+	     ((!p_command->src_addr_lo) && (!p_command->src_addr_hi)))) {
 		DP_NOTICE(p_hwfn,
 			  "source or destination address 0 idx_cmd=%d\n"
 			  "opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n",
-			   idx_cmd,
-			   le32_to_cpu(command->opcode),
-			   le16_to_cpu(command->opcode_b),
-			   le16_to_cpu(command->length_dw),
-			   le32_to_cpu(command->src_addr_hi),
-			   le32_to_cpu(command->src_addr_lo),
-			   le32_to_cpu(command->dst_addr_hi),
-			   le32_to_cpu(command->dst_addr_lo));
+			  idx_cmd,
+			  le32_to_cpu(p_command->opcode),
+			  le16_to_cpu(p_command->opcode_b),
+			  le16_to_cpu(p_command->length_dw),
+			  le32_to_cpu(p_command->src_addr_hi),
+			  le32_to_cpu(p_command->src_addr_lo),
+			  le32_to_cpu(p_command->dst_addr_hi),
+			  le32_to_cpu(p_command->dst_addr_lo));
 
 		return -EINVAL;
 	}
@@ -459,13 +444,13 @@
 		   NETIF_MSG_HW,
 		   "Posting DMAE command [idx %d]: opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n",
 		   idx_cmd,
-		   le32_to_cpu(command->opcode),
-		   le16_to_cpu(command->opcode_b),
-		   le16_to_cpu(command->length_dw),
-		   le32_to_cpu(command->src_addr_hi),
-		   le32_to_cpu(command->src_addr_lo),
-		   le32_to_cpu(command->dst_addr_hi),
-		   le32_to_cpu(command->dst_addr_lo));
+		   le32_to_cpu(p_command->opcode),
+		   le16_to_cpu(p_command->opcode_b),
+		   le16_to_cpu(p_command->length_dw),
+		   le32_to_cpu(p_command->src_addr_hi),
+		   le32_to_cpu(p_command->src_addr_lo),
+		   le32_to_cpu(p_command->dst_addr_hi),
+		   le32_to_cpu(p_command->dst_addr_lo));
 
 	/* Copy the command to DMAE - need to do it before every call
 	 * for source/dest address no reset.
@@ -475,7 +460,7 @@
 	 */
 	for (i = 0; i < DMAE_CMD_SIZE; i++) {
 		u32 data = (i < DMAE_CMD_SIZE_TO_FILL) ?
-			   *(((u32 *)command) + i) : 0;
+			   *(((u32 *)p_command) + i) : 0;
 
 		qed_wr(p_hwfn, p_ptt,
 		       DMAE_REG_CMD_MEM +
@@ -483,9 +468,7 @@
 		       (i * sizeof(u32)), data);
 	}
 
-	qed_wr(p_hwfn, p_ptt,
-	       qed_dmae_idx_to_go_cmd(idx_cmd),
-	       DMAE_GO_VALUE);
+	qed_wr(p_hwfn, p_ptt, qed_dmae_idx_to_go_cmd(idx_cmd), DMAE_GO_VALUE);
 
 	return qed_status;
 }
@@ -498,31 +481,23 @@
 	u32 **p_comp = &p_hwfn->dmae_info.p_completion_word;
 
 	*p_comp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-				     sizeof(u32),
-				     p_addr,
-				     GFP_KERNEL);
-	if (!*p_comp) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `p_completion_word'\n");
+				     sizeof(u32), p_addr, GFP_KERNEL);
+	if (!*p_comp)
 		goto err;
-	}
 
 	p_addr = &p_hwfn->dmae_info.dmae_cmd_phys_addr;
 	*p_cmd = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
 				    sizeof(struct dmae_cmd),
 				    p_addr, GFP_KERNEL);
-	if (!*p_cmd) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `struct dmae_cmd'\n");
+	if (!*p_cmd)
 		goto err;
-	}
 
 	p_addr = &p_hwfn->dmae_info.intermediate_buffer_phys_addr;
 	*p_buff = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
 				     sizeof(u32) * DMAE_MAX_RW_SIZE,
 				     p_addr, GFP_KERNEL);
-	if (!*p_buff) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `intermediate_buffer'\n");
+	if (!*p_buff)
 		goto err;
-	}
 
 	p_hwfn->dmae_info.channel = p_hwfn->rel_pf_id;
 
@@ -543,8 +518,7 @@
 		p_phys = p_hwfn->dmae_info.completion_word_phys_addr;
 		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
 				  sizeof(u32),
-				  p_hwfn->dmae_info.p_completion_word,
-				  p_phys);
+				  p_hwfn->dmae_info.p_completion_word, p_phys);
 		p_hwfn->dmae_info.p_completion_word = NULL;
 	}
 
@@ -552,8 +526,7 @@
 		p_phys = p_hwfn->dmae_info.dmae_cmd_phys_addr;
 		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
 				  sizeof(struct dmae_cmd),
-				  p_hwfn->dmae_info.p_dmae_cmd,
-				  p_phys);
+				  p_hwfn->dmae_info.p_dmae_cmd, p_phys);
 		p_hwfn->dmae_info.p_dmae_cmd = NULL;
 	}
 
@@ -571,9 +544,7 @@
 
 static int qed_dmae_operation_wait(struct qed_hwfn *p_hwfn)
 {
-	u32 wait_cnt = 0;
-	u32 wait_cnt_limit = 10000;
-
+	u32 wait_cnt_limit = 10000, wait_cnt = 0;
 	int qed_status = 0;
 
 	barrier();
@@ -606,7 +577,7 @@
 					  u64 dst_addr,
 					  u8 src_type,
 					  u8 dst_type,
-					  u32 length)
+					  u32 length_dw)
 {
 	dma_addr_t phys = p_hwfn->dmae_info.intermediate_buffer_phys_addr;
 	struct dmae_cmd *cmd = p_hwfn->dmae_info.p_dmae_cmd;
@@ -624,7 +595,7 @@
 		cmd->src_addr_lo = cpu_to_le32(lower_32_bits(phys));
 		memcpy(&p_hwfn->dmae_info.p_intermediate_buffer[0],
 		       (void *)(uintptr_t)src_addr,
-		       length * sizeof(u32));
+		       length_dw * sizeof(u32));
 		break;
 	default:
 		return -EINVAL;
@@ -645,7 +616,7 @@
 		return -EINVAL;
 	}
 
-	cmd->length_dw = cpu_to_le16((u16)length);
+	cmd->length_dw = cpu_to_le16((u16)length_dw);
 
 	qed_dmae_post_command(p_hwfn, p_ptt);
 
@@ -654,16 +625,14 @@
 	if (qed_status) {
 		DP_NOTICE(p_hwfn,
 			  "qed_dmae_host2grc: Wait Failed. source_addr 0x%llx, grc_addr 0x%llx, size_in_dwords 0x%x\n",
-			  src_addr,
-			  dst_addr,
-			  length);
+			  src_addr, dst_addr, length_dw);
 		return qed_status;
 	}
 
 	if (dst_type == QED_DMAE_ADDRESS_HOST_VIRT)
 		memcpy((void *)(uintptr_t)(dst_addr),
 		       &p_hwfn->dmae_info.p_intermediate_buffer[0],
-		       length * sizeof(u32));
+		       length_dw * sizeof(u32));
 
 	return 0;
 }
@@ -730,10 +699,7 @@
 		if (qed_status) {
 			DP_NOTICE(p_hwfn,
 				  "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
-				  qed_status,
-				  src_addr,
-				  dst_addr,
-				  length_cur);
+				  qed_status, src_addr, dst_addr, length_cur);
 			break;
 		}
 	}
@@ -743,10 +709,7 @@
 
 int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt,
-		      u64 source_addr,
-		      u32 grc_addr,
-		      u32 size_in_dwords,
-		      u32 flags)
+		  u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags)
 {
 	u32 grc_addr_in_dw = grc_addr / sizeof(u32);
 	struct qed_dmae_params params;
@@ -768,9 +731,10 @@
 	return rc;
 }
 
-int
-qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr,
-		  dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
+int qed_dmae_grc2host(struct qed_hwfn *p_hwfn,
+		      struct qed_ptt *p_ptt,
+		      u32 grc_addr,
+		      dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
 {
 	u32 grc_addr_in_dw = grc_addr / sizeof(u32);
 	struct qed_dmae_params params;
@@ -791,12 +755,11 @@
 	return rc;
 }
 
-int
-qed_dmae_host2host(struct qed_hwfn *p_hwfn,
-		   struct qed_ptt *p_ptt,
-		   dma_addr_t source_addr,
-		   dma_addr_t dest_addr,
-		   u32 size_in_dwords, struct qed_dmae_params *p_params)
+int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       dma_addr_t source_addr,
+		       dma_addr_t dest_addr,
+		       u32 size_in_dwords, struct qed_dmae_params *p_params)
 {
 	int rc;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
index 9866a20..d567ba9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
@@ -59,17 +59,14 @@
 		p_hwfn->rt_data.b_valid[i] = false;
 }
 
-void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn,
-			   u32 rt_offset,
-			   u32 val)
+void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, u32 rt_offset, u32 val)
 {
 	p_hwfn->rt_data.init_val[rt_offset] = val;
 	p_hwfn->rt_data.b_valid[rt_offset] = true;
 }
 
 void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn,
-			   u32 rt_offset, u32 *p_val,
-			   size_t size)
+			   u32 rt_offset, u32 *p_val, size_t size)
 {
 	size_t i;
 
@@ -81,10 +78,7 @@
 
 static int qed_init_rt(struct qed_hwfn	*p_hwfn,
 		       struct qed_ptt *p_ptt,
-		       u32 addr,
-		       u16 rt_offset,
-		       u16 size,
-		       bool b_must_dmae)
+		       u32 addr, u16 rt_offset, u16 size, bool b_must_dmae)
 {
 	u32 *p_init_val = &p_hwfn->rt_data.init_val[rt_offset];
 	bool *p_valid = &p_hwfn->rt_data.b_valid[rt_offset];
@@ -102,8 +96,7 @@
 		 * simply write the data instead of using dmae.
 		 */
 		if (!b_must_dmae) {
-			qed_wr(p_hwfn, p_ptt, addr + (i << 2),
-			       p_init_val[i]);
+			qed_wr(p_hwfn, p_ptt, addr + (i << 2), p_init_val[i]);
 			continue;
 		}
 
@@ -115,7 +108,7 @@
 		rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 				       (uintptr_t)(p_init_val + i),
 				       addr + (i << 2), segment, 0);
-		if (rc != 0)
+		if (rc)
 			return rc;
 
 		/* Jump over the entire segment, including invalid entry */
@@ -182,9 +175,7 @@
 
 static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt,
-			      u32 addr,
-			      u32 fill,
-			      u32 fill_count)
+			      u32 addr, u32 fill, u32 fill_count)
 {
 	static u32 zero_buffer[DMAE_MAX_RW_SIZE];
 
@@ -199,15 +190,12 @@
 
 	return qed_dmae_host2grc(p_hwfn, p_ptt,
 				 (uintptr_t)(&zero_buffer[0]),
-				 addr, fill_count,
-				 QED_DMAE_FLAG_RW_REPL_SRC);
+				 addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC);
 }
 
 static void qed_init_fill(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt,
-			  u32 addr,
-			  u32 fill,
-			  u32 fill_count)
+			  u32 addr, u32 fill, u32 fill_count)
 {
 	u32 i;
 
@@ -218,12 +206,12 @@
 static int qed_init_cmd_array(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt,
 			      struct init_write_op *cmd,
-			      bool b_must_dmae,
-			      bool b_can_dmae)
+			      bool b_must_dmae, bool b_can_dmae)
 {
+	u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset);
 	u32 data = le32_to_cpu(cmd->data);
 	u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2;
-	u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset);
+
 	u32 offset, output_len, input_len, max_size;
 	struct qed_dev *cdev = p_hwfn->cdev;
 	union init_array_hdr *hdr;
@@ -233,8 +221,7 @@
 
 	array_data = cdev->fw_data->arr_data;
 
-	hdr = (union init_array_hdr *)(array_data +
-				       dmae_array_offset);
+	hdr = (union init_array_hdr *)(array_data + dmae_array_offset);
 	data = le32_to_cpu(hdr->raw.data);
 	switch (GET_FIELD(data, INIT_ARRAY_RAW_HDR_TYPE)) {
 	case INIT_ARR_ZIPPED:
@@ -290,13 +277,12 @@
 /* init_ops write command */
 static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt,
-			   struct init_write_op *cmd,
-			   bool b_can_dmae)
+			   struct init_write_op *p_cmd, bool b_can_dmae)
 {
-	u32 data = le32_to_cpu(cmd->data);
-	u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2;
+	u32 data = le32_to_cpu(p_cmd->data);
 	bool b_must_dmae = GET_FIELD(data, INIT_WRITE_OP_WIDE_BUS);
-	union init_write_args *arg = &cmd->args;
+	u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2;
+	union init_write_args *arg = &p_cmd->args;
 	int rc = 0;
 
 	/* Sanitize */
@@ -309,20 +295,18 @@
 
 	switch (GET_FIELD(data, INIT_WRITE_OP_SOURCE)) {
 	case INIT_SRC_INLINE:
-		qed_wr(p_hwfn, p_ptt, addr,
-		       le32_to_cpu(arg->inline_val));
+		data = le32_to_cpu(p_cmd->args.inline_val);
+		qed_wr(p_hwfn, p_ptt, addr, data);
 		break;
 	case INIT_SRC_ZEROS:
-		if (b_must_dmae ||
-		    (b_can_dmae && (le32_to_cpu(arg->zeros_count) >= 64)))
-			rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0,
-						le32_to_cpu(arg->zeros_count));
+		data = le32_to_cpu(p_cmd->args.zeros_count);
+		if (b_must_dmae || (b_can_dmae && (data >= 64)))
+			rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0, data);
 		else
-			qed_init_fill(p_hwfn, p_ptt, addr, 0,
-				      le32_to_cpu(arg->zeros_count));
+			qed_init_fill(p_hwfn, p_ptt, addr, 0, data);
 		break;
 	case INIT_SRC_ARRAY:
-		rc = qed_init_cmd_array(p_hwfn, p_ptt, cmd,
+		rc = qed_init_cmd_array(p_hwfn, p_ptt, p_cmd,
 					b_must_dmae, b_can_dmae);
 		break;
 	case INIT_SRC_RUNTIME:
@@ -353,8 +337,7 @@
 
 /* init_ops read/poll commands */
 static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    struct init_read_op *cmd)
+			    struct qed_ptt *p_ptt, struct init_read_op *cmd)
 {
 	bool (*comp_check)(u32 val, u32 expected_val);
 	u32 delay = QED_INIT_POLL_PERIOD_US, val;
@@ -412,35 +395,33 @@
 }
 
 static u8 qed_init_cmd_mode_match(struct qed_hwfn *p_hwfn,
-				  u16 *offset,
-				  int modes)
+				  u16 *p_offset, int modes)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	const u8 *modes_tree_buf;
 	u8 arg1, arg2, tree_val;
 
 	modes_tree_buf = cdev->fw_data->modes_tree_buf;
-	tree_val = modes_tree_buf[(*offset)++];
+	tree_val = modes_tree_buf[(*p_offset)++];
 	switch (tree_val) {
 	case INIT_MODE_OP_NOT:
-		return qed_init_cmd_mode_match(p_hwfn, offset, modes) ^ 1;
+		return qed_init_cmd_mode_match(p_hwfn, p_offset, modes) ^ 1;
 	case INIT_MODE_OP_OR:
-		arg1	= qed_init_cmd_mode_match(p_hwfn, offset, modes);
-		arg2	= qed_init_cmd_mode_match(p_hwfn, offset, modes);
+		arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
+		arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
 		return arg1 | arg2;
 	case INIT_MODE_OP_AND:
-		arg1	= qed_init_cmd_mode_match(p_hwfn, offset, modes);
-		arg2	= qed_init_cmd_mode_match(p_hwfn, offset, modes);
+		arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
+		arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes);
 		return arg1 & arg2;
 	default:
 		tree_val -= MAX_INIT_MODE_OPS;
-		return (modes & (1 << tree_val)) ? 1 : 0;
+		return (modes & BIT(tree_val)) ? 1 : 0;
 	}
 }
 
 static u32 qed_init_cmd_mode(struct qed_hwfn *p_hwfn,
-			     struct init_if_mode_op *p_cmd,
-			     int modes)
+			     struct init_if_mode_op *p_cmd, int modes)
 {
 	u16 offset = le16_to_cpu(p_cmd->modes_buf_offset);
 
@@ -453,8 +434,7 @@
 
 static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn,
 			      struct init_if_phase_op *p_cmd,
-			      u32 phase,
-			      u32 phase_id)
+			      u32 phase, u32 phase_id)
 {
 	u32 data = le32_to_cpu(p_cmd->phase_data);
 	u32 op_data = le32_to_cpu(p_cmd->op_data);
@@ -468,10 +448,7 @@
 }
 
 int qed_init_run(struct qed_hwfn *p_hwfn,
-		 struct qed_ptt *p_ptt,
-		 int phase,
-		 int phase_id,
-		 int modes)
+		 struct qed_ptt *p_ptt, int phase, int phase_id, int modes)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	u32 cmd_num, num_init_ops;
@@ -483,10 +460,8 @@
 	init_ops = cdev->fw_data->init_ops;
 
 	p_hwfn->unzip_buf = kzalloc(MAX_ZIPPED_SIZE * 4, GFP_ATOMIC);
-	if (!p_hwfn->unzip_buf) {
-		DP_NOTICE(p_hwfn, "Failed to allocate unzip buffer\n");
+	if (!p_hwfn->unzip_buf)
 		return -ENOMEM;
-	}
 
 	for (cmd_num = 0; cmd_num < num_init_ops; cmd_num++) {
 		union init_op *cmd = &init_ops[cmd_num];
@@ -557,7 +532,7 @@
 	/* First Dword contains metadata and should be skipped */
 	buf_hdr = (struct bin_buffer_hdr *)(data + sizeof(u32));
 
-	offset = buf_hdr[BIN_BUF_FW_VER_INFO].offset;
+	offset = buf_hdr[BIN_BUF_INIT_FW_VER_INFO].offset;
 	fw->fw_ver_info = (struct fw_ver_info *)(data + offset);
 
 	offset = buf_hdr[BIN_BUF_INIT_CMD].offset;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 8fa50fa..2adedc6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1775,10 +1775,9 @@
 };
 
 static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn,
-				      struct qed_sb_attn_info   *p_sb_desc)
+				      struct qed_sb_attn_info *p_sb_desc)
 {
-	u16     rc = 0;
-	u16     index;
+	u16 rc = 0, index;
 
 	/* Make certain HW write took affect */
 	mmiowb();
@@ -1802,15 +1801,13 @@
  *  @param asserted_bits newly asserted bits
  *  @return int
  */
-static int qed_int_assertion(struct qed_hwfn *p_hwfn,
-			     u16 asserted_bits)
+static int qed_int_assertion(struct qed_hwfn *p_hwfn, u16 asserted_bits)
 {
 	struct qed_sb_attn_info *sb_attn_sw = p_hwfn->p_sb_attn;
 	u32 igu_mask;
 
 	/* Mask the source of the attention in the IGU */
-	igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-			  IGU_REG_ATTENTION_ENABLE);
+	igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE);
 	DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "IGU mask: 0x%08x --> 0x%08x\n",
 		   igu_mask, igu_mask & ~(asserted_bits & ATTN_BITS_MASKABLE));
 	igu_mask &= ~(asserted_bits & ATTN_BITS_MASKABLE);
@@ -2041,7 +2038,7 @@
 			struct aeu_invert_reg_bit *p_bit = &p_aeu->bits[j];
 
 			if ((p_bit->flags & ATTENTION_PARITY) &&
-			    !!(parities & (1 << bit_idx)))
+			    !!(parities & BIT(bit_idx)))
 				qed_int_deassertion_parity(p_hwfn, p_bit,
 							   bit_idx);
 
@@ -2114,8 +2111,7 @@
 				    ~((u32)deasserted_bits));
 
 	/* Unmask deasserted attentions in IGU */
-	aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt,
-			  IGU_REG_ATTENTION_ENABLE);
+	aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE);
 	aeu_mask |= (deasserted_bits & ATTN_BITS_MASKABLE);
 	qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE, aeu_mask);
 
@@ -2160,8 +2156,7 @@
 			index, attn_bits, attn_acks, asserted_bits,
 			deasserted_bits, p_sb_attn_sw->known_attn);
 	} else if (asserted_bits == 0x100) {
-		DP_INFO(p_hwfn,
-			"MFW indication via attention\n");
+		DP_INFO(p_hwfn, "MFW indication via attention\n");
 	} else {
 		DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
 			   "MFW indication [deassertion]\n");
@@ -2173,18 +2168,14 @@
 			return rc;
 	}
 
-	if (deasserted_bits) {
+	if (deasserted_bits)
 		rc = qed_int_deassertion(p_hwfn, deasserted_bits);
-		if (rc)
-			return rc;
-	}
 
 	return rc;
 }
 
 static void qed_sb_ack_attn(struct qed_hwfn *p_hwfn,
-			    void __iomem *igu_addr,
-			    u32 ack_cons)
+			    void __iomem *igu_addr, u32 ack_cons)
 {
 	struct igu_prod_cons_update igu_ack = { 0 };
 
@@ -2242,9 +2233,8 @@
 
 	/* Gather Interrupts/Attentions information */
 	if (!sb_info->sb_virt) {
-		DP_ERR(
-			p_hwfn->cdev,
-			"Interrupt Status block is NULL - cannot check for new interrupts!\n");
+		DP_ERR(p_hwfn->cdev,
+		       "Interrupt Status block is NULL - cannot check for new interrupts!\n");
 	} else {
 		u32 tmp_index = sb_info->sb_ack;
 
@@ -2255,9 +2245,8 @@
 	}
 
 	if (!sb_attn || !sb_attn->sb_attn) {
-		DP_ERR(
-			p_hwfn->cdev,
-			"Attentions Status block is NULL - cannot check for new attentions!\n");
+		DP_ERR(p_hwfn->cdev,
+		       "Attentions Status block is NULL - cannot check for new attentions!\n");
 	} else {
 		u16 tmp_index = sb_attn->index;
 
@@ -2313,8 +2302,7 @@
 	if (p_sb->sb_attn)
 		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
 				  SB_ATTN_ALIGNED_SIZE(p_hwfn),
-				  p_sb->sb_attn,
-				  p_sb->sb_phys);
+				  p_sb->sb_attn, p_sb->sb_phys);
 	kfree(p_sb);
 }
 
@@ -2337,8 +2325,7 @@
 
 static void qed_int_sb_attn_init(struct qed_hwfn *p_hwfn,
 				 struct qed_ptt *p_ptt,
-				 void *sb_virt_addr,
-				 dma_addr_t sb_phy_addr)
+				 void *sb_virt_addr, dma_addr_t sb_phy_addr)
 {
 	struct qed_sb_attn_info *sb_info = p_hwfn->p_sb_attn;
 	int i, j, k;
@@ -2378,15 +2365,13 @@
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	struct qed_sb_attn_info *p_sb;
-	void *p_virt;
 	dma_addr_t p_phys = 0;
+	void *p_virt;
 
 	/* SB struct */
 	p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL);
-	if (!p_sb) {
-		DP_NOTICE(cdev, "Failed to allocate `struct qed_sb_attn_info'\n");
+	if (!p_sb)
 		return -ENOMEM;
-	}
 
 	/* SB ring  */
 	p_virt = dma_alloc_coherent(&cdev->pdev->dev,
@@ -2394,7 +2379,6 @@
 				    &p_phys, GFP_KERNEL);
 
 	if (!p_virt) {
-		DP_NOTICE(cdev, "Failed to allocate status block (attentions)\n");
 		kfree(p_sb);
 		return -ENOMEM;
 	}
@@ -2412,9 +2396,7 @@
 
 void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn,
 			   struct cau_sb_entry *p_sb_entry,
-			   u8 pf_id,
-			   u16 vf_number,
-			   u8 vf_valid)
+			   u8 pf_id, u16 vf_number, u8 vf_valid)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	u32 cau_state;
@@ -2428,12 +2410,6 @@
 	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET0, 0x7F);
 	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET1, 0x7F);
 
-	/* setting the time resultion to a fixed value ( = 1) */
-	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES0,
-		  QED_CAU_DEF_RX_TIMER_RES);
-	SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES1,
-		  QED_CAU_DEF_TX_TIMER_RES);
-
 	cau_state = CAU_HC_DISABLE_STATE;
 
 	if (cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) {
@@ -2468,9 +2444,7 @@
 void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
 			 struct qed_ptt *p_ptt,
 			 dma_addr_t sb_phys,
-			 u16 igu_sb_id,
-			 u16 vf_number,
-			 u8 vf_valid)
+			 u16 igu_sb_id, u16 vf_number, u8 vf_valid)
 {
 	struct cau_sb_entry sb_entry;
 
@@ -2514,8 +2488,7 @@
 			timer_res = 2;
 		timeset = (u8)(p_hwfn->cdev->rx_coalesce_usecs >> timer_res);
 		qed_int_cau_conf_pi(p_hwfn, p_ptt, igu_sb_id, RX_PI,
-				    QED_COAL_RX_STATE_MACHINE,
-				    timeset);
+				    QED_COAL_RX_STATE_MACHINE, timeset);
 
 		if (p_hwfn->cdev->tx_coalesce_usecs <= 0x7F)
 			timer_res = 0;
@@ -2541,8 +2514,7 @@
 			 u8 timeset)
 {
 	struct cau_pi_entry pi_entry;
-	u32 sb_offset;
-	u32 pi_offset;
+	u32 sb_offset, pi_offset;
 
 	if (IS_VF(p_hwfn->cdev))
 		return;
@@ -2569,8 +2541,7 @@
 }
 
 void qed_int_sb_setup(struct qed_hwfn *p_hwfn,
-		      struct qed_ptt *p_ptt,
-		      struct qed_sb_info *sb_info)
+		      struct qed_ptt *p_ptt, struct qed_sb_info *sb_info)
 {
 	/* zero status block and ack counter */
 	sb_info->sb_ack = 0;
@@ -2590,8 +2561,7 @@
  *
  * @return u16
  */
-static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn,
-			     u16 sb_id)
+static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
 {
 	u16 igu_sb_id;
 
@@ -2603,8 +2573,12 @@
 	else
 		igu_sb_id = qed_vf_get_igu_sb_id(p_hwfn, sb_id);
 
-	DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "SB [%s] index is 0x%04x\n",
-		   (sb_id == QED_SP_SB_ID) ? "DSB" : "non-DSB", igu_sb_id);
+	if (sb_id == QED_SP_SB_ID)
+		DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
+			   "Slowpath SB index in IGU is 0x%04x\n", igu_sb_id);
+	else
+		DP_VERBOSE(p_hwfn, NETIF_MSG_INTR,
+			   "SB [%04x] <--> IGU SB [%04x]\n", sb_id, igu_sb_id);
 
 	return igu_sb_id;
 }
@@ -2612,9 +2586,7 @@
 int qed_int_sb_init(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt,
 		    struct qed_sb_info *sb_info,
-		    void *sb_virt_addr,
-		    dma_addr_t sb_phy_addr,
-		    u16 sb_id)
+		    void *sb_virt_addr, dma_addr_t sb_phy_addr, u16 sb_id)
 {
 	sb_info->sb_virt = sb_virt_addr;
 	sb_info->sb_phys = sb_phy_addr;
@@ -2650,8 +2622,7 @@
 }
 
 int qed_int_sb_release(struct qed_hwfn *p_hwfn,
-		       struct qed_sb_info *sb_info,
-		       u16 sb_id)
+		       struct qed_sb_info *sb_info, u16 sb_id)
 {
 	if (sb_id == QED_SP_SB_ID) {
 		DP_ERR(p_hwfn, "Do Not free sp sb using this function");
@@ -2685,8 +2656,7 @@
 	kfree(p_sb);
 }
 
-static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn,
-			       struct qed_ptt *p_ptt)
+static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_sb_sp_info *p_sb;
 	dma_addr_t p_phys = 0;
@@ -2694,17 +2664,14 @@
 
 	/* SB struct */
 	p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL);
-	if (!p_sb) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sb_info'\n");
+	if (!p_sb)
 		return -ENOMEM;
-	}
 
 	/* SB ring  */
 	p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
 				    SB_ALIGNED_SIZE(p_hwfn),
 				    &p_phys, GFP_KERNEL);
 	if (!p_virt) {
-		DP_NOTICE(p_hwfn, "Failed to allocate status block\n");
 		kfree(p_sb);
 		return -ENOMEM;
 	}
@@ -2721,9 +2688,7 @@
 
 int qed_int_register_cb(struct qed_hwfn *p_hwfn,
 			qed_int_comp_cb_t comp_cb,
-			void *cookie,
-			u8 *sb_idx,
-			__le16 **p_fw_cons)
+			void *cookie, u8 *sb_idx, __le16 **p_fw_cons)
 {
 	struct qed_sb_sp_info *p_sp_sb = p_hwfn->p_sp_sb;
 	int rc = -ENOMEM;
@@ -2764,8 +2729,7 @@
 }
 
 void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn,
-			    struct qed_ptt *p_ptt,
-			    enum qed_int_mode int_mode)
+			    struct qed_ptt *p_ptt, enum qed_int_mode int_mode)
 {
 	u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN | IGU_PF_CONF_ATTN_BIT_EN;
 
@@ -2809,7 +2773,7 @@
 	qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff);
 	if ((int_mode != QED_INT_MODE_INTA) || IS_LEAD_HWFN(p_hwfn)) {
 		rc = qed_slowpath_irq_req(p_hwfn);
-		if (rc != 0) {
+		if (rc) {
 			DP_NOTICE(p_hwfn, "Slowpath IRQ request failed\n");
 			return -EINVAL;
 		}
@@ -2822,8 +2786,7 @@
 	return rc;
 }
 
-void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn,
-			     struct qed_ptt *p_ptt)
+void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	p_hwfn->b_int_enabled = 0;
 
@@ -2950,13 +2913,11 @@
 					p_hwfn->hw_info.opaque_fid, b_set);
 }
 
-static u32 qed_int_igu_read_cam_block(struct qed_hwfn	*p_hwfn,
-				      struct qed_ptt	*p_ptt,
-				      u16		sb_id)
+static u32 qed_int_igu_read_cam_block(struct qed_hwfn *p_hwfn,
+				      struct qed_ptt *p_ptt, u16 sb_id)
 {
 	u32 val = qed_rd(p_hwfn, p_ptt,
-			 IGU_REG_MAPPING_MEMORY +
-			 sizeof(u32) * sb_id);
+			 IGU_REG_MAPPING_MEMORY + sizeof(u32) * sb_id);
 	struct qed_igu_block *p_block;
 
 	p_block = &p_hwfn->hw_info.p_igu_info->igu_map.igu_blocks[sb_id];
@@ -2983,8 +2944,7 @@
 	return val;
 }
 
-int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn,
-			 struct qed_ptt *p_ptt)
+int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_igu_info *p_igu_info;
 	u32 val, min_vf = 0, max_vf = 0;
@@ -2993,7 +2953,6 @@
 	u16 prev_sb_id = 0xFF;
 
 	p_hwfn->hw_info.p_igu_info = kzalloc(sizeof(*p_igu_info), GFP_KERNEL);
-
 	if (!p_hwfn->hw_info.p_igu_info)
 		return -ENOMEM;
 
@@ -3104,22 +3063,19 @@
  */
 void qed_int_igu_init_rt(struct qed_hwfn *p_hwfn)
 {
-	u32 igu_pf_conf = 0;
-
-	igu_pf_conf |= IGU_PF_CONF_FUNC_EN;
+	u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN;
 
 	STORE_RT_REG(p_hwfn, IGU_REG_PF_CONFIGURATION_RT_OFFSET, igu_pf_conf);
 }
 
 u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn)
 {
-	u64 intr_status = 0;
-	u32 intr_status_lo = 0;
-	u32 intr_status_hi = 0;
 	u32 lsb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_LSB_UPPER -
 			       IGU_CMD_INT_ACK_BASE;
 	u32 msb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_MSB_UPPER -
 			       IGU_CMD_INT_ACK_BASE;
+	u32 intr_status_hi = 0, intr_status_lo = 0;
+	u64 intr_status = 0;
 
 	intr_status_lo = REG_RD(p_hwfn,
 				GTT_BAR0_MAP_REG_IGU_CMD +
@@ -3153,26 +3109,20 @@
 	kfree(p_hwfn->sp_dpc);
 }
 
-int qed_int_alloc(struct qed_hwfn *p_hwfn,
-		  struct qed_ptt *p_ptt)
+int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	int rc = 0;
 
 	rc = qed_int_sp_dpc_alloc(p_hwfn);
-	if (rc) {
-		DP_ERR(p_hwfn->cdev, "Failed to allocate sp dpc mem\n");
+	if (rc)
 		return rc;
-	}
+
 	rc = qed_int_sp_sb_alloc(p_hwfn, p_ptt);
-	if (rc) {
-		DP_ERR(p_hwfn->cdev, "Failed to allocate sp sb mem\n");
+	if (rc)
 		return rc;
-	}
+
 	rc = qed_int_sb_attn_alloc(p_hwfn, p_ptt);
-	if (rc) {
-		DP_ERR(p_hwfn->cdev, "Failed to allocate sb attn mem\n");
-		return rc;
-	}
+
 	return rc;
 }
 
@@ -3183,8 +3133,7 @@
 	qed_int_sp_dpc_free(p_hwfn);
 }
 
-void qed_int_setup(struct qed_hwfn *p_hwfn,
-		   struct qed_ptt *p_ptt)
+void qed_int_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	qed_int_sb_setup(p_hwfn, p_ptt, &p_hwfn->p_sp_sb->sb_info);
 	qed_int_sb_attn_setup(p_hwfn, p_ptt);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 401e738..ddd410a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -52,7 +52,7 @@
 	u16 rx_mode = 0;
 
 	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	memset(&init_data, 0, sizeof(init_data));
@@ -80,8 +80,7 @@
 	p_ramrod->rx_mode.state = cpu_to_le16(rx_mode);
 
 	/* TPA related fields */
-	memset(&p_ramrod->tpa_param, 0,
-	       sizeof(struct eth_vport_tpa_param));
+	memset(&p_ramrod->tpa_param, 0, sizeof(struct eth_vport_tpa_param));
 
 	p_ramrod->tpa_param.max_buff_num = p_params->max_buffers_per_cqe;
 
@@ -102,6 +101,9 @@
 
 	p_ramrod->tx_switching_en = p_params->tx_switching;
 
+	p_ramrod->ctl_frame_mac_check_en = !!p_params->check_mac;
+	p_ramrod->ctl_frame_ethtype_check_en = !!p_params->check_ethtype;
+
 	/* Software Function ID in hwfn (PFs are 0 - 15, VFs are 16 - 135) */
 	p_ramrod->sw_fid = qed_concrete_to_sw_fid(p_hwfn->cdev,
 						  p_params->concrete_fid);
@@ -109,8 +111,8 @@
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
-int qed_sp_vport_start(struct qed_hwfn *p_hwfn,
-		       struct qed_sp_vport_start_params *p_params)
+static int qed_sp_vport_start(struct qed_hwfn *p_hwfn,
+			      struct qed_sp_vport_start_params *p_params)
 {
 	if (IS_VF(p_hwfn->cdev)) {
 		return qed_vf_pf_vport_start(p_hwfn, p_params->vport_id,
@@ -306,14 +308,14 @@
 	memset(&p_ramrod->approx_mcast.bins, 0,
 	       sizeof(p_ramrod->approx_mcast.bins));
 
-	if (p_params->update_approx_mcast_flg) {
-		p_ramrod->common.update_approx_mcast_flg = 1;
-		for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
-			u32 *p_bins = (u32 *)p_params->bins;
-			__le32 val = cpu_to_le32(p_bins[i]);
+	if (!p_params->update_approx_mcast_flg)
+		return;
 
-			p_ramrod->approx_mcast.bins[i] = val;
-		}
+	p_ramrod->common.update_approx_mcast_flg = 1;
+	for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
+		u32 *p_bins = (u32 *)p_params->bins;
+
+		p_ramrod->approx_mcast.bins[i] = cpu_to_le32(p_bins[i]);
 	}
 }
 
@@ -336,7 +338,7 @@
 	}
 
 	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	memset(&init_data, 0, sizeof(init_data));
@@ -361,8 +363,8 @@
 	p_cmn->tx_active_flg = p_params->vport_active_tx_flg;
 	p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg;
 	p_cmn->accept_any_vlan = p_params->accept_any_vlan;
-	p_cmn->update_accept_any_vlan_flg =
-			p_params->update_accept_any_vlan_flg;
+	val = p_params->update_accept_any_vlan_flg;
+	p_cmn->update_accept_any_vlan_flg = val;
 
 	p_cmn->inner_vlan_removal_en = p_params->inner_vlan_removal_flg;
 	val = p_params->update_inner_vlan_removal_flg;
@@ -411,7 +413,7 @@
 		return qed_vf_pf_vport_stop(p_hwfn);
 
 	rc = qed_fw_vport(p_hwfn, vport_id, &abs_vport_id);
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	memset(&init_data, 0, sizeof(init_data));
@@ -476,7 +478,7 @@
 
 		rc = qed_sp_vport_update(p_hwfn, &vport_update_params,
 					 comp_mode, p_comp_data);
-		if (rc != 0) {
+		if (rc) {
 			DP_ERR(cdev, "Update rx_mode failed %d\n", rc);
 			return rc;
 		}
@@ -511,11 +513,12 @@
 int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 				u16 opaque_fid,
 				u32 cid,
-				struct qed_queue_start_common_params *params,
+				struct qed_queue_start_common_params *p_params,
 				u8 stats_id,
 				u16 bd_max_bytes,
 				dma_addr_t bd_chain_phys_addr,
-				dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size)
+				dma_addr_t cqe_pbl_addr,
+				u16 cqe_pbl_size, bool b_use_zone_a_prod)
 {
 	struct rx_queue_start_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
@@ -526,23 +529,23 @@
 	int rc = -EINVAL;
 
 	/* Store information for the stop */
-	p_rx_cid		= &p_hwfn->p_rx_cids[params->queue_id];
-	p_rx_cid->cid		= cid;
-	p_rx_cid->opaque_fid	= opaque_fid;
-	p_rx_cid->vport_id	= params->vport_id;
+	p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
+	p_rx_cid->cid = cid;
+	p_rx_cid->opaque_fid = opaque_fid;
+	p_rx_cid->vport_id = p_params->vport_id;
 
-	rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_vport_id);
-	if (rc != 0)
+	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
+	if (rc)
 		return rc;
 
-	rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_rx_q_id);
-	if (rc != 0)
+	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_rx_q_id);
+	if (rc)
 		return rc;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SP,
 		   "opaque_fid=0x%x, cid=0x%x, rx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n",
-		   opaque_fid, cid, params->queue_id, params->vport_id,
-		   params->sb);
+		   opaque_fid,
+		   cid, p_params->queue_id, p_params->vport_id, p_params->sb);
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
@@ -558,24 +561,28 @@
 
 	p_ramrod = &p_ent->ramrod.rx_queue_start;
 
-	p_ramrod->sb_id			= cpu_to_le16(params->sb);
-	p_ramrod->sb_index		= params->sb_idx;
-	p_ramrod->vport_id		= abs_vport_id;
-	p_ramrod->stats_counter_id	= stats_id;
-	p_ramrod->rx_queue_id		= cpu_to_le16(abs_rx_q_id);
-	p_ramrod->complete_cqe_flg	= 0;
-	p_ramrod->complete_event_flg	= 1;
+	p_ramrod->sb_id = cpu_to_le16(p_params->sb);
+	p_ramrod->sb_index = p_params->sb_idx;
+	p_ramrod->vport_id = abs_vport_id;
+	p_ramrod->stats_counter_id = stats_id;
+	p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+	p_ramrod->complete_cqe_flg = 0;
+	p_ramrod->complete_event_flg = 1;
 
-	p_ramrod->bd_max_bytes	= cpu_to_le16(bd_max_bytes);
+	p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes);
 	DMA_REGPAIR_LE(p_ramrod->bd_base, bd_chain_phys_addr);
 
-	p_ramrod->num_of_pbl_pages	= cpu_to_le16(cqe_pbl_size);
+	p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size);
 	DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr);
 
-	p_ramrod->vf_rx_prod_index = params->vf_qid;
-	if (params->vf_qid)
+	if (p_params->vf_qid || b_use_zone_a_prod) {
+		p_ramrod->vf_rx_prod_index = p_params->vf_qid;
 		DP_VERBOSE(p_hwfn, QED_MSG_SP,
-			   "Queue is meant for VF rxq[%04x]\n", params->vf_qid);
+			   "Queue%s is meant for VF rxq[%02x]\n",
+			   b_use_zone_a_prod ? " [legacy]" : "",
+			   p_params->vf_qid);
+		p_ramrod->vf_rx_prod_use_zone_a = b_use_zone_a_prod;
+	}
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
@@ -583,7 +590,7 @@
 static int
 qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
 			  u16 opaque_fid,
-			  struct qed_queue_start_common_params *params,
+			  struct qed_queue_start_common_params *p_params,
 			  u16 bd_max_bytes,
 			  dma_addr_t bd_chain_phys_addr,
 			  dma_addr_t cqe_pbl_addr,
@@ -597,20 +604,20 @@
 
 	if (IS_VF(p_hwfn->cdev)) {
 		return qed_vf_pf_rxq_start(p_hwfn,
-					   params->queue_id,
-					   params->sb,
-					   params->sb_idx,
+					   p_params->queue_id,
+					   p_params->sb,
+					   (u8)p_params->sb_idx,
 					   bd_max_bytes,
 					   bd_chain_phys_addr,
 					   cqe_pbl_addr, cqe_pbl_size, pp_prod);
 	}
 
-	rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_l2_queue);
-	if (rc != 0)
+	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_l2_queue);
+	if (rc)
 		return rc;
 
-	rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_stats_id);
-	if (rc != 0)
+	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id);
+	if (rc)
 		return rc;
 
 	*pp_prod = (u8 __iomem *)p_hwfn->regview +
@@ -622,9 +629,8 @@
 			  (u32 *)(&init_prod_val));
 
 	/* Allocate a CID for the queue */
-	p_rx_cid = &p_hwfn->p_rx_cids[params->queue_id];
-	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH,
-				 &p_rx_cid->cid);
+	p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
+	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_rx_cid->cid);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
 		return rc;
@@ -634,14 +640,13 @@
 	rc = qed_sp_eth_rxq_start_ramrod(p_hwfn,
 					 opaque_fid,
 					 p_rx_cid->cid,
-					 params,
+					 p_params,
 					 abs_stats_id,
 					 bd_max_bytes,
 					 bd_chain_phys_addr,
-					 cqe_pbl_addr,
-					 cqe_pbl_size);
+					 cqe_pbl_addr, cqe_pbl_size, false);
 
-	if (rc != 0)
+	if (rc)
 		qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
 
 	return rc;
@@ -788,21 +793,20 @@
 	if (rc)
 		return rc;
 
-	p_ramrod		= &p_ent->ramrod.tx_queue_start;
-	p_ramrod->vport_id	= abs_vport_id;
+	p_ramrod = &p_ent->ramrod.tx_queue_start;
+	p_ramrod->vport_id = abs_vport_id;
 
-	p_ramrod->sb_id			= cpu_to_le16(p_params->sb);
-	p_ramrod->sb_index		= p_params->sb_idx;
-	p_ramrod->stats_counter_id	= stats_id;
+	p_ramrod->sb_id = cpu_to_le16(p_params->sb);
+	p_ramrod->sb_index = p_params->sb_idx;
+	p_ramrod->stats_counter_id = stats_id;
 
-	p_ramrod->queue_zone_id		= cpu_to_le16(abs_tx_q_id);
-	p_ramrod->pbl_size		= cpu_to_le16(pbl_size);
+	p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id);
+
+	p_ramrod->pbl_size = cpu_to_le16(pbl_size);
 	DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr);
 
-	pq_id			= qed_get_qm_pq(p_hwfn,
-						PROTOCOLID_ETH,
-						p_pq_params);
-	p_ramrod->qm_pq_id	= cpu_to_le16(pq_id);
+	pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, p_pq_params);
+	p_ramrod->qm_pq_id = cpu_to_le16(pq_id);
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
@@ -836,8 +840,7 @@
 	memset(&pq_params, 0, sizeof(pq_params));
 
 	/* Allocate a CID for the queue */
-	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH,
-				 &p_tx_cid->cid);
+	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_tx_cid->cid);
 	if (rc) {
 		DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
 		return rc;
@@ -896,8 +899,7 @@
 	return qed_sp_release_queue_cid(p_hwfn, p_tx_cid);
 }
 
-static enum eth_filter_action
-qed_filter_action(enum qed_filter_opcode opcode)
+static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
 {
 	enum eth_filter_action action = MAX_ETH_FILTER_ACTION;
 
@@ -1033,19 +1035,19 @@
 		p_first_filter->vni = cpu_to_le32(p_filter_cmd->vni);
 
 	if (p_filter_cmd->opcode == QED_FILTER_MOVE) {
-		p_second_filter->type		= p_first_filter->type;
-		p_second_filter->mac_msb	= p_first_filter->mac_msb;
-		p_second_filter->mac_mid	= p_first_filter->mac_mid;
-		p_second_filter->mac_lsb	= p_first_filter->mac_lsb;
-		p_second_filter->vlan_id	= p_first_filter->vlan_id;
-		p_second_filter->vni		= p_first_filter->vni;
+		p_second_filter->type = p_first_filter->type;
+		p_second_filter->mac_msb = p_first_filter->mac_msb;
+		p_second_filter->mac_mid = p_first_filter->mac_mid;
+		p_second_filter->mac_lsb = p_first_filter->mac_lsb;
+		p_second_filter->vlan_id = p_first_filter->vlan_id;
+		p_second_filter->vni = p_first_filter->vni;
 
 		p_first_filter->action = ETH_FILTER_ACTION_REMOVE;
 
 		p_first_filter->vport_id = vport_to_remove_from;
 
-		p_second_filter->action		= ETH_FILTER_ACTION_ADD;
-		p_second_filter->vport_id	= vport_to_add_to;
+		p_second_filter->action = ETH_FILTER_ACTION_ADD;
+		p_second_filter->vport_id = vport_to_add_to;
 	} else if (p_filter_cmd->opcode == QED_FILTER_REPLACE) {
 		p_first_filter->vport_id = vport_to_add_to;
 		memcpy(p_second_filter, p_first_filter,
@@ -1086,7 +1088,7 @@
 	rc = qed_filter_ucast_common(p_hwfn, opaque_fid, p_filter_cmd,
 				     &p_ramrod, &p_ent,
 				     comp_mode, p_comp_data);
-	if (rc != 0) {
+	if (rc) {
 		DP_ERR(p_hwfn, "Uni. filter command failed %d\n", rc);
 		return rc;
 	}
@@ -1094,10 +1096,8 @@
 	p_header->assert_on_error = p_filter_cmd->assert_on_error;
 
 	rc = qed_spq_post(p_hwfn, p_ent, NULL);
-	if (rc != 0) {
-		DP_ERR(p_hwfn,
-		       "Unicast filter ADD command failed %d\n",
-		       rc);
+	if (rc) {
+		DP_ERR(p_hwfn, "Unicast filter ADD command failed %d\n", rc);
 		return rc;
 	}
 
@@ -1136,15 +1136,10 @@
  * Return:
  ******************************************************************************/
 static u32 qed_calc_crc32c(u8 *crc32_packet,
-			   u32 crc32_length,
-			   u32 crc32_seed,
-			   u8 complement)
+			   u32 crc32_length, u32 crc32_seed, u8 complement)
 {
-	u32 byte = 0;
-	u32 bit = 0;
-	u8 msb = 0;
-	u8 current_byte = 0;
-	u32 crc32_result = crc32_seed;
+	u32 byte = 0, bit = 0, crc32_result = crc32_seed;
+	u8 msb = 0, current_byte = 0;
 
 	if ((!crc32_packet) ||
 	    (crc32_length == 0) ||
@@ -1164,9 +1159,7 @@
 	return crc32_result;
 }
 
-static inline u32 qed_crc32c_le(u32 seed,
-				u8 *mac,
-				u32 len)
+static u32 qed_crc32c_le(u32 seed, u8 *mac, u32 len)
 {
 	u32 packet_buf[2] = { 0 };
 
@@ -1196,17 +1189,14 @@
 	u8 abs_vport_id = 0;
 	int rc, i;
 
-	if (p_filter_cmd->opcode == QED_FILTER_ADD) {
+	if (p_filter_cmd->opcode == QED_FILTER_ADD)
 		rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_add_to,
 				  &abs_vport_id);
-		if (rc)
-			return rc;
-	} else {
+	else
 		rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_remove_from,
 				  &abs_vport_id);
-		if (rc)
-			return rc;
-	}
+	if (rc)
+		return rc;
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
@@ -1244,11 +1234,11 @@
 
 		/* Convert to correct endianity */
 		for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) {
+			struct vport_update_ramrod_mcast *p_ramrod_bins;
 			u32 *p_bins = (u32 *)bins;
-			struct vport_update_ramrod_mcast *approx_mcast;
 
-			approx_mcast = &p_ramrod->approx_mcast;
-			approx_mcast->bins[i] = cpu_to_le32(p_bins[i]);
+			p_ramrod_bins = &p_ramrod->approx_mcast;
+			p_ramrod_bins->bins[i] = cpu_to_le32(p_bins[i]);
 		}
 	}
 
@@ -1286,8 +1276,7 @@
 		rc = qed_sp_eth_filter_mcast(p_hwfn,
 					     opaque_fid,
 					     p_filter_cmd,
-					     comp_mode,
-					     p_comp_data);
+					     comp_mode, p_comp_data);
 	}
 	return rc;
 }
@@ -1314,9 +1303,8 @@
 		rc = qed_sp_eth_filter_ucast(p_hwfn,
 					     opaque_fid,
 					     p_filter_cmd,
-					     comp_mode,
-					     p_comp_data);
-		if (rc != 0)
+					     comp_mode, p_comp_data);
+		if (rc)
 			break;
 	}
 
@@ -1590,8 +1578,7 @@
 	}
 }
 
-void qed_get_vport_stats(struct qed_dev *cdev,
-			 struct qed_eth_stats *stats)
+void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
 {
 	u32 i;
 
@@ -1698,6 +1685,8 @@
 		qed_vf_get_num_vlan_filters(&cdev->hwfns[0],
 					    &info->num_vlan_filters);
 		qed_vf_get_port_mac(&cdev->hwfns[0], info->port_mac);
+
+		info->is_legacy = !!cdev->hwfns[0].vf_iov_info->b_pre_fp_hsi;
 	}
 
 	qed_fill_dev_info(cdev, &info->common);
@@ -1766,8 +1755,7 @@
 	return 0;
 }
 
-static int qed_stop_vport(struct qed_dev *cdev,
-			  u8 vport_id)
+static int qed_stop_vport(struct qed_dev *cdev, u8 vport_id)
 {
 	int rc, i;
 
@@ -1775,8 +1763,7 @@
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
 		rc = qed_sp_vport_stop(p_hwfn,
-				       p_hwfn->hw_info.opaque_fid,
-				       vport_id);
+				       p_hwfn->hw_info.opaque_fid, vport_id);
 
 		if (rc) {
 			DP_ERR(cdev, "Failed to stop VPORT\n");
@@ -1801,10 +1788,8 @@
 
 	/* Translate protocol params into sp params */
 	sp_params.vport_id = params->vport_id;
-	sp_params.update_vport_active_rx_flg =
-		params->update_vport_active_flg;
-	sp_params.update_vport_active_tx_flg =
-		params->update_vport_active_flg;
+	sp_params.update_vport_active_rx_flg = params->update_vport_active_flg;
+	sp_params.update_vport_active_tx_flg = params->update_vport_active_flg;
 	sp_params.vport_active_rx_flg = params->vport_active_flg;
 	sp_params.vport_active_tx_flg = params->vport_active_flg;
 	sp_params.update_tx_switching_flg = params->update_tx_switching_flg;
@@ -1817,8 +1802,7 @@
 	 * We need to re-fix the rss values per engine for CMT.
 	 */
 	if (cdev->num_hwfns > 1 && params->update_rss_flg) {
-		struct qed_update_vport_rss_params *rss =
-			&params->rss_params;
+		struct qed_update_vport_rss_params *rss = &params->rss_params;
 		int k, max = 0;
 
 		/* Find largest entry, since it's possible RSS needs to
@@ -1861,8 +1845,8 @@
 		       QED_RSS_IND_TABLE_SIZE * sizeof(u16));
 		memcpy(sp_rss_params.rss_key, params->rss_params.rss_key,
 		       QED_RSS_KEY_SIZE * sizeof(u32));
+		sp_params.rss_params = &sp_rss_params;
 	}
-	sp_params.rss_params = &sp_rss_params;
 
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
@@ -1893,8 +1877,8 @@
 			 u16 cqe_pbl_size,
 			 void __iomem **pp_prod)
 {
-	int rc, hwfn_index;
 	struct qed_hwfn *p_hwfn;
+	int rc, hwfn_index;
 
 	hwfn_index = params->rss_id % cdev->num_hwfns;
 	p_hwfn = &cdev->hwfns[hwfn_index];
@@ -1935,8 +1919,7 @@
 
 	rc = qed_sp_eth_rx_queue_stop(p_hwfn,
 				      params->rx_queue_id / cdev->num_hwfns,
-				      params->eq_completion_only,
-				      false);
+				      params->eq_completion_only, false);
 	if (rc) {
 		DP_ERR(cdev, "Failed to stop RXQ#%d\n", params->rx_queue_id);
 		return rc;
@@ -2047,11 +2030,11 @@
 
 	memset(&accept_flags, 0, sizeof(accept_flags));
 
-	accept_flags.update_rx_mode_config	= 1;
-	accept_flags.update_tx_mode_config	= 1;
-	accept_flags.rx_accept_filter		= QED_ACCEPT_UCAST_MATCHED |
-						  QED_ACCEPT_MCAST_MATCHED |
-						  QED_ACCEPT_BCAST;
+	accept_flags.update_rx_mode_config = 1;
+	accept_flags.update_tx_mode_config = 1;
+	accept_flags.rx_accept_filter = QED_ACCEPT_UCAST_MATCHED |
+					QED_ACCEPT_MCAST_MATCHED |
+					QED_ACCEPT_BCAST;
 	accept_flags.tx_accept_filter = QED_ACCEPT_UCAST_MATCHED |
 					QED_ACCEPT_MCAST_MATCHED |
 					QED_ACCEPT_BCAST;
@@ -2072,9 +2055,8 @@
 	struct qed_filter_ucast ucast;
 
 	if (!params->vlan_valid && !params->mac_valid) {
-		DP_NOTICE(
-			cdev,
-			"Tried configuring a unicast filter, but both MAC and VLAN are not set\n");
+		DP_NOTICE(cdev,
+			  "Tried configuring a unicast filter, but both MAC and VLAN are not set\n");
 		return -EINVAL;
 	}
 
@@ -2135,8 +2117,7 @@
 	for (i = 0; i < mcast.num_mc_addrs; i++)
 		ether_addr_copy(mcast.mac[i], params->mac[i]);
 
-	return qed_filter_mcast_cmd(cdev, &mcast,
-				    QED_SPQ_MODE_CB, NULL);
+	return qed_filter_mcast_cmd(cdev, &mcast, QED_SPQ_MODE_CB, NULL);
 }
 
 static int qed_configure_filter(struct qed_dev *cdev,
@@ -2153,15 +2134,13 @@
 		accept_flags = params->filter.accept_flags;
 		return qed_configure_filter_rx_mode(cdev, accept_flags);
 	default:
-		DP_NOTICE(cdev, "Unknown filter type %d\n",
-			  (int)params->type);
+		DP_NOTICE(cdev, "Unknown filter type %d\n", (int)params->type);
 		return -EINVAL;
 	}
 }
 
 static int qed_fp_cqe_completion(struct qed_dev *dev,
-				 u8 rss_id,
-				 struct eth_slow_path_rx_cqe *cqe)
+				 u8 rss_id, struct eth_slow_path_rx_cqe *cqe)
 {
 	return qed_eth_cqe_completion(&dev->hwfns[rss_id % dev->num_hwfns],
 				      cqe);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index 0021145..e495d62 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -102,6 +102,8 @@
 	u16 opaque_fid;
 	u8 vport_id;
 	u16 mtu;
+	bool check_mac;
+	bool check_ethtype;
 };
 
 int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
@@ -213,6 +215,8 @@
 			    enum spq_mode comp_mode,
 			    struct qed_spq_comp_cb *p_comp_data);
 
+void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
+
 int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 			   struct qed_sp_vport_start_params *p_params);
 
@@ -223,7 +227,8 @@
 				u8 stats_id,
 				u16 bd_max_bytes,
 				dma_addr_t bd_chain_phys_addr,
-				dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size);
+				dma_addr_t cqe_pbl_addr,
+				u16 cqe_pbl_size, bool b_use_zone_a_prod);
 
 int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
 				u16  opaque_fid,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
new file mode 100644
index 0000000..a6db107
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -0,0 +1,1792 @@
+/* QLogic qed NIC Driver
+ *
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/version.h>
+#include <linux/workqueue.h>
+#include <net/ipv6.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/qed/qed_ll2_if.h>
+#include "qed.h"
+#include "qed_cxt.h"
+#include "qed_dev_api.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_int.h"
+#include "qed_ll2.h"
+#include "qed_mcp.h"
+#include "qed_reg_addr.h"
+#include "qed_sp.h"
+
+#define QED_LL2_RX_REGISTERED(ll2)	((ll2)->rx_queue.b_cb_registred)
+#define QED_LL2_TX_REGISTERED(ll2)	((ll2)->tx_queue.b_cb_registred)
+
+#define QED_LL2_TX_SIZE (256)
+#define QED_LL2_RX_SIZE (4096)
+
+struct qed_cb_ll2_info {
+	int rx_cnt;
+	u32 rx_size;
+	u8 handle;
+	bool frags_mapped;
+
+	/* Lock protecting LL2 buffer lists in sleepless context */
+	spinlock_t lock;
+	struct list_head list;
+
+	const struct qed_ll2_cb_ops *cbs;
+	void *cb_cookie;
+};
+
+struct qed_ll2_buffer {
+	struct list_head list;
+	void *data;
+	dma_addr_t phys_addr;
+};
+
+static void qed_ll2b_complete_tx_packet(struct qed_hwfn *p_hwfn,
+					u8 connection_handle,
+					void *cookie,
+					dma_addr_t first_frag_addr,
+					bool b_last_fragment,
+					bool b_last_packet)
+{
+	struct qed_dev *cdev = p_hwfn->cdev;
+	struct sk_buff *skb = cookie;
+
+	/* All we need to do is release the mapping */
+	dma_unmap_single(&p_hwfn->cdev->pdev->dev, first_frag_addr,
+			 skb_headlen(skb), DMA_TO_DEVICE);
+
+	if (cdev->ll2->cbs && cdev->ll2->cbs->tx_cb)
+		cdev->ll2->cbs->tx_cb(cdev->ll2->cb_cookie, skb,
+				      b_last_fragment);
+
+	if (cdev->ll2->frags_mapped)
+		/* Case where mapped frags were received, need to
+		 * free skb with nr_frags marked as 0
+		 */
+		skb_shinfo(skb)->nr_frags = 0;
+
+	dev_kfree_skb_any(skb);
+}
+
+static int qed_ll2_alloc_buffer(struct qed_dev *cdev,
+				u8 **data, dma_addr_t *phys_addr)
+{
+	*data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC);
+	if (!(*data)) {
+		DP_INFO(cdev, "Failed to allocate LL2 buffer data\n");
+		return -ENOMEM;
+	}
+
+	*phys_addr = dma_map_single(&cdev->pdev->dev,
+				    ((*data) + NET_SKB_PAD),
+				    cdev->ll2->rx_size, DMA_FROM_DEVICE);
+	if (dma_mapping_error(&cdev->pdev->dev, *phys_addr)) {
+		DP_INFO(cdev, "Failed to map LL2 buffer data\n");
+		kfree((*data));
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int qed_ll2_dealloc_buffer(struct qed_dev *cdev,
+				 struct qed_ll2_buffer *buffer)
+{
+	spin_lock_bh(&cdev->ll2->lock);
+
+	dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
+			 cdev->ll2->rx_size, DMA_FROM_DEVICE);
+	kfree(buffer->data);
+	list_del(&buffer->list);
+
+	cdev->ll2->rx_cnt--;
+	if (!cdev->ll2->rx_cnt)
+		DP_INFO(cdev, "All LL2 entries were removed\n");
+
+	spin_unlock_bh(&cdev->ll2->lock);
+
+	return 0;
+}
+
+static void qed_ll2_kill_buffers(struct qed_dev *cdev)
+{
+	struct qed_ll2_buffer *buffer, *tmp_buffer;
+
+	list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list)
+		qed_ll2_dealloc_buffer(cdev, buffer);
+}
+
+void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn,
+				 u8 connection_handle,
+				 struct qed_ll2_rx_packet *p_pkt,
+				 struct core_rx_fast_path_cqe *p_cqe,
+				 bool b_last_packet)
+{
+	u16 packet_length = le16_to_cpu(p_cqe->packet_length);
+	struct qed_ll2_buffer *buffer = p_pkt->cookie;
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u16 vlan = le16_to_cpu(p_cqe->vlan);
+	u32 opaque_data_0, opaque_data_1;
+	u8 pad = p_cqe->placement_offset;
+	dma_addr_t new_phys_addr;
+	struct sk_buff *skb;
+	bool reuse = false;
+	int rc = -EINVAL;
+	u8 *new_data;
+
+	opaque_data_0 = le32_to_cpu(p_cqe->opaque_data.data[0]);
+	opaque_data_1 = le32_to_cpu(p_cqe->opaque_data.data[1]);
+
+	DP_VERBOSE(p_hwfn,
+		   (NETIF_MSG_RX_STATUS | QED_MSG_STORAGE | NETIF_MSG_PKTDATA),
+		   "Got an LL2 Rx completion: [Buffer at phys 0x%llx, offset 0x%02x] Length 0x%04x Parse_flags 0x%04x vlan 0x%04x Opaque data [0x%08x:0x%08x]\n",
+		   (u64)p_pkt->rx_buf_addr, pad, packet_length,
+		   le16_to_cpu(p_cqe->parse_flags.flags), vlan,
+		   opaque_data_0, opaque_data_1);
+
+	if ((cdev->dp_module & NETIF_MSG_PKTDATA) && buffer->data) {
+		print_hex_dump(KERN_INFO, "",
+			       DUMP_PREFIX_OFFSET, 16, 1,
+			       buffer->data, packet_length, false);
+	}
+
+	/* Determine if data is valid */
+	if (packet_length < ETH_HLEN)
+		reuse = true;
+
+	/* Allocate a replacement for buffer; Reuse upon failure */
+	if (!reuse)
+		rc = qed_ll2_alloc_buffer(p_hwfn->cdev, &new_data,
+					  &new_phys_addr);
+
+	/* If need to reuse or there's no replacement buffer, repost this */
+	if (rc)
+		goto out_post;
+
+	skb = build_skb(buffer->data, 0);
+	if (!skb) {
+		rc = -ENOMEM;
+		goto out_post;
+	}
+
+	pad += NET_SKB_PAD;
+	skb_reserve(skb, pad);
+	skb_put(skb, packet_length);
+	skb_checksum_none_assert(skb);
+
+	/* Get parital ethernet information instead of eth_type_trans(),
+	 * Since we don't have an associated net_device.
+	 */
+	skb_reset_mac_header(skb);
+	skb->protocol = eth_hdr(skb)->h_proto;
+
+	/* Pass SKB onward */
+	if (cdev->ll2->cbs && cdev->ll2->cbs->rx_cb) {
+		if (vlan)
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan);
+		cdev->ll2->cbs->rx_cb(cdev->ll2->cb_cookie, skb,
+				      opaque_data_0, opaque_data_1);
+	}
+
+	/* Update Buffer information and update FW producer */
+	buffer->data = new_data;
+	buffer->phys_addr = new_phys_addr;
+
+out_post:
+	rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle,
+				    buffer->phys_addr, 0,  buffer, 1);
+
+	if (rc)
+		qed_ll2_dealloc_buffer(cdev, buffer);
+}
+
+static struct qed_ll2_info *__qed_ll2_handle_sanity(struct qed_hwfn *p_hwfn,
+						    u8 connection_handle,
+						    bool b_lock,
+						    bool b_only_active)
+{
+	struct qed_ll2_info *p_ll2_conn, *p_ret = NULL;
+
+	if (connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS)
+		return NULL;
+
+	if (!p_hwfn->p_ll2_info)
+		return NULL;
+
+	p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle];
+
+	if (b_only_active) {
+		if (b_lock)
+			mutex_lock(&p_ll2_conn->mutex);
+		if (p_ll2_conn->b_active)
+			p_ret = p_ll2_conn;
+		if (b_lock)
+			mutex_unlock(&p_ll2_conn->mutex);
+	} else {
+		p_ret = p_ll2_conn;
+	}
+
+	return p_ret;
+}
+
+static struct qed_ll2_info *qed_ll2_handle_sanity(struct qed_hwfn *p_hwfn,
+						  u8 connection_handle)
+{
+	return __qed_ll2_handle_sanity(p_hwfn, connection_handle, false, true);
+}
+
+static struct qed_ll2_info *qed_ll2_handle_sanity_lock(struct qed_hwfn *p_hwfn,
+						       u8 connection_handle)
+{
+	return __qed_ll2_handle_sanity(p_hwfn, connection_handle, true, true);
+}
+
+static struct qed_ll2_info *qed_ll2_handle_sanity_inactive(struct qed_hwfn
+							   *p_hwfn,
+							   u8 connection_handle)
+{
+	return __qed_ll2_handle_sanity(p_hwfn, connection_handle, false, false);
+}
+
+static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
+{
+	bool b_last_packet = false, b_last_frag = false;
+	struct qed_ll2_tx_packet *p_pkt = NULL;
+	struct qed_ll2_info *p_ll2_conn;
+	struct qed_ll2_tx_queue *p_tx;
+	dma_addr_t tx_frag;
+
+	p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
+	if (!p_ll2_conn)
+		return;
+
+	p_tx = &p_ll2_conn->tx_queue;
+
+	while (!list_empty(&p_tx->active_descq)) {
+		p_pkt = list_first_entry(&p_tx->active_descq,
+					 struct qed_ll2_tx_packet, list_entry);
+		if (!p_pkt)
+			break;
+
+		list_del(&p_pkt->list_entry);
+		b_last_packet = list_empty(&p_tx->active_descq);
+		list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
+		p_tx->cur_completing_packet = *p_pkt;
+		p_tx->cur_completing_bd_idx = 1;
+		b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used;
+		tx_frag = p_pkt->bds_set[0].tx_frag;
+		if (p_ll2_conn->gsi_enable)
+			qed_ll2b_release_tx_gsi_packet(p_hwfn,
+						       p_ll2_conn->my_id,
+						       p_pkt->cookie,
+						       tx_frag,
+						       b_last_frag,
+						       b_last_packet);
+		else
+			qed_ll2b_complete_tx_packet(p_hwfn,
+						    p_ll2_conn->my_id,
+						    p_pkt->cookie,
+						    tx_frag,
+						    b_last_frag,
+						    b_last_packet);
+
+	}
+}
+
+static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
+{
+	struct qed_ll2_info *p_ll2_conn = p_cookie;
+	struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue;
+	u16 new_idx = 0, num_bds = 0, num_bds_in_packet = 0;
+	struct qed_ll2_tx_packet *p_pkt;
+	bool b_last_frag = false;
+	unsigned long flags;
+	dma_addr_t tx_frag;
+	int rc = -EINVAL;
+
+	spin_lock_irqsave(&p_tx->lock, flags);
+	if (p_tx->b_completing_packet) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	new_idx = le16_to_cpu(*p_tx->p_fw_cons);
+	num_bds = ((s16)new_idx - (s16)p_tx->bds_idx);
+	while (num_bds) {
+		if (list_empty(&p_tx->active_descq))
+			goto out;
+
+		p_pkt = list_first_entry(&p_tx->active_descq,
+					 struct qed_ll2_tx_packet, list_entry);
+		if (!p_pkt)
+			goto out;
+
+		p_tx->b_completing_packet = true;
+		p_tx->cur_completing_packet = *p_pkt;
+		num_bds_in_packet = p_pkt->bd_used;
+		list_del(&p_pkt->list_entry);
+
+		if (num_bds < num_bds_in_packet) {
+			DP_NOTICE(p_hwfn,
+				  "Rest of BDs does not cover whole packet\n");
+			goto out;
+		}
+
+		num_bds -= num_bds_in_packet;
+		p_tx->bds_idx += num_bds_in_packet;
+		while (num_bds_in_packet--)
+			qed_chain_consume(&p_tx->txq_chain);
+
+		p_tx->cur_completing_bd_idx = 1;
+		b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used;
+		list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
+
+		spin_unlock_irqrestore(&p_tx->lock, flags);
+		tx_frag = p_pkt->bds_set[0].tx_frag;
+		if (p_ll2_conn->gsi_enable)
+			qed_ll2b_complete_tx_gsi_packet(p_hwfn,
+							p_ll2_conn->my_id,
+							p_pkt->cookie,
+							tx_frag,
+							b_last_frag, !num_bds);
+		else
+			qed_ll2b_complete_tx_packet(p_hwfn,
+						    p_ll2_conn->my_id,
+						    p_pkt->cookie,
+						    tx_frag,
+						    b_last_frag, !num_bds);
+		spin_lock_irqsave(&p_tx->lock, flags);
+	}
+
+	p_tx->b_completing_packet = false;
+	rc = 0;
+out:
+	spin_unlock_irqrestore(&p_tx->lock, flags);
+	return rc;
+}
+
+static int
+qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn,
+			   struct qed_ll2_info *p_ll2_info,
+			   union core_rx_cqe_union *p_cqe,
+			   unsigned long lock_flags, bool b_last_cqe)
+{
+	struct qed_ll2_rx_queue *p_rx = &p_ll2_info->rx_queue;
+	struct qed_ll2_rx_packet *p_pkt = NULL;
+	u16 packet_length, parse_flags, vlan;
+	u32 src_mac_addrhi;
+	u16 src_mac_addrlo;
+
+	if (!list_empty(&p_rx->active_descq))
+		p_pkt = list_first_entry(&p_rx->active_descq,
+					 struct qed_ll2_rx_packet, list_entry);
+	if (!p_pkt) {
+		DP_NOTICE(p_hwfn,
+			  "GSI Rx completion but active_descq is empty\n");
+		return -EIO;
+	}
+
+	list_del(&p_pkt->list_entry);
+	parse_flags = le16_to_cpu(p_cqe->rx_cqe_gsi.parse_flags.flags);
+	packet_length = le16_to_cpu(p_cqe->rx_cqe_gsi.data_length);
+	vlan = le16_to_cpu(p_cqe->rx_cqe_gsi.vlan);
+	src_mac_addrhi = le32_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrhi);
+	src_mac_addrlo = le16_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrlo);
+	if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd)
+		DP_NOTICE(p_hwfn,
+			  "Mismatch between active_descq and the LL2 Rx chain\n");
+	list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
+
+	spin_unlock_irqrestore(&p_rx->lock, lock_flags);
+	qed_ll2b_complete_rx_gsi_packet(p_hwfn,
+					p_ll2_info->my_id,
+					p_pkt->cookie,
+					p_pkt->rx_buf_addr,
+					packet_length,
+					p_cqe->rx_cqe_gsi.data_length_error,
+					parse_flags,
+					vlan,
+					src_mac_addrhi,
+					src_mac_addrlo, b_last_cqe);
+	spin_lock_irqsave(&p_rx->lock, lock_flags);
+
+	return 0;
+}
+
+static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn,
+				      struct qed_ll2_info *p_ll2_conn,
+				      union core_rx_cqe_union *p_cqe,
+				      unsigned long lock_flags,
+				      bool b_last_cqe)
+{
+	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
+	struct qed_ll2_rx_packet *p_pkt = NULL;
+
+	if (!list_empty(&p_rx->active_descq))
+		p_pkt = list_first_entry(&p_rx->active_descq,
+					 struct qed_ll2_rx_packet, list_entry);
+	if (!p_pkt) {
+		DP_NOTICE(p_hwfn,
+			  "LL2 Rx completion but active_descq is empty\n");
+		return -EIO;
+	}
+	list_del(&p_pkt->list_entry);
+
+	if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd)
+		DP_NOTICE(p_hwfn,
+			  "Mismatch between active_descq and the LL2 Rx chain\n");
+	list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
+
+	spin_unlock_irqrestore(&p_rx->lock, lock_flags);
+	qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id,
+				    p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe);
+	spin_lock_irqsave(&p_rx->lock, lock_flags);
+
+	return 0;
+}
+
+static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
+{
+	struct qed_ll2_info *p_ll2_conn = cookie;
+	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
+	union core_rx_cqe_union *cqe = NULL;
+	u16 cq_new_idx = 0, cq_old_idx = 0;
+	unsigned long flags = 0;
+	int rc = 0;
+
+	spin_lock_irqsave(&p_rx->lock, flags);
+	cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons);
+	cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
+
+	while (cq_new_idx != cq_old_idx) {
+		bool b_last_cqe = (cq_new_idx == cq_old_idx);
+
+		cqe = qed_chain_consume(&p_rx->rcq_chain);
+		cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
+
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_LL2,
+			   "LL2 [sw. cons %04x, fw. at %04x] - Got Packet of type %02x\n",
+			   cq_old_idx, cq_new_idx, cqe->rx_cqe_sp.type);
+
+		switch (cqe->rx_cqe_sp.type) {
+		case CORE_RX_CQE_TYPE_SLOW_PATH:
+			DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n");
+			rc = -EINVAL;
+			break;
+		case CORE_RX_CQE_TYPE_GSI_OFFLOAD:
+			rc = qed_ll2_rxq_completion_gsi(p_hwfn, p_ll2_conn,
+							cqe, flags, b_last_cqe);
+			break;
+		case CORE_RX_CQE_TYPE_REGULAR:
+			rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn,
+							cqe, flags, b_last_cqe);
+			break;
+		default:
+			rc = -EIO;
+		}
+	}
+
+	spin_unlock_irqrestore(&p_rx->lock, flags);
+	return rc;
+}
+
+void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
+{
+	struct qed_ll2_info *p_ll2_conn = NULL;
+	struct qed_ll2_rx_packet *p_pkt = NULL;
+	struct qed_ll2_rx_queue *p_rx;
+
+	p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle);
+	if (!p_ll2_conn)
+		return;
+
+	p_rx = &p_ll2_conn->rx_queue;
+
+	while (!list_empty(&p_rx->active_descq)) {
+		dma_addr_t rx_buf_addr;
+		void *cookie;
+		bool b_last;
+
+		p_pkt = list_first_entry(&p_rx->active_descq,
+					 struct qed_ll2_rx_packet, list_entry);
+		if (!p_pkt)
+			break;
+
+		list_del(&p_pkt->list_entry);
+		list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
+
+		rx_buf_addr = p_pkt->rx_buf_addr;
+		cookie = p_pkt->cookie;
+
+		b_last = list_empty(&p_rx->active_descq);
+	}
+}
+
+static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
+				     struct qed_ll2_info *p_ll2_conn,
+				     u8 action_on_error)
+{
+	enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type;
+	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
+	struct core_rx_start_ramrod_data *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	u16 cqe_pbl_size;
+	int rc = 0;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_ll2_conn->cid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 CORE_RAMROD_RX_QUEUE_START,
+				 PROTOCOLID_CORE, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.core_rx_queue_start;
+
+	p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn));
+	p_ramrod->sb_index = p_rx->rx_sb_index;
+	p_ramrod->complete_event_flg = 1;
+
+	p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu);
+	DMA_REGPAIR_LE(p_ramrod->bd_base,
+		       p_rx->rxq_chain.p_phys_addr);
+	cqe_pbl_size = (u16)qed_chain_get_page_cnt(&p_rx->rcq_chain);
+	p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size);
+	DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr,
+		       qed_chain_get_pbl_phys(&p_rx->rcq_chain));
+
+	p_ramrod->drop_ttl0_flg = p_ll2_conn->rx_drop_ttl0_flg;
+	p_ramrod->inner_vlan_removal_en = p_ll2_conn->rx_vlan_removal_en;
+	p_ramrod->queue_id = p_ll2_conn->queue_id;
+	p_ramrod->main_func_queue = 1;
+
+	if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
+	    p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) {
+		p_ramrod->mf_si_bcast_accept_all = 1;
+		p_ramrod->mf_si_mcast_accept_all = 1;
+	} else {
+		p_ramrod->mf_si_bcast_accept_all = 0;
+		p_ramrod->mf_si_mcast_accept_all = 0;
+	}
+
+	p_ramrod->action_on_error.error_type = action_on_error;
+	p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable;
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
+				     struct qed_ll2_info *p_ll2_conn)
+{
+	enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type;
+	struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue;
+	struct core_tx_start_ramrod_data *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	union qed_qm_pq_params pq_params;
+	u16 pq_id = 0, pbl_size;
+	int rc = -EINVAL;
+
+	if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
+		return 0;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_ll2_conn->cid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 CORE_RAMROD_TX_QUEUE_START,
+				 PROTOCOLID_CORE, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.core_tx_queue_start;
+
+	p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn));
+	p_ramrod->sb_index = p_tx->tx_sb_index;
+	p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu);
+	p_ll2_conn->tx_stats_en = 1;
+	p_ramrod->stats_en = p_ll2_conn->tx_stats_en;
+	p_ramrod->stats_id = p_ll2_conn->tx_stats_id;
+
+	DMA_REGPAIR_LE(p_ramrod->pbl_base_addr,
+		       qed_chain_get_pbl_phys(&p_tx->txq_chain));
+	pbl_size = qed_chain_get_page_cnt(&p_tx->txq_chain);
+	p_ramrod->pbl_size = cpu_to_le16(pbl_size);
+
+	memset(&pq_params, 0, sizeof(pq_params));
+	pq_params.core.tc = p_ll2_conn->tx_tc;
+	pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params);
+	p_ramrod->qm_pq_id = cpu_to_le16(pq_id);
+
+	switch (conn_type) {
+	case QED_LL2_TYPE_ISCSI:
+	case QED_LL2_TYPE_ISCSI_OOO:
+		p_ramrod->conn_type = PROTOCOLID_ISCSI;
+		break;
+	case QED_LL2_TYPE_ROCE:
+		p_ramrod->conn_type = PROTOCOLID_ROCE;
+		break;
+	default:
+		p_ramrod->conn_type = PROTOCOLID_ETH;
+		DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type);
+	}
+
+	p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable;
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_ll2_rx_queue_stop(struct qed_hwfn *p_hwfn,
+				    struct qed_ll2_info *p_ll2_conn)
+{
+	struct core_rx_stop_ramrod_data *p_ramrod = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = -EINVAL;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_ll2_conn->cid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 CORE_RAMROD_RX_QUEUE_STOP,
+				 PROTOCOLID_CORE, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.core_rx_queue_stop;
+
+	p_ramrod->complete_event_flg = 1;
+	p_ramrod->queue_id = p_ll2_conn->queue_id;
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_ll2_tx_queue_stop(struct qed_hwfn *p_hwfn,
+				    struct qed_ll2_info *p_ll2_conn)
+{
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = -EINVAL;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_ll2_conn->cid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 CORE_RAMROD_TX_QUEUE_STOP,
+				 PROTOCOLID_CORE, &init_data);
+	if (rc)
+		return rc;
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int
+qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn,
+			      struct qed_ll2_info *p_ll2_info, u16 rx_num_desc)
+{
+	struct qed_ll2_rx_packet *p_descq;
+	u32 capacity;
+	int rc = 0;
+
+	if (!rx_num_desc)
+		goto out;
+
+	rc = qed_chain_alloc(p_hwfn->cdev,
+			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+			     QED_CHAIN_MODE_NEXT_PTR,
+			     QED_CHAIN_CNT_TYPE_U16,
+			     rx_num_desc,
+			     sizeof(struct core_rx_bd),
+			     &p_ll2_info->rx_queue.rxq_chain);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed to allocate ll2 rxq chain\n");
+		goto out;
+	}
+
+	capacity = qed_chain_get_capacity(&p_ll2_info->rx_queue.rxq_chain);
+	p_descq = kcalloc(capacity, sizeof(struct qed_ll2_rx_packet),
+			  GFP_KERNEL);
+	if (!p_descq) {
+		rc = -ENOMEM;
+		DP_NOTICE(p_hwfn, "Failed to allocate ll2 Rx desc\n");
+		goto out;
+	}
+	p_ll2_info->rx_queue.descq_array = p_descq;
+
+	rc = qed_chain_alloc(p_hwfn->cdev,
+			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+			     QED_CHAIN_MODE_PBL,
+			     QED_CHAIN_CNT_TYPE_U16,
+			     rx_num_desc,
+			     sizeof(struct core_rx_fast_path_cqe),
+			     &p_ll2_info->rx_queue.rcq_chain);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed to allocate ll2 rcq chain\n");
+		goto out;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_LL2,
+		   "Allocated LL2 Rxq [Type %08x] with 0x%08x buffers\n",
+		   p_ll2_info->conn_type, rx_num_desc);
+
+out:
+	return rc;
+}
+
+static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
+					 struct qed_ll2_info *p_ll2_info,
+					 u16 tx_num_desc)
+{
+	struct qed_ll2_tx_packet *p_descq;
+	u32 capacity;
+	int rc = 0;
+
+	if (!tx_num_desc)
+		goto out;
+
+	rc = qed_chain_alloc(p_hwfn->cdev,
+			     QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+			     QED_CHAIN_MODE_PBL,
+			     QED_CHAIN_CNT_TYPE_U16,
+			     tx_num_desc,
+			     sizeof(struct core_tx_bd),
+			     &p_ll2_info->tx_queue.txq_chain);
+	if (rc)
+		goto out;
+
+	capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain);
+	p_descq = kcalloc(capacity, sizeof(struct qed_ll2_tx_packet),
+			  GFP_KERNEL);
+	if (!p_descq) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	p_ll2_info->tx_queue.descq_array = p_descq;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_LL2,
+		   "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n",
+		   p_ll2_info->conn_type, tx_num_desc);
+
+out:
+	if (rc)
+		DP_NOTICE(p_hwfn,
+			  "Can't allocate memory for Tx LL2 with 0x%08x buffers\n",
+			  tx_num_desc);
+	return rc;
+}
+
+int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
+			       struct qed_ll2_info *p_params,
+			       u16 rx_num_desc,
+			       u16 tx_num_desc,
+			       u8 *p_connection_handle)
+{
+	qed_int_comp_cb_t comp_rx_cb, comp_tx_cb;
+	struct qed_ll2_info *p_ll2_info = NULL;
+	int rc;
+	u8 i;
+
+	if (!p_connection_handle || !p_hwfn->p_ll2_info)
+		return -EINVAL;
+
+	/* Find a free connection to be used */
+	for (i = 0; (i < QED_MAX_NUM_OF_LL2_CONNECTIONS); i++) {
+		mutex_lock(&p_hwfn->p_ll2_info[i].mutex);
+		if (p_hwfn->p_ll2_info[i].b_active) {
+			mutex_unlock(&p_hwfn->p_ll2_info[i].mutex);
+			continue;
+		}
+
+		p_hwfn->p_ll2_info[i].b_active = true;
+		p_ll2_info = &p_hwfn->p_ll2_info[i];
+		mutex_unlock(&p_hwfn->p_ll2_info[i].mutex);
+		break;
+	}
+	if (!p_ll2_info)
+		return -EBUSY;
+
+	p_ll2_info->conn_type = p_params->conn_type;
+	p_ll2_info->mtu = p_params->mtu;
+	p_ll2_info->rx_drop_ttl0_flg = p_params->rx_drop_ttl0_flg;
+	p_ll2_info->rx_vlan_removal_en = p_params->rx_vlan_removal_en;
+	p_ll2_info->tx_tc = p_params->tx_tc;
+	p_ll2_info->tx_dest = p_params->tx_dest;
+	p_ll2_info->ai_err_packet_too_big = p_params->ai_err_packet_too_big;
+	p_ll2_info->ai_err_no_buf = p_params->ai_err_no_buf;
+	p_ll2_info->gsi_enable = p_params->gsi_enable;
+
+	rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info, rx_num_desc);
+	if (rc)
+		goto q_allocate_fail;
+
+	rc = qed_ll2_acquire_connection_tx(p_hwfn, p_ll2_info, tx_num_desc);
+	if (rc)
+		goto q_allocate_fail;
+
+	/* Register callbacks for the Rx/Tx queues */
+	comp_rx_cb = qed_ll2_rxq_completion;
+	comp_tx_cb = qed_ll2_txq_completion;
+
+	if (rx_num_desc) {
+		qed_int_register_cb(p_hwfn, comp_rx_cb,
+				    &p_hwfn->p_ll2_info[i],
+				    &p_ll2_info->rx_queue.rx_sb_index,
+				    &p_ll2_info->rx_queue.p_fw_cons);
+		p_ll2_info->rx_queue.b_cb_registred = true;
+	}
+
+	if (tx_num_desc) {
+		qed_int_register_cb(p_hwfn,
+				    comp_tx_cb,
+				    &p_hwfn->p_ll2_info[i],
+				    &p_ll2_info->tx_queue.tx_sb_index,
+				    &p_ll2_info->tx_queue.p_fw_cons);
+		p_ll2_info->tx_queue.b_cb_registred = true;
+	}
+
+	*p_connection_handle = i;
+	return rc;
+
+q_allocate_fail:
+	qed_ll2_release_connection(p_hwfn, i);
+	return -ENOMEM;
+}
+
+static int qed_ll2_establish_connection_rx(struct qed_hwfn *p_hwfn,
+					   struct qed_ll2_info *p_ll2_conn)
+{
+	u8 action_on_error = 0;
+
+	if (!QED_LL2_RX_REGISTERED(p_ll2_conn))
+		return 0;
+
+	DIRECT_REG_WR(p_ll2_conn->rx_queue.set_prod_addr, 0x0);
+
+	SET_FIELD(action_on_error,
+		  CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG,
+		  p_ll2_conn->ai_err_packet_too_big);
+	SET_FIELD(action_on_error,
+		  CORE_RX_ACTION_ON_ERROR_NO_BUFF, p_ll2_conn->ai_err_no_buf);
+
+	return qed_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error);
+}
+
+int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
+{
+	struct qed_ll2_info *p_ll2_conn;
+	struct qed_ll2_rx_queue *p_rx;
+	struct qed_ll2_tx_queue *p_tx;
+	int rc = -EINVAL;
+	u32 i, capacity;
+	u8 qid;
+
+	p_ll2_conn = qed_ll2_handle_sanity_lock(p_hwfn, connection_handle);
+	if (!p_ll2_conn)
+		return -EINVAL;
+	p_rx = &p_ll2_conn->rx_queue;
+	p_tx = &p_ll2_conn->tx_queue;
+
+	qed_chain_reset(&p_rx->rxq_chain);
+	qed_chain_reset(&p_rx->rcq_chain);
+	INIT_LIST_HEAD(&p_rx->active_descq);
+	INIT_LIST_HEAD(&p_rx->free_descq);
+	INIT_LIST_HEAD(&p_rx->posting_descq);
+	spin_lock_init(&p_rx->lock);
+	capacity = qed_chain_get_capacity(&p_rx->rxq_chain);
+	for (i = 0; i < capacity; i++)
+		list_add_tail(&p_rx->descq_array[i].list_entry,
+			      &p_rx->free_descq);
+	*p_rx->p_fw_cons = 0;
+
+	qed_chain_reset(&p_tx->txq_chain);
+	INIT_LIST_HEAD(&p_tx->active_descq);
+	INIT_LIST_HEAD(&p_tx->free_descq);
+	INIT_LIST_HEAD(&p_tx->sending_descq);
+	spin_lock_init(&p_tx->lock);
+	capacity = qed_chain_get_capacity(&p_tx->txq_chain);
+	for (i = 0; i < capacity; i++)
+		list_add_tail(&p_tx->descq_array[i].list_entry,
+			      &p_tx->free_descq);
+	p_tx->cur_completing_bd_idx = 0;
+	p_tx->bds_idx = 0;
+	p_tx->b_completing_packet = false;
+	p_tx->cur_send_packet = NULL;
+	p_tx->cur_send_frag_num = 0;
+	p_tx->cur_completing_frag_num = 0;
+	*p_tx->p_fw_cons = 0;
+
+	qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_ll2_conn->cid);
+
+	qid = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] + connection_handle;
+	p_ll2_conn->queue_id = qid;
+	p_ll2_conn->tx_stats_id = qid;
+	p_rx->set_prod_addr = (u8 __iomem *)p_hwfn->regview +
+					    GTT_BAR0_MAP_REG_TSDM_RAM +
+					    TSTORM_LL2_RX_PRODS_OFFSET(qid);
+	p_tx->doorbell_addr = (u8 __iomem *)p_hwfn->doorbells +
+					    qed_db_addr(p_ll2_conn->cid,
+							DQ_DEMS_LEGACY);
+
+	rc = qed_ll2_establish_connection_rx(p_hwfn, p_ll2_conn);
+	if (rc)
+		return rc;
+
+	rc = qed_sp_ll2_tx_queue_start(p_hwfn, p_ll2_conn);
+	if (rc)
+		return rc;
+
+	if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
+		qed_wr(p_hwfn, p_hwfn->p_main_ptt, PRS_REG_USE_LIGHT_L2, 1);
+
+	return rc;
+}
+
+static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn,
+					     struct qed_ll2_rx_queue *p_rx,
+					     struct qed_ll2_rx_packet *p_curp)
+{
+	struct qed_ll2_rx_packet *p_posting_packet = NULL;
+	struct core_ll2_rx_prod rx_prod = { 0, 0, 0 };
+	bool b_notify_fw = false;
+	u16 bd_prod, cq_prod;
+
+	/* This handles the flushing of already posted buffers */
+	while (!list_empty(&p_rx->posting_descq)) {
+		p_posting_packet = list_first_entry(&p_rx->posting_descq,
+						    struct qed_ll2_rx_packet,
+						    list_entry);
+		list_del(&p_posting_packet->list_entry);
+		list_add_tail(&p_posting_packet->list_entry,
+			      &p_rx->active_descq);
+		b_notify_fw = true;
+	}
+
+	/* This handles the supplied packet [if there is one] */
+	if (p_curp) {
+		list_add_tail(&p_curp->list_entry, &p_rx->active_descq);
+		b_notify_fw = true;
+	}
+
+	if (!b_notify_fw)
+		return;
+
+	bd_prod = qed_chain_get_prod_idx(&p_rx->rxq_chain);
+	cq_prod = qed_chain_get_prod_idx(&p_rx->rcq_chain);
+	rx_prod.bd_prod = cpu_to_le16(bd_prod);
+	rx_prod.cqe_prod = cpu_to_le16(cq_prod);
+	DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod));
+}
+
+int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
+			   u8 connection_handle,
+			   dma_addr_t addr,
+			   u16 buf_len, void *cookie, u8 notify_fw)
+{
+	struct core_rx_bd_with_buff_len *p_curb = NULL;
+	struct qed_ll2_rx_packet *p_curp = NULL;
+	struct qed_ll2_info *p_ll2_conn;
+	struct qed_ll2_rx_queue *p_rx;
+	unsigned long flags;
+	void *p_data;
+	int rc = 0;
+
+	p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle);
+	if (!p_ll2_conn)
+		return -EINVAL;
+	p_rx = &p_ll2_conn->rx_queue;
+
+	spin_lock_irqsave(&p_rx->lock, flags);
+	if (!list_empty(&p_rx->free_descq))
+		p_curp = list_first_entry(&p_rx->free_descq,
+					  struct qed_ll2_rx_packet, list_entry);
+	if (p_curp) {
+		if (qed_chain_get_elem_left(&p_rx->rxq_chain) &&
+		    qed_chain_get_elem_left(&p_rx->rcq_chain)) {
+			p_data = qed_chain_produce(&p_rx->rxq_chain);
+			p_curb = (struct core_rx_bd_with_buff_len *)p_data;
+			qed_chain_produce(&p_rx->rcq_chain);
+		}
+	}
+
+	/* If we're lacking entires, let's try to flush buffers to FW */
+	if (!p_curp || !p_curb) {
+		rc = -EBUSY;
+		p_curp = NULL;
+		goto out_notify;
+	}
+
+	/* We have an Rx packet we can fill */
+	DMA_REGPAIR_LE(p_curb->addr, addr);
+	p_curb->buff_length = cpu_to_le16(buf_len);
+	p_curp->rx_buf_addr = addr;
+	p_curp->cookie = cookie;
+	p_curp->rxq_bd = p_curb;
+	p_curp->buf_length = buf_len;
+	list_del(&p_curp->list_entry);
+
+	/* Check if we only want to enqueue this packet without informing FW */
+	if (!notify_fw) {
+		list_add_tail(&p_curp->list_entry, &p_rx->posting_descq);
+		goto out;
+	}
+
+out_notify:
+	qed_ll2_post_rx_buffer_notify_fw(p_hwfn, p_rx, p_curp);
+out:
+	spin_unlock_irqrestore(&p_rx->lock, flags);
+	return rc;
+}
+
+static void qed_ll2_prepare_tx_packet_set(struct qed_hwfn *p_hwfn,
+					  struct qed_ll2_tx_queue *p_tx,
+					  struct qed_ll2_tx_packet *p_curp,
+					  u8 num_of_bds,
+					  dma_addr_t first_frag,
+					  u16 first_frag_len, void *p_cookie,
+					  u8 notify_fw)
+{
+	list_del(&p_curp->list_entry);
+	p_curp->cookie = p_cookie;
+	p_curp->bd_used = num_of_bds;
+	p_curp->notify_fw = notify_fw;
+	p_tx->cur_send_packet = p_curp;
+	p_tx->cur_send_frag_num = 0;
+
+	p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = first_frag;
+	p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = first_frag_len;
+	p_tx->cur_send_frag_num++;
+}
+
+static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
+					     struct qed_ll2_info *p_ll2,
+					     struct qed_ll2_tx_packet *p_curp,
+					     u8 num_of_bds,
+					     enum core_tx_dest tx_dest,
+					     u16 vlan,
+					     u8 bd_flags,
+					     u16 l4_hdr_offset_w,
+					     enum core_roce_flavor_type type,
+					     dma_addr_t first_frag,
+					     u16 first_frag_len)
+{
+	struct qed_chain *p_tx_chain = &p_ll2->tx_queue.txq_chain;
+	u16 prod_idx = qed_chain_get_prod_idx(p_tx_chain);
+	struct core_tx_bd *start_bd = NULL;
+	u16 frag_idx;
+
+	start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
+	start_bd->nw_vlan_or_lb_echo = cpu_to_le16(vlan);
+	SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W,
+		  cpu_to_le16(l4_hdr_offset_w));
+	SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest);
+	start_bd->bd_flags.as_bitfield = bd_flags;
+	start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK <<
+	    CORE_TX_BD_FLAGS_START_BD_SHIFT;
+	SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds);
+	DMA_REGPAIR_LE(start_bd->addr, first_frag);
+	start_bd->nbytes = cpu_to_le16(first_frag_len);
+
+	SET_FIELD(start_bd->bd_flags.as_bitfield, CORE_TX_BD_FLAGS_ROCE_FLAV,
+		  type);
+
+	DP_VERBOSE(p_hwfn,
+		   (NETIF_MSG_TX_QUEUED | QED_MSG_LL2),
+		   "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n",
+		   p_ll2->queue_id,
+		   p_ll2->cid,
+		   p_ll2->conn_type,
+		   prod_idx,
+		   first_frag_len,
+		   num_of_bds,
+		   le32_to_cpu(start_bd->addr.hi),
+		   le32_to_cpu(start_bd->addr.lo));
+
+	if (p_ll2->tx_queue.cur_send_frag_num == num_of_bds)
+		return;
+
+	/* Need to provide the packet with additional BDs for frags */
+	for (frag_idx = p_ll2->tx_queue.cur_send_frag_num;
+	     frag_idx < num_of_bds; frag_idx++) {
+		struct core_tx_bd **p_bd = &p_curp->bds_set[frag_idx].txq_bd;
+
+		*p_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
+		(*p_bd)->bd_flags.as_bitfield = 0;
+		(*p_bd)->bitfield1 = 0;
+		(*p_bd)->bitfield0 = 0;
+		p_curp->bds_set[frag_idx].tx_frag = 0;
+		p_curp->bds_set[frag_idx].frag_len = 0;
+	}
+}
+
+/* This should be called while the Txq spinlock is being held */
+static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn,
+				     struct qed_ll2_info *p_ll2_conn)
+{
+	bool b_notify = p_ll2_conn->tx_queue.cur_send_packet->notify_fw;
+	struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue;
+	struct qed_ll2_tx_packet *p_pkt = NULL;
+	struct core_db_data db_msg = { 0, 0, 0 };
+	u16 bd_prod;
+
+	/* If there are missing BDs, don't do anything now */
+	if (p_ll2_conn->tx_queue.cur_send_frag_num !=
+	    p_ll2_conn->tx_queue.cur_send_packet->bd_used)
+		return;
+
+	/* Push the current packet to the list and clean after it */
+	list_add_tail(&p_ll2_conn->tx_queue.cur_send_packet->list_entry,
+		      &p_ll2_conn->tx_queue.sending_descq);
+	p_ll2_conn->tx_queue.cur_send_packet = NULL;
+	p_ll2_conn->tx_queue.cur_send_frag_num = 0;
+
+	/* Notify FW of packet only if requested to */
+	if (!b_notify)
+		return;
+
+	bd_prod = qed_chain_get_prod_idx(&p_ll2_conn->tx_queue.txq_chain);
+
+	while (!list_empty(&p_tx->sending_descq)) {
+		p_pkt = list_first_entry(&p_tx->sending_descq,
+					 struct qed_ll2_tx_packet, list_entry);
+		if (!p_pkt)
+			break;
+
+		list_del(&p_pkt->list_entry);
+		list_add_tail(&p_pkt->list_entry, &p_tx->active_descq);
+	}
+
+	SET_FIELD(db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM);
+	SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
+	SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_VAL_SEL,
+		  DQ_XCM_CORE_TX_BD_PROD_CMD);
+	db_msg.agg_flags = DQ_XCM_CORE_DQ_CF_CMD;
+	db_msg.spq_prod = cpu_to_le16(bd_prod);
+
+	/* Make sure the BDs data is updated before ringing the doorbell */
+	wmb();
+
+	DIRECT_REG_WR(p_tx->doorbell_addr, *((u32 *)&db_msg));
+
+	DP_VERBOSE(p_hwfn,
+		   (NETIF_MSG_TX_QUEUED | QED_MSG_LL2),
+		   "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n",
+		   p_ll2_conn->queue_id,
+		   p_ll2_conn->cid, p_ll2_conn->conn_type, db_msg.spq_prod);
+}
+
+int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
+			      u8 connection_handle,
+			      u8 num_of_bds,
+			      u16 vlan,
+			      u8 bd_flags,
+			      u16 l4_hdr_offset_w,
+			      enum qed_ll2_roce_flavor_type qed_roce_flavor,
+			      dma_addr_t first_frag,
+			      u16 first_frag_len, void *cookie, u8 notify_fw)
+{
+	struct qed_ll2_tx_packet *p_curp = NULL;
+	struct qed_ll2_info *p_ll2_conn = NULL;
+	enum core_roce_flavor_type roce_flavor;
+	struct qed_ll2_tx_queue *p_tx;
+	struct qed_chain *p_tx_chain;
+	unsigned long flags;
+	int rc = 0;
+
+	p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle);
+	if (!p_ll2_conn)
+		return -EINVAL;
+	p_tx = &p_ll2_conn->tx_queue;
+	p_tx_chain = &p_tx->txq_chain;
+
+	if (num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET)
+		return -EIO;
+
+	spin_lock_irqsave(&p_tx->lock, flags);
+	if (p_tx->cur_send_packet) {
+		rc = -EEXIST;
+		goto out;
+	}
+
+	/* Get entry, but only if we have tx elements for it */
+	if (!list_empty(&p_tx->free_descq))
+		p_curp = list_first_entry(&p_tx->free_descq,
+					  struct qed_ll2_tx_packet, list_entry);
+	if (p_curp && qed_chain_get_elem_left(p_tx_chain) < num_of_bds)
+		p_curp = NULL;
+
+	if (!p_curp) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	if (qed_roce_flavor == QED_LL2_ROCE) {
+		roce_flavor = CORE_ROCE;
+	} else if (qed_roce_flavor == QED_LL2_RROCE) {
+		roce_flavor = CORE_RROCE;
+	} else {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Prepare packet and BD, and perhaps send a doorbell to FW */
+	qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp,
+				      num_of_bds, first_frag,
+				      first_frag_len, cookie, notify_fw);
+	qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp,
+					 num_of_bds, CORE_TX_DEST_NW,
+					 vlan, bd_flags, l4_hdr_offset_w,
+					 roce_flavor,
+					 first_frag, first_frag_len);
+
+	qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn);
+
+out:
+	spin_unlock_irqrestore(&p_tx->lock, flags);
+	return rc;
+}
+
+int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn,
+				      u8 connection_handle,
+				      dma_addr_t addr, u16 nbytes)
+{
+	struct qed_ll2_tx_packet *p_cur_send_packet = NULL;
+	struct qed_ll2_info *p_ll2_conn = NULL;
+	u16 cur_send_frag_num = 0;
+	struct core_tx_bd *p_bd;
+	unsigned long flags;
+
+	p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle);
+	if (!p_ll2_conn)
+		return -EINVAL;
+
+	if (!p_ll2_conn->tx_queue.cur_send_packet)
+		return -EINVAL;
+
+	p_cur_send_packet = p_ll2_conn->tx_queue.cur_send_packet;
+	cur_send_frag_num = p_ll2_conn->tx_queue.cur_send_frag_num;
+
+	if (cur_send_frag_num >= p_cur_send_packet->bd_used)
+		return -EINVAL;
+
+	/* Fill the BD information, and possibly notify FW */
+	p_bd = p_cur_send_packet->bds_set[cur_send_frag_num].txq_bd;
+	DMA_REGPAIR_LE(p_bd->addr, addr);
+	p_bd->nbytes = cpu_to_le16(nbytes);
+	p_cur_send_packet->bds_set[cur_send_frag_num].tx_frag = addr;
+	p_cur_send_packet->bds_set[cur_send_frag_num].frag_len = nbytes;
+
+	p_ll2_conn->tx_queue.cur_send_frag_num++;
+
+	spin_lock_irqsave(&p_ll2_conn->tx_queue.lock, flags);
+	qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn);
+	spin_unlock_irqrestore(&p_ll2_conn->tx_queue.lock, flags);
+
+	return 0;
+}
+
+int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
+{
+	struct qed_ll2_info *p_ll2_conn = NULL;
+	int rc = -EINVAL;
+
+	p_ll2_conn = qed_ll2_handle_sanity_lock(p_hwfn, connection_handle);
+	if (!p_ll2_conn)
+		return -EINVAL;
+
+	/* Stop Tx & Rx of connection, if needed */
+	if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
+		rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn);
+		if (rc)
+			return rc;
+		qed_ll2_txq_flush(p_hwfn, connection_handle);
+	}
+
+	if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
+		rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn);
+		if (rc)
+			return rc;
+		qed_ll2_rxq_flush(p_hwfn, connection_handle);
+	}
+
+	return rc;
+}
+
+void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
+{
+	struct qed_ll2_info *p_ll2_conn = NULL;
+
+	p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle);
+	if (!p_ll2_conn)
+		return;
+
+	if (QED_LL2_RX_REGISTERED(p_ll2_conn)) {
+		p_ll2_conn->rx_queue.b_cb_registred = false;
+		qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index);
+	}
+
+	if (QED_LL2_TX_REGISTERED(p_ll2_conn)) {
+		p_ll2_conn->tx_queue.b_cb_registred = false;
+		qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
+	}
+
+	kfree(p_ll2_conn->tx_queue.descq_array);
+	qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain);
+
+	kfree(p_ll2_conn->rx_queue.descq_array);
+	qed_chain_free(p_hwfn->cdev, &p_ll2_conn->rx_queue.rxq_chain);
+	qed_chain_free(p_hwfn->cdev, &p_ll2_conn->rx_queue.rcq_chain);
+
+	qed_cxt_release_cid(p_hwfn, p_ll2_conn->cid);
+
+	mutex_lock(&p_ll2_conn->mutex);
+	p_ll2_conn->b_active = false;
+	mutex_unlock(&p_ll2_conn->mutex);
+}
+
+struct qed_ll2_info *qed_ll2_alloc(struct qed_hwfn *p_hwfn)
+{
+	struct qed_ll2_info *p_ll2_connections;
+	u8 i;
+
+	/* Allocate LL2's set struct */
+	p_ll2_connections = kcalloc(QED_MAX_NUM_OF_LL2_CONNECTIONS,
+				    sizeof(struct qed_ll2_info), GFP_KERNEL);
+	if (!p_ll2_connections) {
+		DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_ll2'\n");
+		return NULL;
+	}
+
+	for (i = 0; i < QED_MAX_NUM_OF_LL2_CONNECTIONS; i++)
+		p_ll2_connections[i].my_id = i;
+
+	return p_ll2_connections;
+}
+
+void qed_ll2_setup(struct qed_hwfn *p_hwfn,
+		   struct qed_ll2_info *p_ll2_connections)
+{
+	int i;
+
+	for (i = 0; i < QED_MAX_NUM_OF_LL2_CONNECTIONS; i++)
+		mutex_init(&p_ll2_connections[i].mutex);
+}
+
+void qed_ll2_free(struct qed_hwfn *p_hwfn,
+		  struct qed_ll2_info *p_ll2_connections)
+{
+	kfree(p_ll2_connections);
+}
+
+static void _qed_ll2_get_tstats(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt,
+				struct qed_ll2_info *p_ll2_conn,
+				struct qed_ll2_stats *p_stats)
+{
+	struct core_ll2_tstorm_per_queue_stat tstats;
+	u8 qid = p_ll2_conn->queue_id;
+	u32 tstats_addr;
+
+	memset(&tstats, 0, sizeof(tstats));
+	tstats_addr = BAR0_MAP_REG_TSDM_RAM +
+		      CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid);
+	qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats));
+
+	p_stats->packet_too_big_discard =
+			HILO_64_REGPAIR(tstats.packet_too_big_discard);
+	p_stats->no_buff_discard = HILO_64_REGPAIR(tstats.no_buff_discard);
+}
+
+static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt,
+				struct qed_ll2_info *p_ll2_conn,
+				struct qed_ll2_stats *p_stats)
+{
+	struct core_ll2_ustorm_per_queue_stat ustats;
+	u8 qid = p_ll2_conn->queue_id;
+	u32 ustats_addr;
+
+	memset(&ustats, 0, sizeof(ustats));
+	ustats_addr = BAR0_MAP_REG_USDM_RAM +
+		      CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid);
+	qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats));
+
+	p_stats->rcv_ucast_bytes = HILO_64_REGPAIR(ustats.rcv_ucast_bytes);
+	p_stats->rcv_mcast_bytes = HILO_64_REGPAIR(ustats.rcv_mcast_bytes);
+	p_stats->rcv_bcast_bytes = HILO_64_REGPAIR(ustats.rcv_bcast_bytes);
+	p_stats->rcv_ucast_pkts = HILO_64_REGPAIR(ustats.rcv_ucast_pkts);
+	p_stats->rcv_mcast_pkts = HILO_64_REGPAIR(ustats.rcv_mcast_pkts);
+	p_stats->rcv_bcast_pkts = HILO_64_REGPAIR(ustats.rcv_bcast_pkts);
+}
+
+static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn,
+				struct qed_ptt *p_ptt,
+				struct qed_ll2_info *p_ll2_conn,
+				struct qed_ll2_stats *p_stats)
+{
+	struct core_ll2_pstorm_per_queue_stat pstats;
+	u8 stats_id = p_ll2_conn->tx_stats_id;
+	u32 pstats_addr;
+
+	memset(&pstats, 0, sizeof(pstats));
+	pstats_addr = BAR0_MAP_REG_PSDM_RAM +
+		      CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id);
+	qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats));
+
+	p_stats->sent_ucast_bytes = HILO_64_REGPAIR(pstats.sent_ucast_bytes);
+	p_stats->sent_mcast_bytes = HILO_64_REGPAIR(pstats.sent_mcast_bytes);
+	p_stats->sent_bcast_bytes = HILO_64_REGPAIR(pstats.sent_bcast_bytes);
+	p_stats->sent_ucast_pkts = HILO_64_REGPAIR(pstats.sent_ucast_pkts);
+	p_stats->sent_mcast_pkts = HILO_64_REGPAIR(pstats.sent_mcast_pkts);
+	p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts);
+}
+
+int qed_ll2_get_stats(struct qed_hwfn *p_hwfn,
+		      u8 connection_handle, struct qed_ll2_stats *p_stats)
+{
+	struct qed_ll2_info *p_ll2_conn = NULL;
+	struct qed_ptt *p_ptt;
+
+	memset(p_stats, 0, sizeof(*p_stats));
+
+	if ((connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) ||
+	    !p_hwfn->p_ll2_info)
+		return -EINVAL;
+
+	p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle];
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt) {
+		DP_ERR(p_hwfn, "Failed to acquire ptt\n");
+		return -EINVAL;
+	}
+
+	_qed_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
+	_qed_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
+	if (p_ll2_conn->tx_stats_en)
+		_qed_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
+
+	qed_ptt_release(p_hwfn, p_ptt);
+	return 0;
+}
+
+static void qed_ll2_register_cb_ops(struct qed_dev *cdev,
+				    const struct qed_ll2_cb_ops *ops,
+				    void *cookie)
+{
+	cdev->ll2->cbs = ops;
+	cdev->ll2->cb_cookie = cookie;
+}
+
+static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
+{
+	struct qed_ll2_info ll2_info;
+	struct qed_ll2_buffer *buffer;
+	enum qed_ll2_conn_type conn_type;
+	struct qed_ptt *p_ptt;
+	int rc, i;
+
+	/* Initialize LL2 locks & lists */
+	INIT_LIST_HEAD(&cdev->ll2->list);
+	spin_lock_init(&cdev->ll2->lock);
+	cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN +
+			     L1_CACHE_BYTES + params->mtu;
+	cdev->ll2->frags_mapped = params->frags_mapped;
+
+	/*Allocate memory for LL2 */
+	DP_INFO(cdev, "Allocating LL2 buffers of size %08x bytes\n",
+		cdev->ll2->rx_size);
+	for (i = 0; i < QED_LL2_RX_SIZE; i++) {
+		buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+		if (!buffer) {
+			DP_INFO(cdev, "Failed to allocate LL2 buffers\n");
+			goto fail;
+		}
+
+		rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data,
+					  &buffer->phys_addr);
+		if (rc) {
+			kfree(buffer);
+			goto fail;
+		}
+
+		list_add_tail(&buffer->list, &cdev->ll2->list);
+	}
+
+	switch (QED_LEADING_HWFN(cdev)->hw_info.personality) {
+	case QED_PCI_ISCSI:
+		conn_type = QED_LL2_TYPE_ISCSI;
+		break;
+	case QED_PCI_ETH_ROCE:
+		conn_type = QED_LL2_TYPE_ROCE;
+		break;
+	default:
+		conn_type = QED_LL2_TYPE_TEST;
+	}
+
+	/* Prepare the temporary ll2 information */
+	memset(&ll2_info, 0, sizeof(ll2_info));
+	ll2_info.conn_type = conn_type;
+	ll2_info.mtu = params->mtu;
+	ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets;
+	ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping;
+	ll2_info.tx_tc = 0;
+	ll2_info.tx_dest = CORE_TX_DEST_NW;
+	ll2_info.gsi_enable = 1;
+
+	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info,
+					QED_LL2_RX_SIZE, QED_LL2_TX_SIZE,
+					&cdev->ll2->handle);
+	if (rc) {
+		DP_INFO(cdev, "Failed to acquire LL2 connection\n");
+		goto fail;
+	}
+
+	rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev),
+					  cdev->ll2->handle);
+	if (rc) {
+		DP_INFO(cdev, "Failed to establish LL2 connection\n");
+		goto release_fail;
+	}
+
+	/* Post all Rx buffers to FW */
+	spin_lock_bh(&cdev->ll2->lock);
+	list_for_each_entry(buffer, &cdev->ll2->list, list) {
+		rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev),
+					    cdev->ll2->handle,
+					    buffer->phys_addr, 0, buffer, 1);
+		if (rc) {
+			DP_INFO(cdev,
+				"Failed to post an Rx buffer; Deleting it\n");
+			dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
+					 cdev->ll2->rx_size, DMA_FROM_DEVICE);
+			kfree(buffer->data);
+			list_del(&buffer->list);
+			kfree(buffer);
+		} else {
+			cdev->ll2->rx_cnt++;
+		}
+	}
+	spin_unlock_bh(&cdev->ll2->lock);
+
+	if (!cdev->ll2->rx_cnt) {
+		DP_INFO(cdev, "Failed passing even a single Rx buffer\n");
+		goto release_terminate;
+	}
+
+	if (!is_valid_ether_addr(params->ll2_mac_address)) {
+		DP_INFO(cdev, "Invalid Ethernet address\n");
+		goto release_terminate;
+	}
+
+	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
+	if (!p_ptt) {
+		DP_INFO(cdev, "Failed to acquire PTT\n");
+		goto release_terminate;
+	}
+
+	rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
+				    params->ll2_mac_address);
+	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
+	if (rc) {
+		DP_ERR(cdev, "Failed to allocate LLH filter\n");
+		goto release_terminate_all;
+	}
+
+	ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address);
+
+	return 0;
+
+release_terminate_all:
+
+release_terminate:
+	qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
+release_fail:
+	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
+fail:
+	qed_ll2_kill_buffers(cdev);
+	cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
+	return -EINVAL;
+}
+
+static int qed_ll2_stop(struct qed_dev *cdev)
+{
+	struct qed_ptt *p_ptt;
+	int rc;
+
+	if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE)
+		return 0;
+
+	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
+	if (!p_ptt) {
+		DP_INFO(cdev, "Failed to acquire PTT\n");
+		goto fail;
+	}
+
+	qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
+				  cdev->ll2_mac_address);
+	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
+	eth_zero_addr(cdev->ll2_mac_address);
+
+	rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev),
+					  cdev->ll2->handle);
+	if (rc)
+		DP_INFO(cdev, "Failed to terminate LL2 connection\n");
+
+	qed_ll2_kill_buffers(cdev);
+
+	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
+	cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
+
+	return rc;
+fail:
+	return -EINVAL;
+}
+
+static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
+{
+	const skb_frag_t *frag;
+	int rc = -EINVAL, i;
+	dma_addr_t mapping;
+	u16 vlan = 0;
+	u8 flags = 0;
+
+	if (unlikely(skb->ip_summed != CHECKSUM_NONE)) {
+		DP_INFO(cdev, "Cannot transmit a checksumed packet\n");
+		return -EINVAL;
+	}
+
+	if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) {
+		DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n",
+		       1 + skb_shinfo(skb)->nr_frags);
+		return -EINVAL;
+	}
+
+	mapping = dma_map_single(&cdev->pdev->dev, skb->data,
+				 skb->len, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(&cdev->pdev->dev, mapping))) {
+		DP_NOTICE(cdev, "SKB mapping failed\n");
+		return -EINVAL;
+	}
+
+	/* Request HW to calculate IP csum */
+	if (!((vlan_get_protocol(skb) == htons(ETH_P_IPV6)) &&
+	      ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
+		flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT);
+
+	if (skb_vlan_tag_present(skb)) {
+		vlan = skb_vlan_tag_get(skb);
+		flags |= BIT(CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT);
+	}
+
+	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev),
+				       cdev->ll2->handle,
+				       1 + skb_shinfo(skb)->nr_frags,
+				       vlan, flags, 0, 0 /* RoCE FLAVOR */,
+				       mapping, skb->len, skb, 1);
+	if (rc)
+		goto err;
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		frag = &skb_shinfo(skb)->frags[i];
+		if (!cdev->ll2->frags_mapped) {
+			mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0,
+						   skb_frag_size(frag),
+						   DMA_TO_DEVICE);
+
+			if (unlikely(dma_mapping_error(&cdev->pdev->dev,
+						       mapping))) {
+				DP_NOTICE(cdev,
+					  "Unable to map frag - dropping packet\n");
+				goto err;
+			}
+		} else {
+			mapping = page_to_phys(skb_frag_page(frag)) |
+			    frag->page_offset;
+		}
+
+		rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev),
+						       cdev->ll2->handle,
+						       mapping,
+						       skb_frag_size(frag));
+
+		/* if failed not much to do here, partial packet has been posted
+		 * we can't free memory, will need to wait for completion.
+		 */
+		if (rc)
+			goto err2;
+	}
+
+	return 0;
+
+err:
+	dma_unmap_single(&cdev->pdev->dev, mapping, skb->len, DMA_TO_DEVICE);
+
+err2:
+	return rc;
+}
+
+static int qed_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats)
+{
+	if (!cdev->ll2)
+		return -EINVAL;
+
+	return qed_ll2_get_stats(QED_LEADING_HWFN(cdev),
+				 cdev->ll2->handle, stats);
+}
+
+const struct qed_ll2_ops qed_ll2_ops_pass = {
+	.start = &qed_ll2_start,
+	.stop = &qed_ll2_stop,
+	.start_xmit = &qed_ll2_start_xmit,
+	.register_cb_ops = &qed_ll2_register_cb_ops,
+	.get_stats = &qed_ll2_stats,
+};
+
+int qed_ll2_alloc_if(struct qed_dev *cdev)
+{
+	cdev->ll2 = kzalloc(sizeof(*cdev->ll2), GFP_KERNEL);
+	return cdev->ll2 ? 0 : -ENOMEM;
+}
+
+void qed_ll2_dealloc_if(struct qed_dev *cdev)
+{
+	kfree(cdev->ll2);
+	cdev->ll2 = NULL;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
new file mode 100644
index 0000000..80a5dc2
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -0,0 +1,316 @@
+/* QLogic qed NIC Driver
+ *
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_LL2_H
+#define _QED_LL2_H
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/qed/qed_chain.h>
+#include <linux/qed/qed_ll2_if.h>
+#include "qed.h"
+#include "qed_hsi.h"
+#include "qed_sp.h"
+
+#define QED_MAX_NUM_OF_LL2_CONNECTIONS                    (4)
+
+enum qed_ll2_roce_flavor_type {
+	QED_LL2_ROCE,
+	QED_LL2_RROCE,
+	MAX_QED_LL2_ROCE_FLAVOR_TYPE
+};
+
+enum qed_ll2_conn_type {
+	QED_LL2_TYPE_RESERVED,
+	QED_LL2_TYPE_ISCSI,
+	QED_LL2_TYPE_TEST,
+	QED_LL2_TYPE_ISCSI_OOO,
+	QED_LL2_TYPE_RESERVED2,
+	QED_LL2_TYPE_ROCE,
+	QED_LL2_TYPE_RESERVED3,
+	MAX_QED_LL2_RX_CONN_TYPE
+};
+
+struct qed_ll2_rx_packet {
+	struct list_head list_entry;
+	struct core_rx_bd_with_buff_len *rxq_bd;
+	dma_addr_t rx_buf_addr;
+	u16 buf_length;
+	void *cookie;
+	u8 placement_offset;
+	u16 parse_flags;
+	u16 packet_length;
+	u16 vlan;
+	u32 opaque_data[2];
+};
+
+struct qed_ll2_tx_packet {
+	struct list_head list_entry;
+	u16 bd_used;
+	u16 vlan;
+	u16 l4_hdr_offset_w;
+	u8 bd_flags;
+	bool notify_fw;
+	void *cookie;
+
+	struct {
+		struct core_tx_bd *txq_bd;
+		dma_addr_t tx_frag;
+		u16 frag_len;
+	} bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET];
+};
+
+struct qed_ll2_rx_queue {
+	/* Lock protecting the Rx queue manipulation */
+	spinlock_t lock;
+	struct qed_chain rxq_chain;
+	struct qed_chain rcq_chain;
+	u8 rx_sb_index;
+	bool b_cb_registred;
+	__le16 *p_fw_cons;
+	struct list_head active_descq;
+	struct list_head free_descq;
+	struct list_head posting_descq;
+	struct qed_ll2_rx_packet *descq_array;
+	void __iomem *set_prod_addr;
+};
+
+struct qed_ll2_tx_queue {
+	/* Lock protecting the Tx queue manipulation */
+	spinlock_t lock;
+	struct qed_chain txq_chain;
+	u8 tx_sb_index;
+	bool b_cb_registred;
+	__le16 *p_fw_cons;
+	struct list_head active_descq;
+	struct list_head free_descq;
+	struct list_head sending_descq;
+	struct qed_ll2_tx_packet *descq_array;
+	struct qed_ll2_tx_packet *cur_send_packet;
+	struct qed_ll2_tx_packet cur_completing_packet;
+	u16 cur_completing_bd_idx;
+	void __iomem *doorbell_addr;
+	u16 bds_idx;
+	u16 cur_send_frag_num;
+	u16 cur_completing_frag_num;
+	bool b_completing_packet;
+};
+
+struct qed_ll2_info {
+	/* Lock protecting the state of LL2 */
+	struct mutex mutex;
+	enum qed_ll2_conn_type conn_type;
+	u32 cid;
+	u8 my_id;
+	u8 queue_id;
+	u8 tx_stats_id;
+	bool b_active;
+	u16 mtu;
+	u8 rx_drop_ttl0_flg;
+	u8 rx_vlan_removal_en;
+	u8 tx_tc;
+	enum core_tx_dest tx_dest;
+	enum core_error_handle ai_err_packet_too_big;
+	enum core_error_handle ai_err_no_buf;
+	u8 tx_stats_en;
+	struct qed_ll2_rx_queue rx_queue;
+	struct qed_ll2_tx_queue tx_queue;
+	u8 gsi_enable;
+};
+
+/**
+ * @brief qed_ll2_acquire_connection - allocate resources,
+ *        starts rx & tx (if relevant) queues pair. Provides
+ *        connecion handler as output parameter.
+ *
+ * @param p_hwfn
+ * @param p_params		Contain various configuration properties
+ * @param rx_num_desc
+ * @param tx_num_desc
+ *
+ * @param p_connection_handle  Output container for LL2 connection's handle
+ *
+ * @return 0 on success, failure otherwise
+ */
+int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
+			       struct qed_ll2_info *p_params,
+			       u16 rx_num_desc,
+			       u16 tx_num_desc,
+			       u8 *p_connection_handle);
+
+/**
+ * @brief qed_ll2_establish_connection - start previously
+ *        allocated LL2 queues pair
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param connection_handle	LL2 connection's handle obtained from
+ *                              qed_ll2_require_connection
+ *
+ * @return 0 on success, failure otherwise
+ */
+int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle);
+
+/**
+ * @brief qed_ll2_post_rx_buffers - submit buffers to LL2 Rx queue.
+ *
+ * @param p_hwfn
+ * @param connection_handle	LL2 connection's handle obtained from
+ *				qed_ll2_require_connection
+ * @param addr			rx (physical address) buffers to submit
+ * @param cookie
+ * @param notify_fw		produce corresponding Rx BD immediately
+ *
+ * @return 0 on success, failure otherwise
+ */
+int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
+			   u8 connection_handle,
+			   dma_addr_t addr,
+			   u16 buf_len, void *cookie, u8 notify_fw);
+
+/**
+ * @brief qed_ll2_prepare_tx_packet - request for start Tx BD
+ *				      to prepare Tx packet submission to FW.
+ *
+ * @param p_hwfn
+ * @param connection_handle	LL2 connection's handle obtained from
+ *				qed_ll2_require_connection
+ * @param num_of_bds		a number of requested BD equals a number of
+ *				fragments in Tx packet
+ * @param vlan			VLAN to insert to packet (if insertion set)
+ * @param bd_flags
+ * @param l4_hdr_offset_w	L4 Header Offset from start of packet
+ *				(in words). This is needed if both l4_csum
+ *				and ipv6_ext are set
+ * @param first_frag
+ * @param first_frag_len
+ * @param cookie
+ *
+ * @param notify_fw
+ *
+ * @return 0 on success, failure otherwise
+ */
+int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
+			      u8 connection_handle,
+			      u8 num_of_bds,
+			      u16 vlan,
+			      u8 bd_flags,
+			      u16 l4_hdr_offset_w,
+			      enum qed_ll2_roce_flavor_type qed_roce_flavor,
+			      dma_addr_t first_frag,
+			      u16 first_frag_len, void *cookie, u8 notify_fw);
+
+/**
+ * @brief qed_ll2_release_connection -	releases resources
+ *					allocated for LL2 connection
+ *
+ * @param p_hwfn
+ * @param connection_handle		LL2 connection's handle obtained from
+ *					qed_ll2_require_connection
+ */
+void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle);
+
+/**
+ * @brief qed_ll2_set_fragment_of_tx_packet -	provides fragments to fill
+ *						Tx BD of BDs requested by
+ *						qed_ll2_prepare_tx_packet
+ *
+ * @param p_hwfn
+ * @param connection_handle			LL2 connection's handle
+ *						obtained from
+ *						qed_ll2_require_connection
+ * @param addr
+ * @param nbytes
+ *
+ * @return 0 on success, failure otherwise
+ */
+int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn,
+				      u8 connection_handle,
+				      dma_addr_t addr, u16 nbytes);
+
+/**
+ * @brief qed_ll2_terminate_connection -	stops Tx/Rx queues
+ *
+ *
+ * @param p_hwfn
+ * @param connection_handle			LL2 connection's handle
+ *						obtained from
+ *						qed_ll2_require_connection
+ *
+ * @return 0 on success, failure otherwise
+ */
+int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle);
+
+/**
+ * @brief qed_ll2_get_stats -	get LL2 queue's statistics
+ *
+ *
+ * @param p_hwfn
+ * @param connection_handle	LL2 connection's handle obtained from
+ *				qed_ll2_require_connection
+ * @param p_stats
+ *
+ * @return 0 on success, failure otherwise
+ */
+int qed_ll2_get_stats(struct qed_hwfn *p_hwfn,
+		      u8 connection_handle, struct qed_ll2_stats *p_stats);
+
+/**
+ * @brief qed_ll2_alloc - Allocates LL2 connections set
+ *
+ * @param p_hwfn
+ *
+ * @return pointer to alocated qed_ll2_info or NULL
+ */
+struct qed_ll2_info *qed_ll2_alloc(struct qed_hwfn *p_hwfn);
+
+/**
+ * @brief qed_ll2_setup - Inits LL2 connections set
+ *
+ * @param p_hwfn
+ * @param p_ll2_connections
+ *
+ */
+void qed_ll2_setup(struct qed_hwfn *p_hwfn,
+		   struct qed_ll2_info *p_ll2_connections);
+
+/**
+ * @brief qed_ll2_free - Releases LL2 connections set
+ *
+ * @param p_hwfn
+ * @param p_ll2_connections
+ *
+ */
+void qed_ll2_free(struct qed_hwfn *p_hwfn,
+		  struct qed_ll2_info *p_ll2_connections);
+void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn,
+				     u8 connection_handle,
+				     void *cookie,
+				     dma_addr_t rx_buf_addr,
+				     u16 data_length,
+				     u8 data_length_error,
+				     u16 parse_flags,
+				     u16 vlan,
+				     u32 src_mac_addr_hi,
+				     u16 src_mac_addr_lo, bool b_last_packet);
+void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
+				     u8 connection_handle,
+				     void *cookie,
+				     dma_addr_t first_frag_addr,
+				     bool b_last_fragment, bool b_last_packet);
+void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn,
+				    u8 connection_handle,
+				    void *cookie,
+				    dma_addr_t first_frag_addr,
+				    bool b_last_fragment, bool b_last_packet);
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index c7dc34b..4ee3151 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -22,15 +22,22 @@
 #include <linux/etherdevice.h>
 #include <linux/vmalloc.h>
 #include <linux/qed/qed_if.h>
+#include <linux/qed/qed_ll2_if.h>
 
 #include "qed.h"
 #include "qed_sriov.h"
 #include "qed_sp.h"
 #include "qed_dev_api.h"
+#include "qed_ll2.h"
 #include "qed_mcp.h"
 #include "qed_hw.h"
 #include "qed_selftest.h"
 
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+#define QED_ROCE_QPS			(8192)
+#define QED_ROCE_DPIS			(8)
+#endif
+
 static char version[] =
 	"QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n";
 
@@ -51,8 +58,6 @@
 
 static int __init qed_init(void)
 {
-	pr_notice("qed_init called\n");
-
 	pr_info("%s", version);
 
 	return 0;
@@ -106,8 +111,7 @@
 /* Performs PCI initializations as well as initializing PCI-related parameters
  * in the device structrue. Returns 0 in case of success.
  */
-static int qed_init_pci(struct qed_dev *cdev,
-			struct pci_dev *pdev)
+static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev)
 {
 	u8 rev_id;
 	int rc;
@@ -207,8 +211,8 @@
 	dev_info->pci_mem_start = cdev->pci_params.mem_start;
 	dev_info->pci_mem_end = cdev->pci_params.mem_end;
 	dev_info->pci_irq = cdev->pci_params.irq;
-	dev_info->rdma_supported =
-	    (cdev->hwfns[0].hw_info.personality == QED_PCI_ETH_ROCE);
+	dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality ==
+				    QED_PCI_ETH_ROCE);
 	dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]);
 	ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr);
 
@@ -263,8 +267,7 @@
 }
 
 /* Sets the requested power state */
-static int qed_set_power_state(struct qed_dev *cdev,
-			       pci_power_t state)
+static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state)
 {
 	if (!cdev)
 		return -ENODEV;
@@ -366,8 +369,8 @@
 		DP_NOTICE(cdev,
 			  "Trying to enable MSI-X with less vectors (%d out of %d)\n",
 			  cnt, int_params->in.num_vectors);
-		rc = pci_enable_msix_exact(cdev->pdev,
-					   int_params->msix_table, cnt);
+		rc = pci_enable_msix_exact(cdev->pdev, int_params->msix_table,
+					   cnt);
 		if (!rc)
 			rc = cnt;
 	}
@@ -439,6 +442,11 @@
 	}
 
 out:
+	if (!rc)
+		DP_INFO(cdev, "Using %s interrupts\n",
+			int_params->out.int_mode == QED_INT_MODE_INTA ?
+			"INTa" : int_params->out.int_mode == QED_INT_MODE_MSI ?
+			"MSI" : "MSIX");
 	cdev->int_coalescing_mode = QED_COAL_MODE_ENABLE;
 
 	return rc;
@@ -514,19 +522,18 @@
 int qed_slowpath_irq_req(struct qed_hwfn *hwfn)
 {
 	struct qed_dev *cdev = hwfn->cdev;
+	u32 int_mode;
 	int rc = 0;
 	u8 id;
 
-	if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
+	int_mode = cdev->int_params.out.int_mode;
+	if (int_mode == QED_INT_MODE_MSIX) {
 		id = hwfn->my_id;
 		snprintf(hwfn->name, NAME_SIZE, "sp-%d-%02x:%02x.%02x",
 			 id, cdev->pdev->bus->number,
 			 PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id);
 		rc = request_irq(cdev->int_params.msix_table[id].vector,
 				 qed_msix_sp_int, 0, hwfn->name, hwfn->sp_dpc);
-		if (!rc)
-			DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP),
-				   "Requested slowpath MSI-X\n");
 	} else {
 		unsigned long flags = 0;
 
@@ -541,6 +548,13 @@
 				 flags, cdev->name, cdev);
 	}
 
+	if (rc)
+		DP_NOTICE(cdev, "request_irq failed, rc = %d\n", rc);
+	else
+		DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP),
+			   "Requested slowpath %s\n",
+			   (int_mode == QED_INT_MODE_MSIX) ? "MSI-X" : "IRQ");
+
 	return rc;
 }
 
@@ -581,6 +595,8 @@
 		}
 	}
 
+	qed_dbg_pf_exit(cdev);
+
 	return rc;
 }
 
@@ -599,7 +615,16 @@
 
 static int qed_nic_setup(struct qed_dev *cdev)
 {
-	int rc;
+	int rc, i;
+
+	/* Determine if interface is going to require LL2 */
+	if (QED_LEADING_HWFN(cdev)->hw_info.personality != QED_PCI_ETH) {
+		for (i = 0; i < cdev->num_hwfns; i++) {
+			struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
+
+			p_hwfn->using_ll2 = true;
+		}
+	}
 
 	rc = qed_resc_alloc(cdev);
 	if (rc)
@@ -657,6 +682,9 @@
 				  enum qed_int_mode int_mode)
 {
 	struct qed_sb_cnt_info sb_cnt_info;
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+	int num_l2_queues;
+#endif
 	int rc;
 	int i;
 
@@ -687,6 +715,31 @@
 	cdev->int_params.fp_msix_cnt = cdev->int_params.out.num_vectors -
 				       cdev->num_hwfns;
 
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+	num_l2_queues = 0;
+	for_each_hwfn(cdev, i)
+		num_l2_queues += FEAT_NUM(&cdev->hwfns[i], QED_PF_L2_QUE);
+
+	DP_VERBOSE(cdev, QED_MSG_RDMA,
+		   "cdev->int_params.fp_msix_cnt=%d num_l2_queues=%d\n",
+		   cdev->int_params.fp_msix_cnt, num_l2_queues);
+
+	if (cdev->int_params.fp_msix_cnt > num_l2_queues) {
+		cdev->int_params.rdma_msix_cnt =
+			(cdev->int_params.fp_msix_cnt - num_l2_queues)
+			/ cdev->num_hwfns;
+		cdev->int_params.rdma_msix_base =
+			cdev->int_params.fp_msix_base + num_l2_queues;
+		cdev->int_params.fp_msix_cnt = num_l2_queues;
+	} else {
+		cdev->int_params.rdma_msix_cnt = 0;
+	}
+
+	DP_VERBOSE(cdev, QED_MSG_RDMA, "roce_msix_cnt=%d roce_msix_base=%d\n",
+		   cdev->int_params.rdma_msix_cnt,
+		   cdev->int_params.rdma_msix_base);
+#endif
+
 	return 0;
 }
 
@@ -790,6 +843,13 @@
 {
 	int i;
 
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+	params->rdma_pf_params.num_qps = QED_ROCE_QPS;
+	params->rdma_pf_params.min_dpis = QED_ROCE_DPIS;
+	/* divide by 3 the MRs to avoid MF ILT overflow */
+	params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS;
+	params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX;
+#endif
 	for (i = 0; i < cdev->num_hwfns; i++) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -834,13 +894,13 @@
 	if (IS_PF(cdev)) {
 		/* Allocate stream for unzipping */
 		rc = qed_alloc_stream_mem(cdev);
-		if (rc) {
-			DP_NOTICE(cdev, "Failed to allocate stream memory\n");
+		if (rc)
 			goto err2;
-		}
 
 		/* First Dword used to diffrentiate between various sources */
 		data = cdev->firmware->data + sizeof(u32);
+
+		qed_dbg_pf_init(cdev);
 	}
 
 	memset(&tunn_info, 0, sizeof(tunn_info));
@@ -864,6 +924,12 @@
 	DP_INFO(cdev,
 		"HW initialization and function start completed successfully\n");
 
+	/* Allocate LL2 interface if needed */
+	if (QED_LEADING_HWFN(cdev)->using_ll2) {
+		rc = qed_ll2_alloc_if(cdev);
+		if (rc)
+			goto err3;
+	}
 	if (IS_PF(cdev)) {
 		hwfn = QED_LEADING_HWFN(cdev);
 		drv_version.version = (params->drv_major << 24) |
@@ -884,6 +950,8 @@
 
 	return 0;
 
+err3:
+	qed_hw_stop(cdev);
 err2:
 	qed_hw_timers_stop_all(cdev);
 	if (IS_PF(cdev))
@@ -906,6 +974,8 @@
 	if (!cdev)
 		return -ENODEV;
 
+	qed_ll2_dealloc_if(cdev);
+
 	if (IS_PF(cdev)) {
 		qed_free_stream_mem(cdev);
 		if (IS_QED_ETH_IF(cdev))
@@ -974,8 +1044,7 @@
 }
 
 static u32 qed_sb_release(struct qed_dev *cdev,
-			  struct qed_sb_info *sb_info,
-			  u16 sb_id)
+			  struct qed_sb_info *sb_info, u16 sb_id)
 {
 	struct qed_hwfn *p_hwfn;
 	int hwfn_index;
@@ -1025,20 +1094,23 @@
 		link_params->speed.autoneg = params->autoneg;
 	if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) {
 		link_params->speed.advertised_speeds = 0;
-		if ((params->adv_speeds & SUPPORTED_1000baseT_Half) ||
-		    (params->adv_speeds & SUPPORTED_1000baseT_Full))
+		if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) ||
+		    (params->adv_speeds & QED_LM_1000baseT_Full_BIT))
 			link_params->speed.advertised_speeds |=
-				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
-		if (params->adv_speeds & SUPPORTED_10000baseKR_Full)
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT)
 			link_params->speed.advertised_speeds |=
-				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
-		if (params->adv_speeds & SUPPORTED_40000baseLR4_Full)
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+		if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT)
 			link_params->speed.advertised_speeds |=
-				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
-		if (params->adv_speeds & 0)
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
+		if (params->adv_speeds & QED_LM_40000baseLR4_Full_BIT)
 			link_params->speed.advertised_speeds |=
-				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G;
-		if (params->adv_speeds & 0)
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
+		if (params->adv_speeds & QED_LM_50000baseKR2_Full_BIT)
+			link_params->speed.advertised_speeds |=
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G;
+		if (params->adv_speeds & QED_LM_100000baseKR4_Full_BIT)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G;
 	}
@@ -1168,50 +1240,56 @@
 		if_link->link_up = true;
 
 	/* TODO - at the moment assume supported and advertised speed equal */
-	if_link->supported_caps = SUPPORTED_FIBRE;
+	if_link->supported_caps = QED_LM_FIBRE_BIT;
 	if (params.speed.autoneg)
-		if_link->supported_caps |= SUPPORTED_Autoneg;
+		if_link->supported_caps |= QED_LM_Autoneg_BIT;
 	if (params.pause.autoneg ||
 	    (params.pause.forced_rx && params.pause.forced_tx))
-		if_link->supported_caps |= SUPPORTED_Asym_Pause;
+		if_link->supported_caps |= QED_LM_Asym_Pause_BIT;
 	if (params.pause.autoneg || params.pause.forced_rx ||
 	    params.pause.forced_tx)
-		if_link->supported_caps |= SUPPORTED_Pause;
+		if_link->supported_caps |= QED_LM_Pause_BIT;
 
 	if_link->advertised_caps = if_link->supported_caps;
 	if (params.speed.advertised_speeds &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-		if_link->advertised_caps |= SUPPORTED_1000baseT_Half |
-					   SUPPORTED_1000baseT_Full;
+		if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT |
+		    QED_LM_1000baseT_Full_BIT;
 	if (params.speed.advertised_speeds &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-		if_link->advertised_caps |= SUPPORTED_10000baseKR_Full;
+		if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT;
 	if (params.speed.advertised_speeds &
-		NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-		if_link->advertised_caps |= SUPPORTED_40000baseLR4_Full;
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+		if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT;
 	if (params.speed.advertised_speeds &
-		NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-		if_link->advertised_caps |= 0;
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+		if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT;
+	if (params.speed.advertised_speeds &
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+		if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT;
 	if (params.speed.advertised_speeds &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-		if_link->advertised_caps |= 0;
+		if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT;
 
 	if (link_caps.speed_capabilities &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-		if_link->supported_caps |= SUPPORTED_1000baseT_Half |
-					   SUPPORTED_1000baseT_Full;
+		if_link->supported_caps |= QED_LM_1000baseT_Half_BIT |
+		    QED_LM_1000baseT_Full_BIT;
 	if (link_caps.speed_capabilities &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-		if_link->supported_caps |= SUPPORTED_10000baseKR_Full;
+		if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT;
 	if (link_caps.speed_capabilities &
-		NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-		if_link->supported_caps |= SUPPORTED_40000baseLR4_Full;
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+		if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT;
 	if (link_caps.speed_capabilities &
-		NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-		if_link->supported_caps |= 0;
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+		if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT;
+	if (link_caps.speed_capabilities &
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+		if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT;
 	if (link_caps.speed_capabilities &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-		if_link->supported_caps |= 0;
+		if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT;
 
 	if (link.link_up)
 		if_link->speed = link.speed;
@@ -1231,33 +1309,29 @@
 		if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE;
 
 	/* Link partner capabilities */
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_1G_HD)
-		if_link->lp_caps |= SUPPORTED_1000baseT_Half;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_1G_FD)
-		if_link->lp_caps |= SUPPORTED_1000baseT_Full;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_10G)
-		if_link->lp_caps |= SUPPORTED_10000baseKR_Full;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_40G)
-		if_link->lp_caps |= SUPPORTED_40000baseLR4_Full;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_50G)
-		if_link->lp_caps |= 0;
-	if (link.partner_adv_speed &
-	    QED_LINK_PARTNER_SPEED_100G)
-		if_link->lp_caps |= 0;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD)
+		if_link->lp_caps |= QED_LM_1000baseT_Half_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD)
+		if_link->lp_caps |= QED_LM_1000baseT_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G)
+		if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_25G)
+		if_link->lp_caps |= QED_LM_25000baseKR_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_40G)
+		if_link->lp_caps |= QED_LM_40000baseLR4_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_50G)
+		if_link->lp_caps |= QED_LM_50000baseKR2_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_100G)
+		if_link->lp_caps |= QED_LM_100000baseKR4_Full_BIT;
 
 	if (link.an_complete)
-		if_link->lp_caps |= SUPPORTED_Autoneg;
+		if_link->lp_caps |= QED_LM_Autoneg_BIT;
 
 	if (link.partner_adv_pause)
-		if_link->lp_caps |= SUPPORTED_Pause;
+		if_link->lp_caps |= QED_LM_Pause_BIT;
 	if (link.partner_adv_pause == QED_LINK_PARTNER_ASYMMETRIC_PAUSE ||
 	    link.partner_adv_pause == QED_LINK_PARTNER_BOTH_PAUSE)
-		if_link->lp_caps |= SUPPORTED_Asym_Pause;
+		if_link->lp_caps |= QED_LM_Asym_Pause_BIT;
 }
 
 static void qed_get_current_link(struct qed_dev *cdev,
@@ -1385,9 +1459,32 @@
 	.get_link = &qed_get_current_link,
 	.drain = &qed_drain,
 	.update_msglvl = &qed_init_dp,
+	.dbg_all_data = &qed_dbg_all_data,
+	.dbg_all_data_size = &qed_dbg_all_data_size,
 	.chain_alloc = &qed_chain_alloc,
 	.chain_free = &qed_chain_free,
 	.get_coalesce = &qed_get_coalesce,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
 };
+
+void qed_get_protocol_stats(struct qed_dev *cdev,
+			    enum qed_mcp_protocol_type type,
+			    union qed_mcp_protocol_stats *stats)
+{
+	struct qed_eth_stats eth_stats;
+
+	memset(stats, 0, sizeof(*stats));
+
+	switch (type) {
+	case QED_MCP_LAN_STATS:
+		qed_get_vport_stats(cdev, &eth_stats);
+		stats->lan_stats.ucast_rx_pkts = eth_stats.rx_ucast_pkts;
+		stats->lan_stats.ucast_tx_pkts = eth_stats.tx_ucast_pkts;
+		stats->lan_stats.fcs_err = -1;
+		break;
+	default:
+		DP_ERR(cdev, "Invalid protocol type = %d\n", type);
+		return;
+	}
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index a240f26..bdc9ba9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -54,8 +54,7 @@
 	return true;
 }
 
-void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
-			   struct qed_ptt *p_ptt)
+void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
 					PUBLIC_PORT);
@@ -68,8 +67,7 @@
 		   p_hwfn->mcp_info->port_addr, MFW_PORT(p_hwfn));
 }
 
-void qed_mcp_read_mb(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt)
+void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 length = MFW_DRV_MSG_MAX_DWORDS(p_hwfn->mcp_info->mfw_mb_length);
 	u32 tmp, i;
@@ -99,8 +97,7 @@
 	return 0;
 }
 
-static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn,
-				struct qed_ptt *p_ptt)
+static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_mcp_info *p_info = p_hwfn->mcp_info;
 	u32 drv_mb_offsize, mfw_mb_offsize;
@@ -143,8 +140,7 @@
 	return 0;
 }
 
-int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt)
+int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_mcp_info *p_info;
 	u32 size;
@@ -165,9 +161,7 @@
 
 	size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32);
 	p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL);
-	p_info->mfw_mb_shadow =
-		kzalloc(sizeof(u32) * MFW_DRV_MSG_MAX_DWORDS(
-				p_info->mfw_mb_length), GFP_KERNEL);
+	p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL);
 	if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
 		goto err;
 
@@ -177,7 +171,6 @@
 	return 0;
 
 err:
-	DP_NOTICE(p_hwfn, "Failed to allocate mcp memory\n");
 	qed_mcp_free(p_hwfn);
 	return -ENOMEM;
 }
@@ -189,8 +182,7 @@
  * access is achieved by setting a blocking flag, which will fail other
  * competing contexts to send their mailboxes.
  */
-static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn,
-			   u32 cmd)
+static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, u32 cmd)
 {
 	spin_lock_bh(&p_hwfn->mcp_info->lock);
 
@@ -221,15 +213,13 @@
 	return 0;
 }
 
-static void qed_mcp_mb_unlock(struct qed_hwfn	*p_hwfn,
-			      u32		cmd)
+static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, u32 cmd)
 {
 	if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ)
 		spin_unlock_bh(&p_hwfn->mcp_info->lock);
 }
 
-int qed_mcp_reset(struct qed_hwfn *p_hwfn,
-		  struct qed_ptt *p_ptt)
+int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 seq = ++p_hwfn->mcp_info->drv_mb_seq;
 	u8 delay = CHIP_MCP_RESP_ITER_US;
@@ -326,7 +316,8 @@
 		*o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
 	} else {
 		/* FW BUG! */
-		DP_ERR(p_hwfn, "MFW failed to respond!\n");
+		DP_ERR(p_hwfn, "MFW failed to respond [cmd 0x%x param 0x%x]\n",
+		       cmd, param);
 		*o_mcp_resp = 0;
 		rc = -EAGAIN;
 	}
@@ -342,7 +333,7 @@
 
 	/* MCP not initialized */
 	if (!qed_mcp_is_init(p_hwfn)) {
-		DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
 		return -EBUSY;
 	}
 
@@ -398,9 +389,36 @@
 	return 0;
 }
 
+int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       u32 cmd,
+		       u32 param,
+		       u32 *o_mcp_resp,
+		       u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf)
+{
+	struct qed_mcp_mb_params mb_params;
+	union drv_union_data union_data;
+	int rc;
+
+	memset(&mb_params, 0, sizeof(mb_params));
+	mb_params.cmd = cmd;
+	mb_params.param = param;
+	mb_params.p_data_dst = &union_data;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	*o_mcp_resp = mb_params.mcp_resp;
+	*o_mcp_param = mb_params.mcp_param;
+
+	*o_txn_size = *o_mcp_param;
+	memcpy(o_buf, &union_data.raw_data, *o_txn_size);
+
+	return 0;
+}
+
 int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
-		     struct qed_ptt *p_ptt,
-		     u32 *p_load_code)
+		     struct qed_ptt *p_ptt, u32 *p_load_code)
 {
 	struct qed_dev *cdev = p_hwfn->cdev;
 	struct qed_mcp_mb_params mb_params;
@@ -527,8 +545,7 @@
 		   "Received transceiver state update [0x%08x] from mfw [Addr 0x%x]\n",
 		   transceiver_state,
 		   (u32)(p_hwfn->mcp_info->port_addr +
-			 offsetof(struct public_port,
-				  transceiver_data)));
+			  offsetof(struct public_port, transceiver_data)));
 
 	transceiver_state = GET_FIELD(transceiver_state,
 				      ETH_TRANSCEIVER_STATE);
@@ -540,8 +557,7 @@
 }
 
 static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
-				       struct qed_ptt *p_ptt,
-				       bool b_reset)
+				       struct qed_ptt *p_ptt, bool b_reset)
 {
 	struct qed_mcp_link_state *p_link;
 	u8 max_bw, min_bw;
@@ -557,8 +573,7 @@
 			   "Received link update [0x%08x] from mfw [Addr 0x%x]\n",
 			   status,
 			   (u32)(p_hwfn->mcp_info->port_addr +
-				 offsetof(struct public_port,
-					  link_status)));
+				 offsetof(struct public_port, link_status)));
 	} else {
 		DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
 			   "Resetting link indications\n");
@@ -635,6 +650,9 @@
 		(status & LINK_STATUS_LINK_PARTNER_20G_CAPABLE) ?
 		QED_LINK_PARTNER_SPEED_20G : 0;
 	p_link->partner_adv_speed |=
+		(status & LINK_STATUS_LINK_PARTNER_25G_CAPABLE) ?
+		QED_LINK_PARTNER_SPEED_25G : 0;
+	p_link->partner_adv_speed |=
 		(status & LINK_STATUS_LINK_PARTNER_40G_CAPABLE) ?
 		QED_LINK_PARTNER_SPEED_40G : 0;
 	p_link->partner_adv_speed |=
@@ -722,6 +740,48 @@
 	return 0;
 }
 
+static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt,
+					enum MFW_DRV_MSG_TYPE type)
+{
+	enum qed_mcp_protocol_type stats_type;
+	union qed_mcp_protocol_stats stats;
+	struct qed_mcp_mb_params mb_params;
+	union drv_union_data union_data;
+	u32 hsi_param;
+
+	switch (type) {
+	case MFW_DRV_MSG_GET_LAN_STATS:
+		stats_type = QED_MCP_LAN_STATS;
+		hsi_param = DRV_MSG_CODE_STATS_TYPE_LAN;
+		break;
+	case MFW_DRV_MSG_GET_FCOE_STATS:
+		stats_type = QED_MCP_FCOE_STATS;
+		hsi_param = DRV_MSG_CODE_STATS_TYPE_FCOE;
+		break;
+	case MFW_DRV_MSG_GET_ISCSI_STATS:
+		stats_type = QED_MCP_ISCSI_STATS;
+		hsi_param = DRV_MSG_CODE_STATS_TYPE_ISCSI;
+		break;
+	case MFW_DRV_MSG_GET_RDMA_STATS:
+		stats_type = QED_MCP_RDMA_STATS;
+		hsi_param = DRV_MSG_CODE_STATS_TYPE_RDMA;
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Invalid protocol type %d\n", type);
+		return;
+	}
+
+	qed_get_protocol_stats(p_hwfn->cdev, stats_type, &stats);
+
+	memset(&mb_params, 0, sizeof(mb_params));
+	mb_params.cmd = DRV_MSG_CODE_GET_STATS;
+	mb_params.param = hsi_param;
+	memcpy(&union_data, &stats, sizeof(stats));
+	mb_params.p_data_src = &union_data;
+	qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+}
+
 static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn,
 				  struct public_func *p_shmem_info)
 {
@@ -752,8 +812,7 @@
 
 static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
 				  struct qed_ptt *p_ptt,
-				  struct public_func *p_data,
-				  int pfid)
+				  struct public_func *p_data, int pfid)
 {
 	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
 					PUBLIC_FUNC);
@@ -763,51 +822,20 @@
 
 	memset(p_data, 0, sizeof(*p_data));
 
-	size = min_t(u32, sizeof(*p_data),
-		     QED_SECTION_SIZE(mfw_path_offsize));
+	size = min_t(u32, sizeof(*p_data), QED_SECTION_SIZE(mfw_path_offsize));
 	for (i = 0; i < size / sizeof(u32); i++)
 		((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt,
 					    func_addr + (i << 2));
 	return size;
 }
 
-int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn,
-			  struct qed_ptt *p_ptt, u8 *p_pf)
-{
-	struct public_func shmem_info;
-	int i;
-
-	/* Find first Ethernet interface in port */
-	for (i = 0; i < NUM_OF_ENG_PFS(p_hwfn->cdev);
-	     i += p_hwfn->cdev->num_ports_in_engines) {
-		qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
-				       MCP_PF_ID_BY_REL(p_hwfn, i));
-
-		if (shmem_info.config & FUNC_MF_CFG_FUNC_HIDE)
-			continue;
-
-		if ((shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK) ==
-		    FUNC_MF_CFG_PROTOCOL_ETHERNET) {
-			*p_pf = (u8)i;
-			return 0;
-		}
-	}
-
-	DP_NOTICE(p_hwfn,
-		  "Failed to find on port an ethernet interface in MF_SI mode\n");
-
-	return -EINVAL;
-}
-
-static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn,
-			      struct qed_ptt *p_ptt)
+static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct qed_mcp_function_info *p_info;
 	struct public_func shmem_info;
 	u32 resp = 0, param = 0;
 
-	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
-			       MCP_PF_ID(p_hwfn));
+	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
 
 	qed_read_pf_bandwidth(p_hwfn, &shmem_info);
 
@@ -867,6 +895,12 @@
 		case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
 			qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
 			break;
+		case MFW_DRV_MSG_GET_LAN_STATS:
+		case MFW_DRV_MSG_GET_FCOE_STATS:
+		case MFW_DRV_MSG_GET_ISCSI_STATS:
+		case MFW_DRV_MSG_GET_RDMA_STATS:
+			qed_mcp_send_protocol_stats(p_hwfn, p_ptt, i);
+			break;
 		case MFW_DRV_MSG_BW_UPDATE:
 			qed_mcp_update_bw(p_hwfn, p_ptt);
 			break;
@@ -940,8 +974,7 @@
 	return 0;
 }
 
-int qed_mcp_get_media_type(struct qed_dev *cdev,
-			   u32 *p_media_type)
+int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
 {
 	struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
 	struct qed_ptt  *p_ptt;
@@ -950,7 +983,7 @@
 		return -EINVAL;
 
 	if (!qed_mcp_is_init(p_hwfn)) {
-		DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
 		return -EBUSY;
 	}
 
@@ -1003,15 +1036,13 @@
 	struct qed_mcp_function_info *info;
 	struct public_func shmem_info;
 
-	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
-			       MCP_PF_ID(p_hwfn));
+	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
 	info = &p_hwfn->mcp_info->func_info;
 
 	info->pause_on_host = (shmem_info.config &
 			       FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0;
 
-	if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info,
-				    &info->protocol)) {
+	if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) {
 		DP_ERR(p_hwfn, "Unknown personality %08x\n",
 		       (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK));
 		return -EINVAL;
@@ -1072,15 +1103,13 @@
 	return &p_hwfn->mcp_info->link_capabilities;
 }
 
-int qed_mcp_drain(struct qed_hwfn *p_hwfn,
-		  struct qed_ptt *p_ptt)
+int qed_mcp_drain(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 resp = 0, param = 0;
 	int rc;
 
 	rc = qed_mcp_cmd(p_hwfn, p_ptt,
-			 DRV_MSG_CODE_NIG_DRAIN, 1000,
-			 &resp, &param);
+			 DRV_MSG_CODE_NIG_DRAIN, 1000, &resp, &param);
 
 	/* Wait for the drain to complete before returning */
 	msleep(1020);
@@ -1089,8 +1118,7 @@
 }
 
 int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
-			   struct qed_ptt *p_ptt,
-			   u32 *p_flash_size)
+			   struct qed_ptt *p_ptt, u32 *p_flash_size)
 {
 	u32 flash_size;
 
@@ -1153,8 +1181,8 @@
 	p_drv_version = &union_data.drv_version;
 	p_drv_version->version = p_ver->version;
 
-	for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) {
-		val = cpu_to_be32(p_ver->name[i]);
+	for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) {
+		val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)]));
 		*(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val;
 	}
 
@@ -1168,8 +1196,35 @@
 	return rc;
 }
 
-int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
-		    enum qed_led_mode mode)
+int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 resp = 0, param = 0;
+	int rc;
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp,
+			 &param);
+	if (rc)
+		DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+
+	return rc;
+}
+
+int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 value, cpu_mode;
+
+	qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff);
+
+	value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+	value &= ~MCP_REG_CPU_MODE_SOFT_HALT;
+	qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value);
+	cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+
+	return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
+}
+
+int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
+		    struct qed_ptt *p_ptt, enum qed_led_mode mode)
 {
 	u32 resp = 0, param = 0, drv_mb_param;
 	int rc;
@@ -1195,6 +1250,27 @@
 	return rc;
 }
 
+int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u32 mask_parities)
+{
+	u32 resp = 0, param = 0;
+	int rc;
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES,
+			 mask_parities, &resp, &param);
+
+	if (rc) {
+		DP_ERR(p_hwfn,
+		       "MCP response failure for mask parities, aborting\n");
+	} else if (resp != FW_MSG_CODE_OK) {
+		DP_ERR(p_hwfn,
+		       "MCP did not acknowledge mask parity request. Old MFW?\n");
+		rc = -EINVAL;
+	}
+
+	return rc;
+}
+
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 drv_mb_param = 0, rsp, param;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 7f319aa..dff520e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -60,9 +60,10 @@
 #define QED_LINK_PARTNER_SPEED_1G_FD    BIT(1)
 #define QED_LINK_PARTNER_SPEED_10G      BIT(2)
 #define QED_LINK_PARTNER_SPEED_20G      BIT(3)
-#define QED_LINK_PARTNER_SPEED_40G      BIT(4)
-#define QED_LINK_PARTNER_SPEED_50G      BIT(5)
-#define QED_LINK_PARTNER_SPEED_100G     BIT(6)
+#define QED_LINK_PARTNER_SPEED_25G      BIT(4)
+#define QED_LINK_PARTNER_SPEED_40G      BIT(5)
+#define QED_LINK_PARTNER_SPEED_50G      BIT(6)
+#define QED_LINK_PARTNER_SPEED_100G     BIT(7)
 	u32     partner_adv_speed;
 
 	bool    partner_tx_flow_ctrl_en;
@@ -105,6 +106,47 @@
 	u8	name[MCP_DRV_VER_STR_SIZE - 4];
 };
 
+struct qed_mcp_lan_stats {
+	u64 ucast_rx_pkts;
+	u64 ucast_tx_pkts;
+	u32 fcs_err;
+};
+
+struct qed_mcp_fcoe_stats {
+	u64 rx_pkts;
+	u64 tx_pkts;
+	u32 fcs_err;
+	u32 login_failure;
+};
+
+struct qed_mcp_iscsi_stats {
+	u64 rx_pdus;
+	u64 tx_pdus;
+	u64 rx_bytes;
+	u64 tx_bytes;
+};
+
+struct qed_mcp_rdma_stats {
+	u64 rx_pkts;
+	u64 tx_pkts;
+	u64 rx_bytes;
+	u64 tx_byts;
+};
+
+enum qed_mcp_protocol_type {
+	QED_MCP_LAN_STATS,
+	QED_MCP_FCOE_STATS,
+	QED_MCP_ISCSI_STATS,
+	QED_MCP_RDMA_STATS
+};
+
+union qed_mcp_protocol_stats {
+	struct qed_mcp_lan_stats lan_stats;
+	struct qed_mcp_fcoe_stats fcoe_stats;
+	struct qed_mcp_iscsi_stats iscsi_stats;
+	struct qed_mcp_rdma_stats rdma_stats;
+};
+
 /**
  * @brief - returns the link params of the hw function
  *
@@ -426,6 +468,29 @@
 		  struct qed_ptt *p_ptt);
 
 /**
+ * @brief - Sends an NVM read command request to the MFW to get
+ *        a buffer.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param cmd - Command: DRV_MSG_CODE_NVM_GET_FILE_DATA or
+ *            DRV_MSG_CODE_NVM_READ_NVRAM commands
+ * @param param - [0:23] - Offset [24:31] - Size
+ * @param o_mcp_resp - MCP response
+ * @param o_mcp_param - MCP response param
+ * @param o_txn_size -  Buffer size output
+ * @param o_buf - Pointer to the buffer returned by the MFW.
+ *
+ * @param return 0 upon success.
+ */
+int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       u32 cmd,
+		       u32 param,
+		       u32 *o_mcp_resp,
+		       u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf);
+
+/**
  * @brief indicates whether the MFW objects [under mcp_info] are accessible
  *
  * @param p_hwfn
@@ -447,6 +512,26 @@
 int qed_mcp_config_vf_msix(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt, u8 vf_id, u8 num);
 
+/**
+ * @brief - Halt the MCP.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @param return 0 upon success.
+ */
+int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief - Wake up the MCP.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @param return 0 upon success.
+ */
+int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
 int qed_configure_pf_min_bandwidth(struct qed_dev *cdev, u8 min_bw);
 int qed_configure_pf_max_bandwidth(struct qed_dev *cdev, u8 max_bw);
 int __qed_configure_pf_max_bandwidth(struct qed_hwfn *p_hwfn,
@@ -458,6 +543,7 @@
 				     struct qed_mcp_link_state *p_link,
 				     u8 min_bw);
 
-int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn,
-			  struct qed_ptt *p_ptt, u8 *p_pf);
+int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u32 mask_parities);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index f6b86ca..b414a05 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -116,8 +116,14 @@
 	0x1009c4UL
 #define  QM_REG_PF_EN \
 	0x2f2ea4UL
+#define TCFC_REG_WEAK_ENABLE_VF \
+	0x2d0704UL
 #define  TCFC_REG_STRONG_ENABLE_PF \
 	0x2d0708UL
+#define  TCFC_REG_STRONG_ENABLE_VF \
+	0x2d070cUL
+#define CCFC_REG_WEAK_ENABLE_VF \
+	0x2e0704UL
 #define  CCFC_REG_STRONG_ENABLE_PF \
 	0x2e0708UL
 #define  PGLUE_B_REG_PGL_ADDR_88_F0 \
@@ -202,6 +208,26 @@
 	0x50196cUL
 #define NIG_REG_LLH_CLS_TYPE_DUALMODE \
 	0x501964UL
+#define NIG_REG_LLH_FUNC_FILTER_VALUE \
+	0x501a00UL
+#define NIG_REG_LLH_FUNC_FILTER_VALUE_SIZE \
+	32
+#define NIG_REG_LLH_FUNC_FILTER_EN \
+	0x501a80UL
+#define NIG_REG_LLH_FUNC_FILTER_EN_SIZE	\
+	16
+#define NIG_REG_LLH_FUNC_FILTER_MODE \
+	0x501ac0UL
+#define NIG_REG_LLH_FUNC_FILTER_MODE_SIZE \
+	16
+#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE \
+	0x501b00UL
+#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_SIZE \
+	16
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL	\
+	0x501b40UL
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_SIZE \
+	16
 #define  NCSI_REG_CONFIG	\
 	0x040200UL
 #define  PBF_REG_INIT \
@@ -258,6 +284,8 @@
 	0x1f0a1cUL
 #define PRS_REG_ROCE_DEST_QP_MAX_PF \
 	0x1f0430UL
+#define PRS_REG_USE_LIGHT_L2 \
+	0x1f096cUL
 #define  PSDM_REG_ENABLE_IN1 \
 	0xfa0004UL
 #define  PSEM_REG_ENABLE_IN \
@@ -521,4 +549,910 @@
 
 #define QM_REG_WFQPFWEIGHT	0x2f4e80UL
 #define QM_REG_WFQVPWEIGHT	0x2fa000UL
+
+#define PGLCS_REG_DBG_SELECT \
+	0x001d14UL
+#define PGLCS_REG_DBG_DWORD_ENABLE \
+	0x001d18UL
+#define PGLCS_REG_DBG_SHIFT \
+	0x001d1cUL
+#define PGLCS_REG_DBG_FORCE_VALID \
+	0x001d20UL
+#define PGLCS_REG_DBG_FORCE_FRAME \
+	0x001d24UL
+#define MISC_REG_RESET_PL_PDA_VMAIN_1 \
+	0x008070UL
+#define MISC_REG_RESET_PL_PDA_VMAIN_2 \
+	0x008080UL
+#define MISC_REG_RESET_PL_PDA_VAUX \
+	0x008090UL
+#define MISCS_REG_RESET_PL_UA \
+	0x009050UL
+#define MISCS_REG_RESET_PL_HV \
+	0x009060UL
+#define MISCS_REG_RESET_PL_HV_2	\
+	0x009150UL
+#define DMAE_REG_DBG_SELECT \
+	0x00c510UL
+#define DMAE_REG_DBG_DWORD_ENABLE \
+	0x00c514UL
+#define DMAE_REG_DBG_SHIFT \
+	0x00c518UL
+#define DMAE_REG_DBG_FORCE_VALID \
+	0x00c51cUL
+#define DMAE_REG_DBG_FORCE_FRAME \
+	0x00c520UL
+#define NCSI_REG_DBG_SELECT \
+	0x040474UL
+#define NCSI_REG_DBG_DWORD_ENABLE \
+	0x040478UL
+#define NCSI_REG_DBG_SHIFT \
+	0x04047cUL
+#define NCSI_REG_DBG_FORCE_VALID \
+	0x040480UL
+#define NCSI_REG_DBG_FORCE_FRAME \
+	0x040484UL
+#define GRC_REG_DBG_SELECT \
+	0x0500a4UL
+#define GRC_REG_DBG_DWORD_ENABLE \
+	0x0500a8UL
+#define GRC_REG_DBG_SHIFT \
+	0x0500acUL
+#define GRC_REG_DBG_FORCE_VALID	\
+	0x0500b0UL
+#define GRC_REG_DBG_FORCE_FRAME	\
+	0x0500b4UL
+#define UMAC_REG_DBG_SELECT \
+	0x051094UL
+#define UMAC_REG_DBG_DWORD_ENABLE \
+	0x051098UL
+#define UMAC_REG_DBG_SHIFT \
+	0x05109cUL
+#define UMAC_REG_DBG_FORCE_VALID \
+	0x0510a0UL
+#define UMAC_REG_DBG_FORCE_FRAME \
+	0x0510a4UL
+#define MCP2_REG_DBG_SELECT \
+	0x052400UL
+#define MCP2_REG_DBG_DWORD_ENABLE \
+	0x052404UL
+#define MCP2_REG_DBG_SHIFT \
+	0x052408UL
+#define MCP2_REG_DBG_FORCE_VALID \
+	0x052440UL
+#define MCP2_REG_DBG_FORCE_FRAME \
+	0x052444UL
+#define PCIE_REG_DBG_SELECT \
+	0x0547e8UL
+#define PCIE_REG_DBG_DWORD_ENABLE \
+	0x0547ecUL
+#define PCIE_REG_DBG_SHIFT \
+	0x0547f0UL
+#define PCIE_REG_DBG_FORCE_VALID \
+	0x0547f4UL
+#define PCIE_REG_DBG_FORCE_FRAME \
+	0x0547f8UL
+#define DORQ_REG_DBG_SELECT \
+	0x100ad0UL
+#define DORQ_REG_DBG_DWORD_ENABLE \
+	0x100ad4UL
+#define DORQ_REG_DBG_SHIFT \
+	0x100ad8UL
+#define DORQ_REG_DBG_FORCE_VALID \
+	0x100adcUL
+#define DORQ_REG_DBG_FORCE_FRAME \
+	0x100ae0UL
+#define IGU_REG_DBG_SELECT \
+	0x181578UL
+#define IGU_REG_DBG_DWORD_ENABLE \
+	0x18157cUL
+#define IGU_REG_DBG_SHIFT \
+	0x181580UL
+#define IGU_REG_DBG_FORCE_VALID	\
+	0x181584UL
+#define IGU_REG_DBG_FORCE_FRAME	\
+	0x181588UL
+#define CAU_REG_DBG_SELECT \
+	0x1c0ea8UL
+#define CAU_REG_DBG_DWORD_ENABLE \
+	0x1c0eacUL
+#define CAU_REG_DBG_SHIFT \
+	0x1c0eb0UL
+#define CAU_REG_DBG_FORCE_VALID	\
+	0x1c0eb4UL
+#define CAU_REG_DBG_FORCE_FRAME	\
+	0x1c0eb8UL
+#define PRS_REG_DBG_SELECT \
+	0x1f0b6cUL
+#define PRS_REG_DBG_DWORD_ENABLE \
+	0x1f0b70UL
+#define PRS_REG_DBG_SHIFT \
+	0x1f0b74UL
+#define PRS_REG_DBG_FORCE_VALID	\
+	0x1f0ba0UL
+#define PRS_REG_DBG_FORCE_FRAME	\
+	0x1f0ba4UL
+#define CNIG_REG_DBG_SELECT_K2 \
+	0x218254UL
+#define CNIG_REG_DBG_DWORD_ENABLE_K2 \
+	0x218258UL
+#define CNIG_REG_DBG_SHIFT_K2 \
+	0x21825cUL
+#define CNIG_REG_DBG_FORCE_VALID_K2 \
+	0x218260UL
+#define CNIG_REG_DBG_FORCE_FRAME_K2 \
+	0x218264UL
+#define PRM_REG_DBG_SELECT \
+	0x2306a8UL
+#define PRM_REG_DBG_DWORD_ENABLE \
+	0x2306acUL
+#define PRM_REG_DBG_SHIFT \
+	0x2306b0UL
+#define PRM_REG_DBG_FORCE_VALID	\
+	0x2306b4UL
+#define PRM_REG_DBG_FORCE_FRAME	\
+	0x2306b8UL
+#define SRC_REG_DBG_SELECT \
+	0x238700UL
+#define SRC_REG_DBG_DWORD_ENABLE \
+	0x238704UL
+#define SRC_REG_DBG_SHIFT \
+	0x238708UL
+#define SRC_REG_DBG_FORCE_VALID	\
+	0x23870cUL
+#define SRC_REG_DBG_FORCE_FRAME	\
+	0x238710UL
+#define RSS_REG_DBG_SELECT \
+	0x238c4cUL
+#define RSS_REG_DBG_DWORD_ENABLE \
+	0x238c50UL
+#define RSS_REG_DBG_SHIFT \
+	0x238c54UL
+#define RSS_REG_DBG_FORCE_VALID	\
+	0x238c58UL
+#define RSS_REG_DBG_FORCE_FRAME	\
+	0x238c5cUL
+#define RPB_REG_DBG_SELECT \
+	0x23c728UL
+#define RPB_REG_DBG_DWORD_ENABLE \
+	0x23c72cUL
+#define RPB_REG_DBG_SHIFT \
+	0x23c730UL
+#define RPB_REG_DBG_FORCE_VALID	\
+	0x23c734UL
+#define RPB_REG_DBG_FORCE_FRAME	\
+	0x23c738UL
+#define PSWRQ2_REG_DBG_SELECT \
+	0x240100UL
+#define PSWRQ2_REG_DBG_DWORD_ENABLE \
+	0x240104UL
+#define PSWRQ2_REG_DBG_SHIFT \
+	0x240108UL
+#define PSWRQ2_REG_DBG_FORCE_VALID \
+	0x24010cUL
+#define PSWRQ2_REG_DBG_FORCE_FRAME \
+	0x240110UL
+#define PSWRQ_REG_DBG_SELECT \
+	0x280020UL
+#define PSWRQ_REG_DBG_DWORD_ENABLE \
+	0x280024UL
+#define PSWRQ_REG_DBG_SHIFT \
+	0x280028UL
+#define PSWRQ_REG_DBG_FORCE_VALID \
+	0x28002cUL
+#define PSWRQ_REG_DBG_FORCE_FRAME \
+	0x280030UL
+#define PSWWR_REG_DBG_SELECT \
+	0x29a084UL
+#define PSWWR_REG_DBG_DWORD_ENABLE \
+	0x29a088UL
+#define PSWWR_REG_DBG_SHIFT \
+	0x29a08cUL
+#define PSWWR_REG_DBG_FORCE_VALID \
+	0x29a090UL
+#define PSWWR_REG_DBG_FORCE_FRAME \
+	0x29a094UL
+#define PSWRD_REG_DBG_SELECT \
+	0x29c040UL
+#define PSWRD_REG_DBG_DWORD_ENABLE \
+	0x29c044UL
+#define PSWRD_REG_DBG_SHIFT \
+	0x29c048UL
+#define PSWRD_REG_DBG_FORCE_VALID \
+	0x29c04cUL
+#define PSWRD_REG_DBG_FORCE_FRAME \
+	0x29c050UL
+#define PSWRD2_REG_DBG_SELECT \
+	0x29d400UL
+#define PSWRD2_REG_DBG_DWORD_ENABLE \
+	0x29d404UL
+#define PSWRD2_REG_DBG_SHIFT \
+	0x29d408UL
+#define PSWRD2_REG_DBG_FORCE_VALID \
+	0x29d40cUL
+#define PSWRD2_REG_DBG_FORCE_FRAME \
+	0x29d410UL
+#define PSWHST2_REG_DBG_SELECT \
+	0x29e058UL
+#define PSWHST2_REG_DBG_DWORD_ENABLE \
+	0x29e05cUL
+#define PSWHST2_REG_DBG_SHIFT \
+	0x29e060UL
+#define PSWHST2_REG_DBG_FORCE_VALID \
+	0x29e064UL
+#define PSWHST2_REG_DBG_FORCE_FRAME \
+	0x29e068UL
+#define PSWHST_REG_DBG_SELECT \
+	0x2a0100UL
+#define PSWHST_REG_DBG_DWORD_ENABLE \
+	0x2a0104UL
+#define PSWHST_REG_DBG_SHIFT \
+	0x2a0108UL
+#define PSWHST_REG_DBG_FORCE_VALID \
+	0x2a010cUL
+#define PSWHST_REG_DBG_FORCE_FRAME \
+	0x2a0110UL
+#define PGLUE_B_REG_DBG_SELECT \
+	0x2a8400UL
+#define PGLUE_B_REG_DBG_DWORD_ENABLE \
+	0x2a8404UL
+#define PGLUE_B_REG_DBG_SHIFT \
+	0x2a8408UL
+#define PGLUE_B_REG_DBG_FORCE_VALID \
+	0x2a840cUL
+#define PGLUE_B_REG_DBG_FORCE_FRAME \
+	0x2a8410UL
+#define TM_REG_DBG_SELECT \
+	0x2c07a8UL
+#define TM_REG_DBG_DWORD_ENABLE	\
+	0x2c07acUL
+#define TM_REG_DBG_SHIFT \
+	0x2c07b0UL
+#define TM_REG_DBG_FORCE_VALID \
+	0x2c07b4UL
+#define TM_REG_DBG_FORCE_FRAME \
+	0x2c07b8UL
+#define TCFC_REG_DBG_SELECT \
+	0x2d0500UL
+#define TCFC_REG_DBG_DWORD_ENABLE \
+	0x2d0504UL
+#define TCFC_REG_DBG_SHIFT \
+	0x2d0508UL
+#define TCFC_REG_DBG_FORCE_VALID \
+	0x2d050cUL
+#define TCFC_REG_DBG_FORCE_FRAME \
+	0x2d0510UL
+#define CCFC_REG_DBG_SELECT \
+	0x2e0500UL
+#define CCFC_REG_DBG_DWORD_ENABLE \
+	0x2e0504UL
+#define CCFC_REG_DBG_SHIFT \
+	0x2e0508UL
+#define CCFC_REG_DBG_FORCE_VALID \
+	0x2e050cUL
+#define CCFC_REG_DBG_FORCE_FRAME \
+	0x2e0510UL
+#define QM_REG_DBG_SELECT \
+	0x2f2e74UL
+#define QM_REG_DBG_DWORD_ENABLE	\
+	0x2f2e78UL
+#define QM_REG_DBG_SHIFT \
+	0x2f2e7cUL
+#define QM_REG_DBG_FORCE_VALID \
+	0x2f2e80UL
+#define QM_REG_DBG_FORCE_FRAME \
+	0x2f2e84UL
+#define RDIF_REG_DBG_SELECT \
+	0x300500UL
+#define RDIF_REG_DBG_DWORD_ENABLE \
+	0x300504UL
+#define RDIF_REG_DBG_SHIFT \
+	0x300508UL
+#define RDIF_REG_DBG_FORCE_VALID \
+	0x30050cUL
+#define RDIF_REG_DBG_FORCE_FRAME \
+	0x300510UL
+#define TDIF_REG_DBG_SELECT \
+	0x310500UL
+#define TDIF_REG_DBG_DWORD_ENABLE \
+	0x310504UL
+#define TDIF_REG_DBG_SHIFT \
+	0x310508UL
+#define TDIF_REG_DBG_FORCE_VALID \
+	0x31050cUL
+#define TDIF_REG_DBG_FORCE_FRAME \
+	0x310510UL
+#define BRB_REG_DBG_SELECT \
+	0x340ed0UL
+#define BRB_REG_DBG_DWORD_ENABLE \
+	0x340ed4UL
+#define BRB_REG_DBG_SHIFT \
+	0x340ed8UL
+#define BRB_REG_DBG_FORCE_VALID	\
+	0x340edcUL
+#define BRB_REG_DBG_FORCE_FRAME	\
+	0x340ee0UL
+#define XYLD_REG_DBG_SELECT \
+	0x4c1600UL
+#define XYLD_REG_DBG_DWORD_ENABLE \
+	0x4c1604UL
+#define XYLD_REG_DBG_SHIFT \
+	0x4c1608UL
+#define XYLD_REG_DBG_FORCE_VALID \
+	0x4c160cUL
+#define XYLD_REG_DBG_FORCE_FRAME \
+	0x4c1610UL
+#define YULD_REG_DBG_SELECT \
+	0x4c9600UL
+#define YULD_REG_DBG_DWORD_ENABLE \
+	0x4c9604UL
+#define YULD_REG_DBG_SHIFT \
+	0x4c9608UL
+#define YULD_REG_DBG_FORCE_VALID \
+	0x4c960cUL
+#define YULD_REG_DBG_FORCE_FRAME \
+	0x4c9610UL
+#define TMLD_REG_DBG_SELECT \
+	0x4d1600UL
+#define TMLD_REG_DBG_DWORD_ENABLE \
+	0x4d1604UL
+#define TMLD_REG_DBG_SHIFT \
+	0x4d1608UL
+#define TMLD_REG_DBG_FORCE_VALID \
+	0x4d160cUL
+#define TMLD_REG_DBG_FORCE_FRAME \
+	0x4d1610UL
+#define MULD_REG_DBG_SELECT \
+	0x4e1600UL
+#define MULD_REG_DBG_DWORD_ENABLE \
+	0x4e1604UL
+#define MULD_REG_DBG_SHIFT \
+	0x4e1608UL
+#define MULD_REG_DBG_FORCE_VALID \
+	0x4e160cUL
+#define MULD_REG_DBG_FORCE_FRAME \
+	0x4e1610UL
+#define NIG_REG_DBG_SELECT \
+	0x502140UL
+#define NIG_REG_DBG_DWORD_ENABLE \
+	0x502144UL
+#define NIG_REG_DBG_SHIFT \
+	0x502148UL
+#define NIG_REG_DBG_FORCE_VALID	\
+	0x50214cUL
+#define NIG_REG_DBG_FORCE_FRAME	\
+	0x502150UL
+#define BMB_REG_DBG_SELECT \
+	0x540a7cUL
+#define BMB_REG_DBG_DWORD_ENABLE \
+	0x540a80UL
+#define BMB_REG_DBG_SHIFT \
+	0x540a84UL
+#define BMB_REG_DBG_FORCE_VALID	\
+	0x540a88UL
+#define BMB_REG_DBG_FORCE_FRAME	\
+	0x540a8cUL
+#define PTU_REG_DBG_SELECT \
+	0x560100UL
+#define PTU_REG_DBG_DWORD_ENABLE \
+	0x560104UL
+#define PTU_REG_DBG_SHIFT \
+	0x560108UL
+#define PTU_REG_DBG_FORCE_VALID	\
+	0x56010cUL
+#define PTU_REG_DBG_FORCE_FRAME	\
+	0x560110UL
+#define CDU_REG_DBG_SELECT \
+	0x580704UL
+#define CDU_REG_DBG_DWORD_ENABLE \
+	0x580708UL
+#define CDU_REG_DBG_SHIFT \
+	0x58070cUL
+#define CDU_REG_DBG_FORCE_VALID	\
+	0x580710UL
+#define CDU_REG_DBG_FORCE_FRAME	\
+	0x580714UL
+#define WOL_REG_DBG_SELECT \
+	0x600140UL
+#define WOL_REG_DBG_DWORD_ENABLE \
+	0x600144UL
+#define WOL_REG_DBG_SHIFT \
+	0x600148UL
+#define WOL_REG_DBG_FORCE_VALID	\
+	0x60014cUL
+#define WOL_REG_DBG_FORCE_FRAME	\
+	0x600150UL
+#define BMBN_REG_DBG_SELECT \
+	0x610140UL
+#define BMBN_REG_DBG_DWORD_ENABLE \
+	0x610144UL
+#define BMBN_REG_DBG_SHIFT \
+	0x610148UL
+#define BMBN_REG_DBG_FORCE_VALID \
+	0x61014cUL
+#define BMBN_REG_DBG_FORCE_FRAME \
+	0x610150UL
+#define NWM_REG_DBG_SELECT \
+	0x8000ecUL
+#define NWM_REG_DBG_DWORD_ENABLE \
+	0x8000f0UL
+#define NWM_REG_DBG_SHIFT \
+	0x8000f4UL
+#define NWM_REG_DBG_FORCE_VALID	\
+	0x8000f8UL
+#define NWM_REG_DBG_FORCE_FRAME	\
+	0x8000fcUL
+#define PBF_REG_DBG_SELECT \
+	0xd80060UL
+#define PBF_REG_DBG_DWORD_ENABLE \
+	0xd80064UL
+#define PBF_REG_DBG_SHIFT \
+	0xd80068UL
+#define PBF_REG_DBG_FORCE_VALID	\
+	0xd8006cUL
+#define PBF_REG_DBG_FORCE_FRAME	\
+	0xd80070UL
+#define PBF_PB1_REG_DBG_SELECT \
+	0xda0728UL
+#define PBF_PB1_REG_DBG_DWORD_ENABLE \
+	0xda072cUL
+#define PBF_PB1_REG_DBG_SHIFT \
+	0xda0730UL
+#define PBF_PB1_REG_DBG_FORCE_VALID \
+	0xda0734UL
+#define PBF_PB1_REG_DBG_FORCE_FRAME \
+	0xda0738UL
+#define PBF_PB2_REG_DBG_SELECT \
+	0xda4728UL
+#define PBF_PB2_REG_DBG_DWORD_ENABLE \
+	0xda472cUL
+#define PBF_PB2_REG_DBG_SHIFT \
+	0xda4730UL
+#define PBF_PB2_REG_DBG_FORCE_VALID \
+	0xda4734UL
+#define PBF_PB2_REG_DBG_FORCE_FRAME \
+	0xda4738UL
+#define BTB_REG_DBG_SELECT \
+	0xdb08c8UL
+#define BTB_REG_DBG_DWORD_ENABLE \
+	0xdb08ccUL
+#define BTB_REG_DBG_SHIFT \
+	0xdb08d0UL
+#define BTB_REG_DBG_FORCE_VALID	\
+	0xdb08d4UL
+#define BTB_REG_DBG_FORCE_FRAME	\
+	0xdb08d8UL
+#define XSDM_REG_DBG_SELECT \
+	0xf80e28UL
+#define XSDM_REG_DBG_DWORD_ENABLE \
+	0xf80e2cUL
+#define XSDM_REG_DBG_SHIFT \
+	0xf80e30UL
+#define XSDM_REG_DBG_FORCE_VALID \
+	0xf80e34UL
+#define XSDM_REG_DBG_FORCE_FRAME \
+	0xf80e38UL
+#define YSDM_REG_DBG_SELECT \
+	0xf90e28UL
+#define YSDM_REG_DBG_DWORD_ENABLE \
+	0xf90e2cUL
+#define YSDM_REG_DBG_SHIFT \
+	0xf90e30UL
+#define YSDM_REG_DBG_FORCE_VALID \
+	0xf90e34UL
+#define YSDM_REG_DBG_FORCE_FRAME \
+	0xf90e38UL
+#define PSDM_REG_DBG_SELECT \
+	0xfa0e28UL
+#define PSDM_REG_DBG_DWORD_ENABLE \
+	0xfa0e2cUL
+#define PSDM_REG_DBG_SHIFT \
+	0xfa0e30UL
+#define PSDM_REG_DBG_FORCE_VALID \
+	0xfa0e34UL
+#define PSDM_REG_DBG_FORCE_FRAME \
+	0xfa0e38UL
+#define TSDM_REG_DBG_SELECT \
+	0xfb0e28UL
+#define TSDM_REG_DBG_DWORD_ENABLE \
+	0xfb0e2cUL
+#define TSDM_REG_DBG_SHIFT \
+	0xfb0e30UL
+#define TSDM_REG_DBG_FORCE_VALID \
+	0xfb0e34UL
+#define TSDM_REG_DBG_FORCE_FRAME \
+	0xfb0e38UL
+#define MSDM_REG_DBG_SELECT \
+	0xfc0e28UL
+#define MSDM_REG_DBG_DWORD_ENABLE \
+	0xfc0e2cUL
+#define MSDM_REG_DBG_SHIFT \
+	0xfc0e30UL
+#define MSDM_REG_DBG_FORCE_VALID \
+	0xfc0e34UL
+#define MSDM_REG_DBG_FORCE_FRAME \
+	0xfc0e38UL
+#define USDM_REG_DBG_SELECT \
+	0xfd0e28UL
+#define USDM_REG_DBG_DWORD_ENABLE \
+	0xfd0e2cUL
+#define USDM_REG_DBG_SHIFT \
+	0xfd0e30UL
+#define USDM_REG_DBG_FORCE_VALID \
+	0xfd0e34UL
+#define USDM_REG_DBG_FORCE_FRAME \
+	0xfd0e38UL
+#define XCM_REG_DBG_SELECT \
+	0x1000040UL
+#define XCM_REG_DBG_DWORD_ENABLE \
+	0x1000044UL
+#define XCM_REG_DBG_SHIFT \
+	0x1000048UL
+#define XCM_REG_DBG_FORCE_VALID	\
+	0x100004cUL
+#define XCM_REG_DBG_FORCE_FRAME	\
+	0x1000050UL
+#define YCM_REG_DBG_SELECT \
+	0x1080040UL
+#define YCM_REG_DBG_DWORD_ENABLE \
+	0x1080044UL
+#define YCM_REG_DBG_SHIFT \
+	0x1080048UL
+#define YCM_REG_DBG_FORCE_VALID	\
+	0x108004cUL
+#define YCM_REG_DBG_FORCE_FRAME	\
+	0x1080050UL
+#define PCM_REG_DBG_SELECT \
+	0x1100040UL
+#define PCM_REG_DBG_DWORD_ENABLE \
+	0x1100044UL
+#define PCM_REG_DBG_SHIFT \
+	0x1100048UL
+#define PCM_REG_DBG_FORCE_VALID	\
+	0x110004cUL
+#define PCM_REG_DBG_FORCE_FRAME	\
+	0x1100050UL
+#define TCM_REG_DBG_SELECT \
+	0x1180040UL
+#define TCM_REG_DBG_DWORD_ENABLE \
+	0x1180044UL
+#define TCM_REG_DBG_SHIFT \
+	0x1180048UL
+#define TCM_REG_DBG_FORCE_VALID	\
+	0x118004cUL
+#define TCM_REG_DBG_FORCE_FRAME	\
+	0x1180050UL
+#define MCM_REG_DBG_SELECT \
+	0x1200040UL
+#define MCM_REG_DBG_DWORD_ENABLE \
+	0x1200044UL
+#define MCM_REG_DBG_SHIFT \
+	0x1200048UL
+#define MCM_REG_DBG_FORCE_VALID	\
+	0x120004cUL
+#define MCM_REG_DBG_FORCE_FRAME	\
+	0x1200050UL
+#define UCM_REG_DBG_SELECT \
+	0x1280050UL
+#define UCM_REG_DBG_DWORD_ENABLE \
+	0x1280054UL
+#define UCM_REG_DBG_SHIFT \
+	0x1280058UL
+#define UCM_REG_DBG_FORCE_VALID	\
+	0x128005cUL
+#define UCM_REG_DBG_FORCE_FRAME	\
+	0x1280060UL
+#define XSEM_REG_DBG_SELECT \
+	0x1401528UL
+#define XSEM_REG_DBG_DWORD_ENABLE \
+	0x140152cUL
+#define XSEM_REG_DBG_SHIFT \
+	0x1401530UL
+#define XSEM_REG_DBG_FORCE_VALID \
+	0x1401534UL
+#define XSEM_REG_DBG_FORCE_FRAME \
+	0x1401538UL
+#define YSEM_REG_DBG_SELECT \
+	0x1501528UL
+#define YSEM_REG_DBG_DWORD_ENABLE \
+	0x150152cUL
+#define YSEM_REG_DBG_SHIFT \
+	0x1501530UL
+#define YSEM_REG_DBG_FORCE_VALID \
+	0x1501534UL
+#define YSEM_REG_DBG_FORCE_FRAME \
+	0x1501538UL
+#define PSEM_REG_DBG_SELECT \
+	0x1601528UL
+#define PSEM_REG_DBG_DWORD_ENABLE \
+	0x160152cUL
+#define PSEM_REG_DBG_SHIFT \
+	0x1601530UL
+#define PSEM_REG_DBG_FORCE_VALID \
+	0x1601534UL
+#define PSEM_REG_DBG_FORCE_FRAME \
+	0x1601538UL
+#define TSEM_REG_DBG_SELECT \
+	0x1701528UL
+#define TSEM_REG_DBG_DWORD_ENABLE \
+	0x170152cUL
+#define TSEM_REG_DBG_SHIFT \
+	0x1701530UL
+#define TSEM_REG_DBG_FORCE_VALID \
+	0x1701534UL
+#define TSEM_REG_DBG_FORCE_FRAME \
+	0x1701538UL
+#define MSEM_REG_DBG_SELECT \
+	0x1801528UL
+#define MSEM_REG_DBG_DWORD_ENABLE \
+	0x180152cUL
+#define MSEM_REG_DBG_SHIFT \
+	0x1801530UL
+#define MSEM_REG_DBG_FORCE_VALID \
+	0x1801534UL
+#define MSEM_REG_DBG_FORCE_FRAME \
+	0x1801538UL
+#define USEM_REG_DBG_SELECT \
+	0x1901528UL
+#define USEM_REG_DBG_DWORD_ENABLE \
+	0x190152cUL
+#define USEM_REG_DBG_SHIFT \
+	0x1901530UL
+#define USEM_REG_DBG_FORCE_VALID \
+	0x1901534UL
+#define USEM_REG_DBG_FORCE_FRAME \
+	0x1901538UL
+#define PCIE_REG_DBG_COMMON_SELECT \
+	0x054398UL
+#define PCIE_REG_DBG_COMMON_DWORD_ENABLE \
+	0x05439cUL
+#define PCIE_REG_DBG_COMMON_SHIFT \
+	0x0543a0UL
+#define PCIE_REG_DBG_COMMON_FORCE_VALID	\
+	0x0543a4UL
+#define PCIE_REG_DBG_COMMON_FORCE_FRAME	\
+	0x0543a8UL
+#define MISC_REG_RESET_PL_UA \
+	0x008050UL
+#define MISC_REG_RESET_PL_HV \
+	0x008060UL
+#define XCM_REG_CTX_RBC_ACCS \
+	0x1001800UL
+#define XCM_REG_AGG_CON_CTX \
+	0x1001804UL
+#define XCM_REG_SM_CON_CTX \
+	0x1001808UL
+#define YCM_REG_CTX_RBC_ACCS \
+	0x1081800UL
+#define YCM_REG_AGG_CON_CTX \
+	0x1081804UL
+#define YCM_REG_AGG_TASK_CTX \
+	0x1081808UL
+#define YCM_REG_SM_CON_CTX \
+	0x108180cUL
+#define YCM_REG_SM_TASK_CTX \
+	0x1081810UL
+#define PCM_REG_CTX_RBC_ACCS \
+	0x1101440UL
+#define PCM_REG_SM_CON_CTX \
+	0x1101444UL
+#define TCM_REG_CTX_RBC_ACCS \
+	0x11814c0UL
+#define TCM_REG_AGG_CON_CTX \
+	0x11814c4UL
+#define TCM_REG_AGG_TASK_CTX \
+	0x11814c8UL
+#define TCM_REG_SM_CON_CTX \
+	0x11814ccUL
+#define TCM_REG_SM_TASK_CTX \
+	0x11814d0UL
+#define MCM_REG_CTX_RBC_ACCS \
+	0x1201800UL
+#define MCM_REG_AGG_CON_CTX \
+	0x1201804UL
+#define MCM_REG_AGG_TASK_CTX \
+	0x1201808UL
+#define MCM_REG_SM_CON_CTX \
+	0x120180cUL
+#define MCM_REG_SM_TASK_CTX \
+	0x1201810UL
+#define UCM_REG_CTX_RBC_ACCS \
+	0x1281700UL
+#define UCM_REG_AGG_CON_CTX \
+	0x1281704UL
+#define UCM_REG_AGG_TASK_CTX \
+	0x1281708UL
+#define UCM_REG_SM_CON_CTX \
+	0x128170cUL
+#define UCM_REG_SM_TASK_CTX \
+	0x1281710UL
+#define XSEM_REG_SLOW_DBG_EMPTY	\
+	0x1401140UL
+#define XSEM_REG_SYNC_DBG_EMPTY	\
+	0x1401160UL
+#define XSEM_REG_SLOW_DBG_ACTIVE \
+	0x1401400UL
+#define XSEM_REG_SLOW_DBG_MODE \
+	0x1401404UL
+#define XSEM_REG_DBG_FRAME_MODE	\
+	0x1401408UL
+#define XSEM_REG_DBG_MODE1_CFG \
+	0x1401420UL
+#define XSEM_REG_FAST_MEMORY \
+	0x1440000UL
+#define YSEM_REG_SYNC_DBG_EMPTY	\
+	0x1501160UL
+#define YSEM_REG_SLOW_DBG_ACTIVE \
+	0x1501400UL
+#define YSEM_REG_SLOW_DBG_MODE \
+	0x1501404UL
+#define YSEM_REG_DBG_FRAME_MODE	\
+	0x1501408UL
+#define YSEM_REG_DBG_MODE1_CFG \
+	0x1501420UL
+#define YSEM_REG_FAST_MEMORY \
+	0x1540000UL
+#define PSEM_REG_SLOW_DBG_EMPTY	\
+	0x1601140UL
+#define PSEM_REG_SYNC_DBG_EMPTY	\
+	0x1601160UL
+#define PSEM_REG_SLOW_DBG_ACTIVE \
+	0x1601400UL
+#define PSEM_REG_SLOW_DBG_MODE \
+	0x1601404UL
+#define PSEM_REG_DBG_FRAME_MODE	\
+	0x1601408UL
+#define PSEM_REG_DBG_MODE1_CFG \
+	0x1601420UL
+#define PSEM_REG_FAST_MEMORY \
+	0x1640000UL
+#define TSEM_REG_SLOW_DBG_EMPTY	\
+	0x1701140UL
+#define TSEM_REG_SYNC_DBG_EMPTY	\
+	0x1701160UL
+#define TSEM_REG_SLOW_DBG_ACTIVE \
+	0x1701400UL
+#define TSEM_REG_SLOW_DBG_MODE \
+	0x1701404UL
+#define TSEM_REG_DBG_FRAME_MODE	\
+	0x1701408UL
+#define TSEM_REG_DBG_MODE1_CFG \
+	0x1701420UL
+#define TSEM_REG_FAST_MEMORY \
+	0x1740000UL
+#define MSEM_REG_SLOW_DBG_EMPTY	\
+	0x1801140UL
+#define MSEM_REG_SYNC_DBG_EMPTY	\
+	0x1801160UL
+#define MSEM_REG_SLOW_DBG_ACTIVE \
+	0x1801400UL
+#define MSEM_REG_SLOW_DBG_MODE \
+	0x1801404UL
+#define MSEM_REG_DBG_FRAME_MODE	\
+	0x1801408UL
+#define MSEM_REG_DBG_MODE1_CFG \
+	0x1801420UL
+#define MSEM_REG_FAST_MEMORY \
+	0x1840000UL
+#define USEM_REG_SLOW_DBG_EMPTY	\
+	0x1901140UL
+#define USEM_REG_SYNC_DBG_EMPTY	\
+	0x1901160UL
+#define USEM_REG_SLOW_DBG_ACTIVE \
+	0x1901400UL
+#define USEM_REG_SLOW_DBG_MODE \
+	0x1901404UL
+#define USEM_REG_DBG_FRAME_MODE	\
+	0x1901408UL
+#define USEM_REG_DBG_MODE1_CFG \
+	0x1901420UL
+#define USEM_REG_FAST_MEMORY \
+	0x1940000UL
+#define SEM_FAST_REG_INT_RAM \
+	0x020000UL
+#define SEM_FAST_REG_INT_RAM_SIZE \
+	20480
+#define GRC_REG_TRACE_FIFO_VALID_DATA \
+	0x050064UL
+#define GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW \
+	0x05040cUL
+#define GRC_REG_PROTECTION_OVERRIDE_WINDOW \
+	0x050500UL
+#define IGU_REG_ERROR_HANDLING_MEMORY \
+	0x181520UL
+#define MCP_REG_CPU_MODE \
+	0xe05000UL
+#define MCP_REG_CPU_MODE_SOFT_HALT \
+		(0x1 << 10)
+#define BRB_REG_BIG_RAM_ADDRESS \
+	0x340800UL
+#define BRB_REG_BIG_RAM_DATA \
+	0x341500UL
+#define SEM_FAST_REG_STALL_0 \
+	0x000488UL
+#define SEM_FAST_REG_STALLED \
+	0x000494UL
+#define BTB_REG_BIG_RAM_ADDRESS \
+	0xdb0800UL
+#define BTB_REG_BIG_RAM_DATA \
+	0xdb0c00UL
+#define BMB_REG_BIG_RAM_ADDRESS \
+	0x540800UL
+#define BMB_REG_BIG_RAM_DATA \
+	0x540f00UL
+#define SEM_FAST_REG_STORM_REG_FILE \
+	0x008000UL
+#define RSS_REG_RSS_RAM_ADDR \
+	0x238c30UL
+#define MISCS_REG_BLOCK_256B_EN \
+	0x009074UL
+#define MCP_REG_SCRATCH_SIZE \
+	57344
+#define MCP_REG_CPU_REG_FILE \
+	0xe05200UL
+#define MCP_REG_CPU_REG_FILE_SIZE \
+	32
+#define DBG_REG_DEBUG_TARGET \
+	0x01005cUL
+#define DBG_REG_FULL_MODE \
+	0x010060UL
+#define DBG_REG_CALENDAR_OUT_DATA \
+	0x010480UL
+#define GRC_REG_TRACE_FIFO \
+	0x050068UL
+#define IGU_REG_ERROR_HANDLING_DATA_VALID \
+	0x181530UL
+#define DBG_REG_DBG_BLOCK_ON \
+	0x010454UL
+#define DBG_REG_FRAMING_MODE \
+	0x010058UL
+#define SEM_FAST_REG_VFC_DATA_WR \
+	0x000b40UL
+#define SEM_FAST_REG_VFC_ADDR \
+	0x000b44UL
+#define SEM_FAST_REG_VFC_DATA_RD \
+	0x000b48UL
+#define RSS_REG_RSS_RAM_DATA \
+	0x238c20UL
+#define MISC_REG_BLOCK_256B_EN \
+	0x008c14UL
+#define NWS_REG_NWS_CMU	\
+	0x720000UL
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0 \
+	0x000680UL
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8 \
+	0x000684UL
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0 \
+	0x0006c0UL
+#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8 \
+	0x0006c4UL
+#define MS_REG_MS_CMU \
+	0x6a4000UL
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130 \
+	0x000208UL
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132 \
+	0x000210UL
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131 \
+	0x00020cUL
+#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133 \
+	0x000214UL
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130 \
+	0x000208UL
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131 \
+	0x00020cUL
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132 \
+	0x000210UL
+#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133 \
+	0x000214UL
+#define PHY_PCIE_REG_PHY0 \
+	0x620000UL
+#define PHY_PCIE_REG_PHY1 \
+	0x624000UL
+#define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL
+#define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL
+#define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL
+#define DORQ_REG_PF_DPM_ENABLE 0x100510UL
+#define DORQ_REG_PF_ICID_BIT_SHIFT_NORM	0x100448UL
+#define DORQ_REG_PF_MIN_ADDR_REG1 0x100400UL
+#define DORQ_REG_PF_DPI_BIT_SHIFT 0x100450UL
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
new file mode 100644
index 0000000..2343005
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -0,0 +1,2954 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/io.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/tcp.h>
+#include <linux/bitops.h>
+#include <linux/qed/qed_roce_if.h>
+#include <linux/qed/qed_roce_if.h>
+#include "qed.h"
+#include "qed_cxt.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_init_ops.h"
+#include "qed_int.h"
+#include "qed_ll2.h"
+#include "qed_mcp.h"
+#include "qed_reg_addr.h"
+#include "qed_sp.h"
+#include "qed_roce.h"
+#include "qed_ll2.h"
+
+void qed_async_roce_event(struct qed_hwfn *p_hwfn,
+			  struct event_ring_entry *p_eqe)
+{
+	struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+
+	p_rdma_info->events.affiliated_event(p_rdma_info->events.context,
+					     p_eqe->opcode, &p_eqe->data);
+}
+
+static int qed_rdma_bmap_alloc(struct qed_hwfn *p_hwfn,
+			       struct qed_bmap *bmap, u32 max_count)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "max_count = %08x\n", max_count);
+
+	bmap->max_count = max_count;
+
+	bmap->bitmap = kzalloc(BITS_TO_LONGS(max_count) * sizeof(long),
+			       GFP_KERNEL);
+	if (!bmap->bitmap) {
+		DP_NOTICE(p_hwfn,
+			  "qed bmap alloc failed: cannot allocate memory (bitmap)\n");
+		return -ENOMEM;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocated bitmap %p\n",
+		   bmap->bitmap);
+	return 0;
+}
+
+static int qed_rdma_bmap_alloc_id(struct qed_hwfn *p_hwfn,
+				  struct qed_bmap *bmap, u32 *id_num)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "bmap = %p\n", bmap);
+
+	*id_num = find_first_zero_bit(bmap->bitmap, bmap->max_count);
+
+	if (*id_num >= bmap->max_count) {
+		DP_NOTICE(p_hwfn, "no id available max_count=%d\n",
+			  bmap->max_count);
+		return -EINVAL;
+	}
+
+	__set_bit(*id_num, bmap->bitmap);
+
+	return 0;
+}
+
+static void qed_bmap_release_id(struct qed_hwfn *p_hwfn,
+				struct qed_bmap *bmap, u32 id_num)
+{
+	bool b_acquired;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "id_num = %08x", id_num);
+	if (id_num >= bmap->max_count)
+		return;
+
+	b_acquired = test_and_clear_bit(id_num, bmap->bitmap);
+	if (!b_acquired) {
+		DP_NOTICE(p_hwfn, "ID %d already released\n", id_num);
+		return;
+	}
+}
+
+u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id)
+{
+	/* First sb id for RoCE is after all the l2 sb */
+	return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id;
+}
+
+u32 qed_rdma_query_cau_timer_res(void *rdma_cxt)
+{
+	return QED_CAU_DEF_RX_TIMER_RES;
+}
+
+static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  struct qed_rdma_start_in_params *params)
+{
+	struct qed_rdma_info *p_rdma_info;
+	u32 num_cons, num_tasks;
+	int rc = -ENOMEM;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocating RDMA\n");
+
+	/* Allocate a struct with current pf rdma info */
+	p_rdma_info = kzalloc(sizeof(*p_rdma_info), GFP_KERNEL);
+	if (!p_rdma_info) {
+		DP_NOTICE(p_hwfn,
+			  "qed rdma alloc failed: cannot allocate memory (rdma info). rc = %d\n",
+			  rc);
+		return rc;
+	}
+
+	p_hwfn->p_rdma_info = p_rdma_info;
+	p_rdma_info->proto = PROTOCOLID_ROCE;
+
+	num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, 0);
+
+	p_rdma_info->num_qps = num_cons / 2;
+
+	num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE);
+
+	/* Each MR uses a single task */
+	p_rdma_info->num_mrs = num_tasks;
+
+	/* Queue zone lines are shared between RoCE and L2 in such a way that
+	 * they can be used by each without obstructing the other.
+	 */
+	p_rdma_info->queue_zone_base = (u16)FEAT_NUM(p_hwfn, QED_L2_QUEUE);
+
+	/* Allocate a struct with device params and fill it */
+	p_rdma_info->dev = kzalloc(sizeof(*p_rdma_info->dev), GFP_KERNEL);
+	if (!p_rdma_info->dev) {
+		DP_NOTICE(p_hwfn,
+			  "qed rdma alloc failed: cannot allocate memory (rdma info dev). rc = %d\n",
+			  rc);
+		goto free_rdma_info;
+	}
+
+	/* Allocate a struct with port params and fill it */
+	p_rdma_info->port = kzalloc(sizeof(*p_rdma_info->port), GFP_KERNEL);
+	if (!p_rdma_info->port) {
+		DP_NOTICE(p_hwfn,
+			  "qed rdma alloc failed: cannot allocate memory (rdma info port). rc = %d\n",
+			  rc);
+		goto free_rdma_dev;
+	}
+
+	/* Allocate bit map for pd's */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->pd_map, RDMA_MAX_PDS);
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate pd_map, rc = %d\n",
+			   rc);
+		goto free_rdma_port;
+	}
+
+	/* Allocate DPI bitmap */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map,
+				 p_hwfn->dpi_count);
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate DPI bitmap, rc = %d\n", rc);
+		goto free_pd_map;
+	}
+
+	/* Allocate bitmap for cq's. The maximum number of CQs is bounded to
+	 * twice the number of QPs.
+	 */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map,
+				 p_rdma_info->num_qps * 2);
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate cq bitmap, rc = %d\n", rc);
+		goto free_dpi_map;
+	}
+
+	/* Allocate bitmap for toggle bit for cq icids
+	 * We toggle the bit every time we create or resize cq for a given icid.
+	 * The maximum number of CQs is bounded to  twice the number of QPs.
+	 */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits,
+				 p_rdma_info->num_qps * 2);
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate toogle bits, rc = %d\n", rc);
+		goto free_cq_map;
+	}
+
+	/* Allocate bitmap for itids */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->tid_map,
+				 p_rdma_info->num_mrs);
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate itids bitmaps, rc = %d\n", rc);
+		goto free_toggle_map;
+	}
+
+	/* Allocate bitmap for cids used for qps. */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cid_map, num_cons);
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate cid bitmap, rc = %d\n", rc);
+		goto free_tid_map;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n");
+	return 0;
+
+free_tid_map:
+	kfree(p_rdma_info->tid_map.bitmap);
+free_toggle_map:
+	kfree(p_rdma_info->toggle_bits.bitmap);
+free_cq_map:
+	kfree(p_rdma_info->cq_map.bitmap);
+free_dpi_map:
+	kfree(p_rdma_info->dpi_map.bitmap);
+free_pd_map:
+	kfree(p_rdma_info->pd_map.bitmap);
+free_rdma_port:
+	kfree(p_rdma_info->port);
+free_rdma_dev:
+	kfree(p_rdma_info->dev);
+free_rdma_info:
+	kfree(p_rdma_info);
+
+	return rc;
+}
+
+void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
+{
+	struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+
+	kfree(p_rdma_info->cid_map.bitmap);
+	kfree(p_rdma_info->tid_map.bitmap);
+	kfree(p_rdma_info->toggle_bits.bitmap);
+	kfree(p_rdma_info->cq_map.bitmap);
+	kfree(p_rdma_info->dpi_map.bitmap);
+	kfree(p_rdma_info->pd_map.bitmap);
+
+	kfree(p_rdma_info->port);
+	kfree(p_rdma_info->dev);
+
+	kfree(p_rdma_info);
+}
+
+static void qed_rdma_free(struct qed_hwfn *p_hwfn)
+{
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n");
+
+	qed_rdma_resc_free(p_hwfn);
+}
+
+static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid)
+{
+	guid[0] = p_hwfn->hw_info.hw_mac_addr[0] ^ 2;
+	guid[1] = p_hwfn->hw_info.hw_mac_addr[1];
+	guid[2] = p_hwfn->hw_info.hw_mac_addr[2];
+	guid[3] = 0xff;
+	guid[4] = 0xfe;
+	guid[5] = p_hwfn->hw_info.hw_mac_addr[3];
+	guid[6] = p_hwfn->hw_info.hw_mac_addr[4];
+	guid[7] = p_hwfn->hw_info.hw_mac_addr[5];
+}
+
+static void qed_rdma_init_events(struct qed_hwfn *p_hwfn,
+				 struct qed_rdma_start_in_params *params)
+{
+	struct qed_rdma_events *events;
+
+	events = &p_hwfn->p_rdma_info->events;
+
+	events->unaffiliated_event = params->events->unaffiliated_event;
+	events->affiliated_event = params->events->affiliated_event;
+	events->context = params->events->context;
+}
+
+static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn,
+				  struct qed_rdma_start_in_params *params)
+{
+	struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
+	struct qed_dev *cdev = p_hwfn->cdev;
+	u32 pci_status_control;
+	u32 num_qps;
+
+	/* Vendor specific information */
+	dev->vendor_id = cdev->vendor_id;
+	dev->vendor_part_id = cdev->device_id;
+	dev->hw_ver = 0;
+	dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) |
+		      (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION);
+
+	qed_rdma_get_guid(p_hwfn, (u8 *)&dev->sys_image_guid);
+	dev->node_guid = dev->sys_image_guid;
+
+	dev->max_sge = min_t(u32, RDMA_MAX_SGE_PER_SQ_WQE,
+			     RDMA_MAX_SGE_PER_RQ_WQE);
+
+	if (cdev->rdma_max_sge)
+		dev->max_sge = min_t(u32, cdev->rdma_max_sge, dev->max_sge);
+
+	dev->max_inline = ROCE_REQ_MAX_INLINE_DATA_SIZE;
+
+	dev->max_inline = (cdev->rdma_max_inline) ?
+			  min_t(u32, cdev->rdma_max_inline, dev->max_inline) :
+			  dev->max_inline;
+
+	dev->max_wqe = QED_RDMA_MAX_WQE;
+	dev->max_cnq = (u8)FEAT_NUM(p_hwfn, QED_RDMA_CNQ);
+
+	/* The number of QPs may be higher than QED_ROCE_MAX_QPS, because
+	 * it is up-aligned to 16 and then to ILT page size within qed cxt.
+	 * This is OK in terms of ILT but we don't want to configure the FW
+	 * above its abilities
+	 */
+	num_qps = ROCE_MAX_QPS;
+	num_qps = min_t(u64, num_qps, p_hwfn->p_rdma_info->num_qps);
+	dev->max_qp = num_qps;
+
+	/* CQs uses the same icids that QPs use hence they are limited by the
+	 * number of icids. There are two icids per QP.
+	 */
+	dev->max_cq = num_qps * 2;
+
+	/* The number of mrs is smaller by 1 since the first is reserved */
+	dev->max_mr = p_hwfn->p_rdma_info->num_mrs - 1;
+	dev->max_mr_size = QED_RDMA_MAX_MR_SIZE;
+
+	/* The maximum CQE capacity per CQ supported.
+	 * max number of cqes will be in two layer pbl,
+	 * 8 is the pointer size in bytes
+	 * 32 is the size of cq element in bytes
+	 */
+	if (params->cq_mode == QED_RDMA_CQ_MODE_32_BITS)
+		dev->max_cqe = QED_RDMA_MAX_CQE_32_BIT;
+	else
+		dev->max_cqe = QED_RDMA_MAX_CQE_16_BIT;
+
+	dev->max_mw = 0;
+	dev->max_fmr = QED_RDMA_MAX_FMR;
+	dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8);
+	dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE;
+	dev->max_pkey = QED_RDMA_MAX_P_KEY;
+
+	dev->max_qp_resp_rd_atomic_resc = RDMA_RING_PAGE_SIZE /
+					  (RDMA_RESP_RD_ATOMIC_ELM_SIZE * 2);
+	dev->max_qp_req_rd_atomic_resc = RDMA_RING_PAGE_SIZE /
+					 RDMA_REQ_RD_ATOMIC_ELM_SIZE;
+	dev->max_dev_resp_rd_atomic_resc = dev->max_qp_resp_rd_atomic_resc *
+					   p_hwfn->p_rdma_info->num_qps;
+	dev->page_size_caps = QED_RDMA_PAGE_SIZE_CAPS;
+	dev->dev_ack_delay = QED_RDMA_ACK_DELAY;
+	dev->max_pd = RDMA_MAX_PDS;
+	dev->max_ah = p_hwfn->p_rdma_info->num_qps;
+	dev->max_stats_queues = (u8)RESC_NUM(p_hwfn, QED_RDMA_STATS_QUEUE);
+
+	/* Set capablities */
+	dev->dev_caps = 0;
+	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RNR_NAK, 1);
+	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT, 1);
+	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT, 1);
+	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RESIZE_CQ, 1);
+	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_MEMORY_EXT, 1);
+	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_QUEUE_EXT, 1);
+	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ZBVA, 1);
+	SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1);
+
+	/* Check atomic operations support in PCI configuration space. */
+	pci_read_config_dword(cdev->pdev,
+			      cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2,
+			      &pci_status_control);
+
+	if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
+		SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
+}
+
+static void qed_rdma_init_port(struct qed_hwfn *p_hwfn)
+{
+	struct qed_rdma_port *port = p_hwfn->p_rdma_info->port;
+	struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
+
+	port->port_state = p_hwfn->mcp_info->link_output.link_up ?
+			   QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN;
+
+	port->max_msg_size = min_t(u64,
+				   (dev->max_mr_mw_fmr_size *
+				    p_hwfn->cdev->rdma_max_sge),
+				   BIT(31));
+
+	port->pkey_bad_counter = 0;
+}
+
+static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 ll2_ethertype_en;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n");
+	p_hwfn->b_rdma_enabled_in_prs = false;
+
+	qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
+
+	p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE;
+
+	/* We delay writing to this reg until first cid is allocated. See
+	 * qed_cxt_dynamic_ilt_alloc function for more details
+	 */
+	ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
+	qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
+	       (ll2_ethertype_en | 0x01));
+
+	if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) {
+		DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n");
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n");
+	return 0;
+}
+
+static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
+			     struct qed_rdma_start_in_params *params,
+			     struct qed_ptt *p_ptt)
+{
+	struct rdma_init_func_ramrod_data *p_ramrod;
+	struct qed_rdma_cnq_params *p_cnq_pbl_list;
+	struct rdma_init_func_hdr *p_params_header;
+	struct rdma_cnq_params *p_cnq_params;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	u32 cnq_id, sb_id;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Starting FW\n");
+
+	/* Save the number of cnqs for the function close ramrod */
+	p_hwfn->p_rdma_info->num_cnqs = params->desired_cnq;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_INIT,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
+
+	p_params_header = &p_ramrod->params_header;
+	p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn,
+							   QED_RDMA_CNQ_RAM);
+	p_params_header->num_cnqs = params->desired_cnq;
+
+	if (params->cq_mode == QED_RDMA_CQ_MODE_16_BITS)
+		p_params_header->cq_ring_mode = 1;
+	else
+		p_params_header->cq_ring_mode = 0;
+
+	for (cnq_id = 0; cnq_id < params->desired_cnq; cnq_id++) {
+		sb_id = qed_rdma_get_sb_id(p_hwfn, cnq_id);
+		p_cnq_params = &p_ramrod->cnq_params[cnq_id];
+		p_cnq_pbl_list = &params->cnq_pbl_list[cnq_id];
+		p_cnq_params->sb_num =
+			cpu_to_le16(p_hwfn->sbs_info[sb_id]->igu_sb_id);
+
+		p_cnq_params->sb_index = p_hwfn->pf_params.rdma_pf_params.gl_pi;
+		p_cnq_params->num_pbl_pages = p_cnq_pbl_list->num_pbl_pages;
+
+		DMA_REGPAIR_LE(p_cnq_params->pbl_base_addr,
+			       p_cnq_pbl_list->pbl_ptr);
+
+		/* we assume here that cnq_id and qz_offset are the same */
+		p_cnq_params->queue_zone_num =
+			cpu_to_le16(p_hwfn->p_rdma_info->queue_zone_base +
+				    cnq_id);
+	}
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn)
+{
+	struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev;
+
+	/* The first DPI is reserved for the Kernel */
+	__set_bit(0, p_hwfn->p_rdma_info->dpi_map.bitmap);
+
+	/* Tid 0 will be used as the key for "reserved MR".
+	 * The driver should allocate memory for it so it can be loaded but no
+	 * ramrod should be passed on it.
+	 */
+	qed_rdma_alloc_tid(p_hwfn, &dev->reserved_lkey);
+	if (dev->reserved_lkey != RDMA_RESERVED_LKEY) {
+		DP_NOTICE(p_hwfn,
+			  "Reserved lkey should be equal to RDMA_RESERVED_LKEY\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  struct qed_rdma_start_in_params *params)
+{
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA setup\n");
+
+	spin_lock_init(&p_hwfn->p_rdma_info->lock);
+
+	qed_rdma_init_devinfo(p_hwfn, params);
+	qed_rdma_init_port(p_hwfn);
+	qed_rdma_init_events(p_hwfn, params);
+
+	rc = qed_rdma_reserve_lkey(p_hwfn);
+	if (rc)
+		return rc;
+
+	rc = qed_rdma_init_hw(p_hwfn, p_ptt);
+	if (rc)
+		return rc;
+
+	return qed_rdma_start_fw(p_hwfn, params, p_ptt);
+}
+
+int qed_rdma_stop(void *rdma_cxt)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct rdma_close_func_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	struct qed_ptt *p_ptt;
+	u32 ll2_ethertype_en;
+	int rc = -EBUSY;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA stop\n");
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to acquire PTT\n");
+		return rc;
+	}
+
+	/* Disable RoCE search */
+	qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0);
+	p_hwfn->b_rdma_enabled_in_prs = false;
+
+	qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0);
+
+	ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN);
+
+	qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN,
+	       (ll2_ethertype_en & 0xFFFE));
+
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	/* Stop RoCE */
+	rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_CLOSE,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc)
+		goto out;
+
+	p_ramrod = &p_ent->ramrod.rdma_close_func;
+
+	p_ramrod->num_cnqs = p_hwfn->p_rdma_info->num_cnqs;
+	p_ramrod->cnq_start_offset = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+out:
+	qed_rdma_free(p_hwfn);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA stop done, rc = %d\n", rc);
+	return rc;
+}
+
+int qed_rdma_add_user(void *rdma_cxt,
+		      struct qed_rdma_add_user_out_params *out_params)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	u32 dpi_start_offset;
+	u32 returned_id = 0;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Adding User\n");
+
+	/* Allocate DPI */
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map,
+				    &returned_id);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+	out_params->dpi = (u16)returned_id;
+
+	/* Calculate the corresponding DPI address */
+	dpi_start_offset = p_hwfn->dpi_start_offset;
+
+	out_params->dpi_addr = (u64)((u8 __iomem *)p_hwfn->doorbells +
+				     dpi_start_offset +
+				     ((out_params->dpi) * p_hwfn->dpi_size));
+
+	out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr +
+				    dpi_start_offset +
+				    ((out_params->dpi) * p_hwfn->dpi_size);
+
+	out_params->dpi_size = p_hwfn->dpi_size;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Adding user - done, rc = %d\n", rc);
+	return rc;
+}
+
+struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n");
+
+	/* Link may have changed */
+	p_port->port_state = p_hwfn->mcp_info->link_output.link_up ?
+			     QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN;
+
+	p_port->link_speed = p_hwfn->mcp_info->link_output.speed;
+
+	return p_port;
+}
+
+struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query device\n");
+
+	/* Return struct with device parameters */
+	return p_hwfn->p_rdma_info->dev;
+}
+
+void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n");
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	rc = qed_rdma_bmap_alloc_id(p_hwfn,
+				    &p_hwfn->p_rdma_info->tid_map, itid);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+	if (rc)
+		goto out;
+
+	rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK, *itid);
+out:
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc);
+	return rc;
+}
+
+void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod)
+{
+	struct qed_hwfn *p_hwfn;
+	u16 qz_num;
+	u32 addr;
+
+	p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	qz_num = p_hwfn->p_rdma_info->queue_zone_base + qz_offset;
+	addr = GTT_BAR0_MAP_REG_USDM_RAM +
+	       USTORM_COMMON_QUEUE_CONS_OFFSET(qz_num);
+
+	REG_WR16(p_hwfn, addr, prod);
+
+	/* keep prod updates ordered */
+	wmb();
+}
+
+static int qed_fill_rdma_dev_info(struct qed_dev *cdev,
+				  struct qed_dev_rdma_info *info)
+{
+	memset(info, 0, sizeof(*info));
+
+	info->rdma_type = QED_RDMA_TYPE_ROCE;
+
+	qed_fill_dev_info(cdev, &info->common);
+
+	return 0;
+}
+
+static int qed_rdma_get_sb_start(struct qed_dev *cdev)
+{
+	int feat_num;
+
+	if (cdev->num_hwfns > 1)
+		feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE);
+	else
+		feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE) *
+			   cdev->num_hwfns;
+
+	return feat_num;
+}
+
+static int qed_rdma_get_min_cnq_msix(struct qed_dev *cdev)
+{
+	int n_cnq = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_RDMA_CNQ);
+	int n_msix = cdev->int_params.rdma_msix_cnt;
+
+	return min_t(int, n_cnq, n_msix);
+}
+
+static int qed_rdma_set_int(struct qed_dev *cdev, u16 cnt)
+{
+	int limit = 0;
+
+	/* Mark the fastpath as free/used */
+	cdev->int_params.fp_initialized = cnt ? true : false;
+
+	if (cdev->int_params.out.int_mode != QED_INT_MODE_MSIX) {
+		DP_ERR(cdev,
+		       "qed roce supports only MSI-X interrupts (detected %d).\n",
+		       cdev->int_params.out.int_mode);
+		return -EINVAL;
+	} else if (cdev->int_params.fp_msix_cnt) {
+		limit = cdev->int_params.rdma_msix_cnt;
+	}
+
+	if (!limit)
+		return -ENOMEM;
+
+	return min_t(int, cnt, limit);
+}
+
+static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info)
+{
+	memset(info, 0, sizeof(*info));
+
+	if (!cdev->int_params.fp_initialized) {
+		DP_INFO(cdev,
+			"Protocol driver requested interrupt information, but its support is not yet configured\n");
+		return -EINVAL;
+	}
+
+	if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
+		int msix_base = cdev->int_params.rdma_msix_base;
+
+		info->msix_cnt = cdev->int_params.rdma_msix_cnt;
+		info->msix = &cdev->int_params.msix_table[msix_base];
+
+		DP_VERBOSE(cdev, QED_MSG_RDMA, "msix_cnt = %d msix_base=%d\n",
+			   info->msix_cnt, msix_base);
+	}
+
+	return 0;
+}
+
+int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	u32 returned_id;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD\n");
+
+	/* Allocates an unused protection domain */
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	rc = qed_rdma_bmap_alloc_id(p_hwfn,
+				    &p_hwfn->p_rdma_info->pd_map, &returned_id);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+	*pd = (u16)returned_id;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD - done, rc = %d\n", rc);
+	return rc;
+}
+
+void qed_rdma_free_pd(void *rdma_cxt, u16 pd)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "pd = %08x\n", pd);
+
+	/* Returns a previously allocated protection domain for reuse */
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->pd_map, pd);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+static enum qed_rdma_toggle_bit
+qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid)
+{
+	struct qed_rdma_info *p_info = p_hwfn->p_rdma_info;
+	enum qed_rdma_toggle_bit toggle_bit;
+	u32 bmap_id;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", icid);
+
+	/* the function toggle the bit that is related to a given icid
+	 * and returns the new toggle bit's value
+	 */
+	bmap_id = icid - qed_cxt_get_proto_cid_start(p_hwfn, p_info->proto);
+
+	spin_lock_bh(&p_info->lock);
+	toggle_bit = !test_and_change_bit(bmap_id,
+					  p_info->toggle_bits.bitmap);
+	spin_unlock_bh(&p_info->lock);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QED_RDMA_TOGGLE_BIT_= %d\n",
+		   toggle_bit);
+
+	return toggle_bit;
+}
+
+int qed_rdma_create_cq(void *rdma_cxt,
+		       struct qed_rdma_create_cq_in_params *params, u16 *icid)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct qed_rdma_info *p_info = p_hwfn->p_rdma_info;
+	struct rdma_create_cq_ramrod_data *p_ramrod;
+	enum qed_rdma_toggle_bit toggle_bit;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	u32 returned_id, start_cid;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "cq_handle = %08x%08x\n",
+		   params->cq_handle_hi, params->cq_handle_lo);
+
+	/* Allocate icid */
+	spin_lock_bh(&p_info->lock);
+	rc = qed_rdma_bmap_alloc_id(p_hwfn,
+				    &p_info->cq_map, &returned_id);
+	spin_unlock_bh(&p_info->lock);
+
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Can't create CQ, rc = %d\n", rc);
+		return rc;
+	}
+
+	start_cid = qed_cxt_get_proto_cid_start(p_hwfn,
+						p_info->proto);
+	*icid = returned_id + start_cid;
+
+	/* Check if icid requires a page allocation */
+	rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *icid);
+	if (rc)
+		goto err;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = *icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	/* Send create CQ ramrod */
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 RDMA_RAMROD_CREATE_CQ,
+				 p_info->proto, &init_data);
+	if (rc)
+		goto err;
+
+	p_ramrod = &p_ent->ramrod.rdma_create_cq;
+
+	p_ramrod->cq_handle.hi = cpu_to_le32(params->cq_handle_hi);
+	p_ramrod->cq_handle.lo = cpu_to_le32(params->cq_handle_lo);
+	p_ramrod->dpi = cpu_to_le16(params->dpi);
+	p_ramrod->is_two_level_pbl = params->pbl_two_level;
+	p_ramrod->max_cqes = cpu_to_le32(params->cq_size);
+	DMA_REGPAIR_LE(p_ramrod->pbl_addr, params->pbl_ptr);
+	p_ramrod->pbl_num_pages = cpu_to_le16(params->pbl_num_pages);
+	p_ramrod->cnq_id = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM) +
+			   params->cnq_id;
+	p_ramrod->int_timeout = params->int_timeout;
+
+	/* toggle the bit for every resize or create cq for a given icid */
+	toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid);
+
+	p_ramrod->toggle_bit = toggle_bit;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (rc) {
+		/* restore toggle bit */
+		qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid);
+		goto err;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Created CQ, rc = %d\n", rc);
+	return rc;
+
+err:
+	/* release allocated icid */
+	qed_bmap_release_id(p_hwfn, &p_info->cq_map, returned_id);
+	DP_NOTICE(p_hwfn, "Create CQ failed, rc = %d\n", rc);
+
+	return rc;
+}
+
+int qed_rdma_resize_cq(void *rdma_cxt,
+		       struct qed_rdma_resize_cq_in_params *in_params,
+		       struct qed_rdma_resize_cq_out_params *out_params)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct rdma_resize_cq_output_params *p_ramrod_res;
+	struct rdma_resize_cq_ramrod_data *p_ramrod;
+	enum qed_rdma_toggle_bit toggle_bit;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	dma_addr_t ramrod_res_phys;
+	u8 fw_return_code;
+	int rc = -ENOMEM;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid);
+
+	p_ramrod_res =
+	    (struct rdma_resize_cq_output_params *)
+	    dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+			       sizeof(struct rdma_resize_cq_output_params),
+			       &ramrod_res_phys, GFP_KERNEL);
+	if (!p_ramrod_res) {
+		DP_NOTICE(p_hwfn,
+			  "qed resize cq failed: cannot allocate memory (ramrod)\n");
+		return rc;
+	}
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = in_params->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 RDMA_RAMROD_RESIZE_CQ,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc)
+		goto err;
+
+	p_ramrod = &p_ent->ramrod.rdma_resize_cq;
+
+	p_ramrod->flags = 0;
+
+	/* toggle the bit for every resize or create cq for a given icid */
+	toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn,
+							  in_params->icid);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, toggle_bit);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL,
+		  in_params->pbl_two_level);
+
+	p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12;
+	p_ramrod->pbl_num_pages = cpu_to_le16(in_params->pbl_num_pages);
+	p_ramrod->max_cqes = cpu_to_le32(in_params->cq_size);
+	DMA_REGPAIR_LE(p_ramrod->pbl_addr, in_params->pbl_ptr);
+	DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys);
+
+	rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code);
+	if (rc)
+		goto err;
+
+	if (fw_return_code != RDMA_RETURN_OK) {
+		DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code);
+		rc = -EINVAL;
+		goto err;
+	}
+
+	out_params->prod = le32_to_cpu(p_ramrod_res->old_cq_prod);
+	out_params->cons = le32_to_cpu(p_ramrod_res->old_cq_cons);
+
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct rdma_resize_cq_output_params),
+			  p_ramrod_res, ramrod_res_phys);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Resized CQ, rc = %d\n", rc);
+
+	return rc;
+
+err:	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct rdma_resize_cq_output_params),
+			  p_ramrod_res, ramrod_res_phys);
+	DP_NOTICE(p_hwfn, "Resized CQ, Failed - rc = %d\n", rc);
+
+	return rc;
+}
+
+int qed_rdma_destroy_cq(void *rdma_cxt,
+			struct qed_rdma_destroy_cq_in_params *in_params,
+			struct qed_rdma_destroy_cq_out_params *out_params)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct rdma_destroy_cq_output_params *p_ramrod_res;
+	struct rdma_destroy_cq_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	dma_addr_t ramrod_res_phys;
+	int rc = -ENOMEM;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid);
+
+	p_ramrod_res =
+	    (struct rdma_destroy_cq_output_params *)
+	    dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+			       sizeof(struct rdma_destroy_cq_output_params),
+			       &ramrod_res_phys, GFP_KERNEL);
+	if (!p_ramrod_res) {
+		DP_NOTICE(p_hwfn,
+			  "qed destroy cq failed: cannot allocate memory (ramrod)\n");
+		return rc;
+	}
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = in_params->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	/* Send destroy CQ ramrod */
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 RDMA_RAMROD_DESTROY_CQ,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc)
+		goto err;
+
+	p_ramrod = &p_ent->ramrod.rdma_destroy_cq;
+	DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (rc)
+		goto err;
+
+	out_params->num_cq_notif = le16_to_cpu(p_ramrod_res->cnq_num);
+
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct rdma_destroy_cq_output_params),
+			  p_ramrod_res, ramrod_res_phys);
+
+	/* Free icid */
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+	qed_bmap_release_id(p_hwfn,
+			    &p_hwfn->p_rdma_info->cq_map,
+			    (in_params->icid -
+			     qed_cxt_get_proto_cid_start(p_hwfn,
+							 p_hwfn->
+							 p_rdma_info->proto)));
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroyed CQ, rc = %d\n", rc);
+	return rc;
+
+err:	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct rdma_destroy_cq_output_params),
+			  p_ramrod_res, ramrod_res_phys);
+
+	return rc;
+}
+
+static void qed_rdma_set_fw_mac(u16 *p_fw_mac, u8 *p_qed_mac)
+{
+	p_fw_mac[0] = cpu_to_le16((p_qed_mac[0] << 8) + p_qed_mac[1]);
+	p_fw_mac[1] = cpu_to_le16((p_qed_mac[2] << 8) + p_qed_mac[3]);
+	p_fw_mac[2] = cpu_to_le16((p_qed_mac[4] << 8) + p_qed_mac[5]);
+}
+
+static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid,
+			       __le32 *dst_gid)
+{
+	u32 i;
+
+	if (qp->roce_mode == ROCE_V2_IPV4) {
+		/* The IPv4 addresses shall be aligned to the highest word.
+		 * The lower words must be zero.
+		 */
+		memset(src_gid, 0, sizeof(union qed_gid));
+		memset(dst_gid, 0, sizeof(union qed_gid));
+		src_gid[3] = cpu_to_le32(qp->sgid.ipv4_addr);
+		dst_gid[3] = cpu_to_le32(qp->dgid.ipv4_addr);
+	} else {
+		/* GIDs and IPv6 addresses coincide in location and size */
+		for (i = 0; i < ARRAY_SIZE(qp->sgid.dwords); i++) {
+			src_gid[i] = cpu_to_le32(qp->sgid.dwords[i]);
+			dst_gid[i] = cpu_to_le32(qp->dgid.dwords[i]);
+		}
+	}
+}
+
+static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode)
+{
+	enum roce_flavor flavor;
+
+	switch (roce_mode) {
+	case ROCE_V1:
+		flavor = PLAIN_ROCE;
+		break;
+	case ROCE_V2_IPV4:
+		flavor = RROCE_IPV4;
+		break;
+	case ROCE_V2_IPV6:
+		flavor = ROCE_V2_IPV6;
+		break;
+	default:
+		flavor = MAX_ROCE_MODE;
+		break;
+	}
+	return flavor;
+}
+
+int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid)
+{
+	struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info;
+	u32 responder_icid;
+	u32 requester_icid;
+	int rc;
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->cid_map,
+				    &responder_icid);
+	if (rc) {
+		spin_unlock_bh(&p_rdma_info->lock);
+		return rc;
+	}
+
+	rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->cid_map,
+				    &requester_icid);
+
+	spin_unlock_bh(&p_rdma_info->lock);
+	if (rc)
+		goto err;
+
+	/* the two icid's should be adjacent */
+	if ((requester_icid - responder_icid) != 1) {
+		DP_NOTICE(p_hwfn, "Failed to allocate two adjacent qp's'\n");
+		rc = -EINVAL;
+		goto err;
+	}
+
+	responder_icid += qed_cxt_get_proto_cid_start(p_hwfn,
+						      p_rdma_info->proto);
+	requester_icid += qed_cxt_get_proto_cid_start(p_hwfn,
+						      p_rdma_info->proto);
+
+	/* If these icids require a new ILT line allocate DMA-able context for
+	 * an ILT page
+	 */
+	rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, responder_icid);
+	if (rc)
+		goto err;
+
+	rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, requester_icid);
+	if (rc)
+		goto err;
+
+	*cid = (u16)responder_icid;
+	return rc;
+
+err:
+	spin_lock_bh(&p_rdma_info->lock);
+	qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, responder_icid);
+	qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, requester_icid);
+
+	spin_unlock_bh(&p_rdma_info->lock);
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "Allocate CID - failed, rc = %d\n", rc);
+	return rc;
+}
+
+static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
+					struct qed_rdma_qp *qp)
+{
+	struct roce_create_qp_resp_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	union qed_qm_pq_params qm_params;
+	enum roce_flavor roce_flavor;
+	struct qed_spq_entry *p_ent;
+	u16 physical_queue0 = 0;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
+
+	/* Allocate DMA-able memory for IRQ */
+	qp->irq_num_pages = 1;
+	qp->irq = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+				     RDMA_RING_PAGE_SIZE,
+				     &qp->irq_phys_addr, GFP_KERNEL);
+	if (!qp->irq) {
+		rc = -ENOMEM;
+		DP_NOTICE(p_hwfn,
+			  "qed create responder failed: cannot allocate memory (irq). rc = %d\n",
+			  rc);
+		return rc;
+	}
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_CREATE_QP,
+				 PROTOCOLID_ROCE, &init_data);
+	if (rc)
+		goto err;
+
+	p_ramrod = &p_ent->ramrod.roce_create_qp_resp;
+
+	p_ramrod->flags = 0;
+
+	roce_flavor = qed_roce_mode_to_flavor(qp->roce_mode);
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_ROCE_FLAVOR, roce_flavor);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_RD_EN,
+		  qp->incoming_rdma_read_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_WR_EN,
+		  qp->incoming_rdma_write_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_ATOMIC_EN,
+		  qp->incoming_atomic_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN,
+		  qp->e2e_flow_control_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG, qp->use_srq);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN,
+		  qp->fmr_and_reserved_lkey);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER,
+		  qp->min_rnr_nak_timer);
+
+	p_ramrod->max_ird = qp->max_rd_atomic_resp;
+	p_ramrod->traffic_class = qp->traffic_class_tos;
+	p_ramrod->hop_limit = qp->hop_limit_ttl;
+	p_ramrod->irq_num_pages = qp->irq_num_pages;
+	p_ramrod->p_key = cpu_to_le16(qp->pkey);
+	p_ramrod->flow_label = cpu_to_le32(qp->flow_label);
+	p_ramrod->dst_qp_id = cpu_to_le32(qp->dest_qp);
+	p_ramrod->mtu = cpu_to_le16(qp->mtu);
+	p_ramrod->initial_psn = cpu_to_le32(qp->rq_psn);
+	p_ramrod->pd = cpu_to_le16(qp->pd);
+	p_ramrod->rq_num_pages = cpu_to_le16(qp->rq_num_pages);
+	DMA_REGPAIR_LE(p_ramrod->rq_pbl_addr, qp->rq_pbl_ptr);
+	DMA_REGPAIR_LE(p_ramrod->irq_pbl_addr, qp->irq_phys_addr);
+	qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid);
+	p_ramrod->qp_handle_for_async.hi = cpu_to_le32(qp->qp_handle_async.hi);
+	p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo);
+	p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi);
+	p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo);
+	p_ramrod->stats_counter_id = p_hwfn->rel_pf_id;
+	p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) |
+				       qp->rq_cq_id);
+
+	memset(&qm_params, 0, sizeof(qm_params));
+	qm_params.roce.qpid = qp->icid >> 1;
+	physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params);
+
+	p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0);
+	p_ramrod->dpi = cpu_to_le16(qp->dpi);
+
+	qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr);
+	qed_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr);
+
+	p_ramrod->udp_src_port = qp->udp_src_port;
+	p_ramrod->vlan_id = cpu_to_le16(qp->vlan_id);
+	p_ramrod->srq_id.srq_idx = cpu_to_le16(qp->srq_id);
+	p_ramrod->srq_id.opaque_fid = cpu_to_le16(p_hwfn->hw_info.opaque_fid);
+
+	p_ramrod->stats_counter_id = RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) +
+				     qp->stats_queue;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d physical_queue0 = 0x%x\n",
+		   rc, physical_queue0);
+
+	if (rc)
+		goto err;
+
+	qp->resp_offloaded = true;
+
+	return rc;
+
+err:
+	DP_NOTICE(p_hwfn, "create responder - failed, rc = %d\n", rc);
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  qp->irq_num_pages * RDMA_RING_PAGE_SIZE,
+			  qp->irq, qp->irq_phys_addr);
+
+	return rc;
+}
+
+static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
+					struct qed_rdma_qp *qp)
+{
+	struct roce_create_qp_req_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	union qed_qm_pq_params qm_params;
+	enum roce_flavor roce_flavor;
+	struct qed_spq_entry *p_ent;
+	u16 physical_queue0 = 0;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
+
+	/* Allocate DMA-able memory for ORQ */
+	qp->orq_num_pages = 1;
+	qp->orq = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+				     RDMA_RING_PAGE_SIZE,
+				     &qp->orq_phys_addr, GFP_KERNEL);
+	if (!qp->orq) {
+		rc = -ENOMEM;
+		DP_NOTICE(p_hwfn,
+			  "qed create requester failed: cannot allocate memory (orq). rc = %d\n",
+			  rc);
+		return rc;
+	}
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid + 1;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ROCE_RAMROD_CREATE_QP,
+				 PROTOCOLID_ROCE, &init_data);
+	if (rc)
+		goto err;
+
+	p_ramrod = &p_ent->ramrod.roce_create_qp_req;
+
+	p_ramrod->flags = 0;
+
+	roce_flavor = qed_roce_mode_to_flavor(qp->roce_mode);
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_REQ_RAMROD_DATA_ROCE_FLAVOR, roce_flavor);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_REQ_RAMROD_DATA_FMR_AND_RESERVED_EN,
+		  qp->fmr_and_reserved_lkey);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_REQ_RAMROD_DATA_SIGNALED_COMP, qp->signal_all);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, qp->retry_cnt);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_REQ_RAMROD_DATA_RNR_NAK_CNT,
+		  qp->rnr_retry_cnt);
+
+	p_ramrod->max_ord = qp->max_rd_atomic_req;
+	p_ramrod->traffic_class = qp->traffic_class_tos;
+	p_ramrod->hop_limit = qp->hop_limit_ttl;
+	p_ramrod->orq_num_pages = qp->orq_num_pages;
+	p_ramrod->p_key = cpu_to_le16(qp->pkey);
+	p_ramrod->flow_label = cpu_to_le32(qp->flow_label);
+	p_ramrod->dst_qp_id = cpu_to_le32(qp->dest_qp);
+	p_ramrod->ack_timeout_val = cpu_to_le32(qp->ack_timeout);
+	p_ramrod->mtu = cpu_to_le16(qp->mtu);
+	p_ramrod->initial_psn = cpu_to_le32(qp->sq_psn);
+	p_ramrod->pd = cpu_to_le16(qp->pd);
+	p_ramrod->sq_num_pages = cpu_to_le16(qp->sq_num_pages);
+	DMA_REGPAIR_LE(p_ramrod->sq_pbl_addr, qp->sq_pbl_ptr);
+	DMA_REGPAIR_LE(p_ramrod->orq_pbl_addr, qp->orq_phys_addr);
+	qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid);
+	p_ramrod->qp_handle_for_async.hi = cpu_to_le32(qp->qp_handle_async.hi);
+	p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo);
+	p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi);
+	p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo);
+	p_ramrod->stats_counter_id = p_hwfn->rel_pf_id;
+	p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) |
+				       qp->sq_cq_id);
+
+	memset(&qm_params, 0, sizeof(qm_params));
+	qm_params.roce.qpid = qp->icid >> 1;
+	physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params);
+
+	p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0);
+	p_ramrod->dpi = cpu_to_le16(qp->dpi);
+
+	qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr);
+	qed_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr);
+
+	p_ramrod->udp_src_port = qp->udp_src_port;
+	p_ramrod->vlan_id = cpu_to_le16(qp->vlan_id);
+	p_ramrod->stats_counter_id = RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) +
+				     qp->stats_queue;
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+
+	if (rc)
+		goto err;
+
+	qp->req_offloaded = true;
+
+	return rc;
+
+err:
+	DP_NOTICE(p_hwfn, "Create requested - failed, rc = %d\n", rc);
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  qp->orq_num_pages * RDMA_RING_PAGE_SIZE,
+			  qp->orq, qp->orq_phys_addr);
+	return rc;
+}
+
+static int qed_roce_sp_modify_responder(struct qed_hwfn *p_hwfn,
+					struct qed_rdma_qp *qp,
+					bool move_to_err, u32 modify_flags)
+{
+	struct roce_modify_qp_resp_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
+
+	if (move_to_err && !qp->resp_offloaded)
+		return 0;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ROCE_EVENT_MODIFY_QP,
+				 PROTOCOLID_ROCE, &init_data);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "rc = %d\n", rc);
+		return rc;
+	}
+
+	p_ramrod = &p_ent->ramrod.roce_modify_qp_resp;
+
+	p_ramrod->flags = 0;
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_MOVE_TO_ERR_FLG, move_to_err);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_RD_EN,
+		  qp->incoming_rdma_read_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_WR_EN,
+		  qp->incoming_rdma_write_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_ATOMIC_EN,
+		  qp->incoming_atomic_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN,
+		  qp->e2e_flow_control_en);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_OPS_EN_FLG,
+		  GET_FIELD(modify_flags,
+			    QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN));
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_P_KEY_FLG,
+		  GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY));
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_ADDRESS_VECTOR_FLG,
+		  GET_FIELD(modify_flags,
+			    QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR));
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_MAX_IRD_FLG,
+		  GET_FIELD(modify_flags,
+			    QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP));
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_FLG,
+		  GET_FIELD(modify_flags,
+			    QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER));
+
+	p_ramrod->fields = 0;
+	SET_FIELD(p_ramrod->fields,
+		  ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER,
+		  qp->min_rnr_nak_timer);
+
+	p_ramrod->max_ird = qp->max_rd_atomic_resp;
+	p_ramrod->traffic_class = qp->traffic_class_tos;
+	p_ramrod->hop_limit = qp->hop_limit_ttl;
+	p_ramrod->p_key = cpu_to_le16(qp->pkey);
+	p_ramrod->flow_label = cpu_to_le32(qp->flow_label);
+	p_ramrod->mtu = cpu_to_le16(qp->mtu);
+	qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid);
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify responder, rc = %d\n", rc);
+	return rc;
+}
+
+static int qed_roce_sp_modify_requester(struct qed_hwfn *p_hwfn,
+					struct qed_rdma_qp *qp,
+					bool move_to_sqd,
+					bool move_to_err, u32 modify_flags)
+{
+	struct roce_modify_qp_req_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
+
+	if (move_to_err && !(qp->req_offloaded))
+		return 0;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid + 1;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ROCE_EVENT_MODIFY_QP,
+				 PROTOCOLID_ROCE, &init_data);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "rc = %d\n", rc);
+		return rc;
+	}
+
+	p_ramrod = &p_ent->ramrod.roce_modify_qp_req;
+
+	p_ramrod->flags = 0;
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG, move_to_err);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_SQD_FLG, move_to_sqd);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_EN_SQD_ASYNC_NOTIFY,
+		  qp->sqd_async);
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_P_KEY_FLG,
+		  GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY));
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_ADDRESS_VECTOR_FLG,
+		  GET_FIELD(modify_flags,
+			    QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR));
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_MAX_ORD_FLG,
+		  GET_FIELD(modify_flags,
+			    QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ));
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT_FLG,
+		  GET_FIELD(modify_flags,
+			    QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT));
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_FLG,
+		  GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_RETRY_CNT));
+
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_ACK_TIMEOUT_FLG,
+		  GET_FIELD(modify_flags,
+			    QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT));
+
+	p_ramrod->fields = 0;
+	SET_FIELD(p_ramrod->fields,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, qp->retry_cnt);
+
+	SET_FIELD(p_ramrod->fields,
+		  ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT,
+		  qp->rnr_retry_cnt);
+
+	p_ramrod->max_ord = qp->max_rd_atomic_req;
+	p_ramrod->traffic_class = qp->traffic_class_tos;
+	p_ramrod->hop_limit = qp->hop_limit_ttl;
+	p_ramrod->p_key = cpu_to_le16(qp->pkey);
+	p_ramrod->flow_label = cpu_to_le32(qp->flow_label);
+	p_ramrod->ack_timeout_val = cpu_to_le32(qp->ack_timeout);
+	p_ramrod->mtu = cpu_to_le16(qp->mtu);
+	qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid);
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify requester, rc = %d\n", rc);
+	return rc;
+}
+
+static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn,
+					    struct qed_rdma_qp *qp,
+					    u32 *num_invalidated_mw)
+{
+	struct roce_destroy_qp_resp_output_params *p_ramrod_res;
+	struct roce_destroy_qp_resp_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	dma_addr_t ramrod_res_phys;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
+
+	if (!qp->resp_offloaded)
+		return 0;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ROCE_RAMROD_DESTROY_QP,
+				 PROTOCOLID_ROCE, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.roce_destroy_qp_resp;
+
+	p_ramrod_res = (struct roce_destroy_qp_resp_output_params *)
+	    dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_ramrod_res),
+			       &ramrod_res_phys, GFP_KERNEL);
+
+	if (!p_ramrod_res) {
+		rc = -ENOMEM;
+		DP_NOTICE(p_hwfn,
+			  "qed destroy responder failed: cannot allocate memory (ramrod). rc = %d\n",
+			  rc);
+		return rc;
+	}
+
+	DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (rc)
+		goto err;
+
+	*num_invalidated_mw = le32_to_cpu(p_ramrod_res->num_invalidated_mw);
+
+	/* Free IRQ - only if ramrod succeeded, in case FW is still using it */
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  qp->irq_num_pages * RDMA_RING_PAGE_SIZE,
+			  qp->irq, qp->irq_phys_addr);
+
+	qp->resp_offloaded = false;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroy responder, rc = %d\n", rc);
+
+err:
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  sizeof(struct roce_destroy_qp_resp_output_params),
+			  p_ramrod_res, ramrod_res_phys);
+
+	return rc;
+}
+
+static int qed_roce_sp_destroy_qp_requester(struct qed_hwfn *p_hwfn,
+					    struct qed_rdma_qp *qp,
+					    u32 *num_bound_mw)
+{
+	struct roce_destroy_qp_req_output_params *p_ramrod_res;
+	struct roce_destroy_qp_req_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	dma_addr_t ramrod_res_phys;
+	int rc = -ENOMEM;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
+
+	if (!qp->req_offloaded)
+		return 0;
+
+	p_ramrod_res = (struct roce_destroy_qp_req_output_params *)
+		       dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+					  sizeof(*p_ramrod_res),
+					  &ramrod_res_phys, GFP_KERNEL);
+	if (!p_ramrod_res) {
+		DP_NOTICE(p_hwfn,
+			  "qed destroy requester failed: cannot allocate memory (ramrod)\n");
+		return rc;
+	}
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid + 1;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_DESTROY_QP,
+				 PROTOCOLID_ROCE, &init_data);
+	if (rc)
+		goto err;
+
+	p_ramrod = &p_ent->ramrod.roce_destroy_qp_req;
+	DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (rc)
+		goto err;
+
+	*num_bound_mw = le32_to_cpu(p_ramrod_res->num_bound_mw);
+
+	/* Free ORQ - only if ramrod succeeded, in case FW is still using it */
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+			  qp->orq_num_pages * RDMA_RING_PAGE_SIZE,
+			  qp->orq, qp->orq_phys_addr);
+
+	qp->req_offloaded = false;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroy requester, rc = %d\n", rc);
+
+err:
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_ramrod_res),
+			  p_ramrod_res, ramrod_res_phys);
+
+	return rc;
+}
+
+int qed_roce_query_qp(struct qed_hwfn *p_hwfn,
+		      struct qed_rdma_qp *qp,
+		      struct qed_rdma_query_qp_out_params *out_params)
+{
+	struct roce_query_qp_resp_output_params *p_resp_ramrod_res;
+	struct roce_query_qp_req_output_params *p_req_ramrod_res;
+	struct roce_query_qp_resp_ramrod_data *p_resp_ramrod;
+	struct roce_query_qp_req_ramrod_data *p_req_ramrod;
+	struct qed_sp_init_data init_data;
+	dma_addr_t resp_ramrod_res_phys;
+	dma_addr_t req_ramrod_res_phys;
+	struct qed_spq_entry *p_ent;
+	bool rq_err_state;
+	bool sq_err_state;
+	bool sq_draining;
+	int rc = -ENOMEM;
+
+	if ((!(qp->resp_offloaded)) && (!(qp->req_offloaded))) {
+		/* We can't send ramrod to the fw since this qp wasn't offloaded
+		 * to the fw yet
+		 */
+		out_params->draining = false;
+		out_params->rq_psn = qp->rq_psn;
+		out_params->sq_psn = qp->sq_psn;
+		out_params->state = qp->cur_state;
+
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "No QPs as no offload\n");
+		return 0;
+	}
+
+	if (!(qp->resp_offloaded)) {
+		DP_NOTICE(p_hwfn,
+			  "The responder's qp should be offloded before requester's\n");
+		return -EINVAL;
+	}
+
+	/* Send a query responder ramrod to FW to get RQ-PSN and state */
+	p_resp_ramrod_res = (struct roce_query_qp_resp_output_params *)
+	    dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+			       sizeof(*p_resp_ramrod_res),
+			       &resp_ramrod_res_phys, GFP_KERNEL);
+	if (!p_resp_ramrod_res) {
+		DP_NOTICE(p_hwfn,
+			  "qed query qp failed: cannot allocate memory (ramrod)\n");
+		return rc;
+	}
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qp->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+	rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP,
+				 PROTOCOLID_ROCE, &init_data);
+	if (rc)
+		goto err_resp;
+
+	p_resp_ramrod = &p_ent->ramrod.roce_query_qp_resp;
+	DMA_REGPAIR_LE(p_resp_ramrod->output_params_addr, resp_ramrod_res_phys);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (rc)
+		goto err_resp;
+
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res),
+			  p_resp_ramrod_res, resp_ramrod_res_phys);
+
+	out_params->rq_psn = le32_to_cpu(p_resp_ramrod_res->psn);
+	rq_err_state = GET_FIELD(le32_to_cpu(p_resp_ramrod_res->err_flag),
+				 ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG);
+
+	if (!(qp->req_offloaded)) {
+		/* Don't send query qp for the requester */
+		out_params->sq_psn = qp->sq_psn;
+		out_params->draining = false;
+
+		if (rq_err_state)
+			qp->cur_state = QED_ROCE_QP_STATE_ERR;
+
+		out_params->state = qp->cur_state;
+
+		return 0;
+	}
+
+	/* Send a query requester ramrod to FW to get SQ-PSN and state */
+	p_req_ramrod_res = (struct roce_query_qp_req_output_params *)
+			   dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+					      sizeof(*p_req_ramrod_res),
+					      &req_ramrod_res_phys,
+					      GFP_KERNEL);
+	if (!p_req_ramrod_res) {
+		rc = -ENOMEM;
+		DP_NOTICE(p_hwfn,
+			  "qed query qp failed: cannot allocate memory (ramrod)\n");
+		return rc;
+	}
+
+	/* Get SPQ entry */
+	init_data.cid = qp->icid + 1;
+	rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP,
+				 PROTOCOLID_ROCE, &init_data);
+	if (rc)
+		goto err_req;
+
+	p_req_ramrod = &p_ent->ramrod.roce_query_qp_req;
+	DMA_REGPAIR_LE(p_req_ramrod->output_params_addr, req_ramrod_res_phys);
+
+	rc = qed_spq_post(p_hwfn, p_ent, NULL);
+	if (rc)
+		goto err_req;
+
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res),
+			  p_req_ramrod_res, req_ramrod_res_phys);
+
+	out_params->sq_psn = le32_to_cpu(p_req_ramrod_res->psn);
+	sq_err_state = GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags),
+				 ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_ERR_FLG);
+	sq_draining =
+		GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags),
+			  ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_SQ_DRAINING_FLG);
+
+	out_params->draining = false;
+
+	if (rq_err_state)
+		qp->cur_state = QED_ROCE_QP_STATE_ERR;
+	else if (sq_err_state)
+		qp->cur_state = QED_ROCE_QP_STATE_SQE;
+	else if (sq_draining)
+		out_params->draining = true;
+	out_params->state = qp->cur_state;
+
+	return 0;
+
+err_req:
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res),
+			  p_req_ramrod_res, req_ramrod_res_phys);
+	return rc;
+err_resp:
+	dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res),
+			  p_resp_ramrod_res, resp_ramrod_res_phys);
+	return rc;
+}
+
+int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
+{
+	u32 num_invalidated_mw = 0;
+	u32 num_bound_mw = 0;
+	u32 start_cid;
+	int rc;
+
+	/* Destroys the specified QP */
+	if ((qp->cur_state != QED_ROCE_QP_STATE_RESET) &&
+	    (qp->cur_state != QED_ROCE_QP_STATE_ERR) &&
+	    (qp->cur_state != QED_ROCE_QP_STATE_INIT)) {
+		DP_NOTICE(p_hwfn,
+			  "QP must be in error, reset or init state before destroying it\n");
+		return -EINVAL;
+	}
+
+	rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp, &num_invalidated_mw);
+	if (rc)
+		return rc;
+
+	/* Send destroy requester ramrod */
+	rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp, &num_bound_mw);
+	if (rc)
+		return rc;
+
+	if (num_invalidated_mw != num_bound_mw) {
+		DP_NOTICE(p_hwfn,
+			  "number of invalidate memory windows is different from bounded ones\n");
+		return -EINVAL;
+	}
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+	start_cid = qed_cxt_get_proto_cid_start(p_hwfn,
+						p_hwfn->p_rdma_info->proto);
+
+	/* Release responder's icid */
+	qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map,
+			    qp->icid - start_cid);
+
+	/* Release requester's icid */
+	qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map,
+			    qp->icid + 1 - start_cid);
+
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+	return 0;
+}
+
+int qed_rdma_query_qp(void *rdma_cxt,
+		      struct qed_rdma_qp *qp,
+		      struct qed_rdma_query_qp_out_params *out_params)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
+
+	/* The following fields are filled in from qp and not FW as they can't
+	 * be modified by FW
+	 */
+	out_params->mtu = qp->mtu;
+	out_params->dest_qp = qp->dest_qp;
+	out_params->incoming_atomic_en = qp->incoming_atomic_en;
+	out_params->e2e_flow_control_en = qp->e2e_flow_control_en;
+	out_params->incoming_rdma_read_en = qp->incoming_rdma_read_en;
+	out_params->incoming_rdma_write_en = qp->incoming_rdma_write_en;
+	out_params->dgid = qp->dgid;
+	out_params->flow_label = qp->flow_label;
+	out_params->hop_limit_ttl = qp->hop_limit_ttl;
+	out_params->traffic_class_tos = qp->traffic_class_tos;
+	out_params->timeout = qp->ack_timeout;
+	out_params->rnr_retry = qp->rnr_retry_cnt;
+	out_params->retry_cnt = qp->retry_cnt;
+	out_params->min_rnr_nak_timer = qp->min_rnr_nak_timer;
+	out_params->pkey_index = 0;
+	out_params->max_rd_atomic = qp->max_rd_atomic_req;
+	out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp;
+	out_params->sqd_async = qp->sqd_async;
+
+	rc = qed_roce_query_qp(p_hwfn, qp, out_params);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query QP, rc = %d\n", rc);
+	return rc;
+}
+
+int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	int rc = 0;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
+
+	rc = qed_roce_destroy_qp(p_hwfn, qp);
+
+	/* free qp params struct */
+	kfree(qp);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP destroyed\n");
+	return rc;
+}
+
+struct qed_rdma_qp *
+qed_rdma_create_qp(void *rdma_cxt,
+		   struct qed_rdma_create_qp_in_params *in_params,
+		   struct qed_rdma_create_qp_out_params *out_params)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct qed_rdma_qp *qp;
+	u8 max_stats_queues;
+	int rc;
+
+	if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info) {
+		DP_ERR(p_hwfn->cdev,
+		       "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n",
+		       rdma_cxt, in_params, out_params);
+		return NULL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "qed rdma create qp called with qp_handle = %08x%08x\n",
+		   in_params->qp_handle_hi, in_params->qp_handle_lo);
+
+	/* Some sanity checks... */
+	max_stats_queues = p_hwfn->p_rdma_info->dev->max_stats_queues;
+	if (in_params->stats_queue >= max_stats_queues) {
+		DP_ERR(p_hwfn->cdev,
+		       "qed rdma create qp failed due to invalid statistics queue %d. maximum is %d\n",
+		       in_params->stats_queue, max_stats_queues);
+		return NULL;
+	}
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp) {
+		DP_NOTICE(p_hwfn, "Failed to allocate qed_rdma_qp\n");
+		return NULL;
+	}
+
+	rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
+	qp->qpid = ((0xFF << 16) | qp->icid);
+
+	DP_INFO(p_hwfn, "ROCE qpid=%x\n", qp->qpid);
+
+	if (rc) {
+		kfree(qp);
+		return NULL;
+	}
+
+	qp->cur_state = QED_ROCE_QP_STATE_RESET;
+	qp->qp_handle.hi = cpu_to_le32(in_params->qp_handle_hi);
+	qp->qp_handle.lo = cpu_to_le32(in_params->qp_handle_lo);
+	qp->qp_handle_async.hi = cpu_to_le32(in_params->qp_handle_async_hi);
+	qp->qp_handle_async.lo = cpu_to_le32(in_params->qp_handle_async_lo);
+	qp->use_srq = in_params->use_srq;
+	qp->signal_all = in_params->signal_all;
+	qp->fmr_and_reserved_lkey = in_params->fmr_and_reserved_lkey;
+	qp->pd = in_params->pd;
+	qp->dpi = in_params->dpi;
+	qp->sq_cq_id = in_params->sq_cq_id;
+	qp->sq_num_pages = in_params->sq_num_pages;
+	qp->sq_pbl_ptr = in_params->sq_pbl_ptr;
+	qp->rq_cq_id = in_params->rq_cq_id;
+	qp->rq_num_pages = in_params->rq_num_pages;
+	qp->rq_pbl_ptr = in_params->rq_pbl_ptr;
+	qp->srq_id = in_params->srq_id;
+	qp->req_offloaded = false;
+	qp->resp_offloaded = false;
+	qp->e2e_flow_control_en = qp->use_srq ? false : true;
+	qp->stats_queue = in_params->stats_queue;
+
+	out_params->icid = qp->icid;
+	out_params->qp_id = qp->qpid;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Create QP, rc = %d\n", rc);
+	return qp;
+}
+
+static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn,
+			      struct qed_rdma_qp *qp,
+			      enum qed_roce_qp_state prev_state,
+			      struct qed_rdma_modify_qp_in_params *params)
+{
+	u32 num_invalidated_mw = 0, num_bound_mw = 0;
+	int rc = 0;
+
+	/* Perform additional operations according to the current state and the
+	 * next state
+	 */
+	if (((prev_state == QED_ROCE_QP_STATE_INIT) ||
+	     (prev_state == QED_ROCE_QP_STATE_RESET)) &&
+	    (qp->cur_state == QED_ROCE_QP_STATE_RTR)) {
+		/* Init->RTR or Reset->RTR */
+		rc = qed_roce_sp_create_responder(p_hwfn, qp);
+		return rc;
+	} else if ((prev_state == QED_ROCE_QP_STATE_RTR) &&
+		   (qp->cur_state == QED_ROCE_QP_STATE_RTS)) {
+		/* RTR-> RTS */
+		rc = qed_roce_sp_create_requester(p_hwfn, qp);
+		if (rc)
+			return rc;
+
+		/* Send modify responder ramrod */
+		rc = qed_roce_sp_modify_responder(p_hwfn, qp, false,
+						  params->modify_flags);
+		return rc;
+	} else if ((prev_state == QED_ROCE_QP_STATE_RTS) &&
+		   (qp->cur_state == QED_ROCE_QP_STATE_RTS)) {
+		/* RTS->RTS */
+		rc = qed_roce_sp_modify_responder(p_hwfn, qp, false,
+						  params->modify_flags);
+		if (rc)
+			return rc;
+
+		rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false,
+						  params->modify_flags);
+		return rc;
+	} else if ((prev_state == QED_ROCE_QP_STATE_RTS) &&
+		   (qp->cur_state == QED_ROCE_QP_STATE_SQD)) {
+		/* RTS->SQD */
+		rc = qed_roce_sp_modify_requester(p_hwfn, qp, true, false,
+						  params->modify_flags);
+		return rc;
+	} else if ((prev_state == QED_ROCE_QP_STATE_SQD) &&
+		   (qp->cur_state == QED_ROCE_QP_STATE_SQD)) {
+		/* SQD->SQD */
+		rc = qed_roce_sp_modify_responder(p_hwfn, qp, false,
+						  params->modify_flags);
+		if (rc)
+			return rc;
+
+		rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false,
+						  params->modify_flags);
+		return rc;
+	} else if ((prev_state == QED_ROCE_QP_STATE_SQD) &&
+		   (qp->cur_state == QED_ROCE_QP_STATE_RTS)) {
+		/* SQD->RTS */
+		rc = qed_roce_sp_modify_responder(p_hwfn, qp, false,
+						  params->modify_flags);
+		if (rc)
+			return rc;
+
+		rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false,
+						  params->modify_flags);
+
+		return rc;
+	} else if (qp->cur_state == QED_ROCE_QP_STATE_ERR ||
+		   qp->cur_state == QED_ROCE_QP_STATE_SQE) {
+		/* ->ERR */
+		rc = qed_roce_sp_modify_responder(p_hwfn, qp, true,
+						  params->modify_flags);
+		if (rc)
+			return rc;
+
+		rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, true,
+						  params->modify_flags);
+		return rc;
+	} else if (qp->cur_state == QED_ROCE_QP_STATE_RESET) {
+		/* Any state -> RESET */
+
+		rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp,
+						      &num_invalidated_mw);
+		if (rc)
+			return rc;
+
+		rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp,
+						      &num_bound_mw);
+
+		if (num_invalidated_mw != num_bound_mw) {
+			DP_NOTICE(p_hwfn,
+				  "number of invalidate memory windows is different from bounded ones\n");
+			return -EINVAL;
+		}
+	} else {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "0\n");
+	}
+
+	return rc;
+}
+
+int qed_rdma_modify_qp(void *rdma_cxt,
+		       struct qed_rdma_qp *qp,
+		       struct qed_rdma_modify_qp_in_params *params)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	enum qed_roce_qp_state prev_state;
+	int rc = 0;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x params->new_state=%d\n",
+		   qp->icid, params->new_state);
+
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+		return rc;
+	}
+
+	if (GET_FIELD(params->modify_flags,
+		      QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)) {
+		qp->incoming_rdma_read_en = params->incoming_rdma_read_en;
+		qp->incoming_rdma_write_en = params->incoming_rdma_write_en;
+		qp->incoming_atomic_en = params->incoming_atomic_en;
+	}
+
+	/* Update QP structure with the updated values */
+	if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE))
+		qp->roce_mode = params->roce_mode;
+	if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY))
+		qp->pkey = params->pkey;
+	if (GET_FIELD(params->modify_flags,
+		      QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN))
+		qp->e2e_flow_control_en = params->e2e_flow_control_en;
+	if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_DEST_QP))
+		qp->dest_qp = params->dest_qp;
+	if (GET_FIELD(params->modify_flags,
+		      QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)) {
+		/* Indicates that the following parameters have changed:
+		 * Traffic class, flow label, hop limit, source GID,
+		 * destination GID, loopback indicator
+		 */
+		qp->traffic_class_tos = params->traffic_class_tos;
+		qp->flow_label = params->flow_label;
+		qp->hop_limit_ttl = params->hop_limit_ttl;
+
+		qp->sgid = params->sgid;
+		qp->dgid = params->dgid;
+		qp->udp_src_port = 0;
+		qp->vlan_id = params->vlan_id;
+		qp->mtu = params->mtu;
+		qp->lb_indication = params->lb_indication;
+		memcpy((u8 *)&qp->remote_mac_addr[0],
+		       (u8 *)&params->remote_mac_addr[0], ETH_ALEN);
+		if (params->use_local_mac) {
+			memcpy((u8 *)&qp->local_mac_addr[0],
+			       (u8 *)&params->local_mac_addr[0], ETH_ALEN);
+		} else {
+			memcpy((u8 *)&qp->local_mac_addr[0],
+			       (u8 *)&p_hwfn->hw_info.hw_mac_addr, ETH_ALEN);
+		}
+	}
+	if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_RQ_PSN))
+		qp->rq_psn = params->rq_psn;
+	if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_SQ_PSN))
+		qp->sq_psn = params->sq_psn;
+	if (GET_FIELD(params->modify_flags,
+		      QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ))
+		qp->max_rd_atomic_req = params->max_rd_atomic_req;
+	if (GET_FIELD(params->modify_flags,
+		      QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP))
+		qp->max_rd_atomic_resp = params->max_rd_atomic_resp;
+	if (GET_FIELD(params->modify_flags,
+		      QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT))
+		qp->ack_timeout = params->ack_timeout;
+	if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_RETRY_CNT))
+		qp->retry_cnt = params->retry_cnt;
+	if (GET_FIELD(params->modify_flags,
+		      QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT))
+		qp->rnr_retry_cnt = params->rnr_retry_cnt;
+	if (GET_FIELD(params->modify_flags,
+		      QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER))
+		qp->min_rnr_nak_timer = params->min_rnr_nak_timer;
+
+	qp->sqd_async = params->sqd_async;
+
+	prev_state = qp->cur_state;
+	if (GET_FIELD(params->modify_flags,
+		      QED_RDMA_MODIFY_QP_VALID_NEW_STATE)) {
+		qp->cur_state = params->new_state;
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "qp->cur_state=%d\n",
+			   qp->cur_state);
+	}
+
+	rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params);
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify QP, rc = %d\n", rc);
+	return rc;
+}
+
+int qed_rdma_register_tid(void *rdma_cxt,
+			  struct qed_rdma_register_tid_in_params *params)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct rdma_register_tid_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	enum rdma_tid_type tid_type;
+	u8 fw_return_code;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", params->itid);
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_REGISTER_MR,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+		return rc;
+	}
+
+	if (p_hwfn->p_rdma_info->last_tid < params->itid)
+		p_hwfn->p_rdma_info->last_tid = params->itid;
+
+	p_ramrod = &p_ent->ramrod.rdma_register_tid;
+
+	p_ramrod->flags = 0;
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL,
+		  params->pbl_two_level);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED, params->zbva);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, params->phy_mr);
+
+	/* Don't initialize D/C field, as it may override other bits. */
+	if (!(params->tid_type == QED_RDMA_TID_FMR) && !(params->dma_mr))
+		SET_FIELD(p_ramrod->flags,
+			  RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG,
+			  params->page_size_log - 12);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_MAX_ID,
+		  p_hwfn->p_rdma_info->last_tid);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ,
+		  params->remote_read);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE,
+		  params->remote_write);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC,
+		  params->remote_atomic);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE,
+		  params->local_write);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ, params->local_read);
+
+	SET_FIELD(p_ramrod->flags,
+		  RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND,
+		  params->mw_bind);
+
+	SET_FIELD(p_ramrod->flags1,
+		  RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG,
+		  params->pbl_page_size_log - 12);
+
+	SET_FIELD(p_ramrod->flags2,
+		  RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR, params->dma_mr);
+
+	switch (params->tid_type) {
+	case QED_RDMA_TID_REGISTERED_MR:
+		tid_type = RDMA_TID_REGISTERED_MR;
+		break;
+	case QED_RDMA_TID_FMR:
+		tid_type = RDMA_TID_FMR;
+		break;
+	case QED_RDMA_TID_MW_TYPE1:
+		tid_type = RDMA_TID_MW_TYPE1;
+		break;
+	case QED_RDMA_TID_MW_TYPE2A:
+		tid_type = RDMA_TID_MW_TYPE2A;
+		break;
+	default:
+		rc = -EINVAL;
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+		return rc;
+	}
+	SET_FIELD(p_ramrod->flags1,
+		  RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE, tid_type);
+
+	p_ramrod->itid = cpu_to_le32(params->itid);
+	p_ramrod->key = params->key;
+	p_ramrod->pd = cpu_to_le16(params->pd);
+	p_ramrod->length_hi = (u8)(params->length >> 32);
+	p_ramrod->length_lo = DMA_LO_LE(params->length);
+	if (params->zbva) {
+		/* Lower 32 bits of the registered MR address.
+		 * In case of zero based MR, will hold FBO
+		 */
+		p_ramrod->va.hi = 0;
+		p_ramrod->va.lo = cpu_to_le32(params->fbo);
+	} else {
+		DMA_REGPAIR_LE(p_ramrod->va, params->vaddr);
+	}
+	DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr);
+
+	/* DIF */
+	if (params->dif_enabled) {
+		SET_FIELD(p_ramrod->flags2,
+			  RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG, 1);
+		DMA_REGPAIR_LE(p_ramrod->dif_error_addr,
+			       params->dif_error_addr);
+		DMA_REGPAIR_LE(p_ramrod->dif_runt_addr, params->dif_runt_addr);
+	}
+
+	rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code);
+
+	if (fw_return_code != RDMA_RETURN_OK) {
+		DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code);
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Register TID, rc = %d\n", rc);
+	return rc;
+}
+
+int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct rdma_deregister_tid_ramrod_data *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent;
+	struct qed_ptt *p_ptt;
+	u8 fw_return_code;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid);
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_DEREGISTER_MR,
+				 p_hwfn->p_rdma_info->proto, &init_data);
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+		return rc;
+	}
+
+	p_ramrod = &p_ent->ramrod.rdma_deregister_tid;
+	p_ramrod->itid = cpu_to_le32(itid);
+
+	rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code);
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+		return rc;
+	}
+
+	if (fw_return_code == RDMA_RETURN_DEREGISTER_MR_BAD_STATE_ERR) {
+		DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code);
+		return -EINVAL;
+	} else if (fw_return_code == RDMA_RETURN_NIG_DRAIN_REQ) {
+		/* Bit indicating that the TID is in use and a nig drain is
+		 * required before sending the ramrod again
+		 */
+		p_ptt = qed_ptt_acquire(p_hwfn);
+		if (!p_ptt) {
+			rc = -EBUSY;
+			DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+				   "Failed to acquire PTT\n");
+			return rc;
+		}
+
+		rc = qed_mcp_drain(p_hwfn, p_ptt);
+		if (rc) {
+			qed_ptt_release(p_hwfn, p_ptt);
+			DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+				   "Drain failed\n");
+			return rc;
+		}
+
+		qed_ptt_release(p_hwfn, p_ptt);
+
+		/* Resend the ramrod */
+		rc = qed_sp_init_request(p_hwfn, &p_ent,
+					 RDMA_RAMROD_DEREGISTER_MR,
+					 p_hwfn->p_rdma_info->proto,
+					 &init_data);
+		if (rc) {
+			DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+				   "Failed to init sp-element\n");
+			return rc;
+		}
+
+		rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code);
+		if (rc) {
+			DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+				   "Ramrod failed\n");
+			return rc;
+		}
+
+		if (fw_return_code != RDMA_RETURN_OK) {
+			DP_NOTICE(p_hwfn, "fw_return_code = %d\n",
+				  fw_return_code);
+			return rc;
+		}
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "De-registered TID, rc = %d\n", rc);
+	return rc;
+}
+
+static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev)
+{
+	return QED_LEADING_HWFN(cdev);
+}
+
+static void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	u32 val;
+
+	val = (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm) ? 0 : 1;
+
+	qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPM_ENABLE, val);
+	DP_VERBOSE(p_hwfn, (QED_MSG_DCB | QED_MSG_RDMA),
+		   "Changing DPM_EN state to %d (DCBX=%d, DB_BAR=%d)\n",
+		   val, p_hwfn->dcbx_no_edpm, p_hwfn->db_bar_no_edpm);
+}
+
+void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	p_hwfn->db_bar_no_edpm = true;
+
+	qed_rdma_dpm_conf(p_hwfn, p_ptt);
+}
+
+int qed_rdma_start(void *rdma_cxt, struct qed_rdma_start_in_params *params)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	struct qed_ptt *p_ptt;
+	int rc = -EBUSY;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "desired_cnq = %08x\n", params->desired_cnq);
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		goto err;
+
+	rc = qed_rdma_alloc(p_hwfn, p_ptt, params);
+	if (rc)
+		goto err1;
+
+	rc = qed_rdma_setup(p_hwfn, p_ptt, params);
+	if (rc)
+		goto err2;
+
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+
+err2:
+	qed_rdma_free(p_hwfn);
+err1:
+	qed_ptt_release(p_hwfn, p_ptt);
+err:
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA start - error, rc = %d\n", rc);
+	return rc;
+}
+
+static int qed_rdma_init(struct qed_dev *cdev,
+			 struct qed_rdma_start_in_params *params)
+{
+	return qed_rdma_start(QED_LEADING_HWFN(cdev), params);
+}
+
+void qed_rdma_remove_user(void *rdma_cxt, u16 dpi)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "dpi = %08x\n", dpi);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, dpi);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn,
+				     u8 connection_handle,
+				     void *cookie,
+				     dma_addr_t first_frag_addr,
+				     bool b_last_fragment, bool b_last_packet)
+{
+	struct qed_roce_ll2_packet *packet = cookie;
+	struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2;
+
+	roce_ll2->cbs.tx_cb(roce_ll2->cb_cookie, packet);
+}
+
+void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn,
+				    u8 connection_handle,
+				    void *cookie,
+				    dma_addr_t first_frag_addr,
+				    bool b_last_fragment, bool b_last_packet)
+{
+	qed_ll2b_complete_tx_gsi_packet(p_hwfn, connection_handle,
+					cookie, first_frag_addr,
+					b_last_fragment, b_last_packet);
+}
+
+void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn,
+				     u8 connection_handle,
+				     void *cookie,
+				     dma_addr_t rx_buf_addr,
+				     u16 data_length,
+				     u8 data_length_error,
+				     u16 parse_flags,
+				     u16 vlan,
+				     u32 src_mac_addr_hi,
+				     u16 src_mac_addr_lo, bool b_last_packet)
+{
+	struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2;
+	struct qed_roce_ll2_rx_params params;
+	struct qed_dev *cdev = p_hwfn->cdev;
+	struct qed_roce_ll2_packet pkt;
+
+	DP_VERBOSE(cdev,
+		   QED_MSG_LL2,
+		   "roce ll2 rx complete: bus_addr=%p, len=%d, data_len_err=%d\n",
+		   (void *)(uintptr_t)rx_buf_addr,
+		   data_length, data_length_error);
+
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.n_seg = 1;
+	pkt.payload[0].baddr = rx_buf_addr;
+	pkt.payload[0].len = data_length;
+
+	memset(&params, 0, sizeof(params));
+	params.vlan_id = vlan;
+	*((u32 *)&params.smac[0]) = ntohl(src_mac_addr_hi);
+	*((u16 *)&params.smac[4]) = ntohs(src_mac_addr_lo);
+
+	if (data_length_error) {
+		DP_ERR(cdev,
+		       "roce ll2 rx complete: data length error %d, length=%d\n",
+		       data_length_error, data_length);
+		params.rc = -EINVAL;
+	}
+
+	roce_ll2->cbs.rx_cb(roce_ll2->cb_cookie, &pkt, &params);
+}
+
+static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev,
+				       u8 *old_mac_address,
+				       u8 *new_mac_address)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt;
+	int rc = 0;
+
+	if (!hwfn->ll2 || hwfn->ll2->handle == QED_LL2_UNUSED_HANDLE) {
+		DP_ERR(cdev,
+		       "qed roce mac filter failed - roce_info/ll2 NULL\n");
+		return -EINVAL;
+	}
+
+	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
+	if (!p_ptt) {
+		DP_ERR(cdev,
+		       "qed roce ll2 mac filter set: failed to acquire PTT\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&hwfn->ll2->lock);
+	if (old_mac_address)
+		qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
+					  old_mac_address);
+	if (new_mac_address)
+		rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
+					    new_mac_address);
+	mutex_unlock(&hwfn->ll2->lock);
+
+	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
+
+	if (rc)
+		DP_ERR(cdev,
+		       "qed roce ll2 mac filter set: failed to add mac filter\n");
+
+	return rc;
+}
+
+static int qed_roce_ll2_start(struct qed_dev *cdev,
+			      struct qed_roce_ll2_params *params)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_roce_ll2_info *roce_ll2;
+	struct qed_ll2_info ll2_params;
+	int rc;
+
+	if (!params) {
+		DP_ERR(cdev, "qed roce ll2 start: failed due to NULL params\n");
+		return -EINVAL;
+	}
+	if (!params->cbs.tx_cb || !params->cbs.rx_cb) {
+		DP_ERR(cdev,
+		       "qed roce ll2 start: failed due to NULL tx/rx. tx_cb=%p, rx_cb=%p\n",
+		       params->cbs.tx_cb, params->cbs.rx_cb);
+		return -EINVAL;
+	}
+	if (!is_valid_ether_addr(params->mac_address)) {
+		DP_ERR(cdev,
+		       "qed roce ll2 start: failed due to invalid Ethernet address %pM\n",
+		       params->mac_address);
+		return -EINVAL;
+	}
+
+	/* Initialize */
+	roce_ll2 = kzalloc(sizeof(*roce_ll2), GFP_ATOMIC);
+	if (!roce_ll2) {
+		DP_ERR(cdev, "qed roce ll2 start: failed memory allocation\n");
+		return -ENOMEM;
+	}
+	memset(roce_ll2, 0, sizeof(*roce_ll2));
+	roce_ll2->handle = QED_LL2_UNUSED_HANDLE;
+	roce_ll2->cbs = params->cbs;
+	roce_ll2->cb_cookie = params->cb_cookie;
+	mutex_init(&roce_ll2->lock);
+
+	memset(&ll2_params, 0, sizeof(ll2_params));
+	ll2_params.conn_type = QED_LL2_TYPE_ROCE;
+	ll2_params.mtu = params->mtu;
+	ll2_params.rx_drop_ttl0_flg = true;
+	ll2_params.rx_vlan_removal_en = false;
+	ll2_params.tx_dest = CORE_TX_DEST_NW;
+	ll2_params.ai_err_packet_too_big = LL2_DROP_PACKET;
+	ll2_params.ai_err_no_buf = LL2_DROP_PACKET;
+	ll2_params.gsi_enable = true;
+
+	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_params,
+					params->max_rx_buffers,
+					params->max_tx_buffers,
+					&roce_ll2->handle);
+	if (rc) {
+		DP_ERR(cdev,
+		       "qed roce ll2 start: failed to acquire LL2 connection (rc=%d)\n",
+		       rc);
+		goto err;
+	}
+
+	rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev),
+					  roce_ll2->handle);
+	if (rc) {
+		DP_ERR(cdev,
+		       "qed roce ll2 start: failed to establish LL2 connection (rc=%d)\n",
+		       rc);
+		goto err1;
+	}
+
+	hwfn->ll2 = roce_ll2;
+
+	rc = qed_roce_ll2_set_mac_filter(cdev, NULL, params->mac_address);
+	if (rc) {
+		hwfn->ll2 = NULL;
+		goto err2;
+	}
+	ether_addr_copy(roce_ll2->mac_address, params->mac_address);
+
+	return 0;
+
+err2:
+	qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle);
+err1:
+	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle);
+err:
+	kfree(roce_ll2);
+	return rc;
+}
+
+static int qed_roce_ll2_stop(struct qed_dev *cdev)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2;
+	int rc;
+
+	if (!cdev) {
+		DP_ERR(cdev, "qed roce ll2 stop: invalid cdev\n");
+		return -EINVAL;
+	}
+
+	if (roce_ll2->handle == QED_LL2_UNUSED_HANDLE) {
+		DP_ERR(cdev, "qed roce ll2 stop: cannot stop an unused LL2\n");
+		return -EINVAL;
+	}
+
+	/* remove LL2 MAC address filter */
+	rc = qed_roce_ll2_set_mac_filter(cdev, roce_ll2->mac_address, NULL);
+	eth_zero_addr(roce_ll2->mac_address);
+
+	rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev),
+					  roce_ll2->handle);
+	if (rc)
+		DP_ERR(cdev,
+		       "qed roce ll2 stop: failed to terminate LL2 connection (rc=%d)\n",
+		       rc);
+
+	qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle);
+
+	roce_ll2->handle = QED_LL2_UNUSED_HANDLE;
+
+	kfree(roce_ll2);
+
+	return rc;
+}
+
+static int qed_roce_ll2_tx(struct qed_dev *cdev,
+			   struct qed_roce_ll2_packet *pkt,
+			   struct qed_roce_ll2_tx_params *params)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2;
+	enum qed_ll2_roce_flavor_type qed_roce_flavor;
+	u8 flags = 0;
+	int rc;
+	int i;
+
+	if (!cdev || !pkt || !params) {
+		DP_ERR(cdev,
+		       "roce ll2 tx: failed tx because one of the following is NULL - drv=%p, pkt=%p, params=%p\n",
+		       cdev, pkt, params);
+		return -EINVAL;
+	}
+
+	qed_roce_flavor = (pkt->roce_mode == ROCE_V1) ? QED_LL2_ROCE
+						      : QED_LL2_RROCE;
+
+	if (pkt->roce_mode == ROCE_V2_IPV4)
+		flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT);
+
+	/* Tx header */
+	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle,
+				       1 + pkt->n_seg, 0, flags, 0,
+				       qed_roce_flavor, pkt->header.baddr,
+				       pkt->header.len, pkt, 1);
+	if (rc) {
+		DP_ERR(cdev, "roce ll2 tx: header failed (rc=%d)\n", rc);
+		return QED_ROCE_TX_HEAD_FAILURE;
+	}
+
+	/* Tx payload */
+	for (i = 0; i < pkt->n_seg; i++) {
+		rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev),
+						       roce_ll2->handle,
+						       pkt->payload[i].baddr,
+						       pkt->payload[i].len);
+		if (rc) {
+			/* If failed not much to do here, partial packet has
+			 * been posted * we can't free memory, will need to wait
+			 * for completion
+			 */
+			DP_ERR(cdev,
+			       "roce ll2 tx: payload failed (rc=%d)\n", rc);
+			return QED_ROCE_TX_FRAG_FAILURE;
+		}
+	}
+
+	return 0;
+}
+
+static int qed_roce_ll2_post_rx_buffer(struct qed_dev *cdev,
+				       struct qed_roce_ll2_buffer *buf,
+				       u64 cookie, u8 notify_fw)
+{
+	return qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev),
+				      QED_LEADING_HWFN(cdev)->ll2->handle,
+				      buf->baddr, buf->len,
+				      (void *)(uintptr_t)cookie, notify_fw);
+}
+
+static int qed_roce_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2;
+
+	return qed_ll2_get_stats(QED_LEADING_HWFN(cdev),
+				 roce_ll2->handle, stats);
+}
+
+static const struct qed_rdma_ops qed_rdma_ops_pass = {
+	.common = &qed_common_ops_pass,
+	.fill_dev_info = &qed_fill_rdma_dev_info,
+	.rdma_get_rdma_ctx = &qed_rdma_get_rdma_ctx,
+	.rdma_init = &qed_rdma_init,
+	.rdma_add_user = &qed_rdma_add_user,
+	.rdma_remove_user = &qed_rdma_remove_user,
+	.rdma_stop = &qed_rdma_stop,
+	.rdma_query_port = &qed_rdma_query_port,
+	.rdma_query_device = &qed_rdma_query_device,
+	.rdma_get_start_sb = &qed_rdma_get_sb_start,
+	.rdma_get_rdma_int = &qed_rdma_get_int,
+	.rdma_set_rdma_int = &qed_rdma_set_int,
+	.rdma_get_min_cnq_msix = &qed_rdma_get_min_cnq_msix,
+	.rdma_cnq_prod_update = &qed_rdma_cnq_prod_update,
+	.rdma_alloc_pd = &qed_rdma_alloc_pd,
+	.rdma_dealloc_pd = &qed_rdma_free_pd,
+	.rdma_create_cq = &qed_rdma_create_cq,
+	.rdma_destroy_cq = &qed_rdma_destroy_cq,
+	.rdma_create_qp = &qed_rdma_create_qp,
+	.rdma_modify_qp = &qed_rdma_modify_qp,
+	.rdma_query_qp = &qed_rdma_query_qp,
+	.rdma_destroy_qp = &qed_rdma_destroy_qp,
+	.rdma_alloc_tid = &qed_rdma_alloc_tid,
+	.rdma_free_tid = &qed_rdma_free_tid,
+	.rdma_register_tid = &qed_rdma_register_tid,
+	.rdma_deregister_tid = &qed_rdma_deregister_tid,
+	.roce_ll2_start = &qed_roce_ll2_start,
+	.roce_ll2_stop = &qed_roce_ll2_stop,
+	.roce_ll2_tx = &qed_roce_ll2_tx,
+	.roce_ll2_post_rx_buffer = &qed_roce_ll2_post_rx_buffer,
+	.roce_ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
+	.roce_ll2_stats = &qed_roce_ll2_stats,
+};
+
+const struct qed_rdma_ops *qed_get_rdma_ops()
+{
+	return &qed_rdma_ops_pass;
+}
+EXPORT_SYMBOL(qed_get_rdma_ops);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h
new file mode 100644
index 0000000..2f091e8
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h
@@ -0,0 +1,216 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _QED_ROCE_H
+#define _QED_ROCE_H
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/qed/qed_if.h>
+#include <linux/qed/qed_roce_if.h>
+#include "qed.h"
+#include "qed_dev_api.h"
+#include "qed_hsi.h"
+#include "qed_ll2.h"
+
+#define QED_RDMA_MAX_FMR                    (RDMA_MAX_TIDS)
+#define QED_RDMA_MAX_P_KEY                  (1)
+#define QED_RDMA_MAX_WQE                    (0x7FFF)
+#define QED_RDMA_MAX_SRQ_WQE_ELEM           (0x7FFF)
+#define QED_RDMA_PAGE_SIZE_CAPS             (0xFFFFF000)
+#define QED_RDMA_ACK_DELAY                  (15)
+#define QED_RDMA_MAX_MR_SIZE                (0x10000000000ULL)
+#define QED_RDMA_MAX_CQS                    (RDMA_MAX_CQS)
+#define QED_RDMA_MAX_MRS                    (RDMA_MAX_TIDS)
+/* Add 1 for header element */
+#define QED_RDMA_MAX_SRQ_ELEM_PER_WQE	    (RDMA_MAX_SGE_PER_RQ_WQE + 1)
+#define QED_RDMA_MAX_SGE_PER_SRQ_WQE        (RDMA_MAX_SGE_PER_RQ_WQE)
+#define QED_RDMA_SRQ_WQE_ELEM_SIZE          (16)
+#define QED_RDMA_MAX_SRQS                   (32 * 1024)
+
+#define QED_RDMA_MAX_CQE_32_BIT             (0x7FFFFFFF - 1)
+#define QED_RDMA_MAX_CQE_16_BIT             (0x7FFF - 1)
+
+enum qed_rdma_toggle_bit {
+	QED_RDMA_TOGGLE_BIT_CLEAR = 0,
+	QED_RDMA_TOGGLE_BIT_SET = 1
+};
+
+struct qed_bmap {
+	unsigned long *bitmap;
+	u32 max_count;
+};
+
+struct qed_rdma_info {
+	/* spin lock to protect bitmaps */
+	spinlock_t lock;
+
+	struct qed_bmap cq_map;
+	struct qed_bmap pd_map;
+	struct qed_bmap tid_map;
+	struct qed_bmap qp_map;
+	struct qed_bmap srq_map;
+	struct qed_bmap cid_map;
+	struct qed_bmap dpi_map;
+	struct qed_bmap toggle_bits;
+	struct qed_rdma_events events;
+	struct qed_rdma_device *dev;
+	struct qed_rdma_port *port;
+	u32 last_tid;
+	u8 num_cnqs;
+	u32 num_qps;
+	u32 num_mrs;
+	u16 queue_zone_base;
+	enum protocol_type proto;
+};
+
+struct qed_rdma_resize_cq_in_params {
+	u16 icid;
+	u32 cq_size;
+	bool pbl_two_level;
+	u64 pbl_ptr;
+	u16 pbl_num_pages;
+	u8 pbl_page_size_log;
+};
+
+struct qed_rdma_resize_cq_out_params {
+	u32 prod;
+	u32 cons;
+};
+
+struct qed_rdma_resize_cnq_in_params {
+	u32 cnq_id;
+	u32 pbl_page_size_log;
+	u64 pbl_ptr;
+};
+
+struct qed_rdma_qp {
+	struct regpair qp_handle;
+	struct regpair qp_handle_async;
+	u32 qpid;
+	u16 icid;
+	enum qed_roce_qp_state cur_state;
+	bool use_srq;
+	bool signal_all;
+	bool fmr_and_reserved_lkey;
+
+	bool incoming_rdma_read_en;
+	bool incoming_rdma_write_en;
+	bool incoming_atomic_en;
+	bool e2e_flow_control_en;
+
+	u16 pd;
+	u16 pkey;
+	u32 dest_qp;
+	u16 mtu;
+	u16 srq_id;
+	u8 traffic_class_tos;
+	u8 hop_limit_ttl;
+	u16 dpi;
+	u32 flow_label;
+	bool lb_indication;
+	u16 vlan_id;
+	u32 ack_timeout;
+	u8 retry_cnt;
+	u8 rnr_retry_cnt;
+	u8 min_rnr_nak_timer;
+	bool sqd_async;
+	union qed_gid sgid;
+	union qed_gid dgid;
+	enum roce_mode roce_mode;
+	u16 udp_src_port;
+	u8 stats_queue;
+
+	/* requeseter */
+	u8 max_rd_atomic_req;
+	u32 sq_psn;
+	u16 sq_cq_id;
+	u16 sq_num_pages;
+	dma_addr_t sq_pbl_ptr;
+	void *orq;
+	dma_addr_t orq_phys_addr;
+	u8 orq_num_pages;
+	bool req_offloaded;
+
+	/* responder */
+	u8 max_rd_atomic_resp;
+	u32 rq_psn;
+	u16 rq_cq_id;
+	u16 rq_num_pages;
+	dma_addr_t rq_pbl_ptr;
+	void *irq;
+	dma_addr_t irq_phys_addr;
+	u8 irq_num_pages;
+	bool resp_offloaded;
+
+	u8 remote_mac_addr[6];
+	u8 local_mac_addr[6];
+
+	void *shared_queue;
+	dma_addr_t shared_queue_phys_addr;
+};
+
+int
+qed_rdma_add_user(void *rdma_cxt,
+		  struct qed_rdma_add_user_out_params *out_params);
+int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd);
+int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid);
+int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid);
+void qed_rdma_free_tid(void *rdma_cxt, u32 tid);
+struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt);
+struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt);
+int
+qed_rdma_register_tid(void *rdma_cxt,
+		      struct qed_rdma_register_tid_in_params *params);
+void qed_rdma_remove_user(void *rdma_cxt, u16 dpi);
+int qed_rdma_start(void *p_hwfn, struct qed_rdma_start_in_params *params);
+int qed_rdma_stop(void *rdma_cxt);
+u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id);
+u32 qed_rdma_query_cau_timer_res(void *p_hwfn);
+void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 cnq_index, u16 prod);
+void qed_rdma_resc_free(struct qed_hwfn *p_hwfn);
+void qed_async_roce_event(struct qed_hwfn *p_hwfn,
+			  struct event_ring_entry *p_eqe);
+int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp);
+int qed_rdma_modify_qp(void *rdma_cxt, struct qed_rdma_qp *qp,
+		       struct qed_rdma_modify_qp_in_params *params);
+int qed_rdma_query_qp(void *rdma_cxt, struct qed_rdma_qp *qp,
+		      struct qed_rdma_query_qp_out_params *out_params);
+
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+#else
+void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {}
+#endif
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
index a342bfe..9b7678f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
@@ -2,6 +2,7 @@
 #include "qed_dev_api.h"
 #include "qed_mcp.h"
 #include "qed_sp.h"
+#include "qed_selftest.h"
 
 int qed_selftest_memory(struct qed_dev *cdev)
 {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index a548504..652c908 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -61,6 +61,10 @@
 	struct vport_start_ramrod_data vport_start;
 	struct vport_stop_ramrod_data vport_stop;
 	struct vport_update_ramrod_data vport_update;
+	struct core_rx_start_ramrod_data core_rx_queue_start;
+	struct core_rx_stop_ramrod_data core_rx_queue_stop;
+	struct core_tx_start_ramrod_data core_tx_queue_start;
+	struct core_tx_stop_ramrod_data core_tx_queue_stop;
 	struct vport_filter_update_ramrod_data vport_filter_update;
 
 	struct rdma_init_func_ramrod_data rdma_init_func;
@@ -81,6 +85,7 @@
 	struct rdma_srq_create_ramrod_data rdma_create_srq;
 	struct rdma_srq_destroy_ramrod_data rdma_destroy_srq;
 	struct rdma_srq_modify_ramrod_data rdma_modify_srq;
+	struct roce_init_func_ramrod_data roce_init_func;
 
 	struct iscsi_slow_path_hdr iscsi_empty;
 	struct iscsi_init_ramrod_params iscsi_init;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index a52f3fc..2888eb0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -25,9 +25,7 @@
 
 int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 			struct qed_spq_entry **pp_ent,
-			u8 cmd,
-			u8 protocol,
-			struct qed_sp_init_data *p_data)
+			u8 cmd, u8 protocol, struct qed_sp_init_data *p_data)
 {
 	u32 opaque_cid = p_data->opaque_fid << 16 | p_data->cid;
 	struct qed_spq_entry *p_ent = NULL;
@@ -38,7 +36,7 @@
 
 	rc = qed_spq_get_entry(p_hwfn, pp_ent);
 
-	if (rc != 0)
+	if (rc)
 		return rc;
 
 	p_ent = *pp_ent;
@@ -321,8 +319,7 @@
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
 				 COMMON_RAMROD_PF_START,
-				 PROTOCOLID_COMMON,
-				 &init_data);
+				 PROTOCOLID_COMMON, &init_data);
 	if (rc)
 		return rc;
 
@@ -356,8 +353,7 @@
 	DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
 		       p_hwfn->p_consq->chain.pbl.p_phys_table);
 
-	qed_tunn_set_pf_start_params(p_hwfn, p_tunn,
-				     &p_ramrod->tunnel_config);
+	qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
 
 	if (IS_MF_SI(p_hwfn))
 		p_ramrod->allow_npar_tx_switching = allow_npar_tx_switch;
@@ -389,8 +385,7 @@
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
 		   "Setting event_ring_sb [id %04x index %02x], outer_tag [%d]\n",
-		   sb, sb_index,
-		   p_ramrod->outer_tag);
+		   sb, sb_index, p_ramrod->outer_tag);
 
 	rc = qed_spq_post(p_hwfn, p_ent, NULL);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index d73456e..caff415 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -28,6 +28,9 @@
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
 #include "qed_sriov.h"
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+#include "qed_roce.h"
+#endif
 
 /***************************************************************************
 * Structures & Definitions
@@ -41,8 +44,7 @@
 ***************************************************************************/
 static void qed_spq_blocking_cb(struct qed_hwfn *p_hwfn,
 				void *cookie,
-				union event_ring_data *data,
-				u8 fw_return_code)
+				union event_ring_data *data, u8 fw_return_code)
 {
 	struct qed_spq_comp_done *comp_done;
 
@@ -109,9 +111,8 @@
 /***************************************************************************
 * SPQ entries inner API
 ***************************************************************************/
-static int
-qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
-		   struct qed_spq_entry *p_ent)
+static int qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
+			      struct qed_spq_entry *p_ent)
 {
 	p_ent->flags = 0;
 
@@ -189,8 +190,7 @@
 }
 
 static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
-			   struct qed_spq *p_spq,
-			   struct qed_spq_entry *p_ent)
+			   struct qed_spq *p_spq, struct qed_spq_entry *p_ent)
 {
 	struct qed_chain *p_chain = &p_hwfn->p_spq->chain;
 	u16 echo = qed_chain_get_prod_idx(p_chain);
@@ -240,6 +240,11 @@
 			   struct event_ring_entry *p_eqe)
 {
 	switch (p_eqe->protocol_id) {
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+	case PROTOCOLID_ROCE:
+		qed_async_roce_event(p_hwfn, p_eqe);
+		return 0;
+#endif
 	case PROTOCOLID_COMMON:
 		return qed_sriov_eqe_event(p_hwfn,
 					   p_eqe->opcode,
@@ -255,8 +260,7 @@
 /***************************************************************************
 * EQ API
 ***************************************************************************/
-void qed_eq_prod_update(struct qed_hwfn *p_hwfn,
-			u16 prod)
+void qed_eq_prod_update(struct qed_hwfn *p_hwfn, u16 prod)
 {
 	u32 addr = GTT_BAR0_MAP_REG_USDM_RAM +
 		   USTORM_EQE_CONS_OFFSET(p_hwfn->rel_pf_id);
@@ -267,9 +271,7 @@
 	mmiowb();
 }
 
-int qed_eq_completion(struct qed_hwfn *p_hwfn,
-		      void *cookie)
-
+int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie)
 {
 	struct qed_eq *p_eq = cookie;
 	struct qed_chain *p_chain = &p_eq->chain;
@@ -323,17 +325,14 @@
 	return rc;
 }
 
-struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn,
-			    u16 num_elem)
+struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem)
 {
 	struct qed_eq *p_eq;
 
 	/* Allocate EQ struct */
 	p_eq = kzalloc(sizeof(*p_eq), GFP_KERNEL);
-	if (!p_eq) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_eq'\n");
+	if (!p_eq)
 		return NULL;
-	}
 
 	/* Allocate and initialize EQ chain*/
 	if (qed_chain_alloc(p_hwfn->cdev,
@@ -342,17 +341,12 @@
 			    QED_CHAIN_CNT_TYPE_U16,
 			    num_elem,
 			    sizeof(union event_ring_element),
-			    &p_eq->chain)) {
-		DP_NOTICE(p_hwfn, "Failed to allocate eq chain\n");
+			    &p_eq->chain))
 		goto eq_allocate_fail;
-	}
 
 	/* register EQ completion on the SP SB */
-	qed_int_register_cb(p_hwfn,
-			    qed_eq_completion,
-			    p_eq,
-			    &p_eq->eq_sb_index,
-			    &p_eq->p_fw_cons);
+	qed_int_register_cb(p_hwfn, qed_eq_completion,
+			    p_eq, &p_eq->eq_sb_index, &p_eq->p_fw_cons);
 
 	return p_eq;
 
@@ -361,14 +355,12 @@
 	return NULL;
 }
 
-void qed_eq_setup(struct qed_hwfn *p_hwfn,
-		  struct qed_eq *p_eq)
+void qed_eq_setup(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq)
 {
 	qed_chain_reset(&p_eq->chain);
 }
 
-void qed_eq_free(struct qed_hwfn *p_hwfn,
-		 struct qed_eq *p_eq)
+void qed_eq_free(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq)
 {
 	if (!p_eq)
 		return;
@@ -379,10 +371,9 @@
 /***************************************************************************
 * CQE API - manipulate EQ functionality
 ***************************************************************************/
-static int qed_cqe_completion(
-	struct qed_hwfn *p_hwfn,
-	struct eth_slow_path_rx_cqe *cqe,
-	enum protocol_type protocol)
+static int qed_cqe_completion(struct qed_hwfn *p_hwfn,
+			      struct eth_slow_path_rx_cqe *cqe,
+			      enum protocol_type protocol)
 {
 	if (IS_VF(p_hwfn->cdev))
 		return 0;
@@ -463,12 +454,9 @@
 	u32 capacity;
 
 	/* SPQ struct */
-	p_spq =
-		kzalloc(sizeof(struct qed_spq), GFP_KERNEL);
-	if (!p_spq) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_spq'\n");
+	p_spq = kzalloc(sizeof(struct qed_spq), GFP_KERNEL);
+	if (!p_spq)
 		return -ENOMEM;
-	}
 
 	/* SPQ ring  */
 	if (qed_chain_alloc(p_hwfn->cdev,
@@ -477,18 +465,14 @@
 			    QED_CHAIN_CNT_TYPE_U16,
 			    0,   /* N/A when the mode is SINGLE */
 			    sizeof(struct slow_path_element),
-			    &p_spq->chain)) {
-		DP_NOTICE(p_hwfn, "Failed to allocate spq chain\n");
+			    &p_spq->chain))
 		goto spq_allocate_fail;
-	}
 
 	/* allocate and fill the SPQ elements (incl. ramrod data list) */
 	capacity = qed_chain_get_capacity(&p_spq->chain);
 	p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-				    capacity *
-				    sizeof(struct qed_spq_entry),
+				    capacity * sizeof(struct qed_spq_entry),
 				    &p_phys, GFP_KERNEL);
-
 	if (!p_virt)
 		goto spq_allocate_fail;
 
@@ -525,9 +509,7 @@
 	kfree(p_spq);
 }
 
-int
-qed_spq_get_entry(struct qed_hwfn *p_hwfn,
-		  struct qed_spq_entry **pp_ent)
+int qed_spq_get_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent)
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 	struct qed_spq_entry *p_ent = NULL;
@@ -538,14 +520,15 @@
 	if (list_empty(&p_spq->free_pool)) {
 		p_ent = kzalloc(sizeof(*p_ent), GFP_ATOMIC);
 		if (!p_ent) {
+			DP_NOTICE(p_hwfn,
+				  "Failed to allocate an SPQ entry for a pending ramrod\n");
 			rc = -ENOMEM;
 			goto out_unlock;
 		}
 		p_ent->queue = &p_spq->unlimited_pending;
 	} else {
 		p_ent = list_first_entry(&p_spq->free_pool,
-					 struct qed_spq_entry,
-					 list);
+					 struct qed_spq_entry, list);
 		list_del(&p_ent->list);
 		p_ent->queue = &p_spq->pending;
 	}
@@ -564,8 +547,7 @@
 	list_add_tail(&p_ent->list, &p_hwfn->p_spq->free_pool);
 }
 
-void qed_spq_return_entry(struct qed_hwfn *p_hwfn,
-			  struct qed_spq_entry *p_ent)
+void qed_spq_return_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent)
 {
 	spin_lock_bh(&p_hwfn->p_spq->lock);
 	__qed_spq_return_entry(p_hwfn, p_ent);
@@ -586,10 +568,9 @@
  *
  * @return int
  */
-static int
-qed_spq_add_entry(struct qed_hwfn *p_hwfn,
-		  struct qed_spq_entry *p_ent,
-		  enum spq_priority priority)
+static int qed_spq_add_entry(struct qed_hwfn *p_hwfn,
+			     struct qed_spq_entry *p_ent,
+			     enum spq_priority priority)
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 
@@ -604,8 +585,7 @@
 			struct qed_spq_entry *p_en2;
 
 			p_en2 = list_first_entry(&p_spq->free_pool,
-						 struct qed_spq_entry,
-						 list);
+						 struct qed_spq_entry, list);
 			list_del(&p_en2->list);
 
 			/* Copy the ring element physical pointer to the new
@@ -655,8 +635,7 @@
 * Posting new Ramrods
 ***************************************************************************/
 static int qed_spq_post_list(struct qed_hwfn *p_hwfn,
-			     struct list_head *head,
-			     u32 keep_reserve)
+			     struct list_head *head, u32 keep_reserve)
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 	int rc;
@@ -690,8 +669,7 @@
 			break;
 
 		p_ent = list_first_entry(&p_spq->unlimited_pending,
-					 struct qed_spq_entry,
-					 list);
+					 struct qed_spq_entry, list);
 		if (!p_ent)
 			return -EINVAL;
 
@@ -705,8 +683,7 @@
 }
 
 int qed_spq_post(struct qed_hwfn *p_hwfn,
-		 struct qed_spq_entry *p_ent,
-		 u8 *fw_return_code)
+		 struct qed_spq_entry *p_ent, u8 *fw_return_code)
 {
 	int rc = 0;
 	struct qed_spq *p_spq = p_hwfn ? p_hwfn->p_spq : NULL;
@@ -803,8 +780,7 @@
 		return -EINVAL;
 
 	spin_lock_bh(&p_spq->lock);
-	list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending,
-				 list) {
+	list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) {
 		if (p_ent->elem.hdr.echo == echo) {
 			u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE;
 
@@ -846,15 +822,22 @@
 
 	if (!found) {
 		DP_NOTICE(p_hwfn,
-			  "Failed to find an entry this EQE completes\n");
+			  "Failed to find an entry this EQE [echo %04x] completes\n",
+			  le16_to_cpu(echo));
 		return -EEXIST;
 	}
 
-	DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Complete: func %p cookie %p)\n",
+	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
+		   "Complete EQE [echo %04x]: func %p cookie %p)\n",
+		   le16_to_cpu(echo),
 		   p_ent->comp_cb.function, p_ent->comp_cb.cookie);
 	if (found->comp_cb.function)
 		found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data,
 					fw_return_code);
+	else
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_SPQ,
+			   "Got a completion without a callback function\n");
 
 	if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) ||
 	    (found->queue == &p_spq->unlimited_pending))
@@ -878,10 +861,8 @@
 
 	/* Allocate ConsQ struct */
 	p_consq = kzalloc(sizeof(*p_consq), GFP_KERNEL);
-	if (!p_consq) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_consq'\n");
+	if (!p_consq)
 		return NULL;
-	}
 
 	/* Allocate and initialize EQ chain*/
 	if (qed_chain_alloc(p_hwfn->cdev,
@@ -889,10 +870,8 @@
 			    QED_CHAIN_MODE_PBL,
 			    QED_CHAIN_CNT_TYPE_U16,
 			    QED_CHAIN_PAGE_SIZE / 0x80,
-			    0x80, &p_consq->chain)) {
-		DP_NOTICE(p_hwfn, "Failed to allocate consq chain");
+			    0x80, &p_consq->chain))
 		goto consq_allocate_fail;
-	}
 
 	return p_consq;
 
@@ -901,14 +880,12 @@
 	return NULL;
 }
 
-void qed_consq_setup(struct qed_hwfn *p_hwfn,
-		     struct qed_consq *p_consq)
+void qed_consq_setup(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq)
 {
 	qed_chain_reset(&p_consq->chain);
 }
 
-void qed_consq_free(struct qed_hwfn *p_hwfn,
-		    struct qed_consq *p_consq)
+void qed_consq_free(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq)
 {
 	if (!p_consq)
 		return;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 15399da..d2d6621 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -60,7 +60,8 @@
 	}
 
 	fp_minor = p_vf->acquire.vfdev_info.eth_fp_hsi_minor;
-	if (fp_minor > ETH_HSI_VER_MINOR) {
+	if (fp_minor > ETH_HSI_VER_MINOR &&
+	    fp_minor != ETH_HSI_VER_NO_PKT_LEN_TUNN) {
 		DP_VERBOSE(p_hwfn,
 			   QED_MSG_IOV,
 			   "VF [%d] - Requested fp hsi %02x.%02x which is slightly newer than PF's %02x.%02x; Configuring PFs version\n",
@@ -107,8 +108,8 @@
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
-bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn,
-			   int rel_vf_id, bool b_enabled_only)
+static bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn,
+				  int rel_vf_id, bool b_enabled_only)
 {
 	if (!p_hwfn->pf_iov_info) {
 		DP_NOTICE(p_hwfn->cdev, "No iov info\n");
@@ -185,8 +186,8 @@
 	return false;
 }
 
-int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn,
-			     int vfid, struct qed_ptt *p_ptt)
+static int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn,
+				    int vfid, struct qed_ptt *p_ptt)
 {
 	struct qed_bulletin_content *p_bulletin;
 	int crc_size = sizeof(p_bulletin->crc);
@@ -454,10 +455,8 @@
 	}
 
 	p_sriov = kzalloc(sizeof(*p_sriov), GFP_KERNEL);
-	if (!p_sriov) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sriov'\n");
+	if (!p_sriov)
 		return -ENOMEM;
-	}
 
 	p_hwfn->pf_iov_info = p_sriov;
 
@@ -506,10 +505,9 @@
 
 	/* Allocate a new struct for IOV information */
 	cdev->p_iov_info = kzalloc(sizeof(*cdev->p_iov_info), GFP_KERNEL);
-	if (!cdev->p_iov_info) {
-		DP_NOTICE(p_hwfn, "Can't support IOV due to lack of memory\n");
+	if (!cdev->p_iov_info)
 		return -ENOMEM;
-	}
+
 	cdev->p_iov_info->pos = pos;
 
 	rc = qed_iov_pci_cfg_info(cdev);
@@ -575,7 +573,7 @@
 	}
 }
 
-void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable)
+static void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable)
 {
 	u16 i;
 
@@ -699,7 +697,7 @@
 				&qzone_id);
 
 		reg_addr = PSWHST_REG_ZONE_PERMISSION_TABLE + qzone_id * 4;
-		val = enable ? (vf->abs_vf_id | (1 << 8)) : 0;
+		val = enable ? (vf->abs_vf_id | BIT(8)) : 0;
 		qed_wr(p_hwfn, p_ptt, reg_addr, val);
 	}
 }
@@ -1090,13 +1088,13 @@
 
 	/* Prepare response for all extended tlvs if they are found by PF */
 	for (i = 0; i < QED_IOV_VP_UPDATE_MAX; i++) {
-		if (!(tlvs_mask & (1 << i)))
+		if (!(tlvs_mask & BIT(i)))
 			continue;
 
 		resp = qed_add_tlv(p_hwfn, &p_mbx->offset,
 				   qed_iov_vport_to_tlv(p_hwfn, i), size);
 
-		if (tlvs_accepted & (1 << i))
+		if (tlvs_accepted & BIT(i))
 			resp->hdr.status = status;
 		else
 			resp->hdr.status = PFVF_STATUS_NOT_SUPPORTED;
@@ -1132,9 +1130,10 @@
 	qed_iov_send_response(p_hwfn, p_ptt, vf_info, length, status);
 }
 
-struct qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn,
-						      u16 relative_vf_id,
-						      bool b_enabled_only)
+static struct
+qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn,
+					       u16 relative_vf_id,
+					       bool b_enabled_only)
 {
 	struct qed_vf_info *vf = NULL;
 
@@ -1145,7 +1144,7 @@
 	return &vf->p_vf_info;
 }
 
-void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid)
+static void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid)
 {
 	struct qed_public_vf_info *vf_info;
 
@@ -1241,6 +1240,16 @@
 			   p_req->num_vlan_filters,
 			   p_resp->num_vlan_filters,
 			   p_req->num_mc_filters, p_resp->num_mc_filters);
+
+		/* Some legacy OSes are incapable of correctly handling this
+		 * failure.
+		 */
+		if ((p_vf->acquire.vfdev_info.eth_fp_hsi_minor ==
+		     ETH_HSI_VER_NO_PKT_LEN_TUNN) &&
+		    (p_vf->acquire.vfdev_info.os_type ==
+		     VFPF_ACQUIRE_OS_WINDOWS))
+			return PFVF_STATUS_SUCCESS;
+
 		return PFVF_STATUS_NO_RESOURCE;
 	}
 
@@ -1280,22 +1289,42 @@
 
 	memset(resp, 0, sizeof(*resp));
 
+	/* Write the PF version so that VF would know which version
+	 * is supported - might be later overriden. This guarantees that
+	 * VF could recognize legacy PF based on lack of versions in reply.
+	 */
+	pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR;
+	pfdev_info->minor_fp_hsi = ETH_HSI_VER_MINOR;
+
+	if (vf->state != VF_FREE && vf->state != VF_STOPPED) {
+		DP_VERBOSE(p_hwfn,
+			   QED_MSG_IOV,
+			   "VF[%d] sent ACQUIRE but is already in state %d - fail request\n",
+			   vf->abs_vf_id, vf->state);
+		goto out;
+	}
+
 	/* Validate FW compatibility */
 	if (req->vfdev_info.eth_fp_hsi_major != ETH_HSI_VER_MAJOR) {
-		DP_INFO(p_hwfn,
-			"VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n",
-			vf->abs_vf_id,
-			req->vfdev_info.eth_fp_hsi_major,
-			req->vfdev_info.eth_fp_hsi_minor,
-			ETH_HSI_VER_MAJOR, ETH_HSI_VER_MINOR);
+		if (req->vfdev_info.capabilities &
+		    VFPF_ACQUIRE_CAP_PRE_FP_HSI) {
+			struct vf_pf_vfdev_info *p_vfdev = &req->vfdev_info;
 
-		/* Write the PF version so that VF would know which version
-		 * is supported.
-		 */
-		pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR;
-		pfdev_info->minor_fp_hsi = ETH_HSI_VER_MINOR;
+			DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+				   "VF[%d] is pre-fastpath HSI\n",
+				   vf->abs_vf_id);
+			p_vfdev->eth_fp_hsi_major = ETH_HSI_VER_MAJOR;
+			p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN;
+		} else {
+			DP_INFO(p_hwfn,
+				"VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n",
+				vf->abs_vf_id,
+				req->vfdev_info.eth_fp_hsi_major,
+				req->vfdev_info.eth_fp_hsi_minor,
+				ETH_HSI_VER_MAJOR, ETH_HSI_VER_MINOR);
 
-		goto out;
+			goto out;
+		}
 	}
 
 	/* On 100g PFs, prevent old VFs from loading */
@@ -1334,8 +1363,11 @@
 	pfdev_info->fw_minor = FW_MINOR_VERSION;
 	pfdev_info->fw_rev = FW_REVISION_VERSION;
 	pfdev_info->fw_eng = FW_ENGINEERING_VERSION;
-	pfdev_info->minor_fp_hsi = min_t(u8,
-					 ETH_HSI_VER_MINOR,
+
+	/* Incorrect when legacy, but doesn't matter as legacy isn't reading
+	 * this field.
+	 */
+	pfdev_info->minor_fp_hsi = min_t(u8, ETH_HSI_VER_MINOR,
 					 req->vfdev_info.eth_fp_hsi_minor);
 	pfdev_info->os_type = VFPF_ACQUIRE_OS_LINUX;
 	qed_mcp_get_mfw_ver(p_hwfn, p_ptt, &pfdev_info->mfw_ver, NULL);
@@ -1438,14 +1470,11 @@
 
 		filter.type = QED_FILTER_VLAN;
 		filter.vlan = p_vf->shadow_config.vlans[i].vid;
-		DP_VERBOSE(p_hwfn,
-			   QED_MSG_IOV,
+		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
 			   "Reconfiguring VLAN [0x%04x] for VF [%04x]\n",
 			   filter.vlan, p_vf->relative_vf_id);
-		rc = qed_sp_eth_filter_ucast(p_hwfn,
-					     p_vf->opaque_fid,
-					     &filter,
-					     QED_SPQ_MODE_CB, NULL);
+		rc = qed_sp_eth_filter_ucast(p_hwfn, p_vf->opaque_fid,
+					     &filter, QED_SPQ_MODE_CB, NULL);
 		if (rc) {
 			DP_NOTICE(p_hwfn,
 				  "Failed to configure VLAN [%04x] to VF [%04x]\n",
@@ -1463,7 +1492,7 @@
 {
 	int rc = 0;
 
-	if ((events & (1 << VLAN_ADDR_FORCED)) &&
+	if ((events & BIT(VLAN_ADDR_FORCED)) &&
 	    !(p_vf->configured_features & (1 << VLAN_ADDR_FORCED)))
 		rc = qed_iov_reconfigure_unicast_vlan(p_hwfn, p_vf);
 
@@ -1479,7 +1508,7 @@
 	if (!p_vf->vport_instance)
 		return -EINVAL;
 
-	if (events & (1 << MAC_ADDR_FORCED)) {
+	if (events & BIT(MAC_ADDR_FORCED)) {
 		/* Since there's no way [currently] of removing the MAC,
 		 * we can always assume this means we need to force it.
 		 */
@@ -1502,7 +1531,7 @@
 		p_vf->configured_features |= 1 << MAC_ADDR_FORCED;
 	}
 
-	if (events & (1 << VLAN_ADDR_FORCED)) {
+	if (events & BIT(VLAN_ADDR_FORCED)) {
 		struct qed_sp_vport_update_params vport_update;
 		u8 removal;
 		int i;
@@ -1572,7 +1601,7 @@
 		if (filter.vlan)
 			p_vf->configured_features |= 1 << VLAN_ADDR_FORCED;
 		else
-			p_vf->configured_features &= ~(1 << VLAN_ADDR_FORCED);
+			p_vf->configured_features &= ~BIT(VLAN_ADDR_FORCED);
 	}
 
 	/* If forced features are terminated, we need to configure the shadow
@@ -1619,8 +1648,7 @@
 
 		qed_int_cau_conf_sb(p_hwfn, p_ptt,
 				    start->sb_addr[sb_id],
-				    vf->igu_sbs[sb_id],
-				    vf->abs_vf_id, 1);
+				    vf->igu_sbs[sb_id], vf->abs_vf_id, 1);
 	}
 	qed_iov_enable_vf_traffic(p_hwfn, p_ptt, vf);
 
@@ -1632,7 +1660,7 @@
 	 * vfs that would still be fine, since they passed '0' as padding].
 	 */
 	p_bitmap = &vf_info->bulletin.p_virt->valid_bitmap;
-	if (!(*p_bitmap & (1 << VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) {
+	if (!(*p_bitmap & BIT(VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) {
 		u8 vf_req = start->only_untagged;
 
 		vf_info->bulletin.p_virt->default_only_untagged = vf_req;
@@ -1650,9 +1678,10 @@
 	params.vport_id = vf->vport_id;
 	params.max_buffers_per_cqe = start->max_buffers_per_cqe;
 	params.mtu = vf->mtu;
+	params.check_mac = true;
 
 	rc = qed_sp_eth_vport_start(p_hwfn, &params);
-	if (rc != 0) {
+	if (rc) {
 		DP_ERR(p_hwfn,
 		       "qed_iov_vf_mbx_start_vport returned error %d\n", rc);
 		status = PFVF_STATUS_FAILURE;
@@ -1679,7 +1708,7 @@
 	vf->spoof_chk = false;
 
 	rc = qed_sp_vport_stop(p_hwfn, vf->opaque_fid, vf->vport_id);
-	if (rc != 0) {
+	if (rc) {
 		DP_ERR(p_hwfn, "qed_iov_vf_mbx_stop_vport returned error %d\n",
 		       rc);
 		status = PFVF_STATUS_FAILURE;
@@ -1695,21 +1724,32 @@
 
 static void qed_iov_vf_mbx_start_rxq_resp(struct qed_hwfn *p_hwfn,
 					  struct qed_ptt *p_ptt,
-					  struct qed_vf_info *vf, u8 status)
+					  struct qed_vf_info *vf,
+					  u8 status, bool b_legacy)
 {
 	struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
 	struct pfvf_start_queue_resp_tlv *p_tlv;
 	struct vfpf_start_rxq_tlv *req;
+	u16 length;
 
 	mbx->offset = (u8 *)mbx->reply_virt;
 
+	/* Taking a bigger struct instead of adding a TLV to list was a
+	 * mistake, but one which we're now stuck with, as some older
+	 * clients assume the size of the previous response.
+	 */
+	if (!b_legacy)
+		length = sizeof(*p_tlv);
+	else
+		length = sizeof(struct pfvf_def_resp_tlv);
+
 	p_tlv = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_RXQ,
-			    sizeof(*p_tlv));
+			    length);
 	qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
 		    sizeof(struct channel_list_end_tlv));
 
 	/* Update the TLV with the response */
-	if (status == PFVF_STATUS_SUCCESS) {
+	if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) {
 		req = &mbx->req_virt->start_rxq;
 		p_tlv->offset = PXP_VF_BAR0_START_MSDM_ZONE_B +
 				offsetof(struct mstorm_vf_zone,
@@ -1717,7 +1757,7 @@
 				sizeof(struct eth_rx_prod_data) * req->rx_qid;
 	}
 
-	qed_iov_send_response(p_hwfn, p_ptt, vf, sizeof(*p_tlv), status);
+	qed_iov_send_response(p_hwfn, p_ptt, vf, length, status);
 }
 
 static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
@@ -1728,6 +1768,7 @@
 	struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
 	u8 status = PFVF_STATUS_NO_RESOURCE;
 	struct vfpf_start_rxq_tlv *req;
+	bool b_legacy_vf = false;
 	int rc;
 
 	memset(&params, 0, sizeof(params));
@@ -1743,13 +1784,27 @@
 	params.sb = req->hw_sb;
 	params.sb_idx = req->sb_index;
 
+	/* Legacy VFs have their Producers in a different location, which they
+	 * calculate on their own and clean the producer prior to this.
+	 */
+	if (vf->acquire.vfdev_info.eth_fp_hsi_minor ==
+	    ETH_HSI_VER_NO_PKT_LEN_TUNN) {
+		b_legacy_vf = true;
+	} else {
+		REG_WR(p_hwfn,
+		       GTT_BAR0_MAP_REG_MSDM_RAM +
+		       MSTORM_ETH_VF_PRODS_OFFSET(vf->abs_vf_id, req->rx_qid),
+		       0);
+	}
+
 	rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, vf->opaque_fid,
 					 vf->vf_queues[req->rx_qid].fw_cid,
 					 &params,
 					 vf->abs_vf_id + 0x10,
 					 req->bd_max_bytes,
 					 req->rxq_addr,
-					 req->cqe_pbl_addr, req->cqe_pbl_size);
+					 req->cqe_pbl_addr, req->cqe_pbl_size,
+					 b_legacy_vf);
 
 	if (rc) {
 		status = PFVF_STATUS_FAILURE;
@@ -1760,7 +1815,7 @@
 	}
 
 out:
-	qed_iov_vf_mbx_start_rxq_resp(p_hwfn, p_ptt, vf, status);
+	qed_iov_vf_mbx_start_rxq_resp(p_hwfn, p_ptt, vf, status, b_legacy_vf);
 }
 
 static void qed_iov_vf_mbx_start_txq_resp(struct qed_hwfn *p_hwfn,
@@ -1769,23 +1824,38 @@
 {
 	struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx;
 	struct pfvf_start_queue_resp_tlv *p_tlv;
+	bool b_legacy = false;
+	u16 length;
 
 	mbx->offset = (u8 *)mbx->reply_virt;
 
+	/* Taking a bigger struct instead of adding a TLV to list was a
+	 * mistake, but one which we're now stuck with, as some older
+	 * clients assume the size of the previous response.
+	 */
+	if (p_vf->acquire.vfdev_info.eth_fp_hsi_minor ==
+	    ETH_HSI_VER_NO_PKT_LEN_TUNN)
+		b_legacy = true;
+
+	if (!b_legacy)
+		length = sizeof(*p_tlv);
+	else
+		length = sizeof(struct pfvf_def_resp_tlv);
+
 	p_tlv = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_TXQ,
-			    sizeof(*p_tlv));
+			    length);
 	qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END,
 		    sizeof(struct channel_list_end_tlv));
 
 	/* Update the TLV with the response */
-	if (status == PFVF_STATUS_SUCCESS) {
+	if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) {
 		u16 qid = mbx->req_virt->start_txq.tx_qid;
 
-		p_tlv->offset = qed_db_addr(p_vf->vf_queues[qid].fw_cid,
-					    DQ_DEMS_LEGACY);
+		p_tlv->offset = qed_db_addr_vf(p_vf->vf_queues[qid].fw_cid,
+					       DQ_DEMS_LEGACY);
 	}
 
-	qed_iov_send_response(p_hwfn, p_ptt, p_vf, sizeof(*p_tlv), status);
+	qed_iov_send_response(p_hwfn, p_ptt, p_vf, length, status);
 }
 
 static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
@@ -2045,7 +2115,7 @@
 	p_vf->shadow_config.inner_vlan_removal = p_vlan_tlv->remove_vlan;
 
 	/* Ignore the VF request if we're forcing a vlan */
-	if (!(p_vf->configured_features & (1 << VLAN_ADDR_FORCED))) {
+	if (!(p_vf->configured_features & BIT(VLAN_ADDR_FORCED))) {
 		p_data->update_inner_vlan_removal_flg = 1;
 		p_data->inner_vlan_removal_flg = p_vlan_tlv->remove_vlan;
 	}
@@ -2340,7 +2410,7 @@
 	/* In forced mode, we're willing to remove entries - but we don't add
 	 * new ones.
 	 */
-	if (p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED))
+	if (p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED))
 		return 0;
 
 	if (p_params->opcode == QED_FILTER_ADD ||
@@ -2374,7 +2444,7 @@
 	int i;
 
 	/* If we're in forced-mode, we don't allow any change */
-	if (p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED))
+	if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))
 		return 0;
 
 	/* First remove entries and then add new ones */
@@ -2441,8 +2511,8 @@
 	return rc;
 }
 
-int qed_iov_chk_ucast(struct qed_hwfn *hwfn,
-		      int vfid, struct qed_filter_ucast *params)
+static int qed_iov_chk_ucast(struct qed_hwfn *hwfn,
+			     int vfid, struct qed_filter_ucast *params)
 {
 	struct qed_public_vf_info *vf;
 
@@ -2509,7 +2579,7 @@
 	}
 
 	/* Determine if the unicast filtering is acceptible by PF */
-	if ((p_bulletin->valid_bitmap & (1 << VLAN_ADDR_FORCED)) &&
+	if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) &&
 	    (params.type == QED_FILTER_VLAN ||
 	     params.type == QED_FILTER_MAC_VLAN)) {
 		/* Once VLAN is forced or PVID is set, do not allow
@@ -2521,7 +2591,7 @@
 		goto out;
 	}
 
-	if ((p_bulletin->valid_bitmap & (1 << MAC_ADDR_FORCED)) &&
+	if ((p_bulletin->valid_bitmap & BIT(MAC_ADDR_FORCED)) &&
 	    (params.type == QED_FILTER_MAC ||
 	     params.type == QED_FILTER_MAC_VLAN)) {
 		if (!ether_addr_equal(p_bulletin->mac, params.mac) ||
@@ -2749,7 +2819,7 @@
 		/* Mark VF for ack and clean pending state */
 		if (p_vf->state == VF_RESET)
 			p_vf->state = VF_STOPPED;
-		ack_vfs[vfid / 32] |= (1 << (vfid % 32));
+		ack_vfs[vfid / 32] |= BIT((vfid % 32));
 		p_hwfn->pf_iov_info->pending_flr[rel_vf_id / 64] &=
 		    ~(1ULL << (rel_vf_id % 64));
 		p_hwfn->pf_iov_info->pending_events[rel_vf_id / 64] &=
@@ -2759,7 +2829,8 @@
 	return rc;
 }
 
-int qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+static int
+qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 ack_vfs[VF_MAX_STATIC / 32];
 	int rc = 0;
@@ -2805,7 +2876,7 @@
 			continue;
 
 		vfid = p_vf->abs_vf_id;
-		if ((1 << (vfid % 32)) & p_disabled_vfs[vfid / 32]) {
+		if (BIT((vfid % 32)) & p_disabled_vfs[vfid / 32]) {
 			u64 *p_flr = p_hwfn->pf_iov_info->pending_flr;
 			u16 rel_vf_id = p_vf->relative_vf_id;
 
@@ -2946,7 +3017,7 @@
 	}
 }
 
-void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid)
+static void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid)
 {
 	u64 add_bit = 1ULL << (vfid % 64);
 
@@ -3064,14 +3135,13 @@
 
 	vf_info->bulletin.p_virt->valid_bitmap |= feature;
 	/* Forced MAC will disable MAC_ADDR */
-	vf_info->bulletin.p_virt->valid_bitmap &=
-				~(1 << VFPF_BULLETIN_MAC_ADDR);
+	vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR);
 
 	qed_iov_configure_vport_forced(p_hwfn, vf_info, feature);
 }
 
-void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn,
-				      u16 pvid, int vfid)
+static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn,
+					     u16 pvid, int vfid)
 {
 	struct qed_vf_info *vf_info;
 	u64 feature;
@@ -3104,7 +3174,7 @@
 	return !!p_vf_info->vport_instance;
 }
 
-bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid)
+static bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid)
 {
 	struct qed_vf_info *p_vf_info;
 
@@ -3126,7 +3196,7 @@
 	return vf_info->spoof_chk;
 }
 
-int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val)
+static int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val)
 {
 	struct qed_vf_info *vf;
 	int rc = -EINVAL;
@@ -3163,13 +3233,14 @@
 	if (!p_vf || !p_vf->bulletin.p_virt)
 		return NULL;
 
-	if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED)))
+	if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)))
 		return NULL;
 
 	return p_vf->bulletin.p_virt->mac;
 }
 
-u16 qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id)
+static u16
+qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id)
 {
 	struct qed_vf_info *p_vf;
 
@@ -3177,7 +3248,7 @@
 	if (!p_vf || !p_vf->bulletin.p_virt)
 		return 0;
 
-	if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED)))
+	if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED)))
 		return 0;
 
 	return p_vf->bulletin.p_virt->pvid;
@@ -3201,7 +3272,8 @@
 	return qed_init_vport_rl(p_hwfn, p_ptt, abs_vp_id, (u32)val);
 }
 
-int qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate)
+static int
+qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate)
 {
 	struct qed_vf_info *vf;
 	u8 vport_id;
@@ -3760,7 +3832,8 @@
 	qed_ptt_release(hwfn, ptt);
 }
 
-void qed_iov_pf_task(struct work_struct *work)
+static void qed_iov_pf_task(struct work_struct *work)
+
 {
 	struct qed_hwfn *hwfn = container_of(work, struct qed_hwfn,
 					     iov_task.work);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 9b780b3..abf5bf1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -46,6 +46,17 @@
 	return p_tlv;
 }
 
+static void qed_vf_pf_req_end(struct qed_hwfn *p_hwfn, int req_status)
+{
+	union pfvf_tlvs *resp = p_hwfn->vf_iov_info->pf2vf_reply;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+		   "VF request status = 0x%x, PF reply status = 0x%x\n",
+		   req_status, resp->default_resp.hdr.status);
+
+	mutex_unlock(&(p_hwfn->vf_iov_info->mutex));
+}
+
 static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size)
 {
 	union vfpf_tlvs *p_req = p_hwfn->vf_iov_info->vf2pf_request;
@@ -103,16 +114,12 @@
 			   "VF <-- PF Timeout [Type %d]\n",
 			   p_req->first_tlv.tl.type);
 		rc = -EBUSY;
-		goto exit;
 	} else {
 		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
 			   "PF response: %d [Type %d]\n",
 			   *done, p_req->first_tlv.tl.type);
 	}
 
-exit:
-	mutex_unlock(&(p_hwfn->vf_iov_info->mutex));
-
 	return rc;
 }
 
@@ -191,6 +198,9 @@
 		DP_VERBOSE(p_hwfn,
 			   QED_MSG_IOV, "attempting to acquire resources\n");
 
+		/* Clear response buffer, as this might be a re-send */
+		memset(p_iov->pf2vf_reply, 0, sizeof(union pfvf_tlvs));
+
 		/* send acquire request */
 		rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
 		if (rc)
@@ -205,9 +215,12 @@
 			/* PF agrees to allocate our resources */
 			if (!(resp->pfdev_info.capabilities &
 			      PFVF_ACQUIRE_CAP_POST_FW_OVERRIDE)) {
-				DP_INFO(p_hwfn,
-					"PF is using old incompatible driver; Either downgrade driver or request provider to update hypervisor version\n");
-				return -EINVAL;
+				/* It's possible legacy PF mistakenly accepted;
+				 * but we don't care - simply mark it as
+				 * legacy and continue.
+				 */
+				req->vfdev_info.capabilities |=
+				    VFPF_ACQUIRE_CAP_PRE_FP_HSI;
 			}
 			DP_VERBOSE(p_hwfn, QED_MSG_IOV, "resources acquired\n");
 			resources_acquired = true;
@@ -215,27 +228,55 @@
 			   attempts < VF_ACQUIRE_THRESH) {
 			qed_vf_pf_acquire_reduce_resc(p_hwfn, p_resc,
 						      &resp->resc);
+		} else if (resp->hdr.status == PFVF_STATUS_NOT_SUPPORTED) {
+			if (pfdev_info->major_fp_hsi &&
+			    (pfdev_info->major_fp_hsi != ETH_HSI_VER_MAJOR)) {
+				DP_NOTICE(p_hwfn,
+					  "PF uses an incompatible fastpath HSI %02x.%02x [VF requires %02x.%02x]. Please change to a VF driver using %02x.xx.\n",
+					  pfdev_info->major_fp_hsi,
+					  pfdev_info->minor_fp_hsi,
+					  ETH_HSI_VER_MAJOR,
+					  ETH_HSI_VER_MINOR,
+					  pfdev_info->major_fp_hsi);
+				rc = -EINVAL;
+				goto exit;
+			}
 
-			/* Clear response buffer */
-			memset(p_iov->pf2vf_reply, 0, sizeof(union pfvf_tlvs));
-		} else if ((resp->hdr.status == PFVF_STATUS_NOT_SUPPORTED) &&
-			   pfdev_info->major_fp_hsi &&
-			   (pfdev_info->major_fp_hsi != ETH_HSI_VER_MAJOR)) {
-			DP_NOTICE(p_hwfn,
-				  "PF uses an incompatible fastpath HSI %02x.%02x [VF requires %02x.%02x]. Please change to a VF driver using %02x.xx.\n",
-				  pfdev_info->major_fp_hsi,
-				  pfdev_info->minor_fp_hsi,
-				  ETH_HSI_VER_MAJOR,
-				  ETH_HSI_VER_MINOR, pfdev_info->major_fp_hsi);
-			return -EINVAL;
+			if (!pfdev_info->major_fp_hsi) {
+				if (req->vfdev_info.capabilities &
+				    VFPF_ACQUIRE_CAP_PRE_FP_HSI) {
+					DP_NOTICE(p_hwfn,
+						  "PF uses very old drivers. Please change to a VF driver using no later than 8.8.x.x.\n");
+					rc = -EINVAL;
+					goto exit;
+				} else {
+					DP_INFO(p_hwfn,
+						"PF is old - try re-acquire to see if it supports FW-version override\n");
+					req->vfdev_info.capabilities |=
+					    VFPF_ACQUIRE_CAP_PRE_FP_HSI;
+					continue;
+				}
+			}
+
+			/* If PF/VF are using same Major, PF must have had
+			 * it's reasons. Simply fail.
+			 */
+			DP_NOTICE(p_hwfn, "PF rejected acquisition by VF\n");
+			rc = -EINVAL;
+			goto exit;
 		} else {
 			DP_ERR(p_hwfn,
 			       "PF returned error %d to VF acquisition request\n",
 			       resp->hdr.status);
-			return -EAGAIN;
+			rc = -EAGAIN;
+			goto exit;
 		}
 	}
 
+	/* Mark the PF as legacy, if needed */
+	if (req->vfdev_info.capabilities & VFPF_ACQUIRE_CAP_PRE_FP_HSI)
+		p_iov->b_pre_fp_hsi = true;
+
 	/* Update bulletin board size with response from PF */
 	p_iov->bulletin.size = resp->bulletin_size;
 
@@ -253,14 +294,18 @@
 		}
 	}
 
-	if (ETH_HSI_VER_MINOR &&
+	if (!p_iov->b_pre_fp_hsi &&
+	    ETH_HSI_VER_MINOR &&
 	    (resp->pfdev_info.minor_fp_hsi < ETH_HSI_VER_MINOR)) {
 		DP_INFO(p_hwfn,
 			"PF is using older fastpath HSI; %02x.%02x is configured\n",
 			ETH_HSI_VER_MAJOR, resp->pfdev_info.minor_fp_hsi);
 	}
 
-	return 0;
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
+
+	return rc;
 }
 
 int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
@@ -286,31 +331,23 @@
 
 	/* Allocate vf sriov info */
 	p_iov = kzalloc(sizeof(*p_iov), GFP_KERNEL);
-	if (!p_iov) {
-		DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sriov'\n");
+	if (!p_iov)
 		return -ENOMEM;
-	}
 
 	/* Allocate vf2pf msg */
 	p_iov->vf2pf_request = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
 						  sizeof(union vfpf_tlvs),
 						  &p_iov->vf2pf_request_phys,
 						  GFP_KERNEL);
-	if (!p_iov->vf2pf_request) {
-		DP_NOTICE(p_hwfn,
-			  "Failed to allocate `vf2pf_request' DMA memory\n");
+	if (!p_iov->vf2pf_request)
 		goto free_p_iov;
-	}
 
 	p_iov->pf2vf_reply = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
 						sizeof(union pfvf_tlvs),
 						&p_iov->pf2vf_reply_phys,
 						GFP_KERNEL);
-	if (!p_iov->pf2vf_reply) {
-		DP_NOTICE(p_hwfn,
-			  "Failed to allocate `pf2vf_reply' DMA memory\n");
+	if (!p_iov->pf2vf_reply)
 		goto free_vf2pf_request;
-	}
 
 	DP_VERBOSE(p_hwfn,
 		   QED_MSG_IOV,
@@ -347,6 +384,9 @@
 
 	return -ENOMEM;
 }
+#define TSTORM_QZONE_START   PXP_VF_BAR0_START_SDM_ZONE_A
+#define MSTORM_QZONE_START(dev)   (TSTORM_QZONE_START +	\
+				   (TSTORM_QZONE_SIZE * NUM_OF_L2_QUEUES(dev)))
 
 int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 			u8 rx_qid,
@@ -374,6 +414,21 @@
 	req->bd_max_bytes = bd_max_bytes;
 	req->stat_id = -1;
 
+	/* If PF is legacy, we'll need to calculate producers ourselves
+	 * as well as clean them.
+	 */
+	if (pp_prod && p_iov->b_pre_fp_hsi) {
+		u8 hw_qid = p_iov->acquire_resp.resc.hw_qid[rx_qid];
+		u32 init_prod_val = 0;
+
+		*pp_prod = (u8 __iomem *)p_hwfn->regview +
+					 MSTORM_QZONE_START(p_hwfn->cdev) +
+					 hw_qid * MSTORM_QZONE_SIZE;
+
+		/* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
+		__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
+				  (u32 *)(&init_prod_val));
+	}
 	/* add list termination tlv */
 	qed_add_tlv(p_hwfn, &p_iov->offset,
 		    CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv));
@@ -381,13 +436,15 @@
 	resp = &p_iov->pf2vf_reply->queue_start;
 	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
 	if (rc)
-		return rc;
+		goto exit;
 
-	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-		return -EINVAL;
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		rc = -EINVAL;
+		goto exit;
+	}
 
 	/* Learn the address of the producer from the response */
-	if (pp_prod) {
+	if (pp_prod && !p_iov->b_pre_fp_hsi) {
 		u32 init_prod_val = 0;
 
 		*pp_prod = (u8 __iomem *)p_hwfn->regview + resp->offset;
@@ -399,6 +456,8 @@
 		__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
 				  (u32 *)&init_prod_val);
 	}
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
 
 	return rc;
 }
@@ -424,10 +483,15 @@
 	resp = &p_iov->pf2vf_reply->default_resp;
 	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
 	if (rc)
-		return rc;
+		goto exit;
 
-	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-		return -EINVAL;
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		rc = -EINVAL;
+		goto exit;
+	}
+
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
 
 	return rc;
 }
@@ -470,13 +534,27 @@
 	}
 
 	if (pp_doorbell) {
-		*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + resp->offset;
+		/* Modern PFs provide the actual offsets, while legacy
+		 * provided only the queue id.
+		 */
+		if (!p_iov->b_pre_fp_hsi) {
+			*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
+						     resp->offset;
+		} else {
+			u8 cid = p_iov->acquire_resp.resc.cid[tx_queue_id];
+			u32 db_addr;
+
+			db_addr = qed_db_addr_vf(cid, DQ_DEMS_LEGACY);
+			*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
+						     db_addr;
+		}
 
 		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
 			   "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n",
 			   tx_queue_id, *pp_doorbell, resp->offset);
 	}
 exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
 
 	return rc;
 }
@@ -501,10 +579,15 @@
 	resp = &p_iov->pf2vf_reply->default_resp;
 	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
 	if (rc)
-		return rc;
+		goto exit;
 
-	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-		return -EINVAL;
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		rc = -EINVAL;
+		goto exit;
+	}
+
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
 
 	return rc;
 }
@@ -543,10 +626,15 @@
 	resp = &p_iov->pf2vf_reply->default_resp;
 	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
 	if (rc)
-		return rc;
+		goto exit;
 
-	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-		return -EINVAL;
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		rc = -EINVAL;
+		goto exit;
+	}
+
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
 
 	return rc;
 }
@@ -567,10 +655,15 @@
 
 	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
 	if (rc)
-		return rc;
+		goto exit;
 
-	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-		return -EINVAL;
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		rc = -EINVAL;
+		goto exit;
+	}
+
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
 
 	return rc;
 }
@@ -770,13 +863,18 @@
 
 	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, resp_size);
 	if (rc)
-		return rc;
+		goto exit;
 
-	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-		return -EINVAL;
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		rc = -EINVAL;
+		goto exit;
+	}
 
 	qed_vf_handle_vp_update_tlvs_resp(p_hwfn, p_params);
 
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
+
 	return rc;
 }
 
@@ -797,14 +895,19 @@
 	resp = &p_iov->pf2vf_reply->default_resp;
 	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
 	if (rc)
-		return rc;
+		goto exit;
 
-	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-		return -EAGAIN;
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		rc = -EAGAIN;
+		goto exit;
+	}
 
 	p_hwfn->b_int_enabled = 0;
 
-	return 0;
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
+
+	return rc;
 }
 
 int qed_vf_pf_release(struct qed_hwfn *p_hwfn)
@@ -828,6 +931,8 @@
 	if (!rc && resp->hdr.status != PFVF_STATUS_SUCCESS)
 		rc = -EAGAIN;
 
+	qed_vf_pf_req_end(p_hwfn, rc);
+
 	p_hwfn->b_int_enabled = 0;
 
 	if (p_iov->vf2pf_request)
@@ -896,12 +1001,17 @@
 	resp = &p_iov->pf2vf_reply->default_resp;
 	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
 	if (rc)
-		return rc;
+		goto exit;
 
-	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-		return -EAGAIN;
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		rc = -EAGAIN;
+		goto exit;
+	}
 
-	return 0;
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
+
+	return rc;
 }
 
 int qed_vf_pf_int_cleanup(struct qed_hwfn *p_hwfn)
@@ -920,12 +1030,17 @@
 
 	rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp));
 	if (rc)
-		return rc;
+		goto exit;
 
-	if (resp->hdr.status != PFVF_STATUS_SUCCESS)
-		return -EINVAL;
+	if (resp->hdr.status != PFVF_STATUS_SUCCESS) {
+		rc = -EINVAL;
+		goto exit;
+	}
 
-	return 0;
+exit:
+	qed_vf_pf_req_end(p_hwfn, rc);
+
+	return rc;
 }
 
 u16 qed_vf_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
@@ -1071,8 +1186,8 @@
 	return false;
 }
 
-bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn,
-				    u8 *dst_mac, u8 *p_is_forced)
+static bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn,
+					   u8 *dst_mac, u8 *p_is_forced)
 {
 	struct qed_bulletin_content *bulletin;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index b23ce58..35db7a28 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -86,7 +86,7 @@
 	struct vfpf_first_tlv first_tlv;
 
 	struct vf_pf_vfdev_info {
-#define VFPF_ACQUIRE_CAP_OBSOLETE	(1 << 0)
+#define VFPF_ACQUIRE_CAP_PRE_FP_HSI     (1 << 0) /* VF pre-FP hsi version */
 #define VFPF_ACQUIRE_CAP_100G		(1 << 1) /* VF can support 100g */
 		u64 capabilities;
 		u8 fw_major;
@@ -551,6 +551,11 @@
 
 	/* we set aside a copy of the acquire response */
 	struct pfvf_acquire_resp_tlv acquire_resp;
+
+	/* In case PF originates prior to the fp-hsi version comparison,
+	 * this has to be propagated as it affects the fastpath.
+	 */
+	bool b_pre_fp_hsi;
 };
 
 #ifdef CONFIG_QED_SRIOV
diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile
index 74a4985..28dc589 100644
--- a/drivers/net/ethernet/qlogic/qede/Makefile
+++ b/drivers/net/ethernet/qlogic/qede/Makefile
@@ -2,3 +2,4 @@
 
 qede-y := qede_main.o qede_ethtool.o
 qede-$(CONFIG_DCB) += qede_dcbnl.o
+qede-$(CONFIG_INFINIBAND_QEDR) += qede_roce.o
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 02b06d4..28c0e9f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -25,7 +25,7 @@
 
 #define QEDE_MAJOR_VERSION		8
 #define QEDE_MINOR_VERSION		10
-#define QEDE_REVISION_VERSION		1
+#define QEDE_REVISION_VERSION		9
 #define QEDE_ENGINEERING_VERSION	20
 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "."	\
 		__stringify(QEDE_MINOR_VERSION) "."		\
@@ -36,6 +36,8 @@
 
 struct qede_stats {
 	u64 no_buff_discards;
+	u64 packet_too_big_discard;
+	u64 ttl0_discard;
 	u64 rx_ucast_bytes;
 	u64 rx_mcast_bytes;
 	u64 rx_bcast_bytes;
@@ -104,6 +106,13 @@
 	bool configured;
 };
 
+struct qede_rdma_dev {
+	struct qedr_dev *qedr_dev;
+	struct list_head entry;
+	struct list_head roce_event_list;
+	struct workqueue_struct *roce_wq;
+};
+
 struct qede_dev {
 	struct qed_dev			*cdev;
 	struct net_device		*ndev;
@@ -124,16 +133,22 @@
 				 (edev)->dev_info.num_tc)
 
 	struct qede_fastpath		*fp_array;
-	u16				req_rss;
-	u16				num_rss;
+	u8				req_num_tx;
+	u8				fp_num_tx;
+	u8				req_num_rx;
+	u8				fp_num_rx;
+	u16				req_queues;
+	u16				num_queues;
 	u8				num_tc;
-#define QEDE_RSS_CNT(edev)		((edev)->num_rss)
-#define QEDE_TSS_CNT(edev)		((edev)->num_rss *	\
-					 (edev)->num_tc)
-#define QEDE_TSS_IDX(edev, txqidx)	((txqidx) % (edev)->num_rss)
-#define QEDE_TC_IDX(edev, txqidx)	((txqidx) / (edev)->num_rss)
+#define QEDE_QUEUE_CNT(edev)	((edev)->num_queues)
+#define QEDE_RSS_COUNT(edev)	((edev)->num_queues - (edev)->fp_num_tx)
+#define QEDE_TSS_COUNT(edev)	(((edev)->num_queues - (edev)->fp_num_rx) * \
+				 (edev)->num_tc)
+#define QEDE_TX_IDX(edev, txqidx)	((edev)->fp_num_rx + (txqidx) % \
+					 QEDE_TSS_COUNT(edev))
+#define QEDE_TC_IDX(edev, txqidx)	((txqidx) / QEDE_TSS_COUNT(edev))
 #define QEDE_TX_QUEUE(edev, txqidx)	\
-	(&(edev)->fp_array[QEDE_TSS_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX( \
+	(&(edev)->fp_array[QEDE_TX_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX(\
 							(edev), (txqidx))])
 
 	struct qed_int_info		int_info;
@@ -177,6 +192,8 @@
 	unsigned long			sp_flags;
 	u16				vxlan_dst_port;
 	u16				geneve_dst_port;
+
+	struct qede_rdma_dev		rdma_info;
 };
 
 enum QEDE_STATE {
@@ -235,6 +252,7 @@
 	u16			num_rx_buffers;
 	u16			rxq_id;
 
+	u64			rcv_pkts;
 	u64			rx_hw_errors;
 	u64			rx_alloc_errors;
 	u64			rx_ip_frags;
@@ -263,6 +281,10 @@
 	union db_prod		tx_db;
 
 	u16			num_tx_buffers;
+	u64			xmit_pkts;
+	u64			stopped_cnt;
+
+	bool			is_legacy;
 };
 
 #define BD_UNMAP_ADDR(bd)		HILO_U64(le32_to_cpu((bd)->addr.hi), \
@@ -277,7 +299,11 @@
 
 struct qede_fastpath {
 	struct qede_dev	*edev;
-	u8			rss_id;
+#define QEDE_FASTPATH_TX	BIT(0)
+#define QEDE_FASTPATH_RX	BIT(1)
+#define QEDE_FASTPATH_COMBINED	(QEDE_FASTPATH_TX | QEDE_FASTPATH_RX)
+	u8			type;
+	u8			id;
 	struct napi_struct	napi;
 	struct qed_sb_info	*sb_info;
 	struct qede_rx_queue	*rxq;
@@ -337,6 +363,6 @@
 
 #define QEDE_MIN_PKT_LEN	64
 #define QEDE_RX_HDR_SIZE	256
-#define	for_each_rss(i) for (i = 0; i < edev->num_rss; i++)
+#define	for_each_queue(i) for (i = 0; i < edev->num_queues; i++)
 
 #endif /* _QEDE_H_ */
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index f8492ca..25a9b29 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -35,6 +35,7 @@
 	u64 offset;
 	char string[ETH_GSTRING_LEN];
 } qede_rqstats_arr[] = {
+	QEDE_RQSTAT(rcv_pkts),
 	QEDE_RQSTAT(rx_hw_errors),
 	QEDE_RQSTAT(rx_alloc_errors),
 	QEDE_RQSTAT(rx_ip_frags),
@@ -44,6 +45,24 @@
 #define QEDE_RQSTATS_DATA(dev, sindex, rqindex) \
 	(*((u64 *)(((char *)(dev->fp_array[(rqindex)].rxq)) +\
 		    qede_rqstats_arr[(sindex)].offset)))
+#define QEDE_TQSTAT_OFFSET(stat_name) \
+	(offsetof(struct qede_tx_queue, stat_name))
+#define QEDE_TQSTAT_STRING(stat_name) (#stat_name)
+#define QEDE_TQSTAT(stat_name) \
+	{QEDE_TQSTAT_OFFSET(stat_name), QEDE_TQSTAT_STRING(stat_name)}
+#define QEDE_NUM_TQSTATS ARRAY_SIZE(qede_tqstats_arr)
+static const struct {
+	u64 offset;
+	char string[ETH_GSTRING_LEN];
+} qede_tqstats_arr[] = {
+	QEDE_TQSTAT(xmit_pkts),
+	QEDE_TQSTAT(stopped_cnt),
+};
+
+#define QEDE_TQSTATS_DATA(dev, sindex, tssid, tcid) \
+	(*((u64 *)(((void *)(&dev->fp_array[tssid].txqs[tcid])) +\
+		   qede_tqstats_arr[(sindex)].offset)))
+
 static const struct {
 	u64 offset;
 	char string[ETH_GSTRING_LEN];
@@ -107,6 +126,8 @@
 	QEDE_PF_STAT(mftag_filter_discards),
 	QEDE_PF_STAT(mac_filter_discards),
 	QEDE_STAT(tx_err_drop_pkts),
+	QEDE_STAT(ttl0_discard),
+	QEDE_STAT(packet_too_big_discard),
 
 	QEDE_STAT(coalesced_pkts),
 	QEDE_STAT(coalesced_events),
@@ -151,17 +172,29 @@
 {
 	int i, j, k;
 
+	for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) {
+		int tc;
+
+		for (j = 0; j < QEDE_NUM_RQSTATS; j++)
+			sprintf(buf + (k + j) * ETH_GSTRING_LEN,
+				"%d:   %s", i, qede_rqstats_arr[j].string);
+		k += QEDE_NUM_RQSTATS;
+		for (tc = 0; tc < edev->num_tc; tc++) {
+			for (j = 0; j < QEDE_NUM_TQSTATS; j++)
+				sprintf(buf + (k + j) * ETH_GSTRING_LEN,
+					"%d.%d: %s", i, tc,
+					qede_tqstats_arr[j].string);
+			k += QEDE_NUM_TQSTATS;
+		}
+	}
+
 	for (i = 0, j = 0; i < QEDE_NUM_STATS; i++) {
 		if (IS_VF(edev) && qede_stats_arr[i].pf_only)
 			continue;
-		strcpy(buf + j * ETH_GSTRING_LEN,
+		strcpy(buf + (k + j) * ETH_GSTRING_LEN,
 		       qede_stats_arr[i].string);
 		j++;
 	}
-
-	for (k = 0; k < QEDE_NUM_RQSTATS; k++, j++)
-		strcpy(buf + j * ETH_GSTRING_LEN,
-		       qede_rqstats_arr[k].string);
 }
 
 static void qede_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
@@ -197,19 +230,30 @@
 
 	mutex_lock(&edev->qede_lock);
 
+	for (qid = 0; qid < QEDE_QUEUE_CNT(edev); qid++) {
+		int tc;
+
+		if (edev->fp_array[qid].type & QEDE_FASTPATH_RX) {
+			for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++)
+				buf[cnt++] = QEDE_RQSTATS_DATA(edev, sidx, qid);
+		}
+
+		if (edev->fp_array[qid].type & QEDE_FASTPATH_TX) {
+			for (tc = 0; tc < edev->num_tc; tc++) {
+				for (sidx = 0; sidx < QEDE_NUM_TQSTATS; sidx++)
+					buf[cnt++] = QEDE_TQSTATS_DATA(edev,
+								       sidx,
+								       qid, tc);
+			}
+		}
+	}
+
 	for (sidx = 0; sidx < QEDE_NUM_STATS; sidx++) {
 		if (IS_VF(edev) && qede_stats_arr[sidx].pf_only)
 			continue;
 		buf[cnt++] = QEDE_STATS_DATA(edev, sidx);
 	}
 
-	for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++) {
-		buf[cnt] = 0;
-		for (qid = 0; qid < edev->num_rss; qid++)
-			buf[cnt] += QEDE_RQSTATS_DATA(edev, sidx, qid);
-		cnt++;
-	}
-
 	mutex_unlock(&edev->qede_lock);
 }
 
@@ -227,7 +271,8 @@
 				if (qede_stats_arr[i].pf_only)
 					num_stats--;
 		}
-		return num_stats + QEDE_NUM_RQSTATS;
+		return num_stats + QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS +
+		       QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS * edev->num_tc;
 	case ETH_SS_PRIV_FLAGS:
 		return QEDE_PRI_FLAG_LEN;
 	case ETH_SS_TEST:
@@ -249,78 +294,150 @@
 	return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT;
 }
 
-static int qede_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+struct qede_link_mode_mapping {
+	u32 qed_link_mode;
+	u32 ethtool_link_mode;
+};
+
+static const struct qede_link_mode_mapping qed_lm_map[] = {
+	{QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
+	{QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
+	{QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT},
+	{QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
+	{QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT},
+	{QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
+	{QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
+	{QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
+	{QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT},
+	{QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT},
+	{QED_LM_100000baseKR4_Full_BIT,
+	 ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
+};
+
+#define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name)	\
+{								\
+	int i;							\
+								\
+	for (i = 0; i < QED_LM_COUNT; i++) {			\
+		if ((caps) & (qed_lm_map[i].qed_link_mode))	\
+			__set_bit(qed_lm_map[i].ethtool_link_mode,\
+				  lk_ksettings->link_modes.name); \
+	}							\
+}
+
+#define QEDE_ETHTOOL_TO_DRV_CAPS(caps, lk_ksettings, name)	\
+{								\
+	int i;							\
+								\
+	for (i = 0; i < QED_LM_COUNT; i++) {			\
+		if (test_bit(qed_lm_map[i].ethtool_link_mode,	\
+			     lk_ksettings->link_modes.name))	\
+			caps |= qed_lm_map[i].qed_link_mode;	\
+	}							\
+}
+
+static int qede_get_link_ksettings(struct net_device *dev,
+				   struct ethtool_link_ksettings *cmd)
 {
+	struct ethtool_link_settings *base = &cmd->base;
 	struct qede_dev *edev = netdev_priv(dev);
 	struct qed_link_output current_link;
 
 	memset(&current_link, 0, sizeof(current_link));
 	edev->ops->common->get_link(edev->cdev, &current_link);
 
-	cmd->supported = current_link.supported_caps;
-	cmd->advertising = current_link.advertised_caps;
+	ethtool_link_ksettings_zero_link_mode(cmd, supported);
+	QEDE_DRV_TO_ETHTOOL_CAPS(current_link.supported_caps, cmd, supported)
+
+	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
+	QEDE_DRV_TO_ETHTOOL_CAPS(current_link.advertised_caps, cmd, advertising)
+
+	ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising);
+	QEDE_DRV_TO_ETHTOOL_CAPS(current_link.lp_caps, cmd, lp_advertising)
+
 	if ((edev->state == QEDE_STATE_OPEN) && (current_link.link_up)) {
-		ethtool_cmd_speed_set(cmd, current_link.speed);
-		cmd->duplex = current_link.duplex;
+		base->speed = current_link.speed;
+		base->duplex = current_link.duplex;
 	} else {
-		cmd->duplex = DUPLEX_UNKNOWN;
-		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		base->speed = SPEED_UNKNOWN;
+		base->duplex = DUPLEX_UNKNOWN;
 	}
-	cmd->port = current_link.port;
-	cmd->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE :
-						AUTONEG_DISABLE;
-	cmd->lp_advertising = current_link.lp_caps;
+	base->port = current_link.port;
+	base->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE :
+			AUTONEG_DISABLE;
 
 	return 0;
 }
 
-static int qede_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int qede_set_link_ksettings(struct net_device *dev,
+				   const struct ethtool_link_ksettings *cmd)
 {
+	const struct ethtool_link_settings *base = &cmd->base;
 	struct qede_dev *edev = netdev_priv(dev);
 	struct qed_link_output current_link;
 	struct qed_link_params params;
-	u32 speed;
 
 	if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) {
-		DP_INFO(edev,
-			"Link settings are not allowed to be changed\n");
+		DP_INFO(edev, "Link settings are not allowed to be changed\n");
 		return -EOPNOTSUPP;
 	}
-
 	memset(&current_link, 0, sizeof(current_link));
 	memset(&params, 0, sizeof(params));
 	edev->ops->common->get_link(edev->cdev, &current_link);
 
-	speed = ethtool_cmd_speed(cmd);
 	params.override_flags |= QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS;
 	params.override_flags |= QED_LINK_OVERRIDE_SPEED_AUTONEG;
-	if (cmd->autoneg == AUTONEG_ENABLE) {
+	if (base->autoneg == AUTONEG_ENABLE) {
 		params.autoneg = true;
 		params.forced_speed = 0;
-		params.adv_speeds = cmd->advertising;
-	} else { /* forced speed */
+		QEDE_ETHTOOL_TO_DRV_CAPS(params.adv_speeds, cmd, advertising)
+	} else {		/* forced speed */
 		params.override_flags |= QED_LINK_OVERRIDE_SPEED_FORCED_SPEED;
 		params.autoneg = false;
-		params.forced_speed = speed;
-		switch (speed) {
+		params.forced_speed = base->speed;
+		switch (base->speed) {
 		case SPEED_10000:
 			if (!(current_link.supported_caps &
-			    SUPPORTED_10000baseKR_Full)) {
+			      QED_LM_10000baseKR_Full_BIT)) {
 				DP_INFO(edev, "10G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = SUPPORTED_10000baseKR_Full;
+			params.adv_speeds = QED_LM_10000baseKR_Full_BIT;
+			break;
+		case SPEED_25000:
+			if (!(current_link.supported_caps &
+			      QED_LM_25000baseKR_Full_BIT)) {
+				DP_INFO(edev, "25G speed not supported\n");
+				return -EINVAL;
+			}
+			params.adv_speeds = QED_LM_25000baseKR_Full_BIT;
 			break;
 		case SPEED_40000:
 			if (!(current_link.supported_caps &
-			    SUPPORTED_40000baseLR4_Full)) {
+			      QED_LM_40000baseLR4_Full_BIT)) {
 				DP_INFO(edev, "40G speed not supported\n");
 				return -EINVAL;
 			}
-			params.adv_speeds = SUPPORTED_40000baseLR4_Full;
+			params.adv_speeds = QED_LM_40000baseLR4_Full_BIT;
+			break;
+		case SPEED_50000:
+			if (!(current_link.supported_caps &
+			      QED_LM_50000baseKR2_Full_BIT)) {
+				DP_INFO(edev, "50G speed not supported\n");
+				return -EINVAL;
+			}
+			params.adv_speeds = QED_LM_50000baseKR2_Full_BIT;
+			break;
+		case SPEED_100000:
+			if (!(current_link.supported_caps &
+			      QED_LM_100000baseKR4_Full_BIT)) {
+				DP_INFO(edev, "100G speed not supported\n");
+				return -EINVAL;
+			}
+			params.adv_speeds = QED_LM_100000baseKR4_Full_BIT;
 			break;
 		default:
-			DP_INFO(edev, "Unsupported speed %u\n", speed);
+			DP_INFO(edev, "Unsupported speed %u\n", base->speed);
 			return -EINVAL;
 		}
 	}
@@ -368,8 +485,7 @@
 {
 	struct qede_dev *edev = netdev_priv(ndev);
 
-	return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) |
-	       edev->dp_module;
+	return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) | edev->dp_module;
 }
 
 static void qede_set_msglevel(struct net_device *ndev, u32 level)
@@ -393,8 +509,7 @@
 	struct qed_link_params link_params;
 
 	if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) {
-		DP_INFO(edev,
-			"Link settings are not allowed to be changed\n");
+		DP_INFO(edev, "Link settings are not allowed to be changed\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -467,7 +582,7 @@
 
 	rxc = (u16)coal->rx_coalesce_usecs;
 	txc = (u16)coal->tx_coalesce_usecs;
-	for_each_rss(i) {
+	for_each_queue(i) {
 		sb_id = edev->fp_array[i].sb_info->igu_sb_id;
 		rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc,
 						     (u8)i, sb_id);
@@ -563,7 +678,7 @@
 	memset(&params, 0, sizeof(params));
 	params.override_flags |= QED_LINK_OVERRIDE_PAUSE_CONFIG;
 	if (epause->autoneg) {
-		if (!(current_link.supported_caps & SUPPORTED_Autoneg)) {
+		if (!(current_link.supported_caps & QED_LM_Autoneg_BIT)) {
 			DP_INFO(edev, "autoneg not supported\n");
 			return -EINVAL;
 		}
@@ -580,6 +695,28 @@
 	return 0;
 }
 
+static void qede_get_regs(struct net_device *ndev,
+			  struct ethtool_regs *regs, void *buffer)
+{
+	struct qede_dev *edev = netdev_priv(ndev);
+
+	regs->version = 0;
+	memset(buffer, 0, regs->len);
+
+	if (edev->ops && edev->ops->common)
+		edev->ops->common->dbg_all_data(edev->cdev, buffer);
+}
+
+static int qede_get_regs_len(struct net_device *ndev)
+{
+	struct qede_dev *edev = netdev_priv(ndev);
+
+	if (edev->ops && edev->ops->common)
+		return edev->ops->common->dbg_all_data_size(edev->cdev);
+	else
+		return -EINVAL;
+}
+
 static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args)
 {
 	edev->ndev->mtu = args->mtu;
@@ -619,45 +756,70 @@
 	struct qede_dev *edev = netdev_priv(dev);
 
 	channels->max_combined = QEDE_MAX_RSS_CNT(edev);
-	channels->combined_count = QEDE_RSS_CNT(edev);
+	channels->combined_count = QEDE_QUEUE_CNT(edev) - edev->fp_num_tx -
+					edev->fp_num_rx;
+	channels->tx_count = edev->fp_num_tx;
+	channels->rx_count = edev->fp_num_rx;
 }
 
 static int qede_set_channels(struct net_device *dev,
 			     struct ethtool_channels *channels)
 {
 	struct qede_dev *edev = netdev_priv(dev);
+	u32 count;
 
 	DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
 		   "set-channels command parameters: rx = %d, tx = %d, other = %d, combined = %d\n",
 		   channels->rx_count, channels->tx_count,
 		   channels->other_count, channels->combined_count);
 
-	/* We don't support separate rx / tx, nor `other' channels. */
-	if (channels->rx_count || channels->tx_count ||
-	    channels->other_count || (channels->combined_count == 0) ||
-	    (channels->combined_count > QEDE_MAX_RSS_CNT(edev))) {
+	count = channels->rx_count + channels->tx_count +
+			channels->combined_count;
+
+	/* We don't support `other' channels */
+	if (channels->other_count) {
 		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
 			   "command parameters not supported\n");
 		return -EINVAL;
 	}
 
+	if (!(channels->combined_count || (channels->rx_count &&
+					   channels->tx_count))) {
+		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+			   "need to request at least one transmit and one receive channel\n");
+		return -EINVAL;
+	}
+
+	if (count > QEDE_MAX_RSS_CNT(edev)) {
+		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
+			   "requested channels = %d max supported channels = %d\n",
+			   count, QEDE_MAX_RSS_CNT(edev));
+		return -EINVAL;
+	}
+
 	/* Check if there was a change in the active parameters */
-	if (channels->combined_count == QEDE_RSS_CNT(edev)) {
+	if ((count == QEDE_QUEUE_CNT(edev)) &&
+	    (channels->tx_count == edev->fp_num_tx) &&
+	    (channels->rx_count == edev->fp_num_rx)) {
 		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
 			   "No change in active parameters\n");
 		return 0;
 	}
 
 	/* We need the number of queues to be divisible between the hwfns */
-	if (channels->combined_count % edev->dev_info.common.num_hwfns) {
+	if ((count % edev->dev_info.common.num_hwfns) ||
+	    (channels->tx_count % edev->dev_info.common.num_hwfns) ||
+	    (channels->rx_count % edev->dev_info.common.num_hwfns)) {
 		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
-			   "Number of channels must be divisable by %04x\n",
+			   "Number of channels must be divisible by %04x\n",
 			   edev->dev_info.common.num_hwfns);
 		return -EINVAL;
 	}
 
 	/* Set number of queues and reload if necessary */
-	edev->req_rss = channels->combined_count;
+	edev->req_queues = count;
+	edev->req_num_tx = channels->tx_count;
+	edev->req_num_rx = channels->rx_count;
 	if (netif_running(dev))
 		qede_reload(edev, NULL, NULL);
 
@@ -727,7 +889,7 @@
 
 	switch (info->cmd) {
 	case ETHTOOL_GRXRINGS:
-		info->data = edev->num_rss;
+		info->data = QEDE_RSS_COUNT(edev);
 		return 0;
 	case ETHTOOL_GRXFH:
 		return qede_get_rss_flags(edev, info);
@@ -930,7 +1092,7 @@
 	if (!netif_running(edev->ndev))
 		return;
 
-	for_each_rss(i) {
+	for_each_queue(i) {
 		/* Update and reenable interrupts */
 		qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_ENABLE, 1);
 		napi_enable(&edev->fp_array[i].napi);
@@ -942,7 +1104,7 @@
 {
 	int i;
 
-	for_each_rss(i) {
+	for_each_queue(i) {
 		napi_disable(&edev->fp_array[i].napi);
 		/* Disable interrupts */
 		qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_DISABLE, 0);
@@ -952,11 +1114,23 @@
 static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 					  struct sk_buff *skb)
 {
-	struct qede_tx_queue *txq = &edev->fp_array[0].txqs[0];
+	struct qede_tx_queue *txq = NULL;
 	struct eth_tx_1st_bd *first_bd;
 	dma_addr_t mapping;
 	int i, idx, val;
 
+	for_each_queue(i) {
+		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+			txq = edev->fp_array[i].txqs;
+			break;
+		}
+	}
+
+	if (!txq) {
+		DP_NOTICE(edev, "Tx path is not available\n");
+		return -1;
+	}
+
 	/* Fill the entry in the SW ring and the BDs in the FW ring */
 	idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
 	txq->sw_tx_ring[idx].skb = skb;
@@ -1020,14 +1194,26 @@
 
 static int qede_selftest_receive_traffic(struct qede_dev *edev)
 {
-	struct qede_rx_queue *rxq = edev->fp_array[0].rxq;
 	u16 hw_comp_cons, sw_comp_cons, sw_rx_index, len;
 	struct eth_fast_path_rx_reg_cqe *fp_cqe;
+	struct qede_rx_queue *rxq = NULL;
 	struct sw_rx_data *sw_rx_data;
 	union eth_rx_cqe *cqe;
 	u8 *data_ptr;
 	int i;
 
+	for_each_queue(i) {
+		if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
+			rxq = edev->fp_array[i].rxq;
+			break;
+		}
+	}
+
+	if (!rxq) {
+		DP_NOTICE(edev, "Rx path is not available\n");
+		return -1;
+	}
+
 	/* The packet is expected to receive on rx-queue 0 even though RSS is
 	 * enabled. This is because the queue 0 is configured as the default
 	 * queue and that the loopback traffic is not IP.
@@ -1228,9 +1414,11 @@
 }
 
 static const struct ethtool_ops qede_ethtool_ops = {
-	.get_settings = qede_get_settings,
-	.set_settings = qede_set_settings,
+	.get_link_ksettings = qede_get_link_ksettings,
+	.set_link_ksettings = qede_set_link_ksettings,
 	.get_drvinfo = qede_get_drvinfo,
+	.get_regs_len = qede_get_regs_len,
+	.get_regs = qede_get_regs,
 	.get_msglevel = qede_get_msglevel,
 	.set_msglevel = qede_set_msglevel,
 	.nway_reset = qede_nway_reset,
@@ -1260,7 +1448,7 @@
 };
 
 static const struct ethtool_ops qede_vf_ethtool_ops = {
-	.get_settings = qede_get_settings,
+	.get_link_ksettings = qede_get_link_ksettings,
 	.get_drvinfo = qede_get_drvinfo,
 	.get_msglevel = qede_get_msglevel,
 	.set_msglevel = qede_set_msglevel,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index e4bd02e..343038c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -36,7 +36,7 @@
 #include <linux/random.h>
 #include <net/ip6_checksum.h>
 #include <linux/bitops.h>
-
+#include <linux/qed/qede_roce.h>
 #include "qede.h"
 
 static char version[] =
@@ -100,7 +100,8 @@
 static void qede_link_update(void *dev, struct qed_link_output *link);
 
 #ifdef CONFIG_QED_SRIOV
-static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos)
+static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
+			    __be16 vlan_proto)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
 
@@ -109,6 +110,9 @@
 		return -EINVAL;
 	}
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	DP_VERBOSE(edev, QED_MSG_IOV, "Setting Vlan 0x%04x to VF [%d]\n",
 		   vlan, vf);
 
@@ -189,8 +193,7 @@
 	struct ethtool_drvinfo drvinfo;
 	struct qede_dev *edev;
 
-	/* Currently only support name change */
-	if (event != NETDEV_CHANGENAME)
+	if (event != NETDEV_CHANGENAME && event != NETDEV_CHANGEADDR)
 		goto done;
 
 	/* Check whether this is a qede device */
@@ -203,11 +206,18 @@
 		goto done;
 	edev = netdev_priv(ndev);
 
-	/* Notify qed of the name change */
-	if (!edev->ops || !edev->ops->common)
-		goto done;
-	edev->ops->common->set_id(edev->cdev, edev->ndev->name,
-				  "qede");
+	switch (event) {
+	case NETDEV_CHANGENAME:
+		/* Notify qed of the name change */
+		if (!edev->ops || !edev->ops->common)
+			goto done;
+		edev->ops->common->set_id(edev->cdev, edev->ndev->name, "qede");
+		break;
+	case NETDEV_CHANGEADDR:
+		edev = netdev_priv(ndev);
+		qede_roce_event_changeaddr(edev);
+		break;
+	}
 
 done:
 	return NOTIFY_DONE;
@@ -222,7 +232,7 @@
 {
 	int ret;
 
-	pr_notice("qede_init: %s\n", version);
+	pr_info("qede_init: %s\n", version);
 
 	qed_ops = qed_get_eth_ops();
 	if (!qed_ops) {
@@ -253,7 +263,8 @@
 
 static void __exit qede_cleanup(void)
 {
-	pr_notice("qede_cleanup called\n");
+	if (debug & QED_LOG_INFO_MASK)
+		pr_info("qede_cleanup called\n");
 
 	unregister_netdevice_notifier(&qede_netdev_notifier);
 	pci_unregister_driver(&qede_pci_driver);
@@ -270,8 +281,7 @@
 
 /* Unmap the data and free skb */
 static int qede_free_tx_pkt(struct qede_dev *edev,
-			    struct qede_tx_queue *txq,
-			    int *len)
+			    struct qede_tx_queue *txq, int *len)
 {
 	u16 idx = txq->sw_tx_cons & NUM_TX_BDS_MAX;
 	struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
@@ -329,8 +339,7 @@
 static void qede_free_failed_tx_pkt(struct qede_dev *edev,
 				    struct qede_tx_queue *txq,
 				    struct eth_tx_1st_bd *first_bd,
-				    int nbd,
-				    bool data_split)
+				    int nbd, bool data_split)
 {
 	u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
 	struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
@@ -339,8 +348,7 @@
 
 	/* Return prod to its position before this skb was handled */
 	qed_chain_set_prod(&txq->tx_pbl,
-			   le16_to_cpu(txq->tx_db.data.bd_prod),
-			   first_bd);
+			   le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);
 
 	first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl);
 
@@ -366,8 +374,7 @@
 
 	/* Return again prod to its position before this skb was handled */
 	qed_chain_set_prod(&txq->tx_pbl,
-			   le16_to_cpu(txq->tx_db.data.bd_prod),
-			   first_bd);
+			   le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);
 
 	/* Free skb */
 	dev_kfree_skb_any(skb);
@@ -376,8 +383,7 @@
 }
 
 static u32 qede_xmit_type(struct qede_dev *edev,
-			  struct sk_buff *skb,
-			  int *ipv6_ext)
+			  struct sk_buff *skb, int *ipv6_ext)
 {
 	u32 rc = XMIT_L4_CSUM;
 	__be16 l3_proto;
@@ -434,15 +440,13 @@
 }
 
 static int map_frag_to_bd(struct qede_dev *edev,
-			  skb_frag_t *frag,
-			  struct eth_tx_bd *bd)
+			  skb_frag_t *frag, struct eth_tx_bd *bd)
 {
 	dma_addr_t mapping;
 
 	/* Map skb non-linear frag data for DMA */
 	mapping = skb_frag_dma_map(&edev->pdev->dev, frag, 0,
-				   skb_frag_size(frag),
-				   DMA_TO_DEVICE);
+				   skb_frag_size(frag), DMA_TO_DEVICE);
 	if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
 		DP_NOTICE(edev, "Unable to map frag - dropping packet\n");
 		return -ENOMEM;
@@ -504,9 +508,8 @@
 }
 
 /* Main transmit function */
-static
-netdev_tx_t qede_start_xmit(struct sk_buff *skb,
-			    struct net_device *ndev)
+static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
+				   struct net_device *ndev)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
 	struct netdev_queue *netdev_txq;
@@ -526,12 +529,11 @@
 
 	/* Get tx-queue context and netdev index */
 	txq_index = skb_get_queue_mapping(skb);
-	WARN_ON(txq_index >= QEDE_TSS_CNT(edev));
+	WARN_ON(txq_index >= QEDE_TSS_COUNT(edev));
 	txq = QEDE_TX_QUEUE(edev, txq_index);
 	netdev_txq = netdev_get_tx_queue(ndev, txq_index);
 
-	WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) <
-			       (MAX_SKB_FRAGS + 1));
+	WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1));
 
 	xmit_type = qede_xmit_type(edev, skb, &ipv6_ext);
 
@@ -606,6 +608,14 @@
 			    1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
 		}
 
+		/* Legacy FW had flipped behavior in regard to this bit -
+		 * I.e., needed to set to prevent FW from touching encapsulated
+		 * packets when it didn't need to.
+		 */
+		if (unlikely(txq->is_legacy))
+			first_bd->data.bitfields ^=
+			    1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT;
+
 		/* If the packet is IPv6 with extension header, indicate that
 		 * to FW and pass few params, since the device cracker doesn't
 		 * support parsing IPv6 with extension header/s.
@@ -722,12 +732,16 @@
 	txq->tx_db.data.bd_prod =
 		cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl));
 
-	if (!skb->xmit_more || netif_tx_queue_stopped(netdev_txq))
+	if (!skb->xmit_more || netif_xmit_stopped(netdev_txq))
 		qede_update_tx_producer(txq);
 
 	if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl)
 		      < (MAX_SKB_FRAGS + 1))) {
+		if (skb->xmit_more)
+			qede_update_tx_producer(txq);
+
 		netif_tx_stop_queue(netdev_txq);
+		txq->stopped_cnt++;
 		DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED,
 			   "Stop queue was called\n");
 		/* paired memory barrier is in qede_tx_int(), we have to keep
@@ -761,8 +775,7 @@
 	return hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl);
 }
 
-static int qede_tx_int(struct qede_dev *edev,
-		       struct qede_tx_queue *txq)
+static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
 	u16 hw_bd_cons;
@@ -788,6 +801,7 @@
 		bytes_compl += len;
 		pkts_compl++;
 		txq->sw_tx_cons++;
+		txq->xmit_pkts++;
 	}
 
 	netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl);
@@ -960,8 +974,7 @@
 
 static u32 qede_get_rxhash(struct qede_dev *edev,
 			   u8 bitfields,
-			   __le32 rss_hash,
-			   enum pkt_hash_types *rxhash_type)
+			   __le32 rss_hash, enum pkt_hash_types *rxhash_type)
 {
 	enum rss_hash_type htype;
 
@@ -990,12 +1003,10 @@
 
 static inline void qede_skb_receive(struct qede_dev *edev,
 				    struct qede_fastpath *fp,
-				    struct sk_buff *skb,
-				    u16 vlan_tag)
+				    struct sk_buff *skb, u16 vlan_tag)
 {
 	if (vlan_tag)
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-				       vlan_tag);
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
 	napi_gro_receive(&fp->napi, skb);
 }
@@ -1018,8 +1029,7 @@
 
 static int qede_fill_frag_skb(struct qede_dev *edev,
 			      struct qede_rx_queue *rxq,
-			      u8 tpa_agg_index,
-			      u16 len_on_bd)
+			      u8 tpa_agg_index, u16 len_on_bd)
 {
 	struct sw_rx_data *current_bd = &rxq->sw_rx_ring[rxq->sw_rx_cons &
 							 NUM_RX_BDS_MAX];
@@ -1206,7 +1216,7 @@
 #endif
 
 send_skb:
-	skb_record_rx_queue(skb, fp->rss_id);
+	skb_record_rx_queue(skb, fp->rxq->rxq_id);
 	qede_skb_receive(edev, fp, skb, vlan_tag);
 }
 
@@ -1410,7 +1420,7 @@
 
 		if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) {
 			edev->ops->eth_cqe_completion(
-					edev->cdev, fp->rss_id,
+					edev->cdev, fp->id,
 					(struct eth_slow_path_rx_cqe *)cqe);
 			goto next_cqe;
 		}
@@ -1467,7 +1477,7 @@
 		skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
 		if (unlikely(!skb)) {
 			DP_NOTICE(edev,
-				  "Build_skb failed, dropping incoming packet\n");
+				  "skb allocation failed, dropping incoming packet\n");
 			qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num);
 			rxq->rx_alloc_errors++;
 			goto next_cqe;
@@ -1575,14 +1585,13 @@
 		skb->protocol = eth_type_trans(skb, edev->ndev);
 
 		rx_hash = qede_get_rxhash(edev, fp_cqe->bitfields,
-					  fp_cqe->rss_hash,
-					  &rxhash_type);
+					  fp_cqe->rss_hash, &rxhash_type);
 
 		skb_set_hash(skb, rx_hash, rxhash_type);
 
 		qede_set_skb_csum(skb, csum_flag);
 
-		skb_record_rx_queue(skb, fp->rss_id);
+		skb_record_rx_queue(skb, fp->rxq->rxq_id);
 
 		qede_skb_receive(edev, fp, skb, le16_to_cpu(fp_cqe->vlan_tag));
 next_rx_only:
@@ -1601,6 +1610,8 @@
 	/* Update producers */
 	qede_update_rx_prod(edev, rxq);
 
+	rxq->rcv_pkts += rx_pkt;
+
 	return rx_pkt;
 }
 
@@ -1613,10 +1624,12 @@
 	u8 tc;
 
 	for (tc = 0; tc < edev->num_tc; tc++)
-		if (qede_txq_has_work(&fp->txqs[tc]))
+		if (likely(fp->type & QEDE_FASTPATH_TX) &&
+		    qede_txq_has_work(&fp->txqs[tc]))
 			qede_tx_int(edev, &fp->txqs[tc]);
 
-	rx_work_done = qede_has_rx_work(fp->rxq) ?
+	rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
+			qede_has_rx_work(fp->rxq)) ?
 			qede_rx_int(fp, budget) : 0;
 	if (rx_work_done < budget) {
 		qed_sb_update_sb_idx(fp->sb_info);
@@ -1636,8 +1649,10 @@
 		rmb();
 
 		/* Fall out from the NAPI loop if needed */
-		if (!(qede_has_rx_work(fp->rxq) ||
-		      qede_has_tx_work(fp))) {
+		if (!((likely(fp->type & QEDE_FASTPATH_RX) &&
+		       qede_has_rx_work(fp->rxq)) ||
+		      (likely(fp->type & QEDE_FASTPATH_TX) &&
+		       qede_has_tx_work(fp)))) {
 			napi_complete(napi);
 
 			/* Update and reenable interrupts */
@@ -1708,6 +1723,8 @@
 
 	edev->ops->get_vport_stats(edev->cdev, &stats);
 	edev->stats.no_buff_discards = stats.no_buff_discards;
+	edev->stats.packet_too_big_discard = stats.packet_too_big_discard;
+	edev->stats.ttl0_discard = stats.ttl0_discard;
 	edev->stats.rx_ucast_bytes = stats.rx_ucast_bytes;
 	edev->stats.rx_mcast_bytes = stats.rx_mcast_bytes;
 	edev->stats.rx_bcast_bytes = stats.rx_bcast_bytes;
@@ -1787,9 +1804,9 @@
 	edev->stats.tx_mac_ctrl_frames = stats.tx_mac_ctrl_frames;
 }
 
-static struct rtnl_link_stats64 *qede_get_stats64(
-			    struct net_device *dev,
-			    struct rtnl_link_stats64 *stats)
+static
+struct rtnl_link_stats64 *qede_get_stats64(struct net_device *dev,
+					   struct rtnl_link_stats64 *stats)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 
@@ -2103,14 +2120,13 @@
 		}
 
 		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
-			   "marked vlan %d as non-configured\n",
-			   vlan->vid);
+			   "marked vlan %d as non-configured\n", vlan->vid);
 	}
 
 	edev->accept_any_vlan = false;
 }
 
-int qede_set_features(struct net_device *dev, netdev_features_t features)
+static int qede_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 	netdev_features_t changes = features ^ dev->features;
@@ -2146,7 +2162,7 @@
 
 		edev->vxlan_dst_port = t_port;
 
-		DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d",
+		DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d\n",
 			   t_port);
 
 		set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags);
@@ -2157,7 +2173,7 @@
 
 		edev->geneve_dst_port = t_port;
 
-		DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d",
+		DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d\n",
 			   t_port);
 		set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags);
 		break;
@@ -2181,7 +2197,7 @@
 
 		edev->vxlan_dst_port = 0;
 
-		DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d",
+		DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d\n",
 			   t_port);
 
 		set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags);
@@ -2192,7 +2208,7 @@
 
 		edev->geneve_dst_port = 0;
 
-		DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d",
+		DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d\n",
 			   t_port);
 		set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags);
 		break;
@@ -2237,15 +2253,13 @@
 static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
 					    struct pci_dev *pdev,
 					    struct qed_dev_eth_info *info,
-					    u32 dp_module,
-					    u8 dp_level)
+					    u32 dp_module, u8 dp_level)
 {
 	struct net_device *ndev;
 	struct qede_dev *edev;
 
 	ndev = alloc_etherdev_mqs(sizeof(*edev),
-				  info->num_queues,
-				  info->num_queues);
+				  info->num_queues, info->num_queues);
 	if (!ndev) {
 		pr_err("etherdev allocation failed\n");
 		return NULL;
@@ -2261,6 +2275,9 @@
 	edev->q_num_rx_buffers = NUM_RX_BDS_DEF;
 	edev->q_num_tx_buffers = NUM_TX_BDS_DEF;
 
+	DP_INFO(edev, "Allocated netdev with %d tx queues and %d rx queues\n",
+		info->num_queues, info->num_queues);
+
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
 	memset(&edev->stats, 0, sizeof(edev->stats));
@@ -2349,7 +2366,7 @@
 		struct qede_fastpath *fp;
 		int i;
 
-		for_each_rss(i) {
+		for_each_queue(i) {
 			fp = &edev->fp_array[i];
 
 			kfree(fp->sb_info);
@@ -2358,22 +2375,33 @@
 		}
 		kfree(edev->fp_array);
 	}
-	edev->num_rss = 0;
+
+	edev->num_queues = 0;
+	edev->fp_num_tx = 0;
+	edev->fp_num_rx = 0;
 }
 
 static int qede_alloc_fp_array(struct qede_dev *edev)
 {
+	u8 fp_combined, fp_rx = edev->fp_num_rx;
 	struct qede_fastpath *fp;
 	int i;
 
-	edev->fp_array = kcalloc(QEDE_RSS_CNT(edev),
+	edev->fp_array = kcalloc(QEDE_QUEUE_CNT(edev),
 				 sizeof(*edev->fp_array), GFP_KERNEL);
 	if (!edev->fp_array) {
 		DP_NOTICE(edev, "fp array allocation failed\n");
 		goto err;
 	}
 
-	for_each_rss(i) {
+	fp_combined = QEDE_QUEUE_CNT(edev) - fp_rx - edev->fp_num_tx;
+
+	/* Allocate the FP elements for Rx queues followed by combined and then
+	 * the Tx. This ordering should be maintained so that the respective
+	 * queues (Rx or Tx) will be together in the fastpath array and the
+	 * associated ids will be sequential.
+	 */
+	for_each_queue(i) {
 		fp = &edev->fp_array[i];
 
 		fp->sb_info = kcalloc(1, sizeof(*fp->sb_info), GFP_KERNEL);
@@ -2382,16 +2410,33 @@
 			goto err;
 		}
 
-		fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL);
-		if (!fp->rxq) {
-			DP_NOTICE(edev, "RXQ struct allocation failed\n");
-			goto err;
+		if (fp_rx) {
+			fp->type = QEDE_FASTPATH_RX;
+			fp_rx--;
+		} else if (fp_combined) {
+			fp->type = QEDE_FASTPATH_COMBINED;
+			fp_combined--;
+		} else {
+			fp->type = QEDE_FASTPATH_TX;
 		}
 
-		fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs), GFP_KERNEL);
-		if (!fp->txqs) {
-			DP_NOTICE(edev, "TXQ array allocation failed\n");
-			goto err;
+		if (fp->type & QEDE_FASTPATH_TX) {
+			fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs),
+					   GFP_KERNEL);
+			if (!fp->txqs) {
+				DP_NOTICE(edev,
+					  "TXQ array allocation failed\n");
+				goto err;
+			}
+		}
+
+		if (fp->type & QEDE_FASTPATH_RX) {
+			fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL);
+			if (!fp->rxq) {
+				DP_NOTICE(edev,
+					  "RXQ struct allocation failed\n");
+				goto err;
+			}
 		}
 	}
 
@@ -2453,7 +2498,7 @@
 			bool is_vf, enum qede_probe_mode mode)
 {
 	struct qed_probe_params probe_params;
-	struct qed_slowpath_params params;
+	struct qed_slowpath_params sp_params;
 	struct qed_dev_eth_info dev_info;
 	struct qede_dev *edev;
 	struct qed_dev *cdev;
@@ -2476,14 +2521,14 @@
 	qede_update_pf_params(cdev);
 
 	/* Start the Slowpath-process */
-	memset(&params, 0, sizeof(struct qed_slowpath_params));
-	params.int_mode = QED_INT_MODE_MSIX;
-	params.drv_major = QEDE_MAJOR_VERSION;
-	params.drv_minor = QEDE_MINOR_VERSION;
-	params.drv_rev = QEDE_REVISION_VERSION;
-	params.drv_eng = QEDE_ENGINEERING_VERSION;
-	strlcpy(params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
-	rc = qed_ops->common->slowpath_start(cdev, &params);
+	memset(&sp_params, 0, sizeof(sp_params));
+	sp_params.int_mode = QED_INT_MODE_MSIX;
+	sp_params.drv_major = QEDE_MAJOR_VERSION;
+	sp_params.drv_minor = QEDE_MINOR_VERSION;
+	sp_params.drv_rev = QEDE_REVISION_VERSION;
+	sp_params.drv_eng = QEDE_ENGINEERING_VERSION;
+	strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE);
+	rc = qed_ops->common->slowpath_start(cdev, &sp_params);
 	if (rc) {
 		pr_notice("Cannot start slowpath\n");
 		goto err1;
@@ -2506,10 +2551,14 @@
 
 	qede_init_ndev(edev);
 
+	rc = qede_roce_dev_add(edev);
+	if (rc)
+		goto err3;
+
 	rc = register_netdev(edev->ndev);
 	if (rc) {
 		DP_NOTICE(edev, "Cannot register net-device\n");
-		goto err3;
+		goto err4;
 	}
 
 	edev->ops->common->set_id(cdev, edev->ndev->name, DRV_MODULE_VERSION);
@@ -2517,7 +2566,8 @@
 	edev->ops->register_ops(cdev, &qede_ll_ops, edev);
 
 #ifdef CONFIG_DCB
-	qede_set_dcbnl_ops(edev->ndev);
+	if (!IS_VF(edev))
+		qede_set_dcbnl_ops(edev->ndev);
 #endif
 
 	INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task);
@@ -2528,6 +2578,8 @@
 
 	return 0;
 
+err4:
+	qede_roce_dev_remove(edev);
 err3:
 	free_netdev(edev->ndev);
 err2:
@@ -2574,8 +2626,11 @@
 	DP_INFO(edev, "Starting qede_remove\n");
 
 	cancel_delayed_work_sync(&edev->sp_task);
+
 	unregister_netdev(ndev);
 
+	qede_roce_dev_remove(edev);
+
 	edev->ops->common->set_power_state(cdev, PCI_D0);
 
 	pci_set_drvdata(pdev, NULL);
@@ -2586,7 +2641,7 @@
 	qed_ops->common->slowpath_stop(cdev);
 	qed_ops->common->remove(cdev);
 
-	pr_notice("Ending successfully qede_remove\n");
+	dev_info(&pdev->dev, "Ending qede_remove successfully\n");
 }
 
 static void qede_remove(struct pci_dev *pdev)
@@ -2605,8 +2660,8 @@
 	u16 rss_num;
 
 	/* Setup queues according to possible resources*/
-	if (edev->req_rss)
-		rss_num = edev->req_rss;
+	if (edev->req_queues)
+		rss_num = edev->req_queues;
 	else
 		rss_num = netif_get_num_default_rss_queues() *
 			  edev->dev_info.common.num_hwfns;
@@ -2616,11 +2671,15 @@
 	rc = edev->ops->common->set_fp_int(edev->cdev, rss_num);
 	if (rc > 0) {
 		/* Managed to request interrupts for our queues */
-		edev->num_rss = rc;
+		edev->num_queues = rc;
 		DP_INFO(edev, "Managed %d [of %d] RSS queues\n",
-			QEDE_RSS_CNT(edev), rss_num);
+			QEDE_QUEUE_CNT(edev), rss_num);
 		rc = 0;
 	}
+
+	edev->fp_num_tx = edev->req_num_tx;
+	edev->fp_num_rx = edev->req_num_rx;
+
 	return rc;
 }
 
@@ -2634,16 +2693,14 @@
 
 /* This function allocates fast-path status block memory */
 static int qede_alloc_mem_sb(struct qede_dev *edev,
-			     struct qed_sb_info *sb_info,
-			     u16 sb_id)
+			     struct qed_sb_info *sb_info, u16 sb_id)
 {
 	struct status_block *sb_virt;
 	dma_addr_t sb_phys;
 	int rc;
 
 	sb_virt = dma_alloc_coherent(&edev->pdev->dev,
-				     sizeof(*sb_virt),
-				     &sb_phys, GFP_KERNEL);
+				     sizeof(*sb_virt), &sb_phys, GFP_KERNEL);
 	if (!sb_virt) {
 		DP_ERR(edev, "Status block allocation failed\n");
 		return -ENOMEM;
@@ -2675,16 +2732,15 @@
 		data = rx_buf->data;
 
 		dma_unmap_page(&edev->pdev->dev,
-			       rx_buf->mapping,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+			       rx_buf->mapping, PAGE_SIZE, DMA_FROM_DEVICE);
 
 		rx_buf->data = NULL;
 		__free_page(data);
 	}
 }
 
-static void qede_free_sge_mem(struct qede_dev *edev,
-			      struct qede_rx_queue *rxq) {
+static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
+{
 	int i;
 
 	if (edev->gro_disable)
@@ -2703,8 +2759,7 @@
 	}
 }
 
-static void qede_free_mem_rxq(struct qede_dev *edev,
-			      struct qede_rx_queue *rxq)
+static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
 	qede_free_sge_mem(edev, rxq);
 
@@ -2726,9 +2781,6 @@
 	struct eth_rx_bd *rx_bd;
 	dma_addr_t mapping;
 	struct page *data;
-	u16 rx_buf_size;
-
-	rx_buf_size = rxq->rx_buf_size;
 
 	data = alloc_pages(GFP_ATOMIC, 0);
 	if (unlikely(!data)) {
@@ -2763,8 +2815,7 @@
 	return 0;
 }
 
-static int qede_alloc_sge_mem(struct qede_dev *edev,
-			      struct qede_rx_queue *rxq)
+static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
 	dma_addr_t mapping;
 	int i;
@@ -2811,15 +2862,14 @@
 }
 
 /* This function allocates all memory needed per Rx queue */
-static int qede_alloc_mem_rxq(struct qede_dev *edev,
-			      struct qede_rx_queue *rxq)
+static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
 	int i, rc, size;
 
 	rxq->num_rx_buffers = edev->q_num_rx_buffers;
 
-	rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD +
-			   edev->ndev->mtu;
+	rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu;
+
 	if (rxq->rx_buf_size > PAGE_SIZE)
 		rxq->rx_buf_size = PAGE_SIZE;
 
@@ -2873,8 +2923,7 @@
 	return rc;
 }
 
-static void qede_free_mem_txq(struct qede_dev *edev,
-			      struct qede_tx_queue *txq)
+static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	/* Free the parallel SW ring */
 	kfree(txq->sw_tx_ring);
@@ -2884,8 +2933,7 @@
 }
 
 /* This function allocates all memory needed per Tx queue */
-static int qede_alloc_mem_txq(struct qede_dev *edev,
-			      struct qede_tx_queue *txq)
+static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	int size, rc;
 	union eth_tx_bd_types *p_virt;
@@ -2917,41 +2965,45 @@
 }
 
 /* This function frees all memory of a single fp */
-static void qede_free_mem_fp(struct qede_dev *edev,
-			     struct qede_fastpath *fp)
+static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
 	int tc;
 
 	qede_free_mem_sb(edev, fp->sb_info);
 
-	qede_free_mem_rxq(edev, fp->rxq);
+	if (fp->type & QEDE_FASTPATH_RX)
+		qede_free_mem_rxq(edev, fp->rxq);
 
-	for (tc = 0; tc < edev->num_tc; tc++)
-		qede_free_mem_txq(edev, &fp->txqs[tc]);
+	if (fp->type & QEDE_FASTPATH_TX)
+		for (tc = 0; tc < edev->num_tc; tc++)
+			qede_free_mem_txq(edev, &fp->txqs[tc]);
 }
 
 /* This function allocates all memory needed for a single fp (i.e. an entity
- * which contains status block, one rx queue and multiple per-TC tx queues.
+ * which contains status block, one rx queue and/or multiple per-TC tx queues.
  */
-static int qede_alloc_mem_fp(struct qede_dev *edev,
-			     struct qede_fastpath *fp)
+static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
 	int rc, tc;
 
-	rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->rss_id);
+	rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->id);
 	if (rc)
 		goto err;
 
-	rc = qede_alloc_mem_rxq(edev, fp->rxq);
-	if (rc)
-		goto err;
-
-	for (tc = 0; tc < edev->num_tc; tc++) {
-		rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]);
+	if (fp->type & QEDE_FASTPATH_RX) {
+		rc = qede_alloc_mem_rxq(edev, fp->rxq);
 		if (rc)
 			goto err;
 	}
 
+	if (fp->type & QEDE_FASTPATH_TX) {
+		for (tc = 0; tc < edev->num_tc; tc++) {
+			rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]);
+			if (rc)
+				goto err;
+		}
+	}
+
 	return 0;
 err:
 	return rc;
@@ -2961,7 +3013,7 @@
 {
 	int i;
 
-	for_each_rss(i) {
+	for_each_queue(i) {
 		struct qede_fastpath *fp = &edev->fp_array[i];
 
 		qede_free_mem_fp(edev, fp);
@@ -2971,16 +3023,16 @@
 /* This function allocates all qede memory at NIC load. */
 static int qede_alloc_mem_load(struct qede_dev *edev)
 {
-	int rc = 0, rss_id;
+	int rc = 0, queue_id;
 
-	for (rss_id = 0; rss_id < QEDE_RSS_CNT(edev); rss_id++) {
-		struct qede_fastpath *fp = &edev->fp_array[rss_id];
+	for (queue_id = 0; queue_id < QEDE_QUEUE_CNT(edev); queue_id++) {
+		struct qede_fastpath *fp = &edev->fp_array[queue_id];
 
 		rc = qede_alloc_mem_fp(edev, fp);
 		if (rc) {
 			DP_ERR(edev,
 			       "Failed to allocate memory for fastpath - rss id = %d\n",
-			       rss_id);
+			       queue_id);
 			qede_free_mem_load(edev);
 			return rc;
 		}
@@ -2992,30 +3044,38 @@
 /* This function inits fp content and resets the SB, RXQ and TXQ structures */
 static void qede_init_fp(struct qede_dev *edev)
 {
-	int rss_id, txq_index, tc;
+	int queue_id, rxq_index = 0, txq_index = 0, tc;
 	struct qede_fastpath *fp;
 
-	for_each_rss(rss_id) {
-		fp = &edev->fp_array[rss_id];
+	for_each_queue(queue_id) {
+		fp = &edev->fp_array[queue_id];
 
 		fp->edev = edev;
-		fp->rss_id = rss_id;
+		fp->id = queue_id;
 
 		memset((void *)&fp->napi, 0, sizeof(fp->napi));
 
 		memset((void *)fp->sb_info, 0, sizeof(*fp->sb_info));
 
-		memset((void *)fp->rxq, 0, sizeof(*fp->rxq));
-		fp->rxq->rxq_id = rss_id;
+		if (fp->type & QEDE_FASTPATH_RX) {
+			memset((void *)fp->rxq, 0, sizeof(*fp->rxq));
+			fp->rxq->rxq_id = rxq_index++;
+		}
 
-		memset((void *)fp->txqs, 0, (edev->num_tc * sizeof(*fp->txqs)));
-		for (tc = 0; tc < edev->num_tc; tc++) {
-			txq_index = tc * QEDE_RSS_CNT(edev) + rss_id;
-			fp->txqs[tc].index = txq_index;
+		if (fp->type & QEDE_FASTPATH_TX) {
+			memset((void *)fp->txqs, 0,
+			       (edev->num_tc * sizeof(*fp->txqs)));
+			for (tc = 0; tc < edev->num_tc; tc++) {
+				fp->txqs[tc].index = txq_index +
+				    tc * QEDE_TSS_COUNT(edev);
+				if (edev->dev_info.is_legacy)
+					fp->txqs[tc].is_legacy = true;
+			}
+			txq_index++;
 		}
 
 		snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
-			 edev->ndev->name, rss_id);
+			 edev->ndev->name, queue_id);
 	}
 
 	edev->gro_disable = !(edev->ndev->features & NETIF_F_GRO);
@@ -3025,12 +3085,13 @@
 {
 	int rc = 0;
 
-	rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_CNT(edev));
+	rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_COUNT(edev));
 	if (rc) {
 		DP_NOTICE(edev, "Failed to set real number of Tx queues\n");
 		return rc;
 	}
-	rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_CNT(edev));
+
+	rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_COUNT(edev));
 	if (rc) {
 		DP_NOTICE(edev, "Failed to set real number of Rx queues\n");
 		return rc;
@@ -3043,7 +3104,7 @@
 {
 	int i;
 
-	for_each_rss(i) {
+	for_each_queue(i) {
 		napi_disable(&edev->fp_array[i].napi);
 
 		netif_napi_del(&edev->fp_array[i].napi);
@@ -3055,7 +3116,7 @@
 	int i;
 
 	/* Add NAPI objects */
-	for_each_rss(i) {
+	for_each_queue(i) {
 		netif_napi_add(edev->ndev, &edev->fp_array[i].napi,
 			       qede_poll, NAPI_POLL_WEIGHT);
 		napi_enable(&edev->fp_array[i].napi);
@@ -3084,14 +3145,14 @@
 	int i, rc;
 
 	/* Sanitize number of interrupts == number of prepared RSS queues */
-	if (QEDE_RSS_CNT(edev) > edev->int_info.msix_cnt) {
+	if (QEDE_QUEUE_CNT(edev) > edev->int_info.msix_cnt) {
 		DP_ERR(edev,
 		       "Interrupt mismatch: %d RSS queues > %d MSI-x vectors\n",
-		       QEDE_RSS_CNT(edev), edev->int_info.msix_cnt);
+		       QEDE_QUEUE_CNT(edev), edev->int_info.msix_cnt);
 		return -EINVAL;
 	}
 
-	for (i = 0; i < QEDE_RSS_CNT(edev); i++) {
+	for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) {
 		rc = request_irq(edev->int_info.msix[i].vector,
 				 qede_msix_fp_int, 0, edev->fp_array[i].name,
 				 &edev->fp_array[i]);
@@ -3136,18 +3197,17 @@
 
 		/* qed should learn receive the RSS ids and callbacks */
 		ops = edev->ops->common;
-		for (i = 0; i < QEDE_RSS_CNT(edev); i++)
+		for (i = 0; i < QEDE_QUEUE_CNT(edev); i++)
 			ops->simd_handler_config(edev->cdev,
 						 &edev->fp_array[i], i,
 						 qede_simd_fp_handler);
-		edev->int_info.used_cnt = QEDE_RSS_CNT(edev);
+		edev->int_info.used_cnt = QEDE_QUEUE_CNT(edev);
 	}
 	return 0;
 }
 
 static int qede_drain_txq(struct qede_dev *edev,
-			  struct qede_tx_queue *txq,
-			  bool allow_drain)
+			  struct qede_tx_queue *txq, bool allow_drain)
 {
 	int rc, cnt = 1000;
 
@@ -3199,45 +3259,53 @@
 	}
 
 	/* Flush Tx queues. If needed, request drain from MCP */
-	for_each_rss(i) {
+	for_each_queue(i) {
 		struct qede_fastpath *fp = &edev->fp_array[i];
 
-		for (tc = 0; tc < edev->num_tc; tc++) {
-			struct qede_tx_queue *txq = &fp->txqs[tc];
+		if (fp->type & QEDE_FASTPATH_TX) {
+			for (tc = 0; tc < edev->num_tc; tc++) {
+				struct qede_tx_queue *txq = &fp->txqs[tc];
 
-			rc = qede_drain_txq(edev, txq, true);
-			if (rc)
-				return rc;
+				rc = qede_drain_txq(edev, txq, true);
+				if (rc)
+					return rc;
+			}
 		}
 	}
 
-	/* Stop all Queues in reverse order*/
-	for (i = QEDE_RSS_CNT(edev) - 1; i >= 0; i--) {
+	/* Stop all Queues in reverse order */
+	for (i = QEDE_QUEUE_CNT(edev) - 1; i >= 0; i--) {
 		struct qed_stop_rxq_params rx_params;
 
-		/* Stop the Tx Queue(s)*/
-		for (tc = 0; tc < edev->num_tc; tc++) {
-			struct qed_stop_txq_params tx_params;
+		/* Stop the Tx Queue(s) */
+		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
+			for (tc = 0; tc < edev->num_tc; tc++) {
+				struct qed_stop_txq_params tx_params;
+				u8 val;
 
-			tx_params.rss_id = i;
-			tx_params.tx_queue_id = tc * QEDE_RSS_CNT(edev) + i;
-			rc = edev->ops->q_tx_stop(cdev, &tx_params);
-			if (rc) {
-				DP_ERR(edev, "Failed to stop TXQ #%d\n",
-				       tx_params.tx_queue_id);
-				return rc;
+				tx_params.rss_id = i;
+				val = edev->fp_array[i].txqs[tc].index;
+				tx_params.tx_queue_id = val;
+				rc = edev->ops->q_tx_stop(cdev, &tx_params);
+				if (rc) {
+					DP_ERR(edev, "Failed to stop TXQ #%d\n",
+					       tx_params.tx_queue_id);
+					return rc;
+				}
 			}
 		}
 
-		/* Stop the Rx Queue*/
-		memset(&rx_params, 0, sizeof(rx_params));
-		rx_params.rss_id = i;
-		rx_params.rx_queue_id = i;
+		/* Stop the Rx Queue */
+		if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
+			memset(&rx_params, 0, sizeof(rx_params));
+			rx_params.rss_id = i;
+			rx_params.rx_queue_id = edev->fp_array[i].rxq->rxq_id;
 
-		rc = edev->ops->q_rx_stop(cdev, &rx_params);
-		if (rc) {
-			DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
-			return rc;
+			rc = edev->ops->q_rx_stop(cdev, &rx_params);
+			if (rc) {
+				DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
+				return rc;
+			}
 		}
 	}
 
@@ -3260,7 +3328,7 @@
 	struct qed_start_vport_params start = {0};
 	bool reset_rss_indir = false;
 
-	if (!edev->num_rss) {
+	if (!edev->num_queues) {
 		DP_ERR(edev,
 		       "Cannot update V-VPORT as active as there are no Rx queues\n");
 		return -EINVAL;
@@ -3284,50 +3352,66 @@
 		   "Start vport ramrod passed, vport_id = %d, MTU = %d, vlan_removal_en = %d\n",
 		   start.vport_id, edev->ndev->mtu + 0xe, vlan_removal_en);
 
-	for_each_rss(i) {
+	for_each_queue(i) {
 		struct qede_fastpath *fp = &edev->fp_array[i];
-		dma_addr_t phys_table = fp->rxq->rx_comp_ring.pbl.p_phys_table;
+		dma_addr_t p_phys_table;
+		u32 page_cnt;
 
-		memset(&q_params, 0, sizeof(q_params));
-		q_params.rss_id = i;
-		q_params.queue_id = i;
-		q_params.vport_id = 0;
-		q_params.sb = fp->sb_info->igu_sb_id;
-		q_params.sb_idx = RX_PI;
-
-		rc = edev->ops->q_rx_start(cdev, &q_params,
-					   fp->rxq->rx_buf_size,
-					   fp->rxq->rx_bd_ring.p_phys_addr,
-					   phys_table,
-					   fp->rxq->rx_comp_ring.page_cnt,
-					   &fp->rxq->hw_rxq_prod_addr);
-		if (rc) {
-			DP_ERR(edev, "Start RXQ #%d failed %d\n", i, rc);
-			return rc;
-		}
-
-		fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI];
-
-		qede_update_rx_prod(edev, fp->rxq);
-
-		for (tc = 0; tc < edev->num_tc; tc++) {
-			struct qede_tx_queue *txq = &fp->txqs[tc];
-			int txq_index = tc * QEDE_RSS_CNT(edev) + i;
+		if (fp->type & QEDE_FASTPATH_RX) {
+			struct qede_rx_queue *rxq = fp->rxq;
+			__le16 *val;
 
 			memset(&q_params, 0, sizeof(q_params));
 			q_params.rss_id = i;
-			q_params.queue_id = txq_index;
+			q_params.queue_id = rxq->rxq_id;
+			q_params.vport_id = 0;
+			q_params.sb = fp->sb_info->igu_sb_id;
+			q_params.sb_idx = RX_PI;
+
+			p_phys_table =
+			    qed_chain_get_pbl_phys(&rxq->rx_comp_ring);
+			page_cnt = qed_chain_get_page_cnt(&rxq->rx_comp_ring);
+
+			rc = edev->ops->q_rx_start(cdev, &q_params,
+						   rxq->rx_buf_size,
+						   rxq->rx_bd_ring.p_phys_addr,
+						   p_phys_table,
+						   page_cnt,
+						   &rxq->hw_rxq_prod_addr);
+			if (rc) {
+				DP_ERR(edev, "Start RXQ #%d failed %d\n", i,
+				       rc);
+				return rc;
+			}
+
+			val = &fp->sb_info->sb_virt->pi_array[RX_PI];
+			rxq->hw_cons_ptr = val;
+
+			qede_update_rx_prod(edev, rxq);
+		}
+
+		if (!(fp->type & QEDE_FASTPATH_TX))
+			continue;
+
+		for (tc = 0; tc < edev->num_tc; tc++) {
+			struct qede_tx_queue *txq = &fp->txqs[tc];
+
+			p_phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
+			page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
+
+			memset(&q_params, 0, sizeof(q_params));
+			q_params.rss_id = i;
+			q_params.queue_id = txq->index;
 			q_params.vport_id = 0;
 			q_params.sb = fp->sb_info->igu_sb_id;
 			q_params.sb_idx = TX_PI(tc);
 
 			rc = edev->ops->q_tx_start(cdev, &q_params,
-						   txq->tx_pbl.pbl.p_phys_table,
-						   txq->tx_pbl.page_cnt,
+						   p_phys_table, page_cnt,
 						   &txq->doorbell_addr);
 			if (rc) {
 				DP_ERR(edev, "Start TXQ #%d failed %d\n",
-				       txq_index, rc);
+				       txq->index, rc);
 				return rc;
 			}
 
@@ -3358,13 +3442,13 @@
 	}
 
 	/* Fill struct with RSS params */
-	if (QEDE_RSS_CNT(edev) > 1) {
+	if (QEDE_RSS_COUNT(edev) > 1) {
 		vport_update_params.update_rss_flg = 1;
 
 		/* Need to validate current RSS config uses valid entries */
 		for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) {
 			if (edev->rss_params.rss_ind_table[i] >=
-			    edev->num_rss) {
+			    QEDE_RSS_COUNT(edev)) {
 				reset_rss_indir = true;
 				break;
 			}
@@ -3377,7 +3461,7 @@
 			for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) {
 				u16 indir_val;
 
-				val = QEDE_RSS_CNT(edev);
+				val = QEDE_RSS_COUNT(edev);
 				indir_val = ethtool_rxfh_indir_default(i, val);
 				edev->rss_params.rss_ind_table[i] = indir_val;
 			}
@@ -3443,6 +3527,7 @@
 
 	DP_INFO(edev, "Starting qede unload\n");
 
+	qede_roce_dev_event_close(edev);
 	mutex_lock(&edev->qede_lock);
 	edev->state = QEDE_STATE_CLOSED;
 
@@ -3506,7 +3591,7 @@
 	if (rc)
 		goto err1;
 	DP_INFO(edev, "Allocated %d RSS queues on %d TC/s\n",
-		QEDE_RSS_CNT(edev), edev->num_tc);
+		QEDE_QUEUE_CNT(edev), edev->num_tc);
 
 	rc = qede_set_real_num_queues(edev);
 	if (rc)
@@ -3543,6 +3628,7 @@
 	/* Query whether link is already-up */
 	memset(&link_output, 0, sizeof(link_output));
 	edev->ops->common->get_link(edev->cdev, &link_output);
+	qede_roce_dev_event_open(edev);
 	qede_link_update(edev, &link_output);
 
 	DP_INFO(edev, "Ending successfully qede load\n");
@@ -3559,7 +3645,9 @@
 err1:
 	edev->ops->common->set_fp_int(edev->cdev, 0);
 	qede_free_fp_array(edev);
-	edev->num_rss = 0;
+	edev->num_queues = 0;
+	edev->fp_num_tx = 0;
+	edev->fp_num_rx = 0;
 err0:
 	return rc;
 }
diff --git a/drivers/net/ethernet/qlogic/qede/qede_roce.c b/drivers/net/ethernet/qlogic/qede/qede_roce.c
new file mode 100644
index 0000000..9867f96
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qede/qede_roce.c
@@ -0,0 +1,314 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/qed/qede_roce.h>
+#include "qede.h"
+
+static struct qedr_driver *qedr_drv;
+static LIST_HEAD(qedr_dev_list);
+static DEFINE_MUTEX(qedr_dev_list_lock);
+
+bool qede_roce_supported(struct qede_dev *dev)
+{
+	return dev->dev_info.common.rdma_supported;
+}
+
+static void _qede_roce_dev_add(struct qede_dev *edev)
+{
+	if (!qedr_drv)
+		return;
+
+	edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
+						 edev->ndev);
+}
+
+static int qede_roce_create_wq(struct qede_dev *edev)
+{
+	INIT_LIST_HEAD(&edev->rdma_info.roce_event_list);
+	edev->rdma_info.roce_wq = create_singlethread_workqueue("roce_wq");
+	if (!edev->rdma_info.roce_wq) {
+		DP_NOTICE(edev, "qedr: Could not create workqueue\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void qede_roce_cleanup_event(struct qede_dev *edev)
+{
+	struct list_head *head = &edev->rdma_info.roce_event_list;
+	struct qede_roce_event_work *event_node;
+
+	flush_workqueue(edev->rdma_info.roce_wq);
+	while (!list_empty(head)) {
+		event_node = list_entry(head->next, struct qede_roce_event_work,
+					list);
+		cancel_work_sync(&event_node->work);
+		list_del(&event_node->list);
+		kfree(event_node);
+	}
+}
+
+static void qede_roce_destroy_wq(struct qede_dev *edev)
+{
+	qede_roce_cleanup_event(edev);
+	destroy_workqueue(edev->rdma_info.roce_wq);
+}
+
+int qede_roce_dev_add(struct qede_dev *edev)
+{
+	int rc = 0;
+
+	if (qede_roce_supported(edev)) {
+		rc = qede_roce_create_wq(edev);
+		if (rc)
+			return rc;
+
+		INIT_LIST_HEAD(&edev->rdma_info.entry);
+		mutex_lock(&qedr_dev_list_lock);
+		list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
+		_qede_roce_dev_add(edev);
+		mutex_unlock(&qedr_dev_list_lock);
+	}
+
+	return rc;
+}
+
+static void _qede_roce_dev_remove(struct qede_dev *edev)
+{
+	if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
+		qedr_drv->remove(edev->rdma_info.qedr_dev);
+	edev->rdma_info.qedr_dev = NULL;
+}
+
+void qede_roce_dev_remove(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	qede_roce_destroy_wq(edev);
+	mutex_lock(&qedr_dev_list_lock);
+	_qede_roce_dev_remove(edev);
+	list_del(&edev->rdma_info.entry);
+	mutex_unlock(&qedr_dev_list_lock);
+}
+
+static void _qede_roce_dev_open(struct qede_dev *edev)
+{
+	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_UP);
+}
+
+static void qede_roce_dev_open(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	mutex_lock(&qedr_dev_list_lock);
+	_qede_roce_dev_open(edev);
+	mutex_unlock(&qedr_dev_list_lock);
+}
+
+static void _qede_roce_dev_close(struct qede_dev *edev)
+{
+	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_DOWN);
+}
+
+static void qede_roce_dev_close(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	mutex_lock(&qedr_dev_list_lock);
+	_qede_roce_dev_close(edev);
+	mutex_unlock(&qedr_dev_list_lock);
+}
+
+static void qede_roce_dev_shutdown(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	mutex_lock(&qedr_dev_list_lock);
+	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CLOSE);
+	mutex_unlock(&qedr_dev_list_lock);
+}
+
+int qede_roce_register_driver(struct qedr_driver *drv)
+{
+	struct qede_dev *edev;
+	u8 qedr_counter = 0;
+
+	mutex_lock(&qedr_dev_list_lock);
+	if (qedr_drv) {
+		mutex_unlock(&qedr_dev_list_lock);
+		return -EINVAL;
+	}
+	qedr_drv = drv;
+
+	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
+		struct net_device *ndev;
+
+		qedr_counter++;
+		_qede_roce_dev_add(edev);
+		ndev = edev->ndev;
+		if (netif_running(ndev) && netif_oper_up(ndev))
+			_qede_roce_dev_open(edev);
+	}
+	mutex_unlock(&qedr_dev_list_lock);
+
+	DP_INFO(edev, "qedr: discovered and registered %d RoCE funcs\n",
+		qedr_counter);
+
+	return 0;
+}
+EXPORT_SYMBOL(qede_roce_register_driver);
+
+void qede_roce_unregister_driver(struct qedr_driver *drv)
+{
+	struct qede_dev *edev;
+
+	mutex_lock(&qedr_dev_list_lock);
+	list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
+		if (edev->rdma_info.qedr_dev)
+			_qede_roce_dev_remove(edev);
+	}
+	qedr_drv = NULL;
+	mutex_unlock(&qedr_dev_list_lock);
+}
+EXPORT_SYMBOL(qede_roce_unregister_driver);
+
+static void qede_roce_changeaddr(struct qede_dev *edev)
+{
+	if (!qede_roce_supported(edev))
+		return;
+
+	if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+		qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR);
+}
+
+struct qede_roce_event_work *qede_roce_get_free_event_node(struct qede_dev
+							   *edev)
+{
+	struct qede_roce_event_work *event_node = NULL;
+	struct list_head *list_node = NULL;
+	bool found = false;
+
+	list_for_each(list_node, &edev->rdma_info.roce_event_list) {
+		event_node = list_entry(list_node, struct qede_roce_event_work,
+					list);
+		if (!work_pending(&event_node->work)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		event_node = kzalloc(sizeof(*event_node), GFP_KERNEL);
+		if (!event_node) {
+			DP_NOTICE(edev,
+				  "qedr: Could not allocate memory for roce work\n");
+			return NULL;
+		}
+		list_add_tail(&event_node->list,
+			      &edev->rdma_info.roce_event_list);
+	}
+
+	return event_node;
+}
+
+static void qede_roce_handle_event(struct work_struct *work)
+{
+	struct qede_roce_event_work *event_node;
+	enum qede_roce_event event;
+	struct qede_dev *edev;
+
+	event_node = container_of(work, struct qede_roce_event_work, work);
+	event = event_node->event;
+	edev = event_node->ptr;
+
+	switch (event) {
+	case QEDE_UP:
+		qede_roce_dev_open(edev);
+		break;
+	case QEDE_DOWN:
+		qede_roce_dev_close(edev);
+		break;
+	case QEDE_CLOSE:
+		qede_roce_dev_shutdown(edev);
+		break;
+	case QEDE_CHANGE_ADDR:
+		qede_roce_changeaddr(edev);
+		break;
+	default:
+		DP_NOTICE(edev, "Invalid roce event %d", event);
+	}
+}
+
+static void qede_roce_add_event(struct qede_dev *edev,
+				enum qede_roce_event event)
+{
+	struct qede_roce_event_work *event_node;
+
+	if (!edev->rdma_info.qedr_dev)
+		return;
+
+	event_node = qede_roce_get_free_event_node(edev);
+	if (!event_node)
+		return;
+
+	event_node->event = event;
+	event_node->ptr = edev;
+
+	INIT_WORK(&event_node->work, qede_roce_handle_event);
+	queue_work(edev->rdma_info.roce_wq, &event_node->work);
+}
+
+void qede_roce_dev_event_open(struct qede_dev *edev)
+{
+	qede_roce_add_event(edev, QEDE_UP);
+}
+
+void qede_roce_dev_event_close(struct qede_dev *edev)
+{
+	qede_roce_add_event(edev, QEDE_DOWN);
+}
+
+void qede_roce_event_changeaddr(struct qede_dev *edev)
+{
+	qede_roce_add_event(edev, QEDE_CHANGE_ADDR);
+}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index fd973f4..49bad00 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -37,8 +37,8 @@
 
 #define _QLCNIC_LINUX_MAJOR 5
 #define _QLCNIC_LINUX_MINOR 3
-#define _QLCNIC_LINUX_SUBVERSION 64
-#define QLCNIC_LINUX_VERSIONID  "5.3.64"
+#define _QLCNIC_LINUX_SUBVERSION 65
+#define QLCNIC_LINUX_VERSIONID  "5.3.65"
 #define QLCNIC_DRV_IDC_VER  0x01
 #define QLCNIC_DRIVER_VERSION  ((_QLCNIC_LINUX_MAJOR << 16) |\
 		 (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION))
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 87c642d..fedd736 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -102,7 +102,6 @@
 #define QLCNIC_RESPONSE_DESC	0x05
 #define QLCNIC_LRO_DESC  	0x12
 
-#define QLCNIC_TX_POLL_BUDGET		128
 #define QLCNIC_TCP_HDR_SIZE		20
 #define QLCNIC_TCP_TS_OPTION_SIZE	12
 #define QLCNIC_FETCH_RING_ID(handle)	((handle) >> 63)
@@ -2008,7 +2007,6 @@
 	struct qlcnic_host_tx_ring *tx_ring;
 	struct qlcnic_adapter *adapter;
 
-	budget = QLCNIC_TX_POLL_BUDGET;
 	tx_ring = container_of(napi, struct qlcnic_host_tx_ring, napi);
 	adapter = tx_ring->adapter;
 	work_done = qlcnic_process_cmd_ring(adapter, tx_ring, budget);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 3ebef27..3ae3968 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -432,18 +432,19 @@
 
 static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb,
 			struct net_device *netdev,
-			struct net_device *filter_dev, int idx)
+			struct net_device *filter_dev, int *idx)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
+	int err = 0;
 
 	if (!adapter->fdb_mac_learn)
 		return ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx);
 
 	if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) ||
 	    qlcnic_sriov_check(adapter))
-		idx = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx);
+		err = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx);
 
-	return idx;
+	return err;
 }
 
 static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
index 017d8c2c..5f32765 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
@@ -156,10 +156,8 @@
 	spinlock_t			vlan_list_lock;	/* Lock for VLAN list */
 };
 
-struct qlcnic_async_work_list {
+struct qlcnic_async_cmd {
 	struct list_head	list;
-	struct work_struct	work;
-	void			*ptr;
 	struct qlcnic_cmd_args	*cmd;
 };
 
@@ -168,7 +166,10 @@
 	struct workqueue_struct *bc_trans_wq;
 	struct workqueue_struct *bc_async_wq;
 	struct workqueue_struct *bc_flr_wq;
-	struct list_head	async_list;
+	struct qlcnic_adapter	*adapter;
+	struct list_head	async_cmd_list;
+	struct work_struct	vf_async_work;
+	spinlock_t		queue_lock; /* async_cmd_list queue lock */
 };
 
 struct qlcnic_sriov {
@@ -237,7 +238,7 @@
 int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int);
 int qlcnic_sriov_get_vf_config(struct net_device *, int ,
 			       struct ifla_vf_info *);
-int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8);
+int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8, __be16);
 int qlcnic_sriov_set_vf_spoofchk(struct net_device *, int, bool);
 #else
 static inline void qlcnic_sriov_pf_disable(struct qlcnic_adapter *adapter) {}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 7327b72..d710705 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -29,6 +29,7 @@
 #define QLC_83XX_VF_RESET_FAIL_THRESH	8
 #define QLC_BC_CMD_MAX_RETRY_CNT	5
 
+static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work);
 static void qlcnic_sriov_vf_free_mac_list(struct qlcnic_adapter *);
 static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *, u32);
 static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *);
@@ -177,7 +178,10 @@
 	}
 
 	bc->bc_async_wq =  wq;
-	INIT_LIST_HEAD(&bc->async_list);
+	INIT_LIST_HEAD(&bc->async_cmd_list);
+	INIT_WORK(&bc->vf_async_work, qlcnic_sriov_handle_async_issue_cmd);
+	spin_lock_init(&bc->queue_lock);
+	bc->adapter = adapter;
 
 	for (i = 0; i < num_vfs; i++) {
 		vf = &sriov->vf_info[i];
@@ -1517,17 +1521,21 @@
 
 void qlcnic_sriov_cleanup_async_list(struct qlcnic_back_channel *bc)
 {
-	struct list_head *head = &bc->async_list;
-	struct qlcnic_async_work_list *entry;
+	struct list_head *head = &bc->async_cmd_list;
+	struct qlcnic_async_cmd *entry;
 
 	flush_workqueue(bc->bc_async_wq);
+	cancel_work_sync(&bc->vf_async_work);
+
+	spin_lock(&bc->queue_lock);
 	while (!list_empty(head)) {
-		entry = list_entry(head->next, struct qlcnic_async_work_list,
+		entry = list_entry(head->next, struct qlcnic_async_cmd,
 				   list);
-		cancel_work_sync(&entry->work);
 		list_del(&entry->list);
+		kfree(entry->cmd);
 		kfree(entry);
 	}
+	spin_unlock(&bc->queue_lock);
 }
 
 void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
@@ -1587,57 +1595,64 @@
 
 static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work)
 {
-	struct qlcnic_async_work_list *entry;
-	struct qlcnic_adapter *adapter;
+	struct qlcnic_async_cmd *entry, *tmp;
+	struct qlcnic_back_channel *bc;
 	struct qlcnic_cmd_args *cmd;
+	struct list_head *head;
+	LIST_HEAD(del_list);
 
-	entry = container_of(work, struct qlcnic_async_work_list, work);
-	adapter = entry->ptr;
-	cmd = entry->cmd;
-	__qlcnic_sriov_issue_cmd(adapter, cmd);
+	bc = container_of(work, struct qlcnic_back_channel, vf_async_work);
+	head = &bc->async_cmd_list;
+
+	spin_lock(&bc->queue_lock);
+	list_splice_init(head, &del_list);
+	spin_unlock(&bc->queue_lock);
+
+	list_for_each_entry_safe(entry, tmp, &del_list, list) {
+		list_del(&entry->list);
+		cmd = entry->cmd;
+		__qlcnic_sriov_issue_cmd(bc->adapter, cmd);
+		kfree(entry);
+	}
+
+	if (!list_empty(head))
+		queue_work(bc->bc_async_wq, &bc->vf_async_work);
+
 	return;
 }
 
-static struct qlcnic_async_work_list *
-qlcnic_sriov_get_free_node_async_work(struct qlcnic_back_channel *bc)
+static struct qlcnic_async_cmd *
+qlcnic_sriov_alloc_async_cmd(struct qlcnic_back_channel *bc,
+			     struct qlcnic_cmd_args *cmd)
 {
-	struct list_head *node;
-	struct qlcnic_async_work_list *entry = NULL;
-	u8 empty = 0;
+	struct qlcnic_async_cmd *entry = NULL;
 
-	list_for_each(node, &bc->async_list) {
-		entry = list_entry(node, struct qlcnic_async_work_list, list);
-		if (!work_pending(&entry->work)) {
-			empty = 1;
-			break;
-		}
-	}
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return NULL;
 
-	if (!empty) {
-		entry = kzalloc(sizeof(struct qlcnic_async_work_list),
-				GFP_ATOMIC);
-		if (entry == NULL)
-			return NULL;
-		list_add_tail(&entry->list, &bc->async_list);
-	}
+	entry->cmd = cmd;
+
+	spin_lock(&bc->queue_lock);
+	list_add_tail(&entry->list, &bc->async_cmd_list);
+	spin_unlock(&bc->queue_lock);
 
 	return entry;
 }
 
 static void qlcnic_sriov_schedule_async_cmd(struct qlcnic_back_channel *bc,
-					    work_func_t func, void *data,
 					    struct qlcnic_cmd_args *cmd)
 {
-	struct qlcnic_async_work_list *entry = NULL;
+	struct qlcnic_async_cmd *entry = NULL;
 
-	entry = qlcnic_sriov_get_free_node_async_work(bc);
-	if (!entry)
+	entry = qlcnic_sriov_alloc_async_cmd(bc, cmd);
+	if (!entry) {
+		qlcnic_free_mbx_args(cmd);
+		kfree(cmd);
 		return;
+	}
 
-	entry->ptr = data;
-	entry->cmd = cmd;
-	INIT_WORK(&entry->work, func);
-	queue_work(bc->bc_async_wq, &entry->work);
+	queue_work(bc->bc_async_wq, &bc->vf_async_work);
 }
 
 static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter,
@@ -1649,8 +1664,8 @@
 	if (adapter->need_fw_reset)
 		return -EIO;
 
-	qlcnic_sriov_schedule_async_cmd(bc, qlcnic_sriov_handle_async_issue_cmd,
-					adapter, cmd);
+	qlcnic_sriov_schedule_async_cmd(bc, cmd);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index afd687e..50eaafa 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -1915,7 +1915,7 @@
 }
 
 int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf,
-			     u16 vlan, u8 qos)
+			     u16 vlan, u8 qos, __be16 vlan_proto)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	struct qlcnic_sriov *sriov = adapter->ahw->sriov;
@@ -1928,6 +1928,9 @@
 	if (vf >= sriov->num_vfs || qos > 7)
 		return -EINVAL;
 
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
 	if (vlan > MAX_VLAN_ID) {
 		netdev_err(netdev,
 			   "Invalid VLAN ID, allowed range is [0 - %d]\n",
diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig
index a76e380..9ba568d 100644
--- a/drivers/net/ethernet/qualcomm/Kconfig
+++ b/drivers/net/ethernet/qualcomm/Kconfig
@@ -24,4 +24,16 @@
 	  To compile this driver as a module, choose M here. The module
 	  will be called qcaspi.
 
+config QCOM_EMAC
+	tristate "Qualcomm Technologies, Inc. EMAC Gigabit Ethernet support"
+	select CRC32
+	select PHYLIB
+	---help---
+	  This driver supports the Qualcomm Technologies, Inc. Gigabit
+	  Ethernet Media Access Controller (EMAC). The controller
+	  supports IEEE 802.3-2002, half-duplex mode at 10/100 Mb/s,
+	  full-duplex mode at 10/100/1000Mb/s, Wake On LAN (WOL) for
+	  low power, Receive-Side Scaling (RSS), and IEEE 1588-2008
+	  Precision Clock Synchronization Protocol.
+
 endif # NET_VENDOR_QUALCOMM
diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile
index 9da2d75..aacb0a5 100644
--- a/drivers/net/ethernet/qualcomm/Makefile
+++ b/drivers/net/ethernet/qualcomm/Makefile
@@ -4,3 +4,5 @@
 
 obj-$(CONFIG_QCA7000) += qcaspi.o
 qcaspi-objs := qca_spi.o qca_framing.o qca_7k.o qca_debug.o
+
+obj-y += emac/
diff --git a/drivers/net/ethernet/qualcomm/emac/Makefile b/drivers/net/ethernet/qualcomm/emac/Makefile
new file mode 100644
index 0000000..01ee144
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the Qualcomm Technologies, Inc. EMAC Gigabit Ethernet driver
+#
+
+obj-$(CONFIG_QCOM_EMAC) += qcom-emac.o
+
+qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
new file mode 100644
index 0000000..e97968e
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -0,0 +1,1528 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. EMAC Ethernet Controller MAC layer support
+ */
+
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/crc32.h>
+#include <linux/if_vlan.h>
+#include <linux/jiffies.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <net/ip6_checksum.h>
+#include "emac.h"
+#include "emac-sgmii.h"
+
+/* EMAC base register offsets */
+#define EMAC_MAC_CTRL			0x001480
+#define EMAC_WOL_CTRL0			0x0014a0
+#define EMAC_RSS_KEY0			0x0014b0
+#define EMAC_H1TPD_BASE_ADDR_LO		0x0014e0
+#define EMAC_H2TPD_BASE_ADDR_LO		0x0014e4
+#define EMAC_H3TPD_BASE_ADDR_LO		0x0014e8
+#define EMAC_INTER_SRAM_PART9		0x001534
+#define EMAC_DESC_CTRL_0		0x001540
+#define EMAC_DESC_CTRL_1		0x001544
+#define EMAC_DESC_CTRL_2		0x001550
+#define EMAC_DESC_CTRL_10		0x001554
+#define EMAC_DESC_CTRL_12		0x001558
+#define EMAC_DESC_CTRL_13		0x00155c
+#define EMAC_DESC_CTRL_3		0x001560
+#define EMAC_DESC_CTRL_4		0x001564
+#define EMAC_DESC_CTRL_5		0x001568
+#define EMAC_DESC_CTRL_14		0x00156c
+#define EMAC_DESC_CTRL_15		0x001570
+#define EMAC_DESC_CTRL_16		0x001574
+#define EMAC_DESC_CTRL_6		0x001578
+#define EMAC_DESC_CTRL_8		0x001580
+#define EMAC_DESC_CTRL_9		0x001584
+#define EMAC_DESC_CTRL_11		0x001588
+#define EMAC_TXQ_CTRL_0			0x001590
+#define EMAC_TXQ_CTRL_1			0x001594
+#define EMAC_TXQ_CTRL_2			0x001598
+#define EMAC_RXQ_CTRL_0			0x0015a0
+#define EMAC_RXQ_CTRL_1			0x0015a4
+#define EMAC_RXQ_CTRL_2			0x0015a8
+#define EMAC_RXQ_CTRL_3			0x0015ac
+#define EMAC_BASE_CPU_NUMBER		0x0015b8
+#define EMAC_DMA_CTRL			0x0015c0
+#define EMAC_MAILBOX_0			0x0015e0
+#define EMAC_MAILBOX_5			0x0015e4
+#define EMAC_MAILBOX_6			0x0015e8
+#define EMAC_MAILBOX_13			0x0015ec
+#define EMAC_MAILBOX_2			0x0015f4
+#define EMAC_MAILBOX_3			0x0015f8
+#define EMAC_MAILBOX_11			0x00160c
+#define EMAC_AXI_MAST_CTRL		0x001610
+#define EMAC_MAILBOX_12			0x001614
+#define EMAC_MAILBOX_9			0x001618
+#define EMAC_MAILBOX_10			0x00161c
+#define EMAC_ATHR_HEADER_CTRL		0x001620
+#define EMAC_CLK_GATE_CTRL		0x001814
+#define EMAC_MISC_CTRL			0x001990
+#define EMAC_MAILBOX_7			0x0019e0
+#define EMAC_MAILBOX_8			0x0019e4
+#define EMAC_MAILBOX_15			0x001bd4
+#define EMAC_MAILBOX_16			0x001bd8
+
+/* EMAC_MAC_CTRL */
+#define SINGLE_PAUSE_MODE       	0x10000000
+#define DEBUG_MODE                      0x08000000
+#define BROAD_EN                        0x04000000
+#define MULTI_ALL                       0x02000000
+#define RX_CHKSUM_EN                    0x01000000
+#define HUGE                            0x00800000
+#define SPEED(x)			(((x) & 0x3) << 20)
+#define SPEED_MASK			SPEED(0x3)
+#define SIMR                            0x00080000
+#define TPAUSE                          0x00010000
+#define PROM_MODE                       0x00008000
+#define VLAN_STRIP                      0x00004000
+#define PRLEN_BMSK                      0x00003c00
+#define PRLEN_SHFT                      10
+#define HUGEN                           0x00000200
+#define FLCHK                           0x00000100
+#define PCRCE                           0x00000080
+#define CRCE                            0x00000040
+#define FULLD                           0x00000020
+#define MAC_LP_EN                       0x00000010
+#define RXFC                            0x00000008
+#define TXFC                            0x00000004
+#define RXEN                            0x00000002
+#define TXEN                            0x00000001
+
+
+/* EMAC_WOL_CTRL0 */
+#define LK_CHG_PME			0x20
+#define LK_CHG_EN			0x10
+#define MG_FRAME_PME			0x8
+#define MG_FRAME_EN			0x4
+#define WK_FRAME_EN			0x1
+
+/* EMAC_DESC_CTRL_3 */
+#define RFD_RING_SIZE_BMSK                                       0xfff
+
+/* EMAC_DESC_CTRL_4 */
+#define RX_BUFFER_SIZE_BMSK                                     0xffff
+
+/* EMAC_DESC_CTRL_6 */
+#define RRD_RING_SIZE_BMSK                                       0xfff
+
+/* EMAC_DESC_CTRL_9 */
+#define TPD_RING_SIZE_BMSK                                      0xffff
+
+/* EMAC_TXQ_CTRL_0 */
+#define NUM_TXF_BURST_PREF_BMSK                             0xffff0000
+#define NUM_TXF_BURST_PREF_SHFT                                     16
+#define LS_8023_SP                                                0x80
+#define TXQ_MODE                                                  0x40
+#define TXQ_EN                                                    0x20
+#define IP_OP_SP                                                  0x10
+#define NUM_TPD_BURST_PREF_BMSK                                    0xf
+#define NUM_TPD_BURST_PREF_SHFT                                      0
+
+/* EMAC_TXQ_CTRL_1 */
+#define JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK                        0x7ff
+
+/* EMAC_TXQ_CTRL_2 */
+#define TXF_HWM_BMSK                                         0xfff0000
+#define TXF_LWM_BMSK                                             0xfff
+
+/* EMAC_RXQ_CTRL_0 */
+#define RXQ_EN                                                 BIT(31)
+#define CUT_THRU_EN                                            BIT(30)
+#define RSS_HASH_EN                                            BIT(29)
+#define NUM_RFD_BURST_PREF_BMSK                              0x3f00000
+#define NUM_RFD_BURST_PREF_SHFT                                     20
+#define IDT_TABLE_SIZE_BMSK                                    0x1ff00
+#define IDT_TABLE_SIZE_SHFT                                          8
+#define SP_IPV6                                                   0x80
+
+/* EMAC_RXQ_CTRL_1 */
+#define JUMBO_1KAH_BMSK                                         0xf000
+#define JUMBO_1KAH_SHFT                                             12
+#define RFD_PREF_LOW_TH                                           0x10
+#define RFD_PREF_LOW_THRESHOLD_BMSK                              0xfc0
+#define RFD_PREF_LOW_THRESHOLD_SHFT                                  6
+#define RFD_PREF_UP_TH                                            0x10
+#define RFD_PREF_UP_THRESHOLD_BMSK                                0x3f
+#define RFD_PREF_UP_THRESHOLD_SHFT                                   0
+
+/* EMAC_RXQ_CTRL_2 */
+#define RXF_DOF_THRESFHOLD                                       0x1a0
+#define RXF_DOF_THRESHOLD_BMSK                               0xfff0000
+#define RXF_DOF_THRESHOLD_SHFT                                      16
+#define RXF_UOF_THRESFHOLD                                        0xbe
+#define RXF_UOF_THRESHOLD_BMSK                                   0xfff
+#define RXF_UOF_THRESHOLD_SHFT                                       0
+
+/* EMAC_RXQ_CTRL_3 */
+#define RXD_TIMER_BMSK                                      0xffff0000
+#define RXD_THRESHOLD_BMSK                                       0xfff
+#define RXD_THRESHOLD_SHFT                                           0
+
+/* EMAC_DMA_CTRL */
+#define DMAW_DLY_CNT_BMSK                                      0xf0000
+#define DMAW_DLY_CNT_SHFT                                           16
+#define DMAR_DLY_CNT_BMSK                                       0xf800
+#define DMAR_DLY_CNT_SHFT                                           11
+#define DMAR_REQ_PRI                                             0x400
+#define REGWRBLEN_BMSK                                           0x380
+#define REGWRBLEN_SHFT                                               7
+#define REGRDBLEN_BMSK                                            0x70
+#define REGRDBLEN_SHFT                                               4
+#define OUT_ORDER_MODE                                             0x4
+#define ENH_ORDER_MODE                                             0x2
+#define IN_ORDER_MODE                                              0x1
+
+/* EMAC_MAILBOX_13 */
+#define RFD3_PROC_IDX_BMSK                                   0xfff0000
+#define RFD3_PROC_IDX_SHFT                                          16
+#define RFD3_PROD_IDX_BMSK                                       0xfff
+#define RFD3_PROD_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_2 */
+#define NTPD_CONS_IDX_BMSK                                  0xffff0000
+#define NTPD_CONS_IDX_SHFT                                          16
+
+/* EMAC_MAILBOX_3 */
+#define RFD0_CONS_IDX_BMSK                                       0xfff
+#define RFD0_CONS_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_11 */
+#define H3TPD_PROD_IDX_BMSK                                 0xffff0000
+#define H3TPD_PROD_IDX_SHFT                                         16
+
+/* EMAC_AXI_MAST_CTRL */
+#define DATA_BYTE_SWAP                                             0x8
+#define MAX_BOUND                                                  0x2
+#define MAX_BTYPE                                                  0x1
+
+/* EMAC_MAILBOX_12 */
+#define H3TPD_CONS_IDX_BMSK                                 0xffff0000
+#define H3TPD_CONS_IDX_SHFT                                         16
+
+/* EMAC_MAILBOX_9 */
+#define H2TPD_PROD_IDX_BMSK                                     0xffff
+#define H2TPD_PROD_IDX_SHFT                                          0
+
+/* EMAC_MAILBOX_10 */
+#define H1TPD_CONS_IDX_BMSK                                 0xffff0000
+#define H1TPD_CONS_IDX_SHFT                                         16
+#define H2TPD_CONS_IDX_BMSK                                     0xffff
+#define H2TPD_CONS_IDX_SHFT                                          0
+
+/* EMAC_ATHR_HEADER_CTRL */
+#define HEADER_CNT_EN                                              0x2
+#define HEADER_ENABLE                                              0x1
+
+/* EMAC_MAILBOX_0 */
+#define RFD0_PROC_IDX_BMSK                                   0xfff0000
+#define RFD0_PROC_IDX_SHFT                                          16
+#define RFD0_PROD_IDX_BMSK                                       0xfff
+#define RFD0_PROD_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_5 */
+#define RFD1_PROC_IDX_BMSK                                   0xfff0000
+#define RFD1_PROC_IDX_SHFT                                          16
+#define RFD1_PROD_IDX_BMSK                                       0xfff
+#define RFD1_PROD_IDX_SHFT                                           0
+
+/* EMAC_MISC_CTRL */
+#define RX_UNCPL_INT_EN                                            0x1
+
+/* EMAC_MAILBOX_7 */
+#define RFD2_CONS_IDX_BMSK                                   0xfff0000
+#define RFD2_CONS_IDX_SHFT                                          16
+#define RFD1_CONS_IDX_BMSK                                       0xfff
+#define RFD1_CONS_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_8 */
+#define RFD3_CONS_IDX_BMSK                                       0xfff
+#define RFD3_CONS_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_15 */
+#define NTPD_PROD_IDX_BMSK                                      0xffff
+#define NTPD_PROD_IDX_SHFT                                           0
+
+/* EMAC_MAILBOX_16 */
+#define H1TPD_PROD_IDX_BMSK                                     0xffff
+#define H1TPD_PROD_IDX_SHFT                                          0
+
+#define RXQ0_RSS_HSTYP_IPV6_TCP_EN                                0x20
+#define RXQ0_RSS_HSTYP_IPV6_EN                                    0x10
+#define RXQ0_RSS_HSTYP_IPV4_TCP_EN                                 0x8
+#define RXQ0_RSS_HSTYP_IPV4_EN                                     0x4
+
+/* EMAC_EMAC_WRAPPER_TX_TS_INX */
+#define EMAC_WRAPPER_TX_TS_EMPTY                               BIT(31)
+#define EMAC_WRAPPER_TX_TS_INX_BMSK                             0xffff
+
+struct emac_skb_cb {
+	u32           tpd_idx;
+	unsigned long jiffies;
+};
+
+#define EMAC_SKB_CB(skb)	((struct emac_skb_cb *)(skb)->cb)
+#define EMAC_RSS_IDT_SIZE	256
+#define JUMBO_1KAH		0x4
+#define RXD_TH			0x100
+#define EMAC_TPD_LAST_FRAGMENT	0x80000000
+#define EMAC_TPD_TSTAMP_SAVE	0x80000000
+
+/* EMAC Errors in emac_rrd.word[3] */
+#define EMAC_RRD_L4F		BIT(14)
+#define EMAC_RRD_IPF		BIT(15)
+#define EMAC_RRD_CRC		BIT(21)
+#define EMAC_RRD_FAE		BIT(22)
+#define EMAC_RRD_TRN		BIT(23)
+#define EMAC_RRD_RNT		BIT(24)
+#define EMAC_RRD_INC		BIT(25)
+#define EMAC_RRD_FOV		BIT(29)
+#define EMAC_RRD_LEN		BIT(30)
+
+/* Error bits that will result in a received frame being discarded */
+#define EMAC_RRD_ERROR (EMAC_RRD_IPF | EMAC_RRD_CRC | EMAC_RRD_FAE | \
+			EMAC_RRD_TRN | EMAC_RRD_RNT | EMAC_RRD_INC | \
+			EMAC_RRD_FOV | EMAC_RRD_LEN)
+#define EMAC_RRD_STATS_DW_IDX 3
+
+#define EMAC_RRD(RXQ, SIZE, IDX)	((RXQ)->rrd.v_addr + (SIZE * (IDX)))
+#define EMAC_RFD(RXQ, SIZE, IDX)	((RXQ)->rfd.v_addr + (SIZE * (IDX)))
+#define EMAC_TPD(TXQ, SIZE, IDX)	((TXQ)->tpd.v_addr + (SIZE * (IDX)))
+
+#define GET_RFD_BUFFER(RXQ, IDX)	(&((RXQ)->rfd.rfbuff[(IDX)]))
+#define GET_TPD_BUFFER(RTQ, IDX)	(&((RTQ)->tpd.tpbuff[(IDX)]))
+
+#define EMAC_TX_POLL_HWTXTSTAMP_THRESHOLD	8
+
+#define ISR_RX_PKT      (\
+	RX_PKT_INT0     |\
+	RX_PKT_INT1     |\
+	RX_PKT_INT2     |\
+	RX_PKT_INT3)
+
+#define EMAC_MAC_IRQ_RES                                    	"core0"
+
+void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr)
+{
+	u32 crc32, bit, reg, mta;
+
+	/* Calculate the CRC of the MAC address */
+	crc32 = ether_crc(ETH_ALEN, addr);
+
+	/* The HASH Table is an array of 2 32-bit registers. It is
+	 * treated like an array of 64 bits (BitArray[hash_value]).
+	 * Use the upper 6 bits of the above CRC as the hash value.
+	 */
+	reg = (crc32 >> 31) & 0x1;
+	bit = (crc32 >> 26) & 0x1F;
+
+	mta = readl(adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2));
+	mta |= BIT(bit);
+	writel(mta, adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2));
+}
+
+void emac_mac_multicast_addr_clear(struct emac_adapter *adpt)
+{
+	writel(0, adpt->base + EMAC_HASH_TAB_REG0);
+	writel(0, adpt->base + EMAC_HASH_TAB_REG1);
+}
+
+/* definitions for RSS */
+#define EMAC_RSS_KEY(_i, _type) \
+		(EMAC_RSS_KEY0 + ((_i) * sizeof(_type)))
+#define EMAC_RSS_TBL(_i, _type) \
+		(EMAC_IDT_TABLE0 + ((_i) * sizeof(_type)))
+
+/* Config MAC modes */
+void emac_mac_mode_config(struct emac_adapter *adpt)
+{
+	struct net_device *netdev = adpt->netdev;
+	u32 mac;
+
+	mac = readl(adpt->base + EMAC_MAC_CTRL);
+	mac &= ~(VLAN_STRIP | PROM_MODE | MULTI_ALL | MAC_LP_EN);
+
+	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+		mac |= VLAN_STRIP;
+
+	if (netdev->flags & IFF_PROMISC)
+		mac |= PROM_MODE;
+
+	if (netdev->flags & IFF_ALLMULTI)
+		mac |= MULTI_ALL;
+
+	writel(mac, adpt->base + EMAC_MAC_CTRL);
+}
+
+/* Config descriptor rings */
+static void emac_mac_dma_rings_config(struct emac_adapter *adpt)
+{
+	static const unsigned short tpd_q_offset[] = {
+		EMAC_DESC_CTRL_8,        EMAC_H1TPD_BASE_ADDR_LO,
+		EMAC_H2TPD_BASE_ADDR_LO, EMAC_H3TPD_BASE_ADDR_LO};
+	static const unsigned short rfd_q_offset[] = {
+		EMAC_DESC_CTRL_2,        EMAC_DESC_CTRL_10,
+		EMAC_DESC_CTRL_12,       EMAC_DESC_CTRL_13};
+	static const unsigned short rrd_q_offset[] = {
+		EMAC_DESC_CTRL_5,        EMAC_DESC_CTRL_14,
+		EMAC_DESC_CTRL_15,       EMAC_DESC_CTRL_16};
+
+	/* TPD (Transmit Packet Descriptor) */
+	writel(upper_32_bits(adpt->tx_q.tpd.dma_addr),
+	       adpt->base + EMAC_DESC_CTRL_1);
+
+	writel(lower_32_bits(adpt->tx_q.tpd.dma_addr),
+	       adpt->base + tpd_q_offset[0]);
+
+	writel(adpt->tx_q.tpd.count & TPD_RING_SIZE_BMSK,
+	       adpt->base + EMAC_DESC_CTRL_9);
+
+	/* RFD (Receive Free Descriptor) & RRD (Receive Return Descriptor) */
+	writel(upper_32_bits(adpt->rx_q.rfd.dma_addr),
+	       adpt->base + EMAC_DESC_CTRL_0);
+
+	writel(lower_32_bits(adpt->rx_q.rfd.dma_addr),
+	       adpt->base + rfd_q_offset[0]);
+	writel(lower_32_bits(adpt->rx_q.rrd.dma_addr),
+	       adpt->base + rrd_q_offset[0]);
+
+	writel(adpt->rx_q.rfd.count & RFD_RING_SIZE_BMSK,
+	       adpt->base + EMAC_DESC_CTRL_3);
+	writel(adpt->rx_q.rrd.count & RRD_RING_SIZE_BMSK,
+	       adpt->base + EMAC_DESC_CTRL_6);
+
+	writel(adpt->rxbuf_size & RX_BUFFER_SIZE_BMSK,
+	       adpt->base + EMAC_DESC_CTRL_4);
+
+	writel(0, adpt->base + EMAC_DESC_CTRL_11);
+
+	/* Load all of the base addresses above and ensure that triggering HW to
+	 * read ring pointers is flushed
+	 */
+	writel(1, adpt->base + EMAC_INTER_SRAM_PART9);
+}
+
+/* Config transmit parameters */
+static void emac_mac_tx_config(struct emac_adapter *adpt)
+{
+	u32 val;
+
+	writel((EMAC_MAX_TX_OFFLOAD_THRESH >> 3) &
+	       JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK, adpt->base + EMAC_TXQ_CTRL_1);
+
+	val = (adpt->tpd_burst << NUM_TPD_BURST_PREF_SHFT) &
+	       NUM_TPD_BURST_PREF_BMSK;
+
+	val |= TXQ_MODE | LS_8023_SP;
+	val |= (0x0100 << NUM_TXF_BURST_PREF_SHFT) &
+		NUM_TXF_BURST_PREF_BMSK;
+
+	writel(val, adpt->base + EMAC_TXQ_CTRL_0);
+	emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_2,
+			  (TXF_HWM_BMSK | TXF_LWM_BMSK), 0);
+}
+
+/* Config receive parameters */
+static void emac_mac_rx_config(struct emac_adapter *adpt)
+{
+	u32 val;
+
+	val = (adpt->rfd_burst << NUM_RFD_BURST_PREF_SHFT) &
+	       NUM_RFD_BURST_PREF_BMSK;
+	val |= (SP_IPV6 | CUT_THRU_EN);
+
+	writel(val, adpt->base + EMAC_RXQ_CTRL_0);
+
+	val = readl(adpt->base + EMAC_RXQ_CTRL_1);
+	val &= ~(JUMBO_1KAH_BMSK | RFD_PREF_LOW_THRESHOLD_BMSK |
+		 RFD_PREF_UP_THRESHOLD_BMSK);
+	val |= (JUMBO_1KAH << JUMBO_1KAH_SHFT) |
+		(RFD_PREF_LOW_TH << RFD_PREF_LOW_THRESHOLD_SHFT) |
+		(RFD_PREF_UP_TH  << RFD_PREF_UP_THRESHOLD_SHFT);
+	writel(val, adpt->base + EMAC_RXQ_CTRL_1);
+
+	val = readl(adpt->base + EMAC_RXQ_CTRL_2);
+	val &= ~(RXF_DOF_THRESHOLD_BMSK | RXF_UOF_THRESHOLD_BMSK);
+	val |= (RXF_DOF_THRESFHOLD  << RXF_DOF_THRESHOLD_SHFT) |
+		(RXF_UOF_THRESFHOLD << RXF_UOF_THRESHOLD_SHFT);
+	writel(val, adpt->base + EMAC_RXQ_CTRL_2);
+
+	val = readl(adpt->base + EMAC_RXQ_CTRL_3);
+	val &= ~(RXD_TIMER_BMSK | RXD_THRESHOLD_BMSK);
+	val |= RXD_TH << RXD_THRESHOLD_SHFT;
+	writel(val, adpt->base + EMAC_RXQ_CTRL_3);
+}
+
+/* Config dma */
+static void emac_mac_dma_config(struct emac_adapter *adpt)
+{
+	u32 dma_ctrl = DMAR_REQ_PRI;
+
+	switch (adpt->dma_order) {
+	case emac_dma_ord_in:
+		dma_ctrl |= IN_ORDER_MODE;
+		break;
+	case emac_dma_ord_enh:
+		dma_ctrl |= ENH_ORDER_MODE;
+		break;
+	case emac_dma_ord_out:
+		dma_ctrl |= OUT_ORDER_MODE;
+		break;
+	default:
+		break;
+	}
+
+	dma_ctrl |= (((u32)adpt->dmar_block) << REGRDBLEN_SHFT) &
+						REGRDBLEN_BMSK;
+	dma_ctrl |= (((u32)adpt->dmaw_block) << REGWRBLEN_SHFT) &
+						REGWRBLEN_BMSK;
+	dma_ctrl |= (((u32)adpt->dmar_dly_cnt) << DMAR_DLY_CNT_SHFT) &
+						DMAR_DLY_CNT_BMSK;
+	dma_ctrl |= (((u32)adpt->dmaw_dly_cnt) << DMAW_DLY_CNT_SHFT) &
+						DMAW_DLY_CNT_BMSK;
+
+	/* config DMA and ensure that configuration is flushed to HW */
+	writel(dma_ctrl, adpt->base + EMAC_DMA_CTRL);
+}
+
+/* set MAC address */
+static void emac_set_mac_address(struct emac_adapter *adpt, u8 *addr)
+{
+	u32 sta;
+
+	/* for example: 00-A0-C6-11-22-33
+	 * 0<-->C6112233, 1<-->00A0.
+	 */
+
+	/* low 32bit word */
+	sta = (((u32)addr[2]) << 24) | (((u32)addr[3]) << 16) |
+	      (((u32)addr[4]) << 8)  | (((u32)addr[5]));
+	writel(sta, adpt->base + EMAC_MAC_STA_ADDR0);
+
+	/* hight 32bit word */
+	sta = (((u32)addr[0]) << 8) | (u32)addr[1];
+	writel(sta, adpt->base + EMAC_MAC_STA_ADDR1);
+}
+
+static void emac_mac_config(struct emac_adapter *adpt)
+{
+	struct net_device *netdev = adpt->netdev;
+	unsigned int max_frame;
+	u32 val;
+
+	emac_set_mac_address(adpt, netdev->dev_addr);
+
+	max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	adpt->rxbuf_size = netdev->mtu > EMAC_DEF_RX_BUF_SIZE ?
+		ALIGN(max_frame, 8) : EMAC_DEF_RX_BUF_SIZE;
+
+	emac_mac_dma_rings_config(adpt);
+
+	writel(netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN,
+	       adpt->base + EMAC_MAX_FRAM_LEN_CTRL);
+
+	emac_mac_tx_config(adpt);
+	emac_mac_rx_config(adpt);
+	emac_mac_dma_config(adpt);
+
+	val = readl(adpt->base + EMAC_AXI_MAST_CTRL);
+	val &= ~(DATA_BYTE_SWAP | MAX_BOUND);
+	val |= MAX_BTYPE;
+	writel(val, adpt->base + EMAC_AXI_MAST_CTRL);
+	writel(0, adpt->base + EMAC_CLK_GATE_CTRL);
+	writel(RX_UNCPL_INT_EN, adpt->base + EMAC_MISC_CTRL);
+}
+
+void emac_mac_reset(struct emac_adapter *adpt)
+{
+	emac_mac_stop(adpt);
+
+	emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, SOFT_RST);
+	usleep_range(100, 150); /* reset may take up to 100usec */
+
+	/* interrupt clear-on-read */
+	emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, INT_RD_CLR_EN);
+}
+
+void emac_mac_start(struct emac_adapter *adpt)
+{
+	struct phy_device *phydev = adpt->phydev;
+	u32 mac, csr1;
+
+	/* enable tx queue */
+	emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, 0, TXQ_EN);
+
+	/* enable rx queue */
+	emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, 0, RXQ_EN);
+
+	/* enable mac control */
+	mac = readl(adpt->base + EMAC_MAC_CTRL);
+	csr1 = readl(adpt->csr + EMAC_EMAC_WRAPPER_CSR1);
+
+	mac |= TXEN | RXEN;     /* enable RX/TX */
+
+	/* We don't have ethtool support yet, so force flow-control mode
+	 * to 'full' always.
+	 */
+	mac |= TXFC | RXFC;
+
+	/* setup link speed */
+	mac &= ~SPEED_MASK;
+	if (phydev->speed == SPEED_1000) {
+		mac |= SPEED(2);
+		csr1 |= FREQ_MODE;
+	} else {
+		mac |= SPEED(1);
+		csr1 &= ~FREQ_MODE;
+	}
+
+	if (phydev->duplex == DUPLEX_FULL)
+		mac |= FULLD;
+	else
+		mac &= ~FULLD;
+
+	/* other parameters */
+	mac |= (CRCE | PCRCE);
+	mac |= ((adpt->preamble << PRLEN_SHFT) & PRLEN_BMSK);
+	mac |= BROAD_EN;
+	mac |= FLCHK;
+	mac &= ~RX_CHKSUM_EN;
+	mac &= ~(HUGEN | VLAN_STRIP | TPAUSE | SIMR | HUGE | MULTI_ALL |
+		 DEBUG_MODE | SINGLE_PAUSE_MODE);
+
+	writel_relaxed(csr1, adpt->csr + EMAC_EMAC_WRAPPER_CSR1);
+
+	writel_relaxed(mac, adpt->base + EMAC_MAC_CTRL);
+
+	/* enable interrupt read clear, low power sleep mode and
+	 * the irq moderators
+	 */
+
+	writel_relaxed(adpt->irq_mod, adpt->base + EMAC_IRQ_MOD_TIM_INIT);
+	writel_relaxed(INT_RD_CLR_EN | LPW_MODE | IRQ_MODERATOR_EN |
+			IRQ_MODERATOR2_EN, adpt->base + EMAC_DMA_MAS_CTRL);
+
+	emac_mac_mode_config(adpt);
+
+	emac_reg_update32(adpt->base + EMAC_ATHR_HEADER_CTRL,
+			  (HEADER_ENABLE | HEADER_CNT_EN), 0);
+
+	emac_reg_update32(adpt->csr + EMAC_EMAC_WRAPPER_CSR2, 0, WOL_EN);
+}
+
+void emac_mac_stop(struct emac_adapter *adpt)
+{
+	emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, RXQ_EN, 0);
+	emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, TXQ_EN, 0);
+	emac_reg_update32(adpt->base + EMAC_MAC_CTRL, TXEN | RXEN, 0);
+	usleep_range(1000, 1050); /* stopping mac may take upto 1msec */
+}
+
+/* Free all descriptors of given transmit queue */
+static void emac_tx_q_descs_free(struct emac_adapter *adpt)
+{
+	struct emac_tx_queue *tx_q = &adpt->tx_q;
+	unsigned int i;
+	size_t size;
+
+	/* ring already cleared, nothing to do */
+	if (!tx_q->tpd.tpbuff)
+		return;
+
+	for (i = 0; i < tx_q->tpd.count; i++) {
+		struct emac_buffer *tpbuf = GET_TPD_BUFFER(tx_q, i);
+
+		if (tpbuf->dma_addr) {
+			dma_unmap_single(adpt->netdev->dev.parent,
+					 tpbuf->dma_addr, tpbuf->length,
+					 DMA_TO_DEVICE);
+			tpbuf->dma_addr = 0;
+		}
+		if (tpbuf->skb) {
+			dev_kfree_skb_any(tpbuf->skb);
+			tpbuf->skb = NULL;
+		}
+	}
+
+	size = sizeof(struct emac_buffer) * tx_q->tpd.count;
+	memset(tx_q->tpd.tpbuff, 0, size);
+
+	/* clear the descriptor ring */
+	memset(tx_q->tpd.v_addr, 0, tx_q->tpd.size);
+
+	tx_q->tpd.consume_idx = 0;
+	tx_q->tpd.produce_idx = 0;
+}
+
+/* Free all descriptors of given receive queue */
+static void emac_rx_q_free_descs(struct emac_adapter *adpt)
+{
+	struct device *dev = adpt->netdev->dev.parent;
+	struct emac_rx_queue *rx_q = &adpt->rx_q;
+	unsigned int i;
+	size_t size;
+
+	/* ring already cleared, nothing to do */
+	if (!rx_q->rfd.rfbuff)
+		return;
+
+	for (i = 0; i < rx_q->rfd.count; i++) {
+		struct emac_buffer *rfbuf = GET_RFD_BUFFER(rx_q, i);
+
+		if (rfbuf->dma_addr) {
+			dma_unmap_single(dev, rfbuf->dma_addr, rfbuf->length,
+					 DMA_FROM_DEVICE);
+			rfbuf->dma_addr = 0;
+		}
+		if (rfbuf->skb) {
+			dev_kfree_skb(rfbuf->skb);
+			rfbuf->skb = NULL;
+		}
+	}
+
+	size =  sizeof(struct emac_buffer) * rx_q->rfd.count;
+	memset(rx_q->rfd.rfbuff, 0, size);
+
+	/* clear the descriptor rings */
+	memset(rx_q->rrd.v_addr, 0, rx_q->rrd.size);
+	rx_q->rrd.produce_idx = 0;
+	rx_q->rrd.consume_idx = 0;
+
+	memset(rx_q->rfd.v_addr, 0, rx_q->rfd.size);
+	rx_q->rfd.produce_idx = 0;
+	rx_q->rfd.consume_idx = 0;
+}
+
+/* Free all buffers associated with given transmit queue */
+static void emac_tx_q_bufs_free(struct emac_adapter *adpt)
+{
+	struct emac_tx_queue *tx_q = &adpt->tx_q;
+
+	emac_tx_q_descs_free(adpt);
+
+	kfree(tx_q->tpd.tpbuff);
+	tx_q->tpd.tpbuff = NULL;
+	tx_q->tpd.v_addr = NULL;
+	tx_q->tpd.dma_addr = 0;
+	tx_q->tpd.size = 0;
+}
+
+/* Allocate TX descriptor ring for the given transmit queue */
+static int emac_tx_q_desc_alloc(struct emac_adapter *adpt,
+				struct emac_tx_queue *tx_q)
+{
+	struct emac_ring_header *ring_header = &adpt->ring_header;
+	size_t size;
+
+	size = sizeof(struct emac_buffer) * tx_q->tpd.count;
+	tx_q->tpd.tpbuff = kzalloc(size, GFP_KERNEL);
+	if (!tx_q->tpd.tpbuff)
+		return -ENOMEM;
+
+	tx_q->tpd.size = tx_q->tpd.count * (adpt->tpd_size * 4);
+	tx_q->tpd.dma_addr = ring_header->dma_addr + ring_header->used;
+	tx_q->tpd.v_addr = ring_header->v_addr + ring_header->used;
+	ring_header->used += ALIGN(tx_q->tpd.size, 8);
+	tx_q->tpd.produce_idx = 0;
+	tx_q->tpd.consume_idx = 0;
+
+	return 0;
+}
+
+/* Free all buffers associated with given transmit queue */
+static void emac_rx_q_bufs_free(struct emac_adapter *adpt)
+{
+	struct emac_rx_queue *rx_q = &adpt->rx_q;
+
+	emac_rx_q_free_descs(adpt);
+
+	kfree(rx_q->rfd.rfbuff);
+	rx_q->rfd.rfbuff   = NULL;
+
+	rx_q->rfd.v_addr   = NULL;
+	rx_q->rfd.dma_addr = 0;
+	rx_q->rfd.size     = 0;
+
+	rx_q->rrd.v_addr   = NULL;
+	rx_q->rrd.dma_addr = 0;
+	rx_q->rrd.size     = 0;
+}
+
+/* Allocate RX descriptor rings for the given receive queue */
+static int emac_rx_descs_alloc(struct emac_adapter *adpt)
+{
+	struct emac_ring_header *ring_header = &adpt->ring_header;
+	struct emac_rx_queue *rx_q = &adpt->rx_q;
+	size_t size;
+
+	size = sizeof(struct emac_buffer) * rx_q->rfd.count;
+	rx_q->rfd.rfbuff = kzalloc(size, GFP_KERNEL);
+	if (!rx_q->rfd.rfbuff)
+		return -ENOMEM;
+
+	rx_q->rrd.size = rx_q->rrd.count * (adpt->rrd_size * 4);
+	rx_q->rfd.size = rx_q->rfd.count * (adpt->rfd_size * 4);
+
+	rx_q->rrd.dma_addr = ring_header->dma_addr + ring_header->used;
+	rx_q->rrd.v_addr   = ring_header->v_addr + ring_header->used;
+	ring_header->used += ALIGN(rx_q->rrd.size, 8);
+
+	rx_q->rfd.dma_addr = ring_header->dma_addr + ring_header->used;
+	rx_q->rfd.v_addr   = ring_header->v_addr + ring_header->used;
+	ring_header->used += ALIGN(rx_q->rfd.size, 8);
+
+	rx_q->rrd.produce_idx = 0;
+	rx_q->rrd.consume_idx = 0;
+
+	rx_q->rfd.produce_idx = 0;
+	rx_q->rfd.consume_idx = 0;
+
+	return 0;
+}
+
+/* Allocate all TX and RX descriptor rings */
+int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt)
+{
+	struct emac_ring_header *ring_header = &adpt->ring_header;
+	struct device *dev = adpt->netdev->dev.parent;
+	unsigned int num_tx_descs = adpt->tx_desc_cnt;
+	unsigned int num_rx_descs = adpt->rx_desc_cnt;
+	int ret;
+
+	adpt->tx_q.tpd.count = adpt->tx_desc_cnt;
+
+	adpt->rx_q.rrd.count = adpt->rx_desc_cnt;
+	adpt->rx_q.rfd.count = adpt->rx_desc_cnt;
+
+	/* Ring DMA buffer. Each ring may need up to 8 bytes for alignment,
+	 * hence the additional padding bytes are allocated.
+	 */
+	ring_header->size = num_tx_descs * (adpt->tpd_size * 4) +
+			    num_rx_descs * (adpt->rfd_size * 4) +
+			    num_rx_descs * (adpt->rrd_size * 4) +
+			    8 + 2 * 8; /* 8 byte per one Tx and two Rx rings */
+
+	ring_header->used = 0;
+	ring_header->v_addr = dma_zalloc_coherent(dev, ring_header->size,
+						 &ring_header->dma_addr,
+						 GFP_KERNEL);
+	if (!ring_header->v_addr)
+		return -ENOMEM;
+
+	ring_header->used = ALIGN(ring_header->dma_addr, 8) -
+							ring_header->dma_addr;
+
+	ret = emac_tx_q_desc_alloc(adpt, &adpt->tx_q);
+	if (ret) {
+		netdev_err(adpt->netdev, "error: Tx Queue alloc failed\n");
+		goto err_alloc_tx;
+	}
+
+	ret = emac_rx_descs_alloc(adpt);
+	if (ret) {
+		netdev_err(adpt->netdev, "error: Rx Queue alloc failed\n");
+		goto err_alloc_rx;
+	}
+
+	return 0;
+
+err_alloc_rx:
+	emac_tx_q_bufs_free(adpt);
+err_alloc_tx:
+	dma_free_coherent(dev, ring_header->size,
+			  ring_header->v_addr, ring_header->dma_addr);
+
+	ring_header->v_addr   = NULL;
+	ring_header->dma_addr = 0;
+	ring_header->size     = 0;
+	ring_header->used     = 0;
+
+	return ret;
+}
+
+/* Free all TX and RX descriptor rings */
+void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt)
+{
+	struct emac_ring_header *ring_header = &adpt->ring_header;
+	struct device *dev = adpt->netdev->dev.parent;
+
+	emac_tx_q_bufs_free(adpt);
+	emac_rx_q_bufs_free(adpt);
+
+	dma_free_coherent(dev, ring_header->size,
+			  ring_header->v_addr, ring_header->dma_addr);
+
+	ring_header->v_addr   = NULL;
+	ring_header->dma_addr = 0;
+	ring_header->size     = 0;
+	ring_header->used     = 0;
+}
+
+/* Initialize descriptor rings */
+static void emac_mac_rx_tx_ring_reset_all(struct emac_adapter *adpt)
+{
+	unsigned int i;
+
+	adpt->tx_q.tpd.produce_idx = 0;
+	adpt->tx_q.tpd.consume_idx = 0;
+	for (i = 0; i < adpt->tx_q.tpd.count; i++)
+		adpt->tx_q.tpd.tpbuff[i].dma_addr = 0;
+
+	adpt->rx_q.rrd.produce_idx = 0;
+	adpt->rx_q.rrd.consume_idx = 0;
+	adpt->rx_q.rfd.produce_idx = 0;
+	adpt->rx_q.rfd.consume_idx = 0;
+	for (i = 0; i < adpt->rx_q.rfd.count; i++)
+		adpt->rx_q.rfd.rfbuff[i].dma_addr = 0;
+}
+
+/* Produce new receive free descriptor */
+static void emac_mac_rx_rfd_create(struct emac_adapter *adpt,
+				   struct emac_rx_queue *rx_q,
+				   dma_addr_t addr)
+{
+	u32 *hw_rfd = EMAC_RFD(rx_q, adpt->rfd_size, rx_q->rfd.produce_idx);
+
+	*(hw_rfd++) = lower_32_bits(addr);
+	*hw_rfd = upper_32_bits(addr);
+
+	if (++rx_q->rfd.produce_idx == rx_q->rfd.count)
+		rx_q->rfd.produce_idx = 0;
+}
+
+/* Fill up receive queue's RFD with preallocated receive buffers */
+static void emac_mac_rx_descs_refill(struct emac_adapter *adpt,
+				    struct emac_rx_queue *rx_q)
+{
+	struct emac_buffer *curr_rxbuf;
+	struct emac_buffer *next_rxbuf;
+	unsigned int count = 0;
+	u32 next_produce_idx;
+
+	next_produce_idx = rx_q->rfd.produce_idx + 1;
+	if (next_produce_idx == rx_q->rfd.count)
+		next_produce_idx = 0;
+
+	curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx);
+	next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx);
+
+	/* this always has a blank rx_buffer*/
+	while (!next_rxbuf->dma_addr) {
+		struct sk_buff *skb;
+		int ret;
+
+		skb = netdev_alloc_skb_ip_align(adpt->netdev, adpt->rxbuf_size);
+		if (!skb)
+			break;
+
+		curr_rxbuf->dma_addr =
+			dma_map_single(adpt->netdev->dev.parent, skb->data,
+				       curr_rxbuf->length, DMA_FROM_DEVICE);
+		ret = dma_mapping_error(adpt->netdev->dev.parent,
+					curr_rxbuf->dma_addr);
+		if (ret) {
+			dev_kfree_skb(skb);
+			break;
+		}
+		curr_rxbuf->skb = skb;
+		curr_rxbuf->length = adpt->rxbuf_size;
+
+		emac_mac_rx_rfd_create(adpt, rx_q, curr_rxbuf->dma_addr);
+		next_produce_idx = rx_q->rfd.produce_idx + 1;
+		if (next_produce_idx == rx_q->rfd.count)
+			next_produce_idx = 0;
+
+		curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx);
+		next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx);
+		count++;
+	}
+
+	if (count) {
+		u32 prod_idx = (rx_q->rfd.produce_idx << rx_q->produce_shift) &
+				rx_q->produce_mask;
+		emac_reg_update32(adpt->base + rx_q->produce_reg,
+				  rx_q->produce_mask, prod_idx);
+	}
+}
+
+static void emac_adjust_link(struct net_device *netdev)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+	struct phy_device *phydev = netdev->phydev;
+
+	if (phydev->link)
+		emac_mac_start(adpt);
+	else
+		emac_mac_stop(adpt);
+
+	phy_print_status(phydev);
+}
+
+/* Bringup the interface/HW */
+int emac_mac_up(struct emac_adapter *adpt)
+{
+	struct net_device *netdev = adpt->netdev;
+	struct emac_irq	*irq = &adpt->irq;
+	int ret;
+
+	emac_mac_rx_tx_ring_reset_all(adpt);
+	emac_mac_config(adpt);
+
+	ret = request_irq(irq->irq, emac_isr, 0, EMAC_MAC_IRQ_RES, irq);
+	if (ret) {
+		netdev_err(adpt->netdev, "could not request %s irq\n",
+			   EMAC_MAC_IRQ_RES);
+		return ret;
+	}
+
+	emac_mac_rx_descs_refill(adpt, &adpt->rx_q);
+
+	ret = phy_connect_direct(netdev, adpt->phydev, emac_adjust_link,
+				 PHY_INTERFACE_MODE_SGMII);
+	if (ret) {
+		netdev_err(adpt->netdev, "could not connect phy\n");
+		free_irq(irq->irq, irq);
+		return ret;
+	}
+
+	/* enable mac irq */
+	writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS);
+	writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK);
+
+	adpt->phydev->irq = PHY_IGNORE_INTERRUPT;
+	phy_start(adpt->phydev);
+
+	napi_enable(&adpt->rx_q.napi);
+	netif_start_queue(netdev);
+
+	return 0;
+}
+
+/* Bring down the interface/HW */
+void emac_mac_down(struct emac_adapter *adpt)
+{
+	struct net_device *netdev = adpt->netdev;
+
+	netif_stop_queue(netdev);
+	napi_disable(&adpt->rx_q.napi);
+
+	phy_stop(adpt->phydev);
+	phy_disconnect(adpt->phydev);
+
+	/* disable mac irq */
+	writel(DIS_INT, adpt->base + EMAC_INT_STATUS);
+	writel(0, adpt->base + EMAC_INT_MASK);
+	synchronize_irq(adpt->irq.irq);
+	free_irq(adpt->irq.irq, &adpt->irq);
+
+	emac_mac_reset(adpt);
+
+	emac_tx_q_descs_free(adpt);
+	netdev_reset_queue(adpt->netdev);
+	emac_rx_q_free_descs(adpt);
+}
+
+/* Consume next received packet descriptor */
+static bool emac_rx_process_rrd(struct emac_adapter *adpt,
+				struct emac_rx_queue *rx_q,
+				struct emac_rrd *rrd)
+{
+	u32 *hw_rrd = EMAC_RRD(rx_q, adpt->rrd_size, rx_q->rrd.consume_idx);
+
+	rrd->word[3] = *(hw_rrd + 3);
+
+	if (!RRD_UPDT(rrd))
+		return false;
+
+	rrd->word[4] = 0;
+	rrd->word[5] = 0;
+
+	rrd->word[0] = *(hw_rrd++);
+	rrd->word[1] = *(hw_rrd++);
+	rrd->word[2] = *(hw_rrd++);
+
+	if (unlikely(RRD_NOR(rrd) != 1)) {
+		netdev_err(adpt->netdev,
+			   "error: multi-RFD not support yet! nor:%lu\n",
+			   RRD_NOR(rrd));
+	}
+
+	/* mark rrd as processed */
+	RRD_UPDT_SET(rrd, 0);
+	*hw_rrd = rrd->word[3];
+
+	if (++rx_q->rrd.consume_idx == rx_q->rrd.count)
+		rx_q->rrd.consume_idx = 0;
+
+	return true;
+}
+
+/* Produce new transmit descriptor */
+static void emac_tx_tpd_create(struct emac_adapter *adpt,
+			       struct emac_tx_queue *tx_q, struct emac_tpd *tpd)
+{
+	u32 *hw_tpd;
+
+	tx_q->tpd.last_produce_idx = tx_q->tpd.produce_idx;
+	hw_tpd = EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.produce_idx);
+
+	if (++tx_q->tpd.produce_idx == tx_q->tpd.count)
+		tx_q->tpd.produce_idx = 0;
+
+	*(hw_tpd++) = tpd->word[0];
+	*(hw_tpd++) = tpd->word[1];
+	*(hw_tpd++) = tpd->word[2];
+	*hw_tpd = tpd->word[3];
+}
+
+/* Mark the last transmit descriptor as such (for the transmit packet) */
+static void emac_tx_tpd_mark_last(struct emac_adapter *adpt,
+				  struct emac_tx_queue *tx_q)
+{
+	u32 *hw_tpd =
+		EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.last_produce_idx);
+	u32 tmp_tpd;
+
+	tmp_tpd = *(hw_tpd + 1);
+	tmp_tpd |= EMAC_TPD_LAST_FRAGMENT;
+	*(hw_tpd + 1) = tmp_tpd;
+}
+
+static void emac_rx_rfd_clean(struct emac_rx_queue *rx_q, struct emac_rrd *rrd)
+{
+	struct emac_buffer *rfbuf = rx_q->rfd.rfbuff;
+	u32 consume_idx = RRD_SI(rrd);
+	unsigned int i;
+
+	for (i = 0; i < RRD_NOR(rrd); i++) {
+		rfbuf[consume_idx].skb = NULL;
+		if (++consume_idx == rx_q->rfd.count)
+			consume_idx = 0;
+	}
+
+	rx_q->rfd.consume_idx = consume_idx;
+	rx_q->rfd.process_idx = consume_idx;
+}
+
+/* Push the received skb to upper layers */
+static void emac_receive_skb(struct emac_rx_queue *rx_q,
+			     struct sk_buff *skb,
+			     u16 vlan_tag, bool vlan_flag)
+{
+	if (vlan_flag) {
+		u16 vlan;
+
+		EMAC_TAG_TO_VLAN(vlan_tag, vlan);
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan);
+	}
+
+	napi_gro_receive(&rx_q->napi, skb);
+}
+
+/* Process receive event */
+void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q,
+			 int *num_pkts, int max_pkts)
+{
+	u32 proc_idx, hw_consume_idx, num_consume_pkts;
+	struct net_device *netdev  = adpt->netdev;
+	struct emac_buffer *rfbuf;
+	unsigned int count = 0;
+	struct emac_rrd rrd;
+	struct sk_buff *skb;
+	u32 reg;
+
+	reg = readl_relaxed(adpt->base + rx_q->consume_reg);
+
+	hw_consume_idx = (reg & rx_q->consume_mask) >> rx_q->consume_shift;
+	num_consume_pkts = (hw_consume_idx >= rx_q->rrd.consume_idx) ?
+		(hw_consume_idx -  rx_q->rrd.consume_idx) :
+		(hw_consume_idx + rx_q->rrd.count - rx_q->rrd.consume_idx);
+
+	do {
+		if (!num_consume_pkts)
+			break;
+
+		if (!emac_rx_process_rrd(adpt, rx_q, &rrd))
+			break;
+
+		if (likely(RRD_NOR(&rrd) == 1)) {
+			/* good receive */
+			rfbuf = GET_RFD_BUFFER(rx_q, RRD_SI(&rrd));
+			dma_unmap_single(adpt->netdev->dev.parent,
+					 rfbuf->dma_addr, rfbuf->length,
+					 DMA_FROM_DEVICE);
+			rfbuf->dma_addr = 0;
+			skb = rfbuf->skb;
+		} else {
+			netdev_err(adpt->netdev,
+				   "error: multi-RFD not support yet!\n");
+			break;
+		}
+		emac_rx_rfd_clean(rx_q, &rrd);
+		num_consume_pkts--;
+		count++;
+
+		/* Due to a HW issue in L4 check sum detection (UDP/TCP frags
+		 * with DF set are marked as error), drop packets based on the
+		 * error mask rather than the summary bit (ignoring L4F errors)
+		 */
+		if (rrd.word[EMAC_RRD_STATS_DW_IDX] & EMAC_RRD_ERROR) {
+			netif_dbg(adpt, rx_status, adpt->netdev,
+				  "Drop error packet[RRD: 0x%x:0x%x:0x%x:0x%x]\n",
+				  rrd.word[0], rrd.word[1],
+				  rrd.word[2], rrd.word[3]);
+
+			dev_kfree_skb(skb);
+			continue;
+		}
+
+		skb_put(skb, RRD_PKT_SIZE(&rrd) - ETH_FCS_LEN);
+		skb->dev = netdev;
+		skb->protocol = eth_type_trans(skb, skb->dev);
+		if (netdev->features & NETIF_F_RXCSUM)
+			skb->ip_summed = RRD_L4F(&rrd) ?
+					  CHECKSUM_NONE : CHECKSUM_UNNECESSARY;
+		else
+			skb_checksum_none_assert(skb);
+
+		emac_receive_skb(rx_q, skb, (u16)RRD_CVALN_TAG(&rrd),
+				 (bool)RRD_CVTAG(&rrd));
+
+		netdev->last_rx = jiffies;
+		(*num_pkts)++;
+	} while (*num_pkts < max_pkts);
+
+	if (count) {
+		proc_idx = (rx_q->rfd.process_idx << rx_q->process_shft) &
+				rx_q->process_mask;
+		emac_reg_update32(adpt->base + rx_q->process_reg,
+				  rx_q->process_mask, proc_idx);
+		emac_mac_rx_descs_refill(adpt, rx_q);
+	}
+}
+
+/* get the number of free transmit descriptors */
+static unsigned int emac_tpd_num_free_descs(struct emac_tx_queue *tx_q)
+{
+	u32 produce_idx = tx_q->tpd.produce_idx;
+	u32 consume_idx = tx_q->tpd.consume_idx;
+
+	return (consume_idx > produce_idx) ?
+		(consume_idx - produce_idx - 1) :
+		(tx_q->tpd.count + consume_idx - produce_idx - 1);
+}
+
+/* Process transmit event */
+void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q)
+{
+	u32 reg = readl_relaxed(adpt->base + tx_q->consume_reg);
+	u32 hw_consume_idx, pkts_compl = 0, bytes_compl = 0;
+	struct emac_buffer *tpbuf;
+
+	hw_consume_idx = (reg & tx_q->consume_mask) >> tx_q->consume_shift;
+
+	while (tx_q->tpd.consume_idx != hw_consume_idx) {
+		tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx);
+		if (tpbuf->dma_addr) {
+			dma_unmap_single(adpt->netdev->dev.parent,
+					 tpbuf->dma_addr, tpbuf->length,
+					 DMA_TO_DEVICE);
+			tpbuf->dma_addr = 0;
+		}
+
+		if (tpbuf->skb) {
+			pkts_compl++;
+			bytes_compl += tpbuf->skb->len;
+			dev_kfree_skb_irq(tpbuf->skb);
+			tpbuf->skb = NULL;
+		}
+
+		if (++tx_q->tpd.consume_idx == tx_q->tpd.count)
+			tx_q->tpd.consume_idx = 0;
+	}
+
+	netdev_completed_queue(adpt->netdev, pkts_compl, bytes_compl);
+
+	if (netif_queue_stopped(adpt->netdev))
+		if (emac_tpd_num_free_descs(tx_q) > (MAX_SKB_FRAGS + 1))
+			netif_wake_queue(adpt->netdev);
+}
+
+/* Initialize all queue data structures */
+void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev,
+				  struct emac_adapter *adpt)
+{
+	adpt->rx_q.netdev = adpt->netdev;
+
+	adpt->rx_q.produce_reg  = EMAC_MAILBOX_0;
+	adpt->rx_q.produce_mask = RFD0_PROD_IDX_BMSK;
+	adpt->rx_q.produce_shift = RFD0_PROD_IDX_SHFT;
+
+	adpt->rx_q.process_reg  = EMAC_MAILBOX_0;
+	adpt->rx_q.process_mask = RFD0_PROC_IDX_BMSK;
+	adpt->rx_q.process_shft = RFD0_PROC_IDX_SHFT;
+
+	adpt->rx_q.consume_reg  = EMAC_MAILBOX_3;
+	adpt->rx_q.consume_mask = RFD0_CONS_IDX_BMSK;
+	adpt->rx_q.consume_shift = RFD0_CONS_IDX_SHFT;
+
+	adpt->rx_q.irq          = &adpt->irq;
+	adpt->rx_q.intr         = adpt->irq.mask & ISR_RX_PKT;
+
+	adpt->tx_q.produce_reg  = EMAC_MAILBOX_15;
+	adpt->tx_q.produce_mask = NTPD_PROD_IDX_BMSK;
+	adpt->tx_q.produce_shift = NTPD_PROD_IDX_SHFT;
+
+	adpt->tx_q.consume_reg  = EMAC_MAILBOX_2;
+	adpt->tx_q.consume_mask = NTPD_CONS_IDX_BMSK;
+	adpt->tx_q.consume_shift = NTPD_CONS_IDX_SHFT;
+}
+
+/* Fill up transmit descriptors with TSO and Checksum offload information */
+static int emac_tso_csum(struct emac_adapter *adpt,
+			 struct emac_tx_queue *tx_q,
+			 struct sk_buff *skb,
+			 struct emac_tpd *tpd)
+{
+	unsigned int hdr_len;
+	int ret;
+
+	if (skb_is_gso(skb)) {
+		if (skb_header_cloned(skb)) {
+			ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+			if (unlikely(ret))
+				return ret;
+		}
+
+		if (skb->protocol == htons(ETH_P_IP)) {
+			u32 pkt_len = ((unsigned char *)ip_hdr(skb) - skb->data)
+				       + ntohs(ip_hdr(skb)->tot_len);
+			if (skb->len > pkt_len)
+				pskb_trim(skb, pkt_len);
+		}
+
+		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+		if (unlikely(skb->len == hdr_len)) {
+			/* we only need to do csum */
+			netif_warn(adpt, tx_err, adpt->netdev,
+				   "tso not needed for packet with 0 data\n");
+			goto do_csum;
+		}
+
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
+			ip_hdr(skb)->check = 0;
+			tcp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr,
+						   0, IPPROTO_TCP, 0);
+			TPD_IPV4_SET(tpd, 1);
+		}
+
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
+			/* ipv6 tso need an extra tpd */
+			struct emac_tpd extra_tpd;
+
+			memset(tpd, 0, sizeof(*tpd));
+			memset(&extra_tpd, 0, sizeof(extra_tpd));
+
+			ipv6_hdr(skb)->payload_len = 0;
+			tcp_hdr(skb)->check =
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr,
+						 0, IPPROTO_TCP, 0);
+			TPD_PKT_LEN_SET(&extra_tpd, skb->len);
+			TPD_LSO_SET(&extra_tpd, 1);
+			TPD_LSOV_SET(&extra_tpd, 1);
+			emac_tx_tpd_create(adpt, tx_q, &extra_tpd);
+			TPD_LSOV_SET(tpd, 1);
+		}
+
+		TPD_LSO_SET(tpd, 1);
+		TPD_TCPHDR_OFFSET_SET(tpd, skb_transport_offset(skb));
+		TPD_MSS_SET(tpd, skb_shinfo(skb)->gso_size);
+		return 0;
+	}
+
+do_csum:
+	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+		unsigned int css, cso;
+
+		cso = skb_transport_offset(skb);
+		if (unlikely(cso & 0x1)) {
+			netdev_err(adpt->netdev,
+				   "error: payload offset should be even\n");
+			return -EINVAL;
+		}
+		css = cso + skb->csum_offset;
+
+		TPD_PAYLOAD_OFFSET_SET(tpd, cso >> 1);
+		TPD_CXSUM_OFFSET_SET(tpd, css >> 1);
+		TPD_CSX_SET(tpd, 1);
+	}
+
+	return 0;
+}
+
+/* Fill up transmit descriptors */
+static void emac_tx_fill_tpd(struct emac_adapter *adpt,
+			     struct emac_tx_queue *tx_q, struct sk_buff *skb,
+			     struct emac_tpd *tpd)
+{
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	unsigned int first = tx_q->tpd.produce_idx;
+	unsigned int len = skb_headlen(skb);
+	struct emac_buffer *tpbuf = NULL;
+	unsigned int mapped_len = 0;
+	unsigned int i;
+	int count = 0;
+	int ret;
+
+	/* if Large Segment Offload is (in TCP Segmentation Offload struct) */
+	if (TPD_LSO(tpd)) {
+		mapped_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
+
+		tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
+		tpbuf->length = mapped_len;
+		tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
+						 skb->data, tpbuf->length,
+						 DMA_TO_DEVICE);
+		ret = dma_mapping_error(adpt->netdev->dev.parent,
+					tpbuf->dma_addr);
+		if (ret)
+			goto error;
+
+		TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr));
+		TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr));
+		TPD_BUF_LEN_SET(tpd, tpbuf->length);
+		emac_tx_tpd_create(adpt, tx_q, tpd);
+		count++;
+	}
+
+	if (mapped_len < len) {
+		tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
+		tpbuf->length = len - mapped_len;
+		tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent,
+						 skb->data + mapped_len,
+						 tpbuf->length, DMA_TO_DEVICE);
+		ret = dma_mapping_error(adpt->netdev->dev.parent,
+					tpbuf->dma_addr);
+		if (ret)
+			goto error;
+
+		TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr));
+		TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr));
+		TPD_BUF_LEN_SET(tpd, tpbuf->length);
+		emac_tx_tpd_create(adpt, tx_q, tpd);
+		count++;
+	}
+
+	for (i = 0; i < nr_frags; i++) {
+		struct skb_frag_struct *frag;
+
+		frag = &skb_shinfo(skb)->frags[i];
+
+		tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx);
+		tpbuf->length = frag->size;
+		tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent,
+					       frag->page.p, frag->page_offset,
+					       tpbuf->length, DMA_TO_DEVICE);
+		ret = dma_mapping_error(adpt->netdev->dev.parent,
+					tpbuf->dma_addr);
+		if (ret)
+			goto error;
+
+		TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr));
+		TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr));
+		TPD_BUF_LEN_SET(tpd, tpbuf->length);
+		emac_tx_tpd_create(adpt, tx_q, tpd);
+		count++;
+	}
+
+	/* The last tpd */
+	wmb();
+	emac_tx_tpd_mark_last(adpt, tx_q);
+
+	/* The last buffer info contain the skb address,
+	 * so it will be freed after unmap
+	 */
+	tpbuf->skb = skb;
+
+	return;
+
+error:
+	/* One of the memory mappings failed, so undo everything */
+	tx_q->tpd.produce_idx = first;
+
+	while (count--) {
+		tpbuf = GET_TPD_BUFFER(tx_q, first);
+		dma_unmap_page(adpt->netdev->dev.parent, tpbuf->dma_addr,
+			       tpbuf->length, DMA_TO_DEVICE);
+		tpbuf->dma_addr = 0;
+		tpbuf->length = 0;
+
+		if (++first == tx_q->tpd.count)
+			first = 0;
+	}
+
+	dev_kfree_skb(skb);
+}
+
+/* Transmit the packet using specified transmit queue */
+int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q,
+			 struct sk_buff *skb)
+{
+	struct emac_tpd tpd;
+	u32 prod_idx;
+
+	memset(&tpd, 0, sizeof(tpd));
+
+	if (emac_tso_csum(adpt, tx_q, skb, &tpd) != 0) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	if (skb_vlan_tag_present(skb)) {
+		u16 tag;
+
+		EMAC_VLAN_TO_TAG(skb_vlan_tag_get(skb), tag);
+		TPD_CVLAN_TAG_SET(&tpd, tag);
+		TPD_INSTC_SET(&tpd, 1);
+	}
+
+	if (skb_network_offset(skb) != ETH_HLEN)
+		TPD_TYP_SET(&tpd, 1);
+
+	emac_tx_fill_tpd(adpt, tx_q, skb, &tpd);
+
+	netdev_sent_queue(adpt->netdev, skb->len);
+
+	/* Make sure the are enough free descriptors to hold one
+	 * maximum-sized SKB.  We need one desc for each fragment,
+	 * one for the checksum (emac_tso_csum), one for TSO, and
+	 * and one for the SKB header.
+	 */
+	if (emac_tpd_num_free_descs(tx_q) < (MAX_SKB_FRAGS + 3))
+		netif_stop_queue(adpt->netdev);
+
+	/* update produce idx */
+	prod_idx = (tx_q->tpd.produce_idx << tx_q->produce_shift) &
+		    tx_q->produce_mask;
+	emac_reg_update32(adpt->base + tx_q->produce_reg,
+			  tx_q->produce_mask, prod_idx);
+
+	return NETDEV_TX_OK;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
new file mode 100644
index 0000000..f3aa24d
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
@@ -0,0 +1,248 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* EMAC DMA HW engine uses three rings:
+ * Tx:
+ *   TPD: Transmit Packet Descriptor ring.
+ * Rx:
+ *   RFD: Receive Free Descriptor ring.
+ *     Ring of descriptors with empty buffers to be filled by Rx HW.
+ *   RRD: Receive Return Descriptor ring.
+ *     Ring of descriptors with buffers filled with received data.
+ */
+
+#ifndef _EMAC_HW_H_
+#define _EMAC_HW_H_
+
+/* EMAC_CSR register offsets */
+#define EMAC_EMAC_WRAPPER_CSR1                                0x000000
+#define EMAC_EMAC_WRAPPER_CSR2                                0x000004
+#define EMAC_EMAC_WRAPPER_TX_TS_LO                            0x000104
+#define EMAC_EMAC_WRAPPER_TX_TS_HI                            0x000108
+#define EMAC_EMAC_WRAPPER_TX_TS_INX                           0x00010c
+
+/* DMA Order Settings */
+enum emac_dma_order {
+	emac_dma_ord_in = 1,
+	emac_dma_ord_enh = 2,
+	emac_dma_ord_out = 4
+};
+
+enum emac_dma_req_block {
+	emac_dma_req_128 = 0,
+	emac_dma_req_256 = 1,
+	emac_dma_req_512 = 2,
+	emac_dma_req_1024 = 3,
+	emac_dma_req_2048 = 4,
+	emac_dma_req_4096 = 5
+};
+
+/* Returns the value of bits idx...idx+n_bits */
+#define BITS_GET(val, lo, hi) ((le32_to_cpu(val) & GENMASK((hi), (lo))) >> lo)
+#define BITS_SET(val, lo, hi, new_val) \
+	val = cpu_to_le32((le32_to_cpu(val) & (~GENMASK((hi), (lo)))) |	\
+		(((new_val) << (lo)) & GENMASK((hi), (lo))))
+
+/* RRD (Receive Return Descriptor) */
+struct emac_rrd {
+	u32	word[6];
+
+/* number of RFD */
+#define RRD_NOR(rrd)			BITS_GET((rrd)->word[0], 16, 19)
+/* start consumer index of rfd-ring */
+#define RRD_SI(rrd)			BITS_GET((rrd)->word[0], 20, 31)
+/* vlan-tag (CVID, CFI and PRI) */
+#define RRD_CVALN_TAG(rrd)		BITS_GET((rrd)->word[2], 0, 15)
+/* length of the packet */
+#define RRD_PKT_SIZE(rrd)		BITS_GET((rrd)->word[3], 0, 13)
+/* L4(TCP/UDP) checksum failed */
+#define RRD_L4F(rrd)			BITS_GET((rrd)->word[3], 14, 14)
+/* vlan tagged */
+#define RRD_CVTAG(rrd)			BITS_GET((rrd)->word[3], 16, 16)
+/* When set, indicates that the descriptor is updated by the IP core.
+ * When cleared, indicates that the descriptor is invalid.
+ */
+#define RRD_UPDT(rrd)			BITS_GET((rrd)->word[3], 31, 31)
+#define RRD_UPDT_SET(rrd, val)		BITS_SET((rrd)->word[3], 31, 31, val)
+/* timestamp low */
+#define RRD_TS_LOW(rrd)			BITS_GET((rrd)->word[4], 0, 29)
+/* timestamp high */
+#define RRD_TS_HI(rrd)			le32_to_cpu((rrd)->word[5])
+};
+
+/* TPD (Transmit Packet Descriptor) */
+struct emac_tpd {
+	u32				word[4];
+
+/* Number of bytes of the transmit packet. (include 4-byte CRC) */
+#define TPD_BUF_LEN_SET(tpd, val)	BITS_SET((tpd)->word[0], 0, 15, val)
+/* Custom Checksum Offload: When set, ask IP core to offload custom checksum */
+#define TPD_CSX_SET(tpd, val)		BITS_SET((tpd)->word[1], 8, 8, val)
+/* TCP Large Send Offload: When set, ask IP core to do offload TCP Large Send */
+#define TPD_LSO(tpd)			BITS_GET((tpd)->word[1], 12, 12)
+#define TPD_LSO_SET(tpd, val)		BITS_SET((tpd)->word[1], 12, 12, val)
+/*  Large Send Offload Version: When set, indicates this is an LSOv2
+ * (for both IPv4 and IPv6). When cleared, indicates this is an LSOv1
+ * (only for IPv4).
+ */
+#define TPD_LSOV_SET(tpd, val)		BITS_SET((tpd)->word[1], 13, 13, val)
+/* IPv4 packet: When set, indicates this is an  IPv4 packet, this bit is only
+ * for LSOV2 format.
+ */
+#define TPD_IPV4_SET(tpd, val)		BITS_SET((tpd)->word[1], 16, 16, val)
+/* 0: Ethernet   frame (DA+SA+TYPE+DATA+CRC)
+ * 1: IEEE 802.3 frame (DA+SA+LEN+DSAP+SSAP+CTL+ORG+TYPE+DATA+CRC)
+ */
+#define TPD_TYP_SET(tpd, val)		BITS_SET((tpd)->word[1], 17, 17, val)
+/* Low-32bit Buffer Address */
+#define TPD_BUFFER_ADDR_L_SET(tpd, val)	((tpd)->word[2] = cpu_to_le32(val))
+/* CVLAN Tag to be inserted if INS_VLAN_TAG is set, CVLAN TPID based on global
+ * register configuration.
+ */
+#define TPD_CVLAN_TAG_SET(tpd, val)	BITS_SET((tpd)->word[3], 0, 15, val)
+/*  Insert CVlan Tag: When set, ask MAC to insert CVLAN TAG to outgoing packet
+ */
+#define TPD_INSTC_SET(tpd, val)		BITS_SET((tpd)->word[3], 17, 17, val)
+/* High-14bit Buffer Address, So, the 64b-bit address is
+ * {DESC_CTRL_11_TX_DATA_HIADDR[17:0],(register) BUFFER_ADDR_H, BUFFER_ADDR_L}
+ */
+#define TPD_BUFFER_ADDR_H_SET(tpd, val)	BITS_SET((tpd)->word[3], 18, 30, val)
+/* Format D. Word offset from the 1st byte of this packet to start to calculate
+ * the custom checksum.
+ */
+#define TPD_PAYLOAD_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 0, 7, val)
+/*  Format D. Word offset from the 1st byte of this packet to fill the custom
+ * checksum to
+ */
+#define TPD_CXSUM_OFFSET_SET(tpd, val)	BITS_SET((tpd)->word[1], 18, 25, val)
+
+/* Format C. TCP Header offset from the 1st byte of this packet. (byte unit) */
+#define TPD_TCPHDR_OFFSET_SET(tpd, val)	BITS_SET((tpd)->word[1], 0, 7, val)
+/* Format C. MSS (Maximum Segment Size) got from the protocol layer. (byte unit)
+ */
+#define TPD_MSS_SET(tpd, val)		BITS_SET((tpd)->word[1], 18, 30, val)
+/* packet length in ext tpd */
+#define TPD_PKT_LEN_SET(tpd, val)	((tpd)->word[2] = cpu_to_le32(val))
+};
+
+/* emac_ring_header represents a single, contiguous block of DMA space
+ * mapped for the three descriptor rings (tpd, rfd, rrd)
+ */
+struct emac_ring_header {
+	void			*v_addr;	/* virtual address */
+	dma_addr_t		dma_addr;	/* dma address */
+	size_t			size;		/* length in bytes */
+	size_t			used;
+};
+
+/* emac_buffer is wrapper around a pointer to a socket buffer
+ * so a DMA handle can be stored along with the skb
+ */
+struct emac_buffer {
+	struct sk_buff		*skb;		/* socket buffer */
+	u16			length;		/* rx buffer length */
+	dma_addr_t		dma_addr;	/* dma address */
+};
+
+/* receive free descriptor (rfd) ring */
+struct emac_rfd_ring {
+	struct emac_buffer	*rfbuff;
+	u32			*v_addr;	/* virtual address */
+	dma_addr_t		dma_addr;	/* dma address */
+	size_t			size;		/* length in bytes */
+	unsigned int		count;		/* number of desc in the ring */
+	unsigned int		produce_idx;
+	unsigned int		process_idx;
+	unsigned int		consume_idx;	/* unused */
+};
+
+/* Receive Return Desciptor (RRD) ring */
+struct emac_rrd_ring {
+	u32			*v_addr;	/* virtual address */
+	dma_addr_t		dma_addr;	/* physical address */
+	size_t			size;		/* length in bytes */
+	unsigned int		count;		/* number of desc in the ring */
+	unsigned int		produce_idx;	/* unused */
+	unsigned int		consume_idx;
+};
+
+/* Rx queue */
+struct emac_rx_queue {
+	struct net_device	*netdev;	/* netdev ring belongs to */
+	struct emac_rrd_ring	rrd;
+	struct emac_rfd_ring	rfd;
+	struct napi_struct	napi;
+	struct emac_irq		*irq;
+
+	u32			intr;
+	u32			produce_mask;
+	u32			process_mask;
+	u32			consume_mask;
+
+	u16			produce_reg;
+	u16			process_reg;
+	u16			consume_reg;
+
+	u8			produce_shift;
+	u8			process_shft;
+	u8			consume_shift;
+};
+
+/* Transimit Packet Descriptor (tpd) ring */
+struct emac_tpd_ring {
+	struct emac_buffer	*tpbuff;
+	u32			*v_addr;	/* virtual address */
+	dma_addr_t		dma_addr;	/* dma address */
+
+	size_t			size;		/* length in bytes */
+	unsigned int		count;		/* number of desc in the ring */
+	unsigned int		produce_idx;
+	unsigned int		consume_idx;
+	unsigned int		last_produce_idx;
+};
+
+/* Tx queue */
+struct emac_tx_queue {
+	struct emac_tpd_ring	tpd;
+
+	u32			produce_mask;
+	u32			consume_mask;
+
+	u16			max_packets;	/* max packets per interrupt */
+	u16			produce_reg;
+	u16			consume_reg;
+
+	u8			produce_shift;
+	u8			consume_shift;
+};
+
+struct emac_adapter;
+
+int  emac_mac_up(struct emac_adapter *adpt);
+void emac_mac_down(struct emac_adapter *adpt);
+void emac_mac_reset(struct emac_adapter *adpt);
+void emac_mac_start(struct emac_adapter *adpt);
+void emac_mac_stop(struct emac_adapter *adpt);
+void emac_mac_mode_config(struct emac_adapter *adpt);
+void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q,
+			 int *num_pkts, int max_pkts);
+int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q,
+			 struct sk_buff *skb);
+void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q);
+void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev,
+				  struct emac_adapter *adpt);
+int  emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt);
+void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt);
+void emac_mac_multicast_addr_clear(struct emac_adapter *adpt);
+void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr);
+
+#endif /*_EMAC_HW_H_*/
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
new file mode 100644
index 0000000..da4e90d
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
@@ -0,0 +1,227 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. EMAC PHY Controller driver.
+ */
+
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/iopoll.h>
+#include <linux/acpi.h>
+#include "emac.h"
+#include "emac-mac.h"
+#include "emac-phy.h"
+#include "emac-sgmii.h"
+
+/* EMAC base register offsets */
+#define EMAC_MDIO_CTRL                                        0x001414
+#define EMAC_PHY_STS                                          0x001418
+#define EMAC_MDIO_EX_CTRL                                     0x001440
+
+/* EMAC_MDIO_CTRL */
+#define MDIO_MODE                                              BIT(30)
+#define MDIO_PR                                                BIT(29)
+#define MDIO_AP_EN                                             BIT(28)
+#define MDIO_BUSY                                              BIT(27)
+#define MDIO_CLK_SEL_BMSK                                    0x7000000
+#define MDIO_CLK_SEL_SHFT                                           24
+#define MDIO_START                                             BIT(23)
+#define SUP_PREAMBLE                                           BIT(22)
+#define MDIO_RD_NWR                                            BIT(21)
+#define MDIO_REG_ADDR_BMSK                                    0x1f0000
+#define MDIO_REG_ADDR_SHFT                                          16
+#define MDIO_DATA_BMSK                                          0xffff
+#define MDIO_DATA_SHFT                                               0
+
+/* EMAC_PHY_STS */
+#define PHY_ADDR_BMSK                                         0x1f0000
+#define PHY_ADDR_SHFT                                               16
+
+#define MDIO_CLK_25_4                                                0
+#define MDIO_CLK_25_28                                               7
+
+#define MDIO_WAIT_TIMES                                           1000
+
+#define EMAC_LINK_SPEED_DEFAULT (\
+		EMAC_LINK_SPEED_10_HALF  |\
+		EMAC_LINK_SPEED_10_FULL  |\
+		EMAC_LINK_SPEED_100_HALF |\
+		EMAC_LINK_SPEED_100_FULL |\
+		EMAC_LINK_SPEED_1GB_FULL)
+
+/**
+ * emac_phy_mdio_autopoll_disable() - disable mdio autopoll
+ * @adpt: the emac adapter
+ *
+ * The autopoll feature takes over the MDIO bus.  In order for
+ * the PHY driver to be able to talk to the PHY over the MDIO
+ * bus, we need to temporarily disable the autopoll feature.
+ */
+static int emac_phy_mdio_autopoll_disable(struct emac_adapter *adpt)
+{
+	u32 val;
+
+	/* disable autopoll */
+	emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, MDIO_AP_EN, 0);
+
+	/* wait for any mdio polling to complete */
+	if (!readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, val,
+				!(val & MDIO_BUSY), 100, MDIO_WAIT_TIMES * 100))
+		return 0;
+
+	/* failed to disable; ensure it is enabled before returning */
+	emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN);
+
+	return -EBUSY;
+}
+
+/**
+ * emac_phy_mdio_autopoll_disable() - disable mdio autopoll
+ * @adpt: the emac adapter
+ *
+ * The EMAC has the ability to poll the external PHY on the MDIO
+ * bus for link state changes.  This eliminates the need for the
+ * driver to poll the phy.  If if the link state does change,
+ * the EMAC issues an interrupt on behalf of the PHY.
+ */
+static void emac_phy_mdio_autopoll_enable(struct emac_adapter *adpt)
+{
+	emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN);
+}
+
+static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum)
+{
+	struct emac_adapter *adpt = bus->priv;
+	u32 reg;
+	int ret;
+
+	ret = emac_phy_mdio_autopoll_disable(adpt);
+	if (ret)
+		return ret;
+
+	emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK,
+			  (addr << PHY_ADDR_SHFT));
+
+	reg = SUP_PREAMBLE |
+	      ((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) |
+	      ((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) |
+	      MDIO_START | MDIO_RD_NWR;
+
+	writel(reg, adpt->base + EMAC_MDIO_CTRL);
+
+	if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
+			       !(reg & (MDIO_START | MDIO_BUSY)),
+			       100, MDIO_WAIT_TIMES * 100))
+		ret = -EIO;
+	else
+		ret = (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK;
+
+	emac_phy_mdio_autopoll_enable(adpt);
+
+	return ret;
+}
+
+static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val)
+{
+	struct emac_adapter *adpt = bus->priv;
+	u32 reg;
+	int ret;
+
+	ret = emac_phy_mdio_autopoll_disable(adpt);
+	if (ret)
+		return ret;
+
+	emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK,
+			  (addr << PHY_ADDR_SHFT));
+
+	reg = SUP_PREAMBLE |
+		((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) |
+		((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) |
+		((val << MDIO_DATA_SHFT) & MDIO_DATA_BMSK) |
+		MDIO_START;
+
+	writel(reg, adpt->base + EMAC_MDIO_CTRL);
+
+	if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg,
+			       !(reg & (MDIO_START | MDIO_BUSY)), 100,
+			       MDIO_WAIT_TIMES * 100))
+		ret = -EIO;
+
+	emac_phy_mdio_autopoll_enable(adpt);
+
+	return ret;
+}
+
+/* Configure the MDIO bus and connect the external PHY */
+int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct mii_bus *mii_bus;
+	int ret;
+
+	/* Create the mii_bus object for talking to the MDIO bus */
+	adpt->mii_bus = mii_bus = devm_mdiobus_alloc(&pdev->dev);
+	if (!mii_bus)
+		return -ENOMEM;
+
+	mii_bus->name = "emac-mdio";
+	snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s", pdev->name);
+	mii_bus->read = emac_mdio_read;
+	mii_bus->write = emac_mdio_write;
+	mii_bus->parent = &pdev->dev;
+	mii_bus->priv = adpt;
+
+	if (has_acpi_companion(&pdev->dev)) {
+		u32 phy_addr;
+
+		ret = mdiobus_register(mii_bus);
+		if (ret) {
+			dev_err(&pdev->dev, "could not register mdio bus\n");
+			return ret;
+		}
+		ret = device_property_read_u32(&pdev->dev, "phy-channel",
+					       &phy_addr);
+		if (ret)
+			/* If we can't read a valid phy address, then assume
+			 * that there is only one phy on this mdio bus.
+			 */
+			adpt->phydev = phy_find_first(mii_bus);
+		else
+			adpt->phydev = mdiobus_get_phy(mii_bus, phy_addr);
+
+	} else {
+		struct device_node *phy_np;
+
+		ret = of_mdiobus_register(mii_bus, np);
+		if (ret) {
+			dev_err(&pdev->dev, "could not register mdio bus\n");
+			return ret;
+		}
+
+		phy_np = of_parse_phandle(np, "phy-handle", 0);
+		adpt->phydev = of_phy_find_device(phy_np);
+	}
+
+	if (!adpt->phydev) {
+		dev_err(&pdev->dev, "could not find external phy\n");
+		mdiobus_unregister(mii_bus);
+		return -ENODEV;
+	}
+
+	if (adpt->phydev->drv)
+		phy_attached_print(adpt->phydev, NULL);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.h b/drivers/net/ethernet/qualcomm/emac/emac-phy.h
new file mode 100644
index 0000000..49f3701a
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.h
@@ -0,0 +1,33 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License version 2 and
+* only version 2 as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+* GNU General Public License for more details.
+*/
+
+#ifndef _EMAC_PHY_H_
+#define _EMAC_PHY_H_
+
+typedef int (*emac_sgmii_initialize)(struct emac_adapter *adpt);
+
+/** emac_phy - internal emac phy
+ * @base base address
+ * @digital per-lane digital block
+ * @initialize initialization function
+ */
+struct emac_phy {
+	void __iomem		*base;
+	void __iomem		*digital;
+	emac_sgmii_initialize	initialize;
+};
+
+struct emac_adapter;
+
+int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt);
+
+#endif /* _EMAC_PHY_H_ */
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
new file mode 100644
index 0000000..75c1b53
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -0,0 +1,784 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. EMAC SGMII Controller driver.
+ */
+
+#include <linux/iopoll.h>
+#include <linux/acpi.h>
+#include <linux/of_device.h>
+#include "emac.h"
+#include "emac-mac.h"
+#include "emac-sgmii.h"
+
+/* EMAC_QSERDES register offsets */
+#define EMAC_QSERDES_COM_SYS_CLK_CTRL		0x000000
+#define EMAC_QSERDES_COM_PLL_CNTRL		0x000014
+#define EMAC_QSERDES_COM_PLL_IP_SETI		0x000018
+#define EMAC_QSERDES_COM_PLL_CP_SETI		0x000024
+#define EMAC_QSERDES_COM_PLL_IP_SETP		0x000028
+#define EMAC_QSERDES_COM_PLL_CP_SETP		0x00002c
+#define EMAC_QSERDES_COM_SYSCLK_EN_SEL		0x000038
+#define EMAC_QSERDES_COM_RESETSM_CNTRL		0x000040
+#define EMAC_QSERDES_COM_PLLLOCK_CMP1		0x000044
+#define EMAC_QSERDES_COM_PLLLOCK_CMP2		0x000048
+#define EMAC_QSERDES_COM_PLLLOCK_CMP3		0x00004c
+#define EMAC_QSERDES_COM_PLLLOCK_CMP_EN		0x000050
+#define EMAC_QSERDES_COM_DEC_START1		0x000064
+#define EMAC_QSERDES_COM_DIV_FRAC_START1	0x000098
+#define EMAC_QSERDES_COM_DIV_FRAC_START2	0x00009c
+#define EMAC_QSERDES_COM_DIV_FRAC_START3	0x0000a0
+#define EMAC_QSERDES_COM_DEC_START2		0x0000a4
+#define EMAC_QSERDES_COM_PLL_CRCTRL		0x0000ac
+#define EMAC_QSERDES_COM_RESET_SM		0x0000bc
+#define EMAC_QSERDES_TX_BIST_MODE_LANENO	0x000100
+#define EMAC_QSERDES_TX_TX_EMP_POST1_LVL	0x000108
+#define EMAC_QSERDES_TX_TX_DRV_LVL		0x00010c
+#define EMAC_QSERDES_TX_LANE_MODE		0x000150
+#define EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN	0x000170
+#define EMAC_QSERDES_RX_CDR_CONTROL		0x000200
+#define EMAC_QSERDES_RX_CDR_CONTROL2		0x000210
+#define EMAC_QSERDES_RX_RX_EQ_GAIN12		0x000230
+
+/* EMAC_SGMII register offsets */
+#define EMAC_SGMII_PHY_SERDES_START		0x000000
+#define EMAC_SGMII_PHY_CMN_PWR_CTRL		0x000004
+#define EMAC_SGMII_PHY_RX_PWR_CTRL		0x000008
+#define EMAC_SGMII_PHY_TX_PWR_CTRL		0x00000C
+#define EMAC_SGMII_PHY_LANE_CTRL1		0x000018
+#define EMAC_SGMII_PHY_AUTONEG_CFG2		0x000048
+#define EMAC_SGMII_PHY_CDR_CTRL0		0x000058
+#define EMAC_SGMII_PHY_SPEED_CFG1		0x000074
+#define EMAC_SGMII_PHY_POW_DWN_CTRL0		0x000080
+#define EMAC_SGMII_PHY_RESET_CTRL		0x0000a8
+#define EMAC_SGMII_PHY_IRQ_CMD			0x0000ac
+#define EMAC_SGMII_PHY_INTERRUPT_CLEAR		0x0000b0
+#define EMAC_SGMII_PHY_INTERRUPT_MASK		0x0000b4
+#define EMAC_SGMII_PHY_INTERRUPT_STATUS		0x0000b8
+#define EMAC_SGMII_PHY_RX_CHK_STATUS		0x0000d4
+#define EMAC_SGMII_PHY_AUTONEG0_STATUS		0x0000e0
+#define EMAC_SGMII_PHY_AUTONEG1_STATUS		0x0000e4
+
+/* EMAC_QSERDES_COM_PLL_IP_SETI */
+#define PLL_IPSETI(x)				((x) & 0x3f)
+
+/* EMAC_QSERDES_COM_PLL_CP_SETI */
+#define PLL_CPSETI(x)				((x) & 0xff)
+
+/* EMAC_QSERDES_COM_PLL_IP_SETP */
+#define PLL_IPSETP(x)				((x) & 0x3f)
+
+/* EMAC_QSERDES_COM_PLL_CP_SETP */
+#define PLL_CPSETP(x)				((x) & 0x1f)
+
+/* EMAC_QSERDES_COM_PLL_CRCTRL */
+#define PLL_RCTRL(x)				(((x) & 0xf) << 4)
+#define PLL_CCTRL(x)				((x) & 0xf)
+
+/* SGMII v2 PHY registers per lane */
+#define EMAC_SGMII_PHY_LN_OFFSET		0x0400
+
+/* SGMII v2 digital lane registers */
+#define EMAC_SGMII_LN_DRVR_CTRL0		0x00C
+#define EMAC_SGMII_LN_DRVR_TAP_EN		0x018
+#define EMAC_SGMII_LN_TX_MARGINING		0x01C
+#define EMAC_SGMII_LN_TX_PRE			0x020
+#define EMAC_SGMII_LN_TX_POST			0x024
+#define EMAC_SGMII_LN_TX_BAND_MODE		0x060
+#define EMAC_SGMII_LN_LANE_MODE			0x064
+#define EMAC_SGMII_LN_PARALLEL_RATE		0x078
+#define EMAC_SGMII_LN_CML_CTRL_MODE0		0x0B8
+#define EMAC_SGMII_LN_MIXER_CTRL_MODE0		0x0D0
+#define EMAC_SGMII_LN_VGA_INITVAL		0x134
+#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0	0x17C
+#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0	0x188
+#define EMAC_SGMII_LN_UCDR_SO_CONFIG		0x194
+#define EMAC_SGMII_LN_RX_BAND			0x19C
+#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0	0x1B8
+#define EMAC_SGMII_LN_RSM_CONFIG		0x1F0
+#define EMAC_SGMII_LN_SIGDET_ENABLES		0x224
+#define EMAC_SGMII_LN_SIGDET_CNTRL		0x228
+#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL	0x22C
+#define EMAC_SGMII_LN_RX_EN_SIGNAL		0x2A0
+#define EMAC_SGMII_LN_RX_MISC_CNTRL0		0x2AC
+#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV		0x2BC
+
+/* SGMII v2 digital lane register values */
+#define UCDR_STEP_BY_TWO_MODE0			BIT(7)
+#define UCDR_xO_GAIN_MODE(x)			((x) & 0x7f)
+#define UCDR_ENABLE				BIT(6)
+#define UCDR_SO_SATURATION(x)			((x) & 0x3f)
+#define SIGDET_LP_BYP_PS4			BIT(7)
+#define SIGDET_EN_PS0_TO_PS2			BIT(6)
+#define EN_ACCOUPLEVCM_SW_MUX			BIT(5)
+#define EN_ACCOUPLEVCM_SW			BIT(4)
+#define RX_SYNC_EN				BIT(3)
+#define RXTERM_HIGHZ_PS5			BIT(2)
+#define SIGDET_EN_PS3				BIT(1)
+#define EN_ACCOUPLE_VCM_PS3			BIT(0)
+#define UFS_MODE				BIT(5)
+#define TXVAL_VALID_INIT			BIT(4)
+#define TXVAL_VALID_MUX				BIT(3)
+#define TXVAL_VALID				BIT(2)
+#define USB3P1_MODE				BIT(1)
+#define KR_PCIGEN3_MODE				BIT(0)
+#define PRE_EN					BIT(3)
+#define POST_EN					BIT(2)
+#define MAIN_EN_MUX				BIT(1)
+#define MAIN_EN					BIT(0)
+#define TX_MARGINING_MUX			BIT(6)
+#define TX_MARGINING(x)				((x) & 0x3f)
+#define TX_PRE_MUX				BIT(6)
+#define TX_PRE(x)				((x) & 0x3f)
+#define TX_POST_MUX				BIT(6)
+#define TX_POST(x)				((x) & 0x3f)
+#define CML_GEAR_MODE(x)			(((x) & 7) << 3)
+#define CML2CMOS_IBOOST_MODE(x)			((x) & 7)
+#define MIXER_LOADB_MODE(x)			(((x) & 0xf) << 2)
+#define MIXER_DATARATE_MODE(x)			((x) & 3)
+#define VGA_THRESH_DFE(x)			((x) & 0x3f)
+#define SIGDET_LP_BYP_PS0_TO_PS2		BIT(5)
+#define SIGDET_LP_BYP_MUX			BIT(4)
+#define SIGDET_LP_BYP				BIT(3)
+#define SIGDET_EN_MUX				BIT(2)
+#define SIGDET_EN				BIT(1)
+#define SIGDET_FLT_BYP				BIT(0)
+#define SIGDET_LVL(x)				(((x) & 0xf) << 4)
+#define SIGDET_BW_CTRL(x)			((x) & 0xf)
+#define SIGDET_DEGLITCH_CTRL(x)			(((x) & 0xf) << 1)
+#define SIGDET_DEGLITCH_BYP			BIT(0)
+#define INVERT_PCS_RX_CLK			BIT(7)
+#define PWM_EN					BIT(6)
+#define RXBIAS_SEL(x)				(((x) & 0x3) << 4)
+#define EBDAC_SIGN				BIT(3)
+#define EDAC_SIGN				BIT(2)
+#define EN_AUXTAP1SIGN_INVERT			BIT(1)
+#define EN_DAC_CHOPPING				BIT(0)
+#define DRVR_LOGIC_CLK_EN			BIT(4)
+#define DRVR_LOGIC_CLK_DIV(x)			((x) & 0xf)
+#define PARALLEL_RATE_MODE2(x)			(((x) & 0x3) << 4)
+#define PARALLEL_RATE_MODE1(x)			(((x) & 0x3) << 2)
+#define PARALLEL_RATE_MODE0(x)			((x) & 0x3)
+#define BAND_MODE2(x)				(((x) & 0x3) << 4)
+#define BAND_MODE1(x)				(((x) & 0x3) << 2)
+#define BAND_MODE0(x)				((x) & 0x3)
+#define LANE_SYNC_MODE				BIT(5)
+#define LANE_MODE(x)				((x) & 0x1f)
+#define CDR_PD_SEL_MODE0(x)			(((x) & 0x3) << 5)
+#define EN_DLL_MODE0				BIT(4)
+#define EN_IQ_DCC_MODE0				BIT(3)
+#define EN_IQCAL_MODE0				BIT(2)
+#define EN_QPATH_MODE0				BIT(1)
+#define EN_EPATH_MODE0				BIT(0)
+#define FORCE_TSYNC_ACK				BIT(7)
+#define FORCE_CMN_ACK				BIT(6)
+#define FORCE_CMN_READY				BIT(5)
+#define EN_RCLK_DEGLITCH			BIT(4)
+#define BYPASS_RSM_CDR_RESET			BIT(3)
+#define BYPASS_RSM_TSYNC			BIT(2)
+#define BYPASS_RSM_SAMP_CAL			BIT(1)
+#define BYPASS_RSM_DLL_CAL			BIT(0)
+
+/* EMAC_QSERDES_COM_SYS_CLK_CTRL */
+#define SYSCLK_CM				BIT(4)
+#define SYSCLK_AC_COUPLE			BIT(3)
+
+/* EMAC_QSERDES_COM_PLL_CNTRL */
+#define OCP_EN					BIT(5)
+#define PLL_DIV_FFEN				BIT(2)
+#define PLL_DIV_ORD				BIT(1)
+
+/* EMAC_QSERDES_COM_SYSCLK_EN_SEL */
+#define SYSCLK_SEL_CMOS				BIT(3)
+
+/* EMAC_QSERDES_COM_RESETSM_CNTRL */
+#define FRQ_TUNE_MODE				BIT(4)
+
+/* EMAC_QSERDES_COM_PLLLOCK_CMP_EN */
+#define PLLLOCK_CMP_EN				BIT(0)
+
+/* EMAC_QSERDES_COM_DEC_START1 */
+#define DEC_START1_MUX				BIT(7)
+#define DEC_START1(x)				((x) & 0x7f)
+
+/* EMAC_QSERDES_COM_DIV_FRAC_START1 * EMAC_QSERDES_COM_DIV_FRAC_START2 */
+#define DIV_FRAC_START_MUX			BIT(7)
+#define DIV_FRAC_START(x)			((x) & 0x7f)
+
+/* EMAC_QSERDES_COM_DIV_FRAC_START3 */
+#define DIV_FRAC_START3_MUX			BIT(4)
+#define DIV_FRAC_START3(x)			((x) & 0xf)
+
+/* EMAC_QSERDES_COM_DEC_START2 */
+#define DEC_START2_MUX				BIT(1)
+#define DEC_START2				BIT(0)
+
+/* EMAC_QSERDES_COM_RESET_SM */
+#define READY					BIT(5)
+
+/* EMAC_QSERDES_TX_TX_EMP_POST1_LVL */
+#define TX_EMP_POST1_LVL_MUX			BIT(5)
+#define TX_EMP_POST1_LVL(x)			((x) & 0x1f)
+#define TX_EMP_POST1_LVL_BMSK			0x1f
+#define TX_EMP_POST1_LVL_SHFT			0
+
+/* EMAC_QSERDES_TX_TX_DRV_LVL */
+#define TX_DRV_LVL_MUX				BIT(4)
+#define TX_DRV_LVL(x)				((x) & 0xf)
+
+/* EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN */
+#define EMP_EN_MUX				BIT(1)
+#define EMP_EN					BIT(0)
+
+/* EMAC_QSERDES_RX_CDR_CONTROL & EMAC_QSERDES_RX_CDR_CONTROL2 */
+#define HBW_PD_EN				BIT(7)
+#define SECONDORDERENABLE			BIT(6)
+#define FIRSTORDER_THRESH(x)			(((x) & 0x7) << 3)
+#define SECONDORDERGAIN(x)			((x) & 0x7)
+
+/* EMAC_QSERDES_RX_RX_EQ_GAIN12 */
+#define RX_EQ_GAIN2(x)				(((x) & 0xf) << 4)
+#define RX_EQ_GAIN1(x)				((x) & 0xf)
+
+/* EMAC_SGMII_PHY_SERDES_START */
+#define SERDES_START				BIT(0)
+
+/* EMAC_SGMII_PHY_CMN_PWR_CTRL */
+#define BIAS_EN					BIT(6)
+#define PLL_EN					BIT(5)
+#define SYSCLK_EN				BIT(4)
+#define CLKBUF_L_EN				BIT(3)
+#define PLL_TXCLK_EN				BIT(1)
+#define PLL_RXCLK_EN				BIT(0)
+
+/* EMAC_SGMII_PHY_RX_PWR_CTRL */
+#define L0_RX_SIGDET_EN				BIT(7)
+#define L0_RX_TERM_MODE(x)			(((x) & 3) << 4)
+#define L0_RX_I_EN				BIT(1)
+
+/* EMAC_SGMII_PHY_TX_PWR_CTRL */
+#define L0_TX_EN				BIT(5)
+#define L0_CLKBUF_EN				BIT(4)
+#define L0_TRAN_BIAS_EN				BIT(1)
+
+/* EMAC_SGMII_PHY_LANE_CTRL1 */
+#define L0_RX_EQUALIZE_ENABLE			BIT(6)
+#define L0_RESET_TSYNC_EN			BIT(4)
+#define L0_DRV_LVL(x)				((x) & 0xf)
+
+/* EMAC_SGMII_PHY_AUTONEG_CFG2 */
+#define FORCE_AN_TX_CFG				BIT(5)
+#define FORCE_AN_RX_CFG				BIT(4)
+#define AN_ENABLE				BIT(0)
+
+/* EMAC_SGMII_PHY_SPEED_CFG1 */
+#define DUPLEX_MODE				BIT(4)
+#define SPDMODE_1000				BIT(1)
+#define SPDMODE_100				BIT(0)
+#define SPDMODE_10				0
+#define SPDMODE_BMSK				3
+#define SPDMODE_SHFT				0
+
+/* EMAC_SGMII_PHY_POW_DWN_CTRL0 */
+#define PWRDN_B					BIT(0)
+#define CDR_MAX_CNT(x)				((x) & 0xff)
+
+/* EMAC_QSERDES_TX_BIST_MODE_LANENO */
+#define BIST_LANE_NUMBER(x)			(((x) & 3) << 5)
+#define BISTMODE(x)				((x) & 0x1f)
+
+/* EMAC_QSERDES_COM_PLLLOCK_CMPx */
+#define PLLLOCK_CMP(x)				((x) & 0xff)
+
+/* EMAC_SGMII_PHY_RESET_CTRL */
+#define PHY_SW_RESET				BIT(0)
+
+/* EMAC_SGMII_PHY_IRQ_CMD */
+#define IRQ_GLOBAL_CLEAR			BIT(0)
+
+/* EMAC_SGMII_PHY_INTERRUPT_MASK */
+#define DECODE_CODE_ERR				BIT(7)
+#define DECODE_DISP_ERR				BIT(6)
+#define PLL_UNLOCK				BIT(5)
+#define AN_ILLEGAL_TERM				BIT(4)
+#define SYNC_FAIL				BIT(3)
+#define AN_START				BIT(2)
+#define AN_END					BIT(1)
+#define AN_REQUEST				BIT(0)
+
+#define SGMII_PHY_IRQ_CLR_WAIT_TIME		10
+
+#define SGMII_PHY_INTERRUPT_ERR (\
+	DECODE_CODE_ERR         |\
+	DECODE_DISP_ERR)
+
+#define SGMII_ISR_AN_MASK       (\
+	AN_REQUEST              |\
+	AN_START                |\
+	AN_END                  |\
+	AN_ILLEGAL_TERM         |\
+	PLL_UNLOCK              |\
+	SYNC_FAIL)
+
+#define SGMII_ISR_MASK          (\
+	SGMII_PHY_INTERRUPT_ERR |\
+	SGMII_ISR_AN_MASK)
+
+/* SGMII TX_CONFIG */
+#define TXCFG_LINK				0x8000
+#define TXCFG_MODE_BMSK				0x1c00
+#define TXCFG_1000_FULL				0x1800
+#define TXCFG_100_FULL				0x1400
+#define TXCFG_100_HALF				0x0400
+#define TXCFG_10_FULL				0x1000
+#define TXCFG_10_HALF				0x0000
+
+#define SERDES_START_WAIT_TIMES			100
+
+struct emac_reg_write {
+	unsigned int offset;
+	u32 val;
+};
+
+static void emac_reg_write_all(void __iomem *base,
+			       const struct emac_reg_write *itr, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; ++itr, ++i)
+		writel(itr->val, base + itr->offset);
+}
+
+static const struct emac_reg_write physical_coding_sublayer_programming_v1[] = {
+	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+	{EMAC_SGMII_PHY_CMN_PWR_CTRL,
+		BIAS_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | PLL_RXCLK_EN},
+	{EMAC_SGMII_PHY_TX_PWR_CTRL, L0_TX_EN | L0_CLKBUF_EN | L0_TRAN_BIAS_EN},
+	{EMAC_SGMII_PHY_RX_PWR_CTRL,
+		L0_RX_SIGDET_EN | L0_RX_TERM_MODE(1) | L0_RX_I_EN},
+	{EMAC_SGMII_PHY_CMN_PWR_CTRL,
+		BIAS_EN | PLL_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN |
+		PLL_RXCLK_EN},
+	{EMAC_SGMII_PHY_LANE_CTRL1,
+		L0_RX_EQUALIZE_ENABLE | L0_RESET_TSYNC_EN | L0_DRV_LVL(15)},
+};
+
+static const struct emac_reg_write sysclk_refclk_setting[] = {
+	{EMAC_QSERDES_COM_SYSCLK_EN_SEL, SYSCLK_SEL_CMOS},
+	{EMAC_QSERDES_COM_SYS_CLK_CTRL,	SYSCLK_CM | SYSCLK_AC_COUPLE},
+};
+
+static const struct emac_reg_write pll_setting[] = {
+	{EMAC_QSERDES_COM_PLL_IP_SETI, PLL_IPSETI(1)},
+	{EMAC_QSERDES_COM_PLL_CP_SETI, PLL_CPSETI(59)},
+	{EMAC_QSERDES_COM_PLL_IP_SETP, PLL_IPSETP(10)},
+	{EMAC_QSERDES_COM_PLL_CP_SETP, PLL_CPSETP(9)},
+	{EMAC_QSERDES_COM_PLL_CRCTRL, PLL_RCTRL(15) | PLL_CCTRL(11)},
+	{EMAC_QSERDES_COM_PLL_CNTRL, OCP_EN | PLL_DIV_FFEN | PLL_DIV_ORD},
+	{EMAC_QSERDES_COM_DEC_START1, DEC_START1_MUX | DEC_START1(2)},
+	{EMAC_QSERDES_COM_DEC_START2, DEC_START2_MUX | DEC_START2},
+	{EMAC_QSERDES_COM_DIV_FRAC_START1,
+		DIV_FRAC_START_MUX | DIV_FRAC_START(85)},
+	{EMAC_QSERDES_COM_DIV_FRAC_START2,
+		DIV_FRAC_START_MUX | DIV_FRAC_START(42)},
+	{EMAC_QSERDES_COM_DIV_FRAC_START3,
+		DIV_FRAC_START3_MUX | DIV_FRAC_START3(3)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP1, PLLLOCK_CMP(43)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP2, PLLLOCK_CMP(104)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP3, PLLLOCK_CMP(0)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP_EN, PLLLOCK_CMP_EN},
+	{EMAC_QSERDES_COM_RESETSM_CNTRL, FRQ_TUNE_MODE},
+};
+
+static const struct emac_reg_write cdr_setting[] = {
+	{EMAC_QSERDES_RX_CDR_CONTROL,
+		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(2)},
+	{EMAC_QSERDES_RX_CDR_CONTROL2,
+		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(4)},
+};
+
+static const struct emac_reg_write tx_rx_setting[] = {
+	{EMAC_QSERDES_TX_BIST_MODE_LANENO, 0},
+	{EMAC_QSERDES_TX_TX_DRV_LVL, TX_DRV_LVL_MUX | TX_DRV_LVL(15)},
+	{EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN, EMP_EN_MUX | EMP_EN},
+	{EMAC_QSERDES_TX_TX_EMP_POST1_LVL,
+		TX_EMP_POST1_LVL_MUX | TX_EMP_POST1_LVL(1)},
+	{EMAC_QSERDES_RX_RX_EQ_GAIN12, RX_EQ_GAIN2(15) | RX_EQ_GAIN1(15)},
+	{EMAC_QSERDES_TX_LANE_MODE, LANE_MODE(8)},
+};
+
+static const struct emac_reg_write sgmii_v2_laned[] = {
+	/* CDR Settings */
+	{EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
+		UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
+	{EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)},
+	{EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
+
+	/* TX/RX Settings */
+	{EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2},
+
+	{EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE},
+	{EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN},
+	{EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)},
+	{EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX},
+	{EMAC_SGMII_LN_TX_POST, TX_POST_MUX},
+
+	{EMAC_SGMII_LN_CML_CTRL_MODE0,
+		CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)},
+	{EMAC_SGMII_LN_MIXER_CTRL_MODE0,
+		MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)},
+	{EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)},
+	{EMAC_SGMII_LN_SIGDET_ENABLES,
+		SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP},
+	{EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)},
+
+	{EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)},
+	{EMAC_SGMII_LN_RX_MISC_CNTRL0, 0},
+	{EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV,
+		DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)},
+
+	{EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)},
+	{EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(2)},
+	{EMAC_SGMII_LN_RX_BAND, BAND_MODE0(3)},
+	{EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)},
+	{EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(3)},
+	{EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL},
+};
+
+static const struct emac_reg_write physical_coding_sublayer_programming_v2[] = {
+	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+	{EMAC_SGMII_PHY_TX_PWR_CTRL, 0},
+	{EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE},
+};
+
+static int emac_sgmii_link_init(struct emac_adapter *adpt)
+{
+	struct phy_device *phydev = adpt->phydev;
+	struct emac_phy *phy = &adpt->phy;
+	u32 val;
+
+	val = readl(phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2);
+
+	if (phydev->autoneg == AUTONEG_ENABLE) {
+		val &= ~(FORCE_AN_RX_CFG | FORCE_AN_TX_CFG);
+		val |= AN_ENABLE;
+		writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2);
+	} else {
+		u32 speed_cfg;
+
+		switch (phydev->speed) {
+		case SPEED_10:
+			speed_cfg = SPDMODE_10;
+			break;
+		case SPEED_100:
+			speed_cfg = SPDMODE_100;
+			break;
+		case SPEED_1000:
+			speed_cfg = SPDMODE_1000;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (phydev->duplex == DUPLEX_FULL)
+			speed_cfg |= DUPLEX_MODE;
+
+		val &= ~AN_ENABLE;
+		writel(speed_cfg, phy->base + EMAC_SGMII_PHY_SPEED_CFG1);
+		writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2);
+	}
+
+	return 0;
+}
+
+static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits)
+{
+	struct emac_phy *phy = &adpt->phy;
+	u32 status;
+
+	writel_relaxed(irq_bits, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR);
+	writel_relaxed(IRQ_GLOBAL_CLEAR, phy->base + EMAC_SGMII_PHY_IRQ_CMD);
+	/* Ensure interrupt clear command is written to HW */
+	wmb();
+
+	/* After set the IRQ_GLOBAL_CLEAR bit, the status clearing must
+	 * be confirmed before clearing the bits in other registers.
+	 * It takes a few cycles for hw to clear the interrupt status.
+	 */
+	if (readl_poll_timeout_atomic(phy->base +
+				      EMAC_SGMII_PHY_INTERRUPT_STATUS,
+				      status, !(status & irq_bits), 1,
+				      SGMII_PHY_IRQ_CLR_WAIT_TIME)) {
+		netdev_err(adpt->netdev,
+			   "error: failed clear SGMII irq: status:0x%x bits:0x%x\n",
+			   status, irq_bits);
+		return -EIO;
+	}
+
+	/* Finalize clearing procedure */
+	writel_relaxed(0, phy->base + EMAC_SGMII_PHY_IRQ_CMD);
+	writel_relaxed(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR);
+
+	/* Ensure that clearing procedure finalization is written to HW */
+	wmb();
+
+	return 0;
+}
+
+int emac_sgmii_init_v1(struct emac_adapter *adpt)
+{
+	struct emac_phy *phy = &adpt->phy;
+	unsigned int i;
+	int ret;
+
+	ret = emac_sgmii_link_init(adpt);
+	if (ret)
+		return ret;
+
+	emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v1,
+			   ARRAY_SIZE(physical_coding_sublayer_programming_v1));
+	emac_reg_write_all(phy->base, sysclk_refclk_setting,
+			   ARRAY_SIZE(sysclk_refclk_setting));
+	emac_reg_write_all(phy->base, pll_setting, ARRAY_SIZE(pll_setting));
+	emac_reg_write_all(phy->base, cdr_setting, ARRAY_SIZE(cdr_setting));
+	emac_reg_write_all(phy->base, tx_rx_setting,
+			   ARRAY_SIZE(tx_rx_setting));
+
+	/* Power up the Ser/Des engine */
+	writel(SERDES_START, phy->base + EMAC_SGMII_PHY_SERDES_START);
+
+	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
+		if (readl(phy->base + EMAC_QSERDES_COM_RESET_SM) & READY)
+			break;
+		usleep_range(100, 200);
+	}
+
+	if (i == SERDES_START_WAIT_TIMES) {
+		netdev_err(adpt->netdev, "error: ser/des failed to start\n");
+		return -EIO;
+	}
+	/* Mask out all the SGMII Interrupt */
+	writel(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+	emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
+
+	return 0;
+}
+
+int emac_sgmii_init_v2(struct emac_adapter *adpt)
+{
+	struct emac_phy *phy = &adpt->phy;
+	void __iomem *phy_regs = phy->base;
+	void __iomem *laned = phy->digital;
+	unsigned int i;
+	u32 lnstatus;
+	int ret;
+
+	ret = emac_sgmii_link_init(adpt);
+	if (ret)
+		return ret;
+
+	/* PCS lane-x init */
+	emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v2,
+			   ARRAY_SIZE(physical_coding_sublayer_programming_v2));
+
+	/* SGMII lane-x init */
+	emac_reg_write_all(phy->digital,
+			   sgmii_v2_laned, ARRAY_SIZE(sgmii_v2_laned));
+
+	/* Power up PCS and start reset lane state machine */
+
+	writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL);
+	writel(1, laned + SGMII_LN_RSM_START);
+
+	/* Wait for c_ready assertion */
+	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
+		lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS);
+		if (lnstatus & BIT(1))
+			break;
+		usleep_range(100, 200);
+	}
+
+	if (i == SERDES_START_WAIT_TIMES) {
+		netdev_err(adpt->netdev, "SGMII failed to start\n");
+		return -EIO;
+	}
+
+	/* Disable digital and SERDES loopback */
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0);
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2);
+	writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1);
+
+	/* Mask out all the SGMII Interrupt */
+	writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+	emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
+
+	return 0;
+}
+
+static void emac_sgmii_reset_prepare(struct emac_adapter *adpt)
+{
+	struct emac_phy *phy = &adpt->phy;
+	u32 val;
+
+	/* Reset PHY */
+	val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2);
+	writel(((val & ~PHY_RESET) | PHY_RESET), phy->base +
+	       EMAC_EMAC_WRAPPER_CSR2);
+	/* Ensure phy-reset command is written to HW before the release cmd */
+	msleep(50);
+	val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2);
+	writel((val & ~PHY_RESET), phy->base + EMAC_EMAC_WRAPPER_CSR2);
+	/* Ensure phy-reset release command is written to HW before initializing
+	 * SGMII
+	 */
+	msleep(50);
+}
+
+void emac_sgmii_reset(struct emac_adapter *adpt)
+{
+	int ret;
+
+	clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000);
+	emac_sgmii_reset_prepare(adpt);
+
+	ret = adpt->phy.initialize(adpt);
+	if (ret)
+		netdev_err(adpt->netdev,
+			   "could not reinitialize internal PHY (error=%i)\n",
+			   ret);
+
+	clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000);
+}
+
+static int emac_sgmii_acpi_match(struct device *dev, void *data)
+{
+	static const struct acpi_device_id match_table[] = {
+		{
+			.id = "QCOM8071",
+			.driver_data = (kernel_ulong_t)emac_sgmii_init_v2,
+		},
+		{}
+	};
+	const struct acpi_device_id *id = acpi_match_device(match_table, dev);
+	emac_sgmii_initialize *initialize = data;
+
+	if (id)
+		*initialize = (emac_sgmii_initialize)id->driver_data;
+
+	return !!id;
+}
+
+static const struct of_device_id emac_sgmii_dt_match[] = {
+	{
+		.compatible = "qcom,fsm9900-emac-sgmii",
+		.data = emac_sgmii_init_v1,
+	},
+	{
+		.compatible = "qcom,qdf2432-emac-sgmii",
+		.data = emac_sgmii_init_v2,
+	},
+	{}
+};
+
+int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
+{
+	struct platform_device *sgmii_pdev = NULL;
+	struct emac_phy *phy = &adpt->phy;
+	struct resource *res;
+	int ret;
+
+	if (has_acpi_companion(&pdev->dev)) {
+		struct device *dev;
+
+		dev = device_find_child(&pdev->dev, &phy->initialize,
+					emac_sgmii_acpi_match);
+
+		if (!dev) {
+			dev_err(&pdev->dev, "cannot find internal phy node\n");
+			return -ENODEV;
+		}
+
+		sgmii_pdev = to_platform_device(dev);
+	} else {
+		const struct of_device_id *match;
+		struct device_node *np;
+
+		np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0);
+		if (!np) {
+			dev_err(&pdev->dev, "missing internal-phy property\n");
+			return -ENODEV;
+		}
+
+		sgmii_pdev = of_find_device_by_node(np);
+		if (!sgmii_pdev) {
+			dev_err(&pdev->dev, "invalid internal-phy property\n");
+			return -ENODEV;
+		}
+
+		match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev);
+		if (!match) {
+			dev_err(&pdev->dev, "unrecognized internal phy node\n");
+			ret = -ENODEV;
+			goto error_put_device;
+		}
+
+		phy->initialize = (emac_sgmii_initialize)match->data;
+	}
+
+	/* Base address is the first address */
+	res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		ret = -EINVAL;
+		goto error_put_device;
+	}
+
+	phy->base = ioremap(res->start, resource_size(res));
+	if (!phy->base) {
+		ret = -ENOMEM;
+		goto error_put_device;
+	}
+
+	/* v2 SGMII has a per-lane digital digital, so parse it if it exists */
+	res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 1);
+	if (res) {
+		phy->digital = ioremap(res->start, resource_size(res));
+		if (!phy->digital) {
+			ret = -ENOMEM;
+			goto error_unmap_base;
+		}
+	}
+
+	ret = phy->initialize(adpt);
+	if (ret)
+		goto error;
+
+	/* We've remapped the addresses, so we don't need the device any
+	 * more.  of_find_device_by_node() says we should release it.
+	 */
+	put_device(&sgmii_pdev->dev);
+
+	return 0;
+
+error:
+	if (phy->digital)
+		iounmap(phy->digital);
+error_unmap_base:
+	iounmap(phy->base);
+error_put_device:
+	put_device(&sgmii_pdev->dev);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
new file mode 100644
index 0000000..ce79212
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
@@ -0,0 +1,24 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _EMAC_SGMII_H_
+#define _EMAC_SGMII_H_
+
+struct emac_adapter;
+struct platform_device;
+
+int emac_sgmii_init_v1(struct emac_adapter *adpt);
+int emac_sgmii_init_v2(struct emac_adapter *adpt);
+int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt);
+void emac_sgmii_reset(struct emac_adapter *adpt);
+
+#endif
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
new file mode 100644
index 0000000..9bf3b2b
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -0,0 +1,755 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. EMAC Gigabit Ethernet Driver */
+
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/of_device.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include "emac.h"
+#include "emac-mac.h"
+#include "emac-phy.h"
+#include "emac-sgmii.h"
+
+#define EMAC_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK |  \
+		NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP)
+
+#define EMAC_RRD_SIZE					     4
+/* The RRD size if timestamping is enabled: */
+#define EMAC_TS_RRD_SIZE				     6
+#define EMAC_TPD_SIZE					     4
+#define EMAC_RFD_SIZE					     2
+
+#define REG_MAC_RX_STATUS_BIN		 EMAC_RXMAC_STATC_REG0
+#define REG_MAC_RX_STATUS_END		EMAC_RXMAC_STATC_REG22
+#define REG_MAC_TX_STATUS_BIN		 EMAC_TXMAC_STATC_REG0
+#define REG_MAC_TX_STATUS_END		EMAC_TXMAC_STATC_REG24
+
+#define RXQ0_NUM_RFD_PREF_DEF				     8
+#define TXQ0_NUM_TPD_PREF_DEF				     5
+
+#define EMAC_PREAMBLE_DEF				     7
+
+#define DMAR_DLY_CNT_DEF				    15
+#define DMAW_DLY_CNT_DEF				     4
+
+#define IMR_NORMAL_MASK         (\
+		ISR_ERROR       |\
+		ISR_GPHY_LINK   |\
+		ISR_TX_PKT      |\
+		GPHY_WAKEUP_INT)
+
+#define IMR_EXTENDED_MASK       (\
+		SW_MAN_INT      |\
+		ISR_OVER        |\
+		ISR_ERROR       |\
+		ISR_GPHY_LINK   |\
+		ISR_TX_PKT      |\
+		GPHY_WAKEUP_INT)
+
+#define ISR_TX_PKT      (\
+	TX_PKT_INT      |\
+	TX_PKT_INT1     |\
+	TX_PKT_INT2     |\
+	TX_PKT_INT3)
+
+#define ISR_GPHY_LINK        (\
+	GPHY_LINK_UP_INT     |\
+	GPHY_LINK_DOWN_INT)
+
+#define ISR_OVER        (\
+	RFD0_UR_INT     |\
+	RFD1_UR_INT     |\
+	RFD2_UR_INT     |\
+	RFD3_UR_INT     |\
+	RFD4_UR_INT     |\
+	RXF_OF_INT      |\
+	TXF_UR_INT)
+
+#define ISR_ERROR       (\
+	DMAR_TO_INT     |\
+	DMAW_TO_INT     |\
+	TXQ_TO_INT)
+
+/* in sync with enum emac_clk_id */
+static const char * const emac_clk_name[] = {
+	"axi_clk", "cfg_ahb_clk", "high_speed_clk", "mdio_clk", "tx_clk",
+	"rx_clk", "sys_clk"
+};
+
+void emac_reg_update32(void __iomem *addr, u32 mask, u32 val)
+{
+	u32 data = readl(addr);
+
+	writel(((data & ~mask) | val), addr);
+}
+
+/* reinitialize */
+int emac_reinit_locked(struct emac_adapter *adpt)
+{
+	int ret;
+
+	mutex_lock(&adpt->reset_lock);
+
+	emac_mac_down(adpt);
+	emac_sgmii_reset(adpt);
+	ret = emac_mac_up(adpt);
+
+	mutex_unlock(&adpt->reset_lock);
+
+	return ret;
+}
+
+/* NAPI */
+static int emac_napi_rtx(struct napi_struct *napi, int budget)
+{
+	struct emac_rx_queue *rx_q =
+		container_of(napi, struct emac_rx_queue, napi);
+	struct emac_adapter *adpt = netdev_priv(rx_q->netdev);
+	struct emac_irq *irq = rx_q->irq;
+	int work_done = 0;
+
+	emac_mac_rx_process(adpt, rx_q, &work_done, budget);
+
+	if (work_done < budget) {
+		napi_complete(napi);
+
+		irq->mask |= rx_q->intr;
+		writel(irq->mask, adpt->base + EMAC_INT_MASK);
+	}
+
+	return work_done;
+}
+
+/* Transmit the packet */
+static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb);
+}
+
+irqreturn_t emac_isr(int _irq, void *data)
+{
+	struct emac_irq *irq = data;
+	struct emac_adapter *adpt =
+		container_of(irq, struct emac_adapter, irq);
+	struct emac_rx_queue *rx_q = &adpt->rx_q;
+	u32 isr, status;
+
+	/* disable the interrupt */
+	writel(0, adpt->base + EMAC_INT_MASK);
+
+	isr = readl_relaxed(adpt->base + EMAC_INT_STATUS);
+
+	status = isr & irq->mask;
+	if (status == 0)
+		goto exit;
+
+	if (status & ISR_ERROR) {
+		netif_warn(adpt,  intr, adpt->netdev,
+			   "warning: error irq status 0x%lx\n",
+			   status & ISR_ERROR);
+		/* reset MAC */
+		schedule_work(&adpt->work_thread);
+	}
+
+	/* Schedule the napi for receive queue with interrupt
+	 * status bit set
+	 */
+	if (status & rx_q->intr) {
+		if (napi_schedule_prep(&rx_q->napi)) {
+			irq->mask &= ~rx_q->intr;
+			__napi_schedule(&rx_q->napi);
+		}
+	}
+
+	if (status & TX_PKT_INT)
+		emac_mac_tx_process(adpt, &adpt->tx_q);
+
+	if (status & ISR_OVER)
+		net_warn_ratelimited("warning: TX/RX overflow\n");
+
+	/* link event */
+	if (status & ISR_GPHY_LINK)
+		phy_mac_interrupt(adpt->phydev, !!(status & GPHY_LINK_UP_INT));
+
+exit:
+	/* enable the interrupt */
+	writel(irq->mask, adpt->base + EMAC_INT_MASK);
+
+	return IRQ_HANDLED;
+}
+
+/* Configure VLAN tag strip/insert feature */
+static int emac_set_features(struct net_device *netdev,
+			     netdev_features_t features)
+{
+	netdev_features_t changed = features ^ netdev->features;
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	/* We only need to reprogram the hardware if the VLAN tag features
+	 * have changed, and if it's already running.
+	 */
+	if (!(changed & (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX)))
+		return 0;
+
+	if (!netif_running(netdev))
+		return 0;
+
+	/* emac_mac_mode_config() uses netdev->features to configure the EMAC,
+	 * so make sure it's set first.
+	 */
+	netdev->features = features;
+
+	return emac_reinit_locked(adpt);
+}
+
+/* Configure Multicast and Promiscuous modes */
+static void emac_rx_mode_set(struct net_device *netdev)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+	struct netdev_hw_addr *ha;
+
+	emac_mac_mode_config(adpt);
+
+	/* update multicast address filtering */
+	emac_mac_multicast_addr_clear(adpt);
+	netdev_for_each_mc_addr(ha, netdev)
+		emac_mac_multicast_addr_set(adpt, ha->addr);
+}
+
+/* Change the Maximum Transfer Unit (MTU) */
+static int emac_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	unsigned int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	if ((max_frame < EMAC_MIN_ETH_FRAME_SIZE) ||
+	    (max_frame > EMAC_MAX_ETH_FRAME_SIZE)) {
+		netdev_err(adpt->netdev, "error: invalid MTU setting\n");
+		return -EINVAL;
+	}
+
+	netif_info(adpt, hw, adpt->netdev,
+		   "changing MTU from %d to %d\n", netdev->mtu,
+		   new_mtu);
+	netdev->mtu = new_mtu;
+
+	if (netif_running(netdev))
+		return emac_reinit_locked(adpt);
+
+	return 0;
+}
+
+/* Called when the network interface is made active */
+static int emac_open(struct net_device *netdev)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+	int ret;
+
+	/* allocate rx/tx dma buffer & descriptors */
+	ret = emac_mac_rx_tx_rings_alloc_all(adpt);
+	if (ret) {
+		netdev_err(adpt->netdev, "error allocating rx/tx rings\n");
+		return ret;
+	}
+
+	ret = emac_mac_up(adpt);
+	if (ret) {
+		emac_mac_rx_tx_rings_free_all(adpt);
+		return ret;
+	}
+
+	emac_mac_start(adpt);
+
+	return 0;
+}
+
+/* Called when the network interface is disabled */
+static int emac_close(struct net_device *netdev)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	mutex_lock(&adpt->reset_lock);
+
+	emac_mac_down(adpt);
+	emac_mac_rx_tx_rings_free_all(adpt);
+
+	mutex_unlock(&adpt->reset_lock);
+
+	return 0;
+}
+
+/* Respond to a TX hang */
+static void emac_tx_timeout(struct net_device *netdev)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	schedule_work(&adpt->work_thread);
+}
+
+/* IOCTL support for the interface */
+static int emac_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	if (!netif_running(netdev))
+		return -EINVAL;
+
+	if (!netdev->phydev)
+		return -ENODEV;
+
+	return phy_mii_ioctl(netdev->phydev, ifr, cmd);
+}
+
+/* Provide network statistics info for the interface */
+static struct rtnl_link_stats64 *emac_get_stats64(struct net_device *netdev,
+						  struct rtnl_link_stats64 *net_stats)
+{
+	struct emac_adapter *adpt = netdev_priv(netdev);
+	unsigned int addr = REG_MAC_RX_STATUS_BIN;
+	struct emac_stats *stats = &adpt->stats;
+	u64 *stats_itr = &adpt->stats.rx_ok;
+	u32 val;
+
+	spin_lock(&stats->lock);
+
+	while (addr <= REG_MAC_RX_STATUS_END) {
+		val = readl_relaxed(adpt->base + addr);
+		*stats_itr += val;
+		stats_itr++;
+		addr += sizeof(u32);
+	}
+
+	/* additional rx status */
+	val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG23);
+	adpt->stats.rx_crc_align += val;
+	val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG24);
+	adpt->stats.rx_jabbers += val;
+
+	/* update tx status */
+	addr = REG_MAC_TX_STATUS_BIN;
+	stats_itr = &adpt->stats.tx_ok;
+
+	while (addr <= REG_MAC_TX_STATUS_END) {
+		val = readl_relaxed(adpt->base + addr);
+		*stats_itr += val;
+		++stats_itr;
+		addr += sizeof(u32);
+	}
+
+	/* additional tx status */
+	val = readl_relaxed(adpt->base + EMAC_TXMAC_STATC_REG25);
+	adpt->stats.tx_col += val;
+
+	/* return parsed statistics */
+	net_stats->rx_packets = stats->rx_ok;
+	net_stats->tx_packets = stats->tx_ok;
+	net_stats->rx_bytes = stats->rx_byte_cnt;
+	net_stats->tx_bytes = stats->tx_byte_cnt;
+	net_stats->multicast = stats->rx_mcast;
+	net_stats->collisions = stats->tx_1_col + stats->tx_2_col * 2 +
+				stats->tx_late_col + stats->tx_abort_col;
+
+	net_stats->rx_errors = stats->rx_frag + stats->rx_fcs_err +
+			       stats->rx_len_err + stats->rx_sz_ov +
+			       stats->rx_align_err;
+	net_stats->rx_fifo_errors = stats->rx_rxf_ov;
+	net_stats->rx_length_errors = stats->rx_len_err;
+	net_stats->rx_crc_errors = stats->rx_fcs_err;
+	net_stats->rx_frame_errors = stats->rx_align_err;
+	net_stats->rx_over_errors = stats->rx_rxf_ov;
+	net_stats->rx_missed_errors = stats->rx_rxf_ov;
+
+	net_stats->tx_errors = stats->tx_late_col + stats->tx_abort_col +
+			       stats->tx_underrun + stats->tx_trunc;
+	net_stats->tx_fifo_errors = stats->tx_underrun;
+	net_stats->tx_aborted_errors = stats->tx_abort_col;
+	net_stats->tx_window_errors = stats->tx_late_col;
+
+	spin_unlock(&stats->lock);
+
+	return net_stats;
+}
+
+static const struct net_device_ops emac_netdev_ops = {
+	.ndo_open		= emac_open,
+	.ndo_stop		= emac_close,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_start_xmit		= emac_start_xmit,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_change_mtu		= emac_change_mtu,
+	.ndo_do_ioctl		= emac_ioctl,
+	.ndo_tx_timeout		= emac_tx_timeout,
+	.ndo_get_stats64	= emac_get_stats64,
+	.ndo_set_features       = emac_set_features,
+	.ndo_set_rx_mode        = emac_rx_mode_set,
+};
+
+/* Watchdog task routine, called to reinitialize the EMAC */
+static void emac_work_thread(struct work_struct *work)
+{
+	struct emac_adapter *adpt =
+		container_of(work, struct emac_adapter, work_thread);
+
+	emac_reinit_locked(adpt);
+}
+
+/* Initialize various data structures  */
+static void emac_init_adapter(struct emac_adapter *adpt)
+{
+	u32 reg;
+
+	/* descriptors */
+	adpt->tx_desc_cnt = EMAC_DEF_TX_DESCS;
+	adpt->rx_desc_cnt = EMAC_DEF_RX_DESCS;
+
+	/* dma */
+	adpt->dma_order = emac_dma_ord_out;
+	adpt->dmar_block = emac_dma_req_4096;
+	adpt->dmaw_block = emac_dma_req_128;
+	adpt->dmar_dly_cnt = DMAR_DLY_CNT_DEF;
+	adpt->dmaw_dly_cnt = DMAW_DLY_CNT_DEF;
+	adpt->tpd_burst = TXQ0_NUM_TPD_PREF_DEF;
+	adpt->rfd_burst = RXQ0_NUM_RFD_PREF_DEF;
+
+	/* irq moderator */
+	reg = ((EMAC_DEF_RX_IRQ_MOD >> 1) << IRQ_MODERATOR2_INIT_SHFT) |
+	      ((EMAC_DEF_TX_IRQ_MOD >> 1) << IRQ_MODERATOR_INIT_SHFT);
+	adpt->irq_mod = reg;
+
+	/* others */
+	adpt->preamble = EMAC_PREAMBLE_DEF;
+}
+
+/* Get the clock */
+static int emac_clks_get(struct platform_device *pdev,
+			 struct emac_adapter *adpt)
+{
+	unsigned int i;
+
+	for (i = 0; i < EMAC_CLK_CNT; i++) {
+		struct clk *clk = devm_clk_get(&pdev->dev, emac_clk_name[i]);
+
+		if (IS_ERR(clk)) {
+			dev_err(&pdev->dev,
+				"could not claim clock %s (error=%li)\n",
+				emac_clk_name[i], PTR_ERR(clk));
+
+			return PTR_ERR(clk);
+		}
+
+		adpt->clk[i] = clk;
+	}
+
+	return 0;
+}
+
+/* Initialize clocks */
+static int emac_clks_phase1_init(struct platform_device *pdev,
+				 struct emac_adapter *adpt)
+{
+	int ret;
+
+	ret = emac_clks_get(pdev, adpt);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(adpt->clk[EMAC_CLK_AXI]);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]);
+	if (ret)
+		return ret;
+
+	ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000);
+	if (ret)
+		return ret;
+
+	return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]);
+}
+
+/* Enable clocks; needs emac_clks_phase1_init to be called before */
+static int emac_clks_phase2_init(struct platform_device *pdev,
+				 struct emac_adapter *adpt)
+{
+	int ret;
+
+	ret = clk_set_rate(adpt->clk[EMAC_CLK_TX], 125000000);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(adpt->clk[EMAC_CLK_TX]);
+	if (ret)
+		return ret;
+
+	ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000);
+	if (ret)
+		return ret;
+
+	ret = clk_set_rate(adpt->clk[EMAC_CLK_MDIO], 25000000);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(adpt->clk[EMAC_CLK_MDIO]);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(adpt->clk[EMAC_CLK_RX]);
+	if (ret)
+		return ret;
+
+	return clk_prepare_enable(adpt->clk[EMAC_CLK_SYS]);
+}
+
+static void emac_clks_teardown(struct emac_adapter *adpt)
+{
+
+	unsigned int i;
+
+	for (i = 0; i < EMAC_CLK_CNT; i++)
+		clk_disable_unprepare(adpt->clk[i]);
+}
+
+/* Get the resources */
+static int emac_probe_resources(struct platform_device *pdev,
+				struct emac_adapter *adpt)
+{
+	struct net_device *netdev = adpt->netdev;
+	struct resource *res;
+	char maddr[ETH_ALEN];
+	int ret = 0;
+
+	/* get mac address */
+	if (device_get_mac_address(&pdev->dev, maddr, ETH_ALEN))
+		ether_addr_copy(netdev->dev_addr, maddr);
+	else
+		eth_hw_addr_random(netdev);
+
+	/* Core 0 interrupt */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(&pdev->dev,
+			"error: missing core0 irq resource (error=%i)\n", ret);
+		return ret;
+	}
+	adpt->irq.irq = ret;
+
+	/* base register address */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	adpt->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(adpt->base))
+		return PTR_ERR(adpt->base);
+
+	/* CSR register address */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	adpt->csr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(adpt->csr))
+		return PTR_ERR(adpt->csr);
+
+	netdev->base_addr = (unsigned long)adpt->base;
+
+	return 0;
+}
+
+static const struct of_device_id emac_dt_match[] = {
+	{
+		.compatible = "qcom,fsm9900-emac",
+	},
+	{}
+};
+
+#if IS_ENABLED(CONFIG_ACPI)
+static const struct acpi_device_id emac_acpi_match[] = {
+	{
+		.id = "QCOM8070",
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(acpi, emac_acpi_match);
+#endif
+
+static int emac_probe(struct platform_device *pdev)
+{
+	struct net_device *netdev;
+	struct emac_adapter *adpt;
+	struct emac_phy *phy;
+	u16 devid, revid;
+	u32 reg;
+	int ret;
+
+	/* The EMAC itself is capable of 64-bit DMA, so try that first. */
+	ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (ret) {
+		/* Some platforms may restrict the EMAC's address bus to less
+		 * then the size of DDR. In this case, we need to try a
+		 * smaller mask.  We could try every possible smaller mask,
+		 * but that's overkill.  Instead, just fall to 32-bit, which
+		 * should always work.
+		 */
+		ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_err(&pdev->dev, "could not set DMA mask\n");
+			return ret;
+		}
+	}
+
+	netdev = alloc_etherdev(sizeof(struct emac_adapter));
+	if (!netdev)
+		return -ENOMEM;
+
+	dev_set_drvdata(&pdev->dev, netdev);
+	SET_NETDEV_DEV(netdev, &pdev->dev);
+
+	adpt = netdev_priv(netdev);
+	adpt->netdev = netdev;
+	adpt->msg_enable = EMAC_MSG_DEFAULT;
+
+	phy = &adpt->phy;
+
+	mutex_init(&adpt->reset_lock);
+	spin_lock_init(&adpt->stats.lock);
+
+	adpt->irq.mask = RX_PKT_INT0 | IMR_NORMAL_MASK;
+
+	ret = emac_probe_resources(pdev, adpt);
+	if (ret)
+		goto err_undo_netdev;
+
+	/* initialize clocks */
+	ret = emac_clks_phase1_init(pdev, adpt);
+	if (ret) {
+		dev_err(&pdev->dev, "could not initialize clocks\n");
+		goto err_undo_netdev;
+	}
+
+	netdev->watchdog_timeo = EMAC_WATCHDOG_TIME;
+	netdev->irq = adpt->irq.irq;
+
+	adpt->rrd_size = EMAC_RRD_SIZE;
+	adpt->tpd_size = EMAC_TPD_SIZE;
+	adpt->rfd_size = EMAC_RFD_SIZE;
+
+	netdev->netdev_ops = &emac_netdev_ops;
+
+	emac_init_adapter(adpt);
+
+	/* init external phy */
+	ret = emac_phy_config(pdev, adpt);
+	if (ret)
+		goto err_undo_clocks;
+
+	/* init internal sgmii phy */
+	ret = emac_sgmii_config(pdev, adpt);
+	if (ret)
+		goto err_undo_mdiobus;
+
+	/* enable clocks */
+	ret = emac_clks_phase2_init(pdev, adpt);
+	if (ret) {
+		dev_err(&pdev->dev, "could not initialize clocks\n");
+		goto err_undo_mdiobus;
+	}
+
+	emac_mac_reset(adpt);
+
+	/* set hw features */
+	netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
+			NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_RX |
+			NETIF_F_HW_VLAN_CTAG_TX;
+	netdev->hw_features = netdev->features;
+
+	netdev->vlan_features |= NETIF_F_SG | NETIF_F_HW_CSUM |
+				 NETIF_F_TSO | NETIF_F_TSO6;
+
+	INIT_WORK(&adpt->work_thread, emac_work_thread);
+
+	/* Initialize queues */
+	emac_mac_rx_tx_ring_init_all(pdev, adpt);
+
+	netif_napi_add(netdev, &adpt->rx_q.napi, emac_napi_rtx,
+		       NAPI_POLL_WEIGHT);
+
+	ret = register_netdev(netdev);
+	if (ret) {
+		dev_err(&pdev->dev, "could not register net device\n");
+		goto err_undo_napi;
+	}
+
+	reg =  readl_relaxed(adpt->base + EMAC_DMA_MAS_CTRL);
+	devid = (reg & DEV_ID_NUM_BMSK)  >> DEV_ID_NUM_SHFT;
+	revid = (reg & DEV_REV_NUM_BMSK) >> DEV_REV_NUM_SHFT;
+	reg = readl_relaxed(adpt->base + EMAC_CORE_HW_VERSION);
+
+	netif_info(adpt, probe, netdev,
+		   "hardware id %d.%d, hardware version %d.%d.%d\n",
+		   devid, revid,
+		   (reg & MAJOR_BMSK) >> MAJOR_SHFT,
+		   (reg & MINOR_BMSK) >> MINOR_SHFT,
+		   (reg & STEP_BMSK)  >> STEP_SHFT);
+
+	return 0;
+
+err_undo_napi:
+	netif_napi_del(&adpt->rx_q.napi);
+err_undo_mdiobus:
+	mdiobus_unregister(adpt->mii_bus);
+err_undo_clocks:
+	emac_clks_teardown(adpt);
+err_undo_netdev:
+	free_netdev(netdev);
+
+	return ret;
+}
+
+static int emac_remove(struct platform_device *pdev)
+{
+	struct net_device *netdev = dev_get_drvdata(&pdev->dev);
+	struct emac_adapter *adpt = netdev_priv(netdev);
+
+	unregister_netdev(netdev);
+	netif_napi_del(&adpt->rx_q.napi);
+
+	emac_clks_teardown(adpt);
+
+	mdiobus_unregister(adpt->mii_bus);
+	free_netdev(netdev);
+
+	if (adpt->phy.digital)
+		iounmap(adpt->phy.digital);
+	iounmap(adpt->phy.base);
+
+	return 0;
+}
+
+static struct platform_driver emac_platform_driver = {
+	.probe	= emac_probe,
+	.remove	= emac_remove,
+	.driver = {
+		.name		= "qcom-emac",
+		.of_match_table = emac_dt_match,
+		.acpi_match_table = ACPI_PTR(emac_acpi_match),
+	},
+};
+
+module_platform_driver(emac_platform_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:qcom-emac");
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h
new file mode 100644
index 0000000..0c76e6c
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac.h
@@ -0,0 +1,335 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _EMAC_H_
+#define _EMAC_H_
+
+#include <linux/irqreturn.h>
+#include <linux/netdevice.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include "emac-mac.h"
+#include "emac-phy.h"
+
+/* EMAC base register offsets */
+#define EMAC_DMA_MAS_CTRL                                     0x001400
+#define EMAC_IRQ_MOD_TIM_INIT                                 0x001408
+#define EMAC_BLK_IDLE_STS                                     0x00140c
+#define EMAC_PHY_LINK_DELAY                                   0x00141c
+#define EMAC_SYS_ALIV_CTRL                                    0x001434
+#define EMAC_MAC_IPGIFG_CTRL                                  0x001484
+#define EMAC_MAC_STA_ADDR0                                    0x001488
+#define EMAC_MAC_STA_ADDR1                                    0x00148c
+#define EMAC_HASH_TAB_REG0                                    0x001490
+#define EMAC_HASH_TAB_REG1                                    0x001494
+#define EMAC_MAC_HALF_DPLX_CTRL                               0x001498
+#define EMAC_MAX_FRAM_LEN_CTRL                                0x00149c
+#define EMAC_INT_STATUS                                       0x001600
+#define EMAC_INT_MASK                                         0x001604
+#define EMAC_RXMAC_STATC_REG0                                 0x001700
+#define EMAC_RXMAC_STATC_REG22                                0x001758
+#define EMAC_TXMAC_STATC_REG0                                 0x001760
+#define EMAC_TXMAC_STATC_REG24                                0x0017c0
+#define EMAC_CORE_HW_VERSION                                  0x001974
+#define EMAC_IDT_TABLE0                                       0x001b00
+#define EMAC_RXMAC_STATC_REG23                                0x001bc8
+#define EMAC_RXMAC_STATC_REG24                                0x001bcc
+#define EMAC_TXMAC_STATC_REG25                                0x001bd0
+#define EMAC_INT1_MASK                                        0x001bf0
+#define EMAC_INT1_STATUS                                      0x001bf4
+#define EMAC_INT2_MASK                                        0x001bf8
+#define EMAC_INT2_STATUS                                      0x001bfc
+#define EMAC_INT3_MASK                                        0x001c00
+#define EMAC_INT3_STATUS                                      0x001c04
+
+/* EMAC_DMA_MAS_CTRL */
+#define DEV_ID_NUM_BMSK                                     0x7f000000
+#define DEV_ID_NUM_SHFT                                             24
+#define DEV_REV_NUM_BMSK                                      0xff0000
+#define DEV_REV_NUM_SHFT                                            16
+#define INT_RD_CLR_EN                                           0x4000
+#define IRQ_MODERATOR2_EN                                        0x800
+#define IRQ_MODERATOR_EN                                         0x400
+#define LPW_CLK_SEL                                               0x80
+#define LPW_STATE                                                 0x20
+#define LPW_MODE                                                  0x10
+#define SOFT_RST                                                   0x1
+
+/* EMAC_IRQ_MOD_TIM_INIT */
+#define IRQ_MODERATOR2_INIT_BMSK                            0xffff0000
+#define IRQ_MODERATOR2_INIT_SHFT                                    16
+#define IRQ_MODERATOR_INIT_BMSK                                 0xffff
+#define IRQ_MODERATOR_INIT_SHFT                                      0
+
+/* EMAC_INT_STATUS */
+#define DIS_INT                                                BIT(31)
+#define PTP_INT                                                BIT(30)
+#define RFD4_UR_INT                                            BIT(29)
+#define TX_PKT_INT3                                            BIT(26)
+#define TX_PKT_INT2                                            BIT(25)
+#define TX_PKT_INT1                                            BIT(24)
+#define RX_PKT_INT3                                            BIT(19)
+#define RX_PKT_INT2                                            BIT(18)
+#define RX_PKT_INT1                                            BIT(17)
+#define RX_PKT_INT0                                            BIT(16)
+#define TX_PKT_INT                                             BIT(15)
+#define TXQ_TO_INT                                             BIT(14)
+#define GPHY_WAKEUP_INT                                        BIT(13)
+#define GPHY_LINK_DOWN_INT                                     BIT(12)
+#define GPHY_LINK_UP_INT                                       BIT(11)
+#define DMAW_TO_INT                                            BIT(10)
+#define DMAR_TO_INT                                             BIT(9)
+#define TXF_UR_INT                                              BIT(8)
+#define RFD3_UR_INT                                             BIT(7)
+#define RFD2_UR_INT                                             BIT(6)
+#define RFD1_UR_INT                                             BIT(5)
+#define RFD0_UR_INT                                             BIT(4)
+#define RXF_OF_INT                                              BIT(3)
+#define SW_MAN_INT                                              BIT(2)
+
+/* EMAC_MAILBOX_6 */
+#define RFD2_PROC_IDX_BMSK                                   0xfff0000
+#define RFD2_PROC_IDX_SHFT                                          16
+#define RFD2_PROD_IDX_BMSK                                       0xfff
+#define RFD2_PROD_IDX_SHFT                                           0
+
+/* EMAC_CORE_HW_VERSION */
+#define MAJOR_BMSK                                          0xf0000000
+#define MAJOR_SHFT                                                  28
+#define MINOR_BMSK                                           0xfff0000
+#define MINOR_SHFT                                                  16
+#define STEP_BMSK                                               0xffff
+#define STEP_SHFT                                                    0
+
+/* EMAC_EMAC_WRAPPER_CSR1 */
+#define TX_INDX_FIFO_SYNC_RST                                  BIT(23)
+#define TX_TS_FIFO_SYNC_RST                                    BIT(22)
+#define RX_TS_FIFO2_SYNC_RST                                   BIT(21)
+#define RX_TS_FIFO1_SYNC_RST                                   BIT(20)
+#define TX_TS_ENABLE                                           BIT(16)
+#define DIS_1588_CLKS                                          BIT(11)
+#define FREQ_MODE                                               BIT(9)
+#define ENABLE_RRD_TIMESTAMP                                    BIT(3)
+
+/* EMAC_EMAC_WRAPPER_CSR2 */
+#define HDRIVE_BMSK                                             0x3000
+#define HDRIVE_SHFT                                                 12
+#define SLB_EN                                                  BIT(9)
+#define PLB_EN                                                  BIT(8)
+#define WOL_EN                                                  BIT(3)
+#define PHY_RESET                                               BIT(0)
+
+#define EMAC_DEV_ID                                             0x0040
+
+/* SGMII v2 per lane registers */
+#define SGMII_LN_RSM_START             0x029C
+
+/* SGMII v2 PHY common registers */
+#define SGMII_PHY_CMN_CTRL            0x0408
+#define SGMII_PHY_CMN_RESET_CTRL      0x0410
+
+/* SGMII v2 PHY registers per lane */
+#define SGMII_PHY_LN_OFFSET          0x0400
+#define SGMII_PHY_LN_LANE_STATUS     0x00DC
+#define SGMII_PHY_LN_BIST_GEN0       0x008C
+#define SGMII_PHY_LN_BIST_GEN1       0x0090
+#define SGMII_PHY_LN_BIST_GEN2       0x0094
+#define SGMII_PHY_LN_BIST_GEN3       0x0098
+#define SGMII_PHY_LN_CDR_CTRL1       0x005C
+
+enum emac_clk_id {
+	EMAC_CLK_AXI,
+	EMAC_CLK_CFG_AHB,
+	EMAC_CLK_HIGH_SPEED,
+	EMAC_CLK_MDIO,
+	EMAC_CLK_TX,
+	EMAC_CLK_RX,
+	EMAC_CLK_SYS,
+	EMAC_CLK_CNT
+};
+
+#define EMAC_LINK_SPEED_UNKNOWN                                    0x0
+#define EMAC_LINK_SPEED_10_HALF                                 BIT(0)
+#define EMAC_LINK_SPEED_10_FULL                                 BIT(1)
+#define EMAC_LINK_SPEED_100_HALF                                BIT(2)
+#define EMAC_LINK_SPEED_100_FULL                                BIT(3)
+#define EMAC_LINK_SPEED_1GB_FULL                                BIT(5)
+
+#define EMAC_MAX_SETUP_LNK_CYCLE                                   100
+
+/* Wake On Lan */
+#define EMAC_WOL_PHY                     0x00000001 /* PHY Status Change */
+#define EMAC_WOL_MAGIC                   0x00000002 /* Magic Packet */
+
+struct emac_stats {
+	/* rx */
+	u64 rx_ok;              /* good packets */
+	u64 rx_bcast;           /* good broadcast packets */
+	u64 rx_mcast;           /* good multicast packets */
+	u64 rx_pause;           /* pause packet */
+	u64 rx_ctrl;            /* control packets other than pause frame. */
+	u64 rx_fcs_err;         /* packets with bad FCS. */
+	u64 rx_len_err;         /* packets with length mismatch */
+	u64 rx_byte_cnt;        /* good bytes count (without FCS) */
+	u64 rx_runt;            /* runt packets */
+	u64 rx_frag;            /* fragment count */
+	u64 rx_sz_64;	        /* packets that are 64 bytes */
+	u64 rx_sz_65_127;       /* packets that are 65-127 bytes */
+	u64 rx_sz_128_255;      /* packets that are 128-255 bytes */
+	u64 rx_sz_256_511;      /* packets that are 256-511 bytes */
+	u64 rx_sz_512_1023;     /* packets that are 512-1023 bytes */
+	u64 rx_sz_1024_1518;    /* packets that are 1024-1518 bytes */
+	u64 rx_sz_1519_max;     /* packets that are 1519-MTU bytes*/
+	u64 rx_sz_ov;           /* packets that are >MTU bytes (truncated) */
+	u64 rx_rxf_ov;          /* packets dropped due to RX FIFO overflow */
+	u64 rx_align_err;       /* alignment errors */
+	u64 rx_bcast_byte_cnt;  /* broadcast packets byte count (without FCS) */
+	u64 rx_mcast_byte_cnt;  /* multicast packets byte count (without FCS) */
+	u64 rx_err_addr;        /* packets dropped due to address filtering */
+	u64 rx_crc_align;       /* CRC align errors */
+	u64 rx_jabbers;         /* jabbers */
+
+	/* tx */
+	u64 tx_ok;              /* good packets */
+	u64 tx_bcast;           /* good broadcast packets */
+	u64 tx_mcast;           /* good multicast packets */
+	u64 tx_pause;           /* pause packets */
+	u64 tx_exc_defer;       /* packets with excessive deferral */
+	u64 tx_ctrl;            /* control packets other than pause frame */
+	u64 tx_defer;           /* packets that are deferred. */
+	u64 tx_byte_cnt;        /* good bytes count (without FCS) */
+	u64 tx_sz_64;           /* packets that are 64 bytes */
+	u64 tx_sz_65_127;       /* packets that are 65-127 bytes */
+	u64 tx_sz_128_255;      /* packets that are 128-255 bytes */
+	u64 tx_sz_256_511;      /* packets that are 256-511 bytes */
+	u64 tx_sz_512_1023;     /* packets that are 512-1023 bytes */
+	u64 tx_sz_1024_1518;    /* packets that are 1024-1518 bytes */
+	u64 tx_sz_1519_max;     /* packets that are 1519-MTU bytes */
+	u64 tx_1_col;           /* packets single prior collision */
+	u64 tx_2_col;           /* packets with multiple prior collisions */
+	u64 tx_late_col;        /* packets with late collisions */
+	u64 tx_abort_col;       /* packets aborted due to excess collisions */
+	u64 tx_underrun;        /* packets aborted due to FIFO underrun */
+	u64 tx_rd_eop;          /* count of reads beyond EOP */
+	u64 tx_len_err;         /* packets with length mismatch */
+	u64 tx_trunc;           /* packets truncated due to size >MTU */
+	u64 tx_bcast_byte;      /* broadcast packets byte count (without FCS) */
+	u64 tx_mcast_byte;      /* multicast packets byte count (without FCS) */
+	u64 tx_col;             /* collisions */
+
+	spinlock_t lock;	/* prevent multiple simultaneous readers */
+};
+
+/* RSS hstype Definitions */
+#define EMAC_RSS_HSTYP_IPV4_EN				    0x00000001
+#define EMAC_RSS_HSTYP_TCP4_EN				    0x00000002
+#define EMAC_RSS_HSTYP_IPV6_EN				    0x00000004
+#define EMAC_RSS_HSTYP_TCP6_EN				    0x00000008
+#define EMAC_RSS_HSTYP_ALL_EN (\
+		EMAC_RSS_HSTYP_IPV4_EN   |\
+		EMAC_RSS_HSTYP_TCP4_EN   |\
+		EMAC_RSS_HSTYP_IPV6_EN   |\
+		EMAC_RSS_HSTYP_TCP6_EN)
+
+#define EMAC_VLAN_TO_TAG(_vlan, _tag) \
+		(_tag =  ((((_vlan) >> 8) & 0xFF) | (((_vlan) & 0xFF) << 8)))
+
+#define EMAC_TAG_TO_VLAN(_tag, _vlan) \
+		(_vlan = ((((_tag) >> 8) & 0xFF) | (((_tag) & 0xFF) << 8)))
+
+#define EMAC_DEF_RX_BUF_SIZE					  1536
+#define EMAC_MAX_JUMBO_PKT_SIZE				    (9 * 1024)
+#define EMAC_MAX_TX_OFFLOAD_THRESH			    (9 * 1024)
+
+#define EMAC_MAX_ETH_FRAME_SIZE		       EMAC_MAX_JUMBO_PKT_SIZE
+#define EMAC_MIN_ETH_FRAME_SIZE					    68
+
+#define EMAC_DEF_TX_QUEUES					     1
+#define EMAC_DEF_RX_QUEUES					     1
+
+#define EMAC_MIN_TX_DESCS					   128
+#define EMAC_MIN_RX_DESCS					   128
+
+#define EMAC_MAX_TX_DESCS					 16383
+#define EMAC_MAX_RX_DESCS					  2047
+
+#define EMAC_DEF_TX_DESCS					   512
+#define EMAC_DEF_RX_DESCS					   256
+
+#define EMAC_DEF_RX_IRQ_MOD					   250
+#define EMAC_DEF_TX_IRQ_MOD					   250
+
+#define EMAC_WATCHDOG_TIME				      (5 * HZ)
+
+/* by default check link every 4 seconds */
+#define EMAC_TRY_LINK_TIMEOUT				      (4 * HZ)
+
+/* emac_irq per-device (per-adapter) irq properties.
+ * @irq:	irq number.
+ * @mask	mask to use over status register.
+ */
+struct emac_irq {
+	unsigned int	irq;
+	u32		mask;
+};
+
+/* The device's main data structure */
+struct emac_adapter {
+	struct net_device		*netdev;
+	struct mii_bus			*mii_bus;
+	struct phy_device		*phydev;
+
+	void __iomem			*base;
+	void __iomem			*csr;
+
+	struct emac_phy			phy;
+	struct emac_stats		stats;
+
+	struct emac_irq			irq;
+	struct clk			*clk[EMAC_CLK_CNT];
+
+	/* All Descriptor memory */
+	struct emac_ring_header		ring_header;
+	struct emac_tx_queue		tx_q;
+	struct emac_rx_queue		rx_q;
+	unsigned int			tx_desc_cnt;
+	unsigned int			rx_desc_cnt;
+	unsigned int			rrd_size; /* in quad words */
+	unsigned int			rfd_size; /* in quad words */
+	unsigned int			tpd_size; /* in quad words */
+
+	unsigned int			rxbuf_size;
+
+	/* Ring parameter */
+	u8				tpd_burst;
+	u8				rfd_burst;
+	unsigned int			dmaw_dly_cnt;
+	unsigned int			dmar_dly_cnt;
+	enum emac_dma_req_block		dmar_block;
+	enum emac_dma_req_block		dmaw_block;
+	enum emac_dma_order		dma_order;
+
+	u32				irq_mod;
+	u32				preamble;
+
+	struct work_struct		work_thread;
+
+	u16				msg_enable;
+
+	struct mutex			reset_lock;
+};
+
+int emac_reinit_locked(struct emac_adapter *adpt);
+void emac_reg_update32(void __iomem *addr, u32 mask, u32 val);
+irqreturn_t emac_isr(int irq, void *data);
+
+#endif /* _EMAC_H_ */
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index cb29ee2..5ef5d72 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -1062,14 +1062,12 @@
 	/* this should always be supported */
 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 	if (err) {
-		dev_err(&pdev->dev, "32-bit PCI DMA addresses"
-				"not supported by the card\n");
+		dev_err(&pdev->dev, "32-bit PCI DMA addresses not supported by the card\n");
 		goto err_out_disable_dev;
 	}
 	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
 	if (err) {
-		dev_err(&pdev->dev, "32-bit PCI DMA addresses"
-				"not supported by the card\n");
+		dev_err(&pdev->dev, "32-bit PCI DMA addresses not supported by the card\n");
 		goto err_out_disable_dev;
 	}
 
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index deae10d..5297bf7 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -467,8 +467,8 @@
 	unsigned int rx_tail = cp->rx_tail;
 	int rx;
 
-rx_status_loop:
 	rx = 0;
+rx_status_loop:
 	cpw16(IntrStatus, cp_rx_intr_mask);
 
 	while (rx < budget) {
diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig
index 4f132cf..85ec447 100644
--- a/drivers/net/ethernet/renesas/Kconfig
+++ b/drivers/net/ethernet/renesas/Kconfig
@@ -27,7 +27,7 @@
 	  Renesas SuperH Ethernet device driver.
 	  This driver supporting CPUs are:
 		- SH7619, SH7710, SH7712, SH7724, SH7734, SH7763, SH7757,
-		  R8A7740, R8A777x and R8A779x.
+		  R8A7740, R8A774x, R8A777x and R8A779x.
 
 config RAVB
 	tristate "Renesas Ethernet AVB support"
diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h
index 4e5d5e9..f110966 100644
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -1011,7 +1011,6 @@
 	struct work_struct work;
 	/* MII transceiver section. */
 	struct mii_bus *mii_bus;	/* MDIO bus control */
-	struct phy_device *phydev;	/* PHY device control */
 	int link;
 	phy_interface_t phy_interface;
 	int msg_enable;
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 1e1cc0f..630536b 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -942,7 +942,7 @@
 static void ravb_adjust_link(struct net_device *ndev)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
-	struct phy_device *phydev = priv->phydev;
+	struct phy_device *phydev = ndev->phydev;
 	bool new_state = false;
 
 	if (phydev->link) {
@@ -1032,48 +1032,47 @@
 
 	phy_attached_info(phydev);
 
-	priv->phydev = phydev;
-
 	return 0;
 }
 
 /* PHY control start function */
 static int ravb_phy_start(struct net_device *ndev)
 {
-	struct ravb_private *priv = netdev_priv(ndev);
 	int error;
 
 	error = ravb_phy_init(ndev);
 	if (error)
 		return error;
 
-	phy_start(priv->phydev);
+	phy_start(ndev->phydev);
 
 	return 0;
 }
 
-static int ravb_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
+static int ravb_get_link_ksettings(struct net_device *ndev,
+				   struct ethtool_link_ksettings *cmd)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
 	int error = -ENODEV;
 	unsigned long flags;
 
-	if (priv->phydev) {
+	if (ndev->phydev) {
 		spin_lock_irqsave(&priv->lock, flags);
-		error = phy_ethtool_gset(priv->phydev, ecmd);
+		error = phy_ethtool_ksettings_get(ndev->phydev, cmd);
 		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
 	return error;
 }
 
-static int ravb_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
+static int ravb_set_link_ksettings(struct net_device *ndev,
+				   const struct ethtool_link_ksettings *cmd)
 {
 	struct ravb_private *priv = netdev_priv(ndev);
 	unsigned long flags;
 	int error;
 
-	if (!priv->phydev)
+	if (!ndev->phydev)
 		return -ENODEV;
 
 	spin_lock_irqsave(&priv->lock, flags);
@@ -1081,11 +1080,11 @@
 	/* Disable TX and RX */
 	ravb_rcv_snd_disable(ndev);
 
-	error = phy_ethtool_sset(priv->phydev, ecmd);
+	error = phy_ethtool_ksettings_set(ndev->phydev, cmd);
 	if (error)
 		goto error_exit;
 
-	if (ecmd->duplex == DUPLEX_FULL)
+	if (cmd->base.duplex == DUPLEX_FULL)
 		priv->duplex = 1;
 	else
 		priv->duplex = 0;
@@ -1110,9 +1109,9 @@
 	int error = -ENODEV;
 	unsigned long flags;
 
-	if (priv->phydev) {
+	if (ndev->phydev) {
 		spin_lock_irqsave(&priv->lock, flags);
-		error = phy_start_aneg(priv->phydev);
+		error = phy_start_aneg(ndev->phydev);
 		spin_unlock_irqrestore(&priv->lock, flags);
 	}
 
@@ -1309,8 +1308,6 @@
 }
 
 static const struct ethtool_ops ravb_ethtool_ops = {
-	.get_settings		= ravb_get_settings,
-	.set_settings		= ravb_set_settings,
 	.nway_reset		= ravb_nway_reset,
 	.get_msglevel		= ravb_get_msglevel,
 	.set_msglevel		= ravb_set_msglevel,
@@ -1321,6 +1318,8 @@
 	.get_ringparam		= ravb_get_ringparam,
 	.set_ringparam		= ravb_set_ringparam,
 	.get_ts_info		= ravb_get_ts_info,
+	.get_link_ksettings	= ravb_get_link_ksettings,
+	.set_link_ksettings	= ravb_set_link_ksettings,
 };
 
 static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
@@ -1661,10 +1660,9 @@
 	}
 
 	/* PHY disconnect */
-	if (priv->phydev) {
-		phy_stop(priv->phydev);
-		phy_disconnect(priv->phydev);
-		priv->phydev = NULL;
+	if (ndev->phydev) {
+		phy_stop(ndev->phydev);
+		phy_disconnect(ndev->phydev);
 	}
 
 	if (priv->chip_id != RCAR_GEN2) {
@@ -1753,8 +1751,7 @@
 /* ioctl to device function */
 static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
 {
-	struct ravb_private *priv = netdev_priv(ndev);
-	struct phy_device *phydev = priv->phydev;
+	struct phy_device *phydev = ndev->phydev;
 
 	if (!netif_running(ndev))
 		return -EINVAL;
@@ -1876,6 +1873,20 @@
 	return 0;
 }
 
+static void ravb_set_config_mode(struct net_device *ndev)
+{
+	struct ravb_private *priv = netdev_priv(ndev);
+
+	if (priv->chip_id == RCAR_GEN2) {
+		ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
+		/* Set CSEL value */
+		ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
+	} else {
+		ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG |
+			    CCC_GAC | CCC_CSEL_HPB);
+	}
+}
+
 static int ravb_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -1978,14 +1989,7 @@
 	ndev->ethtool_ops = &ravb_ethtool_ops;
 
 	/* Set AVB config mode */
-	if (chip_id == RCAR_GEN2) {
-		ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG);
-		/* Set CSEL value */
-		ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB);
-	} else {
-		ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG |
-			    CCC_GAC | CCC_CSEL_HPB);
-	}
+	ravb_set_config_mode(ndev);
 
 	/* Set GTI value */
 	error = ravb_set_gti(ndev);
@@ -2096,8 +2100,55 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
-static int ravb_runtime_nop(struct device *dev)
+static int __maybe_unused ravb_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (netif_running(ndev)) {
+		netif_device_detach(ndev);
+		ret = ravb_close(ndev);
+	}
+
+	return ret;
+}
+
+static int __maybe_unused ravb_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct ravb_private *priv = netdev_priv(ndev);
+	int ret = 0;
+
+	/* All register have been reset to default values.
+	 * Restore all registers which where setup at probe time and
+	 * reopen device if it was running before system suspended.
+	 */
+
+	/* Set AVB config mode */
+	ravb_set_config_mode(ndev);
+
+	/* Set GTI value */
+	ret = ravb_set_gti(ndev);
+	if (ret)
+		return ret;
+
+	/* Request GTI loading */
+	ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
+
+	/* Restore descriptor base address table */
+	ravb_write(ndev, priv->desc_bat_dma, DBAT);
+
+	if (netif_running(ndev)) {
+		ret = ravb_open(ndev);
+		if (ret < 0)
+			return ret;
+		netif_device_attach(ndev);
+	}
+
+	return ret;
+}
+
+static int __maybe_unused ravb_runtime_nop(struct device *dev)
 {
 	/* Runtime PM callback shared between ->runtime_suspend()
 	 * and ->runtime_resume(). Simply returns success.
@@ -2110,20 +2161,16 @@
 }
 
 static const struct dev_pm_ops ravb_dev_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(ravb_suspend, ravb_resume)
 	SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL)
 };
 
-#define RAVB_PM_OPS (&ravb_dev_pm_ops)
-#else
-#define RAVB_PM_OPS NULL
-#endif
-
 static struct platform_driver ravb_driver = {
 	.probe		= ravb_probe,
 	.remove		= ravb_remove,
 	.driver = {
 		.name	= "ravb",
-		.pm	= RAVB_PM_OPS,
+		.pm	= &ravb_dev_pm_ops,
 		.of_match_table = ravb_match_table,
 	},
 };
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 799d58d..05b0dc5 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -201,9 +201,14 @@
 
 	[ARSTR]		= 0x0000,
 	[TSU_CTRST]	= 0x0004,
+	[TSU_FWSLC]	= 0x0038,
 	[TSU_VTAG0]	= 0x0058,
 	[TSU_ADSBSY]	= 0x0060,
 	[TSU_TEN]	= 0x0064,
+	[TSU_POST1]	= 0x0070,
+	[TSU_POST2]	= 0x0074,
+	[TSU_POST3]	= 0x0078,
+	[TSU_POST4]	= 0x007c,
 	[TSU_ADRH0]	= 0x0100,
 
 	[TXNLCR0]	= 0x0080,
@@ -1723,7 +1728,7 @@
 static void sh_eth_adjust_link(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
-	struct phy_device *phydev = mdp->phydev;
+	struct phy_device *phydev = ndev->phydev;
 	int new_state = 0;
 
 	if (phydev->link) {
@@ -1800,51 +1805,48 @@
 
 	phy_attached_info(phydev);
 
-	mdp->phydev = phydev;
-
 	return 0;
 }
 
 /* PHY control start function */
 static int sh_eth_phy_start(struct net_device *ndev)
 {
-	struct sh_eth_private *mdp = netdev_priv(ndev);
 	int ret;
 
 	ret = sh_eth_phy_init(ndev);
 	if (ret)
 		return ret;
 
-	phy_start(mdp->phydev);
+	phy_start(ndev->phydev);
 
 	return 0;
 }
 
-static int sh_eth_get_settings(struct net_device *ndev,
-			       struct ethtool_cmd *ecmd)
+static int sh_eth_get_link_ksettings(struct net_device *ndev,
+				     struct ethtool_link_ksettings *cmd)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	unsigned long flags;
 	int ret;
 
-	if (!mdp->phydev)
+	if (!ndev->phydev)
 		return -ENODEV;
 
 	spin_lock_irqsave(&mdp->lock, flags);
-	ret = phy_ethtool_gset(mdp->phydev, ecmd);
+	ret = phy_ethtool_ksettings_get(ndev->phydev, cmd);
 	spin_unlock_irqrestore(&mdp->lock, flags);
 
 	return ret;
 }
 
-static int sh_eth_set_settings(struct net_device *ndev,
-			       struct ethtool_cmd *ecmd)
+static int sh_eth_set_link_ksettings(struct net_device *ndev,
+				     const struct ethtool_link_ksettings *cmd)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	unsigned long flags;
 	int ret;
 
-	if (!mdp->phydev)
+	if (!ndev->phydev)
 		return -ENODEV;
 
 	spin_lock_irqsave(&mdp->lock, flags);
@@ -1852,11 +1854,11 @@
 	/* disable tx and rx */
 	sh_eth_rcv_snd_disable(ndev);
 
-	ret = phy_ethtool_sset(mdp->phydev, ecmd);
+	ret = phy_ethtool_ksettings_set(ndev->phydev, cmd);
 	if (ret)
 		goto error_exit;
 
-	if (ecmd->duplex == DUPLEX_FULL)
+	if (cmd->base.duplex == DUPLEX_FULL)
 		mdp->duplex = 1;
 	else
 		mdp->duplex = 0;
@@ -2067,11 +2069,11 @@
 	unsigned long flags;
 	int ret;
 
-	if (!mdp->phydev)
+	if (!ndev->phydev)
 		return -ENODEV;
 
 	spin_lock_irqsave(&mdp->lock, flags);
-	ret = phy_start_aneg(mdp->phydev);
+	ret = phy_start_aneg(ndev->phydev);
 	spin_unlock_irqrestore(&mdp->lock, flags);
 
 	return ret;
@@ -2198,8 +2200,6 @@
 }
 
 static const struct ethtool_ops sh_eth_ethtool_ops = {
-	.get_settings	= sh_eth_get_settings,
-	.set_settings	= sh_eth_set_settings,
 	.get_regs_len	= sh_eth_get_regs_len,
 	.get_regs	= sh_eth_get_regs,
 	.nway_reset	= sh_eth_nway_reset,
@@ -2211,6 +2211,8 @@
 	.get_sset_count     = sh_eth_get_sset_count,
 	.get_ringparam	= sh_eth_get_ringparam,
 	.set_ringparam	= sh_eth_set_ringparam,
+	.get_link_ksettings = sh_eth_get_link_ksettings,
+	.set_link_ksettings = sh_eth_set_link_ksettings,
 };
 
 /* network device open function */
@@ -2408,10 +2410,9 @@
 	sh_eth_dev_exit(ndev);
 
 	/* PHY Disconnect */
-	if (mdp->phydev) {
-		phy_stop(mdp->phydev);
-		phy_disconnect(mdp->phydev);
-		mdp->phydev = NULL;
+	if (ndev->phydev) {
+		phy_stop(ndev->phydev);
+		phy_disconnect(ndev->phydev);
 	}
 
 	free_irq(ndev->irq, ndev);
@@ -2429,8 +2430,7 @@
 /* ioctl to device function */
 static int sh_eth_do_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
 {
-	struct sh_eth_private *mdp = netdev_priv(ndev);
-	struct phy_device *phydev = mdp->phydev;
+	struct phy_device *phydev = ndev->phydev;
 
 	if (!netif_running(ndev))
 		return -EINVAL;
@@ -2786,6 +2786,8 @@
 {
 	if (sh_eth_is_rz_fast_ether(mdp)) {
 		sh_eth_tsu_write(mdp, 0, TSU_TEN); /* Disable all CAM entry */
+		sh_eth_tsu_write(mdp, TSU_FWSLC_POSTENU | TSU_FWSLC_POSTENL,
+				 TSU_FWSLC);	/* Enable POST registers */
 		return;
 	}
 
@@ -2957,6 +2959,8 @@
 
 static const struct of_device_id sh_eth_match_table[] = {
 	{ .compatible = "renesas,gether-r8a7740", .data = &r8a7740_data },
+	{ .compatible = "renesas,ether-r8a7743", .data = &r8a779x_data },
+	{ .compatible = "renesas,ether-r8a7745", .data = &r8a779x_data },
 	{ .compatible = "renesas,ether-r8a7778", .data = &r8a777x_data },
 	{ .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data },
 	{ .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data },
diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h
index c62380e..d050f37 100644
--- a/drivers/net/ethernet/renesas/sh_eth.h
+++ b/drivers/net/ethernet/renesas/sh_eth.h
@@ -518,7 +518,6 @@
 	/* MII transceiver section. */
 	u32 phy_id;			/* PHY ID */
 	struct mii_bus *mii_bus;	/* MDIO bus control */
-	struct phy_device *phydev;	/* PHY device control */
 	int link;
 	phy_interface_t phy_interface;
 	int msg_enable;
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
index 1ab995f..2eb9b49 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/netdevice.h>
+#include <linux/notifier.h>
 #include <net/neighbour.h>
 #include <net/switchdev.h>
 
@@ -52,6 +53,9 @@
 	struct rocker_dma_ring_info rx_ring;
 };
 
+struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev,
+					       struct rocker *rocker);
+
 struct rocker_world_ops;
 
 struct rocker {
@@ -66,6 +70,7 @@
 	spinlock_t cmd_ring_lock;		/* for cmd ring accesses */
 	struct rocker_dma_ring_info cmd_ring;
 	struct rocker_dma_ring_info event_ring;
+	struct notifier_block fib_nb;
 	struct rocker_world_ops *wops;
 	void *wpriv;
 };
@@ -117,11 +122,6 @@
 	int (*port_obj_vlan_dump)(const struct rocker_port *rocker_port,
 				  struct switchdev_obj_port_vlan *vlan,
 				  switchdev_obj_dump_cb_t *cb);
-	int (*port_obj_fib4_add)(struct rocker_port *rocker_port,
-				 const struct switchdev_obj_ipv4_fib *fib4,
-				 struct switchdev_trans *trans);
-	int (*port_obj_fib4_del)(struct rocker_port *rocker_port,
-				 const struct switchdev_obj_ipv4_fib *fib4);
 	int (*port_obj_fdb_add)(struct rocker_port *rocker_port,
 				const struct switchdev_obj_port_fdb *fdb,
 				struct switchdev_trans *trans);
@@ -141,6 +141,11 @@
 	int (*port_ev_mac_vlan_seen)(struct rocker_port *rocker_port,
 				     const unsigned char *addr,
 				     __be16 vlan_id);
+	int (*fib4_add)(struct rocker *rocker,
+			const struct fib_entry_notifier_info *fen_info);
+	int (*fib4_del)(struct rocker *rocker,
+			const struct fib_entry_notifier_info *fen_info);
+	void (*fib4_abort)(struct rocker *rocker);
 };
 
 extern struct rocker_world_ops rocker_ofdpa_ops;
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index f0b09b0..5424fb3 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -1625,29 +1625,6 @@
 }
 
 static int
-rocker_world_port_obj_fib4_add(struct rocker_port *rocker_port,
-			       const struct switchdev_obj_ipv4_fib *fib4,
-			       struct switchdev_trans *trans)
-{
-	struct rocker_world_ops *wops = rocker_port->rocker->wops;
-
-	if (!wops->port_obj_fib4_add)
-		return -EOPNOTSUPP;
-	return wops->port_obj_fib4_add(rocker_port, fib4, trans);
-}
-
-static int
-rocker_world_port_obj_fib4_del(struct rocker_port *rocker_port,
-			       const struct switchdev_obj_ipv4_fib *fib4)
-{
-	struct rocker_world_ops *wops = rocker_port->rocker->wops;
-
-	if (!wops->port_obj_fib4_del)
-		return -EOPNOTSUPP;
-	return wops->port_obj_fib4_del(rocker_port, fib4);
-}
-
-static int
 rocker_world_port_obj_fdb_add(struct rocker_port *rocker_port,
 			      const struct switchdev_obj_port_fdb *fdb,
 			      struct switchdev_trans *trans)
@@ -1733,6 +1710,34 @@
 	return wops->port_ev_mac_vlan_seen(rocker_port, addr, vlan_id);
 }
 
+static int rocker_world_fib4_add(struct rocker *rocker,
+				 const struct fib_entry_notifier_info *fen_info)
+{
+	struct rocker_world_ops *wops = rocker->wops;
+
+	if (!wops->fib4_add)
+		return 0;
+	return wops->fib4_add(rocker, fen_info);
+}
+
+static int rocker_world_fib4_del(struct rocker *rocker,
+				 const struct fib_entry_notifier_info *fen_info)
+{
+	struct rocker_world_ops *wops = rocker->wops;
+
+	if (!wops->fib4_del)
+		return 0;
+	return wops->fib4_del(rocker, fen_info);
+}
+
+static void rocker_world_fib4_abort(struct rocker *rocker)
+{
+	struct rocker_world_ops *wops = rocker->wops;
+
+	if (wops->fib4_abort)
+		wops->fib4_abort(rocker);
+}
+
 /*****************
  * Net device ops
  *****************/
@@ -2096,11 +2101,6 @@
 						     SWITCHDEV_OBJ_PORT_VLAN(obj),
 						     trans);
 		break;
-	case SWITCHDEV_OBJ_ID_IPV4_FIB:
-		err = rocker_world_port_obj_fib4_add(rocker_port,
-						     SWITCHDEV_OBJ_IPV4_FIB(obj),
-						     trans);
-		break;
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
 		err = rocker_world_port_obj_fdb_add(rocker_port,
 						    SWITCHDEV_OBJ_PORT_FDB(obj),
@@ -2125,10 +2125,6 @@
 		err = rocker_world_port_obj_vlan_del(rocker_port,
 						     SWITCHDEV_OBJ_PORT_VLAN(obj));
 		break;
-	case SWITCHDEV_OBJ_ID_IPV4_FIB:
-		err = rocker_world_port_obj_fib4_del(rocker_port,
-						     SWITCHDEV_OBJ_IPV4_FIB(obj));
-		break;
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
 		err = rocker_world_port_obj_fdb_del(rocker_port,
 						    SWITCHDEV_OBJ_PORT_FDB(obj));
@@ -2175,6 +2171,31 @@
 	.switchdev_port_obj_dump	= rocker_port_obj_dump,
 };
 
+static int rocker_router_fib_event(struct notifier_block *nb,
+				   unsigned long event, void *ptr)
+{
+	struct rocker *rocker = container_of(nb, struct rocker, fib_nb);
+	struct fib_entry_notifier_info *fen_info = ptr;
+	int err;
+
+	switch (event) {
+	case FIB_EVENT_ENTRY_ADD:
+		err = rocker_world_fib4_add(rocker, fen_info);
+		if (err)
+			rocker_world_fib4_abort(rocker);
+		else
+		break;
+	case FIB_EVENT_ENTRY_DEL:
+		rocker_world_fib4_del(rocker, fen_info);
+		break;
+	case FIB_EVENT_RULE_ADD: /* fall through */
+	case FIB_EVENT_RULE_DEL:
+		rocker_world_fib4_abort(rocker);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
 /********************
  * ethtool interface
  ********************/
@@ -2412,7 +2433,7 @@
 	skb->protocol = eth_type_trans(skb, rocker_port->dev);
 
 	if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD)
-		skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark;
+		skb->offload_fwd_mark = 1;
 
 	rocker_port->dev->stats.rx_packets++;
 	rocker_port->dev->stats.rx_bytes += skb->len;
@@ -2740,6 +2761,9 @@
 		goto err_probe_ports;
 	}
 
+	rocker->fib_nb.notifier_call = rocker_router_fib_event;
+	register_fib_notifier(&rocker->fib_nb);
+
 	dev_info(&pdev->dev, "Rocker switch with id %*phN\n",
 		 (int)sizeof(rocker->hw.id), &rocker->hw.id);
 
@@ -2771,6 +2795,7 @@
 {
 	struct rocker *rocker = pci_get_drvdata(pdev);
 
+	unregister_fib_notifier(&rocker->fib_nb);
 	rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
 	rocker_remove_ports(rocker);
 	free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
@@ -2799,6 +2824,37 @@
 	return dev->netdev_ops == &rocker_port_netdev_ops;
 }
 
+static bool rocker_port_dev_check_under(const struct net_device *dev,
+					struct rocker *rocker)
+{
+	struct rocker_port *rocker_port;
+
+	if (!rocker_port_dev_check(dev))
+		return false;
+
+	rocker_port = netdev_priv(dev);
+	if (rocker_port->rocker != rocker)
+		return false;
+
+	return true;
+}
+
+struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev,
+					       struct rocker *rocker)
+{
+	struct net_device *lower_dev;
+	struct list_head *iter;
+
+	if (rocker_port_dev_check_under(dev, rocker))
+		return netdev_priv(dev);
+
+	netdev_for_each_all_lower_dev(dev, lower_dev, iter) {
+		if (rocker_port_dev_check_under(lower_dev, rocker))
+			return netdev_priv(lower_dev);
+	}
+	return NULL;
+}
+
 static int rocker_netdevice_event(struct notifier_block *unused,
 				  unsigned long event, void *ptr)
 {
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 1ca7963..431a608 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -99,6 +99,7 @@
 	struct ofdpa_flow_tbl_key key;
 	size_t key_len;
 	u32 key_crc32; /* key */
+	struct fib_info *fi;
 };
 
 struct ofdpa_group_tbl_entry {
@@ -189,6 +190,7 @@
 	spinlock_t neigh_tbl_lock;		/* for neigh tbl accesses */
 	u32 neigh_tbl_next_index;
 	unsigned long ageing_time;
+	bool fib_aborted;
 };
 
 struct ofdpa_port {
@@ -1043,7 +1045,8 @@
 					 __be16 eth_type, __be32 dst,
 					 __be32 dst_mask, u32 priority,
 					 enum rocker_of_dpa_table_id goto_tbl,
-					 u32 group_id, int flags)
+					 u32 group_id, struct fib_info *fi,
+					 int flags)
 {
 	struct ofdpa_flow_tbl_entry *entry;
 
@@ -1060,6 +1063,7 @@
 	entry->key.ucast_routing.group_id = group_id;
 	entry->key_len = offsetof(struct ofdpa_flow_tbl_key,
 				  ucast_routing.group_id);
+	entry->fi = fi;
 
 	return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry);
 }
@@ -1425,7 +1429,7 @@
 						    eth_type, ip_addr,
 						    inet_make_mask(32),
 						    priority, goto_tbl,
-						    group_id, flags);
+						    group_id, NULL, flags);
 
 		if (err)
 			netdev_err(ofdpa_port->dev, "Error (%d) /32 unicast route %pI4 group 0x%08x\n",
@@ -2390,7 +2394,7 @@
 
 static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port,
 			       struct switchdev_trans *trans, __be32 dst,
-			       int dst_len, const struct fib_info *fi,
+			       int dst_len, struct fib_info *fi,
 			       u32 tb_id, int flags)
 {
 	const struct fib_nh *nh;
@@ -2426,7 +2430,7 @@
 
 	err = ofdpa_flow_tbl_ucast4_routing(ofdpa_port, trans, eth_type, dst,
 					    dst_mask, priority, goto_tbl,
-					    group_id, flags);
+					    group_id, fi, flags);
 	if (err)
 		netdev_err(ofdpa_port->dev, "Error (%d) IPv4 route %pI4\n",
 			   err, &dst);
@@ -2558,7 +2562,6 @@
 	struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
 	int err;
 
-	switchdev_port_fwd_mark_set(ofdpa_port->dev, NULL, false);
 	rocker_port_set_learning(rocker_port,
 				 !!(ofdpa_port->brport_flags & BR_LEARNING));
 
@@ -2719,28 +2722,6 @@
 	return err;
 }
 
-static int ofdpa_port_obj_fib4_add(struct rocker_port *rocker_port,
-				   const struct switchdev_obj_ipv4_fib *fib4,
-				   struct switchdev_trans *trans)
-{
-	struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
-
-	return ofdpa_port_fib_ipv4(ofdpa_port, trans,
-				   htonl(fib4->dst), fib4->dst_len,
-				   fib4->fi, fib4->tb_id, 0);
-}
-
-static int ofdpa_port_obj_fib4_del(struct rocker_port *rocker_port,
-				   const struct switchdev_obj_ipv4_fib *fib4)
-{
-	struct ofdpa_port *ofdpa_port = rocker_port->wpriv;
-
-	return ofdpa_port_fib_ipv4(ofdpa_port, NULL,
-				   htonl(fib4->dst), fib4->dst_len,
-				   fib4->fi, fib4->tb_id,
-				   OFDPA_OP_FLAG_REMOVE);
-}
-
 static int ofdpa_port_obj_fdb_add(struct rocker_port *rocker_port,
 				  const struct switchdev_obj_port_fdb *fdb,
 				  struct switchdev_trans *trans)
@@ -2817,7 +2798,6 @@
 		ofdpa_port_internal_vlan_id_get(ofdpa_port, bridge->ifindex);
 
 	ofdpa_port->bridge_dev = bridge;
-	switchdev_port_fwd_mark_set(ofdpa_port->dev, bridge, true);
 
 	return ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0);
 }
@@ -2836,8 +2816,6 @@
 		ofdpa_port_internal_vlan_id_get(ofdpa_port,
 						ofdpa_port->dev->ifindex);
 
-	switchdev_port_fwd_mark_set(ofdpa_port->dev, ofdpa_port->bridge_dev,
-				    false);
 	ofdpa_port->bridge_dev = NULL;
 
 	err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0);
@@ -2926,6 +2904,82 @@
 	return ofdpa_port_fdb(ofdpa_port, NULL, addr, vlan_id, flags);
 }
 
+static struct ofdpa_port *ofdpa_port_dev_lower_find(struct net_device *dev,
+						    struct rocker *rocker)
+{
+	struct rocker_port *rocker_port;
+
+	rocker_port = rocker_port_dev_lower_find(dev, rocker);
+	return rocker_port ? rocker_port->wpriv : NULL;
+}
+
+static int ofdpa_fib4_add(struct rocker *rocker,
+			  const struct fib_entry_notifier_info *fen_info)
+{
+	struct ofdpa *ofdpa = rocker->wpriv;
+	struct ofdpa_port *ofdpa_port;
+	int err;
+
+	if (ofdpa->fib_aborted)
+		return 0;
+	ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+	if (!ofdpa_port)
+		return 0;
+	err = ofdpa_port_fib_ipv4(ofdpa_port, NULL, htonl(fen_info->dst),
+				  fen_info->dst_len, fen_info->fi,
+				  fen_info->tb_id, 0);
+	if (err)
+		return err;
+	fib_info_offload_inc(fen_info->fi);
+	return 0;
+}
+
+static int ofdpa_fib4_del(struct rocker *rocker,
+			  const struct fib_entry_notifier_info *fen_info)
+{
+	struct ofdpa *ofdpa = rocker->wpriv;
+	struct ofdpa_port *ofdpa_port;
+
+	if (ofdpa->fib_aborted)
+		return 0;
+	ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+	if (!ofdpa_port)
+		return 0;
+	fib_info_offload_dec(fen_info->fi);
+	return ofdpa_port_fib_ipv4(ofdpa_port, NULL, htonl(fen_info->dst),
+				   fen_info->dst_len, fen_info->fi,
+				   fen_info->tb_id, OFDPA_OP_FLAG_REMOVE);
+}
+
+static void ofdpa_fib4_abort(struct rocker *rocker)
+{
+	struct ofdpa *ofdpa = rocker->wpriv;
+	struct ofdpa_port *ofdpa_port;
+	struct ofdpa_flow_tbl_entry *flow_entry;
+	struct hlist_node *tmp;
+	unsigned long flags;
+	int bkt;
+
+	if (ofdpa->fib_aborted)
+		return;
+
+	spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags);
+	hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) {
+		if (flow_entry->key.tbl_id !=
+		    ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING)
+			continue;
+		ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev,
+						       rocker);
+		if (!ofdpa_port)
+			continue;
+		fib_info_offload_dec(flow_entry->fi);
+		ofdpa_flow_tbl_del(ofdpa_port, NULL, OFDPA_OP_FLAG_REMOVE,
+				   flow_entry);
+	}
+	spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, flags);
+	ofdpa->fib_aborted = true;
+}
+
 struct rocker_world_ops rocker_ofdpa_ops = {
 	.kind = "ofdpa",
 	.priv_size = sizeof(struct ofdpa),
@@ -2945,8 +2999,6 @@
 	.port_obj_vlan_add = ofdpa_port_obj_vlan_add,
 	.port_obj_vlan_del = ofdpa_port_obj_vlan_del,
 	.port_obj_vlan_dump = ofdpa_port_obj_vlan_dump,
-	.port_obj_fib4_add = ofdpa_port_obj_fib4_add,
-	.port_obj_fib4_del = ofdpa_port_obj_fib4_del,
 	.port_obj_fdb_add = ofdpa_port_obj_fdb_add,
 	.port_obj_fdb_del = ofdpa_port_obj_fdb_del,
 	.port_obj_fdb_dump = ofdpa_port_obj_fdb_dump,
@@ -2955,4 +3007,7 @@
 	.port_neigh_update = ofdpa_port_neigh_update,
 	.port_neigh_destroy = ofdpa_port_neigh_destroy,
 	.port_ev_mac_vlan_seen = ofdpa_port_ev_mac_vlan_seen,
+	.fib4_add = ofdpa_fib4_add,
+	.fib4_del = ofdpa_fib4_del,
+	.fib4_abort = ofdpa_fib4_abort,
 };
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index f658fee..00279da 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -177,7 +177,7 @@
 
 static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
 {
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN);
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V2_OUT_LEN);
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	size_t outlen;
 	int rc;
@@ -188,7 +188,7 @@
 			  outbuf, sizeof(outbuf), &outlen);
 	if (rc)
 		return rc;
-	if (outlen < sizeof(outbuf)) {
+	if (outlen < MC_CMD_GET_CAPABILITIES_OUT_LEN) {
 		netif_err(efx, drv, efx->net_dev,
 			  "unable to read datapath firmware capabilities\n");
 		return -EIO;
@@ -197,6 +197,12 @@
 	nic_data->datapath_caps =
 		MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1);
 
+	if (outlen >= MC_CMD_GET_CAPABILITIES_V2_OUT_LEN)
+		nic_data->datapath_caps2 = MCDI_DWORD(outbuf,
+				GET_CAPABILITIES_V2_OUT_FLAGS2);
+	else
+		nic_data->datapath_caps2 = 0;
+
 	/* record the DPCPU firmware IDs to determine VEB vswitching support.
 	 */
 	nic_data->rx_dpcpu_fw_id =
@@ -227,6 +233,116 @@
 	return rc > 0 ? rc : -ERANGE;
 }
 
+static int efx_ef10_get_timer_workarounds(struct efx_nic *efx)
+{
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	unsigned int implemented;
+	unsigned int enabled;
+	int rc;
+
+	nic_data->workaround_35388 = false;
+	nic_data->workaround_61265 = false;
+
+	rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled);
+
+	if (rc == -ENOSYS) {
+		/* Firmware without GET_WORKAROUNDS - not a problem. */
+		rc = 0;
+	} else if (rc == 0) {
+		/* Bug61265 workaround is always enabled if implemented. */
+		if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG61265)
+			nic_data->workaround_61265 = true;
+
+		if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) {
+			nic_data->workaround_35388 = true;
+		} else if (implemented & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) {
+			/* Workaround is implemented but not enabled.
+			 * Try to enable it.
+			 */
+			rc = efx_mcdi_set_workaround(efx,
+						     MC_CMD_WORKAROUND_BUG35388,
+						     true, NULL);
+			if (rc == 0)
+				nic_data->workaround_35388 = true;
+			/* If we failed to set the workaround just carry on. */
+			rc = 0;
+		}
+	}
+
+	netif_dbg(efx, probe, efx->net_dev,
+		  "workaround for bug 35388 is %sabled\n",
+		  nic_data->workaround_35388 ? "en" : "dis");
+	netif_dbg(efx, probe, efx->net_dev,
+		  "workaround for bug 61265 is %sabled\n",
+		  nic_data->workaround_61265 ? "en" : "dis");
+
+	return rc;
+}
+
+static void efx_ef10_process_timer_config(struct efx_nic *efx,
+					  const efx_dword_t *data)
+{
+	unsigned int max_count;
+
+	if (EFX_EF10_WORKAROUND_61265(efx)) {
+		efx->timer_quantum_ns = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS);
+		efx->timer_max_ns = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS);
+	} else if (EFX_EF10_WORKAROUND_35388(efx)) {
+		efx->timer_quantum_ns = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT);
+		max_count = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT);
+		efx->timer_max_ns = max_count * efx->timer_quantum_ns;
+	} else {
+		efx->timer_quantum_ns = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT);
+		max_count = MCDI_DWORD(data,
+			GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT);
+		efx->timer_max_ns = max_count * efx->timer_quantum_ns;
+	}
+
+	netif_dbg(efx, probe, efx->net_dev,
+		  "got timer properties from MC: quantum %u ns; max %u ns\n",
+		  efx->timer_quantum_ns, efx->timer_max_ns);
+}
+
+static int efx_ef10_get_timer_config(struct efx_nic *efx)
+{
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN);
+	int rc;
+
+	rc = efx_ef10_get_timer_workarounds(efx);
+	if (rc)
+		return rc;
+
+	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES, NULL, 0,
+				outbuf, sizeof(outbuf), NULL);
+
+	if (rc == 0) {
+		efx_ef10_process_timer_config(efx, outbuf);
+	} else if (rc == -ENOSYS || rc == -EPERM) {
+		/* Not available - fall back to Huntington defaults. */
+		unsigned int quantum;
+
+		rc = efx_ef10_get_sysclk_freq(efx);
+		if (rc < 0)
+			return rc;
+
+		quantum = 1536000 / rc; /* 1536 cycles */
+		efx->timer_quantum_ns = quantum;
+		efx->timer_max_ns = efx->type->timer_period_max * quantum;
+		rc = 0;
+	} else {
+		efx_mcdi_display_error(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES,
+				       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN,
+				       NULL, 0, rc);
+	}
+
+	return rc;
+}
+
 static int efx_ef10_get_mac_address_pf(struct efx_nic *efx, u8 *mac_address)
 {
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN);
@@ -527,32 +643,9 @@
 	if (rc)
 		goto fail5;
 
-	rc = efx_ef10_get_sysclk_freq(efx);
+	rc = efx_ef10_get_timer_config(efx);
 	if (rc < 0)
 		goto fail5;
-	efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */
-
-	/* Check whether firmware supports bug 35388 workaround.
-	 * First try to enable it, then if we get EPERM, just
-	 * ask if it's already enabled
-	 */
-	rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true, NULL);
-	if (rc == 0) {
-		nic_data->workaround_35388 = true;
-	} else if (rc == -EPERM) {
-		unsigned int enabled;
-
-		rc = efx_mcdi_get_workarounds(efx, NULL, &enabled);
-		if (rc)
-			goto fail3;
-		nic_data->workaround_35388 = enabled &
-			MC_CMD_GET_WORKAROUNDS_OUT_BUG35388;
-	} else if (rc != -ENOSYS && rc != -ENOENT) {
-		goto fail5;
-	}
-	netif_dbg(efx, probe, efx->net_dev,
-		  "workaround for bug 35388 is %sabled\n",
-		  nic_data->workaround_35388 ? "en" : "dis");
 
 	rc = efx_mcdi_mon_probe(efx);
 	if (rc && rc != -EPERM)
@@ -1440,9 +1533,10 @@
 			       (1ULL << GENERIC_STAT_rx_nodesc_trunc) |	\
 			       (1ULL << GENERIC_STAT_rx_noskb_drops))
 
-/* These statistics are only provided by the 10G MAC.  For a 10G/40G
- * switchable port we do not expose these because they might not
- * include all the packets they should.
+/* On 7000 series NICs, these statistics are only provided by the 10G MAC.
+ * For a 10G/40G switchable port we do not expose these because they might
+ * not include all the packets they should.
+ * On 8000 series NICs these statistics are always provided.
  */
 #define HUNT_10G_ONLY_STAT_MASK ((1ULL << EF10_STAT_port_tx_control) |	\
 				 (1ULL << EF10_STAT_port_tx_lt64) |	\
@@ -1488,10 +1582,15 @@
 	      1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL))
 		return 0;
 
-	if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
+	if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) {
 		raw_mask |= HUNT_40G_EXTRA_STAT_MASK;
-	else
+		/* 8000 series have everything even at 40G */
+		if (nic_data->datapath_caps2 &
+		    (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN))
+			raw_mask |= HUNT_10G_ONLY_STAT_MASK;
+	} else {
 		raw_mask |= HUNT_10G_ONLY_STAT_MASK;
+	}
 
 	if (nic_data->datapath_caps &
 	    (1 << MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_LBN))
@@ -1517,13 +1616,14 @@
 	}
 
 #if BITS_PER_LONG == 64
+	BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 2);
 	mask[0] = raw_mask[0];
 	mask[1] = raw_mask[1];
 #else
+	BUILD_BUG_ON(BITS_TO_LONGS(EF10_STAT_COUNT) != 3);
 	mask[0] = raw_mask[0] & 0xffffffff;
 	mask[1] = raw_mask[0] >> 32;
 	mask[2] = raw_mask[1] & 0xffffffff;
-	mask[3] = raw_mask[1] >> 32;
 #endif
 }
 
@@ -1616,7 +1716,6 @@
 	efx_ef10_get_stat_mask(efx, mask);
 
 	dma_stats = efx->stats_buffer.addr;
-	nic_data = efx->nic_data;
 
 	generation_end = dma_stats[MC_CMD_MAC_GENERATION_END];
 	if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
@@ -1743,27 +1842,43 @@
 static void efx_ef10_push_irq_moderation(struct efx_channel *channel)
 {
 	struct efx_nic *efx = channel->efx;
-	unsigned int mode, value;
+	unsigned int mode, usecs;
 	efx_dword_t timer_cmd;
 
-	if (channel->irq_moderation) {
+	if (channel->irq_moderation_us) {
 		mode = 3;
-		value = channel->irq_moderation - 1;
+		usecs = channel->irq_moderation_us;
 	} else {
 		mode = 0;
-		value = 0;
+		usecs = 0;
 	}
 
-	if (EFX_EF10_WORKAROUND_35388(efx)) {
+	if (EFX_EF10_WORKAROUND_61265(efx)) {
+		MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_EVQ_TMR_IN_LEN);
+		unsigned int ns = usecs * 1000;
+
+		MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_INSTANCE,
+			       channel->channel);
+		MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS, ns);
+		MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS, ns);
+		MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_MODE, mode);
+
+		efx_mcdi_rpc_async(efx, MC_CMD_SET_EVQ_TMR,
+				   inbuf, sizeof(inbuf), 0, NULL, 0);
+	} else if (EFX_EF10_WORKAROUND_35388(efx)) {
+		unsigned int ticks = efx_usecs_to_ticks(efx, usecs);
+
 		EFX_POPULATE_DWORD_3(timer_cmd, ERF_DD_EVQ_IND_TIMER_FLAGS,
 				     EFE_DD_EVQ_IND_TIMER_FLAGS,
 				     ERF_DD_EVQ_IND_TIMER_MODE, mode,
-				     ERF_DD_EVQ_IND_TIMER_VAL, value);
+				     ERF_DD_EVQ_IND_TIMER_VAL, ticks);
 		efx_writed_page(efx, &timer_cmd, ER_DD_EVQ_INDIRECT,
 				channel->channel);
 	} else {
+		unsigned int ticks = efx_usecs_to_ticks(efx, usecs);
+
 		EFX_POPULATE_DWORD_2(timer_cmd, ERF_DZ_TC_TIMER_MODE, mode,
-				     ERF_DZ_TC_TIMER_VAL, value);
+				     ERF_DZ_TC_TIMER_VAL, ticks);
 		efx_writed_page(efx, &timer_cmd, ER_DZ_EVQ_TMR,
 				channel->channel);
 	}
@@ -1934,14 +2049,18 @@
 	return IRQ_HANDLED;
 }
 
-static void efx_ef10_irq_test_generate(struct efx_nic *efx)
+static int efx_ef10_irq_test_generate(struct efx_nic *efx)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_TRIGGER_INTERRUPT_IN_LEN);
 
+	if (efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG41750, true,
+				    NULL) == 0)
+		return -ENOTSUPP;
+
 	BUILD_BUG_ON(MC_CMD_TRIGGER_INTERRUPT_OUT_LEN != 0);
 
 	MCDI_SET_DWORD(inbuf, TRIGGER_INTERRUPT_IN_INTR_LEVEL, efx->irq_level);
-	(void) efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT,
+	return efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT,
 			    inbuf, sizeof(inbuf), NULL, 0, NULL);
 }
 
@@ -2535,13 +2654,12 @@
 static int efx_ef10_ev_init(struct efx_channel *channel)
 {
 	MCDI_DECLARE_BUF(inbuf,
-			 MC_CMD_INIT_EVQ_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
-						EFX_BUF_SIZE));
-	MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_OUT_LEN);
+			 MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
+						   EFX_BUF_SIZE));
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN);
 	size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE;
 	struct efx_nic *efx = channel->efx;
 	struct efx_ef10_nic_data *nic_data;
-	bool supports_rx_merge;
 	size_t inlen, outlen;
 	unsigned int enabled, implemented;
 	dma_addr_t dma_addr;
@@ -2549,9 +2667,6 @@
 	int i;
 
 	nic_data = efx->nic_data;
-	supports_rx_merge =
-		!!(nic_data->datapath_caps &
-		   1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
 
 	/* Fill event queue with all ones (i.e. empty events) */
 	memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
@@ -2560,11 +2675,6 @@
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel);
 	/* INIT_EVQ expects index in vector table, not absolute */
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel);
-	MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
-			      INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
-			      INIT_EVQ_IN_FLAG_RX_MERGE, 1,
-			      INIT_EVQ_IN_FLAG_TX_MERGE, 1,
-			      INIT_EVQ_IN_FLAG_CUT_THRU, !supports_rx_merge);
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE,
 		       MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS);
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0);
@@ -2573,6 +2683,27 @@
 		       MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS);
 	MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0);
 
+	if (nic_data->datapath_caps2 &
+	    1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) {
+		/* Use the new generic approach to specifying event queue
+		 * configuration, requesting lower latency or higher throughput.
+		 * The options that actually get used appear in the output.
+		 */
+		MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS,
+				      INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1,
+				      INIT_EVQ_V2_IN_FLAG_TYPE,
+				      MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO);
+	} else {
+		bool cut_thru = !(nic_data->datapath_caps &
+			1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
+
+		MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
+				      INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
+				      INIT_EVQ_IN_FLAG_RX_MERGE, 1,
+				      INIT_EVQ_IN_FLAG_TX_MERGE, 1,
+				      INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru);
+	}
+
 	dma_addr = channel->eventq.buf.dma_addr;
 	for (i = 0; i < entries; ++i) {
 		MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr);
@@ -2583,6 +2714,13 @@
 
 	rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen,
 			  outbuf, sizeof(outbuf), &outlen);
+
+	if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN)
+		netif_dbg(efx, drv, efx->net_dev,
+			  "Channel %d using event queue flags %08x\n",
+			  channel->channel,
+			  MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS));
+
 	/* IRQ return is ignored */
 	if (channel->channel || rc)
 		return rc;
@@ -2590,8 +2728,8 @@
 	/* Successfully created event queue on channel 0 */
 	rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled);
 	if (rc == -ENOSYS) {
-		/* GET_WORKAROUNDS was implemented before the bug26807
-		 * workaround, thus the latter must be unavailable in this fw
+		/* GET_WORKAROUNDS was implemented before this workaround,
+		 * thus it must be unavailable in this firmware.
 		 */
 		nic_data->workaround_26807 = false;
 		rc = 0;
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 14b821b..3cf3557 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -281,6 +281,27 @@
  * NAPI guarantees serialisation of polls of the same device, which
  * provides the guarantee required by efx_process_channel().
  */
+static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
+{
+	int step = efx->irq_mod_step_us;
+
+	if (channel->irq_mod_score < irq_adapt_low_thresh) {
+		if (channel->irq_moderation_us > step) {
+			channel->irq_moderation_us -= step;
+			efx->type->push_irq_moderation(channel);
+		}
+	} else if (channel->irq_mod_score > irq_adapt_high_thresh) {
+		if (channel->irq_moderation_us <
+		    efx->irq_rx_moderation_us) {
+			channel->irq_moderation_us += step;
+			efx->type->push_irq_moderation(channel);
+		}
+	}
+
+	channel->irq_count = 0;
+	channel->irq_mod_score = 0;
+}
+
 static int efx_poll(struct napi_struct *napi, int budget)
 {
 	struct efx_channel *channel =
@@ -301,22 +322,7 @@
 		if (efx_channel_has_rx_queue(channel) &&
 		    efx->irq_rx_adaptive &&
 		    unlikely(++channel->irq_count == 1000)) {
-			if (unlikely(channel->irq_mod_score <
-				     irq_adapt_low_thresh)) {
-				if (channel->irq_moderation > 1) {
-					channel->irq_moderation -= 1;
-					efx->type->push_irq_moderation(channel);
-				}
-			} else if (unlikely(channel->irq_mod_score >
-					    irq_adapt_high_thresh)) {
-				if (channel->irq_moderation <
-				    efx->irq_rx_moderation) {
-					channel->irq_moderation += 1;
-					efx->type->push_irq_moderation(channel);
-				}
-			}
-			channel->irq_count = 0;
-			channel->irq_mod_score = 0;
+			efx_update_irq_mod(efx, channel);
 		}
 
 		efx_filter_rfs_expire(channel);
@@ -1703,6 +1709,7 @@
 	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
 
 	/* Initialise the interrupt moderation settings */
+	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
 	efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
 				true);
 
@@ -1949,14 +1956,21 @@
  * Interrupt moderation
  *
  **************************************************************************/
-
-static unsigned int irq_mod_ticks(unsigned int usecs, unsigned int quantum_ns)
+unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs)
 {
 	if (usecs == 0)
 		return 0;
-	if (usecs * 1000 < quantum_ns)
+	if (usecs * 1000 < efx->timer_quantum_ns)
 		return 1; /* never round down to 0 */
-	return usecs * 1000 / quantum_ns;
+	return usecs * 1000 / efx->timer_quantum_ns;
+}
+
+unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks)
+{
+	/* We must round up when converting ticks to microseconds
+	 * because we round down when converting the other way.
+	 */
+	return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
 }
 
 /* Set interrupt moderation parameters */
@@ -1965,21 +1979,16 @@
 			    bool rx_may_override_tx)
 {
 	struct efx_channel *channel;
-	unsigned int irq_mod_max = DIV_ROUND_UP(efx->type->timer_period_max *
-						efx->timer_quantum_ns,
-						1000);
-	unsigned int tx_ticks;
-	unsigned int rx_ticks;
+	unsigned int timer_max_us;
 
 	EFX_ASSERT_RESET_SERIALISED(efx);
 
-	if (tx_usecs > irq_mod_max || rx_usecs > irq_mod_max)
+	timer_max_us = efx->timer_max_ns / 1000;
+
+	if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
 		return -EINVAL;
 
-	tx_ticks = irq_mod_ticks(tx_usecs, efx->timer_quantum_ns);
-	rx_ticks = irq_mod_ticks(rx_usecs, efx->timer_quantum_ns);
-
-	if (tx_ticks != rx_ticks && efx->tx_channel_offset == 0 &&
+	if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
 	    !rx_may_override_tx) {
 		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
 			  "RX and TX IRQ moderation must be equal\n");
@@ -1987,12 +1996,12 @@
 	}
 
 	efx->irq_rx_adaptive = rx_adaptive;
-	efx->irq_rx_moderation = rx_ticks;
+	efx->irq_rx_moderation_us = rx_usecs;
 	efx_for_each_channel(channel, efx) {
 		if (efx_channel_has_rx_queue(channel))
-			channel->irq_moderation = rx_ticks;
+			channel->irq_moderation_us = rx_usecs;
 		else if (efx_channel_has_tx_queues(channel))
-			channel->irq_moderation = tx_ticks;
+			channel->irq_moderation_us = tx_usecs;
 	}
 
 	return 0;
@@ -2001,26 +2010,21 @@
 void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
 			    unsigned int *rx_usecs, bool *rx_adaptive)
 {
-	/* We must round up when converting ticks to microseconds
-	 * because we round down when converting the other way.
-	 */
-
 	*rx_adaptive = efx->irq_rx_adaptive;
-	*rx_usecs = DIV_ROUND_UP(efx->irq_rx_moderation *
-				 efx->timer_quantum_ns,
-				 1000);
+	*rx_usecs = efx->irq_rx_moderation_us;
 
 	/* If channels are shared between RX and TX, so is IRQ
 	 * moderation.  Otherwise, IRQ moderation is the same for all
 	 * TX channels and is not adaptive.
 	 */
-	if (efx->tx_channel_offset == 0)
+	if (efx->tx_channel_offset == 0) {
 		*tx_usecs = *rx_usecs;
-	else
-		*tx_usecs = DIV_ROUND_UP(
-			efx->channel[efx->tx_channel_offset]->irq_moderation *
-			efx->timer_quantum_ns,
-			1000);
+	} else {
+		struct efx_channel *tx_channel;
+
+		tx_channel = efx->channel[efx->tx_channel_offset];
+		*tx_usecs = tx_channel->irq_moderation_us;
+	}
 }
 
 /**************************************************************************
@@ -2259,8 +2263,18 @@
 	rc = efx_check_disabled(efx);
 	if (rc)
 		return rc;
-	if (new_mtu > EFX_MAX_MTU)
+	if (new_mtu > EFX_MAX_MTU) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Requested MTU of %d too big (max: %d)\n",
+			  new_mtu, EFX_MAX_MTU);
 		return -EINVAL;
+	}
+	if (new_mtu < EFX_MIN_MTU) {
+		netif_err(efx, drv, efx->net_dev,
+			  "Requested MTU of %d too small (min: %d)\n",
+			  new_mtu, EFX_MIN_MTU);
+		return -EINVAL;
+	}
 
 	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
 
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index c3ae739..342ae16 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -204,6 +204,8 @@
 
 /* Global */
 void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);
+unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs);
+unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks);
 int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
 			    unsigned int rx_usecs, bool rx_adaptive,
 			    bool rx_may_override_tx);
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c
index d790cb8..1a70926 100644
--- a/drivers/net/ethernet/sfc/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon.c
@@ -378,12 +378,15 @@
 	struct efx_nic *efx = channel->efx;
 
 	/* Set timer register */
-	if (channel->irq_moderation) {
+	if (channel->irq_moderation_us) {
+		unsigned int ticks;
+
+		ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
 		EFX_POPULATE_DWORD_2(timer_cmd,
 				     FRF_AB_TC_TIMER_MODE,
 				     FFE_BB_TIMER_MODE_INT_HLDOFF,
 				     FRF_AB_TC_TIMER_VAL,
-				     channel->irq_moderation - 1);
+				     ticks - 1);
 	} else {
 		EFX_POPULATE_DWORD_2(timer_cmd,
 				     FRF_AB_TC_TIMER_MODE,
@@ -2373,6 +2376,8 @@
 			     EFX_MAX_CHANNELS);
 	efx->max_tx_channels = efx->max_channels;
 	efx->timer_quantum_ns = 4968; /* 621 cycles */
+	efx->timer_max_ns = efx->type->timer_period_max *
+			    efx->timer_quantum_ns;
 
 	/* Initialise I2C adapter */
 	board = falcon_board(efx);
diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c
index 1736f4b..f6883b2 100644
--- a/drivers/net/ethernet/sfc/falcon_boards.c
+++ b/drivers/net/ethernet/sfc/falcon_boards.c
@@ -64,7 +64,7 @@
 #define LM87_ALARM_TEMP_INT		0x10
 #define LM87_ALARM_TEMP_EXT1		0x20
 
-#if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE)
+#if IS_ENABLED(CONFIG_SENSORS_LM87)
 
 static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values)
 {
@@ -455,7 +455,7 @@
 	struct falcon_board *board = falcon_board(efx);
 	int rc;
 
-#if defined(CONFIG_SENSORS_LM90) || defined(CONFIG_SENSORS_LM90_MODULE)
+#if IS_ENABLED(CONFIG_SENSORS_LM90)
 	board->hwmon_client =
 		i2c_new_device(&board->i2c_adap, &sfe4001_hwmon_info);
 #else
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 4c83739..4762ec4 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -1477,9 +1477,10 @@
  * Interrupt must already have been enabled, otherwise nasty things
  * may happen.
  */
-void efx_farch_irq_test_generate(struct efx_nic *efx)
+int efx_farch_irq_test_generate(struct efx_nic *efx)
 {
 	efx_farch_interrupts(efx, true, true);
+	return 0;
 }
 
 /* Process a fatal interrupt
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index d28e7dd..2415209 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -548,7 +548,10 @@
 		efx_mcdi_display_error(efx, async->cmd, async->inlen, errbuf,
 				       err_len, rc);
 	}
-	async->complete(efx, async->cookie, rc, outbuf, data_len);
+
+	if (async->complete)
+		async->complete(efx, async->cookie, rc, outbuf,
+				min(async->outlen, data_len));
 	kfree(async);
 
 	efx_mcdi_release(mcdi);
@@ -1153,7 +1156,8 @@
 	 * acquired locks in the wrong order.
 	 */
 	list_for_each_entry_safe(async, next, &mcdi->async_list, list) {
-		async->complete(efx, async->cookie, -ENETDOWN, NULL, 0);
+		if (async->complete)
+			async->complete(efx, async->cookie, -ENETDOWN, NULL, 0);
 		list_del(&async->list);
 		kfree(async);
 	}
diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index c9a5b00..ccceafc 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -2645,16 +2645,20 @@
 #define          MC_CMD_POLL_BIST_MEM_BUS_MC 0x0
 /* enum: CSR IREG bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_CSR 0x1
-/* enum: RX DPCPU bus. */
+/* enum: RX0 DPCPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX 0x2
 /* enum: TX0 DPCPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX0 0x3
 /* enum: TX1 DPCPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX1 0x4
-/* enum: RX DICPU bus. */
+/* enum: RX0 DICPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX 0x5
 /* enum: TX DICPU bus. */
 #define          MC_CMD_POLL_BIST_MEM_BUS_DICPU_TX 0x6
+/* enum: RX1 DPCPU bus. */
+#define          MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX1 0x7
+/* enum: RX1 DICPU bus. */
+#define          MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX1 0x8
 /* Pattern written to RAM / register */
 #define       MC_CMD_POLL_BIST_OUT_MEM_EXPECT_OFST 16
 /* Actual value read from RAM / register */
@@ -3612,6 +3616,8 @@
 #define        MC_CMD_NVRAM_INFO_OUT_PROTECTED_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_LBN 1
 #define        MC_CMD_NVRAM_INFO_OUT_TLV_WIDTH 1
+#define        MC_CMD_NVRAM_INFO_OUT_CMAC_LBN 6
+#define        MC_CMD_NVRAM_INFO_OUT_CMAC_WIDTH 1
 #define        MC_CMD_NVRAM_INFO_OUT_A_B_LBN 7
 #define        MC_CMD_NVRAM_INFO_OUT_A_B_WIDTH 1
 #define       MC_CMD_NVRAM_INFO_OUT_PHYSDEV_OFST 16
@@ -4389,6 +4395,8 @@
  * the command will fail with MC_CMD_ERR_FILTERS_PRESENT.
  */
 #define          MC_CMD_WORKAROUND_BUG26807 0x6
+/* enum: Bug 61265 work around (broken EVQ TMR writes). */
+#define          MC_CMD_WORKAROUND_BUG61265 0x7
 /* 0 = disable the workaround indicated by TYPE; any non-zero value = enable
  * the workaround
  */
@@ -4413,7 +4421,6 @@
  * (GET_PHY_CFG_OUT_MEDIA_TYPE); the valid 'page number' input values, and the
  * output data, are interpreted on a per-type basis. For SFP+: PAGE=0 or 1
  * returns a 128-byte block read from module I2C address 0xA0 offset 0 or 0x80.
- * Anything else: currently undefined. Locks required: None. Return code: 0.
  */
 #define MC_CMD_GET_PHY_MEDIA_INFO 0x4b
 
@@ -5479,6 +5486,8 @@
 #define        LICENSED_V3_FEATURES_TX_SNIFF_WIDTH 1
 #define        LICENSED_V3_FEATURES_PROXY_FILTER_OPS_LBN 8
 #define        LICENSED_V3_FEATURES_PROXY_FILTER_OPS_WIDTH 1
+#define        LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_LBN 9
+#define        LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_WIDTH 1
 #define       LICENSED_V3_FEATURES_MASK_LBN 0
 #define       LICENSED_V3_FEATURES_MASK_WIDTH 64
 
@@ -5634,6 +5643,109 @@
 /* Only valid if INTRFLAG was true */
 #define       MC_CMD_INIT_EVQ_OUT_IRQ_OFST 0
 
+/* MC_CMD_INIT_EVQ_V2_IN msgrequest */
+#define    MC_CMD_INIT_EVQ_V2_IN_LENMIN 44
+#define    MC_CMD_INIT_EVQ_V2_IN_LENMAX 548
+#define    MC_CMD_INIT_EVQ_V2_IN_LEN(num) (36+8*(num))
+/* Size, in entries */
+#define       MC_CMD_INIT_EVQ_V2_IN_SIZE_OFST 0
+/* Desired instance. Must be set to a specific instance, which is a function
+ * local queue index.
+ */
+#define       MC_CMD_INIT_EVQ_V2_IN_INSTANCE_OFST 4
+/* The initial timer value. The load value is ignored if the timer mode is DIS.
+ */
+#define       MC_CMD_INIT_EVQ_V2_IN_TMR_LOAD_OFST 8
+/* The reload value is ignored in one-shot modes */
+#define       MC_CMD_INIT_EVQ_V2_IN_TMR_RELOAD_OFST 12
+/* tbd */
+#define       MC_CMD_INIT_EVQ_V2_IN_FLAGS_OFST 16
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_LBN 0
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_LBN 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_LBN 2
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_LBN 3
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_LBN 4
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_LBN 5
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_LBN 6
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LBN 7
+#define        MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_WIDTH 4
+/* enum: All initialisation flags specified by host. */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_MANUAL 0x0
+/* enum: MEDFORD only. Certain initialisation flags specified by host may be
+ * over-ridden by firmware based on licenses and firmware variant in order to
+ * provide the lowest latency achievable. See
+ * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags.
+ */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LOW_LATENCY 0x1
+/* enum: MEDFORD only. Certain initialisation flags specified by host may be
+ * over-ridden by firmware based on licenses and firmware variant in order to
+ * provide the best throughput achievable. See
+ * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags.
+ */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_THROUGHPUT 0x2
+/* enum: MEDFORD only. Certain initialisation flags may be over-ridden by
+ * firmware based on licenses and firmware variant. See
+ * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags.
+ */
+#define          MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO 0x3
+#define       MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_OFST 20
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_DIS 0x0
+/* enum: Immediate */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_IMMED_START 0x1
+/* enum: Triggered */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_TRIG_START 0x2
+/* enum: Hold-off */
+#define          MC_CMD_INIT_EVQ_V2_IN_TMR_INT_HLDOFF 0x3
+/* Target EVQ for wakeups if in wakeup mode. */
+#define       MC_CMD_INIT_EVQ_V2_IN_TARGET_EVQ_OFST 24
+/* Target interrupt if in interrupting mode (note union with target EVQ). Use
+ * MC_CMD_RESOURCE_INSTANCE_ANY unless a specific one required for test
+ * purposes.
+ */
+#define       MC_CMD_INIT_EVQ_V2_IN_IRQ_NUM_OFST 24
+/* Event Counter Mode. */
+#define       MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_OFST 28
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_DIS 0x0
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RX 0x1
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_TX 0x2
+/* enum: Disabled */
+#define          MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RXTX 0x3
+/* Event queue packet count threshold. */
+#define       MC_CMD_INIT_EVQ_V2_IN_COUNT_THRSHLD_OFST 32
+/* 64-bit address of 4k of 4k-aligned host memory buffer */
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_OFST 36
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LEN 8
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LO_OFST 36
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_HI_OFST 40
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MINNUM 1
+#define       MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MAXNUM 64
+
+/* MC_CMD_INIT_EVQ_V2_OUT msgresponse */
+#define    MC_CMD_INIT_EVQ_V2_OUT_LEN 8
+/* Only valid if INTRFLAG was true */
+#define       MC_CMD_INIT_EVQ_V2_OUT_IRQ_OFST 0
+/* Actual configuration applied on the card */
+#define       MC_CMD_INIT_EVQ_V2_OUT_FLAGS_OFST 4
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_LBN 0
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_LBN 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_LBN 2
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_WIDTH 1
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_LBN 3
+#define        MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_WIDTH 1
+
 /* QUEUE_CRC_MODE structuredef */
 #define    QUEUE_CRC_MODE_LEN 1
 #define       QUEUE_CRC_MODE_MODE_LBN 0
@@ -5697,8 +5809,8 @@
 #define        MC_CMD_INIT_RXQ_IN_FLAG_PREFIX_WIDTH 1
 #define        MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_LBN 9
 #define        MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_WIDTH 1
-#define        MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_LBN 10
-#define        MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_WIDTH 1
+#define        MC_CMD_INIT_RXQ_IN_UNUSED_LBN 10
+#define        MC_CMD_INIT_RXQ_IN_UNUSED_WIDTH 1
 /* Owner ID to use if in buffer mode (zero if physical) */
 #define       MC_CMD_INIT_RXQ_IN_OWNER_ID_OFST 20
 /* The port ID associated with the v-adaptor which should contain this DMAQ. */
@@ -7854,6 +7966,20 @@
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_EVENT_CUT_THROUGH_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_LBN 4
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_LBN 5
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN 7
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN 8
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_LBN 9
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_LBN 10
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_LBN 11
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
@@ -7910,6 +8036,288 @@
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_OFST 70
 #define       MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_LEN 2
 
+/* MC_CMD_GET_CAPABILITIES_V3_OUT msgresponse */
+#define    MC_CMD_GET_CAPABILITIES_V3_OUT_LEN 73
+/* First word of flags. */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS1_OFST 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_LBN 3
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_LBN 4
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_LBN 5
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_LBN 6
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_LBN 7
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_LBN 8
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_LBN 9
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_LBN 10
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_LBN 11
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_LBN 13
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_LBN 14
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_LBN 15
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_LBN 16
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_LBN 17
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_LBN 18
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_LBN 19
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_LBN 20
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_LBN 21
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_LBN 22
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_LBN 23
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_LBN 24
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_LBN 25
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_LBN 26
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_LBN 27
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_LBN 28
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_LBN 29
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_LBN 30
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_LBN 31
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_WIDTH 1
+/* RxDPCPU firmware id. */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_OFST 4
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_LEN 2
+/* enum: Standard RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP  0x0
+/* enum: Low latency RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY  0x1
+/* enum: Packed stream RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM  0x2
+/* enum: BIST RXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST  0x10a
+/* enum: RXDP Test firmware image 1 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH  0x101
+/* enum: RXDP Test firmware image 2 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD  0x102
+/* enum: RXDP Test firmware image 3 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST  0x103
+/* enum: RXDP Test firmware image 4 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE  0x104
+/* enum: RXDP Test firmware image 5 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE  0x105
+/* enum: RXDP Test firmware image 6 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS  0x106
+/* enum: RXDP Test firmware image 7 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT  0x107
+/* enum: RXDP Test firmware image 8 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL  0x108
+/* enum: RXDP Test firmware image 9 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY  0x10b
+/* TxDPCPU firmware id. */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_OFST 6
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_LEN 2
+/* enum: Standard TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP  0x0
+/* enum: Low latency TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY  0x1
+/* enum: High packet rate TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE  0x3
+/* enum: BIST TXDP firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST  0x12d
+/* enum: TXDP Test firmware image 1 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT  0x101
+/* enum: TXDP Test firmware image 2 */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS  0x102
+/* enum: TXDP CSR bus test firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR  0x103
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_OFST 8
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_LEN 2
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_LBN 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_WIDTH 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_WIDTH 4
+/* enum: reserved value - do not use (may indicate alternative interpretation
+ * of REV field in future)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED  0x0
+/* enum: Trivial RX PD firmware for early Huntington development (Huntington
+ * development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT  0x1
+/* enum: RX PD firmware with approximately Siena-compatible behaviour
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT  0x2
+/* enum: Virtual switching (full feature) RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH  0x3
+/* enum: siena_compat variant RX PD firmware using PM rather than MAC
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+/* enum: Low latency RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY  0x5
+/* enum: Packed stream RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM  0x6
+/* enum: RX PD firmware handling layer 2 only for high packet rate performance
+ * tests (Medford development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
+/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+/* enum: RX PD firmware parsing but not filtering network overlay tunnel
+ * encapsulations (Medford development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY  0xf
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_OFST 10
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_LEN 2
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_LBN 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_WIDTH 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_WIDTH 4
+/* enum: reserved value - do not use (may indicate alternative interpretation
+ * of REV field in future)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED  0x0
+/* enum: Trivial TX PD firmware for early Huntington development (Huntington
+ * development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT  0x1
+/* enum: TX PD firmware with approximately Siena-compatible behaviour
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT  0x2
+/* enum: Virtual switching (full feature) TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH  0x3
+/* enum: siena_compat variant TX PD firmware using PM rather than MAC
+ * (Huntington development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM  0x4
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY  0x5 /* enum */
+/* enum: TX PD firmware handling layer 2 only for high packet rate performance
+ * tests (Medford development only)
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
+/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
+/* Hardware capabilities of NIC */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_OFST 12
+/* Licensed capabilities */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_LICENSE_CAPABILITIES_OFST 16
+/* Second word of flags. Not present on older firmware (check the length). */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS2_OFST 20
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_LBN 0
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_LBN 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_LBN 2
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_LBN 3
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_LBN 4
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_LBN 5
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_LBN 7
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_LBN 8
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_LBN 9
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_LBN 10
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_LBN 11
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_WIDTH 1
+/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
+ * on older firmware (check the length).
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_OFST 24
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_LEN 2
+/* One byte per PF containing the number of the external port assigned to this
+ * PF, indexed by PF number. Special values indicate that a PF is either not
+ * present or not assigned.
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_OFST 26
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16
+/* enum: The caller is not permitted to access information on this PF. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff
+/* enum: PF does not exist. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe
+/* enum: PF does exist but is not assigned to any external port. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED  0xfd
+/* enum: This value indicates that PF is assigned, but it cannot be expressed
+ * in this field. It is intended for a possible future situation where a more
+ * complex scheme of PFs to ports mapping is being used. The future driver
+ * should look for a new field supporting the new scheme. The current/old
+ * driver should treat this value as PF_NOT_ASSIGNED.
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT  0xfc
+/* One byte per PF containing the number of its VFs, indexed by PF number. A
+ * special value indicates that a PF is not present.
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_OFST 42
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_LEN 1
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_NUM 16
+/* enum: The caller is not permitted to access information on this PF. */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED  0xff */
+/* enum: PF does not exist. */
+/*               MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT  0xfe */
+/* Number of VIs available for each external port */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_OFST 58
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_LEN 2
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_NUM 4
+/* Size of RX descriptor cache expressed as binary logarithm The actual size
+ * equals (2 ^ RX_DESC_CACHE_SIZE)
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_OFST 66
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_LEN 1
+/* Size of TX descriptor cache expressed as binary logarithm The actual size
+ * equals (2 ^ TX_DESC_CACHE_SIZE)
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_OFST 67
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_LEN 1
+/* Total number of available PIO buffers */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_OFST 68
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_LEN 2
+/* Size of a single PIO buffer */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_OFST 70
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_LEN 2
+/* On chips later than Medford the amount of address space assigned to each VI
+ * is configurable. This is a global setting that the driver must query to
+ * discover the VI to address mapping. Cut-through PIO (CTPIO) in not available
+ * with 8k VI windows.
+ */
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_OFST 72
+#define       MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_LEN 1
+/* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k.
+ * CTPIO is not mapped.
+ */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K   0x0
+/* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K  0x1
+/* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */
+#define          MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K  0x2
+
 
 /***********************************/
 /* MC_CMD_V2_EXTN
@@ -9026,7 +9434,7 @@
  */
 #define MC_CMD_GET_RXDP_CONFIG 0xc2
 
-#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_GET_RXDP_CONFIG_IN msgrequest */
 #define    MC_CMD_GET_RXDP_CONFIG_IN_LEN 0
@@ -10125,7 +10533,9 @@
  * that this operation returns a zero-length response
  */
 #define          MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE  0x0
-/* enum: report counts of installed licenses */
+/* enum: report counts of installed licenses Returns EAGAIN if license
+ * processing (updating) has been started but not yet completed.
+ */
 #define          MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE  0x1
 
 /* MC_CMD_LICENSING_V3_OUT msgresponse */
@@ -10763,6 +11173,8 @@
 #define          MC_CMD_GET_WORKAROUNDS_OUT_BUG42008 0x20
 /* enum: Bug 26807 features present in firmware (multicast filter chaining) */
 #define          MC_CMD_GET_WORKAROUNDS_OUT_BUG26807 0x40
+/* enum: Bug 61265 work around (broken EVQ TMR writes). */
+#define          MC_CMD_GET_WORKAROUNDS_OUT_BUG61265 0x80
 
 
 /***********************************/
@@ -11280,22 +11692,110 @@
 #define MC_CMD_0x118_PRIVILEGE_CTG SRIOV_CTG_ADMIN
 
 /* MC_CMD_RX_BALANCING_IN msgrequest */
-#define    MC_CMD_RX_BALANCING_IN_LEN 4
+#define    MC_CMD_RX_BALANCING_IN_LEN 16
 /* The RX port whose upconverter table will be modified */
 #define       MC_CMD_RX_BALANCING_IN_PORT_OFST 0
-#define       MC_CMD_RX_BALANCING_IN_PORT_LEN 1
 /* The VLAN priority associated to the table index and vFIFO */
-#define       MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 1
-#define       MC_CMD_RX_BALANCING_IN_PRIORITY_LEN 1
+#define       MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 4
 /* The resulting bit of SRC^DST for indexing the table */
-#define       MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 2
-#define       MC_CMD_RX_BALANCING_IN_SRC_DST_LEN 1
+#define       MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 8
 /* The RX engine to which the vFIFO in the table entry will point to */
-#define       MC_CMD_RX_BALANCING_IN_ENG_OFST 3
-#define       MC_CMD_RX_BALANCING_IN_ENG_LEN 1
+#define       MC_CMD_RX_BALANCING_IN_ENG_OFST 12
 
 /* MC_CMD_RX_BALANCING_OUT msgresponse */
 #define    MC_CMD_RX_BALANCING_OUT_LEN 0
 
+/***********************************/
+/* MC_CMD_SET_EVQ_TMR
+ * Update the timer load, timer reload and timer mode values for a given EVQ.
+ * The requested timer values (in TMR_LOAD_REQ_NS and TMR_RELOAD_REQ_NS) will
+ * be rounded up to the granularity supported by the hardware, then truncated
+ * to the range supported by the hardware. The resulting value after the
+ * rounding and truncation will be returned to the caller (in TMR_LOAD_ACT_NS
+ * and TMR_RELOAD_ACT_NS).
+ */
+#define MC_CMD_SET_EVQ_TMR 0x120
+
+#define MC_CMD_0x120_PRIVILEGE_CTG SRIOV_CTG_GENERAL
+
+/* MC_CMD_SET_EVQ_TMR_IN msgrequest */
+#define    MC_CMD_SET_EVQ_TMR_IN_LEN 16
+/* Function-relative queue instance */
+#define       MC_CMD_SET_EVQ_TMR_IN_INSTANCE_OFST 0
+/* Requested value for timer load (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS_OFST 4
+/* Requested value for timer reload (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS_OFST 8
+/* Timer mode. Meanings as per EVQ_TMR_REG.TC_TIMER_VAL */
+#define       MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_OFST 12
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS  0x0 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START  0x1 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START  0x2 /* enum */
+#define          MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF  0x3 /* enum */
+
+/* MC_CMD_SET_EVQ_TMR_OUT msgresponse */
+#define    MC_CMD_SET_EVQ_TMR_OUT_LEN 8
+/* Actual value for timer load (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_OUT_TMR_LOAD_ACT_NS_OFST 0
+/* Actual value for timer reload (in nanoseconds) */
+#define       MC_CMD_SET_EVQ_TMR_OUT_TMR_RELOAD_ACT_NS_OFST 4
+
+
+/***********************************/
+/* MC_CMD_GET_EVQ_TMR_PROPERTIES
+ * Query properties about the event queue timers.
+ */
+#define MC_CMD_GET_EVQ_TMR_PROPERTIES 0x122
+
+#define MC_CMD_0x122_PRIVILEGE_CTG SRIOV_CTG_GENERAL
+
+/* MC_CMD_GET_EVQ_TMR_PROPERTIES_IN msgrequest */
+#define    MC_CMD_GET_EVQ_TMR_PROPERTIES_IN_LEN 0
+
+/* MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT msgresponse */
+#define    MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN 36
+/* Reserved for future use. */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_FLAGS_OFST 0
+/* For timers updated via writes to EVQ_TMR_REG, this is the time interval (in
+ * nanoseconds) for each increment of the timer load/reload count. The
+ * requested duration of a timer is this value multiplied by the timer
+ * load/reload count.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT_OFST 4
+/* For timers updated via writes to EVQ_TMR_REG, this is the maximum value
+ * allowed for timer load/reload counts.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT_OFST 8
+/* For timers updated via writes to EVQ_TMR_REG, timer load/reload counts not a
+ * multiple of this step size will be rounded in an implementation defined
+ * manner.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_STEP_OFST 12
+/* Maximum timer duration (in nanoseconds) for timers updated via MCDI. Only
+ * meaningful if MC_CMD_SET_EVQ_TMR is implemented.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS_OFST 16
+/* Timer durations requested via MCDI that are not a multiple of this step size
+ * will be rounded up. Only meaningful if MC_CMD_SET_EVQ_TMR is implemented.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS_OFST 20
+/* For timers updated using the bug35388 workaround, this is the time interval
+ * (in nanoseconds) for each increment of the timer load/reload count. The
+ * requested duration of a timer is this value multiplied by the timer
+ * load/reload count. This field is only meaningful if the bug35388 workaround
+ * is enabled.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT_OFST 24
+/* For timers updated using the bug35388 workaround, this is the maximum value
+ * allowed for timer load/reload counts. This field is only meaningful if the
+ * bug35388 workaround is enabled.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT_OFST 28
+/* For timers updated using the bug35388 workaround, timer load/reload counts
+ * not a multiple of this step size will be rounded in an implementation
+ * defined manner. This field is only meaningful if the bug35388 workaround is
+ * enabled.
+ */
+#define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_STEP_OFST 32
 
 #endif /* MCDI_PCOL_H */
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 9ff062a..99d8c82 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -76,6 +76,9 @@
 /* Maximum possible MTU the driver supports */
 #define EFX_MAX_MTU (9 * 1024)
 
+/* Minimum MTU, from RFC791 (IP) */
+#define EFX_MIN_MTU 68
+
 /* Size of an RX scatter buffer.  Small enough to pack 2 into a 4K page,
  * and should be a multiple of the cache line size.
  */
@@ -392,7 +395,7 @@
  * @eventq_init: Event queue initialised flag
  * @enabled: Channel enabled indicator
  * @irq: IRQ number (MSI and MSI-X only)
- * @irq_moderation: IRQ moderation value (in hardware ticks)
+ * @irq_moderation_us: IRQ moderation value (in microseconds)
  * @napi_dev: Net device used with NAPI
  * @napi_str: NAPI control structure
  * @state: state for NAPI vs busy polling
@@ -433,7 +436,7 @@
 	bool eventq_init;
 	bool enabled;
 	int irq;
-	unsigned int irq_moderation;
+	unsigned int irq_moderation_us;
 	struct net_device *napi_dev;
 	struct napi_struct napi_str;
 #ifdef CONFIG_NET_RX_BUSY_POLL
@@ -810,8 +813,10 @@
  * @membase: Memory BAR value
  * @interrupt_mode: Interrupt mode
  * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds
+ * @timer_max_ns: Interrupt timer maximum value, in nanoseconds
  * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues
- * @irq_rx_moderation: IRQ moderation time for RX event queues
+ * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues
+ * @irq_rx_moderation_us: IRQ moderation time for RX event queues
  * @msg_enable: Log message enable flags
  * @state: Device state number (%STATE_*). Serialised by the rtnl_lock.
  * @reset_pending: Bitmask for pending resets
@@ -940,8 +945,10 @@
 
 	enum efx_int_mode interrupt_mode;
 	unsigned int timer_quantum_ns;
+	unsigned int timer_max_ns;
 	bool irq_rx_adaptive;
-	unsigned int irq_rx_moderation;
+	unsigned int irq_mod_step_us;
+	unsigned int irq_rx_moderation_us;
 	u32 msg_enable;
 
 	enum nic_state state;
@@ -1271,7 +1278,7 @@
 	int (*mcdi_poll_reboot)(struct efx_nic *efx);
 	void (*mcdi_reboot_detected)(struct efx_nic *efx);
 	void (*irq_enable_master)(struct efx_nic *efx);
-	void (*irq_test_generate)(struct efx_nic *efx);
+	int (*irq_test_generate)(struct efx_nic *efx);
 	void (*irq_disable_non_ev)(struct efx_nic *efx);
 	irqreturn_t (*irq_handle_msi)(int irq, void *dev_id);
 	irqreturn_t (*irq_handle_legacy)(int irq, void *dev_id);
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index 89b83e5..aa1945a 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -66,11 +66,11 @@
 	channel->efx->type->ev_test_generate(channel);
 }
 
-void efx_nic_irq_test_start(struct efx_nic *efx)
+int efx_nic_irq_test_start(struct efx_nic *efx)
 {
 	efx->last_irq_cpu = -1;
 	smp_wmb();
-	efx->type->irq_test_generate(efx);
+	return efx->type->irq_test_generate(efx);
 }
 
 /* Hook interrupt handler(s)
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 96944c3..73bee7e 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -507,10 +507,13 @@
  * @stats: Hardware statistics
  * @workaround_35388: Flag: firmware supports workaround for bug 35388
  * @workaround_26807: Flag: firmware supports workaround for bug 26807
+ * @workaround_61265: Flag: firmware supports workaround for bug 61265
  * @must_check_datapath_caps: Flag: @datapath_caps needs to be revalidated
  *	after MC reboot
  * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of
  *	%MC_CMD_GET_CAPABILITIES response)
+ * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of
+ * %MC_CMD_GET_CAPABILITIES response)
  * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU
  * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU
  * @vport_id: The function's vport ID, only relevant for PFs
@@ -540,8 +543,10 @@
 	u64 stats[EF10_STAT_COUNT];
 	bool workaround_35388;
 	bool workaround_26807;
+	bool workaround_61265;
 	bool must_check_datapath_caps;
 	u32 datapath_caps;
+	u32 datapath_caps2;
 	unsigned int rx_dpcpu_fw_id;
 	unsigned int tx_dpcpu_fw_id;
 	unsigned int vport_id;
@@ -741,12 +746,12 @@
 
 /* Interrupts */
 int efx_nic_init_interrupt(struct efx_nic *efx);
-void efx_nic_irq_test_start(struct efx_nic *efx);
+int efx_nic_irq_test_start(struct efx_nic *efx);
 void efx_nic_fini_interrupt(struct efx_nic *efx);
 
 /* Falcon/Siena interrupts */
 void efx_farch_irq_enable_master(struct efx_nic *efx);
-void efx_farch_irq_test_generate(struct efx_nic *efx);
+int efx_farch_irq_test_generate(struct efx_nic *efx);
 void efx_farch_irq_disable_master(struct efx_nic *efx);
 irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id);
 irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id);
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index c771e0a..77a5364 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1269,13 +1269,13 @@
 		if (IS_ERR(ptp->phc_clock)) {
 			rc = PTR_ERR(ptp->phc_clock);
 			goto fail3;
-		}
-
-		INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker);
-		ptp->pps_workwq = create_singlethread_workqueue("sfc_pps");
-		if (!ptp->pps_workwq) {
-			rc = -ENOMEM;
-			goto fail4;
+		} else if (ptp->phc_clock) {
+			INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker);
+			ptp->pps_workwq = create_singlethread_workqueue("sfc_pps");
+			if (!ptp->pps_workwq) {
+				rc = -ENOMEM;
+				goto fail4;
+			}
 		}
 	}
 	ptp->nic_ts_enabled = false;
@@ -1306,7 +1306,7 @@
 {
 	struct efx_nic *efx = channel->efx;
 
-	channel->irq_moderation = 0;
+	channel->irq_moderation_us = 0;
 	channel->rx_queue.core_index = 0;
 
 	return efx_ptp_probe(efx, channel);
diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c
index 9d78830..cd38b44 100644
--- a/drivers/net/ethernet/sfc/selftest.c
+++ b/drivers/net/ethernet/sfc/selftest.c
@@ -135,11 +135,19 @@
 {
 	unsigned long timeout, wait;
 	int cpu;
+	int rc;
 
 	netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n");
 	tests->interrupt = -1;
 
-	efx_nic_irq_test_start(efx);
+	rc = efx_nic_irq_test_start(efx);
+	if (rc == -ENOTSUPP) {
+		netif_dbg(efx, drv, efx->net_dev,
+			  "direct interrupt testing not supported\n");
+		tests->interrupt = 0;
+		return 0;
+	}
+
 	timeout = jiffies + IRQ_TIMEOUT;
 	wait = 1;
 
diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h
index 009dbe8..32a4272 100644
--- a/drivers/net/ethernet/sfc/selftest.h
+++ b/drivers/net/ethernet/sfc/selftest.h
@@ -28,7 +28,7 @@
 
 /* Efx self test results
  * For fields which are not counters, 1 indicates success and -1
- * indicates failure.
+ * indicates failure; 0 indicates test could not be run.
  */
 struct efx_self_tests {
 	/* online tests */
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 2219b54..04ed1b4 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -34,19 +34,24 @@
 
 static void siena_push_irq_moderation(struct efx_channel *channel)
 {
+	struct efx_nic *efx = channel->efx;
 	efx_dword_t timer_cmd;
 
-	if (channel->irq_moderation)
+	if (channel->irq_moderation_us) {
+		unsigned int ticks;
+
+		ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
 		EFX_POPULATE_DWORD_2(timer_cmd,
 				     FRF_CZ_TC_TIMER_MODE,
 				     FFE_CZ_TIMER_MODE_INT_HLDOFF,
 				     FRF_CZ_TC_TIMER_VAL,
-				     channel->irq_moderation - 1);
-	else
+				     ticks - 1);
+	} else {
 		EFX_POPULATE_DWORD_2(timer_cmd,
 				     FRF_CZ_TC_TIMER_MODE,
 				     FFE_CZ_TIMER_MODE_DIS,
 				     FRF_CZ_TC_TIMER_VAL, 0);
+	}
 	efx_writed_page_locked(channel->efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0,
 			       channel->channel);
 }
@@ -222,6 +227,9 @@
 	efx->timer_quantum_ns =
 		(caps & (1 << MC_CMD_CAPABILITIES_TURBO_ACTIVE_LBN)) ?
 		3072 : 6144; /* 768 cycles */
+	efx->timer_max_ns = efx->type->timer_period_max *
+			    efx->timer_quantum_ns;
+
 	return rc;
 }
 
diff --git a/drivers/net/ethernet/sfc/sriov.c b/drivers/net/ethernet/sfc/sriov.c
index 816c446..9abcf4a 100644
--- a/drivers/net/ethernet/sfc/sriov.c
+++ b/drivers/net/ethernet/sfc/sriov.c
@@ -22,7 +22,7 @@
 }
 
 int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
-			  u8 qos)
+			  u8 qos, __be16 vlan_proto)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
@@ -31,6 +31,9 @@
 		    (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT)))
 			return -EINVAL;
 
+		if (vlan_proto != htons(ETH_P_8021Q))
+			return -EPROTONOSUPPORT;
+
 		return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos);
 	} else {
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/sfc/sriov.h b/drivers/net/ethernet/sfc/sriov.h
index 400df52..ba1762e 100644
--- a/drivers/net/ethernet/sfc/sriov.h
+++ b/drivers/net/ethernet/sfc/sriov.h
@@ -16,7 +16,7 @@
 
 int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac);
 int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan,
-			  u8 qos);
+			  u8 qos, __be16 vlan_proto);
 int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i,
 			      bool spoofchk);
 int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h
index 2310b75..351cd14 100644
--- a/drivers/net/ethernet/sfc/workarounds.h
+++ b/drivers/net/ethernet/sfc/workarounds.h
@@ -50,4 +50,8 @@
 #define EFX_WORKAROUND_35388(efx)					\
 	(efx_nic_rev(efx) == EFX_REV_HUNT_A0 && EFX_EF10_WORKAROUND_35388(efx))
 
+/* Moderation timer access must go through MCDI */
+#define EFX_EF10_WORKAROUND_61265(efx)					\
+	(((struct efx_ef10_nic_data *)efx->nic_data)->workaround_61265)
+
 #endif /* EFX_WORKAROUNDS_H */
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 95001ee4..6f85276 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -1426,7 +1426,7 @@
 		rx_flags |= RxATX;
 	}
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 	/* Can accept Jumbo packet */
 	rx_flags |= RxAJAB;
 #endif
@@ -1750,7 +1750,7 @@
 		data_size = rx_status & DSIZE;
 		rx_size = data_size - CRC_SIZE;
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 		/* ``TOOLONG'' flag means jumbo packet received. */
 		if ((rx_status & TOOLONG) && data_size <= MAX_FRAME_SIZE)
 			rx_status &= (~ ((unsigned int)TOOLONG));
diff --git a/drivers/net/ethernet/sis/sis900.h b/drivers/net/ethernet/sis/sis900.h
index 7d430d3..f0da3dc 100644
--- a/drivers/net/ethernet/sis/sis900.h
+++ b/drivers/net/ethernet/sis/sis900.h
@@ -310,7 +310,7 @@
 #define CRC_SIZE                4
 #define MAC_HEADER_SIZE         14
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define MAX_FRAME_SIZE  (1518 + 4)
 #else
 #define MAX_FRAME_SIZE  1518
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 726b80f..7321259 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2275,6 +2275,13 @@
 	if (pd) {
 		memcpy(&lp->cfg, pd, sizeof(lp->cfg));
 		lp->io_shift = SMC91X_IO_SHIFT(lp->cfg.flags);
+
+		if (!SMC_8BIT(lp) && !SMC_16BIT(lp)) {
+			dev_err(&pdev->dev,
+				"at least one of 8-bit or 16-bit access support is required.\n");
+			ret = -ENXIO;
+			goto out_free_netdev;
+		}
 	}
 
 #if IS_BUILTIN(CONFIG_OF)
@@ -2316,6 +2323,9 @@
 		} else {
 			lp->cfg.flags |= SMC91X_USE_16BIT;
 		}
+		if (!device_property_read_u32(&pdev->dev, "reg-shift",
+					      &val))
+			lp->io_shift = val;
 	}
 #endif
 
diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h
index 1a55c79..ea84654 100644
--- a/drivers/net/ethernet/smsc/smc91x.h
+++ b/drivers/net/ethernet/smsc/smc91x.h
@@ -37,6 +37,27 @@
 #include <linux/smc91x.h>
 
 /*
+ * Any 16-bit access is performed with two 8-bit accesses if the hardware
+ * can't do it directly. Most registers are 16-bit so those are mandatory.
+ */
+#define SMC_outw_b(x, a, r)						\
+	do {								\
+		unsigned int __val16 = (x);				\
+		unsigned int __reg = (r);				\
+		SMC_outb(__val16, a, __reg);				\
+		SMC_outb(__val16 >> 8, a, __reg + (1 << SMC_IO_SHIFT));	\
+	} while (0)
+
+#define SMC_inw_b(a, r)							\
+	({								\
+		unsigned int __val16;					\
+		unsigned int __reg = r;					\
+		__val16  = SMC_inb(a, __reg);				\
+		__val16 |= SMC_inb(a, __reg + (1 << SMC_IO_SHIFT)) << 8; \
+		__val16;						\
+	})
+
+/*
  * Define your architecture specific bus configuration parameters here.
  */
 
@@ -55,10 +76,30 @@
 #define SMC_IO_SHIFT		(lp->io_shift)
 
 #define SMC_inb(a, r)		readb((a) + (r))
-#define SMC_inw(a, r)		readw((a) + (r))
+#define SMC_inw(a, r)							\
+	({								\
+		unsigned int __smc_r = r;				\
+		SMC_16BIT(lp) ? readw((a) + __smc_r) :			\
+		SMC_8BIT(lp) ? SMC_inw_b(a, __smc_r) :			\
+		({ BUG(); 0; });					\
+	})
+
 #define SMC_inl(a, r)		readl((a) + (r))
 #define SMC_outb(v, a, r)	writeb(v, (a) + (r))
+#define SMC_outw(v, a, r)						\
+	do {								\
+		unsigned int __v = v, __smc_r = r;			\
+		if (SMC_16BIT(lp))					\
+			__SMC_outw(__v, a, __smc_r);			\
+		else if (SMC_8BIT(lp))					\
+			SMC_outw_b(__v, a, __smc_r);			\
+		else							\
+			BUG();						\
+	} while (0)
+
 #define SMC_outl(v, a, r)	writel(v, (a) + (r))
+#define SMC_insb(a, r, p, l)	readsb((a) + (r), p, l)
+#define SMC_outsb(a, r, p, l)	writesb((a) + (r), p, l)
 #define SMC_insw(a, r, p, l)	readsw((a) + (r), p, l)
 #define SMC_outsw(a, r, p, l)	writesw((a) + (r), p, l)
 #define SMC_insl(a, r, p, l)	readsl((a) + (r), p, l)
@@ -66,7 +107,7 @@
 #define SMC_IRQ_FLAGS		(-1)	/* from resource */
 
 /* We actually can't write halfwords properly if not word aligned */
-static inline void SMC_outw(u16 val, void __iomem *ioaddr, int reg)
+static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 {
 	if ((machine_is_mainstone() || machine_is_stargate2() ||
 	     machine_is_pxa_idp()) && reg & 2) {
@@ -416,24 +457,8 @@
 
 #if ! SMC_CAN_USE_16BIT
 
-/*
- * Any 16-bit access is performed with two 8-bit accesses if the hardware
- * can't do it directly. Most registers are 16-bit so those are mandatory.
- */
-#define SMC_outw(x, ioaddr, reg)					\
-	do {								\
-		unsigned int __val16 = (x);				\
-		SMC_outb( __val16, ioaddr, reg );			\
-		SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\
-	} while (0)
-#define SMC_inw(ioaddr, reg)						\
-	({								\
-		unsigned int __val16;					\
-		__val16 =  SMC_inb( ioaddr, reg );			\
-		__val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \
-		__val16;						\
-	})
-
+#define SMC_outw(x, ioaddr, reg)	SMC_outw_b(x, ioaddr, reg)
+#define SMC_inw(ioaddr, reg)		SMC_inw_b(ioaddr, reg)
 #define SMC_insw(a, r, p, l)		BUG()
 #define SMC_outsw(a, r, p, l)		BUG()
 
@@ -445,7 +470,9 @@
 #endif
 
 #if ! SMC_CAN_USE_8BIT
+#undef SMC_inb
 #define SMC_inb(ioaddr, reg)		({ BUG(); 0; })
+#undef SMC_outb
 #define SMC_outb(x, ioaddr, reg)	BUG()
 #define SMC_insb(a, r, p, l)		BUG()
 #define SMC_outsb(a, r, p, l)		BUG()
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index ca31345..e9b8579 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -62,6 +62,7 @@
 #include <linux/acpi.h>
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
+#include <linux/gpio/consumer.h>
 
 #include "smsc911x.h"
 
@@ -147,6 +148,9 @@
 	/* regulators */
 	struct regulator_bulk_data supplies[SMSC911X_NUM_SUPPLIES];
 
+	/* Reset GPIO */
+	struct gpio_desc *reset_gpiod;
+
 	/* clock */
 	struct clk *clk;
 };
@@ -438,6 +442,11 @@
 		netdev_err(ndev, "couldn't get regulators %d\n",
 				ret);
 
+	/* Request optional RESET GPIO */
+	pdata->reset_gpiod = devm_gpiod_get_optional(&pdev->dev,
+						     "reset",
+						     GPIOD_OUT_LOW);
+
 	/* Request clock */
 	pdata->clk = clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pdata->clk))
@@ -1099,15 +1108,8 @@
 		goto err_out_free_bus_2;
 	}
 
-	if (smsc911x_mii_probe(dev) < 0) {
-		SMSC_WARN(pdata, probe, "Error registering mii bus");
-		goto err_out_unregister_bus_3;
-	}
-
 	return 0;
 
-err_out_unregister_bus_3:
-	mdiobus_unregister(pdata->mii_bus);
 err_out_free_bus_2:
 	mdiobus_free(pdata->mii_bus);
 err_out_1:
@@ -1514,23 +1516,90 @@
 	smsc911x_reg_write(pdata, INT_STS, 0xFFFFFFFF);
 }
 
+static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct smsc911x_data *pdata = netdev_priv(dev);
+	u32 intsts = smsc911x_reg_read(pdata, INT_STS);
+	u32 inten = smsc911x_reg_read(pdata, INT_EN);
+	int serviced = IRQ_NONE;
+	u32 temp;
+
+	if (unlikely(intsts & inten & INT_STS_SW_INT_)) {
+		temp = smsc911x_reg_read(pdata, INT_EN);
+		temp &= (~INT_EN_SW_INT_EN_);
+		smsc911x_reg_write(pdata, INT_EN, temp);
+		smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_);
+		pdata->software_irq_signal = 1;
+		smp_wmb();
+		serviced = IRQ_HANDLED;
+	}
+
+	if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) {
+		/* Called when there is a multicast update scheduled and
+		 * it is now safe to complete the update */
+		SMSC_TRACE(pdata, intr, "RX Stop interrupt");
+		smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_);
+		if (pdata->multicast_update_pending)
+			smsc911x_rx_multicast_update_workaround(pdata);
+		serviced = IRQ_HANDLED;
+	}
+
+	if (intsts & inten & INT_STS_TDFA_) {
+		temp = smsc911x_reg_read(pdata, FIFO_INT);
+		temp |= FIFO_INT_TX_AVAIL_LEVEL_;
+		smsc911x_reg_write(pdata, FIFO_INT, temp);
+		smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_);
+		netif_wake_queue(dev);
+		serviced = IRQ_HANDLED;
+	}
+
+	if (unlikely(intsts & inten & INT_STS_RXE_)) {
+		SMSC_TRACE(pdata, intr, "RX Error interrupt");
+		smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_);
+		serviced = IRQ_HANDLED;
+	}
+
+	if (likely(intsts & inten & INT_STS_RSFL_)) {
+		if (likely(napi_schedule_prep(&pdata->napi))) {
+			/* Disable Rx interrupts */
+			temp = smsc911x_reg_read(pdata, INT_EN);
+			temp &= (~INT_EN_RSFL_EN_);
+			smsc911x_reg_write(pdata, INT_EN, temp);
+			/* Schedule a NAPI poll */
+			__napi_schedule(&pdata->napi);
+		} else {
+			SMSC_WARN(pdata, rx_err, "napi_schedule_prep failed");
+		}
+		serviced = IRQ_HANDLED;
+	}
+
+	return serviced;
+}
+
 static int smsc911x_open(struct net_device *dev)
 {
 	struct smsc911x_data *pdata = netdev_priv(dev);
 	unsigned int timeout;
 	unsigned int temp;
 	unsigned int intcfg;
+	int retval;
+	int irq_flags;
 
-	/* if the phy is not yet registered, retry later*/
+	/* find and start the given phy */
 	if (!dev->phydev) {
-		SMSC_WARN(pdata, hw, "phy_dev is NULL");
-		return -EAGAIN;
+		retval = smsc911x_mii_probe(dev);
+		if (retval < 0) {
+			SMSC_WARN(pdata, probe, "Error starting phy");
+			goto out;
+		}
 	}
 
 	/* Reset the LAN911x */
-	if (smsc911x_soft_reset(pdata)) {
+	retval = smsc911x_soft_reset(pdata);
+	if (retval) {
 		SMSC_WARN(pdata, hw, "soft reset failed");
-		return -EIO;
+		goto mii_free_out;
 	}
 
 	smsc911x_reg_write(pdata, HW_CFG, 0x00050000);
@@ -1586,6 +1655,15 @@
 	pdata->software_irq_signal = 0;
 	smp_wmb();
 
+	irq_flags = irq_get_trigger_type(dev->irq);
+	retval = request_irq(dev->irq, smsc911x_irqhandler,
+			     irq_flags | IRQF_SHARED, dev->name, dev);
+	if (retval) {
+		SMSC_WARN(pdata, probe,
+			  "Unable to claim requested irq: %d", dev->irq);
+		goto mii_free_out;
+	}
+
 	temp = smsc911x_reg_read(pdata, INT_EN);
 	temp |= INT_EN_SW_INT_EN_;
 	smsc911x_reg_write(pdata, INT_EN, temp);
@@ -1600,7 +1678,8 @@
 	if (!pdata->software_irq_signal) {
 		netdev_warn(dev, "ISR failed signaling test (IRQ %d)\n",
 			    dev->irq);
-		return -ENODEV;
+		retval = -ENODEV;
+		goto irq_stop_out;
 	}
 	SMSC_TRACE(pdata, ifup, "IRQ handler passed test using IRQ %d",
 		   dev->irq);
@@ -1646,6 +1725,14 @@
 
 	netif_start_queue(dev);
 	return 0;
+
+irq_stop_out:
+	free_irq(dev->irq, dev);
+mii_free_out:
+	phy_disconnect(dev->phydev);
+	dev->phydev = NULL;
+out:
+	return retval;
 }
 
 /* Entry point for stopping the interface */
@@ -1667,9 +1754,15 @@
 	dev->stats.rx_dropped += smsc911x_reg_read(pdata, RX_DROP);
 	smsc911x_tx_update_txcounters(dev);
 
+	free_irq(dev->irq, dev);
+
 	/* Bring the PHY down */
-	if (dev->phydev)
+	if (dev->phydev) {
 		phy_stop(dev->phydev);
+		phy_disconnect(dev->phydev);
+		dev->phydev = NULL;
+	}
+	netif_carrier_off(dev);
 
 	SMSC_TRACE(pdata, ifdown, "Interface stopped");
 	return 0;
@@ -1811,67 +1904,6 @@
 	spin_unlock_irqrestore(&pdata->mac_lock, flags);
 }
 
-static irqreturn_t smsc911x_irqhandler(int irq, void *dev_id)
-{
-	struct net_device *dev = dev_id;
-	struct smsc911x_data *pdata = netdev_priv(dev);
-	u32 intsts = smsc911x_reg_read(pdata, INT_STS);
-	u32 inten = smsc911x_reg_read(pdata, INT_EN);
-	int serviced = IRQ_NONE;
-	u32 temp;
-
-	if (unlikely(intsts & inten & INT_STS_SW_INT_)) {
-		temp = smsc911x_reg_read(pdata, INT_EN);
-		temp &= (~INT_EN_SW_INT_EN_);
-		smsc911x_reg_write(pdata, INT_EN, temp);
-		smsc911x_reg_write(pdata, INT_STS, INT_STS_SW_INT_);
-		pdata->software_irq_signal = 1;
-		smp_wmb();
-		serviced = IRQ_HANDLED;
-	}
-
-	if (unlikely(intsts & inten & INT_STS_RXSTOP_INT_)) {
-		/* Called when there is a multicast update scheduled and
-		 * it is now safe to complete the update */
-		SMSC_TRACE(pdata, intr, "RX Stop interrupt");
-		smsc911x_reg_write(pdata, INT_STS, INT_STS_RXSTOP_INT_);
-		if (pdata->multicast_update_pending)
-			smsc911x_rx_multicast_update_workaround(pdata);
-		serviced = IRQ_HANDLED;
-	}
-
-	if (intsts & inten & INT_STS_TDFA_) {
-		temp = smsc911x_reg_read(pdata, FIFO_INT);
-		temp |= FIFO_INT_TX_AVAIL_LEVEL_;
-		smsc911x_reg_write(pdata, FIFO_INT, temp);
-		smsc911x_reg_write(pdata, INT_STS, INT_STS_TDFA_);
-		netif_wake_queue(dev);
-		serviced = IRQ_HANDLED;
-	}
-
-	if (unlikely(intsts & inten & INT_STS_RXE_)) {
-		SMSC_TRACE(pdata, intr, "RX Error interrupt");
-		smsc911x_reg_write(pdata, INT_STS, INT_STS_RXE_);
-		serviced = IRQ_HANDLED;
-	}
-
-	if (likely(intsts & inten & INT_STS_RSFL_)) {
-		if (likely(napi_schedule_prep(&pdata->napi))) {
-			/* Disable Rx interrupts */
-			temp = smsc911x_reg_read(pdata, INT_EN);
-			temp &= (~INT_EN_RSFL_EN_);
-			smsc911x_reg_write(pdata, INT_EN, temp);
-			/* Schedule a NAPI poll */
-			__napi_schedule(&pdata->napi);
-		} else {
-			SMSC_WARN(pdata, rx_err, "napi_schedule_prep failed");
-		}
-		serviced = IRQ_HANDLED;
-	}
-
-	return serviced;
-}
-
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void smsc911x_poll_controller(struct net_device *dev)
 {
@@ -2291,16 +2323,14 @@
 	pdata = netdev_priv(dev);
 	BUG_ON(!pdata);
 	BUG_ON(!pdata->ioaddr);
-	BUG_ON(!dev->phydev);
+	WARN_ON(dev->phydev);
 
 	SMSC_TRACE(pdata, ifdown, "Stopping driver");
 
-	phy_disconnect(dev->phydev);
 	mdiobus_unregister(pdata->mii_bus);
 	mdiobus_free(pdata->mii_bus);
 
 	unregister_netdev(dev);
-	free_irq(dev->irq, dev);
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 					   "smsc911x-memory");
 	if (!res)
@@ -2385,8 +2415,7 @@
 	struct smsc911x_data *pdata;
 	struct smsc911x_platform_config *config = dev_get_platdata(&pdev->dev);
 	struct resource *res;
-	unsigned int intcfg = 0;
-	int res_size, irq, irq_flags;
+	int res_size, irq;
 	int retval;
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
@@ -2425,7 +2454,6 @@
 
 	pdata = netdev_priv(dev);
 	dev->irq = irq;
-	irq_flags = irq_get_trigger_type(irq);
 	pdata->ioaddr = ioremap_nocache(res->start, res_size);
 
 	pdata->dev = dev;
@@ -2472,41 +2500,21 @@
 	if (retval < 0)
 		goto out_disable_resources;
 
-	/* configure irq polarity and type before connecting isr */
-	if (pdata->config.irq_polarity == SMSC911X_IRQ_POLARITY_ACTIVE_HIGH)
-		intcfg |= INT_CFG_IRQ_POL_;
-
-	if (pdata->config.irq_type == SMSC911X_IRQ_TYPE_PUSH_PULL)
-		intcfg |= INT_CFG_IRQ_TYPE_;
-
-	smsc911x_reg_write(pdata, INT_CFG, intcfg);
-
-	/* Ensure interrupts are globally disabled before connecting ISR */
-	smsc911x_disable_irq_chip(dev);
-
-	retval = request_irq(dev->irq, smsc911x_irqhandler,
-			     irq_flags | IRQF_SHARED, dev->name, dev);
-	if (retval) {
-		SMSC_WARN(pdata, probe,
-			  "Unable to claim requested irq: %d", dev->irq);
-		goto out_disable_resources;
-	}
-
 	netif_carrier_off(dev);
 
-	retval = register_netdev(dev);
-	if (retval) {
-		SMSC_WARN(pdata, probe, "Error %i registering device", retval);
-		goto out_free_irq;
-	} else {
-		SMSC_TRACE(pdata, probe,
-			   "Network interface: \"%s\"", dev->name);
-	}
-
 	retval = smsc911x_mii_init(pdev, dev);
 	if (retval) {
 		SMSC_WARN(pdata, probe, "Error %i initialising mii", retval);
-		goto out_unregister_netdev_5;
+		goto out_disable_resources;
+	}
+
+	retval = register_netdev(dev);
+	if (retval) {
+		SMSC_WARN(pdata, probe, "Error %i registering device", retval);
+		goto out_disable_resources;
+	} else {
+		SMSC_TRACE(pdata, probe,
+			   "Network interface: \"%s\"", dev->name);
 	}
 
 	spin_lock_irq(&pdata->mac_lock);
@@ -2544,10 +2552,6 @@
 
 	return 0;
 
-out_unregister_netdev_5:
-	unregister_netdev(dev);
-out_free_irq:
-	free_irq(dev->irq, dev);
 out_disable_resources:
 	pm_runtime_put(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 8f06a66..c732b8c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -104,6 +104,18 @@
 	  device driver. This driver is used on for the STi series
 	  SOCs GMAC ethernet controller.
 
+config DWMAC_STM32
+	tristate "STM32 DWMAC support"
+	default ARCH_STM32
+	depends on OF && HAS_IOMEM
+	select MFD_SYSCON
+	---help---
+	  Support for ethernet controller on STM32 SOCs.
+
+	  This selects STM32 SoC glue layer support for the stmmac
+	  device driver. This driver is used on for the STM32 series
+	  SOCs GMAC ethernet controller.
+
 config DWMAC_SUNXI
 	tristate "Allwinner GMAC support"
 	default ARCH_SUNXI
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 44b630c..f0c9396 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -13,6 +13,7 @@
 obj-$(CONFIG_DWMAC_ROCKCHIP)	+= dwmac-rk.o
 obj-$(CONFIG_DWMAC_SOCFPGA)	+= dwmac-altr-socfpga.o
 obj-$(CONFIG_DWMAC_STI)		+= dwmac-sti.o
+obj-$(CONFIG_DWMAC_STM32)	+= dwmac-stm32.o
 obj-$(CONFIG_DWMAC_SUNXI)	+= dwmac-sunxi.o
 obj-$(CONFIG_DWMAC_GENERIC)	+= dwmac-generic.o
 stmmac-platform-objs:= stmmac_platform.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 2533b91..d3292c4a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -30,7 +30,7 @@
 #include <linux/stmmac.h>
 #include <linux/phy.h>
 #include <linux/module.h>
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 #define STMMAC_VLAN_TAG_USED
 #include <linux/if_vlan.h>
 #endif
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 9210591..3740a44 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
+#include <linux/pm_runtime.h>
 
 #include "stmmac_platform.h"
 
@@ -301,6 +302,118 @@
 	.set_rmii_speed = rk3288_set_rmii_speed,
 };
 
+#define RK3366_GRF_SOC_CON6	0x0418
+#define RK3366_GRF_SOC_CON7	0x041c
+
+/* RK3366_GRF_SOC_CON6 */
+#define RK3366_GMAC_PHY_INTF_SEL_RGMII	(GRF_BIT(9) | GRF_CLR_BIT(10) | \
+					 GRF_CLR_BIT(11))
+#define RK3366_GMAC_PHY_INTF_SEL_RMII	(GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \
+					 GRF_BIT(11))
+#define RK3366_GMAC_FLOW_CTRL		GRF_BIT(8)
+#define RK3366_GMAC_FLOW_CTRL_CLR	GRF_CLR_BIT(8)
+#define RK3366_GMAC_SPEED_10M		GRF_CLR_BIT(7)
+#define RK3366_GMAC_SPEED_100M		GRF_BIT(7)
+#define RK3366_GMAC_RMII_CLK_25M	GRF_BIT(3)
+#define RK3366_GMAC_RMII_CLK_2_5M	GRF_CLR_BIT(3)
+#define RK3366_GMAC_CLK_125M		(GRF_CLR_BIT(4) | GRF_CLR_BIT(5))
+#define RK3366_GMAC_CLK_25M		(GRF_BIT(4) | GRF_BIT(5))
+#define RK3366_GMAC_CLK_2_5M		(GRF_CLR_BIT(4) | GRF_BIT(5))
+#define RK3366_GMAC_RMII_MODE		GRF_BIT(6)
+#define RK3366_GMAC_RMII_MODE_CLR	GRF_CLR_BIT(6)
+
+/* RK3366_GRF_SOC_CON7 */
+#define RK3366_GMAC_TXCLK_DLY_ENABLE	GRF_BIT(7)
+#define RK3366_GMAC_TXCLK_DLY_DISABLE	GRF_CLR_BIT(7)
+#define RK3366_GMAC_RXCLK_DLY_ENABLE	GRF_BIT(15)
+#define RK3366_GMAC_RXCLK_DLY_DISABLE	GRF_CLR_BIT(15)
+#define RK3366_GMAC_CLK_RX_DL_CFG(val)	HIWORD_UPDATE(val, 0x7F, 8)
+#define RK3366_GMAC_CLK_TX_DL_CFG(val)	HIWORD_UPDATE(val, 0x7F, 0)
+
+static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv,
+				int tx_delay, int rx_delay)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+		     RK3366_GMAC_PHY_INTF_SEL_RGMII |
+		     RK3366_GMAC_RMII_MODE_CLR);
+	regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON7,
+		     RK3366_GMAC_RXCLK_DLY_ENABLE |
+		     RK3366_GMAC_TXCLK_DLY_ENABLE |
+		     RK3366_GMAC_CLK_RX_DL_CFG(rx_delay) |
+		     RK3366_GMAC_CLK_TX_DL_CFG(tx_delay));
+}
+
+static void rk3366_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+		     RK3366_GMAC_PHY_INTF_SEL_RMII | RK3366_GMAC_RMII_MODE);
+}
+
+static void rk3366_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	if (speed == 10)
+		regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+			     RK3366_GMAC_CLK_2_5M);
+	else if (speed == 100)
+		regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+			     RK3366_GMAC_CLK_25M);
+	else if (speed == 1000)
+		regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+			     RK3366_GMAC_CLK_125M);
+	else
+		dev_err(dev, "unknown speed value for RGMII! speed=%d", speed);
+}
+
+static void rk3366_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	if (speed == 10) {
+		regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+			     RK3366_GMAC_RMII_CLK_2_5M |
+			     RK3366_GMAC_SPEED_10M);
+	} else if (speed == 100) {
+		regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6,
+			     RK3366_GMAC_RMII_CLK_25M |
+			     RK3366_GMAC_SPEED_100M);
+	} else {
+		dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+	}
+}
+
+static const struct rk_gmac_ops rk3366_ops = {
+	.set_to_rgmii = rk3366_set_to_rgmii,
+	.set_to_rmii = rk3366_set_to_rmii,
+	.set_rgmii_speed = rk3366_set_rgmii_speed,
+	.set_rmii_speed = rk3366_set_rmii_speed,
+};
+
 #define RK3368_GRF_SOC_CON15	0x043c
 #define RK3368_GRF_SOC_CON16	0x0440
 
@@ -413,6 +526,118 @@
 	.set_rmii_speed = rk3368_set_rmii_speed,
 };
 
+#define RK3399_GRF_SOC_CON5	0xc214
+#define RK3399_GRF_SOC_CON6	0xc218
+
+/* RK3399_GRF_SOC_CON5 */
+#define RK3399_GMAC_PHY_INTF_SEL_RGMII	(GRF_BIT(9) | GRF_CLR_BIT(10) | \
+					 GRF_CLR_BIT(11))
+#define RK3399_GMAC_PHY_INTF_SEL_RMII	(GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \
+					 GRF_BIT(11))
+#define RK3399_GMAC_FLOW_CTRL		GRF_BIT(8)
+#define RK3399_GMAC_FLOW_CTRL_CLR	GRF_CLR_BIT(8)
+#define RK3399_GMAC_SPEED_10M		GRF_CLR_BIT(7)
+#define RK3399_GMAC_SPEED_100M		GRF_BIT(7)
+#define RK3399_GMAC_RMII_CLK_25M	GRF_BIT(3)
+#define RK3399_GMAC_RMII_CLK_2_5M	GRF_CLR_BIT(3)
+#define RK3399_GMAC_CLK_125M		(GRF_CLR_BIT(4) | GRF_CLR_BIT(5))
+#define RK3399_GMAC_CLK_25M		(GRF_BIT(4) | GRF_BIT(5))
+#define RK3399_GMAC_CLK_2_5M		(GRF_CLR_BIT(4) | GRF_BIT(5))
+#define RK3399_GMAC_RMII_MODE		GRF_BIT(6)
+#define RK3399_GMAC_RMII_MODE_CLR	GRF_CLR_BIT(6)
+
+/* RK3399_GRF_SOC_CON6 */
+#define RK3399_GMAC_TXCLK_DLY_ENABLE	GRF_BIT(7)
+#define RK3399_GMAC_TXCLK_DLY_DISABLE	GRF_CLR_BIT(7)
+#define RK3399_GMAC_RXCLK_DLY_ENABLE	GRF_BIT(15)
+#define RK3399_GMAC_RXCLK_DLY_DISABLE	GRF_CLR_BIT(15)
+#define RK3399_GMAC_CLK_RX_DL_CFG(val)	HIWORD_UPDATE(val, 0x7F, 8)
+#define RK3399_GMAC_CLK_TX_DL_CFG(val)	HIWORD_UPDATE(val, 0x7F, 0)
+
+static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv,
+				int tx_delay, int rx_delay)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+		     RK3399_GMAC_PHY_INTF_SEL_RGMII |
+		     RK3399_GMAC_RMII_MODE_CLR);
+	regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON6,
+		     RK3399_GMAC_RXCLK_DLY_ENABLE |
+		     RK3399_GMAC_TXCLK_DLY_ENABLE |
+		     RK3399_GMAC_CLK_RX_DL_CFG(rx_delay) |
+		     RK3399_GMAC_CLK_TX_DL_CFG(tx_delay));
+}
+
+static void rk3399_set_to_rmii(struct rk_priv_data *bsp_priv)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+		     RK3399_GMAC_PHY_INTF_SEL_RMII | RK3399_GMAC_RMII_MODE);
+}
+
+static void rk3399_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	if (speed == 10)
+		regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+			     RK3399_GMAC_CLK_2_5M);
+	else if (speed == 100)
+		regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+			     RK3399_GMAC_CLK_25M);
+	else if (speed == 1000)
+		regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+			     RK3399_GMAC_CLK_125M);
+	else
+		dev_err(dev, "unknown speed value for RGMII! speed=%d", speed);
+}
+
+static void rk3399_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
+{
+	struct device *dev = &bsp_priv->pdev->dev;
+
+	if (IS_ERR(bsp_priv->grf)) {
+		dev_err(dev, "%s: Missing rockchip,grf property\n", __func__);
+		return;
+	}
+
+	if (speed == 10) {
+		regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+			     RK3399_GMAC_RMII_CLK_2_5M |
+			     RK3399_GMAC_SPEED_10M);
+	} else if (speed == 100) {
+		regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5,
+			     RK3399_GMAC_RMII_CLK_25M |
+			     RK3399_GMAC_SPEED_100M);
+	} else {
+		dev_err(dev, "unknown speed value for RMII! speed=%d", speed);
+	}
+}
+
+static const struct rk_gmac_ops rk3399_ops = {
+	.set_to_rgmii = rk3399_set_to_rgmii,
+	.set_to_rmii = rk3399_set_to_rmii,
+	.set_rgmii_speed = rk3399_set_rgmii_speed,
+	.set_rmii_speed = rk3399_set_rmii_speed,
+};
+
 static int gmac_clk_init(struct rk_priv_data *bsp_priv)
 {
 	struct device *dev = &bsp_priv->pdev->dev;
@@ -629,6 +854,16 @@
 							"rockchip,grf");
 	bsp_priv->pdev = pdev;
 
+	gmac_clk_init(bsp_priv);
+
+	return bsp_priv;
+}
+
+static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
+{
+	int ret;
+	struct device *dev = &bsp_priv->pdev->dev;
+
 	/*rmii or rgmii*/
 	if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) {
 		dev_info(dev, "init for RGMII\n");
@@ -641,15 +876,6 @@
 		dev_err(dev, "NO interface defined!\n");
 	}
 
-	gmac_clk_init(bsp_priv);
-
-	return bsp_priv;
-}
-
-static int rk_gmac_powerup(struct rk_priv_data *bsp_priv)
-{
-	int ret;
-
 	ret = phy_power_on(bsp_priv, true);
 	if (ret)
 		return ret;
@@ -658,11 +884,19 @@
 	if (ret)
 		return ret;
 
+	pm_runtime_enable(dev);
+	pm_runtime_get_sync(dev);
+
 	return 0;
 }
 
 static void rk_gmac_powerdown(struct rk_priv_data *gmac)
 {
+	struct device *dev = &gmac->pdev->dev;
+
+	pm_runtime_put_sync(dev);
+	pm_runtime_disable(dev);
+
 	phy_power_on(gmac, false);
 	gmac_clk_enable(gmac, false);
 }
@@ -760,7 +994,9 @@
 static const struct of_device_id rk_gmac_dwmac_match[] = {
 	{ .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops },
 	{ .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops },
+	{ .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops },
 	{ .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops },
+	{ .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
new file mode 100644
index 0000000..e5a926b
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -0,0 +1,194 @@
+/*
+ * dwmac-stm32.c - DWMAC Specific Glue layer for STM32 MCU
+ *
+ * Copyright (C) Alexandre Torgue 2015
+ * Author:  Alexandre Torgue <alexandre.torgue@gmail.com>
+ * License terms:  GNU General Public License (GPL), version 2
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_net.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/stmmac.h>
+
+#include "stmmac_platform.h"
+
+#define MII_PHY_SEL_MASK	BIT(23)
+
+struct stm32_dwmac {
+	struct clk *clk_tx;
+	struct clk *clk_rx;
+	u32 mode_reg;		/* MAC glue-logic mode register */
+	struct regmap *regmap;
+	u32 speed;
+};
+
+static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat)
+{
+	struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
+	u32 reg = dwmac->mode_reg;
+	u32 val;
+	int ret;
+
+	val = (plat_dat->interface == PHY_INTERFACE_MODE_MII) ? 0 : 1;
+	ret = regmap_update_bits(dwmac->regmap, reg, MII_PHY_SEL_MASK, val);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(dwmac->clk_tx);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(dwmac->clk_rx);
+	if (ret)
+		clk_disable_unprepare(dwmac->clk_tx);
+
+	return ret;
+}
+
+static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac)
+{
+	clk_disable_unprepare(dwmac->clk_tx);
+	clk_disable_unprepare(dwmac->clk_rx);
+}
+
+static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac,
+				  struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	int err;
+
+	/*  Get TX/RX clocks */
+	dwmac->clk_tx = devm_clk_get(dev, "mac-clk-tx");
+	if (IS_ERR(dwmac->clk_tx)) {
+		dev_err(dev, "No tx clock provided...\n");
+		return PTR_ERR(dwmac->clk_tx);
+	}
+	dwmac->clk_rx = devm_clk_get(dev, "mac-clk-rx");
+	if (IS_ERR(dwmac->clk_rx)) {
+		dev_err(dev, "No rx clock provided...\n");
+		return PTR_ERR(dwmac->clk_rx);
+	}
+
+	/* Get mode register */
+	dwmac->regmap = syscon_regmap_lookup_by_phandle(np, "st,syscon");
+	if (IS_ERR(dwmac->regmap))
+		return PTR_ERR(dwmac->regmap);
+
+	err = of_property_read_u32_index(np, "st,syscon", 1, &dwmac->mode_reg);
+	if (err)
+		dev_err(dev, "Can't get sysconfig mode offset (%d)\n", err);
+
+	return err;
+}
+
+static int stm32_dwmac_probe(struct platform_device *pdev)
+{
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	struct stm32_dwmac *dwmac;
+	int ret;
+
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		return ret;
+
+	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	if (IS_ERR(plat_dat))
+		return PTR_ERR(plat_dat);
+
+	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+	if (!dwmac)
+		return -ENOMEM;
+
+	ret = stm32_dwmac_parse_data(dwmac, &pdev->dev);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to parse OF data\n");
+		return ret;
+	}
+
+	plat_dat->bsp_priv = dwmac;
+
+	ret = stm32_dwmac_init(plat_dat);
+	if (ret)
+		return ret;
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		stm32_dwmac_clk_disable(dwmac);
+
+	return ret;
+}
+
+static int stm32_dwmac_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	int ret = stmmac_dvr_remove(&pdev->dev);
+
+	stm32_dwmac_clk_disable(priv->plat->bsp_priv);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int stm32_dwmac_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	int ret;
+
+	ret = stmmac_suspend(dev);
+	stm32_dwmac_clk_disable(priv->plat->bsp_priv);
+
+	return ret;
+}
+
+static int stm32_dwmac_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct stmmac_priv *priv = netdev_priv(ndev);
+	int ret;
+
+	ret = stm32_dwmac_init(priv->plat);
+	if (ret)
+		return ret;
+
+	ret = stmmac_resume(dev);
+
+	return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops,
+	stm32_dwmac_suspend, stm32_dwmac_resume);
+
+static const struct of_device_id stm32_dwmac_match[] = {
+	{ .compatible = "st,stm32-dwmac"},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, stm32_dwmac_match);
+
+static struct platform_driver stm32_dwmac_driver = {
+	.probe  = stm32_dwmac_probe,
+	.remove = stm32_dwmac_remove,
+	.driver = {
+		.name           = "stm32-dwmac",
+		.pm		= &stm32_dwmac_pm_ops,
+		.of_match_table = stm32_dwmac_match,
+	},
+};
+module_platform_driver(stm32_dwmac_driver);
+
+MODULE_AUTHOR("Alexandre Torgue <alexandre.torgue@gmail.com>");
+MODULE_DESCRIPTION("STMicroelectronics MCU DWMAC Specific Glue layer");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index cbefe9e..7df4ff1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -145,7 +145,7 @@
 		numhashregs = 8;
 		break;
 	default:
-		pr_debug("STMMAC: err in setting mulitcast filter\n");
+		pr_debug("STMMAC: err in setting multicast filter\n");
 		return;
 		break;
 	}
@@ -261,7 +261,7 @@
 	}
 	if (mode & WAKE_UCAST) {
 		pr_debug("GMAC: WOL on global unicast\n");
-		pmt |= global_unicast;
+		pmt |= power_down | global_unicast | wake_up_frame_en;
 	}
 
 	writel(pmt, ioaddr + GMAC_PMT);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index df5580d..51019b7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -102,7 +102,7 @@
 	}
 	if (mode & WAKE_UCAST) {
 		pr_debug("GMAC: WOL on global unicast\n");
-		pmt |= global_unicast;
+		pmt |= power_down | global_unicast | wake_up_frame_en;
 	}
 
 	writel(pmt, ioaddr + GMAC_PMT);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 756bb54..0a0d6a8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -265,6 +265,7 @@
 	 * once needed on other platforms.
 	 */
 	if (of_device_is_compatible(np, "st,spear600-gmac") ||
+		of_device_is_compatible(np, "snps,dwmac-3.50a") ||
 		of_device_is_compatible(np, "snps,dwmac-3.70a") ||
 		of_device_is_compatible(np, "snps,dwmac")) {
 		/* Note that the max-frame-size parameter as defined in the
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
index 170a18b..6e3b829 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
@@ -187,7 +187,7 @@
 	if (IS_ERR(priv->ptp_clock)) {
 		priv->ptp_clock = NULL;
 		pr_err("ptp_clock_register() failed on %s\n", priv->dev->name);
-	} else
+	} else if (priv->ptp_clock)
 		pr_debug("Added PTP HW clock successfully on %s\n",
 			 priv->dev->name);
 
diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
index 9f159a7..0d00531 100644
--- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c
+++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
@@ -1246,7 +1246,7 @@
 	lp->mii_bus->read  = &dwceqos_mdio_read;
 	lp->mii_bus->write = &dwceqos_mdio_write;
 	lp->mii_bus->priv = lp;
-	lp->mii_bus->parent = &lp->ndev->dev;
+	lp->mii_bus->parent = &lp->pdev->dev;
 
 	of_address_to_resource(lp->pdev->dev.of_node, 0, &res);
 	snprintf(lp->mii_bus->id, MII_BUS_ID_SIZE, "%.8llx",
@@ -1622,13 +1622,7 @@
 		DWCEQOS_MMC_CTRL_RSTONRD);
 	dwceqos_enable_mmc_interrupt(lp);
 
-	/* Enable Interrupts */
-	dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE,
-		      DWCEQOS_DMA_CH0_IE_NIE |
-		      DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE |
-		      DWCEQOS_DMA_CH0_IE_AIE |
-		      DWCEQOS_DMA_CH0_IE_FBEE);
-
+	dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE, 0);
 	dwceqos_write(lp, REG_DWCEQOS_MAC_IE, 0);
 
 	dwceqos_write(lp, REG_DWCEQOS_MAC_CFG, DWCEQOS_MAC_CFG_IPC |
@@ -1905,6 +1899,15 @@
 	netif_start_queue(ndev);
 	tasklet_enable(&lp->tx_bdreclaim_tasklet);
 
+	/* Enable Interrupts -- do this only after we enable NAPI and the
+	 * tasklet.
+	 */
+	dwceqos_write(lp, REG_DWCEQOS_DMA_CH0_IE,
+		      DWCEQOS_DMA_CH0_IE_NIE |
+		      DWCEQOS_DMA_CH0_IE_RIE | DWCEQOS_DMA_CH0_IE_TIE |
+		      DWCEQOS_DMA_CH0_IE_AIE |
+		      DWCEQOS_DMA_CH0_IE_FBEE);
+
 	return 0;
 }
 
@@ -2740,7 +2743,7 @@
 	lp->msg_enable = msglevel;
 }
 
-static struct ethtool_ops dwceqos_ethtool_ops = {
+static const struct ethtool_ops dwceqos_ethtool_ops = {
 	.get_drvinfo    = dwceqos_get_drvinfo,
 	.get_link       = ethtool_op_get_link,
 	.get_pauseparam = dwceqos_get_pauseparam,
@@ -2758,7 +2761,7 @@
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
-static struct net_device_ops netdev_ops = {
+static const struct net_device_ops netdev_ops = {
 	.ndo_open		= dwceqos_open,
 	.ndo_stop		= dwceqos_stop,
 	.ndo_start_xmit		= dwceqos_start_xmit,
@@ -2850,25 +2853,17 @@
 
 	ndev->features = ndev->hw_features;
 
-	netif_napi_add(ndev, &lp->napi, dwceqos_rx_poll, NAPI_POLL_WEIGHT);
-
-	ret = register_netdev(ndev);
-	if (ret) {
-		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-		goto err_out_clk_dis_aper;
-	}
-
 	lp->phy_ref_clk = devm_clk_get(&pdev->dev, "phy_ref_clk");
 	if (IS_ERR(lp->phy_ref_clk)) {
 		dev_err(&pdev->dev, "phy_ref_clk clock not found.\n");
 		ret = PTR_ERR(lp->phy_ref_clk);
-		goto err_out_unregister_netdev;
+		goto err_out_clk_dis_aper;
 	}
 
 	ret = clk_prepare_enable(lp->phy_ref_clk);
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to enable device clock.\n");
-		goto err_out_unregister_netdev;
+		goto err_out_clk_dis_aper;
 	}
 
 	lp->phy_node = of_parse_phandle(lp->pdev->dev.of_node,
@@ -2877,7 +2872,7 @@
 		ret = of_phy_register_fixed_link(lp->pdev->dev.of_node);
 		if (ret < 0) {
 			dev_err(&pdev->dev, "invalid fixed-link");
-			goto err_out_unregister_clk_notifier;
+			goto err_out_clk_dis_phy;
 		}
 
 		lp->phy_node = of_node_get(lp->pdev->dev.of_node);
@@ -2886,7 +2881,7 @@
 	ret = of_get_phy_mode(lp->pdev->dev.of_node);
 	if (ret < 0) {
 		dev_err(&lp->pdev->dev, "error in getting phy i/f\n");
-		goto err_out_unregister_clk_notifier;
+		goto err_out_clk_dis_phy;
 	}
 
 	lp->phy_interface = ret;
@@ -2894,14 +2889,14 @@
 	ret = dwceqos_mii_init(lp);
 	if (ret) {
 		dev_err(&lp->pdev->dev, "error in dwceqos_mii_init\n");
-		goto err_out_unregister_clk_notifier;
+		goto err_out_clk_dis_phy;
 	}
 
 	ret = dwceqos_mii_probe(ndev);
 	if (ret != 0) {
 		netdev_err(ndev, "mii_probe fail.\n");
 		ret = -ENXIO;
-		goto err_out_unregister_clk_notifier;
+		goto err_out_clk_dis_phy;
 	}
 
 	dwceqos_set_umac_addr(lp, lp->ndev->dev_addr, 0);
@@ -2919,7 +2914,7 @@
 	if (ret) {
 		dev_err(&lp->pdev->dev, "Unable to retrieve DT, error %d\n",
 			ret);
-		goto err_out_unregister_clk_notifier;
+		goto err_out_clk_dis_phy;
 	}
 	dev_info(&lp->pdev->dev, "pdev->id %d, baseaddr 0x%08lx, irq %d\n",
 		 pdev->id, ndev->base_addr, ndev->irq);
@@ -2929,18 +2924,24 @@
 	if (ret) {
 		dev_err(&lp->pdev->dev, "Unable to request IRQ %d, error %d\n",
 			ndev->irq, ret);
-		goto err_out_unregister_clk_notifier;
+		goto err_out_clk_dis_phy;
 	}
 
 	if (netif_msg_probe(lp))
 		netdev_dbg(ndev, "net_local@%p\n", lp);
 
+	netif_napi_add(ndev, &lp->napi, dwceqos_rx_poll, NAPI_POLL_WEIGHT);
+
+	ret = register_netdev(ndev);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
+			goto err_out_clk_dis_phy;
+	}
+
 	return 0;
 
-err_out_unregister_clk_notifier:
+err_out_clk_dis_phy:
 	clk_disable_unprepare(lp->phy_ref_clk);
-err_out_unregister_netdev:
-	unregister_netdev(ndev);
 err_out_clk_dis_aper:
 	clk_disable_unprepare(lp->apb_pclk);
 err_out_free_netdev:
diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index 7452b5f..7108c68 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -1987,7 +1987,7 @@
 	if ((readl(nic->regs + FPGA_VER) & 0xFFF) >= 378) {
 		err = pci_enable_msi(pdev);
 		if (err)
-			pr_err("Can't eneble msi. error is %d\n", err);
+			pr_err("Can't enable msi. error is %d\n", err);
 		else
 			nic->irq_type = IRQ_MSI;
 	} else
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index d300d53..fa0cfda 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -546,7 +546,8 @@
 
 static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	int queue, len;
+	int queue;
+	unsigned int len;
 	struct cpmac_desc *desc;
 	struct cpmac_priv *priv = netdev_priv(dev);
 
@@ -556,7 +557,7 @@
 	if (unlikely(skb_padto(skb, ETH_ZLEN)))
 		return NETDEV_TX_OK;
 
-	len = max(skb->len, ETH_ZLEN);
+	len = max_t(unsigned int, skb->len, ETH_ZLEN);
 	queue = skb_get_queue_mapping(skb);
 	netif_stop_subqueue(dev, queue);
 
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c51f346..c6cff3d 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -124,7 +124,7 @@
 
 #define RX_PRIORITY_MAPPING	0x76543210
 #define TX_PRIORITY_MAPPING	0x33221100
-#define CPDMA_TX_PRIORITY_MAP	0x76543210
+#define CPDMA_TX_PRIORITY_MAP	0x01234567
 
 #define CPSW_VLAN_AWARE		BIT(1)
 #define CPSW_ALE_VLAN_AWARE	1
@@ -140,9 +140,11 @@
 #define CPSW_CMINTMAX_INTVL	(1000 / CPSW_CMINTMIN_CNT)
 #define CPSW_CMINTMIN_INTVL	((1000 / CPSW_CMINTMAX_CNT) + 1)
 
-#define cpsw_slave_index(priv)				\
-		((priv->data.dual_emac) ? priv->emac_port :	\
-		priv->data.active_slave)
+#define cpsw_slave_index(cpsw, priv)				\
+		((cpsw->data.dual_emac) ? priv->emac_port :	\
+		cpsw->data.active_slave)
+#define IRQ_NUM			2
+#define CPSW_MAX_QUEUES		8
 
 static int debug_level;
 module_param(debug_level, int, 0);
@@ -363,38 +365,41 @@
 	__raw_writel(val, slave->regs + offset);
 }
 
-struct cpsw_priv {
-	struct platform_device		*pdev;
-	struct net_device		*ndev;
-	struct napi_struct		napi_rx;
-	struct napi_struct		napi_tx;
+struct cpsw_common {
 	struct device			*dev;
 	struct cpsw_platform_data	data;
+	struct napi_struct		napi_rx;
+	struct napi_struct		napi_tx;
 	struct cpsw_ss_regs __iomem	*regs;
 	struct cpsw_wr_regs __iomem	*wr_regs;
 	u8 __iomem			*hw_stats;
 	struct cpsw_host_regs __iomem	*host_port_regs;
-	u32				msg_enable;
 	u32				version;
 	u32				coal_intvl;
 	u32				bus_freq_mhz;
 	int				rx_packet_max;
-	struct clk			*clk;
-	u8				mac_addr[ETH_ALEN];
 	struct cpsw_slave		*slaves;
 	struct cpdma_ctlr		*dma;
-	struct cpdma_chan		*txch, *rxch;
+	struct cpdma_chan		*txch[CPSW_MAX_QUEUES];
+	struct cpdma_chan		*rxch[CPSW_MAX_QUEUES];
 	struct cpsw_ale			*ale;
-	bool				rx_pause;
-	bool				tx_pause;
 	bool				quirk_irq;
 	bool				rx_irq_disabled;
 	bool				tx_irq_disabled;
-	/* snapshot of IRQ numbers */
-	u32 irqs_table[4];
-	u32 num_irqs;
-	struct cpts *cpts;
+	u32 irqs_table[IRQ_NUM];
+	struct cpts			*cpts;
+	int				rx_ch_num, tx_ch_num;
+};
+
+struct cpsw_priv {
+	struct net_device		*ndev;
+	struct device			*dev;
+	u32				msg_enable;
+	u8				mac_addr[ETH_ALEN];
+	bool				rx_pause;
+	bool				tx_pause;
 	u32 emac_port;
+	struct cpsw_common *cpsw;
 };
 
 struct cpsw_stats {
@@ -455,108 +460,92 @@
 	{ "Rx Start of Frame Overruns", CPSW_STAT(rxsofoverruns) },
 	{ "Rx Middle of Frame Overruns", CPSW_STAT(rxmofoverruns) },
 	{ "Rx DMA Overruns", CPSW_STAT(rxdmaoverruns) },
-	{ "Rx DMA chan: head_enqueue", CPDMA_RX_STAT(head_enqueue) },
-	{ "Rx DMA chan: tail_enqueue", CPDMA_RX_STAT(tail_enqueue) },
-	{ "Rx DMA chan: pad_enqueue", CPDMA_RX_STAT(pad_enqueue) },
-	{ "Rx DMA chan: misqueued", CPDMA_RX_STAT(misqueued) },
-	{ "Rx DMA chan: desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) },
-	{ "Rx DMA chan: pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) },
-	{ "Rx DMA chan: runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) },
-	{ "Rx DMA chan: runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) },
-	{ "Rx DMA chan: empty_dequeue", CPDMA_RX_STAT(empty_dequeue) },
-	{ "Rx DMA chan: busy_dequeue", CPDMA_RX_STAT(busy_dequeue) },
-	{ "Rx DMA chan: good_dequeue", CPDMA_RX_STAT(good_dequeue) },
-	{ "Rx DMA chan: requeue", CPDMA_RX_STAT(requeue) },
-	{ "Rx DMA chan: teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) },
-	{ "Tx DMA chan: head_enqueue", CPDMA_TX_STAT(head_enqueue) },
-	{ "Tx DMA chan: tail_enqueue", CPDMA_TX_STAT(tail_enqueue) },
-	{ "Tx DMA chan: pad_enqueue", CPDMA_TX_STAT(pad_enqueue) },
-	{ "Tx DMA chan: misqueued", CPDMA_TX_STAT(misqueued) },
-	{ "Tx DMA chan: desc_alloc_fail", CPDMA_TX_STAT(desc_alloc_fail) },
-	{ "Tx DMA chan: pad_alloc_fail", CPDMA_TX_STAT(pad_alloc_fail) },
-	{ "Tx DMA chan: runt_receive_buf", CPDMA_TX_STAT(runt_receive_buff) },
-	{ "Tx DMA chan: runt_transmit_buf", CPDMA_TX_STAT(runt_transmit_buff) },
-	{ "Tx DMA chan: empty_dequeue", CPDMA_TX_STAT(empty_dequeue) },
-	{ "Tx DMA chan: busy_dequeue", CPDMA_TX_STAT(busy_dequeue) },
-	{ "Tx DMA chan: good_dequeue", CPDMA_TX_STAT(good_dequeue) },
-	{ "Tx DMA chan: requeue", CPDMA_TX_STAT(requeue) },
-	{ "Tx DMA chan: teardown_dequeue", CPDMA_TX_STAT(teardown_dequeue) },
 };
 
-#define CPSW_STATS_LEN	ARRAY_SIZE(cpsw_gstrings_stats)
+static const struct cpsw_stats cpsw_gstrings_ch_stats[] = {
+	{ "head_enqueue", CPDMA_RX_STAT(head_enqueue) },
+	{ "tail_enqueue", CPDMA_RX_STAT(tail_enqueue) },
+	{ "pad_enqueue", CPDMA_RX_STAT(pad_enqueue) },
+	{ "misqueued", CPDMA_RX_STAT(misqueued) },
+	{ "desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) },
+	{ "pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) },
+	{ "runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) },
+	{ "runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) },
+	{ "empty_dequeue", CPDMA_RX_STAT(empty_dequeue) },
+	{ "busy_dequeue", CPDMA_RX_STAT(busy_dequeue) },
+	{ "good_dequeue", CPDMA_RX_STAT(good_dequeue) },
+	{ "requeue", CPDMA_RX_STAT(requeue) },
+	{ "teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) },
+};
 
-#define napi_to_priv(napi)	container_of(napi, struct cpsw_priv, napi)
+#define CPSW_STATS_COMMON_LEN	ARRAY_SIZE(cpsw_gstrings_stats)
+#define CPSW_STATS_CH_LEN	ARRAY_SIZE(cpsw_gstrings_ch_stats)
+
+#define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw)
+#define napi_to_cpsw(napi)	container_of(napi, struct cpsw_common, napi)
 #define for_each_slave(priv, func, arg...)				\
 	do {								\
 		struct cpsw_slave *slave;				\
+		struct cpsw_common *cpsw = (priv)->cpsw;		\
 		int n;							\
-		if (priv->data.dual_emac)				\
-			(func)((priv)->slaves + priv->emac_port, ##arg);\
+		if (cpsw->data.dual_emac)				\
+			(func)((cpsw)->slaves + priv->emac_port, ##arg);\
 		else							\
-			for (n = (priv)->data.slaves,			\
-					slave = (priv)->slaves;		\
+			for (n = cpsw->data.slaves,			\
+					slave = cpsw->slaves;		\
 					n; n--)				\
 				(func)(slave++, ##arg);			\
 	} while (0)
-#define cpsw_get_slave_ndev(priv, __slave_no__)				\
-	((__slave_no__ < priv->data.slaves) ?				\
-		priv->slaves[__slave_no__].ndev : NULL)
-#define cpsw_get_slave_priv(priv, __slave_no__)				\
-	(((__slave_no__ < priv->data.slaves) &&				\
-		(priv->slaves[__slave_no__].ndev)) ?			\
-		netdev_priv(priv->slaves[__slave_no__].ndev) : NULL)	\
 
-#define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb)		\
+#define cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb)		\
 	do {								\
-		if (!priv->data.dual_emac)				\
+		if (!cpsw->data.dual_emac)				\
 			break;						\
 		if (CPDMA_RX_SOURCE_PORT(status) == 1) {		\
-			ndev = cpsw_get_slave_ndev(priv, 0);		\
-			priv = netdev_priv(ndev);			\
+			ndev = cpsw->slaves[0].ndev;			\
 			skb->dev = ndev;				\
 		} else if (CPDMA_RX_SOURCE_PORT(status) == 2) {		\
-			ndev = cpsw_get_slave_ndev(priv, 1);		\
-			priv = netdev_priv(ndev);			\
+			ndev = cpsw->slaves[1].ndev;			\
 			skb->dev = ndev;				\
 		}							\
 	} while (0)
-#define cpsw_add_mcast(priv, addr)					\
+#define cpsw_add_mcast(cpsw, priv, addr)				\
 	do {								\
-		if (priv->data.dual_emac) {				\
-			struct cpsw_slave *slave = priv->slaves +	\
+		if (cpsw->data.dual_emac) {				\
+			struct cpsw_slave *slave = cpsw->slaves +	\
 						priv->emac_port;	\
-			int slave_port = cpsw_get_slave_port(priv,	\
+			int slave_port = cpsw_get_slave_port(		\
 						slave->slave_num);	\
-			cpsw_ale_add_mcast(priv->ale, addr,		\
+			cpsw_ale_add_mcast(cpsw->ale, addr,		\
 				1 << slave_port | ALE_PORT_HOST,	\
 				ALE_VLAN, slave->port_vlan, 0);		\
 		} else {						\
-			cpsw_ale_add_mcast(priv->ale, addr,		\
+			cpsw_ale_add_mcast(cpsw->ale, addr,		\
 				ALE_ALL_PORTS,				\
 				0, 0, 0);				\
 		}							\
 	} while (0)
 
-static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num)
+static inline int cpsw_get_slave_port(u32 slave_num)
 {
 	return slave_num + 1;
 }
 
 static void cpsw_set_promiscious(struct net_device *ndev, bool enable)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpsw_ale *ale = priv->ale;
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	struct cpsw_ale *ale = cpsw->ale;
 	int i;
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		bool flag = false;
 
 		/* Enabling promiscuous mode for one interface will be
 		 * common for both the interface as the interface shares
 		 * the same hardware resource.
 		 */
-		for (i = 0; i < priv->data.slaves; i++)
-			if (priv->slaves[i].ndev->flags & IFF_PROMISC)
+		for (i = 0; i < cpsw->data.slaves; i++)
+			if (cpsw->slaves[i].ndev->flags & IFF_PROMISC)
 				flag = true;
 
 		if (!enable && flag) {
@@ -579,7 +568,7 @@
 			unsigned long timeout = jiffies + HZ;
 
 			/* Disable Learn for all ports (host is port 0 and slaves are port 1 and up */
-			for (i = 0; i <= priv->data.slaves; i++) {
+			for (i = 0; i <= cpsw->data.slaves; i++) {
 				cpsw_ale_control_set(ale, i,
 						     ALE_PORT_NOLEARN, 1);
 				cpsw_ale_control_set(ale, i,
@@ -606,7 +595,7 @@
 			cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0);
 
 			/* Enable Learn for all ports (host is port 0 and slaves are port 1 and up */
-			for (i = 0; i <= priv->data.slaves; i++) {
+			for (i = 0; i <= cpsw->data.slaves; i++) {
 				cpsw_ale_control_set(ale, i,
 						     ALE_PORT_NOLEARN, 0);
 				cpsw_ale_control_set(ale, i,
@@ -620,17 +609,18 @@
 static void cpsw_ndo_set_rx_mode(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 	int vid;
 
-	if (priv->data.dual_emac)
-		vid = priv->slaves[priv->emac_port].port_vlan;
+	if (cpsw->data.dual_emac)
+		vid = cpsw->slaves[priv->emac_port].port_vlan;
 	else
-		vid = priv->data.default_vlan;
+		vid = cpsw->data.default_vlan;
 
 	if (ndev->flags & IFF_PROMISC) {
 		/* Enable promiscuous mode */
 		cpsw_set_promiscious(ndev, true);
-		cpsw_ale_set_allmulti(priv->ale, IFF_ALLMULTI);
+		cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI);
 		return;
 	} else {
 		/* Disable promiscuous mode */
@@ -638,51 +628,54 @@
 	}
 
 	/* Restore allmulti on vlans if necessary */
-	cpsw_ale_set_allmulti(priv->ale, priv->ndev->flags & IFF_ALLMULTI);
+	cpsw_ale_set_allmulti(cpsw->ale, priv->ndev->flags & IFF_ALLMULTI);
 
 	/* Clear all mcast from ALE */
-	cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS, vid);
+	cpsw_ale_flush_multicast(cpsw->ale, ALE_ALL_PORTS, vid);
 
 	if (!netdev_mc_empty(ndev)) {
 		struct netdev_hw_addr *ha;
 
 		/* program multicast address list into ALE register */
 		netdev_for_each_mc_addr(ha, ndev) {
-			cpsw_add_mcast(priv, (u8 *)ha->addr);
+			cpsw_add_mcast(cpsw, priv, (u8 *)ha->addr);
 		}
 	}
 }
 
-static void cpsw_intr_enable(struct cpsw_priv *priv)
+static void cpsw_intr_enable(struct cpsw_common *cpsw)
 {
-	__raw_writel(0xFF, &priv->wr_regs->tx_en);
-	__raw_writel(0xFF, &priv->wr_regs->rx_en);
+	__raw_writel(0xFF, &cpsw->wr_regs->tx_en);
+	__raw_writel(0xFF, &cpsw->wr_regs->rx_en);
 
-	cpdma_ctlr_int_ctrl(priv->dma, true);
+	cpdma_ctlr_int_ctrl(cpsw->dma, true);
 	return;
 }
 
-static void cpsw_intr_disable(struct cpsw_priv *priv)
+static void cpsw_intr_disable(struct cpsw_common *cpsw)
 {
-	__raw_writel(0, &priv->wr_regs->tx_en);
-	__raw_writel(0, &priv->wr_regs->rx_en);
+	__raw_writel(0, &cpsw->wr_regs->tx_en);
+	__raw_writel(0, &cpsw->wr_regs->rx_en);
 
-	cpdma_ctlr_int_ctrl(priv->dma, false);
+	cpdma_ctlr_int_ctrl(cpsw->dma, false);
 	return;
 }
 
 static void cpsw_tx_handler(void *token, int len, int status)
 {
+	struct netdev_queue	*txq;
 	struct sk_buff		*skb = token;
 	struct net_device	*ndev = skb->dev;
-	struct cpsw_priv	*priv = netdev_priv(ndev);
+	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
 
 	/* Check whether the queue is stopped due to stalled tx dma, if the
 	 * queue is stopped then start the queue as we have free desc for tx
 	 */
-	if (unlikely(netif_queue_stopped(ndev)))
-		netif_wake_queue(ndev);
-	cpts_tx_timestamp(priv->cpts, skb);
+	txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb));
+	if (unlikely(netif_tx_queue_stopped(txq)))
+		netif_tx_wake_queue(txq);
+
+	cpts_tx_timestamp(cpsw->cpts, skb);
 	ndev->stats.tx_packets++;
 	ndev->stats.tx_bytes += len;
 	dev_kfree_skb_any(skb);
@@ -690,22 +683,23 @@
 
 static void cpsw_rx_handler(void *token, int len, int status)
 {
+	struct cpdma_chan	*ch;
 	struct sk_buff		*skb = token;
 	struct sk_buff		*new_skb;
 	struct net_device	*ndev = skb->dev;
-	struct cpsw_priv	*priv = netdev_priv(ndev);
 	int			ret = 0;
+	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
 
-	cpsw_dual_emac_src_port_detect(status, priv, ndev, skb);
+	cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb);
 
 	if (unlikely(status < 0) || unlikely(!netif_running(ndev))) {
 		bool ndev_status = false;
-		struct cpsw_slave *slave = priv->slaves;
+		struct cpsw_slave *slave = cpsw->slaves;
 		int n;
 
-		if (priv->data.dual_emac) {
+		if (cpsw->data.dual_emac) {
 			/* In dual emac mode check for all interfaces */
-			for (n = priv->data.slaves; n; n--, slave++)
+			for (n = cpsw->data.slaves; n; n--, slave++)
 				if (netif_running(slave->ndev))
 					ndev_status = true;
 		}
@@ -726,96 +720,132 @@
 		return;
 	}
 
-	new_skb = netdev_alloc_skb_ip_align(ndev, priv->rx_packet_max);
+	new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max);
 	if (new_skb) {
+		skb_copy_queue_mapping(new_skb, skb);
 		skb_put(skb, len);
-		cpts_rx_timestamp(priv->cpts, skb);
+		cpts_rx_timestamp(cpsw->cpts, skb);
 		skb->protocol = eth_type_trans(skb, ndev);
 		netif_receive_skb(skb);
 		ndev->stats.rx_bytes += len;
 		ndev->stats.rx_packets++;
+		kmemleak_not_leak(new_skb);
 	} else {
 		ndev->stats.rx_dropped++;
 		new_skb = skb;
 	}
 
 requeue:
-	ret = cpdma_chan_submit(priv->rxch, new_skb, new_skb->data,
-			skb_tailroom(new_skb), 0);
+	if (netif_dormant(ndev)) {
+		dev_kfree_skb_any(new_skb);
+		return;
+	}
+
+	ch = cpsw->rxch[skb_get_queue_mapping(new_skb)];
+	ret = cpdma_chan_submit(ch, new_skb, new_skb->data,
+				skb_tailroom(new_skb), 0);
 	if (WARN_ON(ret < 0))
 		dev_kfree_skb_any(new_skb);
 }
 
 static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id)
 {
-	struct cpsw_priv *priv = dev_id;
+	struct cpsw_common *cpsw = dev_id;
 
-	writel(0, &priv->wr_regs->tx_en);
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
+	writel(0, &cpsw->wr_regs->tx_en);
+	cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_TX);
 
-	if (priv->quirk_irq) {
-		disable_irq_nosync(priv->irqs_table[1]);
-		priv->tx_irq_disabled = true;
+	if (cpsw->quirk_irq) {
+		disable_irq_nosync(cpsw->irqs_table[1]);
+		cpsw->tx_irq_disabled = true;
 	}
 
-	napi_schedule(&priv->napi_tx);
+	napi_schedule(&cpsw->napi_tx);
 	return IRQ_HANDLED;
 }
 
 static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
 {
-	struct cpsw_priv *priv = dev_id;
+	struct cpsw_common *cpsw = dev_id;
 
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
-	writel(0, &priv->wr_regs->rx_en);
+	cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX);
+	writel(0, &cpsw->wr_regs->rx_en);
 
-	if (priv->quirk_irq) {
-		disable_irq_nosync(priv->irqs_table[0]);
-		priv->rx_irq_disabled = true;
+	if (cpsw->quirk_irq) {
+		disable_irq_nosync(cpsw->irqs_table[0]);
+		cpsw->rx_irq_disabled = true;
 	}
 
-	napi_schedule(&priv->napi_rx);
+	napi_schedule(&cpsw->napi_rx);
 	return IRQ_HANDLED;
 }
 
 static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
 {
-	struct cpsw_priv	*priv = napi_to_priv(napi_tx);
-	int			num_tx;
+	u32			ch_map;
+	int			num_tx, ch;
+	struct cpsw_common	*cpsw = napi_to_cpsw(napi_tx);
 
-	num_tx = cpdma_chan_process(priv->txch, budget);
-	if (num_tx < budget) {
-		napi_complete(napi_tx);
-		writel(0xff, &priv->wr_regs->tx_en);
-		if (priv->quirk_irq && priv->tx_irq_disabled) {
-			priv->tx_irq_disabled = false;
-			enable_irq(priv->irqs_table[1]);
+	/* process every unprocessed channel */
+	ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
+	for (ch = 0, num_tx = 0; num_tx < budget; ch_map >>= 1, ch++) {
+		if (!ch_map) {
+			ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
+			if (!ch_map)
+				break;
+
+			ch = 0;
 		}
+
+		if (!(ch_map & 0x01))
+			continue;
+
+		num_tx += cpdma_chan_process(cpsw->txch[ch], budget - num_tx);
 	}
 
-	if (num_tx)
-		cpsw_dbg(priv, intr, "poll %d tx pkts\n", num_tx);
+	if (num_tx < budget) {
+		napi_complete(napi_tx);
+		writel(0xff, &cpsw->wr_regs->tx_en);
+		if (cpsw->quirk_irq && cpsw->tx_irq_disabled) {
+			cpsw->tx_irq_disabled = false;
+			enable_irq(cpsw->irqs_table[1]);
+		}
+	}
 
 	return num_tx;
 }
 
 static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
 {
-	struct cpsw_priv	*priv = napi_to_priv(napi_rx);
-	int			num_rx;
+	u32			ch_map;
+	int			num_rx, ch;
+	struct cpsw_common	*cpsw = napi_to_cpsw(napi_rx);
 
-	num_rx = cpdma_chan_process(priv->rxch, budget);
-	if (num_rx < budget) {
-		napi_complete(napi_rx);
-		writel(0xff, &priv->wr_regs->rx_en);
-		if (priv->quirk_irq && priv->rx_irq_disabled) {
-			priv->rx_irq_disabled = false;
-			enable_irq(priv->irqs_table[0]);
+	/* process every unprocessed channel */
+	ch_map = cpdma_ctrl_rxchs_state(cpsw->dma);
+	for (ch = 0, num_rx = 0; num_rx < budget; ch_map >>= 1, ch++) {
+		if (!ch_map) {
+			ch_map = cpdma_ctrl_rxchs_state(cpsw->dma);
+			if (!ch_map)
+				break;
+
+			ch = 0;
 		}
+
+		if (!(ch_map & 0x01))
+			continue;
+
+		num_rx += cpdma_chan_process(cpsw->rxch[ch], budget - num_rx);
 	}
 
-	if (num_rx)
-		cpsw_dbg(priv, intr, "poll %d rx pkts\n", num_rx);
+	if (num_rx < budget) {
+		napi_complete(napi_rx);
+		writel(0xff, &cpsw->wr_regs->rx_en);
+		if (cpsw->quirk_irq && cpsw->rx_irq_disabled) {
+			cpsw->rx_irq_disabled = false;
+			enable_irq(cpsw->irqs_table[0]);
+		}
+	}
 
 	return num_rx;
 }
@@ -849,17 +879,18 @@
 	struct phy_device	*phy = slave->phy;
 	u32			mac_control = 0;
 	u32			slave_port;
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	if (!phy)
 		return;
 
-	slave_port = cpsw_get_slave_port(priv, slave->slave_num);
+	slave_port = cpsw_get_slave_port(slave->slave_num);
 
 	if (phy->link) {
-		mac_control = priv->data.mac_control;
+		mac_control = cpsw->data.mac_control;
 
 		/* enable forwarding */
-		cpsw_ale_control_set(priv->ale, slave_port,
+		cpsw_ale_control_set(cpsw->ale, slave_port,
 				     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
 		if (phy->speed == 1000)
@@ -883,7 +914,7 @@
 	} else {
 		mac_control = 0;
 		/* disable forwarding */
-		cpsw_ale_control_set(priv->ale, slave_port,
+		cpsw_ale_control_set(cpsw->ale, slave_port,
 				     ALE_PORT_STATE, ALE_PORT_STATE_DISABLE);
 	}
 
@@ -905,19 +936,19 @@
 	if (link) {
 		netif_carrier_on(ndev);
 		if (netif_running(ndev))
-			netif_wake_queue(ndev);
+			netif_tx_wake_all_queues(ndev);
 	} else {
 		netif_carrier_off(ndev);
-		netif_stop_queue(ndev);
+		netif_tx_stop_all_queues(ndev);
 	}
 }
 
 static int cpsw_get_coalesce(struct net_device *ndev,
 				struct ethtool_coalesce *coal)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
-	coal->rx_coalesce_usecs = priv->coal_intvl;
+	coal->rx_coalesce_usecs = cpsw->coal_intvl;
 	return 0;
 }
 
@@ -930,11 +961,12 @@
 	u32 prescale = 0;
 	u32 addnl_dvdr = 1;
 	u32 coal_intvl = 0;
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	coal_intvl = coal->rx_coalesce_usecs;
 
-	int_ctrl =  readl(&priv->wr_regs->int_control);
-	prescale = priv->bus_freq_mhz * 4;
+	int_ctrl =  readl(&cpsw->wr_regs->int_control);
+	prescale = cpsw->bus_freq_mhz * 4;
 
 	if (!coal->rx_coalesce_usecs) {
 		int_ctrl &= ~(CPSW_INTPRESCALE_MASK | CPSW_INTPACEEN);
@@ -962,53 +994,69 @@
 	}
 
 	num_interrupts = (1000 * addnl_dvdr) / coal_intvl;
-	writel(num_interrupts, &priv->wr_regs->rx_imax);
-	writel(num_interrupts, &priv->wr_regs->tx_imax);
+	writel(num_interrupts, &cpsw->wr_regs->rx_imax);
+	writel(num_interrupts, &cpsw->wr_regs->tx_imax);
 
 	int_ctrl |= CPSW_INTPACEEN;
 	int_ctrl &= (~CPSW_INTPRESCALE_MASK);
 	int_ctrl |= (prescale & CPSW_INTPRESCALE_MASK);
 
 update_return:
-	writel(int_ctrl, &priv->wr_regs->int_control);
+	writel(int_ctrl, &cpsw->wr_regs->int_control);
 
 	cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl);
-	if (priv->data.dual_emac) {
-		int i;
-
-		for (i = 0; i < priv->data.slaves; i++) {
-			priv = netdev_priv(priv->slaves[i].ndev);
-			priv->coal_intvl = coal_intvl;
-		}
-	} else {
-		priv->coal_intvl = coal_intvl;
-	}
+	cpsw->coal_intvl = coal_intvl;
 
 	return 0;
 }
 
 static int cpsw_get_sset_count(struct net_device *ndev, int sset)
 {
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
 	switch (sset) {
 	case ETH_SS_STATS:
-		return CPSW_STATS_LEN;
+		return (CPSW_STATS_COMMON_LEN +
+		       (cpsw->rx_ch_num + cpsw->tx_ch_num) *
+		       CPSW_STATS_CH_LEN);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
+static void cpsw_add_ch_strings(u8 **p, int ch_num, int rx_dir)
+{
+	int ch_stats_len;
+	int line;
+	int i;
+
+	ch_stats_len = CPSW_STATS_CH_LEN * ch_num;
+	for (i = 0; i < ch_stats_len; i++) {
+		line = i % CPSW_STATS_CH_LEN;
+		snprintf(*p, ETH_GSTRING_LEN,
+			 "%s DMA chan %d: %s", rx_dir ? "Rx" : "Tx",
+			 i / CPSW_STATS_CH_LEN,
+			 cpsw_gstrings_ch_stats[line].stat_string);
+		*p += ETH_GSTRING_LEN;
+	}
+}
+
 static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data)
 {
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 	u8 *p = data;
 	int i;
 
 	switch (stringset) {
 	case ETH_SS_STATS:
-		for (i = 0; i < CPSW_STATS_LEN; i++) {
+		for (i = 0; i < CPSW_STATS_COMMON_LEN; i++) {
 			memcpy(p, cpsw_gstrings_stats[i].stat_string,
 			       ETH_GSTRING_LEN);
 			p += ETH_GSTRING_LEN;
 		}
+
+		cpsw_add_ch_strings(&p, cpsw->rx_ch_num, 1);
+		cpsw_add_ch_strings(&p, cpsw->tx_ch_num, 0);
 		break;
 	}
 }
@@ -1016,86 +1064,78 @@
 static void cpsw_get_ethtool_stats(struct net_device *ndev,
 				    struct ethtool_stats *stats, u64 *data)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
-	struct cpdma_chan_stats rx_stats;
-	struct cpdma_chan_stats tx_stats;
-	u32 val;
 	u8 *p;
-	int i;
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	struct cpdma_chan_stats ch_stats;
+	int i, l, ch;
 
 	/* Collect Davinci CPDMA stats for Rx and Tx Channel */
-	cpdma_chan_get_stats(priv->rxch, &rx_stats);
-	cpdma_chan_get_stats(priv->txch, &tx_stats);
+	for (l = 0; l < CPSW_STATS_COMMON_LEN; l++)
+		data[l] = readl(cpsw->hw_stats +
+				cpsw_gstrings_stats[l].stat_offset);
 
-	for (i = 0; i < CPSW_STATS_LEN; i++) {
-		switch (cpsw_gstrings_stats[i].type) {
-		case CPSW_STATS:
-			val = readl(priv->hw_stats +
-				    cpsw_gstrings_stats[i].stat_offset);
-			data[i] = val;
-			break;
+	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
+		cpdma_chan_get_stats(cpsw->rxch[ch], &ch_stats);
+		for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
+			p = (u8 *)&ch_stats +
+				cpsw_gstrings_ch_stats[i].stat_offset;
+			data[l] = *(u32 *)p;
+		}
+	}
 
-		case CPDMA_RX_STATS:
-			p = (u8 *)&rx_stats +
-				cpsw_gstrings_stats[i].stat_offset;
-			data[i] = *(u32 *)p;
-			break;
-
-		case CPDMA_TX_STATS:
-			p = (u8 *)&tx_stats +
-				cpsw_gstrings_stats[i].stat_offset;
-			data[i] = *(u32 *)p;
-			break;
+	for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
+		cpdma_chan_get_stats(cpsw->txch[ch], &ch_stats);
+		for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
+			p = (u8 *)&ch_stats +
+				cpsw_gstrings_ch_stats[i].stat_offset;
+			data[l] = *(u32 *)p;
 		}
 	}
 }
 
-static int cpsw_common_res_usage_state(struct cpsw_priv *priv)
+static int cpsw_common_res_usage_state(struct cpsw_common *cpsw)
 {
 	u32 i;
 	u32 usage_count = 0;
 
-	if (!priv->data.dual_emac)
+	if (!cpsw->data.dual_emac)
 		return 0;
 
-	for (i = 0; i < priv->data.slaves; i++)
-		if (priv->slaves[i].open_stat)
+	for (i = 0; i < cpsw->data.slaves; i++)
+		if (cpsw->slaves[i].open_stat)
 			usage_count++;
 
 	return usage_count;
 }
 
-static inline int cpsw_tx_packet_submit(struct net_device *ndev,
-			struct cpsw_priv *priv, struct sk_buff *skb)
+static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv,
+					struct sk_buff *skb,
+					struct cpdma_chan *txch)
 {
-	if (!priv->data.dual_emac)
-		return cpdma_chan_submit(priv->txch, skb, skb->data,
-				  skb->len, 0);
+	struct cpsw_common *cpsw = priv->cpsw;
 
-	if (ndev == cpsw_get_slave_ndev(priv, 0))
-		return cpdma_chan_submit(priv->txch, skb, skb->data,
-				  skb->len, 1);
-	else
-		return cpdma_chan_submit(priv->txch, skb, skb->data,
-				  skb->len, 2);
+	return cpdma_chan_submit(txch, skb, skb->data, skb->len,
+				 priv->emac_port + cpsw->data.dual_emac);
 }
 
 static inline void cpsw_add_dual_emac_def_ale_entries(
 		struct cpsw_priv *priv, struct cpsw_slave *slave,
 		u32 slave_port)
 {
+	struct cpsw_common *cpsw = priv->cpsw;
 	u32 port_mask = 1 << slave_port | ALE_PORT_HOST;
 
-	if (priv->version == CPSW_VERSION_1)
+	if (cpsw->version == CPSW_VERSION_1)
 		slave_write(slave, slave->port_vlan, CPSW1_PORT_VLAN);
 	else
 		slave_write(slave, slave->port_vlan, CPSW2_PORT_VLAN);
-	cpsw_ale_add_vlan(priv->ale, slave->port_vlan, port_mask,
+	cpsw_ale_add_vlan(cpsw->ale, slave->port_vlan, port_mask,
 			  port_mask, port_mask, 0);
-	cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+	cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
 			   port_mask, ALE_VLAN, slave->port_vlan, 0);
-	cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
-		HOST_PORT_NUM, ALE_VLAN | ALE_SECURE, slave->port_vlan);
+	cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
+			   HOST_PORT_NUM, ALE_VLAN |
+			   ALE_SECURE, slave->port_vlan);
 }
 
 static void soft_reset_slave(struct cpsw_slave *slave)
@@ -1109,13 +1149,14 @@
 static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv)
 {
 	u32 slave_port;
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	soft_reset_slave(slave);
 
 	/* setup priority mapping */
 	__raw_writel(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map);
 
-	switch (priv->version) {
+	switch (cpsw->version) {
 	case CPSW_VERSION_1:
 		slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP);
 		break;
@@ -1127,17 +1168,17 @@
 	}
 
 	/* setup max packet size, and mac address */
-	__raw_writel(priv->rx_packet_max, &slave->sliver->rx_maxlen);
+	__raw_writel(cpsw->rx_packet_max, &slave->sliver->rx_maxlen);
 	cpsw_set_slave_mac(slave, priv);
 
 	slave->mac_control = 0;	/* no link yet */
 
-	slave_port = cpsw_get_slave_port(priv, slave->slave_num);
+	slave_port = cpsw_get_slave_port(slave->slave_num);
 
-	if (priv->data.dual_emac)
+	if (cpsw->data.dual_emac)
 		cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port);
 	else
-		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+		cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
 				   1 << slave_port, 0, 0, ALE_MCAST_FWD_2);
 
 	if (slave->data->phy_node) {
@@ -1167,81 +1208,121 @@
 	phy_start(slave->phy);
 
 	/* Configure GMII_SEL register */
-	cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, slave->slave_num);
+	cpsw_phy_sel(cpsw->dev, slave->phy->interface, slave->slave_num);
 }
 
 static inline void cpsw_add_default_vlan(struct cpsw_priv *priv)
 {
-	const int vlan = priv->data.default_vlan;
+	struct cpsw_common *cpsw = priv->cpsw;
+	const int vlan = cpsw->data.default_vlan;
 	u32 reg;
 	int i;
 	int unreg_mcast_mask;
 
-	reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN :
+	reg = (cpsw->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN :
 	       CPSW2_PORT_VLAN;
 
-	writel(vlan, &priv->host_port_regs->port_vlan);
+	writel(vlan, &cpsw->host_port_regs->port_vlan);
 
-	for (i = 0; i < priv->data.slaves; i++)
-		slave_write(priv->slaves + i, vlan, reg);
+	for (i = 0; i < cpsw->data.slaves; i++)
+		slave_write(cpsw->slaves + i, vlan, reg);
 
 	if (priv->ndev->flags & IFF_ALLMULTI)
 		unreg_mcast_mask = ALE_ALL_PORTS;
 	else
 		unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2;
 
-	cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS,
+	cpsw_ale_add_vlan(cpsw->ale, vlan, ALE_ALL_PORTS,
 			  ALE_ALL_PORTS, ALE_ALL_PORTS,
 			  unreg_mcast_mask);
 }
 
 static void cpsw_init_host_port(struct cpsw_priv *priv)
 {
-	u32 control_reg;
 	u32 fifo_mode;
+	u32 control_reg;
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	/* soft reset the controller and initialize ale */
-	soft_reset("cpsw", &priv->regs->soft_reset);
-	cpsw_ale_start(priv->ale);
+	soft_reset("cpsw", &cpsw->regs->soft_reset);
+	cpsw_ale_start(cpsw->ale);
 
 	/* switch to vlan unaware mode */
-	cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
+	cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
 			     CPSW_ALE_VLAN_AWARE);
-	control_reg = readl(&priv->regs->control);
+	control_reg = readl(&cpsw->regs->control);
 	control_reg |= CPSW_VLAN_AWARE;
-	writel(control_reg, &priv->regs->control);
-	fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
+	writel(control_reg, &cpsw->regs->control);
+	fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE :
 		     CPSW_FIFO_NORMAL_MODE;
-	writel(fifo_mode, &priv->host_port_regs->tx_in_ctl);
+	writel(fifo_mode, &cpsw->host_port_regs->tx_in_ctl);
 
 	/* setup host port priority mapping */
 	__raw_writel(CPDMA_TX_PRIORITY_MAP,
-		     &priv->host_port_regs->cpdma_tx_pri_map);
-	__raw_writel(0, &priv->host_port_regs->cpdma_rx_chan_map);
+		     &cpsw->host_port_regs->cpdma_tx_pri_map);
+	__raw_writel(0, &cpsw->host_port_regs->cpdma_rx_chan_map);
 
-	cpsw_ale_control_set(priv->ale, HOST_PORT_NUM,
+	cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM,
 			     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
-	if (!priv->data.dual_emac) {
-		cpsw_ale_add_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM,
+	if (!cpsw->data.dual_emac) {
+		cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM,
 				   0, 0);
-		cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+		cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
 				   ALE_PORT_HOST, 0, 0, ALE_MCAST_FWD_2);
 	}
 }
 
-static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv)
+static int cpsw_fill_rx_channels(struct cpsw_priv *priv)
+{
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct sk_buff *skb;
+	int ch_buf_num;
+	int ch, i, ret;
+
+	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
+		ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxch[ch]);
+		for (i = 0; i < ch_buf_num; i++) {
+			skb = __netdev_alloc_skb_ip_align(priv->ndev,
+							  cpsw->rx_packet_max,
+							  GFP_KERNEL);
+			if (!skb) {
+				cpsw_err(priv, ifup, "cannot allocate skb\n");
+				return -ENOMEM;
+			}
+
+			skb_set_queue_mapping(skb, ch);
+			ret = cpdma_chan_submit(cpsw->rxch[ch], skb, skb->data,
+						skb_tailroom(skb), 0);
+			if (ret < 0) {
+				cpsw_err(priv, ifup,
+					 "cannot submit skb to channel %d rx, error %d\n",
+					 ch, ret);
+				kfree_skb(skb);
+				return ret;
+			}
+			kmemleak_not_leak(skb);
+		}
+
+		cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n",
+			  ch, ch_buf_num);
+	}
+
+	return 0;
+}
+
+static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw)
 {
 	u32 slave_port;
 
-	slave_port = cpsw_get_slave_port(priv, slave->slave_num);
+	slave_port = cpsw_get_slave_port(slave->slave_num);
 
 	if (!slave->phy)
 		return;
 	phy_stop(slave->phy);
 	phy_disconnect(slave->phy);
 	slave->phy = NULL;
-	cpsw_ale_control_set(priv->ale, slave_port,
+	cpsw_ale_control_set(cpsw->ale, slave_port,
 			     ALE_PORT_STATE, ALE_PORT_STATE_DISABLE);
 	soft_reset_slave(slave);
 }
@@ -1249,114 +1330,111 @@
 static int cpsw_ndo_open(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int i, ret;
+	struct cpsw_common *cpsw = priv->cpsw;
+	int ret;
 	u32 reg;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 		return ret;
 	}
 
-	if (!cpsw_common_res_usage_state(priv))
-		cpsw_intr_disable(priv);
+	if (!cpsw_common_res_usage_state(cpsw))
+		cpsw_intr_disable(cpsw);
 	netif_carrier_off(ndev);
 
-	reg = priv->version;
+	/* Notify the stack of the actual queue counts. */
+	ret = netif_set_real_num_tx_queues(ndev, cpsw->tx_ch_num);
+	if (ret) {
+		dev_err(priv->dev, "cannot set real number of tx queues\n");
+		goto err_cleanup;
+	}
+
+	ret = netif_set_real_num_rx_queues(ndev, cpsw->rx_ch_num);
+	if (ret) {
+		dev_err(priv->dev, "cannot set real number of rx queues\n");
+		goto err_cleanup;
+	}
+
+	reg = cpsw->version;
 
 	dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n",
 		 CPSW_MAJOR_VERSION(reg), CPSW_MINOR_VERSION(reg),
 		 CPSW_RTL_VERSION(reg));
 
 	/* initialize host and slave ports */
-	if (!cpsw_common_res_usage_state(priv))
+	if (!cpsw_common_res_usage_state(cpsw))
 		cpsw_init_host_port(priv);
 	for_each_slave(priv, cpsw_slave_open, priv);
 
 	/* Add default VLAN */
-	if (!priv->data.dual_emac)
+	if (!cpsw->data.dual_emac)
 		cpsw_add_default_vlan(priv);
 	else
-		cpsw_ale_add_vlan(priv->ale, priv->data.default_vlan,
+		cpsw_ale_add_vlan(cpsw->ale, cpsw->data.default_vlan,
 				  ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0);
 
-	if (!cpsw_common_res_usage_state(priv)) {
-		struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0);
-		int buf_num;
-
+	if (!cpsw_common_res_usage_state(cpsw)) {
 		/* setup tx dma to fixed prio and zero offset */
-		cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1);
-		cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0);
+		cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1);
+		cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0);
 
 		/* disable priority elevation */
-		__raw_writel(0, &priv->regs->ptype);
+		__raw_writel(0, &cpsw->regs->ptype);
 
 		/* enable statistics collection only on all ports */
-		__raw_writel(0x7, &priv->regs->stat_port_en);
+		__raw_writel(0x7, &cpsw->regs->stat_port_en);
 
 		/* Enable internal fifo flow control */
-		writel(0x7, &priv->regs->flow_control);
+		writel(0x7, &cpsw->regs->flow_control);
 
-		napi_enable(&priv_sl0->napi_rx);
-		napi_enable(&priv_sl0->napi_tx);
+		napi_enable(&cpsw->napi_rx);
+		napi_enable(&cpsw->napi_tx);
 
-		if (priv_sl0->tx_irq_disabled) {
-			priv_sl0->tx_irq_disabled = false;
-			enable_irq(priv->irqs_table[1]);
+		if (cpsw->tx_irq_disabled) {
+			cpsw->tx_irq_disabled = false;
+			enable_irq(cpsw->irqs_table[1]);
 		}
 
-		if (priv_sl0->rx_irq_disabled) {
-			priv_sl0->rx_irq_disabled = false;
-			enable_irq(priv->irqs_table[0]);
+		if (cpsw->rx_irq_disabled) {
+			cpsw->rx_irq_disabled = false;
+			enable_irq(cpsw->irqs_table[0]);
 		}
 
-		buf_num = cpdma_chan_get_rx_buf_num(priv->dma);
-		for (i = 0; i < buf_num; i++) {
-			struct sk_buff *skb;
+		ret = cpsw_fill_rx_channels(priv);
+		if (ret < 0)
+			goto err_cleanup;
 
-			ret = -ENOMEM;
-			skb = __netdev_alloc_skb_ip_align(priv->ndev,
-					priv->rx_packet_max, GFP_KERNEL);
-			if (!skb)
-				goto err_cleanup;
-			ret = cpdma_chan_submit(priv->rxch, skb, skb->data,
-					skb_tailroom(skb), 0);
-			if (ret < 0) {
-				kfree_skb(skb);
-				goto err_cleanup;
-			}
-		}
-		/* continue even if we didn't manage to submit all
-		 * receive descs
-		 */
-		cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i);
-
-		if (cpts_register(&priv->pdev->dev, priv->cpts,
-				  priv->data.cpts_clock_mult,
-				  priv->data.cpts_clock_shift))
+		if (cpts_register(cpsw->dev, cpsw->cpts,
+				  cpsw->data.cpts_clock_mult,
+				  cpsw->data.cpts_clock_shift))
 			dev_err(priv->dev, "error registering cpts device\n");
 
 	}
 
 	/* Enable Interrupt pacing if configured */
-	if (priv->coal_intvl != 0) {
+	if (cpsw->coal_intvl != 0) {
 		struct ethtool_coalesce coal;
 
-		coal.rx_coalesce_usecs = priv->coal_intvl;
+		coal.rx_coalesce_usecs = cpsw->coal_intvl;
 		cpsw_set_coalesce(ndev, &coal);
 	}
 
-	cpdma_ctlr_start(priv->dma);
-	cpsw_intr_enable(priv);
+	cpdma_ctlr_start(cpsw->dma);
+	cpsw_intr_enable(cpsw);
 
-	if (priv->data.dual_emac)
-		priv->slaves[priv->emac_port].open_stat = true;
+	if (cpsw->data.dual_emac)
+		cpsw->slaves[priv->emac_port].open_stat = true;
+
+	netif_tx_start_all_queues(ndev);
+
 	return 0;
 
 err_cleanup:
-	cpdma_ctlr_stop(priv->dma);
-	for_each_slave(priv, cpsw_slave_stop, priv);
-	pm_runtime_put_sync(&priv->pdev->dev);
+	cpdma_ctlr_stop(cpsw->dma);
+	for_each_slave(priv, cpsw_slave_stop, cpsw);
+	pm_runtime_put_sync(cpsw->dev);
 	netif_carrier_off(priv->ndev);
 	return ret;
 }
@@ -1364,25 +1442,24 @@
 static int cpsw_ndo_stop(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 
 	cpsw_info(priv, ifdown, "shutting down cpsw device\n");
-	netif_stop_queue(priv->ndev);
+	netif_tx_stop_all_queues(priv->ndev);
 	netif_carrier_off(priv->ndev);
 
-	if (cpsw_common_res_usage_state(priv) <= 1) {
-		struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0);
-
-		napi_disable(&priv_sl0->napi_rx);
-		napi_disable(&priv_sl0->napi_tx);
-		cpts_unregister(priv->cpts);
-		cpsw_intr_disable(priv);
-		cpdma_ctlr_stop(priv->dma);
-		cpsw_ale_stop(priv->ale);
+	if (cpsw_common_res_usage_state(cpsw) <= 1) {
+		napi_disable(&cpsw->napi_rx);
+		napi_disable(&cpsw->napi_tx);
+		cpts_unregister(cpsw->cpts);
+		cpsw_intr_disable(cpsw);
+		cpdma_ctlr_stop(cpsw->dma);
+		cpsw_ale_stop(cpsw->ale);
 	}
-	for_each_slave(priv, cpsw_slave_stop, priv);
-	pm_runtime_put_sync(&priv->pdev->dev);
-	if (priv->data.dual_emac)
-		priv->slaves[priv->emac_port].open_stat = false;
+	for_each_slave(priv, cpsw_slave_stop, cpsw);
+	pm_runtime_put_sync(cpsw->dev);
+	if (cpsw->data.dual_emac)
+		cpsw->slaves[priv->emac_port].open_stat = false;
 	return 0;
 }
 
@@ -1390,7 +1467,10 @@
 				       struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int ret;
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct netdev_queue *txq;
+	struct cpdma_chan *txch;
+	int ret, q_idx;
 
 	netif_trans_update(ndev);
 
@@ -1401,12 +1481,17 @@
 	}
 
 	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-				priv->cpts->tx_enable)
+				cpsw->cpts->tx_enable)
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
 	skb_tx_timestamp(skb);
 
-	ret = cpsw_tx_packet_submit(ndev, priv, skb);
+	q_idx = skb_get_queue_mapping(skb);
+	if (q_idx >= cpsw->tx_ch_num)
+		q_idx = q_idx % cpsw->tx_ch_num;
+
+	txch = cpsw->txch[q_idx];
+	ret = cpsw_tx_packet_submit(priv, skb, txch);
 	if (unlikely(ret != 0)) {
 		cpsw_err(priv, tx_err, "desc submit failed\n");
 		goto fail;
@@ -1415,24 +1500,27 @@
 	/* If there is no more tx desc left free then we need to
 	 * tell the kernel to stop sending us tx frames.
 	 */
-	if (unlikely(!cpdma_check_free_tx_desc(priv->txch)))
-		netif_stop_queue(ndev);
+	if (unlikely(!cpdma_check_free_tx_desc(txch))) {
+		txq = netdev_get_tx_queue(ndev, q_idx);
+		netif_tx_stop_queue(txq);
+	}
 
 	return NETDEV_TX_OK;
 fail:
 	ndev->stats.tx_dropped++;
-	netif_stop_queue(ndev);
+	txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb));
+	netif_tx_stop_queue(txq);
 	return NETDEV_TX_BUSY;
 }
 
 #ifdef CONFIG_TI_CPTS
 
-static void cpsw_hwtstamp_v1(struct cpsw_priv *priv)
+static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 {
-	struct cpsw_slave *slave = &priv->slaves[priv->data.active_slave];
+	struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave];
 	u32 ts_en, seq_id;
 
-	if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) {
+	if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) {
 		slave_write(slave, 0, CPSW1_TS_CTL);
 		return;
 	}
@@ -1440,10 +1528,10 @@
 	seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
 	ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
 
-	if (priv->cpts->tx_enable)
+	if (cpsw->cpts->tx_enable)
 		ts_en |= CPSW_V1_TS_TX_EN;
 
-	if (priv->cpts->rx_enable)
+	if (cpsw->cpts->rx_enable)
 		ts_en |= CPSW_V1_TS_RX_EN;
 
 	slave_write(slave, ts_en, CPSW1_TS_CTL);
@@ -1453,32 +1541,33 @@
 static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
 {
 	struct cpsw_slave *slave;
+	struct cpsw_common *cpsw = priv->cpsw;
 	u32 ctrl, mtype;
 
-	if (priv->data.dual_emac)
-		slave = &priv->slaves[priv->emac_port];
+	if (cpsw->data.dual_emac)
+		slave = &cpsw->slaves[priv->emac_port];
 	else
-		slave = &priv->slaves[priv->data.active_slave];
+		slave = &cpsw->slaves[cpsw->data.active_slave];
 
 	ctrl = slave_read(slave, CPSW2_CONTROL);
-	switch (priv->version) {
+	switch (cpsw->version) {
 	case CPSW_VERSION_2:
 		ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-		if (priv->cpts->tx_enable)
+		if (cpsw->cpts->tx_enable)
 			ctrl |= CTRL_V2_TX_TS_BITS;
 
-		if (priv->cpts->rx_enable)
+		if (cpsw->cpts->rx_enable)
 			ctrl |= CTRL_V2_RX_TS_BITS;
 		break;
 	case CPSW_VERSION_3:
 	default:
 		ctrl &= ~CTRL_V3_ALL_TS_MASK;
 
-		if (priv->cpts->tx_enable)
+		if (cpsw->cpts->tx_enable)
 			ctrl |= CTRL_V3_TX_TS_BITS;
 
-		if (priv->cpts->rx_enable)
+		if (cpsw->cpts->rx_enable)
 			ctrl |= CTRL_V3_RX_TS_BITS;
 		break;
 	}
@@ -1487,18 +1576,19 @@
 
 	slave_write(slave, mtype, CPSW2_TS_SEQ_MTYPE);
 	slave_write(slave, ctrl, CPSW2_CONTROL);
-	__raw_writel(ETH_P_1588, &priv->regs->ts_ltype);
+	__raw_writel(ETH_P_1588, &cpsw->regs->ts_ltype);
 }
 
 static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 {
 	struct cpsw_priv *priv = netdev_priv(dev);
-	struct cpts *cpts = priv->cpts;
 	struct hwtstamp_config cfg;
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpts *cpts = cpsw->cpts;
 
-	if (priv->version != CPSW_VERSION_1 &&
-	    priv->version != CPSW_VERSION_2 &&
-	    priv->version != CPSW_VERSION_3)
+	if (cpsw->version != CPSW_VERSION_1 &&
+	    cpsw->version != CPSW_VERSION_2 &&
+	    cpsw->version != CPSW_VERSION_3)
 		return -EOPNOTSUPP;
 
 	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
@@ -1538,9 +1628,9 @@
 
 	cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON;
 
-	switch (priv->version) {
+	switch (cpsw->version) {
 	case CPSW_VERSION_1:
-		cpsw_hwtstamp_v1(priv);
+		cpsw_hwtstamp_v1(cpsw);
 		break;
 	case CPSW_VERSION_2:
 	case CPSW_VERSION_3:
@@ -1555,13 +1645,13 @@
 
 static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 {
-	struct cpsw_priv *priv = netdev_priv(dev);
-	struct cpts *cpts = priv->cpts;
+	struct cpsw_common *cpsw = ndev_to_cpsw(dev);
+	struct cpts *cpts = cpsw->cpts;
 	struct hwtstamp_config cfg;
 
-	if (priv->version != CPSW_VERSION_1 &&
-	    priv->version != CPSW_VERSION_2 &&
-	    priv->version != CPSW_VERSION_3)
+	if (cpsw->version != CPSW_VERSION_1 &&
+	    cpsw->version != CPSW_VERSION_2 &&
+	    cpsw->version != CPSW_VERSION_3)
 		return -EOPNOTSUPP;
 
 	cfg.flags = 0;
@@ -1577,7 +1667,8 @@
 static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 {
 	struct cpsw_priv *priv = netdev_priv(dev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
 	if (!netif_running(dev))
 		return -EINVAL;
@@ -1591,27 +1682,33 @@
 #endif
 	}
 
-	if (!priv->slaves[slave_no].phy)
+	if (!cpsw->slaves[slave_no].phy)
 		return -EOPNOTSUPP;
-	return phy_mii_ioctl(priv->slaves[slave_no].phy, req, cmd);
+	return phy_mii_ioctl(cpsw->slaves[slave_no].phy, req, cmd);
 }
 
 static void cpsw_ndo_tx_timeout(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int ch;
 
 	cpsw_err(priv, tx_err, "transmit timeout, restarting dma\n");
 	ndev->stats.tx_errors++;
-	cpsw_intr_disable(priv);
-	cpdma_chan_stop(priv->txch);
-	cpdma_chan_start(priv->txch);
-	cpsw_intr_enable(priv);
+	cpsw_intr_disable(cpsw);
+	for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
+		cpdma_chan_stop(cpsw->txch[ch]);
+		cpdma_chan_start(cpsw->txch[ch]);
+	}
+
+	cpsw_intr_enable(cpsw);
 }
 
 static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	struct sockaddr *addr = (struct sockaddr *)p;
+	struct cpsw_common *cpsw = priv->cpsw;
 	int flags = 0;
 	u16 vid = 0;
 	int ret;
@@ -1619,27 +1716,27 @@
 	if (!is_valid_ether_addr(addr->sa_data))
 		return -EADDRNOTAVAIL;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 		return ret;
 	}
 
-	if (priv->data.dual_emac) {
-		vid = priv->slaves[priv->emac_port].port_vlan;
+	if (cpsw->data.dual_emac) {
+		vid = cpsw->slaves[priv->emac_port].port_vlan;
 		flags = ALE_VLAN;
 	}
 
-	cpsw_ale_del_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM,
+	cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM,
 			   flags, vid);
-	cpsw_ale_add_ucast(priv->ale, addr->sa_data, HOST_PORT_NUM,
+	cpsw_ale_add_ucast(cpsw->ale, addr->sa_data, HOST_PORT_NUM,
 			   flags, vid);
 
 	memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN);
 	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
 	for_each_slave(priv, cpsw_set_slave_mac, priv);
 
-	pm_runtime_put(&priv->pdev->dev);
+	pm_runtime_put(cpsw->dev);
 
 	return 0;
 }
@@ -1647,12 +1744,12 @@
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void cpsw_ndo_poll_controller(struct net_device *ndev)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
-	cpsw_intr_disable(priv);
-	cpsw_rx_interrupt(priv->irqs_table[0], priv);
-	cpsw_tx_interrupt(priv->irqs_table[1], priv);
-	cpsw_intr_enable(priv);
+	cpsw_intr_disable(cpsw);
+	cpsw_rx_interrupt(cpsw->irqs_table[0], cpsw);
+	cpsw_tx_interrupt(cpsw->irqs_table[1], cpsw);
+	cpsw_intr_enable(cpsw);
 }
 #endif
 
@@ -1662,8 +1759,9 @@
 	int ret;
 	int unreg_mcast_mask = 0;
 	u32 port_mask;
+	struct cpsw_common *cpsw = priv->cpsw;
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		port_mask = (1 << (priv->emac_port + 1)) | ALE_PORT_HOST;
 
 		if (priv->ndev->flags & IFF_ALLMULTI)
@@ -1677,27 +1775,27 @@
 			unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2;
 	}
 
-	ret = cpsw_ale_add_vlan(priv->ale, vid, port_mask, 0, port_mask,
+	ret = cpsw_ale_add_vlan(cpsw->ale, vid, port_mask, 0, port_mask,
 				unreg_mcast_mask);
 	if (ret != 0)
 		return ret;
 
-	ret = cpsw_ale_add_ucast(priv->ale, priv->mac_addr,
+	ret = cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr,
 				 HOST_PORT_NUM, ALE_VLAN, vid);
 	if (ret != 0)
 		goto clean_vid;
 
-	ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast,
+	ret = cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast,
 				 port_mask, ALE_VLAN, vid, 0);
 	if (ret != 0)
 		goto clean_vlan_ucast;
 	return 0;
 
 clean_vlan_ucast:
-	cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+	cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
 			   HOST_PORT_NUM, ALE_VLAN, vid);
 clean_vid:
-	cpsw_ale_del_vlan(priv->ale, vid, 0);
+	cpsw_ale_del_vlan(cpsw->ale, vid, 0);
 	return ret;
 }
 
@@ -1705,26 +1803,27 @@
 				    __be16 proto, u16 vid)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 	int ret;
 
-	if (vid == priv->data.default_vlan)
+	if (vid == cpsw->data.default_vlan)
 		return 0;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 		return ret;
 	}
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		/* In dual EMAC, reserved VLAN id should not be used for
 		 * creating VLAN interfaces as this can break the dual
 		 * EMAC port separation
 		 */
 		int i;
 
-		for (i = 0; i < priv->data.slaves; i++) {
-			if (vid == priv->slaves[i].port_vlan)
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			if (vid == cpsw->slaves[i].port_vlan)
 				return -EINVAL;
 		}
 	}
@@ -1732,7 +1831,7 @@
 	dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid);
 	ret = cpsw_add_vlan_ale_entry(priv, vid);
 
-	pm_runtime_put(&priv->pdev->dev);
+	pm_runtime_put(cpsw->dev);
 	return ret;
 }
 
@@ -1740,39 +1839,40 @@
 				     __be16 proto, u16 vid)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 	int ret;
 
-	if (vid == priv->data.default_vlan)
+	if (vid == cpsw->data.default_vlan)
 		return 0;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 		return ret;
 	}
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		int i;
 
-		for (i = 0; i < priv->data.slaves; i++) {
-			if (vid == priv->slaves[i].port_vlan)
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			if (vid == cpsw->slaves[i].port_vlan)
 				return -EINVAL;
 		}
 	}
 
 	dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid);
-	ret = cpsw_ale_del_vlan(priv->ale, vid, 0);
+	ret = cpsw_ale_del_vlan(cpsw->ale, vid, 0);
 	if (ret != 0)
 		return ret;
 
-	ret = cpsw_ale_del_ucast(priv->ale, priv->mac_addr,
+	ret = cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr,
 				 HOST_PORT_NUM, ALE_VLAN, vid);
 	if (ret != 0)
 		return ret;
 
-	ret = cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast,
+	ret = cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast,
 				 0, ALE_VLAN, vid);
-	pm_runtime_put(&priv->pdev->dev);
+	pm_runtime_put(cpsw->dev);
 	return ret;
 }
 
@@ -1795,31 +1895,32 @@
 
 static int cpsw_get_regs_len(struct net_device *ndev)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
-	return priv->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32);
+	return cpsw->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32);
 }
 
 static void cpsw_get_regs(struct net_device *ndev,
 			  struct ethtool_regs *regs, void *p)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
 	u32 *reg = p;
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
 	/* update CPSW IP version */
-	regs->version = priv->version;
+	regs->version = cpsw->version;
 
-	cpsw_ale_dump(priv->ale, reg);
+	cpsw_ale_dump(cpsw->ale, reg);
 }
 
 static void cpsw_get_drvinfo(struct net_device *ndev,
 			     struct ethtool_drvinfo *info)
 {
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+	struct platform_device	*pdev = to_platform_device(cpsw->dev);
 
 	strlcpy(info->driver, "cpsw", sizeof(info->driver));
 	strlcpy(info->version, "1.0", sizeof(info->version));
-	strlcpy(info->bus_info, priv->pdev->name, sizeof(info->bus_info));
+	strlcpy(info->bus_info, pdev->name, sizeof(info->bus_info));
 }
 
 static u32 cpsw_get_msglevel(struct net_device *ndev)
@@ -1838,7 +1939,7 @@
 			    struct ethtool_ts_info *info)
 {
 #ifdef CONFIG_TI_CPTS
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
 	info->so_timestamping =
 		SOF_TIMESTAMPING_TX_HARDWARE |
@@ -1847,7 +1948,7 @@
 		SOF_TIMESTAMPING_RX_SOFTWARE |
 		SOF_TIMESTAMPING_SOFTWARE |
 		SOF_TIMESTAMPING_RAW_HARDWARE;
-	info->phc_index = priv->cpts->phc_index;
+	info->phc_index = cpsw->cpts->phc_index;
 	info->tx_types =
 		(1 << HWTSTAMP_TX_OFF) |
 		(1 << HWTSTAMP_TX_ON);
@@ -1870,10 +1971,11 @@
 			     struct ethtool_cmd *ecmd)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
-	if (priv->slaves[slave_no].phy)
-		return phy_ethtool_gset(priv->slaves[slave_no].phy, ecmd);
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_gset(cpsw->slaves[slave_no].phy, ecmd);
 	else
 		return -EOPNOTSUPP;
 }
@@ -1881,10 +1983,11 @@
 static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
-	if (priv->slaves[slave_no].phy)
-		return phy_ethtool_sset(priv->slaves[slave_no].phy, ecmd);
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_sset(cpsw->slaves[slave_no].phy, ecmd);
 	else
 		return -EOPNOTSUPP;
 }
@@ -1892,22 +1995,24 @@
 static void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
 	wol->supported = 0;
 	wol->wolopts = 0;
 
-	if (priv->slaves[slave_no].phy)
-		phy_ethtool_get_wol(priv->slaves[slave_no].phy, wol);
+	if (cpsw->slaves[slave_no].phy)
+		phy_ethtool_get_wol(cpsw->slaves[slave_no].phy, wol);
 }
 
 static int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
-	int slave_no = cpsw_slave_index(priv);
+	struct cpsw_common *cpsw = priv->cpsw;
+	int slave_no = cpsw_slave_index(cpsw, priv);
 
-	if (priv->slaves[slave_no].phy)
-		return phy_ethtool_set_wol(priv->slaves[slave_no].phy, wol);
+	if (cpsw->slaves[slave_no].phy)
+		return phy_ethtool_set_wol(cpsw->slaves[slave_no].phy, wol);
 	else
 		return -EOPNOTSUPP;
 }
@@ -1938,12 +2043,13 @@
 static int cpsw_ethtool_op_begin(struct net_device *ndev)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
 	int ret;
 
-	ret = pm_runtime_get_sync(&priv->pdev->dev);
+	ret = pm_runtime_get_sync(cpsw->dev);
 	if (ret < 0) {
 		cpsw_err(priv, drv, "ethtool begin failed %d\n", ret);
-		pm_runtime_put_noidle(&priv->pdev->dev);
+		pm_runtime_put_noidle(cpsw->dev);
 	}
 
 	return ret;
@@ -1954,11 +2060,185 @@
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	int ret;
 
-	ret = pm_runtime_put(&priv->pdev->dev);
+	ret = pm_runtime_put(priv->cpsw->dev);
 	if (ret < 0)
 		cpsw_err(priv, drv, "ethtool complete failed %d\n", ret);
 }
 
+static void cpsw_get_channels(struct net_device *ndev,
+			      struct ethtool_channels *ch)
+{
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
+
+	ch->max_combined = 0;
+	ch->max_rx = CPSW_MAX_QUEUES;
+	ch->max_tx = CPSW_MAX_QUEUES;
+	ch->max_other = 0;
+	ch->other_count = 0;
+	ch->rx_count = cpsw->rx_ch_num;
+	ch->tx_count = cpsw->tx_ch_num;
+	ch->combined_count = 0;
+}
+
+static int cpsw_check_ch_settings(struct cpsw_common *cpsw,
+				  struct ethtool_channels *ch)
+{
+	if (ch->combined_count)
+		return -EINVAL;
+
+	/* verify we have at least one channel in each direction */
+	if (!ch->rx_count || !ch->tx_count)
+		return -EINVAL;
+
+	if (ch->rx_count > cpsw->data.channels ||
+	    ch->tx_count > cpsw->data.channels)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
+{
+	int (*poll)(struct napi_struct *, int);
+	struct cpsw_common *cpsw = priv->cpsw;
+	void (*handler)(void *, int, int);
+	struct cpdma_chan **chan;
+	int ret, *ch;
+
+	if (rx) {
+		ch = &cpsw->rx_ch_num;
+		chan = cpsw->rxch;
+		handler = cpsw_rx_handler;
+		poll = cpsw_rx_poll;
+	} else {
+		ch = &cpsw->tx_ch_num;
+		chan = cpsw->txch;
+		handler = cpsw_tx_handler;
+		poll = cpsw_tx_poll;
+	}
+
+	while (*ch < ch_num) {
+		chan[*ch] = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+
+		if (IS_ERR(chan[*ch]))
+			return PTR_ERR(chan[*ch]);
+
+		if (!chan[*ch])
+			return -EINVAL;
+
+		cpsw_info(priv, ifup, "created new %d %s channel\n", *ch,
+			  (rx ? "rx" : "tx"));
+		(*ch)++;
+	}
+
+	while (*ch > ch_num) {
+		(*ch)--;
+
+		ret = cpdma_chan_destroy(chan[*ch]);
+		if (ret)
+			return ret;
+
+		cpsw_info(priv, ifup, "destroyed %d %s channel\n", *ch,
+			  (rx ? "rx" : "tx"));
+	}
+
+	return 0;
+}
+
+static int cpsw_update_channels(struct cpsw_priv *priv,
+				struct ethtool_channels *ch)
+{
+	int ret;
+
+	ret = cpsw_update_channels_res(priv, ch->rx_count, 1);
+	if (ret)
+		return ret;
+
+	ret = cpsw_update_channels_res(priv, ch->tx_count, 0);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int cpsw_set_channels(struct net_device *ndev,
+			     struct ethtool_channels *chs)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_slave *slave;
+	int i, ret;
+
+	ret = cpsw_check_ch_settings(cpsw, chs);
+	if (ret < 0)
+		return ret;
+
+	/* Disable NAPI scheduling */
+	cpsw_intr_disable(cpsw);
+
+	/* Stop all transmit queues for every network device.
+	 * Disable re-using rx descriptors with dormant_on.
+	 */
+	for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+		if (!(slave->ndev && netif_running(slave->ndev)))
+			continue;
+
+		netif_tx_stop_all_queues(slave->ndev);
+		netif_dormant_on(slave->ndev);
+	}
+
+	/* Handle rest of tx packets and stop cpdma channels */
+	cpdma_ctlr_stop(cpsw->dma);
+	ret = cpsw_update_channels(priv, chs);
+	if (ret)
+		goto err;
+
+	for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+		if (!(slave->ndev && netif_running(slave->ndev)))
+			continue;
+
+		/* Inform stack about new count of queues */
+		ret = netif_set_real_num_tx_queues(slave->ndev,
+						   cpsw->tx_ch_num);
+		if (ret) {
+			dev_err(priv->dev, "cannot set real number of tx queues\n");
+			goto err;
+		}
+
+		ret = netif_set_real_num_rx_queues(slave->ndev,
+						   cpsw->rx_ch_num);
+		if (ret) {
+			dev_err(priv->dev, "cannot set real number of rx queues\n");
+			goto err;
+		}
+
+		/* Enable rx packets handling */
+		netif_dormant_off(slave->ndev);
+	}
+
+	if (cpsw_common_res_usage_state(cpsw)) {
+		ret = cpsw_fill_rx_channels(priv);
+		if (ret)
+			goto err;
+
+		/* After this receive is started */
+		cpdma_ctlr_start(cpsw->dma);
+		cpsw_intr_enable(cpsw);
+	}
+
+	/* Resume transmit for every affected interface */
+	for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+		if (!(slave->ndev && netif_running(slave->ndev)))
+			continue;
+		netif_tx_start_all_queues(slave->ndev);
+	}
+	return 0;
+err:
+	dev_err(priv->dev, "cannot update channels number, closing device\n");
+	dev_close(ndev);
+	return ret;
+}
+
 static const struct ethtool_ops cpsw_ethtool_ops = {
 	.get_drvinfo	= cpsw_get_drvinfo,
 	.get_msglevel	= cpsw_get_msglevel,
@@ -1980,14 +2260,16 @@
 	.get_regs	= cpsw_get_regs,
 	.begin		= cpsw_ethtool_op_begin,
 	.complete	= cpsw_ethtool_op_complete,
+	.get_channels	= cpsw_get_channels,
+	.set_channels	= cpsw_set_channels,
 };
 
-static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv,
+static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw,
 			    u32 slave_reg_ofs, u32 sliver_reg_ofs)
 {
-	void __iomem		*regs = priv->regs;
+	void __iomem		*regs = cpsw->regs;
 	int			slave_num = slave->slave_num;
-	struct cpsw_slave_data	*data = priv->data.slave_data + slave_num;
+	struct cpsw_slave_data	*data = cpsw->data.slave_data + slave_num;
 
 	slave->data	= data;
 	slave->regs	= regs + slave_reg_ofs;
@@ -2158,71 +2440,50 @@
 	return 0;
 }
 
-static int cpsw_probe_dual_emac(struct platform_device *pdev,
-				struct cpsw_priv *priv)
+static int cpsw_probe_dual_emac(struct cpsw_priv *priv)
 {
-	struct cpsw_platform_data	*data = &priv->data;
+	struct cpsw_common		*cpsw = priv->cpsw;
+	struct cpsw_platform_data	*data = &cpsw->data;
 	struct net_device		*ndev;
 	struct cpsw_priv		*priv_sl2;
-	int ret = 0, i;
+	int ret = 0;
 
-	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
+	ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES);
 	if (!ndev) {
-		dev_err(&pdev->dev, "cpsw: error allocating net_device\n");
+		dev_err(cpsw->dev, "cpsw: error allocating net_device\n");
 		return -ENOMEM;
 	}
 
 	priv_sl2 = netdev_priv(ndev);
-	priv_sl2->data = *data;
-	priv_sl2->pdev = pdev;
+	priv_sl2->cpsw = cpsw;
 	priv_sl2->ndev = ndev;
 	priv_sl2->dev  = &ndev->dev;
 	priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
-	priv_sl2->rx_packet_max = max(rx_packet_max, 128);
 
 	if (is_valid_ether_addr(data->slave_data[1].mac_addr)) {
 		memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr,
 			ETH_ALEN);
-		dev_info(&pdev->dev, "cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr);
+		dev_info(cpsw->dev, "cpsw: Detected MACID = %pM\n",
+			 priv_sl2->mac_addr);
 	} else {
 		random_ether_addr(priv_sl2->mac_addr);
-		dev_info(&pdev->dev, "cpsw: Random MACID = %pM\n", priv_sl2->mac_addr);
+		dev_info(cpsw->dev, "cpsw: Random MACID = %pM\n",
+			 priv_sl2->mac_addr);
 	}
 	memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN);
 
-	priv_sl2->slaves = priv->slaves;
-	priv_sl2->clk = priv->clk;
-
-	priv_sl2->coal_intvl = 0;
-	priv_sl2->bus_freq_mhz = priv->bus_freq_mhz;
-
-	priv_sl2->regs = priv->regs;
-	priv_sl2->host_port_regs = priv->host_port_regs;
-	priv_sl2->wr_regs = priv->wr_regs;
-	priv_sl2->hw_stats = priv->hw_stats;
-	priv_sl2->dma = priv->dma;
-	priv_sl2->txch = priv->txch;
-	priv_sl2->rxch = priv->rxch;
-	priv_sl2->ale = priv->ale;
 	priv_sl2->emac_port = 1;
-	priv->slaves[1].ndev = ndev;
-	priv_sl2->cpts = priv->cpts;
-	priv_sl2->version = priv->version;
-
-	for (i = 0; i < priv->num_irqs; i++) {
-		priv_sl2->irqs_table[i] = priv->irqs_table[i];
-		priv_sl2->num_irqs = priv->num_irqs;
-	}
+	cpsw->slaves[1].ndev = ndev;
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
 	ndev->ethtool_ops = &cpsw_ethtool_ops;
 
 	/* register the network device */
-	SET_NETDEV_DEV(ndev, &pdev->dev);
+	SET_NETDEV_DEV(ndev, cpsw->dev);
 	ret = register_netdev(ndev);
 	if (ret) {
-		dev_err(&pdev->dev, "cpsw: error registering net device\n");
+		dev_err(cpsw->dev, "cpsw: error registering net device\n");
 		free_netdev(ndev);
 		ret = -ENODEV;
 	}
@@ -2270,6 +2531,7 @@
 
 static int cpsw_probe(struct platform_device *pdev)
 {
+	struct clk			*clk;
 	struct cpsw_platform_data	*data;
 	struct net_device		*ndev;
 	struct cpsw_priv		*priv;
@@ -2280,10 +2542,14 @@
 	const struct of_device_id	*of_id;
 	struct gpio_descs		*mode;
 	u32 slave_offset, sliver_offset, slave_size;
+	struct cpsw_common		*cpsw;
 	int ret = 0, i;
 	int irq;
 
-	ndev = alloc_etherdev(sizeof(struct cpsw_priv));
+	cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL);
+	cpsw->dev = &pdev->dev;
+
+	ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES);
 	if (!ndev) {
 		dev_err(&pdev->dev, "error allocating net_device\n");
 		return -ENOMEM;
@@ -2291,13 +2557,13 @@
 
 	platform_set_drvdata(pdev, ndev);
 	priv = netdev_priv(ndev);
-	priv->pdev = pdev;
+	priv->cpsw = cpsw;
 	priv->ndev = ndev;
 	priv->dev  = &ndev->dev;
 	priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
-	priv->rx_packet_max = max(rx_packet_max, 128);
-	priv->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
-	if (!priv->cpts) {
+	cpsw->rx_packet_max = max(rx_packet_max, 128);
+	cpsw->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
+	if (!cpsw->cpts) {
 		dev_err(&pdev->dev, "error allocating cpts\n");
 		ret = -ENOMEM;
 		goto clean_ndev_ret;
@@ -2318,12 +2584,14 @@
 	/* Select default pin state */
 	pinctrl_pm_select_default_state(&pdev->dev);
 
-	if (cpsw_probe_dt(&priv->data, pdev)) {
+	if (cpsw_probe_dt(&cpsw->data, pdev)) {
 		dev_err(&pdev->dev, "cpsw: platform data missing\n");
 		ret = -ENODEV;
 		goto clean_runtime_disable_ret;
 	}
-	data = &priv->data;
+	data = &cpsw->data;
+	cpsw->rx_ch_num = 1;
+	cpsw->tx_ch_num = 1;
 
 	if (is_valid_ether_addr(data->slave_data[0].mac_addr)) {
 		memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN);
@@ -2335,27 +2603,26 @@
 
 	memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN);
 
-	priv->slaves = devm_kzalloc(&pdev->dev,
+	cpsw->slaves = devm_kzalloc(&pdev->dev,
 				    sizeof(struct cpsw_slave) * data->slaves,
 				    GFP_KERNEL);
-	if (!priv->slaves) {
+	if (!cpsw->slaves) {
 		ret = -ENOMEM;
 		goto clean_runtime_disable_ret;
 	}
 	for (i = 0; i < data->slaves; i++)
-		priv->slaves[i].slave_num = i;
+		cpsw->slaves[i].slave_num = i;
 
-	priv->slaves[0].ndev = ndev;
+	cpsw->slaves[0].ndev = ndev;
 	priv->emac_port = 0;
 
-	priv->clk = devm_clk_get(&pdev->dev, "fck");
-	if (IS_ERR(priv->clk)) {
+	clk = devm_clk_get(&pdev->dev, "fck");
+	if (IS_ERR(clk)) {
 		dev_err(priv->dev, "fck is not found\n");
 		ret = -ENODEV;
 		goto clean_runtime_disable_ret;
 	}
-	priv->coal_intvl = 0;
-	priv->bus_freq_mhz = clk_get_rate(priv->clk) / 1000000;
+	cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000;
 
 	ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ss_regs = devm_ioremap_resource(&pdev->dev, ss_res);
@@ -2363,7 +2630,7 @@
 		ret = PTR_ERR(ss_regs);
 		goto clean_runtime_disable_ret;
 	}
-	priv->regs = ss_regs;
+	cpsw->regs = ss_regs;
 
 	/* Need to enable clocks with runtime PM api to access module
 	 * registers
@@ -2373,24 +2640,24 @@
 		pm_runtime_put_noidle(&pdev->dev);
 		goto clean_runtime_disable_ret;
 	}
-	priv->version = readl(&priv->regs->id_ver);
+	cpsw->version = readl(&cpsw->regs->id_ver);
 	pm_runtime_put_sync(&pdev->dev);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	priv->wr_regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(priv->wr_regs)) {
-		ret = PTR_ERR(priv->wr_regs);
+	cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(cpsw->wr_regs)) {
+		ret = PTR_ERR(cpsw->wr_regs);
 		goto clean_runtime_disable_ret;
 	}
 
 	memset(&dma_params, 0, sizeof(dma_params));
 	memset(&ale_params, 0, sizeof(ale_params));
 
-	switch (priv->version) {
+	switch (cpsw->version) {
 	case CPSW_VERSION_1:
-		priv->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
-		priv->cpts->reg      = ss_regs + CPSW1_CPTS_OFFSET;
-		priv->hw_stats	     = ss_regs + CPSW1_HW_STATS;
+		cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
+		cpsw->cpts->reg      = ss_regs + CPSW1_CPTS_OFFSET;
+		cpsw->hw_stats	     = ss_regs + CPSW1_HW_STATS;
 		dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
 		dma_params.txhdp     = ss_regs + CPSW1_STATERAM_OFFSET;
 		ale_params.ale_regs  = ss_regs + CPSW1_ALE_OFFSET;
@@ -2402,9 +2669,9 @@
 	case CPSW_VERSION_2:
 	case CPSW_VERSION_3:
 	case CPSW_VERSION_4:
-		priv->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
-		priv->cpts->reg      = ss_regs + CPSW2_CPTS_OFFSET;
-		priv->hw_stats	     = ss_regs + CPSW2_HW_STATS;
+		cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
+		cpsw->cpts->reg      = ss_regs + CPSW2_CPTS_OFFSET;
+		cpsw->hw_stats	     = ss_regs + CPSW2_HW_STATS;
 		dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
 		dma_params.txhdp     = ss_regs + CPSW2_STATERAM_OFFSET;
 		ale_params.ale_regs  = ss_regs + CPSW2_ALE_OFFSET;
@@ -2415,13 +2682,14 @@
 			(u32 __force) ss_res->start + CPSW2_BD_OFFSET;
 		break;
 	default:
-		dev_err(priv->dev, "unknown version 0x%08x\n", priv->version);
+		dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version);
 		ret = -ENODEV;
 		goto clean_runtime_disable_ret;
 	}
-	for (i = 0; i < priv->data.slaves; i++) {
-		struct cpsw_slave *slave = &priv->slaves[i];
-		cpsw_slave_init(slave, priv, slave_offset, sliver_offset);
+	for (i = 0; i < cpsw->data.slaves; i++) {
+		struct cpsw_slave *slave = &cpsw->slaves[i];
+
+		cpsw_slave_init(slave, cpsw, slave_offset, sliver_offset);
 		slave_offset  += slave_size;
 		sliver_offset += SLIVER_SIZE;
 	}
@@ -2441,19 +2709,16 @@
 	dma_params.has_ext_regs		= true;
 	dma_params.desc_hw_addr         = dma_params.desc_mem_phys;
 
-	priv->dma = cpdma_ctlr_create(&dma_params);
-	if (!priv->dma) {
+	cpsw->dma = cpdma_ctlr_create(&dma_params);
+	if (!cpsw->dma) {
 		dev_err(priv->dev, "error initializing dma\n");
 		ret = -ENOMEM;
 		goto clean_runtime_disable_ret;
 	}
 
-	priv->txch = cpdma_chan_create(priv->dma, tx_chan_num(0),
-				       cpsw_tx_handler);
-	priv->rxch = cpdma_chan_create(priv->dma, rx_chan_num(0),
-				       cpsw_rx_handler);
-
-	if (WARN_ON(!priv->txch || !priv->rxch)) {
+	cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
+	cpsw->rxch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1);
+	if (WARN_ON(!cpsw->rxch[0] || !cpsw->txch[0])) {
 		dev_err(priv->dev, "error initializing dma channels\n");
 		ret = -ENOMEM;
 		goto clean_dma_ret;
@@ -2464,8 +2729,8 @@
 	ale_params.ale_entries		= data->ale_entries;
 	ale_params.ale_ports		= data->slaves;
 
-	priv->ale = cpsw_ale_create(&ale_params);
-	if (!priv->ale) {
+	cpsw->ale = cpsw_ale_create(&ale_params);
+	if (!cpsw->ale) {
 		dev_err(priv->dev, "error initializing ale engine\n");
 		ret = -ENODEV;
 		goto clean_dma_ret;
@@ -2482,7 +2747,7 @@
 	if (of_id) {
 		pdev->id_entry = of_id->data;
 		if (pdev->id_entry->driver_data)
-			priv->quirk_irq = true;
+			cpsw->quirk_irq = true;
 	}
 
 	/* Grab RX and TX IRQs. Note that we also have RX_THRESHOLD and
@@ -2500,9 +2765,9 @@
 		goto clean_ale_ret;
 	}
 
-	priv->irqs_table[0] = irq;
+	cpsw->irqs_table[0] = irq;
 	ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt,
-			       0, dev_name(&pdev->dev), priv);
+			       0, dev_name(&pdev->dev), cpsw);
 	if (ret < 0) {
 		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
 		goto clean_ale_ret;
@@ -2515,21 +2780,20 @@
 		goto clean_ale_ret;
 	}
 
-	priv->irqs_table[1] = irq;
+	cpsw->irqs_table[1] = irq;
 	ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt,
-			       0, dev_name(&pdev->dev), priv);
+			       0, dev_name(&pdev->dev), cpsw);
 	if (ret < 0) {
 		dev_err(priv->dev, "error attaching irq (%d)\n", ret);
 		goto clean_ale_ret;
 	}
-	priv->num_irqs = 2;
 
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
 	ndev->ethtool_ops = &cpsw_ethtool_ops;
-	netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
-	netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
+	netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
+	netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
 
 	/* register the network device */
 	SET_NETDEV_DEV(ndev, &pdev->dev);
@@ -2543,8 +2807,8 @@
 	cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d)\n",
 		    &ss_res->start, ndev->irq);
 
-	if (priv->data.dual_emac) {
-		ret = cpsw_probe_dual_emac(pdev, priv);
+	if (cpsw->data.dual_emac) {
+		ret = cpsw_probe_dual_emac(priv);
 		if (ret) {
 			cpsw_err(priv, probe, "error probe slave 2 emac interface\n");
 			goto clean_ale_ret;
@@ -2554,9 +2818,9 @@
 	return 0;
 
 clean_ale_ret:
-	cpsw_ale_destroy(priv->ale);
+	cpsw_ale_destroy(cpsw->ale);
 clean_dma_ret:
-	cpdma_ctlr_destroy(priv->dma);
+	cpdma_ctlr_destroy(cpsw->dma);
 clean_runtime_disable_ret:
 	pm_runtime_disable(&pdev->dev);
 clean_ndev_ret:
@@ -2567,7 +2831,7 @@
 static int cpsw_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
-	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 	int ret;
 
 	ret = pm_runtime_get_sync(&pdev->dev);
@@ -2576,17 +2840,17 @@
 		return ret;
 	}
 
-	if (priv->data.dual_emac)
-		unregister_netdev(cpsw_get_slave_ndev(priv, 1));
+	if (cpsw->data.dual_emac)
+		unregister_netdev(cpsw->slaves[1].ndev);
 	unregister_netdev(ndev);
 
-	cpsw_ale_destroy(priv->ale);
-	cpdma_ctlr_destroy(priv->dma);
+	cpsw_ale_destroy(cpsw->ale);
+	cpdma_ctlr_destroy(cpsw->dma);
 	of_platform_depopulate(&pdev->dev);
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-	if (priv->data.dual_emac)
-		free_netdev(cpsw_get_slave_ndev(priv, 1));
+	if (cpsw->data.dual_emac)
+		free_netdev(cpsw->slaves[1].ndev);
 	free_netdev(ndev);
 	return 0;
 }
@@ -2596,14 +2860,14 @@
 {
 	struct platform_device	*pdev = to_platform_device(dev);
 	struct net_device	*ndev = platform_get_drvdata(pdev);
-	struct cpsw_priv	*priv = netdev_priv(ndev);
+	struct cpsw_common	*cpsw = ndev_to_cpsw(ndev);
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		int i;
 
-		for (i = 0; i < priv->data.slaves; i++) {
-			if (netif_running(priv->slaves[i].ndev))
-				cpsw_ndo_stop(priv->slaves[i].ndev);
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			if (netif_running(cpsw->slaves[i].ndev))
+				cpsw_ndo_stop(cpsw->slaves[i].ndev);
 		}
 	} else {
 		if (netif_running(ndev))
@@ -2611,7 +2875,7 @@
 	}
 
 	/* Select sleep pin state */
-	pinctrl_pm_select_sleep_state(&pdev->dev);
+	pinctrl_pm_select_sleep_state(dev);
 
 	return 0;
 }
@@ -2620,17 +2884,17 @@
 {
 	struct platform_device	*pdev = to_platform_device(dev);
 	struct net_device	*ndev = platform_get_drvdata(pdev);
-	struct cpsw_priv	*priv = netdev_priv(ndev);
+	struct cpsw_common	*cpsw = netdev_priv(ndev);
 
 	/* Select default pin state */
-	pinctrl_pm_select_default_state(&pdev->dev);
+	pinctrl_pm_select_default_state(dev);
 
-	if (priv->data.dual_emac) {
+	if (cpsw->data.dual_emac) {
 		int i;
 
-		for (i = 0; i < priv->data.slaves; i++) {
-			if (netif_running(priv->slaves[i].ndev))
-				cpsw_ndo_open(priv->slaves[i].ndev);
+		for (i = 0; i < cpsw->data.slaves; i++) {
+			if (netif_running(cpsw->slaves[i].ndev))
+				cpsw_ndo_open(cpsw->slaves[i].ndev);
 		}
 	} else {
 		if (netif_running(ndev))
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 19e5f32..c3f35f1 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -86,7 +86,7 @@
 	void __iomem		*iomap;		/* ioremap map */
 	void			*cpumap;	/* dma_alloc map */
 	int			desc_size, mem_size;
-	int			num_desc, used_desc;
+	int			num_desc;
 	struct device		*dev;
 	struct gen_pool		*gen_pool;
 };
@@ -104,6 +104,7 @@
 	struct cpdma_desc_pool	*pool;
 	spinlock_t		lock;
 	struct cpdma_chan	*channels[2 * CPDMA_MAX_CHANNELS];
+	int chan_num;
 };
 
 struct cpdma_chan {
@@ -123,6 +124,13 @@
 	int	int_set, int_clear, td;
 };
 
+#define tx_chan_num(chan)	(chan)
+#define rx_chan_num(chan)	((chan) + CPDMA_MAX_CHANNELS)
+#define is_rx_chan(chan)	((chan)->chan_num >= CPDMA_MAX_CHANNELS)
+#define is_tx_chan(chan)	(!is_rx_chan(chan))
+#define __chan_linear(chan_num)	((chan_num) & (CPDMA_MAX_CHANNELS - 1))
+#define chan_linear(chan)	__chan_linear((chan)->chan_num)
+
 /* The following make access to common cpdma_ctlr params more readable */
 #define dmaregs		params.dmaregs
 #define num_chan	params.num_chan
@@ -148,7 +156,10 @@
 	if (!pool)
 		return;
 
-	WARN_ON(pool->used_desc);
+	WARN(gen_pool_size(pool->gen_pool) != gen_pool_avail(pool->gen_pool),
+	     "cpdma_desc_pool size %d != avail %d",
+	     gen_pool_size(pool->gen_pool),
+	     gen_pool_avail(pool->gen_pool));
 	if (pool->cpumap)
 		dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap,
 				  pool->phys);
@@ -232,21 +243,14 @@
 static struct cpdma_desc __iomem *
 cpdma_desc_alloc(struct cpdma_desc_pool *pool)
 {
-	struct cpdma_desc __iomem *desc = NULL;
-
-	desc = (struct cpdma_desc __iomem *)gen_pool_alloc(pool->gen_pool,
-							   pool->desc_size);
-	if (desc)
-		pool->used_desc++;
-
-	return desc;
+	return (struct cpdma_desc __iomem *)
+		gen_pool_alloc(pool->gen_pool, pool->desc_size);
 }
 
 static void cpdma_desc_free(struct cpdma_desc_pool *pool,
 			    struct cpdma_desc __iomem *desc, int num_desc)
 {
 	gen_pool_free(pool->gen_pool, (unsigned long)desc, pool->desc_size);
-	pool->used_desc--;
 }
 
 struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params)
@@ -260,6 +264,7 @@
 	ctlr->state = CPDMA_STATE_IDLE;
 	ctlr->params = *params;
 	ctlr->dev = params->dev;
+	ctlr->chan_num = 0;
 	spin_lock_init(&ctlr->lock);
 
 	ctlr->pool = cpdma_desc_pool_create(ctlr->dev,
@@ -336,12 +341,14 @@
 	}
 
 	ctlr->state = CPDMA_STATE_TEARDOWN;
+	spin_unlock_irqrestore(&ctlr->lock, flags);
 
 	for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) {
 		if (ctlr->channels[i])
 			cpdma_chan_stop(ctlr->channels[i]);
 	}
 
+	spin_lock_irqsave(&ctlr->lock, flags);
 	dma_reg_write(ctlr, CPDMA_RXINTMASKCLEAR, 0xffffffff);
 	dma_reg_write(ctlr, CPDMA_TXINTMASKCLEAR, 0xffffffff);
 
@@ -403,13 +410,52 @@
 }
 EXPORT_SYMBOL_GPL(cpdma_ctlr_eoi);
 
-struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
-				     cpdma_handler_fn handler)
+u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr)
 {
+	return dma_reg_read(ctlr, CPDMA_RXINTSTATMASKED);
+}
+EXPORT_SYMBOL_GPL(cpdma_ctrl_rxchs_state);
+
+u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr)
+{
+	return dma_reg_read(ctlr, CPDMA_TXINTSTATMASKED);
+}
+EXPORT_SYMBOL_GPL(cpdma_ctrl_txchs_state);
+
+/**
+ * cpdma_chan_split_pool - Splits ctrl pool between all channels.
+ * Has to be called under ctlr lock
+ */
+static void cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
+{
+	struct cpdma_desc_pool *pool = ctlr->pool;
 	struct cpdma_chan *chan;
-	int offset = (chan_num % CPDMA_MAX_CHANNELS) * 4;
+	int ch_desc_num;
+	int i;
+
+	if (!ctlr->chan_num)
+		return;
+
+	/* calculate average size of pool slice */
+	ch_desc_num = pool->num_desc / ctlr->chan_num;
+
+	/* split ctlr pool */
+	for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) {
+		chan = ctlr->channels[i];
+		if (chan)
+			chan->desc_num = ch_desc_num;
+	}
+}
+
+struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
+				     cpdma_handler_fn handler, int rx_type)
+{
+	int offset = chan_num * 4;
+	struct cpdma_chan *chan;
 	unsigned long flags;
 
+	chan_num = rx_type ? rx_chan_num(chan_num) : tx_chan_num(chan_num);
+
 	if (__chan_linear(chan_num) >= ctlr->num_chan)
 		return NULL;
 
@@ -451,14 +497,25 @@
 	spin_lock_init(&chan->lock);
 
 	ctlr->channels[chan_num] = chan;
+	ctlr->chan_num++;
+
+	cpdma_chan_split_pool(ctlr);
+
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return chan;
 }
 EXPORT_SYMBOL_GPL(cpdma_chan_create);
 
-int cpdma_chan_get_rx_buf_num(struct cpdma_ctlr *ctlr)
+int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan)
 {
-	return ctlr->pool->num_desc / 2;
+	unsigned long flags;
+	int desc_num;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	desc_num = chan->desc_num;
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return desc_num;
 }
 EXPORT_SYMBOL_GPL(cpdma_chan_get_rx_buf_num);
 
@@ -475,6 +532,10 @@
 	if (chan->state != CPDMA_STATE_IDLE)
 		cpdma_chan_stop(chan);
 	ctlr->channels[chan->chan_num] = NULL;
+	ctlr->chan_num--;
+
+	cpdma_chan_split_pool(ctlr);
+
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return 0;
 }
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h
index 4b46cd6..a07b22b 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.h
+++ b/drivers/net/ethernet/ti/davinci_cpdma.h
@@ -17,13 +17,6 @@
 
 #define CPDMA_MAX_CHANNELS	BITS_PER_LONG
 
-#define tx_chan_num(chan)	(chan)
-#define rx_chan_num(chan)	((chan) + CPDMA_MAX_CHANNELS)
-#define is_rx_chan(chan)	((chan)->chan_num >= CPDMA_MAX_CHANNELS)
-#define is_tx_chan(chan)	(!is_rx_chan(chan))
-#define __chan_linear(chan_num)	((chan_num) & (CPDMA_MAX_CHANNELS - 1))
-#define chan_linear(chan)	__chan_linear((chan)->chan_num)
-
 #define CPDMA_RX_SOURCE_PORT(__status__)	((__status__ >> 16) & 0x7)
 
 #define CPDMA_EOI_RX_THRESH	0x0
@@ -79,8 +72,8 @@
 int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr);
 
 struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
-				     cpdma_handler_fn handler);
-int cpdma_chan_get_rx_buf_num(struct cpdma_ctlr *ctlr);
+				     cpdma_handler_fn handler, int rx_type);
+int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan);
 int cpdma_chan_destroy(struct cpdma_chan *chan);
 int cpdma_chan_start(struct cpdma_chan *chan);
 int cpdma_chan_stop(struct cpdma_chan *chan);
@@ -94,6 +87,8 @@
 int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable);
 void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value);
 int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable);
+u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr);
+u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr);
 bool cpdma_check_free_tx_desc(struct cpdma_chan *chan);
 
 enum cpdma_control {
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 727a79f..2fd94a5 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -597,14 +597,14 @@
 }
 
 /**
- * hash_add - Hash function to add mac addr from hash table
+ * emac_hash_add - Hash function to add mac addr from hash table
  * @priv: The DaVinci EMAC private adapter structure
  * @mac_addr: mac address to delete from hash table
  *
  * Adds mac address to the internal hash table
  *
  */
-static int hash_add(struct emac_priv *priv, u8 *mac_addr)
+static int emac_hash_add(struct emac_priv *priv, u8 *mac_addr)
 {
 	struct device *emac_dev = &priv->ndev->dev;
 	u32 rc = 0;
@@ -613,7 +613,7 @@
 
 	if (hash_value >= EMAC_NUM_MULTICAST_BITS) {
 		if (netif_msg_drv(priv)) {
-			dev_err(emac_dev, "DaVinci EMAC: hash_add(): Invalid "\
+			dev_err(emac_dev, "DaVinci EMAC: emac_hash_add(): Invalid "\
 				"Hash %08x, should not be greater than %08x",
 				hash_value, (EMAC_NUM_MULTICAST_BITS - 1));
 		}
@@ -639,14 +639,14 @@
 }
 
 /**
- * hash_del - Hash function to delete mac addr from hash table
+ * emac_hash_del - Hash function to delete mac addr from hash table
  * @priv: The DaVinci EMAC private adapter structure
  * @mac_addr: mac address to delete from hash table
  *
  * Removes mac address from the internal hash table
  *
  */
-static int hash_del(struct emac_priv *priv, u8 *mac_addr)
+static int emac_hash_del(struct emac_priv *priv, u8 *mac_addr)
 {
 	u32 hash_value;
 	u32 hash_bit;
@@ -696,10 +696,10 @@
 
 	switch (action) {
 	case EMAC_MULTICAST_ADD:
-		update = hash_add(priv, mac_addr);
+		update = emac_hash_add(priv, mac_addr);
 		break;
 	case EMAC_MULTICAST_DEL:
-		update = hash_del(priv, mac_addr);
+		update = emac_hash_del(priv, mac_addr);
 		break;
 	case EMAC_ALL_MULTI_SET:
 		update = 1;
@@ -1870,10 +1870,10 @@
 		goto no_pdata;
 	}
 
-	priv->txchan = cpdma_chan_create(priv->dma, tx_chan_num(EMAC_DEF_TX_CH),
-				       emac_tx_handler);
-	priv->rxchan = cpdma_chan_create(priv->dma, rx_chan_num(EMAC_DEF_RX_CH),
-				       emac_rx_handler);
+	priv->txchan = cpdma_chan_create(priv->dma, EMAC_DEF_TX_CH,
+					 emac_tx_handler, 0);
+	priv->rxchan = cpdma_chan_create(priv->dma, EMAC_DEF_RX_CH,
+					 emac_rx_handler, 1);
 	if (WARN_ON(!priv->txchan || !priv->rxchan)) {
 		rc = -ENOMEM;
 		goto no_cpdma_chan;
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index 79f0ec4..bc258d7 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -1791,7 +1791,7 @@
 	gelic_card_free_chain(card, card->tx_chain.head);
 fail_alloc_tx:
 	free_irq(card->irq, card);
-	netdev->irq = NO_IRQ;
+	netdev->irq = 0;
 fail_request_irq:
 	ps3_sb_event_receive_port_destroy(dev, card->irq);
 fail_alloc_irq:
@@ -1843,7 +1843,7 @@
 	netdev0 = card->netdev[GELIC_PORT_ETHERNET_0];
 	/* disconnect event port */
 	free_irq(card->irq, card);
-	netdev0->irq = NO_IRQ;
+	netdev0->irq = 0;
 	ps3_sb_event_receive_port_destroy(card->dev, card->irq);
 
 	wait_event(card->waitq,
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index 01a7714..8fd1312 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -166,6 +166,7 @@
 
 static void tsi108_timed_checker(unsigned long dev_ptr);
 
+#ifdef DEBUG
 static void dump_eth_one(struct net_device *dev)
 {
 	struct tsi108_prv_data *data = netdev_priv(dev);
@@ -190,6 +191,7 @@
 	       TSI_READ(TSI108_EC_RXESTAT),
 	       TSI_READ(TSI108_EC_RXERR), data->rxpending);
 }
+#endif
 
 /* Synchronization is needed between the thread and up/down events.
  * Note that the PHY is accessed through the same registers for both
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index f38696c..908e72e 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -1724,24 +1724,21 @@
 		struct velocity_td_info *tdinfo, struct tx_desc *td)
 {
 	struct sk_buff *skb = tdinfo->skb;
+	int i;
 
 	/*
 	 *	Don't unmap the pre-allocated tx_bufs
 	 */
-	if (tdinfo->skb_dma) {
-		int i;
+	for (i = 0; i < tdinfo->nskb_dma; i++) {
+		size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN);
 
-		for (i = 0; i < tdinfo->nskb_dma; i++) {
-			size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN);
+		/* For scatter-gather */
+		if (skb_shinfo(skb)->nr_frags > 0)
+			pktlen = max_t(size_t, pktlen,
+				       td->td_buf[i].size & ~TD_QUEUE);
 
-			/* For scatter-gather */
-			if (skb_shinfo(skb)->nr_frags > 0)
-				pktlen = max_t(size_t, pktlen,
-						td->td_buf[i].size & ~TD_QUEUE);
-
-			dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],
-					le16_to_cpu(pktlen), DMA_TO_DEVICE);
-		}
+		dma_unmap_single(vptr->dev, tdinfo->skb_dma[i],
+				 le16_to_cpu(pktlen), DMA_TO_DEVICE);
 	}
 	dev_kfree_skb_irq(skb);
 	tdinfo->skb = NULL;
diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig
index 4f5c024..6d68c8a 100644
--- a/drivers/net/ethernet/xilinx/Kconfig
+++ b/drivers/net/ethernet/xilinx/Kconfig
@@ -5,7 +5,7 @@
 config NET_VENDOR_XILINX
 	bool "Xilinx devices"
 	default y
-	depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ
+	depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -18,7 +18,7 @@
 
 config XILINX_EMACLITE
 	tristate "Xilinx 10/100 Ethernet Lite support"
-	depends on (PPC32 || MICROBLAZE || ARCH_ZYNQ)
+	depends on PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS
 	select PHYLIB
 	---help---
 	  This driver supports the 10/100 Ethernet Lite from Xilinx.
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 36ee7ab..69e2a83 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1297,7 +1297,7 @@
 	return 0;
 }
 
-static struct ethtool_ops axienet_ethtool_ops = {
+static const struct ethtool_ops axienet_ethtool_ops = {
 	.get_drvinfo    = axienet_ethtools_get_drvinfo,
 	.get_regs_len   = axienet_ethtools_get_regs_len,
 	.get_regs       = axienet_ethtools_get_regs,
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 3cee84a..93dc10b 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1131,11 +1131,13 @@
 	lp->rx_ping_pong = get_bool(ofdev, "xlnx,rx-ping-pong");
 	mac_address = of_get_mac_address(ofdev->dev.of_node);
 
-	if (mac_address)
+	if (mac_address) {
 		/* Set the MAC address. */
 		memcpy(ndev->dev_addr, mac_address, ETH_ALEN);
-	else
-		dev_warn(dev, "No MAC address found\n");
+	} else {
+		dev_warn(dev, "No MAC address found, using random\n");
+		eth_hw_addr_random(ndev);
+	}
 
 	/* Clear the Tx CSR's in case this is a restart */
 	__raw_writel(0, lp->base_addr + XEL_TSR_OFFSET);
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index 9006877..e46b1eb 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -97,7 +97,6 @@
 static struct platform_driver fjes_driver = {
 	.driver = {
 		.name = DRV_NAME,
-		.owner = THIS_MODULE,
 	},
 	.probe = fjes_probe,
 	.remove = fjes_remove,
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 5a1e985..470b3dc 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -127,7 +127,7 @@
 
 #define AX25_6PACK_HEADER_LEN 0
 
-static void sixpack_decode(struct sixpack *, unsigned char[], int);
+static void sixpack_decode(struct sixpack *, const unsigned char[], int);
 static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char);
 
 /*
@@ -428,7 +428,7 @@
 
 /*
  * Handle the 'receiver data ready' interrupt.
- * This function is called by the 'tty_io' module in the kernel when
+ * This function is called by the tty module in the kernel when
  * a block of 6pack data has been received, which can now be decapsulated
  * and sent on to some IP layer for further processing.
  */
@@ -436,7 +436,6 @@
 	const unsigned char *cp, char *fp, int count)
 {
 	struct sixpack *sp;
-	unsigned char buf[512];
 	int count1;
 
 	if (!count)
@@ -446,10 +445,7 @@
 	if (!sp)
 		return;
 
-	memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf));
-
 	/* Read the characters out of the buffer */
-
 	count1 = count;
 	while (count) {
 		count--;
@@ -459,7 +455,7 @@
 			continue;
 		}
 	}
-	sixpack_decode(sp, buf, count1);
+	sixpack_decode(sp, cp, count1);
 
 	sp_put(sp);
 	tty_unthrottle(tty);
@@ -992,7 +988,7 @@
 /* decode a 6pack packet */
 
 static void
-sixpack_decode(struct sixpack *sp, unsigned char *pre_rbuff, int count)
+sixpack_decode(struct sixpack *sp, const unsigned char *pre_rbuff, int count)
 {
 	unsigned char inbyte;
 	int count1;
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index d95a50a..622ab3a 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -484,7 +484,7 @@
 	dev->flags      = 0;
 	dev->features	= NETIF_F_LLTX;	/* Allow recursion */
 
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
 	dev->header_ops      = &ax25_header_ops;
 #endif
 
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 467fb8b..f4fbcb5 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -84,8 +84,6 @@
 #define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2   40
 
 #define ITAB_NUM 128
-#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2
-extern u8 netvsc_hash_key[];
 
 struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
 	struct ndis_obj_header hdr;
@@ -175,7 +173,7 @@
 struct rndis_message;
 struct netvsc_device;
 int netvsc_device_add(struct hv_device *device, void *additional_info);
-int netvsc_device_remove(struct hv_device *device);
+void netvsc_device_remove(struct hv_device *device);
 int netvsc_send(struct hv_device *device,
 		struct hv_netvsc_packet *packet,
 		struct rndis_message *rndis_msg,
@@ -435,7 +433,7 @@
  */
 struct nvsp_1_message_send_rndis_packet {
 	/*
-	 * This field is specified by RNIDS. They assume there's two different
+	 * This field is specified by RNDIS. They assume there's two different
 	 * channels of communication. However, the Network VSP only has one.
 	 * Therefore, the channel travels with the RNDIS packet.
 	 */
@@ -490,6 +488,7 @@
 			u64 sriov:1;
 			u64 ieee8021q:1;
 			u64 correlation_id:1;
+			u64 teaming:1;
 		};
 	};
 } __packed;
@@ -579,7 +578,7 @@
 	/* The number of entries in the send indirection table */
 	u32 count;
 
-	/* The offset of the send indireciton table from top of this struct.
+	/* The offset of the send indirection table from top of this struct.
 	 * The send indirection table tells which channel to put the send
 	 * traffic on. Each entry is a channel number.
 	 */
@@ -633,23 +632,41 @@
 	u32 count; /* counter of batched packets */
 };
 
+struct recv_comp_data {
+	u64 tid; /* transaction id */
+	u32 status;
+};
+
+/* Netvsc Receive Slots Max */
+#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1)
+
+struct multi_recv_comp {
+	void *buf; /* queued receive completions */
+	u32 first; /* first data entry */
+	u32 next; /* next entry for writing */
+};
+
 struct netvsc_stats {
 	u64 packets;
 	u64 bytes;
+	u64 broadcast;
+	u64 multicast;
 	struct u64_stats_sync syncp;
 };
 
+struct netvsc_ethtool_stats {
+	unsigned long tx_scattered;
+	unsigned long tx_no_memory;
+	unsigned long tx_no_space;
+	unsigned long tx_too_big;
+	unsigned long tx_busy;
+};
+
 struct netvsc_reconfig {
 	struct list_head list;
 	u32 event;
 };
 
-struct garp_wrk {
-	struct work_struct dwrk;
-	struct net_device *netdev;
-	struct netvsc_device *netvsc_dev;
-};
-
 /* The context of the netvsc device  */
 struct net_device_context {
 	/* point back to our device context */
@@ -667,7 +684,6 @@
 
 	struct work_struct work;
 	u32 msg_enable; /* debug level */
-	struct garp_wrk gwrk;
 
 	struct netvsc_stats __percpu *tx_stats;
 	struct netvsc_stats __percpu *rx_stats;
@@ -675,9 +691,18 @@
 	/* Ethtool settings */
 	u8 duplex;
 	u32 speed;
+	struct netvsc_ethtool_stats eth_stats;
 
 	/* the device is going away */
 	bool start_remove;
+
+	/* State to manage the associated VF interface. */
+	struct net_device __rcu *vf_netdev;
+
+	/* 1: allocated, serial number is valid. 0: not allocated */
+	u32 vf_alloc;
+	/* Serial number of the VF to team with */
+	u32 vf_serial;
 };
 
 /* Per netvsc device */
@@ -709,7 +734,6 @@
 	struct nvsp_message channel_init_pkt;
 
 	struct nvsp_message revoke_packet;
-	/* unsigned char HwMacAddr[HW_MACADDR_LEN]; */
 
 	struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX];
 	u32 send_table[VRSS_SEND_TAB_SIZE];
@@ -733,15 +757,10 @@
 	u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
 	u32 pkt_align; /* alignment bytes, e.g. 8 */
 
-	/* 1: allocated, serial number is valid. 0: not allocated */
-	u32 vf_alloc;
-	/* Serial number of the VF to team with */
-	u32 vf_serial;
+	struct multi_recv_comp mrc[VRSS_CHANNEL_MAX];
+	atomic_t num_outstanding_recvs;
+
 	atomic_t open_cnt;
-	/* State to manage the associated VF interface. */
-	bool vf_inject;
-	struct net_device *vf_netdev;
-	atomic_t vf_use_cnt;
 };
 
 static inline struct netvsc_device *
@@ -1219,7 +1238,7 @@
 	u32 ndis_msg_type;
 
 	/* Total length of this message, from the beginning */
-	/* of the sruct rndis_message, in bytes. */
+	/* of the struct rndis_message, in bytes. */
 	u32 msg_len;
 
 	/* Actual message */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 20e0917..720b5fa 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -59,7 +59,6 @@
 			       VM_PKT_DATA_INBAND, 0);
 }
 
-
 static struct netvsc_device *alloc_net_device(void)
 {
 	struct netvsc_device *net_device;
@@ -74,21 +73,26 @@
 		return NULL;
 	}
 
+	net_device->mrc[0].buf = vzalloc(NETVSC_RECVSLOT_MAX *
+					 sizeof(struct recv_comp_data));
+
 	init_waitqueue_head(&net_device->wait_drain);
 	net_device->destroy = false;
 	atomic_set(&net_device->open_cnt, 0);
-	atomic_set(&net_device->vf_use_cnt, 0);
 	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
 	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
-
-	net_device->vf_netdev = NULL;
-	net_device->vf_inject = false;
+	init_completion(&net_device->channel_init_wait);
 
 	return net_device;
 }
 
 static void free_netvsc_device(struct netvsc_device *nvdev)
 {
+	int i;
+
+	for (i = 0; i < VRSS_CHANNEL_MAX; i++)
+		vfree(nvdev->mrc[i].buf);
+
 	kfree(nvdev->cb_buffer);
 	kfree(nvdev);
 }
@@ -111,20 +115,20 @@
 		goto get_in_err;
 
 	if (net_device->destroy &&
-		atomic_read(&net_device->num_outstanding_sends) == 0)
+	    atomic_read(&net_device->num_outstanding_sends) == 0 &&
+	    atomic_read(&net_device->num_outstanding_recvs) == 0)
 		net_device = NULL;
 
 get_in_err:
 	return net_device;
 }
 
-
-static int netvsc_destroy_buf(struct hv_device *device)
+static void netvsc_destroy_buf(struct hv_device *device)
 {
 	struct nvsp_message *revoke_packet;
-	int ret = 0;
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+	int ret;
 
 	/*
 	 * If we got a section count, it means we received a
@@ -154,7 +158,7 @@
 		if (ret != 0) {
 			netdev_err(ndev, "unable to send "
 				"revoke receive buffer to netvsp\n");
-			return ret;
+			return;
 		}
 	}
 
@@ -169,7 +173,7 @@
 		if (ret != 0) {
 			netdev_err(ndev,
 				   "unable to teardown receive buffer's gpadl\n");
-			return ret;
+			return;
 		}
 		net_device->recv_buf_gpadl_handle = 0;
 	}
@@ -213,7 +217,7 @@
 		if (ret != 0) {
 			netdev_err(ndev, "unable to send "
 				   "revoke send buffer to netvsp\n");
-			return ret;
+			return;
 		}
 	}
 	/* Teardown the gpadl on the vsp end */
@@ -227,7 +231,7 @@
 		if (ret != 0) {
 			netdev_err(ndev,
 				   "unable to teardown send buffer's gpadl\n");
-			return ret;
+			return;
 		}
 		net_device->send_buf_gpadl_handle = 0;
 	}
@@ -237,8 +241,6 @@
 		net_device->send_buf = NULL;
 	}
 	kfree(net_device->send_section_map);
-
-	return ret;
 }
 
 static int netvsc_init_buf(struct hv_device *device)
@@ -280,7 +282,6 @@
 		goto cleanup;
 	}
 
-
 	/* Notify the NetVsp of the gpadl handle */
 	init_packet = &net_device->channel_init_pkt;
 
@@ -407,7 +408,7 @@
 	/* Section count is simply the size divided by the section size.
 	 */
 	net_device->send_section_cnt =
-		net_device->send_buf_size/net_device->send_section_size;
+		net_device->send_buf_size / net_device->send_section_size;
 
 	dev_info(&device->device, "Send section size: %d, Section count:%d\n",
 		 net_device->send_section_size, net_device->send_section_cnt);
@@ -416,8 +417,8 @@
 	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
 					     BITS_PER_LONG);
 
-	net_device->send_section_map =
-		kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
+	net_device->send_section_map = kcalloc(net_device->map_words,
+					       sizeof(ulong), GFP_KERNEL);
 	if (net_device->send_section_map == NULL) {
 		ret = -ENOMEM;
 		goto cleanup;
@@ -432,7 +433,6 @@
 	return ret;
 }
 
-
 /* Negotiate NVSP protocol version */
 static int negotiate_nvsp_ver(struct hv_device *device,
 			      struct netvsc_device *net_device,
@@ -472,9 +472,13 @@
 	init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN;
 	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
 
-	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5)
+	if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) {
 		init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1;
 
+		/* Teaming bit is needed to receive link speed updates */
+		init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1;
+	}
+
 	ret = vmbus_sendpacket(device->channel, init_packet,
 				sizeof(struct nvsp_message),
 				(unsigned long)init_packet,
@@ -489,9 +493,10 @@
 	struct netvsc_device *net_device;
 	struct nvsp_message *init_packet;
 	int ndis_version;
-	u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
+	const u32 ver_list[] = {
+		NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
 		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
-	int i, num_ver = 4; /* number of different NVSP versions */
+	int i;
 
 	net_device = get_outbound_net_device(device);
 	if (!net_device)
@@ -500,7 +505,7 @@
 	init_packet = &net_device->channel_init_pkt;
 
 	/* Negotiate the latest NVSP protocol supported */
-	for (i = num_ver - 1; i >= 0; i--)
+	for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--)
 		if (negotiate_nvsp_ver(device, net_device, init_packet,
 				       ver_list[i])  == 0) {
 			net_device->nvsp_version = ver_list[i];
@@ -559,7 +564,7 @@
 /*
  * netvsc_device_remove - Callback when the root bus device is removed
  */
-int netvsc_device_remove(struct hv_device *device)
+void netvsc_device_remove(struct hv_device *device)
 {
 	struct net_device *ndev = hv_get_drvdata(device);
 	struct net_device_context *net_device_ctx = netdev_priv(ndev);
@@ -581,10 +586,8 @@
 	/* Release all resources */
 	vfree(net_device->sub_cb_buf);
 	free_netvsc_device(net_device);
-	return 0;
 }
 
-
 #define RING_AVAIL_PERCENT_HIWATER 20
 #define RING_AVAIL_PERCENT_LOWATER 10
 
@@ -608,72 +611,79 @@
 	sync_change_bit(index, net_device->send_section_map);
 }
 
+static void netvsc_send_tx_complete(struct netvsc_device *net_device,
+				    struct vmbus_channel *incoming_channel,
+				    struct hv_device *device,
+				    struct vmpacket_descriptor *packet)
+{
+	struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id;
+	struct net_device *ndev = hv_get_drvdata(device);
+	struct net_device_context *net_device_ctx = netdev_priv(ndev);
+	struct vmbus_channel *channel = device->channel;
+	int num_outstanding_sends;
+	u16 q_idx = 0;
+	int queue_sends;
+
+	/* Notify the layer above us */
+	if (likely(skb)) {
+		struct hv_netvsc_packet *nvsc_packet
+			= (struct hv_netvsc_packet *)skb->cb;
+		u32 send_index = nvsc_packet->send_buf_index;
+
+		if (send_index != NETVSC_INVALID_INDEX)
+			netvsc_free_send_slot(net_device, send_index);
+		q_idx = nvsc_packet->q_idx;
+		channel = incoming_channel;
+
+		dev_consume_skb_any(skb);
+	}
+
+	num_outstanding_sends =
+		atomic_dec_return(&net_device->num_outstanding_sends);
+	queue_sends = atomic_dec_return(&net_device->queue_sends[q_idx]);
+
+	if (net_device->destroy && num_outstanding_sends == 0)
+		wake_up(&net_device->wait_drain);
+
+	if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
+	    !net_device_ctx->start_remove &&
+	    (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
+	     queue_sends < 1))
+		netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
+}
+
 static void netvsc_send_completion(struct netvsc_device *net_device,
 				   struct vmbus_channel *incoming_channel,
 				   struct hv_device *device,
 				   struct vmpacket_descriptor *packet)
 {
 	struct nvsp_message *nvsp_packet;
-	struct hv_netvsc_packet *nvsc_packet;
 	struct net_device *ndev = hv_get_drvdata(device);
-	struct net_device_context *net_device_ctx = netdev_priv(ndev);
-	u32 send_index;
-	struct sk_buff *skb;
 
 	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
-			(packet->offset8 << 3));
+					      (packet->offset8 << 3));
 
-	if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
-	    (nvsp_packet->hdr.msg_type ==
-	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
-	    (nvsp_packet->hdr.msg_type ==
-	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
-	    (nvsp_packet->hdr.msg_type ==
-	     NVSP_MSG5_TYPE_SUBCHANNEL)) {
+	switch (nvsp_packet->hdr.msg_type) {
+	case NVSP_MSG_TYPE_INIT_COMPLETE:
+	case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
+	case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE:
+	case NVSP_MSG5_TYPE_SUBCHANNEL:
 		/* Copy the response back */
 		memcpy(&net_device->channel_init_pkt, nvsp_packet,
 		       sizeof(struct nvsp_message));
 		complete(&net_device->channel_init_wait);
-	} else if (nvsp_packet->hdr.msg_type ==
-		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
-		int num_outstanding_sends;
-		u16 q_idx = 0;
-		struct vmbus_channel *channel = device->channel;
-		int queue_sends;
+		break;
 
-		/* Get the send context */
-		skb = (struct sk_buff *)(unsigned long)packet->trans_id;
+	case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
+		netvsc_send_tx_complete(net_device, incoming_channel,
+					device, packet);
+		break;
 
-		/* Notify the layer above us */
-		if (skb) {
-			nvsc_packet = (struct hv_netvsc_packet *) skb->cb;
-			send_index = nvsc_packet->send_buf_index;
-			if (send_index != NETVSC_INVALID_INDEX)
-				netvsc_free_send_slot(net_device, send_index);
-			q_idx = nvsc_packet->q_idx;
-			channel = incoming_channel;
-			dev_kfree_skb_any(skb);
-		}
-
-		num_outstanding_sends =
-			atomic_dec_return(&net_device->num_outstanding_sends);
-		queue_sends = atomic_dec_return(&net_device->
-						queue_sends[q_idx]);
-
-		if (net_device->destroy && num_outstanding_sends == 0)
-			wake_up(&net_device->wait_drain);
-
-		if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
-		    !net_device_ctx->start_remove &&
-		    (hv_ringbuf_avail_percent(&channel->outbound) >
-		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
-				netif_tx_wake_queue(netdev_get_tx_queue(
-						    ndev, q_idx));
-	} else {
-		netdev_err(ndev, "Unknown send completion packet type- "
-			   "%d received!!\n", nvsp_packet->hdr.msg_type);
+	default:
+		netdev_err(ndev,
+			   "Unknown send completion type %d received!!\n",
+			   nvsp_packet->hdr.msg_type);
 	}
-
 }
 
 static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
@@ -863,7 +873,7 @@
 		struct sk_buff *skb)
 {
 	struct netvsc_device *net_device;
-	int ret = 0, m_ret = 0;
+	int ret = 0;
 	struct vmbus_channel *out_channel;
 	u16 q_idx = packet->q_idx;
 	u32 pktlen = packet->total_data_buflen, msd_len = 0;
@@ -934,7 +944,7 @@
 		}
 
 		if (msdp->skb)
-			dev_kfree_skb_any(msdp->skb);
+			dev_consume_skb_any(msdp->skb);
 
 		if (xmit_more && !packet->cp_partial) {
 			msdp->skb = skb;
@@ -952,8 +962,8 @@
 	}
 
 	if (msd_send) {
-		m_ret = netvsc_send_pkt(device, msd_send, net_device,
-					NULL, msd_skb);
+		int m_ret = netvsc_send_pkt(device, msd_send, net_device,
+					    NULL, msd_skb);
 
 		if (m_ret != 0) {
 			netvsc_free_send_slot(net_device,
@@ -972,49 +982,121 @@
 	return ret;
 }
 
-static void netvsc_send_recv_completion(struct hv_device *device,
-					struct vmbus_channel *channel,
-					struct netvsc_device *net_device,
-					u64 transaction_id, u32 status)
+static int netvsc_send_recv_completion(struct vmbus_channel *channel,
+				       u64 transaction_id, u32 status)
 {
 	struct nvsp_message recvcompMessage;
-	int retries = 0;
 	int ret;
-	struct net_device *ndev = hv_get_drvdata(device);
 
 	recvcompMessage.hdr.msg_type =
 				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
 
 	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
 
-retry_send_cmplt:
 	/* Send the completion */
 	ret = vmbus_sendpacket(channel, &recvcompMessage,
-			       sizeof(struct nvsp_message), transaction_id,
-			       VM_PKT_COMP, 0);
-	if (ret == 0) {
-		/* success */
-		/* no-op */
-	} else if (ret == -EAGAIN) {
-		/* no more room...wait a bit and attempt to retry 3 times */
-		retries++;
-		netdev_err(ndev, "unable to send receive completion pkt"
-			" (tid %llx)...retrying %d\n", transaction_id, retries);
+			       sizeof(struct nvsp_message_header) + sizeof(u32),
+			       transaction_id, VM_PKT_COMP, 0);
 
-		if (retries < 4) {
-			udelay(100);
-			goto retry_send_cmplt;
-		} else {
-			netdev_err(ndev, "unable to send receive "
-				"completion pkt (tid %llx)...give up retrying\n",
-				transaction_id);
-		}
-	} else {
-		netdev_err(ndev, "unable to send receive "
-			"completion pkt - %llx\n", transaction_id);
+	return ret;
+}
+
+static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
+					u32 *filled, u32 *avail)
+{
+	u32 first = nvdev->mrc[q_idx].first;
+	u32 next = nvdev->mrc[q_idx].next;
+
+	*filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
+		  next - first;
+
+	*avail = NETVSC_RECVSLOT_MAX - *filled - 1;
+}
+
+/* Read the first filled slot, no change to index */
+static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
+							 *nvdev, u16 q_idx)
+{
+	u32 filled, avail;
+
+	if (!nvdev->mrc[q_idx].buf)
+		return NULL;
+
+	count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
+	if (!filled)
+		return NULL;
+
+	return nvdev->mrc[q_idx].buf + nvdev->mrc[q_idx].first *
+	       sizeof(struct recv_comp_data);
+}
+
+/* Put the first filled slot back to available pool */
+static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
+{
+	int num_recv;
+
+	nvdev->mrc[q_idx].first = (nvdev->mrc[q_idx].first + 1) %
+				  NETVSC_RECVSLOT_MAX;
+
+	num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
+
+	if (nvdev->destroy && num_recv == 0)
+		wake_up(&nvdev->wait_drain);
+}
+
+/* Check and send pending recv completions */
+static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
+				 struct vmbus_channel *channel, u16 q_idx)
+{
+	struct recv_comp_data *rcd;
+	int ret;
+
+	while (true) {
+		rcd = read_recv_comp_slot(nvdev, q_idx);
+		if (!rcd)
+			break;
+
+		ret = netvsc_send_recv_completion(channel, rcd->tid,
+						  rcd->status);
+		if (ret)
+			break;
+
+		put_recv_comp_slot(nvdev, q_idx);
 	}
 }
 
+#define NETVSC_RCD_WATERMARK 80
+
+/* Get next available slot */
+static inline struct recv_comp_data *get_recv_comp_slot(
+	struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
+{
+	u32 filled, avail, next;
+	struct recv_comp_data *rcd;
+
+	if (!nvdev->recv_section)
+		return NULL;
+
+	if (!nvdev->mrc[q_idx].buf)
+		return NULL;
+
+	if (atomic_read(&nvdev->num_outstanding_recvs) >
+	    nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
+		netvsc_chk_recv_comp(nvdev, channel, q_idx);
+
+	count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
+	if (!avail)
+		return NULL;
+
+	next = nvdev->mrc[q_idx].next;
+	rcd = nvdev->mrc[q_idx].buf + next * sizeof(struct recv_comp_data);
+	nvdev->mrc[q_idx].next = (next + 1) % NETVSC_RECVSLOT_MAX;
+
+	atomic_inc(&nvdev->num_outstanding_recvs);
+
+	return rcd;
+}
+
 static void netvsc_receive(struct netvsc_device *net_device,
 			struct vmbus_channel *channel,
 			struct hv_device *device,
@@ -1029,6 +1111,9 @@
 	int count = 0;
 	struct net_device *ndev = hv_get_drvdata(device);
 	void *data;
+	int ret;
+	struct recv_comp_data *rcd;
+	u16 q_idx = channel->offermsg.offer.sub_channel_index;
 
 	/*
 	 * All inbound packets other than send completion should be xfer page
@@ -1073,13 +1158,29 @@
 		/* Pass it to the upper layer */
 		status = rndis_filter_receive(device, netvsc_packet, &data,
 					      channel);
-
 	}
 
-	netvsc_send_recv_completion(device, channel, net_device,
-				    vmxferpage_packet->d.trans_id, status);
-}
+	if (!net_device->mrc[q_idx].buf) {
+		ret = netvsc_send_recv_completion(channel,
+						  vmxferpage_packet->d.trans_id,
+						  status);
+		if (ret)
+			netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
+				   q_idx, vmxferpage_packet->d.trans_id, ret);
+		return;
+	}
 
+	rcd = get_recv_comp_slot(net_device, channel, q_idx);
+
+	if (!rcd) {
+		netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+			   q_idx, vmxferpage_packet->d.trans_id);
+		return;
+	}
+
+	rcd->tid = vmxferpage_packet->d.trans_id;
+	rcd->status = status;
+}
 
 static void netvsc_send_table(struct hv_device *hdev,
 			      struct nvsp_message *nvmsg)
@@ -1106,16 +1207,16 @@
 		nvscdev->send_table[i] = tab[i];
 }
 
-static void netvsc_send_vf(struct netvsc_device *nvdev,
+static void netvsc_send_vf(struct net_device_context *net_device_ctx,
 			   struct nvsp_message *nvmsg)
 {
-	nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
-	nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
+	net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated;
+	net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial;
 }
 
 static inline void netvsc_receive_inband(struct hv_device *hdev,
-					 struct netvsc_device *nvdev,
-					 struct nvsp_message *nvmsg)
+				 struct net_device_context *net_device_ctx,
+				 struct nvsp_message *nvmsg)
 {
 	switch (nvmsg->hdr.msg_type) {
 	case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE:
@@ -1123,7 +1224,7 @@
 		break;
 
 	case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION:
-		netvsc_send_vf(nvdev, nvmsg);
+		netvsc_send_vf(net_device_ctx, nvmsg);
 		break;
 	}
 }
@@ -1136,6 +1237,7 @@
 				   struct vmpacket_descriptor *desc)
 {
 	struct nvsp_message *nvmsg;
+	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 
 	nvmsg = (struct nvsp_message *)((unsigned long)
 		desc + (desc->offset8 << 3));
@@ -1150,7 +1252,7 @@
 		break;
 
 	case VM_PKT_DATA_INBAND:
-		netvsc_receive_inband(device, net_device, nvmsg);
+		netvsc_receive_inband(device, net_device_ctx, nvmsg);
 		break;
 
 	default:
@@ -1160,11 +1262,11 @@
 	}
 }
 
-
 void netvsc_channel_cb(void *context)
 {
 	int ret;
 	struct vmbus_channel *channel = (struct vmbus_channel *)context;
+	u16 q_idx = channel->offermsg.offer.sub_channel_index;
 	struct hv_device *device;
 	struct netvsc_device *net_device;
 	u32 bytes_recvd;
@@ -1216,8 +1318,6 @@
 						       ndev,
 						       request_id,
 						       desc);
-
-
 			} else {
 				/*
 				 * We are done for this pass.
@@ -1244,7 +1344,8 @@
 
 	if (bufferlen > NETVSC_PACKET_SIZE)
 		kfree(buffer);
-	return;
+
+	netvsc_chk_recv_comp(net_device, channel, q_idx);
 }
 
 /*
@@ -1266,9 +1367,6 @@
 
 	net_device->ring_size = ring_size;
 
-	/* Initialize the NetVSC channel extension */
-	init_completion(&net_device->channel_init_wait);
-
 	set_per_channel_state(device->channel, net_device->cb_buffer);
 
 	/* Open the channel */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 41bd952..52eeb2f 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -40,7 +40,6 @@
 
 #include "hyperv_net.h"
 
-
 #define RING_SIZE_MIN 64
 #define LINKCHANGE_INT (2 * HZ)
 #define NETVSC_HW_FEATURES	(NETIF_F_RXCSUM | \
@@ -358,18 +357,14 @@
 	struct rndis_message *rndis_msg;
 	struct rndis_packet *rndis_pkt;
 	u32 rndis_msg_size;
-	bool isvlan;
-	bool linear = false;
 	struct rndis_per_packet_info *ppi;
 	struct ndis_tcp_ip_checksum_info *csum_info;
-	struct ndis_tcp_lso_info *lso_info;
 	int  hdr_offset;
 	u32 net_trans_info;
 	u32 hash;
 	u32 skb_length;
 	struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
 	struct hv_page_buffer *pb = page_buf;
-	struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
 
 	/* We will atmost need two pages to describe the rndis
 	 * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
@@ -377,22 +372,20 @@
 	 * more pages we try linearizing it.
 	 */
 
-check_size:
 	skb_length = skb->len;
 	num_data_pgs = netvsc_get_slots(skb) + 2;
-	if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) {
-		net_alert_ratelimited("packet too big: %u pages (%u bytes)\n",
-				      num_data_pgs, skb->len);
-		ret = -EFAULT;
-		goto drop;
-	} else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
-		if (skb_linearize(skb)) {
-			net_alert_ratelimited("failed to linearize skb\n");
-			ret = -ENOMEM;
+
+	if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
+		++net_device_ctx->eth_stats.tx_scattered;
+
+		if (skb_linearize(skb))
+			goto no_memory;
+
+		num_data_pgs = netvsc_get_slots(skb) + 2;
+		if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
+			++net_device_ctx->eth_stats.tx_too_big;
 			goto drop;
 		}
-		linear = true;
-		goto check_size;
 	}
 
 	/*
@@ -401,17 +394,14 @@
 	 * structure.
 	 */
 	ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE);
-	if (ret) {
-		netdev_err(net, "unable to alloc hv_netvsc_packet\n");
-		ret = -ENOMEM;
-		goto drop;
-	}
+	if (ret)
+		goto no_memory;
+
 	/* Use the skb control buffer for building up the packet */
 	BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
 			FIELD_SIZEOF(struct sk_buff, cb));
 	packet = (struct hv_netvsc_packet *)skb->cb;
 
-
 	packet->q_idx = skb_get_queue_mapping(skb);
 
 	packet->total_data_buflen = skb->len;
@@ -420,8 +410,6 @@
 
 	memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE);
 
-	isvlan = skb->vlan_tci & VLAN_TAG_PRESENT;
-
 	/* Add the rndis header */
 	rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET;
 	rndis_msg->msg_len = packet->total_data_buflen;
@@ -440,7 +428,7 @@
 		*(u32 *)((void *)ppi + ppi->ppi_offset) = hash;
 	}
 
-	if (isvlan) {
+	if (skb_vlan_tag_present(skb)) {
 		struct ndis_pkt_8021q_info *vlan;
 
 		rndis_msg_size += NDIS_VLAN_PPI_SIZE;
@@ -461,8 +449,37 @@
 	 * Setup the sendside checksum offload only if this is not a
 	 * GSO packet.
 	 */
-	if (skb_is_gso(skb))
-		goto do_lso;
+	if (skb_is_gso(skb)) {
+		struct ndis_tcp_lso_info *lso_info;
+
+		rndis_msg_size += NDIS_LSO_PPI_SIZE;
+		ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
+				    TCP_LARGESEND_PKTINFO);
+
+		lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
+							ppi->ppi_offset);
+
+		lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
+		if (net_trans_info & (INFO_IPV4 << 16)) {
+			lso_info->lso_v2_transmit.ip_version =
+				NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
+			ip_hdr(skb)->tot_len = 0;
+			ip_hdr(skb)->check = 0;
+			tcp_hdr(skb)->check =
+				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+						   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
+		} else {
+			lso_info->lso_v2_transmit.ip_version =
+				NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
+			ipv6_hdr(skb)->payload_len = 0;
+			tcp_hdr(skb)->check =
+				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+						 &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
+		}
+		lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
+		lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
+		goto do_send;
+	}
 
 	if ((skb->ip_summed == CHECKSUM_NONE) ||
 	    (skb->ip_summed == CHECKSUM_UNNECESSARY))
@@ -495,7 +512,7 @@
 
 		ret = skb_cow_head(skb, 0);
 		if (ret)
-			goto drop;
+			goto no_memory;
 
 		uh = udp_hdr(skb);
 		udp_len = ntohs(uh->len);
@@ -509,35 +526,6 @@
 
 		csum_info->transmit.udp_checksum = 0;
 	}
-	goto do_send;
-
-do_lso:
-	rndis_msg_size += NDIS_LSO_PPI_SIZE;
-	ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE,
-			    TCP_LARGESEND_PKTINFO);
-
-	lso_info = (struct ndis_tcp_lso_info *)((void *)ppi +
-			ppi->ppi_offset);
-
-	lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
-	if (net_trans_info & (INFO_IPV4 << 16)) {
-		lso_info->lso_v2_transmit.ip_version =
-			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
-		ip_hdr(skb)->tot_len = 0;
-		ip_hdr(skb)->check = 0;
-		tcp_hdr(skb)->check =
-		~csum_tcpudp_magic(ip_hdr(skb)->saddr,
-				   ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
-	} else {
-		lso_info->lso_v2_transmit.ip_version =
-			NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6;
-		ipv6_hdr(skb)->payload_len = 0;
-		tcp_hdr(skb)->check =
-		~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
-				&ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
-	}
-	lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
-	lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
 
 do_send:
 	/* Start filling in the page buffers with the rndis hdr */
@@ -550,21 +538,33 @@
 	skb_tx_timestamp(skb);
 	ret = netvsc_send(net_device_ctx->device_ctx, packet,
 			  rndis_msg, &pb, skb);
+	if (likely(ret == 0)) {
+		struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
 
-drop:
-	if (ret == 0) {
 		u64_stats_update_begin(&tx_stats->syncp);
 		tx_stats->packets++;
 		tx_stats->bytes += skb_length;
 		u64_stats_update_end(&tx_stats->syncp);
-	} else {
-		if (ret != -EAGAIN) {
-			dev_kfree_skb_any(skb);
-			net->stats.tx_dropped++;
-		}
+		return NETDEV_TX_OK;
 	}
 
-	return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK;
+	if (ret == -EAGAIN) {
+		++net_device_ctx->eth_stats.tx_busy;
+		return NETDEV_TX_BUSY;
+	}
+
+	if (ret == -ENOSPC)
+		++net_device_ctx->eth_stats.tx_no_space;
+
+drop:
+	dev_kfree_skb_any(skb);
+	net->stats.tx_dropped++;
+
+	return NETDEV_TX_OK;
+
+no_memory:
+	++net_device_ctx->eth_stats.tx_no_memory;
+	goto drop;
 }
 
 /*
@@ -579,19 +579,32 @@
 	struct netvsc_reconfig *event;
 	unsigned long flags;
 
-	/* Handle link change statuses only */
+	net = hv_get_drvdata(device_obj);
+
+	if (!net)
+		return;
+
+	ndev_ctx = netdev_priv(net);
+
+	/* Update the physical link speed when changing to another vSwitch */
+	if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
+		u32 speed;
+
+		speed = *(u32 *)((void *)indicate + indicate->
+				 status_buf_offset) / 10000;
+		ndev_ctx->speed = speed;
+		return;
+	}
+
+	/* Handle these link change statuses below */
 	if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE &&
 	    indicate->status != RNDIS_STATUS_MEDIA_CONNECT &&
 	    indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT)
 		return;
 
-	net = hv_get_drvdata(device_obj);
-
-	if (!net || net->reg_state != NETREG_REGISTERED)
+	if (net->reg_state != NETREG_REGISTERED)
 		return;
 
-	ndev_ctx = netdev_priv(net);
-
 	event = kzalloc(sizeof(*event), GFP_ATOMIC);
 	if (!event)
 		return;
@@ -604,7 +617,6 @@
 	schedule_delayed_work(&ndev_ctx->dwork, 0);
 }
 
-
 static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
 				struct hv_netvsc_packet *packet,
 				struct ndis_tcp_ip_checksum_info *csum_info,
@@ -655,50 +667,23 @@
 {
 	struct net_device *net = hv_get_drvdata(device_obj);
 	struct net_device_context *net_device_ctx = netdev_priv(net);
+	struct net_device *vf_netdev;
 	struct sk_buff *skb;
-	struct sk_buff *vf_skb;
 	struct netvsc_stats *rx_stats;
-	struct netvsc_device *netvsc_dev = net_device_ctx->nvdev;
-	u32 bytes_recvd = packet->total_data_buflen;
-	int ret = 0;
 
-	if (!net || net->reg_state != NETREG_REGISTERED)
+	if (net->reg_state != NETREG_REGISTERED)
 		return NVSP_STAT_FAIL;
 
-	if (READ_ONCE(netvsc_dev->vf_inject)) {
-		atomic_inc(&netvsc_dev->vf_use_cnt);
-		if (!READ_ONCE(netvsc_dev->vf_inject)) {
-			/*
-			 * We raced; just move on.
-			 */
-			atomic_dec(&netvsc_dev->vf_use_cnt);
-			goto vf_injection_done;
-		}
-
-		/*
-		 * Inject this packet into the VF inerface.
-		 * On Hyper-V, multicast and brodcast packets
-		 * are only delivered on the synthetic interface
-		 * (after subjecting these to policy filters on
-		 * the host). Deliver these via the VF interface
-		 * in the guest.
-		 */
-		vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet,
-					       csum_info, *data, vlan_tci);
-		if (vf_skb != NULL) {
-			++netvsc_dev->vf_netdev->stats.rx_packets;
-			netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd;
-			netif_receive_skb(vf_skb);
-		} else {
-			++net->stats.rx_dropped;
-			ret = NVSP_STAT_FAIL;
-		}
-		atomic_dec(&netvsc_dev->vf_use_cnt);
-		return ret;
-	}
-
-vf_injection_done:
-	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
+	/*
+	 * If necessary, inject this packet into the VF interface.
+	 * On Hyper-V, multicast and brodcast packets are only delivered
+	 * to the synthetic interface (after subjecting these to
+	 * policy filters on the host). Deliver these via the VF
+	 * interface in the guest.
+	 */
+	vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
+	if (vf_netdev && (vf_netdev->flags & IFF_UP))
+		net = vf_netdev;
 
 	/* Allocate a skb - TODO direct I/O to pages? */
 	skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci);
@@ -706,12 +691,25 @@
 		++net->stats.rx_dropped;
 		return NVSP_STAT_FAIL;
 	}
-	skb_record_rx_queue(skb, channel->
-			    offermsg.offer.sub_channel_index);
 
+	if (net != vf_netdev)
+		skb_record_rx_queue(skb,
+				    channel->offermsg.offer.sub_channel_index);
+
+	/*
+	 * Even if injecting the packet, record the statistics
+	 * on the synthetic device because modifying the VF device
+	 * statistics will not work correctly.
+	 */
+	rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->packets++;
 	rx_stats->bytes += packet->total_data_buflen;
+
+	if (skb->pkt_type == PACKET_BROADCAST)
+		++rx_stats->broadcast;
+	else if (skb->pkt_type == PACKET_MULTICAST)
+		++rx_stats->multicast;
 	u64_stats_update_end(&rx_stats->syncp);
 
 	/*
@@ -727,8 +725,12 @@
 static void netvsc_get_drvinfo(struct net_device *net,
 			       struct ethtool_drvinfo *info)
 {
+	struct net_device_context *net_device_ctx = netdev_priv(net);
+	struct hv_device *dev = net_device_ctx->device_ctx;
+
 	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
 	strlcpy(info->fw_version, "N/A", sizeof(info->fw_version));
+	strlcpy(info->bus_info, vmbus_dev_name(dev), sizeof(info->bus_info));
 }
 
 static void netvsc_get_channels(struct net_device *net,
@@ -950,7 +952,7 @@
 							    cpu);
 		struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
 							    cpu);
-		u64 tx_packets, tx_bytes, rx_packets, rx_bytes;
+		u64 tx_packets, tx_bytes, rx_packets, rx_bytes, rx_multicast;
 		unsigned int start;
 
 		do {
@@ -963,12 +965,14 @@
 			start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
 			rx_packets = rx_stats->packets;
 			rx_bytes = rx_stats->bytes;
+			rx_multicast = rx_stats->multicast + rx_stats->broadcast;
 		} while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
 
 		t->tx_bytes	+= tx_bytes;
 		t->tx_packets	+= tx_packets;
 		t->rx_bytes	+= rx_bytes;
 		t->rx_packets	+= rx_packets;
+		t->multicast	+= rx_multicast;
 	}
 
 	t->tx_dropped	= net->stats.tx_dropped;
@@ -1004,6 +1008,51 @@
 	return err;
 }
 
+static const struct {
+	char name[ETH_GSTRING_LEN];
+	u16 offset;
+} netvsc_stats[] = {
+	{ "tx_scattered", offsetof(struct netvsc_ethtool_stats, tx_scattered) },
+	{ "tx_no_memory",  offsetof(struct netvsc_ethtool_stats, tx_no_memory) },
+	{ "tx_no_space",  offsetof(struct netvsc_ethtool_stats, tx_no_space) },
+	{ "tx_too_big",	  offsetof(struct netvsc_ethtool_stats, tx_too_big) },
+	{ "tx_busy",	  offsetof(struct netvsc_ethtool_stats, tx_busy) },
+};
+
+static int netvsc_get_sset_count(struct net_device *dev, int string_set)
+{
+	switch (string_set) {
+	case ETH_SS_STATS:
+		return ARRAY_SIZE(netvsc_stats);
+	default:
+		return -EINVAL;
+	}
+}
+
+static void netvsc_get_ethtool_stats(struct net_device *dev,
+				     struct ethtool_stats *stats, u64 *data)
+{
+	struct net_device_context *ndc = netdev_priv(dev);
+	const void *nds = &ndc->eth_stats;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
+		data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
+}
+
+static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
+			memcpy(data + i * ETH_GSTRING_LEN,
+			       netvsc_stats[i].name, ETH_GSTRING_LEN);
+		break;
+	}
+}
+
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void netvsc_poll_controller(struct net_device *net)
 {
@@ -1016,6 +1065,9 @@
 static const struct ethtool_ops ethtool_ops = {
 	.get_drvinfo	= netvsc_get_drvinfo,
 	.get_link	= ethtool_op_get_link,
+	.get_ethtool_stats = netvsc_get_ethtool_stats,
+	.get_sset_count = netvsc_get_sset_count,
+	.get_strings	= netvsc_get_strings,
 	.get_channels   = netvsc_get_channels,
 	.set_channels   = netvsc_set_channels,
 	.get_ts_info	= ethtool_op_get_ts_info,
@@ -1150,36 +1202,44 @@
 	free_netdev(netdev);
 }
 
-static void netvsc_notify_peers(struct work_struct *wrk)
+static struct net_device *get_netvsc_bymac(const u8 *mac)
 {
-	struct garp_wrk *gwrk;
+	struct net_device *dev;
 
-	gwrk = container_of(wrk, struct garp_wrk, dwrk);
-
-	netdev_notify_peers(gwrk->netdev);
-
-	atomic_dec(&gwrk->netvsc_dev->vf_use_cnt);
-}
-
-static struct net_device *get_netvsc_net_device(char *mac)
-{
-	struct net_device *dev, *found = NULL;
-	int rtnl_locked;
-
-	rtnl_locked = rtnl_trylock();
+	ASSERT_RTNL();
 
 	for_each_netdev(&init_net, dev) {
-		if (memcmp(dev->dev_addr, mac, ETH_ALEN) == 0) {
-			if (dev->netdev_ops != &device_ops)
-				continue;
-			found = dev;
-			break;
-		}
-	}
-	if (rtnl_locked)
-		rtnl_unlock();
+		if (dev->netdev_ops != &device_ops)
+			continue;	/* not a netvsc device */
 
-	return found;
+		if (ether_addr_equal(mac, dev->perm_addr))
+			return dev;
+	}
+
+	return NULL;
+}
+
+static struct net_device *get_netvsc_byref(struct net_device *vf_netdev)
+{
+	struct net_device *dev;
+
+	ASSERT_RTNL();
+
+	for_each_netdev(&init_net, dev) {
+		struct net_device_context *net_device_ctx;
+
+		if (dev->netdev_ops != &device_ops)
+			continue;	/* not a netvsc device */
+
+		net_device_ctx = netdev_priv(dev);
+		if (net_device_ctx->nvdev == NULL)
+			continue;	/* device is removed */
+
+		if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev)
+			return dev;	/* a match */
+	}
+
+	return NULL;
 }
 
 static int netvsc_register_vf(struct net_device *vf_netdev)
@@ -1187,9 +1247,8 @@
 	struct net_device *ndev;
 	struct net_device_context *net_device_ctx;
 	struct netvsc_device *netvsc_dev;
-	const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops;
 
-	if (eth_ops == NULL || eth_ops == &ethtool_ops)
+	if (vf_netdev->addr_len != ETH_ALEN)
 		return NOTIFY_DONE;
 
 	/*
@@ -1197,13 +1256,13 @@
 	 * associate with the VF interface. If we don't find a matching
 	 * synthetic interface, move on.
 	 */
-	ndev = get_netvsc_net_device(vf_netdev->dev_addr);
+	ndev = get_netvsc_bymac(vf_netdev->perm_addr);
 	if (!ndev)
 		return NOTIFY_DONE;
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
-	if (netvsc_dev == NULL)
+	if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
 		return NOTIFY_DONE;
 
 	netdev_info(ndev, "VF registering: %s\n", vf_netdev->name);
@@ -1211,33 +1270,26 @@
 	 * Take a reference on the module.
 	 */
 	try_module_get(THIS_MODULE);
-	netvsc_dev->vf_netdev = vf_netdev;
+
+	dev_hold(vf_netdev);
+	rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev);
 	return NOTIFY_OK;
 }
 
-
 static int netvsc_vf_up(struct net_device *vf_netdev)
 {
 	struct net_device *ndev;
 	struct netvsc_device *netvsc_dev;
-	const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops;
 	struct net_device_context *net_device_ctx;
 
-	if (eth_ops == &ethtool_ops)
-		return NOTIFY_DONE;
-
-	ndev = get_netvsc_net_device(vf_netdev->dev_addr);
+	ndev = get_netvsc_byref(vf_netdev);
 	if (!ndev)
 		return NOTIFY_DONE;
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
 
-	if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
-		return NOTIFY_DONE;
-
 	netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = true;
 
 	/*
 	 * Open the device before switching data path.
@@ -1252,86 +1304,54 @@
 
 	netif_carrier_off(ndev);
 
-	/*
-	 * Now notify peers. We are scheduling work to
-	 * notify peers; take a reference to prevent
-	 * the VF interface from vanishing.
-	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
-	net_device_ctx->gwrk.netdev = vf_netdev;
-	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
-	schedule_work(&net_device_ctx->gwrk.dwrk);
+	/* Now notify peers through VF device. */
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev);
 
 	return NOTIFY_OK;
 }
 
-
 static int netvsc_vf_down(struct net_device *vf_netdev)
 {
 	struct net_device *ndev;
 	struct netvsc_device *netvsc_dev;
 	struct net_device_context *net_device_ctx;
-	const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops;
 
-	if (eth_ops == &ethtool_ops)
-		return NOTIFY_DONE;
-
-	ndev = get_netvsc_net_device(vf_netdev->dev_addr);
+	ndev = get_netvsc_byref(vf_netdev);
 	if (!ndev)
 		return NOTIFY_DONE;
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
 
-	if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL))
-		return NOTIFY_DONE;
-
 	netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
-	netvsc_dev->vf_inject = false;
-	/*
-	 * Wait for currently active users to
-	 * drain out.
-	 */
-
-	while (atomic_read(&netvsc_dev->vf_use_cnt) != 0)
-		udelay(50);
 	netvsc_switch_datapath(ndev, false);
 	netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name);
 	rndis_filter_close(netvsc_dev);
 	netif_carrier_on(ndev);
-	/*
-	 * Notify peers.
-	 */
-	atomic_inc(&netvsc_dev->vf_use_cnt);
-	net_device_ctx->gwrk.netdev = ndev;
-	net_device_ctx->gwrk.netvsc_dev = netvsc_dev;
-	schedule_work(&net_device_ctx->gwrk.dwrk);
+
+	/* Now notify peers through netvsc device. */
+	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev);
 
 	return NOTIFY_OK;
 }
 
-
 static int netvsc_unregister_vf(struct net_device *vf_netdev)
 {
 	struct net_device *ndev;
 	struct netvsc_device *netvsc_dev;
-	const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops;
 	struct net_device_context *net_device_ctx;
 
-	if (eth_ops == &ethtool_ops)
-		return NOTIFY_DONE;
-
-	ndev = get_netvsc_net_device(vf_netdev->dev_addr);
+	ndev = get_netvsc_byref(vf_netdev);
 	if (!ndev)
 		return NOTIFY_DONE;
 
 	net_device_ctx = netdev_priv(ndev);
 	netvsc_dev = net_device_ctx->nvdev;
-	if (netvsc_dev == NULL)
-		return NOTIFY_DONE;
+
 	netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
 
-	netvsc_dev->vf_netdev = NULL;
+	RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL);
+	dev_put(vf_netdev);
 	module_put(THIS_MODULE);
 	return NOTIFY_OK;
 }
@@ -1352,6 +1372,8 @@
 
 	netif_carrier_off(net);
 
+	netvsc_init_settings(net);
+
 	net_device_ctx = netdev_priv(net);
 	net_device_ctx->device_ctx = dev;
 	net_device_ctx->msg_enable = netif_msg_init(debug, default_msg);
@@ -1377,7 +1399,6 @@
 
 	INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
 	INIT_WORK(&net_device_ctx->work, do_set_multicast);
-	INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers);
 
 	spin_lock_init(&net_device_ctx->lock);
 	INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
@@ -1410,8 +1431,6 @@
 	netif_set_real_num_tx_queues(net, nvdev->num_chn);
 	netif_set_real_num_rx_queues(net, nvdev->num_chn);
 
-	netvsc_init_settings(net);
-
 	ret = register_netdev(net);
 	if (ret != 0) {
 		pr_err("Unable to register netdev.\n");
@@ -1435,7 +1454,6 @@
 		return 0;
 	}
 
-
 	ndev_ctx = netdev_priv(net);
 	net_device = ndev_ctx->nvdev;
 
@@ -1482,7 +1500,6 @@
 	.remove = netvsc_remove,
 };
 
-
 /*
  * On Hyper-V, every VF interface is matched with a corresponding
  * synthetic interface. The synthetic interface is presented first
@@ -1494,8 +1511,21 @@
 {
 	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
 
-	/* Avoid Vlan, Bonding dev with same MAC registering as VF */
-	if (event_dev->priv_flags & (IFF_802_1Q_VLAN | IFF_BONDING))
+	/* Skip our own events */
+	if (event_dev->netdev_ops == &device_ops)
+		return NOTIFY_DONE;
+
+	/* Avoid non-Ethernet type devices */
+	if (event_dev->type != ARPHRD_ETHER)
+		return NOTIFY_DONE;
+
+	/* Avoid Vlan dev with same MAC registering as VF */
+	if (event_dev->priv_flags & IFF_802_1Q_VLAN)
+		return NOTIFY_DONE;
+
+	/* Avoid Bonding master dev with same MAC registering as VF */
+	if ((event_dev->priv_flags & IFF_BONDING) &&
+	    (event_dev->flags & IFF_MASTER))
 		return NOTIFY_DONE;
 
 	switch (event) {
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 8e830f7..9195d5d 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -663,13 +663,14 @@
 	return ret;
 }
 
-u8 netvsc_hash_key[HASH_KEYLEN] = {
+static const u8 netvsc_hash_key[] = {
 	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
 	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
 	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
 	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
 	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
 };
+#define HASH_KEYLEN ARRAY_SIZE(netvsc_hash_key)
 
 static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
 {
@@ -720,7 +721,6 @@
 	for (i = 0; i < HASH_KEYLEN; i++)
 		keyp[i] = netvsc_hash_key[i];
 
-
 	ret = rndis_filter_send_request(rdev, request);
 	if (ret != 0)
 		goto cleanup;
@@ -738,7 +738,6 @@
 	return ret;
 }
 
-
 static int rndis_filter_query_device_link_status(struct rndis_device *dev)
 {
 	u32 size = sizeof(u32);
@@ -752,6 +751,28 @@
 	return ret;
 }
 
+static int rndis_filter_query_link_speed(struct rndis_device *dev)
+{
+	u32 size = sizeof(u32);
+	u32 link_speed;
+	struct net_device_context *ndc;
+	int ret;
+
+	ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED,
+					&link_speed, &size);
+
+	if (!ret) {
+		ndc = netdev_priv(dev->ndev);
+
+		/* The link speed reported from host is in 100bps unit, so
+		 * we convert it to Mbps here.
+		 */
+		ndc->speed = link_speed / 10000;
+	}
+
+	return ret;
+}
+
 int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
 {
 	struct rndis_request *request;
@@ -792,7 +813,6 @@
 	return ret;
 }
 
-
 static int rndis_filter_init_device(struct rndis_device *dev)
 {
 	struct rndis_request *request;
@@ -875,11 +895,11 @@
 
 	/* Wait for all send completions */
 	wait_event(nvdev->wait_drain,
-		atomic_read(&nvdev->num_outstanding_sends) == 0);
+		   atomic_read(&nvdev->num_outstanding_sends) == 0 &&
+		   atomic_read(&nvdev->num_outstanding_recvs) == 0);
 
 	if (request)
 		put_rndis_request(dev, request);
-	return;
 }
 
 static int rndis_filter_open_device(struct rndis_device *dev)
@@ -931,6 +951,9 @@
 	set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
 			      NETVSC_PACKET_SIZE);
 
+	nvscdev->mrc[chn_index].buf = vzalloc(NETVSC_RECVSLOT_MAX *
+					      sizeof(struct recv_comp_data));
+
 	ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
 			 nvscdev->ring_size * PAGE_SIZE, NULL, 0,
 			 netvsc_channel_cb, new_sc);
@@ -946,7 +969,7 @@
 }
 
 int rndis_filter_device_add(struct hv_device *dev,
-				  void *additional_info)
+			    void *additional_info)
 {
 	int ret;
 	struct net_device *net = hv_get_drvdata(dev);
@@ -1028,7 +1051,6 @@
 	offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
 	offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
 
-
 	ret = rndis_filter_set_offload_params(net, &offloads);
 	if (ret)
 		goto err_dev_remv;
@@ -1044,6 +1066,8 @@
 	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
 		return 0;
 
+	rndis_filter_query_link_speed(rndis_device);
+
 	/* vRSS setup */
 	memset(&rsscap, 0, rsscap_size);
 	ret = rndis_filter_query_device(rndis_device,
@@ -1152,7 +1176,6 @@
 	netvsc_device_remove(dev);
 }
 
-
 int rndis_filter_open(struct netvsc_device *nvdev)
 {
 	if (!nvdev)
diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c
index 0becf0a..ec387ef 100644
--- a/drivers/net/ieee802154/fakelb.c
+++ b/drivers/net/ieee802154/fakelb.c
@@ -30,7 +30,7 @@
 static int numlbs = 2;
 
 static LIST_HEAD(fakelb_phys);
-static DEFINE_SPINLOCK(fakelb_phys_lock);
+static DEFINE_MUTEX(fakelb_phys_lock);
 
 static LIST_HEAD(fakelb_ifup_phys);
 static DEFINE_RWLOCK(fakelb_ifup_phys_lock);
@@ -188,9 +188,9 @@
 	if (err)
 		goto err_reg;
 
-	spin_lock(&fakelb_phys_lock);
+	mutex_lock(&fakelb_phys_lock);
 	list_add_tail(&phy->list, &fakelb_phys);
-	spin_unlock(&fakelb_phys_lock);
+	mutex_unlock(&fakelb_phys_lock);
 
 	return 0;
 
@@ -222,10 +222,10 @@
 	return 0;
 
 err_slave:
-	spin_lock(&fakelb_phys_lock);
+	mutex_lock(&fakelb_phys_lock);
 	list_for_each_entry_safe(phy, tmp, &fakelb_phys, list)
 		fakelb_del(phy);
-	spin_unlock(&fakelb_phys_lock);
+	mutex_unlock(&fakelb_phys_lock);
 	return err;
 }
 
@@ -233,10 +233,10 @@
 {
 	struct fakelb_phy *phy, *tmp;
 
-	spin_lock(&fakelb_phys_lock);
+	mutex_lock(&fakelb_phys_lock);
 	list_for_each_entry_safe(phy, tmp, &fakelb_phys, list)
 		fakelb_del(phy);
-	spin_unlock(&fakelb_phys_lock);
+	mutex_unlock(&fakelb_phys_lock);
 	return 0;
 }
 
diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h
index 695a5dc..7e0732f 100644
--- a/drivers/net/ipvlan/ipvlan.h
+++ b/drivers/net/ipvlan/ipvlan.h
@@ -23,11 +23,13 @@
 #include <linux/if_vlan.h>
 #include <linux/ip.h>
 #include <linux/inetdevice.h>
+#include <linux/netfilter.h>
 #include <net/ip.h>
 #include <net/ip6_route.h>
 #include <net/rtnetlink.h>
 #include <net/route.h>
 #include <net/addrconf.h>
+#include <net/l3mdev.h>
 
 #define IPVLAN_DRV	"ipvlan"
 #define IPV_DRV_VER	"0.1"
@@ -124,4 +126,8 @@
 				   const void *iaddr, bool is_v6);
 bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
 void ipvlan_ht_addr_del(struct ipvl_addr *addr);
+struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
+			      u16 proto);
+unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+			     const struct nf_hook_state *state);
 #endif /* __IPVLAN_H */
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index b5f9511..b4e9907 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -560,6 +560,7 @@
 	case IPVLAN_MODE_L2:
 		return ipvlan_xmit_mode_l2(skb, dev);
 	case IPVLAN_MODE_L3:
+	case IPVLAN_MODE_L3S:
 		return ipvlan_xmit_mode_l3(skb, dev);
 	}
 
@@ -664,6 +665,8 @@
 		return ipvlan_handle_mode_l2(pskb, port);
 	case IPVLAN_MODE_L3:
 		return ipvlan_handle_mode_l3(pskb, port);
+	case IPVLAN_MODE_L3S:
+		return RX_HANDLER_PASS;
 	}
 
 	/* Should not reach here */
@@ -672,3 +675,94 @@
 	kfree_skb(skb);
 	return RX_HANDLER_CONSUMED;
 }
+
+static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb,
+					    struct net_device *dev)
+{
+	struct ipvl_addr *addr = NULL;
+	struct ipvl_port *port;
+	void *lyr3h;
+	int addr_type;
+
+	if (!dev || !netif_is_ipvlan_port(dev))
+		goto out;
+
+	port = ipvlan_port_get_rcu(dev);
+	if (!port || port->mode != IPVLAN_MODE_L3S)
+		goto out;
+
+	lyr3h = ipvlan_get_L3_hdr(skb, &addr_type);
+	if (!lyr3h)
+		goto out;
+
+	addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true);
+out:
+	return addr;
+}
+
+struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb,
+			      u16 proto)
+{
+	struct ipvl_addr *addr;
+	struct net_device *sdev;
+
+	addr = ipvlan_skb_to_addr(skb, dev);
+	if (!addr)
+		goto out;
+
+	sdev = addr->master->dev;
+	switch (proto) {
+	case AF_INET:
+	{
+		int err;
+		struct iphdr *ip4h = ip_hdr(skb);
+
+		err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr,
+					   ip4h->tos, sdev);
+		if (unlikely(err))
+			goto out;
+		break;
+	}
+	case AF_INET6:
+	{
+		struct dst_entry *dst;
+		struct ipv6hdr *ip6h = ipv6_hdr(skb);
+		int flags = RT6_LOOKUP_F_HAS_SADDR;
+		struct flowi6 fl6 = {
+			.flowi6_iif   = sdev->ifindex,
+			.daddr        = ip6h->daddr,
+			.saddr        = ip6h->saddr,
+			.flowlabel    = ip6_flowinfo(ip6h),
+			.flowi6_mark  = skb->mark,
+			.flowi6_proto = ip6h->nexthdr,
+		};
+
+		skb_dst_drop(skb);
+		dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags);
+		skb_dst_set(skb, dst);
+		break;
+	}
+	default:
+		break;
+	}
+
+out:
+	return skb;
+}
+
+unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
+			     const struct nf_hook_state *state)
+{
+	struct ipvl_addr *addr;
+	unsigned int len;
+
+	addr = ipvlan_skb_to_addr(skb, skb->dev);
+	if (!addr)
+		goto out;
+
+	skb->dev = addr->master->dev;
+	len = skb->len + ETH_HLEN;
+	ipvlan_count_rx(addr->master, len, true, false);
+out:
+	return NF_ACCEPT;
+}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index 18b4e8c..f442eb3 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -9,24 +9,87 @@
 
 #include "ipvlan.h"
 
+static u32 ipvl_nf_hook_refcnt = 0;
+
+static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
+	{
+		.hook     = ipvlan_nf_input,
+		.pf       = NFPROTO_IPV4,
+		.hooknum  = NF_INET_LOCAL_IN,
+		.priority = INT_MAX,
+	},
+	{
+		.hook     = ipvlan_nf_input,
+		.pf       = NFPROTO_IPV6,
+		.hooknum  = NF_INET_LOCAL_IN,
+		.priority = INT_MAX,
+	},
+};
+
+static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
+	.l3mdev_l3_rcv = ipvlan_l3_rcv,
+};
+
 static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
 {
 	ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
 }
 
-static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
+static int ipvlan_register_nf_hook(void)
+{
+	int err = 0;
+
+	if (!ipvl_nf_hook_refcnt) {
+		err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
+		if (!err)
+			ipvl_nf_hook_refcnt = 1;
+	} else {
+		ipvl_nf_hook_refcnt++;
+	}
+
+	return err;
+}
+
+static void ipvlan_unregister_nf_hook(void)
+{
+	WARN_ON(!ipvl_nf_hook_refcnt);
+
+	ipvl_nf_hook_refcnt--;
+	if (!ipvl_nf_hook_refcnt)
+		_nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
+}
+
+static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
 {
 	struct ipvl_dev *ipvlan;
+	struct net_device *mdev = port->dev;
+	int err = 0;
 
+	ASSERT_RTNL();
 	if (port->mode != nval) {
+		if (nval == IPVLAN_MODE_L3S) {
+			/* New mode is L3S */
+			err = ipvlan_register_nf_hook();
+			if (!err) {
+				mdev->l3mdev_ops = &ipvl_l3mdev_ops;
+				mdev->priv_flags |= IFF_L3MDEV_MASTER;
+			} else
+				return err;
+		} else if (port->mode == IPVLAN_MODE_L3S) {
+			/* Old mode was L3S */
+			mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
+			ipvlan_unregister_nf_hook();
+			mdev->l3mdev_ops = NULL;
+		}
 		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
-			if (nval == IPVLAN_MODE_L3)
+			if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
 				ipvlan->dev->flags |= IFF_NOARP;
 			else
 				ipvlan->dev->flags &= ~IFF_NOARP;
 		}
 		port->mode = nval;
 	}
+	return err;
 }
 
 static int ipvlan_port_create(struct net_device *dev)
@@ -74,6 +137,11 @@
 	struct ipvl_port *port = ipvlan_port_get_rtnl(dev);
 
 	dev->priv_flags &= ~IFF_IPVLAN_MASTER;
+	if (port->mode == IPVLAN_MODE_L3S) {
+		dev->priv_flags &= ~IFF_L3MDEV_MASTER;
+		ipvlan_unregister_nf_hook();
+		dev->l3mdev_ops = NULL;
+	}
 	netdev_rx_handler_unregister(dev);
 	cancel_work_sync(&port->wq);
 	__skb_queue_purge(&port->backlog);
@@ -132,7 +200,8 @@
 	struct net_device *phy_dev = ipvlan->phy_dev;
 	struct ipvl_addr *addr;
 
-	if (ipvlan->port->mode == IPVLAN_MODE_L3)
+	if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
+	    ipvlan->port->mode == IPVLAN_MODE_L3S)
 		dev->flags |= IFF_NOARP;
 	else
 		dev->flags &= ~IFF_NOARP;
@@ -372,13 +441,14 @@
 {
 	struct ipvl_dev *ipvlan = netdev_priv(dev);
 	struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
+	int err = 0;
 
 	if (data && data[IFLA_IPVLAN_MODE]) {
 		u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);
 
-		ipvlan_set_port_mode(port, nmode);
+		err = ipvlan_set_port_mode(port, nmode);
 	}
-	return 0;
+	return err;
 }
 
 static size_t ipvlan_nl_getsize(const struct net_device *dev)
@@ -473,10 +543,13 @@
 		unregister_netdevice(dev);
 		return err;
 	}
+	err = ipvlan_set_port_mode(port, mode);
+	if (err) {
+		unregister_netdevice(dev);
+		return err;
+	}
 
 	list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
-	ipvlan_set_port_mode(port, mode);
-
 	netif_stacked_transfer_operstate(phy_dev, dev);
 	return 0;
 }
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index d13e6e1..3ea47f2 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -270,6 +270,7 @@
 	struct pcpu_secy_stats __percpu *stats;
 	struct list_head secys;
 	struct gro_cells gro_cells;
+	unsigned int nest_level;
 };
 
 /**
@@ -2699,6 +2700,8 @@
 
 #define MACSEC_FEATURES \
 	(NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)
+static struct lock_class_key macsec_netdev_addr_lock_key;
+
 static int macsec_dev_init(struct net_device *dev)
 {
 	struct macsec_dev *macsec = macsec_priv(dev);
@@ -2910,6 +2913,13 @@
 	return macsec_priv(dev)->real_dev->ifindex;
 }
 
+
+static int macsec_get_nest_level(struct net_device *dev)
+{
+	return macsec_priv(dev)->nest_level;
+}
+
+
 static const struct net_device_ops macsec_netdev_ops = {
 	.ndo_init		= macsec_dev_init,
 	.ndo_uninit		= macsec_dev_uninit,
@@ -2923,6 +2933,7 @@
 	.ndo_start_xmit		= macsec_start_xmit,
 	.ndo_get_stats64	= macsec_get_stats64,
 	.ndo_get_iflink		= macsec_get_iflink,
+	.ndo_get_lock_subclass  = macsec_get_nest_level,
 };
 
 static const struct device_type macsec_type = {
@@ -2962,6 +2973,7 @@
 	dev->priv_flags |= IFF_NO_QUEUE;
 	dev->netdev_ops = &macsec_netdev_ops;
 	dev->destructor = macsec_free_netdev;
+	SET_NETDEV_DEVTYPE(dev, &macsec_type);
 
 	eth_zero_addr(dev->broadcast);
 }
@@ -3047,22 +3059,31 @@
 	}
 }
 
+static void macsec_common_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct macsec_dev *macsec = macsec_priv(dev);
+	struct net_device *real_dev = macsec->real_dev;
+
+	unregister_netdevice_queue(dev, head);
+	list_del_rcu(&macsec->secys);
+	macsec_del_dev(macsec);
+	netdev_upper_dev_unlink(real_dev, dev);
+
+	macsec_generation++;
+}
+
 static void macsec_dellink(struct net_device *dev, struct list_head *head)
 {
 	struct macsec_dev *macsec = macsec_priv(dev);
 	struct net_device *real_dev = macsec->real_dev;
 	struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);
 
-	macsec_generation++;
+	macsec_common_dellink(dev, head);
 
-	unregister_netdevice_queue(dev, head);
-	list_del_rcu(&macsec->secys);
 	if (list_empty(&rxd->secys)) {
 		netdev_rx_handler_unregister(real_dev);
 		kfree(rxd);
 	}
-
-	macsec_del_dev(macsec);
 }
 
 static int register_macsec_dev(struct net_device *real_dev,
@@ -3181,6 +3202,16 @@
 
 	dev_hold(real_dev);
 
+	macsec->nest_level = dev_get_nest_level(real_dev) + 1;
+	netdev_lockdep_set_classes(dev);
+	lockdep_set_class_and_subclass(&dev->addr_list_lock,
+				       &macsec_netdev_addr_lock_key,
+				       macsec_get_nest_level(dev));
+
+	err = netdev_upper_dev_link(real_dev, dev);
+	if (err < 0)
+		goto unregister;
+
 	/* need to be already registered so that ->init has run and
 	 * the MAC addr is set
 	 */
@@ -3193,12 +3224,12 @@
 
 	if (rx_handler && sci_exists(real_dev, sci)) {
 		err = -EBUSY;
-		goto unregister;
+		goto unlink;
 	}
 
 	err = macsec_add_dev(dev, sci, icv_len);
 	if (err)
-		goto unregister;
+		goto unlink;
 
 	if (data)
 		macsec_changelink_common(dev, data);
@@ -3213,6 +3244,8 @@
 
 del_dev:
 	macsec_del_dev(macsec);
+unlink:
+	netdev_upper_dev_unlink(real_dev, dev);
 unregister:
 	unregister_netdevice(dev);
 	return err;
@@ -3382,8 +3415,12 @@
 
 		rxd = macsec_data_rtnl(real_dev);
 		list_for_each_entry_safe(m, n, &rxd->secys, secys) {
-			macsec_dellink(m->secy.netdev, &head);
+			macsec_common_dellink(m->secy.netdev, &head);
 		}
+
+		netdev_rx_handler_unregister(real_dev);
+		kfree(rxd);
+
 		unregister_netdevice_many(&head);
 		break;
 	}
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index cd9b538..3234fcd 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1315,7 +1315,7 @@
 	vlan->dev      = dev;
 	vlan->port     = port;
 	vlan->set_features = MACVLAN_FEATURES;
-	vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1;
+	vlan->nest_level = dev_get_nest_level(lowerdev) + 1;
 
 	vlan->mode     = MACVLAN_MODE_VEPA;
 	if (data && data[IFLA_MACVLAN_MODE])
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index a38c0da..070e329 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -275,7 +275,6 @@
 	rtnl_unlock();
 
 	synchronize_rcu();
-	skb_array_cleanup(&q->skb_array);
 	sock_put(&q->sk);
 }
 
@@ -533,10 +532,8 @@
 static void macvtap_sock_destruct(struct sock *sk)
 {
 	struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
-	struct sk_buff *skb;
 
-	while ((skb = skb_array_consume(&q->skb_array)) != NULL)
-		kfree_skb(skb);
+	skb_array_cleanup(&q->skb_array);
 }
 
 static int macvtap_open(struct inode *inode, struct file *file)
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 47a6434..5078a0d 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -15,162 +15,27 @@
 config SWPHY
 	bool
 
-comment "MII PHY device drivers"
+comment "MDIO bus device drivers"
 
-config AQUANTIA_PHY
-        tristate "Drivers for the Aquantia PHYs"
-        ---help---
-          Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
-
-config AT803X_PHY
-	tristate "Drivers for Atheros AT803X PHYs"
-	---help---
-	  Currently supports the AT8030 and AT8035 model
-
-config AMD_PHY
-	tristate "Drivers for the AMD PHYs"
-	---help---
-	  Currently supports the am79c874
-
-config MARVELL_PHY
-	tristate "Drivers for Marvell PHYs"
-	---help---
-	  Currently has a driver for the 88E1011S
-	
-config DAVICOM_PHY
-	tristate "Drivers for Davicom PHYs"
-	---help---
-	  Currently supports dm9161e and dm9131
-
-config QSEMI_PHY
-	tristate "Drivers for Quality Semiconductor PHYs"
-	---help---
-	  Currently supports the qs6612
-
-config LXT_PHY
-	tristate "Drivers for the Intel LXT PHYs"
-	---help---
-	  Currently supports the lxt970, lxt971
-
-config CICADA_PHY
-	tristate "Drivers for the Cicada PHYs"
-	---help---
-	  Currently supports the cis8204
-
-config VITESSE_PHY
-        tristate "Drivers for the Vitesse PHYs"
-        ---help---
-          Currently supports the vsc8244
-
-config TERANETICS_PHY
-        tristate "Drivers for the Teranetics PHYs"
-        ---help---
-          Currently supports the Teranetics TN2020
-
-config SMSC_PHY
-	tristate "Drivers for SMSC PHYs"
-	---help---
-	  Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs
-
-config BCM_NET_PHYLIB
-	tristate
-
-config BROADCOM_PHY
-	tristate "Drivers for Broadcom PHYs"
-	select BCM_NET_PHYLIB
-	---help---
-	  Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
-	  BCM5481 and BCM5482 PHYs.
-
-config BCM_CYGNUS_PHY
-	tristate "Drivers for Broadcom Cygnus SoC internal PHY"
-	depends on ARCH_BCM_CYGNUS || COMPILE_TEST
-	depends on MDIO_BCM_IPROC
-	select BCM_NET_PHYLIB
-	---help---
-	  This PHY driver is for the 1G internal PHYs of the Broadcom
-	  Cygnus Family SoC.
-
-	  Currently supports internal PHY's used in the BCM11300,
-	  BCM11320, BCM11350, BCM11360, BCM58300, BCM58302,
-	  BCM58303 & BCM58305 Broadcom Cygnus SoCs.
-
-config BCM63XX_PHY
-	tristate "Drivers for Broadcom 63xx SOCs internal PHY"
-	depends on BCM63XX
-	select BCM_NET_PHYLIB
-	---help---
-	  Currently supports the 6348 and 6358 PHYs.
-
-config BCM7XXX_PHY
-	tristate "Drivers for Broadcom 7xxx SOCs internal PHYs"
-	select BCM_NET_PHYLIB
-	---help---
-	  Currently supports the BCM7366, BCM7439, BCM7445, and
-	  40nm and 65nm generation of BCM7xxx Set Top Box SoCs.
-
-config BCM87XX_PHY
-	tristate "Driver for Broadcom BCM8706 and BCM8727 PHYs"
+config MDIO_BCM_IPROC
+	tristate "Broadcom iProc MDIO bus controller"
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	depends on HAS_IOMEM && OF_MDIO
 	help
-	  Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs.
+	  This module provides a driver for the MDIO busses found in the
+	  Broadcom iProc SoC's.
 
-config ICPLUS_PHY
-	tristate "Drivers for ICPlus PHYs"
-	---help---
-	  Currently supports the IP175C and IP1001 PHYs.
-
-config REALTEK_PHY
-	tristate "Drivers for Realtek PHYs"
-	---help---
-	  Supports the Realtek 821x PHY.
-
-config NATIONAL_PHY
-	tristate "Drivers for National Semiconductor PHYs"
-	---help---
-	  Currently supports the DP83865 PHY.
-
-config STE10XP
-	tristate "Driver for STMicroelectronics STe10Xp PHYs"
-	---help---
-	  This is the driver for the STe100p and STe101p PHYs.
-
-config LSI_ET1011C_PHY
-	tristate "Driver for LSI ET1011C PHY"
-	---help---
-	  Supports the LSI ET1011C PHY.
-
-config MICREL_PHY
-	tristate "Driver for Micrel PHYs"
-	---help---
-	  Supports the KSZ9021, VSC8201, KS8001 PHYs.
-
-config DP83848_PHY
-	tristate "Driver for Texas Instruments DP83848 PHY"
-	---help---
-	  Supports the DP83848 PHY.
-
-config DP83867_PHY
-	tristate "Drivers for Texas Instruments DP83867 Gigabit PHY"
-	---help---
-	  Currently supports the DP83867 PHY.
-
-config MICROCHIP_PHY
-	tristate "Drivers for Microchip PHYs"
+config MDIO_BCM_UNIMAC
+	tristate "Broadcom UniMAC MDIO bus controller"
+	depends on HAS_IOMEM
 	help
-	  Supports the LAN88XX PHYs.
-
-config FIXED_PHY
-	tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
-	depends on PHYLIB
-	select SWPHY
-	---help---
-	  Adds the platform "fixed" MDIO Bus to cover the boards that use
-	  PHYs that are not connected to the real MDIO bus.
-
-	  Currently tested with mpc866ads and mpc8349e-mitx.
+	  This module provides a driver for the Broadcom UniMAC MDIO busses.
+	  This hardware can be found in the Broadcom GENET Ethernet MAC
+	  controllers as well as some Broadcom Ethernet switches such as the
+	  Starfighter 2 switches.
 
 config MDIO_BITBANG
-	tristate "Support for bitbanged MDIO buses"
+	tristate "Bitbanged MDIO buses"
 	help
 	  This module implements the MDIO bus protocol in software,
 	  for use by low level drivers that export the ability to
@@ -178,54 +43,6 @@
 
 	  If in doubt, say N.
 
-config MDIO_GPIO
-	tristate "Support for GPIO lib-based bitbanged MDIO buses"
-	depends on MDIO_BITBANG && GPIOLIB
-	---help---
-	  Supports GPIO lib-based MDIO busses.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called mdio-gpio.
-
-config MDIO_CAVIUM
-	tristate
-
-config MDIO_OCTEON
-	tristate "Support for MDIO buses on Octeon and some ThunderX SOCs"
-	depends on 64BIT
-	depends on HAS_IOMEM
-	select MDIO_CAVIUM
-	help
-	  This module provides a driver for the Octeon and ThunderX MDIO
-	  buses. It is required by the Octeon and ThunderX ethernet device
-	  drivers on some systems.
-
-config MDIO_THUNDER
-	tristate "Support for MDIO buses on ThunderX SOCs"
-	depends on 64BIT
-	depends on PCI
-	select MDIO_CAVIUM
-	help
-	  This driver supports the MDIO interfaces found on Cavium
-	  ThunderX SoCs when the MDIO bus device appears as a PCI
-	  device.
-
-
-config MDIO_SUN4I
-	tristate "Allwinner sun4i MDIO interface support"
-	depends on ARCH_SUNXI
-	help
-	  This driver supports the MDIO interface found in the network
-	  interface units of the Allwinner SoC that have an EMAC (A10,
-	  A12, A10s, etc.)
-
-config MDIO_MOXART
-        tristate "MOXA ART MDIO interface support"
-        depends on ARCH_MOXART
-        help
-          This driver supports the MDIO interface found in the network
-          interface units of the MOXA ART SoC
-
 config MDIO_BUS_MUX
 	tristate
 	depends on OF_MDIO
@@ -235,8 +52,19 @@
 	  to a parent bus.  Switching between child busses is done by
 	  device specific drivers.
 
+config MDIO_BUS_MUX_BCM_IPROC
+	tristate "Broadcom iProc based MDIO bus multiplexers"
+	depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
+	select MDIO_BUS_MUX
+	default ARCH_BCM_IPROC
+	help
+	  This module provides a driver for MDIO bus multiplexers found in
+	  iProc based Broadcom SoCs. This multiplexer connects one of several
+	  child MDIO bus to a parent bus. Buses could be internal as well as
+	  external and selection logic lies inside the same multiplexer.
+
 config MDIO_BUS_MUX_GPIO
-	tristate "Support for GPIO controlled MDIO bus multiplexers"
+	tristate "GPIO controlled MDIO bus multiplexers"
 	depends on OF_GPIO && OF_MDIO
 	select MDIO_BUS_MUX
 	help
@@ -246,7 +74,7 @@
 	  selection is under the control of GPIO lines.
 
 config MDIO_BUS_MUX_MMIOREG
-	tristate "Support for MMIO device-controlled MDIO bus multiplexers"
+	tristate "MMIO device-controlled MDIO bus multiplexers"
 	depends on OF_MDIO && HAS_IOMEM
 	select MDIO_BUS_MUX
 	help
@@ -258,41 +86,17 @@
 
 	  Currently, only 8-bit registers are supported.
 
-config MDIO_BUS_MUX_BCM_IPROC
-	tristate "Support for iProc based MDIO bus multiplexers"
-	depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST)
-	select MDIO_BUS_MUX
-	default ARCH_BCM_IPROC
-	help
-	  This module provides a driver for MDIO bus multiplexers found in
-	  iProc based Broadcom SoCs. This multiplexer connects one of several
-	  child MDIO bus to a parent bus. Buses could be internal as well as
-	  external and selection logic lies inside the same multiplexer.
+config MDIO_CAVIUM
+	tristate
 
-config MDIO_BCM_UNIMAC
-	tristate "Broadcom UniMAC MDIO bus controller"
-	depends on HAS_IOMEM
-	help
-	  This module provides a driver for the Broadcom UniMAC MDIO busses.
-	  This hardware can be found in the Broadcom GENET Ethernet MAC
-	  controllers as well as some Broadcom Ethernet switches such as the
-	  Starfighter 2 switches.
-
-config MDIO_BCM_IPROC
-	tristate "Broadcom iProc MDIO bus controller"
-	depends on ARCH_BCM_IPROC || COMPILE_TEST
-	depends on HAS_IOMEM && OF_MDIO
-	help
-	  This module provides a driver for the MDIO busses found in the
-	  Broadcom iProc SoC's.
-
-config INTEL_XWAY_PHY
-	tristate "Driver for Intel XWAY PHYs"
+config MDIO_GPIO
+	tristate "GPIO lib-based bitbanged MDIO buses"
+	depends on MDIO_BITBANG && GPIOLIB
 	---help---
-	  Supports the Intel XWAY (former Lantiq) 11G and 22E PHYs.
-	  These PHYs are marked as standalone chips under the names
-	  PEF 7061, PEF 7071 and PEF 7072 or integrated into the Intel
-	  SoCs xRX200, xRX300, xRX330, xRX350 and xRX550.
+	  Supports GPIO lib-based MDIO busses.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called mdio-gpio.
 
 config MDIO_HISI_FEMAC
 	tristate "Hisilicon FEMAC MDIO bus controller"
@@ -301,12 +105,228 @@
 	  This module provides a driver for the MDIO busses found in the
 	  Hisilicon SoC that have an Fast Ethernet MAC.
 
+config MDIO_MOXART
+        tristate "MOXA ART MDIO interface support"
+        depends on ARCH_MOXART
+        help
+          This driver supports the MDIO interface found in the network
+          interface units of the MOXA ART SoC
+
+config MDIO_OCTEON
+	tristate "Octeon and some ThunderX SOCs MDIO buses"
+	depends on 64BIT
+	depends on HAS_IOMEM
+	select MDIO_CAVIUM
+	help
+	  This module provides a driver for the Octeon and ThunderX MDIO
+	  buses. It is required by the Octeon and ThunderX ethernet device
+	  drivers on some systems.
+
+config MDIO_SUN4I
+	tristate "Allwinner sun4i MDIO interface support"
+	depends on ARCH_SUNXI
+	help
+	  This driver supports the MDIO interface found in the network
+	  interface units of the Allwinner SoC that have an EMAC (A10,
+	  A12, A10s, etc.)
+
+config MDIO_THUNDER
+	tristate "ThunderX SOCs MDIO buses"
+	depends on 64BIT
+	depends on PCI
+	select MDIO_CAVIUM
+	help
+	  This driver supports the MDIO interfaces found on Cavium
+	  ThunderX SoCs when the MDIO bus device appears as a PCI
+	  device.
+
 config MDIO_XGENE
 	tristate "APM X-Gene SoC MDIO bus controller"
 	help
 	  This module provides a driver for the MDIO busses found in the
 	  APM X-Gene SoC's.
 
+comment "MII PHY device drivers"
+
+config AMD_PHY
+	tristate "AMD PHYs"
+	---help---
+	  Currently supports the am79c874
+
+config AQUANTIA_PHY
+        tristate "Aquantia PHYs"
+        ---help---
+          Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405
+
+config AT803X_PHY
+	tristate "AT803X PHYs"
+	---help---
+	  Currently supports the AT8030 and AT8035 model
+
+config BCM63XX_PHY
+	tristate "Broadcom 63xx SOCs internal PHY"
+	depends on BCM63XX
+	select BCM_NET_PHYLIB
+	---help---
+	  Currently supports the 6348 and 6358 PHYs.
+
+config BCM7XXX_PHY
+	tristate "Broadcom 7xxx SOCs internal PHYs"
+	select BCM_NET_PHYLIB
+	---help---
+	  Currently supports the BCM7366, BCM7439, BCM7445, and
+	  40nm and 65nm generation of BCM7xxx Set Top Box SoCs.
+
+config BCM87XX_PHY
+	tristate "Broadcom BCM8706 and BCM8727 PHYs"
+	help
+	  Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs.
+
+config BCM_CYGNUS_PHY
+	tristate "Broadcom Cygnus SoC internal PHY"
+	depends on ARCH_BCM_CYGNUS || COMPILE_TEST
+	depends on MDIO_BCM_IPROC
+	select BCM_NET_PHYLIB
+	---help---
+	  This PHY driver is for the 1G internal PHYs of the Broadcom
+	  Cygnus Family SoC.
+
+	  Currently supports internal PHY's used in the BCM11300,
+	  BCM11320, BCM11350, BCM11360, BCM58300, BCM58302,
+	  BCM58303 & BCM58305 Broadcom Cygnus SoCs.
+
+config BCM_NET_PHYLIB
+	tristate
+
+config BROADCOM_PHY
+	tristate "Broadcom PHYs"
+	select BCM_NET_PHYLIB
+	---help---
+	  Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
+	  BCM5481 and BCM5482 PHYs.
+
+config CICADA_PHY
+	tristate "Cicada PHYs"
+	---help---
+	  Currently supports the cis8204
+
+config DAVICOM_PHY
+	tristate "Davicom PHYs"
+	---help---
+	  Currently supports dm9161e and dm9131
+
+config DP83848_PHY
+	tristate "Texas Instruments DP83848 PHY"
+	---help---
+	  Supports the DP83848 PHY.
+
+config DP83867_PHY
+	tristate "Texas Instruments DP83867 Gigabit PHY"
+	---help---
+	  Currently supports the DP83867 PHY.
+
+config FIXED_PHY
+	tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs"
+	depends on PHYLIB
+	select SWPHY
+	---help---
+	  Adds the platform "fixed" MDIO Bus to cover the boards that use
+	  PHYs that are not connected to the real MDIO bus.
+
+	  Currently tested with mpc866ads and mpc8349e-mitx.
+
+config ICPLUS_PHY
+	tristate "ICPlus PHYs"
+	---help---
+	  Currently supports the IP175C and IP1001 PHYs.
+
+config INTEL_XWAY_PHY
+	tristate "Intel XWAY PHYs"
+	---help---
+	  Supports the Intel XWAY (former Lantiq) 11G and 22E PHYs.
+	  These PHYs are marked as standalone chips under the names
+	  PEF 7061, PEF 7071 and PEF 7072 or integrated into the Intel
+	  SoCs xRX200, xRX300, xRX330, xRX350 and xRX550.
+
+config LSI_ET1011C_PHY
+	tristate "LSI ET1011C PHY"
+	---help---
+	  Supports the LSI ET1011C PHY.
+
+config LXT_PHY
+	tristate "Intel LXT PHYs"
+	---help---
+	  Currently supports the lxt970, lxt971
+
+config MARVELL_PHY
+	tristate "Marvell PHYs"
+	---help---
+	  Currently has a driver for the 88E1011S
+
+config MICREL_PHY
+	tristate "Micrel PHYs"
+	---help---
+	  Supports the KSZ9021, VSC8201, KS8001 PHYs.
+
+config MICROCHIP_PHY
+	tristate "Microchip PHYs"
+	help
+	  Supports the LAN88XX PHYs.
+
+config MICROSEMI_PHY
+	tristate "Microsemi PHYs"
+	---help---
+	  Currently supports the VSC8531 and VSC8541 PHYs
+
+config NATIONAL_PHY
+	tristate "National Semiconductor PHYs"
+	---help---
+	  Currently supports the DP83865 PHY.
+
+config QSEMI_PHY
+	tristate "Quality Semiconductor PHYs"
+	---help---
+	  Currently supports the qs6612
+
+config REALTEK_PHY
+	tristate "Realtek PHYs"
+	---help---
+	  Supports the Realtek 821x PHY.
+
+config SMSC_PHY
+	tristate "SMSC PHYs"
+	---help---
+	  Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs
+
+config STE10XP
+	tristate "STMicroelectronics STe10Xp PHYs"
+	---help---
+	  This is the driver for the STe100p and STe101p PHYs.
+
+config TERANETICS_PHY
+        tristate "Teranetics PHYs"
+        ---help---
+          Currently supports the Teranetics TN2020
+
+config VITESSE_PHY
+        tristate "Vitesse PHYs"
+        ---help---
+          Currently supports the vsc8244
+
+config XILINX_GMII2RGMII
+       tristate "Xilinx GMII2RGMII converter driver"
+       ---help---
+         This driver support xilinx GMII to RGMII IP core it provides
+         the Reduced Gigabit Media Independent Interface(RGMII) between
+         Ethernet physical media devices and the Gigabit Ethernet controller.
+
+config MDIO_XGENE
+	tristate "APM X-Gene SoC MDIO bus controller"
+	depends on ARCH_XGENE || COMPILE_TEST
+	help
+	  This module provides a driver for the MDIO busses found in the
+	  APM X-Gene SoC's.
+
 endif # PHYLIB
 
 config MICREL_KS8995MA
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 534dfa7..e58667d 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -1,51 +1,55 @@
-# Makefile for Linux PHY drivers
+# Makefile for Linux PHY drivers and MDIO bus drivers
 
 libphy-y			:= phy.o phy_device.o mdio_bus.o mdio_device.o
 libphy-$(CONFIG_SWPHY)		+= swphy.o
 
 obj-$(CONFIG_PHYLIB)		+= libphy.o
+
+obj-$(CONFIG_MDIO_BCM_IPROC)	+= mdio-bcm-iproc.o
+obj-$(CONFIG_MDIO_BCM_UNIMAC)	+= mdio-bcm-unimac.o
+obj-$(CONFIG_MDIO_BITBANG)	+= mdio-bitbang.o
+obj-$(CONFIG_MDIO_BUS_MUX)	+= mdio-mux.o
+obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC)	+= mdio-mux-bcm-iproc.o
+obj-$(CONFIG_MDIO_BUS_MUX_GPIO)	+= mdio-mux-gpio.o
+obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
+obj-$(CONFIG_MDIO_CAVIUM)	+= mdio-cavium.o
+obj-$(CONFIG_MDIO_GPIO)		+= mdio-gpio.o
+obj-$(CONFIG_MDIO_HISI_FEMAC)	+= mdio-hisi-femac.o
+obj-$(CONFIG_MDIO_MOXART)	+= mdio-moxart.o
+obj-$(CONFIG_MDIO_OCTEON)	+= mdio-octeon.o
+obj-$(CONFIG_MDIO_SUN4I)	+= mdio-sun4i.o
+obj-$(CONFIG_MDIO_THUNDER)	+= mdio-thunder.o
+obj-$(CONFIG_MDIO_XGENE)	+= mdio-xgene.o
+
+obj-$(CONFIG_AMD_PHY)		+= amd.o
 obj-$(CONFIG_AQUANTIA_PHY)	+= aquantia.o
-obj-$(CONFIG_MARVELL_PHY)	+= marvell.o
-obj-$(CONFIG_DAVICOM_PHY)	+= davicom.o
-obj-$(CONFIG_CICADA_PHY)	+= cicada.o
-obj-$(CONFIG_LXT_PHY)		+= lxt.o
-obj-$(CONFIG_QSEMI_PHY)		+= qsemi.o
-obj-$(CONFIG_SMSC_PHY)		+= smsc.o
-obj-$(CONFIG_TERANETICS_PHY)	+= teranetics.o
-obj-$(CONFIG_VITESSE_PHY)	+= vitesse.o
-obj-$(CONFIG_BCM_NET_PHYLIB)	+= bcm-phy-lib.o
-obj-$(CONFIG_BROADCOM_PHY)	+= broadcom.o
+obj-$(CONFIG_AT803X_PHY)	+= at803x.o
 obj-$(CONFIG_BCM63XX_PHY)	+= bcm63xx.o
 obj-$(CONFIG_BCM7XXX_PHY)	+= bcm7xxx.o
 obj-$(CONFIG_BCM87XX_PHY)	+= bcm87xx.o
 obj-$(CONFIG_BCM_CYGNUS_PHY)	+= bcm-cygnus.o
-obj-$(CONFIG_ICPLUS_PHY)	+= icplus.o
-obj-$(CONFIG_REALTEK_PHY)	+= realtek.o
-obj-$(CONFIG_LSI_ET1011C_PHY)	+= et1011c.o
-obj-$(CONFIG_FIXED_PHY)		+= fixed_phy.o
-obj-$(CONFIG_MDIO_BITBANG)	+= mdio-bitbang.o
-obj-$(CONFIG_MDIO_GPIO)		+= mdio-gpio.o
-obj-$(CONFIG_NATIONAL_PHY)	+= national.o
+obj-$(CONFIG_BCM_NET_PHYLIB)	+= bcm-phy-lib.o
+obj-$(CONFIG_BROADCOM_PHY)	+= broadcom.o
+obj-$(CONFIG_CICADA_PHY)	+= cicada.o
+obj-$(CONFIG_DAVICOM_PHY)	+= davicom.o
 obj-$(CONFIG_DP83640_PHY)	+= dp83640.o
 obj-$(CONFIG_DP83848_PHY)	+= dp83848.o
 obj-$(CONFIG_DP83867_PHY)	+= dp83867.o
-obj-$(CONFIG_STE10XP)		+= ste10Xp.o
-obj-$(CONFIG_MICREL_PHY)	+= micrel.o
-obj-$(CONFIG_MDIO_OCTEON)	+= mdio-octeon.o
-obj-$(CONFIG_MDIO_THUNDER)	+= mdio-thunder.o
-obj-$(CONFIG_MDIO_CAVIUM)	+= mdio-cavium.o
-obj-$(CONFIG_MICREL_KS8995MA)	+= spi_ks8995.o
-obj-$(CONFIG_AT803X_PHY)	+= at803x.o
-obj-$(CONFIG_AMD_PHY)		+= amd.o
-obj-$(CONFIG_MDIO_BUS_MUX)	+= mdio-mux.o
-obj-$(CONFIG_MDIO_BUS_MUX_GPIO)	+= mdio-mux-gpio.o
-obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o
-obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC)	+= mdio-mux-bcm-iproc.o
-obj-$(CONFIG_MDIO_SUN4I)	+= mdio-sun4i.o
-obj-$(CONFIG_MDIO_MOXART)	+= mdio-moxart.o
-obj-$(CONFIG_MDIO_BCM_UNIMAC)	+= mdio-bcm-unimac.o
-obj-$(CONFIG_MICROCHIP_PHY)	+= microchip.o
-obj-$(CONFIG_MDIO_BCM_IPROC)	+= mdio-bcm-iproc.o
+obj-$(CONFIG_FIXED_PHY)		+= fixed_phy.o
+obj-$(CONFIG_ICPLUS_PHY)	+= icplus.o
 obj-$(CONFIG_INTEL_XWAY_PHY)	+= intel-xway.o
-obj-$(CONFIG_MDIO_HISI_FEMAC)	+= mdio-hisi-femac.o
-obj-$(CONFIG_MDIO_XGENE)	+= mdio-xgene.o
+obj-$(CONFIG_LSI_ET1011C_PHY)	+= et1011c.o
+obj-$(CONFIG_LXT_PHY)		+= lxt.o
+obj-$(CONFIG_MARVELL_PHY)	+= marvell.o
+obj-$(CONFIG_MICREL_KS8995MA)	+= spi_ks8995.o
+obj-$(CONFIG_MICREL_PHY)	+= micrel.o
+obj-$(CONFIG_MICROCHIP_PHY)	+= microchip.o
+obj-$(CONFIG_MICROSEMI_PHY)	+= mscc.o
+obj-$(CONFIG_NATIONAL_PHY)	+= national.o
+obj-$(CONFIG_QSEMI_PHY)		+= qsemi.o
+obj-$(CONFIG_REALTEK_PHY)	+= realtek.o
+obj-$(CONFIG_SMSC_PHY)		+= smsc.o
+obj-$(CONFIG_STE10XP)		+= ste10Xp.o
+obj-$(CONFIG_TERANETICS_PHY)	+= teranetics.o
+obj-$(CONFIG_VITESSE_PHY)	+= vitesse.o
+obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o
diff --git a/drivers/net/phy/mdio-xgene.c b/drivers/net/phy/mdio-xgene.c
index 7756748..92af182 100644
--- a/drivers/net/phy/mdio-xgene.c
+++ b/drivers/net/phy/mdio-xgene.c
@@ -424,10 +424,8 @@
 	mdiobus_unregister(mdio_bus);
 	mdiobus_free(mdio_bus);
 
-	if (dev->of_node) {
-		if (IS_ERR(pdata->clk))
-			clk_disable_unprepare(pdata->clk);
-	}
+	if (dev->of_node)
+		clk_disable_unprepare(pdata->clk);
 
 	return 0;
 }
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 1882d98..885ac9c 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -677,17 +677,28 @@
 		data[i] = kszphy_get_stat(phydev, i);
 }
 
+static int kszphy_suspend(struct phy_device *phydev)
+{
+	/* Disable PHY Interrupts */
+	if (phy_interrupt_is_valid(phydev)) {
+		phydev->interrupts = PHY_INTERRUPT_DISABLED;
+		if (phydev->drv->config_intr)
+			phydev->drv->config_intr(phydev);
+	}
+
+	return genphy_suspend(phydev);
+}
+
 static int kszphy_resume(struct phy_device *phydev)
 {
-	int value;
+	genphy_resume(phydev);
 
-	mutex_lock(&phydev->lock);
-
-	value = phy_read(phydev, MII_BMCR);
-	phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
-
-	kszphy_config_intr(phydev);
-	mutex_unlock(&phydev->lock);
+	/* Enable PHY Interrupts */
+	if (phy_interrupt_is_valid(phydev)) {
+		phydev->interrupts = PHY_INTERRUPT_ENABLED;
+		if (phydev->drv->config_intr)
+			phydev->drv->config_intr(phydev);
+	}
 
 	return 0;
 }
@@ -900,7 +911,7 @@
 	.get_sset_count = kszphy_get_sset_count,
 	.get_strings	= kszphy_get_strings,
 	.get_stats	= kszphy_get_stats,
-	.suspend	= genphy_suspend,
+	.suspend	= kszphy_suspend,
 	.resume		= kszphy_resume,
 }, {
 	.phy_id		= PHY_ID_KSZ8061,
@@ -953,7 +964,7 @@
 	.get_strings	= kszphy_get_strings,
 	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
-	.resume		= genphy_resume,
+	.resume		= kszphy_resume,
 }, {
 	.phy_id		= PHY_ID_KSZ8873MLL,
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index 15f8206..7c00e50 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -55,7 +55,7 @@
 	return rc < 0 ? rc : 0;
 }
 
-int lan88xx_suspend(struct phy_device *phydev)
+static int lan88xx_suspend(struct phy_device *phydev)
 {
 	struct lan88xx_priv *priv = phydev->priv;
 
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
new file mode 100644
index 0000000..a17573e
--- /dev/null
+++ b/drivers/net/phy/mscc.c
@@ -0,0 +1,337 @@
+/*
+ * Driver for Microsemi VSC85xx PHYs
+ *
+ * Author: Nagaraju Lakkaraju
+ * License: Dual MIT/GPL
+ * Copyright (c) 2016 Microsemi Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mdio.h>
+#include <linux/mii.h>
+#include <linux/phy.h>
+#include <linux/of.h>
+#include <dt-bindings/net/mscc-phy-vsc8531.h>
+
+enum rgmii_rx_clock_delay {
+	RGMII_RX_CLK_DELAY_0_2_NS = 0,
+	RGMII_RX_CLK_DELAY_0_8_NS = 1,
+	RGMII_RX_CLK_DELAY_1_1_NS = 2,
+	RGMII_RX_CLK_DELAY_1_7_NS = 3,
+	RGMII_RX_CLK_DELAY_2_0_NS = 4,
+	RGMII_RX_CLK_DELAY_2_3_NS = 5,
+	RGMII_RX_CLK_DELAY_2_6_NS = 6,
+	RGMII_RX_CLK_DELAY_3_4_NS = 7
+};
+
+/* Microsemi VSC85xx PHY registers */
+/* IEEE 802. Std Registers */
+#define MSCC_PHY_EXT_PHY_CNTL_1           23
+#define MAC_IF_SELECTION_MASK             0x1800
+#define MAC_IF_SELECTION_GMII             0
+#define MAC_IF_SELECTION_RMII             1
+#define MAC_IF_SELECTION_RGMII            2
+#define MAC_IF_SELECTION_POS              11
+#define FAR_END_LOOPBACK_MODE_MASK        0x0008
+
+#define MII_VSC85XX_INT_MASK		  25
+#define MII_VSC85XX_INT_MASK_MASK	  0xa000
+#define MII_VSC85XX_INT_STATUS		  26
+
+#define MSCC_PHY_WOL_MAC_CONTROL          27
+#define EDGE_RATE_CNTL_POS                5
+#define EDGE_RATE_CNTL_MASK               0x00E0
+
+#define MSCC_EXT_PAGE_ACCESS		  31
+#define MSCC_PHY_PAGE_STANDARD		  0x0000 /* Standard registers */
+#define MSCC_PHY_PAGE_EXTENDED_2	  0x0002 /* Extended reg - page 2 */
+
+/* Extended Page 2 Registers */
+#define MSCC_PHY_RGMII_CNTL		  20
+#define RGMII_RX_CLK_DELAY_MASK		  0x0070
+#define RGMII_RX_CLK_DELAY_POS		  4
+
+/* Microsemi PHY ID's */
+#define PHY_ID_VSC8531			  0x00070570
+#define PHY_ID_VSC8541			  0x00070770
+
+struct edge_rate_table {
+	u16 vddmac;
+	int slowdown[MSCC_SLOWDOWN_MAX];
+};
+
+struct edge_rate_table edge_table[MSCC_VDDMAC_MAX] = {
+	{3300, { 0, -2, -4,  -7,  -10, -17, -29, -53} },
+	{2500, { 0, -3, -6,  -10, -14, -23, -37, -63} },
+	{1800, { 0, -5, -9,  -16, -23, -35, -52, -76} },
+	{1500, { 0, -6, -14, -21, -29, -42, -58, -77} },
+};
+
+struct vsc8531_private {
+	u8 edge_slowdown;
+	u16 vddmac;
+};
+
+static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page)
+{
+	int rc;
+
+	rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page);
+	return rc;
+}
+
+static u8 edge_rate_magic_get(u16 vddmac,
+			      int slowdown)
+{
+	int rc = (MSCC_SLOWDOWN_MAX - 1);
+	u8 vdd;
+	u8 sd;
+
+	for (vdd = 0; vdd < MSCC_VDDMAC_MAX; vdd++) {
+		if (edge_table[vdd].vddmac == vddmac) {
+			for (sd = 0; sd < MSCC_SLOWDOWN_MAX; sd++) {
+				if (edge_table[vdd].slowdown[sd] <= slowdown) {
+					rc = (MSCC_SLOWDOWN_MAX - sd - 1);
+					break;
+				}
+			}
+		}
+	}
+
+	return rc;
+}
+
+static int vsc85xx_edge_rate_cntl_set(struct phy_device *phydev,
+				      u8 edge_rate)
+{
+	int rc;
+	u16 reg_val;
+
+	mutex_lock(&phydev->lock);
+	rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2);
+	if (rc != 0)
+		goto out_unlock;
+	reg_val = phy_read(phydev, MSCC_PHY_WOL_MAC_CONTROL);
+	reg_val &= ~(EDGE_RATE_CNTL_MASK);
+	reg_val |= (edge_rate << EDGE_RATE_CNTL_POS);
+	rc = phy_write(phydev, MSCC_PHY_WOL_MAC_CONTROL, reg_val);
+	if (rc != 0)
+		goto out_unlock;
+	rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+
+out_unlock:
+	mutex_unlock(&phydev->lock);
+
+	return rc;
+}
+
+static int vsc85xx_mac_if_set(struct phy_device *phydev,
+			      phy_interface_t interface)
+{
+	int rc;
+	u16 reg_val;
+
+	mutex_lock(&phydev->lock);
+	reg_val = phy_read(phydev, MSCC_PHY_EXT_PHY_CNTL_1);
+	reg_val &= ~(MAC_IF_SELECTION_MASK);
+	switch (interface) {
+	case PHY_INTERFACE_MODE_RGMII:
+		reg_val |= (MAC_IF_SELECTION_RGMII << MAC_IF_SELECTION_POS);
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		reg_val |= (MAC_IF_SELECTION_RMII << MAC_IF_SELECTION_POS);
+		break;
+	case PHY_INTERFACE_MODE_MII:
+	case PHY_INTERFACE_MODE_GMII:
+		reg_val |= (MAC_IF_SELECTION_GMII << MAC_IF_SELECTION_POS);
+		break;
+	default:
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+	rc = phy_write(phydev, MSCC_PHY_EXT_PHY_CNTL_1, reg_val);
+	if (rc != 0)
+		goto out_unlock;
+
+	rc = genphy_soft_reset(phydev);
+
+out_unlock:
+	mutex_unlock(&phydev->lock);
+
+	return rc;
+}
+
+static int vsc85xx_default_config(struct phy_device *phydev)
+{
+	int rc;
+	u16 reg_val;
+
+	mutex_lock(&phydev->lock);
+	rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2);
+	if (rc != 0)
+		goto out_unlock;
+
+	reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL);
+	reg_val &= ~(RGMII_RX_CLK_DELAY_MASK);
+	reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS);
+	phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val);
+	rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD);
+
+out_unlock:
+	mutex_unlock(&phydev->lock);
+
+	return rc;
+}
+
+#ifdef CONFIG_OF_MDIO
+static int vsc8531_of_init(struct phy_device *phydev)
+{
+	int rc;
+	struct vsc8531_private *vsc8531 = phydev->priv;
+	struct device *dev = &phydev->mdio.dev;
+	struct device_node *of_node = dev->of_node;
+
+	if (!of_node)
+		return -ENODEV;
+
+	rc = of_property_read_u16(of_node, "vsc8531,vddmac",
+				  &vsc8531->vddmac);
+	if (rc == -EINVAL)
+		vsc8531->vddmac = MSCC_VDDMAC_3300;
+	rc = of_property_read_u8(of_node, "vsc8531,edge-slowdown",
+				 &vsc8531->edge_slowdown);
+	if (rc == -EINVAL)
+		vsc8531->edge_slowdown = 0;
+
+	rc = 0;
+	return rc;
+}
+#else
+static int vsc8531_of_init(struct phy_device *phydev)
+{
+	return 0;
+}
+#endif /* CONFIG_OF_MDIO */
+
+static int vsc85xx_config_init(struct phy_device *phydev)
+{
+	int rc;
+	struct vsc8531_private *vsc8531 = phydev->priv;
+	u8 edge_rate;
+
+	rc = vsc8531_of_init(phydev);
+	if (rc)
+		return rc;
+
+	rc = vsc85xx_default_config(phydev);
+	if (rc)
+		return rc;
+
+	rc = vsc85xx_mac_if_set(phydev, phydev->interface);
+	if (rc)
+		return rc;
+
+	edge_rate = edge_rate_magic_get(vsc8531->vddmac,
+					-(int)vsc8531->edge_slowdown);
+	rc = vsc85xx_edge_rate_cntl_set(phydev, edge_rate);
+	if (rc)
+		return rc;
+
+	rc = genphy_config_init(phydev);
+
+	return rc;
+}
+
+static int vsc85xx_ack_interrupt(struct phy_device *phydev)
+{
+	int rc = 0;
+
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+		rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+
+	return (rc < 0) ? rc : 0;
+}
+
+static int vsc85xx_config_intr(struct phy_device *phydev)
+{
+	int rc;
+
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+		rc = phy_write(phydev, MII_VSC85XX_INT_MASK,
+			       MII_VSC85XX_INT_MASK_MASK);
+	} else {
+		rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0);
+		if (rc < 0)
+			return rc;
+		rc = phy_read(phydev, MII_VSC85XX_INT_STATUS);
+	}
+
+	return rc;
+}
+
+static int vsc85xx_probe(struct phy_device *phydev)
+{
+	struct vsc8531_private *vsc8531;
+
+	vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL);
+	if (!vsc8531)
+		return -ENOMEM;
+
+	phydev->priv = vsc8531;
+
+	return 0;
+}
+
+/* Microsemi VSC85xx PHYs */
+static struct phy_driver vsc85xx_driver[] = {
+{
+	.phy_id		= PHY_ID_VSC8531,
+	.name		= "Microsemi VSC8531",
+	.phy_id_mask    = 0xfffffff0,
+	.features	= PHY_GBIT_FEATURES,
+	.flags		= PHY_HAS_INTERRUPT,
+	.soft_reset	= &genphy_soft_reset,
+	.config_init    = &vsc85xx_config_init,
+	.config_aneg    = &genphy_config_aneg,
+	.aneg_done	= &genphy_aneg_done,
+	.read_status    = &genphy_read_status,
+	.ack_interrupt  = &vsc85xx_ack_interrupt,
+	.config_intr    = &vsc85xx_config_intr,
+	.suspend	= &genphy_suspend,
+	.resume		= &genphy_resume,
+	.probe          = &vsc85xx_probe,
+},
+{
+	.phy_id		= PHY_ID_VSC8541,
+	.name		= "Microsemi VSC8541 SyncE",
+	.phy_id_mask    = 0xfffffff0,
+	.features	= PHY_GBIT_FEATURES,
+	.flags		= PHY_HAS_INTERRUPT,
+	.soft_reset	= &genphy_soft_reset,
+	.config_init    = &vsc85xx_config_init,
+	.config_aneg    = &genphy_config_aneg,
+	.aneg_done	= &genphy_aneg_done,
+	.read_status    = &genphy_read_status,
+	.ack_interrupt  = &vsc85xx_ack_interrupt,
+	.config_intr    = &vsc85xx_config_intr,
+	.suspend	= &genphy_suspend,
+	.resume		= &genphy_resume,
+	.probe          = &vsc85xx_probe,
+}
+
+};
+
+module_phy_driver(vsc85xx_driver);
+
+static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = {
+	{ PHY_ID_VSC8531, 0xfffffff0, },
+	{ PHY_ID_VSC8541, 0xfffffff0, },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl);
+
+MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver");
+MODULE_AUTHOR("Nagaraju Lakkaraju");
+MODULE_LICENSE("Dual MIT/GPL");
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index c5dc2c36..c6f6683 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -722,8 +722,10 @@
 int phy_start_interrupts(struct phy_device *phydev)
 {
 	atomic_set(&phydev->irq_disable, 0);
-	if (request_irq(phydev->irq, phy_interrupt, 0, "phy_interrupt",
-			phydev) < 0) {
+	if (request_irq(phydev->irq, phy_interrupt,
+				IRQF_SHARED,
+				"phy_interrupt",
+				phydev) < 0) {
 		pr_warn("%s: Can't get IRQ %d (PHY)\n",
 			phydev->mdio.bus->name, phydev->irq);
 		phydev->irq = PHY_POLL;
diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
new file mode 100644
index 0000000..d15dd39
--- /dev/null
+++ b/drivers/net/phy/xilinx_gmii2rgmii.c
@@ -0,0 +1,112 @@
+/* Xilinx GMII2RGMII Converter driver
+ *
+ * Copyright (C) 2016 Xilinx, Inc.
+ * Copyright (C) 2016 Andrew Lunn <andrew@lunn.ch>
+ *
+ * Author: Andrew Lunn <andrew@lunn.ch>
+ * Author: Kedareswara rao Appana <appanad@xilinx.com>
+ *
+ * Description:
+ * This driver is developed for Xilinx GMII2RGMII Converter
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mii.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+#include <linux/of_mdio.h>
+
+#define XILINX_GMII2RGMII_REG		0x10
+#define XILINX_GMII2RGMII_SPEED_MASK	(BMCR_SPEED1000 | BMCR_SPEED100)
+
+struct gmii2rgmii {
+	struct phy_device *phy_dev;
+	struct phy_driver *phy_drv;
+	struct phy_driver conv_phy_drv;
+	int addr;
+};
+
+static int xgmiitorgmii_read_status(struct phy_device *phydev)
+{
+	struct gmii2rgmii *priv = phydev->priv;
+	u16 val = 0;
+
+	priv->phy_drv->read_status(phydev);
+
+	val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG);
+	val &= XILINX_GMII2RGMII_SPEED_MASK;
+
+	if (phydev->speed == SPEED_1000)
+		val |= BMCR_SPEED1000;
+	else if (phydev->speed == SPEED_100)
+		val |= BMCR_SPEED100;
+	else
+		val |= BMCR_SPEED10;
+
+	mdiobus_write(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG, val);
+
+	return 0;
+}
+
+static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
+{
+	struct device *dev = &mdiodev->dev;
+	struct device_node *np = dev->of_node, *phy_node;
+	struct gmii2rgmii *priv;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	phy_node = of_parse_phandle(np, "phy-handle", 0);
+	if (!phy_node) {
+		dev_err(dev, "Couldn't parse phy-handle\n");
+		return -ENODEV;
+	}
+
+	priv->phy_dev = of_phy_find_device(phy_node);
+	of_node_put(phy_node);
+	if (!priv->phy_dev) {
+		dev_info(dev, "Couldn't find phydev\n");
+		return -EPROBE_DEFER;
+	}
+
+	priv->addr = mdiodev->addr;
+	priv->phy_drv = priv->phy_dev->drv;
+	memcpy(&priv->conv_phy_drv, priv->phy_dev->drv,
+	       sizeof(struct phy_driver));
+	priv->conv_phy_drv.read_status = xgmiitorgmii_read_status;
+	priv->phy_dev->priv = priv;
+	priv->phy_dev->drv = &priv->conv_phy_drv;
+
+	return 0;
+}
+
+static const struct of_device_id xgmiitorgmii_of_match[] = {
+	{ .compatible = "xlnx,gmii-to-rgmii-1.0" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, xgmiitorgmii_of_match);
+
+static struct mdio_driver xgmiitorgmii_driver = {
+	.probe	= xgmiitorgmii_probe,
+	.mdiodrv.driver = {
+		.name = "xgmiitorgmii",
+		.of_match_table = xgmiitorgmii_of_match,
+	},
+};
+
+mdio_module_driver(xgmiitorgmii_driver);
+
+MODULE_DESCRIPTION("Xilinx GMII2RGMII converter driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index f226db4..5489c0e 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1103,6 +1103,15 @@
 	}
 
 	conf.file = file;
+
+	/* Don't use device name generated by the rtnetlink layer when ifname
+	 * isn't specified. Let ppp_dev_configure() set the device name using
+	 * the PPP unit identifer as suffix (i.e. ppp<unit_id>). This allows
+	 * userspace to infer the device name using to the PPPIOCGUNIT ioctl.
+	 */
+	if (!tb[IFLA_IFNAME])
+		conf.ifname_is_set = false;
+
 	err = ppp_dev_configure(src_net, dev, &conf);
 
 out_unlock:
@@ -1354,6 +1363,8 @@
 	dev->netdev_ops = &ppp_netdev_ops;
 	SET_NETDEV_DEVTYPE(dev, &ppp_type);
 
+	dev->features |= NETIF_F_LLTX;
+
 	dev->hard_header_len = PPP_HDRLEN;
 	dev->mtu = PPP_MRU;
 	dev->addr_len = 0;
@@ -1367,12 +1378,8 @@
  * Transmit-side routines.
  */
 
-/*
- * Called to do any work queued up on the transmit side
- * that can now be done.
- */
-static void
-ppp_xmit_process(struct ppp *ppp)
+/* Called to do any work queued up on the transmit side that can now be done */
+static void __ppp_xmit_process(struct ppp *ppp)
 {
 	struct sk_buff *skb;
 
@@ -1392,6 +1399,30 @@
 	ppp_xmit_unlock(ppp);
 }
 
+static DEFINE_PER_CPU(int, ppp_xmit_recursion);
+
+static void ppp_xmit_process(struct ppp *ppp)
+{
+	local_bh_disable();
+
+	if (unlikely(__this_cpu_read(ppp_xmit_recursion)))
+		goto err;
+
+	__this_cpu_inc(ppp_xmit_recursion);
+	__ppp_xmit_process(ppp);
+	__this_cpu_dec(ppp_xmit_recursion);
+
+	local_bh_enable();
+
+	return;
+
+err:
+	local_bh_enable();
+
+	if (net_ratelimit())
+		netdev_err(ppp->dev, "recursion detected\n");
+}
+
 static inline struct sk_buff *
 pad_compress_skb(struct ppp *ppp, struct sk_buff *skb)
 {
@@ -1847,11 +1878,8 @@
 }
 #endif /* CONFIG_PPP_MULTILINK */
 
-/*
- * Try to send data out on a channel.
- */
-static void
-ppp_channel_push(struct channel *pch)
+/* Try to send data out on a channel */
+static void __ppp_channel_push(struct channel *pch)
 {
 	struct sk_buff *skb;
 	struct ppp *ppp;
@@ -1876,11 +1904,22 @@
 		read_lock_bh(&pch->upl);
 		ppp = pch->ppp;
 		if (ppp)
-			ppp_xmit_process(ppp);
+			__ppp_xmit_process(ppp);
 		read_unlock_bh(&pch->upl);
 	}
 }
 
+static void ppp_channel_push(struct channel *pch)
+{
+	local_bh_disable();
+
+	__this_cpu_inc(ppp_xmit_recursion);
+	__ppp_channel_push(pch);
+	__this_cpu_dec(ppp_xmit_recursion);
+
+	local_bh_enable();
+}
+
 /*
  * Receive-side routines.
  */
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index ae0905e..1951b10 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -37,6 +37,7 @@
 #include <net/icmp.h>
 #include <net/route.h>
 #include <net/gre.h>
+#include <net/pptp.h>
 
 #include <linux/uaccess.h>
 
@@ -53,41 +54,6 @@
 static const struct ppp_channel_ops pptp_chan_ops;
 static const struct proto_ops pptp_ops;
 
-#define PPP_LCP_ECHOREQ 0x09
-#define PPP_LCP_ECHOREP 0x0A
-#define SC_RCV_BITS	(SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
-
-#define MISSING_WINDOW 20
-#define WRAPPED(curseq, lastseq)\
-	((((curseq) & 0xffffff00) == 0) &&\
-	(((lastseq) & 0xffffff00) == 0xffffff00))
-
-#define PPTP_GRE_PROTO  0x880B
-#define PPTP_GRE_VER    0x1
-
-#define PPTP_GRE_FLAG_C	0x80
-#define PPTP_GRE_FLAG_R	0x40
-#define PPTP_GRE_FLAG_K	0x20
-#define PPTP_GRE_FLAG_S	0x10
-#define PPTP_GRE_FLAG_A	0x80
-
-#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C)
-#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R)
-#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K)
-#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S)
-#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A)
-
-#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
-struct pptp_gre_header {
-	u8  flags;
-	u8  ver;
-	__be16 protocol;
-	__be16 payload_len;
-	__be16 call_id;
-	__be32 seq;
-	__be32 ack;
-} __packed;
-
 static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr)
 {
 	struct pppox_sock *sock;
@@ -240,16 +206,14 @@
 	skb_push(skb, header_len);
 	hdr = (struct pptp_gre_header *)(skb->data);
 
-	hdr->flags       = PPTP_GRE_FLAG_K;
-	hdr->ver         = PPTP_GRE_VER;
-	hdr->protocol    = htons(PPTP_GRE_PROTO);
-	hdr->call_id     = htons(opt->dst_addr.call_id);
+	hdr->gre_hd.flags = GRE_KEY | GRE_VERSION_1 | GRE_SEQ;
+	hdr->gre_hd.protocol = GRE_PROTO_PPP;
+	hdr->call_id = htons(opt->dst_addr.call_id);
 
-	hdr->flags      |= PPTP_GRE_FLAG_S;
-	hdr->seq         = htonl(++opt->seq_sent);
+	hdr->seq = htonl(++opt->seq_sent);
 	if (opt->ack_sent != seq_recv)	{
 		/* send ack with this message */
-		hdr->ver |= PPTP_GRE_FLAG_A;
+		hdr->gre_hd.flags |= GRE_ACK;
 		hdr->ack  = htonl(seq_recv);
 		opt->ack_sent = seq_recv;
 	}
@@ -312,7 +276,7 @@
 	headersize  = sizeof(*header);
 
 	/* test if acknowledgement present */
-	if (PPTP_GRE_IS_A(header->ver)) {
+	if (GRE_IS_ACK(header->gre_hd.flags)) {
 		__u32 ack;
 
 		if (!pskb_may_pull(skb, headersize))
@@ -320,7 +284,7 @@
 		header = (struct pptp_gre_header *)(skb->data);
 
 		/* ack in different place if S = 0 */
-		ack = PPTP_GRE_IS_S(header->flags) ? header->ack : header->seq;
+		ack = GRE_IS_SEQ(header->gre_hd.flags) ? header->ack : header->seq;
 
 		ack = ntohl(ack);
 
@@ -333,7 +297,7 @@
 		headersize -= sizeof(header->ack);
 	}
 	/* test if payload present */
-	if (!PPTP_GRE_IS_S(header->flags))
+	if (!GRE_IS_SEQ(header->gre_hd.flags))
 		goto drop;
 
 	payload_len = ntohs(header->payload_len);
@@ -394,11 +358,11 @@
 
 	header = (struct pptp_gre_header *)skb->data;
 
-	if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */
-		PPTP_GRE_IS_C(header->flags) ||                /* flag C should be clear */
-		PPTP_GRE_IS_R(header->flags) ||                /* flag R should be clear */
-		!PPTP_GRE_IS_K(header->flags) ||               /* flag K should be set */
-		(header->flags&0xF) != 0)                      /* routing and recursion ctrl = 0 */
+	if (header->gre_hd.protocol != GRE_PROTO_PPP || /* PPTP-GRE protocol for PPTP */
+		GRE_IS_CSUM(header->gre_hd.flags) ||    /* flag CSUM should be clear */
+		GRE_IS_ROUTING(header->gre_hd.flags) || /* flag ROUTING should be clear */
+		!GRE_IS_KEY(header->gre_hd.flags) ||    /* flag KEY should be set */
+		(header->gre_hd.flags & GRE_FLAGS))     /* flag Recursion Ctrl should be clear */
 		/* if invalid, discard this packet */
 		goto drop;
 
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index cdb19b3..b228bea 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -14,9 +14,23 @@
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/netdevice.h>
+#include <linux/etherdevice.h>
 #include <linux/filter.h>
 #include <linux/if_team.h>
 
+static rx_handler_result_t lb_receive(struct team *team, struct team_port *port,
+				      struct sk_buff *skb)
+{
+	if (unlikely(skb->protocol == htons(ETH_P_SLOW))) {
+		/* LACPDU packets should go to exact delivery */
+		const unsigned char *dest = eth_hdr(skb)->h_dest;
+
+		if (is_link_local_ether_addr(dest) && dest[5] == 0x02)
+			return RX_HANDLER_EXACT;
+	}
+	return RX_HANDLER_ANOTHER;
+}
+
 struct lb_priv;
 
 typedef struct team_port *lb_select_tx_port_func_t(struct team *,
@@ -652,6 +666,7 @@
 	.port_enter		= lb_port_enter,
 	.port_leave		= lb_port_leave,
 	.port_disabled		= lb_port_disabled,
+	.receive		= lb_receive,
 	.transmit		= lb_transmit,
 };
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 9c8b5bc..8093e39 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -731,14 +731,9 @@
 	}
 
 	alen = ETH_ALEN * uf.count;
-	addr = kmalloc(alen, GFP_KERNEL);
-	if (!addr)
-		return -ENOMEM;
-
-	if (copy_from_user(addr, arg + sizeof(uf), alen)) {
-		err = -EFAULT;
-		goto done;
-	}
+	addr = memdup_user(arg + sizeof(uf), alen);
+	if (IS_ERR(addr))
+		return PTR_ERR(addr);
 
 	/* The filter is updated without holding any locks. Which is
 	 * perfectly safe. We disable it first and in the worst
@@ -758,7 +753,7 @@
 	for (; n < uf.count; n++) {
 		if (!is_multicast_ether_addr(addr[n].u)) {
 			err = 0; /* no filter */
-			goto done;
+			goto free_addr;
 		}
 		addr_hash_set(filter->mask, addr[n].u);
 	}
@@ -774,8 +769,7 @@
 
 	/* Return the number of exact filters */
 	err = nexact;
-
-done:
+free_addr:
 	kfree(addr);
 	return err;
 }
@@ -894,11 +888,7 @@
 	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
 		goto drop;
 
-	if (skb->sk && sk_fullsock(skb->sk)) {
-		sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags,
-				  &skb_shinfo(skb)->tx_flags);
-		sw_tx_timestamp(skb);
-	}
+	skb_tx_timestamp(skb);
 
 	/* Orphan the skb - required as we might hang on to it
 	 * for indefinite time.
diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h
index a2d3ea6..d109242 100644
--- a/drivers/net/usb/asix.h
+++ b/drivers/net/usb/asix.h
@@ -46,6 +46,7 @@
 #define AX_CMD_SET_SW_MII		0x06
 #define AX_CMD_READ_MII_REG		0x07
 #define AX_CMD_WRITE_MII_REG		0x08
+#define AX_CMD_STATMNGSTS_REG		0x09
 #define AX_CMD_SET_HW_MII		0x0a
 #define AX_CMD_READ_EEPROM		0x0b
 #define AX_CMD_WRITE_EEPROM		0x0c
@@ -71,6 +72,17 @@
 #define AX_CMD_SW_RESET			0x20
 #define AX_CMD_SW_PHY_STATUS		0x21
 #define AX_CMD_SW_PHY_SELECT		0x22
+#define AX_QCTCTRL			0x2A
+
+#define AX_CHIPCODE_MASK		0x70
+#define AX_AX88772_CHIPCODE		0x00
+#define AX_AX88772A_CHIPCODE		0x10
+#define AX_AX88772B_CHIPCODE		0x20
+#define AX_HOST_EN			0x01
+
+#define AX_PHYSEL_PSEL			0x01
+#define AX_PHYSEL_SSMII			0
+#define AX_PHYSEL_SSEN			0x10
 
 #define AX_PHY_SELECT_MASK		(BIT(3) | BIT(2))
 #define AX_PHY_SELECT_INTERNAL		0
@@ -173,6 +185,10 @@
 };
 
 struct asix_common_private {
+	void (*resume)(struct usbnet *dev);
+	void (*suspend)(struct usbnet *dev);
+	u16 presvd_phy_advertise;
+	u16 presvd_phy_bmcr;
 	struct asix_rx_fixup_info rx_fixup_info;
 };
 
@@ -182,10 +198,10 @@
 #define FLAG_EEPROM_MAC		(1UL << 0)  /* init device MAC from eeprom */
 
 int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-		  u16 size, void *data);
+		  u16 size, void *data, int in_pm);
 
 int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-		   u16 size, void *data);
+		   u16 size, void *data, int in_pm);
 
 void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value,
 			  u16 index, u16 size, void *data);
@@ -197,27 +213,31 @@
 struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
 			      gfp_t flags);
 
-int asix_set_sw_mii(struct usbnet *dev);
-int asix_set_hw_mii(struct usbnet *dev);
+int asix_set_sw_mii(struct usbnet *dev, int in_pm);
+int asix_set_hw_mii(struct usbnet *dev, int in_pm);
 
 int asix_read_phy_addr(struct usbnet *dev, int internal);
 int asix_get_phy_addr(struct usbnet *dev);
 
-int asix_sw_reset(struct usbnet *dev, u8 flags);
+int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm);
 
-u16 asix_read_rx_ctl(struct usbnet *dev);
-int asix_write_rx_ctl(struct usbnet *dev, u16 mode);
+u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm);
+int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm);
 
-u16 asix_read_medium_status(struct usbnet *dev);
-int asix_write_medium_mode(struct usbnet *dev, u16 mode);
+u16 asix_read_medium_status(struct usbnet *dev, int in_pm);
+int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm);
 
-int asix_write_gpio(struct usbnet *dev, u16 value, int sleep);
+int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm);
 
 void asix_set_multicast(struct net_device *net);
 
 int asix_mdio_read(struct net_device *netdev, int phy_id, int loc);
 void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val);
 
+int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc);
+void asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc,
+			  int val);
+
 void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo);
 int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo);
 
diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 7de5ab5..f79eb12 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -22,24 +22,49 @@
 #include "asix.h"
 
 int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-		  u16 size, void *data)
+		  u16 size, void *data, int in_pm)
 {
 	int ret;
-	ret = usbnet_read_cmd(dev, cmd,
-			       USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
-			       value, index, data, size);
+	int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
 
-	if (ret != size && ret >= 0)
-		return -EINVAL;
+	BUG_ON(!dev);
+
+	if (!in_pm)
+		fn = usbnet_read_cmd;
+	else
+		fn = usbnet_read_cmd_nopm;
+
+	ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+		 value, index, data, size);
+
+	if (unlikely(ret < 0))
+		netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n",
+			    index, ret);
+
 	return ret;
 }
 
 int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-		   u16 size, void *data)
+		   u16 size, void *data, int in_pm)
 {
-	return usbnet_write_cmd(dev, cmd,
-				USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
-				value, index, data, size);
+	int ret;
+	int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
+
+	BUG_ON(!dev);
+
+	if (!in_pm)
+		fn = usbnet_write_cmd;
+	else
+		fn = usbnet_write_cmd_nopm;
+
+	ret = fn(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+		 value, index, data, size);
+
+	if (unlikely(ret < 0))
+		netdev_warn(dev->net, "Failed to write reg index 0x%04x: %d\n",
+			    index, ret);
+
+	return ret;
 }
 
 void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index,
@@ -225,19 +250,20 @@
 	return skb;
 }
 
-int asix_set_sw_mii(struct usbnet *dev)
+int asix_set_sw_mii(struct usbnet *dev, int in_pm)
 {
 	int ret;
-	ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL);
+	ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL, in_pm);
+
 	if (ret < 0)
 		netdev_err(dev->net, "Failed to enable software MII access\n");
 	return ret;
 }
 
-int asix_set_hw_mii(struct usbnet *dev)
+int asix_set_hw_mii(struct usbnet *dev, int in_pm)
 {
 	int ret;
-	ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL);
+	ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL, in_pm);
 	if (ret < 0)
 		netdev_err(dev->net, "Failed to enable hardware MII access\n");
 	return ret;
@@ -247,7 +273,7 @@
 {
 	int offset = (internal ? 1 : 0);
 	u8 buf[2];
-	int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf);
+	int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf, 0);
 
 	netdev_dbg(dev->net, "asix_get_phy_addr()\n");
 
@@ -270,21 +296,21 @@
 }
 
 
-int asix_sw_reset(struct usbnet *dev, u8 flags)
+int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm)
 {
 	int ret;
 
-        ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL);
+	ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL, in_pm);
 	if (ret < 0)
 		netdev_err(dev->net, "Failed to send software reset: %02x\n", ret);
 
 	return ret;
 }
 
-u16 asix_read_rx_ctl(struct usbnet *dev)
+u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm)
 {
 	__le16 v;
-	int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v);
+	int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v, in_pm);
 
 	if (ret < 0) {
 		netdev_err(dev->net, "Error reading RX_CTL register: %02x\n", ret);
@@ -295,12 +321,12 @@
 	return ret;
 }
 
-int asix_write_rx_ctl(struct usbnet *dev, u16 mode)
+int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm)
 {
 	int ret;
 
 	netdev_dbg(dev->net, "asix_write_rx_ctl() - mode = 0x%04x\n", mode);
-	ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL, in_pm);
 	if (ret < 0)
 		netdev_err(dev->net, "Failed to write RX_CTL mode to 0x%04x: %02x\n",
 			   mode, ret);
@@ -308,10 +334,11 @@
 	return ret;
 }
 
-u16 asix_read_medium_status(struct usbnet *dev)
+u16 asix_read_medium_status(struct usbnet *dev, int in_pm)
 {
 	__le16 v;
-	int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS, 0, 0, 2, &v);
+	int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS,
+				0, 0, 2, &v, in_pm);
 
 	if (ret < 0) {
 		netdev_err(dev->net, "Error reading Medium Status register: %02x\n",
@@ -323,12 +350,13 @@
 
 }
 
-int asix_write_medium_mode(struct usbnet *dev, u16 mode)
+int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm)
 {
 	int ret;
 
 	netdev_dbg(dev->net, "asix_write_medium_mode() - mode = 0x%04x\n", mode);
-	ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE, mode, 0, 0, NULL);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE,
+			     mode, 0, 0, NULL, in_pm);
 	if (ret < 0)
 		netdev_err(dev->net, "Failed to write Medium Mode mode to 0x%04x: %02x\n",
 			   mode, ret);
@@ -336,12 +364,12 @@
 	return ret;
 }
 
-int asix_write_gpio(struct usbnet *dev, u16 value, int sleep)
+int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm)
 {
 	int ret;
 
 	netdev_dbg(dev->net, "asix_write_gpio() - value = 0x%04x\n", value);
-	ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL, in_pm);
 	if (ret < 0)
 		netdev_err(dev->net, "Failed to write GPIO value 0x%04x: %02x\n",
 			   value, ret);
@@ -398,16 +426,31 @@
 {
 	struct usbnet *dev = netdev_priv(netdev);
 	__le16 res;
+	u8 smsr;
+	int i = 0;
+	int ret;
 
 	mutex_lock(&dev->phy_mutex);
-	asix_set_sw_mii(dev);
+	do {
+		ret = asix_set_sw_mii(dev, 0);
+		if (ret == -ENODEV)
+			break;
+		usleep_range(1000, 1100);
+		ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
+				    0, 0, 1, &smsr, 0);
+	} while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+	if (ret == -ENODEV) {
+		mutex_unlock(&dev->phy_mutex);
+		return ret;
+	}
+
 	asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id,
-				(__u16)loc, 2, &res);
-	asix_set_hw_mii(dev);
+				(__u16)loc, 2, &res, 0);
+	asix_set_hw_mii(dev, 0);
 	mutex_unlock(&dev->phy_mutex);
 
 	netdev_dbg(dev->net, "asix_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n",
-		   phy_id, loc, le16_to_cpu(res));
+			phy_id, loc, le16_to_cpu(res));
 
 	return le16_to_cpu(res);
 }
@@ -416,13 +459,95 @@
 {
 	struct usbnet *dev = netdev_priv(netdev);
 	__le16 res = cpu_to_le16(val);
+	u8 smsr;
+	int i = 0;
+	int ret;
 
 	netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n",
-		   phy_id, loc, val);
+			phy_id, loc, val);
+
 	mutex_lock(&dev->phy_mutex);
-	asix_set_sw_mii(dev);
-	asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res);
-	asix_set_hw_mii(dev);
+	do {
+		ret = asix_set_sw_mii(dev, 0);
+		if (ret == -ENODEV)
+			break;
+		usleep_range(1000, 1100);
+		ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
+				    0, 0, 1, &smsr, 0);
+	} while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+	if (ret == -ENODEV) {
+		mutex_unlock(&dev->phy_mutex);
+		return;
+	}
+
+	asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id,
+		       (__u16)loc, 2, &res, 0);
+	asix_set_hw_mii(dev, 0);
+	mutex_unlock(&dev->phy_mutex);
+}
+
+int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	__le16 res;
+	u8 smsr;
+	int i = 0;
+	int ret;
+
+	mutex_lock(&dev->phy_mutex);
+	do {
+		ret = asix_set_sw_mii(dev, 1);
+		if (ret == -ENODEV)
+			break;
+		usleep_range(1000, 1100);
+		ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
+				    0, 0, 1, &smsr, 1);
+	} while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+	if (ret == -ENODEV) {
+		mutex_unlock(&dev->phy_mutex);
+		return ret;
+	}
+
+	asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id,
+		      (__u16)loc, 2, &res, 1);
+	asix_set_hw_mii(dev, 1);
+	mutex_unlock(&dev->phy_mutex);
+
+	netdev_dbg(dev->net, "asix_mdio_read_nopm() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n",
+			phy_id, loc, le16_to_cpu(res));
+
+	return le16_to_cpu(res);
+}
+
+void
+asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	__le16 res = cpu_to_le16(val);
+	u8 smsr;
+	int i = 0;
+	int ret;
+
+	netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n",
+			phy_id, loc, val);
+
+	mutex_lock(&dev->phy_mutex);
+	do {
+		ret = asix_set_sw_mii(dev, 1);
+		if (ret == -ENODEV)
+			break;
+		usleep_range(1000, 1100);
+		ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG,
+				    0, 0, 1, &smsr, 1);
+	} while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV));
+	if (ret == -ENODEV) {
+		mutex_unlock(&dev->phy_mutex);
+		return;
+	}
+
+	asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id,
+		       (__u16)loc, 2, &res, 1);
+	asix_set_hw_mii(dev, 1);
 	mutex_unlock(&dev->phy_mutex);
 }
 
@@ -431,7 +556,8 @@
 	struct usbnet *dev = netdev_priv(net);
 	u8 opt;
 
-	if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE, 0, 0, 1, &opt) < 0) {
+	if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE,
+			  0, 0, 1, &opt, 0) < 0) {
 		wolinfo->supported = 0;
 		wolinfo->wolopts = 0;
 		return;
@@ -455,7 +581,7 @@
 		opt |= AX_MONITOR_MAGIC;
 
 	if (asix_write_cmd(dev, AX_CMD_WRITE_MONITOR_MODE,
-			      opt, 0, 0, NULL) < 0)
+			      opt, 0, 0, NULL, 0) < 0)
 		return -EINVAL;
 
 	return 0;
@@ -490,7 +616,7 @@
 	/* ax8817x returns 2 bytes from eeprom on read */
 	for (i = first_word; i <= last_word; i++) {
 		if (asix_read_cmd(dev, AX_CMD_READ_EEPROM, i, 0, 2,
-				  &(eeprom_buff[i - first_word])) < 0) {
+				  &eeprom_buff[i - first_word], 0) < 0) {
 			kfree(eeprom_buff);
 			return -EIO;
 		}
@@ -531,7 +657,7 @@
 	   the EEPROM */
 	if (eeprom->offset & 1) {
 		ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, first_word, 0, 2,
-				    &(eeprom_buff[0]));
+				    &eeprom_buff[0], 0);
 		if (ret < 0) {
 			netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", first_word);
 			goto free;
@@ -540,7 +666,7 @@
 
 	if ((eeprom->offset + eeprom->len) & 1) {
 		ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, last_word, 0, 2,
-				    &(eeprom_buff[last_word - first_word]));
+				    &eeprom_buff[last_word - first_word], 0);
 		if (ret < 0) {
 			netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", last_word);
 			goto free;
@@ -550,7 +676,7 @@
 	memcpy((u8 *)eeprom_buff + (eeprom->offset & 1), data, eeprom->len);
 
 	/* write data to EEPROM */
-	ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL, 0);
 	if (ret < 0) {
 		netdev_err(net, "Failed to enable EEPROM write\n");
 		goto free;
@@ -561,7 +687,7 @@
 		netdev_dbg(net, "write to EEPROM at offset 0x%02x, data 0x%04x\n",
 			   i, eeprom_buff[i - first_word]);
 		ret = asix_write_cmd(dev, AX_CMD_WRITE_EEPROM, i,
-				     eeprom_buff[i - first_word], 0, NULL);
+				     eeprom_buff[i - first_word], 0, NULL, 0);
 		if (ret < 0) {
 			netdev_err(net, "Failed to write EEPROM at offset 0x%02x.\n",
 				   i);
@@ -570,7 +696,7 @@
 		msleep(20);
 	}
 
-	ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL, 0);
 	if (ret < 0) {
 		netdev_err(net, "Failed to disable EEPROM write\n");
 		goto free;
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index 5cabefc..cce2495 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -35,6 +35,15 @@
 
 #define	PHY_MODE_RTL8211CL	0x000C
 
+#define AX88772A_PHY14H		0x14
+#define AX88772A_PHY14H_DEFAULT 0x442C
+
+#define AX88772A_PHY15H		0x15
+#define AX88772A_PHY15H_DEFAULT 0x03C8
+
+#define AX88772A_PHY16H		0x16
+#define AX88772A_PHY16H_DEFAULT 0x4044
+
 struct ax88172_int_data {
 	__le16 res1;
 	u8 link;
@@ -79,6 +88,8 @@
 	/* Poll for the rare case the FW or phy isn't ready yet.  */
 	for (i = 0; i < 100; i++) {
 		phy_reg = asix_mdio_read(dev->net, dev->mii.phy_id, MII_PHYSID1);
+		if (phy_reg < 0)
+			return 0;
 		if (phy_reg != 0 && phy_reg != 0xFFFF)
 			break;
 		mdelay(1);
@@ -184,7 +195,7 @@
 	netdev_dbg(dev->net, "ax88172_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
 		   ethtool_cmd_speed(&ecmd), ecmd.duplex, mode);
 
-	asix_write_medium_mode(dev, mode);
+	asix_write_medium_mode(dev, mode, 0);
 
 	return 0;
 }
@@ -201,6 +212,28 @@
 	.ndo_set_rx_mode	= ax88172_set_multicast,
 };
 
+static void asix_phy_reset(struct usbnet *dev, unsigned int reset_bits)
+{
+	unsigned int timeout = 5000;
+
+	asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, reset_bits);
+
+	/* give phy_id a chance to process reset */
+	udelay(500);
+
+	/* See IEEE 802.3 "22.2.4.1.1 Reset": 500ms max */
+	while (timeout--) {
+		if (asix_mdio_read(dev->net, dev->mii.phy_id, MII_BMCR)
+							& BMCR_RESET)
+			udelay(100);
+		else
+			return;
+	}
+
+	netdev_err(dev->net, "BMCR_RESET timeout on phy_id %d\n",
+		   dev->mii.phy_id);
+}
+
 static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
 {
 	int ret = 0;
@@ -213,18 +246,19 @@
 	/* Toggle the GPIOs in a manufacturer/model specific way */
 	for (i = 2; i >= 0; i--) {
 		ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS,
-				(gpio_bits >> (i * 8)) & 0xff, 0, 0, NULL);
+				(gpio_bits >> (i * 8)) & 0xff, 0, 0, NULL, 0);
 		if (ret < 0)
 			goto out;
 		msleep(5);
 	}
 
-	ret = asix_write_rx_ctl(dev, 0x80);
+	ret = asix_write_rx_ctl(dev, 0x80, 0);
 	if (ret < 0)
 		goto out;
 
 	/* Get the MAC address */
-	ret = asix_read_cmd(dev, AX88172_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf);
+	ret = asix_read_cmd(dev, AX88172_CMD_READ_NODE_ID,
+			    0, 0, ETH_ALEN, buf, 0);
 	if (ret < 0) {
 		netdev_dbg(dev->net, "read AX_CMD_READ_NODE_ID failed: %d\n",
 			   ret);
@@ -246,7 +280,7 @@
 	dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */
 	dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */
 
-	asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET);
+	asix_phy_reset(dev, BMCR_RESET);
 	asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE,
 		ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP);
 	mii_nway_restart(&dev->mii);
@@ -290,7 +324,7 @@
 	netdev_dbg(dev->net, "ax88772_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
 		   ethtool_cmd_speed(&ecmd), ecmd.duplex, mode);
 
-	asix_write_medium_mode(dev, mode);
+	asix_write_medium_mode(dev, mode, 0);
 
 	return 0;
 }
@@ -298,100 +332,115 @@
 static int ax88772_reset(struct usbnet *dev)
 {
 	struct asix_data *data = (struct asix_data *)&dev->data;
-	int ret, embd_phy;
-	u16 rx_ctl;
+	int ret;
 
-	ret = asix_write_gpio(dev,
-			AX_GPIO_RSE | AX_GPIO_GPO_2 | AX_GPIO_GPO2EN, 5);
+	/* Rewrite MAC address */
+	ether_addr_copy(data->mac_addr, dev->net->dev_addr);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0,
+			     ETH_ALEN, data->mac_addr, 0);
 	if (ret < 0)
 		goto out;
 
-	embd_phy = ((asix_get_phy_addr(dev) & 0x1f) == 0x10 ? 1 : 0);
+	/* Set RX_CTL to default values with 2k buffer, and enable cactus */
+	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0);
+	if (ret < 0)
+		goto out;
 
-	ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL);
+	asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0);
+	if (ret < 0)
+		goto out;
+
+	return 0;
+
+out:
+	return ret;
+}
+
+static int ax88772_hw_reset(struct usbnet *dev, int in_pm)
+{
+	struct asix_data *data = (struct asix_data *)&dev->data;
+	int ret, embd_phy;
+	u16 rx_ctl;
+
+	ret = asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_2 |
+			      AX_GPIO_GPO2EN, 5, in_pm);
+	if (ret < 0)
+		goto out;
+
+	embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0);
+
+	ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy,
+			     0, 0, NULL, in_pm);
 	if (ret < 0) {
 		netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
 		goto out;
 	}
 
-	ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_PRL);
-	if (ret < 0)
-		goto out;
-
-	msleep(150);
-
-	ret = asix_sw_reset(dev, AX_SWRESET_CLEAR);
-	if (ret < 0)
-		goto out;
-
-	msleep(150);
-
 	if (embd_phy) {
-		ret = asix_sw_reset(dev, AX_SWRESET_IPRL);
+		ret = asix_sw_reset(dev, AX_SWRESET_IPPD, in_pm);
+		if (ret < 0)
+			goto out;
+
+		usleep_range(10000, 11000);
+
+		ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, in_pm);
+		if (ret < 0)
+			goto out;
+
+		msleep(60);
+
+		ret = asix_sw_reset(dev, AX_SWRESET_IPRL | AX_SWRESET_PRL,
+				    in_pm);
 		if (ret < 0)
 			goto out;
 	} else {
-		ret = asix_sw_reset(dev, AX_SWRESET_PRTE);
+		ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_PRL,
+				    in_pm);
 		if (ret < 0)
 			goto out;
 	}
 
 	msleep(150);
-	rx_ctl = asix_read_rx_ctl(dev);
-	netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl);
-	ret = asix_write_rx_ctl(dev, 0x0000);
+
+	if (in_pm && (!asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+					   MII_PHYSID1))){
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm);
 	if (ret < 0)
 		goto out;
 
-	rx_ctl = asix_read_rx_ctl(dev);
-	netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl);
-
-	ret = asix_sw_reset(dev, AX_SWRESET_PRL);
-	if (ret < 0)
-		goto out;
-
-	msleep(150);
-
-	ret = asix_sw_reset(dev, AX_SWRESET_IPRL | AX_SWRESET_PRL);
-	if (ret < 0)
-		goto out;
-
-	msleep(150);
-
-	asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET);
-	asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE,
-			ADVERTISE_ALL | ADVERTISE_CSMA);
-	mii_nway_restart(&dev->mii);
-
-	ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT);
+	ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, in_pm);
 	if (ret < 0)
 		goto out;
 
 	ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0,
-				AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT,
-				AX88772_IPG2_DEFAULT, 0, NULL);
+			     AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT,
+			     AX88772_IPG2_DEFAULT, 0, NULL, in_pm);
 	if (ret < 0) {
 		netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret);
 		goto out;
 	}
 
 	/* Rewrite MAC address */
-	memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN);
-	ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN,
-							data->mac_addr);
+	ether_addr_copy(data->mac_addr, dev->net->dev_addr);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0,
+			     ETH_ALEN, data->mac_addr, in_pm);
 	if (ret < 0)
 		goto out;
 
 	/* Set RX_CTL to default values with 2k buffer, and enable cactus */
-	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL);
+	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm);
 	if (ret < 0)
 		goto out;
 
-	rx_ctl = asix_read_rx_ctl(dev);
+	rx_ctl = asix_read_rx_ctl(dev, in_pm);
 	netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n",
 		   rx_ctl);
 
-	rx_ctl = asix_read_medium_status(dev);
+	rx_ctl = asix_read_medium_status(dev, in_pm);
 	netdev_dbg(dev->net,
 		   "Medium Status is 0x%04x after all initializations\n",
 		   rx_ctl);
@@ -400,7 +449,140 @@
 
 out:
 	return ret;
+}
 
+static int ax88772a_hw_reset(struct usbnet *dev, int in_pm)
+{
+	struct asix_data *data = (struct asix_data *)&dev->data;
+	int ret, embd_phy;
+	u16 rx_ctl, phy14h, phy15h, phy16h;
+	u8 chipcode = 0;
+
+	ret = asix_write_gpio(dev, AX_GPIO_RSE, 5, in_pm);
+	if (ret < 0)
+		goto out;
+
+	embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0);
+
+	ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy |
+			     AX_PHYSEL_SSEN, 0, 0, NULL, in_pm);
+	if (ret < 0) {
+		netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
+		goto out;
+	}
+	usleep_range(10000, 11000);
+
+	ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_IPRL, in_pm);
+	if (ret < 0)
+		goto out;
+
+	usleep_range(10000, 11000);
+
+	ret = asix_sw_reset(dev, AX_SWRESET_IPRL, in_pm);
+	if (ret < 0)
+		goto out;
+
+	msleep(160);
+
+	ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, in_pm);
+	if (ret < 0)
+		goto out;
+
+	ret = asix_sw_reset(dev, AX_SWRESET_IPRL, in_pm);
+	if (ret < 0)
+		goto out;
+
+	msleep(200);
+
+	if (in_pm && (!asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+					   MII_PHYSID1))) {
+		ret = -1;
+		goto out;
+	}
+
+	ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0,
+			    0, 1, &chipcode, in_pm);
+	if (ret < 0)
+		goto out;
+
+	if ((chipcode & AX_CHIPCODE_MASK) == AX_AX88772B_CHIPCODE) {
+		ret = asix_write_cmd(dev, AX_QCTCTRL, 0x8000, 0x8001,
+				     0, NULL, in_pm);
+		if (ret < 0) {
+			netdev_dbg(dev->net, "Write BQ setting failed: %d\n",
+				   ret);
+			goto out;
+		}
+	} else if ((chipcode & AX_CHIPCODE_MASK) == AX_AX88772A_CHIPCODE) {
+		/* Check if the PHY registers have default settings */
+		phy14h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+					     AX88772A_PHY14H);
+		phy15h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+					     AX88772A_PHY15H);
+		phy16h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id,
+					     AX88772A_PHY16H);
+
+		netdev_dbg(dev->net,
+			   "772a_hw_reset: MR20=0x%x MR21=0x%x MR22=0x%x\n",
+			   phy14h, phy15h, phy16h);
+
+		/* Restore PHY registers default setting if not */
+		if (phy14h != AX88772A_PHY14H_DEFAULT)
+			asix_mdio_write_nopm(dev->net, dev->mii.phy_id,
+					     AX88772A_PHY14H,
+					     AX88772A_PHY14H_DEFAULT);
+		if (phy15h != AX88772A_PHY15H_DEFAULT)
+			asix_mdio_write_nopm(dev->net, dev->mii.phy_id,
+					     AX88772A_PHY15H,
+					     AX88772A_PHY15H_DEFAULT);
+		if (phy16h != AX88772A_PHY16H_DEFAULT)
+			asix_mdio_write_nopm(dev->net, dev->mii.phy_id,
+					     AX88772A_PHY16H,
+					     AX88772A_PHY16H_DEFAULT);
+	}
+
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0,
+				AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT,
+				AX88772_IPG2_DEFAULT, 0, NULL, in_pm);
+	if (ret < 0) {
+		netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret);
+		goto out;
+	}
+
+	/* Rewrite MAC address */
+	memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN);
+	ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN,
+							data->mac_addr, in_pm);
+	if (ret < 0)
+		goto out;
+
+	/* Set RX_CTL to default values with 2k buffer, and enable cactus */
+	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm);
+	if (ret < 0)
+		goto out;
+
+	ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, in_pm);
+	if (ret < 0)
+		return ret;
+
+	/* Set RX_CTL to default values with 2k buffer, and enable cactus */
+	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm);
+	if (ret < 0)
+		goto out;
+
+	rx_ctl = asix_read_rx_ctl(dev, in_pm);
+	netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n",
+		   rx_ctl);
+
+	rx_ctl = asix_read_medium_status(dev, in_pm);
+	netdev_dbg(dev->net,
+		   "Medium Status is 0x%04x after all initializations\n",
+		   rx_ctl);
+
+	return 0;
+
+out:
+	return ret;
 }
 
 static const struct net_device_ops ax88772_netdev_ops = {
@@ -415,11 +597,97 @@
 	.ndo_set_rx_mode        = asix_set_multicast,
 };
 
+static void ax88772_suspend(struct usbnet *dev)
+{
+	struct asix_common_private *priv = dev->driver_priv;
+	u16 medium;
+
+	/* Stop MAC operation */
+	medium = asix_read_medium_status(dev, 0);
+	medium &= ~AX_MEDIUM_RE;
+	asix_write_medium_mode(dev, medium, 0);
+
+	netdev_dbg(dev->net, "ax88772_suspend: medium=0x%04x\n",
+		   asix_read_medium_status(dev, 0));
+
+	/* Preserve BMCR for restoring */
+	priv->presvd_phy_bmcr =
+		asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_BMCR);
+
+	/* Preserve ANAR for restoring */
+	priv->presvd_phy_advertise =
+		asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE);
+}
+
+static int asix_suspend(struct usb_interface *intf, pm_message_t message)
+{
+	struct usbnet *dev = usb_get_intfdata(intf);
+	struct asix_common_private *priv = dev->driver_priv;
+
+	if (priv->suspend)
+		priv->suspend(dev);
+
+	return usbnet_suspend(intf, message);
+}
+
+static void ax88772_restore_phy(struct usbnet *dev)
+{
+	struct asix_common_private *priv = dev->driver_priv;
+
+	if (priv->presvd_phy_advertise) {
+		/* Restore Advertisement control reg */
+		asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE,
+				     priv->presvd_phy_advertise);
+
+		/* Restore BMCR */
+		asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_BMCR,
+				     priv->presvd_phy_bmcr);
+
+		mii_nway_restart(&dev->mii);
+		priv->presvd_phy_advertise = 0;
+		priv->presvd_phy_bmcr = 0;
+	}
+}
+
+static void ax88772_resume(struct usbnet *dev)
+{
+	int i;
+
+	for (i = 0; i < 3; i++)
+		if (!ax88772_hw_reset(dev, 1))
+			break;
+	ax88772_restore_phy(dev);
+}
+
+static void ax88772a_resume(struct usbnet *dev)
+{
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		if (!ax88772a_hw_reset(dev, 1))
+			break;
+	}
+
+	ax88772_restore_phy(dev);
+}
+
+static int asix_resume(struct usb_interface *intf)
+{
+	struct usbnet *dev = usb_get_intfdata(intf);
+	struct asix_common_private *priv = dev->driver_priv;
+
+	if (priv->resume)
+		priv->resume(dev);
+
+	return usbnet_resume(intf);
+}
+
 static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
 {
-	int ret, embd_phy, i;
-	u8 buf[ETH_ALEN];
+	int ret, i;
+	u8 buf[ETH_ALEN], chipcode = 0;
 	u32 phyid;
+	struct asix_common_private *priv;
 
 	usbnet_get_endpoints(dev,intf);
 
@@ -427,13 +695,13 @@
 	if (dev->driver_info->data & FLAG_EEPROM_MAC) {
 		for (i = 0; i < (ETH_ALEN >> 1); i++) {
 			ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x04 + i,
-					0, 2, buf + i * 2);
+					    0, 2, buf + i * 2, 0);
 			if (ret < 0)
 				break;
 		}
 	} else {
 		ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID,
-				0, 0, ETH_ALEN, buf);
+				0, 0, ETH_ALEN, buf, 0);
 	}
 
 	if (ret < 0) {
@@ -456,16 +724,11 @@
 	dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */
 	dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */
 
-	embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0);
+	asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0);
+	chipcode &= AX_CHIPCODE_MASK;
 
-	/* Reset the PHY to normal operation mode */
-	ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL);
-	if (ret < 0) {
-		netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret);
-		return ret;
-	}
-
-	ax88772_reset(dev);
+	(chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) :
+					    ax88772a_hw_reset(dev, 0);
 
 	/* Read PHYID register *AFTER* the PHY was reset properly */
 	phyid = asix_get_phyid(dev);
@@ -482,6 +745,18 @@
 	if (!dev->driver_priv)
 		return -ENOMEM;
 
+	priv = dev->driver_priv;
+
+	priv->presvd_phy_bmcr = 0;
+	priv->presvd_phy_advertise = 0;
+	if (chipcode == AX_AX88772_CHIPCODE) {
+		priv->resume = ax88772_resume;
+		priv->suspend = ax88772_suspend;
+	} else {
+		priv->resume = ax88772a_resume;
+		priv->suspend = ax88772_suspend;
+	}
+
 	return 0;
 }
 
@@ -593,12 +868,12 @@
 	int gpio0 = 0;
 	u32 phyid;
 
-	asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status);
+	asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0);
 	netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status);
 
-	asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL);
-	asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom);
-	asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL);
+	asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0);
+	asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0);
+	asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0);
 
 	netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom);
 
@@ -614,15 +889,16 @@
 	netdev_dbg(dev->net, "GPIO0: %d, PhyMode: %d\n", gpio0, data->phymode);
 
 	/* Power up external GigaPHY through AX88178 GPIO pin */
-	asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_1 | AX_GPIO_GPO1EN, 40);
+	asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_1 |
+			AX_GPIO_GPO1EN, 40, 0);
 	if ((le16_to_cpu(eeprom) >> 8) != 1) {
-		asix_write_gpio(dev, 0x003c, 30);
-		asix_write_gpio(dev, 0x001c, 300);
-		asix_write_gpio(dev, 0x003c, 30);
+		asix_write_gpio(dev, 0x003c, 30, 0);
+		asix_write_gpio(dev, 0x001c, 300, 0);
+		asix_write_gpio(dev, 0x003c, 30, 0);
 	} else {
 		netdev_dbg(dev->net, "gpio phymode == 1 path\n");
-		asix_write_gpio(dev, AX_GPIO_GPO1EN, 30);
-		asix_write_gpio(dev, AX_GPIO_GPO1EN | AX_GPIO_GPO_1, 30);
+		asix_write_gpio(dev, AX_GPIO_GPO1EN, 30, 0);
+		asix_write_gpio(dev, AX_GPIO_GPO1EN | AX_GPIO_GPO_1, 30, 0);
 	}
 
 	/* Read PHYID register *AFTER* powering up PHY */
@@ -630,15 +906,15 @@
 	netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid);
 
 	/* Set AX88178 to enable MII/GMII/RGMII interface for external PHY */
-	asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 0, 0, 0, NULL);
+	asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 0, 0, 0, NULL, 0);
 
-	asix_sw_reset(dev, 0);
+	asix_sw_reset(dev, 0, 0);
 	msleep(150);
 
-	asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD);
+	asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD, 0);
 	msleep(150);
 
-	asix_write_rx_ctl(dev, 0);
+	asix_write_rx_ctl(dev, 0, 0);
 
 	if (data->phymode == PHY_MODE_MARVELL) {
 		marvell_phy_init(dev);
@@ -646,27 +922,23 @@
 	} else if (data->phymode == PHY_MODE_RTL8211CL)
 		rtl8211cl_phy_init(dev);
 
-	asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR,
-			BMCR_RESET | BMCR_ANENABLE);
+	asix_phy_reset(dev, BMCR_RESET | BMCR_ANENABLE);
 	asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE,
 			ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP);
 	asix_mdio_write(dev->net, dev->mii.phy_id, MII_CTRL1000,
 			ADVERTISE_1000FULL);
 
+	asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT, 0);
 	mii_nway_restart(&dev->mii);
 
-	ret = asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT);
-	if (ret < 0)
-		return ret;
-
 	/* Rewrite MAC address */
 	memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN);
 	ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN,
-							data->mac_addr);
+							data->mac_addr, 0);
 	if (ret < 0)
 		return ret;
 
-	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL);
+	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0);
 	if (ret < 0)
 		return ret;
 
@@ -704,7 +976,7 @@
 	netdev_dbg(dev->net, "ax88178_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n",
 		   speed, ecmd.duplex, mode);
 
-	asix_write_medium_mode(dev, mode);
+	asix_write_medium_mode(dev, mode, 0);
 
 	if (data->phymode == PHY_MODE_MARVELL && data->ledmode)
 		marvell_led_status(dev, speed);
@@ -733,15 +1005,15 @@
 		mfb = AX_RX_CTL_MFB_16384;
 	}
 
-	rxctl = asix_read_rx_ctl(dev);
-	asix_write_rx_ctl(dev, (rxctl & ~AX_RX_CTL_MFB_16384) | mfb);
+	rxctl = asix_read_rx_ctl(dev, 0);
+	asix_write_rx_ctl(dev, (rxctl & ~AX_RX_CTL_MFB_16384) | mfb, 0);
 
-	medium = asix_read_medium_status(dev);
+	medium = asix_read_medium_status(dev, 0);
 	if (dev->net->mtu > 1500)
 		medium |= AX_MEDIUM_JFE;
 	else
 		medium &= ~AX_MEDIUM_JFE;
-	asix_write_medium_mode(dev, medium);
+	asix_write_medium_mode(dev, medium, 0);
 
 	if (dev->rx_urb_size > old_rx_urb_size)
 		usbnet_unlink_rx_urbs(dev);
@@ -790,7 +1062,7 @@
 	usbnet_get_endpoints(dev,intf);
 
 	/* Get the MAC address */
-	ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf);
+	ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
 	if (ret < 0) {
 		netdev_dbg(dev->net, "Failed to read MAC address: %d\n", ret);
 		return ret;
@@ -811,10 +1083,10 @@
 	dev->net->ethtool_ops = &ax88178_ethtool_ops;
 
 	/* Blink LEDS so users know driver saw dongle */
-	asix_sw_reset(dev, 0);
+	asix_sw_reset(dev, 0, 0);
 	msleep(150);
 
-	asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD);
+	asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD, 0);
 	msleep(150);
 
 	/* Asix framing packs multiple eth frames into a 2K usb bulk transfer */
@@ -877,7 +1149,7 @@
 	.unbind = ax88772_unbind,
 	.status = asix_status,
 	.link_reset = ax88772_link_reset,
-	.reset = ax88772_link_reset,
+	.reset = ax88772_reset,
 	.flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | FLAG_MULTI_PACKET,
 	.rx_fixup = asix_rx_fixup_common,
 	.tx_fixup = asix_tx_fixup,
@@ -1005,7 +1277,7 @@
 }, {
 	// Lenovo U2L100P 10/100
 	USB_DEVICE (0x17ef, 0x7203),
-	.driver_info = (unsigned long) &ax88772_info,
+	.driver_info = (unsigned long)&ax88772b_info,
 }, {
 	// ASIX AX88772B 10/100
 	USB_DEVICE (0x0b95, 0x772b),
@@ -1073,7 +1345,7 @@
 }, {
 	// Asus USB Ethernet Adapter
 	USB_DEVICE (0x0b95, 0x7e2b),
-	.driver_info = (unsigned long) &ax88772_info,
+	.driver_info = (unsigned long)&ax88772b_info,
 }, {
 	/* ASIX 88172a demo board */
 	USB_DEVICE(0x0b95, 0x172a),
@@ -1095,8 +1367,8 @@
 	.name =		DRIVER_NAME,
 	.id_table =	products,
 	.probe =	usbnet_probe,
-	.suspend =	usbnet_suspend,
-	.resume =	usbnet_resume,
+	.suspend =	asix_suspend,
+	.resume =	asix_resume,
 	.disconnect =	usbnet_disconnect,
 	.supports_autosuspend = 1,
 	.disable_hub_initiated_lpm = 1,
diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c
index 163a2c5..49a3bc1 100644
--- a/drivers/net/usb/ax88172a.c
+++ b/drivers/net/usb/ax88172a.c
@@ -81,7 +81,7 @@
 	}
 
 	if (mode != priv->oldmode) {
-		asix_write_medium_mode(dev, mode);
+		asix_write_medium_mode(dev, mode, 0);
 		priv->oldmode = mode;
 		netdev_dbg(netdev, "speed %u duplex %d, setting mode to 0x%04x\n",
 			   phydev->speed, phydev->duplex, mode);
@@ -176,18 +176,19 @@
 {
 	int ret;
 
-	ret = asix_sw_reset(dev, AX_SWRESET_IPPD);
+	ret = asix_sw_reset(dev, AX_SWRESET_IPPD, 0);
 	if (ret < 0)
 		goto err;
 
 	msleep(150);
-	ret = asix_sw_reset(dev, AX_SWRESET_CLEAR);
+	ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, 0);
 	if (ret < 0)
 		goto err;
 
 	msleep(150);
 
-	ret = asix_sw_reset(dev, embd_phy ? AX_SWRESET_IPRL : AX_SWRESET_IPPD);
+	ret = asix_sw_reset(dev, embd_phy ? AX_SWRESET_IPRL : AX_SWRESET_IPPD,
+			    0);
 	if (ret < 0)
 		goto err;
 
@@ -213,7 +214,7 @@
 	dev->driver_priv = priv;
 
 	/* Get the MAC address */
-	ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf);
+	ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
 	if (ret < 0) {
 		netdev_err(dev->net, "Failed to read MAC address: %d\n", ret);
 		goto free;
@@ -224,7 +225,7 @@
 	dev->net->ethtool_ops = &ax88172a_ethtool_ops;
 
 	/* are we using the internal or the external phy? */
-	ret = asix_read_cmd(dev, AX_CMD_SW_PHY_STATUS, 0, 0, 1, buf);
+	ret = asix_read_cmd(dev, AX_CMD_SW_PHY_STATUS, 0, 0, 1, buf, 0);
 	if (ret < 0) {
 		netdev_err(dev->net, "Failed to read software interface selection register: %d\n",
 			   ret);
@@ -303,20 +304,20 @@
 	ax88172a_reset_phy(dev, priv->use_embdphy);
 
 	msleep(150);
-	rx_ctl = asix_read_rx_ctl(dev);
+	rx_ctl = asix_read_rx_ctl(dev, 0);
 	netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl);
-	ret = asix_write_rx_ctl(dev, 0x0000);
+	ret = asix_write_rx_ctl(dev, 0x0000, 0);
 	if (ret < 0)
 		goto out;
 
-	rx_ctl = asix_read_rx_ctl(dev);
+	rx_ctl = asix_read_rx_ctl(dev, 0);
 	netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl);
 
 	msleep(150);
 
 	ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0,
 			     AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT,
-			     AX88772_IPG2_DEFAULT, 0, NULL);
+			     AX88772_IPG2_DEFAULT, 0, NULL, 0);
 	if (ret < 0) {
 		netdev_err(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret);
 		goto out;
@@ -325,20 +326,20 @@
 	/* Rewrite MAC address */
 	memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN);
 	ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN,
-			     data->mac_addr);
+			     data->mac_addr, 0);
 	if (ret < 0)
 		goto out;
 
 	/* Set RX_CTL to default values with 2k buffer, and enable cactus */
-	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL);
+	ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0);
 	if (ret < 0)
 		goto out;
 
-	rx_ctl = asix_read_rx_ctl(dev);
+	rx_ctl = asix_read_rx_ctl(dev, 0);
 	netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n",
 		   rx_ctl);
 
-	rx_ctl = asix_read_medium_status(dev);
+	rx_ctl = asix_read_medium_status(dev, 0);
 	netdev_dbg(dev->net, "Medium Status is 0x%04x after all initializations\n",
 		   rx_ctl);
 
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 4b44586..e7b5163 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -50,6 +50,8 @@
  *
  *****************************************************************************/
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/init.h>
@@ -108,23 +110,12 @@
 /*****************************************************************************/
 /* Debugging functions                                                       */
 /*****************************************************************************/
-#define D__(lvl_, fmt, arg...)				\
-	do {						\
-		printk(lvl_ "[%d:%s]: " fmt "\n",	\
-		       __LINE__, __func__, ## arg);	\
-	} while (0)
-
-#define D_(lvl, args...)				\
-	do {						\
-		if (lvl & debug)			\
-			D__(KERN_INFO, args);		\
-	} while (0)
-
-#define D1(args...)	D_(0x01, ##args)
-#define D2(args...)	D_(0x02, ##args)
-#define D3(args...)	D_(0x04, ##args)
-#define D4(args...)	D_(0x08, ##args)
-#define D5(args...)	D_(0x10, ##args)
+#define hso_dbg(lvl, fmt, ...)						\
+do {									\
+	if ((lvl) & debug)						\
+		pr_info("[%d:%s] " fmt,					\
+			__LINE__, __func__, ##__VA_ARGS__);		\
+} while (0)
 
 /*****************************************************************************/
 /* Enumerators                                                               */
@@ -649,7 +640,7 @@
 	}
 	spin_unlock_irqrestore(&serial_table_lock, flags);
 
-	printk(KERN_ERR "%s: no free serial devices in table\n", __func__);
+	pr_err("%s: no free serial devices in table\n", __func__);
 	return -1;
 }
 
@@ -709,7 +700,8 @@
 	}
 
 	/* log a meaningful explanation of an USB status */
-	D1("%s: received USB status - %s (%d)", function, explanation, status);
+	hso_dbg(0x1, "%s: received USB status - %s (%d)\n",
+		function, explanation, status);
 }
 
 /* Network interface functions */
@@ -808,7 +800,7 @@
 	DUMP1(skb->data, skb->len);
 	/* Copy it from kernel memory to OUR memory */
 	memcpy(odev->mux_bulk_tx_buf, skb->data, skb->len);
-	D1("len: %d/%d", skb->len, MUX_BULK_TX_BUF_SIZE);
+	hso_dbg(0x1, "len: %d/%d\n", skb->len, MUX_BULK_TX_BUF_SIZE);
 
 	/* Fill in the URB for shipping it out. */
 	usb_fill_bulk_urb(odev->mux_bulk_tx_urb,
@@ -872,7 +864,7 @@
 	unsigned char *tmp_rx_buf;
 
 	/* log if needed */
-	D1("Rx %d bytes", count);
+	hso_dbg(0x1, "Rx %d bytes\n", count);
 	DUMP(ip_pkt, min(128, (int)count));
 
 	while (count) {
@@ -912,7 +904,7 @@
 								    frame_len);
 				if (!odev->skb_rx_buf) {
 					/* We got no receive buffer. */
-					D1("could not allocate memory");
+					hso_dbg(0x1, "could not allocate memory\n");
 					odev->rx_parse_state = WAIT_SYNC;
 					continue;
 				}
@@ -972,11 +964,11 @@
 			break;
 
 		case WAIT_SYNC:
-			D1(" W_S");
+			hso_dbg(0x1, " W_S\n");
 			count = 0;
 			break;
 		default:
-			D1(" ");
+			hso_dbg(0x1, "\n");
 			count--;
 			break;
 		}
@@ -1020,7 +1012,7 @@
 
 	/* Sanity check */
 	if (!odev || !test_bit(HSO_NET_RUNNING, &odev->flags)) {
-		D1("BULK IN callback but driver is not active!");
+		hso_dbg(0x1, "BULK IN callback but driver is not active!\n");
 		return;
 	}
 	usb_mark_last_busy(urb->dev);
@@ -1112,11 +1104,11 @@
 	struct hso_serial *serial = tty->driver_data;
 
 	if (!serial) {
-		printk(KERN_ERR "%s: no tty structures", __func__);
+		pr_err("%s: no tty structures", __func__);
 		return;
 	}
 
-	D4("port %d", serial->minor);
+	hso_dbg(0x8, "port %d\n", serial->minor);
 
 	/*
 	 *	Fix up unsupported bits
@@ -1205,11 +1197,11 @@
 	struct hso_serial *serial = urb->context;
 	int status = urb->status;
 
-	D4("\n--- Got serial_read_bulk callback %02x ---", status);
+	hso_dbg(0x8, "--- Got serial_read_bulk callback %02x ---\n", status);
 
 	/* sanity check */
 	if (!serial) {
-		D1("serial == NULL");
+		hso_dbg(0x1, "serial == NULL\n");
 		return;
 	}
 	if (status) {
@@ -1217,7 +1209,7 @@
 		return;
 	}
 
-	D1("Actual length = %d\n", urb->actual_length);
+	hso_dbg(0x1, "Actual length = %d\n", urb->actual_length);
 	DUMP1(urb->transfer_buffer, urb->actual_length);
 
 	/* Anyone listening? */
@@ -1266,7 +1258,7 @@
 	if (serial == NULL || serial->magic != HSO_SERIAL_MAGIC) {
 		WARN_ON(1);
 		tty->driver_data = NULL;
-		D1("Failed to open port");
+		hso_dbg(0x1, "Failed to open port\n");
 		return -ENODEV;
 	}
 
@@ -1275,7 +1267,7 @@
 	if (result < 0)
 		goto err_out;
 
-	D1("Opening %d", serial->minor);
+	hso_dbg(0x1, "Opening %d\n", serial->minor);
 
 	/* setup */
 	tty->driver_data = serial;
@@ -1298,7 +1290,7 @@
 			kref_get(&serial->parent->ref);
 		}
 	} else {
-		D1("Port was already open");
+		hso_dbg(0x1, "Port was already open\n");
 	}
 
 	usb_autopm_put_interface(serial->parent->interface);
@@ -1317,7 +1309,7 @@
 	struct hso_serial *serial = tty->driver_data;
 	u8 usb_gone;
 
-	D1("Closing serial port");
+	hso_dbg(0x1, "Closing serial port\n");
 
 	/* Open failed, no close cleanup required */
 	if (serial == NULL)
@@ -1357,7 +1349,7 @@
 
 	/* sanity check */
 	if (serial == NULL) {
-		printk(KERN_ERR "%s: serial is NULL\n", __func__);
+		pr_err("%s: serial is NULL\n", __func__);
 		return -ENODEV;
 	}
 
@@ -1412,8 +1404,8 @@
 	unsigned long flags;
 
 	if (old)
-		D5("Termios called with: cflags new[%d] - old[%d]",
-		   tty->termios.c_cflag, old->c_cflag);
+		hso_dbg(0x16, "Termios called with: cflags new[%d] - old[%d]\n",
+			tty->termios.c_cflag, old->c_cflag);
 
 	/* the actual setup */
 	spin_lock_irqsave(&serial->serial_lock, flags);
@@ -1649,7 +1641,7 @@
 
 	/* sanity check */
 	if (!serial) {
-		D1("no tty structures");
+		hso_dbg(0x1, "no tty structures\n");
 		return -EINVAL;
 	}
 	spin_lock_irq(&serial->serial_lock);
@@ -1682,7 +1674,7 @@
 
 	/* sanity check */
 	if (!serial) {
-		D1("no tty structures");
+		hso_dbg(0x1, "no tty structures\n");
 		return -EINVAL;
 	}
 
@@ -1721,7 +1713,7 @@
 {
 	struct hso_serial *serial = tty->driver_data;
 	int ret = 0;
-	D4("IOCTL cmd: %d, arg: %ld", cmd, arg);
+	hso_dbg(0x8, "IOCTL cmd: %d, arg: %ld\n", cmd, arg);
 
 	if (!serial)
 		return -ENODEV;
@@ -1783,7 +1775,7 @@
 
 	/* Sanity check */
 	if (!serial || !ctrl_urb || !ctrl_req) {
-		printk(KERN_ERR "%s: Wrong arguments\n", __func__);
+		pr_err("%s: Wrong arguments\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1808,9 +1800,9 @@
 		pipe = usb_sndctrlpipe(serial->parent->usb, 0);
 	}
 	/* syslog */
-	D2("%s command (%02x) len: %d, port: %d",
-	   type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write",
-	   ctrl_req->bRequestType, ctrl_req->wLength, port);
+	hso_dbg(0x2, "%s command (%02x) len: %d, port: %d\n",
+		type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write",
+		ctrl_req->bRequestType, ctrl_req->wLength, port);
 
 	/* Load ctrl urb */
 	ctrl_urb->transfer_flags = 0;
@@ -1876,11 +1868,11 @@
 		handle_usb_error(status, __func__, NULL);
 		return;
 	}
-	D4("\n--- Got intr callback 0x%02X ---", status);
+	hso_dbg(0x8, "--- Got intr callback 0x%02X ---\n", status);
 
 	/* what request? */
 	port_req = urb->transfer_buffer;
-	D4(" port_req = 0x%.2X\n", *port_req);
+	hso_dbg(0x8, "port_req = 0x%.2X\n", *port_req);
 	/* loop over all muxed ports to find the one sending this */
 	for (i = 0; i < 8; i++) {
 		/* max 8 channels on MUX */
@@ -1888,7 +1880,8 @@
 			serial = get_serial_by_shared_int_and_type(shared_int,
 								   (1 << i));
 			if (serial != NULL) {
-				D1("Pending read interrupt on port %d\n", i);
+				hso_dbg(0x1, "Pending read interrupt on port %d\n",
+					i);
 				spin_lock(&serial->serial_lock);
 				if (serial->rx_state == RX_IDLE &&
 					serial->port.count > 0) {
@@ -1900,8 +1893,8 @@
 					} else
 						serial->rx_state = RX_PENDING;
 				} else {
-					D1("Already a read pending on "
-					   "port %d or port not open\n", i);
+					hso_dbg(0x1, "Already a read pending on port %d or port not open\n",
+						i);
 				}
 				spin_unlock(&serial->serial_lock);
 			}
@@ -1933,7 +1926,7 @@
 
 	/* sanity check */
 	if (!serial) {
-		D1("serial == NULL");
+		hso_dbg(0x1, "serial == NULL\n");
 		return;
 	}
 
@@ -1948,7 +1941,7 @@
 	tty_port_tty_wakeup(&serial->port);
 	hso_kick_transmit(serial);
 
-	D1(" ");
+	hso_dbg(0x1, "\n");
 }
 
 /* called for writing diag or CS serial port */
@@ -1996,8 +1989,8 @@
 
 	/* what request? */
 	req = (struct usb_ctrlrequest *)(urb->setup_packet);
-	D4("\n--- Got muxed ctrl callback 0x%02X ---", status);
-	D4("Actual length of urb = %d\n", urb->actual_length);
+	hso_dbg(0x8, "--- Got muxed ctrl callback 0x%02X ---\n", status);
+	hso_dbg(0x8, "Actual length of urb = %d\n", urb->actual_length);
 	DUMP1(urb->transfer_buffer, urb->actual_length);
 
 	if (req->bRequestType ==
@@ -2023,7 +2016,7 @@
 
 	/* Sanity check */
 	if (urb == NULL || serial == NULL) {
-		D1("serial = NULL");
+		hso_dbg(0x1, "serial = NULL\n");
 		return -2;
 	}
 
@@ -2035,7 +2028,7 @@
 	}
 
 	/* Push data to tty */
-	D1("data to push to tty");
+	hso_dbg(0x1, "data to push to tty\n");
 	count = tty_buffer_request_room(&serial->port, urb->actual_length);
 	if (count >= urb->actual_length) {
 		tty_insert_flip_string(&serial->port, urb->transfer_buffer,
@@ -2300,10 +2293,8 @@
 	serial->rx_data_length = rx_size;
 	for (i = 0; i < serial->num_rx_urbs; i++) {
 		serial->rx_urb[i] = usb_alloc_urb(0, GFP_KERNEL);
-		if (!serial->rx_urb[i]) {
-			dev_err(dev, "Could not allocate urb?\n");
+		if (!serial->rx_urb[i])
 			goto exit;
-		}
 		serial->rx_urb[i]->transfer_buffer = NULL;
 		serial->rx_urb[i]->transfer_buffer_length = 0;
 		serial->rx_data[i] = kzalloc(serial->rx_data_length,
@@ -2314,10 +2305,8 @@
 
 	/* TX, allocate urb and initialize */
 	serial->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!serial->tx_urb) {
-		dev_err(dev, "Could not allocate urb?\n");
+	if (!serial->tx_urb)
 		goto exit;
-	}
 	serial->tx_urb->transfer_buffer = NULL;
 	serial->tx_urb->transfer_buffer_length = 0;
 	/* prepare our TX buffer */
@@ -2419,7 +2408,7 @@
 {
 	struct hso_net *hso_net = netdev_priv(net);
 
-	D1("sizeof hso_net is %d", (int)sizeof(*hso_net));
+	hso_dbg(0x1, "sizeof hso_net is %zu\n", sizeof(*hso_net));
 
 	/* fill in the other fields */
 	net->netdev_ops = &hso_netdev_ops;
@@ -2555,20 +2544,16 @@
 	/* start allocating */
 	for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) {
 		hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL);
-		if (!hso_net->mux_bulk_rx_urb_pool[i]) {
-			dev_err(&interface->dev, "Could not allocate rx urb\n");
+		if (!hso_net->mux_bulk_rx_urb_pool[i])
 			goto exit;
-		}
 		hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE,
 							   GFP_KERNEL);
 		if (!hso_net->mux_bulk_rx_buf_pool[i])
 			goto exit;
 	}
 	hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!hso_net->mux_bulk_tx_urb) {
-		dev_err(&interface->dev, "Could not allocate tx urb\n");
+	if (!hso_net->mux_bulk_tx_urb)
 		goto exit;
-	}
 	hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL);
 	if (!hso_net->mux_bulk_tx_buf)
 		goto exit;
@@ -2787,10 +2772,8 @@
 	}
 
 	mux->shared_intr_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!mux->shared_intr_urb) {
-		dev_err(&interface->dev, "Could not allocate intr urb?\n");
+	if (!mux->shared_intr_urb)
 		goto exit;
-	}
 	mux->shared_intr_buf =
 		kzalloc(le16_to_cpu(mux->intr_endp->wMaxPacketSize),
 			GFP_KERNEL);
@@ -3239,7 +3222,7 @@
 	int result;
 
 	/* put it in the log */
-	printk(KERN_INFO "hso: %s\n", version);
+	pr_info("%s\n", version);
 
 	/* Initialise the serial table semaphore and table */
 	spin_lock_init(&serial_table_lock);
@@ -3270,16 +3253,15 @@
 	/* register the tty driver */
 	result = tty_register_driver(tty_drv);
 	if (result) {
-		printk(KERN_ERR "%s - tty_register_driver failed(%d)\n",
-			__func__, result);
+		pr_err("%s - tty_register_driver failed(%d)\n",
+		       __func__, result);
 		goto err_free_tty;
 	}
 
 	/* register this module as an usb driver */
 	result = usb_register(&hso_driver);
 	if (result) {
-		printk(KERN_ERR "Could not register hso driver? error: %d\n",
-			result);
+		pr_err("Could not register hso driver - error: %d\n", result);
 		goto err_unreg_tty;
 	}
 
@@ -3294,7 +3276,7 @@
 
 static void __exit hso_exit(void)
 {
-	printk(KERN_INFO "hso: unloaded\n");
+	pr_info("unloaded\n");
 
 	tty_unregister_driver(tty_drv);
 	put_tty_driver(tty_drv);
@@ -3311,7 +3293,7 @@
 MODULE_LICENSE(MOD_LICENSE);
 
 /* change the debug level (eg: insmod hso.ko debug=0x04) */
-MODULE_PARM_DESC(debug, "Level of debug [0x01 | 0x02 | 0x04 | 0x08 | 0x10]");
+MODULE_PARM_DESC(debug, "debug level mask [0x01 | 0x02 | 0x04 | 0x08 | 0x10]");
 module_param(debug, int, S_IRUGO | S_IWUSR);
 
 /* set the major tty number (eg: insmod hso.ko tty_major=245) */
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index 770212b..66b34dd 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -265,8 +265,6 @@
 	struct usb_ctrlrequest *dr;
 	int retval;
 
-	netdev_dbg(kaweth->net, "kaweth_control()\n");
-
 	if(in_interrupt()) {
 		netdev_dbg(kaweth->net, "in_interrupt()\n");
 		return -EBUSY;
@@ -300,8 +298,6 @@
 {
 	int retval;
 
-	netdev_dbg(kaweth->net, "Reading kaweth configuration\n");
-
 	retval = kaweth_control(kaweth,
 				usb_rcvctrlpipe(kaweth->dev, 0),
 				KAWETH_COMMAND_GET_ETHERNET_DESC,
@@ -451,8 +447,6 @@
 	kaweth->firmware_buf[6] = 0x00;
 	kaweth->firmware_buf[7] = 0x00;
 
-	netdev_dbg(kaweth->net, "Triggering firmware\n");
-
 	return kaweth_control(kaweth,
 			      usb_sndctrlpipe(kaweth->dev, 0),
 			      KAWETH_COMMAND_SCAN,
@@ -471,7 +465,6 @@
 {
 	int result;
 
-	netdev_dbg(kaweth->net, "kaweth_reset(%p)\n", kaweth);
 	result = usb_reset_configuration(kaweth->dev);
 	mdelay(10);
 
@@ -685,8 +678,6 @@
 	struct kaweth_device *kaweth = netdev_priv(net);
 	int res;
 
-	netdev_dbg(kaweth->net, "Opening network device.\n");
-
 	res = usb_autopm_get_interface(kaweth->intf);
 	if (res) {
 		dev_err(&kaweth->intf->dev, "Interface cannot be resumed.\n");
@@ -951,7 +942,6 @@
 	struct kaweth_device *kaweth = usb_get_intfdata(intf);
 	unsigned long flags;
 
-	dev_dbg(&intf->dev, "Suspending device\n");
 	spin_lock_irqsave(&kaweth->device_lock, flags);
 	kaweth->status |= KAWETH_STATUS_SUSPENDING;
 	spin_unlock_irqrestore(&kaweth->device_lock, flags);
@@ -968,7 +958,6 @@
 	struct kaweth_device *kaweth = usb_get_intfdata(intf);
 	unsigned long flags;
 
-	dev_dbg(&intf->dev, "Resuming device\n");
 	spin_lock_irqsave(&kaweth->device_lock, flags);
 	kaweth->status &= ~KAWETH_STATUS_SUSPENDING;
 	spin_unlock_irqrestore(&kaweth->device_lock, flags);
@@ -1009,6 +998,7 @@
 	struct net_device *netdev;
 	const eth_addr_t bcast_addr = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
 	int result = 0;
+	int rv = -EIO;
 
 	dev_dbg(dev,
 		"Kawasaki Device Probe (Device number:%d): 0x%4.4x:0x%4.4x:0x%4.4x\n",
@@ -1029,6 +1019,7 @@
 	kaweth = netdev_priv(netdev);
 	kaweth->dev = udev;
 	kaweth->net = netdev;
+	kaweth->intf = intf;
 
 	spin_lock_init(&kaweth->device_lock);
 	init_waitqueue_head(&kaweth->term_wait);
@@ -1048,6 +1039,10 @@
 		/* Download the firmware */
 		dev_info(dev, "Downloading firmware...\n");
 		kaweth->firmware_buf = (__u8 *)__get_free_page(GFP_KERNEL);
+		if (!kaweth->firmware_buf) {
+			rv = -ENOMEM;
+			goto err_free_netdev;
+		}
 		if ((result = kaweth_download_firmware(kaweth,
 						      "kaweth/new_code.bin",
 						      100,
@@ -1139,8 +1134,6 @@
 
 	dev_dbg(dev, "Initializing net device.\n");
 
-	kaweth->intf = intf;
-
 	kaweth->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!kaweth->tx_urb)
 		goto err_free_netdev;
@@ -1186,8 +1179,6 @@
 	dev_info(dev, "kaweth interface created at %s\n",
 		 kaweth->net->name);
 
-	dev_dbg(dev, "Kaweth probe returning.\n");
-
 	return 0;
 
 err_intfdata:
@@ -1204,7 +1195,7 @@
 err_free_netdev:
 	free_netdev(netdev);
 
-	return -EIO;
+	return rv;
 }
 
 /****************************************************************
@@ -1215,8 +1206,6 @@
 	struct kaweth_device *kaweth = usb_get_intfdata(intf);
 	struct net_device *netdev;
 
-	dev_info(&intf->dev, "Unregistering\n");
-
 	usb_set_intfdata(intf, NULL);
 	if (!kaweth) {
 		dev_warn(&intf->dev, "unregistering non-existent device\n");
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 6a9d474..db558b8 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -1179,7 +1179,7 @@
  * NOTE:  annoying asymmetry:  if it's active, schedule_work() fails,
  * but tasklet_schedule() doesn't.	hope the failure is rare.
  */
-void lan78xx_defer_kevent(struct lan78xx_net *dev, int work)
+static void lan78xx_defer_kevent(struct lan78xx_net *dev, int work)
 {
 	set_bit(work, &dev->flags);
 	if (!schedule_delayed_work(&dev->wq, 0))
@@ -1406,7 +1406,7 @@
 	return net->phydev->link;
 }
 
-int lan78xx_nway_reset(struct net_device *net)
+static int lan78xx_nway_reset(struct net_device *net)
 {
 	return phy_start_aneg(net->phydev);
 }
@@ -1997,7 +1997,7 @@
 	return 0;
 }
 
-int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
+static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
 {
 	struct lan78xx_net *dev = netdev_priv(netdev);
 	struct sockaddr *addr = p;
@@ -2371,7 +2371,7 @@
 	remove_wait_queue(&unlink_wakeup, &wait);
 }
 
-int lan78xx_stop(struct net_device *net)
+static int lan78xx_stop(struct net_device *net)
 {
 	struct lan78xx_net		*dev = netdev_priv(net);
 
@@ -2533,7 +2533,8 @@
 	entry->state = state;
 }
 
-netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
+static netdev_tx_t
+lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net)
 {
 	struct lan78xx_net *dev = netdev_priv(net);
 	struct sk_buff *skb2 = NULL;
@@ -2562,7 +2563,8 @@
 	return NETDEV_TX_OK;
 }
 
-int lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf)
+static int
+lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf)
 {
 	int tmp;
 	struct usb_host_interface *alt = NULL;
@@ -2700,7 +2702,7 @@
 	}
 }
 
-void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
+static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
 {
 	int		status;
 
@@ -3002,10 +3004,8 @@
 
 gso_skb:
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netif_dbg(dev, tx_err, dev->net, "no urb\n");
+	if (!urb)
 		goto drop;
-	}
 
 	entry = (struct skb_data *)skb->cb;
 	entry->urb = urb;
@@ -3285,7 +3285,7 @@
 	usb_put_dev(udev);
 }
 
-void lan78xx_tx_timeout(struct net_device *net)
+static void lan78xx_tx_timeout(struct net_device *net)
 {
 	struct lan78xx_net *dev = netdev_priv(net);
 
@@ -3605,7 +3605,7 @@
 	return 0;
 }
 
-int lan78xx_suspend(struct usb_interface *intf, pm_message_t message)
+static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message)
 {
 	struct lan78xx_net *dev = usb_get_intfdata(intf);
 	struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
@@ -3701,7 +3701,7 @@
 	return ret;
 }
 
-int lan78xx_resume(struct usb_interface *intf)
+static int lan78xx_resume(struct usb_interface *intf)
 {
 	struct lan78xx_net *dev = usb_get_intfdata(intf);
 	struct sk_buff *skb;
@@ -3768,7 +3768,7 @@
 	return 0;
 }
 
-int lan78xx_reset_resume(struct usb_interface *intf)
+static int lan78xx_reset_resume(struct usb_interface *intf)
 {
 	struct lan78xx_net *dev = usb_get_intfdata(intf);
 
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 9bbe0161..1434e5d 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -1129,7 +1129,8 @@
 		return -ENODEV;
 
 	if (pegasus_count == 0) {
-		pegasus_workqueue = create_singlethread_workqueue("pegasus");
+		pegasus_workqueue = alloc_workqueue("pegasus", WQ_MEM_RECLAIM,
+						    0);
 		if (!pegasus_workqueue)
 			return -ENOMEM;
 	}
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index f41a8ad..44d439f 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -32,7 +32,7 @@
 #define NETNEXT_VERSION		"08"
 
 /* Information for net */
-#define NET_VERSION		"5"
+#define NET_VERSION		"6"
 
 #define DRIVER_VERSION		"v1." NETNEXT_VERSION "." NET_VERSION
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
@@ -1076,8 +1076,7 @@
 		return -ENODEV;
 	if (obj->type != ACPI_TYPE_BUFFER || obj->string.length != 0x17) {
 		netif_warn(tp, probe, tp->netdev,
-			   "Invalid buffer when reading pass-thru MAC addr: "
-			   "(%d, %d)\n",
+			   "Invalid buffer for pass-thru MAC addr: (%d, %d)\n",
 			   obj->type, obj->string.length);
 		goto amacout;
 	}
@@ -1090,8 +1089,8 @@
 	ret = hex2bin(buf, obj->string.pointer + 9, 6);
 	if (!(ret == 0 && is_valid_ether_addr(buf))) {
 		netif_warn(tp, probe, tp->netdev,
-			   "Invalid MAC when reading pass-thru MAC addr: "
-			   "%d, %pM\n", ret, buf);
+			   "Invalid MAC for pass-thru MAC addr: %d, %pM\n",
+			   ret, buf);
 		ret = -EINVAL;
 		goto amacout;
 	}
@@ -1111,9 +1110,9 @@
 	struct sockaddr sa;
 	int ret;
 
-	if (tp->version == RTL_VER_01)
+	if (tp->version == RTL_VER_01) {
 		ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data);
-	else {
+	} else {
 		/* if this is not an RTL8153-AD, no eFuse mac pass thru set,
 		 * or system doesn't provide valid _SB.AMAC this will be
 		 * be expected to non-zero
@@ -2552,6 +2551,77 @@
 	}
 }
 
+static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
+{
+	ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
+	ocp_reg_write(tp, OCP_EEE_DATA, reg);
+	ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
+}
+
+static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
+{
+	u16 data;
+
+	r8152_mmd_indirect(tp, dev, reg);
+	data = ocp_reg_read(tp, OCP_EEE_DATA);
+	ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
+
+	return data;
+}
+
+static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
+{
+	r8152_mmd_indirect(tp, dev, reg);
+	ocp_reg_write(tp, OCP_EEE_DATA, data);
+	ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
+}
+
+static void r8152_eee_en(struct r8152 *tp, bool enable)
+{
+	u16 config1, config2, config3;
+	u32 ocp_data;
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
+	config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
+	config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
+	config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
+
+	if (enable) {
+		ocp_data |= EEE_RX_EN | EEE_TX_EN;
+		config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
+		config1 |= sd_rise_time(1);
+		config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
+		config3 |= fast_snr(42);
+	} else {
+		ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
+		config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
+			     RX_QUIET_EN);
+		config1 |= sd_rise_time(7);
+		config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
+		config3 |= fast_snr(511);
+	}
+
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
+	ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
+	ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
+	ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
+}
+
+static void r8152b_enable_eee(struct r8152 *tp)
+{
+	r8152_eee_en(tp, true);
+	r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
+}
+
+static void r8152b_enable_fc(struct r8152 *tp)
+{
+	u16 anar;
+
+	anar = r8152_mdio_read(tp, MII_ADVERTISE);
+	anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
+	r8152_mdio_write(tp, MII_ADVERTISE, anar);
+}
+
 static void rtl8152_disable(struct r8152 *tp)
 {
 	r8152_aldps_en(tp, false);
@@ -2561,13 +2631,9 @@
 
 static void r8152b_hw_phy_cfg(struct r8152 *tp)
 {
-	u16 data;
-
-	data = r8152_mdio_read(tp, MII_BMCR);
-	if (data & BMCR_PDOWN) {
-		data &= ~BMCR_PDOWN;
-		r8152_mdio_write(tp, MII_BMCR, data);
-	}
+	r8152b_enable_eee(tp);
+	r8152_aldps_en(tp, true);
+	r8152b_enable_fc(tp);
 
 	set_bit(PHY_RESET, &tp->flags);
 }
@@ -2701,20 +2767,52 @@
 	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
 }
 
+static void r8153_aldps_en(struct r8152 *tp, bool enable)
+{
+	u16 data;
+
+	data = ocp_reg_read(tp, OCP_POWER_CFG);
+	if (enable) {
+		data |= EN_ALDPS;
+		ocp_reg_write(tp, OCP_POWER_CFG, data);
+	} else {
+		data &= ~EN_ALDPS;
+		ocp_reg_write(tp, OCP_POWER_CFG, data);
+		msleep(20);
+	}
+}
+
+static void r8153_eee_en(struct r8152 *tp, bool enable)
+{
+	u32 ocp_data;
+	u16 config;
+
+	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
+	config = ocp_reg_read(tp, OCP_EEE_CFG);
+
+	if (enable) {
+		ocp_data |= EEE_RX_EN | EEE_TX_EN;
+		config |= EEE10_EN;
+	} else {
+		ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
+		config &= ~EEE10_EN;
+	}
+
+	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
+	ocp_reg_write(tp, OCP_EEE_CFG, config);
+}
+
 static void r8153_hw_phy_cfg(struct r8152 *tp)
 {
 	u32 ocp_data;
 	u16 data;
 
-	if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
-	    tp->version == RTL_VER_05)
-		ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+	/* disable ALDPS before updating the PHY parameters */
+	r8153_aldps_en(tp, false);
 
-	data = r8152_mdio_read(tp, MII_BMCR);
-	if (data & BMCR_PDOWN) {
-		data &= ~BMCR_PDOWN;
-		r8152_mdio_write(tp, MII_BMCR, data);
-	}
+	/* disable EEE before updating the PHY parameters */
+	r8153_eee_en(tp, false);
+	ocp_reg_write(tp, OCP_EEE_ADV, 0);
 
 	if (tp->version == RTL_VER_03) {
 		data = ocp_reg_read(tp, OCP_EEE_CFG);
@@ -2745,6 +2843,12 @@
 	sram_write(tp, SRAM_10M_AMP1, 0x00af);
 	sram_write(tp, SRAM_10M_AMP2, 0x0208);
 
+	r8153_eee_en(tp, true);
+	ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
+
+	r8153_aldps_en(tp, true);
+	r8152b_enable_fc(tp);
+
 	set_bit(PHY_RESET, &tp->flags);
 }
 
@@ -2866,21 +2970,6 @@
 	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
 }
 
-static void r8153_aldps_en(struct r8152 *tp, bool enable)
-{
-	u16 data;
-
-	data = ocp_reg_read(tp, OCP_POWER_CFG);
-	if (enable) {
-		data |= EN_ALDPS;
-		ocp_reg_write(tp, OCP_POWER_CFG, data);
-	} else {
-		data &= ~EN_ALDPS;
-		ocp_reg_write(tp, OCP_POWER_CFG, data);
-		msleep(20);
-	}
-}
-
 static void rtl8153_disable(struct r8152 *tp)
 {
 	r8153_aldps_en(tp, false);
@@ -3246,103 +3335,6 @@
 	return res;
 }
 
-static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
-{
-	ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
-	ocp_reg_write(tp, OCP_EEE_DATA, reg);
-	ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
-}
-
-static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
-{
-	u16 data;
-
-	r8152_mmd_indirect(tp, dev, reg);
-	data = ocp_reg_read(tp, OCP_EEE_DATA);
-	ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
-
-	return data;
-}
-
-static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
-{
-	r8152_mmd_indirect(tp, dev, reg);
-	ocp_reg_write(tp, OCP_EEE_DATA, data);
-	ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
-}
-
-static void r8152_eee_en(struct r8152 *tp, bool enable)
-{
-	u16 config1, config2, config3;
-	u32 ocp_data;
-
-	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
-	config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
-	config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
-	config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
-
-	if (enable) {
-		ocp_data |= EEE_RX_EN | EEE_TX_EN;
-		config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
-		config1 |= sd_rise_time(1);
-		config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
-		config3 |= fast_snr(42);
-	} else {
-		ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
-		config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
-			     RX_QUIET_EN);
-		config1 |= sd_rise_time(7);
-		config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
-		config3 |= fast_snr(511);
-	}
-
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
-	ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
-	ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
-	ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
-}
-
-static void r8152b_enable_eee(struct r8152 *tp)
-{
-	r8152_eee_en(tp, true);
-	r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
-}
-
-static void r8153_eee_en(struct r8152 *tp, bool enable)
-{
-	u32 ocp_data;
-	u16 config;
-
-	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
-	config = ocp_reg_read(tp, OCP_EEE_CFG);
-
-	if (enable) {
-		ocp_data |= EEE_RX_EN | EEE_TX_EN;
-		config |= EEE10_EN;
-	} else {
-		ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
-		config &= ~EEE10_EN;
-	}
-
-	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
-	ocp_reg_write(tp, OCP_EEE_CFG, config);
-}
-
-static void r8153_enable_eee(struct r8152 *tp)
-{
-	r8153_eee_en(tp, true);
-	ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
-}
-
-static void r8152b_enable_fc(struct r8152 *tp)
-{
-	u16 anar;
-
-	anar = r8152_mdio_read(tp, MII_ADVERTISE);
-	anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
-	r8152_mdio_write(tp, MII_ADVERTISE, anar);
-}
-
 static void rtl_tally_reset(struct r8152 *tp)
 {
 	u32 ocp_data;
@@ -3355,10 +3347,17 @@
 static void r8152b_init(struct r8152 *tp)
 {
 	u32 ocp_data;
+	u16 data;
 
 	if (test_bit(RTL8152_UNPLUG, &tp->flags))
 		return;
 
+	data = r8152_mdio_read(tp, MII_BMCR);
+	if (data & BMCR_PDOWN) {
+		data &= ~BMCR_PDOWN;
+		r8152_mdio_write(tp, MII_BMCR, data);
+	}
+
 	r8152_aldps_en(tp, false);
 
 	if (tp->version == RTL_VER_01) {
@@ -3380,9 +3379,6 @@
 		   SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK;
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data);
 
-	r8152b_enable_eee(tp);
-	r8152_aldps_en(tp, true);
-	r8152b_enable_fc(tp);
 	rtl_tally_reset(tp);
 
 	/* enable rx aggregation */
@@ -3394,12 +3390,12 @@
 static void r8153_init(struct r8152 *tp)
 {
 	u32 ocp_data;
+	u16 data;
 	int i;
 
 	if (test_bit(RTL8152_UNPLUG, &tp->flags))
 		return;
 
-	r8153_aldps_en(tp, false);
 	r8153_u1u2en(tp, false);
 
 	for (i = 0; i < 500; i++) {
@@ -3416,6 +3412,23 @@
 		msleep(20);
 	}
 
+	if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
+	    tp->version == RTL_VER_05)
+		ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
+
+	data = r8152_mdio_read(tp, MII_BMCR);
+	if (data & BMCR_PDOWN) {
+		data &= ~BMCR_PDOWN;
+		r8152_mdio_write(tp, MII_BMCR, data);
+	}
+
+	for (i = 0; i < 500; i++) {
+		ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK;
+		if (ocp_data == PHY_STAT_LAN_ON)
+			break;
+		msleep(20);
+	}
+
 	usb_disable_lpm(tp->udev);
 	r8153_u2p3en(tp, false);
 
@@ -3483,9 +3496,6 @@
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
 	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
 
-	r8153_enable_eee(tp);
-	r8153_aldps_en(tp, true);
-	r8152b_enable_fc(tp);
 	rtl_tally_reset(tp);
 	r8153_u2p3en(tp, true);
 }
@@ -4032,7 +4042,7 @@
 	return ret;
 }
 
-static struct ethtool_ops ops = {
+static const struct ethtool_ops ops = {
 	.get_drvinfo = rtl8152_get_drvinfo,
 	.get_settings = rtl8152_get_settings,
 	.set_settings = rtl8152_set_settings,
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index dc989a8..831aa33 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -33,7 +33,7 @@
 #include "smsc95xx.h"
 
 #define SMSC_CHIPNAME			"smsc95xx"
-#define SMSC_DRIVER_VERSION		"1.0.4"
+#define SMSC_DRIVER_VERSION		"1.0.5"
 #define HS_USB_PKT_SIZE			(512)
 #define FS_USB_PKT_SIZE			(64)
 #define DEFAULT_HS_BURST_CAP_SIZE	(16 * 1024 + 5 * HS_USB_PKT_SIZE)
@@ -64,6 +64,7 @@
 #define CARRIER_CHECK_DELAY (2 * HZ)
 
 struct smsc95xx_priv {
+	u32 chip_id;
 	u32 mac_cr;
 	u32 hash_hi;
 	u32 hash_lo;
@@ -71,6 +72,7 @@
 	spinlock_t mac_cr_lock;
 	u8 features;
 	u8 suspend_flags;
+	u8 mdix_ctrl;
 	bool link_ok;
 	struct delayed_work carrier_check;
 	struct usbnet *dev;
@@ -782,14 +784,113 @@
 	return ret;
 }
 
+static int get_mdix_status(struct net_device *net)
+{
+	struct usbnet *dev = netdev_priv(net);
+	u32 val;
+	int buf;
+
+	buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, SPECIAL_CTRL_STS);
+	if (buf & SPECIAL_CTRL_STS_OVRRD_AMDIX_) {
+		if (buf & SPECIAL_CTRL_STS_AMDIX_ENABLE_)
+			return ETH_TP_MDI_AUTO;
+		else if (buf & SPECIAL_CTRL_STS_AMDIX_STATE_)
+			return ETH_TP_MDI_X;
+	} else {
+		buf = smsc95xx_read_reg(dev, STRAP_STATUS, &val);
+		if (val & STRAP_STATUS_AMDIX_EN_)
+			return ETH_TP_MDI_AUTO;
+	}
+
+	return ETH_TP_MDI;
+}
+
+static void set_mdix_status(struct net_device *net, __u8 mdix_ctrl)
+{
+	struct usbnet *dev = netdev_priv(net);
+	struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+	int buf;
+
+	if ((pdata->chip_id == ID_REV_CHIP_ID_9500A_) ||
+	    (pdata->chip_id == ID_REV_CHIP_ID_9530_) ||
+	    (pdata->chip_id == ID_REV_CHIP_ID_89530_) ||
+	    (pdata->chip_id == ID_REV_CHIP_ID_9730_)) {
+		/* Extend Manual AutoMDIX timer for 9500A/9500Ai */
+		buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+					 PHY_EDPD_CONFIG);
+		buf |= PHY_EDPD_CONFIG_EXT_CROSSOVER_;
+		smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+				    PHY_EDPD_CONFIG, buf);
+	}
+
+	if (mdix_ctrl == ETH_TP_MDI) {
+		buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+					 SPECIAL_CTRL_STS);
+		buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+		buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+			 SPECIAL_CTRL_STS_AMDIX_STATE_);
+		smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+				    SPECIAL_CTRL_STS, buf);
+	} else if (mdix_ctrl == ETH_TP_MDI_X) {
+		buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+					 SPECIAL_CTRL_STS);
+		buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+		buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+			 SPECIAL_CTRL_STS_AMDIX_STATE_);
+		buf |= SPECIAL_CTRL_STS_AMDIX_STATE_;
+		smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+				    SPECIAL_CTRL_STS, buf);
+	} else if (mdix_ctrl == ETH_TP_MDI_AUTO) {
+		buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id,
+					 SPECIAL_CTRL_STS);
+		buf &= ~SPECIAL_CTRL_STS_OVRRD_AMDIX_;
+		buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ |
+			 SPECIAL_CTRL_STS_AMDIX_STATE_);
+		buf |= SPECIAL_CTRL_STS_AMDIX_ENABLE_;
+		smsc95xx_mdio_write(dev->net, dev->mii.phy_id,
+				    SPECIAL_CTRL_STS, buf);
+	}
+	pdata->mdix_ctrl = mdix_ctrl;
+}
+
+static int smsc95xx_get_settings(struct net_device *net,
+				 struct ethtool_cmd *cmd)
+{
+	struct usbnet *dev = netdev_priv(net);
+	struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+	int retval;
+
+	retval = usbnet_get_settings(net, cmd);
+
+	cmd->eth_tp_mdix = pdata->mdix_ctrl;
+	cmd->eth_tp_mdix_ctrl = pdata->mdix_ctrl;
+
+	return retval;
+}
+
+static int smsc95xx_set_settings(struct net_device *net,
+				 struct ethtool_cmd *cmd)
+{
+	struct usbnet *dev = netdev_priv(net);
+	struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]);
+	int retval;
+
+	if (pdata->mdix_ctrl != cmd->eth_tp_mdix_ctrl)
+		set_mdix_status(net, cmd->eth_tp_mdix_ctrl);
+
+	retval = usbnet_set_settings(net, cmd);
+
+	return retval;
+}
+
 static const struct ethtool_ops smsc95xx_ethtool_ops = {
 	.get_link	= usbnet_get_link,
 	.nway_reset	= usbnet_nway_reset,
 	.get_drvinfo	= usbnet_get_drvinfo,
 	.get_msglevel	= usbnet_get_msglevel,
 	.set_msglevel	= usbnet_set_msglevel,
-	.get_settings	= usbnet_get_settings,
-	.set_settings	= usbnet_set_settings,
+	.get_settings	= smsc95xx_get_settings,
+	.set_settings	= smsc95xx_set_settings,
 	.get_eeprom_len	= smsc95xx_ethtool_get_eeprom_len,
 	.get_eeprom	= smsc95xx_ethtool_get_eeprom,
 	.set_eeprom	= smsc95xx_ethtool_set_eeprom,
@@ -1194,6 +1295,8 @@
 	if (ret < 0)
 		return ret;
 	val >>= 16;
+	pdata->chip_id = val;
+	pdata->mdix_ctrl = get_mdix_status(dev->net);
 
 	if ((val == ID_REV_CHIP_ID_9500A_) || (val == ID_REV_CHIP_ID_9530_) ||
 	    (val == ID_REV_CHIP_ID_89530_) || (val == ID_REV_CHIP_ID_9730_))
diff --git a/drivers/net/usb/smsc95xx.h b/drivers/net/usb/smsc95xx.h
index 526faa0..29a4d9e 100644
--- a/drivers/net/usb/smsc95xx.h
+++ b/drivers/net/usb/smsc95xx.h
@@ -144,6 +144,14 @@
 
 #define BURST_CAP			(0x38)
 
+#define	STRAP_STATUS			(0x3C)
+#define	STRAP_STATUS_PWR_SEL_		(0x00000020)
+#define	STRAP_STATUS_AMDIX_EN_		(0x00000010)
+#define	STRAP_STATUS_PORT_SWAP_		(0x00000008)
+#define	STRAP_STATUS_EEP_SIZE_		(0x00000004)
+#define	STRAP_STATUS_RMT_WKP_		(0x00000002)
+#define	STRAP_STATUS_EEP_DISABLE_	(0x00000001)
+
 #define GPIO_WAKE			(0x64)
 
 #define INT_EP_CTL			(0x68)
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 3bfb592..d5071e3 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -2062,11 +2062,8 @@
 		   cmd, reqtype, value, index, size);
 
 	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		netdev_err(dev->net, "Error allocating URB in"
-			   " %s!\n", __func__);
+	if (!urb)
 		goto fail;
-	}
 
 	if (data) {
 		buf = kmemdup(data, size, GFP_ATOMIC);
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index f37a6e6..fbc853e 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -313,7 +313,7 @@
 };
 
 #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
-		       NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \
+		       NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
 		       NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
 		       NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
 		       NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX )
@@ -340,6 +340,7 @@
 
 	dev->hw_features = VETH_FEATURES;
 	dev->hw_enc_features = VETH_FEATURES;
+	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
 }
 
 /*
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1b5f531..fad84f3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -138,8 +138,9 @@
 	/* Does the affinity hint is set for virtqueues? */
 	bool affinity_hint_set;
 
-	/* CPU hot plug notifier */
-	struct notifier_block nb;
+	/* CPU hotplug instances for online & dead */
+	struct hlist_node node;
+	struct hlist_node node_dead;
 
 	/* Control VQ buffers: protected by the rtnl lock */
 	struct virtio_net_ctrl_hdr ctrl_hdr;
@@ -1237,25 +1238,53 @@
 	vi->affinity_hint_set = true;
 }
 
-static int virtnet_cpu_callback(struct notifier_block *nfb,
-			        unsigned long action, void *hcpu)
+static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
-	struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
+	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
+						   node);
+	virtnet_set_affinity(vi);
+	return 0;
+}
 
-	switch(action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-	case CPU_DEAD:
-		virtnet_set_affinity(vi);
-		break;
-	case CPU_DOWN_PREPARE:
-		virtnet_clean_affinity(vi, (long)hcpu);
-		break;
-	default:
-		break;
-	}
+static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
+{
+	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
+						   node_dead);
+	virtnet_set_affinity(vi);
+	return 0;
+}
 
-	return NOTIFY_OK;
+static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
+{
+	struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
+						   node);
+
+	virtnet_clean_affinity(vi, cpu);
+	return 0;
+}
+
+static enum cpuhp_state virtionet_online;
+
+static int virtnet_cpu_notif_add(struct virtnet_info *vi)
+{
+	int ret;
+
+	ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
+	if (ret)
+		return ret;
+	ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
+					       &vi->node_dead);
+	if (!ret)
+		return ret;
+	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
+	return ret;
+}
+
+static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
+{
+	cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
+	cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
+					    &vi->node_dead);
 }
 
 static void virtnet_get_ringparam(struct net_device *dev,
@@ -1879,8 +1908,7 @@
 
 	virtio_device_ready(vdev);
 
-	vi->nb.notifier_call = &virtnet_cpu_callback;
-	err = register_hotcpu_notifier(&vi->nb);
+	err = virtnet_cpu_notif_add(vi);
 	if (err) {
 		pr_debug("virtio_net: registering cpu notifier failed\n");
 		goto free_unregister_netdev;
@@ -1934,7 +1962,7 @@
 {
 	struct virtnet_info *vi = vdev->priv;
 
-	unregister_hotcpu_notifier(&vi->nb);
+	virtnet_cpu_notif_remove(vi);
 
 	/* Make sure no work handler is accessing the device. */
 	flush_work(&vi->config_work);
@@ -1953,7 +1981,7 @@
 	struct virtnet_info *vi = vdev->priv;
 	int i;
 
-	unregister_hotcpu_notifier(&vi->nb);
+	virtnet_cpu_notif_remove(vi);
 
 	/* Make sure no work handler is accessing the device */
 	flush_work(&vi->config_work);
@@ -1997,7 +2025,7 @@
 	virtnet_set_queues(vi, vi->curr_queue_pairs);
 	rtnl_unlock();
 
-	err = register_hotcpu_notifier(&vi->nb);
+	err = virtnet_cpu_notif_add(vi);
 	if (err)
 		return err;
 
@@ -2039,7 +2067,41 @@
 #endif
 };
 
-module_virtio_driver(virtio_net_driver);
+static __init int virtio_net_driver_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "AP_VIRT_NET_ONLINE",
+				      virtnet_cpu_online,
+				      virtnet_cpu_down_prep);
+	if (ret < 0)
+		goto out;
+	virtionet_online = ret;
+	ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "VIRT_NET_DEAD",
+				      NULL, virtnet_cpu_dead);
+	if (ret)
+		goto err_dead;
+
+        ret = register_virtio_driver(&virtio_net_driver);
+	if (ret)
+		goto err_virtio;
+	return 0;
+err_virtio:
+	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
+err_dead:
+	cpuhp_remove_multi_state(virtionet_online);
+out:
+	return ret;
+}
+module_init(virtio_net_driver_init);
+
+static __exit void virtio_net_driver_exit(void)
+{
+	cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
+	cpuhp_remove_multi_state(virtionet_online);
+	unregister_virtio_driver(&virtio_net_driver);
+}
+module_exit(virtio_net_driver_exit);
 
 MODULE_DEVICE_TABLE(virtio, id_table);
 MODULE_DESCRIPTION("Virtio network driver");
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index c68fe49..b5554f2 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -914,7 +914,9 @@
 {
 	struct Vmxnet3_TxDataDesc *tdd;
 
-	tdd = tq->data_ring.base + tq->tx_ring.next2fill;
+	tdd = (struct Vmxnet3_TxDataDesc *)((u8 *)tq->data_ring.base +
+					    tq->tx_ring.next2fill *
+					    tq->txdata_desc_size);
 
 	memcpy(tdd->data, skb->data, ctx->copy_size);
 	netdev_dbg(adapter->netdev,
@@ -1639,7 +1641,7 @@
 	}
 }
 
-void
+static void
 vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter)
 {
 	int i;
@@ -3184,7 +3186,6 @@
 
 	netdev_err(adapter->netdev, "tx hang\n");
 	schedule_work(&adapter->work);
-	netif_wake_queue(adapter->netdev);
 }
 
 
@@ -3211,6 +3212,7 @@
 	}
 	rtnl_unlock();
 
+	netif_wake_queue(adapter->netdev);
 	clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
 }
 
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 74fc030..7dc37a0 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.9.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.a.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040900
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040a00
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 1ce7420..85c271c 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -37,9 +37,6 @@
 #include <net/l3mdev.h>
 #include <net/fib_rules.h>
 
-#define RT_FL_TOS(oldflp4) \
-	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
-
 #define DRV_NAME	"vrf"
 #define DRV_VERSION	"1.0"
 
@@ -137,6 +134,20 @@
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
+static int vrf_ip6_local_out(struct net *net, struct sock *sk,
+			     struct sk_buff *skb)
+{
+	int err;
+
+	err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net,
+		      sk, skb, NULL, skb_dst(skb)->dev, dst_output);
+
+	if (likely(err == 1))
+		err = dst_output(net, sk, skb);
+
+	return err;
+}
+
 static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
 					   struct net_device *dev)
 {
@@ -151,7 +162,7 @@
 		.flowlabel = ip6_flowinfo(iph),
 		.flowi6_mark = skb->mark,
 		.flowi6_proto = iph->nexthdr,
-		.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF,
+		.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF,
 	};
 	int ret = NET_XMIT_DROP;
 	struct dst_entry *dst;
@@ -207,7 +218,7 @@
 	/* strip the ethernet header added for pass through VRF device */
 	__skb_pull(skb, skb_network_offset(skb));
 
-	ret = ip6_local_out(net, skb->sk, skb);
+	ret = vrf_ip6_local_out(net, skb->sk, skb);
 	if (unlikely(net_xmit_eval(ret)))
 		dev->stats.tx_errors++;
 	else
@@ -227,6 +238,20 @@
 }
 #endif
 
+/* based on ip_local_out; can't use it b/c the dst is switched pointing to us */
+static int vrf_ip_local_out(struct net *net, struct sock *sk,
+			    struct sk_buff *skb)
+{
+	int err;
+
+	err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+		      skb, NULL, skb_dst(skb)->dev, dst_output);
+	if (likely(err == 1))
+		err = dst_output(net, sk, skb);
+
+	return err;
+}
+
 static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
 					   struct net_device *vrf_dev)
 {
@@ -237,8 +262,7 @@
 		.flowi4_oif = vrf_dev->ifindex,
 		.flowi4_iif = LOOPBACK_IFINDEX,
 		.flowi4_tos = RT_TOS(ip4h->tos),
-		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC |
-				FLOWI_FLAG_SKIP_NH_OIF,
+		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
 		.daddr = ip4h->daddr,
 	};
 	struct net *net = dev_net(vrf_dev);
@@ -292,7 +316,7 @@
 					       RT_SCOPE_LINK);
 	}
 
-	ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
+	ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
 	if (unlikely(net_xmit_eval(ret)))
 		vrf_dev->stats.tx_errors++;
 	else
@@ -377,6 +401,43 @@
 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
+/* set dst on skb to send packet to us via dev_xmit path. Allows
+ * packet to go through device based features such as qdisc, netfilter
+ * hooks and packet sockets with skb->dev set to vrf device.
+ */
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+				   struct sock *sk,
+				   struct sk_buff *skb)
+{
+	struct net_vrf *vrf = netdev_priv(vrf_dev);
+	struct dst_entry *dst = NULL;
+	struct rt6_info *rt6;
+
+	/* don't divert link scope packets */
+	if (rt6_need_strict(&ipv6_hdr(skb)->daddr))
+		return skb;
+
+	rcu_read_lock();
+
+	rt6 = rcu_dereference(vrf->rt6);
+	if (likely(rt6)) {
+		dst = &rt6->dst;
+		dst_hold(dst);
+	}
+
+	rcu_read_unlock();
+
+	if (unlikely(!dst)) {
+		vrf_tx_error(vrf_dev, skb);
+		return NULL;
+	}
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	return skb;
+}
+
 /* holding rtnl */
 static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
 {
@@ -463,6 +524,13 @@
 	return rc;
 }
 #else
+static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
+				   struct sock *sk,
+				   struct sk_buff *skb)
+{
+	return skb;
+}
+
 static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
 {
 }
@@ -531,6 +599,55 @@
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
+/* set dst on skb to send packet to us via dev_xmit path. Allows
+ * packet to go through device based features such as qdisc, netfilter
+ * hooks and packet sockets with skb->dev set to vrf device.
+ */
+static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+				  struct sock *sk,
+				  struct sk_buff *skb)
+{
+	struct net_vrf *vrf = netdev_priv(vrf_dev);
+	struct dst_entry *dst = NULL;
+	struct rtable *rth;
+
+	rcu_read_lock();
+
+	rth = rcu_dereference(vrf->rth);
+	if (likely(rth)) {
+		dst = &rth->dst;
+		dst_hold(dst);
+	}
+
+	rcu_read_unlock();
+
+	if (unlikely(!dst)) {
+		vrf_tx_error(vrf_dev, skb);
+		return NULL;
+	}
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	return skb;
+}
+
+/* called with rcu lock held */
+static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
+				  struct sock *sk,
+				  struct sk_buff *skb,
+				  u16 proto)
+{
+	switch (proto) {
+	case AF_INET:
+		return vrf_ip_out(vrf_dev, sk, skb);
+	case AF_INET6:
+		return vrf_ip6_out(vrf_dev, sk, skb);
+	}
+
+	return skb;
+}
+
 /* holding rtnl */
 static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
 {
@@ -722,63 +839,6 @@
 	return vrf->tb_id;
 }
 
-static struct rtable *vrf_get_rtable(const struct net_device *dev,
-				     const struct flowi4 *fl4)
-{
-	struct rtable *rth = NULL;
-
-	if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) {
-		struct net_vrf *vrf = netdev_priv(dev);
-
-		rcu_read_lock();
-
-		rth = rcu_dereference(vrf->rth);
-		if (likely(rth))
-			dst_hold(&rth->dst);
-
-		rcu_read_unlock();
-	}
-
-	return rth;
-}
-
-/* called under rcu_read_lock */
-static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4)
-{
-	struct fib_result res = { .tclassid = 0 };
-	struct net *net = dev_net(dev);
-	u32 orig_tos = fl4->flowi4_tos;
-	u8 flags = fl4->flowi4_flags;
-	u8 scope = fl4->flowi4_scope;
-	u8 tos = RT_FL_TOS(fl4);
-	int rc;
-
-	if (unlikely(!fl4->daddr))
-		return 0;
-
-	fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-	fl4->flowi4_iif = LOOPBACK_IFINDEX;
-	/* make sure oif is set to VRF device for lookup */
-	fl4->flowi4_oif = dev->ifindex;
-	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
-	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
-			     RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
-
-	rc = fib_lookup(net, fl4, &res, 0);
-	if (!rc) {
-		if (res.type == RTN_LOCAL)
-			fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr;
-		else
-			fib_select_path(net, &res, fl4, -1);
-	}
-
-	fl4->flowi4_flags = flags;
-	fl4->flowi4_tos = orig_tos;
-	fl4->flowi4_scope = scope;
-
-	return rc;
-}
-
 static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	return 0;
@@ -970,106 +1030,44 @@
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev,
-					 struct flowi6 *fl6)
+/* send to link-local or multicast address via interface enslaved to
+ * VRF device. Force lookup to VRF table without changing flow struct
+ */
+static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
+					      struct flowi6 *fl6)
 {
-	bool need_strict = rt6_need_strict(&fl6->daddr);
-	struct net_vrf *vrf = netdev_priv(dev);
 	struct net *net = dev_net(dev);
+	int flags = RT6_LOOKUP_F_IFACE;
 	struct dst_entry *dst = NULL;
 	struct rt6_info *rt;
 
-	/* send to link-local or multicast address */
-	if (need_strict) {
-		int flags = RT6_LOOKUP_F_IFACE;
-
-		/* VRF device does not have a link-local address and
-		 * sending packets to link-local or mcast addresses over
-		 * a VRF device does not make sense
-		 */
-		if (fl6->flowi6_oif == dev->ifindex) {
-			struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst;
-
-			dst_hold(dst);
-			return dst;
-		}
-
-		if (!ipv6_addr_any(&fl6->saddr))
-			flags |= RT6_LOOKUP_F_HAS_SADDR;
-
-		rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
-		if (rt)
-			dst = &rt->dst;
-
-	} else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) {
-
-		rcu_read_lock();
-
-		rt = rcu_dereference(vrf->rt6);
-		if (likely(rt)) {
-			dst = &rt->dst;
-			dst_hold(dst);
-		}
-
-		rcu_read_unlock();
+	/* VRF device does not have a link-local address and
+	 * sending packets to link-local or mcast addresses over
+	 * a VRF device does not make sense
+	 */
+	if (fl6->flowi6_oif == dev->ifindex) {
+		dst = &net->ipv6.ip6_null_entry->dst;
+		dst_hold(dst);
+		return dst;
 	}
 
-	/* make sure oif is set to VRF device for lookup */
-	if (!need_strict)
-		fl6->flowi6_oif = dev->ifindex;
+	if (!ipv6_addr_any(&fl6->saddr))
+		flags |= RT6_LOOKUP_F_HAS_SADDR;
+
+	rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags);
+	if (rt)
+		dst = &rt->dst;
 
 	return dst;
 }
-
-/* called under rcu_read_lock */
-static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk,
-			  struct flowi6 *fl6)
-{
-	struct net *net = dev_net(dev);
-	struct dst_entry *dst;
-	struct rt6_info *rt;
-	int err;
-
-	if (rt6_need_strict(&fl6->daddr)) {
-		rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif,
-					  RT6_LOOKUP_F_IFACE);
-		if (unlikely(!rt))
-			return 0;
-
-		dst = &rt->dst;
-	} else {
-		__u8 flags = fl6->flowi6_flags;
-
-		fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
-		fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF;
-
-		dst = ip6_route_output(net, sk, fl6);
-		rt = (struct rt6_info *)dst;
-
-		fl6->flowi6_flags = flags;
-	}
-
-	err = dst->error;
-	if (!err) {
-		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
-					  sk ? inet6_sk(sk)->srcprefs : 0,
-					  &fl6->saddr);
-	}
-
-	dst_release(dst);
-
-	return err;
-}
 #endif
 
 static const struct l3mdev_ops vrf_l3mdev_ops = {
 	.l3mdev_fib_table	= vrf_fib_table,
-	.l3mdev_get_rtable	= vrf_get_rtable,
-	.l3mdev_get_saddr	= vrf_get_saddr,
 	.l3mdev_l3_rcv		= vrf_l3_rcv,
+	.l3mdev_l3_out		= vrf_l3_out,
 #if IS_ENABLED(CONFIG_IPV6)
-	.l3mdev_get_rt6_dst	= vrf_get_rt6_dst,
-	.l3mdev_get_saddr6	= vrf_get_saddr6,
+	.l3mdev_link_scope_lookup = vrf_link_scope_lookup,
 #endif
 };
 
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index da4e3d6..e7d1668 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -27,7 +27,6 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/vxlan.h>
-#include <net/protocol.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_tunnel.h>
@@ -288,7 +287,7 @@
 
 	if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
 	    nla_put_s32(skb, NDA_LINK_NETNSID,
-			peernet2id_alloc(dev_net(vxlan->dev), vxlan->net)))
+			peernet2id(dev_net(vxlan->dev), vxlan->net)))
 		goto nla_put_failure;
 
 	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
@@ -861,20 +860,20 @@
 /* Dump forwarding table */
 static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 			  struct net_device *dev,
-			  struct net_device *filter_dev, int idx)
+			  struct net_device *filter_dev, int *idx)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	unsigned int h;
+	int err = 0;
 
 	for (h = 0; h < FDB_HASH_SIZE; ++h) {
 		struct vxlan_fdb *f;
-		int err;
 
 		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
 			struct vxlan_rdst *rd;
 
 			list_for_each_entry_rcu(rd, &f->remotes, list) {
-				if (idx < cb->args[0])
+				if (*idx < cb->args[2])
 					goto skip;
 
 				err = vxlan_fdb_info(skb, vxlan, f,
@@ -882,17 +881,15 @@
 						     cb->nlh->nlmsg_seq,
 						     RTM_NEWNEIGH,
 						     NLM_F_MULTI, rd);
-				if (err < 0) {
-					cb->args[1] = err;
+				if (err < 0)
 					goto out;
-				}
 skip:
-				++idx;
+				*idx += 1;
 			}
 		}
 	}
 out:
-	return idx;
+	return err;
 }
 
 /* Watch incoming packets to learn mapping between Ethernet address
@@ -1294,7 +1291,7 @@
 		struct metadata_dst *tun_dst;
 
 		tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
-					 vxlan_vni_to_tun_id(vni), sizeof(*md));
+					 key32_to_tunnel_id(vni), sizeof(*md));
 
 		if (!tun_dst)
 			goto drop;
@@ -1811,7 +1808,7 @@
 	fl4.flowi4_mark = skb->mark;
 	fl4.flowi4_proto = IPPROTO_UDP;
 	fl4.daddr = daddr;
-	fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
+	fl4.saddr = *saddr;
 
 	rt = ip_route_output_key(vxlan->net, &fl4);
 	if (!IS_ERR(rt)) {
@@ -1847,7 +1844,7 @@
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_oif = oif;
 	fl6.daddr = *daddr;
-	fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
+	fl6.saddr = *saddr;
 	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label);
 	fl6.flowi6_mark = skb->mark;
 	fl6.flowi6_proto = IPPROTO_UDP;
@@ -1920,7 +1917,8 @@
 	struct rtable *rt = NULL;
 	const struct iphdr *old_iph;
 	union vxlan_addr *dst;
-	union vxlan_addr remote_ip;
+	union vxlan_addr remote_ip, local_ip;
+	union vxlan_addr *src;
 	struct vxlan_metadata _md;
 	struct vxlan_metadata *md = &_md;
 	__be16 src_port = 0, dst_port;
@@ -1938,6 +1936,7 @@
 		dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
 		vni = rdst->remote_vni;
 		dst = &rdst->remote_ip;
+		src = &vxlan->cfg.saddr;
 		dst_cache = &rdst->dst_cache;
 	} else {
 		if (!info) {
@@ -1946,13 +1945,17 @@
 			goto drop;
 		}
 		dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
-		vni = vxlan_tun_id_to_vni(info->key.tun_id);
+		vni = tunnel_id_to_key32(info->key.tun_id);
 		remote_ip.sa.sa_family = ip_tunnel_info_af(info);
-		if (remote_ip.sa.sa_family == AF_INET)
+		if (remote_ip.sa.sa_family == AF_INET) {
 			remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
-		else
+			local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src;
+		} else {
 			remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
+			local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
+		}
 		dst = &remote_ip;
+		src = &local_ip;
 		dst_cache = &info->dst_cache;
 	}
 
@@ -1992,15 +1995,14 @@
 	}
 
 	if (dst->sa.sa_family == AF_INET) {
-		__be32 saddr;
-
 		if (!vxlan->vn4_sock)
 			goto drop;
 		sk = vxlan->vn4_sock->sock->sk;
 
 		rt = vxlan_get_route(vxlan, skb,
 				     rdst ? rdst->remote_ifindex : 0, tos,
-				     dst->sin.sin_addr.s_addr, &saddr,
+				     dst->sin.sin_addr.s_addr,
+				     &src->sin.sin_addr.s_addr,
 				     dst_cache, info);
 		if (IS_ERR(rt)) {
 			netdev_dbg(dev, "no route to %pI4\n",
@@ -2017,7 +2019,7 @@
 		}
 
 		/* Bypass encapsulation if the destination is local */
-		if (rt->rt_flags & RTCF_LOCAL &&
+		if (!info && rt->rt_flags & RTCF_LOCAL &&
 		    !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
 			struct vxlan_dev *dst_vxlan;
 
@@ -2043,13 +2045,12 @@
 		if (err < 0)
 			goto xmit_tx_error;
 
-		udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+		udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
 				    dst->sin.sin_addr.s_addr, tos, ttl, df,
 				    src_port, dst_port, xnet, !udp_sum);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
 		struct dst_entry *ndst;
-		struct in6_addr saddr;
 		u32 rt6i_flags;
 
 		if (!vxlan->vn6_sock)
@@ -2058,7 +2059,8 @@
 
 		ndst = vxlan6_get_route(vxlan, skb,
 					rdst ? rdst->remote_ifindex : 0, tos,
-					label, &dst->sin6.sin6_addr, &saddr,
+					label, &dst->sin6.sin6_addr,
+					&src->sin6.sin6_addr,
 					dst_cache, info);
 		if (IS_ERR(ndst)) {
 			netdev_dbg(dev, "no route to %pI6\n",
@@ -2077,7 +2079,7 @@
 
 		/* Bypass encapsulation if the destination is local */
 		rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
-		if (rt6i_flags & RTF_LOCAL &&
+		if (!info && rt6i_flags & RTF_LOCAL &&
 		    !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
 			struct vxlan_dev *dst_vxlan;
 
@@ -2101,10 +2103,12 @@
 				      vni, md, flags, udp_sum);
 		if (err < 0) {
 			dst_release(ndst);
+			dev->stats.tx_errors++;
 			return;
 		}
 		udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
-				     &saddr, &dst->sin6.sin6_addr, tos, ttl,
+				     &src->sin6.sin6_addr,
+				     &dst->sin6.sin6_addr, tos, ttl,
 				     label, src_port, dst_port, !udp_sum);
 #endif
 	}
@@ -2776,14 +2780,15 @@
 	struct net_device *lowerdev = NULL;
 
 	if (conf->flags & VXLAN_F_GPE) {
-		if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
-			return -EINVAL;
 		/* For now, allow GPE only together with COLLECT_METADATA.
 		 * This can be relaxed later; in such case, the other side
 		 * of the PtP link will have to be provided.
 		 */
-		if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
+		if ((conf->flags & ~VXLAN_F_ALLOWED_GPE) ||
+		    !(conf->flags & VXLAN_F_COLLECT_METADATA)) {
+			pr_info("unsupported combination of extensions\n");
 			return -EINVAL;
+		}
 
 		vxlan_raw_setup(dev);
 	} else {
@@ -2836,6 +2841,9 @@
 			dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
 
 		needed_headroom = lowerdev->hard_header_len;
+	} else if (vxlan_addr_multicast(&dst->remote_ip)) {
+		pr_info("multicast destination requires interface to be specified\n");
+		return -EINVAL;
 	}
 
 	if (conf->mtu) {
@@ -2868,8 +2876,10 @@
 		     tmp->cfg.saddr.sa.sa_family == AF_INET6) == use_ipv6 &&
 		    tmp->cfg.dst_port == vxlan->cfg.dst_port &&
 		    (tmp->flags & VXLAN_F_RCV_FLAGS) ==
-		    (vxlan->flags & VXLAN_F_RCV_FLAGS))
-		return -EEXIST;
+		    (vxlan->flags & VXLAN_F_RCV_FLAGS)) {
+			pr_info("duplicate VNI %u\n", be32_to_cpu(conf->vni));
+			return -EEXIST;
+		}
 	}
 
 	dev->ethtool_ops = &vxlan_ethtool_ops;
@@ -2903,7 +2913,6 @@
 			 struct nlattr *tb[], struct nlattr *data[])
 {
 	struct vxlan_config conf;
-	int err;
 
 	memset(&conf, 0, sizeof(conf));
 
@@ -3012,26 +3021,7 @@
 	if (tb[IFLA_MTU])
 		conf.mtu = nla_get_u32(tb[IFLA_MTU]);
 
-	err = vxlan_dev_configure(src_net, dev, &conf);
-	switch (err) {
-	case -ENODEV:
-		pr_info("ifindex %d does not exist\n", conf.remote_ifindex);
-		break;
-
-	case -EPERM:
-		pr_info("IPv6 is disabled via sysctl\n");
-		break;
-
-	case -EEXIST:
-		pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
-		break;
-
-	case -EINVAL:
-		pr_info("unsupported combination of extensions\n");
-		break;
-	}
-
-	return err;
+	return vxlan_dev_configure(src_net, dev, &conf);
 }
 
 static void vxlan_dellink(struct net_device *dev, struct list_head *head)
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 6f04445..5fbf83d 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -162,7 +162,7 @@
 				ALIGNMENT_OF_UCC_HDLC_PRAM);
 
 	if (priv->ucc_pram_offset < 0) {
-		dev_err(priv->dev, "Can not allocate MURAM for hdlc prameter.\n");
+		dev_err(priv->dev, "Can not allocate MURAM for hdlc parameter.\n");
 		ret = -ENOMEM;
 		goto free_tx_bd;
 	}
diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c
index d98c7e5..3a421ca 100644
--- a/drivers/net/wan/sbni.c
+++ b/drivers/net/wan/sbni.c
@@ -582,8 +582,8 @@
 
 
 /*
- * Routine returns 1 if it need to acknoweledge received frame.
- * Empty frame received without errors won't be acknoweledged.
+ * Routine returns 1 if it needs to acknowledge received frame.
+ * Empty frame received without errors won't be acknowledged.
  */
 
 static int
diff --git a/drivers/net/wimax/i2400m/usb-notif.c b/drivers/net/wimax/i2400m/usb-notif.c
index fc1355d..5d429f8 100644
--- a/drivers/net/wimax/i2400m/usb-notif.c
+++ b/drivers/net/wimax/i2400m/usb-notif.c
@@ -206,7 +206,6 @@
 	i2400mu->notif_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!i2400mu->notif_urb) {
 		ret = -ENOMEM;
-		dev_err(dev, "notification: cannot allocate URB\n");
 		goto error_alloc_urb;
 	}
 	epd = usb_get_epd(i2400mu->usb_iface,
diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c
index 8aded24..7a60d2e 100644
--- a/drivers/net/wireless/ath/ar5523/ar5523.c
+++ b/drivers/net/wireless/ath/ar5523/ar5523.c
@@ -706,10 +706,8 @@
 
 		data->ar = ar;
 		data->urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!data->urb) {
-			ar5523_err(ar, "could not allocate rx data urb\n");
+		if (!data->urb)
 			goto err;
-		}
 		list_add_tail(&data->list, &ar->rx_data_free);
 		atomic_inc(&ar->rx_data_free_cnt);
 	}
@@ -824,7 +822,6 @@
 
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!urb) {
-			ar5523_err(ar, "Failed to allocate TX urb\n");
 			ieee80211_free_txskb(ar->hw, skb);
 			continue;
 		}
@@ -949,10 +946,8 @@
 	init_completion(&cmd->done);
 
 	cmd->urb_tx = usb_alloc_urb(0, GFP_KERNEL);
-	if (!cmd->urb_tx) {
-		ar5523_err(ar, "could not allocate urb\n");
+	if (!cmd->urb_tx)
 		return -ENOMEM;
-	}
 	cmd->buf_tx = usb_alloc_coherent(ar->dev, AR5523_MAX_TXCMDSZ,
 					 GFP_KERNEL,
 					 &cmd->urb_tx->transfer_dma);
diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c
index acec16b..766c63b 100644
--- a/drivers/net/wireless/ath/ath10k/ahb.c
+++ b/drivers/net/wireless/ath/ath10k/ahb.c
@@ -91,59 +91,37 @@
 {
 	struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar);
 	struct device *dev;
-	int ret;
 
 	dev = &ar_ahb->pdev->dev;
 
-	ar_ahb->cmd_clk = clk_get(dev, "wifi_wcss_cmd");
+	ar_ahb->cmd_clk = devm_clk_get(dev, "wifi_wcss_cmd");
 	if (IS_ERR_OR_NULL(ar_ahb->cmd_clk)) {
 		ath10k_err(ar, "failed to get cmd clk: %ld\n",
 			   PTR_ERR(ar_ahb->cmd_clk));
-		ret = ar_ahb->cmd_clk ? PTR_ERR(ar_ahb->cmd_clk) : -ENODEV;
-		goto out;
+		return ar_ahb->cmd_clk ? PTR_ERR(ar_ahb->cmd_clk) : -ENODEV;
 	}
 
-	ar_ahb->ref_clk = clk_get(dev, "wifi_wcss_ref");
+	ar_ahb->ref_clk = devm_clk_get(dev, "wifi_wcss_ref");
 	if (IS_ERR_OR_NULL(ar_ahb->ref_clk)) {
 		ath10k_err(ar, "failed to get ref clk: %ld\n",
 			   PTR_ERR(ar_ahb->ref_clk));
-		ret = ar_ahb->ref_clk ? PTR_ERR(ar_ahb->ref_clk) : -ENODEV;
-		goto err_cmd_clk_put;
+		return ar_ahb->ref_clk ? PTR_ERR(ar_ahb->ref_clk) : -ENODEV;
 	}
 
-	ar_ahb->rtc_clk = clk_get(dev, "wifi_wcss_rtc");
+	ar_ahb->rtc_clk = devm_clk_get(dev, "wifi_wcss_rtc");
 	if (IS_ERR_OR_NULL(ar_ahb->rtc_clk)) {
 		ath10k_err(ar, "failed to get rtc clk: %ld\n",
 			   PTR_ERR(ar_ahb->rtc_clk));
-		ret = ar_ahb->rtc_clk ? PTR_ERR(ar_ahb->rtc_clk) : -ENODEV;
-		goto err_ref_clk_put;
+		return ar_ahb->rtc_clk ? PTR_ERR(ar_ahb->rtc_clk) : -ENODEV;
 	}
 
 	return 0;
-
-err_ref_clk_put:
-	clk_put(ar_ahb->ref_clk);
-
-err_cmd_clk_put:
-	clk_put(ar_ahb->cmd_clk);
-
-out:
-	return ret;
 }
 
 static void ath10k_ahb_clock_deinit(struct ath10k *ar)
 {
 	struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar);
 
-	if (!IS_ERR_OR_NULL(ar_ahb->cmd_clk))
-		clk_put(ar_ahb->cmd_clk);
-
-	if (!IS_ERR_OR_NULL(ar_ahb->ref_clk))
-		clk_put(ar_ahb->ref_clk);
-
-	if (!IS_ERR_OR_NULL(ar_ahb->rtc_clk))
-		clk_put(ar_ahb->rtc_clk);
-
 	ar_ahb->cmd_clk = NULL;
 	ar_ahb->ref_clk = NULL;
 	ar_ahb->rtc_clk = NULL;
@@ -213,92 +191,51 @@
 {
 	struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar);
 	struct device *dev;
-	int ret;
 
 	dev = &ar_ahb->pdev->dev;
 
-	ar_ahb->core_cold_rst = reset_control_get(dev, "wifi_core_cold");
-	if (IS_ERR_OR_NULL(ar_ahb->core_cold_rst)) {
+	ar_ahb->core_cold_rst = devm_reset_control_get(dev, "wifi_core_cold");
+	if (IS_ERR(ar_ahb->core_cold_rst)) {
 		ath10k_err(ar, "failed to get core cold rst ctrl: %ld\n",
 			   PTR_ERR(ar_ahb->core_cold_rst));
-		ret = ar_ahb->core_cold_rst ?
-			PTR_ERR(ar_ahb->core_cold_rst) : -ENODEV;
-		goto out;
+		return PTR_ERR(ar_ahb->core_cold_rst);
 	}
 
-	ar_ahb->radio_cold_rst = reset_control_get(dev, "wifi_radio_cold");
-	if (IS_ERR_OR_NULL(ar_ahb->radio_cold_rst)) {
+	ar_ahb->radio_cold_rst = devm_reset_control_get(dev, "wifi_radio_cold");
+	if (IS_ERR(ar_ahb->radio_cold_rst)) {
 		ath10k_err(ar, "failed to get radio cold rst ctrl: %ld\n",
 			   PTR_ERR(ar_ahb->radio_cold_rst));
-		ret = ar_ahb->radio_cold_rst ?
-			PTR_ERR(ar_ahb->radio_cold_rst) : -ENODEV;
-		goto err_core_cold_rst_put;
+		return PTR_ERR(ar_ahb->radio_cold_rst);
 	}
 
-	ar_ahb->radio_warm_rst = reset_control_get(dev, "wifi_radio_warm");
-	if (IS_ERR_OR_NULL(ar_ahb->radio_warm_rst)) {
+	ar_ahb->radio_warm_rst = devm_reset_control_get(dev, "wifi_radio_warm");
+	if (IS_ERR(ar_ahb->radio_warm_rst)) {
 		ath10k_err(ar, "failed to get radio warm rst ctrl: %ld\n",
 			   PTR_ERR(ar_ahb->radio_warm_rst));
-		ret = ar_ahb->radio_warm_rst ?
-			PTR_ERR(ar_ahb->radio_warm_rst) : -ENODEV;
-		goto err_radio_cold_rst_put;
+		return PTR_ERR(ar_ahb->radio_warm_rst);
 	}
 
-	ar_ahb->radio_srif_rst = reset_control_get(dev, "wifi_radio_srif");
-	if (IS_ERR_OR_NULL(ar_ahb->radio_srif_rst)) {
+	ar_ahb->radio_srif_rst = devm_reset_control_get(dev, "wifi_radio_srif");
+	if (IS_ERR(ar_ahb->radio_srif_rst)) {
 		ath10k_err(ar, "failed to get radio srif rst ctrl: %ld\n",
 			   PTR_ERR(ar_ahb->radio_srif_rst));
-		ret = ar_ahb->radio_srif_rst ?
-			PTR_ERR(ar_ahb->radio_srif_rst) : -ENODEV;
-		goto err_radio_warm_rst_put;
+		return PTR_ERR(ar_ahb->radio_srif_rst);
 	}
 
-	ar_ahb->cpu_init_rst = reset_control_get(dev, "wifi_cpu_init");
-	if (IS_ERR_OR_NULL(ar_ahb->cpu_init_rst)) {
+	ar_ahb->cpu_init_rst = devm_reset_control_get(dev, "wifi_cpu_init");
+	if (IS_ERR(ar_ahb->cpu_init_rst)) {
 		ath10k_err(ar, "failed to get cpu init rst ctrl: %ld\n",
 			   PTR_ERR(ar_ahb->cpu_init_rst));
-		ret = ar_ahb->cpu_init_rst ?
-			PTR_ERR(ar_ahb->cpu_init_rst) : -ENODEV;
-		goto err_radio_srif_rst_put;
+		return PTR_ERR(ar_ahb->cpu_init_rst);
 	}
 
 	return 0;
-
-err_radio_srif_rst_put:
-	reset_control_put(ar_ahb->radio_srif_rst);
-
-err_radio_warm_rst_put:
-	reset_control_put(ar_ahb->radio_warm_rst);
-
-err_radio_cold_rst_put:
-	reset_control_put(ar_ahb->radio_cold_rst);
-
-err_core_cold_rst_put:
-	reset_control_put(ar_ahb->core_cold_rst);
-
-out:
-	return ret;
 }
 
 static void ath10k_ahb_rst_ctrl_deinit(struct ath10k *ar)
 {
 	struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar);
 
-	if (!IS_ERR_OR_NULL(ar_ahb->core_cold_rst))
-		reset_control_put(ar_ahb->core_cold_rst);
-
-	if (!IS_ERR_OR_NULL(ar_ahb->radio_cold_rst))
-		reset_control_put(ar_ahb->radio_cold_rst);
-
-	if (!IS_ERR_OR_NULL(ar_ahb->radio_warm_rst))
-		reset_control_put(ar_ahb->radio_warm_rst);
-
-	if (!IS_ERR_OR_NULL(ar_ahb->radio_srif_rst))
-		reset_control_put(ar_ahb->radio_srif_rst);
-
-	if (!IS_ERR_OR_NULL(ar_ahb->cpu_init_rst))
-		reset_control_put(ar_ahb->cpu_init_rst);
-
 	ar_ahb->core_cold_rst = NULL;
 	ar_ahb->radio_cold_rst = NULL;
 	ar_ahb->radio_warm_rst = NULL;
@@ -462,13 +399,13 @@
 static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg)
 {
 	struct ath10k *ar = arg;
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 
 	if (!ath10k_pci_irq_pending(ar))
 		return IRQ_NONE;
 
 	ath10k_pci_disable_and_clear_legacy_irq(ar);
-	tasklet_schedule(&ar_pci->intr_tq);
+	ath10k_pci_irq_msi_fw_mask(ar);
+	napi_schedule(&ar->napi);
 
 	return IRQ_HANDLED;
 }
@@ -572,12 +509,13 @@
 	ar_ahb->irq = platform_get_irq_byname(pdev, "legacy");
 	if (ar_ahb->irq < 0) {
 		ath10k_err(ar, "failed to get irq number: %d\n", ar_ahb->irq);
+		ret = ar_ahb->irq;
 		goto err_clock_deinit;
 	}
 
 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "irq: %d\n", ar_ahb->irq);
 
-	ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%p mem_len: %lu gcc mem: 0x%p tcsr_mem: 0x%p\n",
+	ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%pK mem_len: %lu gcc mem: 0x%pK tcsr_mem: 0x%pK\n",
 		   ar_ahb->mem, ar_ahb->mem_len,
 		   ar_ahb->gcc_mem, ar_ahb->tcsr_mem);
 	return 0;
@@ -717,6 +655,9 @@
 	synchronize_irq(ar_ahb->irq);
 
 	ath10k_pci_flush(ar);
+
+	napi_synchronize(&ar->napi);
+	napi_disable(&ar->napi);
 }
 
 static int ath10k_ahb_hif_power_up(struct ath10k *ar)
@@ -748,6 +689,7 @@
 		ath10k_err(ar, "could not wake up target CPU: %d\n", ret);
 		goto err_ce_deinit;
 	}
+	napi_enable(&ar->napi);
 
 	return 0;
 
@@ -831,7 +773,7 @@
 		goto err_resource_deinit;
 	}
 
-	ath10k_pci_init_irq_tasklets(ar);
+	ath10k_pci_init_napi(ar);
 
 	ret = ath10k_ahb_request_irq_legacy(ar);
 	if (ret)
@@ -846,6 +788,7 @@
 	chip_id = ath10k_ahb_soc_read32(ar, SOC_CHIP_ID_ADDRESS);
 	if (chip_id == 0xffffffff) {
 		ath10k_err(ar, "failed to get chip id\n");
+		ret = -ENODEV;
 		goto err_halt_device;
 	}
 
diff --git a/drivers/net/wireless/ath/ath10k/bmi.c b/drivers/net/wireless/ath/ath10k/bmi.c
index 3d29b08..2872d34 100644
--- a/drivers/net/wireless/ath/ath10k/bmi.c
+++ b/drivers/net/wireless/ath/ath10k/bmi.c
@@ -221,7 +221,7 @@
 	u32 txlen;
 	int ret;
 
-	ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%p length %d\n",
+	ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%pK length %d\n",
 		   buffer, length);
 
 	if (ar->bmi.done_sent) {
@@ -287,7 +287,7 @@
 	int ret;
 
 	ath10k_dbg(ar, ATH10K_DBG_BMI,
-		   "bmi fast download address 0x%x buffer 0x%p length %d\n",
+		   "bmi fast download address 0x%x buffer 0x%pK length %d\n",
 		   address, buffer, length);
 
 	ret = ath10k_bmi_lz_stream_start(ar, address);
diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c
index 9fb8d74..0b4d796 100644
--- a/drivers/net/wireless/ath/ath10k/ce.c
+++ b/drivers/net/wireless/ath/ath10k/ce.c
@@ -39,7 +39,7 @@
  * chooses what to send (buffer address, length). The destination
  * side keeps a supply of "anonymous receive buffers" available and
  * it handles incoming data as it arrives (when the destination
- * recieves an interrupt).
+ * receives an interrupt).
  *
  * The sender may send a simple buffer (address/length) or it may
  * send a small list of buffers.  When a small list is sent, hardware
@@ -433,6 +433,13 @@
 	unsigned int nentries_mask = dest_ring->nentries_mask;
 	unsigned int write_index = dest_ring->write_index;
 	u32 ctrl_addr = pipe->ctrl_addr;
+	u32 cur_write_idx = ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr);
+
+	/* Prevent CE ring stuck issue that will occur when ring is full.
+	 * Make sure that write index is 1 less than read index.
+	 */
+	if ((cur_write_idx + nentries)  == dest_ring->sw_index)
+		nentries -= 1;
 
 	write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries);
 	ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index);
@@ -840,7 +847,7 @@
 	ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries);
 
 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
-		   "boot init ce src ring id %d entries %d base_addr %p\n",
+		   "boot init ce src ring id %d entries %d base_addr %pK\n",
 		   ce_id, nentries, src_ring->base_addr_owner_space);
 
 	return 0;
@@ -874,7 +881,7 @@
 	ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries);
 
 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
-		   "boot ce dest ring id %d entries %d base_addr %p\n",
+		   "boot ce dest ring id %d entries %d base_addr %pK\n",
 		   ce_id, nentries, dest_ring->base_addr_owner_space);
 
 	return 0;
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index e889829..21ae8d6 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -60,7 +60,6 @@
 		.otp_exe_param = 0,
 		.channel_counters_freq_hz = 88000,
 		.max_probe_resp_desc_thres = 0,
-		.hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER,
 		.cal_data_len = 2116,
 		.fw = {
 			.dir = QCA988X_HW_2_0_FW_DIR,
@@ -68,6 +67,8 @@
 			.board_size = QCA988X_BOARD_DATA_SZ,
 			.board_ext_size = QCA988X_BOARD_EXT_DATA_SZ,
 		},
+		.hw_ops = &qca988x_ops,
+		.decap_align_bytes = 4,
 	},
 	{
 		.id = QCA9887_HW_1_0_VERSION,
@@ -79,7 +80,6 @@
 		.otp_exe_param = 0,
 		.channel_counters_freq_hz = 88000,
 		.max_probe_resp_desc_thres = 0,
-		.hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER,
 		.cal_data_len = 2116,
 		.fw = {
 			.dir = QCA9887_HW_1_0_FW_DIR,
@@ -87,6 +87,8 @@
 			.board_size = QCA9887_BOARD_DATA_SZ,
 			.board_ext_size = QCA9887_BOARD_EXT_DATA_SZ,
 		},
+		.hw_ops = &qca988x_ops,
+		.decap_align_bytes = 4,
 	},
 	{
 		.id = QCA6174_HW_2_1_VERSION,
@@ -104,6 +106,8 @@
 			.board_size = QCA6174_BOARD_DATA_SZ,
 			.board_ext_size = QCA6174_BOARD_EXT_DATA_SZ,
 		},
+		.hw_ops = &qca988x_ops,
+		.decap_align_bytes = 4,
 	},
 	{
 		.id = QCA6174_HW_2_1_VERSION,
@@ -114,7 +118,6 @@
 		.otp_exe_param = 0,
 		.channel_counters_freq_hz = 88000,
 		.max_probe_resp_desc_thres = 0,
-		.hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER,
 		.cal_data_len = 8124,
 		.fw = {
 			.dir = QCA6174_HW_2_1_FW_DIR,
@@ -122,6 +125,8 @@
 			.board_size = QCA6174_BOARD_DATA_SZ,
 			.board_ext_size = QCA6174_BOARD_EXT_DATA_SZ,
 		},
+		.hw_ops = &qca988x_ops,
+		.decap_align_bytes = 4,
 	},
 	{
 		.id = QCA6174_HW_3_0_VERSION,
@@ -132,7 +137,6 @@
 		.otp_exe_param = 0,
 		.channel_counters_freq_hz = 88000,
 		.max_probe_resp_desc_thres = 0,
-		.hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER,
 		.cal_data_len = 8124,
 		.fw = {
 			.dir = QCA6174_HW_3_0_FW_DIR,
@@ -140,6 +144,8 @@
 			.board_size = QCA6174_BOARD_DATA_SZ,
 			.board_ext_size = QCA6174_BOARD_EXT_DATA_SZ,
 		},
+		.hw_ops = &qca988x_ops,
+		.decap_align_bytes = 4,
 	},
 	{
 		.id = QCA6174_HW_3_2_VERSION,
@@ -150,7 +156,6 @@
 		.otp_exe_param = 0,
 		.channel_counters_freq_hz = 88000,
 		.max_probe_resp_desc_thres = 0,
-		.hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER,
 		.cal_data_len = 8124,
 		.fw = {
 			/* uses same binaries as hw3.0 */
@@ -159,6 +164,8 @@
 			.board_size = QCA6174_BOARD_DATA_SZ,
 			.board_ext_size = QCA6174_BOARD_EXT_DATA_SZ,
 		},
+		.hw_ops = &qca988x_ops,
+		.decap_align_bytes = 4,
 	},
 	{
 		.id = QCA99X0_HW_2_0_DEV_VERSION,
@@ -171,7 +178,6 @@
 		.cck_rate_map_rev2 = true,
 		.channel_counters_freq_hz = 150000,
 		.max_probe_resp_desc_thres = 24,
-		.hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE,
 		.tx_chain_mask = 0xf,
 		.rx_chain_mask = 0xf,
 		.max_spatial_stream = 4,
@@ -182,6 +188,9 @@
 			.board_size = QCA99X0_BOARD_DATA_SZ,
 			.board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ,
 		},
+		.sw_decrypt_mcast_mgmt = true,
+		.hw_ops = &qca99x0_ops,
+		.decap_align_bytes = 1,
 	},
 	{
 		.id = QCA9984_HW_1_0_DEV_VERSION,
@@ -194,7 +203,6 @@
 		.cck_rate_map_rev2 = true,
 		.channel_counters_freq_hz = 150000,
 		.max_probe_resp_desc_thres = 24,
-		.hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE,
 		.tx_chain_mask = 0xf,
 		.rx_chain_mask = 0xf,
 		.max_spatial_stream = 4,
@@ -205,6 +213,9 @@
 			.board_size = QCA99X0_BOARD_DATA_SZ,
 			.board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ,
 		},
+		.sw_decrypt_mcast_mgmt = true,
+		.hw_ops = &qca99x0_ops,
+		.decap_align_bytes = 1,
 	},
 	{
 		.id = QCA9888_HW_2_0_DEV_VERSION,
@@ -216,7 +227,6 @@
 		.continuous_frag_desc = true,
 		.channel_counters_freq_hz = 150000,
 		.max_probe_resp_desc_thres = 24,
-		.hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE,
 		.tx_chain_mask = 3,
 		.rx_chain_mask = 3,
 		.max_spatial_stream = 2,
@@ -227,6 +237,9 @@
 			.board_size = QCA99X0_BOARD_DATA_SZ,
 			.board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ,
 		},
+		.sw_decrypt_mcast_mgmt = true,
+		.hw_ops = &qca99x0_ops,
+		.decap_align_bytes = 1,
 	},
 	{
 		.id = QCA9377_HW_1_0_DEV_VERSION,
@@ -244,6 +257,8 @@
 			.board_size = QCA9377_BOARD_DATA_SZ,
 			.board_ext_size = QCA9377_BOARD_EXT_DATA_SZ,
 		},
+		.hw_ops = &qca988x_ops,
+		.decap_align_bytes = 4,
 	},
 	{
 		.id = QCA9377_HW_1_1_DEV_VERSION,
@@ -261,6 +276,8 @@
 			.board_size = QCA9377_BOARD_DATA_SZ,
 			.board_ext_size = QCA9377_BOARD_EXT_DATA_SZ,
 		},
+		.hw_ops = &qca988x_ops,
+		.decap_align_bytes = 4,
 	},
 	{
 		.id = QCA4019_HW_1_0_DEV_VERSION,
@@ -274,7 +291,6 @@
 		.cck_rate_map_rev2 = true,
 		.channel_counters_freq_hz = 125000,
 		.max_probe_resp_desc_thres = 24,
-		.hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE,
 		.tx_chain_mask = 0x3,
 		.rx_chain_mask = 0x3,
 		.max_spatial_stream = 2,
@@ -285,6 +301,9 @@
 			.board_size = QCA4019_BOARD_DATA_SZ,
 			.board_ext_size = QCA4019_BOARD_EXT_DATA_SZ,
 		},
+		.sw_decrypt_mcast_mgmt = true,
+		.hw_ops = &qca99x0_ops,
+		.decap_align_bytes = 1,
 	},
 };
 
@@ -304,6 +323,7 @@
 	[ATH10K_FW_FEATURE_MFP_SUPPORT] = "mfp",
 	[ATH10K_FW_FEATURE_PEER_FLOW_CONTROL] = "peer-flow-ctrl",
 	[ATH10K_FW_FEATURE_BTCOEX_PARAM] = "btcoex-param",
+	[ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR] = "skip-null-func-war",
 };
 
 static unsigned int ath10k_core_get_fw_feature_str(char *buf,
@@ -699,7 +719,7 @@
 
 	if (!ar->running_fw->fw_file.otp_data ||
 	    !ar->running_fw->fw_file.otp_len) {
-		ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %p otp_len %zd)!\n",
+		ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %pK otp_len %zd)!\n",
 			    ar->running_fw->fw_file.otp_data,
 			    ar->running_fw->fw_file.otp_len);
 		return 0;
@@ -745,7 +765,7 @@
 	data = ar->running_fw->fw_file.firmware_data;
 	data_len = ar->running_fw->fw_file.firmware_len;
 
-	ret = ath10k_swap_code_seg_configure(ar);
+	ret = ath10k_swap_code_seg_configure(ar, &ar->running_fw->fw_file);
 	if (ret) {
 		ath10k_err(ar, "failed to configure fw code swap: %d\n",
 			   ret);
@@ -753,7 +773,7 @@
 	}
 
 	ath10k_dbg(ar, ATH10K_DBG_BOOT,
-		   "boot uploading firmware image %p len %d\n",
+		   "boot uploading firmware image %pK len %d\n",
 		   data, data_len);
 
 	ret = ath10k_bmi_fast_download(ar, address, data, data_len);
@@ -787,7 +807,7 @@
 	if (!IS_ERR(ar->pre_cal_file))
 		release_firmware(ar->pre_cal_file);
 
-	ath10k_swap_code_seg_release(ar);
+	ath10k_swap_code_seg_release(ar, &ar->normal_mode_fw.fw_file);
 
 	ar->normal_mode_fw.fw_file.otp_data = NULL;
 	ar->normal_mode_fw.fw_file.otp_len = 0;
@@ -1497,14 +1517,14 @@
 
 	ieee80211_stop_queues(ar->hw);
 	ath10k_drain_tx(ar);
-	complete_all(&ar->scan.started);
-	complete_all(&ar->scan.completed);
-	complete_all(&ar->scan.on_channel);
-	complete_all(&ar->offchan_tx_completed);
-	complete_all(&ar->install_key_done);
-	complete_all(&ar->vdev_setup_done);
-	complete_all(&ar->thermal.wmi_sync);
-	complete_all(&ar->bss_survey_done);
+	complete(&ar->scan.started);
+	complete(&ar->scan.completed);
+	complete(&ar->scan.on_channel);
+	complete(&ar->offchan_tx_completed);
+	complete(&ar->install_key_done);
+	complete(&ar->vdev_setup_done);
+	complete(&ar->thermal.wmi_sync);
+	complete(&ar->bss_survey_done);
 	wake_up(&ar->htt.empty_tx_wq);
 	wake_up(&ar->wmi.tx_credits_wq);
 	wake_up(&ar->peer_mapping_wq);
@@ -1705,6 +1725,55 @@
 	return 0;
 }
 
+static int ath10k_core_reset_rx_filter(struct ath10k *ar)
+{
+	int ret;
+	int vdev_id;
+	int vdev_type;
+	int vdev_subtype;
+	const u8 *vdev_addr;
+
+	vdev_id = 0;
+	vdev_type = WMI_VDEV_TYPE_STA;
+	vdev_subtype = ath10k_wmi_get_vdev_subtype(ar, WMI_VDEV_SUBTYPE_NONE);
+	vdev_addr = ar->mac_addr;
+
+	ret = ath10k_wmi_vdev_create(ar, vdev_id, vdev_type, vdev_subtype,
+				     vdev_addr);
+	if (ret) {
+		ath10k_err(ar, "failed to create dummy vdev: %d\n", ret);
+		return ret;
+	}
+
+	ret = ath10k_wmi_vdev_delete(ar, vdev_id);
+	if (ret) {
+		ath10k_err(ar, "failed to delete dummy vdev: %d\n", ret);
+		return ret;
+	}
+
+	/* WMI and HTT may use separate HIF pipes and are not guaranteed to be
+	 * serialized properly implicitly.
+	 *
+	 * Moreover (most) WMI commands have no explicit acknowledges. It is
+	 * possible to infer it implicitly by poking firmware with echo
+	 * command - getting a reply means all preceding comments have been
+	 * (mostly) processed.
+	 *
+	 * In case of vdev create/delete this is sufficient.
+	 *
+	 * Without this it's possible to end up with a race when HTT Rx ring is
+	 * started before vdev create/delete hack is complete allowing a short
+	 * window of opportunity to receive (and Tx ACK) a bunch of frames.
+	 */
+	ret = ath10k_wmi_barrier(ar);
+	if (ret) {
+		ath10k_err(ar, "failed to ping firmware: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
 int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode,
 		      const struct ath10k_fw_components *fw)
 {
@@ -1872,6 +1941,25 @@
 		goto err_hif_stop;
 	}
 
+	/* Some firmware revisions do not properly set up hardware rx filter
+	 * registers.
+	 *
+	 * A known example from QCA9880 and 10.2.4 is that MAC_PCU_ADDR1_MASK
+	 * is filled with 0s instead of 1s allowing HW to respond with ACKs to
+	 * any frames that matches MAC_PCU_RX_FILTER which is also
+	 * misconfigured to accept anything.
+	 *
+	 * The ADDR1 is programmed using internal firmware structure field and
+	 * can't be (easily/sanely) reached from the driver explicitly. It is
+	 * possible to implicitly make it correct by creating a dummy vdev and
+	 * then deleting it.
+	 */
+	status = ath10k_core_reset_rx_filter(ar);
+	if (status) {
+		ath10k_err(ar, "failed to reset rx filter: %d\n", status);
+		goto err_hif_stop;
+	}
+
 	/* If firmware indicates Full Rx Reorder support it must be used in a
 	 * slightly different manner. Let HTT code know.
 	 */
@@ -1884,7 +1972,10 @@
 		goto err_hif_stop;
 	}
 
-	ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1;
+	if (ar->max_num_vdevs >= 64)
+		ar->free_vdev_map = 0xFFFFFFFFFFFFFFFFLL;
+	else
+		ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1;
 
 	INIT_LIST_HEAD(&ar->arvifs);
 
@@ -2031,7 +2122,7 @@
 		goto err_free_firmware_files;
 	}
 
-	ret = ath10k_swap_code_seg_init(ar);
+	ret = ath10k_swap_code_seg_init(ar, &ar->normal_mode_fw.fw_file);
 	if (ret) {
 		ath10k_err(ar, "failed to initialize code swap segment: %d\n",
 			   ret);
@@ -2072,6 +2163,9 @@
 	struct ath10k *ar = container_of(work, struct ath10k, register_work);
 	int status;
 
+	/* peer stats are enabled by default */
+	set_bit(ATH10K_FLAG_PEER_STATS, &ar->dev_flags);
+
 	status = ath10k_core_probe_fw(ar);
 	if (status) {
 		ath10k_err(ar, "could not probe fw (%d)\n", status);
@@ -2249,6 +2343,8 @@
 	INIT_WORK(&ar->register_work, ath10k_core_register_work);
 	INIT_WORK(&ar->restart_work, ath10k_core_restart);
 
+	init_dummy_netdev(&ar->napi_dev);
+
 	ret = ath10k_debug_create(ar);
 	if (ret)
 		goto err_free_aux_wq;
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 30ae5bf..dda49af 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -65,6 +65,10 @@
 #define ATH10K_KEEPALIVE_MAX_IDLE 3895
 #define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900
 
+/* NAPI poll budget */
+#define ATH10K_NAPI_BUDGET      64
+#define ATH10K_NAPI_QUOTA_LIMIT 60
+
 struct ath10k;
 
 enum ath10k_bus {
@@ -142,6 +146,7 @@
 	enum ath10k_htc_ep_id eid;
 	struct completion service_ready;
 	struct completion unified_ready;
+	struct completion barrier;
 	wait_queue_head_t tx_credits_wq;
 	DECLARE_BITMAP(svc_map, WMI_SERVICE_MAX);
 	struct wmi_cmd_map *cmd;
@@ -196,10 +201,10 @@
 
 	/* PDEV stats */
 	s32 ch_noise_floor;
-	u32 tx_frame_count;
-	u32 rx_frame_count;
-	u32 rx_clear_count;
-	u32 cycle_count;
+	u32 tx_frame_count; /* Cycles spent transmitting frames */
+	u32 rx_frame_count; /* Cycles spent receiving frames */
+	u32 rx_clear_count; /* Total channel busy time, evidently */
+	u32 cycle_count; /* Total on-channel time */
 	u32 phy_err_count;
 	u32 chan_tx_power;
 	u32 ack_rx_bad;
@@ -440,7 +445,7 @@
 	struct completion tpc_complete;
 
 	/* protected by conf_mutex */
-	u32 fw_dbglog_mask;
+	u64 fw_dbglog_mask;
 	u32 fw_dbglog_level;
 	u32 pktlog_filter;
 	u32 reg_addr;
@@ -551,6 +556,13 @@
 	 */
 	ATH10K_FW_FEATURE_BTCOEX_PARAM = 14,
 
+	/* Older firmware with HTT delivers incorrect tx status for null func
+	 * frames to driver, but this fixed in 10.2 and 10.4 firmware versions.
+	 * Also this workaround results in reporting of incorrect null func
+	 * status for 10.4. This flag is used to skip the workaround.
+	 */
+	ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR = 15,
+
 	/* keep last */
 	ATH10K_FW_FEATURE_COUNT,
 };
@@ -663,6 +675,15 @@
 
 	const void *codeswap_data;
 	size_t codeswap_len;
+
+	/* The original idea of struct ath10k_fw_file was that it only
+	 * contains struct firmware and pointers to various parts (actual
+	 * firmware binary, otp, metadata etc) of the file. This seg_info
+	 * is actually created separate but as this is used similarly as
+	 * the other firmware components it's more convenient to have it
+	 * here.
+	 */
+	struct ath10k_swap_code_seg_info *firmware_swap_code_seg_info;
 };
 
 struct ath10k_fw_components {
@@ -715,53 +736,7 @@
 	struct ath10k_htc htc;
 	struct ath10k_htt htt;
 
-	struct ath10k_hw_params {
-		u32 id;
-		u16 dev_id;
-		const char *name;
-		u32 patch_load_addr;
-		int uart_pin;
-		u32 otp_exe_param;
-
-		/* Type of hw cycle counter wraparound logic, for more info
-		 * refer enum ath10k_hw_cc_wraparound_type.
-		 */
-		enum ath10k_hw_cc_wraparound_type cc_wraparound_type;
-
-		/* Some of chip expects fragment descriptor to be continuous
-		 * memory for any TX operation. Set continuous_frag_desc flag
-		 * for the hardware which have such requirement.
-		 */
-		bool continuous_frag_desc;
-
-		/* CCK hardware rate table mapping for the newer chipsets
-		 * like QCA99X0, QCA4019 got revised. The CCK h/w rate values
-		 * are in a proper order with respect to the rate/preamble
-		 */
-		bool cck_rate_map_rev2;
-
-		u32 channel_counters_freq_hz;
-
-		/* Mgmt tx descriptors threshold for limiting probe response
-		 * frames.
-		 */
-		u32 max_probe_resp_desc_thres;
-
-		/* The padding bytes's location is different on various chips */
-		enum ath10k_hw_4addr_pad hw_4addr_pad;
-
-		u32 tx_chain_mask;
-		u32 rx_chain_mask;
-		u32 max_spatial_stream;
-		u32 cal_data_len;
-
-		struct ath10k_hw_params_fw {
-			const char *dir;
-			const char *board;
-			size_t board_size;
-			size_t board_ext_size;
-		} fw;
-	} hw_params;
+	struct ath10k_hw_params hw_params;
 
 	/* contains the firmware images used with ATH10K_FIRMWARE_MODE_NORMAL */
 	struct ath10k_fw_components normal_mode_fw;
@@ -775,10 +750,6 @@
 	const struct firmware *cal_file;
 
 	struct {
-		struct ath10k_swap_code_seg_info *firmware_swap_code_seg_info;
-	} swap;
-
-	struct {
 		u32 vendor;
 		u32 device;
 		u32 subsystem_vendor;
@@ -936,6 +907,10 @@
 	struct ath10k_thermal thermal;
 	struct ath10k_wow wow;
 
+	/* NAPI */
+	struct net_device napi_dev;
+	struct napi_struct napi;
+
 	/* must be last */
 	u8 drv_priv[0] __aligned(sizeof(void *));
 };
diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 8f0fd41..832da6e 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -1228,9 +1228,9 @@
 {
 	struct ath10k *ar = file->private_data;
 	unsigned int len;
-	char buf[64];
+	char buf[96];
 
-	len = scnprintf(buf, sizeof(buf), "0x%08x %u\n",
+	len = scnprintf(buf, sizeof(buf), "0x%16llx %u\n",
 			ar->debug.fw_dbglog_mask, ar->debug.fw_dbglog_level);
 
 	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
@@ -1242,15 +1242,16 @@
 {
 	struct ath10k *ar = file->private_data;
 	int ret;
-	char buf[64];
-	unsigned int log_level, mask;
+	char buf[96];
+	unsigned int log_level;
+	u64 mask;
 
 	simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count);
 
 	/* make sure that buf is null terminated */
 	buf[sizeof(buf) - 1] = 0;
 
-	ret = sscanf(buf, "%x %u", &mask, &log_level);
+	ret = sscanf(buf, "%llx %u", &mask, &log_level);
 
 	if (!ret)
 		return -EINVAL;
diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c
index 5b3c6bc..175aae3 100644
--- a/drivers/net/wireless/ath/ath10k/htc.c
+++ b/drivers/net/wireless/ath/ath10k/htc.c
@@ -44,7 +44,7 @@
 	skb_cb = ATH10K_SKB_CB(skb);
 	memset(skb_cb, 0, sizeof(*skb_cb));
 
-	ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %p\n", __func__, skb);
+	ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %pK\n", __func__, skb);
 	return skb;
 }
 
@@ -62,7 +62,7 @@
 {
 	struct ath10k *ar = ep->htc->ar;
 
-	ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %p\n", __func__,
+	ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %pK\n", __func__,
 		   ep->eid, skb);
 
 	ath10k_htc_restore_tx_skb(ep->htc, skb);
@@ -404,7 +404,7 @@
 		goto out;
 	}
 
-	ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %p\n",
+	ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %pK\n",
 		   eid, skb);
 	ep->ep_ops.ep_rx_complete(ar, skb);
 
diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index 430a83e..0d2ed09 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -595,7 +595,7 @@
 	/* only accept EAPOL frames */
 	HTT_RX_IND_MPDU_STATUS_UNAUTH_PEER,
 	HTT_RX_IND_MPDU_STATUS_OUT_OF_SYNC,
-	/* Non-data in promiscous mode */
+	/* Non-data in promiscuous mode */
 	HTT_RX_IND_MPDU_STATUS_MGMT_CTRL,
 	HTT_RX_IND_MPDU_STATUS_TKIP_MIC_ERR,
 	HTT_RX_IND_MPDU_STATUS_DECRYPT_ERR,
@@ -900,7 +900,7 @@
  *     Purpose: indicate how many 32-bit integers follow the message header
  *   - NUM_CHARS
  *     Bits 31:16
- *     Purpose: indicate how many 8-bit charaters follow the series of integers
+ *     Purpose: indicate how many 8-bit characters follow the series of integers
  */
 struct htt_rx_test {
 	u8 num_ints;
@@ -1042,10 +1042,10 @@
 	/* illegal rate phy errors  */
 	__le32 illgl_rate_phy_err;
 
-	/* wal pdev continous xretry */
+	/* wal pdev continuous xretry */
 	__le32 pdev_cont_xretry;
 
-	/* wal pdev continous xretry */
+	/* wal pdev continuous xretry */
 	__le32 pdev_tx_timeout;
 
 	/* wal pdev resets  */
@@ -1665,7 +1665,6 @@
 
 	/* This is used to group tx/rx completions separately and process them
 	 * in batches to reduce cache stalls */
-	struct tasklet_struct txrx_compl_task;
 	struct sk_buff_head rx_compl_q;
 	struct sk_buff_head rx_in_ord_compl_q;
 	struct sk_buff_head tx_fetch_ind_q;
@@ -1798,5 +1797,6 @@
 		  struct sk_buff *msdu);
 void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar,
 					     struct sk_buff *skb);
+int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget);
 
 #endif
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 78db5d6..0b4c156 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -34,7 +34,6 @@
 #define HTT_RX_RING_REFILL_RESCHED_MS 5
 
 static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb);
-static void ath10k_htt_txrx_compl_task(unsigned long ptr);
 
 static struct sk_buff *
 ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr)
@@ -226,7 +225,6 @@
 void ath10k_htt_rx_free(struct ath10k_htt *htt)
 {
 	del_timer_sync(&htt->rx_ring.refill_retry_timer);
-	tasklet_kill(&htt->txrx_compl_task);
 
 	skb_queue_purge(&htt->rx_compl_q);
 	skb_queue_purge(&htt->rx_in_ord_compl_q);
@@ -520,9 +518,6 @@
 	skb_queue_head_init(&htt->tx_fetch_ind_q);
 	atomic_set(&htt->num_mpdus_ready, 0);
 
-	tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task,
-		     (unsigned long)htt);
-
 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level %d\n",
 		   htt->rx_ring.size, htt->rx_ring.fill_level);
 	return 0;
@@ -931,7 +926,7 @@
 	*status = *rx_status;
 
 	ath10k_dbg(ar, ATH10K_DBG_DATA,
-		   "rx skb %p len %u peer %pM %s %s sn %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%llx fcs-err %i mic-err %i amsdu-more %i\n",
+		   "rx skb %pK len %u peer %pM %s %s sn %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%llx fcs-err %i mic-err %i amsdu-more %i\n",
 		   skb,
 		   skb->len,
 		   ieee80211_get_SA(hdr),
@@ -958,7 +953,7 @@
 	trace_ath10k_rx_hdr(ar, skb->data, skb->len);
 	trace_ath10k_rx_payload(ar, skb->data, skb->len);
 
-	ieee80211_rx(ar->hw, skb);
+	ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi);
 }
 
 static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar,
@@ -1056,9 +1051,11 @@
 					  const u8 first_hdr[64])
 {
 	struct ieee80211_hdr *hdr;
+	struct htt_rx_desc *rxd;
 	size_t hdr_len;
 	u8 da[ETH_ALEN];
 	u8 sa[ETH_ALEN];
+	int l3_pad_bytes;
 
 	/* Delivered decapped frame:
 	 * [nwifi 802.11 header] <-- replaced with 802.11 hdr
@@ -1072,19 +1069,12 @@
 	 */
 
 	/* pull decapped header and copy SA & DA */
-	if ((ar->hw_params.hw_4addr_pad == ATH10K_HW_4ADDR_PAD_BEFORE) &&
-	    ieee80211_has_a4(((struct ieee80211_hdr *)first_hdr)->frame_control)) {
-		/* The QCA99X0 4 address mode pad 2 bytes at the
-		 * beginning of MSDU
-		 */
-		hdr = (struct ieee80211_hdr *)(msdu->data + 2);
-		/* The skb length need be extended 2 as the 2 bytes at the tail
-		 * be excluded due to the padding
-		 */
-		skb_put(msdu, 2);
-	} else {
-		hdr = (struct ieee80211_hdr *)(msdu->data);
-	}
+	rxd = (void *)msdu->data - sizeof(*rxd);
+
+	l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd);
+	skb_put(msdu, l3_pad_bytes);
+
+	hdr = (struct ieee80211_hdr *)(msdu->data + l3_pad_bytes);
 
 	hdr_len = ath10k_htt_rx_nwifi_hdrlen(ar, hdr);
 	ether_addr_copy(da, ieee80211_get_DA(hdr));
@@ -1113,6 +1103,7 @@
 	size_t hdr_len, crypto_len;
 	void *rfc1042;
 	bool is_first, is_last, is_amsdu;
+	int bytes_aligned = ar->hw_params.decap_align_bytes;
 
 	rxd = (void *)msdu->data - sizeof(*rxd);
 	hdr = (void *)rxd->rx_hdr_status;
@@ -1129,8 +1120,8 @@
 		hdr_len = ieee80211_hdrlen(hdr->frame_control);
 		crypto_len = ath10k_htt_rx_crypto_param_len(ar, enctype);
 
-		rfc1042 += round_up(hdr_len, 4) +
-			   round_up(crypto_len, 4);
+		rfc1042 += round_up(hdr_len, bytes_aligned) +
+			   round_up(crypto_len, bytes_aligned);
 	}
 
 	if (is_amsdu)
@@ -1151,6 +1142,8 @@
 	void *rfc1042;
 	u8 da[ETH_ALEN];
 	u8 sa[ETH_ALEN];
+	int l3_pad_bytes;
+	struct htt_rx_desc *rxd;
 
 	/* Delivered decapped frame:
 	 * [eth header] <-- replaced with 802.11 hdr & rfc1042/llc
@@ -1161,6 +1154,11 @@
 	if (WARN_ON_ONCE(!rfc1042))
 		return;
 
+	rxd = (void *)msdu->data - sizeof(*rxd);
+	l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd);
+	skb_put(msdu, l3_pad_bytes);
+	skb_pull(msdu, l3_pad_bytes);
+
 	/* pull decapped header and copy SA & DA */
 	eth = (struct ethhdr *)msdu->data;
 	ether_addr_copy(da, eth->h_dest);
@@ -1191,6 +1189,8 @@
 {
 	struct ieee80211_hdr *hdr;
 	size_t hdr_len;
+	int l3_pad_bytes;
+	struct htt_rx_desc *rxd;
 
 	/* Delivered decapped frame:
 	 * [amsdu header] <-- replaced with 802.11 hdr
@@ -1198,7 +1198,11 @@
 	 * [payload]
 	 */
 
-	skb_pull(msdu, sizeof(struct amsdu_subframe_hdr));
+	rxd = (void *)msdu->data - sizeof(*rxd);
+	l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd);
+
+	skb_put(msdu, l3_pad_bytes);
+	skb_pull(msdu, sizeof(struct amsdu_subframe_hdr) + l3_pad_bytes);
 
 	hdr = (struct ieee80211_hdr *)first_hdr;
 	hdr_len = ieee80211_hdrlen(hdr->frame_control);
@@ -1525,9 +1529,9 @@
 static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt)
 {
 	struct ath10k *ar = htt->ar;
-	static struct ieee80211_rx_status rx_status;
+	struct ieee80211_rx_status *rx_status = &htt->rx_status;
 	struct sk_buff_head amsdu;
-	int ret;
+	int ret, num_msdus;
 
 	__skb_queue_head_init(&amsdu);
 
@@ -1549,13 +1553,14 @@
 		return ret;
 	}
 
-	ath10k_htt_rx_h_ppdu(ar, &amsdu, &rx_status, 0xffff);
+	num_msdus = skb_queue_len(&amsdu);
+	ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff);
 	ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0);
-	ath10k_htt_rx_h_filter(ar, &amsdu, &rx_status);
-	ath10k_htt_rx_h_mpdu(ar, &amsdu, &rx_status);
-	ath10k_htt_rx_h_deliver(ar, &amsdu, &rx_status);
+	ath10k_htt_rx_h_filter(ar, &amsdu, rx_status);
+	ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status);
+	ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status);
 
-	return 0;
+	return num_msdus;
 }
 
 static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt,
@@ -1579,15 +1584,6 @@
 		mpdu_count += mpdu_ranges[i].mpdu_count;
 
 	atomic_add(mpdu_count, &htt->num_mpdus_ready);
-
-	tasklet_schedule(&htt->txrx_compl_task);
-}
-
-static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt)
-{
-	atomic_inc(&htt->num_mpdus_ready);
-
-	tasklet_schedule(&htt->txrx_compl_task);
 }
 
 static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar,
@@ -1772,14 +1768,15 @@
 			RX_FLAG_MMIC_STRIPPED;
 }
 
-static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
-				       struct sk_buff_head *list)
+static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar,
+				      struct sk_buff_head *list)
 {
 	struct ath10k_htt *htt = &ar->htt;
 	struct ieee80211_rx_status *status = &htt->rx_status;
 	struct htt_rx_offload_msdu *rx;
 	struct sk_buff *msdu;
 	size_t offset;
+	int num_msdu = 0;
 
 	while ((msdu = __skb_dequeue(list))) {
 		/* Offloaded frames don't have Rx descriptor. Instead they have
@@ -1819,10 +1816,12 @@
 		ath10k_htt_rx_h_rx_offload_prot(status, msdu);
 		ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id);
 		ath10k_process_rx(ar, status, msdu);
+		num_msdu++;
 	}
+	return num_msdu;
 }
 
-static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
+static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb)
 {
 	struct ath10k_htt *htt = &ar->htt;
 	struct htt_resp *resp = (void *)skb->data;
@@ -1835,12 +1834,12 @@
 	u8 tid;
 	bool offload;
 	bool frag;
-	int ret;
+	int ret, num_msdus = 0;
 
 	lockdep_assert_held(&htt->rx_ring.lock);
 
 	if (htt->rx_confused)
-		return;
+		return -EIO;
 
 	skb_pull(skb, sizeof(resp->hdr));
 	skb_pull(skb, sizeof(resp->rx_in_ord_ind));
@@ -1859,7 +1858,7 @@
 
 	if (skb->len < msdu_count * sizeof(*resp->rx_in_ord_ind.msdu_descs)) {
 		ath10k_warn(ar, "dropping invalid in order rx indication\n");
-		return;
+		return -EINVAL;
 	}
 
 	/* The event can deliver more than 1 A-MSDU. Each A-MSDU is later
@@ -1870,14 +1869,14 @@
 	if (ret < 0) {
 		ath10k_warn(ar, "failed to pop paddr list: %d\n", ret);
 		htt->rx_confused = true;
-		return;
+		return -EIO;
 	}
 
 	/* Offloaded frames are very different and need to be handled
 	 * separately.
 	 */
 	if (offload)
-		ath10k_htt_rx_h_rx_offload(ar, &list);
+		num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list);
 
 	while (!skb_queue_empty(&list)) {
 		__skb_queue_head_init(&amsdu);
@@ -1890,6 +1889,7 @@
 			 * better to report something than nothing though. This
 			 * should still give an idea about rx rate to the user.
 			 */
+			num_msdus += skb_queue_len(&amsdu);
 			ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id);
 			ath10k_htt_rx_h_filter(ar, &amsdu, status);
 			ath10k_htt_rx_h_mpdu(ar, &amsdu, status);
@@ -1902,9 +1902,10 @@
 			ath10k_warn(ar, "failed to extract amsdu: %d\n", ret);
 			htt->rx_confused = true;
 			__skb_queue_purge(&list);
-			return;
+			return -EIO;
 		}
 	}
+	return num_msdus;
 }
 
 static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
@@ -2267,7 +2268,6 @@
 	}
 	case HTT_T2H_MSG_TYPE_TX_COMPL_IND:
 		ath10k_htt_rx_tx_compl_ind(htt->ar, skb);
-		tasklet_schedule(&htt->txrx_compl_task);
 		break;
 	case HTT_T2H_MSG_TYPE_SEC_IND: {
 		struct ath10k *ar = htt->ar;
@@ -2284,7 +2284,7 @@
 	case HTT_T2H_MSG_TYPE_RX_FRAG_IND: {
 		ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ",
 				skb->data, skb->len);
-		ath10k_htt_rx_frag_handler(htt);
+		atomic_inc(&htt->num_mpdus_ready);
 		break;
 	}
 	case HTT_T2H_MSG_TYPE_TEST:
@@ -2320,8 +2320,7 @@
 		break;
 	}
 	case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: {
-		skb_queue_tail(&htt->rx_in_ord_compl_q, skb);
-		tasklet_schedule(&htt->txrx_compl_task);
+		__skb_queue_tail(&htt->rx_in_ord_compl_q, skb);
 		return false;
 	}
 	case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND:
@@ -2347,7 +2346,6 @@
 			break;
 		}
 		skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind);
-		tasklet_schedule(&htt->txrx_compl_task);
 		break;
 	}
 	case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM:
@@ -2376,27 +2374,77 @@
 }
 EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler);
 
-static void ath10k_htt_txrx_compl_task(unsigned long ptr)
+int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget)
 {
-	struct ath10k_htt *htt = (struct ath10k_htt *)ptr;
-	struct ath10k *ar = htt->ar;
+	struct ath10k_htt *htt = &ar->htt;
 	struct htt_tx_done tx_done = {};
-	struct sk_buff_head rx_ind_q;
 	struct sk_buff_head tx_ind_q;
 	struct sk_buff *skb;
 	unsigned long flags;
-	int num_mpdus;
+	int quota = 0, done, num_rx_msdus;
+	bool resched_napi = false;
 
-	__skb_queue_head_init(&rx_ind_q);
 	__skb_queue_head_init(&tx_ind_q);
 
-	spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags);
-	skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q);
-	spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags);
+	/* Since in-ord-ind can deliver more than 1 A-MSDU in single event,
+	 * process it first to utilize full available quota.
+	 */
+	while (quota < budget) {
+		if (skb_queue_empty(&htt->rx_in_ord_compl_q))
+			break;
 
-	spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
-	skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
-	spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);
+		skb = __skb_dequeue(&htt->rx_in_ord_compl_q);
+		if (!skb) {
+			resched_napi = true;
+			goto exit;
+		}
+
+		spin_lock_bh(&htt->rx_ring.lock);
+		num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb);
+		spin_unlock_bh(&htt->rx_ring.lock);
+		if (num_rx_msdus < 0) {
+			resched_napi = true;
+			goto exit;
+		}
+
+		dev_kfree_skb_any(skb);
+		if (num_rx_msdus > 0)
+			quota += num_rx_msdus;
+
+		if ((quota > ATH10K_NAPI_QUOTA_LIMIT) &&
+		    !skb_queue_empty(&htt->rx_in_ord_compl_q)) {
+			resched_napi = true;
+			goto exit;
+		}
+	}
+
+	while (quota < budget) {
+		/* no more data to receive */
+		if (!atomic_read(&htt->num_mpdus_ready))
+			break;
+
+		num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt);
+		if (num_rx_msdus < 0) {
+			resched_napi = true;
+			goto exit;
+		}
+
+		quota += num_rx_msdus;
+		atomic_dec(&htt->num_mpdus_ready);
+		if ((quota > ATH10K_NAPI_QUOTA_LIMIT) &&
+		    atomic_read(&htt->num_mpdus_ready)) {
+			resched_napi = true;
+			goto exit;
+		}
+	}
+
+	/* From NAPI documentation:
+	 *  The napi poll() function may also process TX completions, in which
+	 *  case if it processes the entire TX ring then it should count that
+	 *  work as the rest of the budget.
+	 */
+	if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo))
+		quota = budget;
 
 	/* kfifo_get: called only within txrx_tasklet so it's neatly serialized.
 	 * From kfifo_get() documentation:
@@ -2406,27 +2454,24 @@
 	while (kfifo_get(&htt->txdone_fifo, &tx_done))
 		ath10k_txrx_tx_unref(htt, &tx_done);
 
+	ath10k_mac_tx_push_pending(ar);
+
+	spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
+	skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
+	spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);
+
 	while ((skb = __skb_dequeue(&tx_ind_q))) {
 		ath10k_htt_rx_tx_fetch_ind(ar, skb);
 		dev_kfree_skb_any(skb);
 	}
 
-	num_mpdus = atomic_read(&htt->num_mpdus_ready);
-
-	while (num_mpdus) {
-		if (ath10k_htt_rx_handle_amsdu(htt))
-			break;
-
-		num_mpdus--;
-		atomic_dec(&htt->num_mpdus_ready);
-	}
-
-	while ((skb = __skb_dequeue(&rx_ind_q))) {
-		spin_lock_bh(&htt->rx_ring.lock);
-		ath10k_htt_rx_in_ord_ind(ar, skb);
-		spin_unlock_bh(&htt->rx_ring.lock);
-		dev_kfree_skb_any(skb);
-	}
-
+exit:
 	ath10k_htt_rx_msdu_buff_replenish(htt);
+	/* In case of rx failure or more data to read, report budget
+	 * to reschedule NAPI poll
+	 */
+	done = resched_napi ? budget : quota;
+
+	return done;
 }
+EXPORT_SYMBOL(ath10k_htt_txrx_compl_task);
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index 7c072b6..ae5b33fe 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -390,8 +390,6 @@
 {
 	int size;
 
-	tasklet_kill(&htt->txrx_compl_task);
-
 	idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar);
 	idr_destroy(&htt->pending_tx);
 
diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c
index f903d46..675e75d 100644
--- a/drivers/net/wireless/ath/ath10k/hw.c
+++ b/drivers/net/wireless/ath/ath10k/hw.c
@@ -85,7 +85,7 @@
 	.ce7_base_address			= 0x0004bc00,
 	/* Note: qca99x0 supports upto 12 Copy Engines. Other than address of
 	 * CE0 and CE1 no other copy engine is directly referred in the code.
-	 * It is not really neccessary to assign address for newly supported
+	 * It is not really necessary to assign address for newly supported
 	 * CEs in this address table.
 	 *	Copy Engine		Address
 	 *	CE8			0x0004c000
@@ -219,3 +219,16 @@
 	survey->time = CCNT_TO_MSEC(ar, cc);
 	survey->time_busy = CCNT_TO_MSEC(ar, rcc);
 }
+
+const struct ath10k_hw_ops qca988x_ops = {
+};
+
+static int ath10k_qca99x0_rx_desc_get_l3_pad_bytes(struct htt_rx_desc *rxd)
+{
+	return MS(__le32_to_cpu(rxd->msdu_end.qca99x0.info1),
+		  RX_MSDU_END_INFO1_L3_HDR_PAD);
+}
+
+const struct ath10k_hw_ops qca99x0_ops = {
+	.rx_desc_get_l3_pad_bytes = ath10k_qca99x0_rx_desc_get_l3_pad_bytes,
+};
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index e014cd7..6038b74 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -284,7 +284,7 @@
 #define QCA_REV_9377(ar) ((ar)->hw_rev == ATH10K_HW_QCA9377)
 #define QCA_REV_40XX(ar) ((ar)->hw_rev == ATH10K_HW_QCA4019)
 
-/* Known pecularities:
+/* Known peculiarities:
  *  - raw appears in nwifi decap, raw and nwifi appear in ethernet decap
  *  - raw have FCS, nwifi doesn't
  *  - ethernet frames have 802.11 header decapped and parts (base hdr, cipher
@@ -338,11 +338,6 @@
 	ATH10K_HW_RATE_REV2_CCK_SP_11M,
 };
 
-enum ath10k_hw_4addr_pad {
-	ATH10K_HW_4ADDR_PAD_AFTER,
-	ATH10K_HW_4ADDR_PAD_BEFORE,
-};
-
 enum ath10k_hw_cc_wraparound_type {
 	ATH10K_HW_CC_WRAP_DISABLED = 0,
 
@@ -363,6 +358,80 @@
 	ATH10K_HW_CC_WRAP_SHIFTED_EACH = 2,
 };
 
+struct ath10k_hw_params {
+	u32 id;
+	u16 dev_id;
+	const char *name;
+	u32 patch_load_addr;
+	int uart_pin;
+	u32 otp_exe_param;
+
+	/* Type of hw cycle counter wraparound logic, for more info
+	 * refer enum ath10k_hw_cc_wraparound_type.
+	 */
+	enum ath10k_hw_cc_wraparound_type cc_wraparound_type;
+
+	/* Some of chip expects fragment descriptor to be continuous
+	 * memory for any TX operation. Set continuous_frag_desc flag
+	 * for the hardware which have such requirement.
+	 */
+	bool continuous_frag_desc;
+
+	/* CCK hardware rate table mapping for the newer chipsets
+	 * like QCA99X0, QCA4019 got revised. The CCK h/w rate values
+	 * are in a proper order with respect to the rate/preamble
+	 */
+	bool cck_rate_map_rev2;
+
+	u32 channel_counters_freq_hz;
+
+	/* Mgmt tx descriptors threshold for limiting probe response
+	 * frames.
+	 */
+	u32 max_probe_resp_desc_thres;
+
+	u32 tx_chain_mask;
+	u32 rx_chain_mask;
+	u32 max_spatial_stream;
+	u32 cal_data_len;
+
+	struct ath10k_hw_params_fw {
+		const char *dir;
+		const char *board;
+		size_t board_size;
+		size_t board_ext_size;
+	} fw;
+
+	/* qca99x0 family chips deliver broadcast/multicast management
+	 * frames encrypted and expect software do decryption.
+	 */
+	bool sw_decrypt_mcast_mgmt;
+
+	const struct ath10k_hw_ops *hw_ops;
+
+	/* Number of bytes used for alignment in rx_hdr_status of rx desc. */
+	int decap_align_bytes;
+};
+
+struct htt_rx_desc;
+
+/* Defines needed for Rx descriptor abstraction */
+struct ath10k_hw_ops {
+	int (*rx_desc_get_l3_pad_bytes)(struct htt_rx_desc *rxd);
+};
+
+extern const struct ath10k_hw_ops qca988x_ops;
+extern const struct ath10k_hw_ops qca99x0_ops;
+
+static inline int
+ath10k_rx_desc_get_l3_pad_bytes(struct ath10k_hw_params *hw,
+				struct htt_rx_desc *rxd)
+{
+	if (hw->hw_ops->rx_desc_get_l3_pad_bytes)
+		return hw->hw_ops->rx_desc_get_l3_pad_bytes(rxd);
+	return 0;
+}
+
 /* Target specific defines for MAIN firmware */
 #define TARGET_NUM_VDEVS			8
 #define TARGET_NUM_PEER_AST			2
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 0bbd0a0..76297d6 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -824,7 +824,7 @@
 		 */
 		for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) {
 			if (ar->peer_map[i] == peer) {
-				ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %p idx %d)\n",
+				ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n",
 					    peer->addr, peer, i);
 				ar->peer_map[i] = NULL;
 			}
@@ -2793,7 +2793,7 @@
 
 	ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id);
 	if (ret)
-		ath10k_warn(ar, "faield to down vdev %i: %d\n",
+		ath10k_warn(ar, "failed to down vdev %i: %d\n",
 			    arvif->vdev_id, ret);
 
 	arvif->def_wep_key_idx = -1;
@@ -3255,6 +3255,8 @@
 	if (ar->htt.target_version_major < 3 &&
 	    (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) &&
 	    !test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX,
+		      ar->running_fw->fw_file.fw_features) &&
+	    !test_bit(ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR,
 		      ar->running_fw->fw_file.fw_features))
 		return ATH10K_HW_TXRX_MGMT;
 
@@ -3524,7 +3526,7 @@
 
 	if (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) {
 		if (!ath10k_mac_tx_frm_has_freq(ar)) {
-			ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %p\n",
+			ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %pK\n",
 				   skb);
 
 			skb_queue_tail(&ar->offchan_tx_queue, skb);
@@ -3586,7 +3588,7 @@
 
 		mutex_lock(&ar->conf_mutex);
 
-		ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %p\n",
+		ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK\n",
 			   skb);
 
 		hdr = (struct ieee80211_hdr *)skb->data;
@@ -3643,7 +3645,7 @@
 		time_left =
 		wait_for_completion_timeout(&ar->offchan_tx_completed, 3 * HZ);
 		if (time_left == 0)
-			ath10k_warn(ar, "timed out waiting for offchannel skb %p\n",
+			ath10k_warn(ar, "timed out waiting for offchannel skb %pK\n",
 				    skb);
 
 		if (!peer && tmp_peer_created) {
@@ -3777,7 +3779,9 @@
 	enum ath10k_hw_txrx_mode txmode;
 	enum ath10k_mac_tx_path txpath;
 	struct sk_buff *skb;
+	struct ieee80211_hdr *hdr;
 	size_t skb_len;
+	bool is_mgmt, is_presp;
 	int ret;
 
 	spin_lock_bh(&ar->htt.tx_lock);
@@ -3801,6 +3805,22 @@
 	skb_len = skb->len;
 	txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb);
 	txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode);
+	is_mgmt = (txpath == ATH10K_MAC_TX_HTT_MGMT);
+
+	if (is_mgmt) {
+		hdr = (struct ieee80211_hdr *)skb->data;
+		is_presp = ieee80211_is_probe_resp(hdr->frame_control);
+
+		spin_lock_bh(&ar->htt.tx_lock);
+		ret = ath10k_htt_tx_mgmt_inc_pending(htt, is_mgmt, is_presp);
+
+		if (ret) {
+			ath10k_htt_tx_dec_pending(htt);
+			spin_unlock_bh(&ar->htt.tx_lock);
+			return ret;
+		}
+		spin_unlock_bh(&ar->htt.tx_lock);
+	}
 
 	ret = ath10k_mac_tx(ar, vif, sta, txmode, txpath, skb);
 	if (unlikely(ret)) {
@@ -3808,6 +3828,8 @@
 
 		spin_lock_bh(&ar->htt.tx_lock);
 		ath10k_htt_tx_dec_pending(htt);
+		if (is_mgmt)
+			ath10k_htt_tx_mgmt_dec_pending(htt);
 		spin_unlock_bh(&ar->htt.tx_lock);
 
 		return ret;
@@ -3894,7 +3916,7 @@
 		ar->scan.roc_freq = 0;
 		ath10k_offchan_tx_purge(ar);
 		cancel_delayed_work(&ar->scan.timeout);
-		complete_all(&ar->scan.completed);
+		complete(&ar->scan.completed);
 		break;
 	}
 }
@@ -4100,13 +4122,29 @@
 {
 	struct ath10k *ar = hw->priv;
 	struct ath10k_txq *artxq = (void *)txq->drv_priv;
+	struct ieee80211_txq *f_txq;
+	struct ath10k_txq *f_artxq;
+	int ret = 0;
+	int max = 16;
 
 	spin_lock_bh(&ar->txqs_lock);
 	if (list_empty(&artxq->list))
 		list_add_tail(&artxq->list, &ar->txqs);
+
+	f_artxq = list_first_entry(&ar->txqs, struct ath10k_txq, list);
+	f_txq = container_of((void *)f_artxq, struct ieee80211_txq, drv_priv);
+	list_del_init(&f_artxq->list);
+
+	while (ath10k_mac_tx_can_push(hw, f_txq) && max--) {
+		ret = ath10k_mac_tx_push_txq(hw, f_txq);
+		if (ret)
+			break;
+	}
+	if (ret != -ENOENT)
+		list_add_tail(&f_artxq->list, &ar->txqs);
 	spin_unlock_bh(&ar->txqs_lock);
 
-	ath10k_mac_tx_push_pending(ar);
+	ath10k_htt_tx_txq_update(hw, f_txq);
 	ath10k_htt_tx_txq_update(hw, txq);
 }
 
@@ -5186,7 +5224,7 @@
 
 	ret = ath10k_monitor_recalc(ar);
 	if (ret)
-		ath10k_warn(ar, "failed to recalc montior: %d\n", ret);
+		ath10k_warn(ar, "failed to recalc monitor: %d\n", ret);
 
 	mutex_unlock(&ar->conf_mutex);
 }
@@ -5984,8 +6022,8 @@
 		 * Existing station deletion.
 		 */
 		ath10k_dbg(ar, ATH10K_DBG_MAC,
-			   "mac vdev %d peer delete %pM (sta gone)\n",
-			   arvif->vdev_id, sta->addr);
+			   "mac vdev %d peer delete %pM sta %pK (sta gone)\n",
+			   arvif->vdev_id, sta->addr, sta);
 
 		ret = ath10k_peer_delete(ar, arvif->vdev_id, sta->addr);
 		if (ret)
@@ -6001,7 +6039,7 @@
 				continue;
 
 			if (peer->sta == sta) {
-				ath10k_warn(ar, "found sta peer %pM (ptr %p id %d) entry on vdev %i after it was supposedly removed\n",
+				ath10k_warn(ar, "found sta peer %pM (ptr %pK id %d) entry on vdev %i after it was supposedly removed\n",
 					    sta->addr, peer, i, arvif->vdev_id);
 				peer->sta = NULL;
 
@@ -6538,7 +6576,7 @@
 		goto exit;
 	}
 
-	ath10k_mac_update_bss_chan_survey(ar, survey->channel);
+	ath10k_mac_update_bss_chan_survey(ar, &sband->channels[idx]);
 
 	spin_lock_bh(&ar->data_lock);
 	memcpy(survey, ar_survey, sizeof(*survey));
@@ -7134,7 +7172,7 @@
 	struct ath10k *ar = hw->priv;
 
 	ath10k_dbg(ar, ATH10K_DBG_MAC,
-		   "mac chanctx add freq %hu width %d ptr %p\n",
+		   "mac chanctx add freq %hu width %d ptr %pK\n",
 		   ctx->def.chan->center_freq, ctx->def.width, ctx);
 
 	mutex_lock(&ar->conf_mutex);
@@ -7158,7 +7196,7 @@
 	struct ath10k *ar = hw->priv;
 
 	ath10k_dbg(ar, ATH10K_DBG_MAC,
-		   "mac chanctx remove freq %hu width %d ptr %p\n",
+		   "mac chanctx remove freq %hu width %d ptr %pK\n",
 		   ctx->def.chan->center_freq, ctx->def.width, ctx);
 
 	mutex_lock(&ar->conf_mutex);
@@ -7223,7 +7261,7 @@
 	mutex_lock(&ar->conf_mutex);
 
 	ath10k_dbg(ar, ATH10K_DBG_MAC,
-		   "mac chanctx change freq %hu width %d ptr %p changed %x\n",
+		   "mac chanctx change freq %hu width %d ptr %pK changed %x\n",
 		   ctx->def.chan->center_freq, ctx->def.width, ctx, changed);
 
 	/* This shouldn't really happen because channel switching should use
@@ -7281,7 +7319,7 @@
 	mutex_lock(&ar->conf_mutex);
 
 	ath10k_dbg(ar, ATH10K_DBG_MAC,
-		   "mac chanctx assign ptr %p vdev_id %i\n",
+		   "mac chanctx assign ptr %pK vdev_id %i\n",
 		   ctx, arvif->vdev_id);
 
 	if (WARN_ON(arvif->is_started)) {
@@ -7342,7 +7380,7 @@
 	mutex_lock(&ar->conf_mutex);
 
 	ath10k_dbg(ar, ATH10K_DBG_MAC,
-		   "mac chanctx unassign ptr %p vdev_id %i\n",
+		   "mac chanctx unassign ptr %pK vdev_id %i\n",
 		   ctx, arvif->vdev_id);
 
 	WARN_ON(!arvif->is_started);
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index 9a22c47..0457e31 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -1506,12 +1506,10 @@
 	ath10k_ce_per_engine_service(ar, pipe);
 }
 
-void ath10k_pci_kill_tasklet(struct ath10k *ar)
+static void ath10k_pci_rx_retry_sync(struct ath10k *ar)
 {
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 
-	tasklet_kill(&ar_pci->intr_tq);
-
 	del_timer_sync(&ar_pci->rx_post_retry);
 }
 
@@ -1570,7 +1568,7 @@
 						 ul_pipe, dl_pipe);
 }
 
-static void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar)
+void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar)
 {
 	u32 val;
 
@@ -1693,14 +1691,12 @@
 static void ath10k_pci_tx_pipe_cleanup(struct ath10k_pci_pipe *pci_pipe)
 {
 	struct ath10k *ar;
-	struct ath10k_pci *ar_pci;
 	struct ath10k_ce_pipe *ce_pipe;
 	struct ath10k_ce_ring *ce_ring;
 	struct sk_buff *skb;
 	int i;
 
 	ar = pci_pipe->hif_ce_state;
-	ar_pci = ath10k_pci_priv(ar);
 	ce_pipe = pci_pipe->ce_hdl;
 	ce_ring = ce_pipe->src_ring;
 
@@ -1753,7 +1749,7 @@
 
 void ath10k_pci_flush(struct ath10k *ar)
 {
-	ath10k_pci_kill_tasklet(ar);
+	ath10k_pci_rx_retry_sync(ar);
 	ath10k_pci_buffer_cleanup(ar);
 }
 
@@ -1780,6 +1776,8 @@
 	ath10k_pci_irq_disable(ar);
 	ath10k_pci_irq_sync(ar);
 	ath10k_pci_flush(ar);
+	napi_synchronize(&ar->napi);
+	napi_disable(&ar->napi);
 
 	spin_lock_irqsave(&ar_pci->ps_lock, flags);
 	WARN_ON(ar_pci->ps_wake_refcount > 0);
@@ -2533,6 +2531,7 @@
 		ath10k_err(ar, "could not wake up target CPU: %d\n", ret);
 		goto err_ce;
 	}
+	napi_enable(&ar->napi);
 
 	return 0;
 
@@ -2725,7 +2724,7 @@
 	return 0;
 
 err_free:
-	kfree(data);
+	kfree(caldata);
 
 	return -EINVAL;
 }
@@ -2772,35 +2771,53 @@
 		return IRQ_NONE;
 	}
 
-	if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) {
-		if (!ath10k_pci_irq_pending(ar))
-			return IRQ_NONE;
+	if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) &&
+	    !ath10k_pci_irq_pending(ar))
+		return IRQ_NONE;
 
-		ath10k_pci_disable_and_clear_legacy_irq(ar);
-	}
-
-	tasklet_schedule(&ar_pci->intr_tq);
+	ath10k_pci_disable_and_clear_legacy_irq(ar);
+	ath10k_pci_irq_msi_fw_mask(ar);
+	napi_schedule(&ar->napi);
 
 	return IRQ_HANDLED;
 }
 
-static void ath10k_pci_tasklet(unsigned long data)
+static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget)
 {
-	struct ath10k *ar = (struct ath10k *)data;
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	struct ath10k *ar = container_of(ctx, struct ath10k, napi);
+	int done = 0;
 
 	if (ath10k_pci_has_fw_crashed(ar)) {
-		ath10k_pci_irq_disable(ar);
 		ath10k_pci_fw_crashed_clear(ar);
 		ath10k_pci_fw_crashed_dump(ar);
-		return;
+		napi_complete(ctx);
+		return done;
 	}
 
 	ath10k_ce_per_engine_service_any(ar);
 
-	/* Re-enable legacy irq that was disabled in the irq handler */
-	if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY)
+	done = ath10k_htt_txrx_compl_task(ar, budget);
+
+	if (done < budget) {
+		napi_complete(ctx);
+		/* In case of MSI, it is possible that interrupts are received
+		 * while NAPI poll is inprogress. So pending interrupts that are
+		 * received after processing all copy engine pipes by NAPI poll
+		 * will not be handled again. This is causing failure to
+		 * complete boot sequence in x86 platform. So before enabling
+		 * interrupts safer to check for pending interrupts for
+		 * immediate servicing.
+		 */
+		if (CE_INTERRUPT_SUMMARY(ar)) {
+			napi_reschedule(ctx);
+			goto out;
+		}
 		ath10k_pci_enable_legacy_irq(ar);
+		ath10k_pci_irq_msi_fw_unmask(ar);
+	}
+
+out:
+	return done;
 }
 
 static int ath10k_pci_request_irq_msi(struct ath10k *ar)
@@ -2858,11 +2875,10 @@
 	free_irq(ar_pci->pdev->irq, ar);
 }
 
-void ath10k_pci_init_irq_tasklets(struct ath10k *ar)
+void ath10k_pci_init_napi(struct ath10k *ar)
 {
-	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
-
-	tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned long)ar);
+	netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll,
+		       ATH10K_NAPI_BUDGET);
 }
 
 static int ath10k_pci_init_irq(struct ath10k *ar)
@@ -2870,7 +2886,7 @@
 	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	int ret;
 
-	ath10k_pci_init_irq_tasklets(ar);
+	ath10k_pci_init_napi(ar);
 
 	if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO)
 		ath10k_info(ar, "limiting irq mode to: %d\n",
@@ -3062,7 +3078,7 @@
 		goto err_master;
 	}
 
-	ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%p\n", ar_pci->mem);
+	ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%pK\n", ar_pci->mem);
 	return 0;
 
 err_master:
@@ -3131,7 +3147,8 @@
 
 void ath10k_pci_release_resource(struct ath10k *ar)
 {
-	ath10k_pci_kill_tasklet(ar);
+	ath10k_pci_rx_retry_sync(ar);
+	netif_napi_del(&ar->napi);
 	ath10k_pci_ce_deinit(ar);
 	ath10k_pci_free_pipes(ar);
 }
@@ -3162,7 +3179,6 @@
 		pci_hard_reset = ath10k_pci_qca988x_chip_reset;
 		break;
 	case QCA9887_1_0_DEVICE_ID:
-		dev_warn(&pdev->dev, "QCA9887 support is still experimental, there are likely bugs. You have been warned.\n");
 		hw_rev = ATH10K_HW_QCA9887;
 		pci_ps = false;
 		pci_soft_reset = ath10k_pci_warm_reset;
@@ -3298,7 +3314,7 @@
 
 err_free_irq:
 	ath10k_pci_free_irq(ar);
-	ath10k_pci_kill_tasklet(ar);
+	ath10k_pci_rx_retry_sync(ar);
 
 err_deinit_irq:
 	ath10k_pci_deinit_irq(ar);
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index 6eca1df..9854ad5 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -177,8 +177,6 @@
 	/* Operating interrupt mode */
 	enum ath10k_pci_irq_mode oper_irq_mode;
 
-	struct tasklet_struct intr_tq;
-
 	struct ath10k_pci_pipe pipe_info[CE_COUNT_MAX];
 
 	/* Copy Engine used for Diagnostic Accesses */
@@ -294,8 +292,7 @@
 void ath10k_pci_free_pipes(struct ath10k *ar);
 void ath10k_pci_rx_replenish_retry(unsigned long ptr);
 void ath10k_pci_ce_deinit(struct ath10k *ar);
-void ath10k_pci_init_irq_tasklets(struct ath10k *ar);
-void ath10k_pci_kill_tasklet(struct ath10k *ar);
+void ath10k_pci_init_napi(struct ath10k *ar);
 int ath10k_pci_init_pipes(struct ath10k *ar);
 int ath10k_pci_init_config(struct ath10k *ar);
 void ath10k_pci_rx_post(struct ath10k *ar);
@@ -303,6 +300,7 @@
 void ath10k_pci_enable_legacy_irq(struct ath10k *ar);
 bool ath10k_pci_irq_pending(struct ath10k *ar);
 void ath10k_pci_disable_and_clear_legacy_irq(struct ath10k *ar);
+void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar);
 int ath10k_pci_wait_for_target_init(struct ath10k *ar);
 int ath10k_pci_setup_resource(struct ath10k *ar);
 void ath10k_pci_release_resource(struct ath10k *ar);
diff --git a/drivers/net/wireless/ath/ath10k/swap.c b/drivers/net/wireless/ath/ath10k/swap.c
index 0c5f586..adf4592 100644
--- a/drivers/net/wireless/ath/ath10k/swap.c
+++ b/drivers/net/wireless/ath/ath10k/swap.c
@@ -134,17 +134,18 @@
 	return seg_info;
 }
 
-int ath10k_swap_code_seg_configure(struct ath10k *ar)
+int ath10k_swap_code_seg_configure(struct ath10k *ar,
+				   const struct ath10k_fw_file *fw_file)
 {
 	int ret;
 	struct ath10k_swap_code_seg_info *seg_info = NULL;
 
-	if (!ar->swap.firmware_swap_code_seg_info)
+	if (!fw_file->firmware_swap_code_seg_info)
 		return 0;
 
 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found firmware code swap binary\n");
 
-	seg_info = ar->swap.firmware_swap_code_seg_info;
+	seg_info = fw_file->firmware_swap_code_seg_info;
 
 	ret = ath10k_bmi_write_memory(ar, seg_info->target_addr,
 				      &seg_info->seg_hw_info,
@@ -158,28 +159,29 @@
 	return 0;
 }
 
-void ath10k_swap_code_seg_release(struct ath10k *ar)
+void ath10k_swap_code_seg_release(struct ath10k *ar,
+				  struct ath10k_fw_file *fw_file)
 {
-	ath10k_swap_code_seg_free(ar, ar->swap.firmware_swap_code_seg_info);
+	ath10k_swap_code_seg_free(ar, fw_file->firmware_swap_code_seg_info);
 
 	/* FIXME: these two assignments look to bein wrong place! Shouldn't
 	 * they be in ath10k_core_free_firmware_files() like the rest?
 	 */
-	ar->normal_mode_fw.fw_file.codeswap_data = NULL;
-	ar->normal_mode_fw.fw_file.codeswap_len = 0;
+	fw_file->codeswap_data = NULL;
+	fw_file->codeswap_len = 0;
 
-	ar->swap.firmware_swap_code_seg_info = NULL;
+	fw_file->firmware_swap_code_seg_info = NULL;
 }
 
-int ath10k_swap_code_seg_init(struct ath10k *ar)
+int ath10k_swap_code_seg_init(struct ath10k *ar, struct ath10k_fw_file *fw_file)
 {
 	int ret;
 	struct ath10k_swap_code_seg_info *seg_info;
 	const void *codeswap_data;
 	size_t codeswap_len;
 
-	codeswap_data = ar->normal_mode_fw.fw_file.codeswap_data;
-	codeswap_len = ar->normal_mode_fw.fw_file.codeswap_len;
+	codeswap_data = fw_file->codeswap_data;
+	codeswap_len = fw_file->codeswap_len;
 
 	if (!codeswap_len || !codeswap_data)
 		return 0;
@@ -200,7 +202,7 @@
 		return ret;
 	}
 
-	ar->swap.firmware_swap_code_seg_info = seg_info;
+	fw_file->firmware_swap_code_seg_info = seg_info;
 
 	return 0;
 }
diff --git a/drivers/net/wireless/ath/ath10k/swap.h b/drivers/net/wireless/ath/ath10k/swap.h
index 36991c7..f5dc047 100644
--- a/drivers/net/wireless/ath/ath10k/swap.h
+++ b/drivers/net/wireless/ath/ath10k/swap.h
@@ -23,6 +23,8 @@
 /* Currently only one swap segment is supported */
 #define ATH10K_SWAP_CODE_SEG_NUM_SUPPORTED	1
 
+struct ath10k_fw_file;
+
 struct ath10k_swap_code_seg_tlv {
 	__le32 address;
 	__le32 length;
@@ -58,8 +60,11 @@
 	dma_addr_t paddr[ATH10K_SWAP_CODE_SEG_NUM_SUPPORTED];
 };
 
-int ath10k_swap_code_seg_configure(struct ath10k *ar);
-void ath10k_swap_code_seg_release(struct ath10k *ar);
-int ath10k_swap_code_seg_init(struct ath10k *ar);
+int ath10k_swap_code_seg_configure(struct ath10k *ar,
+				   const struct ath10k_fw_file *fw_file);
+void ath10k_swap_code_seg_release(struct ath10k *ar,
+				  struct ath10k_fw_file *fw_file);
+int ath10k_swap_code_seg_init(struct ath10k *ar,
+			      struct ath10k_fw_file *fw_file);
 
 #endif
diff --git a/drivers/net/wireless/ath/ath10k/targaddrs.h b/drivers/net/wireless/ath/ath10k/targaddrs.h
index aaf53a8..a47cab4 100644
--- a/drivers/net/wireless/ath/ath10k/targaddrs.h
+++ b/drivers/net/wireless/ath/ath10k/targaddrs.h
@@ -405,7 +405,7 @@
  * 1. target firmware would check magic number and if it's a match, firmware
  *    would consider the bits[0:15] are valid and base on that to calculate
  *    the end of DRAM. Early allocation would be located at that area and
- *    may be reclaimed when necesary
+ *    may be reclaimed when necessary
  * 2. if no magic number is found, early allocation would happen at "_end"
  *    symbol of ROM which is located before the app-data and might NOT be
  *    re-claimable. If this is adopted, link script should keep this in
diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c
index 120f423..ed85f93 100644
--- a/drivers/net/wireless/ath/ath10k/testmode.c
+++ b/drivers/net/wireless/ath/ath10k/testmode.c
@@ -23,6 +23,7 @@
 #include "wmi.h"
 #include "hif.h"
 #include "hw.h"
+#include "core.h"
 
 #include "testmode_i.h"
 
@@ -45,7 +46,7 @@
 	int ret;
 
 	ath10k_dbg(ar, ATH10K_DBG_TESTMODE,
-		   "testmode event wmi cmd_id %d skb %p skb->len %d\n",
+		   "testmode event wmi cmd_id %d skb %pK skb->len %d\n",
 		   cmd_id, skb, skb->len);
 
 	ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", skb->data, skb->len);
@@ -240,6 +241,18 @@
 		goto err;
 	}
 
+	if (ar->testmode.utf_mode_fw.fw_file.codeswap_data &&
+	    ar->testmode.utf_mode_fw.fw_file.codeswap_len) {
+		ret = ath10k_swap_code_seg_init(ar,
+						&ar->testmode.utf_mode_fw.fw_file);
+		if (ret) {
+			ath10k_warn(ar,
+				    "failed to init utf code swap segment: %d\n",
+				    ret);
+			goto err_release_utf_mode_fw;
+		}
+	}
+
 	spin_lock_bh(&ar->data_lock);
 	ar->testmode.utf_monitor = true;
 	spin_unlock_bh(&ar->data_lock);
@@ -279,6 +292,11 @@
 	ath10k_hif_power_down(ar);
 
 err_release_utf_mode_fw:
+	if (ar->testmode.utf_mode_fw.fw_file.codeswap_data &&
+	    ar->testmode.utf_mode_fw.fw_file.codeswap_len)
+		ath10k_swap_code_seg_release(ar,
+					     &ar->testmode.utf_mode_fw.fw_file);
+
 	release_firmware(ar->testmode.utf_mode_fw.fw_file.firmware);
 	ar->testmode.utf_mode_fw.fw_file.firmware = NULL;
 
@@ -301,6 +319,11 @@
 
 	spin_unlock_bh(&ar->data_lock);
 
+	if (ar->testmode.utf_mode_fw.fw_file.codeswap_data &&
+	    ar->testmode.utf_mode_fw.fw_file.codeswap_len)
+		ath10k_swap_code_seg_release(ar,
+					     &ar->testmode.utf_mode_fw.fw_file);
+
 	release_firmware(ar->testmode.utf_mode_fw.fw_file.firmware);
 	ar->testmode.utf_mode_fw.fw_file.firmware = NULL;
 
@@ -360,7 +383,7 @@
 	cmd_id = nla_get_u32(tb[ATH10K_TM_ATTR_WMI_CMDID]);
 
 	ath10k_dbg(ar, ATH10K_DBG_TESTMODE,
-		   "testmode cmd wmi cmd_id %d buf %p buf_len %d\n",
+		   "testmode cmd wmi cmd_id %d buf %pK buf_len %d\n",
 		   cmd_id, buf, buf_len);
 
 	ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", buf, buf_len);
diff --git a/drivers/net/wireless/ath/ath10k/thermal.c b/drivers/net/wireless/ath/ath10k/thermal.c
index 444b52c..0a47269 100644
--- a/drivers/net/wireless/ath/ath10k/thermal.c
+++ b/drivers/net/wireless/ath/ath10k/thermal.c
@@ -192,7 +192,7 @@
 
 	/* Avoid linking error on devm_hwmon_device_register_with_groups, I
 	 * guess linux/hwmon.h is missing proper stubs. */
-	if (!config_enabled(CONFIG_HWMON))
+	if (!IS_REACHABLE(CONFIG_HWMON))
 		return 0;
 
 	hwmon_dev = devm_hwmon_device_register_with_groups(ar->dev,
diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index b29a86a..9852c5d 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c
@@ -44,7 +44,7 @@
 	complete(&ar->offchan_tx_completed);
 	ar->offchan_tx_skb = NULL; /* just for sanity */
 
-	ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %p\n", skb);
+	ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %pK\n", skb);
 out:
 	spin_unlock_bh(&ar->data_lock);
 }
@@ -119,8 +119,6 @@
 	ieee80211_tx_status(htt->ar->hw, msdu);
 	/* we do not own the msdu anymore */
 
-	ath10k_mac_tx_push_pending(ar);
-
 	return 0;
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h
index 64ebd30..c9a8bb1 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-ops.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h
@@ -51,6 +51,8 @@
 			    struct wmi_roam_ev_arg *arg);
 	int (*pull_wow_event)(struct ath10k *ar, struct sk_buff *skb,
 			      struct wmi_wow_ev_arg *arg);
+	int (*pull_echo_ev)(struct ath10k *ar, struct sk_buff *skb,
+			    struct wmi_echo_ev_arg *arg);
 	enum wmi_txbf_conf (*get_txbf_conf_scheme)(struct ath10k *ar);
 
 	struct sk_buff *(*gen_pdev_suspend)(struct ath10k *ar, u32 suspend_opt);
@@ -123,7 +125,7 @@
 					     enum wmi_force_fw_hang_type type,
 					     u32 delay_ms);
 	struct sk_buff *(*gen_mgmt_tx)(struct ath10k *ar, struct sk_buff *skb);
-	struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u32 module_enable,
+	struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u64 module_enable,
 					  u32 log_level);
 	struct sk_buff *(*gen_pktlog_enable)(struct ath10k *ar, u32 filter);
 	struct sk_buff *(*gen_pktlog_disable)(struct ath10k *ar);
@@ -194,6 +196,7 @@
 	struct sk_buff *(*gen_pdev_bss_chan_info_req)
 					(struct ath10k *ar,
 					 enum wmi_bss_survey_req_type type);
+	struct sk_buff *(*gen_echo)(struct ath10k *ar, u32 value);
 };
 
 int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id);
@@ -349,6 +352,16 @@
 	return ar->wmi.ops->pull_wow_event(ar, skb, arg);
 }
 
+static inline int
+ath10k_wmi_pull_echo_ev(struct ath10k *ar, struct sk_buff *skb,
+			struct wmi_echo_ev_arg *arg)
+{
+	if (!ar->wmi.ops->pull_echo_ev)
+		return -EOPNOTSUPP;
+
+	return ar->wmi.ops->pull_echo_ev(ar, skb, arg);
+}
+
 static inline enum wmi_txbf_conf
 ath10k_wmi_get_txbf_conf_scheme(struct ath10k *ar)
 {
@@ -932,7 +945,7 @@
 }
 
 static inline int
-ath10k_wmi_dbglog_cfg(struct ath10k *ar, u32 module_enable, u32 log_level)
+ath10k_wmi_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level)
 {
 	struct sk_buff *skb;
 
@@ -1382,4 +1395,20 @@
 				   wmi->cmd->pdev_bss_chan_info_request_cmdid);
 }
 
+static inline int
+ath10k_wmi_echo(struct ath10k *ar, u32 value)
+{
+	struct ath10k_wmi *wmi = &ar->wmi;
+	struct sk_buff *skb;
+
+	if (!wmi->ops->gen_echo)
+		return -EOPNOTSUPP;
+
+	skb = wmi->ops->gen_echo(ar, value);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	return ath10k_wmi_cmd_send(ar, skb, wmi->cmd->echo_cmdid);
+}
+
 #endif
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index e09337e..e64f593 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -1223,6 +1223,33 @@
 	return 0;
 }
 
+static int ath10k_wmi_tlv_op_pull_echo_ev(struct ath10k *ar,
+					  struct sk_buff *skb,
+					  struct wmi_echo_ev_arg *arg)
+{
+	const void **tb;
+	const struct wmi_echo_event *ev;
+	int ret;
+
+	tb = ath10k_wmi_tlv_parse_alloc(ar, skb->data, skb->len, GFP_ATOMIC);
+	if (IS_ERR(tb)) {
+		ret = PTR_ERR(tb);
+		ath10k_warn(ar, "failed to parse tlv: %d\n", ret);
+		return ret;
+	}
+
+	ev = tb[WMI_TLV_TAG_STRUCT_ECHO_EVENT];
+	if (!ev) {
+		kfree(tb);
+		return -EPROTO;
+	}
+
+	arg->value = ev->value;
+
+	kfree(tb);
+	return 0;
+}
+
 static struct sk_buff *
 ath10k_wmi_tlv_op_gen_pdev_suspend(struct ath10k *ar, u32 opt)
 {
@@ -2441,7 +2468,7 @@
 }
 
 static struct sk_buff *
-ath10k_wmi_tlv_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable,
+ath10k_wmi_tlv_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable,
 				 u32 log_level) {
 	struct wmi_tlv_dbglog_cmd *cmd;
 	struct wmi_tlv *tlv;
@@ -3081,6 +3108,34 @@
 	return skb;
 }
 
+static struct sk_buff *
+ath10k_wmi_tlv_op_gen_echo(struct ath10k *ar, u32 value)
+{
+	struct wmi_echo_cmd *cmd;
+	struct wmi_tlv *tlv;
+	struct sk_buff *skb;
+	void *ptr;
+	size_t len;
+
+	len = sizeof(*tlv) + sizeof(*cmd);
+	skb = ath10k_wmi_alloc_skb(ar, len);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	ptr = (void *)skb->data;
+	tlv = ptr;
+	tlv->tag = __cpu_to_le16(WMI_TLV_TAG_STRUCT_ECHO_CMD);
+	tlv->len = __cpu_to_le16(sizeof(*cmd));
+	cmd = (void *)tlv->value;
+	cmd->value = cpu_to_le32(value);
+
+	ptr += sizeof(*tlv);
+	ptr += sizeof(*cmd);
+
+	ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tlv echo value 0x%08x\n", value);
+	return skb;
+}
+
 /****************/
 /* TLV mappings */
 /****************/
@@ -3429,6 +3484,7 @@
 	.pull_fw_stats = ath10k_wmi_tlv_op_pull_fw_stats,
 	.pull_roam_ev = ath10k_wmi_tlv_op_pull_roam_ev,
 	.pull_wow_event = ath10k_wmi_tlv_op_pull_wow_ev,
+	.pull_echo_ev = ath10k_wmi_tlv_op_pull_echo_ev,
 	.get_txbf_conf_scheme = ath10k_wmi_tlv_txbf_conf_scheme,
 
 	.gen_pdev_suspend = ath10k_wmi_tlv_op_gen_pdev_suspend,
@@ -3485,6 +3541,7 @@
 	.gen_adaptive_qcs = ath10k_wmi_tlv_op_gen_adaptive_qcs,
 	.fw_stats_fill = ath10k_wmi_main_op_fw_stats_fill,
 	.get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype,
+	.gen_echo = ath10k_wmi_tlv_op_gen_echo,
 };
 
 static const struct wmi_peer_flags_map wmi_tlv_peer_flags_map = {
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index d246288..54df425 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -29,6 +29,9 @@
 #include "p2p.h"
 #include "hw.h"
 
+#define ATH10K_WMI_BARRIER_ECHO_ID 0xBA991E9
+#define ATH10K_WMI_BARRIER_TIMEOUT_HZ (3 * HZ)
+
 /* MAIN WMI cmd track */
 static struct wmi_cmd_map wmi_cmd_map = {
 	.init_cmdid = WMI_INIT_CMDID,
@@ -1874,7 +1877,7 @@
 	ether_addr_copy(cmd->hdr.peer_macaddr.addr, ieee80211_get_DA(hdr));
 	memcpy(cmd->buf, msdu->data, msdu->len);
 
-	ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %p len %d ftype %02x stype %02x\n",
+	ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %pK len %d ftype %02x stype %02x\n",
 		   msdu, skb->len, fc & IEEE80211_FCTL_FTYPE,
 		   fc & IEEE80211_FCTL_STYPE);
 	trace_ath10k_tx_hdr(ar, skb->data, skb->len);
@@ -2240,6 +2243,29 @@
 	return 0;
 }
 
+static bool ath10k_wmi_rx_is_decrypted(struct ath10k *ar,
+				       struct ieee80211_hdr *hdr)
+{
+	if (!ieee80211_has_protected(hdr->frame_control))
+		return false;
+
+	/* FW delivers WEP Shared Auth frame with Protected Bit set and
+	 * encrypted payload. However in case of PMF it delivers decrypted
+	 * frames with Protected Bit set.
+	 */
+	if (ieee80211_is_auth(hdr->frame_control))
+		return false;
+
+	/* qca99x0 based FW delivers broadcast or multicast management frames
+	 * (ex: group privacy action frames in mesh) as encrypted payload.
+	 */
+	if (is_multicast_ether_addr(ieee80211_get_DA(hdr)) &&
+	    ar->hw_params.sw_decrypt_mcast_mgmt)
+		return false;
+
+	return true;
+}
+
 int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb)
 {
 	struct wmi_mgmt_rx_ev_arg arg = {};
@@ -2326,11 +2352,7 @@
 
 	ath10k_wmi_handle_wep_reauth(ar, skb, status);
 
-	/* FW delivers WEP Shared Auth frame with Protected Bit set and
-	 * encrypted payload. However in case of PMF it delivers decrypted
-	 * frames with Protected Bit set. */
-	if (ieee80211_has_protected(hdr->frame_control) &&
-	    !ieee80211_is_auth(hdr->frame_control)) {
+	if (ath10k_wmi_rx_is_decrypted(ar, hdr)) {
 		status->flag |= RX_FLAG_DECRYPTED;
 
 		if (!ieee80211_is_action(hdr->frame_control) &&
@@ -2347,7 +2369,7 @@
 		ath10k_mac_handle_beacon(ar, skb);
 
 	ath10k_dbg(ar, ATH10K_DBG_MGMT,
-		   "event mgmt rx skb %p len %d ftype %02x stype %02x\n",
+		   "event mgmt rx skb %pK len %d ftype %02x stype %02x\n",
 		   skb, skb->len,
 		   fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE);
 
@@ -2495,7 +2517,21 @@
 
 void ath10k_wmi_event_echo(struct ath10k *ar, struct sk_buff *skb)
 {
-	ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_ECHO_EVENTID\n");
+	struct wmi_echo_ev_arg arg = {};
+	int ret;
+
+	ret = ath10k_wmi_pull_echo_ev(ar, skb, &arg);
+	if (ret) {
+		ath10k_warn(ar, "failed to parse echo: %d\n", ret);
+		return;
+	}
+
+	ath10k_dbg(ar, ATH10K_DBG_WMI,
+		   "wmi event echo value 0x%08x\n",
+		   le32_to_cpu(arg.value));
+
+	if (le32_to_cpu(arg.value) == ATH10K_WMI_BARRIER_ECHO_ID)
+		complete(&ar->wmi.barrier);
 }
 
 int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb)
@@ -3478,6 +3514,12 @@
 			continue;
 		}
 
+		/* mac80211 would have already asked us to stop beaconing and
+		 * bring the vdev down, so continue in that case
+		 */
+		if (!arvif->is_up)
+			continue;
+
 		/* There are no completions for beacons so wait for next SWBA
 		 * before telling mac80211 to decrement CSA counter
 		 *
@@ -3527,7 +3569,6 @@
 				ath10k_warn(ar, "failed to map beacon: %d\n",
 					    ret);
 				dev_kfree_skb_any(bcn);
-				ret = -EIO;
 				goto skip;
 			}
 
@@ -4792,6 +4833,17 @@
 	return 0;
 }
 
+static int ath10k_wmi_op_pull_echo_ev(struct ath10k *ar,
+				      struct sk_buff *skb,
+				      struct wmi_echo_ev_arg *arg)
+{
+	struct wmi_echo_event *ev = (void *)skb->data;
+
+	arg->value = ev->value;
+
+	return 0;
+}
+
 int ath10k_wmi_event_ready(struct ath10k *ar, struct sk_buff *skb)
 {
 	struct wmi_rdy_ev_arg arg = {};
@@ -5124,6 +5176,7 @@
 {
 	struct wmi_cmd_hdr *cmd_hdr;
 	enum wmi_10_2_event_id id;
+	bool consumed;
 
 	cmd_hdr = (struct wmi_cmd_hdr *)skb->data;
 	id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID);
@@ -5133,6 +5186,18 @@
 
 	trace_ath10k_wmi_event(ar, id, skb->data, skb->len);
 
+	consumed = ath10k_tm_event_wmi(ar, id, skb);
+
+	/* Ready event must be handled normally also in UTF mode so that we
+	 * know the UTF firmware has booted, others we are just bypass WMI
+	 * events to testmode.
+	 */
+	if (consumed && id != WMI_10_2_READY_EVENTID) {
+		ath10k_dbg(ar, ATH10K_DBG_WMI,
+			   "wmi testmode consumed 0x%x\n", id);
+		goto out;
+	}
+
 	switch (id) {
 	case WMI_10_2_MGMT_RX_EVENTID:
 		ath10k_wmi_event_mgmt_rx(ar, skb);
@@ -5248,6 +5313,7 @@
 {
 	struct wmi_cmd_hdr *cmd_hdr;
 	enum wmi_10_4_event_id id;
+	bool consumed;
 
 	cmd_hdr = (struct wmi_cmd_hdr *)skb->data;
 	id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID);
@@ -5257,6 +5323,18 @@
 
 	trace_ath10k_wmi_event(ar, id, skb->data, skb->len);
 
+	consumed = ath10k_tm_event_wmi(ar, id, skb);
+
+	/* Ready event must be handled normally also in UTF mode so that we
+	 * know the UTF firmware has booted, others we are just bypass WMI
+	 * events to testmode.
+	 */
+	if (consumed && id != WMI_10_4_READY_EVENTID) {
+		ath10k_dbg(ar, ATH10K_DBG_WMI,
+			   "wmi testmode consumed 0x%x\n", id);
+		goto out;
+	}
+
 	switch (id) {
 	case WMI_10_4_MGMT_RX_EVENTID:
 		ath10k_wmi_event_mgmt_rx(ar, skb);
@@ -5306,6 +5384,7 @@
 		break;
 	case WMI_10_4_WOW_WAKEUP_HOST_EVENTID:
 	case WMI_10_4_PEER_RATECODE_LIST_EVENTID:
+	case WMI_10_4_WDS_PEER_EVENTID:
 		ath10k_dbg(ar, ATH10K_DBG_WMI,
 			   "received event id %d not implemented\n", id);
 		break;
@@ -6863,7 +6942,7 @@
 }
 
 static struct sk_buff *
-ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable,
+ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable,
 			     u32 log_level)
 {
 	struct wmi_dbglog_cfg_cmd *cmd;
@@ -6901,6 +6980,44 @@
 }
 
 static struct sk_buff *
+ath10k_wmi_10_4_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable,
+				  u32 log_level)
+{
+	struct wmi_10_4_dbglog_cfg_cmd *cmd;
+	struct sk_buff *skb;
+	u32 cfg;
+
+	skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd));
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	cmd = (struct wmi_10_4_dbglog_cfg_cmd *)skb->data;
+
+	if (module_enable) {
+		cfg = SM(log_level,
+			 ATH10K_DBGLOG_CFG_LOG_LVL);
+	} else {
+		/* set back defaults, all modules with WARN level */
+		cfg = SM(ATH10K_DBGLOG_LEVEL_WARN,
+			 ATH10K_DBGLOG_CFG_LOG_LVL);
+		module_enable = ~0;
+	}
+
+	cmd->module_enable = __cpu_to_le64(module_enable);
+	cmd->module_valid = __cpu_to_le64(~0);
+	cmd->config_enable = __cpu_to_le32(cfg);
+	cmd->config_valid = __cpu_to_le32(ATH10K_DBGLOG_CFG_LOG_LVL_MASK);
+
+	ath10k_dbg(ar, ATH10K_DBG_WMI,
+		   "wmi dbglog cfg modules 0x%016llx 0x%016llx config %08x %08x\n",
+		   __le64_to_cpu(cmd->module_enable),
+		   __le64_to_cpu(cmd->module_valid),
+		   __le32_to_cpu(cmd->config_enable),
+		   __le32_to_cpu(cmd->config_valid));
+	return skb;
+}
+
+static struct sk_buff *
 ath10k_wmi_op_gen_pktlog_enable(struct ath10k *ar, u32 ev_bitmap)
 {
 	struct wmi_pdev_pktlog_enable_cmd *cmd;
@@ -7649,6 +7766,48 @@
 	return skb;
 }
 
+static struct sk_buff *
+ath10k_wmi_op_gen_echo(struct ath10k *ar, u32 value)
+{
+	struct wmi_echo_cmd *cmd;
+	struct sk_buff *skb;
+
+	skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd));
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	cmd = (struct wmi_echo_cmd *)skb->data;
+	cmd->value = cpu_to_le32(value);
+
+	ath10k_dbg(ar, ATH10K_DBG_WMI,
+		   "wmi echo value 0x%08x\n", value);
+	return skb;
+}
+
+int
+ath10k_wmi_barrier(struct ath10k *ar)
+{
+	int ret;
+	int time_left;
+
+	spin_lock_bh(&ar->data_lock);
+	reinit_completion(&ar->wmi.barrier);
+	spin_unlock_bh(&ar->data_lock);
+
+	ret = ath10k_wmi_echo(ar, ATH10K_WMI_BARRIER_ECHO_ID);
+	if (ret) {
+		ath10k_warn(ar, "failed to submit wmi echo: %d\n", ret);
+		return ret;
+	}
+
+	time_left = wait_for_completion_timeout(&ar->wmi.barrier,
+						ATH10K_WMI_BARRIER_TIMEOUT_HZ);
+	if (!time_left)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
 static const struct wmi_ops wmi_ops = {
 	.rx = ath10k_wmi_op_rx,
 	.map_svc = wmi_main_svc_map,
@@ -7665,6 +7824,7 @@
 	.pull_rdy = ath10k_wmi_op_pull_rdy_ev,
 	.pull_fw_stats = ath10k_wmi_main_op_pull_fw_stats,
 	.pull_roam_ev = ath10k_wmi_op_pull_roam_ev,
+	.pull_echo_ev = ath10k_wmi_op_pull_echo_ev,
 
 	.gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend,
 	.gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume,
@@ -7709,6 +7869,7 @@
 	.gen_delba_send = ath10k_wmi_op_gen_delba_send,
 	.fw_stats_fill = ath10k_wmi_main_op_fw_stats_fill,
 	.get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype,
+	.gen_echo = ath10k_wmi_op_gen_echo,
 	/* .gen_bcn_tmpl not implemented */
 	/* .gen_prb_tmpl not implemented */
 	/* .gen_p2p_go_bcn_ie not implemented */
@@ -7738,6 +7899,7 @@
 	.pull_phyerr = ath10k_wmi_op_pull_phyerr_ev,
 	.pull_rdy = ath10k_wmi_op_pull_rdy_ev,
 	.pull_roam_ev = ath10k_wmi_op_pull_roam_ev,
+	.pull_echo_ev = ath10k_wmi_op_pull_echo_ev,
 
 	.gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend,
 	.gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume,
@@ -7777,6 +7939,7 @@
 	.gen_delba_send = ath10k_wmi_op_gen_delba_send,
 	.fw_stats_fill = ath10k_wmi_10x_op_fw_stats_fill,
 	.get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype,
+	.gen_echo = ath10k_wmi_op_gen_echo,
 	/* .gen_bcn_tmpl not implemented */
 	/* .gen_prb_tmpl not implemented */
 	/* .gen_p2p_go_bcn_ie not implemented */
@@ -7796,6 +7959,7 @@
 	.pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev,
 	.gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd,
 	.gen_start_scan = ath10k_wmi_10x_op_gen_start_scan,
+	.gen_echo = ath10k_wmi_op_gen_echo,
 
 	.pull_scan = ath10k_wmi_op_pull_scan_ev,
 	.pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev,
@@ -7807,6 +7971,7 @@
 	.pull_phyerr = ath10k_wmi_op_pull_phyerr_ev,
 	.pull_rdy = ath10k_wmi_op_pull_rdy_ev,
 	.pull_roam_ev = ath10k_wmi_op_pull_roam_ev,
+	.pull_echo_ev = ath10k_wmi_op_pull_echo_ev,
 
 	.gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend,
 	.gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume,
@@ -7862,6 +8027,7 @@
 	.pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev,
 	.gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd,
 	.gen_start_scan = ath10k_wmi_10x_op_gen_start_scan,
+	.gen_echo = ath10k_wmi_op_gen_echo,
 
 	.pull_scan = ath10k_wmi_op_pull_scan_ev,
 	.pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev,
@@ -7873,6 +8039,7 @@
 	.pull_phyerr = ath10k_wmi_op_pull_phyerr_ev,
 	.pull_rdy = ath10k_wmi_op_pull_rdy_ev,
 	.pull_roam_ev = ath10k_wmi_op_pull_roam_ev,
+	.pull_echo_ev = ath10k_wmi_op_pull_echo_ev,
 
 	.gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend,
 	.gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume,
@@ -7968,7 +8135,7 @@
 	.gen_pdev_set_wmm = ath10k_wmi_op_gen_pdev_set_wmm,
 	.gen_force_fw_hang = ath10k_wmi_op_gen_force_fw_hang,
 	.gen_mgmt_tx = ath10k_wmi_op_gen_mgmt_tx,
-	.gen_dbglog_cfg = ath10k_wmi_op_gen_dbglog_cfg,
+	.gen_dbglog_cfg = ath10k_wmi_10_4_op_gen_dbglog_cfg,
 	.gen_pktlog_enable = ath10k_wmi_op_gen_pktlog_enable,
 	.gen_pktlog_disable = ath10k_wmi_op_gen_pktlog_disable,
 	.gen_pdev_set_quiet_mode = ath10k_wmi_op_gen_pdev_set_quiet_mode,
@@ -7980,10 +8147,12 @@
 	.ext_resource_config = ath10k_wmi_10_4_ext_resource_config,
 
 	/* shared with 10.2 */
+	.pull_echo_ev = ath10k_wmi_op_pull_echo_ev,
 	.gen_request_stats = ath10k_wmi_op_gen_request_stats,
 	.gen_pdev_get_temperature = ath10k_wmi_10_2_op_gen_pdev_get_temperature,
 	.get_vdev_subtype = ath10k_wmi_10_4_op_get_vdev_subtype,
 	.gen_pdev_bss_chan_info_req = ath10k_wmi_10_2_op_gen_pdev_bss_chan_info,
+	.gen_echo = ath10k_wmi_op_gen_echo,
 };
 
 int ath10k_wmi_attach(struct ath10k *ar)
@@ -8036,6 +8205,7 @@
 
 	init_completion(&ar->wmi.service_ready);
 	init_completion(&ar->wmi.unified_ready);
+	init_completion(&ar->wmi.barrier);
 
 	INIT_WORK(&ar->svc_rdy_work, ath10k_wmi_event_service_ready_work);
 
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index 3ef4688..1b243c8 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -55,7 +55,7 @@
  *    type.
  *
  * 6. Comment each parameter part of the WMI command/event structure by
- *    using the 2 stars at the begining of C comment instead of one star to
+ *    using the 2 stars at the beginning of C comment instead of one star to
  *    enable HTML document generation using Doxygen.
  *
  */
@@ -180,6 +180,7 @@
 	WMI_SERVICE_MESH_NON_11S,
 	WMI_SERVICE_PEER_STATS,
 	WMI_SERVICE_RESTRT_CHNL_SUPPORT,
+	WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT,
 	WMI_SERVICE_TX_MODE_PUSH_ONLY,
 	WMI_SERVICE_TX_MODE_PUSH_PULL,
 	WMI_SERVICE_TX_MODE_DYNAMIC,
@@ -305,6 +306,7 @@
 	WMI_10_4_SERVICE_RESTRT_CHNL_SUPPORT,
 	WMI_10_4_SERVICE_PEER_STATS,
 	WMI_10_4_SERVICE_MESH_11S,
+	WMI_10_4_SERVICE_PERIODIC_CHAN_STAT_SUPPORT,
 	WMI_10_4_SERVICE_TX_MODE_PUSH_ONLY,
 	WMI_10_4_SERVICE_TX_MODE_PUSH_PULL,
 	WMI_10_4_SERVICE_TX_MODE_DYNAMIC,
@@ -402,6 +404,7 @@
 	SVCSTR(WMI_SERVICE_MESH_NON_11S);
 	SVCSTR(WMI_SERVICE_PEER_STATS);
 	SVCSTR(WMI_SERVICE_RESTRT_CHNL_SUPPORT);
+	SVCSTR(WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT);
 	SVCSTR(WMI_SERVICE_TX_MODE_PUSH_ONLY);
 	SVCSTR(WMI_SERVICE_TX_MODE_PUSH_PULL);
 	SVCSTR(WMI_SERVICE_TX_MODE_DYNAMIC);
@@ -652,6 +655,8 @@
 	       WMI_SERVICE_PEER_STATS, len);
 	SVCMAP(WMI_10_4_SERVICE_MESH_11S,
 	       WMI_SERVICE_MESH_11S, len);
+	SVCMAP(WMI_10_4_SERVICE_PERIODIC_CHAN_STAT_SUPPORT,
+	       WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, len);
 	SVCMAP(WMI_10_4_SERVICE_TX_MODE_PUSH_ONLY,
 	       WMI_SERVICE_TX_MODE_PUSH_ONLY, len);
 	SVCMAP(WMI_10_4_SERVICE_TX_MODE_PUSH_PULL,
@@ -2082,7 +2087,7 @@
 	 * In offload mode target supports features like WOW, chatter and
 	 * other protocol offloads. In order to support them some
 	 * functionalities like reorder buffering, PN checking need to be
-	 * done in target. This determines maximum number of peers suported
+	 * done in target. This determines maximum number of peers supported
 	 * by target in offload mode
 	 */
 	__le32 num_offload_peers;
@@ -2263,7 +2268,7 @@
 	 * Max. number of Tx fragments per MSDU
 	 *  This parameter controls the max number of Tx fragments per MSDU.
 	 *  This is sent by the target as part of the WMI_SERVICE_READY event
-	 *  and is overriden by the OS shim as required.
+	 *  and is overridden by the OS shim as required.
 	 */
 	__le32 max_frag_entries;
 } __packed;
@@ -2445,7 +2450,7 @@
 	 * Max. number of Tx fragments per MSDU
 	 *  This parameter controls the max number of Tx fragments per MSDU.
 	 *  This is sent by the target as part of the WMI_SERVICE_READY event
-	 *  and is overriden by the OS shim as required.
+	 *  and is overridden by the OS shim as required.
 	 */
 	__le32 max_frag_entries;
 } __packed;
@@ -2739,7 +2744,7 @@
 	struct wmi_host_mem_chunks mem_chunks;
 } __packed;
 
-/* _10x stucture is from 10.X FW API */
+/* _10x structure is from 10.X FW API */
 struct wmi_init_cmd_10x {
 	struct wmi_resource_config_10x resource_config;
 	struct wmi_host_mem_chunks mem_chunks;
@@ -3962,7 +3967,7 @@
 	/* illegal rate phy errors  */
 	__le32 illgl_rate_phy_err;
 
-	/* wal pdev continous xretry */
+	/* wal pdev continuous xretry */
 	__le32 pdev_cont_xretry;
 
 	/* wal pdev continous xretry */
@@ -4217,10 +4222,10 @@
  */
 struct wmi_pdev_stats_base {
 	__le32 chan_nf;
-	__le32 tx_frame_count;
-	__le32 rx_frame_count;
-	__le32 rx_clear_count;
-	__le32 cycle_count;
+	__le32 tx_frame_count; /* Cycles spent transmitting frames */
+	__le32 rx_frame_count; /* Cycles spent receiving frames */
+	__le32 rx_clear_count; /* Total channel busy time, evidently */
+	__le32 cycle_count; /* Total on-channel time */
 	__le32 phy_err_count;
 	__le32 chan_tx_pwr;
 } __packed;
@@ -4456,9 +4461,9 @@
 	__le32 flags;
 	/* ssid field. Only valid for AP/GO/IBSS/BTAmp VDEV type. */
 	struct wmi_ssid ssid;
-	/* beacon/probe reponse xmit rate. Applicable for SoftAP. */
+	/* beacon/probe response xmit rate. Applicable for SoftAP. */
 	__le32 bcn_tx_rate;
-	/* beacon/probe reponse xmit power. Applicable for SoftAP. */
+	/* beacon/probe response xmit power. Applicable for SoftAP. */
 	__le32 bcn_tx_power;
 	/* number of p2p NOA descriptor(s) from scan entry */
 	__le32 num_noa_descriptors;
@@ -4686,7 +4691,7 @@
 	WMI_VDEV_PARAM_BEACON_INTERVAL,
 	/* Listen interval in TUs */
 	WMI_VDEV_PARAM_LISTEN_INTERVAL,
-	/* muticast rate in Mbps */
+	/* multicast rate in Mbps */
 	WMI_VDEV_PARAM_MULTICAST_RATE,
 	/* management frame rate in Mbps */
 	WMI_VDEV_PARAM_MGMT_TX_RATE,
@@ -4817,7 +4822,7 @@
 	WMI_10X_VDEV_PARAM_BEACON_INTERVAL,
 	/* Listen interval in TUs */
 	WMI_10X_VDEV_PARAM_LISTEN_INTERVAL,
-	/* muticast rate in Mbps */
+	/* multicast rate in Mbps */
 	WMI_10X_VDEV_PARAM_MULTICAST_RATE,
 	/* management frame rate in Mbps */
 	WMI_10X_VDEV_PARAM_MGMT_TX_RATE,
@@ -5062,7 +5067,7 @@
 } __packed;
 
 /* VDEV start response status codes */
-/* VDEV succesfully started */
+/* VDEV successfully started */
 #define WMI_INIFIED_VDEV_START_RESPONSE_STATUS_SUCCESS	0x0
 
 /* requested VDEV not found */
@@ -5378,7 +5383,7 @@
 #define WMI_UAPSD_AC_TYPE_TRIG 1
 
 #define WMI_UAPSD_AC_BIT_MASK(ac, type) \
-	((type ==  WMI_UAPSD_AC_TYPE_DELI) ? (1 << (ac << 1)) : (1 << ((ac << 1) + 1)))
+	(type == WMI_UAPSD_AC_TYPE_DELI ? 1 << (ac << 1) : 1 << ((ac << 1) + 1))
 
 enum wmi_sta_ps_param_uapsd {
 	WMI_STA_PS_UAPSD_AC0_DELIVERY_EN = (1 << 0),
@@ -6169,6 +6174,20 @@
 	__le32 config_valid;
 } __packed;
 
+struct wmi_10_4_dbglog_cfg_cmd {
+	/* bitmask to hold mod id config*/
+	__le64 module_enable;
+
+	/* see ATH10K_DBGLOG_CFG_ */
+	__le32 config_enable;
+
+	/* mask of module id bits to be changed */
+	__le64 module_valid;
+
+	/* mask of config bits to be changed, see ATH10K_DBGLOG_CFG_ */
+	__le32 config_valid;
+} __packed;
+
 enum wmi_roam_reason {
 	WMI_ROAM_REASON_BETTER_AP = 1,
 	WMI_ROAM_REASON_BEACON_MISS = 2,
@@ -6296,6 +6315,10 @@
 	__le32 rssi;
 };
 
+struct wmi_echo_ev_arg {
+	__le32 value;
+};
+
 struct wmi_pdev_temperature_event {
 	/* temperature value in Celcius degree */
 	__le32 temperature;
@@ -6624,5 +6647,6 @@
 				      char *buf);
 int ath10k_wmi_op_get_vdev_subtype(struct ath10k *ar,
 				   enum wmi_vdev_subtype subtype);
+int ath10k_wmi_barrier(struct ath10k *ar);
 
 #endif /* _WMI_H_ */
diff --git a/drivers/net/wireless/ath/ath5k/debug.c b/drivers/net/wireless/ath/ath5k/debug.c
index 929d7cc..4f8d9ed 100644
--- a/drivers/net/wireless/ath/ath5k/debug.c
+++ b/drivers/net/wireless/ath/ath5k/debug.c
@@ -909,7 +909,7 @@
 	struct ath5k_hw *ah = inode->i_private;
 	bool res;
 	int i, ret;
-	u32 eesize;
+	u32 eesize;	/* NB: in 16-bit words */
 	u16 val, *buf;
 
 	/* Get eeprom size */
@@ -932,7 +932,7 @@
 
 	/* Create buffer and read in eeprom */
 
-	buf = vmalloc(eesize);
+	buf = vmalloc(eesize * 2);
 	if (!buf) {
 		ret = -ENOMEM;
 		goto err;
@@ -952,7 +952,7 @@
 	}
 
 	ep->buf = buf;
-	ep->len = i;
+	ep->len = eesize * 2;
 
 	file->private_data = (void *)ep;
 
diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 72e2ec6..b7fe0af 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1449,14 +1449,14 @@
 		return -EIO;
 
 	if (test_bit(CONNECTED, &vif->flags)) {
-		ar->tx_pwr = 0;
+		ar->tx_pwr = 255;
 
 		if (ath6kl_wmi_get_tx_pwr_cmd(ar->wmi, vif->fw_vif_idx) != 0) {
 			ath6kl_err("ath6kl_wmi_get_tx_pwr_cmd failed\n");
 			return -EIO;
 		}
 
-		wait_event_interruptible_timeout(ar->event_wq, ar->tx_pwr != 0,
+		wait_event_interruptible_timeout(ar->event_wq, ar->tx_pwr != 255,
 						 5 * HZ);
 
 		if (signal_pending(current)) {
diff --git a/drivers/net/wireless/ath/ath6kl/hif.c b/drivers/net/wireless/ath/ath6kl/hif.c
index 18c0708..d194253 100644
--- a/drivers/net/wireless/ath/ath6kl/hif.c
+++ b/drivers/net/wireless/ath/ath6kl/hif.c
@@ -64,7 +64,7 @@
 }
 EXPORT_SYMBOL(ath6kl_hif_rw_comp_handler);
 
-#define REG_DUMP_COUNT_AR6003   60
+#define REGISTER_DUMP_COUNT     60
 #define REGISTER_DUMP_LEN_MAX   60
 
 static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar)
@@ -73,9 +73,6 @@
 	u32 i, address, regdump_addr = 0;
 	int ret;
 
-	if (ar->target_type != TARGET_TYPE_AR6003)
-		return;
-
 	/* the reg dump pointer is copied to the host interest area */
 	address = ath6kl_get_hi_item_addr(ar, HI_ITEM(hi_failure_state));
 	address = TARG_VTOP(ar->target_type, address);
@@ -95,7 +92,7 @@
 
 	/* fetch register dump data */
 	ret = ath6kl_diag_read(ar, regdump_addr, (u8 *)&regdump_val[0],
-				  REG_DUMP_COUNT_AR6003 * (sizeof(u32)));
+				  REGISTER_DUMP_COUNT * (sizeof(u32)));
 	if (ret) {
 		ath6kl_warn("failed to get register dump: %d\n", ret);
 		return;
@@ -105,9 +102,9 @@
 	ath6kl_info("hw 0x%x fw %s\n", ar->wiphy->hw_version,
 		    ar->wiphy->fw_version);
 
-	BUILD_BUG_ON(REG_DUMP_COUNT_AR6003 % 4);
+	BUILD_BUG_ON(REGISTER_DUMP_COUNT % 4);
 
-	for (i = 0; i < REG_DUMP_COUNT_AR6003; i += 4) {
+	for (i = 0; i < REGISTER_DUMP_COUNT; i += 4) {
 		ath6kl_info("%d: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n",
 			    i,
 			    le32_to_cpu(regdump_val[i]),
diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c
index b8cf04d..3fd1cc9 100644
--- a/drivers/net/wireless/ath/ath6kl/wmi.c
+++ b/drivers/net/wireless/ath/ath6kl/wmi.c
@@ -3520,7 +3520,7 @@
 	ret = ath6kl_wmi_cmd_send(wmi, if_idx, skb, WMI_AP_SET_PVB_CMDID,
 				  NO_SYNC_WMIFLAG);
 
-	return 0;
+	return ret;
 }
 
 int ath6kl_wmi_set_rx_frame_format_cmd(struct wmi *wmi, u8 if_idx,
diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig
index f68cb00..8f231c6 100644
--- a/drivers/net/wireless/ath/ath9k/Kconfig
+++ b/drivers/net/wireless/ath/ath9k/Kconfig
@@ -180,7 +180,7 @@
 config ATH9K_HWRNG
 	bool "Random number generator support"
 	depends on ATH9K && (HW_RANDOM = y || HW_RANDOM = ATH9K)
-	default y
+	default n
 	---help---
 	  This option incorporates the ADC register output as a source of
 	  randomness into Linux entropy pool (/dev/urandom and /dev/random)
diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
index 1b271b9..8eea8d2 100644
--- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
@@ -260,8 +260,8 @@
 	int cur_bin;
 	int upper, lower, cur_vit_mask;
 	int i;
-	int8_t mask_m[123];
-	int8_t mask_p[123];
+	int8_t mask_m[123] = {0};
+	int8_t mask_p[123] = {0};
 	int8_t mask_amt;
 	int tmp_mask;
 	static const int pilot_mask_reg[4] = {
@@ -274,9 +274,6 @@
 	};
 	static const int inc[4] = { 0, 100, 0, 0 };
 
-	memset(&mask_m, 0, sizeof(int8_t) * 123);
-	memset(&mask_p, 0, sizeof(int8_t) * 123);
-
 	cur_bin = -6000;
 	upper = bin + 100;
 	lower = bin - 100;
@@ -302,7 +299,7 @@
 	upper = bin + 120;
 	lower = bin - 120;
 
-	for (i = 0; i < 123; i++) {
+	for (i = 0; i < ARRAY_SIZE(mask_m); i++) {
 		if ((cur_vit_mask > lower) && (cur_vit_mask < upper)) {
 			/* workaround for gcc bug #37014 */
 			volatile int tmp_v = abs(cur_vit_mask - bin);
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
index 5bd2cba..08607d7 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c
@@ -3252,7 +3252,8 @@
 	int i;
 
 	for (i = 0; i < mdata_size / 2; i++, data++)
-		ath9k_hw_nvram_read(ah, i, data);
+		if (!ath9k_hw_nvram_read(ah, i, data))
+			return -EIO;
 
 	return 0;
 }
@@ -3282,7 +3283,8 @@
 	if (ath9k_hw_use_flash(ah)) {
 		u8 txrx;
 
-		ar9300_eeprom_restore_flash(ah, mptr, mdata_size);
+		if (ar9300_eeprom_restore_flash(ah, mptr, mdata_size))
+			return -EIO;
 
 		/* check if eeprom contains valid data */
 		eep = (struct ar9300_eeprom *) mptr;
diff --git a/drivers/net/wireless/ath/ath9k/gpio.c b/drivers/net/wireless/ath/ath9k/gpio.c
index 490f74d..ddb2886 100644
--- a/drivers/net/wireless/ath/ath9k/gpio.c
+++ b/drivers/net/wireless/ath/ath9k/gpio.c
@@ -22,7 +22,7 @@
 
 #ifdef CONFIG_MAC80211_LEDS
 
-void ath_fill_led_pin(struct ath_softc *sc)
+static void ath_fill_led_pin(struct ath_softc *sc)
 {
 	struct ath_hw *ah = sc->sc_ah;
 
diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c
index d1d0c06..14b13f0 100644
--- a/drivers/net/wireless/ath/ath9k/hw.c
+++ b/drivers/net/wireless/ath/ath9k/hw.c
@@ -2482,6 +2482,8 @@
 		return -EINVAL;
 	}
 
+	ath9k_gpio_cap_init(ah);
+
 	if (AR_SREV_9485(ah) ||
 	    AR_SREV_9285(ah) ||
 	    AR_SREV_9330(ah) ||
@@ -2531,8 +2533,6 @@
 	else
 		pCap->hw_caps &= ~ATH9K_HW_CAP_HT;
 
-	ath9k_gpio_cap_init(ah);
-
 	if (AR_SREV_9160_10_OR_LATER(ah) || AR_SREV_9100(ah))
 		pCap->rts_aggr_limit = ATH_AMPDU_LIMIT_MAX;
 	else
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index a394622..e9f32b5 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -718,9 +718,12 @@
 	if (!ath_complete_reset(sc, false))
 		ah->reset_power_on = false;
 
-	if (ah->led_pin >= 0)
+	if (ah->led_pin >= 0) {
 		ath9k_hw_set_gpio(ah, ah->led_pin,
 				  (ah->config.led_active_high) ? 1 : 0);
+		ath9k_hw_gpio_request_out(ah, ah->led_pin, NULL,
+					  AR_GPIO_OUTPUT_MUX_AS_OUTPUT);
+	}
 
 	/*
 	 * Reset key cache to sane defaults (all entries cleared) instead of
@@ -864,9 +867,11 @@
 
 	spin_lock_bh(&sc->sc_pcu_lock);
 
-	if (ah->led_pin >= 0)
+	if (ah->led_pin >= 0) {
 		ath9k_hw_set_gpio(ah, ah->led_pin,
 				  (ah->config.led_active_high) ? 0 : 1);
+		ath9k_hw_gpio_request_in(ah, ah->led_pin, NULL);
+	}
 
 	ath_prepare_reset(sc);
 
@@ -919,7 +924,7 @@
 	} else {
 		if (iter_data->primary_beacon_vif->type != NL80211_IFTYPE_AP &&
 		    vif->type == NL80211_IFTYPE_AP)
-		iter_data->primary_beacon_vif = vif;
+			iter_data->primary_beacon_vif = vif;
 	}
 
 	iter_data->beacons = true;
@@ -1154,6 +1159,7 @@
 		bool changed = (iter_data.primary_sta != ctx->primary_sta);
 
 		if (iter_data.primary_sta) {
+			iter_data.primary_beacon_vif = iter_data.primary_sta;
 			iter_data.beacons = true;
 			ath9k_set_assoc_state(sc, iter_data.primary_sta,
 					      changed);
@@ -1563,13 +1569,13 @@
 	struct ath_common *common = ath9k_hw_common(sc->sc_ah);
 	int ret = 0;
 
-	if (old_state == IEEE80211_STA_AUTH &&
-	    new_state == IEEE80211_STA_ASSOC) {
+	if (old_state == IEEE80211_STA_NOTEXIST &&
+	    new_state == IEEE80211_STA_NONE) {
 		ret = ath9k_sta_add(hw, vif, sta);
 		ath_dbg(common, CONFIG,
 			"Add station: %pM\n", sta->addr);
-	} else if (old_state == IEEE80211_STA_ASSOC &&
-		   new_state == IEEE80211_STA_AUTH) {
+	} else if (old_state == IEEE80211_STA_NONE &&
+		   new_state == IEEE80211_STA_NOTEXIST) {
 		ret = ath9k_sta_remove(hw, vif, sta);
 		ath_dbg(common, CONFIG,
 			"Remove station: %pM\n", sta->addr);
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index 8ddd604..52bfbb9 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -50,9 +50,11 @@
 static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq,
 			       struct ath_atx_tid *tid, struct sk_buff *skb);
 static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
-			    int tx_flags, struct ath_txq *txq);
+			    int tx_flags, struct ath_txq *txq,
+			    struct ieee80211_sta *sta);
 static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
 				struct ath_txq *txq, struct list_head *bf_q,
+				struct ieee80211_sta *sta,
 				struct ath_tx_status *ts, int txok);
 static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq,
 			     struct list_head *head, bool internal);
@@ -77,6 +79,22 @@
 /* Aggregation logic */
 /*********************/
 
+static void ath_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_sta *sta = info->status.status_driver_data[0];
+
+	if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
+		ieee80211_tx_status(hw, skb);
+		return;
+	}
+
+	if (sta)
+		ieee80211_tx_status_noskb(hw, sta, info);
+
+	dev_kfree_skb(skb);
+}
+
 void ath_txq_lock(struct ath_softc *sc, struct ath_txq *txq)
 	__acquires(&txq->axq_lock)
 {
@@ -92,6 +110,7 @@
 void ath_txq_unlock_complete(struct ath_softc *sc, struct ath_txq *txq)
 	__releases(&txq->axq_lock)
 {
+	struct ieee80211_hw *hw = sc->hw;
 	struct sk_buff_head q;
 	struct sk_buff *skb;
 
@@ -100,7 +119,7 @@
 	spin_unlock_bh(&txq->axq_lock);
 
 	while ((skb = __skb_dequeue(&q)))
-		ieee80211_tx_status(sc->hw, skb);
+		ath_tx_status(hw, skb);
 }
 
 static void ath_tx_queue_tid(struct ath_softc *sc, struct ath_txq *txq,
@@ -253,7 +272,7 @@
 		}
 
 		list_add_tail(&bf->list, &bf_head);
-		ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
+		ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0);
 	}
 
 	if (sendbar) {
@@ -318,12 +337,12 @@
 		bf = fi->bf;
 
 		if (!bf) {
-			ath_tx_complete(sc, skb, ATH_TX_ERROR, txq);
+			ath_tx_complete(sc, skb, ATH_TX_ERROR, txq, NULL);
 			continue;
 		}
 
 		list_add_tail(&bf->list, &bf_head);
-		ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
+		ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0);
 	}
 }
 
@@ -426,15 +445,14 @@
 
 static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq,
 				 struct ath_buf *bf, struct list_head *bf_q,
+				 struct ieee80211_sta *sta,
+				 struct ath_atx_tid *tid,
 				 struct ath_tx_status *ts, int txok)
 {
 	struct ath_node *an = NULL;
 	struct sk_buff *skb;
-	struct ieee80211_sta *sta;
-	struct ieee80211_hw *hw = sc->hw;
 	struct ieee80211_hdr *hdr;
 	struct ieee80211_tx_info *tx_info;
-	struct ath_atx_tid *tid = NULL;
 	struct ath_buf *bf_next, *bf_last = bf->bf_lastbf;
 	struct list_head bf_head;
 	struct sk_buff_head bf_pending;
@@ -460,12 +478,7 @@
 	for (i = 0; i < ts->ts_rateindex; i++)
 		retries += rates[i].count;
 
-	rcu_read_lock();
-
-	sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2);
 	if (!sta) {
-		rcu_read_unlock();
-
 		INIT_LIST_HEAD(&bf_head);
 		while (bf) {
 			bf_next = bf->bf_next;
@@ -473,7 +486,7 @@
 			if (!bf->bf_state.stale || bf_next != NULL)
 				list_move_tail(&bf->list, &bf_head);
 
-			ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, 0);
+			ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, ts, 0);
 
 			bf = bf_next;
 		}
@@ -481,7 +494,6 @@
 	}
 
 	an = (struct ath_node *)sta->drv_priv;
-	tid = ath_get_skb_tid(sc, an, skb);
 	seq_first = tid->seq_start;
 	isba = ts->ts_flags & ATH9K_TX_BA;
 
@@ -583,7 +595,7 @@
 								ts);
 			}
 
-			ath_tx_complete_buf(sc, bf, txq, &bf_head, ts,
+			ath_tx_complete_buf(sc, bf, txq, &bf_head, sta, ts,
 				!txfail);
 		} else {
 			if (tx_info->flags & IEEE80211_TX_STATUS_EOSP) {
@@ -604,7 +616,8 @@
 					ath_tx_update_baw(sc, tid, seqno);
 
 					ath_tx_complete_buf(sc, bf, txq,
-							    &bf_head, ts, 0);
+							    &bf_head, NULL, ts,
+							    0);
 					bar_index = max_t(int, bar_index,
 						ATH_BA_INDEX(seq_first, seqno));
 					break;
@@ -648,8 +661,6 @@
 		ath_txq_lock(sc, txq);
 	}
 
-	rcu_read_unlock();
-
 	if (needreset)
 		ath9k_queue_reset(sc, RESET_TYPE_TX_ERROR);
 }
@@ -664,7 +675,11 @@
 				  struct ath_tx_status *ts, struct ath_buf *bf,
 				  struct list_head *bf_head)
 {
+	struct ieee80211_hw *hw = sc->hw;
 	struct ieee80211_tx_info *info;
+	struct ieee80211_sta *sta;
+	struct ieee80211_hdr *hdr;
+	struct ath_atx_tid *tid = NULL;
 	bool txok, flush;
 
 	txok = !(ts->ts_status & ATH9K_TXERR_MASK);
@@ -677,6 +692,16 @@
 
 	ts->duration = ath9k_hw_get_duration(sc->sc_ah, bf->bf_desc,
 					     ts->ts_rateindex);
+
+	hdr = (struct ieee80211_hdr *) bf->bf_mpdu->data;
+	sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2);
+	if (sta) {
+		struct ath_node *an = (struct ath_node *)sta->drv_priv;
+		tid = ath_get_skb_tid(sc, an, bf->bf_mpdu);
+		if (ts->ts_status & (ATH9K_TXERR_FILT | ATH9K_TXERR_XRETRY))
+			tid->clear_ps_filter = true;
+	}
+
 	if (!bf_isampdu(bf)) {
 		if (!flush) {
 			info = IEEE80211_SKB_CB(bf->bf_mpdu);
@@ -685,9 +710,9 @@
 			ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok);
 			ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts);
 		}
-		ath_tx_complete_buf(sc, bf, txq, bf_head, ts, txok);
+		ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok);
 	} else
-		ath_tx_complete_aggr(sc, txq, bf, bf_head, ts, txok);
+		ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, tid, ts, txok);
 
 	if (!flush)
 		ath_txq_schedule(sc, txq);
@@ -923,7 +948,7 @@
 			list_add(&bf->list, &bf_head);
 			__skb_unlink(skb, *q);
 			ath_tx_update_baw(sc, tid, seqno);
-			ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
+			ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0);
 			continue;
 		}
 
@@ -1832,6 +1857,7 @@
  */
 void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq)
 {
+	rcu_read_lock();
 	ath_txq_lock(sc, txq);
 
 	if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) {
@@ -1850,6 +1876,7 @@
 	ath_drain_txq_list(sc, txq, &txq->axq_q);
 
 	ath_txq_unlock_complete(sc, txq);
+	rcu_read_unlock();
 }
 
 bool ath_drain_all_txq(struct ath_softc *sc)
@@ -2472,7 +2499,8 @@
 /*****************/
 
 static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
-			    int tx_flags, struct ath_txq *txq)
+			    int tx_flags, struct ath_txq *txq,
+			    struct ieee80211_sta *sta)
 {
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	struct ath_common *common = ath9k_hw_common(sc->sc_ah);
@@ -2492,15 +2520,17 @@
 			tx_info->flags |= IEEE80211_TX_STAT_ACK;
 	}
 
-	padpos = ieee80211_hdrlen(hdr->frame_control);
-	padsize = padpos & 3;
-	if (padsize && skb->len>padpos+padsize) {
-		/*
-		 * Remove MAC header padding before giving the frame back to
-		 * mac80211.
-		 */
-		memmove(skb->data + padsize, skb->data, padpos);
-		skb_pull(skb, padsize);
+	if (tx_info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
+		padpos = ieee80211_hdrlen(hdr->frame_control);
+		padsize = padpos & 3;
+		if (padsize && skb->len>padpos+padsize) {
+			/*
+			 * Remove MAC header padding before giving the frame back to
+			 * mac80211.
+			 */
+			memmove(skb->data + padsize, skb->data, padpos);
+			skb_pull(skb, padsize);
+		}
 	}
 
 	spin_lock_irqsave(&sc->sc_pm_lock, flags);
@@ -2515,12 +2545,14 @@
 	}
 	spin_unlock_irqrestore(&sc->sc_pm_lock, flags);
 
-	__skb_queue_tail(&txq->complete_q, skb);
 	ath_txq_skb_done(sc, txq, skb);
+	tx_info->status.status_driver_data[0] = sta;
+	__skb_queue_tail(&txq->complete_q, skb);
 }
 
 static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
 				struct ath_txq *txq, struct list_head *bf_q,
+				struct ieee80211_sta *sta,
 				struct ath_tx_status *ts, int txok)
 {
 	struct sk_buff *skb = bf->bf_mpdu;
@@ -2548,7 +2580,7 @@
 			complete(&sc->paprd_complete);
 	} else {
 		ath_debug_stat_tx(sc, bf, ts, txq, tx_flags);
-		ath_tx_complete(sc, skb, tx_flags, txq);
+		ath_tx_complete(sc, skb, tx_flags, txq, sta);
 	}
 skip_tx_complete:
 	/* At this point, skb (bf->bf_mpdu) is consumed...make sure we don't
@@ -2700,10 +2732,12 @@
 	u32 qcumask = ((1 << ATH9K_NUM_TX_QUEUES) - 1) & ah->intr_txqs;
 	int i;
 
+	rcu_read_lock();
 	for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
 		if (ATH_TXQ_SETUP(sc, i) && (qcumask & (1 << i)))
 			ath_tx_processq(sc, &sc->tx.txq[i]);
 	}
+	rcu_read_unlock();
 }
 
 void ath_tx_edma_tasklet(struct ath_softc *sc)
@@ -2717,6 +2751,7 @@
 	struct list_head *fifo_list;
 	int status;
 
+	rcu_read_lock();
 	for (;;) {
 		if (test_bit(ATH_OP_HW_RESET, &common->op_flags))
 			break;
@@ -2787,6 +2822,7 @@
 		ath_tx_process_buffer(sc, txq, &ts, bf, &bf_head);
 		ath_txq_unlock_complete(sc, txq);
 	}
+	rcu_read_unlock();
 }
 
 /*****************/
diff --git a/drivers/net/wireless/ath/carl9170/debug.c b/drivers/net/wireless/ath/carl9170/debug.c
index 6808db4..ec3a64e 100644
--- a/drivers/net/wireless/ath/carl9170/debug.c
+++ b/drivers/net/wireless/ath/carl9170/debug.c
@@ -75,7 +75,8 @@
 
 	if (!ar)
 		return -ENODEV;
-	dfops = container_of(file->f_op, struct carl9170_debugfs_fops, fops);
+	dfops = container_of(debugfs_real_fops(file),
+			     struct carl9170_debugfs_fops, fops);
 
 	if (!dfops->read)
 		return -ENOSYS;
@@ -127,7 +128,8 @@
 
 	if (!ar)
 		return -ENODEV;
-	dfops = container_of(file->f_op, struct carl9170_debugfs_fops, fops);
+	dfops = container_of(debugfs_real_fops(file),
+			     struct carl9170_debugfs_fops, fops);
 
 	if (!dfops->write)
 		return -ENOSYS;
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index 76842e6..99ab203 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -670,6 +670,7 @@
 	ar->readlen = outlen;
 	spin_unlock_bh(&ar->cmd_lock);
 
+	reinit_completion(&ar->cmd_wait);
 	err = __carl9170_exec_cmd(ar, &ar->cmd, false);
 
 	if (!(cmd & CARL9170_CMD_ASYNC_FLAG)) {
@@ -778,10 +779,7 @@
 	spin_lock_bh(&ar->cmd_lock);
 	ar->readlen = 0;
 	spin_unlock_bh(&ar->cmd_lock);
-	complete_all(&ar->cmd_wait);
-
-	/* This is required to prevent an early completion on _start */
-	reinit_completion(&ar->cmd_wait);
+	complete(&ar->cmd_wait);
 
 	/*
 	 * Note:
diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c
index 2f8136d..4100ffd 100644
--- a/drivers/net/wireless/ath/dfs_pattern_detector.c
+++ b/drivers/net/wireless/ath/dfs_pattern_detector.c
@@ -338,7 +338,7 @@
 	return true;
 }
 
-static struct dfs_pattern_detector default_dpd = {
+static const struct dfs_pattern_detector default_dpd = {
 	.exit		= dpd_exit,
 	.set_dfs_domain	= dpd_set_domain,
 	.add_pulse	= dpd_add_pulse,
diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c
index f0e1175..d117240 100644
--- a/drivers/net/wireless/ath/wil6210/cfg80211.c
+++ b/drivers/net/wireless/ath/wil6210/cfg80211.c
@@ -354,10 +354,13 @@
 	wil_dbg_misc(wil, "%s(), wdev=0x%p iftype=%d\n",
 		     __func__, wdev, wdev->iftype);
 
+	mutex_lock(&wil->p2p_wdev_mutex);
 	if (wil->scan_request) {
 		wil_err(wil, "Already scanning\n");
+		mutex_unlock(&wil->p2p_wdev_mutex);
 		return -EAGAIN;
 	}
+	mutex_unlock(&wil->p2p_wdev_mutex);
 
 	/* check we are client side */
 	switch (wdev->iftype) {
@@ -760,14 +763,11 @@
 	return rc;
 }
 
-static struct wil_tid_crypto_rx_single *
-wil_find_crypto_ctx(struct wil6210_priv *wil, u8 key_index,
-		    enum wmi_key_usage key_usage, const u8 *mac_addr)
+static struct wil_sta_info *
+wil_find_sta_by_key_usage(struct wil6210_priv *wil,
+			  enum wmi_key_usage key_usage, const u8 *mac_addr)
 {
 	int cid = -EINVAL;
-	int tid = 0;
-	struct wil_sta_info *s;
-	struct wil_tid_crypto_rx *c;
 
 	if (key_usage == WMI_KEY_USE_TX_GROUP)
 		return NULL; /* not needed */
@@ -778,18 +778,72 @@
 	else if (key_usage == WMI_KEY_USE_RX_GROUP)
 		cid = wil_find_cid_by_idx(wil, 0);
 	if (cid < 0) {
-		wil_err(wil, "No CID for %pM %s[%d]\n", mac_addr,
-			key_usage_str[key_usage], key_index);
+		wil_err(wil, "No CID for %pM %s\n", mac_addr,
+			key_usage_str[key_usage]);
 		return ERR_PTR(cid);
 	}
 
-	s = &wil->sta[cid];
-	if (key_usage == WMI_KEY_USE_PAIRWISE)
-		c = &s->tid_crypto_rx[tid];
-	else
-		c = &s->group_crypto_rx;
+	return &wil->sta[cid];
+}
 
-	return &c->key_id[key_index];
+static void wil_set_crypto_rx(u8 key_index, enum wmi_key_usage key_usage,
+			      struct wil_sta_info *cs,
+			      struct key_params *params)
+{
+	struct wil_tid_crypto_rx_single *cc;
+	int tid;
+
+	if (!cs)
+		return;
+
+	switch (key_usage) {
+	case WMI_KEY_USE_PAIRWISE:
+		for (tid = 0; tid < WIL_STA_TID_NUM; tid++) {
+			cc = &cs->tid_crypto_rx[tid].key_id[key_index];
+			if (params->seq)
+				memcpy(cc->pn, params->seq,
+				       IEEE80211_GCMP_PN_LEN);
+			else
+				memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN);
+			cc->key_set = true;
+		}
+		break;
+	case WMI_KEY_USE_RX_GROUP:
+		cc = &cs->group_crypto_rx.key_id[key_index];
+		if (params->seq)
+			memcpy(cc->pn, params->seq, IEEE80211_GCMP_PN_LEN);
+		else
+			memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN);
+		cc->key_set = true;
+		break;
+	default:
+		break;
+	}
+}
+
+static void wil_del_rx_key(u8 key_index, enum wmi_key_usage key_usage,
+			   struct wil_sta_info *cs)
+{
+	struct wil_tid_crypto_rx_single *cc;
+	int tid;
+
+	if (!cs)
+		return;
+
+	switch (key_usage) {
+	case WMI_KEY_USE_PAIRWISE:
+		for (tid = 0; tid < WIL_STA_TID_NUM; tid++) {
+			cc = &cs->tid_crypto_rx[tid].key_id[key_index];
+			cc->key_set = false;
+		}
+		break;
+	case WMI_KEY_USE_RX_GROUP:
+		cc = &cs->group_crypto_rx.key_id[key_index];
+		cc->key_set = false;
+		break;
+	default:
+		break;
+	}
 }
 
 static int wil_cfg80211_add_key(struct wiphy *wiphy,
@@ -801,24 +855,26 @@
 	int rc;
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise);
-	struct wil_tid_crypto_rx_single *cc = wil_find_crypto_ctx(wil,
-								  key_index,
-								  key_usage,
-								  mac_addr);
+	struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage,
+							    mac_addr);
+
+	if (!params) {
+		wil_err(wil, "NULL params\n");
+		return -EINVAL;
+	}
 
 	wil_dbg_misc(wil, "%s(%pM %s[%d] PN %*phN)\n", __func__,
 		     mac_addr, key_usage_str[key_usage], key_index,
 		     params->seq_len, params->seq);
 
-	if (IS_ERR(cc)) {
+	if (IS_ERR(cs)) {
 		wil_err(wil, "Not connected, %s(%pM %s[%d] PN %*phN)\n",
 			__func__, mac_addr, key_usage_str[key_usage], key_index,
 			params->seq_len, params->seq);
 		return -EINVAL;
 	}
 
-	if (cc)
-		cc->key_set = false;
+	wil_del_rx_key(key_index, key_usage, cs);
 
 	if (params->seq && params->seq_len != IEEE80211_GCMP_PN_LEN) {
 		wil_err(wil,
@@ -831,13 +887,8 @@
 
 	rc = wmi_add_cipher_key(wil, key_index, mac_addr, params->key_len,
 				params->key, key_usage);
-	if ((rc == 0) && cc) {
-		if (params->seq)
-			memcpy(cc->pn, params->seq, IEEE80211_GCMP_PN_LEN);
-		else
-			memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN);
-		cc->key_set = true;
-	}
+	if (!rc)
+		wil_set_crypto_rx(key_index, key_usage, cs, params);
 
 	return rc;
 }
@@ -849,20 +900,18 @@
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
 	enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise);
-	struct wil_tid_crypto_rx_single *cc = wil_find_crypto_ctx(wil,
-								  key_index,
-								  key_usage,
-								  mac_addr);
+	struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage,
+							    mac_addr);
 
 	wil_dbg_misc(wil, "%s(%pM %s[%d])\n", __func__, mac_addr,
 		     key_usage_str[key_usage], key_index);
 
-	if (IS_ERR(cc))
+	if (IS_ERR(cs))
 		wil_info(wil, "Not connected, %s(%pM %s[%d])\n", __func__,
 			 mac_addr, key_usage_str[key_usage], key_index);
 
-	if (!IS_ERR_OR_NULL(cc))
-		cc->key_set = false;
+	if (!IS_ERR_OR_NULL(cs))
+		wil_del_rx_key(key_index, key_usage, cs);
 
 	return wmi_del_cipher_key(wil, key_index, mac_addr, key_usage);
 }
@@ -1363,23 +1412,16 @@
 					 struct wireless_dev *wdev)
 {
 	struct wil6210_priv *wil = wiphy_to_wil(wiphy);
-	u8 started;
+	struct wil_p2p_info *p2p = &wil->p2p;
+
+	if (!p2p->p2p_dev_started)
+		return;
 
 	wil_dbg_misc(wil, "%s: entered\n", __func__);
 	mutex_lock(&wil->mutex);
-	started = wil_p2p_stop_discovery(wil);
-	if (started && wil->scan_request) {
-		struct cfg80211_scan_info info = {
-			.aborted = true,
-		};
-
-		cfg80211_scan_done(wil->scan_request, &info);
-		wil->scan_request = NULL;
-		wil->radio_wdev = wil->wdev;
-	}
+	wil_p2p_stop_radio_operations(wil);
+	p2p->p2p_dev_started = 0;
 	mutex_unlock(&wil->mutex);
-
-	wil->p2p.p2p_dev_started = 0;
 }
 
 static struct cfg80211_ops wil_cfg80211_ops = {
@@ -1464,14 +1506,8 @@
 	set_wiphy_dev(wdev->wiphy, dev);
 	wil_wiphy_init(wdev->wiphy);
 
-	rc = wiphy_register(wdev->wiphy);
-	if (rc < 0)
-		goto out_failed_reg;
-
 	return wdev;
 
-out_failed_reg:
-	wiphy_free(wdev->wiphy);
 out:
 	kfree(wdev);
 
@@ -1487,7 +1523,6 @@
 	if (!wdev)
 		return;
 
-	wiphy_unregister(wdev->wiphy);
 	wiphy_free(wdev->wiphy);
 	kfree(wdev);
 }
@@ -1498,11 +1533,11 @@
 
 	mutex_lock(&wil->p2p_wdev_mutex);
 	p2p_wdev = wil->p2p_wdev;
+	wil->p2p_wdev = NULL;
+	wil->radio_wdev = wil_to_wdev(wil);
+	mutex_unlock(&wil->p2p_wdev_mutex);
 	if (p2p_wdev) {
-		wil->p2p_wdev = NULL;
-		wil->radio_wdev = wil_to_wdev(wil);
 		cfg80211_unregister_wdev(p2p_wdev);
 		kfree(p2p_wdev);
 	}
-	mutex_unlock(&wil->p2p_wdev_mutex);
 }
diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index a8098b4..5e4058a 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -1553,6 +1553,56 @@
 	.open  = simple_open,
 };
 
+/*---------FW capabilities------------*/
+static int wil_fw_capabilities_debugfs_show(struct seq_file *s, void *data)
+{
+	struct wil6210_priv *wil = s->private;
+
+	seq_printf(s, "fw_capabilities : %*pb\n", WMI_FW_CAPABILITY_MAX,
+		   wil->fw_capabilities);
+
+	return 0;
+}
+
+static int wil_fw_capabilities_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, wil_fw_capabilities_debugfs_show,
+			   inode->i_private);
+}
+
+static const struct file_operations fops_fw_capabilities = {
+	.open		= wil_fw_capabilities_seq_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*---------FW version------------*/
+static int wil_fw_version_debugfs_show(struct seq_file *s, void *data)
+{
+	struct wil6210_priv *wil = s->private;
+
+	if (wil->fw_version[0])
+		seq_printf(s, "%s\n", wil->fw_version);
+	else
+		seq_puts(s, "N/A\n");
+
+	return 0;
+}
+
+static int wil_fw_version_seq_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, wil_fw_version_debugfs_show,
+			   inode->i_private);
+}
+
+static const struct file_operations fops_fw_version = {
+	.open		= wil_fw_version_seq_open,
+	.release	= single_release,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
 /*----------------*/
 static void wil6210_debugfs_init_blobs(struct wil6210_priv *wil,
 				       struct dentry *dbg)
@@ -1603,6 +1653,8 @@
 	{"recovery",	S_IRUGO | S_IWUSR,	&fops_recovery},
 	{"led_cfg",	S_IRUGO | S_IWUSR,	&fops_led_cfg},
 	{"led_blink_time",	S_IRUGO | S_IWUSR,	&fops_led_blink_time},
+	{"fw_capabilities",	S_IRUGO,	&fops_fw_capabilities},
+	{"fw_version",	S_IRUGO,		&fops_fw_version},
 };
 
 static void wil6210_debugfs_init_files(struct wil6210_priv *wil,
@@ -1643,7 +1695,6 @@
 static const struct dbg_off dbg_wil_off[] = {
 	WIL_FIELD(privacy,	S_IRUGO,		doff_u32),
 	WIL_FIELD(status[0],	S_IRUGO | S_IWUSR,	doff_ulong),
-	WIL_FIELD(fw_version,	S_IRUGO,		doff_u32),
 	WIL_FIELD(hw_version,	S_IRUGO,		doff_x32),
 	WIL_FIELD(recovery_count, S_IRUGO,		doff_u32),
 	WIL_FIELD(ap_isolate,	S_IRUGO,		doff_u32),
diff --git a/drivers/net/wireless/ath/wil6210/fw.h b/drivers/net/wireless/ath/wil6210/fw.h
index 7a2c6c1..2f2b910 100644
--- a/drivers/net/wireless/ath/wil6210/fw.h
+++ b/drivers/net/wireless/ath/wil6210/fw.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Qualcomm Atheros, Inc.
+ * Copyright (c) 2014,2016 Qualcomm Atheros, Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -58,6 +58,15 @@
 	u8 data[0]; /* free-form data [data_size], see above */
 } __packed;
 
+/* FW capabilities encoded inside a comment record */
+#define WIL_FW_CAPABILITIES_MAGIC (0xabcddcba)
+struct wil_fw_record_capabilities { /* type == wil_fw_type_comment */
+	/* identifies capabilities record */
+	__le32 magic;
+	/* capabilities (variable size), see enum wmi_fw_capability */
+	u8 capabilities[0];
+};
+
 /* perform action
  * data_size = @head.size - offsetof(struct wil_fw_record_action, data)
  */
@@ -93,6 +102,9 @@
 /* file header
  * First record of every file
  */
+/* the FW version prefix in the comment */
+#define WIL_FW_VERSION_PREFIX "FW version: "
+#define WIL_FW_VERSION_PREFIX_LEN (sizeof(WIL_FW_VERSION_PREFIX) - 1)
 struct wil_fw_record_file_header {
 	__le32 signature ; /* Wilocity signature */
 	__le32 reserved;
diff --git a/drivers/net/wireless/ath/wil6210/fw_inc.c b/drivers/net/wireless/ath/wil6210/fw_inc.c
index d30657e..8f40eb3 100644
--- a/drivers/net/wireless/ath/wil6210/fw_inc.c
+++ b/drivers/net/wireless/ath/wil6210/fw_inc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2015 Qualcomm Atheros, Inc.
+ * Copyright (c) 2014-2016 Qualcomm Atheros, Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -118,6 +118,12 @@
 	return (int)dlen;
 }
 
+static int fw_ignore_section(struct wil6210_priv *wil, const void *data,
+			     size_t size)
+{
+	return 0;
+}
+
 static int fw_handle_comment(struct wil6210_priv *wil, const void *data,
 			     size_t size)
 {
@@ -126,6 +132,27 @@
 	return 0;
 }
 
+static int
+fw_handle_capabilities(struct wil6210_priv *wil, const void *data,
+		       size_t size)
+{
+	const struct wil_fw_record_capabilities *rec = data;
+	size_t capa_size;
+
+	if (size < sizeof(*rec) ||
+	    le32_to_cpu(rec->magic) != WIL_FW_CAPABILITIES_MAGIC)
+		return 0;
+
+	capa_size = size - offsetof(struct wil_fw_record_capabilities,
+				    capabilities);
+	bitmap_zero(wil->fw_capabilities, WMI_FW_CAPABILITY_MAX);
+	memcpy(wil->fw_capabilities, rec->capabilities,
+	       min(sizeof(wil->fw_capabilities), capa_size));
+	wil_hex_dump_fw("CAPA", DUMP_PREFIX_OFFSET, 16, 1,
+			rec->capabilities, capa_size, false);
+	return 0;
+}
+
 static int fw_handle_data(struct wil6210_priv *wil, const void *data,
 			  size_t size)
 {
@@ -196,6 +223,13 @@
 	wil_hex_dump_fw("", DUMP_PREFIX_OFFSET, 16, 1, d->comment,
 			sizeof(d->comment), true);
 
+	if (!memcmp(d->comment, WIL_FW_VERSION_PREFIX,
+		    WIL_FW_VERSION_PREFIX_LEN))
+		memcpy(wil->fw_version,
+		       d->comment + WIL_FW_VERSION_PREFIX_LEN,
+		       min(sizeof(d->comment) - WIL_FW_VERSION_PREFIX_LEN,
+			   sizeof(wil->fw_version) - 1));
+
 	return 0;
 }
 
@@ -383,42 +417,51 @@
 
 static const struct {
 	int type;
-	int (*handler)(struct wil6210_priv *wil, const void *data, size_t size);
+	int (*load_handler)(struct wil6210_priv *wil, const void *data,
+			    size_t size);
+	int (*parse_handler)(struct wil6210_priv *wil, const void *data,
+			     size_t size);
 } wil_fw_handlers[] = {
-	{wil_fw_type_comment, fw_handle_comment},
-	{wil_fw_type_data, fw_handle_data},
-	{wil_fw_type_fill, fw_handle_fill},
+	{wil_fw_type_comment, fw_handle_comment, fw_handle_capabilities},
+	{wil_fw_type_data, fw_handle_data, fw_ignore_section},
+	{wil_fw_type_fill, fw_handle_fill, fw_ignore_section},
 	/* wil_fw_type_action */
 	/* wil_fw_type_verify */
-	{wil_fw_type_file_header, fw_handle_file_header},
-	{wil_fw_type_direct_write, fw_handle_direct_write},
-	{wil_fw_type_gateway_data, fw_handle_gateway_data},
-	{wil_fw_type_gateway_data4, fw_handle_gateway_data4},
+	{wil_fw_type_file_header, fw_handle_file_header,
+		fw_handle_file_header},
+	{wil_fw_type_direct_write, fw_handle_direct_write, fw_ignore_section},
+	{wil_fw_type_gateway_data, fw_handle_gateway_data, fw_ignore_section},
+	{wil_fw_type_gateway_data4, fw_handle_gateway_data4,
+		fw_ignore_section},
 };
 
 static int wil_fw_handle_record(struct wil6210_priv *wil, int type,
-				const void *data, size_t size)
+				const void *data, size_t size, bool load)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(wil_fw_handlers); i++) {
+	for (i = 0; i < ARRAY_SIZE(wil_fw_handlers); i++)
 		if (wil_fw_handlers[i].type == type)
-			return wil_fw_handlers[i].handler(wil, data, size);
-	}
+			return load ?
+				wil_fw_handlers[i].load_handler(
+					wil, data, size) :
+				wil_fw_handlers[i].parse_handler(
+					wil, data, size);
 
 	wil_err_fw(wil, "unknown record type: %d\n", type);
 	return -EINVAL;
 }
 
 /**
- * wil_fw_load - load FW into device
- *
- * Load the FW and uCode code and data to the corresponding device
- * memory regions
+ * wil_fw_process - process section from FW file
+ * if load is true: Load the FW and uCode code and data to the
+ * corresponding device memory regions,
+ * otherwise only parse and look for capabilities
  *
  * Return error code
  */
-static int wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size)
+static int wil_fw_process(struct wil6210_priv *wil, const void *data,
+			  size_t size, bool load)
 {
 	int rc = 0;
 	const struct wil_fw_record_head *hdr;
@@ -437,7 +480,7 @@
 			return -EINVAL;
 		}
 		rc = wil_fw_handle_record(wil, le16_to_cpu(hdr->type),
-					  &hdr[1], hdr_sz);
+					  &hdr[1], hdr_sz, load);
 		if (rc)
 			return rc;
 	}
@@ -456,13 +499,16 @@
 }
 
 /**
- * wil_request_firmware - Request firmware and load to device
+ * wil_request_firmware - Request firmware
  *
- * Request firmware image from the file and load it to device
+ * Request firmware image from the file
+ * If load is true, load firmware to device, otherwise
+ * only parse and extract capabilities
  *
  * Return error code
  */
-int wil_request_firmware(struct wil6210_priv *wil, const char *name)
+int wil_request_firmware(struct wil6210_priv *wil, const char *name,
+			 bool load)
 {
 	int rc, rc1;
 	const struct firmware *fw;
@@ -482,7 +528,7 @@
 			rc = rc1;
 			goto out;
 		}
-		rc = wil_fw_load(wil, d, rc1);
+		rc = wil_fw_process(wil, d, rc1, load);
 		if (rc < 0)
 			goto out;
 	}
diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c
index 011e741..64046e0 100644
--- a/drivers/net/wireless/ath/wil6210/interrupt.c
+++ b/drivers/net/wireless/ath/wil6210/interrupt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2015 Qualcomm Atheros, Inc.
+ * Copyright (c) 2012-2016 Qualcomm Atheros, Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -101,7 +101,7 @@
 	      mask_halp ? WIL6210_IRQ_DISABLE : WIL6210_IRQ_DISABLE_NO_HALP);
 }
 
-static void wil6210_mask_halp(struct wil6210_priv *wil)
+void wil6210_mask_halp(struct wil6210_priv *wil)
 {
 	wil_dbg_irq(wil, "%s()\n", __func__);
 
@@ -503,6 +503,13 @@
 				offsetof(struct RGF_ICR, ICR));
 		u32 imv_misc = wil_r(wil, RGF_DMA_EP_MISC_ICR +
 				     offsetof(struct RGF_ICR, IMV));
+
+		/* HALP interrupt can be unmasked when misc interrupts are
+		 * masked
+		 */
+		if (icr_misc & BIT_DMA_EP_MISC_ICR_HALP)
+			return 0;
+
 		wil_err(wil, "IRQ when it should be masked: pseudo 0x%08x\n"
 				"Rx   icm:icr:imv 0x%08x 0x%08x 0x%08x\n"
 				"Tx   icm:icr:imv 0x%08x 0x%08x 0x%08x\n"
@@ -592,7 +599,7 @@
 
 void wil6210_set_halp(struct wil6210_priv *wil)
 {
-	wil_dbg_misc(wil, "%s()\n", __func__);
+	wil_dbg_irq(wil, "%s()\n", __func__);
 
 	wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICS),
 	      BIT_DMA_EP_MISC_ICR_HALP);
@@ -600,7 +607,7 @@
 
 void wil6210_clear_halp(struct wil6210_priv *wil)
 {
-	wil_dbg_misc(wil, "%s()\n", __func__);
+	wil_dbg_irq(wil, "%s()\n", __func__);
 
 	wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICR),
 	      BIT_DMA_EP_MISC_ICR_HALP);
diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c
index 4bc92e5..e7130b5 100644
--- a/drivers/net/wireless/ath/wil6210/main.c
+++ b/drivers/net/wireless/ath/wil6210/main.c
@@ -232,6 +232,9 @@
 	struct net_device *ndev = wil_to_ndev(wil);
 	struct wireless_dev *wdev = wil->wdev;
 
+	if (unlikely(!ndev))
+		return;
+
 	might_sleep();
 	wil_info(wil, "%s(bssid=%pM, reason=%d, ev%s)\n", __func__, bssid,
 		 reason_code, from_event ? "+" : "-");
@@ -849,6 +852,7 @@
 	bitmap_zero(wil->status, wil_status_last);
 	mutex_unlock(&wil->wmi_mutex);
 
+	mutex_lock(&wil->p2p_wdev_mutex);
 	if (wil->scan_request) {
 		struct cfg80211_scan_info info = {
 			.aborted = true,
@@ -860,6 +864,7 @@
 		cfg80211_scan_done(wil->scan_request, &info);
 		wil->scan_request = NULL;
 	}
+	mutex_unlock(&wil->p2p_wdev_mutex);
 
 	wil_mask_irq(wil);
 
@@ -888,11 +893,12 @@
 			 WIL_FW2_NAME);
 
 		wil_halt_cpu(wil);
+		memset(wil->fw_version, 0, sizeof(wil->fw_version));
 		/* Loading f/w from the file */
-		rc = wil_request_firmware(wil, WIL_FW_NAME);
+		rc = wil_request_firmware(wil, WIL_FW_NAME, true);
 		if (rc)
 			return rc;
-		rc = wil_request_firmware(wil, WIL_FW2_NAME);
+		rc = wil_request_firmware(wil, WIL_FW2_NAME, true);
 		if (rc)
 			return rc;
 
@@ -1035,10 +1041,10 @@
 
 int __wil_down(struct wil6210_priv *wil)
 {
-	int rc;
-
 	WARN_ON(!mutex_is_locked(&wil->mutex));
 
+	set_bit(wil_status_resetting, wil->status);
+
 	if (wil->platform_ops.bus_request)
 		wil->platform_ops.bus_request(wil->platform_handle, 0);
 
@@ -1050,8 +1056,9 @@
 	}
 	wil_enable_irq(wil);
 
-	(void)wil_p2p_stop_discovery(wil);
+	wil_p2p_stop_radio_operations(wil);
 
+	mutex_lock(&wil->p2p_wdev_mutex);
 	if (wil->scan_request) {
 		struct cfg80211_scan_info info = {
 			.aborted = true,
@@ -1063,18 +1070,7 @@
 		cfg80211_scan_done(wil->scan_request, &info);
 		wil->scan_request = NULL;
 	}
-
-	if (test_bit(wil_status_fwconnected, wil->status) ||
-	    test_bit(wil_status_fwconnecting, wil->status)) {
-
-		mutex_unlock(&wil->mutex);
-		rc = wmi_call(wil, WMI_DISCONNECT_CMDID, NULL, 0,
-			      WMI_DISCONNECT_EVENTID, NULL, 0,
-			      WIL6210_DISCONNECT_TO_MS);
-		mutex_lock(&wil->mutex);
-		if (rc)
-			wil_err(wil, "timeout waiting for disconnect\n");
-	}
+	mutex_unlock(&wil->p2p_wdev_mutex);
 
 	wil_reset(wil, false);
 
@@ -1118,23 +1114,26 @@
 
 	mutex_lock(&wil->halp.lock);
 
-	wil_dbg_misc(wil, "%s: start, HALP ref_cnt (%d)\n", __func__,
-		     wil->halp.ref_cnt);
+	wil_dbg_irq(wil, "%s: start, HALP ref_cnt (%d)\n", __func__,
+		    wil->halp.ref_cnt);
 
 	if (++wil->halp.ref_cnt == 1) {
 		wil6210_set_halp(wil);
 		rc = wait_for_completion_timeout(&wil->halp.comp, to_jiffies);
-		if (!rc)
+		if (!rc) {
 			wil_err(wil, "%s: HALP vote timed out\n", __func__);
-		else
-			wil_dbg_misc(wil,
-				     "%s: HALP vote completed after %d ms\n",
-				     __func__,
-				     jiffies_to_msecs(to_jiffies - rc));
+			/* Mask HALP as done in case the interrupt is raised */
+			wil6210_mask_halp(wil);
+		} else {
+			wil_dbg_irq(wil,
+				    "%s: HALP vote completed after %d ms\n",
+				    __func__,
+				    jiffies_to_msecs(to_jiffies - rc));
+		}
 	}
 
-	wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__,
-		     wil->halp.ref_cnt);
+	wil_dbg_irq(wil, "%s: end, HALP ref_cnt (%d)\n", __func__,
+		    wil->halp.ref_cnt);
 
 	mutex_unlock(&wil->halp.lock);
 }
@@ -1145,16 +1144,16 @@
 
 	mutex_lock(&wil->halp.lock);
 
-	wil_dbg_misc(wil, "%s: start, HALP ref_cnt (%d)\n", __func__,
-		     wil->halp.ref_cnt);
+	wil_dbg_irq(wil, "%s: start, HALP ref_cnt (%d)\n", __func__,
+		    wil->halp.ref_cnt);
 
 	if (--wil->halp.ref_cnt == 0) {
 		wil6210_clear_halp(wil);
-		wil_dbg_misc(wil, "%s: HALP unvote\n", __func__);
+		wil_dbg_irq(wil, "%s: HALP unvote\n", __func__);
 	}
 
-	wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__,
-		     wil->halp.ref_cnt);
+	wil_dbg_irq(wil, "%s: end, HALP ref_cnt (%d)\n", __func__,
+		    wil->halp.ref_cnt);
 
 	mutex_unlock(&wil->halp.lock);
 }
diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c
index 09840975..61de5e9 100644
--- a/drivers/net/wireless/ath/wil6210/netdev.c
+++ b/drivers/net/wireless/ath/wil6210/netdev.c
@@ -179,13 +179,6 @@
 	SET_NETDEV_DEV(ndev, wiphy_dev(wdev->wiphy));
 	wdev->netdev = ndev;
 
-	netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx,
-		       WIL6210_NAPI_BUDGET);
-	netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx,
-		       WIL6210_NAPI_BUDGET);
-
-	netif_tx_stop_all_queues(ndev);
-
 	return wil;
 
  out_priv:
@@ -216,25 +209,48 @@
 
 int wil_if_add(struct wil6210_priv *wil)
 {
+	struct wireless_dev *wdev = wil_to_wdev(wil);
+	struct wiphy *wiphy = wdev->wiphy;
 	struct net_device *ndev = wil_to_ndev(wil);
 	int rc;
 
-	wil_dbg_misc(wil, "%s()\n", __func__);
+	wil_dbg_misc(wil, "entered");
+
+	strlcpy(wiphy->fw_version, wil->fw_version, sizeof(wiphy->fw_version));
+
+	rc = wiphy_register(wiphy);
+	if (rc < 0) {
+		wil_err(wil, "failed to register wiphy, err %d\n", rc);
+		return rc;
+	}
+
+	netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx,
+		       WIL6210_NAPI_BUDGET);
+	netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx,
+			  WIL6210_NAPI_BUDGET);
+
+	netif_tx_stop_all_queues(ndev);
 
 	rc = register_netdev(ndev);
 	if (rc < 0) {
 		dev_err(&ndev->dev, "Failed to register netdev: %d\n", rc);
-		return rc;
+		goto out_wiphy;
 	}
 
 	return 0;
+
+out_wiphy:
+	wiphy_unregister(wdev->wiphy);
+	return rc;
 }
 
 void wil_if_remove(struct wil6210_priv *wil)
 {
 	struct net_device *ndev = wil_to_ndev(wil);
+	struct wireless_dev *wdev = wil_to_wdev(wil);
 
 	wil_dbg_misc(wil, "%s()\n", __func__);
 
 	unregister_netdev(ndev);
+	wiphy_unregister(wdev->wiphy);
 }
diff --git a/drivers/net/wireless/ath/wil6210/p2p.c b/drivers/net/wireless/ath/wil6210/p2p.c
index e0f8aa0..4087785 100644
--- a/drivers/net/wireless/ath/wil6210/p2p.c
+++ b/drivers/net/wireless/ath/wil6210/p2p.c
@@ -263,3 +263,49 @@
 		mutex_unlock(&wil->p2p_wdev_mutex);
 	}
 }
+
+void wil_p2p_stop_radio_operations(struct wil6210_priv *wil)
+{
+	struct wil_p2p_info *p2p = &wil->p2p;
+	struct cfg80211_scan_info info = {
+		.aborted = true,
+	};
+
+	lockdep_assert_held(&wil->mutex);
+
+	mutex_lock(&wil->p2p_wdev_mutex);
+
+	if (wil->radio_wdev != wil->p2p_wdev)
+		goto out;
+
+	if (!p2p->discovery_started) {
+		/* Regular scan on the p2p device */
+		if (wil->scan_request &&
+		    wil->scan_request->wdev == wil->p2p_wdev) {
+			cfg80211_scan_done(wil->scan_request, &info);
+			wil->scan_request = NULL;
+		}
+		goto out;
+	}
+
+	/* Search or listen on p2p device */
+	mutex_unlock(&wil->p2p_wdev_mutex);
+	wil_p2p_stop_discovery(wil);
+	mutex_lock(&wil->p2p_wdev_mutex);
+
+	if (wil->scan_request) {
+		/* search */
+		cfg80211_scan_done(wil->scan_request, &info);
+		wil->scan_request = NULL;
+	} else {
+		/* listen */
+		cfg80211_remain_on_channel_expired(wil->radio_wdev,
+						   p2p->cookie,
+						   &p2p->listen_chan,
+						   GFP_KERNEL);
+	}
+
+out:
+	wil->radio_wdev = wil->wdev;
+	mutex_unlock(&wil->p2p_wdev_mutex);
+}
diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c
index 7b5c422..44746ca 100644
--- a/drivers/net/wireless/ath/wil6210/pcie_bus.c
+++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/suspend.h>
 #include "wil6210.h"
+#include <linux/rtnetlink.h>
 
 static bool use_msi = true;
 module_param(use_msi, bool, S_IRUGO);
@@ -38,6 +39,7 @@
 	u32 rev_id = wil_r(wil, RGF_USER_JTAG_DEV_ID);
 
 	bitmap_zero(wil->hw_capabilities, hw_capability_last);
+	bitmap_zero(wil->fw_capabilities, WMI_FW_CAPABILITY_MAX);
 
 	switch (rev_id) {
 	case JTAG_DEV_ID_SPARROW_B0:
@@ -51,6 +53,9 @@
 	}
 
 	wil_info(wil, "Board hardware is %s\n", wil->hw_name);
+
+	/* extract FW capabilities from file without loading the FW */
+	wil_request_firmware(wil, WIL_FW_NAME, false);
 }
 
 void wil_disable_irq(struct wil6210_priv *wil)
@@ -293,6 +298,9 @@
 #endif /* CONFIG_PM */
 
 	wil6210_debugfs_remove(wil);
+	rtnl_lock();
+	wil_p2p_wdev_free(wil);
+	rtnl_unlock();
 	wil_if_remove(wil);
 	wil_if_pcie_disable(wil);
 	pci_iounmap(pdev, csr);
@@ -300,7 +308,6 @@
 	pci_disable_device(pdev);
 	if (wil->platform_ops.uninit)
 		wil->platform_ops.uninit(wil->platform_handle);
-	wil_p2p_wdev_free(wil);
 	wil_if_free(wil);
 }
 
diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c
index f2f6a40..4c38520 100644
--- a/drivers/net/wireless/ath/wil6210/txrx.c
+++ b/drivers/net/wireless/ath/wil6210/txrx.c
@@ -873,9 +873,12 @@
 		rc = -EINVAL;
 		goto out_free;
 	}
-	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
 
+	spin_lock_bh(&txdata->lock);
+	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
 	txdata->enabled = 1;
+	spin_unlock_bh(&txdata->lock);
+
 	if (txdata->dot1x_open && (agg_wsize >= 0))
 		wil_addba_tx_request(wil, id, agg_wsize);
 
@@ -950,9 +953,11 @@
 		rc = -EINVAL;
 		goto out_free;
 	}
-	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
 
+	spin_lock_bh(&txdata->lock);
+	vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr);
 	txdata->enabled = 1;
+	spin_unlock_bh(&txdata->lock);
 
 	return 0;
  out_free:
diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h
index ecab4af..a949cd6 100644
--- a/drivers/net/wireless/ath/wil6210/wil6210.h
+++ b/drivers/net/wireless/ath/wil6210/wil6210.h
@@ -17,6 +17,7 @@
 #ifndef __WIL6210_H__
 #define __WIL6210_H__
 
+#include <linux/etherdevice.h>
 #include <linux/netdevice.h>
 #include <linux/wireless.h>
 #include <net/cfg80211.h>
@@ -576,10 +577,11 @@
 	struct wireless_dev *wdev;
 	void __iomem *csr;
 	DECLARE_BITMAP(status, wil_status_last);
-	u32 fw_version;
+	u8 fw_version[ETHTOOL_FWVERS_LEN];
 	u32 hw_version;
 	const char *hw_name;
 	DECLARE_BITMAP(hw_capabilities, hw_capability_last);
+	DECLARE_BITMAP(fw_capabilities, WMI_FW_CAPABILITY_MAX);
 	u8 n_mids; /* number of additional MIDs as reported by FW */
 	u32 recovery_count; /* num of FW recovery attempts in a short time */
 	u32 recovery_state; /* FW recovery state machine */
@@ -657,7 +659,7 @@
 
 	/* P2P_DEVICE vif */
 	struct wireless_dev *p2p_wdev;
-	struct mutex p2p_wdev_mutex; /* protect @p2p_wdev */
+	struct mutex p2p_wdev_mutex; /* protect @p2p_wdev and @scan_request */
 	struct wireless_dev *radio_wdev;
 
 	/* High Access Latency Policy voting */
@@ -828,6 +830,7 @@
 void wil_configure_interrupt_moderation(struct wil6210_priv *wil);
 void wil_disable_irq(struct wil6210_priv *wil);
 void wil_enable_irq(struct wil6210_priv *wil);
+void wil6210_mask_halp(struct wil6210_priv *wil);
 
 /* P2P */
 bool wil_p2p_is_social_scan(struct cfg80211_scan_request *request);
@@ -840,6 +843,7 @@
 int wil_p2p_cancel_listen(struct wil6210_priv *wil, u64 cookie);
 void wil_p2p_listen_expired(struct work_struct *work);
 void wil_p2p_search_expired(struct work_struct *work);
+void wil_p2p_stop_radio_operations(struct wil6210_priv *wil);
 
 /* WMI for P2P */
 int wmi_p2p_cfg(struct wil6210_priv *wil, int channel, int bi);
@@ -893,7 +897,8 @@
 int wil_iftype_nl2wmi(enum nl80211_iftype type);
 
 int wil_ioctl(struct wil6210_priv *wil, void __user *data, int cmd);
-int wil_request_firmware(struct wil6210_priv *wil, const char *name);
+int wil_request_firmware(struct wil6210_priv *wil, const char *name,
+			 bool load);
 
 int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime);
 int wil_suspend(struct wil6210_priv *wil, bool is_runtime);
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 4d92541..fae4f12 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -312,14 +312,14 @@
 	struct wireless_dev *wdev = wil->wdev;
 	struct wmi_ready_event *evt = d;
 
-	wil->fw_version = le32_to_cpu(evt->sw_version);
 	wil->n_mids = evt->numof_additional_mids;
 
-	wil_info(wil, "FW ver. %d; MAC %pM; %d MID's\n", wil->fw_version,
+	wil_info(wil, "FW ver. %s(SW %d); MAC %pM; %d MID's\n",
+		 wil->fw_version, le32_to_cpu(evt->sw_version),
 		 evt->mac, wil->n_mids);
 	/* ignore MAC address, we already have it from the boot loader */
-	snprintf(wdev->wiphy->fw_version, sizeof(wdev->wiphy->fw_version),
-		 "%d", wil->fw_version);
+	strlcpy(wdev->wiphy->fw_version, wil->fw_version,
+		sizeof(wdev->wiphy->fw_version));
 
 	wil_set_recovery_state(wil, fw_recovery_idle);
 	set_bit(wil_status_fwready, wil->status);
@@ -424,6 +424,7 @@
 static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id,
 				  void *d, int len)
 {
+	mutex_lock(&wil->p2p_wdev_mutex);
 	if (wil->scan_request) {
 		struct wmi_scan_complete_event *data = d;
 		struct cfg80211_scan_info info = {
@@ -435,14 +436,13 @@
 			     wil->scan_request, info.aborted);
 
 		del_timer_sync(&wil->scan_timer);
-		mutex_lock(&wil->p2p_wdev_mutex);
 		cfg80211_scan_done(wil->scan_request, &info);
 		wil->radio_wdev = wil->wdev;
-		mutex_unlock(&wil->p2p_wdev_mutex);
 		wil->scan_request = NULL;
 	} else {
 		wil_err(wil, "SCAN_COMPLETE while not scanning\n");
 	}
+	mutex_unlock(&wil->p2p_wdev_mutex);
 }
 
 static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len)
diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h
index 685fe0d..f430e8a 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.h
+++ b/drivers/net/wireless/ath/wil6210/wmi.h
@@ -46,6 +46,16 @@
 	MID_BROADCAST		= 0xFF,
 };
 
+/* FW capability IDs
+ * Each ID maps to a bit in a 32-bit bitmask value provided by the FW to
+ * the host
+ */
+enum wmi_fw_capability {
+	WMI_FW_CAPABILITY_FTM		= 0,
+	WMI_FW_CAPABILITY_PS_CONFIG	= 1,
+	WMI_FW_CAPABILITY_MAX,
+};
+
 /* WMI_CMD_HDR */
 struct wmi_cmd_hdr {
 	u8 mid;
@@ -120,6 +130,8 @@
 	WMI_BF_SM_MGMT_CMDID			= 0x838,
 	WMI_BF_RXSS_MGMT_CMDID			= 0x839,
 	WMI_BF_TRIG_CMDID			= 0x83A,
+	WMI_LINK_MAINTAIN_CFG_WRITE_CMDID	= 0x842,
+	WMI_LINK_MAINTAIN_CFG_READ_CMDID	= 0x843,
 	WMI_SET_SECTORS_CMDID			= 0x849,
 	WMI_MAINTAIN_PAUSE_CMDID		= 0x850,
 	WMI_MAINTAIN_RESUME_CMDID		= 0x851,
@@ -134,10 +146,15 @@
 	WMI_BF_CTRL_CMDID			= 0x862,
 	WMI_NOTIFY_REQ_CMDID			= 0x863,
 	WMI_GET_STATUS_CMDID			= 0x864,
+	WMI_GET_RF_STATUS_CMDID			= 0x866,
+	WMI_GET_BASEBAND_TYPE_CMDID		= 0x867,
 	WMI_UNIT_TEST_CMDID			= 0x900,
 	WMI_HICCUP_CMDID			= 0x901,
 	WMI_FLASH_READ_CMDID			= 0x902,
 	WMI_FLASH_WRITE_CMDID			= 0x903,
+	/* Power management */
+	WMI_TRAFFIC_DEFERRAL_CMDID		= 0x904,
+	WMI_TRAFFIC_RESUME_CMDID		= 0x905,
 	/* P2P */
 	WMI_P2P_CFG_CMDID			= 0x910,
 	WMI_PORT_ALLOCATE_CMDID			= 0x911,
@@ -150,6 +167,26 @@
 	WMI_PCP_START_CMDID			= 0x918,
 	WMI_PCP_STOP_CMDID			= 0x919,
 	WMI_GET_PCP_FACTOR_CMDID		= 0x91B,
+	/* Power Save Configuration Commands */
+	WMI_PS_DEV_PROFILE_CFG_CMDID		= 0x91C,
+	/* Not supported yet */
+	WMI_PS_DEV_CFG_CMDID			= 0x91D,
+	/* Not supported yet */
+	WMI_PS_DEV_CFG_READ_CMDID		= 0x91E,
+	/* Per MAC Power Save Configuration commands
+	 * Not supported yet
+	 */
+	WMI_PS_MID_CFG_CMDID			= 0x91F,
+	/* Not supported yet */
+	WMI_PS_MID_CFG_READ_CMDID		= 0x920,
+	WMI_RS_CFG_CMDID			= 0x921,
+	WMI_GET_DETAILED_RS_RES_CMDID		= 0x922,
+	WMI_AOA_MEAS_CMDID			= 0x923,
+	WMI_TOF_SESSION_START_CMDID		= 0x991,
+	WMI_TOF_GET_CAPABILITIES_CMDID		= 0x992,
+	WMI_TOF_SET_LCR_CMDID			= 0x993,
+	WMI_TOF_SET_LCI_CMDID			= 0x994,
+	WMI_TOF_CHANNEL_INFO_CMDID		= 0x995,
 	WMI_SET_MAC_ADDRESS_CMDID		= 0xF003,
 	WMI_ABORT_SCAN_CMDID			= 0xF007,
 	WMI_SET_PROMISCUOUS_MODE_CMDID		= 0xF041,
@@ -291,9 +328,8 @@
 /* WMI_START_SCAN_CMDID */
 struct wmi_start_scan_cmd {
 	u8 direct_scan_mac_addr[WMI_MAC_LEN];
-	/* DMG Beacon frame is transmitted during active scanning */
+	/* run scan with discovery beacon. Relevant for ACTIVE scan only. */
 	u8 discovery_mode;
-	/* reserved */
 	u8 reserved;
 	/* Max duration in the home channel(ms) */
 	__le32 dwell_time;
@@ -453,6 +489,12 @@
 	u8 reserved[3];
 } __packed;
 
+/* WMI_TRAFFIC_DEFERRAL_CMDID */
+struct wmi_traffic_deferral_cmd {
+	/* Bit vector: bit[0] - wake on Unicast, bit[1] - wake on Broadcast */
+	u8 wakeup_trigger;
+} __packed;
+
 /* WMI_P2P_CFG_CMDID */
 enum wmi_discovery_mode {
 	WMI_DISCOVERY_MODE_NON_OFFLOAD	= 0x00,
@@ -818,85 +860,193 @@
 	__le64 mem_base;
 } __packed;
 
+enum wmi_aoa_meas_type {
+	WMI_AOA_PHASE_MEAS	= 0x00,
+	WMI_AOA_PHASE_AMP_MEAS	= 0x01,
+};
+
+/* WMI_AOA_MEAS_CMDID */
+struct wmi_aoa_meas_cmd {
+	u8 mac_addr[WMI_MAC_LEN];
+	/* channels IDs:
+	 * 0 - 58320 MHz
+	 * 1 - 60480 MHz
+	 * 2 - 62640 MHz
+	 */
+	u8 channel;
+	/* enum wmi_aoa_meas_type */
+	u8 aoa_meas_type;
+	__le32 meas_rf_mask;
+} __packed;
+
+enum wmi_tof_burst_duration {
+	WMI_TOF_BURST_DURATION_250_USEC		= 2,
+	WMI_TOF_BURST_DURATION_500_USEC		= 3,
+	WMI_TOF_BURST_DURATION_1_MSEC		= 4,
+	WMI_TOF_BURST_DURATION_2_MSEC		= 5,
+	WMI_TOF_BURST_DURATION_4_MSEC		= 6,
+	WMI_TOF_BURST_DURATION_8_MSEC		= 7,
+	WMI_TOF_BURST_DURATION_16_MSEC		= 8,
+	WMI_TOF_BURST_DURATION_32_MSEC		= 9,
+	WMI_TOF_BURST_DURATION_64_MSEC		= 10,
+	WMI_TOF_BURST_DURATION_128_MSEC		= 11,
+	WMI_TOF_BURST_DURATION_NO_PREFERENCES	= 15,
+};
+
+enum wmi_tof_session_start_flags {
+	WMI_TOF_SESSION_START_FLAG_SECURED	= 0x1,
+	WMI_TOF_SESSION_START_FLAG_ASAP		= 0x2,
+	WMI_TOF_SESSION_START_FLAG_LCI_REQ	= 0x4,
+	WMI_TOF_SESSION_START_FLAG_LCR_REQ	= 0x8,
+};
+
+/* WMI_TOF_SESSION_START_CMDID */
+struct wmi_ftm_dest_info {
+	u8 channel;
+	/* wmi_tof_session_start_flags_e */
+	u8 flags;
+	u8 initial_token;
+	u8 num_of_ftm_per_burst;
+	u8 num_of_bursts_exp;
+	/* wmi_tof_burst_duration_e */
+	u8 burst_duration;
+	/* Burst Period indicate interval between two consecutive burst
+	 * instances, in units of 100 ms
+	 */
+	__le16 burst_period;
+	u8 dst_mac[WMI_MAC_LEN];
+	__le16 reserved;
+} __packed;
+
+/* WMI_TOF_SESSION_START_CMDID */
+struct wmi_tof_session_start_cmd {
+	__le32 session_id;
+	u8 num_of_aoa_measures;
+	u8 aoa_type;
+	__le16 num_of_dest;
+	u8 reserved[4];
+	struct wmi_ftm_dest_info ftm_dest_info[0];
+} __packed;
+
+enum wmi_tof_channel_info_report_type {
+	WMI_TOF_CHANNEL_INFO_TYPE_CIR			= 0x1,
+	WMI_TOF_CHANNEL_INFO_TYPE_RSSI			= 0x2,
+	WMI_TOF_CHANNEL_INFO_TYPE_SNR			= 0x4,
+	WMI_TOF_CHANNEL_INFO_TYPE_DEBUG_DATA		= 0x8,
+	WMI_TOF_CHANNEL_INFO_TYPE_VENDOR_SPECIFIC	= 0x10,
+};
+
+/* WMI_TOF_CHANNEL_INFO_CMDID */
+struct wmi_tof_channel_info_cmd {
+	/* wmi_tof_channel_info_report_type_e */
+	__le32 channel_info_report_request;
+} __packed;
+
 /* WMI Events
  * List of Events (target to host)
  */
 enum wmi_event_id {
-	WMI_READY_EVENTID			= 0x1001,
-	WMI_CONNECT_EVENTID			= 0x1002,
-	WMI_DISCONNECT_EVENTID			= 0x1003,
-	WMI_SCAN_COMPLETE_EVENTID		= 0x100A,
-	WMI_REPORT_STATISTICS_EVENTID		= 0x100B,
-	WMI_RD_MEM_RSP_EVENTID			= 0x1800,
-	WMI_FW_READY_EVENTID			= 0x1801,
-	WMI_EXIT_FAST_MEM_ACC_MODE_EVENTID	= 0x200,
-	WMI_ECHO_RSP_EVENTID			= 0x1803,
-	WMI_FS_TUNE_DONE_EVENTID		= 0x180A,
-	WMI_CORR_MEASURE_EVENTID		= 0x180B,
-	WMI_READ_RSSI_EVENTID			= 0x180C,
-	WMI_TEMP_SENSE_DONE_EVENTID		= 0x180E,
-	WMI_DC_CALIB_DONE_EVENTID		= 0x180F,
-	WMI_IQ_TX_CALIB_DONE_EVENTID		= 0x1811,
-	WMI_IQ_RX_CALIB_DONE_EVENTID		= 0x1812,
-	WMI_SET_WORK_MODE_DONE_EVENTID		= 0x1815,
-	WMI_LO_LEAKAGE_CALIB_DONE_EVENTID	= 0x1816,
-	WMI_MARLON_R_READ_DONE_EVENTID		= 0x1818,
-	WMI_MARLON_R_WRITE_DONE_EVENTID		= 0x1819,
-	WMI_MARLON_R_TXRX_SEL_DONE_EVENTID	= 0x181A,
-	WMI_SILENT_RSSI_CALIB_DONE_EVENTID	= 0x181D,
-	WMI_RF_RX_TEST_DONE_EVENTID		= 0x181E,
-	WMI_CFG_RX_CHAIN_DONE_EVENTID		= 0x1820,
-	WMI_VRING_CFG_DONE_EVENTID		= 0x1821,
-	WMI_BA_STATUS_EVENTID			= 0x1823,
-	WMI_RCP_ADDBA_REQ_EVENTID		= 0x1824,
-	WMI_RCP_ADDBA_RESP_SENT_EVENTID		= 0x1825,
-	WMI_DELBA_EVENTID			= 0x1826,
-	WMI_GET_SSID_EVENTID			= 0x1828,
-	WMI_GET_PCP_CHANNEL_EVENTID		= 0x182A,
-	WMI_SW_TX_COMPLETE_EVENTID		= 0x182B,
-	WMI_READ_MAC_RXQ_EVENTID		= 0x1830,
-	WMI_READ_MAC_TXQ_EVENTID		= 0x1831,
-	WMI_WRITE_MAC_RXQ_EVENTID		= 0x1832,
-	WMI_WRITE_MAC_TXQ_EVENTID		= 0x1833,
-	WMI_WRITE_MAC_XQ_FIELD_EVENTID		= 0x1834,
-	WMI_BEAMFORMING_MGMT_DONE_EVENTID	= 0x1836,
-	WMI_BF_TXSS_MGMT_DONE_EVENTID		= 0x1837,
-	WMI_BF_RXSS_MGMT_DONE_EVENTID		= 0x1839,
-	WMI_RS_MGMT_DONE_EVENTID		= 0x1852,
-	WMI_RF_MGMT_STATUS_EVENTID		= 0x1853,
-	WMI_THERMAL_THROTTLING_STATUS_EVENTID	= 0x1855,
-	WMI_BF_SM_MGMT_DONE_EVENTID		= 0x1838,
-	WMI_RX_MGMT_PACKET_EVENTID		= 0x1840,
-	WMI_TX_MGMT_PACKET_EVENTID		= 0x1841,
-	WMI_OTP_READ_RESULT_EVENTID		= 0x1856,
-	WMI_LED_CFG_DONE_EVENTID		= 0x1858,
+	WMI_READY_EVENTID				= 0x1001,
+	WMI_CONNECT_EVENTID				= 0x1002,
+	WMI_DISCONNECT_EVENTID				= 0x1003,
+	WMI_SCAN_COMPLETE_EVENTID			= 0x100A,
+	WMI_REPORT_STATISTICS_EVENTID			= 0x100B,
+	WMI_RD_MEM_RSP_EVENTID				= 0x1800,
+	WMI_FW_READY_EVENTID				= 0x1801,
+	WMI_EXIT_FAST_MEM_ACC_MODE_EVENTID		= 0x200,
+	WMI_ECHO_RSP_EVENTID				= 0x1803,
+	WMI_FS_TUNE_DONE_EVENTID			= 0x180A,
+	WMI_CORR_MEASURE_EVENTID			= 0x180B,
+	WMI_READ_RSSI_EVENTID				= 0x180C,
+	WMI_TEMP_SENSE_DONE_EVENTID			= 0x180E,
+	WMI_DC_CALIB_DONE_EVENTID			= 0x180F,
+	WMI_IQ_TX_CALIB_DONE_EVENTID			= 0x1811,
+	WMI_IQ_RX_CALIB_DONE_EVENTID			= 0x1812,
+	WMI_SET_WORK_MODE_DONE_EVENTID			= 0x1815,
+	WMI_LO_LEAKAGE_CALIB_DONE_EVENTID		= 0x1816,
+	WMI_MARLON_R_READ_DONE_EVENTID			= 0x1818,
+	WMI_MARLON_R_WRITE_DONE_EVENTID			= 0x1819,
+	WMI_MARLON_R_TXRX_SEL_DONE_EVENTID		= 0x181A,
+	WMI_SILENT_RSSI_CALIB_DONE_EVENTID		= 0x181D,
+	WMI_RF_RX_TEST_DONE_EVENTID			= 0x181E,
+	WMI_CFG_RX_CHAIN_DONE_EVENTID			= 0x1820,
+	WMI_VRING_CFG_DONE_EVENTID			= 0x1821,
+	WMI_BA_STATUS_EVENTID				= 0x1823,
+	WMI_RCP_ADDBA_REQ_EVENTID			= 0x1824,
+	WMI_RCP_ADDBA_RESP_SENT_EVENTID			= 0x1825,
+	WMI_DELBA_EVENTID				= 0x1826,
+	WMI_GET_SSID_EVENTID				= 0x1828,
+	WMI_GET_PCP_CHANNEL_EVENTID			= 0x182A,
+	WMI_SW_TX_COMPLETE_EVENTID			= 0x182B,
+	WMI_READ_MAC_RXQ_EVENTID			= 0x1830,
+	WMI_READ_MAC_TXQ_EVENTID			= 0x1831,
+	WMI_WRITE_MAC_RXQ_EVENTID			= 0x1832,
+	WMI_WRITE_MAC_TXQ_EVENTID			= 0x1833,
+	WMI_WRITE_MAC_XQ_FIELD_EVENTID			= 0x1834,
+	WMI_BEAMFORMING_MGMT_DONE_EVENTID		= 0x1836,
+	WMI_BF_TXSS_MGMT_DONE_EVENTID			= 0x1837,
+	WMI_BF_RXSS_MGMT_DONE_EVENTID			= 0x1839,
+	WMI_RS_MGMT_DONE_EVENTID			= 0x1852,
+	WMI_RF_MGMT_STATUS_EVENTID			= 0x1853,
+	WMI_THERMAL_THROTTLING_STATUS_EVENTID		= 0x1855,
+	WMI_BF_SM_MGMT_DONE_EVENTID			= 0x1838,
+	WMI_RX_MGMT_PACKET_EVENTID			= 0x1840,
+	WMI_TX_MGMT_PACKET_EVENTID			= 0x1841,
+	WMI_LINK_MAINTAIN_CFG_WRITE_DONE_EVENTID	= 0x1842,
+	WMI_LINK_MAINTAIN_CFG_READ_DONE_EVENTID		= 0x1843,
+	WMI_OTP_READ_RESULT_EVENTID			= 0x1856,
+	WMI_LED_CFG_DONE_EVENTID			= 0x1858,
 	/* Performance monitoring events */
-	WMI_DATA_PORT_OPEN_EVENTID		= 0x1860,
-	WMI_WBE_LINK_DOWN_EVENTID		= 0x1861,
-	WMI_BF_CTRL_DONE_EVENTID		= 0x1862,
-	WMI_NOTIFY_REQ_DONE_EVENTID		= 0x1863,
-	WMI_GET_STATUS_DONE_EVENTID		= 0x1864,
-	WMI_VRING_EN_EVENTID			= 0x1865,
-	WMI_UNIT_TEST_EVENTID			= 0x1900,
-	WMI_FLASH_READ_DONE_EVENTID		= 0x1902,
-	WMI_FLASH_WRITE_DONE_EVENTID		= 0x1903,
+	WMI_DATA_PORT_OPEN_EVENTID			= 0x1860,
+	WMI_WBE_LINK_DOWN_EVENTID			= 0x1861,
+	WMI_BF_CTRL_DONE_EVENTID			= 0x1862,
+	WMI_NOTIFY_REQ_DONE_EVENTID			= 0x1863,
+	WMI_GET_STATUS_DONE_EVENTID			= 0x1864,
+	WMI_VRING_EN_EVENTID				= 0x1865,
+	WMI_GET_RF_STATUS_EVENTID			= 0x1866,
+	WMI_GET_BASEBAND_TYPE_EVENTID			= 0x1867,
+	WMI_UNIT_TEST_EVENTID				= 0x1900,
+	WMI_FLASH_READ_DONE_EVENTID			= 0x1902,
+	WMI_FLASH_WRITE_DONE_EVENTID			= 0x1903,
+	/* Power management */
+	WMI_TRAFFIC_DEFERRAL_EVENTID			= 0x1904,
+	WMI_TRAFFIC_RESUME_EVENTID			= 0x1905,
 	/* P2P */
-	WMI_P2P_CFG_DONE_EVENTID		= 0x1910,
-	WMI_PORT_ALLOCATED_EVENTID		= 0x1911,
-	WMI_PORT_DELETED_EVENTID		= 0x1912,
-	WMI_LISTEN_STARTED_EVENTID		= 0x1914,
-	WMI_SEARCH_STARTED_EVENTID		= 0x1915,
-	WMI_DISCOVERY_STARTED_EVENTID		= 0x1916,
-	WMI_DISCOVERY_STOPPED_EVENTID		= 0x1917,
-	WMI_PCP_STARTED_EVENTID			= 0x1918,
-	WMI_PCP_STOPPED_EVENTID			= 0x1919,
-	WMI_PCP_FACTOR_EVENTID			= 0x191A,
-	WMI_SET_CHANNEL_EVENTID			= 0x9000,
-	WMI_ASSOC_REQ_EVENTID			= 0x9001,
-	WMI_EAPOL_RX_EVENTID			= 0x9002,
-	WMI_MAC_ADDR_RESP_EVENTID		= 0x9003,
-	WMI_FW_VER_EVENTID			= 0x9004,
-	WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENTID	= 0x9005,
+	WMI_P2P_CFG_DONE_EVENTID			= 0x1910,
+	WMI_PORT_ALLOCATED_EVENTID			= 0x1911,
+	WMI_PORT_DELETED_EVENTID			= 0x1912,
+	WMI_LISTEN_STARTED_EVENTID			= 0x1914,
+	WMI_SEARCH_STARTED_EVENTID			= 0x1915,
+	WMI_DISCOVERY_STARTED_EVENTID			= 0x1916,
+	WMI_DISCOVERY_STOPPED_EVENTID			= 0x1917,
+	WMI_PCP_STARTED_EVENTID				= 0x1918,
+	WMI_PCP_STOPPED_EVENTID				= 0x1919,
+	WMI_PCP_FACTOR_EVENTID				= 0x191A,
+	/* Power Save Configuration Events */
+	WMI_PS_DEV_PROFILE_CFG_EVENTID			= 0x191C,
+	/* Not supported yet */
+	WMI_PS_DEV_CFG_EVENTID				= 0x191D,
+	/* Not supported yet */
+	WMI_PS_DEV_CFG_READ_EVENTID			= 0x191E,
+	/* Not supported yet */
+	WMI_PS_MID_CFG_EVENTID				= 0x191F,
+	/* Not supported yet */
+	WMI_PS_MID_CFG_READ_EVENTID			= 0x1920,
+	WMI_RS_CFG_DONE_EVENTID				= 0x1921,
+	WMI_GET_DETAILED_RS_RES_EVENTID			= 0x1922,
+	WMI_AOA_MEAS_EVENTID				= 0x1923,
+	WMI_TOF_SESSION_END_EVENTID			= 0x1991,
+	WMI_TOF_GET_CAPABILITIES_EVENTID		= 0x1992,
+	WMI_TOF_SET_LCR_EVENTID				= 0x1993,
+	WMI_TOF_SET_LCI_EVENTID				= 0x1994,
+	WMI_TOF_FTM_PER_DEST_RES_EVENTID		= 0x1995,
+	WMI_TOF_CHANNEL_INFO_EVENTID			= 0x1996,
+	WMI_SET_CHANNEL_EVENTID				= 0x9000,
+	WMI_ASSOC_REQ_EVENTID				= 0x9001,
+	WMI_EAPOL_RX_EVENTID				= 0x9002,
+	WMI_MAC_ADDR_RESP_EVENTID			= 0x9003,
+	WMI_FW_VER_EVENTID				= 0x9004,
+	WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENTID		= 0x9005,
 };
 
 /* Events data structures */
@@ -943,10 +1093,85 @@
 
 /* WMI_FW_VER_EVENTID */
 struct wmi_fw_ver_event {
-	u8 major;
-	u8 minor;
-	__le16 subminor;
-	__le16 build;
+	/* FW image version */
+	__le32 fw_major;
+	__le32 fw_minor;
+	__le32 fw_subminor;
+	__le32 fw_build;
+	/* FW image build time stamp */
+	__le32 hour;
+	__le32 minute;
+	__le32 second;
+	__le32 day;
+	__le32 month;
+	__le32 year;
+	/* Boot Loader image version */
+	__le32 bl_major;
+	__le32 bl_minor;
+	__le32 bl_subminor;
+	__le32 bl_build;
+	/* The number of entries in the FW capabilies array */
+	u8 fw_capabilities_len;
+	u8 reserved[3];
+	/* FW capabilities info
+	 * Must be the last member of the struct
+	 */
+	__le32 fw_capabilities[0];
+} __packed;
+
+/* WMI_GET_RF_STATUS_EVENTID */
+enum rf_type {
+	RF_UNKNOWN	= 0x00,
+	RF_MARLON	= 0x01,
+	RF_SPARROW	= 0x02,
+};
+
+/* WMI_GET_RF_STATUS_EVENTID */
+enum board_file_rf_type {
+	BF_RF_MARLON	= 0x00,
+	BF_RF_SPARROW	= 0x01,
+};
+
+/* WMI_GET_RF_STATUS_EVENTID */
+enum rf_status {
+	RF_OK			= 0x00,
+	RF_NO_COMM		= 0x01,
+	RF_WRONG_BOARD_FILE	= 0x02,
+};
+
+/* WMI_GET_RF_STATUS_EVENTID */
+struct wmi_get_rf_status_event {
+	/* enum rf_type */
+	__le32 rf_type;
+	/* attached RFs bit vector */
+	__le32 attached_rf_vector;
+	/* enabled RFs bit vector */
+	__le32 enabled_rf_vector;
+	/* enum rf_status, refers to enabled RFs */
+	u8 rf_status[32];
+	/* enum board file RF type */
+	__le32 board_file_rf_type;
+	/* board file platform type */
+	__le32 board_file_platform_type;
+	/* board file version */
+	__le32 board_file_version;
+	__le32 reserved[2];
+} __packed;
+
+/* WMI_GET_BASEBAND_TYPE_EVENTID */
+enum baseband_type {
+	BASEBAND_UNKNOWN	= 0x00,
+	BASEBAND_SPARROW_M_A0	= 0x03,
+	BASEBAND_SPARROW_M_A1	= 0x04,
+	BASEBAND_SPARROW_M_B0	= 0x05,
+	BASEBAND_SPARROW_M_C0	= 0x06,
+	BASEBAND_SPARROW_M_D0	= 0x07,
+};
+
+/* WMI_GET_BASEBAND_TYPE_EVENTID */
+struct wmi_get_baseband_type_event {
+	/* enum baseband_type */
+	__le32 baseband_type;
 } __packed;
 
 /* WMI_MAC_ADDR_RESP_EVENTID */
@@ -1410,4 +1635,553 @@
 	__le32 status;
 } __packed;
 
+#define WMI_NUM_MCS	(13)
+
+/* Rate search parameters configuration per connection */
+struct wmi_rs_cfg {
+	/* The maximal allowed PER for each MCS
+	 * MCS will be considered as failed if PER during RS is higher
+	 */
+	u8 per_threshold[WMI_NUM_MCS];
+	/* Number of MPDUs for each MCS
+	 * this is the minimal statistic required to make an educated
+	 * decision
+	 */
+	u8 min_frame_cnt[WMI_NUM_MCS];
+	/* stop threshold [0-100] */
+	u8 stop_th;
+	/* MCS1 stop threshold [0-100] */
+	u8 mcs1_fail_th;
+	u8 max_back_failure_th;
+	/* Debug feature for disabling internal RS trigger (which is
+	 * currently triggered by BF Done)
+	 */
+	u8 dbg_disable_internal_trigger;
+	__le32 back_failure_mask;
+	__le32 mcs_en_vec;
+} __packed;
+
+/* WMI_RS_CFG_CMDID */
+struct wmi_rs_cfg_cmd {
+	/* connection id */
+	u8 cid;
+	/* enable or disable rate search */
+	u8 rs_enable;
+	/* rate search configuration */
+	struct wmi_rs_cfg rs_cfg;
+} __packed;
+
+/* WMI_RS_CFG_DONE_EVENTID */
+struct wmi_rs_cfg_done_event {
+	u8 cid;
+	/* enum wmi_fw_status */
+	u8 status;
+	u8 reserved[2];
+} __packed;
+
+/* WMI_GET_DETAILED_RS_RES_CMDID */
+struct wmi_get_detailed_rs_res_cmd {
+	/* connection id */
+	u8 cid;
+	u8 reserved[3];
+} __packed;
+
+/* RS results status */
+enum wmi_rs_results_status {
+	WMI_RS_RES_VALID	= 0x00,
+	WMI_RS_RES_INVALID	= 0x01,
+};
+
+/* Rate search results */
+struct wmi_rs_results {
+	/* number of sent MPDUs */
+	u8 num_of_tx_pkt[WMI_NUM_MCS];
+	/* number of non-acked MPDUs */
+	u8 num_of_non_acked_pkt[WMI_NUM_MCS];
+	/* RS timestamp */
+	__le32 tsf;
+	/* RS selected MCS */
+	u8 mcs;
+} __packed;
+
+/* WMI_GET_DETAILED_RS_RES_EVENTID */
+struct wmi_get_detailed_rs_res_event {
+	u8 cid;
+	/* enum wmi_rs_results_status */
+	u8 status;
+	/* detailed rs results */
+	struct wmi_rs_results rs_results;
+	u8 reserved[3];
+} __packed;
+
+/* broadcast connection ID */
+#define WMI_LINK_MAINTAIN_CFG_CID_BROADCAST	(0xFFFFFFFF)
+
+/* Types wmi_link_maintain_cfg presets for WMI_LINK_MAINTAIN_CFG_WRITE_CMD */
+enum wmi_link_maintain_cfg_type {
+	/* AP/PCP default normal (non-FST) configuration settings */
+	WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_NORMAL_AP	= 0x00,
+	/* AP/PCP  default FST configuration settings */
+	WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_FST_AP	= 0x01,
+	/* STA default normal (non-FST) configuration settings */
+	WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_NORMAL_STA	= 0x02,
+	/* STA default FST configuration settings */
+	WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_FST_STA	= 0x03,
+	/* custom configuration settings */
+	WMI_LINK_MAINTAIN_CFG_TYPE_CUSTOM		= 0x04,
+	/* number of defined configuration types */
+	WMI_LINK_MAINTAIN_CFG_TYPES_NUM			= 0x05,
+};
+
+/* Response status codes for WMI_LINK_MAINTAIN_CFG_WRITE/READ commands */
+enum wmi_link_maintain_cfg_response_status {
+	/* WMI_LINK_MAINTAIN_CFG_WRITE/READ command successfully accomplished
+	 */
+	WMI_LINK_MAINTAIN_CFG_RESPONSE_STATUS_OK		= 0x00,
+	/* ERROR due to bad argument in WMI_LINK_MAINTAIN_CFG_WRITE/READ
+	 * command request
+	 */
+	WMI_LINK_MAINTAIN_CFG_RESPONSE_STATUS_BAD_ARGUMENT	= 0x01,
+};
+
+/* Link Loss and Keep Alive configuration */
+struct wmi_link_maintain_cfg {
+	/* link_loss_enable_detectors_vec */
+	__le32 link_loss_enable_detectors_vec;
+	/* detectors check period usec */
+	__le32 check_link_loss_period_usec;
+	/* max allowed tx ageing */
+	__le32 tx_ageing_threshold_usec;
+	/* keep alive period for high SNR */
+	__le32 keep_alive_period_usec_high_snr;
+	/* keep alive period for low SNR */
+	__le32 keep_alive_period_usec_low_snr;
+	/* lower snr limit for keep alive period update */
+	__le32 keep_alive_snr_threshold_low_db;
+	/* upper snr limit for keep alive period update */
+	__le32 keep_alive_snr_threshold_high_db;
+	/* num of successive bad bcons causing link-loss */
+	__le32 bad_beacons_num_threshold;
+	/* SNR limit for bad_beacons_detector */
+	__le32 bad_beacons_snr_threshold_db;
+} __packed;
+
+/* WMI_LINK_MAINTAIN_CFG_WRITE_CMDID */
+struct wmi_link_maintain_cfg_write_cmd {
+	/* enum wmi_link_maintain_cfg_type_e - type of requested default
+	 * configuration to be applied
+	 */
+	__le32 cfg_type;
+	/* requested connection ID or WMI_LINK_MAINTAIN_CFG_CID_BROADCAST */
+	__le32 cid;
+	/* custom configuration settings to be applied (relevant only if
+	 * cfg_type==WMI_LINK_MAINTAIN_CFG_TYPE_CUSTOM)
+	 */
+	struct wmi_link_maintain_cfg lm_cfg;
+} __packed;
+
+/* WMI_LINK_MAINTAIN_CFG_READ_CMDID */
+struct wmi_link_maintain_cfg_read_cmd {
+	/* connection ID which configuration settings are requested */
+	__le32 cid;
+} __packed;
+
+/* WMI_LINK_MAINTAIN_CFG_WRITE_DONE_EVENTID */
+struct wmi_link_maintain_cfg_write_done_event {
+	/* requested connection ID */
+	__le32 cid;
+	/* wmi_link_maintain_cfg_response_status_e - write status */
+	__le32 status;
+} __packed;
+
+/* \WMI_LINK_MAINTAIN_CFG_READ_DONE_EVENT */
+struct wmi_link_maintain_cfg_read_done_event {
+	/* requested connection ID */
+	__le32 cid;
+	/* wmi_link_maintain_cfg_response_status_e - read status */
+	__le32 status;
+	/* Retrieved configuration settings */
+	struct wmi_link_maintain_cfg lm_cfg;
+} __packed;
+
+enum wmi_traffic_deferral_status {
+	WMI_TRAFFIC_DEFERRAL_APPROVED	= 0x0,
+	WMI_TRAFFIC_DEFERRAL_REJECTED	= 0x1,
+};
+
+/* WMI_TRAFFIC_DEFERRAL_EVENTID */
+struct wmi_traffic_deferral_event {
+	/* enum wmi_traffic_deferral_status_e */
+	u8 status;
+} __packed;
+
+enum wmi_traffic_resume_status {
+	WMI_TRAFFIC_RESUME_SUCCESS	= 0x0,
+	WMI_TRAFFIC_RESUME_FAILED	= 0x1,
+};
+
+/* WMI_TRAFFIC_RESUME_EVENTID */
+struct wmi_traffic_resume_event {
+	/* enum wmi_traffic_resume_status_e */
+	u8 status;
+} __packed;
+
+/* Power Save command completion status codes */
+enum wmi_ps_cfg_cmd_status {
+	WMI_PS_CFG_CMD_STATUS_SUCCESS	= 0x00,
+	WMI_PS_CFG_CMD_STATUS_BAD_PARAM	= 0x01,
+	/* other error */
+	WMI_PS_CFG_CMD_STATUS_ERROR	= 0x02,
+};
+
+/* Device Power Save Profiles */
+enum wmi_ps_profile_type {
+	WMI_PS_PROFILE_TYPE_DEFAULT		= 0x00,
+	WMI_PS_PROFILE_TYPE_PS_DISABLED		= 0x01,
+	WMI_PS_PROFILE_TYPE_MAX_PS		= 0x02,
+	WMI_PS_PROFILE_TYPE_LOW_LATENCY_PS	= 0x03,
+};
+
+/* WMI_PS_DEV_PROFILE_CFG_CMDID
+ *
+ * Power save profile to be used by the device
+ *
+ * Returned event:
+ * - WMI_PS_DEV_PROFILE_CFG_EVENTID
+ */
+struct wmi_ps_dev_profile_cfg_cmd {
+	/* wmi_ps_profile_type_e */
+	u8 ps_profile;
+	u8 reserved[3];
+} __packed;
+
+/* WMI_PS_DEV_PROFILE_CFG_EVENTID */
+struct wmi_ps_dev_profile_cfg_event {
+	/* wmi_ps_cfg_cmd_status_e */
+	__le32 status;
+} __packed;
+
+enum wmi_ps_level {
+	WMI_PS_LEVEL_DEEP_SLEEP		= 0x00,
+	WMI_PS_LEVEL_SHALLOW_SLEEP	= 0x01,
+	/* awake = all PS mechanisms are disabled */
+	WMI_PS_LEVEL_AWAKE		= 0x02,
+};
+
+enum wmi_ps_deep_sleep_clk_level {
+	/* 33k */
+	WMI_PS_DEEP_SLEEP_CLK_LEVEL_RTC		= 0x00,
+	/* 10k */
+	WMI_PS_DEEP_SLEEP_CLK_LEVEL_OSC		= 0x01,
+	/* @RTC Low latency */
+	WMI_PS_DEEP_SLEEP_CLK_LEVEL_RTC_LT	= 0x02,
+	WMI_PS_DEEP_SLEEP_CLK_LEVEL_XTAL	= 0x03,
+	WMI_PS_DEEP_SLEEP_CLK_LEVEL_SYSCLK	= 0x04,
+	/* Not Applicable */
+	WMI_PS_DEEP_SLEEP_CLK_LEVEL_N_A		= 0xFF,
+};
+
+/* Response by the FW to a D3 entry request */
+enum wmi_ps_d3_resp_policy {
+	WMI_PS_D3_RESP_POLICY_DEFAULT	= 0x00,
+	/* debug -D3 req is always denied */
+	WMI_PS_D3_RESP_POLICY_DENIED	= 0x01,
+	/* debug -D3 req is always approved */
+	WMI_PS_D3_RESP_POLICY_APPROVED	= 0x02,
+};
+
+/* Device common power save configurations */
+struct wmi_ps_dev_cfg {
+	/* lowest level of PS allowed while unassociated, enum wmi_ps_level_e
+	 */
+	u8 ps_unassoc_min_level;
+	/* lowest deep sleep clock level while nonassoc, enum
+	 * wmi_ps_deep_sleep_clk_level_e
+	 */
+	u8 ps_unassoc_deep_sleep_min_level;
+	/* lowest level of PS allowed while associated, enum wmi_ps_level_e */
+	u8 ps_assoc_min_level;
+	/* lowest deep sleep clock level while assoc, enum
+	 * wmi_ps_deep_sleep_clk_level_e
+	 */
+	u8 ps_assoc_deep_sleep_min_level;
+	/* enum wmi_ps_deep_sleep_clk_level_e */
+	u8 ps_assoc_low_latency_ds_min_level;
+	/* enum wmi_ps_d3_resp_policy_e */
+	u8 ps_D3_response_policy;
+	/* BOOL */
+	u8 ps_D3_pm_pme_enabled;
+	/* BOOL */
+	u8 ps_halp_enable;
+	u8 ps_deep_sleep_enter_thresh_msec;
+	/* BOOL */
+	u8 ps_voltage_scaling_en;
+} __packed;
+
+/* WMI_PS_DEV_CFG_CMDID
+ *
+ * Configure common Power Save parameters of the device and all MIDs.
+ *
+ * Returned event:
+ * - WMI_PS_DEV_CFG_EVENTID
+ */
+struct wmi_ps_dev_cfg_cmd {
+	/* Device Power Save configuration to be applied */
+	struct wmi_ps_dev_cfg ps_dev_cfg;
+	/* alignment to 32b */
+	u8 reserved[2];
+} __packed;
+
+/* WMI_PS_DEV_CFG_EVENTID */
+struct wmi_ps_dev_cfg_event {
+	/* wmi_ps_cfg_cmd_status_e */
+	__le32 status;
+} __packed;
+
+/* WMI_PS_DEV_CFG_READ_CMDID
+ *
+ * request to retrieve  device Power Save configuration
+ * (WMI_PS_DEV_CFG_CMD params)
+ *
+ * Returned event:
+ * - WMI_PS_DEV_CFG_READ_EVENTID
+ */
+struct wmi_ps_dev_cfg_read_cmd {
+	__le32 reserved;
+} __packed;
+
+/* WMI_PS_DEV_CFG_READ_EVENTID */
+struct wmi_ps_dev_cfg_read_event {
+	/* wmi_ps_cfg_cmd_status_e */
+	__le32 status;
+	/* Retrieved device Power Save configuration (WMI_PS_DEV_CFG_CMD
+	 * params)
+	 */
+	struct wmi_ps_dev_cfg dev_ps_cfg;
+	/* alignment to 32b */
+	u8 reserved[2];
+} __packed;
+
+/* Per Mac Power Save configurations */
+struct wmi_ps_mid_cfg {
+	/* Low power RX in BTI is enabled, BOOL */
+	u8 beacon_lprx_enable;
+	/* Sync to sector ID enabled, BOOL */
+	u8 beacon_sync_to_sectorId_enable;
+	/* Low power RX in DTI is enabled, BOOL */
+	u8 frame_exchange_lprx_enable;
+	/* Sleep Cycle while in scheduled PS, 1-31 */
+	u8 scheduled_sleep_cycle_pow2;
+	/* Stay Awake for k BIs every (sleep_cycle - k) BIs, 1-31 */
+	u8 scheduled_num_of_awake_bis;
+	u8 am_to_traffic_load_thresh_mbp;
+	u8 traffic_to_am_load_thresh_mbps;
+	u8 traffic_to_am_num_of_no_traffic_bis;
+	/* BOOL */
+	u8 continuous_traffic_psm;
+	__le16 no_traffic_to_min_usec;
+	__le16 no_traffic_to_max_usec;
+	__le16 snoozing_sleep_interval_milisec;
+	u8 max_no_data_awake_events;
+	/* Trigger WEB after k failed beacons */
+	u8 num_of_failed_beacons_rx_to_trigger_web;
+	/* Trigger BF after k failed beacons */
+	u8 num_of_failed_beacons_rx_to_trigger_bf;
+	/* Trigger SOB after k successful beacons */
+	u8 num_of_successful_beacons_rx_to_trigger_sob;
+} __packed;
+
+/* WMI_PS_MID_CFG_CMDID
+ *
+ * Configure Power Save parameters of a specific MID.
+ * These parameters are relevant for the specific BSS this MID belongs to.
+ *
+ * Returned event:
+ * - WMI_PS_MID_CFG_EVENTID
+ */
+struct wmi_ps_mid_cfg_cmd {
+	/* MAC ID */
+	u8 mid;
+	/* mid PS configuration to be applied */
+	struct wmi_ps_mid_cfg ps_mid_cfg;
+} __packed;
+
+/* WMI_PS_MID_CFG_EVENTID */
+struct wmi_ps_mid_cfg_event {
+	/* MAC ID */
+	u8 mid;
+	/* alignment to 32b */
+	u8 reserved[3];
+	/* wmi_ps_cfg_cmd_status_e */
+	__le32 status;
+} __packed;
+
+/* WMI_PS_MID_CFG_READ_CMDID
+ *
+ * request to retrieve Power Save configuration of mid
+ * (WMI_PS_MID_CFG_CMD params)
+ *
+ * Returned event:
+ * - WMI_PS_MID_CFG_READ_EVENTID
+ */
+struct wmi_ps_mid_cfg_read_cmd {
+	/* MAC ID */
+	u8 mid;
+	/* alignment to 32b */
+	u8 reserved[3];
+} __packed;
+
+/* WMI_PS_MID_CFG_READ_EVENTID */
+struct wmi_ps_mid_cfg_read_event {
+	/* MAC ID */
+	u8 mid;
+	/* Retrieved MID Power Save configuration(WMI_PS_MID_CFG_CMD params) */
+	struct wmi_ps_mid_cfg mid_ps_cfg;
+	/* wmi_ps_cfg_cmd_status_e */
+	__le32 status;
+} __packed;
+
+#define WMI_AOA_MAX_DATA_SIZE	(128)
+
+enum wmi_aoa_meas_status {
+	WMI_AOA_MEAS_SUCCESS		= 0x00,
+	WMI_AOA_MEAS_PEER_INCAPABLE	= 0x01,
+	WMI_AOA_MEAS_FAILURE		= 0x02,
+};
+
+/* WMI_AOA_MEAS_EVENTID */
+struct wmi_aoa_meas_event {
+	u8 mac_addr[WMI_MAC_LEN];
+	/* channels IDs:
+	 * 0 - 58320 MHz
+	 * 1 - 60480 MHz
+	 * 2 - 62640 MHz
+	 */
+	u8 channel;
+	/* enum wmi_aoa_meas_type */
+	u8 aoa_meas_type;
+	/* Measurments are from RFs, defined by the mask */
+	__le32 meas_rf_mask;
+	/* enum wmi_aoa_meas_status */
+	u8 meas_status;
+	u8 reserved;
+	/* Length of meas_data in bytes */
+	__le16 length;
+	u8 meas_data[WMI_AOA_MAX_DATA_SIZE];
+} __packed;
+
+/* WMI_TOF_GET_CAPABILITIES_EVENTID */
+struct wmi_tof_get_capabilities_event {
+	u8 ftm_capability;
+	/* maximum supported number of destination to start TOF */
+	u8 max_num_of_dest;
+	/* maximum supported number of measurements per burst */
+	u8 max_num_of_meas_per_burst;
+	u8 reserved;
+	/* maximum supported multi bursts */
+	__le16 max_multi_bursts_sessions;
+	/* maximum supported FTM burst duration , wmi_tof_burst_duration_e */
+	__le16 max_ftm_burst_duration;
+	/* AOA supported types */
+	__le32 aoa_supported_types;
+} __packed;
+
+enum wmi_tof_session_end_status {
+	WMI_TOF_SESSION_END_NO_ERROR		= 0x00,
+	WMI_TOF_SESSION_END_FAIL		= 0x01,
+	WMI_TOF_SESSION_END_PARAMS_ERROR	= 0x02,
+	WMI_TOF_SESSION_END_ABORTED		= 0x03,
+};
+
+/* WMI_TOF_SESSION_END_EVENTID */
+struct wmi_tof_session_end_event {
+	/* FTM session ID */
+	__le32 session_id;
+	/* wmi_tof_session_end_status_e */
+	u8 status;
+	u8 reserved[3];
+} __packed;
+
+/* Responder FTM Results */
+struct wmi_responder_ftm_res {
+	u8 t1[6];
+	u8 t2[6];
+	u8 t3[6];
+	u8 t4[6];
+	__le16 tod_err;
+	__le16 toa_err;
+	__le16 tod_err_initiator;
+	__le16 toa_err_initiator;
+} __packed;
+
+enum wmi_tof_ftm_per_dest_res_status {
+	WMI_PER_DEST_RES_NO_ERROR		= 0x00,
+	WMI_PER_DEST_RES_TX_RX_FAIL		= 0x01,
+	WMI_PER_DEST_RES_PARAM_DONT_MATCH	= 0x02,
+};
+
+enum wmi_tof_ftm_per_dest_res_flags {
+	WMI_PER_DEST_RES_REQ_START		= 0x01,
+	WMI_PER_DEST_RES_BURST_REPORT_END	= 0x02,
+	WMI_PER_DEST_RES_REQ_END		= 0x04,
+	WMI_PER_DEST_RES_PARAM_UPDATE		= 0x08,
+};
+
+/* WMI_TOF_FTM_PER_DEST_RES_EVENTID */
+struct wmi_tof_ftm_per_dest_res_event {
+	/* FTM session ID */
+	__le32 session_id;
+	/* destination MAC address */
+	u8 dst_mac[WMI_MAC_LEN];
+	/* wmi_tof_ftm_per_dest_res_flags_e */
+	u8 flags;
+	/* wmi_tof_ftm_per_dest_res_status_e */
+	u8 status;
+	/* responder ASAP */
+	u8 responder_asap;
+	/* responder number of FTM per burst */
+	u8 responder_num_ftm_per_burst;
+	/* responder number of FTM burst exponent */
+	u8 responder_num_ftm_bursts_exp;
+	/* responder burst duration ,wmi_tof_burst_duration_e */
+	u8 responder_burst_duration;
+	/* responder burst period, indicate interval between two consecutive
+	 * burst instances, in units of 100 ms
+	 */
+	__le16 responder_burst_period;
+	/* receive burst counter */
+	__le16 bursts_cnt;
+	/* tsf of responder start burst */
+	__le32 tsf_sync;
+	/* actual received ftm per burst */
+	u8 actual_ftm_per_burst;
+	u8 reserved0[7];
+	struct wmi_responder_ftm_res responder_ftm_res[0];
+} __packed;
+
+enum wmi_tof_channel_info_type {
+	WMI_TOF_CHANNEL_INFO_AOA		= 0x00,
+	WMI_TOF_CHANNEL_INFO_LCI		= 0x01,
+	WMI_TOF_CHANNEL_INFO_LCR		= 0x02,
+	WMI_TOF_CHANNEL_INFO_VENDOR_SPECIFIC	= 0x03,
+	WMI_TOF_CHANNEL_INFO_CIR		= 0x04,
+	WMI_TOF_CHANNEL_INFO_RSSI		= 0x05,
+	WMI_TOF_CHANNEL_INFO_SNR		= 0x06,
+	WMI_TOF_CHANNEL_INFO_DEBUG		= 0x07,
+};
+
+/* WMI_TOF_CHANNEL_INFO_EVENTID */
+struct wmi_tof_channel_info_event {
+	/* FTM session ID */
+	__le32 session_id;
+	/* destination MAC address */
+	u8 dst_mac[WMI_MAC_LEN];
+	/* wmi_tof_channel_info_type_e */
+	u8 type;
+	/* data report length */
+	u8 len;
+	/* data report payload */
+	u8 report[0];
+} __packed;
+
 #endif /* __WILOCITY_WMI_H__ */
diff --git a/drivers/net/wireless/broadcom/b43/debugfs.c b/drivers/net/wireless/broadcom/b43/debugfs.c
index b4bcd94..7704638 100644
--- a/drivers/net/wireless/broadcom/b43/debugfs.c
+++ b/drivers/net/wireless/broadcom/b43/debugfs.c
@@ -524,7 +524,8 @@
 		goto out_unlock;
 	}
 
-	dfops = container_of(file->f_op, struct b43_debugfs_fops, fops);
+	dfops = container_of(debugfs_real_fops(file),
+			     struct b43_debugfs_fops, fops);
 	if (!dfops->read) {
 		err = -ENOSYS;
 		goto out_unlock;
@@ -585,7 +586,8 @@
 		goto out_unlock;
 	}
 
-	dfops = container_of(file->f_op, struct b43_debugfs_fops, fops);
+	dfops = container_of(debugfs_real_fops(file),
+			     struct b43_debugfs_fops, fops);
 	if (!dfops->write) {
 		err = -ENOSYS;
 		goto out_unlock;
diff --git a/drivers/net/wireless/broadcom/b43legacy/debugfs.c b/drivers/net/wireless/broadcom/b43legacy/debugfs.c
index 090910e..82ef56e 100644
--- a/drivers/net/wireless/broadcom/b43legacy/debugfs.c
+++ b/drivers/net/wireless/broadcom/b43legacy/debugfs.c
@@ -221,7 +221,8 @@
 		goto out_unlock;
 	}
 
-	dfops = container_of(file->f_op, struct b43legacy_debugfs_fops, fops);
+	dfops = container_of(debugfs_real_fops(file),
+			     struct b43legacy_debugfs_fops, fops);
 	if (!dfops->read) {
 		err = -ENOSYS;
 		goto out_unlock;
@@ -287,7 +288,8 @@
 		goto out_unlock;
 	}
 
-	dfops = container_of(file->f_op, struct b43legacy_debugfs_fops, fops);
+	dfops = container_of(debugfs_real_fops(file),
+			     struct b43legacy_debugfs_fops, fops);
 	if (!dfops->write) {
 		err = -ENOSYS;
 		goto out_unlock;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c
index d1bc51f..038a960 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c
@@ -194,7 +194,7 @@
 	}
 
 	/* Check info buffer */
-	info = (void *)&msg[1];
+	info = (void *)&bcdc->buf[0];
 
 	/* Copy info buffer */
 	if (buf) {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index f549c25..72139b5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -420,7 +420,7 @@
 
 u32 brcmf_sdiod_regrl(struct brcmf_sdio_dev *sdiodev, u32 addr, int *ret)
 {
-	u32 data;
+	u32 data = 0;
 	int retval;
 
 	brcmf_dbg(SDIO, "addr:0x%08x\n", addr);
@@ -1101,6 +1101,7 @@
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339),
+	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4339),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354),
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 2628d5e..b777e1b 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -1595,15 +1595,9 @@
 		val = 1;
 		brcmf_dbg(CONN, "shared key\n");
 		break;
-	case NL80211_AUTHTYPE_AUTOMATIC:
-		val = 2;
-		brcmf_dbg(CONN, "automatic\n");
-		break;
-	case NL80211_AUTHTYPE_NETWORK_EAP:
-		brcmf_dbg(CONN, "network eap\n");
 	default:
 		val = 2;
-		brcmf_err("invalid auth type (%d)\n", sme->auth_type);
+		brcmf_dbg(CONN, "automatic, auth type (%d)\n", sme->auth_type);
 		break;
 	}
 
@@ -2533,7 +2527,7 @@
 				     WL_BSS_INFO_MAX);
 	if (err) {
 		brcmf_err("Failed to get bss info (%d)\n", err);
-		return;
+		goto out_kfree;
 	}
 	si->filled |= BIT(NL80211_STA_INFO_BSS_PARAM);
 	si->bss_param.beacon_interval = le16_to_cpu(buf->bss_le.beacon_period);
@@ -2545,6 +2539,9 @@
 		si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE;
 	if (capability & WLAN_CAPABILITY_SHORT_SLOT_TIME)
 		si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME;
+
+out_kfree:
+	kfree(buf);
 }
 
 static s32
@@ -3703,6 +3700,7 @@
 				 struct cfg80211_wowlan *wowl)
 {
 	u32 wowl_config;
+	struct brcmf_wowl_wakeind_le wowl_wakeind;
 	u32 i;
 
 	brcmf_dbg(TRACE, "Suspend, wowl config.\n");
@@ -3744,8 +3742,9 @@
 	if (!test_bit(BRCMF_VIF_STATUS_CONNECTED, &ifp->vif->sme_state))
 		wowl_config |= BRCMF_WOWL_UNASSOC;
 
-	brcmf_fil_iovar_data_set(ifp, "wowl_wakeind", "clear",
-				 sizeof(struct brcmf_wowl_wakeind_le));
+	memcpy(&wowl_wakeind, "clear", 6);
+	brcmf_fil_iovar_data_set(ifp, "wowl_wakeind", &wowl_wakeind,
+				 sizeof(wowl_wakeind));
 	brcmf_fil_iovar_int_set(ifp, "wowl", wowl_config);
 	brcmf_fil_iovar_int_set(ifp, "wowl_activate", 1);
 	brcmf_bus_wowl_config(cfg->pub->bus_if, true);
@@ -3884,11 +3883,11 @@
 	if (!check_vif_up(ifp->vif))
 		return -EIO;
 
-	brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", &pmksa->bssid);
+	brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", pmksa->bssid);
 
 	npmk = le32_to_cpu(cfg->pmk_list.npmk);
 	for (i = 0; i < npmk; i++)
-		if (!memcmp(&pmksa->bssid, &pmk[i].bssid, ETH_ALEN))
+		if (!memcmp(pmksa->bssid, pmk[i].bssid, ETH_ALEN))
 			break;
 
 	if ((npmk > 0) && (i < npmk)) {
@@ -4502,6 +4501,7 @@
 	u16 chanspec = chandef_to_chanspec(&cfg->d11inf, &settings->chandef);
 	bool mbss;
 	int is_11d;
+	bool supports_11d;
 
 	brcmf_dbg(TRACE, "ctrlchn=%d, center=%d, bw=%d, beacon_interval=%d, dtim_period=%d,\n",
 		  settings->chandef.chan->hw_value,
@@ -4514,11 +4514,16 @@
 	mbss = ifp->vif->mbss;
 
 	/* store current 11d setting */
-	brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, &ifp->vif->is_11d);
-	country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail,
-				      settings->beacon.tail_len,
-				      WLAN_EID_COUNTRY);
-	is_11d = country_ie ? 1 : 0;
+	if (brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY,
+				  &ifp->vif->is_11d)) {
+		supports_11d = false;
+	} else {
+		country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail,
+					      settings->beacon.tail_len,
+					      WLAN_EID_COUNTRY);
+		is_11d = country_ie ? 1 : 0;
+		supports_11d = true;
+	}
 
 	memset(&ssid_le, 0, sizeof(ssid_le));
 	if (settings->ssid == NULL || settings->ssid_len == 0) {
@@ -4527,7 +4532,7 @@
 				(u8 *)&settings->beacon.head[ie_offset],
 				settings->beacon.head_len - ie_offset,
 				WLAN_EID_SSID);
-		if (!ssid_ie)
+		if (!ssid_ie || ssid_ie->len > IEEE80211_MAX_SSID_LEN)
 			return -EINVAL;
 
 		memcpy(ssid_le.SSID, ssid_ie->data, ssid_ie->len);
@@ -4577,7 +4582,7 @@
 
 	/* Parameters shared by all radio interfaces */
 	if (!mbss) {
-		if (is_11d != ifp->vif->is_11d) {
+		if ((supports_11d) && (is_11d != ifp->vif->is_11d)) {
 			err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY,
 						    is_11d);
 			if (err < 0) {
@@ -4619,7 +4624,7 @@
 			brcmf_err("SET INFRA error %d\n", err);
 			goto exit;
 		}
-	} else if (WARN_ON(is_11d != ifp->vif->is_11d)) {
+	} else if (WARN_ON(supports_11d && (is_11d != ifp->vif->is_11d))) {
 		/* Multiple-BSS should use same 11d configuration */
 		err = -EINVAL;
 		goto exit;
@@ -4753,11 +4758,8 @@
 			brcmf_err("setting INFRA mode failed %d\n", err);
 		if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_MBSS))
 			brcmf_fil_iovar_int_set(ifp, "mbss", 0);
-		err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY,
-					    ifp->vif->is_11d);
-		if (err < 0)
-			brcmf_err("restoring REGULATORY setting failed %d\n",
-				  err);
+		brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY,
+				      ifp->vif->is_11d);
 		/* Bring device back up so it can be used again */
 		err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_UP, 1);
 		if (err < 0)
@@ -5635,7 +5637,7 @@
 		  ifevent->action, ifevent->flags, ifevent->ifidx,
 		  ifevent->bsscfgidx);
 
-	mutex_lock(&event->vif_event_lock);
+	spin_lock(&event->vif_event_lock);
 	event->action = ifevent->action;
 	vif = event->vif;
 
@@ -5643,7 +5645,7 @@
 	case BRCMF_E_IF_ADD:
 		/* waiting process may have timed out */
 		if (!cfg->vif_event.vif) {
-			mutex_unlock(&event->vif_event_lock);
+			spin_unlock(&event->vif_event_lock);
 			return -EBADF;
 		}
 
@@ -5654,24 +5656,24 @@
 			ifp->ndev->ieee80211_ptr = &vif->wdev;
 			SET_NETDEV_DEV(ifp->ndev, wiphy_dev(cfg->wiphy));
 		}
-		mutex_unlock(&event->vif_event_lock);
+		spin_unlock(&event->vif_event_lock);
 		wake_up(&event->vif_wq);
 		return 0;
 
 	case BRCMF_E_IF_DEL:
-		mutex_unlock(&event->vif_event_lock);
+		spin_unlock(&event->vif_event_lock);
 		/* event may not be upon user request */
 		if (brcmf_cfg80211_vif_event_armed(cfg))
 			wake_up(&event->vif_wq);
 		return 0;
 
 	case BRCMF_E_IF_CHANGE:
-		mutex_unlock(&event->vif_event_lock);
+		spin_unlock(&event->vif_event_lock);
 		wake_up(&event->vif_wq);
 		return 0;
 
 	default:
-		mutex_unlock(&event->vif_event_lock);
+		spin_unlock(&event->vif_event_lock);
 		break;
 	}
 	return -EINVAL;
@@ -5792,7 +5794,7 @@
 static void init_vif_event(struct brcmf_cfg80211_vif_event *event)
 {
 	init_waitqueue_head(&event->vif_wq);
-	mutex_init(&event->vif_event_lock);
+	spin_lock_init(&event->vif_event_lock);
 }
 
 static s32 brcmf_dongle_roam(struct brcmf_if *ifp)
@@ -6691,9 +6693,9 @@
 {
 	u8 evt_action;
 
-	mutex_lock(&event->vif_event_lock);
+	spin_lock(&event->vif_event_lock);
 	evt_action = event->action;
-	mutex_unlock(&event->vif_event_lock);
+	spin_unlock(&event->vif_event_lock);
 	return evt_action == action;
 }
 
@@ -6702,10 +6704,10 @@
 {
 	struct brcmf_cfg80211_vif_event *event = &cfg->vif_event;
 
-	mutex_lock(&event->vif_event_lock);
+	spin_lock(&event->vif_event_lock);
 	event->vif = vif;
 	event->action = 0;
-	mutex_unlock(&event->vif_event_lock);
+	spin_unlock(&event->vif_event_lock);
 }
 
 bool brcmf_cfg80211_vif_event_armed(struct brcmf_cfg80211_info *cfg)
@@ -6713,9 +6715,9 @@
 	struct brcmf_cfg80211_vif_event *event = &cfg->vif_event;
 	bool armed;
 
-	mutex_lock(&event->vif_event_lock);
+	spin_lock(&event->vif_event_lock);
 	armed = event->vif != NULL;
-	mutex_unlock(&event->vif_event_lock);
+	spin_unlock(&event->vif_event_lock);
 
 	return armed;
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
index 7d77f86..8889832 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
@@ -227,7 +227,7 @@
  */
 struct brcmf_cfg80211_vif_event {
 	wait_queue_head_t vif_wq;
-	struct mutex vif_event_lock;
+	spinlock_t vif_event_lock;
 	u8 action;
 	struct brcmf_cfg80211_vif *vif;
 };
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 8d16f02..5eaac13 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -136,27 +136,6 @@
 			  err);
 }
 
-static void
-_brcmf_set_mac_address(struct work_struct *work)
-{
-	struct brcmf_if *ifp;
-	s32 err;
-
-	ifp = container_of(work, struct brcmf_if, setmacaddr_work);
-
-	brcmf_dbg(TRACE, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx);
-
-	err = brcmf_fil_iovar_data_set(ifp, "cur_etheraddr", ifp->mac_addr,
-				       ETH_ALEN);
-	if (err < 0) {
-		brcmf_err("Setting cur_etheraddr failed, %d\n", err);
-	} else {
-		brcmf_dbg(TRACE, "MAC address updated to %pM\n",
-			  ifp->mac_addr);
-		memcpy(ifp->ndev->dev_addr, ifp->mac_addr, ETH_ALEN);
-	}
-}
-
 #if IS_ENABLED(CONFIG_IPV6)
 static void _brcmf_update_ndtable(struct work_struct *work)
 {
@@ -190,10 +169,20 @@
 {
 	struct brcmf_if *ifp = netdev_priv(ndev);
 	struct sockaddr *sa = (struct sockaddr *)addr;
+	int err;
 
-	memcpy(&ifp->mac_addr, sa->sa_data, ETH_ALEN);
-	schedule_work(&ifp->setmacaddr_work);
-	return 0;
+	brcmf_dbg(TRACE, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx);
+
+	err = brcmf_fil_iovar_data_set(ifp, "cur_etheraddr", sa->sa_data,
+				       ETH_ALEN);
+	if (err < 0) {
+		brcmf_err("Setting cur_etheraddr failed, %d\n", err);
+	} else {
+		brcmf_dbg(TRACE, "updated to %pM\n", sa->sa_data);
+		memcpy(ifp->mac_addr, sa->sa_data, ETH_ALEN);
+		memcpy(ifp->ndev->dev_addr, ifp->mac_addr, ETH_ALEN);
+	}
+	return err;
 }
 
 static void brcmf_netdev_set_multicast_list(struct net_device *ndev)
@@ -519,13 +508,9 @@
 	ndev->needed_headroom += drvr->hdrlen;
 	ndev->ethtool_ops = &brcmf_ethtool_ops;
 
-	drvr->rxsz = ndev->mtu + ndev->hard_header_len +
-			      drvr->hdrlen;
-
 	/* set the mac address */
 	memcpy(ndev->dev_addr, ifp->mac_addr, ETH_ALEN);
 
-	INIT_WORK(&ifp->setmacaddr_work, _brcmf_set_mac_address);
 	INIT_WORK(&ifp->multicast_work, _brcmf_set_multicast_list);
 	INIT_WORK(&ifp->ndoffload_work, _brcmf_update_ndtable);
 
@@ -730,7 +715,6 @@
 		}
 
 		if (ifp->ndev->netdev_ops == &brcmf_netdev_ops_pri) {
-			cancel_work_sync(&ifp->setmacaddr_work);
 			cancel_work_sync(&ifp->multicast_work);
 			cancel_work_sync(&ifp->ndoffload_work);
 		}
@@ -743,7 +727,7 @@
 		 * serious troublesome side effects. The p2p module will clean
 		 * up the ifp if needed.
 		 */
-		brcmf_p2p_ifp_removed(ifp);
+		brcmf_p2p_ifp_removed(ifp, rtnl_locked);
 		kfree(ifp);
 	}
 }
@@ -886,9 +870,12 @@
 		}
 		break;
 	case NETDEV_DOWN:
-		if (i < NDOL_MAX_ENTRIES)
-			for (; i < ifp->ipv6addr_idx; i++)
+		if (i < NDOL_MAX_ENTRIES) {
+			for (; i < ifp->ipv6addr_idx - 1; i++)
 				table[i] = table[i + 1];
+			memset(&table[i], 0, sizeof(table[i]));
+			ifp->ipv6addr_idx--;
+		}
 		break;
 	default:
 		break;
@@ -1061,8 +1048,7 @@
 		brcmf_fws_del_interface(ifp);
 		brcmf_fws_deinit(drvr);
 	}
-	if (ifp)
-		brcmf_net_detach(ifp->ndev, false);
+	brcmf_net_detach(ifp->ndev, false);
 	if (p2p_ifp)
 		brcmf_net_detach(p2p_ifp->ndev, false);
 	drvr->iflist[0] = NULL;
@@ -1169,7 +1155,8 @@
 				 !brcmf_get_pend_8021x_cnt(ifp),
 				 MAX_WAIT_FOR_8021X_TX);
 
-	WARN_ON(!err);
+	if (!err)
+		brcmf_err("Timed out waiting for no pending 802.1x packets\n");
 
 	return !err;
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
index 8fa34ca..c94dcab 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h
@@ -112,15 +112,11 @@
 
 	/* Internal brcmf items */
 	uint hdrlen;		/* Total BRCMF header length (proto + bus) */
-	uint rxsz;		/* Rx buffer size bus module should use */
 
 	/* Dongle media info */
 	char fwver[BRCMF_DRIVER_FIRMWARE_VERSION_LEN];
 	u8 mac[ETH_ALEN];		/* MAC address obtained from dongle */
 
-	/* Multicast data packets sent to dongle */
-	unsigned long tx_multicast;
-
 	struct mac_address addresses[BRCMF_MAX_IFS];
 
 	struct brcmf_if *iflist[BRCMF_MAX_IFS];
@@ -176,7 +172,6 @@
  * @vif: points to cfg80211 specific interface information.
  * @ndev: associated network device.
  * @stats: interface specific network statistics.
- * @setmacaddr_work: worker object for setting mac address.
  * @multicast_work: worker object for multicast provisioning.
  * @ndoffload_work: worker object for neighbor discovery offload configuration.
  * @fws_desc: interface specific firmware-signalling descriptor.
@@ -193,7 +188,6 @@
 	struct brcmf_cfg80211_vif *vif;
 	struct net_device *ndev;
 	struct net_device_stats stats;
-	struct work_struct setmacaddr_work;
 	struct work_struct multicast_work;
 	struct work_struct ndoffload_work;
 	struct brcmf_fws_mac_descriptor *fws_desc;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c
index 7e269f9..d0b738d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c
@@ -234,13 +234,20 @@
 
 void brcmf_flowring_delete(struct brcmf_flowring *flow, u16 flowid)
 {
+	struct brcmf_bus *bus_if = dev_get_drvdata(flow->dev);
 	struct brcmf_flowring_ring *ring;
+	struct brcmf_if *ifp;
 	u16 hash_idx;
+	u8 ifidx;
 	struct sk_buff *skb;
 
 	ring = flow->rings[flowid];
 	if (!ring)
 		return;
+
+	ifidx = brcmf_flowring_ifidx_get(flow, flowid);
+	ifp = brcmf_get_ifp(bus_if->drvr, ifidx);
+
 	brcmf_flowring_block(flow, flowid, false);
 	hash_idx = ring->hash_id;
 	flow->hash[hash_idx].ifidx = BRCMF_FLOWRING_INVALID_IFIDX;
@@ -249,7 +256,7 @@
 
 	skb = skb_dequeue(&ring->skblist);
 	while (skb) {
-		brcmu_pkt_buf_free_skb(skb);
+		brcmf_txfinalize(ifp, skb, false);
 		skb = skb_dequeue(&ring->skblist);
 	}
 
@@ -495,14 +502,18 @@
 	} else {
 		search = flow->tdls_entry;
 		if (memcmp(search->mac, peer, ETH_ALEN) == 0)
-			return;
+			goto free_entry;
 		while (search->next) {
 			search = search->next;
 			if (memcmp(search->mac, peer, ETH_ALEN) == 0)
-				return;
+				goto free_entry;
 		}
 		search->next = tdls_entry;
 	}
 
 	flow->tdls_active = true;
+	return;
+
+free_entry:
+	kfree(tdls_entry);
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
index 9f9024a..a190f53 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
@@ -2104,8 +2104,6 @@
 	if ((skb->priority == 0) || (skb->priority > 7))
 		skb->priority = cfg80211_classify8021d(skb, NULL);
 
-	drvr->tx_multicast += !!multicast;
-
 	if (fws->avoid_queueing) {
 		rc = brcmf_proto_txdata(drvr, ifp->ifidx, 0, skb);
 		if (rc < 0)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 66f942f..de19c7c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -2297,7 +2297,7 @@
 	return err;
 }
 
-void brcmf_p2p_ifp_removed(struct brcmf_if *ifp)
+void brcmf_p2p_ifp_removed(struct brcmf_if *ifp, bool rtnl_locked)
 {
 	struct brcmf_cfg80211_info *cfg;
 	struct brcmf_cfg80211_vif *vif;
@@ -2306,9 +2306,11 @@
 	vif = ifp->vif;
 	cfg = wdev_to_cfg(&vif->wdev);
 	cfg->p2p.bss_idx[P2PAPI_BSSCFG_DEVICE].vif = NULL;
-	rtnl_lock();
+	if (!rtnl_locked)
+		rtnl_lock();
 	cfg80211_unregister_wdev(&vif->wdev);
-	rtnl_unlock();
+	if (!rtnl_locked)
+		rtnl_unlock();
 	brcmf_free_vif(vif);
 }
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h
index a3bd18c..8ce9447 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h
@@ -155,7 +155,7 @@
 int brcmf_p2p_del_vif(struct wiphy *wiphy, struct wireless_dev *wdev);
 int brcmf_p2p_ifchange(struct brcmf_cfg80211_info *cfg,
 		       enum brcmf_fil_p2p_if_types if_type);
-void brcmf_p2p_ifp_removed(struct brcmf_if *ifp);
+void brcmf_p2p_ifp_removed(struct brcmf_if *ifp, bool rtnl_locked);
 int brcmf_p2p_start_device(struct wiphy *wiphy, struct wireless_dev *wdev);
 void brcmf_p2p_stop_device(struct wiphy *wiphy, struct wireless_dev *wdev);
 int brcmf_p2p_scan_prep(struct wiphy *wiphy,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 68ab3ac..b892dac 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -313,6 +313,7 @@
 
 #define KSO_WAIT_US 50
 #define MAX_KSO_ATTEMPTS (PMU_MAX_TRANSITION_DLY/KSO_WAIT_US)
+#define BRCMF_SDIO_MAX_ACCESS_ERRORS	5
 
 /*
  * Conversion of 802.1D priority to precedence level
@@ -677,6 +678,7 @@
 {
 	u8 wr_val = 0, rd_val, cmp_val, bmask;
 	int err = 0;
+	int err_cnt = 0;
 	int try_cnt = 0;
 
 	brcmf_dbg(TRACE, "Enter: on=%d\n", on);
@@ -712,9 +714,14 @@
 		 */
 		rd_val = brcmf_sdiod_regrb(bus->sdiodev, SBSDIO_FUNC1_SLEEPCSR,
 					   &err);
-		if (((rd_val & bmask) == cmp_val) && !err)
+		if (!err) {
+			if ((rd_val & bmask) == cmp_val)
+				break;
+			err_cnt = 0;
+		}
+		/* bail out upon subsequent access errors */
+		if (err && (err_cnt++ > BRCMF_SDIO_MAX_ACCESS_ERRORS))
 			break;
-
 		udelay(KSO_WAIT_US);
 		brcmf_sdiod_regwb(bus->sdiodev, SBSDIO_FUNC1_SLEEPCSR,
 				  wr_val, &err);
@@ -3757,7 +3764,8 @@
 	u32 val, rev;
 
 	val = brcmf_sdiod_regrl(sdiodev, addr, NULL);
-	if (sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4335_4339 &&
+	if ((sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4335_4339 ||
+	     sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4339) &&
 	    addr == CORE_CC_REG(SI_ENUM_BASE, chipid)) {
 		rev = (val & CID_REV_MASK) >> CID_REV_SHIFT;
 		if (rev >= 2) {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c
index a10f35c..fe67559 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c
@@ -19,6 +19,7 @@
 #ifndef __CHECKER__
 #define CREATE_TRACE_POINTS
 #include "tracepoint.h"
+#include "debug.h"
 
 void __brcmf_err(const char *func, const char *fmt, ...)
 {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
index 98b15a9..2f978a3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c
@@ -1099,15 +1099,11 @@
 	devinfo->tx_freecount = ntxq;
 
 	devinfo->ctl_urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!devinfo->ctl_urb) {
-		brcmf_err("usb_alloc_urb (ctl) failed\n");
+	if (!devinfo->ctl_urb)
 		goto error;
-	}
 	devinfo->bulk_urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!devinfo->bulk_urb) {
-		brcmf_err("usb_alloc_urb (bulk) failed\n");
+	if (!devinfo->bulk_urb)
 		goto error;
-	}
 
 	return &devinfo->bus_pub;
 
@@ -1462,11 +1458,15 @@
 #define BRCMF_USB_DEVICE(dev_id)	\
 	{ USB_DEVICE(BRCM_USB_VENDOR_ID_BROADCOM, dev_id) }
 
+#define LINKSYS_USB_DEVICE(dev_id)	\
+	{ USB_DEVICE(BRCM_USB_VENDOR_ID_LINKSYS, dev_id) }
+
 static struct usb_device_id brcmf_usb_devid_table[] = {
 	BRCMF_USB_DEVICE(BRCM_USB_43143_DEVICE_ID),
 	BRCMF_USB_DEVICE(BRCM_USB_43236_DEVICE_ID),
 	BRCMF_USB_DEVICE(BRCM_USB_43242_DEVICE_ID),
 	BRCMF_USB_DEVICE(BRCM_USB_43569_DEVICE_ID),
+	LINKSYS_USB_DEVICE(BRCM_USB_43235_LINKSYS_DEVICE_ID),
 	{ USB_DEVICE(BRCM_USB_VENDOR_ID_LG, BRCM_USB_43242_LG_DEVICE_ID) },
 	/* special entry for device with firmware loaded and running */
 	BRCMF_USB_DEVICE(BRCM_USB_BCMFW_DEVICE_ID),
diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
index 3cc42be..d0407d9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
+++ b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h
@@ -22,6 +22,7 @@
 
 #define BRCM_USB_VENDOR_ID_BROADCOM	0x0a5c
 #define BRCM_USB_VENDOR_ID_LG		0x043e
+#define BRCM_USB_VENDOR_ID_LINKSYS	0x13b1
 #define BRCM_PCIE_VENDOR_ID_BROADCOM	PCI_VENDOR_ID_BROADCOM
 
 /* Chipcommon Core Chip IDs */
@@ -58,6 +59,7 @@
 
 /* USB Device IDs */
 #define BRCM_USB_43143_DEVICE_ID	0xbd1e
+#define BRCM_USB_43235_LINKSYS_DEVICE_ID	0x0039
 #define BRCM_USB_43236_DEVICE_ID	0xbd17
 #define BRCM_USB_43242_DEVICE_ID	0xbd1f
 #define BRCM_USB_43242_LG_DEVICE_ID	0x3101
diff --git a/drivers/net/wireless/intel/iwlegacy/3945.c b/drivers/net/wireless/intel/iwlegacy/3945.c
index 209dc99..4db327a 100644
--- a/drivers/net/wireless/intel/iwlegacy/3945.c
+++ b/drivers/net/wireless/intel/iwlegacy/3945.c
@@ -2671,7 +2671,7 @@
 	.send_led_cmd = il3945_send_led_cmd,
 };
 
-static struct il_cfg il3945_bg_cfg = {
+static const struct il_cfg il3945_bg_cfg = {
 	.name = "3945BG",
 	.fw_name_pre = IL3945_FW_PRE,
 	.ucode_api_max = IL3945_UCODE_API_MAX,
@@ -2700,7 +2700,7 @@
 	},
 };
 
-static struct il_cfg il3945_abg_cfg = {
+static const struct il_cfg il3945_abg_cfg = {
 	.name = "3945ABG",
 	.fw_name_pre = IL3945_FW_PRE,
 	.ucode_api_max = IL3945_UCODE_API_MAX,
diff --git a/drivers/net/wireless/intel/iwlegacy/common.h b/drivers/net/wireless/intel/iwlegacy/common.h
index 726ede3..3bba521 100644
--- a/drivers/net/wireless/intel/iwlegacy/common.h
+++ b/drivers/net/wireless/intel/iwlegacy/common.h
@@ -1320,7 +1320,7 @@
 	u64 timestamp;
 
 	union {
-#if defined(CONFIG_IWL3945) || defined(CONFIG_IWL3945_MODULE)
+#if IS_ENABLED(CONFIG_IWL3945)
 		struct {
 			void *shared_virt;
 			dma_addr_t shared_phys;
@@ -1351,7 +1351,7 @@
 
 		} _3945;
 #endif
-#if defined(CONFIG_IWL4965) || defined(CONFIG_IWL4965_MODULE)
+#if IS_ENABLED(CONFIG_IWL4965)
 		struct {
 			struct il_rx_phy_res last_phy_res;
 			bool last_phy_res_valid;
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c b/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c
index b662cf3..c7509c5 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c
@@ -46,15 +46,6 @@
  *
  ******************************************************************************/
 
-static inline const struct fw_img *
-iwl_get_ucode_image(struct iwl_priv *priv, enum iwl_ucode_type ucode_type)
-{
-	if (ucode_type >= IWL_UCODE_TYPE_MAX)
-		return NULL;
-
-	return &priv->fw->img[ucode_type];
-}
-
 /*
  *  Calibration
  */
@@ -330,7 +321,7 @@
 	enum iwl_ucode_type old_type;
 	static const u16 alive_cmd[] = { REPLY_ALIVE };
 
-	fw = iwl_get_ucode_image(priv, ucode_type);
+	fw = iwl_get_ucode_image(priv->fw, ucode_type);
 	if (WARN_ON(!fw))
 		return -EINVAL;
 
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
index 64690c14..d4b73de 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c
@@ -73,13 +73,13 @@
 /* Highest firmware API version supported */
 #define IWL7260_UCODE_API_MAX	17
 #define IWL7265_UCODE_API_MAX	17
-#define IWL7265D_UCODE_API_MAX	24
-#define IWL3168_UCODE_API_MAX	24
+#define IWL7265D_UCODE_API_MAX	26
+#define IWL3168_UCODE_API_MAX	26
 
 /* Lowest firmware API version supported */
-#define IWL7260_UCODE_API_MIN	16
-#define IWL7265_UCODE_API_MIN	16
-#define IWL7265D_UCODE_API_MIN	16
+#define IWL7260_UCODE_API_MIN	17
+#define IWL7265_UCODE_API_MIN	17
+#define IWL7265D_UCODE_API_MIN	17
 #define IWL3168_UCODE_API_MIN	20
 
 /* NVM versions */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
index 6c6725e..d02ca14 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c
@@ -70,11 +70,11 @@
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL8000_UCODE_API_MAX	24
-#define IWL8265_UCODE_API_MAX	24
+#define IWL8000_UCODE_API_MAX	26
+#define IWL8265_UCODE_API_MAX	26
 
 /* Lowest firmware API version supported */
-#define IWL8000_UCODE_API_MIN	16
+#define IWL8000_UCODE_API_MIN	17
 #define IWL8265_UCODE_API_MIN	20
 
 /* NVM versions */
@@ -212,6 +212,17 @@
 	.vht_mu_mimo_supported = true,
 };
 
+const struct iwl_cfg iwl8275_2ac_cfg = {
+	.name = "Intel(R) Dual Band Wireless AC 8275",
+	.fw_name_pre = IWL8265_FW_PRE,
+	IWL_DEVICE_8265,
+	.ht_params = &iwl8000_ht_params,
+	.nvm_ver = IWL8000_NVM_VERSION,
+	.nvm_calib_ver = IWL8000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.vht_mu_mimo_supported = true,
+};
+
 const struct iwl_cfg iwl4165_2ac_cfg = {
 	.name = "Intel(R) Dual Band Wireless AC 4165",
 	.fw_name_pre = IWL8000_FW_PRE,
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c
index fbaf705..ff85041 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c
@@ -55,10 +55,10 @@
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL9000_UCODE_API_MAX	24
+#define IWL9000_UCODE_API_MAX	26
 
 /* Lowest firmware API version supported */
-#define IWL9000_UCODE_API_MIN	16
+#define IWL9000_UCODE_API_MIN	17
 
 /* NVM versions */
 #define IWL9000_NVM_VERSION		0x0a1d
@@ -72,15 +72,15 @@
 #define IWL9000_SMEM_OFFSET		0x400000
 #define IWL9000_SMEM_LEN		0x68000
 
-#define  IWL9000_FW_PRE "iwlwifi-9000-pu-a0-lc-a0-"
+#define  IWL9000_FW_PRE "iwlwifi-9000-pu-a0-jf-a0-"
 #define  IWL9260_FW_PRE "iwlwifi-9260-th-a0-jf-a0-"
-#define  IWL9260LC_FW_PRE "iwlwifi-9260-th-a0-lc-a0-"
+#define  IWL9000LC_FW_PRE "iwlwifi-9000-pu-a0-lc-a0-"
 #define IWL9000_MODULE_FIRMWARE(api) \
 	IWL9000_FW_PRE "-" __stringify(api) ".ucode"
 #define IWL9260_MODULE_FIRMWARE(api) \
 	IWL9260_FW_PRE "-" __stringify(api) ".ucode"
-#define IWL9260LC_MODULE_FIRMWARE(api) \
-	IWL9260LC_FW_PRE "-" __stringify(api) ".ucode"
+#define IWL9000LC_MODULE_FIRMWARE(api) \
+	IWL9000LC_FW_PRE "-" __stringify(api) ".ucode"
 
 #define NVM_HW_SECTION_NUM_FAMILY_9000		10
 
@@ -146,41 +146,73 @@
 	.mac_addr_from_csr = true,					\
 	.rf_id = true
 
+const struct iwl_cfg iwl9160_2ac_cfg = {
+	.name = "Intel(R) Dual Band Wireless AC 9160",
+	.fw_name_pre = IWL9260_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+};
+
 const struct iwl_cfg iwl9260_2ac_cfg = {
-		.name = "Intel(R) Dual Band Wireless AC 9260",
-		.fw_name_pre = IWL9260_FW_PRE,
-		IWL_DEVICE_9000,
-		.ht_params = &iwl9000_ht_params,
-		.nvm_ver = IWL9000_NVM_VERSION,
-		.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
-		.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.name = "Intel(R) Dual Band Wireless AC 9260",
+	.fw_name_pre = IWL9260_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+};
+
+const struct iwl_cfg iwl9270_2ac_cfg = {
+	.name = "Intel(R) Dual Band Wireless AC 9270",
+	.fw_name_pre = IWL9260_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+};
+
+const struct iwl_cfg iwl9460_2ac_cfg = {
+	.name = "Intel(R) Dual Band Wireless AC 9460",
+	.fw_name_pre = IWL9000_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.integrated = true,
+};
+
+const struct iwl_cfg iwl9560_2ac_cfg = {
+	.name = "Intel(R) Dual Band Wireless AC 9560",
+	.fw_name_pre = IWL9000_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.integrated = true,
 };
 
 /*
  * TODO the struct below is for internal testing only this should be
  * removed by EO 2016~
  */
-const struct iwl_cfg iwl9260lc_2ac_cfg = {
-		.name = "Intel(R) Dual Band Wireless AC 9260",
-		.fw_name_pre = IWL9260LC_FW_PRE,
-		IWL_DEVICE_9000,
-		.ht_params = &iwl9000_ht_params,
-		.nvm_ver = IWL9000_NVM_VERSION,
-		.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
-		.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
-};
-
-const struct iwl_cfg iwl5165_2ac_cfg = {
-		.name = "Intel(R) Dual Band Wireless AC 5165",
-		.fw_name_pre = IWL9000_FW_PRE,
-		IWL_DEVICE_9000,
-		.ht_params = &iwl9000_ht_params,
-		.nvm_ver = IWL9000_NVM_VERSION,
-		.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
-		.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
-		.integrated = true,
+const struct iwl_cfg iwl9000lc_2ac_cfg = {
+	.name = "Intel(R) Dual Band Wireless AC 9000",
+	.fw_name_pre = IWL9000LC_FW_PRE,
+	IWL_DEVICE_9000,
+	.ht_params = &iwl9000_ht_params,
+	.nvm_ver = IWL9000_NVM_VERSION,
+	.nvm_calib_ver = IWL9000_TX_POWER_VERSION,
+	.max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+	.integrated = true,
 };
 
 MODULE_FIRMWARE(IWL9000_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL9260_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
-MODULE_FIRMWARE(IWL9260LC_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL9000LC_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-a000.c b/drivers/net/wireless/intel/iwlwifi/iwl-a000.c
index 4d78232..ea16185 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-a000.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-a000.c
@@ -55,7 +55,7 @@
 #include "iwl-agn-hw.h"
 
 /* Highest firmware API version supported */
-#define IWL_A000_UCODE_API_MAX	24
+#define IWL_A000_UCODE_API_MAX	26
 
 /* Lowest firmware API version supported */
 #define IWL_A000_UCODE_API_MIN	24
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index 423b233..2660cc4 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -359,7 +359,6 @@
 	    high_temp:1,
 	    mac_addr_from_csr:1,
 	    lp_xtal_workaround:1,
-	    no_power_up_nic_in_init:1,
 	    disable_dummy_notification:1,
 	    apmg_not_supported:1,
 	    mq_rx_supported:1,
@@ -445,13 +444,17 @@
 extern const struct iwl_cfg iwl8260_2n_cfg;
 extern const struct iwl_cfg iwl8260_2ac_cfg;
 extern const struct iwl_cfg iwl8265_2ac_cfg;
+extern const struct iwl_cfg iwl8275_2ac_cfg;
 extern const struct iwl_cfg iwl4165_2ac_cfg;
 extern const struct iwl_cfg iwl8260_2ac_sdio_cfg;
 extern const struct iwl_cfg iwl8265_2ac_sdio_cfg;
 extern const struct iwl_cfg iwl4165_2ac_sdio_cfg;
+extern const struct iwl_cfg iwl9000lc_2ac_cfg;
+extern const struct iwl_cfg iwl9160_2ac_cfg;
 extern const struct iwl_cfg iwl9260_2ac_cfg;
-extern const struct iwl_cfg iwl9260lc_2ac_cfg;
-extern const struct iwl_cfg iwl5165_2ac_cfg;
+extern const struct iwl_cfg iwl9270_2ac_cfg;
+extern const struct iwl_cfg iwl9460_2ac_cfg;
+extern const struct iwl_cfg iwl9560_2ac_cfg;
 extern const struct iwl_cfg iwla000_2ac_cfg;
 #endif /* CONFIG_IWLMVM */
 
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
index 871ad02..d73e9d4 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h
@@ -589,6 +589,8 @@
  * Causes for the FH register interrupts
  */
 enum msix_fh_int_causes {
+	MSIX_FH_INT_CAUSES_Q0			= BIT(0),
+	MSIX_FH_INT_CAUSES_Q1			= BIT(1),
 	MSIX_FH_INT_CAUSES_D2S_CH0_NUM		= BIT(16),
 	MSIX_FH_INT_CAUSES_D2S_CH1_NUM		= BIT(17),
 	MSIX_FH_INT_CAUSES_S2D			= BIT(19),
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c
index 1d9dd153..50510fb 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c
@@ -33,9 +33,6 @@
 #define CREATE_TRACE_POINTS
 #include "iwl-devtrace.h"
 
-EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_iowrite8);
-EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ioread32);
-EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_iowrite32);
 EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_event);
 EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_error);
 EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_cont_event);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
index 1d6f5d2..33ef537 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
@@ -77,7 +77,6 @@
  */
 #define FH_MEM_LOWER_BOUND                   (0x1000)
 #define FH_MEM_UPPER_BOUND                   (0x2000)
-#define TFH_MEM_LOWER_BOUND                  (0xA06000)
 
 /**
  * Keep-Warm (KW) buffer base address.
@@ -120,7 +119,7 @@
 #define FH_MEM_CBBC_20_31_LOWER_BOUND		(FH_MEM_LOWER_BOUND + 0xB20)
 #define FH_MEM_CBBC_20_31_UPPER_BOUND		(FH_MEM_LOWER_BOUND + 0xB80)
 /* a000 TFD table address, 64 bit */
-#define TFH_TFDQ_CBB_TABLE			(TFH_MEM_LOWER_BOUND + 0x1C00)
+#define TFH_TFDQ_CBB_TABLE			(0x1C00)
 
 /* Find TFD CB base pointer for given queue */
 static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans,
@@ -156,7 +155,7 @@
  * In case of DRAM read address which is not aligned to 128B, the TFH will
  * enable transfer size which doesn't cross 64B DRAM address boundary.
 */
-#define TFH_TRANSFER_MODE		(TFH_MEM_LOWER_BOUND + 0x1F40)
+#define TFH_TRANSFER_MODE		(0x1F40)
 #define TFH_TRANSFER_MAX_PENDING_REQ	0xc
 #define TFH_CHUNK_SIZE_128			BIT(8)
 #define TFH_CHUNK_SPLIT_MODE		BIT(10)
@@ -167,7 +166,7 @@
  * the start of the TFD first TB.
  * In case of a DRAM Tx CMD update the TFH will update PN and Key ID
  */
-#define TFH_TXCMD_UPDATE_CFG		(TFH_MEM_LOWER_BOUND + 0x1F48)
+#define TFH_TXCMD_UPDATE_CFG		(0x1F48)
 /*
  * Controls TX DMA operation
  *
@@ -181,22 +180,22 @@
  * set to 1 - interrupt is sent to the driver
  * Bit 0: Indicates the snoop configuration
 */
-#define TFH_SRV_DMA_CHNL0_CTRL	(TFH_MEM_LOWER_BOUND + 0x1F60)
+#define TFH_SRV_DMA_CHNL0_CTRL	(0x1F60)
 #define TFH_SRV_DMA_SNOOP	BIT(0)
 #define TFH_SRV_DMA_TO_DRIVER	BIT(24)
 #define TFH_SRV_DMA_START	BIT(31)
 
 /* Defines the DMA SRAM write start address to transfer a data block */
-#define TFH_SRV_DMA_CHNL0_SRAM_ADDR	(TFH_MEM_LOWER_BOUND + 0x1F64)
+#define TFH_SRV_DMA_CHNL0_SRAM_ADDR	(0x1F64)
 
 /* Defines the 64bits DRAM start address to read the DMA data block from */
-#define TFH_SRV_DMA_CHNL0_DRAM_ADDR	(TFH_MEM_LOWER_BOUND + 0x1F68)
+#define TFH_SRV_DMA_CHNL0_DRAM_ADDR	(0x1F68)
 
 /*
  * Defines the number of bytes to transfer from DRAM to SRAM.
  * Note that this register may be configured with non-dword aligned size.
  */
-#define TFH_SRV_DMA_CHNL0_BC	(TFH_MEM_LOWER_BOUND + 0x1F70)
+#define TFH_SRV_DMA_CHNL0_BC	(0x1F70)
 
 /**
  * Rx SRAM Control and Status Registers (RSCSR)
@@ -644,6 +643,7 @@
 #define TFD_QUEUE_BC_SIZE	(TFD_QUEUE_SIZE_MAX + TFD_QUEUE_SIZE_BC_DUP)
 #define IWL_TX_DMA_MASK        DMA_BIT_MASK(36)
 #define IWL_NUM_OF_TBS		20
+#define IWL_TFH_NUM_TBS		25
 
 static inline u8 iwl_get_dma_hi_addr(dma_addr_t addr)
 {
@@ -665,25 +665,29 @@
 } __packed;
 
 /**
- * struct iwl_tfd
+ * struct iwl_tfh_tb transmit buffer descriptor within transmit frame descriptor
  *
- * Transmit Frame Descriptor (TFD)
+ * This structure contains dma address and length of transmission address
  *
- * @ __reserved1[3] reserved
- * @ num_tbs 0-4 number of active tbs
- *	     5   reserved
- * 	     6-7 padding (not used)
- * @ tbs[20]	transmit frame buffer descriptors
- * @ __pad 	padding
- *
+ * @tb_len length of the tx buffer
+ * @addr 64 bits dma address
+ */
+struct iwl_tfh_tb {
+	__le16 tb_len;
+	__le64 addr;
+} __packed;
+
+/**
  * Each Tx queue uses a circular buffer of 256 TFDs stored in host DRAM.
  * Both driver and device share these circular buffers, each of which must be
- * contiguous 256 TFDs x 128 bytes-per-TFD = 32 KBytes
+ * contiguous 256 TFDs.
+ * For pre a000 HW it is 256 x 128 bytes-per-TFD = 32 KBytes
+ * For a000 HW and on it is 256 x 256 bytes-per-TFD = 65 KBytes
  *
  * Driver must indicate the physical address of the base of each
  * circular buffer via the FH_MEM_CBBC_QUEUE registers.
  *
- * Each TFD contains pointer/size information for up to 20 data buffers
+ * Each TFD contains pointer/size information for up to 20 / 25 data buffers
  * in host DRAM.  These buffers collectively contain the (one) frame described
  * by the TFD.  Each buffer must be a single contiguous block of memory within
  * itself, but buffers may be scattered in host DRAM.  Each buffer has max size
@@ -692,6 +696,16 @@
  *
  * A maximum of 255 (not 256!) TFDs may be on a queue waiting for Tx.
  */
+
+/**
+ * struct iwl_tfd - Transmit Frame Descriptor (TFD)
+ * @ __reserved1[3] reserved
+ * @ num_tbs 0-4 number of active tbs
+ *	     5   reserved
+ *	     6-7 padding (not used)
+ * @ tbs[20]	transmit frame buffer descriptors
+ * @ __pad	padding
+ */
 struct iwl_tfd {
 	u8 __reserved1[3];
 	u8 num_tbs;
@@ -699,6 +713,19 @@
 	__le32 __pad;
 } __packed;
 
+/**
+ * struct iwl_tfh_tfd - Transmit Frame Descriptor (TFD)
+ * @ num_tbs 0-4 number of active tbs
+ *	     5 -15   reserved
+ * @ tbs[25]	transmit frame buffer descriptors
+ * @ __pad	padding
+ */
+struct iwl_tfh_tfd {
+	__le16 num_tbs;
+	struct iwl_tfh_tb tbs[IWL_TFH_NUM_TBS];
+	__le32 __pad;
+} __packed;
+
 /* Keep Warm Size */
 #define IWL_KW_SIZE 0x1000	/* 4k */
 
@@ -707,8 +734,13 @@
 /**
  * struct iwlagn_schedq_bc_tbl scheduler byte count table
  *	base physical address provided by SCD_DRAM_BASE_ADDR
+ * For devices up to a000:
  * @tfd_offset  0-12 - tx command byte count
- *	       12-16 - station index
+ *		12-16 - station index
+ * For a000 and on:
+ * @tfd_offset  0-12 - tx command byte count
+ *		12-13 - number of 64 byte chunks
+ *		14-16 - reserved
  */
 struct iwlagn_scd_bc_tbl {
 	__le16 tfd_offset[TFD_QUEUE_BC_SIZE];
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h
index 1b1e045..ceec5ca 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h
@@ -199,8 +199,6 @@
  * @IWL_UCODE_TLV_FLAGS_NEWSCAN: new uCode scan behavior on hidden SSID,
  *	treats good CRC threshold as a boolean
  * @IWL_UCODE_TLV_FLAGS_MFP: This uCode image supports MFP (802.11w).
- * @IWL_UCODE_TLV_FLAGS_P2P: This uCode image supports P2P.
- * @IWL_UCODE_TLV_FLAGS_DW_BC_TABLE: The SCD byte count table is in DWORDS
  * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: This uCode image supports uAPSD
  * @IWL_UCODE_TLV_FLAGS_SHORT_BL: 16 entries of black list instead of 64 in scan
  *	offload profile config command.
@@ -210,36 +208,24 @@
  *	from the probe request template.
  * @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL: new NS offload (small version)
  * @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE: new NS offload (large version)
- * @IWL_UCODE_TLV_FLAGS_P2P_PM: P2P client supports PM as a stand alone MAC
- * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_DCM: support power save on BSS station and
- *	P2P client interfaces simultaneously if they are in different bindings.
- * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_SCM: support power save on BSS station and
- *	P2P client interfaces simultaneously if they are in same bindings.
  * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: General support for uAPSD
  * @IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD: P2P client supports uAPSD power save
  * @IWL_UCODE_TLV_FLAGS_BCAST_FILTERING: uCode supports broadcast filtering.
- * @IWL_UCODE_TLV_FLAGS_GO_UAPSD: AP/GO interfaces support uAPSD clients
  * @IWL_UCODE_TLV_FLAGS_EBS_SUPPORT: this uCode image supports EBS.
  */
 enum iwl_ucode_tlv_flag {
 	IWL_UCODE_TLV_FLAGS_PAN			= BIT(0),
 	IWL_UCODE_TLV_FLAGS_NEWSCAN		= BIT(1),
 	IWL_UCODE_TLV_FLAGS_MFP			= BIT(2),
-	IWL_UCODE_TLV_FLAGS_P2P			= BIT(3),
-	IWL_UCODE_TLV_FLAGS_DW_BC_TABLE		= BIT(4),
 	IWL_UCODE_TLV_FLAGS_SHORT_BL		= BIT(7),
 	IWL_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS	= BIT(10),
 	IWL_UCODE_TLV_FLAGS_NO_BASIC_SSID	= BIT(12),
 	IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL	= BIT(15),
 	IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE	= BIT(16),
-	IWL_UCODE_TLV_FLAGS_P2P_PM		= BIT(21),
-	IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM	= BIT(22),
-	IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM	= BIT(23),
 	IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT	= BIT(24),
 	IWL_UCODE_TLV_FLAGS_EBS_SUPPORT		= BIT(25),
 	IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD	= BIT(26),
 	IWL_UCODE_TLV_FLAGS_BCAST_FILTERING	= BIT(29),
-	IWL_UCODE_TLV_FLAGS_GO_UAPSD		= BIT(30),
 };
 
 typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t;
@@ -249,24 +235,21 @@
  * @IWL_UCODE_TLV_API_FRAGMENTED_SCAN: This ucode supports active dwell time
  *	longer than the passive one, which is essential for fragmented scan.
  * @IWL_UCODE_TLV_API_WIFI_MCC_UPDATE: ucode supports MCC updates with source.
- * @IWL_UCODE_TLV_API_WIDE_CMD_HDR: ucode supports wide command header
  * @IWL_UCODE_TLV_API_LQ_SS_PARAMS: Configure STBC/BFER via LQ CMD ss_params
  * @IWL_UCODE_TLV_API_NEW_VERSION: new versioning format
- * @IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY: scan APIs use 8-level priority
- *	instead of 3.
- * @IWL_UCODE_TLV_API_TX_POWER_CHAIN: TX power API has larger command size
- *	(command version 3) that supports per-chain limits
+ * @IWL_UCODE_TLV_API_SCAN_TSF_REPORT: Scan start time reported in scan
+ *	iteration complete notification, and the timestamp reported for RX
+ *	received during scan, are reported in TSF of the mac specified in the
+ *	scan request.
  *
  * @NUM_IWL_UCODE_TLV_API: number of bits used
  */
 enum iwl_ucode_tlv_api {
 	IWL_UCODE_TLV_API_FRAGMENTED_SCAN	= (__force iwl_ucode_tlv_api_t)8,
 	IWL_UCODE_TLV_API_WIFI_MCC_UPDATE	= (__force iwl_ucode_tlv_api_t)9,
-	IWL_UCODE_TLV_API_WIDE_CMD_HDR		= (__force iwl_ucode_tlv_api_t)14,
 	IWL_UCODE_TLV_API_LQ_SS_PARAMS		= (__force iwl_ucode_tlv_api_t)18,
-	IWL_UCODE_TLV_API_NEW_VERSION           = (__force iwl_ucode_tlv_api_t)20,
-	IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY	= (__force iwl_ucode_tlv_api_t)24,
-	IWL_UCODE_TLV_API_TX_POWER_CHAIN	= (__force iwl_ucode_tlv_api_t)27,
+	IWL_UCODE_TLV_API_NEW_VERSION		= (__force iwl_ucode_tlv_api_t)20,
+	IWL_UCODE_TLV_API_SCAN_TSF_REPORT	= (__force iwl_ucode_tlv_api_t)28,
 
 	NUM_IWL_UCODE_TLV_API
 #ifdef __CHECKER__
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw.h
index 74ea68d..5f22955 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-fw.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw.h
@@ -329,4 +329,13 @@
 	return conf_tlv->usniffer;
 }
 
+static inline const struct fw_img *
+iwl_get_ucode_image(const struct iwl_fw *fw, enum iwl_ucode_type ucode_type)
+{
+	if (ucode_type >= IWL_UCODE_TYPE_MAX)
+		return NULL;
+
+	return &fw->img[ucode_type];
+}
+
 #endif  /* __iwl_fw_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
index 92c8b5f..a9f69fd 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
@@ -267,7 +267,7 @@
 		IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_WIDX, i);
 		IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_RIDX, i);
 		IWL_CMD_MQ(cmd, RFH_Q_URBD_STTS_WPTR_LSB, i);
-	};
+	}
 
 	switch (cmd) {
 	IWL_CMD(RFH_RXF_DMA_CFG);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c b/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c
index 8aa1f2b..88f260d 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c
@@ -99,8 +99,12 @@
 				continue;
 
 			for (i = 0; i < w->n_cmds; i++) {
-				if (w->cmds[i] ==
-				    WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd)) {
+				u16 rec_id = WIDE_ID(pkt->hdr.group_id,
+						     pkt->hdr.cmd);
+
+				if (w->cmds[i] == rec_id ||
+				    (!iwl_cmd_groupid(w->cmds[i]) &&
+				     DEF_ID(w->cmds[i]) == rec_id)) {
 					found = true;
 					break;
 				}
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index 43f8f7d..3bd6fc1 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -67,6 +67,7 @@
 #include <linux/export.h>
 #include <linux/etherdevice.h>
 #include <linux/pci.h>
+#include <linux/acpi.h>
 #include "iwl-drv.h"
 #include "iwl-modparams.h"
 #include "iwl-nvm-parse.h"
@@ -564,11 +565,16 @@
 	__le32 mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_STRAP));
 	__le32 mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_STRAP));
 
-	/* If OEM did not fuse address - get it from OTP */
-	if (!mac_addr0 && !mac_addr1) {
-		mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP));
-		mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP));
-	}
+	iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr);
+	/*
+	 * If the OEM fused a valid address, use it instead of the one in the
+	 * OTP
+	 */
+	if (is_valid_ether_addr(data->hw_addr))
+		return;
+
+	mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP));
+	mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP));
 
 	iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr);
 }
@@ -899,3 +905,91 @@
 	return regd;
 }
 IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info);
+
+#ifdef CONFIG_ACPI
+#define WRDD_METHOD		"WRDD"
+#define WRDD_WIFI		(0x07)
+#define WRDD_WIGIG		(0x10)
+
+static u32 iwl_wrdd_get_mcc(struct device *dev, union acpi_object *wrdd)
+{
+	union acpi_object *mcc_pkg, *domain_type, *mcc_value;
+	u32 i;
+
+	if (wrdd->type != ACPI_TYPE_PACKAGE ||
+	    wrdd->package.count < 2 ||
+	    wrdd->package.elements[0].type != ACPI_TYPE_INTEGER ||
+	    wrdd->package.elements[0].integer.value != 0) {
+		IWL_DEBUG_EEPROM(dev, "Unsupported wrdd structure\n");
+		return 0;
+	}
+
+	for (i = 1 ; i < wrdd->package.count ; ++i) {
+		mcc_pkg = &wrdd->package.elements[i];
+
+		if (mcc_pkg->type != ACPI_TYPE_PACKAGE ||
+		    mcc_pkg->package.count < 2 ||
+		    mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER ||
+		    mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+			mcc_pkg = NULL;
+			continue;
+		}
+
+		domain_type = &mcc_pkg->package.elements[0];
+		if (domain_type->integer.value == WRDD_WIFI)
+			break;
+
+		mcc_pkg = NULL;
+	}
+
+	if (mcc_pkg) {
+		mcc_value = &mcc_pkg->package.elements[1];
+		return mcc_value->integer.value;
+	}
+
+	return 0;
+}
+
+int iwl_get_bios_mcc(struct device *dev, char *mcc)
+{
+	acpi_handle root_handle;
+	acpi_handle handle;
+	struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL};
+	acpi_status status;
+	u32 mcc_val;
+
+	root_handle = ACPI_HANDLE(dev);
+	if (!root_handle) {
+		IWL_DEBUG_EEPROM(dev,
+				 "Could not retrieve root port ACPI handle\n");
+		return -ENOENT;
+	}
+
+	/* Get the method's handle */
+	status = acpi_get_handle(root_handle, (acpi_string)WRDD_METHOD,
+				 &handle);
+	if (ACPI_FAILURE(status)) {
+		IWL_DEBUG_EEPROM(dev, "WRD method not found\n");
+		return -ENOENT;
+	}
+
+	/* Call WRDD with no arguments */
+	status = acpi_evaluate_object(handle, NULL, NULL, &wrdd);
+	if (ACPI_FAILURE(status)) {
+		IWL_DEBUG_EEPROM(dev, "WRDC invocation failed (0x%x)\n",
+				 status);
+		return -ENOENT;
+	}
+
+	mcc_val = iwl_wrdd_get_mcc(dev, wrdd.pointer);
+	kfree(wrdd.pointer);
+	if (!mcc_val)
+		return -ENOENT;
+
+	mcc[0] = (mcc_val >> 8) & 0xff;
+	mcc[1] = mcc_val & 0xff;
+	mcc[2] = '\0';
+	return 0;
+}
+IWL_EXPORT_SYMBOL(iwl_get_bios_mcc);
+#endif
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
index d704d52..7249e5b 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
@@ -5,7 +5,8 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2008 - 2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2016        Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -93,4 +94,21 @@
 iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 		       int num_of_ch, __le32 *channels, u16 fw_mcc);
 
+#ifdef CONFIG_ACPI
+/**
+ * iwl_get_bios_mcc - read MCC from BIOS, if available
+ *
+ * @dev: the struct device
+ * @mcc: output buffer (3 bytes) that will get the MCC
+ *
+ * This function tries to read the current MCC from ACPI if available.
+ */
+int iwl_get_bios_mcc(struct device *dev, char *mcc);
+#else
+static inline int iwl_get_bios_mcc(struct device *dev, char *mcc)
+{
+	return -ENOENT;
+}
+#endif
+
 #endif /* __iwl_nvm_parse_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
index 7beba9a..2893826 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
@@ -110,7 +110,7 @@
 	IWL_PHY_DB_MAX
 };
 
-#define PHY_DB_CMD 0x6c /* TEMP API - The actual is 0x8c */
+#define PHY_DB_CMD 0x6c
 
 /*
  * phy db - configure operational ucode
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
index 459bf73..406ef30 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
@@ -302,22 +302,17 @@
 #define OSC_CLK_FORCE_CONTROL		(0x8)
 
 #define FH_UCODE_LOAD_STATUS		(0x1AF0)
-#define CSR_UCODE_LOAD_STATUS_ADDR	(0x1E70)
-enum secure_load_status_reg {
-	LMPM_CPU_UCODE_LOADING_STARTED			= 0x00000001,
-	LMPM_CPU_HDRS_LOADING_COMPLETED			= 0x00000003,
-	LMPM_CPU_UCODE_LOADING_COMPLETED		= 0x00000007,
-	LMPM_CPU_STATUS_NUM_OF_LAST_COMPLETED		= 0x000000F8,
-	LMPM_CPU_STATUS_NUM_OF_LAST_LOADED_BLOCK	= 0x0000FF00,
-};
 
-#define LMPM_SECURE_INSPECTOR_CODE_ADDR	(0x1E38)
-#define LMPM_SECURE_INSPECTOR_DATA_ADDR	(0x1E3C)
+/*
+ * Replacing FH_UCODE_LOAD_STATUS
+ * This register is writen by driver and is read by uCode during boot flow.
+ * Note this address is cleared after MAC reset.
+ */
+#define UREG_UCODE_LOAD_STATUS		(0xa05c40)
+
 #define LMPM_SECURE_UCODE_LOAD_CPU1_HDR_ADDR	(0x1E78)
 #define LMPM_SECURE_UCODE_LOAD_CPU2_HDR_ADDR	(0x1E7C)
 
-#define LMPM_SECURE_INSPECTOR_CODE_MEM_SPACE	(0x400000)
-#define LMPM_SECURE_INSPECTOR_DATA_MEM_SPACE	(0x402000)
 #define LMPM_SECURE_CPU1_HDR_MEM_SPACE		(0x420000)
 #define LMPM_SECURE_CPU2_HDR_MEM_SPACE		(0x420400)
 
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
index 6069a9f..d42cab2 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
@@ -65,6 +65,7 @@
 
 #include "iwl-trans.h"
 #include "iwl-drv.h"
+#include "iwl-fh.h"
 
 struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
 				  struct device *dev,
@@ -77,7 +78,7 @@
 	static struct lock_class_key __key;
 #endif
 
-	trans = kzalloc(sizeof(*trans) + priv_size, GFP_KERNEL);
+	trans = devm_kzalloc(dev, sizeof(*trans) + priv_size, GFP_KERNEL);
 	if (!trans)
 		return NULL;
 
@@ -102,18 +103,14 @@
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 	if (!trans->dev_cmd_pool)
-		goto free;
+		return NULL;
 
 	return trans;
- free:
-	kfree(trans);
-	return NULL;
 }
 
 void iwl_trans_free(struct iwl_trans *trans)
 {
 	kmem_cache_destroy(trans->dev_cmd_pool);
-	kfree(trans);
 }
 
 int iwl_trans_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd)
@@ -139,6 +136,9 @@
 	if (!(cmd->flags & CMD_ASYNC))
 		lock_map_acquire_read(&trans->sync_cmd_lockdep_map);
 
+	if (trans->wide_cmd_header && !iwl_cmd_groupid(cmd->id))
+		cmd->id = DEF_ID(cmd->id);
+
 	ret = trans->ops->send_cmd(trans, cmd);
 
 	if (!(cmd->flags & CMD_ASYNC))
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index 5535e22..0296124 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -153,6 +153,7 @@
 
 /* make u16 wide id out of u8 group and opcode */
 #define WIDE_ID(grp, opcode) ((grp << 8) | opcode)
+#define DEF_ID(opcode) ((1 << 8) | (opcode))
 
 /* due to the conversion, this group is special; new groups
  * should be defined in the appropriate fw-api header files
@@ -262,8 +263,6 @@
  *	(i.e. mark it as non-idle).
  * @CMD_WANT_ASYNC_CALLBACK: the op_mode's async callback function must be
  *	called after this command completes. Valid only with CMD_ASYNC.
- * @CMD_TB_BITMAP_POS: Position of the first bit for the TB bitmap. We need to
- *	check that we leave enough room for the TBs bitmap which needs 20 bits.
  */
 enum CMD_MODE {
 	CMD_ASYNC		= BIT(0),
@@ -274,8 +273,6 @@
 	CMD_MAKE_TRANS_IDLE	= BIT(5),
 	CMD_WAKE_UP_TRANS	= BIT(6),
 	CMD_WANT_ASYNC_CALLBACK	= BIT(7),
-
-	CMD_TB_BITMAP_POS	= 11,
 };
 
 #define DEF_CMD_PAYLOAD_SIZE 320
@@ -488,7 +485,6 @@
  * @bc_table_dword: set to true if the BC table expects the byte count to be
  *	in DWORD (as opposed to bytes)
  * @scd_set_active: should the transport configure the SCD for HCMD queue
- * @wide_cmd_header: firmware supports wide host command header
  * @sw_csum_tx: transport should compute the TCP checksum
  * @command_groups: array of command groups, each member is an array of the
  *	commands in the group; for debugging only
@@ -510,7 +506,6 @@
 	enum iwl_amsdu_size rx_buf_size;
 	bool bc_table_dword;
 	bool scd_set_active;
-	bool wide_cmd_header;
 	bool sw_csum_tx;
 	const struct iwl_hcmd_arr *command_groups;
 	int command_groups_size;
@@ -649,6 +644,8 @@
 	void (*txq_set_shared_mode)(struct iwl_trans *trans, u32 txq_id,
 				    bool shared);
 
+	dma_addr_t (*get_txq_byte_table)(struct iwl_trans *trans, int txq_id);
+
 	int (*wait_tx_queue_empty)(struct iwl_trans *trans, u32 txq_bm);
 	void (*freeze_txq_timer)(struct iwl_trans *trans, unsigned long txqs,
 				 bool freeze);
@@ -772,6 +769,7 @@
  * @hw_id_str: a string with info about HW ID. Set during transport allocation.
  * @pm_support: set to true in start_hw if link pm is supported
  * @ltr_enabled: set to true if the LTR is enabled
+ * @wide_cmd_header: true when ucode supports wide command header format
  * @num_rx_queues: number of RX queues allocated by the transport;
  *	the transport must set this before calling iwl_drv_start()
  * @dev_cmd_pool: pool for Tx cmd allocation - for internal use only.
@@ -823,6 +821,7 @@
 
 	const struct iwl_hcmd_arr *command_groups;
 	int command_groups_size;
+	bool wide_cmd_header;
 
 	u8 num_rx_queues;
 
@@ -1073,6 +1072,15 @@
 		trans->ops->txq_set_shared_mode(trans, queue, shared_mode);
 }
 
+static inline dma_addr_t iwl_trans_get_txq_byte_table(struct iwl_trans *trans,
+						      int queue)
+{
+	/* we should never be called if the trans doesn't support it */
+	BUG_ON(!trans->ops->get_txq_byte_table);
+
+	return trans->ops->get_txq_byte_table(trans, queue);
+}
+
 static inline void iwl_trans_txq_enable(struct iwl_trans *trans, int queue,
 					int fifo, int sta_id, int tid,
 					int frame_limit, u16 ssn,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index b232717..2d6f44f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -504,6 +504,28 @@
 	return !strncmp(name, buf, len) ? buf + len : NULL;
 }
 
+static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file,
+						 char __user *user_buf,
+						 size_t count, loff_t *ppos)
+{
+	struct ieee80211_vif *vif = file->private_data;
+	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+	struct iwl_mvm *mvm = mvmvif->mvm;
+	u32 curr_gp2;
+	u64 curr_os;
+	s64 diff;
+	char buf[64];
+	const size_t bufsz = sizeof(buf);
+	int pos = 0;
+
+	iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os);
+	do_div(curr_os, NSEC_PER_USEC);
+	diff = curr_os - curr_gp2;
+	pos += scnprintf(buf + pos, bufsz - pos, "diff=%lld\n", diff);
+
+	return simple_read_from_buffer(user_buf, count, ppos, buf, pos);
+}
+
 static ssize_t iwl_dbgfs_tof_enable_write(struct ieee80211_vif *vif,
 					  char *buf,
 					  size_t count, loff_t *ppos)
@@ -1530,6 +1552,8 @@
 MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_responder_params, 32);
 MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32);
 MVM_DEBUGFS_WRITE_FILE_OPS(lqm_send_cmd, 64);
+MVM_DEBUGFS_READ_FILE_OPS(os_device_timediff);
+
 
 void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 {
@@ -1554,8 +1578,7 @@
 
 	if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM &&
 	    ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) ||
-	     (vif->type == NL80211_IFTYPE_STATION && vif->p2p &&
-	      mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM)))
+	     (vif->type == NL80211_IFTYPE_STATION && vif->p2p)))
 		MVM_DEBUGFS_ADD_FILE_VIF(pm_params, mvmvif->dbgfs_dir, S_IWUSR |
 					 S_IRUSR);
 
@@ -1570,6 +1593,8 @@
 	MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir,
 				 S_IRUSR | S_IWUSR);
 	MVM_DEBUGFS_ADD_FILE_VIF(lqm_send_cmd, mvmvif->dbgfs_dir, S_IWUSR);
+	MVM_DEBUGFS_ADD_FILE_VIF(os_device_timediff,
+				 mvmvif->dbgfs_dir, S_IRUSR);
 
 	if (vif->type == NL80211_IFTYPE_STATION && !vif->p2p &&
 	    mvmvif == mvm->bf_allowed_vif)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index b344898..539d718 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -917,6 +917,59 @@
 	return ret ?: count;
 }
 
+static ssize_t iwl_dbgfs_inject_packet_write(struct iwl_mvm *mvm,
+					     char *buf, size_t count,
+					     loff_t *ppos)
+{
+	struct iwl_rx_cmd_buffer rxb = {
+		._rx_page_order = 0,
+		.truesize = 0, /* not used */
+		._offset = 0,
+	};
+	struct iwl_rx_packet *pkt;
+	struct iwl_rx_mpdu_desc *desc;
+	int bin_len = count / 2;
+	int ret = -EINVAL;
+
+	/* supporting only 9000 descriptor */
+	if (!mvm->trans->cfg->mq_rx_supported)
+		return -ENOTSUPP;
+
+	rxb._page = alloc_pages(GFP_ATOMIC, 0);
+	if (!rxb._page)
+		return -ENOMEM;
+	pkt = rxb_addr(&rxb);
+
+	ret = hex2bin(page_address(rxb._page), buf, bin_len);
+	if (ret)
+		goto out;
+
+	/* avoid invalid memory access */
+	if (bin_len < sizeof(*pkt) + sizeof(*desc))
+		goto out;
+
+	/* check this is RX packet */
+	if (WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd) !=
+	    WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))
+		goto out;
+
+	/* check the length in metadata matches actual received length */
+	desc = (void *)pkt->data;
+	if (le16_to_cpu(desc->mpdu_len) !=
+	    (bin_len - sizeof(*desc) - sizeof(*pkt)))
+		goto out;
+
+	local_bh_disable();
+	iwl_mvm_rx_mpdu_mq(mvm, NULL, &rxb, 0);
+	local_bh_enable();
+	ret = 0;
+
+out:
+	iwl_free_rxb(&rxb);
+
+	return ret ?: count;
+}
+
 static ssize_t iwl_dbgfs_fw_dbg_conf_read(struct file *file,
 					  char __user *user_buf,
 					  size_t count, loff_t *ppos)
@@ -1454,6 +1507,7 @@
 MVM_DEBUGFS_WRITE_FILE_OPS(max_amsdu_len, 8);
 MVM_DEBUGFS_WRITE_FILE_OPS(indirection_tbl,
 			   (IWL_RSS_INDIRECTION_TABLE_SIZE * 2));
+MVM_DEBUGFS_WRITE_FILE_OPS(inject_packet, 512);
 
 #ifdef CONFIG_IWLWIFI_BCAST_FILTERING
 MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters, 256);
@@ -1464,6 +1518,132 @@
 MVM_DEBUGFS_READ_WRITE_FILE_OPS(d3_sram, 8);
 #endif
 
+static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf,
+				  size_t count, loff_t *ppos)
+{
+	struct iwl_mvm *mvm = file->private_data;
+	struct iwl_dbg_mem_access_cmd cmd = {};
+	struct iwl_dbg_mem_access_rsp *rsp;
+	struct iwl_host_cmd hcmd = {
+		.flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
+		.data = { &cmd, },
+		.len = { sizeof(cmd) },
+	};
+	size_t delta, len;
+	ssize_t ret;
+
+	hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR,
+			     DEBUG_GROUP, 0);
+	cmd.op = cpu_to_le32(DEBUG_MEM_OP_READ);
+
+	/* Take care of alignment of both the position and the length */
+	delta = *ppos & 0x3;
+	cmd.addr = cpu_to_le32(*ppos - delta);
+	cmd.len = cpu_to_le32(min(ALIGN(count + delta, 4) / 4,
+				  (size_t)DEBUG_MEM_MAX_SIZE_DWORDS));
+
+	mutex_lock(&mvm->mutex);
+	ret = iwl_mvm_send_cmd(mvm, &hcmd);
+	mutex_unlock(&mvm->mutex);
+
+	if (ret < 0)
+		return ret;
+
+	rsp = (void *)hcmd.resp_pkt->data;
+	if (le32_to_cpu(rsp->status) != DEBUG_MEM_STATUS_SUCCESS) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	len = min((size_t)le32_to_cpu(rsp->len) << 2,
+		  iwl_rx_packet_payload_len(hcmd.resp_pkt) - sizeof(*rsp));
+	len = min(len - delta, count);
+	if (len < 0) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = len - copy_to_user(user_buf, (void *)rsp->data + delta, len);
+	*ppos += ret;
+
+out:
+	iwl_free_resp(&hcmd);
+	return ret;
+}
+
+static ssize_t iwl_dbgfs_mem_write(struct file *file,
+				   const char __user *user_buf, size_t count,
+				   loff_t *ppos)
+{
+	struct iwl_mvm *mvm = file->private_data;
+	struct iwl_dbg_mem_access_cmd *cmd;
+	struct iwl_dbg_mem_access_rsp *rsp;
+	struct iwl_host_cmd hcmd = {};
+	size_t cmd_size;
+	size_t data_size;
+	u32 op, len;
+	ssize_t ret;
+
+	hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR,
+			     DEBUG_GROUP, 0);
+
+	if (*ppos & 0x3 || count < 4) {
+		op = DEBUG_MEM_OP_WRITE_BYTES;
+		len = min(count, (size_t)(4 - (*ppos & 0x3)));
+		data_size = len;
+	} else {
+		op = DEBUG_MEM_OP_WRITE;
+		len = min(count >> 2, (size_t)DEBUG_MEM_MAX_SIZE_DWORDS);
+		data_size = len << 2;
+	}
+
+	cmd_size = sizeof(*cmd) + ALIGN(data_size, 4);
+	cmd = kzalloc(cmd_size, GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	cmd->op = cpu_to_le32(op);
+	cmd->len = cpu_to_le32(len);
+	cmd->addr = cpu_to_le32(*ppos);
+	if (copy_from_user((void *)cmd->data, user_buf, data_size)) {
+		kfree(cmd);
+		return -EFAULT;
+	}
+
+	hcmd.flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
+	hcmd.data[0] = (void *)cmd;
+	hcmd.len[0] = cmd_size;
+
+	mutex_lock(&mvm->mutex);
+	ret = iwl_mvm_send_cmd(mvm, &hcmd);
+	mutex_unlock(&mvm->mutex);
+
+	kfree(cmd);
+
+	if (ret < 0)
+		return ret;
+
+	rsp = (void *)hcmd.resp_pkt->data;
+	if (rsp->status != DEBUG_MEM_STATUS_SUCCESS) {
+		ret = -ENXIO;
+		goto out;
+	}
+
+	ret = data_size;
+	*ppos += ret;
+
+out:
+	iwl_free_resp(&hcmd);
+	return ret;
+}
+
+static const struct file_operations iwl_dbgfs_mem_ops = {
+	.read = iwl_dbgfs_mem_read,
+	.write = iwl_dbgfs_mem_write,
+	.open = simple_open,
+	.llseek = default_llseek,
+};
+
 int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir)
 {
 	struct dentry *bcast_dir __maybe_unused;
@@ -1502,6 +1682,7 @@
 	MVM_DEBUGFS_ADD_FILE(send_echo_cmd, mvm->debugfs_dir, S_IWUSR);
 	MVM_DEBUGFS_ADD_FILE(cont_recording, mvm->debugfs_dir, S_IWUSR);
 	MVM_DEBUGFS_ADD_FILE(indirection_tbl, mvm->debugfs_dir, S_IWUSR);
+	MVM_DEBUGFS_ADD_FILE(inject_packet, mvm->debugfs_dir, S_IWUSR);
 	if (!debugfs_create_bool("enable_scan_iteration_notif",
 				 S_IRUSR | S_IWUSR,
 				 mvm->debugfs_dir,
@@ -1560,6 +1741,9 @@
 				 mvm->debugfs_dir, &mvm->nvm_phy_sku_blob))
 		goto err;
 
+	debugfs_create_file("mem", S_IRUSR | S_IWUSR, dbgfs_dir, mvm,
+			    &iwl_dbgfs_mem_ops);
+
 	/*
 	 * Create a symlink with mac80211. It will be removed when mac80211
 	 * exists (before the opmode exists which removes the target.)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h
index 404b0de..3fa43d1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h
@@ -313,35 +313,26 @@
 	IWL_TX_POWER_MODE_SET_ACK = 3,
 }; /* TX_POWER_REDUCED_FLAGS_TYPE_API_E_VER_4 */;
 
+#define IWL_NUM_CHAIN_LIMITS	2
+#define IWL_NUM_SUB_BANDS	5
+
 /**
- * struct iwl_dev_tx_power_cmd_v2 - TX power reduction command
+ * struct iwl_dev_tx_power_cmd - TX power reduction command
  * @set_mode: see &enum iwl_dev_tx_power_cmd_mode
  * @mac_context_id: id of the mac ctx for which we are reducing TX power.
  * @pwr_restriction: TX power restriction in 1/8 dBms.
  * @dev_24: device TX power restriction in 1/8 dBms
  * @dev_52_low: device TX power restriction upper band - low
  * @dev_52_high: device TX power restriction upper band - high
+ * @per_chain_restriction: per chain restrictions
  */
-struct iwl_dev_tx_power_cmd_v2 {
+struct iwl_dev_tx_power_cmd_v3 {
 	__le32 set_mode;
 	__le32 mac_context_id;
 	__le16 pwr_restriction;
 	__le16 dev_24;
 	__le16 dev_52_low;
 	__le16 dev_52_high;
-} __packed; /* TX_REDUCED_POWER_API_S_VER_2 */
-
-#define IWL_NUM_CHAIN_LIMITS	2
-#define IWL_NUM_SUB_BANDS	5
-
-/**
- * struct iwl_dev_tx_power_cmd - TX power reduction command
- * @v2: version 2 of the command, embedded here for easier software handling
- * @per_chain_restriction: per chain restrictions
- */
-struct iwl_dev_tx_power_cmd_v3 {
-	/* v3 is just an extension of v2 - keep this here */
-	struct iwl_dev_tx_power_cmd_v2 v2;
 	__le16 per_chain_restriction[IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS];
 } __packed; /* TX_REDUCED_POWER_API_S_VER_3 */
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h
index f01dab0..0c294c9f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h
@@ -7,6 +7,7 @@
  *
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -603,6 +604,8 @@
  * @uid: scan id, &enum iwl_umac_scan_uid_offsets
  * @ooc_priority: out of channel priority - &enum iwl_scan_priority
  * @general_flags: &enum iwl_umac_scan_general_flags
+ * @reserved2: for future use and alignment
+ * @scan_start_mac_id: report the scan start TSF time according to this mac TSF
  * @extended_dwell: dwell time for channels 1, 6 and 11
  * @active_dwell: dwell time for active scan
  * @passive_dwell: dwell time for passive scan
@@ -620,8 +623,10 @@
 	__le32 flags;
 	__le32 uid;
 	__le32 ooc_priority;
-	/* SCAN_GENERAL_PARAMS_API_S_VER_1 */
-	__le32 general_flags;
+	/* SCAN_GENERAL_PARAMS_API_S_VER_4 */
+	__le16 general_flags;
+	u8 reserved2;
+	u8 scan_start_mac_id;
 	u8 extended_dwell;
 	u8 active_dwell;
 	u8 passive_dwell;
@@ -629,7 +634,7 @@
 	__le32 max_out_time;
 	__le32 suspend_time;
 	__le32 scan_priority;
-	/* SCAN_CHANNEL_PARAMS_API_S_VER_1 */
+	/* SCAN_CHANNEL_PARAMS_API_S_VER_4 */
 	u8 channel_flags;
 	u8 n_channels;
 	__le16 reserved;
@@ -718,8 +723,8 @@
  * @status: one of SCAN_COMP_STATUS_*
  * @bt_status: BT on/off status
  * @last_channel: last channel that was scanned
- * @tsf_low: TSF timer (lower half) in usecs
- * @tsf_high: TSF timer (higher half) in usecs
+ * @start_tsf: TSF timer in usecs of the scan start time for the mac specified
+ *	in &struct iwl_scan_req_umac.
  * @results: array of scan results, only "scanned_channels" of them are valid
  */
 struct iwl_umac_scan_iter_complete_notif {
@@ -728,9 +733,8 @@
 	u8 status;
 	u8 bt_status;
 	u8 last_channel;
-	__le32 tsf_low;
-	__le32 tsf_high;
+	__le64 start_tsf;
 	struct iwl_scan_results_notif results[];
-} __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_1 */
+} __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_2 */
 
 #endif
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h
index d1c4fb8..6c8e3ca 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h
@@ -433,25 +433,42 @@
 } __packed; /* REMOVE_STA_CMD_API_S_VER_2 */
 
 /**
+ * struct iwl_mvm_mgmt_mcast_key_cmd_v1
+ * ( MGMT_MCAST_KEY = 0x1f )
+ * @ctrl_flags: %iwl_sta_key_flag
+ * @igtk:
+ * @k1: unused
+ * @k2: unused
+ * @sta_id: station ID that support IGTK
+ * @key_id:
+ * @receive_seq_cnt: initial RSC/PN needed for replay check
+ */
+struct iwl_mvm_mgmt_mcast_key_cmd_v1 {
+	__le32 ctrl_flags;
+	u8 igtk[16];
+	u8 k1[16];
+	u8 k2[16];
+	__le32 key_id;
+	__le32 sta_id;
+	__le64 receive_seq_cnt;
+} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */
+
+/**
  * struct iwl_mvm_mgmt_mcast_key_cmd
  * ( MGMT_MCAST_KEY = 0x1f )
  * @ctrl_flags: %iwl_sta_key_flag
- * @IGTK:
- * @K1: unused
- * @K2: unused
+ * @igtk: IGTK master key
  * @sta_id: station ID that support IGTK
  * @key_id:
  * @receive_seq_cnt: initial RSC/PN needed for replay check
  */
 struct iwl_mvm_mgmt_mcast_key_cmd {
 	__le32 ctrl_flags;
-	u8 IGTK[16];
-	u8 K1[16];
-	u8 K2[16];
+	u8 igtk[32];
 	__le32 key_id;
 	__le32 sta_id;
 	__le64 receive_seq_cnt;
-} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */
+} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_2 */
 
 struct iwl_mvm_wep_key {
 	u8 key_index;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
index 4144623..59ca97a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h
@@ -89,7 +89,6 @@
  * @TX_CMD_FLG_MH_PAD: driver inserted 2 byte padding after MAC header.
  *	Should be set for 26/30 length MAC headers
  * @TX_CMD_FLG_RESP_TO_DRV: zero this if the response should go only to FW
- * @TX_CMD_FLG_CCMP_AGG: this frame uses CCMP for aggregation acceleration
  * @TX_CMD_FLG_TKIP_MIC_DONE: FW already performed TKIP MIC calculation
  * @TX_CMD_FLG_DUR: disable duration overwriting used in PS-Poll Assoc-id
  * @TX_CMD_FLG_FW_DROP: FW should mark frame to be dropped
@@ -116,7 +115,6 @@
 	TX_CMD_FLG_KEEP_SEQ_CTL		= BIT(18),
 	TX_CMD_FLG_MH_PAD		= BIT(20),
 	TX_CMD_FLG_RESP_TO_DRV		= BIT(21),
-	TX_CMD_FLG_CCMP_AGG		= BIT(22),
 	TX_CMD_FLG_TKIP_MIC_DONE	= BIT(23),
 	TX_CMD_FLG_DUR			= BIT(25),
 	TX_CMD_FLG_FW_DROP		= BIT(26),
@@ -149,7 +147,7 @@
  * @TX_CMD_SEC_EXT: extended cipher algorithm.
  * @TX_CMD_SEC_GCMP: GCMP encryption algorithm.
  * @TX_CMD_SEC_KEY128: set for 104 bits WEP key.
- * @TC_CMD_SEC_KEY_FROM_TABLE: for a non-WEP key, set if the key should be taken
+ * @TX_CMD_SEC_KEY_FROM_TABLE: for a non-WEP key, set if the key should be taken
  *	from the table instead of from the TX command.
  *	If the key is taken from the key table its index should be given by the
  *	first byte of the TX command key field.
@@ -161,7 +159,7 @@
 	TX_CMD_SEC_EXT			= 0x04,
 	TX_CMD_SEC_GCMP			= 0x05,
 	TX_CMD_SEC_KEY128		= 0x08,
-	TC_CMD_SEC_KEY_FROM_TABLE	= 0x08,
+	TX_CMD_SEC_KEY_FROM_TABLE	= 0x08,
 };
 
 /* TODO: how does these values are OK with only 16 bit variable??? */
@@ -578,6 +576,85 @@
 } __packed;
 
 /**
+ * struct iwl_mvm_compressed_ba_tfd - progress of a TFD queue
+ * @q_num: TFD queue number
+ * @tfd_index: Index of first un-acked frame in the  TFD queue
+ */
+struct iwl_mvm_compressed_ba_tfd {
+	u8 q_num;
+	u8 reserved;
+	__le16 tfd_index;
+} __packed; /* COMPRESSED_BA_TFD_API_S_VER_1 */
+
+/**
+ * struct iwl_mvm_compressed_ba_ratid - progress of a RA TID queue
+ * @q_num: RA TID queue number
+ * @tid: TID of the queue
+ * @ssn: BA window current SSN
+ */
+struct iwl_mvm_compressed_ba_ratid {
+	u8 q_num;
+	u8 tid;
+	__le16 ssn;
+} __packed; /* COMPRESSED_BA_RATID_API_S_VER_1 */
+
+/*
+ * enum iwl_mvm_ba_resp_flags - TX aggregation status
+ * @IWL_MVM_BA_RESP_TX_AGG: generated due to BA
+ * @IWL_MVM_BA_RESP_TX_BAR: generated due to BA after BAR
+ * @IWL_MVM_BA_RESP_TX_AGG_FAIL: aggregation didn't receive BA
+ * @IWL_MVM_BA_RESP_TX_UNDERRUN: aggregation got underrun
+ * @IWL_MVM_BA_RESP_TX_BT_KILL: aggregation got BT-kill
+ * @IWL_MVM_BA_RESP_TX_DSP_TIMEOUT: aggregation didn't finish within the
+ *	expected time
+ */
+enum iwl_mvm_ba_resp_flags {
+	IWL_MVM_BA_RESP_TX_AGG,
+	IWL_MVM_BA_RESP_TX_BAR,
+	IWL_MVM_BA_RESP_TX_AGG_FAIL,
+	IWL_MVM_BA_RESP_TX_UNDERRUN,
+	IWL_MVM_BA_RESP_TX_BT_KILL,
+	IWL_MVM_BA_RESP_TX_DSP_TIMEOUT
+};
+
+/**
+ * struct iwl_mvm_compressed_ba_notif - notifies about reception of BA
+ * ( BA_NOTIF = 0xc5 )
+ * @flags: status flag, see the &iwl_mvm_ba_resp_flags
+ * @sta_id: Index of recipient (BA-sending) station in fw's station table
+ * @reduced_txp: power reduced according to TPC. This is the actual value and
+ *	not a copy from the LQ command. Thus, if not the first rate was used
+ *	for Tx-ing then this value will be set to 0 by FW.
+ * @initial_rate: TLC rate info, initial rate index, TLC table color
+ * @retry_cnt: retry count
+ * @query_byte_cnt: SCD query byte count
+ * @query_frame_cnt: SCD query frame count
+ * @txed: number of frames sent in the aggregation (all-TIDs)
+ * @done: number of frames that were Acked by the BA (all-TIDs)
+ * @wireless_time: Wireless-media time
+ * @tx_rate: the rate the aggregation was sent at
+ * @tfd_cnt: number of TFD-Q elements
+ * @ra_tid_cnt: number of RATID-Q elements
+ */
+struct iwl_mvm_compressed_ba_notif {
+	__le32 flags;
+	u8 sta_id;
+	u8 reduced_txp;
+	u8 initial_rate;
+	u8 retry_cnt;
+	__le32 query_byte_cnt;
+	__le16 query_frame_cnt;
+	__le16 txed;
+	__le16 done;
+	__le32 wireless_time;
+	__le32 tx_rate;
+	__le16 tfd_cnt;
+	__le16 ra_tid_cnt;
+	struct iwl_mvm_compressed_ba_tfd tfd[1];
+	struct iwl_mvm_compressed_ba_ratid ra_tid[0];
+} __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */
+
+/**
  * struct iwl_mac_beacon_cmd_v6 - beacon template command
  * @tx: the tx commands associated with the beacon frame
  * @template_id: currently equal to the mac context id of the coresponding
@@ -675,13 +752,21 @@
 			    tx_resp->frame_count) & 0xfff;
 }
 
+/* Available options for the SCD_QUEUE_CFG HCMD */
+enum iwl_scd_cfg_actions {
+	SCD_CFG_DISABLE_QUEUE		= 0x0,
+	SCD_CFG_ENABLE_QUEUE		= 0x1,
+	SCD_CFG_UPDATE_QUEUE_TID	= 0x2,
+};
+
 /**
  * struct iwl_scd_txq_cfg_cmd - New txq hw scheduler config command
  * @token:
  * @sta_id: station id
  * @tid:
  * @scd_queue: scheduler queue to confiug
- * @enable: 1 queue enable, 0 queue disable
+ * @action: 1 queue enable, 0 queue disable, 2 change txq's tid owner
+ *	Value is one of %iwl_scd_cfg_actions options
  * @aggregate: 1 aggregated queue, 0 otherwise
  * @tx_fifo: %enum iwl_mvm_tx_fifo
  * @window: BA window size
@@ -692,7 +777,7 @@
 	u8 sta_id;
 	u8 tid;
 	u8 scd_queue;
-	u8 enable;
+	u8 action;
 	u8 aggregate;
 	u8 tx_fifo;
 	u8 window;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h
index 71076f0..9763369 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h
@@ -205,7 +205,7 @@
 	/* Phy */
 	PHY_CONFIGURATION_CMD = 0x6a,
 	CALIB_RES_NOTIF_PHY_DB = 0x6b,
-	/* PHY_DB_CMD = 0x6c, */
+	PHY_DB_CMD = 0x6c,
 
 	/* ToF - 802.11mc FTM */
 	TOF_CMD = 0x10,
@@ -340,6 +340,11 @@
 	STORED_BEACON_NTF = 0xFF,
 };
 
+enum iwl_fmac_debug_cmds {
+	LMAC_RD_WR = 0x0,
+	UMAC_RD_WR = 0x1,
+};
+
 /* command groups */
 enum {
 	LEGACY_GROUP = 0x0,
@@ -349,6 +354,7 @@
 	PHY_OPS_GROUP = 0x4,
 	DATA_PATH_GROUP = 0x5,
 	PROT_OFFLOAD_GROUP = 0xb,
+	DEBUG_GROUP = 0xf,
 };
 
 /**
@@ -482,13 +488,17 @@
  * @block_size: the block size in powers of 2
  * @block_num: number of blocks specified in the command.
  * @device_phy_addr: virtual addresses from device side
+ *	32 bit address for API version 1, 64 bit address for API version 2.
 */
 struct iwl_fw_paging_cmd {
 	__le32 flags;
 	__le32 block_size;
 	__le32 block_num;
-	__le32 device_phy_addr[NUM_OF_FW_PAGING_BLOCKS];
-} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_1 */
+	union {
+		__le32 addr32[NUM_OF_FW_PAGING_BLOCKS];
+		__le64 addr64[NUM_OF_FW_PAGING_BLOCKS];
+	} device_phy_addr;
+} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_2 */
 
 /*
  * Fw items ID's
@@ -1973,8 +1983,9 @@
 	struct iwl_tdls_config_sta_info_res sta_info[IWL_MVM_TDLS_STA_COUNT];
 } __packed; /* TDLS_CONFIG_RSP_API_S_VER_1 */
 
-#define TX_FIFO_MAX_NUM		8
-#define RX_FIFO_MAX_NUM		2
+#define TX_FIFO_MAX_NUM_9000		8
+#define TX_FIFO_MAX_NUM			15
+#define RX_FIFO_MAX_NUM			2
 #define TX_FIFO_INTERNAL_MAX_NUM	6
 
 /**
@@ -2000,6 +2011,21 @@
  * NOTE: on firmware that don't have IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG
  *	 set, the last 3 members don't exist.
  */
+struct iwl_shared_mem_cfg_v1 {
+	__le32 shared_mem_addr;
+	__le32 shared_mem_size;
+	__le32 sample_buff_addr;
+	__le32 sample_buff_size;
+	__le32 txfifo_addr;
+	__le32 txfifo_size[TX_FIFO_MAX_NUM_9000];
+	__le32 rxfifo_size[RX_FIFO_MAX_NUM];
+	__le32 page_buff_addr;
+	__le32 page_buff_size;
+	__le32 rxfifo_addr;
+	__le32 internal_txfifo_addr;
+	__le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
+} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */
+
 struct iwl_shared_mem_cfg {
 	__le32 shared_mem_addr;
 	__le32 shared_mem_size;
@@ -2013,7 +2039,7 @@
 	__le32 rxfifo_addr;
 	__le32 internal_txfifo_addr;
 	__le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
-} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */
+} __packed; /* SHARED_MEM_ALLOC_API_S_VER_3 */
 
 /**
  * VHT MU-MIMO group configuration
@@ -2129,4 +2155,48 @@
 	__le32 id_and_color;
 } __packed; /* CHANNEL_SWITCH_START_NTFY_API_S_VER_1 */
 
+/* Operation types for the debug mem access */
+enum {
+	DEBUG_MEM_OP_READ = 0,
+	DEBUG_MEM_OP_WRITE = 1,
+	DEBUG_MEM_OP_WRITE_BYTES = 2,
+};
+
+#define DEBUG_MEM_MAX_SIZE_DWORDS 32
+
+/**
+ * struct iwl_dbg_mem_access_cmd - Request the device to read/write memory
+ * @op: DEBUG_MEM_OP_*
+ * @addr: address to read/write from/to
+ * @len: in dwords, to read/write
+ * @data: for write opeations, contains the source buffer
+ */
+struct iwl_dbg_mem_access_cmd {
+	__le32 op;
+	__le32 addr;
+	__le32 len;
+	__le32 data[];
+} __packed; /* DEBUG_(U|L)MAC_RD_WR_CMD_API_S_VER_1 */
+
+/* Status responses for the debug mem access */
+enum {
+	DEBUG_MEM_STATUS_SUCCESS = 0x0,
+	DEBUG_MEM_STATUS_FAILED = 0x1,
+	DEBUG_MEM_STATUS_LOCKED = 0x2,
+	DEBUG_MEM_STATUS_HIDDEN = 0x3,
+	DEBUG_MEM_STATUS_LENGTH = 0x4,
+};
+
+/**
+ * struct iwl_dbg_mem_access_rsp - Response to debug mem commands
+ * @status: DEBUG_MEM_STATUS_*
+ * @len: read dwords (0 for write operations)
+ * @data: contains the read DWs
+ */
+struct iwl_dbg_mem_access_rsp {
+	__le32 status;
+	__le32 len;
+	__le32 data[];
+} __packed; /* DEBUG_(U|L)MAC_RD_WR_RSP_API_S_VER_1 */
+
 #endif /* __fw_api_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
index 1abcabb..d89d0a1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c
@@ -440,14 +440,12 @@
 	{ .start = 0x00a04560, .end = 0x00a0457c },
 	{ .start = 0x00a04590, .end = 0x00a04598 },
 	{ .start = 0x00a045c0, .end = 0x00a045f4 },
-	{ .start = 0x00a44000, .end = 0x00a7bf80 },
 };
 
 static const struct iwl_prph_range iwl_prph_dump_addr_9000[] = {
 	{ .start = 0x00a05c00, .end = 0x00a05c18 },
 	{ .start = 0x00a05400, .end = 0x00a056e8 },
 	{ .start = 0x00a08000, .end = 0x00a098bc },
-	{ .start = 0x00adfc00, .end = 0x00adfd1c },
 	{ .start = 0x00a02400, .end = 0x00a02758 },
 };
 
@@ -559,7 +557,7 @@
 					 sizeof(struct iwl_fw_error_dump_fifo);
 		}
 
-		for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) {
+		for (i = 0; i < mem_cfg->num_txfifo_entries; i++) {
 			if (!mem_cfg->txfifo_size[i])
 				continue;
 
@@ -960,5 +958,6 @@
 	}
 
 	mvm->fw_dbg_conf = conf_id;
-	return ret;
+
+	return 0;
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h
index f7dff76..e9f1be9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.h
@@ -105,7 +105,8 @@
 {
 	u32 trig_vif = le32_to_cpu(trig->vif_type);
 
-	return trig_vif == IWL_FW_DBG_CONF_VIF_ANY || vif->type == trig_vif;
+	return trig_vif == IWL_FW_DBG_CONF_VIF_ANY ||
+	       ieee80211_vif_type_p2p(vif) == trig_vif;
 }
 
 static inline bool
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 7e0cdbf..8720663 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -90,15 +90,6 @@
 	u32 scd_base_addr;
 };
 
-static inline const struct fw_img *
-iwl_get_ucode_image(struct iwl_mvm *mvm, enum iwl_ucode_type ucode_type)
-{
-	if (ucode_type >= IWL_UCODE_TYPE_MAX)
-		return NULL;
-
-	return &mvm->fw->img[ucode_type];
-}
-
 static int iwl_send_tx_ant_cfg(struct iwl_mvm *mvm, u8 valid_tx_ant)
 {
 	struct iwl_tx_ant_cfg_cmd tx_ant_cmd = {
@@ -385,9 +376,7 @@
 /* send paging cmd to FW in case CPU2 has paging image */
 static int iwl_send_paging_cmd(struct iwl_mvm *mvm, const struct fw_img *fw)
 {
-	int blk_idx;
-	__le32 dev_phy_addr;
-	struct iwl_fw_paging_cmd fw_paging_cmd = {
+	struct iwl_fw_paging_cmd paging_cmd = {
 		.flags =
 			cpu_to_le32(PAGING_CMD_IS_SECURED |
 				    PAGING_CMD_IS_ENABLED |
@@ -396,18 +385,32 @@
 		.block_size = cpu_to_le32(BLOCK_2_EXP_SIZE),
 		.block_num = cpu_to_le32(mvm->num_of_paging_blk),
 	};
+	int blk_idx, size = sizeof(paging_cmd);
+
+	/* A bit hard coded - but this is the old API and will be deprecated */
+	if (!iwl_mvm_has_new_tx_api(mvm))
+		size -= NUM_OF_FW_PAGING_BLOCKS * 4;
 
 	/* loop for for all paging blocks + CSS block */
 	for (blk_idx = 0; blk_idx < mvm->num_of_paging_blk + 1; blk_idx++) {
-		dev_phy_addr =
-			cpu_to_le32(mvm->fw_paging_db[blk_idx].fw_paging_phys >>
-				    PAGE_2_EXP_SIZE);
-		fw_paging_cmd.device_phy_addr[blk_idx] = dev_phy_addr;
+		dma_addr_t addr = mvm->fw_paging_db[blk_idx].fw_paging_phys;
+
+		addr = addr >> PAGE_2_EXP_SIZE;
+
+		if (iwl_mvm_has_new_tx_api(mvm)) {
+			__le64 phy_addr = cpu_to_le64(addr);
+
+			paging_cmd.device_phy_addr.addr64[blk_idx] = phy_addr;
+		} else {
+			__le32 phy_addr = cpu_to_le32(addr);
+
+			paging_cmd.device_phy_addr.addr32[blk_idx] = phy_addr;
+		}
 	}
 
 	return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(FW_PAGING_BLOCK_CMD,
 						    IWL_ALWAYS_LONG_GROUP, 0),
-				    0, sizeof(fw_paging_cmd), &fw_paging_cmd);
+				    0, size, &paging_cmd);
 }
 
 /*
@@ -580,9 +583,9 @@
 	    iwl_fw_dbg_conf_usniffer(mvm->fw, FW_DBG_START_FROM_ALIVE) &&
 	    !(fw_has_capa(&mvm->fw->ucode_capa,
 			  IWL_UCODE_TLV_CAPA_USNIFFER_UNIFIED)))
-		fw = iwl_get_ucode_image(mvm, IWL_UCODE_REGULAR_USNIFFER);
+		fw = iwl_get_ucode_image(mvm->fw, IWL_UCODE_REGULAR_USNIFFER);
 	else
-		fw = iwl_get_ucode_image(mvm, ucode_type);
+		fw = iwl_get_ucode_image(mvm->fw, ucode_type);
 	if (WARN_ON(!fw))
 		return -EINVAL;
 	mvm->cur_ucode = ucode_type;
@@ -826,6 +829,59 @@
 	return ret;
 }
 
+static void iwl_mvm_parse_shared_mem_a000(struct iwl_mvm *mvm,
+					  struct iwl_rx_packet *pkt)
+{
+	struct iwl_shared_mem_cfg *mem_cfg = (void *)pkt->data;
+	int i;
+
+	mvm->shared_mem_cfg.num_txfifo_entries =
+		ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size);
+	for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++)
+		mvm->shared_mem_cfg.txfifo_size[i] =
+			le32_to_cpu(mem_cfg->txfifo_size[i]);
+	for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++)
+		mvm->shared_mem_cfg.rxfifo_size[i] =
+			le32_to_cpu(mem_cfg->rxfifo_size[i]);
+
+	BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) !=
+		     sizeof(mem_cfg->internal_txfifo_size));
+
+	for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size);
+	     i++)
+		mvm->shared_mem_cfg.internal_txfifo_size[i] =
+			le32_to_cpu(mem_cfg->internal_txfifo_size[i]);
+}
+
+static void iwl_mvm_parse_shared_mem(struct iwl_mvm *mvm,
+				     struct iwl_rx_packet *pkt)
+{
+	struct iwl_shared_mem_cfg_v1 *mem_cfg = (void *)pkt->data;
+	int i;
+
+	mvm->shared_mem_cfg.num_txfifo_entries =
+		ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size);
+	for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++)
+		mvm->shared_mem_cfg.txfifo_size[i] =
+			le32_to_cpu(mem_cfg->txfifo_size[i]);
+	for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++)
+		mvm->shared_mem_cfg.rxfifo_size[i] =
+			le32_to_cpu(mem_cfg->rxfifo_size[i]);
+
+	/* new API has more data, from rxfifo_addr field and on */
+	if (fw_has_capa(&mvm->fw->ucode_capa,
+			IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) {
+		BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) !=
+			     sizeof(mem_cfg->internal_txfifo_size));
+
+		for (i = 0;
+		     i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size);
+		     i++)
+			mvm->shared_mem_cfg.internal_txfifo_size[i] =
+				le32_to_cpu(mem_cfg->internal_txfifo_size[i]);
+	}
+}
+
 static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm)
 {
 	struct iwl_host_cmd cmd = {
@@ -833,9 +889,7 @@
 		.data = { NULL, },
 		.len = { 0, },
 	};
-	struct iwl_shared_mem_cfg *mem_cfg;
 	struct iwl_rx_packet *pkt;
-	u32 i;
 
 	lockdep_assert_held(&mvm->mutex);
 
@@ -849,45 +903,10 @@
 		return;
 
 	pkt = cmd.resp_pkt;
-	mem_cfg = (void *)pkt->data;
-
-	mvm->shared_mem_cfg.shared_mem_addr =
-		le32_to_cpu(mem_cfg->shared_mem_addr);
-	mvm->shared_mem_cfg.shared_mem_size =
-		le32_to_cpu(mem_cfg->shared_mem_size);
-	mvm->shared_mem_cfg.sample_buff_addr =
-		le32_to_cpu(mem_cfg->sample_buff_addr);
-	mvm->shared_mem_cfg.sample_buff_size =
-		le32_to_cpu(mem_cfg->sample_buff_size);
-	mvm->shared_mem_cfg.txfifo_addr = le32_to_cpu(mem_cfg->txfifo_addr);
-	for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); i++)
-		mvm->shared_mem_cfg.txfifo_size[i] =
-			le32_to_cpu(mem_cfg->txfifo_size[i]);
-	for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++)
-		mvm->shared_mem_cfg.rxfifo_size[i] =
-			le32_to_cpu(mem_cfg->rxfifo_size[i]);
-	mvm->shared_mem_cfg.page_buff_addr =
-		le32_to_cpu(mem_cfg->page_buff_addr);
-	mvm->shared_mem_cfg.page_buff_size =
-		le32_to_cpu(mem_cfg->page_buff_size);
-
-	/* new API has more data */
-	if (fw_has_capa(&mvm->fw->ucode_capa,
-			IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) {
-		mvm->shared_mem_cfg.rxfifo_addr =
-			le32_to_cpu(mem_cfg->rxfifo_addr);
-		mvm->shared_mem_cfg.internal_txfifo_addr =
-			le32_to_cpu(mem_cfg->internal_txfifo_addr);
-
-		BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) !=
-			     sizeof(mem_cfg->internal_txfifo_size));
-
-		for (i = 0;
-		     i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size);
-		     i++)
-			mvm->shared_mem_cfg.internal_txfifo_size[i] =
-				le32_to_cpu(mem_cfg->internal_txfifo_size[i]);
-	}
+	if (iwl_mvm_has_new_tx_api(mvm))
+		iwl_mvm_parse_shared_mem_a000(mvm, pkt);
+	else
+		iwl_mvm_parse_shared_mem(mvm, pkt);
 
 	IWL_DEBUG_INFO(mvm, "SHARED MEM CFG: got memory offsets/sizes\n");
 
@@ -1027,19 +1046,11 @@
 {
 	struct iwl_mvm_sar_table sar_table;
 	struct iwl_dev_tx_power_cmd cmd = {
-		.v3.v2.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS),
+		.v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS),
 	};
 	int ret, i, j, idx;
 	int len = sizeof(cmd);
 
-	/* we can't do anything with the table if the FW doesn't support it */
-	if (!fw_has_api(&mvm->fw->ucode_capa,
-			IWL_UCODE_TLV_API_TX_POWER_CHAIN)) {
-		IWL_DEBUG_RADIO(mvm,
-				"FW doesn't support per-chain TX power settings.\n");
-		return 0;
-	}
-
 	if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK))
 		len = sizeof(cmd.v3);
 
@@ -1096,27 +1107,27 @@
 	 * (for example, if we were in RFKILL)
 	 */
 	ret = iwl_run_init_mvm_ucode(mvm, false);
-	if (ret && !iwlmvm_mod_params.init_dbg) {
+
+	if (iwlmvm_mod_params.init_dbg)
+		return 0;
+
+	if (ret) {
 		IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", ret);
 		/* this can't happen */
 		if (WARN_ON(ret > 0))
 			ret = -ERFKILL;
 		goto error;
 	}
-	if (!iwlmvm_mod_params.init_dbg) {
-		/*
-		 * Stop and start the transport without entering low power
-		 * mode. This will save the state of other components on the
-		 * device that are triggered by the INIT firwmare (MFUART).
-		 */
-		_iwl_trans_stop_device(mvm->trans, false);
-		ret = _iwl_trans_start_hw(mvm->trans, false);
-		if (ret)
-			goto error;
-	}
 
-	if (iwlmvm_mod_params.init_dbg)
-		return 0;
+	/*
+	 * Stop and start the transport without entering low power
+	 * mode. This will save the state of other components on the
+	 * device that are triggered by the INIT firwmare (MFUART).
+	 */
+	_iwl_trans_stop_device(mvm->trans, false);
+	ret = _iwl_trans_start_hw(mvm->trans, false);
+	if (ret)
+		goto error;
 
 	ret = iwl_mvm_load_ucode_wait_alive(mvm, IWL_UCODE_REGULAR);
 	if (ret) {
@@ -1214,9 +1225,12 @@
 	}
 
 	/* TODO: read the budget from BIOS / Platform NVM */
-	if (iwl_mvm_is_ctdp_supported(mvm) && mvm->cooling_dev.cur_state > 0)
+	if (iwl_mvm_is_ctdp_supported(mvm) && mvm->cooling_dev.cur_state > 0) {
 		ret = iwl_mvm_ctdp_command(mvm, CTDP_CMD_OPERATION_START,
 					   mvm->cooling_dev.cur_state);
+		if (ret)
+			goto error;
+	}
 #else
 	/* Initialize tx backoffs to the minimal possible */
 	iwl_mvm_tt_tx_backoff(mvm, 0);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
index 69c42ce..6b962d6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c
@@ -539,6 +539,11 @@
 			iwl_mvm_disable_txq(mvm, IWL_MVM_OFFCHANNEL_QUEUE,
 					    IWL_MVM_OFFCHANNEL_QUEUE,
 					    IWL_MAX_TID_COUNT, 0);
+		else
+			iwl_mvm_disable_txq(mvm,
+					    IWL_MVM_DQA_P2P_DEVICE_QUEUE,
+					    vif->hw_queue[0], IWL_MAX_TID_COUNT,
+					    0);
 
 		break;
 	case NL80211_IFTYPE_AP:
@@ -769,26 +774,6 @@
 		cmd->ac[txf].fifos_mask = BIT(txf);
 	}
 
-	if (vif->type == NL80211_IFTYPE_AP) {
-		/* in AP mode, the MCAST FIFO takes the EDCA params from VO */
-		cmd->ac[IWL_MVM_TX_FIFO_VO].fifos_mask |=
-			BIT(IWL_MVM_TX_FIFO_MCAST);
-
-		/*
-		 * in AP mode, pass probe requests and beacons from other APs
-		 * (needed for ht protection); when there're no any associated
-		 * station don't ask FW to pass beacons to prevent unnecessary
-		 * wake-ups.
-		 */
-		cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST);
-		if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) {
-			cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON);
-			IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n");
-		} else {
-			IWL_DEBUG_HC(mvm, "No need to receive beacons\n");
-		}
-	}
-
 	if (vif->bss_conf.qos)
 		cmd->qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA);
 
@@ -1186,6 +1171,7 @@
  */
 static void iwl_mvm_mac_ctxt_cmd_fill_ap(struct iwl_mvm *mvm,
 					 struct ieee80211_vif *vif,
+					 struct iwl_mac_ctx_cmd *cmd,
 					 struct iwl_mac_data_ap *ctxt_ap,
 					 bool add)
 {
@@ -1196,6 +1182,23 @@
 		.beacon_device_ts = 0
 	};
 
+	/* in AP mode, the MCAST FIFO takes the EDCA params from VO */
+	cmd->ac[IWL_MVM_TX_FIFO_VO].fifos_mask |= BIT(IWL_MVM_TX_FIFO_MCAST);
+
+	/*
+	 * in AP mode, pass probe requests and beacons from other APs
+	 * (needed for ht protection); when there're no any associated
+	 * station don't ask FW to pass beacons to prevent unnecessary
+	 * wake-ups.
+	 */
+	cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST);
+	if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) {
+		cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON);
+		IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n");
+	} else {
+		IWL_DEBUG_HC(mvm, "No need to receive beacons\n");
+	}
+
 	ctxt_ap->bi = cpu_to_le32(vif->bss_conf.beacon_int);
 	ctxt_ap->bi_reciprocal =
 		cpu_to_le32(iwl_mvm_reciprocal(vif->bss_conf.beacon_int));
@@ -1253,7 +1256,7 @@
 	iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action);
 
 	/* Fill the data specific for ap mode */
-	iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.ap,
+	iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd, &cmd.ap,
 				     action == FW_CTXT_ACTION_ADD);
 
 	return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd);
@@ -1272,7 +1275,7 @@
 	iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action);
 
 	/* Fill the data specific for GO mode */
-	iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.go.ap,
+	iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd, &cmd.go.ap,
 				     action == FW_CTXT_ACTION_ADD);
 
 	cmd.go.ctwin = cpu_to_le32(noa->oppps_ctwindow &
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 6d60645..318efd8 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -465,7 +465,7 @@
 	hw->uapsd_queues = IWL_MVM_UAPSD_QUEUES;
 	hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP;
 
-	BUILD_BUG_ON(ARRAY_SIZE(mvm->ciphers) < ARRAY_SIZE(mvm_ciphers) + 4);
+	BUILD_BUG_ON(ARRAY_SIZE(mvm->ciphers) < ARRAY_SIZE(mvm_ciphers) + 6);
 	memcpy(mvm->ciphers, mvm_ciphers, sizeof(mvm_ciphers));
 	hw->wiphy->n_cipher_suites = ARRAY_SIZE(mvm_ciphers);
 	hw->wiphy->cipher_suites = mvm->ciphers;
@@ -479,17 +479,23 @@
 		hw->wiphy->n_cipher_suites++;
 	}
 
-	/*
-	 * Enable 11w if advertised by firmware and software crypto
-	 * is not enabled (as the firmware will interpret some mgmt
-	 * packets, so enabling it with software crypto isn't safe)
+	/* Enable 11w if software crypto is not enabled (as the
+	 * firmware will interpret some mgmt packets, so enabling it
+	 * with software crypto isn't safe).
 	 */
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_MFP &&
-	    !iwlwifi_mod_params.sw_crypto) {
+	if (!iwlwifi_mod_params.sw_crypto) {
 		ieee80211_hw_set(hw, MFP_CAPABLE);
 		mvm->ciphers[hw->wiphy->n_cipher_suites] =
 			WLAN_CIPHER_SUITE_AES_CMAC;
 		hw->wiphy->n_cipher_suites++;
+		if (iwl_mvm_has_new_rx_api(mvm)) {
+			mvm->ciphers[hw->wiphy->n_cipher_suites] =
+				WLAN_CIPHER_SUITE_BIP_GMAC_128;
+			hw->wiphy->n_cipher_suites++;
+			mvm->ciphers[hw->wiphy->n_cipher_suites] =
+				WLAN_CIPHER_SUITE_BIP_GMAC_256;
+			hw->wiphy->n_cipher_suites++;
+		}
 	}
 
 	/* currently FW API supports only one optional cipher scheme */
@@ -539,9 +545,7 @@
 		hw->wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG |
 					       REGULATORY_DISABLE_BEACON_HINTS;
 
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_GO_UAPSD)
-		hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD;
-
+	hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD;
 	hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH;
 
 	hw->wiphy->iface_combinations = iwl_mvm_iface_combinations;
@@ -624,6 +628,7 @@
 	hw->wiphy->features |= NL80211_FEATURE_P2P_GO_CTWIN |
 			       NL80211_FEATURE_LOW_PRIORITY_SCAN |
 			       NL80211_FEATURE_P2P_GO_OPPPS |
+			       NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE |
 			       NL80211_FEATURE_DYNAMIC_SMPS |
 			       NL80211_FEATURE_STATIC_SMPS |
 			       NL80211_FEATURE_SUPPORTS_WMM_ADMISSION;
@@ -644,6 +649,16 @@
 			IWL_UCODE_TLV_CAPA_WFA_TPC_REP_IE_SUPPORT))
 		hw->wiphy->features |= NL80211_FEATURE_WFA_TPC_IE_IN_PROBES;
 
+	if (fw_has_api(&mvm->fw->ucode_capa,
+		       IWL_UCODE_TLV_API_SCAN_TSF_REPORT)) {
+		wiphy_ext_feature_set(hw->wiphy,
+				      NL80211_EXT_FEATURE_SCAN_START_TIME);
+		wiphy_ext_feature_set(hw->wiphy,
+				      NL80211_EXT_FEATURE_BSS_PARENT_TSF);
+		wiphy_ext_feature_set(hw->wiphy,
+				      NL80211_EXT_FEATURE_SET_SCAN_DWELL);
+	}
+
 	mvm->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD;
 
 #ifdef CONFIG_PM_SLEEP
@@ -711,6 +726,10 @@
 	if (ret)
 		iwl_mvm_leds_exit(mvm);
 
+	if (mvm->cfg->vht_mu_mimo_supported)
+		wiphy_ext_feature_set(hw->wiphy,
+				      NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER);
+
 	return ret;
 }
 
@@ -1250,20 +1269,18 @@
 				s16 tx_power)
 {
 	struct iwl_dev_tx_power_cmd cmd = {
-		.v3.v2.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_MAC),
-		.v3.v2.mac_context_id =
+		.v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_MAC),
+		.v3.mac_context_id =
 			cpu_to_le32(iwl_mvm_vif_from_mac80211(vif)->id),
-		.v3.v2.pwr_restriction = cpu_to_le16(8 * tx_power),
+		.v3.pwr_restriction = cpu_to_le16(8 * tx_power),
 	};
 	int len = sizeof(cmd);
 
 	if (tx_power == IWL_DEFAULT_MAX_TX_POWER)
-		cmd.v3.v2.pwr_restriction = cpu_to_le16(IWL_DEV_MAX_TX_POWER);
+		cmd.v3.pwr_restriction = cpu_to_le16(IWL_DEV_MAX_TX_POWER);
 
 	if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK))
 		len = sizeof(cmd.v3);
-	if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_TX_POWER_CHAIN))
-		len = sizeof(cmd.v3.v2);
 
 	return iwl_mvm_send_cmd_pdu(mvm, REDUCE_TX_POWER_CMD, 0, len, &cmd);
 }
@@ -2220,6 +2237,10 @@
 	case NL80211_IFTYPE_ADHOC:
 		iwl_mvm_bss_info_changed_ap_ibss(mvm, vif, bss_conf, changes);
 		break;
+	case NL80211_IFTYPE_MONITOR:
+		if (changes & BSS_CHANGED_MU_GROUPS)
+			iwl_mvm_update_mu_groups(mvm, vif);
+		break;
 	default:
 		/* shouldn't happen */
 		WARN_ON_ONCE(1);
@@ -2746,6 +2767,8 @@
 		key->flags |= IEEE80211_KEY_FLAG_PUT_IV_SPACE;
 		break;
 	case WLAN_CIPHER_SUITE_AES_CMAC:
+	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
 		WARN_ON_ONCE(!ieee80211_hw_check(hw, MFP_CAPABLE));
 		break;
 	case WLAN_CIPHER_SUITE_WEP40:
@@ -2779,9 +2802,11 @@
 			 * GTK on AP interface is a TX-only key, return 0;
 			 * on IBSS they're per-station and because we're lazy
 			 * we don't support them for RX, so do the same.
-			 * CMAC in AP/IBSS modes must be done in software.
+			 * CMAC/GMAC in AP/IBSS modes must be done in software.
 			 */
-			if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC)
+			if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
+			    key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
+			    key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)
 				ret = -EOPNOTSUPP;
 			else
 				ret = 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index b4fc86d..d17cbf6 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -467,6 +467,8 @@
 static inline struct iwl_mvm_vif *
 iwl_mvm_vif_from_mac80211(struct ieee80211_vif *vif)
 {
+	if (!vif)
+		return NULL;
 	return (void *)vif->drv_priv;
 }
 
@@ -602,16 +604,9 @@
 };
 
 struct iwl_mvm_shared_mem_cfg {
-	u32 shared_mem_addr;
-	u32 shared_mem_size;
-	u32 sample_buff_addr;
-	u32 sample_buff_size;
-	u32 txfifo_addr;
+	int num_txfifo_entries;
 	u32 txfifo_size[TX_FIFO_MAX_NUM];
 	u32 rxfifo_size[RX_FIFO_MAX_NUM];
-	u32 page_buff_addr;
-	u32 page_buff_size;
-	u32 rxfifo_addr;
 	u32 internal_txfifo_addr;
 	u32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
 };
@@ -697,6 +692,10 @@
  *	it. In this state, when a new queue is needed to be allocated but no
  *	such free queue exists, an inactive queue might be freed and given to
  *	the new RA/TID.
+ * @IWL_MVM_QUEUE_RECONFIGURING: queue is being reconfigured
+ *	This is the state of a queue that has had traffic pass through it, but
+ *	needs to be reconfigured for some reason, e.g. the queue needs to
+ *	become unshared and aggregations re-enabled on.
  */
 enum iwl_mvm_queue_status {
 	IWL_MVM_QUEUE_FREE,
@@ -704,10 +703,11 @@
 	IWL_MVM_QUEUE_READY,
 	IWL_MVM_QUEUE_SHARED,
 	IWL_MVM_QUEUE_INACTIVE,
+	IWL_MVM_QUEUE_RECONFIGURING,
 };
 
 #define IWL_MVM_DQA_QUEUE_TIMEOUT	(5 * HZ)
-#define IWL_MVM_NUM_CIPHERS             8
+#define IWL_MVM_NUM_CIPHERS             10
 
 struct iwl_mvm {
 	/* for logger access */
@@ -767,6 +767,7 @@
 		u8 ra_sta_id; /* The RA this queue is mapped to, if exists */
 		bool reserved; /* Is this the TXQ reserved for a STA */
 		u8 mac80211_ac; /* The mac80211 AC this queue is mapped to */
+		u8 txq_tid; /* The TID "owner" of this queue*/
 		u16 tid_bitmap; /* Bitmap of the TIDs mapped to this queue */
 		/* Timestamp for inactivation per TID of this queue */
 		unsigned long last_frame_time[IWL_MAX_TID_COUNT + 1];
@@ -820,6 +821,12 @@
 	/* UMAC scan tracking */
 	u32 scan_uid_status[IWL_MVM_MAX_UMAC_SCANS];
 
+	/* start time of last scan in TSF of the mac that requested the scan */
+	u64 scan_start;
+
+	/* the vif that requested the current scan */
+	struct iwl_mvm_vif *scan_vif;
+
 	/* rx chain antennas set through debugfs for the scan command */
 	u8 scan_rx_ant;
 
@@ -1122,6 +1129,18 @@
 		(mvm->trans->runtime_pm_mode != IWL_PLAT_PM_MODE_D0I3);
 }
 
+static inline bool iwl_mvm_is_dqa_data_queue(struct iwl_mvm *mvm, u8 queue)
+{
+	return (queue >= IWL_MVM_DQA_MIN_DATA_QUEUE) &&
+	       (queue <= IWL_MVM_DQA_MAX_DATA_QUEUE);
+}
+
+static inline bool iwl_mvm_is_dqa_mgmt_queue(struct iwl_mvm *mvm, u8 queue)
+{
+	return (queue >= IWL_MVM_DQA_MIN_MGMT_QUEUE) &&
+	       (queue <= IWL_MVM_DQA_MAX_MGMT_QUEUE);
+}
+
 static inline bool iwl_mvm_is_lar_supported(struct iwl_mvm *mvm)
 {
 	bool nvm_lar = mvm->nvm_data->lar_enabled;
@@ -1192,6 +1211,12 @@
 			   IWL_UCODE_TLV_CAPA_MULTI_QUEUE_RX_SUPPORT);
 }
 
+static inline bool iwl_mvm_has_new_tx_api(struct iwl_mvm *mvm)
+{
+	/* TODO - replace with TLV once defined */
+	return mvm->trans->cfg->use_tfh;
+}
+
 static inline bool iwl_mvm_is_tt_in_fw(struct iwl_mvm *mvm)
 {
 #ifdef CONFIG_THERMAL
@@ -1243,6 +1268,7 @@
 void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm);
 u8 first_antenna(u8 mask);
 u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx);
+void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime);
 
 /* Tx / Host Commands */
 int __must_check iwl_mvm_send_cmd(struct iwl_mvm *mvm,
@@ -1279,8 +1305,6 @@
 
 	tx_cmd->sec_ctl = TX_CMD_SEC_CCM;
 	memcpy(tx_cmd->key, keyconf->key, keyconf->keylen);
-	if (info->flags & IEEE80211_TX_CTL_AMPDU)
-		tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_CCMP_AGG);
 }
 
 static inline void iwl_mvm_wait_for_async_handlers(struct iwl_mvm *mvm)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
index 7a686f6..eade099 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
@@ -66,7 +66,6 @@
  *****************************************************************************/
 #include <linux/firmware.h>
 #include <linux/rtnetlink.h>
-#include <linux/acpi.h>
 #include "iwl-trans.h"
 #include "iwl-csr.h"
 #include "mvm.h"
@@ -751,96 +750,6 @@
 	return resp_cp;
 }
 
-#ifdef CONFIG_ACPI
-#define WRD_METHOD		"WRDD"
-#define WRDD_WIFI		(0x07)
-#define WRDD_WIGIG		(0x10)
-
-static u32 iwl_mvm_wrdd_get_mcc(struct iwl_mvm *mvm, union acpi_object *wrdd)
-{
-	union acpi_object *mcc_pkg, *domain_type, *mcc_value;
-	u32 i;
-
-	if (wrdd->type != ACPI_TYPE_PACKAGE ||
-	    wrdd->package.count < 2 ||
-	    wrdd->package.elements[0].type != ACPI_TYPE_INTEGER ||
-	    wrdd->package.elements[0].integer.value != 0) {
-		IWL_DEBUG_LAR(mvm, "Unsupported wrdd structure\n");
-		return 0;
-	}
-
-	for (i = 1 ; i < wrdd->package.count ; ++i) {
-		mcc_pkg = &wrdd->package.elements[i];
-
-		if (mcc_pkg->type != ACPI_TYPE_PACKAGE ||
-		    mcc_pkg->package.count < 2 ||
-		    mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER ||
-		    mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
-			mcc_pkg = NULL;
-			continue;
-		}
-
-		domain_type = &mcc_pkg->package.elements[0];
-		if (domain_type->integer.value == WRDD_WIFI)
-			break;
-
-		mcc_pkg = NULL;
-	}
-
-	if (mcc_pkg) {
-		mcc_value = &mcc_pkg->package.elements[1];
-		return mcc_value->integer.value;
-	}
-
-	return 0;
-}
-
-static int iwl_mvm_get_bios_mcc(struct iwl_mvm *mvm, char *mcc)
-{
-	acpi_handle root_handle;
-	acpi_handle handle;
-	struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL};
-	acpi_status status;
-	u32 mcc_val;
-
-	root_handle = ACPI_HANDLE(mvm->dev);
-	if (!root_handle) {
-		IWL_DEBUG_LAR(mvm,
-			      "Could not retrieve root port ACPI handle\n");
-		return -ENOENT;
-	}
-
-	/* Get the method's handle */
-	status = acpi_get_handle(root_handle, (acpi_string)WRD_METHOD, &handle);
-	if (ACPI_FAILURE(status)) {
-		IWL_DEBUG_LAR(mvm, "WRD method not found\n");
-		return -ENOENT;
-	}
-
-	/* Call WRDD with no arguments */
-	status = acpi_evaluate_object(handle, NULL, NULL, &wrdd);
-	if (ACPI_FAILURE(status)) {
-		IWL_DEBUG_LAR(mvm, "WRDC invocation failed (0x%x)\n", status);
-		return -ENOENT;
-	}
-
-	mcc_val = iwl_mvm_wrdd_get_mcc(mvm, wrdd.pointer);
-	kfree(wrdd.pointer);
-	if (!mcc_val)
-		return -ENOENT;
-
-	mcc[0] = (mcc_val >> 8) & 0xff;
-	mcc[1] = mcc_val & 0xff;
-	mcc[2] = '\0';
-	return 0;
-}
-#else /* CONFIG_ACPI */
-static int iwl_mvm_get_bios_mcc(struct iwl_mvm *mvm, char *mcc)
-{
-	return -ENOENT;
-}
-#endif
-
 int iwl_mvm_init_mcc(struct iwl_mvm *mvm)
 {
 	bool tlv_lar;
@@ -884,7 +793,7 @@
 		return -EIO;
 
 	if (iwl_mvm_is_wifi_mcc_supported(mvm) &&
-	    !iwl_mvm_get_bios_mcc(mvm, mcc)) {
+	    !iwl_get_bios_mcc(mvm->dev, mcc)) {
 		kfree(regd);
 		regd = iwl_mvm_get_regdomain(mvm->hw->wiphy, mcc,
 					     MCC_SOURCE_BIOS, NULL);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 55d9096..05fe6dd 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -359,6 +359,7 @@
 	HCMD_NAME(BT_COEX_CI),
 	HCMD_NAME(PHY_CONFIGURATION_CMD),
 	HCMD_NAME(CALIB_RES_NOTIF_PHY_DB),
+	HCMD_NAME(PHY_DB_CMD),
 	HCMD_NAME(SCAN_OFFLOAD_COMPLETE),
 	HCMD_NAME(SCAN_OFFLOAD_UPDATE_PROFILES_CMD),
 	HCMD_NAME(SCAN_OFFLOAD_CONFIG_CMD),
@@ -652,11 +653,9 @@
 	/* the hardware splits the A-MSDU */
 	if (mvm->cfg->mq_rx_supported)
 		trans_cfg.rx_buf_size = IWL_AMSDU_4K;
-	trans_cfg.wide_cmd_header = fw_has_api(&mvm->fw->ucode_capa,
-					       IWL_UCODE_TLV_API_WIDE_CMD_HDR);
 
-	if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_DW_BC_TABLE)
-		trans_cfg.bc_table_dword = true;
+	trans->wide_cmd_header = true;
+	trans_cfg.bc_table_dword = true;
 
 	trans_cfg.command_groups = iwl_mvm_groups;
 	trans_cfg.command_groups_size = ARRAY_SIZE(iwl_mvm_groups);
@@ -711,37 +710,21 @@
 		IWL_DEBUG_EEPROM(mvm->trans->dev,
 				 "working without external nvm file\n");
 
-	if (WARN(cfg->no_power_up_nic_in_init && !mvm->nvm_file_name,
-		 "not allowing power-up and not having nvm_file\n"))
+	err = iwl_trans_start_hw(mvm->trans);
+	if (err)
 		goto out_free;
 
-	/*
-	 * Even if nvm exists in the nvm_file driver should read again the nvm
-	 * from the nic because there might be entries that exist in the OTP
-	 * and not in the file.
-	 * for nics with no_power_up_nic_in_init: rely completley on nvm_file
-	 */
-	if (cfg->no_power_up_nic_in_init && mvm->nvm_file_name) {
-		err = iwl_nvm_init(mvm, false);
-		if (err)
-			goto out_free;
-	} else {
-		err = iwl_trans_start_hw(mvm->trans);
-		if (err)
-			goto out_free;
-
-		mutex_lock(&mvm->mutex);
-		iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE);
-		err = iwl_run_init_mvm_ucode(mvm, true);
-		if (!err || !iwlmvm_mod_params.init_dbg)
-			iwl_mvm_stop_device(mvm);
-		iwl_mvm_unref(mvm, IWL_MVM_REF_INIT_UCODE);
-		mutex_unlock(&mvm->mutex);
-		/* returns 0 if successful, 1 if success but in rfkill */
-		if (err < 0 && !iwlmvm_mod_params.init_dbg) {
-			IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", err);
-			goto out_free;
-		}
+	mutex_lock(&mvm->mutex);
+	iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE);
+	err = iwl_run_init_mvm_ucode(mvm, true);
+	if (!err || !iwlmvm_mod_params.init_dbg)
+		iwl_mvm_stop_device(mvm);
+	iwl_mvm_unref(mvm, IWL_MVM_REF_INIT_UCODE);
+	mutex_unlock(&mvm->mutex);
+	/* returns 0 if successful, 1 if success but in rfkill */
+	if (err < 0 && !iwlmvm_mod_params.init_dbg) {
+		IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", err);
+		goto out_free;
 	}
 
 	scan_size = iwl_mvm_scan_size(mvm);
@@ -783,8 +766,8 @@
 	flush_delayed_work(&mvm->fw_dump_wk);
 	iwl_phy_db_free(mvm->phy_db);
 	kfree(mvm->scan_cmd);
-	if (!cfg->no_power_up_nic_in_init || !mvm->nvm_file_name)
-		iwl_trans_op_mode_leave(trans);
+	iwl_trans_op_mode_leave(trans);
+
 	ieee80211_free_hw(mvm->hw);
 	return NULL;
 }
@@ -857,9 +840,7 @@
 	struct iwl_mvm *mvm =
 		container_of(wk, struct iwl_mvm, async_handlers_wk);
 	struct iwl_async_handler_entry *entry, *tmp;
-	struct list_head local_list;
-
-	INIT_LIST_HEAD(&local_list);
+	LIST_HEAD(local_list);
 
 	/* Ensure that we are not in stop flow (check iwl_mvm_mac_stop) */
 
@@ -966,10 +947,11 @@
 {
 	struct iwl_rx_packet *pkt = rxb_addr(rxb);
 	struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
+	u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd);
 
-	if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD))
+	if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD)))
 		iwl_mvm_rx_rx_mpdu(mvm, napi, rxb);
-	else if (pkt->hdr.cmd == REPLY_RX_PHY_CMD)
+	else if (cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_PHY_CMD))
 		iwl_mvm_rx_rx_phy_cmd(mvm, rxb);
 	else
 		iwl_mvm_rx_common(mvm, rxb, pkt);
@@ -981,13 +963,14 @@
 {
 	struct iwl_rx_packet *pkt = rxb_addr(rxb);
 	struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
+	u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd);
 
-	if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD))
+	if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD)))
 		iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, 0);
-	else if (unlikely(pkt->hdr.group_id == DATA_PATH_GROUP &&
-			  pkt->hdr.cmd == RX_QUEUES_NOTIFICATION))
+	else if (unlikely(cmd == WIDE_ID(DATA_PATH_GROUP,
+					 RX_QUEUES_NOTIFICATION)))
 		iwl_mvm_rx_queue_notif(mvm, rxb, 0);
-	else if (pkt->hdr.cmd == FRAME_RELEASE)
+	else if (cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE))
 		iwl_mvm_rx_frame_release(mvm, napi, rxb, 0);
 	else
 		iwl_mvm_rx_common(mvm, rxb, pkt);
@@ -1666,13 +1649,14 @@
 {
 	struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
 	struct iwl_rx_packet *pkt = rxb_addr(rxb);
+	u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd);
 
-	if (unlikely(pkt->hdr.cmd == FRAME_RELEASE))
+	if (unlikely(cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE)))
 		iwl_mvm_rx_frame_release(mvm, napi, rxb, queue);
-	else if (unlikely(pkt->hdr.cmd == RX_QUEUES_NOTIFICATION &&
-			  pkt->hdr.group_id == DATA_PATH_GROUP))
+	else if (unlikely(cmd == WIDE_ID(DATA_PATH_GROUP,
+					 RX_QUEUES_NOTIFICATION)))
 		iwl_mvm_rx_queue_notif(mvm, rxb, queue);
-	else
+	else if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD)))
 		iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, queue);
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c
index ff85865..af6d10c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c
@@ -694,8 +694,7 @@
 
 	/* enable PM on p2p if p2p stand alone */
 	if (vifs->p2p_active && !vifs->bss_active && !vifs->ap_active) {
-		if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM)
-			p2p_mvmvif->pm_enabled = true;
+		p2p_mvmvif->pm_enabled = true;
 		return;
 	}
 
@@ -707,12 +706,10 @@
 				   ap_mvmvif->phy_ctxt->id);
 
 	/* clients are not stand alone: enable PM if DCM */
-	if (!(client_same_channel || ap_same_channel) &&
-	    (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM)) {
+	if (!(client_same_channel || ap_same_channel)) {
 		if (vifs->bss_active)
 			bss_mvmvif->pm_enabled = true;
-		if (vifs->p2p_active &&
-		    (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM))
+		if (vifs->p2p_active)
 			p2p_mvmvif->pm_enabled = true;
 		return;
 	}
@@ -721,12 +718,10 @@
 	 * There is only one channel in the system and there are only
 	 * bss and p2p clients that share it
 	 */
-	if (client_same_channel && !vifs->ap_active &&
-	    (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM)) {
+	if (client_same_channel && !vifs->ap_active) {
 		/* share same channel*/
 		bss_mvmvif->pm_enabled = true;
-		if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM)
-			p2p_mvmvif->pm_enabled = true;
+		p2p_mvmvif->pm_enabled = true;
 	}
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index df6c32c..a57c6ef 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -132,7 +132,8 @@
 		   IEEE80211_CCMP_PN_LEN) <= 0)
 		return -1;
 
-	memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN);
+	if (!(stats->flag & RX_FLAG_AMSDU_MORE))
+		memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN);
 	stats->flag |= RX_FLAG_PN_VALIDATED;
 
 	return 0;
@@ -417,10 +418,11 @@
 
 		ssn = ieee80211_sn_inc(ssn);
 
-		/* holes are valid since nssn indicates frames were received. */
-		if (skb_queue_empty(skb_list) || !skb_peek_tail(skb_list))
-			continue;
-		/* Empty the list. Will have more than one frame for A-MSDU */
+		/*
+		 * Empty the list. Will have more than one frame for A-MSDU.
+		 * Empty list is valid as well since nssn indicates frames were
+		 * received.
+		 */
 		while ((skb = __skb_dequeue(skb_list))) {
 			iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb,
 							reorder_buf->queue,
@@ -433,7 +435,7 @@
 	if (reorder_buf->num_stored && !reorder_buf->removed) {
 		u16 index = reorder_buf->head_sn % reorder_buf->buf_size;
 
-		while (!skb_peek_tail(&reorder_buf->entries[index]))
+		while (skb_queue_empty(&reorder_buf->entries[index]))
 			index = (index + 1) % reorder_buf->buf_size;
 		/* modify timer to match next frame's expiration time */
 		mod_timer(&reorder_buf->reorder_timer,
@@ -451,17 +453,17 @@
 	u16 sn = 0, index = 0;
 	bool expired = false;
 
-	spin_lock_bh(&buf->lock);
+	spin_lock(&buf->lock);
 
 	if (!buf->num_stored || buf->removed) {
-		spin_unlock_bh(&buf->lock);
+		spin_unlock(&buf->lock);
 		return;
 	}
 
 	for (i = 0; i < buf->buf_size ; i++) {
 		index = (buf->head_sn + i) % buf->buf_size;
 
-		if (!skb_peek_tail(&buf->entries[index]))
+		if (skb_queue_empty(&buf->entries[index]))
 			continue;
 		if (!time_after(jiffies, buf->reorder_time[index] +
 				RX_REORDER_BUF_TIMEOUT_MQ))
@@ -491,7 +493,7 @@
 			  buf->reorder_time[index] +
 			  1 + RX_REORDER_BUF_TIMEOUT_MQ);
 	}
-	spin_unlock_bh(&buf->lock);
+	spin_unlock(&buf->lock);
 }
 
 static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue,
@@ -502,7 +504,7 @@
 	struct iwl_mvm_reorder_buffer *reorder_buf;
 	u8 baid = data->baid;
 
-	if (WARN_ON_ONCE(baid >= IWL_RX_REORDER_DATA_INVALID_BAID))
+	if (WARN_ONCE(baid >= IWL_MAX_BAID, "invalid BAID: %x\n", baid))
 		return;
 
 	rcu_read_lock();
@@ -589,6 +591,11 @@
 	baid = (reorder & IWL_RX_MPDU_REORDER_BAID_MASK) >>
 		IWL_RX_MPDU_REORDER_BAID_SHIFT;
 
+	/*
+	 * This also covers the case of receiving a Block Ack Request
+	 * outside a BA session; we'll pass it to mac80211 and that
+	 * then sends a delBA action frame.
+	 */
 	if (baid == IWL_RX_REORDER_DATA_INVALID_BAID)
 		return false;
 
@@ -598,9 +605,10 @@
 
 	mvm_sta = iwl_mvm_sta_from_mac80211(sta);
 
-	/* not a data packet */
-	if (!ieee80211_is_data_qos(hdr->frame_control) ||
-	    is_multicast_ether_addr(hdr->addr1))
+	/* not a data packet or a bar */
+	if (!ieee80211_is_back_req(hdr->frame_control) &&
+	    (!ieee80211_is_data_qos(hdr->frame_control) ||
+	     is_multicast_ether_addr(hdr->addr1)))
 		return false;
 
 	if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
@@ -624,6 +632,11 @@
 
 	spin_lock_bh(&buffer->lock);
 
+	if (ieee80211_is_back_req(hdr->frame_control)) {
+		iwl_mvm_release_frames(mvm, sta, napi, buffer, nssn);
+		goto drop;
+	}
+
 	/*
 	 * If there was a significant jump in the nssn - adjust.
 	 * If the SN is smaller than the NSSN it might need to first go into
@@ -883,6 +896,9 @@
 			u8 *qc = ieee80211_get_qos_ctl(hdr);
 
 			*qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT;
+			if (!(desc->amsdu_info &
+			      IWL_RX_MPDU_AMSDU_LAST_SUBFRAME))
+				rx_status->flag |= RX_FLAG_AMSDU_MORE;
 		}
 		if (baid != IWL_RX_REORDER_DATA_INVALID_BAID)
 			iwl_mvm_agg_rx_received(mvm, baid);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index dac120f..f279fdd 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -141,6 +141,7 @@
 	struct cfg80211_match_set *match_sets;
 	int n_scan_plans;
 	struct cfg80211_sched_scan_plan *scan_plans;
+	u32 measurement_dwell;
 };
 
 static u8 iwl_mvm_scan_rx_ant(struct iwl_mvm *mvm)
@@ -232,6 +233,27 @@
 	return IWL_SCAN_TYPE_WILD;
 }
 
+static int
+iwl_mvm_get_measurement_dwell(struct iwl_mvm *mvm,
+			      struct cfg80211_scan_request *req,
+			      struct iwl_mvm_scan_params *params)
+{
+	if (!req->duration)
+		return 0;
+
+	if (req->duration_mandatory &&
+	    req->duration > scan_timing[params->type].max_out_time) {
+		IWL_DEBUG_SCAN(mvm,
+			       "Measurement scan - too long dwell %hu (max out time %u)\n",
+			       req->duration,
+			       scan_timing[params->type].max_out_time);
+		return -EOPNOTSUPP;
+	}
+
+	return min_t(u32, (u32)req->duration,
+		     scan_timing[params->type].max_out_time);
+}
+
 static inline bool iwl_mvm_rrm_scan_needed(struct iwl_mvm *mvm)
 {
 	/* require rrm scan whenever the fw supports it */
@@ -717,22 +739,6 @@
 	params->preq.common_data.len = cpu_to_le16(ies->common_ie_len);
 }
 
-static __le32 iwl_mvm_scan_priority(struct iwl_mvm *mvm,
-				    enum iwl_scan_priority_ext prio)
-{
-	if (fw_has_api(&mvm->fw->ucode_capa,
-		       IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY))
-		return cpu_to_le32(prio);
-
-	if (prio <= IWL_SCAN_PRIORITY_EXT_2)
-		return cpu_to_le32(IWL_SCAN_PRIORITY_LOW);
-
-	if (prio <= IWL_SCAN_PRIORITY_EXT_4)
-		return cpu_to_le32(IWL_SCAN_PRIORITY_MEDIUM);
-
-	return cpu_to_le32(IWL_SCAN_PRIORITY_HIGH);
-}
-
 static void iwl_mvm_scan_lmac_dwell(struct iwl_mvm *mvm,
 				    struct iwl_scan_req_lmac *cmd,
 				    struct iwl_mvm_scan_params *params)
@@ -743,7 +749,7 @@
 	cmd->extended_dwell = scan_timing[params->type].dwell_extended;
 	cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time);
 	cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time);
-	cmd->scan_prio = iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6);
+	cmd->scan_prio = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6);
 }
 
 static inline bool iwl_mvm_scan_fits(struct iwl_mvm *mvm, int n_ssids,
@@ -1033,21 +1039,24 @@
 				    struct iwl_scan_req_umac *cmd,
 				    struct iwl_mvm_scan_params *params)
 {
-	cmd->extended_dwell = scan_timing[params->type].dwell_extended;
-	cmd->active_dwell = scan_timing[params->type].dwell_active;
-	cmd->passive_dwell = scan_timing[params->type].dwell_passive;
+	if (params->measurement_dwell) {
+		cmd->active_dwell = params->measurement_dwell;
+		cmd->passive_dwell = params->measurement_dwell;
+		cmd->extended_dwell = params->measurement_dwell;
+	} else {
+		cmd->active_dwell = scan_timing[params->type].dwell_active;
+		cmd->passive_dwell = scan_timing[params->type].dwell_passive;
+		cmd->extended_dwell = scan_timing[params->type].dwell_extended;
+	}
 	cmd->fragmented_dwell = scan_timing[params->type].dwell_fragmented;
 	cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time);
 	cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time);
-	cmd->scan_priority =
-		iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6);
+	cmd->scan_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6);
 
 	if (iwl_mvm_is_regular_scan(params))
-		cmd->ooc_priority =
-			iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6);
+		cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6);
 	else
-		cmd->ooc_priority =
-			iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_2);
+		cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_2);
 }
 
 static void
@@ -1067,11 +1076,11 @@
 	}
 }
 
-static u32 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm,
+static u16 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm,
 				   struct iwl_mvm_scan_params *params,
 				   struct ieee80211_vif *vif)
 {
-	int flags = 0;
+	u16 flags = 0;
 
 	if (params->n_ssids == 0)
 		flags = IWL_UMAC_SCAN_GEN_FLAGS_PASSIVE;
@@ -1093,6 +1102,9 @@
 	if (!iwl_mvm_is_regular_scan(params))
 		flags |= IWL_UMAC_SCAN_GEN_FLAGS_PERIODIC;
 
+	if (params->measurement_dwell)
+		flags |= IWL_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE;
+
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 	if (mvm->scan_iter_notif_enabled)
 		flags |= IWL_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE;
@@ -1119,6 +1131,7 @@
 			mvm->fw->ucode_capa.n_scan_channels;
 	int uid, i;
 	u32 ssid_bitmap = 0;
+	struct iwl_mvm_vif *scan_vif = iwl_mvm_vif_from_mac80211(vif);
 
 	lockdep_assert_held(&mvm->mutex);
 
@@ -1136,8 +1149,9 @@
 	mvm->scan_uid_status[uid] = type;
 
 	cmd->uid = cpu_to_le32(uid);
-	cmd->general_flags = cpu_to_le32(iwl_mvm_scan_umac_flags(mvm, params,
+	cmd->general_flags = cpu_to_le16(iwl_mvm_scan_umac_flags(mvm, params,
 								 vif));
+	cmd->scan_start_mac_id = scan_vif->id;
 
 	if (type == IWL_MVM_SCAN_SCHED || type == IWL_MVM_SCAN_NETDETECT)
 		cmd->flags = cpu_to_le32(IWL_UMAC_SCAN_FLAG_PREEMPTIVE);
@@ -1289,6 +1303,12 @@
 		iwl_mvm_get_scan_type(mvm,
 				      vif->type == NL80211_IFTYPE_P2P_DEVICE);
 
+	ret = iwl_mvm_get_measurement_dwell(mvm, req, &params);
+	if (ret < 0)
+		return ret;
+
+	params.measurement_dwell = ret;
+
 	iwl_mvm_build_scan_probe(mvm, vif, ies, &params);
 
 	if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_UMAC_SCAN)) {
@@ -1315,6 +1335,7 @@
 
 	IWL_DEBUG_SCAN(mvm, "Scan request was sent successfully\n");
 	mvm->scan_status |= IWL_MVM_SCAN_REGULAR;
+	mvm->scan_vif = iwl_mvm_vif_from_mac80211(vif);
 	iwl_mvm_ref(mvm, IWL_MVM_REF_SCAN);
 
 	queue_delayed_work(system_wq, &mvm->scan_timeout_dwork,
@@ -1437,9 +1458,12 @@
 	if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_REGULAR) {
 		struct cfg80211_scan_info info = {
 			.aborted = aborted,
+			.scan_start_tsf = mvm->scan_start,
 		};
 
+		memcpy(info.tsf_bssid, mvm->scan_vif->bssid, ETH_ALEN);
 		ieee80211_scan_completed(mvm->hw, &info);
+		mvm->scan_vif = NULL;
 		iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN);
 		cancel_delayed_work(&mvm->scan_timeout_dwork);
 	} else if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_SCHED) {
@@ -1473,6 +1497,8 @@
 	struct iwl_umac_scan_iter_complete_notif *notif = (void *)pkt->data;
 	u8 buf[256];
 
+	mvm->scan_start = le64_to_cpu(notif->start_tsf);
+
 	IWL_DEBUG_SCAN(mvm,
 		       "UMAC Scan iteration complete: status=0x%x scanned_channels=%d channels list: %s\n",
 		       notif->status, notif->scanned_channels,
@@ -1485,6 +1511,10 @@
 		ieee80211_sched_scan_results(mvm->hw);
 		mvm->sched_scan_pass_all = SCHED_SCAN_PASS_ALL_ENABLED;
 	}
+
+	IWL_DEBUG_SCAN(mvm,
+		       "UMAC Scan iteration complete: scan started at %llu (TSF)\n",
+		       mvm->scan_start);
 }
 
 static int iwl_mvm_umac_scan_abort(struct iwl_mvm *mvm, int type)
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 3130b9c..fc77188 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -468,6 +468,11 @@
 		    i != IWL_MVM_DQA_BSS_CLIENT_QUEUE)
 			continue;
 
+		/* Don't try and take queues being reconfigured */
+		if (mvm->queue_info[queue].status ==
+		    IWL_MVM_QUEUE_RECONFIGURING)
+			continue;
+
 		ac_to_queue[mvm->queue_info[i].mac80211_ac] = i;
 	}
 
@@ -501,31 +506,37 @@
 		queue = ac_to_queue[IEEE80211_AC_VO];
 
 	/* Make sure queue found (or not) is legal */
-	if (!((queue >= IWL_MVM_DQA_MIN_MGMT_QUEUE &&
-	       queue <= IWL_MVM_DQA_MAX_MGMT_QUEUE) ||
-	      (queue >= IWL_MVM_DQA_MIN_DATA_QUEUE &&
-	       queue <= IWL_MVM_DQA_MAX_DATA_QUEUE) ||
-	      (queue == IWL_MVM_DQA_BSS_CLIENT_QUEUE))) {
+	if (!iwl_mvm_is_dqa_data_queue(mvm, queue) &&
+	    !iwl_mvm_is_dqa_mgmt_queue(mvm, queue) &&
+	    (queue != IWL_MVM_DQA_BSS_CLIENT_QUEUE)) {
 		IWL_ERR(mvm, "No DATA queues available to share\n");
-		queue = -ENOSPC;
+		return -ENOSPC;
+	}
+
+	/* Make sure the queue isn't in the middle of being reconfigured */
+	if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_RECONFIGURING) {
+		IWL_ERR(mvm,
+			"TXQ %d is in the middle of re-config - try again\n",
+			queue);
+		return -EBUSY;
 	}
 
 	return queue;
 }
 
 /*
- * If a given queue has a higher AC than the TID stream that is being added to
- * it, the queue needs to be redirected to the lower AC. This function does that
+ * If a given queue has a higher AC than the TID stream that is being compared
+ * to, the queue needs to be redirected to the lower AC. This function does that
  * in such a case, otherwise - if no redirection required - it does nothing,
  * unless the %force param is true.
  */
-static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid,
-				      int ac, int ssn, unsigned int wdg_timeout,
-				      bool force)
+int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid,
+			       int ac, int ssn, unsigned int wdg_timeout,
+			       bool force)
 {
 	struct iwl_scd_txq_cfg_cmd cmd = {
 		.scd_queue = queue,
-		.enable = 0,
+		.action = SCD_CFG_DISABLE_QUEUE,
 	};
 	bool shared_queue;
 	unsigned long mq;
@@ -551,11 +562,12 @@
 
 	cmd.sta_id = mvm->queue_info[queue].ra_sta_id;
 	cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[mvm->queue_info[queue].mac80211_ac];
+	cmd.tid = mvm->queue_info[queue].txq_tid;
 	mq = mvm->queue_info[queue].hw_queue_to_mac80211;
 	shared_queue = (mvm->queue_info[queue].hw_queue_refcount > 1);
 	spin_unlock_bh(&mvm->queue_info_lock);
 
-	IWL_DEBUG_TX_QUEUES(mvm, "Redirecting shared TXQ #%d to FIFO #%d\n",
+	IWL_DEBUG_TX_QUEUES(mvm, "Redirecting TXQ #%d to FIFO #%d\n",
 			    queue, iwl_mvm_ac_to_tx_fifo[ac]);
 
 	/* Stop MAC queues and wait for this queue to empty */
@@ -576,9 +588,12 @@
 			ret);
 
 	/* Make sure the SCD wrptr is correctly set before reconfiguring */
-	iwl_trans_txq_enable(mvm->trans, queue, iwl_mvm_ac_to_tx_fifo[ac],
-			     cmd.sta_id, tid, LINK_QUAL_AGG_FRAME_LIMIT_DEF,
-			     ssn, wdg_timeout);
+	iwl_trans_txq_enable_cfg(mvm->trans, queue, ssn, NULL, wdg_timeout);
+
+	/* Update the TID "owner" of the queue */
+	spin_lock_bh(&mvm->queue_info_lock);
+	mvm->queue_info[queue].txq_tid = tid;
+	spin_unlock_bh(&mvm->queue_info_lock);
 
 	/* TODO: Work-around SCD bug when moving back by multiples of 0x40 */
 
@@ -709,7 +724,7 @@
 	if (WARN_ON(queue <= 0)) {
 		IWL_ERR(mvm, "No available queues for tid %d on sta_id %d\n",
 			tid, cfg.sta_id);
-		return -ENOSPC;
+		return queue;
 	}
 
 	/*
@@ -728,21 +743,23 @@
 	if (using_inactive_queue) {
 		struct iwl_scd_txq_cfg_cmd cmd = {
 			.scd_queue = queue,
-			.enable = 0,
+			.action = SCD_CFG_DISABLE_QUEUE,
 		};
-		u8 ac;
+		u8 txq_curr_ac;
 
 		disable_agg_tids = iwl_mvm_remove_sta_queue_marking(mvm, queue);
 
 		spin_lock_bh(&mvm->queue_info_lock);
-		ac = mvm->queue_info[queue].mac80211_ac;
+		txq_curr_ac = mvm->queue_info[queue].mac80211_ac;
 		cmd.sta_id = mvm->queue_info[queue].ra_sta_id;
-		cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[ac];
+		cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[txq_curr_ac];
+		cmd.tid = mvm->queue_info[queue].txq_tid;
 		spin_unlock_bh(&mvm->queue_info_lock);
 
 		/* Disable the queue */
-		iwl_mvm_invalidate_sta_queue(mvm, queue, disable_agg_tids,
-					     true);
+		if (disable_agg_tids)
+			iwl_mvm_invalidate_sta_queue(mvm, queue,
+						     disable_agg_tids, false);
 		iwl_trans_txq_disable(mvm->trans, queue, false);
 		ret = iwl_mvm_send_cmd_pdu(mvm, SCD_QUEUE_CFG, 0, sizeof(cmd),
 					   &cmd);
@@ -758,6 +775,10 @@
 
 			return ret;
 		}
+
+		/* If TXQ is allocated to another STA, update removal in FW */
+		if (cmd.sta_id != mvmsta->sta_id)
+			iwl_mvm_invalidate_sta_queue(mvm, queue, 0, true);
 	}
 
 	IWL_DEBUG_TX_QUEUES(mvm,
@@ -827,6 +848,119 @@
 	return ret;
 }
 
+static void iwl_mvm_change_queue_owner(struct iwl_mvm *mvm, int queue)
+{
+	struct iwl_scd_txq_cfg_cmd cmd = {
+		.scd_queue = queue,
+		.action = SCD_CFG_UPDATE_QUEUE_TID,
+	};
+	s8 sta_id;
+	int tid;
+	unsigned long tid_bitmap;
+	int ret;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	spin_lock_bh(&mvm->queue_info_lock);
+	sta_id = mvm->queue_info[queue].ra_sta_id;
+	tid_bitmap = mvm->queue_info[queue].tid_bitmap;
+	spin_unlock_bh(&mvm->queue_info_lock);
+
+	if (WARN(!tid_bitmap, "TXQ %d has no tids assigned to it\n", queue))
+		return;
+
+	/* Find any TID for queue */
+	tid = find_first_bit(&tid_bitmap, IWL_MAX_TID_COUNT + 1);
+	cmd.tid = tid;
+	cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]];
+
+	ret = iwl_mvm_send_cmd_pdu(mvm, SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd);
+	if (ret)
+		IWL_ERR(mvm, "Failed to update owner of TXQ %d (ret=%d)\n",
+			queue, ret);
+	else
+		IWL_DEBUG_TX_QUEUES(mvm, "Changed TXQ %d ownership to tid %d\n",
+				    queue, tid);
+}
+
+static void iwl_mvm_unshare_queue(struct iwl_mvm *mvm, int queue)
+{
+	struct ieee80211_sta *sta;
+	struct iwl_mvm_sta *mvmsta;
+	s8 sta_id;
+	int tid = -1;
+	unsigned long tid_bitmap;
+	unsigned int wdg_timeout;
+	int ssn;
+	int ret = true;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	spin_lock_bh(&mvm->queue_info_lock);
+	sta_id = mvm->queue_info[queue].ra_sta_id;
+	tid_bitmap = mvm->queue_info[queue].tid_bitmap;
+	spin_unlock_bh(&mvm->queue_info_lock);
+
+	/* Find TID for queue, and make sure it is the only one on the queue */
+	tid = find_first_bit(&tid_bitmap, IWL_MAX_TID_COUNT + 1);
+	if (tid_bitmap != BIT(tid)) {
+		IWL_ERR(mvm, "Failed to unshare q %d, active tids=0x%lx\n",
+			queue, tid_bitmap);
+		return;
+	}
+
+	IWL_DEBUG_TX_QUEUES(mvm, "Unsharing TXQ %d, keeping tid %d\n", queue,
+			    tid);
+
+	sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id],
+					lockdep_is_held(&mvm->mutex));
+
+	if (WARN_ON_ONCE(IS_ERR_OR_NULL(sta)))
+		return;
+
+	mvmsta = iwl_mvm_sta_from_mac80211(sta);
+	wdg_timeout = iwl_mvm_get_wd_timeout(mvm, mvmsta->vif, false, false);
+
+	ssn = IEEE80211_SEQ_TO_SN(mvmsta->tid_data[tid].seq_number);
+
+	ret = iwl_mvm_scd_queue_redirect(mvm, queue, tid,
+					 tid_to_mac80211_ac[tid], ssn,
+					 wdg_timeout, true);
+	if (ret) {
+		IWL_ERR(mvm, "Failed to redirect TXQ %d\n", queue);
+		return;
+	}
+
+	/* If aggs should be turned back on - do it */
+	if (mvmsta->tid_data[tid].state == IWL_AGG_ON) {
+		struct iwl_mvm_add_sta_cmd cmd = {0};
+
+		mvmsta->tid_disable_agg &= ~BIT(tid);
+
+		cmd.mac_id_n_color = cpu_to_le32(mvmsta->mac_id_n_color);
+		cmd.sta_id = mvmsta->sta_id;
+		cmd.add_modify = STA_MODE_MODIFY;
+		cmd.modify_mask = STA_MODIFY_TID_DISABLE_TX;
+		cmd.tfd_queue_msk = cpu_to_le32(mvmsta->tfd_queue_msk);
+		cmd.tid_disable_tx = cpu_to_le16(mvmsta->tid_disable_agg);
+
+		ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC,
+					   iwl_mvm_add_sta_cmd_size(mvm), &cmd);
+		if (!ret) {
+			IWL_DEBUG_TX_QUEUES(mvm,
+					    "TXQ #%d is now aggregated again\n",
+					    queue);
+
+			/* Mark queue intenally as aggregating again */
+			iwl_trans_txq_set_shared_mode(mvm->trans, queue, false);
+		}
+	}
+
+	spin_lock_bh(&mvm->queue_info_lock);
+	mvm->queue_info[queue].status = IWL_MVM_QUEUE_READY;
+	spin_unlock_bh(&mvm->queue_info_lock);
+}
+
 static inline u8 iwl_mvm_tid_to_ac_queue(int tid)
 {
 	if (tid == IWL_MAX_TID_COUNT)
@@ -894,13 +1028,42 @@
 	struct ieee80211_sta *sta;
 	struct iwl_mvm_sta *mvmsta;
 	unsigned long deferred_tid_traffic;
-	int sta_id, tid;
+	int queue, sta_id, tid;
 
 	/* Check inactivity of queues */
 	iwl_mvm_inactivity_check(mvm);
 
 	mutex_lock(&mvm->mutex);
 
+	/* Reconfigure queues requiring reconfiguation */
+	for (queue = 0; queue < IWL_MAX_HW_QUEUES; queue++) {
+		bool reconfig;
+		bool change_owner;
+
+		spin_lock_bh(&mvm->queue_info_lock);
+		reconfig = (mvm->queue_info[queue].status ==
+			    IWL_MVM_QUEUE_RECONFIGURING);
+
+		/*
+		 * We need to take into account a situation in which a TXQ was
+		 * allocated to TID x, and then turned shared by adding TIDs y
+		 * and z. If TID x becomes inactive and is removed from the TXQ,
+		 * ownership must be given to one of the remaining TIDs.
+		 * This is mainly because if TID x continues - a new queue can't
+		 * be allocated for it as long as it is an owner of another TXQ.
+		 */
+		change_owner = !(mvm->queue_info[queue].tid_bitmap &
+				 BIT(mvm->queue_info[queue].txq_tid)) &&
+			       (mvm->queue_info[queue].status ==
+				IWL_MVM_QUEUE_SHARED);
+		spin_unlock_bh(&mvm->queue_info_lock);
+
+		if (reconfig)
+			iwl_mvm_unshare_queue(mvm, queue);
+		else if (change_owner)
+			iwl_mvm_change_queue_owner(mvm, queue);
+	}
+
 	/* Go over all stations with deferred traffic */
 	for_each_set_bit(sta_id, mvm->sta_deferred_frames,
 			 IWL_MVM_STATION_COUNT) {
@@ -963,6 +1126,61 @@
 	return 0;
 }
 
+/*
+ * In DQA mode, after a HW restart the queues should be allocated as before, in
+ * order to avoid race conditions when there are shared queues. This function
+ * does the re-mapping and queue allocation.
+ *
+ * Note that re-enabling aggregations isn't done in this function.
+ */
+static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm,
+						 struct iwl_mvm_sta *mvm_sta)
+{
+	unsigned int wdg_timeout =
+			iwl_mvm_get_wd_timeout(mvm, mvm_sta->vif, false, false);
+	int i;
+	struct iwl_trans_txq_scd_cfg cfg = {
+		.sta_id = mvm_sta->sta_id,
+		.frame_limit = IWL_FRAME_LIMIT,
+	};
+
+	/* Make sure reserved queue is still marked as such (or allocated) */
+	mvm->queue_info[mvm_sta->reserved_queue].status =
+		IWL_MVM_QUEUE_RESERVED;
+
+	for (i = 0; i <= IWL_MAX_TID_COUNT; i++) {
+		struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[i];
+		int txq_id = tid_data->txq_id;
+		int ac;
+		u8 mac_queue;
+
+		if (txq_id == IEEE80211_INVAL_HW_QUEUE)
+			continue;
+
+		skb_queue_head_init(&tid_data->deferred_tx_frames);
+
+		ac = tid_to_mac80211_ac[i];
+		mac_queue = mvm_sta->vif->hw_queue[ac];
+
+		cfg.tid = i;
+		cfg.fifo = iwl_mvm_ac_to_tx_fifo[ac];
+		cfg.aggregate = (txq_id >= IWL_MVM_DQA_MIN_DATA_QUEUE ||
+				 txq_id == IWL_MVM_DQA_BSS_CLIENT_QUEUE);
+
+		IWL_DEBUG_TX_QUEUES(mvm,
+				    "Re-mapping sta %d tid %d to queue %d\n",
+				    mvm_sta->sta_id, i, txq_id);
+
+		iwl_mvm_enable_txq(mvm, txq_id, mac_queue,
+				   IEEE80211_SEQ_TO_SN(tid_data->seq_number),
+				   &cfg, wdg_timeout);
+
+		mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_READY;
+	}
+
+	atomic_set(&mvm->pending_frames[mvm_sta->sta_id], 0);
+}
+
 int iwl_mvm_add_sta(struct iwl_mvm *mvm,
 		    struct ieee80211_vif *vif,
 		    struct ieee80211_sta *sta)
@@ -985,6 +1203,13 @@
 
 	spin_lock_init(&mvm_sta->lock);
 
+	/* In DQA mode, if this is a HW restart, re-alloc existing queues */
+	if (iwl_mvm_is_dqa_supported(mvm) &&
+	    test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
+		iwl_mvm_realloc_queues_after_restart(mvm, mvm_sta);
+		goto update_fw;
+	}
+
 	mvm_sta->sta_id = sta_id;
 	mvm_sta->mac_id_n_color = FW_CMD_ID_AND_COLOR(mvmvif->id,
 						      mvmvif->color);
@@ -1048,6 +1273,7 @@
 			goto err;
 	}
 
+update_fw:
 	ret = iwl_mvm_sta_send_to_fw(mvm, sta, false, 0);
 	if (ret)
 		goto err;
@@ -1071,13 +1297,6 @@
 	return ret;
 }
 
-int iwl_mvm_update_sta(struct iwl_mvm *mvm,
-		       struct ieee80211_vif *vif,
-		       struct ieee80211_sta *sta)
-{
-	return iwl_mvm_sta_send_to_fw(mvm, sta, true, 0);
-}
-
 int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta,
 		      bool drain)
 {
@@ -1270,9 +1489,31 @@
 		ret = iwl_mvm_drain_sta(mvm, mvm_sta, false);
 
 		/* If DQA is supported - the queues can be disabled now */
-		if (iwl_mvm_is_dqa_supported(mvm))
+		if (iwl_mvm_is_dqa_supported(mvm)) {
+			u8 reserved_txq = mvm_sta->reserved_queue;
+			enum iwl_mvm_queue_status *status;
+
 			iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta);
 
+			/*
+			 * If no traffic has gone through the reserved TXQ - it
+			 * is still marked as IWL_MVM_QUEUE_RESERVED, and
+			 * should be manually marked as free again
+			 */
+			spin_lock_bh(&mvm->queue_info_lock);
+			status = &mvm->queue_info[reserved_txq].status;
+			if (WARN((*status != IWL_MVM_QUEUE_RESERVED) &&
+				 (*status != IWL_MVM_QUEUE_FREE),
+				 "sta_id %d reserved txq %d status %d",
+				 mvm_sta->sta_id, reserved_txq, *status)) {
+				spin_unlock_bh(&mvm->queue_info_lock);
+				return -EINVAL;
+			}
+
+			*status = IWL_MVM_QUEUE_FREE;
+			spin_unlock_bh(&mvm->queue_info_lock);
+		}
+
 		if (vif->type == NL80211_IFTYPE_STATION &&
 		    mvmvif->ap_sta_id == mvm_sta->sta_id) {
 			/* if associated - we can't remove the AP STA now */
@@ -1802,11 +2043,9 @@
 		baid_data->baid = baid;
 		baid_data->timeout = timeout;
 		baid_data->last_rx = jiffies;
-		init_timer(&baid_data->session_timer);
-		baid_data->session_timer.function =
-			iwl_mvm_rx_agg_session_expired;
-		baid_data->session_timer.data =
-			(unsigned long)&mvm->baid_map[baid];
+		setup_timer(&baid_data->session_timer,
+			    iwl_mvm_rx_agg_session_expired,
+			    (unsigned long)&mvm->baid_map[baid]);
 		baid_data->mvm = mvm;
 		baid_data->tid = tid;
 		baid_data->sta_id = mvm_sta->sta_id;
@@ -1956,7 +2195,7 @@
 		return -EIO;
 	}
 
-	spin_lock_bh(&mvm->queue_info_lock);
+	spin_lock(&mvm->queue_info_lock);
 
 	/*
 	 * Note the possible cases:
@@ -1967,14 +2206,20 @@
 	 *	non-DQA mode, since the TXQ hasn't yet been allocated
 	 */
 	txq_id = mvmsta->tid_data[tid].txq_id;
-	if (!iwl_mvm_is_dqa_supported(mvm) ||
+	if (iwl_mvm_is_dqa_supported(mvm) &&
+	    unlikely(mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_SHARED)) {
+		ret = -ENXIO;
+		IWL_DEBUG_TX_QUEUES(mvm,
+				    "Can't start tid %d agg on shared queue!\n",
+				    tid);
+		goto release_locks;
+	} else if (!iwl_mvm_is_dqa_supported(mvm) ||
 	    mvm->queue_info[txq_id].status != IWL_MVM_QUEUE_READY) {
 		txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id,
 						 mvm->first_agg_queue,
 						 mvm->last_agg_queue);
 		if (txq_id < 0) {
 			ret = txq_id;
-			spin_unlock_bh(&mvm->queue_info_lock);
 			IWL_ERR(mvm, "Failed to allocate agg queue\n");
 			goto release_locks;
 		}
@@ -1982,7 +2227,8 @@
 		/* TXQ hasn't yet been enabled, so mark it only as reserved */
 		mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_RESERVED;
 	}
-	spin_unlock_bh(&mvm->queue_info_lock);
+
+	spin_unlock(&mvm->queue_info_lock);
 
 	IWL_DEBUG_TX_QUEUES(mvm,
 			    "AGG for tid %d will be on queue #%d\n",
@@ -2006,8 +2252,11 @@
 	}
 
 	ret = 0;
+	goto out;
 
 release_locks:
+	spin_unlock(&mvm->queue_info_lock);
+out:
 	spin_unlock_bh(&mvmsta->lock);
 
 	return ret;
@@ -2023,6 +2272,7 @@
 		iwl_mvm_get_wd_timeout(mvm, vif, sta->tdls, false);
 	int queue, ret;
 	bool alloc_queue = true;
+	enum iwl_mvm_queue_status queue_status;
 	u16 ssn;
 
 	struct iwl_trans_txq_scd_cfg cfg = {
@@ -2048,13 +2298,15 @@
 
 	cfg.fifo = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]];
 
+	spin_lock_bh(&mvm->queue_info_lock);
+	queue_status = mvm->queue_info[queue].status;
+	spin_unlock_bh(&mvm->queue_info_lock);
+
 	/* In DQA mode, the existing queue might need to be reconfigured */
 	if (iwl_mvm_is_dqa_supported(mvm)) {
-		spin_lock_bh(&mvm->queue_info_lock);
 		/* Maybe there is no need to even alloc a queue... */
 		if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_READY)
 			alloc_queue = false;
-		spin_unlock_bh(&mvm->queue_info_lock);
 
 		/*
 		 * Only reconfig the SCD for the queue if the window size has
@@ -2089,9 +2341,12 @@
 				   vif->hw_queue[tid_to_mac80211_ac[tid]], ssn,
 				   &cfg, wdg_timeout);
 
-	ret = iwl_mvm_sta_tx_agg(mvm, sta, tid, queue, true);
-	if (ret)
-		return -EIO;
+	/* Send ADD_STA command to enable aggs only if the queue isn't shared */
+	if (queue_status != IWL_MVM_QUEUE_SHARED) {
+		ret = iwl_mvm_sta_tx_agg(mvm, sta, tid, queue, true);
+		if (ret)
+			return -EIO;
+	}
 
 	/* No need to mark as reserved */
 	spin_lock_bh(&mvm->queue_info_lock);
@@ -2123,7 +2378,6 @@
 	u16 txq_id;
 	int err;
 
-
 	/*
 	 * If mac80211 is cleaning its state, then say that we finished since
 	 * our state has been cleared anyway.
@@ -2152,6 +2406,7 @@
 	 */
 	if (mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_RESERVED)
 		mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_FREE;
+
 	spin_unlock_bh(&mvm->queue_info_lock);
 
 	switch (tid_data->state) {
@@ -2412,9 +2667,15 @@
 	struct iwl_mvm_mgmt_mcast_key_cmd igtk_cmd = {};
 
 	/* verify the key details match the required command's expectations */
-	if (WARN_ON((keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC) ||
-		    (keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE) ||
-		    (keyconf->keyidx != 4 && keyconf->keyidx != 5)))
+	if (WARN_ON((keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE) ||
+		    (keyconf->keyidx != 4 && keyconf->keyidx != 5) ||
+		    (keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC &&
+		     keyconf->cipher != WLAN_CIPHER_SUITE_BIP_GMAC_128 &&
+		     keyconf->cipher != WLAN_CIPHER_SUITE_BIP_GMAC_256)))
+		return -EINVAL;
+
+	if (WARN_ON(!iwl_mvm_has_new_rx_api(mvm) &&
+		    keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC))
 		return -EINVAL;
 
 	igtk_cmd.key_id = cpu_to_le32(keyconf->keyidx);
@@ -2430,11 +2691,18 @@
 		case WLAN_CIPHER_SUITE_AES_CMAC:
 			igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_CCM);
 			break;
+		case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+		case WLAN_CIPHER_SUITE_BIP_GMAC_256:
+			igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_GCMP);
+			break;
 		default:
 			return -EINVAL;
 		}
 
-		memcpy(igtk_cmd.IGTK, keyconf->key, keyconf->keylen);
+		memcpy(igtk_cmd.igtk, keyconf->key, keyconf->keylen);
+		if (keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)
+			igtk_cmd.ctrl_flags |=
+				cpu_to_le32(STA_KEY_FLG_KEY_32BYTES);
 		ieee80211_get_key_rx_seq(keyconf, 0, &seq);
 		pn = seq.aes_cmac.pn;
 		igtk_cmd.receive_seq_cnt = cpu_to_le64(((u64) pn[5] << 0) |
@@ -2449,6 +2717,19 @@
 		       remove_key ? "removing" : "installing",
 		       igtk_cmd.sta_id);
 
+	if (!iwl_mvm_has_new_rx_api(mvm)) {
+		struct iwl_mvm_mgmt_mcast_key_cmd_v1 igtk_cmd_v1 = {
+			.ctrl_flags = igtk_cmd.ctrl_flags,
+			.key_id = igtk_cmd.key_id,
+			.sta_id = igtk_cmd.sta_id,
+			.receive_seq_cnt = igtk_cmd.receive_seq_cnt
+		};
+
+		memcpy(igtk_cmd_v1.igtk, igtk_cmd.igtk,
+		       ARRAY_SIZE(igtk_cmd_v1.igtk));
+		return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, 0,
+					    sizeof(igtk_cmd_v1), &igtk_cmd_v1);
+	}
 	return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, 0,
 				    sizeof(igtk_cmd), &igtk_cmd);
 }
@@ -2573,7 +2854,9 @@
 	}
 	sta_id = mvm_sta->sta_id;
 
-	if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC) {
+	if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
+	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
+	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) {
 		ret = iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, false);
 		goto end;
 	}
@@ -2659,7 +2942,9 @@
 	IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n",
 		      keyconf->keyidx, sta_id);
 
-	if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC)
+	if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC ||
+	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 ||
+	    keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)
 		return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true);
 
 	if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) {
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index bbc1cab..e068d53 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -473,9 +473,14 @@
 int iwl_mvm_add_sta(struct iwl_mvm *mvm,
 		    struct ieee80211_vif *vif,
 		    struct ieee80211_sta *sta);
-int iwl_mvm_update_sta(struct iwl_mvm *mvm,
-		       struct ieee80211_vif *vif,
-		       struct ieee80211_sta *sta);
+
+static inline int iwl_mvm_update_sta(struct iwl_mvm *mvm,
+				     struct ieee80211_vif *vif,
+				     struct ieee80211_sta *sta)
+{
+	return iwl_mvm_sta_send_to_fw(mvm, sta, true, 0);
+}
+
 int iwl_mvm_rm_sta(struct iwl_mvm *mvm,
 		   struct ieee80211_vif *vif,
 		   struct ieee80211_sta *sta);
@@ -554,4 +559,8 @@
 void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
 void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk);
 
+int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid,
+			       int ac, int ssn, unsigned int wdg_timeout,
+			       bool force);
+
 #endif /* __sta_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
index 58fc7b3..63a051b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
@@ -241,11 +241,8 @@
 	};
 	u32 cmdid;
 
-	if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_WIDE_CMD_HDR))
-		cmdid = iwl_cmd_id(CMD_DTS_MEASUREMENT_TRIGGER_WIDE,
-				   PHY_OPS_GROUP, 0);
-	else
-		cmdid = CMD_DTS_MEASUREMENT_TRIGGER;
+	cmdid = iwl_cmd_id(CMD_DTS_MEASUREMENT_TRIGGER_WIDE,
+			   PHY_OPS_GROUP, 0);
 
 	if (!fw_has_capa(&mvm->fw->ucode_capa,
 			 IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE))
@@ -261,9 +258,6 @@
 					    DTS_MEASUREMENT_NOTIF_WIDE) };
 	int ret;
 
-	if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_WIDE_CMD_HDR))
-		temp_notif[0] = DTS_MEASUREMENT_NOTIFICATION;
-
 	lockdep_assert_held(&mvm->mutex);
 
 	iwl_init_notification_wait(&mvm->notif_wait, &wait_temp_notif,
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index c6585ab..66957ac 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -346,7 +346,7 @@
 
 	rate_idx = info->control.rates[0].idx;
 	/* if the rate isn't a well known legacy rate, take the lowest one */
-	if (rate_idx < 0 || rate_idx > IWL_RATE_COUNT_LEGACY)
+	if (rate_idx < 0 || rate_idx >= IWL_RATE_COUNT_LEGACY)
 		rate_idx = rate_lowest_index(
 				&mvm->nvm_data->bands[info->band], sta);
 
@@ -441,7 +441,7 @@
 		 * one.
 		 * Need to handle this.
 		 */
-		tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TC_CMD_SEC_KEY_FROM_TABLE;
+		tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TX_CMD_SEC_KEY_FROM_TABLE;
 		tx_cmd->key[0] = keyconf->hw_key_idx;
 		iwl_mvm_set_tx_cmd_pn(info, crypto_hdr);
 		break;
@@ -490,16 +490,34 @@
 static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm,
 				      struct ieee80211_tx_info *info, __le16 fc)
 {
-	if (iwl_mvm_is_dqa_supported(mvm)) {
-		if (info->control.vif->type == NL80211_IFTYPE_AP &&
-		    ieee80211_is_probe_resp(fc))
-			return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
-		else if (ieee80211_is_mgmt(fc) &&
-			 info->control.vif->type == NL80211_IFTYPE_P2P_DEVICE)
-			return IWL_MVM_DQA_P2P_DEVICE_QUEUE;
-	}
+	if (!iwl_mvm_is_dqa_supported(mvm))
+		return info->hw_queue;
 
-	return info->hw_queue;
+	switch (info->control.vif->type) {
+	case NL80211_IFTYPE_AP:
+		/*
+		 * handle legacy hostapd as well, where station may be added
+		 * only after assoc.
+		 */
+		if (ieee80211_is_probe_resp(fc) || ieee80211_is_auth(fc))
+			return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
+		if (info->hw_queue == info->control.vif->cab_queue)
+			return info->hw_queue;
+
+		WARN_ON_ONCE(1);
+		return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE;
+	case NL80211_IFTYPE_P2P_DEVICE:
+		if (ieee80211_is_mgmt(fc))
+			return IWL_MVM_DQA_P2P_DEVICE_QUEUE;
+		if (info->hw_queue == info->control.vif->cab_queue)
+			return info->hw_queue;
+
+		WARN_ON_ONCE(1);
+		return IWL_MVM_DQA_P2P_DEVICE_QUEUE;
+	default:
+		WARN_ONCE(1, "Not a ctrl vif, no available queue\n");
+		return -1;
+	}
 }
 
 int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb)
@@ -513,6 +531,15 @@
 	int hdrlen = ieee80211_hdrlen(hdr->frame_control);
 	int queue;
 
+	/* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
+	 * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
+	 * queue. STATION (HS2.0) uses the auxiliary context of the FW,
+	 * and hence needs to be sent on the aux queue
+	 */
+	if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
+	    skb_info->control.vif->type == NL80211_IFTYPE_STATION)
+		IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
+
 	memcpy(&info, skb->cb, sizeof(info));
 
 	if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU))
@@ -526,16 +553,6 @@
 	/* This holds the amsdu headers length */
 	skb_info->driver_data[0] = (void *)(uintptr_t)0;
 
-	/*
-	 * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
-	 * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
-	 * queue. STATION (HS2.0) uses the auxiliary context of the FW,
-	 * and hence needs to be sent on the aux queue
-	 */
-	if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
-	    info.control.vif->type == NL80211_IFTYPE_STATION)
-		IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
-
 	queue = info.hw_queue;
 
 	/*
@@ -560,6 +577,9 @@
 			sta_id = mvmvif->bcast_sta.sta_id;
 			queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info,
 							   hdr->frame_control);
+			if (queue < 0)
+				return -1;
+
 		} else if (info.control.vif->type == NL80211_IFTYPE_STATION &&
 			   is_multicast_ether_addr(hdr->addr1)) {
 			u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id);
@@ -838,6 +858,22 @@
 	}
 }
 
+/* Check if there are any timed-out TIDs on a given shared TXQ */
+static bool iwl_mvm_txq_should_update(struct iwl_mvm *mvm, int txq_id)
+{
+	unsigned long queue_tid_bitmap = mvm->queue_info[txq_id].tid_bitmap;
+	unsigned long now = jiffies;
+	int tid;
+
+	for_each_set_bit(tid, &queue_tid_bitmap, IWL_MAX_TID_COUNT + 1) {
+		if (time_before(mvm->queue_info[txq_id].last_frame_time[tid] +
+				IWL_MVM_DQA_QUEUE_TIMEOUT, now))
+			return true;
+	}
+
+	return false;
+}
+
 /*
  * Sets the fields in the Tx cmd that are crypto related
  */
@@ -904,9 +940,13 @@
 		tid = IWL_MAX_TID_COUNT;
 	}
 
-	if (iwl_mvm_is_dqa_supported(mvm))
+	if (iwl_mvm_is_dqa_supported(mvm)) {
 		txq_id = mvmsta->tid_data[tid].txq_id;
 
+		if (ieee80211_is_mgmt(fc))
+			tx_cmd->tid_tspec = IWL_TID_NON_QOS;
+	}
+
 	/* Copy MAC header from skb into command buffer */
 	memcpy(tx_cmd->hdr, hdr, hdrlen);
 
@@ -940,7 +980,6 @@
 			iwl_trans_free_tx_cmd(mvm->trans, dev_cmd);
 			spin_unlock(&mvmsta->lock);
 			return 0;
-
 		}
 
 		/* If we are here - TXQ exists and needs to be re-activated */
@@ -953,8 +992,25 @@
 				    txq_id);
 	}
 
-	/* Keep track of the time of the last frame for this RA/TID */
-	mvm->queue_info[txq_id].last_frame_time[tid] = jiffies;
+	if (iwl_mvm_is_dqa_supported(mvm)) {
+		/* Keep track of the time of the last frame for this RA/TID */
+		mvm->queue_info[txq_id].last_frame_time[tid] = jiffies;
+
+		/*
+		 * If we have timed-out TIDs - schedule the worker that will
+		 * reconfig the queues and update them
+		 *
+		 * Note that the mvm->queue_info_lock isn't being taken here in
+		 * order to not serialize the TX flow. This isn't dangerous
+		 * because scheduling mvm->add_stream_wk can't ruin the state,
+		 * and if we DON'T schedule it due to some race condition then
+		 * next TX we get here we will.
+		 */
+		if (unlikely(mvm->queue_info[txq_id].status ==
+			     IWL_MVM_QUEUE_SHARED &&
+			     iwl_mvm_txq_should_update(mvm, txq_id)))
+			schedule_work(&mvm->add_stream_wk);
+	}
 
 	IWL_DEBUG_TX(mvm, "TX to [%d|%d] Q:%d - seq: 0x%x\n", mvmsta->sta_id,
 		     tid, txq_id, IEEE80211_SEQ_TO_SN(seq_number));
@@ -1068,9 +1124,13 @@
 		IWL_DEBUG_TX_QUEUES(mvm,
 				    "Can continue DELBA flow ssn = next_recl = %d\n",
 				    tid_data->next_reclaimed);
-		iwl_mvm_disable_txq(mvm, tid_data->txq_id,
-				    vif->hw_queue[tid_to_mac80211_ac[tid]], tid,
-				    CMD_ASYNC);
+		if (!iwl_mvm_is_dqa_supported(mvm)) {
+			u8 mac80211_ac = tid_to_mac80211_ac[tid];
+
+			iwl_mvm_disable_txq(mvm, tid_data->txq_id,
+					    vif->hw_queue[mac80211_ac], tid,
+					    CMD_ASYNC);
+		}
 		tid_data->state = IWL_AGG_OFF;
 		ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
 		break;
@@ -1548,41 +1608,16 @@
 		iwl_mvm_rx_tx_cmd_agg(mvm, pkt);
 }
 
-static void iwl_mvm_tx_info_from_ba_notif(struct ieee80211_tx_info *info,
-					  struct iwl_mvm_ba_notif *ba_notif,
-					  struct iwl_mvm_tid_data *tid_data)
+static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid,
+			       int txq, int index,
+			       struct ieee80211_tx_info *ba_info, u32 rate)
 {
-	info->flags |= IEEE80211_TX_STAT_AMPDU;
-	info->status.ampdu_ack_len = ba_notif->txed_2_done;
-	info->status.ampdu_len = ba_notif->txed;
-	iwl_mvm_hwrate_to_tx_status(tid_data->rate_n_flags,
-				    info);
-	/* TODO: not accounted if the whole A-MPDU failed */
-	info->status.tx_time = tid_data->tx_time;
-	info->status.status_driver_data[0] =
-		(void *)(uintptr_t)ba_notif->reduced_txp;
-	info->status.status_driver_data[1] =
-		(void *)(uintptr_t)tid_data->rate_n_flags;
-}
-
-void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
-{
-	struct iwl_rx_packet *pkt = rxb_addr(rxb);
-	struct iwl_mvm_ba_notif *ba_notif = (void *)pkt->data;
 	struct sk_buff_head reclaimed_skbs;
 	struct iwl_mvm_tid_data *tid_data;
 	struct ieee80211_sta *sta;
 	struct iwl_mvm_sta *mvmsta;
 	struct sk_buff *skb;
-	int sta_id, tid, freed;
-	/* "flow" corresponds to Tx queue */
-	u16 scd_flow = le16_to_cpu(ba_notif->scd_flow);
-	/* "ssn" is start of block-ack Tx window, corresponds to index
-	 * (in Tx queue's circular buffer) of first TFD/frame in window */
-	u16 ba_resp_scd_ssn = le16_to_cpu(ba_notif->scd_ssn);
-
-	sta_id = ba_notif->sta_id;
-	tid = ba_notif->tid;
+	int freed;
 
 	if (WARN_ONCE(sta_id >= IWL_MVM_STATION_COUNT ||
 		      tid >= IWL_MAX_TID_COUNT,
@@ -1602,10 +1637,10 @@
 	mvmsta = iwl_mvm_sta_from_mac80211(sta);
 	tid_data = &mvmsta->tid_data[tid];
 
-	if (tid_data->txq_id != scd_flow) {
+	if (tid_data->txq_id != txq) {
 		IWL_ERR(mvm,
-			"invalid BA notification: Q %d, tid %d, flow %d\n",
-			tid_data->txq_id, tid, scd_flow);
+			"invalid BA notification: Q %d, tid %d\n",
+			tid_data->txq_id, tid);
 		rcu_read_unlock();
 		return;
 	}
@@ -1619,27 +1654,14 @@
 	 * block-ack window (we assume that they've been successfully
 	 * transmitted ... if not, it's too late anyway).
 	 */
-	iwl_trans_reclaim(mvm->trans, scd_flow, ba_resp_scd_ssn,
-			  &reclaimed_skbs);
+	iwl_trans_reclaim(mvm->trans, txq, index, &reclaimed_skbs);
 
-	IWL_DEBUG_TX_REPLY(mvm,
-			   "BA_NOTIFICATION Received from %pM, sta_id = %d\n",
-			   (u8 *)&ba_notif->sta_addr_lo32,
-			   ba_notif->sta_id);
-	IWL_DEBUG_TX_REPLY(mvm,
-			   "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n",
-			   ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl),
-			   (unsigned long long)le64_to_cpu(ba_notif->bitmap),
-			   scd_flow, ba_resp_scd_ssn, ba_notif->txed,
-			   ba_notif->txed_2_done);
-
-	IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n",
-			   ba_notif->reduced_txp);
-	tid_data->next_reclaimed = ba_resp_scd_ssn;
+	tid_data->next_reclaimed = index;
 
 	iwl_mvm_check_ratid_empty(mvm, sta, tid);
 
 	freed = 0;
+	ba_info->status.status_driver_data[1] = (void *)(uintptr_t)rate;
 
 	skb_queue_walk(&reclaimed_skbs, skb) {
 		struct ieee80211_hdr *hdr = (void *)skb->data;
@@ -1661,8 +1683,12 @@
 
 		/* this is the first skb we deliver in this batch */
 		/* put the rate scaling data there */
-		if (freed == 1)
-			iwl_mvm_tx_info_from_ba_notif(info, ba_notif, tid_data);
+		if (freed == 1) {
+			info->flags |= IEEE80211_TX_STAT_AMPDU;
+			memcpy(&info->status, &ba_info->status,
+			       sizeof(ba_info->status));
+			iwl_mvm_hwrate_to_tx_status(rate, info);
+		}
 	}
 
 	spin_unlock_bh(&mvmsta->lock);
@@ -1672,7 +1698,6 @@
 	 * Still it's important to update RS about sent vs. acked.
 	 */
 	if (skb_queue_empty(&reclaimed_skbs)) {
-		struct ieee80211_tx_info ba_info = {};
 		struct ieee80211_chanctx_conf *chanctx_conf = NULL;
 
 		if (mvmsta->vif)
@@ -1682,11 +1707,11 @@
 		if (WARN_ON_ONCE(!chanctx_conf))
 			goto out;
 
-		ba_info.band = chanctx_conf->def.chan->band;
-		iwl_mvm_tx_info_from_ba_notif(&ba_info, ba_notif, tid_data);
+		ba_info->band = chanctx_conf->def.chan->band;
+		iwl_mvm_hwrate_to_tx_status(rate, ba_info);
 
 		IWL_DEBUG_TX_REPLY(mvm, "No reclaim. Update rs directly\n");
-		iwl_mvm_rs_tx_status(mvm, sta, tid, &ba_info, false);
+		iwl_mvm_rs_tx_status(mvm, sta, tid, ba_info, false);
 	}
 
 out:
@@ -1698,6 +1723,92 @@
 	}
 }
 
+void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb)
+{
+	struct iwl_rx_packet *pkt = rxb_addr(rxb);
+	int sta_id, tid, txq, index;
+	struct ieee80211_tx_info ba_info = {};
+	struct iwl_mvm_ba_notif *ba_notif;
+	struct iwl_mvm_tid_data *tid_data;
+	struct iwl_mvm_sta *mvmsta;
+
+	if (iwl_mvm_has_new_tx_api(mvm)) {
+		struct iwl_mvm_compressed_ba_notif *ba_res =
+			(void *)pkt->data;
+
+		sta_id = ba_res->sta_id;
+		ba_info.status.ampdu_ack_len = (u8)le16_to_cpu(ba_res->done);
+		ba_info.status.ampdu_len = (u8)le16_to_cpu(ba_res->txed);
+		ba_info.status.tx_time =
+			(u16)le32_to_cpu(ba_res->wireless_time);
+		ba_info.status.status_driver_data[0] =
+			(void *)(uintptr_t)ba_res->reduced_txp;
+
+		/*
+		 * TODO:
+		 * When supporting multi TID aggregations - we need to move
+		 * next_reclaimed to be per TXQ and not per TID or handle it
+		 * in a different way.
+		 * This will go together with SN and AddBA offload and cannot
+		 * be handled properly for now.
+		 */
+		WARN_ON(le16_to_cpu(ba_res->tfd_cnt) != 1);
+		iwl_mvm_tx_reclaim(mvm, sta_id, ba_res->ra_tid[0].tid,
+				   (int)ba_res->tfd[0].q_num,
+				   le16_to_cpu(ba_res->tfd[0].tfd_index),
+				   &ba_info, le32_to_cpu(ba_res->tx_rate));
+
+		IWL_DEBUG_TX_REPLY(mvm,
+				   "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n",
+				   sta_id, le32_to_cpu(ba_res->flags),
+				   le16_to_cpu(ba_res->txed),
+				   le16_to_cpu(ba_res->done));
+		return;
+	}
+
+	ba_notif = (void *)pkt->data;
+	sta_id = ba_notif->sta_id;
+	tid = ba_notif->tid;
+	/* "flow" corresponds to Tx queue */
+	txq = le16_to_cpu(ba_notif->scd_flow);
+	/* "ssn" is start of block-ack Tx window, corresponds to index
+	 * (in Tx queue's circular buffer) of first TFD/frame in window */
+	index = le16_to_cpu(ba_notif->scd_ssn);
+
+	rcu_read_lock();
+	mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id);
+	if (WARN_ON_ONCE(!mvmsta)) {
+		rcu_read_unlock();
+		return;
+	}
+
+	tid_data = &mvmsta->tid_data[tid];
+
+	ba_info.status.ampdu_ack_len = ba_notif->txed_2_done;
+	ba_info.status.ampdu_len = ba_notif->txed;
+	ba_info.status.tx_time = tid_data->tx_time;
+	ba_info.status.status_driver_data[0] =
+		(void *)(uintptr_t)ba_notif->reduced_txp;
+
+	rcu_read_unlock();
+
+	iwl_mvm_tx_reclaim(mvm, sta_id, tid, txq, index, &ba_info,
+			   tid_data->rate_n_flags);
+
+	IWL_DEBUG_TX_REPLY(mvm,
+			   "BA_NOTIFICATION Received from %pM, sta_id = %d\n",
+			   (u8 *)&ba_notif->sta_addr_lo32, ba_notif->sta_id);
+
+	IWL_DEBUG_TX_REPLY(mvm,
+			   "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n",
+			   ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl),
+			   le64_to_cpu(ba_notif->bitmap), txq, index,
+			   ba_notif->txed, ba_notif->txed_2_done);
+
+	IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n",
+			   ba_notif->reduced_txp);
+}
+
 /*
  * Note that there are transports that buffer frames before they reach
  * the firmware. This means that after flush_tx_path is called, the
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index 68f4e7f..d04babd 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -512,7 +512,7 @@
 			base = mvm->fw->inst_errlog_ptr;
 	}
 
-	if (base < 0x800000) {
+	if (base < 0x400000) {
 		IWL_ERR(mvm,
 			"Not valid error log pointer 0x%08X for %s uCode\n",
 			base,
@@ -610,7 +610,7 @@
 {
 	struct iwl_scd_txq_cfg_cmd cmd = {
 		.scd_queue = queue,
-		.enable = 1,
+		.action = SCD_CFG_ENABLE_QUEUE,
 		.window = frame_limit,
 		.sta_id = sta_id,
 		.ssn = cpu_to_le16(ssn),
@@ -669,6 +669,8 @@
 				tid_to_mac80211_ac[cfg->tid];
 		else
 			mvm->queue_info[queue].mac80211_ac = IEEE80211_AC_VO;
+
+		mvm->queue_info[queue].txq_tid = cfg->tid;
 	}
 
 	IWL_DEBUG_TX_QUEUES(mvm,
@@ -682,7 +684,7 @@
 	if (enable_queue) {
 		struct iwl_scd_txq_cfg_cmd cmd = {
 			.scd_queue = queue,
-			.enable = 1,
+			.action = SCD_CFG_ENABLE_QUEUE,
 			.window = cfg->frame_limit,
 			.sta_id = cfg->sta_id,
 			.ssn = cpu_to_le16(ssn),
@@ -709,7 +711,7 @@
 {
 	struct iwl_scd_txq_cfg_cmd cmd = {
 		.scd_queue = queue,
-		.enable = 0,
+		.action = SCD_CFG_DISABLE_QUEUE,
 	};
 	bool remove_mac_queue = true;
 	int ret;
@@ -744,8 +746,9 @@
 			~BIT(mac80211_queue);
 	mvm->queue_info[queue].hw_queue_refcount--;
 
-	cmd.enable = mvm->queue_info[queue].hw_queue_refcount ? 1 : 0;
-	if (!cmd.enable)
+	cmd.action = mvm->queue_info[queue].hw_queue_refcount ?
+		SCD_CFG_ENABLE_QUEUE : SCD_CFG_DISABLE_QUEUE;
+	if (cmd.action == SCD_CFG_DISABLE_QUEUE)
 		mvm->queue_info[queue].status = IWL_MVM_QUEUE_FREE;
 
 	IWL_DEBUG_TX_QUEUES(mvm,
@@ -755,12 +758,13 @@
 			    mvm->queue_info[queue].hw_queue_to_mac80211);
 
 	/* If the queue is still enabled - nothing left to do in this func */
-	if (cmd.enable) {
+	if (cmd.action == SCD_CFG_ENABLE_QUEUE) {
 		spin_unlock_bh(&mvm->queue_info_lock);
 		return;
 	}
 
 	cmd.sta_id = mvm->queue_info[queue].ra_sta_id;
+	cmd.tid = mvm->queue_info[queue].txq_tid;
 
 	/* Make sure queue info is correct even though we overwrite it */
 	WARN(mvm->queue_info[queue].hw_queue_refcount ||
@@ -1131,7 +1135,13 @@
 			BIT(mvmsta->vif->hw_queue[tid_to_mac80211_ac[tid]]);
 	}
 
-	/* TODO: if queue was shared - need to re-enable AGGs */
+	/* If the queue is marked as shared - "unshare" it */
+	if (mvm->queue_info[queue].hw_queue_refcount == 1 &&
+	    mvm->queue_info[queue].status == IWL_MVM_QUEUE_SHARED) {
+		mvm->queue_info[queue].status = IWL_MVM_QUEUE_RECONFIGURING;
+		IWL_DEBUG_TX_QUEUES(mvm, "Marking Q:%d for reconfig\n",
+				    queue);
+	}
 }
 
 void iwl_mvm_inactivity_check(struct iwl_mvm *mvm)
@@ -1215,6 +1225,28 @@
 	rcu_read_unlock();
 }
 
+void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime)
+{
+	bool ps_disabled;
+
+	lockdep_assert_held(&mvm->mutex);
+
+	/* Disable power save when reading GP2 */
+	ps_disabled = mvm->ps_disabled;
+	if (!ps_disabled) {
+		mvm->ps_disabled = true;
+		iwl_mvm_power_update_device(mvm);
+	}
+
+	*gp2 = iwl_read_prph(mvm->trans, DEVICE_SYSTEM_TIME_REG);
+	*boottime = ktime_get_boot_ns();
+
+	if (!ps_disabled) {
+		mvm->ps_disabled = ps_disabled;
+		iwl_mvm_power_update_device(mvm);
+	}
+}
+
 int iwl_mvm_send_lqm_cmd(struct ieee80211_vif *vif,
 			 enum iwl_lqm_cmd_operatrions operation,
 			 u32 duration, u32 timeout)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 78cf9a7..001be40 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -487,6 +487,7 @@
 	{IWL_PCI_DEVICE(0x24FD, 0x1130, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x0130, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x1010, iwl8265_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24FD, 0x10D0, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x0050, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x0150, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x9010, iwl8265_2ac_cfg)},
@@ -500,22 +501,36 @@
 	{IWL_PCI_DEVICE(0x24FD, 0x0930, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x0950, iwl8265_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24FD, 0x0850, iwl8265_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24FD, 0x0012, iwl8275_2ac_cfg)},
 
 /* 9000 Series */
+	{IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9260_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9260_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9260_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9260_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9260_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl5165_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl5165_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl5165_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl5165_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x2526, 0x1420, iwl5165_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl5165_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl5165_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x2526, 0x1030, iwl9560_2ac_cfg)},
+	{IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg)},
 
 /* a000 Series */
 	{IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)},
@@ -608,7 +623,6 @@
 {
 	const struct iwl_cfg *cfg = (struct iwl_cfg *)(ent->driver_data);
 	const struct iwl_cfg *cfg_7265d __maybe_unused = NULL;
-	const struct iwl_cfg *cfg_9260lc __maybe_unused = NULL;
 	struct iwl_trans *iwl_trans;
 	int ret;
 
@@ -637,11 +651,10 @@
 	}
 
 	if (iwl_trans->cfg->rf_id) {
-		if (cfg == &iwl9260_2ac_cfg)
-			cfg_9260lc = &iwl9260lc_2ac_cfg;
-		if (cfg_9260lc && iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_LC) {
-			cfg = cfg_9260lc;
-			iwl_trans->cfg = cfg_9260lc;
+		if (cfg == &iwl9460_2ac_cfg &&
+		    iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_LC) {
+			cfg = &iwl9000lc_2ac_cfg;
+			iwl_trans->cfg = cfg;
 		}
 	}
 #endif
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 11e347d..cac6d99 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -37,6 +37,7 @@
 #include <linux/wait.h>
 #include <linux/pci.h>
 #include <linux/timer.h>
+#include <linux/cpu.h>
 
 #include "iwl-fh.h"
 #include "iwl-csr.h"
@@ -49,7 +50,7 @@
  * be needed for potential data in the SKB's head. The remaining ones can
  * be used for frags.
  */
-#define IWL_PCIE_MAX_FRAGS (IWL_NUM_OF_TBS - 3)
+#define IWL_PCIE_MAX_FRAGS(x) (x->max_tbs - 3)
 
 /*
  * RX related structures and functions
@@ -192,41 +193,9 @@
 	/* only for SYNC commands, iff the reply skb is wanted */
 	struct iwl_host_cmd *source;
 	u32 flags;
+	u32 tbs;
 };
 
-/*
- * Generic queue structure
- *
- * Contains common data for Rx and Tx queues.
- *
- * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware
- * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless
- * there might be HW changes in the future). For the normal TX
- * queues, n_window, which is the size of the software queue data
- * is also 256; however, for the command queue, n_window is only
- * 32 since we don't need so many commands pending. Since the HW
- * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256. As a result,
- * the software buffers (in the variables @meta, @txb in struct
- * iwl_txq) only have 32 entries, while the HW buffers (@tfds in
- * the same struct) have 256.
- * This means that we end up with the following:
- *  HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 |
- *  SW entries:           | 0      | ... | 31          |
- * where N is a number between 0 and 7. This means that the SW
- * data is a window overlayed over the HW queue.
- */
-struct iwl_queue {
-	int write_ptr;       /* 1-st empty entry (index) host_w*/
-	int read_ptr;         /* last used entry (index) host_r*/
-	/* use for monitoring and recovering the stuck queue */
-	dma_addr_t dma_addr;   /* physical addr for BD's */
-	int n_window;	       /* safe queue window */
-	u32 id;
-	int low_mark;	       /* low watermark, resume queue if free
-				* space more than this */
-	int high_mark;         /* high watermark, stop queue if free
-				* space less than this */
-};
 
 #define TFD_TX_CMD_SLOTS 256
 #define TFD_CMD_SLOTS 32
@@ -273,13 +242,32 @@
  * @wd_timeout: queue watchdog timeout (jiffies) - per queue
  * @frozen: tx stuck queue timer is frozen
  * @frozen_expiry_remainder: remember how long until the timer fires
+ * @write_ptr: 1-st empty entry (index) host_w
+ * @read_ptr: last used entry (index) host_r
+ * @dma_addr:  physical addr for BD's
+ * @n_window: safe queue window
+ * @id: queue id
+ * @low_mark: low watermark, resume queue if free space more than this
+ * @high_mark: high watermark, stop queue if free space less than this
  *
  * A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame
  * descriptors) and required locking structures.
+ *
+ * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware
+ * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless
+ * there might be HW changes in the future). For the normal TX
+ * queues, n_window, which is the size of the software queue data
+ * is also 256; however, for the command queue, n_window is only
+ * 32 since we don't need so many commands pending. Since the HW
+ * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256.
+ * This means that we end up with the following:
+ *  HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 |
+ *  SW entries:           | 0      | ... | 31          |
+ * where N is a number between 0 and 7. This means that the SW
+ * data is a window overlayed over the HW queue.
  */
 struct iwl_txq {
-	struct iwl_queue q;
-	struct iwl_tfd *tfds;
+	void *tfds;
 	struct iwl_pcie_first_tb_buf *first_tb_bufs;
 	dma_addr_t first_tb_dma;
 	struct iwl_pcie_txq_entry *entries;
@@ -294,6 +282,14 @@
 	bool block;
 	unsigned long wd_timeout;
 	struct sk_buff_head overflow_q;
+
+	int write_ptr;
+	int read_ptr;
+	dma_addr_t dma_addr;
+	int n_window;
+	u32 id;
+	int low_mark;
+	int high_mark;
 };
 
 static inline dma_addr_t
@@ -309,6 +305,16 @@
 };
 
 /**
+ * enum iwl_shared_irq_flags - level of sharing for irq
+ * @IWL_SHARED_IRQ_NON_RX: interrupt vector serves non rx causes.
+ * @IWL_SHARED_IRQ_FIRST_RSS: interrupt vector serves first RSS queue.
+ */
+enum iwl_shared_irq_flags {
+	IWL_SHARED_IRQ_NON_RX		= BIT(0),
+	IWL_SHARED_IRQ_FIRST_RSS	= BIT(1),
+};
+
+/**
  * struct iwl_trans_pcie - PCIe transport specific data
  * @rxq: all the RX queue data
  * @rx_pool: initial pool of iwl_rx_mem_buffer for all the queues
@@ -326,7 +332,6 @@
  * @rx_buf_size: Rx buffer size
  * @bc_table_dword: true if the BC table expects DWORD (as opposed to bytes)
  * @scd_set_active: should the transport configure the SCD for HCMD queue
- * @wide_cmd_header: true when ucode supports wide command header format
  * @sw_csum_tx: if true, then the transport will compute the csum of the TXed
  *	frame.
  * @rx_page_order: page order for receive buffer size
@@ -338,8 +343,10 @@
  * @fw_mon_size: size of the buffer for the firmware monitor
  * @msix_entries: array of MSI-X entries
  * @msix_enabled: true if managed to enable MSI-X
- * @allocated_vector: the number of interrupt vector allocated by the OS
- * @default_irq_num: default irq for non rx interrupt
+ * @shared_vec_mask: the type of causes the shared vector handles
+ *	(see iwl_shared_irq_flags).
+ * @alloc_vecs: the number of interrupt vectors allocated by the OS
+ * @def_irq: default irq for non rx causes
  * @fh_init_mask: initial unmasked fh causes
  * @hw_init_mask: initial unmasked hw causes
  * @fh_mask: current unmasked fh causes
@@ -391,11 +398,12 @@
 	unsigned int cmd_q_wdg_timeout;
 	u8 n_no_reclaim_cmds;
 	u8 no_reclaim_cmds[MAX_NO_RECLAIM_CMDS];
+	u8 max_tbs;
+	u16 tfd_size;
 
 	enum iwl_amsdu_size rx_buf_size;
 	bool bc_table_dword;
 	bool scd_set_active;
-	bool wide_cmd_header;
 	bool sw_csum_tx;
 	u32 rx_page_order;
 
@@ -410,12 +418,14 @@
 
 	struct msix_entry msix_entries[IWL_MAX_RX_HW_QUEUES];
 	bool msix_enabled;
-	u32 allocated_vector;
-	u32 default_irq_num;
+	u8 shared_vec_mask;
+	u32 alloc_vecs;
+	u32 def_irq;
 	u32 fh_init_mask;
 	u32 hw_init_mask;
 	u32 fh_mask;
 	u32 hw_mask;
+	cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES];
 };
 
 static inline struct iwl_trans_pcie *
@@ -474,6 +484,7 @@
 				bool configure_scd);
 void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id,
 					bool shared_mode);
+dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq);
 void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans,
 				  struct iwl_txq *txq);
 int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
@@ -486,11 +497,20 @@
 			    struct sk_buff_head *skbs);
 void iwl_trans_pcie_tx_reset(struct iwl_trans *trans);
 
-static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_tfd *tfd, u8 idx)
+static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *_tfd,
+					  u8 idx)
 {
-	struct iwl_tfd_tb *tb = &tfd->tbs[idx];
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd = _tfd;
+		struct iwl_tfh_tb *tb = &tfd->tbs[idx];
 
-	return le16_to_cpu(tb->hi_n_len) >> 4;
+		return le16_to_cpu(tb->tb_len);
+	} else {
+		struct iwl_tfd *tfd = _tfd;
+		struct iwl_tfd_tb *tb = &tfd->tbs[idx];
+
+		return le16_to_cpu(tb->hi_n_len) >> 4;
+	}
 }
 
 /*****************************************************
@@ -617,9 +637,9 @@
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
-	if (test_and_clear_bit(txq->q.id, trans_pcie->queue_stopped)) {
-		IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->q.id);
-		iwl_op_mode_queue_not_full(trans->op_mode, txq->q.id);
+	if (test_and_clear_bit(txq->id, trans_pcie->queue_stopped)) {
+		IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->id);
+		iwl_op_mode_queue_not_full(trans->op_mode, txq->id);
 	}
 }
 
@@ -628,22 +648,22 @@
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
-	if (!test_and_set_bit(txq->q.id, trans_pcie->queue_stopped)) {
-		iwl_op_mode_queue_full(trans->op_mode, txq->q.id);
-		IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->q.id);
+	if (!test_and_set_bit(txq->id, trans_pcie->queue_stopped)) {
+		iwl_op_mode_queue_full(trans->op_mode, txq->id);
+		IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->id);
 	} else
 		IWL_DEBUG_TX_QUEUES(trans, "hwq %d already stopped\n",
-				    txq->q.id);
+				    txq->id);
 }
 
-static inline bool iwl_queue_used(const struct iwl_queue *q, int i)
+static inline bool iwl_queue_used(const struct iwl_txq *q, int i)
 {
 	return q->write_ptr >= q->read_ptr ?
 		(i >= q->read_ptr && i < q->write_ptr) :
 		!(i < q->read_ptr && i >= q->write_ptr);
 }
 
-static inline u8 get_cmd_index(struct iwl_queue *q, u32 index)
+static inline u8 get_cmd_index(struct iwl_txq *q, u32 index)
 {
 	return index & (q->n_window - 1);
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 5c36e6d..6fe5546 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -487,15 +487,13 @@
 
 	while (pending) {
 		int i;
-		struct list_head local_allocated;
+		LIST_HEAD(local_allocated);
 		gfp_t gfp_mask = GFP_KERNEL;
 
 		/* Do not post a warning if there are only a few requests */
 		if (pending < RX_PENDING_WATERMARK)
 			gfp_mask |= __GFP_NOWARN;
 
-		INIT_LIST_HEAD(&local_allocated);
-
 		for (i = 0; i < RX_CLAIM_REQ_ALLOC;) {
 			struct iwl_rx_mem_buffer *rxb;
 			struct page *page;
@@ -1108,13 +1106,14 @@
 			FH_RSCSR_RXQ_POS != rxq->id);
 
 		IWL_DEBUG_RX(trans,
-			     "cmd at offset %d: %s (0x%.2x, seq 0x%x)\n",
+			     "cmd at offset %d: %s (%.2x.%2x, seq 0x%x)\n",
 			     rxcb._offset,
 			     iwl_get_cmd_string(trans,
 						iwl_cmd_id(pkt->hdr.cmd,
 							   pkt->hdr.group_id,
 							   0)),
-			     pkt->hdr.cmd, le16_to_cpu(pkt->hdr.sequence));
+			     pkt->hdr.group_id, pkt->hdr.cmd,
+			     le16_to_cpu(pkt->hdr.sequence));
 
 		len = iwl_rx_packet_len(pkt);
 		len += sizeof(u32); /* account for status word */
@@ -1142,7 +1141,7 @@
 
 		sequence = le16_to_cpu(pkt->hdr.sequence);
 		index = SEQ_TO_INDEX(sequence);
-		cmd_index = get_cmd_index(&txq->q, index);
+		cmd_index = get_cmd_index(txq, index);
 
 		if (rxq->id == 0)
 			iwl_op_mode_rx(trans->op_mode, &rxq->napi,
@@ -1885,6 +1884,20 @@
 			      inta_fh,
 			      iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD));
 
+	if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX) &&
+	    inta_fh & MSIX_FH_INT_CAUSES_Q0) {
+		local_bh_disable();
+		iwl_pcie_rx_handle(trans, 0);
+		local_bh_enable();
+	}
+
+	if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS) &&
+	    inta_fh & MSIX_FH_INT_CAUSES_Q1) {
+		local_bh_disable();
+		iwl_pcie_rx_handle(trans, 1);
+		local_bh_enable();
+	}
+
 	/* This "Tx" DMA channel is used only for loading uCode */
 	if (inta_fh & MSIX_FH_INT_CAUSES_D2S_CH0_NUM) {
 		IWL_DEBUG_ISR(trans, "uCode load interrupt\n");
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 74f2f03..ae95533 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -827,10 +827,16 @@
 		if (ret)
 			return ret;
 
-		/* Notify the ucode of the loaded section number and status */
-		val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS);
-		val = val | (sec_num << shift_param);
-		iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val);
+		/* Notify ucode of loaded section number and status */
+		if (trans->cfg->use_tfh) {
+			val = iwl_read_prph(trans, UREG_UCODE_LOAD_STATUS);
+			val = val | (sec_num << shift_param);
+			iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, val);
+		} else {
+			val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS);
+			val = val | (sec_num << shift_param);
+			iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val);
+		}
 		sec_num = (sec_num << 1) | 0x1;
 	}
 
@@ -838,10 +844,21 @@
 
 	iwl_enable_interrupts(trans);
 
-	if (cpu == 1)
-		iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFF);
-	else
-		iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFFFFFF);
+	if (trans->cfg->use_tfh) {
+		if (cpu == 1)
+			iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS,
+				       0xFFFF);
+		else
+			iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS,
+				       0xFFFFFFFF);
+	} else {
+		if (cpu == 1)
+			iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS,
+					   0xFFFF);
+		else
+			iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS,
+					   0xFFFFFFFF);
+	}
 
 	return 0;
 }
@@ -886,14 +903,6 @@
 			return ret;
 	}
 
-	if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000)
-		iwl_set_bits_prph(trans,
-				  CSR_UCODE_LOAD_STATUS_ADDR,
-				  (LMPM_CPU_UCODE_LOADING_COMPLETED |
-				   LMPM_CPU_HDRS_LOADING_COMPLETED |
-				   LMPM_CPU_UCODE_LOADING_STARTED) <<
-					shift_param);
-
 	*first_ucode_section = last_read_idx;
 
 	return 0;
@@ -1161,7 +1170,7 @@
 	if (trans_pcie->msix_enabled) {
 		int i;
 
-		for (i = 0; i < trans_pcie->allocated_vector; i++)
+		for (i = 0; i < trans_pcie->alloc_vecs; i++)
 			synchronize_irq(trans_pcie->msix_entries[i].vector);
 	} else {
 		synchronize_irq(trans_pcie->pci_dev->irq);
@@ -1420,13 +1429,58 @@
 	{MSIX_HW_INT_CAUSES_REG_HAP,		CSR_MSIX_HW_INT_MASK_AD, 0x2E},
 };
 
+static void iwl_pcie_map_non_rx_causes(struct iwl_trans *trans)
+{
+	struct iwl_trans_pcie *trans_pcie =  IWL_TRANS_GET_PCIE_TRANS(trans);
+	int val = trans_pcie->def_irq | MSIX_NON_AUTO_CLEAR_CAUSE;
+	int i;
+
+	/*
+	 * Access all non RX causes and map them to the default irq.
+	 * In case we are missing at least one interrupt vector,
+	 * the first interrupt vector will serve non-RX and FBQ causes.
+	 */
+	for (i = 0; i < ARRAY_SIZE(causes_list); i++) {
+		iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val);
+		iwl_clear_bit(trans, causes_list[i].mask_reg,
+			      causes_list[i].cause_num);
+	}
+}
+
+static void iwl_pcie_map_rx_causes(struct iwl_trans *trans)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	u32 offset =
+		trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 1 : 0;
+	u32 val, idx;
+
+	/*
+	 * The first RX queue - fallback queue, which is designated for
+	 * management frame, command responses etc, is always mapped to the
+	 * first interrupt vector. The other RX queues are mapped to
+	 * the other (N - 2) interrupt vectors.
+	 */
+	val = BIT(MSIX_FH_INT_CAUSES_Q(0));
+	for (idx = 1; idx < trans->num_rx_queues; idx++) {
+		iwl_write8(trans, CSR_MSIX_RX_IVAR(idx),
+			   MSIX_FH_INT_CAUSES_Q(idx - offset));
+		val |= BIT(MSIX_FH_INT_CAUSES_Q(idx));
+	}
+	iwl_write32(trans, CSR_MSIX_FH_INT_MASK_AD, ~val);
+
+	val = MSIX_FH_INT_CAUSES_Q(0);
+	if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX)
+		val |= MSIX_NON_AUTO_CLEAR_CAUSE;
+	iwl_write8(trans, CSR_MSIX_RX_IVAR(0), val);
+
+	if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS)
+		iwl_write8(trans, CSR_MSIX_RX_IVAR(1), val);
+}
+
 static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie)
 {
-	u32 val, max_rx_vector, i;
 	struct iwl_trans *trans = trans_pcie->trans;
 
-	max_rx_vector = trans_pcie->allocated_vector - 1;
-
 	if (!trans_pcie->msix_enabled) {
 		if (trans->cfg->mq_rx_supported)
 			iwl_write_prph(trans, UREG_CHICK,
@@ -1437,25 +1491,16 @@
 	iwl_write_prph(trans, UREG_CHICK, UREG_CHICK_MSIX_ENABLE);
 
 	/*
-	 * Each cause from the list above and the RX causes is represented as
-	 * a byte in the IVAR table. We access the first (N - 1) bytes and map
-	 * them to the (N - 1) vectors so these vectors will be used as rx
-	 * vectors. Then access all non rx causes and map them to the
-	 * default queue (N'th queue).
+	 * Each cause from the causes list above and the RX causes is
+	 * represented as a byte in the IVAR table. The first nibble
+	 * represents the bound interrupt vector of the cause, the second
+	 * represents no auto clear for this cause. This will be set if its
+	 * interrupt vector is bound to serve other causes.
 	 */
-	for (i = 0; i < max_rx_vector; i++) {
-		iwl_write8(trans, CSR_MSIX_RX_IVAR(i), MSIX_FH_INT_CAUSES_Q(i));
-		iwl_clear_bit(trans, CSR_MSIX_FH_INT_MASK_AD,
-			      BIT(MSIX_FH_INT_CAUSES_Q(i)));
-	}
+	iwl_pcie_map_rx_causes(trans);
 
-	for (i = 0; i < ARRAY_SIZE(causes_list); i++) {
-		val = trans_pcie->default_irq_num |
-			MSIX_NON_AUTO_CLEAR_CAUSE;
-		iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val);
-		iwl_clear_bit(trans, causes_list[i].mask_reg,
-			      causes_list[i].cause_num);
-	}
+	iwl_pcie_map_non_rx_causes(trans);
+
 	trans_pcie->fh_init_mask =
 		~iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD);
 	trans_pcie->fh_mask = trans_pcie->fh_init_mask;
@@ -1468,40 +1513,55 @@
 					struct iwl_trans *trans)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	int max_irqs, num_irqs, i, ret, nr_online_cpus;
 	u16 pci_cmd;
-	int max_vector;
-	int ret, i;
 
-	if (trans->cfg->mq_rx_supported) {
-		max_vector = min_t(u32, (num_possible_cpus() + 2),
-				   IWL_MAX_RX_HW_QUEUES);
-		for (i = 0; i < max_vector; i++)
-			trans_pcie->msix_entries[i].entry = i;
+	if (!trans->cfg->mq_rx_supported)
+		goto enable_msi;
 
-		ret = pci_enable_msix_range(pdev, trans_pcie->msix_entries,
-					    MSIX_MIN_INTERRUPT_VECTORS,
-					    max_vector);
-		if (ret > 1) {
-			IWL_DEBUG_INFO(trans,
-				       "Enable MSI-X allocate %d interrupt vector\n",
-				       ret);
-			trans_pcie->allocated_vector = ret;
-			trans_pcie->default_irq_num =
-				trans_pcie->allocated_vector - 1;
-			trans_pcie->trans->num_rx_queues =
-				trans_pcie->allocated_vector - 1;
-			trans_pcie->msix_enabled = true;
+	nr_online_cpus = num_online_cpus();
+	max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES);
+	for (i = 0; i < max_irqs; i++)
+		trans_pcie->msix_entries[i].entry = i;
 
-			return;
-		}
+	num_irqs = pci_enable_msix_range(pdev, trans_pcie->msix_entries,
+					 MSIX_MIN_INTERRUPT_VECTORS,
+					 max_irqs);
+	if (num_irqs < 0) {
 		IWL_DEBUG_INFO(trans,
-			       "ret = %d %s move to msi mode\n", ret,
-			       (ret == 1) ?
-			       "can't allocate more than 1 interrupt vector" :
-			       "failed to enable msi-x mode");
-		pci_disable_msix(pdev);
+			       "Failed to enable msi-x mode (ret %d). Moving to msi mode.\n",
+			       num_irqs);
+		goto enable_msi;
+	}
+	trans_pcie->def_irq = (num_irqs == max_irqs) ? num_irqs - 1 : 0;
+
+	IWL_DEBUG_INFO(trans,
+		       "MSI-X enabled. %d interrupt vectors were allocated\n",
+		       num_irqs);
+
+	/*
+	 * In case the OS provides fewer interrupts than requested, different
+	 * causes will share the same interrupt vector as follows:
+	 * One interrupt less: non rx causes shared with FBQ.
+	 * Two interrupts less: non rx causes shared with FBQ and RSS.
+	 * More than two interrupts: we will use fewer RSS queues.
+	 */
+	if (num_irqs <= nr_online_cpus) {
+		trans_pcie->trans->num_rx_queues = num_irqs + 1;
+		trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX |
+			IWL_SHARED_IRQ_FIRST_RSS;
+	} else if (num_irqs == nr_online_cpus + 1) {
+		trans_pcie->trans->num_rx_queues = num_irqs;
+		trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX;
+	} else {
+		trans_pcie->trans->num_rx_queues = num_irqs - 1;
 	}
 
+	trans_pcie->alloc_vecs = num_irqs;
+	trans_pcie->msix_enabled = true;
+	return;
+
+enable_msi:
 	ret = pci_enable_msi(pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", ret);
@@ -1514,36 +1574,57 @@
 	}
 }
 
+static void iwl_pcie_irq_set_affinity(struct iwl_trans *trans)
+{
+	int iter_rx_q, i, ret, cpu, offset;
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+	i = trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 0 : 1;
+	iter_rx_q = trans_pcie->trans->num_rx_queues - 1 + i;
+	offset = 1 + i;
+	for (; i < iter_rx_q ; i++) {
+		/*
+		 * Get the cpu prior to the place to search
+		 * (i.e. return will be > i - 1).
+		 */
+		cpu = cpumask_next(i - offset, cpu_online_mask);
+		cpumask_set_cpu(cpu, &trans_pcie->affinity_mask[i]);
+		ret = irq_set_affinity_hint(trans_pcie->msix_entries[i].vector,
+					    &trans_pcie->affinity_mask[i]);
+		if (ret)
+			IWL_ERR(trans_pcie->trans,
+				"Failed to set affinity mask for IRQ %d\n",
+				i);
+	}
+}
+
 static int iwl_pcie_init_msix_handler(struct pci_dev *pdev,
 				      struct iwl_trans_pcie *trans_pcie)
 {
-	int i, last_vector;
+	int i;
 
-	last_vector = trans_pcie->trans->num_rx_queues;
-
-	for (i = 0; i < trans_pcie->allocated_vector; i++) {
+	for (i = 0; i < trans_pcie->alloc_vecs; i++) {
 		int ret;
+		struct msix_entry *msix_entry;
 
-		ret = request_threaded_irq(trans_pcie->msix_entries[i].vector,
-					   iwl_pcie_msix_isr,
-					   (i == last_vector) ?
-					   iwl_pcie_irq_msix_handler :
-					   iwl_pcie_irq_rx_msix_handler,
-					   IRQF_SHARED,
-					   DRV_NAME,
-					   &trans_pcie->msix_entries[i]);
+		msix_entry = &trans_pcie->msix_entries[i];
+		ret = devm_request_threaded_irq(&pdev->dev,
+						msix_entry->vector,
+						iwl_pcie_msix_isr,
+						(i == trans_pcie->def_irq) ?
+						iwl_pcie_irq_msix_handler :
+						iwl_pcie_irq_rx_msix_handler,
+						IRQF_SHARED,
+						DRV_NAME,
+						msix_entry);
 		if (ret) {
-			int j;
-
 			IWL_ERR(trans_pcie->trans,
 				"Error allocating IRQ %d\n", i);
-			for (j = 0; j < i; j++)
-				free_irq(trans_pcie->msix_entries[j].vector,
-					 &trans_pcie->msix_entries[j]);
-			pci_disable_msix(pdev);
+
 			return ret;
 		}
 	}
+	iwl_pcie_irq_set_affinity(trans_pcie->trans);
 
 	return 0;
 }
@@ -1672,7 +1753,6 @@
 	trans_pcie->rx_page_order =
 		iwl_trans_get_rb_size_order(trans_pcie->rx_buf_size);
 
-	trans_pcie->wide_cmd_header = trans_cfg->wide_cmd_header;
 	trans_pcie->bc_table_dword = trans_cfg->bc_table_dword;
 	trans_pcie->scd_set_active = trans_cfg->scd_set_active;
 	trans_pcie->sw_csum_tx = trans_cfg->sw_csum_tx;
@@ -1703,22 +1783,16 @@
 	iwl_pcie_rx_free(trans);
 
 	if (trans_pcie->msix_enabled) {
-		for (i = 0; i < trans_pcie->allocated_vector; i++)
-			free_irq(trans_pcie->msix_entries[i].vector,
-				 &trans_pcie->msix_entries[i]);
+		for (i = 0; i < trans_pcie->alloc_vecs; i++) {
+			irq_set_affinity_hint(
+				trans_pcie->msix_entries[i].vector,
+				NULL);
+		}
 
-		pci_disable_msix(trans_pcie->pci_dev);
 		trans_pcie->msix_enabled = false;
 	} else {
-		free_irq(trans_pcie->pci_dev->irq, trans);
-
 		iwl_pcie_free_ict(trans);
-
-		pci_disable_msi(trans_pcie->pci_dev);
 	}
-	iounmap(trans_pcie->hw_base);
-	pci_release_regions(trans_pcie->pci_dev);
-	pci_disable_device(trans_pcie->pci_dev);
 
 	iwl_pcie_free_fw_monitor(trans);
 
@@ -1890,7 +1964,7 @@
 
 		txq->frozen = freeze;
 
-		if (txq->q.read_ptr == txq->q.write_ptr)
+		if (txq->read_ptr == txq->write_ptr)
 			goto next_queue;
 
 		if (freeze) {
@@ -1938,7 +2012,7 @@
 			txq->block--;
 			if (!txq->block) {
 				iwl_write32(trans, HBUS_TARG_WRPTR,
-					    txq->q.write_ptr | (i << 8));
+					    txq->write_ptr | (i << 8));
 			}
 		} else if (block) {
 			txq->block++;
@@ -1958,10 +2032,14 @@
 	int cnt;
 
 	IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n",
-		txq->q.read_ptr, txq->q.write_ptr);
+		txq->read_ptr, txq->write_ptr);
+
+	if (trans->cfg->use_tfh)
+		/* TODO: access new SCD registers and dump them */
+		return;
 
 	scd_sram_addr = trans_pcie->scd_base_addr +
-			SCD_TX_STTS_QUEUE_OFFSET(txq->q.id);
+			SCD_TX_STTS_QUEUE_OFFSET(txq->id);
 	iwl_trans_read_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf));
 
 	iwl_print_hex_error(trans, buf, sizeof(buf));
@@ -1996,7 +2074,6 @@
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq;
-	struct iwl_queue *q;
 	int cnt;
 	unsigned long now = jiffies;
 	int ret = 0;
@@ -2014,13 +2091,12 @@
 
 		IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", cnt);
 		txq = &trans_pcie->txq[cnt];
-		q = &txq->q;
-		wr_ptr = ACCESS_ONCE(q->write_ptr);
+		wr_ptr = ACCESS_ONCE(txq->write_ptr);
 
-		while (q->read_ptr != ACCESS_ONCE(q->write_ptr) &&
+		while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) &&
 		       !time_after(jiffies,
 				   now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) {
-			u8 write_ptr = ACCESS_ONCE(q->write_ptr);
+			u8 write_ptr = ACCESS_ONCE(txq->write_ptr);
 
 			if (WARN_ONCE(wr_ptr != write_ptr,
 				      "WR pointer moved while flushing %d -> %d\n",
@@ -2029,7 +2105,7 @@
 			usleep_range(1000, 2000);
 		}
 
-		if (q->read_ptr != q->write_ptr) {
+		if (txq->read_ptr != txq->write_ptr) {
 			IWL_ERR(trans,
 				"fail to flush all tx fifo queues Q %d\n", cnt);
 			ret = -ETIMEDOUT;
@@ -2197,7 +2273,6 @@
 	struct iwl_trans *trans = file->private_data;
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq;
-	struct iwl_queue *q;
 	char *buf;
 	int pos = 0;
 	int cnt;
@@ -2215,10 +2290,9 @@
 
 	for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) {
 		txq = &trans_pcie->txq[cnt];
-		q = &txq->q;
 		pos += scnprintf(buf + pos, bufsz - pos,
 				"hwq %.2d: read=%u write=%u use=%d stop=%d need_update=%d frozen=%d%s\n",
-				cnt, q->read_ptr, q->write_ptr,
+				cnt, txq->read_ptr, txq->write_ptr,
 				!!test_bit(cnt, trans_pcie->queue_used),
 				 !!test_bit(cnt, trans_pcie->queue_stopped),
 				 txq->need_update, txq->frozen,
@@ -2424,13 +2498,14 @@
 }
 #endif /*CONFIG_IWLWIFI_DEBUGFS */
 
-static u32 iwl_trans_pcie_get_cmdlen(struct iwl_tfd *tfd)
+static u32 iwl_trans_pcie_get_cmdlen(struct iwl_trans *trans, void *tfd)
 {
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u32 cmdlen = 0;
 	int i;
 
-	for (i = 0; i < IWL_NUM_OF_TBS; i++)
-		cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i);
+	for (i = 0; i < trans_pcie->max_tbs; i++)
+		cmdlen += iwl_pcie_tfd_tb_get_len(trans, tfd, i);
 
 	return cmdlen;
 }
@@ -2645,7 +2720,7 @@
 
 	/* host commands */
 	len += sizeof(*data) +
-		cmdq->q.n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE);
+		cmdq->n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE);
 
 	/* FW monitor */
 	if (trans_pcie->fw_mon_page) {
@@ -2713,12 +2788,13 @@
 	data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_TXCMD);
 	txcmd = (void *)data->data;
 	spin_lock_bh(&cmdq->lock);
-	ptr = cmdq->q.write_ptr;
-	for (i = 0; i < cmdq->q.n_window; i++) {
-		u8 idx = get_cmd_index(&cmdq->q, ptr);
+	ptr = cmdq->write_ptr;
+	for (i = 0; i < cmdq->n_window; i++) {
+		u8 idx = get_cmd_index(cmdq, ptr);
 		u32 caplen, cmdlen;
 
-		cmdlen = iwl_trans_pcie_get_cmdlen(&cmdq->tfds[ptr]);
+		cmdlen = iwl_trans_pcie_get_cmdlen(trans, cmdq->tfds +
+						   trans_pcie->tfd_size * ptr);
 		caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen);
 
 		if (cmdlen) {
@@ -2788,6 +2864,8 @@
 	.txq_disable = iwl_trans_pcie_txq_disable,
 	.txq_enable = iwl_trans_pcie_txq_enable,
 
+	.get_txq_byte_table = iwl_trans_pcie_get_txq_byte_table,
+
 	.txq_set_shared_mode = iwl_trans_pcie_txq_set_shared_mode,
 
 	.wait_tx_queue_empty = iwl_trans_pcie_wait_txq_empty,
@@ -2821,13 +2899,15 @@
 	struct iwl_trans *trans;
 	int ret, addr_size;
 
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		return ERR_PTR(ret);
+
 	trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie),
 				&pdev->dev, cfg, &trans_ops_pcie, 0);
 	if (!trans)
 		return ERR_PTR(-ENOMEM);
 
-	trans->max_skb_frags = IWL_PCIE_MAX_FRAGS;
-
 	trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
 	trans_pcie->trans = trans;
@@ -2841,9 +2921,6 @@
 		goto out_no_pci;
 	}
 
-	ret = pci_enable_device(pdev);
-	if (ret)
-		goto out_no_pci;
 
 	if (!cfg->base_params->pcie_l1_allowed) {
 		/*
@@ -2861,6 +2938,16 @@
 	else
 		addr_size = 36;
 
+	if (cfg->use_tfh) {
+		trans_pcie->max_tbs = IWL_TFH_NUM_TBS;
+		trans_pcie->tfd_size = sizeof(struct iwl_tfh_tfd);
+
+	} else {
+		trans_pcie->max_tbs = IWL_NUM_OF_TBS;
+		trans_pcie->tfd_size = sizeof(struct iwl_tfd);
+	}
+	trans->max_skb_frags = IWL_PCIE_MAX_FRAGS(trans_pcie);
+
 	pci_set_master(pdev);
 
 	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size));
@@ -2875,21 +2962,21 @@
 		/* both attempts failed: */
 		if (ret) {
 			dev_err(&pdev->dev, "No suitable DMA available\n");
-			goto out_pci_disable_device;
+			goto out_no_pci;
 		}
 	}
 
-	ret = pci_request_regions(pdev, DRV_NAME);
+	ret = pcim_iomap_regions_request_all(pdev, BIT(0), DRV_NAME);
 	if (ret) {
-		dev_err(&pdev->dev, "pci_request_regions failed\n");
-		goto out_pci_disable_device;
+		dev_err(&pdev->dev, "pcim_iomap_regions_request_all failed\n");
+		goto out_no_pci;
 	}
 
-	trans_pcie->hw_base = pci_ioremap_bar(pdev, 0);
+	trans_pcie->hw_base = pcim_iomap_table(pdev)[0];
 	if (!trans_pcie->hw_base) {
-		dev_err(&pdev->dev, "pci_ioremap_bar failed\n");
+		dev_err(&pdev->dev, "pcim_iomap_table failed\n");
 		ret = -ENODEV;
-		goto out_pci_release_regions;
+		goto out_no_pci;
 	}
 
 	/* We disable the RETRY_TIMEOUT register (0x41) to keep
@@ -2916,7 +3003,7 @@
 		ret = iwl_pcie_prepare_card_hw(trans);
 		if (ret) {
 			IWL_WARN(trans, "Exit HW not ready\n");
-			goto out_pci_disable_msi;
+			goto out_no_pci;
 		}
 
 		/*
@@ -2933,7 +3020,7 @@
 				   25000);
 		if (ret < 0) {
 			IWL_DEBUG_INFO(trans, "Failed to wake up the nic\n");
-			goto out_pci_disable_msi;
+			goto out_no_pci;
 		}
 
 		if (iwl_trans_grab_nic_access(trans, &flags)) {
@@ -2965,15 +3052,16 @@
 
 	if (trans_pcie->msix_enabled) {
 		if (iwl_pcie_init_msix_handler(pdev, trans_pcie))
-			goto out_pci_release_regions;
+			goto out_no_pci;
 	 } else {
 		ret = iwl_pcie_alloc_ict(trans);
 		if (ret)
-			goto out_pci_disable_msi;
+			goto out_no_pci;
 
-		ret = request_threaded_irq(pdev->irq, iwl_pcie_isr,
-					   iwl_pcie_irq_handler,
-					   IRQF_SHARED, DRV_NAME, trans);
+		ret = devm_request_threaded_irq(&pdev->dev, pdev->irq,
+						iwl_pcie_isr,
+						iwl_pcie_irq_handler,
+						IRQF_SHARED, DRV_NAME, trans);
 		if (ret) {
 			IWL_ERR(trans, "Error allocating IRQ %d\n", pdev->irq);
 			goto out_free_ict;
@@ -2991,12 +3079,6 @@
 
 out_free_ict:
 	iwl_pcie_free_ict(trans);
-out_pci_disable_msi:
-	pci_disable_msi(pdev);
-out_pci_release_regions:
-	pci_release_regions(pdev);
-out_pci_disable_device:
-	pci_disable_device(pdev);
 out_no_pci:
 	free_percpu(trans_pcie->tso_hdr_page);
 	iwl_trans_free(trans);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 18650dc..e9a278b 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -71,7 +71,7 @@
  *
  ***************************************************/
 
-static int iwl_queue_space(const struct iwl_queue *q)
+static int iwl_queue_space(const struct iwl_txq *q)
 {
 	unsigned int max;
 	unsigned int used;
@@ -102,7 +102,7 @@
 /*
  * iwl_queue_init - Initialize queue's high/low-water and read/write indexes
  */
-static int iwl_queue_init(struct iwl_queue *q, int slots_num, u32 id)
+static int iwl_queue_init(struct iwl_txq *q, int slots_num, u32 id)
 {
 	q->n_window = slots_num;
 	q->id = id;
@@ -158,13 +158,13 @@
 
 	spin_lock(&txq->lock);
 	/* check if triggered erroneously */
-	if (txq->q.read_ptr == txq->q.write_ptr) {
+	if (txq->read_ptr == txq->write_ptr) {
 		spin_unlock(&txq->lock);
 		return;
 	}
 	spin_unlock(&txq->lock);
 
-	IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->q.id,
+	IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->id,
 		jiffies_to_msecs(txq->wd_timeout));
 
 	iwl_trans_pcie_log_scd_error(trans, txq);
@@ -176,22 +176,21 @@
  * iwl_pcie_txq_update_byte_cnt_tbl - Set up entry in Tx byte-count array
  */
 static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans,
-					     struct iwl_txq *txq, u16 byte_cnt)
+					     struct iwl_txq *txq, u16 byte_cnt,
+					     int num_tbs)
 {
 	struct iwlagn_scd_bc_tbl *scd_bc_tbl;
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	int write_ptr = txq->q.write_ptr;
-	int txq_id = txq->q.id;
+	int write_ptr = txq->write_ptr;
+	int txq_id = txq->id;
 	u8 sec_ctl = 0;
-	u8 sta_id = 0;
 	u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE;
 	__le16 bc_ent;
 	struct iwl_tx_cmd *tx_cmd =
-		(void *) txq->entries[txq->q.write_ptr].cmd->payload;
+		(void *)txq->entries[txq->write_ptr].cmd->payload;
 
 	scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
 
-	sta_id = tx_cmd->sta_id;
 	sec_ctl = tx_cmd->sec_ctl;
 
 	switch (sec_ctl & TX_CMD_SEC_MSK) {
@@ -205,14 +204,32 @@
 		len += IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN;
 		break;
 	}
-
 	if (trans_pcie->bc_table_dword)
 		len = DIV_ROUND_UP(len, 4);
 
 	if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX))
 		return;
 
-	bc_ent = cpu_to_le16(len | (sta_id << 12));
+	if (trans->cfg->use_tfh) {
+		u8 filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
+				     num_tbs * sizeof(struct iwl_tfh_tb);
+		/*
+		 * filled_tfd_size contains the number of filled bytes in the
+		 * TFD.
+		 * Dividing it by 64 will give the number of chunks to fetch
+		 * to SRAM- 0 for one chunk, 1 for 2 and so on.
+		 * If, for example, TFD contains only 3 TBs then 32 bytes
+		 * of the TFD are used, and only one chunk of 64 bytes should
+		 * be fetched
+		 */
+		u8 num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;
+
+		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
+	} else {
+		u8 sta_id = tx_cmd->sta_id;
+
+		bc_ent = cpu_to_le16(len | (sta_id << 12));
+	}
 
 	scd_bc_tbl[txq_id].tfd_offset[write_ptr] = bc_ent;
 
@@ -227,12 +244,12 @@
 	struct iwl_trans_pcie *trans_pcie =
 		IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr;
-	int txq_id = txq->q.id;
-	int read_ptr = txq->q.read_ptr;
+	int txq_id = txq->id;
+	int read_ptr = txq->read_ptr;
 	u8 sta_id = 0;
 	__le16 bc_ent;
 	struct iwl_tx_cmd *tx_cmd =
-		(void *)txq->entries[txq->q.read_ptr].cmd->payload;
+		(void *)txq->entries[read_ptr].cmd->payload;
 
 	WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
 
@@ -240,6 +257,7 @@
 		sta_id = tx_cmd->sta_id;
 
 	bc_ent = cpu_to_le16(1 | (sta_id << 12));
+
 	scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent;
 
 	if (read_ptr < TFD_QUEUE_SIZE_BC_DUP)
@@ -255,7 +273,7 @@
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u32 reg = 0;
-	int txq_id = txq->q.id;
+	int txq_id = txq->id;
 
 	lockdep_assert_held(&txq->lock);
 
@@ -289,10 +307,10 @@
 	 * if not in power-save mode, uCode will never sleep when we're
 	 * trying to tx (during RFKILL, we're not trying to tx).
 	 */
-	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->q.write_ptr);
+	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr);
 	if (!txq->block)
 		iwl_write32(trans, HBUS_TARG_WRPTR,
-			    txq->q.write_ptr | (txq_id << 8));
+			    txq->write_ptr | (txq_id << 8));
 }
 
 void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
@@ -312,49 +330,93 @@
 	}
 }
 
-static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx)
+static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie,
+				     struct iwl_txq *txq, int idx)
 {
-	struct iwl_tfd_tb *tb = &tfd->tbs[idx];
-
-	dma_addr_t addr = get_unaligned_le32(&tb->lo);
-	if (sizeof(dma_addr_t) > sizeof(u32))
-		addr |=
-		((dma_addr_t)(le16_to_cpu(tb->hi_n_len) & 0xF) << 16) << 16;
-
-	return addr;
+	return txq->tfds + trans_pcie->tfd_size * idx;
 }
 
-static inline void iwl_pcie_tfd_set_tb(struct iwl_tfd *tfd, u8 idx,
-				       dma_addr_t addr, u16 len)
+static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans,
+						  void *_tfd, u8 idx)
 {
-	struct iwl_tfd_tb *tb = &tfd->tbs[idx];
-	u16 hi_n_len = len << 4;
 
-	put_unaligned_le32(addr, &tb->lo);
-	if (sizeof(dma_addr_t) > sizeof(u32))
-		hi_n_len |= ((addr >> 16) >> 16) & 0xF;
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd = _tfd;
+		struct iwl_tfh_tb *tb = &tfd->tbs[idx];
 
-	tb->hi_n_len = cpu_to_le16(hi_n_len);
+		return (dma_addr_t)(le64_to_cpu(tb->addr));
+	} else {
+		struct iwl_tfd *tfd = _tfd;
+		struct iwl_tfd_tb *tb = &tfd->tbs[idx];
+		dma_addr_t addr = get_unaligned_le32(&tb->lo);
+		dma_addr_t hi_len;
 
-	tfd->num_tbs = idx + 1;
+		if (sizeof(dma_addr_t) <= sizeof(u32))
+			return addr;
+
+		hi_len = le16_to_cpu(tb->hi_n_len) & 0xF;
+
+		/*
+		 * shift by 16 twice to avoid warnings on 32-bit
+		 * (where this code never runs anyway due to the
+		 * if statement above)
+		 */
+		return addr | ((hi_len << 16) << 16);
+	}
 }
 
-static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_tfd *tfd)
+static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd,
+				       u8 idx, dma_addr_t addr, u16 len)
 {
-	return tfd->num_tbs & 0x1f;
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
+		struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx];
+
+		put_unaligned_le64(addr, &tb->addr);
+		tb->tb_len = cpu_to_le16(len);
+
+		tfd_fh->num_tbs = cpu_to_le16(idx + 1);
+	} else {
+		struct iwl_tfd *tfd_fh = (void *)tfd;
+		struct iwl_tfd_tb *tb = &tfd_fh->tbs[idx];
+
+		u16 hi_n_len = len << 4;
+
+		put_unaligned_le32(addr, &tb->lo);
+		if (sizeof(dma_addr_t) > sizeof(u32))
+			hi_n_len |= ((addr >> 16) >> 16) & 0xF;
+
+		tb->hi_n_len = cpu_to_le16(hi_n_len);
+
+		tfd_fh->num_tbs = idx + 1;
+	}
+}
+
+static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *_tfd)
+{
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd = _tfd;
+
+		return le16_to_cpu(tfd->num_tbs) & 0x1f;
+	} else {
+		struct iwl_tfd *tfd = _tfd;
+
+		return tfd->num_tbs & 0x1f;
+	}
 }
 
 static void iwl_pcie_tfd_unmap(struct iwl_trans *trans,
 			       struct iwl_cmd_meta *meta,
-			       struct iwl_tfd *tfd)
+			       struct iwl_txq *txq, int index)
 {
-	int i;
-	int num_tbs;
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	int i, num_tbs;
+	void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index);
 
 	/* Sanity check on number of chunks */
-	num_tbs = iwl_pcie_tfd_get_num_tbs(tfd);
+	num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
 
-	if (num_tbs >= IWL_NUM_OF_TBS) {
+	if (num_tbs >= trans_pcie->max_tbs) {
 		IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
 		/* @todo issue fatal error, it is quite serious situation */
 		return;
@@ -363,18 +425,30 @@
 	/* first TB is never freed - it's the bidirectional DMA data */
 
 	for (i = 1; i < num_tbs; i++) {
-		if (meta->flags & BIT(i + CMD_TB_BITMAP_POS))
+		if (meta->tbs & BIT(i))
 			dma_unmap_page(trans->dev,
-				       iwl_pcie_tfd_tb_get_addr(tfd, i),
-				       iwl_pcie_tfd_tb_get_len(tfd, i),
+				       iwl_pcie_tfd_tb_get_addr(trans, tfd, i),
+				       iwl_pcie_tfd_tb_get_len(trans, tfd, i),
 				       DMA_TO_DEVICE);
 		else
 			dma_unmap_single(trans->dev,
-					 iwl_pcie_tfd_tb_get_addr(tfd, i),
-					 iwl_pcie_tfd_tb_get_len(tfd, i),
+					 iwl_pcie_tfd_tb_get_addr(trans, tfd,
+								  i),
+					 iwl_pcie_tfd_tb_get_len(trans, tfd,
+								 i),
 					 DMA_TO_DEVICE);
 	}
-	tfd->num_tbs = 0;
+
+	if (trans->cfg->use_tfh) {
+		struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
+
+		tfd_fh->num_tbs = 0;
+	} else {
+		struct iwl_tfd *tfd_fh = (void *)tfd;
+
+		tfd_fh->num_tbs = 0;
+	}
+
 }
 
 /*
@@ -388,20 +462,18 @@
  */
 static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
 {
-	struct iwl_tfd *tfd_tmp = txq->tfds;
-
 	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
 	 * idx is bounded by n_window
 	 */
-	int rd_ptr = txq->q.read_ptr;
-	int idx = get_cmd_index(&txq->q, rd_ptr);
+	int rd_ptr = txq->read_ptr;
+	int idx = get_cmd_index(txq, rd_ptr);
 
 	lockdep_assert_held(&txq->lock);
 
 	/* We have only q->n_window txq->entries, but we use
 	 * TFD_QUEUE_SIZE_MAX tfds
 	 */
-	iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr]);
+	iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr);
 
 	/* free SKB */
 	if (txq->entries) {
@@ -423,23 +495,21 @@
 static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
 				  dma_addr_t addr, u16 len, bool reset)
 {
-	struct iwl_queue *q;
-	struct iwl_tfd *tfd, *tfd_tmp;
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+	void *tfd;
 	u32 num_tbs;
 
-	q = &txq->q;
-	tfd_tmp = txq->tfds;
-	tfd = &tfd_tmp[q->write_ptr];
+	tfd = txq->tfds + trans_pcie->tfd_size * txq->write_ptr;
 
 	if (reset)
-		memset(tfd, 0, sizeof(*tfd));
+		memset(tfd, 0, trans_pcie->tfd_size);
 
-	num_tbs = iwl_pcie_tfd_get_num_tbs(tfd);
+	num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd);
 
-	/* Each TFD can point to a maximum 20 Tx buffers */
-	if (num_tbs >= IWL_NUM_OF_TBS) {
+	/* Each TFD can point to a maximum max_tbs Tx buffers */
+	if (num_tbs >= trans_pcie->max_tbs) {
 		IWL_ERR(trans, "Error can not send more than %d chunks\n",
-			IWL_NUM_OF_TBS);
+			trans_pcie->max_tbs);
 		return -EINVAL;
 	}
 
@@ -447,7 +517,7 @@
 		 "Unaligned address = %llx\n", (unsigned long long)addr))
 		return -EINVAL;
 
-	iwl_pcie_tfd_set_tb(tfd, num_tbs, addr, len);
+	iwl_pcie_tfd_set_tb(trans, tfd, num_tbs, addr, len);
 
 	return num_tbs;
 }
@@ -457,7 +527,7 @@
 			       u32 txq_id)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX;
+	size_t tfd_sz = trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX;
 	size_t tb0_buf_sz;
 	int i;
 
@@ -468,7 +538,7 @@
 		    (unsigned long)txq);
 	txq->trans_pcie = trans_pcie;
 
-	txq->q.n_window = slots_num;
+	txq->n_window = slots_num;
 
 	txq->entries = kcalloc(slots_num,
 			       sizeof(struct iwl_pcie_txq_entry),
@@ -489,7 +559,7 @@
 	/* Circular buffer of transmit frame descriptors (TFDs),
 	 * shared with device */
 	txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz,
-				       &txq->q.dma_addr, GFP_KERNEL);
+				       &txq->dma_addr, GFP_KERNEL);
 	if (!txq->tfds)
 		goto error;
 
@@ -503,11 +573,11 @@
 	if (!txq->first_tb_bufs)
 		goto err_free_tfds;
 
-	txq->q.id = txq_id;
+	txq->id = txq_id;
 
 	return 0;
 err_free_tfds:
-	dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->q.dma_addr);
+	dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr);
 error:
 	if (txq->entries && txq_id == trans_pcie->cmd_queue)
 		for (i = 0; i < slots_num; i++)
@@ -531,7 +601,7 @@
 	BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1));
 
 	/* Initialize queue's high/low-water marks, and head/tail indexes */
-	ret = iwl_queue_init(&txq->q, slots_num, txq_id);
+	ret = iwl_queue_init(txq, slots_num, txq_id);
 	if (ret)
 		return ret;
 
@@ -545,10 +615,10 @@
 	if (trans->cfg->use_tfh)
 		iwl_write_direct64(trans,
 				   FH_MEM_CBBC_QUEUE(trans, txq_id),
-				   txq->q.dma_addr);
+				   txq->dma_addr);
 	else
 		iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id),
-				   txq->q.dma_addr >> 8);
+				   txq->dma_addr >> 8);
 
 	return 0;
 }
@@ -595,15 +665,14 @@
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-	struct iwl_queue *q = &txq->q;
 
 	spin_lock_bh(&txq->lock);
-	while (q->write_ptr != q->read_ptr) {
+	while (txq->write_ptr != txq->read_ptr) {
 		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
-				   txq_id, q->read_ptr);
+				   txq_id, txq->read_ptr);
 
 		if (txq_id != trans_pcie->cmd_queue) {
-			struct sk_buff *skb = txq->entries[q->read_ptr].skb;
+			struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
 
 			if (WARN_ON_ONCE(!skb))
 				continue;
@@ -611,15 +680,15 @@
 			iwl_pcie_free_tso_page(trans_pcie, skb);
 		}
 		iwl_pcie_txq_free_tfd(trans, txq);
-		q->read_ptr = iwl_queue_inc_wrap(q->read_ptr);
+		txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr);
 
-		if (q->read_ptr == q->write_ptr) {
+		if (txq->read_ptr == txq->write_ptr) {
 			unsigned long flags;
 
 			spin_lock_irqsave(&trans_pcie->reg_lock, flags);
 			if (txq_id != trans_pcie->cmd_queue) {
 				IWL_DEBUG_RPM(trans, "Q %d - last tx freed\n",
-					      q->id);
+					      txq->id);
 				iwl_trans_unref(trans);
 			} else {
 				iwl_pcie_clear_cmd_in_flight(trans);
@@ -663,7 +732,7 @@
 
 	/* De-alloc array of command/tx buffers */
 	if (txq_id == trans_pcie->cmd_queue)
-		for (i = 0; i < txq->q.n_window; i++) {
+		for (i = 0; i < txq->n_window; i++) {
 			kzfree(txq->entries[i].cmd);
 			kzfree(txq->entries[i].free_buf);
 		}
@@ -671,13 +740,13 @@
 	/* De-alloc circular buffer of TFDs */
 	if (txq->tfds) {
 		dma_free_coherent(dev,
-				  sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX,
-				  txq->tfds, txq->q.dma_addr);
-		txq->q.dma_addr = 0;
+				  trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX,
+				  txq->tfds, txq->dma_addr);
+		txq->dma_addr = 0;
 		txq->tfds = NULL;
 
 		dma_free_coherent(dev,
-				  sizeof(*txq->first_tb_bufs) * txq->q.n_window,
+				  sizeof(*txq->first_tb_bufs) * txq->n_window,
 				  txq->first_tb_bufs, txq->first_tb_dma);
 	}
 
@@ -703,6 +772,9 @@
 	memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
 	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
 
+	if (trans->cfg->use_tfh)
+		return;
+
 	trans_pcie->scd_base_addr =
 		iwl_read_prph(trans, SCD_SRAM_BASE_ADDR);
 
@@ -758,14 +830,14 @@
 		if (trans->cfg->use_tfh)
 			iwl_write_direct64(trans,
 					   FH_MEM_CBBC_QUEUE(trans, txq_id),
-					   txq->q.dma_addr);
+					   txq->dma_addr);
 		else
 			iwl_write_direct32(trans,
 					   FH_MEM_CBBC_QUEUE(trans, txq_id),
-					   txq->q.dma_addr >> 8);
+					   txq->dma_addr >> 8);
 		iwl_pcie_txq_unmap(trans, txq_id);
-		txq->q.read_ptr = 0;
-		txq->q.write_ptr = 0;
+		txq->read_ptr = 0;
+		txq->write_ptr = 0;
 	}
 
 	/* Tell NIC where to find the "keep warm" buffer */
@@ -970,11 +1042,13 @@
 		}
 	}
 
-	if (trans->cfg->use_tfh)
+	if (trans->cfg->use_tfh) {
 		iwl_write_direct32(trans, TFH_TRANSFER_MODE,
 				   TFH_TRANSFER_MAX_PENDING_REQ |
 				   TFH_CHUNK_SIZE_128 |
 				   TFH_CHUNK_SPLIT_MODE);
+		return 0;
+	}
 
 	iwl_set_bits_prph(trans, SCD_GP_CTRL, SCD_GP_CTRL_AUTO_ACTIVE_MODE);
 	if (trans->cfg->base_params->num_of_queues > 20)
@@ -1007,7 +1081,7 @@
 	 * if empty delete timer, otherwise move timer forward
 	 * since we're making progress on this queue
 	 */
-	if (txq->q.read_ptr == txq->q.write_ptr)
+	if (txq->read_ptr == txq->write_ptr)
 		del_timer(&txq->stuck_timer);
 	else
 		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
@@ -1020,7 +1094,6 @@
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[txq_id];
 	int tfd_num = ssn & (TFD_QUEUE_SIZE_MAX - 1);
-	struct iwl_queue *q = &txq->q;
 	int last_to_free;
 
 	/* This function is not meant to release cmd queue*/
@@ -1035,21 +1108,21 @@
 		goto out;
 	}
 
-	if (txq->q.read_ptr == tfd_num)
+	if (txq->read_ptr == tfd_num)
 		goto out;
 
 	IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n",
-			   txq_id, txq->q.read_ptr, tfd_num, ssn);
+			   txq_id, txq->read_ptr, tfd_num, ssn);
 
 	/*Since we free until index _not_ inclusive, the one before index is
 	 * the last we will free. This one must be used */
 	last_to_free = iwl_queue_dec_wrap(tfd_num);
 
-	if (!iwl_queue_used(q, last_to_free)) {
+	if (!iwl_queue_used(txq, last_to_free)) {
 		IWL_ERR(trans,
 			"%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
 			__func__, txq_id, last_to_free, TFD_QUEUE_SIZE_MAX,
-			q->write_ptr, q->read_ptr);
+			txq->write_ptr, txq->read_ptr);
 		goto out;
 	}
 
@@ -1057,9 +1130,9 @@
 		goto out;
 
 	for (;
-	     q->read_ptr != tfd_num;
-	     q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) {
-		struct sk_buff *skb = txq->entries[txq->q.read_ptr].skb;
+	     txq->read_ptr != tfd_num;
+	     txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) {
+		struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
 
 		if (WARN_ON_ONCE(!skb))
 			continue;
@@ -1068,16 +1141,17 @@
 
 		__skb_queue_tail(skbs, skb);
 
-		txq->entries[txq->q.read_ptr].skb = NULL;
+		txq->entries[txq->read_ptr].skb = NULL;
 
-		iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
+		if (!trans->cfg->use_tfh)
+			iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq);
 
 		iwl_pcie_txq_free_tfd(trans, txq);
 	}
 
 	iwl_pcie_txq_progress(txq);
 
-	if (iwl_queue_space(&txq->q) > txq->q.low_mark &&
+	if (iwl_queue_space(txq) > txq->low_mark &&
 	    test_bit(txq_id, trans_pcie->queue_stopped)) {
 		struct sk_buff_head overflow_skbs;
 
@@ -1109,12 +1183,12 @@
 		}
 		spin_lock_bh(&txq->lock);
 
-		if (iwl_queue_space(&txq->q) > txq->q.low_mark)
+		if (iwl_queue_space(txq) > txq->low_mark)
 			iwl_wake_queue(trans, txq);
 	}
 
-	if (q->read_ptr == q->write_ptr) {
-		IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", q->id);
+	if (txq->read_ptr == txq->write_ptr) {
+		IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", txq->id);
 		iwl_trans_unref(trans);
 	}
 
@@ -1176,31 +1250,30 @@
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[txq_id];
-	struct iwl_queue *q = &txq->q;
 	unsigned long flags;
 	int nfreed = 0;
 
 	lockdep_assert_held(&txq->lock);
 
-	if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(q, idx))) {
+	if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(txq, idx))) {
 		IWL_ERR(trans,
 			"%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
 			__func__, txq_id, idx, TFD_QUEUE_SIZE_MAX,
-			q->write_ptr, q->read_ptr);
+			txq->write_ptr, txq->read_ptr);
 		return;
 	}
 
-	for (idx = iwl_queue_inc_wrap(idx); q->read_ptr != idx;
-	     q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) {
+	for (idx = iwl_queue_inc_wrap(idx); txq->read_ptr != idx;
+	     txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) {
 
 		if (nfreed++ > 0) {
 			IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n",
-				idx, q->write_ptr, q->read_ptr);
+				idx, txq->write_ptr, txq->read_ptr);
 			iwl_force_nmi(trans);
 		}
 	}
 
-	if (q->read_ptr == q->write_ptr) {
+	if (txq->read_ptr == txq->write_ptr) {
 		spin_lock_irqsave(&trans_pcie->reg_lock, flags);
 		iwl_pcie_clear_cmd_in_flight(trans);
 		spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
@@ -1249,6 +1322,9 @@
 	if (test_and_set_bit(txq_id, trans_pcie->queue_used))
 		WARN_ONCE(1, "queue %d already used - expect issues", txq_id);
 
+	if (cfg && trans->cfg->use_tfh)
+		WARN_ONCE(1, "Expected no calls to SCD configuration");
+
 	txq->wd_timeout = msecs_to_jiffies(wdg_timeout);
 
 	if (cfg) {
@@ -1283,14 +1359,14 @@
 			 */
 			iwl_scd_txq_disable_agg(trans, txq_id);
 
-			ssn = txq->q.read_ptr;
+			ssn = txq->read_ptr;
 		}
 	}
 
 	/* Place first TFD at index corresponding to start sequence number.
 	 * Assumes that ssn_idx is valid (!= 0xFFF) */
-	txq->q.read_ptr = (ssn & 0xff);
-	txq->q.write_ptr = (ssn & 0xff);
+	txq->read_ptr = (ssn & 0xff);
+	txq->write_ptr = (ssn & 0xff);
 	iwl_write_direct32(trans, HBUS_TARG_WRPTR,
 			   (ssn & 0xff) | (txq_id << 8));
 
@@ -1343,6 +1419,14 @@
 	txq->ampdu = !shared_mode;
 }
 
+dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq)
+{
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+
+	return trans_pcie->scd_bc_tbls.dma +
+	       txq * sizeof(struct iwlagn_scd_bc_tbl);
+}
+
 void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
 				bool configure_scd)
 {
@@ -1366,6 +1450,9 @@
 		return;
 	}
 
+	if (configure_scd && trans->cfg->use_tfh)
+		WARN_ONCE(1, "Expected no calls to SCD configuration");
+
 	if (configure_scd) {
 		iwl_scd_txq_set_inactive(trans, txq_id);
 
@@ -1395,7 +1482,6 @@
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
-	struct iwl_queue *q = &txq->q;
 	struct iwl_device_cmd *out_cmd;
 	struct iwl_cmd_meta *out_meta;
 	unsigned long flags;
@@ -1410,7 +1496,7 @@
 	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
 	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
 
-	if (WARN(!trans_pcie->wide_cmd_header &&
+	if (WARN(!trans->wide_cmd_header &&
 		 group_id > IWL_ALWAYS_LONG_GROUP,
 		 "unsupported wide command %#x\n", cmd->id))
 		return -EINVAL;
@@ -1494,7 +1580,7 @@
 
 	spin_lock_bh(&txq->lock);
 
-	if (iwl_queue_space(q) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
+	if (iwl_queue_space(txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
 		spin_unlock_bh(&txq->lock);
 
 		IWL_ERR(trans, "No space in command queue\n");
@@ -1503,7 +1589,7 @@
 		goto free_dup_buf;
 	}
 
-	idx = get_cmd_index(q, q->write_ptr);
+	idx = get_cmd_index(txq, txq->write_ptr);
 	out_cmd = txq->entries[idx].cmd;
 	out_meta = &txq->entries[idx].meta;
 
@@ -1522,7 +1608,7 @@
 		out_cmd->hdr_wide.reserved = 0;
 		out_cmd->hdr_wide.sequence =
 			cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
-						 INDEX_TO_SEQ(q->write_ptr));
+						 INDEX_TO_SEQ(txq->write_ptr));
 
 		cmd_pos = sizeof(struct iwl_cmd_header_wide);
 		copy_size = sizeof(struct iwl_cmd_header_wide);
@@ -1530,7 +1616,7 @@
 		out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id);
 		out_cmd->hdr.sequence =
 			cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
-						 INDEX_TO_SEQ(q->write_ptr));
+						 INDEX_TO_SEQ(txq->write_ptr));
 		out_cmd->hdr.group_id = 0;
 
 		cmd_pos = sizeof(struct iwl_cmd_header);
@@ -1580,7 +1666,7 @@
 		     iwl_get_cmd_string(trans, cmd->id),
 		     group_id, out_cmd->hdr.cmd,
 		     le16_to_cpu(out_cmd->hdr.sequence),
-		     cmd_size, q->write_ptr, idx, trans_pcie->cmd_queue);
+		     cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue);
 
 	/* start the TFD with the minimum copy bytes */
 	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
@@ -1596,8 +1682,8 @@
 					   copy_size - tb0_size,
 					   DMA_TO_DEVICE);
 		if (dma_mapping_error(trans->dev, phys_addr)) {
-			iwl_pcie_tfd_unmap(trans, out_meta,
-					   &txq->tfds[q->write_ptr]);
+			iwl_pcie_tfd_unmap(trans, out_meta, txq,
+					   txq->write_ptr);
 			idx = -ENOMEM;
 			goto out;
 		}
@@ -1620,8 +1706,8 @@
 		phys_addr = dma_map_single(trans->dev, (void *)data,
 					   cmdlen[i], DMA_TO_DEVICE);
 		if (dma_mapping_error(trans->dev, phys_addr)) {
-			iwl_pcie_tfd_unmap(trans, out_meta,
-					   &txq->tfds[q->write_ptr]);
+			iwl_pcie_tfd_unmap(trans, out_meta, txq,
+					   txq->write_ptr);
 			idx = -ENOMEM;
 			goto out;
 		}
@@ -1629,8 +1715,7 @@
 		iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false);
 	}
 
-	BUILD_BUG_ON(IWL_NUM_OF_TBS + CMD_TB_BITMAP_POS >
-		     sizeof(out_meta->flags) * BITS_PER_BYTE);
+	BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
 	out_meta->flags = cmd->flags;
 	if (WARN_ON_ONCE(txq->entries[idx].free_buf))
 		kzfree(txq->entries[idx].free_buf);
@@ -1639,7 +1724,7 @@
 	trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);
 
 	/* start timer if queue currently empty */
-	if (q->read_ptr == q->write_ptr && txq->wd_timeout)
+	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
 		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
 
 	spin_lock_irqsave(&trans_pcie->reg_lock, flags);
@@ -1651,7 +1736,7 @@
 	}
 
 	/* Increment and update queue's write index */
-	q->write_ptr = iwl_queue_inc_wrap(q->write_ptr);
+	txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr);
 	iwl_pcie_txq_inc_wr_ptr(trans, txq);
 
 	spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
@@ -1689,20 +1774,20 @@
 	if (WARN(txq_id != trans_pcie->cmd_queue,
 		 "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d\n",
 		 txq_id, trans_pcie->cmd_queue, sequence,
-		 trans_pcie->txq[trans_pcie->cmd_queue].q.read_ptr,
-		 trans_pcie->txq[trans_pcie->cmd_queue].q.write_ptr)) {
+		 trans_pcie->txq[trans_pcie->cmd_queue].read_ptr,
+		 trans_pcie->txq[trans_pcie->cmd_queue].write_ptr)) {
 		iwl_print_hex_error(trans, pkt, 32);
 		return;
 	}
 
 	spin_lock_bh(&txq->lock);
 
-	cmd_index = get_cmd_index(&txq->q, index);
+	cmd_index = get_cmd_index(txq, index);
 	cmd = txq->entries[cmd_index].cmd;
 	meta = &txq->entries[cmd_index].meta;
 	cmd_id = iwl_cmd_id(cmd->hdr.cmd, group_id, 0);
 
-	iwl_pcie_tfd_unmap(trans, meta, &txq->tfds[index]);
+	iwl_pcie_tfd_unmap(trans, meta, txq, index);
 
 	/* Input error checking is done when commands are added to queue. */
 	if (meta->flags & CMD_WANT_SKB) {
@@ -1815,14 +1900,13 @@
 				 HOST_COMPLETE_TIMEOUT);
 	if (!ret) {
 		struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue];
-		struct iwl_queue *q = &txq->q;
 
 		IWL_ERR(trans, "Error sending %s: time out after %dms.\n",
 			iwl_get_cmd_string(trans, cmd->id),
 			jiffies_to_msecs(HOST_COMPLETE_TIMEOUT));
 
 		IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n",
-			q->read_ptr, q->write_ptr);
+			txq->read_ptr, txq->write_ptr);
 
 		clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
 		IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
@@ -1900,7 +1984,7 @@
 			     struct iwl_cmd_meta *out_meta,
 			     struct iwl_device_cmd *dev_cmd, u16 tb1_len)
 {
-	struct iwl_queue *q = &txq->q;
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u16 tb2_len;
 	int i;
 
@@ -1915,8 +1999,8 @@
 						     skb->data + hdr_len,
 						     tb2_len, DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) {
-			iwl_pcie_tfd_unmap(trans, out_meta,
-					   &txq->tfds[q->write_ptr]);
+			iwl_pcie_tfd_unmap(trans, out_meta, txq,
+					   txq->write_ptr);
 			return -EINVAL;
 		}
 		iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false);
@@ -1935,19 +2019,19 @@
 					   skb_frag_size(frag), DMA_TO_DEVICE);
 
 		if (unlikely(dma_mapping_error(trans->dev, tb_phys))) {
-			iwl_pcie_tfd_unmap(trans, out_meta,
-					   &txq->tfds[q->write_ptr]);
+			iwl_pcie_tfd_unmap(trans, out_meta, txq,
+					   txq->write_ptr);
 			return -EINVAL;
 		}
 		tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
 						skb_frag_size(frag), false);
 
-		out_meta->flags |= BIT(tb_idx + CMD_TB_BITMAP_POS);
+		out_meta->tbs |= BIT(tb_idx);
 	}
 
 	trace_iwlwifi_dev_tx(trans->dev, skb,
-			     &txq->tfds[txq->q.write_ptr],
-			     sizeof(struct iwl_tfd),
+			     iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+			     trans_pcie->tfd_size,
 			     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
 			     skb->data + hdr_len, tb2_len);
 	trace_iwlwifi_dev_tx_data(trans->dev, skb,
@@ -2008,7 +2092,6 @@
 	struct ieee80211_hdr *hdr = (void *)skb->data;
 	unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
 	unsigned int mss = skb_shinfo(skb)->gso_size;
-	struct iwl_queue *q = &txq->q;
 	u16 length, iv_len, amsdu_pad;
 	u8 *start_hdr;
 	struct iwl_tso_hdr_page *hdr_page;
@@ -2022,8 +2105,8 @@
 		IEEE80211_CCMP_HDR_LEN : 0;
 
 	trace_iwlwifi_dev_tx(trans->dev, skb,
-			     &txq->tfds[txq->q.write_ptr],
-			     sizeof(struct iwl_tfd),
+			     iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr),
+			     trans_pcie->tfd_size,
 			     &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
 			     NULL, 0);
 
@@ -2179,7 +2262,7 @@
 	return 0;
 
 out_unmap:
-	iwl_pcie_tfd_unmap(trans, out_meta, &txq->tfds[q->write_ptr]);
+	iwl_pcie_tfd_unmap(trans, out_meta, txq, txq->write_ptr);
 	return ret;
 }
 #else /* CONFIG_INET */
@@ -2203,9 +2286,9 @@
 	struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload;
 	struct iwl_cmd_meta *out_meta;
 	struct iwl_txq *txq;
-	struct iwl_queue *q;
 	dma_addr_t tb0_phys, tb1_phys, scratch_phys;
 	void *tb1_addr;
+	void *tfd;
 	u16 len, tb1_len;
 	bool wait_write_ptr;
 	__le16 fc;
@@ -2214,7 +2297,6 @@
 	bool amsdu;
 
 	txq = &trans_pcie->txq[txq_id];
-	q = &txq->q;
 
 	if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used),
 		      "TX on unused queue %d\n", txq_id))
@@ -2236,7 +2318,7 @@
 	}
 
 	if (skb_is_nonlinear(skb) &&
-	    skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS &&
+	    skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) &&
 	    __skb_linearize(skb))
 		return -ENOMEM;
 
@@ -2249,11 +2331,11 @@
 
 	spin_lock(&txq->lock);
 
-	if (iwl_queue_space(q) < q->high_mark) {
+	if (iwl_queue_space(txq) < txq->high_mark) {
 		iwl_stop_queue(trans, txq);
 
 		/* don't put the packet on the ring, if there is no room */
-		if (unlikely(iwl_queue_space(q) < 3)) {
+		if (unlikely(iwl_queue_space(txq) < 3)) {
 			struct iwl_device_cmd **dev_cmd_ptr;
 
 			dev_cmd_ptr = (void *)((u8 *)skb->cb +
@@ -2274,19 +2356,19 @@
 	 */
 	wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
 	WARN_ONCE(txq->ampdu &&
-		  (wifi_seq & 0xff) != q->write_ptr,
+		  (wifi_seq & 0xff) != txq->write_ptr,
 		  "Q: %d WiFi Seq %d tfdNum %d",
-		  txq_id, wifi_seq, q->write_ptr);
+		  txq_id, wifi_seq, txq->write_ptr);
 
 	/* Set up driver data for this TFD */
-	txq->entries[q->write_ptr].skb = skb;
-	txq->entries[q->write_ptr].cmd = dev_cmd;
+	txq->entries[txq->write_ptr].skb = skb;
+	txq->entries[txq->write_ptr].cmd = dev_cmd;
 
 	dev_cmd->hdr.sequence =
 		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
-			    INDEX_TO_SEQ(q->write_ptr)));
+			    INDEX_TO_SEQ(txq->write_ptr)));
 
-	tb0_phys = iwl_pcie_get_first_tb_dma(txq, q->write_ptr);
+	tb0_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr);
 	scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
 		       offsetof(struct iwl_tx_cmd, scratch);
 
@@ -2294,7 +2376,7 @@
 	tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
 
 	/* Set up first empty entry in queue's array of Tx/cmd buffers */
-	out_meta = &txq->entries[q->write_ptr].meta;
+	out_meta = &txq->entries[txq->write_ptr].meta;
 	out_meta->flags = 0;
 
 	/*
@@ -2319,7 +2401,7 @@
 	}
 
 	/* The first TB points to bi-directional DMA data */
-	memcpy(&txq->first_tb_bufs[q->write_ptr], &dev_cmd->hdr,
+	memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr,
 	       IWL_FIRST_TB_SIZE);
 	iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
 			       IWL_FIRST_TB_SIZE, true);
@@ -2344,13 +2426,15 @@
 		goto out_err;
 	}
 
+	tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr);
 	/* Set up entry for this TFD in Tx byte-count array */
-	iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len));
+	iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len),
+					 iwl_pcie_tfd_get_num_tbs(trans, tfd));
 
 	wait_write_ptr = ieee80211_has_morefrags(fc);
 
 	/* start timer if queue currently empty */
-	if (q->read_ptr == q->write_ptr) {
+	if (txq->read_ptr == txq->write_ptr) {
 		if (txq->wd_timeout) {
 			/*
 			 * If the TXQ is active, then set the timer, if not,
@@ -2364,12 +2448,12 @@
 			else
 				txq->frozen_expiry_remainder = txq->wd_timeout;
 		}
-		IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", q->id);
+		IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", txq->id);
 		iwl_trans_ref(trans);
 	}
 
 	/* Tell device the write index *just past* this latest filled TFD */
-	q->write_ptr = iwl_queue_inc_wrap(q->write_ptr);
+	txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr);
 	if (!wait_write_ptr)
 		iwl_pcie_txq_inc_wr_ptr(trans, txq);
 
diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
index 3e5fa78..a5656bc 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c
@@ -3041,13 +3041,9 @@
 	    p->length > 1024 || !p->pointer)
 		return -EINVAL;
 
-	param = kmalloc(p->length, GFP_KERNEL);
-	if (param == NULL)
-		return -ENOMEM;
-
-	if (copy_from_user(param, p->pointer, p->length)) {
-		ret = -EFAULT;
-		goto out;
+	param = memdup_user(p->pointer, p->length);
+	if (IS_ERR(param)) {
+		return PTR_ERR(param);
 	}
 
 	if (p->length < sizeof(struct prism2_download_param) +
@@ -3803,13 +3799,9 @@
 	    p->length > PRISM2_HOSTAPD_MAX_BUF_SIZE || !p->pointer)
 		return -EINVAL;
 
-	param = kmalloc(p->length, GFP_KERNEL);
-	if (param == NULL)
-		return -ENOMEM;
-
-	if (copy_from_user(param, p->pointer, p->length)) {
-		ret = -EFAULT;
-		goto out;
+	param = memdup_user(p->pointer, p->length);
+	if (IS_ERR(param)) {
+		return PTR_ERR(param);
 	}
 
 	switch (param->cmd) {
diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
index 56f109b..bca6935 100644
--- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
+++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c
@@ -1613,10 +1613,8 @@
 			}
 
 			upriv->read_urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (!upriv->read_urb) {
-				err("No free urbs available");
+			if (!upriv->read_urb)
 				goto error;
-			}
 			if (le16_to_cpu(ep->wMaxPacketSize) != 64)
 				pr_warn("bulk in: wMaxPacketSize!= 64\n");
 			if (ep->bEndpointAddress != (2 | USB_DIR_IN))
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 8c35ac8..431f13b 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -487,7 +487,7 @@
 };
 
 static spinlock_t hwsim_radio_lock;
-static struct list_head hwsim_radios;
+static LIST_HEAD(hwsim_radios);
 static int hwsim_radio_idx;
 
 static struct platform_driver mac80211_hwsim_driver = {
@@ -3376,7 +3376,6 @@
 		mac80211_hwsim_unassign_vif_chanctx;
 
 	spin_lock_init(&hwsim_radio_lock);
-	INIT_LIST_HEAD(&hwsim_radios);
 
 	err = register_pernet_device(&hwsim_net_ops);
 	if (err)
diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
index 799a2ef..e0ade40d 100644
--- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c
+++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
@@ -198,22 +198,16 @@
 	}
 
 	cardp->rx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!cardp->rx_urb) {
-		lbtf_deb_usbd(&udev->dev, "Rx URB allocation failed\n");
+	if (!cardp->rx_urb)
 		goto dealloc;
-	}
 
 	cardp->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!cardp->tx_urb) {
-		lbtf_deb_usbd(&udev->dev, "Tx URB allocation failed\n");
+	if (!cardp->tx_urb)
 		goto dealloc;
-	}
 
 	cardp->cmd_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!cardp->cmd_urb) {
-		lbtf_deb_usbd(&udev->dev, "Cmd URB allocation failed\n");
+	if (!cardp->cmd_urb)
 		goto dealloc;
-	}
 
 	cardp->ep_out_buf = kmalloc(MRVDRV_ETH_TX_PACKET_BUFFER_SIZE,
 				    GFP_KERNEL);
diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c
index 81c60d0..43dccd5 100644
--- a/drivers/net/wireless/marvell/mwifiex/11h.c
+++ b/drivers/net/wireless/marvell/mwifiex/11h.c
@@ -260,22 +260,17 @@
 
 	rdr_event = (void *)(skb->data + sizeof(u32));
 
-	if (le32_to_cpu(rdr_event->passed)) {
-		mwifiex_dbg(priv->adapter, MSG,
-			    "radar detected; indicating kernel\n");
-		if (mwifiex_stop_radar_detection(priv, &priv->dfs_chandef))
-			mwifiex_dbg(priv->adapter, ERROR,
-				    "Failed to stop CAC in FW\n");
-		cfg80211_radar_event(priv->adapter->wiphy, &priv->dfs_chandef,
-				     GFP_KERNEL);
-		mwifiex_dbg(priv->adapter, MSG, "regdomain: %d\n",
-			    rdr_event->reg_domain);
-		mwifiex_dbg(priv->adapter, MSG, "radar detection type: %d\n",
-			    rdr_event->det_type);
-	} else {
-		mwifiex_dbg(priv->adapter, MSG,
-			    "false radar detection event!\n");
-	}
+	mwifiex_dbg(priv->adapter, MSG,
+		    "radar detected; indicating kernel\n");
+	if (mwifiex_stop_radar_detection(priv, &priv->dfs_chandef))
+		mwifiex_dbg(priv->adapter, ERROR,
+			    "Failed to stop CAC in FW\n");
+	cfg80211_radar_event(priv->adapter->wiphy, &priv->dfs_chandef,
+			     GFP_KERNEL);
+	mwifiex_dbg(priv->adapter, MSG, "regdomain: %d\n",
+		    rdr_event->reg_domain);
+	mwifiex_dbg(priv->adapter, MSG, "radar detection type: %d\n",
+		    rdr_event->det_type);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/marvell/mwifiex/11n.h b/drivers/net/wireless/marvell/mwifiex/11n.h
index afdd58a..ea0fa68 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n.h
+++ b/drivers/net/wireless/marvell/mwifiex/11n.h
@@ -171,9 +171,10 @@
 static inline int mwifiex_is_sta_11n_enabled(struct mwifiex_private *priv,
 					     struct mwifiex_sta_node *node)
 {
-
-	if (!node || (priv->bss_role != MWIFIEX_BSS_ROLE_UAP) ||
-	    !priv->ap_11n_enabled)
+	if (!node || ((priv->bss_role == MWIFIEX_BSS_ROLE_UAP) &&
+		      !priv->ap_11n_enabled) ||
+	    ((priv->bss_mode == NL80211_IFTYPE_ADHOC) &&
+	     !priv->adapter->adhoc_11n_enabled))
 		return 0;
 
 	return node->is_11n_enabled;
diff --git a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c
index dc49c3d..c47d636 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_aggr.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_aggr.c
@@ -205,7 +205,8 @@
 
 	do {
 		/* Check if AMSDU can accommodate this MSDU */
-		if (skb_tailroom(skb_aggr) < (skb_src->len + LLC_SNAP_LEN))
+		if ((skb_aggr->len + skb_src->len + LLC_SNAP_LEN) >
+		    adapter->tx_buf_size)
 			break;
 
 		skb_src = skb_dequeue(&pra_list->skb_head);
diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
index a74cc43..9448012 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
@@ -78,8 +78,15 @@
  */
 static int mwifiex_11n_dispatch_pkt(struct mwifiex_private *priv, void *payload)
 {
-	int ret = mwifiex_11n_dispatch_amsdu_pkt(priv, payload);
 
+	int ret;
+
+	if (!payload) {
+		mwifiex_dbg(priv->adapter, INFO, "info: fw drop data\n");
+		return 0;
+	}
+
+	ret = mwifiex_11n_dispatch_amsdu_pkt(priv, payload);
 	if (!ret)
 		return 0;
 
@@ -921,3 +928,72 @@
 	else
 		mwifiex_update_ampdu_rxwinsize(adapter, false);
 }
+
+/* This function handles rxba_sync event
+ */
+void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
+				 u8 *event_buf, u16 len)
+{
+	struct mwifiex_ie_types_rxba_sync *tlv_rxba = (void *)event_buf;
+	u16 tlv_type, tlv_len;
+	struct mwifiex_rx_reorder_tbl *rx_reor_tbl_ptr;
+	u8 i, j;
+	u16 seq_num, tlv_seq_num, tlv_bitmap_len;
+	int tlv_buf_left = len;
+	int ret;
+	u8 *tmp;
+
+	mwifiex_dbg_dump(priv->adapter, EVT_D, "RXBA_SYNC event:",
+			 event_buf, len);
+	while (tlv_buf_left >= sizeof(*tlv_rxba)) {
+		tlv_type = le16_to_cpu(tlv_rxba->header.type);
+		tlv_len  = le16_to_cpu(tlv_rxba->header.len);
+		if (tlv_type != TLV_TYPE_RXBA_SYNC) {
+			mwifiex_dbg(priv->adapter, ERROR,
+				    "Wrong TLV id=0x%x\n", tlv_type);
+			return;
+		}
+
+		tlv_seq_num = le16_to_cpu(tlv_rxba->seq_num);
+		tlv_bitmap_len = le16_to_cpu(tlv_rxba->bitmap_len);
+		mwifiex_dbg(priv->adapter, INFO,
+			    "%pM tid=%d seq_num=%d bitmap_len=%d\n",
+			    tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num,
+			    tlv_bitmap_len);
+
+		rx_reor_tbl_ptr =
+			mwifiex_11n_get_rx_reorder_tbl(priv, tlv_rxba->tid,
+						       tlv_rxba->mac);
+		if (!rx_reor_tbl_ptr) {
+			mwifiex_dbg(priv->adapter, ERROR,
+				    "Can not find rx_reorder_tbl!");
+			return;
+		}
+
+		for (i = 0; i < tlv_bitmap_len; i++) {
+			for (j = 0 ; j < 8; j++) {
+				if (tlv_rxba->bitmap[i] & (1 << j)) {
+					seq_num = (MAX_TID_VALUE - 1) &
+						(tlv_seq_num + i * 8 + j);
+
+					mwifiex_dbg(priv->adapter, ERROR,
+						    "drop packet,seq=%d\n",
+						    seq_num);
+
+					ret = mwifiex_11n_rx_reorder_pkt
+					(priv, seq_num, tlv_rxba->tid,
+					 tlv_rxba->mac, 0, NULL);
+
+					if (ret)
+						mwifiex_dbg(priv->adapter,
+							    ERROR,
+							    "Fail to drop packet");
+				}
+			}
+		}
+
+		tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len);
+		tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba);
+		tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp;
+	}
+}
diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h
index 63ecea8..22d991f 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h
+++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h
@@ -81,5 +81,6 @@
 mwifiex_11n_get_rx_reorder_tbl(struct mwifiex_private *priv, int tid, u8 *ta);
 void mwifiex_11n_del_rx_reorder_tbl_by_ta(struct mwifiex_private *priv, u8 *ta);
 void mwifiex_update_rxreor_flags(struct mwifiex_adapter *adapter, u8 flags);
-
+void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv,
+				 u8 *event_buf, u16 len);
 #endif /* _MWIFIEX_11N_RXREORDER_H_ */
diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index a8ff969..39ce76a 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -484,6 +484,29 @@
 }
 
 /*
+ * CFG802.11 operation handler to set default mgmt key.
+ */
+static int
+mwifiex_cfg80211_set_default_mgmt_key(struct wiphy *wiphy,
+				      struct net_device *netdev,
+				      u8 key_index)
+{
+	struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev);
+	struct mwifiex_ds_encrypt_key encrypt_key;
+
+	wiphy_dbg(wiphy, "set default mgmt key, key index=%d\n", key_index);
+
+	memset(&encrypt_key, 0, sizeof(struct mwifiex_ds_encrypt_key));
+	encrypt_key.key_len = WLAN_KEY_LEN_CCMP;
+	encrypt_key.key_index = key_index;
+	encrypt_key.is_igtk_def_key = true;
+	eth_broadcast_addr(encrypt_key.mac_addr);
+
+	return mwifiex_send_cmd(priv, HostCmd_CMD_802_11_KEY_MATERIAL,
+				HostCmd_ACT_GEN_SET, true, &encrypt_key, true);
+}
+
+/*
  * This function sends domain information to the firmware.
  *
  * The following information are passed to the firmware -
@@ -2012,10 +2035,6 @@
 	if (mwifiex_deauthenticate(priv, NULL))
 		return -EFAULT;
 
-	mwifiex_dbg(priv->adapter, MSG,
-		    "info: successfully disconnected from %pM:\t"
-		    "reason code %d\n", priv->cfg_bssid, reason_code);
-
 	eth_zero_addr(priv->cfg_bssid);
 	priv->hs2_enabled = false;
 
@@ -2485,6 +2504,16 @@
 
 	priv->scan_request = request;
 
+	if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+		ether_addr_copy(priv->random_mac, request->mac_addr);
+		for (i = 0; i < ETH_ALEN; i++) {
+			priv->random_mac[i] &= request->mac_addr_mask[i];
+			priv->random_mac[i] |= get_random_int() &
+					       ~(request->mac_addr_mask[i]);
+		}
+	}
+
+	ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac);
 	user_scan_cfg->num_ssids = request->n_ssids;
 	user_scan_cfg->ssid_list = request->ssids;
 
@@ -2726,7 +2755,7 @@
 		ht_info->cap &= ~IEEE80211_HT_CAP_SGI_40;
 
 	if (adapter->user_dev_mcs_support == HT_STREAM_2X2)
-		ht_info->cap |= 3 << IEEE80211_HT_CAP_RX_STBC_SHIFT;
+		ht_info->cap |= 2 << IEEE80211_HT_CAP_RX_STBC_SHIFT;
 	else
 		ht_info->cap |= 1 << IEEE80211_HT_CAP_RX_STBC_SHIFT;
 
@@ -3913,6 +3942,88 @@
 	return ret;
 }
 
+#ifdef CONFIG_NL80211_TESTMODE
+
+enum mwifiex_tm_attr {
+	__MWIFIEX_TM_ATTR_INVALID	= 0,
+	MWIFIEX_TM_ATTR_CMD		= 1,
+	MWIFIEX_TM_ATTR_DATA		= 2,
+
+	/* keep last */
+	__MWIFIEX_TM_ATTR_AFTER_LAST,
+	MWIFIEX_TM_ATTR_MAX		= __MWIFIEX_TM_ATTR_AFTER_LAST - 1,
+};
+
+static const struct nla_policy mwifiex_tm_policy[MWIFIEX_TM_ATTR_MAX + 1] = {
+	[MWIFIEX_TM_ATTR_CMD]		= { .type = NLA_U32 },
+	[MWIFIEX_TM_ATTR_DATA]		= { .type = NLA_BINARY,
+					    .len = MWIFIEX_SIZE_OF_CMD_BUFFER },
+};
+
+enum mwifiex_tm_command {
+	MWIFIEX_TM_CMD_HOSTCMD	= 0,
+};
+
+static int mwifiex_tm_cmd(struct wiphy *wiphy, struct wireless_dev *wdev,
+			  void *data, int len)
+{
+	struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev);
+	struct mwifiex_ds_misc_cmd *hostcmd;
+	struct nlattr *tb[MWIFIEX_TM_ATTR_MAX + 1];
+	struct mwifiex_adapter *adapter;
+	struct sk_buff *skb;
+	int err;
+
+	if (!priv)
+		return -EINVAL;
+	adapter = priv->adapter;
+
+	err = nla_parse(tb, MWIFIEX_TM_ATTR_MAX, data, len,
+			mwifiex_tm_policy);
+	if (err)
+		return err;
+
+	if (!tb[MWIFIEX_TM_ATTR_CMD])
+		return -EINVAL;
+
+	switch (nla_get_u32(tb[MWIFIEX_TM_ATTR_CMD])) {
+	case MWIFIEX_TM_CMD_HOSTCMD:
+		if (!tb[MWIFIEX_TM_ATTR_DATA])
+			return -EINVAL;
+
+		hostcmd = kzalloc(sizeof(*hostcmd), GFP_KERNEL);
+		if (!hostcmd)
+			return -ENOMEM;
+
+		hostcmd->len = nla_len(tb[MWIFIEX_TM_ATTR_DATA]);
+		memcpy(hostcmd->cmd, nla_data(tb[MWIFIEX_TM_ATTR_DATA]),
+		       hostcmd->len);
+
+		if (mwifiex_send_cmd(priv, 0, 0, 0, hostcmd, true)) {
+			dev_err(priv->adapter->dev, "Failed to process hostcmd\n");
+			return -EFAULT;
+		}
+
+		/* process hostcmd response*/
+		skb = cfg80211_testmode_alloc_reply_skb(wiphy, hostcmd->len);
+		if (!skb)
+			return -ENOMEM;
+		err = nla_put(skb, MWIFIEX_TM_ATTR_DATA,
+			      hostcmd->len, hostcmd->cmd);
+		if (err) {
+			kfree_skb(skb);
+			return -EMSGSIZE;
+		}
+
+		err = cfg80211_testmode_reply(skb);
+		kfree(hostcmd);
+		return err;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+#endif
+
 static int
 mwifiex_cfg80211_start_radar_detection(struct wiphy *wiphy,
 				       struct net_device *dev,
@@ -3994,6 +4105,7 @@
 	.leave_ibss = mwifiex_cfg80211_leave_ibss,
 	.add_key = mwifiex_cfg80211_add_key,
 	.del_key = mwifiex_cfg80211_del_key,
+	.set_default_mgmt_key = mwifiex_cfg80211_set_default_mgmt_key,
 	.mgmt_tx = mwifiex_cfg80211_mgmt_tx,
 	.mgmt_frame_register = mwifiex_cfg80211_mgmt_frame_register,
 	.remain_on_channel = mwifiex_cfg80211_remain_on_channel,
@@ -4025,6 +4137,7 @@
 	.tdls_cancel_channel_switch = mwifiex_cfg80211_tdls_cancel_chan_switch,
 	.add_station = mwifiex_cfg80211_add_station,
 	.change_station = mwifiex_cfg80211_change_station,
+	CFG80211_TESTMODE_CMD(mwifiex_tm_cmd)
 	.get_channel = mwifiex_cfg80211_get_channel,
 	.start_radar_detection = mwifiex_cfg80211_start_radar_detection,
 	.channel_switch = mwifiex_cfg80211_channel_switch,
@@ -4135,9 +4248,12 @@
 	wiphy->cipher_suites = mwifiex_cipher_suites;
 	wiphy->n_cipher_suites = ARRAY_SIZE(mwifiex_cipher_suites);
 
-	if (adapter->region_code)
-		wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS |
+	if (adapter->regd) {
+		wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG |
+					   REGULATORY_DISABLE_BEACON_HINTS |
 					   REGULATORY_COUNTRY_IE_IGNORE;
+		wiphy_apply_custom_regulatory(wiphy, adapter->regd);
+	}
 
 	ether_addr_copy(wiphy->perm_addr, adapter->perm_addr);
 	wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
@@ -4173,7 +4289,10 @@
 	wiphy->features |= NL80211_FEATURE_HT_IBSS |
 			   NL80211_FEATURE_INACTIVITY_TIMER |
 			   NL80211_FEATURE_LOW_PRIORITY_SCAN |
-			   NL80211_FEATURE_NEED_OBSS_SCAN;
+			   NL80211_FEATURE_NEED_OBSS_SCAN |
+			   NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR |
+			   NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR |
+			   NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
 
 	if (ISSUPP_TDLS_ENABLED(adapter->fw_cap_info))
 		wiphy->features |= NL80211_FEATURE_TDLS_CHANNEL_SWITCH;
@@ -4200,19 +4319,27 @@
 		return ret;
 	}
 
-	if (reg_alpha2 && mwifiex_is_valid_alpha2(reg_alpha2)) {
-		mwifiex_dbg(adapter, INFO,
-			    "driver hint alpha2: %2.2s\n", reg_alpha2);
-		regulatory_hint(wiphy, reg_alpha2);
-	} else {
-		if (adapter->region_code == 0x00) {
-			mwifiex_dbg(adapter, WARN, "Ignore world regulatory domain\n");
+	if (!adapter->regd) {
+		if (reg_alpha2 && mwifiex_is_valid_alpha2(reg_alpha2)) {
+			mwifiex_dbg(adapter, INFO,
+				    "driver hint alpha2: %2.2s\n", reg_alpha2);
+			regulatory_hint(wiphy, reg_alpha2);
 		} else {
-			country_code =
-				mwifiex_11d_code_2_region(adapter->region_code);
-			if (country_code &&
-			    regulatory_hint(wiphy, country_code))
-				mwifiex_dbg(priv->adapter, ERROR, "regulatory_hint() failed\n");
+			if (adapter->region_code == 0x00) {
+				mwifiex_dbg(adapter, WARN,
+					    "Ignore world regulatory domain\n");
+			} else {
+				wiphy->regulatory_flags |=
+					REGULATORY_DISABLE_BEACON_HINTS |
+					REGULATORY_COUNTRY_IE_IGNORE;
+				country_code =
+					mwifiex_11d_code_2_region(
+						adapter->region_code);
+				if (country_code &&
+				    regulatory_hint(wiphy, country_code))
+					mwifiex_dbg(priv->adapter, ERROR,
+						    "regulatory_hint() failed\n");
+			}
 		}
 	}
 
diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
index c29f26d..5347728 100644
--- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
@@ -480,13 +480,27 @@
  */
 int mwifiex_process_event(struct mwifiex_adapter *adapter)
 {
-	int ret;
+	int ret, i;
 	struct mwifiex_private *priv =
 		mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY);
 	struct sk_buff *skb = adapter->event_skb;
-	u32 eventcause = adapter->event_cause;
+	u32 eventcause;
 	struct mwifiex_rxinfo *rx_info;
 
+	if ((adapter->event_cause & EVENT_ID_MASK) == EVENT_RADAR_DETECTED) {
+		for (i = 0; i < adapter->priv_num; i++) {
+			priv = adapter->priv[i];
+			if (priv && mwifiex_is_11h_active(priv)) {
+				adapter->event_cause |=
+					((priv->bss_num & 0xff) << 16) |
+					((priv->bss_type & 0xff) << 24);
+				break;
+			}
+		}
+	}
+
+	eventcause = adapter->event_cause;
+
 	/* Save the last event to debug log */
 	adapter->dbg.last_event_index =
 			(adapter->dbg.last_event_index + 1) % DBG_CMD_NUM;
@@ -581,6 +595,14 @@
 			return -1;
 		}
 	}
+	/* We don't expect commands in manufacturing mode. They are cooked
+	 * in application and ready to download buffer is passed to the driver
+	 */
+	if (adapter->mfg_mode && cmd_no) {
+		dev_dbg(adapter->dev, "Ignoring commands in manufacturing mode\n");
+		return -1;
+	}
+
 
 	/* Get a new command node */
 	cmd_node = mwifiex_get_cmd_node(adapter);
diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c
index bccf17a..b9284b5 100644
--- a/drivers/net/wireless/marvell/mwifiex/debugfs.c
+++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c
@@ -118,6 +118,8 @@
 		p += sprintf(p, "bssid=\"%pM\"\n", info.bssid);
 		p += sprintf(p, "channel=\"%d\"\n", (int) info.bss_chan);
 		p += sprintf(p, "country_code = \"%s\"\n", info.country_code);
+		p += sprintf(p, "region_code=\"0x%x\"\n",
+			     priv->adapter->region_code);
 
 		netdev_for_each_mc_addr(ha, netdev)
 			p += sprintf(p, "multicast_address[%d]=\"%pM\"\n",
diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h
index 5596b6b..4b1894b 100644
--- a/drivers/net/wireless/marvell/mwifiex/fw.h
+++ b/drivers/net/wireless/marvell/mwifiex/fw.h
@@ -78,6 +78,7 @@
 	KEY_TYPE_ID_AES,
 	KEY_TYPE_ID_WAPI,
 	KEY_TYPE_ID_AES_CMAC,
+	KEY_TYPE_ID_AES_CMAC_DEF,
 };
 
 #define WPA_PN_SIZE		8
@@ -176,6 +177,7 @@
 #define TLV_TYPE_PWK_CIPHER         (PROPRIETARY_TLV_BASE_ID + 145)
 #define TLV_TYPE_GWK_CIPHER         (PROPRIETARY_TLV_BASE_ID + 146)
 #define TLV_TYPE_TX_PAUSE           (PROPRIETARY_TLV_BASE_ID + 148)
+#define TLV_TYPE_RXBA_SYNC          (PROPRIETARY_TLV_BASE_ID + 153)
 #define TLV_TYPE_COALESCE_RULE      (PROPRIETARY_TLV_BASE_ID + 154)
 #define TLV_TYPE_KEY_PARAM_V2       (PROPRIETARY_TLV_BASE_ID + 156)
 #define TLV_TYPE_REPEAT_COUNT       (PROPRIETARY_TLV_BASE_ID + 176)
@@ -188,6 +190,8 @@
 #define TLV_BTCOEX_WL_AGGR_WINSIZE  (PROPRIETARY_TLV_BASE_ID + 202)
 #define TLV_BTCOEX_WL_SCANTIME      (PROPRIETARY_TLV_BASE_ID + 203)
 #define TLV_TYPE_BSS_MODE           (PROPRIETARY_TLV_BASE_ID + 206)
+#define TLV_TYPE_RANDOM_MAC         (PROPRIETARY_TLV_BASE_ID + 236)
+#define TLV_TYPE_CHAN_ATTR_CFG      (PROPRIETARY_TLV_BASE_ID + 237)
 
 #define MWIFIEX_TX_DATA_BUF_SIZE_2K        2048
 
@@ -208,6 +212,7 @@
 
 #define MWIFIEX_TX_DATA_BUF_SIZE_4K        4096
 #define MWIFIEX_TX_DATA_BUF_SIZE_8K        8192
+#define MWIFIEX_TX_DATA_BUF_SIZE_12K       12288
 
 #define ISSUPP_11NENABLED(FwCapInfo) (FwCapInfo & BIT(11))
 #define ISSUPP_TDLS_ENABLED(FwCapInfo) (FwCapInfo & BIT(14))
@@ -379,6 +384,7 @@
 #define HostCmd_CMD_MC_POLICY                         0x0121
 #define HostCmd_CMD_TDLS_OPER                         0x0122
 #define HostCmd_CMD_SDIO_SP_RX_AGGR_CFG               0x0223
+#define HostCmd_CMD_CHAN_REGION_CFG		      0x0242
 
 #define PROTOCOL_NO_SECURITY        0x01
 #define PROTOCOL_STATIC_WEP         0x02
@@ -411,6 +417,14 @@
 	P2P_MODE_CLIENT = 3,
 };
 
+enum mwifiex_channel_flags {
+	MWIFIEX_CHANNEL_PASSIVE = BIT(0),
+	MWIFIEX_CHANNEL_DFS = BIT(1),
+	MWIFIEX_CHANNEL_NOHT40 = BIT(2),
+	MWIFIEX_CHANNEL_NOHT80 = BIT(3),
+	MWIFIEX_CHANNEL_DISABLED = BIT(7),
+};
+
 #define HostCmd_RET_BIT                       0x8000
 #define HostCmd_ACT_GEN_GET                   0x0000
 #define HostCmd_ACT_GEN_SET                   0x0001
@@ -504,6 +518,8 @@
 #define EVENT_RSSI_HIGH                 0x0000001c
 #define EVENT_SNR_HIGH                  0x0000001d
 #define EVENT_IBSS_COALESCED            0x0000001e
+#define EVENT_IBSS_STA_CONNECT          0x00000020
+#define EVENT_IBSS_STA_DISCONNECT       0x00000021
 #define EVENT_DATA_RSSI_LOW             0x00000024
 #define EVENT_DATA_SNR_LOW              0x00000025
 #define EVENT_DATA_RSSI_HIGH            0x00000026
@@ -531,6 +547,7 @@
 #define EVENT_CHANNEL_REPORT_RDY        0x00000054
 #define EVENT_TX_DATA_PAUSE             0x00000055
 #define EVENT_EXT_SCAN_REPORT           0x00000058
+#define EVENT_RXBA_SYNC                 0x00000059
 #define EVENT_BG_SCAN_STOPPED           0x00000065
 #define EVENT_REMAIN_ON_CHAN_EXPIRED    0x0000005f
 #define EVENT_MULTI_CHAN_INFO           0x0000006a
@@ -734,6 +751,16 @@
 	struct mwifiex_chan_scan_param_set chan_scan_param[1];
 } __packed;
 
+struct mwifiex_ie_types_rxba_sync {
+	struct mwifiex_ie_types_header header;
+	u8 mac[ETH_ALEN];
+	u8 tid;
+	u8 reserved;
+	__le16 seq_num;
+	__le16 bitmap_len;
+	u8 bitmap[1];
+} __packed;
+
 struct chan_band_param_set {
 	u8 radio_type;
 	u8 chan_number;
@@ -780,6 +807,11 @@
 	__le16 chan_gap;
 } __packed;
 
+struct mwifiex_ie_types_random_mac {
+	struct mwifiex_ie_types_header header;
+	u8 mac[ETH_ALEN];
+} __packed;
+
 struct mwifiex_ietypes_chanstats {
 	struct mwifiex_ie_types_header header;
 	struct mwifiex_fw_chan_stats chanstats[0];
@@ -1464,6 +1496,7 @@
 	/* Variable number (fixed maximum) of channels to scan up */
 	struct mwifiex_user_scan_chan chan_list[MWIFIEX_USER_SCAN_CHAN_MAX];
 	u16 scan_chan_gap;
+	u8 random_mac[ETH_ALEN];
 } __packed;
 
 #define MWIFIEX_BG_SCAN_CHAN_MAX 38
@@ -1646,7 +1679,7 @@
 };
 
 struct host_cmd_ds_sta_list {
-	u16 sta_count;
+	__le16 sta_count;
 	u8 tlv[0];
 } __packed;
 
@@ -1667,6 +1700,12 @@
 	u8 wmm_ie[1];
 };
 
+struct mwifiex_ie_types_mgmt_frame {
+	struct mwifiex_ie_types_header header;
+	__le16 frame_control;
+	u8 frame_contents[0];
+};
+
 struct mwifiex_ie_types_wmm_queue_status {
 	struct mwifiex_ie_types_header header;
 	u8 queue_index;
@@ -2034,26 +2073,26 @@
 
 struct host_cmd_ds_pcie_details {
 	/* TX buffer descriptor ring address */
-	u32 txbd_addr_lo;
-	u32 txbd_addr_hi;
+	__le32 txbd_addr_lo;
+	__le32 txbd_addr_hi;
 	/* TX buffer descriptor ring count */
-	u32 txbd_count;
+	__le32 txbd_count;
 
 	/* RX buffer descriptor ring address */
-	u32 rxbd_addr_lo;
-	u32 rxbd_addr_hi;
+	__le32 rxbd_addr_lo;
+	__le32 rxbd_addr_hi;
 	/* RX buffer descriptor ring count */
-	u32 rxbd_count;
+	__le32 rxbd_count;
 
 	/* Event buffer descriptor ring address */
-	u32 evtbd_addr_lo;
-	u32 evtbd_addr_hi;
+	__le32 evtbd_addr_lo;
+	__le32 evtbd_addr_hi;
 	/* Event buffer descriptor ring count */
-	u32 evtbd_count;
+	__le32 evtbd_count;
 
 	/* Sleep cookie buffer physical address */
-	u32 sleep_cookie_addr_lo;
-	u32 sleep_cookie_addr_hi;
+	__le32 sleep_cookie_addr_lo;
+	__le32 sleep_cookie_addr_hi;
 } __packed;
 
 struct mwifiex_ie_types_rssi_threshold {
@@ -2093,8 +2132,8 @@
 	u8 chan_buf_weight;
 	u8 band_config;
 	u8 chan_num;
-	u32 chan_time;
-	u32 reserved;
+	__le32 chan_time;
+	__le32 reserved;
 	union {
 		u8 sdio_func_num;
 		u8 usb_ep_num;
@@ -2185,7 +2224,7 @@
 } __packed;
 
 struct host_cmd_ds_wakeup_reason {
-	u16  wakeup_reason;
+	__le16  wakeup_reason;
 } __packed;
 
 struct host_cmd_ds_gtk_rekey_params {
@@ -2196,6 +2235,10 @@
 	__le32 replay_ctr_high;
 } __packed;
 
+struct host_cmd_ds_chan_region_cfg {
+	__le16 action;
+} __packed;
+
 struct host_cmd_ds_command {
 	__le16 command;
 	__le16 size;
@@ -2270,6 +2313,7 @@
 		struct host_cmd_ds_robust_coex coex;
 		struct host_cmd_ds_wakeup_reason hs_wakeup_reason;
 		struct host_cmd_ds_gtk_rekey_params rekey;
+		struct host_cmd_ds_chan_region_cfg reg_cfg;
 	} params;
 } __packed;
 
diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c
index 1489c90..82839d9 100644
--- a/drivers/net/wireless/marvell/mwifiex/init.c
+++ b/drivers/net/wireless/marvell/mwifiex/init.c
@@ -298,6 +298,7 @@
 	memset(&adapter->arp_filter, 0, sizeof(adapter->arp_filter));
 	adapter->arp_filter_size = 0;
 	adapter->max_mgmt_ie_index = MAX_MGMT_IE_INDEX;
+	adapter->mfg_mode = mfg_mode;
 	adapter->key_api_major_ver = 0;
 	adapter->key_api_minor_ver = 0;
 	eth_broadcast_addr(adapter->perm_addr);
@@ -553,15 +554,22 @@
 				return -1;
 		}
 	}
+	if (adapter->mfg_mode) {
+		adapter->hw_status = MWIFIEX_HW_STATUS_READY;
+		ret = -EINPROGRESS;
+	} else {
+		for (i = 0; i < adapter->priv_num; i++) {
+			if (adapter->priv[i]) {
+				ret = mwifiex_sta_init_cmd(adapter->priv[i],
+							   first_sta, true);
+				if (ret == -1)
+					return -1;
 
-	for (i = 0; i < adapter->priv_num; i++) {
-		if (adapter->priv[i]) {
-			ret = mwifiex_sta_init_cmd(adapter->priv[i], first_sta,
-						   true);
-			if (ret == -1)
-				return -1;
+				first_sta = false;
+			}
 
-			first_sta = false;
+
+
 		}
 	}
 
diff --git a/drivers/net/wireless/marvell/mwifiex/ioctl.h b/drivers/net/wireless/marvell/mwifiex/ioctl.h
index 7042981..536ab83 100644
--- a/drivers/net/wireless/marvell/mwifiex/ioctl.h
+++ b/drivers/net/wireless/marvell/mwifiex/ioctl.h
@@ -260,6 +260,7 @@
 	u8 is_igtk_key;
 	u8 is_current_wep_key;
 	u8 is_rx_seq_valid;
+	u8 is_igtk_def_key;
 };
 
 struct mwifiex_power_cfg {
diff --git a/drivers/net/wireless/marvell/mwifiex/join.c b/drivers/net/wireless/marvell/mwifiex/join.c
index 1c7b006..b89596c 100644
--- a/drivers/net/wireless/marvell/mwifiex/join.c
+++ b/drivers/net/wireless/marvell/mwifiex/join.c
@@ -669,9 +669,8 @@
 	priv->assoc_rsp_size = min(le16_to_cpu(resp->size) - S_DS_GEN,
 				   sizeof(priv->assoc_rsp_buf));
 
-	memcpy(priv->assoc_rsp_buf, &resp->params, priv->assoc_rsp_size);
-
 	assoc_rsp->a_id = cpu_to_le16(aid);
+	memcpy(priv->assoc_rsp_buf, &resp->params, priv->assoc_rsp_size);
 
 	if (status_code) {
 		priv->adapter->dbg.num_cmd_assoc_failure++;
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index db4925d..2478ccd 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -23,6 +23,7 @@
 #include "11n.h"
 
 #define VERSION	"1.0"
+#define MFG_FIRMWARE	"mwifiex_mfg.bin"
 
 static unsigned int debug_mask = MWIFIEX_DEFAULT_DEBUG_MASK;
 module_param(debug_mask, uint, 0);
@@ -37,6 +38,10 @@
 MODULE_PARM_DESC(driver_mode,
 		 "station=0x1(default), ap-sta=0x3, station-p2p=0x5, ap-sta-p2p=0x7");
 
+bool mfg_mode;
+module_param(mfg_mode, bool, 0);
+MODULE_PARM_DESC(mfg_mode, "manufacturing mode enable:1, disable:0");
+
 /*
  * This function registers the device and performs all the necessary
  * initializations.
@@ -139,6 +144,8 @@
 		adapter->nd_info = NULL;
 	}
 
+	kfree(adapter->regd);
+
 	vfree(adapter->chan_stats);
 	kfree(adapter);
 	return 0;
@@ -486,9 +493,11 @@
  */
 static void mwifiex_terminate_workqueue(struct mwifiex_adapter *adapter)
 {
-	flush_workqueue(adapter->workqueue);
-	destroy_workqueue(adapter->workqueue);
-	adapter->workqueue = NULL;
+	if (adapter->workqueue) {
+		flush_workqueue(adapter->workqueue);
+		destroy_workqueue(adapter->workqueue);
+		adapter->workqueue = NULL;
+	}
 
 	if (adapter->rx_workqueue) {
 		flush_workqueue(adapter->rx_workqueue);
@@ -559,16 +568,21 @@
 		goto done;
 	}
 	/* Wait for mwifiex_init to complete */
-	wait_event_interruptible(adapter->init_wait_q,
-				 adapter->init_wait_q_woken);
-	if (adapter->hw_status != MWIFIEX_HW_STATUS_READY)
-		goto err_init_fw;
+	if (!adapter->mfg_mode) {
+		wait_event_interruptible(adapter->init_wait_q,
+					 adapter->init_wait_q_woken);
+		if (adapter->hw_status != MWIFIEX_HW_STATUS_READY)
+			goto err_init_fw;
+	}
 
 	priv = adapter->priv[MWIFIEX_BSS_ROLE_STA];
-	if (mwifiex_register_cfg80211(adapter)) {
-		mwifiex_dbg(adapter, ERROR,
-			    "cannot register with cfg80211\n");
-		goto err_init_fw;
+
+	if (!adapter->wiphy) {
+		if (mwifiex_register_cfg80211(adapter)) {
+			mwifiex_dbg(adapter, ERROR,
+				    "cannot register with cfg80211\n");
+			goto err_init_fw;
+		}
 	}
 
 	if (mwifiex_init_channel_scan_gap(adapter)) {
@@ -662,16 +676,41 @@
 /*
  * This function initializes the hardware and gets firmware.
  */
-static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter)
+static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter,
+			      bool req_fw_nowait)
 {
 	int ret;
 
-	ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name,
-				      adapter->dev, GFP_KERNEL, adapter,
-				      mwifiex_fw_dpc);
-	if (ret < 0)
-		mwifiex_dbg(adapter, ERROR,
-			    "request_firmware_nowait error %d\n", ret);
+	/* Override default firmware with manufacturing one if
+	 * manufacturing mode is enabled
+	 */
+	if (mfg_mode) {
+		if (strlcpy(adapter->fw_name, MFG_FIRMWARE,
+			    sizeof(adapter->fw_name)) >=
+			    sizeof(adapter->fw_name)) {
+			pr_err("%s: fw_name too long!\n", __func__);
+			return -1;
+		}
+	}
+
+	if (req_fw_nowait) {
+		ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name,
+					      adapter->dev, GFP_KERNEL, adapter,
+					      mwifiex_fw_dpc);
+		if (ret < 0)
+			mwifiex_dbg(adapter, ERROR,
+				    "request_firmware_nowait error %d\n", ret);
+	} else {
+		ret = request_firmware(&adapter->firmware,
+				       adapter->fw_name,
+				       adapter->dev);
+		if (ret < 0)
+			mwifiex_dbg(adapter, ERROR,
+				    "request_firmware error %d\n", ret);
+		else
+			mwifiex_fw_dpc(adapter->firmware, (void *)adapter);
+	}
+
 	return ret;
 }
 
@@ -1321,6 +1360,199 @@
 }
 
 /*
+ * This function gets called during PCIe function level reset. Required
+ * code is extracted from mwifiex_remove_card()
+ */
+static int
+mwifiex_shutdown_sw(struct mwifiex_adapter *adapter, struct semaphore *sem)
+{
+	struct mwifiex_private *priv;
+	int i;
+
+	if (!adapter)
+		goto exit_return;
+
+	if (down_interruptible(sem))
+		goto exit_sem_err;
+
+	priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY);
+	mwifiex_deauthenticate(priv, NULL);
+
+	/* We can no longer handle interrupts once we start doing the teardown
+	 * below.
+	 */
+	if (adapter->if_ops.disable_int)
+		adapter->if_ops.disable_int(adapter);
+
+	adapter->surprise_removed = true;
+	mwifiex_terminate_workqueue(adapter);
+
+	/* Stop data */
+	for (i = 0; i < adapter->priv_num; i++) {
+		priv = adapter->priv[i];
+		if (priv && priv->netdev) {
+			mwifiex_stop_net_dev_queue(priv->netdev, adapter);
+			if (netif_carrier_ok(priv->netdev))
+				netif_carrier_off(priv->netdev);
+			netif_device_detach(priv->netdev);
+		}
+	}
+
+	mwifiex_dbg(adapter, CMD, "cmd: calling mwifiex_shutdown_drv...\n");
+	adapter->init_wait_q_woken = false;
+
+	if (mwifiex_shutdown_drv(adapter) == -EINPROGRESS)
+		wait_event_interruptible(adapter->init_wait_q,
+					 adapter->init_wait_q_woken);
+	if (adapter->if_ops.down_dev)
+		adapter->if_ops.down_dev(adapter);
+
+	mwifiex_dbg(adapter, CMD, "cmd: mwifiex_shutdown_drv done\n");
+	if (atomic_read(&adapter->rx_pending) ||
+	    atomic_read(&adapter->tx_pending) ||
+	    atomic_read(&adapter->cmd_pending)) {
+		mwifiex_dbg(adapter, ERROR,
+			    "rx_pending=%d, tx_pending=%d,\t"
+			    "cmd_pending=%d\n",
+			    atomic_read(&adapter->rx_pending),
+			    atomic_read(&adapter->tx_pending),
+			    atomic_read(&adapter->cmd_pending));
+	}
+
+	for (i = 0; i < adapter->priv_num; i++) {
+		priv = adapter->priv[i];
+		if (!priv)
+			continue;
+		rtnl_lock();
+		if (priv->netdev &&
+		    priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED)
+			mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev);
+		rtnl_unlock();
+	}
+
+	up(sem);
+exit_sem_err:
+	mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__);
+exit_return:
+	return 0;
+}
+
+/* This function gets called during PCIe function level reset. Required
+ * code is extracted from mwifiex_add_card()
+ */
+static int
+mwifiex_reinit_sw(struct mwifiex_adapter *adapter, struct semaphore *sem,
+		  struct mwifiex_if_ops *if_ops, u8 iface_type)
+{
+	char fw_name[32];
+	struct pcie_service_card *card = adapter->card;
+
+	if (down_interruptible(sem))
+		goto exit_sem_err;
+
+	mwifiex_init_lock_list(adapter);
+	if (adapter->if_ops.up_dev)
+		adapter->if_ops.up_dev(adapter);
+
+	adapter->iface_type = iface_type;
+	adapter->card_sem = sem;
+
+	adapter->hw_status = MWIFIEX_HW_STATUS_INITIALIZING;
+	adapter->surprise_removed = false;
+	init_waitqueue_head(&adapter->init_wait_q);
+	adapter->is_suspended = false;
+	adapter->hs_activated = false;
+	init_waitqueue_head(&adapter->hs_activate_wait_q);
+	init_waitqueue_head(&adapter->cmd_wait_q.wait);
+	adapter->cmd_wait_q.status = 0;
+	adapter->scan_wait_q_woken = false;
+
+	if ((num_possible_cpus() > 1) || adapter->iface_type == MWIFIEX_USB)
+		adapter->rx_work_enabled = true;
+
+	adapter->workqueue =
+		alloc_workqueue("MWIFIEX_WORK_QUEUE",
+				WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+	if (!adapter->workqueue)
+		goto err_kmalloc;
+
+	INIT_WORK(&adapter->main_work, mwifiex_main_work_queue);
+
+	if (adapter->rx_work_enabled) {
+		adapter->rx_workqueue = alloc_workqueue("MWIFIEX_RX_WORK_QUEUE",
+							WQ_HIGHPRI |
+							WQ_MEM_RECLAIM |
+							WQ_UNBOUND, 1);
+		if (!adapter->rx_workqueue)
+			goto err_kmalloc;
+		INIT_WORK(&adapter->rx_work, mwifiex_rx_work_queue);
+	}
+
+	/* Register the device. Fill up the private data structure with
+	 * relevant information from the card. Some code extracted from
+	 * mwifiex_register_dev()
+	 */
+	mwifiex_dbg(adapter, INFO, "%s, mwifiex_init_hw_fw()...\n", __func__);
+	strcpy(fw_name, adapter->fw_name);
+	strcpy(adapter->fw_name, PCIE8997_DEFAULT_WIFIFW_NAME);
+
+	adapter->tx_buf_size = card->pcie.tx_buf_size;
+	adapter->ext_scan = card->pcie.can_ext_scan;
+	if (mwifiex_init_hw_fw(adapter, false)) {
+		strcpy(adapter->fw_name, fw_name);
+		mwifiex_dbg(adapter, ERROR,
+			    "%s: firmware init failed\n", __func__);
+		goto err_init_fw;
+	}
+	strcpy(adapter->fw_name, fw_name);
+	mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__);
+	up(sem);
+	return 0;
+
+err_init_fw:
+	mwifiex_dbg(adapter, ERROR, "info: %s: unregister device\n", __func__);
+	if (adapter->if_ops.unregister_dev)
+		adapter->if_ops.unregister_dev(adapter);
+	if (adapter->hw_status == MWIFIEX_HW_STATUS_READY) {
+		mwifiex_dbg(adapter, ERROR,
+			    "info: %s: shutdown mwifiex\n", __func__);
+		adapter->init_wait_q_woken = false;
+
+		if (mwifiex_shutdown_drv(adapter) == -EINPROGRESS)
+			wait_event_interruptible(adapter->init_wait_q,
+						 adapter->init_wait_q_woken);
+	}
+
+err_kmalloc:
+	mwifiex_terminate_workqueue(adapter);
+	adapter->surprise_removed = true;
+	up(sem);
+exit_sem_err:
+	mwifiex_dbg(adapter, INFO, "%s, error\n", __func__);
+
+	return -1;
+}
+
+/* This function processes pre and post PCIe function level resets.
+ * It performs software cleanup without touching PCIe specific code.
+ * Also, during initialization PCIe stuff is skipped.
+ */
+void mwifiex_do_flr(struct mwifiex_adapter *adapter, bool prepare)
+{
+	struct mwifiex_if_ops if_ops;
+
+	if (!prepare) {
+		mwifiex_reinit_sw(adapter, adapter->card_sem, &if_ops,
+				  adapter->iface_type);
+	} else {
+		memcpy(&if_ops, &adapter->if_ops,
+		       sizeof(struct mwifiex_if_ops));
+		mwifiex_shutdown_sw(adapter, adapter->card_sem);
+	}
+}
+EXPORT_SYMBOL_GPL(mwifiex_do_flr);
+
+/*
  * This function adds the card.
  *
  * This function follows the following major steps to set up the device -
@@ -1391,7 +1623,7 @@
 		goto err_registerdev;
 	}
 
-	if (mwifiex_init_hw_fw(adapter)) {
+	if (mwifiex_init_hw_fw(adapter, true)) {
 		pr_err("%s: firmware init failed\n", __func__);
 		goto err_init_fw;
 	}
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index 9f6bb40..26df28f 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -58,6 +58,7 @@
 #include "sdio.h"
 
 extern const char driver_version[];
+extern bool mfg_mode;
 
 struct mwifiex_adapter;
 struct mwifiex_private;
@@ -675,6 +676,7 @@
 	struct mwifiex_user_scan_chan hidden_chan[MWIFIEX_USER_SCAN_CHAN_MAX];
 	u8 assoc_resp_ht_param;
 	bool ht_param_present;
+	u8 random_mac[ETH_ALEN];
 };
 
 
@@ -827,6 +829,8 @@
 	void (*deaggr_pkt)(struct mwifiex_adapter *, struct sk_buff *);
 	void (*multi_port_resync)(struct mwifiex_adapter *);
 	bool (*is_port_ready)(struct mwifiex_private *);
+	void (*down_dev)(struct mwifiex_adapter *);
+	void (*up_dev)(struct mwifiex_adapter *);
 };
 
 struct mwifiex_adapter {
@@ -989,6 +993,7 @@
 	u32 drv_info_size;
 	bool scan_chan_gap_enabled;
 	struct sk_buff_head rx_data_q;
+	bool mfg_mode;
 	struct mwifiex_chan_stats *chan_stats;
 	u32 num_in_chan_stats;
 	int survey_idx;
@@ -1004,6 +1009,7 @@
 	bool usb_mc_status;
 	bool usb_mc_setup;
 	struct cfg80211_wowlan_nd_info *nd_info;
+	struct ieee80211_regdomain *regd;
 };
 
 void mwifiex_process_tx_queue(struct mwifiex_adapter *adapter);
@@ -1625,4 +1631,5 @@
 void mwifiex_dev_debugfs_init(struct mwifiex_private *priv);
 void mwifiex_dev_debugfs_remove(struct mwifiex_private *priv);
 #endif
+void mwifiex_do_flr(struct mwifiex_adapter *adapter, bool prepare);
 #endif /* !_MWIFIEX_MAIN_H_ */
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c
index 453ab6a..3c3c4f1 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.c
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.c
@@ -225,7 +225,7 @@
 	if (!adapter || !adapter->priv_num)
 		return;
 
-	if (user_rmmod) {
+	if (user_rmmod && !adapter->mfg_mode) {
 #ifdef CONFIG_PM_SLEEP
 		if (adapter->is_suspended)
 			mwifiex_pcie_resume(&pdev->dev);
@@ -277,6 +277,52 @@
 
 MODULE_DEVICE_TABLE(pci, mwifiex_ids);
 
+static void mwifiex_pcie_reset_notify(struct pci_dev *pdev, bool prepare)
+{
+	struct mwifiex_adapter *adapter;
+	struct pcie_service_card *card;
+
+	if (!pdev) {
+		pr_err("%s: PCIe device is not specified\n", __func__);
+		return;
+	}
+
+	card = (struct pcie_service_card *)pci_get_drvdata(pdev);
+	if (!card || !card->adapter) {
+		pr_err("%s: Card or adapter structure is not valid (%ld)\n",
+		       __func__, (long)card);
+		return;
+	}
+
+	adapter = card->adapter;
+	mwifiex_dbg(adapter, INFO,
+		    "%s: vendor=0x%4.04x device=0x%4.04x rev=%d %s\n",
+		    __func__, pdev->vendor, pdev->device,
+		    pdev->revision,
+		    prepare ? "Pre-FLR" : "Post-FLR");
+
+	if (prepare) {
+		/* Kernel would be performing FLR after this notification.
+		 * Cleanup all software without cleaning anything related to
+		 * PCIe and HW.
+		 */
+		mwifiex_do_flr(adapter, prepare);
+		adapter->surprise_removed = true;
+	} else {
+		/* Kernel stores and restores PCIe function context before and
+		 * after performing FLR respectively. Reconfigure the software
+		 * and firmware including firmware redownload
+		 */
+		adapter->surprise_removed = false;
+		mwifiex_do_flr(adapter, prepare);
+	}
+	mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__);
+}
+
+static const struct pci_error_handlers mwifiex_pcie_err_handler[] = {
+		{ .reset_notify = mwifiex_pcie_reset_notify, },
+};
+
 #ifdef CONFIG_PM_SLEEP
 /* Power Management Hooks */
 static SIMPLE_DEV_PM_OPS(mwifiex_pcie_pm_ops, mwifiex_pcie_suspend,
@@ -295,6 +341,7 @@
 	},
 #endif
 	.shutdown = mwifiex_pcie_shutdown,
+	.err_handler = mwifiex_pcie_err_handler,
 };
 
 /*
@@ -1956,8 +2003,6 @@
 			if (firmware_len - offset < txlen)
 				txlen = firmware_len - offset;
 
-			mwifiex_dbg(adapter, INFO, ".");
-
 			tx_blocks = (txlen + card->pcie.blksz_fw_dl - 1) /
 				    card->pcie.blksz_fw_dl;
 
@@ -2043,6 +2088,10 @@
 			ret = -1;
 		else
 			ret = 0;
+
+		mwifiex_dbg(adapter, INFO, "Try %d if FW is ready <%d,%#x>",
+			    tries, ret, firmware_stat);
+
 		if (ret)
 			continue;
 		if (firmware_stat == FIRMWARE_READY_PCIE) {
@@ -2074,8 +2123,7 @@
 		adapter->winner = 1;
 	} else {
 		mwifiex_dbg(adapter, ERROR,
-			    "PCI-E is not the winner <%#x,%d>, exit dnld\n",
-			    ret, adapter->winner);
+			    "PCI-E is not the winner <%#x>", winner);
 	}
 
 	return ret;
@@ -2863,7 +2911,7 @@
 static void mwifiex_pcie_get_fw_name(struct mwifiex_adapter *adapter)
 {
 	int revision_id = 0;
-	int version;
+	int version, magic;
 	struct pcie_service_card *card = adapter->card;
 
 	switch (card->dev->device) {
@@ -2888,30 +2936,19 @@
 		}
 		break;
 	case PCIE_DEVICE_ID_MARVELL_88W8997:
-		mwifiex_read_reg(adapter, 0x0c48, &revision_id);
+		mwifiex_read_reg(adapter, 0x8, &revision_id);
 		mwifiex_read_reg(adapter, 0x0cd0, &version);
+		mwifiex_read_reg(adapter, 0x0cd4, &magic);
+		revision_id &= 0xff;
 		version &= 0x7;
-		switch (revision_id) {
-		case PCIE8997_V2:
-			if (version == CHIP_VER_PCIEUART)
-				strcpy(adapter->fw_name,
-				       PCIEUART8997_FW_NAME_V2);
-			else
-				strcpy(adapter->fw_name,
-				       PCIEUSB8997_FW_NAME_V2);
-			break;
-		case PCIE8997_Z:
-			if (version == CHIP_VER_PCIEUART)
-				strcpy(adapter->fw_name,
-				       PCIEUART8997_FW_NAME_Z);
-			else
-				strcpy(adapter->fw_name,
-				       PCIEUSB8997_FW_NAME_Z);
-			break;
-		default:
-			strcpy(adapter->fw_name, PCIE8997_DEFAULT_FW_NAME);
-			break;
-		}
+		magic &= 0xff;
+		if (revision_id == PCIE8997_A1 &&
+		    magic == CHIP_MAGIC_VALUE &&
+		    version == CHIP_VER_PCIEUART)
+			strcpy(adapter->fw_name, PCIEUART8997_FW_NAME_V4);
+		else
+			strcpy(adapter->fw_name, PCIEUSB8997_FW_NAME_V4);
+		break;
 	default:
 		break;
 	}
@@ -2952,7 +2989,6 @@
 static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter)
 {
 	struct pcie_service_card *card = adapter->card;
-	const struct mwifiex_pcie_card_reg *reg;
 	struct pci_dev *pdev;
 	int i;
 
@@ -2976,8 +3012,90 @@
 			if (card->msi_enable)
 				pci_disable_msi(pdev);
 	       }
+	}
+}
 
-		reg = card->pcie.reg;
+/* This function initializes the PCI-E host memory space, WCB rings, etc.
+ *
+ * The following initializations steps are followed -
+ *      - Allocate TXBD ring buffers
+ *      - Allocate RXBD ring buffers
+ *      - Allocate event BD ring buffers
+ *      - Allocate command response ring buffer
+ *      - Allocate sleep cookie buffer
+ * Part of mwifiex_pcie_init(), not reset the PCIE registers
+ */
+static void mwifiex_pcie_up_dev(struct mwifiex_adapter *adapter)
+{
+	struct pcie_service_card *card = adapter->card;
+	int ret;
+	struct pci_dev *pdev = card->dev;
+	const struct mwifiex_pcie_card_reg *reg = card->pcie.reg;
+
+	card->cmdrsp_buf = NULL;
+	ret = mwifiex_pcie_create_txbd_ring(adapter);
+	if (ret) {
+		mwifiex_dbg(adapter, ERROR, "Failed to create txbd ring\n");
+		goto err_cre_txbd;
+	}
+
+	ret = mwifiex_pcie_create_rxbd_ring(adapter);
+	if (ret) {
+		mwifiex_dbg(adapter, ERROR, "Failed to create rxbd ring\n");
+		goto err_cre_rxbd;
+	}
+
+	ret = mwifiex_pcie_create_evtbd_ring(adapter);
+	if (ret) {
+		mwifiex_dbg(adapter, ERROR, "Failed to create evtbd ring\n");
+		goto err_cre_evtbd;
+	}
+
+	ret = mwifiex_pcie_alloc_cmdrsp_buf(adapter);
+	if (ret) {
+		mwifiex_dbg(adapter, ERROR, "Failed to allocate cmdbuf buffer\n");
+		goto err_alloc_cmdbuf;
+	}
+
+	if (reg->sleep_cookie) {
+		ret = mwifiex_pcie_alloc_sleep_cookie_buf(adapter);
+		if (ret) {
+			mwifiex_dbg(adapter, ERROR, "Failed to allocate sleep_cookie buffer\n");
+			goto err_alloc_cookie;
+		}
+	} else {
+		card->sleep_cookie_vbase = NULL;
+	}
+	return;
+
+err_alloc_cookie:
+	mwifiex_pcie_delete_cmdrsp_buf(adapter);
+err_alloc_cmdbuf:
+	mwifiex_pcie_delete_evtbd_ring(adapter);
+err_cre_evtbd:
+	mwifiex_pcie_delete_rxbd_ring(adapter);
+err_cre_rxbd:
+	mwifiex_pcie_delete_txbd_ring(adapter);
+err_cre_txbd:
+	pci_iounmap(pdev, card->pci_mmap1);
+}
+
+/* This function cleans up the PCI-E host memory space.
+ * Some code is extracted from mwifiex_unregister_dev()
+ *
+ */
+static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter)
+{
+	struct pcie_service_card *card = adapter->card;
+	const struct mwifiex_pcie_card_reg *reg = card->pcie.reg;
+
+	if (mwifiex_write_reg(adapter, reg->drv_rdy, 0x00000000))
+		mwifiex_dbg(adapter, ERROR, "Failed to write driver not-ready signature\n");
+
+	adapter->seq_num = 0;
+	adapter->tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K;
+
+	if (card) {
 		if (reg->sleep_cookie)
 			mwifiex_pcie_delete_sleep_cookie_buf(adapter);
 
@@ -2987,6 +3105,8 @@
 		mwifiex_pcie_delete_txbd_ring(adapter);
 		card->cmdrsp_buf = NULL;
 	}
+
+	return;
 }
 
 static struct mwifiex_if_ops pcie_ops = {
@@ -3013,6 +3133,8 @@
 	.clean_pcie_ring =		mwifiex_clean_pcie_ring_buf,
 	.reg_dump =			mwifiex_pcie_reg_dump,
 	.device_dump =			mwifiex_pcie_device_dump,
+	.down_dev =			mwifiex_pcie_down_dev,
+	.up_dev =			mwifiex_pcie_up_dev,
 };
 
 /*
diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.h b/drivers/net/wireless/marvell/mwifiex/pcie.h
index f05061c..46f99ca 100644
--- a/drivers/net/wireless/marvell/mwifiex/pcie.h
+++ b/drivers/net/wireless/marvell/mwifiex/pcie.h
@@ -32,11 +32,9 @@
 #define PCIE8897_DEFAULT_FW_NAME "mrvl/pcie8897_uapsta.bin"
 #define PCIE8897_A0_FW_NAME "mrvl/pcie8897_uapsta_a0.bin"
 #define PCIE8897_B0_FW_NAME "mrvl/pcie8897_uapsta.bin"
-#define PCIE8997_DEFAULT_FW_NAME "mrvl/pcieusb8997_combo_v2.bin"
-#define PCIEUART8997_FW_NAME_Z "mrvl/pcieuart8997_combo.bin"
-#define PCIEUART8997_FW_NAME_V2 "mrvl/pcieuart8997_combo_v2.bin"
-#define PCIEUSB8997_FW_NAME_Z "mrvl/pcieusb8997_combo.bin"
-#define PCIEUSB8997_FW_NAME_V2 "mrvl/pcieusb8997_combo_v2.bin"
+#define PCIEUART8997_FW_NAME_V4 "mrvl/pcieuart8997_combo_v4.bin"
+#define PCIEUSB8997_FW_NAME_V4 "mrvl/pcieusb8997_combo_v4.bin"
+#define PCIE8997_DEFAULT_WIFIFW_NAME "mrvl/pcie8997_wlan_v4.bin"
 
 #define PCIE_VENDOR_ID_MARVELL              (0x11ab)
 #define PCIE_VENDOR_ID_V2_MARVELL           (0x1b4b)
@@ -46,9 +44,10 @@
 
 #define PCIE8897_A0	0x1100
 #define PCIE8897_B0	0x1200
-#define PCIE8997_Z	0x0
-#define PCIE8997_V2	0x471
+#define PCIE8997_A0	0x10
+#define PCIE8997_A1	0x11
 #define CHIP_VER_PCIEUART	0x3
+#define CHIP_MAGIC_VALUE	0x24
 
 /* Constants for Buffer Descriptor (BD) rings */
 #define MWIFIEX_MAX_TXRX_BD			0x20
diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c
index 21ec847..97c9765 100644
--- a/drivers/net/wireless/marvell/mwifiex/scan.c
+++ b/drivers/net/wireless/marvell/mwifiex/scan.c
@@ -820,6 +820,7 @@
 	struct mwifiex_adapter *adapter = priv->adapter;
 	struct mwifiex_ie_types_num_probes *num_probes_tlv;
 	struct mwifiex_ie_types_scan_chan_gap *chan_gap_tlv;
+	struct mwifiex_ie_types_random_mac *random_mac_tlv;
 	struct mwifiex_ie_types_wildcard_ssid_params *wildcard_ssid_tlv;
 	struct mwifiex_ie_types_bssid_list *bssid_tlv;
 	u8 *tlv_pos;
@@ -835,6 +836,7 @@
 	u8 ssid_filter;
 	struct mwifiex_ie_types_htcap *ht_cap;
 	struct mwifiex_ie_types_bss_mode *bss_mode;
+	const u8 zero_mac[6] = {0, 0, 0, 0, 0, 0};
 
 	/* The tlv_buf_len is calculated for each scan command.  The TLVs added
 	   in this routine will be preserved since the routine that sends the
@@ -967,6 +969,18 @@
 			tlv_pos +=
 				  sizeof(struct mwifiex_ie_types_scan_chan_gap);
 		}
+
+		if (!ether_addr_equal(user_scan_in->random_mac, zero_mac)) {
+			random_mac_tlv = (void *)tlv_pos;
+			random_mac_tlv->header.type =
+					 cpu_to_le16(TLV_TYPE_RANDOM_MAC);
+			random_mac_tlv->header.len =
+				    cpu_to_le16(sizeof(random_mac_tlv->mac));
+			ether_addr_copy(random_mac_tlv->mac,
+					user_scan_in->random_mac);
+			tlv_pos +=
+				  sizeof(struct mwifiex_ie_types_random_mac);
+		}
 	} else {
 		scan_cfg_out->bss_mode = (u8) adapter->scan_mode;
 		num_probes = adapter->scan_probes;
@@ -1922,6 +1936,7 @@
 	}
 
 	adapter->active_scan_triggered = true;
+	ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac);
 	user_scan_cfg->num_ssids = priv->scan_request->n_ssids;
 	user_scan_cfg->ssid_list = priv->scan_request->ssids;
 
@@ -2179,18 +2194,14 @@
 
 		if (chan_band_tlv && adapter->nd_info) {
 			adapter->nd_info->matches[idx] =
-				kzalloc(sizeof(*pmatch) +
-				sizeof(u32), GFP_ATOMIC);
+				kzalloc(sizeof(*pmatch) + sizeof(u32),
+					GFP_ATOMIC);
 
 			pmatch = adapter->nd_info->matches[idx];
 
 			if (pmatch) {
-				memset(pmatch, 0, sizeof(*pmatch));
-				if (chan_band_tlv) {
-					pmatch->n_channels = 1;
-					pmatch->channels[0] =
-						chan_band->chan_number;
-				}
+				pmatch->n_channels = 1;
+				pmatch->channels[0] = chan_band->chan_number;
 			}
 		}
 
@@ -2761,6 +2772,7 @@
 	if (!scan_cfg)
 		return -ENOMEM;
 
+	ether_addr_copy(scan_cfg->random_mac, priv->random_mac);
 	scan_cfg->ssid_list = req_ssid;
 	scan_cfg->num_ssids = 1;
 
diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c
index d3e1561..8718950 100644
--- a/drivers/net/wireless/marvell/mwifiex/sdio.c
+++ b/drivers/net/wireless/marvell/mwifiex/sdio.c
@@ -122,9 +122,11 @@
 					       IRQF_TRIGGER_LOW,
 					       "wifi_wake", cfg);
 			if (ret) {
-				dev_err(dev,
+				dev_dbg(dev,
 					"Failed to request irq_wifi %d (%d)\n",
 					cfg->irq_wifi, ret);
+				card->plt_wake_cfg = NULL;
+				return 0;
 			}
 			disable_irq(cfg->irq_wifi);
 		}
@@ -289,7 +291,7 @@
 
 	mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num);
 
-	if (user_rmmod) {
+	if (user_rmmod && !adapter->mfg_mode) {
 		if (adapter->is_suspended)
 			mwifiex_sdio_resume(adapter->dev);
 
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
index 7897037..2a162c3 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
@@ -598,6 +598,11 @@
 		memcpy(km->key_param_set.key_params.cmac_aes.key,
 		       enc_key->key_material, enc_key->key_len);
 		len += sizeof(struct mwifiex_cmac_aes_param);
+	} else if (enc_key->is_igtk_def_key) {
+		mwifiex_dbg(adapter, INFO,
+			    "%s: Set CMAC default Key index\n", __func__);
+		km->key_param_set.key_type = KEY_TYPE_ID_AES_CMAC_DEF;
+		km->key_param_set.key_idx = enc_key->key_index & KEY_INDEX_MASK;
 	} else {
 		mwifiex_dbg(adapter, INFO,
 			    "%s: Set AES Key\n", __func__);
@@ -706,15 +711,10 @@
 				    (priv->wep_key_curr_index & KEY_INDEX_MASK))
 					key_info |= KEY_DEFAULT;
 			} else {
-				if (mac) {
-					if (is_broadcast_ether_addr(mac))
-						key_info |= KEY_MCAST;
-					else
-						key_info |= KEY_UNICAST |
-							    KEY_DEFAULT;
-				} else {
+				if (is_broadcast_ether_addr(mac))
 					key_info |= KEY_MCAST;
-				}
+				else
+					key_info |= KEY_UNICAST | KEY_DEFAULT;
 			}
 		}
 		km->key_param_set.key_info = cpu_to_le16(key_info);
@@ -1244,20 +1244,23 @@
 		return 0;
 
 	/* Send the ring base addresses and count to firmware */
-	host_spec->txbd_addr_lo = (u32)(card->txbd_ring_pbase);
-	host_spec->txbd_addr_hi = (u32)(((u64)card->txbd_ring_pbase)>>32);
-	host_spec->txbd_count = MWIFIEX_MAX_TXRX_BD;
-	host_spec->rxbd_addr_lo = (u32)(card->rxbd_ring_pbase);
-	host_spec->rxbd_addr_hi = (u32)(((u64)card->rxbd_ring_pbase)>>32);
-	host_spec->rxbd_count = MWIFIEX_MAX_TXRX_BD;
-	host_spec->evtbd_addr_lo = (u32)(card->evtbd_ring_pbase);
-	host_spec->evtbd_addr_hi = (u32)(((u64)card->evtbd_ring_pbase)>>32);
-	host_spec->evtbd_count = MWIFIEX_MAX_EVT_BD;
+	host_spec->txbd_addr_lo = cpu_to_le32((u32)(card->txbd_ring_pbase));
+	host_spec->txbd_addr_hi =
+			cpu_to_le32((u32)(((u64)card->txbd_ring_pbase) >> 32));
+	host_spec->txbd_count = cpu_to_le32(MWIFIEX_MAX_TXRX_BD);
+	host_spec->rxbd_addr_lo = cpu_to_le32((u32)(card->rxbd_ring_pbase));
+	host_spec->rxbd_addr_hi =
+			cpu_to_le32((u32)(((u64)card->rxbd_ring_pbase) >> 32));
+	host_spec->rxbd_count = cpu_to_le32(MWIFIEX_MAX_TXRX_BD);
+	host_spec->evtbd_addr_lo = cpu_to_le32((u32)(card->evtbd_ring_pbase));
+	host_spec->evtbd_addr_hi =
+			cpu_to_le32((u32)(((u64)card->evtbd_ring_pbase) >> 32));
+	host_spec->evtbd_count = cpu_to_le32(MWIFIEX_MAX_EVT_BD);
 	if (card->sleep_cookie_vbase) {
 		host_spec->sleep_cookie_addr_lo =
-						(u32)(card->sleep_cookie_pbase);
-		host_spec->sleep_cookie_addr_hi =
-				 (u32)(((u64)(card->sleep_cookie_pbase)) >> 32);
+				cpu_to_le32((u32)(card->sleep_cookie_pbase));
+		host_spec->sleep_cookie_addr_hi = cpu_to_le32((u32)(((u64)
+					(card->sleep_cookie_pbase)) >> 32));
 		mwifiex_dbg(priv->adapter, INFO,
 			    "sleep_cook_lo phy addr: 0x%x\n",
 			    host_spec->sleep_cookie_addr_lo);
@@ -1482,7 +1485,7 @@
 			continue;
 
 		/* property header is 6 bytes, data must fit in cmd buffer */
-		if (prop && prop->value && prop->length > 6 &&
+		if (prop->value && prop->length > 6 &&
 		    prop->length <= MWIFIEX_SIZE_OF_CMD_BUFFER - S_DS_GEN) {
 			ret = mwifiex_send_cmd(priv, HostCmd_CMD_CFG_DATA,
 					       HostCmd_ACT_GEN_SET, 0,
@@ -1596,6 +1599,21 @@
 	return 0;
 }
 
+static int mwifiex_cmd_chan_region_cfg(struct mwifiex_private *priv,
+				       struct host_cmd_ds_command *cmd,
+				       u16 cmd_action)
+{
+	struct host_cmd_ds_chan_region_cfg *reg = &cmd->params.reg_cfg;
+
+	cmd->command = cpu_to_le16(HostCmd_CMD_CHAN_REGION_CFG);
+	cmd->size = cpu_to_le16(sizeof(*reg) + S_DS_GEN);
+
+	if (cmd_action == HostCmd_ACT_GEN_GET)
+		reg->action = cpu_to_le16(cmd_action);
+
+	return 0;
+}
+
 static int
 mwifiex_cmd_coalesce_cfg(struct mwifiex_private *priv,
 			 struct host_cmd_ds_command *cmd,
@@ -2136,6 +2154,9 @@
 		ret = mwifiex_cmd_gtk_rekey_offload(priv, cmd_ptr, cmd_action,
 						    data_buf);
 		break;
+	case HostCmd_CMD_CHAN_REGION_CFG:
+		ret = mwifiex_cmd_chan_region_cfg(priv, cmd_ptr, cmd_action);
+		break;
 	default:
 		mwifiex_dbg(priv->adapter, ERROR,
 			    "PREP_CMD: unknown cmd- %#x\n", cmd_no);
@@ -2273,6 +2294,9 @@
 			if (ret)
 				return -1;
 		}
+
+		mwifiex_send_cmd(priv, HostCmd_CMD_CHAN_REGION_CFG,
+				 HostCmd_ACT_GEN_GET, 0, NULL, true);
 	}
 
 	/* get tx rate */
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
index ccf54932..8548027a 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
@@ -962,7 +962,7 @@
 	int i;
 	struct mwifiex_sta_node *sta_node;
 
-	for (i = 0; i < sta_list->sta_count; i++) {
+	for (i = 0; i < (le16_to_cpu(sta_list->sta_count)); i++) {
 		sta_node = mwifiex_get_sta_entry(priv, sta_info->mac);
 		if (unlikely(!sta_node))
 			continue;
@@ -1022,6 +1022,138 @@
 	return 0;
 }
 
+static struct ieee80211_regdomain *
+mwifiex_create_custom_regdomain(struct mwifiex_private *priv,
+				u8 *buf, u16 buf_len)
+{
+	u16 num_chan = buf_len / 2;
+	struct ieee80211_regdomain *regd;
+	struct ieee80211_reg_rule *rule;
+	bool new_rule;
+	int regd_size, idx, freq, prev_freq = 0;
+	u32 bw, prev_bw = 0;
+	u8 chflags, prev_chflags = 0, valid_rules = 0;
+
+	if (WARN_ON_ONCE(num_chan > NL80211_MAX_SUPP_REG_RULES))
+		return ERR_PTR(-EINVAL);
+
+	regd_size = sizeof(struct ieee80211_regdomain) +
+		    num_chan * sizeof(struct ieee80211_reg_rule);
+
+	regd = kzalloc(regd_size, GFP_KERNEL);
+	if (!regd)
+		return ERR_PTR(-ENOMEM);
+
+	for (idx = 0; idx < num_chan; idx++) {
+		u8 chan;
+		enum nl80211_band band;
+
+		chan = *buf++;
+		if (!chan) {
+			kfree(regd);
+			return NULL;
+		}
+		chflags = *buf++;
+		band = (chan <= 14) ? NL80211_BAND_2GHZ : NL80211_BAND_5GHZ;
+		freq = ieee80211_channel_to_frequency(chan, band);
+		new_rule = false;
+
+		if (chflags & MWIFIEX_CHANNEL_DISABLED)
+			continue;
+
+		if (band == NL80211_BAND_5GHZ) {
+			if (!(chflags & MWIFIEX_CHANNEL_NOHT80))
+				bw = MHZ_TO_KHZ(80);
+			else if (!(chflags & MWIFIEX_CHANNEL_NOHT40))
+				bw = MHZ_TO_KHZ(40);
+			else
+				bw = MHZ_TO_KHZ(20);
+		} else {
+			if (!(chflags & MWIFIEX_CHANNEL_NOHT40))
+				bw = MHZ_TO_KHZ(40);
+			else
+				bw = MHZ_TO_KHZ(20);
+		}
+
+		if (idx == 0 || prev_chflags != chflags || prev_bw != bw ||
+		    freq - prev_freq > 20) {
+			valid_rules++;
+			new_rule = true;
+		}
+
+		rule = &regd->reg_rules[valid_rules - 1];
+
+		rule->freq_range.end_freq_khz = MHZ_TO_KHZ(freq + 10);
+
+		prev_chflags = chflags;
+		prev_freq = freq;
+		prev_bw = bw;
+
+		if (!new_rule)
+			continue;
+
+		rule->freq_range.start_freq_khz = MHZ_TO_KHZ(freq - 10);
+		rule->power_rule.max_eirp = DBM_TO_MBM(19);
+
+		if (chflags & MWIFIEX_CHANNEL_PASSIVE)
+			rule->flags = NL80211_RRF_NO_IR;
+
+		if (chflags & MWIFIEX_CHANNEL_DFS)
+			rule->flags = NL80211_RRF_DFS;
+
+		rule->freq_range.max_bandwidth_khz = bw;
+	}
+
+	regd->n_reg_rules = valid_rules;
+	regd->alpha2[0] = '9';
+	regd->alpha2[1] = '9';
+
+	return regd;
+}
+
+static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv,
+				       struct host_cmd_ds_command *resp)
+{
+	struct host_cmd_ds_chan_region_cfg *reg = &resp->params.reg_cfg;
+	u16 action = le16_to_cpu(reg->action);
+	u16 tlv, tlv_buf_len, tlv_buf_left;
+	struct mwifiex_ie_types_header *head;
+	struct ieee80211_regdomain *regd;
+	u8 *tlv_buf;
+
+	if (action != HostCmd_ACT_GEN_GET)
+		return 0;
+
+	tlv_buf = (u8 *)reg + sizeof(*reg);
+	tlv_buf_left = le16_to_cpu(resp->size) - S_DS_GEN - sizeof(*reg);
+
+	while (tlv_buf_left >= sizeof(*head)) {
+		head = (struct mwifiex_ie_types_header *)tlv_buf;
+		tlv = le16_to_cpu(head->type);
+		tlv_buf_len = le16_to_cpu(head->len);
+
+		if (tlv_buf_left < (sizeof(*head) + tlv_buf_len))
+			break;
+
+		switch (tlv) {
+		case TLV_TYPE_CHAN_ATTR_CFG:
+			mwifiex_dbg_dump(priv->adapter, CMD_D, "CHAN:",
+					 (u8 *)head + sizeof(*head),
+					 tlv_buf_len);
+			regd = mwifiex_create_custom_regdomain(priv,
+				(u8 *)head + sizeof(*head), tlv_buf_len);
+			if (!IS_ERR(regd))
+				priv->adapter->regd = regd;
+			break;
+		}
+
+		tlv_buf += (sizeof(*head) + tlv_buf_len);
+		tlv_buf_left -= (sizeof(*head) + tlv_buf_len);
+	}
+
+	return 0;
+}
+
 /*
  * This function handles the command responses.
  *
@@ -1239,6 +1371,9 @@
 		break;
 	case HostCmd_CMD_GTK_REKEY_OFFLOAD_CFG:
 		break;
+	case HostCmd_CMD_CHAN_REGION_CFG:
+		ret = mwifiex_ret_chan_region_cfg(priv, resp);
+		break;
 	default:
 		mwifiex_dbg(adapter, ERROR,
 			    "CMD_RESP: unknown cmd response %#x\n",
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c
index a422f33..9df0c4d 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_event.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c
@@ -25,6 +25,99 @@
 #include "wmm.h"
 #include "11n.h"
 
+#define MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE    12
+
+static int mwifiex_check_ibss_peer_capabilties(struct mwifiex_private *priv,
+					       struct mwifiex_sta_node *sta_ptr,
+					       struct sk_buff *event)
+{
+	int evt_len, ele_len;
+	u8 *curr;
+	struct ieee_types_header *ele_hdr;
+	struct mwifiex_ie_types_mgmt_frame *tlv_mgmt_frame;
+	const struct ieee80211_ht_cap *ht_cap;
+	const struct ieee80211_vht_cap *vht_cap;
+
+	skb_pull(event, MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE);
+	evt_len = event->len;
+	curr = event->data;
+
+	mwifiex_dbg_dump(priv->adapter, EVT_D, "ibss peer capabilties:",
+			 event->data, event->len);
+
+	skb_push(event, MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE);
+
+	tlv_mgmt_frame = (void *)curr;
+	if (evt_len >= sizeof(*tlv_mgmt_frame) &&
+	    le16_to_cpu(tlv_mgmt_frame->header.type) ==
+	    TLV_TYPE_UAP_MGMT_FRAME) {
+		/* Locate curr pointer to the start of beacon tlv,
+		 * timestamp 8 bytes, beacon intervel 2 bytes,
+		 * capability info 2 bytes, totally 12 byte beacon header
+		 */
+		evt_len = le16_to_cpu(tlv_mgmt_frame->header.len);
+		curr += (sizeof(*tlv_mgmt_frame) + 12);
+	} else {
+		mwifiex_dbg(priv->adapter, MSG,
+			    "management frame tlv not found!\n");
+		return 0;
+	}
+
+	while (evt_len >= sizeof(*ele_hdr)) {
+		ele_hdr = (struct ieee_types_header *)curr;
+		ele_len = ele_hdr->len;
+
+		if (evt_len < ele_len + sizeof(*ele_hdr))
+			break;
+
+		switch (ele_hdr->element_id) {
+		case WLAN_EID_HT_CAPABILITY:
+			sta_ptr->is_11n_enabled = true;
+			ht_cap = (void *)(ele_hdr + 2);
+			sta_ptr->max_amsdu = le16_to_cpu(ht_cap->cap_info) &
+				IEEE80211_HT_CAP_MAX_AMSDU ?
+				MWIFIEX_TX_DATA_BUF_SIZE_8K :
+				MWIFIEX_TX_DATA_BUF_SIZE_4K;
+			mwifiex_dbg(priv->adapter, INFO,
+				    "11n enabled!, max_amsdu : %d\n",
+				    sta_ptr->max_amsdu);
+			break;
+
+		case WLAN_EID_VHT_CAPABILITY:
+			sta_ptr->is_11ac_enabled = true;
+			vht_cap = (void *)(ele_hdr + 2);
+			/* check VHT MAXMPDU capability */
+			switch (le32_to_cpu(vht_cap->vht_cap_info) & 0x3) {
+			case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
+				sta_ptr->max_amsdu =
+					MWIFIEX_TX_DATA_BUF_SIZE_12K;
+				break;
+			case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
+				sta_ptr->max_amsdu =
+					MWIFIEX_TX_DATA_BUF_SIZE_8K;
+				break;
+			case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895:
+				sta_ptr->max_amsdu =
+					MWIFIEX_TX_DATA_BUF_SIZE_4K;
+			default:
+				break;
+			}
+
+			mwifiex_dbg(priv->adapter, INFO,
+				    "11ac enabled!, max_amsdu : %d\n",
+				    sta_ptr->max_amsdu);
+			break;
+		default:
+			break;
+		}
+
+		curr += (ele_len + sizeof(*ele_hdr));
+		evt_len -= (ele_len + sizeof(*ele_hdr));
+	}
+
+	return 0;
+}
+
 /*
  * This function resets the connection state.
  *
@@ -519,6 +612,8 @@
  *      - EVENT_LINK_QUALITY
  *      - EVENT_PRE_BEACON_LOST
  *      - EVENT_IBSS_COALESCED
+ *      - EVENT_IBSS_STA_CONNECT
+ *      - EVENT_IBSS_STA_DISCONNECT
  *      - EVENT_WEP_ICV_ERR
  *      - EVENT_BW_CHANGE
  *      - EVENT_HOSTWAKE_STAIE
@@ -547,9 +642,11 @@
 int mwifiex_process_sta_event(struct mwifiex_private *priv)
 {
 	struct mwifiex_adapter *adapter = priv->adapter;
-	int ret = 0;
+	int ret = 0, i;
 	u32 eventcause = adapter->event_cause;
 	u16 ctrl, reason_code;
+	u8 ibss_sta_addr[ETH_ALEN];
+	struct mwifiex_sta_node *sta_ptr;
 
 	switch (eventcause) {
 	case EVENT_DUMMY_HOST_WAKEUP_SIGNAL:
@@ -708,7 +805,11 @@
 
 	case EVENT_EXT_SCAN_REPORT:
 		mwifiex_dbg(adapter, EVENT, "event: EXT_SCAN Report\n");
-		if (adapter->ext_scan && !priv->scan_aborting)
+		/* We intend to skip this event during suspend, but handle
+		 * it in interface disabled case
+		 */
+		if (adapter->ext_scan && (!priv->scan_aborting ||
+					  !netif_running(priv->netdev)))
 			ret = mwifiex_handle_event_ext_scan_report(priv,
 						adapter->event_skb->data);
 
@@ -771,6 +872,39 @@
 				HostCmd_CMD_802_11_IBSS_COALESCING_STATUS,
 				HostCmd_ACT_GEN_GET, 0, NULL, false);
 		break;
+	case EVENT_IBSS_STA_CONNECT:
+		ether_addr_copy(ibss_sta_addr, adapter->event_body + 2);
+		mwifiex_dbg(adapter, EVENT, "event: IBSS_STA_CONNECT %pM\n",
+			    ibss_sta_addr);
+		sta_ptr = mwifiex_add_sta_entry(priv, ibss_sta_addr);
+		if (sta_ptr && adapter->adhoc_11n_enabled) {
+			mwifiex_check_ibss_peer_capabilties(priv, sta_ptr,
+							    adapter->event_skb);
+			if (sta_ptr->is_11n_enabled)
+				for (i = 0; i < MAX_NUM_TID; i++)
+					sta_ptr->ampdu_sta[i] =
+					priv->aggr_prio_tbl[i].ampdu_user;
+			else
+				for (i = 0; i < MAX_NUM_TID; i++)
+					sta_ptr->ampdu_sta[i] =
+						BA_STREAM_NOT_ALLOWED;
+			memset(sta_ptr->rx_seq, 0xff, sizeof(sta_ptr->rx_seq));
+		}
+
+		break;
+	case EVENT_IBSS_STA_DISCONNECT:
+		ether_addr_copy(ibss_sta_addr, adapter->event_body + 2);
+		mwifiex_dbg(adapter, EVENT, "event: IBSS_STA_DISCONNECT %pM\n",
+			    ibss_sta_addr);
+		sta_ptr = mwifiex_get_sta_entry(priv, ibss_sta_addr);
+		if (sta_ptr && sta_ptr->is_11n_enabled) {
+			mwifiex_11n_del_rx_reorder_tbl_by_ta(priv,
+							     ibss_sta_addr);
+			mwifiex_del_tx_ba_stream_tbl_by_ra(priv, ibss_sta_addr);
+		}
+		mwifiex_wmm_del_peer_ra_list(priv, ibss_sta_addr);
+		mwifiex_del_sta_entry(priv, ibss_sta_addr);
+		break;
 	case EVENT_ADDBA:
 		mwifiex_dbg(adapter, EVENT, "event: ADDBA Request\n");
 		mwifiex_send_cmd(priv, HostCmd_CMD_11N_ADDBA_RSP,
@@ -869,6 +1003,12 @@
 		mwifiex_bt_coex_wlan_param_update_event(priv,
 							adapter->event_skb);
 		break;
+	case EVENT_RXBA_SYNC:
+		dev_dbg(adapter->dev, "EVENT: RXBA_SYNC\n");
+		mwifiex_11n_rxba_sync_event(priv, adapter->event_body,
+					    adapter->event_skb->len -
+					    sizeof(eventcause));
+		break;
 	default:
 		mwifiex_dbg(adapter, ERROR, "event: unknown event id: %#x\n",
 			    eventcause);
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
index e06647a..644f3a2 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
@@ -574,7 +574,7 @@
 
 	adapter->hs_activate_wait_q_woken = false;
 
-	memset(&hscfg, 0, sizeof(struct mwifiex_ds_hs_cfg));
+	memset(&hscfg, 0, sizeof(hscfg));
 	hscfg.is_invoke_hostcmd = true;
 
 	adapter->hs_enabling = true;
@@ -1138,7 +1138,7 @@
 {
 	struct mwifiex_ds_encrypt_key encrypt_key;
 
-	memset(&encrypt_key, 0, sizeof(struct mwifiex_ds_encrypt_key));
+	memset(&encrypt_key, 0, sizeof(encrypt_key));
 	encrypt_key.key_len = key_len;
 	encrypt_key.key_index = key_index;
 
@@ -1180,7 +1180,7 @@
 {
 	struct mwifiex_ver_ext ver_ext;
 
-	memset(&ver_ext, 0, sizeof(struct host_cmd_ds_version_ext));
+	memset(&ver_ext, 0, sizeof(ver_ext));
 	ver_ext.version_str_sel = version_str_sel;
 	if (mwifiex_send_cmd(priv, HostCmd_CMD_VERSION_EXT,
 			     HostCmd_ACT_GEN_GET, 0, &ver_ext, true))
diff --git a/drivers/net/wireless/marvell/mwifiex/uap_event.c b/drivers/net/wireless/marvell/mwifiex/uap_event.c
index 86ff542..d24eca3 100644
--- a/drivers/net/wireless/marvell/mwifiex/uap_event.c
+++ b/drivers/net/wireless/marvell/mwifiex/uap_event.c
@@ -306,7 +306,12 @@
 		mwifiex_dbg(adapter, EVENT, "event: multi-chan info\n");
 		mwifiex_process_multi_chan_event(priv, adapter->event_skb);
 		break;
-
+	case EVENT_RXBA_SYNC:
+		dev_dbg(adapter->dev, "EVENT: RXBA_SYNC\n");
+		mwifiex_11n_rxba_sync_event(priv, adapter->event_body,
+					    adapter->event_skb->len -
+					    sizeof(eventcause));
+		break;
 	default:
 		mwifiex_dbg(adapter, EVENT,
 			    "event: unknown event id: %#x\n", eventcause);
diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c
index 0857575..73eb084 100644
--- a/drivers/net/wireless/marvell/mwifiex/usb.c
+++ b/drivers/net/wireless/marvell/mwifiex/usb.c
@@ -273,6 +273,8 @@
 	} else {
 		mwifiex_dbg(adapter, DATA,
 			    "%s: DATA\n", __func__);
+		mwifiex_write_data_complete(adapter, context->skb, 0,
+					    urb->status ? -1 : 0);
 		for (i = 0; i < MWIFIEX_TX_DATA_PORT; i++) {
 			port = &card->port[i];
 			if (context->ep == port->tx_data_ep) {
@@ -282,8 +284,6 @@
 			}
 		}
 		adapter->data_sent = false;
-		mwifiex_write_data_complete(adapter, context->skb, 0,
-					    urb->status ? -1 : 0);
 	}
 
 	if (card->mc_resync_flag)
@@ -611,7 +611,7 @@
 	if (!adapter->priv_num)
 		return;
 
-	if (user_rmmod) {
+	if (user_rmmod && !adapter->mfg_mode) {
 #ifdef CONFIG_PM
 		if (adapter->is_suspended)
 			mwifiex_usb_resume(intf);
@@ -657,11 +657,8 @@
 	card->tx_cmd.ep = card->tx_cmd_ep;
 
 	card->tx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!card->tx_cmd.urb) {
-		mwifiex_dbg(adapter, ERROR,
-			    "tx_cmd.urb allocation failed\n");
+	if (!card->tx_cmd.urb)
 		return -ENOMEM;
-	}
 
 	for (i = 0; i < MWIFIEX_TX_DATA_PORT; i++) {
 		port = &card->port[i];
@@ -677,11 +674,8 @@
 			port->tx_data_list[j].ep = port->tx_data_ep;
 			port->tx_data_list[j].urb =
 					usb_alloc_urb(0, GFP_KERNEL);
-			if (!port->tx_data_list[j].urb) {
-				mwifiex_dbg(adapter, ERROR,
-					    "urb allocation failed\n");
+			if (!port->tx_data_list[j].urb)
 				return -ENOMEM;
-			}
 		}
 	}
 
@@ -697,10 +691,8 @@
 	card->rx_cmd.ep = card->rx_cmd_ep;
 
 	card->rx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!card->rx_cmd.urb) {
-		mwifiex_dbg(adapter, ERROR, "rx_cmd.urb allocation failed\n");
+	if (!card->rx_cmd.urb)
 		return -ENOMEM;
-	}
 
 	card->rx_cmd.skb = dev_alloc_skb(MWIFIEX_RX_CMD_BUF_SIZE);
 	if (!card->rx_cmd.skb)
@@ -714,11 +706,8 @@
 		card->rx_data_list[i].ep = card->rx_data_ep;
 
 		card->rx_data_list[i].urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!card->rx_data_list[i].urb) {
-			mwifiex_dbg(adapter, ERROR,
-				    "rx_data_list[] urb allocation failed\n");
+		if (!card->rx_data_list[i].urb)
 			return -1;
-		}
 		if (mwifiex_usb_submit_rx_urb(&card->rx_data_list[i],
 					      MWIFIEX_RX_DATA_BUF_SIZE))
 			return -1;
@@ -852,7 +841,7 @@
 	struct usb_tx_data_port *port = NULL;
 	u8 *data = (u8 *)skb->data;
 	struct urb *tx_urb;
-	int idx, ret;
+	int idx, ret = -EINPROGRESS;
 
 	if (adapter->is_suspended) {
 		mwifiex_dbg(adapter, ERROR,
@@ -876,8 +865,9 @@
 				if (atomic_read(&port->tx_data_urb_pending)
 				    >= MWIFIEX_TX_DATA_URB) {
 					port->block_status = true;
-					ret = -EBUSY;
-					goto done;
+					adapter->data_sent =
+						mwifiex_usb_data_sent(adapter);
+					return -EBUSY;
 				}
 				if (port->tx_data_ix >= MWIFIEX_TX_DATA_URB)
 					port->tx_data_ix = 0;
@@ -908,6 +898,14 @@
 	else
 		atomic_inc(&port->tx_data_urb_pending);
 
+	if (ep != card->tx_cmd_ep &&
+	    atomic_read(&port->tx_data_urb_pending) ==
+					MWIFIEX_TX_DATA_URB) {
+		port->block_status = true;
+		adapter->data_sent = mwifiex_usb_data_sent(adapter);
+		ret = -ENOSR;
+	}
+
 	if (usb_submit_urb(tx_urb, GFP_ATOMIC)) {
 		mwifiex_dbg(adapter, ERROR,
 			    "%s: usb_submit_urb failed\n", __func__);
@@ -916,29 +914,15 @@
 		} else {
 			atomic_dec(&port->tx_data_urb_pending);
 			port->block_status = false;
+			adapter->data_sent = false;
 			if (port->tx_data_ix)
 				port->tx_data_ix--;
 			else
 				port->tx_data_ix = MWIFIEX_TX_DATA_URB;
 		}
-
-		return -1;
-	} else {
-		if (ep != card->tx_cmd_ep &&
-		    atomic_read(&port->tx_data_urb_pending) ==
-							MWIFIEX_TX_DATA_URB) {
-			port->block_status = true;
-			ret = -ENOSR;
-			goto done;
-		}
+		ret = -1;
 	}
 
-	return -EINPROGRESS;
-
-done:
-	if (ep != card->tx_cmd_ep)
-		adapter->data_sent = mwifiex_usb_data_sent(adapter);
-
 	return ret;
 }
 
@@ -1037,6 +1021,10 @@
 			dnld_cmd = le32_to_cpu(fwdata->fw_hdr.dnld_cmd);
 			tlen += sizeof(struct fw_header);
 
+			/* Command 7 doesn't have data length field */
+			if (dnld_cmd == FW_CMD_7)
+				dlen = 0;
+
 			memcpy(fwdata->data, &firmware[tlen], dlen);
 
 			fwdata->seq_num = cpu_to_le32(fw_seqnum);
diff --git a/drivers/net/wireless/marvell/mwifiex/usb.h b/drivers/net/wireless/marvell/mwifiex/usb.h
index b4e9246..30e8eb8c 100644
--- a/drivers/net/wireless/marvell/mwifiex/usb.h
+++ b/drivers/net/wireless/marvell/mwifiex/usb.h
@@ -46,11 +46,12 @@
 #define USB8766_DEFAULT_FW_NAME	"mrvl/usb8766_uapsta.bin"
 #define USB8797_DEFAULT_FW_NAME	"mrvl/usb8797_uapsta.bin"
 #define USB8801_DEFAULT_FW_NAME	"mrvl/usb8801_uapsta.bin"
-#define USB8997_DEFAULT_FW_NAME	"mrvl/usb8997_uapsta.bin"
+#define USB8997_DEFAULT_FW_NAME	"mrvl/usbusb8997_combo_v4.bin"
 
 #define FW_DNLD_TX_BUF_SIZE	620
 #define FW_DNLD_RX_BUF_SIZE	2048
 #define FW_HAS_LAST_BLOCK	0x00000004
+#define FW_CMD_7		0x00000007
 
 #define FW_DATA_XMIT_SIZE \
 	(sizeof(struct fw_header) + dlen + sizeof(u32))
diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c
index 6681be0..18fbb96 100644
--- a/drivers/net/wireless/marvell/mwifiex/util.c
+++ b/drivers/net/wireless/marvell/mwifiex/util.c
@@ -386,6 +386,7 @@
 				    "unknown public action frame category %d\n",
 				    category);
 		}
+		break;
 	default:
 		mwifiex_dbg(priv->adapter, INFO,
 		    "unknown mgmt frame subtype %#x\n", stype);
diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c
index 57a80cf..a8bc064 100644
--- a/drivers/net/wireless/mediatek/mt7601u/dma.c
+++ b/drivers/net/wireless/mediatek/mt7601u/dma.c
@@ -103,7 +103,7 @@
 
 	if (unlikely(rxwi->zero[0] || rxwi->zero[1] || rxwi->zero[2]))
 		dev_err_once(dev->dev, "Error: RXWI zero fields are set\n");
-	if (unlikely(MT76_GET(MT_RXD_INFO_TYPE, fce_info)))
+	if (unlikely(FIELD_GET(MT_RXD_INFO_TYPE, fce_info)))
 		dev_err_once(dev->dev, "Error: RX path seen a non-pkt urb\n");
 
 	trace_mt_rx(dev, rxwi, fce_info);
diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.h b/drivers/net/wireless/mediatek/mt7601u/dma.h
index 978e8a9..270d126 100644
--- a/drivers/net/wireless/mediatek/mt7601u/dma.h
+++ b/drivers/net/wireless/mediatek/mt7601u/dma.h
@@ -18,8 +18,6 @@
 #include <asm/unaligned.h>
 #include <linux/skbuff.h>
 
-#include "util.h"
-
 #define MT_DMA_HDR_LEN			4
 #define MT_RX_INFO_LEN			4
 #define MT_FCE_INFO_LEN			4
@@ -79,9 +77,9 @@
 	 */
 
 	info = flags |
-		MT76_SET(MT_TXD_INFO_LEN, round_up(skb->len, 4)) |
-		MT76_SET(MT_TXD_INFO_D_PORT, d_port) |
-		MT76_SET(MT_TXD_INFO_TYPE, type);
+		FIELD_PREP(MT_TXD_INFO_LEN, round_up(skb->len, 4)) |
+		FIELD_PREP(MT_TXD_INFO_D_PORT, d_port) |
+		FIELD_PREP(MT_TXD_INFO_TYPE, type);
 
 	put_unaligned_le32(info, skb_push(skb, sizeof(info)));
 	return skb_put_padto(skb, round_up(skb->len, 4) + 4);
@@ -90,7 +88,7 @@
 static inline int
 mt7601u_dma_skb_wrap_pkt(struct sk_buff *skb, enum mt76_qsel qsel, u32 flags)
 {
-	flags |= MT76_SET(MT_TXD_PKT_INFO_QSEL, qsel);
+	flags |= FIELD_PREP(MT_TXD_PKT_INFO_QSEL, qsel);
 	return mt7601u_dma_skb_wrap(skb, WLAN_PORT, DMA_PACKET, flags);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.c b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
index 8d8ee03..da6faea 100644
--- a/drivers/net/wireless/mediatek/mt7601u/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.c
@@ -45,8 +45,8 @@
 	val = mt76_rr(dev, MT_EFUSE_CTRL);
 	val &= ~(MT_EFUSE_CTRL_AIN |
 		 MT_EFUSE_CTRL_MODE);
-	val |= MT76_SET(MT_EFUSE_CTRL_AIN, addr & ~0xf) |
-	       MT76_SET(MT_EFUSE_CTRL_MODE, mode) |
+	val |= FIELD_PREP(MT_EFUSE_CTRL_AIN, addr & ~0xf) |
+	       FIELD_PREP(MT_EFUSE_CTRL_MODE, mode) |
 	       MT_EFUSE_CTRL_KICK;
 	mt76_wr(dev, MT_EFUSE_CTRL, val);
 
@@ -128,8 +128,8 @@
 	if (!field_valid(nic_conf0 >> 8))
 		return;
 
-	if (MT76_GET(MT_EE_NIC_CONF_0_RX_PATH, nic_conf0) > 1 ||
-	    MT76_GET(MT_EE_NIC_CONF_0_TX_PATH, nic_conf0) > 1)
+	if (FIELD_GET(MT_EE_NIC_CONF_0_RX_PATH, nic_conf0) > 1 ||
+	    FIELD_GET(MT_EE_NIC_CONF_0_TX_PATH, nic_conf0) > 1)
 		dev_err(dev->dev,
 			"Error: device has more than 1 RX/TX stream!\n");
 }
@@ -150,7 +150,7 @@
 
 	mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr));
 	mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) |
-		MT76_SET(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff));
+		FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff));
 
 	return 0;
 }
@@ -176,7 +176,7 @@
 	u8 max_pwr;
 
 	val = mt7601u_rr(dev, MT_TX_ALC_CFG_0);
-	max_pwr = MT76_GET(MT_TX_ALC_CFG_0_LIMIT_0, val);
+	max_pwr = FIELD_GET(MT_TX_ALC_CFG_0_LIMIT_0, val);
 
 	if (mt7601u_has_tssi(dev, eeprom)) {
 		mt7601u_set_channel_target_power(dev, eeprom, max_pwr);
diff --git a/drivers/net/wireless/mediatek/mt7601u/init.c b/drivers/net/wireless/mediatek/mt7601u/init.c
index 8fa78d7..44d46e2 100644
--- a/drivers/net/wireless/mediatek/mt7601u/init.c
+++ b/drivers/net/wireless/mediatek/mt7601u/init.c
@@ -108,8 +108,9 @@
 {
 	u32 val;
 
-	val = MT76_SET(MT_USB_DMA_CFG_RX_BULK_AGG_TOUT, MT_USB_AGGR_TIMEOUT) |
-	      MT76_SET(MT_USB_DMA_CFG_RX_BULK_AGG_LMT, MT_USB_AGGR_SIZE_LIMIT) |
+	val = FIELD_PREP(MT_USB_DMA_CFG_RX_BULK_AGG_TOUT, MT_USB_AGGR_TIMEOUT) |
+	      FIELD_PREP(MT_USB_DMA_CFG_RX_BULK_AGG_LMT,
+			 MT_USB_AGGR_SIZE_LIMIT) |
 	      MT_USB_DMA_CFG_RX_BULK_EN |
 	      MT_USB_DMA_CFG_TX_BULK_EN;
 	if (dev->in_max_packet == 512)
@@ -396,8 +397,9 @@
 
 	mt7601u_rmw(dev, MT_US_CYC_CFG, MT_US_CYC_CNT, 0x1e);
 
-	mt7601u_wr(dev, MT_TXOP_CTRL_CFG, MT76_SET(MT_TXOP_TRUN_EN, 0x3f) |
-					  MT76_SET(MT_TXOP_EXT_CCA_DLY, 0x58));
+	mt7601u_wr(dev, MT_TXOP_CTRL_CFG,
+		   FIELD_PREP(MT_TXOP_TRUN_EN, 0x3f) |
+		   FIELD_PREP(MT_TXOP_EXT_CCA_DLY, 0x58));
 
 	ret = mt7601u_eeprom_init(dev);
 	if (ret)
diff --git a/drivers/net/wireless/mediatek/mt7601u/mac.c b/drivers/net/wireless/mediatek/mt7601u/mac.c
index e21c53e..3c57639 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mac.c
+++ b/drivers/net/wireless/mediatek/mt7601u/mac.c
@@ -19,13 +19,13 @@
 static void
 mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate)
 {
-	u8 idx = MT76_GET(MT_TXWI_RATE_MCS, rate);
+	u8 idx = FIELD_GET(MT_TXWI_RATE_MCS, rate);
 
 	txrate->idx = 0;
 	txrate->flags = 0;
 	txrate->count = 1;
 
-	switch (MT76_GET(MT_TXWI_RATE_PHY_MODE, rate)) {
+	switch (FIELD_GET(MT_TXWI_RATE_PHY_MODE, rate)) {
 	case MT_PHY_TYPE_OFDM:
 		txrate->idx = idx + 4;
 		return;
@@ -47,7 +47,7 @@
 		return;
 	}
 
-	if (MT76_GET(MT_TXWI_RATE_BW, rate) == MT_PHY_BW_40)
+	if (FIELD_GET(MT_TXWI_RATE_BW, rate) == MT_PHY_BW_40)
 		txrate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
 
 	if (rate & MT_TXWI_RATE_SGI)
@@ -125,9 +125,9 @@
 		bw = 0;
 	}
 
-	rateval = MT76_SET(MT_RXWI_RATE_MCS, rate_idx);
-	rateval |= MT76_SET(MT_RXWI_RATE_PHY, phy);
-	rateval |= MT76_SET(MT_RXWI_RATE_BW, bw);
+	rateval = FIELD_PREP(MT_RXWI_RATE_MCS, rate_idx);
+	rateval |= FIELD_PREP(MT_RXWI_RATE_PHY, phy);
+	rateval |= FIELD_PREP(MT_RXWI_RATE_BW, bw);
 	if (rate->flags & IEEE80211_TX_RC_SHORT_GI)
 		rateval |= MT_RXWI_RATE_SGI;
 
@@ -156,9 +156,9 @@
 	stat.success = !!(val & MT_TX_STAT_FIFO_SUCCESS);
 	stat.aggr = !!(val & MT_TX_STAT_FIFO_AGGR);
 	stat.ack_req = !!(val & MT_TX_STAT_FIFO_ACKREQ);
-	stat.pktid = MT76_GET(MT_TX_STAT_FIFO_PID_TYPE, val);
-	stat.wcid = MT76_GET(MT_TX_STAT_FIFO_WCID, val);
-	stat.rate = MT76_GET(MT_TX_STAT_FIFO_RATE, val);
+	stat.pktid = FIELD_GET(MT_TX_STAT_FIFO_PID_TYPE, val);
+	stat.wcid = FIELD_GET(MT_TX_STAT_FIFO_WCID, val);
+	stat.rate = FIELD_GET(MT_TX_STAT_FIFO_RATE, val);
 
 	return stat;
 }
@@ -270,7 +270,7 @@
 	}
 
 	val &= ~MT_BEACON_TIME_CFG_INTVAL;
-	val |= MT76_SET(MT_BEACON_TIME_CFG_INTVAL, interval << 4) |
+	val |= FIELD_PREP(MT_BEACON_TIME_CFG_INTVAL, interval << 4) |
 		MT_BEACON_TIME_CFG_TIMER_EN |
 		MT_BEACON_TIME_CFG_SYNC_MODE |
 		MT_BEACON_TIME_CFG_TBTT_EN;
@@ -349,8 +349,8 @@
 	u8 zmac[ETH_ALEN] = {};
 	u32 attr;
 
-	attr = MT76_SET(MT_WCID_ATTR_BSS_IDX, vif_idx & 7) |
-	       MT76_SET(MT_WCID_ATTR_BSS_IDX_EXT, !!(vif_idx & 8));
+	attr = FIELD_PREP(MT_WCID_ATTR_BSS_IDX, vif_idx & 7) |
+	       FIELD_PREP(MT_WCID_ATTR_BSS_IDX_EXT, !!(vif_idx & 8));
 
 	mt76_wr(dev, MT_WCID_ATTR(idx), attr);
 
@@ -382,15 +382,15 @@
 	rcu_read_unlock();
 
 	mt7601u_wr(dev, MT_MAX_LEN_CFG, 0xa0fff |
-		   MT76_SET(MT_MAX_LEN_CFG_AMPDU, min_factor));
+		   FIELD_PREP(MT_MAX_LEN_CFG_AMPDU, min_factor));
 }
 
 static void
 mt76_mac_process_rate(struct ieee80211_rx_status *status, u16 rate)
 {
-	u8 idx = MT76_GET(MT_RXWI_RATE_MCS, rate);
+	u8 idx = FIELD_GET(MT_RXWI_RATE_MCS, rate);
 
-	switch (MT76_GET(MT_RXWI_RATE_PHY, rate)) {
+	switch (FIELD_GET(MT_RXWI_RATE_PHY, rate)) {
 	case MT_PHY_TYPE_OFDM:
 		if (WARN_ON(idx >= 8))
 			idx = 0;
@@ -436,7 +436,7 @@
 			  u16 rate, int rssi)
 {
 	dev->bcn_freq_off = rxwi->freq_off;
-	dev->bcn_phy_mode = MT76_GET(MT_RXWI_RATE_PHY, rate);
+	dev->bcn_phy_mode = FIELD_GET(MT_RXWI_RATE_PHY, rate);
 	dev->avg_rssi = (dev->avg_rssi * 15) / 16 + (rssi << 8);
 }
 
@@ -458,7 +458,7 @@
 	u16 rate = le16_to_cpu(rxwi->rate);
 	int rssi;
 
-	len = MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
+	len = FIELD_GET(MT_RXWI_CTL_MPDU_LEN, ctl);
 	if (len < 10)
 		return 0;
 
@@ -542,8 +542,8 @@
 
 	val = mt7601u_rr(dev, MT_WCID_ATTR(idx));
 	val &= ~MT_WCID_ATTR_PKEY_MODE & ~MT_WCID_ATTR_PKEY_MODE_EXT;
-	val |= MT76_SET(MT_WCID_ATTR_PKEY_MODE, cipher & 7) |
-	       MT76_SET(MT_WCID_ATTR_PKEY_MODE_EXT, cipher >> 3);
+	val |= FIELD_PREP(MT_WCID_ATTR_PKEY_MODE, cipher & 7) |
+	       FIELD_PREP(MT_WCID_ATTR_PKEY_MODE_EXT, cipher >> 3);
 	val &= ~MT_WCID_ATTR_PAIRWISE;
 	val |= MT_WCID_ATTR_PAIRWISE *
 		!!(key && key->flags & IEEE80211_KEY_FLAG_PAIRWISE);
diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c
index e70dd95..43ebd46 100644
--- a/drivers/net/wireless/mediatek/mt7601u/main.c
+++ b/drivers/net/wireless/mediatek/mt7601u/main.c
@@ -15,7 +15,6 @@
 #include "mt7601u.h"
 #include "mac.h"
 #include <linux/etherdevice.h>
-#include <linux/version.h>
 
 static int mt7601u_start(struct ieee80211_hw *hw)
 {
diff --git a/drivers/net/wireless/mediatek/mt7601u/mcu.c b/drivers/net/wireless/mediatek/mt7601u/mcu.c
index 91c4b34..dbdfb3f 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mcu.c
+++ b/drivers/net/wireless/mediatek/mt7601u/mcu.c
@@ -43,8 +43,8 @@
 					    u8 seq, enum mcu_cmd cmd)
 {
 	WARN_ON(mt7601u_dma_skb_wrap(skb, CPU_TX_PORT, DMA_COMMAND,
-				     MT76_SET(MT_TXD_CMD_INFO_SEQ, seq) |
-				     MT76_SET(MT_TXD_CMD_INFO_TYPE, cmd)));
+				     FIELD_PREP(MT_TXD_CMD_INFO_SEQ, seq) |
+				     FIELD_PREP(MT_TXD_CMD_INFO_TYPE, cmd)));
 }
 
 static inline void trace_mt_mcu_msg_send_cs(struct mt7601u_dev *dev,
@@ -100,13 +100,13 @@
 			dev_err(dev->dev, "Error: MCU resp urb failed:%d\n",
 				urb_status);
 
-		if (MT76_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce) == seq &&
-		    MT76_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce) == CMD_DONE)
+		if (FIELD_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce) == seq &&
+		    FIELD_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce) == CMD_DONE)
 			return 0;
 
-		dev_err(dev->dev, "Error: MCU resp evt:%hhx seq:%hhx-%hhx!\n",
-			MT76_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce),
-			seq, MT76_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce));
+		dev_err(dev->dev, "Error: MCU resp evt:%lx seq:%hhx-%lx!\n",
+			FIELD_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce),
+			seq, FIELD_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce));
 	}
 
 	dev_err(dev->dev, "Error: %s timed out\n", __func__);
@@ -291,9 +291,9 @@
 	u32 val;
 	int ret;
 
-	reg = cpu_to_le32(MT76_SET(MT_TXD_INFO_TYPE, DMA_PACKET) |
-			  MT76_SET(MT_TXD_INFO_D_PORT, CPU_TX_PORT) |
-			  MT76_SET(MT_TXD_INFO_LEN, len));
+	reg = cpu_to_le32(FIELD_PREP(MT_TXD_INFO_TYPE, DMA_PACKET) |
+			  FIELD_PREP(MT_TXD_INFO_D_PORT, CPU_TX_PORT) |
+			  FIELD_PREP(MT_TXD_INFO_LEN, len));
 	memcpy(buf.buf, &reg, sizeof(reg));
 	memcpy(buf.buf + sizeof(reg), data, len);
 	memset(buf.buf + sizeof(reg) + len, 0, 8);
diff --git a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
index 428bd2f1..c7ec404 100644
--- a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
+++ b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h
@@ -15,6 +15,7 @@
 #ifndef MT7601U_H
 #define MT7601U_H
 
+#include <linux/bitfield.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/mutex.h>
@@ -24,7 +25,6 @@
 #include <linux/debugfs.h>
 
 #include "regs.h"
-#include "util.h"
 
 #define MT_CALIBRATE_INTERVAL		(4 * HZ)
 
@@ -299,7 +299,7 @@
 
 /* Compatibility with mt76 */
 #define mt76_rmw_field(_dev, _reg, _field, _val)	\
-	mt76_rmw(_dev, _reg, _field, MT76_SET(_field, _val))
+	mt76_rmw(_dev, _reg, _field, FIELD_PREP(_field, _val))
 
 static inline u32 mt76_rr(struct mt7601u_dev *dev, u32 offset)
 {
diff --git a/drivers/net/wireless/mediatek/mt7601u/phy.c b/drivers/net/wireless/mediatek/mt7601u/phy.c
index 1908af6..ca09a5d 100644
--- a/drivers/net/wireless/mediatek/mt7601u/phy.c
+++ b/drivers/net/wireless/mediatek/mt7601u/phy.c
@@ -41,11 +41,12 @@
 		goto out;
 	}
 
-	mt7601u_wr(dev, MT_RF_CSR_CFG, MT76_SET(MT_RF_CSR_CFG_DATA, value) |
-				       MT76_SET(MT_RF_CSR_CFG_REG_BANK, bank) |
-				       MT76_SET(MT_RF_CSR_CFG_REG_ID, offset) |
-				       MT_RF_CSR_CFG_WR |
-				       MT_RF_CSR_CFG_KICK);
+	mt7601u_wr(dev, MT_RF_CSR_CFG,
+		   FIELD_PREP(MT_RF_CSR_CFG_DATA, value) |
+		   FIELD_PREP(MT_RF_CSR_CFG_REG_BANK, bank) |
+		   FIELD_PREP(MT_RF_CSR_CFG_REG_ID, offset) |
+		   MT_RF_CSR_CFG_WR |
+		   MT_RF_CSR_CFG_KICK);
 	trace_rf_write(dev, bank, offset, value);
 out:
 	mutex_unlock(&dev->reg_atomic_mutex);
@@ -74,17 +75,18 @@
 	if (!mt76_poll(dev, MT_RF_CSR_CFG, MT_RF_CSR_CFG_KICK, 0, 100))
 		goto out;
 
-	mt7601u_wr(dev, MT_RF_CSR_CFG, MT76_SET(MT_RF_CSR_CFG_REG_BANK, bank) |
-				       MT76_SET(MT_RF_CSR_CFG_REG_ID, offset) |
-				       MT_RF_CSR_CFG_KICK);
+	mt7601u_wr(dev, MT_RF_CSR_CFG,
+		   FIELD_PREP(MT_RF_CSR_CFG_REG_BANK, bank) |
+		   FIELD_PREP(MT_RF_CSR_CFG_REG_ID, offset) |
+		   MT_RF_CSR_CFG_KICK);
 
 	if (!mt76_poll(dev, MT_RF_CSR_CFG, MT_RF_CSR_CFG_KICK, 0, 100))
 		goto out;
 
 	val = mt7601u_rr(dev, MT_RF_CSR_CFG);
-	if (MT76_GET(MT_RF_CSR_CFG_REG_ID, val) == offset &&
-	    MT76_GET(MT_RF_CSR_CFG_REG_BANK, val) == bank) {
-		ret = MT76_GET(MT_RF_CSR_CFG_DATA, val);
+	if (FIELD_GET(MT_RF_CSR_CFG_REG_ID, val) == offset &&
+	    FIELD_GET(MT_RF_CSR_CFG_REG_BANK, val) == bank) {
+		ret = FIELD_GET(MT_RF_CSR_CFG_DATA, val);
 		trace_rf_read(dev, bank, offset, ret);
 	}
 out:
@@ -139,8 +141,8 @@
 	}
 
 	mt7601u_wr(dev, MT_BBP_CSR_CFG,
-		   MT76_SET(MT_BBP_CSR_CFG_VAL, val) |
-		   MT76_SET(MT_BBP_CSR_CFG_REG_NUM, offset) |
+		   FIELD_PREP(MT_BBP_CSR_CFG_VAL, val) |
+		   FIELD_PREP(MT_BBP_CSR_CFG_REG_NUM, offset) |
 		   MT_BBP_CSR_CFG_RW_MODE | MT_BBP_CSR_CFG_BUSY);
 	trace_bbp_write(dev, offset, val);
 out:
@@ -163,7 +165,7 @@
 		goto out;
 
 	mt7601u_wr(dev, MT_BBP_CSR_CFG,
-		   MT76_SET(MT_BBP_CSR_CFG_REG_NUM, offset) |
+		   FIELD_PREP(MT_BBP_CSR_CFG_REG_NUM, offset) |
 		   MT_BBP_CSR_CFG_RW_MODE | MT_BBP_CSR_CFG_BUSY |
 		   MT_BBP_CSR_CFG_READ);
 
@@ -171,8 +173,8 @@
 		goto out;
 
 	val = mt7601u_rr(dev, MT_BBP_CSR_CFG);
-	if (MT76_GET(MT_BBP_CSR_CFG_REG_NUM, val) == offset) {
-		ret = MT76_GET(MT_BBP_CSR_CFG_VAL, val);
+	if (FIELD_GET(MT_BBP_CSR_CFG_REG_NUM, val) == offset) {
+		ret = FIELD_GET(MT_BBP_CSR_CFG_VAL, val);
 		trace_bbp_read(dev, offset, ret);
 	}
 out:
@@ -249,9 +251,9 @@
 			/* bw40 */ { -2, 16, 34 }
 		}
 	};
-	int bw = MT76_GET(MT_RXWI_RATE_BW, rate);
-	int aux_lna = MT76_GET(MT_RXWI_ANT_AUX_LNA, rxwi->ant);
-	int lna_id = MT76_GET(MT_RXWI_GAIN_RSSI_LNA_ID, rxwi->gain);
+	int bw = FIELD_GET(MT_RXWI_RATE_BW, rate);
+	int aux_lna = FIELD_GET(MT_RXWI_ANT_AUX_LNA, rxwi->ant);
+	int lna_id = FIELD_GET(MT_RXWI_GAIN_RSSI_LNA_ID, rxwi->gain);
 	int val;
 
 	if (lna_id) /* LNA id can be 0, 2, 3. */
@@ -259,7 +261,7 @@
 
 	val = 8;
 	val -= lna[aux_lna][bw][lna_id];
-	val -= MT76_GET(MT_RXWI_GAIN_RSSI_VAL, rxwi->gain);
+	val -= FIELD_GET(MT_RXWI_GAIN_RSSI_VAL, rxwi->gain);
 	val -= dev->ee->lna_gain;
 	val -= dev->ee->rssi_offset[0];
 
@@ -939,7 +941,7 @@
 	dev_dbg(dev->dev, "final diff: %08x\n", diff_pwr);
 
 	val = mt7601u_rr(dev, MT_TX_ALC_CFG_1);
-	curr_pwr = s6_to_int(MT76_GET(MT_TX_ALC_CFG_1_TEMP_COMP, val));
+	curr_pwr = s6_to_int(FIELD_GET(MT_TX_ALC_CFG_1_TEMP_COMP, val));
 	diff_pwr += curr_pwr;
 	val = (val & ~MT_TX_ALC_CFG_1_TEMP_COMP) | int_to_s6(diff_pwr);
 	mt7601u_wr(dev, MT_TX_ALC_CFG_1, val);
diff --git a/drivers/net/wireless/mediatek/mt7601u/regs.h b/drivers/net/wireless/mediatek/mt7601u/regs.h
index afd8978..27a429d 100644
--- a/drivers/net/wireless/mediatek/mt7601u/regs.h
+++ b/drivers/net/wireless/mediatek/mt7601u/regs.h
@@ -17,10 +17,6 @@
 
 #include <linux/bitops.h>
 
-#ifndef GENMASK
-#define GENMASK(h, l)       (((U32_C(1) << ((h) - (l) + 1)) - 1) << (l))
-#endif
-
 #define MT_ASIC_VERSION			0x0000
 
 #define MT76XX_REV_E3		0x22
diff --git a/drivers/net/wireless/mediatek/mt7601u/tx.c b/drivers/net/wireless/mediatek/mt7601u/tx.c
index a0a33dc..ad77bec 100644
--- a/drivers/net/wireless/mediatek/mt7601u/tx.c
+++ b/drivers/net/wireless/mediatek/mt7601u/tx.c
@@ -175,11 +175,12 @@
 		ba_size = min_t(int, 63, ba_size);
 		if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
 			ba_size = 0;
-		txwi->ack_ctl |= MT76_SET(MT_TXWI_ACK_CTL_BA_WINDOW, ba_size);
+		txwi->ack_ctl |= FIELD_PREP(MT_TXWI_ACK_CTL_BA_WINDOW, ba_size);
 
-		txwi->flags = cpu_to_le16(MT_TXWI_FLAGS_AMPDU |
-					  MT76_SET(MT_TXWI_FLAGS_MPDU_DENSITY,
-						   sta->ht_cap.ampdu_density));
+		txwi->flags =
+			cpu_to_le16(MT_TXWI_FLAGS_AMPDU |
+				    FIELD_PREP(MT_TXWI_FLAGS_MPDU_DENSITY,
+					       sta->ht_cap.ampdu_density));
 		if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
 			txwi->flags = 0;
 	}
@@ -188,7 +189,7 @@
 
 	is_probe = !!(info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE);
 	pkt_id = mt7601u_tx_pktid_enc(dev, rate_ctl & 0x7, is_probe);
-	pkt_len |= MT76_SET(MT_TXWI_LEN_PKTID, pkt_id);
+	pkt_len |= FIELD_PREP(MT_TXWI_LEN_PKTID, pkt_id);
 	txwi->len_ctl = cpu_to_le16(pkt_len);
 
 	return txwi;
@@ -285,9 +286,9 @@
 	WARN_ON(cw_min > 0xf);
 	WARN_ON(cw_max > 0xf);
 
-	val = MT76_SET(MT_EDCA_CFG_AIFSN, params->aifs) |
-	      MT76_SET(MT_EDCA_CFG_CWMIN, cw_min) |
-	      MT76_SET(MT_EDCA_CFG_CWMAX, cw_max);
+	val = FIELD_PREP(MT_EDCA_CFG_AIFSN, params->aifs) |
+	      FIELD_PREP(MT_EDCA_CFG_CWMIN, cw_min) |
+	      FIELD_PREP(MT_EDCA_CFG_CWMAX, cw_max);
 	/* TODO: based on user-controlled EnableTxBurst var vendor drv sets
 	 *	 a really long txop on AC0 (see connect.c:2009) but only on
 	 *	 connect? When not connected should be 0.
@@ -295,7 +296,7 @@
 	if (!hw_q)
 		val |= 0x60;
 	else
-		val |= MT76_SET(MT_EDCA_CFG_TXOP, params->txop);
+		val |= FIELD_PREP(MT_EDCA_CFG_TXOP, params->txop);
 	mt76_wr(dev, MT_EDCA_CFG_AC(hw_q), val);
 
 	val = mt76_rr(dev, MT_WMM_TXOP(hw_q));
diff --git a/drivers/net/wireless/mediatek/mt7601u/util.h b/drivers/net/wireless/mediatek/mt7601u/util.h
deleted file mode 100644
index b89140b..0000000
--- a/drivers/net/wireless/mediatek/mt7601u/util.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (C) 2014 Felix Fietkau <nbd@openwrt.org>
- * Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#ifndef __MT76_UTIL_H
-#define __MT76_UTIL_H
-
-/*
- * Power of two check, this will check
- * if the mask that has been given contains and contiguous set of bits.
- * Note that we cannot use the is_power_of_2() function since this
- * check must be done at compile-time.
- */
-#define is_power_of_two(x)	( !((x) & ((x)-1)) )
-#define low_bit_mask(x)		( ((x)-1) & ~(x) )
-#define is_valid_mask(x)	is_power_of_two(1LU + (x) + low_bit_mask(x))
-
-/*
- * Macros to find first set bit in a variable.
- * These macros behave the same as the __ffs() functions but
- * the most important difference that this is done during
- * compile-time rather then run-time.
- */
-#define compile_ffs2(__x) \
-	__builtin_choose_expr(((__x) & 0x1), 0, 1)
-
-#define compile_ffs4(__x) \
-	__builtin_choose_expr(((__x) & 0x3), \
-			      (compile_ffs2((__x))), \
-			      (compile_ffs2((__x) >> 2) + 2))
-
-#define compile_ffs8(__x) \
-	__builtin_choose_expr(((__x) & 0xf), \
-			      (compile_ffs4((__x))), \
-			      (compile_ffs4((__x) >> 4) + 4))
-
-#define compile_ffs16(__x) \
-	__builtin_choose_expr(((__x) & 0xff), \
-			      (compile_ffs8((__x))), \
-			      (compile_ffs8((__x) >> 8) + 8))
-
-#define compile_ffs32(__x) \
-	__builtin_choose_expr(((__x) & 0xffff), \
-			      (compile_ffs16((__x))), \
-			      (compile_ffs16((__x) >> 16) + 16))
-
-/*
- * This macro will check the requirements for the FIELD{8,16,32} macros
- * The mask should be a constant non-zero contiguous set of bits which
- * does not exceed the given typelimit.
- */
-#define FIELD_CHECK(__mask) \
-	BUILD_BUG_ON(!(__mask) || !is_valid_mask(__mask))
-
-#define MT76_SET(_mask, _val)						\
-	({								\
-		FIELD_CHECK(_mask);					\
-		(((u32) (_val)) << compile_ffs32(_mask)) & _mask;	\
-	})
-
-#define MT76_GET(_mask, _val)						\
-	({								\
-		FIELD_CHECK(_mask);					\
-		(u32) (((_val) & _mask) >> compile_ffs32(_mask));	\
-	})
-
-#endif
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c
index 7cf26c6..6005e14 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c
@@ -831,8 +831,10 @@
 	rt2x00dev->anchor = devm_kmalloc(&usb_dev->dev,
 					sizeof(struct usb_anchor),
 					GFP_KERNEL);
-	if (!rt2x00dev->anchor)
+	if (!rt2x00dev->anchor) {
+		retval = -ENOMEM;
 		goto exit_free_reg;
+	}
 
 	init_usb_anchor(rt2x00dev->anchor);
 	return 0;
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
index 4341d56..10166289 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h
@@ -29,6 +29,7 @@
 #define RTL8XXXU_DEBUG_H2C		0x800
 #define RTL8XXXU_DEBUG_ACTION		0x1000
 #define RTL8XXXU_DEBUG_EFUSE		0x2000
+#define RTL8XXXU_DEBUG_INTERRUPT	0x4000
 
 #define RTW_USB_CONTROL_MSG_TIMEOUT	500
 #define RTL8XXXU_MAX_REG_POLL		500
@@ -43,6 +44,7 @@
 
 #define TX_TOTAL_PAGE_NUM		0xf8
 #define TX_TOTAL_PAGE_NUM_8192E		0xf3
+#define TX_TOTAL_PAGE_NUM_8723B		0xf7
 /* (HPQ + LPQ + NPQ + PUBQ) = TX_TOTAL_PAGE_NUM */
 #define TX_PAGE_NUM_PUBQ		0xe7
 #define TX_PAGE_NUM_HI_PQ		0x0c
@@ -54,6 +56,11 @@
 #define TX_PAGE_NUM_LO_PQ_8192E		0x0c
 #define TX_PAGE_NUM_NORM_PQ_8192E	0x00
 
+#define TX_PAGE_NUM_PUBQ_8723B		0xe7
+#define TX_PAGE_NUM_HI_PQ_8723B		0x0c
+#define TX_PAGE_NUM_LO_PQ_8723B		0x02
+#define TX_PAGE_NUM_NORM_PQ_8723B	0x02
+
 #define RTL_FW_PAGE_SIZE		4096
 #define RTL8XXXU_FIRMWARE_POLL_MAX	1000
 
@@ -1312,7 +1319,7 @@
 	int (*power_on) (struct rtl8xxxu_priv *priv);
 	void (*power_off) (struct rtl8xxxu_priv *priv);
 	void (*reset_8051) (struct rtl8xxxu_priv *priv);
-	int (*llt_init) (struct rtl8xxxu_priv *priv, u8 last_tx_page);
+	int (*llt_init) (struct rtl8xxxu_priv *priv);
 	void (*init_phy_bb) (struct rtl8xxxu_priv *priv);
 	int (*init_phy_rf) (struct rtl8xxxu_priv *priv);
 	void (*phy_init_antenna_selection) (struct rtl8xxxu_priv *priv);
@@ -1330,11 +1337,17 @@
 				  u32 ramask, int sgi);
 	void (*report_connect) (struct rtl8xxxu_priv *priv,
 				u8 macid, bool connect);
+	void (*fill_txdesc) (struct ieee80211_hdr *hdr,
+			     struct rtl8xxxu_txdesc32 *tx_desc, u32 rate,
+			     u16 rate_flag, bool sgi, bool short_preamble,
+			     bool ampdu_enable);
 	int writeN_block_size;
 	int rx_agg_buf_size;
 	char tx_desc_size;
 	char rx_desc_size;
-	char has_s0s1;
+	u8 has_s0s1:1;
+	u8 has_tx_report:1;
+	u8 gen2_thermal_meter:1;
 	u32 adda_1t_init;
 	u32 adda_1t_path_on;
 	u32 adda_2t_path_on_a;
@@ -1388,14 +1401,14 @@
 void rtl8xxxu_firmware_self_reset(struct rtl8xxxu_priv *priv);
 void rtl8xxxu_power_off(struct rtl8xxxu_priv *priv);
 void rtl8xxxu_reset_8051(struct rtl8xxxu_priv *priv);
-int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page);
+int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv);
 void rtl8xxxu_gen2_prepare_calibrate(struct rtl8xxxu_priv *priv, u8 start);
 int rtl8xxxu_flush_fifo(struct rtl8xxxu_priv *priv);
 int rtl8xxxu_gen2_h2c_cmd(struct rtl8xxxu_priv *priv,
 			  struct h2c_cmd *h2c, int len);
 int rtl8xxxu_active_to_lps(struct rtl8xxxu_priv *priv);
 void rtl8xxxu_disabled_to_emu(struct rtl8xxxu_priv *priv);
-int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page);
+int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv);
 void rtl8xxxu_gen1_phy_iq_calibrate(struct rtl8xxxu_priv *priv);
 void rtl8xxxu_gen1_init_phy_bb(struct rtl8xxxu_priv *priv);
 void rtl8xxxu_gen1_set_tx_power(struct rtl8xxxu_priv *priv,
@@ -1421,6 +1434,14 @@
 int rtl8xxxu_gen2_channel_to_group(int channel);
 bool rtl8xxxu_gen2_simularity_compare(struct rtl8xxxu_priv *priv,
 				      int result[][8], int c1, int c2);
+void rtl8xxxu_fill_txdesc_v1(struct ieee80211_hdr *hdr,
+			     struct rtl8xxxu_txdesc32 *tx_desc, u32 rate,
+			     u16 rate_flag, bool sgi, bool short_preamble,
+			     bool ampdu_enable);
+void rtl8xxxu_fill_txdesc_v2(struct ieee80211_hdr *hdr,
+			     struct rtl8xxxu_txdesc32 *tx_desc32, u32 rate,
+			     u16 rate_flag, bool sgi, bool short_preamble,
+			     bool ampdu_enable);
 
 extern struct rtl8xxxu_fileops rtl8192cu_fops;
 extern struct rtl8xxxu_fileops rtl8192eu_fops;
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c
index 69d1a14..f9e2050 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c
@@ -567,6 +567,7 @@
 	.set_tx_power = rtl8xxxu_gen1_set_tx_power,
 	.update_rate_mask = rtl8xxxu_update_rate_mask,
 	.report_connect = rtl8xxxu_gen1_report_connect,
+	.fill_txdesc = rtl8xxxu_fill_txdesc_v1,
 	.writeN_block_size = 128,
 	.rx_agg_buf_size = 16000,
 	.tx_desc_size = sizeof(struct rtl8xxxu_txdesc32),
@@ -579,5 +580,9 @@
 	.pbp_rx = PBP_PAGE_SIZE_128,
 	.pbp_tx = PBP_PAGE_SIZE_128,
 	.mactable = rtl8xxxu_gen1_mac_init_table,
+	.total_page_num = TX_TOTAL_PAGE_NUM,
+	.page_num_hi = TX_PAGE_NUM_HI_PQ,
+	.page_num_lo = TX_PAGE_NUM_LO_PQ,
+	.page_num_norm = TX_PAGE_NUM_NORM_PQ,
 };
 #endif
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
index 9a1994f..df54d27 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
@@ -1396,6 +1396,114 @@
 	return ret;
 }
 
+static int rtl8192eu_active_to_lps(struct rtl8xxxu_priv *priv)
+{
+	struct device *dev = &priv->udev->dev;
+	u8 val8;
+	u16 val16;
+	u32 val32;
+	int retry, retval;
+
+	rtl8xxxu_write8(priv, REG_TXPAUSE, 0xff);
+
+	retry = 100;
+	retval = -EBUSY;
+	/*
+	 * Poll 32 bit wide 0x05f8 for 0x00000000 to ensure no TX is pending.
+	 */
+	do {
+		val32 = rtl8xxxu_read32(priv, REG_SCH_TX_CMD);
+		if (!val32) {
+			retval = 0;
+			break;
+		}
+	} while (retry--);
+
+	if (!retry) {
+		dev_warn(dev, "Failed to flush TX queue\n");
+		retval = -EBUSY;
+		goto out;
+	}
+
+	/* Disable CCK and OFDM, clock gated */
+	val8 = rtl8xxxu_read8(priv, REG_SYS_FUNC);
+	val8 &= ~SYS_FUNC_BBRSTB;
+	rtl8xxxu_write8(priv, REG_SYS_FUNC, val8);
+
+	udelay(2);
+
+	/* Reset whole BB */
+	val8 = rtl8xxxu_read8(priv, REG_SYS_FUNC);
+	val8 &= ~SYS_FUNC_BB_GLB_RSTN;
+	rtl8xxxu_write8(priv, REG_SYS_FUNC, val8);
+
+	/* Reset MAC TRX */
+	val16 = rtl8xxxu_read16(priv, REG_CR);
+	val16 &= 0xff00;
+	val16 |= (CR_HCI_TXDMA_ENABLE | CR_HCI_RXDMA_ENABLE);
+	rtl8xxxu_write16(priv, REG_CR, val16);
+
+	val16 = rtl8xxxu_read16(priv, REG_CR);
+	val16 &= ~CR_SECURITY_ENABLE;
+	rtl8xxxu_write16(priv, REG_CR, val16);
+
+	val8 = rtl8xxxu_read8(priv, REG_DUAL_TSF_RST);
+	val8 |= DUAL_TSF_TX_OK;
+	rtl8xxxu_write8(priv, REG_DUAL_TSF_RST, val8);
+
+out:
+	return retval;
+}
+
+static int rtl8192eu_active_to_emu(struct rtl8xxxu_priv *priv)
+{
+	u8 val8;
+	int count, ret = 0;
+
+	/* Turn off RF */
+	rtl8xxxu_write8(priv, REG_RF_CTRL, 0);
+
+	/* Switch DPDT_SEL_P output from register 0x65[2] */
+	val8 = rtl8xxxu_read8(priv, REG_LEDCFG2);
+	val8 &= ~LEDCFG2_DPDT_SELECT;
+	rtl8xxxu_write8(priv, REG_LEDCFG2, val8);
+
+	/* 0x0005[1] = 1 turn off MAC by HW state machine*/
+	val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1);
+	val8 |= BIT(1);
+	rtl8xxxu_write8(priv, REG_APS_FSMCO + 1, val8);
+
+	for (count = RTL8XXXU_MAX_REG_POLL; count; count--) {
+		val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1);
+		if ((val8 & BIT(1)) == 0)
+			break;
+		udelay(10);
+	}
+
+	if (!count) {
+		dev_warn(&priv->udev->dev, "%s: Disabling MAC timed out\n",
+			 __func__);
+		ret = -EBUSY;
+		goto exit;
+	}
+
+exit:
+	return ret;
+}
+
+static int rtl8192eu_emu_to_disabled(struct rtl8xxxu_priv *priv)
+{
+	u8 val8;
+
+	/* 0x04[12:11] = 01 enable WL suspend */
+	val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1);
+	val8 &= ~(BIT(3) | BIT(4));
+	val8 |= BIT(3);
+	rtl8xxxu_write8(priv, REG_APS_FSMCO + 1, val8);
+
+	return 0;
+}
+
 static int rtl8192eu_power_on(struct rtl8xxxu_priv *priv)
 {
 	u16 val16;
@@ -1446,6 +1554,40 @@
 	return ret;
 }
 
+void rtl8192eu_power_off(struct rtl8xxxu_priv *priv)
+{
+	u8 val8;
+	u16 val16;
+
+	rtl8xxxu_flush_fifo(priv);
+
+	val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL);
+	val8 &= ~TX_REPORT_CTRL_TIMER_ENABLE;
+	rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8);
+
+	/* Turn off RF */
+	rtl8xxxu_write8(priv, REG_RF_CTRL, 0x00);
+
+	rtl8192eu_active_to_lps(priv);
+
+	/* Reset Firmware if running in RAM */
+	if (rtl8xxxu_read8(priv, REG_MCU_FW_DL) & MCU_FW_RAM_SEL)
+		rtl8xxxu_firmware_self_reset(priv);
+
+	/* Reset MCU */
+	val16 = rtl8xxxu_read16(priv, REG_SYS_FUNC);
+	val16 &= ~SYS_FUNC_CPU_ENABLE;
+	rtl8xxxu_write16(priv, REG_SYS_FUNC, val16);
+
+	/* Reset MCU ready status */
+	rtl8xxxu_write8(priv, REG_MCU_FW_DL, 0x00);
+
+	rtl8xxxu_reset_8051(priv);
+
+	rtl8192eu_active_to_emu(priv);
+	rtl8192eu_emu_to_disabled(priv);
+}
+
 static void rtl8192e_enable_rf(struct rtl8xxxu_priv *priv)
 {
 	u32 val32;
@@ -1487,7 +1629,7 @@
 	.parse_efuse = rtl8192eu_parse_efuse,
 	.load_firmware = rtl8192eu_load_firmware,
 	.power_on = rtl8192eu_power_on,
-	.power_off = rtl8xxxu_power_off,
+	.power_off = rtl8192eu_power_off,
 	.reset_8051 = rtl8xxxu_reset_8051,
 	.llt_init = rtl8xxxu_auto_llt_table,
 	.init_phy_bb = rtl8192eu_init_phy_bb,
@@ -1501,10 +1643,12 @@
 	.set_tx_power = rtl8192e_set_tx_power,
 	.update_rate_mask = rtl8xxxu_gen2_update_rate_mask,
 	.report_connect = rtl8xxxu_gen2_report_connect,
+	.fill_txdesc = rtl8xxxu_fill_txdesc_v2,
 	.writeN_block_size = 128,
 	.tx_desc_size = sizeof(struct rtl8xxxu_txdesc40),
 	.rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24),
 	.has_s0s1 = 0,
+	.gen2_thermal_meter = 1,
 	.adda_1t_init = 0x0fc01616,
 	.adda_1t_path_on = 0x0fc01616,
 	.adda_2t_path_on_a = 0x0fc01616,
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c
index 686c551..aef3730 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c
@@ -384,6 +384,7 @@
 	.set_tx_power = rtl8xxxu_gen1_set_tx_power,
 	.update_rate_mask = rtl8xxxu_update_rate_mask,
 	.report_connect = rtl8xxxu_gen1_report_connect,
+	.fill_txdesc = rtl8xxxu_fill_txdesc_v1,
 	.writeN_block_size = 1024,
 	.rx_agg_buf_size = 16000,
 	.tx_desc_size = sizeof(struct rtl8xxxu_txdesc32),
@@ -396,4 +397,8 @@
 	.pbp_rx = PBP_PAGE_SIZE_128,
 	.pbp_tx = PBP_PAGE_SIZE_128,
 	.mactable = rtl8xxxu_gen1_mac_init_table,
+	.total_page_num = TX_TOTAL_PAGE_NUM,
+	.page_num_hi = TX_PAGE_NUM_HI_PQ,
+	.page_num_lo = TX_PAGE_NUM_LO_PQ,
+	.page_num_norm = TX_PAGE_NUM_NORM_PQ,
 };
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c
index 9d45afb..6c086b5 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c
@@ -1662,10 +1662,13 @@
 	.set_tx_power = rtl8723b_set_tx_power,
 	.update_rate_mask = rtl8xxxu_gen2_update_rate_mask,
 	.report_connect = rtl8xxxu_gen2_report_connect,
+	.fill_txdesc = rtl8xxxu_fill_txdesc_v2,
 	.writeN_block_size = 1024,
 	.tx_desc_size = sizeof(struct rtl8xxxu_txdesc40),
 	.rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24),
 	.has_s0s1 = 1,
+	.has_tx_report = 1,
+	.gen2_thermal_meter = 1,
 	.adda_1t_init = 0x01c00014,
 	.adda_1t_path_on = 0x01c00014,
 	.adda_2t_path_on_a = 0x01c00014,
@@ -1674,4 +1677,8 @@
 	.pbp_rx = PBP_PAGE_SIZE_256,
 	.pbp_tx = PBP_PAGE_SIZE_256,
 	.mactable = rtl8723b_mac_init_table,
+	.total_page_num = TX_TOTAL_PAGE_NUM_8723B,
+	.page_num_hi = TX_PAGE_NUM_HI_PQ_8723B,
+	.page_num_lo = TX_PAGE_NUM_LO_PQ_8723B,
+	.page_num_norm = TX_PAGE_NUM_NORM_PQ_8723B,
 };
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
index 77048db..b2d7f6e 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c
@@ -894,7 +894,7 @@
 	return retval;
 }
 
-int
+static int
 rtl8xxxu_gen1_h2c_cmd(struct rtl8xxxu_priv *priv, struct h2c_cmd *h2c, int len)
 {
 	struct device *dev = &priv->udev->dev;
@@ -2472,10 +2472,13 @@
 	return ret;
 }
 
-int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page)
+int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv)
 {
 	int ret;
 	int i;
+	u8 last_tx_page;
+
+	last_tx_page = priv->fops->total_page_num;
 
 	for (i = 0; i < last_tx_page; i++) {
 		ret = rtl8xxxu_llt_write(priv, i, i + 1);
@@ -2503,7 +2506,7 @@
 	return ret;
 }
 
-int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page)
+int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv)
 {
 	u32 val32;
 	int ret = 0;
@@ -3847,28 +3850,6 @@
 	rtl8xxxu_write32(priv, REG_RX_WAIT_CCA, val32);
 }
 
-static void rtl8xxxu_old_init_queue_reserved_page(struct rtl8xxxu_priv *priv)
-{
-	u8 val8;
-	u32 val32;
-
-	if (priv->ep_tx_normal_queue)
-		val8 = TX_PAGE_NUM_NORM_PQ;
-	else
-		val8 = 0;
-
-	rtl8xxxu_write8(priv, REG_RQPN_NPQ, val8);
-
-	val32 = (TX_PAGE_NUM_PUBQ << RQPN_PUB_PQ_SHIFT) | RQPN_LOAD;
-
-	if (priv->ep_tx_high_queue)
-		val32 |= (TX_PAGE_NUM_HI_PQ << RQPN_HI_PQ_SHIFT);
-	if (priv->ep_tx_low_queue)
-		val32 |= (TX_PAGE_NUM_LO_PQ << RQPN_LO_PQ_SHIFT);
-
-	rtl8xxxu_write32(priv, REG_RQPN, val32);
-}
-
 static void rtl8xxxu_init_queue_reserved_page(struct rtl8xxxu_priv *priv)
 {
 	struct rtl8xxxu_fileops *fops = priv->fops;
@@ -3891,7 +3872,7 @@
 	val32 = (nq << RQPN_NPQ_SHIFT) | (eq << RQPN_EPQ_SHIFT);
 	rtl8xxxu_write32(priv, REG_RQPN_NPQ, val32);
 
-	pubq = fops->total_page_num - hq - lq - nq;
+	pubq = fops->total_page_num - hq - lq - nq - 1;
 
 	val32 = RQPN_LOAD;
 	val32 |= (hq << RQPN_HI_PQ_SHIFT);
@@ -3905,6 +3886,7 @@
 {
 	struct rtl8xxxu_priv *priv = hw->priv;
 	struct device *dev = &priv->udev->dev;
+	struct rtl8xxxu_fileops *fops = priv->fops;
 	bool macpower;
 	int ret;
 	u8 val8;
@@ -3923,18 +3905,14 @@
 	else
 		macpower = true;
 
-	ret = priv->fops->power_on(priv);
+	ret = fops->power_on(priv);
 	if (ret < 0) {
 		dev_warn(dev, "%s: Failed power on\n", __func__);
 		goto exit;
 	}
 
-	if (!macpower) {
-		if (priv->fops->total_page_num)
-			rtl8xxxu_init_queue_reserved_page(priv);
-		else
-			rtl8xxxu_old_init_queue_reserved_page(priv);
-	}
+	if (!macpower)
+		rtl8xxxu_init_queue_reserved_page(priv);
 
 	ret = rtl8xxxu_init_queue_priority(priv);
 	dev_dbg(dev, "%s: init_queue_priority %i\n", __func__, ret);
@@ -3944,19 +3922,19 @@
 	/*
 	 * Set RX page boundary
 	 */
-	rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, priv->fops->trxff_boundary);
+	rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, fops->trxff_boundary);
 
 	ret = rtl8xxxu_download_firmware(priv);
-	dev_dbg(dev, "%s: download_fiwmare %i\n", __func__, ret);
+	dev_dbg(dev, "%s: download_firmware %i\n", __func__, ret);
 	if (ret)
 		goto exit;
 	ret = rtl8xxxu_start_firmware(priv);
-	dev_dbg(dev, "%s: start_fiwmare %i\n", __func__, ret);
+	dev_dbg(dev, "%s: start_firmware %i\n", __func__, ret);
 	if (ret)
 		goto exit;
 
-	if (priv->fops->phy_init_antenna_selection)
-		priv->fops->phy_init_antenna_selection(priv);
+	if (fops->phy_init_antenna_selection)
+		fops->phy_init_antenna_selection(priv);
 
 	ret = rtl8xxxu_init_mac(priv);
 
@@ -3969,7 +3947,7 @@
 	if (ret)
 		goto exit;
 
-	ret = priv->fops->init_phy_rf(priv);
+	ret = fops->init_phy_rf(priv);
 	if (ret)
 		goto exit;
 
@@ -3994,13 +3972,7 @@
 		/*
 		 * Set TX buffer boundary
 		 */
-		if (priv->rtl_chip == RTL8192E)
-			val8 = TX_TOTAL_PAGE_NUM_8192E + 1;
-		else
-			val8 = TX_TOTAL_PAGE_NUM + 1;
-
-		if (priv->rtl_chip == RTL8723B)
-			val8 -= 1;
+		val8 = fops->total_page_num + 1;
 
 		rtl8xxxu_write8(priv, REG_TXPKTBUF_BCNQ_BDNY, val8);
 		rtl8xxxu_write8(priv, REG_TXPKTBUF_MGQ_BDNY, val8);
@@ -4013,14 +3985,14 @@
 	 * The vendor drivers set PBP for all devices, except 8192e.
 	 * There is no explanation for this in any of the sources.
 	 */
-	val8 = (priv->fops->pbp_rx << PBP_PAGE_SIZE_RX_SHIFT) |
-		(priv->fops->pbp_tx << PBP_PAGE_SIZE_TX_SHIFT);
+	val8 = (fops->pbp_rx << PBP_PAGE_SIZE_RX_SHIFT) |
+		(fops->pbp_tx << PBP_PAGE_SIZE_TX_SHIFT);
 	if (priv->rtl_chip != RTL8192E)
 		rtl8xxxu_write8(priv, REG_PBP, val8);
 
 	dev_dbg(dev, "%s: macpower %i\n", __func__, macpower);
 	if (!macpower) {
-		ret = priv->fops->llt_init(priv, TX_TOTAL_PAGE_NUM);
+		ret = fops->llt_init(priv);
 		if (ret) {
 			dev_warn(dev, "%s: LLT table init failed\n", __func__);
 			goto exit;
@@ -4029,13 +4001,12 @@
 		/*
 		 * Chip specific quirks
 		 */
-		priv->fops->usb_quirks(priv);
+		fops->usb_quirks(priv);
 
 		/*
-		 * Presumably this is for 8188EU as well
-		 * Enable TX report and TX report timer
+		 * Enable TX report and TX report timer for 8723bu/8188eu/...
 		 */
-		if (priv->rtl_chip == RTL8723B) {
+		if (fops->has_tx_report) {
 			val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL);
 			val8 |= TX_REPORT_CTRL_TIMER_ENABLE;
 			rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8);
@@ -4170,8 +4141,8 @@
 		rtl8xxxu_write8(priv, REG_RSV_CTRL, val8);
 	}
 
-	if (priv->fops->init_aggregation)
-		priv->fops->init_aggregation(priv);
+	if (fops->init_aggregation)
+		fops->init_aggregation(priv);
 
 	/*
 	 * Enable CCK and OFDM block
@@ -4188,7 +4159,7 @@
 	/*
 	 * Start out with default power levels for channel 6, 20MHz
 	 */
-	priv->fops->set_tx_power(priv, 1, false);
+	fops->set_tx_power(priv, 1, false);
 
 	/* Let the 8051 take control of antenna setting */
 	if (priv->rtl_chip != RTL8192E) {
@@ -4204,8 +4175,8 @@
 
 	rtl8xxxu_write16(priv, REG_FAST_EDCA_CTRL, 0);
 
-	if (priv->fops->init_statistics)
-		priv->fops->init_statistics(priv);
+	if (fops->init_statistics)
+		fops->init_statistics(priv);
 
 	if (priv->rtl_chip == RTL8192E) {
 		/*
@@ -4223,12 +4194,12 @@
 
 	rtl8723a_phy_lc_calibrate(priv);
 
-	priv->fops->phy_iq_calibrate(priv);
+	fops->phy_iq_calibrate(priv);
 
 	/*
 	 * This should enable thermal meter
 	 */
-	if (priv->fops->tx_desc_size == sizeof(struct rtl8xxxu_txdesc40))
+	if (fops->gen2_thermal_meter)
 		rtl8xxxu_write_rfreg(priv,
 				     RF_A, RF6052_REG_T_METER_8723B, 0x37cf8);
 	else
@@ -4783,6 +4754,113 @@
 	}
 }
 
+/*
+ * Fill in v1 (gen1) specific TX descriptor bits.
+ * This format is used on 8188cu/8192cu/8723au
+ */
+void
+rtl8xxxu_fill_txdesc_v1(struct ieee80211_hdr *hdr,
+			struct rtl8xxxu_txdesc32 *tx_desc, u32 rate,
+			u16 rate_flag, bool sgi, bool short_preamble,
+			bool ampdu_enable)
+{
+	u16 seq_number;
+
+	seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
+
+	tx_desc->txdw5 = cpu_to_le32(rate);
+
+	if (ieee80211_is_data(hdr->frame_control))
+		tx_desc->txdw5 |= cpu_to_le32(0x0001ff00);
+
+	tx_desc->txdw3 = cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT);
+
+	if (ampdu_enable)
+		tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE);
+	else
+		tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK);
+
+	if (ieee80211_is_mgmt(hdr->frame_control)) {
+		tx_desc->txdw5 = cpu_to_le32(rate);
+		tx_desc->txdw4 |= cpu_to_le32(TXDESC32_USE_DRIVER_RATE);
+		tx_desc->txdw5 |= cpu_to_le32(6 << TXDESC32_RETRY_LIMIT_SHIFT);
+		tx_desc->txdw5 |= cpu_to_le32(TXDESC32_RETRY_LIMIT_ENABLE);
+	}
+
+	if (ieee80211_is_data_qos(hdr->frame_control))
+		tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS);
+
+	if (short_preamble)
+		tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE);
+
+	if (sgi)
+		tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI);
+
+	if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) {
+		/*
+		 * Use RTS rate 24M - does the mac80211 tell
+		 * us which to use?
+		 */
+		tx_desc->txdw4 |= cpu_to_le32(DESC_RATE_24M <<
+					      TXDESC32_RTS_RATE_SHIFT);
+		tx_desc->txdw4 |= cpu_to_le32(TXDESC32_RTS_CTS_ENABLE);
+		tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE);
+	}
+}
+
+/*
+ * Fill in v2 (gen2) specific TX descriptor bits.
+ * This format is used on 8192eu/8723bu
+ */
+void
+rtl8xxxu_fill_txdesc_v2(struct ieee80211_hdr *hdr,
+			struct rtl8xxxu_txdesc32 *tx_desc32, u32 rate,
+			u16 rate_flag, bool sgi, bool short_preamble,
+			bool ampdu_enable)
+{
+	struct rtl8xxxu_txdesc40 *tx_desc40;
+	u16 seq_number;
+
+	tx_desc40 = (struct rtl8xxxu_txdesc40 *)tx_desc32;
+
+	seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
+
+	tx_desc40->txdw4 = cpu_to_le32(rate);
+	if (ieee80211_is_data(hdr->frame_control)) {
+		tx_desc40->txdw4 |= cpu_to_le32(0x1f <<
+						TXDESC40_DATA_RATE_FB_SHIFT);
+	}
+
+	tx_desc40->txdw9 = cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT);
+
+	if (ampdu_enable)
+		tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE);
+	else
+		tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK);
+
+	if (ieee80211_is_mgmt(hdr->frame_control)) {
+		tx_desc40->txdw4 = cpu_to_le32(rate);
+		tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_USE_DRIVER_RATE);
+		tx_desc40->txdw4 |=
+			cpu_to_le32(6 << TXDESC40_RETRY_LIMIT_SHIFT);
+		tx_desc40->txdw4 |= cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE);
+	}
+
+	if (short_preamble)
+		tx_desc40->txdw5 |= cpu_to_le32(TXDESC40_SHORT_PREAMBLE);
+
+	if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) {
+		/*
+		 * Use RTS rate 24M - does the mac80211 tell
+		 * us which to use?
+		 */
+		tx_desc40->txdw4 |= cpu_to_le32(DESC_RATE_24M <<
+						TXDESC40_RTS_RATE_SHIFT);
+		tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_RTS_CTS_ENABLE);
+		tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_HW_RTS_ENABLE);
+	}
+}
+
 static void rtl8xxxu_tx(struct ieee80211_hw *hw,
 			struct ieee80211_tx_control *control,
 			struct sk_buff *skb)
@@ -4792,7 +4870,6 @@
 	struct ieee80211_rate *tx_rate = ieee80211_get_tx_rate(hw, tx_info);
 	struct rtl8xxxu_priv *priv = hw->priv;
 	struct rtl8xxxu_txdesc32 *tx_desc;
-	struct rtl8xxxu_txdesc40 *tx_desc40;
 	struct rtl8xxxu_tx_urb *tx_urb;
 	struct ieee80211_sta *sta = NULL;
 	struct ieee80211_vif *vif = tx_info->control.vif;
@@ -4803,7 +4880,7 @@
 	u16 rate_flag = tx_info->control.rates[0].flags;
 	int tx_desc_size = priv->fops->tx_desc_size;
 	int ret;
-	bool usedesc40, ampdu_enable;
+	bool usedesc40, ampdu_enable, sgi = false, short_preamble = false;
 
 	if (skb_headroom(skb) < tx_desc_size) {
 		dev_warn(dev,
@@ -4881,107 +4958,26 @@
 		}
 	}
 
-	if (rate_flag & IEEE80211_TX_RC_MCS)
+	if (rate_flag & IEEE80211_TX_RC_MCS &&
+	    !ieee80211_is_mgmt(hdr->frame_control))
 		rate = tx_info->control.rates[0].idx + DESC_RATE_MCS0;
 	else
 		rate = tx_rate->hw_value;
 
+	if (rate_flag & IEEE80211_TX_RC_SHORT_GI ||
+	    (ieee80211_is_data_qos(hdr->frame_control) &&
+	     sta && sta->ht_cap.cap &
+	     (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20)))
+		sgi = true;
+
+	if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE ||
+	    (sta && vif && vif->bss_conf.use_short_preamble))
+		short_preamble = true;
+
 	seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
-	if (!usedesc40) {
-		tx_desc->txdw5 = cpu_to_le32(rate);
 
-		if (ieee80211_is_data(hdr->frame_control))
-			tx_desc->txdw5 |= cpu_to_le32(0x0001ff00);
-
-		tx_desc->txdw3 =
-			cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT);
-
-		if (ampdu_enable)
-			tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE);
-		else
-			tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK);
-
-		if (ieee80211_is_mgmt(hdr->frame_control)) {
-			tx_desc->txdw5 = cpu_to_le32(tx_rate->hw_value);
-			tx_desc->txdw4 |=
-				cpu_to_le32(TXDESC32_USE_DRIVER_RATE);
-			tx_desc->txdw5 |=
-				cpu_to_le32(6 << TXDESC32_RETRY_LIMIT_SHIFT);
-			tx_desc->txdw5 |=
-				cpu_to_le32(TXDESC32_RETRY_LIMIT_ENABLE);
-		}
-
-		if (ieee80211_is_data_qos(hdr->frame_control))
-			tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS);
-
-		if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE ||
-		    (sta && vif && vif->bss_conf.use_short_preamble))
-			tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE);
-
-		if (rate_flag & IEEE80211_TX_RC_SHORT_GI ||
-		    (ieee80211_is_data_qos(hdr->frame_control) &&
-		     sta && sta->ht_cap.cap &
-		     (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))) {
-			tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI);
-		}
-
-		if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) {
-			/*
-			 * Use RTS rate 24M - does the mac80211 tell
-			 * us which to use?
-			 */
-			tx_desc->txdw4 |=
-				cpu_to_le32(DESC_RATE_24M <<
-					    TXDESC32_RTS_RATE_SHIFT);
-			tx_desc->txdw4 |=
-				cpu_to_le32(TXDESC32_RTS_CTS_ENABLE);
-			tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE);
-		}
-	} else {
-		tx_desc40 = (struct rtl8xxxu_txdesc40 *)tx_desc;
-
-		tx_desc40->txdw4 = cpu_to_le32(rate);
-		if (ieee80211_is_data(hdr->frame_control)) {
-			tx_desc->txdw4 |=
-				cpu_to_le32(0x1f <<
-					    TXDESC40_DATA_RATE_FB_SHIFT);
-		}
-
-		tx_desc40->txdw9 =
-			cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT);
-
-		if (ampdu_enable)
-			tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE);
-		else
-			tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK);
-
-		if (ieee80211_is_mgmt(hdr->frame_control)) {
-			tx_desc40->txdw4 = cpu_to_le32(tx_rate->hw_value);
-			tx_desc40->txdw3 |=
-				cpu_to_le32(TXDESC40_USE_DRIVER_RATE);
-			tx_desc40->txdw4 |=
-				cpu_to_le32(6 << TXDESC40_RETRY_LIMIT_SHIFT);
-			tx_desc40->txdw4 |=
-				cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE);
-		}
-
-		if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE ||
-		    (sta && vif && vif->bss_conf.use_short_preamble))
-			tx_desc40->txdw5 |=
-				cpu_to_le32(TXDESC40_SHORT_PREAMBLE);
-
-		if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) {
-			/*
-			 * Use RTS rate 24M - does the mac80211 tell
-			 * us which to use?
-			 */
-			tx_desc->txdw4 |=
-				cpu_to_le32(DESC_RATE_24M <<
-					    TXDESC40_RTS_RATE_SHIFT);
-			tx_desc->txdw3 |= cpu_to_le32(TXDESC40_RTS_CTS_ENABLE);
-			tx_desc->txdw3 |= cpu_to_le32(TXDESC40_HW_RTS_ENABLE);
-		}
-	}
+	priv->fops->fill_txdesc(hdr, tx_desc, rate, rate_flag,
+				sgi, short_preamble, ampdu_enable);
 
 	rtl8xxxu_calc_tx_desc_csum(tx_desc);
 
@@ -5379,7 +5375,8 @@
 	struct device *dev = &priv->udev->dev;
 	int ret;
 
-	dev_dbg(dev, "%s: status %i\n", __func__, urb->status);
+	if (rtl8xxxu_debug & RTL8XXXU_DEBUG_INTERRUPT)
+		dev_dbg(dev, "%s: status %i\n", __func__, urb->status);
 	if (urb->status == 0) {
 		usb_anchor_urb(urb, &priv->int_anchor);
 		ret = usb_submit_urb(urb, GFP_ATOMIC);
@@ -5704,7 +5701,7 @@
 
 	switch (action) {
 	case IEEE80211_AMPDU_TX_START:
-		dev_info(dev, "%s: IEEE80211_AMPDU_TX_START\n", __func__);
+		dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_START\n", __func__);
 		ampdu_factor = sta->ht_cap.ampdu_factor;
 		ampdu_density = sta->ht_cap.ampdu_density;
 		rtl8xxxu_set_ampdu_factor(priv, ampdu_factor);
@@ -5714,21 +5711,21 @@
 			ampdu_factor, ampdu_density);
 		break;
 	case IEEE80211_AMPDU_TX_STOP_FLUSH:
-		dev_info(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__);
+		dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__);
 		rtl8xxxu_set_ampdu_factor(priv, 0);
 		rtl8xxxu_set_ampdu_min_space(priv, 0);
 		break;
 	case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
-		dev_info(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n",
+		dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n",
 			 __func__);
 		rtl8xxxu_set_ampdu_factor(priv, 0);
 		rtl8xxxu_set_ampdu_min_space(priv, 0);
 		break;
 	case IEEE80211_AMPDU_RX_START:
-		dev_info(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__);
+		dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__);
 		break;
 	case IEEE80211_AMPDU_RX_STOP:
-		dev_info(dev, "%s: IEEE80211_AMPDU_RX_STOP\n", __func__);
+		dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_STOP\n", __func__);
 		break;
 	default:
 		break;
@@ -5947,7 +5944,7 @@
 	struct ieee80211_hw *hw;
 	struct usb_device *udev;
 	struct ieee80211_supported_band *sband;
-	int ret = 0;
+	int ret;
 	int untested = 1;
 
 	udev = usb_get_dev(interface_to_usbdev(interface));
@@ -5971,6 +5968,18 @@
 		if (id->idProduct == 0x1004)
 			untested = 0;
 		break;
+	case 0x20f4:
+		if (id->idProduct == 0x648b)
+			untested = 0;
+		break;
+	case 0x2001:
+		if (id->idProduct == 0x3308)
+			untested = 0;
+		break;
+	case 0x2357:
+		if (id->idProduct == 0x0109)
+			untested = 0;
+		break;
 	default:
 		break;
 	}
@@ -5987,6 +5996,7 @@
 	hw = ieee80211_alloc_hw(sizeof(struct rtl8xxxu_priv), &rtl8xxxu_ops);
 	if (!hw) {
 		ret = -ENOMEM;
+		priv = NULL;
 		goto exit;
 	}
 
@@ -6035,6 +6045,8 @@
 	}
 
 	ret = rtl8xxxu_init_device(hw);
+	if (ret)
+		goto exit;
 
 	hw->wiphy->max_scan_ssids = 1;
 	hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
@@ -6085,9 +6097,20 @@
 		goto exit;
 	}
 
+	return 0;
+
 exit:
-	if (ret < 0)
-		usb_put_dev(udev);
+	usb_set_intfdata(interface, NULL);
+
+	if (priv) {
+		kfree(priv->fw_data);
+		mutex_destroy(&priv->usb_buf_mutex);
+		mutex_destroy(&priv->h2c_mutex);
+	}
+	usb_put_dev(udev);
+
+	ieee80211_free_hw(hw);
+
 	return ret;
 }
 
@@ -6111,6 +6134,11 @@
 	mutex_destroy(&priv->usb_buf_mutex);
 	mutex_destroy(&priv->h2c_mutex);
 
+	if (priv->udev->state != USB_STATE_NOTATTACHED) {
+		dev_info(&priv->udev->dev,
+			 "Device still attached, trying to reset\n");
+		usb_reset_device(priv->udev);
+	}
 	usb_put_dev(priv->udev);
 	ieee80211_free_hw(hw);
 }
@@ -6124,6 +6152,9 @@
 	.driver_info = (unsigned long)&rtl8723au_fops},
 {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x818b, 0xff, 0xff, 0xff),
 	.driver_info = (unsigned long)&rtl8192eu_fops},
+/* Tested by Myckel Habets */
+{USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0109, 0xff, 0xff, 0xff),
+	.driver_info = (unsigned long)&rtl8192eu_fops},
 {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0xb720, 0xff, 0xff, 0xff),
 	.driver_info = (unsigned long)&rtl8723bu_fops},
 #ifdef CONFIG_RTL8XXXU_UNTESTED
@@ -6140,6 +6171,12 @@
 /* Tested by Andrea Merello */
 {USB_DEVICE_AND_INTERFACE_INFO(0x050d, 0x1004, 0xff, 0xff, 0xff),
 	.driver_info = (unsigned long)&rtl8192cu_fops},
+/* Tested by Jocelyn Mayer */
+{USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff),
+	.driver_info = (unsigned long)&rtl8192cu_fops},
+/* Tested by Stefano Bravi */
+{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3308, 0xff, 0xff, 0xff),
+	.driver_info = (unsigned long)&rtl8192cu_fops},
 /* Currently untested 8188 series devices */
 {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x8191, 0xff, 0xff, 0xff),
 	.driver_info = (unsigned long)&rtl8192cu_fops},
@@ -6187,8 +6224,6 @@
 	.driver_info = (unsigned long)&rtl8192cu_fops},
 {USB_DEVICE_AND_INTERFACE_INFO(0x13d3, 0x3357, 0xff, 0xff, 0xff),
 	.driver_info = (unsigned long)&rtl8192cu_fops},
-{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3308, 0xff, 0xff, 0xff),
-	.driver_info = (unsigned long)&rtl8192cu_fops},
 {USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x330b, 0xff, 0xff, 0xff),
 	.driver_info = (unsigned long)&rtl8192cu_fops},
 {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0x4902, 0xff, 0xff, 0xff),
@@ -6199,8 +6234,6 @@
 	.driver_info = (unsigned long)&rtl8192cu_fops},
 {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0xed17, 0xff, 0xff, 0xff),
 	.driver_info = (unsigned long)&rtl8192cu_fops},
-{USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff),
-	.driver_info = (unsigned long)&rtl8192cu_fops},
 {USB_DEVICE_AND_INTERFACE_INFO(0x4855, 0x0090, 0xff, 0xff, 0xff),
 	.driver_info = (unsigned long)&rtl8192cu_fops},
 {USB_DEVICE_AND_INTERFACE_INFO(0x4856, 0x0091, 0xff, 0xff, 0xff),
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h
index 921c565..315ccfb 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h
@@ -213,10 +213,66 @@
 #define REG_HMBOX_EXT_1			0x008a
 #define REG_HMBOX_EXT_2			0x008c
 #define REG_HMBOX_EXT_3			0x008e
+
 /* Interrupt registers for 8192e/8723bu/8812 */
 #define REG_HIMR0			0x00b0
+#define	 IMR0_TXCCK			BIT(30)	/* TXRPT interrupt when CCX bit
+						   of the packet is set */
+#define	 IMR0_PSTIMEOUT			BIT(29)	/* Power Save Time Out Int */
+#define	 IMR0_GTINT4			BIT(28)	/* Set when GTIMER4 expires */
+#define	 IMR0_GTINT3			BIT(27)	/* Set when GTIMER3 expires */
+#define	 IMR0_TBDER			BIT(26)	/* Transmit Beacon0 Error */
+#define	 IMR0_TBDOK			BIT(25)	/* Transmit Beacon0 OK */
+#define	 IMR0_TSF_BIT32_TOGGLE		BIT(24)	/* TSF Timer BIT32 toggle
+						   indication interrupt */
+#define	 IMR0_BCNDMAINT0		BIT(20)	/* Beacon DMA Interrupt 0 */
+#define	 IMR0_BCNDERR0			BIT(16)	/* Beacon Queue DMA Error 0 */
+#define	 IMR0_HSISR_IND_ON_INT		BIT(15)	/* HSISR Indicator (HSIMR &
+						   HSISR is true) */
+#define	 IMR0_BCNDMAINT_E		BIT(14)	/* Beacon DMA Interrupt
+						   Extension for Win7 */
+#define	 IMR0_ATIMEND			BIT(12)	/* CTWidnow End or
+						   ATIM Window End */
+#define	 IMR0_HISR1_IND_INT		BIT(11)	/* HISR1 Indicator
+						   (HISR1 & HIMR1 is true) */
+#define	 IMR0_C2HCMD			BIT(10)	/* CPU to Host Command INT
+						   Status, Write 1 to clear */
+#define	 IMR0_CPWM2			BIT(9)	/* CPU power Mode exchange INT
+						   Status, Write 1 to clear */
+#define	 IMR0_CPWM			BIT(8)	/* CPU power Mode exchange INT
+						   Status, Write 1 to clear */
+#define	 IMR0_HIGHDOK			BIT(7)	/* High Queue DMA OK */
+#define	 IMR0_MGNTDOK			BIT(6)	/* Management Queue DMA OK */
+#define	 IMR0_BKDOK			BIT(5)	/* AC_BK DMA OK */
+#define	 IMR0_BEDOK			BIT(4)	/* AC_BE DMA OK */
+#define	 IMR0_VIDOK			BIT(3)	/* AC_VI DMA OK */
+#define	 IMR0_VODOK			BIT(2)	/* AC_VO DMA OK */
+#define	 IMR0_RDU			BIT(1)	/* Rx Descriptor Unavailable */
+#define	 IMR0_ROK			BIT(0)	/* Receive DMA OK */
 #define REG_HISR0			0x00b4
 #define REG_HIMR1			0x00b8
+#define	 IMR1_BCNDMAINT7		BIT(27)	/* Beacon DMA Interrupt 7 */
+#define	 IMR1_BCNDMAINT6		BIT(26)	/* Beacon DMA Interrupt 6 */
+#define	 IMR1_BCNDMAINT5		BIT(25)	/* Beacon DMA Interrupt 5 */
+#define	 IMR1_BCNDMAINT4		BIT(24)	/* Beacon DMA Interrupt 4 */
+#define	 IMR1_BCNDMAINT3		BIT(23)	/* Beacon DMA Interrupt 3 */
+#define	 IMR1_BCNDMAINT2		BIT(22)	/* Beacon DMA Interrupt 2 */
+#define	 IMR1_BCNDMAINT1		BIT(21)	/* Beacon DMA Interrupt 1 */
+#define	 IMR1_BCNDERR7			BIT(20)	/* Beacon Queue DMA Err Int 7 */
+#define	 IMR1_BCNDERR6			BIT(19)	/* Beacon Queue DMA Err Int 6 */
+#define	 IMR1_BCNDERR5			BIT(18)	/* Beacon Queue DMA Err Int 5 */
+#define	 IMR1_BCNDERR4			BIT(17)	/* Beacon Queue DMA Err Int 4 */
+#define	 IMR1_BCNDERR3			BIT(16)	/* Beacon Queue DMA Err Int 3 */
+#define	 IMR1_BCNDERR2			BIT(15)	/* Beacon Queue DMA Err Int 2 */
+#define	 IMR1_BCNDERR1			BIT(14)	/* Beacon Queue DMA Err Int 1 */
+#define	 IMR1_ATIMEND_E			BIT(13)	/* ATIM Window End Extension
+						   for Win7 */
+#define	 IMR1_TXERR			BIT(11)	/* Tx Error Flag Int Status,
+						   write 1 to clear */
+#define	 IMR1_RXERR			BIT(10)	/* Rx Error Flag Int Status,
+						   write 1 to clear */
+#define	 IMR1_TXFOVW			BIT(9)	/* Transmit FIFO Overflow */
+#define	 IMR1_RXFOVW			BIT(8)	/* Receive FIFO Overflow */
 #define REG_HISR1			0x00bc
 
 /*  Host suspend counter on FPGA platform */
@@ -620,6 +676,7 @@
 #define REG_SCH_TXCMD			0x05d0
 
 /* define REG_FW_TSF_SYNC_CNT		0x04a0 */
+#define REG_SCH_TX_CMD			0x05f8
 #define REG_FW_RESET_TSF_CNT_1		0x05fc
 #define REG_FW_RESET_TSF_CNT_0		0x05fd
 #define REG_FW_BCN_DIS_CNT		0x05fe
@@ -780,6 +837,10 @@
 #define  FPGA_RF_MODE_OFDM		BIT(25)
 
 #define REG_FPGA0_TX_INFO		0x0804
+#define  FPGA0_TX_INFO_OFDM_PATH_A	BIT(0)
+#define  FPGA0_TX_INFO_OFDM_PATH_B	BIT(1)
+#define  FPGA0_TX_INFO_OFDM_PATH_C	BIT(2)
+#define  FPGA0_TX_INFO_OFDM_PATH_D	BIT(3)
 #define REG_FPGA0_PSD_FUNC		0x0808
 #define REG_FPGA0_TX_GAIN		0x080c
 #define REG_FPGA0_RF_TIMING1		0x0810
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index 41f77f8..f95760c 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -765,7 +765,8 @@
 					mac->bw_40 = false;
 					mac->bw_80 = false;
 					RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-						 "switch case not processed\n");
+						 "switch case %#x not processed\n",
+						 channel_type);
 					break;
 			}
 		}
@@ -1135,7 +1136,7 @@
 					mac->mode = WIRELESS_MODE_AC_24G;
 			}
 
-			if (vif->type == NL80211_IFTYPE_STATION && sta)
+			if (vif->type == NL80211_IFTYPE_STATION)
 				rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0);
 			rcu_read_unlock();
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index d12586d..0dfa9ea 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -179,7 +179,8 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n",
+			 rtlpci->const_support_pciaspm);
 		break;
 	}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c
index 9a64f9b..18d979a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/ps.c
+++ b/drivers/net/wireless/realtek/rtlwifi/ps.c
@@ -151,7 +151,7 @@
 
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", state_toset);
 		break;
 	}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c
index 3524441..6ee6bf8 100644
--- a/drivers/net/wireless/realtek/rtlwifi/regd.c
+++ b/drivers/net/wireless/realtek/rtlwifi/regd.c
@@ -345,9 +345,9 @@
 		return &rtl_regdom_no_midband;
 	case COUNTRY_CODE_IC:
 		return &rtl_regdom_11;
-	case COUNTRY_CODE_ETSI:
 	case COUNTRY_CODE_TELEC_NETGEAR:
 		return &rtl_regdom_60_64;
+	case COUNTRY_CODE_ETSI:
 	case COUNTRY_CODE_SPAIN:
 	case COUNTRY_CODE_FRANCE:
 	case COUNTRY_CODE_ISRAEL:
@@ -406,6 +406,8 @@
 		return COUNTRY_CODE_WORLD_WIDE_13;
 	case 0x22:
 		return COUNTRY_CODE_IC;
+	case 0x25:
+		return COUNTRY_CODE_ETSI;
 	case 0x32:
 		return COUNTRY_CODE_TELEC_NETGEAR;
 	case 0x41:
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c
index 6291256..5360d533 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c
@@ -334,7 +334,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", boxnum);
 			break;
 		}
 		isfw_read = _rtl88e_check_fw_read_last_h2c(hw, boxnum);
@@ -405,7 +405,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", cmd_len);
 			break;
 		}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
index 4ab6201..37d6efc 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
@@ -355,9 +355,11 @@
 
 		*((u64 *)(val)) = tsf;
 		break; }
+	case HAL_DEF_WOWLAN:
+		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process %x\n", variable);
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -571,7 +573,8 @@
 				break;
 			default:
 				RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-					 "switch case not process\n");
+					 "switch case %#x not processed\n",
+					 e_aci);
 				break;
 			}
 		}
@@ -735,7 +738,7 @@
 		break; }
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process %x\n", variable);
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -2352,7 +2355,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", enc_algo);
 			enc_algo = CAM_TKIP;
 			break;
 		}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c
index b504bd0..f05c2c6 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c
@@ -62,7 +62,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = true;
@@ -100,7 +100,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c
index 7498a12..fffaa92 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c
@@ -1346,7 +1346,8 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n",
+				 currentcmd->cmdid);
 			break;
 		}
 
@@ -2128,7 +2129,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", iotype);
 			break;
 		}
 	} while (false);
@@ -2166,7 +2167,8 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n",
+			 rtlphy->current_io_type);
 		break;
 	}
 	rtlphy->set_io_inprogress = false;
@@ -2319,7 +2321,7 @@
 		}
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", rfpwr_state);
 		bresult = false;
 		break;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
index 47e32cb..e7b11b4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
@@ -280,7 +280,7 @@
 	.debug = DBG_EMERG,
 };
 
-static struct rtl_hal_cfg rtl88ee_hal_cfg = {
+static const struct rtl_hal_cfg rtl88ee_hal_cfg = {
 	.bar_id = 2,
 	.write_readback = true,
 	.name = "rtl88e_pci",
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c
index 43fcb25..7d15246 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c
@@ -352,7 +352,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", boxnum);
 			break;
 		}
 
@@ -456,7 +456,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", cmd_len);
 			break;
 		}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c
index 60ab2ec..27e3d5f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c
@@ -910,7 +910,8 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n",
+				 currentcmd->cmdid);
 			break;
 		}
 
@@ -1567,7 +1568,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", iotype);
 			break;
 		}
 	} while (false);
@@ -1605,7 +1606,8 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n",
+			 rtlphy->current_io_type);
 		break;
 	}
 	rtlphy->set_io_inprogress = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c
index 2446079..a47be73 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c
@@ -141,9 +141,11 @@
 
 		break;
 		}
+	case HAL_DEF_WOWLAN:
+		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -367,7 +369,8 @@
 					break;
 				default:
 					RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-						 "switch case not processed\n");
+						 "switch case %#x not processed\n",
+						 e_aci);
 					break;
 				}
 			}
@@ -2154,7 +2157,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not processed\n");
+				 "switch case %#x not processed\n", enc_algo);
 			enc_algo = CAM_TKIP;
 			break;
 		}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c
index 8283e9b..24e483b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c
@@ -62,7 +62,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = true;
@@ -97,7 +97,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c
index 1ee5a6a..46d0d94 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c
@@ -300,12 +300,9 @@
 		}
 		break;
 	case RF90_PATH_C:
-		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
-		break;
 	case RF90_PATH_D:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", rfpath);
 		break;
 	default:
 		break;
@@ -554,7 +551,7 @@
 		}
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", rfpwr_state);
 		bresult = false;
 		break;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
index 4780bdc..87aa209 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
@@ -258,7 +258,7 @@
 	.debug = DBG_EMERG,
 };
 
-static struct rtl_hal_cfg rtl92ce_hal_cfg = {
+static const struct rtl_hal_cfg rtl92ce_hal_cfg = {
 	.bar_id = 2,
 	.write_readback = true,
 	.name = "rtl92c_pci",
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
index 8789752..ae8f055 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
@@ -1560,7 +1560,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -1931,7 +1931,7 @@
 		}
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c
index 75a2deb..8514ab65 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c
@@ -62,7 +62,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = true;
@@ -95,7 +95,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c
index c972fa5..4b29764 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c
@@ -277,12 +277,9 @@
 		}
 		break;
 	case RF90_PATH_C:
-		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
-		break;
 	case RF90_PATH_D:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", rfpath);
 		break;
 	default:
 		break;
@@ -517,7 +514,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", rfpwr_state);
 		bresult = false;
 		break;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c
index 62ef820..8de29cc 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c
@@ -435,7 +435,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not processed\n");
+				 "switch case %#x not processed\n", boxnum);
 			break;
 		}
 		isfw_read = _rtl92d_check_fw_read_last_h2c(hw, boxnum);
@@ -512,7 +512,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not processed\n");
+				 "switch case %#x not processed\n", cmd_len);
 			break;
 		}
 		bwrite_success = true;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
index b0f6324..d91f8bb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
@@ -164,9 +164,11 @@
 	case HW_VAR_INT_AC:
 		*((bool *)(val)) = rtlpriv->dm.disable_tx_int;
 		break;
+	case HAL_DEF_WOWLAN:
+		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -361,7 +363,8 @@
 				break;
 			default:
 				RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-					 "switch case not processed\n");
+					 "switch case %#x not processed\n",
+					 e_aci);
 				break;
 			}
 		}
@@ -502,7 +505,7 @@
 	}
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -1757,7 +1760,7 @@
 		return;
 
 	if (rtl_get_hwinfo(hw, rtlpriv, HWSET_MAX_SIZE, hwinfo, params))
-		return;
+		goto exit;
 
 	_rtl92de_efuse_update_chip_version(hw);
 	_rtl92de_read_macphymode_and_bandtype(hw, hwinfo);
@@ -1790,6 +1793,7 @@
 		break;
 	}
 	rtlefuse->txpwr_fromeprom = true;
+exit:
 	kfree(hwinfo);
 }
 
@@ -2170,7 +2174,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not processed\n");
+				 "switch case %#x not processed\n", enc_algo);
 			enc_algo = CAM_TKIP;
 			break;
 		}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c
index 76a57ae..811ba57 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c
@@ -71,7 +71,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = true;
@@ -106,7 +106,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
index d334d2a..2a1edfd 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c
@@ -588,7 +588,7 @@
 				 * setting. */
 				udelay(1);
 				RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
-					 "The Rtl819XAGCTAB_Array_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n",
+					 "The Rtl819XAGCTAB_Array_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n",
 					 agctab_array_table[i],
 					 agctab_array_table[i + 1]);
 			}
@@ -604,7 +604,7 @@
 					 * setting. */
 					udelay(1);
 					RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
-						 "The Rtl819XAGCTAB_Array_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n",
+						 "The Rtl819XAGCTAB_Array_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n",
 						 agctab_array_table[i],
 						 agctab_array_table[i + 1]);
 				}
@@ -620,7 +620,7 @@
 					 * setting. */
 					udelay(1);
 					RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
-						 "The Rtl819XAGCTAB_5GArray_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n",
+						 "The Rtl819XAGCTAB_5GArray_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n",
 						 agctab_5garray_table[i],
 						 agctab_5garray_table[i + 1]);
 				}
@@ -836,12 +836,9 @@
 		}
 		break;
 	case RF90_PATH_C:
-		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
-		break;
 	case RF90_PATH_D:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", rfpath);
 		break;
 	}
 	return true;
@@ -2850,7 +2847,8 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not processed\n");
+				 "switch case %#x not processed\n",
+				 currentcmd->cmdid);
 			break;
 		}
 		break;
@@ -2963,7 +2961,8 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n",
+			 rtlphy->current_io_type);
 		break;
 	}
 	rtlphy->set_io_inprogress = false;
@@ -2994,7 +2993,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not processed\n");
+				 "switch case %#x not processed\n", iotype);
 			break;
 		}
 	} while (false);
@@ -3182,7 +3181,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", rfpwr_state);
 		bresult = false;
 		break;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
index c6e09a1..0538a4d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
@@ -262,7 +262,7 @@
 	.debug = DBG_EMERG,
 };
 
-static struct rtl_hal_cfg rtl92de_hal_cfg = {
+static const struct rtl_hal_cfg rtl92de_hal_cfg = {
 	.bar_id = 2,
 	.write_readback = true,
 	.name = "rtl8192de",
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
index 0708eed..b3f6a9e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
@@ -344,7 +344,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", boxnum);
 			break;
 		}
 
@@ -433,7 +433,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", cmd_len);
 			break;
 		}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
index b07af8d..ebf663e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
@@ -338,9 +338,11 @@
 		*((u64 *)(val)) = tsf;
 		}
 		break;
+	case HAL_DEF_WOWLAN:
+		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG,
-			 "switch case not process %x\n", variable);
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -566,7 +568,8 @@
 				break;
 			default:
 				RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG,
-					 "switch case not process\n");
+					 "switch case %#x not processed\n",
+					 e_aci);
 				break;
 			}
 		}
@@ -685,7 +688,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG,
-			 "switch case not process %x\n", variable);
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -2463,7 +2466,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", enc_algo);
 			enc_algo = CAM_TKIP;
 			break;
 		}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c
index 8388e37..47da05d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c
@@ -61,7 +61,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = true;
@@ -91,7 +91,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c
index beafc9a..5ad7e75 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c
@@ -1927,7 +1927,8 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n",
+				 currentcmd->cmdid);
 			break;
 		}
 
@@ -3001,7 +3002,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", iotype);
 			break;
 		}
 	} while (false);
@@ -3041,7 +3042,8 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n",
+			 rtlphy->current_io_type);
 		break;
 	}
 	rtlphy->set_io_inprogress = false;
@@ -3187,7 +3189,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", rfpwr_state);
 		bresult = false;
 		break;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
index c31c6bf..ac299cb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
@@ -262,7 +262,7 @@
 	.debug = DBG_EMERG,
 };
 
-static struct rtl_hal_cfg rtl92ee_hal_cfg = {
+static const struct rtl_hal_cfg rtl92ee_hal_cfg = {
 	.bar_id = 2,
 	.write_readback = true,
 	.name = "rtl92ee_pci",
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
index ddfa0ae..52e4430 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
@@ -77,9 +77,11 @@
 			*((bool *)(val)) = rtlpriv->dm.current_mrc_switch;
 			break;
 		}
+	case HAL_DEF_WOWLAN:
+		break;
 	default: {
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", variable);
 			break;
 		}
 	}
@@ -297,7 +299,8 @@
 					break;
 				default:
 					RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-						 "switch case not processed\n");
+						 "switch case %#x not processed\n",
+						 e_aci);
 					break;
 				}
 			}
@@ -433,7 +436,7 @@
 		break; }
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 
@@ -2465,7 +2468,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not processed\n");
+				 "switch case %#x not processed\n", enc_algo);
 			enc_algo = CAM_TKIP;
 			break;
 		}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c
index 44949b5..9849cb9 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c
@@ -68,7 +68,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = true;
@@ -104,7 +104,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c
index 881821f..4bb7558 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c
@@ -442,7 +442,8 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not processed\n");
+				 "switch case %#x not processed\n",
+				 currentcmd->cmdid);
 			break;
 		}
 
@@ -648,7 +649,7 @@
 			break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not processed\n");
+			 "switch case %#x not processed\n", rfpwr_state);
 		bresult = false;
 		break;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
index 31baca41..5e8e02d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
@@ -306,7 +306,7 @@
 
 /* Because memory R/W bursting will cause system hang/crash
  * for 92se, so we don't read back after every write action */
-static struct rtl_hal_cfg rtl92se_hal_cfg = {
+static const struct rtl_hal_cfg rtl92se_hal_cfg = {
 	.bar_id = 1,
 	.write_readback = false,
 	.name = "rtl92s_pci",
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c
index b7c0d38..1186755 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c
@@ -124,7 +124,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", boxnum);
 			break;
 		}
 
@@ -230,7 +230,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", cmd_len);
 			break;
 		}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
index b88c7ee..f8be0bd 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
@@ -141,9 +141,11 @@
 
 			break;
 		}
+	case HAL_DEF_WOWLAN:
+		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -366,7 +368,8 @@
 					break;
 				default:
 					RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-						 "switch case not process\n");
+						 "switch case %#x not processed\n",
+						 e_aci);
 					break;
 				}
 			}
@@ -546,7 +549,7 @@
 		}
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -1654,7 +1657,7 @@
 			rtlefuse->autoload_failflag, hwinfo);
 
 	if (rtlhal->oem_id != RT_CID_DEFAULT)
-		return;
+		goto exit;
 
 	switch (rtlefuse->eeprom_oemid) {
 	case EEPROM_CID_DEFAULT:
@@ -2225,7 +2228,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", enc_algo);
 			enc_algo = CAM_TKIP;
 			break;
 		}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c
index 1317335..c7be934 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c
@@ -63,7 +63,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = true;
@@ -105,7 +105,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c
index 601b78e..17b58cb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c
@@ -1023,7 +1023,8 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n",
+				 currentcmd->cmdid);
 			break;
 		}
 
@@ -1499,7 +1500,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", iotype);
 			break;
 		}
 	} while (false);
@@ -1536,7 +1537,8 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n",
+			 rtlphy->current_io_type);
 		break;
 	}
 	rtlphy->set_io_inprogress = false;
@@ -1682,7 +1684,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", rfpwr_state);
 		bresult = false;
 		break;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
index ff49a8c..89c828a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
@@ -276,7 +276,7 @@
 	.disable_watchdog = false,
 };
 
-static struct rtl_hal_cfg rtl8723e_hal_cfg = {
+static const struct rtl_hal_cfg rtl8723e_hal_cfg = {
 	.bar_id = 2,
 	.write_readback = true,
 	.name = "rtl8723e_pci",
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c
index d5da0f3..8c5c27c 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c
@@ -122,7 +122,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", boxnum);
 			break;
 		}
 
@@ -195,7 +195,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", cmd_len);
 			break;
 		}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
index 82e4476..aba60c3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
@@ -348,9 +348,11 @@
 		*((u64 *)(val)) = tsf;
 		}
 		break;
+	case HAL_DEF_WOWLAN:
+		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process %x\n", variable);
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -607,7 +609,8 @@
 				break;
 			default:
 				RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-					 "switch case not process\n");
+					 "switch case %#x not processed\n",
+					 e_aci);
 				break;
 			}
 		}
@@ -723,8 +726,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process %x\n",
-			 variable);
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -2565,7 +2567,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", enc_algo);
 			enc_algo = CAM_TKIP;
 			break;
 		}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c
index 4196efb..497913e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c
@@ -58,7 +58,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = true;
@@ -100,7 +100,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c
index 285818d..3cc2232 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c
@@ -837,7 +837,7 @@
 		break;
 	case RF90_PATH_D:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", rfpath);
 		break;
 	}
 	return true;
@@ -1507,7 +1507,8 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n",
+				 currentcmd->cmdid);
 			break;
 		}
 
@@ -2515,7 +2516,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", iotype);
 			break;
 		}
 	} while (false);
@@ -2553,7 +2554,8 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n",
+			 rtlphy->current_io_type);
 		break;
 	}
 	rtlphy->set_io_inprogress = false;
@@ -2705,7 +2707,7 @@
 
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", rfpwr_state);
 		bresult = false;
 		break;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
index 2101793..20b53f0 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
@@ -276,7 +276,7 @@
 	.ant_sel = 0,
 };
 
-static struct rtl_hal_cfg rtl8723be_hal_cfg = {
+static const struct rtl_hal_cfg rtl8723be_hal_cfg = {
 	.bar_id = 2,
 	.write_readback = true,
 	.name = "rtl8723be_pci",
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c
index a4fc70e..b665446 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c
@@ -392,7 +392,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", boxnum);
 			break;
 		}
 
@@ -481,7 +481,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", cmd_len);
 			break;
 		}
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
index 0cddf1a..1281ebe 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
@@ -480,7 +480,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process %x\n", variable);
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -671,7 +671,8 @@
 				break;
 			default:
 				RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-					 "switch case not process\n");
+					 "switch case %#x not processed\n",
+					 e_aci);
 				break;
 			}
 		}
@@ -800,7 +801,7 @@
 		break; }
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process %x\n", variable);
+			 "switch case %#x not processed\n", variable);
 		break;
 	}
 }
@@ -3934,7 +3935,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", enc_algo);
 			enc_algo = CAM_TKIP;
 			break;
 		}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c
index ba1946a..fcb3b28 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c
@@ -60,7 +60,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = true;
@@ -133,7 +133,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", pled->ledpin);
 		break;
 	}
 	pled->ledon = false;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c
index a71bfe3..5dad40217 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c
@@ -2063,12 +2063,9 @@
 		}
 		break;
 	case RF90_PATH_C:
-		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
-		break;
 	case RF90_PATH_D:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", rfpath);
 		break;
 	}
 	return true;
@@ -2133,16 +2130,10 @@
 		break;
 
 	case RF90_PATH_B:
-		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
-		break;
 	case RF90_PATH_C:
-		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
-		break;
 	case RF90_PATH_D:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", rfpath);
 		break;
 	}
 	return true;
@@ -4670,7 +4661,7 @@
 			break;
 		default:
 			RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-				 "switch case not process\n");
+				 "switch case %#x not processed\n", iotype);
 			break;
 		}
 	} while (false);
@@ -4714,7 +4705,8 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n",
+			 rtlphy->current_io_type);
 		break;
 	}
 	rtlphy->set_io_inprogress = false;
@@ -4820,7 +4812,7 @@
 		break;
 	default:
 		RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
-			 "switch case not process\n");
+			 "switch case %#x not processed\n", rfpwr_state);
 		bresult = false;
 		break;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
index 4159f9b..22f687b1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
@@ -316,7 +316,7 @@
 	.disable_watchdog = 0,
 };
 
-static struct rtl_hal_cfg rtl8821ae_hal_cfg = {
+static const struct rtl_hal_cfg rtl8821ae_hal_cfg = {
 	.bar_id = 2,
 	.write_readback = true,
 	.name = "rtl8821ae_pci",
diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
index 41617b7..32aa5c1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
@@ -739,11 +739,8 @@
 	for (i = 0; i < rtlusb->rx_urb_num; i++) {
 		err = -ENOMEM;
 		urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!urb) {
-			RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG,
-				 "Failed to alloc URB!!\n");
+		if (!urb)
 			goto err_out;
-		}
 
 		err = _rtl_prep_rx_urb(hw, rtlusb, urb, GFP_KERNEL);
 		if (err < 0) {
@@ -907,15 +904,12 @@
 static struct urb *_rtl_usb_tx_urb_setup(struct ieee80211_hw *hw,
 				struct sk_buff *skb, u32 ep_num)
 {
-	struct rtl_priv *rtlpriv = rtl_priv(hw);
 	struct rtl_usb *rtlusb = rtl_usbdev(rtl_usbpriv(hw));
 	struct urb *_urb;
 
 	WARN_ON(NULL == skb);
 	_urb = usb_alloc_urb(0, GFP_ATOMIC);
 	if (!_urb) {
-		RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG,
-			 "Can't allocate URB for bulk out!\n");
 		kfree_skb(skb);
 		return NULL;
 	}
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index c5086c2..595f7d5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -394,110 +394,110 @@
 };
 
 enum hw_variables {
-	HW_VAR_ETHER_ADDR,
-	HW_VAR_MULTICAST_REG,
-	HW_VAR_BASIC_RATE,
-	HW_VAR_BSSID,
-	HW_VAR_MEDIA_STATUS,
-	HW_VAR_SECURITY_CONF,
-	HW_VAR_BEACON_INTERVAL,
-	HW_VAR_ATIM_WINDOW,
-	HW_VAR_LISTEN_INTERVAL,
-	HW_VAR_CS_COUNTER,
-	HW_VAR_DEFAULTKEY0,
-	HW_VAR_DEFAULTKEY1,
-	HW_VAR_DEFAULTKEY2,
-	HW_VAR_DEFAULTKEY3,
-	HW_VAR_SIFS,
-	HW_VAR_R2T_SIFS,
-	HW_VAR_DIFS,
-	HW_VAR_EIFS,
-	HW_VAR_SLOT_TIME,
-	HW_VAR_ACK_PREAMBLE,
-	HW_VAR_CW_CONFIG,
-	HW_VAR_CW_VALUES,
-	HW_VAR_RATE_FALLBACK_CONTROL,
-	HW_VAR_CONTENTION_WINDOW,
-	HW_VAR_RETRY_COUNT,
-	HW_VAR_TR_SWITCH,
-	HW_VAR_COMMAND,
-	HW_VAR_WPA_CONFIG,
-	HW_VAR_AMPDU_MIN_SPACE,
-	HW_VAR_SHORTGI_DENSITY,
-	HW_VAR_AMPDU_FACTOR,
-	HW_VAR_MCS_RATE_AVAILABLE,
-	HW_VAR_AC_PARAM,
-	HW_VAR_ACM_CTRL,
-	HW_VAR_DIS_Req_Qsize,
-	HW_VAR_CCX_CHNL_LOAD,
-	HW_VAR_CCX_NOISE_HISTOGRAM,
-	HW_VAR_CCX_CLM_NHM,
-	HW_VAR_TxOPLimit,
-	HW_VAR_TURBO_MODE,
-	HW_VAR_RF_STATE,
-	HW_VAR_RF_OFF_BY_HW,
-	HW_VAR_BUS_SPEED,
-	HW_VAR_SET_DEV_POWER,
+	HW_VAR_ETHER_ADDR = 0x0,
+	HW_VAR_MULTICAST_REG = 0x1,
+	HW_VAR_BASIC_RATE = 0x2,
+	HW_VAR_BSSID = 0x3,
+	HW_VAR_MEDIA_STATUS= 0x4,
+	HW_VAR_SECURITY_CONF= 0x5,
+	HW_VAR_BEACON_INTERVAL = 0x6,
+	HW_VAR_ATIM_WINDOW = 0x7,
+	HW_VAR_LISTEN_INTERVAL = 0x8,
+	HW_VAR_CS_COUNTER = 0x9,
+	HW_VAR_DEFAULTKEY0 = 0xa,
+	HW_VAR_DEFAULTKEY1 = 0xb,
+	HW_VAR_DEFAULTKEY2 = 0xc,
+	HW_VAR_DEFAULTKEY3 = 0xd,
+	HW_VAR_SIFS = 0xe,
+	HW_VAR_R2T_SIFS = 0xf,
+	HW_VAR_DIFS = 0x10,
+	HW_VAR_EIFS = 0x11,
+	HW_VAR_SLOT_TIME = 0x12,
+	HW_VAR_ACK_PREAMBLE = 0x13,
+	HW_VAR_CW_CONFIG = 0x14,
+	HW_VAR_CW_VALUES = 0x15,
+	HW_VAR_RATE_FALLBACK_CONTROL= 0x16,
+	HW_VAR_CONTENTION_WINDOW = 0x17,
+	HW_VAR_RETRY_COUNT = 0x18,
+	HW_VAR_TR_SWITCH = 0x19,
+	HW_VAR_COMMAND = 0x1a,
+	HW_VAR_WPA_CONFIG = 0x1b,
+	HW_VAR_AMPDU_MIN_SPACE = 0x1c,
+	HW_VAR_SHORTGI_DENSITY = 0x1d,
+	HW_VAR_AMPDU_FACTOR = 0x1e,
+	HW_VAR_MCS_RATE_AVAILABLE = 0x1f,
+	HW_VAR_AC_PARAM = 0x20,
+	HW_VAR_ACM_CTRL = 0x21,
+	HW_VAR_DIS_Req_Qsize = 0x22,
+	HW_VAR_CCX_CHNL_LOAD = 0x23,
+	HW_VAR_CCX_NOISE_HISTOGRAM = 0x24,
+	HW_VAR_CCX_CLM_NHM = 0x25,
+	HW_VAR_TxOPLimit = 0x26,
+	HW_VAR_TURBO_MODE = 0x27,
+	HW_VAR_RF_STATE = 0x28,
+	HW_VAR_RF_OFF_BY_HW = 0x29,
+	HW_VAR_BUS_SPEED = 0x2a,
+	HW_VAR_SET_DEV_POWER = 0x2b,
 
-	HW_VAR_RCR,
-	HW_VAR_RATR_0,
-	HW_VAR_RRSR,
-	HW_VAR_CPU_RST,
-	HW_VAR_CHECK_BSSID,
-	HW_VAR_LBK_MODE,
-	HW_VAR_AES_11N_FIX,
-	HW_VAR_USB_RX_AGGR,
-	HW_VAR_USER_CONTROL_TURBO_MODE,
-	HW_VAR_RETRY_LIMIT,
-	HW_VAR_INIT_TX_RATE,
-	HW_VAR_TX_RATE_REG,
-	HW_VAR_EFUSE_USAGE,
-	HW_VAR_EFUSE_BYTES,
-	HW_VAR_AUTOLOAD_STATUS,
-	HW_VAR_RF_2R_DISABLE,
-	HW_VAR_SET_RPWM,
-	HW_VAR_H2C_FW_PWRMODE,
-	HW_VAR_H2C_FW_JOINBSSRPT,
-	HW_VAR_H2C_FW_MEDIASTATUSRPT,
-	HW_VAR_H2C_FW_P2P_PS_OFFLOAD,
-	HW_VAR_FW_PSMODE_STATUS,
-	HW_VAR_INIT_RTS_RATE,
-	HW_VAR_RESUME_CLK_ON,
-	HW_VAR_FW_LPS_ACTION,
-	HW_VAR_1X1_RECV_COMBINE,
-	HW_VAR_STOP_SEND_BEACON,
-	HW_VAR_TSF_TIMER,
-	HW_VAR_IO_CMD,
+	HW_VAR_RCR = 0x2c,
+	HW_VAR_RATR_0 = 0x2d,
+	HW_VAR_RRSR = 0x2e,
+	HW_VAR_CPU_RST = 0x2f,
+	HW_VAR_CHECK_BSSID = 0x30,
+	HW_VAR_LBK_MODE = 0x31,
+	HW_VAR_AES_11N_FIX = 0x32,
+	HW_VAR_USB_RX_AGGR = 0x33,
+	HW_VAR_USER_CONTROL_TURBO_MODE = 0x34,
+	HW_VAR_RETRY_LIMIT = 0x35,
+	HW_VAR_INIT_TX_RATE = 0x36,
+	HW_VAR_TX_RATE_REG = 0x37,
+	HW_VAR_EFUSE_USAGE = 0x38,
+	HW_VAR_EFUSE_BYTES = 0x39,
+	HW_VAR_AUTOLOAD_STATUS = 0x3a,
+	HW_VAR_RF_2R_DISABLE = 0x3b,
+	HW_VAR_SET_RPWM = 0x3c,
+	HW_VAR_H2C_FW_PWRMODE = 0x3d,
+	HW_VAR_H2C_FW_JOINBSSRPT = 0x3e,
+	HW_VAR_H2C_FW_MEDIASTATUSRPT = 0x3f,
+	HW_VAR_H2C_FW_P2P_PS_OFFLOAD = 0x40,
+	HW_VAR_FW_PSMODE_STATUS = 0x41,
+	HW_VAR_INIT_RTS_RATE = 0x42,
+	HW_VAR_RESUME_CLK_ON = 0x43,
+	HW_VAR_FW_LPS_ACTION = 0x44,
+	HW_VAR_1X1_RECV_COMBINE = 0x45,
+	HW_VAR_STOP_SEND_BEACON = 0x46,
+	HW_VAR_TSF_TIMER = 0x47,
+	HW_VAR_IO_CMD = 0x48,
 
-	HW_VAR_RF_RECOVERY,
-	HW_VAR_H2C_FW_UPDATE_GTK,
-	HW_VAR_WF_MASK,
-	HW_VAR_WF_CRC,
-	HW_VAR_WF_IS_MAC_ADDR,
-	HW_VAR_H2C_FW_OFFLOAD,
-	HW_VAR_RESET_WFCRC,
+	HW_VAR_RF_RECOVERY = 0x49,
+	HW_VAR_H2C_FW_UPDATE_GTK = 0x4a,
+	HW_VAR_WF_MASK = 0x4b,
+	HW_VAR_WF_CRC = 0x4c,
+	HW_VAR_WF_IS_MAC_ADDR = 0x4d,
+	HW_VAR_H2C_FW_OFFLOAD = 0x4e,
+	HW_VAR_RESET_WFCRC = 0x4f,
 
-	HW_VAR_HANDLE_FW_C2H,
-	HW_VAR_DL_FW_RSVD_PAGE,
-	HW_VAR_AID,
-	HW_VAR_HW_SEQ_ENABLE,
-	HW_VAR_CORRECT_TSF,
-	HW_VAR_BCN_VALID,
-	HW_VAR_FWLPS_RF_ON,
-	HW_VAR_DUAL_TSF_RST,
-	HW_VAR_SWITCH_EPHY_WoWLAN,
-	HW_VAR_INT_MIGRATION,
-	HW_VAR_INT_AC,
-	HW_VAR_RF_TIMING,
+	HW_VAR_HANDLE_FW_C2H = 0x50,
+	HW_VAR_DL_FW_RSVD_PAGE = 0x51,
+	HW_VAR_AID = 0x52,
+	HW_VAR_HW_SEQ_ENABLE = 0x53,
+	HW_VAR_CORRECT_TSF = 0x54,
+	HW_VAR_BCN_VALID = 0x55,
+	HW_VAR_FWLPS_RF_ON = 0x56,
+	HW_VAR_DUAL_TSF_RST = 0x57,
+	HW_VAR_SWITCH_EPHY_WoWLAN = 0x58,
+	HW_VAR_INT_MIGRATION = 0x59,
+	HW_VAR_INT_AC = 0x5a,
+	HW_VAR_RF_TIMING = 0x5b,
 
-	HAL_DEF_WOWLAN,
-	HW_VAR_MRC,
-	HW_VAR_KEEP_ALIVE,
-	HW_VAR_NAV_UPPER,
+	HAL_DEF_WOWLAN = 0x5c,
+	HW_VAR_MRC = 0x5d,
+	HW_VAR_KEEP_ALIVE = 0x5e,
+	HW_VAR_NAV_UPPER = 0x5f,
 
-	HW_VAR_MGT_FILTER,
-	HW_VAR_CTRL_FILTER,
-	HW_VAR_DATA_FILTER,
+	HW_VAR_MGT_FILTER = 0x60,
+	HW_VAR_CTRL_FILTER = 0x61,
+	HW_VAR_DATA_FILTER = 0x62,
 };
 
 enum rt_media_status {
diff --git a/drivers/net/wireless/ti/wl18xx/acx.c b/drivers/net/wireless/ti/wl18xx/acx.c
index 4be0409..b5525a3 100644
--- a/drivers/net/wireless/ti/wl18xx/acx.c
+++ b/drivers/net/wireless/ti/wl18xx/acx.c
@@ -309,3 +309,32 @@
 	kfree(acx);
 	return ret;
 }
+
+int wl18xx_acx_time_sync_cfg(struct wl1271 *wl)
+{
+	struct acx_time_sync_cfg *acx;
+	int ret;
+
+	wl1271_debug(DEBUG_ACX, "acx time sync cfg: mode %d, addr: %pM",
+		     wl->conf.sg.params[WL18XX_CONF_SG_TIME_SYNC],
+		     wl->zone_master_mac_addr);
+
+	acx = kzalloc(sizeof(*acx), GFP_KERNEL);
+	if (!acx) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	acx->sync_mode = wl->conf.sg.params[WL18XX_CONF_SG_TIME_SYNC];
+	memcpy(acx->zone_mac_addr, wl->zone_master_mac_addr, ETH_ALEN);
+
+	ret = wl1271_cmd_configure(wl, ACX_TIME_SYNC_CFG,
+				   acx, sizeof(*acx));
+	if (ret < 0) {
+		wl1271_warning("acx time sync cfg failed: %d", ret);
+		goto out;
+	}
+out:
+	kfree(acx);
+	return ret;
+}
diff --git a/drivers/net/wireless/ti/wl18xx/acx.h b/drivers/net/wireless/ti/wl18xx/acx.h
index 342a299..2edbbbf 100644
--- a/drivers/net/wireless/ti/wl18xx/acx.h
+++ b/drivers/net/wireless/ti/wl18xx/acx.h
@@ -37,6 +37,7 @@
 	ACX_RX_BA_FILTER		 = 0x0058,
 	ACX_AP_SLEEP_CFG                 = 0x0059,
 	ACX_DYNAMIC_TRACES_CFG		 = 0x005A,
+	ACX_TIME_SYNC_CFG		 = 0x005B,
 };
 
 /* numbers of bits the length field takes (add 1 for the actual number) */
@@ -388,6 +389,17 @@
 	__le32 dynamic_fw_traces;
 } __packed;
 
+/*
+ * ACX_TIME_SYNC_CFG
+ * configure the time sync parameters
+ */
+struct acx_time_sync_cfg {
+	struct acx_header header;
+	u8 sync_mode;
+	u8 zone_mac_addr[ETH_ALEN];
+	u8 padding[1];
+} __packed;
+
 int wl18xx_acx_host_if_cfg_bitmap(struct wl1271 *wl, u32 host_cfg_bitmap,
 				  u32 sdio_blk_size, u32 extra_mem_blks,
 				  u32 len_field_size);
@@ -402,5 +414,6 @@
 int wl18xx_acx_rx_ba_filter(struct wl1271 *wl, bool action);
 int wl18xx_acx_ap_sleep(struct wl1271 *wl);
 int wl18xx_acx_dynamic_fw_traces(struct wl1271 *wl);
+int wl18xx_acx_time_sync_cfg(struct wl1271 *wl);
 
 #endif /* __WL18XX_ACX_H__ */
diff --git a/drivers/net/wireless/ti/wl18xx/event.c b/drivers/net/wireless/ti/wl18xx/event.c
index 2c5df43..b36ce18 100644
--- a/drivers/net/wireless/ti/wl18xx/event.c
+++ b/drivers/net/wireless/ti/wl18xx/event.c
@@ -22,6 +22,7 @@
 #include <net/genetlink.h>
 #include "event.h"
 #include "scan.h"
+#include "conf.h"
 #include "../wlcore/cmd.h"
 #include "../wlcore/debug.h"
 #include "../wlcore/vendor_cmd.h"
diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c
index 00a04df..06d6943 100644
--- a/drivers/net/wireless/ti/wl18xx/main.c
+++ b/drivers/net/wireless/ti/wl18xx/main.c
@@ -1397,25 +1397,24 @@
 	return ret;
 }
 
-#define WL18XX_CONF_FILE_NAME "ti-connectivity/wl18xx-conf.bin"
-
 static int wl18xx_load_conf_file(struct device *dev, struct wlcore_conf *conf,
-				 struct wl18xx_priv_conf *priv_conf)
+				 struct wl18xx_priv_conf *priv_conf,
+				 const char *file)
 {
 	struct wlcore_conf_file *conf_file;
 	const struct firmware *fw;
 	int ret;
 
-	ret = request_firmware(&fw, WL18XX_CONF_FILE_NAME, dev);
+	ret = request_firmware(&fw, file, dev);
 	if (ret < 0) {
 		wl1271_error("could not get configuration binary %s: %d",
-			     WL18XX_CONF_FILE_NAME, ret);
+			     file, ret);
 		return ret;
 	}
 
 	if (fw->size != WL18XX_CONF_SIZE) {
-		wl1271_error("configuration binary file size is wrong, expected %zu got %zu",
-			     WL18XX_CONF_SIZE, fw->size);
+		wl1271_error("%s configuration binary size is wrong, expected %zu got %zu",
+			     file, WL18XX_CONF_SIZE, fw->size);
 		ret = -EINVAL;
 		goto out_release;
 	}
@@ -1448,9 +1447,12 @@
 
 static int wl18xx_conf_init(struct wl1271 *wl, struct device *dev)
 {
+	struct platform_device *pdev = wl->pdev;
+	struct wlcore_platdev_data *pdata = dev_get_platdata(&pdev->dev);
 	struct wl18xx_priv *priv = wl->priv;
 
-	if (wl18xx_load_conf_file(dev, &wl->conf, &priv->conf) < 0) {
+	if (wl18xx_load_conf_file(dev, &wl->conf, &priv->conf,
+				  pdata->family->cfg_name) < 0) {
 		wl1271_warning("falling back to default config");
 
 		/* apply driver default configuration */
@@ -2141,4 +2143,3 @@
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
 MODULE_FIRMWARE(WL18XX_FW_NAME);
-MODULE_FIRMWARE(WL18XX_CONF_FILE_NAME);
diff --git a/drivers/net/wireless/ti/wlcore/boot.c b/drivers/net/wireless/ti/wlcore/boot.c
index f75d304..f00509e 100644
--- a/drivers/net/wireless/ti/wlcore/boot.c
+++ b/drivers/net/wireless/ti/wlcore/boot.c
@@ -282,6 +282,9 @@
 
 int wlcore_boot_upload_nvs(struct wl1271 *wl)
 {
+	struct platform_device *pdev = wl->pdev;
+	struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev);
+	const char *nvs_name = "unknown";
 	size_t nvs_len, burst_len;
 	int i;
 	u32 dest_addr, val;
@@ -293,6 +296,9 @@
 		return -ENODEV;
 	}
 
+	if (pdev_data && pdev_data->family)
+		nvs_name = pdev_data->family->nvs_name;
+
 	if (wl->quirks & WLCORE_QUIRK_LEGACY_NVS) {
 		struct wl1271_nvs_file *nvs =
 			(struct wl1271_nvs_file *)wl->nvs;
@@ -310,8 +316,9 @@
 		if (wl->nvs_len != sizeof(struct wl1271_nvs_file) &&
 		    (wl->nvs_len != WL1271_INI_LEGACY_NVS_FILE_SIZE ||
 		     wl->enable_11a)) {
-			wl1271_error("nvs size is not as expected: %zu != %zu",
-				wl->nvs_len, sizeof(struct wl1271_nvs_file));
+			wl1271_error("%s size is not as expected: %zu != %zu",
+				     nvs_name, wl->nvs_len,
+				     sizeof(struct wl1271_nvs_file));
 			kfree(wl->nvs);
 			wl->nvs = NULL;
 			wl->nvs_len = 0;
@@ -328,8 +335,8 @@
 			if (nvs->general_params.dual_mode_select)
 				wl->enable_11a = true;
 		} else {
-			wl1271_error("nvs size is not as expected: %zu != %zu",
-				     wl->nvs_len,
+			wl1271_error("%s size is not as expected: %zu != %zu",
+				     nvs_name, wl->nvs_len,
 				     sizeof(struct wl128x_nvs_file));
 			kfree(wl->nvs);
 			wl->nvs = NULL;
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 1d68916..471521a 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -4986,7 +4986,6 @@
 		return ret;
 
 	wl_sta = (struct wl1271_station *)sta->drv_priv;
-	wl_sta->wl = wl;
 	hlid = wl_sta->hlid;
 
 	ret = wl12xx_cmd_add_peer(wl, wlvif, sta, hlid);
@@ -5700,10 +5699,11 @@
 	mutex_unlock(&wl->mutex);
 }
 
-static u32 wlcore_op_get_expected_throughput(struct ieee80211_sta *sta)
+static u32 wlcore_op_get_expected_throughput(struct ieee80211_hw *hw,
+					     struct ieee80211_sta *sta)
 {
 	struct wl1271_station *wl_sta = (struct wl1271_station *)sta->drv_priv;
-	struct wl1271 *wl = wl_sta->wl;
+	struct wl1271 *wl = hw->priv;
 	u8 hlid = wl_sta->hlid;
 
 	/* return in units of Kbps */
@@ -6413,9 +6413,12 @@
 			goto out;
 		}
 		wl->nvs_len = fw->size;
-	} else {
+	} else if (pdev_data->family->nvs_name) {
 		wl1271_debug(DEBUG_BOOT, "Could not get nvs file %s",
-			     WL12XX_NVS_NAME);
+			     pdev_data->family->nvs_name);
+		wl->nvs = NULL;
+		wl->nvs_len = 0;
+	} else {
 		wl->nvs = NULL;
 		wl->nvs_len = 0;
 	}
@@ -6510,21 +6513,29 @@
 
 int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev)
 {
-	int ret;
+	struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev);
+	const char *nvs_name;
+	int ret = 0;
 
-	if (!wl->ops || !wl->ptable)
+	if (!wl->ops || !wl->ptable || !pdev_data)
 		return -EINVAL;
 
 	wl->dev = &pdev->dev;
 	wl->pdev = pdev;
 	platform_set_drvdata(pdev, wl);
 
-	ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG,
-				      WL12XX_NVS_NAME, &pdev->dev, GFP_KERNEL,
-				      wl, wlcore_nvs_cb);
-	if (ret < 0) {
-		wl1271_error("request_firmware_nowait failed: %d", ret);
-		complete_all(&wl->nvs_loading_complete);
+	if (pdev_data->family && pdev_data->family->nvs_name) {
+		nvs_name = pdev_data->family->nvs_name;
+		ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG,
+					      nvs_name, &pdev->dev, GFP_KERNEL,
+					      wl, wlcore_nvs_cb);
+		if (ret < 0) {
+			wl1271_error("request_firmware_nowait failed for %s: %d",
+				     nvs_name, ret);
+			complete_all(&wl->nvs_loading_complete);
+		}
+	} else {
+		wlcore_nvs_cb(NULL, wl);
 	}
 
 	return ret;
@@ -6533,9 +6544,11 @@
 
 int wlcore_remove(struct platform_device *pdev)
 {
+	struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev);
 	struct wl1271 *wl = platform_get_drvdata(pdev);
 
-	wait_for_completion(&wl->nvs_loading_complete);
+	if (pdev_data->family && pdev_data->family->nvs_name)
+		wait_for_completion(&wl->nvs_loading_complete);
 	if (!wl->initialized)
 		return 0;
 
@@ -6572,4 +6585,3 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Luciano Coelho <coelho@ti.com>");
 MODULE_AUTHOR("Juuso Oikarinen <juuso.oikarinen@nokia.com>");
-MODULE_FIRMWARE(WL12XX_NVS_NAME);
diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c
index 5839acb..a6e94b1 100644
--- a/drivers/net/wireless/ti/wlcore/sdio.c
+++ b/drivers/net/wireless/ti/wlcore/sdio.c
@@ -216,17 +216,33 @@
 };
 
 #ifdef CONFIG_OF
+
+static const struct wilink_family_data wl127x_data = {
+	.name = "wl127x",
+	.nvs_name = "ti-connectivity/wl127x-nvs.bin",
+};
+
+static const struct wilink_family_data wl128x_data = {
+	.name = "wl128x",
+	.nvs_name = "ti-connectivity/wl128x-nvs.bin",
+};
+
+static const struct wilink_family_data wl18xx_data = {
+	.name = "wl18xx",
+	.cfg_name = "ti-connectivity/wl18xx-conf.bin",
+};
+
 static const struct of_device_id wlcore_sdio_of_match_table[] = {
-	{ .compatible = "ti,wl1271" },
-	{ .compatible = "ti,wl1273" },
-	{ .compatible = "ti,wl1281" },
-	{ .compatible = "ti,wl1283" },
-	{ .compatible = "ti,wl1801" },
-	{ .compatible = "ti,wl1805" },
-	{ .compatible = "ti,wl1807" },
-	{ .compatible = "ti,wl1831" },
-	{ .compatible = "ti,wl1835" },
-	{ .compatible = "ti,wl1837" },
+	{ .compatible = "ti,wl1271", .data = &wl127x_data },
+	{ .compatible = "ti,wl1273", .data = &wl127x_data },
+	{ .compatible = "ti,wl1281", .data = &wl128x_data },
+	{ .compatible = "ti,wl1283", .data = &wl128x_data },
+	{ .compatible = "ti,wl1801", .data = &wl18xx_data },
+	{ .compatible = "ti,wl1805", .data = &wl18xx_data },
+	{ .compatible = "ti,wl1807", .data = &wl18xx_data },
+	{ .compatible = "ti,wl1831", .data = &wl18xx_data },
+	{ .compatible = "ti,wl1835", .data = &wl18xx_data },
+	{ .compatible = "ti,wl1837", .data = &wl18xx_data },
 	{ }
 };
 
@@ -234,9 +250,13 @@
 			   struct wlcore_platdev_data *pdev_data)
 {
 	struct device_node *np = dev->of_node;
+	const struct of_device_id *of_id;
 
-	if (!np || !of_match_node(wlcore_sdio_of_match_table, np))
-		return -ENODATA;
+	of_id = of_match_node(wlcore_sdio_of_match_table, np);
+	if (!of_id)
+		return -ENODEV;
+
+	pdev_data->family = of_id->data;
 
 	*irq = irq_of_parse_and_map(np, 0);
 	if (!*irq) {
@@ -263,7 +283,7 @@
 static int wl1271_probe(struct sdio_func *func,
 				  const struct sdio_device_id *id)
 {
-	struct wlcore_platdev_data pdev_data;
+	struct wlcore_platdev_data *pdev_data;
 	struct wl12xx_sdio_glue *glue;
 	struct resource res[1];
 	mmc_pm_flag_t mmcflags;
@@ -275,14 +295,15 @@
 	if (func->num != 0x02)
 		return -ENODEV;
 
-	memset(&pdev_data, 0x00, sizeof(pdev_data));
-	pdev_data.if_ops = &sdio_ops;
+	pdev_data = devm_kzalloc(&func->dev, sizeof(*pdev_data), GFP_KERNEL);
+	if (!pdev_data)
+		return -ENOMEM;
 
-	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
-	if (!glue) {
-		dev_err(&func->dev, "can't allocate glue\n");
-		goto out;
-	}
+	pdev_data->if_ops = &sdio_ops;
+
+	glue = devm_kzalloc(&func->dev, sizeof(*glue), GFP_KERNEL);
+	if (!glue)
+		return -ENOMEM;
 
 	glue->dev = &func->dev;
 
@@ -292,16 +313,16 @@
 	/* Use block mode for transferring over one block size of data */
 	func->card->quirks |= MMC_QUIRK_BLKSZ_FOR_BYTE_MODE;
 
-	ret = wlcore_probe_of(&func->dev, &irq, &pdev_data);
+	ret = wlcore_probe_of(&func->dev, &irq, pdev_data);
 	if (ret)
-		goto out_free_glue;
+		goto out;
 
 	/* if sdio can keep power while host is suspended, enable wow */
 	mmcflags = sdio_get_host_pm_caps(func);
 	dev_dbg(glue->dev, "sdio PM caps = 0x%x\n", mmcflags);
 
 	if (mmcflags & MMC_PM_KEEP_POWER)
-		pdev_data.pwr_in_suspend = true;
+		pdev_data->pwr_in_suspend = true;
 
 	sdio_set_drvdata(func, glue);
 
@@ -323,7 +344,7 @@
 	if (!glue->core) {
 		dev_err(glue->dev, "can't allocate platform_device");
 		ret = -ENOMEM;
-		goto out_free_glue;
+		goto out;
 	}
 
 	glue->core->dev.parent = &func->dev;
@@ -341,8 +362,8 @@
 		goto out_dev_put;
 	}
 
-	ret = platform_device_add_data(glue->core, &pdev_data,
-				       sizeof(pdev_data));
+	ret = platform_device_add_data(glue->core, pdev_data,
+				       sizeof(*pdev_data));
 	if (ret) {
 		dev_err(glue->dev, "can't add platform data\n");
 		goto out_dev_put;
@@ -358,9 +379,6 @@
 out_dev_put:
 	platform_device_put(glue->core);
 
-out_free_glue:
-	kfree(glue);
-
 out:
 	return ret;
 }
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index 6d24040..f949ad2b 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -79,19 +79,19 @@
 #define WSPI_MAX_NUM_OF_CHUNKS \
 	((SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) + 1)
 
-
-struct wilink_familiy_data {
-	char name[8];
+static const struct wilink_family_data wl127x_data = {
+	.name = "wl127x",
+	.nvs_name = "ti-connectivity/wl127x-nvs.bin",
 };
 
-const struct wilink_familiy_data *wilink_data;
+static const struct wilink_family_data wl128x_data = {
+	.name = "wl128x",
+	.nvs_name = "ti-connectivity/wl128x-nvs.bin",
+};
 
-static const struct wilink_familiy_data wl18xx_data = {
+static const struct wilink_family_data wl18xx_data = {
 	.name = "wl18xx",
-};
-
-static const struct wilink_familiy_data wl12xx_data = {
-	.name = "wl12xx",
+	.cfg_name = "ti-connectivity/wl18xx-conf.bin",
 };
 
 struct wl12xx_spi_glue {
@@ -429,10 +429,10 @@
 };
 
 static const struct of_device_id wlcore_spi_of_match_table[] = {
-	{ .compatible = "ti,wl1271", .data = &wl12xx_data},
-	{ .compatible = "ti,wl1273", .data = &wl12xx_data},
-	{ .compatible = "ti,wl1281", .data = &wl12xx_data},
-	{ .compatible = "ti,wl1283", .data = &wl12xx_data},
+	{ .compatible = "ti,wl1271", .data = &wl127x_data},
+	{ .compatible = "ti,wl1273", .data = &wl127x_data},
+	{ .compatible = "ti,wl1281", .data = &wl128x_data},
+	{ .compatible = "ti,wl1283", .data = &wl128x_data},
 	{ .compatible = "ti,wl1801", .data = &wl18xx_data},
 	{ .compatible = "ti,wl1805", .data = &wl18xx_data},
 	{ .compatible = "ti,wl1807", .data = &wl18xx_data},
@@ -460,9 +460,9 @@
 	if (!of_id)
 		return -ENODEV;
 
-	wilink_data = of_id->data;
-	dev_info(&spi->dev, "selected chip familiy is %s\n",
-		 wilink_data->name);
+	pdev_data->family = of_id->data;
+	dev_info(&spi->dev, "selected chip family is %s\n",
+		 pdev_data->family->name);
 
 	if (of_find_property(dt_node, "clock-xtal", NULL))
 		pdev_data->ref_clock_xtal = true;
@@ -479,13 +479,15 @@
 static int wl1271_probe(struct spi_device *spi)
 {
 	struct wl12xx_spi_glue *glue;
-	struct wlcore_platdev_data pdev_data;
+	struct wlcore_platdev_data *pdev_data;
 	struct resource res[1];
 	int ret;
 
-	memset(&pdev_data, 0x00, sizeof(pdev_data));
+	pdev_data = devm_kzalloc(&spi->dev, sizeof(*pdev_data), GFP_KERNEL);
+	if (!pdev_data)
+		return -ENOMEM;
 
-	pdev_data.if_ops = &spi_ops;
+	pdev_data->if_ops = &spi_ops;
 
 	glue = devm_kzalloc(&spi->dev, sizeof(*glue), GFP_KERNEL);
 	if (!glue) {
@@ -509,7 +511,7 @@
 		return PTR_ERR(glue->reg);
 	}
 
-	ret = wlcore_probe_of(spi, glue, &pdev_data);
+	ret = wlcore_probe_of(spi, glue, pdev_data);
 	if (ret) {
 		dev_err(glue->dev,
 			"can't get device tree parameters (%d)\n", ret);
@@ -522,7 +524,7 @@
 		return ret;
 	}
 
-	glue->core = platform_device_alloc(wilink_data->name,
+	glue->core = platform_device_alloc(pdev_data->family->name,
 					   PLATFORM_DEVID_AUTO);
 	if (!glue->core) {
 		dev_err(glue->dev, "can't allocate platform_device\n");
@@ -543,8 +545,8 @@
 		goto out_dev_put;
 	}
 
-	ret = platform_device_add_data(glue->core, &pdev_data,
-				       sizeof(pdev_data));
+	ret = platform_device_add_data(glue->core, pdev_data,
+				       sizeof(*pdev_data));
 	if (ret) {
 		dev_err(glue->dev, "can't add platform data\n");
 		goto out_dev_put;
diff --git a/drivers/net/wireless/ti/wlcore/wlcore.h b/drivers/net/wireless/ti/wlcore/wlcore.h
index 8f28aa0..1827546 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore.h
@@ -501,6 +501,9 @@
 
 	/* dynamic fw traces */
 	u32 dynamic_fw_traces;
+
+	/* time sync zone master */
+	u8 zone_master_mac_addr[ETH_ALEN];
 };
 
 int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev);
diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h
index 242b4e3..e840985 100644
--- a/drivers/net/wireless/ti/wlcore/wlcore_i.h
+++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h
@@ -35,12 +35,11 @@
 #include "conf.h"
 #include "ini.h"
 
-/*
- * wl127x and wl128x are using the same NVS file name. However, the
- * ini parameters between them are different.  The driver validates
- * the correct NVS size in wl1271_boot_upload_nvs().
- */
-#define WL12XX_NVS_NAME "ti-connectivity/wl1271-nvs.bin"
+struct wilink_family_data {
+	const char *name;
+	const char *nvs_name;	/* wl12xx nvs file */
+	const char *cfg_name;	/* wl18xx cfg file */
+};
 
 #define WL1271_TX_SECURITY_LO16(s) ((u16)((s) & 0xffff))
 #define WL1271_TX_SECURITY_HI32(s) ((u32)(((s) >> 16) & 0xffffffff))
@@ -208,6 +207,7 @@
 
 struct wlcore_platdev_data {
 	struct wl1271_if_operations *if_ops;
+	const struct wilink_family_data *family;
 
 	bool ref_clock_xtal;	/* specify whether the clock is XTAL or not */
 	u32 ref_clock_freq;	/* in Hertz */
@@ -347,7 +347,6 @@
 	 * Used in both AP and STA mode.
 	 */
 	u64 total_freed_pkts;
-	struct wl1271 *wl;
 };
 
 struct wl12xx_vif {
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index 82d94f8..932f3f81 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -1258,7 +1258,9 @@
 {
 	struct wl3501_card *this = netdev_priv(dev);
 	int rc = -ENODEV;
+	unsigned long flags;
 
+	spin_lock_irqsave(&this->lock, flags);
 	wl3501_block_interrupt(this);
 
 	if (wl3501_init_firmware(this)) {
@@ -1280,20 +1282,17 @@
 	pr_debug("%s: device reset", dev->name);
 	rc = 0;
 out:
+	spin_unlock_irqrestore(&this->lock, flags);
 	return rc;
 }
 
 static void wl3501_tx_timeout(struct net_device *dev)
 {
-	struct wl3501_card *this = netdev_priv(dev);
 	struct net_device_stats *stats = &dev->stats;
-	unsigned long flags;
 	int rc;
 
 	stats->tx_errors++;
-	spin_lock_irqsave(&this->lock, flags);
 	rc = wl3501_reset(dev);
-	spin_unlock_irqrestore(&this->lock, flags);
 	if (rc)
 		printk(KERN_ERR "%s: Error %d resetting card on Tx timeout!\n",
 		       dev->name, rc);
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
index a912dc0..c5effd6c 100644
--- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
+++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c
@@ -193,7 +193,7 @@
 			0, 0, p, sizeof(ret), 5000 /* ms */);
 		if (r != sizeof(ret)) {
 			dev_err(&udev->dev,
-				"control request firmeware confirmation failed."
+				"control request firmware confirmation failed."
 				" Return value %d\n", r);
 			if (r >= 0)
 				r = -ENODEV;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 84d6cbd..b38fb2c 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -292,8 +292,6 @@
 #endif
 
 	struct xen_netif_ctrl_back_ring ctrl;
-	struct task_struct *ctrl_task;
-	wait_queue_head_t ctrl_wq;
 	unsigned int ctrl_irq;
 
 	/* Miscellaneous private stuff. */
@@ -359,7 +357,7 @@
 
 int xenvif_dealloc_kthread(void *data);
 
-int xenvif_ctrl_kthread(void *data);
+irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
 
 void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
 
diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c
index fb87cb3..613bac0 100644
--- a/drivers/net/xen-netback/hash.c
+++ b/drivers/net/xen-netback/hash.c
@@ -32,15 +32,6 @@
 #include <linux/vmalloc.h>
 #include <linux/rculist.h>
 
-static void xenvif_del_hash(struct rcu_head *rcu)
-{
-	struct xenvif_hash_cache_entry *entry;
-
-	entry = container_of(rcu, struct xenvif_hash_cache_entry, rcu);
-
-	kfree(entry);
-}
-
 static void xenvif_add_hash(struct xenvif *vif, const u8 *tag,
 			    unsigned int len, u32 val)
 {
@@ -76,7 +67,7 @@
 		if (++vif->hash.cache.count > xenvif_hash_cache_size) {
 			list_del_rcu(&oldest->link);
 			vif->hash.cache.count--;
-			call_rcu(&oldest->rcu, xenvif_del_hash);
+			kfree_rcu(oldest, rcu);
 		}
 	}
 
@@ -114,7 +105,7 @@
 	list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) {
 		list_del_rcu(&entry->link);
 		vif->hash.cache.count--;
-		call_rcu(&entry->rcu, xenvif_del_hash);
+		kfree_rcu(entry, rcu);
 	}
 
 	spin_unlock_irqrestore(&vif->hash.cache.lock, flags);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 83deeeb..fb50c6d 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -128,15 +128,6 @@
 	return IRQ_HANDLED;
 }
 
-irqreturn_t xenvif_ctrl_interrupt(int irq, void *dev_id)
-{
-	struct xenvif *vif = dev_id;
-
-	wake_up(&vif->ctrl_wq);
-
-	return IRQ_HANDLED;
-}
-
 int xenvif_queue_stopped(struct xenvif_queue *queue)
 {
 	struct net_device *dev = queue->vif->dev;
@@ -570,8 +561,7 @@
 	struct net_device *dev = vif->dev;
 	void *addr;
 	struct xen_netif_ctrl_sring *shared;
-	struct task_struct *task;
-	int err = -ENOMEM;
+	int err;
 
 	err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
 				     &ring_ref, 1, &addr);
@@ -581,11 +571,7 @@
 	shared = (struct xen_netif_ctrl_sring *)addr;
 	BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE);
 
-	init_waitqueue_head(&vif->ctrl_wq);
-
-	err = bind_interdomain_evtchn_to_irqhandler(vif->domid, evtchn,
-						    xenvif_ctrl_interrupt,
-						    0, dev->name, vif);
+	err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn);
 	if (err < 0)
 		goto err_unmap;
 
@@ -593,19 +579,13 @@
 
 	xenvif_init_hash(vif);
 
-	task = kthread_create(xenvif_ctrl_kthread, (void *)vif,
-			      "%s-control", dev->name);
-	if (IS_ERR(task)) {
-		pr_warn("Could not allocate kthread for %s\n", dev->name);
-		err = PTR_ERR(task);
+	err = request_threaded_irq(vif->ctrl_irq, NULL, xenvif_ctrl_irq_fn,
+				   IRQF_ONESHOT, "xen-netback-ctrl", vif);
+	if (err) {
+		pr_warn("Could not setup irq handler for %s\n", dev->name);
 		goto err_deinit;
 	}
 
-	get_task_struct(task);
-	vif->ctrl_task = task;
-
-	wake_up_process(vif->ctrl_task);
-
 	return 0;
 
 err_deinit:
@@ -774,12 +754,6 @@
 
 void xenvif_disconnect_ctrl(struct xenvif *vif)
 {
-	if (vif->ctrl_task) {
-		kthread_stop(vif->ctrl_task);
-		put_task_struct(vif->ctrl_task);
-		vif->ctrl_task = NULL;
-	}
-
 	if (vif->ctrl_irq) {
 		xenvif_deinit_hash(vif);
 		unbind_from_irqhandler(vif->ctrl_irq, vif);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index edbae0b..3d0c989 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -2359,24 +2359,14 @@
 	return 0;
 }
 
-int xenvif_ctrl_kthread(void *data)
+irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data)
 {
 	struct xenvif *vif = data;
 
-	for (;;) {
-		wait_event_interruptible(vif->ctrl_wq,
-					 xenvif_ctrl_work_todo(vif) ||
-					 kthread_should_stop());
-		if (kthread_should_stop())
-			break;
+	while (xenvif_ctrl_work_todo(vif))
+		xenvif_ctrl_action(vif);
 
-		while (xenvif_ctrl_work_todo(vif))
-			xenvif_ctrl_action(vif);
-
-		cond_resched();
-	}
-
-	return 0;
+	return IRQ_HANDLED;
 }
 
 static int __init netback_init(void)
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 6a31f26..daf4c78 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -271,6 +271,11 @@
 	be->dev = dev;
 	dev_set_drvdata(&dev->dev, be);
 
+	be->state = XenbusStateInitialising;
+	err = xenbus_switch_state(dev, XenbusStateInitialising);
+	if (err)
+		goto fail;
+
 	sg = 1;
 
 	do {
@@ -383,11 +388,6 @@
 
 	be->hotplug_script = script;
 
-	err = xenbus_switch_state(dev, XenbusStateInitWait);
-	if (err)
-		goto fail;
-
-	be->state = XenbusStateInitWait;
 
 	/* This kicks hotplug scripts, so do it immediately. */
 	err = backend_create_xenvif(be);
@@ -492,20 +492,20 @@
 
 /* Handle backend state transitions:
  *
- * The backend state starts in InitWait and the following transitions are
+ * The backend state starts in Initialising and the following transitions are
  * allowed.
  *
- * InitWait -> Connected
+ * Initialising -> InitWait -> Connected
+ *          \
+ *           \        ^    \         |
+ *            \       |     \        |
+ *             \      |      \       |
+ *              \     |       \      |
+ *               \    |        \     |
+ *                \   |         \    |
+ *                 V  |          V   V
  *
- *    ^    \         |
- *    |     \        |
- *    |      \       |
- *    |       \      |
- *    |        \     |
- *    |         \    |
- *    |          V   V
- *
- *  Closed  <-> Closing
+ *                  Closed  <-> Closing
  *
  * The state argument specifies the eventual state of the backend and the
  * function transitions to that state via the shortest path.
@@ -515,6 +515,20 @@
 {
 	while (be->state != state) {
 		switch (be->state) {
+		case XenbusStateInitialising:
+			switch (state) {
+			case XenbusStateInitWait:
+			case XenbusStateConnected:
+			case XenbusStateClosing:
+				backend_switch_state(be, XenbusStateInitWait);
+				break;
+			case XenbusStateClosed:
+				backend_switch_state(be, XenbusStateClosed);
+				break;
+			default:
+				BUG();
+			}
+			break;
 		case XenbusStateClosed:
 			switch (state) {
 			case XenbusStateInitWait:
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 96ccd4e..e17879d 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -565,6 +565,7 @@
 	struct netfront_queue *queue = NULL;
 	unsigned int num_queues = dev->real_num_tx_queues;
 	u16 queue_index;
+	struct sk_buff *nskb;
 
 	/* Drop the packet if no queues are set up */
 	if (num_queues < 1)
@@ -593,6 +594,20 @@
 
 	page = virt_to_page(skb->data);
 	offset = offset_in_page(skb->data);
+
+	/* The first req should be at least ETH_HLEN size or the packet will be
+	 * dropped by netback.
+	 */
+	if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
+		nskb = skb_copy(skb, GFP_ATOMIC);
+		if (!nskb)
+			goto drop;
+		dev_kfree_skb_any(skb);
+		skb = nskb;
+		page = virt_to_page(skb->data);
+		offset = offset_in_page(skb->data);
+	}
+
 	len = skb_headlen(skb);
 
 	spin_lock_irqsave(&queue->tx_lock, flags);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 88e9166..368795a 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1269,6 +1269,7 @@
 		}
 	}
 	set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
+	btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
 	revalidate_disk(btt->btt_disk);
 
 	return 0;
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 3fa7919..97dd292 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -140,10 +140,30 @@
 }
 static DEVICE_ATTR_RW(namespace);
 
+static ssize_t size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_btt *nd_btt = to_nd_btt(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	if (dev->driver)
+		rc = sprintf(buf, "%llu\n", nd_btt->size);
+	else {
+		/* no size to convey if the btt instance is disabled */
+		rc = -ENXIO;
+	}
+	device_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(size);
+
 static struct attribute *nd_btt_attributes[] = {
 	&dev_attr_sector_size.attr,
 	&dev_attr_namespace.attr,
 	&dev_attr_uuid.attr,
+	&dev_attr_size.attr,
 	NULL,
 };
 
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 458daf9..935866f 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -185,8 +185,12 @@
 		return -ENXIO;
 
 	nd_desc = nvdimm_bus->nd_desc;
+	/*
+	 * if ndctl does not exist, it's PMEM_LEGACY and
+	 * we want to just pretend everything is handled.
+	 */
 	if (!nd_desc->ndctl)
-		return -ENXIO;
+		return len;
 
 	memset(&ars_cap, 0, sizeof(ars_cap));
 	ars_cap.address = phys;
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 715583f..4d7bbd2 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -99,8 +99,11 @@
 	nvdimm_map->size = size;
 	kref_init(&nvdimm_map->kref);
 
-	if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev)))
+	if (!request_mem_region(offset, size, dev_name(&nvdimm_bus->dev))) {
+		dev_err(&nvdimm_bus->dev, "failed to request %pa + %zd for %s\n",
+				&offset, size, dev_name(dev));
 		goto err_request_region;
+	}
 
 	if (flags)
 		nvdimm_map->mem = memremap(offset, size, flags);
@@ -171,6 +174,9 @@
 		kref_get(&nvdimm_map->kref);
 	nvdimm_bus_unlock(dev);
 
+	if (!nvdimm_map)
+		return NULL;
+
 	if (devm_add_action_or_reset(dev, nvdimm_map_put, nvdimm_map))
 		return NULL;
 
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 4047639..0b78a82 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -52,10 +52,28 @@
 struct nd_region_data {
 	int ns_count;
 	int ns_active;
-	unsigned int flush_mask;
-	void __iomem *flush_wpq[0][0];
+	unsigned int hints_shift;
+	void __iomem *flush_wpq[0];
 };
 
+static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd,
+		int dimm, int hint)
+{
+	unsigned int num = 1 << ndrd->hints_shift;
+	unsigned int mask = num - 1;
+
+	return ndrd->flush_wpq[dimm * num + (hint & mask)];
+}
+
+static inline void ndrd_set_flush_wpq(struct nd_region_data *ndrd, int dimm,
+		int hint, void __iomem *flush)
+{
+	unsigned int num = 1 << ndrd->hints_shift;
+	unsigned int mask = num - 1;
+
+	ndrd->flush_wpq[dimm * num + (hint & mask)] = flush;
+}
+
 static inline struct nd_namespace_index *to_namespace_index(
 		struct nvdimm_drvdata *ndd, int i)
 {
@@ -143,6 +161,7 @@
 	struct nd_namespace_common *ndns;
 	struct btt *btt;
 	unsigned long lbasize;
+	u64 size;
 	u8 *uuid;
 	int id;
 };
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index e8d5ba7..4c0ac4a 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -38,7 +38,7 @@
 
 	dev_dbg(dev, "%s: map %d flush address%s\n", nvdimm_name(nvdimm),
 			nvdimm->num_flush, nvdimm->num_flush == 1 ? "" : "es");
-	for (i = 0; i < nvdimm->num_flush; i++) {
+	for (i = 0; i < (1 << ndrd->hints_shift); i++) {
 		struct resource *res = &nvdimm->flush_wpq[i];
 		unsigned long pfn = PHYS_PFN(res->start);
 		void __iomem *flush_page;
@@ -54,14 +54,15 @@
 
 		if (j < i)
 			flush_page = (void __iomem *) ((unsigned long)
-					ndrd->flush_wpq[dimm][j] & PAGE_MASK);
+					ndrd_get_flush_wpq(ndrd, dimm, j)
+					& PAGE_MASK);
 		else
 			flush_page = devm_nvdimm_ioremap(dev,
-					PHYS_PFN(pfn), PAGE_SIZE);
+					PFN_PHYS(pfn), PAGE_SIZE);
 		if (!flush_page)
 			return -ENXIO;
-		ndrd->flush_wpq[dimm][i] = flush_page
-			+ (res->start & ~PAGE_MASK);
+		ndrd_set_flush_wpq(ndrd, dimm, i, flush_page
+				+ (res->start & ~PAGE_MASK));
 	}
 
 	return 0;
@@ -93,7 +94,10 @@
 		return -ENOMEM;
 	dev_set_drvdata(dev, ndrd);
 
-	ndrd->flush_mask = (1 << ilog2(num_flush)) - 1;
+	if (!num_flush)
+		return 0;
+
+	ndrd->hints_shift = ilog2(num_flush);
 	for (i = 0; i < nd_region->ndr_mappings; i++) {
 		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
 		struct nvdimm *nvdimm = nd_mapping->nvdimm;
@@ -900,8 +904,8 @@
 	 */
 	wmb();
 	for (i = 0; i < nd_region->ndr_mappings; i++)
-		if (ndrd->flush_wpq[i][0])
-			writeq(1, ndrd->flush_wpq[i][idx & ndrd->flush_mask]);
+		if (ndrd_get_flush_wpq(ndrd, i, 0))
+			writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
 	wmb();
 }
 EXPORT_SYMBOL_GPL(nvdimm_flush);
@@ -925,7 +929,7 @@
 
 	for (i = 0; i < nd_region->ndr_mappings; i++)
 		/* flush hints present, flushing required */
-		if (ndrd->flush_wpq[i][0])
+		if (ndrd_get_flush_wpq(ndrd, i, 0))
 			return 1;
 
 	/*
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index db39d53..f7d37a6 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -30,8 +30,8 @@
 
 config NVME_RDMA
 	tristate "NVM Express over Fabrics RDMA host driver"
-	depends on INFINIBAND
-	depends on BLK_DEV_NVME
+	depends on INFINIBAND && BLOCK
+	select NVME_CORE
 	select NVME_FABRICS
 	select SG_POOL
 	help
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 7ff2e82..2feacc7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -81,10 +81,12 @@
 bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		enum nvme_ctrl_state new_state)
 {
-	enum nvme_ctrl_state old_state = ctrl->state;
+	enum nvme_ctrl_state old_state;
 	bool changed = false;
 
 	spin_lock_irq(&ctrl->lock);
+
+	old_state = ctrl->state;
 	switch (new_state) {
 	case NVME_CTRL_LIVE:
 		switch (old_state) {
@@ -140,11 +142,12 @@
 	default:
 		break;
 	}
-	spin_unlock_irq(&ctrl->lock);
 
 	if (changed)
 		ctrl->state = new_state;
 
+	spin_unlock_irq(&ctrl->lock);
+
 	return changed;
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
@@ -608,7 +611,7 @@
 
 	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0,
 			NVME_QID_ANY, 0, 0);
-	if (ret >= 0)
+	if (ret >= 0 && result)
 		*result = le32_to_cpu(cqe.result);
 	return ret;
 }
@@ -628,7 +631,7 @@
 
 	ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0,
 			NVME_QID_ANY, 0, 0);
-	if (ret >= 0)
+	if (ret >= 0 && result)
 		*result = le32_to_cpu(cqe.result);
 	return ret;
 }
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index dc99676..4eff491 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -47,8 +47,10 @@
 
 	mutex_lock(&nvmf_hosts_mutex);
 	host = __nvmf_host_find(hostnqn);
-	if (host)
+	if (host) {
+		kref_get(&host->ref);
 		goto out_unlock;
+	}
 
 	host = kmalloc(sizeof(*host), GFP_KERNEL);
 	if (!host)
@@ -56,7 +58,7 @@
 
 	kref_init(&host->ref);
 	memcpy(host->nqn, hostnqn, NVMF_NQN_SIZE);
-	uuid_le_gen(&host->id);
+	uuid_be_gen(&host->id);
 
 	list_add_tail(&host->list, &nvmf_hosts);
 out_unlock:
@@ -73,9 +75,9 @@
 		return NULL;
 
 	kref_init(&host->ref);
-	uuid_le_gen(&host->id);
+	uuid_be_gen(&host->id);
 	snprintf(host->nqn, NVMF_NQN_SIZE,
-		"nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUl", &host->id);
+		"nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id);
 
 	mutex_lock(&nvmf_hosts_mutex);
 	list_add_tail(&host->list, &nvmf_hosts);
@@ -363,7 +365,14 @@
 	cmd.connect.opcode = nvme_fabrics_command;
 	cmd.connect.fctype = nvme_fabrics_type_connect;
 	cmd.connect.qid = 0;
-	cmd.connect.sqsize = cpu_to_le16(ctrl->sqsize);
+
+	/*
+	 * fabrics spec sets a minimum of depth 32 for admin queue,
+	 * so set the queue with this depth always until
+	 * justification otherwise.
+	 */
+	cmd.connect.sqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+
 	/*
 	 * Set keep-alive timeout in seconds granularity (ms * 1000)
 	 * and add a grace period for controller kato enforcement
@@ -375,7 +384,7 @@
 	if (!data)
 		return -ENOMEM;
 
-	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_le));
+	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
 	data->cntlid = cpu_to_le16(0xffff);
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
@@ -434,7 +443,7 @@
 	if (!data)
 		return -ENOMEM;
 
-	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_le));
+	memcpy(&data->hostid, &ctrl->opts->host->id, sizeof(uuid_be));
 	data->cntlid = cpu_to_le16(ctrl->cntlid);
 	strncpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE);
 	strncpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE);
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 89df52c..46e460a 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -34,7 +34,7 @@
 	struct kref		ref;
 	struct list_head	list;
 	char			nqn[NVMF_NQN_SIZE];
-	uuid_le			id;
+	uuid_be			id;
 };
 
 /**
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d7c33f9..60f7eab 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1543,15 +1543,10 @@
 		reinit_completion(&dev->ioq_wait);
  retry:
 		timeout = ADMIN_TIMEOUT;
-		for (; i > 0; i--) {
-			struct nvme_queue *nvmeq = dev->queues[i];
-
-			if (!pass)
-				nvme_suspend_queue(nvmeq);
-			if (nvme_delete_queue(nvmeq, opcode))
+		for (; i > 0; i--, sent++)
+			if (nvme_delete_queue(dev->queues[i], opcode))
 				break;
-			++sent;
-		}
+
 		while (sent--) {
 			timeout = wait_for_completion_io_timeout(&dev->ioq_wait, timeout);
 			if (timeout == 0)
@@ -1693,11 +1688,17 @@
 		nvme_stop_queues(&dev->ctrl);
 		csts = readl(dev->bar + NVME_REG_CSTS);
 	}
+
+	for (i = dev->queue_count - 1; i > 0; i--)
+		nvme_suspend_queue(dev->queues[i]);
+
 	if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
-		for (i = dev->queue_count - 1; i >= 0; i--) {
-			struct nvme_queue *nvmeq = dev->queues[i];
-			nvme_suspend_queue(nvmeq);
-		}
+		/* A device might become IO incapable very soon during
+		 * probe, before the admin queue is configured. Thus,
+		 * queue_count can be 0 here.
+		 */
+		if (dev->queue_count)
+			nvme_suspend_queue(dev->queues[0]);
 	} else {
 		nvme_disable_io_queues(dev);
 		nvme_disable_admin_queue(dev, shutdown);
@@ -2116,6 +2117,8 @@
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+	{ PCI_DEVICE(0x1c5f, 0x0540),	/* Memblaze Pblaze4 adapter */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
 	{ 0, }
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 3e3ce2b..fbdb226 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -12,13 +12,11 @@
  * more details.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/string.h>
-#include <linux/jiffies.h>
 #include <linux/atomic.h>
 #include <linux/blk-mq.h>
 #include <linux/types.h>
@@ -26,7 +24,6 @@
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/nvme.h>
-#include <linux/t10-pi.h>
 #include <asm/unaligned.h>
 
 #include <rdma/ib_verbs.h>
@@ -46,10 +43,6 @@
 
 #define NVME_RDMA_MAX_INLINE_SEGMENTS	1
 
-#define NVME_RDMA_MAX_PAGES_PER_MR	512
-
-#define NVME_RDMA_DEF_RECONNECT_DELAY	20
-
 /*
  * We handle AEN commands ourselves and don't even let the
  * block layer know about them.
@@ -80,7 +73,6 @@
 	u32			num_sge;
 	int			nents;
 	bool			inline_data;
-	bool			need_inval;
 	struct ib_reg_wr	reg_wr;
 	struct ib_cqe		reg_cqe;
 	struct nvme_rdma_queue  *queue;
@@ -90,6 +82,8 @@
 
 enum nvme_rdma_queue_flags {
 	NVME_RDMA_Q_CONNECTED = (1 << 0),
+	NVME_RDMA_IB_QUEUE_ALLOCATED = (1 << 1),
+	NVME_RDMA_Q_DELETING = (1 << 2),
 };
 
 struct nvme_rdma_queue {
@@ -169,7 +163,6 @@
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event);
 static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
-static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl);
 
 /* XXX: really should move to a generic header sooner or later.. */
 static inline void put_unaligned_le24(u32 val, u8 *p)
@@ -290,7 +283,7 @@
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	int ret = 0;
 
-	if (!req->need_inval)
+	if (!req->mr->need_inval)
 		goto out;
 
 	ib_dereg_mr(req->mr);
@@ -300,9 +293,10 @@
 	if (IS_ERR(req->mr)) {
 		ret = PTR_ERR(req->mr);
 		req->mr = NULL;
+		goto out;
 	}
 
-	req->need_inval = false;
+	req->mr->need_inval = false;
 
 out:
 	return ret;
@@ -489,9 +483,14 @@
 
 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 {
-	struct nvme_rdma_device *dev = queue->device;
-	struct ib_device *ibdev = dev->dev;
+	struct nvme_rdma_device *dev;
+	struct ib_device *ibdev;
 
+	if (!test_and_clear_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags))
+		return;
+
+	dev = queue->device;
+	ibdev = dev->dev;
 	rdma_destroy_qp(queue->cm_id);
 	ib_free_cq(queue->ib_cq);
 
@@ -542,6 +541,7 @@
 		ret = -ENOMEM;
 		goto out_destroy_qp;
 	}
+	set_bit(NVME_RDMA_IB_QUEUE_ALLOCATED, &queue->flags);
 
 	return 0;
 
@@ -594,11 +594,13 @@
 		goto out_destroy_cm_id;
 	}
 
+	clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
 	set_bit(NVME_RDMA_Q_CONNECTED, &queue->flags);
 
 	return 0;
 
 out_destroy_cm_id:
+	nvme_rdma_destroy_queue_ib(queue);
 	rdma_destroy_id(queue->cm_id);
 	return ret;
 }
@@ -617,7 +619,7 @@
 
 static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue)
 {
-	if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
+	if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
 		return;
 	nvme_rdma_stop_queue(queue);
 	nvme_rdma_free_queue(queue);
@@ -649,7 +651,8 @@
 	int i, ret;
 
 	for (i = 1; i < ctrl->queue_count; i++) {
-		ret = nvme_rdma_init_queue(ctrl, i, ctrl->ctrl.sqsize);
+		ret = nvme_rdma_init_queue(ctrl, i,
+					   ctrl->ctrl.opts->queue_size);
 		if (ret) {
 			dev_info(ctrl->ctrl.device,
 				"failed to initialize i/o queue: %d\n", ret);
@@ -660,7 +663,7 @@
 	return 0;
 
 out_free_queues:
-	for (; i >= 1; i--)
+	for (i--; i >= 1; i--)
 		nvme_rdma_stop_and_free_queue(&ctrl->queues[i]);
 
 	return ret;
@@ -687,11 +690,6 @@
 	list_del(&ctrl->list);
 	mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-	if (ctrl->ctrl.tagset) {
-		blk_cleanup_queue(ctrl->ctrl.connect_q);
-		blk_mq_free_tag_set(&ctrl->tag_set);
-		nvme_rdma_dev_put(ctrl->device);
-	}
 	kfree(ctrl->queues);
 	nvmf_free_options(nctrl->opts);
 free_ctrl:
@@ -748,8 +746,11 @@
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
 	WARN_ON_ONCE(!changed);
 
-	if (ctrl->queue_count > 1)
+	if (ctrl->queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
+		nvme_queue_scan(&ctrl->ctrl);
+		nvme_queue_async_events(&ctrl->ctrl);
+	}
 
 	dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
 
@@ -771,8 +772,13 @@
 {
 	struct nvme_rdma_ctrl *ctrl = container_of(work,
 			struct nvme_rdma_ctrl, err_work);
+	int i;
 
 	nvme_stop_keep_alive(&ctrl->ctrl);
+
+	for (i = 0; i < ctrl->queue_count; i++)
+		clear_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[i].flags);
+
 	if (ctrl->queue_count > 1)
 		nvme_stop_queues(&ctrl->ctrl);
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -855,7 +861,7 @@
 	if (!blk_rq_bytes(rq))
 		return;
 
-	if (req->need_inval) {
+	if (req->mr->need_inval) {
 		res = nvme_rdma_inv_rkey(queue, req);
 		if (res < 0) {
 			dev_err(ctrl->ctrl.device,
@@ -941,7 +947,7 @@
 			     IB_ACCESS_REMOTE_READ |
 			     IB_ACCESS_REMOTE_WRITE;
 
-	req->need_inval = true;
+	req->mr->need_inval = true;
 
 	sg->addr = cpu_to_le64(req->mr->iova);
 	put_unaligned_le24(req->mr->length, sg->length);
@@ -964,7 +970,7 @@
 
 	req->num_sge = 1;
 	req->inline_data = false;
-	req->need_inval = false;
+	req->mr->need_inval = false;
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
 
@@ -1151,7 +1157,7 @@
 
 	if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
 	    wc->ex.invalidate_rkey == req->mr->rkey)
-		req->need_inval = false;
+		req->mr->need_inval = false;
 
 	blk_mq_complete_request(rq, status);
 
@@ -1269,7 +1275,7 @@
 {
 	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 	struct rdma_conn_param param = { };
-	struct nvme_rdma_cm_req priv;
+	struct nvme_rdma_cm_req priv = { };
 	int ret;
 
 	param.qp_num = queue->qp->qp_num;
@@ -1284,8 +1290,22 @@
 
 	priv.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
 	priv.qid = cpu_to_le16(nvme_rdma_queue_idx(queue));
-	priv.hrqsize = cpu_to_le16(queue->queue_size);
-	priv.hsqsize = cpu_to_le16(queue->queue_size);
+	/*
+	 * set the admin queue depth to the minimum size
+	 * specified by the Fabrics standard.
+	 */
+	if (priv.qid == 0) {
+		priv.hrqsize = cpu_to_le16(NVMF_AQ_DEPTH);
+		priv.hsqsize = cpu_to_le16(NVMF_AQ_DEPTH - 1);
+	} else {
+		/*
+		 * current interpretation of the fabrics spec
+		 * is at minimum you make hrqsize sqsize+1, or a
+		 * 1's based representation of sqsize.
+		 */
+		priv.hrqsize = cpu_to_le16(queue->queue_size);
+		priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
+	}
 
 	ret = rdma_connect(queue->cm_id, &param);
 	if (ret) {
@@ -1301,56 +1321,6 @@
 	return ret;
 }
 
-/**
- * nvme_rdma_device_unplug() - Handle RDMA device unplug
- * @queue:      Queue that owns the cm_id that caught the event
- *
- * DEVICE_REMOVAL event notifies us that the RDMA device is about
- * to unplug so we should take care of destroying our RDMA resources.
- * This event will be generated for each allocated cm_id.
- *
- * In our case, the RDMA resources are managed per controller and not
- * only per queue. So the way we handle this is we trigger an implicit
- * controller deletion upon the first DEVICE_REMOVAL event we see, and
- * hold the event inflight until the controller deletion is completed.
- *
- * One exception that we need to handle is the destruction of the cm_id
- * that caught the event. Since we hold the callout until the controller
- * deletion is completed, we'll deadlock if the controller deletion will
- * call rdma_destroy_id on this queue's cm_id. Thus, we claim ownership
- * of destroying this queue before-hand, destroy the queue resources
- * after the controller deletion completed with the exception of destroying
- * the cm_id implicitely by returning a non-zero rc to the callout.
- */
-static int nvme_rdma_device_unplug(struct nvme_rdma_queue *queue)
-{
-	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
-	int ret, ctrl_deleted = 0;
-
-	/* First disable the queue so ctrl delete won't free it */
-	if (!test_and_clear_bit(NVME_RDMA_Q_CONNECTED, &queue->flags))
-		goto out;
-
-	/* delete the controller */
-	ret = __nvme_rdma_del_ctrl(ctrl);
-	if (!ret) {
-		dev_warn(ctrl->ctrl.device,
-			"Got rdma device removal event, deleting ctrl\n");
-		flush_work(&ctrl->delete_work);
-
-		/* Return non-zero so the cm_id will destroy implicitly */
-		ctrl_deleted = 1;
-
-		/* Free this queue ourselves */
-		rdma_disconnect(queue->cm_id);
-		ib_drain_qp(queue->qp);
-		nvme_rdma_destroy_queue_ib(queue);
-	}
-
-out:
-	return ctrl_deleted;
-}
-
 static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *ev)
 {
@@ -1392,8 +1362,8 @@
 		nvme_rdma_error_recovery(queue->ctrl);
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
-		/* return 1 means impliciy CM ID destroy */
-		return nvme_rdma_device_unplug(queue);
+		/* device removal is handled via the ib_client API */
+		break;
 	default:
 		dev_err(queue->ctrl->ctrl.device,
 			"Unexpected RDMA CM event (%d)\n", ev->event);
@@ -1465,7 +1435,7 @@
 	if (rq->cmd_type == REQ_TYPE_FS && req_op(rq) == REQ_OP_FLUSH)
 		flush = true;
 	ret = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
-			req->need_inval ? &req->reg_wr.wr : NULL, flush);
+			req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
 	if (ret) {
 		nvme_rdma_unmap_data(queue, rq);
 		goto err;
@@ -1648,7 +1618,7 @@
 		nvme_rdma_free_io_queues(ctrl);
 	}
 
-	if (ctrl->ctrl.state == NVME_CTRL_LIVE)
+	if (test_bit(NVME_RDMA_Q_CONNECTED, &ctrl->queues[0].flags))
 		nvme_shutdown_ctrl(&ctrl->ctrl);
 
 	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
@@ -1657,15 +1627,27 @@
 	nvme_rdma_destroy_admin_queue(ctrl);
 }
 
+static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
+{
+	nvme_uninit_ctrl(&ctrl->ctrl);
+	if (shutdown)
+		nvme_rdma_shutdown_ctrl(ctrl);
+
+	if (ctrl->ctrl.tagset) {
+		blk_cleanup_queue(ctrl->ctrl.connect_q);
+		blk_mq_free_tag_set(&ctrl->tag_set);
+		nvme_rdma_dev_put(ctrl->device);
+	}
+
+	nvme_put_ctrl(&ctrl->ctrl);
+}
+
 static void nvme_rdma_del_ctrl_work(struct work_struct *work)
 {
 	struct nvme_rdma_ctrl *ctrl = container_of(work,
 				struct nvme_rdma_ctrl, delete_work);
 
-	nvme_remove_namespaces(&ctrl->ctrl);
-	nvme_rdma_shutdown_ctrl(ctrl);
-	nvme_uninit_ctrl(&ctrl->ctrl);
-	nvme_put_ctrl(&ctrl->ctrl);
+	__nvme_rdma_remove_ctrl(ctrl, true);
 }
 
 static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
@@ -1682,15 +1664,19 @@
 static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-	int ret;
+	int ret = 0;
 
+	/*
+	 * Keep a reference until all work is flushed since
+	 * __nvme_rdma_del_ctrl can free the ctrl mem
+	 */
+	if (!kref_get_unless_zero(&ctrl->ctrl.kref))
+		return -EBUSY;
 	ret = __nvme_rdma_del_ctrl(ctrl);
-	if (ret)
-		return ret;
-
-	flush_work(&ctrl->delete_work);
-
-	return 0;
+	if (!ret)
+		flush_work(&ctrl->delete_work);
+	nvme_put_ctrl(&ctrl->ctrl);
+	return ret;
 }
 
 static void nvme_rdma_remove_ctrl_work(struct work_struct *work)
@@ -1698,9 +1684,7 @@
 	struct nvme_rdma_ctrl *ctrl = container_of(work,
 				struct nvme_rdma_ctrl, delete_work);
 
-	nvme_remove_namespaces(&ctrl->ctrl);
-	nvme_uninit_ctrl(&ctrl->ctrl);
-	nvme_put_ctrl(&ctrl->ctrl);
+	__nvme_rdma_remove_ctrl(ctrl, false);
 }
 
 static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
@@ -1739,6 +1723,7 @@
 	if (ctrl->queue_count > 1) {
 		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
+		nvme_queue_async_events(&ctrl->ctrl);
 	}
 
 	return;
@@ -1809,7 +1794,7 @@
 
 	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
 	ctrl->tag_set.ops = &nvme_rdma_mq_ops;
-	ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize;
+	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
 	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
 	ctrl->tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -1907,7 +1892,7 @@
 	spin_lock_init(&ctrl->lock);
 
 	ctrl->queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
-	ctrl->ctrl.sqsize = opts->queue_size;
+	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
 	ret = -ENOMEM;
@@ -1988,27 +1973,57 @@
 	.create_ctrl	= nvme_rdma_create_ctrl,
 };
 
+static void nvme_rdma_add_one(struct ib_device *ib_device)
+{
+}
+
+static void nvme_rdma_remove_one(struct ib_device *ib_device, void *client_data)
+{
+	struct nvme_rdma_ctrl *ctrl;
+
+	/* Delete all controllers using this device */
+	mutex_lock(&nvme_rdma_ctrl_mutex);
+	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
+		if (ctrl->device->dev != ib_device)
+			continue;
+		dev_info(ctrl->ctrl.device,
+			"Removing ctrl: NQN \"%s\", addr %pISp\n",
+			ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
+		__nvme_rdma_del_ctrl(ctrl);
+	}
+	mutex_unlock(&nvme_rdma_ctrl_mutex);
+
+	flush_workqueue(nvme_rdma_wq);
+}
+
+static struct ib_client nvme_rdma_ib_client = {
+	.name   = "nvme_rdma",
+	.add = nvme_rdma_add_one,
+	.remove = nvme_rdma_remove_one
+};
+
 static int __init nvme_rdma_init_module(void)
 {
+	int ret;
+
 	nvme_rdma_wq = create_workqueue("nvme_rdma_wq");
 	if (!nvme_rdma_wq)
 		return -ENOMEM;
 
+	ret = ib_register_client(&nvme_rdma_ib_client);
+	if (ret) {
+		destroy_workqueue(nvme_rdma_wq);
+		return ret;
+	}
+
 	nvmf_register_transport(&nvme_rdma_transport);
 	return 0;
 }
 
 static void __exit nvme_rdma_cleanup_module(void)
 {
-	struct nvme_rdma_ctrl *ctrl;
-
 	nvmf_unregister_transport(&nvme_rdma_transport);
-
-	mutex_lock(&nvme_rdma_ctrl_mutex);
-	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list)
-		__nvme_rdma_del_ctrl(ctrl);
-	mutex_unlock(&nvme_rdma_ctrl_mutex);
-
+	ib_unregister_client(&nvme_rdma_ib_client);
 	destroy_workqueue(nvme_rdma_wq);
 }
 
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index a5c31cb..3a5b9d0 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -15,8 +15,8 @@
 
 config NVME_TARGET_LOOP
 	tristate "NVMe loopback device support"
-	depends on BLK_DEV_NVME
 	depends on NVME_TARGET
+	select NVME_CORE
 	select NVME_FABRICS
 	select SG_POOL
 	help
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 2fac17a..47c564b 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -13,7 +13,6 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
-#include <linux/random.h>
 #include <generated/utsrelease.h>
 #include "nvmet.h"
 
@@ -83,7 +82,6 @@
 {
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvme_id_ctrl *id;
-	u64 serial;
 	u16 status = 0;
 
 	id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -96,10 +94,8 @@
 	id->vid = 0;
 	id->ssvid = 0;
 
-	/* generate a random serial number as our controllers are ephemeral: */
-	get_random_bytes(&serial, sizeof(serial));
 	memset(id->sn, ' ', sizeof(id->sn));
-	snprintf(id->sn, sizeof(id->sn), "%llx", serial);
+	snprintf(id->sn, sizeof(id->sn), "%llx", ctrl->serial);
 
 	memset(id->mn, ' ', sizeof(id->mn));
 	strncpy((char *)id->mn, "Linux", sizeof(id->mn));
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 8a891ca..6559d5a 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -13,6 +13,7 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
+#include <linux/random.h>
 #include "nvmet.h"
 
 static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
@@ -728,6 +729,9 @@
 	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
 	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
 
+	/* generate a random serial number as our controllers are ephemeral: */
+	get_random_bytes(&ctrl->serial, sizeof(ctrl->serial));
+
 	kref_init(&ctrl->ref);
 	ctrl->subsys = subsys;
 
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 94e7829..395e60d 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -414,9 +414,8 @@
 	struct nvme_loop_ctrl *ctrl = container_of(work,
 				struct nvme_loop_ctrl, delete_work);
 
-	nvme_remove_namespaces(&ctrl->ctrl);
-	nvme_loop_shutdown_ctrl(ctrl);
 	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_loop_shutdown_ctrl(ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);
 }
 
@@ -501,7 +500,6 @@
 	nvme_loop_destroy_admin_queue(ctrl);
 out_disable:
 	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
-	nvme_remove_namespaces(&ctrl->ctrl);
 	nvme_uninit_ctrl(&ctrl->ctrl);
 	nvme_put_ctrl(&ctrl->ctrl);
 }
@@ -558,7 +556,7 @@
 
 	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
 	ctrl->tag_set.ops = &nvme_loop_mq_ops;
-	ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize;
+	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
 	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
 	ctrl->tag_set.numa_node = NUMA_NO_NODE;
 	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -622,7 +620,7 @@
 
 	ret = -ENOMEM;
 
-	ctrl->ctrl.sqsize = opts->queue_size;
+	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
 
 	ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 57dd6d8..76b6eed 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -113,6 +113,7 @@
 
 	struct mutex		lock;
 	u64			cap;
+	u64			serial;
 	u32			cc;
 	u32			csts;
 
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e06d504..1cbe6e0 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -77,6 +77,7 @@
 	NVMET_RDMA_Q_CONNECTING,
 	NVMET_RDMA_Q_LIVE,
 	NVMET_RDMA_Q_DISCONNECTING,
+	NVMET_RDMA_IN_DEVICE_REMOVAL,
 };
 
 struct nvmet_rdma_queue {
@@ -615,15 +616,10 @@
 	if (!len)
 		return 0;
 
-	/* use the already allocated data buffer if possible */
-	if (len <= NVMET_RDMA_INLINE_DATA_SIZE && rsp->queue->host_qid) {
-		nvmet_rdma_use_inline_sg(rsp, len, 0);
-	} else {
-		status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
-				len);
-		if (status)
-			return status;
-	}
+	status = nvmet_rdma_alloc_sgl(&rsp->req.sg, &rsp->req.sg_cnt,
+			len);
+	if (status)
+		return status;
 
 	ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
 			rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
@@ -982,9 +978,13 @@
 		container_of(w, struct nvmet_rdma_queue, release_work);
 	struct rdma_cm_id *cm_id = queue->cm_id;
 	struct nvmet_rdma_device *dev = queue->dev;
+	enum nvmet_rdma_queue_state state = queue->state;
 
 	nvmet_rdma_free_queue(queue);
-	rdma_destroy_id(cm_id);
+
+	if (state != NVMET_RDMA_IN_DEVICE_REMOVAL)
+		rdma_destroy_id(cm_id);
+
 	kref_put(&dev->ref, nvmet_rdma_free_dev);
 }
 
@@ -1004,10 +1004,10 @@
 	queue->host_qid = le16_to_cpu(req->qid);
 
 	/*
-	 * req->hsqsize corresponds to our recv queue size
+	 * req->hsqsize corresponds to our recv queue size plus 1
 	 * req->hrqsize corresponds to our send queue size
 	 */
-	queue->recv_queue_size = le16_to_cpu(req->hsqsize);
+	queue->recv_queue_size = le16_to_cpu(req->hsqsize) + 1;
 	queue->send_queue_size = le16_to_cpu(req->hrqsize);
 
 	if (!queue->host_qid && queue->recv_queue_size > NVMF_AQ_DEPTH)
@@ -1233,8 +1233,9 @@
 	switch (queue->state) {
 	case NVMET_RDMA_Q_CONNECTING:
 	case NVMET_RDMA_Q_LIVE:
-		disconnect = true;
 		queue->state = NVMET_RDMA_Q_DISCONNECTING;
+	case NVMET_RDMA_IN_DEVICE_REMOVAL:
+		disconnect = true;
 		break;
 	case NVMET_RDMA_Q_DISCONNECTING:
 		break;
@@ -1272,6 +1273,62 @@
 	schedule_work(&queue->release_work);
 }
 
+/**
+ * nvme_rdma_device_removal() - Handle RDMA device removal
+ * @queue:      nvmet rdma queue (cm id qp_context)
+ * @addr:	nvmet address (cm_id context)
+ *
+ * DEVICE_REMOVAL event notifies us that the RDMA device is about
+ * to unplug so we should take care of destroying our RDMA resources.
+ * This event will be generated for each allocated cm_id.
+ *
+ * Note that this event can be generated on a normal queue cm_id
+ * and/or a device bound listener cm_id (where in this case
+ * queue will be null).
+ *
+ * we claim ownership on destroying the cm_id. For queues we move
+ * the queue state to NVMET_RDMA_IN_DEVICE_REMOVAL and for port
+ * we nullify the priv to prevent double cm_id destruction and destroying
+ * the cm_id implicitely by returning a non-zero rc to the callout.
+ */
+static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
+		struct nvmet_rdma_queue *queue)
+{
+	unsigned long flags;
+
+	if (!queue) {
+		struct nvmet_port *port = cm_id->context;
+
+		/*
+		 * This is a listener cm_id. Make sure that
+		 * future remove_port won't invoke a double
+		 * cm_id destroy. use atomic xchg to make sure
+		 * we don't compete with remove_port.
+		 */
+		if (xchg(&port->priv, NULL) != cm_id)
+			return 0;
+	} else {
+		/*
+		 * This is a queue cm_id. Make sure that
+		 * release queue will not destroy the cm_id
+		 * and schedule all ctrl queues removal (only
+		 * if the queue is not disconnecting already).
+		 */
+		spin_lock_irqsave(&queue->state_lock, flags);
+		if (queue->state != NVMET_RDMA_Q_DISCONNECTING)
+			queue->state = NVMET_RDMA_IN_DEVICE_REMOVAL;
+		spin_unlock_irqrestore(&queue->state_lock, flags);
+		nvmet_rdma_queue_disconnect(queue);
+		flush_scheduled_work();
+	}
+
+	/*
+	 * We need to return 1 so that the core will destroy
+	 * it's own ID.  What a great API design..
+	 */
+	return 1;
+}
+
 static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
 		struct rdma_cm_event *event)
 {
@@ -1294,20 +1351,11 @@
 		break;
 	case RDMA_CM_EVENT_ADDR_CHANGE:
 	case RDMA_CM_EVENT_DISCONNECTED:
-	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		/*
-		 * We can get the device removal callback even for a
-		 * CM ID that we aren't actually using.  In that case
-		 * the context pointer is NULL, so we shouldn't try
-		 * to disconnect a non-existing queue.  But we also
-		 * need to return 1 so that the core will destroy
-		 * it's own ID.  What a great API design..
-		 */
-		if (queue)
-			nvmet_rdma_queue_disconnect(queue);
-		else
-			ret = 1;
+		nvmet_rdma_queue_disconnect(queue);
+		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		ret = nvmet_rdma_device_removal(cm_id, queue);
 		break;
 	case RDMA_CM_EVENT_REJECTED:
 	case RDMA_CM_EVENT_UNREACHABLE:
@@ -1396,9 +1444,10 @@
 
 static void nvmet_rdma_remove_port(struct nvmet_port *port)
 {
-	struct rdma_cm_id *cm_id = port->priv;
+	struct rdma_cm_id *cm_id = xchg(&port->priv, NULL);
 
-	rdma_destroy_id(cm_id);
+	if (cm_id)
+		rdma_destroy_id(cm_id);
 }
 
 static struct nvmet_fabrics_ops nvmet_rdma_ops = {
diff --git a/drivers/nvmem/rockchip-efuse.c b/drivers/nvmem/rockchip-efuse.c
index 4d3f391..423907b 100644
--- a/drivers/nvmem/rockchip-efuse.c
+++ b/drivers/nvmem/rockchip-efuse.c
@@ -22,17 +22,29 @@
 #include <linux/nvmem-provider.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 
-#define EFUSE_A_SHIFT			6
-#define EFUSE_A_MASK			0x3ff
-#define EFUSE_PGENB			BIT(3)
-#define EFUSE_LOAD			BIT(2)
-#define EFUSE_STROBE			BIT(1)
-#define EFUSE_CSB			BIT(0)
+#define RK3288_A_SHIFT		6
+#define RK3288_A_MASK		0x3ff
+#define RK3288_PGENB		BIT(3)
+#define RK3288_LOAD		BIT(2)
+#define RK3288_STROBE		BIT(1)
+#define RK3288_CSB		BIT(0)
 
-#define REG_EFUSE_CTRL			0x0000
-#define REG_EFUSE_DOUT			0x0004
+#define RK3399_A_SHIFT		16
+#define RK3399_A_MASK		0x3ff
+#define RK3399_NBYTES		4
+#define RK3399_STROBSFTSEL	BIT(9)
+#define RK3399_RSB		BIT(7)
+#define RK3399_PD		BIT(5)
+#define RK3399_PGENB		BIT(3)
+#define RK3399_LOAD		BIT(2)
+#define RK3399_STROBE		BIT(1)
+#define RK3399_CSB		BIT(0)
+
+#define REG_EFUSE_CTRL		0x0000
+#define REG_EFUSE_DOUT		0x0004
 
 struct rockchip_efuse_chip {
 	struct device *dev;
@@ -40,8 +52,8 @@
 	struct clk *clk;
 };
 
-static int rockchip_efuse_read(void *context, unsigned int offset,
-			       void *val, size_t bytes)
+static int rockchip_rk3288_efuse_read(void *context, unsigned int offset,
+				      void *val, size_t bytes)
 {
 	struct rockchip_efuse_chip *efuse = context;
 	u8 *buf = val;
@@ -53,27 +65,82 @@
 		return ret;
 	}
 
-	writel(EFUSE_LOAD | EFUSE_PGENB, efuse->base + REG_EFUSE_CTRL);
+	writel(RK3288_LOAD | RK3288_PGENB, efuse->base + REG_EFUSE_CTRL);
 	udelay(1);
 	while (bytes--) {
 		writel(readl(efuse->base + REG_EFUSE_CTRL) &
-			     (~(EFUSE_A_MASK << EFUSE_A_SHIFT)),
+			     (~(RK3288_A_MASK << RK3288_A_SHIFT)),
 			     efuse->base + REG_EFUSE_CTRL);
 		writel(readl(efuse->base + REG_EFUSE_CTRL) |
-			     ((offset++ & EFUSE_A_MASK) << EFUSE_A_SHIFT),
+			     ((offset++ & RK3288_A_MASK) << RK3288_A_SHIFT),
 			     efuse->base + REG_EFUSE_CTRL);
 		udelay(1);
 		writel(readl(efuse->base + REG_EFUSE_CTRL) |
-			     EFUSE_STROBE, efuse->base + REG_EFUSE_CTRL);
+			     RK3288_STROBE, efuse->base + REG_EFUSE_CTRL);
 		udelay(1);
 		*buf++ = readb(efuse->base + REG_EFUSE_DOUT);
 		writel(readl(efuse->base + REG_EFUSE_CTRL) &
-		     (~EFUSE_STROBE), efuse->base + REG_EFUSE_CTRL);
+		       (~RK3288_STROBE), efuse->base + REG_EFUSE_CTRL);
 		udelay(1);
 	}
 
 	/* Switch to standby mode */
-	writel(EFUSE_PGENB | EFUSE_CSB, efuse->base + REG_EFUSE_CTRL);
+	writel(RK3288_PGENB | RK3288_CSB, efuse->base + REG_EFUSE_CTRL);
+
+	clk_disable_unprepare(efuse->clk);
+
+	return 0;
+}
+
+static int rockchip_rk3399_efuse_read(void *context, unsigned int offset,
+				      void *val, size_t bytes)
+{
+	struct rockchip_efuse_chip *efuse = context;
+	unsigned int addr_start, addr_end, addr_offset, addr_len;
+	u32 out_value;
+	u8 *buf;
+	int ret, i = 0;
+
+	ret = clk_prepare_enable(efuse->clk);
+	if (ret < 0) {
+		dev_err(efuse->dev, "failed to prepare/enable efuse clk\n");
+		return ret;
+	}
+
+	addr_start = rounddown(offset, RK3399_NBYTES) / RK3399_NBYTES;
+	addr_end = roundup(offset + bytes, RK3399_NBYTES) / RK3399_NBYTES;
+	addr_offset = offset % RK3399_NBYTES;
+	addr_len = addr_end - addr_start;
+
+	buf = kzalloc(sizeof(*buf) * addr_len * RK3399_NBYTES, GFP_KERNEL);
+	if (!buf) {
+		clk_disable_unprepare(efuse->clk);
+		return -ENOMEM;
+	}
+
+	writel(RK3399_LOAD | RK3399_PGENB | RK3399_STROBSFTSEL | RK3399_RSB,
+	       efuse->base + REG_EFUSE_CTRL);
+	udelay(1);
+	while (addr_len--) {
+		writel(readl(efuse->base + REG_EFUSE_CTRL) | RK3399_STROBE |
+		       ((addr_start++ & RK3399_A_MASK) << RK3399_A_SHIFT),
+		       efuse->base + REG_EFUSE_CTRL);
+		udelay(1);
+		out_value = readl(efuse->base + REG_EFUSE_DOUT);
+		writel(readl(efuse->base + REG_EFUSE_CTRL) & (~RK3399_STROBE),
+		       efuse->base + REG_EFUSE_CTRL);
+		udelay(1);
+
+		memcpy(&buf[i], &out_value, RK3399_NBYTES);
+		i += RK3399_NBYTES;
+	}
+
+	/* Switch to standby mode */
+	writel(RK3399_PD | RK3399_CSB, efuse->base + REG_EFUSE_CTRL);
+
+	memcpy(val, buf + addr_offset, bytes);
+
+	kfree(buf);
 
 	clk_disable_unprepare(efuse->clk);
 
@@ -89,7 +156,27 @@
 };
 
 static const struct of_device_id rockchip_efuse_match[] = {
-	{ .compatible = "rockchip,rockchip-efuse", },
+	/* deprecated but kept around for dts binding compatibility */
+	{
+		.compatible = "rockchip,rockchip-efuse",
+		.data = (void *)&rockchip_rk3288_efuse_read,
+	},
+	{
+		.compatible = "rockchip,rk3066a-efuse",
+		.data = (void *)&rockchip_rk3288_efuse_read,
+	},
+	{
+		.compatible = "rockchip,rk3188-efuse",
+		.data = (void *)&rockchip_rk3288_efuse_read,
+	},
+	{
+		.compatible = "rockchip,rk3288-efuse",
+		.data = (void *)&rockchip_rk3288_efuse_read,
+	},
+	{
+		.compatible = "rockchip,rk3399-efuse",
+		.data = (void *)&rockchip_rk3399_efuse_read,
+	},
 	{ /* sentinel */},
 };
 MODULE_DEVICE_TABLE(of, rockchip_efuse_match);
@@ -99,6 +186,14 @@
 	struct resource *res;
 	struct nvmem_device *nvmem;
 	struct rockchip_efuse_chip *efuse;
+	const struct of_device_id *match;
+	struct device *dev = &pdev->dev;
+
+	match = of_match_device(dev->driver->of_match_table, dev);
+	if (!match || !match->data) {
+		dev_err(dev, "failed to get match data\n");
+		return -EINVAL;
+	}
 
 	efuse = devm_kzalloc(&pdev->dev, sizeof(struct rockchip_efuse_chip),
 			     GFP_KERNEL);
@@ -116,7 +211,7 @@
 
 	efuse->dev = &pdev->dev;
 	econfig.size = resource_size(res);
-	econfig.reg_read = rockchip_efuse_read;
+	econfig.reg_read = match->data;
 	econfig.priv = efuse;
 	econfig.dev = efuse->dev;
 	nvmem = nvmem_register(&econfig);
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 7792266..3ce6953 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -1631,8 +1631,7 @@
 	 */
 
  err:
-	if (it.node)
-		of_node_put(it.node);
+	of_node_put(it.node);
 	return rc;
 }
 
@@ -2343,20 +2342,13 @@
 	const struct device_node *parent, int port_reg, int reg)
 {
 	struct of_endpoint endpoint;
-	struct device_node *node, *prev_node = NULL;
+	struct device_node *node = NULL;
 
-	while (1) {
-		node = of_graph_get_next_endpoint(parent, prev_node);
-		of_node_put(prev_node);
-		if (!node)
-			break;
-
+	for_each_endpoint_of_node(parent, node) {
 		of_graph_parse_endpoint(node, &endpoint);
 		if (((port_reg == -1) || (endpoint.port == port_reg)) &&
 			((reg == -1) || (endpoint.id == reg)))
 			return node;
-
-		prev_node = node;
 	}
 
 	return NULL;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 55f1b83..c89d5d2 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -517,7 +517,7 @@
 		pr_warning("End of tree marker overwritten: %08x\n",
 			   be32_to_cpup(mem + size));
 
-	if (detached) {
+	if (detached && mynodes) {
 		of_node_set_flag(*mynodes, OF_DETACHED);
 		pr_debug("unflattened tree is detached\n");
 	}
@@ -924,7 +924,7 @@
 
 #ifdef CONFIG_SERIAL_EARLYCON
 
-static int __init early_init_dt_scan_chosen_serial(void)
+int __init early_init_dt_scan_chosen_stdout(void)
 {
 	int offset;
 	const char *p, *q, *options = NULL;
@@ -968,15 +968,6 @@
 	}
 	return -ENODEV;
 }
-
-static int __init setup_of_earlycon(char *buf)
-{
-	if (buf)
-		return 0;
-
-	return early_init_dt_scan_chosen_serial();
-}
-early_param("earlycon", setup_of_earlycon);
 #endif
 
 /**
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 89a71c6..a2e68f7 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -544,12 +544,15 @@
 
 			list_del(&desc->list);
 
+			of_node_set_flag(desc->dev, OF_POPULATED);
+
 			pr_debug("of_irq_init: init %s (%p), parent %p\n",
 				 desc->dev->full_name,
 				 desc->dev, desc->interrupt_parent);
 			ret = desc->irq_init_cb(desc->dev,
 						desc->interrupt_parent);
 			if (ret) {
+				of_node_clear_flag(desc->dev, OF_POPULATED);
 				kfree(desc);
 				continue;
 			}
@@ -559,8 +562,6 @@
 			 * its children can get processed in a subsequent pass.
 			 */
 			list_add_tail(&desc->list, &intc_parent_list);
-
-			of_node_set_flag(desc->dev, OF_POPULATED);
 		}
 
 		/* Get the next pending parent that might have children */
diff --git a/drivers/of/of_numa.c b/drivers/of/of_numa.c
index ed5a097..f63d4b0d 100644
--- a/drivers/of/of_numa.c
+++ b/drivers/of/of_numa.c
@@ -16,6 +16,8 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#define pr_fmt(fmt) "OF: NUMA: " fmt
+
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/nodemask.h>
@@ -49,10 +51,9 @@
 		if (r)
 			continue;
 
-		pr_debug("NUMA: CPU on %u\n", nid);
+		pr_debug("CPU on %u\n", nid);
 		if (nid >= MAX_NUMNODES)
-			pr_warn("NUMA: Node id %u exceeds maximum value\n",
-				nid);
+			pr_warn("Node id %u exceeds maximum value\n", nid);
 		else
 			node_set(nid, numa_nodes_parsed);
 	}
@@ -63,13 +64,9 @@
 	struct device_node *np = NULL;
 	struct resource rsrc;
 	u32 nid;
-	int r = 0;
+	int i, r;
 
-	for (;;) {
-		np = of_find_node_by_type(np, "memory");
-		if (!np)
-			break;
-
+	for_each_node_by_type(np, "memory") {
 		r = of_property_read_u32(np, "numa-node-id", &nid);
 		if (r == -EINVAL)
 			/*
@@ -78,27 +75,23 @@
 			 * "numa-node-id" property
 			 */
 			continue;
-		else if (r)
-			/* some other error */
-			break;
 
-		r = of_address_to_resource(np, 0, &rsrc);
-		if (r) {
-			pr_err("NUMA: bad reg property in memory node\n");
-			break;
+		if (nid >= MAX_NUMNODES) {
+			pr_warn("Node id %u exceeds maximum value\n", nid);
+			r = -EINVAL;
 		}
 
-		pr_debug("NUMA:  base = %llx len = %llx, node = %u\n",
-			 rsrc.start, rsrc.end - rsrc.start + 1, nid);
+		for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++)
+			r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);
 
-
-		r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);
-		if (r)
-			break;
+		if (!i || r) {
+			of_node_put(np);
+			pr_err("bad property in memory node\n");
+			return r ? : -EINVAL;
+		}
 	}
-	of_node_put(np);
 
-	return r;
+	return 0;
 }
 
 static int __init of_numa_parse_distance_map_v1(struct device_node *map)
@@ -107,17 +100,17 @@
 	int entry_count;
 	int i;
 
-	pr_info("NUMA: parsing numa-distance-map-v1\n");
+	pr_info("parsing numa-distance-map-v1\n");
 
 	matrix = of_get_property(map, "distance-matrix", NULL);
 	if (!matrix) {
-		pr_err("NUMA: No distance-matrix property in distance-map\n");
+		pr_err("No distance-matrix property in distance-map\n");
 		return -EINVAL;
 	}
 
 	entry_count = of_property_count_u32_elems(map, "distance-matrix");
 	if (entry_count <= 0) {
-		pr_err("NUMA: Invalid distance-matrix\n");
+		pr_err("Invalid distance-matrix\n");
 		return -EINVAL;
 	}
 
@@ -132,7 +125,7 @@
 		matrix++;
 
 		numa_set_distance(nodea, nodeb, distance);
-		pr_debug("NUMA:  distance[node%d -> node%d] = %d\n",
+		pr_debug("distance[node%d -> node%d] = %d\n",
 			 nodea, nodeb, distance);
 
 		/* Set default distance of node B->A same as A->B */
@@ -166,8 +159,6 @@
 	np = of_node_get(device);
 
 	while (np) {
-		struct device_node *parent;
-
 		r = of_property_read_u32(np, "numa-node-id", &nid);
 		/*
 		 * -EINVAL indicates the property was not found, and
@@ -178,22 +169,15 @@
 		if (r != -EINVAL)
 			break;
 
-		parent = of_get_parent(np);
-		of_node_put(np);
-		np = parent;
+		np = of_get_next_parent(np);
 	}
 	if (np && r)
-		pr_warn("NUMA: Invalid \"numa-node-id\" property in node %s\n",
+		pr_warn("Invalid \"numa-node-id\" property in node %s\n",
 			np->name);
 	of_node_put(np);
 
-	if (!r) {
-		if (nid >= MAX_NUMNODES)
-			pr_warn("NUMA: Node id %u exceeds maximum value\n",
-				nid);
-		else
-			return nid;
-	}
+	if (!r)
+		return nid;
 
 	return NUMA_NO_NODE;
 }
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 8aa1976..f39ccd5 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -497,6 +497,7 @@
 }
 EXPORT_SYMBOL_GPL(of_platform_default_populate);
 
+#ifndef CONFIG_PPC
 static int __init of_platform_default_populate_init(void)
 {
 	struct device_node *node;
@@ -521,6 +522,7 @@
 	return 0;
 }
 arch_initcall_sync(of_platform_default_populate_init);
+#endif
 
 static int of_platform_device_destroy(struct device *dev, void *data)
 {
diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c
index bdef916..2498a6cd 100644
--- a/drivers/oprofile/timer_int.c
+++ b/drivers/oprofile/timer_int.c
@@ -74,37 +74,39 @@
 	put_online_cpus();
 }
 
-static int oprofile_cpu_notify(struct notifier_block *self,
-			       unsigned long action, void *hcpu)
+static int oprofile_timer_online(unsigned int cpu)
 {
-	long cpu = (long) hcpu;
-
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		smp_call_function_single(cpu, __oprofile_hrtimer_start,
-					 NULL, 1);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		__oprofile_hrtimer_stop(cpu);
-		break;
-	}
-	return NOTIFY_OK;
+	local_irq_disable();
+	__oprofile_hrtimer_start(NULL);
+	local_irq_enable();
+	return 0;
 }
 
-static struct notifier_block __refdata oprofile_cpu_notifier = {
-	.notifier_call = oprofile_cpu_notify,
-};
+static int oprofile_timer_prep_down(unsigned int cpu)
+{
+	__oprofile_hrtimer_stop(cpu);
+	return 0;
+}
+
+static enum cpuhp_state hp_online;
 
 static int oprofile_hrtimer_setup(void)
 {
-	return register_hotcpu_notifier(&oprofile_cpu_notifier);
+	int ret;
+
+	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+					"oprofile/timer:online",
+					oprofile_timer_online,
+					oprofile_timer_prep_down);
+	if (ret < 0)
+		return ret;
+	hp_online = ret;
+	return 0;
 }
 
 static void oprofile_hrtimer_shutdown(void)
 {
-	unregister_hotcpu_notifier(&oprofile_cpu_notifier);
+	cpuhp_remove_state_nocalls(hp_online);
 }
 
 int oprofile_timer_init(struct oprofile_operations *ops)
diff --git a/drivers/pci/host-bridge.c b/drivers/pci/host-bridge.c
index 5f4a2e0..add6623 100644
--- a/drivers/pci/host-bridge.c
+++ b/drivers/pci/host-bridge.c
@@ -44,6 +44,7 @@
 	bridge->release_fn = release_fn;
 	bridge->release_data = release_data;
 }
+EXPORT_SYMBOL_GPL(pci_set_host_bridge_release);
 
 void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region,
 			     struct resource *res)
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index a02981e..bfdd074 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -19,6 +19,7 @@
 #include <linux/smp.h>
 #include <linux/errno.h>
 #include <linux/io.h>
+#include <linux/acpi_iort.h>
 #include <linux/slab.h>
 #include <linux/irqdomain.h>
 #include <linux/of_irq.h>
@@ -549,15 +550,23 @@
 	return ret;
 }
 
-static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
+static struct msi_desc *
+msi_setup_entry(struct pci_dev *dev, int nvec, bool affinity)
 {
-	u16 control;
+	struct cpumask *masks = NULL;
 	struct msi_desc *entry;
+	u16 control;
+
+	if (affinity) {
+		masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+		if (!masks)
+			pr_err("Unable to allocate affinity masks, ignoring\n");
+	}
 
 	/* MSI Entry Initialization */
-	entry = alloc_msi_entry(&dev->dev);
+	entry = alloc_msi_entry(&dev->dev, nvec, masks);
 	if (!entry)
-		return NULL;
+		goto out;
 
 	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
 
@@ -568,8 +577,6 @@
 	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
 	entry->msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
-	entry->nvec_used		= nvec;
-	entry->affinity			= dev->irq_affinity;
 
 	if (control & PCI_MSI_FLAGS_64BIT)
 		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -580,6 +587,8 @@
 	if (entry->msi_attrib.maskbit)
 		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
 
+out:
+	kfree(masks);
 	return entry;
 }
 
@@ -608,7 +617,7 @@
  * an error, and a positive return value indicates the number of interrupts
  * which could have been allocated.
  */
-static int msi_capability_init(struct pci_dev *dev, int nvec)
+static int msi_capability_init(struct pci_dev *dev, int nvec, bool affinity)
 {
 	struct msi_desc *entry;
 	int ret;
@@ -616,7 +625,7 @@
 
 	pci_msi_set_enable(dev, 0);	/* Disable MSI during set up */
 
-	entry = msi_setup_entry(dev, nvec);
+	entry = msi_setup_entry(dev, nvec, affinity);
 	if (!entry)
 		return -ENOMEM;
 
@@ -679,28 +688,29 @@
 }
 
 static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
-			      struct msix_entry *entries, int nvec)
+			      struct msix_entry *entries, int nvec,
+			      bool affinity)
 {
-	const struct cpumask *mask = NULL;
+	struct cpumask *curmsk, *masks = NULL;
 	struct msi_desc *entry;
-	int cpu = -1, i;
+	int ret, i;
 
-	for (i = 0; i < nvec; i++) {
-		if (dev->irq_affinity) {
-			cpu = cpumask_next(cpu, dev->irq_affinity);
-			if (cpu >= nr_cpu_ids)
-				cpu = cpumask_first(dev->irq_affinity);
-			mask = cpumask_of(cpu);
-		}
+	if (affinity) {
+		masks = irq_create_affinity_masks(dev->irq_affinity, nvec);
+		if (!masks)
+			pr_err("Unable to allocate affinity masks, ignoring\n");
+	}
 
-		entry = alloc_msi_entry(&dev->dev);
+	for (i = 0, curmsk = masks; i < nvec; i++) {
+		entry = alloc_msi_entry(&dev->dev, 1, curmsk);
 		if (!entry) {
 			if (!i)
 				iounmap(base);
 			else
 				free_msi_irqs(dev);
 			/* No enough memory. Don't try again */
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto out;
 		}
 
 		entry->msi_attrib.is_msix	= 1;
@@ -711,12 +721,14 @@
 			entry->msi_attrib.entry_nr = i;
 		entry->msi_attrib.default_irq	= dev->irq;
 		entry->mask_base		= base;
-		entry->nvec_used		= 1;
-		entry->affinity			= mask;
 
 		list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
+		if (masks)
+			curmsk++;
 	}
-
+	ret = 0;
+out:
+	kfree(masks);
 	return 0;
 }
 
@@ -745,8 +757,8 @@
  * single MSI-X irq. A return of zero indicates the successful setup of
  * requested MSI-X entries with allocated irqs or non-zero for otherwise.
  **/
-static int msix_capability_init(struct pci_dev *dev,
-				struct msix_entry *entries, int nvec)
+static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries,
+				int nvec, bool affinity)
 {
 	int ret;
 	u16 control;
@@ -761,7 +773,7 @@
 	if (!base)
 		return -ENOMEM;
 
-	ret = msix_setup_entries(dev, base, entries, nvec);
+	ret = msix_setup_entries(dev, base, entries, nvec, affinity);
 	if (ret)
 		return ret;
 
@@ -941,22 +953,8 @@
 }
 EXPORT_SYMBOL(pci_msix_vec_count);
 
-/**
- * pci_enable_msix - configure device's MSI-X capability structure
- * @dev: pointer to the pci_dev data structure of MSI-X device function
- * @entries: pointer to an array of MSI-X entries (optional)
- * @nvec: number of MSI-X irqs requested for allocation by device driver
- *
- * Setup the MSI-X capability structure of device function with the number
- * of requested irqs upon its software driver call to request for
- * MSI-X mode enabled on its hardware device function. A return of zero
- * indicates the successful configuration of MSI-X capability structure
- * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
- * Or a return of > 0 indicates that driver request is exceeding the number
- * of irqs or MSI-X vectors available. Driver should use the returned value to
- * re-send its request.
- **/
-int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
+			     int nvec, bool affinity)
 {
 	int nr_entries;
 	int i, j;
@@ -988,7 +986,27 @@
 		dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
 		return -EINVAL;
 	}
-	return msix_capability_init(dev, entries, nvec);
+	return msix_capability_init(dev, entries, nvec, affinity);
+}
+
+/**
+ * pci_enable_msix - configure device's MSI-X capability structure
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
+ * @entries: pointer to an array of MSI-X entries (optional)
+ * @nvec: number of MSI-X irqs requested for allocation by device driver
+ *
+ * Setup the MSI-X capability structure of device function with the number
+ * of requested irqs upon its software driver call to request for
+ * MSI-X mode enabled on its hardware device function. A return of zero
+ * indicates the successful configuration of MSI-X capability structure
+ * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
+ * Or a return of > 0 indicates that driver request is exceeding the number
+ * of irqs or MSI-X vectors available. Driver should use the returned value to
+ * re-send its request.
+ **/
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
+{
+	return __pci_enable_msix(dev, entries, nvec, false);
 }
 EXPORT_SYMBOL(pci_enable_msix);
 
@@ -1041,6 +1059,7 @@
 static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
 		unsigned int flags)
 {
+	bool affinity = flags & PCI_IRQ_AFFINITY;
 	int nvec;
 	int rc;
 
@@ -1069,19 +1088,17 @@
 		nvec = maxvec;
 
 	for (;;) {
-		if (!(flags & PCI_IRQ_NOAFFINITY)) {
-			dev->irq_affinity = irq_create_affinity_mask(&nvec);
+		if (affinity) {
+			nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+					nvec);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
 
-		rc = msi_capability_init(dev, nvec);
+		rc = msi_capability_init(dev, nvec, affinity);
 		if (rc == 0)
 			return nvec;
 
-		kfree(dev->irq_affinity);
-		dev->irq_affinity = NULL;
-
 		if (rc < 0)
 			return rc;
 		if (rc < minvec)
@@ -1105,7 +1122,7 @@
  **/
 int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
 {
-	return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY);
+	return __pci_enable_msi_range(dev, minvec, maxvec, 0);
 }
 EXPORT_SYMBOL(pci_enable_msi_range);
 
@@ -1113,26 +1130,24 @@
 		struct msix_entry *entries, int minvec, int maxvec,
 		unsigned int flags)
 {
-	int nvec = maxvec;
-	int rc;
+	bool affinity = flags & PCI_IRQ_AFFINITY;
+	int rc, nvec = maxvec;
 
 	if (maxvec < minvec)
 		return -ERANGE;
 
 	for (;;) {
-		if (!(flags & PCI_IRQ_NOAFFINITY)) {
-			dev->irq_affinity = irq_create_affinity_mask(&nvec);
+		if (affinity) {
+			nvec = irq_calc_affinity_vectors(dev->irq_affinity,
+					nvec);
 			if (nvec < minvec)
 				return -ENOSPC;
 		}
 
-		rc = pci_enable_msix(dev, entries, nvec);
+		rc = __pci_enable_msix(dev, entries, nvec, affinity);
 		if (rc == 0)
 			return nvec;
 
-		kfree(dev->irq_affinity);
-		dev->irq_affinity = NULL;
-
 		if (rc < 0)
 			return rc;
 		if (rc < minvec)
@@ -1160,8 +1175,7 @@
 int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
 		int minvec, int maxvec)
 {
-	return __pci_enable_msix_range(dev, entries, minvec, maxvec,
-			PCI_IRQ_NOAFFINITY);
+	return __pci_enable_msix_range(dev, entries, minvec, maxvec, 0);
 }
 EXPORT_SYMBOL(pci_enable_msix_range);
 
@@ -1187,22 +1201,25 @@
 {
 	int vecs = -ENOSPC;
 
-	if (!(flags & PCI_IRQ_NOMSIX)) {
+	if (flags & PCI_IRQ_MSIX) {
 		vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
 				flags);
 		if (vecs > 0)
 			return vecs;
 	}
 
-	if (!(flags & PCI_IRQ_NOMSI)) {
+	if (flags & PCI_IRQ_MSI) {
 		vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags);
 		if (vecs > 0)
 			return vecs;
 	}
 
 	/* use legacy irq if allowed */
-	if (!(flags & PCI_IRQ_NOLEGACY) && min_vecs == 1)
+	if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) {
+		pci_intx(dev, 1);
 		return 1;
+	}
+
 	return vecs;
 }
 EXPORT_SYMBOL(pci_alloc_irq_vectors);
@@ -1254,6 +1271,37 @@
 }
 EXPORT_SYMBOL(pci_irq_vector);
 
+/**
+ * pci_irq_get_affinity - return the affinity of a particular msi vector
+ * @dev:	PCI device to operate on
+ * @nr:		device-relative interrupt vector index (0-based).
+ */
+const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr)
+{
+	if (dev->msix_enabled) {
+		struct msi_desc *entry;
+		int i = 0;
+
+		for_each_pci_msi_entry(entry, dev) {
+			if (i == nr)
+				return entry->affinity;
+			i++;
+		}
+		WARN_ON_ONCE(1);
+		return NULL;
+	} else if (dev->msi_enabled) {
+		struct msi_desc *entry = first_pci_msi_entry(dev);
+
+		if (WARN_ON_ONCE(!entry || nr >= entry->nvec_used))
+			return NULL;
+
+		return &entry->affinity[nr];
+	} else {
+		return cpu_possible_mask;
+	}
+}
+EXPORT_SYMBOL(pci_irq_get_affinity);
+
 struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
 {
 	return to_pci_dev(desc->dev);
@@ -1411,6 +1459,8 @@
 	if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS)
 		pci_msi_domain_update_chip_ops(info);
 
+	info->flags |= MSI_FLAG_ACTIVATE_EARLY;
+
 	domain = msi_create_irq_domain(fwnode, info, parent);
 	if (!domain)
 		return NULL;
@@ -1498,8 +1548,8 @@
 	pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
 
 	of_node = irq_domain_get_of_node(domain);
-	if (of_node)
-		rid = of_msi_map_rid(&pdev->dev, of_node, rid);
+	rid = of_node ? of_msi_map_rid(&pdev->dev, of_node, rid) :
+			iort_msi_map_rid(&pdev->dev, rid);
 
 	return rid;
 }
@@ -1515,9 +1565,13 @@
  */
 struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev)
 {
+	struct irq_domain *dom;
 	u32 rid = 0;
 
 	pci_for_each_dma_alias(pdev, get_msi_id_cb, &rid);
-	return of_msi_map_get_device_domain(&pdev->dev, rid);
+	dom = of_msi_map_get_device_domain(&pdev->dev, rid);
+	if (!dom)
+		dom = iort_get_device_domain(&pdev->dev, rid);
+	return dom;
 }
 #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */
diff --git a/drivers/pci/pci-mid.c b/drivers/pci/pci-mid.c
index c878aa7..55f453d 100644
--- a/drivers/pci/pci-mid.c
+++ b/drivers/pci/pci-mid.c
@@ -60,8 +60,13 @@
 
 #define ICPU(model)	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, }
 
+/*
+ * This table should be in sync with the one in
+ * arch/x86/platform/intel-mid/pwr.c.
+ */
 static const struct x86_cpu_id lpss_cpu_ids[] = {
-	ICPU(INTEL_FAM6_ATOM_MERRIFIELD1),
+	ICPU(INTEL_FAM6_ATOM_PENWELL),
+	ICPU(INTEL_FAM6_ATOM_MERRIFIELD),
 	{}
 };
 
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index aab9d51..415956c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -480,6 +480,30 @@
 EXPORT_SYMBOL(pci_find_parent_resource);
 
 /**
+ * pci_find_resource - Return matching PCI device resource
+ * @dev: PCI device to query
+ * @res: Resource to look for
+ *
+ * Goes over standard PCI resources (BARs) and checks if the given resource
+ * is partially or fully contained in any of them. In that case the
+ * matching resource is returned, %NULL otherwise.
+ */
+struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res)
+{
+	int i;
+
+	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+		struct resource *r = &dev->resource[i];
+
+		if (r->start && resource_contains(r, res))
+			return r;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL(pci_find_resource);
+
+/**
  * pci_find_pcie_root_port - return PCIe Root Port
  * @dev: PCI device to query
  *
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 37ff015..44e0ff3 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3327,9 +3327,9 @@
 	if (nhi->vendor != PCI_VENDOR_ID_INTEL
 		    || (nhi->device != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE &&
 			nhi->device != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C &&
+			nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI &&
 			nhi->device != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI)
-		    || nhi->subsystem_vendor != 0x2222
-		    || nhi->subsystem_device != 0x1111)
+		    || nhi->class != PCI_CLASS_SYSTEM_OTHER << 8)
 		goto out;
 	dev_info(&dev->dev, "quirk: waiting for thunderbolt to reestablish PCI tunnels...\n");
 	device_pm_wait_for_dev(&dev->dev, &nhi->dev);
@@ -3344,6 +3344,9 @@
 			       PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C,
 			       quirk_apple_wait_for_thunderbolt);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
+			       PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE,
+			       quirk_apple_wait_for_thunderbolt);
+DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_INTEL,
 			       PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE,
 			       quirk_apple_wait_for_thunderbolt);
 #endif
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index d1ef7ac..f9357e0 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -40,6 +40,7 @@
 	list_del(&dev->bus_list);
 	up_write(&pci_bus_sem);
 
+	pci_bridge_d3_device_removed(dev);
 	pci_free_resources(dev);
 	put_device(&dev->dev);
 }
@@ -96,8 +97,6 @@
 		dev->subordinate = NULL;
 	}
 
-	pci_bridge_d3_device_removed(dev);
-
 	pci_destroy_dev(dev);
 }
 
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index c74059e..f30ca75 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -25,6 +25,7 @@
 #include <linux/ioport.h>
 #include <linux/cache.h>
 #include <linux/slab.h>
+#include <linux/acpi.h>
 #include "pci.h"
 
 unsigned int pci_flags;
@@ -1852,8 +1853,13 @@
 {
 	struct pci_bus *root_bus;
 
-	list_for_each_entry(root_bus, &pci_root_buses, node)
+	list_for_each_entry(root_bus, &pci_root_buses, node) {
 		pci_assign_unassigned_root_bus_resources(root_bus);
+
+		/* Make sure the root bridge has a companion ACPI device: */
+		if (ACPI_HANDLE(root_bus->bridge))
+			acpi_ioapic_add(ACPI_HANDLE(root_bus->bridge));
+	}
 }
 
 void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge)
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 489ea10..69b5e81 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -977,7 +977,7 @@
 
 /************************ runtime PM support ***************************/
 
-static int pcmcia_dev_suspend(struct device *dev, pm_message_t state);
+static int pcmcia_dev_suspend(struct device *dev);
 static int pcmcia_dev_resume(struct device *dev);
 
 static int runtime_suspend(struct device *dev)
@@ -985,7 +985,7 @@
 	int rc;
 
 	device_lock(dev);
-	rc = pcmcia_dev_suspend(dev, PMSG_SUSPEND);
+	rc = pcmcia_dev_suspend(dev);
 	device_unlock(dev);
 	return rc;
 }
@@ -1135,7 +1135,7 @@
 
 /* PM support, also needed for reset */
 
-static int pcmcia_dev_suspend(struct device *dev, pm_message_t state)
+static int pcmcia_dev_suspend(struct device *dev)
 {
 	struct pcmcia_device *p_dev = to_pcmcia_dev(dev);
 	struct pcmcia_driver *p_drv = NULL;
@@ -1410,6 +1410,9 @@
 	.remove_dev = &pcmcia_bus_remove_socket,
 };
 
+static const struct dev_pm_ops pcmcia_bus_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(pcmcia_dev_suspend, pcmcia_dev_resume)
+};
 
 struct bus_type pcmcia_bus_type = {
 	.name = "pcmcia",
@@ -1418,8 +1421,7 @@
 	.dev_groups = pcmcia_dev_groups,
 	.probe = pcmcia_device_probe,
 	.remove = pcmcia_device_remove,
-	.suspend = pcmcia_dev_suspend,
-	.resume = pcmcia_dev_resume,
+	.pm = &pcmcia_bus_pm_ops,
 };
 
 
diff --git a/drivers/pcmcia/pxa2xx_base.c b/drivers/pcmcia/pxa2xx_base.c
index 483f919..91b5f57 100644
--- a/drivers/pcmcia/pxa2xx_base.c
+++ b/drivers/pcmcia/pxa2xx_base.c
@@ -214,9 +214,8 @@
 }
 #endif
 
-void pxa2xx_configure_sockets(struct device *dev)
+void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops)
 {
-	struct pcmcia_low_level *ops = dev->platform_data;
 	/*
 	 * We have at least one socket, so set MECR:CIT
 	 * (Card Is There)
@@ -322,7 +321,7 @@
 			goto err1;
 	}
 
-	pxa2xx_configure_sockets(&dev->dev);
+	pxa2xx_configure_sockets(&dev->dev, ops);
 	dev_set_drvdata(&dev->dev, sinfo);
 
 	return 0;
@@ -348,7 +347,9 @@
 
 static int pxa2xx_drv_pcmcia_resume(struct device *dev)
 {
-	pxa2xx_configure_sockets(dev);
+	struct pcmcia_low_level *ops = (struct pcmcia_low_level *)dev->platform_data;
+
+	pxa2xx_configure_sockets(dev, ops);
 	return 0;
 }
 
diff --git a/drivers/pcmcia/pxa2xx_base.h b/drivers/pcmcia/pxa2xx_base.h
index b609b45..e58c7a4 100644
--- a/drivers/pcmcia/pxa2xx_base.h
+++ b/drivers/pcmcia/pxa2xx_base.h
@@ -1,4 +1,4 @@
 int pxa2xx_drv_pcmcia_add_one(struct soc_pcmcia_socket *skt);
 void pxa2xx_drv_pcmcia_ops(struct pcmcia_low_level *ops);
-void pxa2xx_configure_sockets(struct device *dev);
+void pxa2xx_configure_sockets(struct device *dev, struct pcmcia_low_level *ops);
 
diff --git a/drivers/pcmcia/sa1111_badge4.c b/drivers/pcmcia/sa1111_badge4.c
index 12f0dd0..2f49093 100644
--- a/drivers/pcmcia/sa1111_badge4.c
+++ b/drivers/pcmcia/sa1111_badge4.c
@@ -134,20 +134,14 @@
 
 int pcmcia_badge4_init(struct sa1111_dev *dev)
 {
-	int ret = -ENODEV;
+	printk(KERN_INFO
+	       "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n",
+	       __func__,
+	       badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc);
 
-	if (machine_is_badge4()) {
-		printk(KERN_INFO
-		       "%s: badge4_pcmvcc=%d, badge4_pcmvpp=%d, badge4_cfvcc=%d\n",
-		       __func__,
-		       badge4_pcmvcc, badge4_pcmvpp, badge4_cfvcc);
-
-		sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops);
-		ret = sa1111_pcmcia_add(dev, &badge4_pcmcia_ops,
-				sa11xx_drv_pcmcia_add_one);
-	}
-
-	return ret;
+	sa11xx_drv_pcmcia_ops(&badge4_pcmcia_ops);
+	return sa1111_pcmcia_add(dev, &badge4_pcmcia_ops,
+				 sa11xx_drv_pcmcia_add_one);
 }
 
 static int __init pcmv_setup(char *s)
diff --git a/drivers/pcmcia/sa1111_generic.c b/drivers/pcmcia/sa1111_generic.c
index a1531fe..3d95dff 100644
--- a/drivers/pcmcia/sa1111_generic.c
+++ b/drivers/pcmcia/sa1111_generic.c
@@ -18,6 +18,7 @@
 
 #include <mach/hardware.h>
 #include <asm/hardware/sa1111.h>
+#include <asm/mach-types.h>
 #include <asm/irq.h>
 
 #include "sa1111_generic.h"
@@ -203,19 +204,30 @@
 	sa1111_writel(PCSSR_S0_SLEEP | PCSSR_S1_SLEEP, base + PCSSR);
 	sa1111_writel(PCCR_S0_FLT | PCCR_S1_FLT, base + PCCR);
 
+	ret = -ENODEV;
 #ifdef CONFIG_SA1100_BADGE4
-	pcmcia_badge4_init(dev);
+	if (machine_is_badge4())
+		ret = pcmcia_badge4_init(dev);
 #endif
 #ifdef CONFIG_SA1100_JORNADA720
-	pcmcia_jornada720_init(dev);
+	if (machine_is_jornada720())
+		ret = pcmcia_jornada720_init(dev);
 #endif
 #ifdef CONFIG_ARCH_LUBBOCK
-	pcmcia_lubbock_init(dev);
+	if (machine_is_lubbock())
+		ret = pcmcia_lubbock_init(dev);
 #endif
 #ifdef CONFIG_ASSABET_NEPONSET
-	pcmcia_neponset_init(dev);
+	if (machine_is_assabet())
+		ret = pcmcia_neponset_init(dev);
 #endif
-	return 0;
+
+	if (ret) {
+		release_mem_region(dev->res.start, 512);
+		sa1111_disable_device(dev);
+	}
+
+	return ret;
 }
 
 static int pcmcia_remove(struct sa1111_dev *dev)
diff --git a/drivers/pcmcia/sa1111_jornada720.c b/drivers/pcmcia/sa1111_jornada720.c
index c2c3058..480a3ed 100644
--- a/drivers/pcmcia/sa1111_jornada720.c
+++ b/drivers/pcmcia/sa1111_jornada720.c
@@ -94,22 +94,17 @@
 
 int pcmcia_jornada720_init(struct sa1111_dev *sadev)
 {
-	int ret = -ENODEV;
+	unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3;
 
-	if (machine_is_jornada720()) {
-		unsigned int pin = GPIO_A0 | GPIO_A1 | GPIO_A2 | GPIO_A3;
+	/* Fixme: why messing around with SA11x0's GPIO1? */
+	GRER |= 0x00000002;
 
-		GRER |= 0x00000002;
+	/* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */
+	sa1111_set_io_dir(sadev, pin, 0, 0);
+	sa1111_set_io(sadev, pin, 0);
+	sa1111_set_sleep_io(sadev, pin, 0);
 
-		/* Set GPIO_A<3:1> to be outputs for PCMCIA/CF power controller: */
-		sa1111_set_io_dir(sadev, pin, 0, 0);
-		sa1111_set_io(sadev, pin, 0);
-		sa1111_set_sleep_io(sadev, pin, 0);
-
-		sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops);
-		ret = sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops,
-				sa11xx_drv_pcmcia_add_one);
-	}
-
-	return ret;
+	sa11xx_drv_pcmcia_ops(&jornada720_pcmcia_ops);
+	return sa1111_pcmcia_add(sadev, &jornada720_pcmcia_ops,
+				 sa11xx_drv_pcmcia_add_one);
 }
diff --git a/drivers/pcmcia/sa1111_lubbock.c b/drivers/pcmcia/sa1111_lubbock.c
index c5caf57..e741f49 100644
--- a/drivers/pcmcia/sa1111_lubbock.c
+++ b/drivers/pcmcia/sa1111_lubbock.c
@@ -210,27 +210,21 @@
 
 int pcmcia_lubbock_init(struct sa1111_dev *sadev)
 {
-	int ret = -ENODEV;
+	/*
+	 * Set GPIO_A<3:0> to be outputs for the MAX1600,
+	 * and switch to standby mode.
+	 */
+	sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0);
+	sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
+	sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
 
-	if (machine_is_lubbock()) {
-		/*
-		 * Set GPIO_A<3:0> to be outputs for the MAX1600,
-		 * and switch to standby mode.
-		 */
-		sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0);
-		sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
-		sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
+	/* Set CF Socket 1 power to standby mode. */
+	lubbock_set_misc_wr((1 << 15) | (1 << 14), 0);
 
-		/* Set CF Socket 1 power to standby mode. */
-		lubbock_set_misc_wr((1 << 15) | (1 << 14), 0);
-
-		pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops);
-		pxa2xx_configure_sockets(&sadev->dev);
-		ret = sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops,
-				pxa2xx_drv_pcmcia_add_one);
-	}
-
-	return ret;
+	pxa2xx_drv_pcmcia_ops(&lubbock_pcmcia_ops);
+	pxa2xx_configure_sockets(&sadev->dev, &lubbock_pcmcia_ops);
+	return sa1111_pcmcia_add(sadev, &lubbock_pcmcia_ops,
+				 pxa2xx_drv_pcmcia_add_one);
 }
 
 MODULE_LICENSE("GPL");
diff --git a/drivers/pcmcia/sa1111_neponset.c b/drivers/pcmcia/sa1111_neponset.c
index 1d78739..019c395 100644
--- a/drivers/pcmcia/sa1111_neponset.c
+++ b/drivers/pcmcia/sa1111_neponset.c
@@ -110,20 +110,14 @@
 
 int pcmcia_neponset_init(struct sa1111_dev *sadev)
 {
-	int ret = -ENODEV;
-
-	if (machine_is_assabet()) {
-		/*
-		 * Set GPIO_A<3:0> to be outputs for the MAX1600,
-		 * and switch to standby mode.
-		 */
-		sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0);
-		sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
-		sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
-		sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops);
-		ret = sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops,
-				sa11xx_drv_pcmcia_add_one);
-	}
-
-	return ret;
+	/*
+	 * Set GPIO_A<3:0> to be outputs for the MAX1600,
+	 * and switch to standby mode.
+	 */
+	sa1111_set_io_dir(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0, 0);
+	sa1111_set_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
+	sa1111_set_sleep_io(sadev, GPIO_A0|GPIO_A1|GPIO_A2|GPIO_A3, 0);
+	sa11xx_drv_pcmcia_ops(&neponset_pcmcia_ops);
+	return sa1111_pcmcia_add(sadev, &neponset_pcmcia_ops,
+				 sa11xx_drv_pcmcia_add_one);
 }
diff --git a/drivers/pcmcia/sa11xx_base.c b/drivers/pcmcia/sa11xx_base.c
index 9f6ec87..48140ac 100644
--- a/drivers/pcmcia/sa11xx_base.c
+++ b/drivers/pcmcia/sa11xx_base.c
@@ -144,19 +144,19 @@
 sa1100_pcmcia_show_timing(struct soc_pcmcia_socket *skt, char *buf)
 {
 	struct soc_pcmcia_timing timing;
-	unsigned int clock = clk_get_rate(skt->clk);
+	unsigned int clock = clk_get_rate(skt->clk) / 1000;
 	unsigned long mecr = MECR;
 	char *p = buf;
 
 	soc_common_pcmcia_get_timing(skt, &timing);
 
-	p+=sprintf(p, "I/O      : %u (%u)\n", timing.io,
+	p+=sprintf(p, "I/O      : %uns (%uns)\n", timing.io,
 		   sa1100_pcmcia_cmd_time(clock, MECR_BSIO_GET(mecr, skt->nr)));
 
-	p+=sprintf(p, "attribute: %u (%u)\n", timing.attr,
+	p+=sprintf(p, "attribute: %uns (%uns)\n", timing.attr,
 		   sa1100_pcmcia_cmd_time(clock, MECR_BSA_GET(mecr, skt->nr)));
 
-	p+=sprintf(p, "common   : %u (%u)\n", timing.mem,
+	p+=sprintf(p, "common   : %uns (%uns)\n", timing.mem,
 		   sa1100_pcmcia_cmd_time(clock, MECR_BSM_GET(mecr, skt->nr)));
 
 	return p - buf;
diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c
index eed5e9c..d5ca760 100644
--- a/drivers/pcmcia/soc_common.c
+++ b/drivers/pcmcia/soc_common.c
@@ -235,7 +235,7 @@
 	stat |= skt->cs_state.Vcc ? SS_POWERON : 0;
 
 	if (skt->cs_state.flags & SS_IOCARD)
-		stat |= state.bvd1 ? SS_STSCHG : 0;
+		stat |= state.bvd1 ? 0 : SS_STSCHG;
 	else {
 		if (state.bvd1 == 0)
 			stat |= SS_BATDEAD;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 6ccb994..b37b572 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -534,6 +534,24 @@
 	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
 }
 
+static ssize_t armpmu_cpumask_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct arm_pmu *armpmu = to_arm_pmu(dev_get_drvdata(dev));
+	return cpumap_print_to_pagebuf(true, buf, &armpmu->supported_cpus);
+}
+
+static DEVICE_ATTR(cpus, S_IRUGO, armpmu_cpumask_show, NULL);
+
+static struct attribute *armpmu_common_attrs[] = {
+	&dev_attr_cpus.attr,
+	NULL,
+};
+
+static struct attribute_group armpmu_common_attr_group = {
+	.attrs = armpmu_common_attrs,
+};
+
 static void armpmu_init(struct arm_pmu *armpmu)
 {
 	atomic_set(&armpmu->active_events, 0);
@@ -549,7 +567,10 @@
 		.stop		= armpmu_stop,
 		.read		= armpmu_read,
 		.filter_match	= armpmu_filter_match,
+		.attr_groups	= armpmu->attr_groups,
 	};
+	armpmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
+		&armpmu_common_attr_group;
 }
 
 /* Set at runtime when we know what CPU type we are. */
@@ -602,7 +623,7 @@
 	irqs = min(pmu_device->num_resources, num_possible_cpus());
 
 	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
+	if (irq > 0 && irq_is_percpu(irq)) {
 		on_each_cpu_mask(&cpu_pmu->supported_cpus,
 				 cpu_pmu_disable_percpu_irq, &irq, 1);
 		free_percpu_irq(irq, &hw_events->percpu_pmu);
@@ -616,7 +637,7 @@
 			if (!cpumask_test_and_clear_cpu(cpu, &cpu_pmu->active_irqs))
 				continue;
 			irq = platform_get_irq(pmu_device, i);
-			if (irq >= 0)
+			if (irq > 0)
 				free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, cpu));
 		}
 	}
@@ -638,7 +659,7 @@
 	}
 
 	irq = platform_get_irq(pmu_device, 0);
-	if (irq >= 0 && irq_is_percpu(irq)) {
+	if (irq > 0 && irq_is_percpu(irq)) {
 		err = request_percpu_irq(irq, handler, "arm-pmu",
 					 &hw_events->percpu_pmu);
 		if (err) {
@@ -688,28 +709,20 @@
 	return 0;
 }
 
-static DEFINE_MUTEX(arm_pmu_mutex);
-static LIST_HEAD(arm_pmu_list);
-
 /*
  * PMU hardware loses all context when a CPU goes offline.
  * When a CPU is hotplugged back in, since some hardware registers are
  * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
  * junk values out of them.
  */
-static int arm_perf_starting_cpu(unsigned int cpu)
+static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
 {
-	struct arm_pmu *pmu;
+	struct arm_pmu *pmu = hlist_entry_safe(node, struct arm_pmu, node);
 
-	mutex_lock(&arm_pmu_mutex);
-	list_for_each_entry(pmu, &arm_pmu_list, entry) {
-
-		if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
-			continue;
-		if (pmu->reset)
-			pmu->reset(pmu);
-	}
-	mutex_unlock(&arm_pmu_mutex);
+	if (!cpumask_test_cpu(cpu, &pmu->supported_cpus))
+		return 0;
+	if (pmu->reset)
+		pmu->reset(pmu);
 	return 0;
 }
 
@@ -821,9 +834,10 @@
 	if (!cpu_hw_events)
 		return -ENOMEM;
 
-	mutex_lock(&arm_pmu_mutex);
-	list_add_tail(&cpu_pmu->entry, &arm_pmu_list);
-	mutex_unlock(&arm_pmu_mutex);
+	err = cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+					       &cpu_pmu->node);
+	if (err)
+		goto out_free;
 
 	err = cpu_pm_pmu_register(cpu_pmu);
 	if (err)
@@ -859,9 +873,9 @@
 	return 0;
 
 out_unregister:
-	mutex_lock(&arm_pmu_mutex);
-	list_del(&cpu_pmu->entry);
-	mutex_unlock(&arm_pmu_mutex);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+					    &cpu_pmu->node);
+out_free:
 	free_percpu(cpu_hw_events);
 	return err;
 }
@@ -869,9 +883,8 @@
 static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
 {
 	cpu_pm_pmu_unregister(cpu_pmu);
-	mutex_lock(&arm_pmu_mutex);
-	list_del(&cpu_pmu->entry);
-	mutex_unlock(&arm_pmu_mutex);
+	cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_STARTING,
+					    &cpu_pmu->node);
 	free_percpu(cpu_pmu->hw_events);
 }
 
@@ -919,12 +932,13 @@
 
 		/* Check the IRQ type and prohibit a mix of PPIs and SPIs */
 		irq = platform_get_irq(pdev, i);
-		if (irq >= 0) {
+		if (irq > 0) {
 			bool spi = !irq_is_percpu(irq);
 
 			if (i > 0 && spi != using_spi) {
 				pr_err("PPI/SPI IRQ type mismatch for %s!\n",
 					dn->name);
+				of_node_put(dn);
 				kfree(irqs);
 				return -EINVAL;
 			}
@@ -967,11 +981,12 @@
 
 	/* If we didn't manage to parse anything, try the interrupt affinity */
 	if (cpumask_weight(&pmu->supported_cpus) == 0) {
-		if (!using_spi) {
-			/* If using PPIs, check the affinity of the partition */
-			int ret, irq;
+		int irq = platform_get_irq(pdev, 0);
 
-			irq = platform_get_irq(pdev, 0);
+		if (irq > 0 && irq_is_percpu(irq)) {
+			/* If using PPIs, check the affinity of the partition */
+			int ret;
+
 			ret = irq_get_percpu_devid_partition(irq, &pmu->supported_cpus);
 			if (ret) {
 				kfree(irqs);
@@ -1027,7 +1042,7 @@
 		ret = of_pmu_irq_cfg(pmu);
 		if (!ret)
 			ret = init_fn(pmu);
-	} else {
+	} else if (probe_table) {
 		cpumask_setall(&pmu->supported_cpus);
 		ret = probe_current_pmu(pmu, probe_table);
 	}
@@ -1037,6 +1052,7 @@
 		goto out_free;
 	}
 
+
 	ret = cpu_pmu_init(pmu);
 	if (ret)
 		goto out_free;
@@ -1067,9 +1083,9 @@
 {
 	int ret;
 
-	ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_STARTING,
-					"AP_PERF_ARM_STARTING",
-					arm_perf_starting_cpu, NULL);
+	ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_STARTING,
+				      "AP_PERF_ARM_STARTING",
+				      arm_perf_starting_cpu, NULL);
 	if (ret)
 		pr_err("CPU hotplug notifier for ARM PMU could not be registered: %d\n",
 		       ret);
diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 19bff3a..fe00f91 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -24,6 +24,15 @@
 	  Enable this to support Broadcom USB 2.0 PHY connected to the USB
 	  controller on Northstar family.
 
+config PHY_BCM_NS_USB3
+	tristate "Broadcom Northstar USB 3.0 PHY Driver"
+	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	depends on HAS_IOMEM && OF
+	select GENERIC_PHY
+	help
+	  Enable this to support Broadcom USB 3.0 PHY connected to the USB
+	  controller on Northstar family.
+
 config PHY_BERLIN_USB
 	tristate "Marvell Berlin USB PHY Driver"
 	depends on ARCH_BERLIN && RESET_CONTROLLER && HAS_IOMEM && OF
@@ -258,7 +267,9 @@
 	depends on RESET_CONTROLLER
 	depends on EXTCON
 	depends on POWER_SUPPLY
+	depends on USB_SUPPORT
 	select GENERIC_PHY
+	select USB_COMMON
 	help
 	  Enable this to support the transceiver that is part of Allwinner
 	  sunxi SoCs.
@@ -358,6 +369,14 @@
 	help
 	  Enable this to support the Rockchip USB 2.0 PHY.
 
+config PHY_ROCKCHIP_INNO_USB2
+	tristate "Rockchip INNO USB2PHY Driver"
+	depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF
+	depends on COMMON_CLK
+	select GENERIC_PHY
+	help
+	  Support for Rockchip USB2.0 PHY with Innosilicon IP block.
+
 config PHY_ROCKCHIP_EMMC
 	tristate "Rockchip EMMC PHY Driver"
 	depends on ARCH_ROCKCHIP && OF
@@ -372,6 +391,23 @@
 	help
 	  Enable this to support the Rockchip Display Port PHY.
 
+config PHY_ROCKCHIP_PCIE
+	tristate "Rockchip PCIe PHY Driver"
+	depends on (ARCH_ROCKCHIP && OF) || COMPILE_TEST
+	select GENERIC_PHY
+	select MFD_SYSCON
+	help
+	  Enable this to support the Rockchip PCIe PHY.
+
+config PHY_ROCKCHIP_TYPEC
+	tristate "Rockchip TYPEC PHY Driver"
+	depends on OF && (ARCH_ROCKCHIP || COMPILE_TEST)
+	select EXTCON
+	select GENERIC_PHY
+	select RESET_CONTROLLER
+	help
+	  Enable this to support the Rockchip USB TYPEC PHY.
+
 config PHY_ST_SPEAR1310_MIPHY
 	tristate "ST SPEAR1310-MIPHY driver"
 	select GENERIC_PHY
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index 90ae198..a534cf5 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_GENERIC_PHY)		+= phy-core.o
 obj-$(CONFIG_PHY_BCM_NS_USB2)		+= phy-bcm-ns-usb2.o
+obj-$(CONFIG_PHY_BCM_NS_USB3)		+= phy-bcm-ns-usb3.o
 obj-$(CONFIG_PHY_BERLIN_USB)		+= phy-berlin-usb.o
 obj-$(CONFIG_PHY_BERLIN_SATA)		+= phy-berlin-sata.o
 obj-$(CONFIG_PHY_DA8XX_USB)		+= phy-da8xx-usb.o
@@ -39,8 +40,11 @@
 obj-$(CONFIG_PHY_EXYNOS5_USBDRD)	+= phy-exynos5-usbdrd.o
 obj-$(CONFIG_PHY_QCOM_APQ8064_SATA)	+= phy-qcom-apq8064-sata.o
 obj-$(CONFIG_PHY_ROCKCHIP_USB) += phy-rockchip-usb.o
+obj-$(CONFIG_PHY_ROCKCHIP_INNO_USB2)	+= phy-rockchip-inno-usb2.o
 obj-$(CONFIG_PHY_ROCKCHIP_EMMC) += phy-rockchip-emmc.o
+obj-$(CONFIG_PHY_ROCKCHIP_PCIE) += phy-rockchip-pcie.o
 obj-$(CONFIG_PHY_ROCKCHIP_DP)		+= phy-rockchip-dp.o
+obj-$(CONFIG_PHY_ROCKCHIP_TYPEC) += phy-rockchip-typec.o
 obj-$(CONFIG_PHY_QCOM_IPQ806X_SATA)	+= phy-qcom-ipq806x-sata.o
 obj-$(CONFIG_PHY_ST_SPEAR1310_MIPHY)	+= phy-spear1310-miphy.o
 obj-$(CONFIG_PHY_ST_SPEAR1340_MIPHY)	+= phy-spear1340-miphy.o
diff --git a/drivers/phy/phy-bcm-ns-usb3.c b/drivers/phy/phy-bcm-ns-usb3.c
new file mode 100644
index 0000000..f420fa4
--- /dev/null
+++ b/drivers/phy/phy-bcm-ns-usb3.c
@@ -0,0 +1,274 @@
+/*
+ * Broadcom Northstar USB 3.0 PHY Driver
+ *
+ * Copyright (C) 2016 Rafał Miłecki <rafal@milecki.pl>
+ *
+ * All magic values used for initialization (and related comments) were obtained
+ * from Broadcom's SDK:
+ * Copyright (c) Broadcom Corp, 2012
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bcma/bcma.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/phy/phy.h>
+#include <linux/slab.h>
+
+#define BCM_NS_USB3_MII_MNG_TIMEOUT_US	1000	/* usecs */
+
+enum bcm_ns_family {
+	BCM_NS_UNKNOWN,
+	BCM_NS_AX,
+	BCM_NS_BX,
+};
+
+struct bcm_ns_usb3 {
+	struct device *dev;
+	enum bcm_ns_family family;
+	void __iomem *dmp;
+	void __iomem *ccb_mii;
+	struct phy *phy;
+};
+
+static const struct of_device_id bcm_ns_usb3_id_table[] = {
+	{
+		.compatible = "brcm,ns-ax-usb3-phy",
+		.data = (int *)BCM_NS_AX,
+	},
+	{
+		.compatible = "brcm,ns-bx-usb3-phy",
+		.data = (int *)BCM_NS_BX,
+	},
+	{},
+};
+MODULE_DEVICE_TABLE(of, bcm_ns_usb3_id_table);
+
+static int bcm_ns_usb3_wait_reg(struct bcm_ns_usb3 *usb3, void __iomem *addr,
+				u32 mask, u32 value, unsigned long timeout)
+{
+	unsigned long deadline = jiffies + timeout;
+	u32 val;
+
+	do {
+		val = readl(addr);
+		if ((val & mask) == value)
+			return 0;
+		cpu_relax();
+		udelay(10);
+	} while (!time_after_eq(jiffies, deadline));
+
+	dev_err(usb3->dev, "Timeout waiting for register %p\n", addr);
+
+	return -EBUSY;
+}
+
+static inline int bcm_ns_usb3_mii_mng_wait_idle(struct bcm_ns_usb3 *usb3)
+{
+	return bcm_ns_usb3_wait_reg(usb3, usb3->ccb_mii + BCMA_CCB_MII_MNG_CTL,
+				    0x0100, 0x0000,
+				    usecs_to_jiffies(BCM_NS_USB3_MII_MNG_TIMEOUT_US));
+}
+
+static int bcm_ns_usb3_mii_mng_write32(struct bcm_ns_usb3 *usb3, u32 value)
+{
+	int err;
+
+	err = bcm_ns_usb3_mii_mng_wait_idle(usb3);
+	if (err < 0) {
+		dev_err(usb3->dev, "Couldn't write 0x%08x value\n", value);
+		return err;
+	}
+
+	writel(value, usb3->ccb_mii + BCMA_CCB_MII_MNG_CMD_DATA);
+
+	return 0;
+}
+
+static int bcm_ns_usb3_phy_init_ns_bx(struct bcm_ns_usb3 *usb3)
+{
+	int err;
+
+	/* Enable MDIO. Setting MDCDIV as 26  */
+	writel(0x0000009a, usb3->ccb_mii + BCMA_CCB_MII_MNG_CTL);
+
+	/* Wait for MDIO? */
+	udelay(2);
+
+	/* USB3 PLL Block */
+	err = bcm_ns_usb3_mii_mng_write32(usb3, 0x587e8000);
+	if (err < 0)
+		return err;
+
+	/* Assert Ana_Pllseq start */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x58061000);
+
+	/* Assert CML Divider ratio to 26 */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x582a6400);
+
+	/* Asserting PLL Reset */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x582ec000);
+
+	/* Deaaserting PLL Reset */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x582e8000);
+
+	/* Waiting MII Mgt interface idle */
+	bcm_ns_usb3_mii_mng_wait_idle(usb3);
+
+	/* Deasserting USB3 system reset */
+	writel(0, usb3->dmp + BCMA_RESET_CTL);
+
+	/* PLL frequency monitor enable */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x58069000);
+
+	/* PIPE Block */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x587e8060);
+
+	/* CMPMAX & CMPMINTH setting */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x580af30d);
+
+	/* DEGLITCH MIN & MAX setting */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x580e6302);
+
+	/* TXPMD block */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x587e8040);
+
+	/* Enabling SSC */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x58061003);
+
+	/* Waiting MII Mgt interface idle */
+	bcm_ns_usb3_mii_mng_wait_idle(usb3);
+
+	return 0;
+}
+
+static int bcm_ns_usb3_phy_init_ns_ax(struct bcm_ns_usb3 *usb3)
+{
+	int err;
+
+	/* Enable MDIO. Setting MDCDIV as 26  */
+	writel(0x0000009a, usb3->ccb_mii + BCMA_CCB_MII_MNG_CTL);
+
+	/* Wait for MDIO? */
+	udelay(2);
+
+	/* PLL30 block */
+	err = bcm_ns_usb3_mii_mng_write32(usb3, 0x587e8000);
+	if (err < 0)
+		return err;
+
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x582a6400);
+
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x587e80e0);
+
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x580a009c);
+
+	/* Enable SSC */
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x587e8040);
+
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x580a21d3);
+
+	bcm_ns_usb3_mii_mng_write32(usb3, 0x58061003);
+
+	/* Waiting MII Mgt interface idle */
+	bcm_ns_usb3_mii_mng_wait_idle(usb3);
+
+	/* Deasserting USB3 system reset */
+	writel(0, usb3->dmp + BCMA_RESET_CTL);
+
+	return 0;
+}
+
+static int bcm_ns_usb3_phy_init(struct phy *phy)
+{
+	struct bcm_ns_usb3 *usb3 = phy_get_drvdata(phy);
+	int err;
+
+	/* Perform USB3 system soft reset */
+	writel(BCMA_RESET_CTL_RESET, usb3->dmp + BCMA_RESET_CTL);
+
+	switch (usb3->family) {
+	case BCM_NS_AX:
+		err = bcm_ns_usb3_phy_init_ns_ax(usb3);
+		break;
+	case BCM_NS_BX:
+		err = bcm_ns_usb3_phy_init_ns_bx(usb3);
+		break;
+	default:
+		WARN_ON(1);
+		err = -ENOTSUPP;
+	}
+
+	return err;
+}
+
+static const struct phy_ops ops = {
+	.init		= bcm_ns_usb3_phy_init,
+	.owner		= THIS_MODULE,
+};
+
+static int bcm_ns_usb3_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct of_device_id *of_id;
+	struct bcm_ns_usb3 *usb3;
+	struct resource *res;
+	struct phy_provider *phy_provider;
+
+	usb3 = devm_kzalloc(dev, sizeof(*usb3), GFP_KERNEL);
+	if (!usb3)
+		return -ENOMEM;
+
+	usb3->dev = dev;
+
+	of_id = of_match_device(bcm_ns_usb3_id_table, dev);
+	if (!of_id)
+		return -EINVAL;
+	usb3->family = (enum bcm_ns_family)of_id->data;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dmp");
+	usb3->dmp = devm_ioremap_resource(dev, res);
+	if (IS_ERR(usb3->dmp)) {
+		dev_err(dev, "Failed to map DMP regs\n");
+		return PTR_ERR(usb3->dmp);
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ccb-mii");
+	usb3->ccb_mii = devm_ioremap_resource(dev, res);
+	if (IS_ERR(usb3->ccb_mii)) {
+		dev_err(dev, "Failed to map ChipCommon B MII regs\n");
+		return PTR_ERR(usb3->ccb_mii);
+	}
+
+	usb3->phy = devm_phy_create(dev, NULL, &ops);
+	if (IS_ERR(usb3->phy)) {
+		dev_err(dev, "Failed to create PHY\n");
+		return PTR_ERR(usb3->phy);
+	}
+
+	phy_set_drvdata(usb3->phy, usb3);
+	platform_set_drvdata(pdev, usb3);
+
+	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+	if (!IS_ERR(phy_provider))
+		dev_info(dev, "Registered Broadcom Northstar USB 3.0 PHY driver\n");
+
+	return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static struct platform_driver bcm_ns_usb3_driver = {
+	.probe		= bcm_ns_usb3_probe,
+	.driver = {
+		.name = "bcm_ns_usb3",
+		.of_match_table = bcm_ns_usb3_id_table,
+	},
+};
+module_platform_driver(bcm_ns_usb3_driver);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/phy-bcm-ns2-pcie.c b/drivers/phy/phy-bcm-ns2-pcie.c
index 9513f7a..4c7d11d 100644
--- a/drivers/phy/phy-bcm-ns2-pcie.c
+++ b/drivers/phy/phy-bcm-ns2-pcie.c
@@ -18,11 +18,6 @@
 #include <linux/phy.h>
 #include <linux/phy/phy.h>
 
-struct ns2_pci_phy {
-	struct mdio_device *mdiodev;
-	struct phy *phy;
-};
-
 #define BLK_ADDR_REG_OFFSET	0x1f
 #define PLL_AFE1_100MHZ_BLK	0x2100
 #define PLL_CLK_AMP_OFFSET	0x03
@@ -30,17 +25,17 @@
 
 static int ns2_pci_phy_init(struct phy *p)
 {
-	struct ns2_pci_phy *phy = phy_get_drvdata(p);
+	struct mdio_device *mdiodev = phy_get_drvdata(p);
 	int rc;
 
 	/* select the AFE 100MHz block page */
-	rc = mdiobus_write(phy->mdiodev->bus, phy->mdiodev->addr,
+	rc = mdiobus_write(mdiodev->bus, mdiodev->addr,
 			   BLK_ADDR_REG_OFFSET, PLL_AFE1_100MHZ_BLK);
 	if (rc)
 		goto err;
 
 	/* set the 100 MHz reference clock amplitude to 2.05 v */
-	rc = mdiobus_write(phy->mdiodev->bus, phy->mdiodev->addr,
+	rc = mdiobus_write(mdiodev->bus, mdiodev->addr,
 			   PLL_CLK_AMP_OFFSET, PLL_CLK_AMP_2P05V);
 	if (rc)
 		goto err;
@@ -48,19 +43,19 @@
 	return 0;
 
 err:
-	dev_err(&phy->mdiodev->dev, "Error %d writing to phy\n", rc);
+	dev_err(&mdiodev->dev, "Error %d writing to phy\n", rc);
 	return rc;
 }
 
-static struct phy_ops ns2_pci_phy_ops = {
+static const struct phy_ops ns2_pci_phy_ops = {
 	.init = ns2_pci_phy_init,
+	.owner = THIS_MODULE,
 };
 
 static int ns2_pci_phy_probe(struct mdio_device *mdiodev)
 {
 	struct device *dev = &mdiodev->dev;
 	struct phy_provider *provider;
-	struct ns2_pci_phy *p;
 	struct phy *phy;
 
 	phy = devm_phy_create(dev, dev->of_node, &ns2_pci_phy_ops);
@@ -69,16 +64,7 @@
 		return PTR_ERR(phy);
 	}
 
-	p = devm_kmalloc(dev, sizeof(struct ns2_pci_phy),
-			 GFP_KERNEL);
-	if (!p)
-		return -ENOMEM;
-
-	p->mdiodev = mdiodev;
-	dev_set_drvdata(dev, p);
-
-	p->phy = phy;
-	phy_set_drvdata(phy, p);
+	phy_set_drvdata(phy, mdiodev);
 
 	provider = devm_of_phy_provider_register(&phy->dev,
 						 of_phy_simple_xlate);
diff --git a/drivers/phy/phy-brcm-sata.c b/drivers/phy/phy-brcm-sata.c
index 18d6626..8ffc44a 100644
--- a/drivers/phy/phy-brcm-sata.c
+++ b/drivers/phy/phy-brcm-sata.c
@@ -367,7 +367,7 @@
 		rc = -ENODEV;
 	};
 
-	return 0;
+	return rc;
 }
 
 static const struct phy_ops phy_ops = {
diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c
index 8eca906..a268f4d 100644
--- a/drivers/phy/phy-core.c
+++ b/drivers/phy/phy-core.c
@@ -357,6 +357,21 @@
 }
 EXPORT_SYMBOL_GPL(phy_set_mode);
 
+int phy_reset(struct phy *phy)
+{
+	int ret;
+
+	if (!phy || !phy->ops->reset)
+		return 0;
+
+	mutex_lock(&phy->mutex);
+	ret = phy->ops->reset(phy);
+	mutex_unlock(&phy->mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_reset);
+
 /**
  * _of_phy_get() - lookup and obtain a reference to a phy by phandle
  * @np: device_node for which to get the phy
diff --git a/drivers/phy/phy-da8xx-usb.c b/drivers/phy/phy-da8xx-usb.c
index b2e59b6..32ae78c 100644
--- a/drivers/phy/phy-da8xx-usb.c
+++ b/drivers/phy/phy-da8xx-usb.c
@@ -154,7 +154,7 @@
 		d_phy->regmap = syscon_regmap_lookup_by_compatible(
 							"ti,da830-cfgchip");
 	else
-		d_phy->regmap = syscon_regmap_lookup_by_pdevname("syscon.0");
+		d_phy->regmap = syscon_regmap_lookup_by_pdevname("syscon");
 	if (IS_ERR(d_phy->regmap)) {
 		dev_err(dev, "Failed to get syscon\n");
 		return PTR_ERR(d_phy->regmap);
diff --git a/drivers/phy/phy-exynos5-usbdrd.c b/drivers/phy/phy-exynos5-usbdrd.c
index 20696f5..07ed608 100644
--- a/drivers/phy/phy-exynos5-usbdrd.c
+++ b/drivers/phy/phy-exynos5-usbdrd.c
@@ -249,7 +249,7 @@
 static unsigned int
 exynos5_usbdrd_pipe3_set_refclk(struct phy_usb_instance *inst)
 {
-	static u32 reg;
+	u32 reg;
 	struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
 
 	/* restore any previous reference clock settings */
@@ -295,7 +295,7 @@
 static unsigned int
 exynos5_usbdrd_utmi_set_refclk(struct phy_usb_instance *inst)
 {
-	static u32 reg;
+	u32 reg;
 	struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
 
 	/* restore any previous reference clock settings */
diff --git a/drivers/phy/phy-omap-usb2.c b/drivers/phy/phy-omap-usb2.c
index c134989..fe909fd 100644
--- a/drivers/phy/phy-omap-usb2.c
+++ b/drivers/phy/phy-omap-usb2.c
@@ -133,11 +133,49 @@
 	return omap_usb_phy_power(phy, true);
 }
 
+static int omap_usb2_disable_clocks(struct omap_usb *phy)
+{
+	clk_disable(phy->wkupclk);
+	if (!IS_ERR(phy->optclk))
+		clk_disable(phy->optclk);
+
+	return 0;
+}
+
+static int omap_usb2_enable_clocks(struct omap_usb *phy)
+{
+	int ret;
+
+	ret = clk_enable(phy->wkupclk);
+	if (ret < 0) {
+		dev_err(phy->dev, "Failed to enable wkupclk %d\n", ret);
+		goto err0;
+	}
+
+	if (!IS_ERR(phy->optclk)) {
+		ret = clk_enable(phy->optclk);
+		if (ret < 0) {
+			dev_err(phy->dev, "Failed to enable optclk %d\n", ret);
+			goto err1;
+		}
+	}
+
+	return 0;
+
+err1:
+	clk_disable(phy->wkupclk);
+
+err0:
+	return ret;
+}
+
 static int omap_usb_init(struct phy *x)
 {
 	struct omap_usb *phy = phy_get_drvdata(x);
 	u32 val;
 
+	omap_usb2_enable_clocks(phy);
+
 	if (phy->flags & OMAP_USB2_CALIBRATE_FALSE_DISCONNECT) {
 		/*
 		 *
@@ -155,8 +193,16 @@
 	return 0;
 }
 
+static int omap_usb_exit(struct phy *x)
+{
+	struct omap_usb *phy = phy_get_drvdata(x);
+
+	return omap_usb2_disable_clocks(phy);
+}
+
 static const struct phy_ops ops = {
 	.init		= omap_usb_init,
+	.exit		= omap_usb_exit,
 	.power_on	= omap_usb_power_on,
 	.power_off	= omap_usb_power_off,
 	.owner		= THIS_MODULE,
@@ -376,65 +422,11 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM
-
-static int omap_usb2_runtime_suspend(struct device *dev)
-{
-	struct platform_device	*pdev = to_platform_device(dev);
-	struct omap_usb	*phy = platform_get_drvdata(pdev);
-
-	clk_disable(phy->wkupclk);
-	if (!IS_ERR(phy->optclk))
-		clk_disable(phy->optclk);
-
-	return 0;
-}
-
-static int omap_usb2_runtime_resume(struct device *dev)
-{
-	struct platform_device	*pdev = to_platform_device(dev);
-	struct omap_usb	*phy = platform_get_drvdata(pdev);
-	int ret;
-
-	ret = clk_enable(phy->wkupclk);
-	if (ret < 0) {
-		dev_err(phy->dev, "Failed to enable wkupclk %d\n", ret);
-		goto err0;
-	}
-
-	if (!IS_ERR(phy->optclk)) {
-		ret = clk_enable(phy->optclk);
-		if (ret < 0) {
-			dev_err(phy->dev, "Failed to enable optclk %d\n", ret);
-			goto err1;
-		}
-	}
-
-	return 0;
-
-err1:
-	clk_disable(phy->wkupclk);
-
-err0:
-	return ret;
-}
-
-static const struct dev_pm_ops omap_usb2_pm_ops = {
-	SET_RUNTIME_PM_OPS(omap_usb2_runtime_suspend, omap_usb2_runtime_resume,
-		NULL)
-};
-
-#define DEV_PM_OPS     (&omap_usb2_pm_ops)
-#else
-#define DEV_PM_OPS     NULL
-#endif
-
 static struct platform_driver omap_usb2_driver = {
 	.probe		= omap_usb2_probe,
 	.remove		= omap_usb2_remove,
 	.driver		= {
 		.name	= "omap-usb2",
-		.pm	= DEV_PM_OPS,
 		.of_match_table = omap_usb2_id_table,
 	},
 };
diff --git a/drivers/phy/phy-qcom-ufs.c b/drivers/phy/phy-qcom-ufs.c
index 107cb57..18a5b49 100644
--- a/drivers/phy/phy-qcom-ufs.c
+++ b/drivers/phy/phy-qcom-ufs.c
@@ -283,10 +283,8 @@
 			err = 0;
 		}
 		snprintf(prop_name, MAX_PROP_NAME, "%s-always-on", name);
-		if (of_get_property(dev->of_node, prop_name, NULL))
-			vreg->is_always_on = true;
-		else
-			vreg->is_always_on = false;
+		vreg->is_always_on = of_property_read_bool(dev->of_node,
+							   prop_name);
 	}
 
 	if (!strcmp(name, "vdda-pll")) {
diff --git a/drivers/phy/phy-rcar-gen3-usb2.c b/drivers/phy/phy-rcar-gen3-usb2.c
index 31156c9..3d97ead 100644
--- a/drivers/phy/phy-rcar-gen3-usb2.c
+++ b/drivers/phy/phy-rcar-gen3-usb2.c
@@ -280,6 +280,7 @@
 
 static const struct of_device_id rcar_gen3_phy_usb2_match_table[] = {
 	{ .compatible = "renesas,usb2-phy-r8a7795" },
+	{ .compatible = "renesas,usb2-phy-r8a7796" },
 	{ .compatible = "renesas,rcar-gen3-usb2-phy" },
 	{ }
 };
diff --git a/drivers/phy/phy-rockchip-inno-usb2.c b/drivers/phy/phy-rockchip-inno-usb2.c
new file mode 100644
index 0000000..ac20310
--- /dev/null
+++ b/drivers/phy/phy-rockchip-inno-usb2.c
@@ -0,0 +1,707 @@
+/*
+ * Rockchip USB2.0 PHY with Innosilicon IP block driver
+ *
+ * Copyright (C) 2016 Fuzhou Rockchip Electronics Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/gpio/consumer.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+
+#define BIT_WRITEABLE_SHIFT	16
+#define SCHEDULE_DELAY	(60 * HZ)
+
+enum rockchip_usb2phy_port_id {
+	USB2PHY_PORT_OTG,
+	USB2PHY_PORT_HOST,
+	USB2PHY_NUM_PORTS,
+};
+
+enum rockchip_usb2phy_host_state {
+	PHY_STATE_HS_ONLINE	= 0,
+	PHY_STATE_DISCONNECT	= 1,
+	PHY_STATE_CONNECT	= 2,
+	PHY_STATE_FS_LS_ONLINE	= 4,
+};
+
+struct usb2phy_reg {
+	unsigned int	offset;
+	unsigned int	bitend;
+	unsigned int	bitstart;
+	unsigned int	disable;
+	unsigned int	enable;
+};
+
+/**
+ * struct rockchip_usb2phy_port_cfg: usb-phy port configuration.
+ * @phy_sus: phy suspend register.
+ * @ls_det_en: linestate detection enable register.
+ * @ls_det_st: linestate detection state register.
+ * @ls_det_clr: linestate detection clear register.
+ * @utmi_ls: utmi linestate state register.
+ * @utmi_hstdet: utmi host disconnect register.
+ */
+struct rockchip_usb2phy_port_cfg {
+	struct usb2phy_reg	phy_sus;
+	struct usb2phy_reg	ls_det_en;
+	struct usb2phy_reg	ls_det_st;
+	struct usb2phy_reg	ls_det_clr;
+	struct usb2phy_reg	utmi_ls;
+	struct usb2phy_reg	utmi_hstdet;
+};
+
+/**
+ * struct rockchip_usb2phy_cfg: usb-phy configuration.
+ * @reg: the address offset of grf for usb-phy config.
+ * @num_ports: specify how many ports that the phy has.
+ * @clkout_ctl: keep on/turn off output clk of phy.
+ */
+struct rockchip_usb2phy_cfg {
+	unsigned int	reg;
+	unsigned int	num_ports;
+	struct usb2phy_reg	clkout_ctl;
+	const struct rockchip_usb2phy_port_cfg	port_cfgs[USB2PHY_NUM_PORTS];
+};
+
+/**
+ * struct rockchip_usb2phy_port: usb-phy port data.
+ * @port_id: flag for otg port or host port.
+ * @suspended: phy suspended flag.
+ * @ls_irq: IRQ number assigned for linestate detection.
+ * @mutex: for register updating in sm_work.
+ * @sm_work: OTG state machine work.
+ * @phy_cfg: port register configuration, assigned by driver data.
+ */
+struct rockchip_usb2phy_port {
+	struct phy	*phy;
+	unsigned int	port_id;
+	bool		suspended;
+	int		ls_irq;
+	struct mutex	mutex;
+	struct		delayed_work sm_work;
+	const struct	rockchip_usb2phy_port_cfg *port_cfg;
+};
+
+/**
+ * struct rockchip_usb2phy: usb2.0 phy driver data.
+ * @grf: General Register Files regmap.
+ * @clk: clock struct of phy input clk.
+ * @clk480m: clock struct of phy output clk.
+ * @clk_hw: clock struct of phy output clk management.
+ * @phy_cfg: phy register configuration, assigned by driver data.
+ * @ports: phy port instance.
+ */
+struct rockchip_usb2phy {
+	struct device	*dev;
+	struct regmap	*grf;
+	struct clk	*clk;
+	struct clk	*clk480m;
+	struct clk_hw	clk480m_hw;
+	const struct rockchip_usb2phy_cfg	*phy_cfg;
+	struct rockchip_usb2phy_port	ports[USB2PHY_NUM_PORTS];
+};
+
+static inline int property_enable(struct rockchip_usb2phy *rphy,
+				  const struct usb2phy_reg *reg, bool en)
+{
+	unsigned int val, mask, tmp;
+
+	tmp = en ? reg->enable : reg->disable;
+	mask = GENMASK(reg->bitend, reg->bitstart);
+	val = (tmp << reg->bitstart) | (mask << BIT_WRITEABLE_SHIFT);
+
+	return regmap_write(rphy->grf, reg->offset, val);
+}
+
+static inline bool property_enabled(struct rockchip_usb2phy *rphy,
+				    const struct usb2phy_reg *reg)
+{
+	int ret;
+	unsigned int tmp, orig;
+	unsigned int mask = GENMASK(reg->bitend, reg->bitstart);
+
+	ret = regmap_read(rphy->grf, reg->offset, &orig);
+	if (ret)
+		return false;
+
+	tmp = (orig & mask) >> reg->bitstart;
+	return tmp == reg->enable;
+}
+
+static int rockchip_usb2phy_clk480m_enable(struct clk_hw *hw)
+{
+	struct rockchip_usb2phy *rphy =
+		container_of(hw, struct rockchip_usb2phy, clk480m_hw);
+	int ret;
+
+	/* turn on 480m clk output if it is off */
+	if (!property_enabled(rphy, &rphy->phy_cfg->clkout_ctl)) {
+		ret = property_enable(rphy, &rphy->phy_cfg->clkout_ctl, true);
+		if (ret)
+			return ret;
+
+		/* waitting for the clk become stable */
+		mdelay(1);
+	}
+
+	return 0;
+}
+
+static void rockchip_usb2phy_clk480m_disable(struct clk_hw *hw)
+{
+	struct rockchip_usb2phy *rphy =
+		container_of(hw, struct rockchip_usb2phy, clk480m_hw);
+
+	/* turn off 480m clk output */
+	property_enable(rphy, &rphy->phy_cfg->clkout_ctl, false);
+}
+
+static int rockchip_usb2phy_clk480m_enabled(struct clk_hw *hw)
+{
+	struct rockchip_usb2phy *rphy =
+		container_of(hw, struct rockchip_usb2phy, clk480m_hw);
+
+	return property_enabled(rphy, &rphy->phy_cfg->clkout_ctl);
+}
+
+static unsigned long
+rockchip_usb2phy_clk480m_recalc_rate(struct clk_hw *hw,
+				     unsigned long parent_rate)
+{
+	return 480000000;
+}
+
+static const struct clk_ops rockchip_usb2phy_clkout_ops = {
+	.enable = rockchip_usb2phy_clk480m_enable,
+	.disable = rockchip_usb2phy_clk480m_disable,
+	.is_enabled = rockchip_usb2phy_clk480m_enabled,
+	.recalc_rate = rockchip_usb2phy_clk480m_recalc_rate,
+};
+
+static void rockchip_usb2phy_clk480m_unregister(void *data)
+{
+	struct rockchip_usb2phy *rphy = data;
+
+	of_clk_del_provider(rphy->dev->of_node);
+	clk_unregister(rphy->clk480m);
+}
+
+static int
+rockchip_usb2phy_clk480m_register(struct rockchip_usb2phy *rphy)
+{
+	struct device_node *node = rphy->dev->of_node;
+	struct clk_init_data init;
+	const char *clk_name;
+	int ret;
+
+	init.flags = 0;
+	init.name = "clk_usbphy_480m";
+	init.ops = &rockchip_usb2phy_clkout_ops;
+
+	/* optional override of the clockname */
+	of_property_read_string(node, "clock-output-names", &init.name);
+
+	if (rphy->clk) {
+		clk_name = __clk_get_name(rphy->clk);
+		init.parent_names = &clk_name;
+		init.num_parents = 1;
+	} else {
+		init.parent_names = NULL;
+		init.num_parents = 0;
+	}
+
+	rphy->clk480m_hw.init = &init;
+
+	/* register the clock */
+	rphy->clk480m = clk_register(rphy->dev, &rphy->clk480m_hw);
+	if (IS_ERR(rphy->clk480m)) {
+		ret = PTR_ERR(rphy->clk480m);
+		goto err_ret;
+	}
+
+	ret = of_clk_add_provider(node, of_clk_src_simple_get, rphy->clk480m);
+	if (ret < 0)
+		goto err_clk_provider;
+
+	ret = devm_add_action(rphy->dev, rockchip_usb2phy_clk480m_unregister,
+			      rphy);
+	if (ret < 0)
+		goto err_unreg_action;
+
+	return 0;
+
+err_unreg_action:
+	of_clk_del_provider(node);
+err_clk_provider:
+	clk_unregister(rphy->clk480m);
+err_ret:
+	return ret;
+}
+
+static int rockchip_usb2phy_init(struct phy *phy)
+{
+	struct rockchip_usb2phy_port *rport = phy_get_drvdata(phy);
+	struct rockchip_usb2phy *rphy = dev_get_drvdata(phy->dev.parent);
+	int ret;
+
+	if (rport->port_id == USB2PHY_PORT_HOST) {
+		/* clear linestate and enable linestate detect irq */
+		mutex_lock(&rport->mutex);
+
+		ret = property_enable(rphy, &rport->port_cfg->ls_det_clr, true);
+		if (ret) {
+			mutex_unlock(&rport->mutex);
+			return ret;
+		}
+
+		ret = property_enable(rphy, &rport->port_cfg->ls_det_en, true);
+		if (ret) {
+			mutex_unlock(&rport->mutex);
+			return ret;
+		}
+
+		mutex_unlock(&rport->mutex);
+		schedule_delayed_work(&rport->sm_work, SCHEDULE_DELAY);
+	}
+
+	return 0;
+}
+
+static int rockchip_usb2phy_power_on(struct phy *phy)
+{
+	struct rockchip_usb2phy_port *rport = phy_get_drvdata(phy);
+	struct rockchip_usb2phy *rphy = dev_get_drvdata(phy->dev.parent);
+	int ret;
+
+	dev_dbg(&rport->phy->dev, "port power on\n");
+
+	if (!rport->suspended)
+		return 0;
+
+	ret = clk_prepare_enable(rphy->clk480m);
+	if (ret)
+		return ret;
+
+	ret = property_enable(rphy, &rport->port_cfg->phy_sus, false);
+	if (ret)
+		return ret;
+
+	rport->suspended = false;
+	return 0;
+}
+
+static int rockchip_usb2phy_power_off(struct phy *phy)
+{
+	struct rockchip_usb2phy_port *rport = phy_get_drvdata(phy);
+	struct rockchip_usb2phy *rphy = dev_get_drvdata(phy->dev.parent);
+	int ret;
+
+	dev_dbg(&rport->phy->dev, "port power off\n");
+
+	if (rport->suspended)
+		return 0;
+
+	ret = property_enable(rphy, &rport->port_cfg->phy_sus, true);
+	if (ret)
+		return ret;
+
+	rport->suspended = true;
+	clk_disable_unprepare(rphy->clk480m);
+
+	return 0;
+}
+
+static int rockchip_usb2phy_exit(struct phy *phy)
+{
+	struct rockchip_usb2phy_port *rport = phy_get_drvdata(phy);
+
+	if (rport->port_id == USB2PHY_PORT_HOST)
+		cancel_delayed_work_sync(&rport->sm_work);
+
+	return 0;
+}
+
+static const struct phy_ops rockchip_usb2phy_ops = {
+	.init		= rockchip_usb2phy_init,
+	.exit		= rockchip_usb2phy_exit,
+	.power_on	= rockchip_usb2phy_power_on,
+	.power_off	= rockchip_usb2phy_power_off,
+	.owner		= THIS_MODULE,
+};
+
+/*
+ * The function manage host-phy port state and suspend/resume phy port
+ * to save power.
+ *
+ * we rely on utmi_linestate and utmi_hostdisconnect to identify whether
+ * devices is disconnect or not. Besides, we do not need care it is FS/LS
+ * disconnected or HS disconnected, actually, we just only need get the
+ * device is disconnected at last through rearm the delayed work,
+ * to suspend the phy port in _PHY_STATE_DISCONNECT_ case.
+ *
+ * NOTE: It may invoke *phy_powr_off or *phy_power_on which will invoke
+ * some clk related APIs, so do not invoke it from interrupt context directly.
+ */
+static void rockchip_usb2phy_sm_work(struct work_struct *work)
+{
+	struct rockchip_usb2phy_port *rport =
+		container_of(work, struct rockchip_usb2phy_port, sm_work.work);
+	struct rockchip_usb2phy *rphy = dev_get_drvdata(rport->phy->dev.parent);
+	unsigned int sh = rport->port_cfg->utmi_hstdet.bitend -
+			  rport->port_cfg->utmi_hstdet.bitstart + 1;
+	unsigned int ul, uhd, state;
+	unsigned int ul_mask, uhd_mask;
+	int ret;
+
+	mutex_lock(&rport->mutex);
+
+	ret = regmap_read(rphy->grf, rport->port_cfg->utmi_ls.offset, &ul);
+	if (ret < 0)
+		goto next_schedule;
+
+	ret = regmap_read(rphy->grf, rport->port_cfg->utmi_hstdet.offset,
+			  &uhd);
+	if (ret < 0)
+		goto next_schedule;
+
+	uhd_mask = GENMASK(rport->port_cfg->utmi_hstdet.bitend,
+			   rport->port_cfg->utmi_hstdet.bitstart);
+	ul_mask = GENMASK(rport->port_cfg->utmi_ls.bitend,
+			  rport->port_cfg->utmi_ls.bitstart);
+
+	/* stitch on utmi_ls and utmi_hstdet as phy state */
+	state = ((uhd & uhd_mask) >> rport->port_cfg->utmi_hstdet.bitstart) |
+		(((ul & ul_mask) >> rport->port_cfg->utmi_ls.bitstart) << sh);
+
+	switch (state) {
+	case PHY_STATE_HS_ONLINE:
+		dev_dbg(&rport->phy->dev, "HS online\n");
+		break;
+	case PHY_STATE_FS_LS_ONLINE:
+		/*
+		 * For FS/LS device, the online state share with connect state
+		 * from utmi_ls and utmi_hstdet register, so we distinguish
+		 * them via suspended flag.
+		 *
+		 * Plus, there are two cases, one is D- Line pull-up, and D+
+		 * line pull-down, the state is 4; another is D+ line pull-up,
+		 * and D- line pull-down, the state is 2.
+		 */
+		if (!rport->suspended) {
+			/* D- line pull-up, D+ line pull-down */
+			dev_dbg(&rport->phy->dev, "FS/LS online\n");
+			break;
+		}
+		/* fall through */
+	case PHY_STATE_CONNECT:
+		if (rport->suspended) {
+			dev_dbg(&rport->phy->dev, "Connected\n");
+			rockchip_usb2phy_power_on(rport->phy);
+			rport->suspended = false;
+		} else {
+			/* D+ line pull-up, D- line pull-down */
+			dev_dbg(&rport->phy->dev, "FS/LS online\n");
+		}
+		break;
+	case PHY_STATE_DISCONNECT:
+		if (!rport->suspended) {
+			dev_dbg(&rport->phy->dev, "Disconnected\n");
+			rockchip_usb2phy_power_off(rport->phy);
+			rport->suspended = true;
+		}
+
+		/*
+		 * activate the linestate detection to get the next device
+		 * plug-in irq.
+		 */
+		property_enable(rphy, &rport->port_cfg->ls_det_clr, true);
+		property_enable(rphy, &rport->port_cfg->ls_det_en, true);
+
+		/*
+		 * we don't need to rearm the delayed work when the phy port
+		 * is suspended.
+		 */
+		mutex_unlock(&rport->mutex);
+		return;
+	default:
+		dev_dbg(&rport->phy->dev, "unknown phy state\n");
+		break;
+	}
+
+next_schedule:
+	mutex_unlock(&rport->mutex);
+	schedule_delayed_work(&rport->sm_work, SCHEDULE_DELAY);
+}
+
+static irqreturn_t rockchip_usb2phy_linestate_irq(int irq, void *data)
+{
+	struct rockchip_usb2phy_port *rport = data;
+	struct rockchip_usb2phy *rphy = dev_get_drvdata(rport->phy->dev.parent);
+
+	if (!property_enabled(rphy, &rport->port_cfg->ls_det_st))
+		return IRQ_NONE;
+
+	mutex_lock(&rport->mutex);
+
+	/* disable linestate detect irq and clear its status */
+	property_enable(rphy, &rport->port_cfg->ls_det_en, false);
+	property_enable(rphy, &rport->port_cfg->ls_det_clr, true);
+
+	mutex_unlock(&rport->mutex);
+
+	/*
+	 * In this case for host phy port, a new device is plugged in,
+	 * meanwhile, if the phy port is suspended, we need rearm the work to
+	 * resume it and mange its states; otherwise, we do nothing about that.
+	 */
+	if (rport->suspended && rport->port_id == USB2PHY_PORT_HOST)
+		rockchip_usb2phy_sm_work(&rport->sm_work.work);
+
+	return IRQ_HANDLED;
+}
+
+static int rockchip_usb2phy_host_port_init(struct rockchip_usb2phy *rphy,
+					   struct rockchip_usb2phy_port *rport,
+					   struct device_node *child_np)
+{
+	int ret;
+
+	rport->port_id = USB2PHY_PORT_HOST;
+	rport->port_cfg = &rphy->phy_cfg->port_cfgs[USB2PHY_PORT_HOST];
+	rport->suspended = true;
+
+	mutex_init(&rport->mutex);
+	INIT_DELAYED_WORK(&rport->sm_work, rockchip_usb2phy_sm_work);
+
+	rport->ls_irq = of_irq_get_byname(child_np, "linestate");
+	if (rport->ls_irq < 0) {
+		dev_err(rphy->dev, "no linestate irq provided\n");
+		return rport->ls_irq;
+	}
+
+	ret = devm_request_threaded_irq(rphy->dev, rport->ls_irq, NULL,
+					rockchip_usb2phy_linestate_irq,
+					IRQF_ONESHOT,
+					"rockchip_usb2phy", rport);
+	if (ret) {
+		dev_err(rphy->dev, "failed to request irq handle\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rockchip_usb2phy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *child_np;
+	struct phy_provider *provider;
+	struct rockchip_usb2phy *rphy;
+	const struct rockchip_usb2phy_cfg *phy_cfgs;
+	const struct of_device_id *match;
+	unsigned int reg;
+	int index, ret;
+
+	rphy = devm_kzalloc(dev, sizeof(*rphy), GFP_KERNEL);
+	if (!rphy)
+		return -ENOMEM;
+
+	match = of_match_device(dev->driver->of_match_table, dev);
+	if (!match || !match->data) {
+		dev_err(dev, "phy configs are not assigned!\n");
+		return -EINVAL;
+	}
+
+	if (!dev->parent || !dev->parent->of_node)
+		return -EINVAL;
+
+	rphy->grf = syscon_node_to_regmap(dev->parent->of_node);
+	if (IS_ERR(rphy->grf))
+		return PTR_ERR(rphy->grf);
+
+	if (of_property_read_u32(np, "reg", &reg)) {
+		dev_err(dev, "the reg property is not assigned in %s node\n",
+			np->name);
+		return -EINVAL;
+	}
+
+	rphy->dev = dev;
+	phy_cfgs = match->data;
+	platform_set_drvdata(pdev, rphy);
+
+	/* find out a proper config which can be matched with dt. */
+	index = 0;
+	while (phy_cfgs[index].reg) {
+		if (phy_cfgs[index].reg == reg) {
+			rphy->phy_cfg = &phy_cfgs[index];
+			break;
+		}
+
+		++index;
+	}
+
+	if (!rphy->phy_cfg) {
+		dev_err(dev, "no phy-config can be matched with %s node\n",
+			np->name);
+		return -EINVAL;
+	}
+
+	rphy->clk = of_clk_get_by_name(np, "phyclk");
+	if (!IS_ERR(rphy->clk)) {
+		clk_prepare_enable(rphy->clk);
+	} else {
+		dev_info(&pdev->dev, "no phyclk specified\n");
+		rphy->clk = NULL;
+	}
+
+	ret = rockchip_usb2phy_clk480m_register(rphy);
+	if (ret) {
+		dev_err(dev, "failed to register 480m output clock\n");
+		goto disable_clks;
+	}
+
+	index = 0;
+	for_each_available_child_of_node(np, child_np) {
+		struct rockchip_usb2phy_port *rport = &rphy->ports[index];
+		struct phy *phy;
+
+		/*
+		 * This driver aim to support both otg-port and host-port,
+		 * but unfortunately, the otg part is not ready in current,
+		 * so this comments and below codes are interim, which should
+		 * be changed after otg-port is supplied soon.
+		 */
+		if (of_node_cmp(child_np->name, "host-port"))
+			goto next_child;
+
+		phy = devm_phy_create(dev, child_np, &rockchip_usb2phy_ops);
+		if (IS_ERR(phy)) {
+			dev_err(dev, "failed to create phy\n");
+			ret = PTR_ERR(phy);
+			goto put_child;
+		}
+
+		rport->phy = phy;
+		phy_set_drvdata(rport->phy, rport);
+
+		ret = rockchip_usb2phy_host_port_init(rphy, rport, child_np);
+		if (ret)
+			goto put_child;
+
+next_child:
+		/* to prevent out of boundary */
+		if (++index >= rphy->phy_cfg->num_ports)
+			break;
+	}
+
+	provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+	return PTR_ERR_OR_ZERO(provider);
+
+put_child:
+	of_node_put(child_np);
+disable_clks:
+	if (rphy->clk) {
+		clk_disable_unprepare(rphy->clk);
+		clk_put(rphy->clk);
+	}
+	return ret;
+}
+
+static const struct rockchip_usb2phy_cfg rk3366_phy_cfgs[] = {
+	{
+		.reg = 0x700,
+		.num_ports	= 2,
+		.clkout_ctl	= { 0x0724, 15, 15, 1, 0 },
+		.port_cfgs	= {
+			[USB2PHY_PORT_HOST] = {
+				.phy_sus	= { 0x0728, 15, 0, 0, 0x1d1 },
+				.ls_det_en	= { 0x0680, 4, 4, 0, 1 },
+				.ls_det_st	= { 0x0690, 4, 4, 0, 1 },
+				.ls_det_clr	= { 0x06a0, 4, 4, 0, 1 },
+				.utmi_ls	= { 0x049c, 14, 13, 0, 1 },
+				.utmi_hstdet	= { 0x049c, 12, 12, 0, 1 }
+			}
+		},
+	},
+	{ /* sentinel */ }
+};
+
+static const struct rockchip_usb2phy_cfg rk3399_phy_cfgs[] = {
+	{
+		.reg = 0xe450,
+		.num_ports	= 2,
+		.clkout_ctl	= { 0xe450, 4, 4, 1, 0 },
+		.port_cfgs	= {
+			[USB2PHY_PORT_HOST] = {
+				.phy_sus	= { 0xe458, 1, 0, 0x2, 0x1 },
+				.ls_det_en	= { 0xe3c0, 6, 6, 0, 1 },
+				.ls_det_st	= { 0xe3e0, 6, 6, 0, 1 },
+				.ls_det_clr	= { 0xe3d0, 6, 6, 0, 1 },
+				.utmi_ls	= { 0xe2ac, 22, 21, 0, 1 },
+				.utmi_hstdet	= { 0xe2ac, 23, 23, 0, 1 }
+			}
+		},
+	},
+	{
+		.reg = 0xe460,
+		.num_ports	= 2,
+		.clkout_ctl	= { 0xe460, 4, 4, 1, 0 },
+		.port_cfgs	= {
+			[USB2PHY_PORT_HOST] = {
+				.phy_sus	= { 0xe468, 1, 0, 0x2, 0x1 },
+				.ls_det_en	= { 0xe3c0, 11, 11, 0, 1 },
+				.ls_det_st	= { 0xe3e0, 11, 11, 0, 1 },
+				.ls_det_clr	= { 0xe3d0, 11, 11, 0, 1 },
+				.utmi_ls	= { 0xe2ac, 26, 25, 0, 1 },
+				.utmi_hstdet	= { 0xe2ac, 27, 27, 0, 1 }
+			}
+		},
+	},
+	{ /* sentinel */ }
+};
+
+static const struct of_device_id rockchip_usb2phy_dt_match[] = {
+	{ .compatible = "rockchip,rk3366-usb2phy", .data = &rk3366_phy_cfgs },
+	{ .compatible = "rockchip,rk3399-usb2phy", .data = &rk3399_phy_cfgs },
+	{}
+};
+MODULE_DEVICE_TABLE(of, rockchip_usb2phy_dt_match);
+
+static struct platform_driver rockchip_usb2phy_driver = {
+	.probe		= rockchip_usb2phy_probe,
+	.driver		= {
+		.name	= "rockchip-usb2phy",
+		.of_match_table = rockchip_usb2phy_dt_match,
+	},
+};
+module_platform_driver(rockchip_usb2phy_driver);
+
+MODULE_AUTHOR("Frank Wang <frank.wang@rock-chips.com>");
+MODULE_DESCRIPTION("Rockchip USB2.0 PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/phy-rockchip-pcie.c b/drivers/phy/phy-rockchip-pcie.c
new file mode 100644
index 0000000..a2b4c6b
--- /dev/null
+++ b/drivers/phy/phy-rockchip-pcie.c
@@ -0,0 +1,357 @@
+/*
+ * Rockchip PCIe PHY driver
+ *
+ * Copyright (C) 2016 Shawn Lin <shawn.lin@rock-chips.com>
+ * Copyright (C) 2016 ROCKCHIP, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+/*
+ * The higher 16-bit of this register is used for write protection
+ * only if BIT(x + 16) set to 1 the BIT(x) can be written.
+ */
+#define HIWORD_UPDATE(val, mask, shift) \
+		((val) << (shift) | (mask) << ((shift) + 16))
+
+#define PHY_MAX_LANE_NUM      4
+#define PHY_CFG_DATA_SHIFT    7
+#define PHY_CFG_ADDR_SHIFT    1
+#define PHY_CFG_DATA_MASK     0xf
+#define PHY_CFG_ADDR_MASK     0x3f
+#define PHY_CFG_RD_MASK       0x3ff
+#define PHY_CFG_WR_ENABLE     1
+#define PHY_CFG_WR_DISABLE    1
+#define PHY_CFG_WR_SHIFT      0
+#define PHY_CFG_WR_MASK       1
+#define PHY_CFG_PLL_LOCK      0x10
+#define PHY_CFG_CLK_TEST      0x10
+#define PHY_CFG_CLK_SCC       0x12
+#define PHY_CFG_SEPE_RATE     BIT(3)
+#define PHY_CFG_PLL_100M      BIT(3)
+#define PHY_PLL_LOCKED        BIT(9)
+#define PHY_PLL_OUTPUT        BIT(10)
+#define PHY_LANE_A_STATUS     0x30
+#define PHY_LANE_B_STATUS     0x31
+#define PHY_LANE_C_STATUS     0x32
+#define PHY_LANE_D_STATUS     0x33
+#define PHY_LANE_RX_DET_SHIFT 11
+#define PHY_LANE_RX_DET_TH    0x1
+#define PHY_LANE_IDLE_OFF     0x1
+#define PHY_LANE_IDLE_MASK    0x1
+#define PHY_LANE_IDLE_A_SHIFT 3
+#define PHY_LANE_IDLE_B_SHIFT 4
+#define PHY_LANE_IDLE_C_SHIFT 5
+#define PHY_LANE_IDLE_D_SHIFT 6
+
+struct rockchip_pcie_data {
+	unsigned int pcie_conf;
+	unsigned int pcie_status;
+	unsigned int pcie_laneoff;
+};
+
+struct rockchip_pcie_phy {
+	struct rockchip_pcie_data *phy_data;
+	struct regmap *reg_base;
+	struct reset_control *phy_rst;
+	struct clk *clk_pciephy_ref;
+};
+
+static inline void phy_wr_cfg(struct rockchip_pcie_phy *rk_phy,
+			      u32 addr, u32 data)
+{
+	regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_conf,
+		     HIWORD_UPDATE(data,
+				   PHY_CFG_DATA_MASK,
+				   PHY_CFG_DATA_SHIFT) |
+		     HIWORD_UPDATE(addr,
+				   PHY_CFG_ADDR_MASK,
+				   PHY_CFG_ADDR_SHIFT));
+	udelay(1);
+	regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_conf,
+		     HIWORD_UPDATE(PHY_CFG_WR_ENABLE,
+				   PHY_CFG_WR_MASK,
+				   PHY_CFG_WR_SHIFT));
+	udelay(1);
+	regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_conf,
+		     HIWORD_UPDATE(PHY_CFG_WR_DISABLE,
+				   PHY_CFG_WR_MASK,
+				   PHY_CFG_WR_SHIFT));
+}
+
+static inline u32 phy_rd_cfg(struct rockchip_pcie_phy *rk_phy,
+			     u32 addr)
+{
+	u32 val;
+
+	regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_conf,
+		     HIWORD_UPDATE(addr,
+				   PHY_CFG_RD_MASK,
+				   PHY_CFG_ADDR_SHIFT));
+	regmap_read(rk_phy->reg_base,
+		    rk_phy->phy_data->pcie_status,
+		    &val);
+	return val;
+}
+
+static int rockchip_pcie_phy_power_off(struct phy *phy)
+{
+	struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy);
+	int err = 0;
+
+	err = reset_control_assert(rk_phy->phy_rst);
+	if (err) {
+		dev_err(&phy->dev, "assert phy_rst err %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static int rockchip_pcie_phy_power_on(struct phy *phy)
+{
+	struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy);
+	int err = 0;
+	u32 status;
+	unsigned long timeout;
+
+	err = reset_control_deassert(rk_phy->phy_rst);
+	if (err) {
+		dev_err(&phy->dev, "deassert phy_rst err %d\n", err);
+		return err;
+	}
+
+	regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_conf,
+		     HIWORD_UPDATE(PHY_CFG_PLL_LOCK,
+				   PHY_CFG_ADDR_MASK,
+				   PHY_CFG_ADDR_SHIFT));
+
+	/*
+	 * No documented timeout value for phy operation below,
+	 * so we make it large enough here. And we use loop-break
+	 * method which should not be harmful.
+	 */
+	timeout = jiffies + msecs_to_jiffies(1000);
+
+	err = -EINVAL;
+	while (time_before(jiffies, timeout)) {
+		regmap_read(rk_phy->reg_base,
+			    rk_phy->phy_data->pcie_status,
+			    &status);
+		if (status & PHY_PLL_LOCKED) {
+			dev_dbg(&phy->dev, "pll locked!\n");
+			err = 0;
+			break;
+		}
+		msleep(20);
+	}
+
+	if (err) {
+		dev_err(&phy->dev, "pll lock timeout!\n");
+		goto err_pll_lock;
+	}
+
+	phy_wr_cfg(rk_phy, PHY_CFG_CLK_TEST, PHY_CFG_SEPE_RATE);
+	phy_wr_cfg(rk_phy, PHY_CFG_CLK_SCC, PHY_CFG_PLL_100M);
+
+	err = -ETIMEDOUT;
+	while (time_before(jiffies, timeout)) {
+		regmap_read(rk_phy->reg_base,
+			    rk_phy->phy_data->pcie_status,
+			    &status);
+		if (!(status & PHY_PLL_OUTPUT)) {
+			dev_dbg(&phy->dev, "pll output enable done!\n");
+			err = 0;
+			break;
+		}
+		msleep(20);
+	}
+
+	if (err) {
+		dev_err(&phy->dev, "pll output enable timeout!\n");
+		goto err_pll_lock;
+	}
+
+	regmap_write(rk_phy->reg_base, rk_phy->phy_data->pcie_conf,
+		     HIWORD_UPDATE(PHY_CFG_PLL_LOCK,
+				   PHY_CFG_ADDR_MASK,
+				   PHY_CFG_ADDR_SHIFT));
+	err = -EINVAL;
+	while (time_before(jiffies, timeout)) {
+		regmap_read(rk_phy->reg_base,
+			    rk_phy->phy_data->pcie_status,
+			    &status);
+		if (status & PHY_PLL_LOCKED) {
+			dev_dbg(&phy->dev, "pll relocked!\n");
+			err = 0;
+			break;
+		}
+		msleep(20);
+	}
+
+	if (err) {
+		dev_err(&phy->dev, "pll relock timeout!\n");
+		goto err_pll_lock;
+	}
+
+	return 0;
+
+err_pll_lock:
+	reset_control_assert(rk_phy->phy_rst);
+	return err;
+}
+
+static int rockchip_pcie_phy_init(struct phy *phy)
+{
+	struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy);
+	int err = 0;
+
+	err = clk_prepare_enable(rk_phy->clk_pciephy_ref);
+	if (err) {
+		dev_err(&phy->dev, "Fail to enable pcie ref clock.\n");
+		goto err_refclk;
+	}
+
+	err = reset_control_assert(rk_phy->phy_rst);
+	if (err) {
+		dev_err(&phy->dev, "assert phy_rst err %d\n", err);
+		goto err_reset;
+	}
+
+	return err;
+
+err_reset:
+	clk_disable_unprepare(rk_phy->clk_pciephy_ref);
+err_refclk:
+	return err;
+}
+
+static int rockchip_pcie_phy_exit(struct phy *phy)
+{
+	struct rockchip_pcie_phy *rk_phy = phy_get_drvdata(phy);
+	int err = 0;
+
+	clk_disable_unprepare(rk_phy->clk_pciephy_ref);
+
+	err = reset_control_deassert(rk_phy->phy_rst);
+	if (err) {
+		dev_err(&phy->dev, "deassert phy_rst err %d\n", err);
+		goto err_reset;
+	}
+
+	return err;
+
+err_reset:
+	clk_prepare_enable(rk_phy->clk_pciephy_ref);
+	return err;
+}
+
+static const struct phy_ops ops = {
+	.init		= rockchip_pcie_phy_init,
+	.exit		= rockchip_pcie_phy_exit,
+	.power_on	= rockchip_pcie_phy_power_on,
+	.power_off	= rockchip_pcie_phy_power_off,
+	.owner		= THIS_MODULE,
+};
+
+static const struct rockchip_pcie_data rk3399_pcie_data = {
+	.pcie_conf = 0xe220,
+	.pcie_status = 0xe2a4,
+	.pcie_laneoff = 0xe214,
+};
+
+static const struct of_device_id rockchip_pcie_phy_dt_ids[] = {
+	{
+		.compatible = "rockchip,rk3399-pcie-phy",
+		.data = &rk3399_pcie_data,
+	},
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, rockchip_pcie_phy_dt_ids);
+
+static int rockchip_pcie_phy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rockchip_pcie_phy *rk_phy;
+	struct phy *generic_phy;
+	struct phy_provider *phy_provider;
+	struct regmap *grf;
+	const struct of_device_id *of_id;
+
+	grf = syscon_node_to_regmap(dev->parent->of_node);
+	if (IS_ERR(grf)) {
+		dev_err(dev, "Cannot find GRF syscon\n");
+		return PTR_ERR(grf);
+	}
+
+	rk_phy = devm_kzalloc(dev, sizeof(*rk_phy), GFP_KERNEL);
+	if (!rk_phy)
+		return -ENOMEM;
+
+	of_id = of_match_device(rockchip_pcie_phy_dt_ids, &pdev->dev);
+	if (!of_id)
+		return -EINVAL;
+
+	rk_phy->phy_data = (struct rockchip_pcie_data *)of_id->data;
+	rk_phy->reg_base = grf;
+
+	rk_phy->phy_rst = devm_reset_control_get(dev, "phy");
+	if (IS_ERR(rk_phy->phy_rst)) {
+		if (PTR_ERR(rk_phy->phy_rst) != -EPROBE_DEFER)
+			dev_err(dev,
+				"missing phy property for reset controller\n");
+		return PTR_ERR(rk_phy->phy_rst);
+	}
+
+	rk_phy->clk_pciephy_ref = devm_clk_get(dev, "refclk");
+	if (IS_ERR(rk_phy->clk_pciephy_ref)) {
+		dev_err(dev, "refclk not found.\n");
+		return PTR_ERR(rk_phy->clk_pciephy_ref);
+	}
+
+	generic_phy = devm_phy_create(dev, dev->of_node, &ops);
+	if (IS_ERR(generic_phy)) {
+		dev_err(dev, "failed to create PHY\n");
+		return PTR_ERR(generic_phy);
+	}
+
+	phy_set_drvdata(generic_phy, rk_phy);
+	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+
+	return PTR_ERR_OR_ZERO(phy_provider);
+}
+
+static struct platform_driver rockchip_pcie_driver = {
+	.probe		= rockchip_pcie_phy_probe,
+	.driver		= {
+		.name	= "rockchip-pcie-phy",
+		.of_match_table = rockchip_pcie_phy_dt_ids,
+	},
+};
+
+module_platform_driver(rockchip_pcie_driver);
+
+MODULE_AUTHOR("Shawn Lin <shawn.lin@rock-chips.com>");
+MODULE_DESCRIPTION("Rockchip PCIe PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/phy-rockchip-typec.c b/drivers/phy/phy-rockchip-typec.c
new file mode 100644
index 0000000..7cfb0f8
--- /dev/null
+++ b/drivers/phy/phy-rockchip-typec.c
@@ -0,0 +1,1023 @@
+/*
+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
+ * Author: Chris Zhong <zyw@rock-chips.com>
+ *         Kever Yang <kever.yang@rock-chips.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * The ROCKCHIP Type-C PHY has two PLL clocks. The first PLL clock
+ * is used for USB3, the second PLL clock is used for DP. This Type-C PHY has
+ * 3 working modes: USB3 only mode, DP only mode, and USB3+DP mode.
+ * At USB3 only mode, both PLL clocks need to be initialized, this allows the
+ * PHY to switch mode between USB3 and USB3+DP, without disconnecting the USB
+ * device.
+ * In The DP only mode, only the DP PLL needs to be powered on, and the 4 lanes
+ * are all used for DP.
+ *
+ * This driver gets extcon cable state and property, then decides which mode to
+ * select:
+ *
+ * 1. USB3 only mode:
+ *    EXTCON_USB or EXTCON_USB_HOST state is true, and
+ *    EXTCON_PROP_USB_SS property is true.
+ *    EXTCON_DISP_DP state is false.
+ *
+ * 2. DP only mode:
+ *    EXTCON_DISP_DP state is true, and
+ *    EXTCON_PROP_USB_SS property is false.
+ *    If EXTCON_USB_HOST state is true, it is DP + USB2 mode, since the USB2 phy
+ *    is a separate phy, so this case is still DP only mode.
+ *
+ * 3. USB3+DP mode:
+ *    EXTCON_USB_HOST and EXTCON_DISP_DP are both true, and
+ *    EXTCON_PROP_USB_SS property is true.
+ *
+ * This Type-C PHY driver supports normal and flip orientation. The orientation
+ * is reported by the EXTCON_PROP_USB_TYPEC_POLARITY property: true is flip
+ * orientation, false is normal orientation.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/extcon.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+
+#include <linux/mfd/syscon.h>
+#include <linux/phy/phy.h>
+
+#define CMN_SSM_BANDGAP			(0x21 << 2)
+#define CMN_SSM_BIAS			(0x22 << 2)
+#define CMN_PLLSM0_PLLEN		(0x29 << 2)
+#define CMN_PLLSM0_PLLPRE		(0x2a << 2)
+#define CMN_PLLSM0_PLLVREF		(0x2b << 2)
+#define CMN_PLLSM0_PLLLOCK		(0x2c << 2)
+#define CMN_PLLSM1_PLLEN		(0x31 << 2)
+#define CMN_PLLSM1_PLLPRE		(0x32 << 2)
+#define CMN_PLLSM1_PLLVREF		(0x33 << 2)
+#define CMN_PLLSM1_PLLLOCK		(0x34 << 2)
+#define CMN_PLLSM1_USER_DEF_CTRL	(0x37 << 2)
+#define CMN_ICAL_OVRD			(0xc1 << 2)
+#define CMN_PLL0_VCOCAL_OVRD		(0x83 << 2)
+#define CMN_PLL0_VCOCAL_INIT		(0x84 << 2)
+#define CMN_PLL0_VCOCAL_ITER		(0x85 << 2)
+#define CMN_PLL0_LOCK_REFCNT_START	(0x90 << 2)
+#define CMN_PLL0_LOCK_PLLCNT_START	(0x92 << 2)
+#define CMN_PLL0_LOCK_PLLCNT_THR	(0x93 << 2)
+#define CMN_PLL0_INTDIV			(0x94 << 2)
+#define CMN_PLL0_FRACDIV		(0x95 << 2)
+#define CMN_PLL0_HIGH_THR		(0x96 << 2)
+#define CMN_PLL0_DSM_DIAG		(0x97 << 2)
+#define CMN_PLL0_SS_CTRL1		(0x98 << 2)
+#define CMN_PLL0_SS_CTRL2		(0x99 << 2)
+#define CMN_PLL1_VCOCAL_START		(0xa1 << 2)
+#define CMN_PLL1_VCOCAL_OVRD		(0xa3 << 2)
+#define CMN_PLL1_VCOCAL_INIT		(0xa4 << 2)
+#define CMN_PLL1_VCOCAL_ITER		(0xa5 << 2)
+#define CMN_PLL1_LOCK_REFCNT_START	(0xb0 << 2)
+#define CMN_PLL1_LOCK_PLLCNT_START	(0xb2 << 2)
+#define CMN_PLL1_LOCK_PLLCNT_THR	(0xb3 << 2)
+#define CMN_PLL1_INTDIV			(0xb4 << 2)
+#define CMN_PLL1_FRACDIV		(0xb5 << 2)
+#define CMN_PLL1_HIGH_THR		(0xb6 << 2)
+#define CMN_PLL1_DSM_DIAG		(0xb7 << 2)
+#define CMN_PLL1_SS_CTRL1		(0xb8 << 2)
+#define CMN_PLL1_SS_CTRL2		(0xb9 << 2)
+#define CMN_RXCAL_OVRD			(0xd1 << 2)
+#define CMN_TXPUCAL_CTRL		(0xe0 << 2)
+#define CMN_TXPUCAL_OVRD		(0xe1 << 2)
+#define CMN_TXPDCAL_OVRD		(0xf1 << 2)
+#define CMN_DIAG_PLL0_FBH_OVRD		(0x1c0 << 2)
+#define CMN_DIAG_PLL0_FBL_OVRD		(0x1c1 << 2)
+#define CMN_DIAG_PLL0_OVRD		(0x1c2 << 2)
+#define CMN_DIAG_PLL0_V2I_TUNE		(0x1c5 << 2)
+#define CMN_DIAG_PLL0_CP_TUNE		(0x1c6 << 2)
+#define CMN_DIAG_PLL0_LF_PROG		(0x1c7 << 2)
+#define CMN_DIAG_PLL1_FBH_OVRD		(0x1d0 << 2)
+#define CMN_DIAG_PLL1_FBL_OVRD		(0x1d1 << 2)
+#define CMN_DIAG_PLL1_OVRD		(0x1d2 << 2)
+#define CMN_DIAG_PLL1_V2I_TUNE		(0x1d5 << 2)
+#define CMN_DIAG_PLL1_CP_TUNE		(0x1d6 << 2)
+#define CMN_DIAG_PLL1_LF_PROG		(0x1d7 << 2)
+#define CMN_DIAG_PLL1_PTATIS_TUNE1	(0x1d8 << 2)
+#define CMN_DIAG_PLL1_PTATIS_TUNE2	(0x1d9 << 2)
+#define CMN_DIAG_PLL1_INCLK_CTRL	(0x1da << 2)
+#define CMN_DIAG_HSCLK_SEL		(0x1e0 << 2)
+
+#define XCVR_PSM_RCTRL(n)		((0x4001 | ((n) << 9)) << 2)
+#define XCVR_PSM_CAL_TMR(n)		((0x4002 | ((n) << 9)) << 2)
+#define XCVR_PSM_A0IN_TMR(n)		((0x4003 | ((n) << 9)) << 2)
+#define TX_TXCC_CAL_SCLR_MULT(n)	((0x4047 | ((n) << 9)) << 2)
+#define TX_TXCC_CPOST_MULT_00(n)	((0x404c | ((n) << 9)) << 2)
+#define TX_TXCC_CPOST_MULT_01(n)	((0x404d | ((n) << 9)) << 2)
+#define TX_TXCC_CPOST_MULT_10(n)	((0x404e | ((n) << 9)) << 2)
+#define TX_TXCC_CPOST_MULT_11(n)	((0x404f | ((n) << 9)) << 2)
+#define TX_TXCC_MGNFS_MULT_000(n)	((0x4050 | ((n) << 9)) << 2)
+#define TX_TXCC_MGNFS_MULT_001(n)	((0x4051 | ((n) << 9)) << 2)
+#define TX_TXCC_MGNFS_MULT_010(n)	((0x4052 | ((n) << 9)) << 2)
+#define TX_TXCC_MGNFS_MULT_011(n)	((0x4053 | ((n) << 9)) << 2)
+#define TX_TXCC_MGNFS_MULT_100(n)	((0x4054 | ((n) << 9)) << 2)
+#define TX_TXCC_MGNFS_MULT_101(n)	((0x4055 | ((n) << 9)) << 2)
+#define TX_TXCC_MGNFS_MULT_110(n)	((0x4056 | ((n) << 9)) << 2)
+#define TX_TXCC_MGNFS_MULT_111(n)	((0x4057 | ((n) << 9)) << 2)
+#define XCVR_DIAG_PLLDRC_CTRL(n)	((0x40e0 | ((n) << 9)) << 2)
+#define XCVR_DIAG_BIDI_CTRL(n)		((0x40e8 | ((n) << 9)) << 2)
+#define XCVR_DIAG_LANE_FCM_EN_MGN(n)	((0x40f2 | ((n) << 9)) << 2)
+#define TX_PSC_A0(n)			((0x4100 | ((n) << 9)) << 2)
+#define TX_PSC_A1(n)			((0x4101 | ((n) << 9)) << 2)
+#define TX_PSC_A2(n)			((0x4102 | ((n) << 9)) << 2)
+#define TX_PSC_A3(n)			((0x4103 | ((n) << 9)) << 2)
+#define TX_RCVDET_CTRL(n)		((0x4120 | ((n) << 9)) << 2)
+#define TX_RCVDET_EN_TMR(n)		((0x4122 | ((n) << 9)) << 2)
+#define TX_RCVDET_ST_TMR(n)		((0x4123 | ((n) << 9)) << 2)
+#define TX_DIAG_TX_DRV(n)		((0x41e1 | ((n) << 9)) << 2)
+#define TX_DIAG_BGREF_PREDRV_DELAY	(0x41e7 << 2)
+#define TX_ANA_CTRL_REG_1		(0x5020 << 2)
+#define TX_ANA_CTRL_REG_2		(0x5021 << 2)
+#define TXDA_COEFF_CALC_CTRL		(0x5022 << 2)
+#define TX_DIG_CTRL_REG_2		(0x5024 << 2)
+#define TXDA_CYA_AUXDA_CYA		(0x5025 << 2)
+#define TX_ANA_CTRL_REG_3		(0x5026 << 2)
+#define TX_ANA_CTRL_REG_4		(0x5027 << 2)
+#define TX_ANA_CTRL_REG_5		(0x5029 << 2)
+
+#define RX_PSC_A0(n)			((0x8000 | ((n) << 9)) << 2)
+#define RX_PSC_A1(n)			((0x8001 | ((n) << 9)) << 2)
+#define RX_PSC_A2(n)			((0x8002 | ((n) << 9)) << 2)
+#define RX_PSC_A3(n)			((0x8003 | ((n) << 9)) << 2)
+#define RX_PSC_CAL(n)			((0x8006 | ((n) << 9)) << 2)
+#define RX_PSC_RDY(n)			((0x8007 | ((n) << 9)) << 2)
+#define RX_IQPI_ILL_CAL_OVRD		(0x8023 << 2)
+#define RX_EPI_ILL_CAL_OVRD		(0x8033 << 2)
+#define RX_SDCAL0_OVRD			(0x8041 << 2)
+#define RX_SDCAL1_OVRD			(0x8049 << 2)
+#define RX_SLC_INIT			(0x806d << 2)
+#define RX_SLC_RUN			(0x806e << 2)
+#define RX_CDRLF_CNFG2			(0x8081 << 2)
+#define RX_SIGDET_HL_FILT_TMR(n)	((0x8090 | ((n) << 9)) << 2)
+#define RX_SLC_IOP0_OVRD		(0x8101 << 2)
+#define RX_SLC_IOP1_OVRD		(0x8105 << 2)
+#define RX_SLC_QOP0_OVRD		(0x8109 << 2)
+#define RX_SLC_QOP1_OVRD		(0x810d << 2)
+#define RX_SLC_EOP0_OVRD		(0x8111 << 2)
+#define RX_SLC_EOP1_OVRD		(0x8115 << 2)
+#define RX_SLC_ION0_OVRD		(0x8119 << 2)
+#define RX_SLC_ION1_OVRD		(0x811d << 2)
+#define RX_SLC_QON0_OVRD		(0x8121 << 2)
+#define RX_SLC_QON1_OVRD		(0x8125 << 2)
+#define RX_SLC_EON0_OVRD		(0x8129 << 2)
+#define RX_SLC_EON1_OVRD		(0x812d << 2)
+#define RX_SLC_IEP0_OVRD		(0x8131 << 2)
+#define RX_SLC_IEP1_OVRD		(0x8135 << 2)
+#define RX_SLC_QEP0_OVRD		(0x8139 << 2)
+#define RX_SLC_QEP1_OVRD		(0x813d << 2)
+#define RX_SLC_EEP0_OVRD		(0x8141 << 2)
+#define RX_SLC_EEP1_OVRD		(0x8145 << 2)
+#define RX_SLC_IEN0_OVRD		(0x8149 << 2)
+#define RX_SLC_IEN1_OVRD		(0x814d << 2)
+#define RX_SLC_QEN0_OVRD		(0x8151 << 2)
+#define RX_SLC_QEN1_OVRD		(0x8155 << 2)
+#define RX_SLC_EEN0_OVRD		(0x8159 << 2)
+#define RX_SLC_EEN1_OVRD		(0x815d << 2)
+#define RX_REE_CTRL_DATA_MASK(n)	((0x81bb | ((n) << 9)) << 2)
+#define RX_DIAG_SIGDET_TUNE(n)		((0x81dc | ((n) << 9)) << 2)
+#define RX_DIAG_SC2C_DELAY		(0x81e1 << 2)
+
+#define PMA_LANE_CFG			(0xc000 << 2)
+#define PIPE_CMN_CTRL1			(0xc001 << 2)
+#define PIPE_CMN_CTRL2			(0xc002 << 2)
+#define PIPE_COM_LOCK_CFG1		(0xc003 << 2)
+#define PIPE_COM_LOCK_CFG2		(0xc004 << 2)
+#define PIPE_RCV_DET_INH		(0xc005 << 2)
+#define DP_MODE_CTL			(0xc008 << 2)
+#define DP_CLK_CTL			(0xc009 << 2)
+#define STS				(0xc00F << 2)
+#define PHY_ISO_CMN_CTRL		(0xc010 << 2)
+#define PHY_DP_TX_CTL			(0xc408 << 2)
+#define PMA_CMN_CTRL1			(0xc800 << 2)
+#define PHY_PMA_ISO_CMN_CTRL		(0xc810 << 2)
+#define PHY_ISOLATION_CTRL		(0xc81f << 2)
+#define PHY_PMA_ISO_XCVR_CTRL(n)	((0xcc11 | ((n) << 6)) << 2)
+#define PHY_PMA_ISO_LINK_MODE(n)	((0xcc12 | ((n) << 6)) << 2)
+#define PHY_PMA_ISO_PWRST_CTRL(n)	((0xcc13 | ((n) << 6)) << 2)
+#define PHY_PMA_ISO_TX_DATA_LO(n)	((0xcc14 | ((n) << 6)) << 2)
+#define PHY_PMA_ISO_TX_DATA_HI(n)	((0xcc15 | ((n) << 6)) << 2)
+#define PHY_PMA_ISO_RX_DATA_LO(n)	((0xcc16 | ((n) << 6)) << 2)
+#define PHY_PMA_ISO_RX_DATA_HI(n)	((0xcc17 | ((n) << 6)) << 2)
+#define TX_BIST_CTRL(n)			((0x4140 | ((n) << 9)) << 2)
+#define TX_BIST_UDDWR(n)		((0x4141 | ((n) << 9)) << 2)
+
+/*
+ * Selects which PLL clock will be driven on the analog high speed
+ * clock 0: PLL 0 div 1
+ * clock 1: PLL 1 div 2
+ */
+#define CLK_PLL_CONFIG			0X30
+#define CLK_PLL_MASK			0x33
+
+#define CMN_READY			BIT(0)
+
+#define DP_PLL_CLOCK_ENABLE		BIT(2)
+#define DP_PLL_ENABLE			BIT(0)
+#define DP_PLL_DATA_RATE_RBR		((2 << 12) | (4 << 8))
+#define DP_PLL_DATA_RATE_HBR		((2 << 12) | (4 << 8))
+#define DP_PLL_DATA_RATE_HBR2		((1 << 12) | (2 << 8))
+
+#define DP_MODE_A0			BIT(4)
+#define DP_MODE_A2			BIT(6)
+#define DP_MODE_ENTER_A0		0xc101
+#define DP_MODE_ENTER_A2		0xc104
+
+#define PHY_MODE_SET_TIMEOUT		100000
+
+#define PIN_ASSIGN_C_E			0x51d9
+#define PIN_ASSIGN_D_F			0x5100
+
+#define MODE_DISCONNECT			0
+#define MODE_UFP_USB			BIT(0)
+#define MODE_DFP_USB			BIT(1)
+#define MODE_DFP_DP			BIT(2)
+
+struct usb3phy_reg {
+	u32 offset;
+	u32 enable_bit;
+	u32 write_enable;
+};
+
+struct rockchip_usb3phy_port_cfg {
+	struct usb3phy_reg typec_conn_dir;
+	struct usb3phy_reg usb3tousb2_en;
+	struct usb3phy_reg external_psm;
+	struct usb3phy_reg pipe_status;
+};
+
+struct rockchip_typec_phy {
+	struct device *dev;
+	void __iomem *base;
+	struct extcon_dev *extcon;
+	struct regmap *grf_regs;
+	struct clk *clk_core;
+	struct clk *clk_ref;
+	struct reset_control *uphy_rst;
+	struct reset_control *pipe_rst;
+	struct reset_control *tcphy_rst;
+	struct rockchip_usb3phy_port_cfg port_cfgs;
+	/* mutex to protect access to individual PHYs */
+	struct mutex lock;
+
+	bool flip;
+	u8 mode;
+};
+
+struct phy_reg {
+	u16 value;
+	u32 addr;
+};
+
+struct phy_reg usb3_pll_cfg[] = {
+	{ 0xf0,		CMN_PLL0_VCOCAL_INIT },
+	{ 0x18,		CMN_PLL0_VCOCAL_ITER },
+	{ 0xd0,		CMN_PLL0_INTDIV },
+	{ 0x4a4a,	CMN_PLL0_FRACDIV },
+	{ 0x34,		CMN_PLL0_HIGH_THR },
+	{ 0x1ee,	CMN_PLL0_SS_CTRL1 },
+	{ 0x7f03,	CMN_PLL0_SS_CTRL2 },
+	{ 0x20,		CMN_PLL0_DSM_DIAG },
+	{ 0,		CMN_DIAG_PLL0_OVRD },
+	{ 0,		CMN_DIAG_PLL0_FBH_OVRD },
+	{ 0,		CMN_DIAG_PLL0_FBL_OVRD },
+	{ 0x7,		CMN_DIAG_PLL0_V2I_TUNE },
+	{ 0x45,		CMN_DIAG_PLL0_CP_TUNE },
+	{ 0x8,		CMN_DIAG_PLL0_LF_PROG },
+};
+
+struct phy_reg dp_pll_cfg[] = {
+	{ 0xf0,		CMN_PLL1_VCOCAL_INIT },
+	{ 0x18,		CMN_PLL1_VCOCAL_ITER },
+	{ 0x30b9,	CMN_PLL1_VCOCAL_START },
+	{ 0x21c,	CMN_PLL1_INTDIV },
+	{ 0,		CMN_PLL1_FRACDIV },
+	{ 0x5,		CMN_PLL1_HIGH_THR },
+	{ 0x35,		CMN_PLL1_SS_CTRL1 },
+	{ 0x7f1e,	CMN_PLL1_SS_CTRL2 },
+	{ 0x20,		CMN_PLL1_DSM_DIAG },
+	{ 0,		CMN_PLLSM1_USER_DEF_CTRL },
+	{ 0,		CMN_DIAG_PLL1_OVRD },
+	{ 0,		CMN_DIAG_PLL1_FBH_OVRD },
+	{ 0,		CMN_DIAG_PLL1_FBL_OVRD },
+	{ 0x6,		CMN_DIAG_PLL1_V2I_TUNE },
+	{ 0x45,		CMN_DIAG_PLL1_CP_TUNE },
+	{ 0x8,		CMN_DIAG_PLL1_LF_PROG },
+	{ 0x100,	CMN_DIAG_PLL1_PTATIS_TUNE1 },
+	{ 0x7,		CMN_DIAG_PLL1_PTATIS_TUNE2 },
+	{ 0x4,		CMN_DIAG_PLL1_INCLK_CTRL },
+};
+
+static void tcphy_cfg_24m(struct rockchip_typec_phy *tcphy)
+{
+	u32 i, rdata;
+
+	/*
+	 * cmn_ref_clk_sel = 3, select the 24Mhz for clk parent
+	 * cmn_psm_clk_dig_div = 2, set the clk division to 2
+	 */
+	writel(0x830, tcphy->base + PMA_CMN_CTRL1);
+	for (i = 0; i < 4; i++) {
+		/*
+		 * The following PHY configuration assumes a 24 MHz reference
+		 * clock.
+		 */
+		writel(0x90, tcphy->base + XCVR_DIAG_LANE_FCM_EN_MGN(i));
+		writel(0x960, tcphy->base + TX_RCVDET_EN_TMR(i));
+		writel(0x30, tcphy->base + TX_RCVDET_ST_TMR(i));
+	}
+
+	rdata = readl(tcphy->base + CMN_DIAG_HSCLK_SEL);
+	rdata &= ~CLK_PLL_MASK;
+	rdata |= CLK_PLL_CONFIG;
+	writel(rdata, tcphy->base + CMN_DIAG_HSCLK_SEL);
+}
+
+static void tcphy_cfg_usb3_pll(struct rockchip_typec_phy *tcphy)
+{
+	u32 i;
+
+	/* load the configuration of PLL0 */
+	for (i = 0; i < ARRAY_SIZE(usb3_pll_cfg); i++)
+		writel(usb3_pll_cfg[i].value,
+		       tcphy->base + usb3_pll_cfg[i].addr);
+}
+
+static void tcphy_cfg_dp_pll(struct rockchip_typec_phy *tcphy)
+{
+	u32 i;
+
+	/* set the default mode to RBR */
+	writel(DP_PLL_CLOCK_ENABLE | DP_PLL_ENABLE | DP_PLL_DATA_RATE_RBR,
+	       tcphy->base + DP_CLK_CTL);
+
+	/* load the configuration of PLL1 */
+	for (i = 0; i < ARRAY_SIZE(dp_pll_cfg); i++)
+		writel(dp_pll_cfg[i].value, tcphy->base + dp_pll_cfg[i].addr);
+}
+
+static void tcphy_tx_usb3_cfg_lane(struct rockchip_typec_phy *tcphy, u32 lane)
+{
+	writel(0x7799, tcphy->base + TX_PSC_A0(lane));
+	writel(0x7798, tcphy->base + TX_PSC_A1(lane));
+	writel(0x5098, tcphy->base + TX_PSC_A2(lane));
+	writel(0x5098, tcphy->base + TX_PSC_A3(lane));
+	writel(0, tcphy->base + TX_TXCC_MGNFS_MULT_000(lane));
+	writel(0xbf, tcphy->base + XCVR_DIAG_BIDI_CTRL(lane));
+}
+
+static void tcphy_rx_usb3_cfg_lane(struct rockchip_typec_phy *tcphy, u32 lane)
+{
+	writel(0xa6fd, tcphy->base + RX_PSC_A0(lane));
+	writel(0xa6fd, tcphy->base + RX_PSC_A1(lane));
+	writel(0xa410, tcphy->base + RX_PSC_A2(lane));
+	writel(0x2410, tcphy->base + RX_PSC_A3(lane));
+	writel(0x23ff, tcphy->base + RX_PSC_CAL(lane));
+	writel(0x13, tcphy->base + RX_SIGDET_HL_FILT_TMR(lane));
+	writel(0x03e7, tcphy->base + RX_REE_CTRL_DATA_MASK(lane));
+	writel(0x1004, tcphy->base + RX_DIAG_SIGDET_TUNE(lane));
+	writel(0x2010, tcphy->base + RX_PSC_RDY(lane));
+	writel(0xfb, tcphy->base + XCVR_DIAG_BIDI_CTRL(lane));
+}
+
+static void tcphy_dp_cfg_lane(struct rockchip_typec_phy *tcphy, u32 lane)
+{
+	u16 rdata;
+
+	writel(0xbefc, tcphy->base + XCVR_PSM_RCTRL(lane));
+	writel(0x6799, tcphy->base + TX_PSC_A0(lane));
+	writel(0x6798, tcphy->base + TX_PSC_A1(lane));
+	writel(0x98, tcphy->base + TX_PSC_A2(lane));
+	writel(0x98, tcphy->base + TX_PSC_A3(lane));
+
+	writel(0, tcphy->base + TX_TXCC_MGNFS_MULT_000(lane));
+	writel(0, tcphy->base + TX_TXCC_MGNFS_MULT_001(lane));
+	writel(0, tcphy->base + TX_TXCC_MGNFS_MULT_010(lane));
+	writel(0, tcphy->base + TX_TXCC_MGNFS_MULT_011(lane));
+	writel(0, tcphy->base + TX_TXCC_MGNFS_MULT_100(lane));
+	writel(0, tcphy->base + TX_TXCC_MGNFS_MULT_101(lane));
+	writel(0, tcphy->base + TX_TXCC_MGNFS_MULT_110(lane));
+	writel(0, tcphy->base + TX_TXCC_MGNFS_MULT_111(lane));
+	writel(0, tcphy->base + TX_TXCC_CPOST_MULT_10(lane));
+	writel(0, tcphy->base + TX_TXCC_CPOST_MULT_01(lane));
+	writel(0, tcphy->base + TX_TXCC_CPOST_MULT_00(lane));
+	writel(0, tcphy->base + TX_TXCC_CPOST_MULT_11(lane));
+
+	writel(0x128, tcphy->base + TX_TXCC_CAL_SCLR_MULT(lane));
+	writel(0x400, tcphy->base + TX_DIAG_TX_DRV(lane));
+
+	rdata = readl(tcphy->base + XCVR_DIAG_PLLDRC_CTRL(lane));
+	rdata = (rdata & 0x8fff) | 0x6000;
+	writel(rdata, tcphy->base + XCVR_DIAG_PLLDRC_CTRL(lane));
+}
+
+static inline int property_enable(struct rockchip_typec_phy *tcphy,
+				  const struct usb3phy_reg *reg, bool en)
+{
+	u32 mask = 1 << reg->write_enable;
+	u32 val = en << reg->enable_bit;
+
+	return regmap_write(tcphy->grf_regs, reg->offset, val | mask);
+}
+
+static void tcphy_dp_aux_calibration(struct rockchip_typec_phy *tcphy)
+{
+	u16 rdata, rdata2, val;
+
+	/* disable txda_cal_latch_en for rewrite the calibration values */
+	rdata = readl(tcphy->base + TX_ANA_CTRL_REG_1);
+	val = rdata & 0xdfff;
+	writel(val, tcphy->base + TX_ANA_CTRL_REG_1);
+
+	/*
+	 * read a resistor calibration code from CMN_TXPUCAL_CTRL[6:0] and
+	 * write it to TX_DIG_CTRL_REG_2[6:0], and delay 1ms to make sure it
+	 * works.
+	 */
+	rdata = readl(tcphy->base + TX_DIG_CTRL_REG_2);
+	rdata = rdata & 0xffc0;
+
+	rdata2 = readl(tcphy->base + CMN_TXPUCAL_CTRL);
+	rdata2 = rdata2 & 0x3f;
+
+	val = rdata | rdata2;
+	writel(val, tcphy->base + TX_DIG_CTRL_REG_2);
+	usleep_range(1000, 1050);
+
+	/*
+	 * Enable signal for latch that sample and holds calibration values.
+	 * Activate this signal for 1 clock cycle to sample new calibration
+	 * values.
+	 */
+	rdata = readl(tcphy->base + TX_ANA_CTRL_REG_1);
+	val = rdata | 0x2000;
+	writel(val, tcphy->base + TX_ANA_CTRL_REG_1);
+	usleep_range(150, 200);
+
+	/* set TX Voltage Level and TX Deemphasis to 0 */
+	writel(0, tcphy->base + PHY_DP_TX_CTL);
+	/* re-enable decap */
+	writel(0x100, tcphy->base + TX_ANA_CTRL_REG_2);
+	writel(0x300, tcphy->base + TX_ANA_CTRL_REG_2);
+	writel(0x2008, tcphy->base + TX_ANA_CTRL_REG_1);
+	writel(0x2018, tcphy->base + TX_ANA_CTRL_REG_1);
+
+	writel(0, tcphy->base + TX_ANA_CTRL_REG_5);
+
+	/*
+	 * Programs txda_drv_ldo_prog[15:0], Sets driver LDO
+	 * voltage 16'h1001 for DP-AUX-TX and RX
+	 */
+	writel(0x1001, tcphy->base + TX_ANA_CTRL_REG_4);
+
+	/* re-enables Bandgap reference for LDO */
+	writel(0x2098, tcphy->base + TX_ANA_CTRL_REG_1);
+	writel(0x2198, tcphy->base + TX_ANA_CTRL_REG_1);
+
+	/*
+	 * re-enables the transmitter pre-driver, driver data selection MUX,
+	 * and receiver detect circuits.
+	 */
+	writel(0x301, tcphy->base + TX_ANA_CTRL_REG_2);
+	writel(0x303, tcphy->base + TX_ANA_CTRL_REG_2);
+
+	/*
+	 * BIT 12: Controls auxda_polarity, which selects the polarity of the
+	 * xcvr:
+	 * 1, Reverses the polarity (If TYPEC, Pulls ups aux_p and pull
+	 * down aux_m)
+	 * 0, Normal polarity (if TYPE_C, pulls up aux_m and pulls down
+	 * aux_p)
+	 */
+	val = 0xa078;
+	if (!tcphy->flip)
+		val |= BIT(12);
+	writel(val, tcphy->base + TX_ANA_CTRL_REG_1);
+
+	writel(0, tcphy->base + TX_ANA_CTRL_REG_3);
+	writel(0, tcphy->base + TX_ANA_CTRL_REG_4);
+	writel(0, tcphy->base + TX_ANA_CTRL_REG_5);
+
+	/*
+	 * Controls low_power_swing_en, set the voltage swing of the driver
+	 * to 400mv. The values	below are peak to peak (differential) values.
+	 */
+	writel(4, tcphy->base + TXDA_COEFF_CALC_CTRL);
+	writel(0, tcphy->base + TXDA_CYA_AUXDA_CYA);
+
+	/* Controls tx_high_z_tm_en */
+	val = readl(tcphy->base + TX_DIG_CTRL_REG_2);
+	val |= BIT(15);
+	writel(val, tcphy->base + TX_DIG_CTRL_REG_2);
+}
+
+static int tcphy_phy_init(struct rockchip_typec_phy *tcphy, u8 mode)
+{
+	struct rockchip_usb3phy_port_cfg *cfg = &tcphy->port_cfgs;
+	int ret, i;
+	u32 val;
+
+	ret = clk_prepare_enable(tcphy->clk_core);
+	if (ret) {
+		dev_err(tcphy->dev, "Failed to prepare_enable core clock\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(tcphy->clk_ref);
+	if (ret) {
+		dev_err(tcphy->dev, "Failed to prepare_enable ref clock\n");
+		goto err_clk_core;
+	}
+
+	reset_control_deassert(tcphy->tcphy_rst);
+
+	property_enable(tcphy, &cfg->typec_conn_dir, tcphy->flip);
+
+	tcphy_cfg_24m(tcphy);
+
+	if (mode == MODE_DFP_DP) {
+		tcphy_cfg_dp_pll(tcphy);
+		for (i = 0; i < 4; i++)
+			tcphy_dp_cfg_lane(tcphy, i);
+
+		writel(PIN_ASSIGN_C_E, tcphy->base + PMA_LANE_CFG);
+	} else {
+		tcphy_cfg_usb3_pll(tcphy);
+		tcphy_cfg_dp_pll(tcphy);
+		if (tcphy->flip) {
+			tcphy_tx_usb3_cfg_lane(tcphy, 3);
+			tcphy_rx_usb3_cfg_lane(tcphy, 2);
+			tcphy_dp_cfg_lane(tcphy, 0);
+			tcphy_dp_cfg_lane(tcphy, 1);
+		} else {
+			tcphy_tx_usb3_cfg_lane(tcphy, 0);
+			tcphy_rx_usb3_cfg_lane(tcphy, 1);
+			tcphy_dp_cfg_lane(tcphy, 2);
+			tcphy_dp_cfg_lane(tcphy, 3);
+		}
+
+		writel(PIN_ASSIGN_D_F, tcphy->base + PMA_LANE_CFG);
+	}
+
+	writel(DP_MODE_ENTER_A2, tcphy->base + DP_MODE_CTL);
+
+	reset_control_deassert(tcphy->uphy_rst);
+
+	ret = readx_poll_timeout(readl, tcphy->base + PMA_CMN_CTRL1,
+				 val, val & CMN_READY, 10,
+				 PHY_MODE_SET_TIMEOUT);
+	if (ret < 0) {
+		dev_err(tcphy->dev, "wait pma ready timeout\n");
+		ret = -ETIMEDOUT;
+		goto err_wait_pma;
+	}
+
+	reset_control_deassert(tcphy->pipe_rst);
+
+	return 0;
+
+err_wait_pma:
+	reset_control_assert(tcphy->uphy_rst);
+	reset_control_assert(tcphy->tcphy_rst);
+	clk_disable_unprepare(tcphy->clk_ref);
+err_clk_core:
+	clk_disable_unprepare(tcphy->clk_core);
+	return ret;
+}
+
+static void tcphy_phy_deinit(struct rockchip_typec_phy *tcphy)
+{
+	reset_control_assert(tcphy->tcphy_rst);
+	reset_control_assert(tcphy->uphy_rst);
+	reset_control_assert(tcphy->pipe_rst);
+	clk_disable_unprepare(tcphy->clk_core);
+	clk_disable_unprepare(tcphy->clk_ref);
+}
+
+static int tcphy_get_mode(struct rockchip_typec_phy *tcphy)
+{
+	struct extcon_dev *edev = tcphy->extcon;
+	union extcon_property_value property;
+	unsigned int id;
+	bool dfp, ufp, dp;
+	u8 mode;
+	int ret;
+
+	ufp = extcon_get_state(edev, EXTCON_USB);
+	dfp = extcon_get_state(edev, EXTCON_USB_HOST);
+	dp = extcon_get_state(edev, EXTCON_DISP_DP);
+
+	mode = MODE_DFP_USB;
+	id = EXTCON_USB_HOST;
+
+	if (ufp) {
+		mode = MODE_UFP_USB;
+		id = EXTCON_USB;
+	} else if (dp) {
+		mode = MODE_DFP_DP;
+		id = EXTCON_DISP_DP;
+
+		ret = extcon_get_property(edev, id, EXTCON_PROP_USB_SS,
+					  &property);
+		if (ret) {
+			dev_err(tcphy->dev, "get superspeed property failed\n");
+			return ret;
+		}
+
+		if (property.intval)
+			mode |= MODE_DFP_USB;
+	}
+
+	ret = extcon_get_property(edev, id, EXTCON_PROP_USB_TYPEC_POLARITY,
+				  &property);
+	if (ret) {
+		dev_err(tcphy->dev, "get polarity property failed\n");
+		return ret;
+	}
+
+	tcphy->flip = property.intval ? 1 : 0;
+
+	return mode;
+}
+
+static int rockchip_usb3_phy_power_on(struct phy *phy)
+{
+	struct rockchip_typec_phy *tcphy = phy_get_drvdata(phy);
+	struct rockchip_usb3phy_port_cfg *cfg = &tcphy->port_cfgs;
+	const struct usb3phy_reg *reg = &cfg->pipe_status;
+	int timeout, new_mode, ret = 0;
+	u32 val;
+
+	mutex_lock(&tcphy->lock);
+
+	new_mode = tcphy_get_mode(tcphy);
+	if (new_mode < 0) {
+		ret = new_mode;
+		goto unlock_ret;
+	}
+
+	/* DP-only mode; fall back to USB2 */
+	if (!(new_mode & (MODE_DFP_USB | MODE_UFP_USB)))
+		goto unlock_ret;
+
+	if (tcphy->mode == new_mode)
+		goto unlock_ret;
+
+	if (tcphy->mode == MODE_DISCONNECT)
+		tcphy_phy_init(tcphy, new_mode);
+
+	/* wait TCPHY for pipe ready */
+	for (timeout = 0; timeout < 100; timeout++) {
+		regmap_read(tcphy->grf_regs, reg->offset, &val);
+		if (!(val & BIT(reg->enable_bit))) {
+			tcphy->mode |= new_mode & (MODE_DFP_USB | MODE_UFP_USB);
+			goto unlock_ret;
+		}
+		usleep_range(10, 20);
+	}
+
+	if (tcphy->mode == MODE_DISCONNECT)
+		tcphy_phy_deinit(tcphy);
+
+	ret = -ETIMEDOUT;
+
+unlock_ret:
+	mutex_unlock(&tcphy->lock);
+	return ret;
+}
+
+static int rockchip_usb3_phy_power_off(struct phy *phy)
+{
+	struct rockchip_typec_phy *tcphy = phy_get_drvdata(phy);
+
+	mutex_lock(&tcphy->lock);
+
+	if (tcphy->mode == MODE_DISCONNECT)
+		goto unlock;
+
+	tcphy->mode &= ~(MODE_UFP_USB | MODE_DFP_USB);
+	if (tcphy->mode == MODE_DISCONNECT)
+		tcphy_phy_deinit(tcphy);
+
+unlock:
+	mutex_unlock(&tcphy->lock);
+	return 0;
+}
+
+static const struct phy_ops rockchip_usb3_phy_ops = {
+	.power_on	= rockchip_usb3_phy_power_on,
+	.power_off	= rockchip_usb3_phy_power_off,
+	.owner		= THIS_MODULE,
+};
+
+static int rockchip_dp_phy_power_on(struct phy *phy)
+{
+	struct rockchip_typec_phy *tcphy = phy_get_drvdata(phy);
+	int new_mode, ret = 0;
+	u32 val;
+
+	mutex_lock(&tcphy->lock);
+
+	new_mode = tcphy_get_mode(tcphy);
+	if (new_mode < 0) {
+		ret = new_mode;
+		goto unlock_ret;
+	}
+
+	if (!(new_mode & MODE_DFP_DP)) {
+		ret = -ENODEV;
+		goto unlock_ret;
+	}
+
+	if (tcphy->mode == new_mode)
+		goto unlock_ret;
+
+	/*
+	 * If the PHY has been power on, but the mode is not DP only mode,
+	 * re-init the PHY for setting all of 4 lanes to DP.
+	 */
+	if (new_mode == MODE_DFP_DP && tcphy->mode != MODE_DISCONNECT) {
+		tcphy_phy_deinit(tcphy);
+		tcphy_phy_init(tcphy, new_mode);
+	} else if (tcphy->mode == MODE_DISCONNECT) {
+		tcphy_phy_init(tcphy, new_mode);
+	}
+
+	ret = readx_poll_timeout(readl, tcphy->base + DP_MODE_CTL,
+				 val, val & DP_MODE_A2, 1000,
+				 PHY_MODE_SET_TIMEOUT);
+	if (ret < 0) {
+		dev_err(tcphy->dev, "failed to wait TCPHY enter A2\n");
+		goto power_on_finish;
+	}
+
+	tcphy_dp_aux_calibration(tcphy);
+
+	writel(DP_MODE_ENTER_A0, tcphy->base + DP_MODE_CTL);
+
+	ret = readx_poll_timeout(readl, tcphy->base + DP_MODE_CTL,
+				 val, val & DP_MODE_A0, 1000,
+				 PHY_MODE_SET_TIMEOUT);
+	if (ret < 0) {
+		writel(DP_MODE_ENTER_A2, tcphy->base + DP_MODE_CTL);
+		dev_err(tcphy->dev, "failed to wait TCPHY enter A0\n");
+		goto power_on_finish;
+	}
+
+	tcphy->mode |= MODE_DFP_DP;
+
+power_on_finish:
+	if (tcphy->mode == MODE_DISCONNECT)
+		tcphy_phy_deinit(tcphy);
+unlock_ret:
+	mutex_unlock(&tcphy->lock);
+	return ret;
+}
+
+static int rockchip_dp_phy_power_off(struct phy *phy)
+{
+	struct rockchip_typec_phy *tcphy = phy_get_drvdata(phy);
+
+	mutex_lock(&tcphy->lock);
+
+	if (tcphy->mode == MODE_DISCONNECT)
+		goto unlock;
+
+	tcphy->mode &= ~MODE_DFP_DP;
+
+	writel(DP_MODE_ENTER_A2, tcphy->base + DP_MODE_CTL);
+
+	if (tcphy->mode == MODE_DISCONNECT)
+		tcphy_phy_deinit(tcphy);
+
+unlock:
+	mutex_unlock(&tcphy->lock);
+	return 0;
+}
+
+static const struct phy_ops rockchip_dp_phy_ops = {
+	.power_on	= rockchip_dp_phy_power_on,
+	.power_off	= rockchip_dp_phy_power_off,
+	.owner		= THIS_MODULE,
+};
+
+static int tcphy_get_param(struct device *dev,
+			   struct usb3phy_reg *reg,
+			   const char *name)
+{
+	u32 buffer[3];
+	int ret;
+
+	ret = of_property_read_u32_array(dev->of_node, name, buffer, 3);
+	if (ret) {
+		dev_err(dev, "Can not parse %s\n", name);
+		return ret;
+	}
+
+	reg->offset = buffer[0];
+	reg->enable_bit = buffer[1];
+	reg->write_enable = buffer[2];
+	return 0;
+}
+
+static int tcphy_parse_dt(struct rockchip_typec_phy *tcphy,
+			  struct device *dev)
+{
+	struct rockchip_usb3phy_port_cfg *cfg = &tcphy->port_cfgs;
+	int ret;
+
+	ret = tcphy_get_param(dev, &cfg->typec_conn_dir,
+			      "rockchip,typec-conn-dir");
+	if (ret)
+		return ret;
+
+	ret = tcphy_get_param(dev, &cfg->usb3tousb2_en,
+			      "rockchip,usb3tousb2-en");
+	if (ret)
+		return ret;
+
+	ret = tcphy_get_param(dev, &cfg->external_psm,
+			      "rockchip,external-psm");
+	if (ret)
+		return ret;
+
+	ret = tcphy_get_param(dev, &cfg->pipe_status,
+			      "rockchip,pipe-status");
+	if (ret)
+		return ret;
+
+	tcphy->grf_regs = syscon_regmap_lookup_by_phandle(dev->of_node,
+							  "rockchip,grf");
+	if (IS_ERR(tcphy->grf_regs)) {
+		dev_err(dev, "could not find grf dt node\n");
+		return PTR_ERR(tcphy->grf_regs);
+	}
+
+	tcphy->clk_core = devm_clk_get(dev, "tcpdcore");
+	if (IS_ERR(tcphy->clk_core)) {
+		dev_err(dev, "could not get uphy core clock\n");
+		return PTR_ERR(tcphy->clk_core);
+	}
+
+	tcphy->clk_ref = devm_clk_get(dev, "tcpdphy-ref");
+	if (IS_ERR(tcphy->clk_ref)) {
+		dev_err(dev, "could not get uphy ref clock\n");
+		return PTR_ERR(tcphy->clk_ref);
+	}
+
+	tcphy->uphy_rst = devm_reset_control_get(dev, "uphy");
+	if (IS_ERR(tcphy->uphy_rst)) {
+		dev_err(dev, "no uphy_rst reset control found\n");
+		return PTR_ERR(tcphy->uphy_rst);
+	}
+
+	tcphy->pipe_rst = devm_reset_control_get(dev, "uphy-pipe");
+	if (IS_ERR(tcphy->pipe_rst)) {
+		dev_err(dev, "no pipe_rst reset control found\n");
+		return PTR_ERR(tcphy->pipe_rst);
+	}
+
+	tcphy->tcphy_rst = devm_reset_control_get(dev, "uphy-tcphy");
+	if (IS_ERR(tcphy->tcphy_rst)) {
+		dev_err(dev, "no tcphy_rst reset control found\n");
+		return PTR_ERR(tcphy->tcphy_rst);
+	}
+
+	return 0;
+}
+
+static void typec_phy_pre_init(struct rockchip_typec_phy *tcphy)
+{
+	struct rockchip_usb3phy_port_cfg *cfg = &tcphy->port_cfgs;
+
+	reset_control_assert(tcphy->tcphy_rst);
+	reset_control_assert(tcphy->uphy_rst);
+	reset_control_assert(tcphy->pipe_rst);
+
+	/* select external psm clock */
+	property_enable(tcphy, &cfg->external_psm, 1);
+	property_enable(tcphy, &cfg->usb3tousb2_en, 0);
+
+	tcphy->mode = MODE_DISCONNECT;
+}
+
+static int rockchip_typec_phy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *child_np;
+	struct rockchip_typec_phy *tcphy;
+	struct phy_provider *phy_provider;
+	struct resource *res;
+	int ret;
+
+	tcphy = devm_kzalloc(dev, sizeof(*tcphy), GFP_KERNEL);
+	if (!tcphy)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	tcphy->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(tcphy->base))
+		return PTR_ERR(tcphy->base);
+
+	ret = tcphy_parse_dt(tcphy, dev);
+	if (ret)
+		return ret;
+
+	tcphy->dev = dev;
+	platform_set_drvdata(pdev, tcphy);
+	mutex_init(&tcphy->lock);
+
+	typec_phy_pre_init(tcphy);
+
+	tcphy->extcon = extcon_get_edev_by_phandle(dev, 0);
+	if (IS_ERR(tcphy->extcon)) {
+		if (PTR_ERR(tcphy->extcon) != -EPROBE_DEFER)
+			dev_err(dev, "Invalid or missing extcon\n");
+		return PTR_ERR(tcphy->extcon);
+	}
+
+	pm_runtime_enable(dev);
+
+	for_each_available_child_of_node(np, child_np) {
+		struct phy *phy;
+
+		if (!of_node_cmp(child_np->name, "dp-port"))
+			phy = devm_phy_create(dev, child_np,
+					      &rockchip_dp_phy_ops);
+		else if (!of_node_cmp(child_np->name, "usb3-port"))
+			phy = devm_phy_create(dev, child_np,
+					      &rockchip_usb3_phy_ops);
+		else
+			continue;
+
+		if (IS_ERR(phy)) {
+			dev_err(dev, "failed to create phy: %s\n",
+				child_np->name);
+			return PTR_ERR(phy);
+		}
+
+		phy_set_drvdata(phy, tcphy);
+	}
+
+	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+	if (IS_ERR(phy_provider)) {
+		dev_err(dev, "Failed to register phy provider\n");
+		return PTR_ERR(phy_provider);
+	}
+
+	return 0;
+}
+
+static int rockchip_typec_phy_remove(struct platform_device *pdev)
+{
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static const struct of_device_id rockchip_typec_phy_dt_ids[] = {
+	{ .compatible = "rockchip,rk3399-typec-phy" },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, rockchip_typec_phy_dt_ids);
+
+static struct platform_driver rockchip_typec_phy_driver = {
+	.probe		= rockchip_typec_phy_probe,
+	.remove		= rockchip_typec_phy_remove,
+	.driver		= {
+		.name	= "rockchip-typec-phy",
+		.of_match_table = rockchip_typec_phy_dt_ids,
+	},
+};
+
+module_platform_driver(rockchip_typec_phy_driver);
+
+MODULE_AUTHOR("Chris Zhong <zyw@rock-chips.com>");
+MODULE_AUTHOR("Kever Yang <kever.yang@rock-chips.com>");
+MODULE_DESCRIPTION("Rockchip USB TYPE-C PHY driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/phy/phy-rockchip-usb.c b/drivers/phy/phy-rockchip-usb.c
index 2a7381f..734987f 100644
--- a/drivers/phy/phy-rockchip-usb.c
+++ b/drivers/phy/phy-rockchip-usb.c
@@ -29,6 +29,7 @@
 #include <linux/reset.h>
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
+#include <linux/delay.h>
 
 static int enable_usb_uart;
 
@@ -64,6 +65,7 @@
 	struct clk_hw	clk480m_hw;
 	struct phy	*phy;
 	bool		uart_enabled;
+	struct reset_control *reset;
 };
 
 static int rockchip_usb_phy_power(struct rockchip_usb_phy *phy,
@@ -144,9 +146,23 @@
 	return clk_prepare_enable(phy->clk480m);
 }
 
+static int rockchip_usb_phy_reset(struct phy *_phy)
+{
+	struct rockchip_usb_phy *phy = phy_get_drvdata(_phy);
+
+	if (phy->reset) {
+		reset_control_assert(phy->reset);
+		udelay(10);
+		reset_control_deassert(phy->reset);
+	}
+
+	return 0;
+}
+
 static const struct phy_ops ops = {
 	.power_on	= rockchip_usb_phy_power_on,
 	.power_off	= rockchip_usb_phy_power_off,
+	.reset		= rockchip_usb_phy_reset,
 	.owner		= THIS_MODULE,
 };
 
@@ -185,6 +201,10 @@
 		return -EINVAL;
 	}
 
+	rk_phy->reset = of_reset_control_get(child, "phy-reset");
+	if (IS_ERR(rk_phy->reset))
+		rk_phy->reset = NULL;
+
 	rk_phy->reg_offset = reg_offset;
 
 	rk_phy->clk = of_clk_get_by_name(child, "phyclk");
diff --git a/drivers/phy/phy-sun4i-usb.c b/drivers/phy/phy-sun4i-usb.c
index 0a45bc6..b9342a2 100644
--- a/drivers/phy/phy-sun4i-usb.c
+++ b/drivers/phy/phy-sun4i-usb.c
@@ -40,6 +40,8 @@
 #include <linux/power_supply.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
+#include <linux/spinlock.h>
+#include <linux/usb/of.h>
 #include <linux/workqueue.h>
 
 #define REG_ISCR			0x00
@@ -49,7 +51,7 @@
 #define REG_PHYCTL_A33			0x10
 #define REG_PHY_UNK_H3			0x20
 
-#define REG_PMU_UNK_H3			0x10
+#define REG_PMU_UNK1			0x10
 
 #define PHYCTL_DATA			BIT(7)
 
@@ -97,6 +99,7 @@
 	sun6i_a31_phy,
 	sun8i_a33_phy,
 	sun8i_h3_phy,
+	sun50i_a64_phy,
 };
 
 struct sun4i_usb_phy_cfg {
@@ -105,12 +108,14 @@
 	u32 disc_thresh;
 	u8 phyctl_offset;
 	bool dedicated_clocks;
+	bool enable_pmu_unk1;
 };
 
 struct sun4i_usb_phy_data {
 	void __iomem *base;
 	const struct sun4i_usb_phy_cfg *cfg;
-	struct mutex mutex;
+	enum usb_dr_mode dr_mode;
+	spinlock_t reg_lock; /* guard access to phyctl reg */
 	struct sun4i_usb_phy {
 		struct phy *phy;
 		void __iomem *pmu;
@@ -128,6 +133,7 @@
 	struct power_supply *vbus_power_supply;
 	struct notifier_block vbus_power_nb;
 	bool vbus_power_nb_registered;
+	bool force_session_end;
 	int id_det_irq;
 	int vbus_det_irq;
 	int id_det;
@@ -176,12 +182,14 @@
 	struct sun4i_usb_phy_data *phy_data = to_sun4i_usb_phy_data(phy);
 	u32 temp, usbc_bit = BIT(phy->index * 2);
 	void __iomem *phyctl = phy_data->base + phy_data->cfg->phyctl_offset;
+	unsigned long flags;
 	int i;
 
-	mutex_lock(&phy_data->mutex);
+	spin_lock_irqsave(&phy_data->reg_lock, flags);
 
-	if (phy_data->cfg->type == sun8i_a33_phy) {
-		/* A33 needs us to set phyctl to 0 explicitly */
+	if (phy_data->cfg->type == sun8i_a33_phy ||
+	    phy_data->cfg->type == sun50i_a64_phy) {
+		/* A33 or A64 needs us to set phyctl to 0 explicitly */
 		writel(0, phyctl);
 	}
 
@@ -215,7 +223,8 @@
 
 		data >>= 1;
 	}
-	mutex_unlock(&phy_data->mutex);
+
+	spin_unlock_irqrestore(&phy_data->reg_lock, flags);
 }
 
 static void sun4i_usb_phy_passby(struct sun4i_usb_phy *phy, int enable)
@@ -255,14 +264,16 @@
 		return ret;
 	}
 
+	if (data->cfg->enable_pmu_unk1) {
+		val = readl(phy->pmu + REG_PMU_UNK1);
+		writel(val & ~2, phy->pmu + REG_PMU_UNK1);
+	}
+
 	if (data->cfg->type == sun8i_h3_phy) {
 		if (phy->index == 0) {
 			val = readl(data->base + REG_PHY_UNK_H3);
 			writel(val & ~1, data->base + REG_PHY_UNK_H3);
 		}
-
-		val = readl(phy->pmu + REG_PMU_UNK_H3);
-		writel(val & ~2, phy->pmu + REG_PMU_UNK_H3);
 	} else {
 		/* Enable USB 45 Ohm resistor calibration */
 		if (phy->index == 0)
@@ -285,16 +296,10 @@
 		sun4i_usb_phy0_update_iscr(_phy, 0, ISCR_DPDM_PULLUP_EN);
 		sun4i_usb_phy0_update_iscr(_phy, 0, ISCR_ID_PULLUP_EN);
 
-		if (data->id_det_gpio) {
-			/* OTG mode, force ISCR and cable state updates */
-			data->id_det = -1;
-			data->vbus_det = -1;
-			queue_delayed_work(system_wq, &data->detect, 0);
-		} else {
-			/* Host only mode */
-			sun4i_usb_phy0_set_id_detect(_phy, 0);
-			sun4i_usb_phy0_set_vbus_detect(_phy, 1);
-		}
+		/* Force ISCR and cable state updates */
+		data->id_det = -1;
+		data->vbus_det = -1;
+		queue_delayed_work(system_wq, &data->detect, 0);
 	}
 
 	return 0;
@@ -319,6 +324,22 @@
 	return 0;
 }
 
+static int sun4i_usb_phy0_get_id_det(struct sun4i_usb_phy_data *data)
+{
+	switch (data->dr_mode) {
+	case USB_DR_MODE_OTG:
+		if (data->id_det_gpio)
+			return gpiod_get_value_cansleep(data->id_det_gpio);
+		else
+			return 1; /* Fallback to peripheral mode */
+	case USB_DR_MODE_HOST:
+		return 0;
+	case USB_DR_MODE_PERIPHERAL:
+	default:
+		return 1;
+	}
+}
+
 static int sun4i_usb_phy0_get_vbus_det(struct sun4i_usb_phy_data *data)
 {
 	if (data->vbus_det_gpio)
@@ -372,8 +393,10 @@
 
 	/* For phy0 only turn on Vbus if we don't have an ext. Vbus */
 	if (phy->index == 0 && sun4i_usb_phy0_have_vbus_det(data) &&
-				data->vbus_det)
+				data->vbus_det) {
+		dev_warn(&_phy->dev, "External vbus detected, not enabling our own vbus\n");
 		return 0;
+	}
 
 	ret = regulator_enable(phy->vbus);
 	if (ret)
@@ -409,6 +432,35 @@
 	return 0;
 }
 
+static int sun4i_usb_phy_set_mode(struct phy *_phy, enum phy_mode mode)
+{
+	struct sun4i_usb_phy *phy = phy_get_drvdata(_phy);
+	struct sun4i_usb_phy_data *data = to_sun4i_usb_phy_data(phy);
+
+	if (phy->index != 0)
+		return -EINVAL;
+
+	switch (mode) {
+	case PHY_MODE_USB_HOST:
+		data->dr_mode = USB_DR_MODE_HOST;
+		break;
+	case PHY_MODE_USB_DEVICE:
+		data->dr_mode = USB_DR_MODE_PERIPHERAL;
+		break;
+	case PHY_MODE_USB_OTG:
+		data->dr_mode = USB_DR_MODE_OTG;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	dev_info(&_phy->dev, "Changing dr_mode to %d\n", (int)data->dr_mode);
+	data->force_session_end = true;
+	queue_delayed_work(system_wq, &data->detect, 0);
+
+	return 0;
+}
+
 void sun4i_usb_phy_set_squelch_detect(struct phy *_phy, bool enabled)
 {
 	struct sun4i_usb_phy *phy = phy_get_drvdata(_phy);
@@ -422,6 +474,7 @@
 	.exit		= sun4i_usb_phy_exit,
 	.power_on	= sun4i_usb_phy_power_on,
 	.power_off	= sun4i_usb_phy_power_off,
+	.set_mode	= sun4i_usb_phy_set_mode,
 	.owner		= THIS_MODULE,
 };
 
@@ -430,9 +483,13 @@
 	struct sun4i_usb_phy_data *data =
 		container_of(work, struct sun4i_usb_phy_data, detect.work);
 	struct phy *phy0 = data->phys[0].phy;
-	int id_det, vbus_det, id_notify = 0, vbus_notify = 0;
+	bool force_session_end, id_notify = false, vbus_notify = false;
+	int id_det, vbus_det;
 
-	id_det = gpiod_get_value_cansleep(data->id_det_gpio);
+	if (phy0 == NULL)
+		return;
+
+	id_det = sun4i_usb_phy0_get_id_det(data);
 	vbus_det = sun4i_usb_phy0_get_vbus_det(data);
 
 	mutex_lock(&phy0->mutex);
@@ -442,26 +499,30 @@
 		return;
 	}
 
+	force_session_end = data->force_session_end;
+	data->force_session_end = false;
+
 	if (id_det != data->id_det) {
-		/*
-		 * When a host cable (id == 0) gets plugged in on systems
-		 * without vbus detection report vbus low for long enough for
-		 * the musb-ip to end the current device session.
-		 */
-		if (!sun4i_usb_phy0_have_vbus_det(data) && id_det == 0) {
+		/* id-change, force session end if we've no vbus detection */
+		if (data->dr_mode == USB_DR_MODE_OTG &&
+		    !sun4i_usb_phy0_have_vbus_det(data))
+			force_session_end = true;
+
+		/* When entering host mode (id = 0) force end the session now */
+		if (force_session_end && id_det == 0) {
 			sun4i_usb_phy0_set_vbus_detect(phy0, 0);
 			msleep(200);
 			sun4i_usb_phy0_set_vbus_detect(phy0, 1);
 		}
 		sun4i_usb_phy0_set_id_detect(phy0, id_det);
 		data->id_det = id_det;
-		id_notify = 1;
+		id_notify = true;
 	}
 
 	if (vbus_det != data->vbus_det) {
 		sun4i_usb_phy0_set_vbus_detect(phy0, vbus_det);
 		data->vbus_det = vbus_det;
-		vbus_notify = 1;
+		vbus_notify = true;
 	}
 
 	mutex_unlock(&phy0->mutex);
@@ -469,12 +530,8 @@
 	if (id_notify) {
 		extcon_set_cable_state_(data->extcon, EXTCON_USB_HOST,
 					!id_det);
-		/*
-		 * When a host cable gets unplugged (id == 1) on systems
-		 * without vbus detection report vbus low for long enough to
-		 * the musb-ip to end the current host session.
-		 */
-		if (!sun4i_usb_phy0_have_vbus_det(data) && id_det == 1) {
+		/* When leaving host mode force end the session here */
+		if (force_session_end && id_det == 1) {
 			mutex_lock(&phy0->mutex);
 			sun4i_usb_phy0_set_vbus_detect(phy0, 0);
 			msleep(1000);
@@ -561,7 +618,7 @@
 	if (!data)
 		return -ENOMEM;
 
-	mutex_init(&data->mutex);
+	spin_lock_init(&data->reg_lock);
 	INIT_DELAYED_WORK(&data->detect, sun4i_usb_phy0_id_vbus_det_scan);
 	dev_set_drvdata(dev, data);
 	data->cfg = of_device_get_match_data(dev);
@@ -593,23 +650,16 @@
 			return -EPROBE_DEFER;
 	}
 
-	/* vbus_det without id_det makes no sense, and is not supported */
-	if (sun4i_usb_phy0_have_vbus_det(data) && !data->id_det_gpio) {
-		dev_err(dev, "usb0_id_det missing or invalid\n");
-		return -ENODEV;
-	}
+	data->dr_mode = of_usb_get_dr_mode_by_phy(np, 0);
 
-	if (data->id_det_gpio) {
-		data->extcon = devm_extcon_dev_allocate(dev,
-							sun4i_usb_phy0_cable);
-		if (IS_ERR(data->extcon))
-			return PTR_ERR(data->extcon);
+	data->extcon = devm_extcon_dev_allocate(dev, sun4i_usb_phy0_cable);
+	if (IS_ERR(data->extcon))
+		return PTR_ERR(data->extcon);
 
-		ret = devm_extcon_dev_register(dev, data->extcon);
-		if (ret) {
-			dev_err(dev, "failed to register extcon: %d\n", ret);
-			return ret;
-		}
+	ret = devm_extcon_dev_register(dev, data->extcon);
+	if (ret) {
+		dev_err(dev, "failed to register extcon: %d\n", ret);
+		return ret;
 	}
 
 	for (i = 0; i < data->cfg->num_phys; i++) {
@@ -713,6 +763,7 @@
 	.disc_thresh = 3,
 	.phyctl_offset = REG_PHYCTL_A10,
 	.dedicated_clocks = false,
+	.enable_pmu_unk1 = false,
 };
 
 static const struct sun4i_usb_phy_cfg sun5i_a13_cfg = {
@@ -721,6 +772,7 @@
 	.disc_thresh = 2,
 	.phyctl_offset = REG_PHYCTL_A10,
 	.dedicated_clocks = false,
+	.enable_pmu_unk1 = false,
 };
 
 static const struct sun4i_usb_phy_cfg sun6i_a31_cfg = {
@@ -729,6 +781,7 @@
 	.disc_thresh = 3,
 	.phyctl_offset = REG_PHYCTL_A10,
 	.dedicated_clocks = true,
+	.enable_pmu_unk1 = false,
 };
 
 static const struct sun4i_usb_phy_cfg sun7i_a20_cfg = {
@@ -737,6 +790,7 @@
 	.disc_thresh = 2,
 	.phyctl_offset = REG_PHYCTL_A10,
 	.dedicated_clocks = false,
+	.enable_pmu_unk1 = false,
 };
 
 static const struct sun4i_usb_phy_cfg sun8i_a23_cfg = {
@@ -745,6 +799,7 @@
 	.disc_thresh = 3,
 	.phyctl_offset = REG_PHYCTL_A10,
 	.dedicated_clocks = true,
+	.enable_pmu_unk1 = false,
 };
 
 static const struct sun4i_usb_phy_cfg sun8i_a33_cfg = {
@@ -753,6 +808,7 @@
 	.disc_thresh = 3,
 	.phyctl_offset = REG_PHYCTL_A33,
 	.dedicated_clocks = true,
+	.enable_pmu_unk1 = false,
 };
 
 static const struct sun4i_usb_phy_cfg sun8i_h3_cfg = {
@@ -760,6 +816,16 @@
 	.type = sun8i_h3_phy,
 	.disc_thresh = 3,
 	.dedicated_clocks = true,
+	.enable_pmu_unk1 = true,
+};
+
+static const struct sun4i_usb_phy_cfg sun50i_a64_cfg = {
+	.num_phys = 2,
+	.type = sun50i_a64_phy,
+	.disc_thresh = 3,
+	.phyctl_offset = REG_PHYCTL_A33,
+	.dedicated_clocks = true,
+	.enable_pmu_unk1 = true,
 };
 
 static const struct of_device_id sun4i_usb_phy_of_match[] = {
@@ -770,6 +836,8 @@
 	{ .compatible = "allwinner,sun8i-a23-usb-phy", .data = &sun8i_a23_cfg },
 	{ .compatible = "allwinner,sun8i-a33-usb-phy", .data = &sun8i_a33_cfg },
 	{ .compatible = "allwinner,sun8i-h3-usb-phy", .data = &sun8i_h3_cfg },
+	{ .compatible = "allwinner,sun50i-a64-usb-phy",
+	  .data = &sun50i_a64_cfg},
 	{ },
 };
 MODULE_DEVICE_TABLE(of, sun4i_usb_phy_of_match);
diff --git a/drivers/phy/phy-sun9i-usb.c b/drivers/phy/phy-sun9i-usb.c
index ac4f31a..28fce4b 100644
--- a/drivers/phy/phy-sun9i-usb.c
+++ b/drivers/phy/phy-sun9i-usb.c
@@ -141,9 +141,9 @@
 		}
 
 		phy->hsic_clk = devm_clk_get(dev, "hsic_12M");
-		if (IS_ERR(phy->clk)) {
+		if (IS_ERR(phy->hsic_clk)) {
 			dev_err(dev, "failed to get hsic_12M clock\n");
-			return PTR_ERR(phy->clk);
+			return PTR_ERR(phy->hsic_clk);
 		}
 
 		phy->reset = devm_reset_control_get(dev, "hsic");
diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c
index d9b10a3..87e6334 100644
--- a/drivers/phy/phy-twl4030-usb.c
+++ b/drivers/phy/phy-twl4030-usb.c
@@ -172,6 +172,7 @@
 	int			irq;
 	enum musb_vbus_id_status linkstat;
 	bool			vbus_supplied;
+	bool			musb_mailbox_pending;
 
 	struct delayed_work	id_workaround_work;
 };
@@ -439,6 +440,17 @@
 			  (PHY_CLK_CTRL_CLOCKGATING_EN |
 			   PHY_CLK_CTRL_CLK32K_EN));
 
+	twl4030_i2c_access(twl, 1);
+	twl4030_usb_set_mode(twl, twl->usb_mode);
+	if (twl->usb_mode == T2_USB_MODE_ULPI)
+		twl4030_i2c_access(twl, 0);
+	/*
+	 * According to the TPS65950 TRM, there has to be at least 50ms
+	 * delay between setting POWER_CTRL_OTG_ENAB and enabling charging
+	 * so wait here so that a fully enabled phy can be expected after
+	 * resume
+	 */
+	msleep(50);
 	return 0;
 }
 
@@ -459,11 +471,6 @@
 
 	dev_dbg(twl->dev, "%s\n", __func__);
 	pm_runtime_get_sync(twl->dev);
-	twl4030_i2c_access(twl, 1);
-	twl4030_usb_set_mode(twl, twl->usb_mode);
-	if (twl->usb_mode == T2_USB_MODE_ULPI)
-		twl4030_i2c_access(twl, 0);
-	twl->linkstat = MUSB_UNKNOWN;
 	schedule_delayed_work(&twl->id_workaround_work, HZ);
 
 	return 0;
@@ -569,9 +576,12 @@
 			pm_runtime_mark_last_busy(twl->dev);
 			pm_runtime_put_autosuspend(twl->dev);
 		}
+		twl->musb_mailbox_pending = true;
+	}
+	if (twl->musb_mailbox_pending) {
 		err = musb_mailbox(status);
-		if (err)
-			twl->linkstat = MUSB_UNKNOWN;
+		if (!err)
+			twl->musb_mailbox_pending = false;
 	}
 
 	/* don't schedule during sleep - irq works right then */
@@ -676,6 +686,7 @@
 	twl->irq		= platform_get_irq(pdev, 0);
 	twl->vbus_supplied	= false;
 	twl->linkstat		= MUSB_UNKNOWN;
+	twl->musb_mailbox_pending = false;
 
 	twl->phy.dev		= twl->dev;
 	twl->phy.label		= "twl4030";
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index ec83dfd..873424a 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -18,6 +18,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/phy/phy.h>
+#include <linux/phy/tegra/xusb.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
@@ -101,7 +102,8 @@
 	return of_find_node_by_name(np, pad->soc->lanes[index].name);
 }
 
-int tegra_xusb_lane_lookup_function(struct tegra_xusb_lane *lane,
+static int
+tegra_xusb_lane_lookup_function(struct tegra_xusb_lane *lane,
 				    const char *function)
 {
 	unsigned int i;
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 5749a4ee..0fe8fad 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -1539,12 +1539,11 @@
 		offset += range->npins;
 	}
 
-	/* Mask and clear all interrupts */
-	chv_writel(0, pctrl->regs + CHV_INTMASK);
+	/* Clear all interrupts */
 	chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
 
 	ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0,
-				   handle_simple_irq, IRQ_TYPE_NONE);
+				   handle_bad_irq, IRQ_TYPE_NONE);
 	if (ret) {
 		dev_err(pctrl->dev, "failed to add IRQ chip\n");
 		goto fail;
diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c
index eb4990f..7fb7656 100644
--- a/drivers/pinctrl/intel/pinctrl-merrifield.c
+++ b/drivers/pinctrl/intel/pinctrl-merrifield.c
@@ -11,6 +11,7 @@
 
 #include <linux/bitops.h>
 #include <linux/err.h>
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pinctrl/pinconf.h>
diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c
index 11623c6..44e69c9 100644
--- a/drivers/pinctrl/meson/pinctrl-meson.c
+++ b/drivers/pinctrl/meson/pinctrl-meson.c
@@ -727,13 +727,7 @@
 		return PTR_ERR(pc->pcdev);
 	}
 
-	ret = meson_gpiolib_register(pc);
-	if (ret) {
-		pinctrl_unregister(pc->pcdev);
-		return ret;
-	}
-
-	return 0;
+	return meson_gpiolib_register(pc);
 }
 
 static struct platform_driver meson_pinctrl_driver = {
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 634b4d3..b3e7723 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -43,17 +43,6 @@
 
 	spin_lock_irqsave(&gpio_dev->lock, flags);
 	pin_reg = readl(gpio_dev->base + offset * 4);
-	/*
-	 * Suppose BIOS or Bootloader sets specific debounce for the
-	 * GPIO. if not, set debounce to be  2.75ms and remove glitch.
-	*/
-	if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
-		pin_reg |= 0xf;
-		pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
-		pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF;
-		pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
-	}
-
 	pin_reg &= ~BIT(OUTPUT_ENABLE_OFF);
 	writel(pin_reg, gpio_dev->base + offset * 4);
 	spin_unlock_irqrestore(&gpio_dev->lock, flags);
@@ -326,15 +315,6 @@
 
 	spin_lock_irqsave(&gpio_dev->lock, flags);
 	pin_reg = readl(gpio_dev->base + (d->hwirq)*4);
-	/*
-		Suppose BIOS or Bootloader sets specific debounce for the
-		GPIO. if not, set debounce to be  2.75ms.
-	*/
-	if ((pin_reg & DB_TMR_OUT_MASK) == 0) {
-		pin_reg |= 0xf;
-		pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF);
-		pin_reg &= ~BIT(DB_TMR_LARGE_OFF);
-	}
 	pin_reg |= BIT(INTERRUPT_ENABLE_OFF);
 	pin_reg |= BIT(INTERRUPT_MASK_OFF);
 	writel(pin_reg, gpio_dev->base + (d->hwirq)*4);
diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c
index c6d410e..55375b1 100644
--- a/drivers/pinctrl/pinctrl-pistachio.c
+++ b/drivers/pinctrl/pinctrl-pistachio.c
@@ -809,17 +809,17 @@
 			   PADS_FUNCTION_SELECT2, 12, 0x3),
 	MFIO_MUX_PIN_GROUP(83, MIPS_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG,
 			   PADS_FUNCTION_SELECT2, 14, 0x3),
-	MFIO_MUX_PIN_GROUP(84, SYS_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG,
+	MFIO_MUX_PIN_GROUP(84, AUDIO_PLL_LOCK, MIPS_TRACE_DATA, USB_DEBUG,
 			   PADS_FUNCTION_SELECT2, 16, 0x3),
-	MFIO_MUX_PIN_GROUP(85, WIFI_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
+	MFIO_MUX_PIN_GROUP(85, RPU_V_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
 			   PADS_FUNCTION_SELECT2, 18, 0x3),
-	MFIO_MUX_PIN_GROUP(86, BT_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
+	MFIO_MUX_PIN_GROUP(86, RPU_L_PLL_LOCK, MIPS_TRACE_DATA, SDHOST_DEBUG,
 			   PADS_FUNCTION_SELECT2, 20, 0x3),
-	MFIO_MUX_PIN_GROUP(87, RPU_V_PLL_LOCK, DREQ2, SOCIF_DEBUG,
+	MFIO_MUX_PIN_GROUP(87, SYS_PLL_LOCK, DREQ2, SOCIF_DEBUG,
 			   PADS_FUNCTION_SELECT2, 22, 0x3),
-	MFIO_MUX_PIN_GROUP(88, RPU_L_PLL_LOCK, DREQ3, SOCIF_DEBUG,
+	MFIO_MUX_PIN_GROUP(88, WIFI_PLL_LOCK, DREQ3, SOCIF_DEBUG,
 			   PADS_FUNCTION_SELECT2, 24, 0x3),
-	MFIO_MUX_PIN_GROUP(89, AUDIO_PLL_LOCK, DREQ4, DREQ5,
+	MFIO_MUX_PIN_GROUP(89, BT_PLL_LOCK, DREQ4, DREQ5,
 			   PADS_FUNCTION_SELECT2, 26, 0x3),
 	PIN_GROUP(TCK, "tck"),
 	PIN_GROUP(TRSTN, "trstn"),
@@ -1432,7 +1432,6 @@
 {
 	struct pistachio_pinctrl *pctl;
 	struct resource *res;
-	int ret;
 
 	pctl = devm_kzalloc(&pdev->dev, sizeof(*pctl), GFP_KERNEL);
 	if (!pctl)
@@ -1464,13 +1463,7 @@
 		return PTR_ERR(pctl->pctldev);
 	}
 
-	ret = pistachio_gpio_register(pctl);
-	if (ret < 0) {
-		pinctrl_unregister(pctl->pctldev);
-		return ret;
-	}
-
-	return 0;
+	return pistachio_gpio_register(pctl);
 }
 
 static struct platform_driver pistachio_pinctrl_driver = {
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c
index ce483b03..f9d661e5 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a23.c
@@ -485,12 +485,12 @@
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 8),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
-		  SUNXI_FUNCTION(0x2, "uart2"),		/* RTS */
+		  SUNXI_FUNCTION(0x2, "uart1"),		/* RTS */
 		  SUNXI_FUNCTION_IRQ_BANK(0x4, 2, 8)),	/* PG_EINT8 */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 9),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
-		  SUNXI_FUNCTION(0x2, "uart2"),		/* CTS */
+		  SUNXI_FUNCTION(0x2, "uart1"),		/* CTS */
 		  SUNXI_FUNCTION_IRQ_BANK(0x4, 2, 9)),	/* PG_EINT9 */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 10),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
diff --git a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
index 3040abe..3131cac 100644
--- a/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
+++ b/drivers/pinctrl/sunxi/pinctrl-sun8i-a33.c
@@ -407,12 +407,12 @@
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 8),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
-		  SUNXI_FUNCTION(0x2, "uart2"),		/* RTS */
+		  SUNXI_FUNCTION(0x2, "uart1"),		/* RTS */
 		  SUNXI_FUNCTION_IRQ_BANK(0x4, 1, 8)),	/* PG_EINT8 */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 9),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
 		  SUNXI_FUNCTION(0x1, "gpio_out"),
-		  SUNXI_FUNCTION(0x2, "uart2"),		/* CTS */
+		  SUNXI_FUNCTION(0x2, "uart1"),		/* CTS */
 		  SUNXI_FUNCTION_IRQ_BANK(0x4, 1, 9)),	/* PG_EINT9 */
 	SUNXI_PIN(SUNXI_PINCTRL_PIN(G, 10),
 		  SUNXI_FUNCTION(0x0, "gpio_in"),
diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c
index f99b183..374a802 100644
--- a/drivers/platform/olpc/olpc-ec.c
+++ b/drivers/platform/olpc/olpc-ec.c
@@ -1,6 +1,8 @@
 /*
  * Generic driver for the OLPC Embedded Controller.
  *
+ * Author: Andres Salomon <dilinger@queued.net>
+ *
  * Copyright (C) 2011-2012 One Laptop per Child Foundation.
  *
  * Licensed under the GPL v2 or later.
@@ -12,7 +14,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
-#include <linux/module.h>
+#include <linux/init.h>
 #include <linux/list.h>
 #include <linux/olpc-ec.h>
 #include <asm/olpc.h>
@@ -326,8 +328,4 @@
 {
 	return platform_driver_register(&olpc_ec_plat_driver);
 }
-
 arch_initcall(olpc_ec_init_module);
-
-MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
-MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index d2bc092..da2fe18 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c
@@ -110,8 +110,8 @@
 	/* BIOS error detected */
 	{ KE_IGNORE, 0xe00d, { KEY_RESERVED } },
 
-	/* Unknown, defined in ACPI DSDT */
-	/* { KE_IGNORE, 0xe00e, { KEY_RESERVED } }, */
+	/* Battery was removed or inserted */
+	{ KE_IGNORE, 0xe00e, { KEY_RESERVED } },
 
 	/* Wifi Catcher */
 	{ KE_KEY,    0xe011, { KEY_PROG2 } },
diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c
index b86e1bc..a511d51 100644
--- a/drivers/platform/x86/intel_pmc_ipc.c
+++ b/drivers/platform/x86/intel_pmc_ipc.c
@@ -651,11 +651,15 @@
 {
 	int ret;
 
-	ret = ipc_create_tco_device();
-	if (ret) {
-		dev_err(ipcdev.dev, "Failed to add tco platform device\n");
-		return ret;
+	/* If we have ACPI based watchdog use that instead */
+	if (!acpi_has_watchdog()) {
+		ret = ipc_create_tco_device();
+		if (ret) {
+			dev_err(ipcdev.dev, "Failed to add tco platform device\n");
+			return ret;
+		}
 	}
+
 	ret = ipc_create_punit_device();
 	if (ret) {
 		dev_err(ipcdev.dev, "Failed to add punit platform device\n");
diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c
index 63b371d..91ae585 100644
--- a/drivers/platform/x86/intel_pmic_gpio.c
+++ b/drivers/platform/x86/intel_pmic_gpio.c
@@ -1,6 +1,8 @@
 /* Moorestown PMIC GPIO (access through IPC) driver
  * Copyright (c) 2008 - 2009, Intel Corporation.
  *
+ * Author: Alek Du <alek.du@intel.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -21,7 +23,6 @@
 
 #define pr_fmt(fmt) "%s: " fmt, __func__
 
-#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -322,9 +323,4 @@
 {
 	return platform_driver_register(&platform_pmic_gpio_driver);
 }
-
 subsys_initcall(platform_pmic_gpio_init);
-
-MODULE_AUTHOR("Alek Du <alek.du@intel.com>");
-MODULE_DESCRIPTION("Intel Moorestown PMIC GPIO driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/pnp/isapnp/core.c b/drivers/pnp/isapnp/core.c
index cf88f9b6..d8bf5a1 100644
--- a/drivers/pnp/isapnp/core.c
+++ b/drivers/pnp/isapnp/core.c
@@ -34,7 +34,7 @@
  *  2003-08-11	Resource Management Updates - Adam Belay <ambx1@neo.rr.com>
  */
 
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/delay.h>
@@ -54,8 +54,6 @@
 static int isapnp_reset = 1;	/* reset all PnP cards (deactivate) */
 static int isapnp_verbose = 1;	/* verbose mode */
 
-MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
-MODULE_DESCRIPTION("Generic ISA Plug & Play support");
 module_param(isapnp_disable, int, 0);
 MODULE_PARM_DESC(isapnp_disable, "ISA Plug & Play disable");
 module_param(isapnp_rdp, int, 0);
@@ -64,7 +62,6 @@
 MODULE_PARM_DESC(isapnp_reset, "ISA Plug & Play reset all cards");
 module_param(isapnp_verbose, int, 0);
 MODULE_PARM_DESC(isapnp_verbose, "ISA Plug & Play verbose mode");
-MODULE_LICENSE("GPL");
 
 #define _PIDXR		0x279
 #define _PNPWRP		0xa79
diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c
index db9973b..fa0f19b 100644
--- a/drivers/power/avs/smartreflex.c
+++ b/drivers/power/avs/smartreflex.c
@@ -136,7 +136,7 @@
 
 	if (IS_ERR(fck)) {
 		dev_err(&sr->pdev->dev, "%s: unable to get fck for device %s\n",
-				__func__, dev_name(&sr->pdev->dev));
+			__func__, dev_name(&sr->pdev->dev));
 		return;
 	}
 
@@ -170,8 +170,8 @@
 {
 	if (!sr_class || !(sr_class->enable) || !(sr_class->configure)) {
 		dev_warn(&sr->pdev->dev,
-			"%s: smartreflex class driver not registered\n",
-			__func__);
+			 "%s: smartreflex class driver not registered\n",
+			 __func__);
 		return;
 	}
 
@@ -183,8 +183,8 @@
 {
 	if (!sr_class || !(sr_class->disable)) {
 		dev_warn(&sr->pdev->dev,
-			"%s: smartreflex class driver not registered\n",
-			__func__);
+			 "%s: smartreflex class driver not registered\n",
+			 __func__);
 		return;
 	}
 
@@ -225,9 +225,8 @@
 
 error:
 	list_del(&sr_info->node);
-	dev_err(&sr_info->pdev->dev, "%s: ERROR in registering"
-		"interrupt handler. Smartreflex will"
-		"not function as desired\n", __func__);
+	dev_err(&sr_info->pdev->dev, "%s: ERROR in registering interrupt handler. Smartreflex will not function as desired\n",
+		__func__);
 
 	return ret;
 }
@@ -263,7 +262,7 @@
 
 	if (timeout >= SR_DISABLE_TIMEOUT)
 		dev_warn(&sr->pdev->dev, "%s: Smartreflex disable timedout\n",
-			__func__);
+			 __func__);
 
 	/* Disable MCUDisableAcknowledge interrupt & clear pending interrupt */
 	sr_modify_reg(sr, ERRCONFIG_V1, ERRCONFIG_MCUDISACKINTEN,
@@ -308,7 +307,7 @@
 
 	if (timeout >= SR_DISABLE_TIMEOUT)
 		dev_warn(&sr->pdev->dev, "%s: Smartreflex disable timedout\n",
-			__func__);
+			 __func__);
 
 	/* Disable MCUDisableAcknowledge interrupt & clear pending interrupt */
 	sr_write_reg(sr, IRQENABLE_CLR, IRQENABLE_MCUDISABLEACKINT);
@@ -322,7 +321,7 @@
 
 	if (!sr->nvalue_table) {
 		dev_warn(&sr->pdev->dev, "%s: Missing ntarget value table\n",
-			__func__);
+			 __func__);
 		return NULL;
 	}
 
@@ -356,8 +355,8 @@
 	u8 senp_shift, senn_shift;
 
 	if (!sr) {
-		pr_warn("%s: NULL omap_sr from %pF\n", __func__,
-			(void *)_RET_IP_);
+		pr_warn("%s: NULL omap_sr from %pF\n",
+			__func__, (void *)_RET_IP_);
 		return -EINVAL;
 	}
 
@@ -387,8 +386,8 @@
 		vpboundint_st = ERRCONFIG_VPBOUNDINTST_V2;
 		break;
 	default:
-		dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex"
-			"module without specifying the ip\n", __func__);
+		dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n",
+			__func__);
 		return -EINVAL;
 	}
 
@@ -423,8 +422,8 @@
 	u32 vpboundint_en, vpboundint_st;
 
 	if (!sr) {
-		pr_warn("%s: NULL omap_sr from %pF\n", __func__,
-			(void *)_RET_IP_);
+		pr_warn("%s: NULL omap_sr from %pF\n",
+			__func__, (void *)_RET_IP_);
 		return -EINVAL;
 	}
 
@@ -440,8 +439,8 @@
 		vpboundint_st = ERRCONFIG_VPBOUNDINTST_V2;
 		break;
 	default:
-		dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex"
-			"module without specifying the ip\n", __func__);
+		dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n",
+			__func__);
 		return -EINVAL;
 	}
 
@@ -478,8 +477,8 @@
 	u8 senp_shift, senn_shift;
 
 	if (!sr) {
-		pr_warn("%s: NULL omap_sr from %pF\n", __func__,
-			(void *)_RET_IP_);
+		pr_warn("%s: NULL omap_sr from %pF\n",
+			__func__, (void *)_RET_IP_);
 		return -EINVAL;
 	}
 
@@ -504,8 +503,8 @@
 		senp_shift = SRCONFIG_SENPENABLE_V2_SHIFT;
 		break;
 	default:
-		dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex"
-			"module without specifying the ip\n", __func__);
+		dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n",
+			__func__);
 		return -EINVAL;
 	}
 
@@ -537,8 +536,8 @@
 			IRQENABLE_MCUBOUNDSINT | IRQENABLE_MCUDISABLEACKINT);
 		break;
 	default:
-		dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex"
-			"module without specifying the ip\n", __func__);
+		dev_err(&sr->pdev->dev, "%s: Trying to Configure smartreflex module without specifying the ip\n",
+			__func__);
 		return -EINVAL;
 	}
 
@@ -563,16 +562,16 @@
 	int ret;
 
 	if (!sr) {
-		pr_warn("%s: NULL omap_sr from %pF\n", __func__,
-			(void *)_RET_IP_);
+		pr_warn("%s: NULL omap_sr from %pF\n",
+			__func__, (void *)_RET_IP_);
 		return -EINVAL;
 	}
 
 	volt_data = omap_voltage_get_voltdata(sr->voltdm, volt);
 
 	if (IS_ERR(volt_data)) {
-		dev_warn(&sr->pdev->dev, "%s: Unable to get voltage table"
-			"for nominal voltage %ld\n", __func__, volt);
+		dev_warn(&sr->pdev->dev, "%s: Unable to get voltage table for nominal voltage %ld\n",
+			 __func__, volt);
 		return PTR_ERR(volt_data);
 	}
 
@@ -615,8 +614,8 @@
 void sr_disable(struct omap_sr *sr)
 {
 	if (!sr) {
-		pr_warn("%s: NULL omap_sr from %pF\n", __func__,
-			(void *)_RET_IP_);
+		pr_warn("%s: NULL omap_sr from %pF\n",
+			__func__, (void *)_RET_IP_);
 		return;
 	}
 
@@ -658,13 +657,13 @@
 	struct omap_sr *sr_info;
 
 	if (!class_data) {
-		pr_warning("%s:, Smartreflex class data passed is NULL\n",
+		pr_warn("%s:, Smartreflex class data passed is NULL\n",
 			__func__);
 		return -EINVAL;
 	}
 
 	if (sr_class) {
-		pr_warning("%s: Smartreflex class driver already registered\n",
+		pr_warn("%s: Smartreflex class driver already registered\n",
 			__func__);
 		return -EBUSY;
 	}
@@ -696,7 +695,7 @@
 	struct omap_sr *sr = _sr_lookup(voltdm);
 
 	if (IS_ERR(sr)) {
-		pr_warning("%s: omap_sr struct for voltdm not found\n",	__func__);
+		pr_warn("%s: omap_sr struct for voltdm not found\n", __func__);
 		return;
 	}
 
@@ -704,8 +703,8 @@
 		return;
 
 	if (!sr_class || !(sr_class->enable) || !(sr_class->configure)) {
-		dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not"
-			"registered\n", __func__);
+		dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not registered\n",
+			 __func__);
 		return;
 	}
 
@@ -728,7 +727,7 @@
 	struct omap_sr *sr = _sr_lookup(voltdm);
 
 	if (IS_ERR(sr)) {
-		pr_warning("%s: omap_sr struct for voltdm not found\n",	__func__);
+		pr_warn("%s: omap_sr struct for voltdm not found\n", __func__);
 		return;
 	}
 
@@ -736,8 +735,8 @@
 		return;
 
 	if (!sr_class || !(sr_class->disable)) {
-		dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not"
-			"registered\n", __func__);
+		dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not registered\n",
+			 __func__);
 		return;
 	}
 
@@ -760,7 +759,7 @@
 	struct omap_sr *sr = _sr_lookup(voltdm);
 
 	if (IS_ERR(sr)) {
-		pr_warning("%s: omap_sr struct for voltdm not found\n",	__func__);
+		pr_warn("%s: omap_sr struct for voltdm not found\n", __func__);
 		return;
 	}
 
@@ -768,8 +767,8 @@
 		return;
 
 	if (!sr_class || !(sr_class->disable)) {
-		dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not"
-			"registered\n", __func__);
+		dev_warn(&sr->pdev->dev, "%s: smartreflex class driver not registered\n",
+			 __func__);
 		return;
 	}
 
@@ -787,8 +786,8 @@
 void omap_sr_register_pmic(struct omap_sr_pmic_data *pmic_data)
 {
 	if (!pmic_data) {
-		pr_warning("%s: Trying to register NULL PMIC data structure"
-			"with smartreflex\n", __func__);
+		pr_warn("%s: Trying to register NULL PMIC data structure with smartreflex\n",
+			__func__);
 		return;
 	}
 
@@ -801,7 +800,7 @@
 	struct omap_sr *sr_info = data;
 
 	if (!sr_info) {
-		pr_warning("%s: omap_sr struct not found\n", __func__);
+		pr_warn("%s: omap_sr struct not found\n", __func__);
 		return -EINVAL;
 	}
 
@@ -815,13 +814,13 @@
 	struct omap_sr *sr_info = data;
 
 	if (!sr_info) {
-		pr_warning("%s: omap_sr struct not found\n", __func__);
+		pr_warn("%s: omap_sr struct not found\n", __func__);
 		return -EINVAL;
 	}
 
 	/* Sanity check */
 	if (val > 1) {
-		pr_warning("%s: Invalid argument %lld\n", __func__, val);
+		pr_warn("%s: Invalid argument %lld\n", __func__, val);
 		return -EINVAL;
 	}
 
@@ -848,19 +847,13 @@
 	int i, ret = 0;
 
 	sr_info = devm_kzalloc(&pdev->dev, sizeof(struct omap_sr), GFP_KERNEL);
-	if (!sr_info) {
-		dev_err(&pdev->dev, "%s: unable to allocate sr_info\n",
-			__func__);
+	if (!sr_info)
 		return -ENOMEM;
-	}
 
 	sr_info->name = devm_kzalloc(&pdev->dev,
 				     SMARTREFLEX_NAME_LEN, GFP_KERNEL);
-	if (!sr_info->name) {
-		dev_err(&pdev->dev, "%s: unable to allocate SR instance name\n",
-			__func__);
+	if (!sr_info->name)
 		return -ENOMEM;
-	}
 
 	platform_set_drvdata(pdev, sr_info);
 
@@ -912,7 +905,7 @@
 	if (sr_class) {
 		ret = sr_late_init(sr_info);
 		if (ret) {
-			pr_warning("%s: Error in SR late init\n", __func__);
+			pr_warn("%s: Error in SR late init\n", __func__);
 			goto err_list_del;
 		}
 	}
@@ -923,7 +916,7 @@
 		if (IS_ERR_OR_NULL(sr_dbg_dir)) {
 			ret = PTR_ERR(sr_dbg_dir);
 			pr_err("%s:sr debugfs dir creation failed(%d)\n",
-				__func__, ret);
+			       __func__, ret);
 			goto err_list_del;
 		}
 	}
@@ -945,8 +938,8 @@
 
 	nvalue_dir = debugfs_create_dir("nvalue", sr_info->dbg_dir);
 	if (IS_ERR_OR_NULL(nvalue_dir)) {
-		dev_err(&pdev->dev, "%s: Unable to create debugfs directory"
-			"for n-values\n", __func__);
+		dev_err(&pdev->dev, "%s: Unable to create debugfs directory for n-values\n",
+			__func__);
 		ret = PTR_ERR(nvalue_dir);
 		goto err_debugfs;
 	}
@@ -1053,12 +1046,12 @@
 	if (sr_pmic_data && sr_pmic_data->sr_pmic_init)
 		sr_pmic_data->sr_pmic_init();
 	else
-		pr_warning("%s: No PMIC hook to init smartreflex\n", __func__);
+		pr_warn("%s: No PMIC hook to init smartreflex\n", __func__);
 
 	ret = platform_driver_probe(&smartreflex_driver, omap_sr_probe);
 	if (ret) {
 		pr_err("%s: platform driver register failed for SR\n",
-			__func__);
+		       __func__);
 		return ret;
 	}
 
diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c
index 9c65f13..da7a75f 100644
--- a/drivers/power/max17042_battery.c
+++ b/drivers/power/max17042_battery.c
@@ -457,13 +457,16 @@
 }
 
 static inline void max17042_read_model_data(struct max17042_chip *chip,
-					u8 addr, u32 *data, int size)
+					u8 addr, u16 *data, int size)
 {
 	struct regmap *map = chip->regmap;
 	int i;
+	u32 tmp;
 
-	for (i = 0; i < size; i++)
-		regmap_read(map, addr + i, &data[i]);
+	for (i = 0; i < size; i++) {
+		regmap_read(map, addr + i, &tmp);
+		data[i] = (u16)tmp;
+	}
 }
 
 static inline int max17042_model_data_compare(struct max17042_chip *chip,
@@ -486,7 +489,7 @@
 {
 	int ret;
 	int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
-	u32 *temp_data;
+	u16 *temp_data;
 
 	temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
 	if (!temp_data)
@@ -501,7 +504,7 @@
 	ret = max17042_model_data_compare(
 		chip,
 		chip->pdata->config_data->cell_char_tbl,
-		(u16 *)temp_data,
+		temp_data,
 		table_size);
 
 	max10742_lock_model(chip);
@@ -514,7 +517,7 @@
 {
 	int i;
 	int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl);
-	u32 *temp_data;
+	u16 *temp_data;
 	int ret = 0;
 
 	temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL);
diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig
index 3bfac53..c74c3f6 100644
--- a/drivers/power/reset/Kconfig
+++ b/drivers/power/reset/Kconfig
@@ -200,8 +200,8 @@
 config SYSCON_REBOOT_MODE
 	tristate "Generic SYSCON regmap reboot mode driver"
 	depends on OF
+	depends on MFD_SYSCON
 	select REBOOT_MODE
-	select MFD_SYSCON
 	help
 	  Say y here will enable reboot mode driver. This will
 	  get reboot mode arguments and store it in SYSCON mapped
diff --git a/drivers/power/reset/hisi-reboot.c b/drivers/power/reset/hisi-reboot.c
index 9ab7f56..f69387e 100644
--- a/drivers/power/reset/hisi-reboot.c
+++ b/drivers/power/reset/hisi-reboot.c
@@ -53,13 +53,16 @@
 
 	if (of_property_read_u32(np, "reboot-offset", &reboot_offset) < 0) {
 		pr_err("failed to find reboot-offset property\n");
+		iounmap(base);
 		return -EINVAL;
 	}
 
 	err = register_restart_handler(&hisi_restart_nb);
-	if (err)
+	if (err) {
 		dev_err(&pdev->dev, "cannot register restart handler (err=%d)\n",
 			err);
+		iounmap(base);
+	}
 
 	return err;
 }
diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c
index 73dfae4..4c56e54 100644
--- a/drivers/power/tps65217_charger.c
+++ b/drivers/power/tps65217_charger.c
@@ -206,6 +206,7 @@
 	if (!charger)
 		return -ENOMEM;
 
+	platform_set_drvdata(pdev, charger);
 	charger->tps = tps;
 	charger->dev = &pdev->dev;
 
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index fbab29d..243b233 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1154,8 +1154,8 @@
 
 	RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT1,	rapl_defaults_byt),
 	RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT,	rapl_defaults_cht),
-	RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD1,	rapl_defaults_tng),
-	RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD2,	rapl_defaults_ann),
+	RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD,	rapl_defaults_tng),
+	RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD,	rapl_defaults_ann),
 	RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT,	rapl_defaults_core),
 	RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON,	rapl_defaults_core),
 
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index 2e481b9..86280b7 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -263,6 +263,7 @@
 no_device:
 	mutex_destroy(&ptp->tsevq_mux);
 	mutex_destroy(&ptp->pincfg_mux);
+	ida_simple_remove(&ptp_clocks_map, index);
 no_slot:
 	kfree(ptp);
 no_memory:
diff --git a/drivers/ptp/ptp_ixp46x.c b/drivers/ptp/ptp_ixp46x.c
index ee4f183..344a3ba 100644
--- a/drivers/ptp/ptp_ixp46x.c
+++ b/drivers/ptp/ptp_ixp46x.c
@@ -268,18 +268,19 @@
 		return err;
 
 	irq = gpio_to_irq(gpio);
+	if (irq < 0)
+		return irq;
 
-	if (NO_IRQ == irq)
-		return NO_IRQ;
-
-	if (irq_set_irq_type(irq, IRQF_TRIGGER_FALLING)) {
+	err = irq_set_irq_type(irq, IRQF_TRIGGER_FALLING);
+	if (err) {
 		pr_err("cannot set trigger type for irq %d\n", irq);
-		return NO_IRQ;
+		return err;
 	}
 
-	if (request_irq(irq, isr, 0, DRIVER, &ixp_clock)) {
+	err = request_irq(irq, isr, 0, DRIVER, &ixp_clock);
+	if (err) {
 		pr_err("request_irq failed for irq %d\n", irq);
-		return NO_IRQ;
+		return err;
 	}
 
 	return irq;
diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c
index 32f0f01..9d19b9a 100644
--- a/drivers/rapidio/devices/tsi721.c
+++ b/drivers/rapidio/devices/tsi721.c
@@ -1161,7 +1161,7 @@
 		} else if (ibw_start < (ib_win->rstart + ib_win->size) &&
 			   (ibw_start + ibw_size) > ib_win->rstart) {
 			/* Return error if address translation involved */
-			if (direct && ib_win->xlat) {
+			if (!direct || ib_win->xlat) {
 				ret = -EFAULT;
 				break;
 			}
diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c
index cecc15a..cebc296 100644
--- a/drivers/rapidio/rio_cm.c
+++ b/drivers/rapidio/rio_cm.c
@@ -1080,8 +1080,8 @@
 static struct rio_channel *riocm_ch_accept(u16 ch_id, u16 *new_ch_id,
 					   long timeout)
 {
-	struct rio_channel *ch = NULL;
-	struct rio_channel *new_ch = NULL;
+	struct rio_channel *ch;
+	struct rio_channel *new_ch;
 	struct conn_req *req;
 	struct cm_peer *peer;
 	int found = 0;
@@ -1155,6 +1155,7 @@
 
 	spin_unlock_bh(&ch->lock);
 	riocm_put_channel(ch);
+	ch = NULL;
 	kfree(req);
 
 	down_read(&rdev_sem);
@@ -1172,7 +1173,7 @@
 	if (!found) {
 		/* If peer device object not found, simply ignore the request */
 		err = -ENODEV;
-		goto err_nodev;
+		goto err_put_new_ch;
 	}
 
 	new_ch->rdev = peer->rdev;
@@ -1184,15 +1185,16 @@
 
 	*new_ch_id = new_ch->id;
 	return new_ch;
+
+err_put_new_ch:
+	spin_lock_bh(&idr_lock);
+	idr_remove(&ch_idr, new_ch->id);
+	spin_unlock_bh(&idr_lock);
+	riocm_put_channel(new_ch);
+
 err_put:
-	riocm_put_channel(ch);
-err_nodev:
-	if (new_ch) {
-		spin_lock_bh(&idr_lock);
-		idr_remove(&ch_idr, new_ch->id);
-		spin_unlock_bh(&idr_lock);
-		riocm_put_channel(new_ch);
-	}
+	if (ch)
+		riocm_put_channel(ch);
 	*new_ch_id = 0;
 	return ERR_PTR(err);
 }
@@ -2245,17 +2247,30 @@
 {
 	struct rio_channel *ch;
 	unsigned int i;
+	LIST_HEAD(list);
 
 	riocm_debug(EXIT, ".");
 
+	/*
+	 * If there are any channels left in connected state send
+	 * close notification to the connection partner.
+	 * First build a list of channels that require a closing
+	 * notification because function riocm_send_close() should
+	 * be called outside of spinlock protected code.
+	 */
 	spin_lock_bh(&idr_lock);
 	idr_for_each_entry(&ch_idr, ch, i) {
-		riocm_debug(EXIT, "close ch %d", ch->id);
-		if (ch->state == RIO_CM_CONNECTED)
-			riocm_send_close(ch);
+		if (ch->state == RIO_CM_CONNECTED) {
+			riocm_debug(EXIT, "close ch %d", ch->id);
+			idr_remove(&ch_idr, ch->id);
+			list_add(&ch->ch_node, &list);
+		}
 	}
 	spin_unlock_bh(&idr_lock);
 
+	list_for_each_entry(ch, &list, ch_node)
+		riocm_send_close(ch);
+
 	return NOTIFY_DONE;
 }
 
diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 6c88e31..4c2631a 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -353,6 +353,14 @@
 	  This enables support for the LTC3589, LTC3589-1, and LTC3589-2
 	  8-output regulators controlled via I2C.
 
+config REGULATOR_LTC3676
+	tristate "LTC3676 8-output voltage regulator"
+	depends on I2C
+	select REGMAP_I2C
+	help
+	  This enables support for the LTC3676
+	  8-output regulators controlled via I2C.
+
 config REGULATOR_MAX14577
 	tristate "Maxim 14577/77836 regulator"
 	depends on MFD_MAX14577
@@ -820,7 +828,7 @@
 	    This driver supports TPS65912 voltage regulator chip.
 
 config REGULATOR_TPS80031
-	tristate "TI TPS80031/TPS80032 power regualtor driver"
+	tristate "TI TPS80031/TPS80032 power regulator driver"
 	depends on MFD_TPS80031
 	help
 	  TPS80031/ TPS80032 Fully Integrated Power Management with Power
diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile
index f3da9ee..2142a5d 100644
--- a/drivers/regulator/Makefile
+++ b/drivers/regulator/Makefile
@@ -47,6 +47,7 @@
 obj-$(CONFIG_REGULATOR_LP8788) += lp8788-ldo.o
 obj-$(CONFIG_REGULATOR_LP8755) += lp8755.o
 obj-$(CONFIG_REGULATOR_LTC3589) += ltc3589.o
+obj-$(CONFIG_REGULATOR_LTC3676) += ltc3676.o
 obj-$(CONFIG_REGULATOR_MAX14577) += max14577-regulator.o
 obj-$(CONFIG_REGULATOR_MAX1586) += max1586.o
 obj-$(CONFIG_REGULATOR_MAX77620) += max77620-regulator.o
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index db320e8..67426c0 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -679,24 +679,6 @@
 	    !rdev->desc->ops->set_load)
 		return -EINVAL;
 
-	/* get output voltage */
-	output_uV = _regulator_get_voltage(rdev);
-	if (output_uV <= 0) {
-		rdev_err(rdev, "invalid output voltage found\n");
-		return -EINVAL;
-	}
-
-	/* get input voltage */
-	input_uV = 0;
-	if (rdev->supply)
-		input_uV = regulator_get_voltage(rdev->supply);
-	if (input_uV <= 0)
-		input_uV = rdev->constraints->input_uV;
-	if (input_uV <= 0) {
-		rdev_err(rdev, "invalid input voltage found\n");
-		return -EINVAL;
-	}
-
 	/* calc total requested load */
 	list_for_each_entry(sibling, &rdev->consumer_list, list)
 		current_uA += sibling->uA_load;
@@ -709,6 +691,24 @@
 		if (err < 0)
 			rdev_err(rdev, "failed to set load %d\n", current_uA);
 	} else {
+		/* get output voltage */
+		output_uV = _regulator_get_voltage(rdev);
+		if (output_uV <= 0) {
+			rdev_err(rdev, "invalid output voltage found\n");
+			return -EINVAL;
+		}
+
+		/* get input voltage */
+		input_uV = 0;
+		if (rdev->supply)
+			input_uV = regulator_get_voltage(rdev->supply);
+		if (input_uV <= 0)
+			input_uV = rdev->constraints->input_uV;
+		if (input_uV <= 0) {
+			rdev_err(rdev, "invalid input voltage found\n");
+			return -EINVAL;
+		}
+
 		/* now get the optimum mode for our new total regulator load */
 		mode = rdev->desc->ops->get_optimum_mode(rdev, input_uV,
 							 output_uV, current_uA);
@@ -2743,6 +2743,24 @@
 	return ret;
 }
 
+static int _regulator_set_voltage_time(struct regulator_dev *rdev,
+				       int old_uV, int new_uV)
+{
+	unsigned int ramp_delay = 0;
+
+	if (rdev->constraints->ramp_delay)
+		ramp_delay = rdev->constraints->ramp_delay;
+	else if (rdev->desc->ramp_delay)
+		ramp_delay = rdev->desc->ramp_delay;
+
+	if (ramp_delay == 0) {
+		rdev_warn(rdev, "ramp_delay not set\n");
+		return 0;
+	}
+
+	return DIV_ROUND_UP(abs(new_uV - old_uV), ramp_delay);
+}
+
 static int _regulator_do_set_voltage(struct regulator_dev *rdev,
 				     int min_uV, int max_uV)
 {
@@ -2751,6 +2769,8 @@
 	int best_val = 0;
 	unsigned int selector;
 	int old_selector = -1;
+	const struct regulator_ops *ops = rdev->desc->ops;
+	int old_uV = _regulator_get_voltage(rdev);
 
 	trace_regulator_set_voltage(rdev_get_name(rdev), min_uV, max_uV);
 
@@ -2762,29 +2782,28 @@
 	 * info to call set_voltage_time_sel().
 	 */
 	if (_regulator_is_enabled(rdev) &&
-	    rdev->desc->ops->set_voltage_time_sel &&
-	    rdev->desc->ops->get_voltage_sel) {
-		old_selector = rdev->desc->ops->get_voltage_sel(rdev);
+	    ops->set_voltage_time_sel && ops->get_voltage_sel) {
+		old_selector = ops->get_voltage_sel(rdev);
 		if (old_selector < 0)
 			return old_selector;
 	}
 
-	if (rdev->desc->ops->set_voltage) {
+	if (ops->set_voltage) {
 		ret = _regulator_call_set_voltage(rdev, min_uV, max_uV,
 						  &selector);
 
 		if (ret >= 0) {
-			if (rdev->desc->ops->list_voltage)
-				best_val = rdev->desc->ops->list_voltage(rdev,
-									 selector);
+			if (ops->list_voltage)
+				best_val = ops->list_voltage(rdev,
+							     selector);
 			else
 				best_val = _regulator_get_voltage(rdev);
 		}
 
-	} else if (rdev->desc->ops->set_voltage_sel) {
+	} else if (ops->set_voltage_sel) {
 		ret = regulator_map_voltage(rdev, min_uV, max_uV);
 		if (ret >= 0) {
-			best_val = rdev->desc->ops->list_voltage(rdev, ret);
+			best_val = ops->list_voltage(rdev, ret);
 			if (min_uV <= best_val && max_uV >= best_val) {
 				selector = ret;
 				if (old_selector == selector)
@@ -2800,34 +2819,50 @@
 		ret = -EINVAL;
 	}
 
-	/* Call set_voltage_time_sel if successfully obtained old_selector */
-	if (ret == 0 && !rdev->constraints->ramp_disable && old_selector >= 0
-		&& old_selector != selector) {
+	if (ret)
+		goto out;
 
-		delay = rdev->desc->ops->set_voltage_time_sel(rdev,
-						old_selector, selector);
-		if (delay < 0) {
-			rdev_warn(rdev, "set_voltage_time_sel() failed: %d\n",
-				  delay);
-			delay = 0;
-		}
-
-		/* Insert any necessary delays */
-		if (delay >= 1000) {
-			mdelay(delay / 1000);
-			udelay(delay % 1000);
-		} else if (delay) {
-			udelay(delay);
+	if (ops->set_voltage_time_sel) {
+		/*
+		 * Call set_voltage_time_sel if successfully obtained
+		 * old_selector
+		 */
+		if (old_selector >= 0 && old_selector != selector)
+			delay = ops->set_voltage_time_sel(rdev, old_selector,
+							  selector);
+	} else {
+		if (old_uV != best_val) {
+			if (ops->set_voltage_time)
+				delay = ops->set_voltage_time(rdev, old_uV,
+							      best_val);
+			else
+				delay = _regulator_set_voltage_time(rdev,
+								    old_uV,
+								    best_val);
 		}
 	}
 
-	if (ret == 0 && best_val >= 0) {
+	if (delay < 0) {
+		rdev_warn(rdev, "failed to get delay: %d\n", delay);
+		delay = 0;
+	}
+
+	/* Insert any necessary delays */
+	if (delay >= 1000) {
+		mdelay(delay / 1000);
+		udelay(delay % 1000);
+	} else if (delay) {
+		udelay(delay);
+	}
+
+	if (best_val >= 0) {
 		unsigned long data = best_val;
 
 		_notifier_call_chain(rdev, REGULATOR_EVENT_VOLTAGE_CHANGE,
 				     (void *)data);
 	}
 
+out:
 	trace_regulator_set_voltage_complete(rdev_get_name(rdev), best_val);
 
 	return ret;
@@ -2998,9 +3033,13 @@
 	int voltage;
 	int i;
 
+	if (ops->set_voltage_time)
+		return ops->set_voltage_time(rdev, old_uV, new_uV);
+	else if (!ops->set_voltage_time_sel)
+		return _regulator_set_voltage_time(rdev, old_uV, new_uV);
+
 	/* Currently requires operations to do this */
-	if (!ops->list_voltage || !ops->set_voltage_time_sel
-	    || !rdev->desc->n_voltages)
+	if (!ops->list_voltage || !rdev->desc->n_voltages)
 		return -EINVAL;
 
 	for (i = 0; i < rdev->desc->n_voltages; i++) {
@@ -3039,19 +3078,8 @@
 				   unsigned int old_selector,
 				   unsigned int new_selector)
 {
-	unsigned int ramp_delay = 0;
 	int old_volt, new_volt;
 
-	if (rdev->constraints->ramp_delay)
-		ramp_delay = rdev->constraints->ramp_delay;
-	else if (rdev->desc->ramp_delay)
-		ramp_delay = rdev->desc->ramp_delay;
-
-	if (ramp_delay == 0) {
-		rdev_warn(rdev, "ramp_delay not set\n");
-		return 0;
-	}
-
 	/* sanity check */
 	if (!rdev->desc->ops->list_voltage)
 		return -EINVAL;
@@ -3059,7 +3087,11 @@
 	old_volt = rdev->desc->ops->list_voltage(rdev, old_selector);
 	new_volt = rdev->desc->ops->list_voltage(rdev, new_selector);
 
-	return DIV_ROUND_UP(abs(new_volt - old_volt), ramp_delay);
+	if (rdev->desc->ops->set_voltage_time)
+		return rdev->desc->ops->set_voltage_time(rdev, old_volt,
+							 new_volt);
+	else
+		return _regulator_set_voltage_time(rdev, old_volt, new_volt);
 }
 EXPORT_SYMBOL_GPL(regulator_set_voltage_time_sel);
 
@@ -3483,10 +3515,8 @@
 		consumers[i].consumer = NULL;
 
 	for (i = 0; i < num_consumers; i++) {
-		consumers[i].consumer = _regulator_get(dev,
-						       consumers[i].supply,
-						       false,
-						       !consumers[i].optional);
+		consumers[i].consumer = regulator_get(dev,
+						      consumers[i].supply);
 		if (IS_ERR(consumers[i].consumer)) {
 			ret = PTR_ERR(consumers[i].consumer);
 			dev_err(dev, "Failed to get supply '%s': %d\n",
diff --git a/drivers/regulator/dbx500-prcmu.c b/drivers/regulator/dbx500-prcmu.c
index 3963dfa..8976141 100644
--- a/drivers/regulator/dbx500-prcmu.c
+++ b/drivers/regulator/dbx500-prcmu.c
@@ -75,24 +75,6 @@
 	u8 *state_after_suspend;
 } rdebug;
 
-void ux500_regulator_suspend_debug(void)
-{
-	int i;
-
-	for (i = 0; i < rdebug.num_regulators; i++)
-		rdebug.state_before_suspend[i] =
-			rdebug.regulator_array[i].is_enabled;
-}
-
-void ux500_regulator_resume_debug(void)
-{
-	int i;
-
-	for (i = 0; i < rdebug.num_regulators; i++)
-		rdebug.state_after_suspend[i] =
-			rdebug.regulator_array[i].is_enabled;
-}
-
 static int ux500_regulator_power_state_cnt_print(struct seq_file *s, void *p)
 {
 	/* print power state count */
diff --git a/drivers/regulator/devres.c b/drivers/regulator/devres.c
index 6ad8ab4..6ec1d40 100644
--- a/drivers/regulator/devres.c
+++ b/drivers/regulator/devres.c
@@ -164,11 +164,8 @@
 		consumers[i].consumer = NULL;
 
 	for (i = 0; i < num_consumers; i++) {
-		consumers[i].consumer = _devm_regulator_get(dev,
-							    consumers[i].supply,
-							    consumers[i].optional ?
-								OPTIONAL_GET :
-								NORMAL_GET);
+		consumers[i].consumer = devm_regulator_get(dev,
+							   consumers[i].supply);
 		if (IS_ERR(consumers[i].consumer)) {
 			ret = PTR_ERR(consumers[i].consumer);
 			dev_err(dev, "Failed to get supply '%s': %d\n",
diff --git a/drivers/regulator/hi6421-regulator.c b/drivers/regulator/hi6421-regulator.c
index 42dc5fb..62c5f54 100644
--- a/drivers/regulator/hi6421-regulator.c
+++ b/drivers/regulator/hi6421-regulator.c
@@ -477,7 +477,8 @@
 	return 0;
 }
 
-unsigned int hi6421_regulator_ldo_get_optimum_mode(struct regulator_dev *rdev,
+static unsigned int
+hi6421_regulator_ldo_get_optimum_mode(struct regulator_dev *rdev,
 			int input_uV, int output_uV, int load_uA)
 {
 	struct hi6421_regulator_info *info = rdev_get_drvdata(rdev);
diff --git a/drivers/regulator/ltc3676.c b/drivers/regulator/ltc3676.c
new file mode 100644
index 0000000..e2b476c
--- /dev/null
+++ b/drivers/regulator/ltc3676.c
@@ -0,0 +1,420 @@
+/*
+ * Copyright (C) 2016 Gateworks Corporation, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/regmap.h>
+#include <linux/regulator/driver.h>
+#include <linux/regulator/machine.h>
+#include <linux/regulator/of_regulator.h>
+
+#define DRIVER_NAME		"ltc3676"
+
+/* LTC3676 Registers */
+#define LTC3676_BUCK1     0x01
+#define LTC3676_BUCK2     0x02
+#define LTC3676_BUCK3     0x03
+#define LTC3676_BUCK4     0x04
+#define LTC3676_LDOA      0x05
+#define LTC3676_LDOB      0x06
+#define LTC3676_SQD1      0x07
+#define LTC3676_SQD2      0x08
+#define LTC3676_CNTRL     0x09
+#define LTC3676_DVB1A     0x0A
+#define LTC3676_DVB1B     0x0B
+#define LTC3676_DVB2A     0x0C
+#define LTC3676_DVB2B     0x0D
+#define LTC3676_DVB3A     0x0E
+#define LTC3676_DVB3B     0x0F
+#define LTC3676_DVB4A     0x10
+#define LTC3676_DVB4B     0x11
+#define LTC3676_MSKIRQ    0x12
+#define LTC3676_MSKPG     0x13
+#define LTC3676_USER      0x14
+#define LTC3676_IRQSTAT   0x15
+#define LTC3676_PGSTATL   0x16
+#define LTC3676_PGSTATRT  0x17
+#define LTC3676_HRST      0x1E
+#define LTC3676_CLIRQ     0x1F
+
+#define LTC3676_DVBxA_REF_SELECT	BIT(5)
+
+#define LTC3676_IRQSTAT_PGOOD_TIMEOUT	BIT(3)
+#define LTC3676_IRQSTAT_UNDERVOLT_WARN	BIT(4)
+#define LTC3676_IRQSTAT_UNDERVOLT_FAULT	BIT(5)
+#define LTC3676_IRQSTAT_THERMAL_WARN	BIT(6)
+#define LTC3676_IRQSTAT_THERMAL_FAULT	BIT(7)
+
+enum ltc3676_reg {
+	LTC3676_SW1,
+	LTC3676_SW2,
+	LTC3676_SW3,
+	LTC3676_SW4,
+	LTC3676_LDO1,
+	LTC3676_LDO2,
+	LTC3676_LDO3,
+	LTC3676_LDO4,
+	LTC3676_NUM_REGULATORS,
+};
+
+struct ltc3676 {
+	struct regmap *regmap;
+	struct device *dev;
+	struct regulator_desc regulator_descs[LTC3676_NUM_REGULATORS];
+	struct regulator_dev *regulators[LTC3676_NUM_REGULATORS];
+};
+
+static int ltc3676_set_suspend_voltage(struct regulator_dev *rdev, int uV)
+{
+	struct ltc3676 *ltc3676 = rdev_get_drvdata(rdev);
+	struct device *dev = ltc3676->dev;
+	int dcdc = rdev_get_id(rdev);
+	int sel;
+
+	dev_dbg(dev, "%s id=%d uV=%d\n", __func__, dcdc, uV);
+	sel = regulator_map_voltage_linear(rdev, uV, uV);
+	if (sel < 0)
+		return sel;
+
+	/* DVBB register follows right after the corresponding DVBA register */
+	return regmap_update_bits(ltc3676->regmap, rdev->desc->vsel_reg + 1,
+				  rdev->desc->vsel_mask, sel);
+}
+
+static int ltc3676_set_suspend_mode(struct regulator_dev *rdev,
+				    unsigned int mode)
+{
+	struct ltc3676 *ltc3676= rdev_get_drvdata(rdev);
+	struct device *dev = ltc3676->dev;
+	int mask, val;
+	int dcdc = rdev_get_id(rdev);
+
+	dev_dbg(dev, "%s id=%d mode=%d\n", __func__, dcdc, mode);
+
+	mask = LTC3676_DVBxA_REF_SELECT;
+	switch (mode) {
+	case REGULATOR_MODE_STANDBY:
+		val = 0; /* select DVBxA */
+		break;
+	case REGULATOR_MODE_NORMAL:
+		val = LTC3676_DVBxA_REF_SELECT; /* select DVBxB */
+		break;
+	default:
+		dev_warn(&rdev->dev, "%s: regulator mode: 0x%x not supported\n",
+			 rdev->desc->name, mode);
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(ltc3676->regmap, rdev->desc->vsel_reg,
+				  mask, val);
+}
+
+static inline unsigned int ltc3676_scale(unsigned int uV, u32 r1, u32 r2)
+{
+	uint64_t tmp;
+	if (uV == 0)
+		return 0;
+	tmp = (uint64_t)uV * r1;
+	do_div(tmp, r2);
+	return uV + (unsigned int)tmp;
+}
+
+static int ltc3676_of_parse_cb(struct device_node *np,
+			       const struct regulator_desc *desc,
+			       struct regulator_config *config)
+{
+	struct ltc3676 *ltc3676 = config->driver_data;
+	struct regulator_desc *rdesc = &ltc3676->regulator_descs[desc->id];
+	u32 r[2];
+	int ret;
+
+	/* LDO3 has a fixed output */
+	if (desc->id == LTC3676_LDO3)
+		return 0;
+
+	ret = of_property_read_u32_array(np, "lltc,fb-voltage-divider", r, 2);
+	if (ret) {
+		dev_err(ltc3676->dev, "Failed to parse voltage divider: %d\n",
+			ret);
+		return ret;
+	}
+
+	rdesc->min_uV = ltc3676_scale(desc->min_uV, r[0], r[1]);
+	rdesc->uV_step = ltc3676_scale(desc->uV_step, r[0], r[1]);
+	rdesc->fixed_uV = ltc3676_scale(desc->fixed_uV, r[0], r[1]);
+
+	return 0;
+}
+
+/* SW1, SW2, SW3, SW4 linear 0.8V-3.3V with scalar via R1/R2 feeback res */
+static struct regulator_ops ltc3676_linear_regulator_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.list_voltage = regulator_list_voltage_linear,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.set_suspend_voltage = ltc3676_set_suspend_voltage,
+	.set_suspend_mode = ltc3676_set_suspend_mode,
+};
+
+/* LDO1 always on fixed 0.8V-3.3V via scalar via R1/R2 feeback res */
+static struct regulator_ops ltc3676_fixed_standby_regulator_ops = {
+};
+
+/* LDO2, LDO3 fixed (LDO2 has external scalar via R1/R2 feedback res) */
+static struct regulator_ops ltc3676_fixed_regulator_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+};
+
+#define LTC3676_REG(_id, _name, _ops, en_reg, en_bit, dvba_reg, dvb_mask)   \
+	[LTC3676_ ## _id] = {                                        \
+		.name = #_name,                                \
+		.of_match = of_match_ptr(#_name),              \
+		.regulators_node = of_match_ptr("regulators"), \
+		.of_parse_cb = ltc3676_of_parse_cb,            \
+		.n_voltages = (dvb_mask) + 1,                  \
+		.min_uV = (dvba_reg) ? 412500 : 0,             \
+		.uV_step = (dvba_reg) ? 12500 : 0,             \
+		.ramp_delay = (dvba_reg) ? 800 : 0,            \
+		.fixed_uV = (dvb_mask) ? 0 : 725000,           \
+		.ops = &ltc3676_ ## _ops ## _regulator_ops,    \
+		.type = REGULATOR_VOLTAGE,                     \
+		.id = LTC3676_ ## _id,                         \
+		.owner = THIS_MODULE,                          \
+		.vsel_reg = (dvba_reg),                        \
+		.vsel_mask = (dvb_mask),                       \
+		.enable_reg = (en_reg),                        \
+		.enable_mask = (1 << en_bit),                  \
+	}
+
+#define LTC3676_LINEAR_REG(_id, _name, _en, _dvba)                     \
+	LTC3676_REG(_id, _name, linear,                                \
+		    LTC3676_ ## _en, 7,                                \
+		    LTC3676_ ## _dvba, 0x1f)
+
+#define LTC3676_FIXED_REG(_id, _name, _en_reg, _en_bit)                \
+	LTC3676_REG(_id, _name, fixed, LTC3676_ ## _en_reg, _en_bit, 0, 0)
+
+static struct regulator_desc ltc3676_regulators[LTC3676_NUM_REGULATORS] = {
+	LTC3676_LINEAR_REG(SW1, sw1, BUCK1, DVB1A),
+	LTC3676_LINEAR_REG(SW2, sw2, BUCK2, DVB2A),
+	LTC3676_LINEAR_REG(SW3, sw3, BUCK3, DVB3A),
+	LTC3676_LINEAR_REG(SW4, sw4, BUCK4, DVB4A),
+	LTC3676_REG(LDO1, ldo1, fixed_standby, 0, 0, 0, 0),
+	LTC3676_FIXED_REG(LDO2, ldo2, LDOA, 2),
+	LTC3676_FIXED_REG(LDO3, ldo3, LDOA, 5),
+	LTC3676_FIXED_REG(LDO4, ldo4, LDOB, 2),
+};
+
+static bool ltc3676_writeable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LTC3676_IRQSTAT:
+	case LTC3676_BUCK1:
+	case LTC3676_BUCK2:
+	case LTC3676_BUCK3:
+	case LTC3676_BUCK4:
+	case LTC3676_LDOA:
+	case LTC3676_LDOB:
+	case LTC3676_SQD1:
+	case LTC3676_SQD2:
+	case LTC3676_CNTRL:
+	case LTC3676_DVB1A:
+	case LTC3676_DVB1B:
+	case LTC3676_DVB2A:
+	case LTC3676_DVB2B:
+	case LTC3676_DVB3A:
+	case LTC3676_DVB3B:
+	case LTC3676_DVB4A:
+	case LTC3676_DVB4B:
+	case LTC3676_MSKIRQ:
+	case LTC3676_MSKPG:
+	case LTC3676_USER:
+	case LTC3676_HRST:
+	case LTC3676_CLIRQ:
+		return true;
+	}
+	return false;
+}
+
+static bool ltc3676_readable_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LTC3676_IRQSTAT:
+	case LTC3676_BUCK1:
+	case LTC3676_BUCK2:
+	case LTC3676_BUCK3:
+	case LTC3676_BUCK4:
+	case LTC3676_LDOA:
+	case LTC3676_LDOB:
+	case LTC3676_SQD1:
+	case LTC3676_SQD2:
+	case LTC3676_CNTRL:
+	case LTC3676_DVB1A:
+	case LTC3676_DVB1B:
+	case LTC3676_DVB2A:
+	case LTC3676_DVB2B:
+	case LTC3676_DVB3A:
+	case LTC3676_DVB3B:
+	case LTC3676_DVB4A:
+	case LTC3676_DVB4B:
+	case LTC3676_MSKIRQ:
+	case LTC3676_MSKPG:
+	case LTC3676_USER:
+	case LTC3676_HRST:
+	case LTC3676_CLIRQ:
+		return true;
+	}
+	return false;
+}
+
+static bool ltc3676_volatile_reg(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case LTC3676_IRQSTAT:
+	case LTC3676_PGSTATL:
+	case LTC3676_PGSTATRT:
+		return true;
+	}
+	return false;
+}
+
+static const struct regmap_config ltc3676_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.writeable_reg = ltc3676_writeable_reg,
+	.readable_reg = ltc3676_readable_reg,
+	.volatile_reg = ltc3676_volatile_reg,
+	.max_register = LTC3676_CLIRQ,
+	.use_single_rw = true,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static irqreturn_t ltc3676_isr(int irq, void *dev_id)
+{
+	struct ltc3676 *ltc3676 = dev_id;
+	struct device *dev = ltc3676->dev;
+	unsigned int i, irqstat, event;
+
+	regmap_read(ltc3676->regmap, LTC3676_IRQSTAT, &irqstat);
+
+	dev_dbg(dev, "irq%d irqstat=0x%02x\n", irq, irqstat);
+	if (irqstat & LTC3676_IRQSTAT_THERMAL_WARN) {
+		dev_warn(dev, "Over-temperature Warning\n");
+		event = REGULATOR_EVENT_OVER_TEMP;
+		for (i = 0; i < LTC3676_NUM_REGULATORS; i++)
+			regulator_notifier_call_chain(ltc3676->regulators[i],
+						      event, NULL);
+	}
+
+	if (irqstat & LTC3676_IRQSTAT_UNDERVOLT_WARN) {
+		dev_info(dev, "Undervoltage Warning\n");
+		event = REGULATOR_EVENT_UNDER_VOLTAGE;
+		for (i = 0; i < LTC3676_NUM_REGULATORS; i++)
+			regulator_notifier_call_chain(ltc3676->regulators[i],
+						      event, NULL);
+	}
+
+	/* Clear warning condition */
+	regmap_write(ltc3676->regmap, LTC3676_CLIRQ, 0);
+
+	return IRQ_HANDLED;
+}
+
+static int ltc3676_regulator_probe(struct i2c_client *client,
+				    const struct i2c_device_id *id)
+{
+	struct device *dev = &client->dev;
+	struct regulator_init_data *init_data = dev_get_platdata(dev);
+	struct regulator_desc *descs;
+	struct ltc3676 *ltc3676;
+	int i, ret;
+
+	ltc3676 = devm_kzalloc(dev, sizeof(*ltc3676), GFP_KERNEL);
+	if (!ltc3676)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, ltc3676);
+	ltc3676->dev = dev;
+
+	descs = ltc3676->regulator_descs;
+	memcpy(descs, ltc3676_regulators, sizeof(ltc3676_regulators));
+	descs[LTC3676_LDO3].fixed_uV = 1800000; /* LDO3 is fixed 1.8V */
+
+	ltc3676->regmap = devm_regmap_init_i2c(client, &ltc3676_regmap_config);
+	if (IS_ERR(ltc3676->regmap)) {
+		ret = PTR_ERR(ltc3676->regmap);
+		dev_err(dev, "failed to initialize regmap: %d\n", ret);
+		return ret;
+	}
+
+	for (i = 0; i < LTC3676_NUM_REGULATORS; i++) {
+		struct regulator_desc *desc = &ltc3676->regulator_descs[i];
+		struct regulator_config config = { };
+
+		if (init_data)
+			config.init_data = &init_data[i];
+
+		config.dev = dev;
+		config.driver_data = ltc3676;
+
+		ltc3676->regulators[i] = devm_regulator_register(dev, desc,
+								 &config);
+		if (IS_ERR(ltc3676->regulators[i])) {
+			ret = PTR_ERR(ltc3676->regulators[i]);
+			dev_err(dev, "failed to register regulator %s: %d\n",
+				desc->name, ret);
+			return ret;
+		}
+	}
+
+	regmap_write(ltc3676->regmap, LTC3676_CLIRQ, 0);
+	if (client->irq) {
+		ret = devm_request_threaded_irq(dev, client->irq, NULL,
+						ltc3676_isr,
+						IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+						client->name, ltc3676);
+		if (ret) {
+			dev_err(dev, "Failed to request IRQ: %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static const struct i2c_device_id ltc3676_i2c_id[] = {
+	{ "ltc3676" },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ltc3676_i2c_id);
+
+static struct i2c_driver ltc3676_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+	},
+	.probe = ltc3676_regulator_probe,
+	.id_table = ltc3676_i2c_id,
+};
+module_i2c_driver(ltc3676_driver);
+
+MODULE_AUTHOR("Tim Harvey <tharvey@gateworks.com>");
+MODULE_DESCRIPTION("Regulator driver for Linear Technology LTC1376");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/regulator/max14577-regulator.c b/drivers/regulator/max14577-regulator.c
index b2daa66..c9ff261 100644
--- a/drivers/regulator/max14577-regulator.c
+++ b/drivers/regulator/max14577-regulator.c
@@ -2,7 +2,7 @@
  * max14577.c - Regulator driver for the Maxim 14577/77836
  *
  * Copyright (C) 2013,2014 Samsung Electronics
- * Krzysztof Kozlowski <k.kozlowski@samsung.com>
+ * Krzysztof Kozlowski <krzk@kernel.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -331,7 +331,7 @@
 }
 module_exit(max14577_regulator_exit);
 
-MODULE_AUTHOR("Krzysztof Kozlowski <k.kozlowski@samsung.com>");
+MODULE_AUTHOR("Krzysztof Kozlowski <krzk@kernel.org>");
 MODULE_DESCRIPTION("Maxim 14577/77836 regulator driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:max14577-regulator");
diff --git a/drivers/regulator/max77693-regulator.c b/drivers/regulator/max77693-regulator.c
index de730fd..cfbb951 100644
--- a/drivers/regulator/max77693-regulator.c
+++ b/drivers/regulator/max77693-regulator.c
@@ -3,7 +3,7 @@
  *
  * Copyright (C) 2013-2015 Samsung Electronics
  * Jonghwa Lee <jonghwa3.lee@samsung.com>
- * Krzysztof Kozlowski <k.kozlowski.k@gmail.com>
+ * Krzysztof Kozlowski <krzk@kernel.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -314,5 +314,5 @@
 
 MODULE_DESCRIPTION("MAXIM 77693/77843 regulator driver");
 MODULE_AUTHOR("Jonghwa Lee <jonghwa3.lee@samsung.com>");
-MODULE_AUTHOR("Krzysztof Kozlowski <k.kozlowski.k@gmail.com>");
+MODULE_AUTHOR("Krzysztof Kozlowski <krzk@kernel.org>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/pv88080-regulator.c b/drivers/regulator/pv88080-regulator.c
index 81950bd..954a20e 100644
--- a/drivers/regulator/pv88080-regulator.c
+++ b/drivers/regulator/pv88080-regulator.c
@@ -16,6 +16,7 @@
 #include <linux/err.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/regulator/driver.h>
@@ -26,7 +27,7 @@
 #include <linux/regulator/of_regulator.h>
 #include "pv88080-regulator.h"
 
-#define PV88080_MAX_REGULATORS	3
+#define PV88080_MAX_REGULATORS	4
 
 /* PV88080 REGULATOR IDs */
 enum {
@@ -34,6 +35,12 @@
 	PV88080_ID_BUCK1,
 	PV88080_ID_BUCK2,
 	PV88080_ID_BUCK3,
+	PV88080_ID_HVBUCK,
+};
+
+enum pv88080_types {
+	TYPE_PV88080_AA,
+	TYPE_PV88080_BA,
 };
 
 struct pv88080_regulator {
@@ -42,7 +49,8 @@
 	unsigned int n_current_limits;
 	const int *current_limits;
 	unsigned int limit_mask;
-	unsigned int conf;
+	unsigned int mode_reg;
+	unsigned int limit_reg;
 	unsigned int conf2;
 	unsigned int conf5;
 };
@@ -51,6 +59,8 @@
 	struct device *dev;
 	struct regmap *regmap;
 	struct regulator_dev *rdev[PV88080_MAX_REGULATORS];
+	unsigned long type;
+	const struct pv88080_compatible_regmap *regmap_config;
 };
 
 struct pv88080_buck_voltage {
@@ -59,6 +69,30 @@
 	int uV_step;
 };
 
+struct pv88080_buck_regmap {
+	/* REGS */
+	int buck_enable_reg;
+	int buck_vsel_reg;
+	int buck_mode_reg;
+	int buck_limit_reg;
+	int buck_vdac_range_reg;
+	int buck_vrange_gain_reg;
+	/* MASKS */
+	int buck_enable_mask;
+	int buck_vsel_mask;
+	int buck_limit_mask;
+};
+
+struct pv88080_compatible_regmap {
+	/* BUCK1, 2, 3 */
+	struct pv88080_buck_regmap buck_regmap[PV88080_MAX_REGULATORS-1];
+	/* HVBUCK */
+	int hvbuck_enable_reg;
+	int hvbuck_vsel_reg;
+	int hvbuck_enable_mask;
+	int hvbuck_vsel_mask;
+};
+
 static const struct regmap_config pv88080_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -89,13 +123,111 @@
 	},
 };
 
+static const struct pv88080_compatible_regmap pv88080_aa_regs = {
+	/* BUCK1 */
+	.buck_regmap[0] = {
+		.buck_enable_reg      = PV88080AA_REG_BUCK1_CONF0,
+		.buck_vsel_reg        = PV88080AA_REG_BUCK1_CONF0,
+		.buck_mode_reg        = PV88080AA_REG_BUCK1_CONF1,
+		.buck_limit_reg       = PV88080AA_REG_BUCK1_CONF1,
+		.buck_vdac_range_reg  = PV88080AA_REG_BUCK1_CONF2,
+		.buck_vrange_gain_reg = PV88080AA_REG_BUCK1_CONF5,
+		.buck_enable_mask     = PV88080_BUCK1_EN,
+		.buck_vsel_mask       = PV88080_VBUCK1_MASK,
+		.buck_limit_mask      = PV88080_BUCK1_ILIM_MASK,
+	},
+	/* BUCK2 */
+	.buck_regmap[1] = {
+		.buck_enable_reg      = PV88080AA_REG_BUCK2_CONF0,
+		.buck_vsel_reg        = PV88080AA_REG_BUCK2_CONF0,
+		.buck_mode_reg        = PV88080AA_REG_BUCK2_CONF1,
+		.buck_limit_reg	      = PV88080AA_REG_BUCK2_CONF1,
+		.buck_vdac_range_reg  = PV88080AA_REG_BUCK2_CONF2,
+		.buck_vrange_gain_reg = PV88080AA_REG_BUCK2_CONF5,
+		.buck_enable_mask	  = PV88080_BUCK2_EN,
+		.buck_vsel_mask       = PV88080_VBUCK2_MASK,
+		.buck_limit_mask      = PV88080_BUCK2_ILIM_MASK,
+	},
+	/* BUCK3 */
+	.buck_regmap[2] = {
+		.buck_enable_reg	  = PV88080AA_REG_BUCK3_CONF0,
+		.buck_vsel_reg        = PV88080AA_REG_BUCK3_CONF0,
+		.buck_mode_reg        = PV88080AA_REG_BUCK3_CONF1,
+		.buck_limit_reg	      = PV88080AA_REG_BUCK3_CONF1,
+		.buck_vdac_range_reg  = PV88080AA_REG_BUCK3_CONF2,
+		.buck_vrange_gain_reg = PV88080AA_REG_BUCK3_CONF5,
+		.buck_enable_mask	  = PV88080_BUCK3_EN,
+		.buck_vsel_mask       = PV88080_VBUCK3_MASK,
+		.buck_limit_mask      = PV88080_BUCK3_ILIM_MASK,
+	},
+	/* HVBUCK */
+	.hvbuck_enable_reg	      = PV88080AA_REG_HVBUCK_CONF2,
+	.hvbuck_vsel_reg          = PV88080AA_REG_HVBUCK_CONF1,
+	.hvbuck_enable_mask       = PV88080_HVBUCK_EN,
+	.hvbuck_vsel_mask         = PV88080_VHVBUCK_MASK,
+};
+
+static const struct pv88080_compatible_regmap pv88080_ba_regs = {
+	/* BUCK1 */
+	.buck_regmap[0] = {
+		.buck_enable_reg	  = PV88080BA_REG_BUCK1_CONF0,
+		.buck_vsel_reg        = PV88080BA_REG_BUCK1_CONF0,
+		.buck_mode_reg        = PV88080BA_REG_BUCK1_CONF1,
+		.buck_limit_reg       = PV88080BA_REG_BUCK1_CONF1,
+		.buck_vdac_range_reg  = PV88080BA_REG_BUCK1_CONF2,
+		.buck_vrange_gain_reg = PV88080BA_REG_BUCK1_CONF5,
+		.buck_enable_mask     = PV88080_BUCK1_EN,
+		.buck_vsel_mask       = PV88080_VBUCK1_MASK,
+		.buck_limit_mask	  = PV88080_BUCK1_ILIM_MASK,
+	},
+	/* BUCK2 */
+	.buck_regmap[1] = {
+		.buck_enable_reg	  = PV88080BA_REG_BUCK2_CONF0,
+		.buck_vsel_reg        = PV88080BA_REG_BUCK2_CONF0,
+		.buck_mode_reg        = PV88080BA_REG_BUCK2_CONF1,
+		.buck_limit_reg	      = PV88080BA_REG_BUCK2_CONF1,
+		.buck_vdac_range_reg  = PV88080BA_REG_BUCK2_CONF2,
+		.buck_vrange_gain_reg = PV88080BA_REG_BUCK2_CONF5,
+		.buck_enable_mask	  = PV88080_BUCK2_EN,
+		.buck_vsel_mask       = PV88080_VBUCK2_MASK,
+		.buck_limit_mask	  = PV88080_BUCK2_ILIM_MASK,
+	},
+	/* BUCK3 */
+	.buck_regmap[2] = {
+		.buck_enable_reg	  = PV88080BA_REG_BUCK3_CONF0,
+		.buck_vsel_reg        = PV88080BA_REG_BUCK3_CONF0,
+		.buck_mode_reg        = PV88080BA_REG_BUCK3_CONF1,
+		.buck_limit_reg	      = PV88080BA_REG_BUCK3_CONF1,
+		.buck_vdac_range_reg  = PV88080BA_REG_BUCK3_CONF2,
+		.buck_vrange_gain_reg = PV88080BA_REG_BUCK3_CONF5,
+		.buck_enable_mask	  = PV88080_BUCK3_EN,
+		.buck_vsel_mask       = PV88080_VBUCK3_MASK,
+		.buck_limit_mask	  = PV88080_BUCK3_ILIM_MASK,
+	},
+	/* HVBUCK */
+	.hvbuck_enable_reg	      = PV88080BA_REG_HVBUCK_CONF2,
+	.hvbuck_vsel_reg          = PV88080BA_REG_HVBUCK_CONF1,
+	.hvbuck_enable_mask       = PV88080_HVBUCK_EN,
+	.hvbuck_vsel_mask		  = PV88080_VHVBUCK_MASK,
+};
+
+#ifdef CONFIG_OF
+static const struct of_device_id pv88080_dt_ids[] = {
+	{ .compatible = "pvs,pv88080",    .data = (void *)TYPE_PV88080_AA },
+	{ .compatible = "pvs,pv88080-aa", .data = (void *)TYPE_PV88080_AA },
+	{ .compatible = "pvs,pv88080-ba", .data = (void *)TYPE_PV88080_BA },
+	{},
+};
+MODULE_DEVICE_TABLE(of, pv88080_dt_ids);
+#endif
+
 static unsigned int pv88080_buck_get_mode(struct regulator_dev *rdev)
 {
 	struct pv88080_regulator *info = rdev_get_drvdata(rdev);
 	unsigned int data;
 	int ret, mode = 0;
 
-	ret = regmap_read(rdev->regmap, info->conf, &data);
+	ret = regmap_read(rdev->regmap, info->mode_reg, &data);
 	if (ret < 0)
 		return ret;
 
@@ -136,7 +268,7 @@
 		return -EINVAL;
 	}
 
-	return regmap_update_bits(rdev->regmap, info->conf,
+	return regmap_update_bits(rdev->regmap, info->mode_reg,
 					PV88080_BUCK1_MODE_MASK, val);
 }
 
@@ -151,7 +283,7 @@
 		if (min <= info->current_limits[i]
 			&& max >= info->current_limits[i]) {
 				return regmap_update_bits(rdev->regmap,
-					info->conf,
+					info->limit_reg,
 					info->limit_mask,
 					i << PV88080_BUCK1_ILIM_SHIFT);
 		}
@@ -166,7 +298,7 @@
 	unsigned int data;
 	int ret;
 
-	ret = regmap_read(rdev->regmap, info->conf, &data);
+	ret = regmap_read(rdev->regmap, info->limit_reg, &data);
 	if (ret < 0)
 		return ret;
 
@@ -187,6 +319,15 @@
 	.get_current_limit = pv88080_get_current_limit,
 };
 
+static struct regulator_ops pv88080_hvbuck_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.set_voltage_sel = regulator_set_voltage_sel_regmap,
+	.get_voltage_sel = regulator_get_voltage_sel_regmap,
+	.list_voltage = regulator_list_voltage_linear,
+};
+
 #define PV88080_BUCK(chip, regl_name, min, step, max, limits_array) \
 {\
 	.desc	=	{\
@@ -200,17 +341,25 @@
 		.min_uV = min, \
 		.uV_step = step, \
 		.n_voltages = ((max) - (min))/(step) + 1, \
-		.enable_reg = PV88080_REG_##regl_name##_CONF0, \
-		.enable_mask = PV88080_##regl_name##_EN, \
-		.vsel_reg = PV88080_REG_##regl_name##_CONF0, \
-		.vsel_mask = PV88080_V##regl_name##_MASK, \
 	},\
 	.current_limits = limits_array, \
 	.n_current_limits = ARRAY_SIZE(limits_array), \
-	.limit_mask = PV88080_##regl_name##_ILIM_MASK, \
-	.conf = PV88080_REG_##regl_name##_CONF1, \
-	.conf2 = PV88080_REG_##regl_name##_CONF2, \
-	.conf5 = PV88080_REG_##regl_name##_CONF5, \
+}
+
+#define PV88080_HVBUCK(chip, regl_name, min, step, max) \
+{\
+	.desc	=	{\
+		.id = chip##_ID_##regl_name,\
+		.name = __stringify(chip##_##regl_name),\
+		.of_match = of_match_ptr(#regl_name),\
+		.regulators_node = of_match_ptr("regulators"),\
+		.type = REGULATOR_VOLTAGE,\
+		.owner = THIS_MODULE,\
+		.ops = &pv88080_hvbuck_ops,\
+		.min_uV = min, \
+		.uV_step = step, \
+		.n_voltages = ((max) - (min))/(step) + 1, \
+	},\
 }
 
 static struct pv88080_regulator pv88080_regulator_info[] = {
@@ -220,6 +369,7 @@
 		pv88080_buck23_limits),
 	PV88080_BUCK(PV88080, BUCK3, 600000, 6250, 1393750,
 		pv88080_buck23_limits),
+	PV88080_HVBUCK(PV88080, HVBUCK, 0, 5000, 1275000),
 };
 
 static irqreturn_t pv88080_irq_handler(int irq, void *data)
@@ -280,6 +430,8 @@
 {
 	struct regulator_init_data *init_data = dev_get_platdata(&i2c->dev);
 	struct pv88080 *chip;
+	const struct pv88080_compatible_regmap *regmap_config;
+	const struct of_device_id *match;
 	struct regulator_config config = { };
 	int i, error, ret;
 	unsigned int conf2, conf5;
@@ -297,6 +449,17 @@
 		return error;
 	}
 
+	if (i2c->dev.of_node) {
+		match = of_match_node(pv88080_dt_ids, i2c->dev.of_node);
+		if (!match) {
+			dev_err(chip->dev, "Failed to get of_match_node\n");
+			return -EINVAL;
+		}
+		chip->type = (unsigned long)match->data;
+	} else {
+		chip->type = id->driver_data;
+	}
+
 	i2c_set_clientdata(i2c, chip);
 
 	if (i2c->irq != 0) {
@@ -336,31 +499,58 @@
 				"Failed to update mask reg: %d\n", ret);
 			return ret;
 		}
-
 	} else {
 		dev_warn(chip->dev, "No IRQ configured\n");
 	}
 
+	switch (chip->type) {
+	case TYPE_PV88080_AA:
+		chip->regmap_config = &pv88080_aa_regs;
+		break;
+	case TYPE_PV88080_BA:
+		chip->regmap_config = &pv88080_ba_regs;
+		break;
+	}
+
+	regmap_config = chip->regmap_config;
 	config.dev = chip->dev;
 	config.regmap = chip->regmap;
 
-	for (i = 0; i < PV88080_MAX_REGULATORS; i++) {
+	/* Registeration for BUCK1, 2, 3 */
+	for (i = 0; i < PV88080_MAX_REGULATORS-1; i++) {
 		if (init_data)
 			config.init_data = &init_data[i];
 
+		pv88080_regulator_info[i].limit_reg
+			= regmap_config->buck_regmap[i].buck_limit_reg;
+		pv88080_regulator_info[i].limit_mask
+			= regmap_config->buck_regmap[i].buck_limit_mask;
+		pv88080_regulator_info[i].mode_reg
+			= regmap_config->buck_regmap[i].buck_mode_reg;
+		pv88080_regulator_info[i].conf2
+			= regmap_config->buck_regmap[i].buck_vdac_range_reg;
+		pv88080_regulator_info[i].conf5
+			= regmap_config->buck_regmap[i].buck_vrange_gain_reg;
+		pv88080_regulator_info[i].desc.enable_reg
+			= regmap_config->buck_regmap[i].buck_enable_reg;
+		pv88080_regulator_info[i].desc.enable_mask
+			= regmap_config->buck_regmap[i].buck_enable_mask;
+		pv88080_regulator_info[i].desc.vsel_reg
+			= regmap_config->buck_regmap[i].buck_vsel_reg;
+		pv88080_regulator_info[i].desc.vsel_mask
+			= regmap_config->buck_regmap[i].buck_vsel_mask;
+
 		ret = regmap_read(chip->regmap,
-			pv88080_regulator_info[i].conf2, &conf2);
+				pv88080_regulator_info[i].conf2, &conf2);
 		if (ret < 0)
 			return ret;
-
 		conf2 = ((conf2 >> PV88080_BUCK_VDAC_RANGE_SHIFT) &
 			PV88080_BUCK_VDAC_RANGE_MASK);
 
 		ret = regmap_read(chip->regmap,
-			pv88080_regulator_info[i].conf5, &conf5);
+				pv88080_regulator_info[i].conf5, &conf5);
 		if (ret < 0)
 			return ret;
-
 		conf5 = ((conf5 >> PV88080_BUCK_VRANGE_GAIN_SHIFT) &
 			PV88080_BUCK_VRANGE_GAIN_MASK);
 
@@ -383,23 +573,38 @@
 		}
 	}
 
+	pv88080_regulator_info[PV88080_ID_HVBUCK].desc.enable_reg
+		= regmap_config->hvbuck_enable_reg;
+	pv88080_regulator_info[PV88080_ID_HVBUCK].desc.enable_mask
+		= regmap_config->hvbuck_enable_mask;
+	pv88080_regulator_info[PV88080_ID_HVBUCK].desc.vsel_reg
+		= regmap_config->hvbuck_vsel_reg;
+	pv88080_regulator_info[PV88080_ID_HVBUCK].desc.vsel_mask
+		= regmap_config->hvbuck_vsel_mask;
+
+	/* Registeration for HVBUCK */
+	if (init_data)
+		config.init_data = &init_data[PV88080_ID_HVBUCK];
+
+	config.driver_data = (void *)&pv88080_regulator_info[PV88080_ID_HVBUCK];
+	chip->rdev[PV88080_ID_HVBUCK] = devm_regulator_register(chip->dev,
+		&pv88080_regulator_info[PV88080_ID_HVBUCK].desc, &config);
+	if (IS_ERR(chip->rdev[PV88080_ID_HVBUCK])) {
+		dev_err(chip->dev, "Failed to register PV88080 regulator\n");
+		return PTR_ERR(chip->rdev[PV88080_ID_HVBUCK]);
+	}
+
 	return 0;
 }
 
 static const struct i2c_device_id pv88080_i2c_id[] = {
-	{"pv88080", 0},
+	{ "pv88080",    TYPE_PV88080_AA },
+	{ "pv88080-aa", TYPE_PV88080_AA },
+	{ "pv88080-ba", TYPE_PV88080_BA },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, pv88080_i2c_id);
 
-#ifdef CONFIG_OF
-static const struct of_device_id pv88080_dt_ids[] = {
-	{ .compatible = "pvs,pv88080", .data = &pv88080_i2c_id[0] },
-	{},
-};
-MODULE_DEVICE_TABLE(of, pv88080_dt_ids);
-#endif
-
 static struct i2c_driver pv88080_regulator_driver = {
 	.driver = {
 		.name = "pv88080",
diff --git a/drivers/regulator/pv88080-regulator.h b/drivers/regulator/pv88080-regulator.h
index 5e9afde..ae25ff3 100644
--- a/drivers/regulator/pv88080-regulator.h
+++ b/drivers/regulator/pv88080-regulator.h
@@ -17,55 +17,75 @@
 #define __PV88080_REGISTERS_H__
 
 /* System Control and Event Registers */
-#define	PV88080_REG_EVENT_A			0x04
-#define	PV88080_REG_MASK_A			0x09
-#define	PV88080_REG_MASK_B			0x0a
-#define	PV88080_REG_MASK_C			0x0b
+#define	PV88080_REG_EVENT_A				0x04
+#define	PV88080_REG_MASK_A				0x09
+#define	PV88080_REG_MASK_B				0x0A
+#define	PV88080_REG_MASK_C				0x0B
 
-/* Regulator Registers */
-#define	PV88080_REG_BUCK1_CONF0			0x27
-#define	PV88080_REG_BUCK1_CONF1			0x28
-#define	PV88080_REG_BUCK1_CONF2			0x59
-#define	PV88080_REG_BUCK1_CONF5			0x5c
-#define	PV88080_REG_BUCK2_CONF0			0x29
-#define	PV88080_REG_BUCK2_CONF1			0x2a
-#define	PV88080_REG_BUCK2_CONF2			0x61
-#define	PV88080_REG_BUCK2_CONF5			0x64
-#define	PV88080_REG_BUCK3_CONF0			0x2b
-#define	PV88080_REG_BUCK3_CONF1			0x2c
-#define	PV88080_REG_BUCK3_CONF2			0x69
-#define	PV88080_REG_BUCK3_CONF5			0x6c
+/* Regulator Registers - rev. AA */
+#define PV88080AA_REG_HVBUCK_CONF1		0x2D
+#define PV88080AA_REG_HVBUCK_CONF2		0x2E
+#define	PV88080AA_REG_BUCK1_CONF0		0x27
+#define	PV88080AA_REG_BUCK1_CONF1		0x28
+#define	PV88080AA_REG_BUCK1_CONF2		0x59
+#define	PV88080AA_REG_BUCK1_CONF5		0x5C
+#define	PV88080AA_REG_BUCK2_CONF0		0x29
+#define	PV88080AA_REG_BUCK2_CONF1		0x2A
+#define	PV88080AA_REG_BUCK2_CONF2		0x61
+#define	PV88080AA_REG_BUCK2_CONF5		0x64
+#define	PV88080AA_REG_BUCK3_CONF0		0x2B
+#define	PV88080AA_REG_BUCK3_CONF1		0x2C
+#define	PV88080AA_REG_BUCK3_CONF2		0x69
+#define	PV88080AA_REG_BUCK3_CONF5		0x6C
+
+/* Regulator Registers - rev. BA */
+#define	PV88080BA_REG_HVBUCK_CONF1		0x33
+#define	PV88080BA_REG_HVBUCK_CONF2		0x34
+#define	PV88080BA_REG_BUCK1_CONF0		0x2A
+#define	PV88080BA_REG_BUCK1_CONF1		0x2C
+#define	PV88080BA_REG_BUCK1_CONF2		0x5A
+#define	PV88080BA_REG_BUCK1_CONF5		0x5D
+#define	PV88080BA_REG_BUCK2_CONF0		0x2D
+#define	PV88080BA_REG_BUCK2_CONF1		0x2F
+#define	PV88080BA_REG_BUCK2_CONF2		0x63
+#define	PV88080BA_REG_BUCK2_CONF5		0x66
+#define	PV88080BA_REG_BUCK3_CONF0		0x30
+#define	PV88080BA_REG_BUCK3_CONF1		0x32
+#define	PV88080BA_REG_BUCK3_CONF2		0x6C
+#define	PV88080BA_REG_BUCK3_CONF5		0x6F
 
 /* PV88080_REG_EVENT_A (addr=0x04) */
 #define	PV88080_E_VDD_FLT				0x01
-#define	PV88080_E_OVER_TEMP			0x02
+#define	PV88080_E_OVER_TEMP				0x02
 
 /* PV88080_REG_MASK_A (addr=0x09) */
 #define	PV88080_M_VDD_FLT				0x01
-#define	PV88080_M_OVER_TEMP			0x02
+#define	PV88080_M_OVER_TEMP				0x02
 
-/* PV88080_REG_BUCK1_CONF0 (addr=0x27) */
+/* PV88080_REG_BUCK1_CONF0 (addr=0x27|0x2A) */
 #define	PV88080_BUCK1_EN				0x80
-#define PV88080_VBUCK1_MASK			0x7F
-/* PV88080_REG_BUCK2_CONF0 (addr=0x29) */
-#define	PV88080_BUCK2_EN				0x80
-#define PV88080_VBUCK2_MASK			0x7F
-/* PV88080_REG_BUCK3_CONF0 (addr=0x2b) */
-#define	PV88080_BUCK3_EN				0x80
-#define PV88080_VBUCK3_MASK			0x7F
+#define PV88080_VBUCK1_MASK				0x7F
 
-/* PV88080_REG_BUCK1_CONF1 (addr=0x28) */
-#define PV88080_BUCK1_ILIM_SHIFT			2
+/* PV88080_REG_BUCK2_CONF0 (addr=0x29|0x2D) */
+#define	PV88080_BUCK2_EN				0x80
+#define PV88080_VBUCK2_MASK				0x7F
+
+/* PV88080_REG_BUCK3_CONF0 (addr=0x2B|0x30) */
+#define	PV88080_BUCK3_EN				0x80
+#define PV88080_VBUCK3_MASK				0x7F
+
+/* PV88080_REG_BUCK1_CONF1 (addr=0x28|0x2C) */
+#define PV88080_BUCK1_ILIM_SHIFT		2
 #define PV88080_BUCK1_ILIM_MASK			0x0C
 #define PV88080_BUCK1_MODE_MASK			0x03
 
-/* PV88080_REG_BUCK2_CONF1 (addr=0x2a) */
-#define PV88080_BUCK2_ILIM_SHIFT			2
+/* PV88080_REG_BUCK2_CONF1 (addr=0x2A|0x2F) */
+#define PV88080_BUCK2_ILIM_SHIFT		2
 #define PV88080_BUCK2_ILIM_MASK			0x0C
 #define PV88080_BUCK2_MODE_MASK			0x03
 
-/* PV88080_REG_BUCK3_CONF1 (addr=0x2c) */
-#define PV88080_BUCK3_ILIM_SHIFT			2
+/* PV88080_REG_BUCK3_CONF1 (addr=0x2C|0x32) */
+#define PV88080_BUCK3_ILIM_SHIFT		2
 #define PV88080_BUCK3_ILIM_MASK			0x0C
 #define PV88080_BUCK3_MODE_MASK			0x03
 
@@ -73,20 +93,26 @@
 #define	PV88080_BUCK_MODE_AUTO			0x01
 #define	PV88080_BUCK_MODE_SYNC			0x02
 
-/* PV88080_REG_BUCK2_CONF2 (addr=0x61) */
-/* PV88080_REG_BUCK3_CONF2 (addr=0x69) */
-#define PV88080_BUCK_VDAC_RANGE_SHIFT			7
-#define PV88080_BUCK_VDAC_RANGE_MASK			0x01
+/* PV88080_REG_HVBUCK_CONF1 (addr=0x2D|0x33) */
+#define PV88080_VHVBUCK_MASK			0xFF
 
-#define PV88080_BUCK_VDAC_RANGE_1			0x00
-#define PV88080_BUCK_VDAC_RANGE_2			0x01
+/* PV88080_REG_HVBUCK_CONF1 (addr=0x2E|0x34) */
+#define PV88080_HVBUCK_EN				0x01
 
-/* PV88080_REG_BUCK2_CONF5 (addr=0x64) */
-/* PV88080_REG_BUCK3_CONF5 (addr=0x6c) */
-#define PV88080_BUCK_VRANGE_GAIN_SHIFT			0
-#define PV88080_BUCK_VRANGE_GAIN_MASK			0x01
+/* PV88080_REG_BUCK2_CONF2 (addr=0x61|0x63) */
+/* PV88080_REG_BUCK3_CONF2 (addr=0x69|0x6C) */
+#define PV88080_BUCK_VDAC_RANGE_SHIFT	7
+#define PV88080_BUCK_VDAC_RANGE_MASK	0x01
 
-#define PV88080_BUCK_VRANGE_GAIN_1			0x00
-#define PV88080_BUCK_VRANGE_GAIN_2			0x01
+#define PV88080_BUCK_VDAC_RANGE_1		0x00
+#define PV88080_BUCK_VDAC_RANGE_2		0x01
+
+/* PV88080_REG_BUCK2_CONF5 (addr=0x64|0x66) */
+/* PV88080_REG_BUCK3_CONF5 (addr=0x6C|0x6F) */
+#define PV88080_BUCK_VRANGE_GAIN_SHIFT	0
+#define PV88080_BUCK_VRANGE_GAIN_MASK	0x01
+
+#define PV88080_BUCK_VRANGE_GAIN_1		0x00
+#define PV88080_BUCK_VRANGE_GAIN_2		0x01
 
 #endif	/* __PV88080_REGISTERS_H__ */
diff --git a/drivers/regulator/pwm-regulator.c b/drivers/regulator/pwm-regulator.c
index c2452424..1b88e0e1 100644
--- a/drivers/regulator/pwm-regulator.c
+++ b/drivers/regulator/pwm-regulator.c
@@ -10,7 +10,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/err.h>
@@ -194,12 +193,10 @@
 	unsigned int min_uV_duty = drvdata->continuous.min_uV_dutycycle;
 	unsigned int max_uV_duty = drvdata->continuous.max_uV_dutycycle;
 	unsigned int duty_unit = drvdata->continuous.dutycycle_unit;
-	unsigned int ramp_delay = rdev->constraints->ramp_delay;
 	int min_uV = rdev->constraints->min_uV;
 	int max_uV = rdev->constraints->max_uV;
 	int diff_uV = max_uV - min_uV;
 	struct pwm_state pstate;
-	int old_uV = pwm_regulator_get_voltage(rdev);
 	unsigned int diff_duty;
 	unsigned int dutycycle;
 	int ret;
@@ -233,13 +230,6 @@
 		return ret;
 	}
 
-	if ((ramp_delay == 0) || !pwm_regulator_is_enabled(rdev))
-		return 0;
-
-	/* Ramp delay is in uV/uS. Adjust to uS and delay */
-	ramp_delay = DIV_ROUND_UP(abs(req_min_uV - old_uV), ramp_delay);
-	usleep_range(ramp_delay, ramp_delay + DIV_ROUND_UP(ramp_delay, 10));
-
 	return 0;
 }
 
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 5022fa8..8ed46a9 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -178,20 +178,21 @@
 static const struct regulator_desc pma8084_ftsmps = {
 	.linear_ranges = (struct regulator_linear_range[]) {
 		REGULATOR_LINEAR_RANGE(350000,  0, 184, 5000),
-		REGULATOR_LINEAR_RANGE(700000, 185, 339, 10000),
+		REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000),
 	},
 	.n_linear_ranges = 2,
-	.n_voltages = 340,
+	.n_voltages = 262,
 	.ops = &rpm_smps_ldo_ops,
 };
 
 static const struct regulator_desc pma8084_pldo = {
 	.linear_ranges = (struct regulator_linear_range[]) {
-		REGULATOR_LINEAR_RANGE(750000,  0,  30, 25000),
-		REGULATOR_LINEAR_RANGE(1500000, 31, 99, 50000),
+		REGULATOR_LINEAR_RANGE( 750000,  0,  63, 12500),
+		REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000),
+		REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
 	},
-	.n_linear_ranges = 2,
-	.n_voltages = 100,
+	.n_linear_ranges = 3,
+	.n_voltages = 164,
 	.ops = &rpm_smps_ldo_ops,
 };
 
@@ -221,29 +222,30 @@
 static const struct regulator_desc pm8841_ftsmps = {
 	.linear_ranges = (struct regulator_linear_range[]) {
 		REGULATOR_LINEAR_RANGE(350000,  0, 184, 5000),
-		REGULATOR_LINEAR_RANGE(700000, 185, 339, 10000),
+		REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000),
 	},
 	.n_linear_ranges = 2,
-	.n_voltages = 340,
+	.n_voltages = 262,
 	.ops = &rpm_smps_ldo_ops,
 };
 
 static const struct regulator_desc pm8941_boost = {
 	.linear_ranges = (struct regulator_linear_range[]) {
-		REGULATOR_LINEAR_RANGE(4000000, 0, 15, 100000),
+		REGULATOR_LINEAR_RANGE(4000000, 0, 30, 50000),
 	},
 	.n_linear_ranges = 1,
-	.n_voltages = 16,
+	.n_voltages = 31,
 	.ops = &rpm_smps_ldo_ops,
 };
 
 static const struct regulator_desc pm8941_pldo = {
 	.linear_ranges = (struct regulator_linear_range[]) {
-		REGULATOR_LINEAR_RANGE( 750000,  0,  30, 25000),
-		REGULATOR_LINEAR_RANGE(1500000, 31, 99, 50000),
+		REGULATOR_LINEAR_RANGE( 750000,  0,  63, 12500),
+		REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000),
+		REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000),
 	},
-	.n_linear_ranges = 2,
-	.n_voltages = 100,
+	.n_linear_ranges = 3,
+	.n_voltages = 164,
 	.ops = &rpm_smps_ldo_ops,
 };
 
diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c
index 40d07ba0..a676749 100644
--- a/drivers/regulator/rk808-regulator.c
+++ b/drivers/regulator/rk808-regulator.c
@@ -533,8 +533,7 @@
 static struct platform_driver rk808_regulator_driver = {
 	.probe = rk808_regulator_probe,
 	.driver = {
-		.name = "rk808-regulator",
-		.owner = THIS_MODULE,
+		.name = "rk808-regulator"
 	},
 };
 
diff --git a/drivers/regulator/tps65218-regulator.c b/drivers/regulator/tps65218-regulator.c
index d1e631d..eb0f5b1 100644
--- a/drivers/regulator/tps65218-regulator.c
+++ b/drivers/regulator/tps65218-regulator.c
@@ -180,6 +180,14 @@
 	if (rid < TPS65218_DCDC_1 || rid > TPS65218_LDO_1)
 		return -EINVAL;
 
+	/*
+	 * Certain revisions of TPS65218 will need to have DCDC3 regulator
+	 * enabled always, otherwise an immediate system reboot will occur
+	 * during poweroff.
+	 */
+	if (rid == TPS65218_DCDC_3 && tps->rev == TPS65218_REV_2_1)
+		return 0;
+
 	if (!tps->info[rid]->strobe) {
 		if (rid == TPS65218_DCDC_3)
 			tps->info[rid]->strobe = 3;
diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c
index fb991ec..696116e 100644
--- a/drivers/regulator/tps65910-regulator.c
+++ b/drivers/regulator/tps65910-regulator.c
@@ -1111,6 +1111,12 @@
 		pmic->num_regulators = ARRAY_SIZE(tps65910_regs);
 		pmic->ext_sleep_control = tps65910_ext_sleep_control;
 		info = tps65910_regs;
+		/* Work around silicon erratum SWCZ010: output programmed
+		 * voltage level can go higher than expected or crash
+		 * Workaround: use no synchronization of DCDC clocks
+		 */
+		tps65910_reg_clear_bits(pmic->mfd, TPS65910_DCDCCTRL,
+					DCDCCTRL_DCDCCKSYNC_MASK);
 		break;
 	case TPS65911:
 		pmic->get_ctrl_reg = &tps65911_get_ctrl_register;
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 8973d34..1de0890 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -212,16 +212,6 @@
 {
 	/* Disable extended error reporting for this device. */
 	dasd_eer_disable(device);
-	/* Forget the discipline information. */
-	if (device->discipline) {
-		if (device->discipline->uncheck_device)
-			device->discipline->uncheck_device(device);
-		module_put(device->discipline->owner);
-	}
-	device->discipline = NULL;
-	if (device->base_discipline)
-		module_put(device->base_discipline->owner);
-	device->base_discipline = NULL;
 	device->state = DASD_STATE_NEW;
 
 	if (device->block)
@@ -336,6 +326,7 @@
 {
 	int rc;
 	struct dasd_block *block;
+	struct gendisk *disk;
 
 	rc = 0;
 	block = device->block;
@@ -346,6 +337,9 @@
 		if (rc) {
 			if (rc != -EAGAIN) {
 				device->state = DASD_STATE_UNFMT;
+				disk = device->block->gdp;
+				kobject_uevent(&disk_to_dev(disk)->kobj,
+					       KOBJ_CHANGE);
 				goto out;
 			}
 			return rc;
@@ -1643,9 +1637,18 @@
 	u8 *sense = NULL;
 	int expires;
 
+	cqr = (struct dasd_ccw_req *) intparm;
 	if (IS_ERR(irb)) {
 		switch (PTR_ERR(irb)) {
 		case -EIO:
+			if (cqr && cqr->status == DASD_CQR_CLEAR_PENDING) {
+				device = (struct dasd_device *) cqr->startdev;
+				cqr->status = DASD_CQR_CLEARED;
+				dasd_device_clear_timer(device);
+				wake_up(&dasd_flush_wq);
+				dasd_schedule_device_bh(device);
+				return;
+			}
 			break;
 		case -ETIMEDOUT:
 			DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: "
@@ -1661,7 +1664,6 @@
 	}
 
 	now = get_tod_clock();
-	cqr = (struct dasd_ccw_req *) intparm;
 	/* check for conditions that should be handled immediately */
 	if (!cqr ||
 	    !(scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) &&
@@ -2266,6 +2268,15 @@
 			continue;
 		}
 		/*
+		 * Don't try to start requests if device is in
+		 * offline processing, it might wait forever
+		 */
+		if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
+			cqr->status = DASD_CQR_FAILED;
+			cqr->intrc = -ENODEV;
+			continue;
+		}
+		/*
 		 * Don't try to start requests if device is stopped
 		 * except path verification requests
 		 */
@@ -3356,6 +3367,22 @@
 }
 EXPORT_SYMBOL_GPL(dasd_generic_probe);
 
+void dasd_generic_free_discipline(struct dasd_device *device)
+{
+	/* Forget the discipline information. */
+	if (device->discipline) {
+		if (device->discipline->uncheck_device)
+			device->discipline->uncheck_device(device);
+		module_put(device->discipline->owner);
+		device->discipline = NULL;
+	}
+	if (device->base_discipline) {
+		module_put(device->base_discipline->owner);
+		device->base_discipline = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(dasd_generic_free_discipline);
+
 /*
  * This will one day be called from a global not_oper handler.
  * It is also used by driver_unregister during module unload.
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 3cdbce4..15a1a70 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -617,6 +617,7 @@
 	/* Wait for reference counter to drop to zero. */
 	wait_event(dasd_delete_wq, atomic_read(&device->ref_count) == 0);
 
+	dasd_generic_free_discipline(device);
 	/* Disconnect dasd_device structure from ccw_device structure. */
 	cdev = device->cdev;
 	device->cdev = NULL;
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index fd2eff4..831935a 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -5078,6 +5078,8 @@
 		return PTR_ERR(cqr);
 	}
 
+	cqr->lpm = lpum;
+retry:
 	cqr->startdev = device;
 	cqr->memdev = device;
 	cqr->block = NULL;
@@ -5122,6 +5124,14 @@
 			(prssdp + 1);
 		memcpy(messages, message_buf,
 		       sizeof(struct dasd_rssd_messages));
+	} else if (cqr->lpm) {
+		/*
+		 * on z/VM we might not be able to do I/O on the requested path
+		 * but instead we get the required information on any path
+		 * so retry with open path mask
+		 */
+		cqr->lpm = 0;
+		goto retry;
 	} else
 		DBF_EVENT_DEVID(DBF_WARNING, device->cdev,
 				"Reading messages failed with rc=%d\n"
@@ -5191,7 +5201,7 @@
 
 	cqr->buildclk = get_tod_clock();
 	cqr->status = DASD_CQR_FILLED;
-	rc = dasd_sleep_on(cqr);
+	rc = dasd_sleep_on_interruptible(cqr);
 	if (rc == 0) {
 		*data = *host_access;
 	} else {
diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c
index e1e8848..d138d01 100644
--- a/drivers/s390/block/dasd_erp.c
+++ b/drivers/s390/block/dasd_erp.c
@@ -169,12 +169,12 @@
 	device = cqr->startdev;
 	if (cqr->intrc == -ETIMEDOUT) {
 		dev_err(&device->cdev->dev,
-			"A timeout error occurred for cqr %p", cqr);
+			"A timeout error occurred for cqr %p\n", cqr);
 		return;
 	}
 	if (cqr->intrc == -ENOLINK) {
 		dev_err(&device->cdev->dev,
-			"A transport error occurred for cqr %p", cqr);
+			"A transport error occurred for cqr %p\n", cqr);
 		return;
 	}
 	/* dump sense data */
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index ac7027e..87ff6ce 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -725,6 +725,7 @@
 int  dasd_cancel_req(struct dasd_ccw_req *);
 int dasd_flush_device_queue(struct dasd_device *);
 int dasd_generic_probe (struct ccw_device *, struct dasd_discipline *);
+void dasd_generic_free_discipline(struct dasd_device *);
 void dasd_generic_remove (struct ccw_device *cdev);
 int dasd_generic_set_online(struct ccw_device *, struct dasd_discipline *);
 int dasd_generic_set_offline (struct ccw_device *cdev);
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index 6b1577c..285b400 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -124,7 +124,12 @@
 static void
 con3270_update_string(struct con3270 *cp, struct string *s, int nr)
 {
-	if (s->len >= cp->view.cols - 5)
+	if (s->len < 4) {
+		/* This indicates a bug, but printing a warning would
+		 * cause a deadlock. */
+		return;
+	}
+	if (s->string[s->len - 4] != TO_RA)
 		return;
 	raw3270_buffer_address(cp->view.dev, s->string + s->len - 3,
 			       cp->view.cols * (nr + 1));
@@ -460,11 +465,11 @@
 		cp->cline->len + 4 : cp->view.cols;
 	s = con3270_alloc_string(cp, size);
 	memcpy(s->string, cp->cline->string, cp->cline->len);
-	if (s->len < cp->view.cols - 5) {
+	if (cp->cline->len < cp->view.cols - 5) {
 		s->string[s->len - 4] = TO_RA;
 		s->string[s->len - 1] = 0;
 	} else {
-		while (--size > cp->cline->len)
+		while (--size >= cp->cline->len)
 			s->string[size] = cp->view.ascebc[' '];
 	}
 	/* Replace cline with allocated line s and reset cline. */
diff --git a/drivers/s390/char/sclp_ctl.c b/drivers/s390/char/sclp_ctl.c
index ea607a4..554eaa1 100644
--- a/drivers/s390/char/sclp_ctl.c
+++ b/drivers/s390/char/sclp_ctl.c
@@ -126,21 +126,4 @@
 	.name = "sclp",
 	.fops = &sclp_ctl_fops,
 };
-
-/*
- * Register sclp_ctl misc device
- */
-static int __init sclp_ctl_init(void)
-{
-	return misc_register(&sclp_ctl_device);
-}
-module_init(sclp_ctl_init);
-
-/*
- * Deregister sclp_ctl misc device
- */
-static void __exit sclp_ctl_exit(void)
-{
-	misc_deregister(&sclp_ctl_device);
-}
-module_exit(sclp_ctl_exit);
+module_misc_device(sclp_ctl_device);
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index d3d1936..e352047 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -312,15 +312,10 @@
 		return -ENOSYS;
 	if (!crypt_enabled(device))
 		return -EUNATCH;
-	ext_kekls = kmalloc(sizeof(*ext_kekls), GFP_KERNEL);
-	if (!ext_kekls)
-		return -ENOMEM;
-	if (copy_from_user(ext_kekls, (char __user *)arg, sizeof(*ext_kekls))) {
-		rc = -EFAULT;
-		goto out;
-	}
+	ext_kekls = memdup_user((char __user *)arg, sizeof(*ext_kekls));
+	if (IS_ERR(ext_kekls))
+		return PTR_ERR(ext_kekls);
 	rc = tape_3592_kekl_set(device, ext_kekls);
-out:
 	kfree(ext_kekls);
 	return rc;
 }
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 6c30e93a..ff18f37 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -306,10 +306,11 @@
 {
 	struct urdev *urd;
 
-	TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n",
-	      intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat,
-	      irb->scsw.cmd.count);
-
+	if (!IS_ERR(irb)) {
+		TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n",
+		      intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat,
+		      irb->scsw.cmd.count);
+	}
 	if (!intparm) {
 		TRACE("ur_int_handler: unsolicited interrupt\n");
 		return;
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 940e725..1167469 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -95,12 +95,13 @@
 int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd)
 {
 	struct chsc_ssd_area *ssd_area;
+	unsigned long flags;
 	int ccode;
 	int ret;
 	int i;
 	int mask;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	ssd_area = chsc_page;
 	ssd_area->request.length = 0x0010;
@@ -144,7 +145,7 @@
 			ssd->fla[i] = ssd_area->fla[i];
 	}
 out:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
 
@@ -832,9 +833,10 @@
 		u32 fmt : 4;
 		u32 : 16;
 	} __attribute__ ((packed)) *secm_area;
+	unsigned long flags;
 	int ret, ccode;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	secm_area = chsc_page;
 	secm_area->request.length = 0x0050;
@@ -864,7 +866,7 @@
 		CIO_CRW_EVENT(2, "chsc: secm failed (rc=%04x)\n",
 			      secm_area->response.code);
 out:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
 
@@ -992,6 +994,7 @@
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
+	unsigned long flags;
 	int ccode, ret;
 
 	struct {
@@ -1021,7 +1024,7 @@
 	if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
 		return -EINVAL;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	scmc_area = chsc_page;
 	scmc_area->request.length = 0x0010;
@@ -1053,7 +1056,7 @@
 	chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
 				  (struct cmg_chars *) &scmc_area->data);
 out:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
 
@@ -1134,6 +1137,7 @@
 int __init
 chsc_determine_css_characteristics(void)
 {
+	unsigned long flags;
 	int result;
 	struct {
 		struct chsc_header request;
@@ -1146,7 +1150,7 @@
 		u32 chsc_char[508];
 	} __attribute__ ((packed)) *scsc_area;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	scsc_area = chsc_page;
 	scsc_area->request.length = 0x0010;
@@ -1168,7 +1172,7 @@
 		CIO_CRW_EVENT(2, "chsc: scsc failed (rc=%04x)\n",
 			      scsc_area->response.code);
 exit:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return result;
 }
 
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index 93de0b4..f0e57ae 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -127,7 +127,6 @@
 extern int cio_halt (struct subchannel *);
 extern int cio_start (struct subchannel *, struct ccw1 *, __u8);
 extern int cio_start_key (struct subchannel *, struct ccw1 *, __u8, __u8);
-extern int cio_cancel (struct subchannel *);
 extern int cio_set_options (struct subchannel *, int);
 extern int cio_update_schib(struct subchannel *sch);
 extern int cio_commit_config(struct subchannel *sch);
diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
index 7ada078..6a58bc8 100644
--- a/drivers/s390/cio/device.c
+++ b/drivers/s390/cio/device.c
@@ -762,7 +762,6 @@
 	priv->state = DEV_STATE_NOT_OPER;
 	priv->dev_id.devno = sch->schib.pmcw.dev;
 	priv->dev_id.ssid = sch->schid.ssid;
-	priv->schid = sch->schid;
 
 	INIT_WORK(&priv->todo_work, ccw_device_todo);
 	INIT_LIST_HEAD(&priv->cmb_list);
@@ -1000,7 +999,6 @@
 	put_device(&old_sch->dev);
 	/* Initialize new subchannel. */
 	spin_lock_irq(sch->lock);
-	cdev->private->schid = sch->schid;
 	cdev->ccwlock = sch->lock;
 	if (!sch_is_pseudo_sch(sch))
 		sch_set_cdev(sch, cdev);
diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c
index 15b56a1..9bc3512 100644
--- a/drivers/s390/cio/device_status.c
+++ b/drivers/s390/cio/device_status.c
@@ -26,6 +26,7 @@
 static void
 ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb)
 {
+	struct subchannel *sch = to_subchannel(cdev->dev.parent);
 	char dbf_text[15];
 
 	if (!scsw_is_valid_cstat(&irb->scsw) ||
@@ -36,10 +37,10 @@
 		      "received"
 		      " ... device %04x on subchannel 0.%x.%04x, dev_stat "
 		      ": %02X sch_stat : %02X\n",
-		      cdev->private->dev_id.devno, cdev->private->schid.ssid,
-		      cdev->private->schid.sch_no,
+		      cdev->private->dev_id.devno, sch->schid.ssid,
+		      sch->schid.sch_no,
 		      scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw));
-	sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no);
+	sprintf(dbf_text, "chk%x", sch->schid.sch_no);
 	CIO_TRACE_EVENT(0, dbf_text);
 	CIO_HEX_EVENT(0, irb, sizeof(struct irb));
 }
diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h
index 8975060..220f491 100644
--- a/drivers/s390/cio/io_sch.h
+++ b/drivers/s390/cio/io_sch.h
@@ -120,7 +120,6 @@
 	int state;		/* device state */
 	atomic_t onoff;
 	struct ccw_dev_id dev_id;	/* device id */
-	struct subchannel_id schid;	/* subchannel number */
 	struct ccw_request req;		/* internal I/O request */
 	int iretry;
 	u8 pgid_valid_mask;	/* mask of valid PGIDs */
diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c
index 4bb5262f..71bf9bd 100644
--- a/drivers/s390/cio/qdio_main.c
+++ b/drivers/s390/cio/qdio_main.c
@@ -686,6 +686,15 @@
 	q->qdio_error = 0;
 }
 
+static inline int qdio_tasklet_schedule(struct qdio_q *q)
+{
+	if (likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) {
+		tasklet_schedule(&q->tasklet);
+		return 0;
+	}
+	return -EPERM;
+}
+
 static void __qdio_inbound_processing(struct qdio_q *q)
 {
 	qperf_inc(q, tasklet_inbound);
@@ -698,10 +707,8 @@
 	if (!qdio_inbound_q_done(q)) {
 		/* means poll time is not yet over */
 		qperf_inc(q, tasklet_inbound_resched);
-		if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
-			tasklet_schedule(&q->tasklet);
+		if (!qdio_tasklet_schedule(q))
 			return;
-		}
 	}
 
 	qdio_stop_polling(q);
@@ -711,8 +718,7 @@
 	 */
 	if (!qdio_inbound_q_done(q)) {
 		qperf_inc(q, tasklet_inbound_resched2);
-		if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED))
-			tasklet_schedule(&q->tasklet);
+		qdio_tasklet_schedule(q);
 	}
 }
 
@@ -869,16 +875,15 @@
 	 * is noticed and outbound_handler is called after some time.
 	 */
 	if (qdio_outbound_q_done(q))
-		del_timer(&q->u.out.timer);
+		del_timer_sync(&q->u.out.timer);
 	else
-		if (!timer_pending(&q->u.out.timer))
+		if (!timer_pending(&q->u.out.timer) &&
+		    likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
 			mod_timer(&q->u.out.timer, jiffies + 10 * HZ);
 	return;
 
 sched:
-	if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
-		return;
-	tasklet_schedule(&q->tasklet);
+	qdio_tasklet_schedule(q);
 }
 
 /* outbound tasklet */
@@ -892,9 +897,7 @@
 {
 	struct qdio_q *q = (struct qdio_q *)data;
 
-	if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
-		return;
-	tasklet_schedule(&q->tasklet);
+	qdio_tasklet_schedule(q);
 }
 
 static inline void qdio_check_outbound_after_thinint(struct qdio_q *q)
@@ -907,7 +910,7 @@
 
 	for_each_output_queue(q->irq_ptr, out, i)
 		if (!qdio_outbound_q_done(out))
-			tasklet_schedule(&out->tasklet);
+			qdio_tasklet_schedule(out);
 }
 
 static void __tiqdio_inbound_processing(struct qdio_q *q)
@@ -929,10 +932,8 @@
 
 	if (!qdio_inbound_q_done(q)) {
 		qperf_inc(q, tasklet_inbound_resched);
-		if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) {
-			tasklet_schedule(&q->tasklet);
+		if (!qdio_tasklet_schedule(q))
 			return;
-		}
 	}
 
 	qdio_stop_polling(q);
@@ -942,8 +943,7 @@
 	 */
 	if (!qdio_inbound_q_done(q)) {
 		qperf_inc(q, tasklet_inbound_resched2);
-		if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED))
-			tasklet_schedule(&q->tasklet);
+		qdio_tasklet_schedule(q);
 	}
 }
 
@@ -977,7 +977,7 @@
 	int i;
 	struct qdio_q *q;
 
-	if (unlikely(irq_ptr->state == QDIO_IRQ_STATE_STOPPED))
+	if (unlikely(irq_ptr->state != QDIO_IRQ_STATE_ACTIVE))
 		return;
 
 	for_each_input_queue(irq_ptr, q, i) {
@@ -1003,7 +1003,7 @@
 			continue;
 		if (need_siga_sync(q) && need_siga_sync_out_after_pci(q))
 			qdio_siga_sync_q(q);
-		tasklet_schedule(&q->tasklet);
+		qdio_tasklet_schedule(q);
 	}
 }
 
@@ -1066,10 +1066,12 @@
 		      struct irb *irb)
 {
 	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+	struct subchannel_id schid;
 	int cstat, dstat;
 
 	if (!intparm || !irq_ptr) {
-		DBF_ERROR("qint:%4x", cdev->private->schid.sch_no);
+		ccw_device_get_schid(cdev, &schid);
+		DBF_ERROR("qint:%4x", schid.sch_no);
 		return;
 	}
 
@@ -1122,12 +1124,14 @@
 int qdio_get_ssqd_desc(struct ccw_device *cdev,
 		       struct qdio_ssqd_desc *data)
 {
+	struct subchannel_id schid;
 
 	if (!cdev || !cdev->private)
 		return -EINVAL;
 
-	DBF_EVENT("get ssqd:%4x", cdev->private->schid.sch_no);
-	return qdio_setup_get_ssqd(NULL, &cdev->private->schid, data);
+	ccw_device_get_schid(cdev, &schid);
+	DBF_EVENT("get ssqd:%4x", schid.sch_no);
+	return qdio_setup_get_ssqd(NULL, &schid, data);
 }
 EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc);
 
@@ -1141,7 +1145,7 @@
 		tasklet_kill(&q->tasklet);
 
 	for_each_output_queue(irq_ptr, q, i) {
-		del_timer(&q->u.out.timer);
+		del_timer_sync(&q->u.out.timer);
 		tasklet_kill(&q->tasklet);
 	}
 }
@@ -1154,14 +1158,15 @@
 int qdio_shutdown(struct ccw_device *cdev, int how)
 {
 	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+	struct subchannel_id schid;
 	int rc;
-	unsigned long flags;
 
 	if (!irq_ptr)
 		return -ENODEV;
 
 	WARN_ON_ONCE(irqs_disabled());
-	DBF_EVENT("qshutdown:%4x", cdev->private->schid.sch_no);
+	ccw_device_get_schid(cdev, &schid);
+	DBF_EVENT("qshutdown:%4x", schid.sch_no);
 
 	mutex_lock(&irq_ptr->setup_mutex);
 	/*
@@ -1184,7 +1189,7 @@
 	qdio_shutdown_debug_entries(irq_ptr);
 
 	/* cleanup subchannel */
-	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+	spin_lock_irq(get_ccwdev_lock(cdev));
 
 	if (how & QDIO_FLAG_CLEANUP_USING_CLEAR)
 		rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP);
@@ -1198,12 +1203,12 @@
 	}
 
 	qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP);
-	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+	spin_unlock_irq(get_ccwdev_lock(cdev));
 	wait_event_interruptible_timeout(cdev->private->wait_q,
 		irq_ptr->state == QDIO_IRQ_STATE_INACTIVE ||
 		irq_ptr->state == QDIO_IRQ_STATE_ERR,
 		10 * HZ);
-	spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
+	spin_lock_irq(get_ccwdev_lock(cdev));
 
 no_cleanup:
 	qdio_shutdown_thinint(irq_ptr);
@@ -1211,7 +1216,7 @@
 	/* restore interrupt handler */
 	if ((void *)cdev->handler == (void *)qdio_int_handler)
 		cdev->handler = irq_ptr->orig_handler;
-	spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);
+	spin_unlock_irq(get_ccwdev_lock(cdev));
 
 	qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE);
 	mutex_unlock(&irq_ptr->setup_mutex);
@@ -1228,11 +1233,13 @@
 int qdio_free(struct ccw_device *cdev)
 {
 	struct qdio_irq *irq_ptr = cdev->private->qdio_data;
+	struct subchannel_id schid;
 
 	if (!irq_ptr)
 		return -ENODEV;
 
-	DBF_EVENT("qfree:%4x", cdev->private->schid.sch_no);
+	ccw_device_get_schid(cdev, &schid);
+	DBF_EVENT("qfree:%4x", schid.sch_no);
 	DBF_DEV_EVENT(DBF_ERR, irq_ptr, "dbf abandoned");
 	mutex_lock(&irq_ptr->setup_mutex);
 
@@ -1251,9 +1258,11 @@
  */
 int qdio_allocate(struct qdio_initialize *init_data)
 {
+	struct subchannel_id schid;
 	struct qdio_irq *irq_ptr;
 
-	DBF_EVENT("qallocate:%4x", init_data->cdev->private->schid.sch_no);
+	ccw_device_get_schid(init_data->cdev, &schid);
+	DBF_EVENT("qallocate:%4x", schid.sch_no);
 
 	if ((init_data->no_input_qs && !init_data->input_handler) ||
 	    (init_data->no_output_qs && !init_data->output_handler))
@@ -1331,20 +1340,18 @@
  */
 int qdio_establish(struct qdio_initialize *init_data)
 {
-	struct qdio_irq *irq_ptr;
 	struct ccw_device *cdev = init_data->cdev;
-	unsigned long saveflags;
+	struct subchannel_id schid;
+	struct qdio_irq *irq_ptr;
 	int rc;
 
-	DBF_EVENT("qestablish:%4x", cdev->private->schid.sch_no);
+	ccw_device_get_schid(cdev, &schid);
+	DBF_EVENT("qestablish:%4x", schid.sch_no);
 
 	irq_ptr = cdev->private->qdio_data;
 	if (!irq_ptr)
 		return -ENODEV;
 
-	if (cdev->private->state != DEV_STATE_ONLINE)
-		return -EINVAL;
-
 	mutex_lock(&irq_ptr->setup_mutex);
 	qdio_setup_irq(init_data);
 
@@ -1361,17 +1368,14 @@
 	irq_ptr->ccw.count = irq_ptr->equeue.count;
 	irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr);
 
-	spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags);
+	spin_lock_irq(get_ccwdev_lock(cdev));
 	ccw_device_set_options_mask(cdev, 0);
 
 	rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0);
+	spin_unlock_irq(get_ccwdev_lock(cdev));
 	if (rc) {
 		DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no);
 		DBF_ERROR("rc:%4x", rc);
-	}
-	spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags);
-
-	if (rc) {
 		mutex_unlock(&irq_ptr->setup_mutex);
 		qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR);
 		return rc;
@@ -1407,19 +1411,17 @@
  */
 int qdio_activate(struct ccw_device *cdev)
 {
+	struct subchannel_id schid;
 	struct qdio_irq *irq_ptr;
 	int rc;
-	unsigned long saveflags;
 
-	DBF_EVENT("qactivate:%4x", cdev->private->schid.sch_no);
+	ccw_device_get_schid(cdev, &schid);
+	DBF_EVENT("qactivate:%4x", schid.sch_no);
 
 	irq_ptr = cdev->private->qdio_data;
 	if (!irq_ptr)
 		return -ENODEV;
 
-	if (cdev->private->state != DEV_STATE_ONLINE)
-		return -EINVAL;
-
 	mutex_lock(&irq_ptr->setup_mutex);
 	if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) {
 		rc = -EBUSY;
@@ -1431,19 +1433,17 @@
 	irq_ptr->ccw.count = irq_ptr->aqueue.count;
 	irq_ptr->ccw.cda = 0;
 
-	spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags);
+	spin_lock_irq(get_ccwdev_lock(cdev));
 	ccw_device_set_options(cdev, CCWDEV_REPORT_ALL);
 
 	rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE,
 			      0, DOIO_DENY_PREFETCH);
+	spin_unlock_irq(get_ccwdev_lock(cdev));
 	if (rc) {
 		DBF_ERROR("%4x act IO ERR", irq_ptr->schid.sch_no);
 		DBF_ERROR("rc:%4x", rc);
-	}
-	spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags);
-
-	if (rc)
 		goto out;
+	}
 
 	if (is_thinint_irq(irq_ptr))
 		tiqdio_add_input_queues(irq_ptr);
@@ -1585,10 +1585,11 @@
 
 	/* in case of SIGA errors we must process the error immediately */
 	if (used >= q->u.out.scan_threshold || rc)
-		tasklet_schedule(&q->tasklet);
+		qdio_tasklet_schedule(q);
 	else
 		/* free the SBALs in case of no further traffic */
-		if (!timer_pending(&q->u.out.timer))
+		if (!timer_pending(&q->u.out.timer) &&
+		    likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE))
 			mod_timer(&q->u.out.timer, jiffies + HZ);
 	return rc;
 }
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index bf40063..6d4b68c4 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -999,6 +999,7 @@
 						 __u16, __u16,
 						 enum qeth_prot_versions);
 int qeth_set_features(struct net_device *, netdev_features_t);
+int qeth_recover_features(struct net_device *);
 netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
 
 /* exports for OSN */
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 7dba6c8..20cf296 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3619,7 +3619,8 @@
 		int e;
 
 		e = 0;
-		while (buffer->element[e].addr) {
+		while ((e < QDIO_MAX_ELEMENTS_PER_BUFFER) &&
+		       buffer->element[e].addr) {
 			unsigned long phys_aob_addr;
 
 			phys_aob_addr = (unsigned long) buffer->element[e].addr;
@@ -6131,6 +6132,35 @@
 	return rc;
 }
 
+/* try to restore device features on a device after recovery */
+int qeth_recover_features(struct net_device *dev)
+{
+	struct qeth_card *card = dev->ml_priv;
+	netdev_features_t recover = dev->features;
+
+	if (recover & NETIF_F_IP_CSUM) {
+		if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM))
+			recover ^= NETIF_F_IP_CSUM;
+	}
+	if (recover & NETIF_F_RXCSUM) {
+		if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM))
+			recover ^= NETIF_F_RXCSUM;
+	}
+	if (recover & NETIF_F_TSO) {
+		if (qeth_set_ipa_tso(card, 1))
+			recover ^= NETIF_F_TSO;
+	}
+
+	if (recover == dev->features)
+		return 0;
+
+	dev_warn(&card->gdev->dev,
+		 "Device recovery failed to restore all offload features\n");
+	dev->features = recover;
+	return -EIO;
+}
+EXPORT_SYMBOL_GPL(qeth_recover_features);
+
 int qeth_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct qeth_card *card = dev->ml_priv;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 7bc20c5..bb27058 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -1124,14 +1124,11 @@
 			card->dev->hw_features |= NETIF_F_RXCSUM;
 			card->dev->vlan_features |= NETIF_F_RXCSUM;
 		}
-		/* Turn on SG per default */
-		card->dev->features |= NETIF_F_SG;
 	}
 	card->info.broadcast_capable = 1;
 	qeth_l2_request_initial_mac(card);
 	card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
 				  PAGE_SIZE;
-	card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1);
 	SET_NETDEV_DEV(card->dev, &card->gdev->dev);
 	netif_napi_add(card->dev, &card->napi, qeth_l2_poll, QETH_NAPI_WEIGHT);
 	netif_carrier_off(card->dev);
@@ -1246,6 +1243,9 @@
 		}
 		/* this also sets saved unicast addresses */
 		qeth_l2_set_rx_mode(card->dev);
+		rtnl_lock();
+		qeth_recover_features(card->dev);
+		rtnl_unlock();
 	}
 	/* let user_space know that device is online */
 	kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 7293466..272d9e7 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -257,6 +257,11 @@
 	if (addr->in_progress)
 		return -EINPROGRESS;
 
+	if (!qeth_card_hw_is_reachable(card)) {
+		addr->disp_flag = QETH_DISP_ADDR_DELETE;
+		return 0;
+	}
+
 	rc = qeth_l3_deregister_addr_entry(card, addr);
 
 	hash_del(&addr->hnode);
@@ -296,6 +301,11 @@
 		hash_add(card->ip_htable, &addr->hnode,
 				qeth_l3_ipaddr_hash(addr));
 
+		if (!qeth_card_hw_is_reachable(card)) {
+			addr->disp_flag = QETH_DISP_ADDR_ADD;
+			return 0;
+		}
+
 		/* qeth_l3_register_addr_entry can go to sleep
 		 * if we add a IPV4 addr. It is caused by the reason
 		 * that SETIP ipa cmd starts ARP staff for IPV4 addr.
@@ -390,12 +400,16 @@
 	int i;
 	int rc;
 
-	QETH_CARD_TEXT(card, 4, "recoverip");
+	QETH_CARD_TEXT(card, 4, "recovrip");
 
 	spin_lock_bh(&card->ip_lock);
 
 	hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
-		if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
+		if (addr->disp_flag == QETH_DISP_ADDR_DELETE) {
+			qeth_l3_deregister_addr_entry(card, addr);
+			hash_del(&addr->hnode);
+			kfree(addr);
+		} else if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
 			if (addr->proto == QETH_PROT_IPV4) {
 				addr->in_progress = 1;
 				spin_unlock_bh(&card->ip_lock);
@@ -407,10 +421,8 @@
 
 			if (!rc) {
 				addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
-				if (addr->ref_counter < 1) {
+				if (addr->ref_counter < 1)
 					qeth_l3_delete_ip(card, addr);
-					kfree(addr);
-				}
 			} else {
 				hash_del(&addr->hnode);
 				kfree(addr);
@@ -689,7 +701,7 @@
 
 	spin_lock_bh(&card->ip_lock);
 
-	if (!qeth_l3_ip_from_hash(card, ipaddr))
+	if (qeth_l3_ip_from_hash(card, ipaddr))
 		rc = -EEXIST;
 	else
 		qeth_l3_add_ip(card, ipaddr);
@@ -757,7 +769,7 @@
 
 	spin_lock_bh(&card->ip_lock);
 
-	if (!qeth_l3_ip_from_hash(card, ipaddr))
+	if (qeth_l3_ip_from_hash(card, ipaddr))
 		rc = -EEXIST;
 	else
 		qeth_l3_add_ip(card, ipaddr);
@@ -3108,7 +3120,6 @@
 				card->dev->vlan_features = NETIF_F_SG |
 					NETIF_F_RXCSUM | NETIF_F_IP_CSUM |
 					NETIF_F_TSO;
-				card->dev->features = NETIF_F_SG;
 			}
 		}
 	} else if (card->info.type == QETH_CARD_TYPE_IQD) {
@@ -3136,7 +3147,6 @@
 	netif_keep_dst(card->dev);
 	card->dev->gso_max_size = (QETH_MAX_BUFFER_ELEMENTS(card) - 1) *
 				  PAGE_SIZE;
-	card->dev->gso_max_segs = (QETH_MAX_BUFFER_ELEMENTS(card) - 1);
 
 	SET_NETDEV_DEV(card->dev, &card->gdev->dev);
 	netif_napi_add(card->dev, &card->napi, qeth_l3_poll, QETH_NAPI_WEIGHT);
@@ -3269,6 +3279,7 @@
 		else
 			dev_open(card->dev);
 		qeth_l3_set_multicast_list(card->dev);
+		qeth_recover_features(card->dev);
 		rtnl_unlock();
 	}
 	qeth_trace_features(card);
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index 65645b1..0e00a5c 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -297,7 +297,9 @@
 		addr->u.a6.pfxlen = 0;
 		addr->type = QETH_IP_TYPE_NORMAL;
 
+		spin_lock_bh(&card->ip_lock);
 		qeth_l3_delete_ip(card, addr);
+		spin_unlock_bh(&card->ip_lock);
 		kfree(addr);
 	}
 
@@ -329,7 +331,10 @@
 		addr->type = QETH_IP_TYPE_NORMAL;
 	} else
 		return -ENOMEM;
+
+	spin_lock_bh(&card->ip_lock);
 	qeth_l3_add_ip(card, addr);
+	spin_unlock_bh(&card->ip_lock);
 	kfree(addr);
 
 	return count;
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index c00ac46..bcc8f3d 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -310,7 +310,7 @@
 
 	snprintf(name, sizeof(name), "zfcp_q_%s",
 		 dev_name(&adapter->ccw_device->dev));
-	adapter->work_queue = create_singlethread_workqueue(name);
+	adapter->work_queue = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
 
 	if (adapter->work_queue)
 		return 0;
diff --git a/drivers/s390/virtio/Makefile b/drivers/s390/virtio/Makefile
index 241891a..df40692 100644
--- a/drivers/s390/virtio/Makefile
+++ b/drivers/s390/virtio/Makefile
@@ -6,4 +6,8 @@
 # it under the terms of the GNU General Public License (version 2 only)
 # as published by the Free Software Foundation.
 
-obj-$(CONFIG_S390_GUEST) += kvm_virtio.o virtio_ccw.o
+s390-virtio-objs := virtio_ccw.o
+ifdef CONFIG_S390_GUEST_OLD_TRANSPORT
+s390-virtio-objs += kvm_virtio.o
+endif
+obj-$(CONFIG_S390_GUEST) += $(s390-virtio-objs)
diff --git a/drivers/s390/virtio/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c
index 1d060fd..5e5c11f 100644
--- a/drivers/s390/virtio/kvm_virtio.c
+++ b/drivers/s390/virtio/kvm_virtio.c
@@ -458,6 +458,8 @@
 	if (test_devices_support(total_memory_size) < 0)
 		return -ENODEV;
 
+	pr_warn("The s390-virtio transport is deprecated. Please switch to a modern host providing virtio-ccw.\n");
+
 	rc = vmem_add_mapping(total_memory_size, PAGE_SIZE);
 	if (rc)
 		return rc;
@@ -482,7 +484,7 @@
 }
 
 /* code for early console output with virtio_console */
-static __init int early_put_chars(u32 vtermno, const char *buf, int count)
+static int early_put_chars(u32 vtermno, const char *buf, int count)
 {
 	char scratch[17];
 	unsigned int len = count;
diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c
index b381b371..5648b71 100644
--- a/drivers/scsi/aacraid/commctrl.c
+++ b/drivers/scsi/aacraid/commctrl.c
@@ -63,7 +63,7 @@
 	struct fib *fibptr;
 	struct hw_fib * hw_fib = (struct hw_fib *)0;
 	dma_addr_t hw_fib_pa = (dma_addr_t)0LL;
-	unsigned size;
+	unsigned int size, osize;
 	int retval;
 
 	if (dev->in_reset) {
@@ -87,7 +87,8 @@
 	 *	will not overrun the buffer when we copy the memory. Return
 	 *	an error if we would.
 	 */
-	size = le16_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr);
+	osize = size = le16_to_cpu(kfib->header.Size) +
+		sizeof(struct aac_fibhdr);
 	if (size < le16_to_cpu(kfib->header.SenderSize))
 		size = le16_to_cpu(kfib->header.SenderSize);
 	if (size > dev->max_fib_size) {
@@ -118,6 +119,14 @@
 		goto cleanup;
 	}
 
+	/* Sanity check the second copy */
+	if ((osize != le16_to_cpu(kfib->header.Size) +
+		sizeof(struct aac_fibhdr))
+		|| (size < le16_to_cpu(kfib->header.SenderSize))) {
+		retval = -EINVAL;
+		goto cleanup;
+	}
+
 	if (kfib->header.Command == cpu_to_le16(TakeABreakPt)) {
 		aac_adapter_interrupt(dev);
 		/*
diff --git a/drivers/scsi/constants.c b/drivers/scsi/constants.c
index 83458f7..6dc96c8 100644
--- a/drivers/scsi/constants.c
+++ b/drivers/scsi/constants.c
@@ -361,8 +361,9 @@
 
 /* Get sense key string or NULL if not available */
 const char *
-scsi_sense_key_string(unsigned char key) {
-	if (key <= 0xE)
+scsi_sense_key_string(unsigned char key)
+{
+	if (key < ARRAY_SIZE(snstext))
 		return snstext[key];
 	return NULL;
 }
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index e4ba2d2..7c0d7af 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -84,6 +84,9 @@
 
 static const struct cxgb4_uld_info cxgb4i_uld_info = {
 	.name = DRV_MODULE_NAME,
+	.nrxq = MAX_ULD_QSETS,
+	.rxq_size = 1024,
+	.lro = false,
 	.add = t4_uld_add,
 	.rx_handler = t4_uld_rx_handler,
 	.state_change = t4_uld_state_change,
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index a569c65..dcf3653 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -2923,7 +2923,7 @@
 	mutex_unlock(&fip->ctlr_mutex);
 
 drop:
-	kfree(skb);
+	kfree_skb(skb);
 	return rc;
 }
 
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index ba9af4a..ec6381e 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -486,6 +486,8 @@
 	else
 		shost->dma_boundary = 0xffffffff;
 
+	shost->use_blk_mq = scsi_use_blk_mq;
+
 	device_initialize(&shost->shost_gendev);
 	dev_set_name(&shost->shost_gendev, "host%d", shost->host_no);
 	shost->shost_gendev.bus = &scsi_bus_type;
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index bf85974..17d04c7 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -10410,8 +10410,11 @@
 		__ipr_remove(pdev);
 		return rc;
 	}
+	spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
+	ioa_cfg->scan_enabled = 1;
+	schedule_work(&ioa_cfg->work_q);
+	spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
 
-	scsi_scan_host(ioa_cfg->host);
 	ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight;
 
 	if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
@@ -10421,10 +10424,8 @@
 		}
 	}
 
-	spin_lock_irqsave(ioa_cfg->host->host_lock, flags);
-	ioa_cfg->scan_enabled = 1;
-	schedule_work(&ioa_cfg->work_q);
-	spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
+	scsi_scan_host(ioa_cfg->host);
+
 	return 0;
 }
 
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 2dab3dc..c1ed25a 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -5037,7 +5037,7 @@
 	/* Find first memory bar */
 	bar_list = pci_select_bars(instance->pdev, IORESOURCE_MEM);
 	instance->bar = find_first_bit(&bar_list, sizeof(unsigned long));
-	if (pci_request_selected_regions(instance->pdev, instance->bar,
+	if (pci_request_selected_regions(instance->pdev, 1<<instance->bar,
 					 "megasas: LSI")) {
 		dev_printk(KERN_DEBUG, &instance->pdev->dev, "IO memory region busy!\n");
 		return -EBUSY;
@@ -5339,7 +5339,7 @@
 	iounmap(instance->reg_set);
 
       fail_ioremap:
-	pci_release_selected_regions(instance->pdev, instance->bar);
+	pci_release_selected_regions(instance->pdev, 1<<instance->bar);
 
 	return -EINVAL;
 }
@@ -5360,7 +5360,7 @@
 
 	iounmap(instance->reg_set);
 
-	pci_release_selected_regions(instance->pdev, instance->bar);
+	pci_release_selected_regions(instance->pdev, 1<<instance->bar);
 }
 
 /**
diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index ec83754..52d8bbf 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -2603,7 +2603,7 @@
 
 	iounmap(instance->reg_set);
 
-	pci_release_selected_regions(instance->pdev, instance->bar);
+	pci_release_selected_regions(instance->pdev, 1<<instance->bar);
 }
 
 /**
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 751f13e..750f82c 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -2188,6 +2188,17 @@
 	} else
 		ioc->msix96_vector = 0;
 
+	if (ioc->is_warpdrive) {
+		ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
+		    &ioc->chip->ReplyPostHostIndex;
+
+		for (i = 1; i < ioc->cpu_msix_table_sz; i++)
+			ioc->reply_post_host_index[i] =
+			(resource_size_t __iomem *)
+			((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
+			* 4)));
+	}
+
 	list_for_each_entry(reply_q, &ioc->reply_queue_list, list)
 		pr_info(MPT3SAS_FMT "%s: IRQ %d\n",
 		    reply_q->name,  ((ioc->msix_enable) ? "PCI-MSI-X enabled" :
@@ -5280,17 +5291,6 @@
 	if (r)
 		goto out_free_resources;
 
-	if (ioc->is_warpdrive) {
-		ioc->reply_post_host_index[0] = (resource_size_t __iomem *)
-		    &ioc->chip->ReplyPostHostIndex;
-
-		for (i = 1; i < ioc->cpu_msix_table_sz; i++)
-			ioc->reply_post_host_index[i] =
-			(resource_size_t __iomem *)
-			((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1)
-			* 4)));
-	}
-
 	pci_set_drvdata(ioc->pdev, ioc->shost);
 	r = _base_get_ioc_facts(ioc, CAN_SLEEP);
 	if (r)
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1f36aca..1deb6ad 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1160,7 +1160,6 @@
 bool scsi_use_blk_mq = false;
 #endif
 module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO);
-EXPORT_SYMBOL_GPL(scsi_use_blk_mq);
 
 static int __init init_scsi(void)
 {
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index eaccd65..2464569 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -246,6 +246,10 @@
 	{"IBM", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
 	{"SUN", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
 	{"DELL", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+	{"STK", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+	{"NETAPP", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+	{"LSI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
+	{"ENGENIO", "Universal Xport", "*", BLIST_NO_ULD_ATTACH},
 	{"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36},
 	{"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN},
 	{"SONY", "TSL", NULL, BLIST_FORCELUN},		/* DDS3 & DDS4 autoloaders */
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 57a4b99..85c8a51 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -29,6 +29,7 @@
 extern void scsi_exit_hosts(void);
 
 /* scsi.c */
+extern bool scsi_use_blk_mq;
 extern int scsi_setup_command_freelist(struct Scsi_Host *shost);
 extern void scsi_destroy_command_freelist(struct Scsi_Host *shost);
 #ifdef CONFIG_SCSI_LOGGING
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 3f0ff07..60b651b 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -341,22 +341,6 @@
 }
 
 /**
- * is_sas_attached - check if device is SAS attached
- * @sdev: scsi device to check
- *
- * returns true if the device is SAS attached
- */
-int is_sas_attached(struct scsi_device *sdev)
-{
-	struct Scsi_Host *shost = sdev->host;
-
-	return shost->transportt->host_attrs.ac.class ==
-		&sas_host_class.class;
-}
-EXPORT_SYMBOL(is_sas_attached);
-
-
-/**
  * sas_remove_children  -  tear down a devices SAS data structures
  * @dev:	device belonging to the sas object
  *
diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c
index 53ef1cb..8c9a35c 100644
--- a/drivers/scsi/ses.c
+++ b/drivers/scsi/ses.c
@@ -587,7 +587,7 @@
 
 	ses_enclosure_data_process(edev, to_scsi_device(edev->edev.parent), 0);
 
-	if (is_sas_attached(sdev))
+	if (scsi_is_sas_rphy(&sdev->sdev_gendev))
 		efd.addr = sas_get_address(sdev);
 
 	if (efd.addr) {
@@ -778,6 +778,8 @@
 	if (!edev)
 		return;
 
+	enclosure_unregister(edev);
+
 	ses_dev = edev->scratch;
 	edev->scratch = NULL;
 
@@ -789,7 +791,6 @@
 	kfree(edev->component[0].scratch);
 
 	put_device(&edev->edev);
-	enclosure_unregister(edev);
 }
 
 static void ses_intf_remove(struct device *cdev,
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 7dbbb29..deefab3 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -107,8 +107,8 @@
 	/* If the affinity hint is set for virtqueues */
 	bool affinity_hint_set;
 
-	/* CPU hotplug notifier */
-	struct notifier_block nb;
+	struct hlist_node node;
+	struct hlist_node node_dead;
 
 	/* Protected by event_vq lock */
 	bool stop_events;
@@ -118,6 +118,7 @@
 	struct virtio_scsi_vq req_vqs[];
 };
 
+static enum cpuhp_state virtioscsi_online;
 static struct kmem_cache *virtscsi_cmd_cache;
 static mempool_t *virtscsi_cmd_pool;
 
@@ -852,21 +853,33 @@
 	put_online_cpus();
 }
 
-static int virtscsi_cpu_callback(struct notifier_block *nfb,
-				 unsigned long action, void *hcpu)
+static int virtscsi_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
-	struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb);
-	switch(action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		__virtscsi_set_affinity(vscsi, true);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
+	struct virtio_scsi *vscsi = hlist_entry_safe(node, struct virtio_scsi,
+						     node);
+	__virtscsi_set_affinity(vscsi, true);
+	return 0;
+}
+
+static int virtscsi_cpu_notif_add(struct virtio_scsi *vi)
+{
+	int ret;
+
+	ret = cpuhp_state_add_instance(virtioscsi_online, &vi->node);
+	if (ret)
+		return ret;
+
+	ret = cpuhp_state_add_instance(CPUHP_VIRT_SCSI_DEAD, &vi->node_dead);
+	if (ret)
+		cpuhp_state_remove_instance(virtioscsi_online, &vi->node);
+	return ret;
+}
+
+static void virtscsi_cpu_notif_remove(struct virtio_scsi *vi)
+{
+	cpuhp_state_remove_instance_nocalls(virtioscsi_online, &vi->node);
+	cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_SCSI_DEAD,
+					    &vi->node_dead);
 }
 
 static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
@@ -929,8 +942,6 @@
 		virtscsi_init_vq(&vscsi->req_vqs[i - VIRTIO_SCSI_VQ_BASE],
 				 vqs[i]);
 
-	virtscsi_set_affinity(vscsi, true);
-
 	virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
 	virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
 
@@ -987,12 +998,9 @@
 	if (err)
 		goto virtscsi_init_failed;
 
-	vscsi->nb.notifier_call = &virtscsi_cpu_callback;
-	err = register_hotcpu_notifier(&vscsi->nb);
-	if (err) {
-		pr_err("registering cpu notifier failed\n");
+	err = virtscsi_cpu_notif_add(vscsi);
+	if (err)
 		goto scsi_add_host_failed;
-	}
 
 	cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
 	shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
@@ -1049,7 +1057,7 @@
 
 	scsi_remove_host(shost);
 
-	unregister_hotcpu_notifier(&vscsi->nb);
+	virtscsi_cpu_notif_remove(vscsi);
 
 	virtscsi_remove_vqs(vdev);
 	scsi_host_put(shost);
@@ -1061,7 +1069,7 @@
 	struct Scsi_Host *sh = virtio_scsi_host(vdev);
 	struct virtio_scsi *vscsi = shost_priv(sh);
 
-	unregister_hotcpu_notifier(&vscsi->nb);
+	virtscsi_cpu_notif_remove(vscsi);
 	virtscsi_remove_vqs(vdev);
 	return 0;
 }
@@ -1076,12 +1084,11 @@
 	if (err)
 		return err;
 
-	err = register_hotcpu_notifier(&vscsi->nb);
+	err = virtscsi_cpu_notif_add(vscsi);
 	if (err) {
 		vdev->config->del_vqs(vdev);
 		return err;
 	}
-
 	virtio_device_ready(vdev);
 
 	if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
@@ -1136,6 +1143,16 @@
 		pr_err("mempool_create() for virtscsi_cmd_pool failed\n");
 		goto error;
 	}
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+				      "scsi/virtio:online",
+				      virtscsi_cpu_online, NULL);
+	if (ret < 0)
+		goto error;
+	virtioscsi_online = ret;
+	ret = cpuhp_setup_state_multi(CPUHP_VIRT_SCSI_DEAD, "scsi/virtio:dead",
+				      NULL, virtscsi_cpu_online);
+	if (ret)
+		goto error;
 	ret = register_virtio_driver(&virtio_scsi_driver);
 	if (ret < 0)
 		goto error;
@@ -1151,12 +1168,17 @@
 		kmem_cache_destroy(virtscsi_cmd_cache);
 		virtscsi_cmd_cache = NULL;
 	}
+	if (virtioscsi_online)
+		cpuhp_remove_multi_state(virtioscsi_online);
+	cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD);
 	return ret;
 }
 
 static void __exit fini(void)
 {
 	unregister_virtio_driver(&virtio_scsi_driver);
+	cpuhp_remove_multi_state(virtioscsi_online);
+	cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD);
 	mempool_destroy(virtscsi_cmd_pool);
 	kmem_cache_destroy(virtscsi_cmd_cache);
 }
diff --git a/drivers/scsi/wd719x.c b/drivers/scsi/wd719x.c
index e3da1a2..2a9da2e 100644
--- a/drivers/scsi/wd719x.c
+++ b/drivers/scsi/wd719x.c
@@ -962,7 +962,7 @@
 	scsi_host_put(sh);
 }
 
-static DEFINE_PCI_DEVICE_TABLE(wd719x_pci_table) = {
+static const struct pci_device_id wd719x_pci_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_WD, 0x3296) },
 	{}
 };
diff --git a/drivers/soc/samsung/pm_domains.c b/drivers/soc/samsung/pm_domains.c
index 4822346..7112004 100644
--- a/drivers/soc/samsung/pm_domains.c
+++ b/drivers/soc/samsung/pm_domains.c
@@ -215,29 +215,22 @@
 
 	/* Assign the child power domains to their parents */
 	for_each_matching_node(np, exynos_pm_domain_of_match) {
-		struct generic_pm_domain *child_domain, *parent_domain;
-		struct of_phandle_args args;
+		struct of_phandle_args child, parent;
 
-		args.np = np;
-		args.args_count = 0;
-		child_domain = of_genpd_get_from_provider(&args);
-		if (IS_ERR(child_domain))
-			continue;
+		child.np = np;
+		child.args_count = 0;
 
 		if (of_parse_phandle_with_args(np, "power-domains",
-					 "#power-domain-cells", 0, &args) != 0)
+					       "#power-domain-cells", 0,
+					       &parent) != 0)
 			continue;
 
-		parent_domain = of_genpd_get_from_provider(&args);
-		if (IS_ERR(parent_domain))
-			continue;
-
-		if (pm_genpd_add_subdomain(parent_domain, child_domain))
+		if (of_genpd_add_subdomain(&parent, &child))
 			pr_warn("%s failed to add subdomain: %s\n",
-				parent_domain->name, child_domain->name);
+				parent.np->name, child.np->name);
 		else
 			pr_info("%s has as child subdomain: %s.\n",
-				parent_domain->name, child_domain->name);
+				parent.np->name, child.np->name);
 	}
 
 	return 0;
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index d6fb8d4..b799547 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -153,6 +153,16 @@
 	  This enables support for the High Speed SPI controller present on
 	  newer Broadcom BCM63XX SoCs.
 
+config SPI_BCM_QSPI
+	tristate "Broadcom BSPI and MSPI controller support"
+	depends on ARCH_BRCMSTB || ARCH_BCM || ARCH_BCM_IPROC || COMPILE_TEST
+	default ARCH_BCM_IPROC
+	help
+	  Enables support for the Broadcom SPI flash and MSPI controller.
+	  Select this option for any one of BRCMSTB, iProc NSP and NS2 SoCs
+	  based platforms. This driver works for both SPI master for spi-nor
+	  flash device as well as MSPI device.
+
 config SPI_BITBANG
 	tristate "Utilities for Bitbanging SPI masters"
 	help
@@ -285,6 +295,13 @@
 	  This enables using the Freescale i.MX SPI controllers in master
 	  mode.
 
+config SPI_JCORE
+	tristate "J-Core SPI Master"
+	depends on OF && (SUPERH || COMPILE_TEST)
+	help
+	  This enables support for the SPI master controller in the J-Core
+	  synthesizable, open source SoC.
+
 config SPI_LM70_LLP
 	tristate "Parallel port adapter for LM70 eval board (DEVELOPMENT)"
 	depends on PARPORT
@@ -549,7 +566,7 @@
 config SPI_SH_MSIOF
 	tristate "SuperH MSIOF SPI controller"
 	depends on HAVE_CLK && HAS_DMA
-	depends on SUPERH || ARCH_RENESAS || COMPILE_TEST
+	depends on ARCH_SHMOBILE || ARCH_RENESAS || COMPILE_TEST
 	help
 	  SPI driver for SuperH and SH Mobile MSIOF blocks.
 
@@ -631,6 +648,13 @@
 	help
 	  SPI driver for Nvidia Tegra20/Tegra30 SLINK Controller interface.
 
+config SPI_THUNDERX
+	tristate "Cavium ThunderX SPI controller"
+	depends on PCI && 64BIT && (ARM64 || COMPILE_TEST)
+	help
+	  SPI host driver for the hardware found on Cavium ThunderX
+	  SOCs.
+
 config SPI_TOPCLIFF_PCH
 	tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) SPI"
 	depends on PCI && (X86_32 || MIPS || COMPILE_TEST)
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index 185367e..aa939d9 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -21,6 +21,7 @@
 obj-$(CONFIG_SPI_BCM53XX)		+= spi-bcm53xx.o
 obj-$(CONFIG_SPI_BCM63XX)		+= spi-bcm63xx.o
 obj-$(CONFIG_SPI_BCM63XX_HSSPI)		+= spi-bcm63xx-hsspi.o
+obj-$(CONFIG_SPI_BCM_QSPI)		+= spi-iproc-qspi.o spi-brcmstb-qspi.o spi-bcm-qspi.o
 obj-$(CONFIG_SPI_BFIN5XX)		+= spi-bfin5xx.o
 obj-$(CONFIG_SPI_ADI_V3)                += spi-adi-v3.o
 obj-$(CONFIG_SPI_BFIN_SPORT)		+= spi-bfin-sport.o
@@ -46,6 +47,7 @@
 obj-$(CONFIG_SPI_GPIO)			+= spi-gpio.o
 obj-$(CONFIG_SPI_IMG_SPFI)		+= spi-img-spfi.o
 obj-$(CONFIG_SPI_IMX)			+= spi-imx.o
+obj-$(CONFIG_SPI_JCORE)			+= spi-jcore.o
 obj-$(CONFIG_SPI_LM70_LLP)		+= spi-lm70llp.o
 obj-$(CONFIG_SPI_LP8841_RTC)		+= spi-lp8841-rtc.o
 obj-$(CONFIG_SPI_MESON_SPIFC)		+= spi-meson-spifc.o
@@ -91,6 +93,8 @@
 obj-$(CONFIG_SPI_TEGRA20_SFLASH)	+= spi-tegra20-sflash.o
 obj-$(CONFIG_SPI_TEGRA20_SLINK)		+= spi-tegra20-slink.o
 obj-$(CONFIG_SPI_TLE62X0)		+= spi-tle62x0.o
+spi-thunderx-objs			:= spi-cavium.o spi-cavium-thunderx.o
+obj-$(CONFIG_SPI_THUNDERX)		+= spi-thunderx.o
 obj-$(CONFIG_SPI_TOPCLIFF_PCH)		+= spi-topcliff-pch.o
 obj-$(CONFIG_SPI_TXX9)			+= spi-txx9.o
 obj-$(CONFIG_SPI_XCOMM)		+= spi-xcomm.o
diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
new file mode 100644
index 0000000..14f9dea
--- /dev/null
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -0,0 +1,1397 @@
+/*
+ * Driver for Broadcom BRCMSTB, NSP,  NS2, Cygnus SPI Controllers
+ *
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mtd/spi-nor.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+#include "spi-bcm-qspi.h"
+
+#define DRIVER_NAME "bcm_qspi"
+
+
+/* BSPI register offsets */
+#define BSPI_REVISION_ID			0x000
+#define BSPI_SCRATCH				0x004
+#define BSPI_MAST_N_BOOT_CTRL			0x008
+#define BSPI_BUSY_STATUS			0x00c
+#define BSPI_INTR_STATUS			0x010
+#define BSPI_B0_STATUS				0x014
+#define BSPI_B0_CTRL				0x018
+#define BSPI_B1_STATUS				0x01c
+#define BSPI_B1_CTRL				0x020
+#define BSPI_STRAP_OVERRIDE_CTRL		0x024
+#define BSPI_FLEX_MODE_ENABLE			0x028
+#define BSPI_BITS_PER_CYCLE			0x02c
+#define BSPI_BITS_PER_PHASE			0x030
+#define BSPI_CMD_AND_MODE_BYTE			0x034
+#define BSPI_BSPI_FLASH_UPPER_ADDR_BYTE	0x038
+#define BSPI_BSPI_XOR_VALUE			0x03c
+#define BSPI_BSPI_XOR_ENABLE			0x040
+#define BSPI_BSPI_PIO_MODE_ENABLE		0x044
+#define BSPI_BSPI_PIO_IODIR			0x048
+#define BSPI_BSPI_PIO_DATA			0x04c
+
+/* RAF register offsets */
+#define BSPI_RAF_START_ADDR			0x100
+#define BSPI_RAF_NUM_WORDS			0x104
+#define BSPI_RAF_CTRL				0x108
+#define BSPI_RAF_FULLNESS			0x10c
+#define BSPI_RAF_WATERMARK			0x110
+#define BSPI_RAF_STATUS			0x114
+#define BSPI_RAF_READ_DATA			0x118
+#define BSPI_RAF_WORD_CNT			0x11c
+#define BSPI_RAF_CURR_ADDR			0x120
+
+/* Override mode masks */
+#define BSPI_STRAP_OVERRIDE_CTRL_OVERRIDE	BIT(0)
+#define BSPI_STRAP_OVERRIDE_CTRL_DATA_DUAL	BIT(1)
+#define BSPI_STRAP_OVERRIDE_CTRL_ADDR_4BYTE	BIT(2)
+#define BSPI_STRAP_OVERRIDE_CTRL_DATA_QUAD	BIT(3)
+#define BSPI_STRAP_OVERRIDE_CTRL_ENDAIN_MODE	BIT(4)
+
+#define BSPI_ADDRLEN_3BYTES			3
+#define BSPI_ADDRLEN_4BYTES			4
+
+#define BSPI_RAF_STATUS_FIFO_EMPTY_MASK	BIT(1)
+
+#define BSPI_RAF_CTRL_START_MASK		BIT(0)
+#define BSPI_RAF_CTRL_CLEAR_MASK		BIT(1)
+
+#define BSPI_BPP_MODE_SELECT_MASK		BIT(8)
+#define BSPI_BPP_ADDR_SELECT_MASK		BIT(16)
+
+#define BSPI_READ_LENGTH			256
+
+/* MSPI register offsets */
+#define MSPI_SPCR0_LSB				0x000
+#define MSPI_SPCR0_MSB				0x004
+#define MSPI_SPCR1_LSB				0x008
+#define MSPI_SPCR1_MSB				0x00c
+#define MSPI_NEWQP				0x010
+#define MSPI_ENDQP				0x014
+#define MSPI_SPCR2				0x018
+#define MSPI_MSPI_STATUS			0x020
+#define MSPI_CPTQP				0x024
+#define MSPI_SPCR3				0x028
+#define MSPI_TXRAM				0x040
+#define MSPI_RXRAM				0x0c0
+#define MSPI_CDRAM				0x140
+#define MSPI_WRITE_LOCK			0x180
+
+#define MSPI_MASTER_BIT			BIT(7)
+
+#define MSPI_NUM_CDRAM				16
+#define MSPI_CDRAM_CONT_BIT			BIT(7)
+#define MSPI_CDRAM_BITSE_BIT			BIT(6)
+#define MSPI_CDRAM_PCS				0xf
+
+#define MSPI_SPCR2_SPE				BIT(6)
+#define MSPI_SPCR2_CONT_AFTER_CMD		BIT(7)
+
+#define MSPI_MSPI_STATUS_SPIF			BIT(0)
+
+#define INTR_BASE_BIT_SHIFT			0x02
+#define INTR_COUNT				0x07
+
+#define NUM_CHIPSELECT				4
+#define QSPI_SPBR_MIN				8U
+#define QSPI_SPBR_MAX				255U
+
+#define OPCODE_DIOR				0xBB
+#define OPCODE_QIOR				0xEB
+#define OPCODE_DIOR_4B				0xBC
+#define OPCODE_QIOR_4B				0xEC
+
+#define MAX_CMD_SIZE				6
+
+#define ADDR_4MB_MASK				GENMASK(22, 0)
+
+/* stop at end of transfer, no other reason */
+#define TRANS_STATUS_BREAK_NONE		0
+/* stop at end of spi_message */
+#define TRANS_STATUS_BREAK_EOM			1
+/* stop at end of spi_transfer if delay */
+#define TRANS_STATUS_BREAK_DELAY		2
+/* stop at end of spi_transfer if cs_change */
+#define TRANS_STATUS_BREAK_CS_CHANGE		4
+/* stop if we run out of bytes */
+#define TRANS_STATUS_BREAK_NO_BYTES		8
+
+/* events that make us stop filling TX slots */
+#define TRANS_STATUS_BREAK_TX (TRANS_STATUS_BREAK_EOM |		\
+			       TRANS_STATUS_BREAK_DELAY |		\
+			       TRANS_STATUS_BREAK_CS_CHANGE)
+
+/* events that make us deassert CS */
+#define TRANS_STATUS_BREAK_DESELECT (TRANS_STATUS_BREAK_EOM |		\
+				     TRANS_STATUS_BREAK_CS_CHANGE)
+
+struct bcm_qspi_parms {
+	u32 speed_hz;
+	u8 mode;
+	u8 bits_per_word;
+};
+
+struct bcm_xfer_mode {
+	bool flex_mode;
+	unsigned int width;
+	unsigned int addrlen;
+	unsigned int hp;
+};
+
+enum base_type {
+	MSPI,
+	BSPI,
+	CHIP_SELECT,
+	BASEMAX,
+};
+
+enum irq_source {
+	SINGLE_L2,
+	MUXED_L1,
+};
+
+struct bcm_qspi_irq {
+	const char *irq_name;
+	const irq_handler_t irq_handler;
+	int irq_source;
+	u32 mask;
+};
+
+struct bcm_qspi_dev_id {
+	const struct bcm_qspi_irq *irqp;
+	void *dev;
+};
+
+struct qspi_trans {
+	struct spi_transfer *trans;
+	int byte;
+};
+
+struct bcm_qspi {
+	struct platform_device *pdev;
+	struct spi_master *master;
+	struct clk *clk;
+	u32 base_clk;
+	u32 max_speed_hz;
+	void __iomem *base[BASEMAX];
+
+	/* Some SoCs provide custom interrupt status register(s) */
+	struct bcm_qspi_soc_intc	*soc_intc;
+
+	struct bcm_qspi_parms last_parms;
+	struct qspi_trans  trans_pos;
+	int curr_cs;
+	int bspi_maj_rev;
+	int bspi_min_rev;
+	int bspi_enabled;
+	struct spi_flash_read_message *bspi_rf_msg;
+	u32 bspi_rf_msg_idx;
+	u32 bspi_rf_msg_len;
+	u32 bspi_rf_msg_status;
+	struct bcm_xfer_mode xfer_mode;
+	u32 s3_strap_override_ctrl;
+	bool bspi_mode;
+	bool big_endian;
+	int num_irqs;
+	struct bcm_qspi_dev_id *dev_ids;
+	struct completion mspi_done;
+	struct completion bspi_done;
+};
+
+static inline bool has_bspi(struct bcm_qspi *qspi)
+{
+	return qspi->bspi_mode;
+}
+
+/* Read qspi controller register*/
+static inline u32 bcm_qspi_read(struct bcm_qspi *qspi, enum base_type type,
+				unsigned int offset)
+{
+	return bcm_qspi_readl(qspi->big_endian, qspi->base[type] + offset);
+}
+
+/* Write qspi controller register*/
+static inline void bcm_qspi_write(struct bcm_qspi *qspi, enum base_type type,
+				  unsigned int offset, unsigned int data)
+{
+	bcm_qspi_writel(qspi->big_endian, data, qspi->base[type] + offset);
+}
+
+/* BSPI helpers */
+static int bcm_qspi_bspi_busy_poll(struct bcm_qspi *qspi)
+{
+	int i;
+
+	/* this should normally finish within 10us */
+	for (i = 0; i < 1000; i++) {
+		if (!(bcm_qspi_read(qspi, BSPI, BSPI_BUSY_STATUS) & 1))
+			return 0;
+		udelay(1);
+	}
+	dev_warn(&qspi->pdev->dev, "timeout waiting for !busy_status\n");
+	return -EIO;
+}
+
+static inline bool bcm_qspi_bspi_ver_three(struct bcm_qspi *qspi)
+{
+	if (qspi->bspi_maj_rev < 4)
+		return true;
+	return false;
+}
+
+static void bcm_qspi_bspi_flush_prefetch_buffers(struct bcm_qspi *qspi)
+{
+	bcm_qspi_bspi_busy_poll(qspi);
+	/* Force rising edge for the b0/b1 'flush' field */
+	bcm_qspi_write(qspi, BSPI, BSPI_B0_CTRL, 1);
+	bcm_qspi_write(qspi, BSPI, BSPI_B1_CTRL, 1);
+	bcm_qspi_write(qspi, BSPI, BSPI_B0_CTRL, 0);
+	bcm_qspi_write(qspi, BSPI, BSPI_B1_CTRL, 0);
+}
+
+static int bcm_qspi_bspi_lr_is_fifo_empty(struct bcm_qspi *qspi)
+{
+	return (bcm_qspi_read(qspi, BSPI, BSPI_RAF_STATUS) &
+				BSPI_RAF_STATUS_FIFO_EMPTY_MASK);
+}
+
+static inline u32 bcm_qspi_bspi_lr_read_fifo(struct bcm_qspi *qspi)
+{
+	u32 data = bcm_qspi_read(qspi, BSPI, BSPI_RAF_READ_DATA);
+
+	/* BSPI v3 LR is LE only, convert data to host endianness */
+	if (bcm_qspi_bspi_ver_three(qspi))
+		data = le32_to_cpu(data);
+
+	return data;
+}
+
+static inline void bcm_qspi_bspi_lr_start(struct bcm_qspi *qspi)
+{
+	bcm_qspi_bspi_busy_poll(qspi);
+	bcm_qspi_write(qspi, BSPI, BSPI_RAF_CTRL,
+		       BSPI_RAF_CTRL_START_MASK);
+}
+
+static inline void bcm_qspi_bspi_lr_clear(struct bcm_qspi *qspi)
+{
+	bcm_qspi_write(qspi, BSPI, BSPI_RAF_CTRL,
+		       BSPI_RAF_CTRL_CLEAR_MASK);
+	bcm_qspi_bspi_flush_prefetch_buffers(qspi);
+}
+
+static void bcm_qspi_bspi_lr_data_read(struct bcm_qspi *qspi)
+{
+	u32 *buf = (u32 *)qspi->bspi_rf_msg->buf;
+	u32 data = 0;
+
+	dev_dbg(&qspi->pdev->dev, "xfer %p rx %p rxlen %d\n", qspi->bspi_rf_msg,
+		qspi->bspi_rf_msg->buf, qspi->bspi_rf_msg_len);
+	while (!bcm_qspi_bspi_lr_is_fifo_empty(qspi)) {
+		data = bcm_qspi_bspi_lr_read_fifo(qspi);
+		if (likely(qspi->bspi_rf_msg_len >= 4) &&
+		    IS_ALIGNED((uintptr_t)buf, 4)) {
+			buf[qspi->bspi_rf_msg_idx++] = data;
+			qspi->bspi_rf_msg_len -= 4;
+		} else {
+			/* Read out remaining bytes, make sure*/
+			u8 *cbuf = (u8 *)&buf[qspi->bspi_rf_msg_idx];
+
+			data = cpu_to_le32(data);
+			while (qspi->bspi_rf_msg_len) {
+				*cbuf++ = (u8)data;
+				data >>= 8;
+				qspi->bspi_rf_msg_len--;
+			}
+		}
+	}
+}
+
+static void bcm_qspi_bspi_set_xfer_params(struct bcm_qspi *qspi, u8 cmd_byte,
+					  int bpp, int bpc, int flex_mode)
+{
+	bcm_qspi_write(qspi, BSPI, BSPI_FLEX_MODE_ENABLE, 0);
+	bcm_qspi_write(qspi, BSPI, BSPI_BITS_PER_CYCLE, bpc);
+	bcm_qspi_write(qspi, BSPI, BSPI_BITS_PER_PHASE, bpp);
+	bcm_qspi_write(qspi, BSPI, BSPI_CMD_AND_MODE_BYTE, cmd_byte);
+	bcm_qspi_write(qspi, BSPI, BSPI_FLEX_MODE_ENABLE, flex_mode);
+}
+
+static int bcm_qspi_bspi_set_flex_mode(struct bcm_qspi *qspi, int width,
+				       int addrlen, int hp)
+{
+	int bpc = 0, bpp = 0;
+	u8 command = SPINOR_OP_READ_FAST;
+	int flex_mode = 1, rv = 0;
+	bool spans_4byte = false;
+
+	dev_dbg(&qspi->pdev->dev, "set flex mode w %x addrlen %x hp %d\n",
+		width, addrlen, hp);
+
+	if (addrlen == BSPI_ADDRLEN_4BYTES) {
+		bpp = BSPI_BPP_ADDR_SELECT_MASK;
+		spans_4byte = true;
+	}
+
+	bpp |= 8;
+
+	switch (width) {
+	case SPI_NBITS_SINGLE:
+		if (addrlen == BSPI_ADDRLEN_3BYTES)
+			/* default mode, does not need flex_cmd */
+			flex_mode = 0;
+		else
+			command = SPINOR_OP_READ4_FAST;
+		break;
+	case SPI_NBITS_DUAL:
+		bpc = 0x00000001;
+		if (hp) {
+			bpc |= 0x00010100; /* address and mode are 2-bit */
+			bpp = BSPI_BPP_MODE_SELECT_MASK;
+			command = OPCODE_DIOR;
+			if (spans_4byte)
+				command = OPCODE_DIOR_4B;
+		} else {
+			command = SPINOR_OP_READ_1_1_2;
+			if (spans_4byte)
+				command = SPINOR_OP_READ4_1_1_2;
+		}
+		break;
+	case SPI_NBITS_QUAD:
+		bpc = 0x00000002;
+		if (hp) {
+			bpc |= 0x00020200; /* address and mode are 4-bit */
+			bpp = 4; /* dummy cycles */
+			bpp |= BSPI_BPP_ADDR_SELECT_MASK;
+			command = OPCODE_QIOR;
+			if (spans_4byte)
+				command = OPCODE_QIOR_4B;
+		} else {
+			command = SPINOR_OP_READ_1_1_4;
+			if (spans_4byte)
+				command = SPINOR_OP_READ4_1_1_4;
+		}
+		break;
+	default:
+		rv = -EINVAL;
+		break;
+	}
+
+	if (rv == 0)
+		bcm_qspi_bspi_set_xfer_params(qspi, command, bpp, bpc,
+					      flex_mode);
+
+	return rv;
+}
+
+static int bcm_qspi_bspi_set_override(struct bcm_qspi *qspi, int width,
+				      int addrlen, int hp)
+{
+	u32 data = bcm_qspi_read(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL);
+
+	dev_dbg(&qspi->pdev->dev, "set override mode w %x addrlen %x hp %d\n",
+		width, addrlen, hp);
+
+	switch (width) {
+	case SPI_NBITS_SINGLE:
+		/* clear quad/dual mode */
+		data &= ~(BSPI_STRAP_OVERRIDE_CTRL_DATA_QUAD |
+			  BSPI_STRAP_OVERRIDE_CTRL_DATA_DUAL);
+		break;
+
+	case SPI_NBITS_QUAD:
+		/* clear dual mode and set quad mode */
+		data &= ~BSPI_STRAP_OVERRIDE_CTRL_DATA_DUAL;
+		data |= BSPI_STRAP_OVERRIDE_CTRL_DATA_QUAD;
+		break;
+	case SPI_NBITS_DUAL:
+		/* clear quad mode set dual mode */
+		data &= ~BSPI_STRAP_OVERRIDE_CTRL_DATA_QUAD;
+		data |= BSPI_STRAP_OVERRIDE_CTRL_DATA_DUAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (addrlen == BSPI_ADDRLEN_4BYTES)
+		/* set 4byte mode*/
+		data |= BSPI_STRAP_OVERRIDE_CTRL_ADDR_4BYTE;
+	else
+		/* clear 4 byte mode */
+		data &= ~BSPI_STRAP_OVERRIDE_CTRL_ADDR_4BYTE;
+
+	/* set the override mode */
+	data |=	BSPI_STRAP_OVERRIDE_CTRL_OVERRIDE;
+	bcm_qspi_write(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL, data);
+	bcm_qspi_bspi_set_xfer_params(qspi, SPINOR_OP_READ_FAST, 0, 0, 0);
+
+	return 0;
+}
+
+static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi,
+				  int width, int addrlen, int hp)
+{
+	int error = 0;
+
+	/* default mode */
+	qspi->xfer_mode.flex_mode = true;
+
+	if (!bcm_qspi_bspi_ver_three(qspi)) {
+		u32 val, mask;
+
+		val = bcm_qspi_read(qspi, BSPI, BSPI_STRAP_OVERRIDE_CTRL);
+		mask = BSPI_STRAP_OVERRIDE_CTRL_OVERRIDE;
+		if (val & mask || qspi->s3_strap_override_ctrl & mask) {
+			qspi->xfer_mode.flex_mode = false;
+			bcm_qspi_write(qspi, BSPI, BSPI_FLEX_MODE_ENABLE,
+				       0);
+
+			if ((val | qspi->s3_strap_override_ctrl) &
+			    BSPI_STRAP_OVERRIDE_CTRL_DATA_DUAL)
+				width = SPI_NBITS_DUAL;
+			else if ((val |  qspi->s3_strap_override_ctrl) &
+				 BSPI_STRAP_OVERRIDE_CTRL_DATA_QUAD)
+				width = SPI_NBITS_QUAD;
+
+			error = bcm_qspi_bspi_set_override(qspi, width, addrlen,
+							   hp);
+		}
+	}
+
+	if (qspi->xfer_mode.flex_mode)
+		error = bcm_qspi_bspi_set_flex_mode(qspi, width, addrlen, hp);
+
+	if (error) {
+		dev_warn(&qspi->pdev->dev,
+			 "INVALID COMBINATION: width=%d addrlen=%d hp=%d\n",
+			 width, addrlen, hp);
+	} else if (qspi->xfer_mode.width != width ||
+		   qspi->xfer_mode.addrlen != addrlen ||
+		   qspi->xfer_mode.hp != hp) {
+		qspi->xfer_mode.width = width;
+		qspi->xfer_mode.addrlen = addrlen;
+		qspi->xfer_mode.hp = hp;
+		dev_dbg(&qspi->pdev->dev,
+			"cs:%d %d-lane output, %d-byte address%s\n",
+			qspi->curr_cs,
+			qspi->xfer_mode.width,
+			qspi->xfer_mode.addrlen,
+			qspi->xfer_mode.hp != -1 ? ", hp mode" : "");
+	}
+
+	return error;
+}
+
+static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi)
+{
+	if (!has_bspi(qspi) || (qspi->bspi_enabled))
+		return;
+
+	qspi->bspi_enabled = 1;
+	if ((bcm_qspi_read(qspi, BSPI, BSPI_MAST_N_BOOT_CTRL) & 1) == 0)
+		return;
+
+	bcm_qspi_bspi_flush_prefetch_buffers(qspi);
+	udelay(1);
+	bcm_qspi_write(qspi, BSPI, BSPI_MAST_N_BOOT_CTRL, 0);
+	udelay(1);
+}
+
+static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi)
+{
+	if (!has_bspi(qspi) || (!qspi->bspi_enabled))
+		return;
+
+	qspi->bspi_enabled = 0;
+	if ((bcm_qspi_read(qspi, BSPI, BSPI_MAST_N_BOOT_CTRL) & 1))
+		return;
+
+	bcm_qspi_bspi_busy_poll(qspi);
+	bcm_qspi_write(qspi, BSPI, BSPI_MAST_N_BOOT_CTRL, 1);
+	udelay(1);
+}
+
+static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs)
+{
+	u32 data = 0;
+
+	if (qspi->curr_cs == cs)
+		return;
+	if (qspi->base[CHIP_SELECT]) {
+		data = bcm_qspi_read(qspi, CHIP_SELECT, 0);
+		data = (data & ~0xff) | (1 << cs);
+		bcm_qspi_write(qspi, CHIP_SELECT, 0, data);
+		usleep_range(10, 20);
+	}
+	qspi->curr_cs = cs;
+}
+
+/* MSPI helpers */
+static void bcm_qspi_hw_set_parms(struct bcm_qspi *qspi,
+				  const struct bcm_qspi_parms *xp)
+{
+	u32 spcr, spbr = 0;
+
+	if (xp->speed_hz)
+		spbr = qspi->base_clk / (2 * xp->speed_hz);
+
+	spcr = clamp_val(spbr, QSPI_SPBR_MIN, QSPI_SPBR_MAX);
+	bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_LSB, spcr);
+
+	spcr = MSPI_MASTER_BIT;
+	/* for 16 bit the data should be zero */
+	if (xp->bits_per_word != 16)
+		spcr |= xp->bits_per_word << 2;
+	spcr |= xp->mode & 3;
+	bcm_qspi_write(qspi, MSPI, MSPI_SPCR0_MSB, spcr);
+
+	qspi->last_parms = *xp;
+}
+
+static void bcm_qspi_update_parms(struct bcm_qspi *qspi,
+				  struct spi_device *spi,
+				  struct spi_transfer *trans)
+{
+	struct bcm_qspi_parms xp;
+
+	xp.speed_hz = trans->speed_hz;
+	xp.bits_per_word = trans->bits_per_word;
+	xp.mode = spi->mode;
+
+	bcm_qspi_hw_set_parms(qspi, &xp);
+}
+
+static int bcm_qspi_setup(struct spi_device *spi)
+{
+	struct bcm_qspi_parms *xp;
+
+	if (spi->bits_per_word > 16)
+		return -EINVAL;
+
+	xp = spi_get_ctldata(spi);
+	if (!xp) {
+		xp = kzalloc(sizeof(*xp), GFP_KERNEL);
+		if (!xp)
+			return -ENOMEM;
+		spi_set_ctldata(spi, xp);
+	}
+	xp->speed_hz = spi->max_speed_hz;
+	xp->mode = spi->mode;
+
+	if (spi->bits_per_word)
+		xp->bits_per_word = spi->bits_per_word;
+	else
+		xp->bits_per_word = 8;
+
+	return 0;
+}
+
+static int update_qspi_trans_byte_count(struct bcm_qspi *qspi,
+					struct qspi_trans *qt, int flags)
+{
+	int ret = TRANS_STATUS_BREAK_NONE;
+
+	/* count the last transferred bytes */
+	if (qt->trans->bits_per_word <= 8)
+		qt->byte++;
+	else
+		qt->byte += 2;
+
+	if (qt->byte >= qt->trans->len) {
+		/* we're at the end of the spi_transfer */
+
+		/* in TX mode, need to pause for a delay or CS change */
+		if (qt->trans->delay_usecs &&
+		    (flags & TRANS_STATUS_BREAK_DELAY))
+			ret |= TRANS_STATUS_BREAK_DELAY;
+		if (qt->trans->cs_change &&
+		    (flags & TRANS_STATUS_BREAK_CS_CHANGE))
+			ret |= TRANS_STATUS_BREAK_CS_CHANGE;
+		if (ret)
+			goto done;
+
+		dev_dbg(&qspi->pdev->dev, "advance msg exit\n");
+		if (spi_transfer_is_last(qspi->master, qt->trans))
+			ret = TRANS_STATUS_BREAK_EOM;
+		else
+			ret = TRANS_STATUS_BREAK_NO_BYTES;
+
+		qt->trans = NULL;
+	}
+
+done:
+	dev_dbg(&qspi->pdev->dev, "trans %p len %d byte %d ret %x\n",
+		qt->trans, qt->trans ? qt->trans->len : 0, qt->byte, ret);
+	return ret;
+}
+
+static inline u8 read_rxram_slot_u8(struct bcm_qspi *qspi, int slot)
+{
+	u32 slot_offset = MSPI_RXRAM + (slot << 3) + 0x4;
+
+	/* mask out reserved bits */
+	return bcm_qspi_read(qspi, MSPI, slot_offset) & 0xff;
+}
+
+static inline u16 read_rxram_slot_u16(struct bcm_qspi *qspi, int slot)
+{
+	u32 reg_offset = MSPI_RXRAM;
+	u32 lsb_offset = reg_offset + (slot << 3) + 0x4;
+	u32 msb_offset = reg_offset + (slot << 3);
+
+	return (bcm_qspi_read(qspi, MSPI, lsb_offset) & 0xff) |
+		((bcm_qspi_read(qspi, MSPI, msb_offset) & 0xff) << 8);
+}
+
+static void read_from_hw(struct bcm_qspi *qspi, int slots)
+{
+	struct qspi_trans tp;
+	int slot;
+
+	bcm_qspi_disable_bspi(qspi);
+
+	if (slots > MSPI_NUM_CDRAM) {
+		/* should never happen */
+		dev_err(&qspi->pdev->dev, "%s: too many slots!\n", __func__);
+		return;
+	}
+
+	tp = qspi->trans_pos;
+
+	for (slot = 0; slot < slots; slot++) {
+		if (tp.trans->bits_per_word <= 8) {
+			u8 *buf = tp.trans->rx_buf;
+
+			if (buf)
+				buf[tp.byte] = read_rxram_slot_u8(qspi, slot);
+			dev_dbg(&qspi->pdev->dev, "RD %02x\n",
+				buf ? buf[tp.byte] : 0xff);
+		} else {
+			u16 *buf = tp.trans->rx_buf;
+
+			if (buf)
+				buf[tp.byte / 2] = read_rxram_slot_u16(qspi,
+								      slot);
+			dev_dbg(&qspi->pdev->dev, "RD %04x\n",
+				buf ? buf[tp.byte] : 0xffff);
+		}
+
+		update_qspi_trans_byte_count(qspi, &tp,
+					     TRANS_STATUS_BREAK_NONE);
+	}
+
+	qspi->trans_pos = tp;
+}
+
+static inline void write_txram_slot_u8(struct bcm_qspi *qspi, int slot,
+				       u8 val)
+{
+	u32 reg_offset = MSPI_TXRAM + (slot << 3);
+
+	/* mask out reserved bits */
+	bcm_qspi_write(qspi, MSPI, reg_offset, val);
+}
+
+static inline void write_txram_slot_u16(struct bcm_qspi *qspi, int slot,
+					u16 val)
+{
+	u32 reg_offset = MSPI_TXRAM;
+	u32 msb_offset = reg_offset + (slot << 3);
+	u32 lsb_offset = reg_offset + (slot << 3) + 0x4;
+
+	bcm_qspi_write(qspi, MSPI, msb_offset, (val >> 8));
+	bcm_qspi_write(qspi, MSPI, lsb_offset, (val & 0xff));
+}
+
+static inline u32 read_cdram_slot(struct bcm_qspi *qspi, int slot)
+{
+	return bcm_qspi_read(qspi, MSPI, MSPI_CDRAM + (slot << 2));
+}
+
+static inline void write_cdram_slot(struct bcm_qspi *qspi, int slot, u32 val)
+{
+	bcm_qspi_write(qspi, MSPI, (MSPI_CDRAM + (slot << 2)), val);
+}
+
+/* Return number of slots written */
+static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi)
+{
+	struct qspi_trans tp;
+	int slot = 0, tstatus = 0;
+	u32 mspi_cdram = 0;
+
+	bcm_qspi_disable_bspi(qspi);
+	tp = qspi->trans_pos;
+	bcm_qspi_update_parms(qspi, spi, tp.trans);
+
+	/* Run until end of transfer or reached the max data */
+	while (!tstatus && slot < MSPI_NUM_CDRAM) {
+		if (tp.trans->bits_per_word <= 8) {
+			const u8 *buf = tp.trans->tx_buf;
+			u8 val = buf ? buf[tp.byte] : 0xff;
+
+			write_txram_slot_u8(qspi, slot, val);
+			dev_dbg(&qspi->pdev->dev, "WR %02x\n", val);
+		} else {
+			const u16 *buf = tp.trans->tx_buf;
+			u16 val = buf ? buf[tp.byte / 2] : 0xffff;
+
+			write_txram_slot_u16(qspi, slot, val);
+			dev_dbg(&qspi->pdev->dev, "WR %04x\n", val);
+		}
+		mspi_cdram = MSPI_CDRAM_CONT_BIT;
+		mspi_cdram |= (~(1 << spi->chip_select) &
+			       MSPI_CDRAM_PCS);
+		mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 :
+				MSPI_CDRAM_BITSE_BIT);
+
+		write_cdram_slot(qspi, slot, mspi_cdram);
+
+		tstatus = update_qspi_trans_byte_count(qspi, &tp,
+						       TRANS_STATUS_BREAK_TX);
+		slot++;
+	}
+
+	if (!slot) {
+		dev_err(&qspi->pdev->dev, "%s: no data to send?", __func__);
+		goto done;
+	}
+
+	dev_dbg(&qspi->pdev->dev, "submitting %d slots\n", slot);
+	bcm_qspi_write(qspi, MSPI, MSPI_NEWQP, 0);
+	bcm_qspi_write(qspi, MSPI, MSPI_ENDQP, slot - 1);
+
+	if (tstatus & TRANS_STATUS_BREAK_DESELECT) {
+		mspi_cdram = read_cdram_slot(qspi, slot - 1) &
+			~MSPI_CDRAM_CONT_BIT;
+		write_cdram_slot(qspi, slot - 1, mspi_cdram);
+	}
+
+	if (has_bspi(qspi))
+		bcm_qspi_write(qspi, MSPI, MSPI_WRITE_LOCK, 1);
+
+	/* Must flush previous writes before starting MSPI operation */
+	mb();
+	/* Set cont | spe | spifie */
+	bcm_qspi_write(qspi, MSPI, MSPI_SPCR2, 0xe0);
+
+done:
+	return slot;
+}
+
+static int bcm_qspi_bspi_flash_read(struct spi_device *spi,
+				    struct spi_flash_read_message *msg)
+{
+	struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
+	u32 addr = 0, len, len_words;
+	int ret = 0;
+	unsigned long timeo = msecs_to_jiffies(100);
+	struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
+
+	if (bcm_qspi_bspi_ver_three(qspi))
+		if (msg->addr_width == BSPI_ADDRLEN_4BYTES)
+			return -EIO;
+
+	bcm_qspi_chip_select(qspi, spi->chip_select);
+	bcm_qspi_write(qspi, MSPI, MSPI_WRITE_LOCK, 0);
+
+	/*
+	 * when using flex mode mode we need to send
+	 * the upper address byte to bspi
+	 */
+	if (bcm_qspi_bspi_ver_three(qspi) == false) {
+		addr = msg->from & 0xff000000;
+		bcm_qspi_write(qspi, BSPI,
+			       BSPI_BSPI_FLASH_UPPER_ADDR_BYTE, addr);
+	}
+
+	if (!qspi->xfer_mode.flex_mode)
+		addr = msg->from;
+	else
+		addr = msg->from & 0x00ffffff;
+
+	/* set BSPI RAF buffer max read length */
+	len = msg->len;
+	if (len > BSPI_READ_LENGTH)
+		len = BSPI_READ_LENGTH;
+
+	if (bcm_qspi_bspi_ver_three(qspi) == true)
+		addr = (addr + 0xc00000) & 0xffffff;
+
+	reinit_completion(&qspi->bspi_done);
+	bcm_qspi_enable_bspi(qspi);
+	len_words = (len + 3) >> 2;
+	qspi->bspi_rf_msg = msg;
+	qspi->bspi_rf_msg_status = 0;
+	qspi->bspi_rf_msg_idx = 0;
+	qspi->bspi_rf_msg_len = len;
+	dev_dbg(&qspi->pdev->dev, "bspi xfr addr 0x%x len 0x%x", addr, len);
+
+	bcm_qspi_write(qspi, BSPI, BSPI_RAF_START_ADDR, addr);
+	bcm_qspi_write(qspi, BSPI, BSPI_RAF_NUM_WORDS, len_words);
+	bcm_qspi_write(qspi, BSPI, BSPI_RAF_WATERMARK, 0);
+
+	if (qspi->soc_intc) {
+		/*
+		 * clear soc MSPI and BSPI interrupts and enable
+		 * BSPI interrupts.
+		 */
+		soc_intc->bcm_qspi_int_ack(soc_intc, MSPI_BSPI_DONE);
+		soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE, true);
+	}
+
+	/* Must flush previous writes before starting BSPI operation */
+	mb();
+
+	bcm_qspi_bspi_lr_start(qspi);
+	if (!wait_for_completion_timeout(&qspi->bspi_done, timeo)) {
+		dev_err(&qspi->pdev->dev, "timeout waiting for BSPI\n");
+		ret = -ETIMEDOUT;
+	} else {
+		/* set the return length for the caller */
+		msg->retlen = len;
+	}
+
+	return ret;
+}
+
+static int bcm_qspi_flash_read(struct spi_device *spi,
+			       struct spi_flash_read_message *msg)
+{
+	struct bcm_qspi *qspi = spi_master_get_devdata(spi->master);
+	int ret = 0;
+	bool mspi_read = false;
+	u32 io_width, addrlen, addr, len;
+	u_char *buf;
+
+	buf = msg->buf;
+	addr = msg->from;
+	len = msg->len;
+
+	if (bcm_qspi_bspi_ver_three(qspi) == true) {
+		/*
+		 * The address coming into this function is a raw flash offset.
+		 * But for BSPI <= V3, we need to convert it to a remapped BSPI
+		 * address. If it crosses a 4MB boundary, just revert back to
+		 * using MSPI.
+		 */
+		addr = (addr + 0xc00000) & 0xffffff;
+
+		if ((~ADDR_4MB_MASK & addr) ^
+		    (~ADDR_4MB_MASK & (addr + len - 1)))
+			mspi_read = true;
+	}
+
+	/* non-aligned and very short transfers are handled by MSPI */
+	if (!IS_ALIGNED((uintptr_t)addr, 4) || !IS_ALIGNED((uintptr_t)buf, 4) ||
+	    len < 4)
+		mspi_read = true;
+
+	if (mspi_read)
+		/* this will make the m25p80 read to fallback to mspi read */
+		return -EAGAIN;
+
+	io_width = msg->data_nbits ? msg->data_nbits : SPI_NBITS_SINGLE;
+	addrlen = msg->addr_width;
+	ret = bcm_qspi_bspi_set_mode(qspi, io_width, addrlen, -1);
+
+	if (!ret)
+		ret = bcm_qspi_bspi_flash_read(spi, msg);
+
+	return ret;
+}
+
+static int bcm_qspi_transfer_one(struct spi_master *master,
+				 struct spi_device *spi,
+				 struct spi_transfer *trans)
+{
+	struct bcm_qspi *qspi = spi_master_get_devdata(master);
+	int slots;
+	unsigned long timeo = msecs_to_jiffies(100);
+
+	bcm_qspi_chip_select(qspi, spi->chip_select);
+	qspi->trans_pos.trans = trans;
+	qspi->trans_pos.byte = 0;
+
+	while (qspi->trans_pos.byte < trans->len) {
+		reinit_completion(&qspi->mspi_done);
+
+		slots = write_to_hw(qspi, spi);
+		if (!wait_for_completion_timeout(&qspi->mspi_done, timeo)) {
+			dev_err(&qspi->pdev->dev, "timeout waiting for MSPI\n");
+			return -ETIMEDOUT;
+		}
+
+		read_from_hw(qspi, slots);
+	}
+
+	return 0;
+}
+
+static void bcm_qspi_cleanup(struct spi_device *spi)
+{
+	struct bcm_qspi_parms *xp = spi_get_ctldata(spi);
+
+	kfree(xp);
+}
+
+static irqreturn_t bcm_qspi_mspi_l2_isr(int irq, void *dev_id)
+{
+	struct bcm_qspi_dev_id *qspi_dev_id = dev_id;
+	struct bcm_qspi *qspi = qspi_dev_id->dev;
+	u32 status = bcm_qspi_read(qspi, MSPI, MSPI_MSPI_STATUS);
+
+	if (status & MSPI_MSPI_STATUS_SPIF) {
+		struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
+		/* clear interrupt */
+		status &= ~MSPI_MSPI_STATUS_SPIF;
+		bcm_qspi_write(qspi, MSPI, MSPI_MSPI_STATUS, status);
+		if (qspi->soc_intc)
+			soc_intc->bcm_qspi_int_ack(soc_intc, MSPI_DONE);
+		complete(&qspi->mspi_done);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t bcm_qspi_bspi_lr_l2_isr(int irq, void *dev_id)
+{
+	struct bcm_qspi_dev_id *qspi_dev_id = dev_id;
+	struct bcm_qspi *qspi = qspi_dev_id->dev;
+	struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
+	u32 status = qspi_dev_id->irqp->mask;
+
+	if (qspi->bspi_enabled && qspi->bspi_rf_msg) {
+		bcm_qspi_bspi_lr_data_read(qspi);
+		if (qspi->bspi_rf_msg_len == 0) {
+			qspi->bspi_rf_msg = NULL;
+			if (qspi->soc_intc) {
+				/* disable soc BSPI interrupt */
+				soc_intc->bcm_qspi_int_set(soc_intc, BSPI_DONE,
+							   false);
+				/* indicate done */
+				status = INTR_BSPI_LR_SESSION_DONE_MASK;
+			}
+
+			if (qspi->bspi_rf_msg_status)
+				bcm_qspi_bspi_lr_clear(qspi);
+			else
+				bcm_qspi_bspi_flush_prefetch_buffers(qspi);
+		}
+
+		if (qspi->soc_intc)
+			/* clear soc BSPI interrupt */
+			soc_intc->bcm_qspi_int_ack(soc_intc, BSPI_DONE);
+	}
+
+	status &= INTR_BSPI_LR_SESSION_DONE_MASK;
+	if (qspi->bspi_enabled && status && qspi->bspi_rf_msg_len == 0)
+		complete(&qspi->bspi_done);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t bcm_qspi_bspi_lr_err_l2_isr(int irq, void *dev_id)
+{
+	struct bcm_qspi_dev_id *qspi_dev_id = dev_id;
+	struct bcm_qspi *qspi = qspi_dev_id->dev;
+	struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
+
+	dev_err(&qspi->pdev->dev, "BSPI INT error\n");
+	qspi->bspi_rf_msg_status = -EIO;
+	if (qspi->soc_intc)
+		/* clear soc interrupt */
+		soc_intc->bcm_qspi_int_ack(soc_intc, BSPI_ERR);
+
+	complete(&qspi->bspi_done);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t bcm_qspi_l1_isr(int irq, void *dev_id)
+{
+	struct bcm_qspi_dev_id *qspi_dev_id = dev_id;
+	struct bcm_qspi *qspi = qspi_dev_id->dev;
+	struct bcm_qspi_soc_intc *soc_intc = qspi->soc_intc;
+	irqreturn_t ret = IRQ_NONE;
+
+	if (soc_intc) {
+		u32 status = soc_intc->bcm_qspi_get_int_status(soc_intc);
+
+		if (status & MSPI_DONE)
+			ret = bcm_qspi_mspi_l2_isr(irq, dev_id);
+		else if (status & BSPI_DONE)
+			ret = bcm_qspi_bspi_lr_l2_isr(irq, dev_id);
+		else if (status & BSPI_ERR)
+			ret = bcm_qspi_bspi_lr_err_l2_isr(irq, dev_id);
+	}
+
+	return ret;
+}
+
+static const struct bcm_qspi_irq qspi_irq_tab[] = {
+	{
+		.irq_name = "spi_lr_fullness_reached",
+		.irq_handler = bcm_qspi_bspi_lr_l2_isr,
+		.mask = INTR_BSPI_LR_FULLNESS_REACHED_MASK,
+	},
+	{
+		.irq_name = "spi_lr_session_aborted",
+		.irq_handler = bcm_qspi_bspi_lr_err_l2_isr,
+		.mask = INTR_BSPI_LR_SESSION_ABORTED_MASK,
+	},
+	{
+		.irq_name = "spi_lr_impatient",
+		.irq_handler = bcm_qspi_bspi_lr_err_l2_isr,
+		.mask = INTR_BSPI_LR_IMPATIENT_MASK,
+	},
+	{
+		.irq_name = "spi_lr_session_done",
+		.irq_handler = bcm_qspi_bspi_lr_l2_isr,
+		.mask = INTR_BSPI_LR_SESSION_DONE_MASK,
+	},
+#ifdef QSPI_INT_DEBUG
+	/* this interrupt is for debug purposes only, dont request irq */
+	{
+		.irq_name = "spi_lr_overread",
+		.irq_handler = bcm_qspi_bspi_lr_err_l2_isr,
+		.mask = INTR_BSPI_LR_OVERREAD_MASK,
+	},
+#endif
+	{
+		.irq_name = "mspi_done",
+		.irq_handler = bcm_qspi_mspi_l2_isr,
+		.mask = INTR_MSPI_DONE_MASK,
+	},
+	{
+		.irq_name = "mspi_halted",
+		.irq_handler = bcm_qspi_mspi_l2_isr,
+		.mask = INTR_MSPI_HALTED_MASK,
+	},
+	{
+		/* single muxed L1 interrupt source */
+		.irq_name = "spi_l1_intr",
+		.irq_handler = bcm_qspi_l1_isr,
+		.irq_source = MUXED_L1,
+		.mask = QSPI_INTERRUPTS_ALL,
+	},
+};
+
+static void bcm_qspi_bspi_init(struct bcm_qspi *qspi)
+{
+	u32 val = 0;
+
+	val = bcm_qspi_read(qspi, BSPI, BSPI_REVISION_ID);
+	qspi->bspi_maj_rev = (val >> 8) & 0xff;
+	qspi->bspi_min_rev = val & 0xff;
+	if (!(bcm_qspi_bspi_ver_three(qspi))) {
+		/* Force mapping of BSPI address -> flash offset */
+		bcm_qspi_write(qspi, BSPI, BSPI_BSPI_XOR_VALUE, 0);
+		bcm_qspi_write(qspi, BSPI, BSPI_BSPI_XOR_ENABLE, 1);
+	}
+	qspi->bspi_enabled = 1;
+	bcm_qspi_disable_bspi(qspi);
+	bcm_qspi_write(qspi, BSPI, BSPI_B0_CTRL, 0);
+	bcm_qspi_write(qspi, BSPI, BSPI_B1_CTRL, 0);
+}
+
+static void bcm_qspi_hw_init(struct bcm_qspi *qspi)
+{
+	struct bcm_qspi_parms parms;
+
+	bcm_qspi_write(qspi, MSPI, MSPI_SPCR1_LSB, 0);
+	bcm_qspi_write(qspi, MSPI, MSPI_SPCR1_MSB, 0);
+	bcm_qspi_write(qspi, MSPI, MSPI_NEWQP, 0);
+	bcm_qspi_write(qspi, MSPI, MSPI_ENDQP, 0);
+	bcm_qspi_write(qspi, MSPI, MSPI_SPCR2, 0x20);
+
+	parms.mode = SPI_MODE_3;
+	parms.bits_per_word = 8;
+	parms.speed_hz = qspi->max_speed_hz;
+	bcm_qspi_hw_set_parms(qspi, &parms);
+
+	if (has_bspi(qspi))
+		bcm_qspi_bspi_init(qspi);
+}
+
+static void bcm_qspi_hw_uninit(struct bcm_qspi *qspi)
+{
+	bcm_qspi_write(qspi, MSPI, MSPI_SPCR2, 0);
+	if (has_bspi(qspi))
+		bcm_qspi_write(qspi, MSPI, MSPI_WRITE_LOCK, 0);
+
+}
+
+static const struct of_device_id bcm_qspi_of_match[] = {
+	{ .compatible = "brcm,spi-bcm-qspi" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, bcm_qspi_of_match);
+
+int bcm_qspi_probe(struct platform_device *pdev,
+		   struct bcm_qspi_soc_intc *soc_intc)
+{
+	struct device *dev = &pdev->dev;
+	struct bcm_qspi *qspi;
+	struct spi_master *master;
+	struct resource *res;
+	int irq, ret = 0, num_ints = 0;
+	u32 val;
+	const char *name = NULL;
+	int num_irqs = ARRAY_SIZE(qspi_irq_tab);
+
+	/* We only support device-tree instantiation */
+	if (!dev->of_node)
+		return -ENODEV;
+
+	if (!of_match_node(bcm_qspi_of_match, dev->of_node))
+		return -ENODEV;
+
+	master = spi_alloc_master(dev, sizeof(struct bcm_qspi));
+	if (!master) {
+		dev_err(dev, "error allocating spi_master\n");
+		return -ENOMEM;
+	}
+
+	qspi = spi_master_get_devdata(master);
+	qspi->pdev = pdev;
+	qspi->trans_pos.trans = NULL;
+	qspi->trans_pos.byte = 0;
+	qspi->master = master;
+
+	master->bus_num = -1;
+	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_RX_DUAL | SPI_RX_QUAD;
+	master->setup = bcm_qspi_setup;
+	master->transfer_one = bcm_qspi_transfer_one;
+	master->spi_flash_read = bcm_qspi_flash_read;
+	master->cleanup = bcm_qspi_cleanup;
+	master->dev.of_node = dev->of_node;
+	master->num_chipselect = NUM_CHIPSELECT;
+
+	qspi->big_endian = of_device_is_big_endian(dev->of_node);
+
+	if (!of_property_read_u32(dev->of_node, "num-cs", &val))
+		master->num_chipselect = val;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "hif_mspi");
+	if (!res)
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						   "mspi");
+
+	if (res) {
+		qspi->base[MSPI]  = devm_ioremap_resource(dev, res);
+		if (IS_ERR(qspi->base[MSPI])) {
+			ret = PTR_ERR(qspi->base[MSPI]);
+			goto qspi_probe_err;
+		}
+	} else {
+		goto qspi_probe_err;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "bspi");
+	if (res) {
+		qspi->base[BSPI]  = devm_ioremap_resource(dev, res);
+		if (IS_ERR(qspi->base[BSPI])) {
+			ret = PTR_ERR(qspi->base[BSPI]);
+			goto qspi_probe_err;
+		}
+		qspi->bspi_mode = true;
+	} else {
+		qspi->bspi_mode = false;
+	}
+
+	dev_info(dev, "using %smspi mode\n", qspi->bspi_mode ? "bspi-" : "");
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cs_reg");
+	if (res) {
+		qspi->base[CHIP_SELECT]  = devm_ioremap_resource(dev, res);
+		if (IS_ERR(qspi->base[CHIP_SELECT])) {
+			ret = PTR_ERR(qspi->base[CHIP_SELECT]);
+			goto qspi_probe_err;
+		}
+	}
+
+	qspi->dev_ids = kcalloc(num_irqs, sizeof(struct bcm_qspi_dev_id),
+				GFP_KERNEL);
+	if (!qspi->dev_ids) {
+		ret = -ENOMEM;
+		goto qspi_probe_err;
+	}
+
+	for (val = 0; val < num_irqs; val++) {
+		irq = -1;
+		name = qspi_irq_tab[val].irq_name;
+		if (qspi_irq_tab[val].irq_source == SINGLE_L2) {
+			/* get the l2 interrupts */
+			irq = platform_get_irq_byname(pdev, name);
+		} else if (!num_ints && soc_intc) {
+			/* all mspi, bspi intrs muxed to one L1 intr */
+			irq = platform_get_irq(pdev, 0);
+		}
+
+		if (irq  >= 0) {
+			ret = devm_request_irq(&pdev->dev, irq,
+					       qspi_irq_tab[val].irq_handler, 0,
+					       name,
+					       &qspi->dev_ids[val]);
+			if (ret < 0) {
+				dev_err(&pdev->dev, "IRQ %s not found\n", name);
+				goto qspi_probe_err;
+			}
+
+			qspi->dev_ids[val].dev = qspi;
+			qspi->dev_ids[val].irqp = &qspi_irq_tab[val];
+			num_ints++;
+			dev_dbg(&pdev->dev, "registered IRQ %s %d\n",
+				qspi_irq_tab[val].irq_name,
+				irq);
+		}
+	}
+
+	if (!num_ints) {
+		dev_err(&pdev->dev, "no IRQs registered, cannot init driver\n");
+		ret = -EINVAL;
+		goto qspi_probe_err;
+	}
+
+	/*
+	 * Some SoCs integrate spi controller (e.g., its interrupt bits)
+	 * in specific ways
+	 */
+	if (soc_intc) {
+		qspi->soc_intc = soc_intc;
+		soc_intc->bcm_qspi_int_set(soc_intc, MSPI_DONE, true);
+	} else {
+		qspi->soc_intc = NULL;
+	}
+
+	qspi->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(qspi->clk)) {
+		dev_warn(dev, "unable to get clock\n");
+		ret = PTR_ERR(qspi->clk);
+		goto qspi_probe_err;
+	}
+
+	ret = clk_prepare_enable(qspi->clk);
+	if (ret) {
+		dev_err(dev, "failed to prepare clock\n");
+		goto qspi_probe_err;
+	}
+
+	qspi->base_clk = clk_get_rate(qspi->clk);
+	qspi->max_speed_hz = qspi->base_clk / (QSPI_SPBR_MIN * 2);
+
+	bcm_qspi_hw_init(qspi);
+	init_completion(&qspi->mspi_done);
+	init_completion(&qspi->bspi_done);
+	qspi->curr_cs = -1;
+
+	platform_set_drvdata(pdev, qspi);
+
+	qspi->xfer_mode.width = -1;
+	qspi->xfer_mode.addrlen = -1;
+	qspi->xfer_mode.hp = -1;
+
+	ret = devm_spi_register_master(&pdev->dev, master);
+	if (ret < 0) {
+		dev_err(dev, "can't register master\n");
+		goto qspi_reg_err;
+	}
+
+	return 0;
+
+qspi_reg_err:
+	bcm_qspi_hw_uninit(qspi);
+	clk_disable_unprepare(qspi->clk);
+qspi_probe_err:
+	spi_master_put(master);
+	kfree(qspi->dev_ids);
+	return ret;
+}
+/* probe function to be called by SoC specific platform driver probe */
+EXPORT_SYMBOL_GPL(bcm_qspi_probe);
+
+int bcm_qspi_remove(struct platform_device *pdev)
+{
+	struct bcm_qspi *qspi = platform_get_drvdata(pdev);
+
+	platform_set_drvdata(pdev, NULL);
+	bcm_qspi_hw_uninit(qspi);
+	clk_disable_unprepare(qspi->clk);
+	kfree(qspi->dev_ids);
+	spi_unregister_master(qspi->master);
+
+	return 0;
+}
+/* function to be called by SoC specific platform driver remove() */
+EXPORT_SYMBOL_GPL(bcm_qspi_remove);
+
+static int __maybe_unused bcm_qspi_suspend(struct device *dev)
+{
+	struct bcm_qspi *qspi = dev_get_drvdata(dev);
+
+	spi_master_suspend(qspi->master);
+	clk_disable(qspi->clk);
+	bcm_qspi_hw_uninit(qspi);
+
+	return 0;
+};
+
+static int __maybe_unused bcm_qspi_resume(struct device *dev)
+{
+	struct bcm_qspi *qspi = dev_get_drvdata(dev);
+	int ret = 0;
+
+	bcm_qspi_hw_init(qspi);
+	bcm_qspi_chip_select(qspi, qspi->curr_cs);
+	if (qspi->soc_intc)
+		/* enable MSPI interrupt */
+		qspi->soc_intc->bcm_qspi_int_set(qspi->soc_intc, MSPI_DONE,
+						 true);
+
+	ret = clk_enable(qspi->clk);
+	if (!ret)
+		spi_master_resume(qspi->master);
+
+	return ret;
+}
+
+SIMPLE_DEV_PM_OPS(bcm_qspi_pm_ops, bcm_qspi_suspend, bcm_qspi_resume);
+
+/* pm_ops to be called by SoC specific platform driver */
+EXPORT_SYMBOL_GPL(bcm_qspi_pm_ops);
+
+MODULE_AUTHOR("Kamal Dasu");
+MODULE_DESCRIPTION("Broadcom QSPI driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/spi/spi-bcm-qspi.h b/drivers/spi/spi-bcm-qspi.h
new file mode 100644
index 0000000..7abfc75
--- /dev/null
+++ b/drivers/spi/spi-bcm-qspi.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+#ifndef __SPI_BCM_QSPI_H__
+#define __SPI_BCM_QSPI_H__
+
+#include <linux/types.h>
+#include <linux/io.h>
+
+/* BSPI interrupt masks */
+#define INTR_BSPI_LR_OVERREAD_MASK		BIT(4)
+#define INTR_BSPI_LR_SESSION_DONE_MASK		BIT(3)
+#define INTR_BSPI_LR_IMPATIENT_MASK		BIT(2)
+#define INTR_BSPI_LR_SESSION_ABORTED_MASK	BIT(1)
+#define INTR_BSPI_LR_FULLNESS_REACHED_MASK	BIT(0)
+
+#define BSPI_LR_INTERRUPTS_DATA		       \
+	(INTR_BSPI_LR_SESSION_DONE_MASK |	       \
+	 INTR_BSPI_LR_FULLNESS_REACHED_MASK)
+
+#define BSPI_LR_INTERRUPTS_ERROR               \
+	(INTR_BSPI_LR_OVERREAD_MASK |	       \
+	 INTR_BSPI_LR_IMPATIENT_MASK |	       \
+	 INTR_BSPI_LR_SESSION_ABORTED_MASK)
+
+#define BSPI_LR_INTERRUPTS_ALL                 \
+	(BSPI_LR_INTERRUPTS_ERROR |	       \
+	 BSPI_LR_INTERRUPTS_DATA)
+
+/* MSPI Interrupt masks */
+#define INTR_MSPI_HALTED_MASK			BIT(6)
+#define INTR_MSPI_DONE_MASK			BIT(5)
+
+#define MSPI_INTERRUPTS_ALL		       \
+	(INTR_MSPI_DONE_MASK |		       \
+	 INTR_MSPI_HALTED_MASK)
+
+#define QSPI_INTERRUPTS_ALL                    \
+	(MSPI_INTERRUPTS_ALL |		       \
+	 BSPI_LR_INTERRUPTS_ALL)
+
+struct platform_device;
+struct dev_pm_ops;
+
+enum {
+	MSPI_DONE = 0x1,
+	BSPI_DONE = 0x2,
+	BSPI_ERR = 0x4,
+	MSPI_BSPI_DONE = 0x7
+};
+
+struct bcm_qspi_soc_intc {
+	void (*bcm_qspi_int_ack)(struct bcm_qspi_soc_intc *soc_intc, int type);
+	void (*bcm_qspi_int_set)(struct bcm_qspi_soc_intc *soc_intc, int type,
+				 bool en);
+	u32 (*bcm_qspi_get_int_status)(struct bcm_qspi_soc_intc *soc_intc);
+};
+
+/* Read controller register*/
+static inline u32 bcm_qspi_readl(bool be, void __iomem *addr)
+{
+	if (be)
+		return ioread32be(addr);
+	else
+		return readl_relaxed(addr);
+}
+
+/* Write controller register*/
+static inline void bcm_qspi_writel(bool be,
+				   unsigned int data, void __iomem *addr)
+{
+	if (be)
+		iowrite32be(data, addr);
+	else
+		writel_relaxed(data, addr);
+}
+
+static inline u32 get_qspi_mask(int type)
+{
+	switch (type) {
+	case MSPI_DONE:
+		return INTR_MSPI_DONE_MASK;
+	case BSPI_DONE:
+		return BSPI_LR_INTERRUPTS_ALL;
+	case MSPI_BSPI_DONE:
+		return QSPI_INTERRUPTS_ALL;
+	case BSPI_ERR:
+		return BSPI_LR_INTERRUPTS_ERROR;
+	}
+
+	return 0;
+}
+
+/* The common driver functions to be called by the SoC platform driver */
+int bcm_qspi_probe(struct platform_device *pdev,
+		   struct bcm_qspi_soc_intc *soc_intc);
+int bcm_qspi_remove(struct platform_device *pdev);
+
+/* pm_ops used by the SoC platform driver called on PM suspend/resume */
+extern const struct dev_pm_ops bcm_qspi_pm_ops;
+
+#endif /* __SPI_BCM_QSPI_H__ */
diff --git a/drivers/spi/spi-brcmstb-qspi.c b/drivers/spi/spi-brcmstb-qspi.c
new file mode 100644
index 0000000..c7df92e
--- /dev/null
+++ b/drivers/spi/spi-brcmstb-qspi.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation (the "GPL").
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 (GPLv2) for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 (GPLv2) along with this source code.
+ */
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include "spi-bcm-qspi.h"
+
+static const struct of_device_id brcmstb_qspi_of_match[] = {
+	{ .compatible = "brcm,spi-brcmstb-qspi" },
+	{ .compatible = "brcm,spi-brcmstb-mspi" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, brcmstb_qspi_of_match);
+
+static int brcmstb_qspi_probe(struct platform_device *pdev)
+{
+	return bcm_qspi_probe(pdev, NULL);
+}
+
+static int brcmstb_qspi_remove(struct platform_device *pdev)
+{
+	return bcm_qspi_remove(pdev);
+}
+
+static struct platform_driver brcmstb_qspi_driver = {
+	.probe			= brcmstb_qspi_probe,
+	.remove			= brcmstb_qspi_remove,
+	.driver = {
+		.name		= "brcmstb_qspi",
+		.pm		= &bcm_qspi_pm_ops,
+		.of_match_table = brcmstb_qspi_of_match,
+	}
+};
+module_platform_driver(brcmstb_qspi_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Kamal Dasu");
+MODULE_DESCRIPTION("Broadcom SPI driver for settop SoC");
diff --git a/drivers/spi/spi-cavium-thunderx.c b/drivers/spi/spi-cavium-thunderx.c
new file mode 100644
index 0000000..8779377
--- /dev/null
+++ b/drivers/spi/spi-cavium-thunderx.c
@@ -0,0 +1,120 @@
+/*
+ * Cavium ThunderX SPI driver.
+ *
+ * Copyright (C) 2016 Cavium Inc.
+ * Authors: Jan Glauber <jglauber@cavium.com>
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/spi/spi.h>
+
+#include "spi-cavium.h"
+
+#define DRV_NAME "spi-thunderx"
+
+#define SYS_FREQ_DEFAULT 700000000 /* 700 Mhz */
+
+static int thunderx_spi_probe(struct pci_dev *pdev,
+			      const struct pci_device_id *ent)
+{
+	struct device *dev = &pdev->dev;
+	struct spi_master *master;
+	struct octeon_spi *p;
+	int ret;
+
+	master = spi_alloc_master(dev, sizeof(struct octeon_spi));
+	if (!master)
+		return -ENOMEM;
+
+	p = spi_master_get_devdata(master);
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		goto error;
+
+	ret = pci_request_regions(pdev, DRV_NAME);
+	if (ret)
+		goto error;
+
+	p->register_base = pcim_iomap(pdev, 0, pci_resource_len(pdev, 0));
+	if (!p->register_base) {
+		ret = -EINVAL;
+		goto error;
+	}
+
+	p->regs.config = 0x1000;
+	p->regs.status = 0x1008;
+	p->regs.tx = 0x1010;
+	p->regs.data = 0x1080;
+
+	p->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(p->clk)) {
+		ret = PTR_ERR(p->clk);
+		goto error;
+	}
+
+	ret = clk_prepare_enable(p->clk);
+	if (ret)
+		goto error;
+
+	p->sys_freq = clk_get_rate(p->clk);
+	if (!p->sys_freq)
+		p->sys_freq = SYS_FREQ_DEFAULT;
+	dev_info(dev, "Set system clock to %u\n", p->sys_freq);
+
+	master->num_chipselect = 4;
+	master->mode_bits = SPI_CPHA | SPI_CPOL | SPI_CS_HIGH |
+			    SPI_LSB_FIRST | SPI_3WIRE;
+	master->transfer_one_message = octeon_spi_transfer_one_message;
+	master->bits_per_word_mask = SPI_BPW_MASK(8);
+	master->max_speed_hz = OCTEON_SPI_MAX_CLOCK_HZ;
+	master->dev.of_node = pdev->dev.of_node;
+
+	pci_set_drvdata(pdev, master);
+
+	ret = devm_spi_register_master(dev, master);
+	if (ret)
+		goto error;
+
+	return 0;
+
+error:
+	clk_disable_unprepare(p->clk);
+	spi_master_put(master);
+	return ret;
+}
+
+static void thunderx_spi_remove(struct pci_dev *pdev)
+{
+	struct spi_master *master = pci_get_drvdata(pdev);
+	struct octeon_spi *p;
+
+	p = spi_master_get_devdata(master);
+	if (!p)
+		return;
+
+	clk_disable_unprepare(p->clk);
+	/* Put everything in a known state. */
+	writeq(0, p->register_base + OCTEON_SPI_CFG(p));
+}
+
+static const struct pci_device_id thunderx_spi_pci_id_table[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa00b) },
+	{ 0, }
+};
+
+MODULE_DEVICE_TABLE(pci, thunderx_spi_pci_id_table);
+
+static struct pci_driver thunderx_spi_driver = {
+	.name		= DRV_NAME,
+	.id_table	= thunderx_spi_pci_id_table,
+	.probe		= thunderx_spi_probe,
+	.remove		= thunderx_spi_remove,
+};
+
+module_pci_driver(thunderx_spi_driver);
+
+MODULE_DESCRIPTION("Cavium, Inc. ThunderX SPI bus driver");
+MODULE_AUTHOR("Jan Glauber");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-cavium.h b/drivers/spi/spi-cavium.h
index 88c5f36..1f91d61 100644
--- a/drivers/spi/spi-cavium.h
+++ b/drivers/spi/spi-cavium.h
@@ -1,6 +1,8 @@
 #ifndef __SPI_CAVIUM_H
 #define __SPI_CAVIUM_H
 
+#include <linux/clk.h>
+
 #define OCTEON_SPI_MAX_BYTES 9
 #define OCTEON_SPI_MAX_CLOCK_HZ 16000000
 
@@ -17,6 +19,7 @@
 	u64 cs_enax;
 	int sys_freq;
 	struct octeon_spi_regs regs;
+	struct clk *clk;
 };
 
 #define OCTEON_SPI_CFG(x)	(x->regs.config)
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c
index c09bb74..27960e4 100644
--- a/drivers/spi/spi-dw.c
+++ b/drivers/spi/spi-dw.c
@@ -283,7 +283,6 @@
 	struct chip_data *chip = spi_get_ctldata(spi);
 	u8 imask = 0;
 	u16 txlevel = 0;
-	u16 clk_div;
 	u32 cr0;
 	int ret;
 
@@ -298,13 +297,13 @@
 	spi_enable_chip(dws, 0);
 
 	/* Handle per transfer options for bpw and speed */
-	if (transfer->speed_hz != chip->speed_hz) {
-		/* clk_div doesn't support odd number */
-		clk_div = (dws->max_freq / transfer->speed_hz + 1) & 0xfffe;
-
-		chip->speed_hz = transfer->speed_hz;
-		chip->clk_div = clk_div;
-
+	if (transfer->speed_hz != dws->current_freq) {
+		if (transfer->speed_hz != chip->speed_hz) {
+			/* clk_div doesn't support odd number */
+			chip->clk_div = (DIV_ROUND_UP(dws->max_freq, transfer->speed_hz) + 1) & 0xfffe;
+			chip->speed_hz = transfer->speed_hz;
+		}
+		dws->current_freq = transfer->speed_hz;
 		spi_set_clk(dws, chip->clk_div);
 	}
 	if (transfer->bits_per_word == 8) {
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
index 61bc3cb..c21ca02 100644
--- a/drivers/spi/spi-dw.h
+++ b/drivers/spi/spi-dw.h
@@ -123,6 +123,7 @@
 	u8			n_bytes;	/* current is a 1/2 bytes op */
 	u32			dma_width;
 	irqreturn_t		(*transfer_handler)(struct dw_spi *dws);
+	u32			current_freq;	/* frequency in hz */
 
 	/* DMA info */
 	int			dma_inited;
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 9e9dadb..35c0dd9 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -159,7 +159,7 @@
 	u8			cs;
 	u16			void_write_data;
 	u32			cs_change;
-	struct fsl_dspi_devtype_data *devtype_data;
+	const struct fsl_dspi_devtype_data *devtype_data;
 
 	wait_queue_head_t	waitq;
 	u32			waitflags;
@@ -624,10 +624,13 @@
 {
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct fsl_dspi *dspi = spi_master_get_devdata(master);
+	int ret;
 
 	pinctrl_pm_select_default_state(dev);
 
-	clk_prepare_enable(dspi->clk);
+	ret = clk_prepare_enable(dspi->clk);
+	if (ret)
+		return ret;
 	spi_master_resume(master);
 
 	return 0;
@@ -651,8 +654,6 @@
 	struct resource *res;
 	void __iomem *base;
 	int ret = 0, cs_num, bus_num;
-	const struct of_device_id *of_id =
-			of_match_device(fsl_dspi_dt_ids, &pdev->dev);
 
 	master = spi_alloc_master(&pdev->dev, sizeof(struct fsl_dspi));
 	if (!master)
@@ -686,7 +687,7 @@
 	}
 	master->bus_num = bus_num;
 
-	dspi->devtype_data = (struct fsl_dspi_devtype_data *)of_id->data;
+	dspi->devtype_data = of_device_get_match_data(&pdev->dev);
 	if (!dspi->devtype_data) {
 		dev_err(&pdev->dev, "can't get devtype_data\n");
 		ret = -EFAULT;
@@ -728,7 +729,9 @@
 		dev_err(&pdev->dev, "unable to get clock\n");
 		goto out_master_put;
 	}
-	clk_prepare_enable(dspi->clk);
+	ret = clk_prepare_enable(dspi->clk);
+	if (ret)
+		goto out_master_put;
 
 	master->max_speed_hz =
 		clk_get_rate(dspi->clk) / dspi->devtype_data->max_clock_factor;
@@ -760,7 +763,6 @@
 	/* Disconnect from the SPI framework */
 	clk_disable_unprepare(dspi->clk);
 	spi_unregister_master(dspi->master);
-	spi_master_put(dspi->master);
 
 	return 0;
 }
diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index 8d85a3c..7451585 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -12,7 +12,6 @@
 #include <linux/err.h>
 #include <linux/fsl_devices.h>
 #include <linux/interrupt.h>
-#include <linux/irq.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/of.h>
@@ -27,40 +26,29 @@
 #include "spi-fsl-lib.h"
 
 /* eSPI Controller registers */
-struct fsl_espi_reg {
-	__be32 mode;		/* 0x000 - eSPI mode register */
-	__be32 event;		/* 0x004 - eSPI event register */
-	__be32 mask;		/* 0x008 - eSPI mask register */
-	__be32 command;		/* 0x00c - eSPI command register */
-	__be32 transmit;	/* 0x010 - eSPI transmit FIFO access register*/
-	__be32 receive;		/* 0x014 - eSPI receive FIFO access register*/
-	u8 res[8];		/* 0x018 - 0x01c reserved */
-	__be32 csmode[4];	/* 0x020 - 0x02c eSPI cs mode register */
-};
+#define ESPI_SPMODE	0x00	/* eSPI mode register */
+#define ESPI_SPIE	0x04	/* eSPI event register */
+#define ESPI_SPIM	0x08	/* eSPI mask register */
+#define ESPI_SPCOM	0x0c	/* eSPI command register */
+#define ESPI_SPITF	0x10	/* eSPI transmit FIFO access register*/
+#define ESPI_SPIRF	0x14	/* eSPI receive FIFO access register*/
+#define ESPI_SPMODE0	0x20	/* eSPI cs0 mode register */
 
-struct fsl_espi_transfer {
-	const void *tx_buf;
-	void *rx_buf;
-	unsigned len;
-	unsigned n_tx;
-	unsigned n_rx;
-	unsigned actual_length;
-	int status;
-};
+#define ESPI_SPMODEx(x)	(ESPI_SPMODE0 + (x) * 4)
 
 /* eSPI Controller mode register definitions */
-#define SPMODE_ENABLE		(1 << 31)
-#define SPMODE_LOOP		(1 << 30)
+#define SPMODE_ENABLE		BIT(31)
+#define SPMODE_LOOP		BIT(30)
 #define SPMODE_TXTHR(x)		((x) << 8)
 #define SPMODE_RXTHR(x)		((x) << 0)
 
 /* eSPI Controller CS mode register definitions */
-#define CSMODE_CI_INACTIVEHIGH	(1 << 31)
-#define CSMODE_CP_BEGIN_EDGECLK	(1 << 30)
-#define CSMODE_REV		(1 << 29)
-#define CSMODE_DIV16		(1 << 28)
+#define CSMODE_CI_INACTIVEHIGH	BIT(31)
+#define CSMODE_CP_BEGIN_EDGECLK	BIT(30)
+#define CSMODE_REV		BIT(29)
+#define CSMODE_DIV16		BIT(28)
 #define CSMODE_PM(x)		((x) << 24)
-#define CSMODE_POL_1		(1 << 20)
+#define CSMODE_POL_1		BIT(20)
 #define CSMODE_LEN(x)		((x) << 16)
 #define CSMODE_BEF(x)		((x) << 12)
 #define CSMODE_AFT(x)		((x) << 8)
@@ -72,29 +60,114 @@
 		| CSMODE_AFT(0) | CSMODE_CG(1))
 
 /* SPIE register values */
-#define	SPIE_NE		0x00000200	/* Not empty */
-#define	SPIE_NF		0x00000100	/* Not full */
-
-/* SPIM register values */
-#define	SPIM_NE		0x00000200	/* Not empty */
-#define	SPIM_NF		0x00000100	/* Not full */
 #define SPIE_RXCNT(reg)     ((reg >> 24) & 0x3F)
 #define SPIE_TXCNT(reg)     ((reg >> 16) & 0x3F)
+#define	SPIE_TXE		BIT(15)	/* TX FIFO empty */
+#define	SPIE_DON		BIT(14)	/* TX done */
+#define	SPIE_RXT		BIT(13)	/* RX FIFO threshold */
+#define	SPIE_RXF		BIT(12)	/* RX FIFO full */
+#define	SPIE_TXT		BIT(11)	/* TX FIFO threshold*/
+#define	SPIE_RNE		BIT(9)	/* RX FIFO not empty */
+#define	SPIE_TNF		BIT(8)	/* TX FIFO not full */
+
+/* SPIM register values */
+#define	SPIM_TXE		BIT(15)	/* TX FIFO empty */
+#define	SPIM_DON		BIT(14)	/* TX done */
+#define	SPIM_RXT		BIT(13)	/* RX FIFO threshold */
+#define	SPIM_RXF		BIT(12)	/* RX FIFO full */
+#define	SPIM_TXT		BIT(11)	/* TX FIFO threshold*/
+#define	SPIM_RNE		BIT(9)	/* RX FIFO not empty */
+#define	SPIM_TNF		BIT(8)	/* TX FIFO not full */
 
 /* SPCOM register values */
 #define SPCOM_CS(x)		((x) << 30)
+#define SPCOM_DO		BIT(28) /* Dual output */
+#define SPCOM_TO		BIT(27) /* TX only */
+#define SPCOM_RXSKIP(x)		((x) << 16)
 #define SPCOM_TRANLEN(x)	((x) << 0)
+
 #define	SPCOM_TRANLEN_MAX	0x10000	/* Max transaction length */
 
 #define AUTOSUSPEND_TIMEOUT 2000
 
+static inline u32 fsl_espi_read_reg(struct mpc8xxx_spi *mspi, int offset)
+{
+	return ioread32be(mspi->reg_base + offset);
+}
+
+static inline u8 fsl_espi_read_reg8(struct mpc8xxx_spi *mspi, int offset)
+{
+	return ioread8(mspi->reg_base + offset);
+}
+
+static inline void fsl_espi_write_reg(struct mpc8xxx_spi *mspi, int offset,
+				      u32 val)
+{
+	iowrite32be(val, mspi->reg_base + offset);
+}
+
+static inline void fsl_espi_write_reg8(struct mpc8xxx_spi *mspi, int offset,
+				       u8 val)
+{
+	iowrite8(val, mspi->reg_base + offset);
+}
+
+static void fsl_espi_copy_to_buf(struct spi_message *m,
+				 struct mpc8xxx_spi *mspi)
+{
+	struct spi_transfer *t;
+	u8 *buf = mspi->local_buf;
+
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		if (t->tx_buf)
+			memcpy(buf, t->tx_buf, t->len);
+		else
+			memset(buf, 0, t->len);
+		buf += t->len;
+	}
+}
+
+static void fsl_espi_copy_from_buf(struct spi_message *m,
+				   struct mpc8xxx_spi *mspi)
+{
+	struct spi_transfer *t;
+	u8 *buf = mspi->local_buf;
+
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		if (t->rx_buf)
+			memcpy(t->rx_buf, buf, t->len);
+		buf += t->len;
+	}
+}
+
+static int fsl_espi_check_message(struct spi_message *m)
+{
+	struct mpc8xxx_spi *mspi = spi_master_get_devdata(m->spi->master);
+	struct spi_transfer *t, *first;
+
+	if (m->frame_length > SPCOM_TRANLEN_MAX) {
+		dev_err(mspi->dev, "message too long, size is %u bytes\n",
+			m->frame_length);
+		return -EMSGSIZE;
+	}
+
+	first = list_first_entry(&m->transfers, struct spi_transfer,
+				 transfer_list);
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		if (first->bits_per_word != t->bits_per_word ||
+		    first->speed_hz != t->speed_hz) {
+			dev_err(mspi->dev, "bits_per_word/speed_hz should be the same for all transfers\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static void fsl_espi_change_mode(struct spi_device *spi)
 {
 	struct mpc8xxx_spi *mspi = spi_master_get_devdata(spi->master);
 	struct spi_mpc8xxx_cs *cs = spi->controller_state;
-	struct fsl_espi_reg *reg_base = mspi->reg_base;
-	__be32 __iomem *mode = &reg_base->csmode[spi->chip_select];
-	__be32 __iomem *espi_mode = &reg_base->mode;
 	u32 tmp;
 	unsigned long flags;
 
@@ -102,10 +175,11 @@
 	local_irq_save(flags);
 
 	/* Turn off SPI unit prior changing mode */
-	tmp = mpc8xxx_spi_read_reg(espi_mode);
-	mpc8xxx_spi_write_reg(espi_mode, tmp & ~SPMODE_ENABLE);
-	mpc8xxx_spi_write_reg(mode, cs->hw_mode);
-	mpc8xxx_spi_write_reg(espi_mode, tmp);
+	tmp = fsl_espi_read_reg(mspi, ESPI_SPMODE);
+	fsl_espi_write_reg(mspi, ESPI_SPMODE, tmp & ~SPMODE_ENABLE);
+	fsl_espi_write_reg(mspi, ESPI_SPMODEx(spi->chip_select),
+			      cs->hw_mode);
+	fsl_espi_write_reg(mspi, ESPI_SPMODE, tmp);
 
 	local_irq_restore(flags);
 }
@@ -131,27 +205,15 @@
 	return data;
 }
 
-static int fsl_espi_setup_transfer(struct spi_device *spi,
+static void fsl_espi_setup_transfer(struct spi_device *spi,
 					struct spi_transfer *t)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
-	int bits_per_word = 0;
+	int bits_per_word = t ? t->bits_per_word : spi->bits_per_word;
+	u32 hz = t ? t->speed_hz : spi->max_speed_hz;
 	u8 pm;
-	u32 hz = 0;
 	struct spi_mpc8xxx_cs *cs = spi->controller_state;
 
-	if (t) {
-		bits_per_word = t->bits_per_word;
-		hz = t->speed_hz;
-	}
-
-	/* spi_transfer level calls that work per-word */
-	if (!bits_per_word)
-		bits_per_word = spi->bits_per_word;
-
-	if (!hz)
-		hz = spi->max_speed_hz;
-
 	cs->rx_shift = 0;
 	cs->tx_shift = 0;
 	cs->get_rx = mpc8xxx_spi_rx_buf_u32;
@@ -169,12 +231,10 @@
 	mpc8xxx_spi->get_rx = cs->get_rx;
 	mpc8xxx_spi->get_tx = cs->get_tx;
 
-	bits_per_word = bits_per_word - 1;
-
 	/* mask out bits we are going to set */
 	cs->hw_mode &= ~(CSMODE_LEN(0xF) | CSMODE_DIV16 | CSMODE_PM(0xF));
 
-	cs->hw_mode |= CSMODE_LEN(bits_per_word);
+	cs->hw_mode |= CSMODE_LEN(bits_per_word - 1);
 
 	if ((mpc8xxx_spi->spibrg / hz) > 64) {
 		cs->hw_mode |= CSMODE_DIV16;
@@ -196,36 +256,16 @@
 	cs->hw_mode |= CSMODE_PM(pm);
 
 	fsl_espi_change_mode(spi);
-	return 0;
-}
-
-static int fsl_espi_cpu_bufs(struct mpc8xxx_spi *mspi, struct spi_transfer *t,
-		unsigned int len)
-{
-	u32 word;
-	struct fsl_espi_reg *reg_base = mspi->reg_base;
-
-	mspi->count = len;
-
-	/* enable rx ints */
-	mpc8xxx_spi_write_reg(&reg_base->mask, SPIM_NE);
-
-	/* transmit word */
-	word = mspi->get_tx(mspi);
-	mpc8xxx_spi_write_reg(&reg_base->transmit, word);
-
-	return 0;
 }
 
 static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
-	struct fsl_espi_reg *reg_base = mpc8xxx_spi->reg_base;
-	unsigned int len = t->len;
+	u32 word;
 	int ret;
 
 	mpc8xxx_spi->len = t->len;
-	len = roundup(len, 4) / 4;
+	mpc8xxx_spi->count = roundup(t->len, 4) / 4;
 
 	mpc8xxx_spi->tx = t->tx_buf;
 	mpc8xxx_spi->rx = t->rx_buf;
@@ -233,17 +273,15 @@
 	reinit_completion(&mpc8xxx_spi->done);
 
 	/* Set SPCOM[CS] and SPCOM[TRANLEN] field */
-	if (t->len > SPCOM_TRANLEN_MAX) {
-		dev_err(mpc8xxx_spi->dev, "Transaction length (%d)"
-				" beyond the SPCOM[TRANLEN] field\n", t->len);
-		return -EINVAL;
-	}
-	mpc8xxx_spi_write_reg(&reg_base->command,
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPCOM,
 		(SPCOM_CS(spi->chip_select) | SPCOM_TRANLEN(t->len - 1)));
 
-	ret = fsl_espi_cpu_bufs(mpc8xxx_spi, t, len);
-	if (ret)
-		return ret;
+	/* enable rx ints */
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIM, SPIM_RNE);
+
+	/* transmit word */
+	word = mpc8xxx_spi->get_tx(mpc8xxx_spi);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPITF, word);
 
 	/* Won't hang up forever, SPI bus sometimes got lost interrupts... */
 	ret = wait_for_completion_timeout(&mpc8xxx_spi->done, 2 * HZ);
@@ -253,230 +291,76 @@
 			mpc8xxx_spi->count);
 
 	/* disable rx ints */
-	mpc8xxx_spi_write_reg(&reg_base->mask, 0);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIM, 0);
 
-	return mpc8xxx_spi->count;
+	return mpc8xxx_spi->count > 0 ? -EMSGSIZE : 0;
 }
 
-static inline void fsl_espi_addr2cmd(unsigned int addr, u8 *cmd)
+static int fsl_espi_trans(struct spi_message *m, struct spi_transfer *trans)
 {
-	if (cmd) {
-		cmd[1] = (u8)(addr >> 16);
-		cmd[2] = (u8)(addr >> 8);
-		cmd[3] = (u8)(addr >> 0);
-	}
-}
-
-static inline unsigned int fsl_espi_cmd2addr(u8 *cmd)
-{
-	if (cmd)
-		return cmd[1] << 16 | cmd[2] << 8 | cmd[3] << 0;
-
-	return 0;
-}
-
-static void fsl_espi_do_trans(struct spi_message *m,
-				struct fsl_espi_transfer *tr)
-{
+	struct mpc8xxx_spi *mspi = spi_master_get_devdata(m->spi->master);
 	struct spi_device *spi = m->spi;
-	struct mpc8xxx_spi *mspi = spi_master_get_devdata(spi->master);
-	struct fsl_espi_transfer *espi_trans = tr;
-	struct spi_message message;
-	struct spi_transfer *t, *first, trans;
-	int status = 0;
+	int ret;
 
-	spi_message_init(&message);
-	memset(&trans, 0, sizeof(trans));
+	fsl_espi_copy_to_buf(m, mspi);
+	fsl_espi_setup_transfer(spi, trans);
 
-	first = list_first_entry(&m->transfers, struct spi_transfer,
-			transfer_list);
-	list_for_each_entry(t, &m->transfers, transfer_list) {
-		if ((first->bits_per_word != t->bits_per_word) ||
-			(first->speed_hz != t->speed_hz)) {
-			espi_trans->status = -EINVAL;
-			dev_err(mspi->dev,
-				"bits_per_word/speed_hz should be same for the same SPI transfer\n");
-			return;
-		}
+	ret = fsl_espi_bufs(spi, trans);
 
-		trans.speed_hz = t->speed_hz;
-		trans.bits_per_word = t->bits_per_word;
-		trans.delay_usecs = max(first->delay_usecs, t->delay_usecs);
-	}
+	if (trans->delay_usecs)
+		udelay(trans->delay_usecs);
 
-	trans.len = espi_trans->len;
-	trans.tx_buf = espi_trans->tx_buf;
-	trans.rx_buf = espi_trans->rx_buf;
-	spi_message_add_tail(&trans, &message);
-
-	list_for_each_entry(t, &message.transfers, transfer_list) {
-		if (t->bits_per_word || t->speed_hz) {
-			status = -EINVAL;
-
-			status = fsl_espi_setup_transfer(spi, t);
-			if (status < 0)
-				break;
-		}
-
-		if (t->len)
-			status = fsl_espi_bufs(spi, t);
-
-		if (status) {
-			status = -EMSGSIZE;
-			break;
-		}
-
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
-	}
-
-	espi_trans->status = status;
 	fsl_espi_setup_transfer(spi, NULL);
-}
 
-static void fsl_espi_cmd_trans(struct spi_message *m,
-				struct fsl_espi_transfer *trans, u8 *rx_buff)
-{
-	struct spi_transfer *t;
-	u8 *local_buf;
-	int i = 0;
-	struct fsl_espi_transfer *espi_trans = trans;
+	if (!ret)
+		fsl_espi_copy_from_buf(m, mspi);
 
-	local_buf = kzalloc(SPCOM_TRANLEN_MAX, GFP_KERNEL);
-	if (!local_buf) {
-		espi_trans->status = -ENOMEM;
-		return;
-	}
-
-	list_for_each_entry(t, &m->transfers, transfer_list) {
-		if (t->tx_buf) {
-			memcpy(local_buf + i, t->tx_buf, t->len);
-			i += t->len;
-		}
-	}
-
-	espi_trans->tx_buf = local_buf;
-	espi_trans->rx_buf = local_buf;
-	fsl_espi_do_trans(m, espi_trans);
-
-	espi_trans->actual_length = espi_trans->len;
-	kfree(local_buf);
-}
-
-static void fsl_espi_rw_trans(struct spi_message *m,
-				struct fsl_espi_transfer *trans, u8 *rx_buff)
-{
-	struct fsl_espi_transfer *espi_trans = trans;
-	unsigned int total_len = espi_trans->len;
-	struct spi_transfer *t;
-	u8 *local_buf;
-	u8 *rx_buf = rx_buff;
-	unsigned int trans_len;
-	unsigned int addr;
-	unsigned int tx_only;
-	unsigned int rx_pos = 0;
-	unsigned int pos;
-	int i, loop;
-
-	local_buf = kzalloc(SPCOM_TRANLEN_MAX, GFP_KERNEL);
-	if (!local_buf) {
-		espi_trans->status = -ENOMEM;
-		return;
-	}
-
-	for (pos = 0, loop = 0; pos < total_len; pos += trans_len, loop++) {
-		trans_len = total_len - pos;
-
-		i = 0;
-		tx_only = 0;
-		list_for_each_entry(t, &m->transfers, transfer_list) {
-			if (t->tx_buf) {
-				memcpy(local_buf + i, t->tx_buf, t->len);
-				i += t->len;
-				if (!t->rx_buf)
-					tx_only += t->len;
-			}
-		}
-
-		/* Add additional TX bytes to compensate SPCOM_TRANLEN_MAX */
-		if (loop > 0)
-			trans_len += tx_only;
-
-		if (trans_len > SPCOM_TRANLEN_MAX)
-			trans_len = SPCOM_TRANLEN_MAX;
-
-		/* Update device offset */
-		if (pos > 0) {
-			addr = fsl_espi_cmd2addr(local_buf);
-			addr += rx_pos;
-			fsl_espi_addr2cmd(addr, local_buf);
-		}
-
-		espi_trans->len = trans_len;
-		espi_trans->tx_buf = local_buf;
-		espi_trans->rx_buf = local_buf;
-		fsl_espi_do_trans(m, espi_trans);
-
-		/* If there is at least one RX byte then copy it to rx_buf */
-		if (tx_only < SPCOM_TRANLEN_MAX)
-			memcpy(rx_buf + rx_pos, espi_trans->rx_buf + tx_only,
-					trans_len - tx_only);
-
-		rx_pos += trans_len - tx_only;
-
-		if (loop > 0)
-			espi_trans->actual_length += espi_trans->len - tx_only;
-		else
-			espi_trans->actual_length += espi_trans->len;
-	}
-
-	kfree(local_buf);
+	return ret;
 }
 
 static int fsl_espi_do_one_msg(struct spi_master *master,
 			       struct spi_message *m)
 {
-	struct spi_transfer *t;
-	u8 *rx_buf = NULL;
-	unsigned int n_tx = 0;
-	unsigned int n_rx = 0;
-	unsigned int xfer_len = 0;
-	struct fsl_espi_transfer espi_trans;
+	struct mpc8xxx_spi *mspi = spi_master_get_devdata(m->spi->master);
+	unsigned int delay_usecs = 0;
+	struct spi_transfer *t, trans = {};
+	int ret;
+
+	ret = fsl_espi_check_message(m);
+	if (ret)
+		goto out;
 
 	list_for_each_entry(t, &m->transfers, transfer_list) {
-		if (t->tx_buf)
-			n_tx += t->len;
-		if (t->rx_buf) {
-			n_rx += t->len;
-			rx_buf = t->rx_buf;
-		}
-		if ((t->tx_buf) || (t->rx_buf))
-			xfer_len += t->len;
+		if (t->delay_usecs > delay_usecs)
+			delay_usecs = t->delay_usecs;
 	}
 
-	espi_trans.n_tx = n_tx;
-	espi_trans.n_rx = n_rx;
-	espi_trans.len = xfer_len;
-	espi_trans.actual_length = 0;
-	espi_trans.status = 0;
+	t = list_first_entry(&m->transfers, struct spi_transfer,
+			     transfer_list);
 
-	if (!rx_buf)
-		fsl_espi_cmd_trans(m, &espi_trans, NULL);
-	else
-		fsl_espi_rw_trans(m, &espi_trans, rx_buf);
+	trans.len = m->frame_length;
+	trans.speed_hz = t->speed_hz;
+	trans.bits_per_word = t->bits_per_word;
+	trans.delay_usecs = delay_usecs;
+	trans.tx_buf = mspi->local_buf;
+	trans.rx_buf = mspi->local_buf;
 
-	m->actual_length = espi_trans.actual_length;
-	m->status = espi_trans.status;
+	if (trans.len)
+		ret = fsl_espi_trans(m, &trans);
+
+	m->actual_length = ret ? 0 : trans.len;
+out:
+	if (m->status == -EINPROGRESS)
+		m->status = ret;
+
 	spi_finalize_current_message(master);
-	return 0;
+
+	return ret;
 }
 
 static int fsl_espi_setup(struct spi_device *spi)
 {
 	struct mpc8xxx_spi *mpc8xxx_spi;
-	struct fsl_espi_reg *reg_base;
-	int retval;
-	u32 hw_mode;
 	u32 loop_mode;
 	struct spi_mpc8xxx_cs *cs = spi_get_ctldata(spi);
 
@@ -491,13 +375,11 @@
 	}
 
 	mpc8xxx_spi = spi_master_get_devdata(spi->master);
-	reg_base = mpc8xxx_spi->reg_base;
 
 	pm_runtime_get_sync(mpc8xxx_spi->dev);
 
-	hw_mode = cs->hw_mode; /* Save original settings */
-	cs->hw_mode = mpc8xxx_spi_read_reg(
-			&reg_base->csmode[spi->chip_select]);
+	cs->hw_mode = fsl_espi_read_reg(mpc8xxx_spi,
+					   ESPI_SPMODEx(spi->chip_select));
 	/* mask out bits we are going to set */
 	cs->hw_mode &= ~(CSMODE_CP_BEGIN_EDGECLK | CSMODE_CI_INACTIVEHIGH
 			 | CSMODE_REV);
@@ -510,21 +392,17 @@
 		cs->hw_mode |= CSMODE_REV;
 
 	/* Handle the loop mode */
-	loop_mode = mpc8xxx_spi_read_reg(&reg_base->mode);
+	loop_mode = fsl_espi_read_reg(mpc8xxx_spi, ESPI_SPMODE);
 	loop_mode &= ~SPMODE_LOOP;
 	if (spi->mode & SPI_LOOP)
 		loop_mode |= SPMODE_LOOP;
-	mpc8xxx_spi_write_reg(&reg_base->mode, loop_mode);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, loop_mode);
 
-	retval = fsl_espi_setup_transfer(spi, NULL);
+	fsl_espi_setup_transfer(spi, NULL);
 
 	pm_runtime_mark_last_busy(mpc8xxx_spi->dev);
 	pm_runtime_put_autosuspend(mpc8xxx_spi->dev);
 
-	if (retval < 0) {
-		cs->hw_mode = hw_mode; /* Restore settings */
-		return retval;
-	}
 	return 0;
 }
 
@@ -536,12 +414,10 @@
 	spi_set_ctldata(spi, NULL);
 }
 
-void fsl_espi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
+static void fsl_espi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
 {
-	struct fsl_espi_reg *reg_base = mspi->reg_base;
-
 	/* We need handle RX first */
-	if (events & SPIE_NE) {
+	if (events & SPIE_RNE) {
 		u32 rx_data, tmp;
 		u8 rx_data_8;
 		int rx_nr_bytes = 4;
@@ -551,7 +427,7 @@
 		if (SPIE_RXCNT(events) < min(4, mspi->len)) {
 			ret = spin_event_timeout(
 				!(SPIE_RXCNT(events =
-				mpc8xxx_spi_read_reg(&reg_base->event)) <
+				fsl_espi_read_reg(mspi, ESPI_SPIE)) <
 						min(4, mspi->len)),
 						10000, 0); /* 10 msec */
 			if (!ret)
@@ -560,10 +436,10 @@
 		}
 
 		if (mspi->len >= 4) {
-			rx_data = mpc8xxx_spi_read_reg(&reg_base->receive);
+			rx_data = fsl_espi_read_reg(mspi, ESPI_SPIRF);
 		} else if (mspi->len <= 0) {
 			dev_err(mspi->dev,
-				"unexpected RX(SPIE_NE) interrupt occurred,\n"
+				"unexpected RX(SPIE_RNE) interrupt occurred,\n"
 				"(local rxlen %d bytes, reg rxlen %d bytes)\n",
 				min(4, mspi->len), SPIE_RXCNT(events));
 			rx_nr_bytes = 0;
@@ -572,7 +448,8 @@
 			tmp = mspi->len;
 			rx_data = 0;
 			while (tmp--) {
-				rx_data_8 = in_8((u8 *)&reg_base->receive);
+				rx_data_8 = fsl_espi_read_reg8(mspi,
+							       ESPI_SPIRF);
 				rx_data |= (rx_data_8 << (tmp * 8));
 			}
 
@@ -585,30 +462,24 @@
 			mspi->get_rx(rx_data, mspi);
 	}
 
-	if (!(events & SPIE_NF)) {
+	if (!(events & SPIE_TNF)) {
 		int ret;
 
 		/* spin until TX is done */
-		ret = spin_event_timeout(((events = mpc8xxx_spi_read_reg(
-				&reg_base->event)) & SPIE_NF), 1000, 0);
+		ret = spin_event_timeout(((events = fsl_espi_read_reg(
+				mspi, ESPI_SPIE)) & SPIE_TNF), 1000, 0);
 		if (!ret) {
-			dev_err(mspi->dev, "tired waiting for SPIE_NF\n");
-
-			/* Clear the SPIE bits */
-			mpc8xxx_spi_write_reg(&reg_base->event, events);
+			dev_err(mspi->dev, "tired waiting for SPIE_TNF\n");
 			complete(&mspi->done);
 			return;
 		}
 	}
 
-	/* Clear the events */
-	mpc8xxx_spi_write_reg(&reg_base->event, events);
-
 	mspi->count -= 1;
 	if (mspi->count) {
 		u32 word = mspi->get_tx(mspi);
 
-		mpc8xxx_spi_write_reg(&reg_base->transmit, word);
+		fsl_espi_write_reg(mspi, ESPI_SPITF, word);
 	} else {
 		complete(&mspi->done);
 	}
@@ -617,20 +488,21 @@
 static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
 {
 	struct mpc8xxx_spi *mspi = context_data;
-	struct fsl_espi_reg *reg_base = mspi->reg_base;
-	irqreturn_t ret = IRQ_NONE;
 	u32 events;
 
 	/* Get interrupt events(tx/rx) */
-	events = mpc8xxx_spi_read_reg(&reg_base->event);
-	if (events)
-		ret = IRQ_HANDLED;
+	events = fsl_espi_read_reg(mspi, ESPI_SPIE);
+	if (!events)
+		return IRQ_NONE;
 
 	dev_vdbg(mspi->dev, "%s: events %x\n", __func__, events);
 
 	fsl_espi_cpu_irq(mspi, events);
 
-	return ret;
+	/* Clear the events */
+	fsl_espi_write_reg(mspi, ESPI_SPIE, events);
+
+	return IRQ_HANDLED;
 }
 
 #ifdef CONFIG_PM
@@ -638,12 +510,11 @@
 {
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
-	struct fsl_espi_reg *reg_base = mpc8xxx_spi->reg_base;
 	u32 regval;
 
-	regval = mpc8xxx_spi_read_reg(&reg_base->mode);
+	regval = fsl_espi_read_reg(mpc8xxx_spi, ESPI_SPMODE);
 	regval &= ~SPMODE_ENABLE;
-	mpc8xxx_spi_write_reg(&reg_base->mode, regval);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, regval);
 
 	return 0;
 }
@@ -652,39 +523,35 @@
 {
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
-	struct fsl_espi_reg *reg_base = mpc8xxx_spi->reg_base;
 	u32 regval;
 
-	regval = mpc8xxx_spi_read_reg(&reg_base->mode);
+	regval = fsl_espi_read_reg(mpc8xxx_spi, ESPI_SPMODE);
 	regval |= SPMODE_ENABLE;
-	mpc8xxx_spi_write_reg(&reg_base->mode, regval);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, regval);
 
 	return 0;
 }
 #endif
 
-static size_t fsl_espi_max_transfer_size(struct spi_device *spi)
+static size_t fsl_espi_max_message_size(struct spi_device *spi)
 {
 	return SPCOM_TRANLEN_MAX;
 }
 
-static struct spi_master * fsl_espi_probe(struct device *dev,
-		struct resource *mem, unsigned int irq)
+static int fsl_espi_probe(struct device *dev, struct resource *mem,
+			  unsigned int irq)
 {
 	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
 	struct spi_master *master;
 	struct mpc8xxx_spi *mpc8xxx_spi;
-	struct fsl_espi_reg *reg_base;
 	struct device_node *nc;
 	const __be32 *prop;
 	u32 regval, csmode;
-	int i, len, ret = 0;
+	int i, len, ret;
 
 	master = spi_alloc_master(dev, sizeof(struct mpc8xxx_spi));
-	if (!master) {
-		ret = -ENOMEM;
-		goto err;
-	}
+	if (!master)
+		return -ENOMEM;
 
 	dev_set_drvdata(dev, master);
 
@@ -695,18 +562,23 @@
 	master->cleanup = fsl_espi_cleanup;
 	master->transfer_one_message = fsl_espi_do_one_msg;
 	master->auto_runtime_pm = true;
-	master->max_transfer_size = fsl_espi_max_transfer_size;
+	master->max_message_size = fsl_espi_max_message_size;
 
 	mpc8xxx_spi = spi_master_get_devdata(master);
 
+	mpc8xxx_spi->local_buf =
+		devm_kmalloc(dev, SPCOM_TRANLEN_MAX, GFP_KERNEL);
+	if (!mpc8xxx_spi->local_buf) {
+		ret = -ENOMEM;
+		goto err_probe;
+	}
+
 	mpc8xxx_spi->reg_base = devm_ioremap_resource(dev, mem);
 	if (IS_ERR(mpc8xxx_spi->reg_base)) {
 		ret = PTR_ERR(mpc8xxx_spi->reg_base);
 		goto err_probe;
 	}
 
-	reg_base = mpc8xxx_spi->reg_base;
-
 	/* Register for SPI Interrupt */
 	ret = devm_request_irq(dev, mpc8xxx_spi->irq, fsl_espi_irq,
 			  0, "fsl_espi", mpc8xxx_spi);
@@ -719,10 +591,10 @@
 	}
 
 	/* SPI controller initializations */
-	mpc8xxx_spi_write_reg(&reg_base->mode, 0);
-	mpc8xxx_spi_write_reg(&reg_base->mask, 0);
-	mpc8xxx_spi_write_reg(&reg_base->command, 0);
-	mpc8xxx_spi_write_reg(&reg_base->event, 0xffffffff);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, 0);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIM, 0);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPCOM, 0);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIE, 0xffffffff);
 
 	/* Init eSPI CS mode register */
 	for_each_available_child_of_node(master->dev.of_node, nc) {
@@ -747,7 +619,7 @@
 			csmode &= ~(CSMODE_AFT(0xf));
 			csmode |= CSMODE_AFT(be32_to_cpup(prop));
 		}
-		mpc8xxx_spi_write_reg(&reg_base->csmode[i], csmode);
+		fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODEx(i), csmode);
 
 		dev_info(dev, "cs=%d, init_csmode=0x%x\n", i, csmode);
 	}
@@ -755,7 +627,7 @@
 	/* Enable SPI interface */
 	regval = pdata->initial_spmode | SPMODE_INIT_VAL | SPMODE_ENABLE;
 
-	mpc8xxx_spi_write_reg(&reg_base->mode, regval);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, regval);
 
 	pm_runtime_set_autosuspend_delay(dev, AUTOSUSPEND_TIMEOUT);
 	pm_runtime_use_autosuspend(dev);
@@ -767,12 +639,13 @@
 	if (ret < 0)
 		goto err_pm;
 
-	dev_info(dev, "at 0x%p (irq = %d)\n", reg_base, mpc8xxx_spi->irq);
+	dev_info(dev, "at 0x%p (irq = %d)\n", mpc8xxx_spi->reg_base,
+		 mpc8xxx_spi->irq);
 
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
 
-	return master;
+	return 0;
 
 err_pm:
 	pm_runtime_put_noidle(dev);
@@ -780,8 +653,7 @@
 	pm_runtime_set_suspended(dev);
 err_probe:
 	spi_master_put(master);
-err:
-	return ERR_PTR(ret);
+	return ret;
 }
 
 static int of_fsl_espi_get_chipselects(struct device *dev)
@@ -807,10 +679,9 @@
 {
 	struct device *dev = &ofdev->dev;
 	struct device_node *np = ofdev->dev.of_node;
-	struct spi_master *master;
 	struct resource mem;
 	unsigned int irq;
-	int ret = -ENOMEM;
+	int ret;
 
 	ret = of_mpc8xxx_spi_probe(ofdev);
 	if (ret)
@@ -818,28 +689,17 @@
 
 	ret = of_fsl_espi_get_chipselects(dev);
 	if (ret)
-		goto err;
+		return ret;
 
 	ret = of_address_to_resource(np, 0, &mem);
 	if (ret)
-		goto err;
+		return ret;
 
 	irq = irq_of_parse_and_map(np, 0);
-	if (!irq) {
-		ret = -EINVAL;
-		goto err;
-	}
+	if (!irq)
+		return -EINVAL;
 
-	master = fsl_espi_probe(dev, &mem, irq);
-	if (IS_ERR(master)) {
-		ret = PTR_ERR(master);
-		goto err;
-	}
-
-	return 0;
-
-err:
-	return ret;
+	return fsl_espi_probe(dev, &mem, irq);
 }
 
 static int of_fsl_espi_remove(struct platform_device *dev)
@@ -873,27 +733,26 @@
 	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
 	struct spi_master *master = dev_get_drvdata(dev);
 	struct mpc8xxx_spi *mpc8xxx_spi;
-	struct fsl_espi_reg *reg_base;
 	u32 regval;
 	int i, ret;
 
 	mpc8xxx_spi = spi_master_get_devdata(master);
-	reg_base = mpc8xxx_spi->reg_base;
 
 	/* SPI controller initializations */
-	mpc8xxx_spi_write_reg(&reg_base->mode, 0);
-	mpc8xxx_spi_write_reg(&reg_base->mask, 0);
-	mpc8xxx_spi_write_reg(&reg_base->command, 0);
-	mpc8xxx_spi_write_reg(&reg_base->event, 0xffffffff);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, 0);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIM, 0);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPCOM, 0);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIE, 0xffffffff);
 
 	/* Init eSPI CS mode register */
 	for (i = 0; i < pdata->max_chipselect; i++)
-		mpc8xxx_spi_write_reg(&reg_base->csmode[i], CSMODE_INIT_VAL);
+		fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODEx(i),
+				      CSMODE_INIT_VAL);
 
 	/* Enable SPI interface */
 	regval = pdata->initial_spmode | SPMODE_INIT_VAL | SPMODE_ENABLE;
 
-	mpc8xxx_spi_write_reg(&reg_base->mode, regval);
+	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, regval);
 
 	ret = pm_runtime_force_resume(dev);
 	if (ret < 0)
diff --git a/drivers/spi/spi-fsl-lib.h b/drivers/spi/spi-fsl-lib.h
index 84f5dcb..2925c80 100644
--- a/drivers/spi/spi-fsl-lib.h
+++ b/drivers/spi/spi-fsl-lib.h
@@ -23,13 +23,14 @@
 /* SPI/eSPI Controller driver's private data. */
 struct mpc8xxx_spi {
 	struct device *dev;
-	void *reg_base;
+	void __iomem *reg_base;
 
 	/* rx & tx bufs from the spi_transfer */
 	const void *tx;
 	void *rx;
 #if IS_ENABLED(CONFIG_SPI_FSL_ESPI)
 	int len;
+	u8 *local_buf;
 #endif
 
 	int subblock;
diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
index 823cbc9..7a37090 100644
--- a/drivers/spi/spi-img-spfi.c
+++ b/drivers/spi/spi-img-spfi.c
@@ -720,8 +720,6 @@
 		clk_disable_unprepare(spfi->sys_clk);
 	}
 
-	spi_master_put(master);
-
 	return 0;
 }
 
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index f63cb30..deb782f 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -186,17 +186,19 @@
 
 /* MX1, MX31, MX35, MX51 CSPI */
 static unsigned int spi_imx_clkdiv_2(unsigned int fin,
-		unsigned int fspi)
+		unsigned int fspi, unsigned int *fres)
 {
 	int i, div = 4;
 
 	for (i = 0; i < 7; i++) {
 		if (fspi * div >= fin)
-			return i;
+			goto out;
 		div <<= 1;
 	}
 
-	return 7;
+out:
+	*fres = fin / div;
+	return i;
 }
 
 static int spi_imx_bytes_per_word(const int bpw)
@@ -453,6 +455,9 @@
 #define MX31_CSPISTATUS		0x14
 #define MX31_STATUS_RR		(1 << 3)
 
+#define MX31_CSPI_TESTREG	0x1C
+#define MX31_TEST_LBC		(1 << 14)
+
 /* These functions also work for the i.MX35, but be aware that
  * the i.MX35 has a slightly different register layout for bits
  * we do not use here.
@@ -482,9 +487,11 @@
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
 	unsigned int reg = MX31_CSPICTRL_ENABLE | MX31_CSPICTRL_MASTER;
+	unsigned int clk;
 
-	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, config->speed_hz) <<
+	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, config->speed_hz, &clk) <<
 		MX31_CSPICTRL_DR_SHIFT;
+	spi_imx->spi_bus_clk = clk;
 
 	if (is_imx35_cspi(spi_imx)) {
 		reg |= (config->bpw - 1) << MX35_CSPICTRL_BL_SHIFT;
@@ -506,6 +513,13 @@
 
 	writel(reg, spi_imx->base + MXC_CSPICTRL);
 
+	reg = readl(spi_imx->base + MX31_CSPI_TESTREG);
+	if (spi->mode & SPI_LOOP)
+		reg |= MX31_TEST_LBC;
+	else
+		reg &= ~MX31_TEST_LBC;
+	writel(reg, spi_imx->base + MX31_CSPI_TESTREG);
+
 	return 0;
 }
 
@@ -625,9 +639,12 @@
 {
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
 	unsigned int reg = MX1_CSPICTRL_ENABLE | MX1_CSPICTRL_MASTER;
+	unsigned int clk;
 
-	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, config->speed_hz) <<
+	reg |= spi_imx_clkdiv_2(spi_imx->spi_clk, config->speed_hz, &clk) <<
 		MX1_CSPICTRL_DR_SHIFT;
+	spi_imx->spi_bus_clk = clk;
+
 	reg |= config->bpw - 1;
 
 	if (spi->mode & SPI_CPHA)
@@ -1179,7 +1196,7 @@
 	spi_imx->bitbang.master->prepare_message = spi_imx_prepare_message;
 	spi_imx->bitbang.master->unprepare_message = spi_imx_unprepare_message;
 	spi_imx->bitbang.master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
-	if (is_imx51_ecspi(spi_imx))
+	if (is_imx35_cspi(spi_imx) || is_imx51_ecspi(spi_imx))
 		spi_imx->bitbang.master->mode_bits |= SPI_LOOP;
 
 	init_completion(&spi_imx->xfer_done);
@@ -1251,6 +1268,12 @@
 		goto out_clk_put;
 	}
 
+	if (!master->cs_gpios) {
+		dev_err(&pdev->dev, "No CS GPIOs available\n");
+		ret = -EINVAL;
+		goto out_clk_put;
+	}
+
 	for (i = 0; i < master->num_chipselect; i++) {
 		if (!gpio_is_valid(master->cs_gpios[i]))
 			continue;
diff --git a/drivers/spi/spi-iproc-qspi.c b/drivers/spi/spi-iproc-qspi.c
new file mode 100644
index 0000000..be6ccb2
--- /dev/null
+++ b/drivers/spi/spi-iproc-qspi.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright 2016 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "spi-bcm-qspi.h"
+
+#define INTR_BASE_BIT_SHIFT			0x02
+#define INTR_COUNT				0x07
+
+struct bcm_iproc_intc {
+	struct bcm_qspi_soc_intc soc_intc;
+	struct platform_device *pdev;
+	void __iomem *int_reg;
+	void __iomem *int_status_reg;
+	spinlock_t soclock;
+	bool big_endian;
+};
+
+static u32 bcm_iproc_qspi_get_l2_int_status(struct bcm_qspi_soc_intc *soc_intc)
+{
+	struct bcm_iproc_intc *priv =
+			container_of(soc_intc, struct bcm_iproc_intc, soc_intc);
+	void __iomem *mmio = priv->int_status_reg;
+	int i;
+	u32 val = 0, sts = 0;
+
+	for (i = 0; i < INTR_COUNT; i++) {
+		if (bcm_qspi_readl(priv->big_endian, mmio + (i * 4)))
+			val |= 1UL << i;
+	}
+
+	if (val & INTR_MSPI_DONE_MASK)
+		sts |= MSPI_DONE;
+
+	if (val & BSPI_LR_INTERRUPTS_ALL)
+		sts |= BSPI_DONE;
+
+	if (val & BSPI_LR_INTERRUPTS_ERROR)
+		sts |= BSPI_ERR;
+
+	return sts;
+}
+
+static void bcm_iproc_qspi_int_ack(struct bcm_qspi_soc_intc *soc_intc, int type)
+{
+	struct bcm_iproc_intc *priv =
+			container_of(soc_intc, struct bcm_iproc_intc, soc_intc);
+	void __iomem *mmio = priv->int_status_reg;
+	u32 mask = get_qspi_mask(type);
+	int i;
+
+	for (i = 0; i < INTR_COUNT; i++) {
+		if (mask & (1UL << i))
+			bcm_qspi_writel(priv->big_endian, 1, mmio + (i * 4));
+	}
+}
+
+static void bcm_iproc_qspi_int_set(struct bcm_qspi_soc_intc *soc_intc, int type,
+				   bool en)
+{
+	struct bcm_iproc_intc *priv =
+			container_of(soc_intc, struct bcm_iproc_intc, soc_intc);
+	void __iomem *mmio = priv->int_reg;
+	u32 mask = get_qspi_mask(type);
+	u32 val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->soclock, flags);
+
+	val = bcm_qspi_readl(priv->big_endian, mmio);
+
+	if (en)
+		val = val | (mask << INTR_BASE_BIT_SHIFT);
+	else
+		val = val & ~(mask << INTR_BASE_BIT_SHIFT);
+
+	bcm_qspi_writel(priv->big_endian, val, mmio);
+
+	spin_unlock_irqrestore(&priv->soclock, flags);
+}
+
+static int bcm_iproc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct bcm_iproc_intc *priv;
+	struct bcm_qspi_soc_intc *soc_intc;
+	struct resource *res;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+	soc_intc = &priv->soc_intc;
+	priv->pdev = pdev;
+
+	spin_lock_init(&priv->soclock);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "intr_regs");
+	priv->int_reg = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->int_reg))
+		return PTR_ERR(priv->int_reg);
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+					   "intr_status_reg");
+	priv->int_status_reg = devm_ioremap_resource(dev, res);
+	if (IS_ERR(priv->int_status_reg))
+		return PTR_ERR(priv->int_status_reg);
+
+	priv->big_endian = of_device_is_big_endian(dev->of_node);
+
+	bcm_iproc_qspi_int_ack(soc_intc, MSPI_BSPI_DONE);
+	bcm_iproc_qspi_int_set(soc_intc, MSPI_BSPI_DONE, false);
+
+	soc_intc->bcm_qspi_int_ack = bcm_iproc_qspi_int_ack;
+	soc_intc->bcm_qspi_int_set = bcm_iproc_qspi_int_set;
+	soc_intc->bcm_qspi_get_int_status = bcm_iproc_qspi_get_l2_int_status;
+
+	return bcm_qspi_probe(pdev, soc_intc);
+}
+
+static int bcm_iproc_remove(struct platform_device *pdev)
+{
+	return bcm_qspi_remove(pdev);
+}
+
+static const struct of_device_id bcm_iproc_of_match[] = {
+	{ .compatible = "brcm,spi-nsp-qspi" },
+	{ .compatible = "brcm,spi-ns2-qspi" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, bcm_iproc_of_match);
+
+static struct platform_driver bcm_iproc_driver = {
+	.probe			= bcm_iproc_probe,
+	.remove			= bcm_iproc_remove,
+	.driver = {
+		.name		= "bcm_iproc",
+		.pm		= &bcm_qspi_pm_ops,
+		.of_match_table = bcm_iproc_of_match,
+	}
+};
+module_platform_driver(bcm_iproc_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Kamal Dasu");
+MODULE_DESCRIPTION("SPI flash driver for Broadcom iProc SoCs");
diff --git a/drivers/spi/spi-jcore.c b/drivers/spi/spi-jcore.c
new file mode 100644
index 0000000..f8117b8
--- /dev/null
+++ b/drivers/spi/spi-jcore.c
@@ -0,0 +1,231 @@
+/*
+ * J-Core SPI controller driver
+ *
+ * Copyright (C) 2012-2016 Smart Energy Instruments, Inc.
+ *
+ * Current version by Rich Felker
+ * Based loosely on initial version by Oleksandr G Zhadan
+ *
+ */
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/spi/spi.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/delay.h>
+
+#define DRV_NAME	"jcore_spi"
+
+#define CTRL_REG	0x0
+#define DATA_REG	0x4
+
+#define JCORE_SPI_CTRL_XMIT		0x02
+#define JCORE_SPI_STAT_BUSY		0x02
+#define JCORE_SPI_CTRL_LOOP		0x08
+#define JCORE_SPI_CTRL_CS_BITS		0x15
+
+#define JCORE_SPI_WAIT_RDY_MAX_LOOP	2000000
+
+struct jcore_spi {
+	struct spi_master *master;
+	void __iomem *base;
+	unsigned int cs_reg;
+	unsigned int speed_reg;
+	unsigned int speed_hz;
+	unsigned int clock_freq;
+};
+
+static int jcore_spi_wait(void __iomem *ctrl_reg)
+{
+	unsigned timeout = JCORE_SPI_WAIT_RDY_MAX_LOOP;
+
+	do {
+		if (!(readl(ctrl_reg) & JCORE_SPI_STAT_BUSY))
+			return 0;
+		cpu_relax();
+	} while (--timeout);
+
+	return -EBUSY;
+}
+
+static void jcore_spi_program(struct jcore_spi *hw)
+{
+	void __iomem *ctrl_reg = hw->base + CTRL_REG;
+
+	if (jcore_spi_wait(ctrl_reg))
+		dev_err(hw->master->dev.parent,
+			"timeout waiting to program ctrl reg.\n");
+
+	writel(hw->cs_reg | hw->speed_reg, ctrl_reg);
+}
+
+static void jcore_spi_chipsel(struct spi_device *spi, bool value)
+{
+	struct jcore_spi *hw = spi_master_get_devdata(spi->master);
+	u32 csbit = 1U << (2 * spi->chip_select);
+
+	dev_dbg(hw->master->dev.parent, "chipselect %d\n", spi->chip_select);
+
+	if (value)
+		hw->cs_reg |= csbit;
+	else
+		hw->cs_reg &= ~csbit;
+
+	jcore_spi_program(hw);
+}
+
+static void jcore_spi_baudrate(struct jcore_spi *hw, int speed)
+{
+	if (speed == hw->speed_hz) return;
+	hw->speed_hz = speed;
+	if (speed >= hw->clock_freq / 2)
+		hw->speed_reg = 0;
+	else
+		hw->speed_reg = ((hw->clock_freq / 2 / speed) - 1) << 27;
+	jcore_spi_program(hw);
+	dev_dbg(hw->master->dev.parent, "speed=%d reg=0x%x\n",
+		speed, hw->speed_reg);
+}
+
+static int jcore_spi_txrx(struct spi_master *master, struct spi_device *spi,
+			  struct spi_transfer *t)
+{
+	struct jcore_spi *hw = spi_master_get_devdata(master);
+
+	void __iomem *ctrl_reg = hw->base + CTRL_REG;
+	void __iomem *data_reg = hw->base + DATA_REG;
+	u32 xmit;
+
+	/* data buffers */
+	const unsigned char *tx;
+	unsigned char *rx;
+	unsigned int len;
+	unsigned int count;
+
+	jcore_spi_baudrate(hw, t->speed_hz);
+
+	xmit = hw->cs_reg | hw->speed_reg | JCORE_SPI_CTRL_XMIT;
+	tx = t->tx_buf;
+	rx = t->rx_buf;
+	len = t->len;
+
+	for (count = 0; count < len; count++) {
+		if (jcore_spi_wait(ctrl_reg))
+			break;
+
+		writel(tx ? *tx++ : 0, data_reg);
+		writel(xmit, ctrl_reg);
+
+		if (jcore_spi_wait(ctrl_reg))
+			break;
+
+		if (rx)
+			*rx++ = readl(data_reg);
+	}
+
+	spi_finalize_current_transfer(master);
+
+	if (count < len)
+		return -EREMOTEIO;
+
+	return 0;
+}
+
+static int jcore_spi_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct jcore_spi *hw;
+	struct spi_master *master;
+	struct resource *res;
+	u32 clock_freq;
+	struct clk *clk;
+	int err = -ENODEV;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(struct jcore_spi));
+	if (!master)
+		return err;
+
+	/* Setup the master state. */
+	master->num_chipselect = 3;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	master->transfer_one = jcore_spi_txrx;
+	master->set_cs = jcore_spi_chipsel;
+	master->dev.of_node = node;
+	master->bus_num = pdev->id;
+
+	hw = spi_master_get_devdata(master);
+	hw->master = master;
+	platform_set_drvdata(pdev, hw);
+
+	/* Find and map our resources */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		goto exit_busy;
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+				     resource_size(res), pdev->name))
+		goto exit_busy;
+	hw->base = devm_ioremap_nocache(&pdev->dev, res->start,
+					resource_size(res));
+	if (!hw->base)
+		goto exit_busy;
+
+	/*
+	 * The SPI clock rate controlled via a configurable clock divider
+	 * which is applied to the reference clock. A 50 MHz reference is
+	 * most suitable for obtaining standard SPI clock rates, but some
+	 * designs may have a different reference clock, and the DT must
+	 * make the driver aware so that it can properly program the
+	 * requested rate. If the clock is omitted, 50 MHz is assumed.
+	 */
+	clock_freq = 50000000;
+	clk = devm_clk_get(&pdev->dev, "ref_clk");
+	if (!IS_ERR_OR_NULL(clk)) {
+		if (clk_enable(clk) == 0)
+			clock_freq = clk_get_rate(clk);
+		else
+			dev_warn(&pdev->dev, "could not enable ref_clk\n");
+	}
+	hw->clock_freq = clock_freq;
+
+	/* Initialize all CS bits to high. */
+	hw->cs_reg = JCORE_SPI_CTRL_CS_BITS;
+	jcore_spi_baudrate(hw, 400000);
+
+	/* Register our spi controller */
+	err = devm_spi_register_master(&pdev->dev, master);
+	if (err)
+		goto exit;
+
+	return 0;
+
+exit_busy:
+	err = -EBUSY;
+exit:
+	spi_master_put(master);
+	return err;
+}
+
+static const struct of_device_id jcore_spi_of_match[] = {
+	{ .compatible = "jcore,spi2" },
+	{},
+};
+
+static struct platform_driver jcore_spi_driver = {
+	.probe = jcore_spi_probe,
+	.driver = {
+		.name = DRV_NAME,
+		.of_match_table = jcore_spi_of_match,
+	},
+};
+
+module_platform_driver(jcore_spi_driver);
+
+MODULE_DESCRIPTION("J-Core SPI driver");
+MODULE_AUTHOR("Rich Felker <dalias@libc.org>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c
index faa1b13..50e620f 100644
--- a/drivers/spi/spi-loopback-test.c
+++ b/drivers/spi/spi-loopback-test.c
@@ -405,7 +405,7 @@
 	u8 *end;
 };
 
-int rx_ranges_cmp(void *priv, struct list_head *a, struct list_head *b)
+static int rx_ranges_cmp(void *priv, struct list_head *a, struct list_head *b)
 {
 	struct rx_ranges *rx_a = list_entry(a, struct rx_ranges, list);
 	struct rx_ranges *rx_b = list_entry(b, struct rx_ranges, list);
diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c
index 2465259..616566e 100644
--- a/drivers/spi/spi-meson-spifc.c
+++ b/drivers/spi/spi-meson-spifc.c
@@ -442,6 +442,7 @@
 
 static const struct of_device_id meson_spifc_dt_match[] = {
 	{ .compatible = "amlogic,meson6-spifc", },
+	{ .compatible = "amlogic,meson-gxbb-spifc", },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, meson_spifc_dt_match);
diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c
index 0be89e0..899d7a8 100644
--- a/drivers/spi/spi-mt65xx.c
+++ b/drivers/spi/spi-mt65xx.c
@@ -685,7 +685,6 @@
 	pm_runtime_disable(&pdev->dev);
 
 	mtk_spi_reset(mdata);
-	spi_master_put(master);
 
 	return 0;
 }
diff --git a/drivers/spi/spi-pic32-sqi.c b/drivers/spi/spi-pic32-sqi.c
index c41abdd..bd1c6b5 100644
--- a/drivers/spi/spi-pic32-sqi.c
+++ b/drivers/spi/spi-pic32-sqi.c
@@ -253,15 +253,13 @@
 		return NULL;
 
 	rdesc = list_first_entry(&sqi->bd_list_free, struct ring_desc, list);
-	list_del(&rdesc->list);
-	list_add_tail(&rdesc->list, &sqi->bd_list_used);
+	list_move_tail(&rdesc->list, &sqi->bd_list_used);
 	return rdesc;
 }
 
 static void ring_desc_put(struct pic32_sqi *sqi, struct ring_desc *rdesc)
 {
-	list_del(&rdesc->list);
-	list_add(&rdesc->list, &sqi->bd_list_free);
+	list_move(&rdesc->list, &sqi->bd_list_free);
 }
 
 static int pic32_sqi_one_transfer(struct pic32_sqi *sqi,
diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c
index db3ae1d..04f3eec 100644
--- a/drivers/spi/spi-pxa2xx-dma.c
+++ b/drivers/spi/spi-pxa2xx-dma.c
@@ -23,7 +23,7 @@
 static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data,
 					     bool error)
 {
-	struct spi_message *msg = drv_data->cur_msg;
+	struct spi_message *msg = drv_data->master->cur_msg;
 
 	/*
 	 * It is possible that one CPU is handling ROR interrupt and other
@@ -76,7 +76,8 @@
 pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data,
 			   enum dma_transfer_direction dir)
 {
-	struct chip_data *chip = drv_data->cur_chip;
+	struct chip_data *chip =
+		spi_get_ctldata(drv_data->master->cur_msg->spi);
 	struct spi_transfer *xfer = drv_data->cur_transfer;
 	enum dma_slave_buswidth width;
 	struct dma_slave_config cfg;
@@ -146,7 +147,7 @@
 int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, u32 dma_burst)
 {
 	struct dma_async_tx_descriptor *tx_desc, *rx_desc;
-	int err = 0;
+	int err;
 
 	tx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_MEM_TO_DEV);
 	if (!tx_desc) {
diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c
index f3df522..58d2d48 100644
--- a/drivers/spi/spi-pxa2xx-pci.c
+++ b/drivers/spi/spi-pxa2xx-pci.c
@@ -214,6 +214,7 @@
 		return PTR_ERR(ssp->clk);
 
 	memset(&pi, 0, sizeof(pi));
+	pi.fwnode = dev->dev.fwnode;
 	pi.parent = &dev->dev;
 	pi.name = "pxa2xx-spi";
 	pi.id = ssp->port_id;
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 87150a1..dd7b5b4 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -28,6 +28,7 @@
 #include <linux/spi/spi.h>
 #include <linux/delay.h>
 #include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
@@ -62,6 +63,13 @@
 				| QUARK_X1000_SSCR1_TFT		\
 				| SSCR1_SPH | SSCR1_SPO | SSCR1_LBM)
 
+#define CE4100_SSCR1_CHANGE_MASK (SSCR1_TTELP | SSCR1_TTE | SSCR1_SCFR \
+				| SSCR1_ECRA | SSCR1_ECRB | SSCR1_SCLKDIR \
+				| SSCR1_SFRMDIR | SSCR1_RWOT | SSCR1_TRAIL \
+				| SSCR1_IFS | SSCR1_STRF | SSCR1_EFWR \
+				| CE4100_SSCR1_RFT | CE4100_SSCR1_TFT | SSCR1_MWDS \
+				| SSCR1_SPH | SSCR1_SPO | SSCR1_LBM)
+
 #define LPSS_GENERAL_REG_RXTO_HOLDOFF_DISABLE	BIT(24)
 #define LPSS_CS_CONTROL_SW_MODE			BIT(0)
 #define LPSS_CS_CONTROL_CS_HIGH			BIT(1)
@@ -175,6 +183,8 @@
 	switch (drv_data->ssp_type) {
 	case QUARK_X1000_SSP:
 		return QUARK_X1000_SSCR1_CHANGE_MASK;
+	case CE4100_SSP:
+		return CE4100_SSCR1_CHANGE_MASK;
 	default:
 		return SSCR1_CHANGE_MASK;
 	}
@@ -186,6 +196,8 @@
 	switch (drv_data->ssp_type) {
 	case QUARK_X1000_SSP:
 		return RX_THRESH_QUARK_X1000_DFLT;
+	case CE4100_SSP:
+		return RX_THRESH_CE4100_DFLT;
 	default:
 		return RX_THRESH_DFLT;
 	}
@@ -199,6 +211,9 @@
 	case QUARK_X1000_SSP:
 		mask = QUARK_X1000_SSSR_TFL_MASK;
 		break;
+	case CE4100_SSP:
+		mask = CE4100_SSSR_TFL_MASK;
+		break;
 	default:
 		mask = SSSR_TFL_MASK;
 		break;
@@ -216,6 +231,9 @@
 	case QUARK_X1000_SSP:
 		mask = QUARK_X1000_SSCR1_RFT;
 		break;
+	case CE4100_SSP:
+		mask = CE4100_SSCR1_RFT;
+		break;
 	default:
 		mask = SSCR1_RFT;
 		break;
@@ -230,6 +248,9 @@
 	case QUARK_X1000_SSP:
 		*sccr1_reg |= QUARK_X1000_SSCR1_RxTresh(threshold);
 		break;
+	case CE4100_SSP:
+		*sccr1_reg |= CE4100_SSCR1_RxTresh(threshold);
+		break;
 	default:
 		*sccr1_reg |= SSCR1_RxTresh(threshold);
 		break;
@@ -316,7 +337,7 @@
 
 	value = __lpss_ssp_read_priv(drv_data, config->reg_cs_ctrl);
 
-	cs = drv_data->cur_msg->spi->chip_select;
+	cs = drv_data->master->cur_msg->spi->chip_select;
 	cs <<= config->cs_sel_shift;
 	if (cs != (value & config->cs_sel_mask)) {
 		/*
@@ -355,10 +376,11 @@
 
 static void cs_assert(struct driver_data *drv_data)
 {
-	struct chip_data *chip = drv_data->cur_chip;
+	struct chip_data *chip =
+		spi_get_ctldata(drv_data->master->cur_msg->spi);
 
 	if (drv_data->ssp_type == CE4100_SSP) {
-		pxa2xx_spi_write(drv_data, SSSR, drv_data->cur_chip->frm);
+		pxa2xx_spi_write(drv_data, SSSR, chip->frm);
 		return;
 	}
 
@@ -378,7 +400,8 @@
 
 static void cs_deassert(struct driver_data *drv_data)
 {
-	struct chip_data *chip = drv_data->cur_chip;
+	struct chip_data *chip =
+		spi_get_ctldata(drv_data->master->cur_msg->spi);
 
 	if (drv_data->ssp_type == CE4100_SSP)
 		return;
@@ -508,7 +531,7 @@
 
 void *pxa2xx_spi_next_transfer(struct driver_data *drv_data)
 {
-	struct spi_message *msg = drv_data->cur_msg;
+	struct spi_message *msg = drv_data->master->cur_msg;
 	struct spi_transfer *trans = drv_data->cur_transfer;
 
 	/* Move to next transfer */
@@ -529,8 +552,7 @@
 	struct spi_message *msg;
 	unsigned long timeout;
 
-	msg = drv_data->cur_msg;
-	drv_data->cur_msg = NULL;
+	msg = drv_data->master->cur_msg;
 	drv_data->cur_transfer = NULL;
 
 	last_transfer = list_last_entry(&msg->transfers, struct spi_transfer,
@@ -575,13 +597,13 @@
 			cs_deassert(drv_data);
 	}
 
-	drv_data->cur_chip = NULL;
 	spi_finalize_current_message(drv_data->master);
 }
 
 static void reset_sccr1(struct driver_data *drv_data)
 {
-	struct chip_data *chip = drv_data->cur_chip;
+	struct chip_data *chip =
+		spi_get_ctldata(drv_data->master->cur_msg->spi);
 	u32 sccr1_reg;
 
 	sccr1_reg = pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1;
@@ -589,6 +611,9 @@
 	case QUARK_X1000_SSP:
 		sccr1_reg &= ~QUARK_X1000_SSCR1_RFT;
 		break;
+	case CE4100_SSP:
+		sccr1_reg &= ~CE4100_SSCR1_RFT;
+		break;
 	default:
 		sccr1_reg &= ~SSCR1_RFT;
 		break;
@@ -610,7 +635,7 @@
 
 	dev_err(&drv_data->pdev->dev, "%s\n", msg);
 
-	drv_data->cur_msg->state = ERROR_STATE;
+	drv_data->master->cur_msg->state = ERROR_STATE;
 	tasklet_schedule(&drv_data->pump_transfers);
 }
 
@@ -623,7 +648,7 @@
 		pxa2xx_spi_write(drv_data, SSTO, 0);
 
 	/* Update total byte transferred return count actual bytes read */
-	drv_data->cur_msg->actual_length += drv_data->len -
+	drv_data->master->cur_msg->actual_length += drv_data->len -
 				(drv_data->rx_end - drv_data->rx);
 
 	/* Transfer delays and chip select release are
@@ -631,7 +656,7 @@
 	 */
 
 	/* Move to next transfer */
-	drv_data->cur_msg->state = pxa2xx_spi_next_transfer(drv_data);
+	drv_data->master->cur_msg->state = pxa2xx_spi_next_transfer(drv_data);
 
 	/* Schedule transfer tasklet */
 	tasklet_schedule(&drv_data->pump_transfers);
@@ -746,7 +771,7 @@
 	if (!(status & mask))
 		return IRQ_NONE;
 
-	if (!drv_data->cur_msg) {
+	if (!drv_data->master->cur_msg) {
 
 		pxa2xx_spi_write(drv_data, SSCR0,
 				 pxa2xx_spi_read(drv_data, SSCR0)
@@ -905,7 +930,8 @@
 static unsigned int pxa2xx_ssp_get_clk_div(struct driver_data *drv_data,
 					   int rate)
 {
-	struct chip_data *chip = drv_data->cur_chip;
+	struct chip_data *chip =
+		spi_get_ctldata(drv_data->master->cur_msg->spi);
 	unsigned int clk_div;
 
 	switch (drv_data->ssp_type) {
@@ -934,25 +960,23 @@
 {
 	struct driver_data *drv_data = (struct driver_data *)data;
 	struct spi_master *master = drv_data->master;
-	struct spi_message *message = NULL;
-	struct spi_transfer *transfer = NULL;
-	struct spi_transfer *previous = NULL;
-	struct chip_data *chip = NULL;
-	u32 clk_div = 0;
-	u8 bits = 0;
-	u32 speed = 0;
+	struct spi_message *message = master->cur_msg;
+	struct chip_data *chip = spi_get_ctldata(message->spi);
+	u32 dma_thresh = chip->dma_threshold;
+	u32 dma_burst = chip->dma_burst_size;
+	u32 change_mask = pxa2xx_spi_get_ssrc1_change_mask(drv_data);
+	struct spi_transfer *transfer;
+	struct spi_transfer *previous;
+	u32 clk_div;
+	u8 bits;
+	u32 speed;
 	u32 cr0;
 	u32 cr1;
-	u32 dma_thresh = drv_data->cur_chip->dma_threshold;
-	u32 dma_burst = drv_data->cur_chip->dma_burst_size;
-	u32 change_mask = pxa2xx_spi_get_ssrc1_change_mask(drv_data);
 	int err;
 	int dma_mapped;
 
 	/* Get current state information */
-	message = drv_data->cur_msg;
 	transfer = drv_data->cur_transfer;
-	chip = drv_data->cur_chip;
 
 	/* Handle for abort */
 	if (message->state == ERROR_STATE) {
@@ -1146,17 +1170,12 @@
 {
 	struct driver_data *drv_data = spi_master_get_devdata(master);
 
-	drv_data->cur_msg = msg;
 	/* Initial message state*/
-	drv_data->cur_msg->state = START_STATE;
-	drv_data->cur_transfer = list_entry(drv_data->cur_msg->transfers.next,
+	msg->state = START_STATE;
+	drv_data->cur_transfer = list_entry(msg->transfers.next,
 						struct spi_transfer,
 						transfer_list);
 
-	/* prepare to setup the SSP, in pump_transfers, using the per
-	 * chip configuration */
-	drv_data->cur_chip = spi_get_ctldata(drv_data->cur_msg->spi);
-
 	/* Mark as busy and launch transfers */
 	tasklet_schedule(&drv_data->pump_transfers);
 	return 0;
@@ -1176,9 +1195,26 @@
 static int setup_cs(struct spi_device *spi, struct chip_data *chip,
 		    struct pxa2xx_spi_chip *chip_info)
 {
+	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
 	int err = 0;
 
-	if (chip == NULL || chip_info == NULL)
+	if (chip == NULL)
+		return 0;
+
+	if (drv_data->cs_gpiods) {
+		struct gpio_desc *gpiod;
+
+		gpiod = drv_data->cs_gpiods[spi->chip_select];
+		if (gpiod) {
+			chip->gpio_cs = desc_to_gpio(gpiod);
+			chip->gpio_cs_inverted = spi->mode & SPI_CS_HIGH;
+			gpiod_set_value(gpiod, chip->gpio_cs_inverted);
+		}
+
+		return 0;
+	}
+
+	if (chip_info == NULL)
 		return 0;
 
 	/* NOTE: setup() can be called multiple times, possibly with
@@ -1213,7 +1249,7 @@
 
 static int setup(struct spi_device *spi)
 {
-	struct pxa2xx_spi_chip *chip_info = NULL;
+	struct pxa2xx_spi_chip *chip_info;
 	struct chip_data *chip;
 	const struct lpss_config *config;
 	struct driver_data *drv_data = spi_master_get_devdata(spi->master);
@@ -1225,6 +1261,11 @@
 		tx_hi_thres = 0;
 		rx_thres = RX_THRESH_QUARK_X1000_DFLT;
 		break;
+	case CE4100_SSP:
+		tx_thres = TX_THRESH_CE4100_DFLT;
+		tx_hi_thres = 0;
+		rx_thres = RX_THRESH_CE4100_DFLT;
+		break;
 	case LPSS_LPT_SSP:
 	case LPSS_BYT_SSP:
 	case LPSS_BSW_SSP:
@@ -1309,6 +1350,10 @@
 				   | (QUARK_X1000_SSCR1_TxTresh(tx_thres)
 				   & QUARK_X1000_SSCR1_TFT);
 		break;
+	case CE4100_SSP:
+		chip->threshold = (CE4100_SSCR1_RxTresh(rx_thres) & CE4100_SSCR1_RFT) |
+			(CE4100_SSCR1_TxTresh(tx_thres) & CE4100_SSCR1_TFT);
+		break;
 	default:
 		chip->threshold = (SSCR1_RxTresh(rx_thres) & SSCR1_RFT) |
 			(SSCR1_TxTresh(tx_thres) & SSCR1_TFT);
@@ -1352,7 +1397,8 @@
 	if (!chip)
 		return;
 
-	if (drv_data->ssp_type != CE4100_SSP && gpio_is_valid(chip->gpio_cs))
+	if (drv_data->ssp_type != CE4100_SSP && !drv_data->cs_gpiods &&
+	    gpio_is_valid(chip->gpio_cs))
 		gpio_free(chip->gpio_cs);
 
 	kfree(chip);
@@ -1530,7 +1576,7 @@
 	struct driver_data *drv_data;
 	struct ssp_device *ssp;
 	const struct lpss_config *config;
-	int status;
+	int status, count;
 	u32 tmp;
 
 	platform_info = dev_get_platdata(dev);
@@ -1630,15 +1676,20 @@
 	pxa2xx_spi_write(drv_data, SSCR0, 0);
 	switch (drv_data->ssp_type) {
 	case QUARK_X1000_SSP:
-		tmp = QUARK_X1000_SSCR1_RxTresh(RX_THRESH_QUARK_X1000_DFLT)
-		      | QUARK_X1000_SSCR1_TxTresh(TX_THRESH_QUARK_X1000_DFLT);
+		tmp = QUARK_X1000_SSCR1_RxTresh(RX_THRESH_QUARK_X1000_DFLT) |
+		      QUARK_X1000_SSCR1_TxTresh(TX_THRESH_QUARK_X1000_DFLT);
 		pxa2xx_spi_write(drv_data, SSCR1, tmp);
 
 		/* using the Motorola SPI protocol and use 8 bit frame */
-		pxa2xx_spi_write(drv_data, SSCR0,
-				 QUARK_X1000_SSCR0_Motorola
-				 | QUARK_X1000_SSCR0_DataSize(8));
+		tmp = QUARK_X1000_SSCR0_Motorola | QUARK_X1000_SSCR0_DataSize(8);
+		pxa2xx_spi_write(drv_data, SSCR0, tmp);
 		break;
+	case CE4100_SSP:
+		tmp = CE4100_SSCR1_RxTresh(RX_THRESH_CE4100_DFLT) |
+		      CE4100_SSCR1_TxTresh(TX_THRESH_CE4100_DFLT);
+		pxa2xx_spi_write(drv_data, SSCR1, tmp);
+		tmp = SSCR0_SCR(2) | SSCR0_Motorola | SSCR0_DataSize(8);
+		pxa2xx_spi_write(drv_data, SSCR0, tmp);
 	default:
 		tmp = SSCR1_RxTresh(RX_THRESH_DFLT) |
 		      SSCR1_TxTresh(TX_THRESH_DFLT);
@@ -1669,6 +1720,39 @@
 	}
 	master->num_chipselect = platform_info->num_chipselect;
 
+	count = gpiod_count(&pdev->dev, "cs");
+	if (count > 0) {
+		int i;
+
+		master->num_chipselect = max_t(int, count,
+			master->num_chipselect);
+
+		drv_data->cs_gpiods = devm_kcalloc(&pdev->dev,
+			master->num_chipselect, sizeof(struct gpio_desc *),
+			GFP_KERNEL);
+		if (!drv_data->cs_gpiods) {
+			status = -ENOMEM;
+			goto out_error_clock_enabled;
+		}
+
+		for (i = 0; i < master->num_chipselect; i++) {
+			struct gpio_desc *gpiod;
+
+			gpiod = devm_gpiod_get_index(dev, "cs", i,
+						     GPIOD_OUT_HIGH);
+			if (IS_ERR(gpiod)) {
+				/* Means use native chip select */
+				if (PTR_ERR(gpiod) == -ENOENT)
+					continue;
+
+				status = (int)PTR_ERR(gpiod);
+				goto out_error_clock_enabled;
+			} else {
+				drv_data->cs_gpiods[i] = gpiod;
+			}
+		}
+	}
+
 	tasklet_init(&drv_data->pump_transfers, pump_transfers,
 		     (unsigned long)drv_data);
 
@@ -1742,7 +1826,7 @@
 {
 	struct driver_data *drv_data = dev_get_drvdata(dev);
 	struct ssp_device *ssp = drv_data->ssp;
-	int status = 0;
+	int status;
 
 	status = spi_master_suspend(drv_data->master);
 	if (status != 0)
@@ -1759,7 +1843,7 @@
 {
 	struct driver_data *drv_data = dev_get_drvdata(dev);
 	struct ssp_device *ssp = drv_data->ssp;
-	int status = 0;
+	int status;
 
 	/* Enable the SSP clock */
 	if (!pm_runtime_suspended(dev))
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index d217ad5..ce31b81 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -53,9 +53,7 @@
 	atomic_t dma_running;
 
 	/* Current message transfer state info */
-	struct spi_message *cur_msg;
 	struct spi_transfer *cur_transfer;
-	struct chip_data *cur_chip;
 	size_t len;
 	void *tx;
 	void *tx_end;
@@ -68,6 +66,9 @@
 	void (*cs_control)(u32 command);
 
 	void __iomem *lpss_base;
+
+	/* GPIOs for chip selects */
+	struct gpio_desc **cs_gpiods;
 };
 
 struct chip_data {
diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index c338ef1..1bfa889 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c
@@ -982,8 +982,10 @@
 	if (ret)
 		return ret;
 
-	clk_disable_unprepare(controller->cclk);
-	clk_disable_unprepare(controller->iclk);
+	if (!pm_runtime_suspended(device)) {
+		clk_disable_unprepare(controller->cclk);
+		clk_disable_unprepare(controller->iclk);
+	}
 	return 0;
 }
 
@@ -1030,7 +1032,6 @@
 
 	pm_runtime_put_noidle(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-	spi_master_put(master);
 
 	return 0;
 }
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 8188433..a816f07 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -295,14 +295,24 @@
 static int rspi_rz_set_config_register(struct rspi_data *rspi, int access_size)
 {
 	int spbr;
+	int div = 0;
+	unsigned long clksrc;
 
 	/* Sets output mode, MOSI signal, and (optionally) loopback */
 	rspi_write8(rspi, rspi->sppcr, RSPI_SPPCR);
 
+	clksrc = clk_get_rate(rspi->clk);
+	while (div < 3) {
+		if (rspi->max_speed_hz >= clksrc/4) /* 4=(CLK/2)/2 */
+			break;
+		div++;
+		clksrc /= 2;
+	}
+
 	/* Sets transfer bit rate */
-	spbr = DIV_ROUND_UP(clk_get_rate(rspi->clk),
-			    2 * rspi->max_speed_hz) - 1;
+	spbr = DIV_ROUND_UP(clksrc, 2 * rspi->max_speed_hz) - 1;
 	rspi_write8(rspi, clamp(spbr, 0, 255), RSPI_SPBR);
+	rspi->spcmd |= div << 2;
 
 	/* Disable dummy transmission, set byte access */
 	rspi_write8(rspi, SPDCR_SPLBYTE, RSPI_SPDCR);
diff --git a/drivers/spi/spi-sc18is602.c b/drivers/spi/spi-sc18is602.c
index 36af4d4..f63714f 100644
--- a/drivers/spi/spi-sc18is602.c
+++ b/drivers/spi/spi-sc18is602.c
@@ -23,6 +23,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
 #include <linux/platform_data/sc18is602.h>
+#include <linux/gpio/consumer.h>
 
 enum chips { sc18is602, sc18is602b, sc18is603 };
 
@@ -50,6 +51,8 @@
 	u8			buffer[SC18IS602_BUFSIZ + 1];
 	int			tlen;	/* Data queued for tx in buffer */
 	int			rindex;	/* Receive data index in buffer */
+
+	struct gpio_desc	*reset;
 };
 
 static int sc18is602_wait_ready(struct sc18is602 *hw, int len)
@@ -257,6 +260,12 @@
 	hw = spi_master_get_devdata(master);
 	i2c_set_clientdata(client, hw);
 
+	/* assert reset and then release */
+	hw->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(hw->reset))
+		return PTR_ERR(hw->reset);
+	gpiod_set_value_cansleep(hw->reset, 0);
+
 	hw->master = master;
 	hw->client = client;
 	hw->dev = dev;
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 0f83ad1..1de3a77 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -262,6 +262,9 @@
 
 	for (k = 0; k < ARRAY_SIZE(sh_msiof_spi_div_table); k++) {
 		brps = DIV_ROUND_UP(div, sh_msiof_spi_div_table[k].div);
+		/* SCR_BRDV_DIV_1 is valid only if BRPS is x 1/1 or x 1/2 */
+		if (sh_msiof_spi_div_table[k].div == 1 && brps > 2)
+			continue;
 		if (brps <= 32) /* max of brdv is 32 */
 			break;
 	}
diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c
index a56eca0..e54b596 100644
--- a/drivers/spi/spi-st-ssc4.c
+++ b/drivers/spi/spi-st-ssc4.c
@@ -175,10 +175,7 @@
 
 static void spi_st_cleanup(struct spi_device *spi)
 {
-	int cs = spi->cs_gpio;
-
-	if (gpio_is_valid(cs))
-		devm_gpio_free(&spi->dev, cs);
+	gpio_free(spi->cs_gpio);
 }
 
 /* the spi->mode bits understood by this driver: */
@@ -201,14 +198,15 @@
 		return -EINVAL;
 	}
 
-	if (devm_gpio_request(&spi->dev, cs, dev_name(&spi->dev))) {
+	ret = gpio_request(cs, dev_name(&spi->dev));
+	if (ret) {
 		dev_err(&spi->dev, "could not request gpio:%d\n", cs);
-		return -EINVAL;
+		return ret;
 	}
 
 	ret = gpio_direction_output(cs, spi->mode & SPI_CS_HIGH);
 	if (ret)
-		return ret;
+		goto out_free_gpio;
 
 	spi_st_clk = clk_get_rate(spi_st->clk);
 
@@ -217,7 +215,8 @@
 	if (sscbrg < 0x07 || sscbrg > BIT(16)) {
 		dev_err(&spi->dev,
 			"baudrate %d outside valid range %d\n", sscbrg, hz);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_free_gpio;
 	}
 
 	spi_st->baud = spi_st_clk / (2 * sscbrg);
@@ -266,6 +265,10 @@
 	 readl_relaxed(spi_st->base + SSC_RBUF);
 
 	 return 0;
+
+out_free_gpio:
+	gpio_free(cs);
+	return ret;
 }
 
 /* Interrupt fired when TX shift register becomes empty */
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index ac0b072..caeac66 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -41,6 +41,8 @@
 };
 
 struct ti_qspi {
+	struct completion	transfer_complete;
+
 	/* list synchronization */
 	struct mutex            list_lock;
 
@@ -54,6 +56,9 @@
 
 	struct ti_qspi_regs     ctx_reg;
 
+	dma_addr_t		mmap_phys_base;
+	struct dma_chan		*rx_chan;
+
 	u32 spi_max_frequency;
 	u32 cmd;
 	u32 dc;
@@ -379,6 +384,72 @@
 	return 0;
 }
 
+static void ti_qspi_dma_callback(void *param)
+{
+	struct ti_qspi *qspi = param;
+
+	complete(&qspi->transfer_complete);
+}
+
+static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst,
+			    dma_addr_t dma_src, size_t len)
+{
+	struct dma_chan *chan = qspi->rx_chan;
+	struct dma_device *dma_dev = chan->device;
+	dma_cookie_t cookie;
+	enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
+	struct dma_async_tx_descriptor *tx;
+	int ret;
+
+	tx = dma_dev->device_prep_dma_memcpy(chan, dma_dst, dma_src,
+					     len, flags);
+	if (!tx) {
+		dev_err(qspi->dev, "device_prep_dma_memcpy error\n");
+		return -EIO;
+	}
+
+	tx->callback = ti_qspi_dma_callback;
+	tx->callback_param = qspi;
+	cookie = tx->tx_submit(tx);
+
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		dev_err(qspi->dev, "dma_submit_error %d\n", cookie);
+		return -EIO;
+	}
+
+	dma_async_issue_pending(chan);
+	ret = wait_for_completion_timeout(&qspi->transfer_complete,
+					  msecs_to_jiffies(len));
+	if (ret <= 0) {
+		dmaengine_terminate_sync(chan);
+		dev_err(qspi->dev, "DMA wait_for_completion_timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int ti_qspi_dma_xfer_sg(struct ti_qspi *qspi, struct sg_table rx_sg,
+			       loff_t from)
+{
+	struct scatterlist *sg;
+	dma_addr_t dma_src = qspi->mmap_phys_base + from;
+	dma_addr_t dma_dst;
+	int i, len, ret;
+
+	for_each_sg(rx_sg.sgl, sg, rx_sg.nents, i) {
+		dma_dst = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+		ret = ti_qspi_dma_xfer(qspi, dma_dst, dma_src, len);
+		if (ret)
+			return ret;
+		dma_src += len;
+	}
+
+	return 0;
+}
+
 static void ti_qspi_enable_memory_map(struct spi_device *spi)
 {
 	struct ti_qspi  *qspi = spi_master_get_devdata(spi->master);
@@ -426,7 +497,7 @@
 		      QSPI_SPI_SETUP_REG(spi->chip_select));
 }
 
-static int ti_qspi_spi_flash_read(struct  spi_device *spi,
+static int ti_qspi_spi_flash_read(struct spi_device *spi,
 				  struct spi_flash_read_message *msg)
 {
 	struct ti_qspi *qspi = spi_master_get_devdata(spi->master);
@@ -437,9 +508,23 @@
 	if (!qspi->mmap_enabled)
 		ti_qspi_enable_memory_map(spi);
 	ti_qspi_setup_mmap_read(spi, msg);
-	memcpy_fromio(msg->buf, qspi->mmap_base + msg->from, msg->len);
+
+	if (qspi->rx_chan) {
+		if (msg->cur_msg_mapped) {
+			ret = ti_qspi_dma_xfer_sg(qspi, msg->rx_sg, msg->from);
+			if (ret)
+				goto err_unlock;
+		} else {
+			dev_err(qspi->dev, "Invalid address for DMA\n");
+			ret = -EIO;
+			goto err_unlock;
+		}
+	} else {
+		memcpy_fromio(msg->buf, qspi->mmap_base + msg->from, msg->len);
+	}
 	msg->retlen = msg->len;
 
+err_unlock:
 	mutex_unlock(&qspi->list_lock);
 
 	return ret;
@@ -536,6 +621,7 @@
 	struct device_node *np = pdev->dev.of_node;
 	u32 max_freq;
 	int ret = 0, num_cs, irq;
+	dma_cap_mask_t mask;
 
 	master = spi_alloc_master(&pdev->dev, sizeof(*qspi));
 	if (!master)
@@ -550,6 +636,7 @@
 	master->dev.of_node = pdev->dev.of_node;
 	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
 				     SPI_BPW_MASK(8);
+	master->spi_flash_read = ti_qspi_spi_flash_read;
 
 	if (!of_property_read_u32(np, "num-cs", &num_cs))
 		master->num_chipselect = num_cs;
@@ -592,17 +679,6 @@
 		goto free_master;
 	}
 
-	if (res_mmap) {
-		qspi->mmap_base = devm_ioremap_resource(&pdev->dev,
-							res_mmap);
-		master->spi_flash_read = ti_qspi_spi_flash_read;
-		if (IS_ERR(qspi->mmap_base)) {
-			dev_err(&pdev->dev,
-				"falling back to PIO mode\n");
-			master->spi_flash_read = NULL;
-		}
-	}
-	qspi->mmap_enabled = false;
 
 	if (of_property_read_bool(np, "syscon-chipselects")) {
 		qspi->ctrl_base =
@@ -633,11 +709,37 @@
 	if (!of_property_read_u32(np, "spi-max-frequency", &max_freq))
 		qspi->spi_max_frequency = max_freq;
 
-	ret = devm_spi_register_master(&pdev->dev, master);
-	if (ret)
-		goto free_master;
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_MEMCPY, mask);
 
-	return 0;
+	qspi->rx_chan = dma_request_chan_by_mask(&mask);
+	if (!qspi->rx_chan) {
+		dev_err(qspi->dev,
+			"No Rx DMA available, trying mmap mode\n");
+		ret = 0;
+		goto no_dma;
+	}
+	master->dma_rx = qspi->rx_chan;
+	init_completion(&qspi->transfer_complete);
+	if (res_mmap)
+		qspi->mmap_phys_base = (dma_addr_t)res_mmap->start;
+
+no_dma:
+	if (!qspi->rx_chan && res_mmap) {
+		qspi->mmap_base = devm_ioremap_resource(&pdev->dev, res_mmap);
+		if (IS_ERR(qspi->mmap_base)) {
+			dev_info(&pdev->dev,
+				 "mmap failed with error %ld using PIO mode\n",
+				 PTR_ERR(qspi->mmap_base));
+			qspi->mmap_base = NULL;
+			master->spi_flash_read = NULL;
+		}
+	}
+	qspi->mmap_enabled = false;
+
+	ret = devm_spi_register_master(&pdev->dev, master);
+	if (!ret)
+		return 0;
 
 free_master:
 	spi_master_put(master);
@@ -656,6 +758,9 @@
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
+	if (qspi->rx_chan)
+		dma_release_channel(qspi->rx_chan);
+
 	return 0;
 }
 
diff --git a/drivers/spi/spi-txx9.c b/drivers/spi/spi-txx9.c
index 7492ea3..51759d3 100644
--- a/drivers/spi/spi-txx9.c
+++ b/drivers/spi/spi-txx9.c
@@ -346,7 +346,7 @@
 		c->clk = NULL;
 		goto exit;
 	}
-	ret = clk_enable(c->clk);
+	ret = clk_prepare_enable(c->clk);
 	if (ret) {
 		c->clk = NULL;
 		goto exit;
@@ -395,7 +395,7 @@
 exit_busy:
 	ret = -EBUSY;
 exit:
-	clk_disable(c->clk);
+	clk_disable_unprepare(c->clk);
 	spi_master_put(master);
 	return ret;
 }
@@ -406,7 +406,7 @@
 	struct txx9spi *c = spi_master_get_devdata(master);
 
 	flush_work(&c->work);
-	clk_disable(c->clk);
+	clk_disable_unprepare(c->clk);
 	return 0;
 }
 
diff --git a/drivers/spi/spi-xlp.c b/drivers/spi/spi-xlp.c
index 8f04fec..4071a72 100644
--- a/drivers/spi/spi-xlp.c
+++ b/drivers/spi/spi-xlp.c
@@ -11,6 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
+#include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -405,8 +406,9 @@
 	clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(clk)) {
 		dev_err(&pdev->dev, "could not get spi clock\n");
-		return -ENODEV;
+		return PTR_ERR(clk);
 	}
+
 	xspi->spi_clk = clk_get_rate(clk);
 
 	master = spi_alloc_master(&pdev->dev, 0);
@@ -437,6 +439,14 @@
 	return 0;
 }
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xlp_spi_acpi_match[] = {
+	{ "BRCM900D", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, xlp_spi_acpi_match);
+#endif
+
 static const struct of_device_id xlp_spi_dt_id[] = {
 	{ .compatible = "netlogic,xlp832-spi" },
 	{ },
@@ -447,6 +457,7 @@
 	.driver = {
 		.name	= "xlp-spi",
 		.of_match_table = xlp_spi_dt_id,
+		.acpi_match_table = ACPI_PTR(xlp_spi_acpi_match),
 	},
 };
 module_platform_driver(xlp_spi_driver);
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 51ad42f..8146ccd 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -37,6 +37,7 @@
 #include <linux/kthread.h>
 #include <linux/ioport.h>
 #include <linux/acpi.h>
+#include <linux/highmem.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/spi.h>
@@ -709,6 +710,13 @@
 {
 	const bool vmalloced_buf = is_vmalloc_addr(buf);
 	unsigned int max_seg_size = dma_get_max_seg_size(dev);
+#ifdef CONFIG_HIGHMEM
+	const bool kmap_buf = ((unsigned long)buf >= PKMAP_BASE &&
+				(unsigned long)buf < (PKMAP_BASE +
+					(LAST_PKMAP * PAGE_SIZE)));
+#else
+	const bool kmap_buf = false;
+#endif
 	int desc_len;
 	int sgs;
 	struct page *vm_page;
@@ -716,7 +724,7 @@
 	size_t min;
 	int i, ret;
 
-	if (vmalloced_buf) {
+	if (vmalloced_buf || kmap_buf) {
 		desc_len = min_t(int, max_seg_size, PAGE_SIZE);
 		sgs = DIV_ROUND_UP(len + offset_in_page(buf), desc_len);
 	} else if (virt_addr_valid(buf)) {
@@ -732,10 +740,13 @@
 
 	for (i = 0; i < sgs; i++) {
 
-		if (vmalloced_buf) {
+		if (vmalloced_buf || kmap_buf) {
 			min = min_t(size_t,
 				    len, desc_len - offset_in_page(buf));
-			vm_page = vmalloc_to_page(buf);
+			if (vmalloced_buf)
+				vm_page = vmalloc_to_page(buf);
+			else
+				vm_page = kmap_to_page(buf);
 			if (!vm_page) {
 				sg_free_table(sgt);
 				return -ENOMEM;
@@ -960,7 +971,7 @@
 	struct spi_transfer *xfer;
 	bool keep_cs = false;
 	int ret = 0;
-	unsigned long ms = 1;
+	unsigned long long ms = 1;
 	struct spi_statistics *statm = &master->statistics;
 	struct spi_statistics *stats = &msg->spi->statistics;
 
@@ -991,9 +1002,13 @@
 
 			if (ret > 0) {
 				ret = 0;
-				ms = xfer->len * 8 * 1000 / xfer->speed_hz;
+				ms = 8LL * 1000LL * xfer->len;
+				do_div(ms, xfer->speed_hz);
 				ms += ms + 100; /* some tolerance */
 
+				if (ms > UINT_MAX)
+					ms = UINT_MAX;
+
 				ms = wait_for_completion_timeout(&master->xfer_completion,
 								 msecs_to_jiffies(ms));
 			}
@@ -1159,6 +1174,7 @@
 		if (ret < 0) {
 			dev_err(&master->dev, "Failed to power device: %d\n",
 				ret);
+			mutex_unlock(&master->io_mutex);
 			return;
 		}
 	}
@@ -1174,6 +1190,7 @@
 
 			if (master->auto_runtime_pm)
 				pm_runtime_put(master->dev.parent);
+			mutex_unlock(&master->io_mutex);
 			return;
 		}
 	}
diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c
index aca282d..5ec3a59 100644
--- a/drivers/spmi/spmi-pmic-arb.c
+++ b/drivers/spmi/spmi-pmic-arb.c
@@ -954,6 +954,7 @@
 	if (channel > 5) {
 		dev_err(&pdev->dev, "invalid channel (%u) specified.\n",
 			channel);
+		err = -EINVAL;
 		goto err_put_ctrl;
 	}
 
diff --git a/drivers/staging/board/board.c b/drivers/staging/board/board.c
index 45807d8..86dc411 100644
--- a/drivers/staging/board/board.c
+++ b/drivers/staging/board/board.c
@@ -140,7 +140,6 @@
 					const char *domain)
 {
 	struct of_phandle_args pd_args;
-	struct generic_pm_domain *pd;
 	struct device_node *np;
 
 	np = of_find_node_by_path(domain);
@@ -151,14 +150,8 @@
 
 	pd_args.np = np;
 	pd_args.args_count = 0;
-	pd = of_genpd_get_from_provider(&pd_args);
-	if (IS_ERR(pd)) {
-		pr_err("Cannot find genpd %s (%ld)\n", domain, PTR_ERR(pd));
-		return PTR_ERR(pd);
-	}
-	pr_debug("Found genpd %s for device %s\n", pd->name, pdev->name);
 
-	return pm_genpd_add_device(pd, &pdev->dev);
+	return of_genpd_add_device(&pd_args, &pdev->dev);
 }
 #else
 static inline int board_staging_add_dev_domain(struct platform_device *pdev,
diff --git a/drivers/staging/comedi/drivers/adv_pci1760.c b/drivers/staging/comedi/drivers/adv_pci1760.c
index d7dd1e5..9f525ff 100644
--- a/drivers/staging/comedi/drivers/adv_pci1760.c
+++ b/drivers/staging/comedi/drivers/adv_pci1760.c
@@ -196,6 +196,7 @@
 		break;
 	case CMDF_ROUND_DOWN:
 		divisor = ns / PCI1760_PWM_TIMEBASE;
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/drivers/staging/comedi/drivers/comedi_test.c b/drivers/staging/comedi/drivers/comedi_test.c
index 4ab1866..ec5b9a2 100644
--- a/drivers/staging/comedi/drivers/comedi_test.c
+++ b/drivers/staging/comedi/drivers/comedi_test.c
@@ -56,11 +56,6 @@
 
 #define N_CHANS 8
 
-enum waveform_state_bits {
-	WAVEFORM_AI_RUNNING,
-	WAVEFORM_AO_RUNNING
-};
-
 /* Data unique to this driver */
 struct waveform_private {
 	struct timer_list ai_timer;	/* timer for AI commands */
@@ -68,7 +63,6 @@
 	unsigned int wf_amplitude;	/* waveform amplitude in microvolts */
 	unsigned int wf_period;		/* waveform period in microseconds */
 	unsigned int wf_current;	/* current time in waveform period */
-	unsigned long state_bits;
 	unsigned int ai_scan_period;	/* AI scan period in usec */
 	unsigned int ai_convert_period;	/* AI conversion period in usec */
 	struct timer_list ao_timer;	/* timer for AO commands */
@@ -191,10 +185,6 @@
 	unsigned int nsamples;
 	unsigned int time_increment;
 
-	/* check command is still active */
-	if (!test_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits))
-		return;
-
 	now = ktime_to_us(ktime_get());
 	nsamples = comedi_nsamples_left(s, UINT_MAX);
 
@@ -386,11 +376,6 @@
 	 */
 	devpriv->ai_timer.expires =
 		jiffies + usecs_to_jiffies(devpriv->ai_convert_period) + 1;
-
-	/* mark command as active */
-	smp_mb__before_atomic();
-	set_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits);
-	smp_mb__after_atomic();
 	add_timer(&devpriv->ai_timer);
 	return 0;
 }
@@ -400,11 +385,12 @@
 {
 	struct waveform_private *devpriv = dev->private;
 
-	/* mark command as no longer active */
-	clear_bit(WAVEFORM_AI_RUNNING, &devpriv->state_bits);
-	smp_mb__after_atomic();
-	/* cannot call del_timer_sync() as may be called from timer routine */
-	del_timer(&devpriv->ai_timer);
+	if (in_softirq()) {
+		/* Assume we were called from the timer routine itself. */
+		del_timer(&devpriv->ai_timer);
+	} else {
+		del_timer_sync(&devpriv->ai_timer);
+	}
 	return 0;
 }
 
@@ -436,10 +422,6 @@
 	u64 scans_since;
 	unsigned int scans_avail = 0;
 
-	/* check command is still active */
-	if (!test_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits))
-		return;
-
 	/* determine number of scan periods since last time */
 	now = ktime_to_us(ktime_get());
 	scans_since = now - devpriv->ao_last_scan_time;
@@ -518,11 +500,6 @@
 	devpriv->ao_last_scan_time = ktime_to_us(ktime_get());
 	devpriv->ao_timer.expires =
 		jiffies + usecs_to_jiffies(devpriv->ao_scan_period);
-
-	/* mark command as active */
-	smp_mb__before_atomic();
-	set_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits);
-	smp_mb__after_atomic();
 	add_timer(&devpriv->ao_timer);
 
 	return 1;
@@ -608,11 +585,12 @@
 	struct waveform_private *devpriv = dev->private;
 
 	s->async->inttrig = NULL;
-	/* mark command as no longer active */
-	clear_bit(WAVEFORM_AO_RUNNING, &devpriv->state_bits);
-	smp_mb__after_atomic();
-	/* cannot call del_timer_sync() as may be called from timer routine */
-	del_timer(&devpriv->ao_timer);
+	if (in_softirq()) {
+		/* Assume we were called from the timer routine itself. */
+		del_timer(&devpriv->ao_timer);
+	} else {
+		del_timer_sync(&devpriv->ao_timer);
+	}
 	return 0;
 }
 
diff --git a/drivers/staging/comedi/drivers/daqboard2000.c b/drivers/staging/comedi/drivers/daqboard2000.c
index 65daef0..0f4eb95 100644
--- a/drivers/staging/comedi/drivers/daqboard2000.c
+++ b/drivers/staging/comedi/drivers/daqboard2000.c
@@ -634,7 +634,7 @@
 	const struct daq200_boardtype *board;
 	int i;
 
-	if (pcidev->subsystem_device != PCI_VENDOR_ID_IOTECH)
+	if (pcidev->subsystem_vendor != PCI_VENDOR_ID_IOTECH)
 		return NULL;
 
 	for (i = 0; i < ARRAY_SIZE(boardtypes); i++) {
diff --git a/drivers/staging/comedi/drivers/dt2811.c b/drivers/staging/comedi/drivers/dt2811.c
index 904f6377..8bbd938 100644
--- a/drivers/staging/comedi/drivers/dt2811.c
+++ b/drivers/staging/comedi/drivers/dt2811.c
@@ -588,8 +588,8 @@
 	s = &dev->subdevices[0];
 	s->type		= COMEDI_SUBD_AI;
 	s->subdev_flags	= SDF_READABLE |
-			  (it->options[2] == 1) ? SDF_DIFF :
-			  (it->options[2] == 2) ? SDF_COMMON : SDF_GROUND;
+			  ((it->options[2] == 1) ? SDF_DIFF :
+			   (it->options[2] == 2) ? SDF_COMMON : SDF_GROUND);
 	s->n_chan	= (it->options[2] == 1) ? 8 : 16;
 	s->maxdata	= 0x0fff;
 	s->range_table	= board->is_pgh ? &dt2811_pgh_ai_ranges
diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c
index 8dabb19..0f97d7b 100644
--- a/drivers/staging/comedi/drivers/ni_mio_common.c
+++ b/drivers/staging/comedi/drivers/ni_mio_common.c
@@ -2772,7 +2772,15 @@
 	int i;
 	static const int timeout = 1000;
 
-	if (trig_num != cmd->start_arg)
+	/*
+	 * Require trig_num == cmd->start_arg when cmd->start_src == TRIG_INT.
+	 * For backwards compatibility, also allow trig_num == 0 when
+	 * cmd->start_src != TRIG_INT (i.e. when cmd->start_src == TRIG_EXT);
+	 * in that case, the internal trigger is being used as a pre-trigger
+	 * before the external trigger.
+	 */
+	if (!(trig_num == cmd->start_arg ||
+	      (trig_num == 0 && cmd->start_src != TRIG_INT)))
 		return -EINVAL;
 
 	/*
@@ -5480,7 +5488,7 @@
 		s->maxdata	= (devpriv->is_m_series) ? 0xffffffff
 							 : 0x00ffffff;
 		s->insn_read	= ni_tio_insn_read;
-		s->insn_write	= ni_tio_insn_read;
+		s->insn_write	= ni_tio_insn_write;
 		s->insn_config	= ni_tio_insn_config;
 #ifdef PCIDMA
 		if (dev->irq && devpriv->mite) {
diff --git a/drivers/staging/fsl-mc/bus/mc-msi.c b/drivers/staging/fsl-mc/bus/mc-msi.c
index c7be156..4fd8e41 100644
--- a/drivers/staging/fsl-mc/bus/mc-msi.c
+++ b/drivers/staging/fsl-mc/bus/mc-msi.c
@@ -213,7 +213,7 @@
 	struct msi_desc *msi_desc;
 
 	for (i = 0; i < irq_count; i++) {
-		msi_desc = alloc_msi_entry(dev);
+		msi_desc = alloc_msi_entry(dev, 1, NULL);
 		if (!msi_desc) {
 			dev_err(dev, "Failed to allocate msi entry\n");
 			error = -ENOMEM;
@@ -221,7 +221,6 @@
 		}
 
 		msi_desc->fsl_mc.msi_index = i;
-		msi_desc->nvec_used = 1;
 		INIT_LIST_HEAD(&msi_desc->list);
 		list_add_tail(&msi_desc->list, dev_to_msi_list(dev));
 	}
diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index 170ac98..24c348d 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -419,6 +419,7 @@
 	mutex_lock(&indio_dev->mlock);
 	switch ((u32)this_attr->address) {
 	case AD5933_OUT_RANGE:
+		ret = -EINVAL;
 		for (i = 0; i < 4; i++)
 			if (val == st->range_avail[i]) {
 				st->ctrl_hb &= ~AD5933_CTRL_RANGE(0x3);
@@ -426,7 +427,6 @@
 				ret = ad5933_cmd(st, 0);
 				break;
 			}
-		ret = -EINVAL;
 		break;
 	case AD5933_IN_PGA_GAIN:
 		if (sysfs_streq(buf, "1")) {
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index 3664bfd..2c4dc69 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -388,6 +388,7 @@
 	struct inode *inode = NULL;
 	__u64 bits = 0;
 	int rc = 0;
+	struct dentry *alias;
 
 	/* NB 1 request reference will be taken away by ll_intent_lock()
 	 * when I return
@@ -412,26 +413,12 @@
 		 */
 	}
 
-	/* Only hash *de if it is unhashed (new dentry).
-	 * Atoimc_open may passing hashed dentries for open.
-	 */
-	if (d_unhashed(*de)) {
-		struct dentry *alias;
-
-		alias = ll_splice_alias(inode, *de);
-		if (IS_ERR(alias)) {
-			rc = PTR_ERR(alias);
-			goto out;
-		}
-		*de = alias;
-	} else if (!it_disposition(it, DISP_LOOKUP_NEG)  &&
-		   !it_disposition(it, DISP_OPEN_CREATE)) {
-		/* With DISP_OPEN_CREATE dentry will be
-		 * instantiated in ll_create_it.
-		 */
-		LASSERT(!d_inode(*de));
-		d_instantiate(*de, inode);
+	alias = ll_splice_alias(inode, *de);
+	if (IS_ERR(alias)) {
+		rc = PTR_ERR(alias);
+		goto out;
 	}
+	*de = alias;
 
 	if (!it_disposition(it, DISP_LOOKUP_NEG)) {
 		/* we have lookup look - unhide dentry */
@@ -587,6 +574,24 @@
 	       dentry, PFID(ll_inode2fid(dir)), dir, file, open_flags, mode,
 	       *opened);
 
+	/* Only negative dentries enter here */
+	LASSERT(!d_inode(dentry));
+
+	if (!d_in_lookup(dentry)) {
+		/* A valid negative dentry that just passed revalidation,
+		 * there's little point to try and open it server-side,
+		 * even though there's a minuscle chance it might succeed.
+		 * Either way it's a valid race to just return -ENOENT here.
+		 */
+		if (!(open_flags & O_CREAT))
+			return -ENOENT;
+
+		/* Otherwise we just unhash it to be rehashed afresh via
+		 * lookup if necessary
+		 */
+		d_drop(dentry);
+	}
+
 	it = kzalloc(sizeof(*it), GFP_NOFS);
 	if (!it)
 		return -ENOMEM;
diff --git a/drivers/staging/media/cec/TODO b/drivers/staging/media/cec/TODO
index a10d4f8..1322469 100644
--- a/drivers/staging/media/cec/TODO
+++ b/drivers/staging/media/cec/TODO
@@ -12,6 +12,7 @@
 
 Other TODOs:
 
+- There are two possible replies to CEC_MSG_INITIATE_ARC. How to handle that?
 - Add a flag to inhibit passing CEC RC messages to the rc subsystem.
   Applications should be able to choose this when calling S_LOG_ADDRS.
 - If the reply field of cec_msg is set then when the reply arrives it
diff --git a/drivers/staging/media/cec/cec-adap.c b/drivers/staging/media/cec/cec-adap.c
index b2393bba..946986f 100644
--- a/drivers/staging/media/cec/cec-adap.c
+++ b/drivers/staging/media/cec/cec-adap.c
@@ -124,10 +124,10 @@
 	u64 ts = ktime_get_ns();
 	struct cec_fh *fh;
 
-	mutex_lock(&adap->devnode.fhs_lock);
+	mutex_lock(&adap->devnode.lock);
 	list_for_each_entry(fh, &adap->devnode.fhs, list)
 		cec_queue_event_fh(fh, ev, ts);
-	mutex_unlock(&adap->devnode.fhs_lock);
+	mutex_unlock(&adap->devnode.lock);
 }
 
 /*
@@ -191,12 +191,12 @@
 	u32 monitor_mode = valid_la ? CEC_MODE_MONITOR :
 				      CEC_MODE_MONITOR_ALL;
 
-	mutex_lock(&adap->devnode.fhs_lock);
+	mutex_lock(&adap->devnode.lock);
 	list_for_each_entry(fh, &adap->devnode.fhs, list) {
 		if (fh->mode_follower >= monitor_mode)
 			cec_queue_msg_fh(fh, msg);
 	}
-	mutex_unlock(&adap->devnode.fhs_lock);
+	mutex_unlock(&adap->devnode.lock);
 }
 
 /*
@@ -207,12 +207,12 @@
 {
 	struct cec_fh *fh;
 
-	mutex_lock(&adap->devnode.fhs_lock);
+	mutex_lock(&adap->devnode.lock);
 	list_for_each_entry(fh, &adap->devnode.fhs, list) {
 		if (fh->mode_follower == CEC_MODE_FOLLOWER)
 			cec_queue_msg_fh(fh, msg);
 	}
-	mutex_unlock(&adap->devnode.fhs_lock);
+	mutex_unlock(&adap->devnode.lock);
 }
 
 /* Notify userspace of an adapter state change. */
@@ -851,6 +851,9 @@
 	if (!valid_la || msg->len <= 1)
 		return;
 
+	if (adap->log_addrs.log_addr_mask == 0)
+		return;
+
 	/*
 	 * Process the message on the protocol level. If is_reply is true,
 	 * then cec_receive_notify() won't pass on the reply to the listener(s)
@@ -1047,11 +1050,17 @@
 			dprintk(1, "could not claim LA %d\n", i);
 	}
 
+	if (adap->log_addrs.log_addr_mask == 0 &&
+	    !(las->flags & CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK))
+		goto unconfigure;
+
 configured:
 	if (adap->log_addrs.log_addr_mask == 0) {
 		/* Fall back to unregistered */
 		las->log_addr[0] = CEC_LOG_ADDR_UNREGISTERED;
 		las->log_addr_mask = 1 << las->log_addr[0];
+		for (i = 1; i < las->num_log_addrs; i++)
+			las->log_addr[i] = CEC_LOG_ADDR_INVALID;
 	}
 	adap->is_configured = true;
 	adap->is_configuring = false;
@@ -1070,6 +1079,8 @@
 			cec_report_features(adap, i);
 		cec_report_phys_addr(adap, i);
 	}
+	for (i = las->num_log_addrs; i < CEC_MAX_LOG_ADDRS; i++)
+		las->log_addr[i] = CEC_LOG_ADDR_INVALID;
 	mutex_lock(&adap->lock);
 	adap->kthread_config = NULL;
 	mutex_unlock(&adap->lock);
@@ -1398,7 +1409,6 @@
 	u8 init_laddr = cec_msg_initiator(msg);
 	u8 devtype = cec_log_addr2dev(adap, dest_laddr);
 	int la_idx = cec_log_addr2idx(adap, dest_laddr);
-	bool is_directed = la_idx >= 0;
 	bool from_unregistered = init_laddr == 0xf;
 	struct cec_msg tx_cec_msg = { };
 
@@ -1560,7 +1570,7 @@
 		 * Unprocessed messages are aborted if userspace isn't doing
 		 * any processing either.
 		 */
-		if (is_directed && !is_reply && !adap->follower_cnt &&
+		if (!is_broadcast && !is_reply && !adap->follower_cnt &&
 		    !adap->cec_follower && msg->msg[1] != CEC_MSG_FEATURE_ABORT)
 			return cec_feature_abort(adap, msg);
 		break;
diff --git a/drivers/staging/media/cec/cec-api.c b/drivers/staging/media/cec/cec-api.c
index 7be7615..e274e2f 100644
--- a/drivers/staging/media/cec/cec-api.c
+++ b/drivers/staging/media/cec/cec-api.c
@@ -162,7 +162,7 @@
 		return -ENOTTY;
 	if (copy_from_user(&log_addrs, parg, sizeof(log_addrs)))
 		return -EFAULT;
-	log_addrs.flags = 0;
+	log_addrs.flags &= CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK;
 	mutex_lock(&adap->lock);
 	if (!adap->is_configuring &&
 	    (!log_addrs.num_log_addrs || !adap->is_configured) &&
@@ -435,7 +435,7 @@
 	void __user *parg = (void __user *)arg;
 
 	if (!devnode->registered)
-		return -EIO;
+		return -ENODEV;
 
 	switch (cmd) {
 	case CEC_ADAP_G_CAPS:
@@ -508,14 +508,14 @@
 
 	filp->private_data = fh;
 
-	mutex_lock(&devnode->fhs_lock);
+	mutex_lock(&devnode->lock);
 	/* Queue up initial state events */
 	ev_state.state_change.phys_addr = adap->phys_addr;
 	ev_state.state_change.log_addr_mask = adap->log_addrs.log_addr_mask;
 	cec_queue_event_fh(fh, &ev_state, 0);
 
 	list_add(&fh->list, &devnode->fhs);
-	mutex_unlock(&devnode->fhs_lock);
+	mutex_unlock(&devnode->lock);
 
 	return 0;
 }
@@ -540,9 +540,9 @@
 		cec_monitor_all_cnt_dec(adap);
 	mutex_unlock(&adap->lock);
 
-	mutex_lock(&devnode->fhs_lock);
+	mutex_lock(&devnode->lock);
 	list_del(&fh->list);
-	mutex_unlock(&devnode->fhs_lock);
+	mutex_unlock(&devnode->lock);
 
 	/* Unhook pending transmits from this filehandle. */
 	mutex_lock(&adap->lock);
diff --git a/drivers/staging/media/cec/cec-core.c b/drivers/staging/media/cec/cec-core.c
index 112a5fa..3b1e4d2 100644
--- a/drivers/staging/media/cec/cec-core.c
+++ b/drivers/staging/media/cec/cec-core.c
@@ -51,31 +51,29 @@
 {
 	/*
 	 * Check if the cec device is available. This needs to be done with
-	 * the cec_devnode_lock held to prevent an open/unregister race:
+	 * the devnode->lock held to prevent an open/unregister race:
 	 * without the lock, the device could be unregistered and freed between
 	 * the devnode->registered check and get_device() calls, leading to
 	 * a crash.
 	 */
-	mutex_lock(&cec_devnode_lock);
+	mutex_lock(&devnode->lock);
 	/*
 	 * return ENXIO if the cec device has been removed
 	 * already or if it is not registered anymore.
 	 */
 	if (!devnode->registered) {
-		mutex_unlock(&cec_devnode_lock);
+		mutex_unlock(&devnode->lock);
 		return -ENXIO;
 	}
 	/* and increase the device refcount */
 	get_device(&devnode->dev);
-	mutex_unlock(&cec_devnode_lock);
+	mutex_unlock(&devnode->lock);
 	return 0;
 }
 
 void cec_put_device(struct cec_devnode *devnode)
 {
-	mutex_lock(&cec_devnode_lock);
 	put_device(&devnode->dev);
-	mutex_unlock(&cec_devnode_lock);
 }
 
 /* Called when the last user of the cec device exits. */
@@ -84,11 +82,10 @@
 	struct cec_devnode *devnode = to_cec_devnode(cd);
 
 	mutex_lock(&cec_devnode_lock);
-
 	/* Mark device node number as free */
 	clear_bit(devnode->minor, cec_devnode_nums);
-
 	mutex_unlock(&cec_devnode_lock);
+
 	cec_delete_adapter(to_cec_adapter(devnode));
 }
 
@@ -117,7 +114,7 @@
 
 	/* Initialization */
 	INIT_LIST_HEAD(&devnode->fhs);
-	mutex_init(&devnode->fhs_lock);
+	mutex_init(&devnode->lock);
 
 	/* Part 1: Find a free minor number */
 	mutex_lock(&cec_devnode_lock);
@@ -160,7 +157,9 @@
 cdev_del:
 	cdev_del(&devnode->cdev);
 clr_bit:
+	mutex_lock(&cec_devnode_lock);
 	clear_bit(devnode->minor, cec_devnode_nums);
+	mutex_unlock(&cec_devnode_lock);
 	return ret;
 }
 
@@ -177,17 +176,21 @@
 {
 	struct cec_fh *fh;
 
-	/* Check if devnode was never registered or already unregistered */
-	if (!devnode->registered || devnode->unregistered)
-		return;
+	mutex_lock(&devnode->lock);
 
-	mutex_lock(&devnode->fhs_lock);
+	/* Check if devnode was never registered or already unregistered */
+	if (!devnode->registered || devnode->unregistered) {
+		mutex_unlock(&devnode->lock);
+		return;
+	}
+
 	list_for_each_entry(fh, &devnode->fhs, list)
 		wake_up_interruptible(&fh->wait);
-	mutex_unlock(&devnode->fhs_lock);
 
 	devnode->registered = false;
 	devnode->unregistered = true;
+	mutex_unlock(&devnode->lock);
+
 	device_del(&devnode->dev);
 	cdev_del(&devnode->cdev);
 	put_device(&devnode->dev);
diff --git a/drivers/staging/media/pulse8-cec/pulse8-cec.c b/drivers/staging/media/pulse8-cec/pulse8-cec.c
index 94f8590..ed8bd95 100644
--- a/drivers/staging/media/pulse8-cec/pulse8-cec.c
+++ b/drivers/staging/media/pulse8-cec/pulse8-cec.c
@@ -114,14 +114,11 @@
 		cec_transmit_done(pulse8->adap, CEC_TX_STATUS_OK,
 				  0, 0, 0, 0);
 		break;
-	case MSGCODE_TRANSMIT_FAILED_LINE:
-		cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ARB_LOST,
-				  1, 0, 0, 0);
-		break;
 	case MSGCODE_TRANSMIT_FAILED_ACK:
 		cec_transmit_done(pulse8->adap, CEC_TX_STATUS_NACK,
 				  0, 1, 0, 0);
 		break;
+	case MSGCODE_TRANSMIT_FAILED_LINE:
 	case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA:
 	case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE:
 		cec_transmit_done(pulse8->adap, CEC_TX_STATUS_ERROR,
@@ -170,6 +167,9 @@
 		case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE:
 			schedule_work(&pulse8->work);
 			break;
+		case MSGCODE_HIGH_ERROR:
+		case MSGCODE_LOW_ERROR:
+		case MSGCODE_RECEIVE_FAILED:
 		case MSGCODE_TIMEOUT_ERROR:
 			break;
 		case MSGCODE_COMMAND_ACCEPTED:
@@ -388,7 +388,7 @@
 	int err;
 
 	cmd[0] = MSGCODE_TRANSMIT_IDLETIME;
-	cmd[1] = 3;
+	cmd[1] = signal_free_time;
 	err = pulse8_send_and_wait(pulse8, cmd, 2,
 				   MSGCODE_COMMAND_ACCEPTED, 1);
 	cmd[0] = MSGCODE_TRANSMIT_ACK_POLARITY;
diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c
index e13a4ab..1fde9c8 100644
--- a/drivers/staging/octeon/ethernet-mdio.c
+++ b/drivers/staging/octeon/ethernet-mdio.c
@@ -34,48 +34,23 @@
 	strlcpy(info->bus_info, "Builtin", sizeof(info->bus_info));
 }
 
-static int cvm_oct_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
-	if (priv->phydev)
-		return phy_ethtool_gset(priv->phydev, cmd);
-
-	return -EINVAL;
-}
-
-static int cvm_oct_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
-{
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (priv->phydev)
-		return phy_ethtool_sset(priv->phydev, cmd);
-
-	return -EINVAL;
-}
-
 static int cvm_oct_nway_reset(struct net_device *dev)
 {
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (priv->phydev)
-		return phy_start_aneg(priv->phydev);
+	if (dev->phydev)
+		return phy_start_aneg(dev->phydev);
 
 	return -EINVAL;
 }
 
 const struct ethtool_ops cvm_oct_ethtool_ops = {
 	.get_drvinfo = cvm_oct_get_drvinfo,
-	.get_settings = cvm_oct_get_settings,
-	.set_settings = cvm_oct_set_settings,
 	.nway_reset = cvm_oct_nway_reset,
 	.get_link = ethtool_op_get_link,
+	.get_link_ksettings = phy_ethtool_get_link_ksettings,
+	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
 
 /**
@@ -88,15 +63,13 @@
  */
 int cvm_oct_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
 	if (!netif_running(dev))
 		return -EINVAL;
 
-	if (!priv->phydev)
+	if (!dev->phydev)
 		return -EINVAL;
 
-	return phy_mii_ioctl(priv->phydev, rq, cmd);
+	return phy_mii_ioctl(dev->phydev, rq, cmd);
 }
 
 void cvm_oct_note_carrier(struct octeon_ethernet *priv,
@@ -119,9 +92,9 @@
 	cvmx_helper_link_info_t link_info;
 
 	link_info.u64		= 0;
-	link_info.s.link_up	= priv->phydev->link ? 1 : 0;
-	link_info.s.full_duplex = priv->phydev->duplex ? 1 : 0;
-	link_info.s.speed	= priv->phydev->speed;
+	link_info.s.link_up	= dev->phydev->link ? 1 : 0;
+	link_info.s.full_duplex = dev->phydev->duplex ? 1 : 0;
+	link_info.s.speed	= dev->phydev->speed;
 	priv->link_info		= link_info.u64;
 
 	/*
@@ -130,8 +103,8 @@
 	if (priv->poll)
 		priv->poll(dev);
 
-	if (priv->last_link != priv->phydev->link) {
-		priv->last_link = priv->phydev->link;
+	if (priv->last_link != dev->phydev->link) {
+		priv->last_link = dev->phydev->link;
 		cvmx_helper_link_set(priv->port, link_info);
 		cvm_oct_note_carrier(priv, link_info);
 	}
@@ -151,9 +124,8 @@
 
 	priv->poll = NULL;
 
-	if (priv->phydev)
-		phy_disconnect(priv->phydev);
-	priv->phydev = NULL;
+	if (dev->phydev)
+		phy_disconnect(dev->phydev);
 
 	if (priv->last_link) {
 		link_info.u64 = 0;
@@ -176,6 +148,7 @@
 {
 	struct octeon_ethernet *priv = netdev_priv(dev);
 	struct device_node *phy_node;
+	struct phy_device *phydev = NULL;
 
 	if (!priv->of_node)
 		goto no_phy;
@@ -193,14 +166,14 @@
 	if (!phy_node)
 		goto no_phy;
 
-	priv->phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0,
-				      PHY_INTERFACE_MODE_GMII);
+	phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0,
+				PHY_INTERFACE_MODE_GMII);
 
-	if (!priv->phydev)
+	if (!phydev)
 		return -ENODEV;
 
 	priv->last_link = 0;
-	phy_start_aneg(priv->phydev);
+	phy_start_aneg(phydev);
 
 	return 0;
 no_phy:
diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c
index 91b148c..48846df 100644
--- a/drivers/staging/octeon/ethernet-rgmii.c
+++ b/drivers/staging/octeon/ethernet-rgmii.c
@@ -145,7 +145,7 @@
 	if (ret)
 		return ret;
 
-	if (priv->phydev) {
+	if (dev->phydev) {
 		/*
 		 * In phydev mode, we need still periodic polling for the
 		 * preamble error checking, and we also need to call this
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index e9cd5f2..45d5763 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -457,10 +457,8 @@
 
 void cvm_oct_common_uninit(struct net_device *dev)
 {
-	struct octeon_ethernet *priv = netdev_priv(dev);
-
-	if (priv->phydev)
-		phy_disconnect(priv->phydev);
+	if (dev->phydev)
+		phy_disconnect(dev->phydev);
 }
 
 int cvm_oct_common_open(struct net_device *dev,
@@ -484,10 +482,10 @@
 	if (octeon_is_simulation())
 		return 0;
 
-	if (priv->phydev) {
-		int r = phy_read_status(priv->phydev);
+	if (dev->phydev) {
+		int r = phy_read_status(dev->phydev);
 
-		if (r == 0 && priv->phydev->link == 0)
+		if (r == 0 && dev->phydev->link == 0)
 			netif_carrier_off(dev);
 		cvm_oct_adjust_link(dev);
 	} else {
diff --git a/drivers/staging/octeon/octeon-ethernet.h b/drivers/staging/octeon/octeon-ethernet.h
index 6275c15..d533aef 100644
--- a/drivers/staging/octeon/octeon-ethernet.h
+++ b/drivers/staging/octeon/octeon-ethernet.h
@@ -40,7 +40,6 @@
 	struct sk_buff_head tx_free_list[16];
 	/* Device statistics */
 	struct net_device_stats stats;
-	struct phy_device *phydev;
 	unsigned int last_speed;
 	unsigned int last_link;
 	/* Last negotiated link state */
diff --git a/drivers/staging/wilc1000/host_interface.c b/drivers/staging/wilc1000/host_interface.c
index 0b1760c..78f524f 100644
--- a/drivers/staging/wilc1000/host_interface.c
+++ b/drivers/staging/wilc1000/host_interface.c
@@ -3363,7 +3363,7 @@
 		if (!hif_workqueue) {
 			netdev_err(vif->ndev, "Failed to create workqueue\n");
 			result = -ENOMEM;
-			goto _fail_mq_;
+			goto _fail_;
 		}
 
 		setup_timer(&periodic_rssi, GetPeriodicRSSI,
@@ -3391,7 +3391,6 @@
 
 	clients_count++;
 
-_fail_mq_:
 	destroy_workqueue(hif_workqueue);
 _fail_:
 	return result;
diff --git a/drivers/staging/wilc1000/linux_wlan.c b/drivers/staging/wilc1000/linux_wlan.c
index 3a66255..3221511 100644
--- a/drivers/staging/wilc1000/linux_wlan.c
+++ b/drivers/staging/wilc1000/linux_wlan.c
@@ -648,7 +648,7 @@
 			mutex_unlock(&wl->hif_cs);
 		}
 		if (&wl->txq_event)
-			wait_for_completion(&wl->txq_event);
+			complete(&wl->txq_event);
 
 		wlan_deinitialize_threads(dev);
 		deinit_irq(dev);
diff --git a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
index 9092600..2c2e8ac 100644
--- a/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
+++ b/drivers/staging/wilc1000/wilc_wfi_cfgoperations.c
@@ -1191,7 +1191,7 @@
 	struct wilc_priv *priv;
 	struct wilc_vif *vif;
 	u32 i = 0;
-	u32 associatedsta = 0;
+	u32 associatedsta = ~0;
 	u32 inactive_time = 0;
 	priv = wiphy_priv(wiphy);
 	vif = netdev_priv(dev);
@@ -1204,7 +1204,7 @@
 			}
 		}
 
-		if (associatedsta == -1) {
+		if (associatedsta == ~0) {
 			netdev_err(dev, "sta required is not associated\n");
 			return -ENOENT;
 		}
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
index 0ae0b13..2fb1bf1 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c
@@ -24,6 +24,7 @@
 #include <net/ip6_route.h>
 #include <net/addrconf.h>
 
+#include <libcxgb_cm.h>
 #include "cxgbit.h"
 #include "clip_tbl.h"
 
@@ -72,15 +73,6 @@
 	return wr_waitp->ret;
 }
 
-/* Returns whether a CPL status conveys negative advice.
- */
-static int cxgbit_is_neg_adv(unsigned int status)
-{
-	return status == CPL_ERR_RTX_NEG_ADVICE ||
-		status == CPL_ERR_PERSIST_NEG_ADVICE ||
-		status == CPL_ERR_KEEPALV_NEG_ADVICE;
-}
-
 static int cxgbit_np_hashfn(const struct cxgbit_np *cnp)
 {
 	return ((unsigned long)cnp >> 10) & (NP_INFO_HASH_SIZE - 1);
@@ -623,21 +615,14 @@
 static void cxgbit_send_halfclose(struct cxgbit_sock *csk)
 {
 	struct sk_buff *skb;
-	struct cpl_close_con_req *req;
-	unsigned int len = roundup(sizeof(struct cpl_close_con_req), 16);
+	u32 len = roundup(sizeof(struct cpl_close_con_req), 16);
 
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (!skb)
 		return;
 
-	req = (struct cpl_close_con_req *)__skb_put(skb, len);
-	memset(req, 0, len);
-
-	set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx);
-	INIT_TP_WR(req, csk->tid);
-	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
-						    csk->tid));
-	req->rsvd = 0;
+	cxgb_mk_close_con_req(skb, len, csk->tid, csk->txq_idx,
+			      NULL, NULL);
 
 	cxgbit_skcb_flags(skb) |= SKCBF_TX_FLAG_COMPL;
 	__skb_queue_tail(&csk->txq, skb);
@@ -662,9 +647,8 @@
 
 static int cxgbit_send_abort_req(struct cxgbit_sock *csk)
 {
-	struct cpl_abort_req *req;
-	unsigned int len = roundup(sizeof(*req), 16);
 	struct sk_buff *skb;
+	u32 len = roundup(sizeof(struct cpl_abort_req), 16);
 
 	pr_debug("%s: csk %p tid %u; state %d\n",
 		 __func__, csk, csk->tid, csk->com.state);
@@ -675,15 +659,9 @@
 		cxgbit_send_tx_flowc_wr(csk);
 
 	skb = __skb_dequeue(&csk->skbq);
-	req = (struct cpl_abort_req *)__skb_put(skb, len);
-	memset(req, 0, len);
+	cxgb_mk_abort_req(skb, len, csk->tid, csk->txq_idx,
+			  csk->com.cdev, cxgbit_abort_arp_failure);
 
-	set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx);
-	t4_set_arp_err_handler(skb, csk->com.cdev, cxgbit_abort_arp_failure);
-	INIT_TP_WR(req, csk->tid);
-	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ,
-						    csk->tid));
-	req->cmd = CPL_ABORT_SEND_RST;
 	return cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t);
 }
 
@@ -789,109 +767,6 @@
 	kfree(csk);
 }
 
-static void
-cxgbit_get_tuple_info(struct cpl_pass_accept_req *req, int *iptype,
-		      __u8 *local_ip, __u8 *peer_ip, __be16 *local_port,
-		      __be16 *peer_port)
-{
-	u32 eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
-	u32 ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
-	struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
-	struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
-	struct tcphdr *tcp = (struct tcphdr *)
-			      ((u8 *)(req + 1) + eth_len + ip_len);
-
-	if (ip->version == 4) {
-		pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n",
-			 __func__,
-			 ntohl(ip->saddr), ntohl(ip->daddr),
-			 ntohs(tcp->source),
-			 ntohs(tcp->dest));
-		*iptype = 4;
-		memcpy(peer_ip, &ip->saddr, 4);
-		memcpy(local_ip, &ip->daddr, 4);
-	} else {
-		pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n",
-			 __func__,
-			 ip6->saddr.s6_addr, ip6->daddr.s6_addr,
-			 ntohs(tcp->source),
-			 ntohs(tcp->dest));
-		*iptype = 6;
-		memcpy(peer_ip, ip6->saddr.s6_addr, 16);
-		memcpy(local_ip, ip6->daddr.s6_addr, 16);
-	}
-
-	*peer_port = tcp->source;
-	*local_port = tcp->dest;
-}
-
-static int
-cxgbit_our_interface(struct cxgbit_device *cdev, struct net_device *egress_dev)
-{
-	u8 i;
-
-	egress_dev = cxgbit_get_real_dev(egress_dev);
-	for (i = 0; i < cdev->lldi.nports; i++)
-		if (cdev->lldi.ports[i] == egress_dev)
-			return 1;
-	return 0;
-}
-
-static struct dst_entry *
-cxgbit_find_route6(struct cxgbit_device *cdev, __u8 *local_ip, __u8 *peer_ip,
-		   __be16 local_port, __be16 peer_port, u8 tos,
-		   __u32 sin6_scope_id)
-{
-	struct dst_entry *dst = NULL;
-
-	if (IS_ENABLED(CONFIG_IPV6)) {
-		struct flowi6 fl6;
-
-		memset(&fl6, 0, sizeof(fl6));
-		memcpy(&fl6.daddr, peer_ip, 16);
-		memcpy(&fl6.saddr, local_ip, 16);
-		if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
-			fl6.flowi6_oif = sin6_scope_id;
-		dst = ip6_route_output(&init_net, NULL, &fl6);
-		if (!dst)
-			goto out;
-		if (!cxgbit_our_interface(cdev, ip6_dst_idev(dst)->dev) &&
-		    !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
-			dst_release(dst);
-			dst = NULL;
-		}
-	}
-out:
-	return dst;
-}
-
-static struct dst_entry *
-cxgbit_find_route(struct cxgbit_device *cdev, __be32 local_ip, __be32 peer_ip,
-		  __be16 local_port, __be16 peer_port, u8 tos)
-{
-	struct rtable *rt;
-	struct flowi4 fl4;
-	struct neighbour *n;
-
-	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip,
-				   local_ip,
-				   peer_port, local_port, IPPROTO_TCP,
-				   tos, 0);
-	if (IS_ERR(rt))
-		return NULL;
-	n = dst_neigh_lookup(&rt->dst, &peer_ip);
-	if (!n)
-		return NULL;
-	if (!cxgbit_our_interface(cdev, n->dev) &&
-	    !(n->dev->flags & IFF_LOOPBACK)) {
-		neigh_release(n);
-		dst_release(&rt->dst);
-		return NULL;
-	}
-	neigh_release(n);
-	return &rt->dst;
-}
-
 static void cxgbit_set_tcp_window(struct cxgbit_sock *csk, struct port_info *pi)
 {
 	unsigned int linkspeed;
@@ -1072,21 +947,14 @@
 
 static void cxgbit_release_tid(struct cxgbit_device *cdev, u32 tid)
 {
-	struct cpl_tid_release *req;
-	unsigned int len = roundup(sizeof(*req), 16);
+	u32 len = roundup(sizeof(struct cpl_tid_release), 16);
 	struct sk_buff *skb;
 
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (!skb)
 		return;
 
-	req = (struct cpl_tid_release *)__skb_put(skb, len);
-	memset(req, 0, len);
-
-	INIT_TP_WR(req, tid);
-	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(
-		   CPL_TID_RELEASE, tid));
-	set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
+	cxgb_mk_tid_release(skb, len, tid, 0);
 	cxgbit_ofld_send(cdev, skb);
 }
 
@@ -1108,20 +976,6 @@
 	return ret < 0 ? ret : 0;
 }
 
-static void
-cxgbit_best_mtu(const unsigned short *mtus, unsigned short mtu,
-		unsigned int *idx, int use_ts, int ipv6)
-{
-	unsigned short hdr_size = (ipv6 ? sizeof(struct ipv6hdr) :
-				   sizeof(struct iphdr)) +
-				   sizeof(struct tcphdr) +
-				   (use_ts ? round_up(TCPOLEN_TIMESTAMP,
-				    4) : 0);
-	unsigned short data_size = mtu - hdr_size;
-
-	cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
-}
-
 static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb)
 {
 	if (csk->com.state != CSK_STATE_ESTABLISHED) {
@@ -1140,22 +994,18 @@
 int cxgbit_rx_data_ack(struct cxgbit_sock *csk)
 {
 	struct sk_buff *skb;
-	struct cpl_rx_data_ack *req;
-	unsigned int len = roundup(sizeof(*req), 16);
+	u32 len = roundup(sizeof(struct cpl_rx_data_ack), 16);
+	u32 credit_dack;
 
 	skb = alloc_skb(len, GFP_KERNEL);
 	if (!skb)
 		return -1;
 
-	req = (struct cpl_rx_data_ack *)__skb_put(skb, len);
-	memset(req, 0, len);
+	credit_dack = RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) |
+		      RX_CREDITS_V(csk->rx_credits);
 
-	set_wr_txq(skb, CPL_PRIORITY_ACK, csk->ctrlq_idx);
-	INIT_TP_WR(req, csk->tid);
-	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
-						    csk->tid));
-	req->credit_dack = cpu_to_be32(RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) |
-				       RX_CREDITS_V(csk->rx_credits));
+	cxgb_mk_rx_data_ack(skb, len, csk->tid, csk->ctrlq_idx,
+			    credit_dack);
 
 	csk->rx_credits = 0;
 
@@ -1210,15 +1060,6 @@
 	return -ENOMEM;
 }
 
-static u32 cxgbit_compute_wscale(u32 win)
-{
-	u32 wscale = 0;
-
-	while (wscale < 14 && (65535 << wscale) < win)
-		wscale++;
-	return wscale;
-}
-
 static void
 cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req)
 {
@@ -1246,10 +1087,10 @@
 	INIT_TP_WR(rpl5, csk->tid);
 	OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
 						     csk->tid));
-	cxgbit_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx,
-			req->tcpopt.tstamp,
-			(csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
-	wscale = cxgbit_compute_wscale(csk->rcv_win);
+	cxgb_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx,
+		      req->tcpopt.tstamp,
+		      (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+	wscale = cxgb_compute_wscale(csk->rcv_win);
 	/*
 	 * Specify the largest window that will fit in opt0. The
 	 * remainder will be specified in the rx_data_ack.
@@ -1340,8 +1181,8 @@
 		goto rel_skb;
 	}
 
-	cxgbit_get_tuple_info(req, &iptype, local_ip, peer_ip,
-			      &local_port, &peer_port);
+	cxgb_get_4tuple(req, cdev->lldi.adapter_type, &iptype, local_ip,
+			peer_ip, &local_port, &peer_port);
 
 	/* Find output route */
 	if (iptype == 4)  {
@@ -1350,21 +1191,23 @@
 			 , __func__, cnp, tid,
 			 local_ip, peer_ip, ntohs(local_port),
 			 ntohs(peer_port), peer_mss);
-		dst = cxgbit_find_route(cdev, *(__be32 *)local_ip,
-					*(__be32 *)peer_ip,
-					local_port, peer_port,
-					PASS_OPEN_TOS_G(ntohl(req->tos_stid)));
+		dst = cxgb_find_route(&cdev->lldi, cxgbit_get_real_dev,
+				      *(__be32 *)local_ip,
+				      *(__be32 *)peer_ip,
+				      local_port, peer_port,
+				      PASS_OPEN_TOS_G(ntohl(req->tos_stid)));
 	} else {
 		pr_debug("%s parent sock %p tid %u laddr %pI6 raddr %pI6 "
 			 "lport %d rport %d peer_mss %d\n"
 			 , __func__, cnp, tid,
 			 local_ip, peer_ip, ntohs(local_port),
 			 ntohs(peer_port), peer_mss);
-		dst = cxgbit_find_route6(cdev, local_ip, peer_ip,
-					 local_port, peer_port,
-					 PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
-					 ((struct sockaddr_in6 *)
-					 &cnp->com.local_addr)->sin6_scope_id);
+		dst = cxgb_find_route6(&cdev->lldi, cxgbit_get_real_dev,
+				       local_ip, peer_ip,
+				       local_port, peer_port,
+				       PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+				       ((struct sockaddr_in6 *)
+					&cnp->com.local_addr)->sin6_scope_id);
 	}
 	if (!dst) {
 		pr_err("%s - failed to find dst entry!\n",
@@ -1795,16 +1638,15 @@
 {
 	struct cpl_abort_req_rss *hdr = cplhdr(skb);
 	unsigned int tid = GET_TID(hdr);
-	struct cpl_abort_rpl *rpl;
 	struct sk_buff *rpl_skb;
 	bool release = false;
 	bool wakeup_thread = false;
-	unsigned int len = roundup(sizeof(*rpl), 16);
+	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
 	pr_debug("%s: csk %p; tid %u; state %d\n",
 		 __func__, csk, tid, csk->com.state);
 
-	if (cxgbit_is_neg_adv(hdr->status)) {
+	if (cxgb_is_neg_adv(hdr->status)) {
 		pr_err("%s: got neg advise %d on tid %u\n",
 		       __func__, hdr->status, tid);
 		goto rel_skb;
@@ -1839,14 +1681,8 @@
 		cxgbit_send_tx_flowc_wr(csk);
 
 	rpl_skb = __skb_dequeue(&csk->skbq);
-	set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx);
 
-	rpl = (struct cpl_abort_rpl *)__skb_put(rpl_skb, len);
-	memset(rpl, 0, len);
-
-	INIT_TP_WR(rpl, csk->tid);
-	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid));
-	rpl->cmd = CPL_ABORT_NO_RST;
+	cxgb_mk_abort_rpl(rpl_skb, len, csk->tid, csk->txq_idx);
 	cxgbit_ofld_send(csk->com.cdev, rpl_skb);
 
 	if (wakeup_thread) {
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c
index 27dd11a..ad26b93 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_main.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c
@@ -652,6 +652,9 @@
 
 static struct cxgb4_uld_info cxgbit_uld_info = {
 	.name		= DRV_NAME,
+	.nrxq		= MAX_ULD_QSETS,
+	.rxq_size	= 1024,
+	.lro		= true,
 	.add		= cxgbit_uld_add,
 	.state_change	= cxgbit_uld_state_change,
 	.lro_rx_handler = cxgbit_uld_lro_rx_handler,
diff --git a/drivers/thermal/clock_cooling.c b/drivers/thermal/clock_cooling.c
index 1b4ff0f..ed5dd0e 100644
--- a/drivers/thermal/clock_cooling.c
+++ b/drivers/thermal/clock_cooling.c
@@ -426,6 +426,7 @@
 	if (!ccdev)
 		return ERR_PTR(-ENOMEM);
 
+	mutex_init(&ccdev->lock);
 	ccdev->dev = dev;
 	ccdev->clk = devm_clk_get(dev, clock_name);
 	if (IS_ERR(ccdev->clk))
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 3788ed7..a32b417 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -740,12 +740,22 @@
 }
 
 /* Bind cpufreq callbacks to thermal cooling device ops */
+
 static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
 	.get_max_state = cpufreq_get_max_state,
 	.get_cur_state = cpufreq_get_cur_state,
 	.set_cur_state = cpufreq_set_cur_state,
 };
 
+static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
+	.get_max_state		= cpufreq_get_max_state,
+	.get_cur_state		= cpufreq_get_cur_state,
+	.set_cur_state		= cpufreq_set_cur_state,
+	.get_requested_power	= cpufreq_get_requested_power,
+	.state2power		= cpufreq_state2power,
+	.power2state		= cpufreq_power2state,
+};
+
 /* Notifier for cpufreq policy change */
 static struct notifier_block thermal_cpufreq_notifier_block = {
 	.notifier_call = cpufreq_thermal_notifier,
@@ -795,6 +805,7 @@
 	struct cpumask temp_mask;
 	unsigned int freq, i, num_cpus;
 	int ret;
+	struct thermal_cooling_device_ops *cooling_ops;
 
 	cpumask_and(&temp_mask, clip_cpus, cpu_online_mask);
 	policy = cpufreq_cpu_get(cpumask_first(&temp_mask));
@@ -850,10 +861,6 @@
 	cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
 
 	if (capacitance) {
-		cpufreq_cooling_ops.get_requested_power =
-			cpufreq_get_requested_power;
-		cpufreq_cooling_ops.state2power = cpufreq_state2power;
-		cpufreq_cooling_ops.power2state = cpufreq_power2state;
 		cpufreq_dev->plat_get_static_power = plat_static_func;
 
 		ret = build_dyn_power_table(cpufreq_dev, capacitance);
@@ -861,6 +868,10 @@
 			cool_dev = ERR_PTR(ret);
 			goto free_table;
 		}
+
+		cooling_ops = &cpufreq_power_cooling_ops;
+	} else {
+		cooling_ops = &cpufreq_cooling_ops;
 	}
 
 	ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
@@ -885,7 +896,7 @@
 		 cpufreq_dev->id);
 
 	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
-						      &cpufreq_cooling_ops);
+						      cooling_ops);
 	if (IS_ERR(cool_dev))
 		goto remove_idr;
 
diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c
index 34fe365..68bd1b5 100644
--- a/drivers/thermal/fair_share.c
+++ b/drivers/thermal/fair_share.c
@@ -116,7 +116,9 @@
 		instance->target = get_target_state(tz, cdev, percentage,
 						    cur_trip_level);
 
+		mutex_lock(&instance->cdev->lock);
 		instance->cdev->updated = false;
+		mutex_unlock(&instance->cdev->lock);
 		thermal_cdev_update(cdev);
 	}
 	return 0;
diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c
index fc52016..bb118a1 100644
--- a/drivers/thermal/gov_bang_bang.c
+++ b/drivers/thermal/gov_bang_bang.c
@@ -71,7 +71,9 @@
 		dev_dbg(&instance->cdev->device, "target=%d\n",
 					(int)instance->target);
 
+		mutex_lock(&instance->cdev->lock);
 		instance->cdev->updated = false; /* cdev needs update */
+		mutex_unlock(&instance->cdev->lock);
 	}
 
 	mutex_unlock(&tz->lock);
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index c5547bd..e473548 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c
@@ -471,8 +471,6 @@
 
 static int imx_thermal_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *of_id =
-		of_match_device(of_imx_thermal_match, &pdev->dev);
 	struct imx_thermal_data *data;
 	struct regmap *map;
 	int measure_freq;
@@ -490,7 +488,7 @@
 	}
 	data->tempmon = map;
 
-	data->socdata = of_id->data;
+	data->socdata = of_device_get_match_data(&pdev->dev);
 
 	/* make sure the IRQ flag is clear before enabling irq on i.MX6SX */
 	if (data->socdata->version == TEMPMON_IMX6SX) {
diff --git a/drivers/thermal/int340x_thermal/int3406_thermal.c b/drivers/thermal/int340x_thermal/int3406_thermal.c
index a578cd2..1891f34 100644
--- a/drivers/thermal/int340x_thermal/int3406_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3406_thermal.c
@@ -225,7 +225,6 @@
 	.remove = int3406_thermal_remove,
 	.driver = {
 		   .name = "int3406 thermal",
-		   .owner = THIS_MODULE,
 		   .acpi_match_table = int3406_thermal_match,
 		   },
 };
diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c
index 6a6ec1c..9b4815e 100644
--- a/drivers/thermal/intel_pch_thermal.c
+++ b/drivers/thermal/intel_pch_thermal.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/thermal.h>
+#include <linux/pm.h>
 
 /* Intel PCH thermal Device IDs */
 #define PCH_THERMAL_DID_WPT	0x9CA4 /* Wildcat Point */
@@ -65,6 +66,7 @@
 	unsigned long crt_temp;
 	int hot_trip_id;
 	unsigned long hot_temp;
+	bool bios_enabled;
 };
 
 static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
@@ -75,8 +77,10 @@
 	*nr_trips = 0;
 
 	/* Check if BIOS has already enabled thermal sensor */
-	if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS))
+	if (WPT_TSS_TSDSS & readb(ptd->hw_base + WPT_TSS)) {
+		ptd->bios_enabled = true;
 		goto read_trips;
+	}
 
 	tsel = readb(ptd->hw_base + WPT_TSEL);
 	/*
@@ -130,9 +134,39 @@
 	return 0;
 }
 
+static int pch_wpt_suspend(struct pch_thermal_device *ptd)
+{
+	u8 tsel;
+
+	if (ptd->bios_enabled)
+		return 0;
+
+	tsel = readb(ptd->hw_base + WPT_TSEL);
+
+	writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL);
+
+	return 0;
+}
+
+static int pch_wpt_resume(struct pch_thermal_device *ptd)
+{
+	u8 tsel;
+
+	if (ptd->bios_enabled)
+		return 0;
+
+	tsel = readb(ptd->hw_base + WPT_TSEL);
+
+	writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL);
+
+	return 0;
+}
+
 struct pch_dev_ops {
 	int (*hw_init)(struct pch_thermal_device *ptd, int *nr_trips);
 	int (*get_temp)(struct pch_thermal_device *ptd, int *temp);
+	int (*suspend)(struct pch_thermal_device *ptd);
+	int (*resume)(struct pch_thermal_device *ptd);
 };
 
 
@@ -140,6 +174,8 @@
 static const struct pch_dev_ops pch_dev_ops_wpt = {
 	.hw_init = pch_wpt_init,
 	.get_temp = pch_wpt_get_temp,
+	.suspend = pch_wpt_suspend,
+	.resume = pch_wpt_resume,
 };
 
 static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp)
@@ -269,6 +305,22 @@
 	pci_disable_device(pdev);
 }
 
+static int intel_pch_thermal_suspend(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+	return ptd->ops->suspend(ptd);
+}
+
+static int intel_pch_thermal_resume(struct device *device)
+{
+	struct pci_dev *pdev = to_pci_dev(device);
+	struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+	return ptd->ops->resume(ptd);
+}
+
 static struct pci_device_id intel_pch_thermal_id[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) },
@@ -276,11 +328,17 @@
 };
 MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
 
+static const struct dev_pm_ops intel_pch_pm_ops = {
+	.suspend = intel_pch_thermal_suspend,
+	.resume = intel_pch_thermal_resume,
+};
+
 static struct pci_driver intel_pch_thermal_driver = {
 	.name		= "intel_pch_thermal",
 	.id_table	= intel_pch_thermal_id,
 	.probe		= intel_pch_thermal_probe,
 	.remove		= intel_pch_thermal_remove,
+	.driver.pm	= &intel_pch_pm_ops,
 };
 
 module_pci_driver(intel_pch_thermal_driver);
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index 015ce2e..0e4dc0a 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -388,7 +388,7 @@
 		int sleeptime;
 		unsigned long target_jiffies;
 		unsigned int guard;
-		unsigned int compensation = 0;
+		unsigned int compensated_ratio;
 		int interval; /* jiffies to sleep for each attempt */
 		unsigned int duration_jiffies = msecs_to_jiffies(duration);
 		unsigned int window_size_now;
@@ -409,8 +409,11 @@
 		 * c-states, thus we need to compensate the injected idle ratio
 		 * to achieve the actual target reported by the HW.
 		 */
-		compensation = get_compensation(target_ratio);
-		interval = duration_jiffies*100/(target_ratio+compensation);
+		compensated_ratio = target_ratio +
+			get_compensation(target_ratio);
+		if (compensated_ratio <= 0)
+			compensated_ratio = 1;
+		interval = duration_jiffies * 100 / compensated_ratio;
 
 		/* align idle time */
 		target_jiffies = roundup(jiffies, interval);
@@ -647,8 +650,8 @@
 		goto exit_set;
 	} else	if (set_target_ratio > 0 && new_target_ratio == 0) {
 		pr_info("Stop forced idle injection\n");
-		set_target_ratio = 0;
 		end_power_clamp();
+		set_target_ratio = 0;
 	} else	/* adjust currently running */ {
 		set_target_ratio = new_target_ratio;
 		/* make new set_target_ratio visible to other cpus */
diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c
index 2f1a863..b4d3116 100644
--- a/drivers/thermal/power_allocator.c
+++ b/drivers/thermal/power_allocator.c
@@ -529,7 +529,9 @@
 			continue;
 
 		instance->target = 0;
+		mutex_lock(&instance->cdev->lock);
 		instance->cdev->updated = false;
+		mutex_unlock(&instance->cdev->lock);
 		thermal_cdev_update(instance->cdev);
 	}
 }
diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c
index 71a3392..5f81792 100644
--- a/drivers/thermal/rcar_thermal.c
+++ b/drivers/thermal/rcar_thermal.c
@@ -504,6 +504,7 @@
 		if (IS_ERR(priv->zone)) {
 			dev_err(dev, "can't register thermal zone\n");
 			ret = PTR_ERR(priv->zone);
+			priv->zone = NULL;
 			goto error_unregister;
 		}
 
diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c
index ea9366a..bcef2e7 100644
--- a/drivers/thermal/step_wise.c
+++ b/drivers/thermal/step_wise.c
@@ -175,7 +175,9 @@
 			update_passive_instance(tz, trip_type, -1);
 
 		instance->initialized = true;
+		mutex_lock(&instance->cdev->lock);
 		instance->cdev->updated = false; /* cdev needs update */
+		mutex_unlock(&instance->cdev->lock);
 	}
 
 	mutex_unlock(&tz->lock);
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 5133cd1..e2fc616 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1093,7 +1093,9 @@
 		return ret;
 
 	instance->target = state;
+	mutex_lock(&cdev->lock);
 	cdev->updated = false;
+	mutex_unlock(&cdev->lock);
 	thermal_cdev_update(cdev);
 
 	return 0;
@@ -1623,11 +1625,13 @@
 	struct thermal_instance *instance;
 	unsigned long target = 0;
 
-	/* cooling device is updated*/
-	if (cdev->updated)
-		return;
-
 	mutex_lock(&cdev->lock);
+	/* cooling device is updated*/
+	if (cdev->updated) {
+		mutex_unlock(&cdev->lock);
+		return;
+	}
+
 	/* Make sure cdev enters the deepest cooling state */
 	list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
 		dev_dbg(&cdev->device, "zone%d->target=%lu\n",
@@ -1637,9 +1641,9 @@
 		if (instance->target > target)
 			target = instance->target;
 	}
-	mutex_unlock(&cdev->lock);
 	cdev->ops->set_cur_state(cdev, target);
 	cdev->updated = true;
+	mutex_unlock(&cdev->lock);
 	trace_cdev_update(cdev, target);
 	dev_dbg(&cdev->device, "set to state %lu\n", target);
 }
diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c
index 06fd2ed9..c41c774 100644
--- a/drivers/thermal/thermal_hwmon.c
+++ b/drivers/thermal/thermal_hwmon.c
@@ -232,6 +232,7 @@
 
 	return result;
 }
+EXPORT_SYMBOL_GPL(thermal_add_hwmon_sysfs);
 
 void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
 {
@@ -270,3 +271,4 @@
 	hwmon_device_unregister(hwmon->device);
 	kfree(hwmon);
 }
+EXPORT_SYMBOL_GPL(thermal_remove_hwmon_sysfs);
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 9c15344..a8c2041 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -651,6 +651,12 @@
 	{
 		.class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0,
 		.vendor = PCI_VENDOR_ID_INTEL,
+		.device = PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_NHI,
+		.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID,
+	},
+	{
+		.class = PCI_CLASS_SYSTEM_OTHER << 8, .class_mask = ~0,
+		.vendor = PCI_VENDOR_ID_INTEL,
 		.device = PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_NHI,
 		.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID,
 	},
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 1e116f5..9840fde 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -372,7 +372,9 @@
 
 	if (sw->config.device_id != PCI_DEVICE_ID_INTEL_LIGHT_RIDGE &&
 	    sw->config.device_id != PCI_DEVICE_ID_INTEL_CACTUS_RIDGE_4C &&
-	    sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE)
+	    sw->config.device_id != PCI_DEVICE_ID_INTEL_PORT_RIDGE &&
+	    sw->config.device_id != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_2C_BRIDGE &&
+	    sw->config.device_id != PCI_DEVICE_ID_INTEL_FALCON_RIDGE_4C_BRIDGE)
 		tb_sw_warn(sw, "unsupported switch device id %#x\n",
 			   sw->config.device_id);
 
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 51e0d32..a23fa5e 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -800,7 +800,7 @@
 	return retval;
 }
 
-static struct file_operations ptmx_fops;
+static struct file_operations ptmx_fops __ro_after_init;
 
 static void __init unix98_pty_init(void)
 {
diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 122e0e4..a697a85 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -15,8 +15,6 @@
 #include <linux/serial_reg.h>
 #include <linux/dmaengine.h>
 
-#include "../serial_mctrl_gpio.h"
-
 struct uart_8250_dma {
 	int (*tx_dma)(struct uart_8250_port *p);
 	int (*rx_dma)(struct uart_8250_port *p);
@@ -33,6 +31,11 @@
 	struct dma_chan		*rxchan;
 	struct dma_chan		*txchan;
 
+	/* Device address base for DMA operations */
+	phys_addr_t		rx_dma_addr;
+	phys_addr_t		tx_dma_addr;
+
+	/* DMA address of the buffer in memory */
 	dma_addr_t		rx_addr;
 	dma_addr_t		tx_addr;
 
@@ -133,43 +136,12 @@
 
 static inline void serial8250_out_MCR(struct uart_8250_port *up, int value)
 {
-	int mctrl_gpio = 0;
-
 	serial_out(up, UART_MCR, value);
-
-	if (value & UART_MCR_RTS)
-		mctrl_gpio |= TIOCM_RTS;
-	if (value & UART_MCR_DTR)
-		mctrl_gpio |= TIOCM_DTR;
-
-	mctrl_gpio_set(up->gpios, mctrl_gpio);
 }
 
 static inline int serial8250_in_MCR(struct uart_8250_port *up)
 {
-	int mctrl, mctrl_gpio = 0;
-
-	mctrl = serial_in(up, UART_MCR);
-
-	/* save current MCR values */
-	if (mctrl & UART_MCR_RTS)
-		mctrl_gpio |= TIOCM_RTS;
-	if (mctrl & UART_MCR_DTR)
-		mctrl_gpio |= TIOCM_DTR;
-
-	mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio);
-
-	if (mctrl_gpio & TIOCM_RTS)
-		mctrl |= UART_MCR_RTS;
-	else
-		mctrl &= ~UART_MCR_RTS;
-
-	if (mctrl_gpio & TIOCM_DTR)
-		mctrl |= UART_MCR_DTR;
-	else
-		mctrl &= ~UART_MCR_DTR;
-
-	return mctrl;
+	return serial_in(up, UART_MCR);
 }
 
 #if defined(__alpha__) && !defined(CONFIG_PCI)
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 13ad5c3..240a361 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -639,7 +639,7 @@
 {
 	char match[] = "uart";	/* 8250-specific earlycon name */
 	unsigned char iotype;
-	unsigned long addr;
+	resource_size_t addr;
 	int i;
 
 	if (strncmp(name, match, 4) != 0)
@@ -974,8 +974,6 @@
 
 	uart = serial8250_find_match_or_unused(&up->port);
 	if (uart && uart->port.type != PORT_8250_CIR) {
-		struct mctrl_gpios *gpios;
-
 		if (uart->port.dev)
 			uart_remove_one_port(&serial8250_reg, &uart->port);
 
@@ -1013,13 +1011,6 @@
 		if (up->port.flags & UPF_FIXED_TYPE)
 			uart->port.type = up->port.type;
 
-		gpios = mctrl_gpio_init(&uart->port, 0);
-		if (IS_ERR(gpios)) {
-			if (PTR_ERR(gpios) != -ENOSYS)
-				return PTR_ERR(gpios);
-		} else
-			uart->gpios = gpios;
-
 		serial8250_set_defaults(uart);
 
 		/* Possibly override default I/O functions.  */
diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index 3590d01..fdbddbc 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -142,7 +142,7 @@
 	if (dma->rx_running) {
 		dmaengine_pause(dma->rxchan);
 		__dma_rx_complete(p);
-		dmaengine_terminate_all(dma->rxchan);
+		dmaengine_terminate_async(dma->rxchan);
 	}
 }
 EXPORT_SYMBOL_GPL(serial8250_rx_dma_flush);
@@ -150,6 +150,10 @@
 int serial8250_request_dma(struct uart_8250_port *p)
 {
 	struct uart_8250_dma	*dma = p->dma;
+	phys_addr_t rx_dma_addr = dma->rx_dma_addr ?
+				  dma->rx_dma_addr : p->port.mapbase;
+	phys_addr_t tx_dma_addr = dma->tx_dma_addr ?
+				  dma->tx_dma_addr : p->port.mapbase;
 	dma_cap_mask_t		mask;
 	struct dma_slave_caps	caps;
 	int			ret;
@@ -157,11 +161,11 @@
 	/* Default slave configuration parameters */
 	dma->rxconf.direction		= DMA_DEV_TO_MEM;
 	dma->rxconf.src_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE;
-	dma->rxconf.src_addr		= p->port.mapbase + UART_RX;
+	dma->rxconf.src_addr		= rx_dma_addr + UART_RX;
 
 	dma->txconf.direction		= DMA_MEM_TO_DEV;
 	dma->txconf.dst_addr_width	= DMA_SLAVE_BUSWIDTH_1_BYTE;
-	dma->txconf.dst_addr		= p->port.mapbase + UART_TX;
+	dma->txconf.dst_addr		= tx_dma_addr + UART_TX;
 
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
@@ -247,14 +251,14 @@
 		return;
 
 	/* Release RX resources */
-	dmaengine_terminate_all(dma->rxchan);
+	dmaengine_terminate_sync(dma->rxchan);
 	dma_free_coherent(dma->rxchan->device->dev, dma->rx_size, dma->rx_buf,
 			  dma->rx_addr);
 	dma_release_channel(dma->rxchan);
 	dma->rxchan = NULL;
 
 	/* Release TX resources */
-	dmaengine_terminate_all(dma->txchan);
+	dmaengine_terminate_sync(dma->txchan);
 	dma_unmap_single(dma->txchan->device->dev, dma->tx_addr,
 			 UART_XMIT_SIZE, DMA_TO_DEVICE);
 	dma_release_channel(dma->txchan);
diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index e199696..459d726 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c
@@ -298,12 +298,17 @@
 			p->serial_out = dw8250_serial_out32be;
 		}
 	} else if (has_acpi_companion(p->dev)) {
-		p->iotype = UPIO_MEM32;
-		p->regshift = 2;
-		p->serial_in = dw8250_serial_in32;
+		const struct acpi_device_id *id;
+
+		id = acpi_match_device(p->dev->driver->acpi_match_table,
+				       p->dev);
+		if (id && !strcmp(id->id, "APMC0D08")) {
+			p->iotype = UPIO_MEM32;
+			p->regshift = 2;
+			p->serial_in = dw8250_serial_in32;
+			data->uart_16550_compatible = true;
+		}
 		p->set_termios = dw8250_set_termios;
-		/* So far none of there implement the Busy Functionality */
-		data->uart_16550_compatible = true;
 	}
 
 	/* Platforms with iDMA */
@@ -360,18 +365,19 @@
 	struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	int irq = platform_get_irq(pdev, 0);
 	struct uart_port *p = &uart.port;
+	struct device *dev = &pdev->dev;
 	struct dw8250_data *data;
 	int err;
 	u32 val;
 
 	if (!regs) {
-		dev_err(&pdev->dev, "no registers defined\n");
+		dev_err(dev, "no registers defined\n");
 		return -EINVAL;
 	}
 
 	if (irq < 0) {
 		if (irq != -EPROBE_DEFER)
-			dev_err(&pdev->dev, "cannot get irq\n");
+			dev_err(dev, "cannot get irq\n");
 		return irq;
 	}
 
@@ -382,16 +388,16 @@
 	p->pm		= dw8250_do_pm;
 	p->type		= PORT_8250;
 	p->flags	= UPF_SHARE_IRQ | UPF_FIXED_PORT;
-	p->dev		= &pdev->dev;
+	p->dev		= dev;
 	p->iotype	= UPIO_MEM;
 	p->serial_in	= dw8250_serial_in;
 	p->serial_out	= dw8250_serial_out;
 
-	p->membase = devm_ioremap(&pdev->dev, regs->start, resource_size(regs));
+	p->membase = devm_ioremap(dev, regs->start, resource_size(regs));
 	if (!p->membase)
 		return -ENOMEM;
 
-	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
@@ -399,57 +405,57 @@
 	data->usr_reg = DW_UART_USR;
 	p->private_data = data;
 
-	data->uart_16550_compatible = device_property_read_bool(p->dev,
+	data->uart_16550_compatible = device_property_read_bool(dev,
 						"snps,uart-16550-compatible");
 
-	err = device_property_read_u32(p->dev, "reg-shift", &val);
+	err = device_property_read_u32(dev, "reg-shift", &val);
 	if (!err)
 		p->regshift = val;
 
-	err = device_property_read_u32(p->dev, "reg-io-width", &val);
+	err = device_property_read_u32(dev, "reg-io-width", &val);
 	if (!err && val == 4) {
 		p->iotype = UPIO_MEM32;
 		p->serial_in = dw8250_serial_in32;
 		p->serial_out = dw8250_serial_out32;
 	}
 
-	if (device_property_read_bool(p->dev, "dcd-override")) {
+	if (device_property_read_bool(dev, "dcd-override")) {
 		/* Always report DCD as active */
 		data->msr_mask_on |= UART_MSR_DCD;
 		data->msr_mask_off |= UART_MSR_DDCD;
 	}
 
-	if (device_property_read_bool(p->dev, "dsr-override")) {
+	if (device_property_read_bool(dev, "dsr-override")) {
 		/* Always report DSR as active */
 		data->msr_mask_on |= UART_MSR_DSR;
 		data->msr_mask_off |= UART_MSR_DDSR;
 	}
 
-	if (device_property_read_bool(p->dev, "cts-override")) {
+	if (device_property_read_bool(dev, "cts-override")) {
 		/* Always report CTS as active */
 		data->msr_mask_on |= UART_MSR_CTS;
 		data->msr_mask_off |= UART_MSR_DCTS;
 	}
 
-	if (device_property_read_bool(p->dev, "ri-override")) {
+	if (device_property_read_bool(dev, "ri-override")) {
 		/* Always report Ring indicator as inactive */
 		data->msr_mask_off |= UART_MSR_RI;
 		data->msr_mask_off |= UART_MSR_TERI;
 	}
 
 	/* Always ask for fixed clock rate from a property. */
-	device_property_read_u32(p->dev, "clock-frequency", &p->uartclk);
+	device_property_read_u32(dev, "clock-frequency", &p->uartclk);
 
 	/* If there is separate baudclk, get the rate from it. */
-	data->clk = devm_clk_get(&pdev->dev, "baudclk");
+	data->clk = devm_clk_get(dev, "baudclk");
 	if (IS_ERR(data->clk) && PTR_ERR(data->clk) != -EPROBE_DEFER)
-		data->clk = devm_clk_get(&pdev->dev, NULL);
+		data->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(data->clk) && PTR_ERR(data->clk) == -EPROBE_DEFER)
 		return -EPROBE_DEFER;
 	if (!IS_ERR_OR_NULL(data->clk)) {
 		err = clk_prepare_enable(data->clk);
 		if (err)
-			dev_warn(&pdev->dev, "could not enable optional baudclk: %d\n",
+			dev_warn(dev, "could not enable optional baudclk: %d\n",
 				 err);
 		else
 			p->uartclk = clk_get_rate(data->clk);
@@ -457,24 +463,24 @@
 
 	/* If no clock rate is defined, fail. */
 	if (!p->uartclk) {
-		dev_err(&pdev->dev, "clock rate not defined\n");
+		dev_err(dev, "clock rate not defined\n");
 		return -EINVAL;
 	}
 
-	data->pclk = devm_clk_get(&pdev->dev, "apb_pclk");
-	if (IS_ERR(data->clk) && PTR_ERR(data->clk) == -EPROBE_DEFER) {
+	data->pclk = devm_clk_get(dev, "apb_pclk");
+	if (IS_ERR(data->pclk) && PTR_ERR(data->pclk) == -EPROBE_DEFER) {
 		err = -EPROBE_DEFER;
 		goto err_clk;
 	}
 	if (!IS_ERR(data->pclk)) {
 		err = clk_prepare_enable(data->pclk);
 		if (err) {
-			dev_err(&pdev->dev, "could not enable apb_pclk\n");
+			dev_err(dev, "could not enable apb_pclk\n");
 			goto err_clk;
 		}
 	}
 
-	data->rst = devm_reset_control_get_optional(&pdev->dev, NULL);
+	data->rst = devm_reset_control_get_optional(dev, NULL);
 	if (IS_ERR(data->rst) && PTR_ERR(data->rst) == -EPROBE_DEFER) {
 		err = -EPROBE_DEFER;
 		goto err_pclk;
@@ -506,8 +512,8 @@
 
 	platform_set_drvdata(pdev, data);
 
-	pm_runtime_set_active(&pdev->dev);
-	pm_runtime_enable(&pdev->dev);
+	pm_runtime_set_active(dev);
+	pm_runtime_enable(dev);
 
 	return 0;
 
@@ -619,6 +625,7 @@
 	{ "APMC0D08", 0},
 	{ "AMD0020", 0 },
 	{ "AMDI0020", 0 },
+	{ "HISI0031", 0 },
 	{ },
 };
 MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match);
diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c
index 737b4b3..0facc78 100644
--- a/drivers/tty/serial/8250/8250_fintek.c
+++ b/drivers/tty/serial/8250/8250_fintek.c
@@ -31,7 +31,7 @@
 #define IO_ADDR2 0x60
 #define LDN 0x7
 
-#define IRQ_MODE	0x70
+#define FINTEK_IRQ_MODE	0x70
 #define IRQ_SHARE	BIT(4)
 #define IRQ_MODE_MASK	(BIT(6) | BIT(5))
 #define IRQ_LEVEL_LOW	0
@@ -195,7 +195,7 @@
 	outb(LDN, pdata->base_port + ADDR_PORT);
 	outb(pdata->index, pdata->base_port + DATA_PORT);
 
-	outb(IRQ_MODE, pdata->base_port + ADDR_PORT);
+	outb(FINTEK_IRQ_MODE, pdata->base_port + ADDR_PORT);
 	tmp = inb(pdata->base_port + DATA_PORT);
 
 	tmp &= ~IRQ_MODE_MASK;
diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c
new file mode 100644
index 0000000..886fcf3
--- /dev/null
+++ b/drivers/tty/serial/8250/8250_lpss.c
@@ -0,0 +1,378 @@
+/*
+ * 8250_lpss.c - Driver for UART on Intel Braswell and various other Intel SoCs
+ *
+ * Copyright (C) 2016 Intel Corporation
+ * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/rational.h>
+
+#include <linux/dmaengine.h>
+#include <linux/dma/dw.h>
+
+#include "8250.h"
+
+#define PCI_DEVICE_ID_INTEL_QRK_UARTx	0x0936
+
+#define PCI_DEVICE_ID_INTEL_BYT_UART1	0x0f0a
+#define PCI_DEVICE_ID_INTEL_BYT_UART2	0x0f0c
+
+#define PCI_DEVICE_ID_INTEL_BSW_UART1	0x228a
+#define PCI_DEVICE_ID_INTEL_BSW_UART2	0x228c
+
+#define PCI_DEVICE_ID_INTEL_BDW_UART1	0x9ce3
+#define PCI_DEVICE_ID_INTEL_BDW_UART2	0x9ce4
+
+/* Intel LPSS specific registers */
+
+#define BYT_PRV_CLK			0x800
+#define BYT_PRV_CLK_EN			BIT(0)
+#define BYT_PRV_CLK_M_VAL_SHIFT		1
+#define BYT_PRV_CLK_N_VAL_SHIFT		16
+#define BYT_PRV_CLK_UPDATE		BIT(31)
+
+#define BYT_TX_OVF_INT			0x820
+#define BYT_TX_OVF_INT_MASK		BIT(1)
+
+struct lpss8250;
+
+struct lpss8250_board {
+	unsigned long freq;
+	unsigned int base_baud;
+	int (*setup)(struct lpss8250 *, struct uart_port *p);
+	void (*exit)(struct lpss8250 *);
+};
+
+struct lpss8250 {
+	int line;
+	struct lpss8250_board *board;
+
+	/* DMA parameters */
+	struct uart_8250_dma dma;
+	struct dw_dma_chip dma_chip;
+	struct dw_dma_slave dma_param;
+	u8 dma_maxburst;
+};
+
+static void byt_set_termios(struct uart_port *p, struct ktermios *termios,
+			    struct ktermios *old)
+{
+	unsigned int baud = tty_termios_baud_rate(termios);
+	struct lpss8250 *lpss = p->private_data;
+	unsigned long fref = lpss->board->freq, fuart = baud * 16;
+	unsigned long w = BIT(15) - 1;
+	unsigned long m, n;
+	u32 reg;
+
+	/* Gracefully handle the B0 case: fall back to B9600 */
+	fuart = fuart ? fuart : 9600 * 16;
+
+	/* Get Fuart closer to Fref */
+	fuart *= rounddown_pow_of_two(fref / fuart);
+
+	/*
+	 * For baud rates 0.5M, 1M, 1.5M, 2M, 2.5M, 3M, 3.5M and 4M the
+	 * dividers must be adjusted.
+	 *
+	 * uartclk = (m / n) * 100 MHz, where m <= n
+	 */
+	rational_best_approximation(fuart, fref, w, w, &m, &n);
+	p->uartclk = fuart;
+
+	/* Reset the clock */
+	reg = (m << BYT_PRV_CLK_M_VAL_SHIFT) | (n << BYT_PRV_CLK_N_VAL_SHIFT);
+	writel(reg, p->membase + BYT_PRV_CLK);
+	reg |= BYT_PRV_CLK_EN | BYT_PRV_CLK_UPDATE;
+	writel(reg, p->membase + BYT_PRV_CLK);
+
+	p->status &= ~UPSTAT_AUTOCTS;
+	if (termios->c_cflag & CRTSCTS)
+		p->status |= UPSTAT_AUTOCTS;
+
+	serial8250_do_set_termios(p, termios, old);
+}
+
+static unsigned int byt_get_mctrl(struct uart_port *port)
+{
+	unsigned int ret = serial8250_do_get_mctrl(port);
+
+	/* Force DCD and DSR signals to permanently be reported as active */
+	ret |= TIOCM_CAR | TIOCM_DSR;
+
+	return ret;
+}
+
+static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
+{
+	struct dw_dma_slave *param = &lpss->dma_param;
+	struct uart_8250_port *up = up_to_u8250p(port);
+	struct pci_dev *pdev = to_pci_dev(port->dev);
+	unsigned int dma_devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0);
+	struct pci_dev *dma_dev = pci_get_slot(pdev->bus, dma_devfn);
+
+	switch (pdev->device) {
+	case PCI_DEVICE_ID_INTEL_BYT_UART1:
+	case PCI_DEVICE_ID_INTEL_BSW_UART1:
+	case PCI_DEVICE_ID_INTEL_BDW_UART1:
+		param->src_id = 3;
+		param->dst_id = 2;
+		break;
+	case PCI_DEVICE_ID_INTEL_BYT_UART2:
+	case PCI_DEVICE_ID_INTEL_BSW_UART2:
+	case PCI_DEVICE_ID_INTEL_BDW_UART2:
+		param->src_id = 5;
+		param->dst_id = 4;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	param->dma_dev = &dma_dev->dev;
+	param->m_master = 0;
+	param->p_master = 1;
+
+	/* TODO: Detect FIFO size automaticaly for DesignWare 8250 */
+	port->fifosize = 64;
+	up->tx_loadsz = 64;
+
+	lpss->dma_maxburst = 16;
+
+	port->set_termios = byt_set_termios;
+	port->get_mctrl = byt_get_mctrl;
+
+	/* Disable TX counter interrupts */
+	writel(BYT_TX_OVF_INT_MASK, port->membase + BYT_TX_OVF_INT);
+
+	return 0;
+}
+
+#ifdef CONFIG_SERIAL_8250_DMA
+static const struct dw_dma_platform_data qrk_serial_dma_pdata = {
+	.nr_channels = 2,
+	.is_private = true,
+	.is_nollp = true,
+	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+	.chan_priority = CHAN_PRIORITY_ASCENDING,
+	.block_size = 4095,
+	.nr_masters = 1,
+	.data_width = {4},
+};
+
+static void qrk_serial_setup_dma(struct lpss8250 *lpss, struct uart_port *port)
+{
+	struct uart_8250_dma *dma = &lpss->dma;
+	struct dw_dma_chip *chip = &lpss->dma_chip;
+	struct dw_dma_slave *param = &lpss->dma_param;
+	struct pci_dev *pdev = to_pci_dev(port->dev);
+	int ret;
+
+	chip->dev = &pdev->dev;
+	chip->irq = pdev->irq;
+	chip->regs = pci_ioremap_bar(pdev, 1);
+	chip->pdata = &qrk_serial_dma_pdata;
+
+	/* Falling back to PIO mode if DMA probing fails */
+	ret = dw_dma_probe(chip);
+	if (ret)
+		return;
+
+	/* Special DMA address for UART */
+	dma->rx_dma_addr = 0xfffff000;
+	dma->tx_dma_addr = 0xfffff000;
+
+	param->dma_dev = &pdev->dev;
+	param->src_id = 0;
+	param->dst_id = 1;
+	param->hs_polarity = true;
+
+	lpss->dma_maxburst = 8;
+}
+
+static void qrk_serial_exit_dma(struct lpss8250 *lpss)
+{
+	struct dw_dma_slave *param = &lpss->dma_param;
+
+	if (!param->dma_dev)
+		return;
+	dw_dma_remove(&lpss->dma_chip);
+}
+#else	/* CONFIG_SERIAL_8250_DMA */
+static void qrk_serial_setup_dma(struct lpss8250 *lpss, struct uart_port *port) {}
+static void qrk_serial_exit_dma(struct lpss8250 *lpss) {}
+#endif	/* !CONFIG_SERIAL_8250_DMA */
+
+static int qrk_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
+{
+	struct pci_dev *pdev = to_pci_dev(port->dev);
+	int ret;
+
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, 0);
+	if (ret < 0)
+		return ret;
+
+	port->irq = pci_irq_vector(pdev, 0);
+
+	qrk_serial_setup_dma(lpss, port);
+	return 0;
+}
+
+static void qrk_serial_exit(struct lpss8250 *lpss)
+{
+	qrk_serial_exit_dma(lpss);
+}
+
+static bool lpss8250_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct dw_dma_slave *dws = param;
+
+	if (dws->dma_dev != chan->device->dev)
+		return false;
+
+	chan->private = dws;
+	return true;
+}
+
+static int lpss8250_dma_setup(struct lpss8250 *lpss, struct uart_8250_port *port)
+{
+	struct uart_8250_dma *dma = &lpss->dma;
+	struct dw_dma_slave *rx_param, *tx_param;
+	struct device *dev = port->port.dev;
+
+	if (!lpss->dma_param.dma_dev)
+		return 0;
+
+	rx_param = devm_kzalloc(dev, sizeof(*rx_param), GFP_KERNEL);
+	if (!rx_param)
+		return -ENOMEM;
+
+	tx_param = devm_kzalloc(dev, sizeof(*tx_param), GFP_KERNEL);
+	if (!tx_param)
+		return -ENOMEM;
+
+	*rx_param = lpss->dma_param;
+	dma->rxconf.src_maxburst = lpss->dma_maxburst;
+
+	*tx_param = lpss->dma_param;
+	dma->txconf.dst_maxburst = lpss->dma_maxburst;
+
+	dma->fn = lpss8250_dma_filter;
+	dma->rx_param = rx_param;
+	dma->tx_param = tx_param;
+
+	port->dma = dma;
+	return 0;
+}
+
+static int lpss8250_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct uart_8250_port uart;
+	struct lpss8250 *lpss;
+	int ret;
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	pci_set_master(pdev);
+
+	lpss = devm_kzalloc(&pdev->dev, sizeof(*lpss), GFP_KERNEL);
+	if (!lpss)
+		return -ENOMEM;
+
+	lpss->board = (struct lpss8250_board *)id->driver_data;
+
+	memset(&uart, 0, sizeof(struct uart_8250_port));
+
+	uart.port.dev = &pdev->dev;
+	uart.port.irq = pdev->irq;
+	uart.port.private_data = lpss;
+	uart.port.type = PORT_16550A;
+	uart.port.iotype = UPIO_MEM;
+	uart.port.regshift = 2;
+	uart.port.uartclk = lpss->board->base_baud * 16;
+	uart.port.flags = UPF_SHARE_IRQ | UPF_FIXED_PORT | UPF_FIXED_TYPE;
+	uart.capabilities = UART_CAP_FIFO | UART_CAP_AFE;
+	uart.port.mapbase = pci_resource_start(pdev, 0);
+	uart.port.membase = pcim_iomap(pdev, 0, 0);
+	if (!uart.port.membase)
+		return -ENOMEM;
+
+	ret = lpss->board->setup(lpss, &uart.port);
+	if (ret)
+		return ret;
+
+	ret = lpss8250_dma_setup(lpss, &uart);
+	if (ret)
+		goto err_exit;
+
+	ret = serial8250_register_8250_port(&uart);
+	if (ret < 0)
+		goto err_exit;
+
+	lpss->line = ret;
+
+	pci_set_drvdata(pdev, lpss);
+	return 0;
+
+err_exit:
+	if (lpss->board->exit)
+		lpss->board->exit(lpss);
+	return ret;
+}
+
+static void lpss8250_remove(struct pci_dev *pdev)
+{
+	struct lpss8250 *lpss = pci_get_drvdata(pdev);
+
+	if (lpss->board->exit)
+		lpss->board->exit(lpss);
+
+	serial8250_unregister_port(lpss->line);
+}
+
+static const struct lpss8250_board byt_board = {
+	.freq = 100000000,
+	.base_baud = 2764800,
+	.setup = byt_serial_setup,
+};
+
+static const struct lpss8250_board qrk_board = {
+	.freq = 44236800,
+	.base_baud = 2764800,
+	.setup = qrk_serial_setup,
+	.exit = qrk_serial_exit,
+};
+
+#define LPSS_DEVICE(id, board) { PCI_VDEVICE(INTEL, id), (kernel_ulong_t)&board }
+
+static const struct pci_device_id pci_ids[] = {
+	LPSS_DEVICE(PCI_DEVICE_ID_INTEL_QRK_UARTx, qrk_board),
+	LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BYT_UART1, byt_board),
+	LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BYT_UART2, byt_board),
+	LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BSW_UART1, byt_board),
+	LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BSW_UART2, byt_board),
+	LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BDW_UART1, byt_board),
+	LPSS_DEVICE(PCI_DEVICE_ID_INTEL_BDW_UART2, byt_board),
+	{ },
+};
+MODULE_DEVICE_TABLE(pci, pci_ids);
+
+static struct pci_driver lpss8250_pci_driver = {
+	.name           = "8250_lpss",
+	.id_table       = pci_ids,
+	.probe          = lpss8250_probe,
+	.remove         = lpss8250_remove,
+};
+
+module_pci_driver(lpss8250_pci_driver);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel LPSS UART driver");
diff --git a/drivers/tty/serial/8250/8250_mid.c b/drivers/tty/serial/8250/8250_mid.c
index 339de9c..39c2324 100644
--- a/drivers/tty/serial/8250/8250_mid.c
+++ b/drivers/tty/serial/8250/8250_mid.c
@@ -99,27 +99,27 @@
 	struct uart_8250_port *up = up_to_u8250p(p);
 	unsigned int fisr = serial_port_in(p, INTEL_MID_UART_DNV_FISR);
 	u32 status;
-	int ret = IRQ_NONE;
+	int ret = 0;
 	int err;
 
 	if (fisr & BIT(2)) {
 		err = hsu_dma_get_status(&mid->dma_chip, 1, &status);
 		if (err > 0) {
 			serial8250_rx_dma_flush(up);
-			ret |= IRQ_HANDLED;
+			ret |= 1;
 		} else if (err == 0)
 			ret |= hsu_dma_do_irq(&mid->dma_chip, 1, status);
 	}
 	if (fisr & BIT(1)) {
 		err = hsu_dma_get_status(&mid->dma_chip, 0, &status);
 		if (err > 0)
-			ret |= IRQ_HANDLED;
+			ret |= 1;
 		else if (err == 0)
 			ret |= hsu_dma_do_irq(&mid->dma_chip, 0, status);
 	}
 	if (fisr & BIT(0))
 		ret |= serial8250_handle_irq(p, serial_port_in(p, UART_IIR));
-	return ret;
+	return IRQ_RETVAL(ret);
 }
 
 #define DNV_DMA_CHAN_OFFSET 0x80
@@ -168,6 +168,9 @@
 	unsigned long w = BIT(24) - 1;
 	unsigned long mul, div;
 
+	/* Gracefully handle the B0 case: fall back to B9600 */
+	fuart = fuart ? fuart : 9600 * 16;
+
 	if (mid->board->freq < fuart) {
 		/* Find prescaler value that satisfies Fuart < Fref */
 		if (mid->board->freq > baud)
diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index 3611ec9..ce0cc47 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c
@@ -62,7 +62,7 @@
 	 */
 	baud = uart_get_baud_rate(port, termios, old,
 				  port->uartclk / 16 / 0xffff,
-				  port->uartclk / 16);
+				  port->uartclk);
 
 	if (baud <= 115200) {
 		serial_port_out(port, UART_MTK_HIGHS, 0x0);
@@ -76,10 +76,6 @@
 		quot = DIV_ROUND_UP(port->uartclk, 4 * baud);
 	} else {
 		serial_port_out(port, UART_MTK_HIGHS, 0x3);
-
-		/* Set to highest baudrate supported */
-		if (baud >= 1152000)
-			baud = 921600;
 		quot = DIV_ROUND_UP(port->uartclk, 256 * baud);
 	}
 
diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c
index 38963d7..7a8b5fc 100644
--- a/drivers/tty/serial/8250/8250_of.c
+++ b/drivers/tty/serial/8250/8250_of.c
@@ -195,6 +195,7 @@
 	switch (port_type) {
 	case PORT_8250 ... PORT_MAX_8250:
 	{
+		u32 tx_threshold;
 		struct uart_8250_port port8250;
 		memset(&port8250, 0, sizeof(port8250));
 		port8250.port = port;
@@ -202,6 +203,12 @@
 		if (port.fifosize)
 			port8250.capabilities = UART_CAP_FIFO;
 
+		/* Check for TX FIFO threshold & set tx_loadsz */
+		if ((of_property_read_u32(ofdev->dev.of_node, "tx-threshold",
+					  &tx_threshold) == 0) &&
+		    (tx_threshold < port.fifosize))
+			port8250.tx_loadsz = port.fifosize - tx_threshold;
+
 		if (of_property_read_bool(ofdev->dev.of_node,
 					  "auto-flow-control"))
 			port8250.capabilities |= UART_CAP_AFE;
diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index e14982f..61ad6c3 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -134,21 +134,18 @@
 
 	serial8250_do_set_mctrl(port, mctrl);
 
-	if (IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(up->gpios,
-						UART_GPIO_RTS))) {
-		/*
-		 * Turn off autoRTS if RTS is lowered and restore autoRTS
-		 * setting if RTS is raised
-		 */
-		lcr = serial_in(up, UART_LCR);
-		serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
-		if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS))
-			priv->efr |= UART_EFR_RTS;
-		else
-			priv->efr &= ~UART_EFR_RTS;
-		serial_out(up, UART_EFR, priv->efr);
-		serial_out(up, UART_LCR, lcr);
-	}
+	/*
+	 * Turn off autoRTS if RTS is lowered and restore autoRTS setting
+	 * if RTS is raised
+	 */
+	lcr = serial_in(up, UART_LCR);
+	serial_out(up, UART_LCR, UART_LCR_CONF_MODE_B);
+	if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS))
+		priv->efr |= UART_EFR_RTS;
+	else
+		priv->efr &= ~UART_EFR_RTS;
+	serial_out(up, UART_EFR, priv->efr);
+	serial_out(up, UART_LCR, lcr);
 }
 
 /*
@@ -449,9 +446,7 @@
 	priv->efr = 0;
 	up->port.status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS | UPSTAT_AUTOXOFF);
 
-	if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW
-		&& IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(up->gpios,
-							UART_GPIO_RTS))) {
+	if (termios->c_cflag & CRTSCTS && up->port.flags & UPF_HARD_FLOW) {
 		/* Enable AUTOCTS (autoRTS is enabled when RTS is raised) */
 		up->port.status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS;
 		priv->efr |= UART_EFR_CTS;
diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 20ebaea..b98c157 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -21,14 +21,10 @@
 #include <linux/serial_core.h>
 #include <linux/8250_pci.h>
 #include <linux/bitops.h>
-#include <linux/rational.h>
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
 
-#include <linux/dmaengine.h>
-#include <linux/platform_data/dma-dw.h>
-
 #include "8250.h"
 
 /*
@@ -1349,160 +1345,6 @@
 	return ret;
 }
 
-#define PCI_DEVICE_ID_INTEL_BYT_UART1	0x0f0a
-#define PCI_DEVICE_ID_INTEL_BYT_UART2	0x0f0c
-
-#define PCI_DEVICE_ID_INTEL_BSW_UART1	0x228a
-#define PCI_DEVICE_ID_INTEL_BSW_UART2	0x228c
-
-#define PCI_DEVICE_ID_INTEL_BDW_UART1	0x9ce3
-#define PCI_DEVICE_ID_INTEL_BDW_UART2	0x9ce4
-
-#define BYT_PRV_CLK			0x800
-#define BYT_PRV_CLK_EN			(1 << 0)
-#define BYT_PRV_CLK_M_VAL_SHIFT		1
-#define BYT_PRV_CLK_N_VAL_SHIFT		16
-#define BYT_PRV_CLK_UPDATE		(1 << 31)
-
-#define BYT_TX_OVF_INT			0x820
-#define BYT_TX_OVF_INT_MASK		(1 << 1)
-
-static void
-byt_set_termios(struct uart_port *p, struct ktermios *termios,
-		struct ktermios *old)
-{
-	unsigned int baud = tty_termios_baud_rate(termios);
-	unsigned long fref = 100000000, fuart = baud * 16;
-	unsigned long w = BIT(15) - 1;
-	unsigned long m, n;
-	u32 reg;
-
-	/* Gracefully handle the B0 case: fall back to B9600 */
-	fuart = fuart ? fuart : 9600 * 16;
-
-	/* Get Fuart closer to Fref */
-	fuart *= rounddown_pow_of_two(fref / fuart);
-
-	/*
-	 * For baud rates 0.5M, 1M, 1.5M, 2M, 2.5M, 3M, 3.5M and 4M the
-	 * dividers must be adjusted.
-	 *
-	 * uartclk = (m / n) * 100 MHz, where m <= n
-	 */
-	rational_best_approximation(fuart, fref, w, w, &m, &n);
-	p->uartclk = fuart;
-
-	/* Reset the clock */
-	reg = (m << BYT_PRV_CLK_M_VAL_SHIFT) | (n << BYT_PRV_CLK_N_VAL_SHIFT);
-	writel(reg, p->membase + BYT_PRV_CLK);
-	reg |= BYT_PRV_CLK_EN | BYT_PRV_CLK_UPDATE;
-	writel(reg, p->membase + BYT_PRV_CLK);
-
-	p->status &= ~UPSTAT_AUTOCTS;
-	if (termios->c_cflag & CRTSCTS)
-		p->status |= UPSTAT_AUTOCTS;
-
-	serial8250_do_set_termios(p, termios, old);
-}
-
-static bool byt_dma_filter(struct dma_chan *chan, void *param)
-{
-	struct dw_dma_slave *dws = param;
-
-	if (dws->dma_dev != chan->device->dev)
-		return false;
-
-	chan->private = dws;
-	return true;
-}
-
-static unsigned int
-byt_get_mctrl(struct uart_port *port)
-{
-	unsigned int ret = serial8250_do_get_mctrl(port);
-
-	/* Force DCD and DSR signals to permanently be reported as active. */
-	ret |= TIOCM_CAR | TIOCM_DSR;
-
-	return ret;
-}
-
-static int
-byt_serial_setup(struct serial_private *priv,
-		 const struct pciserial_board *board,
-		 struct uart_8250_port *port, int idx)
-{
-	struct pci_dev *pdev = priv->dev;
-	struct device *dev = port->port.dev;
-	struct uart_8250_dma *dma;
-	struct dw_dma_slave *tx_param, *rx_param;
-	struct pci_dev *dma_dev;
-	int ret;
-
-	dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL);
-	if (!dma)
-		return -ENOMEM;
-
-	tx_param = devm_kzalloc(dev, sizeof(*tx_param), GFP_KERNEL);
-	if (!tx_param)
-		return -ENOMEM;
-
-	rx_param = devm_kzalloc(dev, sizeof(*rx_param), GFP_KERNEL);
-	if (!rx_param)
-		return -ENOMEM;
-
-	switch (pdev->device) {
-	case PCI_DEVICE_ID_INTEL_BYT_UART1:
-	case PCI_DEVICE_ID_INTEL_BSW_UART1:
-	case PCI_DEVICE_ID_INTEL_BDW_UART1:
-		rx_param->src_id = 3;
-		tx_param->dst_id = 2;
-		break;
-	case PCI_DEVICE_ID_INTEL_BYT_UART2:
-	case PCI_DEVICE_ID_INTEL_BSW_UART2:
-	case PCI_DEVICE_ID_INTEL_BDW_UART2:
-		rx_param->src_id = 5;
-		tx_param->dst_id = 4;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	rx_param->m_master = 0;
-	rx_param->p_master = 1;
-
-	dma->rxconf.src_maxburst = 16;
-
-	tx_param->m_master = 0;
-	tx_param->p_master = 1;
-
-	dma->txconf.dst_maxburst = 16;
-
-	dma_dev = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
-	rx_param->dma_dev = &dma_dev->dev;
-	tx_param->dma_dev = &dma_dev->dev;
-
-	dma->fn = byt_dma_filter;
-	dma->rx_param = rx_param;
-	dma->tx_param = tx_param;
-
-	ret = pci_default_setup(priv, board, port, idx);
-	port->port.iotype = UPIO_MEM;
-	port->port.type = PORT_16550A;
-	port->port.flags = (port->port.flags | UPF_FIXED_PORT | UPF_FIXED_TYPE);
-	port->port.set_termios = byt_set_termios;
-	port->port.get_mctrl = byt_get_mctrl;
-	port->port.fifosize = 64;
-	port->tx_loadsz = 64;
-	port->dma = dma;
-	port->capabilities = UART_CAP_FIFO | UART_CAP_AFE;
-
-	/* Disable Tx counter interrupts */
-	writel(BYT_TX_OVF_INT_MASK, port->port.membase + BYT_TX_OVF_INT);
-
-	return ret;
-}
-
 static int
 pci_omegapci_setup(struct serial_private *priv,
 		      const struct pciserial_board *board,
@@ -1741,6 +1583,19 @@
 #define PCI_DEVICE_ID_EXAR_XR17V4358	0x4358
 #define PCI_DEVICE_ID_EXAR_XR17V8358	0x8358
 
+#define UART_EXAR_MPIOINT_7_0	0x8f	/* MPIOINT[7:0] */
+#define UART_EXAR_MPIOLVL_7_0	0x90	/* MPIOLVL[7:0] */
+#define UART_EXAR_MPIO3T_7_0	0x91	/* MPIO3T[7:0] */
+#define UART_EXAR_MPIOINV_7_0	0x92	/* MPIOINV[7:0] */
+#define UART_EXAR_MPIOSEL_7_0	0x93	/* MPIOSEL[7:0] */
+#define UART_EXAR_MPIOOD_7_0	0x94	/* MPIOOD[7:0] */
+#define UART_EXAR_MPIOINT_15_8	0x95	/* MPIOINT[15:8] */
+#define UART_EXAR_MPIOLVL_15_8	0x96	/* MPIOLVL[15:8] */
+#define UART_EXAR_MPIO3T_15_8	0x97	/* MPIO3T[15:8] */
+#define UART_EXAR_MPIOINV_15_8	0x98	/* MPIOINV[15:8] */
+#define UART_EXAR_MPIOSEL_15_8	0x99	/* MPIOSEL[15:8] */
+#define UART_EXAR_MPIOOD_15_8	0x9a	/* MPIOOD[15:8] */
+
 static int
 pci_xr17c154_setup(struct serial_private *priv,
 		  const struct pciserial_board *board,
@@ -1783,18 +1638,18 @@
 	 * Setup Multipurpose Input/Output pins.
 	 */
 	if (idx == 0) {
-		writeb(0x00, p + 0x8f); /*MPIOINT[7:0]*/
-		writeb(0x00, p + 0x90); /*MPIOLVL[7:0]*/
-		writeb(0x00, p + 0x91); /*MPIO3T[7:0]*/
-		writeb(0x00, p + 0x92); /*MPIOINV[7:0]*/
-		writeb(0x00, p + 0x93); /*MPIOSEL[7:0]*/
-		writeb(0x00, p + 0x94); /*MPIOOD[7:0]*/
-		writeb(0x00, p + 0x95); /*MPIOINT[15:8]*/
-		writeb(0x00, p + 0x96); /*MPIOLVL[15:8]*/
-		writeb(0x00, p + 0x97); /*MPIO3T[15:8]*/
-		writeb(0x00, p + 0x98); /*MPIOINV[15:8]*/
-		writeb(0x00, p + 0x99); /*MPIOSEL[15:8]*/
-		writeb(0x00, p + 0x9a); /*MPIOOD[15:8]*/
+		writeb(0x00, p + UART_EXAR_MPIOINT_7_0);
+		writeb(0x00, p + UART_EXAR_MPIOLVL_7_0);
+		writeb(0x00, p + UART_EXAR_MPIO3T_7_0);
+		writeb(0x00, p + UART_EXAR_MPIOINV_7_0);
+		writeb(0x00, p + UART_EXAR_MPIOSEL_7_0);
+		writeb(0x00, p + UART_EXAR_MPIOOD_7_0);
+		writeb(0x00, p + UART_EXAR_MPIOINT_15_8);
+		writeb(0x00, p + UART_EXAR_MPIOLVL_15_8);
+		writeb(0x00, p + UART_EXAR_MPIO3T_15_8);
+		writeb(0x00, p + UART_EXAR_MPIOINV_15_8);
+		writeb(0x00, p + UART_EXAR_MPIOSEL_15_8);
+		writeb(0x00, p + UART_EXAR_MPIOOD_15_8);
 	}
 	writeb(0x00, p + UART_EXAR_8XMODE);
 	writeb(UART_FCTR_EXAR_TRGD, p + UART_EXAR_FCTR);
@@ -1830,20 +1685,20 @@
 		switch (priv->dev->device) {
 		case PCI_DEVICE_ID_COMMTECH_4222PCI335:
 		case PCI_DEVICE_ID_COMMTECH_4224PCI335:
-			writeb(0x78, p + 0x90); /* MPIOLVL[7:0] */
-			writeb(0x00, p + 0x92); /* MPIOINV[7:0] */
-			writeb(0x00, p + 0x93); /* MPIOSEL[7:0] */
+			writeb(0x78, p + UART_EXAR_MPIOLVL_7_0);
+			writeb(0x00, p + UART_EXAR_MPIOINV_7_0);
+			writeb(0x00, p + UART_EXAR_MPIOSEL_7_0);
 			break;
 		case PCI_DEVICE_ID_COMMTECH_2324PCI335:
 		case PCI_DEVICE_ID_COMMTECH_2328PCI335:
-			writeb(0x00, p + 0x90); /* MPIOLVL[7:0] */
-			writeb(0xc0, p + 0x92); /* MPIOINV[7:0] */
-			writeb(0xc0, p + 0x93); /* MPIOSEL[7:0] */
+			writeb(0x00, p + UART_EXAR_MPIOLVL_7_0);
+			writeb(0xc0, p + UART_EXAR_MPIOINV_7_0);
+			writeb(0xc0, p + UART_EXAR_MPIOSEL_7_0);
 			break;
 		}
-		writeb(0x00, p + 0x8f); /* MPIOINT[7:0] */
-		writeb(0x00, p + 0x91); /* MPIO3T[7:0] */
-		writeb(0x00, p + 0x94); /* MPIOOD[7:0] */
+		writeb(0x00, p + UART_EXAR_MPIOINT_7_0);
+		writeb(0x00, p + UART_EXAR_MPIO3T_7_0);
+		writeb(0x00, p + UART_EXAR_MPIOOD_7_0);
 	}
 	writeb(0x00, p + UART_EXAR_8XMODE);
 	writeb(UART_FCTR_EXAR_TRGD, p + UART_EXAR_FCTR);
@@ -1934,7 +1789,6 @@
 #define PCI_DEVICE_ID_COMMTECH_4222PCIE	0x0022
 #define PCI_DEVICE_ID_BROADCOM_TRUMANAGE 0x160a
 #define PCI_DEVICE_ID_AMCC_ADDIDATA_APCI7800 0x818e
-#define PCI_DEVICE_ID_INTEL_QRK_UART	0x0936
 
 #define PCI_VENDOR_ID_SUNIX		0x1fd4
 #define PCI_DEVICE_ID_SUNIX_1999	0x1999
@@ -1950,6 +1804,43 @@
 #define PCI_DEVICE_ID_PERICOM_PI7C9X7954	0x7954
 #define PCI_DEVICE_ID_PERICOM_PI7C9X7958	0x7958
 
+#define PCI_VENDOR_ID_ACCESIO			0x494f
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB	0x1051
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S	0x1053
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB	0x105C
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S	0x105E
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB	0x1091
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2	0x1093
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB	0x1099
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4	0x109B
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB	0x10D1
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM	0x10D3
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB	0x10DA
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM	0x10DC
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1	0x1108
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2	0x1110
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2	0x1111
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4	0x1118
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4	0x1119
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S	0x1152
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S	0x115A
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2	0x1190
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2	0x1191
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4	0x1198
+#define PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4	0x1199
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM	0x11D0
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4	0x105A
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4	0x105B
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8	0x106A
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8	0x106B
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4	0x1098
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8	0x10A9
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM	0x10D9
+#define PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM	0x10E9
+#define PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM	0x11D8
+
+
+
 /* Unknown vendors/cards - this should not be in linux/pci_ids.h */
 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1584	0x1584
 #define PCI_SUBDEVICE_ID_UNKNOWN_0x1588	0x1588
@@ -2041,48 +1932,6 @@
 		.subdevice	= PCI_ANY_ID,
 		.setup		= kt_serial_setup,
 	},
-	{
-		.vendor		= PCI_VENDOR_ID_INTEL,
-		.device		= PCI_DEVICE_ID_INTEL_BYT_UART1,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= PCI_ANY_ID,
-		.setup		= byt_serial_setup,
-	},
-	{
-		.vendor		= PCI_VENDOR_ID_INTEL,
-		.device		= PCI_DEVICE_ID_INTEL_BYT_UART2,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= PCI_ANY_ID,
-		.setup		= byt_serial_setup,
-	},
-	{
-		.vendor		= PCI_VENDOR_ID_INTEL,
-		.device		= PCI_DEVICE_ID_INTEL_BSW_UART1,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= PCI_ANY_ID,
-		.setup		= byt_serial_setup,
-	},
-	{
-		.vendor		= PCI_VENDOR_ID_INTEL,
-		.device		= PCI_DEVICE_ID_INTEL_BSW_UART2,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= PCI_ANY_ID,
-		.setup		= byt_serial_setup,
-	},
-	{
-		.vendor		= PCI_VENDOR_ID_INTEL,
-		.device		= PCI_DEVICE_ID_INTEL_BDW_UART1,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= PCI_ANY_ID,
-		.setup		= byt_serial_setup,
-	},
-	{
-		.vendor		= PCI_VENDOR_ID_INTEL,
-		.device		= PCI_DEVICE_ID_INTEL_BDW_UART2,
-		.subvendor	= PCI_ANY_ID,
-		.subdevice	= PCI_ANY_ID,
-		.setup		= byt_serial_setup,
-	},
 	/*
 	 * ITE
 	 */
@@ -2955,8 +2804,6 @@
 	pbn_ADDIDATA_PCIe_4_3906250,
 	pbn_ADDIDATA_PCIe_8_3906250,
 	pbn_ce4100_1_115200,
-	pbn_byt,
-	pbn_qrk,
 	pbn_omegapci,
 	pbn_NETMOS9900_2s_115200,
 	pbn_brcm_trumanage,
@@ -3732,18 +3579,6 @@
 		.base_baud	= 921600,
 		.reg_shift      = 2,
 	},
-	[pbn_byt] = {
-		.flags		= FL_BASE0,
-		.num_ports	= 1,
-		.base_baud	= 2764800,
-		.reg_shift      = 2,
-	},
-	[pbn_qrk] = {
-		.flags		= FL_BASE0,
-		.num_ports	= 1,
-		.base_baud	= 2764800,
-		.reg_shift	= 2,
-	},
 	[pbn_omegapci] = {
 		.flags		= FL_BASE0,
 		.num_ports	= 8,
@@ -3855,6 +3690,15 @@
 	{ PCI_VDEVICE(INTEL, 0x081d), },
 	{ PCI_VDEVICE(INTEL, 0x1191), },
 	{ PCI_VDEVICE(INTEL, 0x19d8), },
+
+	/* Intel platforms with DesignWare UART */
+	{ PCI_VDEVICE(INTEL, 0x0936), },
+	{ PCI_VDEVICE(INTEL, 0x0f0a), },
+	{ PCI_VDEVICE(INTEL, 0x0f0c), },
+	{ PCI_VDEVICE(INTEL, 0x228a), },
+	{ PCI_VDEVICE(INTEL, 0x228c), },
+	{ PCI_VDEVICE(INTEL, 0x9ce3), },
+	{ PCI_VDEVICE(INTEL, 0x9ce4), },
 };
 
 /*
@@ -5113,6 +4957,108 @@
 		0,
 		0, pbn_pericom_PI7C9X7958 },
 	/*
+	 * ACCES I/O Products quad
+	 */
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SDB,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2S,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SDB,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4S,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_2DB,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_2,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4DB,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM232_4,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_2SMDB,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_2SM,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SMDB,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_COM_4SM,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_1,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_2,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_2,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM422_4,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM485_4,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2S,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_2,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_2,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_2SM,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7954 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_4,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7958 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_4,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7958 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM422_8,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7958 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM485_8,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7958 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_4,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7958 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM232_8,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7958 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_4SM,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7958 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_COM_8SM,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7958 },
+	{	PCI_VENDOR_ID_ACCESIO, PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4SM,
+		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+		pbn_pericom_PI7C9X7958 },
+	/*
 	 * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke)
 	 */
 	{	PCI_VENDOR_ID_TOPIC, PCI_DEVICE_ID_TOPIC_TP560,
@@ -5520,40 +5466,7 @@
 	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100_UART,
 		PCI_ANY_ID,  PCI_ANY_ID, 0, 0,
 		pbn_ce4100_1_115200 },
-	/* Intel BayTrail */
-	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT_UART1,
-		PCI_ANY_ID,  PCI_ANY_ID,
-		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
-		pbn_byt },
-	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT_UART2,
-		PCI_ANY_ID,  PCI_ANY_ID,
-		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
-		pbn_byt },
-	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW_UART1,
-		PCI_ANY_ID,  PCI_ANY_ID,
-		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
-		pbn_byt },
-	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW_UART2,
-		PCI_ANY_ID,  PCI_ANY_ID,
-		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
-		pbn_byt },
 
-	/* Intel Broadwell */
-	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART1,
-		PCI_ANY_ID,  PCI_ANY_ID,
-		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
-		pbn_byt },
-	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_UART2,
-		PCI_ANY_ID,  PCI_ANY_ID,
-		PCI_CLASS_COMMUNICATION_SERIAL << 8, 0xff0000,
-		pbn_byt },
-
-	/*
-	 * Intel Quark x1000
-	 */
-	{	PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_QRK_UART,
-		PCI_ANY_ID, PCI_ANY_ID, 0, 0,
-		pbn_qrk },
 	/*
 	 * Cronyx Omega PCI
 	 */
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 7481b95..1bfb6fd 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -178,7 +178,7 @@
 		.fifo_size	= 16,
 		.tx_loadsz	= 16,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
-		.flags		= UART_CAP_FIFO | UART_CAP_AFE,
+		.flags		= UART_CAP_FIFO /* | UART_CAP_AFE */,
 	},
 	[PORT_U6_16550A] = {
 		.name		= "U6_16550A",
@@ -585,11 +585,11 @@
  */
 int serial8250_em485_init(struct uart_8250_port *p)
 {
-	if (p->em485 != NULL)
+	if (p->em485)
 		return 0;
 
 	p->em485 = kmalloc(sizeof(struct uart_8250_em485), GFP_ATOMIC);
-	if (p->em485 == NULL)
+	if (!p->em485)
 		return -ENOMEM;
 
 	setup_timer(&p->em485->stop_tx_timer,
@@ -619,7 +619,7 @@
  */
 void serial8250_em485_destroy(struct uart_8250_port *p)
 {
-	if (p->em485 == NULL)
+	if (!p->em485)
 		return;
 
 	del_timer(&p->em485->start_tx_timer);
@@ -1402,10 +1402,8 @@
 
 static void __do_stop_tx_rs485(struct uart_8250_port *p)
 {
-	if (!p->em485)
-		return;
-
 	serial8250_em485_rts_after_send(p);
+
 	/*
 	 * Empty the RX FIFO, we are not interested in anything
 	 * received during the half-duplex transmission.
@@ -1414,12 +1412,8 @@
 	if (!(p->port.rs485.flags & SER_RS485_RX_DURING_TX)) {
 		serial8250_clear_fifos(p);
 
-		serial8250_rpm_get(p);
-
 		p->ier |= UART_IER_RLSI | UART_IER_RDI;
 		serial_port_out(&p->port, UART_IER, p->ier);
-
-		serial8250_rpm_put(p);
 	}
 }
 
@@ -1429,6 +1423,7 @@
 	struct uart_8250_em485 *em485 = p->em485;
 	unsigned long flags;
 
+	serial8250_rpm_get(p);
 	spin_lock_irqsave(&p->port.lock, flags);
 	if (em485 &&
 	    em485->active_timer == &em485->stop_tx_timer) {
@@ -1436,15 +1431,13 @@
 		em485->active_timer = NULL;
 	}
 	spin_unlock_irqrestore(&p->port.lock, flags);
+	serial8250_rpm_put(p);
 }
 
 static void __stop_tx_rs485(struct uart_8250_port *p)
 {
 	struct uart_8250_em485 *em485 = p->em485;
 
-	if (!em485)
-		return;
-
 	/*
 	 * __do_stop_tx_rs485 is going to set RTS according to config
 	 * AND flush RX FIFO if required.
@@ -1475,7 +1468,7 @@
 		unsigned char lsr = serial_in(p, UART_LSR);
 		/*
 		 * To provide required timeing and allow FIFO transfer,
-		 * __stop_tx_rs485 must be called only when both FIFO and
+		 * __stop_tx_rs485() must be called only when both FIFO and
 		 * shift register are empty. It is for device driver to enable
 		 * interrupt on TEMT.
 		 */
@@ -1484,9 +1477,10 @@
 
 		del_timer(&em485->start_tx_timer);
 		em485->active_timer = NULL;
+
+		__stop_tx_rs485(p);
 	}
 	__do_stop_tx(p);
-	__stop_tx_rs485(p);
 }
 
 static void serial8250_stop_tx(struct uart_port *port)
@@ -1618,8 +1612,6 @@
 	if (up->bugs & UART_BUG_NOMSR)
 		return;
 
-	mctrl_gpio_disable_ms(up->gpios);
-
 	up->ier &= ~UART_IER_MSI;
 	serial_port_out(port, UART_IER, up->ier);
 }
@@ -1632,8 +1624,6 @@
 	if (up->bugs & UART_BUG_NOMSR)
 		return;
 
-	mctrl_gpio_enable_ms(up->gpios);
-
 	up->ier |= UART_IER_MSI;
 
 	serial8250_rpm_get(up);
@@ -1880,6 +1870,30 @@
 	return ret;
 }
 
+/*
+ * Newer 16550 compatible parts such as the SC16C650 & Altera 16550 Soft IP
+ * have a programmable TX threshold that triggers the THRE interrupt in
+ * the IIR register. In this case, the THRE interrupt indicates the FIFO
+ * has space available. Load it up with tx_loadsz bytes.
+ */
+static int serial8250_tx_threshold_handle_irq(struct uart_port *port)
+{
+	unsigned long flags;
+	unsigned int iir = serial_port_in(port, UART_IIR);
+
+	/* TX Threshold IRQ triggered so load up FIFO */
+	if ((iir & UART_IIR_ID) == UART_IIR_THRI) {
+		struct uart_8250_port *up = up_to_u8250p(port);
+
+		spin_lock_irqsave(&port->lock, flags);
+		serial8250_tx_chars(up);
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	iir = serial_port_in(port, UART_IIR);
+	return serial8250_handle_irq(port, iir);
+}
+
 static unsigned int serial8250_tx_empty(struct uart_port *port)
 {
 	struct uart_8250_port *up = up_to_u8250p(port);
@@ -1917,8 +1931,7 @@
 		ret |= TIOCM_DSR;
 	if (status & UART_MSR_CTS)
 		ret |= TIOCM_CTS;
-
-	return mctrl_gpio_get(up->gpios, &ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(serial8250_do_get_mctrl);
 
@@ -1993,6 +2006,7 @@
 		if (--tmout == 0)
 			break;
 		udelay(1);
+		touch_nmi_watchdog();
 	}
 
 	/* Wait up to 1s for flow control if necessary */
@@ -2169,6 +2183,25 @@
 		serial_port_out(port, UART_LCR, 0);
 	}
 
+	/*
+	 * For the Altera 16550 variants, set TX threshold trigger level.
+	 */
+	if (((port->type == PORT_ALTR_16550_F32) ||
+	     (port->type == PORT_ALTR_16550_F64) ||
+	     (port->type == PORT_ALTR_16550_F128)) && (port->fifosize > 1)) {
+		/* Bounds checking of TX threshold (valid 0 to fifosize-2) */
+		if ((up->tx_loadsz < 2) || (up->tx_loadsz > port->fifosize)) {
+			pr_err("ttyS%d TX FIFO Threshold errors, skipping\n",
+			       serial_index(port));
+		} else {
+			serial_port_out(port, UART_ALTR_AFR,
+					UART_ALTR_EN_TXFIFO_LW);
+			serial_port_out(port, UART_ALTR_TX_LOW,
+					port->fifosize - up->tx_loadsz);
+			port->handle_irq = serial8250_tx_threshold_handle_irq;
+		}
+	}
+
 	if (port->irq) {
 		unsigned char iir1;
 		/*
@@ -2504,8 +2537,6 @@
 					     struct ktermios *termios,
 					     struct ktermios *old)
 {
-	unsigned int tolerance = port->uartclk / 100;
-
 	/*
 	 * Ask the core to calculate the divisor for us.
 	 * Allow 1% tolerance at the upper limit so uart clks marginally
@@ -2514,7 +2545,7 @@
 	 */
 	return uart_get_baud_rate(port, termios, old,
 				  port->uartclk / 16 / 0xffff,
-				  (port->uartclk + tolerance) / 16);
+				  port->uartclk);
 }
 
 void
@@ -2551,12 +2582,9 @@
 	/*
 	 * MCR-based auto flow control.  When AFE is enabled, RTS will be
 	 * deasserted when the receive FIFO contains more characters than
-	 * the trigger, or the MCR RTS bit is cleared.  In the case where
-	 * the remote UART is not using CTS auto flow control, we must
-	 * have sufficient FIFO entries for the latency of the remote
-	 * UART to respond.  IOW, at least 32 bytes of FIFO.
+	 * the trigger, or the MCR RTS bit is cleared.
 	 */
-	if (up->capabilities & UART_CAP_AFE && port->fifosize >= 32) {
+	if (up->capabilities & UART_CAP_AFE) {
 		up->mcr &= ~UART_MCR_AFE;
 		if (termios->c_cflag & CRTSCTS)
 			up->mcr |= UART_MCR_AFE;
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index c9ec839..8998347 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -6,7 +6,6 @@
 config SERIAL_8250
 	tristate "8250/16550 and compatible serial support"
 	select SERIAL_CORE
-	select SERIAL_MCTRL_GPIO if GPIOLIB
 	---help---
 	  This selects whether you want to include the driver for the standard
 	  serial ports.  The standard answer is Y.  People who might say N
@@ -121,7 +120,6 @@
 	tristate "8250/16550 PCI device support" if EXPERT
 	depends on SERIAL_8250 && PCI
 	default SERIAL_8250
-	select RATIONAL
 	help
 	  This builds standard PCI serial support. You may be able to
 	  disable this feature if you only need legacy serial support.
@@ -403,6 +401,21 @@
 	  If you have a system using an Ingenic SoC and wish to make use of
 	  its UARTs, say Y to this option. If unsure, say N.
 
+config SERIAL_8250_LPSS
+	tristate "Support for serial ports on Intel LPSS platforms" if EXPERT
+	default SERIAL_8250
+	depends on SERIAL_8250 && PCI
+	depends on X86 || COMPILE_TEST
+	select DW_DMAC_CORE if SERIAL_8250_DMA
+	select DW_DMAC_PCI if (SERIAL_8250_DMA && X86_INTEL_LPSS)
+	select RATIONAL
+	help
+	  Selecting this option will enable handling of the extra features
+	  present on the UART found on various Intel platforms such as:
+	    - Intel Baytrail SoC
+	    - Intel Braswell SoC
+	    - Intel Quark X1000 SoC
+
 config SERIAL_8250_MID
 	tristate "Support for serial ports on Intel MID platforms" if EXPERT
 	default SERIAL_8250
diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile
index 367d403..276c6fb 100644
--- a/drivers/tty/serial/8250/Makefile
+++ b/drivers/tty/serial/8250/Makefile
@@ -28,6 +28,7 @@
 obj-$(CONFIG_SERIAL_8250_MT6577)	+= 8250_mtk.o
 obj-$(CONFIG_SERIAL_8250_UNIPHIER)	+= 8250_uniphier.o
 obj-$(CONFIG_SERIAL_8250_INGENIC)	+= 8250_ingenic.o
+obj-$(CONFIG_SERIAL_8250_LPSS)		+= 8250_lpss.o
 obj-$(CONFIG_SERIAL_8250_MID)		+= 8250_mid.o
 obj-$(CONFIG_SERIAL_8250_MOXA)		+= 8250_moxa.o
 obj-$(CONFIG_SERIAL_OF_PLATFORM)	+= 8250_of.o
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 518db24a..c783140 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1380,7 +1380,7 @@
 
 config SERIAL_PCH_UART
 	tristate "Intel EG20T PCH/LAPIS Semicon IOH(ML7213/ML7223/ML7831) UART"
-	depends on PCI && (X86_32 || COMPILE_TEST)
+	depends on PCI && (X86_32 || MIPS ||  COMPILE_TEST)
 	select SERIAL_CORE
 	help
 	  This driver is for PCH(Platform controller Hub) UART of Intel EG20T
diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index 32df2a0..e409d7d 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -280,7 +280,7 @@
 /*
  *	Define the basic serial functions we support.
  */
-static struct uart_ops altera_jtaguart_ops = {
+static const struct uart_ops altera_jtaguart_ops = {
 	.tx_empty	= altera_jtaguart_tx_empty,
 	.get_mctrl	= altera_jtaguart_get_mctrl,
 	.set_mctrl	= altera_jtaguart_set_mctrl,
diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c
index 61b607f..820a742 100644
--- a/drivers/tty/serial/altera_uart.c
+++ b/drivers/tty/serial/altera_uart.c
@@ -404,7 +404,7 @@
 /*
  *	Define the basic serial functions we support.
  */
-static struct uart_ops altera_uart_ops = {
+static const struct uart_ops altera_uart_ops = {
 	.tx_empty	= altera_uart_tx_empty,
 	.get_mctrl	= altera_uart_get_mctrl,
 	.set_mctrl	= altera_uart_set_mctrl,
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 8a9e213..e2c33b9 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -93,6 +93,10 @@
 struct vendor_data {
 	const u16		*reg_offset;
 	unsigned int		ifls;
+	unsigned int		fr_busy;
+	unsigned int		fr_dsr;
+	unsigned int		fr_cts;
+	unsigned int		fr_ri;
 	bool			access_32b;
 	bool			oversampling;
 	bool			dma_threshold;
@@ -111,6 +115,10 @@
 static struct vendor_data vendor_arm = {
 	.reg_offset		= pl011_std_offsets,
 	.ifls			= UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
+	.fr_busy		= UART01x_FR_BUSY,
+	.fr_dsr			= UART01x_FR_DSR,
+	.fr_cts			= UART01x_FR_CTS,
+	.fr_ri			= UART011_FR_RI,
 	.oversampling		= false,
 	.dma_threshold		= false,
 	.cts_event_workaround	= false,
@@ -121,6 +129,10 @@
 
 static struct vendor_data vendor_sbsa = {
 	.reg_offset		= pl011_std_offsets,
+	.fr_busy		= UART01x_FR_BUSY,
+	.fr_dsr			= UART01x_FR_DSR,
+	.fr_cts			= UART01x_FR_CTS,
+	.fr_ri			= UART011_FR_RI,
 	.access_32b		= true,
 	.oversampling		= false,
 	.dma_threshold		= false,
@@ -164,6 +176,10 @@
 static struct vendor_data vendor_st = {
 	.reg_offset		= pl011_st_offsets,
 	.ifls			= UART011_IFLS_RX_HALF|UART011_IFLS_TX_HALF,
+	.fr_busy		= UART01x_FR_BUSY,
+	.fr_dsr			= UART01x_FR_DSR,
+	.fr_cts			= UART01x_FR_CTS,
+	.fr_ri			= UART011_FR_RI,
 	.oversampling		= true,
 	.dma_threshold		= true,
 	.cts_event_workaround	= true,
@@ -188,11 +204,20 @@
 	[REG_DMACR] = ZX_UART011_DMACR,
 };
 
-static struct vendor_data vendor_zte __maybe_unused = {
+static unsigned int get_fifosize_zte(struct amba_device *dev)
+{
+	return 16;
+}
+
+static struct vendor_data vendor_zte = {
 	.reg_offset		= pl011_zte_offsets,
 	.access_32b		= true,
 	.ifls			= UART011_IFLS_RX4_8|UART011_IFLS_TX4_8,
-	.get_fifosize		= get_fifosize_arm,
+	.fr_busy		= ZX_UART01x_FR_BUSY,
+	.fr_dsr			= ZX_UART01x_FR_DSR,
+	.fr_cts			= ZX_UART01x_FR_CTS,
+	.fr_ri			= ZX_UART011_FR_RI,
+	.get_fifosize		= get_fifosize_zte,
 };
 
 /* Deals with DMA transactions */
@@ -1167,7 +1192,7 @@
 		return;
 
 	/* Disable RX and TX DMA */
-	while (pl011_read(uap, REG_FR) & UART01x_FR_BUSY)
+	while (pl011_read(uap, REG_FR) & uap->vendor->fr_busy)
 		cpu_relax();
 
 	spin_lock_irq(&uap->port.lock);
@@ -1416,11 +1441,12 @@
 	if (delta & UART01x_FR_DCD)
 		uart_handle_dcd_change(&uap->port, status & UART01x_FR_DCD);
 
-	if (delta & UART01x_FR_DSR)
+	if (delta & uap->vendor->fr_dsr)
 		uap->port.icount.dsr++;
 
-	if (delta & UART01x_FR_CTS)
-		uart_handle_cts_change(&uap->port, status & UART01x_FR_CTS);
+	if (delta & uap->vendor->fr_cts)
+		uart_handle_cts_change(&uap->port,
+				       status & uap->vendor->fr_cts);
 
 	wake_up_interruptible(&uap->port.state->port.delta_msr_wait);
 }
@@ -1493,7 +1519,8 @@
 	struct uart_amba_port *uap =
 	    container_of(port, struct uart_amba_port, port);
 	unsigned int status = pl011_read(uap, REG_FR);
-	return status & (UART01x_FR_BUSY|UART01x_FR_TXFF) ? 0 : TIOCSER_TEMT;
+	return status & (uap->vendor->fr_busy | UART01x_FR_TXFF) ?
+							0 : TIOCSER_TEMT;
 }
 
 static unsigned int pl011_get_mctrl(struct uart_port *port)
@@ -1508,9 +1535,9 @@
 		result |= tiocmbit
 
 	TIOCMBIT(UART01x_FR_DCD, TIOCM_CAR);
-	TIOCMBIT(UART01x_FR_DSR, TIOCM_DSR);
-	TIOCMBIT(UART01x_FR_CTS, TIOCM_CTS);
-	TIOCMBIT(UART011_FR_RI, TIOCM_RNG);
+	TIOCMBIT(uap->vendor->fr_dsr, TIOCM_DSR);
+	TIOCMBIT(uap->vendor->fr_cts, TIOCM_CTS);
+	TIOCMBIT(uap->vendor->fr_ri, TIOCM_RNG);
 #undef TIOCMBIT
 	return result;
 }
@@ -2191,7 +2218,7 @@
 	 *	Finally, wait for transmitter to become empty
 	 *	and restore the TCR
 	 */
-	while (pl011_read(uap, REG_FR) & UART01x_FR_BUSY)
+	while (pl011_read(uap, REG_FR) & uap->vendor->fr_busy)
 		cpu_relax();
 	if (!uap->vendor->always_enabled)
 		pl011_write(old_cr, uap, REG_CR);
@@ -2555,7 +2582,8 @@
 
 	ret = platform_get_irq(pdev, 0);
 	if (ret < 0) {
-		dev_err(&pdev->dev, "cannot obtain irq\n");
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev, "cannot obtain irq\n");
 		return ret;
 	}
 	uap->port.irq	= ret;
@@ -2622,6 +2650,11 @@
 		.mask	= 0x00ffffff,
 		.data	= &vendor_st,
 	},
+	{
+		.id	= AMBA_LINUX_ID(0x00, 0x1, 0xffe),
+		.mask	= 0x00ffffff,
+		.data	= &vendor_zte,
+	},
 	{ 0, 0 },
 };
 
diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c
index 3a1de5c..5ac06fc 100644
--- a/drivers/tty/serial/arc_uart.c
+++ b/drivers/tty/serial/arc_uart.c
@@ -464,7 +464,7 @@
 }
 #endif
 
-static struct uart_ops arc_serial_pops = {
+static const struct uart_ops arc_serial_pops = {
 	.tx_empty	= arc_serial_tx_empty,
 	.set_mctrl	= arc_serial_set_mctrl,
 	.get_mctrl	= arc_serial_get_mctrl,
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 2eaa18d..fd8aa1f 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -166,6 +166,7 @@
 	u32			rts_low;
 	bool			ms_irq_enabled;
 	u32			rtor;	/* address of receiver timeout register if it exists */
+	bool			has_frac_baudrate;
 	bool			has_hw_timer;
 	struct timer_list	uart_timer;
 
@@ -1634,8 +1635,8 @@
 
 	if (np) {
 		/* DMA/PDC usage specification */
-		if (of_get_property(np, "atmel,use-dma-rx", NULL)) {
-			if (of_get_property(np, "dmas", NULL)) {
+		if (of_property_read_bool(np, "atmel,use-dma-rx")) {
+			if (of_property_read_bool(np, "dmas")) {
 				atmel_port->use_dma_rx  = true;
 				atmel_port->use_pdc_rx  = false;
 			} else {
@@ -1647,8 +1648,8 @@
 			atmel_port->use_pdc_rx  = false;
 		}
 
-		if (of_get_property(np, "atmel,use-dma-tx", NULL)) {
-			if (of_get_property(np, "dmas", NULL)) {
+		if (of_property_read_bool(np, "atmel,use-dma-tx")) {
+			if (of_property_read_bool(np, "dmas")) {
 				atmel_port->use_dma_tx  = true;
 				atmel_port->use_pdc_tx  = false;
 			} else {
@@ -1745,6 +1746,11 @@
 	dbgu_uart = 0x44424755;	/* DBGU */
 	new_uart = 0x55415254;	/* UART */
 
+	/*
+	 * Only USART devices from at91sam9260 SOC implement fractional
+	 * baudrate.
+	 */
+	atmel_port->has_frac_baudrate = false;
 	atmel_port->has_hw_timer = false;
 
 	if (name == new_uart) {
@@ -1753,6 +1759,7 @@
 		atmel_port->rtor = ATMEL_UA_RTOR;
 	} else if (name == usart) {
 		dev_dbg(port->dev, "Usart\n");
+		atmel_port->has_frac_baudrate = true;
 		atmel_port->has_hw_timer = true;
 		atmel_port->rtor = ATMEL_US_RTOR;
 	} else if (name == dbgu_uart) {
@@ -1764,6 +1771,7 @@
 		case 0x302:
 		case 0x10213:
 			dev_dbg(port->dev, "This version is usart\n");
+			atmel_port->has_frac_baudrate = true;
 			atmel_port->has_hw_timer = true;
 			atmel_port->rtor = ATMEL_US_RTOR;
 			break;
@@ -1929,6 +1937,9 @@
 {
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
 
+	/* Disable modem control lines interrupts */
+	atmel_disable_ms(port);
+
 	/* Disable interrupts at device level */
 	atmel_uart_writel(port, ATMEL_US_IDR, -1);
 
@@ -1979,8 +1990,6 @@
 	 */
 	free_irq(port->irq, port);
 
-	atmel_port->ms_irq_enabled = false;
-
 	atmel_flush_buffer(port);
 }
 
@@ -2025,8 +2034,9 @@
 static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
 			      struct ktermios *old)
 {
+	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
 	unsigned long flags;
-	unsigned int old_mode, mode, imr, quot, baud;
+	unsigned int old_mode, mode, imr, quot, baud, div, cd, fp = 0;
 
 	/* save the current mode register */
 	mode = old_mode = atmel_uart_readl(port, ATMEL_US_MR);
@@ -2036,12 +2046,6 @@
 		  ATMEL_US_PAR | ATMEL_US_USMODE);
 
 	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
-	quot = uart_get_divisor(port, baud);
-
-	if (quot > 65535) {	/* BRGR is 16-bit, so switch to slower clock */
-		quot /= 8;
-		mode |= ATMEL_US_USCLKS_MCK_DIV8;
-	}
 
 	/* byte size */
 	switch (termios->c_cflag & CSIZE) {
@@ -2160,7 +2164,31 @@
 		atmel_uart_writel(port, ATMEL_US_CR, rts_state);
 	}
 
-	/* set the baud rate */
+	/*
+	 * Set the baud rate:
+	 * Fractional baudrate allows to setup output frequency more
+	 * accurately. This feature is enabled only when using normal mode.
+	 * baudrate = selected clock / (8 * (2 - OVER) * (CD + FP / 8))
+	 * Currently, OVER is always set to 0 so we get
+	 * baudrate = selected clock / (16 * (CD + FP / 8))
+	 * then
+	 * 8 CD + FP = selected clock / (2 * baudrate)
+	 */
+	if (atmel_port->has_frac_baudrate &&
+	    (mode & ATMEL_US_USMODE) == ATMEL_US_USMODE_NORMAL) {
+		div = DIV_ROUND_CLOSEST(port->uartclk, baud * 2);
+		cd = div >> 3;
+		fp = div & ATMEL_US_FP_MASK;
+	} else {
+		cd = uart_get_divisor(port, baud);
+	}
+
+	if (cd > 65535) {	/* BRGR is 16-bit, so switch to slower clock */
+		cd /= 8;
+		mode |= ATMEL_US_USCLKS_MCK_DIV8;
+	}
+	quot = cd | fp << ATMEL_US_FP_OFFSET;
+
 	atmel_uart_writel(port, ATMEL_US_BRGR, quot);
 	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_RSTSTA | ATMEL_US_RSTRX);
 	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN | ATMEL_US_RXEN);
@@ -2292,7 +2320,7 @@
 }
 #endif
 
-static struct uart_ops atmel_pops = {
+static const struct uart_ops atmel_pops = {
 	.tx_empty	= atmel_tx_empty,
 	.set_mctrl	= atmel_set_mctrl,
 	.get_mctrl	= atmel_get_mctrl,
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index 5108fab..583c9a0 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -631,7 +631,7 @@
 }
 
 /* serial core callbacks */
-static struct uart_ops bcm_uart_ops = {
+static const struct uart_ops bcm_uart_ops = {
 	.tx_empty	= bcm_uart_tx_empty,
 	.get_mctrl	= bcm_uart_get_mctrl,
 	.set_mctrl	= bcm_uart_set_mctrl,
diff --git a/drivers/tty/serial/earlycon-arm-semihost.c b/drivers/tty/serial/earlycon-arm-semihost.c
index 383db10..6bbeb69 100644
--- a/drivers/tty/serial/earlycon-arm-semihost.c
+++ b/drivers/tty/serial/earlycon-arm-semihost.c
@@ -53,7 +53,8 @@
 	uart_console_write(&dev->port, s, n, smh_putc);
 }
 
-int __init early_smh_setup(struct earlycon_device *device, const char *opt)
+static int
+__init early_smh_setup(struct earlycon_device *device, const char *opt)
 {
 	device->con->write = smh_write;
 	return 0;
diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
index 067783f..c365154 100644
--- a/drivers/tty/serial/earlycon.c
+++ b/drivers/tty/serial/earlycon.c
@@ -21,6 +21,7 @@
 #include <linux/sizes.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
+#include <linux/acpi.h>
 
 #ifdef CONFIG_FIX_EARLYCON_MEM
 #include <asm/fixmap.h>
@@ -38,7 +39,7 @@
 	.con = &early_con,
 };
 
-static void __iomem * __init earlycon_map(unsigned long paddr, size_t size)
+static void __iomem * __init earlycon_map(resource_size_t paddr, size_t size)
 {
 	void __iomem *base;
 #ifdef CONFIG_FIX_EARLYCON_MEM
@@ -49,8 +50,7 @@
 	base = ioremap(paddr, size);
 #endif
 	if (!base)
-		pr_err("%s: Couldn't map 0x%llx\n", __func__,
-		       (unsigned long long)paddr);
+		pr_err("%s: Couldn't map %pa\n", __func__, &paddr);
 
 	return base;
 }
@@ -92,7 +92,7 @@
 {
 	struct uart_port *port = &device->port;
 	int length;
-	unsigned long addr;
+	resource_size_t addr;
 
 	if (uart_parse_earlycon(options, &port->iotype, &addr, &options))
 		return -EINVAL;
@@ -199,6 +199,14 @@
 	return -ENOENT;
 }
 
+/*
+ * When CONFIG_ACPI_SPCR_TABLE is defined, "earlycon" without parameters in
+ * command line does not start DT earlycon immediately, instead it defers
+ * starting it until DT/ACPI decision is made.  At that time if ACPI is enabled
+ * call parse_spcr(), else call early_init_dt_scan_chosen_stdout()
+ */
+bool earlycon_init_is_deferred __initdata;
+
 /* early_param wrapper for setup_earlycon() */
 static int __init param_setup_earlycon(char *buf)
 {
@@ -208,8 +216,14 @@
 	 * Just 'earlycon' is a valid param for devicetree earlycons;
 	 * don't generate a warning from parse_early_params() in that case
 	 */
-	if (!buf || !buf[0])
-		return 0;
+	if (!buf || !buf[0]) {
+		if (IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) {
+			earlycon_init_is_deferred = true;
+			return 0;
+		} else {
+			return early_init_dt_scan_chosen_stdout();
+		}
+	}
 
 	err = setup_earlycon(buf);
 	if (err == -ENOENT || err == -EALREADY)
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 7f95f78..de9d510 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -224,7 +224,8 @@
 #define UARTWATER_TXWATER_OFF	0
 #define UARTWATER_RXWATER_OFF	16
 
-#define FSL_UART_RX_DMA_BUFFER_SIZE	64
+/* Rx DMA timeout in ms, which is used to calculate Rx ring buffer size */
+#define DMA_RX_TIMEOUT		(10)
 
 #define DRIVER_NAME	"fsl-lpuart"
 #define DEV_NAME	"ttyLP"
@@ -243,18 +244,18 @@
 	struct dma_chan		*dma_rx_chan;
 	struct dma_async_tx_descriptor  *dma_tx_desc;
 	struct dma_async_tx_descriptor  *dma_rx_desc;
-	dma_addr_t		dma_tx_buf_bus;
-	dma_addr_t		dma_rx_buf_bus;
 	dma_cookie_t		dma_tx_cookie;
 	dma_cookie_t		dma_rx_cookie;
-	unsigned char		*dma_tx_buf_virt;
-	unsigned char		*dma_rx_buf_virt;
 	unsigned int		dma_tx_bytes;
 	unsigned int		dma_rx_bytes;
-	int			dma_tx_in_progress;
-	int			dma_rx_in_progress;
+	bool			dma_tx_in_progress;
 	unsigned int		dma_rx_timeout;
 	struct timer_list	lpuart_timer;
+	struct scatterlist	rx_sgl, tx_sgl[2];
+	struct circ_buf		rx_ring;
+	int			rx_dma_rng_buf_len;
+	unsigned int		dma_tx_nents;
+	wait_queue_head_t	dma_wait;
 };
 
 static const struct of_device_id lpuart_dt_ids[] = {
@@ -270,7 +271,6 @@
 
 /* Forward declare this for the dma callbacks*/
 static void lpuart_dma_tx_complete(void *arg);
-static void lpuart_dma_rx_complete(void *arg);
 
 static u32 lpuart32_read(void __iomem *addr)
 {
@@ -316,217 +316,121 @@
 	lpuart32_write(temp & ~UARTCTRL_RE, port->membase + UARTCTRL);
 }
 
-static void lpuart_copy_rx_to_tty(struct lpuart_port *sport,
-		struct tty_port *tty, int count)
+static void lpuart_dma_tx(struct lpuart_port *sport)
 {
-	int copied;
+	struct circ_buf *xmit = &sport->port.state->xmit;
+	struct scatterlist *sgl = sport->tx_sgl;
+	struct device *dev = sport->port.dev;
+	int ret;
 
-	sport->port.icount.rx += count;
+	if (sport->dma_tx_in_progress)
+		return;
 
-	if (!tty) {
-		dev_err(sport->port.dev, "No tty port\n");
+	sport->dma_tx_bytes = uart_circ_chars_pending(xmit);
+
+	if (xmit->tail < xmit->head) {
+		sport->dma_tx_nents = 1;
+		sg_init_one(sgl, xmit->buf + xmit->tail, sport->dma_tx_bytes);
+	} else {
+		sport->dma_tx_nents = 2;
+		sg_init_table(sgl, 2);
+		sg_set_buf(sgl, xmit->buf + xmit->tail,
+				UART_XMIT_SIZE - xmit->tail);
+		sg_set_buf(sgl + 1, xmit->buf, xmit->head);
+	}
+
+	ret = dma_map_sg(dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE);
+	if (!ret) {
+		dev_err(dev, "DMA mapping error for TX.\n");
 		return;
 	}
 
-	dma_sync_single_for_cpu(sport->port.dev, sport->dma_rx_buf_bus,
-			FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
-	copied = tty_insert_flip_string(tty,
-			((unsigned char *)(sport->dma_rx_buf_virt)), count);
-
-	if (copied != count) {
-		WARN_ON(1);
-		dev_err(sport->port.dev, "RxData copy to tty layer failed\n");
-	}
-
-	dma_sync_single_for_device(sport->port.dev, sport->dma_rx_buf_bus,
-			FSL_UART_RX_DMA_BUFFER_SIZE, DMA_TO_DEVICE);
-}
-
-static void lpuart_pio_tx(struct lpuart_port *sport)
-{
-	struct circ_buf *xmit = &sport->port.state->xmit;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sport->port.lock, flags);
-
-	while (!uart_circ_empty(xmit) &&
-		readb(sport->port.membase + UARTTCFIFO) < sport->txfifo_size) {
-		writeb(xmit->buf[xmit->tail], sport->port.membase + UARTDR);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		sport->port.icount.tx++;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&sport->port);
-
-	if (uart_circ_empty(xmit))
-		writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_TDMAS,
-			sport->port.membase + UARTCR5);
-
-	spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static int lpuart_dma_tx(struct lpuart_port *sport, unsigned long count)
-{
-	struct circ_buf *xmit = &sport->port.state->xmit;
-	dma_addr_t tx_bus_addr;
-
-	dma_sync_single_for_device(sport->port.dev, sport->dma_tx_buf_bus,
-				UART_XMIT_SIZE, DMA_TO_DEVICE);
-	sport->dma_tx_bytes = count & ~(sport->txfifo_size - 1);
-	tx_bus_addr = sport->dma_tx_buf_bus + xmit->tail;
-	sport->dma_tx_desc = dmaengine_prep_slave_single(sport->dma_tx_chan,
-					tx_bus_addr, sport->dma_tx_bytes,
+	sport->dma_tx_desc = dmaengine_prep_slave_sg(sport->dma_tx_chan, sgl,
+					sport->dma_tx_nents,
 					DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT);
-
 	if (!sport->dma_tx_desc) {
-		dev_err(sport->port.dev, "Not able to get desc for tx\n");
-		return -EIO;
+		dma_unmap_sg(dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE);
+		dev_err(dev, "Cannot prepare TX slave DMA!\n");
+		return;
 	}
 
 	sport->dma_tx_desc->callback = lpuart_dma_tx_complete;
 	sport->dma_tx_desc->callback_param = sport;
-	sport->dma_tx_in_progress = 1;
+	sport->dma_tx_in_progress = true;
 	sport->dma_tx_cookie = dmaengine_submit(sport->dma_tx_desc);
 	dma_async_issue_pending(sport->dma_tx_chan);
 
-	return 0;
-}
-
-static void lpuart_prepare_tx(struct lpuart_port *sport)
-{
-	struct circ_buf *xmit = &sport->port.state->xmit;
-	unsigned long count =  CIRC_CNT_TO_END(xmit->head,
-					xmit->tail, UART_XMIT_SIZE);
-
-	if (!count)
-		return;
-
-	if (count < sport->txfifo_size)
-		writeb(readb(sport->port.membase + UARTCR5) & ~UARTCR5_TDMAS,
-				sport->port.membase + UARTCR5);
-	else {
-		writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_TDMAS,
-				sport->port.membase + UARTCR5);
-		lpuart_dma_tx(sport, count);
-	}
 }
 
 static void lpuart_dma_tx_complete(void *arg)
 {
 	struct lpuart_port *sport = arg;
+	struct scatterlist *sgl = &sport->tx_sgl[0];
 	struct circ_buf *xmit = &sport->port.state->xmit;
 	unsigned long flags;
 
-	async_tx_ack(sport->dma_tx_desc);
-
 	spin_lock_irqsave(&sport->port.lock, flags);
 
+	dma_unmap_sg(sport->port.dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE);
+
 	xmit->tail = (xmit->tail + sport->dma_tx_bytes) & (UART_XMIT_SIZE - 1);
-	sport->dma_tx_in_progress = 0;
+
+	sport->port.icount.tx += sport->dma_tx_bytes;
+	sport->dma_tx_in_progress = false;
+	spin_unlock_irqrestore(&sport->port.lock, flags);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(&sport->port);
 
-	lpuart_prepare_tx(sport);
+	if (waitqueue_active(&sport->dma_wait)) {
+		wake_up(&sport->dma_wait);
+		return;
+	}
+
+	spin_lock_irqsave(&sport->port.lock, flags);
+
+	if (!uart_circ_empty(xmit) && !uart_tx_stopped(&sport->port))
+		lpuart_dma_tx(sport);
 
 	spin_unlock_irqrestore(&sport->port.lock, flags);
 }
 
-static int lpuart_dma_rx(struct lpuart_port *sport)
+static int lpuart_dma_tx_request(struct uart_port *port)
 {
-	dma_sync_single_for_device(sport->port.dev, sport->dma_rx_buf_bus,
-			FSL_UART_RX_DMA_BUFFER_SIZE, DMA_TO_DEVICE);
-	sport->dma_rx_desc = dmaengine_prep_slave_single(sport->dma_rx_chan,
-			sport->dma_rx_buf_bus, FSL_UART_RX_DMA_BUFFER_SIZE,
-			DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
+	struct lpuart_port *sport = container_of(port,
+					struct lpuart_port, port);
+	struct dma_slave_config dma_tx_sconfig = {};
+	int ret;
 
-	if (!sport->dma_rx_desc) {
-		dev_err(sport->port.dev, "Not able to get desc for rx\n");
-		return -EIO;
+	dma_tx_sconfig.dst_addr = sport->port.mapbase + UARTDR;
+	dma_tx_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	dma_tx_sconfig.dst_maxburst = 1;
+	dma_tx_sconfig.direction = DMA_MEM_TO_DEV;
+	ret = dmaengine_slave_config(sport->dma_tx_chan, &dma_tx_sconfig);
+
+	if (ret) {
+		dev_err(sport->port.dev,
+				"DMA slave config failed, err = %d\n", ret);
+		return ret;
 	}
 
-	sport->dma_rx_desc->callback = lpuart_dma_rx_complete;
-	sport->dma_rx_desc->callback_param = sport;
-	sport->dma_rx_in_progress = 1;
-	sport->dma_rx_cookie = dmaengine_submit(sport->dma_rx_desc);
-	dma_async_issue_pending(sport->dma_rx_chan);
-
 	return 0;
 }
 
 static void lpuart_flush_buffer(struct uart_port *port)
 {
 	struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
+
 	if (sport->lpuart_dma_tx_use) {
+		if (sport->dma_tx_in_progress) {
+			dma_unmap_sg(sport->port.dev, &sport->tx_sgl[0],
+				sport->dma_tx_nents, DMA_TO_DEVICE);
+			sport->dma_tx_in_progress = false;
+		}
 		dmaengine_terminate_all(sport->dma_tx_chan);
-		sport->dma_tx_in_progress = 0;
 	}
 }
 
-static void lpuart_dma_rx_complete(void *arg)
-{
-	struct lpuart_port *sport = arg;
-	struct tty_port *port = &sport->port.state->port;
-	unsigned long flags;
-
-	async_tx_ack(sport->dma_rx_desc);
-	mod_timer(&sport->lpuart_timer, jiffies + sport->dma_rx_timeout);
-
-	spin_lock_irqsave(&sport->port.lock, flags);
-
-	sport->dma_rx_in_progress = 0;
-	lpuart_copy_rx_to_tty(sport, port, FSL_UART_RX_DMA_BUFFER_SIZE);
-	tty_flip_buffer_push(port);
-	lpuart_dma_rx(sport);
-
-	spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static void lpuart_timer_func(unsigned long data)
-{
-	struct lpuart_port *sport = (struct lpuart_port *)data;
-	struct tty_port *port = &sport->port.state->port;
-	struct dma_tx_state state;
-	unsigned long flags;
-	unsigned char temp;
-	int count;
-
-	del_timer(&sport->lpuart_timer);
-	dmaengine_pause(sport->dma_rx_chan);
-	dmaengine_tx_status(sport->dma_rx_chan, sport->dma_rx_cookie, &state);
-	dmaengine_terminate_all(sport->dma_rx_chan);
-	count = FSL_UART_RX_DMA_BUFFER_SIZE - state.residue;
-	async_tx_ack(sport->dma_rx_desc);
-
-	spin_lock_irqsave(&sport->port.lock, flags);
-
-	sport->dma_rx_in_progress = 0;
-	lpuart_copy_rx_to_tty(sport, port, count);
-	tty_flip_buffer_push(port);
-	temp = readb(sport->port.membase + UARTCR5);
-	writeb(temp & ~UARTCR5_RDMAS, sport->port.membase + UARTCR5);
-
-	spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
-static inline void lpuart_prepare_rx(struct lpuart_port *sport)
-{
-	unsigned long flags;
-	unsigned char temp;
-
-	spin_lock_irqsave(&sport->port.lock, flags);
-
-	sport->lpuart_timer.expires = jiffies + sport->dma_rx_timeout;
-	add_timer(&sport->lpuart_timer);
-
-	lpuart_dma_rx(sport);
-	temp = readb(sport->port.membase + UARTCR5);
-	writeb(temp | UARTCR5_RDMAS, sport->port.membase + UARTCR5);
-
-	spin_unlock_irqrestore(&sport->port.lock, flags);
-}
-
 static inline void lpuart_transmit_buffer(struct lpuart_port *sport)
 {
 	struct circ_buf *xmit = &sport->port.state->xmit;
@@ -580,8 +484,8 @@
 	writeb(temp | UARTCR2_TIE, port->membase + UARTCR2);
 
 	if (sport->lpuart_dma_tx_use) {
-		if (!uart_circ_empty(xmit) && !sport->dma_tx_in_progress)
-			lpuart_prepare_tx(sport);
+		if (!uart_circ_empty(xmit) && !uart_tx_stopped(port))
+			lpuart_dma_tx(sport);
 	} else {
 		if (readb(port->membase + UARTSR1) & UARTSR1_TDRE)
 			lpuart_transmit_buffer(sport);
@@ -600,6 +504,29 @@
 		lpuart32_transmit_buffer(sport);
 }
 
+/* return TIOCSER_TEMT when transmitter is not busy */
+static unsigned int lpuart_tx_empty(struct uart_port *port)
+{
+	struct lpuart_port *sport = container_of(port,
+			struct lpuart_port, port);
+	unsigned char sr1 = readb(port->membase + UARTSR1);
+	unsigned char sfifo = readb(port->membase + UARTSFIFO);
+
+	if (sport->dma_tx_in_progress)
+		return 0;
+
+	if (sr1 & UARTSR1_TC && sfifo & UARTSFIFO_TXEMPT)
+		return TIOCSER_TEMT;
+
+	return 0;
+}
+
+static unsigned int lpuart32_tx_empty(struct uart_port *port)
+{
+	return (lpuart32_read(port->membase + UARTSTAT) & UARTSTAT_TC) ?
+		TIOCSER_TEMT : 0;
+}
+
 static irqreturn_t lpuart_txint(int irq, void *dev_id)
 {
 	struct lpuart_port *sport = dev_id;
@@ -766,23 +693,15 @@
 static irqreturn_t lpuart_int(int irq, void *dev_id)
 {
 	struct lpuart_port *sport = dev_id;
-	unsigned char sts, crdma;
+	unsigned char sts;
 
 	sts = readb(sport->port.membase + UARTSR1);
-	crdma = readb(sport->port.membase + UARTCR5);
 
-	if (sts & UARTSR1_RDRF && !(crdma & UARTCR5_RDMAS)) {
-		if (sport->lpuart_dma_rx_use)
-			lpuart_prepare_rx(sport);
-		else
-			lpuart_rxint(irq, dev_id);
-	}
-	if (sts & UARTSR1_TDRE && !(crdma & UARTCR5_TDMAS)) {
-		if (sport->lpuart_dma_tx_use)
-			lpuart_pio_tx(sport);
-		else
-			lpuart_txint(irq, dev_id);
-	}
+	if (sts & UARTSR1_RDRF)
+		lpuart_rxint(irq, dev_id);
+
+	if (sts & UARTSR1_TDRE)
+		lpuart_txint(irq, dev_id);
 
 	return IRQ_HANDLED;
 }
@@ -807,17 +726,241 @@
 	return IRQ_HANDLED;
 }
 
-/* return TIOCSER_TEMT when transmitter is not busy */
-static unsigned int lpuart_tx_empty(struct uart_port *port)
+static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
 {
-	return (readb(port->membase + UARTSR1) & UARTSR1_TC) ?
-		TIOCSER_TEMT : 0;
+	struct tty_port *port = &sport->port.state->port;
+	struct dma_tx_state state;
+	enum dma_status dmastat;
+	struct circ_buf *ring = &sport->rx_ring;
+	unsigned long flags;
+	int count = 0;
+	unsigned char sr;
+
+	sr = readb(sport->port.membase + UARTSR1);
+
+	if (sr & (UARTSR1_PE | UARTSR1_FE)) {
+		/* Read DR to clear the error flags */
+		readb(sport->port.membase + UARTDR);
+
+		if (sr & UARTSR1_PE)
+		    sport->port.icount.parity++;
+		else if (sr & UARTSR1_FE)
+		    sport->port.icount.frame++;
+	}
+
+	async_tx_ack(sport->dma_rx_desc);
+
+	spin_lock_irqsave(&sport->port.lock, flags);
+
+	dmastat = dmaengine_tx_status(sport->dma_rx_chan,
+				sport->dma_rx_cookie,
+				&state);
+
+	if (dmastat == DMA_ERROR) {
+		dev_err(sport->port.dev, "Rx DMA transfer failed!\n");
+		spin_unlock_irqrestore(&sport->port.lock, flags);
+		return;
+	}
+
+	/* CPU claims ownership of RX DMA buffer */
+	dma_sync_sg_for_cpu(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+
+	/*
+	 * ring->head points to the end of data already written by the DMA.
+	 * ring->tail points to the beginning of data to be read by the
+	 * framework.
+	 * The current transfer size should not be larger than the dma buffer
+	 * length.
+	 */
+	ring->head = sport->rx_sgl.length - state.residue;
+	BUG_ON(ring->head > sport->rx_sgl.length);
+	/*
+	 * At this point ring->head may point to the first byte right after the
+	 * last byte of the dma buffer:
+	 * 0 <= ring->head <= sport->rx_sgl.length
+	 *
+	 * However ring->tail must always points inside the dma buffer:
+	 * 0 <= ring->tail <= sport->rx_sgl.length - 1
+	 *
+	 * Since we use a ring buffer, we have to handle the case
+	 * where head is lower than tail. In such a case, we first read from
+	 * tail to the end of the buffer then reset tail.
+	 */
+	if (ring->head < ring->tail) {
+		count = sport->rx_sgl.length - ring->tail;
+
+		tty_insert_flip_string(port, ring->buf + ring->tail, count);
+		ring->tail = 0;
+		sport->port.icount.rx += count;
+	}
+
+	/* Finally we read data from tail to head */
+	if (ring->tail < ring->head) {
+		count = ring->head - ring->tail;
+		tty_insert_flip_string(port, ring->buf + ring->tail, count);
+		/* Wrap ring->head if needed */
+		if (ring->head >= sport->rx_sgl.length)
+			ring->head = 0;
+		ring->tail = ring->head;
+		sport->port.icount.rx += count;
+	}
+
+	dma_sync_sg_for_device(sport->port.dev, &sport->rx_sgl, 1,
+			       DMA_FROM_DEVICE);
+
+	spin_unlock_irqrestore(&sport->port.lock, flags);
+
+	tty_flip_buffer_push(port);
+	mod_timer(&sport->lpuart_timer, jiffies + sport->dma_rx_timeout);
 }
 
-static unsigned int lpuart32_tx_empty(struct uart_port *port)
+static void lpuart_dma_rx_complete(void *arg)
 {
-	return (lpuart32_read(port->membase + UARTSTAT) & UARTSTAT_TC) ?
-		TIOCSER_TEMT : 0;
+	struct lpuart_port *sport = arg;
+
+	lpuart_copy_rx_to_tty(sport);
+}
+
+static void lpuart_timer_func(unsigned long data)
+{
+	struct lpuart_port *sport = (struct lpuart_port *)data;
+
+	lpuart_copy_rx_to_tty(sport);
+}
+
+static inline int lpuart_start_rx_dma(struct lpuart_port *sport)
+{
+	struct dma_slave_config dma_rx_sconfig = {};
+	struct circ_buf *ring = &sport->rx_ring;
+	int ret, nent;
+	int bits, baud;
+	struct tty_struct *tty = tty_port_tty_get(&sport->port.state->port);
+	struct ktermios *termios = &tty->termios;
+
+	baud = tty_get_baud_rate(tty);
+
+	bits = (termios->c_cflag & CSIZE) == CS7 ? 9 : 10;
+	if (termios->c_cflag & PARENB)
+		bits++;
+
+	/*
+	 * Calculate length of one DMA buffer size to keep latency below
+	 * 10ms at any baud rate.
+	 */
+	sport->rx_dma_rng_buf_len = (DMA_RX_TIMEOUT * baud /  bits / 1000) * 2;
+	sport->rx_dma_rng_buf_len = (1 << (fls(sport->rx_dma_rng_buf_len) - 1));
+	if (sport->rx_dma_rng_buf_len < 16)
+		sport->rx_dma_rng_buf_len = 16;
+
+	ring->buf = kmalloc(sport->rx_dma_rng_buf_len, GFP_ATOMIC);
+	if (!ring->buf) {
+		dev_err(sport->port.dev, "Ring buf alloc failed\n");
+		return -ENOMEM;
+	}
+
+	sg_init_one(&sport->rx_sgl, ring->buf, sport->rx_dma_rng_buf_len);
+	sg_set_buf(&sport->rx_sgl, ring->buf, sport->rx_dma_rng_buf_len);
+	nent = dma_map_sg(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+
+	if (!nent) {
+		dev_err(sport->port.dev, "DMA Rx mapping error\n");
+		return -EINVAL;
+	}
+
+	dma_rx_sconfig.src_addr = sport->port.mapbase + UARTDR;
+	dma_rx_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+	dma_rx_sconfig.src_maxburst = 1;
+	dma_rx_sconfig.direction = DMA_DEV_TO_MEM;
+	ret = dmaengine_slave_config(sport->dma_rx_chan, &dma_rx_sconfig);
+
+	if (ret < 0) {
+		dev_err(sport->port.dev,
+				"DMA Rx slave config failed, err = %d\n", ret);
+		return ret;
+	}
+
+	sport->dma_rx_desc = dmaengine_prep_dma_cyclic(sport->dma_rx_chan,
+				 sg_dma_address(&sport->rx_sgl),
+				 sport->rx_sgl.length,
+				 sport->rx_sgl.length / 2,
+				 DMA_DEV_TO_MEM,
+				 DMA_PREP_INTERRUPT);
+	if (!sport->dma_rx_desc) {
+		dev_err(sport->port.dev, "Cannot prepare cyclic DMA\n");
+		return -EFAULT;
+	}
+
+	sport->dma_rx_desc->callback = lpuart_dma_rx_complete;
+	sport->dma_rx_desc->callback_param = sport;
+	sport->dma_rx_cookie = dmaengine_submit(sport->dma_rx_desc);
+	dma_async_issue_pending(sport->dma_rx_chan);
+
+	writeb(readb(sport->port.membase + UARTCR5) | UARTCR5_RDMAS,
+				sport->port.membase + UARTCR5);
+
+	return 0;
+}
+
+static void lpuart_dma_rx_free(struct uart_port *port)
+{
+	struct lpuart_port *sport = container_of(port,
+					struct lpuart_port, port);
+
+	if (sport->dma_rx_chan)
+		dmaengine_terminate_all(sport->dma_rx_chan);
+
+	dma_unmap_sg(sport->port.dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
+	kfree(sport->rx_ring.buf);
+	sport->rx_ring.tail = 0;
+	sport->rx_ring.head = 0;
+	sport->dma_rx_desc = NULL;
+	sport->dma_rx_cookie = -EINVAL;
+}
+
+static int lpuart_config_rs485(struct uart_port *port,
+			struct serial_rs485 *rs485)
+{
+	struct lpuart_port *sport = container_of(port,
+			struct lpuart_port, port);
+
+	u8 modem = readb(sport->port.membase + UARTMODEM) &
+		~(UARTMODEM_TXRTSPOL | UARTMODEM_TXRTSE);
+	writeb(modem, sport->port.membase + UARTMODEM);
+
+	if (rs485->flags & SER_RS485_ENABLED) {
+		/* Enable auto RS-485 RTS mode */
+		modem |= UARTMODEM_TXRTSE;
+
+		/*
+		 * RTS needs to be logic HIGH either during transer _or_ after
+		 * transfer, other variants are not supported by the hardware.
+		 */
+
+		if (!(rs485->flags & (SER_RS485_RTS_ON_SEND |
+				SER_RS485_RTS_AFTER_SEND)))
+			rs485->flags |= SER_RS485_RTS_ON_SEND;
+
+		if (rs485->flags & SER_RS485_RTS_ON_SEND &&
+				rs485->flags & SER_RS485_RTS_AFTER_SEND)
+			rs485->flags &= ~SER_RS485_RTS_AFTER_SEND;
+
+		/*
+		 * The hardware defaults to RTS logic HIGH while transfer.
+		 * Switch polarity in case RTS shall be logic HIGH
+		 * after transfer.
+		 * Note: UART is assumed to be active high.
+		 */
+		if (rs485->flags & SER_RS485_RTS_ON_SEND)
+			modem &= ~UARTMODEM_TXRTSPOL;
+		else if (rs485->flags & SER_RS485_RTS_AFTER_SEND)
+			modem |= UARTMODEM_TXRTSPOL;
+	}
+
+	/* Store the new configuration */
+	sport->port.rs485 = *rs485;
+
+	writeb(modem, sport->port.membase + UARTMODEM);
+	return 0;
 }
 
 static unsigned int lpuart_get_mctrl(struct uart_port *port)
@@ -853,17 +996,22 @@
 static void lpuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
 	unsigned char temp;
+	struct lpuart_port *sport = container_of(port,
+				struct lpuart_port, port);
 
-	temp = readb(port->membase + UARTMODEM) &
+	/* Make sure RXRTSE bit is not set when RS485 is enabled */
+	if (!(sport->port.rs485.flags & SER_RS485_ENABLED)) {
+		temp = readb(sport->port.membase + UARTMODEM) &
 			~(UARTMODEM_RXRTSE | UARTMODEM_TXCTSE);
 
-	if (mctrl & TIOCM_RTS)
-		temp |= UARTMODEM_RXRTSE;
+		if (mctrl & TIOCM_RTS)
+			temp |= UARTMODEM_RXRTSE;
 
-	if (mctrl & TIOCM_CTS)
-		temp |= UARTMODEM_TXCTSE;
+		if (mctrl & TIOCM_CTS)
+			temp |= UARTMODEM_TXCTSE;
 
-	writeb(temp, port->membase + UARTMODEM);
+		writeb(temp, port->membase + UARTMODEM);
+	}
 }
 
 static void lpuart32_set_mctrl(struct uart_port *port, unsigned int mctrl)
@@ -921,13 +1069,16 @@
 	writeb(val | UARTPFIFO_TXFE | UARTPFIFO_RXFE,
 			sport->port.membase + UARTPFIFO);
 
-	/* explicitly clear RDRF */
-	readb(sport->port.membase + UARTSR1);
-
 	/* flush Tx and Rx FIFO */
 	writeb(UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH,
 			sport->port.membase + UARTCFIFO);
 
+	/* explicitly clear RDRF */
+	if (readb(sport->port.membase + UARTSR1) & UARTSR1_RDRF) {
+		readb(sport->port.membase + UARTDR);
+		writeb(UARTSFIFO_RXUF, sport->port.membase + UARTSFIFO);
+	}
+
 	writeb(0, sport->port.membase + UARTTWFIFO);
 	writeb(1, sport->port.membase + UARTRWFIFO);
 
@@ -960,110 +1111,12 @@
 	lpuart32_write(ctrl_saved, sport->port.membase + UARTCTRL);
 }
 
-static int lpuart_dma_tx_request(struct uart_port *port)
+static void rx_dma_timer_init(struct lpuart_port *sport)
 {
-	struct lpuart_port *sport = container_of(port,
-					struct lpuart_port, port);
-	struct dma_slave_config dma_tx_sconfig;
-	dma_addr_t dma_bus;
-	unsigned char *dma_buf;
-	int ret;
-
-	dma_bus = dma_map_single(sport->dma_tx_chan->device->dev,
-				sport->port.state->xmit.buf,
-				UART_XMIT_SIZE, DMA_TO_DEVICE);
-
-	if (dma_mapping_error(sport->dma_tx_chan->device->dev, dma_bus)) {
-		dev_err(sport->port.dev, "dma_map_single tx failed\n");
-		return -ENOMEM;
-	}
-
-	dma_buf = sport->port.state->xmit.buf;
-	dma_tx_sconfig.dst_addr = sport->port.mapbase + UARTDR;
-	dma_tx_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
-	dma_tx_sconfig.dst_maxburst = sport->txfifo_size;
-	dma_tx_sconfig.direction = DMA_MEM_TO_DEV;
-	ret = dmaengine_slave_config(sport->dma_tx_chan, &dma_tx_sconfig);
-
-	if (ret < 0) {
-		dev_err(sport->port.dev,
-				"Dma slave config failed, err = %d\n", ret);
-		return ret;
-	}
-
-	sport->dma_tx_buf_virt = dma_buf;
-	sport->dma_tx_buf_bus = dma_bus;
-	sport->dma_tx_in_progress = 0;
-
-	return 0;
-}
-
-static int lpuart_dma_rx_request(struct uart_port *port)
-{
-	struct lpuart_port *sport = container_of(port,
-					struct lpuart_port, port);
-	struct dma_slave_config dma_rx_sconfig;
-	dma_addr_t dma_bus;
-	unsigned char *dma_buf;
-	int ret;
-
-	dma_buf = devm_kzalloc(sport->port.dev,
-				FSL_UART_RX_DMA_BUFFER_SIZE, GFP_KERNEL);
-
-	if (!dma_buf) {
-		dev_err(sport->port.dev, "Dma rx alloc failed\n");
-		return -ENOMEM;
-	}
-
-	dma_bus = dma_map_single(sport->dma_rx_chan->device->dev, dma_buf,
-				FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
-
-	if (dma_mapping_error(sport->dma_rx_chan->device->dev, dma_bus)) {
-		dev_err(sport->port.dev, "dma_map_single rx failed\n");
-		return -ENOMEM;
-	}
-
-	dma_rx_sconfig.src_addr = sport->port.mapbase + UARTDR;
-	dma_rx_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
-	dma_rx_sconfig.src_maxburst = 1;
-	dma_rx_sconfig.direction = DMA_DEV_TO_MEM;
-	ret = dmaengine_slave_config(sport->dma_rx_chan, &dma_rx_sconfig);
-
-	if (ret < 0) {
-		dev_err(sport->port.dev,
-				"Dma slave config failed, err = %d\n", ret);
-		return ret;
-	}
-
-	sport->dma_rx_buf_virt = dma_buf;
-	sport->dma_rx_buf_bus = dma_bus;
-	sport->dma_rx_in_progress = 0;
-
-	return 0;
-}
-
-static void lpuart_dma_tx_free(struct uart_port *port)
-{
-	struct lpuart_port *sport = container_of(port,
-					struct lpuart_port, port);
-
-	dma_unmap_single(sport->port.dev, sport->dma_tx_buf_bus,
-			UART_XMIT_SIZE, DMA_TO_DEVICE);
-
-	sport->dma_tx_buf_bus = 0;
-	sport->dma_tx_buf_virt = NULL;
-}
-
-static void lpuart_dma_rx_free(struct uart_port *port)
-{
-	struct lpuart_port *sport = container_of(port,
-					struct lpuart_port, port);
-
-	dma_unmap_single(sport->port.dev, sport->dma_rx_buf_bus,
-			FSL_UART_RX_DMA_BUFFER_SIZE, DMA_FROM_DEVICE);
-
-	sport->dma_rx_buf_bus = 0;
-	sport->dma_rx_buf_virt = NULL;
+		setup_timer(&sport->lpuart_timer, lpuart_timer_func,
+				(unsigned long)sport);
+		sport->lpuart_timer.expires = jiffies + sport->dma_rx_timeout;
+		add_timer(&sport->lpuart_timer);
 }
 
 static int lpuart_startup(struct uart_port *port)
@@ -1084,22 +1137,6 @@
 	sport->rxfifo_size = 0x1 << (((temp >> UARTPFIFO_RXSIZE_OFF) &
 		UARTPFIFO_FIFOSIZE_MASK) + 1);
 
-	if (sport->dma_rx_chan && !lpuart_dma_rx_request(port)) {
-		sport->lpuart_dma_rx_use = true;
-		setup_timer(&sport->lpuart_timer, lpuart_timer_func,
-			    (unsigned long)sport);
-	} else
-		sport->lpuart_dma_rx_use = false;
-
-
-	if (sport->dma_tx_chan && !lpuart_dma_tx_request(port)) {
-		sport->lpuart_dma_tx_use = true;
-		temp = readb(port->membase + UARTCR5);
-		temp &= ~UARTCR5_RDMAS;
-		writeb(temp | UARTCR5_TDMAS, port->membase + UARTCR5);
-	} else
-		sport->lpuart_dma_tx_use = false;
-
 	ret = devm_request_irq(port->dev, port->irq, lpuart_int, 0,
 				DRIVER_NAME, sport);
 	if (ret)
@@ -1113,7 +1150,29 @@
 	temp |= (UARTCR2_RIE | UARTCR2_TIE | UARTCR2_RE | UARTCR2_TE);
 	writeb(temp, sport->port.membase + UARTCR2);
 
+	if (sport->dma_rx_chan && !lpuart_start_rx_dma(sport)) {
+		/* set Rx DMA timeout */
+		sport->dma_rx_timeout = msecs_to_jiffies(DMA_RX_TIMEOUT);
+		if (!sport->dma_rx_timeout)
+		     sport->dma_rx_timeout = 1;
+
+		sport->lpuart_dma_rx_use = true;
+		rx_dma_timer_init(sport);
+	} else {
+		sport->lpuart_dma_rx_use = false;
+	}
+
+	if (sport->dma_tx_chan && !lpuart_dma_tx_request(port)) {
+		init_waitqueue_head(&sport->dma_wait);
+		sport->lpuart_dma_tx_use = true;
+		temp = readb(port->membase + UARTCR5);
+		writeb(temp | UARTCR5_TDMAS, port->membase + UARTCR5);
+	} else {
+		sport->lpuart_dma_tx_use = false;
+	}
+
 	spin_unlock_irqrestore(&sport->port.lock, flags);
+
 	return 0;
 }
 
@@ -1170,12 +1229,19 @@
 	devm_free_irq(port->dev, port->irq, sport);
 
 	if (sport->lpuart_dma_rx_use) {
-		lpuart_dma_rx_free(&sport->port);
 		del_timer_sync(&sport->lpuart_timer);
+		lpuart_dma_rx_free(&sport->port);
 	}
 
-	if (sport->lpuart_dma_tx_use)
-		lpuart_dma_tx_free(&sport->port);
+	if (sport->lpuart_dma_tx_use) {
+		if (wait_event_interruptible(sport->dma_wait,
+			!sport->dma_tx_in_progress) != false) {
+			sport->dma_tx_in_progress = false;
+			dmaengine_terminate_all(sport->dma_tx_chan);
+		}
+
+		lpuart_stop_tx(port);
+	}
 }
 
 static void lpuart32_shutdown(struct uart_port *port)
@@ -1203,13 +1269,14 @@
 {
 	struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
 	unsigned long flags;
-	unsigned char cr1, old_cr1, old_cr2, cr4, bdh, modem;
+	unsigned char cr1, old_cr1, old_cr2, cr3, cr4, bdh, modem;
 	unsigned int  baud;
 	unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
 	unsigned int sbr, brfa;
 
 	cr1 = old_cr1 = readb(sport->port.membase + UARTCR1);
 	old_cr2 = readb(sport->port.membase + UARTCR2);
+	cr3 = readb(sport->port.membase + UARTCR3);
 	cr4 = readb(sport->port.membase + UARTCR4);
 	bdh = readb(sport->port.membase + UARTBDH);
 	modem = readb(sport->port.membase + UARTMODEM);
@@ -1240,6 +1307,13 @@
 		cr1 |= UARTCR1_M;
 	}
 
+	/*
+	 * When auto RS-485 RTS mode is enabled,
+	 * hardware flow control need to be disabled.
+	 */
+	if (sport->port.rs485.flags & SER_RS485_ENABLED)
+		termios->c_cflag &= ~CRTSCTS;
+
 	if (termios->c_cflag & CRTSCTS) {
 		modem |= (UARTMODEM_RXRTSE | UARTMODEM_TXCTSE);
 	} else {
@@ -1257,7 +1331,10 @@
 	if ((termios->c_cflag & PARENB)) {
 		if (termios->c_cflag & CMSPAR) {
 			cr1 &= ~UARTCR1_PE;
-			cr1 |= UARTCR1_M;
+			if (termios->c_cflag & PARODD)
+				cr3 |= UARTCR3_T8;
+			else
+				cr3 &= ~UARTCR3_T8;
 		} else {
 			cr1 |= UARTCR1_PE;
 			if ((termios->c_cflag & CSIZE) == CS8)
@@ -1297,17 +1374,6 @@
 	/* update the per-port timeout */
 	uart_update_timeout(port, termios->c_cflag, baud);
 
-	if (sport->lpuart_dma_rx_use) {
-		/* Calculate delay for 1.5 DMA buffers */
-		sport->dma_rx_timeout = (sport->port.timeout - HZ / 50) *
-					FSL_UART_RX_DMA_BUFFER_SIZE * 3 /
-					sport->rxfifo_size / 2;
-		dev_dbg(port->dev, "DMA Rx t-out %ums, tty t-out %u jiffies\n",
-			sport->dma_rx_timeout * 1000 / HZ, sport->port.timeout);
-		if (sport->dma_rx_timeout < msecs_to_jiffies(20))
-			sport->dma_rx_timeout = msecs_to_jiffies(20);
-	}
-
 	/* wait transmit engin complete */
 	while (!(readb(sport->port.membase + UARTSR1) & UARTSR1_TC))
 		barrier();
@@ -1325,12 +1391,31 @@
 	writeb(cr4 | brfa, sport->port.membase + UARTCR4);
 	writeb(bdh, sport->port.membase + UARTBDH);
 	writeb(sbr & 0xFF, sport->port.membase + UARTBDL);
+	writeb(cr3, sport->port.membase + UARTCR3);
 	writeb(cr1, sport->port.membase + UARTCR1);
 	writeb(modem, sport->port.membase + UARTMODEM);
 
 	/* restore control register */
 	writeb(old_cr2, sport->port.membase + UARTCR2);
 
+	/*
+	 * If new baud rate is set, we will also need to update the Ring buffer
+	 * length according to the selected baud rate and restart Rx DMA path.
+	 */
+	if (old) {
+		if (sport->lpuart_dma_rx_use) {
+			del_timer_sync(&sport->lpuart_timer);
+			lpuart_dma_rx_free(&sport->port);
+		}
+
+		if (sport->dma_rx_chan && !lpuart_start_rx_dma(sport)) {
+			sport->lpuart_dma_rx_use = true;
+			rx_dma_timer_init(sport);
+		} else {
+			sport->lpuart_dma_rx_use = false;
+		}
+	}
+
 	spin_unlock_irqrestore(&sport->port.lock, flags);
 }
 
@@ -1494,7 +1579,7 @@
 	return ret;
 }
 
-static struct uart_ops lpuart_pops = {
+static const struct uart_ops lpuart_pops = {
 	.tx_empty	= lpuart_tx_empty,
 	.set_mctrl	= lpuart_set_mctrl,
 	.get_mctrl	= lpuart_get_mctrl,
@@ -1513,7 +1598,7 @@
 	.flush_buffer	= lpuart_flush_buffer,
 };
 
-static struct uart_ops lpuart32_pops = {
+static const struct uart_ops lpuart32_pops = {
 	.tx_empty	= lpuart32_tx_empty,
 	.set_mctrl	= lpuart32_set_mctrl,
 	.get_mctrl	= lpuart32_get_mctrl,
@@ -1843,6 +1928,8 @@
 		sport->port.ops = &lpuart_pops;
 	sport->port.flags = UPF_BOOT_AUTOCONF;
 
+	sport->port.rs485_config = lpuart_config_rs485;
+
 	sport->clk = devm_clk_get(&pdev->dev, "ipg");
 	if (IS_ERR(sport->clk)) {
 		ret = PTR_ERR(sport->clk);
@@ -1883,6 +1970,12 @@
 		dev_info(sport->port.dev, "DMA rx channel request failed, "
 				"operating without rx DMA\n");
 
+	if (of_property_read_bool(np, "linux,rs485-enabled-at-boot-time")) {
+		sport->port.rs485.flags |= SER_RS485_ENABLED;
+		sport->port.rs485.flags |= SER_RS485_RTS_ON_SEND;
+		writeb(UARTMODEM_TXRTSE, sport->port.membase + UARTMODEM);
+	}
+
 	return 0;
 }
 
@@ -1923,6 +2016,32 @@
 
 	uart_suspend_port(&lpuart_reg, &sport->port);
 
+	if (sport->lpuart_dma_rx_use) {
+		/*
+		 * EDMA driver during suspend will forcefully release any
+		 * non-idle DMA channels. If port wakeup is enabled or if port
+		 * is console port or 'no_console_suspend' is set the Rx DMA
+		 * cannot resume as as expected, hence gracefully release the
+		 * Rx DMA path before suspend and start Rx DMA path on resume.
+		 */
+		if (sport->port.irq_wake) {
+			del_timer_sync(&sport->lpuart_timer);
+			lpuart_dma_rx_free(&sport->port);
+		}
+
+		/* Disable Rx DMA to use UART port as wakeup source */
+		writeb(readb(sport->port.membase + UARTCR5) & ~UARTCR5_RDMAS,
+					sport->port.membase + UARTCR5);
+	}
+
+	if (sport->lpuart_dma_tx_use) {
+		sport->dma_tx_in_progress = false;
+		dmaengine_terminate_all(sport->dma_tx_chan);
+	}
+
+	if (sport->port.suspended && !sport->port.irq_wake)
+		clk_disable_unprepare(sport->clk);
+
 	return 0;
 }
 
@@ -1931,6 +2050,9 @@
 	struct lpuart_port *sport = dev_get_drvdata(dev);
 	unsigned long temp;
 
+	if (sport->port.suspended && !sport->port.irq_wake)
+		clk_prepare_enable(sport->clk);
+
 	if (sport->lpuart32) {
 		lpuart32_setup_watermark(sport);
 		temp = lpuart32_read(sport->port.membase + UARTCTRL);
@@ -1944,6 +2066,26 @@
 		writeb(temp, sport->port.membase + UARTCR2);
 	}
 
+	if (sport->lpuart_dma_rx_use) {
+		if (sport->port.irq_wake) {
+			if (!lpuart_start_rx_dma(sport)) {
+				sport->lpuart_dma_rx_use = true;
+				rx_dma_timer_init(sport);
+			} else {
+				sport->lpuart_dma_rx_use = false;
+			}
+		}
+	}
+
+	if (sport->dma_tx_chan && !lpuart_dma_tx_request(&sport->port)) {
+			init_waitqueue_head(&sport->dma_wait);
+			sport->lpuart_dma_tx_use = true;
+			writeb(readb(sport->port.membase + UARTCR5) |
+				UARTCR5_TDMAS, sport->port.membase + UARTCR5);
+	} else {
+		sport->lpuart_dma_tx_use = false;
+	}
+
 	uart_resume_port(&lpuart_reg, &sport->port);
 
 	return 0;
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 0df2b1c..a70356d 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -190,6 +190,7 @@
 enum imx_uart_type {
 	IMX1_UART,
 	IMX21_UART,
+	IMX53_UART,
 	IMX6Q_UART,
 };
 
@@ -222,6 +223,9 @@
 	struct dma_chan		*dma_chan_rx, *dma_chan_tx;
 	struct scatterlist	rx_sgl, tx_sgl[2];
 	void			*rx_buf;
+	struct circ_buf		rx_ring;
+	unsigned int		rx_periods;
+	dma_cookie_t		rx_cookie;
 	unsigned int		tx_bytes;
 	unsigned int		dma_tx_nents;
 	wait_queue_head_t	dma_wait;
@@ -244,6 +248,10 @@
 		.uts_reg = IMX21_UTS,
 		.devtype = IMX21_UART,
 	},
+	[IMX53_UART] = {
+		.uts_reg = IMX21_UTS,
+		.devtype = IMX53_UART,
+	},
 	[IMX6Q_UART] = {
 		.uts_reg = IMX21_UTS,
 		.devtype = IMX6Q_UART,
@@ -258,6 +266,9 @@
 		.name = "imx21-uart",
 		.driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX21_UART],
 	}, {
+		.name = "imx53-uart",
+		.driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX53_UART],
+	}, {
 		.name = "imx6q-uart",
 		.driver_data = (kernel_ulong_t) &imx_uart_devdata[IMX6Q_UART],
 	}, {
@@ -268,6 +279,7 @@
 
 static const struct of_device_id imx_uart_dt_ids[] = {
 	{ .compatible = "fsl,imx6q-uart", .data = &imx_uart_devdata[IMX6Q_UART], },
+	{ .compatible = "fsl,imx53-uart", .data = &imx_uart_devdata[IMX53_UART], },
 	{ .compatible = "fsl,imx1-uart", .data = &imx_uart_devdata[IMX1_UART], },
 	{ .compatible = "fsl,imx21-uart", .data = &imx_uart_devdata[IMX21_UART], },
 	{ /* sentinel */ }
@@ -289,6 +301,11 @@
 	return sport->devdata->devtype == IMX21_UART;
 }
 
+static inline int is_imx53_uart(struct imx_port *sport)
+{
+	return sport->devdata->devtype == IMX53_UART;
+}
+
 static inline int is_imx6q_uart(struct imx_port *sport)
 {
 	return sport->devdata->devtype == IMX6Q_UART;
@@ -701,6 +718,7 @@
 	return IRQ_HANDLED;
 }
 
+static void clear_rx_errors(struct imx_port *sport);
 static int start_rx_dma(struct imx_port *sport);
 /*
  * If the RXFIFO is filled with some data, and then we
@@ -726,6 +744,11 @@
 		temp &= ~(UCR2_ATEN);
 		writel(temp, sport->port.membase + UCR2);
 
+		/* disable the rx errors interrupts */
+		temp = readl(sport->port.membase + UCR4);
+		temp &= ~UCR4_OREN;
+		writel(temp, sport->port.membase + UCR4);
+
 		/* tell the DMA to receive the data. */
 		start_rx_dma(sport);
 	}
@@ -740,12 +763,13 @@
 {
 	unsigned int tmp = TIOCM_DSR;
 	unsigned usr1 = readl(sport->port.membase + USR1);
+	unsigned usr2 = readl(sport->port.membase + USR2);
 
 	if (usr1 & USR1_RTSS)
 		tmp |= TIOCM_CTS;
 
 	/* in DCE mode DCDIN is always 0 */
-	if (!(usr1 & USR2_DCDIN))
+	if (!(usr2 & USR2_DCDIN))
 		tmp |= TIOCM_CAR;
 
 	if (sport->dte_mode)
@@ -932,30 +956,6 @@
 }
 
 #define RX_BUF_SIZE	(PAGE_SIZE)
-static void imx_rx_dma_done(struct imx_port *sport)
-{
-	unsigned long temp;
-	unsigned long flags;
-
-	spin_lock_irqsave(&sport->port.lock, flags);
-
-	/* re-enable interrupts to get notified when new symbols are incoming */
-	temp = readl(sport->port.membase + UCR1);
-	temp |= UCR1_RRDYEN;
-	writel(temp, sport->port.membase + UCR1);
-
-	temp = readl(sport->port.membase + UCR2);
-	temp |= UCR2_ATEN;
-	writel(temp, sport->port.membase + UCR2);
-
-	sport->dma_is_rxing = 0;
-
-	/* Is the shutdown waiting for us? */
-	if (waitqueue_active(&sport->dma_wait))
-		wake_up(&sport->dma_wait);
-
-	spin_unlock_irqrestore(&sport->port.lock, flags);
-}
 
 /*
  * There are two kinds of RX DMA interrupts(such as in the MX6Q):
@@ -972,43 +972,76 @@
 	struct scatterlist *sgl = &sport->rx_sgl;
 	struct tty_port *port = &sport->port.state->port;
 	struct dma_tx_state state;
+	struct circ_buf *rx_ring = &sport->rx_ring;
 	enum dma_status status;
-	unsigned int count;
-
-	/* unmap it first */
-	dma_unmap_sg(sport->port.dev, sgl, 1, DMA_FROM_DEVICE);
+	unsigned int w_bytes = 0;
+	unsigned int r_bytes;
+	unsigned int bd_size;
 
 	status = dmaengine_tx_status(chan, (dma_cookie_t)0, &state);
-	count = RX_BUF_SIZE - state.residue;
 
-	dev_dbg(sport->port.dev, "We get %d bytes.\n", count);
-
-	if (count) {
-		if (!(sport->port.ignore_status_mask & URXD_DUMMY_READ)) {
-			int bytes = tty_insert_flip_string(port, sport->rx_buf,
-					count);
-
-			if (bytes != count)
-				sport->port.icount.buf_overrun++;
-		}
-		tty_flip_buffer_push(port);
-		sport->port.icount.rx += count;
+	if (status == DMA_ERROR) {
+		dev_err(sport->port.dev, "DMA transaction error.\n");
+		clear_rx_errors(sport);
+		return;
 	}
 
-	/*
-	 * Restart RX DMA directly if more data is available in order to skip
-	 * the roundtrip through the IRQ handler. If there is some data already
-	 * in the FIFO, DMA needs to be restarted soon anyways.
-	 *
-	 * Otherwise stop the DMA and reactivate FIFO IRQs to restart DMA once
-	 * data starts to arrive again.
-	 */
-	if (readl(sport->port.membase + USR2) & USR2_RDR)
-		start_rx_dma(sport);
-	else
-		imx_rx_dma_done(sport);
+	if (!(sport->port.ignore_status_mask & URXD_DUMMY_READ)) {
+
+		/*
+		 * The state-residue variable represents the empty space
+		 * relative to the entire buffer. Taking this in consideration
+		 * the head is always calculated base on the buffer total
+		 * length - DMA transaction residue. The UART script from the
+		 * SDMA firmware will jump to the next buffer descriptor,
+		 * once a DMA transaction if finalized (IMX53 RM - A.4.1.2.4).
+		 * Taking this in consideration the tail is always at the
+		 * beginning of the buffer descriptor that contains the head.
+		 */
+
+		/* Calculate the head */
+		rx_ring->head = sg_dma_len(sgl) - state.residue;
+
+		/* Calculate the tail. */
+		bd_size = sg_dma_len(sgl) / sport->rx_periods;
+		rx_ring->tail = ((rx_ring->head-1) / bd_size) * bd_size;
+
+		if (rx_ring->head <= sg_dma_len(sgl) &&
+		    rx_ring->head > rx_ring->tail) {
+
+			/* Move data from tail to head */
+			r_bytes = rx_ring->head - rx_ring->tail;
+
+			/* CPU claims ownership of RX DMA buffer */
+			dma_sync_sg_for_cpu(sport->port.dev, sgl, 1,
+				DMA_FROM_DEVICE);
+
+			w_bytes = tty_insert_flip_string(port,
+				sport->rx_buf + rx_ring->tail, r_bytes);
+
+			/* UART retrieves ownership of RX DMA buffer */
+			dma_sync_sg_for_device(sport->port.dev, sgl, 1,
+				DMA_FROM_DEVICE);
+
+			if (w_bytes != r_bytes)
+				sport->port.icount.buf_overrun++;
+
+			sport->port.icount.rx += w_bytes;
+		} else	{
+			WARN_ON(rx_ring->head > sg_dma_len(sgl));
+			WARN_ON(rx_ring->head <= rx_ring->tail);
+		}
+	}
+
+	if (w_bytes) {
+		tty_flip_buffer_push(port);
+		dev_dbg(sport->port.dev, "We get %d bytes.\n", w_bytes);
+	}
 }
 
+/* RX DMA buffer periods */
+#define RX_DMA_PERIODS 4
+
 static int start_rx_dma(struct imx_port *sport)
 {
 	struct scatterlist *sgl = &sport->rx_sgl;
@@ -1017,14 +1050,21 @@
 	struct dma_async_tx_descriptor *desc;
 	int ret;
 
+	sport->rx_ring.head = 0;
+	sport->rx_ring.tail = 0;
+	sport->rx_periods = RX_DMA_PERIODS;
+
 	sg_init_one(sgl, sport->rx_buf, RX_BUF_SIZE);
 	ret = dma_map_sg(dev, sgl, 1, DMA_FROM_DEVICE);
 	if (ret == 0) {
 		dev_err(dev, "DMA mapping error for RX.\n");
 		return -EINVAL;
 	}
-	desc = dmaengine_prep_slave_sg(chan, sgl, 1, DMA_DEV_TO_MEM,
-					DMA_PREP_INTERRUPT);
+
+	desc = dmaengine_prep_dma_cyclic(chan, sg_dma_address(sgl),
+		sg_dma_len(sgl), sg_dma_len(sgl) / sport->rx_periods,
+		DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
+
 	if (!desc) {
 		dma_unmap_sg(dev, sgl, 1, DMA_FROM_DEVICE);
 		dev_err(dev, "We cannot prepare for the RX slave dma!\n");
@@ -1034,11 +1074,36 @@
 	desc->callback_param = sport;
 
 	dev_dbg(dev, "RX: prepare for the DMA.\n");
-	dmaengine_submit(desc);
+	sport->rx_cookie = dmaengine_submit(desc);
 	dma_async_issue_pending(chan);
 	return 0;
 }
 
+static void clear_rx_errors(struct imx_port *sport)
+{
+	unsigned int status_usr1, status_usr2;
+
+	status_usr1 = readl(sport->port.membase + USR1);
+	status_usr2 = readl(sport->port.membase + USR2);
+
+	if (status_usr2 & USR2_BRCD) {
+		sport->port.icount.brk++;
+		writel(USR2_BRCD, sport->port.membase + USR2);
+	} else if (status_usr1 & USR1_FRAMERR) {
+		sport->port.icount.frame++;
+		writel(USR1_FRAMERR, sport->port.membase + USR1);
+	} else if (status_usr1 & USR1_PARITYERR) {
+		sport->port.icount.parity++;
+		writel(USR1_PARITYERR, sport->port.membase + USR1);
+	}
+
+	if (status_usr2 & USR2_ORE) {
+		sport->port.icount.overrun++;
+		writel(USR2_ORE, sport->port.membase + USR2);
+	}
+
+}
+
 #define TXTL_DEFAULT 2 /* reset default */
 #define RXTL_DEFAULT 1 /* reset default */
 #define TXTL_DMA 8 /* DMA burst setting */
@@ -1058,14 +1123,16 @@
 static void imx_uart_dma_exit(struct imx_port *sport)
 {
 	if (sport->dma_chan_rx) {
+		dmaengine_terminate_sync(sport->dma_chan_rx);
 		dma_release_channel(sport->dma_chan_rx);
 		sport->dma_chan_rx = NULL;
-
+		sport->rx_cookie = -EINVAL;
 		kfree(sport->rx_buf);
 		sport->rx_buf = NULL;
 	}
 
 	if (sport->dma_chan_tx) {
+		dmaengine_terminate_sync(sport->dma_chan_tx);
 		dma_release_channel(sport->dma_chan_tx);
 		sport->dma_chan_tx = NULL;
 	}
@@ -1103,6 +1170,7 @@
 		ret = -ENOMEM;
 		goto err;
 	}
+	sport->rx_ring.buf = sport->rx_buf;
 
 	/* Prepare for TX : */
 	sport->dma_chan_tx = dma_request_slave_channel(dev, "tx");
@@ -1201,8 +1269,7 @@
 	writel(temp & ~UCR4_DREN, sport->port.membase + UCR4);
 
 	/* Can we enable the DMA support? */
-	if (is_imx6q_uart(sport) && !uart_console(port) &&
-	    !sport->dma_is_inited)
+	if (!uart_console(port) && !sport->dma_is_inited)
 		imx_uart_dma_init(sport);
 
 	spin_lock_irqsave(&sport->port.lock, flags);
@@ -1283,17 +1350,11 @@
 	unsigned long flags;
 
 	if (sport->dma_is_enabled) {
-		int ret;
+		sport->dma_is_rxing = 0;
+		sport->dma_is_txing = 0;
+		dmaengine_terminate_sync(sport->dma_chan_tx);
+		dmaengine_terminate_sync(sport->dma_chan_rx);
 
-		/* We have to wait for the DMA to finish. */
-		ret = wait_event_interruptible(sport->dma_wait,
-			!sport->dma_is_rxing && !sport->dma_is_txing);
-		if (ret != 0) {
-			sport->dma_is_rxing = 0;
-			sport->dma_is_txing = 0;
-			dmaengine_terminate_all(sport->dma_chan_tx);
-			dmaengine_terminate_all(sport->dma_chan_rx);
-		}
 		spin_lock_irqsave(&sport->port.lock, flags);
 		imx_stop_tx(port);
 		imx_stop_rx(port);
@@ -1690,7 +1751,7 @@
 	return 0;
 }
 
-static struct uart_ops imx_pops = {
+static const struct uart_ops imx_pops = {
 	.tx_empty	= imx_tx_empty,
 	.set_mctrl	= imx_set_mctrl,
 	.get_mctrl	= imx_get_mctrl,
@@ -2077,8 +2138,10 @@
 
 	/* For register access, we only need to enable the ipg clock. */
 	ret = clk_prepare_enable(sport->clk_ipg);
-	if (ret)
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable per clk: %d\n", ret);
 		return ret;
+	}
 
 	/* Disable interrupts before requesting them */
 	reg = readl_relaxed(sport->port.membase + UCR1);
@@ -2095,18 +2158,26 @@
 	if (txirq > 0) {
 		ret = devm_request_irq(&pdev->dev, rxirq, imx_rxint, 0,
 				       dev_name(&pdev->dev), sport);
-		if (ret)
+		if (ret) {
+			dev_err(&pdev->dev, "failed to request rx irq: %d\n",
+				ret);
 			return ret;
+		}
 
 		ret = devm_request_irq(&pdev->dev, txirq, imx_txint, 0,
 				       dev_name(&pdev->dev), sport);
-		if (ret)
+		if (ret) {
+			dev_err(&pdev->dev, "failed to request tx irq: %d\n",
+				ret);
 			return ret;
+		}
 	} else {
 		ret = devm_request_irq(&pdev->dev, rxirq, imx_int, 0,
 				       dev_name(&pdev->dev), sport);
-		if (ret)
+		if (ret) {
+			dev_err(&pdev->dev, "failed to request irq: %d\n", ret);
 			return ret;
+		}
 	}
 
 	imx_ports[sport->port.line] = sport;
diff --git a/drivers/tty/serial/jsm/jsm_tty.c b/drivers/tty/serial/jsm/jsm_tty.c
index c5ddfe5..ec7d838 100644
--- a/drivers/tty/serial/jsm/jsm_tty.c
+++ b/drivers/tty/serial/jsm/jsm_tty.c
@@ -346,7 +346,7 @@
 	port->type = PORT_JSM;
 }
 
-static struct uart_ops jsm_ops = {
+static const struct uart_ops jsm_ops = {
 	.tx_empty	= jsm_tty_tx_empty,
 	.set_mctrl	= jsm_tty_set_mctrl,
 	.get_mctrl	= jsm_tty_get_mctrl,
diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c
index 5c4c280..ace8264 100644
--- a/drivers/tty/serial/max3100.c
+++ b/drivers/tty/serial/max3100.c
@@ -712,7 +712,7 @@
 	dev_dbg(&s->spi->dev, "%s\n", __func__);
 }
 
-static struct uart_ops max3100_ops = {
+static const struct uart_ops max3100_ops = {
 	.tx_empty	= max3100_tx_empty,
 	.set_mctrl	= max3100_set_mctrl,
 	.get_mctrl	= max3100_get_mctrl,
diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index 9360801..8a3e926 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -1329,9 +1329,9 @@
 		const struct spi_device_id *id_entry = spi_get_device_id(spi);
 
 		devtype = (struct max310x_devtype *)id_entry->driver_data;
-		flags = IRQF_TRIGGER_FALLING;
 	}
 
+	flags = IRQF_TRIGGER_FALLING;
 	regcfg.max_register = devtype->nr * 0x20 - 1;
 	regmap = devm_regmap_init_spi(spi, &regcfg);
 
diff --git a/drivers/tty/serial/men_z135_uart.c b/drivers/tty/serial/men_z135_uart.c
index a44290e..e72ea61 100644
--- a/drivers/tty/serial/men_z135_uart.c
+++ b/drivers/tty/serial/men_z135_uart.c
@@ -775,7 +775,7 @@
 	return -EINVAL;
 }
 
-static struct uart_ops men_z135_ops = {
+static const struct uart_ops men_z135_ops = {
 	.tx_empty = men_z135_tx_empty,
 	.set_mctrl = men_z135_set_mctrl,
 	.get_mctrl = men_z135_get_mctrl,
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index eb54e5c..770454e 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -1317,7 +1317,7 @@
 		mxs_clr(AUART_LINECTRL_BRK, s, REG_LINECTRL);
 }
 
-static struct uart_ops mxs_auart_ops = {
+static const struct uart_ops mxs_auart_ops = {
 	.tx_empty       = mxs_auart_tx_empty,
 	.start_tx       = mxs_auart_start_tx,
 	.stop_tx	= mxs_auart_stop_tx,
@@ -1510,10 +1510,7 @@
 
 	if (!is_asm9260_auart(s)) {
 		s->clk = devm_clk_get(&pdev->dev, NULL);
-		if (IS_ERR(s->clk))
-			return PTR_ERR(s->clk);
-
-		return 0;
+		return PTR_ERR_OR_ZERO(s->clk);
 	}
 
 	s->clk = devm_clk_get(s->dev, "mod");
@@ -1537,16 +1534,20 @@
 	err = clk_set_rate(s->clk, clk_get_rate(s->clk_ahb));
 	if (err) {
 		dev_err(s->dev, "Failed to set rate!\n");
-		return err;
+		goto disable_clk_ahb;
 	}
 
 	err = clk_prepare_enable(s->clk);
 	if (err) {
 		dev_err(s->dev, "Failed to enable clk!\n");
-		return err;
+		goto disable_clk_ahb;
 	}
 
 	return 0;
+
+disable_clk_ahb:
+	clk_disable_unprepare(s->clk_ahb);
+	return err;
 }
 
 /*
diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index ea4ffc2..d391650 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -31,6 +31,7 @@
 #include <linux/dmi.h>
 #include <linux/nmi.h>
 #include <linux/delay.h>
+#include <linux/of.h>
 
 #include <linux/debugfs.h>
 #include <linux/dmaengine.h>
@@ -1603,7 +1604,7 @@
 }
 #endif /* CONFIG_CONSOLE_POLL */
 
-static struct uart_ops pch_uart_ops = {
+static const struct uart_ops pch_uart_ops = {
 	.tx_empty = pch_uart_tx_empty,
 	.set_mctrl = pch_uart_set_mctrl,
 	.get_mctrl = pch_uart_get_mctrl,
@@ -1826,6 +1827,10 @@
 	priv->trigger_level = 1;
 	priv->fcr = 0;
 
+	if (pdev->dev.of_node)
+		of_property_read_u32(pdev->dev.of_node, "clock-frequency"
+					 , &user_uartclk);
+
 #ifdef CONFIG_SERIAL_PCH_UART_CONSOLE
 	pch_uart_ports[board->line_no] = priv;
 #endif
diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c
index ae2095a..f44615f 100644
--- a/drivers/tty/serial/samsung.c
+++ b/drivers/tty/serial/samsung.c
@@ -1577,7 +1577,7 @@
 }
 
 
-#ifdef CONFIG_CPU_FREQ
+#ifdef CONFIG_ARM_S3C24XX_CPUFREQ
 
 static int s3c24xx_serial_cpufreq_transition(struct notifier_block *nb,
 					     unsigned long val, void *data)
diff --git a/drivers/tty/serial/samsung.h b/drivers/tty/serial/samsung.h
index 2ae4fce..a04acef 100644
--- a/drivers/tty/serial/samsung.h
+++ b/drivers/tty/serial/samsung.h
@@ -102,7 +102,7 @@
 
 	struct s3c24xx_uart_dma		*dma;
 
-#ifdef CONFIG_CPU_FREQ
+#ifdef CONFIG_ARM_S3C24XX_CPUFREQ
 	struct notifier_block		freq_transition;
 #endif
 };
diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index f36e6df..a9d94f7 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -1205,6 +1205,10 @@
 	}
 #endif
 
+	/* reset device, purging any pending irq / data */
+	regmap_write(s->regmap, SC16IS7XX_IOCONTROL_REG << SC16IS7XX_REG_SHIFT,
+			SC16IS7XX_IOCONTROL_SRESET_BIT);
+
 	for (i = 0; i < devtype->nr_uart; ++i) {
 		s->p[i].line		= i;
 		/* Initialize port data */
@@ -1234,6 +1238,22 @@
 		init_kthread_work(&s->p[i].reg_work, sc16is7xx_reg_proc);
 		/* Register port */
 		uart_add_one_port(&sc16is7xx_uart, &s->p[i].port);
+
+		/* Enable EFR */
+		sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG,
+				     SC16IS7XX_LCR_CONF_MODE_B);
+
+		regcache_cache_bypass(s->regmap, true);
+
+		/* Enable write access to enhanced features */
+		sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_EFR_REG,
+				     SC16IS7XX_EFR_ENABLE_BIT);
+
+		regcache_cache_bypass(s->regmap, false);
+
+		/* Restore access to general registers */
+		sc16is7xx_port_write(&s->p[i].port, SC16IS7XX_LCR_REG, 0x00);
+
 		/* Go to suspend mode */
 		sc16is7xx_power(&s->p[i].port, 0);
 	}
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 9fc1533..6e4f636 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -235,18 +235,9 @@
 	if (tty_port_initialized(port))
 		return 0;
 
-	/*
-	 * Set the TTY IO error marker - we will only clear this
-	 * once we have successfully opened the port.
-	 */
-	set_bit(TTY_IO_ERROR, &tty->flags);
-
 	retval = uart_port_startup(tty, state, init_hw);
-	if (!retval) {
-		tty_port_set_initialized(port, 1);
-		clear_bit(TTY_IO_ERROR, &tty->flags);
-	} else if (retval > 0)
-		retval = 0;
+	if (retval)
+		set_bit(TTY_IO_ERROR, &tty->flags);
 
 	return retval;
 }
@@ -972,8 +963,11 @@
 			}
 			uart_change_speed(tty, state, NULL);
 		}
-	} else
+	} else {
 		retval = uart_startup(tty, state, 1);
+		if (retval > 0)
+			retval = 0;
+	}
  exit:
 	return retval;
 }
@@ -1139,6 +1133,8 @@
 		uport->ops->config_port(uport, flags);
 
 		ret = uart_startup(tty, state, 1);
+		if (ret > 0)
+			ret = 0;
 	}
 out:
 	mutex_unlock(&port->mutex);
@@ -1465,7 +1461,6 @@
 {
 	struct uart_state *state = tty->driver_data;
 	struct tty_port *port;
-	struct uart_port *uport;
 
 	if (!state) {
 		struct uart_driver *drv = tty->driver->driver_state;
@@ -1481,56 +1476,36 @@
 	port = &state->port;
 	pr_debug("uart_close(%d) called\n", tty->index);
 
-	if (tty_port_close_start(port, tty, filp) == 0)
-		return;
+	tty_port_close(tty->port, tty, filp);
+}
 
-	mutex_lock(&port->mutex);
-	uport = uart_port_check(state);
+static void uart_tty_port_shutdown(struct tty_port *port)
+{
+	struct uart_state *state = container_of(port, struct uart_state, port);
+	struct uart_port *uport = uart_port_check(state);
 
 	/*
 	 * At this point, we stop accepting input.  To do this, we
 	 * disable the receive line status interrupts.
 	 */
-	if (tty_port_initialized(port) &&
-	    !WARN(!uport, "detached port still initialized!\n")) {
-		spin_lock_irq(&uport->lock);
-		uport->ops->stop_rx(uport);
-		spin_unlock_irq(&uport->lock);
-		/*
-		 * Before we drop DTR, make sure the UART transmitter
-		 * has completely drained; this is especially
-		 * important if there is a transmit FIFO!
-		 */
-		uart_wait_until_sent(tty, uport->timeout);
-	}
+	if (WARN(!uport, "detached port still initialized!\n"))
+		return;
 
-	uart_shutdown(tty, state);
-	tty_port_tty_set(port, NULL);
+	spin_lock_irq(&uport->lock);
+	uport->ops->stop_rx(uport);
+	spin_unlock_irq(&uport->lock);
 
-	spin_lock_irq(&port->lock);
-
-	if (port->blocked_open) {
-		spin_unlock_irq(&port->lock);
-		if (port->close_delay)
-			msleep_interruptible(jiffies_to_msecs(port->close_delay));
-		spin_lock_irq(&port->lock);
-	} else if (uport && !uart_console(uport)) {
-		spin_unlock_irq(&port->lock);
-		uart_change_pm(state, UART_PM_STATE_OFF);
-		spin_lock_irq(&port->lock);
-	}
-	spin_unlock_irq(&port->lock);
-	tty_port_set_active(port, 0);
+	uart_port_shutdown(port);
 
 	/*
-	 * Wake up anyone trying to open this port.
+	 * It's possible for shutdown to be called after suspend if we get
+	 * a DCD drop (hangup) at just the right time.  Clear suspended bit so
+	 * we don't try to resume a port that has been shutdown.
 	 */
-	wake_up_interruptible(&port->open_wait);
+	tty_port_set_suspended(port, 0);
 
-	mutex_unlock(&port->mutex);
+	uart_change_pm(state, UART_PM_STATE_OFF);
 
-	tty_ldisc_flush(tty);
-	tty->closing = 0;
 }
 
 static void uart_wait_until_sent(struct tty_struct *tty, int timeout)
@@ -1711,52 +1686,31 @@
 	struct uart_driver *drv = tty->driver->driver_state;
 	int retval, line = tty->index;
 	struct uart_state *state = drv->state + line;
-	struct tty_port *port = &state->port;
-	struct uart_port *uport;
-
-	pr_debug("uart_open(%d) called\n", line);
-
-	spin_lock_irq(&port->lock);
-	++port->count;
-	spin_unlock_irq(&port->lock);
-
-	/*
-	 * We take the semaphore here to guarantee that we won't be re-entered
-	 * while allocating the state structure, or while we request any IRQs
-	 * that the driver may need.  This also has the nice side-effect that
-	 * it delays the action of uart_hangup, so we can guarantee that
-	 * state->port.tty will always contain something reasonable.
-	 */
-	if (mutex_lock_interruptible(&port->mutex)) {
-		retval = -ERESTARTSYS;
-		goto end;
-	}
-
-	uport = uart_port_check(state);
-	if (!uport || uport->flags & UPF_DEAD) {
-		retval = -ENXIO;
-		goto err_unlock;
-	}
 
 	tty->driver_data = state;
-	uport->state = state;
+
+	retval = tty_port_open(&state->port, tty, filp);
+	if (retval > 0)
+		retval = 0;
+
+	return retval;
+}
+
+static int uart_port_activate(struct tty_port *port, struct tty_struct *tty)
+{
+	struct uart_state *state = container_of(port, struct uart_state, port);
+	struct uart_port *uport;
+
+	uport = uart_port_check(state);
+	if (!uport || uport->flags & UPF_DEAD)
+		return -ENXIO;
+
 	port->low_latency = (uport->flags & UPF_LOW_LATENCY) ? 1 : 0;
-	tty_port_tty_set(port, tty);
 
 	/*
 	 * Start up the serial port.
 	 */
-	retval = uart_startup(tty, state, 0);
-
-	/*
-	 * If we succeeded, wait until the port is ready.
-	 */
-err_unlock:
-	mutex_unlock(&port->mutex);
-	if (retval == 0)
-		retval = tty_port_block_til_ready(port, tty, filp);
-end:
-	return retval;
+	return uart_startup(tty, state, 0);
 }
 
 static const char *uart_type(struct uart_port *port)
@@ -1940,7 +1894,7 @@
  *
  *	Returns 0 on success or -EINVAL on failure
  */
-int uart_parse_earlycon(char *p, unsigned char *iotype, unsigned long *addr,
+int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr,
 			char **options)
 {
 	if (strncmp(p, "mmio,", 5) == 0) {
@@ -1968,7 +1922,11 @@
 		return -EINVAL;
 	}
 
-	*addr = simple_strtoul(p, NULL, 0);
+	/*
+	 * Before you replace it with kstrtoull(), think about options separator
+	 * (',') it will not tolerate
+	 */
+	*addr = simple_strtoull(p, NULL, 0);
 	p = strchr(p, ',');
 	if (p)
 		p++;
@@ -2470,6 +2428,8 @@
 static const struct tty_port_operations uart_port_ops = {
 	.carrier_raised = uart_carrier_raised,
 	.dtr_rts	= uart_dtr_rts,
+	.activate	= uart_port_activate,
+	.shutdown	= uart_tty_port_shutdown,
 };
 
 /**
@@ -2786,6 +2746,8 @@
 	uport->cons = drv->cons;
 	uport->minor = drv->tty_driver->minor_start + uport->line;
 
+	port->console = uart_console(uport);
+
 	/*
 	 * If this port is a console, then the spinlock is already
 	 * initialised.
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index d86eee3..4b26252 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -2533,7 +2533,7 @@
 	return 0;
 }
 
-static struct uart_ops sci_uart_ops = {
+static const struct uart_ops sci_uart_ops = {
 	.tx_empty	= sci_tx_empty,
 	.set_mctrl	= sci_set_mctrl,
 	.get_mctrl	= sci_get_mctrl,
diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
index 2d78cb3..379e5bd 100644
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -639,7 +639,7 @@
 
 /*---------------------------------------------------------------------*/
 
-static struct uart_ops asc_uart_ops = {
+static const struct uart_ops asc_uart_ops = {
 	.tx_empty	= asc_tx_empty,
 	.set_mctrl	= asc_set_mctrl,
 	.get_mctrl	= asc_get_mctrl,
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index f89d1f7..0338562 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) Maxime Coquelin 2015
- * Author:  Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ * Authors:  Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ *	     Gerald Baeza <gerald.baeza@st.com>
  * License terms:  GNU General Public License (GPL), version 2
  *
  * Inspired by st-asc.c from STMicroelectronics (c)
@@ -10,120 +11,31 @@
 #define SUPPORT_SYSRQ
 #endif
 
-#include <linux/module.h>
-#include <linux/serial.h>
+#include <linux/clk.h>
 #include <linux/console.h>
-#include <linux/sysrq.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
 #include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/pm_runtime.h>
+#include <linux/dma-direction.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/serial_core.h>
-#include <linux/clk.h>
+#include <linux/serial.h>
+#include <linux/spinlock.h>
+#include <linux/sysrq.h>
+#include <linux/tty_flip.h>
+#include <linux/tty.h>
 
-#define DRIVER_NAME "stm32-usart"
-
-/* Register offsets */
-#define USART_SR		0x00
-#define USART_DR		0x04
-#define USART_BRR		0x08
-#define USART_CR1		0x0c
-#define USART_CR2		0x10
-#define USART_CR3		0x14
-#define USART_GTPR		0x18
-
-/* USART_SR */
-#define USART_SR_PE		BIT(0)
-#define USART_SR_FE		BIT(1)
-#define USART_SR_NF		BIT(2)
-#define USART_SR_ORE		BIT(3)
-#define USART_SR_IDLE		BIT(4)
-#define USART_SR_RXNE		BIT(5)
-#define USART_SR_TC		BIT(6)
-#define USART_SR_TXE		BIT(7)
-#define USART_SR_LBD		BIT(8)
-#define USART_SR_CTS		BIT(9)
-#define USART_SR_ERR_MASK	(USART_SR_LBD | USART_SR_ORE | \
-				 USART_SR_FE | USART_SR_PE)
-/* Dummy bits */
-#define USART_SR_DUMMY_RX	BIT(16)
-
-/* USART_DR */
-#define USART_DR_MASK		GENMASK(8, 0)
-
-/* USART_BRR */
-#define USART_BRR_DIV_F_MASK	GENMASK(3, 0)
-#define USART_BRR_DIV_M_MASK	GENMASK(15, 4)
-#define USART_BRR_DIV_M_SHIFT	4
-
-/* USART_CR1 */
-#define USART_CR1_SBK		BIT(0)
-#define USART_CR1_RWU		BIT(1)
-#define USART_CR1_RE		BIT(2)
-#define USART_CR1_TE		BIT(3)
-#define USART_CR1_IDLEIE	BIT(4)
-#define USART_CR1_RXNEIE	BIT(5)
-#define USART_CR1_TCIE		BIT(6)
-#define USART_CR1_TXEIE		BIT(7)
-#define USART_CR1_PEIE		BIT(8)
-#define USART_CR1_PS		BIT(9)
-#define USART_CR1_PCE		BIT(10)
-#define USART_CR1_WAKE		BIT(11)
-#define USART_CR1_M		BIT(12)
-#define USART_CR1_UE		BIT(13)
-#define USART_CR1_OVER8		BIT(15)
-#define USART_CR1_IE_MASK	GENMASK(8, 4)
-
-/* USART_CR2 */
-#define USART_CR2_ADD_MASK	GENMASK(3, 0)
-#define USART_CR2_LBDL		BIT(5)
-#define USART_CR2_LBDIE		BIT(6)
-#define USART_CR2_LBCL		BIT(8)
-#define USART_CR2_CPHA		BIT(9)
-#define USART_CR2_CPOL		BIT(10)
-#define USART_CR2_CLKEN		BIT(11)
-#define USART_CR2_STOP_2B	BIT(13)
-#define USART_CR2_STOP_MASK	GENMASK(13, 12)
-#define USART_CR2_LINEN		BIT(14)
-
-/* USART_CR3 */
-#define USART_CR3_EIE		BIT(0)
-#define USART_CR3_IREN		BIT(1)
-#define USART_CR3_IRLP		BIT(2)
-#define USART_CR3_HDSEL		BIT(3)
-#define USART_CR3_NACK		BIT(4)
-#define USART_CR3_SCEN		BIT(5)
-#define USART_CR3_DMAR		BIT(6)
-#define USART_CR3_DMAT		BIT(7)
-#define USART_CR3_RTSE		BIT(8)
-#define USART_CR3_CTSE		BIT(9)
-#define USART_CR3_CTSIE		BIT(10)
-#define USART_CR3_ONEBIT	BIT(11)
-
-/* USART_GTPR */
-#define USART_GTPR_PSC_MASK	GENMASK(7, 0)
-#define USART_GTPR_GT_MASK	GENMASK(15, 8)
-
-#define DRIVER_NAME "stm32-usart"
-#define STM32_SERIAL_NAME "ttyS"
-#define STM32_MAX_PORTS 6
-
-struct stm32_port {
-	struct uart_port port;
-	struct clk *clk;
-	bool hw_flow_control;
-};
-
-static struct stm32_port stm32_ports[STM32_MAX_PORTS];
-static struct uart_driver stm32_usart_driver;
+#include "stm32-usart.h"
 
 static void stm32_stop_tx(struct uart_port *port);
+static void stm32_transmit_chars(struct uart_port *port);
 
 static inline struct stm32_port *to_stm32_port(struct uart_port *port)
 {
@@ -148,19 +60,64 @@
 	writel_relaxed(val, port->membase + reg);
 }
 
-static void stm32_receive_chars(struct uart_port *port)
+static int stm32_pending_rx(struct uart_port *port, u32 *sr, int *last_res,
+			    bool threaded)
+{
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+	enum dma_status status;
+	struct dma_tx_state state;
+
+	*sr = readl_relaxed(port->membase + ofs->isr);
+
+	if (threaded && stm32_port->rx_ch) {
+		status = dmaengine_tx_status(stm32_port->rx_ch,
+					     stm32_port->rx_ch->cookie,
+					     &state);
+		if ((status == DMA_IN_PROGRESS) &&
+		    (*last_res != state.residue))
+			return 1;
+		else
+			return 0;
+	} else if (*sr & USART_SR_RXNE) {
+		return 1;
+	}
+	return 0;
+}
+
+static unsigned long
+stm32_get_char(struct uart_port *port, u32 *sr, int *last_res)
+{
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+	unsigned long c;
+
+	if (stm32_port->rx_ch) {
+		c = stm32_port->rx_buf[RX_BUF_L - (*last_res)--];
+		if ((*last_res) == 0)
+			*last_res = RX_BUF_L;
+		return c;
+	} else {
+		return readl_relaxed(port->membase + ofs->rdr);
+	}
+}
+
+static void stm32_receive_chars(struct uart_port *port, bool threaded)
 {
 	struct tty_port *tport = &port->state->port;
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
 	unsigned long c;
 	u32 sr;
 	char flag;
+	static int last_res = RX_BUF_L;
 
 	if (port->irq_wake)
 		pm_wakeup_event(tport->tty->dev, 0);
 
-	while ((sr = readl_relaxed(port->membase + USART_SR)) & USART_SR_RXNE) {
+	while (stm32_pending_rx(port, &sr, &last_res, threaded)) {
 		sr |= USART_SR_DUMMY_RX;
-		c = readl_relaxed(port->membase + USART_DR);
+		c = stm32_get_char(port, &sr, &last_res);
 		flag = TTY_NORMAL;
 		port->icount.rx++;
 
@@ -170,6 +127,10 @@
 				if (uart_handle_break(port))
 					continue;
 			} else if (sr & USART_SR_ORE) {
+				if (ofs->icr != UNDEF_REG)
+					writel_relaxed(USART_ICR_ORECF,
+						       port->membase +
+						       ofs->icr);
 				port->icount.overrun++;
 			} else if (sr & USART_SR_PE) {
 				port->icount.parity++;
@@ -197,14 +158,138 @@
 	spin_lock(&port->lock);
 }
 
+static void stm32_tx_dma_complete(void *arg)
+{
+	struct uart_port *port = arg;
+	struct stm32_port *stm32port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+	unsigned int isr;
+	int ret;
+
+	ret = readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr,
+						isr,
+						(isr & USART_SR_TC),
+						10, 100000);
+
+	if (ret)
+		dev_err(port->dev, "terminal count not set\n");
+
+	if (ofs->icr == UNDEF_REG)
+		stm32_clr_bits(port, ofs->isr, USART_SR_TC);
+	else
+		stm32_set_bits(port, ofs->icr, USART_CR_TC);
+
+	stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+	stm32port->tx_dma_busy = false;
+
+	/* Let's see if we have pending data to send */
+	stm32_transmit_chars(port);
+}
+
+static void stm32_transmit_chars_pio(struct uart_port *port)
+{
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+	struct circ_buf *xmit = &port->state->xmit;
+	unsigned int isr;
+	int ret;
+
+	if (stm32_port->tx_dma_busy) {
+		stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+		stm32_port->tx_dma_busy = false;
+	}
+
+	ret = readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr,
+						isr,
+						(isr & USART_SR_TXE),
+						10, 100);
+
+	if (ret)
+		dev_err(port->dev, "tx empty not set\n");
+
+	stm32_set_bits(port, ofs->cr1, USART_CR1_TXEIE);
+
+	writel_relaxed(xmit->buf[xmit->tail], port->membase + ofs->tdr);
+	xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+	port->icount.tx++;
+}
+
+static void stm32_transmit_chars_dma(struct uart_port *port)
+{
+	struct stm32_port *stm32port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+	struct circ_buf *xmit = &port->state->xmit;
+	struct dma_async_tx_descriptor *desc = NULL;
+	dma_cookie_t cookie;
+	unsigned int count, i;
+
+	if (stm32port->tx_dma_busy)
+		return;
+
+	stm32port->tx_dma_busy = true;
+
+	count = uart_circ_chars_pending(xmit);
+
+	if (count > TX_BUF_L)
+		count = TX_BUF_L;
+
+	if (xmit->tail < xmit->head) {
+		memcpy(&stm32port->tx_buf[0], &xmit->buf[xmit->tail], count);
+	} else {
+		size_t one = UART_XMIT_SIZE - xmit->tail;
+		size_t two;
+
+		if (one > count)
+			one = count;
+		two = count - one;
+
+		memcpy(&stm32port->tx_buf[0], &xmit->buf[xmit->tail], one);
+		if (two)
+			memcpy(&stm32port->tx_buf[one], &xmit->buf[0], two);
+	}
+
+	desc = dmaengine_prep_slave_single(stm32port->tx_ch,
+					   stm32port->tx_dma_buf,
+					   count,
+					   DMA_MEM_TO_DEV,
+					   DMA_PREP_INTERRUPT);
+
+	if (!desc) {
+		for (i = count; i > 0; i--)
+			stm32_transmit_chars_pio(port);
+		return;
+	}
+
+	desc->callback = stm32_tx_dma_complete;
+	desc->callback_param = port;
+
+	/* Push current DMA TX transaction in the pending queue */
+	cookie = dmaengine_submit(desc);
+
+	/* Issue pending DMA TX requests */
+	dma_async_issue_pending(stm32port->tx_ch);
+
+	stm32_clr_bits(port, ofs->isr, USART_SR_TC);
+	stm32_set_bits(port, ofs->cr3, USART_CR3_DMAT);
+
+	xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
+	port->icount.tx += count;
+}
+
 static void stm32_transmit_chars(struct uart_port *port)
 {
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
 	struct circ_buf *xmit = &port->state->xmit;
 
 	if (port->x_char) {
-		writel_relaxed(port->x_char, port->membase + USART_DR);
+		if (stm32_port->tx_dma_busy)
+			stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+		writel_relaxed(port->x_char, port->membase + ofs->tdr);
 		port->x_char = 0;
 		port->icount.tx++;
+		if (stm32_port->tx_dma_busy)
+			stm32_set_bits(port, ofs->cr3, USART_CR3_DMAT);
 		return;
 	}
 
@@ -218,9 +303,10 @@
 		return;
 	}
 
-	writel_relaxed(xmit->buf[xmit->tail], port->membase + USART_DR);
-	xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-	port->icount.tx++;
+	if (stm32_port->tx_ch)
+		stm32_transmit_chars_dma(port);
+	else
+		stm32_transmit_chars_pio(port);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(port);
@@ -232,34 +318,60 @@
 static irqreturn_t stm32_interrupt(int irq, void *ptr)
 {
 	struct uart_port *port = ptr;
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
 	u32 sr;
 
 	spin_lock(&port->lock);
 
-	sr = readl_relaxed(port->membase + USART_SR);
+	sr = readl_relaxed(port->membase + ofs->isr);
 
-	if (sr & USART_SR_RXNE)
-		stm32_receive_chars(port);
+	if ((sr & USART_SR_RXNE) && !(stm32_port->rx_ch))
+		stm32_receive_chars(port, false);
 
-	if (sr & USART_SR_TXE)
+	if ((sr & USART_SR_TXE) && !(stm32_port->tx_ch))
 		stm32_transmit_chars(port);
 
 	spin_unlock(&port->lock);
 
+	if (stm32_port->rx_ch)
+		return IRQ_WAKE_THREAD;
+	else
+		return IRQ_HANDLED;
+}
+
+static irqreturn_t stm32_threaded_interrupt(int irq, void *ptr)
+{
+	struct uart_port *port = ptr;
+	struct stm32_port *stm32_port = to_stm32_port(port);
+
+	spin_lock(&port->lock);
+
+	if (stm32_port->rx_ch)
+		stm32_receive_chars(port, true);
+
+	spin_unlock(&port->lock);
+
 	return IRQ_HANDLED;
 }
 
 static unsigned int stm32_tx_empty(struct uart_port *port)
 {
-	return readl_relaxed(port->membase + USART_SR) & USART_SR_TXE;
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+	return readl_relaxed(port->membase + ofs->isr) & USART_SR_TXE;
 }
 
 static void stm32_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
 	if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS))
-		stm32_set_bits(port, USART_CR3, USART_CR3_RTSE);
+		stm32_set_bits(port, ofs->cr3, USART_CR3_RTSE);
 	else
-		stm32_clr_bits(port, USART_CR3, USART_CR3_RTSE);
+		stm32_clr_bits(port, ofs->cr3, USART_CR3_RTSE);
 }
 
 static unsigned int stm32_get_mctrl(struct uart_port *port)
@@ -271,7 +383,10 @@
 /* Transmit stop */
 static void stm32_stop_tx(struct uart_port *port)
 {
-	stm32_clr_bits(port, USART_CR1, USART_CR1_TXEIE);
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+	stm32_clr_bits(port, ofs->cr1, USART_CR1_TXEIE);
 }
 
 /* There are probably characters waiting to be transmitted. */
@@ -282,33 +397,40 @@
 	if (uart_circ_empty(xmit))
 		return;
 
-	stm32_set_bits(port, USART_CR1, USART_CR1_TXEIE | USART_CR1_TE);
+	stm32_transmit_chars(port);
 }
 
 /* Throttle the remote when input buffer is about to overflow. */
 static void stm32_throttle(struct uart_port *port)
 {
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
 	unsigned long flags;
 
 	spin_lock_irqsave(&port->lock, flags);
-	stm32_clr_bits(port, USART_CR1, USART_CR1_RXNEIE);
+	stm32_clr_bits(port, ofs->cr1, USART_CR1_RXNEIE);
 	spin_unlock_irqrestore(&port->lock, flags);
 }
 
 /* Unthrottle the remote, the input buffer can now accept data. */
 static void stm32_unthrottle(struct uart_port *port)
 {
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
 	unsigned long flags;
 
 	spin_lock_irqsave(&port->lock, flags);
-	stm32_set_bits(port, USART_CR1, USART_CR1_RXNEIE);
+	stm32_set_bits(port, ofs->cr1, USART_CR1_RXNEIE);
 	spin_unlock_irqrestore(&port->lock, flags);
 }
 
 /* Receive stop */
 static void stm32_stop_rx(struct uart_port *port)
 {
-	stm32_clr_bits(port, USART_CR1, USART_CR1_RXNEIE);
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+	stm32_clr_bits(port, ofs->cr1, USART_CR1_RXNEIE);
 }
 
 /* Handle breaks - ignored by us */
@@ -318,26 +440,34 @@
 
 static int stm32_startup(struct uart_port *port)
 {
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
 	const char *name = to_platform_device(port->dev)->name;
 	u32 val;
 	int ret;
 
-	ret = request_irq(port->irq, stm32_interrupt, 0, name, port);
+	ret = request_threaded_irq(port->irq, stm32_interrupt,
+				   stm32_threaded_interrupt,
+				   IRQF_NO_SUSPEND, name, port);
 	if (ret)
 		return ret;
 
 	val = USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE;
-	stm32_set_bits(port, USART_CR1, val);
+	stm32_set_bits(port, ofs->cr1, val);
 
 	return 0;
 }
 
 static void stm32_shutdown(struct uart_port *port)
 {
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+	struct stm32_usart_config *cfg = &stm32_port->info->cfg;
 	u32 val;
 
 	val = USART_CR1_TXEIE | USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE;
-	stm32_set_bits(port, USART_CR1, val);
+	val |= BIT(cfg->uart_enable_bit);
+	stm32_clr_bits(port, ofs->cr1, val);
 
 	free_irq(port->irq, port);
 }
@@ -346,6 +476,8 @@
 			    struct ktermios *old)
 {
 	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+	struct stm32_usart_config *cfg = &stm32_port->info->cfg;
 	unsigned int baud;
 	u32 usartdiv, mantissa, fraction, oversampling;
 	tcflag_t cflag = termios->c_cflag;
@@ -360,9 +492,10 @@
 	spin_lock_irqsave(&port->lock, flags);
 
 	/* Stop serial port and reset value */
-	writel_relaxed(0, port->membase + USART_CR1);
+	writel_relaxed(0, port->membase + ofs->cr1);
 
-	cr1 = USART_CR1_TE | USART_CR1_RE | USART_CR1_UE | USART_CR1_RXNEIE;
+	cr1 = USART_CR1_TE | USART_CR1_RE | USART_CR1_RXNEIE;
+	cr1 |= BIT(cfg->uart_enable_bit);
 	cr2 = 0;
 	cr3 = 0;
 
@@ -371,8 +504,12 @@
 
 	if (cflag & PARENB) {
 		cr1 |= USART_CR1_PCE;
-		if ((cflag & CSIZE) == CS8)
-			cr1 |= USART_CR1_M;
+		if ((cflag & CSIZE) == CS8) {
+			if (cfg->has_7bits_data)
+				cr1 |= USART_CR1_M0;
+			else
+				cr1 |= USART_CR1_M;
+		}
 	}
 
 	if (cflag & PARODD)
@@ -394,15 +531,15 @@
 	 */
 	if (usartdiv < 16) {
 		oversampling = 8;
-		stm32_set_bits(port, USART_CR1, USART_CR1_OVER8);
+		stm32_set_bits(port, ofs->cr1, USART_CR1_OVER8);
 	} else {
 		oversampling = 16;
-		stm32_clr_bits(port, USART_CR1, USART_CR1_OVER8);
+		stm32_clr_bits(port, ofs->cr1, USART_CR1_OVER8);
 	}
 
 	mantissa = (usartdiv / oversampling) << USART_BRR_DIV_M_SHIFT;
 	fraction = usartdiv % oversampling;
-	writel_relaxed(mantissa | fraction, port->membase + USART_BRR);
+	writel_relaxed(mantissa | fraction, port->membase + ofs->brr);
 
 	uart_update_timeout(port, cflag, baud);
 
@@ -430,9 +567,12 @@
 	if ((termios->c_cflag & CREAD) == 0)
 		port->ignore_status_mask |= USART_SR_DUMMY_RX;
 
-	writel_relaxed(cr3, port->membase + USART_CR3);
-	writel_relaxed(cr2, port->membase + USART_CR2);
-	writel_relaxed(cr1, port->membase + USART_CR1);
+	if (stm32_port->rx_ch)
+		cr3 |= USART_CR3_DMAR;
+
+	writel_relaxed(cr3, port->membase + ofs->cr3);
+	writel_relaxed(cr2, port->membase + ofs->cr2);
+	writel_relaxed(cr1, port->membase + ofs->cr1);
 
 	spin_unlock_irqrestore(&port->lock, flags);
 }
@@ -469,6 +609,8 @@
 {
 	struct stm32_port *stm32port = container_of(port,
 			struct stm32_port, port);
+	struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+	struct stm32_usart_config *cfg = &stm32port->info->cfg;
 	unsigned long flags = 0;
 
 	switch (state) {
@@ -477,7 +619,7 @@
 		break;
 	case UART_PM_STATE_OFF:
 		spin_lock_irqsave(&port->lock, flags);
-		stm32_clr_bits(port, USART_CR1, USART_CR1_UE);
+		stm32_clr_bits(port, ofs->cr1, BIT(cfg->uart_enable_bit));
 		spin_unlock_irqrestore(&port->lock, flags);
 		clk_disable_unprepare(stm32port->clk);
 		break;
@@ -539,8 +681,6 @@
 	if (!stm32port->port.uartclk)
 		ret = -EINVAL;
 
-	clk_disable_unprepare(stm32port->clk);
-
 	return ret;
 }
 
@@ -560,30 +700,162 @@
 		return NULL;
 
 	stm32_ports[id].hw_flow_control = of_property_read_bool(np,
-							"auto-flow-control");
+							"st,hw-flow-ctrl");
 	stm32_ports[id].port.line = id;
 	return &stm32_ports[id];
 }
 
 #ifdef CONFIG_OF
 static const struct of_device_id stm32_match[] = {
-	{ .compatible = "st,stm32-usart", },
-	{ .compatible = "st,stm32-uart", },
+	{ .compatible = "st,stm32-usart", .data = &stm32f4_info},
+	{ .compatible = "st,stm32-uart", .data = &stm32f4_info},
+	{ .compatible = "st,stm32f7-usart", .data = &stm32f7_info},
+	{ .compatible = "st,stm32f7-uart", .data = &stm32f7_info},
 	{},
 };
 
 MODULE_DEVICE_TABLE(of, stm32_match);
 #endif
 
+static int stm32_of_dma_rx_probe(struct stm32_port *stm32port,
+				 struct platform_device *pdev)
+{
+	struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+	struct uart_port *port = &stm32port->port;
+	struct device *dev = &pdev->dev;
+	struct dma_slave_config config;
+	struct dma_async_tx_descriptor *desc = NULL;
+	dma_cookie_t cookie;
+	int ret;
+
+	/* Request DMA RX channel */
+	stm32port->rx_ch = dma_request_slave_channel(dev, "rx");
+	if (!stm32port->rx_ch) {
+		dev_info(dev, "rx dma alloc failed\n");
+		return -ENODEV;
+	}
+	stm32port->rx_buf = dma_alloc_coherent(&pdev->dev, RX_BUF_L,
+						 &stm32port->rx_dma_buf,
+						 GFP_KERNEL);
+	if (!stm32port->rx_buf) {
+		ret = -ENOMEM;
+		goto alloc_err;
+	}
+
+	/* Configure DMA channel */
+	memset(&config, 0, sizeof(config));
+	config.src_addr = port->mapbase + ofs->rdr;
+	config.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+
+	ret = dmaengine_slave_config(stm32port->rx_ch, &config);
+	if (ret < 0) {
+		dev_err(dev, "rx dma channel config failed\n");
+		ret = -ENODEV;
+		goto config_err;
+	}
+
+	/* Prepare a DMA cyclic transaction */
+	desc = dmaengine_prep_dma_cyclic(stm32port->rx_ch,
+					 stm32port->rx_dma_buf,
+					 RX_BUF_L, RX_BUF_P, DMA_DEV_TO_MEM,
+					 DMA_PREP_INTERRUPT);
+	if (!desc) {
+		dev_err(dev, "rx dma prep cyclic failed\n");
+		ret = -ENODEV;
+		goto config_err;
+	}
+
+	/* No callback as dma buffer is drained on usart interrupt */
+	desc->callback = NULL;
+	desc->callback_param = NULL;
+
+	/* Push current DMA transaction in the pending queue */
+	cookie = dmaengine_submit(desc);
+
+	/* Issue pending DMA requests */
+	dma_async_issue_pending(stm32port->rx_ch);
+
+	return 0;
+
+config_err:
+	dma_free_coherent(&pdev->dev,
+			  RX_BUF_L, stm32port->rx_buf,
+			  stm32port->rx_dma_buf);
+
+alloc_err:
+	dma_release_channel(stm32port->rx_ch);
+	stm32port->rx_ch = NULL;
+
+	return ret;
+}
+
+static int stm32_of_dma_tx_probe(struct stm32_port *stm32port,
+				 struct platform_device *pdev)
+{
+	struct stm32_usart_offsets *ofs = &stm32port->info->ofs;
+	struct uart_port *port = &stm32port->port;
+	struct device *dev = &pdev->dev;
+	struct dma_slave_config config;
+	int ret;
+
+	stm32port->tx_dma_busy = false;
+
+	/* Request DMA TX channel */
+	stm32port->tx_ch = dma_request_slave_channel(dev, "tx");
+	if (!stm32port->tx_ch) {
+		dev_info(dev, "tx dma alloc failed\n");
+		return -ENODEV;
+	}
+	stm32port->tx_buf = dma_alloc_coherent(&pdev->dev, TX_BUF_L,
+						 &stm32port->tx_dma_buf,
+						 GFP_KERNEL);
+	if (!stm32port->tx_buf) {
+		ret = -ENOMEM;
+		goto alloc_err;
+	}
+
+	/* Configure DMA channel */
+	memset(&config, 0, sizeof(config));
+	config.dst_addr = port->mapbase + ofs->tdr;
+	config.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+
+	ret = dmaengine_slave_config(stm32port->tx_ch, &config);
+	if (ret < 0) {
+		dev_err(dev, "tx dma channel config failed\n");
+		ret = -ENODEV;
+		goto config_err;
+	}
+
+	return 0;
+
+config_err:
+	dma_free_coherent(&pdev->dev,
+			  TX_BUF_L, stm32port->tx_buf,
+			  stm32port->tx_dma_buf);
+
+alloc_err:
+	dma_release_channel(stm32port->tx_ch);
+	stm32port->tx_ch = NULL;
+
+	return ret;
+}
+
 static int stm32_serial_probe(struct platform_device *pdev)
 {
-	int ret;
+	const struct of_device_id *match;
 	struct stm32_port *stm32port;
+	int ret;
 
 	stm32port = stm32_of_get_stm32_port(pdev);
 	if (!stm32port)
 		return -ENODEV;
 
+	match = of_match_device(stm32_match, &pdev->dev);
+	if (match && match->data)
+		stm32port->info = (struct stm32_usart_info *)match->data;
+	else
+		return -EINVAL;
+
 	ret = stm32_init_port(stm32port, pdev);
 	if (ret)
 		return ret;
@@ -592,6 +864,14 @@
 	if (ret)
 		return ret;
 
+	ret = stm32_of_dma_rx_probe(stm32port, pdev);
+	if (ret)
+		dev_info(&pdev->dev, "interrupt mode used for rx (no dma)\n");
+
+	ret = stm32_of_dma_tx_probe(stm32port, pdev);
+	if (ret)
+		dev_info(&pdev->dev, "interrupt mode used for tx (no dma)\n");
+
 	platform_set_drvdata(pdev, &stm32port->port);
 
 	return 0;
@@ -600,6 +880,30 @@
 static int stm32_serial_remove(struct platform_device *pdev)
 {
 	struct uart_port *port = platform_get_drvdata(pdev);
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+	stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAR);
+
+	if (stm32_port->rx_ch)
+		dma_release_channel(stm32_port->rx_ch);
+
+	if (stm32_port->rx_dma_buf)
+		dma_free_coherent(&pdev->dev,
+				  RX_BUF_L, stm32_port->rx_buf,
+				  stm32_port->rx_dma_buf);
+
+	stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT);
+
+	if (stm32_port->tx_ch)
+		dma_release_channel(stm32_port->tx_ch);
+
+	if (stm32_port->tx_dma_buf)
+		dma_free_coherent(&pdev->dev,
+				  TX_BUF_L, stm32_port->tx_buf,
+				  stm32_port->tx_dma_buf);
+
+	clk_disable_unprepare(stm32_port->clk);
 
 	return uart_remove_one_port(&stm32_usart_driver, port);
 }
@@ -608,15 +912,21 @@
 #ifdef CONFIG_SERIAL_STM32_CONSOLE
 static void stm32_console_putchar(struct uart_port *port, int ch)
 {
-	while (!(readl_relaxed(port->membase + USART_SR) & USART_SR_TXE))
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+
+	while (!(readl_relaxed(port->membase + ofs->isr) & USART_SR_TXE))
 		cpu_relax();
 
-	writel_relaxed(ch, port->membase + USART_DR);
+	writel_relaxed(ch, port->membase + ofs->tdr);
 }
 
 static void stm32_console_write(struct console *co, const char *s, unsigned cnt)
 {
 	struct uart_port *port = &stm32_ports[co->index].port;
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	struct stm32_usart_offsets *ofs = &stm32_port->info->ofs;
+	struct stm32_usart_config *cfg = &stm32_port->info->cfg;
 	unsigned long flags;
 	u32 old_cr1, new_cr1;
 	int locked = 1;
@@ -629,15 +939,16 @@
 	else
 		spin_lock(&port->lock);
 
-	/* Save and disable interrupts */
-	old_cr1 = readl_relaxed(port->membase + USART_CR1);
+	/* Save and disable interrupts, enable the transmitter */
+	old_cr1 = readl_relaxed(port->membase + ofs->cr1);
 	new_cr1 = old_cr1 & ~USART_CR1_IE_MASK;
-	writel_relaxed(new_cr1, port->membase + USART_CR1);
+	new_cr1 |=  USART_CR1_TE | BIT(cfg->uart_enable_bit);
+	writel_relaxed(new_cr1, port->membase + ofs->cr1);
 
 	uart_console_write(port, s, cnt, stm32_console_putchar);
 
 	/* Restore interrupt state */
-	writel_relaxed(old_cr1, port->membase + USART_CR1);
+	writel_relaxed(old_cr1, port->membase + ofs->cr1);
 
 	if (locked)
 		spin_unlock(&port->lock);
diff --git a/drivers/tty/serial/stm32-usart.h b/drivers/tty/serial/stm32-usart.h
new file mode 100644
index 0000000..41d9749
--- /dev/null
+++ b/drivers/tty/serial/stm32-usart.h
@@ -0,0 +1,229 @@
+/*
+ * Copyright (C) Maxime Coquelin 2015
+ * Authors:  Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ *	     Gerald Baeza <gerald_baeza@yahoo.fr>
+ * License terms:  GNU General Public License (GPL), version 2
+ */
+
+#define DRIVER_NAME "stm32-usart"
+
+struct stm32_usart_offsets {
+	u8 cr1;
+	u8 cr2;
+	u8 cr3;
+	u8 brr;
+	u8 gtpr;
+	u8 rtor;
+	u8 rqr;
+	u8 isr;
+	u8 icr;
+	u8 rdr;
+	u8 tdr;
+};
+
+struct stm32_usart_config {
+	u8 uart_enable_bit; /* USART_CR1_UE */
+	bool has_7bits_data;
+};
+
+struct stm32_usart_info {
+	struct stm32_usart_offsets ofs;
+	struct stm32_usart_config cfg;
+};
+
+#define UNDEF_REG ~0
+
+/* Register offsets */
+struct stm32_usart_info stm32f4_info = {
+	.ofs = {
+		.isr	= 0x00,
+		.rdr	= 0x04,
+		.tdr	= 0x04,
+		.brr	= 0x08,
+		.cr1	= 0x0c,
+		.cr2	= 0x10,
+		.cr3	= 0x14,
+		.gtpr	= 0x18,
+		.rtor	= UNDEF_REG,
+		.rqr	= UNDEF_REG,
+		.icr	= UNDEF_REG,
+	},
+	.cfg = {
+		.uart_enable_bit = 13,
+		.has_7bits_data = false,
+	}
+};
+
+struct stm32_usart_info stm32f7_info = {
+	.ofs = {
+		.cr1	= 0x00,
+		.cr2	= 0x04,
+		.cr3	= 0x08,
+		.brr	= 0x0c,
+		.gtpr	= 0x10,
+		.rtor	= 0x14,
+		.rqr	= 0x18,
+		.isr	= 0x1c,
+		.icr	= 0x20,
+		.rdr	= 0x24,
+		.tdr	= 0x28,
+	},
+	.cfg = {
+		.uart_enable_bit = 0,
+		.has_7bits_data = true,
+	}
+};
+
+/* USART_SR (F4) / USART_ISR (F7) */
+#define USART_SR_PE		BIT(0)
+#define USART_SR_FE		BIT(1)
+#define USART_SR_NF		BIT(2)
+#define USART_SR_ORE		BIT(3)
+#define USART_SR_IDLE		BIT(4)
+#define USART_SR_RXNE		BIT(5)
+#define USART_SR_TC		BIT(6)
+#define USART_SR_TXE		BIT(7)
+#define USART_SR_LBD		BIT(8)
+#define USART_SR_CTSIF		BIT(9)
+#define USART_SR_CTS		BIT(10)		/* F7 */
+#define USART_SR_RTOF		BIT(11)		/* F7 */
+#define USART_SR_EOBF		BIT(12)		/* F7 */
+#define USART_SR_ABRE		BIT(14)		/* F7 */
+#define USART_SR_ABRF		BIT(15)		/* F7 */
+#define USART_SR_BUSY		BIT(16)		/* F7 */
+#define USART_SR_CMF		BIT(17)		/* F7 */
+#define USART_SR_SBKF		BIT(18)		/* F7 */
+#define USART_SR_TEACK		BIT(21)		/* F7 */
+#define USART_SR_ERR_MASK	(USART_SR_LBD | USART_SR_ORE | \
+				 USART_SR_FE | USART_SR_PE)
+/* Dummy bits */
+#define USART_SR_DUMMY_RX	BIT(16)
+
+/* USART_ICR (F7) */
+#define USART_CR_TC		BIT(6)
+
+/* USART_DR */
+#define USART_DR_MASK		GENMASK(8, 0)
+
+/* USART_BRR */
+#define USART_BRR_DIV_F_MASK	GENMASK(3, 0)
+#define USART_BRR_DIV_M_MASK	GENMASK(15, 4)
+#define USART_BRR_DIV_M_SHIFT	4
+
+/* USART_CR1 */
+#define USART_CR1_SBK		BIT(0)
+#define USART_CR1_RWU		BIT(1)		/* F4 */
+#define USART_CR1_RE		BIT(2)
+#define USART_CR1_TE		BIT(3)
+#define USART_CR1_IDLEIE	BIT(4)
+#define USART_CR1_RXNEIE	BIT(5)
+#define USART_CR1_TCIE		BIT(6)
+#define USART_CR1_TXEIE		BIT(7)
+#define USART_CR1_PEIE		BIT(8)
+#define USART_CR1_PS		BIT(9)
+#define USART_CR1_PCE		BIT(10)
+#define USART_CR1_WAKE		BIT(11)
+#define USART_CR1_M		BIT(12)
+#define USART_CR1_M0		BIT(12)		/* F7 */
+#define USART_CR1_MME		BIT(13)		/* F7 */
+#define USART_CR1_CMIE		BIT(14)		/* F7 */
+#define USART_CR1_OVER8		BIT(15)
+#define USART_CR1_DEDT_MASK	GENMASK(20, 16)	/* F7 */
+#define USART_CR1_DEAT_MASK	GENMASK(25, 21)	/* F7 */
+#define USART_CR1_RTOIE		BIT(26)		/* F7 */
+#define USART_CR1_EOBIE		BIT(27)		/* F7 */
+#define USART_CR1_M1		BIT(28)		/* F7 */
+#define USART_CR1_IE_MASK	(GENMASK(8, 4) | BIT(14) | BIT(26) | BIT(27))
+
+/* USART_CR2 */
+#define USART_CR2_ADD_MASK	GENMASK(3, 0)	/* F4 */
+#define USART_CR2_ADDM7		BIT(4)		/* F7 */
+#define USART_CR2_LBDL		BIT(5)
+#define USART_CR2_LBDIE		BIT(6)
+#define USART_CR2_LBCL		BIT(8)
+#define USART_CR2_CPHA		BIT(9)
+#define USART_CR2_CPOL		BIT(10)
+#define USART_CR2_CLKEN		BIT(11)
+#define USART_CR2_STOP_2B	BIT(13)
+#define USART_CR2_STOP_MASK	GENMASK(13, 12)
+#define USART_CR2_LINEN		BIT(14)
+#define USART_CR2_SWAP		BIT(15)		/* F7 */
+#define USART_CR2_RXINV		BIT(16)		/* F7 */
+#define USART_CR2_TXINV		BIT(17)		/* F7 */
+#define USART_CR2_DATAINV	BIT(18)		/* F7 */
+#define USART_CR2_MSBFIRST	BIT(19)		/* F7 */
+#define USART_CR2_ABREN		BIT(20)		/* F7 */
+#define USART_CR2_ABRMOD_MASK	GENMASK(22, 21)	/* F7 */
+#define USART_CR2_RTOEN		BIT(23)		/* F7 */
+#define USART_CR2_ADD_F7_MASK	GENMASK(31, 24)	/* F7 */
+
+/* USART_CR3 */
+#define USART_CR3_EIE		BIT(0)
+#define USART_CR3_IREN		BIT(1)
+#define USART_CR3_IRLP		BIT(2)
+#define USART_CR3_HDSEL		BIT(3)
+#define USART_CR3_NACK		BIT(4)
+#define USART_CR3_SCEN		BIT(5)
+#define USART_CR3_DMAR		BIT(6)
+#define USART_CR3_DMAT		BIT(7)
+#define USART_CR3_RTSE		BIT(8)
+#define USART_CR3_CTSE		BIT(9)
+#define USART_CR3_CTSIE		BIT(10)
+#define USART_CR3_ONEBIT	BIT(11)
+#define USART_CR3_OVRDIS	BIT(12)		/* F7 */
+#define USART_CR3_DDRE		BIT(13)		/* F7 */
+#define USART_CR3_DEM		BIT(14)		/* F7 */
+#define USART_CR3_DEP		BIT(15)		/* F7 */
+#define USART_CR3_SCARCNT_MASK	GENMASK(19, 17)	/* F7 */
+
+/* USART_GTPR */
+#define USART_GTPR_PSC_MASK	GENMASK(7, 0)
+#define USART_GTPR_GT_MASK	GENMASK(15, 8)
+
+/* USART_RTOR */
+#define USART_RTOR_RTO_MASK	GENMASK(23, 0)	/* F7 */
+#define USART_RTOR_BLEN_MASK	GENMASK(31, 24)	/* F7 */
+
+/* USART_RQR */
+#define USART_RQR_ABRRQ		BIT(0)		/* F7 */
+#define USART_RQR_SBKRQ		BIT(1)		/* F7 */
+#define USART_RQR_MMRQ		BIT(2)		/* F7 */
+#define USART_RQR_RXFRQ		BIT(3)		/* F7 */
+#define USART_RQR_TXFRQ		BIT(4)		/* F7 */
+
+/* USART_ICR */
+#define USART_ICR_PECF		BIT(0)		/* F7 */
+#define USART_ICR_FFECF		BIT(1)		/* F7 */
+#define USART_ICR_NCF		BIT(2)		/* F7 */
+#define USART_ICR_ORECF		BIT(3)		/* F7 */
+#define USART_ICR_IDLECF	BIT(4)		/* F7 */
+#define USART_ICR_TCCF		BIT(6)		/* F7 */
+#define USART_ICR_LBDCF		BIT(8)		/* F7 */
+#define USART_ICR_CTSCF		BIT(9)		/* F7 */
+#define USART_ICR_RTOCF		BIT(11)		/* F7 */
+#define USART_ICR_EOBCF		BIT(12)		/* F7 */
+#define USART_ICR_CMCF		BIT(17)		/* F7 */
+
+#define STM32_SERIAL_NAME "ttyS"
+#define STM32_MAX_PORTS 6
+
+#define RX_BUF_L 200		 /* dma rx buffer length     */
+#define RX_BUF_P RX_BUF_L	 /* dma rx buffer period     */
+#define TX_BUF_L 200		 /* dma tx buffer length     */
+
+struct stm32_port {
+	struct uart_port port;
+	struct clk *clk;
+	struct stm32_usart_info *info;
+	struct dma_chan *rx_ch;  /* dma rx channel            */
+	dma_addr_t rx_dma_buf;   /* dma rx buffer bus address */
+	unsigned char *rx_buf;   /* dma rx buffer cpu address */
+	struct dma_chan *tx_ch;  /* dma tx channel            */
+	dma_addr_t tx_dma_buf;   /* dma tx buffer bus address */
+	unsigned char *tx_buf;   /* dma tx buffer cpu address */
+	bool tx_dma_busy;	 /* dma tx busy               */
+	bool hw_flow_control;
+};
+
+static struct stm32_port stm32_ports[STM32_MAX_PORTS];
+static struct uart_driver stm32_usart_driver;
diff --git a/drivers/tty/serial/timbuart.c b/drivers/tty/serial/timbuart.c
index 512c162..5da7fe4 100644
--- a/drivers/tty/serial/timbuart.c
+++ b/drivers/tty/serial/timbuart.c
@@ -394,7 +394,7 @@
 	return -EINVAL;
 }
 
-static struct uart_ops timbuart_ops = {
+static const struct uart_ops timbuart_ops = {
 	.tx_empty = timbuart_tx_empty,
 	.set_mctrl = timbuart_set_mctrl,
 	.get_mctrl = timbuart_get_mctrl,
diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c
index 05089b6..817bb0d 100644
--- a/drivers/tty/serial/uartlite.c
+++ b/drivers/tty/serial/uartlite.c
@@ -387,7 +387,7 @@
 }
 #endif
 
-static struct uart_ops ulite_ops = {
+static const struct uart_ops ulite_ops = {
 	.tx_empty	= ulite_tx_empty,
 	.set_mctrl	= ulite_set_mctrl,
 	.get_mctrl	= ulite_get_mctrl,
diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c
index 23cfc5e..6b85adc 100644
--- a/drivers/tty/serial/vt8500_serial.c
+++ b/drivers/tty/serial/vt8500_serial.c
@@ -118,7 +118,7 @@
  * have been allocated as we can't use pdev->id in
  * devicetree
  */
-static unsigned long vt8500_ports_in_use;
+static DECLARE_BITMAP(vt8500_ports_in_use, VT8500_MAX_PORTS);
 
 static inline void vt8500_write(struct uart_port *port, unsigned int val,
 			     unsigned int off)
@@ -663,15 +663,15 @@
 
 	if (port < 0) {
 		/* calculate the port id */
-		port = find_first_zero_bit(&vt8500_ports_in_use,
-					sizeof(vt8500_ports_in_use));
+		port = find_first_zero_bit(vt8500_ports_in_use,
+					   VT8500_MAX_PORTS);
 	}
 
 	if (port >= VT8500_MAX_PORTS)
 		return -ENODEV;
 
 	/* reserve the port id */
-	if (test_and_set_bit(port, &vt8500_ports_in_use)) {
+	if (test_and_set_bit(port, vt8500_ports_in_use)) {
 		/* port already in use - shouldn't really happen */
 		return -EBUSY;
 	}
diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index 9ca1a4d..f37edaa 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -57,7 +57,7 @@
 #define CDNS_UART_IMR		0x10  /* Interrupt Mask */
 #define CDNS_UART_ISR		0x14  /* Interrupt Status */
 #define CDNS_UART_BAUDGEN	0x18  /* Baud Rate Generator */
-#define CDNS_UART_RXTOUT		0x1C  /* RX Timeout */
+#define CDNS_UART_RXTOUT	0x1C  /* RX Timeout */
 #define CDNS_UART_RXWM		0x20  /* RX FIFO Trigger Level */
 #define CDNS_UART_MODEMCR	0x24  /* Modem Control */
 #define CDNS_UART_MODEMSR	0x28  /* Modem Status */
@@ -68,6 +68,7 @@
 #define CDNS_UART_IRRX_PWIDTH	0x3C  /* IR Min Received Pulse Width */
 #define CDNS_UART_IRTX_PWIDTH	0x40  /* IR Transmitted pulse Width */
 #define CDNS_UART_TXWM		0x44  /* TX FIFO Trigger Level */
+#define CDNS_UART_RXBS		0x48  /* RX FIFO byte status register */
 
 /* Control Register Bit Definitions */
 #define CDNS_UART_CR_STOPBRK	0x00000100  /* Stop TX break */
@@ -79,6 +80,9 @@
 #define CDNS_UART_CR_TXRST	0x00000002  /* TX logic reset */
 #define CDNS_UART_CR_RXRST	0x00000001  /* RX logic reset */
 #define CDNS_UART_CR_RST_TO	0x00000040  /* Restart Timeout Counter */
+#define CDNS_UART_RXBS_PARITY    0x00000001 /* Parity error status */
+#define CDNS_UART_RXBS_FRAMING   0x00000002 /* Framing error status */
+#define CDNS_UART_RXBS_BRK       0x00000004 /* Overrun error status */
 
 /*
  * Mode Register:
@@ -126,13 +130,27 @@
 #define CDNS_UART_IXR_RXEMPTY	0x00000002 /* RX FIFO empty interrupt. */
 #define CDNS_UART_IXR_MASK	0x00001FFF /* Valid bit mask */
 
-#define CDNS_UART_RX_IRQS	(CDNS_UART_IXR_PARITY | CDNS_UART_IXR_FRAMING | \
-				 CDNS_UART_IXR_OVERRUN | CDNS_UART_IXR_RXTRIG | \
+	/*
+	 * Do not enable parity error interrupt for the following
+	 * reason: When parity error interrupt is enabled, each Rx
+	 * parity error always results in 2 events. The first one
+	 * being parity error interrupt and the second one with a
+	 * proper Rx interrupt with the incoming data.  Disabling
+	 * parity error interrupt ensures better handling of parity
+	 * error events. With this change, for a parity error case, we
+	 * get a Rx interrupt with parity error set in ISR register
+	 * and we still handle parity errors in the desired way.
+	 */
+
+#define CDNS_UART_RX_IRQS	(CDNS_UART_IXR_FRAMING | \
+				 CDNS_UART_IXR_OVERRUN | \
+				 CDNS_UART_IXR_RXTRIG |	 \
 				 CDNS_UART_IXR_TOUT)
 
 /* Goes in read_status_mask for break detection as the HW doesn't do it*/
-#define CDNS_UART_IXR_BRK	0x80000000
+#define CDNS_UART_IXR_BRK	0x00002000
 
+#define CDNS_UART_RXBS_SUPPORT BIT(1)
 /*
  * Modem Control register:
  * The read/write Modem Control register controls the interface with the modem
@@ -172,46 +190,66 @@
 	struct clk		*pclk;
 	unsigned int		baud;
 	struct notifier_block	clk_rate_change_nb;
+	u32			quirks;
+};
+struct cdns_platform_data {
+	u32 quirks;
 };
 #define to_cdns_uart(_nb) container_of(_nb, struct cdns_uart, \
 		clk_rate_change_nb);
 
-static void cdns_uart_handle_rx(struct uart_port *port, unsigned int isrstatus)
+/**
+ * cdns_uart_handle_rx - Handle the received bytes along with Rx errors.
+ * @dev_id: Id of the UART port
+ * @isrstatus: The interrupt status register value as read
+ * Return: None
+ */
+static void cdns_uart_handle_rx(void *dev_id, unsigned int isrstatus)
 {
-	/*
-	 * There is no hardware break detection, so we interpret framing
-	 * error with all-zeros data as a break sequence. Most of the time,
-	 * there's another non-zero byte at the end of the sequence.
-	 */
-	if (isrstatus & CDNS_UART_IXR_FRAMING) {
-		while (!(readl(port->membase + CDNS_UART_SR) &
-					CDNS_UART_SR_RXEMPTY)) {
-			if (!readl(port->membase + CDNS_UART_FIFO)) {
+	struct uart_port *port = (struct uart_port *)dev_id;
+	struct cdns_uart *cdns_uart = port->private_data;
+	unsigned int data;
+	unsigned int rxbs_status = 0;
+	unsigned int status_mask;
+	unsigned int framerrprocessed = 0;
+	char status = TTY_NORMAL;
+	bool is_rxbs_support;
+
+	is_rxbs_support = cdns_uart->quirks & CDNS_UART_RXBS_SUPPORT;
+
+	while ((readl(port->membase + CDNS_UART_SR) &
+		CDNS_UART_SR_RXEMPTY) != CDNS_UART_SR_RXEMPTY) {
+		if (is_rxbs_support)
+			rxbs_status = readl(port->membase + CDNS_UART_RXBS);
+		data = readl(port->membase + CDNS_UART_FIFO);
+		port->icount.rx++;
+		/*
+		 * There is no hardware break detection in Zynq, so we interpret
+		 * framing error with all-zeros data as a break sequence.
+		 * Most of the time, there's another non-zero byte at the
+		 * end of the sequence.
+		 */
+		if (!is_rxbs_support && (isrstatus & CDNS_UART_IXR_FRAMING)) {
+			if (!data) {
 				port->read_status_mask |= CDNS_UART_IXR_BRK;
-				isrstatus &= ~CDNS_UART_IXR_FRAMING;
+				framerrprocessed = 1;
+				continue;
 			}
 		}
-		writel(CDNS_UART_IXR_FRAMING, port->membase + CDNS_UART_ISR);
-	}
+		if (is_rxbs_support && (rxbs_status & CDNS_UART_RXBS_BRK)) {
+			port->icount.brk++;
+			status = TTY_BREAK;
+			if (uart_handle_break(port))
+				continue;
+		}
 
-	/* drop byte with parity error if IGNPAR specified */
-	if (isrstatus & port->ignore_status_mask & CDNS_UART_IXR_PARITY)
-		isrstatus &= ~(CDNS_UART_IXR_RXTRIG | CDNS_UART_IXR_TOUT);
+		isrstatus &= port->read_status_mask;
+		isrstatus &= ~port->ignore_status_mask;
+		status_mask = port->read_status_mask;
+		status_mask &= ~port->ignore_status_mask;
 
-	isrstatus &= port->read_status_mask;
-	isrstatus &= ~port->ignore_status_mask;
-
-	if (!(isrstatus & (CDNS_UART_IXR_TOUT | CDNS_UART_IXR_RXTRIG)))
-		return;
-
-	while (!(readl(port->membase + CDNS_UART_SR) & CDNS_UART_SR_RXEMPTY)) {
-		u32 data;
-		char status = TTY_NORMAL;
-
-		data = readl(port->membase + CDNS_UART_FIFO);
-
-		/* Non-NULL byte after BREAK is garbage (99%) */
-		if (data && (port->read_status_mask & CDNS_UART_IXR_BRK)) {
+		if (data &&
+		    (port->read_status_mask & CDNS_UART_IXR_BRK)) {
 			port->read_status_mask &= ~CDNS_UART_IXR_BRK;
 			port->icount.brk++;
 			if (uart_handle_break(port))
@@ -221,57 +259,83 @@
 		if (uart_handle_sysrq_char(port, data))
 			continue;
 
-		port->icount.rx++;
-
-		if (isrstatus & CDNS_UART_IXR_PARITY) {
-			port->icount.parity++;
-			status = TTY_PARITY;
-		} else if (isrstatus & CDNS_UART_IXR_FRAMING) {
-			port->icount.frame++;
-			status = TTY_FRAME;
-		} else if (isrstatus & CDNS_UART_IXR_OVERRUN) {
-			port->icount.overrun++;
+		if (is_rxbs_support) {
+			if ((rxbs_status & CDNS_UART_RXBS_PARITY)
+			    && (status_mask & CDNS_UART_IXR_PARITY)) {
+				port->icount.parity++;
+				status = TTY_PARITY;
+			}
+			if ((rxbs_status & CDNS_UART_RXBS_FRAMING)
+			    && (status_mask & CDNS_UART_IXR_PARITY)) {
+				port->icount.frame++;
+				status = TTY_FRAME;
+			}
+		} else {
+			if (isrstatus & CDNS_UART_IXR_PARITY) {
+				port->icount.parity++;
+				status = TTY_PARITY;
+			}
+			if ((isrstatus & CDNS_UART_IXR_FRAMING) &&
+			    !framerrprocessed) {
+				port->icount.frame++;
+				status = TTY_FRAME;
+			}
 		}
-
-		uart_insert_char(port, isrstatus, CDNS_UART_IXR_OVERRUN,
-				 data, status);
+		if (isrstatus & CDNS_UART_IXR_OVERRUN) {
+			port->icount.overrun++;
+			tty_insert_flip_char(&port->state->port, 0,
+					     TTY_OVERRUN);
+		}
+		tty_insert_flip_char(&port->state->port, data, status);
+		isrstatus = 0;
 	}
+	spin_unlock(&port->lock);
 	tty_flip_buffer_push(&port->state->port);
+	spin_lock(&port->lock);
 }
 
-static void cdns_uart_handle_tx(struct uart_port *port)
+/**
+ * cdns_uart_handle_tx - Handle the bytes to be Txed.
+ * @dev_id: Id of the UART port
+ * Return: None
+ */
+static void cdns_uart_handle_tx(void *dev_id)
 {
+	struct uart_port *port = (struct uart_port *)dev_id;
 	unsigned int numbytes;
 
 	if (uart_circ_empty(&port->state->xmit)) {
 		writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_IDR);
-		return;
+	} else {
+		numbytes = port->fifosize;
+		while (numbytes && !uart_circ_empty(&port->state->xmit) &&
+		       !(readl(port->membase + CDNS_UART_SR) & CDNS_UART_SR_TXFULL)) {
+			/*
+			 * Get the data from the UART circular buffer
+			 * and write it to the cdns_uart's TX_FIFO
+			 * register.
+			 */
+			writel(
+				port->state->xmit.buf[port->state->xmit.
+				tail], port->membase + CDNS_UART_FIFO);
+
+			port->icount.tx++;
+
+			/*
+			 * Adjust the tail of the UART buffer and wrap
+			 * the buffer if it reaches limit.
+			 */
+			port->state->xmit.tail =
+				(port->state->xmit.tail + 1) &
+					(UART_XMIT_SIZE - 1);
+
+			numbytes--;
+		}
+
+		if (uart_circ_chars_pending(
+				&port->state->xmit) < WAKEUP_CHARS)
+			uart_write_wakeup(port);
 	}
-
-	numbytes = port->fifosize;
-	while (numbytes && !uart_circ_empty(&port->state->xmit) &&
-	       !(readl(port->membase + CDNS_UART_SR) & CDNS_UART_SR_TXFULL)) {
-		/*
-		 * Get the data from the UART circular buffer
-		 * and write it to the cdns_uart's TX_FIFO
-		 * register.
-		 */
-		writel(port->state->xmit.buf[port->state->xmit.tail],
-			port->membase + CDNS_UART_FIFO);
-		port->icount.tx++;
-
-		/*
-		 * Adjust the tail of the UART buffer and wrap
-		 * the buffer if it reaches limit.
-		 */
-		port->state->xmit.tail =
-			(port->state->xmit.tail + 1) & (UART_XMIT_SIZE - 1);
-
-		numbytes--;
-	}
-
-	if (uart_circ_chars_pending(&port->state->xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
 }
 
 /**
@@ -284,27 +348,24 @@
 static irqreturn_t cdns_uart_isr(int irq, void *dev_id)
 {
 	struct uart_port *port = (struct uart_port *)dev_id;
-	unsigned long flags;
 	unsigned int isrstatus;
 
-	spin_lock_irqsave(&port->lock, flags);
+	spin_lock(&port->lock);
 
 	/* Read the interrupt status register to determine which
-	 * interrupt(s) is/are active.
+	 * interrupt(s) is/are active and clear them.
 	 */
 	isrstatus = readl(port->membase + CDNS_UART_ISR);
-
-	if (isrstatus & CDNS_UART_RX_IRQS)
-		cdns_uart_handle_rx(port, isrstatus);
-
-	if ((isrstatus & CDNS_UART_IXR_TXEMPTY) == CDNS_UART_IXR_TXEMPTY)
-		cdns_uart_handle_tx(port);
-
 	writel(isrstatus, port->membase + CDNS_UART_ISR);
 
-	/* be sure to release the lock and tty before leaving */
-	spin_unlock_irqrestore(&port->lock, flags);
+	if (isrstatus & CDNS_UART_IXR_TXEMPTY) {
+		cdns_uart_handle_tx(dev_id);
+		isrstatus &= ~CDNS_UART_IXR_TXEMPTY;
+	}
+	if (isrstatus & CDNS_UART_IXR_MASK)
+		cdns_uart_handle_rx(dev_id, isrstatus);
 
+	spin_unlock(&port->lock);
 	return IRQ_HANDLED;
 }
 
@@ -653,6 +714,10 @@
 	ctrl_reg |= CDNS_UART_CR_TXRST | CDNS_UART_CR_RXRST;
 	writel(ctrl_reg, port->membase + CDNS_UART_CR);
 
+	while (readl(port->membase + CDNS_UART_CR) &
+		(CDNS_UART_CR_TXRST | CDNS_UART_CR_RXRST))
+		cpu_relax();
+
 	/*
 	 * Clear the RX disable and TX disable bits and then set the TX enable
 	 * bit and RX enable bit to enable the transmitter and receiver.
@@ -736,10 +801,14 @@
  */
 static int cdns_uart_startup(struct uart_port *port)
 {
+	struct cdns_uart *cdns_uart = port->private_data;
+	bool is_brk_support;
 	int ret;
 	unsigned long flags;
 	unsigned int status = 0;
 
+	is_brk_support = cdns_uart->quirks & CDNS_UART_RXBS_SUPPORT;
+
 	spin_lock_irqsave(&port->lock, flags);
 
 	/* Disable the TX and RX */
@@ -752,6 +821,10 @@
 	writel(CDNS_UART_CR_TXRST | CDNS_UART_CR_RXRST,
 			port->membase + CDNS_UART_CR);
 
+	while (readl(port->membase + CDNS_UART_CR) &
+		(CDNS_UART_CR_TXRST | CDNS_UART_CR_RXRST))
+		cpu_relax();
+
 	/*
 	 * Clear the RX disable bit and then set the RX enable bit to enable
 	 * the receiver.
@@ -794,7 +867,11 @@
 	}
 
 	/* Set the Interrupt Registers with desired interrupts */
-	writel(CDNS_UART_RX_IRQS, port->membase + CDNS_UART_IER);
+	if (is_brk_support)
+		writel(CDNS_UART_RX_IRQS | CDNS_UART_IXR_BRK,
+					port->membase + CDNS_UART_IER);
+	else
+		writel(CDNS_UART_RX_IRQS, port->membase + CDNS_UART_IER);
 
 	return 0;
 }
@@ -993,7 +1070,7 @@
 	}
 }
 
-static struct uart_ops cdns_uart_ops = {
+static const struct uart_ops cdns_uart_ops = {
 	.set_mctrl	= cdns_uart_set_mctrl,
 	.get_mctrl	= cdns_uart_get_mctrl,
 	.start_tx	= cdns_uart_start_tx,
@@ -1088,9 +1165,34 @@
 static int __init cdns_early_console_setup(struct earlycon_device *device,
 					   const char *opt)
 {
-	if (!device->port.membase)
+	struct uart_port *port = &device->port;
+
+	if (!port->membase)
 		return -ENODEV;
 
+	/* initialise control register */
+	writel(CDNS_UART_CR_TX_EN|CDNS_UART_CR_TXRST|CDNS_UART_CR_RXRST,
+	       port->membase + CDNS_UART_CR);
+
+	/* only set baud if specified on command line - otherwise
+	 * assume it has been initialized by a boot loader.
+	 */
+	if (device->baud) {
+		u32 cd = 0, bdiv = 0;
+		u32 mr;
+		int div8;
+
+		cdns_uart_calc_baud_divs(port->uartclk, device->baud,
+					 &bdiv, &cd, &div8);
+		mr = CDNS_UART_MR_PARITY_NONE;
+		if (div8)
+			mr |= CDNS_UART_MR_CLKSEL;
+
+		writel(mr,   port->membase + CDNS_UART_MR);
+		writel(cd,   port->membase + CDNS_UART_BAUDGEN);
+		writel(bdiv, port->membase + CDNS_UART_BAUDDIV);
+	}
+
 	device->con->write = cdns_early_write;
 
 	return 0;
@@ -1328,6 +1430,18 @@
 static SIMPLE_DEV_PM_OPS(cdns_uart_dev_pm_ops, cdns_uart_suspend,
 		cdns_uart_resume);
 
+static const struct cdns_platform_data zynqmp_uart_def = {
+				.quirks = CDNS_UART_RXBS_SUPPORT, };
+
+/* Match table for of_platform binding */
+static const struct of_device_id cdns_uart_of_match[] = {
+	{ .compatible = "xlnx,xuartps", },
+	{ .compatible = "cdns,uart-r1p8", },
+	{ .compatible = "cdns,uart-r1p12", .data = &zynqmp_uart_def },
+	{}
+};
+MODULE_DEVICE_TABLE(of, cdns_uart_of_match);
+
 /**
  * cdns_uart_probe - Platform driver probe
  * @pdev: Pointer to the platform device structure
@@ -1340,12 +1454,20 @@
 	struct uart_port *port;
 	struct resource *res;
 	struct cdns_uart *cdns_uart_data;
+	const struct of_device_id *match;
 
 	cdns_uart_data = devm_kzalloc(&pdev->dev, sizeof(*cdns_uart_data),
 			GFP_KERNEL);
 	if (!cdns_uart_data)
 		return -ENOMEM;
 
+	match = of_match_node(cdns_uart_of_match, pdev->dev.of_node);
+	if (match && match->data) {
+		const struct cdns_platform_data *data = match->data;
+
+		cdns_uart_data->quirks = data->quirks;
+	}
+
 	cdns_uart_data->pclk = devm_clk_get(&pdev->dev, "pclk");
 	if (IS_ERR(cdns_uart_data->pclk)) {
 		cdns_uart_data->pclk = devm_clk_get(&pdev->dev, "aper_clk");
@@ -1471,14 +1593,6 @@
 	return rc;
 }
 
-/* Match table for of_platform binding */
-static const struct of_device_id cdns_uart_of_match[] = {
-	{ .compatible = "xlnx,xuartps", },
-	{ .compatible = "cdns,uart-r1p8", },
-	{}
-};
-MODULE_DEVICE_TABLE(of, cdns_uart_of_match);
-
 static struct platform_driver cdns_uart_platform_driver = {
 	.probe   = cdns_uart_probe,
 	.remove  = cdns_uart_remove,
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 2705ca9..e841a4e 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1312,12 +1312,12 @@
 	if (i > vc->vc_npar)
 		return i;
 
-	if (vc->vc_par[i] == 5 && i < vc->vc_npar) {
-		/* 256 colours -- ubiquitous */
+	if (vc->vc_par[i] == 5 && i + 1 <= vc->vc_npar) {
+		/* 256 colours */
 		i++;
 		rgb_from_256(vc->vc_par[i], &c);
-	} else if (vc->vc_par[i] == 2 && i <= vc->vc_npar + 3) {
-		/* 24 bit -- extremely rare */
+	} else if (vc->vc_par[i] == 2 && i + 3 <= vc->vc_npar) {
+		/* 24 bit */
 		c.r = vc->vc_par[i + 1];
 		c.g = vc->vc_par[i + 2];
 		c.b = vc->vc_par[i + 3];
@@ -1415,6 +1415,11 @@
 				(vc->vc_color & 0x0f);
 			break;
 		default:
+			if (vc->vc_par[i] >= 90 && vc->vc_par[i] <= 107) {
+				if (vc->vc_par[i] < 100)
+					vc->vc_intensity = 2;
+				vc->vc_par[i] -= 60;
+			}
 			if (vc->vc_par[i] >= 30 && vc->vc_par[i] <= 37)
 				vc->vc_color = color_table[vc->vc_par[i] - 30]
 					| (vc->vc_color & 0xf0);
diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
index 915facb..e1134a4 100644
--- a/drivers/uio/uio_dmem_genirq.c
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -229,7 +229,7 @@
 		++uiomem;
 	}
 
-	priv->dmem_region_start = i;
+	priv->dmem_region_start = uiomem - &uioinfo->mem[0];
 	priv->num_dmem_regions = pdata->num_dynamic_regions;
 
 	for (i = 0; i < pdata->num_dynamic_regions; ++i) {
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index 8689dcb..644e978 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -152,7 +152,8 @@
 
 config USB_LED_TRIG
 	bool "USB LED Triggers"
-	depends on LEDS_CLASS && USB_COMMON && LEDS_TRIGGERS
+	depends on LEDS_CLASS && LEDS_TRIGGERS
+	select USB_COMMON
 	help
 	  This option adds LED triggers for USB host and/or gadget activity.
 
@@ -160,4 +161,25 @@
 	  LEDs and you want to use them as activity indicators for USB host or
 	  gadget.
 
+config USB_ULPI_BUS
+	tristate "USB ULPI PHY interface support"
+	select USB_COMMON
+	help
+	  UTMI+ Low Pin Interface (ULPI) is specification for a commonly used
+	  USB 2.0 PHY interface. The ULPI specification defines a standard set
+	  of registers that can be used to detect the vendor and product which
+	  allows ULPI to be handled as a bus. This module is the driver for that
+	  bus.
+
+	  The ULPI interfaces (the buses) are registered by the drivers for USB
+	  controllers which support ULPI register access and have ULPI PHY
+	  attached to them. The ULPI PHY drivers themselves are normal PHY
+	  drivers.
+
+	  ULPI PHYs provide often functions such as ADP sensing/probing (OTG
+	  protocol) and USB charger detection.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called ulpi.
+
 endif # USB_SUPPORT
diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c
index 0a866e9..f9fe86b6 100644
--- a/drivers/usb/atm/cxacru.c
+++ b/drivers/usb/atm/cxacru.c
@@ -1139,10 +1139,8 @@
 
 	/* instance init */
 	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
-	if (!instance) {
-		usb_dbg(usbatm_instance, "cxacru_bind: no memory for instance data\n");
+	if (!instance)
 		return -ENOMEM;
-	}
 
 	instance->usbatm = usbatm_instance;
 	instance->modem_type = (struct cxacru_modem_type *) id->driver_info;
@@ -1168,13 +1166,11 @@
 	}
 	instance->rcv_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!instance->rcv_urb) {
-		usb_dbg(usbatm_instance, "cxacru_bind: no memory for rcv_urb\n");
 		ret = -ENOMEM;
 		goto fail;
 	}
 	instance->snd_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!instance->snd_urb) {
-		usb_dbg(usbatm_instance, "cxacru_bind: no memory for snd_urb\n");
 		ret = -ENOMEM;
 		goto fail;
 	}
diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c
index 0270d13..5083eb5 100644
--- a/drivers/usb/atm/speedtch.c
+++ b/drivers/usb/atm/speedtch.c
@@ -817,7 +817,6 @@
 	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
 
 	if (!instance) {
-		usb_err(usbatm, "%s: no memory for instance data!\n", __func__);
 		ret = -ENOMEM;
 		goto fail_release;
 	}
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index 4333dc5..df67815 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -2196,17 +2196,12 @@
 		load_XILINX_firmware(sc);
 
 	intr = kmalloc(size, GFP_KERNEL);
-	if (!intr) {
-		uea_err(INS_TO_USBDEV(sc),
-		       "cannot allocate interrupt package\n");
+	if (!intr)
 		goto err0;
-	}
 
 	sc->urb_int = usb_alloc_urb(0, GFP_KERNEL);
-	if (!sc->urb_int) {
-		uea_err(INS_TO_USBDEV(sc), "cannot allocate interrupt URB\n");
+	if (!sc->urb_int)
 		goto err1;
-	}
 
 	usb_fill_int_urb(sc->urb_int, sc->usb_dev,
 			 usb_rcvintpipe(sc->usb_dev, UEA_INTR_PIPE),
@@ -2561,10 +2556,8 @@
 	}
 
 	sc = kzalloc(sizeof(struct uea_softc), GFP_KERNEL);
-	if (!sc) {
-		uea_err(usb, "uea_init: not enough memory !\n");
+	if (!sc)
 		return -ENOMEM;
-	}
 
 	sc->usb_dev = usb;
 	usbatm->driver_data = sc;
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index db322d9..4dec9df 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -819,7 +819,6 @@
 
 	new = kzalloc(sizeof(struct usbatm_vcc_data), GFP_KERNEL);
 	if (!new) {
-		atm_err(instance, "%s: no memory for vcc_data!\n", __func__);
 		ret = -ENOMEM;
 		goto fail;
 	}
@@ -1032,10 +1031,8 @@
 
 	/* instance init */
 	instance = kzalloc(sizeof(*instance) + sizeof(struct urb *) * (num_rcv_urbs + num_snd_urbs), GFP_KERNEL);
-	if (!instance) {
-		dev_err(dev, "%s: no memory for instance data!\n", __func__);
+	if (!instance)
 		return -ENOMEM;
-	}
 
 	/* public fields */
 
@@ -1141,7 +1138,6 @@
 
 		urb = usb_alloc_urb(iso_packets, GFP_KERNEL);
 		if (!urb) {
-			dev_err(dev, "%s: no memory for urb %d!\n", __func__, i);
 			error = -ENOMEM;
 			goto fail_unbind;
 		}
@@ -1151,7 +1147,6 @@
 		/* zero the tx padding to avoid leaking information */
 		buffer = kzalloc(channel->buf_size, GFP_KERNEL);
 		if (!buffer) {
-			dev_err(dev, "%s: no memory for buffer %d!\n", __func__, i);
 			error = -ENOMEM;
 			goto fail_unbind;
 		}
@@ -1182,7 +1177,6 @@
 	instance->cell_buf = kmalloc(instance->rx_channel.stride, GFP_KERNEL);
 
 	if (!instance->cell_buf) {
-		dev_err(dev, "%s: no memory for cell buffer!\n", __func__);
 		error = -ENOMEM;
 		goto fail_unbind;
 	}
diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index dedc33e5..0991794 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -140,6 +140,9 @@
 	if (of_find_property(np, "disable-over-current", NULL))
 		data->disable_oc = 1;
 
+	if (of_find_property(np, "over-current-active-high", NULL))
+		data->oc_polarity = 1;
+
 	if (of_find_property(np, "external-vbus-divider", NULL))
 		data->evdo = 1;
 
diff --git a/drivers/usb/chipidea/ci_hdrc_imx.h b/drivers/usb/chipidea/ci_hdrc_imx.h
index 635717e..409aa5ca8 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.h
+++ b/drivers/usb/chipidea/ci_hdrc_imx.h
@@ -17,6 +17,7 @@
 	int index;
 
 	unsigned int disable_oc:1; /* over current detect disabled */
+	unsigned int oc_polarity:1; /* over current polarity if oc enabled */
 	unsigned int evdo:1; /* set external vbus divider option */
 };
 
diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c
index 053bac9..96ae695 100644
--- a/drivers/usb/chipidea/host.c
+++ b/drivers/usb/chipidea/host.c
@@ -81,12 +81,15 @@
 {
 	struct device *dev = hcd->self.controller;
 	struct ci_hdrc *ci = dev_get_drvdata(dev);
+	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 	int ret;
 
 	ret = ehci_setup(hcd);
 	if (ret)
 		return ret;
 
+	ehci->need_io_watchdog = 0;
+
 	ci_platform_configure(ci);
 
 	return ret;
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 065f5d9..661f43f 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -59,7 +59,7 @@
  */
 static inline int hw_ep_bit(int num, int dir)
 {
-	return num + (dir ? 16 : 0);
+	return num + ((dir == TX) ? 16 : 0);
 }
 
 static inline int ep_to_bit(struct ci_hdrc *ci, int n)
@@ -121,9 +121,8 @@
  */
 static int hw_ep_disable(struct ci_hdrc *ci, int num, int dir)
 {
-	hw_ep_flush(ci, num, dir);
 	hw_write(ci, OP_ENDPTCTRL + num,
-		 dir ? ENDPTCTRL_TXE : ENDPTCTRL_RXE, 0);
+		 (dir == TX) ? ENDPTCTRL_TXE : ENDPTCTRL_RXE, 0);
 	return 0;
 }
 
@@ -139,7 +138,7 @@
 {
 	u32 mask, data;
 
-	if (dir) {
+	if (dir == TX) {
 		mask  = ENDPTCTRL_TXT;  /* type    */
 		data  = type << __ffs(mask);
 
@@ -171,7 +170,7 @@
  */
 static int hw_ep_get_halt(struct ci_hdrc *ci, int num, int dir)
 {
-	u32 mask = dir ? ENDPTCTRL_TXS : ENDPTCTRL_RXS;
+	u32 mask = (dir == TX) ? ENDPTCTRL_TXS : ENDPTCTRL_RXS;
 
 	return hw_read(ci, OP_ENDPTCTRL + num, mask) ? 1 : 0;
 }
@@ -188,6 +187,9 @@
 {
 	int n = hw_ep_bit(num, dir);
 
+	/* Synchronize before ep prime */
+	wmb();
+
 	if (is_ctrl && dir == RX && hw_read(ci, OP_ENDPTSETUPSTAT, BIT(num)))
 		return -EAGAIN;
 
@@ -218,8 +220,8 @@
 
 	do {
 		enum ci_hw_regs reg = OP_ENDPTCTRL + num;
-		u32 mask_xs = dir ? ENDPTCTRL_TXS : ENDPTCTRL_RXS;
-		u32 mask_xr = dir ? ENDPTCTRL_TXR : ENDPTCTRL_RXR;
+		u32 mask_xs = (dir == TX) ? ENDPTCTRL_TXS : ENDPTCTRL_RXS;
+		u32 mask_xr = (dir == TX) ? ENDPTCTRL_TXR : ENDPTCTRL_RXR;
 
 		/* data toggle - reserved for EP0 but it's in ESS */
 		hw_write(ci, reg, mask_xs|mask_xr,
@@ -348,8 +350,7 @@
 	if (node == NULL)
 		return -ENOMEM;
 
-	node->ptr = dma_pool_zalloc(hwep->td_pool, GFP_ATOMIC,
-				   &node->dma);
+	node->ptr = dma_pool_zalloc(hwep->td_pool, GFP_ATOMIC, &node->dma);
 	if (node->ptr == NULL) {
 		kfree(node);
 		return -ENOMEM;
@@ -506,8 +507,6 @@
 		hwep->qh.ptr->cap |= mul << __ffs(QH_MULT);
 	}
 
-	wmb();   /* synchronize before ep prime */
-
 	ret = hw_ep_prime(ci, hwep->num, hwep->dir,
 			   hwep->type == USB_ENDPOINT_XFER_CONTROL);
 done:
@@ -534,9 +533,6 @@
 	hwep->qh.ptr->td.token &=
 		cpu_to_le32(~(TD_STATUS_HALTED | TD_STATUS_ACTIVE));
 
-	/* Synchronize before ep prime */
-	wmb();
-
 	return hw_ep_prime(ci, hwep->num, hwep->dir,
 				hwep->type == USB_ENDPOINT_XFER_CONTROL);
 }
@@ -590,7 +586,7 @@
 		}
 
 		if (remaining_length) {
-			if (hwep->dir) {
+			if (hwep->dir == TX) {
 				hwreq->req.status = -EPROTO;
 				break;
 			}
@@ -949,6 +945,15 @@
 	int retval;
 	struct ci_hw_ep *hwep;
 
+	/*
+	 * Unexpected USB controller behavior, caused by bad signal integrity
+	 * or ground reference problems, can lead to isr_setup_status_phase
+	 * being called with ci->status equal to NULL.
+	 * If this situation occurs, you should review your USB hardware design.
+	 */
+	if (WARN_ON_ONCE(!ci->status))
+		return -EPIPE;
+
 	hwep = (ci->ep0_dir == TX) ? ci->ep0out : ci->ep0in;
 	ci->status->context = ci;
 	ci->status->complete = isr_setup_status_complete;
@@ -1042,9 +1047,9 @@
 			if (req.wLength != 0)
 				break;
 			num  = le16_to_cpu(req.wIndex);
-			dir = num & USB_ENDPOINT_DIR_MASK;
+			dir = (num & USB_ENDPOINT_DIR_MASK) ? TX : RX;
 			num &= USB_ENDPOINT_NUMBER_MASK;
-			if (dir) /* TX */
+			if (dir == TX)
 				num += ci->hw_ep_max / 2;
 			if (!ci->ci_hw_ep[num].wedge) {
 				spin_unlock(&ci->lock);
@@ -1094,9 +1099,9 @@
 			if (req.wLength != 0)
 				break;
 			num  = le16_to_cpu(req.wIndex);
-			dir = num & USB_ENDPOINT_DIR_MASK;
+			dir = (num & USB_ENDPOINT_DIR_MASK) ? TX : RX;
 			num &= USB_ENDPOINT_NUMBER_MASK;
-			if (dir) /* TX */
+			if (dir == TX)
 				num += ci->hw_ep_max / 2;
 
 			spin_unlock(&ci->lock);
@@ -1596,8 +1601,11 @@
 {
 	struct ci_hdrc *ci = container_of(_gadget, struct ci_hdrc, gadget);
 
-	/* Data+ pullup controlled by OTG state machine in OTG fsm mode */
-	if (ci_otg_is_fsm_mode(ci))
+	/*
+	 * Data+ pullup controlled by OTG state machine in OTG fsm mode;
+	 * and don't touch Data+ in host mode for dual role config.
+	 */
+	if (ci_otg_is_fsm_mode(ci) || ci->role == CI_ROLE_HOST)
 		return 0;
 
 	pm_runtime_get_sync(&ci->gadget.dev);
@@ -1668,12 +1676,10 @@
 			usb_ep_set_maxpacket_limit(&hwep->ep, (unsigned short)~0);
 
 			INIT_LIST_HEAD(&hwep->qh.queue);
-			hwep->qh.ptr = dma_pool_alloc(ci->qh_pool, GFP_KERNEL,
-						     &hwep->qh.dma);
+			hwep->qh.ptr = dma_pool_zalloc(ci->qh_pool, GFP_KERNEL,
+						       &hwep->qh.dma);
 			if (hwep->qh.ptr == NULL)
 				retval = -ENOMEM;
-			else
-				memset(hwep->qh.ptr, 0, sizeof(*hwep->qh.ptr));
 
 			/*
 			 * set up shorthands for ep0 out and in endpoints,
@@ -1987,7 +1993,7 @@
 	if (!hw_read(ci, CAP_DCCPARAMS, DCCPARAMS_DC))
 		return -ENXIO;
 
-	rdrv = devm_kzalloc(ci->dev, sizeof(struct ci_role_driver), GFP_KERNEL);
+	rdrv = devm_kzalloc(ci->dev, sizeof(*rdrv), GFP_KERNEL);
 	if (!rdrv)
 		return -ENOMEM;
 
diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index ab8b027..20d02a5 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -56,6 +56,7 @@
 
 #define MX6_BM_NON_BURST_SETTING	BIT(1)
 #define MX6_BM_OVER_CUR_DIS		BIT(7)
+#define MX6_BM_OVER_CUR_POLARITY	BIT(8)
 #define MX6_BM_WAKEUP_ENABLE		BIT(10)
 #define MX6_BM_ID_WAKEUP		BIT(16)
 #define MX6_BM_VBUS_WAKEUP		BIT(17)
@@ -266,11 +267,14 @@
 
 	spin_lock_irqsave(&usbmisc->lock, flags);
 
+	reg = readl(usbmisc->base + data->index * 4);
 	if (data->disable_oc) {
-		reg = readl(usbmisc->base + data->index * 4);
-		writel(reg | MX6_BM_OVER_CUR_DIS,
-			usbmisc->base + data->index * 4);
+		reg |= MX6_BM_OVER_CUR_DIS;
+	} else if (data->oc_polarity == 1) {
+		/* High active */
+		reg &= ~(MX6_BM_OVER_CUR_DIS | MX6_BM_OVER_CUR_POLARITY);
 	}
+	writel(reg, usbmisc->base + data->index * 4);
 
 	/* SoC non-burst setting */
 	reg = readl(usbmisc->base + data->index * 4);
@@ -365,10 +369,14 @@
 		return -EINVAL;
 
 	spin_lock_irqsave(&usbmisc->lock, flags);
+	reg = readl(usbmisc->base);
 	if (data->disable_oc) {
-		reg = readl(usbmisc->base);
-		writel(reg | MX6_BM_OVER_CUR_DIS, usbmisc->base);
+		reg |= MX6_BM_OVER_CUR_DIS;
+	} else if (data->oc_polarity == 1) {
+		/* High active */
+		reg &= ~(MX6_BM_OVER_CUR_DIS | MX6_BM_OVER_CUR_POLARITY);
 	}
+	writel(reg, usbmisc->base);
 
 	reg = readl(usbmisc->base + MX7D_USBNC_USB_CTRL2);
 	reg &= ~MX7D_USB_VBUS_WAKEUP_SOURCE_MASK;
@@ -492,6 +500,10 @@
 		.compatible = "fsl,imx6ul-usbmisc",
 		.data = &imx6sx_usbmisc_ops,
 	},
+	{
+		.compatible = "fsl,imx7d-usbmisc",
+		.data = &imx7d_usbmisc_ops,
+	},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, usbmisc_imx_dt_ids);
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7191230..78f0f85 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -368,17 +368,17 @@
 	if (!test_and_clear_bit(index, &acm->read_urbs_free))
 		return 0;
 
-	dev_vdbg(&acm->data->dev, "%s - urb %d\n", __func__, index);
-
 	res = usb_submit_urb(acm->read_urbs[index], mem_flags);
 	if (res) {
 		if (res != -EPERM) {
 			dev_err(&acm->data->dev,
-					"%s - usb_submit_urb failed: %d\n",
-					__func__, res);
+					"urb %d failed submission with %d\n",
+					index, res);
 		}
 		set_bit(index, &acm->read_urbs_free);
 		return res;
+	} else {
+		dev_vdbg(&acm->data->dev, "submitted urb %d\n", index);
 	}
 
 	return 0;
@@ -415,8 +415,9 @@
 	unsigned long flags;
 	int status = urb->status;
 
-	dev_vdbg(&acm->data->dev, "%s - urb %d, len %d\n", __func__,
-					rb->index, urb->actual_length);
+	dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n",
+					rb->index, urb->actual_length,
+					status);
 
 	if (!acm->dev) {
 		set_bit(rb->index, &acm->read_urbs_free);
@@ -426,8 +427,6 @@
 
 	if (status) {
 		set_bit(rb->index, &acm->read_urbs_free);
-		dev_dbg(&acm->data->dev, "%s - non-zero urb status: %d\n",
-							__func__, status);
 		if ((status != -ENOENT) || (urb->actual_length == 0))
 			return;
 	}
@@ -462,8 +461,7 @@
 	int status = urb->status;
 
 	if (status || (urb->actual_length != urb->transfer_buffer_length))
-		dev_vdbg(&acm->data->dev, "%s - len %d/%d, status %d\n",
-			__func__,
+		dev_vdbg(&acm->data->dev, "wrote len %d/%d, status %d\n",
 			urb->actual_length,
 			urb->transfer_buffer_length,
 			status);
@@ -478,8 +476,6 @@
 {
 	struct acm *acm = container_of(work, struct acm, work);
 
-	dev_vdbg(&acm->data->dev, "%s\n", __func__);
-
 	tty_port_tty_wakeup(&acm->port);
 }
 
@@ -492,8 +488,6 @@
 	struct acm *acm;
 	int retval;
 
-	dev_dbg(tty->dev, "%s\n", __func__);
-
 	acm = acm_get_by_minor(tty->index);
 	if (!acm)
 		return -ENODEV;
@@ -515,8 +509,6 @@
 {
 	struct acm *acm = tty->driver_data;
 
-	dev_dbg(tty->dev, "%s\n", __func__);
-
 	return tty_port_open(&acm->port, tty, filp);
 }
 
@@ -545,8 +537,6 @@
 	int retval = -ENODEV;
 	int i;
 
-	dev_dbg(&acm->control->dev, "%s\n", __func__);
-
 	mutex_lock(&acm->mutex);
 	if (acm->disconnected)
 		goto disconnected;
@@ -607,8 +597,6 @@
 {
 	struct acm *acm = container_of(port, struct acm, port);
 
-	dev_dbg(&acm->control->dev, "%s\n", __func__);
-
 	acm_release_minor(acm);
 	usb_put_intf(acm->control);
 	kfree(acm->country_codes);
@@ -622,8 +610,6 @@
 	struct acm_wb *wb;
 	int i;
 
-	dev_dbg(&acm->control->dev, "%s\n", __func__);
-
 	/*
 	 * Need to grab write_lock to prevent race with resume, but no need to
 	 * hold it due to the tty-port initialised flag.
@@ -654,21 +640,21 @@
 static void acm_tty_cleanup(struct tty_struct *tty)
 {
 	struct acm *acm = tty->driver_data;
-	dev_dbg(&acm->control->dev, "%s\n", __func__);
+
 	tty_port_put(&acm->port);
 }
 
 static void acm_tty_hangup(struct tty_struct *tty)
 {
 	struct acm *acm = tty->driver_data;
-	dev_dbg(&acm->control->dev, "%s\n", __func__);
+
 	tty_port_hangup(&acm->port);
 }
 
 static void acm_tty_close(struct tty_struct *tty, struct file *filp)
 {
 	struct acm *acm = tty->driver_data;
-	dev_dbg(&acm->control->dev, "%s\n", __func__);
+
 	tty_port_close(&acm->port, tty, filp);
 }
 
@@ -684,7 +670,7 @@
 	if (!count)
 		return 0;
 
-	dev_vdbg(&acm->data->dev, "%s - count %d\n", __func__, count);
+	dev_vdbg(&acm->data->dev, "%d bytes from tty layer\n", count);
 
 	spin_lock_irqsave(&acm->write_lock, flags);
 	wbn = acm_wb_alloc(acm);
@@ -701,7 +687,7 @@
 	}
 
 	count = (count > acm->writesize) ? acm->writesize : count;
-	dev_vdbg(&acm->data->dev, "%s - write %d\n", __func__, count);
+	dev_vdbg(&acm->data->dev, "writing %d bytes\n", count);
 	memcpy(wb->buf, buf, count);
 	wb->len = count;
 
@@ -1193,6 +1179,9 @@
 		return -EINVAL;
 	}
 
+	if (!intf->cur_altsetting)
+		return -EINVAL;
+
 	if (!buflen) {
 		if (intf->cur_altsetting->endpoint &&
 				intf->cur_altsetting->endpoint->extralen &&
@@ -1246,6 +1235,8 @@
 		dev_dbg(&intf->dev, "no interfaces\n");
 		return -ENODEV;
 	}
+	if (!data_interface->cur_altsetting || !control_interface->cur_altsetting)
+		return -ENODEV;
 
 	if (data_intf_num != call_intf_num)
 		dev_dbg(&intf->dev, "Separate call control interface. That is not fully supported.\n");
@@ -1354,7 +1345,6 @@
 	spin_lock_init(&acm->write_lock);
 	spin_lock_init(&acm->read_lock);
 	mutex_init(&acm->mutex);
-	acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
 	acm->is_int_ep = usb_endpoint_xfer_int(epread);
 	if (acm->is_int_ep)
 		acm->bInterval = epread->bInterval;
@@ -1394,14 +1384,14 @@
 		urb->transfer_dma = rb->dma;
 		if (acm->is_int_ep) {
 			usb_fill_int_urb(urb, acm->dev,
-					 acm->rx_endpoint,
+					 usb_rcvintpipe(usb_dev, epread->bEndpointAddress),
 					 rb->base,
 					 acm->readsize,
 					 acm_read_bulk_callback, rb,
 					 acm->bInterval);
 		} else {
 			usb_fill_bulk_urb(urb, acm->dev,
-					  acm->rx_endpoint,
+					  usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress),
 					  rb->base,
 					  acm->readsize,
 					  acm_read_bulk_callback, rb);
@@ -1534,8 +1524,6 @@
 {
 	int i;
 
-	dev_dbg(&acm->control->dev, "%s\n", __func__);
-
 	usb_kill_urb(acm->ctrlurb);
 	for (i = 0; i < ACM_NW; i++)
 		usb_kill_urb(acm->wb[i].urb);
@@ -1552,8 +1540,6 @@
 	struct tty_struct *tty;
 	int i;
 
-	dev_dbg(&intf->dev, "%s\n", __func__);
-
 	/* sibling interface is already cleaning up */
 	if (!acm)
 		return;
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index 05ce308..1f1eabf 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -96,7 +96,6 @@
 	struct acm_rb read_buffers[ACM_NR];
 	struct acm_wb *putbuffer;			/* for acm_tty_put_char() */
 	int rx_buflimit;
-	int rx_endpoint;
 	spinlock_t read_lock;
 	int write_used;					/* number of non-empty write buffers */
 	int transmitting;
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 337948c..0a63695 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -58,6 +58,7 @@
 #define WDM_SUSPENDING		8
 #define WDM_RESETTING		9
 #define WDM_OVERFLOW		10
+#define WDM_DRAIN_ON_OPEN	11
 
 #define WDM_MAX			16
 
@@ -154,6 +155,9 @@
 	wake_up(&desc->wait);
 }
 
+/* forward declaration */
+static int service_outstanding_interrupt(struct wdm_device *desc);
+
 static void wdm_in_callback(struct urb *urb)
 {
 	struct wdm_device *desc = urb->context;
@@ -167,18 +171,18 @@
 		switch (status) {
 		case -ENOENT:
 			dev_dbg(&desc->intf->dev,
-				"nonzero urb status received: -ENOENT");
+				"nonzero urb status received: -ENOENT\n");
 			goto skip_error;
 		case -ECONNRESET:
 			dev_dbg(&desc->intf->dev,
-				"nonzero urb status received: -ECONNRESET");
+				"nonzero urb status received: -ECONNRESET\n");
 			goto skip_error;
 		case -ESHUTDOWN:
 			dev_dbg(&desc->intf->dev,
-				"nonzero urb status received: -ESHUTDOWN");
+				"nonzero urb status received: -ESHUTDOWN\n");
 			goto skip_error;
 		case -EPIPE:
-			dev_err(&desc->intf->dev,
+			dev_dbg(&desc->intf->dev,
 				"nonzero urb status received: -EPIPE\n");
 			break;
 		default:
@@ -188,7 +192,13 @@
 		}
 	}
 
-	desc->rerr = status;
+	/*
+	 * only set a new error if there is no previous error.
+	 * Errors are only cleared during read/open
+	 */
+	if (desc->rerr  == 0)
+		desc->rerr = status;
+
 	if (length + desc->length > desc->wMaxCommand) {
 		/* The buffer would overflow */
 		set_bit(WDM_OVERFLOW, &desc->flags);
@@ -200,10 +210,40 @@
 			desc->reslength = length;
 		}
 	}
+
+	/*
+	 * Handling devices with the WDM_DRAIN_ON_OPEN flag set:
+	 * If desc->resp_count is unset, then the urb was submitted
+	 * without a prior notification.  If the device returned any
+	 * data, then this implies that it had messages queued without
+	 * notifying us.  Continue reading until that queue is flushed.
+	 */
+	if (!desc->resp_count) {
+		if (!length) {
+			/* do not propagate the expected -EPIPE */
+			desc->rerr = 0;
+			goto unlock;
+		}
+		dev_dbg(&desc->intf->dev, "got %d bytes without notification\n", length);
+		set_bit(WDM_RESPONDING, &desc->flags);
+		usb_submit_urb(desc->response, GFP_ATOMIC);
+	}
+
 skip_error:
+	set_bit(WDM_READ, &desc->flags);
 	wake_up(&desc->wait);
 
-	set_bit(WDM_READ, &desc->flags);
+	if (desc->rerr) {
+		/*
+		 * Since there was an error, userspace may decide to not read
+		 * any data after poll'ing.
+		 * We should respond to further attempts from the device to send
+		 * data, so that we can get unstuck.
+		 */
+		service_outstanding_interrupt(desc);
+	}
+
+unlock:
 	spin_unlock(&desc->iuspin);
 }
 
@@ -244,18 +284,18 @@
 	switch (dr->bNotificationType) {
 	case USB_CDC_NOTIFY_RESPONSE_AVAILABLE:
 		dev_dbg(&desc->intf->dev,
-			"NOTIFY_RESPONSE_AVAILABLE received: index %d len %d",
+			"NOTIFY_RESPONSE_AVAILABLE received: index %d len %d\n",
 			le16_to_cpu(dr->wIndex), le16_to_cpu(dr->wLength));
 		break;
 
 	case USB_CDC_NOTIFY_NETWORK_CONNECTION:
 
 		dev_dbg(&desc->intf->dev,
-			"NOTIFY_NETWORK_CONNECTION %s network",
+			"NOTIFY_NETWORK_CONNECTION %s network\n",
 			dr->wValue ? "connected to" : "disconnected from");
 		goto exit;
 	case USB_CDC_NOTIFY_SPEED_CHANGE:
-		dev_dbg(&desc->intf->dev, "SPEED_CHANGE received (len %u)",
+		dev_dbg(&desc->intf->dev, "SPEED_CHANGE received (len %u)\n",
 			urb->actual_length);
 		goto exit;
 	default:
@@ -274,8 +314,7 @@
 		&& !test_bit(WDM_DISCONNECTING, &desc->flags)
 		&& !test_bit(WDM_SUSPENDING, &desc->flags)) {
 		rv = usb_submit_urb(desc->response, GFP_ATOMIC);
-		dev_dbg(&desc->intf->dev, "%s: usb_submit_urb %d",
-			__func__, rv);
+		dev_dbg(&desc->intf->dev, "submit response URB %d\n", rv);
 	}
 	spin_unlock(&desc->iuspin);
 	if (rv < 0) {
@@ -417,7 +456,7 @@
 		rv = usb_translate_errors(rv);
 		goto out_free_mem_pm;
 	} else {
-		dev_dbg(&desc->intf->dev, "Tx URB has been submitted index=%d",
+		dev_dbg(&desc->intf->dev, "Tx URB has been submitted index=%d\n",
 			le16_to_cpu(req->wIndex));
 	}
 
@@ -436,17 +475,14 @@
 }
 
 /*
- * clear WDM_READ flag and possibly submit the read urb if resp_count
- * is non-zero.
+ * Submit the read urb if resp_count is non-zero.
  *
  * Called with desc->iuspin locked
  */
-static int clear_wdm_read_flag(struct wdm_device *desc)
+static int service_outstanding_interrupt(struct wdm_device *desc)
 {
 	int rv = 0;
 
-	clear_bit(WDM_READ, &desc->flags);
-
 	/* submit read urb only if the device is waiting for it */
 	if (!desc->resp_count || !--desc->resp_count)
 		goto out;
@@ -537,8 +573,9 @@
 		}
 
 		if (!desc->reslength) { /* zero length read */
-			dev_dbg(&desc->intf->dev, "%s: zero length - clearing WDM_READ\n", __func__);
-			rv = clear_wdm_read_flag(desc);
+			dev_dbg(&desc->intf->dev, "zero length - clearing WDM_READ\n");
+			clear_bit(WDM_READ, &desc->flags);
+			rv = service_outstanding_interrupt(desc);
 			spin_unlock_irq(&desc->iuspin);
 			if (rv < 0)
 				goto err;
@@ -563,8 +600,10 @@
 
 	desc->length -= cntr;
 	/* in case we had outstanding data */
-	if (!desc->length)
-		clear_wdm_read_flag(desc);
+	if (!desc->length) {
+		clear_bit(WDM_READ, &desc->flags);
+		service_outstanding_interrupt(desc);
+	}
 	spin_unlock_irq(&desc->iuspin);
 	rv = cntr;
 
@@ -647,6 +686,17 @@
 			dev_err(&desc->intf->dev,
 				"Error submitting int urb - %d\n", rv);
 			rv = usb_translate_errors(rv);
+		} else if (test_bit(WDM_DRAIN_ON_OPEN, &desc->flags)) {
+			/*
+			 * Some devices keep pending messages queued
+			 * without resending notifications.  We must
+			 * flush the message queue before we can
+			 * assume a one-to-one relationship between
+			 * notifications and messages in the queue
+			 */
+			dev_dbg(&desc->intf->dev, "draining queued data\n");
+			set_bit(WDM_RESPONDING, &desc->flags);
+			rv = usb_submit_urb(desc->response, GFP_KERNEL);
 		}
 	} else {
 		rv = 0;
@@ -673,7 +723,7 @@
 
 	if (!desc->count) {
 		if (!test_bit(WDM_DISCONNECTING, &desc->flags)) {
-			dev_dbg(&desc->intf->dev, "wdm_release: cleanup");
+			dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n");
 			kill_urbs(desc);
 			spin_lock_irq(&desc->iuspin);
 			desc->resp_count = 0;
@@ -753,7 +803,8 @@
 /* --- hotplug --- */
 
 static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor *ep,
-		u16 bufsize, int (*manage_power)(struct usb_interface *, int))
+		u16 bufsize, int (*manage_power)(struct usb_interface *, int),
+		bool drain_on_open)
 {
 	int rv = -ENOMEM;
 	struct wdm_device *desc;
@@ -840,6 +891,68 @@
 
 	desc->manage_power = manage_power;
 
+	/*
+	 * "drain_on_open" enables a hack to work around a firmware
+	 * issue observed on network functions, in particular MBIM
+	 * functions.
+	 *
+	 * Quoting section 7 of the CDC-WMC r1.1 specification:
+	 *
+	 *  "The firmware shall interpret GetEncapsulatedResponse as a
+	 *   request to read response bytes. The firmware shall send
+	 *   the next wLength bytes from the response. The firmware
+	 *   shall allow the host to retrieve data using any number of
+	 *   GetEncapsulatedResponse requests. The firmware shall
+	 *   return a zero- length reply if there are no data bytes
+	 *   available.
+	 *
+	 *   The firmware shall send ResponseAvailable notifications
+	 *   periodically, using any appropriate algorithm, to inform
+	 *   the host that there is data available in the reply
+	 *   buffer. The firmware is allowed to send ResponseAvailable
+	 *   notifications even if there is no data available, but
+	 *   this will obviously reduce overall performance."
+	 *
+	 * These requirements, although they make equally sense, are
+	 * often not implemented by network functions. Some firmwares
+	 * will queue data indefinitely, without ever resending a
+	 * notification. The result is that the driver and firmware
+	 * loses "syncronization" if the driver ever fails to respond
+	 * to a single notification, something which easily can happen
+	 * on release(). When this happens, the driver will appear to
+	 * never receive notifications for the most current data. Each
+	 * notification will only cause a single read, which returns
+	 * the oldest data in the firmware's queue.
+	 *
+	 * The "drain_on_open" hack resolves the situation by draining
+	 * data from the firmware until none is returned, without a
+	 * prior notification.
+	 *
+	 * This will inevitably race with the firmware, risking that
+	 * we read data from the device before handling the associated
+	 * notification. To make things worse, some of the devices
+	 * needing the hack do not implement the "return zero if no
+	 * data is available" requirement either. Instead they return
+	 * an error on the subsequent read in this case.  This means
+	 * that "winning" the race can cause an unexpected EIO to
+	 * userspace.
+	 *
+	 * "winning" the race is more likely on resume() than on
+	 * open(), and the unexpected error is more harmful in the
+	 * middle of an open session. The hack is therefore only
+	 * applied on open(), and not on resume() where it logically
+	 * would be equally necessary. So we define open() as the only
+	 * driver <-> device "syncronization point".  Should we happen
+	 * to lose a notification after open(), then syncronization
+	 * will be lost until release()
+	 *
+	 * The hack should not be enabled for CDC WDM devices
+	 * conforming to the CDC-WMC r1.1 specification.  This is
+	 * ensured by setting drain_on_open to false in wdm_probe().
+	 */
+	if (drain_on_open)
+		set_bit(WDM_DRAIN_ON_OPEN, &desc->flags);
+
 	spin_lock(&wdm_device_list_lock);
 	list_add(&desc->device_list, &wdm_device_list);
 	spin_unlock(&wdm_device_list_lock);
@@ -893,7 +1006,7 @@
 		goto err;
 	ep = &iface->endpoint[0].desc;
 
-	rv = wdm_create(intf, ep, maxcom, &wdm_manage_power);
+	rv = wdm_create(intf, ep, maxcom, &wdm_manage_power, false);
 
 err:
 	return rv;
@@ -925,7 +1038,7 @@
 {
 	int rv = -EINVAL;
 
-	rv = wdm_create(intf, ep, bufsize, manage_power);
+	rv = wdm_create(intf, ep, bufsize, manage_power, true);
 	if (rv < 0)
 		goto err;
 
@@ -967,7 +1080,7 @@
 	if (!desc->count)
 		cleanup(desc);
 	else
-		dev_dbg(&intf->dev, "%s: %d open files - postponing cleanup\n", __func__, desc->count);
+		dev_dbg(&intf->dev, "%d open files - postponing cleanup\n", desc->count);
 	mutex_unlock(&wdm_mutex);
 }
 
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 917a55c..a6c1fae 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -141,6 +141,7 @@
 	struct usbtmc_device_data *data = to_usbtmc_data(kref);
 
 	usb_put_dev(data->usb_dev);
+	kfree(data);
 }
 
 static int usbtmc_open(struct inode *inode, struct file *filp)
@@ -1379,7 +1380,7 @@
 
 	dev_dbg(&intf->dev, "%s called\n", __func__);
 
-	data = devm_kzalloc(&intf->dev, sizeof(*data), GFP_KERNEL);
+	data = kmalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
@@ -1467,10 +1468,8 @@
 	if (data->iin_ep_present) {
 		/* allocate int urb */
 		data->iin_urb = usb_alloc_urb(0, GFP_KERNEL);
-		if (!data->iin_urb) {
-			dev_err(&intf->dev, "Failed to allocate int urb\n");
+		if (!data->iin_urb)
 			goto error_register;
-		}
 
 		/* will reference data in int urb */
 		kref_get(&data->kref);
@@ -1478,10 +1477,8 @@
 		/* allocate buffer for interrupt in */
 		data->iin_buffer = kmalloc(data->iin_wMaxPacketSize,
 					GFP_KERNEL);
-		if (!data->iin_buffer) {
-			dev_err(&intf->dev, "Failed to allocate int buf\n");
+		if (!data->iin_buffer)
 			goto error_register;
-		}
 
 		/* fill interrupt urb */
 		usb_fill_int_urb(data->iin_urb, data->usb_dev,
diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
index 01c0c04..8b31770 100644
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -21,13 +21,13 @@
 
 int ulpi_read(struct ulpi *ulpi, u8 addr)
 {
-	return ulpi->ops->read(ulpi->ops, addr);
+	return ulpi->ops->read(ulpi->dev.parent, addr);
 }
 EXPORT_SYMBOL_GPL(ulpi_read);
 
 int ulpi_write(struct ulpi *ulpi, u8 addr, u8 val)
 {
-	return ulpi->ops->write(ulpi->ops, addr, val);
+	return ulpi->ops->write(ulpi->dev.parent, addr, val);
 }
 EXPORT_SYMBOL_GPL(ulpi_write);
 
@@ -127,16 +127,17 @@
  *
  * Registers a driver with the ULPI bus.
  */
-int ulpi_register_driver(struct ulpi_driver *drv)
+int __ulpi_register_driver(struct ulpi_driver *drv, struct module *module)
 {
 	if (!drv->probe)
 		return -EINVAL;
 
+	drv->driver.owner = module;
 	drv->driver.bus = &ulpi_bus;
 
 	return driver_register(&drv->driver);
 }
-EXPORT_SYMBOL_GPL(ulpi_register_driver);
+EXPORT_SYMBOL_GPL(__ulpi_register_driver);
 
 /**
  * ulpi_unregister_driver - unregister a driver with the ULPI bus
@@ -156,6 +157,8 @@
 {
 	int ret;
 
+	ulpi->dev.parent = dev; /* needed early for ops */
+
 	/* Test the interface */
 	ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa);
 	if (ret < 0)
@@ -174,7 +177,6 @@
 	ulpi->id.product = ulpi_read(ulpi, ULPI_PRODUCT_ID_LOW);
 	ulpi->id.product |= ulpi_read(ulpi, ULPI_PRODUCT_ID_HIGH) << 8;
 
-	ulpi->dev.parent = dev;
 	ulpi->dev.bus = &ulpi_bus;
 	ulpi->dev.type = &ulpi_dev_type;
 	dev_set_name(&ulpi->dev, "%s.ulpi", dev_name(dev));
@@ -201,7 +203,8 @@
  * Allocates and registers a ULPI device and an interface for it. Called from
  * the USB controller that provides the ULPI interface.
  */
-struct ulpi *ulpi_register_interface(struct device *dev, struct ulpi_ops *ops)
+struct ulpi *ulpi_register_interface(struct device *dev,
+				     const struct ulpi_ops *ops)
 {
 	struct ulpi *ulpi;
 	int ret;
@@ -211,7 +214,6 @@
 		return ERR_PTR(-ENOMEM);
 
 	ulpi->ops = ops;
-	ops->dev = dev;
 
 	ret = ulpi_register(dev, ulpi);
 	if (ret) {
diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig
index dd28010..0e5a889 100644
--- a/drivers/usb/core/Kconfig
+++ b/drivers/usb/core/Kconfig
@@ -83,23 +83,10 @@
 	  Implements OTG Finite State Machine as specified in On-The-Go
 	  and Embedded Host Supplement to the USB Revision 2.0 Specification.
 
-config USB_ULPI_BUS
-	tristate "USB ULPI PHY interface support"
-	depends on USB_SUPPORT
+config USB_LEDS_TRIGGER_USBPORT
+	tristate "USB port LED trigger"
+	depends on USB && LEDS_TRIGGERS
 	help
-	  UTMI+ Low Pin Interface (ULPI) is specification for a commonly used
-	  USB 2.0 PHY interface. The ULPI specification defines a standard set
-	  of registers that can be used to detect the vendor and product which
-	  allows ULPI to be handled as a bus. This module is the driver for that
-	  bus.
-
-	  The ULPI interfaces (the buses) are registered by the drivers for USB
-	  controllers which support ULPI register access and have ULPI PHY
-	  attached to them. The ULPI PHY drivers themselves are normal PHY
-	  drivers.
-
-	  ULPI PHYs provide often functions such as ADP sensing/probing (OTG
-	  protocol) and USB charger detection.
-
-	  To compile this driver as a module, choose M here: the module will
-	  be called ulpi.
+	  This driver allows LEDs to be controlled by USB events. Enabling this
+	  trigger allows specifying list of USB ports that should turn on LED
+	  when some USB device gets connected.
diff --git a/drivers/usb/core/Makefile b/drivers/usb/core/Makefile
index 9780877..b99b871 100644
--- a/drivers/usb/core/Makefile
+++ b/drivers/usb/core/Makefile
@@ -5,9 +5,12 @@
 usbcore-y := usb.o hub.o hcd.o urb.o message.o driver.o
 usbcore-y += config.o file.o buffer.o sysfs.o endpoint.o
 usbcore-y += devio.o notify.o generic.o quirks.o devices.o
-usbcore-y += port.o of.o
+usbcore-y += port.o
 
+usbcore-$(CONFIG_OF)		+= of.o
 usbcore-$(CONFIG_PCI)		+= hcd-pci.o
 usbcore-$(CONFIG_ACPI)		+= usb-acpi.o
 
 obj-$(CONFIG_USB)		+= usbcore.o
+
+obj-$(CONFIG_USB_LEDS_TRIGGER_USBPORT)	+= ledtrig-usbport.o
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 31ccdcc..a2d90ac 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -171,6 +171,31 @@
 							ep, buffer, size);
 }
 
+static const unsigned short low_speed_maxpacket_maxes[4] = {
+	[USB_ENDPOINT_XFER_CONTROL] = 8,
+	[USB_ENDPOINT_XFER_ISOC] = 0,
+	[USB_ENDPOINT_XFER_BULK] = 0,
+	[USB_ENDPOINT_XFER_INT] = 8,
+};
+static const unsigned short full_speed_maxpacket_maxes[4] = {
+	[USB_ENDPOINT_XFER_CONTROL] = 64,
+	[USB_ENDPOINT_XFER_ISOC] = 1023,
+	[USB_ENDPOINT_XFER_BULK] = 64,
+	[USB_ENDPOINT_XFER_INT] = 64,
+};
+static const unsigned short high_speed_maxpacket_maxes[4] = {
+	[USB_ENDPOINT_XFER_CONTROL] = 64,
+	[USB_ENDPOINT_XFER_ISOC] = 1024,
+	[USB_ENDPOINT_XFER_BULK] = 512,
+	[USB_ENDPOINT_XFER_INT] = 1024,
+};
+static const unsigned short super_speed_maxpacket_maxes[4] = {
+	[USB_ENDPOINT_XFER_CONTROL] = 512,
+	[USB_ENDPOINT_XFER_ISOC] = 1024,
+	[USB_ENDPOINT_XFER_BULK] = 1024,
+	[USB_ENDPOINT_XFER_INT] = 1024,
+};
+
 static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
     int asnum, struct usb_host_interface *ifp, int num_ep,
     unsigned char *buffer, int size)
@@ -179,6 +204,8 @@
 	struct usb_endpoint_descriptor *d;
 	struct usb_host_endpoint *endpoint;
 	int n, i, j, retval;
+	unsigned int maxp;
+	const unsigned short *maxpacket_maxes;
 
 	d = (struct usb_endpoint_descriptor *) buffer;
 	buffer += d->bLength;
@@ -213,8 +240,10 @@
 	memcpy(&endpoint->desc, d, n);
 	INIT_LIST_HEAD(&endpoint->urb_list);
 
-	/* Fix up bInterval values outside the legal range. Use 32 ms if no
-	 * proper value can be guessed. */
+	/*
+	 * Fix up bInterval values outside the legal range.
+	 * Use 10 or 8 ms if no proper value can be guessed.
+	 */
 	i = 0;		/* i = min, j = max, n = default */
 	j = 255;
 	if (usb_endpoint_xfer_int(d)) {
@@ -223,13 +252,15 @@
 		case USB_SPEED_SUPER_PLUS:
 		case USB_SPEED_SUPER:
 		case USB_SPEED_HIGH:
-			/* Many device manufacturers are using full-speed
+			/*
+			 * Many device manufacturers are using full-speed
 			 * bInterval values in high-speed interrupt endpoint
-			 * descriptors. Try to fix those and fall back to a
-			 * 32 ms default value otherwise. */
+			 * descriptors. Try to fix those and fall back to an
+			 * 8-ms default value otherwise.
+			 */
 			n = fls(d->bInterval*8);
 			if (n == 0)
-				n = 9;	/* 32 ms = 2^(9-1) uframes */
+				n = 7;	/* 8 ms = 2^(7-1) uframes */
 			j = 16;
 
 			/*
@@ -244,10 +275,12 @@
 			}
 			break;
 		default:		/* USB_SPEED_FULL or _LOW */
-			/* For low-speed, 10 ms is the official minimum.
+			/*
+			 * For low-speed, 10 ms is the official minimum.
 			 * But some "overclocked" devices might want faster
-			 * polling so we'll allow it. */
-			n = 32;
+			 * polling so we'll allow it.
+			 */
+			n = 10;
 			break;
 		}
 	} else if (usb_endpoint_xfer_isoc(d)) {
@@ -255,10 +288,10 @@
 		j = 16;
 		switch (to_usb_device(ddev)->speed) {
 		case USB_SPEED_HIGH:
-			n = 9;		/* 32 ms = 2^(9-1) uframes */
+			n = 7;		/* 8 ms = 2^(7-1) uframes */
 			break;
 		default:		/* USB_SPEED_FULL */
-			n = 6;		/* 32 ms = 2^(6-1) frames */
+			n = 4;		/* 8 ms = 2^(4-1) frames */
 			break;
 		}
 	}
@@ -286,6 +319,42 @@
 			endpoint->desc.wMaxPacketSize = cpu_to_le16(8);
 	}
 
+	/* Validate the wMaxPacketSize field */
+	maxp = usb_endpoint_maxp(&endpoint->desc);
+
+	/* Find the highest legal maxpacket size for this endpoint */
+	i = 0;		/* additional transactions per microframe */
+	switch (to_usb_device(ddev)->speed) {
+	case USB_SPEED_LOW:
+		maxpacket_maxes = low_speed_maxpacket_maxes;
+		break;
+	case USB_SPEED_FULL:
+		maxpacket_maxes = full_speed_maxpacket_maxes;
+		break;
+	case USB_SPEED_HIGH:
+		/* Bits 12..11 are allowed only for HS periodic endpoints */
+		if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) {
+			i = maxp & (BIT(12) | BIT(11));
+			maxp &= ~i;
+		}
+		/* fallthrough */
+	default:
+		maxpacket_maxes = high_speed_maxpacket_maxes;
+		break;
+	case USB_SPEED_SUPER:
+	case USB_SPEED_SUPER_PLUS:
+		maxpacket_maxes = super_speed_maxpacket_maxes;
+		break;
+	}
+	j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)];
+
+	if (maxp > j) {
+		dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n",
+		    cfgno, inum, asnum, d->bEndpointAddress, maxp, j);
+		maxp = j;
+		endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp);
+	}
+
 	/*
 	 * Some buggy high speed devices have bulk endpoints using
 	 * maxpacket sizes other than 512.  High speed HCDs may not
@@ -293,9 +362,6 @@
 	 */
 	if (to_usb_device(ddev)->speed == USB_SPEED_HIGH
 			&& usb_endpoint_xfer_bulk(d)) {
-		unsigned maxp;
-
-		maxp = usb_endpoint_maxp(&endpoint->desc) & 0x07ff;
 		if (maxp != 512)
 			dev_warn(ddev, "config %d interface %d altsetting %d "
 				"bulk endpoint 0x%X has invalid maxpacket %d\n",
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index e9f5043..09c8d9c 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -241,7 +241,8 @@
 		goto error_decrease_mem;
 	}
 
-	mem = usb_alloc_coherent(ps->dev, size, GFP_USER, &dma_handle);
+	mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN,
+			&dma_handle);
 	if (!mem) {
 		ret = -ENOMEM;
 		goto error_free_usbm;
@@ -1708,11 +1709,17 @@
 	as->urb->start_frame = uurb->start_frame;
 	as->urb->number_of_packets = number_of_packets;
 	as->urb->stream_id = stream_id;
-	if (uurb->type == USBDEVFS_URB_TYPE_ISO ||
-			ps->dev->speed == USB_SPEED_HIGH)
-		as->urb->interval = 1 << min(15, ep->desc.bInterval - 1);
-	else
-		as->urb->interval = ep->desc.bInterval;
+
+	if (ep->desc.bInterval) {
+		if (uurb->type == USBDEVFS_URB_TYPE_ISO ||
+				ps->dev->speed == USB_SPEED_HIGH ||
+				ps->dev->speed >= USB_SPEED_SUPER)
+			as->urb->interval = 1 <<
+					min(15, ep->desc.bInterval - 1);
+		else
+			as->urb->interval = ep->desc.bInterval;
+	}
+
 	as->urb->context = as;
 	as->urb->complete = async_completed;
 	for (totlen = u = 0; u < number_of_packets; u++) {
@@ -2582,7 +2589,9 @@
 	if (file->f_mode & FMODE_WRITE && !list_empty(&ps->async_completed))
 		mask |= POLLOUT | POLLWRNORM;
 	if (!connected(ps))
-		mask |= POLLERR | POLLHUP;
+		mask |= POLLHUP;
+	if (list_empty(&ps->list))
+		mask |= POLLERR;
 	return mask;
 }
 
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index d2e3f65..479e223 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -46,6 +46,7 @@
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
 #include <linux/usb/phy.h>
+#include <linux/usb/otg.h>
 
 #include "usb.h"
 
@@ -2517,10 +2518,8 @@
 	struct usb_hcd *hcd;
 
 	hcd = kzalloc(sizeof(*hcd) + driver->hcd_priv_size, GFP_KERNEL);
-	if (!hcd) {
-		dev_dbg (dev, "hcd alloc failed\n");
+	if (!hcd)
 		return NULL;
-	}
 	if (primary_hcd == NULL) {
 		hcd->address0_mutex = kmalloc(sizeof(*hcd->address0_mutex),
 				GFP_KERNEL);
@@ -3033,7 +3032,7 @@
 
 /*-------------------------------------------------------------------------*/
 
-#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
+#if IS_ENABLED(CONFIG_USB_MON)
 
 const struct usb_mon_operations *mon_ops;
 
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index bee1351..cbb1467 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1052,14 +1052,11 @@
 
 	/* Continue a partial initialization */
 	if (type == HUB_INIT2 || type == HUB_INIT3) {
-		device_lock(hub->intfdev);
+		device_lock(&hdev->dev);
 
 		/* Was the hub disconnected while we were waiting? */
-		if (hub->disconnected) {
-			device_unlock(hub->intfdev);
-			kref_put(&hub->kref, hub_release);
-			return;
-		}
+		if (hub->disconnected)
+			goto disconnected;
 		if (type == HUB_INIT2)
 			goto init2;
 		goto init3;
@@ -1262,7 +1259,7 @@
 			queue_delayed_work(system_power_efficient_wq,
 					&hub->init_work,
 					msecs_to_jiffies(delay));
-			device_unlock(hub->intfdev);
+			device_unlock(&hdev->dev);
 			return;		/* Continues at init3: below */
 		} else {
 			msleep(delay);
@@ -1281,12 +1278,12 @@
 	/* Scan all ports that need attention */
 	kick_hub_wq(hub);
 
-	/* Allow autosuspend if it was suppressed */
-	if (type <= HUB_INIT3)
+	if (type == HUB_INIT2 || type == HUB_INIT3) {
+		/* Allow autosuspend if it was suppressed */
+ disconnected:
 		usb_autopm_put_interface_async(to_usb_interface(hub->intfdev));
-
-	if (type == HUB_INIT2 || type == HUB_INIT3)
-		device_unlock(hub->intfdev);
+		device_unlock(&hdev->dev);
+	}
 
 	kref_put(&hub->kref, hub_release);
 }
@@ -1315,8 +1312,6 @@
 	struct usb_device *hdev = hub->hdev;
 	int i;
 
-	cancel_delayed_work_sync(&hub->init_work);
-
 	/* hub_wq and related activity won't re-trigger */
 	hub->quiescing = 1;
 
@@ -1828,10 +1823,8 @@
 	dev_info(&intf->dev, "USB hub found\n");
 
 	hub = kzalloc(sizeof(*hub), GFP_KERNEL);
-	if (!hub) {
-		dev_dbg(&intf->dev, "couldn't kmalloc hub struct\n");
+	if (!hub)
 		return -ENOMEM;
-	}
 
 	kref_init(&hub->kref);
 	hub->intfdev = &intf->dev;
@@ -3111,7 +3104,7 @@
 				USB_CTRL_SET_TIMEOUT);
 	else
 		return usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
-				USB_REQ_CLEAR_FEATURE, USB_RECIP_INTERFACE,
+				USB_REQ_SET_FEATURE, USB_RECIP_INTERFACE,
 				USB_INTRF_FUNC_SUSPEND,	0, NULL, 0,
 				USB_CTRL_SET_TIMEOUT);
 }
@@ -5342,11 +5335,10 @@
 	}
 
 	buf = kmalloc(len, GFP_NOIO);
-	if (buf == NULL) {
-		dev_err(&udev->dev, "no mem to re-read configs after reset\n");
+	if (!buf)
 		/* assume the worst */
 		return 1;
-	}
+
 	for (index = 0; index < udev->descriptor.bNumConfigurations; index++) {
 		old_length = le16_to_cpu(udev->config[index].desc.wTotalLength);
 		length = usb_get_descriptor(udev, USB_DT_CONFIG, index, buf,
diff --git a/drivers/usb/core/ledtrig-usbport.c b/drivers/usb/core/ledtrig-usbport.c
new file mode 100644
index 0000000..3ed5162
--- /dev/null
+++ b/drivers/usb/core/ledtrig-usbport.c
@@ -0,0 +1,314 @@
+/*
+ * USB port LED trigger
+ *
+ * Copyright (C) 2016 Rafał Miłecki <rafal@milecki.pl>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/usb.h>
+
+struct usbport_trig_data {
+	struct led_classdev *led_cdev;
+	struct list_head ports;
+	struct notifier_block nb;
+	int count; /* Amount of connected matching devices */
+};
+
+struct usbport_trig_port {
+	struct usbport_trig_data *data;
+	struct usb_device *hub;
+	int portnum;
+	char *port_name;
+	bool observed;
+	struct device_attribute attr;
+	struct list_head list;
+};
+
+/***************************************
+ * Helpers
+ ***************************************/
+
+/**
+ * usbport_trig_usb_dev_observed - Check if dev is connected to observed port
+ */
+static bool usbport_trig_usb_dev_observed(struct usbport_trig_data *usbport_data,
+					  struct usb_device *usb_dev)
+{
+	struct usbport_trig_port *port;
+
+	if (!usb_dev->parent)
+		return false;
+
+	list_for_each_entry(port, &usbport_data->ports, list) {
+		if (usb_dev->parent == port->hub &&
+		    usb_dev->portnum == port->portnum)
+			return port->observed;
+	}
+
+	return false;
+}
+
+static int usbport_trig_usb_dev_check(struct usb_device *usb_dev, void *data)
+{
+	struct usbport_trig_data *usbport_data = data;
+
+	if (usbport_trig_usb_dev_observed(usbport_data, usb_dev))
+		usbport_data->count++;
+
+	return 0;
+}
+
+/**
+ * usbport_trig_update_count - Recalculate amount of connected matching devices
+ */
+static void usbport_trig_update_count(struct usbport_trig_data *usbport_data)
+{
+	struct led_classdev *led_cdev = usbport_data->led_cdev;
+
+	usbport_data->count = 0;
+	usb_for_each_dev(usbport_data, usbport_trig_usb_dev_check);
+	led_cdev->brightness_set(led_cdev,
+				 usbport_data->count ? LED_FULL : LED_OFF);
+}
+
+/***************************************
+ * Device attr
+ ***************************************/
+
+static ssize_t usbport_trig_port_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct usbport_trig_port *port = container_of(attr,
+						      struct usbport_trig_port,
+						      attr);
+
+	return sprintf(buf, "%d\n", port->observed) + 1;
+}
+
+static ssize_t usbport_trig_port_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t size)
+{
+	struct usbport_trig_port *port = container_of(attr,
+						      struct usbport_trig_port,
+						      attr);
+
+	if (!strcmp(buf, "0") || !strcmp(buf, "0\n"))
+		port->observed = 0;
+	else if (!strcmp(buf, "1") || !strcmp(buf, "1\n"))
+		port->observed = 1;
+	else
+		return -EINVAL;
+
+	usbport_trig_update_count(port->data);
+
+	return size;
+}
+
+static struct attribute *ports_attrs[] = {
+	NULL,
+};
+static const struct attribute_group ports_group = {
+	.name = "ports",
+	.attrs = ports_attrs,
+};
+
+/***************************************
+ * Adding & removing ports
+ ***************************************/
+
+static int usbport_trig_add_port(struct usbport_trig_data *usbport_data,
+				 struct usb_device *usb_dev,
+				 const char *hub_name, int portnum)
+{
+	struct led_classdev *led_cdev = usbport_data->led_cdev;
+	struct usbport_trig_port *port;
+	size_t len;
+	int err;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	port->data = usbport_data;
+	port->hub = usb_dev;
+	port->portnum = portnum;
+
+	len = strlen(hub_name) + 8;
+	port->port_name = kzalloc(len, GFP_KERNEL);
+	if (!port->port_name) {
+		err = -ENOMEM;
+		goto err_free_port;
+	}
+	snprintf(port->port_name, len, "%s-port%d", hub_name, portnum);
+
+	port->attr.attr.name = port->port_name;
+	port->attr.attr.mode = S_IRUSR | S_IWUSR;
+	port->attr.show = usbport_trig_port_show;
+	port->attr.store = usbport_trig_port_store;
+
+	err = sysfs_add_file_to_group(&led_cdev->dev->kobj, &port->attr.attr,
+				      ports_group.name);
+	if (err)
+		goto err_free_port_name;
+
+	list_add_tail(&port->list, &usbport_data->ports);
+
+	return 0;
+
+err_free_port_name:
+	kfree(port->port_name);
+err_free_port:
+	kfree(port);
+err_out:
+	return err;
+}
+
+static int usbport_trig_add_usb_dev_ports(struct usb_device *usb_dev,
+					  void *data)
+{
+	struct usbport_trig_data *usbport_data = data;
+	int i;
+
+	for (i = 1; i <= usb_dev->maxchild; i++)
+		usbport_trig_add_port(usbport_data, usb_dev,
+				      dev_name(&usb_dev->dev), i);
+
+	return 0;
+}
+
+static void usbport_trig_remove_port(struct usbport_trig_data *usbport_data,
+				     struct usbport_trig_port *port)
+{
+	struct led_classdev *led_cdev = usbport_data->led_cdev;
+
+	list_del(&port->list);
+	sysfs_remove_file_from_group(&led_cdev->dev->kobj, &port->attr.attr,
+				     ports_group.name);
+	kfree(port->port_name);
+	kfree(port);
+}
+
+static void usbport_trig_remove_usb_dev_ports(struct usbport_trig_data *usbport_data,
+					      struct usb_device *usb_dev)
+{
+	struct usbport_trig_port *port, *tmp;
+
+	list_for_each_entry_safe(port, tmp, &usbport_data->ports, list) {
+		if (port->hub == usb_dev)
+			usbport_trig_remove_port(usbport_data, port);
+	}
+}
+
+/***************************************
+ * Init, exit, etc.
+ ***************************************/
+
+static int usbport_trig_notify(struct notifier_block *nb, unsigned long action,
+			       void *data)
+{
+	struct usbport_trig_data *usbport_data =
+		container_of(nb, struct usbport_trig_data, nb);
+	struct led_classdev *led_cdev = usbport_data->led_cdev;
+	struct usb_device *usb_dev = data;
+	bool observed;
+
+	observed = usbport_trig_usb_dev_observed(usbport_data, usb_dev);
+
+	switch (action) {
+	case USB_DEVICE_ADD:
+		usbport_trig_add_usb_dev_ports(usb_dev, usbport_data);
+		if (observed && usbport_data->count++ == 0)
+			led_cdev->brightness_set(led_cdev, LED_FULL);
+		return NOTIFY_OK;
+	case USB_DEVICE_REMOVE:
+		usbport_trig_remove_usb_dev_ports(usbport_data, usb_dev);
+		if (observed && --usbport_data->count == 0)
+			led_cdev->brightness_set(led_cdev, LED_OFF);
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+static void usbport_trig_activate(struct led_classdev *led_cdev)
+{
+	struct usbport_trig_data *usbport_data;
+	int err;
+
+	usbport_data = kzalloc(sizeof(*usbport_data), GFP_KERNEL);
+	if (!usbport_data)
+		return;
+	usbport_data->led_cdev = led_cdev;
+
+	/* List of ports */
+	INIT_LIST_HEAD(&usbport_data->ports);
+	err = sysfs_create_group(&led_cdev->dev->kobj, &ports_group);
+	if (err)
+		goto err_free;
+	usb_for_each_dev(usbport_data, usbport_trig_add_usb_dev_ports);
+
+	/* Notifications */
+	usbport_data->nb.notifier_call = usbport_trig_notify,
+	led_cdev->trigger_data = usbport_data;
+	usb_register_notify(&usbport_data->nb);
+
+	led_cdev->activated = true;
+	return;
+
+err_free:
+	kfree(usbport_data);
+}
+
+static void usbport_trig_deactivate(struct led_classdev *led_cdev)
+{
+	struct usbport_trig_data *usbport_data = led_cdev->trigger_data;
+	struct usbport_trig_port *port, *tmp;
+
+	if (!led_cdev->activated)
+		return;
+
+	list_for_each_entry_safe(port, tmp, &usbport_data->ports, list) {
+		usbport_trig_remove_port(usbport_data, port);
+	}
+
+	usb_unregister_notify(&usbport_data->nb);
+
+	sysfs_remove_group(&led_cdev->dev->kobj, &ports_group);
+
+	kfree(usbport_data);
+
+	led_cdev->activated = false;
+}
+
+static struct led_trigger usbport_led_trigger = {
+	.name     = "usbport",
+	.activate = usbport_trig_activate,
+	.deactivate = usbport_trig_deactivate,
+};
+
+static int __init usbport_trig_init(void)
+{
+	return led_trigger_register(&usbport_led_trigger);
+}
+
+static void __exit usbport_trig_exit(void)
+{
+	led_trigger_unregister(&usbport_led_trigger);
+}
+
+module_init(usbport_trig_init);
+module_exit(usbport_trig_exit);
+
+MODULE_AUTHOR("Rafał Miłecki <rafal@milecki.pl>");
+MODULE_DESCRIPTION("USB port trigger");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 0406a59..3a47077 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1760,17 +1760,14 @@
 		nintf = cp->desc.bNumInterfaces;
 		new_interfaces = kmalloc(nintf * sizeof(*new_interfaces),
 				GFP_NOIO);
-		if (!new_interfaces) {
-			dev_err(&dev->dev, "Out of memory\n");
+		if (!new_interfaces)
 			return -ENOMEM;
-		}
 
 		for (; n < nintf; ++n) {
 			new_interfaces[n] = kzalloc(
 					sizeof(struct usb_interface),
 					GFP_NOIO);
 			if (!new_interfaces[n]) {
-				dev_err(&dev->dev, "Out of memory\n");
 				ret = -ENOMEM;
 free_interfaces:
 				while (--n >= 0)
@@ -1862,7 +1859,12 @@
 		intf->dev.bus = &usb_bus_type;
 		intf->dev.type = &usb_if_device_type;
 		intf->dev.groups = usb_interface_groups;
+		/*
+		 * Please refer to usb_alloc_dev() to see why we set
+		 * dma_mask and dma_pfn_offset.
+		 */
 		intf->dev.dma_mask = dev->dev.dma_mask;
+		intf->dev.dma_pfn_offset = dev->dev.dma_pfn_offset;
 		INIT_WORK(&intf->reset_ws, __usb_queue_reset_device);
 		intf->minor = -1;
 		device_initialize(&intf->dev);
diff --git a/drivers/usb/core/of.c b/drivers/usb/core/of.c
index 2289700..3de4f88 100644
--- a/drivers/usb/core/of.c
+++ b/drivers/usb/core/of.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/of.h>
+#include <linux/usb/of.h>
 
 /**
  * usb_of_get_child_node - Find the device node match port number
diff --git a/drivers/usb/core/otg_whitelist.h b/drivers/usb/core/otg_whitelist.h
index a95b0c9..085049d 100644
--- a/drivers/usb/core/otg_whitelist.h
+++ b/drivers/usb/core/otg_whitelist.h
@@ -38,7 +38,7 @@
 { USB_DEVICE(0x0525, 0xa4a2), },
 #endif
 
-#if	defined(CONFIG_USB_TEST) || defined(CONFIG_USB_TEST_MODULE)
+#if	IS_ENABLED(CONFIG_USB_TEST)
 /* gadget zero, for testing */
 { USB_DEVICE(0x0525, 0xa4a0), },
 #endif
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index c601e25..a903969 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -68,10 +68,8 @@
 	urb = kmalloc(sizeof(struct urb) +
 		iso_packets * sizeof(struct usb_iso_packet_descriptor),
 		mem_flags);
-	if (!urb) {
-		printk(KERN_ERR "alloc_urb: kmalloc failed\n");
+	if (!urb)
 		return NULL;
-	}
 	usb_init_urb(urb);
 	return urb;
 }
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 5e80697..5921514 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -440,7 +440,18 @@
 	dev->dev.bus = &usb_bus_type;
 	dev->dev.type = &usb_device_type;
 	dev->dev.groups = usb_device_groups;
+	/*
+	 * Fake a dma_mask/offset for the USB device:
+	 * We cannot really use the dma-mapping API (dma_alloc_* and
+	 * dma_map_*) for USB devices but instead need to use
+	 * usb_alloc_coherent and pass data in 'urb's, but some subsystems
+	 * manually look into the mask/offset pair to determine whether
+	 * they need bounce buffers.
+	 * Note: calling dma_set_mask() on a USB device would set the
+	 * mask for the entire HCD, so don't do that.
+	 */
 	dev->dev.dma_mask = bus->controller->dma_mask;
+	dev->dev.dma_pfn_offset = bus->controller->dma_pfn_offset;
 	set_dev_node(&dev->dev, dev_to_node(bus->controller));
 	dev->state = USB_STATE_ATTACHED;
 	dev->lpm_disable_count = 1;
diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index 4135a5f..fa9b26b 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c
@@ -238,6 +238,77 @@
 	return ret;
 }
 
+/**
+ * dwc2_wait_for_mode() - Waits for the controller mode.
+ * @hsotg:	Programming view of the DWC_otg controller.
+ * @host_mode:	If true, waits for host mode, otherwise device mode.
+ */
+static void dwc2_wait_for_mode(struct dwc2_hsotg *hsotg,
+			       bool host_mode)
+{
+	ktime_t start;
+	ktime_t end;
+	unsigned int timeout = 110;
+
+	dev_vdbg(hsotg->dev, "Waiting for %s mode\n",
+		 host_mode ? "host" : "device");
+
+	start = ktime_get();
+
+	while (1) {
+		s64 ms;
+
+		if (dwc2_is_host_mode(hsotg) == host_mode) {
+			dev_vdbg(hsotg->dev, "%s mode set\n",
+				 host_mode ? "Host" : "Device");
+			break;
+		}
+
+		end = ktime_get();
+		ms = ktime_to_ms(ktime_sub(end, start));
+
+		if (ms >= (s64)timeout) {
+			dev_warn(hsotg->dev, "%s: Couldn't set %s mode\n",
+				 __func__, host_mode ? "host" : "device");
+			break;
+		}
+
+		usleep_range(1000, 2000);
+	}
+}
+
+/**
+ * dwc2_iddig_filter_enabled() - Returns true if the IDDIG debounce
+ * filter is enabled.
+ */
+static bool dwc2_iddig_filter_enabled(struct dwc2_hsotg *hsotg)
+{
+	u32 gsnpsid;
+	u32 ghwcfg4;
+
+	if (!dwc2_hw_is_otg(hsotg))
+		return false;
+
+	/* Check if core configuration includes the IDDIG filter. */
+	ghwcfg4 = dwc2_readl(hsotg->regs + GHWCFG4);
+	if (!(ghwcfg4 & GHWCFG4_IDDIG_FILT_EN))
+		return false;
+
+	/*
+	 * Check if the IDDIG debounce filter is bypassed. Available
+	 * in core version >= 3.10a.
+	 */
+	gsnpsid = dwc2_readl(hsotg->regs + GSNPSID);
+	if (gsnpsid >= DWC2_CORE_REV_3_10a) {
+		u32 gotgctl = dwc2_readl(hsotg->regs + GOTGCTL);
+
+		if (gotgctl & GOTGCTL_DBNCE_FLTR_BYPASS)
+			return false;
+	}
+
+	return true;
+}
+
 /*
  * Do core a soft reset of the core.  Be careful with this because it
  * resets all the internal state machines of the core.
@@ -246,9 +317,30 @@
 {
 	u32 greset;
 	int count = 0;
+	bool wait_for_host_mode = false;
 
 	dev_vdbg(hsotg->dev, "%s()\n", __func__);
 
+	/*
+	 * If the current mode is host, either due to the force mode
+	 * bit being set (which persists after core reset) or the
+	 * connector id pin, a core soft reset will temporarily reset
+	 * the mode to device. A delay from the IDDIG debounce filter
+	 * will occur before going back to host mode.
+	 *
+	 * Determine whether we will go back into host mode after a
+	 * reset and account for this delay after the reset.
+	 */
+	if (dwc2_iddig_filter_enabled(hsotg)) {
+		u32 gotgctl = dwc2_readl(hsotg->regs + GOTGCTL);
+		u32 gusbcfg = dwc2_readl(hsotg->regs + GUSBCFG);
+
+		if (!(gotgctl & GOTGCTL_CONID_B) ||
+		    (gusbcfg & GUSBCFG_FORCEHOSTMODE)) {
+			wait_for_host_mode = true;
+		}
+	}
+
 	/* Core Soft Reset */
 	greset = dwc2_readl(hsotg->regs + GRSTCTL);
 	greset |= GRSTCTL_CSFTRST;
@@ -277,6 +369,9 @@
 		}
 	} while (!(greset & GRSTCTL_AHBIDLE));
 
+	if (wait_for_host_mode)
+		dwc2_wait_for_mode(hsotg, true);
+
 	return 0;
 }
 
@@ -300,9 +395,9 @@
  * Checks are done in this function to determine whether doing a force
  * would be valid or not.
  *
- * If a force is done, it requires a 25ms delay to take effect.
- *
- * Returns true if the mode was forced.
+ * If a force is done, it requires a IDDIG debounce filter delay if
+ * the filter is configured and enabled. We poll the current mode of
+ * the controller to account for this delay.
  */
 static bool dwc2_force_mode(struct dwc2_hsotg *hsotg, bool host)
 {
@@ -337,12 +432,18 @@
 	gusbcfg |= set;
 	dwc2_writel(gusbcfg, hsotg->regs + GUSBCFG);
 
-	msleep(25);
+	dwc2_wait_for_mode(hsotg, host);
 	return true;
 }
 
-/*
- * Clears the force mode bits.
+/**
+ * dwc2_clear_force_mode() - Clears the force mode bits.
+ *
+ * After clearing the bits, wait up to 100 ms to account for any
+ * potential IDDIG filter delay. We can't know if we expect this delay
+ * or not because the value of the connector ID status is affected by
+ * the force mode. We only need to call this once during probe if
+ * dr_mode == OTG.
  */
 static void dwc2_clear_force_mode(struct dwc2_hsotg *hsotg)
 {
@@ -353,11 +454,8 @@
 	gusbcfg &= ~GUSBCFG_FORCEDEVMODE;
 	dwc2_writel(gusbcfg, hsotg->regs + GUSBCFG);
 
-	/*
-	 * NOTE: This long sleep is _very_ important, otherwise the core will
-	 * not stay in host mode after a connector ID change!
-	 */
-	msleep(25);
+	if (dwc2_iddig_filter_enabled(hsotg))
+		usleep_range(100000, 110000);
 }
 
 /*
@@ -380,12 +478,6 @@
 			 __func__, hsotg->dr_mode);
 		break;
 	}
-
-	/*
-	 * NOTE: This is required for some rockchip soc based
-	 * platforms.
-	 */
-	msleep(50);
 }
 
 /*
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index 9fae029..aad4107 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -259,13 +259,6 @@
 	DWC2_L3,	/* Off state */
 };
 
-/*
- * Gadget periodic tx fifo sizes as used by legacy driver
- * EP0 is not included
- */
-#define DWC2_G_P_LEGACY_TX_FIFO_SIZE {256, 256, 256, 256, 768, 768, 768, \
-					   768, 0, 0, 0, 0, 0, 0, 0}
-
 /* Gadget ep0 states */
 enum dwc2_ep0_state {
 	DWC2_EP0_SETUP,
@@ -868,6 +861,7 @@
 	void *priv;
 	int     irq;
 	struct clk *clk;
+	struct reset_control *reset;
 
 	unsigned int queuing_high_bandwidth:1;
 	unsigned int srp_success:1;
@@ -889,6 +883,7 @@
 #define DWC2_CORE_REV_2_92a	0x4f54292a
 #define DWC2_CORE_REV_2_94a	0x4f54294a
 #define DWC2_CORE_REV_3_00a	0x4f54300a
+#define DWC2_CORE_REV_3_10a	0x4f54310a
 
 #if IS_ENABLED(CONFIG_USB_DWC2_HOST) || IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE)
 	union dwc2_hcd_internal_flags {
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index af46adf..4cd6403 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -186,9 +186,10 @@
  */
 static void dwc2_hsotg_init_fifo(struct dwc2_hsotg *hsotg)
 {
-	unsigned int ep;
+	unsigned int fifo;
 	unsigned int addr;
 	int timeout;
+	u32 dptxfsizn;
 	u32 val;
 
 	/* Reset fifo map if not correctly cleared during previous session */
@@ -216,16 +217,16 @@
 	 * them to endpoints dynamically according to maxpacket size value of
 	 * given endpoint.
 	 */
-	for (ep = 1; ep < MAX_EPS_CHANNELS; ep++) {
-		if (!hsotg->g_tx_fifo_sz[ep])
-			continue;
-		val = addr;
-		val |= hsotg->g_tx_fifo_sz[ep] << FIFOSIZE_DEPTH_SHIFT;
-		WARN_ONCE(addr + hsotg->g_tx_fifo_sz[ep] > hsotg->fifo_mem,
-			  "insufficient fifo memory");
-		addr += hsotg->g_tx_fifo_sz[ep];
+	for (fifo = 1; fifo < MAX_EPS_CHANNELS; fifo++) {
+		dptxfsizn = dwc2_readl(hsotg->regs + DPTXFSIZN(fifo));
 
-		dwc2_writel(val, hsotg->regs + DPTXFSIZN(ep));
+		val = (dptxfsizn & FIFOSIZE_DEPTH_MASK) | addr;
+		addr += dptxfsizn >> FIFOSIZE_DEPTH_SHIFT;
+
+		if (addr > hsotg->fifo_mem)
+			break;
+
+		dwc2_writel(val, hsotg->regs + DPTXFSIZN(fifo));
 	}
 
 	/*
@@ -388,7 +389,8 @@
 			return -ENOSPC;
 		}
 	} else if (hsotg->dedicated_fifos && hs_ep->index != 0) {
-		can_write = dwc2_readl(hsotg->regs + DTXFSTS(hs_ep->index));
+		can_write = dwc2_readl(hsotg->regs +
+				DTXFSTS(hs_ep->fifo_index));
 
 		can_write &= 0xffff;
 		can_write *= 4;
@@ -2432,7 +2434,7 @@
 
 	if (!hsotg->dedicated_fifos)
 		return;
-	size = (dwc2_readl(hsotg->regs + DTXFSTS(ep->index)) & 0xffff) * 4;
+	size = (dwc2_readl(hsotg->regs + DTXFSTS(ep->fifo_index)) & 0xffff) * 4;
 	if (size < ep->fifo_size)
 		dwc2_hsotg_txfifo_flush(hsotg, ep->fifo_index);
 }
@@ -3041,22 +3043,11 @@
 		break;
 	}
 
-	/* If fifo is already allocated for this ep */
-	if (hs_ep->fifo_index) {
-		size =  hs_ep->ep.maxpacket * hs_ep->mc;
-		/* If bigger fifo is required deallocate current one */
-		if (size > hs_ep->fifo_size) {
-			hsotg->fifo_map &= ~(1 << hs_ep->fifo_index);
-			hs_ep->fifo_index = 0;
-			hs_ep->fifo_size = 0;
-		}
-	}
-
 	/*
 	 * if the hardware has dedicated fifos, we must give each IN EP
 	 * a unique tx-fifo even if it is non-periodic.
 	 */
-	if (dir_in && hsotg->dedicated_fifos && !hs_ep->fifo_index) {
+	if (dir_in && hsotg->dedicated_fifos) {
 		u32 fifo_index = 0;
 		u32 fifo_size = UINT_MAX;
 		size = hs_ep->ep.maxpacket*hs_ep->mc;
@@ -3129,10 +3120,6 @@
 
 	spin_lock_irqsave(&hsotg->lock, flags);
 
-	hsotg->fifo_map &= ~(1<<hs_ep->fifo_index);
-	hs_ep->fifo_index = 0;
-	hs_ep->fifo_size = 0;
-
 	ctrl = dwc2_readl(hsotg->regs + epctrl_reg);
 	ctrl &= ~DXEPCTL_EPENA;
 	ctrl &= ~DXEPCTL_USBACTEP;
@@ -3147,6 +3134,10 @@
 	/* terminate all requests with shutdown */
 	kill_all_requests(hsotg, hs_ep, -ESHUTDOWN);
 
+	hsotg->fifo_map &= ~(1 << hs_ep->fifo_index);
+	hs_ep->fifo_index = 0;
+	hs_ep->fifo_size = 0;
+
 	spin_unlock_irqrestore(&hsotg->lock, flags);
 	return 0;
 }
@@ -3475,8 +3466,11 @@
 		otg_set_peripheral(hsotg->uphy->otg, &hsotg->gadget);
 
 	spin_lock_irqsave(&hsotg->lock, flags);
-	dwc2_hsotg_init(hsotg);
-	dwc2_hsotg_core_init_disconnected(hsotg, false);
+	if (dwc2_hw_is_device(hsotg)) {
+		dwc2_hsotg_init(hsotg);
+		dwc2_hsotg_core_init_disconnected(hsotg, false);
+	}
+
 	hsotg->enabled = 0;
 	spin_unlock_irqrestore(&hsotg->lock, flags);
 
@@ -3813,36 +3807,10 @@
 static void dwc2_hsotg_of_probe(struct dwc2_hsotg *hsotg)
 {
 	struct device_node *np = hsotg->dev->of_node;
-	u32 len = 0;
-	u32 i = 0;
 
 	/* Enable dma if requested in device tree */
 	hsotg->g_using_dma = of_property_read_bool(np, "g-use-dma");
 
-	/*
-	* Register TX periodic fifo size per endpoint.
-	* EP0 is excluded since it has no fifo configuration.
-	*/
-	if (!of_find_property(np, "g-tx-fifo-size", &len))
-		goto rx_fifo;
-
-	len /= sizeof(u32);
-
-	/* Read tx fifo sizes other than ep0 */
-	if (of_property_read_u32_array(np, "g-tx-fifo-size",
-						&hsotg->g_tx_fifo_sz[1], len))
-		goto rx_fifo;
-
-	/* Add ep0 */
-	len++;
-
-	/* Make remaining TX fifos unavailable */
-	if (len < MAX_EPS_CHANNELS) {
-		for (i = len; i < MAX_EPS_CHANNELS; i++)
-			hsotg->g_tx_fifo_sz[i] = 0;
-	}
-
-rx_fifo:
 	/* Register RX fifo size */
 	of_property_read_u32(np, "g-rx-fifo-size", &hsotg->g_rx_fifo_sz);
 
@@ -3864,13 +3832,10 @@
 	struct device *dev = hsotg->dev;
 	int epnum;
 	int ret;
-	int i;
-	u32 p_tx_fifo[] = DWC2_G_P_LEGACY_TX_FIFO_SIZE;
 
 	/* Initialize to legacy fifo configuration values */
 	hsotg->g_rx_fifo_sz = 2048;
 	hsotg->g_np_g_tx_fifo_sz = 1024;
-	memcpy(&hsotg->g_tx_fifo_sz[1], p_tx_fifo, sizeof(p_tx_fifo));
 	/* Device tree specific probe */
 	dwc2_hsotg_of_probe(hsotg);
 
@@ -3888,9 +3853,6 @@
 	dev_dbg(dev, "NonPeriodic TXFIFO size: %d\n",
 						hsotg->g_np_g_tx_fifo_sz);
 	dev_dbg(dev, "RXFIFO size: %d\n", hsotg->g_rx_fifo_sz);
-	for (i = 0; i < MAX_EPS_CHANNELS; i++)
-		dev_dbg(dev, "Periodic TXFIFO%2d size: %d\n", i,
-						hsotg->g_tx_fifo_sz[i]);
 
 	hsotg->gadget.max_speed = USB_SPEED_HIGH;
 	hsotg->gadget.ops = &dwc2_hsotg_gadget_ops;
@@ -3908,17 +3870,13 @@
 
 	hsotg->ctrl_buff = devm_kzalloc(hsotg->dev,
 			DWC2_CTRL_BUFF_SIZE, GFP_KERNEL);
-	if (!hsotg->ctrl_buff) {
-		dev_err(dev, "failed to allocate ctrl request buff\n");
+	if (!hsotg->ctrl_buff)
 		return -ENOMEM;
-	}
 
 	hsotg->ep0_buff = devm_kzalloc(hsotg->dev,
 			DWC2_CTRL_BUFF_SIZE, GFP_KERNEL);
-	if (!hsotg->ep0_buff) {
-		dev_err(dev, "failed to allocate ctrl reply buff\n");
+	if (!hsotg->ep0_buff)
 		return -ENOMEM;
-	}
 
 	ret = devm_request_irq(hsotg->dev, irq, dwc2_hsotg_irq, IRQF_SHARED,
 				dev_name(hsotg->dev), hsotg);
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 2df3d04..df5a065 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -5040,7 +5040,7 @@
 
 	/* Create new workqueue and init work */
 	retval = -ENOMEM;
-	hsotg->wq_otg = create_singlethread_workqueue("dwc2");
+	hsotg->wq_otg = alloc_ordered_workqueue("dwc2", 0);
 	if (!hsotg->wq_otg) {
 		dev_err(hsotg->dev, "Failed to create workqueue\n");
 		goto error2;
diff --git a/drivers/usb/dwc2/hw.h b/drivers/usb/dwc2/hw.h
index efc3bcd..9105844 100644
--- a/drivers/usb/dwc2/hw.h
+++ b/drivers/usb/dwc2/hw.h
@@ -48,6 +48,7 @@
 #define GOTGCTL_ASESVLD			(1 << 18)
 #define GOTGCTL_DBNC_SHORT		(1 << 17)
 #define GOTGCTL_CONID_B			(1 << 16)
+#define GOTGCTL_DBNCE_FLTR_BYPASS	(1 << 15)
 #define GOTGCTL_DEVHNPEN		(1 << 11)
 #define GOTGCTL_HSTSETHNPEN		(1 << 10)
 #define GOTGCTL_HNPREQ			(1 << 9)
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index fc6f525..530959a 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -45,6 +45,7 @@
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_data/s3c-hsotg.h>
+#include <linux/reset.h>
 
 #include <linux/usb/of.h>
 
@@ -337,6 +338,24 @@
 {
 	int i, ret;
 
+	hsotg->reset = devm_reset_control_get_optional(hsotg->dev, "dwc2");
+	if (IS_ERR(hsotg->reset)) {
+		ret = PTR_ERR(hsotg->reset);
+		switch (ret) {
+		case -ENOENT:
+		case -ENOTSUPP:
+			hsotg->reset = NULL;
+			break;
+		default:
+			dev_err(hsotg->dev, "error getting reset control %d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	if (hsotg->reset)
+		reset_control_deassert(hsotg->reset);
+
 	/* Set default UTMI width */
 	hsotg->phyif = GUSBCFG_PHYIF16;
 
@@ -434,6 +453,9 @@
 	if (hsotg->ll_hw_enabled)
 		dwc2_lowlevel_hw_disable(hsotg);
 
+	if (hsotg->reset)
+		reset_control_assert(hsotg->reset);
+
 	return 0;
 }
 
diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index a64ce1c..b97cde7 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -1,7 +1,7 @@
 config USB_DWC3
 	tristate "DesignWare USB3 DRD Core Support"
 	depends on (USB || USB_GADGET) && HAS_DMA
-	select USB_XHCI_PLATFORM if USB_SUPPORT && USB_XHCI_HCD
+	select USB_XHCI_PLATFORM if USB_XHCI_HCD
 	help
 	  Say Y or M here if your system has a Dual Role SuperSpeed
 	  USB controller based on the DesignWare USB3 IP Core.
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 9466431..7287a76 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -49,6 +49,57 @@
 
 #define DWC3_DEFAULT_AUTOSUSPEND_DELAY	5000 /* ms */
 
+/**
+ * dwc3_get_dr_mode - Validates and sets dr_mode
+ * @dwc: pointer to our context structure
+ */
+static int dwc3_get_dr_mode(struct dwc3 *dwc)
+{
+	enum usb_dr_mode mode;
+	struct device *dev = dwc->dev;
+	unsigned int hw_mode;
+
+	if (dwc->dr_mode == USB_DR_MODE_UNKNOWN)
+		dwc->dr_mode = USB_DR_MODE_OTG;
+
+	mode = dwc->dr_mode;
+	hw_mode = DWC3_GHWPARAMS0_MODE(dwc->hwparams.hwparams0);
+
+	switch (hw_mode) {
+	case DWC3_GHWPARAMS0_MODE_GADGET:
+		if (IS_ENABLED(CONFIG_USB_DWC3_HOST)) {
+			dev_err(dev,
+				"Controller does not support host mode.\n");
+			return -EINVAL;
+		}
+		mode = USB_DR_MODE_PERIPHERAL;
+		break;
+	case DWC3_GHWPARAMS0_MODE_HOST:
+		if (IS_ENABLED(CONFIG_USB_DWC3_GADGET)) {
+			dev_err(dev,
+				"Controller does not support device mode.\n");
+			return -EINVAL;
+		}
+		mode = USB_DR_MODE_HOST;
+		break;
+	default:
+		if (IS_ENABLED(CONFIG_USB_DWC3_HOST))
+			mode = USB_DR_MODE_HOST;
+		else if (IS_ENABLED(CONFIG_USB_DWC3_GADGET))
+			mode = USB_DR_MODE_PERIPHERAL;
+	}
+
+	if (mode != dwc->dr_mode) {
+		dev_warn(dev,
+			 "Configuration mismatch. dr_mode forced to %s\n",
+			 mode == USB_DR_MODE_HOST ? "host" : "gadget");
+
+		dwc->dr_mode = mode;
+	}
+
+	return 0;
+}
+
 void dwc3_set_mode(struct dwc3 *dwc, u32 mode)
 {
 	u32 reg;
@@ -448,6 +499,9 @@
 	if (dwc->dis_u3_susphy_quirk)
 		reg &= ~DWC3_GUSB3PIPECTL_SUSPHY;
 
+	if (dwc->dis_del_phy_power_chg_quirk)
+		reg &= ~DWC3_GUSB3PIPECTL_DEPOCHANGE;
+
 	dwc3_writel(dwc->regs, DWC3_GUSB3PIPECTL(0), reg);
 
 	reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0));
@@ -485,6 +539,23 @@
 		break;
 	}
 
+	switch (dwc->hsphy_mode) {
+	case USBPHY_INTERFACE_MODE_UTMI:
+		reg &= ~(DWC3_GUSB2PHYCFG_PHYIF_MASK |
+		       DWC3_GUSB2PHYCFG_USBTRDTIM_MASK);
+		reg |= DWC3_GUSB2PHYCFG_PHYIF(UTMI_PHYIF_8_BIT) |
+		       DWC3_GUSB2PHYCFG_USBTRDTIM(USBTRDTIM_UTMI_8_BIT);
+		break;
+	case USBPHY_INTERFACE_MODE_UTMIW:
+		reg &= ~(DWC3_GUSB2PHYCFG_PHYIF_MASK |
+		       DWC3_GUSB2PHYCFG_USBTRDTIM_MASK);
+		reg |= DWC3_GUSB2PHYCFG_PHYIF(UTMI_PHYIF_16_BIT) |
+		       DWC3_GUSB2PHYCFG_USBTRDTIM(USBTRDTIM_UTMI_16_BIT);
+		break;
+	default:
+		break;
+	}
+
 	/*
 	 * Above 1.94a, it is recommended to set DWC3_GUSB2PHYCFG_SUSPHY to
 	 * '0' during coreConsultant configuration. So default value will
@@ -500,6 +571,9 @@
 	if (dwc->dis_enblslpm_quirk)
 		reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
 
+	if (dwc->dis_u2_freeclk_exists_quirk)
+		reg &= ~DWC3_GUSB2PHYCFG_U2_FREECLK_EXISTS;
+
 	dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
 
 	return 0;
@@ -666,6 +740,32 @@
 		goto err4;
 	}
 
+	switch (dwc->dr_mode) {
+	case USB_DR_MODE_PERIPHERAL:
+		dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_DEVICE);
+		break;
+	case USB_DR_MODE_HOST:
+		dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_HOST);
+		break;
+	case USB_DR_MODE_OTG:
+		dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_OTG);
+		break;
+	default:
+		dev_warn(dwc->dev, "Unsupported mode %d\n", dwc->dr_mode);
+		break;
+	}
+
+	/*
+	 * ENDXFER polling is available on version 3.10a and later of
+	 * the DWC_usb3 controller. It is NOT available in the
+	 * DWC_usb31 controller.
+	 */
+	if (!dwc3_is_usb31(dwc) && dwc->revision >= DWC3_REVISION_310A) {
+		reg = dwc3_readl(dwc->regs, DWC3_GUCTL2);
+		reg |= DWC3_GUCTL2_RST_ACTBITLATER;
+		dwc3_writel(dwc->regs, DWC3_GUCTL2, reg);
+	}
+
 	return 0;
 
 err4:
@@ -763,7 +863,6 @@
 
 	switch (dwc->dr_mode) {
 	case USB_DR_MODE_PERIPHERAL:
-		dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_DEVICE);
 		ret = dwc3_gadget_init(dwc);
 		if (ret) {
 			if (ret != -EPROBE_DEFER)
@@ -772,7 +871,6 @@
 		}
 		break;
 	case USB_DR_MODE_HOST:
-		dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_HOST);
 		ret = dwc3_host_init(dwc);
 		if (ret) {
 			if (ret != -EPROBE_DEFER)
@@ -781,7 +879,6 @@
 		}
 		break;
 	case USB_DR_MODE_OTG:
-		dwc3_set_mode(dwc, DWC3_GCTL_PRTCAP_OTG);
 		ret = dwc3_host_init(dwc);
 		if (ret) {
 			if (ret != -EPROBE_DEFER)
@@ -888,6 +985,7 @@
 
 	dwc->maximum_speed = usb_get_maximum_speed(dev);
 	dwc->dr_mode = usb_get_dr_mode(dev);
+	dwc->hsphy_mode = of_usb_get_phy_mode(dev->of_node);
 
 	dwc->has_lpm_erratum = device_property_read_bool(dev,
 				"snps,has-lpm-erratum");
@@ -924,6 +1022,10 @@
 				"snps,dis_enblslpm_quirk");
 	dwc->dis_rxdet_inp3_quirk = device_property_read_bool(dev,
 				"snps,dis_rxdet_inp3_quirk");
+	dwc->dis_u2_freeclk_exists_quirk = device_property_read_bool(dev,
+				"snps,dis-u2-freeclk-exists-quirk");
+	dwc->dis_del_phy_power_chg_quirk = device_property_read_bool(dev,
+				"snps,dis-del-phy-power-chg-quirk");
 
 	dwc->tx_de_emphasis_quirk = device_property_read_bool(dev,
 				"snps,tx_de_emphasis_quirk");
@@ -972,17 +1074,9 @@
 		goto err2;
 	}
 
-	if (IS_ENABLED(CONFIG_USB_DWC3_HOST) &&
-			(dwc->dr_mode == USB_DR_MODE_OTG ||
-					dwc->dr_mode == USB_DR_MODE_UNKNOWN))
-		dwc->dr_mode = USB_DR_MODE_HOST;
-	else if (IS_ENABLED(CONFIG_USB_DWC3_GADGET) &&
-			(dwc->dr_mode == USB_DR_MODE_OTG ||
-					dwc->dr_mode == USB_DR_MODE_UNKNOWN))
-		dwc->dr_mode = USB_DR_MODE_PERIPHERAL;
-
-	if (dwc->dr_mode == USB_DR_MODE_UNKNOWN)
-		dwc->dr_mode = USB_DR_MODE_OTG;
+	ret = dwc3_get_dr_mode(dwc);
+	if (ret)
+		goto err3;
 
 	ret = dwc3_alloc_scratch_buffers(dwc);
 	if (ret)
@@ -1192,6 +1286,7 @@
 	}
 
 	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put(dev);
 
 	return 0;
 }
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 45d6de5..6b60e42 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -109,6 +109,7 @@
 #define DWC3_GPRTBIMAP_HS1	0xc184
 #define DWC3_GPRTBIMAP_FS0	0xc188
 #define DWC3_GPRTBIMAP_FS1	0xc18c
+#define DWC3_GUCTL2		0xc19c
 
 #define DWC3_VER_NUMBER		0xc1a0
 #define DWC3_VER_TYPE		0xc1a4
@@ -199,9 +200,18 @@
 
 /* Global USB2 PHY Configuration Register */
 #define DWC3_GUSB2PHYCFG_PHYSOFTRST	(1 << 31)
+#define DWC3_GUSB2PHYCFG_U2_FREECLK_EXISTS	(1 << 30)
 #define DWC3_GUSB2PHYCFG_SUSPHY		(1 << 6)
 #define DWC3_GUSB2PHYCFG_ULPI_UTMI	(1 << 4)
 #define DWC3_GUSB2PHYCFG_ENBLSLPM	(1 << 8)
+#define DWC3_GUSB2PHYCFG_PHYIF(n)	(n << 3)
+#define DWC3_GUSB2PHYCFG_PHYIF_MASK	DWC3_GUSB2PHYCFG_PHYIF(1)
+#define DWC3_GUSB2PHYCFG_USBTRDTIM(n)	(n << 10)
+#define DWC3_GUSB2PHYCFG_USBTRDTIM_MASK	DWC3_GUSB2PHYCFG_USBTRDTIM(0xf)
+#define USBTRDTIM_UTMI_8_BIT		9
+#define USBTRDTIM_UTMI_16_BIT		5
+#define UTMI_PHYIF_16_BIT		1
+#define UTMI_PHYIF_8_BIT		0
 
 /* Global USB2 PHY Vendor Control Register */
 #define DWC3_GUSB2PHYACC_NEWREGREQ	(1 << 25)
@@ -235,7 +245,10 @@
 #define DWC3_GEVNTSIZ_SIZE(n)		((n) & 0xffff)
 
 /* Global HWPARAMS0 Register */
-#define DWC3_GHWPARAMS0_USB3_MODE(n)	((n) & 0x3)
+#define DWC3_GHWPARAMS0_MODE(n)		((n) & 0x3)
+#define DWC3_GHWPARAMS0_MODE_GADGET	0
+#define DWC3_GHWPARAMS0_MODE_HOST	1
+#define DWC3_GHWPARAMS0_MODE_DRD	2
 #define DWC3_GHWPARAMS0_MBUS_TYPE(n)	(((n) >> 3) & 0x7)
 #define DWC3_GHWPARAMS0_SBUS_TYPE(n)	(((n) >> 6) & 0x3)
 #define DWC3_GHWPARAMS0_MDWIDTH(n)	(((n) >> 8) & 0xff)
@@ -279,6 +292,9 @@
 #define DWC3_GFLADJ_30MHZ_SDBND_SEL		(1 << 7)
 #define DWC3_GFLADJ_30MHZ_MASK			0x3f
 
+/* Global User Control Register 2 */
+#define DWC3_GUCTL2_RST_ACTBITLATER		(1 << 14)
+
 /* Device Configuration Register */
 #define DWC3_DCFG_DEVADDR(addr)	((addr) << 3)
 #define DWC3_DCFG_DEVADDR_MASK	DWC3_DCFG_DEVADDR(0x7f)
@@ -685,6 +701,8 @@
  * @request: struct usb_request to be transferred
  * @list: a list_head used for request queueing
  * @dep: struct dwc3_ep owning this request
+ * @sg: pointer to first incomplete sg
+ * @num_pending_sgs: counter to pending sgs
  * @first_trb_index: index to first trb used by this request
  * @epnum: endpoint number to which this request refers
  * @trb: pointer to struct dwc3_trb
@@ -697,7 +715,9 @@
 	struct usb_request	request;
 	struct list_head	list;
 	struct dwc3_ep		*dep;
+	struct scatterlist	*sg;
 
+	unsigned		num_pending_sgs;
 	u8			first_trb_index;
 	u8			epnum;
 	struct dwc3_trb		*trb;
@@ -743,6 +763,9 @@
  * @maximum_speed: maximum speed requested (mainly for testing purposes)
  * @revision: revision register contents
  * @dr_mode: requested mode of operation
+ * @hsphy_mode: UTMI phy mode, one of following:
+ *		- USBPHY_INTERFACE_MODE_UTMI
+ *		- USBPHY_INTERFACE_MODE_UTMIW
  * @usb2_phy: pointer to USB2 PHY
  * @usb3_phy: pointer to USB3 PHY
  * @usb2_generic_phy: pointer to USB2 PHY
@@ -799,6 +822,11 @@
  * @dis_u2_susphy_quirk: set if we disable usb2 suspend phy
  * @dis_enblslpm_quirk: set if we clear enblslpm in GUSB2PHYCFG,
  *                      disabling the suspend signal to the PHY.
+ * @dis_u2_freeclk_exists_quirk : set if we clear u2_freeclk_exists
+ *			in GUSB2PHYCFG, specify that USB2 PHY doesn't
+ *			provide a free-running PHY clock.
+ * @dis_del_phy_power_chg_quirk: set if we disable delay phy power
+ *			change quirk.
  * @tx_de_emphasis_quirk: set if we enable Tx de-emphasis quirk
  * @tx_de_emphasis: Tx de-emphasis value
  * 	0	- -6dB de-emphasis
@@ -845,6 +873,7 @@
 	size_t			regs_size;
 
 	enum usb_dr_mode	dr_mode;
+	enum usb_phy_interface	hsphy_mode;
 
 	u32			fladj;
 	u32			irq_gadget;
@@ -880,6 +909,8 @@
 #define DWC3_REVISION_260A	0x5533260a
 #define DWC3_REVISION_270A	0x5533270a
 #define DWC3_REVISION_280A	0x5533280a
+#define DWC3_REVISION_300A	0x5533300a
+#define DWC3_REVISION_310A	0x5533310a
 
 /*
  * NOTICE: we're using bit 31 as a "is usb 3.1" flag. This is really
@@ -942,6 +973,8 @@
 	unsigned		dis_u2_susphy_quirk:1;
 	unsigned		dis_enblslpm_quirk:1;
 	unsigned		dis_rxdet_inp3_quirk:1;
+	unsigned		dis_u2_freeclk_exists_quirk:1;
+	unsigned		dis_del_phy_power_chg_quirk:1;
 
 	unsigned		tx_de_emphasis_quirk:1;
 	unsigned		tx_de_emphasis:2;
diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h
index 22dfc3d..33ab2a2 100644
--- a/drivers/usb/dwc3/debug.h
+++ b/drivers/usb/dwc3/debug.h
@@ -192,7 +192,7 @@
 	int ret;
 
 	ret = sprintf(str, "ep%d%s: ", epnum >> 1,
-			(epnum & 1) ? "in" : "in");
+			(epnum & 1) ? "in" : "out");
 	if (ret < 0)
 		return "UNKNOWN";
 
diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c
index 9743353..fe414e7 100644
--- a/drivers/usb/dwc3/dwc3-of-simple.c
+++ b/drivers/usb/dwc3/dwc3-of-simple.c
@@ -36,35 +36,25 @@
 	int			num_clocks;
 };
 
-static int dwc3_of_simple_probe(struct platform_device *pdev)
+static int dwc3_of_simple_clk_init(struct dwc3_of_simple *simple, int count)
 {
-	struct dwc3_of_simple	*simple;
-	struct device		*dev = &pdev->dev;
+	struct device		*dev = simple->dev;
 	struct device_node	*np = dev->of_node;
-
-	unsigned int		count;
-	int			ret;
 	int			i;
 
-	simple = devm_kzalloc(dev, sizeof(*simple), GFP_KERNEL);
-	if (!simple)
-		return -ENOMEM;
-
-	count = of_clk_get_parent_count(np);
-	if (!count)
-		return -ENOENT;
-
 	simple->num_clocks = count;
 
+	if (!count)
+		return 0;
+
 	simple->clks = devm_kcalloc(dev, simple->num_clocks,
 			sizeof(struct clk *), GFP_KERNEL);
 	if (!simple->clks)
 		return -ENOMEM;
 
-	simple->dev = dev;
-
 	for (i = 0; i < simple->num_clocks; i++) {
 		struct clk	*clk;
+		int		ret;
 
 		clk = of_clk_get(np, i);
 		if (IS_ERR(clk)) {
@@ -87,6 +77,29 @@
 		simple->clks[i] = clk;
 	}
 
+	return 0;
+}
+
+static int dwc3_of_simple_probe(struct platform_device *pdev)
+{
+	struct dwc3_of_simple	*simple;
+	struct device		*dev = &pdev->dev;
+	struct device_node	*np = dev->of_node;
+
+	int			ret;
+	int			i;
+
+	simple = devm_kzalloc(dev, sizeof(*simple), GFP_KERNEL);
+	if (!simple)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, simple);
+	simple->dev = dev;
+
+	ret = dwc3_of_simple_clk_init(simple, of_clk_get_parent_count(np));
+	if (ret)
+		return ret;
+
 	ret = of_platform_populate(np, NULL, NULL, dev);
 	if (ret) {
 		for (i = 0; i < simple->num_clocks; i++) {
@@ -111,7 +124,7 @@
 	int			i;
 
 	for (i = 0; i < simple->num_clocks; i++) {
-		clk_unprepare(simple->clks[i]);
+		clk_disable_unprepare(simple->clks[i]);
 		clk_put(simple->clks[i]);
 	}
 
@@ -161,7 +174,9 @@
 
 static const struct of_device_id of_dwc3_simple_match[] = {
 	{ .compatible = "qcom,dwc3" },
+	{ .compatible = "rockchip,rk3399-dwc3" },
 	{ .compatible = "xlnx,zynqmp-dwc3" },
+	{ .compatible = "cavium,octeon-7130-usb-uctl" },
 	{ /* Sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, of_dwc3_simple_match);
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 45f5a23..6df0f5d 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -37,6 +37,7 @@
 #define PCI_DEVICE_ID_INTEL_BXT			0x0aaa
 #define PCI_DEVICE_ID_INTEL_BXT_M		0x1aaa
 #define PCI_DEVICE_ID_INTEL_APL			0x5aaa
+#define PCI_DEVICE_ID_INTEL_KBP			0xa2b0
 
 static const struct acpi_gpio_params reset_gpios = { 0, 0, false };
 static const struct acpi_gpio_params cs_gpios = { 1, 0, false };
@@ -227,6 +228,7 @@
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), },
 	{  }	/* Terminating Entry */
 };
@@ -241,6 +243,15 @@
 	return -EBUSY;
 }
 
+static int dwc3_pci_runtime_resume(struct device *dev)
+{
+	struct platform_device *dwc3 = dev_get_drvdata(dev);
+
+	return pm_runtime_get(&dwc3->dev);
+}
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_PM_SLEEP
 static int dwc3_pci_pm_dummy(struct device *dev)
 {
 	/*
@@ -253,11 +264,11 @@
 	 */
 	return 0;
 }
-#endif /* CONFIG_PM */
+#endif /* CONFIG_PM_SLEEP */
 
 static struct dev_pm_ops dwc3_pci_dev_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(dwc3_pci_pm_dummy, dwc3_pci_pm_dummy)
-	SET_RUNTIME_PM_OPS(dwc3_pci_runtime_suspend, dwc3_pci_pm_dummy,
+	SET_RUNTIME_PM_OPS(dwc3_pci_runtime_suspend, dwc3_pci_runtime_resume,
 		NULL)
 };
 
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 8f8c215..07cc892 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -174,15 +174,8 @@
 		int status)
 {
 	struct dwc3			*dwc = dep->dwc;
-	int				i;
 
-	if (req->started) {
-		i = 0;
-		do {
-			dwc3_ep_inc_deq(dep);
-		} while(++i < req->request.num_mapped_sgs);
-		req->started = false;
-	}
+	req->started = false;
 	list_del(&req->list);
 	req->trb = NULL;
 
@@ -348,7 +341,8 @@
 	 * IN transfers due to a mishandled error condition. Synopsys
 	 * STAR 9000614252.
 	 */
-	if (dep->direction && (dwc->revision >= DWC3_REVISION_260A))
+	if (dep->direction && (dwc->revision >= DWC3_REVISION_260A) &&
+	    (dwc->gadget.speed >= USB_SPEED_SUPER))
 		cmd |= DWC3_DEPCMD_CLEARPENDIN;
 
 	memset(&params, 0, sizeof(params));
@@ -490,7 +484,8 @@
 		params.param0 |= DWC3_DEPCFG_ACTION_INIT;
 	}
 
-	params.param1 = DWC3_DEPCFG_XFER_COMPLETE_EN;
+	if (usb_endpoint_xfer_control(desc))
+		params.param1 = DWC3_DEPCFG_XFER_COMPLETE_EN;
 
 	if (dep->number <= 1 || usb_endpoint_xfer_isoc(desc))
 		params.param1 |= DWC3_DEPCFG_XFER_NOT_READY_EN;
@@ -764,6 +759,8 @@
 	kfree(req);
 }
 
+static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep);
+
 /**
  * dwc3_prepare_one_trb - setup one TRB from one request
  * @dep: endpoint for which this request is prepared
@@ -771,15 +768,13 @@
  */
 static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
 		struct dwc3_request *req, dma_addr_t dma,
-		unsigned length, unsigned last, unsigned chain, unsigned node)
+		unsigned length, unsigned chain, unsigned node)
 {
 	struct dwc3_trb		*trb;
 
-	dwc3_trace(trace_dwc3_gadget, "%s: req %p dma %08llx length %d%s%s",
+	dwc3_trace(trace_dwc3_gadget, "%s: req %p dma %08llx length %d%s",
 			dep->name, req, (unsigned long long) dma,
-			length, last ? " last" : "",
-			chain ? " chain" : "");
-
+			length, chain ? " chain" : "");
 
 	trb = &dep->trb_pool[dep->trb_enqueue];
 
@@ -826,12 +821,10 @@
 	/* always enable Continue on Short Packet */
 	trb->ctrl |= DWC3_TRB_CTRL_CSP;
 
-	if (!req->request.no_interrupt && !chain)
+	if ((!req->request.no_interrupt && !chain) ||
+			(dwc3_calc_trbs_left(dep) == 0))
 		trb->ctrl |= DWC3_TRB_CTRL_IOC | DWC3_TRB_CTRL_ISP_IMI;
 
-	if (last)
-		trb->ctrl |= DWC3_TRB_CTRL_LST;
-
 	if (chain)
 		trb->ctrl |= DWC3_TRB_CTRL_CHN;
 
@@ -856,12 +849,12 @@
  */
 static struct dwc3_trb *dwc3_ep_prev_trb(struct dwc3_ep *dep, u8 index)
 {
-	if (!index)
-		index = DWC3_TRB_NUM - 2;
-	else
-		index = dep->trb_enqueue - 1;
+	u8 tmp = index;
 
-	return &dep->trb_pool[index];
+	if (!tmp)
+		tmp = DWC3_TRB_NUM - 1;
+
+	return &dep->trb_pool[tmp - 1];
 }
 
 static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
@@ -894,65 +887,42 @@
 }
 
 static void dwc3_prepare_one_trb_sg(struct dwc3_ep *dep,
-		struct dwc3_request *req, unsigned int trbs_left,
-		unsigned int more_coming)
+		struct dwc3_request *req)
 {
-	struct usb_request *request = &req->request;
-	struct scatterlist *sg = request->sg;
+	struct scatterlist *sg = req->sg;
 	struct scatterlist *s;
-	unsigned int	last = false;
 	unsigned int	length;
 	dma_addr_t	dma;
 	int		i;
 
-	for_each_sg(sg, s, request->num_mapped_sgs, i) {
+	for_each_sg(sg, s, req->num_pending_sgs, i) {
 		unsigned chain = true;
 
 		length = sg_dma_len(s);
 		dma = sg_dma_address(s);
 
-		if (sg_is_last(s)) {
-			if (usb_endpoint_xfer_int(dep->endpoint.desc) ||
-				!more_coming)
-				last = true;
-
-			chain = false;
-		}
-
-		if (!trbs_left--)
-			last = true;
-
-		if (last)
+		if (sg_is_last(s))
 			chain = false;
 
 		dwc3_prepare_one_trb(dep, req, dma, length,
-				last, chain, i);
+				chain, i);
 
-		if (last)
+		if (!dwc3_calc_trbs_left(dep))
 			break;
 	}
 }
 
 static void dwc3_prepare_one_trb_linear(struct dwc3_ep *dep,
-		struct dwc3_request *req, unsigned int trbs_left,
-		unsigned int more_coming)
+		struct dwc3_request *req)
 {
-	unsigned int	last = false;
 	unsigned int	length;
 	dma_addr_t	dma;
 
 	dma = req->request.dma;
 	length = req->request.length;
 
-	if (!trbs_left)
-		last = true;
-
-	/* Is this the last request? */
-	if (usb_endpoint_xfer_int(dep->endpoint.desc) || !more_coming)
-		last = true;
-
 	dwc3_prepare_one_trb(dep, req, dma, length,
-			last, false, 0);
+			false, 0);
 }
 
 /*
@@ -966,26 +936,19 @@
 static void dwc3_prepare_trbs(struct dwc3_ep *dep)
 {
 	struct dwc3_request	*req, *n;
-	unsigned int		more_coming;
-	u32			trbs_left;
 
 	BUILD_BUG_ON_NOT_POWER_OF_2(DWC3_TRB_NUM);
 
-	trbs_left = dwc3_calc_trbs_left(dep);
-	if (!trbs_left)
+	if (!dwc3_calc_trbs_left(dep))
 		return;
 
-	more_coming = dep->allocated_requests - dep->queued_requests;
-
 	list_for_each_entry_safe(req, n, &dep->pending_list, list) {
-		if (req->request.num_mapped_sgs > 0)
-			dwc3_prepare_one_trb_sg(dep, req, trbs_left--,
-					more_coming);
+		if (req->num_pending_sgs > 0)
+			dwc3_prepare_one_trb_sg(dep, req);
 		else
-			dwc3_prepare_one_trb_linear(dep, req, trbs_left--,
-					more_coming);
+			dwc3_prepare_one_trb_linear(dep, req);
 
-		if (!trbs_left)
+		if (!dwc3_calc_trbs_left(dep))
 			return;
 	}
 }
@@ -1101,93 +1064,29 @@
 
 	trace_dwc3_ep_queue(req);
 
-	/*
-	 * We only add to our list of requests now and
-	 * start consuming the list once we get XferNotReady
-	 * IRQ.
-	 *
-	 * That way, we avoid doing anything that we don't need
-	 * to do now and defer it until the point we receive a
-	 * particular token from the Host side.
-	 *
-	 * This will also avoid Host cancelling URBs due to too
-	 * many NAKs.
-	 */
 	ret = usb_gadget_map_request(&dwc->gadget, &req->request,
 			dep->direction);
 	if (ret)
 		return ret;
 
+	req->sg			= req->request.sg;
+	req->num_pending_sgs	= req->request.num_mapped_sgs;
+
 	list_add_tail(&req->list, &dep->pending_list);
 
-	/*
-	 * If there are no pending requests and the endpoint isn't already
-	 * busy, we will just start the request straight away.
-	 *
-	 * This will save one IRQ (XFER_NOT_READY) and possibly make it a
-	 * little bit faster.
-	 */
-	if (!usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
-			!usb_endpoint_xfer_int(dep->endpoint.desc)) {
-		ret = __dwc3_gadget_kick_transfer(dep, 0);
-		goto out;
-	}
-
-	/*
-	 * There are a few special cases:
-	 *
-	 * 1. XferNotReady with empty list of requests. We need to kick the
-	 *    transfer here in that situation, otherwise we will be NAKing
-	 *    forever. If we get XferNotReady before gadget driver has a
-	 *    chance to queue a request, we will ACK the IRQ but won't be
-	 *    able to receive the data until the next request is queued.
-	 *    The following code is handling exactly that.
-	 *
-	 */
-	if (dep->flags & DWC3_EP_PENDING_REQUEST) {
-		/*
-		 * If xfernotready is already elapsed and it is a case
-		 * of isoc transfer, then issue END TRANSFER, so that
-		 * you can receive xfernotready again and can have
-		 * notion of current microframe.
-		 */
-		if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) {
-			if (list_empty(&dep->started_list)) {
-				dwc3_stop_active_transfer(dwc, dep->number, true);
-				dep->flags = DWC3_EP_ENABLED;
-			}
-			return 0;
-		}
-
-		ret = __dwc3_gadget_kick_transfer(dep, 0);
-		if (!ret)
-			dep->flags &= ~DWC3_EP_PENDING_REQUEST;
-
-		goto out;
-	}
-
-	/*
-	 * 2. XferInProgress on Isoc EP with an active transfer. We need to
-	 *    kick the transfer here after queuing a request, otherwise the
-	 *    core may not see the modified TRB(s).
-	 */
 	if (usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
-			(dep->flags & DWC3_EP_BUSY) &&
-			!(dep->flags & DWC3_EP_MISSED_ISOC)) {
-		WARN_ON_ONCE(!dep->resource_index);
-		ret = __dwc3_gadget_kick_transfer(dep, dep->resource_index);
-		goto out;
+			dep->flags & DWC3_EP_PENDING_REQUEST) {
+		if (list_empty(&dep->started_list)) {
+			dwc3_stop_active_transfer(dwc, dep->number, true);
+			dep->flags = DWC3_EP_ENABLED;
+		}
+		return 0;
 	}
 
-	/*
-	 * 4. Stream Capable Bulk Endpoints. We need to start the transfer
-	 * right away, otherwise host will not know we have streams to be
-	 * handled.
-	 */
-	if (dep->stream_capable)
-		ret = __dwc3_gadget_kick_transfer(dep, 0);
+	if (!dwc3_calc_trbs_left(dep))
+		return 0;
 
-out:
+	ret = __dwc3_gadget_kick_transfer(dep, 0);
 	if (ret && ret != -EBUSY)
 		dwc3_trace(trace_dwc3_gadget,
 				"%s: failed to kick transfers",
@@ -1433,7 +1332,7 @@
 
 static int __dwc3_gadget_wakeup(struct dwc3 *dwc)
 {
-	unsigned long		timeout;
+	int			retries;
 
 	int			ret;
 	u32			reg;
@@ -1484,9 +1383,9 @@
 	}
 
 	/* poll until Link State changes to ON */
-	timeout = jiffies + msecs_to_jiffies(100);
+	retries = 20000;
 
-	while (!time_after(jiffies, timeout)) {
+	while (retries--) {
 		reg = dwc3_readl(dwc->regs, DWC3_DSTS);
 
 		/* in HS, means ON */
@@ -1955,27 +1854,35 @@
 
 static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep,
 		struct dwc3_request *req, struct dwc3_trb *trb,
-		const struct dwc3_event_depevt *event, int status)
+		const struct dwc3_event_depevt *event, int status,
+		int chain)
 {
 	unsigned int		count;
 	unsigned int		s_pkt = 0;
 	unsigned int		trb_status;
 
 	dep->queued_requests--;
+	dwc3_ep_inc_deq(dep);
 	trace_dwc3_complete_trb(dep, trb);
 
+	/*
+	 * If we're in the middle of series of chained TRBs and we
+	 * receive a short transfer along the way, DWC3 will skip
+	 * through all TRBs including the last TRB in the chain (the
+	 * where CHN bit is zero. DWC3 will also avoid clearing HWO
+	 * bit and SW has to do it manually.
+	 *
+	 * We're going to do that here to avoid problems of HW trying
+	 * to use bogus TRBs for transfers.
+	 */
+	if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO))
+		trb->ctrl &= ~DWC3_TRB_CTRL_HWO;
+
 	if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN)
-		/*
-		 * We continue despite the error. There is not much we
-		 * can do. If we don't clean it up we loop forever. If
-		 * we skip the TRB then it gets overwritten after a
-		 * while since we use them in a ring buffer. A BUG()
-		 * would help. Lets hope that if this occurs, someone
-		 * fixes the root cause instead of looking away :)
-		 */
-		dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n",
-				dep->name, trb);
+		return 1;
+
 	count = trb->size & DWC3_TRB_SIZE_MASK;
+	req->request.actual += count;
 
 	if (dep->direction) {
 		if (count) {
@@ -2013,59 +1920,76 @@
 			s_pkt = 1;
 	}
 
-	/*
-	 * We assume here we will always receive the entire data block
-	 * which we should receive. Meaning, if we program RX to
-	 * receive 4K but we receive only 2K, we assume that's all we
-	 * should receive and we simply bounce the request back to the
-	 * gadget driver for further processing.
-	 */
-	req->request.actual += req->request.length - count;
-	if (s_pkt)
+	if (s_pkt && !chain)
 		return 1;
-	if ((event->status & DEPEVT_STATUS_LST) &&
-			(trb->ctrl & (DWC3_TRB_CTRL_LST |
-				DWC3_TRB_CTRL_HWO)))
-		return 1;
+
 	if ((event->status & DEPEVT_STATUS_IOC) &&
 			(trb->ctrl & DWC3_TRB_CTRL_IOC))
 		return 1;
+
 	return 0;
 }
 
 static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
 		const struct dwc3_event_depevt *event, int status)
 {
-	struct dwc3_request	*req;
+	struct dwc3_request	*req, *n;
 	struct dwc3_trb		*trb;
-	unsigned int		slot;
-	unsigned int		i;
+	bool			ioc = false;
 	int			ret;
 
-	do {
-		req = next_request(&dep->started_list);
-		if (WARN_ON_ONCE(!req))
-			return 1;
+	list_for_each_entry_safe(req, n, &dep->started_list, list) {
+		unsigned length;
+		unsigned actual;
+		int chain;
 
-		i = 0;
-		do {
-			slot = req->first_trb_index + i;
-			if (slot == DWC3_TRB_NUM - 1)
-				slot++;
-			slot %= DWC3_TRB_NUM;
-			trb = &dep->trb_pool[slot];
+		length = req->request.length;
+		chain = req->num_pending_sgs > 0;
+		if (chain) {
+			struct scatterlist *sg = req->sg;
+			struct scatterlist *s;
+			unsigned int pending = req->num_pending_sgs;
+			unsigned int i;
 
+			for_each_sg(sg, s, pending, i) {
+				trb = &dep->trb_pool[dep->trb_dequeue];
+
+				req->sg = sg_next(s);
+				req->num_pending_sgs--;
+
+				ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
+						event, status, chain);
+				if (ret)
+					break;
+			}
+		} else {
+			trb = &dep->trb_pool[dep->trb_dequeue];
 			ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
-					event, status);
-			if (ret)
-				break;
-		} while (++i < req->request.num_mapped_sgs);
+					event, status, chain);
+		}
+
+		/*
+		 * We assume here we will always receive the entire data block
+		 * which we should receive. Meaning, if we program RX to
+		 * receive 4K but we receive only 2K, we assume that's all we
+		 * should receive and we simply bounce the request back to the
+		 * gadget driver for further processing.
+		 */
+		actual = length - req->request.actual;
+		req->request.actual = actual;
+
+		if (ret && chain && (actual < length) && req->num_pending_sgs)
+			return __dwc3_gadget_kick_transfer(dep, 0);
 
 		dwc3_gadget_giveback(dep, req, status);
 
-		if (ret)
+		if (ret) {
+			if ((event->status & DEPEVT_STATUS_IOC) &&
+			    (trb->ctrl & DWC3_TRB_CTRL_IOC))
+				ioc = true;
 			break;
-	} while (1);
+		}
+	}
 
 	/*
 	 * Our endpoint might get disabled by another thread during
@@ -2092,10 +2016,9 @@
 		return 1;
 	}
 
-	if (usb_endpoint_xfer_isoc(dep->endpoint.desc))
-		if ((event->status & DEPEVT_STATUS_IOC) &&
-				(trb->ctrl & DWC3_TRB_CTRL_IOC))
-			return 0;
+	if (usb_endpoint_xfer_isoc(dep->endpoint.desc) && ioc)
+		return 0;
+
 	return 1;
 }
 
@@ -2311,6 +2234,18 @@
 	 *
 	 * - Issue EndTransfer WITH CMDIOC bit set
 	 * - Wait 100us
+	 *
+	 * As of IP version 3.10a of the DWC_usb3 IP, the controller
+	 * supports a mode to work around the above limitation. The
+	 * software can poll the CMDACT bit in the DEPCMD register
+	 * after issuing a EndTransfer command. This mode is enabled
+	 * by writing GUCTL2[14]. This polling is already done in the
+	 * dwc3_send_gadget_ep_cmd() function so if the mode is
+	 * enabled, the EndTransfer command will have completed upon
+	 * returning from this function and we don't need to delay for
+	 * 100us.
+	 *
+	 * This mode is NOT available on the DWC_usb31 IP.
 	 */
 
 	cmd = DWC3_DEPCMD_ENDTRANSFER;
@@ -2322,7 +2257,9 @@
 	WARN_ON_ONCE(ret);
 	dep->resource_index = 0;
 	dep->flags &= ~DWC3_EP_BUSY;
-	udelay(100);
+
+	if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A)
+		udelay(100);
 }
 
 static void dwc3_stop_active_transfers(struct dwc3 *dwc)
diff --git a/drivers/usb/dwc3/ulpi.c b/drivers/usb/dwc3/ulpi.c
index ec004c6..bd86f84 100644
--- a/drivers/usb/dwc3/ulpi.c
+++ b/drivers/usb/dwc3/ulpi.c
@@ -35,9 +35,9 @@
 	return -ETIMEDOUT;
 }
 
-static int dwc3_ulpi_read(struct ulpi_ops *ops, u8 addr)
+static int dwc3_ulpi_read(struct device *dev, u8 addr)
 {
-	struct dwc3 *dwc = dev_get_drvdata(ops->dev);
+	struct dwc3 *dwc = dev_get_drvdata(dev);
 	u32 reg;
 	int ret;
 
@@ -53,9 +53,9 @@
 	return DWC3_GUSB2PHYACC_DATA(reg);
 }
 
-static int dwc3_ulpi_write(struct ulpi_ops *ops, u8 addr, u8 val)
+static int dwc3_ulpi_write(struct device *dev, u8 addr, u8 val)
 {
-	struct dwc3 *dwc = dev_get_drvdata(ops->dev);
+	struct dwc3 *dwc = dev_get_drvdata(dev);
 	u32 reg;
 
 	reg = DWC3_GUSB2PHYACC_NEWREGREQ | DWC3_ULPI_ADDR(addr);
@@ -65,7 +65,7 @@
 	return dwc3_ulpi_busyloop(dwc);
 }
 
-static struct ulpi_ops dwc3_ulpi_ops = {
+static const struct ulpi_ops dwc3_ulpi_ops = {
 	.read = dwc3_ulpi_read,
 	.write = dwc3_ulpi_write,
 };
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 3c3f31c..8ad2032 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -209,25 +209,6 @@
 config USB_F_TCM
 	tristate
 
-choice
-	tristate "USB Gadget Drivers"
-	default USB_ETH
-	help
-	  A Linux "Gadget Driver" talks to the USB Peripheral Controller
-	  driver through the abstract "gadget" API.  Some other operating
-	  systems call these "client" drivers, of which "class drivers"
-	  are a subset (implementing a USB device class specification).
-	  A gadget driver implements one or more USB functions using
-	  the peripheral hardware.
-
-	  Gadget drivers are hardware-neutral, or "platform independent",
-	  except that they sometimes must understand quirks or limitations
-	  of the particular controllers they work with.  For example, when
-	  a controller doesn't support alternate configurations or provide
-	  enough of the right types of endpoints, the gadget driver might
-	  not be able work with that controller, or might need to implement
-	  a less common variant of a device class protocol.
-
 # this first set of drivers all depend on bulk-capable hardware.
 
 config USB_CONFIGFS
@@ -439,6 +420,7 @@
 config USB_CONFIGFS_F_UVC
 	bool "USB Webcam function"
 	depends on USB_CONFIGFS
+	depends on VIDEO_V4L2
 	depends on VIDEO_DEV
 	select VIDEOBUF2_VMALLOC
 	select USB_F_UVC
@@ -475,6 +457,25 @@
 	  Both protocols can work on USB2.0 and USB3.0.
 	  UAS utilizes the USB 3.0 feature called streams support.
 
+choice
+	tristate "USB Gadget Drivers"
+	default USB_ETH
+	help
+	  A Linux "Gadget Driver" talks to the USB Peripheral Controller
+	  driver through the abstract "gadget" API.  Some other operating
+	  systems call these "client" drivers, of which "class drivers"
+	  are a subset (implementing a USB device class specification).
+	  A gadget driver implements one or more USB functions using
+	  the peripheral hardware.
+
+	  Gadget drivers are hardware-neutral, or "platform independent",
+	  except that they sometimes must understand quirks or limitations
+	  of the particular controllers they work with.  For example, when
+	  a controller doesn't support alternate configurations or provide
+	  enough of the right types of endpoints, the gadget driver might
+	  not be able work with that controller, or might need to implement
+	  a less common variant of a device class protocol.
+
 source "drivers/usb/gadget/legacy/Kconfig"
 
 endchoice
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index eb64848..32176f7 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1893,17 +1893,21 @@
 		/* functions always handle their interfaces and endpoints...
 		 * punt other recipients (other, WUSB, ...) to the current
 		 * configuration code.
-		 *
-		 * REVISIT it could make sense to let the composite device
-		 * take such requests too, if that's ever needed:  to work
-		 * in config 0, etc.
 		 */
 		if (cdev->config) {
 			list_for_each_entry(f, &cdev->config->functions, list)
-				if (f->req_match && f->req_match(f, ctrl))
+				if (f->req_match &&
+				    f->req_match(f, ctrl, false))
 					goto try_fun_setup;
-			f = NULL;
+		} else {
+			struct usb_configuration *c;
+			list_for_each_entry(c, &cdev->configs, list)
+				list_for_each_entry(f, &c->functions, list)
+					if (f->req_match &&
+					    f->req_match(f, ctrl, true))
+						goto try_fun_setup;
 		}
+		f = NULL;
 
 		switch (ctrl->bRequestType & USB_RECIP_MASK) {
 		case USB_RECIP_INTERFACE:
@@ -1913,6 +1917,8 @@
 			break;
 
 		case USB_RECIP_ENDPOINT:
+			if (!cdev->config)
+				break;
 			endp = ((w_index & 0x80) >> 3) | (w_index & 0x0f);
 			list_for_each_entry(f, &cdev->config->functions, list) {
 				if (test_bit(endp, f->endpoints))
@@ -2124,14 +2130,14 @@
 
 	cdev->os_desc_req = usb_ep_alloc_request(ep0, GFP_KERNEL);
 	if (!cdev->os_desc_req) {
-		ret = PTR_ERR(cdev->os_desc_req);
+		ret = -ENOMEM;
 		goto end;
 	}
 
 	/* OS feature descriptor length <= 4kB */
 	cdev->os_desc_req->buf = kmalloc(4096, GFP_KERNEL);
 	if (!cdev->os_desc_req->buf) {
-		ret = PTR_ERR(cdev->os_desc_req->buf);
+		ret = -ENOMEM;
 		kfree(cdev->os_desc_req);
 		goto end;
 	}
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 70cf347..3984787 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1211,8 +1211,9 @@
 
 			list_move_tail(&f->list, &cfg->func_list);
 			if (f->unbind) {
-				dev_err(&gi->cdev.gadget->dev, "unbind function"
-						" '%s'/%p\n", f->name, f);
+				dev_dbg(&gi->cdev.gadget->dev,
+				         "unbind function '%s'/%p\n",
+				         f->name, f);
 				f->unbind(c, f);
 			}
 		}
@@ -1490,7 +1491,9 @@
 {
 	struct gadget_info *gi = to_gadget_info(item);
 
+	mutex_lock(&gi->lock);
 	unregister_gadget(gi);
+	mutex_unlock(&gi->lock);
 }
 EXPORT_SYMBOL_GPL(unregister_gadget_item);
 
diff --git a/drivers/usb/gadget/function/f_eem.c b/drivers/usb/gadget/function/f_eem.c
index d58bfc3..007ec6e 100644
--- a/drivers/usb/gadget/function/f_eem.c
+++ b/drivers/usb/gadget/function/f_eem.c
@@ -341,11 +341,15 @@
 {
 	struct sk_buff	*skb2 = NULL;
 	struct usb_ep	*in = port->in_ep;
-	int		padlen = 0;
-	u16		len = skb->len;
+	int		headroom, tailroom, padlen = 0;
+	u16		len;
 
-	int headroom = skb_headroom(skb);
-	int tailroom = skb_tailroom(skb);
+	if (!skb)
+		return NULL;
+
+	len = skb->len;
+	headroom = skb_headroom(skb);
+	tailroom = skb_tailroom(skb);
 
 	/* When (len + EEM_HLEN + ETH_FCS_LEN) % in->maxpacket) is 0,
 	 * stick two bytes of zero-length EEM packet on the end.
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 5c8429f..0aeed85 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -98,6 +98,9 @@
 static void ffs_func_disable(struct usb_function *);
 static int ffs_func_setup(struct usb_function *,
 			  const struct usb_ctrlrequest *);
+static bool ffs_func_req_match(struct usb_function *,
+			       const struct usb_ctrlrequest *,
+			       bool config0);
 static void ffs_func_suspend(struct usb_function *);
 static void ffs_func_resume(struct usb_function *);
 
@@ -2243,7 +2246,9 @@
 			      FUNCTIONFS_HAS_SS_DESC |
 			      FUNCTIONFS_HAS_MS_OS_DESC |
 			      FUNCTIONFS_VIRTUAL_ADDR |
-			      FUNCTIONFS_EVENTFD)) {
+			      FUNCTIONFS_EVENTFD |
+			      FUNCTIONFS_ALL_CTRL_RECIP |
+			      FUNCTIONFS_CONFIG0_SETUP)) {
 			ret = -ENOSYS;
 			goto error;
 		}
@@ -3094,8 +3099,9 @@
 	 * handle them.  All other either handled by composite or
 	 * passed to usb_configuration->setup() (if one is set).  No
 	 * matter, we will handle requests directed to endpoint here
-	 * as well (as it's straightforward) but what to do with any
-	 * other request?
+	 * as well (as it's straightforward).  Other request recipient
+	 * types are only handled when the user flag FUNCTIONFS_ALL_CTRL_RECIP
+	 * is being used.
 	 */
 	if (ffs->state != FFS_ACTIVE)
 		return -ENODEV;
@@ -3116,7 +3122,10 @@
 		break;
 
 	default:
-		return -EOPNOTSUPP;
+		if (func->ffs->user_flags & FUNCTIONFS_ALL_CTRL_RECIP)
+			ret = le16_to_cpu(creq->wIndex);
+		else
+			return -EOPNOTSUPP;
 	}
 
 	spin_lock_irqsave(&ffs->ev.waitq.lock, flags);
@@ -3128,6 +3137,28 @@
 	return 0;
 }
 
+static bool ffs_func_req_match(struct usb_function *f,
+			       const struct usb_ctrlrequest *creq,
+			       bool config0)
+{
+	struct ffs_function *func = ffs_func_from_usb(f);
+
+	if (config0 && !(func->ffs->user_flags & FUNCTIONFS_CONFIG0_SETUP))
+		return false;
+
+	switch (creq->bRequestType & USB_RECIP_MASK) {
+	case USB_RECIP_INTERFACE:
+		return ffs_func_revmap_intf(func,
+					    le16_to_cpu(creq->wIndex) >= 0);
+	case USB_RECIP_ENDPOINT:
+		return ffs_func_revmap_ep(func,
+					  le16_to_cpu(creq->wIndex) >= 0);
+	default:
+		return (bool) (func->ffs->user_flags &
+			       FUNCTIONFS_ALL_CTRL_RECIP);
+	}
+}
+
 static void ffs_func_suspend(struct usb_function *f)
 {
 	ENTER();
@@ -3378,6 +3409,7 @@
 	func->function.set_alt = ffs_func_set_alt;
 	func->function.disable = ffs_func_disable;
 	func->function.setup   = ffs_func_setup;
+	func->function.req_match = ffs_func_req_match;
 	func->function.suspend = ffs_func_suspend;
 	func->function.resume  = ffs_func_resume;
 	func->function.free_func = ffs_free;
@@ -3470,6 +3502,11 @@
 	list_del(&dev->entry);
 	if (dev->name_allocated)
 		kfree(dev->name);
+
+	/* Clear the private_data pointer to stop incorrect dev access */
+	if (dev->ffs_data)
+		dev->ffs_data->private_data = NULL;
+
 	kfree(dev);
 	if (list_empty(&ffs_devices))
 		functionfs_cleanup();
diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 51980c5..e2966f8 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -365,7 +365,7 @@
 static inline struct usb_request *hidg_alloc_ep_req(struct usb_ep *ep,
 						    unsigned length)
 {
-	return alloc_ep_req(ep, length, length);
+	return alloc_ep_req(ep, length);
 }
 
 static void hidg_set_report_complete(struct usb_ep *ep, struct usb_request *req)
@@ -617,14 +617,10 @@
 
 	/* preallocate request and buffer */
 	status = -ENOMEM;
-	hidg->req = usb_ep_alloc_request(hidg->in_ep, GFP_KERNEL);
+	hidg->req = alloc_ep_req(hidg->in_ep, hidg->report_length);
 	if (!hidg->req)
 		goto fail;
 
-	hidg->req->buf = kmalloc(hidg->report_length, GFP_KERNEL);
-	if (!hidg->req->buf)
-		goto fail;
-
 	/* set descriptor dynamic values */
 	hidg_interface_desc.bInterfaceSubClass = hidg->bInterfaceSubClass;
 	hidg_interface_desc.bInterfaceProtocol = hidg->bInterfaceProtocol;
@@ -677,11 +673,8 @@
 	usb_free_all_descriptors(f);
 fail:
 	ERROR(f->config->cdev, "hidg_bind FAILED\n");
-	if (hidg->req != NULL) {
-		kfree(hidg->req->buf);
-		if (hidg->in_ep != NULL)
-			usb_ep_free_request(hidg->in_ep, hidg->req);
-	}
+	if (hidg->req != NULL)
+		free_ep_req(hidg->in_ep, hidg->req);
 
 	return status;
 }
@@ -809,11 +802,21 @@
 
 CONFIGFS_ATTR(f_hid_opts_, report_desc);
 
+static ssize_t f_hid_opts_dev_show(struct config_item *item, char *page)
+{
+	struct f_hid_opts *opts = to_f_hid_opts(item);
+
+	return sprintf(page, "%d:%d\n", major, opts->minor);
+}
+
+CONFIGFS_ATTR_RO(f_hid_opts_, dev);
+
 static struct configfs_attribute *hid_attrs[] = {
 	&f_hid_opts_attr_subclass,
 	&f_hid_opts_attr_protocol,
 	&f_hid_opts_attr_report_length,
 	&f_hid_opts_attr_report_desc,
+	&f_hid_opts_attr_dev,
 	NULL,
 };
 
@@ -910,8 +913,7 @@
 
 	/* disable/free request and end point */
 	usb_ep_disable(hidg->in_ep);
-	kfree(hidg->req->buf);
-	usb_ep_free_request(hidg->in_ep, hidg->req);
+	free_ep_req(hidg->in_ep, hidg->req);
 
 	usb_free_all_descriptors(f);
 }
diff --git a/drivers/usb/gadget/function/f_loopback.c b/drivers/usb/gadget/function/f_loopback.c
index 3a9f8f9..e700938 100644
--- a/drivers/usb/gadget/function/f_loopback.c
+++ b/drivers/usb/gadget/function/f_loopback.c
@@ -308,9 +308,7 @@
 
 static inline struct usb_request *lb_alloc_ep_req(struct usb_ep *ep, int len)
 {
-	struct f_loopback	*loop = ep->driver_data;
-
-	return alloc_ep_req(ep, len, loop->buflen);
+	return alloc_ep_req(ep, len);
 }
 
 static int alloc_requests(struct usb_composite_dev *cdev,
@@ -333,7 +331,7 @@
 		if (!in_req)
 			goto fail;
 
-		out_req = lb_alloc_ep_req(loop->out_ep, 0);
+		out_req = lb_alloc_ep_req(loop->out_ep, loop->buflen);
 		if (!out_req)
 			goto fail_in;
 
@@ -593,13 +591,9 @@
 
 int __init lb_modinit(void)
 {
-	int ret;
-
-	ret = usb_function_register(&Loopbackusb_func);
-	if (ret)
-		return ret;
-	return ret;
+	return usb_function_register(&Loopbackusb_func);
 }
+
 void __exit lb_modexit(void)
 {
 	usb_function_unregister(&Loopbackusb_func);
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 2505117..8f3659b 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -311,11 +311,7 @@
 	/* Gadget's private data. */
 	void			*private_data;
 
-	/*
-	 * Vendor (8 chars), product (16 chars), release (4
-	 * hexadecimal digits) and NUL byte
-	 */
-	char inquiry_string[8 + 16 + 4 + 1];
+	char inquiry_string[INQUIRY_STRING_LEN];
 
 	struct kref		ref;
 };
@@ -1107,7 +1103,12 @@
 	buf[5] = 0;		/* No special options */
 	buf[6] = 0;
 	buf[7] = 0;
-	memcpy(buf + 8, common->inquiry_string, sizeof common->inquiry_string);
+	if (curlun->inquiry_string[0])
+		memcpy(buf + 8, curlun->inquiry_string,
+		       sizeof(curlun->inquiry_string));
+	else
+		memcpy(buf + 8, common->inquiry_string,
+		       sizeof(common->inquiry_string));
 	return 36;
 }
 
@@ -3209,12 +3210,27 @@
 
 CONFIGFS_ATTR(fsg_lun_opts_, nofua);
 
+static ssize_t fsg_lun_opts_inquiry_string_show(struct config_item *item,
+						char *page)
+{
+	return fsg_show_inquiry_string(to_fsg_lun_opts(item)->lun, page);
+}
+
+static ssize_t fsg_lun_opts_inquiry_string_store(struct config_item *item,
+						 const char *page, size_t len)
+{
+	return fsg_store_inquiry_string(to_fsg_lun_opts(item)->lun, page, len);
+}
+
+CONFIGFS_ATTR(fsg_lun_opts_, inquiry_string);
+
 static struct configfs_attribute *fsg_lun_attrs[] = {
 	&fsg_lun_opts_attr_file,
 	&fsg_lun_opts_attr_ro,
 	&fsg_lun_opts_attr_removable,
 	&fsg_lun_opts_attr_cdrom,
 	&fsg_lun_opts_attr_nofua,
+	&fsg_lun_opts_attr_inquiry_string,
 	NULL,
 };
 
diff --git a/drivers/usb/gadget/function/f_mass_storage.h b/drivers/usb/gadget/function/f_mass_storage.h
index b6a9918..d390231 100644
--- a/drivers/usb/gadget/function/f_mass_storage.h
+++ b/drivers/usb/gadget/function/f_mass_storage.h
@@ -100,6 +100,7 @@
 	char removable;
 	char cdrom;
 	char nofua;
+	char inquiry_string[INQUIRY_STRING_LEN];
 };
 
 struct fsg_config {
diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
index 58fc199..a5719f2 100644
--- a/drivers/usb/gadget/function/f_midi.c
+++ b/drivers/usb/gadget/function/f_midi.c
@@ -51,6 +51,19 @@
  */
 #define MAX_PORTS 16
 
+/* MIDI message states */
+enum {
+	STATE_INITIAL = 0,	/* pseudo state */
+	STATE_1PARAM,
+	STATE_2PARAM_1,
+	STATE_2PARAM_2,
+	STATE_SYSEX_0,
+	STATE_SYSEX_1,
+	STATE_SYSEX_2,
+	STATE_REAL_TIME,
+	STATE_FINISHED,		/* pseudo state */
+};
+
 /*
  * This is a gadget, and the IN/OUT naming is from the host's perspective.
  * USB -> OUT endpoint -> rawmidi
@@ -61,13 +74,6 @@
 	int active;
 	uint8_t cable;
 	uint8_t state;
-#define STATE_UNKNOWN	0
-#define STATE_1PARAM	1
-#define STATE_2PARAM_1	2
-#define STATE_2PARAM_2	3
-#define STATE_SYSEX_0	4
-#define STATE_SYSEX_1	5
-#define STATE_SYSEX_2	6
 	uint8_t data[2];
 };
 
@@ -205,7 +211,7 @@
 static inline struct usb_request *midi_alloc_ep_req(struct usb_ep *ep,
 						    unsigned length)
 {
-	return alloc_ep_req(ep, length, length);
+	return alloc_ep_req(ep, length);
 }
 
 static const uint8_t f_midi_cin_length[] = {
@@ -299,6 +305,19 @@
 	}
 }
 
+static void f_midi_drop_out_substreams(struct f_midi *midi)
+{
+	unsigned int i;
+
+	for (i = 0; i < midi->in_ports; i++) {
+		struct gmidi_in_port *port = midi->in_ports_array + i;
+		struct snd_rawmidi_substream *substream = port->substream;
+
+		if (port->active && substream)
+			snd_rawmidi_drop_output(substream);
+	}
+}
+
 static int f_midi_start_ep(struct f_midi *midi,
 			   struct usb_function *f,
 			   struct usb_ep *ep)
@@ -360,9 +379,8 @@
 	/* allocate a bunch of read buffers and queue them all at once. */
 	for (i = 0; i < midi->qlen && err == 0; i++) {
 		struct usb_request *req =
-			midi_alloc_ep_req(midi->out_ep,
-				max_t(unsigned, midi->buflen,
-					bulk_out_desc.wMaxPacketSize));
+			midi_alloc_ep_req(midi->out_ep, midi->buflen);
+
 		if (req == NULL)
 			return -ENOMEM;
 
@@ -397,6 +415,8 @@
 	/* release IN requests */
 	while (kfifo_get(&midi->in_req_fifo, &req))
 		free_ep_req(midi->in_ep, req);
+
+	f_midi_drop_out_substreams(midi);
 }
 
 static int f_midi_snd_free(struct snd_device *device)
@@ -404,130 +424,166 @@
 	return 0;
 }
 
-static void f_midi_transmit_packet(struct usb_request *req, uint8_t p0,
-					uint8_t p1, uint8_t p2, uint8_t p3)
-{
-	unsigned length = req->length;
-	u8 *buf = (u8 *)req->buf + length;
-
-	buf[0] = p0;
-	buf[1] = p1;
-	buf[2] = p2;
-	buf[3] = p3;
-	req->length = length + 4;
-}
-
 /*
  * Converts MIDI commands to USB MIDI packets.
  */
 static void f_midi_transmit_byte(struct usb_request *req,
 				 struct gmidi_in_port *port, uint8_t b)
 {
-	uint8_t p0 = port->cable << 4;
+	uint8_t p[4] = { port->cable << 4, 0, 0, 0 };
+	uint8_t next_state = STATE_INITIAL;
 
-	if (b >= 0xf8) {
-		f_midi_transmit_packet(req, p0 | 0x0f, b, 0, 0);
-	} else if (b >= 0xf0) {
+	switch (b) {
+	case 0xf8 ... 0xff:
+		/* System Real-Time Messages */
+		p[0] |= 0x0f;
+		p[1] = b;
+		next_state = port->state;
+		port->state = STATE_REAL_TIME;
+		break;
+
+	case 0xf7:
+		/* End of SysEx */
+		switch (port->state) {
+		case STATE_SYSEX_0:
+			p[0] |= 0x05;
+			p[1] = 0xf7;
+			next_state = STATE_FINISHED;
+			break;
+		case STATE_SYSEX_1:
+			p[0] |= 0x06;
+			p[1] = port->data[0];
+			p[2] = 0xf7;
+			next_state = STATE_FINISHED;
+			break;
+		case STATE_SYSEX_2:
+			p[0] |= 0x07;
+			p[1] = port->data[0];
+			p[2] = port->data[1];
+			p[3] = 0xf7;
+			next_state = STATE_FINISHED;
+			break;
+		default:
+			/* Ignore byte */
+			next_state = port->state;
+			port->state = STATE_INITIAL;
+		}
+		break;
+
+	case 0xf0 ... 0xf6:
+		/* System Common Messages */
+		port->data[0] = port->data[1] = 0;
+		port->state = STATE_INITIAL;
 		switch (b) {
 		case 0xf0:
 			port->data[0] = b;
-			port->state = STATE_SYSEX_1;
+			port->data[1] = 0;
+			next_state = STATE_SYSEX_1;
 			break;
 		case 0xf1:
 		case 0xf3:
 			port->data[0] = b;
-			port->state = STATE_1PARAM;
+			next_state = STATE_1PARAM;
 			break;
 		case 0xf2:
 			port->data[0] = b;
-			port->state = STATE_2PARAM_1;
+			next_state = STATE_2PARAM_1;
 			break;
 		case 0xf4:
 		case 0xf5:
-			port->state = STATE_UNKNOWN;
+			next_state = STATE_INITIAL;
 			break;
 		case 0xf6:
-			f_midi_transmit_packet(req, p0 | 0x05, 0xf6, 0, 0);
-			port->state = STATE_UNKNOWN;
-			break;
-		case 0xf7:
-			switch (port->state) {
-			case STATE_SYSEX_0:
-				f_midi_transmit_packet(req,
-					p0 | 0x05, 0xf7, 0, 0);
-				break;
-			case STATE_SYSEX_1:
-				f_midi_transmit_packet(req,
-					p0 | 0x06, port->data[0], 0xf7, 0);
-				break;
-			case STATE_SYSEX_2:
-				f_midi_transmit_packet(req,
-					p0 | 0x07, port->data[0],
-					port->data[1], 0xf7);
-				break;
-			}
-			port->state = STATE_UNKNOWN;
+			p[0] |= 0x05;
+			p[1] = 0xf6;
+			next_state = STATE_FINISHED;
 			break;
 		}
-	} else if (b >= 0x80) {
+		break;
+
+	case 0x80 ... 0xef:
+		/*
+		 * Channel Voice Messages, Channel Mode Messages
+		 * and Control Change Messages.
+		 */
 		port->data[0] = b;
+		port->data[1] = 0;
+		port->state = STATE_INITIAL;
 		if (b >= 0xc0 && b <= 0xdf)
-			port->state = STATE_1PARAM;
+			next_state = STATE_1PARAM;
 		else
-			port->state = STATE_2PARAM_1;
-	} else { /* b < 0x80 */
+			next_state = STATE_2PARAM_1;
+		break;
+
+	case 0x00 ... 0x7f:
+		/* Message parameters */
 		switch (port->state) {
 		case STATE_1PARAM:
-			if (port->data[0] < 0xf0) {
-				p0 |= port->data[0] >> 4;
-			} else {
-				p0 |= 0x02;
-				port->state = STATE_UNKNOWN;
-			}
-			f_midi_transmit_packet(req, p0, port->data[0], b, 0);
+			if (port->data[0] < 0xf0)
+				p[0] |= port->data[0] >> 4;
+			else
+				p[0] |= 0x02;
+
+			p[1] = port->data[0];
+			p[2] = b;
+			/* This is to allow Running State Messages */
+			next_state = STATE_1PARAM;
 			break;
 		case STATE_2PARAM_1:
 			port->data[1] = b;
-			port->state = STATE_2PARAM_2;
+			next_state = STATE_2PARAM_2;
 			break;
 		case STATE_2PARAM_2:
-			if (port->data[0] < 0xf0) {
-				p0 |= port->data[0] >> 4;
-				port->state = STATE_2PARAM_1;
-			} else {
-				p0 |= 0x03;
-				port->state = STATE_UNKNOWN;
-			}
-			f_midi_transmit_packet(req,
-				p0, port->data[0], port->data[1], b);
+			if (port->data[0] < 0xf0)
+				p[0] |= port->data[0] >> 4;
+			else
+				p[0] |= 0x03;
+
+			p[1] = port->data[0];
+			p[2] = port->data[1];
+			p[3] = b;
+			/* This is to allow Running State Messages */
+			next_state = STATE_2PARAM_1;
 			break;
 		case STATE_SYSEX_0:
 			port->data[0] = b;
-			port->state = STATE_SYSEX_1;
+			next_state = STATE_SYSEX_1;
 			break;
 		case STATE_SYSEX_1:
 			port->data[1] = b;
-			port->state = STATE_SYSEX_2;
+			next_state = STATE_SYSEX_2;
 			break;
 		case STATE_SYSEX_2:
-			f_midi_transmit_packet(req,
-				p0 | 0x04, port->data[0], port->data[1], b);
-			port->state = STATE_SYSEX_0;
+			p[0] |= 0x04;
+			p[1] = port->data[0];
+			p[2] = port->data[1];
+			p[3] = b;
+			next_state = STATE_SYSEX_0;
 			break;
 		}
+		break;
 	}
-}
 
-static void f_midi_drop_out_substreams(struct f_midi *midi)
-{
-	unsigned int i;
+	/* States where we have to write into the USB request */
+	if (next_state == STATE_FINISHED ||
+	    port->state == STATE_SYSEX_2 ||
+	    port->state == STATE_1PARAM ||
+	    port->state == STATE_2PARAM_2 ||
+	    port->state == STATE_REAL_TIME) {
 
-	for (i = 0; i < midi->in_ports; i++) {
-		struct gmidi_in_port *port = midi->in_ports_array + i;
-		struct snd_rawmidi_substream *substream = port->substream;
-		if (port->active && substream)
-			snd_rawmidi_drop_output(substream);
+		unsigned int length = req->length;
+		u8 *buf = (u8 *)req->buf + length;
+
+		memcpy(buf, p, sizeof(p));
+		req->length = length + sizeof(p);
+
+		if (next_state == STATE_FINISHED) {
+			next_state = STATE_INITIAL;
+			port->data[0] = port->data[1] = 0;
+		}
 	}
+
+	port->state = next_state;
 }
 
 static int f_midi_do_transmit(struct f_midi *midi, struct usb_ep *ep)
@@ -642,7 +698,7 @@
 	VDBG(midi, "%s()\n", __func__);
 	port = midi->in_ports_array + substream->number;
 	port->substream = substream;
-	port->state = STATE_UNKNOWN;
+	port->state = STATE_INITIAL;
 	return 0;
 }
 
@@ -1123,7 +1179,7 @@
 	opts->func_inst.free_func_inst = f_midi_free_inst;
 	opts->index = SNDRV_DEFAULT_IDX1;
 	opts->id = SNDRV_DEFAULT_STR1;
-	opts->buflen = 256;
+	opts->buflen = 512;
 	opts->qlen = 32;
 	opts->in_ports = 1;
 	opts->out_ports = 1;
diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index 97f0a9b..6396037 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -90,7 +90,9 @@
 /* peak (theoretical) bulk transfer rate in bits-per-second */
 static inline unsigned ncm_bitrate(struct usb_gadget *g)
 {
-	if (gadget_is_dualspeed(g) && g->speed == USB_SPEED_HIGH)
+	if (gadget_is_superspeed(g) && g->speed == USB_SPEED_SUPER)
+		return 13 * 1024 * 8 * 1000 * 8;
+	else if (gadget_is_dualspeed(g) && g->speed == USB_SPEED_HIGH)
 		return 13 * 512 * 8 * 1000 * 8;
 	else
 		return 19 *  64 * 1 * 1000 * 8;
@@ -333,6 +335,76 @@
 	NULL,
 };
 
+
+/* super speed support: */
+
+static struct usb_endpoint_descriptor ss_ncm_notify_desc = {
+	.bLength =		USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+
+	.bEndpointAddress =	USB_DIR_IN,
+	.bmAttributes =		USB_ENDPOINT_XFER_INT,
+	.wMaxPacketSize =	cpu_to_le16(NCM_STATUS_BYTECOUNT),
+	.bInterval =		USB_MS_TO_HS_INTERVAL(NCM_STATUS_INTERVAL_MS)
+};
+
+static struct usb_ss_ep_comp_descriptor ss_ncm_notify_comp_desc = {
+	.bLength =		sizeof(ss_ncm_notify_comp_desc),
+	.bDescriptorType =	USB_DT_SS_ENDPOINT_COMP,
+
+	/* the following 3 values can be tweaked if necessary */
+	/* .bMaxBurst =		0, */
+	/* .bmAttributes =	0, */
+	.wBytesPerInterval =	cpu_to_le16(NCM_STATUS_BYTECOUNT),
+};
+
+static struct usb_endpoint_descriptor ss_ncm_in_desc = {
+	.bLength =		USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+
+	.bEndpointAddress =	USB_DIR_IN,
+	.bmAttributes =		USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize =	cpu_to_le16(1024),
+};
+
+static struct usb_endpoint_descriptor ss_ncm_out_desc = {
+	.bLength =		USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType =	USB_DT_ENDPOINT,
+
+	.bEndpointAddress =	USB_DIR_OUT,
+	.bmAttributes =		USB_ENDPOINT_XFER_BULK,
+	.wMaxPacketSize =	cpu_to_le16(1024),
+};
+
+static struct usb_ss_ep_comp_descriptor ss_ncm_bulk_comp_desc = {
+	.bLength =		sizeof(ss_ncm_bulk_comp_desc),
+	.bDescriptorType =	USB_DT_SS_ENDPOINT_COMP,
+
+	/* the following 2 values can be tweaked if necessary */
+	/* .bMaxBurst =		0, */
+	/* .bmAttributes =	0, */
+};
+
+static struct usb_descriptor_header *ncm_ss_function[] = {
+	(struct usb_descriptor_header *) &ncm_iad_desc,
+	/* CDC NCM control descriptors */
+	(struct usb_descriptor_header *) &ncm_control_intf,
+	(struct usb_descriptor_header *) &ncm_header_desc,
+	(struct usb_descriptor_header *) &ncm_union_desc,
+	(struct usb_descriptor_header *) &ecm_desc,
+	(struct usb_descriptor_header *) &ncm_desc,
+	(struct usb_descriptor_header *) &ss_ncm_notify_desc,
+	(struct usb_descriptor_header *) &ss_ncm_notify_comp_desc,
+	/* data interface, altsettings 0 and 1 */
+	(struct usb_descriptor_header *) &ncm_data_nop_intf,
+	(struct usb_descriptor_header *) &ncm_data_intf,
+	(struct usb_descriptor_header *) &ss_ncm_in_desc,
+	(struct usb_descriptor_header *) &ss_ncm_bulk_comp_desc,
+	(struct usb_descriptor_header *) &ss_ncm_out_desc,
+	(struct usb_descriptor_header *) &ss_ncm_bulk_comp_desc,
+	NULL,
+};
+
 /* string descriptors: */
 
 #define STRING_CTRL_IDX	0
@@ -852,6 +924,8 @@
 			 */
 			ncm->port.is_zlp_ok =
 				gadget_is_zlp_supported(cdev->gadget);
+			ncm->port.no_skb_reserve =
+				gadget_avoids_skb_reserve(cdev->gadget);
 			ncm->port.cdc_filter = DEFAULT_FILTER;
 			DBG(cdev, "activate ncm\n");
 			net = gether_connect(&ncm->port);
@@ -1431,8 +1505,13 @@
 	hs_ncm_notify_desc.bEndpointAddress =
 		fs_ncm_notify_desc.bEndpointAddress;
 
+	ss_ncm_in_desc.bEndpointAddress = fs_ncm_in_desc.bEndpointAddress;
+	ss_ncm_out_desc.bEndpointAddress = fs_ncm_out_desc.bEndpointAddress;
+	ss_ncm_notify_desc.bEndpointAddress =
+		fs_ncm_notify_desc.bEndpointAddress;
+
 	status = usb_assign_descriptors(f, ncm_fs_function, ncm_hs_function,
-			NULL, NULL);
+			ncm_ss_function, NULL);
 	if (status)
 		goto fail;
 
@@ -1450,6 +1529,7 @@
 	ncm->task_timer.function = ncm_tx_timeout;
 
 	DBG(cdev, "CDC Network: %s speed IN/%s OUT/%s NOTIFY/%s\n",
+			gadget_is_superspeed(c->cdev->gadget) ? "super" :
 			gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full",
 			ncm->port.in_ep->name, ncm->port.out_ep->name,
 			ncm->notify->name);
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index 64706a7..0de36cd 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -889,13 +889,17 @@
 /*-------------------------------------------------------------------------*/
 
 static bool gprinter_req_match(struct usb_function *f,
-			       const struct usb_ctrlrequest *ctrl)
+			       const struct usb_ctrlrequest *ctrl,
+			       bool config0)
 {
 	struct printer_dev	*dev = func_to_printer(f);
 	u16			w_index = le16_to_cpu(ctrl->wIndex);
 	u16			w_value = le16_to_cpu(ctrl->wValue);
 	u16			w_length = le16_to_cpu(ctrl->wLength);
 
+	if (config0)
+		return false;
+
 	if ((ctrl->bRequestType & USB_RECIP_MASK) != USB_RECIP_INTERFACE ||
 	    (ctrl->bRequestType & USB_TYPE_MASK) != USB_TYPE_CLASS)
 		return false;
diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c
index c800582..16562e4 100644
--- a/drivers/usb/gadget/function/f_rndis.c
+++ b/drivers/usb/gadget/function/f_rndis.c
@@ -374,6 +374,9 @@
 {
 	struct sk_buff *skb2;
 
+	if (!skb)
+		return NULL;
+
 	skb2 = skb_realloc_headroom(skb, sizeof(struct rndis_packet_msg_type));
 	rndis_add_hdr(skb2);
 
diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c
index df0189d..8784fa1 100644
--- a/drivers/usb/gadget/function/f_sourcesink.c
+++ b/drivers/usb/gadget/function/f_sourcesink.c
@@ -293,9 +293,7 @@
 
 static inline struct usb_request *ss_alloc_ep_req(struct usb_ep *ep, int len)
 {
-	struct f_sourcesink		*ss = ep->driver_data;
-
-	return alloc_ep_req(ep, len, ss->buflen);
+	return alloc_ep_req(ep, len);
 }
 
 static void disable_ep(struct usb_composite_dev *cdev, struct usb_ep *ep)
@@ -606,7 +604,7 @@
 	} else {
 		ep = is_in ? ss->in_ep : ss->out_ep;
 		qlen = ss->bulk_qlen;
-		size = 0;
+		size = ss->buflen;
 	}
 
 	for (i = 0; i < qlen; i++) {
diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index 29b41b5..27ed51b 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -258,6 +258,13 @@
 	memcpy(&uvc_event->req, ctrl, sizeof(uvc_event->req));
 	v4l2_event_queue(&uvc->vdev, &v4l2_event);
 
+	/* Pass additional setup data to userspace */
+	if (uvc->event_setup_out && uvc->event_length) {
+		uvc->control_req->length = uvc->event_length;
+		return usb_ep_queue(uvc->func.config->cdev->gadget->ep0,
+			uvc->control_req, GFP_ATOMIC);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c
index 943c21a..ab6ac1b 100644
--- a/drivers/usb/gadget/function/rndis.c
+++ b/drivers/usb/gadget/function/rndis.c
@@ -680,6 +680,12 @@
 {
 	rndis_reset_cmplt_type *resp;
 	rndis_resp_t *r;
+	u8 *xbuf;
+	u32 length;
+
+	/* drain the response queue */
+	while ((xbuf = rndis_get_next_response(params, &length)))
+		rndis_free_response(params, xbuf);
 
 	r = rndis_add_response(params, sizeof(rndis_reset_cmplt_type));
 	if (!r)
diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c
index 990df22..8fbf686 100644
--- a/drivers/usb/gadget/function/storage_common.c
+++ b/drivers/usb/gadget/function/storage_common.c
@@ -369,6 +369,12 @@
 }
 EXPORT_SYMBOL_GPL(fsg_show_removable);
 
+ssize_t fsg_show_inquiry_string(struct fsg_lun *curlun, char *buf)
+{
+	return sprintf(buf, "%s\n", curlun->inquiry_string);
+}
+EXPORT_SYMBOL_GPL(fsg_show_inquiry_string);
+
 /*
  * The caller must hold fsg->filesem for reading when calling this function.
  */
@@ -499,4 +505,22 @@
 }
 EXPORT_SYMBOL_GPL(fsg_store_removable);
 
+ssize_t fsg_store_inquiry_string(struct fsg_lun *curlun, const char *buf,
+				 size_t count)
+{
+	const size_t len = min(count, sizeof(curlun->inquiry_string));
+
+	if (len == 0 || buf[0] == '\n') {
+		curlun->inquiry_string[0] = 0;
+	} else {
+		snprintf(curlun->inquiry_string,
+			 sizeof(curlun->inquiry_string), "%-28s", buf);
+		if (curlun->inquiry_string[len-1] == '\n')
+			curlun->inquiry_string[len-1] = ' ';
+	}
+
+	return count;
+}
+EXPORT_SYMBOL_GPL(fsg_store_inquiry_string);
+
 MODULE_LICENSE("GPL");
diff --git a/drivers/usb/gadget/function/storage_common.h b/drivers/usb/gadget/function/storage_common.h
index c3544e6..e698489 100644
--- a/drivers/usb/gadget/function/storage_common.h
+++ b/drivers/usb/gadget/function/storage_common.h
@@ -88,6 +88,12 @@
 #define ASC(x)		((u8) ((x) >> 8))
 #define ASCQ(x)		((u8) (x))
 
+/*
+ * Vendor (8 chars), product (16 chars), release (4 hexadecimal digits) and NUL
+ * byte
+ */
+#define INQUIRY_STRING_LEN ((size_t) (8 + 16 + 4 + 1))
+
 struct fsg_lun {
 	struct file	*filp;
 	loff_t		file_length;
@@ -112,6 +118,7 @@
 	struct device	dev;
 	const char	*name;		/* "lun.name" */
 	const char	**name_pfx;	/* "function.name" */
+	char		inquiry_string[INQUIRY_STRING_LEN];
 };
 
 static inline bool fsg_lun_is_open(struct fsg_lun *curlun)
@@ -210,6 +217,7 @@
 ssize_t fsg_show_nofua(struct fsg_lun *curlun, char *buf);
 ssize_t fsg_show_file(struct fsg_lun *curlun, struct rw_semaphore *filesem,
 		      char *buf);
+ssize_t fsg_show_inquiry_string(struct fsg_lun *curlun, char *buf);
 ssize_t fsg_show_cdrom(struct fsg_lun *curlun, char *buf);
 ssize_t fsg_show_removable(struct fsg_lun *curlun, char *buf);
 ssize_t fsg_store_ro(struct fsg_lun *curlun, struct rw_semaphore *filesem,
@@ -221,5 +229,7 @@
 			const char *buf, size_t count);
 ssize_t fsg_store_removable(struct fsg_lun *curlun, const char *buf,
 			    size_t count);
+ssize_t fsg_store_inquiry_string(struct fsg_lun *curlun, const char *buf,
+				 size_t count);
 
 #endif /* USB_STORAGE_COMMON_H */
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index a3f7e7c..9c8c9ed 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -82,6 +82,7 @@
 #define	WORK_RX_MEMORY		0
 
 	bool			zlp;
+	bool			no_skb_reserve;
 	u8			host_mac[ETH_ALEN];
 	u8			dev_mac[ETH_ALEN];
 };
@@ -233,7 +234,8 @@
 	 * but on at least one, checksumming fails otherwise.  Note:
 	 * RNDIS headers involve variable numbers of LE32 values.
 	 */
-	skb_reserve(skb, NET_IP_ALIGN);
+	if (likely(!dev->no_skb_reserve))
+		skb_reserve(skb, NET_IP_ALIGN);
 
 	req->buf = skb->data;
 	req->length = size;
@@ -556,7 +558,8 @@
 			/* Multi frame CDC protocols may store the frame for
 			 * later which is not a dropped frame.
 			 */
-			if (dev->port_usb->supports_multi_frame)
+			if (dev->port_usb &&
+					dev->port_usb->supports_multi_frame)
 				goto multiframe;
 			goto drop;
 		}
@@ -568,7 +571,8 @@
 	req->complete = tx_complete;
 
 	/* NCM requires no zlp if transfer is dwNtbInMaxSize */
-	if (dev->port_usb->is_fixed &&
+	if (dev->port_usb &&
+	    dev->port_usb->is_fixed &&
 	    length == dev->port_usb->fixed_in_len &&
 	    (length % in->maxpacket) == 0)
 		req->zero = 0;
@@ -1062,6 +1066,7 @@
 
 	if (result == 0) {
 		dev->zlp = link->is_zlp_ok;
+		dev->no_skb_reserve = link->no_skb_reserve;
 		DBG(dev, "qlen %d\n", qlen(dev->gadget, dev->qmult));
 
 		dev->header_len = link->header_len;
diff --git a/drivers/usb/gadget/function/u_ether.h b/drivers/usb/gadget/function/u_ether.h
index c77145b..81d94a7a 100644
--- a/drivers/usb/gadget/function/u_ether.h
+++ b/drivers/usb/gadget/function/u_ether.h
@@ -64,6 +64,7 @@
 	struct usb_ep			*out_ep;
 
 	bool				is_zlp_ok;
+	bool				no_skb_reserve;
 
 	u16				cdc_filter;
 
diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index 6ded634..e0cd1e4 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -375,10 +375,15 @@
 */
 {
 	struct list_head	*pool = &port->write_pool;
-	struct usb_ep		*in = port->port_usb->in;
+	struct usb_ep		*in;
 	int			status = 0;
 	bool			do_tty_wake = false;
 
+	if (!port->port_usb)
+		return status;
+
+	in = port->port_usb->in;
+
 	while (!port->write_busy && !list_empty(pool)) {
 		struct usb_request	*req;
 		int			len;
diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
index 66753ba7..31125a4 100644
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -2023,7 +2023,7 @@
 	if (!data) {
 		kfree(*class_array);
 		*class_array = NULL;
-		ret = PTR_ERR(data);
+		ret = -ENOMEM;
 		goto unlock;
 	}
 	cl_arr = *class_array;
diff --git a/drivers/usb/gadget/legacy/gmidi.c b/drivers/usb/gadget/legacy/gmidi.c
index fc2ac15..0bf39c3 100644
--- a/drivers/usb/gadget/legacy/gmidi.c
+++ b/drivers/usb/gadget/legacy/gmidi.c
@@ -47,7 +47,7 @@
 module_param(id, charp, S_IRUGO);
 MODULE_PARM_DESC(id, "ID string for the USB MIDI Gadget adapter.");
 
-static unsigned int buflen = 256;
+static unsigned int buflen = 512;
 module_param(buflen, uint, S_IRUGO);
 MODULE_PARM_DESC(buflen, "MIDI buffer length");
 
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index aa3707b..16104b5e 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -542,7 +542,7 @@
 	 */
 	spin_lock_irq(&epdata->dev->lock);
 	value = -ENODEV;
-	if (unlikely(epdata->ep))
+	if (unlikely(epdata->ep == NULL))
 		goto fail;
 
 	req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC);
@@ -606,7 +606,7 @@
 	}
 	if (is_sync_kiocb(iocb)) {
 		value = ep_io(epdata, buf, len);
-		if (value >= 0 && copy_to_iter(buf, value, to))
+		if (value >= 0 && (copy_to_iter(buf, value, to) != value))
 			value = -EFAULT;
 	} else {
 		struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL);
diff --git a/drivers/usb/gadget/u_f.c b/drivers/usb/gadget/u_f.c
index 4bc7eea..1883973 100644
--- a/drivers/usb/gadget/u_f.c
+++ b/drivers/usb/gadget/u_f.c
@@ -12,14 +12,16 @@
  */
 
 #include "u_f.h"
+#include <linux/usb/ch9.h>
 
-struct usb_request *alloc_ep_req(struct usb_ep *ep, int len, int default_len)
+struct usb_request *alloc_ep_req(struct usb_ep *ep, size_t len)
 {
 	struct usb_request      *req;
 
 	req = usb_ep_alloc_request(ep, GFP_ATOMIC);
 	if (req) {
-		req->length = len ?: default_len;
+		req->length = usb_endpoint_dir_out(ep->desc) ?
+			usb_ep_align(ep, len) : len;
 		req->buf = kmalloc(req->length, GFP_ATOMIC);
 		if (!req->buf) {
 			usb_ep_free_request(ep, req);
diff --git a/drivers/usb/gadget/u_f.h b/drivers/usb/gadget/u_f.h
index 4247cc0..7d53a47 100644
--- a/drivers/usb/gadget/u_f.h
+++ b/drivers/usb/gadget/u_f.h
@@ -47,8 +47,21 @@
 struct usb_ep;
 struct usb_request;
 
-/* Requests allocated via alloc_ep_req() must be freed by free_ep_req(). */
-struct usb_request *alloc_ep_req(struct usb_ep *ep, int len, int default_len);
+/**
+ * alloc_ep_req - returns a usb_request allocated by the gadget driver and
+ * allocates the request's buffer.
+ *
+ * @ep: the endpoint to allocate a usb_request
+ * @len: usb_requests's buffer suggested size
+ *
+ * In case @ep direction is OUT, the @len will be aligned to ep's
+ * wMaxPacketSize. In order to avoid memory leaks or drops, *always* use
+ * usb_requests's length (req->length) to refer to the allocated buffer size.
+ * Requests allocated via alloc_ep_req() *must* be freed by free_ep_req().
+ */
+struct usb_request *alloc_ep_req(struct usb_ep *ep, size_t len);
+
+/* Frees a usb_request previously allocated by alloc_ep_req() */
 static inline void free_ep_req(struct usb_ep *ep, struct usb_request *req)
 {
 	kfree(req->buf);
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index ff8685e..9483489 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -107,10 +107,8 @@
 		goto out;
 
 	ret = ep->ops->enable(ep, ep->desc);
-	if (ret) {
-		ret = ret;
+	if (ret)
 		goto out;
-	}
 
 	ep->enabled = true;
 
@@ -827,7 +825,7 @@
 		return;
 
 	if (req->num_mapped_sgs) {
-		dma_unmap_sg(dev, req->sg, req->num_mapped_sgs,
+		dma_unmap_sg(dev, req->sg, req->num_sgs,
 				is_in ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
 		req->num_mapped_sgs = 0;
@@ -1145,7 +1143,7 @@
 			if (ret != -EPROBE_DEFER)
 				list_del(&driver->pending);
 			if (ret)
-				goto err4;
+				goto err5;
 			break;
 		}
 	}
@@ -1154,6 +1152,9 @@
 
 	return 0;
 
+err5:
+	device_del(&udc->dev);
+
 err4:
 	list_del(&udc->list);
 	mutex_unlock(&udc_lock);
diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c
index 93d28cb..4fff51b 100644
--- a/drivers/usb/gadget/udc/fsl_qe_udc.c
+++ b/drivers/usb/gadget/udc/fsl_qe_udc.c
@@ -421,10 +421,8 @@
 	bd = ep->rxbase;
 
 	ep->rxframe = kmalloc(sizeof(*ep->rxframe), GFP_ATOMIC);
-	if (ep->rxframe == NULL) {
-		dev_err(ep->udc->dev, "malloc rxframe failed\n");
+	if (!ep->rxframe)
 		return -ENOMEM;
-	}
 
 	qe_frame_init(ep->rxframe);
 
@@ -435,9 +433,7 @@
 
 	size = (ep->ep.maxpacket + USB_CRC_SIZE + 2) * (bdring_len + 1);
 	ep->rxbuffer = kzalloc(size, GFP_ATOMIC);
-	if (ep->rxbuffer == NULL) {
-		dev_err(ep->udc->dev, "malloc rxbuffer failed,size=%d\n",
-				size);
+	if (!ep->rxbuffer) {
 		kfree(ep->rxframe);
 		return -ENOMEM;
 	}
@@ -668,10 +664,8 @@
 
 	if ((ep->tm == USBP_TM_CTL) || (ep->dir == USB_DIR_IN)) {
 		ep->txframe = kmalloc(sizeof(*ep->txframe), GFP_ATOMIC);
-		if (ep->txframe == NULL) {
-			dev_err(udc->dev, "malloc txframe failed\n");
+		if (!ep->txframe)
 			goto en_done2;
-		}
 		qe_frame_init(ep->txframe);
 	}
 
@@ -1878,11 +1872,8 @@
 
 	tmp = in_be16(&udc->usb_param->frame_n);
 	if (tmp & 0x8000)
-		tmp = tmp & 0x07ff;
-	else
-		tmp = -EINVAL;
-
-	return (int)tmp;
+		return tmp & 0x07ff;
+	return -EINVAL;
 }
 
 static int fsl_qe_start(struct usb_gadget *gadget,
@@ -2053,7 +2044,7 @@
 			struct qe_ep *ep;
 
 			if (wValue != 0 || wLength != 0
-				|| pipe > USB_MAX_ENDPOINTS)
+				|| pipe >= USB_MAX_ENDPOINTS)
 				break;
 			ep = &udc->eps[pipe];
 
@@ -2347,10 +2338,8 @@
 	u32 offset;
 
 	udc = kzalloc(sizeof(*udc), GFP_KERNEL);
-	if (udc == NULL) {
-		dev_err(&ofdev->dev, "malloc udc failed\n");
+	if (!udc)
 		goto cleanup;
-	}
 
 	udc->dev = &ofdev->dev;
 
diff --git a/drivers/usb/gadget/udc/goku_udc.c b/drivers/usb/gadget/udc/goku_udc.c
index d2205d9..5107987 100644
--- a/drivers/usb/gadget/udc/goku_udc.c
+++ b/drivers/usb/gadget/udc/goku_udc.c
@@ -1767,8 +1767,7 @@
 
 	/* alloc, and start init */
 	dev = kzalloc (sizeof *dev, GFP_KERNEL);
-	if (dev == NULL){
-		pr_debug("enomem %s\n", pci_name(pdev));
+	if (!dev) {
 		retval = -ENOMEM;
 		goto err;
 	}
@@ -1839,6 +1838,8 @@
 err:
 	if (dev)
 		goku_remove (pdev);
+	/* gadget_release is not registered yet, kfree explicitly */
+	kfree(dev);
 	return retval;
 }
 
diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index 614ab951..61c938c 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -589,7 +589,7 @@
 
 	ep = container_of(_ep, struct net2280_ep, ep);
 	if (!_ep || !_req) {
-		dev_err(&ep->dev->pdev->dev, "%s: Inavlid ep=%p or req=%p\n",
+		dev_err(&ep->dev->pdev->dev, "%s: Invalid ep=%p or req=%p\n",
 							__func__, _ep, _req);
 		return;
 	}
@@ -1137,8 +1137,10 @@
 	done(ep, req, status);
 }
 
-static void scan_dma_completions(struct net2280_ep *ep)
+static int scan_dma_completions(struct net2280_ep *ep)
 {
+	int num_completed = 0;
+
 	/* only look at descriptors that were "naturally" retired,
 	 * so fifo and list head state won't matter
 	 */
@@ -1166,6 +1168,7 @@
 				break;
 			/* single transfer mode */
 			dma_done(ep, req, tmp, 0);
+			num_completed++;
 			break;
 		} else if (!ep->is_in &&
 			   (req->req.length % ep->ep.maxpacket) &&
@@ -1194,7 +1197,10 @@
 			}
 		}
 		dma_done(ep, req, tmp, 0);
+		num_completed++;
 	}
+
+	return num_completed;
 }
 
 static void restart_dma(struct net2280_ep *ep)
@@ -1567,6 +1573,44 @@
 			return ep;
 	}
 
+	/* USB3380: Only first four endpoints have DMA channels. Allocate
+	 * slower interrupt endpoints from PIO hw endpoints, to allow bulk/isoc
+	 * endpoints use DMA hw endpoints.
+	 */
+	if (usb_endpoint_type(desc) == USB_ENDPOINT_XFER_INT &&
+	    usb_endpoint_dir_in(desc)) {
+		ep = gadget_find_ep_by_name(_gadget, "ep2in");
+		if (ep && usb_gadget_ep_match_desc(_gadget, ep, desc, ep_comp))
+			return ep;
+		ep = gadget_find_ep_by_name(_gadget, "ep4in");
+		if (ep && usb_gadget_ep_match_desc(_gadget, ep, desc, ep_comp))
+			return ep;
+	} else if (usb_endpoint_type(desc) == USB_ENDPOINT_XFER_INT &&
+		   !usb_endpoint_dir_in(desc)) {
+		ep = gadget_find_ep_by_name(_gadget, "ep1out");
+		if (ep && usb_gadget_ep_match_desc(_gadget, ep, desc, ep_comp))
+			return ep;
+		ep = gadget_find_ep_by_name(_gadget, "ep3out");
+		if (ep && usb_gadget_ep_match_desc(_gadget, ep, desc, ep_comp))
+			return ep;
+	} else if (usb_endpoint_type(desc) != USB_ENDPOINT_XFER_BULK &&
+		   usb_endpoint_dir_in(desc)) {
+		ep = gadget_find_ep_by_name(_gadget, "ep1in");
+		if (ep && usb_gadget_ep_match_desc(_gadget, ep, desc, ep_comp))
+			return ep;
+		ep = gadget_find_ep_by_name(_gadget, "ep3in");
+		if (ep && usb_gadget_ep_match_desc(_gadget, ep, desc, ep_comp))
+			return ep;
+	} else if (usb_endpoint_type(desc) != USB_ENDPOINT_XFER_BULK &&
+		   !usb_endpoint_dir_in(desc)) {
+		ep = gadget_find_ep_by_name(_gadget, "ep2out");
+		if (ep && usb_gadget_ep_match_desc(_gadget, ep, desc, ep_comp))
+			return ep;
+		ep = gadget_find_ep_by_name(_gadget, "ep4out");
+		if (ep && usb_gadget_ep_match_desc(_gadget, ep, desc, ep_comp))
+			return ep;
+	}
+
 	/* USB3380: use same address for usb and hardware endpoints */
 	snprintf(name, sizeof(name), "ep%d%s", usb_endpoint_num(desc),
 			usb_endpoint_dir_in(desc) ? "in" : "out");
@@ -2547,8 +2591,11 @@
 	/* manual DMA queue advance after short OUT */
 	if (likely(ep->dma)) {
 		if (t & BIT(SHORT_PACKET_TRANSFERRED_INTERRUPT)) {
-			u32	count;
+			struct net2280_request *stuck_req = NULL;
 			int	stopped = ep->stopped;
+			int	num_completed;
+			int	stuck = 0;
+			u32	count;
 
 			/* TRANSFERRED works around OUT_DONE erratum 0112.
 			 * we expect (N <= maxpacket) bytes; host wrote M.
@@ -2560,7 +2607,7 @@
 				/* any preceding dma transfers must finish.
 				 * dma handles (M >= N), may empty the queue
 				 */
-				scan_dma_completions(ep);
+				num_completed = scan_dma_completions(ep);
 				if (unlikely(list_empty(&ep->queue) ||
 						ep->out_overflow)) {
 					req = NULL;
@@ -2580,6 +2627,31 @@
 						req = NULL;
 					break;
 				}
+
+				/* Escape loop if no dma transfers completed
+				 * after few retries.
+				 */
+				if (num_completed == 0) {
+					if (stuck_req == req &&
+					    readl(&ep->dma->dmadesc) !=
+						  req->td_dma && stuck++ > 5) {
+						count = readl(
+							&ep->dma->dmacount);
+						count &= DMA_BYTE_COUNT_MASK;
+						req = NULL;
+						ep_dbg(ep->dev, "%s escape stuck %d, count %u\n",
+							ep->ep.name, stuck,
+							count);
+						break;
+					} else if (stuck_req != req) {
+						stuck_req = req;
+						stuck = 0;
+					}
+				} else {
+					stuck_req = NULL;
+					stuck = 0;
+				}
+
 				udelay(1);
 			}
 
diff --git a/drivers/usb/gadget/udc/omap_udc.c b/drivers/usb/gadget/udc/omap_udc.c
index 9b7d394..a8709f9 100644
--- a/drivers/usb/gadget/udc/omap_udc.c
+++ b/drivers/usb/gadget/udc/omap_udc.c
@@ -2875,7 +2875,7 @@
 	xceiv = NULL;
 	/* "udc" is now valid */
 	pullup_disable(udc);
-#if	defined(CONFIG_USB_OHCI_HCD) || defined(CONFIG_USB_OHCI_HCD_MODULE)
+#if	IS_ENABLED(CONFIG_USB_OHCI_HCD)
 	udc->gadget.is_otg = (config->otg != 0);
 #endif
 
diff --git a/drivers/usb/gadget/udc/pxa27x_udc.c b/drivers/usb/gadget/udc/pxa27x_udc.c
index ad140aa..7fa60f5 100644
--- a/drivers/usb/gadget/udc/pxa27x_udc.c
+++ b/drivers/usb/gadget/udc/pxa27x_udc.c
@@ -33,6 +33,7 @@
 #include <linux/usb.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
+#include <linux/usb/phy.h>
 
 #include "pxa27x_udc.h"
 
@@ -1655,6 +1656,37 @@
 	return -EOPNOTSUPP;
 }
 
+/**
+ * pxa_udc_phy_event - Called by phy upon VBus event
+ * @nb: notifier block
+ * @action: phy action, is vbus connect or disconnect
+ * @data: the usb_gadget structure in pxa_udc
+ *
+ * Called by the USB Phy when a cable connect or disconnect is sensed.
+ *
+ * Returns 0
+ */
+static int pxa_udc_phy_event(struct notifier_block *nb, unsigned long action,
+			     void *data)
+{
+	struct usb_gadget *gadget = data;
+
+	switch (action) {
+	case USB_EVENT_VBUS:
+		usb_gadget_vbus_connect(gadget);
+		return NOTIFY_OK;
+	case USB_EVENT_NONE:
+		usb_gadget_vbus_disconnect(gadget);
+		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+static struct notifier_block pxa27x_udc_phy = {
+	.notifier_call = pxa_udc_phy_event,
+};
+
 static int pxa27x_udc_start(struct usb_gadget *g,
 		struct usb_gadget_driver *driver);
 static int pxa27x_udc_stop(struct usb_gadget *g);
@@ -2432,7 +2464,14 @@
 		return udc->irq;
 
 	udc->dev = &pdev->dev;
-	udc->transceiver = usb_get_phy(USB_PHY_TYPE_USB2);
+	if (of_have_populated_dt()) {
+		udc->transceiver =
+			devm_usb_get_phy_by_phandle(udc->dev, "phys", 0);
+		if (IS_ERR(udc->transceiver))
+			return PTR_ERR(udc->transceiver);
+	} else {
+		udc->transceiver = usb_get_phy(USB_PHY_TYPE_USB2);
+	}
 
 	if (IS_ERR(udc->gpiod)) {
 		dev_err(&pdev->dev, "Couldn't find or request D+ gpio : %ld\n",
@@ -2465,14 +2504,20 @@
 		goto err;
 	}
 
+	if (!IS_ERR_OR_NULL(udc->transceiver))
+		usb_register_notifier(udc->transceiver, &pxa27x_udc_phy);
 	retval = usb_add_gadget_udc(&pdev->dev, &udc->gadget);
 	if (retval)
-		goto err;
+		goto err_add_gadget;
 
 	pxa_init_debugfs(udc);
 	if (should_enable_udc(udc))
 		udc_enable(udc);
 	return 0;
+
+err_add_gadget:
+	if (!IS_ERR_OR_NULL(udc->transceiver))
+		usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy);
 err:
 	clk_unprepare(udc->clk);
 	return retval;
@@ -2489,6 +2534,8 @@
 	usb_del_gadget_udc(&udc->gadget);
 	pxa_cleanup_debugfs(udc);
 
+	if (!IS_ERR_OR_NULL(udc->transceiver))
+		usb_unregister_notifier(udc->transceiver, &pxa27x_udc_phy);
 	usb_put_phy(udc->transceiver);
 
 	udc->transceiver = NULL;
diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 93a3bec..fb8fc34 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -106,6 +106,7 @@
 
 /* DRD_CON */
 #define DRD_CON_PERI_CON	BIT(24)
+#define DRD_CON_VBOUT		BIT(0)
 
 /* USB_INT_ENA_1 and USB_INT_STA_1 */
 #define USB_INT_1_B3_PLLWKUP	BIT(31)
@@ -363,6 +364,7 @@
 {
 	/* FIXME: How to change host / peripheral mode as well? */
 	usb3_set_bit(usb3, DRD_CON_PERI_CON, USB3_DRD_CON);
+	usb3_clear_bit(usb3, DRD_CON_VBOUT, USB3_DRD_CON);
 
 	usb3_write(usb3, ~0, USB3_USB_INT_STA_1);
 	usb3_enable_irq_1(usb3, USB_INT_1_VBUS_CNG);
diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c
index f8bf290..588e253 100644
--- a/drivers/usb/gadget/udc/udc-xilinx.c
+++ b/drivers/usb/gadget/udc/udc-xilinx.c
@@ -973,10 +973,8 @@
 
 	udc = ep->udc;
 	req = kzalloc(sizeof(*req), gfp_flags);
-	if (!req) {
-		dev_err(udc->dev, "%s:not enough memory", __func__);
+	if (!req)
 		return NULL;
-	}
 
 	req->ep = ep;
 	INIT_LIST_HEAD(&req->queue);
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 2e710a4..0b80cee 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -472,7 +472,7 @@
 
 config USB_OHCI_HCD_OMAP3
 	tristate "OHCI support for OMAP3 and later chips"
-	depends on (ARCH_OMAP3 || ARCH_OMAP4)
+	depends on (ARCH_OMAP3 || ARCH_OMAP4 || SOC_OMAP5)
 	default y
 	---help---
 	  Enables support for the on-chip OHCI controller on
diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c
index 172ef17..5f425c8 100644
--- a/drivers/usb/host/bcma-hcd.c
+++ b/drivers/usb/host/bcma-hcd.c
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
+#include <linux/of_platform.h>
 #include <linux/usb/ehci_pdriver.h>
 #include <linux/usb/ohci_pdriver.h>
 
@@ -34,6 +35,9 @@
 MODULE_DESCRIPTION("Common USB driver for BCMA Bus");
 MODULE_LICENSE("GPL");
 
+/* See BCMA_CLKCTLST_EXTRESREQ and BCMA_CLKCTLST_EXTRESST */
+#define USB_BCMA_CLKCTLST_USB_CLK_REQ			0x00000100
+
 struct bcma_hcd_device {
 	struct bcma_device *core;
 	struct platform_device *ehci_dev;
@@ -165,44 +169,80 @@
 	}
 }
 
-static void bcma_hcd_init_chip_arm_phy(struct bcma_device *dev)
+/**
+ * bcma_hcd_usb20_old_arm_init - Initialize old USB 2.0 controller on ARM
+ *
+ * Old USB 2.0 core is identified as BCMA_CORE_USB20_HOST and was introduced
+ * long before Northstar devices. It seems some cheaper chipsets like BCM53573
+ * still use it.
+ * Initialization of this old core differs between MIPS and ARM.
+ */
+static int bcma_hcd_usb20_old_arm_init(struct bcma_hcd_device *usb_dev)
 {
-	struct bcma_device *arm_core;
-	void __iomem *dmu;
+	struct bcma_device *core = usb_dev->core;
+	struct device *dev = &core->dev;
+	struct bcma_device *pmu_core;
 
-	arm_core = bcma_find_core(dev->bus, BCMA_CORE_ARMCA9);
-	if (!arm_core) {
-		dev_err(&dev->dev, "can not find ARM Cortex A9 ihost core\n");
-		return;
+	usleep_range(10000, 20000);
+	if (core->id.rev < 5)
+		return 0;
+
+	pmu_core = bcma_find_core(core->bus, BCMA_CORE_PMU);
+	if (!pmu_core) {
+		dev_err(dev, "Could not find PMU core\n");
+		return -ENOENT;
 	}
 
-	dmu = ioremap_nocache(arm_core->addr_s[0], 0x1000);
-	if (!dmu) {
-		dev_err(&dev->dev, "can not map ARM Cortex A9 ihost core\n");
-		return;
-	}
+	/* Take USB core out of reset */
+	bcma_awrite32(core, BCMA_IOCTL, BCMA_IOCTL_CLK | BCMA_IOCTL_FGC);
+	usleep_range(100, 200);
+	bcma_awrite32(core, BCMA_RESET_CTL, BCMA_RESET_CTL_RESET);
+	usleep_range(100, 200);
+	bcma_awrite32(core, BCMA_RESET_CTL, 0);
+	usleep_range(100, 200);
+	bcma_awrite32(core, BCMA_IOCTL, BCMA_IOCTL_CLK);
+	usleep_range(100, 200);
 
-	/* Unlock DMU PLL settings */
-	iowrite32(0x0000ea68, dmu + 0x180);
+	/* Enable Misc PLL */
+	bcma_write32(core, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT |
+					  BCMA_CLKCTLST_HQCLKREQ |
+					  USB_BCMA_CLKCTLST_USB_CLK_REQ);
+	usleep_range(100, 200);
 
-	/* Write USB 2.0 PLL control setting */
-	iowrite32(0x00dd10c3, dmu + 0x164);
+	bcma_write32(core, 0x510, 0xc7f85000);
+	bcma_write32(core, 0x510, 0xc7f85003);
+	usleep_range(300, 600);
 
-	/* Lock DMU PLL settings */
-	iowrite32(0x00000000, dmu + 0x180);
+	/* Program USB PHY PLL parameters */
+	bcma_write32(pmu_core, BCMA_CC_PMU_PLLCTL_ADDR, 0x6);
+	bcma_write32(pmu_core, BCMA_CC_PMU_PLLCTL_DATA, 0x005360c1);
+	usleep_range(100, 200);
+	bcma_write32(pmu_core, BCMA_CC_PMU_PLLCTL_ADDR, 0x7);
+	bcma_write32(pmu_core, BCMA_CC_PMU_PLLCTL_DATA, 0x0);
+	usleep_range(100, 200);
+	bcma_set32(pmu_core, BCMA_CC_PMU_CTL, BCMA_CC_PMU_CTL_PLL_UPD);
+	usleep_range(100, 200);
 
-	iounmap(dmu);
+	bcma_write32(core, 0x510, 0x7f8d007);
+	udelay(1000);
+
+	/* Take controller out of reset */
+	bcma_write32(core, 0x200, 0x4ff);
+	usleep_range(25, 50);
+	bcma_write32(core, 0x200, 0x6ff);
+	usleep_range(25, 50);
+	bcma_write32(core, 0x200, 0x7ff);
+	usleep_range(25, 50);
+
+	of_platform_default_populate(dev->of_node, NULL, dev);
+
+	return 0;
 }
 
-static void bcma_hcd_init_chip_arm_hc(struct bcma_device *dev)
+static void bcma_hcd_usb20_ns_init_hc(struct bcma_device *dev)
 {
 	u32 val;
 
-	/*
-	 * Delay after PHY initialized to ensure HC is ready to be configured
-	 */
-	usleep_range(1000, 2000);
-
 	/* Set packet buffer OUT threshold */
 	val = bcma_read32(dev, 0x94);
 	val &= 0xffff;
@@ -213,20 +253,33 @@
 	val = bcma_read32(dev, 0x9c);
 	val |= 1;
 	bcma_write32(dev, 0x9c, val);
+
+	/*
+	 * Broadcom initializes PHY and then waits to ensure HC is ready to be
+	 * configured. In our case the order is reversed. We just initialized
+	 * controller and we let HCD initialize PHY, so let's wait (sleep) now.
+	 */
+	usleep_range(1000, 2000);
 }
 
-static void bcma_hcd_init_chip_arm(struct bcma_device *dev)
+/**
+ * bcma_hcd_usb20_ns_init - Initialize Northstar USB 2.0 controller
+ */
+static int bcma_hcd_usb20_ns_init(struct bcma_hcd_device *bcma_hcd)
 {
-	bcma_core_enable(dev, 0);
+	struct bcma_device *core = bcma_hcd->core;
+	struct bcma_chipinfo *ci = &core->bus->chipinfo;
+	struct device *dev = &core->dev;
 
-	if (dev->bus->chipinfo.id == BCMA_CHIP_ID_BCM4707 ||
-	    dev->bus->chipinfo.id == BCMA_CHIP_ID_BCM53018) {
-		if (dev->bus->chipinfo.pkg == BCMA_PKG_ID_BCM4707 ||
-		    dev->bus->chipinfo.pkg == BCMA_PKG_ID_BCM4708)
-			bcma_hcd_init_chip_arm_phy(dev);
+	bcma_core_enable(core, 0);
 
-		bcma_hcd_init_chip_arm_hc(dev);
-	}
+	if (ci->id == BCMA_CHIP_ID_BCM4707 ||
+	    ci->id == BCMA_CHIP_ID_BCM53018)
+		bcma_hcd_usb20_ns_init_hc(core);
+
+	of_platform_default_populate(dev->of_node, NULL, dev);
+
+	return 0;
 }
 
 static void bcma_hci_platform_power_gpio(struct bcma_device *dev, bool val)
@@ -299,16 +352,7 @@
 	if (dma_set_mask_and_coherent(dev->dma_dev, DMA_BIT_MASK(32)))
 		return -EOPNOTSUPP;
 
-	switch (dev->id.id) {
-	case BCMA_CORE_NS_USB20:
-		bcma_hcd_init_chip_arm(dev);
-		break;
-	case BCMA_CORE_USB20_HOST:
-		bcma_hcd_init_chip_mips(dev);
-		break;
-	default:
-		return -ENODEV;
-	}
+	bcma_hcd_init_chip_mips(dev);
 
 	/* In AI chips EHCI is addrspace 0, OHCI is 1 */
 	ohci_addr = dev->addr_s[0];
@@ -338,6 +382,18 @@
 	return err;
 }
 
+static int bcma_hcd_usb30_init(struct bcma_hcd_device *bcma_hcd)
+{
+	struct bcma_device *core = bcma_hcd->core;
+	struct device *dev = &core->dev;
+
+	bcma_core_enable(core, 0);
+
+	of_platform_default_populate(dev->of_node, NULL, dev);
+
+	return 0;
+}
+
 static int bcma_hcd_probe(struct bcma_device *core)
 {
 	int err;
@@ -357,14 +413,24 @@
 
 	switch (core->id.id) {
 	case BCMA_CORE_USB20_HOST:
+		if (IS_ENABLED(CONFIG_ARM))
+			err = bcma_hcd_usb20_old_arm_init(usb_dev);
+		else if (IS_ENABLED(CONFIG_MIPS))
+			err = bcma_hcd_usb20_init(usb_dev);
+		else
+			err = -ENOTSUPP;
+		break;
 	case BCMA_CORE_NS_USB20:
-		err = bcma_hcd_usb20_init(usb_dev);
-		if (err)
-			return err;
+		err = bcma_hcd_usb20_ns_init(usb_dev);
+		break;
+	case BCMA_CORE_NS_USB30:
+		err = bcma_hcd_usb30_init(usb_dev);
 		break;
 	default:
 		return -ENODEV;
 	}
+	if (err)
+		return err;
 
 	bcma_set_drvdata(core, usb_dev);
 	return 0;
@@ -416,6 +482,7 @@
 static const struct bcma_device_id bcma_hcd_table[] = {
 	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_USB20_HOST, BCMA_ANY_REV, BCMA_ANY_CLASS),
 	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_NS_USB20, BCMA_ANY_REV, BCMA_ANY_CLASS),
+	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_NS_USB30, BCMA_ANY_REV, BCMA_ANY_CLASS),
 	{},
 };
 MODULE_DEVICE_TABLE(bcma, bcma_hcd_table);
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index a962b89..1e5f529 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -332,11 +332,11 @@
 	int	port = HCS_N_PORTS(ehci->hcs_params);
 
 	while (port--) {
-		ehci_writel(ehci, PORT_RWC_BITS,
-				&ehci->regs->port_status[port]);
 		spin_unlock_irq(&ehci->lock);
 		ehci_port_power(ehci, port, false);
 		spin_lock_irq(&ehci->lock);
+		ehci_writel(ehci, PORT_RWC_BITS,
+				&ehci->regs->port_status[port]);
 	}
 }
 
diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c
index 6816b8c..876dca4 100644
--- a/drivers/usb/host/ehci-platform.c
+++ b/drivers/usb/host/ehci-platform.c
@@ -38,7 +38,7 @@
 #include "ehci.h"
 
 #define DRIVER_DESC "EHCI generic platform driver"
-#define EHCI_MAX_CLKS 3
+#define EHCI_MAX_CLKS 4
 #define EHCI_MAX_RSTS 3
 #define hcd_to_ehci_priv(h) ((struct ehci_platform_priv *)hcd_to_ehci(h)->priv)
 
diff --git a/drivers/usb/host/fhci-hcd.c b/drivers/usb/host/fhci-hcd.c
index 0960f41..55a0ae6 100644
--- a/drivers/usb/host/fhci-hcd.c
+++ b/drivers/usb/host/fhci-hcd.c
@@ -310,10 +310,8 @@
 
 	/* allocate memory for SCC data structure */
 	usb = kzalloc(sizeof(*usb), GFP_KERNEL);
-	if (!usb) {
-		fhci_err(fhci, "no memory for SCC data struct\n");
+	if (!usb)
 		return NULL;
-	}
 
 	usb->fhci = fhci;
 	usb->hc_list = fhci->hc_list;
diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c
index 1044b0f..f07ccb2 100644
--- a/drivers/usb/host/fsl-mph-dr-of.c
+++ b/drivers/usb/host/fsl-mph-dr-of.c
@@ -222,23 +222,17 @@
 	pdata->controller_ver = usb_get_ver_info(np);
 
 	/* Activate Erratum by reading property in device tree */
-	if (of_get_property(np, "fsl,usb-erratum-a007792", NULL))
-		pdata->has_fsl_erratum_a007792 = 1;
-	else
-		pdata->has_fsl_erratum_a007792 = 0;
-	if (of_get_property(np, "fsl,usb-erratum-a005275", NULL))
-		pdata->has_fsl_erratum_a005275 = 1;
-	else
-		pdata->has_fsl_erratum_a005275 = 0;
+	pdata->has_fsl_erratum_a007792 =
+		of_property_read_bool(np, "fsl,usb-erratum-a007792");
+	pdata->has_fsl_erratum_a005275 =
+		of_property_read_bool(np, "fsl,usb-erratum-a005275");
 
 	/*
 	 * Determine whether phy_clk_valid needs to be checked
 	 * by reading property in device tree
 	 */
-	if (of_get_property(np, "phy-clk-valid", NULL))
-		pdata->check_phy_clk_valid = 1;
-	else
-		pdata->check_phy_clk_valid = 0;
+	pdata->check_phy_clk_valid =
+		of_property_read_bool(np, "phy-clk-valid");
 
 	if (pdata->have_sysif_regs) {
 		if (pdata->controller_ver == FSL_USB_VER_NONE) {
diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c
index c369c29..369869a 100644
--- a/drivers/usb/host/max3421-hcd.c
+++ b/drivers/usb/host/max3421-hcd.c
@@ -1675,7 +1675,7 @@
 	if (pin_number > 7)
 		return;
 
-	mask = 1u << pin_number;
+	mask = 1u << (pin_number % 4);
 	idx = pin_number / 4;
 
 	if (value)
@@ -1856,15 +1856,11 @@
 	INIT_LIST_HEAD(&max3421_hcd->ep_list);
 
 	max3421_hcd->tx = kmalloc(sizeof(*max3421_hcd->tx), GFP_KERNEL);
-	if (!max3421_hcd->tx) {
-		dev_err(&spi->dev, "failed to kmalloc tx buffer\n");
+	if (!max3421_hcd->tx)
 		goto error;
-	}
 	max3421_hcd->rx = kmalloc(sizeof(*max3421_hcd->rx), GFP_KERNEL);
-	if (!max3421_hcd->rx) {
-		dev_err(&spi->dev, "failed to kmalloc rx buffer\n");
+	if (!max3421_hcd->rx)
 		goto error;
-	}
 
 	max3421_hcd->spi_thread = kthread_run(max3421_spi_thread, hcd,
 					      "max3421_spi_thread");
diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index d177372..5b5880c 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -21,8 +21,11 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
+#include <soc/at91/atmel-sfr.h>
 
 #include "ohci.h"
 
@@ -51,6 +54,7 @@
 	struct clk *hclk;
 	bool clocked;
 	bool wakeup;		/* Saved wake-up state for resume */
+	struct regmap *sfr_regmap;
 };
 /* interface and function clocks; sometimes also an AHB clock */
 
@@ -134,6 +138,17 @@
 
 static void usb_hcd_at91_remove (struct usb_hcd *, struct platform_device *);
 
+static struct regmap *at91_dt_syscon_sfr(void)
+{
+	struct regmap *regmap;
+
+	regmap = syscon_regmap_lookup_by_compatible("atmel,sama5d2-sfr");
+	if (IS_ERR(regmap))
+		regmap = NULL;
+
+	return regmap;
+}
+
 /* configure so an HC device and id are always provided */
 /* always called with process context; sleeping is OK */
 
@@ -197,6 +212,10 @@
 		goto err;
 	}
 
+	ohci_at91->sfr_regmap = at91_dt_syscon_sfr();
+	if (!ohci_at91->sfr_regmap)
+		dev_warn(dev, "failed to find sfr node\n");
+
 	board = hcd->self.controller->platform_data;
 	ohci = hcd_to_ohci(hcd);
 	ohci->num_ports = board->ports;
@@ -282,6 +301,28 @@
 	return length;
 }
 
+static int ohci_at91_port_suspend(struct regmap *regmap, u8 set)
+{
+	u32 regval;
+	int ret;
+
+	if (!regmap)
+		return 0;
+
+	ret = regmap_read(regmap, AT91_SFR_OHCIICR, &regval);
+	if (ret)
+		return ret;
+
+	if (set)
+		regval |= AT91_OHCIICR_USB_SUSPEND;
+	else
+		regval &= ~AT91_OHCIICR_USB_SUSPEND;
+
+	regmap_write(regmap, AT91_SFR_OHCIICR, regval);
+
+	return 0;
+}
+
 /*
  * Look at the control requests to the root hub and see if we need to override.
  */
@@ -289,6 +330,7 @@
 				 u16 wIndex, char *buf, u16 wLength)
 {
 	struct at91_usbh_data *pdata = dev_get_platdata(hcd->self.controller);
+	struct ohci_at91_priv *ohci_at91 = hcd_to_ohci_at91_priv(hcd);
 	struct usb_hub_descriptor *desc;
 	int ret = -EINVAL;
 	u32 *data = (u32 *)buf;
@@ -301,7 +343,8 @@
 
 	switch (typeReq) {
 	case SetPortFeature:
-		if (wValue == USB_PORT_FEAT_POWER) {
+		switch (wValue) {
+		case USB_PORT_FEAT_POWER:
 			dev_dbg(hcd->self.controller, "SetPortFeat: POWER\n");
 			if (valid_port(wIndex)) {
 				ohci_at91_usb_set_power(pdata, wIndex, 1);
@@ -309,6 +352,15 @@
 			}
 
 			goto out;
+
+		case USB_PORT_FEAT_SUSPEND:
+			dev_dbg(hcd->self.controller, "SetPortFeat: SUSPEND\n");
+			if (valid_port(wIndex)) {
+				ohci_at91_port_suspend(ohci_at91->sfr_regmap,
+						       1);
+				return 0;
+			}
+			break;
 		}
 		break;
 
@@ -342,6 +394,16 @@
 				ohci_at91_usb_set_power(pdata, wIndex, 0);
 				return 0;
 			}
+			break;
+
+		case USB_PORT_FEAT_SUSPEND:
+			dev_dbg(hcd->self.controller, "ClearPortFeature: SUSPEND\n");
+			if (valid_port(wIndex)) {
+				ohci_at91_port_suspend(ohci_at91->sfr_regmap,
+						       0);
+				return 0;
+			}
+			break;
 		}
 		break;
 	}
@@ -599,6 +661,8 @@
 	if (ohci_at91->wakeup)
 		enable_irq_wake(hcd->irq);
 
+	ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1);
+
 	ret = ohci_suspend(hcd, ohci_at91->wakeup);
 	if (ret) {
 		if (ohci_at91->wakeup)
@@ -638,6 +702,9 @@
 	at91_start_clock(ohci_at91);
 
 	ohci_resume(hcd, false);
+
+	ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0);
+
 	return 0;
 }
 
diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c
index de7c686..495c145 100644
--- a/drivers/usb/host/ohci-omap.c
+++ b/drivers/usb/host/ohci-omap.c
@@ -36,7 +36,6 @@
 #include <mach/mux.h>
 
 #include <mach/hardware.h>
-#include <mach/irqs.h>
 #include <mach/usb.h>
 
 
diff --git a/drivers/usb/host/ohci-sa1111.c b/drivers/usb/host/ohci-sa1111.c
index 2ac266d..3a9ea32 100644
--- a/drivers/usb/host/ohci-sa1111.c
+++ b/drivers/usb/host/ohci-sa1111.c
@@ -13,9 +13,7 @@
  * This file is licenced under the GPL.
  */
 
-#include <mach/hardware.h>
 #include <asm/mach-types.h>
-#include <mach/assabet.h>
 #include <asm/hardware/sa1111.h>
 
 #ifndef CONFIG_SA1111
@@ -127,7 +125,7 @@
 	dev_dbg(&dev->dev, "starting SA-1111 OHCI USB Controller\n");
 
 	if (machine_is_xp860() ||
-	    machine_has_neponset() ||
+	    machine_is_assabet() ||
 	    machine_is_pfs168() ||
 	    machine_is_badge4())
 		usb_rst = USB_RESET_PWRSENSELOW | USB_RESET_PWRCTRLLOW;
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index a7de8e8..5d3d914 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -601,11 +601,8 @@
 
 	uhci->frame_cpu = kcalloc(UHCI_NUMFRAMES, sizeof(*uhci->frame_cpu),
 			GFP_KERNEL);
-	if (!uhci->frame_cpu) {
-		dev_err(uhci_dev(uhci),
-			"unable to allocate memory for frame pointers\n");
+	if (!uhci->frame_cpu)
 		goto err_alloc_frame_cpu;
-	}
 
 	uhci->td_pool = dma_pool_create("uhci_td", uhci_dev(uhci),
 			sizeof(struct uhci_td), 16, 0);
diff --git a/drivers/usb/host/whci/init.c b/drivers/usb/host/whci/init.c
index e363723..ad8eb57 100644
--- a/drivers/usb/host/whci/init.c
+++ b/drivers/usb/host/whci/init.c
@@ -65,7 +65,7 @@
 	init_waitqueue_head(&whc->cmd_wq);
 	init_waitqueue_head(&whc->async_list_wq);
 	init_waitqueue_head(&whc->periodic_list_wq);
-	whc->workqueue = create_singlethread_workqueue(dev_name(&whc->umc->dev));
+	whc->workqueue = alloc_ordered_workqueue(dev_name(&whc->umc->dev), 0);
 	if (whc->workqueue == NULL) {
 		ret = -ENOMEM;
 		goto error;
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index d61fcc4..730b9fd 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -386,6 +386,9 @@
 
 	ret = 0;
 	virt_dev = xhci->devs[slot_id];
+	if (!virt_dev)
+		return -ENODEV;
+
 	cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO);
 	if (!cmd) {
 		xhci_dbg(xhci, "Couldn't allocate command structure.\n");
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 4fd041b..d7b0f97 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -314,11 +314,12 @@
 		usb_remove_hcd(xhci->shared_hcd);
 		usb_put_hcd(xhci->shared_hcd);
 	}
-	usb_hcd_pci_remove(dev);
 
 	/* Workaround for spurious wakeups at shutdown with HSW */
 	if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
 		pci_set_power_state(dev, PCI_D3hot);
+
+	usb_hcd_pci_remove(dev);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 918e0c7..797137e 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -850,6 +850,10 @@
 	spin_lock_irqsave(&xhci->lock, flags);
 
 	ep->stop_cmds_pending--;
+	if (xhci->xhc_state & XHCI_STATE_REMOVING) {
+		spin_unlock_irqrestore(&xhci->lock, flags);
+		return;
+	}
 	if (xhci->xhc_state & XHCI_STATE_DYING) {
 		xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
 				"Stop EP timer ran, but another timer marked "
@@ -903,7 +907,7 @@
 	spin_unlock_irqrestore(&xhci->lock, flags);
 	xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
 			"Calling usb_hc_died()");
-	usb_hc_died(xhci_to_hcd(xhci)->primary_hcd);
+	usb_hc_died(xhci_to_hcd(xhci));
 	xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
 			"xHCI host controller is dead.");
 }
@@ -1334,12 +1338,6 @@
 
 	cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list);
 
-	if (cmd->command_trb != xhci->cmd_ring->dequeue) {
-		xhci_err(xhci,
-			 "Command completion event does not match command\n");
-		return;
-	}
-
 	del_timer(&xhci->cmd_timer);
 
 	trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event);
@@ -1351,6 +1349,13 @@
 		xhci_handle_stopped_cmd_ring(xhci, cmd);
 		return;
 	}
+
+	if (cmd->command_trb != xhci->cmd_ring->dequeue) {
+		xhci_err(xhci,
+			 "Command completion event does not match command\n");
+		return;
+	}
+
 	/*
 	 * Host aborted the command ring, check if the current command was
 	 * supposed to be aborted, otherwise continue normally.
@@ -3243,7 +3248,8 @@
 	send_addr = addr;
 
 	/* Queue the TRBs, even if they are zero-length */
-	for (enqd_len = 0; enqd_len < full_len; enqd_len += trb_buff_len) {
+	for (enqd_len = 0; first_trb || enqd_len < full_len;
+			enqd_len += trb_buff_len) {
 		field = TRB_TYPE(TRB_NORMAL);
 
 		/* TRB buffer should not cross 64KB boundaries */
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 0f53ae0..a59fafb 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1033,7 +1033,6 @@
 	tegra->phys = devm_kcalloc(&pdev->dev, tegra->num_phys,
 				   sizeof(*tegra->phys), GFP_KERNEL);
 	if (!tegra->phys) {
-		dev_err(&pdev->dev, "failed to allocate PHY array\n");
 		err = -ENOMEM;
 		goto put_padctl;
 	}
@@ -1117,6 +1116,7 @@
 						 tegra->hcd);
 	if (!xhci->shared_hcd) {
 		dev_err(&pdev->dev, "failed to create shared HCD\n");
+		err = -ENOMEM;
 		goto remove_usb2;
 	}
 
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 01d96c9..1a4ca02 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -295,10 +295,8 @@
 	xhci->msix_entries =
 		kmalloc((sizeof(struct msix_entry))*xhci->msix_count,
 				GFP_KERNEL);
-	if (!xhci->msix_entries) {
-		xhci_err(xhci, "Failed to allocate MSI-X entries\n");
+	if (!xhci->msix_entries)
 		return -ENOMEM;
-	}
 
 	for (i = 0; i < xhci->msix_count; i++) {
 		xhci->msix_entries[i].entry = i;
diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index eb8f8d3..47b3577 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -240,6 +240,12 @@
        help
          This option enables support for SMSC USB3503 HSIC to USB 2.0 Driver.
 
+config USB_HSIC_USB4604
+       tristate "USB4604 HSIC to USB20 Driver"
+       depends on I2C
+       help
+         This option enables support for SMSC USB4604 HSIC to USB 2.0 Driver.
+
 config USB_LINK_LAYER_TEST
 	tristate "USB Link Layer Test driver"
 	help
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index 3d79faa..3d19927 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -24,6 +24,7 @@
 obj-$(CONFIG_USB_SEVSEG)		+= usbsevseg.o
 obj-$(CONFIG_USB_YUREX)			+= yurex.o
 obj-$(CONFIG_USB_HSIC_USB3503)		+= usb3503.o
+obj-$(CONFIG_USB_HSIC_USB4604)		+= usb4604.o
 obj-$(CONFIG_USB_CHAOSKEY)		+= chaoskey.o
 obj-$(CONFIG_UCSI)			+= ucsi.o
 
diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c
index 3071c0e..564268f 100644
--- a/drivers/usb/misc/adutux.c
+++ b/drivers/usb/misc/adutux.c
@@ -672,8 +672,7 @@
 
 	/* allocate memory for our device state and initialize it */
 	dev = kzalloc(sizeof(struct adu_device), GFP_KERNEL);
-	if (dev == NULL) {
-		dev_err(&interface->dev, "Out of memory\n");
+	if (!dev) {
 		retval = -ENOMEM;
 		goto exit;
 	}
@@ -710,7 +709,6 @@
 
 	dev->read_buffer_primary = kmalloc((4 * in_end_size), GFP_KERNEL);
 	if (!dev->read_buffer_primary) {
-		dev_err(&interface->dev, "Couldn't allocate read_buffer_primary\n");
 		retval = -ENOMEM;
 		goto error;
 	}
@@ -723,7 +721,6 @@
 
 	dev->read_buffer_secondary = kmalloc((4 * in_end_size), GFP_KERNEL);
 	if (!dev->read_buffer_secondary) {
-		dev_err(&interface->dev, "Couldn't allocate read_buffer_secondary\n");
 		retval = -ENOMEM;
 		goto error;
 	}
@@ -735,29 +732,21 @@
 	memset(dev->read_buffer_secondary + (3 * in_end_size), 'h', in_end_size);
 
 	dev->interrupt_in_buffer = kmalloc(in_end_size, GFP_KERNEL);
-	if (!dev->interrupt_in_buffer) {
-		dev_err(&interface->dev, "Couldn't allocate interrupt_in_buffer\n");
+	if (!dev->interrupt_in_buffer)
 		goto error;
-	}
 
 	/* debug code prime the buffer */
 	memset(dev->interrupt_in_buffer, 'i', in_end_size);
 
 	dev->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->interrupt_in_urb) {
-		dev_err(&interface->dev, "Couldn't allocate interrupt_in_urb\n");
+	if (!dev->interrupt_in_urb)
 		goto error;
-	}
 	dev->interrupt_out_buffer = kmalloc(out_end_size, GFP_KERNEL);
-	if (!dev->interrupt_out_buffer) {
-		dev_err(&interface->dev, "Couldn't allocate interrupt_out_buffer\n");
+	if (!dev->interrupt_out_buffer)
 		goto error;
-	}
 	dev->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->interrupt_out_urb) {
-		dev_err(&interface->dev, "Couldn't allocate interrupt_out_urb\n");
+	if (!dev->interrupt_out_urb)
 		goto error;
-	}
 
 	if (!usb_string(udev, udev->descriptor.iSerialNumber, dev->serial_number,
 			sizeof(dev->serial_number))) {
diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c
index a0a3827..da5ff40 100644
--- a/drivers/usb/misc/appledisplay.c
+++ b/drivers/usb/misc/appledisplay.c
@@ -85,7 +85,6 @@
 };
 
 static atomic_t count_displays = ATOMIC_INIT(0);
-static struct workqueue_struct *wq;
 
 static void appledisplay_complete(struct urb *urb)
 {
@@ -122,7 +121,7 @@
 	case ACD_BTN_BRIGHT_UP:
 	case ACD_BTN_BRIGHT_DOWN:
 		pdata->button_pressed = 1;
-		queue_delayed_work(wq, &pdata->work, 0);
+		schedule_delayed_work(&pdata->work, 0);
 		break;
 	case ACD_BTN_NONE:
 	default:
@@ -239,7 +238,6 @@
 	pdata = kzalloc(sizeof(struct appledisplay), GFP_KERNEL);
 	if (!pdata) {
 		retval = -ENOMEM;
-		dev_err(&iface->dev, "Out of memory\n");
 		goto error;
 	}
 
@@ -253,8 +251,6 @@
 	pdata->msgdata = kmalloc(ACD_MSG_BUFFER_LEN, GFP_KERNEL);
 	if (!pdata->msgdata) {
 		retval = -ENOMEM;
-		dev_err(&iface->dev,
-			"Allocating buffer for control messages failed\n");
 		goto error;
 	}
 
@@ -262,7 +258,6 @@
 	pdata->urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!pdata->urb) {
 		retval = -ENOMEM;
-		dev_err(&iface->dev, "Allocating URB failed\n");
 		goto error;
 	}
 
@@ -344,7 +339,7 @@
 
 	if (pdata) {
 		usb_kill_urb(pdata->urb);
-		cancel_delayed_work(&pdata->work);
+		cancel_delayed_work_sync(&pdata->work);
 		backlight_device_unregister(pdata->bd);
 		usb_free_coherent(pdata->udev, ACD_URB_BUFFER_LEN,
 			pdata->urbdata, pdata->urb->transfer_dma);
@@ -365,19 +360,11 @@
 
 static int __init appledisplay_init(void)
 {
-	wq = create_singlethread_workqueue("appledisplay");
-	if (!wq) {
-		printk(KERN_ERR "appledisplay: Could not create work queue\n");
-		return -ENOMEM;
-	}
-
 	return usb_register(&appledisplay_driver);
 }
 
 static void __exit appledisplay_exit(void)
 {
-	flush_workqueue(wq);
-	destroy_workqueue(wq);
 	usb_deregister(&appledisplay_driver);
 }
 
diff --git a/drivers/usb/misc/cypress_cy7c63.c b/drivers/usb/misc/cypress_cy7c63.c
index 402b94d..5c93a88 100644
--- a/drivers/usb/misc/cypress_cy7c63.c
+++ b/drivers/usb/misc/cypress_cy7c63.c
@@ -79,7 +79,6 @@
 	/* allocate some memory for the i/o buffer*/
 	iobuf = kzalloc(CYPRESS_MAX_REQSIZE, GFP_KERNEL);
 	if (!iobuf) {
-		dev_err(&dev->udev->dev, "Out of memory!\n");
 		retval = -ENOMEM;
 		goto error;
 	}
@@ -208,10 +207,8 @@
 
 	/* allocate memory for our device state and initialize it */
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (dev == NULL) {
-		dev_err(&interface->dev, "Out of memory!\n");
+	if (!dev)
 		goto error_mem;
-	}
 
 	dev->udev = usb_get_dev(interface_to_usbdev(interface));
 
diff --git a/drivers/usb/misc/cytherm.c b/drivers/usb/misc/cytherm.c
index 9bab1a3..9d8bb8d 100644
--- a/drivers/usb/misc/cytherm.c
+++ b/drivers/usb/misc/cytherm.c
@@ -101,10 +101,8 @@
 	int retval;
    
 	buffer = kmalloc(8, GFP_KERNEL);
-	if (!buffer) {
-		dev_err(&cytherm->udev->dev, "out of memory\n");
+	if (!buffer)
 		return 0;
-	}
 
 	cytherm->brightness = simple_strtoul(buf, NULL, 10);
    
@@ -148,10 +146,8 @@
 	int temp, sign;
    
 	buffer = kmalloc(8, GFP_KERNEL);
-	if (!buffer) {
-		dev_err(&cytherm->udev->dev, "out of memory\n");
+	if (!buffer)
 		return 0;
-	}
 
 	/* read temperature */
 	retval = vendor_command(cytherm->udev, READ_RAM, TEMP, 0, buffer, 8);
@@ -192,10 +188,8 @@
 	unsigned char *buffer;
 
 	buffer = kmalloc(8, GFP_KERNEL);
-	if (!buffer) {
-		dev_err(&cytherm->udev->dev, "out of memory\n");
+	if (!buffer)
 		return 0;
-	}
 
 	/* check button */
 	retval = vendor_command(cytherm->udev, READ_RAM, BUTTON, 0, buffer, 8);
@@ -230,10 +224,8 @@
 	unsigned char *buffer;
 
 	buffer = kmalloc(8, GFP_KERNEL);
-	if (!buffer) {
-		dev_err(&cytherm->udev->dev, "out of memory\n");
+	if (!buffer)
 		return 0;
-	}
 
 	retval = vendor_command(cytherm->udev, READ_PORT, 0, 0, buffer, 8);
 	if (retval)
@@ -257,10 +249,8 @@
 	int tmp;
    
 	buffer = kmalloc(8, GFP_KERNEL);
-	if (!buffer) {
-		dev_err(&cytherm->udev->dev, "out of memory\n");
+	if (!buffer)
 		return 0;
-	}
 
 	tmp = simple_strtoul(buf, NULL, 10);
    
@@ -290,10 +280,8 @@
 	unsigned char *buffer;
 
 	buffer = kmalloc(8, GFP_KERNEL);
-	if (!buffer) {
-		dev_err(&cytherm->udev->dev, "out of memory\n");
+	if (!buffer)
 		return 0;
-	}
 
 	retval = vendor_command(cytherm->udev, READ_PORT, 1, 0, buffer, 8);
 	if (retval)
@@ -317,10 +305,8 @@
 	int tmp;
    
 	buffer = kmalloc(8, GFP_KERNEL);
-	if (!buffer) {
-		dev_err(&cytherm->udev->dev, "out of memory\n");
+	if (!buffer)
 		return 0;
-	}
 
 	tmp = simple_strtoul(buf, NULL, 10);
    
@@ -351,10 +337,8 @@
 	int retval = -ENOMEM;
 
 	dev = kzalloc (sizeof(struct usb_cytherm), GFP_KERNEL);
-	if (dev == NULL) {
-		dev_err (&interface->dev, "Out of memory\n");
+	if (!dev)
 		goto error_mem;
-	}
 
 	dev->udev = usb_get_dev(udev);
 
diff --git a/drivers/usb/misc/ezusb.c b/drivers/usb/misc/ezusb.c
index 947811b..837208f 100644
--- a/drivers/usb/misc/ezusb.c
+++ b/drivers/usb/misc/ezusb.c
@@ -22,7 +22,7 @@
 	unsigned short max_internal_adress;
 };
 
-static struct ezusb_fx_type ezusb_fx1 = {
+static const struct ezusb_fx_type ezusb_fx1 = {
 	.cpucs_reg = 0x7F92,
 	.max_internal_adress = 0x1B3F,
 };
diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c
index 52c27ca..9a82f83 100644
--- a/drivers/usb/misc/ftdi-elan.c
+++ b/drivers/usb/misc/ftdi-elan.c
@@ -61,9 +61,6 @@
 MODULE_PARM_DESC(distrust_firmware,
 		 "true to distrust firmware power/overcurrent setup");
 extern struct platform_driver u132_platform_driver;
-static struct workqueue_struct *status_queue;
-static struct workqueue_struct *command_queue;
-static struct workqueue_struct *respond_queue;
 /*
  * ftdi_module_lock exists to protect access to global variables
  *
@@ -228,56 +225,56 @@
 
 static void ftdi_status_requeue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-	if (!queue_delayed_work(status_queue, &ftdi->status_work, delta))
+	if (!schedule_delayed_work(&ftdi->status_work, delta))
 		kref_put(&ftdi->kref, ftdi_elan_delete);
 }
 
 static void ftdi_status_queue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-	if (queue_delayed_work(status_queue, &ftdi->status_work, delta))
+	if (schedule_delayed_work(&ftdi->status_work, delta))
 		kref_get(&ftdi->kref);
 }
 
 static void ftdi_status_cancel_work(struct usb_ftdi *ftdi)
 {
-	if (cancel_delayed_work(&ftdi->status_work))
+	if (cancel_delayed_work_sync(&ftdi->status_work))
 		kref_put(&ftdi->kref, ftdi_elan_delete);
 }
 
 static void ftdi_command_requeue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-	if (!queue_delayed_work(command_queue, &ftdi->command_work, delta))
+	if (!schedule_delayed_work(&ftdi->command_work, delta))
 		kref_put(&ftdi->kref, ftdi_elan_delete);
 }
 
 static void ftdi_command_queue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-	if (queue_delayed_work(command_queue, &ftdi->command_work, delta))
+	if (schedule_delayed_work(&ftdi->command_work, delta))
 		kref_get(&ftdi->kref);
 }
 
 static void ftdi_command_cancel_work(struct usb_ftdi *ftdi)
 {
-	if (cancel_delayed_work(&ftdi->command_work))
+	if (cancel_delayed_work_sync(&ftdi->command_work))
 		kref_put(&ftdi->kref, ftdi_elan_delete);
 }
 
 static void ftdi_response_requeue_work(struct usb_ftdi *ftdi,
 				       unsigned int delta)
 {
-	if (!queue_delayed_work(respond_queue, &ftdi->respond_work, delta))
+	if (!schedule_delayed_work(&ftdi->respond_work, delta))
 		kref_put(&ftdi->kref, ftdi_elan_delete);
 }
 
 static void ftdi_respond_queue_work(struct usb_ftdi *ftdi, unsigned int delta)
 {
-	if (queue_delayed_work(respond_queue, &ftdi->respond_work, delta))
+	if (schedule_delayed_work(&ftdi->respond_work, delta))
 		kref_get(&ftdi->kref);
 }
 
 static void ftdi_response_cancel_work(struct usb_ftdi *ftdi)
 {
-	if (cancel_delayed_work(&ftdi->respond_work))
+	if (cancel_delayed_work_sync(&ftdi->respond_work))
 		kref_put(&ftdi->kref, ftdi_elan_delete);
 }
 
@@ -665,7 +662,7 @@
 {
 	char data[30 *3 + 4];
 	char *d = data;
-	int m = (sizeof(data) - 1) / 3;
+	int m = (sizeof(data) - 1) / 3 - 1;
 	int bytes_read = 0;
 	int retry_on_empty = 10;
 	int retry_on_timeout = 5;
@@ -785,11 +782,8 @@
 		return 0;
 	total_size = ftdi_elan_total_command_size(ftdi, command_size);
 	urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!urb) {
-		dev_err(&ftdi->udev->dev, "could not get a urb to write %d commands totaling %d bytes to the Uxxx\n",
-			command_size, total_size);
+	if (!urb)
 		return -ENOMEM;
-	}
 	buf = usb_alloc_coherent(ftdi->udev, total_size, GFP_KERNEL,
 				 &urb->transfer_dma);
 	if (!buf) {
@@ -1684,7 +1678,7 @@
 			int i = 0;
 			char data[30 *3 + 4];
 			char *d = data;
-			int m = (sizeof(data) - 1) / 3;
+			int m = (sizeof(data) - 1) / 3 - 1;
 			int l = 0;
 			struct u132_target *target = &ftdi->target[ed];
 			struct u132_command *command = &ftdi->command[
@@ -1876,7 +1870,7 @@
 		if (packet_bytes > 2) {
 			char diag[30 *3 + 4];
 			char *d = diag;
-			int m = (sizeof(diag) - 1) / 3;
+			int m = (sizeof(diag) - 1) / 3 - 1;
 			char *b = ftdi->bulk_in_buffer;
 			int bytes_read = 0;
 			diag[0] = 0;
@@ -1948,10 +1942,8 @@
 	int I = 257;
 	int i = 0;
 	urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!urb) {
-		dev_err(&ftdi->udev->dev, "could not alloc a urb for flush sequence\n");
+	if (!urb)
 		return -ENOMEM;
-	}
 	buf = usb_alloc_coherent(ftdi->udev, I, GFP_KERNEL, &urb->transfer_dma);
 	if (!buf) {
 		dev_err(&ftdi->udev->dev, "could not get a buffer for flush sequence\n");
@@ -1988,10 +1980,8 @@
 	int I = 4;
 	int i = 0;
 	urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!urb) {
-		dev_err(&ftdi->udev->dev, "could not get a urb for the reset sequence\n");
+	if (!urb)
 		return -ENOMEM;
-	}
 	buf = usb_alloc_coherent(ftdi->udev, I, GFP_KERNEL, &urb->transfer_dma);
 	if (!buf) {
 		dev_err(&ftdi->udev->dev, "could not get a buffer for the reset sequence\n");
@@ -2053,7 +2043,7 @@
 			if (packet_bytes > 2) {
 				char diag[30 *3 + 4];
 				char *d = diag;
-				int m = (sizeof(diag) - 1) / 3;
+				int m = (sizeof(diag) - 1) / 3 - 1;
 				char *b = ftdi->bulk_in_buffer;
 				int bytes_read = 0;
 				unsigned char c = 0;
@@ -2155,7 +2145,7 @@
 		if (packet_bytes > 2) {
 			char diag[30 *3 + 4];
 			char *d = diag;
-			int m = (sizeof(diag) - 1) / 3;
+			int m = (sizeof(diag) - 1) / 3 - 1;
 			char *b = ftdi->bulk_in_buffer;
 			int bytes_read = 0;
 			diag[0] = 0;
@@ -2740,7 +2730,6 @@
 			ftdi->bulk_in_endpointAddr = endpoint->bEndpointAddress;
 			ftdi->bulk_in_buffer = kmalloc(buffer_size, GFP_KERNEL);
 			if (!ftdi->bulk_in_buffer) {
-				dev_err(&ftdi->udev->dev, "Could not allocate bulk_in_buffer\n");
 				retval = -ENOMEM;
 				goto error;
 			}
@@ -2823,9 +2812,6 @@
 			ftdi->initialized = 0;
 			ftdi->registered = 0;
 		}
-		flush_workqueue(status_queue);
-		flush_workqueue(command_queue);
-		flush_workqueue(respond_queue);
 		ftdi->disconnected += 1;
 		usb_set_intfdata(interface, NULL);
 		dev_info(&ftdi->udev->dev, "USB FTDI U132 host controller interface now disconnected\n");
@@ -2845,31 +2831,12 @@
 	pr_info("driver %s\n", ftdi_elan_driver.name);
 	mutex_init(&ftdi_module_lock);
 	INIT_LIST_HEAD(&ftdi_static_list);
-	status_queue = create_singlethread_workqueue("ftdi-status-control");
-	if (!status_queue)
-		goto err_status_queue;
-	command_queue = create_singlethread_workqueue("ftdi-command-engine");
-	if (!command_queue)
-		goto err_command_queue;
-	respond_queue = create_singlethread_workqueue("ftdi-respond-engine");
-	if (!respond_queue)
-		goto err_respond_queue;
 	result = usb_register(&ftdi_elan_driver);
 	if (result) {
-		destroy_workqueue(status_queue);
-		destroy_workqueue(command_queue);
-		destroy_workqueue(respond_queue);
 		pr_err("usb_register failed. Error number %d\n", result);
 	}
 	return result;
 
-err_respond_queue:
-	destroy_workqueue(command_queue);
-err_command_queue:
-	destroy_workqueue(status_queue);
-err_status_queue:
-	pr_err("%s couldn't create workqueue\n", ftdi_elan_driver.name);
-	return -ENOMEM;
 }
 
 static void __exit ftdi_elan_exit(void)
@@ -2882,15 +2849,7 @@
 		ftdi_status_cancel_work(ftdi);
 		ftdi_command_cancel_work(ftdi);
 		ftdi_response_cancel_work(ftdi);
-	} flush_workqueue(status_queue);
-	destroy_workqueue(status_queue);
-	status_queue = NULL;
-	flush_workqueue(command_queue);
-	destroy_workqueue(command_queue);
-	command_queue = NULL;
-	flush_workqueue(respond_queue);
-	destroy_workqueue(respond_queue);
-	respond_queue = NULL;
+	}
 }
 
 
diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c
index 5105397..2975e80 100644
--- a/drivers/usb/misc/idmouse.c
+++ b/drivers/usb/misc/idmouse.c
@@ -366,7 +366,6 @@
 			kmalloc(IMGSIZE + dev->bulk_in_size, GFP_KERNEL);
 
 		if (!dev->bulk_in_buffer) {
-			dev_err(&interface->dev, "Unable to allocate input buffer.\n");
 			idmouse_delete(dev);
 			return -ENOMEM;
 		}
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 1950e87..095778f 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -278,7 +278,7 @@
 	dev = file->private_data;
 
 	/* verify that the device wasn't unplugged */
-	if (dev == NULL || !dev->present)
+	if (!dev || !dev->present)
 		return -ENODEV;
 
 	dev_dbg(&dev->interface->dev, "minor %d, count = %zd\n",
@@ -413,8 +413,6 @@
 		int_out_urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (!int_out_urb) {
 			retval = -ENOMEM;
-			dev_dbg(&dev->interface->dev,
-				"Unable to allocate urb\n");
 			goto error_no_urb;
 		}
 		buf = usb_alloc_coherent(dev->udev, dev->report_size,
@@ -482,9 +480,8 @@
 	int io_res;		/* checks for bytes read/written and copy_to/from_user results */
 
 	dev = file->private_data;
-	if (dev == NULL) {
+	if (!dev)
 		return -ENODEV;
-	}
 
 	buffer = kzalloc(dev->report_size, GFP_KERNEL);
 	if (!buffer)
@@ -654,9 +651,8 @@
 	int retval = 0;
 
 	dev = file->private_data;
-	if (dev == NULL) {
+	if (!dev)
 		return -ENODEV;
-	}
 
 	dev_dbg(&dev->interface->dev, "minor %d\n", dev->minor);
 
@@ -766,10 +762,8 @@
 
 	/* allocate memory for our device state and initialize it */
 	dev = kzalloc(sizeof(struct iowarrior), GFP_KERNEL);
-	if (dev == NULL) {
-		dev_err(&interface->dev, "Out of memory\n");
+	if (!dev)
 		return retval;
-	}
 
 	mutex_init(&dev->mutex);
 
@@ -812,15 +806,11 @@
 
 	/* create the urb and buffer for reading */
 	dev->int_in_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->int_in_urb) {
-		dev_err(&interface->dev, "Couldn't allocate interrupt_in_urb\n");
+	if (!dev->int_in_urb)
 		goto error;
-	}
 	dev->int_in_buffer = kmalloc(dev->report_size, GFP_KERNEL);
-	if (!dev->int_in_buffer) {
-		dev_err(&interface->dev, "Couldn't allocate int_in_buffer\n");
+	if (!dev->int_in_buffer)
 		goto error;
-	}
 	usb_fill_int_urb(dev->int_in_urb, dev->udev,
 			 usb_rcvintpipe(dev->udev,
 					dev->int_in_endpoint->bEndpointAddress),
@@ -831,10 +821,8 @@
 	dev->read_queue =
 	    kmalloc(((dev->report_size + 1) * MAX_INTERRUPT_BUFFER),
 		    GFP_KERNEL);
-	if (!dev->read_queue) {
-		dev_err(&interface->dev, "Couldn't allocate read_queue\n");
+	if (!dev->read_queue)
 		goto error;
-	}
 	/* Get the serial-number of the chip */
 	memset(dev->chip_serial, 0x00, sizeof(dev->chip_serial));
 	usb_string(udev, udev->descriptor.iSerialNumber, dev->chip_serial,
diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c
index cce22ff..9ca5956 100644
--- a/drivers/usb/misc/ldusb.c
+++ b/drivers/usb/misc/ldusb.c
@@ -658,10 +658,8 @@
 	/* allocate memory for our device state and initialize it */
 
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (dev == NULL) {
-		dev_err(&intf->dev, "Out of memory\n");
+	if (!dev)
 		goto exit;
-	}
 	mutex_init(&dev->mutex);
 	spin_lock_init(&dev->rbsl);
 	dev->intf = intf;
@@ -674,10 +672,8 @@
 	     (le16_to_cpu(udev->descriptor.idProduct) == USB_DEVICE_ID_LD_COM3LAB)) &&
 	    (le16_to_cpu(udev->descriptor.bcdDevice) <= 0x103)) {
 		buffer = kmalloc(256, GFP_KERNEL);
-		if (buffer == NULL) {
-			dev_err(&intf->dev, "Couldn't allocate string buffer\n");
+		if (!buffer)
 			goto error;
-		}
 		/* usb_string makes SETUP+STALL to leave always ControlReadLoop */
 		usb_string(udev, 255, buffer, 256);
 		kfree(buffer);
@@ -704,32 +700,22 @@
 
 	dev->interrupt_in_endpoint_size = usb_endpoint_maxp(dev->interrupt_in_endpoint);
 	dev->ring_buffer = kmalloc(ring_buffer_size*(sizeof(size_t)+dev->interrupt_in_endpoint_size), GFP_KERNEL);
-	if (!dev->ring_buffer) {
-		dev_err(&intf->dev, "Couldn't allocate ring_buffer\n");
+	if (!dev->ring_buffer)
 		goto error;
-	}
 	dev->interrupt_in_buffer = kmalloc(dev->interrupt_in_endpoint_size, GFP_KERNEL);
-	if (!dev->interrupt_in_buffer) {
-		dev_err(&intf->dev, "Couldn't allocate interrupt_in_buffer\n");
+	if (!dev->interrupt_in_buffer)
 		goto error;
-	}
 	dev->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->interrupt_in_urb) {
-		dev_err(&intf->dev, "Couldn't allocate interrupt_in_urb\n");
+	if (!dev->interrupt_in_urb)
 		goto error;
-	}
 	dev->interrupt_out_endpoint_size = dev->interrupt_out_endpoint ? usb_endpoint_maxp(dev->interrupt_out_endpoint) :
 									 udev->descriptor.bMaxPacketSize0;
 	dev->interrupt_out_buffer = kmalloc(write_buffer_size*dev->interrupt_out_endpoint_size, GFP_KERNEL);
-	if (!dev->interrupt_out_buffer) {
-		dev_err(&intf->dev, "Couldn't allocate interrupt_out_buffer\n");
+	if (!dev->interrupt_out_buffer)
 		goto error;
-	}
 	dev->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->interrupt_out_urb) {
-		dev_err(&intf->dev, "Couldn't allocate interrupt_out_urb\n");
+	if (!dev->interrupt_out_urb)
 		goto error;
-	}
 	dev->interrupt_in_interval = min_interrupt_in_interval > dev->interrupt_in_endpoint->bInterval ? min_interrupt_in_interval : dev->interrupt_in_endpoint->bInterval;
 	if (dev->interrupt_out_endpoint)
 		dev->interrupt_out_interval = min_interrupt_out_interval > dev->interrupt_out_endpoint->bInterval ? min_interrupt_out_interval : dev->interrupt_out_endpoint->bInterval;
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index 7771be3..c8fbe7b 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -817,10 +817,8 @@
 
 	dev = kmalloc (sizeof(struct lego_usb_tower), GFP_KERNEL);
 
-	if (dev == NULL) {
-		dev_err(idev, "Out of memory\n");
+	if (!dev)
 		goto exit;
-	}
 
 	mutex_init(&dev->lock);
 
@@ -871,51 +869,23 @@
 	}
 
 	dev->read_buffer = kmalloc (read_buffer_size, GFP_KERNEL);
-	if (!dev->read_buffer) {
-		dev_err(idev, "Couldn't allocate read_buffer\n");
+	if (!dev->read_buffer)
 		goto error;
-	}
 	dev->interrupt_in_buffer = kmalloc (usb_endpoint_maxp(dev->interrupt_in_endpoint), GFP_KERNEL);
-	if (!dev->interrupt_in_buffer) {
-		dev_err(idev, "Couldn't allocate interrupt_in_buffer\n");
+	if (!dev->interrupt_in_buffer)
 		goto error;
-	}
 	dev->interrupt_in_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->interrupt_in_urb) {
-		dev_err(idev, "Couldn't allocate interrupt_in_urb\n");
+	if (!dev->interrupt_in_urb)
 		goto error;
-	}
 	dev->interrupt_out_buffer = kmalloc (write_buffer_size, GFP_KERNEL);
-	if (!dev->interrupt_out_buffer) {
-		dev_err(idev, "Couldn't allocate interrupt_out_buffer\n");
+	if (!dev->interrupt_out_buffer)
 		goto error;
-	}
 	dev->interrupt_out_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->interrupt_out_urb) {
-		dev_err(idev, "Couldn't allocate interrupt_out_urb\n");
+	if (!dev->interrupt_out_urb)
 		goto error;
-	}
 	dev->interrupt_in_interval = interrupt_in_interval ? interrupt_in_interval : dev->interrupt_in_endpoint->bInterval;
 	dev->interrupt_out_interval = interrupt_out_interval ? interrupt_out_interval : dev->interrupt_out_endpoint->bInterval;
 
-	/* we can register the device now, as it is ready */
-	usb_set_intfdata (interface, dev);
-
-	retval = usb_register_dev (interface, &tower_class);
-
-	if (retval) {
-		/* something prevented us from registering this driver */
-		dev_err(idev, "Not able to get a minor for this device.\n");
-		usb_set_intfdata (interface, NULL);
-		goto error;
-	}
-	dev->minor = interface->minor;
-
-	/* let the user know what node this device is now attached to */
-	dev_info(&interface->dev, "LEGO USB Tower #%d now attached to major "
-		 "%d minor %d\n", (dev->minor - LEGO_USB_TOWER_MINOR_BASE),
-		 USB_MAJOR, dev->minor);
-
 	/* get the firmware version and log it */
 	result = usb_control_msg (udev,
 				  usb_rcvctrlpipe(udev, 0),
@@ -936,6 +906,23 @@
 		 get_version_reply.minor,
 		 le16_to_cpu(get_version_reply.build_no));
 
+	/* we can register the device now, as it is ready */
+	usb_set_intfdata (interface, dev);
+
+	retval = usb_register_dev (interface, &tower_class);
+
+	if (retval) {
+		/* something prevented us from registering this driver */
+		dev_err(idev, "Not able to get a minor for this device.\n");
+		usb_set_intfdata (interface, NULL);
+		goto error;
+	}
+	dev->minor = interface->minor;
+
+	/* let the user know what node this device is now attached to */
+	dev_info(&interface->dev, "LEGO USB Tower #%d now attached to major "
+		 "%d minor %d\n", (dev->minor - LEGO_USB_TOWER_MINOR_BASE),
+		 USB_MAJOR, dev->minor);
 
 exit:
 	return retval;
diff --git a/drivers/usb/misc/lvstest.c b/drivers/usb/misc/lvstest.c
index 86b4e4b..7717651 100644
--- a/drivers/usb/misc/lvstest.c
+++ b/drivers/usb/misc/lvstest.c
@@ -34,8 +34,6 @@
 	struct usb_hub_descriptor descriptor;
 	/* urb for polling interrupt pipe */
 	struct urb *urb;
-	/* LVS RH work queue */
-	struct workqueue_struct *rh_queue;
 	/* LVH RH work */
 	struct work_struct	rh_work;
 	/* RH port status */
@@ -247,10 +245,8 @@
 	int ret;
 
 	descriptor = kmalloc(sizeof(*descriptor), GFP_KERNEL);
-	if (!descriptor) {
-		dev_err(dev, "failed to allocate descriptor memory\n");
+	if (!descriptor)
 		return -ENOMEM;
-	}
 
 	udev = create_lvs_device(intf);
 	if (!udev) {
@@ -355,7 +351,7 @@
 {
 	struct lvs_rh *lvs = urb->context;
 
-	queue_work(lvs->rh_queue, &lvs->rh_work);
+	schedule_work(&lvs->rh_work);
 }
 
 static int lvs_rh_probe(struct usb_interface *intf,
@@ -397,24 +393,15 @@
 
 	/* submit urb to poll interrupt endpoint */
 	lvs->urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!lvs->urb) {
-		dev_err(&intf->dev, "couldn't allocate lvs urb\n");
+	if (!lvs->urb)
 		return -ENOMEM;
-	}
-
-	lvs->rh_queue = create_singlethread_workqueue("lvs_rh_queue");
-	if (!lvs->rh_queue) {
-		dev_err(&intf->dev, "couldn't create workqueue\n");
-		ret = -ENOMEM;
-		goto free_urb;
-	}
 
 	INIT_WORK(&lvs->rh_work, lvs_rh_work);
 
 	ret = sysfs_create_group(&intf->dev.kobj, &lvs_attr_group);
 	if (ret < 0) {
 		dev_err(&intf->dev, "Failed to create sysfs node %d\n", ret);
-		goto destroy_queue;
+		goto free_urb;
 	}
 
 	pipe = usb_rcvintpipe(hdev, endpoint->bEndpointAddress);
@@ -432,8 +419,6 @@
 
 sysfs_remove:
 	sysfs_remove_group(&intf->dev.kobj, &lvs_attr_group);
-destroy_queue:
-	destroy_workqueue(lvs->rh_queue);
 free_urb:
 	usb_free_urb(lvs->urb);
 	return ret;
@@ -444,7 +429,7 @@
 	struct lvs_rh *lvs = usb_get_intfdata(intf);
 
 	sysfs_remove_group(&intf->dev.kobj, &lvs_attr_group);
-	destroy_workqueue(lvs->rh_queue);
+	flush_work(&lvs->rh_work);
 	usb_free_urb(lvs->urb);
 }
 
diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
index 02abfcd..05bd39d 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.c
+++ b/drivers/usb/misc/sisusbvga/sisusb.c
@@ -3084,7 +3084,6 @@
 	/* Allocate URBs */
 	sisusb->sisurbin = usb_alloc_urb(0, GFP_KERNEL);
 	if (!sisusb->sisurbin) {
-		dev_err(&sisusb->sisusb_dev->dev, "Failed to allocate URBs\n");
 		retval = -ENOMEM;
 		goto error_3;
 	}
@@ -3093,8 +3092,6 @@
 	for (i = 0; i < sisusb->numobufs; i++) {
 		sisusb->sisurbout[i] = usb_alloc_urb(0, GFP_KERNEL);
 		if (!sisusb->sisurbout[i]) {
-			dev_err(&sisusb->sisusb_dev->dev,
-					"Failed to allocate URBs\n");
 			retval = -ENOMEM;
 			goto error_4;
 		}
diff --git a/drivers/usb/misc/trancevibrator.c b/drivers/usb/misc/trancevibrator.c
index 4145314..9795457 100644
--- a/drivers/usb/misc/trancevibrator.c
+++ b/drivers/usb/misc/trancevibrator.c
@@ -95,8 +95,7 @@
 	int retval;
 
 	dev = kzalloc(sizeof(struct trancevibrator), GFP_KERNEL);
-	if (dev == NULL) {
-		dev_err(&interface->dev, "Out of memory\n");
+	if (!dev) {
 		retval = -ENOMEM;
 		goto error;
 	}
diff --git a/drivers/usb/misc/usb4604.c b/drivers/usb/misc/usb4604.c
new file mode 100644
index 0000000..e9f37fb
--- /dev/null
+++ b/drivers/usb/misc/usb4604.c
@@ -0,0 +1,175 @@
+/*
+ * Driver for SMSC USB4604 USB HSIC 4-port 2.0 hub controller driver
+ * Based on usb3503 driver
+ *
+ * Copyright (c) 2012-2013 Dongjin Kim (tobetter@gmail.com)
+ * Copyright (c) 2016 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/i2c.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/gpio/consumer.h>
+
+enum usb4604_mode {
+	USB4604_MODE_UNKNOWN,
+	USB4604_MODE_HUB,
+	USB4604_MODE_STANDBY,
+};
+
+struct usb4604 {
+	enum usb4604_mode	mode;
+	struct device		*dev;
+	struct gpio_desc	*gpio_reset;
+};
+
+static void usb4604_reset(struct usb4604 *hub, int state)
+{
+	gpiod_set_value_cansleep(hub->gpio_reset, state);
+
+	/* Wait for i2c logic to come up */
+	if (state)
+		msleep(250);
+}
+
+static int usb4604_connect(struct usb4604 *hub)
+{
+	struct device *dev = hub->dev;
+	struct i2c_client *client = to_i2c_client(dev);
+	int err;
+	u8 connect_cmd[] = { 0xaa, 0x55, 0x00 };
+
+	usb4604_reset(hub, 1);
+
+	err = i2c_master_send(client, connect_cmd, ARRAY_SIZE(connect_cmd));
+	if (err < 0) {
+		usb4604_reset(hub, 0);
+		return err;
+	}
+
+	hub->mode = USB4604_MODE_HUB;
+	dev_dbg(dev, "switched to HUB mode\n");
+
+	return 0;
+}
+
+static int usb4604_switch_mode(struct usb4604 *hub, enum usb4604_mode mode)
+{
+	struct device *dev = hub->dev;
+	int err = 0;
+
+	switch (mode) {
+	case USB4604_MODE_HUB:
+		err = usb4604_connect(hub);
+		break;
+
+	case USB4604_MODE_STANDBY:
+		usb4604_reset(hub, 0);
+		dev_dbg(dev, "switched to STANDBY mode\n");
+		break;
+
+	default:
+		dev_err(dev, "unknown mode is requested\n");
+		err = -EINVAL;
+		break;
+	}
+
+	return err;
+}
+
+static int usb4604_probe(struct usb4604 *hub)
+{
+	struct device *dev = hub->dev;
+	struct device_node *np = dev->of_node;
+	struct gpio_desc *gpio;
+	u32 mode = USB4604_MODE_HUB;
+
+	gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(gpio))
+		return PTR_ERR(gpio);
+	hub->gpio_reset = gpio;
+
+	if (of_property_read_u32(np, "initial-mode", &hub->mode))
+		hub->mode = mode;
+
+	return usb4604_switch_mode(hub, hub->mode);
+}
+
+static int usb4604_i2c_probe(struct i2c_client *i2c,
+			     const struct i2c_device_id *id)
+{
+	struct usb4604 *hub;
+
+	hub = devm_kzalloc(&i2c->dev, sizeof(*hub), GFP_KERNEL);
+	if (!hub)
+		return -ENOMEM;
+
+	i2c_set_clientdata(i2c, hub);
+	hub->dev = &i2c->dev;
+
+	return usb4604_probe(hub);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int usb4604_i2c_suspend(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct usb4604 *hub = i2c_get_clientdata(client);
+
+	usb4604_switch_mode(hub, USB4604_MODE_STANDBY);
+
+	return 0;
+}
+
+static int usb4604_i2c_resume(struct device *dev)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct usb4604 *hub = i2c_get_clientdata(client);
+
+	usb4604_switch_mode(hub, hub->mode);
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(usb4604_i2c_pm_ops, usb4604_i2c_suspend,
+		usb4604_i2c_resume);
+
+static const struct i2c_device_id usb4604_id[] = {
+	{ "usb4604", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, usb4604_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id usb4604_of_match[] = {
+	{ .compatible = "smsc,usb4604" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, usb4604_of_match);
+#endif
+
+static struct i2c_driver usb4604_i2c_driver = {
+	.driver = {
+		.name = "usb4604",
+		.pm = &usb4604_i2c_pm_ops,
+		.of_match_table = of_match_ptr(usb4604_of_match),
+	},
+	.probe		= usb4604_i2c_probe,
+	.id_table	= usb4604_id,
+};
+module_i2c_driver(usb4604_i2c_driver);
+
+MODULE_DESCRIPTION("USB4604 USB HUB driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c
index 1184390..9f48419 100644
--- a/drivers/usb/misc/usblcd.c
+++ b/drivers/usb/misc/usblcd.c
@@ -321,10 +321,8 @@
 
 	/* allocate memory for our device state and initialize it */
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (dev == NULL) {
-		dev_err(&interface->dev, "Out of memory\n");
+	if (!dev)
 		goto error;
-	}
 	kref_init(&dev->kref);
 	sema_init(&dev->limit_sem, USB_LCD_CONCURRENT_WRITES);
 	init_usb_anchor(&dev->submitted);
@@ -351,11 +349,8 @@
 			dev->bulk_in_size = buffer_size;
 			dev->bulk_in_endpointAddr = endpoint->bEndpointAddress;
 			dev->bulk_in_buffer = kmalloc(buffer_size, GFP_KERNEL);
-			if (!dev->bulk_in_buffer) {
-				dev_err(&interface->dev,
-					"Could not allocate bulk_in_buffer\n");
+			if (!dev->bulk_in_buffer)
 				goto error;
-			}
 		}
 
 		if (!dev->bulk_out_endpointAddr &&
diff --git a/drivers/usb/misc/usbsevseg.c b/drivers/usb/misc/usbsevseg.c
index 1fe6b73..a0ba529 100644
--- a/drivers/usb/misc/usbsevseg.c
+++ b/drivers/usb/misc/usbsevseg.c
@@ -128,10 +128,8 @@
 		return;
 
 	buffer = kzalloc(MAXLEN, mf);
-	if (!buffer) {
-		dev_err(&mydev->udev->dev, "out of memory\n");
+	if (!buffer)
 		return;
-	}
 
 	/* The device is right to left, where as you write left to right */
 	for (i = 0; i < mydev->textlength; i++)
@@ -346,10 +344,8 @@
 	int rc = -ENOMEM;
 
 	mydev = kzalloc(sizeof(struct usb_sevsegdev), GFP_KERNEL);
-	if (mydev == NULL) {
-		dev_err(&interface->dev, "Out of memory\n");
+	if (!mydev)
 		goto error_mem;
-	}
 
 	mydev->udev = usb_get_dev(udev);
 	mydev->intf = interface;
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index 6b978f0..5c8210d 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -585,7 +585,6 @@
 {
 	struct usb_sg_request	*req = (struct usb_sg_request *) _req;
 
-	req->status = -ETIMEDOUT;
 	usb_sg_cancel(req);
 }
 
@@ -616,8 +615,10 @@
 		mod_timer(&sg_timer, jiffies +
 				msecs_to_jiffies(SIMPLE_IO_TIMEOUT));
 		usb_sg_wait(req);
-		del_timer_sync(&sg_timer);
-		retval = req->status;
+		if (!del_timer_sync(&sg_timer))
+			retval = -ETIMEDOUT;
+		else
+			retval = req->status;
 
 		/* FIXME check resulting data pattern */
 
@@ -2602,7 +2603,7 @@
 	ktime_get_ts64(&start);
 
 	retval = usbtest_do_ioctl(intf, param_32);
-	if (retval)
+	if (retval < 0)
 		goto free_mutex;
 
 	ktime_get_ts64(&end);
diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
index bbd029c..356d312 100644
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -150,10 +150,8 @@
 	if (!usbdev)
 		return NULL;
 	rq = kzalloc(sizeof(struct uss720_async_request), mem_flags);
-	if (!rq) {
-		dev_err(&usbdev->dev, "submit_async_request out of memory\n");
+	if (!rq)
 		return NULL;
-	}
 	kref_init(&rq->ref_count);
 	INIT_LIST_HEAD(&rq->asynclist);
 	init_completion(&rq->compl);
@@ -162,7 +160,6 @@
 	rq->urb = usb_alloc_urb(0, mem_flags);
 	if (!rq->urb) {
 		kref_put(&rq->ref_count, destroy_async);
-		dev_err(&usbdev->dev, "submit_async_request out of memory\n");
 		return NULL;
 	}
 	rq->dr = kmalloc(sizeof(*rq->dr), mem_flags);
diff --git a/drivers/usb/misc/yurex.c b/drivers/usb/misc/yurex.c
index 343fa6f..54e53ac 100644
--- a/drivers/usb/misc/yurex.c
+++ b/drivers/usb/misc/yurex.c
@@ -200,10 +200,8 @@
 
 	/* allocate memory for our device state and initialize it */
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev) {
-		dev_err(&interface->dev, "Out of memory\n");
+	if (!dev)
 		goto error;
-	}
 	kref_init(&dev->kref);
 	mutex_init(&dev->io_mutex);
 	spin_lock_init(&dev->lock);
@@ -231,17 +229,13 @@
 
 	/* allocate control URB */
 	dev->cntl_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->cntl_urb) {
-		dev_err(&interface->dev, "Could not allocate control URB\n");
+	if (!dev->cntl_urb)
 		goto error;
-	}
 
 	/* allocate buffer for control req */
 	dev->cntl_req = kmalloc(YUREX_BUF_SIZE, GFP_KERNEL);
-	if (!dev->cntl_req) {
-		dev_err(&interface->dev, "Could not allocate cntl_req\n");
+	if (!dev->cntl_req)
 		goto error;
-	}
 
 	/* allocate buffer for control msg */
 	dev->cntl_buffer = usb_alloc_coherent(dev->udev, YUREX_BUF_SIZE,
@@ -269,10 +263,8 @@
 
 	/* allocate interrupt URB */
 	dev->urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!dev->urb) {
-		dev_err(&interface->dev, "Could not allocate URB\n");
+	if (!dev->urb)
 		goto error;
-	}
 
 	/* allocate buffer for interrupt in */
 	dev->int_buffer = usb_alloc_coherent(dev->udev, YUREX_BUF_SIZE,
diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index 886526b..72a2a50 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -82,12 +82,12 @@
 	tristate "DA8xx/OMAP-L1x"
 	depends on ARCH_DAVINCI_DA8XX
 	depends on NOP_USB_XCEIV
-	depends on BROKEN
+	select PHY_DA8XX_USB
 
 config USB_MUSB_TUSB6010
 	tristate "TUSB6010"
 	depends on HAS_IOMEM
-	depends on ARCH_OMAP2PLUS || COMPILE_TEST
+	depends on (ARCH_OMAP2PLUS || COMPILE_TEST) && !BLACKFIN
 	depends on NOP_USB_XCEIV = USB_MUSB_HDRC # both built-in or both modules
 
 config USB_MUSB_OMAP2PLUS
diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c
index c41fe58..50ca805 100644
--- a/drivers/usb/musb/am35x.c
+++ b/drivers/usb/musb/am35x.c
@@ -474,10 +474,8 @@
 	int				ret = -ENOMEM;
 
 	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
-	if (!glue) {
-		dev_err(&pdev->dev, "failed to allocate glue context\n");
+	if (!glue)
 		goto err0;
-	}
 
 	phy_clk = clk_get(&pdev->dev, "fck");
 	if (IS_ERR(phy_clk)) {
@@ -512,8 +510,10 @@
 	pdata->platform_ops		= &am35x_ops;
 
 	glue->phy = usb_phy_generic_register();
-	if (IS_ERR(glue->phy))
+	if (IS_ERR(glue->phy)) {
+		ret = PTR_ERR(glue->phy);
 		goto err7;
+	}
 	platform_set_drvdata(pdev, glue);
 
 	pinfo = am35x_dev_info;
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index b03d3b8..210b7e4 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -30,13 +30,11 @@
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/io.h>
+#include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/usb/usb_phy_generic.h>
 
-#include <mach/da8xx.h>
-#include <linux/platform_data/usb-davinci.h>
-
 #include "musb_core.h"
 
 /*
@@ -80,61 +78,15 @@
 
 #define DA8XX_MENTOR_CORE_OFFSET 0x400
 
-#define CFGCHIP2	IO_ADDRESS(DA8XX_SYSCFG0_BASE + DA8XX_CFGCHIP2_REG)
-
 struct da8xx_glue {
 	struct device		*dev;
 	struct platform_device	*musb;
-	struct platform_device	*phy;
+	struct platform_device	*usb_phy;
 	struct clk		*clk;
+	struct phy		*phy;
 };
 
 /*
- * REVISIT (PM): we should be able to keep the PHY in low power mode most
- * of the time (24 MHz oscillator and PLL off, etc.) by setting POWER.D0
- * and, when in host mode, autosuspending idle root ports... PHY_PLLON
- * (overriding SUSPENDM?) then likely needs to stay off.
- */
-
-static inline void phy_on(void)
-{
-	u32 cfgchip2 = __raw_readl(CFGCHIP2);
-
-	/*
-	 * Start the on-chip PHY and its PLL.
-	 */
-	cfgchip2 &= ~(CFGCHIP2_RESET | CFGCHIP2_PHYPWRDN | CFGCHIP2_OTGPWRDN);
-	cfgchip2 |= CFGCHIP2_PHY_PLLON;
-	__raw_writel(cfgchip2, CFGCHIP2);
-
-	pr_info("Waiting for USB PHY clock good...\n");
-	while (!(__raw_readl(CFGCHIP2) & CFGCHIP2_PHYCLKGD))
-		cpu_relax();
-}
-
-static inline void phy_off(void)
-{
-	u32 cfgchip2 = __raw_readl(CFGCHIP2);
-
-	/*
-	 * Ensure that USB 1.1 reference clock is not being sourced from
-	 * USB 2.0 PHY.  Otherwise do not power down the PHY.
-	 */
-	if (!(cfgchip2 & CFGCHIP2_USB1PHYCLKMUX) &&
-	     (cfgchip2 & CFGCHIP2_USB1SUSPENDM)) {
-		pr_warning("USB 1.1 clocked from USB 2.0 PHY -- "
-			   "can't power it down\n");
-		return;
-	}
-
-	/*
-	 * Power down the on-chip PHY.
-	 */
-	cfgchip2 |= CFGCHIP2_PHYPWRDN | CFGCHIP2_OTGPWRDN;
-	__raw_writel(cfgchip2, CFGCHIP2);
-}
-
-/*
  * Because we don't set CTRL.UINT, it's "important" to:
  *	- not read/write INTRUSB/INTRUSBE (except during
  *	  initial setup, as a workaround);
@@ -385,29 +337,29 @@
 
 static int da8xx_musb_set_mode(struct musb *musb, u8 musb_mode)
 {
-	u32 cfgchip2 = __raw_readl(CFGCHIP2);
+	struct da8xx_glue *glue = dev_get_drvdata(musb->controller->parent);
+	enum phy_mode phy_mode;
 
-	cfgchip2 &= ~CFGCHIP2_OTGMODE;
 	switch (musb_mode) {
 	case MUSB_HOST:		/* Force VBUS valid, ID = 0 */
-		cfgchip2 |= CFGCHIP2_FORCE_HOST;
+		phy_mode = PHY_MODE_USB_HOST;
 		break;
 	case MUSB_PERIPHERAL:	/* Force VBUS valid, ID = 1 */
-		cfgchip2 |= CFGCHIP2_FORCE_DEVICE;
+		phy_mode = PHY_MODE_USB_DEVICE;
 		break;
 	case MUSB_OTG:		/* Don't override the VBUS/ID comparators */
-		cfgchip2 |= CFGCHIP2_NO_OVERRIDE;
+		phy_mode = PHY_MODE_USB_OTG;
 		break;
 	default:
-		dev_dbg(musb->controller, "Trying to set unsupported mode %u\n", musb_mode);
+		return -EINVAL;
 	}
 
-	__raw_writel(cfgchip2, CFGCHIP2);
-	return 0;
+	return phy_set_mode(glue->phy, phy_mode);
 }
 
 static int da8xx_musb_init(struct musb *musb)
 {
+	struct da8xx_glue *glue = dev_get_drvdata(musb->controller->parent);
 	void __iomem *reg_base = musb->ctrl_base;
 	u32 rev;
 	int ret = -ENODEV;
@@ -425,32 +377,56 @@
 		goto fail;
 	}
 
+	ret = clk_prepare_enable(glue->clk);
+	if (ret) {
+		dev_err(glue->dev, "failed to enable clock\n");
+		goto fail;
+	}
+
 	setup_timer(&otg_workaround, otg_timer, (unsigned long)musb);
 
 	/* Reset the controller */
 	musb_writel(reg_base, DA8XX_USB_CTRL_REG, DA8XX_SOFT_RESET_MASK);
 
 	/* Start the on-chip PHY and its PLL. */
-	phy_on();
+	ret = phy_init(glue->phy);
+	if (ret) {
+		dev_err(glue->dev, "Failed to init phy.\n");
+		goto err_phy_init;
+	}
+
+	ret = phy_power_on(glue->phy);
+	if (ret) {
+		dev_err(glue->dev, "Failed to power on phy.\n");
+		goto err_phy_power_on;
+	}
 
 	msleep(5);
 
 	/* NOTE: IRQs are in mixed mode, not bypass to pure MUSB */
-	pr_debug("DA8xx OTG revision %08x, PHY %03x, control %02x\n",
-		 rev, __raw_readl(CFGCHIP2),
+	pr_debug("DA8xx OTG revision %08x, control %02x\n", rev,
 		 musb_readb(reg_base, DA8XX_USB_CTRL_REG));
 
 	musb->isr = da8xx_musb_interrupt;
 	return 0;
+
+err_phy_power_on:
+	phy_exit(glue->phy);
+err_phy_init:
+	clk_disable_unprepare(glue->clk);
 fail:
 	return ret;
 }
 
 static int da8xx_musb_exit(struct musb *musb)
 {
+	struct da8xx_glue *glue = dev_get_drvdata(musb->controller->parent);
+
 	del_timer_sync(&otg_workaround);
 
-	phy_off();
+	phy_power_off(glue->phy);
+	phy_exit(glue->phy);
+	clk_disable_unprepare(glue->clk);
 
 	usb_put_phy(musb->xceiv);
 
@@ -486,30 +462,25 @@
 {
 	struct resource musb_resources[2];
 	struct musb_hdrc_platform_data	*pdata = dev_get_platdata(&pdev->dev);
-	struct platform_device		*musb;
 	struct da8xx_glue		*glue;
 	struct platform_device_info	pinfo;
 	struct clk			*clk;
+	int				ret;
 
-	int				ret = -ENOMEM;
+	glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL);
+	if (!glue)
+		return -ENOMEM;
 
-	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
-	if (!glue) {
-		dev_err(&pdev->dev, "failed to allocate glue context\n");
-		goto err0;
-	}
-
-	clk = clk_get(&pdev->dev, "usb20");
+	clk = devm_clk_get(&pdev->dev, "usb20");
 	if (IS_ERR(clk)) {
 		dev_err(&pdev->dev, "failed to get clock\n");
-		ret = PTR_ERR(clk);
-		goto err3;
+		return PTR_ERR(clk);
 	}
 
-	ret = clk_enable(clk);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to enable clock\n");
-		goto err4;
+	glue->phy = devm_phy_get(&pdev->dev, "usb-phy");
+	if (IS_ERR(glue->phy)) {
+		dev_err(&pdev->dev, "failed to get phy\n");
+		return PTR_ERR(glue->phy);
 	}
 
 	glue->dev			= &pdev->dev;
@@ -517,10 +488,11 @@
 
 	pdata->platform_ops		= &da8xx_ops;
 
-	glue->phy = usb_phy_generic_register();
-	if (IS_ERR(glue->phy)) {
-		ret = PTR_ERR(glue->phy);
-		goto err5;
+	glue->usb_phy = usb_phy_generic_register();
+	ret = PTR_ERR_OR_ZERO(glue->usb_phy);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register usb_phy\n");
+		return ret;
 	}
 	platform_set_drvdata(pdev, glue);
 
@@ -544,28 +516,13 @@
 	pinfo.data = pdata;
 	pinfo.size_data = sizeof(*pdata);
 
-	glue->musb = musb = platform_device_register_full(&pinfo);
-	if (IS_ERR(musb)) {
-		ret = PTR_ERR(musb);
+	glue->musb = platform_device_register_full(&pinfo);
+	ret = PTR_ERR_OR_ZERO(glue->musb);
+	if (ret) {
 		dev_err(&pdev->dev, "failed to register musb device: %d\n", ret);
-		goto err6;
+		usb_phy_generic_unregister(glue->usb_phy);
 	}
 
-	return 0;
-
-err6:
-	usb_phy_generic_unregister(glue->phy);
-
-err5:
-	clk_disable(clk);
-
-err4:
-	clk_put(clk);
-
-err3:
-	kfree(glue);
-
-err0:
 	return ret;
 }
 
@@ -574,10 +531,7 @@
 	struct da8xx_glue		*glue = platform_get_drvdata(pdev);
 
 	platform_device_unregister(glue->musb);
-	usb_phy_generic_unregister(glue->phy);
-	clk_disable(glue->clk);
-	clk_put(glue->clk);
-	kfree(glue);
+	usb_phy_generic_unregister(glue->usb_phy);
 
 	return 0;
 }
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 74fc306..27dadc0 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1448,7 +1448,7 @@
 {
 	u8 reg;
 	char *type;
-	char aInfo[90], aRevision[32], aDate[12];
+	char aInfo[90];
 	void __iomem	*mbase = musb->mregs;
 	int		status = 0;
 	int		i;
@@ -1482,7 +1482,6 @@
 
 	pr_debug("%s: ConfigData=0x%02x (%s)\n", musb_driver_name, reg, aInfo);
 
-	aDate[0] = 0;
 	if (MUSB_CONTROLLER_MHDRC == musb_type) {
 		musb->is_multipoint = 1;
 		type = "M";
@@ -1497,11 +1496,10 @@
 
 	/* log release info */
 	musb->hwvers = musb_read_hwvers(mbase);
-	snprintf(aRevision, 32, "%d.%d%s", MUSB_HWVERS_MAJOR(musb->hwvers),
-		MUSB_HWVERS_MINOR(musb->hwvers),
-		(musb->hwvers & MUSB_HWVERS_RC) ? "RC" : "");
-	pr_debug("%s: %sHDRC RTL version %s %s\n",
-		 musb_driver_name, type, aRevision, aDate);
+	pr_debug("%s: %sHDRC RTL version %d.%d%s\n",
+		 musb_driver_name, type, MUSB_HWVERS_MAJOR(musb->hwvers),
+		 MUSB_HWVERS_MINOR(musb->hwvers),
+		 (musb->hwvers & MUSB_HWVERS_RC) ? "RC" : "");
 
 	/* configure ep0 */
 	musb_configure_ep0(musb);
@@ -1831,11 +1829,80 @@
 	.attrs = musb_attributes,
 };
 
+#define MUSB_QUIRK_B_INVALID_VBUS_91	(MUSB_DEVCTL_BDEVICE | \
+					 (2 << MUSB_DEVCTL_VBUS_SHIFT) | \
+					 MUSB_DEVCTL_SESSION)
+#define MUSB_QUIRK_A_DISCONNECT_19	((3 << MUSB_DEVCTL_VBUS_SHIFT) | \
+					 MUSB_DEVCTL_SESSION)
+
+/*
+ * Check the musb devctl session bit to determine if we want to
+ * allow PM runtime for the device. In general, we want to keep things
+ * active when the session bit is set except after host disconnect.
+ *
+ * Only called from musb_irq_work. If this ever needs to get called
+ * elsewhere, proper locking must be implemented for musb->session.
+ */
+static void musb_pm_runtime_check_session(struct musb *musb)
+{
+	u8 devctl, s;
+	int error;
+
+	devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
+
+	/* Handle session status quirks first */
+	s = MUSB_DEVCTL_FSDEV | MUSB_DEVCTL_LSDEV |
+		MUSB_DEVCTL_HR;
+	switch (devctl & ~s) {
+	case MUSB_QUIRK_B_INVALID_VBUS_91:
+		if (!musb->session && !musb->quirk_invalid_vbus) {
+			musb->quirk_invalid_vbus = true;
+			musb_dbg(musb,
+				 "First invalid vbus, assume no session");
+			return;
+		}
+		break;
+	case MUSB_QUIRK_A_DISCONNECT_19:
+		if (!musb->session)
+			break;
+		musb_dbg(musb, "Allow PM on possible host mode disconnect");
+		pm_runtime_mark_last_busy(musb->controller);
+		pm_runtime_put_autosuspend(musb->controller);
+		musb->session = false;
+		return;
+	default:
+		break;
+	}
+
+	/* No need to do anything if session has not changed */
+	s = devctl & MUSB_DEVCTL_SESSION;
+	if (s == musb->session)
+		return;
+
+	/* Block PM or allow PM? */
+	if (s) {
+		musb_dbg(musb, "Block PM on active session: %02x", devctl);
+		error = pm_runtime_get_sync(musb->controller);
+		if (error < 0)
+			dev_err(musb->controller, "Could not enable: %i\n",
+				error);
+	} else {
+		musb_dbg(musb, "Allow PM with no session: %02x", devctl);
+		musb->quirk_invalid_vbus = false;
+		pm_runtime_mark_last_busy(musb->controller);
+		pm_runtime_put_autosuspend(musb->controller);
+	}
+
+	musb->session = s;
+}
+
 /* Only used to provide driver mode change events */
 static void musb_irq_work(struct work_struct *data)
 {
 	struct musb *musb = container_of(data, struct musb, irq_work);
 
+	musb_pm_runtime_check_session(musb);
+
 	if (musb->xceiv->otg->state != musb->xceiv_old_state) {
 		musb->xceiv_old_state = musb->xceiv->otg->state;
 		sysfs_notify(&musb->controller->kobj, NULL, "mode");
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index b55a776..2cb88a49 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -378,6 +378,8 @@
 	u8			min_power;	/* vbus for periph, in mA/2 */
 
 	int			port_mode;	/* MUSB_PORT_MODE_* */
+	bool			session;
+	bool			quirk_invalid_vbus;
 	bool			is_host;
 
 	int			a_wait_bcon;	/* VBUS timeout in msecs */
diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c
index 2537179..0f17d21 100644
--- a/drivers/usb/musb/musb_dsps.c
+++ b/drivers/usb/musb/musb_dsps.c
@@ -145,43 +145,6 @@
 	{ "mode",		0xe8 },
 };
 
-static void dsps_musb_try_idle(struct musb *musb, unsigned long timeout)
-{
-	struct device *dev = musb->controller;
-	struct dsps_glue *glue = dev_get_drvdata(dev->parent);
-
-	if (timeout == 0)
-		timeout = jiffies + msecs_to_jiffies(3);
-
-	/* Never idle if active, or when VBUS timeout is not set as host */
-	if (musb->is_active || (musb->a_wait_bcon == 0 &&
-			musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON)) {
-		dev_dbg(musb->controller, "%s active, deleting timer\n",
-				usb_otg_state_string(musb->xceiv->otg->state));
-		del_timer(&glue->timer);
-		glue->last_timer = jiffies;
-		return;
-	}
-	if (musb->port_mode != MUSB_PORT_MODE_DUAL_ROLE)
-		return;
-
-	if (!musb->g.dev.driver)
-		return;
-
-	if (time_after(glue->last_timer, timeout) &&
-				timer_pending(&glue->timer)) {
-		dev_dbg(musb->controller,
-			"Longer idle timer already pending, ignoring...\n");
-		return;
-	}
-	glue->last_timer = timeout;
-
-	dev_dbg(musb->controller, "%s inactive, starting idle timer for %u ms\n",
-		usb_otg_state_string(musb->xceiv->otg->state),
-			jiffies_to_msecs(timeout - jiffies));
-	mod_timer(&glue->timer, timeout);
-}
-
 /**
  * dsps_musb_enable - enable interrupts
  */
@@ -206,7 +169,6 @@
 			musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE)
 		mod_timer(&glue->timer, jiffies +
 				msecs_to_jiffies(wrp->poll_timeout));
-	dsps_musb_try_idle(musb, 0);
 }
 
 /**
@@ -236,6 +198,11 @@
 	u8 devctl;
 	unsigned long flags;
 	int skip_session = 0;
+	int err;
+
+	err = pm_runtime_get_sync(dev);
+	if (err < 0)
+		dev_err(dev, "Poll could not pm_runtime_get: %i\n", err);
 
 	/*
 	 * We poll because DSPS IP's won't expose several OTG-critical
@@ -247,6 +214,10 @@
 
 	spin_lock_irqsave(&musb->lock, flags);
 	switch (musb->xceiv->otg->state) {
+	case OTG_STATE_A_WAIT_VRISE:
+		mod_timer(&glue->timer, jiffies +
+				msecs_to_jiffies(wrp->poll_timeout));
+		break;
 	case OTG_STATE_A_WAIT_BCON:
 		musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
 		skip_session = 1;
@@ -275,6 +246,9 @@
 		break;
 	}
 	spin_unlock_irqrestore(&musb->lock, flags);
+
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
 }
 
 static irqreturn_t dsps_interrupt(int irq, void *hci)
@@ -338,7 +312,8 @@
 			MUSB_HST_MODE(musb);
 			musb->xceiv->otg->default_a = 1;
 			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
-			del_timer(&glue->timer);
+			mod_timer(&glue->timer, jiffies +
+				  msecs_to_jiffies(wrp->poll_timeout));
 		} else {
 			musb->is_active = 0;
 			MUSB_DEV_MODE(musb);
@@ -358,11 +333,17 @@
 	if (musb->int_tx || musb->int_rx || musb->int_usb)
 		ret |= musb_interrupt(musb);
 
-	/* Poll for ID change in OTG port mode */
-	if (musb->xceiv->otg->state == OTG_STATE_B_IDLE &&
-			musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE)
+	/* Poll for ID change and connect */
+	switch (musb->xceiv->otg->state) {
+	case OTG_STATE_B_IDLE:
+	case OTG_STATE_A_WAIT_BCON:
 		mod_timer(&glue->timer, jiffies +
 				msecs_to_jiffies(wrp->poll_timeout));
+		break;
+	default:
+		break;
+	}
+
 out:
 	spin_unlock_irqrestore(&musb->lock, flags);
 
@@ -461,6 +442,9 @@
 		musb_writeb(musb->mregs, MUSB_BABBLE_CTL, val);
 	}
 
+	mod_timer(&glue->timer, jiffies +
+		  msecs_to_jiffies(glue->wrp->poll_timeout));
+
 	return dsps_musb_dbg_init(musb, glue);
 }
 
@@ -620,7 +604,6 @@
 	.enable		= dsps_musb_enable,
 	.disable	= dsps_musb_disable,
 
-	.try_idle	= dsps_musb_try_idle,
 	.set_mode	= dsps_musb_set_mode,
 	.recover	= dsps_musb_recover,
 };
@@ -784,6 +767,8 @@
 
 	platform_set_drvdata(pdev, glue);
 	pm_runtime_enable(&pdev->dev);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, 200);
 
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0) {
@@ -795,11 +780,15 @@
 	if (ret)
 		goto err3;
 
+	pm_runtime_mark_last_busy(&pdev->dev);
+	pm_runtime_put_autosuspend(&pdev->dev);
+
 	return 0;
 
 err3:
-	pm_runtime_put(&pdev->dev);
+	pm_runtime_put_sync(&pdev->dev);
 err2:
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	return ret;
 }
@@ -811,7 +800,8 @@
 	platform_device_unregister(glue->musb);
 
 	/* disable usbss clocks */
-	pm_runtime_put(&pdev->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
 	return 0;
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 6d1e975..bff4869 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1964,6 +1964,9 @@
 	 * that currently misbehaves.
 	 */
 
+	/* Force check of devctl register for PM runtime */
+	schedule_work(&musb->irq_work);
+
 	pm_runtime_mark_last_busy(musb->controller);
 	pm_runtime_put_autosuspend(musb->controller);
 
diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c
index 192248f..61b5f1c 100644
--- a/drivers/usb/musb/musb_virthub.c
+++ b/drivers/usb/musb/musb_virthub.c
@@ -245,6 +245,7 @@
 			usb_otg_state_string(musb->xceiv->otg->state));
 	}
 }
+EXPORT_SYMBOL_GPL(musb_root_disconnect);
 
 
 /*---------------------------------------------------------------------*/
@@ -290,6 +291,7 @@
 	u32		temp;
 	int		retval = 0;
 	unsigned long	flags;
+	bool		start_musb = false;
 
 	spin_lock_irqsave(&musb->lock, flags);
 
@@ -390,7 +392,7 @@
 			 * logic relating to VBUS power-up.
 			 */
 			if (!hcd->self.is_b_host && musb_has_gadget(musb))
-				musb_start(musb);
+				start_musb = true;
 			break;
 		case USB_PORT_FEAT_RESET:
 			musb_port_reset(musb, true);
@@ -451,5 +453,9 @@
 		retval = -EPIPE;
 	}
 	spin_unlock_irqrestore(&musb->lock, flags);
+
+	if (start_musb)
+		musb_start(musb);
+
 	return retval;
 }
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 0b4cec9..1ab6973 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -49,9 +49,6 @@
 	enum musb_vbus_id_status status;
 	struct work_struct	omap_musb_mailbox_work;
 	struct device		*control_otghs;
-	bool			cable_connected;
-	bool			enabled;
-	bool			powered;
 };
 #define glue_to_musb(g)		platform_get_drvdata(g->musb)
 
@@ -141,45 +138,6 @@
 	musb_writel(musb->mregs, OTG_FORCESTDBY, l);
 }
 
-/*
- * We can get multiple cable events so we need to keep track
- * of the power state. Only keep power enabled if USB cable is
- * connected and a gadget is started.
- */
-static void omap2430_set_power(struct musb *musb, bool enabled, bool cable)
-{
-	struct device *dev = musb->controller;
-	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
-	bool power_up;
-	int res;
-
-	if (glue->enabled != enabled)
-		glue->enabled = enabled;
-
-	if (glue->cable_connected != cable)
-		glue->cable_connected = cable;
-
-	power_up = glue->enabled && glue->cable_connected;
-	if (power_up == glue->powered) {
-		dev_warn(musb->controller, "power state already %i\n",
-			 power_up);
-		return;
-	}
-
-	glue->powered = power_up;
-
-	if (power_up) {
-		res = pm_runtime_get_sync(musb->controller);
-		if (res < 0) {
-			dev_err(musb->controller, "could not enable: %i", res);
-			glue->powered = false;
-		}
-	} else {
-		pm_runtime_mark_last_busy(musb->controller);
-		pm_runtime_put_autosuspend(musb->controller);
-	}
-}
-
 static int omap2430_musb_mailbox(enum musb_vbus_id_status status)
 {
 	struct omap2430_glue	*glue = _glue;
@@ -203,21 +161,15 @@
 static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 {
 	struct musb *musb = glue_to_musb(glue);
-	struct device *dev = musb->controller;
-	struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev);
+	struct musb_hdrc_platform_data *pdata =
+		dev_get_platdata(musb->controller);
 	struct omap_musb_board_data *data = pdata->board_data;
 	struct usb_otg *otg = musb->xceiv->otg;
-	bool cable_connected;
 
-	cable_connected = ((glue->status == MUSB_ID_GROUND) ||
-			   (glue->status == MUSB_VBUS_VALID));
-
-	if (cable_connected)
-		omap2430_set_power(musb, glue->enabled, cable_connected);
-
+	pm_runtime_get_sync(musb->controller);
 	switch (glue->status) {
 	case MUSB_ID_GROUND:
-		dev_dbg(dev, "ID GND\n");
+		dev_dbg(musb->controller, "ID GND\n");
 
 		otg->default_a = true;
 		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
@@ -230,7 +182,7 @@
 		break;
 
 	case MUSB_VBUS_VALID:
-		dev_dbg(dev, "VBUS Connect\n");
+		dev_dbg(musb->controller, "VBUS Connect\n");
 
 		otg->default_a = false;
 		musb->xceiv->otg->state = OTG_STATE_B_IDLE;
@@ -240,7 +192,7 @@
 
 	case MUSB_ID_FLOAT:
 	case MUSB_VBUS_OFF:
-		dev_dbg(dev, "VBUS Disconnect\n");
+		dev_dbg(musb->controller, "VBUS Disconnect\n");
 
 		musb->xceiv->last_event = USB_EVENT_NONE;
 		if (musb->gadget_driver)
@@ -253,12 +205,10 @@
 			USB_MODE_DISCONNECT);
 		break;
 	default:
-		dev_dbg(dev, "ID float\n");
+		dev_dbg(musb->controller, "ID float\n");
 	}
-
-	if (!cable_connected)
-		omap2430_set_power(musb, glue->enabled, cable_connected);
-
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 	atomic_notifier_call_chain(&musb->xceiv->notifier,
 			musb->xceiv->last_event, NULL);
 }
@@ -376,8 +326,6 @@
 	if (!WARN_ON(!musb->phy))
 		phy_power_on(musb->phy);
 
-	omap2430_set_power(musb, true, glue->cable_connected);
-
 	switch (glue->status) {
 
 	case MUSB_ID_GROUND:
@@ -419,8 +367,6 @@
 	if (glue->status != MUSB_UNKNOWN)
 		omap_control_usb_set_mode(glue->control_otghs,
 			USB_MODE_DISCONNECT);
-
-	omap2430_set_power(musb, false, glue->cable_connected);
 }
 
 static int omap2430_musb_exit(struct musb *musb)
@@ -571,7 +517,7 @@
 
 	pm_runtime_enable(glue->dev);
 	pm_runtime_use_autosuspend(glue->dev);
-	pm_runtime_set_autosuspend_delay(glue->dev, 500);
+	pm_runtime_set_autosuspend_delay(glue->dev, 100);
 
 	ret = platform_device_add(musb);
 	if (ret) {
@@ -591,11 +537,9 @@
 static int omap2430_remove(struct platform_device *pdev)
 {
 	struct omap2430_glue *glue = platform_get_drvdata(pdev);
-	struct musb *musb = glue_to_musb(glue);
 
 	pm_runtime_get_sync(glue->dev);
 	platform_device_unregister(glue->musb);
-	omap2430_set_power(musb, false, false);
 	pm_runtime_put_sync(glue->dev);
 	pm_runtime_dont_use_autosuspend(glue->dev);
 	pm_runtime_disable(glue->dev);
diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c
index c6ee166..1408245 100644
--- a/drivers/usb/musb/sunxi.c
+++ b/drivers/usb/musb/sunxi.c
@@ -74,6 +74,7 @@
 #define SUNXI_MUSB_FL_HAS_SRAM			5
 #define SUNXI_MUSB_FL_HAS_RESET			6
 #define SUNXI_MUSB_FL_NO_CONFIGDATA		7
+#define SUNXI_MUSB_FL_PHY_MODE_PEND		8
 
 /* Our read/write methods need access and do not get passed in a musb ref :| */
 static struct musb *sunxi_musb;
@@ -87,6 +88,7 @@
 	struct phy		*phy;
 	struct platform_device	*usb_phy;
 	struct usb_phy		*xceiv;
+	enum phy_mode		phy_mode;
 	unsigned long		flags;
 	struct work_struct	work;
 	struct extcon_dev	*extcon;
@@ -140,6 +142,9 @@
 			clear_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags);
 		}
 	}
+
+	if (test_and_clear_bit(SUNXI_MUSB_FL_PHY_MODE_PEND, &glue->flags))
+		phy_set_mode(glue->phy, glue->phy_mode);
 }
 
 static void sunxi_musb_set_vbus(struct musb *musb, int is_on)
@@ -341,6 +346,50 @@
 {
 }
 
+static int sunxi_musb_set_mode(struct musb *musb, u8 mode)
+{
+	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
+	enum phy_mode new_mode;
+
+	switch (mode) {
+	case MUSB_HOST:
+		new_mode = PHY_MODE_USB_HOST;
+		break;
+	case MUSB_PERIPHERAL:
+		new_mode = PHY_MODE_USB_DEVICE;
+		break;
+	case MUSB_OTG:
+		new_mode = PHY_MODE_USB_OTG;
+		break;
+	default:
+		dev_err(musb->controller->parent,
+			"Error requested mode not supported by this kernel\n");
+		return -EINVAL;
+	}
+
+	if (glue->phy_mode == new_mode)
+		return 0;
+
+	if (musb->port_mode != MUSB_PORT_MODE_DUAL_ROLE) {
+		dev_err(musb->controller->parent,
+			"Error changing modes is only supported in dual role mode\n");
+		return -EINVAL;
+	}
+
+	if (musb->port1_status & USB_PORT_STAT_ENABLE)
+		musb_root_disconnect(musb);
+
+	/*
+	 * phy_set_mode may sleep, and we're called with a spinlock held,
+	 * so let sunxi_musb_work deal with it.
+	 */
+	glue->phy_mode = new_mode;
+	set_bit(SUNXI_MUSB_FL_PHY_MODE_PEND, &glue->flags);
+	schedule_work(&glue->work);
+
+	return 0;
+}
+
 /*
  * sunxi musb register layout
  * 0x00 - 0x17	fifo regs, 1 long per fifo
@@ -568,6 +617,7 @@
 	.writew		= sunxi_musb_writew,
 	.dma_init	= sunxi_musb_dma_controller_create,
 	.dma_exit	= sunxi_musb_dma_controller_destroy,
+	.set_mode	= sunxi_musb_set_mode,
 	.set_vbus	= sunxi_musb_set_vbus,
 	.pre_root_reset_end = sunxi_musb_pre_root_reset_end,
 	.post_root_reset_end = sunxi_musb_post_root_reset_end,
@@ -614,21 +664,28 @@
 		return -EINVAL;
 	}
 
+	glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL);
+	if (!glue)
+		return -ENOMEM;
+
 	memset(&pdata, 0, sizeof(pdata));
 	switch (usb_get_dr_mode(&pdev->dev)) {
 #if defined CONFIG_USB_MUSB_DUAL_ROLE || defined CONFIG_USB_MUSB_HOST
 	case USB_DR_MODE_HOST:
 		pdata.mode = MUSB_PORT_MODE_HOST;
+		glue->phy_mode = PHY_MODE_USB_HOST;
 		break;
 #endif
 #if defined CONFIG_USB_MUSB_DUAL_ROLE || defined CONFIG_USB_MUSB_GADGET
 	case USB_DR_MODE_PERIPHERAL:
 		pdata.mode = MUSB_PORT_MODE_GADGET;
+		glue->phy_mode = PHY_MODE_USB_DEVICE;
 		break;
 #endif
 #ifdef CONFIG_USB_MUSB_DUAL_ROLE
 	case USB_DR_MODE_OTG:
 		pdata.mode = MUSB_PORT_MODE_DUAL_ROLE;
+		glue->phy_mode = PHY_MODE_USB_OTG;
 		break;
 #endif
 	default:
@@ -638,10 +695,6 @@
 	pdata.platform_ops	= &sunxi_musb_ops;
 	pdata.config		= &sunxi_musb_hdrc_config;
 
-	glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL);
-	if (!glue)
-		return -ENOMEM;
-
 	glue->dev = &pdev->dev;
 	INIT_WORK(&glue->work, sunxi_musb_work);
 	glue->host_nb.notifier_call = sunxi_musb_host_notifier;
diff --git a/drivers/usb/phy/phy-ab8500-usb.c b/drivers/usb/phy/phy-ab8500-usb.c
index 0c912d3..a03caf4 100644
--- a/drivers/usb/phy/phy-ab8500-usb.c
+++ b/drivers/usb/phy/phy-ab8500-usb.c
@@ -1248,7 +1248,7 @@
 	err = abx500_set_register_interruptible(ab->dev,
 			AB8500_DEBUG, AB8500_USB_PHY_TUNE3, 0x78);
 	if (err < 0)
-		dev_err(ab->dev, "Failed to set PHY_TUNE3 regester err=%d\n",
+		dev_err(ab->dev, "Failed to set PHY_TUNE3 register err=%d\n",
 				err);
 
 	/* Switch to normal mode/disable Bank 0x12 access */
@@ -1290,7 +1290,7 @@
 			0xFC, 0x80);
 
 	if (err < 0)
-		dev_err(ab->dev, "Failed to set PHY_TUNE3 regester err=%d\n",
+		dev_err(ab->dev, "Failed to set PHY_TUNE3 register err=%d\n",
 				err);
 
 	/* Switch to normal mode/disable Bank 0x12 access */
@@ -1321,7 +1321,7 @@
 	err = abx500_set_register_interruptible(ab->dev,
 			AB8540_DEBUG, AB8500_USB_PHY_TUNE3, 0x90);
 	if (err < 0)
-		dev_err(ab->dev, "Failed to set PHY_TUNE3 regester ret=%d\n",
+		dev_err(ab->dev, "Failed to set PHY_TUNE3 register ret=%d\n",
 				err);
 }
 
@@ -1351,7 +1351,7 @@
 	err = abx500_set_register_interruptible(ab->dev,
 			AB8500_DEBUG, AB8500_USB_PHY_TUNE3, 0x80);
 	if (err < 0)
-		dev_err(ab->dev, "Failed to set PHY_TUNE3 regester err=%d\n",
+		dev_err(ab->dev, "Failed to set PHY_TUNE3 register err=%d\n",
 				err);
 
 	/* Switch to normal mode/disable Bank 0x12 access */
diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
index 980c9de..8311ba2 100644
--- a/drivers/usb/phy/phy-generic.c
+++ b/drivers/usb/phy/phy-generic.c
@@ -118,8 +118,6 @@
 		status = USB_EVENT_VBUS;
 		otg->state = OTG_STATE_B_PERIPHERAL;
 		nop->phy.last_event = status;
-		if (otg->gadget)
-			usb_gadget_vbus_connect(otg->gadget);
 
 		/* drawing a "unit load" is *always* OK, except for OTG */
 		nop_set_vbus_draw(nop, 100);
@@ -129,8 +127,6 @@
 	} else {
 		nop_set_vbus_draw(nop, 0);
 
-		if (otg->gadget)
-			usb_gadget_vbus_disconnect(otg->gadget);
 		status = USB_EVENT_NONE;
 		otg->state = OTG_STATE_B_IDLE;
 		nop->phy.last_event = status;
@@ -144,14 +140,18 @@
 int usb_gen_phy_init(struct usb_phy *phy)
 {
 	struct usb_phy_generic *nop = dev_get_drvdata(phy->dev);
+	int ret;
 
 	if (!IS_ERR(nop->vcc)) {
 		if (regulator_enable(nop->vcc))
 			dev_err(phy->dev, "Failed to enable power\n");
 	}
 
-	if (!IS_ERR(nop->clk))
-		clk_prepare_enable(nop->clk);
+	if (!IS_ERR(nop->clk)) {
+		ret = clk_prepare_enable(nop->clk);
+		if (ret)
+			return ret;
+	}
 
 	nop_reset(nop);
 
@@ -187,7 +187,8 @@
 
 	otg->gadget = gadget;
 	if (otg->state == OTG_STATE_B_PERIPHERAL)
-		usb_gadget_vbus_connect(gadget);
+		atomic_notifier_call_chain(&otg->usb_phy->notifier,
+					   USB_EVENT_VBUS, otg->gadget);
 	else
 		otg->state = OTG_STATE_B_IDLE;
 	return 0;
@@ -322,6 +323,8 @@
 				gpiod_to_irq(nop->gpiod_vbus), err);
 			return err;
 		}
+		nop->phy.otg->state = gpiod_get_value(nop->gpiod_vbus) ?
+			OTG_STATE_B_PERIPHERAL : OTG_STATE_B_IDLE;
 	}
 
 	nop->phy.init		= usb_gen_phy_init;
diff --git a/drivers/usb/phy/phy-mxs-usb.c b/drivers/usb/phy/phy-mxs-usb.c
index 00bfea0..0e2f1a3 100644
--- a/drivers/usb/phy/phy-mxs-usb.c
+++ b/drivers/usb/phy/phy-mxs-usb.c
@@ -27,6 +27,7 @@
 #define DRIVER_NAME "mxs_phy"
 
 #define HW_USBPHY_PWD				0x00
+#define HW_USBPHY_TX				0x10
 #define HW_USBPHY_CTRL				0x30
 #define HW_USBPHY_CTRL_SET			0x34
 #define HW_USBPHY_CTRL_CLR			0x38
@@ -38,6 +39,10 @@
 #define HW_USBPHY_IP_SET			0x94
 #define HW_USBPHY_IP_CLR			0x98
 
+#define GM_USBPHY_TX_TXCAL45DP(x)            (((x) & 0xf) << 16)
+#define GM_USBPHY_TX_TXCAL45DN(x)            (((x) & 0xf) << 8)
+#define GM_USBPHY_TX_D_CAL(x)                (((x) & 0xf) << 0)
+
 #define BM_USBPHY_CTRL_SFTRST			BIT(31)
 #define BM_USBPHY_CTRL_CLKGATE			BIT(30)
 #define BM_USBPHY_CTRL_OTG_ID_VALUE		BIT(27)
@@ -115,6 +120,12 @@
  */
 #define MXS_PHY_NEED_IP_FIX			BIT(3)
 
+/* Minimum and maximum values for device tree entries */
+#define MXS_PHY_TX_CAL45_MIN			30
+#define MXS_PHY_TX_CAL45_MAX			55
+#define MXS_PHY_TX_D_CAL_MIN			79
+#define MXS_PHY_TX_D_CAL_MAX			119
+
 struct mxs_phy_data {
 	unsigned int flags;
 };
@@ -164,6 +175,8 @@
 	const struct mxs_phy_data *data;
 	struct regmap *regmap_anatop;
 	int port_id;
+	u32 tx_reg_set;
+	u32 tx_reg_mask;
 };
 
 static inline bool is_imx6q_phy(struct mxs_phy *mxs_phy)
@@ -185,6 +198,20 @@
 	usleep_range(300, 400);
 }
 
+static void mxs_phy_tx_init(struct mxs_phy *mxs_phy)
+{
+	void __iomem *base = mxs_phy->phy.io_priv;
+	u32 phytx;
+
+	/* Update TX register if there is anything to write */
+	if (mxs_phy->tx_reg_mask) {
+		phytx = readl(base + HW_USBPHY_TX);
+		phytx &= ~mxs_phy->tx_reg_mask;
+		phytx |= mxs_phy->tx_reg_set;
+		writel(phytx, base + HW_USBPHY_TX);
+	}
+}
+
 static int mxs_phy_hw_init(struct mxs_phy *mxs_phy)
 {
 	int ret;
@@ -214,6 +241,8 @@
 	if (mxs_phy->data->flags & MXS_PHY_NEED_IP_FIX)
 		writel(BM_USBPHY_IP_FIX, base + HW_USBPHY_IP_SET);
 
+	mxs_phy_tx_init(mxs_phy);
+
 	return 0;
 }
 
@@ -459,6 +488,7 @@
 	int ret;
 	const struct of_device_id *of_id;
 	struct device_node *np = pdev->dev.of_node;
+	u32 val;
 
 	of_id = of_match_device(mxs_phy_dt_ids, &pdev->dev);
 	if (!of_id)
@@ -491,6 +521,37 @@
 		}
 	}
 
+	/* Precompute which bits of the TX register are to be updated, if any */
+	if (!of_property_read_u32(np, "fsl,tx-cal-45-dn-ohms", &val) &&
+	    val >= MXS_PHY_TX_CAL45_MIN && val <= MXS_PHY_TX_CAL45_MAX) {
+		/* Scale to a 4-bit value */
+		val = (MXS_PHY_TX_CAL45_MAX - val) * 0xF
+			/ (MXS_PHY_TX_CAL45_MAX - MXS_PHY_TX_CAL45_MIN);
+		mxs_phy->tx_reg_mask |= GM_USBPHY_TX_TXCAL45DN(~0);
+		mxs_phy->tx_reg_set  |= GM_USBPHY_TX_TXCAL45DN(val);
+	}
+
+	if (!of_property_read_u32(np, "fsl,tx-cal-45-dp-ohms", &val) &&
+	    val >= MXS_PHY_TX_CAL45_MIN && val <= MXS_PHY_TX_CAL45_MAX) {
+		/* Scale to a 4-bit value. */
+		val = (MXS_PHY_TX_CAL45_MAX - val) * 0xF
+			/ (MXS_PHY_TX_CAL45_MAX - MXS_PHY_TX_CAL45_MIN);
+		mxs_phy->tx_reg_mask |= GM_USBPHY_TX_TXCAL45DP(~0);
+		mxs_phy->tx_reg_set  |= GM_USBPHY_TX_TXCAL45DP(val);
+	}
+
+	if (!of_property_read_u32(np, "fsl,tx-d-cal", &val) &&
+	    val >= MXS_PHY_TX_D_CAL_MIN && val <= MXS_PHY_TX_D_CAL_MAX) {
+		/* Scale to a 4-bit value.  Round up the values and heavily
+		 * weight the rounding by adding 2/3 of the denominator.
+		 */
+		val = ((MXS_PHY_TX_D_CAL_MAX - val) * 0xF
+			+ (MXS_PHY_TX_D_CAL_MAX - MXS_PHY_TX_D_CAL_MIN) * 2/3)
+			/ (MXS_PHY_TX_D_CAL_MAX - MXS_PHY_TX_D_CAL_MIN);
+		mxs_phy->tx_reg_mask |= GM_USBPHY_TX_D_CAL(~0);
+		mxs_phy->tx_reg_set  |= GM_USBPHY_TX_D_CAL(val);
+	}
+
 	ret = of_alias_get_id(np, "usbphy");
 	if (ret < 0)
 		dev_dbg(&pdev->dev, "failed to get alias id, errno %d\n", ret);
diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c
index 6f6d2a7..6523af4 100644
--- a/drivers/usb/phy/phy-omap-otg.c
+++ b/drivers/usb/phy/phy-omap-otg.c
@@ -140,6 +140,8 @@
 		 (rev >> 4) & 0xf, rev & 0xf, config->extcon, otg_dev->id,
 		 otg_dev->vbus);
 
+	platform_set_drvdata(pdev, otg_dev);
+
 	return 0;
 }
 
diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c
index 8fbbc2d..012a37a 100644
--- a/drivers/usb/renesas_usbhs/common.c
+++ b/drivers/usb/renesas_usbhs/common.c
@@ -482,6 +482,10 @@
 		.data = (void *)USBHS_TYPE_RCAR_GEN3,
 	},
 	{
+		.compatible = "renesas,usbhs-r8a7796",
+		.data = (void *)USBHS_TYPE_RCAR_GEN3,
+	},
+	{
 		.compatible = "renesas,rcar-gen2-usbhs",
 		.data = (void *)USBHS_TYPE_RCAR_GEN2,
 	},
@@ -514,7 +518,8 @@
 	if (gpio > 0)
 		dparam->enable_gpio = gpio;
 
-	if (dparam->type == USBHS_TYPE_RCAR_GEN2)
+	if (dparam->type == USBHS_TYPE_RCAR_GEN2 ||
+	    dparam->type == USBHS_TYPE_RCAR_GEN3)
 		dparam->has_usb_dmac = 1;
 
 	return info;
diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c
index 280ed5f..857e783 100644
--- a/drivers/usb/renesas_usbhs/fifo.c
+++ b/drivers/usb/renesas_usbhs/fifo.c
@@ -871,7 +871,7 @@
 
 	/* use PIO if packet is less than pio_dma_border or pipe is DCP */
 	if ((len < usbhs_get_dparam(priv, pio_dma_border)) ||
-	    usbhs_pipe_is_dcp(pipe))
+	    usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
 		goto usbhsf_pio_prepare_push;
 
 	/* check data length if this driver don't use USB-DMAC */
@@ -976,7 +976,7 @@
 
 	/* use PIO if packet is less than pio_dma_border or pipe is DCP */
 	if ((pkt->length < usbhs_get_dparam(priv, pio_dma_border)) ||
-	    usbhs_pipe_is_dcp(pipe))
+	    usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC))
 		goto usbhsf_pio_prepare_pop;
 
 	fifo = usbhsf_get_dma_fifo(priv, pkt);
diff --git a/drivers/usb/renesas_usbhs/mod.c b/drivers/usb/renesas_usbhs/mod.c
index d4be5d5..28965ef 100644
--- a/drivers/usb/renesas_usbhs/mod.c
+++ b/drivers/usb/renesas_usbhs/mod.c
@@ -282,9 +282,16 @@
 	if (usbhs_mod_is_host(priv))
 		usbhs_write(priv, INTSTS1, ~irq_state.intsts1 & INTSTS1_MAGIC);
 
-	usbhs_write(priv, BRDYSTS, ~irq_state.brdysts);
+	/*
+	 * The driver should not clear the xxxSTS after the line of
+	 * "call irq callback functions" because each "if" statement is
+	 * possible to call the callback function for avoiding any side effects.
+	 */
+	if (irq_state.intsts0 & BRDY)
+		usbhs_write(priv, BRDYSTS, ~irq_state.brdysts);
 	usbhs_write(priv, NRDYSTS, ~irq_state.nrdysts);
-	usbhs_write(priv, BEMPSTS, ~irq_state.bempsts);
+	if (irq_state.intsts0 & BEMP)
+		usbhs_write(priv, BEMPSTS, ~irq_state.bempsts);
 
 	/*
 	 * call irq callback functions
diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index 50f3363..5bc7a61 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -335,7 +335,6 @@
 	buf = kmalloc(sizeof(*buf), GFP_ATOMIC);
 	if (!buf) {
 		usb_ep_free_request(&dcp->ep, req);
-		dev_err(dev, "recip data allocation fail\n");
 		return;
 	}
 
@@ -617,10 +616,13 @@
 		 * use dmaengine if possible.
 		 * It will use pio handler if impossible.
 		 */
-		if (usb_endpoint_dir_in(desc))
+		if (usb_endpoint_dir_in(desc)) {
 			pipe->handler = &usbhs_fifo_dma_push_handler;
-		else
+		} else {
 			pipe->handler = &usbhs_fifo_dma_pop_handler;
+			usbhs_xxxsts_clear(priv, BRDYSTS,
+					   usbhs_pipe_number(pipe));
+		}
 
 		ret = 0;
 	}
@@ -1059,21 +1061,18 @@
 	int ret;
 
 	gpriv = kzalloc(sizeof(struct usbhsg_gpriv), GFP_KERNEL);
-	if (!gpriv) {
-		dev_err(dev, "Could not allocate gadget priv\n");
+	if (!gpriv)
 		return -ENOMEM;
-	}
 
 	uep = kzalloc(sizeof(struct usbhsg_uep) * pipe_size, GFP_KERNEL);
 	if (!uep) {
-		dev_err(dev, "Could not allocate ep\n");
 		ret = -ENOMEM;
 		goto usbhs_mod_gadget_probe_err_gpriv;
 	}
 
 	gpriv->transceiver = usb_get_phy(USB_PHY_TYPE_UNDEFINED);
 	dev_info(dev, "%stransceiver found\n",
-		 gpriv->transceiver ? "" : "no ");
+		 !IS_ERR(gpriv->transceiver) ? "" : "no ");
 
 	/*
 	 * CAUTION
@@ -1103,6 +1102,8 @@
 	gpriv->gadget.name		= "renesas_usbhs_udc";
 	gpriv->gadget.ops		= &usbhsg_gadget_ops;
 	gpriv->gadget.max_speed		= USB_SPEED_HIGH;
+	gpriv->gadget.quirk_avoids_skb_reserve = usbhs_get_dparam(priv,
+								has_usb_dmac);
 
 	INIT_LIST_HEAD(&gpriv->gadget.ep_list);
 
diff --git a/drivers/usb/renesas_usbhs/mod_host.c b/drivers/usb/renesas_usbhs/mod_host.c
index 3bf0b72..165e81b 100644
--- a/drivers/usb/renesas_usbhs/mod_host.c
+++ b/drivers/usb/renesas_usbhs/mod_host.c
@@ -166,14 +166,10 @@
 					       gfp_t mem_flags)
 {
 	struct usbhsh_request *ureq;
-	struct usbhs_priv *priv = usbhsh_hpriv_to_priv(hpriv);
-	struct device *dev = usbhs_priv_to_dev(priv);
 
 	ureq = kzalloc(sizeof(struct usbhsh_request), mem_flags);
-	if (!ureq) {
-		dev_err(dev, "ureq alloc fail\n");
+	if (!ureq)
 		return NULL;
-	}
 
 	usbhs_pkt_init(&ureq->pkt);
 	ureq->urb = urb;
@@ -388,10 +384,8 @@
 	unsigned long flags;
 
 	uep = kzalloc(sizeof(struct usbhsh_ep), mem_flags);
-	if (!uep) {
-		dev_err(dev, "usbhsh_ep alloc fail\n");
+	if (!uep)
 		return -ENOMEM;
-	}
 
 	/********************  spin lock ********************/
 	usbhs_lock(priv, flags);
diff --git a/drivers/usb/renesas_usbhs/pipe.c b/drivers/usb/renesas_usbhs/pipe.c
index c238772..9396a8c 100644
--- a/drivers/usb/renesas_usbhs/pipe.c
+++ b/drivers/usb/renesas_usbhs/pipe.c
@@ -804,10 +804,8 @@
 	}
 
 	info->pipe = kzalloc(sizeof(struct usbhs_pipe) * pipe_size, GFP_KERNEL);
-	if (!info->pipe) {
-		dev_err(dev, "Could not allocate pipe\n");
+	if (!info->pipe)
 		return -ENOMEM;
-	}
 
 	info->size = pipe_size;
 
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 4d6a5c6..54a4de0 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -118,6 +118,7 @@
 	{ USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */
 	{ USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */
 	{ USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
+	{ USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
 	{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
 	{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
 	{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 0082080..b2d767e 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -648,6 +648,8 @@
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) },
+	{ USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) },
 	{ USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) },
 	{ USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) },
 	{ USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) },
@@ -1008,6 +1010,7 @@
 	{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) },
 	{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
 	{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) },
+	{ USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) },
 	{ }					/* Terminating entry */
 };
 
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index c5d6c1e..f87a938 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -406,6 +406,12 @@
 #define FTDI_4N_GALAXY_DE_3_PID	0xF3C2
 
 /*
+ * Ivium Technologies product IDs
+ */
+#define FTDI_PALMSENS_PID	0xf440
+#define FTDI_IVIUM_XSTAT_PID	0xf441
+
+/*
  * Linx Technologies product ids
  */
 #define LINX_SDMUSBQSS_PID	0xF448	/* Linx SDM-USB-QS-S */
@@ -673,6 +679,12 @@
 #define INTREPID_NEOVI_PID	0x0701
 
 /*
+ * WICED USB UART
+ */
+#define WICED_VID		0x0A5C
+#define WICED_USB20706V2_PID	0x6422
+
+/*
  * Definitions for ID TECH (www.idt-net.com) devices
  */
 #define IDTECH_VID		0x0ACD	/* ID TECH Vendor ID */
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index 4f7e072..e49ad0c 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -30,12 +30,12 @@
 #include <linux/usb/ezusb.h>
 
 /* make a simple define to handle if we are compiling keyspan_pda or xircom support */
-#if defined(CONFIG_USB_SERIAL_KEYSPAN_PDA) || defined(CONFIG_USB_SERIAL_KEYSPAN_PDA_MODULE)
+#if IS_ENABLED(CONFIG_USB_SERIAL_KEYSPAN_PDA)
 	#define KEYSPAN
 #else
 	#undef KEYSPAN
 #endif
-#if defined(CONFIG_USB_SERIAL_XIRCOM) || defined(CONFIG_USB_SERIAL_XIRCOM_MODULE)
+#if IS_ENABLED(CONFIG_USB_SERIAL_XIRCOM)
 	#define XIRCOM
 #else
 	#undef XIRCOM
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 5608af4..de9992b 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -1252,7 +1252,7 @@
 
 	if (urb->transfer_buffer == NULL) {
 		urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE,
-					       GFP_KERNEL);
+					       GFP_ATOMIC);
 		if (!urb->transfer_buffer)
 			goto exit;
 	}
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index ed378fb..57426d7 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -1340,8 +1340,8 @@
 	}
 
 	if (urb->transfer_buffer == NULL) {
-		urb->transfer_buffer =
-		    kmalloc(URB_TRANSFER_BUFFER_SIZE, GFP_KERNEL);
+		urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE,
+					       GFP_ATOMIC);
 		if (!urb->transfer_buffer)
 			goto exit;
 	}
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 8e07536..9894e34 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -274,6 +274,12 @@
 #define TELIT_PRODUCT_LE920			0x1200
 #define TELIT_PRODUCT_LE910			0x1201
 #define TELIT_PRODUCT_LE910_USBCFG4		0x1206
+#define TELIT_PRODUCT_LE920A4_1207		0x1207
+#define TELIT_PRODUCT_LE920A4_1208		0x1208
+#define TELIT_PRODUCT_LE920A4_1211		0x1211
+#define TELIT_PRODUCT_LE920A4_1212		0x1212
+#define TELIT_PRODUCT_LE920A4_1213		0x1213
+#define TELIT_PRODUCT_LE920A4_1214		0x1214
 
 /* ZTE PRODUCTS */
 #define ZTE_VENDOR_ID				0x19d2
@@ -519,6 +525,12 @@
 #define VIATELECOM_VENDOR_ID			0x15eb
 #define VIATELECOM_PRODUCT_CDS7			0x0001
 
+/* WeTelecom products */
+#define WETELECOM_VENDOR_ID			0x22de
+#define WETELECOM_PRODUCT_WMD200		0x6801
+#define WETELECOM_PRODUCT_6802			0x6802
+#define WETELECOM_PRODUCT_WMD300		0x6803
+
 struct option_blacklist_info {
 	/* bitmask of interface numbers blacklisted for send_setup */
 	const unsigned long sendsetup;
@@ -628,6 +640,11 @@
 	.reserved = BIT(1) | BIT(5),
 };
 
+static const struct option_blacklist_info telit_le920a4_blacklist_1 = {
+	.sendsetup = BIT(0),
+	.reserved = BIT(1),
+};
+
 static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = {
 	.sendsetup = BIT(2),
 	.reserved = BIT(0) | BIT(1) | BIT(3),
@@ -1203,6 +1220,16 @@
 		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
 	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920),
 		.driver_info = (kernel_ulong_t)&telit_le920_blacklist },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1207) },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1208),
+		.driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1211),
+		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1212),
+		.driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 },
+	{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) },
+	{ USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214),
+		.driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff),
 		.driver_info = (kernel_ulong_t)&net_intf1_blacklist },
@@ -1966,9 +1993,13 @@
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */
 	{ USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) },                /* OLICARD300 - MT6225 */
 	{ USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) },
 	{ USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_6802, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD300, 0xff, 0xff, 0xff) },
 	{ } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 07b4bf0..a8b9bdb 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -179,23 +179,23 @@
 
 /* Config struct */
 struct ti_uart_config {
-	__u16	wBaudRate;
-	__u16	wFlags;
-	__u8	bDataBits;
-	__u8	bParity;
-	__u8	bStopBits;
+	__be16	wBaudRate;
+	__be16	wFlags;
+	u8	bDataBits;
+	u8	bParity;
+	u8	bStopBits;
 	char	cXon;
 	char	cXoff;
-	__u8	bUartMode;
+	u8	bUartMode;
 } __packed;
 
 /* Get port status */
 struct ti_port_status {
-	__u8	bCmdCode;
-	__u8	bModuleId;
-	__u8	bErrorCode;
-	__u8	bMSR;
-	__u8	bLSR;
+	u8 bCmdCode;
+	u8 bModuleId;
+	u8 bErrorCode;
+	u8 bMSR;
+	u8 bLSR;
 } __packed;
 
 /* Purge modes */
@@ -218,12 +218,12 @@
 #define TI_RW_DATA_DOUBLE_WORD		0x04
 
 struct ti_write_data_bytes {
-	__u8	bAddrType;
-	__u8	bDataType;
-	__u8	bDataCounter;
+	u8	bAddrType;
+	u8	bDataType;
+	u8	bDataCounter;
 	__be16	wBaseAddrHi;
 	__be16	wBaseAddrLo;
-	__u8	bData[0];
+	u8	bData[0];
 } __packed;
 
 struct ti_read_data_request {
@@ -258,7 +258,7 @@
 /* Firmware image header */
 struct ti_firmware_header {
 	__le16	wLength;
-	__u8	bCheckSum;
+	u8	bCheckSum;
 } __packed;
 
 /* UART addresses */
@@ -276,9 +276,6 @@
 
 #define TI_DEFAULT_CLOSING_WAIT	4000		/* in .01 secs */
 
-/* supported setserial flags */
-#define TI_SET_SERIAL_FLAGS	0
-
 /* read urb states */
 #define TI_READ_URB_RUNNING	0
 #define TI_READ_URB_STOPPING	1
@@ -288,11 +285,10 @@
 
 struct ti_port {
 	int			tp_is_open;
-	__u8			tp_msr;
-	__u8			tp_shadow_mcr;
-	__u8			tp_uart_mode;	/* 232 or 485 modes */
+	u8			tp_msr;
+	u8			tp_shadow_mcr;
+	u8			tp_uart_mode;	/* 232 or 485 modes */
 	unsigned int		tp_uart_base_addr;
-	int			tp_flags;
 	struct ti_device	*tp_tdev;
 	struct usb_serial_port	*tp_port;
 	spinlock_t		tp_lock;
@@ -306,7 +302,6 @@
 	struct usb_serial	*td_serial;
 	int			td_is_3410;
 	bool			td_rs485_only;
-	int			td_urb_error;
 };
 
 static int ti_startup(struct usb_serial *serial);
@@ -343,7 +338,7 @@
 	struct serial_struct __user *ret_arg);
 static int ti_set_serial_info(struct tty_struct *tty, struct ti_port *tport,
 	struct serial_struct __user *new_arg);
-static void ti_handle_new_msr(struct ti_port *tport, __u8 msr);
+static void ti_handle_new_msr(struct ti_port *tport, u8 msr);
 
 static void ti_stop_read(struct ti_port *tport, struct tty_struct *tty);
 static int ti_restart_read(struct ti_port *tport, struct tty_struct *tty);
@@ -354,7 +349,7 @@
 	__u16 moduleid, __u16 value, __u8 *data, int size);
 
 static int ti_write_byte(struct usb_serial_port *port, struct ti_device *tdev,
-			 unsigned long addr, __u8 mask, __u8 byte);
+			 unsigned long addr, u8 mask, u8 byte);
 
 static int ti_download_firmware(struct ti_device *tdev);
 
@@ -647,12 +642,11 @@
 	struct urb *urb;
 	int port_number;
 	int status;
-	__u16 open_settings = (__u8)(TI_PIPE_MODE_CONTINUOUS |
-			     TI_PIPE_TIMEOUT_ENABLE |
-			     (TI_TRANSFER_TIMEOUT << 2));
+	u16 open_settings;
 
-	if (tport == NULL)
-		return -ENODEV;
+	open_settings = (TI_PIPE_MODE_CONTINUOUS |
+			 TI_PIPE_TIMEOUT_ENABLE |
+			 (TI_TRANSFER_TIMEOUT << 2));
 
 	dev = port->serial->dev;
 	tdev = tport->tp_tdev;
@@ -686,7 +680,6 @@
 	if (tty)
 		ti_set_termios(tty, port, &tty->termios);
 
-	dev_dbg(&port->dev, "%s - sending TI_OPEN_PORT\n", __func__);
 	status = ti_command_out_sync(tdev, TI_OPEN_PORT,
 		(__u8)(TI_UART1_PORT + port_number), open_settings, NULL, 0);
 	if (status) {
@@ -695,7 +688,6 @@
 		goto unlink_int_urb;
 	}
 
-	dev_dbg(&port->dev, "%s - sending TI_START_PORT\n", __func__);
 	status = ti_command_out_sync(tdev, TI_START_PORT,
 		(__u8)(TI_UART1_PORT + port_number), 0, NULL, 0);
 	if (status) {
@@ -704,7 +696,6 @@
 		goto unlink_int_urb;
 	}
 
-	dev_dbg(&port->dev, "%s - sending TI_PURGE_PORT\n", __func__);
 	status = ti_command_out_sync(tdev, TI_PURGE_PORT,
 		(__u8)(TI_UART1_PORT + port_number), TI_PURGE_INPUT, NULL, 0);
 	if (status) {
@@ -728,7 +719,6 @@
 	if (tty)
 		ti_set_termios(tty, port, &tty->termios);
 
-	dev_dbg(&port->dev, "%s - sending TI_OPEN_PORT (2)\n", __func__);
 	status = ti_command_out_sync(tdev, TI_OPEN_PORT,
 		(__u8)(TI_UART1_PORT + port_number), open_settings, NULL, 0);
 	if (status) {
@@ -737,7 +727,6 @@
 		goto unlink_int_urb;
 	}
 
-	dev_dbg(&port->dev, "%s - sending TI_START_PORT (2)\n", __func__);
 	status = ti_command_out_sync(tdev, TI_START_PORT,
 		(__u8)(TI_UART1_PORT + port_number), 0, NULL, 0);
 	if (status) {
@@ -747,7 +736,6 @@
 	}
 
 	/* start read urb */
-	dev_dbg(&port->dev, "%s - start read urb\n", __func__);
 	urb = port->read_urb;
 	if (!urb) {
 		dev_err(&port->dev, "%s - no read urb\n", __func__);
@@ -773,7 +761,6 @@
 		usb_kill_urb(port->serial->port[0]->interrupt_in_urb);
 release_lock:
 	mutex_unlock(&tdev->td_open_close_lock);
-	dev_dbg(&port->dev, "%s - exit %d\n", __func__, status);
 	return status;
 }
 
@@ -789,8 +776,6 @@
 
 	tdev = usb_get_serial_data(port->serial);
 	tport = usb_get_serial_port_data(port);
-	if (tdev == NULL || tport == NULL)
-		return;
 
 	tport->tp_is_open = 0;
 
@@ -803,7 +788,6 @@
 
 	port_number = port->port_number;
 
-	dev_dbg(&port->dev, "%s - sending TI_CLOSE_PORT\n", __func__);
 	status = ti_command_out_sync(tdev, TI_CLOSE_PORT,
 		     (__u8)(TI_UART1_PORT + port_number), 0, NULL, 0);
 	if (status)
@@ -830,11 +814,10 @@
 	struct ti_port *tport = usb_get_serial_port_data(port);
 
 	if (count == 0) {
-		dev_dbg(&port->dev, "%s - write request of 0 bytes\n", __func__);
 		return 0;
 	}
 
-	if (tport == NULL || !tport->tp_is_open)
+	if (!tport->tp_is_open)
 		return -ENODEV;
 
 	count = kfifo_in_locked(&port->write_fifo, data, count,
@@ -852,9 +835,6 @@
 	int room = 0;
 	unsigned long flags;
 
-	if (tport == NULL)
-		return 0;
-
 	spin_lock_irqsave(&tport->tp_lock, flags);
 	room = kfifo_avail(&port->write_fifo);
 	spin_unlock_irqrestore(&tport->tp_lock, flags);
@@ -871,9 +851,6 @@
 	int chars = 0;
 	unsigned long flags;
 
-	if (tport == NULL)
-		return 0;
-
 	spin_lock_irqsave(&tport->tp_lock, flags);
 	chars = kfifo_len(&port->write_fifo);
 	spin_unlock_irqrestore(&tport->tp_lock, flags);
@@ -900,9 +877,6 @@
 	struct usb_serial_port *port = tty->driver_data;
 	struct ti_port *tport = usb_get_serial_port_data(port);
 
-	if (tport == NULL)
-		return;
-
 	if (I_IXOFF(tty) || C_CRTSCTS(tty))
 		ti_stop_read(tport, tty);
 
@@ -915,9 +889,6 @@
 	struct ti_port *tport = usb_get_serial_port_data(port);
 	int status;
 
-	if (tport == NULL)
-		return;
-
 	if (I_IXOFF(tty) || C_CRTSCTS(tty)) {
 		status = ti_restart_read(tport, tty);
 		if (status)
@@ -932,16 +903,11 @@
 	struct usb_serial_port *port = tty->driver_data;
 	struct ti_port *tport = usb_get_serial_port_data(port);
 
-	if (tport == NULL)
-		return -ENODEV;
-
 	switch (cmd) {
 	case TIOCGSERIAL:
-		dev_dbg(&port->dev, "%s - TIOCGSERIAL\n", __func__);
 		return ti_get_serial_info(tport,
 				(struct serial_struct __user *)arg);
 	case TIOCSSERIAL:
-		dev_dbg(&port->dev, "%s - TIOCSSERIAL\n", __func__);
 		return ti_set_serial_info(tty, tport,
 				(struct serial_struct __user *)arg);
 	}
@@ -959,6 +925,8 @@
 	int status;
 	int port_number = port->port_number;
 	unsigned int mcr;
+	u16 wbaudrate;
+	u16 wflags = 0;
 
 	cflag = tty->termios.c_cflag;
 	iflag = tty->termios.c_iflag;
@@ -967,21 +935,16 @@
 	dev_dbg(&port->dev, "%s - old clfag %08x, old iflag %08x\n", __func__,
 		old_termios->c_cflag, old_termios->c_iflag);
 
-	if (tport == NULL)
-		return;
-
 	config = kmalloc(sizeof(*config), GFP_KERNEL);
 	if (!config)
 		return;
 
-	config->wFlags = 0;
-
 	/* these flags must be set */
-	config->wFlags |= TI_UART_ENABLE_MS_INTS;
-	config->wFlags |= TI_UART_ENABLE_AUTO_START_DMA;
-	config->bUartMode = (__u8)(tport->tp_uart_mode);
+	wflags |= TI_UART_ENABLE_MS_INTS;
+	wflags |= TI_UART_ENABLE_AUTO_START_DMA;
+	config->bUartMode = tport->tp_uart_mode;
 
-	switch (cflag & CSIZE) {
+	switch (C_CSIZE(tty)) {
 	case CS5:
 		    config->bDataBits = TI_UART_5_DATA_BITS;
 		    break;
@@ -1000,29 +963,29 @@
 	/* CMSPAR isn't supported by this driver */
 	tty->termios.c_cflag &= ~CMSPAR;
 
-	if (cflag & PARENB) {
-		if (cflag & PARODD) {
-			config->wFlags |= TI_UART_ENABLE_PARITY_CHECKING;
+	if (C_PARENB(tty)) {
+		if (C_PARODD(tty)) {
+			wflags |= TI_UART_ENABLE_PARITY_CHECKING;
 			config->bParity = TI_UART_ODD_PARITY;
 		} else {
-			config->wFlags |= TI_UART_ENABLE_PARITY_CHECKING;
+			wflags |= TI_UART_ENABLE_PARITY_CHECKING;
 			config->bParity = TI_UART_EVEN_PARITY;
 		}
 	} else {
-		config->wFlags &= ~TI_UART_ENABLE_PARITY_CHECKING;
+		wflags &= ~TI_UART_ENABLE_PARITY_CHECKING;
 		config->bParity = TI_UART_NO_PARITY;
 	}
 
-	if (cflag & CSTOPB)
+	if (C_CSTOPB(tty))
 		config->bStopBits = TI_UART_2_STOP_BITS;
 	else
 		config->bStopBits = TI_UART_1_STOP_BITS;
 
-	if (cflag & CRTSCTS) {
+	if (C_CRTSCTS(tty)) {
 		/* RTS flow control must be off to drop RTS for baud rate B0 */
-		if ((cflag & CBAUD) != B0)
-			config->wFlags |= TI_UART_ENABLE_RTS_IN;
-		config->wFlags |= TI_UART_ENABLE_CTS_OUT;
+		if ((C_BAUD(tty)) != B0)
+			wflags |= TI_UART_ENABLE_RTS_IN;
+		wflags |= TI_UART_ENABLE_CTS_OUT;
 	} else {
 		ti_restart_read(tport, tty);
 	}
@@ -1032,34 +995,34 @@
 		config->cXoff = STOP_CHAR(tty);
 
 		if (I_IXOFF(tty))
-			config->wFlags |= TI_UART_ENABLE_X_IN;
+			wflags |= TI_UART_ENABLE_X_IN;
 		else
 			ti_restart_read(tport, tty);
 
 		if (I_IXON(tty))
-			config->wFlags |= TI_UART_ENABLE_X_OUT;
+			wflags |= TI_UART_ENABLE_X_OUT;
 	}
 
 	baud = tty_get_baud_rate(tty);
 	if (!baud)
 		baud = 9600;
 	if (tport->tp_tdev->td_is_3410)
-		config->wBaudRate = (__u16)((923077 + baud/2) / baud);
+		wbaudrate = (923077 + baud/2) / baud;
 	else
-		config->wBaudRate = (__u16)((461538 + baud/2) / baud);
+		wbaudrate = (461538 + baud/2) / baud;
 
 	/* FIXME: Should calculate resulting baud here and report it back */
-	if ((cflag & CBAUD) != B0)
+	if ((C_BAUD(tty)) != B0)
 		tty_encode_baud_rate(tty, baud, baud);
 
 	dev_dbg(&port->dev,
 		"%s - BaudRate=%d, wBaudRate=%d, wFlags=0x%04X, bDataBits=%d, bParity=%d, bStopBits=%d, cXon=%d, cXoff=%d, bUartMode=%d\n",
-		__func__, baud, config->wBaudRate, config->wFlags,
+		__func__, baud, wbaudrate, wflags,
 		config->bDataBits, config->bParity, config->bStopBits,
 		config->cXon, config->cXoff, config->bUartMode);
 
-	cpu_to_be16s(&config->wBaudRate);
-	cpu_to_be16s(&config->wFlags);
+	config->wBaudRate = cpu_to_be16(wbaudrate);
+	config->wFlags = cpu_to_be16(wflags);
 
 	status = ti_command_out_sync(tport->tp_tdev, TI_SET_CONFIG,
 		(__u8)(TI_UART1_PORT + port_number), 0, (__u8 *)config,
@@ -1071,7 +1034,7 @@
 	/* SET_CONFIG asserts RTS and DTR, reset them correctly */
 	mcr = tport->tp_shadow_mcr;
 	/* if baud rate is B0, clear RTS and DTR */
-	if ((cflag & CBAUD) == B0)
+	if (C_BAUD(tty) == B0)
 		mcr &= ~(TI_MCR_DTR | TI_MCR_RTS);
 	status = ti_set_mcr(tport, mcr);
 	if (status)
@@ -1092,9 +1055,6 @@
 	unsigned int mcr;
 	unsigned long flags;
 
-	if (tport == NULL)
-		return -ENODEV;
-
 	spin_lock_irqsave(&tport->tp_lock, flags);
 	msr = tport->tp_msr;
 	mcr = tport->tp_shadow_mcr;
@@ -1122,9 +1082,6 @@
 	unsigned int mcr;
 	unsigned long flags;
 
-	if (tport == NULL)
-		return -ENODEV;
-
 	spin_lock_irqsave(&tport->tp_lock, flags);
 	mcr = tport->tp_shadow_mcr;
 
@@ -1155,9 +1112,6 @@
 
 	dev_dbg(&port->dev, "%s - state = %d\n", __func__, break_state);
 
-	if (tport == NULL)
-		return;
-
 	status = ti_write_byte(port, tport->tp_tdev,
 		tport->tp_uart_base_addr + TI_UART_OFFSET_LCR,
 		TI_LCR_BREAK, break_state == -1 ? TI_LCR_BREAK : 0);
@@ -1189,7 +1143,7 @@
 	int function;
 	int status = urb->status;
 	int retval;
-	__u8 msr;
+	u8 msr;
 
 	switch (status) {
 	case 0:
@@ -1198,11 +1152,9 @@
 	case -ENOENT:
 	case -ESHUTDOWN:
 		dev_dbg(dev, "%s - urb shutting down, %d\n", __func__, status);
-		tdev->td_urb_error = 1;
 		return;
 	default:
 		dev_err(dev, "%s - nonzero urb status, %d\n", __func__, status);
-		tdev->td_urb_error = 1;
 		goto exit;
 	}
 
@@ -1275,12 +1227,10 @@
 	case -ENOENT:
 	case -ESHUTDOWN:
 		dev_dbg(dev, "%s - urb shutting down, %d\n", __func__, status);
-		tport->tp_tdev->td_urb_error = 1;
 		return;
 	default:
 		dev_err(dev, "%s - nonzero urb status, %d\n",
 			__func__, status);
-		tport->tp_tdev->td_urb_error = 1;
 	}
 
 	if (status == -EPIPE)
@@ -1335,12 +1285,10 @@
 	case -ENOENT:
 	case -ESHUTDOWN:
 		dev_dbg(&port->dev, "%s - urb shutting down, %d\n", __func__, status);
-		tport->tp_tdev->td_urb_error = 1;
 		return;
 	default:
 		dev_err_console(port, "%s - nonzero urb status, %d\n",
 			__func__, status);
-		tport->tp_tdev->td_urb_error = 1;
 	}
 
 	/* send any buffered data */
@@ -1490,7 +1438,6 @@
 	ret_serial.type = PORT_16550A;
 	ret_serial.line = port->minor;
 	ret_serial.port = port->port_number;
-	ret_serial.flags = tport->tp_flags;
 	ret_serial.xmit_fifo_size = kfifo_size(&port->write_fifo);
 	ret_serial.baud_base = tport->tp_tdev->td_is_3410 ? 921600 : 460800;
 	ret_serial.closing_wait = cwait;
@@ -1515,14 +1462,13 @@
 	if (cwait != ASYNC_CLOSING_WAIT_NONE)
 		cwait = msecs_to_jiffies(10 * new_serial.closing_wait);
 
-	tport->tp_flags = new_serial.flags & TI_SET_SERIAL_FLAGS;
 	tport->tp_port->port.closing_wait = cwait;
 
 	return 0;
 }
 
 
-static void ti_handle_new_msr(struct ti_port *tport, __u8 msr)
+static void ti_handle_new_msr(struct ti_port *tport, u8 msr)
 {
 	struct async_icount *icount;
 	struct tty_struct *tty;
@@ -1634,8 +1580,8 @@
 
 
 static int ti_write_byte(struct usb_serial_port *port,
-			struct ti_device *tdev, unsigned long addr,
-			__u8 mask, __u8 byte)
+			 struct ti_device *tdev, unsigned long addr,
+			 u8 mask, u8 byte)
 {
 	int status;
 	unsigned int size;
@@ -1679,11 +1625,10 @@
 	int len;
 
 	for (pos = sizeof(struct ti_firmware_header); pos < size; pos++)
-		cs = (__u8)(cs + buffer[pos]);
+		cs = (u8)(cs + buffer[pos]);
 
 	header = (struct ti_firmware_header *)buffer;
-	header->wLength = cpu_to_le16((__u16)(size
-					- sizeof(struct ti_firmware_header)));
+	header->wLength = cpu_to_le16(size - sizeof(*header));
 	header->bCheckSum = cs;
 
 	dev_dbg(&dev->dev, "%s - downloading firmware\n", __func__);
@@ -1701,7 +1646,7 @@
 {
 	int status;
 	int buffer_size;
-	__u8 *buffer;
+	u8 *buffer;
 	struct usb_device *dev = tdev->td_serial->dev;
 	unsigned int pipe = usb_sndbulkpipe(dev,
 		tdev->td_serial->port[0]->bulk_out_endpointAddress);
diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c
index a204782..e98b6e5 100644
--- a/drivers/usb/serial/usb-serial-simple.c
+++ b/drivers/usb/serial/usb-serial-simple.c
@@ -54,7 +54,8 @@
 /* Infineon Flashloader driver */
 #define FLASHLOADER_IDS()		\
 	{ USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \
-	{ USB_DEVICE(0x8087, 0x0716) }
+	{ USB_DEVICE(0x8087, 0x0716) }, \
+	{ USB_DEVICE(0x8087, 0x0801) }
 DEVICE(flashloader, FLASHLOADER_IDS);
 
 /* Google Serial USB SubClass */
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index b1b9bac..d213cf4 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -1433,7 +1433,7 @@
 
 	rc = usb_register(udriver);
 	if (rc)
-		return rc;
+		goto failed_usb_register;
 
 	for (sd = serial_drivers; *sd; ++sd) {
 		(*sd)->usb_driver = udriver;
@@ -1451,6 +1451,8 @@
 	while (sd-- > serial_drivers)
 		usb_serial_deregister(*sd);
 	usb_deregister(udriver);
+failed_usb_register:
+	kfree(udriver);
 	return rc;
 }
 EXPORT_SYMBOL_GPL(usb_serial_register_drivers);
diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c
index 1d8b03c..878b4b8 100644
--- a/drivers/usb/storage/alauda.c
+++ b/drivers/usb/storage/alauda.c
@@ -939,10 +939,8 @@
 
 	len = min(sectors, blocksize) * (pagesize + 64);
 	buffer = kmalloc(len, GFP_NOIO);
-	if (buffer == NULL) {
-		printk(KERN_WARNING "alauda_read_data: Out of memory\n");
+	if (!buffer)
 		return USB_STOR_TRANSPORT_ERROR;
-	}
 
 	/* Figure out the initial LBA and page */
 	lba = address >> blockshift;
@@ -1033,18 +1031,15 @@
 
 	len = min(sectors, blocksize) * pagesize;
 	buffer = kmalloc(len, GFP_NOIO);
-	if (buffer == NULL) {
-		printk(KERN_WARNING "alauda_write_data: Out of memory\n");
+	if (!buffer)
 		return USB_STOR_TRANSPORT_ERROR;
-	}
 
 	/*
 	 * We also need a temporary block buffer, where we read in the old data,
 	 * overwrite parts with the new data, and manipulate the redundancy data
 	 */
 	blockbuffer = kmalloc((pagesize + 64) * blocksize, GFP_NOIO);
-	if (blockbuffer == NULL) {
-		printk(KERN_WARNING "alauda_write_data: Out of memory\n");
+	if (!blockbuffer) {
 		kfree(buffer);
 		return USB_STOR_TRANSPORT_ERROR;
 	}
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 33eb923..8cd2926 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -296,6 +296,14 @@
 		if (us->fflags & US_FL_BROKEN_FUA)
 			sdev->broken_fua = 1;
 
+		/* Some even totally fail to indicate a cache */
+		if (us->fflags & US_FL_ALWAYS_SYNC) {
+			/* don't read caching information */
+			sdev->skip_ms_page_8 = 1;
+			sdev->skip_ms_page_3f = 1;
+			/* assume sync is needed */
+			sdev->wce_default_on = 1;
+		}
 	} else {
 
 		/*
diff --git a/drivers/usb/storage/sddr09.c b/drivers/usb/storage/sddr09.c
index c5797fa..3aeaa53 100644
--- a/drivers/usb/storage/sddr09.c
+++ b/drivers/usb/storage/sddr09.c
@@ -766,10 +766,8 @@
 
 	len = min(sectors, (unsigned int) info->blocksize) * info->pagesize;
 	buffer = kmalloc(len, GFP_NOIO);
-	if (buffer == NULL) {
-		printk(KERN_WARNING "sddr09_read_data: Out of memory\n");
+	if (!buffer)
 		return -ENOMEM;
-	}
 
 	// This could be made much more efficient by checking for
 	// contiguous LBA's. Another exercise left to the student.
@@ -1004,10 +1002,8 @@
 	pagelen = (1 << info->pageshift) + (1 << CONTROL_SHIFT);
 	blocklen = (pagelen << info->blockshift);
 	blockbuffer = kmalloc(blocklen, GFP_NOIO);
-	if (!blockbuffer) {
-		printk(KERN_WARNING "sddr09_write_data: Out of memory\n");
+	if (!blockbuffer)
 		return -ENOMEM;
-	}
 
 	/*
 	 * Since we don't write the user data directly to the device,
@@ -1017,8 +1013,7 @@
 
 	len = min(sectors, (unsigned int) info->blocksize) * info->pagesize;
 	buffer = kmalloc(len, GFP_NOIO);
-	if (buffer == NULL) {
-		printk(KERN_WARNING "sddr09_write_data: Out of memory\n");
+	if (!buffer) {
 		kfree(blockbuffer);
 		return -ENOMEM;
 	}
@@ -1241,8 +1236,7 @@
 	alloc_blocks = min(numblocks, SDDR09_READ_MAP_BUFSZ >> CONTROL_SHIFT);
 	alloc_len = (alloc_blocks << CONTROL_SHIFT);
 	buffer = kmalloc(alloc_len, GFP_NOIO);
-	if (buffer == NULL) {
-		printk(KERN_WARNING "sddr09_read_map: out of memory\n");
+	if (!buffer) {
 		result = -1;
 		goto done;
 	}
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index aa35392..af3c7ee 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -338,6 +338,13 @@
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_NO_WP_DETECT),
 
+/* Reported by Egbert Eich <eich@suse.com> */
+UNUSUAL_DEV(  0x0480, 0xd010, 0x0100, 0x9999,
+		"Toshiba",
+		"External USB 3.0",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_ALWAYS_SYNC),
+
 /* Patch submitted by Philipp Friedrich <philipp@void.at> */
 UNUSUAL_DEV(  0x0482, 0x0100, 0x0100, 0x0100,
 		"Kyocera",
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index ef2d8cd..2cba13a 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -498,7 +498,8 @@
 			US_FL_NO_READ_DISC_INFO | US_FL_NO_READ_CAPACITY_16 |
 			US_FL_INITIAL_READ10 | US_FL_WRITE_CACHE |
 			US_FL_NO_ATA_1X | US_FL_NO_REPORT_OPCODES |
-			US_FL_MAX_SECTORS_240 | US_FL_NO_REPORT_LUNS);
+			US_FL_MAX_SECTORS_240 | US_FL_NO_REPORT_LUNS |
+			US_FL_ALWAYS_SYNC);
 
 	p = quirks;
 	while (*p) {
@@ -581,6 +582,9 @@
 		case 'w':
 			f |= US_FL_NO_WP_DETECT;
 			break;
+		case 'y':
+			f |= US_FL_ALWAYS_SYNC;
+			break;
 		/* Ignore unrecognized flag characters */
 		}
 	}
@@ -794,10 +798,8 @@
 	struct task_struct *th;
 
 	us->current_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!us->current_urb) {
-		usb_stor_dbg(us, "URB allocation failed\n");
+	if (!us->current_urb)
 		return -ENOMEM;
-	}
 
 	/*
 	 * Just before we start our control thread, initialize
@@ -1070,17 +1072,17 @@
 	result = usb_stor_acquire_resources(us);
 	if (result)
 		goto BadDevice;
+	usb_autopm_get_interface_no_resume(us->pusb_intf);
 	snprintf(us->scsi_name, sizeof(us->scsi_name), "usb-storage %s",
 					dev_name(&us->pusb_intf->dev));
 	result = scsi_add_host(us_to_host(us), dev);
 	if (result) {
 		dev_warn(dev,
 				"Unable to add the scsi host\n");
-		goto BadDevice;
+		goto HostAddErr;
 	}
 
 	/* Submit the delayed_work for SCSI-device scanning */
-	usb_autopm_get_interface_no_resume(us->pusb_intf);
 	set_bit(US_FLIDX_SCAN_PENDING, &us->dflags);
 
 	if (delay_use > 0)
@@ -1090,6 +1092,8 @@
 	return 0;
 
 	/* We come here if there are any problems */
+HostAddErr:
+	usb_autopm_put_interface_no_suspend(us->pusb_intf);
 BadDevice:
 	usb_stor_dbg(us, "storage_probe() failed\n");
 	release_everything(us);
diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c
index 545d09b..5133a07 100644
--- a/drivers/usb/usb-skeleton.c
+++ b/drivers/usb/usb-skeleton.c
@@ -499,10 +499,8 @@
 
 	/* allocate memory for our device state and initialize it */
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (!dev) {
-		dev_err(&interface->dev, "Out of memory\n");
+	if (!dev)
 		goto error;
-	}
 	kref_init(&dev->kref);
 	sema_init(&dev->limit_sem, WRITES_IN_FLIGHT);
 	mutex_init(&dev->io_mutex);
@@ -526,17 +524,11 @@
 			dev->bulk_in_size = buffer_size;
 			dev->bulk_in_endpointAddr = endpoint->bEndpointAddress;
 			dev->bulk_in_buffer = kmalloc(buffer_size, GFP_KERNEL);
-			if (!dev->bulk_in_buffer) {
-				dev_err(&interface->dev,
-					"Could not allocate bulk_in_buffer\n");
+			if (!dev->bulk_in_buffer)
 				goto error;
-			}
 			dev->bulk_in_urb = usb_alloc_urb(0, GFP_KERNEL);
-			if (!dev->bulk_in_urb) {
-				dev_err(&interface->dev,
-					"Could not allocate bulk_in_urb\n");
+			if (!dev->bulk_in_urb)
 				goto error;
-			}
 		}
 
 		if (!dev->bulk_out_endpointAddr &&
diff --git a/drivers/usb/usbip/Kconfig b/drivers/usb/usbip/Kconfig
index 17646b2..eeefa29 100644
--- a/drivers/usb/usbip/Kconfig
+++ b/drivers/usb/usbip/Kconfig
@@ -1,6 +1,7 @@
 config USBIP_CORE
 	tristate "USB/IP support"
-	depends on USB_COMMON && NET
+	depends on NET
+	select USB_COMMON
 	---help---
 	  This enables pushing USB packets over IP to allow remote
 	  machines direct access to USB devices. It provides the
@@ -24,6 +25,27 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called vhci-hcd.
 
+config USBIP_VHCI_HC_PORTS
+	int "Number of ports per USB/IP virtual host controller"
+	range 1 31
+	default 8
+	depends on USBIP_VHCI_HCD
+	---help---
+	  To increase number of ports available for USB/IP virtual
+	  host controller driver, this defines number of ports per
+	  USB/IP virtual host controller.
+
+config USBIP_VHCI_NR_HCS
+	int "Number of USB/IP virtual host controllers"
+	range 1 128
+	default 1
+	depends on USBIP_VHCI_HCD
+	---help---
+	  To increase number of ports available for USB/IP virtual
+	  host controller driver, this defines number of USB/IP
+	  virtual host controllers as if adding physical host
+	  controllers.
+
 config USBIP_HOST
 	tristate "Host driver"
 	depends on USBIP_CORE && USB
diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c
index 2df63e3..191b176 100644
--- a/drivers/usb/usbip/stub_rx.c
+++ b/drivers/usb/usbip/stub_rx.c
@@ -461,7 +461,6 @@
 		priv->urb = usb_alloc_urb(0, GFP_KERNEL);
 
 	if (!priv->urb) {
-		dev_err(&udev->dev, "malloc urb\n");
 		usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC);
 		return;
 	}
diff --git a/drivers/usb/usbip/vhci.h b/drivers/usb/usbip/vhci.h
index a863a98..88b71c4 100644
--- a/drivers/usb/usbip/vhci.h
+++ b/drivers/usb/usbip/vhci.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ * Copyright (C) 2015 Nobuo Iwata
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -72,13 +73,25 @@
 };
 
 /* Number of supported ports. Value has an upperbound of USB_MAXCHILDREN */
-#define VHCI_NPORTS 8
+#ifdef CONFIG_USBIP_VHCI_HC_PORTS
+#define VHCI_HC_PORTS CONFIG_USBIP_VHCI_HC_PORTS
+#else
+#define VHCI_HC_PORTS 8
+#endif
+
+#ifdef CONFIG_USBIP_VHCI_NR_HCS
+#define VHCI_NR_HCS CONFIG_USBIP_VHCI_NR_HCS
+#else
+#define VHCI_NR_HCS 1
+#endif
+
+#define MAX_STATUS_NAME 16
 
 /* for usb_bus.hcpriv */
 struct vhci_hcd {
 	spinlock_t lock;
 
-	u32 port_status[VHCI_NPORTS];
+	u32 port_status[VHCI_HC_PORTS];
 
 	unsigned resuming:1;
 	unsigned long re_timeout;
@@ -90,14 +103,19 @@
 	 * wIndex shows the port number and begins from 1.
 	 * But, the index of this array begins from 0.
 	 */
-	struct vhci_device vdev[VHCI_NPORTS];
+	struct vhci_device vdev[VHCI_HC_PORTS];
 };
 
-extern struct vhci_hcd *the_controller;
-extern const struct attribute_group dev_attr_group;
+extern int vhci_num_controllers;
+extern struct platform_device **vhci_pdevs;
+extern struct attribute_group vhci_attr_group;
 
 /* vhci_hcd.c */
-void rh_port_connect(int rhport, enum usb_device_speed speed);
+void rh_port_connect(struct vhci_device *vdev, enum usb_device_speed speed);
+
+/* vhci_sysfs.c */
+int vhci_init_attr_group(void);
+void vhci_finish_attr_group(void);
 
 /* vhci_rx.c */
 struct urb *pickup_urb_and_free_priv(struct vhci_device *vdev, __u32 seqnum);
@@ -106,9 +124,14 @@
 /* vhci_tx.c */
 int vhci_tx_loop(void *data);
 
-static inline struct vhci_device *port_to_vdev(__u32 port)
+static inline __u32 port_to_rhport(__u32 port)
 {
-	return &the_controller->vdev[port];
+	return port % VHCI_HC_PORTS;
+}
+
+static inline int port_to_pdev_nr(__u32 port)
+{
+	return port / VHCI_HC_PORTS;
 }
 
 static inline struct vhci_hcd *hcd_to_vhci(struct usb_hcd *hcd)
@@ -116,14 +139,25 @@
 	return (struct vhci_hcd *) (hcd->hcd_priv);
 }
 
+static inline struct device *hcd_dev(struct usb_hcd *hcd)
+{
+	return (hcd)->self.controller;
+}
+
+static inline const char *hcd_name(struct usb_hcd *hcd)
+{
+	return (hcd)->self.bus_name;
+}
+
 static inline struct usb_hcd *vhci_to_hcd(struct vhci_hcd *vhci)
 {
 	return container_of((void *) vhci, struct usb_hcd, hcd_priv);
 }
 
-static inline struct device *vhci_dev(struct vhci_hcd *vhci)
+static inline struct vhci_hcd *vdev_to_vhci(struct vhci_device *vdev)
 {
-	return vhci_to_hcd(vhci)->self.controller;
+	return container_of(
+			(void *)(vdev - vdev->rhport), struct vhci_hcd, vdev);
 }
 
 #endif /* __USBIP_VHCI_H */
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index 2e0450b..03eccf2 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ * Copyright (C) 2015-2016 Nobuo Iwata
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -56,7 +57,9 @@
 static const char driver_name[] = "vhci_hcd";
 static const char driver_desc[] = "USB/IP Virtual Host Controller";
 
-struct vhci_hcd *the_controller;
+int vhci_num_controllers = VHCI_NR_HCS;
+
+struct platform_device **vhci_pdevs;
 
 static const char * const bit_desc[] = {
 	"CONNECTION",		/*0*/
@@ -119,47 +122,59 @@
 	pr_debug("\n");
 }
 
-void rh_port_connect(int rhport, enum usb_device_speed speed)
+void rh_port_connect(struct vhci_device *vdev, enum usb_device_speed speed)
 {
+	struct vhci_hcd	*vhci = vdev_to_vhci(vdev);
+	int		rhport = vdev->rhport;
+	u32		status;
 	unsigned long	flags;
 
 	usbip_dbg_vhci_rh("rh_port_connect %d\n", rhport);
 
-	spin_lock_irqsave(&the_controller->lock, flags);
+	spin_lock_irqsave(&vhci->lock, flags);
 
-	the_controller->port_status[rhport] |= USB_PORT_STAT_CONNECTION
-		| (1 << USB_PORT_FEAT_C_CONNECTION);
+	status = vhci->port_status[rhport];
+
+	status |= USB_PORT_STAT_CONNECTION | (1 << USB_PORT_FEAT_C_CONNECTION);
 
 	switch (speed) {
 	case USB_SPEED_HIGH:
-		the_controller->port_status[rhport] |= USB_PORT_STAT_HIGH_SPEED;
+		status |= USB_PORT_STAT_HIGH_SPEED;
 		break;
 	case USB_SPEED_LOW:
-		the_controller->port_status[rhport] |= USB_PORT_STAT_LOW_SPEED;
+		status |= USB_PORT_STAT_LOW_SPEED;
 		break;
 	default:
 		break;
 	}
 
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	vhci->port_status[rhport] = status;
 
-	usb_hcd_poll_rh_status(vhci_to_hcd(the_controller));
+	spin_unlock_irqrestore(&vhci->lock, flags);
+
+	usb_hcd_poll_rh_status(vhci_to_hcd(vhci));
 }
 
-static void rh_port_disconnect(int rhport)
+static void rh_port_disconnect(struct vhci_device *vdev)
 {
+	struct vhci_hcd	*vhci = vdev_to_vhci(vdev);
+	int		rhport = vdev->rhport;
+	u32		status;
 	unsigned long	flags;
 
 	usbip_dbg_vhci_rh("rh_port_disconnect %d\n", rhport);
 
-	spin_lock_irqsave(&the_controller->lock, flags);
+	spin_lock_irqsave(&vhci->lock, flags);
 
-	the_controller->port_status[rhport] &= ~USB_PORT_STAT_CONNECTION;
-	the_controller->port_status[rhport] |=
-					(1 << USB_PORT_FEAT_C_CONNECTION);
+	status = vhci->port_status[rhport];
 
-	spin_unlock_irqrestore(&the_controller->lock, flags);
-	usb_hcd_poll_rh_status(vhci_to_hcd(the_controller));
+	status &= ~USB_PORT_STAT_CONNECTION;
+	status |= (1 << USB_PORT_FEAT_C_CONNECTION);
+
+	vhci->port_status[rhport] = status;
+
+	spin_unlock_irqrestore(&vhci->lock, flags);
+	usb_hcd_poll_rh_status(vhci_to_hcd(vhci));
 }
 
 #define PORT_C_MASK				\
@@ -188,7 +203,7 @@
 	int		changed = 0;
 	unsigned long	flags;
 
-	retval = DIV_ROUND_UP(VHCI_NPORTS + 1, 8);
+	retval = DIV_ROUND_UP(VHCI_HC_PORTS + 1, 8);
 	memset(buf, 0, retval);
 
 	vhci = hcd_to_vhci(hcd);
@@ -200,7 +215,7 @@
 	}
 
 	/* check pseudo status register for each port */
-	for (rhport = 0; rhport < VHCI_NPORTS; rhport++) {
+	for (rhport = 0; rhport < VHCI_HC_PORTS; rhport++) {
 		if ((vhci->port_status[rhport] & PORT_C_MASK)) {
 			/* The status of a port has been changed, */
 			usbip_dbg_vhci_rh("port %d status changed\n", rhport);
@@ -225,7 +240,7 @@
 	desc->bDescLength = 9;
 	desc->wHubCharacteristics = cpu_to_le16(
 		HUB_CHAR_INDV_PORT_LPSM | HUB_CHAR_COMMON_OCPM);
-	desc->bNbrPorts = VHCI_NPORTS;
+	desc->bNbrPorts = VHCI_HC_PORTS;
 	desc->u.hs.DeviceRemovable[0] = 0xff;
 	desc->u.hs.DeviceRemovable[1] = 0xff;
 }
@@ -238,7 +253,7 @@
 	int		rhport;
 	unsigned long	flags;
 
-	u32 prev_port_status[VHCI_NPORTS];
+	u32 prev_port_status[VHCI_HC_PORTS];
 
 	if (!HCD_HW_ACCESSIBLE(hcd))
 		return -ETIMEDOUT;
@@ -249,7 +264,7 @@
 	 */
 	usbip_dbg_vhci_rh("typeReq %x wValue %x wIndex %x\n", typeReq, wValue,
 			  wIndex);
-	if (wIndex > VHCI_NPORTS)
+	if (wIndex > VHCI_HC_PORTS)
 		pr_err("invalid port number %d\n", wIndex);
 	rhport = ((__u8)(wIndex & 0x00ff)) - 1;
 
@@ -315,7 +330,7 @@
 		break;
 	case GetPortStatus:
 		usbip_dbg_vhci_rh(" GetPortStatus port %x\n", wIndex);
-		if (wIndex > VHCI_NPORTS || wIndex < 1) {
+		if (wIndex > VHCI_HC_PORTS || wIndex < 1) {
 			pr_err("invalid port number %d\n", wIndex);
 			retval = -EPIPE;
 		}
@@ -416,14 +431,27 @@
 
 static struct vhci_device *get_vdev(struct usb_device *udev)
 {
-	int i;
+	struct platform_device *pdev;
+	struct usb_hcd *hcd;
+	struct vhci_hcd *vhci;
+	int pdev_nr, rhport;
 
 	if (!udev)
 		return NULL;
 
-	for (i = 0; i < VHCI_NPORTS; i++)
-		if (the_controller->vdev[i].udev == udev)
-			return port_to_vdev(i);
+	for (pdev_nr = 0; pdev_nr < vhci_num_controllers; pdev_nr++) {
+		pdev = *(vhci_pdevs + pdev_nr);
+		if (pdev == NULL)
+			continue;
+		hcd = platform_get_drvdata(pdev);
+		if (hcd == NULL)
+			continue;
+		vhci = hcd_to_vhci(hcd);
+		for (rhport = 0; rhport < VHCI_HC_PORTS; rhport++) {
+			if (vhci->vdev[rhport].udev == udev)
+				return &vhci->vdev[rhport];
+		}
+	}
 
 	return NULL;
 }
@@ -432,6 +460,7 @@
 {
 	struct vhci_device *vdev = get_vdev(urb->dev);
 	struct vhci_priv *priv;
+	struct vhci_hcd *vhci = vdev_to_vhci(vdev);
 	unsigned long flags;
 
 	if (!vdev) {
@@ -447,7 +476,7 @@
 
 	spin_lock_irqsave(&vdev->priv_lock, flags);
 
-	priv->seqnum = atomic_inc_return(&the_controller->seqnum);
+	priv->seqnum = atomic_inc_return(&vhci->seqnum);
 	if (priv->seqnum == 0xffff)
 		dev_info(&urb->dev->dev, "seqnum max\n");
 
@@ -465,7 +494,9 @@
 static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
 			    gfp_t mem_flags)
 {
+	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
 	struct device *dev = &urb->dev->dev;
+	u8 portnum = urb->dev->portnum;
 	int ret = 0;
 	struct vhci_device *vdev;
 	unsigned long flags;
@@ -473,26 +504,30 @@
 	usbip_dbg_vhci_hc("enter, usb_hcd %p urb %p mem_flags %d\n",
 			  hcd, urb, mem_flags);
 
+	if (portnum > VHCI_HC_PORTS) {
+		pr_err("invalid port number %d\n", portnum);
+		return -ENODEV;
+	}
+	vdev = &vhci->vdev[portnum-1];
+
 	/* patch to usb_sg_init() is in 2.5.60 */
 	BUG_ON(!urb->transfer_buffer && urb->transfer_buffer_length);
 
-	spin_lock_irqsave(&the_controller->lock, flags);
+	spin_lock_irqsave(&vhci->lock, flags);
 
 	if (urb->status != -EINPROGRESS) {
 		dev_err(dev, "URB already unlinked!, status %d\n", urb->status);
-		spin_unlock_irqrestore(&the_controller->lock, flags);
+		spin_unlock_irqrestore(&vhci->lock, flags);
 		return urb->status;
 	}
 
-	vdev = port_to_vdev(urb->dev->portnum-1);
-
 	/* refuse enqueue for dead connection */
 	spin_lock(&vdev->ud.lock);
 	if (vdev->ud.status == VDEV_ST_NULL ||
 	    vdev->ud.status == VDEV_ST_ERROR) {
 		dev_err(dev, "enqueue for inactive port %d\n", vdev->rhport);
 		spin_unlock(&vdev->ud.lock);
-		spin_unlock_irqrestore(&the_controller->lock, flags);
+		spin_unlock_irqrestore(&vhci->lock, flags);
 		return -ENODEV;
 	}
 	spin_unlock(&vdev->ud.lock);
@@ -565,17 +600,16 @@
 
 out:
 	vhci_tx_urb(urb);
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	spin_unlock_irqrestore(&vhci->lock, flags);
 
 	return 0;
 
 no_need_xmit:
 	usb_hcd_unlink_urb_from_ep(hcd, urb);
 no_need_unlink:
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	spin_unlock_irqrestore(&vhci->lock, flags);
 	if (!ret)
-		usb_hcd_giveback_urb(vhci_to_hcd(the_controller),
-				     urb, urb->status);
+		usb_hcd_giveback_urb(hcd, urb, urb->status);
 	return ret;
 }
 
@@ -627,19 +661,20 @@
  */
 static int vhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
 {
+	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
 	struct vhci_priv *priv;
 	struct vhci_device *vdev;
 	unsigned long flags;
 
 	pr_info("dequeue a urb %p\n", urb);
 
-	spin_lock_irqsave(&the_controller->lock, flags);
+	spin_lock_irqsave(&vhci->lock, flags);
 
 	priv = urb->hcpriv;
 	if (!priv) {
 		/* URB was never linked! or will be soon given back by
 		 * vhci_rx. */
-		spin_unlock_irqrestore(&the_controller->lock, flags);
+		spin_unlock_irqrestore(&vhci->lock, flags);
 		return -EIDRM;
 	}
 
@@ -648,7 +683,7 @@
 
 		ret = usb_hcd_check_unlink_urb(hcd, urb, status);
 		if (ret) {
-			spin_unlock_irqrestore(&the_controller->lock, flags);
+			spin_unlock_irqrestore(&vhci->lock, flags);
 			return ret;
 		}
 	}
@@ -676,10 +711,9 @@
 
 		usb_hcd_unlink_urb_from_ep(hcd, urb);
 
-		spin_unlock_irqrestore(&the_controller->lock, flags);
-		usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
-				     urb->status);
-		spin_lock_irqsave(&the_controller->lock, flags);
+		spin_unlock_irqrestore(&vhci->lock, flags);
+		usb_hcd_giveback_urb(vhci_to_hcd(vhci), urb, urb->status);
+		spin_lock_irqsave(&vhci->lock, flags);
 
 	} else {
 		/* tcp connection is alive */
@@ -691,12 +725,12 @@
 		unlink = kzalloc(sizeof(struct vhci_unlink), GFP_ATOMIC);
 		if (!unlink) {
 			spin_unlock(&vdev->priv_lock);
-			spin_unlock_irqrestore(&the_controller->lock, flags);
+			spin_unlock_irqrestore(&vhci->lock, flags);
 			usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_MALLOC);
 			return -ENOMEM;
 		}
 
-		unlink->seqnum = atomic_inc_return(&the_controller->seqnum);
+		unlink->seqnum = atomic_inc_return(&vhci->seqnum);
 		if (unlink->seqnum == 0xffff)
 			pr_info("seqnum max\n");
 
@@ -712,7 +746,7 @@
 		spin_unlock(&vdev->priv_lock);
 	}
 
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	spin_unlock_irqrestore(&vhci->lock, flags);
 
 	usbip_dbg_vhci_hc("leave\n");
 	return 0;
@@ -720,10 +754,12 @@
 
 static void vhci_device_unlink_cleanup(struct vhci_device *vdev)
 {
+	struct vhci_hcd *vhci = vdev_to_vhci(vdev);
+	struct usb_hcd *hcd = vhci_to_hcd(vhci);
 	struct vhci_unlink *unlink, *tmp;
 	unsigned long flags;
 
-	spin_lock_irqsave(&the_controller->lock, flags);
+	spin_lock_irqsave(&vhci->lock, flags);
 	spin_lock(&vdev->priv_lock);
 
 	list_for_each_entry_safe(unlink, tmp, &vdev->unlink_tx, list) {
@@ -752,24 +788,23 @@
 
 		urb->status = -ENODEV;
 
-		usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
+		usb_hcd_unlink_urb_from_ep(hcd, urb);
 
 		list_del(&unlink->list);
 
 		spin_unlock(&vdev->priv_lock);
-		spin_unlock_irqrestore(&the_controller->lock, flags);
+		spin_unlock_irqrestore(&vhci->lock, flags);
 
-		usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
-				     urb->status);
+		usb_hcd_giveback_urb(hcd, urb, urb->status);
 
-		spin_lock_irqsave(&the_controller->lock, flags);
+		spin_lock_irqsave(&vhci->lock, flags);
 		spin_lock(&vdev->priv_lock);
 
 		kfree(unlink);
 	}
 
 	spin_unlock(&vdev->priv_lock);
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	spin_unlock_irqrestore(&vhci->lock, flags);
 }
 
 /*
@@ -827,7 +862,7 @@
 	 * is actually given back by vhci_rx after receiving its return pdu.
 	 *
 	 */
-	rh_port_disconnect(vdev->rhport);
+	rh_port_disconnect(vdev);
 
 	pr_info("disconnect device\n");
 }
@@ -866,7 +901,7 @@
 
 static void vhci_device_init(struct vhci_device *vdev)
 {
-	memset(vdev, 0, sizeof(*vdev));
+	memset(vdev, 0, sizeof(struct vhci_device));
 
 	vdev->ud.side   = USBIP_VHCI;
 	vdev->ud.status = VDEV_ST_NULL;
@@ -887,17 +922,34 @@
 	usbip_start_eh(&vdev->ud);
 }
 
+static int hcd_name_to_id(const char *name)
+{
+	char *c;
+	long val;
+	int ret;
+
+	c = strchr(name, '.');
+	if (c == NULL)
+		return 0;
+
+	ret = kstrtol(c+1, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	return val;
+}
+
 static int vhci_start(struct usb_hcd *hcd)
 {
 	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
-	int rhport;
+	int id, rhport;
 	int err = 0;
 
 	usbip_dbg_vhci_hc("enter vhci_start\n");
 
 	/* initialize private data of usb_hcd */
 
-	for (rhport = 0; rhport < VHCI_NPORTS; rhport++) {
+	for (rhport = 0; rhport < VHCI_HC_PORTS; rhport++) {
 		struct vhci_device *vdev = &vhci->vdev[rhport];
 
 		vhci_device_init(vdev);
@@ -910,11 +962,26 @@
 	hcd->power_budget = 0; /* no limit */
 	hcd->uses_new_polling = 1;
 
+	id = hcd_name_to_id(hcd_name(hcd));
+	if (id < 0) {
+		pr_err("invalid vhci name %s\n", hcd_name(hcd));
+		return -EINVAL;
+	}
+
 	/* vhci_hcd is now ready to be controlled through sysfs */
-	err = sysfs_create_group(&vhci_dev(vhci)->kobj, &dev_attr_group);
-	if (err) {
-		pr_err("create sysfs files\n");
-		return err;
+	if (id == 0) {
+		err = vhci_init_attr_group();
+		if (err) {
+			pr_err("init attr group\n");
+			return err;
+		}
+		err = sysfs_create_group(&hcd_dev(hcd)->kobj, &vhci_attr_group);
+		if (err) {
+			pr_err("create sysfs files\n");
+			vhci_finish_attr_group();
+			return err;
+		}
+		pr_info("created sysfs %s\n", hcd_name(hcd));
 	}
 
 	return 0;
@@ -923,15 +990,19 @@
 static void vhci_stop(struct usb_hcd *hcd)
 {
 	struct vhci_hcd *vhci = hcd_to_vhci(hcd);
-	int rhport = 0;
+	int id, rhport;
 
 	usbip_dbg_vhci_hc("stop VHCI controller\n");
 
 	/* 1. remove the userland interface of vhci_hcd */
-	sysfs_remove_group(&vhci_dev(vhci)->kobj, &dev_attr_group);
+	id = hcd_name_to_id(hcd_name(hcd));
+	if (id == 0) {
+		sysfs_remove_group(&hcd_dev(hcd)->kobj, &vhci_attr_group);
+		vhci_finish_attr_group();
+	}
 
 	/* 2. shutdown all the ports of vhci_hcd */
-	for (rhport = 0; rhport < VHCI_NPORTS; rhport++) {
+	for (rhport = 0; rhport < VHCI_HC_PORTS; rhport++) {
 		struct vhci_device *vdev = &vhci->vdev[rhport];
 
 		usbip_event_add(&vdev->ud, VDEV_EVENT_REMOVED);
@@ -1025,9 +1096,6 @@
 	}
 	hcd->has_tt = 1;
 
-	/* this is private data for vhci_hcd */
-	the_controller = hcd_to_vhci(hcd);
-
 	/*
 	 * Finish generic HCD structure initialization and register.
 	 * Call the driver's reset() and start() routines.
@@ -1036,7 +1104,6 @@
 	if (ret != 0) {
 		pr_err("usb_add_hcd failed %d\n", ret);
 		usb_put_hcd(hcd);
-		the_controller = NULL;
 		return ret;
 	}
 
@@ -1059,7 +1126,6 @@
 	 */
 	usb_remove_hcd(hcd);
 	usb_put_hcd(hcd);
-	the_controller = NULL;
 
 	return 0;
 }
@@ -1070,21 +1136,24 @@
 static int vhci_hcd_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct usb_hcd *hcd;
-	int rhport = 0;
+	struct vhci_hcd *vhci;
+	int rhport;
 	int connected = 0;
 	int ret = 0;
 	unsigned long flags;
 
 	hcd = platform_get_drvdata(pdev);
+	if (!hcd)
+		return 0;
+	vhci = hcd_to_vhci(hcd);
 
-	spin_lock_irqsave(&the_controller->lock, flags);
+	spin_lock_irqsave(&vhci->lock, flags);
 
-	for (rhport = 0; rhport < VHCI_NPORTS; rhport++)
-		if (the_controller->port_status[rhport] &
-		    USB_PORT_STAT_CONNECTION)
+	for (rhport = 0; rhport < VHCI_HC_PORTS; rhport++)
+		if (vhci->port_status[rhport] & USB_PORT_STAT_CONNECTION)
 			connected += 1;
 
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	spin_unlock_irqrestore(&vhci->lock, flags);
 
 	if (connected > 0) {
 		dev_info(&pdev->dev,
@@ -1106,6 +1175,8 @@
 	dev_dbg(&pdev->dev, "%s\n", __func__);
 
 	hcd = platform_get_drvdata(pdev);
+	if (!hcd)
+		return 0;
 	set_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
 	usb_hcd_poll_rh_status(hcd);
 
@@ -1129,52 +1200,78 @@
 	},
 };
 
-/*
- * The VHCI 'device' is 'virtual'; not a real plug&play hardware.
- * We need to add this virtual device as a platform device arbitrarily:
- *	1. platform_device_register()
- */
-static void the_pdev_release(struct device *dev)
+static int add_platform_device(int id)
 {
+	struct platform_device *pdev;
+	int dev_nr;
+
+	if (id == 0)
+		dev_nr = -1;
+	else
+		dev_nr = id;
+
+	pdev = platform_device_register_simple(driver_name, dev_nr, NULL, 0);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	*(vhci_pdevs + id) = pdev;
+	return 0;
 }
 
-static struct platform_device the_pdev = {
-	/* should be the same name as driver_name */
-	.name = driver_name,
-	.id = -1,
-	.dev = {
-		.release = the_pdev_release,
-	},
-};
+static void del_platform_devices(void)
+{
+	struct platform_device *pdev;
+	int i;
+
+	for (i = 0; i < vhci_num_controllers; i++) {
+		pdev = *(vhci_pdevs + i);
+		if (pdev != NULL)
+			platform_device_unregister(pdev);
+		*(vhci_pdevs + i) = NULL;
+	}
+	sysfs_remove_link(&platform_bus.kobj, driver_name);
+}
 
 static int __init vhci_hcd_init(void)
 {
-	int ret;
+	int i, ret;
 
 	if (usb_disabled())
 		return -ENODEV;
 
+	if (vhci_num_controllers < 1)
+		vhci_num_controllers = 1;
+
+	vhci_pdevs = kcalloc(vhci_num_controllers, sizeof(void *), GFP_KERNEL);
+	if (vhci_pdevs == NULL)
+		return -ENOMEM;
+
 	ret = platform_driver_register(&vhci_driver);
 	if (ret)
 		goto err_driver_register;
 
-	ret = platform_device_register(&the_pdev);
-	if (ret)
-		goto err_platform_device_register;
+	for (i = 0; i < vhci_num_controllers; i++) {
+		ret = add_platform_device(i);
+		if (ret)
+			goto err_platform_device_register;
+	}
 
 	pr_info(DRIVER_DESC " v" USBIP_VERSION "\n");
 	return ret;
 
 err_platform_device_register:
+	del_platform_devices();
 	platform_driver_unregister(&vhci_driver);
 err_driver_register:
+	kfree(vhci_pdevs);
 	return ret;
 }
 
 static void __exit vhci_hcd_exit(void)
 {
-	platform_device_unregister(&the_pdev);
+	del_platform_devices();
 	platform_driver_unregister(&vhci_driver);
+	kfree(vhci_pdevs);
 }
 
 module_init(vhci_hcd_init);
diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c
index d656e0e..fc2d319 100644
--- a/drivers/usb/usbip/vhci_rx.c
+++ b/drivers/usb/usbip/vhci_rx.c
@@ -70,6 +70,7 @@
 static void vhci_recv_ret_submit(struct vhci_device *vdev,
 				 struct usbip_header *pdu)
 {
+	struct vhci_hcd *vhci = vdev_to_vhci(vdev);
 	struct usbip_device *ud = &vdev->ud;
 	struct urb *urb;
 	unsigned long flags;
@@ -81,7 +82,7 @@
 	if (!urb) {
 		pr_err("cannot find a urb of seqnum %u\n", pdu->base.seqnum);
 		pr_info("max seqnum %d\n",
-			atomic_read(&the_controller->seqnum));
+			atomic_read(&vhci->seqnum));
 		usbip_event_add(ud, VDEV_EVENT_ERROR_TCP);
 		return;
 	}
@@ -105,11 +106,11 @@
 
 	usbip_dbg_vhci_rx("now giveback urb %p\n", urb);
 
-	spin_lock_irqsave(&the_controller->lock, flags);
-	usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	spin_lock_irqsave(&vhci->lock, flags);
+	usb_hcd_unlink_urb_from_ep(vhci_to_hcd(vhci), urb);
+	spin_unlock_irqrestore(&vhci->lock, flags);
 
-	usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb, urb->status);
+	usb_hcd_giveback_urb(vhci_to_hcd(vhci), urb, urb->status);
 
 	usbip_dbg_vhci_rx("Leave\n");
 }
@@ -142,6 +143,7 @@
 static void vhci_recv_ret_unlink(struct vhci_device *vdev,
 				 struct usbip_header *pdu)
 {
+	struct vhci_hcd *vhci = vdev_to_vhci(vdev);
 	struct vhci_unlink *unlink;
 	struct urb *urb;
 	unsigned long flags;
@@ -174,12 +176,11 @@
 		urb->status = pdu->u.ret_unlink.status;
 		pr_info("urb->status %d\n", urb->status);
 
-		spin_lock_irqsave(&the_controller->lock, flags);
-		usb_hcd_unlink_urb_from_ep(vhci_to_hcd(the_controller), urb);
-		spin_unlock_irqrestore(&the_controller->lock, flags);
+		spin_lock_irqsave(&vhci->lock, flags);
+		usb_hcd_unlink_urb_from_ep(vhci_to_hcd(vhci), urb);
+		spin_unlock_irqrestore(&vhci->lock, flags);
 
-		usb_hcd_giveback_urb(vhci_to_hcd(the_controller), urb,
-				     urb->status);
+		usb_hcd_giveback_urb(vhci_to_hcd(vhci), urb, urb->status);
 	}
 
 	kfree(unlink);
diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c
index 5b5462e..c404017 100644
--- a/drivers/usb/usbip/vhci_sysfs.c
+++ b/drivers/usb/usbip/vhci_sysfs.c
@@ -1,5 +1,6 @@
 /*
  * Copyright (C) 2003-2008 Takahiro Hirofuchi
+ * Copyright (C) 2015-2016 Nobuo Iwata
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -20,6 +21,8 @@
 #include <linux/kthread.h>
 #include <linux/file.h>
 #include <linux/net.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
 
 #include "usbip_common.h"
 #include "vhci.h"
@@ -27,106 +30,190 @@
 /* TODO: refine locking ?*/
 
 /* Sysfs entry to show port status */
-static ssize_t status_show(struct device *dev, struct device_attribute *attr,
-			   char *out)
+static ssize_t status_show_vhci(int pdev_nr, char *out)
 {
+	struct platform_device *pdev = *(vhci_pdevs + pdev_nr);
+	struct vhci_hcd *vhci;
 	char *s = out;
 	int i = 0;
 	unsigned long flags;
 
-	BUG_ON(!the_controller || !out);
+	if (!pdev || !out) {
+		usbip_dbg_vhci_sysfs("show status error\n");
+		return 0;
+	}
 
-	spin_lock_irqsave(&the_controller->lock, flags);
+	vhci = hcd_to_vhci(platform_get_drvdata(pdev));
+
+	spin_lock_irqsave(&vhci->lock, flags);
 
 	/*
 	 * output example:
-	 * prt sta spd dev socket           local_busid
-	 * 000 004 000 000         c5a7bb80 1-2.3
-	 * 001 004 000 000         d8cee980 2-3.4
+	 * port sta spd dev      socket           local_busid
+	 * 0000 004 000 00000000         c5a7bb80 1-2.3
+	 * 0001 004 000 00000000         d8cee980 2-3.4
 	 *
 	 * IP address can be retrieved from a socket pointer address by looking
 	 * up /proc/net/{tcp,tcp6}. Also, a userland program may remember a
 	 * port number and its peer IP address.
 	 */
-	out += sprintf(out,
-		       "prt sta spd bus dev socket           local_busid\n");
-
-	for (i = 0; i < VHCI_NPORTS; i++) {
-		struct vhci_device *vdev = port_to_vdev(i);
+	for (i = 0; i < VHCI_HC_PORTS; i++) {
+		struct vhci_device *vdev = &vhci->vdev[i];
 
 		spin_lock(&vdev->ud.lock);
-		out += sprintf(out, "%03u %03u ", i, vdev->ud.status);
+		out += sprintf(out, "%04u %03u ",
+				    (pdev_nr * VHCI_HC_PORTS) + i,
+				    vdev->ud.status);
 
 		if (vdev->ud.status == VDEV_ST_USED) {
 			out += sprintf(out, "%03u %08x ",
-				       vdev->speed, vdev->devid);
-			out += sprintf(out, "%16p ", vdev->ud.tcp_socket);
-			out += sprintf(out, "%s", dev_name(&vdev->udev->dev));
+					    vdev->speed, vdev->devid);
+			out += sprintf(out, "%16p %s",
+					    vdev->ud.tcp_socket,
+					    dev_name(&vdev->udev->dev));
 
 		} else {
-			out += sprintf(out, "000 000 000 0000000000000000 0-0");
+			out += sprintf(out, "000 00000000 ");
+			out += sprintf(out, "0000000000000000 0-0");
 		}
 
 		out += sprintf(out, "\n");
 		spin_unlock(&vdev->ud.lock);
 	}
 
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	spin_unlock_irqrestore(&vhci->lock, flags);
 
 	return out - s;
 }
-static DEVICE_ATTR_RO(status);
+
+static ssize_t status_show_not_ready(int pdev_nr, char *out)
+{
+	char *s = out;
+	int i = 0;
+
+	for (i = 0; i < VHCI_HC_PORTS; i++) {
+		out += sprintf(out, "%04u %03u ",
+				    (pdev_nr * VHCI_HC_PORTS) + i,
+				    VDEV_ST_NOTASSIGNED);
+		out += sprintf(out, "000 00000000 0000000000000000 0-0");
+		out += sprintf(out, "\n");
+	}
+	return out - s;
+}
+
+static int status_name_to_id(const char *name)
+{
+	char *c;
+	long val;
+	int ret;
+
+	c = strchr(name, '.');
+	if (c == NULL)
+		return 0;
+
+	ret = kstrtol(c+1, 10, &val);
+	if (ret < 0)
+		return ret;
+
+	return val;
+}
+
+static ssize_t status_show(struct device *dev,
+			   struct device_attribute *attr, char *out)
+{
+	char *s = out;
+	int pdev_nr;
+
+	out += sprintf(out,
+		       "port sta spd dev      socket           local_busid\n");
+
+	pdev_nr = status_name_to_id(attr->attr.name);
+	if (pdev_nr < 0)
+		out += status_show_not_ready(pdev_nr, out);
+	else
+		out += status_show_vhci(pdev_nr, out);
+
+	return out - s;
+}
+
+static ssize_t nports_show(struct device *dev, struct device_attribute *attr,
+			   char *out)
+{
+	char *s = out;
+
+	out += sprintf(out, "%d\n", VHCI_HC_PORTS * vhci_num_controllers);
+	return out - s;
+}
+static DEVICE_ATTR_RO(nports);
 
 /* Sysfs entry to shutdown a virtual connection */
-static int vhci_port_disconnect(__u32 rhport)
+static int vhci_port_disconnect(struct vhci_hcd *vhci, __u32 rhport)
 {
-	struct vhci_device *vdev;
+	struct vhci_device *vdev = &vhci->vdev[rhport];
 	unsigned long flags;
 
 	usbip_dbg_vhci_sysfs("enter\n");
 
 	/* lock */
-	spin_lock_irqsave(&the_controller->lock, flags);
-
-	vdev = port_to_vdev(rhport);
-
+	spin_lock_irqsave(&vhci->lock, flags);
 	spin_lock(&vdev->ud.lock);
+
 	if (vdev->ud.status == VDEV_ST_NULL) {
 		pr_err("not connected %d\n", vdev->ud.status);
 
 		/* unlock */
 		spin_unlock(&vdev->ud.lock);
-		spin_unlock_irqrestore(&the_controller->lock, flags);
+		spin_unlock_irqrestore(&vhci->lock, flags);
 
 		return -EINVAL;
 	}
 
 	/* unlock */
 	spin_unlock(&vdev->ud.lock);
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	spin_unlock_irqrestore(&vhci->lock, flags);
 
 	usbip_event_add(&vdev->ud, VDEV_EVENT_DOWN);
 
 	return 0;
 }
 
+static int valid_port(__u32 pdev_nr, __u32 rhport)
+{
+	if (pdev_nr >= vhci_num_controllers) {
+		pr_err("pdev %u\n", pdev_nr);
+		return 0;
+	}
+	if (rhport >= VHCI_HC_PORTS) {
+		pr_err("rhport %u\n", rhport);
+		return 0;
+	}
+	return 1;
+}
+
 static ssize_t store_detach(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
-	int err;
-	__u32 rhport = 0;
+	__u32 port = 0, pdev_nr = 0, rhport = 0;
+	struct usb_hcd *hcd;
+	int ret;
 
-	if (sscanf(buf, "%u", &rhport) != 1)
+	if (kstrtoint(buf, 10, &port) < 0)
 		return -EINVAL;
 
-	/* check rhport */
-	if (rhport >= VHCI_NPORTS) {
-		dev_err(dev, "invalid port %u\n", rhport);
+	pdev_nr = port_to_pdev_nr(port);
+	rhport = port_to_rhport(port);
+
+	if (!valid_port(pdev_nr, rhport))
 		return -EINVAL;
+
+	hcd = platform_get_drvdata(*(vhci_pdevs + pdev_nr));
+	if (hcd == NULL) {
+		dev_err(dev, "port is not ready %u\n", port);
+		return -EAGAIN;
 	}
 
-	err = vhci_port_disconnect(rhport);
-	if (err < 0)
+	ret = vhci_port_disconnect(hcd_to_vhci(hcd), rhport);
+	if (ret < 0)
 		return -EINVAL;
 
 	usbip_dbg_vhci_sysfs("Leave\n");
@@ -135,16 +222,12 @@
 }
 static DEVICE_ATTR(detach, S_IWUSR, NULL, store_detach);
 
-/* Sysfs entry to establish a virtual connection */
-static int valid_args(__u32 rhport, enum usb_device_speed speed)
+static int valid_args(__u32 pdev_nr, __u32 rhport, enum usb_device_speed speed)
 {
-	/* check rhport */
-	if (rhport >= VHCI_NPORTS) {
-		pr_err("port %u\n", rhport);
-		return -EINVAL;
+	if (!valid_port(pdev_nr, rhport)) {
+		return 0;
 	}
 
-	/* check speed */
 	switch (speed) {
 	case USB_SPEED_LOW:
 	case USB_SPEED_FULL:
@@ -154,12 +237,13 @@
 	default:
 		pr_err("Failed attach request for unsupported USB speed: %s\n",
 			usb_speed_string(speed));
-		return -EINVAL;
+		return 0;
 	}
 
-	return 0;
+	return 1;
 }
 
+/* Sysfs entry to establish a virtual connection */
 /*
  * To start a new USB/IP attachment, a userland program needs to setup a TCP
  * connection and then write its socket descriptor with remote device
@@ -174,10 +258,12 @@
 static ssize_t store_attach(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
-	struct vhci_device *vdev;
 	struct socket *socket;
 	int sockfd = 0;
-	__u32 rhport = 0, devid = 0, speed = 0;
+	__u32 port = 0, pdev_nr = 0, rhport = 0, devid = 0, speed = 0;
+	struct usb_hcd *hcd;
+	struct vhci_hcd *vhci;
+	struct vhci_device *vdev;
 	int err;
 	unsigned long flags;
 
@@ -187,16 +273,28 @@
 	 * @devid: unique device identifier in a remote host
 	 * @speed: usb device speed in a remote host
 	 */
-	if (sscanf(buf, "%u %u %u %u", &rhport, &sockfd, &devid, &speed) != 4)
+	if (sscanf(buf, "%u %u %u %u", &port, &sockfd, &devid, &speed) != 4)
 		return -EINVAL;
+	pdev_nr = port_to_pdev_nr(port);
+	rhport = port_to_rhport(port);
 
-	usbip_dbg_vhci_sysfs("rhport(%u) sockfd(%u) devid(%u) speed(%u)\n",
-			     rhport, sockfd, devid, speed);
+	usbip_dbg_vhci_sysfs("port(%u) pdev(%d) rhport(%u)\n",
+			     port, pdev_nr, rhport);
+	usbip_dbg_vhci_sysfs("sockfd(%u) devid(%u) speed(%u)\n",
+			     sockfd, devid, speed);
 
 	/* check received parameters */
-	if (valid_args(rhport, speed) < 0)
+	if (!valid_args(pdev_nr, rhport, speed))
 		return -EINVAL;
 
+	hcd = platform_get_drvdata(*(vhci_pdevs + pdev_nr));
+	if (hcd == NULL) {
+		dev_err(dev, "port %d is not ready\n", port);
+		return -EAGAIN;
+	}
+	vhci = hcd_to_vhci(hcd);
+	vdev = &vhci->vdev[rhport];
+
 	/* Extract socket from fd. */
 	socket = sockfd_lookup(sockfd, &err);
 	if (!socket)
@@ -205,14 +303,13 @@
 	/* now need lock until setting vdev status as used */
 
 	/* begin a lock */
-	spin_lock_irqsave(&the_controller->lock, flags);
-	vdev = port_to_vdev(rhport);
+	spin_lock_irqsave(&vhci->lock, flags);
 	spin_lock(&vdev->ud.lock);
 
 	if (vdev->ud.status != VDEV_ST_NULL) {
 		/* end of the lock */
 		spin_unlock(&vdev->ud.lock);
-		spin_unlock_irqrestore(&the_controller->lock, flags);
+		spin_unlock_irqrestore(&vhci->lock, flags);
 
 		sockfd_put(socket);
 
@@ -220,9 +317,10 @@
 		return -EINVAL;
 	}
 
-	dev_info(dev,
-		 "rhport(%u) sockfd(%d) devid(%u) speed(%u) speed_str(%s)\n",
-		 rhport, sockfd, devid, speed, usb_speed_string(speed));
+	dev_info(dev, "pdev(%u) rhport(%u) sockfd(%d)\n",
+		 pdev_nr, rhport, sockfd);
+	dev_info(dev, "devid(%u) speed(%u) speed_str(%s)\n",
+		 devid, speed, usb_speed_string(speed));
 
 	vdev->devid         = devid;
 	vdev->speed         = speed;
@@ -230,26 +328,92 @@
 	vdev->ud.status     = VDEV_ST_NOTASSIGNED;
 
 	spin_unlock(&vdev->ud.lock);
-	spin_unlock_irqrestore(&the_controller->lock, flags);
+	spin_unlock_irqrestore(&vhci->lock, flags);
 	/* end the lock */
 
 	vdev->ud.tcp_rx = kthread_get_run(vhci_rx_loop, &vdev->ud, "vhci_rx");
 	vdev->ud.tcp_tx = kthread_get_run(vhci_tx_loop, &vdev->ud, "vhci_tx");
 
-	rh_port_connect(rhport, speed);
+	rh_port_connect(vdev, speed);
 
 	return count;
 }
 static DEVICE_ATTR(attach, S_IWUSR, NULL, store_attach);
 
-static struct attribute *dev_attrs[] = {
-	&dev_attr_status.attr,
-	&dev_attr_detach.attr,
-	&dev_attr_attach.attr,
-	&dev_attr_usbip_debug.attr,
-	NULL,
+#define MAX_STATUS_NAME 16
+
+struct status_attr {
+	struct device_attribute attr;
+	char name[MAX_STATUS_NAME+1];
 };
 
-const struct attribute_group dev_attr_group = {
-	.attrs = dev_attrs,
+static struct status_attr *status_attrs;
+
+static void set_status_attr(int id)
+{
+	struct status_attr *status;
+
+	status = status_attrs + id;
+	if (id == 0)
+		strcpy(status->name, "status");
+	else
+		snprintf(status->name, MAX_STATUS_NAME+1, "status.%d", id);
+	status->attr.attr.name = status->name;
+	status->attr.attr.mode = S_IRUGO;
+	status->attr.show = status_show;
+}
+
+static int init_status_attrs(void)
+{
+	int id;
+
+	status_attrs = kcalloc(vhci_num_controllers, sizeof(struct status_attr),
+			       GFP_KERNEL);
+	if (status_attrs == NULL)
+		return -ENOMEM;
+
+	for (id = 0; id < vhci_num_controllers; id++)
+		set_status_attr(id);
+
+	return 0;
+}
+
+static void finish_status_attrs(void)
+{
+	kfree(status_attrs);
+}
+
+struct attribute_group vhci_attr_group = {
+	.attrs = NULL,
 };
+
+int vhci_init_attr_group(void)
+{
+	struct attribute **attrs;
+	int ret, i;
+
+	attrs = kcalloc((vhci_num_controllers + 5), sizeof(struct attribute *),
+			GFP_KERNEL);
+	if (attrs == NULL)
+		return -ENOMEM;
+
+	ret = init_status_attrs();
+	if (ret) {
+		kfree(attrs);
+		return ret;
+	}
+	*attrs = &dev_attr_nports.attr;
+	*(attrs + 1) = &dev_attr_detach.attr;
+	*(attrs + 2) = &dev_attr_attach.attr;
+	*(attrs + 3) = &dev_attr_usbip_debug.attr;
+	for (i = 0; i < vhci_num_controllers; i++)
+		*(attrs + i + 4) = &((status_attrs + i)->attr.attr);
+	vhci_attr_group.attrs = attrs;
+	return 0;
+}
+
+void vhci_finish_attr_group(void)
+{
+	finish_status_attrs();
+	kfree(vhci_attr_group.attrs);
+}
diff --git a/drivers/usb/usbip/vudc_dev.c b/drivers/usb/usbip/vudc_dev.c
index 8994a13..7091848 100644
--- a/drivers/usb/usbip/vudc_dev.c
+++ b/drivers/usb/usbip/vudc_dev.c
@@ -450,7 +450,7 @@
 	if (ud->tcp_socket)
 		kernel_sock_shutdown(ud->tcp_socket, SHUT_RDWR);
 
-	if (ud->tcp_tx) {
+	if (ud->tcp_rx) {
 		kthread_stop_put(ud->tcp_rx);
 		ud->tcp_rx = NULL;
 	}
diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
index 344bd94..e429b59 100644
--- a/drivers/usb/usbip/vudc_rx.c
+++ b/drivers/usb/usbip/vudc_rx.c
@@ -142,7 +142,7 @@
 	urb_p->urb->status = -EINPROGRESS;
 
 	/* FIXME: more pipe setup to please usbip_common */
-	urb_p->urb->pipe &= ~(11 << 30);
+	urb_p->urb->pipe &= ~(3 << 30);
 	switch (urb_p->ep->type) {
 	case USB_ENDPOINT_XFER_BULK:
 		urb_p->urb->pipe |= (PIPE_BULK << 30);
diff --git a/drivers/usb/wusbcore/cbaf.c b/drivers/usb/wusbcore/cbaf.c
index da1b872..fb70cbef 100644
--- a/drivers/usb/wusbcore/cbaf.c
+++ b/drivers/usb/wusbcore/cbaf.c
@@ -610,8 +610,7 @@
 	cbaf->usb_iface = usb_get_intf(iface);
 	result = cbaf_check(cbaf);
 	if (result < 0) {
-		dev_err(dev, "This device is not WUSB-CBAF compliant"
-			"and is not supported yet.\n");
+		dev_err(dev, "This device is not WUSB-CBAF compliant and is not supported yet.\n");
 		goto error_check;
 	}
 
diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c
index 33acd15..79b2b62 100644
--- a/drivers/usb/wusbcore/crypto.c
+++ b/drivers/usb/wusbcore/crypto.c
@@ -229,10 +229,8 @@
 		zero_padding = sizeof(struct aes_ccm_block) - zero_padding;
 	dst_size = blen + sizeof(b0) + sizeof(b1) + zero_padding;
 	dst_buf = kzalloc(dst_size, GFP_KERNEL);
-	if (dst_buf == NULL) {
-		printk(KERN_ERR "E: can't alloc destination buffer\n");
+	if (!dst_buf)
 		goto error_dst_buf;
-	}
 
 	memset(iv, 0, sizeof(iv));
 
diff --git a/drivers/usb/wusbcore/security.c b/drivers/usb/wusbcore/security.c
index b66faaf..8c9421b 100644
--- a/drivers/usb/wusbcore/security.c
+++ b/drivers/usb/wusbcore/security.c
@@ -374,10 +374,8 @@
 	struct wusb_keydvt_out keydvt_out;
 
 	hs = kcalloc(3, sizeof(hs[0]), GFP_KERNEL);
-	if (hs == NULL) {
-		dev_err(dev, "can't allocate handshake data\n");
+	if (!hs)
 		goto error_kzalloc;
-	}
 
 	/* We need to turn encryption before beginning the 4way
 	 * hshake (WUSB1.0[.3.2.2]) */
diff --git a/drivers/usb/wusbcore/wa-nep.c b/drivers/usb/wusbcore/wa-nep.c
index 60a10d2..ed46222 100644
--- a/drivers/usb/wusbcore/wa-nep.c
+++ b/drivers/usb/wusbcore/wa-nep.c
@@ -271,16 +271,11 @@
 	epd = &iface->cur_altsetting->endpoint[0].desc;
 	wa->nep_buffer_size = 1024;
 	wa->nep_buffer = kmalloc(wa->nep_buffer_size, GFP_KERNEL);
-	if (wa->nep_buffer == NULL) {
-		dev_err(dev,
-			"Unable to allocate notification's read buffer\n");
+	if (!wa->nep_buffer)
 		goto error_nep_buffer;
-	}
 	wa->nep_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (wa->nep_urb == NULL) {
-		dev_err(dev, "Unable to allocate notification URB\n");
+	if (wa->nep_urb == NULL)
 		goto error_urb_alloc;
-	}
 	usb_fill_int_urb(wa->nep_urb, usb_dev,
 			 usb_rcvintpipe(usb_dev, epd->bEndpointAddress),
 			 wa->nep_buffer, wa->nep_buffer_size,
diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c
index 69af4fd..167fcc71 100644
--- a/drivers/usb/wusbcore/wa-xfer.c
+++ b/drivers/usb/wusbcore/wa-xfer.c
@@ -2865,10 +2865,8 @@
 		goto out;
 
 	wa->dti_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (wa->dti_urb == NULL) {
-		dev_err(dev, "Can't allocate DTI URB\n");
+	if (wa->dti_urb == NULL)
 		goto error_dti_urb_alloc;
-	}
 	usb_fill_bulk_urb(
 		wa->dti_urb, wa->usb_dev,
 		usb_rcvbulkpipe(wa->usb_dev, 0x80 | dti_epd->bEndpointAddress),
diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c
index 0257f35..0aa6c3c 100644
--- a/drivers/uwb/hwa-rc.c
+++ b/drivers/uwb/hwa-rc.c
@@ -701,10 +701,8 @@
 		goto error_rd_buffer;
 	}
 	hwarc->neep_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (hwarc->neep_urb == NULL) {
-		dev_err(dev, "Unable to allocate notification URB\n");
+	if (hwarc->neep_urb == NULL)
 		goto error_urb_alloc;
-	}
 	usb_fill_int_urb(hwarc->neep_urb, usb_dev,
 			 usb_rcvintpipe(usb_dev, epd->bEndpointAddress),
 			 hwarc->rd_buffer, PAGE_SIZE,
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 15ecfc9..152b438 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -564,67 +564,80 @@
 }
 
 static int vfio_pci_set_ctx_trigger_single(struct eventfd_ctx **ctx,
-					   uint32_t flags, void *data)
+					   unsigned int count, uint32_t flags,
+					   void *data)
 {
-	int32_t fd = *(int32_t *)data;
-
-	if (!(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
-		return -EINVAL;
-
 	/* DATA_NONE/DATA_BOOL enables loopback testing */
 	if (flags & VFIO_IRQ_SET_DATA_NONE) {
-		if (*ctx)
-			eventfd_signal(*ctx, 1);
-		return 0;
+		if (*ctx) {
+			if (count) {
+				eventfd_signal(*ctx, 1);
+			} else {
+				eventfd_ctx_put(*ctx);
+				*ctx = NULL;
+			}
+			return 0;
+		}
 	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
-		uint8_t trigger = *(uint8_t *)data;
+		uint8_t trigger;
+
+		if (!count)
+			return -EINVAL;
+
+		trigger = *(uint8_t *)data;
 		if (trigger && *ctx)
 			eventfd_signal(*ctx, 1);
+
+		return 0;
+	} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+		int32_t fd;
+
+		if (!count)
+			return -EINVAL;
+
+		fd = *(int32_t *)data;
+		if (fd == -1) {
+			if (*ctx)
+				eventfd_ctx_put(*ctx);
+			*ctx = NULL;
+		} else if (fd >= 0) {
+			struct eventfd_ctx *efdctx;
+
+			efdctx = eventfd_ctx_fdget(fd);
+			if (IS_ERR(efdctx))
+				return PTR_ERR(efdctx);
+
+			if (*ctx)
+				eventfd_ctx_put(*ctx);
+
+			*ctx = efdctx;
+		}
 		return 0;
 	}
 
-	/* Handle SET_DATA_EVENTFD */
-	if (fd == -1) {
-		if (*ctx)
-			eventfd_ctx_put(*ctx);
-		*ctx = NULL;
-		return 0;
-	} else if (fd >= 0) {
-		struct eventfd_ctx *efdctx;
-		efdctx = eventfd_ctx_fdget(fd);
-		if (IS_ERR(efdctx))
-			return PTR_ERR(efdctx);
-		if (*ctx)
-			eventfd_ctx_put(*ctx);
-		*ctx = efdctx;
-		return 0;
-	} else
-		return -EINVAL;
+	return -EINVAL;
 }
 
 static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
 				    unsigned index, unsigned start,
 				    unsigned count, uint32_t flags, void *data)
 {
-	if (index != VFIO_PCI_ERR_IRQ_INDEX)
+	if (index != VFIO_PCI_ERR_IRQ_INDEX || start != 0 || count > 1)
 		return -EINVAL;
 
-	/*
-	 * We should sanitize start & count, but that wasn't caught
-	 * originally, so this IRQ index must forever ignore them :-(
-	 */
-
-	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger, flags, data);
+	return vfio_pci_set_ctx_trigger_single(&vdev->err_trigger,
+					       count, flags, data);
 }
 
 static int vfio_pci_set_req_trigger(struct vfio_pci_device *vdev,
 				    unsigned index, unsigned start,
 				    unsigned count, uint32_t flags, void *data)
 {
-	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count != 1)
+	if (index != VFIO_PCI_REQ_IRQ_INDEX || start != 0 || count > 1)
 		return -EINVAL;
 
-	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger, flags, data);
+	return vfio_pci_set_ctx_trigger_single(&vdev->req_trigger,
+					       count, flags, data);
 }
 
 int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 9d6320e..6e29d05 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -88,7 +88,7 @@
 	struct scatterlist *tvc_prot_sgl;
 	struct page **tvc_upages;
 	/* Pointer to response header iovec */
-	struct iovec *tvc_resp_iov;
+	struct iovec tvc_resp_iov;
 	/* Pointer to vhost_scsi for our device */
 	struct vhost_scsi *tvc_vhost;
 	/* Pointer to vhost_virtqueue for the cmd */
@@ -547,7 +547,7 @@
 		memcpy(v_rsp.sense, cmd->tvc_sense_buf,
 		       se_cmd->scsi_sense_length);
 
-		iov_iter_init(&iov_iter, READ, cmd->tvc_resp_iov,
+		iov_iter_init(&iov_iter, READ, &cmd->tvc_resp_iov,
 			      cmd->tvc_in_iovs, sizeof(v_rsp));
 		ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter);
 		if (likely(ret == sizeof(v_rsp))) {
@@ -1044,7 +1044,7 @@
 		}
 		cmd->tvc_vhost = vs;
 		cmd->tvc_vq = vq;
-		cmd->tvc_resp_iov = &vq->iov[out];
+		cmd->tvc_resp_iov = vq->iov[out];
 		cmd->tvc_in_iovs = in;
 
 		pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 388eec4..3cc98c0 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -220,20 +220,20 @@
 {
 	void *priv = NULL;
 	long err;
-	struct vhost_memory *memory;
+	struct vhost_umem *umem;
 
 	mutex_lock(&n->dev.mutex);
 	err = vhost_dev_check_owner(&n->dev);
 	if (err)
 		goto done;
-	memory = vhost_dev_reset_owner_prepare();
-	if (!memory) {
+	umem = vhost_dev_reset_owner_prepare();
+	if (!umem) {
 		err = -ENOMEM;
 		goto done;
 	}
 	vhost_test_stop(n, &priv);
 	vhost_test_flush(n);
-	vhost_dev_reset_owner(&n->dev, memory);
+	vhost_dev_reset_owner(&n->dev, umem);
 done:
 	mutex_unlock(&n->dev.mutex);
 	return err;
@@ -322,18 +322,7 @@
 	"vhost-test",
 	&vhost_test_fops,
 };
-
-static int vhost_test_init(void)
-{
-	return misc_register(&vhost_test_misc);
-}
-module_init(vhost_test_init);
-
-static void vhost_test_exit(void)
-{
-	misc_deregister(&vhost_test_misc);
-}
-module_exit(vhost_test_exit);
+module_misc_device(vhost_test_misc);
 
 MODULE_VERSION("0.0.1");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 0ddf3a2..e3b30ea 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -307,6 +307,8 @@
 
 	vhost_disable_notify(&vsock->dev, vq);
 	for (;;) {
+		u32 len;
+
 		if (!vhost_vsock_more_replies(vsock)) {
 			/* Stop tx until the device processes already
 			 * pending replies.  Leave tx virtqueue
@@ -334,13 +336,15 @@
 			continue;
 		}
 
+		len = pkt->len;
+
 		/* Only accept correctly addressed packets */
 		if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
 			virtio_transport_recv_pkt(pkt);
 		else
 			virtio_transport_free_pkt(pkt);
 
-		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
+		vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
 		added = true;
 	}
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 114a0c8..ed9c9ee 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -167,7 +167,7 @@
  * making all of the arch DMA ops work on the vring device itself
  * is a mess.  For now, we use the parent device for DMA ops.
  */
-struct device *vring_dma_dev(const struct vring_virtqueue *vq)
+static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
 {
 	return vq->vq.vdev->dev.parent;
 }
@@ -327,6 +327,8 @@
 		 * host should service the ring ASAP. */
 		if (out_sgs)
 			vq->notify(&vq->vq);
+		if (indirect)
+			kfree(desc);
 		END_USE(vq);
 		return -ENOSPC;
 	}
@@ -426,6 +428,7 @@
 	if (indirect)
 		kfree(desc);
 
+	END_USE(vq);
 	return -EIO;
 }
 
diff --git a/drivers/vme/bridges/Kconfig b/drivers/vme/bridges/Kconfig
index f6d8545..f6ddc37 100644
--- a/drivers/vme/bridges/Kconfig
+++ b/drivers/vme/bridges/Kconfig
@@ -13,3 +13,11 @@
 	help
 	 If you say Y here you get support for the Tundra TSI148 VME bridge
 	 chip.
+
+config VME_FAKE
+	tristate "Fake"
+	help
+	 If you say Y here you get support for the fake VME bridge. This
+	 provides a virtualised VME Bus for devices with no VME bridge. This
+	 is mainly useful for VME development (in the absence of VME
+	 hardware).
diff --git a/drivers/vme/bridges/Makefile b/drivers/vme/bridges/Makefile
index 59638af..b074542 100644
--- a/drivers/vme/bridges/Makefile
+++ b/drivers/vme/bridges/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_VME_CA91CX42)	+= vme_ca91cx42.o
 obj-$(CONFIG_VME_TSI148)	+= vme_tsi148.o
+obj-$(CONFIG_VME_FAKE)		+= vme_fake.o
diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c
index 9f2c834..6b5ee89 100644
--- a/drivers/vme/bridges/vme_ca91cx42.c
+++ b/drivers/vme/bridges/vme_ca91cx42.c
@@ -47,6 +47,8 @@
 	{ },
 };
 
+MODULE_DEVICE_TABLE(pci, ca91cx42_ids);
+
 static struct pci_driver ca91cx42_driver = {
 	.name = driver_name,
 	.id_table = ca91cx42_ids,
@@ -69,7 +71,7 @@
 	for (i = 0; i < 4; i++) {
 		if (stat & CA91CX42_LINT_LM[i]) {
 			/* We only enable interrupts if the callback is set */
-			bridge->lm_callback[i](i);
+			bridge->lm_callback[i](bridge->lm_data[i]);
 			serviced |= CA91CX42_LINT_LM[i];
 		}
 	}
@@ -1410,7 +1412,7 @@
  * Callback will be passed the monitor triggered.
  */
 static int ca91cx42_lm_attach(struct vme_lm_resource *lm, int monitor,
-	void (*callback)(int))
+	void (*callback)(void *), void *data)
 {
 	u32 lm_ctl, tmp;
 	struct ca91cx42_driver *bridge;
@@ -1438,6 +1440,7 @@
 
 	/* Attach callback */
 	bridge->lm_callback[monitor] = callback;
+	bridge->lm_data[monitor] = data;
 
 	/* Enable Location Monitor interrupt */
 	tmp = ioread32(bridge->base + LINT_EN);
@@ -1477,6 +1480,7 @@
 
 	/* Detach callback */
 	bridge->lm_callback[monitor] = NULL;
+	bridge->lm_data[monitor] = NULL;
 
 	/* If all location monitors disabled, disable global Location Monitor */
 	if ((tmp & (CA91CX42_LINT_LM0 | CA91CX42_LINT_LM1 | CA91CX42_LINT_LM2 |
diff --git a/drivers/vme/bridges/vme_ca91cx42.h b/drivers/vme/bridges/vme_ca91cx42.h
index d54119e..f35c9f5 100644
--- a/drivers/vme/bridges/vme_ca91cx42.h
+++ b/drivers/vme/bridges/vme_ca91cx42.h
@@ -43,7 +43,8 @@
 	wait_queue_head_t dma_queue;
 	wait_queue_head_t iack_queue;
 	wait_queue_head_t mbox_queue;
-	void (*lm_callback[4])(int);	/* Called in interrupt handler */
+	void (*lm_callback[4])(void *);	/* Called in interrupt handler */
+	void *lm_data[4];
 	void *crcsr_kernel;
 	dma_addr_t crcsr_bus;
 	struct mutex vme_rmw;		/* Only one RMW cycle at a time */
diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c
new file mode 100644
index 0000000..30b3acc
--- /dev/null
+++ b/drivers/vme/bridges/vme_fake.c
@@ -0,0 +1,1306 @@
+/*
+ * Fake VME bridge support.
+ *
+ * This drive provides a fake VME bridge chip, this enables debugging of the
+ * VME framework in the absence of a VME system.
+ *
+ * This driver has to do a number of things in software that would be driven
+ * by hardware if it was available, it will also result in extra overhead at
+ * times when compared with driving actual hardware.
+ *
+ * Author: Martyn Welch <martyn@welches.me.uk>
+ * Copyright (c) 2014 Martyn Welch
+ *
+ * Based on vme_tsi148.c:
+ *
+ * Author: Martyn Welch <martyn.welch@ge.com>
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on work by Tom Armistead and Ajit Prem
+ * Copyright 2004 Motorola Inc.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/vme.h>
+
+#include "../vme_bridge.h"
+
+/*
+ *  Define the number of each that the fake driver supports.
+ */
+#define FAKE_MAX_MASTER		8	/* Max Master Windows */
+#define FAKE_MAX_SLAVE		8	/* Max Slave Windows */
+
+/* Structures to hold information normally held in device registers */
+struct fake_slave_window {
+	int enabled;
+	unsigned long long vme_base;
+	unsigned long long size;
+	void *buf_base;
+	u32 aspace;
+	u32 cycle;
+};
+
+struct fake_master_window {
+	int enabled;
+	unsigned long long vme_base;
+	unsigned long long size;
+	u32 aspace;
+	u32 cycle;
+	u32 dwidth;
+};
+
+/* Structure used to hold driver specific information */
+struct fake_driver {
+	struct vme_bridge *parent;
+	struct fake_slave_window slaves[FAKE_MAX_SLAVE];
+	struct fake_master_window masters[FAKE_MAX_MASTER];
+	u32 lm_enabled;
+	unsigned long long lm_base;
+	u32 lm_aspace;
+	u32 lm_cycle;
+	void (*lm_callback[4])(void *);
+	void *lm_data[4];
+	struct tasklet_struct int_tasklet;
+	int int_level;
+	int int_statid;
+	void *crcsr_kernel;
+	dma_addr_t crcsr_bus;
+	/* Only one VME interrupt can be generated at a time, provide locking */
+	struct mutex vme_int;
+};
+
+/* Module parameter */
+static int geoid;
+
+static const char driver_name[] = "vme_fake";
+
+static struct vme_bridge *exit_pointer;
+
+static struct device *vme_root;
+
+/*
+ * Calling VME bus interrupt callback if provided.
+ */
+static void fake_VIRQ_tasklet(unsigned long data)
+{
+	struct vme_bridge *fake_bridge;
+	struct fake_driver *bridge;
+
+	fake_bridge = (struct vme_bridge *) data;
+	bridge = fake_bridge->driver_priv;
+
+	vme_irq_handler(fake_bridge, bridge->int_level, bridge->int_statid);
+}
+
+/*
+ * Configure VME interrupt
+ */
+static void fake_irq_set(struct vme_bridge *fake_bridge, int level,
+		int state, int sync)
+{
+	/* Nothing to do */
+}
+
+static void *fake_pci_to_ptr(dma_addr_t addr)
+{
+	return (void *)(uintptr_t)addr;
+}
+
+static dma_addr_t fake_ptr_to_pci(void *addr)
+{
+	return (dma_addr_t)(uintptr_t)addr;
+}
+
+/*
+ * Generate a VME bus interrupt at the requested level & vector. Wait for
+ * interrupt to be acked.
+ */
+static int fake_irq_generate(struct vme_bridge *fake_bridge, int level,
+		int statid)
+{
+	struct fake_driver *bridge;
+
+	bridge = fake_bridge->driver_priv;
+
+	mutex_lock(&bridge->vme_int);
+
+	bridge->int_level = level;
+
+	bridge->int_statid = statid;
+
+	/*
+	 * Schedule tasklet to run VME handler to emulate normal VME interrupt
+	 * handler behaviour.
+	 */
+	tasklet_schedule(&bridge->int_tasklet);
+
+	mutex_unlock(&bridge->vme_int);
+
+	return 0;
+}
+
+/*
+ * Initialize a slave window with the requested attributes.
+ */
+static int fake_slave_set(struct vme_slave_resource *image, int enabled,
+		unsigned long long vme_base, unsigned long long size,
+		dma_addr_t buf_base, u32 aspace, u32 cycle)
+{
+	unsigned int i, granularity = 0;
+	unsigned long long vme_bound;
+	struct vme_bridge *fake_bridge;
+	struct fake_driver *bridge;
+
+	fake_bridge = image->parent;
+	bridge = fake_bridge->driver_priv;
+
+	i = image->number;
+
+	switch (aspace) {
+	case VME_A16:
+		granularity = 0x10;
+		break;
+	case VME_A24:
+		granularity = 0x1000;
+		break;
+	case VME_A32:
+		granularity = 0x10000;
+		break;
+	case VME_A64:
+		granularity = 0x10000;
+		break;
+	case VME_CRCSR:
+	case VME_USER1:
+	case VME_USER2:
+	case VME_USER3:
+	case VME_USER4:
+	default:
+		pr_err("Invalid address space\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Bound address is a valid address for the window, adjust
+	 * accordingly
+	 */
+	vme_bound = vme_base + size - granularity;
+
+	if (vme_base & (granularity - 1)) {
+		pr_err("Invalid VME base alignment\n");
+		return -EINVAL;
+	}
+	if (vme_bound & (granularity - 1)) {
+		pr_err("Invalid VME bound alignment\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&image->mtx);
+
+	bridge->slaves[i].enabled = enabled;
+	bridge->slaves[i].vme_base = vme_base;
+	bridge->slaves[i].size = size;
+	bridge->slaves[i].buf_base = fake_pci_to_ptr(buf_base);
+	bridge->slaves[i].aspace = aspace;
+	bridge->slaves[i].cycle = cycle;
+
+	mutex_unlock(&image->mtx);
+
+	return 0;
+}
+
+/*
+ * Get slave window configuration.
+ */
+static int fake_slave_get(struct vme_slave_resource *image, int *enabled,
+		unsigned long long *vme_base, unsigned long long *size,
+		dma_addr_t *buf_base, u32 *aspace, u32 *cycle)
+{
+	unsigned int i;
+	struct fake_driver *bridge;
+
+	bridge = image->parent->driver_priv;
+
+	i = image->number;
+
+	mutex_lock(&image->mtx);
+
+	*enabled = bridge->slaves[i].enabled;
+	*vme_base = bridge->slaves[i].vme_base;
+	*size = bridge->slaves[i].size;
+	*buf_base = fake_ptr_to_pci(bridge->slaves[i].buf_base);
+	*aspace = bridge->slaves[i].aspace;
+	*cycle = bridge->slaves[i].cycle;
+
+	mutex_unlock(&image->mtx);
+
+	return 0;
+}
+
+/*
+ * Set the attributes of an outbound window.
+ */
+static int fake_master_set(struct vme_master_resource *image, int enabled,
+		unsigned long long vme_base, unsigned long long size,
+		u32 aspace, u32 cycle, u32 dwidth)
+{
+	int retval = 0;
+	unsigned int i;
+	struct vme_bridge *fake_bridge;
+	struct fake_driver *bridge;
+
+	fake_bridge = image->parent;
+
+	bridge = fake_bridge->driver_priv;
+
+	/* Verify input data */
+	if (vme_base & 0xFFFF) {
+		pr_err("Invalid VME Window alignment\n");
+		retval = -EINVAL;
+		goto err_window;
+	}
+
+	if (size & 0xFFFF) {
+		pr_err("Invalid size alignment\n");
+		retval = -EINVAL;
+		goto err_window;
+	}
+
+	if ((size == 0) && (enabled != 0)) {
+		pr_err("Size must be non-zero for enabled windows\n");
+		retval = -EINVAL;
+		goto err_window;
+	}
+
+	/* Setup data width */
+	switch (dwidth) {
+	case VME_D8:
+	case VME_D16:
+	case VME_D32:
+		break;
+	default:
+		pr_err("Invalid data width\n");
+		retval = -EINVAL;
+		goto err_dwidth;
+	}
+
+	/* Setup address space */
+	switch (aspace) {
+	case VME_A16:
+	case VME_A24:
+	case VME_A32:
+	case VME_A64:
+	case VME_CRCSR:
+	case VME_USER1:
+	case VME_USER2:
+	case VME_USER3:
+	case VME_USER4:
+		break;
+	default:
+		pr_err("Invalid address space\n");
+		retval = -EINVAL;
+		goto err_aspace;
+	}
+
+	spin_lock(&image->lock);
+
+	i = image->number;
+
+	bridge->masters[i].enabled = enabled;
+	bridge->masters[i].vme_base = vme_base;
+	bridge->masters[i].size = size;
+	bridge->masters[i].aspace = aspace;
+	bridge->masters[i].cycle = cycle;
+	bridge->masters[i].dwidth = dwidth;
+
+	spin_unlock(&image->lock);
+
+	return 0;
+
+err_aspace:
+err_dwidth:
+err_window:
+	return retval;
+
+}
+
+/*
+ * Set the attributes of an outbound window.
+ */
+static int __fake_master_get(struct vme_master_resource *image, int *enabled,
+		unsigned long long *vme_base, unsigned long long *size,
+		u32 *aspace, u32 *cycle, u32 *dwidth)
+{
+	unsigned int i;
+	struct fake_driver *bridge;
+
+	bridge = image->parent->driver_priv;
+
+	i = image->number;
+
+	*enabled = bridge->masters[i].enabled;
+	*vme_base = bridge->masters[i].vme_base;
+	*size = bridge->masters[i].size;
+	*aspace = bridge->masters[i].aspace;
+	*cycle = bridge->masters[i].cycle;
+	*dwidth = bridge->masters[i].dwidth;
+
+	return 0;
+}
+
+
+static int fake_master_get(struct vme_master_resource *image, int *enabled,
+		unsigned long long *vme_base, unsigned long long *size,
+		u32 *aspace, u32 *cycle, u32 *dwidth)
+{
+	int retval;
+
+	spin_lock(&image->lock);
+
+	retval = __fake_master_get(image, enabled, vme_base, size, aspace,
+			cycle, dwidth);
+
+	spin_unlock(&image->lock);
+
+	return retval;
+}
+
+
+static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr,
+			  u32 aspace, u32 cycle)
+{
+	struct vme_bridge *fake_bridge;
+	unsigned long long lm_base;
+	u32 lm_aspace, lm_cycle;
+	int i;
+	struct vme_lm_resource *lm;
+	struct list_head *pos = NULL, *n;
+
+	/* Get vme_bridge */
+	fake_bridge = bridge->parent;
+
+	/* Loop through each location monitor resource */
+	list_for_each_safe(pos, n, &fake_bridge->lm_resources) {
+		lm = list_entry(pos, struct vme_lm_resource, list);
+
+		/* If disabled, we're done */
+		if (bridge->lm_enabled == 0)
+			return;
+
+		lm_base = bridge->lm_base;
+		lm_aspace = bridge->lm_aspace;
+		lm_cycle = bridge->lm_cycle;
+
+		/* First make sure that the cycle and address space match */
+		if ((lm_aspace == aspace) && (lm_cycle == cycle)) {
+			for (i = 0; i < lm->monitors; i++) {
+				/* Each location monitor covers 8 bytes */
+				if (((lm_base + (8 * i)) <= addr) &&
+				    ((lm_base + (8 * i) + 8) > addr)) {
+					if (bridge->lm_callback[i] != NULL)
+						bridge->lm_callback[i](
+							bridge->lm_data[i]);
+				}
+			}
+		}
+	}
+}
+
+static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr,
+		u32 aspace, u32 cycle)
+{
+	u8 retval = 0xff;
+	int i;
+	unsigned long long start, end, offset;
+	u8 *loc;
+
+	for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+		start = bridge->slaves[i].vme_base;
+		end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+		if (aspace != bridge->slaves[i].aspace)
+			continue;
+
+		if (cycle != bridge->slaves[i].cycle)
+			continue;
+
+		if ((addr >= start) && (addr < end)) {
+			offset = addr - bridge->slaves[i].vme_base;
+			loc = (u8 *)(bridge->slaves[i].buf_base + offset);
+			retval = *loc;
+
+			break;
+		}
+	}
+
+	fake_lm_check(bridge, addr, aspace, cycle);
+
+	return retval;
+}
+
+static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr,
+		u32 aspace, u32 cycle)
+{
+	u16 retval = 0xffff;
+	int i;
+	unsigned long long start, end, offset;
+	u16 *loc;
+
+	for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+		if (aspace != bridge->slaves[i].aspace)
+			continue;
+
+		if (cycle != bridge->slaves[i].cycle)
+			continue;
+
+		start = bridge->slaves[i].vme_base;
+		end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+		if ((addr >= start) && ((addr + 1) < end)) {
+			offset = addr - bridge->slaves[i].vme_base;
+			loc = (u16 *)(bridge->slaves[i].buf_base + offset);
+			retval = *loc;
+
+			break;
+		}
+	}
+
+	fake_lm_check(bridge, addr, aspace, cycle);
+
+	return retval;
+}
+
+static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr,
+		u32 aspace, u32 cycle)
+{
+	u32 retval = 0xffffffff;
+	int i;
+	unsigned long long start, end, offset;
+	u32 *loc;
+
+	for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+		if (aspace != bridge->slaves[i].aspace)
+			continue;
+
+		if (cycle != bridge->slaves[i].cycle)
+			continue;
+
+		start = bridge->slaves[i].vme_base;
+		end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+		if ((addr >= start) && ((addr + 3) < end)) {
+			offset = addr - bridge->slaves[i].vme_base;
+			loc = (u32 *)(bridge->slaves[i].buf_base + offset);
+			retval = *loc;
+
+			break;
+		}
+	}
+
+	fake_lm_check(bridge, addr, aspace, cycle);
+
+	return retval;
+}
+
+static ssize_t fake_master_read(struct vme_master_resource *image, void *buf,
+		size_t count, loff_t offset)
+{
+	int retval;
+	u32 aspace, cycle, dwidth;
+	struct vme_bridge *fake_bridge;
+	struct fake_driver *priv;
+	int i;
+	unsigned long long addr;
+	unsigned int done = 0;
+	unsigned int count32;
+
+	fake_bridge = image->parent;
+
+	priv = fake_bridge->driver_priv;
+
+	i = image->number;
+
+	addr = (unsigned long long)priv->masters[i].vme_base + offset;
+	aspace = priv->masters[i].aspace;
+	cycle = priv->masters[i].cycle;
+	dwidth = priv->masters[i].dwidth;
+
+	spin_lock(&image->lock);
+
+	/* The following code handles VME address alignment. We cannot use
+	 * memcpy_xxx here because it may cut data transfers in to 8-bit
+	 * cycles when D16 or D32 cycles are required on the VME bus.
+	 * On the other hand, the bridge itself assures that the maximum data
+	 * cycle configured for the transfer is used and splits it
+	 * automatically for non-aligned addresses, so we don't want the
+	 * overhead of needlessly forcing small transfers for the entire cycle.
+	 */
+	if (addr & 0x1) {
+		*(u8 *)buf = fake_vmeread8(priv, addr, aspace, cycle);
+		done += 1;
+		if (done == count)
+			goto out;
+	}
+	if ((dwidth == VME_D16) || (dwidth == VME_D32)) {
+		if ((addr + done) & 0x2) {
+			if ((count - done) < 2) {
+				*(u8 *)(buf + done) = fake_vmeread8(priv,
+						addr + done, aspace, cycle);
+				done += 1;
+				goto out;
+			} else {
+				*(u16 *)(buf + done) = fake_vmeread16(priv,
+						addr + done, aspace, cycle);
+				done += 2;
+			}
+		}
+	}
+
+	if (dwidth == VME_D32) {
+		count32 = (count - done) & ~0x3;
+		while (done < count32) {
+			*(u32 *)(buf + done) = fake_vmeread32(priv, addr + done,
+					aspace, cycle);
+			done += 4;
+		}
+	} else if (dwidth == VME_D16) {
+		count32 = (count - done) & ~0x3;
+		while (done < count32) {
+			*(u16 *)(buf + done) = fake_vmeread16(priv, addr + done,
+					aspace, cycle);
+			done += 2;
+		}
+	} else if (dwidth == VME_D8) {
+		count32 = (count - done);
+		while (done < count32) {
+			*(u8 *)(buf + done) = fake_vmeread8(priv, addr + done,
+					aspace, cycle);
+			done += 1;
+		}
+
+	}
+
+	if ((dwidth == VME_D16) || (dwidth == VME_D32)) {
+		if ((count - done) & 0x2) {
+			*(u16 *)(buf + done) = fake_vmeread16(priv, addr + done,
+					aspace, cycle);
+			done += 2;
+		}
+	}
+	if ((count - done) & 0x1) {
+		*(u8 *)(buf + done) = fake_vmeread8(priv, addr + done, aspace,
+				cycle);
+		done += 1;
+	}
+
+out:
+	retval = count;
+
+	spin_unlock(&image->lock);
+
+	return retval;
+}
+
+static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf,
+			   unsigned long long addr, u32 aspace, u32 cycle)
+{
+	int i;
+	unsigned long long start, end, offset;
+	u8 *loc;
+
+	for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+		if (aspace != bridge->slaves[i].aspace)
+			continue;
+
+		if (cycle != bridge->slaves[i].cycle)
+			continue;
+
+		start = bridge->slaves[i].vme_base;
+		end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+		if ((addr >= start) && (addr < end)) {
+			offset = addr - bridge->slaves[i].vme_base;
+			loc = (u8 *)((void *)bridge->slaves[i].buf_base + offset);
+			*loc = *buf;
+
+			break;
+		}
+	}
+
+	fake_lm_check(bridge, addr, aspace, cycle);
+
+}
+
+static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf,
+			    unsigned long long addr, u32 aspace, u32 cycle)
+{
+	int i;
+	unsigned long long start, end, offset;
+	u16 *loc;
+
+	for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+		if (aspace != bridge->slaves[i].aspace)
+			continue;
+
+		if (cycle != bridge->slaves[i].cycle)
+			continue;
+
+		start = bridge->slaves[i].vme_base;
+		end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+		if ((addr >= start) && ((addr + 1) < end)) {
+			offset = addr - bridge->slaves[i].vme_base;
+			loc = (u16 *)((void *)bridge->slaves[i].buf_base + offset);
+			*loc = *buf;
+
+			break;
+		}
+	}
+
+	fake_lm_check(bridge, addr, aspace, cycle);
+
+}
+
+static void fake_vmewrite32(struct fake_driver *bridge, u32 *buf,
+			    unsigned long long addr, u32 aspace, u32 cycle)
+{
+	int i;
+	unsigned long long start, end, offset;
+	u32 *loc;
+
+	for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+		if (aspace != bridge->slaves[i].aspace)
+			continue;
+
+		if (cycle != bridge->slaves[i].cycle)
+			continue;
+
+		start = bridge->slaves[i].vme_base;
+		end = bridge->slaves[i].vme_base + bridge->slaves[i].size;
+
+		if ((addr >= start) && ((addr + 3) < end)) {
+			offset = addr - bridge->slaves[i].vme_base;
+			loc = (u32 *)((void *)bridge->slaves[i].buf_base + offset);
+			*loc = *buf;
+
+			break;
+		}
+	}
+
+	fake_lm_check(bridge, addr, aspace, cycle);
+
+}
+
+static ssize_t fake_master_write(struct vme_master_resource *image, void *buf,
+		size_t count, loff_t offset)
+{
+	int retval = 0;
+	u32 aspace, cycle, dwidth;
+	unsigned long long addr;
+	int i;
+	unsigned int done = 0;
+	unsigned int count32;
+
+	struct vme_bridge *fake_bridge;
+	struct fake_driver *bridge;
+
+	fake_bridge = image->parent;
+
+	bridge = fake_bridge->driver_priv;
+
+	i = image->number;
+
+	addr = bridge->masters[i].vme_base + offset;
+	aspace = bridge->masters[i].aspace;
+	cycle = bridge->masters[i].cycle;
+	dwidth = bridge->masters[i].dwidth;
+
+	spin_lock(&image->lock);
+
+	/* Here we apply for the same strategy we do in master_read
+	 * function in order to assure the correct cycles.
+	 */
+	if (addr & 0x1) {
+		fake_vmewrite8(bridge, (u8 *)buf, addr, aspace, cycle);
+		done += 1;
+		if (done == count)
+			goto out;
+	}
+
+	if ((dwidth == VME_D16) || (dwidth == VME_D32)) {
+		if ((addr + done) & 0x2) {
+			if ((count - done) < 2) {
+				fake_vmewrite8(bridge, (u8 *)(buf + done),
+						addr + done, aspace, cycle);
+				done += 1;
+				goto out;
+			} else {
+				fake_vmewrite16(bridge, (u16 *)(buf + done),
+						addr + done, aspace, cycle);
+				done += 2;
+			}
+		}
+	}
+
+	if (dwidth == VME_D32) {
+		count32 = (count - done) & ~0x3;
+		while (done < count32) {
+			fake_vmewrite32(bridge, (u32 *)(buf + done),
+					addr + done, aspace, cycle);
+			done += 4;
+		}
+	} else if (dwidth == VME_D16) {
+		count32 = (count - done) & ~0x3;
+		while (done < count32) {
+			fake_vmewrite16(bridge, (u16 *)(buf + done),
+					addr + done, aspace, cycle);
+			done += 2;
+		}
+	} else if (dwidth == VME_D8) {
+		count32 = (count - done);
+		while (done < count32) {
+			fake_vmewrite8(bridge, (u8 *)(buf + done), addr + done,
+					aspace, cycle);
+			done += 1;
+		}
+
+	}
+
+	if ((dwidth == VME_D16) || (dwidth == VME_D32)) {
+		if ((count - done) & 0x2) {
+			fake_vmewrite16(bridge, (u16 *)(buf + done),
+					addr + done, aspace, cycle);
+			done += 2;
+		}
+	}
+
+	if ((count - done) & 0x1) {
+		fake_vmewrite8(bridge, (u8 *)(buf + done), addr + done, aspace,
+				cycle);
+		done += 1;
+	}
+
+out:
+	retval = count;
+
+	spin_unlock(&image->lock);
+
+	return retval;
+}
+
+/*
+ * Perform an RMW cycle on the VME bus.
+ *
+ * Requires a previously configured master window, returns final value.
+ */
+static unsigned int fake_master_rmw(struct vme_master_resource *image,
+		unsigned int mask, unsigned int compare, unsigned int swap,
+		loff_t offset)
+{
+	u32 tmp, base;
+	u32 aspace, cycle;
+	int i;
+	struct fake_driver *bridge;
+
+	bridge = image->parent->driver_priv;
+
+	/* Find the PCI address that maps to the desired VME address */
+	i = image->number;
+
+	base = bridge->masters[i].vme_base;
+	aspace = bridge->masters[i].aspace;
+	cycle = bridge->masters[i].cycle;
+
+	/* Lock image */
+	spin_lock(&image->lock);
+
+	/* Read existing value */
+	tmp = fake_vmeread32(bridge, base + offset, aspace, cycle);
+
+	/* Perform check */
+	if ((tmp && mask) == (compare && mask)) {
+		tmp = tmp | (mask | swap);
+		tmp = tmp & (~mask | swap);
+
+		/* Write back */
+		fake_vmewrite32(bridge, &tmp, base + offset, aspace, cycle);
+	}
+
+	/* Unlock image */
+	spin_unlock(&image->lock);
+
+	return tmp;
+}
+
+/*
+ * All 4 location monitors reside at the same base - this is therefore a
+ * system wide configuration.
+ *
+ * This does not enable the LM monitor - that should be done when the first
+ * callback is attached and disabled when the last callback is removed.
+ */
+static int fake_lm_set(struct vme_lm_resource *lm, unsigned long long lm_base,
+		u32 aspace, u32 cycle)
+{
+	int i;
+	struct vme_bridge *fake_bridge;
+	struct fake_driver *bridge;
+
+	fake_bridge = lm->parent;
+
+	bridge = fake_bridge->driver_priv;
+
+	mutex_lock(&lm->mtx);
+
+	/* If we already have a callback attached, we can't move it! */
+	for (i = 0; i < lm->monitors; i++) {
+		if (bridge->lm_callback[i] != NULL) {
+			mutex_unlock(&lm->mtx);
+			pr_err("Location monitor callback attached, can't reset\n");
+			return -EBUSY;
+		}
+	}
+
+	switch (aspace) {
+	case VME_A16:
+	case VME_A24:
+	case VME_A32:
+	case VME_A64:
+		break;
+	default:
+		mutex_unlock(&lm->mtx);
+		pr_err("Invalid address space\n");
+		return -EINVAL;
+	}
+
+	bridge->lm_base = lm_base;
+	bridge->lm_aspace = aspace;
+	bridge->lm_cycle = cycle;
+
+	mutex_unlock(&lm->mtx);
+
+	return 0;
+}
+
+/* Get configuration of the callback monitor and return whether it is enabled
+ * or disabled.
+ */
+static int fake_lm_get(struct vme_lm_resource *lm,
+		unsigned long long *lm_base, u32 *aspace, u32 *cycle)
+{
+	struct fake_driver *bridge;
+
+	bridge = lm->parent->driver_priv;
+
+	mutex_lock(&lm->mtx);
+
+	*lm_base = bridge->lm_base;
+	*aspace = bridge->lm_aspace;
+	*cycle = bridge->lm_cycle;
+
+	mutex_unlock(&lm->mtx);
+
+	return bridge->lm_enabled;
+}
+
+/*
+ * Attach a callback to a specific location monitor.
+ *
+ * Callback will be passed the monitor triggered.
+ */
+static int fake_lm_attach(struct vme_lm_resource *lm, int monitor,
+		void (*callback)(void *), void *data)
+{
+	struct vme_bridge *fake_bridge;
+	struct fake_driver *bridge;
+
+	fake_bridge = lm->parent;
+
+	bridge = fake_bridge->driver_priv;
+
+	mutex_lock(&lm->mtx);
+
+	/* Ensure that the location monitor is configured - need PGM or DATA */
+	if (bridge->lm_cycle == 0) {
+		mutex_unlock(&lm->mtx);
+		pr_err("Location monitor not properly configured\n");
+		return -EINVAL;
+	}
+
+	/* Check that a callback isn't already attached */
+	if (bridge->lm_callback[monitor] != NULL) {
+		mutex_unlock(&lm->mtx);
+		pr_err("Existing callback attached\n");
+		return -EBUSY;
+	}
+
+	/* Attach callback */
+	bridge->lm_callback[monitor] = callback;
+	bridge->lm_data[monitor] = data;
+
+	/* Ensure that global Location Monitor Enable set */
+	bridge->lm_enabled = 1;
+
+	mutex_unlock(&lm->mtx);
+
+	return 0;
+}
+
+/*
+ * Detach a callback function forn a specific location monitor.
+ */
+static int fake_lm_detach(struct vme_lm_resource *lm, int monitor)
+{
+	u32 tmp;
+	int i;
+	struct fake_driver *bridge;
+
+	bridge = lm->parent->driver_priv;
+
+	mutex_lock(&lm->mtx);
+
+	/* Detach callback */
+	bridge->lm_callback[monitor] = NULL;
+	bridge->lm_data[monitor] = NULL;
+
+	/* If all location monitors disabled, disable global Location Monitor */
+	tmp = 0;
+	for (i = 0; i < lm->monitors; i++) {
+		if (bridge->lm_callback[i] != NULL)
+			tmp = 1;
+	}
+
+	if (tmp == 0)
+		bridge->lm_enabled = 0;
+
+	mutex_unlock(&lm->mtx);
+
+	return 0;
+}
+
+/*
+ * Determine Geographical Addressing
+ */
+static int fake_slot_get(struct vme_bridge *fake_bridge)
+{
+	return geoid;
+}
+
+static void *fake_alloc_consistent(struct device *parent, size_t size,
+		dma_addr_t *dma)
+{
+	void *alloc = kmalloc(size, GFP_KERNEL);
+
+	if (alloc != NULL)
+		*dma = fake_ptr_to_pci(alloc);
+
+	return alloc;
+}
+
+static void fake_free_consistent(struct device *parent, size_t size,
+		void *vaddr, dma_addr_t dma)
+{
+	kfree(vaddr);
+/*
+	dma_free_coherent(parent, size, vaddr, dma);
+*/
+}
+
+/*
+ * Configure CR/CSR space
+ *
+ * Access to the CR/CSR can be configured at power-up. The location of the
+ * CR/CSR registers in the CR/CSR address space is determined by the boards
+ * Geographic address.
+ *
+ * Each board has a 512kB window, with the highest 4kB being used for the
+ * boards registers, this means there is a fix length 508kB window which must
+ * be mapped onto PCI memory.
+ */
+static int fake_crcsr_init(struct vme_bridge *fake_bridge)
+{
+	u32 vstat;
+	struct fake_driver *bridge;
+
+	bridge = fake_bridge->driver_priv;
+
+	/* Allocate mem for CR/CSR image */
+	bridge->crcsr_kernel = kzalloc(VME_CRCSR_BUF_SIZE, GFP_KERNEL);
+	bridge->crcsr_bus = fake_ptr_to_pci(bridge->crcsr_kernel);
+	if (bridge->crcsr_kernel == NULL)
+		return -ENOMEM;
+
+	vstat = fake_slot_get(fake_bridge);
+
+	pr_info("CR/CSR Offset: %d\n", vstat);
+
+	return 0;
+}
+
+static void fake_crcsr_exit(struct vme_bridge *fake_bridge)
+{
+	struct fake_driver *bridge;
+
+	bridge = fake_bridge->driver_priv;
+
+	kfree(bridge->crcsr_kernel);
+}
+
+
+static int __init fake_init(void)
+{
+	int retval, i;
+	struct list_head *pos = NULL, *n;
+	struct vme_bridge *fake_bridge;
+	struct fake_driver *fake_device;
+	struct vme_master_resource *master_image;
+	struct vme_slave_resource *slave_image;
+	struct vme_lm_resource *lm;
+
+	/* We need a fake parent device */
+	vme_root = __root_device_register("vme", THIS_MODULE);
+
+	/* If we want to support more than one bridge at some point, we need to
+	 * dynamically allocate this so we get one per device.
+	 */
+	fake_bridge = kzalloc(sizeof(struct vme_bridge), GFP_KERNEL);
+	if (fake_bridge == NULL) {
+		retval = -ENOMEM;
+		goto err_struct;
+	}
+
+	fake_device = kzalloc(sizeof(struct fake_driver), GFP_KERNEL);
+	if (fake_device == NULL) {
+		retval = -ENOMEM;
+		goto err_driver;
+	}
+
+	fake_bridge->driver_priv = fake_device;
+
+	fake_bridge->parent = vme_root;
+
+	fake_device->parent = fake_bridge;
+
+	/* Initialize wait queues & mutual exclusion flags */
+	mutex_init(&fake_device->vme_int);
+	mutex_init(&fake_bridge->irq_mtx);
+	tasklet_init(&fake_device->int_tasklet, fake_VIRQ_tasklet,
+			(unsigned long) fake_bridge);
+
+	strcpy(fake_bridge->name, driver_name);
+
+	/* Add master windows to list */
+	INIT_LIST_HEAD(&fake_bridge->master_resources);
+	for (i = 0; i < FAKE_MAX_MASTER; i++) {
+		master_image = kmalloc(sizeof(struct vme_master_resource),
+				GFP_KERNEL);
+		if (master_image == NULL) {
+			retval = -ENOMEM;
+			goto err_master;
+		}
+		master_image->parent = fake_bridge;
+		spin_lock_init(&master_image->lock);
+		master_image->locked = 0;
+		master_image->number = i;
+		master_image->address_attr = VME_A16 | VME_A24 | VME_A32 |
+			VME_A64;
+		master_image->cycle_attr = VME_SCT | VME_BLT | VME_MBLT |
+			VME_2eVME | VME_2eSST | VME_2eSSTB | VME_2eSST160 |
+			VME_2eSST267 | VME_2eSST320 | VME_SUPER | VME_USER |
+			VME_PROG | VME_DATA;
+		master_image->width_attr = VME_D16 | VME_D32;
+		memset(&master_image->bus_resource, 0,
+				sizeof(struct resource));
+		master_image->kern_base  = NULL;
+		list_add_tail(&master_image->list,
+				&fake_bridge->master_resources);
+	}
+
+	/* Add slave windows to list */
+	INIT_LIST_HEAD(&fake_bridge->slave_resources);
+	for (i = 0; i < FAKE_MAX_SLAVE; i++) {
+		slave_image = kmalloc(sizeof(struct vme_slave_resource),
+				GFP_KERNEL);
+		if (slave_image == NULL) {
+			retval = -ENOMEM;
+			goto err_slave;
+		}
+		slave_image->parent = fake_bridge;
+		mutex_init(&slave_image->mtx);
+		slave_image->locked = 0;
+		slave_image->number = i;
+		slave_image->address_attr = VME_A16 | VME_A24 | VME_A32 |
+			VME_A64 | VME_CRCSR | VME_USER1 | VME_USER2 |
+			VME_USER3 | VME_USER4;
+		slave_image->cycle_attr = VME_SCT | VME_BLT | VME_MBLT |
+			VME_2eVME | VME_2eSST | VME_2eSSTB | VME_2eSST160 |
+			VME_2eSST267 | VME_2eSST320 | VME_SUPER | VME_USER |
+			VME_PROG | VME_DATA;
+		list_add_tail(&slave_image->list,
+				&fake_bridge->slave_resources);
+	}
+
+	/* Add location monitor to list */
+	INIT_LIST_HEAD(&fake_bridge->lm_resources);
+	lm = kmalloc(sizeof(struct vme_lm_resource), GFP_KERNEL);
+	if (lm == NULL) {
+		pr_err("Failed to allocate memory for location monitor resource structure\n");
+		retval = -ENOMEM;
+		goto err_lm;
+	}
+	lm->parent = fake_bridge;
+	mutex_init(&lm->mtx);
+	lm->locked = 0;
+	lm->number = 1;
+	lm->monitors = 4;
+	list_add_tail(&lm->list, &fake_bridge->lm_resources);
+
+	fake_bridge->slave_get = fake_slave_get;
+	fake_bridge->slave_set = fake_slave_set;
+	fake_bridge->master_get = fake_master_get;
+	fake_bridge->master_set = fake_master_set;
+	fake_bridge->master_read = fake_master_read;
+	fake_bridge->master_write = fake_master_write;
+	fake_bridge->master_rmw = fake_master_rmw;
+	fake_bridge->irq_set = fake_irq_set;
+	fake_bridge->irq_generate = fake_irq_generate;
+	fake_bridge->lm_set = fake_lm_set;
+	fake_bridge->lm_get = fake_lm_get;
+	fake_bridge->lm_attach = fake_lm_attach;
+	fake_bridge->lm_detach = fake_lm_detach;
+	fake_bridge->slot_get = fake_slot_get;
+	fake_bridge->alloc_consistent = fake_alloc_consistent;
+	fake_bridge->free_consistent = fake_free_consistent;
+
+	pr_info("Board is%s the VME system controller\n",
+			(geoid == 1) ? "" : " not");
+
+	pr_info("VME geographical address is set to %d\n", geoid);
+
+	retval = fake_crcsr_init(fake_bridge);
+	if (retval) {
+		pr_err("CR/CSR configuration failed.\n");
+		goto err_crcsr;
+	}
+
+	retval = vme_register_bridge(fake_bridge);
+	if (retval != 0) {
+		pr_err("Chip Registration failed.\n");
+		goto err_reg;
+	}
+
+	exit_pointer = fake_bridge;
+
+	return 0;
+
+err_reg:
+	fake_crcsr_exit(fake_bridge);
+err_crcsr:
+err_lm:
+	/* resources are stored in link list */
+	list_for_each_safe(pos, n, &fake_bridge->lm_resources) {
+		lm = list_entry(pos, struct vme_lm_resource, list);
+		list_del(pos);
+		kfree(lm);
+	}
+err_slave:
+	/* resources are stored in link list */
+	list_for_each_safe(pos, n, &fake_bridge->slave_resources) {
+		slave_image = list_entry(pos, struct vme_slave_resource, list);
+		list_del(pos);
+		kfree(slave_image);
+	}
+err_master:
+	/* resources are stored in link list */
+	list_for_each_safe(pos, n, &fake_bridge->master_resources) {
+		master_image = list_entry(pos, struct vme_master_resource,
+				list);
+		list_del(pos);
+		kfree(master_image);
+	}
+
+	kfree(fake_device);
+err_driver:
+	kfree(fake_bridge);
+err_struct:
+	return retval;
+
+}
+
+
+static void __exit fake_exit(void)
+{
+	struct list_head *pos = NULL;
+	struct list_head *tmplist;
+	struct vme_master_resource *master_image;
+	struct vme_slave_resource *slave_image;
+	int i;
+	struct vme_bridge *fake_bridge;
+	struct fake_driver *bridge;
+
+	fake_bridge = exit_pointer;
+
+	bridge = fake_bridge->driver_priv;
+
+	pr_debug("Driver is being unloaded.\n");
+
+	/*
+	 *  Shutdown all inbound and outbound windows.
+	 */
+	for (i = 0; i < FAKE_MAX_MASTER; i++)
+		bridge->masters[i].enabled = 0;
+
+	for (i = 0; i < FAKE_MAX_SLAVE; i++)
+		bridge->slaves[i].enabled = 0;
+
+	/*
+	 *  Shutdown Location monitor.
+	 */
+	bridge->lm_enabled = 0;
+
+	vme_unregister_bridge(fake_bridge);
+
+	fake_crcsr_exit(fake_bridge);
+	/* resources are stored in link list */
+	list_for_each_safe(pos, tmplist, &fake_bridge->slave_resources) {
+		slave_image = list_entry(pos, struct vme_slave_resource, list);
+		list_del(pos);
+		kfree(slave_image);
+	}
+
+	/* resources are stored in link list */
+	list_for_each_safe(pos, tmplist, &fake_bridge->master_resources) {
+		master_image = list_entry(pos, struct vme_master_resource,
+				list);
+		list_del(pos);
+		kfree(master_image);
+	}
+
+	kfree(fake_bridge->driver_priv);
+
+	kfree(fake_bridge);
+
+	root_device_unregister(vme_root);
+}
+
+
+MODULE_PARM_DESC(geoid, "Set geographical addressing");
+module_param(geoid, int, 0);
+
+MODULE_DESCRIPTION("Fake VME bridge driver");
+MODULE_LICENSE("GPL");
+
+module_init(fake_init);
+module_exit(fake_exit);
diff --git a/drivers/vme/bridges/vme_tsi148.c b/drivers/vme/bridges/vme_tsi148.c
index 4bc5d45..fc1b634 100644
--- a/drivers/vme/bridges/vme_tsi148.c
+++ b/drivers/vme/bridges/vme_tsi148.c
@@ -50,6 +50,8 @@
 	{ },
 };
 
+MODULE_DEVICE_TABLE(pci, tsi148_ids);
+
 static struct pci_driver tsi148_driver = {
 	.name = driver_name,
 	.id_table = tsi148_ids,
@@ -102,7 +104,7 @@
 	for (i = 0; i < 4; i++) {
 		if (stat & TSI148_LCSR_INTS_LMS[i]) {
 			/* We only enable interrupts if the callback is set */
-			bridge->lm_callback[i](i);
+			bridge->lm_callback[i](bridge->lm_data[i]);
 			serviced |= TSI148_LCSR_INTC_LMC[i];
 		}
 	}
@@ -2047,7 +2049,7 @@
  * Callback will be passed the monitor triggered.
  */
 static int tsi148_lm_attach(struct vme_lm_resource *lm, int monitor,
-	void (*callback)(int))
+	void (*callback)(void *), void *data)
 {
 	u32 lm_ctl, tmp;
 	struct vme_bridge *tsi148_bridge;
@@ -2077,6 +2079,7 @@
 
 	/* Attach callback */
 	bridge->lm_callback[monitor] = callback;
+	bridge->lm_data[monitor] = data;
 
 	/* Enable Location Monitor interrupt */
 	tmp = ioread32be(bridge->base + TSI148_LCSR_INTEN);
@@ -2124,6 +2127,7 @@
 
 	/* Detach callback */
 	bridge->lm_callback[monitor] = NULL;
+	bridge->lm_data[monitor] = NULL;
 
 	/* If all location monitors disabled, disable global Location Monitor */
 	if ((lm_en & (TSI148_LCSR_INTS_LM0S | TSI148_LCSR_INTS_LM1S |
diff --git a/drivers/vme/bridges/vme_tsi148.h b/drivers/vme/bridges/vme_tsi148.h
index f5ed1438..0935d85 100644
--- a/drivers/vme/bridges/vme_tsi148.h
+++ b/drivers/vme/bridges/vme_tsi148.h
@@ -38,7 +38,8 @@
 	void __iomem *base;	/* Base Address of device registers */
 	wait_queue_head_t dma_queue[2];
 	wait_queue_head_t iack_queue;
-	void (*lm_callback[4])(int);	/* Called in interrupt handler */
+	void (*lm_callback[4])(void *);	/* Called in interrupt handler */
+	void *lm_data[4];
 	void *crcsr_kernel;
 	dma_addr_t crcsr_bus;
 	struct vme_master_resource *flush_image;
diff --git a/drivers/vme/vme.c b/drivers/vme/vme.c
index 37ac0a5..15b6407 100644
--- a/drivers/vme/vme.c
+++ b/drivers/vme/vme.c
@@ -13,8 +13,8 @@
  * option) any later version.
  */
 
-#include <linux/module.h>
-#include <linux/moduleparam.h>
+#include <linux/init.h>
+#include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -39,7 +39,6 @@
 static LIST_HEAD(vme_bus_list);
 static DEFINE_MUTEX(vme_buses_lock);
 
-static void __exit vme_exit(void);
 static int __init vme_init(void);
 
 static struct vme_dev *dev_to_vme_dev(struct device *dev)
@@ -1321,7 +1320,7 @@
 EXPORT_SYMBOL(vme_lm_get);
 
 int vme_lm_attach(struct vme_resource *resource, int monitor,
-	void (*callback)(int))
+	void (*callback)(void *), void *data)
 {
 	struct vme_bridge *bridge = find_bridge(resource);
 	struct vme_lm_resource *lm;
@@ -1338,7 +1337,7 @@
 		return -EINVAL;
 	}
 
-	return bridge->lm_attach(lm, monitor, callback);
+	return bridge->lm_attach(lm, monitor, callback, data);
 }
 EXPORT_SYMBOL(vme_lm_attach);
 
@@ -1622,25 +1621,10 @@
 	return retval;
 }
 
-static int vme_bus_remove(struct device *dev)
-{
-	int retval = -ENODEV;
-	struct vme_driver *driver;
-	struct vme_dev *vdev = dev_to_vme_dev(dev);
-
-	driver = dev->platform_data;
-
-	if (driver->remove != NULL)
-		retval = driver->remove(vdev);
-
-	return retval;
-}
-
 struct bus_type vme_bus_type = {
 	.name = "vme",
 	.match = vme_bus_match,
 	.probe = vme_bus_probe,
-	.remove = vme_bus_remove,
 };
 EXPORT_SYMBOL(vme_bus_type);
 
@@ -1648,11 +1632,4 @@
 {
 	return bus_register(&vme_bus_type);
 }
-
-static void __exit vme_exit(void)
-{
-	bus_unregister(&vme_bus_type);
-}
-
 subsys_initcall(vme_init);
-module_exit(vme_exit);
diff --git a/drivers/vme/vme_bridge.h b/drivers/vme/vme_bridge.h
index cb8246f..2662e91 100644
--- a/drivers/vme/vme_bridge.h
+++ b/drivers/vme/vme_bridge.h
@@ -160,7 +160,8 @@
 	int (*lm_set) (struct vme_lm_resource *, unsigned long long, u32, u32);
 	int (*lm_get) (struct vme_lm_resource *, unsigned long long *, u32 *,
 		u32 *);
-	int (*lm_attach) (struct vme_lm_resource *, int, void (*callback)(int));
+	int (*lm_attach)(struct vme_lm_resource *, int,
+			 void (*callback)(void *), void *);
 	int (*lm_detach) (struct vme_lm_resource *, int);
 
 	/* CR/CSR space functions */
diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
index 581a300..82611f1 100644
--- a/drivers/w1/slaves/w1_therm.c
+++ b/drivers/w1/slaves/w1_therm.c
@@ -66,7 +66,7 @@
 
 /* return the address of the refcnt in the family data */
 #define THERM_REFCNT(family_data) \
-	(&((struct w1_therm_family_data*)family_data)->refcnt)
+	(&((struct w1_therm_family_data *)family_data)->refcnt)
 
 static int w1_therm_add_slave(struct w1_slave *sl)
 {
@@ -81,7 +81,8 @@
 static void w1_therm_remove_slave(struct w1_slave *sl)
 {
 	int refcnt = atomic_sub_return(1, THERM_REFCNT(sl->family_data));
-	while(refcnt) {
+
+	while (refcnt) {
 		msleep(1000);
 		refcnt = atomic_read(THERM_REFCNT(sl->family_data));
 	}
@@ -151,8 +152,7 @@
 	.fops = &w1_therm_fops,
 };
 
-struct w1_therm_family_converter
-{
+struct w1_therm_family_converter {
 	u8			broken;
 	u16			reserved;
 	struct w1_family	*f;
@@ -293,7 +293,7 @@
 	uint8_t precision_bits;
 	uint8_t mask = 0x60;
 
-	if(val > 12 || val < 9) {
+	if (val > 12 || val < 9) {
 		pr_warn("Unsupported precision\n");
 		return -1;
 	}
@@ -336,7 +336,8 @@
 
 			/* read values to only alter precision bits */
 			w1_write_8(dev, W1_READ_SCRATCHPAD);
-			if ((count = w1_read_block(dev, rom, 9)) != 9)
+			count = w1_read_block(dev, rom, 9);
+			if (count != 9)
 				dev_warn(device, "w1_read_block() returned %u instead of 9.\n",	count);
 
 			crc = w1_calc_crc8(rom, 8);
@@ -366,6 +367,7 @@
 static inline int w1_DS18B20_convert_temp(u8 rom[9])
 {
 	s16 t = le16_to_cpup((__le16 *)rom);
+
 	return t*1000/16;
 }
 
@@ -415,7 +417,7 @@
 	for (i = 0; i < ARRAY_SIZE(w1_therm_families); ++i) {
 		if (w1_therm_families[i].f->fid == sl->family->fid) {
 			/* zero value indicates to write current configuration to eeprom */
-			if (0 == val)
+			if (val == 0)
 				ret = w1_therm_families[i].eeprom(device);
 			else
 				ret = w1_therm_families[i].precision(device, val);
@@ -439,8 +441,7 @@
 	if (ret != 0)
 		goto post_unlock;
 
-	if(!sl->family_data)
-	{
+	if (!sl->family_data) {
 		ret = -ENODEV;
 		goto pre_unlock;
 	}
@@ -495,7 +496,8 @@
 			if (!w1_reset_select_slave(sl)) {
 
 				w1_write_8(dev, W1_READ_SCRATCHPAD);
-				if ((count = w1_read_block(dev, rom, 9)) != 9) {
+				count = w1_read_block(dev, rom, 9);
+				if (count != 9) {
 					dev_warn(device, "w1_read_block() "
 						"returned %u instead of 9.\n",
 						count);
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index bb34362..e213c67 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -53,8 +53,8 @@
 module_param_named(timeout, w1_timeout, int, 0);
 MODULE_PARM_DESC(timeout, "time in seconds between automatic slave searches");
 module_param_named(timeout_us, w1_timeout_us, int, 0);
-MODULE_PARM_DESC(timeout, "time in microseconds between automatic slave"
-		          " searches");
+MODULE_PARM_DESC(timeout_us,
+		 "time in microseconds between automatic slave searches");
 /* A search stops when w1_max_slave_count devices have been found in that
  * search.  The next search will start over and detect the same set of devices
  * on a static 1-wire bus.  Memory is not allocated based on this number, just
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 1bffe00..50dbaa8 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -152,6 +152,19 @@
 
 	  This driver can be built as a module. The module name is tangox_wdt.
 
+config WDAT_WDT
+	tristate "ACPI Watchdog Action Table (WDAT)"
+	depends on ACPI
+	select ACPI_WATCHDOG
+	help
+	  This driver adds support for systems with ACPI Watchdog Action
+	  Table (WDAT) table. Servers typically have this but it can be
+	  found on some desktop machines as well. This driver will take
+	  over the native iTCO watchdog driver found on many Intel CPUs.
+
+	  To compile this driver as module, choose M here: the module will
+	  be called wdat_wdt.
+
 config WM831X_WATCHDOG
 	tristate "WM831x watchdog"
 	depends on MFD_WM831X
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index c22ad3e..cba0043 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -202,6 +202,7 @@
 obj-$(CONFIG_DA9063_WATCHDOG) += da9063_wdt.o
 obj-$(CONFIG_GPIO_WATCHDOG)	+= gpio_wdt.o
 obj-$(CONFIG_TANGOX_WATCHDOG) += tangox_wdt.o
+obj-$(CONFIG_WDAT_WDT) += wdat_wdt.o
 obj-$(CONFIG_WM831X_WATCHDOG) += wm831x_wdt.o
 obj-$(CONFIG_WM8350_WATCHDOG) += wm8350_wdt.o
 obj-$(CONFIG_MAX63XX_WATCHDOG) += max63xx_wdt.o
diff --git a/drivers/watchdog/pcwd_usb.c b/drivers/watchdog/pcwd_usb.c
index 68952d9..99ebf6e 100644
--- a/drivers/watchdog/pcwd_usb.c
+++ b/drivers/watchdog/pcwd_usb.c
@@ -666,10 +666,8 @@
 
 	/* allocate the urb's */
 	usb_pcwd->intr_urb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!usb_pcwd->intr_urb) {
-		pr_err("Out of memory\n");
+	if (!usb_pcwd->intr_urb)
 		goto error;
-	}
 
 	/* initialise the intr urb's */
 	usb_fill_int_urb(usb_pcwd->intr_urb, udev, pipe,
diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c
new file mode 100644
index 0000000..e473e3b
--- /dev/null
+++ b/drivers/watchdog/wdat_wdt.c
@@ -0,0 +1,526 @@
+/*
+ * ACPI Hardware Watchdog (WDAT) driver.
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/watchdog.h>
+
+#define MAX_WDAT_ACTIONS ACPI_WDAT_ACTION_RESERVED
+
+/**
+ * struct wdat_instruction - Single ACPI WDAT instruction
+ * @entry: Copy of the ACPI table instruction
+ * @reg: Register the instruction is accessing
+ * @node: Next instruction in action sequence
+ */
+struct wdat_instruction {
+	struct acpi_wdat_entry entry;
+	void __iomem *reg;
+	struct list_head node;
+};
+
+/**
+ * struct wdat_wdt - ACPI WDAT watchdog device
+ * @pdev: Parent platform device
+ * @wdd: Watchdog core device
+ * @period: How long is one watchdog period in ms
+ * @stopped_in_sleep: Is this watchdog stopped by the firmware in S1-S5
+ * @stopped: Was the watchdog stopped by the driver in suspend
+ * @actions: An array of instruction lists indexed by an action number from
+ *           the WDAT table. There can be %NULL entries for not implemented
+ *           actions.
+ */
+struct wdat_wdt {
+	struct platform_device *pdev;
+	struct watchdog_device wdd;
+	unsigned int period;
+	bool stopped_in_sleep;
+	bool stopped;
+	struct list_head *instructions[MAX_WDAT_ACTIONS];
+};
+
+#define to_wdat_wdt(wdd) container_of(wdd, struct wdat_wdt, wdd)
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+		 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int wdat_wdt_read(struct wdat_wdt *wdat,
+	 const struct wdat_instruction *instr, u32 *value)
+{
+	const struct acpi_generic_address *gas = &instr->entry.register_region;
+
+	switch (gas->access_width) {
+	case 1:
+		*value = ioread8(instr->reg);
+		break;
+	case 2:
+		*value = ioread16(instr->reg);
+		break;
+	case 3:
+		*value = ioread32(instr->reg);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	dev_dbg(&wdat->pdev->dev, "Read %#x from 0x%08llx\n", *value,
+		gas->address);
+
+	return 0;
+}
+
+static int wdat_wdt_write(struct wdat_wdt *wdat,
+	const struct wdat_instruction *instr, u32 value)
+{
+	const struct acpi_generic_address *gas = &instr->entry.register_region;
+
+	switch (gas->access_width) {
+	case 1:
+		iowrite8((u8)value, instr->reg);
+		break;
+	case 2:
+		iowrite16((u16)value, instr->reg);
+		break;
+	case 3:
+		iowrite32(value, instr->reg);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	dev_dbg(&wdat->pdev->dev, "Wrote %#x to 0x%08llx\n", value,
+		gas->address);
+
+	return 0;
+}
+
+static int wdat_wdt_run_action(struct wdat_wdt *wdat, unsigned int action,
+			       u32 param, u32 *retval)
+{
+	struct wdat_instruction *instr;
+
+	if (action >= ARRAY_SIZE(wdat->instructions))
+		return -EINVAL;
+
+	if (!wdat->instructions[action])
+		return -EOPNOTSUPP;
+
+	dev_dbg(&wdat->pdev->dev, "Running action %#x\n", action);
+
+	/* Run each instruction sequentially */
+	list_for_each_entry(instr, wdat->instructions[action], node) {
+		const struct acpi_wdat_entry *entry = &instr->entry;
+		const struct acpi_generic_address *gas;
+		u32 flags, value, mask, x, y;
+		bool preserve;
+		int ret;
+
+		gas = &entry->register_region;
+
+		preserve = entry->instruction & ACPI_WDAT_PRESERVE_REGISTER;
+		flags = entry->instruction & ~ACPI_WDAT_PRESERVE_REGISTER;
+		value = entry->value;
+		mask = entry->mask;
+
+		switch (flags) {
+		case ACPI_WDAT_READ_VALUE:
+			ret = wdat_wdt_read(wdat, instr, &x);
+			if (ret)
+				return ret;
+			x >>= gas->bit_offset;
+			x &= mask;
+			if (retval)
+				*retval = x == value;
+			break;
+
+		case ACPI_WDAT_READ_COUNTDOWN:
+			ret = wdat_wdt_read(wdat, instr, &x);
+			if (ret)
+				return ret;
+			x >>= gas->bit_offset;
+			x &= mask;
+			if (retval)
+				*retval = x;
+			break;
+
+		case ACPI_WDAT_WRITE_VALUE:
+			x = value & mask;
+			x <<= gas->bit_offset;
+			if (preserve) {
+				ret = wdat_wdt_read(wdat, instr, &y);
+				if (ret)
+					return ret;
+				y = y & ~(mask << gas->bit_offset);
+				x |= y;
+			}
+			ret = wdat_wdt_write(wdat, instr, x);
+			if (ret)
+				return ret;
+			break;
+
+		case ACPI_WDAT_WRITE_COUNTDOWN:
+			x = param;
+			x &= mask;
+			x <<= gas->bit_offset;
+			if (preserve) {
+				ret = wdat_wdt_read(wdat, instr, &y);
+				if (ret)
+					return ret;
+				y = y & ~(mask << gas->bit_offset);
+				x |= y;
+			}
+			ret = wdat_wdt_write(wdat, instr, x);
+			if (ret)
+				return ret;
+			break;
+
+		default:
+			dev_err(&wdat->pdev->dev, "Unknown instruction: %u\n",
+				flags);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int wdat_wdt_enable_reboot(struct wdat_wdt *wdat)
+{
+	int ret;
+
+	/*
+	 * WDAT specification says that the watchdog is required to reboot
+	 * the system when it fires. However, it also states that it is
+	 * recommeded to make it configurable through hardware register. We
+	 * enable reboot now if it is configrable, just in case.
+	 */
+	ret = wdat_wdt_run_action(wdat, ACPI_WDAT_SET_REBOOT, 0, NULL);
+	if (ret && ret != -EOPNOTSUPP) {
+		dev_err(&wdat->pdev->dev,
+			"Failed to enable reboot when watchdog triggers\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void wdat_wdt_boot_status(struct wdat_wdt *wdat)
+{
+	u32 boot_status = 0;
+	int ret;
+
+	ret = wdat_wdt_run_action(wdat, ACPI_WDAT_GET_STATUS, 0, &boot_status);
+	if (ret && ret != -EOPNOTSUPP) {
+		dev_err(&wdat->pdev->dev, "Failed to read boot status\n");
+		return;
+	}
+
+	if (boot_status)
+		wdat->wdd.bootstatus = WDIOF_CARDRESET;
+
+	/* Clear the boot status in case BIOS did not do it */
+	ret = wdat_wdt_run_action(wdat, ACPI_WDAT_SET_STATUS, 0, NULL);
+	if (ret && ret != -EOPNOTSUPP)
+		dev_err(&wdat->pdev->dev, "Failed to clear boot status\n");
+}
+
+static void wdat_wdt_set_running(struct wdat_wdt *wdat)
+{
+	u32 running = 0;
+	int ret;
+
+	ret = wdat_wdt_run_action(wdat, ACPI_WDAT_GET_RUNNING_STATE, 0,
+				  &running);
+	if (ret && ret != -EOPNOTSUPP)
+		dev_err(&wdat->pdev->dev, "Failed to read running state\n");
+
+	if (running)
+		set_bit(WDOG_HW_RUNNING, &wdat->wdd.status);
+}
+
+static int wdat_wdt_start(struct watchdog_device *wdd)
+{
+	return wdat_wdt_run_action(to_wdat_wdt(wdd),
+				   ACPI_WDAT_SET_RUNNING_STATE, 0, NULL);
+}
+
+static int wdat_wdt_stop(struct watchdog_device *wdd)
+{
+	return wdat_wdt_run_action(to_wdat_wdt(wdd),
+				   ACPI_WDAT_SET_STOPPED_STATE, 0, NULL);
+}
+
+static int wdat_wdt_ping(struct watchdog_device *wdd)
+{
+	return wdat_wdt_run_action(to_wdat_wdt(wdd), ACPI_WDAT_RESET, 0, NULL);
+}
+
+static int wdat_wdt_set_timeout(struct watchdog_device *wdd,
+				unsigned int timeout)
+{
+	struct wdat_wdt *wdat = to_wdat_wdt(wdd);
+	unsigned int periods;
+	int ret;
+
+	periods = timeout * 1000 / wdat->period;
+	ret = wdat_wdt_run_action(wdat, ACPI_WDAT_SET_COUNTDOWN, periods, NULL);
+	if (!ret)
+		wdd->timeout = timeout;
+	return ret;
+}
+
+static unsigned int wdat_wdt_get_timeleft(struct watchdog_device *wdd)
+{
+	struct wdat_wdt *wdat = to_wdat_wdt(wdd);
+	u32 periods = 0;
+
+	wdat_wdt_run_action(wdat, ACPI_WDAT_GET_COUNTDOWN, 0, &periods);
+	return periods * wdat->period / 1000;
+}
+
+static const struct watchdog_info wdat_wdt_info = {
+	.options = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
+	.firmware_version = 0,
+	.identity = "wdat_wdt",
+};
+
+static const struct watchdog_ops wdat_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = wdat_wdt_start,
+	.stop = wdat_wdt_stop,
+	.ping = wdat_wdt_ping,
+	.set_timeout = wdat_wdt_set_timeout,
+	.get_timeleft = wdat_wdt_get_timeleft,
+};
+
+static int wdat_wdt_probe(struct platform_device *pdev)
+{
+	const struct acpi_wdat_entry *entries;
+	const struct acpi_table_wdat *tbl;
+	struct wdat_wdt *wdat;
+	struct resource *res;
+	void __iomem **regs;
+	acpi_status status;
+	int i, ret;
+
+	status = acpi_get_table(ACPI_SIG_WDAT, 0,
+				(struct acpi_table_header **)&tbl);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	wdat = devm_kzalloc(&pdev->dev, sizeof(*wdat), GFP_KERNEL);
+	if (!wdat)
+		return -ENOMEM;
+
+	regs = devm_kcalloc(&pdev->dev, pdev->num_resources, sizeof(*regs),
+			    GFP_KERNEL);
+	if (!regs)
+		return -ENOMEM;
+
+	/* WDAT specification wants to have >= 1ms period */
+	if (tbl->timer_period < 1)
+		return -EINVAL;
+	if (tbl->min_count > tbl->max_count)
+		return -EINVAL;
+
+	wdat->period = tbl->timer_period;
+	wdat->wdd.min_hw_heartbeat_ms = wdat->period * tbl->min_count;
+	wdat->wdd.max_hw_heartbeat_ms = wdat->period * tbl->max_count;
+	wdat->stopped_in_sleep = tbl->flags & ACPI_WDAT_STOPPED;
+	wdat->wdd.info = &wdat_wdt_info;
+	wdat->wdd.ops = &wdat_wdt_ops;
+	wdat->pdev = pdev;
+
+	/* Request and map all resources */
+	for (i = 0; i < pdev->num_resources; i++) {
+		void __iomem *reg;
+
+		res = &pdev->resource[i];
+		if (resource_type(res) == IORESOURCE_MEM) {
+			reg = devm_ioremap_resource(&pdev->dev, res);
+			if (IS_ERR(reg))
+				return PTR_ERR(reg);
+		} else if (resource_type(res) == IORESOURCE_IO) {
+			reg = devm_ioport_map(&pdev->dev, res->start, 1);
+			if (!reg)
+				return -ENOMEM;
+		} else {
+			dev_err(&pdev->dev, "Unsupported resource\n");
+			return -EINVAL;
+		}
+
+		regs[i] = reg;
+	}
+
+	entries = (struct acpi_wdat_entry *)(tbl + 1);
+	for (i = 0; i < tbl->entries; i++) {
+		const struct acpi_generic_address *gas;
+		struct wdat_instruction *instr;
+		struct list_head *instructions;
+		unsigned int action;
+		struct resource r;
+		int j;
+
+		action = entries[i].action;
+		if (action >= MAX_WDAT_ACTIONS) {
+			dev_dbg(&pdev->dev, "Skipping unknown action: %u\n",
+				action);
+			continue;
+		}
+
+		instr = devm_kzalloc(&pdev->dev, sizeof(*instr), GFP_KERNEL);
+		if (!instr)
+			return -ENOMEM;
+
+		INIT_LIST_HEAD(&instr->node);
+		instr->entry = entries[i];
+
+		gas = &entries[i].register_region;
+
+		memset(&r, 0, sizeof(r));
+		r.start = gas->address;
+		r.end = r.start + gas->access_width;
+		if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+			r.flags = IORESOURCE_MEM;
+		} else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) {
+			r.flags = IORESOURCE_IO;
+		} else {
+			dev_dbg(&pdev->dev, "Unsupported address space: %d\n",
+				gas->space_id);
+			continue;
+		}
+
+		/* Find the matching resource */
+		for (j = 0; j < pdev->num_resources; j++) {
+			res = &pdev->resource[j];
+			if (resource_contains(res, &r)) {
+				instr->reg = regs[j] + r.start - res->start;
+				break;
+			}
+		}
+
+		if (!instr->reg) {
+			dev_err(&pdev->dev, "I/O resource not found\n");
+			return -EINVAL;
+		}
+
+		instructions = wdat->instructions[action];
+		if (!instructions) {
+			instructions = devm_kzalloc(&pdev->dev,
+					sizeof(*instructions), GFP_KERNEL);
+			if (!instructions)
+				return -ENOMEM;
+
+			INIT_LIST_HEAD(instructions);
+			wdat->instructions[action] = instructions;
+		}
+
+		list_add_tail(&instr->node, instructions);
+	}
+
+	wdat_wdt_boot_status(wdat);
+	wdat_wdt_set_running(wdat);
+
+	ret = wdat_wdt_enable_reboot(wdat);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, wdat);
+
+	watchdog_set_nowayout(&wdat->wdd, nowayout);
+	return devm_watchdog_register_device(&pdev->dev, &wdat->wdd);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int wdat_wdt_suspend_noirq(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct wdat_wdt *wdat = platform_get_drvdata(pdev);
+	int ret;
+
+	if (!watchdog_active(&wdat->wdd))
+		return 0;
+
+	/*
+	 * We need to stop the watchdog if firmare is not doing it or if we
+	 * are going suspend to idle (where firmware is not involved). If
+	 * firmware is stopping the watchdog we kick it here one more time
+	 * to give it some time.
+	 */
+	wdat->stopped = false;
+	if (acpi_target_system_state() == ACPI_STATE_S0 ||
+	    !wdat->stopped_in_sleep) {
+		ret = wdat_wdt_stop(&wdat->wdd);
+		if (!ret)
+			wdat->stopped = true;
+	} else {
+		ret = wdat_wdt_ping(&wdat->wdd);
+	}
+
+	return ret;
+}
+
+static int wdat_wdt_resume_noirq(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct wdat_wdt *wdat = platform_get_drvdata(pdev);
+	int ret;
+
+	if (!watchdog_active(&wdat->wdd))
+		return 0;
+
+	if (!wdat->stopped) {
+		/*
+		 * Looks like the boot firmware reinitializes the watchdog
+		 * before it hands off to the OS on resume from sleep so we
+		 * stop and reprogram the watchdog here.
+		 */
+		ret = wdat_wdt_stop(&wdat->wdd);
+		if (ret)
+			return ret;
+
+		ret = wdat_wdt_set_timeout(&wdat->wdd, wdat->wdd.timeout);
+		if (ret)
+			return ret;
+
+		ret = wdat_wdt_enable_reboot(wdat);
+		if (ret)
+			return ret;
+	}
+
+	return wdat_wdt_start(&wdat->wdd);
+}
+#endif
+
+static const struct dev_pm_ops wdat_wdt_pm_ops = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(wdat_wdt_suspend_noirq,
+				      wdat_wdt_resume_noirq)
+};
+
+static struct platform_driver wdat_wdt_driver = {
+	.probe = wdat_wdt_probe,
+	.driver = {
+		.name = "wdat_wdt",
+		.pm = &wdat_wdt_pm_ops,
+	},
+};
+
+module_platform_driver(wdat_wdt_driver);
+
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
+MODULE_DESCRIPTION("ACPI Hardware Watchdog (WDAT) driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:wdat_wdt");
diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index 7487971..c1010f01 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -316,7 +316,7 @@
 			rc = -ENOMEM;
 			goto out;
 		}
-	} else {
+	} else if (msg_type == XS_TRANSACTION_END) {
 		list_for_each_entry(trans, &u->transactions, list)
 			if (trans->handle.id == u->u.msg.tx_id)
 				break;
diff --git a/fs/Kconfig b/fs/Kconfig
index 2bc7ad7..3ef62ba 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -79,6 +79,7 @@
 config FILE_LOCKING
 	bool "Enable POSIX file locking API" if EXPERT
 	default y
+	select PERCPU_RWSEM
 	help
 	  This option enables standard file locking support, required
           for filesystems like NFS and for the flock() system
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 7ef637d..1e9d2f8 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -461,8 +461,8 @@
  */
 int __init afs_callback_update_init(void)
 {
-	afs_callback_update_worker =
-		create_singlethread_workqueue("kafs_callbackd");
+	afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd",
+							     WQ_MEM_RECLAIM);
 	return afs_callback_update_worker ? 0 : -ENOMEM;
 }
 
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4b0eff6..2037e7a 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -17,19 +17,12 @@
 #include "internal.h"
 #include "afs_cm.h"
 
-#if 0
-struct workqueue_struct *afs_cm_workqueue;
-#endif  /*  0  */
-
-static int afs_deliver_cb_init_call_back_state(struct afs_call *,
-					       struct sk_buff *, bool);
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *,
-						struct sk_buff *, bool);
-static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool);
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *,
-						 struct sk_buff *, bool);
+static int afs_deliver_cb_init_call_back_state(struct afs_call *);
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
+static int afs_deliver_cb_probe(struct afs_call *);
+static int afs_deliver_cb_callback(struct afs_call *);
+static int afs_deliver_cb_probe_uuid(struct afs_call *);
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *);
 static void afs_cm_destructor(struct afs_call *);
 
 /*
@@ -134,7 +127,7 @@
 	 * received.  The step number here must match the final number in
 	 * afs_deliver_cb_callback().
 	 */
-	if (call->unmarshall == 6) {
+	if (call->unmarshall == 5) {
 		ASSERT(call->server && call->count && call->request);
 		afs_break_callbacks(call->server, call->count, call->request);
 	}
@@ -168,32 +161,29 @@
 /*
  * deliver request data to a CB.CallBack call
  */
-static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb,
-				   bool last)
+static int afs_deliver_cb_callback(struct afs_call *call)
 {
+	struct sockaddr_rxrpc srx;
 	struct afs_callback *cb;
 	struct afs_server *server;
-	struct in_addr addr;
 	__be32 *bp;
 	u32 tmp;
 	int ret, loop;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
 	case 0:
+		rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
 		call->offset = 0;
 		call->unmarshall++;
 
 		/* extract the FID array and its count in two steps */
 	case 1:
 		_debug("extract FID count");
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, &call->tmp, 4, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("FID count: %u", call->count);
@@ -208,13 +198,10 @@
 
 	case 2:
 		_debug("extract FID array");
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       call->count * 3 * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, call->buffer,
+				       call->count * 3 * 4, true);
+		if (ret < 0)
+			return ret;
 
 		_debug("unmarshall FID array");
 		call->request = kcalloc(call->count,
@@ -238,12 +225,9 @@
 		/* extract the callback array and its count in two steps */
 	case 3:
 		_debug("extract CB count");
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, &call->tmp, 4, true);
+		if (ret < 0)
+			return ret;
 
 		tmp = ntohl(call->tmp);
 		_debug("CB count: %u", tmp);
@@ -251,18 +235,13 @@
 			return -EBADMSG;
 		call->offset = 0;
 		call->unmarshall++;
-		if (tmp == 0)
-			goto empty_cb_array;
 
 	case 4:
 		_debug("extract CB array");
-		ret = afs_extract_data(call, skb, last, call->request,
-				       call->count * 3 * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, call->buffer,
+				       call->count * 3 * 4, false);
+		if (ret < 0)
+			return ret;
 
 		_debug("unmarshall CB array");
 		cb = call->request;
@@ -273,15 +252,9 @@
 			cb->type	= ntohl(*bp++);
 		}
 
-	empty_cb_array:
 		call->offset = 0;
 		call->unmarshall++;
 
-	case 5:
-		_debug("trailer");
-		if (skb->len != 0)
-			return -EBADMSG;
-
 		/* Record that the message was unmarshalled successfully so
 		 * that the call destructor can know do the callback breaking
 		 * work, even if the final ACK isn't received.
@@ -290,19 +263,15 @@
 		 * updated also.
 		 */
 		call->unmarshall++;
-	case 6:
+	case 5:
 		break;
 	}
 
-	if (!last)
-		return 0;
-
 	call->state = AFS_CALL_REPLYING;
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
-	memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-	server = afs_find_server(&addr);
+	server = afs_find_server(&srx);
 	if (!server)
 		return -ENOTCONN;
 	call->server = server;
@@ -329,27 +298,26 @@
 /*
  * deliver request data to a CB.InitCallBackState call
  */
-static int afs_deliver_cb_init_call_back_state(struct afs_call *call,
-					       struct sk_buff *skb,
-					       bool last)
+static int afs_deliver_cb_init_call_back_state(struct afs_call *call)
 {
+	struct sockaddr_rxrpc srx;
 	struct afs_server *server;
-	struct in_addr addr;
+	int ret;
 
-	_enter(",{%u},%d", skb->len, last);
+	_enter("");
 
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
+
+	ret = afs_extract_data(call, NULL, 0, false);
+	if (ret < 0)
+		return ret;
 
 	/* no unmarshalling required */
 	call->state = AFS_CALL_REPLYING;
 
 	/* we'll need the file server record as that tells us which set of
 	 * vnodes to operate upon */
-	memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-	server = afs_find_server(&addr);
+	server = afs_find_server(&srx);
 	if (!server)
 		return -ENOTCONN;
 	call->server = server;
@@ -362,108 +330,20 @@
 /*
  * deliver request data to a CB.InitCallBackState3 call
  */
-static int afs_deliver_cb_init_call_back_state3(struct afs_call *call,
-						struct sk_buff *skb,
-						bool last)
+static int afs_deliver_cb_init_call_back_state3(struct afs_call *call)
 {
+	struct sockaddr_rxrpc srx;
 	struct afs_server *server;
-	struct in_addr addr;
-
-	_enter(",{%u},%d", skb->len, last);
-
-	if (!last)
-		return 0;
-
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
-
-	/* we'll need the file server record as that tells us which set of
-	 * vnodes to operate upon */
-	memcpy(&addr, &ip_hdr(skb)->saddr, 4);
-	server = afs_find_server(&addr);
-	if (!server)
-		return -ENOTCONN;
-	call->server = server;
-
-	INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
-	queue_work(afs_wq, &call->work);
-	return 0;
-}
-
-/*
- * allow the fileserver to see if the cache manager is still alive
- */
-static void SRXAFSCB_Probe(struct work_struct *work)
-{
-	struct afs_call *call = container_of(work, struct afs_call, work);
-
-	_enter("");
-	afs_send_empty_reply(call);
-	_leave("");
-}
-
-/*
- * deliver request data to a CB.Probe call
- */
-static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb,
-				bool last)
-{
-	_enter(",{%u},%d", skb->len, last);
-
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
-
-	/* no unmarshalling required */
-	call->state = AFS_CALL_REPLYING;
-
-	INIT_WORK(&call->work, SRXAFSCB_Probe);
-	queue_work(afs_wq, &call->work);
-	return 0;
-}
-
-/*
- * allow the fileserver to quickly find out if the fileserver has been rebooted
- */
-static void SRXAFSCB_ProbeUuid(struct work_struct *work)
-{
-	struct afs_call *call = container_of(work, struct afs_call, work);
-	struct afs_uuid *r = call->request;
-
-	struct {
-		__be32	match;
-	} reply;
-
-	_enter("");
-
-
-	if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
-		reply.match = htonl(0);
-	else
-		reply.match = htonl(1);
-
-	afs_send_simple_reply(call, &reply, sizeof(reply));
-	_leave("");
-}
-
-/*
- * deliver request data to a CB.ProbeUuid call
- */
-static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb,
-				     bool last)
-{
 	struct afs_uuid *r;
 	unsigned loop;
 	__be32 *b;
 	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("");
 
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx);
+
+	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
 	case 0:
@@ -475,8 +355,8 @@
 
 	case 1:
 		_debug("extract UUID");
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       11 * sizeof(__be32));
+		ret = afs_extract_data(call, call->buffer,
+				       11 * sizeof(__be32), false);
 		switch (ret) {
 		case 0:		break;
 		case -EAGAIN:	return 0;
@@ -503,14 +383,132 @@
 		call->unmarshall++;
 
 	case 2:
-		_debug("trailer");
-		if (skb->len != 0)
-			return -EBADMSG;
 		break;
 	}
 
-	if (!last)
-		return 0;
+	/* no unmarshalling required */
+	call->state = AFS_CALL_REPLYING;
+
+	/* we'll need the file server record as that tells us which set of
+	 * vnodes to operate upon */
+	server = afs_find_server(&srx);
+	if (!server)
+		return -ENOTCONN;
+	call->server = server;
+
+	INIT_WORK(&call->work, SRXAFSCB_InitCallBackState);
+	queue_work(afs_wq, &call->work);
+	return 0;
+}
+
+/*
+ * allow the fileserver to see if the cache manager is still alive
+ */
+static void SRXAFSCB_Probe(struct work_struct *work)
+{
+	struct afs_call *call = container_of(work, struct afs_call, work);
+
+	_enter("");
+	afs_send_empty_reply(call);
+	_leave("");
+}
+
+/*
+ * deliver request data to a CB.Probe call
+ */
+static int afs_deliver_cb_probe(struct afs_call *call)
+{
+	int ret;
+
+	_enter("");
+
+	ret = afs_extract_data(call, NULL, 0, false);
+	if (ret < 0)
+		return ret;
+
+	/* no unmarshalling required */
+	call->state = AFS_CALL_REPLYING;
+
+	INIT_WORK(&call->work, SRXAFSCB_Probe);
+	queue_work(afs_wq, &call->work);
+	return 0;
+}
+
+/*
+ * allow the fileserver to quickly find out if the fileserver has been rebooted
+ */
+static void SRXAFSCB_ProbeUuid(struct work_struct *work)
+{
+	struct afs_call *call = container_of(work, struct afs_call, work);
+	struct afs_uuid *r = call->request;
+
+	struct {
+		__be32	match;
+	} reply;
+
+	_enter("");
+
+	if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0)
+		reply.match = htonl(0);
+	else
+		reply.match = htonl(1);
+
+	afs_send_simple_reply(call, &reply, sizeof(reply));
+	_leave("");
+}
+
+/*
+ * deliver request data to a CB.ProbeUuid call
+ */
+static int afs_deliver_cb_probe_uuid(struct afs_call *call)
+{
+	struct afs_uuid *r;
+	unsigned loop;
+	__be32 *b;
+	int ret;
+
+	_enter("{%u}", call->unmarshall);
+
+	switch (call->unmarshall) {
+	case 0:
+		call->offset = 0;
+		call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL);
+		if (!call->buffer)
+			return -ENOMEM;
+		call->unmarshall++;
+
+	case 1:
+		_debug("extract UUID");
+		ret = afs_extract_data(call, call->buffer,
+				       11 * sizeof(__be32), false);
+		switch (ret) {
+		case 0:		break;
+		case -EAGAIN:	return 0;
+		default:	return ret;
+		}
+
+		_debug("unmarshall UUID");
+		call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL);
+		if (!call->request)
+			return -ENOMEM;
+
+		b = call->buffer;
+		r = call->request;
+		r->time_low			= ntohl(b[0]);
+		r->time_mid			= ntohl(b[1]);
+		r->time_hi_and_version		= ntohl(b[2]);
+		r->clock_seq_hi_and_reserved 	= ntohl(b[3]);
+		r->clock_seq_low		= ntohl(b[4]);
+
+		for (loop = 0; loop < 6; loop++)
+			r->node[loop] = ntohl(b[loop + 5]);
+
+		call->offset = 0;
+		call->unmarshall++;
+
+	case 2:
+		break;
+	}
 
 	call->state = AFS_CALL_REPLYING;
 
@@ -585,15 +583,15 @@
 /*
  * deliver request data to a CB.TellMeAboutYourself call
  */
-static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call,
-						 struct sk_buff *skb, bool last)
+static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call)
 {
-	_enter(",{%u},%d", skb->len, last);
+	int ret;
 
-	if (skb->len > 0)
-		return -EBADMSG;
-	if (!last)
-		return 0;
+	_enter("");
+
+	ret = afs_extract_data(call, NULL, 0, false);
+	if (ret < 0)
+		return ret;
 
 	/* no unmarshalling required */
 	call->state = AFS_CALL_REPLYING;
diff --git a/fs/afs/flock.c b/fs/afs/flock.c
index d91a9c9..3191dff 100644
--- a/fs/afs/flock.c
+++ b/fs/afs/flock.c
@@ -36,8 +36,8 @@
 	if (!afs_lock_manager) {
 		mutex_lock(&afs_lock_manager_mutex);
 		if (!afs_lock_manager) {
-			afs_lock_manager =
-				create_singlethread_workqueue("kafs_lockd");
+			afs_lock_manager = alloc_workqueue("kafs_lockd",
+							   WQ_MEM_RECLAIM, 0);
 			if (!afs_lock_manager)
 				ret = -ENOMEM;
 		}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index c2e930e..96f4d76 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -235,20 +235,17 @@
 /*
  * deliver reply data to an FS.FetchStatus
  */
-static int afs_deliver_fs_fetch_status(struct afs_call *call,
-				       struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_status(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
-	_enter(",,%u", last);
+	_enter("");
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -309,8 +306,7 @@
 /*
  * deliver reply data to an FS.FetchData
  */
-static int afs_deliver_fs_fetch_data(struct afs_call *call,
-				     struct sk_buff *skb, bool last)
+static int afs_deliver_fs_fetch_data(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
@@ -318,7 +314,7 @@
 	void *buffer;
 	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
 	case 0:
@@ -334,12 +330,9 @@
 		 * client) */
 	case 1:
 		_debug("extract data length (MSW)");
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, &call->tmp, 4, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("DATA length MSW: %u", call->count);
@@ -352,12 +345,9 @@
 		/* extract the returned data length */
 	case 2:
 		_debug("extract data length");
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, &call->tmp, 4, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("DATA length: %u", call->count);
@@ -371,15 +361,12 @@
 		_debug("extract data");
 		if (call->count > 0) {
 			page = call->reply3;
-			buffer = kmap_atomic(page);
-			ret = afs_extract_data(call, skb, last, buffer,
-					       call->count);
-			kunmap_atomic(buffer);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
+			buffer = kmap(page);
+			ret = afs_extract_data(call, buffer,
+					       call->count, true);
+			kunmap(buffer);
+			if (ret < 0)
+				return ret;
 		}
 
 		call->offset = 0;
@@ -387,13 +374,10 @@
 
 		/* extract the metadata */
 	case 4:
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       (21 + 3 + 6) * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, call->buffer,
+				       (21 + 3 + 6) * 4, false);
+		if (ret < 0)
+			return ret;
 
 		bp = call->buffer;
 		xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
@@ -405,21 +389,15 @@
 		call->unmarshall++;
 
 	case 5:
-		_debug("trailer");
-		if (skb->len != 0)
-			return -EBADMSG;
 		break;
 	}
 
-	if (!last)
-		return 0;
-
 	if (call->count < PAGE_SIZE) {
 		_debug("clear");
 		page = call->reply3;
-		buffer = kmap_atomic(page);
+		buffer = kmap(page);
 		memset(buffer + call->count, 0, PAGE_SIZE - call->count);
-		kunmap_atomic(buffer);
+		kunmap(buffer);
 	}
 
 	_leave(" = 0 [done]");
@@ -532,14 +510,12 @@
 /*
  * deliver reply data to an FS.GiveUpCallBacks
  */
-static int afs_deliver_fs_give_up_callbacks(struct afs_call *call,
-					    struct sk_buff *skb, bool last)
+static int afs_deliver_fs_give_up_callbacks(struct afs_call *call)
 {
-	_enter(",{%u},%d", skb->len, last);
+	_enter("");
 
-	if (skb->len > 0)
-		return -EBADMSG; /* shouldn't be any reply data */
-	return 0;
+	/* shouldn't be any reply data */
+	return afs_extract_data(call, NULL, 0, false);
 }
 
 /*
@@ -617,20 +593,17 @@
 /*
  * deliver reply data to an FS.CreateFile or an FS.MakeDir
  */
-static int afs_deliver_fs_create_vnode(struct afs_call *call,
-				       struct sk_buff *skb, bool last)
+static int afs_deliver_fs_create_vnode(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -716,20 +689,17 @@
 /*
  * deliver reply data to an FS.RemoveFile or FS.RemoveDir
  */
-static int afs_deliver_fs_remove(struct afs_call *call,
-				 struct sk_buff *skb, bool last)
+static int afs_deliver_fs_remove(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -799,20 +769,17 @@
 /*
  * deliver reply data to an FS.Link
  */
-static int afs_deliver_fs_link(struct afs_call *call,
-			       struct sk_buff *skb, bool last)
+static int afs_deliver_fs_link(struct afs_call *call)
 {
 	struct afs_vnode *dvnode = call->reply, *vnode = call->reply2;
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -887,20 +854,17 @@
 /*
  * deliver reply data to an FS.Symlink
  */
-static int afs_deliver_fs_symlink(struct afs_call *call,
-				  struct sk_buff *skb, bool last)
+static int afs_deliver_fs_symlink(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -994,20 +958,17 @@
 /*
  * deliver reply data to an FS.Rename
  */
-static int afs_deliver_fs_rename(struct afs_call *call,
-				  struct sk_buff *skb, bool last)
+static int afs_deliver_fs_rename(struct afs_call *call)
 {
 	struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2;
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -1100,25 +1061,17 @@
 /*
  * deliver reply data to an FS.StoreData
  */
-static int afs_deliver_fs_store_data(struct afs_call *call,
-				     struct sk_buff *skb, bool last)
+static int afs_deliver_fs_store_data(struct afs_call *call)
 {
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
-	_enter(",,%u", last);
+	_enter("");
 
-	afs_transfer_reply(call, skb);
-	if (!last) {
-		_leave(" = 0 [more]");
-		return 0;
-	}
-
-	if (call->reply_size != call->reply_max) {
-		_leave(" = -EBADMSG [%u != %u]",
-		       call->reply_size, call->reply_max);
-		return -EBADMSG;
-	}
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
@@ -1286,26 +1239,18 @@
 /*
  * deliver reply data to an FS.StoreStatus
  */
-static int afs_deliver_fs_store_status(struct afs_call *call,
-				       struct sk_buff *skb, bool last)
+static int afs_deliver_fs_store_status(struct afs_call *call)
 {
 	afs_dataversion_t *store_version;
 	struct afs_vnode *vnode = call->reply;
 	const __be32 *bp;
+	int ret;
 
-	_enter(",,%u", last);
+	_enter("");
 
-	afs_transfer_reply(call, skb);
-	if (!last) {
-		_leave(" = 0 [more]");
-		return 0;
-	}
-
-	if (call->reply_size != call->reply_max) {
-		_leave(" = -EBADMSG [%u != %u]",
-		       call->reply_size, call->reply_max);
-		return -EBADMSG;
-	}
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	store_version = NULL;
@@ -1485,14 +1430,13 @@
 /*
  * deliver reply data to an FS.GetVolumeStatus
  */
-static int afs_deliver_fs_get_volume_status(struct afs_call *call,
-					    struct sk_buff *skb, bool last)
+static int afs_deliver_fs_get_volume_status(struct afs_call *call)
 {
 	const __be32 *bp;
 	char *p;
 	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
 	switch (call->unmarshall) {
 	case 0:
@@ -1502,13 +1446,10 @@
 		/* extract the returned status record */
 	case 1:
 		_debug("extract status");
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       12 * 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, call->buffer,
+				       12 * 4, true);
+		if (ret < 0)
+			return ret;
 
 		bp = call->buffer;
 		xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2);
@@ -1517,12 +1458,9 @@
 
 		/* extract the volume name length */
 	case 2:
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, &call->tmp, 4, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("volname length: %u", call->count);
@@ -1535,13 +1473,10 @@
 	case 3:
 		_debug("extract volname");
 		if (call->count > 0) {
-			ret = afs_extract_data(call, skb, last, call->reply3,
-					       call->count);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
+			ret = afs_extract_data(call, call->reply3,
+					       call->count, true);
+			if (ret < 0)
+				return ret;
 		}
 
 		p = call->reply3;
@@ -1559,13 +1494,10 @@
 		call->count = 4 - (call->count & 3);
 
 	case 4:
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       call->count);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, call->buffer,
+				       call->count, true);
+		if (ret < 0)
+			return ret;
 
 		call->offset = 0;
 		call->unmarshall++;
@@ -1573,12 +1505,9 @@
 
 		/* extract the offline message length */
 	case 5:
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, &call->tmp, 4, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("offline msg length: %u", call->count);
@@ -1591,13 +1520,10 @@
 	case 6:
 		_debug("extract offline");
 		if (call->count > 0) {
-			ret = afs_extract_data(call, skb, last, call->reply3,
-					       call->count);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
+			ret = afs_extract_data(call, call->reply3,
+					       call->count, true);
+			if (ret < 0)
+				return ret;
 		}
 
 		p = call->reply3;
@@ -1615,13 +1541,10 @@
 		call->count = 4 - (call->count & 3);
 
 	case 7:
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       call->count);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, call->buffer,
+				       call->count, true);
+		if (ret < 0)
+			return ret;
 
 		call->offset = 0;
 		call->unmarshall++;
@@ -1629,12 +1552,9 @@
 
 		/* extract the message of the day length */
 	case 8:
-		ret = afs_extract_data(call, skb, last, &call->tmp, 4);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, &call->tmp, 4, true);
+		if (ret < 0)
+			return ret;
 
 		call->count = ntohl(call->tmp);
 		_debug("motd length: %u", call->count);
@@ -1647,13 +1567,10 @@
 	case 9:
 		_debug("extract motd");
 		if (call->count > 0) {
-			ret = afs_extract_data(call, skb, last, call->reply3,
-					       call->count);
-			switch (ret) {
-			case 0:		break;
-			case -EAGAIN:	return 0;
-			default:	return ret;
-			}
+			ret = afs_extract_data(call, call->reply3,
+					       call->count, true);
+			if (ret < 0)
+				return ret;
 		}
 
 		p = call->reply3;
@@ -1664,35 +1581,20 @@
 		call->unmarshall++;
 
 		/* extract the message of the day padding */
-		if ((call->count & 3) == 0) {
-			call->unmarshall++;
-			goto no_motd_padding;
-		}
-		call->count = 4 - (call->count & 3);
+		call->count = (4 - (call->count & 3)) & 3;
 
 	case 10:
-		ret = afs_extract_data(call, skb, last, call->buffer,
-				       call->count);
-		switch (ret) {
-		case 0:		break;
-		case -EAGAIN:	return 0;
-		default:	return ret;
-		}
+		ret = afs_extract_data(call, call->buffer,
+				       call->count, false);
+		if (ret < 0)
+			return ret;
 
 		call->offset = 0;
 		call->unmarshall++;
-	no_motd_padding:
-
 	case 11:
-		_debug("trailer %d", skb->len);
-		if (skb->len != 0)
-			return -EBADMSG;
 		break;
 	}
 
-	if (!last)
-		return 0;
-
 	_leave(" = 0 [done]");
 	return 0;
 }
@@ -1760,19 +1662,16 @@
 /*
  * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock
  */
-static int afs_deliver_fs_xxxx_lock(struct afs_call *call,
-				    struct sk_buff *skb, bool last)
+static int afs_deliver_fs_xxxx_lock(struct afs_call *call)
 {
 	const __be32 *bp;
+	int ret;
 
-	_enter("{%u},{%u},%d", call->unmarshall, skb->len, last);
+	_enter("{%u}", call->unmarshall);
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	bp = call->buffer;
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 71d5982..5497c84 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -13,13 +13,13 @@
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
-#include <linux/skbuff.h>
 #include <linux/rxrpc.h>
 #include <linux/key.h>
 #include <linux/workqueue.h>
 #include <linux/sched.h>
 #include <linux/fscache.h>
 #include <linux/backing-dev.h>
+#include <net/af_rxrpc.h>
 
 #include "afs.h"
 #include "afs_vl.h"
@@ -56,7 +56,7 @@
  */
 struct afs_wait_mode {
 	/* RxRPC received message notification */
-	void (*rx_wakeup)(struct afs_call *call);
+	rxrpc_notify_rx_t notify_rx;
 
 	/* synchronous call waiter and call dispatched notification */
 	int (*wait)(struct afs_call *call);
@@ -75,10 +75,8 @@
 	const struct afs_call_type *type;	/* type of call */
 	const struct afs_wait_mode *wait_mode;	/* completion wait mode */
 	wait_queue_head_t	waitq;		/* processes awaiting completion */
-	void (*async_workfn)(struct afs_call *call); /* asynchronous work function */
 	struct work_struct	async_work;	/* asynchronous work processor */
 	struct work_struct	work;		/* actual work processor */
-	struct sk_buff_head	rx_queue;	/* received packets */
 	struct rxrpc_call	*rxcall;	/* RxRPC call handle */
 	struct key		*key;		/* security for this call */
 	struct afs_server	*server;	/* server affected by incoming CM call */
@@ -92,6 +90,7 @@
 	void			*reply4;	/* reply buffer (fourth part) */
 	pgoff_t			first;		/* first page in mapping to deal with */
 	pgoff_t			last;		/* last page in mapping to deal with */
+	size_t			offset;		/* offset into received data store */
 	enum {					/* call state */
 		AFS_CALL_REQUESTING,	/* request is being sent for outgoing call */
 		AFS_CALL_AWAIT_REPLY,	/* awaiting reply to outgoing call */
@@ -99,21 +98,18 @@
 		AFS_CALL_AWAIT_REQUEST,	/* awaiting request data on incoming call */
 		AFS_CALL_REPLYING,	/* replying to incoming call */
 		AFS_CALL_AWAIT_ACK,	/* awaiting final ACK of incoming call */
-		AFS_CALL_COMPLETE,	/* successfully completed */
-		AFS_CALL_BUSY,		/* server was busy */
-		AFS_CALL_ABORTED,	/* call was aborted */
-		AFS_CALL_ERROR,		/* call failed due to error */
+		AFS_CALL_COMPLETE,	/* Completed or failed */
 	}			state;
 	int			error;		/* error code */
+	u32			abort_code;	/* Remote abort ID or 0 */
 	unsigned		request_size;	/* size of request data */
 	unsigned		reply_max;	/* maximum size of reply */
-	unsigned		reply_size;	/* current size of reply */
 	unsigned		first_offset;	/* offset into mapping[first] */
 	unsigned		last_to;	/* amount of mapping[last] */
-	unsigned		offset;		/* offset into received data store */
 	unsigned char		unmarshall;	/* unmarshalling phase */
 	bool			incoming;	/* T if incoming call */
 	bool			send_pages;	/* T if data from mapping should be sent */
+	bool			need_attention;	/* T if RxRPC poked us */
 	u16			service_id;	/* RxRPC service ID to call */
 	__be16			port;		/* target UDP port */
 	__be32			operation_ID;	/* operation ID for an incoming call */
@@ -128,8 +124,7 @@
 	/* deliver request or reply data to an call
 	 * - returning an error will cause the call to be aborted
 	 */
-	int (*deliver)(struct afs_call *call, struct sk_buff *skb,
-		       bool last);
+	int (*deliver)(struct afs_call *call);
 
 	/* map an abort code to an error number */
 	int (*abort_to_error)(u32 abort_code);
@@ -607,6 +602,8 @@
 /*
  * rxrpc.c
  */
+extern struct socket *afs_socket;
+
 extern int afs_open_socket(void);
 extern void afs_close_socket(void);
 extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t,
@@ -614,11 +611,14 @@
 extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *,
 					    size_t, size_t);
 extern void afs_flat_call_destructor(struct afs_call *);
-extern void afs_transfer_reply(struct afs_call *, struct sk_buff *);
 extern void afs_send_empty_reply(struct afs_call *);
 extern void afs_send_simple_reply(struct afs_call *, const void *, size_t);
-extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
-			    size_t);
+extern int afs_extract_data(struct afs_call *, void *, size_t, bool);
+
+static inline int afs_transfer_reply(struct afs_call *call)
+{
+	return afs_extract_data(call, call->buffer, call->reply_max, false);
+}
 
 /*
  * security.c
@@ -642,7 +642,7 @@
 
 extern struct afs_server *afs_lookup_server(struct afs_cell *,
 					    const struct in_addr *);
-extern struct afs_server *afs_find_server(const struct in_addr *);
+extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *);
 extern void afs_put_server(struct afs_server *);
 extern void __exit afs_purge_servers(void);
 
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 35de0c0..0b187ef 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/completion.h>
 #include <linux/sched.h>
+#include <linux/random.h>
 #include "internal.h"
 
 MODULE_DESCRIPTION("AFS Client File System");
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 4832de84..59bdaa7 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -16,34 +16,36 @@
 #include "internal.h"
 #include "afs_cm.h"
 
-static struct socket *afs_socket; /* my RxRPC socket */
+struct socket *afs_socket; /* my RxRPC socket */
 static struct workqueue_struct *afs_async_calls;
+static struct afs_call *afs_spare_incoming_call;
 static atomic_t afs_outstanding_calls;
-static atomic_t afs_outstanding_skbs;
 
-static void afs_wake_up_call_waiter(struct afs_call *);
+static void afs_free_call(struct afs_call *);
+static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long);
 static int afs_wait_for_call_to_complete(struct afs_call *);
-static void afs_wake_up_async_call(struct afs_call *);
+static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long);
 static int afs_dont_wait_for_call_to_complete(struct afs_call *);
-static void afs_process_async_call(struct afs_call *);
-static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *);
-static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool);
+static void afs_process_async_call(struct work_struct *);
+static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
+static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
+static int afs_deliver_cm_op_id(struct afs_call *);
 
 /* synchronous call management */
 const struct afs_wait_mode afs_sync_call = {
-	.rx_wakeup	= afs_wake_up_call_waiter,
+	.notify_rx	= afs_wake_up_call_waiter,
 	.wait		= afs_wait_for_call_to_complete,
 };
 
 /* asynchronous call management */
 const struct afs_wait_mode afs_async_call = {
-	.rx_wakeup	= afs_wake_up_async_call,
+	.notify_rx	= afs_wake_up_async_call,
 	.wait		= afs_dont_wait_for_call_to_complete,
 };
 
 /* asynchronous incoming call management */
 static const struct afs_wait_mode afs_async_incoming_call = {
-	.rx_wakeup	= afs_wake_up_async_call,
+	.notify_rx	= afs_wake_up_async_call,
 };
 
 /* asynchronous incoming call initial processing */
@@ -53,17 +55,9 @@
 	.abort_to_error	= afs_abort_to_error,
 };
 
-static void afs_collect_incoming_call(struct work_struct *);
+static void afs_charge_preallocation(struct work_struct *);
 
-static struct sk_buff_head afs_incoming_calls;
-static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call);
-
-static void afs_async_workfn(struct work_struct *work)
-{
-	struct afs_call *call = container_of(work, struct afs_call, async_work);
-
-	call->async_workfn(call);
-}
+static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation);
 
 static int afs_wait_atomic_t(atomic_t *p)
 {
@@ -83,10 +77,8 @@
 
 	_enter("");
 
-	skb_queue_head_init(&afs_incoming_calls);
-
 	ret = -ENOMEM;
-	afs_async_calls = create_singlethread_workqueue("kafsd");
+	afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0);
 	if (!afs_async_calls)
 		goto error_0;
 
@@ -110,13 +102,15 @@
 	if (ret < 0)
 		goto error_2;
 
+	rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
+					   afs_rx_discard_new_call);
+
 	ret = kernel_listen(socket, INT_MAX);
 	if (ret < 0)
 		goto error_2;
 
-	rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor);
-
 	afs_socket = socket;
+	afs_charge_preallocation(NULL);
 	_leave(" = 0");
 	return 0;
 
@@ -136,55 +130,28 @@
 {
 	_enter("");
 
+	if (afs_spare_incoming_call) {
+		atomic_inc(&afs_outstanding_calls);
+		afs_free_call(afs_spare_incoming_call);
+		afs_spare_incoming_call = NULL;
+	}
+
+	_debug("outstanding %u", atomic_read(&afs_outstanding_calls));
 	wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t,
 			 TASK_UNINTERRUPTIBLE);
 	_debug("no outstanding calls");
 
+	flush_workqueue(afs_async_calls);
+	kernel_sock_shutdown(afs_socket, SHUT_RDWR);
+	flush_workqueue(afs_async_calls);
 	sock_release(afs_socket);
 
 	_debug("dework");
 	destroy_workqueue(afs_async_calls);
-
-	ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0);
 	_leave("");
 }
 
 /*
- * note that the data in a socket buffer is now delivered and that the buffer
- * should be freed
- */
-static void afs_data_delivered(struct sk_buff *skb)
-{
-	if (!skb) {
-		_debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs));
-		dump_stack();
-	} else {
-		_debug("DLVR %p{%u} [%d]",
-		       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-		if (atomic_dec_return(&afs_outstanding_skbs) == -1)
-			BUG();
-		rxrpc_kernel_data_delivered(skb);
-	}
-}
-
-/*
- * free a socket buffer
- */
-static void afs_free_skb(struct sk_buff *skb)
-{
-	if (!skb) {
-		_debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs));
-		dump_stack();
-	} else {
-		_debug("FREE %p{%u} [%d]",
-		       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-		if (atomic_dec_return(&afs_outstanding_skbs) == -1)
-			BUG();
-		rxrpc_kernel_free_skb(skb);
-	}
-}
-
-/*
  * free a call
  */
 static void afs_free_call(struct afs_call *call)
@@ -194,7 +161,6 @@
 
 	ASSERTCMP(call->rxcall, ==, NULL);
 	ASSERT(!work_pending(&call->async_work));
-	ASSERT(skb_queue_empty(&call->rx_queue));
 	ASSERT(call->type->name != NULL);
 
 	kfree(call->request);
@@ -210,7 +176,7 @@
 static void afs_end_call_nofree(struct afs_call *call)
 {
 	if (call->rxcall) {
-		rxrpc_kernel_end_call(call->rxcall);
+		rxrpc_kernel_end_call(afs_socket, call->rxcall);
 		call->rxcall = NULL;
 	}
 	if (call->type->destructor)
@@ -230,7 +196,7 @@
  * allocate a call with flat request and reply buffers
  */
 struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type,
-				     size_t request_size, size_t reply_size)
+				     size_t request_size, size_t reply_max)
 {
 	struct afs_call *call;
 
@@ -244,7 +210,7 @@
 
 	call->type = type;
 	call->request_size = request_size;
-	call->reply_max = reply_size;
+	call->reply_max = reply_max;
 
 	if (request_size) {
 		call->request = kmalloc(request_size, GFP_NOFS);
@@ -252,14 +218,13 @@
 			goto nomem_free;
 	}
 
-	if (reply_size) {
-		call->buffer = kmalloc(reply_size, GFP_NOFS);
+	if (reply_max) {
+		call->buffer = kmalloc(reply_max, GFP_NOFS);
 		if (!call->buffer)
 			goto nomem_free;
 	}
 
 	init_waitqueue_head(&call->waitq);
-	skb_queue_head_init(&call->rx_queue);
 	return call;
 
 nomem_free:
@@ -328,8 +293,8 @@
 			 * returns from sending the request */
 			if (first + loop >= last)
 				call->state = AFS_CALL_AWAIT_REPLY;
-			ret = rxrpc_kernel_send_data(call->rxcall, msg,
-						     to - offset);
+			ret = rxrpc_kernel_send_data(afs_socket, call->rxcall,
+						     msg, to - offset);
 			kunmap(pages[loop]);
 			if (ret < 0)
 				break;
@@ -357,7 +322,6 @@
 	struct msghdr msg;
 	struct kvec iov[1];
 	int ret;
-	struct sk_buff *skb;
 
 	_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
 
@@ -369,8 +333,7 @@
 	       atomic_read(&afs_outstanding_calls));
 
 	call->wait_mode = wait_mode;
-	call->async_workfn = afs_process_async_call;
-	INIT_WORK(&call->async_work, afs_async_workfn);
+	INIT_WORK(&call->async_work, afs_process_async_call);
 
 	memset(&srx, 0, sizeof(srx));
 	srx.srx_family = AF_RXRPC;
@@ -383,7 +346,8 @@
 
 	/* create a call */
 	rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key,
-					 (unsigned long) call, gfp);
+					 (unsigned long) call, gfp,
+					 wait_mode->notify_rx);
 	call->key = NULL;
 	if (IS_ERR(rxcall)) {
 		ret = PTR_ERR(rxcall);
@@ -409,7 +373,8 @@
 	 * request */
 	if (!call->send_pages)
 		call->state = AFS_CALL_AWAIT_REPLY;
-	ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
+	ret = rxrpc_kernel_send_data(afs_socket, rxcall,
+				     &msg, call->request_size);
 	if (ret < 0)
 		goto error_do_abort;
 
@@ -424,9 +389,7 @@
 	return wait_mode->wait(call);
 
 error_do_abort:
-	rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
-	while ((skb = skb_dequeue(&call->rx_queue)))
-		afs_free_skb(skb);
+	rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD");
 error_kill_call:
 	afs_end_call(call);
 	_leave(" = %d", ret);
@@ -434,135 +397,77 @@
 }
 
 /*
- * Handles intercepted messages that were arriving in the socket's Rx queue.
- *
- * Called from the AF_RXRPC call processor in waitqueue process context.  For
- * each call, it is guaranteed this will be called in order of packet to be
- * delivered.
- */
-static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID,
-			       struct sk_buff *skb)
-{
-	struct afs_call *call = (struct afs_call *) user_call_ID;
-
-	_enter("%p,,%u", call, skb->mark);
-
-	_debug("ICPT %p{%u} [%d]",
-	       skb, skb->mark, atomic_read(&afs_outstanding_skbs));
-
-	ASSERTCMP(sk, ==, afs_socket->sk);
-	atomic_inc(&afs_outstanding_skbs);
-
-	if (!call) {
-		/* its an incoming call for our callback service */
-		skb_queue_tail(&afs_incoming_calls, skb);
-		queue_work(afs_wq, &afs_collect_incoming_call_work);
-	} else {
-		/* route the messages directly to the appropriate call */
-		skb_queue_tail(&call->rx_queue, skb);
-		call->wait_mode->rx_wakeup(call);
-	}
-
-	_leave("");
-}
-
-/*
  * deliver messages to a call
  */
 static void afs_deliver_to_call(struct afs_call *call)
 {
-	struct sk_buff *skb;
-	bool last;
 	u32 abort_code;
 	int ret;
 
-	_enter("");
+	_enter("%s", call->type->name);
 
-	while ((call->state == AFS_CALL_AWAIT_REPLY ||
-		call->state == AFS_CALL_AWAIT_OP_ID ||
-		call->state == AFS_CALL_AWAIT_REQUEST ||
-		call->state == AFS_CALL_AWAIT_ACK) &&
-	       (skb = skb_dequeue(&call->rx_queue))) {
-		switch (skb->mark) {
-		case RXRPC_SKB_MARK_DATA:
-			_debug("Rcv DATA");
-			last = rxrpc_kernel_is_data_last(skb);
-			ret = call->type->deliver(call, skb, last);
-			switch (ret) {
-			case 0:
-				if (last &&
-				    call->state == AFS_CALL_AWAIT_REPLY)
-					call->state = AFS_CALL_COMPLETE;
-				break;
-			case -ENOTCONN:
-				abort_code = RX_CALL_DEAD;
-				goto do_abort;
-			case -ENOTSUPP:
-				abort_code = RX_INVALID_OPERATION;
-				goto do_abort;
-			default:
-				abort_code = RXGEN_CC_UNMARSHAL;
-				if (call->state != AFS_CALL_AWAIT_REPLY)
-					abort_code = RXGEN_SS_UNMARSHAL;
-			do_abort:
-				rxrpc_kernel_abort_call(call->rxcall,
-							abort_code);
-				call->error = ret;
-				call->state = AFS_CALL_ERROR;
-				break;
+	while (call->state == AFS_CALL_AWAIT_REPLY ||
+	       call->state == AFS_CALL_AWAIT_OP_ID ||
+	       call->state == AFS_CALL_AWAIT_REQUEST ||
+	       call->state == AFS_CALL_AWAIT_ACK
+	       ) {
+		if (call->state == AFS_CALL_AWAIT_ACK) {
+			size_t offset = 0;
+			ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+						     NULL, 0, &offset, false,
+						     &call->abort_code);
+			if (ret == -EINPROGRESS || ret == -EAGAIN)
+				return;
+			if (ret == 1) {
+				call->state = AFS_CALL_COMPLETE;
+				goto done;
 			}
-			afs_data_delivered(skb);
-			skb = NULL;
-			continue;
-		case RXRPC_SKB_MARK_FINAL_ACK:
-			_debug("Rcv ACK");
-			call->state = AFS_CALL_COMPLETE;
-			break;
-		case RXRPC_SKB_MARK_BUSY:
-			_debug("Rcv BUSY");
-			call->error = -EBUSY;
-			call->state = AFS_CALL_BUSY;
-			break;
-		case RXRPC_SKB_MARK_REMOTE_ABORT:
-			abort_code = rxrpc_kernel_get_abort_code(skb);
-			call->error = call->type->abort_to_error(abort_code);
-			call->state = AFS_CALL_ABORTED;
-			_debug("Rcv ABORT %u -> %d", abort_code, call->error);
-			break;
-		case RXRPC_SKB_MARK_LOCAL_ABORT:
-			abort_code = rxrpc_kernel_get_abort_code(skb);
-			call->error = call->type->abort_to_error(abort_code);
-			call->state = AFS_CALL_ABORTED;
-			_debug("Loc ABORT %u -> %d", abort_code, call->error);
-			break;
-		case RXRPC_SKB_MARK_NET_ERROR:
-			call->error = -rxrpc_kernel_get_error_number(skb);
-			call->state = AFS_CALL_ERROR;
-			_debug("Rcv NET ERROR %d", call->error);
-			break;
-		case RXRPC_SKB_MARK_LOCAL_ERROR:
-			call->error = -rxrpc_kernel_get_error_number(skb);
-			call->state = AFS_CALL_ERROR;
-			_debug("Rcv LOCAL ERROR %d", call->error);
-			break;
-		default:
-			BUG();
-			break;
+			return;
 		}
 
-		afs_free_skb(skb);
+		ret = call->type->deliver(call);
+		switch (ret) {
+		case 0:
+			if (call->state == AFS_CALL_AWAIT_REPLY)
+				call->state = AFS_CALL_COMPLETE;
+			goto done;
+		case -EINPROGRESS:
+		case -EAGAIN:
+			goto out;
+		case -ENOTCONN:
+			abort_code = RX_CALL_DEAD;
+			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+						abort_code, -ret, "KNC");
+			goto do_abort;
+		case -ENOTSUPP:
+			abort_code = RX_INVALID_OPERATION;
+			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+						abort_code, -ret, "KIV");
+			goto do_abort;
+		case -ENODATA:
+		case -EBADMSG:
+		case -EMSGSIZE:
+		default:
+			abort_code = RXGEN_CC_UNMARSHAL;
+			if (call->state != AFS_CALL_AWAIT_REPLY)
+				abort_code = RXGEN_SS_UNMARSHAL;
+			rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+						abort_code, EBADMSG, "KUM");
+			goto do_abort;
+		}
 	}
 
-	/* make sure the queue is empty if the call is done with (we might have
-	 * aborted the call early because of an unmarshalling error) */
-	if (call->state >= AFS_CALL_COMPLETE) {
-		while ((skb = skb_dequeue(&call->rx_queue)))
-			afs_free_skb(skb);
-		if (call->incoming)
-			afs_end_call(call);
-	}
-
+done:
+	if (call->state == AFS_CALL_COMPLETE && call->incoming)
+		afs_end_call(call);
+out:
 	_leave("");
+	return;
+
+do_abort:
+	call->error = ret;
+	call->state = AFS_CALL_COMPLETE;
+	goto done;
 }
 
 /*
@@ -570,7 +475,7 @@
  */
 static int afs_wait_for_call_to_complete(struct afs_call *call)
 {
-	struct sk_buff *skb;
+	const char *abort_why;
 	int ret;
 
 	DECLARE_WAITQUEUE(myself, current);
@@ -582,15 +487,18 @@
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		/* deliver any messages that are in the queue */
-		if (!skb_queue_empty(&call->rx_queue)) {
+		if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+			call->need_attention = false;
 			__set_current_state(TASK_RUNNING);
 			afs_deliver_to_call(call);
 			continue;
 		}
 
+		abort_why = "KWC";
 		ret = call->error;
-		if (call->state >= AFS_CALL_COMPLETE)
+		if (call->state == AFS_CALL_COMPLETE)
 			break;
+		abort_why = "KWI";
 		ret = -EINTR;
 		if (signal_pending(current))
 			break;
@@ -603,9 +511,8 @@
 	/* kill the call */
 	if (call->state < AFS_CALL_COMPLETE) {
 		_debug("call incomplete");
-		rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD);
-		while ((skb = skb_dequeue(&call->rx_queue)))
-			afs_free_skb(skb);
+		rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+					RX_CALL_DEAD, -ret, abort_why);
 	}
 
 	_debug("call complete");
@@ -617,17 +524,24 @@
 /*
  * wake up a waiting call
  */
-static void afs_wake_up_call_waiter(struct afs_call *call)
+static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall,
+				    unsigned long call_user_ID)
 {
+	struct afs_call *call = (struct afs_call *)call_user_ID;
+
+	call->need_attention = true;
 	wake_up(&call->waitq);
 }
 
 /*
  * wake up an asynchronous call
  */
-static void afs_wake_up_async_call(struct afs_call *call)
+static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall,
+				   unsigned long call_user_ID)
 {
-	_enter("");
+	struct afs_call *call = (struct afs_call *)call_user_ID;
+
+	call->need_attention = true;
 	queue_work(afs_async_calls, &call->async_work);
 }
 
@@ -645,8 +559,10 @@
 /*
  * delete an asynchronous call
  */
-static void afs_delete_async_call(struct afs_call *call)
+static void afs_delete_async_call(struct work_struct *work)
 {
+	struct afs_call *call = container_of(work, struct afs_call, async_work);
+
 	_enter("");
 
 	afs_free_call(call);
@@ -656,17 +572,19 @@
 
 /*
  * perform processing on an asynchronous call
- * - on a multiple-thread workqueue this work item may try to run on several
- *   CPUs at the same time
  */
-static void afs_process_async_call(struct afs_call *call)
+static void afs_process_async_call(struct work_struct *work)
 {
+	struct afs_call *call = container_of(work, struct afs_call, async_work);
+
 	_enter("");
 
-	if (!skb_queue_empty(&call->rx_queue))
+	if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
+		call->need_attention = false;
 		afs_deliver_to_call(call);
+	}
 
-	if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) {
+	if (call->state == AFS_CALL_COMPLETE && call->wait_mode) {
 		if (call->wait_mode->async_complete)
 			call->wait_mode->async_complete(call->reply,
 							call->error);
@@ -677,104 +595,93 @@
 
 		/* we can't just delete the call because the work item may be
 		 * queued */
-		call->async_workfn = afs_delete_async_call;
+		call->async_work.func = afs_delete_async_call;
 		queue_work(afs_async_calls, &call->async_work);
 	}
 
 	_leave("");
 }
 
-/*
- * empty a socket buffer into a flat reply buffer
- */
-void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID)
 {
-	size_t len = skb->len;
+	struct afs_call *call = (struct afs_call *)user_call_ID;
 
-	if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0)
-		BUG();
-	call->reply_size += len;
+	call->rxcall = rxcall;
 }
 
 /*
- * accept the backlog of incoming calls
+ * Charge the incoming call preallocation.
  */
-static void afs_collect_incoming_call(struct work_struct *work)
+static void afs_charge_preallocation(struct work_struct *work)
 {
-	struct rxrpc_call *rxcall;
-	struct afs_call *call = NULL;
-	struct sk_buff *skb;
+	struct afs_call *call = afs_spare_incoming_call;
 
-	while ((skb = skb_dequeue(&afs_incoming_calls))) {
-		_debug("new call");
-
-		/* don't need the notification */
-		afs_free_skb(skb);
-
+	for (;;) {
 		if (!call) {
 			call = kzalloc(sizeof(struct afs_call), GFP_KERNEL);
-			if (!call) {
-				rxrpc_kernel_reject_call(afs_socket);
-				return;
-			}
+			if (!call)
+				break;
 
-			call->async_workfn = afs_process_async_call;
-			INIT_WORK(&call->async_work, afs_async_workfn);
+			INIT_WORK(&call->async_work, afs_process_async_call);
 			call->wait_mode = &afs_async_incoming_call;
 			call->type = &afs_RXCMxxxx;
 			init_waitqueue_head(&call->waitq);
-			skb_queue_head_init(&call->rx_queue);
 			call->state = AFS_CALL_AWAIT_OP_ID;
-
-			_debug("CALL %p{%s} [%d]",
-			       call, call->type->name,
-			       atomic_read(&afs_outstanding_calls));
-			atomic_inc(&afs_outstanding_calls);
 		}
 
-		rxcall = rxrpc_kernel_accept_call(afs_socket,
-						  (unsigned long) call);
-		if (!IS_ERR(rxcall)) {
-			call->rxcall = rxcall;
-			call = NULL;
-		}
+		if (rxrpc_kernel_charge_accept(afs_socket,
+					       afs_wake_up_async_call,
+					       afs_rx_attach,
+					       (unsigned long)call,
+					       GFP_KERNEL) < 0)
+			break;
+		call = NULL;
 	}
-
-	if (call)
-		afs_free_call(call);
+	afs_spare_incoming_call = call;
 }
 
 /*
- * grab the operation ID from an incoming cache manager call
+ * Discard a preallocated call when a socket is shut down.
  */
-static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb,
-				bool last)
+static void afs_rx_discard_new_call(struct rxrpc_call *rxcall,
+				    unsigned long user_call_ID)
 {
-	size_t len = skb->len;
-	void *oibuf = (void *) &call->operation_ID;
+	struct afs_call *call = (struct afs_call *)user_call_ID;
 
-	_enter("{%u},{%zu},%d", call->offset, len, last);
+	atomic_inc(&afs_outstanding_calls);
+	call->rxcall = NULL;
+	afs_free_call(call);
+}
+
+/*
+ * Notification of an incoming call.
+ */
+static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall,
+			    unsigned long user_call_ID)
+{
+	atomic_inc(&afs_outstanding_calls);
+	queue_work(afs_wq, &afs_charge_preallocation_work);
+}
+
+/*
+ * Grab the operation ID from an incoming cache manager call.  The socket
+ * buffer is discarded on error or if we don't yet have sufficient data.
+ */
+static int afs_deliver_cm_op_id(struct afs_call *call)
+{
+	int ret;
+
+	_enter("{%zu}", call->offset);
 
 	ASSERTCMP(call->offset, <, 4);
 
 	/* the operation ID forms the first four bytes of the request data */
-	len = min_t(size_t, len, 4 - call->offset);
-	if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0)
-		BUG();
-	if (!pskb_pull(skb, len))
-		BUG();
-	call->offset += len;
-
-	if (call->offset < 4) {
-		if (last) {
-			_leave(" = -EBADMSG [op ID short]");
-			return -EBADMSG;
-		}
-		_leave(" = 0 [incomplete]");
-		return 0;
-	}
+	ret = afs_extract_data(call, &call->operation_ID, 4, true);
+	if (ret < 0)
+		return ret;
 
 	call->state = AFS_CALL_AWAIT_REQUEST;
+	call->offset = 0;
 
 	/* ask the cache manager to route the call (it'll change the call type
 	 * if successful) */
@@ -783,7 +690,7 @@
 
 	/* pass responsibility for the remainer of this message off to the
 	 * cache manager op */
-	return call->type->deliver(call, skb, last);
+	return call->type->deliver(call);
 }
 
 /*
@@ -803,14 +710,15 @@
 	msg.msg_flags		= 0;
 
 	call->state = AFS_CALL_AWAIT_ACK;
-	switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) {
+	switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) {
 	case 0:
 		_leave(" [replied]");
 		return;
 
 	case -ENOMEM:
 		_debug("oom");
-		rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+		rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+					RX_USER_ABORT, ENOMEM, "KOO");
 	default:
 		afs_end_call(call);
 		_leave(" [error]");
@@ -839,7 +747,7 @@
 	msg.msg_flags		= 0;
 
 	call->state = AFS_CALL_AWAIT_ACK;
-	n = rxrpc_kernel_send_data(call->rxcall, &msg, len);
+	n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len);
 	if (n >= 0) {
 		/* Success */
 		_leave(" [replied]");
@@ -848,37 +756,50 @@
 
 	if (n == -ENOMEM) {
 		_debug("oom");
-		rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT);
+		rxrpc_kernel_abort_call(afs_socket, call->rxcall,
+					RX_USER_ABORT, ENOMEM, "KOO");
 	}
 	afs_end_call(call);
 	_leave(" [error]");
 }
 
 /*
- * extract a piece of data from the received data socket buffers
+ * Extract a piece of data from the received data socket buffers.
  */
-int afs_extract_data(struct afs_call *call, struct sk_buff *skb,
-		     bool last, void *buf, size_t count)
+int afs_extract_data(struct afs_call *call, void *buf, size_t count,
+		     bool want_more)
 {
-	size_t len = skb->len;
+	int ret;
 
-	_enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count);
+	_enter("{%s,%zu},,%zu,%d",
+	       call->type->name, call->offset, count, want_more);
 
-	ASSERTCMP(call->offset, <, count);
+	ASSERTCMP(call->offset, <=, count);
 
-	len = min_t(size_t, len, count - call->offset);
-	if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 ||
-	    !pskb_pull(skb, len))
-		BUG();
-	call->offset += len;
+	ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
+				     buf, count, &call->offset,
+				     want_more, &call->abort_code);
+	if (ret == 0 || ret == -EAGAIN)
+		return ret;
 
-	if (call->offset < count) {
-		if (last) {
-			_leave(" = -EBADMSG [%d < %zu]", call->offset, count);
-			return -EBADMSG;
+	if (ret == 1) {
+		switch (call->state) {
+		case AFS_CALL_AWAIT_REPLY:
+			call->state = AFS_CALL_COMPLETE;
+			break;
+		case AFS_CALL_AWAIT_REQUEST:
+			call->state = AFS_CALL_REPLYING;
+			break;
+		default:
+			break;
 		}
-		_leave(" = -EAGAIN");
-		return -EAGAIN;
+		return 0;
 	}
-	return 0;
+
+	if (ret == -ECONNABORTED)
+		call->error = call->type->abort_to_error(call->abort_code);
+	else
+		call->error = ret;
+	call->state = AFS_CALL_COMPLETE;
+	return ret;
 }
diff --git a/fs/afs/server.c b/fs/afs/server.c
index f342acf..d4066ab 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -178,13 +178,18 @@
 /*
  * look up a server by its IP address
  */
-struct afs_server *afs_find_server(const struct in_addr *_addr)
+struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx)
 {
 	struct afs_server *server = NULL;
 	struct rb_node *p;
-	struct in_addr addr = *_addr;
+	struct in_addr addr = srx->transport.sin.sin_addr;
 
-	_enter("%pI4", &addr.s_addr);
+	_enter("{%d,%pI4}", srx->transport.family, &addr.s_addr);
+
+	if (srx->transport.family != AF_INET) {
+		WARN(true, "AFS does not yes support non-IPv4 addresses\n");
+		return NULL;
+	}
 
 	read_lock(&afs_servers_lock);
 
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index 340afd0..94bcd97 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -58,22 +58,18 @@
 /*
  * deliver reply data to a VL.GetEntryByXXX call
  */
-static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call,
-					   struct sk_buff *skb, bool last)
+static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call)
 {
 	struct afs_cache_vlocation *entry;
 	__be32 *bp;
 	u32 tmp;
-	int loop;
+	int loop, ret;
 
-	_enter(",,%u", last);
+	_enter("");
 
-	afs_transfer_reply(call, skb);
-	if (!last)
-		return 0;
-
-	if (call->reply_size != call->reply_max)
-		return -EBADMSG;
+	ret = afs_transfer_reply(call);
+	if (ret < 0)
+		return ret;
 
 	/* unmarshall the reply once we've received all of it */
 	entry = call->reply;
diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c
index 5297678..45a8639 100644
--- a/fs/afs/vlocation.c
+++ b/fs/afs/vlocation.c
@@ -594,8 +594,8 @@
  */
 int __init afs_vlocation_update_init(void)
 {
-	afs_vlocation_update_worker =
-		create_singlethread_workqueue("kafs_vlupdated");
+	afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated",
+						      WQ_MEM_RECLAIM, 0);
 	return afs_vlocation_update_worker ? 0 : -ENOMEM;
 }
 
diff --git a/fs/aio.c b/fs/aio.c
index fb8e45b..4fe81d1 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,7 +239,12 @@
 	static const struct dentry_operations ops = {
 		.d_dname	= simple_dname,
 	};
-	return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
+	struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+					   AIO_RING_MAGIC);
+
+	if (!IS_ERR(root))
+		root->d_sb->s_iflags |= SB_I_NOEXEC;
+	return root;
 }
 
 /* aio_setup
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index b493909..d8e6d42 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -417,6 +417,7 @@
 	}
 	return NULL;
 }
+
 /*
  * Find an eligible tree to time-out
  * A tree is eligible if :-
@@ -432,6 +433,7 @@
 	struct dentry *root = sb->s_root;
 	struct dentry *dentry;
 	struct dentry *expired;
+	struct dentry *found;
 	struct autofs_info *ino;
 
 	if (!root)
@@ -442,31 +444,46 @@
 
 	dentry = NULL;
 	while ((dentry = get_next_positive_subdir(dentry, root))) {
+		int flags = how;
+
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
-		if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
-			expired = NULL;
-		else
-			expired = should_expire(dentry, mnt, timeout, how);
-		if (!expired) {
+		if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
 			spin_unlock(&sbi->fs_lock);
 			continue;
 		}
+		spin_unlock(&sbi->fs_lock);
+
+		expired = should_expire(dentry, mnt, timeout, flags);
+		if (!expired)
+			continue;
+
+		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(expired);
 		ino->flags |= AUTOFS_INF_WANT_EXPIRE;
 		spin_unlock(&sbi->fs_lock);
 		synchronize_rcu();
-		spin_lock(&sbi->fs_lock);
-		if (should_expire(expired, mnt, timeout, how)) {
-			if (expired != dentry)
-				dput(dentry);
-			goto found;
-		}
 
+		/* Make sure a reference is not taken on found if
+		 * things have changed.
+		 */
+		flags &= ~AUTOFS_EXP_LEAVES;
+		found = should_expire(expired, mnt, timeout, how);
+		if (!found || found != expired)
+			/* Something has changed, continue */
+			goto next;
+
+		if (expired != dentry)
+			dput(dentry);
+
+		spin_lock(&sbi->fs_lock);
+		goto found;
+next:
+		spin_lock(&sbi->fs_lock);
 		ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
+		spin_unlock(&sbi->fs_lock);
 		if (expired != dentry)
 			dput(expired);
-		spin_unlock(&sbi->fs_lock);
 	}
 	return NULL;
 
@@ -483,6 +500,7 @@
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
 	int status;
+	int state;
 
 	/* Block on any pending expire */
 	if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
@@ -490,8 +508,19 @@
 	if (rcu_walk)
 		return -ECHILD;
 
+retry:
 	spin_lock(&sbi->fs_lock);
-	if (ino->flags & AUTOFS_INF_EXPIRING) {
+	state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
+	if (state == AUTOFS_INF_WANT_EXPIRE) {
+		spin_unlock(&sbi->fs_lock);
+		/*
+		 * Possibly being selected for expire, wait until
+		 * it's selected or not.
+		 */
+		schedule_timeout_uninterruptible(HZ/10);
+		goto retry;
+	}
+	if (state & AUTOFS_INF_EXPIRING) {
 		spin_unlock(&sbi->fs_lock);
 
 		pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7f6aff3f..2472af2 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -853,6 +853,7 @@
 		current->flags |= PF_RANDOMIZE;
 
 	setup_new_exec(bprm);
+	install_exec_creds(bprm);
 
 	/* Do this so that we can load the interpreter, if need be.  We will
 	   change some of these later */
@@ -1044,7 +1045,6 @@
 		goto out;
 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
-	install_exec_creds(bprm);
 	retval = create_elf_tables(bprm, &loc->elf_ex,
 			  load_addr, interp_load_addr);
 	if (retval < 0)
@@ -1624,20 +1624,12 @@
 		regset->writeback(task, regset, 1);
 }
 
-#ifndef PR_REG_SIZE
-#define PR_REG_SIZE(S) sizeof(S)
-#endif
-
 #ifndef PRSTATUS_SIZE
-#define PRSTATUS_SIZE(S) sizeof(S)
-#endif
-
-#ifndef PR_REG_PTR
-#define PR_REG_PTR(S) (&((S)->pr_reg))
+#define PRSTATUS_SIZE(S, R) sizeof(S)
 #endif
 
 #ifndef SET_PR_FPVALID
-#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
+#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
 #endif
 
 static int fill_thread_core_info(struct elf_thread_core_info *t,
@@ -1645,6 +1637,7 @@
 				 long signr, size_t *total)
 {
 	unsigned int i;
+	unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
 
 	/*
 	 * NT_PRSTATUS is the one special case, because the regset data
@@ -1653,12 +1646,11 @@
 	 * We assume that regset 0 is NT_PRSTATUS.
 	 */
 	fill_prstatus(&t->prstatus, t->task, signr);
-	(void) view->regsets[0].get(t->task, &view->regsets[0],
-				    0, PR_REG_SIZE(t->prstatus.pr_reg),
-				    PR_REG_PTR(&t->prstatus), NULL);
+	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
+				    &t->prstatus.pr_reg, NULL);
 
 	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
-		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
+		  PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
 	*total += notesize(&t->notes[0]);
 
 	do_thread_regset_writeback(t->task, &view->regsets[0]);
@@ -1688,7 +1680,8 @@
 						  regset->core_note_type,
 						  size, data);
 				else {
-					SET_PR_FPVALID(&t->prstatus, 1);
+					SET_PR_FPVALID(&t->prstatus,
+							1, regset_size);
 					fill_note(&t->notes[i], "CORE",
 						  NT_PRFPREG, size, data);
 				}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c3cdde8..08ae993 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -249,7 +249,8 @@
 		 * thaw_bdev drops it.
 		 */
 		sb = get_super(bdev);
-		drop_super(sb);
+		if (sb)
+			drop_super(sb);
 		mutex_unlock(&bdev->bd_fsfreeze_mutex);
 		return sb;
 	}
@@ -646,7 +647,7 @@
 {
 	struct dentry *dent;
 	dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
-	if (dent)
+	if (!IS_ERR(dent))
 		dent->d_sb->s_iflags |= SB_I_CGROUPWB;
 	return dent;
 }
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 2b88439..455a6b2 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -589,6 +589,7 @@
 
 			list_del(&ref2->list);
 			kmem_cache_free(btrfs_prelim_ref_cache, ref2);
+			cond_resched();
 		}
 
 	}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2fe8f89..33fe035 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -427,6 +427,7 @@
 	struct list_head ro_bgs;
 	struct list_head priority_tickets;
 	struct list_head tickets;
+	u64 tickets_id;
 
 	struct rw_semaphore groups_sem;
 	/* for block groups in our same type */
@@ -1028,6 +1029,7 @@
 	struct btrfs_workqueue *qgroup_rescan_workers;
 	struct completion qgroup_rescan_completion;
 	struct btrfs_work qgroup_rescan_work;
+	bool qgroup_rescan_running;	/* protected by qgroup_rescan_lock */
 
 	/* filesystem state */
 	unsigned long fs_state;
@@ -1079,6 +1081,8 @@
 	struct list_head pinned_chunks;
 
 	int creating_free_space_tree;
+	/* Used to record internally whether fs has been frozen */
+	int fs_frozen;
 };
 
 struct btrfs_subvolume_writers {
@@ -2578,7 +2582,7 @@
 				   struct btrfs_root *root,
 				   u64 root_objectid, u64 owner, u64 offset,
 				   struct btrfs_key *ins);
-int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes,
 			 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
 			 struct btrfs_key *ins, int is_data, int delalloc);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index b6d210e..ac02e04 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -541,7 +541,6 @@
 	struct btrfs_delayed_ref_head *existing;
 	struct btrfs_delayed_ref_head *head_ref = NULL;
 	struct btrfs_delayed_ref_root *delayed_refs;
-	struct btrfs_qgroup_extent_record *qexisting;
 	int count_mod = 1;
 	int must_insert_reserved = 0;
 
@@ -606,10 +605,8 @@
 		qrecord->num_bytes = num_bytes;
 		qrecord->old_roots = NULL;
 
-		qexisting = btrfs_qgroup_insert_dirty_extent(fs_info,
-							     delayed_refs,
-							     qrecord);
-		if (qexisting)
+		if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info,
+					delayed_refs, qrecord))
 			kfree(qrecord);
 	}
 
@@ -862,33 +859,6 @@
 	return 0;
 }
 
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
-				     struct btrfs_trans_handle *trans,
-				     u64 ref_root, u64 bytenr, u64 num_bytes)
-{
-	struct btrfs_delayed_ref_root *delayed_refs;
-	struct btrfs_delayed_ref_head *ref_head;
-	int ret = 0;
-
-	if (!fs_info->quota_enabled || !is_fstree(ref_root))
-		return 0;
-
-	delayed_refs = &trans->transaction->delayed_refs;
-
-	spin_lock(&delayed_refs->lock);
-	ref_head = find_ref_head(&delayed_refs->href_root, bytenr, 0);
-	if (!ref_head) {
-		ret = -ENOENT;
-		goto out;
-	}
-	WARN_ON(ref_head->qgroup_reserved || ref_head->qgroup_ref_root);
-	ref_head->qgroup_ref_root = ref_root;
-	ref_head->qgroup_reserved = num_bytes;
-out:
-	spin_unlock(&delayed_refs->lock);
-	return ret;
-}
-
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				struct btrfs_trans_handle *trans,
 				u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 5fca953..43f3629 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -250,9 +250,6 @@
 			       u64 parent, u64 ref_root,
 			       u64 owner, u64 offset, u64 reserved, int action,
 			       struct btrfs_delayed_extent_op *extent_op);
-int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
-				     struct btrfs_trans_handle *trans,
-				     u64 ref_root, u64 bytenr, u64 num_bytes);
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				struct btrfs_trans_handle *trans,
 				u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 59febfb..54bc8c7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -559,8 +559,29 @@
 	u32 nritems = btrfs_header_nritems(leaf);
 	int slot;
 
-	if (nritems == 0)
+	if (nritems == 0) {
+		struct btrfs_root *check_root;
+
+		key.objectid = btrfs_header_owner(leaf);
+		key.type = BTRFS_ROOT_ITEM_KEY;
+		key.offset = (u64)-1;
+
+		check_root = btrfs_get_fs_root(root->fs_info, &key, false);
+		/*
+		 * The only reason we also check NULL here is that during
+		 * open_ctree() some roots has not yet been set up.
+		 */
+		if (!IS_ERR_OR_NULL(check_root)) {
+			/* if leaf is the root, then it's fine */
+			if (leaf->start !=
+			    btrfs_root_bytenr(&check_root->root_item)) {
+				CORRUPT("non-root leaf's nritems is 0",
+					leaf, root, 0);
+				return -EIO;
+			}
+		}
 		return 0;
+	}
 
 	/* Check the 0 item */
 	if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
@@ -612,6 +633,19 @@
 	return 0;
 }
 
+static int check_node(struct btrfs_root *root, struct extent_buffer *node)
+{
+	unsigned long nr = btrfs_header_nritems(node);
+
+	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
+		btrfs_crit(root->fs_info,
+			   "corrupt node: block %llu root %llu nritems %lu",
+			   node->start, root->objectid, nr);
+		return -EIO;
+	}
+	return 0;
+}
+
 static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 				      u64 phy_offset, struct page *page,
 				      u64 start, u64 end, int mirror)
@@ -682,6 +716,9 @@
 		ret = -EIO;
 	}
 
+	if (found_level > 0 && check_node(root, eb))
+		ret = -EIO;
+
 	if (!ret)
 		set_extent_buffer_uptodate(eb);
 err:
@@ -1618,8 +1655,8 @@
 	return ret;
 }
 
-static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
-					       u64 root_id)
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+					u64 root_id)
 {
 	struct btrfs_root *root;
 
@@ -2298,6 +2335,7 @@
 	fs_info->quota_enabled = 0;
 	fs_info->pending_quota_state = 0;
 	fs_info->qgroup_ulist = NULL;
+	fs_info->qgroup_rescan_running = false;
 	mutex_init(&fs_info->qgroup_rescan_lock);
 }
 
@@ -2624,6 +2662,7 @@
 	atomic_set(&fs_info->qgroup_op_seq, 0);
 	atomic_set(&fs_info->reada_works_cnt, 0);
 	atomic64_set(&fs_info->tree_mod_seq, 0);
+	fs_info->fs_frozen = 0;
 	fs_info->sb = sb;
 	fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
 	fs_info->metadata_ratio = 0;
@@ -3739,8 +3778,15 @@
 	if (btrfs_root_refs(&root->root_item) == 0)
 		synchronize_srcu(&fs_info->subvol_srcu);
 
-	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
 		btrfs_free_log(NULL, root);
+		if (root->reloc_root) {
+			free_extent_buffer(root->reloc_root->node);
+			free_extent_buffer(root->reloc_root->commit_root);
+			btrfs_put_fs_root(root->reloc_root);
+			root->reloc_root = NULL;
+		}
+	}
 
 	if (root->free_ino_pinned)
 		__btrfs_remove_free_space_cache(root->free_ino_pinned);
@@ -3851,7 +3897,7 @@
 	smp_mb();
 
 	/* wait for the qgroup rescan worker to stop */
-	btrfs_qgroup_wait_for_completion(fs_info);
+	btrfs_qgroup_wait_for_completion(fs_info, false);
 
 	/* wait for the uuid_scan task to finish */
 	down(&fs_info->uuid_tree_rescan_sem);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b3207a0e..f19a982 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,6 +68,8 @@
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
 				      struct btrfs_key *location);
 int btrfs_init_fs_root(struct btrfs_root *root);
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+					u64 root_id);
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
 			 struct btrfs_root *root);
 void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 61b494e..665da8f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -60,21 +60,6 @@
 	CHUNK_ALLOC_FORCE = 2,
 };
 
-/*
- * Control how reservations are dealt with.
- *
- * RESERVE_FREE - freeing a reservation.
- * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for
- *   ENOSPC accounting
- * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update
- *   bytes_may_use as the ENOSPC accounting is done elsewhere
- */
-enum {
-	RESERVE_FREE = 0,
-	RESERVE_ALLOC = 1,
-	RESERVE_ALLOC_NO_ACCOUNT = 2,
-};
-
 static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root, u64 bytenr,
 			      u64 num_bytes, int alloc);
@@ -104,9 +89,10 @@
 			 struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 			    int dump_block_groups);
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-				       u64 num_bytes, int reserve,
-				       int delalloc);
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+				    u64 ram_bytes, u64 num_bytes, int delalloc);
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+				     u64 num_bytes, int delalloc);
 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
 			       u64 num_bytes);
 int btrfs_pin_extent(struct btrfs_root *root,
@@ -3501,7 +3487,6 @@
 		dcs = BTRFS_DC_SETUP;
 	else if (ret == -ENOSPC)
 		set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags);
-	btrfs_free_reserved_data_space(inode, 0, num_pages);
 
 out_put:
 	iput(inode);
@@ -4286,13 +4271,10 @@
 	if (ret < 0)
 		return ret;
 
-	/*
-	 * Use new btrfs_qgroup_reserve_data to reserve precious data space
-	 *
-	 * TODO: Find a good method to avoid reserve data space for NOCOW
-	 * range, but don't impact performance on quota disable case.
-	 */
+	/* Use new btrfs_qgroup_reserve_data to reserve precious data space. */
 	ret = btrfs_qgroup_reserve_data(inode, start, len);
+	if (ret)
+		btrfs_free_reserved_data_space_noquota(inode, start, len);
 	return ret;
 }
 
@@ -4472,6 +4454,15 @@
 	}
 }
 
+/*
+ * If force is CHUNK_ALLOC_FORCE:
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ * If force is NOT CHUNK_ALLOC_FORCE:
+ *    - return 0 if it doesn't need to allocate a new chunk,
+ *    - return 1 if it successfully allocates a chunk,
+ *    - return errors including -ENOSPC otherwise.
+ */
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 flags, int force)
 {
@@ -4882,7 +4873,7 @@
 				     btrfs_get_alloc_profile(root, 0),
 				     CHUNK_ALLOC_NO_FORCE);
 		btrfs_end_transaction(trans, root);
-		if (ret == -ENOSPC)
+		if (ret > 0 || ret == -ENOSPC)
 			ret = 0;
 		break;
 	case COMMIT_TRANS:
@@ -4907,11 +4898,6 @@
 	u64 expected;
 	u64 to_reclaim = 0;
 
-	to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
-	if (can_overcommit(root, space_info, to_reclaim,
-			   BTRFS_RESERVE_FLUSH_ALL))
-		return 0;
-
 	list_for_each_entry(ticket, &space_info->tickets, list)
 		to_reclaim += ticket->bytes;
 	list_for_each_entry(ticket, &space_info->priority_tickets, list)
@@ -4919,6 +4905,11 @@
 	if (to_reclaim)
 		return to_reclaim;
 
+	to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
+	if (can_overcommit(root, space_info, to_reclaim,
+			   BTRFS_RESERVE_FLUSH_ALL))
+		return 0;
+
 	used = space_info->bytes_used + space_info->bytes_reserved +
 	       space_info->bytes_pinned + space_info->bytes_readonly +
 	       space_info->bytes_may_use;
@@ -4972,12 +4963,12 @@
  */
 static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
 {
-	struct reserve_ticket *last_ticket = NULL;
 	struct btrfs_fs_info *fs_info;
 	struct btrfs_space_info *space_info;
 	u64 to_reclaim;
 	int flush_state;
 	int commit_cycles = 0;
+	u64 last_tickets_id;
 
 	fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
 	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
@@ -4990,8 +4981,7 @@
 		spin_unlock(&space_info->lock);
 		return;
 	}
-	last_ticket = list_first_entry(&space_info->tickets,
-				       struct reserve_ticket, list);
+	last_tickets_id = space_info->tickets_id;
 	spin_unlock(&space_info->lock);
 
 	flush_state = FLUSH_DELAYED_ITEMS_NR;
@@ -5011,10 +5001,10 @@
 							      space_info);
 		ticket = list_first_entry(&space_info->tickets,
 					  struct reserve_ticket, list);
-		if (last_ticket == ticket) {
+		if (last_tickets_id == space_info->tickets_id) {
 			flush_state++;
 		} else {
-			last_ticket = ticket;
+			last_tickets_id = space_info->tickets_id;
 			flush_state = FLUSH_DELAYED_ITEMS_NR;
 			if (commit_cycles)
 				commit_cycles--;
@@ -5390,6 +5380,7 @@
 			list_del_init(&ticket->list);
 			num_bytes -= ticket->bytes;
 			ticket->bytes = 0;
+			space_info->tickets_id++;
 			wake_up(&ticket->wait);
 		} else {
 			ticket->bytes -= num_bytes;
@@ -5432,6 +5423,7 @@
 			num_bytes -= ticket->bytes;
 			space_info->bytes_may_use += ticket->bytes;
 			ticket->bytes = 0;
+			space_info->tickets_id++;
 			wake_up(&ticket->wait);
 		} else {
 			trace_btrfs_space_reservation(fs_info, "space_info",
@@ -6497,19 +6489,15 @@
 }
 
 /**
- * btrfs_update_reserved_bytes - update the block_group and space info counters
+ * btrfs_add_reserved_bytes - update the block_group and space info counters
  * @cache:	The cache we are manipulating
+ * @ram_bytes:  The number of bytes of file content, and will be same to
+ *              @num_bytes except for the compress path.
  * @num_bytes:	The number of bytes in question
- * @reserve:	One of the reservation enums
  * @delalloc:   The blocks are allocated for the delalloc write
  *
- * This is called by the allocator when it reserves space, or by somebody who is
- * freeing space that was never actually used on disk.  For example if you
- * reserve some space for a new leaf in transaction A and before transaction A
- * commits you free that leaf, you call this with reserve set to 0 in order to
- * clear the reservation.
- *
- * Metadata reservations should be called with RESERVE_ALLOC so we do the proper
+ * This is called by the allocator when it reserves space. Metadata
+ * reservations should be called with RESERVE_ALLOC so we do the proper
  * ENOSPC accounting.  For data we handle the reservation through clearing the
  * delalloc bits in the io_tree.  We have to do this since we could end up
  * allocating less disk space for the amount of data we have reserved in the
@@ -6519,44 +6507,63 @@
  * make the reservation and return -EAGAIN, otherwise this function always
  * succeeds.
  */
-static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-				       u64 num_bytes, int reserve, int delalloc)
+static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
+				    u64 ram_bytes, u64 num_bytes, int delalloc)
 {
 	struct btrfs_space_info *space_info = cache->space_info;
 	int ret = 0;
 
 	spin_lock(&space_info->lock);
 	spin_lock(&cache->lock);
-	if (reserve != RESERVE_FREE) {
-		if (cache->ro) {
-			ret = -EAGAIN;
-		} else {
-			cache->reserved += num_bytes;
-			space_info->bytes_reserved += num_bytes;
-			if (reserve == RESERVE_ALLOC) {
-				trace_btrfs_space_reservation(cache->fs_info,
-						"space_info", space_info->flags,
-						num_bytes, 0);
-				space_info->bytes_may_use -= num_bytes;
-			}
-
-			if (delalloc)
-				cache->delalloc_bytes += num_bytes;
-		}
+	if (cache->ro) {
+		ret = -EAGAIN;
 	} else {
-		if (cache->ro)
-			space_info->bytes_readonly += num_bytes;
-		cache->reserved -= num_bytes;
-		space_info->bytes_reserved -= num_bytes;
+		cache->reserved += num_bytes;
+		space_info->bytes_reserved += num_bytes;
 
+		trace_btrfs_space_reservation(cache->fs_info,
+				"space_info", space_info->flags,
+				ram_bytes, 0);
+		space_info->bytes_may_use -= ram_bytes;
 		if (delalloc)
-			cache->delalloc_bytes -= num_bytes;
+			cache->delalloc_bytes += num_bytes;
 	}
 	spin_unlock(&cache->lock);
 	spin_unlock(&space_info->lock);
 	return ret;
 }
 
+/**
+ * btrfs_free_reserved_bytes - update the block_group and space info counters
+ * @cache:      The cache we are manipulating
+ * @num_bytes:  The number of bytes in question
+ * @delalloc:   The blocks are allocated for the delalloc write
+ *
+ * This is called by somebody who is freeing space that was never actually used
+ * on disk.  For example if you reserve some space for a new leaf in transaction
+ * A and before transaction A commits you free that leaf, you call this with
+ * reserve set to 0 in order to clear the reservation.
+ */
+
+static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
+				     u64 num_bytes, int delalloc)
+{
+	struct btrfs_space_info *space_info = cache->space_info;
+	int ret = 0;
+
+	spin_lock(&space_info->lock);
+	spin_lock(&cache->lock);
+	if (cache->ro)
+		space_info->bytes_readonly += num_bytes;
+	cache->reserved -= num_bytes;
+	space_info->bytes_reserved -= num_bytes;
+
+	if (delalloc)
+		cache->delalloc_bytes -= num_bytes;
+	spin_unlock(&cache->lock);
+	spin_unlock(&space_info->lock);
+	return ret;
+}
 void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root)
 {
@@ -7191,7 +7198,7 @@
 		WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
 
 		btrfs_add_free_space(cache, buf->start, buf->len);
-		btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
+		btrfs_free_reserved_bytes(cache, buf->len, 0);
 		btrfs_put_block_group(cache);
 		trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
 		pin = 0;
@@ -7416,9 +7423,9 @@
  * the free space extent currently.
  */
 static noinline int find_free_extent(struct btrfs_root *orig_root,
-				     u64 num_bytes, u64 empty_size,
-				     u64 hint_byte, struct btrfs_key *ins,
-				     u64 flags, int delalloc)
+				u64 ram_bytes, u64 num_bytes, u64 empty_size,
+				u64 hint_byte, struct btrfs_key *ins,
+				u64 flags, int delalloc)
 {
 	int ret = 0;
 	struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -7430,8 +7437,6 @@
 	struct btrfs_space_info *space_info;
 	int loop = 0;
 	int index = __get_raid_index(flags);
-	int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
-		RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
 	bool failed_cluster_refill = false;
 	bool failed_alloc = false;
 	bool use_cluster = true;
@@ -7763,8 +7768,8 @@
 					     search_start - offset);
 		BUG_ON(offset > search_start);
 
-		ret = btrfs_update_reserved_bytes(block_group, num_bytes,
-						  alloc_type, delalloc);
+		ret = btrfs_add_reserved_bytes(block_group, ram_bytes,
+				num_bytes, delalloc);
 		if (ret == -EAGAIN) {
 			btrfs_add_free_space(block_group, offset, num_bytes);
 			goto loop;
@@ -7936,7 +7941,7 @@
 	up_read(&info->groups_sem);
 }
 
-int btrfs_reserve_extent(struct btrfs_root *root,
+int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes,
 			 u64 num_bytes, u64 min_alloc_size,
 			 u64 empty_size, u64 hint_byte,
 			 struct btrfs_key *ins, int is_data, int delalloc)
@@ -7948,8 +7953,8 @@
 	flags = btrfs_get_alloc_profile(root, is_data);
 again:
 	WARN_ON(num_bytes < root->sectorsize);
-	ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
-			       flags, delalloc);
+	ret = find_free_extent(root, ram_bytes, num_bytes, empty_size,
+			       hint_byte, ins, flags, delalloc);
 	if (!ret && !is_data) {
 		btrfs_dec_block_group_reservations(root->fs_info,
 						   ins->objectid);
@@ -7958,6 +7963,7 @@
 			num_bytes = min(num_bytes >> 1, ins->offset);
 			num_bytes = round_down(num_bytes, root->sectorsize);
 			num_bytes = max(num_bytes, min_alloc_size);
+			ram_bytes = num_bytes;
 			if (num_bytes == min_alloc_size)
 				final_tried = true;
 			goto again;
@@ -7995,7 +8001,7 @@
 		if (btrfs_test_opt(root->fs_info, DISCARD))
 			ret = btrfs_discard_extent(root, start, len, NULL);
 		btrfs_add_free_space(cache, start, len);
-		btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
+		btrfs_free_reserved_bytes(cache, len, delalloc);
 		trace_btrfs_reserved_extent_free(root, start, len);
 	}
 
@@ -8208,6 +8214,7 @@
 {
 	int ret;
 	struct btrfs_block_group_cache *block_group;
+	struct btrfs_space_info *space_info;
 
 	/*
 	 * Mixed block groups will exclude before processing the log so we only
@@ -8223,9 +8230,14 @@
 	if (!block_group)
 		return -EINVAL;
 
-	ret = btrfs_update_reserved_bytes(block_group, ins->offset,
-					  RESERVE_ALLOC_NO_ACCOUNT, 0);
-	BUG_ON(ret); /* logic error */
+	space_info = block_group->space_info;
+	spin_lock(&space_info->lock);
+	spin_lock(&block_group->lock);
+	space_info->bytes_reserved += ins->offset;
+	block_group->reserved += ins->offset;
+	spin_unlock(&block_group->lock);
+	spin_unlock(&space_info->lock);
+
 	ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
 					 0, owner, offset, ins, 1);
 	btrfs_put_block_group(block_group);
@@ -8368,7 +8380,7 @@
 	if (IS_ERR(block_rsv))
 		return ERR_CAST(block_rsv);
 
-	ret = btrfs_reserve_extent(root, blocksize, blocksize,
+	ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize,
 				   empty_size, hint, &ins, 0, 0);
 	if (ret)
 		goto out_unuse;
@@ -8521,35 +8533,6 @@
 	wc->reada_slot = slot;
 }
 
-/*
- * These may not be seen by the usual inc/dec ref code so we have to
- * add them here.
- */
-static int record_one_subtree_extent(struct btrfs_trans_handle *trans,
-				     struct btrfs_root *root, u64 bytenr,
-				     u64 num_bytes)
-{
-	struct btrfs_qgroup_extent_record *qrecord;
-	struct btrfs_delayed_ref_root *delayed_refs;
-
-	qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS);
-	if (!qrecord)
-		return -ENOMEM;
-
-	qrecord->bytenr = bytenr;
-	qrecord->num_bytes = num_bytes;
-	qrecord->old_roots = NULL;
-
-	delayed_refs = &trans->transaction->delayed_refs;
-	spin_lock(&delayed_refs->lock);
-	if (btrfs_qgroup_insert_dirty_extent(trans->fs_info,
-					     delayed_refs, qrecord))
-		kfree(qrecord);
-	spin_unlock(&delayed_refs->lock);
-
-	return 0;
-}
-
 static int account_leaf_items(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      struct extent_buffer *eb)
@@ -8583,7 +8566,8 @@
 
 		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
 
-		ret = record_one_subtree_extent(trans, root, bytenr, num_bytes);
+		ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+				bytenr, num_bytes, GFP_NOFS);
 		if (ret)
 			return ret;
 	}
@@ -8732,8 +8716,9 @@
 			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
 
-			ret = record_one_subtree_extent(trans, root, child_bytenr,
-							root->nodesize);
+			ret = btrfs_qgroup_insert_dirty_extent(trans,
+					root->fs_info, child_bytenr,
+					root->nodesize, GFP_NOFS);
 			if (ret)
 				goto out;
 		}
@@ -9906,6 +9891,7 @@
 			} else {
 				ret = 0;
 			}
+			free_extent_map(em);
 			goto out;
 		}
 		path->slots[0]++;
@@ -9942,6 +9928,7 @@
 		block_group->iref = 0;
 		block_group->inode = NULL;
 		spin_unlock(&block_group->lock);
+		ASSERT(block_group->io_ctl.inode == NULL);
 		iput(inode);
 		last = block_group->key.objectid + block_group->key.offset;
 		btrfs_put_block_group(block_group);
@@ -9999,6 +9986,10 @@
 			free_excluded_extents(info->extent_root, block_group);
 
 		btrfs_remove_free_space_cache(block_group);
+		ASSERT(list_empty(&block_group->dirty_list));
+		ASSERT(list_empty(&block_group->io_list));
+		ASSERT(list_empty(&block_group->bg_list));
+		ASSERT(atomic_read(&block_group->count) == 1);
 		btrfs_put_block_group(block_group);
 
 		spin_lock(&info->block_group_cache_lock);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bc2729a..28cd88f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -20,6 +20,7 @@
 #define EXTENT_DAMAGED		(1U << 14)
 #define EXTENT_NORESERVE	(1U << 15)
 #define EXTENT_QGROUP_RESERVED	(1U << 16)
+#define EXTENT_CLEAR_DATA_RESV	(1U << 17)
 #define EXTENT_IOBITS		(EXTENT_LOCKED | EXTENT_WRITEBACK)
 #define EXTENT_CTLBITS		(EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC)
 
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9404121..fea31a4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2033,6 +2033,14 @@
 		 */
 		clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 			  &BTRFS_I(inode)->runtime_flags);
+		/*
+		 * An ordered extent might have started before and completed
+		 * already with io errors, in which case the inode was not
+		 * updated and we end up here. So check the inode's mapping
+		 * flags for any errors that might have happened while doing
+		 * writeback of file data.
+		 */
+		ret = btrfs_inode_check_errors(inode);
 		inode_unlock(inode);
 		goto out;
 	}
@@ -2062,7 +2070,7 @@
 	}
 	trans->sync = true;
 
-	btrfs_init_log_ctx(&ctx);
+	btrfs_init_log_ctx(&ctx, inode);
 
 	ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
 	if (ret < 0) {
@@ -2667,6 +2675,7 @@
 
 	alloc_start = round_down(offset, blocksize);
 	alloc_end = round_up(offset + len, blocksize);
+	cur_offset = alloc_start;
 
 	/* Make sure we aren't being give some crap mode */
 	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@ -2759,7 +2768,6 @@
 
 	/* First, check if we exceed the qgroup limit */
 	INIT_LIST_HEAD(&reserve_list);
-	cur_offset = alloc_start;
 	while (1) {
 		em = btrfs_get_extent(inode, NULL, 0, cur_offset,
 				      alloc_end - cur_offset, 0);
@@ -2786,6 +2794,14 @@
 					last_byte - cur_offset);
 			if (ret < 0)
 				break;
+		} else {
+			/*
+			 * Do not need to reserve unwritten extent for this
+			 * range, free reserved data space first, otherwise
+			 * it'll result in false ENOSPC error.
+			 */
+			btrfs_free_reserved_data_space(inode, cur_offset,
+				last_byte - cur_offset);
 		}
 		free_extent_map(em);
 		cur_offset = last_byte;
@@ -2803,6 +2819,9 @@
 					range->start,
 					range->len, 1 << inode->i_blkbits,
 					offset + len, &alloc_hint);
+		else
+			btrfs_free_reserved_data_space(inode, range->start,
+						       range->len);
 		list_del(&range->list);
 		kfree(range);
 	}
@@ -2837,18 +2856,11 @@
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
 			     &cached_state, GFP_KERNEL);
 out:
-	/*
-	 * As we waited the extent range, the data_rsv_map must be empty
-	 * in the range, as written data range will be released from it.
-	 * And for prealloacted extent, it will also be released when
-	 * its metadata is written.
-	 * So this is completely used as cleanup.
-	 */
-	btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
 	inode_unlock(inode);
 	/* Let go of our reservation. */
-	btrfs_free_reserved_data_space(inode, alloc_start,
-				       alloc_end - alloc_start);
+	if (ret != 0)
+		btrfs_free_reserved_data_space(inode, alloc_start,
+				       alloc_end - cur_offset);
 	return ret;
 }
 
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index aa6faba..359ee86 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -495,10 +495,9 @@
 	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc,
 					      prealloc, prealloc, &alloc_hint);
 	if (ret) {
-		btrfs_delalloc_release_space(inode, 0, prealloc);
+		btrfs_delalloc_release_metadata(inode, prealloc);
 		goto out_put;
 	}
-	btrfs_free_reserved_data_space(inode, 0, prealloc);
 
 	ret = btrfs_write_out_ino_cache(root, trans, path, inode);
 out_put:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2f59759..e6811c42 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -566,6 +566,8 @@
 						     PAGE_SET_WRITEBACK |
 						     page_error_op |
 						     PAGE_END_WRITEBACK);
+			btrfs_free_reserved_data_space_noquota(inode, start,
+						end - start + 1);
 			goto free_pages_out;
 		}
 	}
@@ -742,7 +744,7 @@
 		lock_extent(io_tree, async_extent->start,
 			    async_extent->start + async_extent->ram_size - 1);
 
-		ret = btrfs_reserve_extent(root,
+		ret = btrfs_reserve_extent(root, async_extent->ram_size,
 					   async_extent->compressed_size,
 					   async_extent->compressed_size,
 					   0, alloc_hint, &ins, 1, 1);
@@ -969,7 +971,8 @@
 				     EXTENT_DEFRAG, PAGE_UNLOCK |
 				     PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
 				     PAGE_END_WRITEBACK);
-
+			btrfs_free_reserved_data_space_noquota(inode, start,
+						end - start + 1);
 			*nr_written = *nr_written +
 			     (end - start + PAGE_SIZE) / PAGE_SIZE;
 			*page_started = 1;
@@ -989,7 +992,7 @@
 		unsigned long op;
 
 		cur_alloc_size = disk_num_bytes;
-		ret = btrfs_reserve_extent(root, cur_alloc_size,
+		ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
 					   root->sectorsize, 0, alloc_hint,
 					   &ins, 1, 1);
 		if (ret < 0)
@@ -1489,8 +1492,10 @@
 		extent_clear_unlock_delalloc(inode, cur_offset,
 					     cur_offset + num_bytes - 1,
 					     locked_page, EXTENT_LOCKED |
-					     EXTENT_DELALLOC, PAGE_UNLOCK |
-					     PAGE_SET_PRIVATE2);
+					     EXTENT_DELALLOC |
+					     EXTENT_CLEAR_DATA_RESV,
+					     PAGE_UNLOCK | PAGE_SET_PRIVATE2);
+
 		if (!nolock && nocow)
 			btrfs_end_write_no_snapshoting(root);
 		cur_offset = extent_end;
@@ -1807,7 +1812,9 @@
 			return;
 
 		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
-		    && do_list && !(state->state & EXTENT_NORESERVE))
+		    && do_list && !(state->state & EXTENT_NORESERVE)
+		    && (*bits & (EXTENT_DO_ACCOUNTING |
+		    EXTENT_CLEAR_DATA_RESV)))
 			btrfs_free_reserved_data_space_noquota(inode,
 					state->start, len);
 
@@ -3435,10 +3442,10 @@
 		found_key.offset = 0;
 		inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
 		ret = PTR_ERR_OR_ZERO(inode);
-		if (ret && ret != -ESTALE)
+		if (ret && ret != -ENOENT)
 			goto out;
 
-		if (ret == -ESTALE && root == root->fs_info->tree_root) {
+		if (ret == -ENOENT && root == root->fs_info->tree_root) {
 			struct btrfs_root *dead_root;
 			struct btrfs_fs_info *fs_info = root->fs_info;
 			int is_dead_root = 0;
@@ -3474,7 +3481,7 @@
 		 * Inode is already gone but the orphan item is still there,
 		 * kill the orphan item.
 		 */
-		if (ret == -ESTALE) {
+		if (ret == -ENOENT) {
 			trans = btrfs_start_transaction(root, 1);
 			if (IS_ERR(trans)) {
 				ret = PTR_ERR(trans);
@@ -3633,7 +3640,7 @@
 /*
  * read an inode from the btree into the in-memory inode
  */
-static void btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode)
 {
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
@@ -3652,14 +3659,19 @@
 		filled = true;
 
 	path = btrfs_alloc_path();
-	if (!path)
+	if (!path) {
+		ret = -ENOMEM;
 		goto make_bad;
+	}
 
 	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
 	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
-	if (ret)
+	if (ret) {
+		if (ret > 0)
+			ret = -ENOENT;
 		goto make_bad;
+	}
 
 	leaf = path->nodes[0];
 
@@ -3812,11 +3824,12 @@
 	}
 
 	btrfs_update_iflags(inode);
-	return;
+	return 0;
 
 make_bad:
 	btrfs_free_path(path);
 	make_bad_inode(inode);
+	return ret;
 }
 
 /*
@@ -4204,6 +4217,7 @@
 	int err = 0;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_trans_handle *trans;
+	u64 last_unlink_trans;
 
 	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
 		return -ENOTEMPTY;
@@ -4226,11 +4240,27 @@
 	if (err)
 		goto out;
 
+	last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
+
 	/* now the directory is empty */
 	err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
 				 dentry->d_name.name, dentry->d_name.len);
-	if (!err)
+	if (!err) {
 		btrfs_i_size_write(inode, 0);
+		/*
+		 * Propagate the last_unlink_trans value of the deleted dir to
+		 * its parent directory. This is to prevent an unrecoverable
+		 * log tree in the case we do something like this:
+		 * 1) create dir foo
+		 * 2) create snapshot under dir foo
+		 * 3) delete the snapshot
+		 * 4) rmdir foo
+		 * 5) mkdir foo
+		 * 6) fsync foo or some file inside foo
+		 */
+		if (last_unlink_trans >= trans->transid)
+			BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
+	}
 out:
 	btrfs_end_transaction(trans, root);
 	btrfs_btree_balance_dirty(root);
@@ -5606,7 +5636,9 @@
 		return ERR_PTR(-ENOMEM);
 
 	if (inode->i_state & I_NEW) {
-		btrfs_read_locked_inode(inode);
+		int ret;
+
+		ret = btrfs_read_locked_inode(inode);
 		if (!is_bad_inode(inode)) {
 			inode_tree_add(inode);
 			unlock_new_inode(inode);
@@ -5615,7 +5647,8 @@
 		} else {
 			unlock_new_inode(inode);
 			iput(inode);
-			inode = ERR_PTR(-ESTALE);
+			ASSERT(ret < 0);
+			inode = ERR_PTR(ret < 0 ? ret : -ESTALE);
 		}
 	}
 
@@ -7225,7 +7258,7 @@
 	int ret;
 
 	alloc_hint = get_extent_allocation_hint(inode, start, len);
-	ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
+	ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0,
 				   alloc_hint, &ins, 1, 1);
 	if (ret)
 		return ERR_PTR(ret);
@@ -7725,6 +7758,13 @@
 				ret = PTR_ERR(em2);
 				goto unlock_err;
 			}
+			/*
+			 * For inode marked NODATACOW or extent marked PREALLOC,
+			 * use the existing or preallocated extent, so does not
+			 * need to adjust btrfs_space_info's bytes_may_use.
+			 */
+			btrfs_free_reserved_data_space_noquota(inode,
+					start, len);
 			goto unlock;
 		}
 	}
@@ -7759,7 +7799,6 @@
 			i_size_write(inode, start + len);
 
 		adjust_dio_outstanding_extents(inode, dio_data, len);
-		btrfs_free_reserved_data_space(inode, start, len);
 		WARN_ON(dio_data->reserve < len);
 		dio_data->reserve -= len;
 		dio_data->unsubmitted_oe_range_end = start + len;
@@ -10280,6 +10319,7 @@
 	u64 last_alloc = (u64)-1;
 	int ret = 0;
 	bool own_trans = true;
+	u64 end = start + num_bytes - 1;
 
 	if (trans)
 		own_trans = false;
@@ -10301,8 +10341,8 @@
 		 * sized chunks.
 		 */
 		cur_bytes = min(cur_bytes, last_alloc);
-		ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
-					   *alloc_hint, &ins, 1, 0);
+		ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes,
+				min_size, 0, *alloc_hint, &ins, 1, 0);
 		if (ret) {
 			if (own_trans)
 				btrfs_end_transaction(trans, root);
@@ -10388,6 +10428,9 @@
 		if (own_trans)
 			btrfs_end_transaction(trans, root);
 	}
+	if (cur_offset < end)
+		btrfs_free_reserved_data_space(inode, cur_offset,
+			end - cur_offset + 1);
 	return ret;
 }
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 14ed1e9..7fd939b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1634,6 +1634,9 @@
 	int namelen;
 	int ret = 0;
 
+	if (!S_ISDIR(file_inode(file)->i_mode))
+		return -ENOTDIR;
+
 	ret = mnt_want_write_file(file);
 	if (ret)
 		goto out;
@@ -1691,6 +1694,9 @@
 	struct btrfs_ioctl_vol_args *vol_args;
 	int ret;
 
+	if (!S_ISDIR(file_inode(file)->i_mode))
+		return -ENOTDIR;
+
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args))
 		return PTR_ERR(vol_args);
@@ -1714,6 +1720,9 @@
 	bool readonly = false;
 	struct btrfs_qgroup_inherit *inherit = NULL;
 
+	if (!S_ISDIR(file_inode(file)->i_mode))
+		return -ENOTDIR;
+
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args))
 		return PTR_ERR(vol_args);
@@ -2357,6 +2366,9 @@
 	int ret;
 	int err = 0;
 
+	if (!S_ISDIR(dir->i_mode))
+		return -ENOTDIR;
+
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args))
 		return PTR_ERR(vol_args);
@@ -5084,7 +5096,7 @@
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	return btrfs_qgroup_wait_for_completion(root->fs_info);
+	return btrfs_qgroup_wait_for_completion(root->fs_info, true);
 }
 
 static long _btrfs_ioctl_set_received_subvol(struct file *file,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 93ee1c1..8db2e29 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -995,7 +995,7 @@
 		goto out;
 	fs_info->quota_enabled = 0;
 	fs_info->pending_quota_state = 0;
-	btrfs_qgroup_wait_for_completion(fs_info);
+	btrfs_qgroup_wait_for_completion(fs_info, false);
 	spin_lock(&fs_info->qgroup_lock);
 	quota_root = fs_info->quota_root;
 	fs_info->quota_root = NULL;
@@ -1453,10 +1453,9 @@
 	return ret;
 }
 
-struct btrfs_qgroup_extent_record *
-btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
-				 struct btrfs_delayed_ref_root *delayed_refs,
-				 struct btrfs_qgroup_extent_record *record)
+int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info,
+				struct btrfs_delayed_ref_root *delayed_refs,
+				struct btrfs_qgroup_extent_record *record)
 {
 	struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
 	struct rb_node *parent_node = NULL;
@@ -1475,12 +1474,42 @@
 		else if (bytenr > entry->bytenr)
 			p = &(*p)->rb_right;
 		else
-			return entry;
+			return 1;
 	}
 
 	rb_link_node(&record->node, parent_node, p);
 	rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
-	return NULL;
+	return 0;
+}
+
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+		gfp_t gfp_flag)
+{
+	struct btrfs_qgroup_extent_record *record;
+	struct btrfs_delayed_ref_root *delayed_refs;
+	int ret;
+
+	if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0)
+		return 0;
+	if (WARN_ON(trans == NULL))
+		return -EINVAL;
+	record = kmalloc(sizeof(*record), gfp_flag);
+	if (!record)
+		return -ENOMEM;
+
+	delayed_refs = &trans->transaction->delayed_refs;
+	record->bytenr = bytenr;
+	record->num_bytes = num_bytes;
+	record->old_roots = NULL;
+
+	spin_lock(&delayed_refs->lock);
+	ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs,
+						      record);
+	spin_unlock(&delayed_refs->lock);
+	if (ret > 0)
+		kfree(record);
+	return 0;
 }
 
 #define UPDATE_NEW	0
@@ -2303,6 +2332,10 @@
 	int err = -ENOMEM;
 	int ret = 0;
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	fs_info->qgroup_rescan_running = true;
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
 	path = btrfs_alloc_path();
 	if (!path)
 		goto out;
@@ -2369,6 +2402,9 @@
 	}
 
 done:
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	fs_info->qgroup_rescan_running = false;
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
 	complete_all(&fs_info->qgroup_rescan_completion);
 }
 
@@ -2487,20 +2523,26 @@
 	return 0;
 }
 
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+				     bool interruptible)
 {
 	int running;
 	int ret = 0;
 
 	mutex_lock(&fs_info->qgroup_rescan_lock);
 	spin_lock(&fs_info->qgroup_lock);
-	running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+	running = fs_info->qgroup_rescan_running;
 	spin_unlock(&fs_info->qgroup_lock);
 	mutex_unlock(&fs_info->qgroup_rescan_lock);
 
-	if (running)
+	if (!running)
+		return 0;
+
+	if (interruptible)
 		ret = wait_for_completion_interruptible(
 					&fs_info->qgroup_rescan_completion);
+	else
+		wait_for_completion(&fs_info->qgroup_rescan_completion);
 
 	return ret;
 }
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 710887c..1bc64c8 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -46,7 +46,8 @@
 			struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
 void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+				     bool interruptible);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
@@ -63,10 +64,35 @@
 struct btrfs_delayed_extent_op;
 int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
 					 struct btrfs_fs_info *fs_info);
-struct btrfs_qgroup_extent_record *
-btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info,
-				 struct btrfs_delayed_ref_root *delayed_refs,
-				 struct btrfs_qgroup_extent_record *record);
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at commit trans time.
+ *
+ * No lock version, caller must acquire delayed ref lock and allocate memory.
+ *
+ * Return 0 for success insert
+ * Return >0 for existing record, caller can free @record safely.
+ * Error is not possible
+ */
+int btrfs_qgroup_insert_dirty_extent_nolock(
+		struct btrfs_fs_info *fs_info,
+		struct btrfs_delayed_ref_root *delayed_refs,
+		struct btrfs_qgroup_extent_record *record);
+
+/*
+ * Insert one dirty extent record into @delayed_refs, informing qgroup to
+ * account that extent at commit trans time.
+ *
+ * Better encapsulated version.
+ *
+ * Return 0 if the operation is done.
+ * Return <0 for error, like memory allocation failure or invalid parameter
+ * (NULL trans)
+ */
+int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans,
+		struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes,
+		gfp_t gfp_flag);
+
 int
 btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
 			    struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b26a5ae..c0c13dc 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -31,6 +31,7 @@
 #include "async-thread.h"
 #include "free-space-cache.h"
 #include "inode-map.h"
+#include "qgroup.h"
 
 /*
  * backref_node, mapping_node and tree_block start with this
@@ -3037,15 +3038,19 @@
 	u64 num_bytes;
 	int nr = 0;
 	int ret = 0;
+	u64 prealloc_start = cluster->start - offset;
+	u64 prealloc_end = cluster->end - offset;
+	u64 cur_offset;
 
 	BUG_ON(cluster->start != cluster->boundary[0]);
 	inode_lock(inode);
 
-	ret = btrfs_check_data_free_space(inode, cluster->start,
-					  cluster->end + 1 - cluster->start);
+	ret = btrfs_check_data_free_space(inode, prealloc_start,
+					  prealloc_end + 1 - prealloc_start);
 	if (ret)
 		goto out;
 
+	cur_offset = prealloc_start;
 	while (nr < cluster->nr) {
 		start = cluster->boundary[nr] - offset;
 		if (nr + 1 < cluster->nr)
@@ -3055,16 +3060,21 @@
 
 		lock_extent(&BTRFS_I(inode)->io_tree, start, end);
 		num_bytes = end + 1 - start;
+		if (cur_offset < start)
+			btrfs_free_reserved_data_space(inode, cur_offset,
+					start - cur_offset);
 		ret = btrfs_prealloc_file_range(inode, 0, start,
 						num_bytes, num_bytes,
 						end + 1, &alloc_hint);
+		cur_offset = end + 1;
 		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
 		if (ret)
 			break;
 		nr++;
 	}
-	btrfs_free_reserved_data_space(inode, cluster->start,
-				       cluster->end + 1 - cluster->start);
+	if (cur_offset < prealloc_end)
+		btrfs_free_reserved_data_space(inode, cur_offset,
+				       prealloc_end + 1 - cur_offset);
 out:
 	inode_unlock(inode);
 	return ret;
@@ -3916,6 +3926,90 @@
 	return 0;
 }
 
+/*
+ * Qgroup fixer for data chunk relocation.
+ * The data relocation is done in the following steps
+ * 1) Copy data extents into data reloc tree
+ * 2) Create tree reloc tree(special snapshot) for related subvolumes
+ * 3) Modify file extents in tree reloc tree
+ * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks
+ *
+ * The problem is, data and tree reloc tree are not accounted to qgroup,
+ * and 4) will only info qgroup to track tree blocks change, not file extents
+ * in the tree blocks.
+ *
+ * The good news is, related data extents are all in data reloc tree, so we
+ * only need to info qgroup to track all file extents in data reloc tree
+ * before commit trans.
+ */
+static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans,
+					     struct reloc_control *rc)
+{
+	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
+	struct inode *inode = rc->data_inode;
+	struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret = 0;
+
+	if (!fs_info->quota_enabled)
+		return 0;
+
+	/*
+	 * Only for stage where we update data pointers the qgroup fix is
+	 * valid.
+	 * For MOVING_DATA stage, we will miss the timing of swapping tree
+	 * blocks, and won't fix it.
+	 */
+	if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found))
+		return 0;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+
+	ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1);
+	while (1) {
+		struct btrfs_file_extent_item *fi;
+
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.objectid > btrfs_ino(inode))
+			break;
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			goto next;
+		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_file_extent_item);
+		if (btrfs_file_extent_type(path->nodes[0], fi) !=
+				BTRFS_FILE_EXTENT_REG)
+			goto next;
+		ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info,
+			btrfs_file_extent_disk_bytenr(path->nodes[0], fi),
+			btrfs_file_extent_disk_num_bytes(path->nodes[0], fi),
+			GFP_NOFS);
+		if (ret < 0)
+			break;
+next:
+		ret = btrfs_next_item(data_reloc_root, path);
+		if (ret < 0)
+			break;
+		if (ret > 0) {
+			ret = 0;
+			break;
+		}
+	}
+	unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1);
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
 {
 	struct rb_root blocks = RB_ROOT;
@@ -4102,10 +4196,18 @@
 
 	/* get rid of pinned extents */
 	trans = btrfs_join_transaction(rc->extent_root);
-	if (IS_ERR(trans))
+	if (IS_ERR(trans)) {
 		err = PTR_ERR(trans);
-	else
-		btrfs_commit_transaction(trans, rc->extent_root);
+		goto out_free;
+	}
+	ret = qgroup_fix_relocated_data_extents(trans, rc);
+	if (ret < 0) {
+		btrfs_abort_transaction(trans, ret);
+		if (!err)
+			err = ret;
+		goto out_free;
+	}
+	btrfs_commit_transaction(trans, rc->extent_root);
 out_free:
 	btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
 	btrfs_free_path(path);
@@ -4468,10 +4570,16 @@
 	unset_reloc_control(rc);
 
 	trans = btrfs_join_transaction(rc->extent_root);
-	if (IS_ERR(trans))
+	if (IS_ERR(trans)) {
 		err = PTR_ERR(trans);
-	else
-		err = btrfs_commit_transaction(trans, rc->extent_root);
+		goto out_free;
+	}
+	err = qgroup_fix_relocated_data_extents(trans, rc);
+	if (err < 0) {
+		btrfs_abort_transaction(trans, err);
+		goto out_free;
+	}
+	err = btrfs_commit_transaction(trans, rc->extent_root);
 out_free:
 	kfree(rc);
 out:
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7fd7e18..0912960 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -272,6 +272,23 @@
 		root_key.objectid = key.offset;
 		key.offset++;
 
+		/*
+		 * The root might have been inserted already, as before we look
+		 * for orphan roots, log replay might have happened, which
+		 * triggers a transaction commit and qgroup accounting, which
+		 * in turn reads and inserts fs roots while doing backref
+		 * walking.
+		 */
+		root = btrfs_lookup_fs_root(tree_root->fs_info,
+					    root_key.objectid);
+		if (root) {
+			WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
+					  &root->state));
+			if (btrfs_root_refs(&root->root_item) == 0)
+				btrfs_add_dead_root(root);
+			continue;
+		}
+
 		root = btrfs_read_fs_root(tree_root, &root_key);
 		err = PTR_ERR_OR_ZERO(root);
 		if (err && err != -ENOENT) {
@@ -310,16 +327,8 @@
 		set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
 
 		err = btrfs_insert_fs_root(root->fs_info, root);
-		/*
-		 * The root might have been inserted already, as before we look
-		 * for orphan roots, log replay might have happened, which
-		 * triggers a transaction commit and qgroup accounting, which
-		 * in turn reads and inserts fs roots while doing backref
-		 * walking.
-		 */
-		if (err == -EEXIST)
-			err = 0;
 		if (err) {
+			BUG_ON(err == -EEXIST);
 			btrfs_free_fs_root(root);
 			break;
 		}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index b71dd298..a87675f 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -231,7 +231,6 @@
 	u64 parent_ino;
 	u64 ino;
 	u64 gen;
-	bool is_orphan;
 	struct list_head update_refs;
 };
 
@@ -274,6 +273,39 @@
 	char name[];
 };
 
+static void inconsistent_snapshot_error(struct send_ctx *sctx,
+					enum btrfs_compare_tree_result result,
+					const char *what)
+{
+	const char *result_string;
+
+	switch (result) {
+	case BTRFS_COMPARE_TREE_NEW:
+		result_string = "new";
+		break;
+	case BTRFS_COMPARE_TREE_DELETED:
+		result_string = "deleted";
+		break;
+	case BTRFS_COMPARE_TREE_CHANGED:
+		result_string = "updated";
+		break;
+	case BTRFS_COMPARE_TREE_SAME:
+		ASSERT(0);
+		result_string = "unchanged";
+		break;
+	default:
+		ASSERT(0);
+		result_string = "unexpected";
+	}
+
+	btrfs_err(sctx->send_root->fs_info,
+		  "Send: inconsistent snapshot, found %s %s for inode %llu without updated inode item, send root is %llu, parent root is %llu",
+		  result_string, what, sctx->cmp_key->objectid,
+		  sctx->send_root->root_key.objectid,
+		  (sctx->parent_root ?
+		   sctx->parent_root->root_key.objectid : 0));
+}
+
 static int is_waiting_for_move(struct send_ctx *sctx, u64 ino);
 
 static struct waiting_dir_move *
@@ -1861,7 +1893,8 @@
 	 * was already unlinked/moved, so we can safely assume that we will not
 	 * overwrite anything at this point in time.
 	 */
-	if (other_inode > sctx->send_progress) {
+	if (other_inode > sctx->send_progress ||
+	    is_waiting_for_move(sctx, other_inode)) {
 		ret = get_inode_info(sctx->parent_root, other_inode, NULL,
 				who_gen, NULL, NULL, NULL, NULL);
 		if (ret < 0)
@@ -2502,6 +2535,8 @@
 	key.type = BTRFS_INODE_ITEM_KEY;
 	key.offset = 0;
 	ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0);
+	if (ret > 0)
+		ret = -ENOENT;
 	if (ret < 0)
 		goto out;
 
@@ -2947,6 +2982,10 @@
 		}
 
 		if (loc.objectid > send_progress) {
+			struct orphan_dir_info *odi;
+
+			odi = get_orphan_dir_info(sctx, dir);
+			free_orphan_dir_info(sctx, odi);
 			ret = 0;
 			goto out;
 		}
@@ -3047,7 +3086,6 @@
 	pm->parent_ino = parent_ino;
 	pm->ino = ino;
 	pm->gen = ino_gen;
-	pm->is_orphan = is_orphan;
 	INIT_LIST_HEAD(&pm->list);
 	INIT_LIST_HEAD(&pm->update_refs);
 	RB_CLEAR_NODE(&pm->node);
@@ -3113,6 +3151,48 @@
 	return NULL;
 }
 
+static int path_loop(struct send_ctx *sctx, struct fs_path *name,
+		     u64 ino, u64 gen, u64 *ancestor_ino)
+{
+	int ret = 0;
+	u64 parent_inode = 0;
+	u64 parent_gen = 0;
+	u64 start_ino = ino;
+
+	*ancestor_ino = 0;
+	while (ino != BTRFS_FIRST_FREE_OBJECTID) {
+		fs_path_reset(name);
+
+		if (is_waiting_for_rm(sctx, ino))
+			break;
+		if (is_waiting_for_move(sctx, ino)) {
+			if (*ancestor_ino == 0)
+				*ancestor_ino = ino;
+			ret = get_first_ref(sctx->parent_root, ino,
+					    &parent_inode, &parent_gen, name);
+		} else {
+			ret = __get_cur_name_and_parent(sctx, ino, gen,
+							&parent_inode,
+							&parent_gen, name);
+			if (ret > 0) {
+				ret = 0;
+				break;
+			}
+		}
+		if (ret < 0)
+			break;
+		if (parent_inode == start_ino) {
+			ret = 1;
+			if (*ancestor_ino == 0)
+				*ancestor_ino = ino;
+			break;
+		}
+		ino = parent_inode;
+		gen = parent_gen;
+	}
+	return ret;
+}
+
 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 {
 	struct fs_path *from_path = NULL;
@@ -3123,6 +3203,8 @@
 	u64 parent_ino, parent_gen;
 	struct waiting_dir_move *dm = NULL;
 	u64 rmdir_ino = 0;
+	u64 ancestor;
+	bool is_orphan;
 	int ret;
 
 	name = fs_path_alloc();
@@ -3135,9 +3217,10 @@
 	dm = get_waiting_dir_move(sctx, pm->ino);
 	ASSERT(dm);
 	rmdir_ino = dm->rmdir_ino;
+	is_orphan = dm->orphanized;
 	free_waiting_dir_move(sctx, dm);
 
-	if (pm->is_orphan) {
+	if (is_orphan) {
 		ret = gen_unique_name(sctx, pm->ino,
 				      pm->gen, from_path);
 	} else {
@@ -3155,6 +3238,24 @@
 		goto out;
 
 	sctx->send_progress = sctx->cur_ino + 1;
+	ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
+	if (ret < 0)
+		goto out;
+	if (ret) {
+		LIST_HEAD(deleted_refs);
+		ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
+		ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
+					   &pm->update_refs, &deleted_refs,
+					   is_orphan);
+		if (ret < 0)
+			goto out;
+		if (rmdir_ino) {
+			dm = get_waiting_dir_move(sctx, pm->ino);
+			ASSERT(dm);
+			dm->rmdir_ino = rmdir_ino;
+		}
+		goto out;
+	}
 	fs_path_reset(name);
 	to_path = name;
 	name = NULL;
@@ -3174,7 +3275,7 @@
 			/* already deleted */
 			goto finish;
 		}
-		ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1);
+		ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino);
 		if (ret < 0)
 			goto out;
 		if (!ret)
@@ -3204,8 +3305,18 @@
 	 * and old parent(s).
 	 */
 	list_for_each_entry(cur, &pm->update_refs, list) {
-		if (cur->dir == rmdir_ino)
+		/*
+		 * The parent inode might have been deleted in the send snapshot
+		 */
+		ret = get_inode_info(sctx->send_root, cur->dir, NULL,
+				     NULL, NULL, NULL, NULL, NULL);
+		if (ret == -ENOENT) {
+			ret = 0;
 			continue;
+		}
+		if (ret < 0)
+			goto out;
+
 		ret = send_utimes(sctx, cur->dir, cur->dir_gen);
 		if (ret < 0)
 			goto out;
@@ -3325,6 +3436,7 @@
 	u64 left_gen;
 	u64 right_gen;
 	int ret = 0;
+	struct waiting_dir_move *wdm;
 
 	if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
 		return 0;
@@ -3383,7 +3495,8 @@
 		goto out;
 	}
 
-	if (is_waiting_for_move(sctx, di_key.objectid)) {
+	wdm = get_waiting_dir_move(sctx, di_key.objectid);
+	if (wdm && !wdm->orphanized) {
 		ret = add_pending_dir_move(sctx,
 					   sctx->cur_ino,
 					   sctx->cur_inode_gen,
@@ -3470,7 +3583,8 @@
 			ret = is_ancestor(sctx->parent_root,
 					  sctx->cur_ino, sctx->cur_inode_gen,
 					  ino, path_before);
-			break;
+			if (ret)
+				break;
 		}
 
 		fs_path_reset(path_before);
@@ -3643,11 +3757,26 @@
 				goto out;
 			if (ret) {
 				struct name_cache_entry *nce;
+				struct waiting_dir_move *wdm;
 
 				ret = orphanize_inode(sctx, ow_inode, ow_gen,
 						cur->full_path);
 				if (ret < 0)
 					goto out;
+
+				/*
+				 * If ow_inode has its rename operation delayed
+				 * make sure that its orphanized name is used in
+				 * the source path when performing its rename
+				 * operation.
+				 */
+				if (is_waiting_for_move(sctx, ow_inode)) {
+					wdm = get_waiting_dir_move(sctx,
+								   ow_inode);
+					ASSERT(wdm);
+					wdm->orphanized = true;
+				}
+
 				/*
 				 * Make sure we clear our orphanized inode's
 				 * name from the name cache. This is because the
@@ -3663,6 +3792,19 @@
 					name_cache_delete(sctx, nce);
 					kfree(nce);
 				}
+
+				/*
+				 * ow_inode might currently be an ancestor of
+				 * cur_ino, therefore compute valid_path (the
+				 * current path of cur_ino) again because it
+				 * might contain the pre-orphanization name of
+				 * ow_inode, which is no longer valid.
+				 */
+				fs_path_reset(valid_path);
+				ret = get_cur_path(sctx, sctx->cur_ino,
+					   sctx->cur_inode_gen, valid_path);
+				if (ret < 0)
+					goto out;
 			} else {
 				ret = send_unlink(sctx, cur->full_path);
 				if (ret < 0)
@@ -4126,10 +4268,12 @@
 	}
 	btrfs_release_path(path);
 
+	/*
+	 * We don't actually care about pending_move as we are simply
+	 * re-creating this inode and will be rename'ing it into place once we
+	 * rename the parent directory.
+	 */
 	ret = process_recorded_refs(sctx, &pending_move);
-	/* Only applicable to an incremental send. */
-	ASSERT(pending_move == 0);
-
 out:
 	btrfs_free_path(path);
 	return ret;
@@ -5602,7 +5746,10 @@
 {
 	int ret = 0;
 
-	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+	if (sctx->cur_ino != sctx->cmp_key->objectid) {
+		inconsistent_snapshot_error(sctx, result, "reference");
+		return -EIO;
+	}
 
 	if (!sctx->cur_inode_new_gen &&
 	    sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) {
@@ -5627,7 +5774,10 @@
 {
 	int ret = 0;
 
-	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+	if (sctx->cur_ino != sctx->cmp_key->objectid) {
+		inconsistent_snapshot_error(sctx, result, "xattr");
+		return -EIO;
+	}
 
 	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
 		if (result == BTRFS_COMPARE_TREE_NEW)
@@ -5651,7 +5801,10 @@
 {
 	int ret = 0;
 
-	BUG_ON(sctx->cur_ino != sctx->cmp_key->objectid);
+	if (sctx->cur_ino != sctx->cmp_key->objectid) {
+		inconsistent_snapshot_error(sctx, result, "extent");
+		return -EIO;
+	}
 
 	if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
 		if (result != BTRFS_COMPARE_TREE_DELETED)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 864ce33..4071fe2 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -2241,6 +2241,13 @@
 	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = btrfs_sb(sb)->tree_root;
 
+	root->fs_info->fs_frozen = 1;
+	/*
+	 * We don't need a barrier here, we'll wait for any transaction that
+	 * could be in progress on other threads (and do delayed iputs that
+	 * we want to avoid on a frozen filesystem), or do the commit
+	 * ourselves.
+	 */
 	trans = btrfs_attach_transaction_barrier(root);
 	if (IS_ERR(trans)) {
 		/* no transaction, don't bother */
@@ -2251,6 +2258,14 @@
 	return btrfs_commit_transaction(trans, root);
 }
 
+static int btrfs_unfreeze(struct super_block *sb)
+{
+	struct btrfs_root *root = btrfs_sb(sb)->tree_root;
+
+	root->fs_info->fs_frozen = 0;
+	return 0;
+}
+
 static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
@@ -2299,6 +2314,7 @@
 	.statfs		= btrfs_statfs,
 	.remount_fs	= btrfs_remount,
 	.freeze_fs	= btrfs_freeze,
+	.unfreeze_fs	= btrfs_unfreeze,
 };
 
 static const struct file_operations btrfs_ctl_fops = {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 9cca0a7..95d4191 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -2278,8 +2278,13 @@
 
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 
+	/*
+	 * If fs has been frozen, we can not handle delayed iputs, otherwise
+	 * it'll result in deadlock about SB_FREEZE_FS.
+	 */
 	if (current != root->fs_info->transaction_kthread &&
-	    current != root->fs_info->cleaner_kthread)
+	    current != root->fs_info->cleaner_kthread &&
+	    !root->fs_info->fs_frozen)
 		btrfs_run_delayed_iputs(root);
 
 	return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d31a0c4..ef9c55b 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -27,6 +27,7 @@
 #include "backref.h"
 #include "hash.h"
 #include "compression.h"
+#include "qgroup.h"
 
 /* magic values for the inode_only field in btrfs_log_inode:
  *
@@ -680,6 +681,21 @@
 		ins.type = BTRFS_EXTENT_ITEM_KEY;
 		offset = key->offset - btrfs_file_extent_offset(eb, item);
 
+		/*
+		 * Manually record dirty extent, as here we did a shallow
+		 * file extent item copy and skip normal backref update,
+		 * but modifying extent tree all by ourselves.
+		 * So need to manually record dirty extent for qgroup,
+		 * as the owner of the file extent changed from log tree
+		 * (doesn't affect qgroup) to fs/file tree(affects qgroup)
+		 */
+		ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info,
+				btrfs_file_extent_disk_bytenr(eb, item),
+				btrfs_file_extent_disk_num_bytes(eb, item),
+				GFP_NOFS);
+		if (ret < 0)
+			goto out;
+
 		if (ins.objectid > 0) {
 			u64 csum_start;
 			u64 csum_end;
@@ -2807,7 +2823,7 @@
 	 */
 	mutex_unlock(&root->log_mutex);
 
-	btrfs_init_log_ctx(&root_log_ctx);
+	btrfs_init_log_ctx(&root_log_ctx, NULL);
 
 	mutex_lock(&log_root_tree->log_mutex);
 	atomic_inc(&log_root_tree->log_batch);
@@ -2851,6 +2867,7 @@
 
 	if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
 		blk_finish_plug(&plug);
+		list_del_init(&root_log_ctx.list);
 		mutex_unlock(&log_root_tree->log_mutex);
 		ret = root_log_ctx.log_ret;
 		goto out;
@@ -4469,7 +4486,8 @@
 static int btrfs_check_ref_name_override(struct extent_buffer *eb,
 					 const int slot,
 					 const struct btrfs_key *key,
-					 struct inode *inode)
+					 struct inode *inode,
+					 u64 *other_ino)
 {
 	int ret;
 	struct btrfs_path *search_path;
@@ -4528,7 +4546,16 @@
 					   search_path, parent,
 					   name, this_name_len, 0);
 		if (di && !IS_ERR(di)) {
-			ret = 1;
+			struct btrfs_key di_key;
+
+			btrfs_dir_item_key_to_cpu(search_path->nodes[0],
+						  di, &di_key);
+			if (di_key.type == BTRFS_INODE_ITEM_KEY) {
+				ret = 1;
+				*other_ino = di_key.objectid;
+			} else {
+				ret = -EAGAIN;
+			}
 			goto out;
 		} else if (IS_ERR(di)) {
 			ret = PTR_ERR(di);
@@ -4722,16 +4749,72 @@
 		if ((min_key.type == BTRFS_INODE_REF_KEY ||
 		     min_key.type == BTRFS_INODE_EXTREF_KEY) &&
 		    BTRFS_I(inode)->generation == trans->transid) {
+			u64 other_ino = 0;
+
 			ret = btrfs_check_ref_name_override(path->nodes[0],
 							    path->slots[0],
-							    &min_key, inode);
+							    &min_key, inode,
+							    &other_ino);
 			if (ret < 0) {
 				err = ret;
 				goto out_unlock;
-			} else if (ret > 0) {
-				err = 1;
-				btrfs_set_log_full_commit(root->fs_info, trans);
-				goto out_unlock;
+			} else if (ret > 0 && ctx &&
+				   other_ino != btrfs_ino(ctx->inode)) {
+				struct btrfs_key inode_key;
+				struct inode *other_inode;
+
+				if (ins_nr > 0) {
+					ins_nr++;
+				} else {
+					ins_nr = 1;
+					ins_start_slot = path->slots[0];
+				}
+				ret = copy_items(trans, inode, dst_path, path,
+						 &last_extent, ins_start_slot,
+						 ins_nr, inode_only,
+						 logged_isize);
+				if (ret < 0) {
+					err = ret;
+					goto out_unlock;
+				}
+				ins_nr = 0;
+				btrfs_release_path(path);
+				inode_key.objectid = other_ino;
+				inode_key.type = BTRFS_INODE_ITEM_KEY;
+				inode_key.offset = 0;
+				other_inode = btrfs_iget(root->fs_info->sb,
+							 &inode_key, root,
+							 NULL);
+				/*
+				 * If the other inode that had a conflicting dir
+				 * entry was deleted in the current transaction,
+				 * we don't need to do more work nor fallback to
+				 * a transaction commit.
+				 */
+				if (IS_ERR(other_inode) &&
+				    PTR_ERR(other_inode) == -ENOENT) {
+					goto next_key;
+				} else if (IS_ERR(other_inode)) {
+					err = PTR_ERR(other_inode);
+					goto out_unlock;
+				}
+				/*
+				 * We are safe logging the other inode without
+				 * acquiring its i_mutex as long as we log with
+				 * the LOG_INODE_EXISTS mode. We're safe against
+				 * concurrent renames of the other inode as well
+				 * because during a rename we pin the log and
+				 * update the log with the new name before we
+				 * unpin it.
+				 */
+				err = btrfs_log_inode(trans, root, other_inode,
+						      LOG_INODE_EXISTS,
+						      0, LLONG_MAX, ctx);
+				iput(other_inode);
+				if (err)
+					goto out_unlock;
+				else
+					goto next_key;
 			}
 		}
 
@@ -4799,7 +4882,7 @@
 			ins_nr = 0;
 		}
 		btrfs_release_path(path);
-
+next_key:
 		if (min_key.offset < (u64)-1) {
 			min_key.offset++;
 		} else if (min_key.type < max_key.type) {
@@ -4993,8 +5076,12 @@
 		if (!parent || d_really_is_negative(parent) || sb != parent->d_sb)
 			break;
 
-		if (IS_ROOT(parent))
+		if (IS_ROOT(parent)) {
+			inode = d_inode(parent);
+			if (btrfs_must_commit_transaction(trans, inode))
+				ret = 1;
 			break;
+		}
 
 		parent = dget_parent(parent);
 		dput(old_parent);
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index a9f1b75..ab858e3 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -30,15 +30,18 @@
 	int log_transid;
 	int io_err;
 	bool log_new_dentries;
+	struct inode *inode;
 	struct list_head list;
 };
 
-static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
+static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
+				      struct inode *inode)
 {
 	ctx->log_ret = 0;
 	ctx->log_transid = 0;
 	ctx->io_err = 0;
 	ctx->log_new_dentries = false;
+	ctx->inode = inode;
 	INIT_LIST_HEAD(&ctx->list);
 }
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 51f1255..035efce 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -834,10 +834,6 @@
 	struct btrfs_device *device;
 
 	device = container_of(work, struct btrfs_device, rcu_work);
-
-	if (device->bdev)
-		blkdev_put(device->bdev, device->mode);
-
 	rcu_string_free(device->name);
 	kfree(device);
 }
@@ -852,6 +848,17 @@
 	schedule_work(&device->rcu_work);
 }
 
+static void btrfs_close_bdev(struct btrfs_device *device)
+{
+	if (device->bdev && device->writeable) {
+		sync_blockdev(device->bdev);
+		invalidate_bdev(device->bdev);
+	}
+
+	if (device->bdev)
+		blkdev_put(device->bdev, device->mode);
+}
+
 static void btrfs_close_one_device(struct btrfs_device *device)
 {
 	struct btrfs_fs_devices *fs_devices = device->fs_devices;
@@ -870,10 +877,7 @@
 	if (device->missing)
 		fs_devices->missing_devices--;
 
-	if (device->bdev && device->writeable) {
-		sync_blockdev(device->bdev);
-		invalidate_bdev(device->bdev);
-	}
+	btrfs_close_bdev(device);
 
 	new_device = btrfs_alloc_device(NULL, &device->devid,
 					device->uuid);
@@ -1932,6 +1936,8 @@
 		btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
 	}
 
+	btrfs_close_bdev(device);
+
 	call_rcu(&device->rcu, free_device);
 
 	num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -2025,6 +2031,9 @@
 		/* zero out the old super if it is writable */
 		btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
 	}
+
+	btrfs_close_bdev(srcdev);
+
 	call_rcu(&srcdev->rcu, free_device);
 
 	/*
@@ -2080,6 +2089,8 @@
 	 * the device_list_mutex lock.
 	 */
 	btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+
+	btrfs_close_bdev(tgtdev);
 	call_rcu(&tgtdev->rcu, free_device);
 }
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 99115ca..16e6ded 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1347,9 +1347,12 @@
 {
 	struct inode *inode = &ci->vfs_inode;
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
-	struct ceph_mds_session *session = *psession;
+	struct ceph_mds_session *session = NULL;
 	int mds;
+
 	dout("ceph_flush_snaps %p\n", inode);
+	if (psession)
+		session = *psession;
 retry:
 	spin_lock(&ci->i_ceph_lock);
 	if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index c64a0b7..df4b3e6 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -597,7 +597,7 @@
 	if (is_hash_order(new_pos)) {
 		/* no need to reset last_name for a forward seek when
 		 * dentries are sotred in hash order */
-	} else if (fi->frag |= fpos_frag(new_pos)) {
+	} else if (fi->frag != fpos_frag(new_pos)) {
 		return true;
 	}
 	rinfo = fi->last_readdir ? &fi->last_readdir->r_reply_info : NULL;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fa59a85..f72d4ae 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2759,6 +2759,7 @@
 	} else {
 		path = NULL;
 		pathlen = 0;
+		pathbase = 0;
 	}
 
 	spin_lock(&ci->i_ceph_lock);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 6bbec5e..14ae4b8 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -609,6 +609,9 @@
 	char *s, *p;
 	char sep;
 
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
+		return dget(sb->s_root);
+
 	full_path = cifs_build_path_to_root(vol, cifs_sb,
 					    cifs_sb_master_tcon(cifs_sb));
 	if (full_path == NULL)
@@ -686,26 +689,22 @@
 	cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
 	if (cifs_sb->mountdata == NULL) {
 		root = ERR_PTR(-ENOMEM);
-		goto out_cifs_sb;
+		goto out_free;
 	}
 
-	if (volume_info->prepath) {
-		cifs_sb->prepath = kstrdup(volume_info->prepath, GFP_KERNEL);
-		if (cifs_sb->prepath == NULL) {
-			root = ERR_PTR(-ENOMEM);
-			goto out_cifs_sb;
-		}
+	rc = cifs_setup_cifs_sb(volume_info, cifs_sb);
+	if (rc) {
+		root = ERR_PTR(rc);
+		goto out_free;
 	}
 
-	cifs_setup_cifs_sb(volume_info, cifs_sb);
-
 	rc = cifs_mount(cifs_sb, volume_info);
 	if (rc) {
 		if (!(flags & MS_SILENT))
 			cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n",
 				 rc);
 		root = ERR_PTR(rc);
-		goto out_mountdata;
+		goto out_free;
 	}
 
 	mnt_data.vol = volume_info;
@@ -735,11 +734,7 @@
 		sb->s_flags |= MS_ACTIVE;
 	}
 
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
-		root = dget(sb->s_root);
-	else
-		root = cifs_get_root(volume_info, sb);
-
+	root = cifs_get_root(volume_info, sb);
 	if (IS_ERR(root))
 		goto out_super;
 
@@ -752,9 +747,9 @@
 	cifs_cleanup_volume_info(volume_info);
 	return root;
 
-out_mountdata:
+out_free:
+	kfree(cifs_sb->prepath);
 	kfree(cifs_sb->mountdata);
-out_cifs_sb:
 	kfree(cifs_sb);
 out_nls:
 	unload_nls(volume_info->local_nls);
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 1243bd3..95dab43 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -184,7 +184,7 @@
 			         unsigned int to_read);
 extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
 				      struct page *page, unsigned int to_read);
-extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
+extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 			       struct cifs_sb_info *cifs_sb);
 extern int cifs_match_super(struct super_block *, void *);
 extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7ae0328..2e4f4ba 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2781,6 +2781,24 @@
 	return 1;
 }
 
+static int
+match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data)
+{
+	struct cifs_sb_info *old = CIFS_SB(sb);
+	struct cifs_sb_info *new = mnt_data->cifs_sb;
+
+	if (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) {
+		if (!(new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH))
+			return 0;
+		/* The prepath should be null terminated strings */
+		if (strcmp(new->prepath, old->prepath))
+			return 0;
+
+		return 1;
+	}
+	return 0;
+}
+
 int
 cifs_match_super(struct super_block *sb, void *data)
 {
@@ -2808,7 +2826,8 @@
 
 	if (!match_server(tcp_srv, volume_info) ||
 	    !match_session(ses, volume_info) ||
-	    !match_tcon(tcon, volume_info->UNC)) {
+	    !match_tcon(tcon, volume_info->UNC) ||
+	    !match_prepath(sb, mnt_data)) {
 		rc = 0;
 		goto out;
 	}
@@ -3222,7 +3241,7 @@
 	}
 }
 
-void cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
+int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 			struct cifs_sb_info *cifs_sb)
 {
 	INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
@@ -3316,6 +3335,14 @@
 
 	if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm))
 		cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n");
+
+	if (pvolume_info->prepath) {
+		cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL);
+		if (cifs_sb->prepath == NULL)
+			return -ENOMEM;
+	}
+
+	return 0;
 }
 
 static void
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index c30cf49..2c6312d 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -333,6 +333,7 @@
 		if (bin_attr->cb_max_size &&
 			*ppos + count > bin_attr->cb_max_size) {
 			len = -EFBIG;
+			goto out;
 		}
 
 		tbuf = vmalloc(*ppos + count);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 0f9961e..ed115ac 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -11,6 +11,7 @@
 #include <linux/random.h>
 #include <linux/string.h>
 #include <linux/fscrypto.h>
+#include <linux/mount.h>
 
 static int inode_has_encryption_context(struct inode *inode)
 {
@@ -92,26 +93,42 @@
 	return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
 }
 
-int fscrypt_process_policy(struct inode *inode,
+int fscrypt_process_policy(struct file *filp,
 				const struct fscrypt_policy *policy)
 {
+	struct inode *inode = file_inode(filp);
+	int ret;
+
+	if (!inode_owner_or_capable(inode))
+		return -EACCES;
+
 	if (policy->version != 0)
 		return -EINVAL;
 
+	ret = mnt_want_write_file(filp);
+	if (ret)
+		return ret;
+
 	if (!inode_has_encryption_context(inode)) {
-		if (!inode->i_sb->s_cop->empty_dir)
-			return -EOPNOTSUPP;
-		if (!inode->i_sb->s_cop->empty_dir(inode))
-			return -ENOTEMPTY;
-		return create_encryption_context_from_policy(inode, policy);
+		if (!S_ISDIR(inode->i_mode))
+			ret = -EINVAL;
+		else if (!inode->i_sb->s_cop->empty_dir)
+			ret = -EOPNOTSUPP;
+		else if (!inode->i_sb->s_cop->empty_dir(inode))
+			ret = -ENOTEMPTY;
+		else
+			ret = create_encryption_context_from_policy(inode,
+								    policy);
+	} else if (!is_encryption_context_consistent_with_policy(inode,
+								 policy)) {
+		printk(KERN_WARNING
+		       "%s: Policy inconsistent with encryption context\n",
+		       __func__);
+		ret = -EINVAL;
 	}
 
-	if (is_encryption_context_consistent_with_policy(inode, policy))
-		return 0;
-
-	printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n",
-	       __func__);
-	return -EINVAL;
+	mnt_drop_write_file(filp);
+	return ret;
 }
 EXPORT_SYMBOL(fscrypt_process_policy);
 
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index 592059f..354e2ab 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -97,9 +97,6 @@
 
 #define F_DENTRY(filp) ((filp)->f_path.dentry)
 
-#define REAL_FOPS_DEREF(dentry)					\
-	((const struct file_operations *)(dentry)->d_fsdata)
-
 static int open_proxy_open(struct inode *inode, struct file *filp)
 {
 	const struct dentry *dentry = F_DENTRY(filp);
@@ -112,7 +109,7 @@
 		goto out;
 	}
 
-	real_fops = REAL_FOPS_DEREF(dentry);
+	real_fops = debugfs_real_fops(filp);
 	real_fops = fops_get(real_fops);
 	if (!real_fops) {
 		/* Huh? Module did not clean up after itself at exit? */
@@ -143,7 +140,7 @@
 {									\
 	const struct dentry *dentry = F_DENTRY(filp);			\
 	const struct file_operations *real_fops =			\
-		REAL_FOPS_DEREF(dentry);				\
+		debugfs_real_fops(filp);				\
 	int srcu_idx;							\
 	ret_type r;							\
 									\
@@ -176,7 +173,7 @@
 				struct poll_table_struct *wait)
 {
 	const struct dentry *dentry = F_DENTRY(filp);
-	const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
+	const struct file_operations *real_fops = debugfs_real_fops(filp);
 	int srcu_idx;
 	unsigned int r = 0;
 
@@ -193,7 +190,7 @@
 static int full_proxy_release(struct inode *inode, struct file *filp)
 {
 	const struct dentry *dentry = F_DENTRY(filp);
-	const struct file_operations *real_fops = REAL_FOPS_DEREF(dentry);
+	const struct file_operations *real_fops = debugfs_real_fops(filp);
 	const struct file_operations *proxy_fops = filp->f_op;
 	int r = 0;
 
@@ -209,7 +206,7 @@
 	replace_fops(filp, d_inode(dentry)->i_fop);
 	kfree((void *)proxy_fops);
 	fops_put(real_fops);
-	return 0;
+	return r;
 }
 
 static void __full_proxy_fops_init(struct file_operations *proxy_fops,
@@ -241,7 +238,7 @@
 		goto out;
 	}
 
-	real_fops = REAL_FOPS_DEREF(dentry);
+	real_fops = debugfs_real_fops(filp);
 	real_fops = fops_get(real_fops);
 	if (!real_fops) {
 		/* Huh? Module did not cleanup after itself at exit? */
diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
index bba5263..b3e8443 100644
--- a/fs/debugfs/internal.h
+++ b/fs/debugfs/internal.h
@@ -19,8 +19,4 @@
 extern const struct file_operations debugfs_open_proxy_file_operations;
 extern const struct file_operations debugfs_full_proxy_file_operations;
 
-struct dentry *debugfs_create_file_unsafe(const char *name, umode_t mode,
-					struct dentry *parent, void *data,
-					const struct file_operations *fops);
-
 #endif /* _DEBUGFS_INTERNAL_H_ */
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index d116453..442d1a7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -272,13 +272,8 @@
 	struct dentry *root = sb->s_root;
 	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 	struct pts_mount_opts *opts = &fsi->mount_opts;
-	kuid_t root_uid;
-	kgid_t root_gid;
-
-	root_uid = make_kuid(current_user_ns(), 0);
-	root_gid = make_kgid(current_user_ns(), 0);
-	if (!uid_valid(root_uid) || !gid_valid(root_gid))
-		return -EINVAL;
+	kuid_t ptmx_uid = current_fsuid();
+	kgid_t ptmx_gid = current_fsgid();
 
 	inode_lock(d_inode(root));
 
@@ -309,8 +304,8 @@
 
 	mode = S_IFCHR|opts->ptmxmode;
 	init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
-	inode->i_uid = root_uid;
-	inode->i_gid = root_gid;
+	inode->i_uid = ptmx_uid;
+	inode->i_gid = ptmx_gid;
 
 	d_add(dentry, inode);
 
@@ -336,7 +331,6 @@
 	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 	struct pts_mount_opts *opts = &fsi->mount_opts;
 
-	sync_filesystem(sb);
 	err = parse_mount_options(data, PARSE_REMOUNT, opts);
 
 	/*
@@ -395,6 +389,7 @@
 devpts_fill_super(struct super_block *s, void *data, int silent)
 {
 	struct inode *inode;
+	int error;
 
 	s->s_iflags &= ~SB_I_NODEV;
 	s->s_blocksize = 1024;
@@ -403,10 +398,16 @@
 	s->s_op = &devpts_sops;
 	s->s_time_gran = 1;
 
+	error = -ENOMEM;
 	s->s_fs_info = new_pts_fs_info(s);
 	if (!s->s_fs_info)
 		goto fail;
 
+	error = parse_mount_options(data, PARSE_MOUNT, &DEVPTS_SB(s)->mount_opts);
+	if (error)
+		goto fail;
+
+	error = -ENOMEM;
 	inode = new_inode(s);
 	if (!inode)
 		goto fail;
@@ -418,13 +419,21 @@
 	set_nlink(inode, 2);
 
 	s->s_root = d_make_root(inode);
-	if (s->s_root)
-		return 0;
+	if (!s->s_root) {
+		pr_err("get root dentry failed\n");
+		goto fail;
+	}
 
-	pr_err("get root dentry failed\n");
+	error = mknod_ptmx(s);
+	if (error)
+		goto fail_dput;
 
+	return 0;
+fail_dput:
+	dput(s->s_root);
+	s->s_root = NULL;
 fail:
-	return -ENOMEM;
+	return error;
 }
 
 /*
@@ -436,43 +445,15 @@
 static struct dentry *devpts_mount(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data)
 {
-	int error;
-	struct pts_mount_opts opts;
-	struct super_block *s;
-
-	error = parse_mount_options(data, PARSE_MOUNT, &opts);
-	if (error)
-		return ERR_PTR(error);
-
-	s = sget(fs_type, NULL, set_anon_super, flags, NULL);
-	if (IS_ERR(s))
-		return ERR_CAST(s);
-
-	if (!s->s_root) {
-		error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0);
-		if (error)
-			goto out_undo_sget;
-		s->s_flags |= MS_ACTIVE;
-	}
-
-	memcpy(&(DEVPTS_SB(s))->mount_opts, &opts, sizeof(opts));
-
-	error = mknod_ptmx(s);
-	if (error)
-		goto out_undo_sget;
-
-	return dget(s->s_root);
-
-out_undo_sget:
-	deactivate_locked_super(s);
-	return ERR_PTR(error);
+	return mount_nodev(fs_type, flags, data, devpts_fill_super);
 }
 
 static void devpts_kill_sb(struct super_block *sb)
 {
 	struct pts_fs_info *fsi = DEVPTS_SB(sb);
 
-	ida_destroy(&fsi->allocated_ptys);
+	if (fsi)
+		ida_destroy(&fsi->allocated_ptys);
 	kfree(fsi);
 	kill_litter_super(sb);
 }
@@ -585,7 +566,8 @@
  */
 void *devpts_get_priv(struct dentry *dentry)
 {
-	WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC);
+	if (dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC)
+		return NULL;
 	return dentry->d_fsdata;
 }
 
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index eea6491..466f7d6 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -607,20 +607,54 @@
 static const struct file_operations format3_fops;
 static const struct file_operations format4_fops;
 
-static int table_open(struct inode *inode, struct file *file)
+static int table_open1(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
-	int ret = -1;
+	int ret;
 
-	if (file->f_op == &format1_fops)
-		ret = seq_open(file, &format1_seq_ops);
-	else if (file->f_op == &format2_fops)
-		ret = seq_open(file, &format2_seq_ops);
-	else if (file->f_op == &format3_fops)
-		ret = seq_open(file, &format3_seq_ops);
-	else if (file->f_op == &format4_fops)
-		ret = seq_open(file, &format4_seq_ops);
+	ret = seq_open(file, &format1_seq_ops);
+	if (ret)
+		return ret;
 
+	seq = file->private_data;
+	seq->private = inode->i_private; /* the dlm_ls */
+	return 0;
+}
+
+static int table_open2(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file, &format2_seq_ops);
+	if (ret)
+		return ret;
+
+	seq = file->private_data;
+	seq->private = inode->i_private; /* the dlm_ls */
+	return 0;
+}
+
+static int table_open3(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file, &format3_seq_ops);
+	if (ret)
+		return ret;
+
+	seq = file->private_data;
+	seq->private = inode->i_private; /* the dlm_ls */
+	return 0;
+}
+
+static int table_open4(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file, &format4_seq_ops);
 	if (ret)
 		return ret;
 
@@ -631,7 +665,7 @@
 
 static const struct file_operations format1_fops = {
 	.owner   = THIS_MODULE,
-	.open    = table_open,
+	.open    = table_open1,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
 	.release = seq_release
@@ -639,7 +673,7 @@
 
 static const struct file_operations format2_fops = {
 	.owner   = THIS_MODULE,
-	.open    = table_open,
+	.open    = table_open2,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
 	.release = seq_release
@@ -647,7 +681,7 @@
 
 static const struct file_operations format3_fops = {
 	.owner   = THIS_MODULE,
-	.open    = table_open,
+	.open    = table_open3,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
 	.release = seq_release
@@ -655,7 +689,7 @@
 
 static const struct file_operations format4_fops = {
 	.owner   = THIS_MODULE,
-	.open    = table_open,
+	.open    = table_open4,
 	.read    = seq_read,
 	.llseek  = seq_lseek,
 	.release = seq_release
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 1d73fc6..cbb50ca 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -105,7 +105,10 @@
 
 	inode->i_private = var;
 
-	efivar_entry_add(var, &efivarfs_list);
+	err = efivar_entry_add(var, &efivarfs_list);
+	if (err)
+		goto out;
+
 	d_instantiate(dentry, inode);
 	dget(dentry);
 out:
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 688ccc1..d7a7c53 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -157,12 +157,14 @@
 		goto fail_inode;
 	}
 
+	efivar_entry_size(entry, &size);
+	err = efivar_entry_add(entry, &efivarfs_list);
+	if (err)
+		goto fail_inode;
+
 	/* copied by the above to local storage in the dentry. */
 	kfree(name);
 
-	efivar_entry_size(entry, &size);
-	efivar_entry_add(entry, &efivarfs_list);
-
 	inode_lock(inode);
 	inode->i_private = entry;
 	i_size_write(inode, size + sizeof(entry->var.Attributes));
@@ -182,7 +184,10 @@
 
 static int efivarfs_destroy(struct efivar_entry *entry, void *data)
 {
-	efivar_entry_remove(entry);
+	int err = efivar_entry_remove(entry);
+
+	if (err)
+		return err;
 	kfree(entry);
 	return 0;
 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3131747..c6ea25a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5466,8 +5466,6 @@
 						      sbi->s_want_extra_isize,
 						      iloc, handle);
 			if (ret) {
-				ext4_set_inode_state(inode,
-						     EXT4_STATE_NO_EXPAND);
 				if (mnt_count !=
 					le16_to_cpu(sbi->s_es->s_mnt_count)) {
 					ext4_warning(inode->i_sb,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 10686fd..1bb7df5 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -776,7 +776,7 @@
 				   (struct fscrypt_policy __user *)arg,
 				   sizeof(policy)))
 			return -EFAULT;
-		return fscrypt_process_policy(inode, &policy);
+		return fscrypt_process_policy(filp, &policy);
 #else
 		return -EOPNOTSUPP;
 #endif
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1c593aa..3ec8708 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2211,6 +2211,7 @@
 
 /* Called at mount-time, super-block is locked */
 static int ext4_check_descriptors(struct super_block *sb,
+				  ext4_fsblk_t sb_block,
 				  ext4_group_t *first_not_zeroed)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2241,6 +2242,11 @@
 			grp = i;
 
 		block_bitmap = ext4_block_bitmap(sb, gdp);
+		if (block_bitmap == sb_block) {
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Block bitmap for group %u overlaps "
+				 "superblock", i);
+		}
 		if (block_bitmap < first_block || block_bitmap > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Block bitmap for group %u not in group "
@@ -2248,6 +2254,11 @@
 			return 0;
 		}
 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
+		if (inode_bitmap == sb_block) {
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Inode bitmap for group %u overlaps "
+				 "superblock", i);
+		}
 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
 			       "Inode bitmap for group %u not in group "
@@ -2255,6 +2266,11 @@
 			return 0;
 		}
 		inode_table = ext4_inode_table(sb, gdp);
+		if (inode_table == sb_block) {
+			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+				 "Inode table for group %u overlaps "
+				 "superblock", i);
+		}
 		if (inode_table < first_block ||
 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -3757,7 +3773,7 @@
 			goto failed_mount2;
 		}
 	}
-	if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
+	if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
 		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
 		ret = -EFSCORRUPTED;
 		goto failed_mount2;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 39e9cfb..2eb935c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1353,15 +1353,19 @@
 	size_t min_offs, free;
 	int total_ino;
 	void *base, *start, *end;
-	int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
+	int error = 0, tried_min_extra_isize = 0;
 	int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
+	int isize_diff;	/* How much do we need to grow i_extra_isize */
 
 	down_write(&EXT4_I(inode)->xattr_sem);
+	/*
+	 * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
+	 */
+	ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
 retry:
-	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
-		up_write(&EXT4_I(inode)->xattr_sem);
-		return 0;
-	}
+	isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
+	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
+		goto out;
 
 	header = IHDR(inode, raw_inode);
 	entry = IFIRST(header);
@@ -1382,7 +1386,7 @@
 		goto cleanup;
 
 	free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
-	if (free >= new_extra_isize) {
+	if (free >= isize_diff) {
 		entry = IFIRST(header);
 		ext4_xattr_shift_entries(entry,	EXT4_I(inode)->i_extra_isize
 				- new_extra_isize, (void *)raw_inode +
@@ -1390,8 +1394,7 @@
 				(void *)header, total_ino,
 				inode->i_sb->s_blocksize);
 		EXT4_I(inode)->i_extra_isize = new_extra_isize;
-		error = 0;
-		goto cleanup;
+		goto out;
 	}
 
 	/*
@@ -1414,7 +1417,7 @@
 		end = bh->b_data + bh->b_size;
 		min_offs = end - base;
 		free = ext4_xattr_free_space(first, &min_offs, base, NULL);
-		if (free < new_extra_isize) {
+		if (free < isize_diff) {
 			if (!tried_min_extra_isize && s_min_extra_isize) {
 				tried_min_extra_isize++;
 				new_extra_isize = s_min_extra_isize;
@@ -1428,7 +1431,7 @@
 		free = inode->i_sb->s_blocksize;
 	}
 
-	while (new_extra_isize > 0) {
+	while (isize_diff > 0) {
 		size_t offs, size, entry_size;
 		struct ext4_xattr_entry *small_entry = NULL;
 		struct ext4_xattr_info i = {
@@ -1459,7 +1462,7 @@
 			EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
 					EXT4_XATTR_LEN(last->e_name_len);
 			if (total_size <= free && total_size < min_total_size) {
-				if (total_size < new_extra_isize) {
+				if (total_size < isize_diff) {
 					small_entry = last;
 				} else {
 					entry = last;
@@ -1514,22 +1517,22 @@
 		error = ext4_xattr_ibody_set(handle, inode, &i, is);
 		if (error)
 			goto cleanup;
+		total_ino -= entry_size;
 
 		entry = IFIRST(header);
-		if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
-			shift_bytes = new_extra_isize;
+		if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
+			shift_bytes = isize_diff;
 		else
-			shift_bytes = entry_size + size;
+			shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
 		/* Adjust the offsets and shift the remaining entries ahead */
-		ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
-			shift_bytes, (void *)raw_inode +
-			EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
-			(void *)header, total_ino - entry_size,
-			inode->i_sb->s_blocksize);
+		ext4_xattr_shift_entries(entry, -shift_bytes,
+			(void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+			EXT4_I(inode)->i_extra_isize + shift_bytes,
+			(void *)header, total_ino, inode->i_sb->s_blocksize);
 
-		extra_isize += shift_bytes;
-		new_extra_isize -= shift_bytes;
-		EXT4_I(inode)->i_extra_isize = extra_isize;
+		isize_diff -= shift_bytes;
+		EXT4_I(inode)->i_extra_isize += shift_bytes;
+		header = IHDR(inode, raw_inode);
 
 		i.name = b_entry_name;
 		i.value = buffer;
@@ -1551,6 +1554,8 @@
 		kfree(bs);
 	}
 	brelse(bh);
+out:
+	ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
 	up_write(&EXT4_I(inode)->xattr_sem);
 	return 0;
 
@@ -1562,6 +1567,10 @@
 	kfree(is);
 	kfree(bs);
 	brelse(bh);
+	/*
+	 * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
+	 * size expansion failed.
+	 */
 	up_write(&EXT4_I(inode)->xattr_sem);
 	return error;
 }
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 69dd3e6..a92e783 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -24,6 +24,7 @@
 #define EXT4_XATTR_INDEX_SYSTEM			7
 #define EXT4_XATTR_INDEX_RICHACL		8
 #define EXT4_XATTR_INDEX_ENCRYPTION		9
+#define EXT4_XATTR_INDEX_HURD			10 /* Reserved for Hurd */
 
 struct ext4_xattr_header {
 	__le32	h_magic;	/* magic number for identification */
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index d64d2a5..ccb401e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1699,11 +1699,11 @@
 	trace_f2fs_write_end(inode, pos, len, copied);
 
 	set_page_dirty(page);
-	f2fs_put_page(page, 1);
 
 	if (pos + copied > i_size_read(inode))
 		f2fs_i_size_write(inode, pos + copied);
 
+	f2fs_put_page(page, 1);
 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
 	return copied;
 }
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 675fa79..14f5fe2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -538,7 +538,7 @@
 	/* NAT cache management */
 	struct radix_tree_root nat_root;/* root of the nat entry cache */
 	struct radix_tree_root nat_set_root;/* root of the nat set cache */
-	struct percpu_rw_semaphore nat_tree_lock;	/* protect nat_tree_lock */
+	struct rw_semaphore nat_tree_lock;	/* protect nat_tree_lock */
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
 	unsigned int nat_cnt;		/* the # of cached nat entries */
 	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
@@ -787,7 +787,7 @@
 	struct f2fs_checkpoint *ckpt;		/* raw checkpoint pointer */
 	struct inode *meta_inode;		/* cache meta blocks */
 	struct mutex cp_mutex;			/* checkpoint procedure lock */
-	struct percpu_rw_semaphore cp_rwsem;		/* blocking FS operations */
+	struct rw_semaphore cp_rwsem;		/* blocking FS operations */
 	struct rw_semaphore node_write;		/* locking node writes */
 	wait_queue_head_t cp_wait;
 	unsigned long last_time[MAX_TIME];	/* to store time in jiffies */
@@ -1074,22 +1074,22 @@
 
 static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
 {
-	percpu_down_read(&sbi->cp_rwsem);
+	down_read(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
 {
-	percpu_up_read(&sbi->cp_rwsem);
+	up_read(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
 {
-	percpu_down_write(&sbi->cp_rwsem);
+	down_write(&sbi->cp_rwsem);
 }
 
 static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
 {
-	percpu_up_write(&sbi->cp_rwsem);
+	up_write(&sbi->cp_rwsem);
 }
 
 static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0e493f6..28f4f4c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1757,21 +1757,14 @@
 {
 	struct fscrypt_policy policy;
 	struct inode *inode = file_inode(filp);
-	int ret;
 
 	if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
 							sizeof(policy)))
 		return -EFAULT;
 
-	ret = mnt_want_write_file(filp);
-	if (ret)
-		return ret;
-
 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
-	ret = fscrypt_process_policy(inode, &policy);
 
-	mnt_drop_write_file(filp);
-	return ret;
+	return fscrypt_process_policy(filp, &policy);
 }
 
 static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
@@ -2086,15 +2079,19 @@
 	if (unlikely(f2fs_readonly(src->i_sb)))
 		return -EROFS;
 
-	if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode))
-		return -EISDIR;
+	if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode))
+		return -EINVAL;
 
 	if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
 		return -EOPNOTSUPP;
 
 	inode_lock(src);
-	if (src != dst)
-		inode_lock(dst);
+	if (src != dst) {
+		if (!inode_trylock(dst)) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
 
 	ret = -EINVAL;
 	if (pos_in + len > src->i_size || pos_in + len < pos_in)
@@ -2152,6 +2149,7 @@
 out_unlock:
 	if (src != dst)
 		inode_unlock(dst);
+out:
 	inode_unlock(src);
 	return ret;
 }
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b2fa4b6..f75d197 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -206,14 +206,14 @@
 	struct nat_entry *e;
 	bool need = false;
 
-	percpu_down_read(&nm_i->nat_tree_lock);
+	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e) {
 		if (!get_nat_flag(e, IS_CHECKPOINTED) &&
 				!get_nat_flag(e, HAS_FSYNCED_INODE))
 			need = true;
 	}
-	percpu_up_read(&nm_i->nat_tree_lock);
+	up_read(&nm_i->nat_tree_lock);
 	return need;
 }
 
@@ -223,11 +223,11 @@
 	struct nat_entry *e;
 	bool is_cp = true;
 
-	percpu_down_read(&nm_i->nat_tree_lock);
+	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
 		is_cp = false;
-	percpu_up_read(&nm_i->nat_tree_lock);
+	up_read(&nm_i->nat_tree_lock);
 	return is_cp;
 }
 
@@ -237,13 +237,13 @@
 	struct nat_entry *e;
 	bool need_update = true;
 
-	percpu_down_read(&nm_i->nat_tree_lock);
+	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ino);
 	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
 			(get_nat_flag(e, IS_CHECKPOINTED) ||
 			 get_nat_flag(e, HAS_FSYNCED_INODE)))
 		need_update = false;
-	percpu_up_read(&nm_i->nat_tree_lock);
+	up_read(&nm_i->nat_tree_lock);
 	return need_update;
 }
 
@@ -284,7 +284,7 @@
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	struct nat_entry *e;
 
-	percpu_down_write(&nm_i->nat_tree_lock);
+	down_write(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, ni->nid);
 	if (!e) {
 		e = grab_nat_entry(nm_i, ni->nid);
@@ -334,7 +334,7 @@
 			set_nat_flag(e, HAS_FSYNCED_INODE, true);
 		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
 	}
-	percpu_up_write(&nm_i->nat_tree_lock);
+	up_write(&nm_i->nat_tree_lock);
 }
 
 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -342,7 +342,8 @@
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
 	int nr = nr_shrink;
 
-	percpu_down_write(&nm_i->nat_tree_lock);
+	if (!down_write_trylock(&nm_i->nat_tree_lock))
+		return 0;
 
 	while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
 		struct nat_entry *ne;
@@ -351,7 +352,7 @@
 		__del_from_nat_cache(nm_i, ne);
 		nr_shrink--;
 	}
-	percpu_up_write(&nm_i->nat_tree_lock);
+	up_write(&nm_i->nat_tree_lock);
 	return nr - nr_shrink;
 }
 
@@ -373,13 +374,13 @@
 	ni->nid = nid;
 
 	/* Check nat cache */
-	percpu_down_read(&nm_i->nat_tree_lock);
+	down_read(&nm_i->nat_tree_lock);
 	e = __lookup_nat_cache(nm_i, nid);
 	if (e) {
 		ni->ino = nat_get_ino(e);
 		ni->blk_addr = nat_get_blkaddr(e);
 		ni->version = nat_get_version(e);
-		percpu_up_read(&nm_i->nat_tree_lock);
+		up_read(&nm_i->nat_tree_lock);
 		return;
 	}
 
@@ -403,11 +404,11 @@
 	node_info_from_raw_nat(ni, &ne);
 	f2fs_put_page(page, 1);
 cache:
-	percpu_up_read(&nm_i->nat_tree_lock);
+	up_read(&nm_i->nat_tree_lock);
 	/* cache nat entry */
-	percpu_down_write(&nm_i->nat_tree_lock);
+	down_write(&nm_i->nat_tree_lock);
 	cache_nat_entry(sbi, nid, &ne);
-	percpu_up_write(&nm_i->nat_tree_lock);
+	up_write(&nm_i->nat_tree_lock);
 }
 
 /*
@@ -1788,7 +1789,7 @@
 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
 							META_NAT, true);
 
-	percpu_down_read(&nm_i->nat_tree_lock);
+	down_read(&nm_i->nat_tree_lock);
 
 	while (1) {
 		struct page *page = get_current_nat_page(sbi, nid);
@@ -1820,7 +1821,7 @@
 			remove_free_nid(nm_i, nid);
 	}
 	up_read(&curseg->journal_rwsem);
-	percpu_up_read(&nm_i->nat_tree_lock);
+	up_read(&nm_i->nat_tree_lock);
 
 	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
 					nm_i->ra_nid_pages, META_NAT, false);
@@ -2209,7 +2210,7 @@
 	if (!nm_i->dirty_nat_cnt)
 		return;
 
-	percpu_down_write(&nm_i->nat_tree_lock);
+	down_write(&nm_i->nat_tree_lock);
 
 	/*
 	 * if there are no enough space in journal to store dirty nat
@@ -2232,7 +2233,7 @@
 	list_for_each_entry_safe(set, tmp, &sets, set_list)
 		__flush_nat_entry_set(sbi, set);
 
-	percpu_up_write(&nm_i->nat_tree_lock);
+	up_write(&nm_i->nat_tree_lock);
 
 	f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
@@ -2268,8 +2269,7 @@
 
 	mutex_init(&nm_i->build_lock);
 	spin_lock_init(&nm_i->free_nid_list_lock);
-	if (percpu_init_rwsem(&nm_i->nat_tree_lock))
-		return -ENOMEM;
+	init_rwsem(&nm_i->nat_tree_lock);
 
 	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
 	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2326,7 +2326,7 @@
 	spin_unlock(&nm_i->free_nid_list_lock);
 
 	/* destroy nat cache */
-	percpu_down_write(&nm_i->nat_tree_lock);
+	down_write(&nm_i->nat_tree_lock);
 	while ((found = __gang_lookup_nat_cache(nm_i,
 					nid, NATVEC_SIZE, natvec))) {
 		unsigned idx;
@@ -2351,9 +2351,8 @@
 			kmem_cache_free(nat_entry_set_slab, setvec[idx]);
 		}
 	}
-	percpu_up_write(&nm_i->nat_tree_lock);
+	up_write(&nm_i->nat_tree_lock);
 
-	percpu_free_rwsem(&nm_i->nat_tree_lock);
 	kfree(nm_i->nat_bitmap);
 	sbi->nm_info = NULL;
 	kfree(nm_i);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1b86d3f..7f863a6 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -706,8 +706,6 @@
 		percpu_counter_destroy(&sbi->nr_pages[i]);
 	percpu_counter_destroy(&sbi->alloc_valid_block_count);
 	percpu_counter_destroy(&sbi->total_valid_inode_count);
-
-	percpu_free_rwsem(&sbi->cp_rwsem);
 }
 
 static void f2fs_put_super(struct super_block *sb)
@@ -1483,9 +1481,6 @@
 {
 	int i, err;
 
-	if (percpu_init_rwsem(&sbi->cp_rwsem))
-		return -ENOMEM;
-
 	for (i = 0; i < NR_COUNT_TYPE; i++) {
 		err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
 		if (err)
@@ -1686,6 +1681,7 @@
 		sbi->write_io[i].bio = NULL;
 	}
 
+	init_rwsem(&sbi->cp_rwsem);
 	init_waitqueue_head(&sbi->cp_wait);
 	init_sb_info(sbi);
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 4d09d44..05713a5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1949,6 +1949,12 @@
 {
 	struct backing_dev_info *bdi;
 
+	/*
+	 * If we are expecting writeback progress we must submit plugged IO.
+	 */
+	if (blk_needs_flush_plug(current))
+		blk_schedule_flush_plug(current);
+
 	if (!nr_pages)
 		nr_pages = get_nr_dirty_pages();
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f394aff..3988b43 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -530,13 +530,13 @@
 	req->out.args[0].size = count;
 }
 
-static void fuse_release_user_pages(struct fuse_req *req, int write)
+static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
 {
 	unsigned i;
 
 	for (i = 0; i < req->num_pages; i++) {
 		struct page *page = req->pages[i];
-		if (write)
+		if (should_dirty)
 			set_page_dirty_lock(page);
 		put_page(page);
 	}
@@ -1320,6 +1320,7 @@
 		       loff_t *ppos, int flags)
 {
 	int write = flags & FUSE_DIO_WRITE;
+	bool should_dirty = !write && iter_is_iovec(iter);
 	int cuse = flags & FUSE_DIO_CUSE;
 	struct file *file = io->file;
 	struct inode *inode = file->f_mapping->host;
@@ -1363,7 +1364,7 @@
 			nres = fuse_send_read(req, io, pos, nbytes, owner);
 
 		if (!io->async)
-			fuse_release_user_pages(req, !write);
+			fuse_release_user_pages(req, should_dirty);
 		if (req->out.h.error) {
 			err = req->out.h.error;
 			break;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 82df368..5a6f52e 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -187,7 +187,7 @@
 		ClearPageChecked(page);
 		if (!page_has_buffers(page)) {
 			create_empty_buffers(page, inode->i_sb->s_blocksize,
-					     (1 << BH_Dirty)|(1 << BH_Uptodate));
+					     BIT(BH_Dirty)|BIT(BH_Uptodate));
 		}
 		gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
 	}
@@ -1147,6 +1147,16 @@
 	if (!page_has_buffers(page))
 		return 0;
 
+	/*
+	 * From xfs_vm_releasepage: mm accommodates an old ext3 case where
+	 * clean pages might not have had the dirty bit cleared.  Thus, it can
+	 * send actual dirty pages to ->releasepage() via shrink_active_list().
+	 *
+	 * As a workaround, we skip pages that contain dirty buffers below.
+	 * Once ->releasepage isn't called on dirty pages anymore, we can warn
+	 * on dirty buffers like we used to here again.
+	 */
+
 	gfs2_log_lock(sdp);
 	spin_lock(&sdp->sd_ail_lock);
 	head = bh = page_buffers(page);
@@ -1156,8 +1166,8 @@
 		bd = bh->b_private;
 		if (bd && bd->bd_tr)
 			goto cannot_release;
-		if (buffer_pinned(bh) || buffer_dirty(bh))
-			goto not_possible;
+		if (buffer_dirty(bh) || WARN_ON(buffer_pinned(bh)))
+			goto cannot_release;
 		bh = bh->b_this_page;
 	} while(bh != head);
 	spin_unlock(&sdp->sd_ail_lock);
@@ -1180,9 +1190,6 @@
 
 	return try_to_free_buffers(page);
 
-not_possible: /* Should never happen */
-	WARN_ON(buffer_dirty(bh));
-	WARN_ON(buffer_pinned(bh));
 cannot_release:
 	spin_unlock(&sdp->sd_ail_lock);
 	gfs2_log_unlock(sdp);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6e2bec1..645721f 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -82,8 +82,8 @@
 	}
 
 	if (!page_has_buffers(page))
-		create_empty_buffers(page, 1 << inode->i_blkbits,
-				     (1 << BH_Uptodate));
+		create_empty_buffers(page, BIT(inode->i_blkbits),
+				     BIT(BH_Uptodate));
 
 	bh = page_buffers(page);
 
@@ -690,7 +690,7 @@
 	BUG_ON(!dblock);
 	BUG_ON(!new);
 
-	bh.b_size = 1 << (inode->i_blkbits + (create ? 0 : 5));
+	bh.b_size = BIT(inode->i_blkbits + (create ? 0 : 5));
 	ret = gfs2_block_map(inode, lblock, &bh, create);
 	*extlen = bh.b_size >> inode->i_blkbits;
 	*dblock = bh.b_blocknr;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index fcb59b2..db8fbeb 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -351,7 +351,7 @@
 	if (hc)
 		return hc;
 
-	hsize = 1 << ip->i_depth;
+	hsize = BIT(ip->i_depth);
 	hsize *= sizeof(__be64);
 	if (hsize != i_size_read(&ip->i_inode)) {
 		gfs2_consist_inode(ip);
@@ -819,8 +819,8 @@
 
 	if (ip->i_diskflags & GFS2_DIF_EXHASH) {
 		struct gfs2_leaf *leaf;
-		unsigned hsize = 1 << ip->i_depth;
-		unsigned index;
+		unsigned int hsize = BIT(ip->i_depth);
+		unsigned int index;
 		u64 ln;
 		if (hsize * sizeof(u64) != i_size_read(inode)) {
 			gfs2_consist_inode(ip);
@@ -932,7 +932,7 @@
 		return -ENOSPC;
 	bn = bh->b_blocknr;
 
-	gfs2_assert(sdp, dip->i_entries < (1 << 16));
+	gfs2_assert(sdp, dip->i_entries < BIT(16));
 	leaf->lf_entries = cpu_to_be16(dip->i_entries);
 
 	/*  Copy dirents  */
@@ -1041,7 +1041,7 @@
 	bn = nbh->b_blocknr;
 
 	/*  Compute the start and len of leaf pointers in the hash table.  */
-	len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth));
+	len = BIT(dip->i_depth - be16_to_cpu(oleaf->lf_depth));
 	half_len = len >> 1;
 	if (!half_len) {
 		pr_warn("i_depth %u lf_depth %u index %u\n",
@@ -1163,7 +1163,7 @@
 	int x;
 	int error = 0;
 
-	hsize = 1 << dip->i_depth;
+	hsize = BIT(dip->i_depth);
 	hsize_bytes = hsize * sizeof(__be64);
 
 	hc = gfs2_dir_get_hash_table(dip);
@@ -1539,7 +1539,7 @@
 	int error = 0;
 	unsigned depth = 0;
 
-	hsize = 1 << dip->i_depth;
+	hsize = BIT(dip->i_depth);
 	hash = gfs2_dir_offset2hash(ctx->pos);
 	index = hash >> (32 - dip->i_depth);
 
@@ -1558,7 +1558,7 @@
 		if (error)
 			break;
 
-		len = 1 << (dip->i_depth - depth);
+		len = BIT(dip->i_depth - depth);
 		index = (index & ~(len - 1)) + len;
 	}
 
@@ -2113,7 +2113,7 @@
 	u64 leaf_no;
 	int error = 0, last;
 
-	hsize = 1 << dip->i_depth;
+	hsize = BIT(dip->i_depth);
 
 	lp = gfs2_dir_get_hash_table(dip);
 	if (IS_ERR(lp))
@@ -2126,7 +2126,7 @@
 			if (error)
 				goto out;
 			leaf = (struct gfs2_leaf *)bh->b_data;
-			len = 1 << (dip->i_depth - be16_to_cpu(leaf->lf_depth));
+			len = BIT(dip->i_depth - be16_to_cpu(leaf->lf_depth));
 
 			next_index = (index & ~(len - 1)) + len;
 			last = ((next_index >= hsize) ? 1 : 0);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 320e65e..360188f 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -395,9 +395,6 @@
 
 	sb_start_pagefault(inode->i_sb);
 
-	/* Update file times before taking page lock */
-	file_update_time(vma->vm_file);
-
 	ret = gfs2_rsqa_alloc(ip);
 	if (ret)
 		goto out;
@@ -409,6 +406,9 @@
 	if (ret)
 		goto out_uninit;
 
+	/* Update file times before taking page lock */
+	file_update_time(vma->vm_file);
+
 	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
 	set_bit(GIF_SW_PAGED, &ip->i_flags);
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 3a90b2b..14cbf60 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -69,7 +69,7 @@
 static DEFINE_SPINLOCK(lru_lock);
 
 #define GFS2_GL_HASH_SHIFT      15
-#define GFS2_GL_HASH_SIZE       (1 << GFS2_GL_HASH_SHIFT)
+#define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)
 
 static struct rhashtable_params ht_parms = {
 	.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
@@ -1781,7 +1781,13 @@
 		return -ENOMEM;
 	}
 
-	register_shrinker(&glock_shrinker);
+	ret = register_shrinker(&glock_shrinker);
+	if (ret) {
+		destroy_workqueue(gfs2_delete_workqueue);
+		destroy_workqueue(glock_workqueue);
+		rhashtable_destroy(&gl_hash_table);
+		return ret;
+	}
 
 	return 0;
 }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index e4da0ec..fb3a810 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -187,6 +187,10 @@
 		}
 
 		gfs2_set_iop(inode);
+
+		inode->i_atime.tv_sec = 0;
+		inode->i_atime.tv_nsec = 0;
+
 		unlock_new_inode(inode);
 	}
 
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 7710dfd..aace8ce 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -85,7 +85,7 @@
 	u64 size = i_size_read(inode);
 	if (size < minsize || size > maxsize)
 		goto err;
-	if (size & ((1 << inode->i_blkbits) - 1))
+	if (size & (BIT(inode->i_blkbits) - 1))
 		goto err;
 	return 0;
 err:
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 74fd013..67d1fc4 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -145,7 +145,9 @@
 	if (!gfs2_qadata_cachep)
 		goto fail;
 
-	register_shrinker(&gfs2_qd_shrinker);
+	error = register_shrinker(&gfs2_qd_shrinker);
+	if (error)
+		goto fail;
 
 	error = register_filesystem(&gfs2_fs_type);
 	if (error)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 950b8be..373639a5 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -216,23 +216,26 @@
 static void gfs2_submit_bhs(int op, int op_flags, struct buffer_head *bhs[],
 			    int num)
 {
-	struct buffer_head *bh = bhs[0];
-	struct bio *bio;
-	int i;
+	while (num > 0) {
+		struct buffer_head *bh = *bhs;
+		struct bio *bio;
 
-	if (!num)
-		return;
-
-	bio = bio_alloc(GFP_NOIO, num);
-	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-	bio->bi_bdev = bh->b_bdev;
-	for (i = 0; i < num; i++) {
-		bh = bhs[i];
-		bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+		bio = bio_alloc(GFP_NOIO, num);
+		bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+		bio->bi_bdev = bh->b_bdev;
+		while (num > 0) {
+			bh = *bhs;
+			if (!bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh))) {
+				BUG_ON(bio->bi_iter.bi_size == 0);
+				break;
+			}
+			bhs++;
+			num--;
+		}
+		bio->bi_end_io = gfs2_meta_read_endio;
+		bio_set_op_attrs(bio, op, op_flags);
+		submit_bio(bio);
 	}
-	bio->bi_end_io = gfs2_meta_read_endio;
-	bio_set_op_attrs(bio, op, op_flags);
-	submit_bio(bio);
 }
 
 /**
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index ef1e182..ff72ac6 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -58,7 +58,7 @@
 	gt->gt_quota_scale_num = 1;
 	gt->gt_quota_scale_den = 1;
 	gt->gt_new_files_jdata = 0;
-	gt->gt_max_readahead = 1 << 18;
+	gt->gt_max_readahead = BIT(18);
 	gt->gt_complain_secs = 10;
 }
 
@@ -284,7 +284,7 @@
 
 	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
 			       GFS2_BASIC_BLOCK_SHIFT;
-	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+	sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
 	sdp->sd_diptrs = (sdp->sd_sb.sb_bsize -
 			  sizeof(struct gfs2_dinode)) / sizeof(u64);
 	sdp->sd_inptrs = (sdp->sd_sb.sb_bsize -
@@ -302,7 +302,7 @@
 
 	/* Compute maximum reservation required to add a entry to a directory */
 
-	hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH),
+	hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH),
 			     sdp->sd_jbsize);
 
 	ind_blocks = 0;
@@ -1089,7 +1089,7 @@
 	sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits;
 	sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift -
                                GFS2_BASIC_BLOCK_SHIFT;
-	sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift;
+	sdp->sd_fsb2bb = BIT(sdp->sd_fsb2bb_shift);
 
 	sdp->sd_tune.gt_logd_secs = sdp->sd_args.ar_commit;
 	sdp->sd_tune.gt_quota_quantum = sdp->sd_args.ar_quota_quantum;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 77930ca..8af2dfa 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -75,7 +75,7 @@
 #include "util.h"
 
 #define GFS2_QD_HASH_SHIFT      12
-#define GFS2_QD_HASH_SIZE       (1 << GFS2_QD_HASH_SHIFT)
+#define GFS2_QD_HASH_SIZE       BIT(GFS2_QD_HASH_SHIFT)
 #define GFS2_QD_HASH_MASK       (GFS2_QD_HASH_SIZE - 1)
 
 /* Lock order: qd_lock -> bucket lock -> qd->lockref.lock -> lru lock */
@@ -384,7 +384,7 @@
 	block = qd->qd_slot / sdp->sd_qc_per_block;
 	offset = qd->qd_slot % sdp->sd_qc_per_block;
 
-	bh_map.b_size = 1 << ip->i_inode.i_blkbits;
+	bh_map.b_size = BIT(ip->i_inode.i_blkbits);
 	error = gfs2_block_map(&ip->i_inode, block, &bh_map, 0);
 	if (error)
 		goto fail;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 3a7e60b..e3ee387 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -359,7 +359,7 @@
 	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
 	u64 size = i_size_read(jd->jd_inode);
 
-	if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, 1 << 30))
+	if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, BIT(30)))
 		return -EIO;
 
 	jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 0f56deb..c415668 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -568,7 +568,7 @@
 	return thaw_super(sb);
 }
 
-static long ioctl_file_dedupe_range(struct file *file, void __user *arg)
+static int ioctl_file_dedupe_range(struct file *file, void __user *arg)
 {
 	struct file_dedupe_range __user *argp = arg;
 	struct file_dedupe_range *same = NULL;
@@ -582,6 +582,10 @@
 	}
 
 	size = offsetof(struct file_dedupe_range __user, info[count]);
+	if (size > PAGE_SIZE) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	same = memdup_user(argp, size);
 	if (IS_ERR(same)) {
diff --git a/fs/iomap.c b/fs/iomap.c
index 48141b8..706270f 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -84,8 +84,11 @@
 	 * Now the data has been copied, commit the range we've copied.  This
 	 * should not fail unless the filesystem has had a fatal error.
 	 */
-	ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0,
-			flags, &iomap);
+	if (ops->iomap_end) {
+		ret = ops->iomap_end(inode, pos, length,
+				     written > 0 ? written : 0,
+				     flags, &iomap);
+	}
 
 	return written ? written : ret;
 }
@@ -194,12 +197,9 @@
 		if (mapping_writably_mapped(inode->i_mapping))
 			flush_dcache_page(page);
 
-		pagefault_disable();
 		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
-		pagefault_enable();
 
 		flush_dcache_page(page);
-		mark_page_accessed(page);
 
 		status = iomap_write_end(inode, pos, bytes, copied, page);
 		if (unlikely(status < 0))
@@ -428,9 +428,12 @@
 		break;
 	}
 
+	if (iomap->flags & IOMAP_F_MERGED)
+		flags |= FIEMAP_EXTENT_MERGED;
+
 	return fiemap_fill_next_extent(fi, iomap->offset,
 			iomap->blkno != IOMAP_NULL_BLOCK ? iomap->blkno << 9: 0,
-			iomap->length, flags | FIEMAP_EXTENT_MERGED);
+			iomap->length, flags);
 
 }
 
@@ -470,13 +473,18 @@
 	if (ret)
 		return ret;
 
-	ret = filemap_write_and_wait(inode->i_mapping);
-	if (ret)
-		return ret;
+	if (fi->fi_flags & FIEMAP_FLAG_SYNC) {
+		ret = filemap_write_and_wait(inode->i_mapping);
+		if (ret)
+			return ret;
+	}
 
 	while (len > 0) {
 		ret = iomap_apply(inode, start, len, 0, ops, &ctx,
 				iomap_fiemap_actor);
+		/* inode with no (attribute) mapping will give ENOENT */
+		if (ret == -ENOENT)
+			break;
 		if (ret < 0)
 			return ret;
 		if (ret == 0)
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 2e58978..4d973524 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2893,8 +2893,7 @@
 	 * on anon_list2.  Let's check.
 	 */
 	if (!list_empty(&TxAnchor.anon_list2)) {
-		list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
-		INIT_LIST_HEAD(&TxAnchor.anon_list2);
+		list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
 		goto restart;
 	}
 	TXN_UNLOCK();
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 90b3bc2..bd9b641 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -379,8 +379,14 @@
 	 * cached in meta-data cache, and not written out
 	 * by txCommit();
 	 */
-	filemap_fdatawait(ipbmap->i_mapping);
-	filemap_write_and_wait(ipbmap->i_mapping);
+	rc = filemap_fdatawait(ipbmap->i_mapping);
+	if (rc)
+		goto error_out;
+
+	rc = filemap_write_and_wait(ipbmap->i_mapping);
+	if (rc)
+		goto error_out;
+
 	diWriteSpecial(ipbmap, 0);
 
 	newPage = nPages;	/* first new page number */
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e157400..2bcb86e 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -840,21 +840,35 @@
 	mutex_lock(&kernfs_mutex);
 
 	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
+		struct kernfs_node *parent;
 		struct inode *inode;
-		struct dentry *dentry;
 
+		/*
+		 * We want fsnotify_modify() on @kn but as the
+		 * modifications aren't originating from userland don't
+		 * have the matching @file available.  Look up the inodes
+		 * and generate the events manually.
+		 */
 		inode = ilookup(info->sb, kn->ino);
 		if (!inode)
 			continue;
 
-		dentry = d_find_any_alias(inode);
-		if (dentry) {
-			fsnotify_parent(NULL, dentry, FS_MODIFY);
-			fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
-				 NULL, 0);
-			dput(dentry);
+		parent = kernfs_get_parent(kn);
+		if (parent) {
+			struct inode *p_inode;
+
+			p_inode = ilookup(info->sb, parent->ino);
+			if (p_inode) {
+				fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
+					 inode, FSNOTIFY_EVENT_INODE, kn->name, 0);
+				iput(p_inode);
+			}
+
+			kernfs_put(parent);
 		}
 
+		fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
+			 kn->name, 0);
 		iput(inode);
 	}
 
diff --git a/fs/locks.c b/fs/locks.c
index ee1b15f..90ec671 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -127,7 +127,6 @@
 #include <linux/pid_namespace.h>
 #include <linux/hashtable.h>
 #include <linux/percpu.h>
-#include <linux/lglock.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/filelock.h>
@@ -158,12 +157,18 @@
 
 /*
  * The global file_lock_list is only used for displaying /proc/locks, so we
- * keep a list on each CPU, with each list protected by its own spinlock via
- * the file_lock_lglock. Note that alterations to the list also require that
- * the relevant flc_lock is held.
+ * keep a list on each CPU, with each list protected by its own spinlock.
+ * Global serialization is done using file_rwsem.
+ *
+ * Note that alterations to the list also require that the relevant flc_lock is
+ * held.
  */
-DEFINE_STATIC_LGLOCK(file_lock_lglock);
-static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
+struct file_lock_list_struct {
+	spinlock_t		lock;
+	struct hlist_head	hlist;
+};
+static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
+DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
 
 /*
  * The blocked_hash is used to find POSIX lock loops for deadlock detection.
@@ -587,15 +592,23 @@
 /* Must be called with the flc_lock held! */
 static void locks_insert_global_locks(struct file_lock *fl)
 {
-	lg_local_lock(&file_lock_lglock);
+	struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
+
+	percpu_rwsem_assert_held(&file_rwsem);
+
+	spin_lock(&fll->lock);
 	fl->fl_link_cpu = smp_processor_id();
-	hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
-	lg_local_unlock(&file_lock_lglock);
+	hlist_add_head(&fl->fl_link, &fll->hlist);
+	spin_unlock(&fll->lock);
 }
 
 /* Must be called with the flc_lock held! */
 static void locks_delete_global_locks(struct file_lock *fl)
 {
+	struct file_lock_list_struct *fll;
+
+	percpu_rwsem_assert_held(&file_rwsem);
+
 	/*
 	 * Avoid taking lock if already unhashed. This is safe since this check
 	 * is done while holding the flc_lock, and new insertions into the list
@@ -603,9 +616,11 @@
 	 */
 	if (hlist_unhashed(&fl->fl_link))
 		return;
-	lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+
+	fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
+	spin_lock(&fll->lock);
 	hlist_del_init(&fl->fl_link);
-	lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu);
+	spin_unlock(&fll->lock);
 }
 
 static unsigned long
@@ -915,6 +930,7 @@
 			return -ENOMEM;
 	}
 
+	percpu_down_read_preempt_disable(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	if (request->fl_flags & FL_ACCESS)
 		goto find_conflict;
@@ -955,6 +971,7 @@
 
 out:
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read_preempt_enable(&file_rwsem);
 	if (new_fl)
 		locks_free_lock(new_fl);
 	locks_dispose_list(&dispose);
@@ -991,6 +1008,7 @@
 		new_fl2 = locks_alloc_lock();
 	}
 
+	percpu_down_read_preempt_disable(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	/*
 	 * New lock request. Walk all POSIX locks and look for conflicts. If
@@ -1162,6 +1180,7 @@
 	}
  out:
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read_preempt_enable(&file_rwsem);
 	/*
 	 * Free any unused locks.
 	 */
@@ -1436,6 +1455,7 @@
 		return error;
 	}
 
+	percpu_down_read_preempt_disable(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 
 	time_out_leases(inode, &dispose);
@@ -1487,9 +1507,13 @@
 	locks_insert_block(fl, new_fl);
 	trace_break_lease_block(inode, new_fl);
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read_preempt_enable(&file_rwsem);
+
 	locks_dispose_list(&dispose);
 	error = wait_event_interruptible_timeout(new_fl->fl_wait,
 						!new_fl->fl_next, break_time);
+
+	percpu_down_read_preempt_disable(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	trace_break_lease_unblock(inode, new_fl);
 	locks_delete_block(new_fl);
@@ -1506,6 +1530,7 @@
 	}
 out:
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read_preempt_enable(&file_rwsem);
 	locks_dispose_list(&dispose);
 	locks_free_lock(new_fl);
 	return error;
@@ -1660,6 +1685,7 @@
 		return -EINVAL;
 	}
 
+	percpu_down_read_preempt_disable(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	time_out_leases(inode, &dispose);
 	error = check_conflicting_open(dentry, arg, lease->fl_flags);
@@ -1730,6 +1756,7 @@
 		lease->fl_lmops->lm_setup(lease, priv);
 out:
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read_preempt_enable(&file_rwsem);
 	locks_dispose_list(&dispose);
 	if (is_deleg)
 		inode_unlock(inode);
@@ -1752,6 +1779,7 @@
 		return error;
 	}
 
+	percpu_down_read_preempt_disable(&file_rwsem);
 	spin_lock(&ctx->flc_lock);
 	list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
 		if (fl->fl_file == filp &&
@@ -1764,6 +1792,7 @@
 	if (victim)
 		error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
 	spin_unlock(&ctx->flc_lock);
+	percpu_up_read_preempt_enable(&file_rwsem);
 	locks_dispose_list(&dispose);
 	return error;
 }
@@ -2574,9 +2603,20 @@
 	struct inode *inode = NULL;
 	unsigned int fl_pid;
 
-	if (fl->fl_nspid)
-		fl_pid = pid_vnr(fl->fl_nspid);
-	else
+	if (fl->fl_nspid) {
+		struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
+
+		/* Don't let fl_pid change based on who is reading the file */
+		fl_pid = pid_nr_ns(fl->fl_nspid, proc_pidns);
+
+		/*
+		 * If there isn't a fl_pid don't display who is waiting on
+		 * the lock if we are called from locks_show, or if we are
+		 * called from __show_fd_info - skip lock entirely
+		 */
+		if (fl_pid == 0)
+			return;
+	} else
 		fl_pid = fl->fl_pid;
 
 	if (fl->fl_file != NULL)
@@ -2648,9 +2688,13 @@
 {
 	struct locks_iterator *iter = f->private;
 	struct file_lock *fl, *bfl;
+	struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
 
 	fl = hlist_entry(v, struct file_lock, fl_link);
 
+	if (fl->fl_nspid && !pid_nr_ns(fl->fl_nspid, proc_pidns))
+		return 0;
+
 	lock_get_status(f, fl, iter->li_pos, "");
 
 	list_for_each_entry(bfl, &fl->fl_block, fl_block)
@@ -2703,9 +2747,9 @@
 	struct locks_iterator *iter = f->private;
 
 	iter->li_pos = *pos + 1;
-	lg_global_lock(&file_lock_lglock);
+	percpu_down_write(&file_rwsem);
 	spin_lock(&blocked_lock_lock);
-	return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
+	return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos);
 }
 
 static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
@@ -2713,14 +2757,14 @@
 	struct locks_iterator *iter = f->private;
 
 	++iter->li_pos;
-	return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos);
+	return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos);
 }
 
 static void locks_stop(struct seq_file *f, void *v)
 	__releases(&blocked_lock_lock)
 {
 	spin_unlock(&blocked_lock_lock);
-	lg_global_unlock(&file_lock_lglock);
+	percpu_up_write(&file_rwsem);
 }
 
 static const struct seq_operations locks_seq_operations = {
@@ -2761,10 +2805,13 @@
 	filelock_cache = kmem_cache_create("file_lock_cache",
 			sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
 
-	lg_lock_init(&file_lock_lglock, "file_lock_lglock");
 
-	for_each_possible_cpu(i)
-		INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));
+	for_each_possible_cpu(i) {
+		struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
+
+		spin_lock_init(&fll->lock);
+		INIT_HLIST_HEAD(&fll->hlist);
+	}
 
 	return 0;
 }
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index f55a4e7..2178476 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -346,7 +346,7 @@
 			PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
 
 		ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
-					(end - start) >> SECTOR_SHIFT);
+					(end - start) >> SECTOR_SHIFT, end);
 	}
 
 	pnfs_ld_write_done(hdr);
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 18e6fd0..efc007f 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -141,6 +141,7 @@
 	struct rb_root		bl_ext_ro;
 	spinlock_t		bl_ext_lock;   /* Protects list manipulation */
 	bool			bl_scsi_layout;
+	u64			bl_lwb;
 };
 
 static inline struct pnfs_block_layout *
@@ -182,7 +183,7 @@
 int ext_tree_remove(struct pnfs_block_layout *bl, bool rw, sector_t start,
 		sector_t end);
 int ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
-		sector_t len);
+		sector_t len, u64 lwb);
 bool ext_tree_lookup(struct pnfs_block_layout *bl, sector_t isect,
 		struct pnfs_block_extent *ret, bool rw);
 int ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg);
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 992bcb1..c85fbfd 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -402,7 +402,7 @@
 
 int
 ext_tree_mark_written(struct pnfs_block_layout *bl, sector_t start,
-		sector_t len)
+		sector_t len, u64 lwb)
 {
 	struct rb_root *root = &bl->bl_ext_rw;
 	sector_t end = start + len;
@@ -471,6 +471,8 @@
 		}
 	}
 out:
+	if (bl->bl_lwb < lwb)
+		bl->bl_lwb = lwb;
 	spin_unlock(&bl->bl_ext_lock);
 
 	__ext_put_deviceids(&tmp);
@@ -518,7 +520,7 @@
 }
 
 static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
-		size_t buffer_size, size_t *count)
+		size_t buffer_size, size_t *count, __u64 *lastbyte)
 {
 	struct pnfs_block_extent *be;
 	int ret = 0;
@@ -542,6 +544,8 @@
 			p = encode_block_extent(be, p);
 		be->be_tag = EXTENT_COMMITTING;
 	}
+	*lastbyte = bl->bl_lwb - 1;
+	bl->bl_lwb = 0;
 	spin_unlock(&bl->bl_ext_lock);
 
 	return ret;
@@ -564,7 +568,7 @@
 	arg->layoutupdate_pages = &arg->layoutupdate_page;
 
 retry:
-	ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count);
+	ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
 	if (unlikely(ret)) {
 		ext_tree_free_commitdata(arg, buffer_size);
 
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a7f2e6e..52a2831 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -275,6 +275,7 @@
 err_socks:
 	svc_rpcb_cleanup(serv, net);
 err_bind:
+	nn->cb_users[minorversion]--;
 	dprintk("NFS: Couldn't create callback socket: err = %d; "
 			"net = %p\n", ret, net);
 	return ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c92a75e..f953ef6 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -454,11 +454,8 @@
 				((u32 *)&rclist->rcl_sessionid.data)[3],
 				ref->rc_sequenceid, ref->rc_slotid);
 
-			spin_lock(&tbl->slot_tbl_lock);
-			status = (test_bit(ref->rc_slotid, tbl->used_slots) &&
-				  tbl->slots[ref->rc_slotid].seq_nr ==
-					ref->rc_sequenceid);
-			spin_unlock(&tbl->slot_tbl_lock);
+			status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid,
+					ref->rc_sequenceid, HZ >> 1) < 0;
 			if (status)
 				goto out;
 		}
@@ -487,7 +484,6 @@
 		goto out;
 
 	tbl = &clp->cl_session->bc_slot_table;
-	slot = tbl->slots + args->csa_slotid;
 
 	/* Set up res before grabbing the spinlock */
 	memcpy(&res->csr_sessionid, &args->csa_sessionid,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 003ebce..1e10678 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -426,7 +426,7 @@
  * Initialise the timeout values for a connection
  */
 void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
-				    unsigned int timeo, unsigned int retrans)
+				    int timeo, int retrans)
 {
 	to->to_initval = timeo * HZ / 10;
 	to->to_retries = retrans;
@@ -434,9 +434,9 @@
 	switch (proto) {
 	case XPRT_TRANSPORT_TCP:
 	case XPRT_TRANSPORT_RDMA:
-		if (to->to_retries == 0)
+		if (retrans == NFS_UNSPEC_RETRANS)
 			to->to_retries = NFS_DEF_TCP_RETRANS;
-		if (to->to_initval == 0)
+		if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0)
 			to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10;
 		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
 			to->to_initval = NFS_MAX_TCP_TIMEOUT;
@@ -449,9 +449,9 @@
 		to->to_exponential = 0;
 		break;
 	case XPRT_TRANSPORT_UDP:
-		if (to->to_retries == 0)
+		if (retrans == NFS_UNSPEC_RETRANS)
 			to->to_retries = NFS_DEF_UDP_RETRANS;
-		if (!to->to_initval)
+		if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0)
 			to->to_initval = NFS_DEF_UDP_TIMEO * HZ / 10;
 		if (to->to_initval > NFS_MAX_UDP_TIMEOUT)
 			to->to_initval = NFS_MAX_UDP_TIMEOUT;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7d62097..ca699dd 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -657,7 +657,10 @@
 	if (result <= 0)
 		goto out;
 
-	written = generic_write_sync(iocb, result);
+	result = generic_write_sync(iocb, result);
+	if (result < 0)
+		goto out;
+	written = result;
 	iocb->ki_pos += written;
 
 	/* Return error values */
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index e6206ea..51b5136 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -37,6 +37,7 @@
 	if (ffl) {
 		INIT_LIST_HEAD(&ffl->error_list);
 		INIT_LIST_HEAD(&ffl->mirrors);
+		ffl->last_report_time = ktime_get();
 		return &ffl->generic_hdr;
 	} else
 		return NULL;
@@ -640,19 +641,18 @@
 {
 	static const ktime_t notime = {0};
 	s64 report_interval = FF_LAYOUTSTATS_REPORT_INTERVAL;
+	struct nfs4_flexfile_layout *ffl = FF_LAYOUT_FROM_HDR(mirror->layout);
 
 	nfs4_ff_start_busy_timer(&layoutstat->busy_timer, now);
 	if (ktime_equal(mirror->start_time, notime))
 		mirror->start_time = now;
-	if (ktime_equal(mirror->last_report_time, notime))
-		mirror->last_report_time = now;
 	if (mirror->report_interval != 0)
 		report_interval = (s64)mirror->report_interval * 1000LL;
 	else if (layoutstats_timer != 0)
 		report_interval = (s64)layoutstats_timer * 1000LL;
-	if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >=
+	if (ktime_to_ms(ktime_sub(now, ffl->last_report_time)) >=
 			report_interval) {
-		mirror->last_report_time = now;
+		ffl->last_report_time = now;
 		return true;
 	}
 
@@ -806,11 +806,14 @@
 {
 	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
 	struct nfs4_pnfs_ds *ds;
+	bool fail_return = false;
 	int idx;
 
 	/* mirrors are sorted by efficiency */
 	for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
-		ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
+		if (idx+1 == fls->mirror_array_cnt)
+			fail_return = true;
+		ds = nfs4_ff_layout_prepare_ds(lseg, idx, fail_return);
 		if (ds) {
 			*best_idx = idx;
 			return ds;
@@ -859,6 +862,7 @@
 	struct nfs4_pnfs_ds *ds;
 	int ds_idx;
 
+retry:
 	/* Use full layout for now */
 	if (!pgio->pg_lseg)
 		ff_layout_pg_get_read(pgio, req, false);
@@ -871,10 +875,13 @@
 
 	ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
 	if (!ds) {
-		if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
-			goto out_pnfs;
-		else
+		if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
 			goto out_mds;
+		pnfs_put_lseg(pgio->pg_lseg);
+		pgio->pg_lseg = NULL;
+		/* Sleep for 1 second before retrying */
+		ssleep(1);
+		goto retry;
 	}
 
 	mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
@@ -890,12 +897,6 @@
 	pnfs_put_lseg(pgio->pg_lseg);
 	pgio->pg_lseg = NULL;
 	nfs_pageio_reset_read_mds(pgio);
-	return;
-
-out_pnfs:
-	pnfs_set_lo_fail(pgio->pg_lseg);
-	pnfs_put_lseg(pgio->pg_lseg);
-	pgio->pg_lseg = NULL;
 }
 
 static void
@@ -909,6 +910,7 @@
 	int i;
 	int status;
 
+retry:
 	if (!pgio->pg_lseg) {
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   req->wb_context,
@@ -940,10 +942,13 @@
 	for (i = 0; i < pgio->pg_mirror_count; i++) {
 		ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true);
 		if (!ds) {
-			if (ff_layout_no_fallback_to_mds(pgio->pg_lseg))
-				goto out_pnfs;
-			else
+			if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
 				goto out_mds;
+			pnfs_put_lseg(pgio->pg_lseg);
+			pgio->pg_lseg = NULL;
+			/* Sleep for 1 second before retrying */
+			ssleep(1);
+			goto retry;
 		}
 		pgm = &pgio->pg_mirrors[i];
 		mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
@@ -956,12 +961,6 @@
 	pnfs_put_lseg(pgio->pg_lseg);
 	pgio->pg_lseg = NULL;
 	nfs_pageio_reset_write_mds(pgio);
-	return;
-
-out_pnfs:
-	pnfs_set_lo_fail(pgio->pg_lseg);
-	pnfs_put_lseg(pgio->pg_lseg);
-	pgio->pg_lseg = NULL;
 }
 
 static unsigned int
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h
index 1bcdb15..3ee0c9f 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.h
+++ b/fs/nfs/flexfilelayout/flexfilelayout.h
@@ -84,7 +84,6 @@
 	struct nfs4_ff_layoutstat	read_stat;
 	struct nfs4_ff_layoutstat	write_stat;
 	ktime_t				start_time;
-	ktime_t				last_report_time;
 	u32				report_interval;
 };
 
@@ -101,6 +100,7 @@
 	struct pnfs_ds_commit_info commit_info;
 	struct list_head	mirrors;
 	struct list_head	error_list; /* nfs4_ff_layout_ds_err */
+	ktime_t			last_report_time; /* Layoutstat report times */
 };
 
 static inline struct nfs4_flexfile_layout *
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 0aa36be..f7a3f6b 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -17,8 +17,8 @@
 
 #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
 
-static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
-static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
+static unsigned int dataserver_timeo = NFS_DEF_TCP_RETRANS;
+static unsigned int dataserver_retrans;
 
 void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
 {
@@ -379,7 +379,7 @@
 
 	devid = &mirror->mirror_ds->id_node;
 	if (ff_layout_test_devid_unavailable(devid))
-		goto out;
+		goto out_fail;
 
 	ds = mirror->mirror_ds->ds;
 	/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
@@ -405,15 +405,16 @@
 			mirror->mirror_ds->ds_versions[0].rsize = max_payload;
 		if (mirror->mirror_ds->ds_versions[0].wsize > max_payload)
 			mirror->mirror_ds->ds_versions[0].wsize = max_payload;
-	} else {
-		ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
-					 mirror, lseg->pls_range.offset,
-					 lseg->pls_range.length, NFS4ERR_NXIO,
-					 OP_ILLEGAL, GFP_NOIO);
-		if (fail_return || !ff_layout_has_available_ds(lseg))
-			pnfs_error_mark_layout_for_return(ino, lseg);
-		ds = NULL;
+		goto out;
 	}
+	ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
+				 mirror, lseg->pls_range.offset,
+				 lseg->pls_range.length, NFS4ERR_NXIO,
+				 OP_ILLEGAL, GFP_NOIO);
+out_fail:
+	if (fail_return || !ff_layout_has_available_ds(lseg))
+		pnfs_error_mark_layout_for_return(ino, lseg);
+	ds = NULL;
 out:
 	return ds;
 }
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7ce5e02..74935a1 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -58,6 +58,9 @@
  */
 #define NFS_UNSPEC_PORT		(-1)
 
+#define NFS_UNSPEC_RETRANS	(UINT_MAX)
+#define NFS_UNSPEC_TIMEO	(UINT_MAX)
+
 /*
  * Maximum number of pages that readdir can use for creating
  * a vmapped array of pages.
@@ -156,7 +159,7 @@
 int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
 void nfs_server_insert_lists(struct nfs_server *);
 void nfs_server_remove_lists(struct nfs_server *);
-void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int);
+void nfs_init_timeout_values(struct rpc_timeout *to, int proto, int timeo, int retrans);
 int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t,
 		rpc_authflavor_t);
 struct nfs_server *nfs_alloc_server(void);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 33da841..64b43b4 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -318,10 +318,22 @@
 nfs42_layoutstat_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs42_layoutstat_data *data = calldata;
-	struct nfs_server *server = NFS_SERVER(data->args.inode);
+	struct inode *inode = data->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
+	struct pnfs_layout_hdr *lo;
 
+	spin_lock(&inode->i_lock);
+	lo = NFS_I(inode)->layout;
+	if (!pnfs_layout_is_valid(lo)) {
+		spin_unlock(&inode->i_lock);
+		rpc_exit(task, 0);
+		return;
+	}
+	nfs4_stateid_copy(&data->args.stateid, &lo->plh_stateid);
+	spin_unlock(&inode->i_lock);
 	nfs41_setup_sequence(nfs4_get_session(server), &data->args.seq_args,
 			     &data->res.seq_res, task);
+
 }
 
 static void
@@ -338,12 +350,14 @@
 	case 0:
 		break;
 	case -NFS4ERR_EXPIRED:
+	case -NFS4ERR_ADMIN_REVOKED:
+	case -NFS4ERR_DELEG_REVOKED:
 	case -NFS4ERR_STALE_STATEID:
-	case -NFS4ERR_OLD_STATEID:
 	case -NFS4ERR_BAD_STATEID:
 		spin_lock(&inode->i_lock);
 		lo = NFS_I(inode)->layout;
-		if (lo && nfs4_stateid_match(&data->args.stateid,
+		if (pnfs_layout_is_valid(lo) &&
+		    nfs4_stateid_match(&data->args.stateid,
 					     &lo->plh_stateid)) {
 			LIST_HEAD(head);
 
@@ -357,11 +371,23 @@
 		} else
 			spin_unlock(&inode->i_lock);
 		break;
+	case -NFS4ERR_OLD_STATEID:
+		spin_lock(&inode->i_lock);
+		lo = NFS_I(inode)->layout;
+		if (pnfs_layout_is_valid(lo) &&
+		    nfs4_stateid_match_other(&data->args.stateid,
+					&lo->plh_stateid)) {
+			/* Do we need to delay before resending? */
+			if (!nfs4_stateid_is_newer(&lo->plh_stateid,
+						&data->args.stateid))
+				rpc_delay(task, HZ);
+			rpc_restart_call_prepare(task);
+		}
+		spin_unlock(&inode->i_lock);
+		break;
 	case -ENOTSUPP:
 	case -EOPNOTSUPP:
 		NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
-	default:
-		break;
 	}
 
 	dprintk("%s server returns %d\n", __func__, task->tk_status);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 324bfdc..9bf64ea 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -396,6 +396,10 @@
 extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
 extern void nfs4_kill_renewd(struct nfs_client *);
 extern void nfs4_renew_state(struct work_struct *);
+extern void nfs4_set_lease_period(struct nfs_client *clp,
+		unsigned long lease,
+		unsigned long lastrenewed);
+
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8d7d08d..cd3b7cf 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -817,6 +817,11 @@
 		goto error;
 	}
 
+	if (server->nfs_client == clp) {
+		error = -ELOOP;
+		goto error;
+	}
+
 	/*
 	 * Query for the lease time on clientid setup or renewal
 	 *
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a036e93..a9dec32 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -634,15 +634,11 @@
 }
 EXPORT_SYMBOL_GPL(nfs40_setup_sequence);
 
-static int nfs40_sequence_done(struct rpc_task *task,
-			       struct nfs4_sequence_res *res)
+static void nfs40_sequence_free_slot(struct nfs4_sequence_res *res)
 {
 	struct nfs4_slot *slot = res->sr_slot;
 	struct nfs4_slot_table *tbl;
 
-	if (slot == NULL)
-		goto out;
-
 	tbl = slot->table;
 	spin_lock(&tbl->slot_tbl_lock);
 	if (!nfs41_wake_and_assign_slot(tbl, slot))
@@ -650,7 +646,13 @@
 	spin_unlock(&tbl->slot_tbl_lock);
 
 	res->sr_slot = NULL;
-out:
+}
+
+static int nfs40_sequence_done(struct rpc_task *task,
+			       struct nfs4_sequence_res *res)
+{
+	if (res->sr_slot != NULL)
+		nfs40_sequence_free_slot(res);
 	return 1;
 }
 
@@ -666,6 +668,11 @@
 	tbl = slot->table;
 	session = tbl->session;
 
+	/* Bump the slot sequence number */
+	if (slot->seq_done)
+		slot->seq_nr++;
+	slot->seq_done = 0;
+
 	spin_lock(&tbl->slot_tbl_lock);
 	/* Be nice to the server: try to ensure that the last transmitted
 	 * value for highest_user_slotid <= target_highest_slotid
@@ -686,9 +693,12 @@
 	res->sr_slot = NULL;
 	if (send_new_highest_used_slotid)
 		nfs41_notify_server(session->clp);
+	if (waitqueue_active(&tbl->slot_waitq))
+		wake_up_all(&tbl->slot_waitq);
 }
 
-int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+static int nfs41_sequence_process(struct rpc_task *task,
+		struct nfs4_sequence_res *res)
 {
 	struct nfs4_session *session;
 	struct nfs4_slot *slot = res->sr_slot;
@@ -714,7 +724,7 @@
 	switch (res->sr_status) {
 	case 0:
 		/* Update the slot's sequence and clientid lease timer */
-		++slot->seq_nr;
+		slot->seq_done = 1;
 		clp = session->clp;
 		do_renew_lease(clp, res->sr_timestamp);
 		/* Check sequence flags */
@@ -769,16 +779,16 @@
 		goto retry_nowait;
 	default:
 		/* Just update the slot sequence no. */
-		++slot->seq_nr;
+		slot->seq_done = 1;
 	}
 out:
 	/* The session may be reset by one of the error handlers. */
 	dprintk("%s: Error %d free the slot \n", __func__, res->sr_status);
-	nfs41_sequence_free_slot(res);
 out_noaction:
 	return ret;
 retry_nowait:
 	if (rpc_restart_call_prepare(task)) {
+		nfs41_sequence_free_slot(res);
 		task->tk_status = 0;
 		ret = 0;
 	}
@@ -789,8 +799,37 @@
 	rpc_delay(task, NFS4_POLL_RETRY_MAX);
 	return 0;
 }
+
+int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+	if (!nfs41_sequence_process(task, res))
+		return 0;
+	if (res->sr_slot != NULL)
+		nfs41_sequence_free_slot(res);
+	return 1;
+
+}
 EXPORT_SYMBOL_GPL(nfs41_sequence_done);
 
+static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+	if (res->sr_slot == NULL)
+		return 1;
+	if (res->sr_slot->table->session != NULL)
+		return nfs41_sequence_process(task, res);
+	return nfs40_sequence_done(task, res);
+}
+
+static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+	if (res->sr_slot != NULL) {
+		if (res->sr_slot->table->session != NULL)
+			nfs41_sequence_free_slot(res);
+		else
+			nfs40_sequence_free_slot(res);
+	}
+}
+
 int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
 	if (res->sr_slot == NULL)
@@ -920,6 +959,17 @@
 				    args, res, task);
 }
 
+static int nfs4_sequence_process(struct rpc_task *task, struct nfs4_sequence_res *res)
+{
+	return nfs40_sequence_done(task, res);
+}
+
+static void nfs4_sequence_free_slot(struct nfs4_sequence_res *res)
+{
+	if (res->sr_slot != NULL)
+		nfs40_sequence_free_slot(res);
+}
+
 int nfs4_sequence_done(struct rpc_task *task,
 		       struct nfs4_sequence_res *res)
 {
@@ -1197,6 +1247,7 @@
 	struct super_block *sb = p->dentry->d_sb;
 
 	nfs_free_seqid(p->o_arg.seqid);
+	nfs4_sequence_free_slot(&p->o_res.seq_res);
 	if (p->state != NULL)
 		nfs4_put_open_state(p->state);
 	nfs4_put_state_owner(p->owner);
@@ -1656,9 +1707,14 @@
 static struct nfs4_state *
 nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 {
+	struct nfs4_state *ret;
+
 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
-		return _nfs4_opendata_reclaim_to_nfs4_state(data);
-	return _nfs4_opendata_to_nfs4_state(data);
+		ret =_nfs4_opendata_reclaim_to_nfs4_state(data);
+	else
+		ret = _nfs4_opendata_to_nfs4_state(data);
+	nfs4_sequence_free_slot(&data->o_res.seq_res);
+	return ret;
 }
 
 static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -2056,7 +2112,7 @@
 
 	data->rpc_status = task->tk_status;
 
-	if (!nfs4_sequence_done(task, &data->o_res.seq_res))
+	if (!nfs4_sequence_process(task, &data->o_res.seq_res))
 		return;
 
 	if (task->tk_status == 0) {
@@ -4237,12 +4293,9 @@
 		err = _nfs4_do_fsinfo(server, fhandle, fsinfo);
 		trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err);
 		if (err == 0) {
-			struct nfs_client *clp = server->nfs_client;
-
-			spin_lock(&clp->cl_lock);
-			clp->cl_lease_time = fsinfo->lease_time * HZ;
-			clp->cl_last_renewal = now;
-			spin_unlock(&clp->cl_lock);
+			nfs4_set_lease_period(server->nfs_client,
+					fsinfo->lease_time * HZ,
+					now);
 			break;
 		}
 		err = nfs4_handle_exception(server, err, &exception);
@@ -7517,12 +7570,20 @@
 	status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
 	trace_nfs4_create_session(clp, status);
 
+	switch (status) {
+	case -NFS4ERR_STALE_CLIENTID:
+	case -NFS4ERR_DELAY:
+	case -ETIMEDOUT:
+	case -EACCES:
+	case -EAGAIN:
+		goto out;
+	};
+
+	clp->cl_seqid++;
 	if (!status) {
 		/* Verify the session's negotiated channel_attrs values */
 		status = nfs4_verify_channel_attrs(&args, &res);
 		/* Increment the clientid slot sequence id */
-		if (clp->cl_seqid == res.seqid)
-			clp->cl_seqid++;
 		if (status)
 			goto out;
 		nfs4_update_session(session, &res);
@@ -7867,7 +7928,7 @@
 	struct nfs4_layoutget *lgp = calldata;
 
 	dprintk("--> %s\n", __func__);
-	nfs41_sequence_done(task, &lgp->res.seq_res);
+	nfs41_sequence_process(task, &lgp->res.seq_res);
 	dprintk("<-- %s\n", __func__);
 }
 
@@ -8083,6 +8144,7 @@
 	/* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
 	if (status == 0 && lgp->res.layoutp->len)
 		lseg = pnfs_layout_process(lgp);
+	nfs4_sequence_free_slot(&lgp->res.seq_res);
 	rpc_put_task(task);
 	dprintk("<-- %s status=%d\n", __func__, status);
 	if (status)
@@ -8109,7 +8171,7 @@
 
 	dprintk("--> %s\n", __func__);
 
-	if (!nfs41_sequence_done(task, &lrp->res.seq_res))
+	if (!nfs41_sequence_process(task, &lrp->res.seq_res))
 		return;
 
 	server = NFS_SERVER(lrp->args.inode);
@@ -8121,6 +8183,7 @@
 	case -NFS4ERR_DELAY:
 		if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN)
 			break;
+		nfs4_sequence_free_slot(&lrp->res.seq_res);
 		rpc_restart_call_prepare(task);
 		return;
 	}
@@ -8135,12 +8198,16 @@
 
 	dprintk("--> %s\n", __func__);
 	spin_lock(&lo->plh_inode->i_lock);
-	pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range,
-			be32_to_cpu(lrp->args.stateid.seqid));
-	if (lrp->res.lrs_present && pnfs_layout_is_valid(lo))
+	if (lrp->res.lrs_present) {
+		pnfs_mark_matching_lsegs_invalid(lo, &freeme,
+				&lrp->args.range,
+				be32_to_cpu(lrp->args.stateid.seqid));
 		pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+	} else
+		pnfs_mark_layout_stateid_invalid(lo, &freeme);
 	pnfs_clear_layoutreturn_waitbit(lo);
 	spin_unlock(&lo->plh_inode->i_lock);
+	nfs4_sequence_free_slot(&lrp->res.seq_res);
 	pnfs_free_lseg_list(&freeme);
 	pnfs_put_layout_hdr(lrp->args.layout);
 	nfs_iput_and_deactive(lrp->inode);
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index e1ba58c..82e7719 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -136,6 +136,26 @@
 	cancel_delayed_work_sync(&clp->cl_renewd);
 }
 
+/**
+ * nfs4_set_lease_period - Sets the lease period on a nfs_client
+ *
+ * @clp: pointer to nfs_client
+ * @lease: new value for lease period
+ * @lastrenewed: time at which lease was last renewed
+ */
+void nfs4_set_lease_period(struct nfs_client *clp,
+		unsigned long lease,
+		unsigned long lastrenewed)
+{
+	spin_lock(&clp->cl_lock);
+	clp->cl_lease_time = lease;
+	clp->cl_last_renewal = lastrenewed;
+	spin_unlock(&clp->cl_lock);
+
+	/* Cap maximum reconnect timeout at 1/2 lease period */
+	rpc_cap_max_reconnect_timeout(clp->cl_rpcclient, lease >> 1);
+}
+
 /*
  * Local variables:
  *   c-basic-offset: 8
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index 332d06e..b629730 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -28,6 +28,7 @@
 	tbl->highest_used_slotid = NFS4_NO_SLOT;
 	spin_lock_init(&tbl->slot_tbl_lock);
 	rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, queue);
+	init_waitqueue_head(&tbl->slot_waitq);
 	init_completion(&tbl->complete);
 }
 
@@ -172,6 +173,58 @@
 	return ERR_PTR(-E2BIG);
 }
 
+static int nfs4_slot_get_seqid(struct nfs4_slot_table  *tbl, u32 slotid,
+		u32 *seq_nr)
+	__must_hold(&tbl->slot_tbl_lock)
+{
+	struct nfs4_slot *slot;
+
+	slot = nfs4_lookup_slot(tbl, slotid);
+	if (IS_ERR(slot))
+		return PTR_ERR(slot);
+	*seq_nr = slot->seq_nr;
+	return 0;
+}
+
+/*
+ * nfs4_slot_seqid_in_use - test if a slot sequence id is still in use
+ *
+ * Given a slot table, slot id and sequence number, determine if the
+ * RPC call in question is still in flight. This function is mainly
+ * intended for use by the callback channel.
+ */
+static bool nfs4_slot_seqid_in_use(struct nfs4_slot_table *tbl,
+		u32 slotid, u32 seq_nr)
+{
+	u32 cur_seq;
+	bool ret = false;
+
+	spin_lock(&tbl->slot_tbl_lock);
+	if (nfs4_slot_get_seqid(tbl, slotid, &cur_seq) == 0 &&
+	    cur_seq == seq_nr && test_bit(slotid, tbl->used_slots))
+		ret = true;
+	spin_unlock(&tbl->slot_tbl_lock);
+	return ret;
+}
+
+/*
+ * nfs4_slot_wait_on_seqid - wait until a slot sequence id is complete
+ *
+ * Given a slot table, slot id and sequence number, wait until the
+ * corresponding RPC call completes. This function is mainly
+ * intended for use by the callback channel.
+ */
+int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl,
+		u32 slotid, u32 seq_nr,
+		unsigned long timeout)
+{
+	if (wait_event_timeout(tbl->slot_waitq,
+			!nfs4_slot_seqid_in_use(tbl, slotid, seq_nr),
+			timeout) == 0)
+		return -ETIMEDOUT;
+	return 0;
+}
+
 /*
  * nfs4_alloc_slot - efficiently look for a free slot
  *
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index 5b51298d..f703b75 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -21,7 +21,8 @@
 	unsigned long		generation;
 	u32			slot_nr;
 	u32		 	seq_nr;
-	unsigned int		interrupted : 1;
+	unsigned int		interrupted : 1,
+				seq_done : 1;
 };
 
 /* Sessions */
@@ -36,6 +37,7 @@
 	unsigned long   used_slots[SLOT_TABLE_SZ]; /* used/unused bitmap */
 	spinlock_t	slot_tbl_lock;
 	struct rpc_wait_queue	slot_tbl_waitq;	/* allocators may wait here */
+	wait_queue_head_t	slot_waitq;	/* Completion wait on slot */
 	u32		max_slots;		/* # slots in table */
 	u32		max_slotid;		/* Max allowed slotid value */
 	u32		highest_used_slotid;	/* sent to server on each SEQ.
@@ -78,6 +80,9 @@
 extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
 extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
 extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid);
+extern int nfs4_slot_wait_on_seqid(struct nfs4_slot_table *tbl,
+		u32 slotid, u32 seq_nr,
+		unsigned long timeout);
 extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
 extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
 extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 834b875..cada00a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -277,20 +277,17 @@
 {
 	int status;
 	struct nfs_fsinfo fsinfo;
+	unsigned long now;
 
 	if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
 		nfs4_schedule_state_renewal(clp);
 		return 0;
 	}
 
+	now = jiffies;
 	status = nfs4_proc_get_lease_time(clp, &fsinfo);
 	if (status == 0) {
-		/* Update lease time and schedule renewal */
-		spin_lock(&clp->cl_lock);
-		clp->cl_lease_time = fsinfo.lease_time * HZ;
-		clp->cl_last_renewal = jiffies;
-		spin_unlock(&clp->cl_lock);
-
+		nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now);
 		nfs4_schedule_state_renewal(clp);
 	}
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 70806ca..2c93a85 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -365,7 +365,8 @@
 	/* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
 	atomic_dec(&lo->plh_refcount);
 	if (list_empty(&lo->plh_segs)) {
-		set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+		if (atomic_read(&lo->plh_outstanding) == 0)
+			set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
 		clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
 	}
 	rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
@@ -768,17 +769,32 @@
 	pnfs_destroy_layouts_byclid(clp, false);
 }
 
+static void
+pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
+{
+	lo->plh_return_iomode = 0;
+	lo->plh_return_seq = 0;
+	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+}
+
 /* update lo->plh_stateid with new if is more recent */
 void
 pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
 			bool update_barrier)
 {
 	u32 oldseq, newseq, new_barrier = 0;
-	bool invalid = !pnfs_layout_is_valid(lo);
 
 	oldseq = be32_to_cpu(lo->plh_stateid.seqid);
 	newseq = be32_to_cpu(new->seqid);
-	if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) {
+
+	if (!pnfs_layout_is_valid(lo)) {
+		nfs4_stateid_copy(&lo->plh_stateid, new);
+		lo->plh_barrier = newseq;
+		pnfs_clear_layoutreturn_info(lo);
+		clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+		return;
+	}
+	if (pnfs_seqid_is_newer(newseq, oldseq)) {
 		nfs4_stateid_copy(&lo->plh_stateid, new);
 		/*
 		 * Because of wraparound, we want to keep the barrier
@@ -790,7 +806,7 @@
 		new_barrier = be32_to_cpu(new->seqid);
 	else if (new_barrier == 0)
 		return;
-	if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
+	if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
 		lo->plh_barrier = new_barrier;
 }
 
@@ -886,19 +902,14 @@
 	rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
-static void
-pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
-{
-	lo->plh_return_iomode = 0;
-	lo->plh_return_seq = 0;
-	clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
-}
-
 static bool
 pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
 		nfs4_stateid *stateid,
 		enum pnfs_iomode *iomode)
 {
+	/* Serialise LAYOUTGET/LAYOUTRETURN */
+	if (atomic_read(&lo->plh_outstanding) != 0)
+		return false;
 	if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
 		return false;
 	pnfs_get_layout_hdr(lo);
@@ -1555,6 +1566,7 @@
 	}
 
 lookup_again:
+	nfs4_client_recover_expired_lease(clp);
 	first = false;
 	spin_lock(&ino->i_lock);
 	lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
@@ -1797,16 +1809,11 @@
 		 */
 		pnfs_mark_layout_stateid_invalid(lo, &free_me);
 
-		nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);
-		lo->plh_barrier = be32_to_cpu(res->stateid.seqid);
+		pnfs_set_layout_stateid(lo, &res->stateid, true);
 	}
 
 	pnfs_get_lseg(lseg);
 	pnfs_layout_insert_lseg(lo, lseg, &free_me);
-	if (!pnfs_layout_is_valid(lo)) {
-		pnfs_clear_layoutreturn_info(lo);
-		clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-	}
 
 
 	if (res->return_on_close)
@@ -2510,7 +2517,6 @@
 
 	data->args.fh = NFS_FH(inode);
 	data->args.inode = inode;
-	nfs4_stateid_copy(&data->args.stateid, &hdr->plh_stateid);
 	status = ld->prepare_layoutstats(&data->args);
 	if (status)
 		goto out_free;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 18d446e..d396013 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -923,6 +923,8 @@
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (data) {
+		data->timeo		= NFS_UNSPEC_TIMEO;
+		data->retrans		= NFS_UNSPEC_RETRANS;
 		data->acregmin		= NFS_DEF_ACREGMIN;
 		data->acregmax		= NFS_DEF_ACREGMAX;
 		data->acdirmin		= NFS_DEF_ACDIRMIN;
@@ -1189,6 +1191,19 @@
 	return rc;
 }
 
+static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option,
+		unsigned long l_bound, unsigned long u_bound)
+{
+	int ret;
+
+	ret = nfs_get_option_ul(args, option);
+	if (ret != 0)
+		return ret;
+	if (*option < l_bound || *option > u_bound)
+		return -ERANGE;
+	return 0;
+}
+
 /*
  * Error-check and convert a string of mount options from user space into
  * a data structure.  The whole mount string is processed; bad options are
@@ -1352,12 +1367,12 @@
 			mnt->bsize = option;
 			break;
 		case Opt_timeo:
-			if (nfs_get_option_ul(args, &option) || option == 0)
+			if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX))
 				goto out_invalid_value;
 			mnt->timeo = option;
 			break;
 		case Opt_retrans:
-			if (nfs_get_option_ul(args, &option) || option == 0)
+			if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX))
 				goto out_invalid_value;
 			mnt->retrans = option;
 			break;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8410ca2..a204d7e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4903,6 +4903,32 @@
 	return nfs_ok;
 }
 
+static __be32
+nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
+{
+	struct nfs4_ol_stateid *stp = openlockstateid(s);
+	__be32 ret;
+
+	mutex_lock(&stp->st_mutex);
+
+	ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
+	if (ret)
+		goto out;
+
+	ret = nfserr_locks_held;
+	if (check_for_locks(stp->st_stid.sc_file,
+			    lockowner(stp->st_stateowner)))
+		goto out;
+
+	release_lock_stateid(stp);
+	ret = nfs_ok;
+
+out:
+	mutex_unlock(&stp->st_mutex);
+	nfs4_put_stid(s);
+	return ret;
+}
+
 __be32
 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		   struct nfsd4_free_stateid *free_stateid)
@@ -4910,7 +4936,6 @@
 	stateid_t *stateid = &free_stateid->fr_stateid;
 	struct nfs4_stid *s;
 	struct nfs4_delegation *dp;
-	struct nfs4_ol_stateid *stp;
 	struct nfs4_client *cl = cstate->session->se_client;
 	__be32 ret = nfserr_bad_stateid;
 
@@ -4929,18 +4954,9 @@
 		ret = nfserr_locks_held;
 		break;
 	case NFS4_LOCK_STID:
-		ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
-		if (ret)
-			break;
-		stp = openlockstateid(s);
-		ret = nfserr_locks_held;
-		if (check_for_locks(stp->st_stid.sc_file,
-				    lockowner(stp->st_stateowner)))
-			break;
-		WARN_ON(!unhash_lock_stateid(stp));
+		atomic_inc(&s->sc_count);
 		spin_unlock(&cl->cl_lock);
-		nfs4_put_stid(s);
-		ret = nfs_ok;
+		ret = nfsd4_free_lock_stateid(stateid, s);
 		goto out;
 	case NFS4_REVOKED_DELEG_STID:
 		dp = delegstateid(s);
@@ -5507,7 +5523,7 @@
 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
 			    struct nfs4_ol_stateid *ost,
 			    struct nfsd4_lock *lock,
-			    struct nfs4_ol_stateid **lst, bool *new)
+			    struct nfs4_ol_stateid **plst, bool *new)
 {
 	__be32 status;
 	struct nfs4_file *fi = ost->st_stid.sc_file;
@@ -5515,7 +5531,9 @@
 	struct nfs4_client *cl = oo->oo_owner.so_client;
 	struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
 	struct nfs4_lockowner *lo;
+	struct nfs4_ol_stateid *lst;
 	unsigned int strhashval;
+	bool hashed;
 
 	lo = find_lockowner_str(cl, &lock->lk_new_owner);
 	if (!lo) {
@@ -5531,12 +5549,27 @@
 			goto out;
 	}
 
-	*lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
-	if (*lst == NULL) {
+retry:
+	lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
+	if (lst == NULL) {
 		status = nfserr_jukebox;
 		goto out;
 	}
+
+	mutex_lock(&lst->st_mutex);
+
+	/* See if it's still hashed to avoid race with FREE_STATEID */
+	spin_lock(&cl->cl_lock);
+	hashed = !list_empty(&lst->st_perfile);
+	spin_unlock(&cl->cl_lock);
+
+	if (!hashed) {
+		mutex_unlock(&lst->st_mutex);
+		nfs4_put_stid(&lst->st_stid);
+		goto retry;
+	}
 	status = nfs_ok;
+	*plst = lst;
 out:
 	nfs4_put_stateowner(&lo->lo_owner);
 	return status;
@@ -5603,8 +5636,6 @@
 			goto out;
 		status = lookup_or_create_lock_state(cstate, open_stp, lock,
 							&lock_stp, &new);
-		if (status == nfs_ok)
-			mutex_lock(&lock_stp->st_mutex);
 	} else {
 		status = nfs4_preprocess_seqid_op(cstate,
 				       lock->lk_old_lock_seqid,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ba944123..ff476e6 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1252,10 +1252,13 @@
 	if (IS_ERR(dchild))
 		return nfserrno(host_err);
 	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
-	if (err) {
-		dput(dchild);
+	/*
+	 * We unconditionally drop our ref to dchild as fh_compose will have
+	 * already grabbed its own ref for it.
+	 */
+	dput(dchild);
+	if (err)
 		return err;
-	}
 	return nfsd_create_locked(rqstp, fhp, fname, flen, iap, type,
 					rdev, resfhp);
 }
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d2f97ec..e0e5f7c 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -67,18 +67,7 @@
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	wait_event(group->fanotify_data.access_waitq, event->response ||
-				atomic_read(&group->fanotify_data.bypass_perm));
-
-	if (!event->response) {	/* bypass_perm set */
-		/*
-		 * Event was canceled because group is being destroyed. Remove
-		 * it from group's event list because we are responsible for
-		 * freeing the permission event.
-		 */
-		fsnotify_remove_event(group, &event->fae.fse);
-		return 0;
-	}
+	wait_event(group->fanotify_data.access_waitq, event->response);
 
 	/* userspace responded, convert to something usable */
 	switch (event->response) {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8e8e6bc..a643138 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -358,16 +358,20 @@
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	struct fanotify_perm_event_info *event, *next;
+	struct fsnotify_event *fsn_event;
 
 	/*
-	 * There may be still new events arriving in the notification queue
-	 * but since userspace cannot use fanotify fd anymore, no event can
-	 * enter or leave access_list by now.
+	 * Stop new events from arriving in the notification queue. since
+	 * userspace cannot use fanotify fd anymore, no event can enter or
+	 * leave access_list by now either.
+	 */
+	fsnotify_group_stop_queueing(group);
+
+	/*
+	 * Process all permission events on access_list and notification queue
+	 * and simulate reply from userspace.
 	 */
 	spin_lock(&group->fanotify_data.access_lock);
-
-	atomic_inc(&group->fanotify_data.bypass_perm);
-
 	list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
 				 fae.fse.list) {
 		pr_debug("%s: found group=%p event=%p\n", __func__, group,
@@ -379,12 +383,21 @@
 	spin_unlock(&group->fanotify_data.access_lock);
 
 	/*
-	 * Since bypass_perm is set, newly queued events will not wait for
-	 * access response. Wake up the already sleeping ones now.
-	 * synchronize_srcu() in fsnotify_destroy_group() will wait for all
-	 * processes sleeping in fanotify_handle_event() waiting for access
-	 * response and thus also for all permission events to be freed.
+	 * Destroy all non-permission events. For permission events just
+	 * dequeue them and set the response. They will be freed once the
+	 * response is consumed and fanotify_get_response() returns.
 	 */
+	mutex_lock(&group->notification_mutex);
+	while (!fsnotify_notify_queue_is_empty(group)) {
+		fsn_event = fsnotify_remove_first_event(group);
+		if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
+			fsnotify_destroy_event(group, fsn_event);
+		else
+			FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+	}
+	mutex_unlock(&group->notification_mutex);
+
+	/* Response for all permission events it set, wakeup waiters */
 	wake_up(&group->fanotify_data.access_waitq);
 #endif
 
@@ -755,7 +768,6 @@
 	spin_lock_init(&group->fanotify_data.access_lock);
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
-	atomic_set(&group->fanotify_data.bypass_perm, 0);
 #endif
 	switch (flags & FAN_ALL_CLASS_BITS) {
 	case FAN_CLASS_NOTIF:
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 3e2dd85..b47f7cf 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -40,6 +40,17 @@
 }
 
 /*
+ * Stop queueing new events for this group. Once this function returns
+ * fsnotify_add_event() will not add any new events to the group's queue.
+ */
+void fsnotify_group_stop_queueing(struct fsnotify_group *group)
+{
+	mutex_lock(&group->notification_mutex);
+	group->shutdown = true;
+	mutex_unlock(&group->notification_mutex);
+}
+
+/*
  * Trying to get rid of a group. Remove all marks, flush all events and release
  * the group reference.
  * Note that another thread calling fsnotify_clear_marks_by_group() may still
@@ -47,6 +58,14 @@
  */
 void fsnotify_destroy_group(struct fsnotify_group *group)
 {
+	/*
+	 * Stop queueing new events. The code below is careful enough to not
+	 * require this but fanotify needs to stop queuing events even before
+	 * fsnotify_destroy_group() is called and this makes the other callers
+	 * of fsnotify_destroy_group() to see the same behavior.
+	 */
+	fsnotify_group_stop_queueing(group);
+
 	/* clear all inode marks for this group, attach them to destroy_list */
 	fsnotify_detach_group_marks(group);
 
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index a95d8e0..e455e83 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -82,7 +82,8 @@
  * Add an event to the group notification queue.  The group can later pull this
  * event off the queue to deal with.  The function returns 0 if the event was
  * added to the queue, 1 if the event was merged with some other queued event,
- * 2 if the queue of events has overflown.
+ * 2 if the event was not queued - either the queue of events has overflown
+ * or the group is shutting down.
  */
 int fsnotify_add_event(struct fsnotify_group *group,
 		       struct fsnotify_event *event,
@@ -96,6 +97,11 @@
 
 	mutex_lock(&group->notification_mutex);
 
+	if (group->shutdown) {
+		mutex_unlock(&group->notification_mutex);
+		return 2;
+	}
+
 	if (group->q_len >= group->max_events) {
 		ret = 2;
 		/* Queue overflow event only if it isn't already queued */
@@ -126,21 +132,6 @@
 }
 
 /*
- * Remove @event from group's notification queue. It is the responsibility of
- * the caller to destroy the event.
- */
-void fsnotify_remove_event(struct fsnotify_group *group,
-			   struct fsnotify_event *event)
-{
-	mutex_lock(&group->notification_mutex);
-	if (!list_empty(&event->list)) {
-		list_del_init(&event->list);
-		group->q_len--;
-	}
-	mutex_unlock(&group->notification_mutex);
-}
-
-/*
  * Remove and return the first event from the notification list.  It is the
  * responsibility of the caller to destroy the obtained event
  */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7dabbc3..f165f86 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5922,7 +5922,6 @@
 }
 
 static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
-					 handle_t *handle,
 					 struct inode *data_alloc_inode,
 					 struct buffer_head *data_alloc_bh)
 {
@@ -5935,11 +5934,19 @@
 	struct ocfs2_truncate_log *tl;
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct buffer_head *tl_bh = osb->osb_tl_bh;
+	handle_t *handle;
 
 	di = (struct ocfs2_dinode *) tl_bh->b_data;
 	tl = &di->id2.i_dealloc;
 	i = le16_to_cpu(tl->tl_used) - 1;
 	while (i >= 0) {
+		handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
+		if (IS_ERR(handle)) {
+			status = PTR_ERR(handle);
+			mlog_errno(status);
+			goto bail;
+		}
+
 		/* Caller has given us at least enough credits to
 		 * update the truncate log dinode */
 		status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
@@ -5974,12 +5981,7 @@
 			}
 		}
 
-		status = ocfs2_extend_trans(handle,
-				OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
-		if (status < 0) {
-			mlog_errno(status);
-			goto bail;
-		}
+		ocfs2_commit_trans(osb, handle);
 		i--;
 	}
 
@@ -5994,7 +5996,6 @@
 {
 	int status;
 	unsigned int num_to_flush;
-	handle_t *handle;
 	struct inode *tl_inode = osb->osb_tl_inode;
 	struct inode *data_alloc_inode = NULL;
 	struct buffer_head *tl_bh = osb->osb_tl_bh;
@@ -6038,21 +6039,11 @@
 		goto out_mutex;
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
-	if (IS_ERR(handle)) {
-		status = PTR_ERR(handle);
-		mlog_errno(status);
-		goto out_unlock;
-	}
-
-	status = ocfs2_replay_truncate_records(osb, handle, data_alloc_inode,
+	status = ocfs2_replay_truncate_records(osb, data_alloc_inode,
 					       data_alloc_bh);
 	if (status < 0)
 		mlog_errno(status);
 
-	ocfs2_commit_trans(osb, handle);
-
-out_unlock:
 	brelse(data_alloc_bh);
 	ocfs2_inode_unlock(data_alloc_inode, 1);
 
@@ -6413,43 +6404,34 @@
 		goto out_mutex;
 	}
 
-	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		mlog_errno(ret);
-		goto out_unlock;
-	}
-
 	while (head) {
 		if (head->free_bg)
 			bg_blkno = head->free_bg;
 		else
 			bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
 							      head->free_bit);
+		handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
+		if (IS_ERR(handle)) {
+			ret = PTR_ERR(handle);
+			mlog_errno(ret);
+			goto out_unlock;
+		}
+
 		trace_ocfs2_free_cached_blocks(
 		     (unsigned long long)head->free_blk, head->free_bit);
 
 		ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
 					       head->free_bit, bg_blkno, 1);
-		if (ret) {
+		if (ret)
 			mlog_errno(ret);
-			goto out_journal;
-		}
 
-		ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
-		if (ret) {
-			mlog_errno(ret);
-			goto out_journal;
-		}
+		ocfs2_commit_trans(osb, handle);
 
 		tmp = head;
 		head = head->free_next;
 		kfree(tmp);
 	}
 
-out_journal:
-	ocfs2_commit_trans(osb, handle);
-
 out_unlock:
 	ocfs2_inode_unlock(inode, 1);
 	brelse(di_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 98d3654..bbb4b3e 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1842,6 +1842,16 @@
 	ocfs2_commit_trans(osb, handle);
 
 out:
+	/*
+	 * The mmapped page won't be unlocked in ocfs2_free_write_ctxt(),
+	 * even in case of error here like ENOSPC and ENOMEM. So, we need
+	 * to unlock the target page manually to prevent deadlocks when
+	 * retrying again on ENOSPC, or when returning non-VM_FAULT_LOCKED
+	 * to VM code.
+	 */
+	if (wc->w_target_locked)
+		unlock_page(mmap_page);
+
 	ocfs2_free_write_ctxt(inode, wc);
 
 	if (data_ac) {
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 94b1836..b95e7df 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -44,9 +44,6 @@
  * version here in tcp_internal.h should not need to be bumped for
  * filesystem locking changes.
  *
- * New in version 12
- *	- Negotiate hb timeout when storage is down.
- *
  * New in version 11
  * 	- Negotiation of filesystem locking in the dlm join.
  *
@@ -78,7 +75,7 @@
  * 	- full 64 bit i_size in the metadata lock lvbs
  * 	- introduction of "rw" lock and pushing meta/data locking down
  */
-#define O2NET_PROTOCOL_VERSION 12ULL
+#define O2NET_PROTOCOL_VERSION 11ULL
 struct o2net_handshake {
 	__be64	protocol_version;
 	__be64	connector_id;
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index cdeafb4..0bb1286 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -268,7 +268,6 @@
 				  struct dlm_lock *lock, int flags, int type)
 {
 	enum dlm_status status;
-	u8 old_owner = res->owner;
 
 	mlog(0, "type=%d, convert_type=%d, busy=%d\n", lock->ml.type,
 	     lock->ml.convert_type, res->state & DLM_LOCK_RES_IN_PROGRESS);
@@ -335,7 +334,6 @@
 
 	spin_lock(&res->spinlock);
 	res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
-	lock->convert_pending = 0;
 	/* if it failed, move it back to granted queue.
 	 * if master returns DLM_NORMAL and then down before sending ast,
 	 * it may have already been moved to granted queue, reset to
@@ -344,12 +342,14 @@
 		if (status != DLM_NOTQUEUED)
 			dlm_error(status);
 		dlm_revert_pending_convert(res, lock);
-	} else if ((res->state & DLM_LOCK_RES_RECOVERING) ||
-			(old_owner != res->owner)) {
-		mlog(0, "res %.*s is in recovering or has been recovered.\n",
-				res->lockname.len, res->lockname.name);
+	} else if (!lock->convert_pending) {
+		mlog(0, "%s: res %.*s, owner died and lock has been moved back "
+				"to granted list, retry convert.\n",
+				dlm->name, res->lockname.len, res->lockname.name);
 		status = DLM_RECOVERING;
 	}
+
+	lock->convert_pending = 0;
 bail:
 	spin_unlock(&res->spinlock);
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 4e7b0dc..0b055bf 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1506,7 +1506,8 @@
 				       u64 start, u64 len)
 {
 	int ret = 0;
-	u64 tmpend, end = start + len;
+	u64 tmpend = 0;
+	u64 end = start + len;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	unsigned int csize = osb->s_clustersize;
 	handle_t *handle;
@@ -1538,18 +1539,31 @@
 	}
 
 	/*
-	 * We want to get the byte offset of the end of the 1st cluster.
+	 * If start is on a cluster boundary and end is somewhere in another
+	 * cluster, we have not COWed the cluster starting at start, unless
+	 * end is also within the same cluster. So, in this case, we skip this
+	 * first call to ocfs2_zero_range_for_truncate() truncate and move on
+	 * to the next one.
 	 */
-	tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
-	if (tmpend > end)
-		tmpend = end;
+	if ((start & (csize - 1)) != 0) {
+		/*
+		 * We want to get the byte offset of the end of the 1st
+		 * cluster.
+		 */
+		tmpend = (u64)osb->s_clustersize +
+			(start & ~(osb->s_clustersize - 1));
+		if (tmpend > end)
+			tmpend = end;
 
-	trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
-						 (unsigned long long)tmpend);
+		trace_ocfs2_zero_partial_clusters_range1(
+			(unsigned long long)start,
+			(unsigned long long)tmpend);
 
-	ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
-	if (ret)
-		mlog_errno(ret);
+		ret = ocfs2_zero_range_for_truncate(inode, handle, start,
+						    tmpend);
+		if (ret)
+			mlog_errno(ret);
+	}
 
 	if (tmpend < end) {
 		/*
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ea47120..6ad3533 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1199,14 +1199,24 @@
 			inode_unlock((*ac)->ac_inode);
 
 			ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
-			if (ret == 1)
+			if (ret == 1) {
+				iput((*ac)->ac_inode);
+				(*ac)->ac_inode = NULL;
 				goto retry;
+			}
 
 			if (ret < 0)
 				mlog_errno(ret);
 
 			inode_lock((*ac)->ac_inode);
-			ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+			ret = ocfs2_inode_lock((*ac)->ac_inode, NULL, 1);
+			if (ret < 0) {
+				mlog_errno(ret);
+				inode_unlock((*ac)->ac_inode);
+				iput((*ac)->ac_inode);
+				(*ac)->ac_inode = NULL;
+				goto bail;
+			}
 		}
 		if (status < 0) {
 			if (status != -ENOSPC)
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 54e5d66..db37a0e 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -80,6 +80,8 @@
 	}
 
 	for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+		if (ovl_is_private_xattr(name))
+			continue;
 retry:
 		size = vfs_getxattr(old, name, value, value_size);
 		if (size == -ERANGE)
@@ -103,6 +105,13 @@
 			goto retry;
 		}
 
+		error = security_inode_copy_up_xattr(name);
+		if (error < 0 && error != -EOPNOTSUPP)
+			break;
+		if (error == 1) {
+			error = 0;
+			continue; /* Discard */
+		}
 		error = vfs_setxattr(new, name, value, size, 0);
 		if (error)
 			break;
@@ -246,6 +255,8 @@
 	struct dentry *upper = NULL;
 	umode_t mode = stat->mode;
 	int err;
+	const struct cred *old_creds = NULL;
+	struct cred *new_creds = NULL;
 
 	newdentry = ovl_lookup_temp(workdir, dentry);
 	err = PTR_ERR(newdentry);
@@ -258,10 +269,23 @@
 	if (IS_ERR(upper))
 		goto out1;
 
+	err = security_inode_copy_up(dentry, &new_creds);
+	if (err < 0)
+		goto out2;
+
+	if (new_creds)
+		old_creds = override_creds(new_creds);
+
 	/* Can't properly set mode on creation because of the umask */
 	stat->mode &= S_IFMT;
 	err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
 	stat->mode = mode;
+
+	if (new_creds) {
+		revert_creds(old_creds);
+		put_cred(new_creds);
+	}
+
 	if (err)
 		goto out2;
 
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 12bcd07..b0ffa1d 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -12,6 +12,8 @@
 #include <linux/xattr.h>
 #include <linux/security.h>
 #include <linux/cred.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
 #include "overlayfs.h"
 
 void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
@@ -186,6 +188,9 @@
 	struct dentry *newdentry;
 	int err;
 
+	if (!hardlink && !IS_POSIXACL(udir))
+		stat->mode &= ~current_umask();
+
 	inode_lock_nested(udir, I_MUTEX_PARENT);
 	newdentry = lookup_one_len(dentry->d_name.name, upperdir,
 				   dentry->d_name.len);
@@ -335,6 +340,32 @@
 	return ret;
 }
 
+static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
+			     const struct posix_acl *acl)
+{
+	void *buffer;
+	size_t size;
+	int err;
+
+	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !acl)
+		return 0;
+
+	size = posix_acl_to_xattr(NULL, acl, NULL, 0);
+	buffer = kmalloc(size, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+
+	size = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+	err = size;
+	if (err < 0)
+		goto out_free;
+
+	err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
+out_free:
+	kfree(buffer);
+	return err;
+}
+
 static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
 				    struct kstat *stat, const char *link,
 				    struct dentry *hardlink)
@@ -346,10 +377,18 @@
 	struct dentry *upper;
 	struct dentry *newdentry;
 	int err;
+	struct posix_acl *acl, *default_acl;
 
 	if (WARN_ON(!workdir))
 		return -EROFS;
 
+	if (!hardlink) {
+		err = posix_acl_create(dentry->d_parent->d_inode,
+				       &stat->mode, &default_acl, &acl);
+		if (err)
+			return err;
+	}
+
 	err = ovl_lock_rename_workdir(workdir, upperdir);
 	if (err)
 		goto out;
@@ -384,6 +423,17 @@
 		if (err)
 			goto out_cleanup;
 	}
+	if (!hardlink) {
+		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_ACCESS,
+					acl);
+		if (err)
+			goto out_cleanup;
+
+		err = ovl_set_upper_acl(newdentry, XATTR_NAME_POSIX_ACL_DEFAULT,
+					default_acl);
+		if (err)
+			goto out_cleanup;
+	}
 
 	if (!hardlink && S_ISDIR(stat->mode)) {
 		err = ovl_set_opaque(newdentry);
@@ -410,6 +460,10 @@
 out_unlock:
 	unlock_rename(workdir, upperdir);
 out:
+	if (!hardlink) {
+		posix_acl_release(acl);
+		posix_acl_release(default_acl);
+	}
 	return err;
 
 out_cleanup:
@@ -435,6 +489,15 @@
 	if (override_cred) {
 		override_cred->fsuid = inode->i_uid;
 		override_cred->fsgid = inode->i_gid;
+		if (!hardlink) {
+			err = security_dentry_create_files_as(dentry,
+					stat->mode, &dentry->d_name, old_cred,
+					override_cred);
+			if (err) {
+				put_cred(override_cred);
+				goto out_revert_creds;
+			}
+		}
 		put_cred(override_creds(override_cred));
 		put_cred(override_cred);
 
@@ -445,6 +508,7 @@
 			err = ovl_create_over_whiteout(dentry, inode, stat,
 							link, hardlink);
 	}
+out_revert_creds:
 	revert_creds(old_cred);
 	if (!err) {
 		struct inode *realinode = d_inode(ovl_dentry_upper(dentry));
@@ -950,9 +1014,9 @@
 	.permission	= ovl_permission,
 	.getattr	= ovl_dir_getattr,
 	.setxattr	= generic_setxattr,
-	.getxattr	= ovl_getxattr,
+	.getxattr	= generic_getxattr,
 	.listxattr	= ovl_listxattr,
-	.removexattr	= ovl_removexattr,
+	.removexattr	= generic_removexattr,
 	.get_acl	= ovl_get_acl,
 	.update_time	= ovl_update_time,
 };
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 1b885c1..c75625c 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
+#include <linux/posix_acl.h>
 #include "overlayfs.h"
 
 static int ovl_copy_up_truncate(struct dentry *dentry)
@@ -191,32 +192,44 @@
 	return err;
 }
 
-static bool ovl_is_private_xattr(const char *name)
+bool ovl_is_private_xattr(const char *name)
 {
-#define OVL_XATTR_PRE_NAME OVL_XATTR_PREFIX "."
-	return strncmp(name, OVL_XATTR_PRE_NAME,
-		       sizeof(OVL_XATTR_PRE_NAME) - 1) == 0;
+	return strncmp(name, OVL_XATTR_PREFIX,
+		       sizeof(OVL_XATTR_PREFIX) - 1) == 0;
 }
 
-int ovl_setxattr(struct dentry *dentry, struct inode *inode,
-		 const char *name, const void *value,
-		 size_t size, int flags)
+int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
+		  size_t size, int flags)
 {
 	int err;
-	struct dentry *upperdentry;
+	struct path realpath;
+	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
 	const struct cred *old_cred;
 
 	err = ovl_want_write(dentry);
 	if (err)
 		goto out;
 
+	if (!value && !OVL_TYPE_UPPER(type)) {
+		err = vfs_getxattr(realpath.dentry, name, NULL, 0);
+		if (err < 0)
+			goto out_drop_write;
+	}
+
 	err = ovl_copy_up(dentry);
 	if (err)
 		goto out_drop_write;
 
-	upperdentry = ovl_dentry_upper(dentry);
+	if (!OVL_TYPE_UPPER(type))
+		ovl_path_upper(dentry, &realpath);
+
 	old_cred = ovl_override_creds(dentry->d_sb);
-	err = vfs_setxattr(upperdentry, name, value, size, flags);
+	if (value)
+		err = vfs_setxattr(realpath.dentry, name, value, size, flags);
+	else {
+		WARN_ON(flags != XATTR_REPLACE);
+		err = vfs_removexattr(realpath.dentry, name);
+	}
 	revert_creds(old_cred);
 
 out_drop_write:
@@ -225,16 +238,13 @@
 	return err;
 }
 
-ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
-		     const char *name, void *value, size_t size)
+int ovl_xattr_get(struct dentry *dentry, const char *name,
+		  void *value, size_t size)
 {
 	struct dentry *realdentry = ovl_dentry_real(dentry);
 	ssize_t res;
 	const struct cred *old_cred;
 
-	if (ovl_is_private_xattr(name))
-		return -ENODATA;
-
 	old_cred = ovl_override_creds(dentry->d_sb);
 	res = vfs_getxattr(realdentry, name, value, size);
 	revert_creds(old_cred);
@@ -245,7 +255,8 @@
 {
 	struct dentry *realdentry = ovl_dentry_real(dentry);
 	ssize_t res;
-	int off;
+	size_t len;
+	char *s;
 	const struct cred *old_cred;
 
 	old_cred = ovl_override_creds(dentry->d_sb);
@@ -255,73 +266,39 @@
 		return res;
 
 	/* filter out private xattrs */
-	for (off = 0; off < res;) {
-		char *s = list + off;
-		size_t slen = strlen(s) + 1;
+	for (s = list, len = res; len;) {
+		size_t slen = strnlen(s, len) + 1;
 
-		BUG_ON(off + slen > res);
+		/* underlying fs providing us with an broken xattr list? */
+		if (WARN_ON(slen > len))
+			return -EIO;
 
+		len -= slen;
 		if (ovl_is_private_xattr(s)) {
 			res -= slen;
-			memmove(s, s + slen, res - off);
+			memmove(s, s + slen, len);
 		} else {
-			off += slen;
+			s += slen;
 		}
 	}
 
 	return res;
 }
 
-int ovl_removexattr(struct dentry *dentry, const char *name)
-{
-	int err;
-	struct path realpath;
-	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
-	const struct cred *old_cred;
-
-	err = ovl_want_write(dentry);
-	if (err)
-		goto out;
-
-	err = -ENODATA;
-	if (ovl_is_private_xattr(name))
-		goto out_drop_write;
-
-	if (!OVL_TYPE_UPPER(type)) {
-		err = vfs_getxattr(realpath.dentry, name, NULL, 0);
-		if (err < 0)
-			goto out_drop_write;
-
-		err = ovl_copy_up(dentry);
-		if (err)
-			goto out_drop_write;
-
-		ovl_path_upper(dentry, &realpath);
-	}
-
-	old_cred = ovl_override_creds(dentry->d_sb);
-	err = vfs_removexattr(realpath.dentry, name);
-	revert_creds(old_cred);
-out_drop_write:
-	ovl_drop_write(dentry);
-out:
-	return err;
-}
-
 struct posix_acl *ovl_get_acl(struct inode *inode, int type)
 {
 	struct inode *realinode = ovl_inode_real(inode, NULL);
 	const struct cred *old_cred;
 	struct posix_acl *acl;
 
-	if (!IS_POSIXACL(realinode))
+	if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode))
 		return NULL;
 
 	if (!realinode->i_op->get_acl)
 		return NULL;
 
 	old_cred = ovl_override_creds(inode->i_sb);
-	acl = realinode->i_op->get_acl(realinode, type);
+	acl = get_acl(realinode, type);
 	revert_creds(old_cred);
 
 	return acl;
@@ -391,9 +368,9 @@
 	.permission	= ovl_permission,
 	.getattr	= ovl_getattr,
 	.setxattr	= generic_setxattr,
-	.getxattr	= ovl_getxattr,
+	.getxattr	= generic_getxattr,
 	.listxattr	= ovl_listxattr,
-	.removexattr	= ovl_removexattr,
+	.removexattr	= generic_removexattr,
 	.get_acl	= ovl_get_acl,
 	.update_time	= ovl_update_time,
 };
@@ -404,9 +381,9 @@
 	.readlink	= ovl_readlink,
 	.getattr	= ovl_getattr,
 	.setxattr	= generic_setxattr,
-	.getxattr	= ovl_getxattr,
+	.getxattr	= generic_getxattr,
 	.listxattr	= ovl_listxattr,
-	.removexattr	= ovl_removexattr,
+	.removexattr	= generic_removexattr,
 	.update_time	= ovl_update_time,
 };
 
@@ -415,6 +392,9 @@
 	inode->i_ino = get_next_ino();
 	inode->i_mode = mode;
 	inode->i_flags |= S_NOCMTIME;
+#ifdef CONFIG_FS_POSIX_ACL
+	inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE;
+#endif
 
 	mode &= S_IFMT;
 	switch (mode) {
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index e4f5c95..5813ccf 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -24,8 +24,8 @@
 	(OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type))
 
 
-#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay"
-#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX ".opaque"
+#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
+#define OVL_XATTR_OPAQUE OVL_XATTR_PREFIX "opaque"
 
 #define OVL_ISUPPER_MASK 1UL
 
@@ -179,20 +179,21 @@
 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
 void ovl_cache_free(struct list_head *list);
 int ovl_check_d_type_supported(struct path *realpath);
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+			 struct dentry *dentry, int level);
 
 /* inode.c */
 int ovl_setattr(struct dentry *dentry, struct iattr *attr);
 int ovl_permission(struct inode *inode, int mask);
-int ovl_setxattr(struct dentry *dentry, struct inode *inode,
-		 const char *name, const void *value,
-		 size_t size, int flags);
-ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode,
-		     const char *name, void *value, size_t size);
+int ovl_xattr_set(struct dentry *dentry, const char *name, const void *value,
+		  size_t size, int flags);
+int ovl_xattr_get(struct dentry *dentry, const char *name,
+		  void *value, size_t size);
 ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
-int ovl_removexattr(struct dentry *dentry, const char *name);
 struct posix_acl *ovl_get_acl(struct inode *inode, int type);
 int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags);
 int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
+bool ovl_is_private_xattr(const char *name);
 
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode);
 struct inode *ovl_get_inode(struct super_block *sb, struct inode *realinode);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index cf37fc7..f241b4e 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -248,7 +248,7 @@
 			err = rdd->err;
 	} while (!err && rdd->count);
 
-	if (!err && rdd->first_maybe_whiteout)
+	if (!err && rdd->first_maybe_whiteout && rdd->dentry)
 		err = ovl_check_whiteouts(realpath->dentry, rdd);
 
 	fput(realfile);
@@ -606,3 +606,64 @@
 
 	return rdd.d_type_supported;
 }
+
+static void ovl_workdir_cleanup_recurse(struct path *path, int level)
+{
+	int err;
+	struct inode *dir = path->dentry->d_inode;
+	LIST_HEAD(list);
+	struct ovl_cache_entry *p;
+	struct ovl_readdir_data rdd = {
+		.ctx.actor = ovl_fill_merge,
+		.dentry = NULL,
+		.list = &list,
+		.root = RB_ROOT,
+		.is_lowest = false,
+	};
+
+	err = ovl_dir_read(path, &rdd);
+	if (err)
+		goto out;
+
+	inode_lock_nested(dir, I_MUTEX_PARENT);
+	list_for_each_entry(p, &list, l_node) {
+		struct dentry *dentry;
+
+		if (p->name[0] == '.') {
+			if (p->len == 1)
+				continue;
+			if (p->len == 2 && p->name[1] == '.')
+				continue;
+		}
+		dentry = lookup_one_len(p->name, path->dentry, p->len);
+		if (IS_ERR(dentry))
+			continue;
+		if (dentry->d_inode)
+			ovl_workdir_cleanup(dir, path->mnt, dentry, level);
+		dput(dentry);
+	}
+	inode_unlock(dir);
+out:
+	ovl_cache_free(&list);
+}
+
+void ovl_workdir_cleanup(struct inode *dir, struct vfsmount *mnt,
+			 struct dentry *dentry, int level)
+{
+	int err;
+
+	if (!d_is_dir(dentry) || level > 1) {
+		ovl_cleanup(dir, dentry);
+		return;
+	}
+
+	err = ovl_do_rmdir(dir, dentry);
+	if (err) {
+		struct path path = { .mnt = mnt, .dentry = dentry };
+
+		inode_unlock(dir);
+		ovl_workdir_cleanup_recurse(&path, level + 1);
+		inode_lock_nested(dir, I_MUTEX_PARENT);
+		ovl_cleanup(dir, dentry);
+	}
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4036132..e2a94a2 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -814,6 +814,10 @@
 		struct kstat stat = {
 			.mode = S_IFDIR | 0,
 		};
+		struct iattr attr = {
+			.ia_valid = ATTR_MODE,
+			.ia_mode = stat.mode,
+		};
 
 		if (work->d_inode) {
 			err = -EEXIST;
@@ -821,7 +825,7 @@
 				goto out_dput;
 
 			retried = true;
-			ovl_cleanup(dir, work);
+			ovl_workdir_cleanup(dir, mnt, work, 0);
 			dput(work);
 			goto retry;
 		}
@@ -829,6 +833,21 @@
 		err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
 		if (err)
 			goto out_dput;
+
+		err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
+		if (err && err != -ENODATA && err != -EOPNOTSUPP)
+			goto out_dput;
+
+		err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
+		if (err && err != -ENODATA && err != -EOPNOTSUPP)
+			goto out_dput;
+
+		/* Clear any inherited mode bits */
+		inode_lock(work->d_inode);
+		err = notify_change(work, &attr, NULL);
+		inode_unlock(work->d_inode);
+		if (err)
+			goto out_dput;
 	}
 out_unlock:
 	inode_unlock(dir);
@@ -967,10 +986,19 @@
 	return ctr;
 }
 
-static int ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
-				   struct dentry *dentry, struct inode *inode,
-				   const char *name, const void *value,
-				   size_t size, int flags)
+static int __maybe_unused
+ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, void *buffer, size_t size)
+{
+	return ovl_xattr_get(dentry, handler->name, buffer, size);
+}
+
+static int __maybe_unused
+ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *value,
+			size_t size, int flags)
 {
 	struct dentry *workdir = ovl_workdir(dentry);
 	struct inode *realinode = ovl_inode_real(inode, NULL);
@@ -998,19 +1026,22 @@
 
 	posix_acl_release(acl);
 
-	return ovl_setxattr(dentry, inode, handler->name, value, size, flags);
+	err = ovl_xattr_set(dentry, handler->name, value, size, flags);
+	if (!err)
+		ovl_copyattr(ovl_inode_real(inode, NULL), inode);
+
+	return err;
 
 out_acl_release:
 	posix_acl_release(acl);
 	return err;
 }
 
-static int ovl_other_xattr_set(const struct xattr_handler *handler,
-			       struct dentry *dentry, struct inode *inode,
-			       const char *name, const void *value,
-			       size_t size, int flags)
+static int ovl_own_xattr_get(const struct xattr_handler *handler,
+			     struct dentry *dentry, struct inode *inode,
+			     const char *name, void *buffer, size_t size)
 {
-	return ovl_setxattr(dentry, inode, name, value, size, flags);
+	return -EPERM;
 }
 
 static int ovl_own_xattr_set(const struct xattr_handler *handler,
@@ -1021,42 +1052,59 @@
 	return -EPERM;
 }
 
-static const struct xattr_handler ovl_posix_acl_access_xattr_handler = {
+static int ovl_other_xattr_get(const struct xattr_handler *handler,
+			       struct dentry *dentry, struct inode *inode,
+			       const char *name, void *buffer, size_t size)
+{
+	return ovl_xattr_get(dentry, name, buffer, size);
+}
+
+static int ovl_other_xattr_set(const struct xattr_handler *handler,
+			       struct dentry *dentry, struct inode *inode,
+			       const char *name, const void *value,
+			       size_t size, int flags)
+{
+	return ovl_xattr_set(dentry, name, value, size, flags);
+}
+
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_access_xattr_handler = {
 	.name = XATTR_NAME_POSIX_ACL_ACCESS,
 	.flags = ACL_TYPE_ACCESS,
+	.get = ovl_posix_acl_xattr_get,
 	.set = ovl_posix_acl_xattr_set,
 };
 
-static const struct xattr_handler ovl_posix_acl_default_xattr_handler = {
+static const struct xattr_handler __maybe_unused
+ovl_posix_acl_default_xattr_handler = {
 	.name = XATTR_NAME_POSIX_ACL_DEFAULT,
 	.flags = ACL_TYPE_DEFAULT,
+	.get = ovl_posix_acl_xattr_get,
 	.set = ovl_posix_acl_xattr_set,
 };
 
 static const struct xattr_handler ovl_own_xattr_handler = {
 	.prefix	= OVL_XATTR_PREFIX,
+	.get = ovl_own_xattr_get,
 	.set = ovl_own_xattr_set,
 };
 
 static const struct xattr_handler ovl_other_xattr_handler = {
 	.prefix	= "", /* catch all */
+	.get = ovl_other_xattr_get,
 	.set = ovl_other_xattr_set,
 };
 
 static const struct xattr_handler *ovl_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
 	&ovl_posix_acl_access_xattr_handler,
 	&ovl_posix_acl_default_xattr_handler,
+#endif
 	&ovl_own_xattr_handler,
 	&ovl_other_xattr_handler,
 	NULL
 };
 
-static const struct xattr_handler *ovl_xattr_noacl_handlers[] = {
-	&ovl_own_xattr_handler,
-	&ovl_other_xattr_handler,
-	NULL,
-};
-
 static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct path upperpath = { NULL, NULL };
@@ -1132,7 +1180,7 @@
 	err = -EINVAL;
 	stacklen = ovl_split_lowerdirs(lowertmp);
 	if (stacklen > OVL_MAX_STACK) {
-		pr_err("overlayfs: too many lower directries, limit is %d\n",
+		pr_err("overlayfs: too many lower directories, limit is %d\n",
 		       OVL_MAX_STACK);
 		goto out_free_lowertmp;
 	} else if (!ufs->config.upperdir && stacklen == 1) {
@@ -1269,10 +1317,7 @@
 
 	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
 	sb->s_op = &ovl_super_operations;
-	if (IS_ENABLED(CONFIG_FS_POSIX_ACL))
-		sb->s_xattr = ovl_xattr_handlers;
-	else
-		sb->s_xattr = ovl_xattr_noacl_handlers;
+	sb->s_xattr = ovl_xattr_handlers;
 	sb->s_root = root_dentry;
 	sb->s_fs_info = ufs;
 	sb->s_flags |= MS_POSIXACL;
diff --git a/fs/pipe.c b/fs/pipe.c
index 4b32928..4ebe6b2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -144,10 +144,8 @@
 	struct page *page = buf->page;
 
 	if (page_count(page) == 1) {
-		if (memcg_kmem_enabled()) {
+		if (memcg_kmem_enabled())
 			memcg_kmem_uncharge(page, 0);
-			__ClearPageKmemcg(page);
-		}
 		__SetPageLocked(page);
 		return 0;
 	}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 54e2702..3b792ab 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -483,7 +483,7 @@
 		save_stack_trace_tsk(task, &trace);
 
 		for (i = 0; i < trace.nr_entries; i++) {
-			seq_printf(m, "[<%pK>] %pS\n",
+			seq_printf(m, "[<%pK>] %pB\n",
 				   (void *)entries[i], (void *)entries[i]);
 		}
 		unlock_trace(task);
@@ -1556,18 +1556,13 @@
 static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
 {
 	struct task_struct *task;
-	struct mm_struct *mm;
 	struct file *exe_file;
 
 	task = get_proc_task(d_inode(dentry));
 	if (!task)
 		return -ENOENT;
-	mm = get_task_mm(task);
+	exe_file = get_task_exe_file(task);
 	put_task_struct(task);
-	if (!mm)
-		return -ENOENT;
-	exe_file = get_mm_exe_file(mm);
-	mmput(mm);
 	if (exe_file) {
 		*exe_path = exe_file->f_path;
 		path_get(&exe_file->f_path);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index c633476..bca66d8 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -390,6 +390,8 @@
 	atomic_set(&ent->count, 1);
 	spin_lock_init(&ent->pde_unload_lock);
 	INIT_LIST_HEAD(&ent->pde_openers);
+	proc_set_user(ent, (*parent)->uid, (*parent)->gid);
+
 out:
 	return ent;
 }
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index a939f5e..5c89a07 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -430,6 +430,7 @@
 static ssize_t
 read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 {
+	char *buf = file->private_data;
 	ssize_t acc = 0;
 	size_t size, tsz;
 	size_t elf_buflen;
@@ -500,23 +501,20 @@
 			if (clear_user(buffer, tsz))
 				return -EFAULT;
 		} else if (is_vmalloc_or_module_addr((void *)start)) {
-			char * elf_buf;
-
-			elf_buf = kzalloc(tsz, GFP_KERNEL);
-			if (!elf_buf)
-				return -ENOMEM;
-			vread(elf_buf, (char *)start, tsz);
+			vread(buf, (char *)start, tsz);
 			/* we have to zero-fill user buffer even if no read */
-			if (copy_to_user(buffer, elf_buf, tsz)) {
-				kfree(elf_buf);
+			if (copy_to_user(buffer, buf, tsz))
 				return -EFAULT;
-			}
-			kfree(elf_buf);
 		} else {
 			if (kern_addr_valid(start)) {
 				unsigned long n;
 
-				n = copy_to_user(buffer, (char *)start, tsz);
+				/*
+				 * Using bounce buffer to bypass the
+				 * hardened user copy kernel text checks.
+				 */
+				memcpy(buf, (char *) start, tsz);
+				n = copy_to_user(buffer, buf, tsz);
 				/*
 				 * We cannot distinguish between fault on source
 				 * and fault on destination. When this happens
@@ -549,6 +547,11 @@
 {
 	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
+
+	filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!filp->private_data)
+		return -ENOMEM;
+
 	if (kcore_need_update)
 		kcore_update_ram();
 	if (i_size_read(inode) != proc_root_kcore->size) {
@@ -559,10 +562,16 @@
 	return 0;
 }
 
+static int release_kcore(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+	return 0;
+}
 
 static const struct file_operations proc_kcore_operations = {
 	.read		= read_kcore,
 	.open		= open_kcore,
+	.release	= release_kcore,
 	.llseek		= default_llseek,
 };
 
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 09e18fd..b9a8c81 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -46,7 +46,7 @@
 		cached = 0;
 
 	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
-		pages[lru] = global_page_state(NR_LRU_BASE + lru);
+		pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
 	available = si_mem_available();
 
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index c8bbc68..7ae6b1d 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -21,6 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/mount.h>
 #include <linux/nsproxy.h>
+#include <linux/uidgid.h>
 #include <net/net_namespace.h>
 #include <linux/seq_file.h>
 
@@ -185,6 +186,8 @@
 static __net_init int proc_net_ns_init(struct net *net)
 {
 	struct proc_dir_entry *netd, *net_statd;
+	kuid_t uid;
+	kgid_t gid;
 	int err;
 
 	err = -ENOMEM;
@@ -199,6 +202,16 @@
 	netd->parent = &proc_root;
 	memcpy(netd->name, "net", 4);
 
+	uid = make_kuid(net->user_ns, 0);
+	if (!uid_valid(uid))
+		uid = netd->uid;
+
+	gid = make_kgid(net->user_ns, 0);
+	if (!gid_valid(gid))
+		gid = netd->gid;
+
+	proc_set_user(netd, uid, gid);
+
 	err = -EEXIST;
 	net_statd = proc_net_mkdir(net, "stat", netd);
 	if (!net_statd)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 1b93650..2ed3d71 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -430,6 +430,7 @@
 static struct inode *proc_sys_make_inode(struct super_block *sb,
 		struct ctl_table_header *head, struct ctl_table *table)
 {
+	struct ctl_table_root *root = head->root;
 	struct inode *inode;
 	struct proc_inode *ei;
 
@@ -457,6 +458,10 @@
 		if (is_empty_dir(head))
 			make_empty_dir_inode(inode);
 	}
+
+	if (root->set_ownership)
+		root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
+
 out:
 	return inode;
 }
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 187d84e..f6fa99e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -581,6 +581,8 @@
 		mss->anonymous_thp += HPAGE_PMD_SIZE;
 	else if (PageSwapBacked(page))
 		mss->shmem_thp += HPAGE_PMD_SIZE;
+	else if (is_zone_device_page(page))
+		/* pass */;
 	else
 		VM_BUG_ON_PAGE(1, page);
 	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd));
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 183a212..12af049 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -27,9 +27,17 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/ramfs.h>
+#include <linux/sched.h>
 
 #include "internal.h"
 
+static unsigned long ramfs_mmu_get_unmapped_area(struct file *file,
+		unsigned long addr, unsigned long len, unsigned long pgoff,
+		unsigned long flags)
+{
+	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
 const struct file_operations ramfs_file_operations = {
 	.read_iter	= generic_file_read_iter,
 	.write_iter	= generic_file_write_iter,
@@ -38,6 +46,7 @@
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.llseek		= generic_file_llseek,
+	.get_unmapped_area	= ramfs_mmu_get_unmapped_area,
 };
 
 const struct inode_operations ramfs_file_inode_operations = {
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 19f532e..6dc4296 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -223,8 +223,10 @@
 		size -= n;
 		buf += n;
 		copied += n;
-		if (!m->count)
+		if (!m->count) {
+			m->from = 0;
 			m->index++;
+		}
 		if (!size)
 			goto Done;
 	}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f35523d..b803213 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -114,9 +114,15 @@
 	 * If buf != of->prealloc_buf, we don't know how
 	 * large it is, so cannot safely pass it to ->show
 	 */
-	if (pos || WARN_ON_ONCE(buf != of->prealloc_buf))
+	if (WARN_ON_ONCE(buf != of->prealloc_buf))
 		return 0;
 	len = ops->show(kobj, of->kn->priv, buf);
+	if (pos) {
+		if (len <= pos)
+			return 0;
+		len -= pos;
+		memmove(buf, buf + pos, len);
+	}
 	return min(count, len);
 }
 
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index dc1358b..ac2de0e 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -233,8 +233,8 @@
 		kn = kernfs_find_and_get(parent, grp->name);
 		if (!kn) {
 			WARN(!kn, KERN_WARNING
-			     "sysfs group %p not found for kobject '%s'\n",
-			     grp, kobject_name(kobj));
+			     "sysfs group '%s' not found for kobject '%s'\n",
+			     grp->name, kobject_name(kobj));
 			return;
 		}
 	} else {
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index b45345d..51157da 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -370,7 +370,7 @@
 
 	p = c->gap_lebs;
 	do {
-		ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs);
+		ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs);
 		written = layout_leb_in_gaps(c, p);
 		if (written < 0) {
 			err = written;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index e237811..11a0041 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -575,7 +575,8 @@
 	dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name,
 		inode->i_ino, dentry, size);
 
-	return  __ubifs_getxattr(inode, name, buffer, size);
+	name = xattr_full_name(handler, name);
+	return __ubifs_getxattr(inode, name, buffer, size);
 }
 
 static int ubifs_xattr_set(const struct xattr_handler *handler,
@@ -586,6 +587,8 @@
 	dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd",
 		name, inode->i_ino, dentry, size);
 
+	name = xattr_full_name(handler, name);
+
 	if (value)
 		return __ubifs_setxattr(inode, name, value, size, flags);
 	else
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 776ae2f..05b5243 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1582,6 +1582,7 @@
 	xfs_extlen_t	*flenp,	/* result length */
 	int		*stat)	/* status: 0-freelist, 1-normal/none */
 {
+	struct xfs_owner_info	oinfo;
 	int		error;
 	xfs_agblock_t	fbno;
 	xfs_extlen_t	flen;
@@ -1624,6 +1625,18 @@
 				error0);
 			args->wasfromfl = 1;
 			trace_xfs_alloc_small_freelist(args);
+
+			/*
+			 * If we're feeding an AGFL block to something that
+			 * doesn't live in the free space, we need to clear
+			 * out the OWN_AG rmap.
+			 */
+			xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
+			error = xfs_rmap_free(args->tp, args->agbp, args->agno,
+					fbno, 1, &oinfo);
+			if (error)
+				goto error0;
+
 			*stat = 0;
 			return 0;
 		}
@@ -2264,6 +2277,9 @@
 		offsetof(xfs_agf_t, agf_longest),
 		offsetof(xfs_agf_t, agf_btreeblks),
 		offsetof(xfs_agf_t, agf_uuid),
+		offsetof(xfs_agf_t, agf_rmap_blocks),
+		/* needed so that we don't log the whole rest of the structure: */
+		offsetof(xfs_agf_t, agf_spare64),
 		sizeof(xfs_agf_t)
 	};
 
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index b5c213a..0856979 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -1814,6 +1814,10 @@
 
 	XFS_BTREE_STATS_INC(cur, lookup);
 
+	/* No such thing as a zero-level tree. */
+	if (cur->bc_nlevels == 0)
+		return -EFSCORRUPTED;
+
 	block = NULL;
 	keyno = 0;
 
@@ -4554,15 +4558,22 @@
 	if (error)
 		goto out;
 
+	/* Nothing?  See if there's anything to the right. */
+	if (!stat) {
+		error = xfs_btree_increment(cur, 0, &stat);
+		if (error)
+			goto out;
+	}
+
 	while (stat) {
 		/* Find the record. */
 		error = xfs_btree_get_rec(cur, &recp, &stat);
 		if (error || !stat)
 			break;
-		cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
 
 		/* Skip if high_key(rec) < low_key. */
 		if (firstrec) {
+			cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
 			firstrec = false;
 			diff = cur->bc_ops->diff_two_keys(cur, low_key,
 					&rec_key);
@@ -4571,6 +4582,7 @@
 		}
 
 		/* Stop if high_key < low_key(rec). */
+		cur->bc_ops->init_key_from_rec(&rec_key, recp);
 		diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
 		if (diff > 0)
 			break;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 054a203..c221d0e 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -194,7 +194,7 @@
 	/* Abort intent items. */
 	list_for_each_entry(dfp, &dop->dop_pending, dfp_list) {
 		trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
-		if (dfp->dfp_committed)
+		if (!dfp->dfp_done)
 			dfp->dfp_type->abort_intent(dfp->dfp_intent);
 	}
 
@@ -290,7 +290,6 @@
 	struct xfs_defer_pending	*dfp;
 	struct list_head		*li;
 	struct list_head		*n;
-	void				*done_item = NULL;
 	void				*state;
 	int				error = 0;
 	void				(*cleanup_fn)(struct xfs_trans *, void *, int);
@@ -309,19 +308,11 @@
 		if (error)
 			goto out;
 
-		/* Mark all pending intents as committed. */
-		list_for_each_entry_reverse(dfp, &dop->dop_pending, dfp_list) {
-			if (dfp->dfp_committed)
-				break;
-			trace_xfs_defer_pending_commit((*tp)->t_mountp, dfp);
-			dfp->dfp_committed = true;
-		}
-
 		/* Log an intent-done item for the first pending item. */
 		dfp = list_first_entry(&dop->dop_pending,
 				struct xfs_defer_pending, dfp_list);
 		trace_xfs_defer_pending_finish((*tp)->t_mountp, dfp);
-		done_item = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
+		dfp->dfp_done = dfp->dfp_type->create_done(*tp, dfp->dfp_intent,
 				dfp->dfp_count);
 		cleanup_fn = dfp->dfp_type->finish_cleanup;
 
@@ -331,7 +322,7 @@
 			list_del(li);
 			dfp->dfp_count--;
 			error = dfp->dfp_type->finish_item(*tp, dop, li,
-					done_item, &state);
+					dfp->dfp_done, &state);
 			if (error) {
 				/*
 				 * Clean up after ourselves and jump out.
@@ -428,8 +419,8 @@
 		dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
 				KM_SLEEP | KM_NOFS);
 		dfp->dfp_type = defer_op_types[type];
-		dfp->dfp_committed = false;
 		dfp->dfp_intent = NULL;
+		dfp->dfp_done = NULL;
 		dfp->dfp_count = 0;
 		INIT_LIST_HEAD(&dfp->dfp_work);
 		list_add_tail(&dfp->dfp_list, &dop->dop_intake);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index cc3981c..e96533d 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -30,8 +30,8 @@
 struct xfs_defer_pending {
 	const struct xfs_defer_op_type	*dfp_type;	/* function pointers */
 	struct list_head		dfp_list;	/* pending items */
-	bool				dfp_committed;	/* committed trans? */
 	void				*dfp_intent;	/* log intent item */
+	void				*dfp_done;	/* log done item */
 	struct list_head		dfp_work;	/* work items */
 	unsigned int			dfp_count;	/* # extent items */
 };
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index f814d42..270fb5c 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -640,12 +640,15 @@
 	__be32		agf_btreeblks;	/* # of blocks held in AGF btrees */
 	uuid_t		agf_uuid;	/* uuid of filesystem */
 
+	__be32		agf_rmap_blocks;	/* rmapbt blocks used */
+	__be32		agf_padding;		/* padding */
+
 	/*
 	 * reserve some contiguous space for future logged fields before we add
 	 * the unlogged fields. This makes the range logging via flags and
 	 * structure offsets much simpler.
 	 */
-	__be64		agf_spare64[16];
+	__be64		agf_spare64[15];
 
 	/* unlogged fields, written during buffer writeback. */
 	__be64		agf_lsn;	/* last write sequence */
@@ -670,7 +673,9 @@
 #define	XFS_AGF_LONGEST		0x00000400
 #define	XFS_AGF_BTREEBLKS	0x00000800
 #define	XFS_AGF_UUID		0x00001000
-#define	XFS_AGF_NUM_BITS	13
+#define	XFS_AGF_RMAP_BLOCKS	0x00002000
+#define	XFS_AGF_SPARE64		0x00004000
+#define	XFS_AGF_NUM_BITS	15
 #define	XFS_AGF_ALL_BITS	((1 << XFS_AGF_NUM_BITS) - 1)
 
 #define XFS_AGF_FLAGS \
@@ -686,7 +691,9 @@
 	{ XFS_AGF_FREEBLKS,	"FREEBLKS" }, \
 	{ XFS_AGF_LONGEST,	"LONGEST" }, \
 	{ XFS_AGF_BTREEBLKS,	"BTREEBLKS" }, \
-	{ XFS_AGF_UUID,		"UUID" }
+	{ XFS_AGF_UUID,		"UUID" }, \
+	{ XFS_AGF_RMAP_BLOCKS,	"RMAP_BLOCKS" }, \
+	{ XFS_AGF_SPARE64,	"SPARE64" }
 
 /* disk block (xfs_daddr_t) in the AG */
 #define XFS_AGF_DADDR(mp)	((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index bc1faeb..17b8eeb 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -98,6 +98,8 @@
 	union xfs_btree_ptr	*new,
 	int			*stat)
 {
+	struct xfs_buf		*agbp = cur->bc_private.a.agbp;
+	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);
 	int			error;
 	xfs_agblock_t		bno;
 
@@ -124,6 +126,8 @@
 
 	xfs_trans_agbtree_delta(cur->bc_tp, 1);
 	new->s = cpu_to_be32(bno);
+	be32_add_cpu(&agf->agf_rmap_blocks, 1);
+	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
 
 	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
 	*stat = 1;
@@ -143,6 +147,8 @@
 	bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp));
 	trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno,
 			bno, 1);
+	be32_add_cpu(&agf->agf_rmap_blocks, -1);
+	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
 	error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1);
 	if (error)
 		return error;
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 0e3d4f5..4aecc5f 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -583,7 +583,8 @@
 	 * Only check the in progress field for the primary superblock as
 	 * mkfs.xfs doesn't clear it from secondary superblocks.
 	 */
-	return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
+	return xfs_mount_validate_sb(mp, &sb,
+				     bp->b_maps[0].bm_bn == XFS_SB_DADDR,
 				     check_version);
 }
 
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 47a318c..b5b9bff 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -115,7 +115,6 @@
 	if (!(bp->b_flags & _XBF_IN_FLIGHT))
 		return;
 
-	ASSERT(bp->b_flags & XBF_ASYNC);
 	bp->b_flags &= ~_XBF_IN_FLIGHT;
 	percpu_counter_dec(&bp->b_target->bt_io_count);
 }
@@ -1612,7 +1611,7 @@
 	 */
 	while (percpu_counter_sum(&btp->bt_io_count))
 		delay(100);
-	drain_workqueue(btp->bt_mount->m_buf_workqueue);
+	flush_workqueue(btp->bt_mount->m_buf_workqueue);
 
 	/* loop until there is nothing left on the lru list. */
 	while (list_lru_count(&btp->bt_lru)) {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index ed95e5b..e612a02 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -741,9 +741,20 @@
 	 * page is inserted into the pagecache when we have to serve a write
 	 * fault on a hole.  It should never be dirtied and can simply be
 	 * dropped from the pagecache once we get real data for the page.
+	 *
+	 * XXX: This is racy against mmap, and there's nothing we can do about
+	 * it. dax_do_io() should really do this invalidation internally as
+	 * it will know if we've allocated over a holei for this specific IO and
+	 * if so it needs to update the mapping tree and invalidate existing
+	 * PTEs over the newly allocated range. Remove this invalidation when
+	 * dax_do_io() is fixed up.
 	 */
 	if (mapping->nrpages) {
-		ret = invalidate_inode_pages2(mapping);
+		loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
+
+		ret = invalidate_inode_pages2_range(mapping,
+						    iocb->ki_pos >> PAGE_SHIFT,
+						    end >> PAGE_SHIFT);
 		WARN_ON_ONCE(ret);
 	}
 
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 0f96847..0b7f986 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -248,6 +248,7 @@
 			agf->agf_roots[XFS_BTNUM_RMAPi] =
 						cpu_to_be32(XFS_RMAP_BLOCK(mp));
 			agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
+			agf->agf_rmap_blocks = cpu_to_be32(1);
 		}
 
 		agf->agf_flfirst = cpu_to_be32(1);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2114d53..2af0dda 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -715,12 +715,16 @@
 		 * is in the delayed allocation extent on which we sit
 		 * but before our buffer starts.
 		 */
-
 		nimaps = 0;
 		while (nimaps == 0) {
 			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
-
-			error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres,
+			/*
+			 * We have already reserved space for the extent and any
+			 * indirect blocks when creating the delalloc extent,
+			 * there is no need to reserve space in this transaction
+			 * again.
+			 */
+			error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0,
 					0, XFS_TRANS_RESERVE, &tp);
 			if (error)
 				return error;
@@ -1037,20 +1041,14 @@
 			return error;
 
 		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
-		xfs_bmbt_to_iomap(ip, iomap, &imap);
-	} else if (nimaps) {
+	} else {
+		ASSERT(nimaps);
+
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
-		xfs_bmbt_to_iomap(ip, iomap, &imap);
-	} else {
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
-		iomap->blkno = IOMAP_NULL_BLOCK;
-		iomap->type = IOMAP_HOLE;
-		iomap->offset = offset;
-		iomap->length = length;
 	}
 
+	xfs_bmbt_to_iomap(ip, iomap, &imap);
 	return 0;
 }
 
@@ -1112,3 +1110,48 @@
 	.iomap_begin		= xfs_file_iomap_begin,
 	.iomap_end		= xfs_file_iomap_end,
 };
+
+static int
+xfs_xattr_iomap_begin(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
+	struct xfs_bmbt_irec	imap;
+	int			nimaps = 1, error = 0;
+	unsigned		lockmode;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	lockmode = xfs_ilock_data_map_shared(ip);
+
+	/* if there are no attribute fork or extents, return ENOENT */
+	if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
+		error = -ENOENT;
+		goto out_unlock;
+	}
+
+	ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+			       &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
+out_unlock:
+	xfs_iunlock(ip, lockmode);
+
+	if (!error) {
+		ASSERT(nimaps);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	}
+
+	return error;
+}
+
+struct iomap_ops xfs_xattr_iomap_ops = {
+	.iomap_begin		= xfs_xattr_iomap_begin,
+};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index e066d04..fb8aca3 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -35,5 +35,6 @@
 		struct xfs_bmbt_irec *);
 
 extern struct iomap_ops xfs_iomap_ops;
+extern struct iomap_ops xfs_xattr_iomap_ops;
 
 #endif /* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ab820f8..b24c310 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1009,7 +1009,14 @@
 	int			error;
 
 	xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
-	error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
+		fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
+		error = iomap_fiemap(inode, fieinfo, start, length,
+				&xfs_xattr_iomap_ops);
+	} else {
+		error = iomap_fiemap(inode, fieinfo, start, length,
+				&xfs_iomap_ops);
+	}
 	xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
 
 	return error;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 24ef83e..fd6be45 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1574,9 +1574,16 @@
 		}
 	}
 
-	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
+		if (mp->m_sb.sb_rblocks) {
+			xfs_alert(mp,
+	"EXPERIMENTAL reverse mapping btree not compatible with realtime device!");
+			error = -EINVAL;
+			goto out_filestream_unmount;
+		}
 		xfs_alert(mp,
 	"EXPERIMENTAL reverse mapping btree feature enabled. Use at your own risk!");
+	}
 
 	error = xfs_mountfs(mp);
 	if (error)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 551b7e2..d303a66 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1298,7 +1298,6 @@
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
 DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
 DEFINE_IOMAP_EVENT(xfs_iomap_found);
-DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -2296,7 +2295,7 @@
 		__entry->dev = mp ? mp->m_super->s_dev : 0;
 		__entry->type = dfp->dfp_type->type;
 		__entry->intent = dfp->dfp_intent;
-		__entry->committed = dfp->dfp_committed;
+		__entry->committed = dfp->dfp_done != NULL;
 		__entry->nr = dfp->dfp_count;
 	),
 	TP_printk("dev %d:%d optype %d intent %p committed %d nr %d\n",
diff --git a/include/acpi/acconfig.h b/include/acpi/acconfig.h
index fe2e3ac..12c2882 100644
--- a/include/acpi/acconfig.h
+++ b/include/acpi/acconfig.h
@@ -144,6 +144,10 @@
 
 #define ACPI_ADDRESS_RANGE_MAX          2
 
+/* Maximum number of While() loops before abort */
+
+#define ACPI_MAX_LOOP_COUNT             0xFFFF
+
 /******************************************************************************
  *
  * ACPI Specification constants (Do not change unless the specification changes)
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 34f601e..48eb4dd 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -366,7 +366,7 @@
 	ACPI_TRACE_ENTRY (name, acpi_ut_trace_u32, u32, value)
 
 #define ACPI_FUNCTION_TRACE_STR(name, string) \
-	ACPI_TRACE_ENTRY (name, acpi_ut_trace_str, char *, string)
+	ACPI_TRACE_ENTRY (name, acpi_ut_trace_str, const char *, string)
 
 #define ACPI_FUNCTION_ENTRY() \
 	acpi_ut_track_stack_ptr()
@@ -425,6 +425,9 @@
 #define return_PTR(pointer) \
 	ACPI_TRACE_EXIT (acpi_ut_ptr_exit, void *, pointer)
 
+#define return_STR(string) \
+	ACPI_TRACE_EXIT (acpi_ut_str_exit, const char *, string)
+
 #define return_VALUE(value) \
 	ACPI_TRACE_EXIT (acpi_ut_value_exit, u64, value)
 
@@ -478,6 +481,7 @@
 #define return_VOID                     return
 #define return_ACPI_STATUS(s)           return(s)
 #define return_PTR(s)                   return(s)
+#define return_STR(s)                   return(s)
 #define return_VALUE(s)                 return(s)
 #define return_UINT8(s)                 return(s)
 #define return_UINT32(s)                return(s)
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 562603d..f3414c8 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -371,6 +371,12 @@
 acpi_status acpi_os_notify_command_complete(void);
 #endif
 
+#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_trace_point
+void
+acpi_os_trace_point(acpi_trace_event_type type,
+		    u8 begin, u8 *aml, char *pathname);
+#endif
+
 /*
  * Obtain ACPI table(s)
  */
@@ -416,41 +422,4 @@
 void acpi_os_close_directory(void *dir_handle);
 #endif
 
-/*
- * File I/O and related support
- */
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_open_file
-ACPI_FILE acpi_os_open_file(const char *path, u8 modes);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_close_file
-void acpi_os_close_file(ACPI_FILE file);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_read_file
-int
-acpi_os_read_file(ACPI_FILE file,
-		  void *buffer, acpi_size size, acpi_size count);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_write_file
-int
-acpi_os_write_file(ACPI_FILE file,
-		   void *buffer, acpi_size size, acpi_size count);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_file_offset
-long acpi_os_get_file_offset(ACPI_FILE file);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_set_file_offset
-acpi_status acpi_os_set_file_offset(ACPI_FILE file, long offset, u8 from);
-#endif
-
-#ifndef ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_trace_point
-void
-acpi_os_trace_point(acpi_trace_event_type type,
-		    u8 begin, u8 *aml, char *pathname);
-#endif
-
 #endif				/* __ACPIOSXF_H__ */
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 1ff3a76..c7b3a13 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -46,7 +46,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20160422
+#define ACPI_CA_VERSION                 0x20160831
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
@@ -195,6 +195,13 @@
 ACPI_INIT_GLOBAL(u8, acpi_gbl_group_module_level_code, TRUE);
 
 /*
+ * Optionally support module level code by parsing the entire table as
+ * a term_list. Default is FALSE, do not execute entire table until some
+ * lock order issues are fixed.
+ */
+ACPI_INIT_GLOBAL(u8, acpi_gbl_parse_table_as_term_list, FALSE);
+
+/*
  * Optionally use 32-bit FADT addresses if and when there is a conflict
  * (address mismatch) between the 32-bit and 64-bit versions of the
  * address. Although ACPICA adheres to the ACPI specification which
@@ -416,18 +423,19 @@
 /*
  * Initialization
  */
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
 			    acpi_initialize_tables(struct acpi_table_desc
 						   *initial_storage,
 						   u32 initial_table_count,
 						   u8 allow_resize))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_initialize_subsystem(void))
-
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_enable_subsystem(u32 flags))
-
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
-			    acpi_initialize_objects(u32 flags))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_terminate(void))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+			     acpi_initialize_subsystem(void))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+			     acpi_enable_subsystem(u32 flags))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+			     acpi_initialize_objects(u32 flags))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+			     acpi_terminate(void))
 
 /*
  * Miscellaneous global interfaces
@@ -467,7 +475,7 @@
 /*
  * ACPI table load/unload interfaces
  */
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
 			    acpi_install_table(acpi_physical_address address,
 					       u8 physical))
 
@@ -476,14 +484,17 @@
 
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			    acpi_unload_parent_table(acpi_handle object))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_load_tables(void))
+
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+			    acpi_load_tables(void))
 
 /*
  * ACPI table manipulation interfaces
  */
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init acpi_reallocate_root_table(void))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
+			    acpi_reallocate_root_table(void))
 
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status __init
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status ACPI_INIT_FUNCTION
 			    acpi_find_root_pointer(acpi_physical_address
 						   *rsdp_address))
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
@@ -732,6 +743,10 @@
 						u32 gpe_number))
 
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
+				acpi_mask_gpe(acpi_handle gpe_device,
+					      u32 gpe_number, u8 is_masked))
+
+ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
 				acpi_mark_gpe_for_wake(acpi_handle gpe_device,
 						       u32 gpe_number))
 
@@ -935,9 +950,6 @@
 			       acpi_trace_point(acpi_trace_event_type type,
 						u8 begin,
 						u8 *aml, char *pathname))
-ACPI_APP_DEPENDENT_RETURN_VOID(ACPI_PRINTF_LIKE(1)
-				void ACPI_INTERNAL_VAR_XFACE
-				acpi_log_error(const char *format, ...))
 
 acpi_status acpi_initialize_debugger(void);
 
diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h
index c19700e..1b949e0 100644
--- a/include/acpi/actbl.h
+++ b/include/acpi/actbl.h
@@ -230,62 +230,72 @@
 /* Fields common to all versions of the FADT */
 
 struct acpi_table_fadt {
-	struct acpi_table_header header;	/* Common ACPI table header */
-	u32 facs;		/* 32-bit physical address of FACS */
-	u32 dsdt;		/* 32-bit physical address of DSDT */
-	u8 model;		/* System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */
-	u8 preferred_profile;	/* Conveys preferred power management profile to OSPM. */
-	u16 sci_interrupt;	/* System vector of SCI interrupt */
-	u32 smi_command;	/* 32-bit Port address of SMI command port */
-	u8 acpi_enable;		/* Value to write to SMI_CMD to enable ACPI */
-	u8 acpi_disable;	/* Value to write to SMI_CMD to disable ACPI */
-	u8 s4_bios_request;	/* Value to write to SMI_CMD to enter S4BIOS state */
-	u8 pstate_control;	/* Processor performance state control */
-	u32 pm1a_event_block;	/* 32-bit port address of Power Mgt 1a Event Reg Blk */
-	u32 pm1b_event_block;	/* 32-bit port address of Power Mgt 1b Event Reg Blk */
-	u32 pm1a_control_block;	/* 32-bit port address of Power Mgt 1a Control Reg Blk */
-	u32 pm1b_control_block;	/* 32-bit port address of Power Mgt 1b Control Reg Blk */
-	u32 pm2_control_block;	/* 32-bit port address of Power Mgt 2 Control Reg Blk */
-	u32 pm_timer_block;	/* 32-bit port address of Power Mgt Timer Ctrl Reg Blk */
-	u32 gpe0_block;		/* 32-bit port address of General Purpose Event 0 Reg Blk */
-	u32 gpe1_block;		/* 32-bit port address of General Purpose Event 1 Reg Blk */
-	u8 pm1_event_length;	/* Byte Length of ports at pm1x_event_block */
-	u8 pm1_control_length;	/* Byte Length of ports at pm1x_control_block */
-	u8 pm2_control_length;	/* Byte Length of ports at pm2_control_block */
-	u8 pm_timer_length;	/* Byte Length of ports at pm_timer_block */
-	u8 gpe0_block_length;	/* Byte Length of ports at gpe0_block */
-	u8 gpe1_block_length;	/* Byte Length of ports at gpe1_block */
-	u8 gpe1_base;		/* Offset in GPE number space where GPE1 events start */
-	u8 cst_control;		/* Support for the _CST object and C-States change notification */
-	u16 c2_latency;		/* Worst case HW latency to enter/exit C2 state */
-	u16 c3_latency;		/* Worst case HW latency to enter/exit C3 state */
-	u16 flush_size;		/* Processor memory cache line width, in bytes */
-	u16 flush_stride;	/* Number of flush strides that need to be read */
-	u8 duty_offset;		/* Processor duty cycle index in processor P_CNT reg */
-	u8 duty_width;		/* Processor duty cycle value bit width in P_CNT register */
-	u8 day_alarm;		/* Index to day-of-month alarm in RTC CMOS RAM */
-	u8 month_alarm;		/* Index to month-of-year alarm in RTC CMOS RAM */
-	u8 century;		/* Index to century in RTC CMOS RAM */
-	u16 boot_flags;		/* IA-PC Boot Architecture Flags (see below for individual flags) */
-	u8 reserved;		/* Reserved, must be zero */
-	u32 flags;		/* Miscellaneous flag bits (see below for individual flags) */
-	struct acpi_generic_address reset_register;	/* 64-bit address of the Reset register */
-	u8 reset_value;		/* Value to write to the reset_register port to reset the system */
-	u16 arm_boot_flags;	/* ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */
-	u8 minor_revision;	/* FADT Minor Revision (ACPI 5.1) */
-	u64 Xfacs;		/* 64-bit physical address of FACS */
-	u64 Xdsdt;		/* 64-bit physical address of DSDT */
-	struct acpi_generic_address xpm1a_event_block;	/* 64-bit Extended Power Mgt 1a Event Reg Blk address */
-	struct acpi_generic_address xpm1b_event_block;	/* 64-bit Extended Power Mgt 1b Event Reg Blk address */
-	struct acpi_generic_address xpm1a_control_block;	/* 64-bit Extended Power Mgt 1a Control Reg Blk address */
-	struct acpi_generic_address xpm1b_control_block;	/* 64-bit Extended Power Mgt 1b Control Reg Blk address */
-	struct acpi_generic_address xpm2_control_block;	/* 64-bit Extended Power Mgt 2 Control Reg Blk address */
-	struct acpi_generic_address xpm_timer_block;	/* 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */
-	struct acpi_generic_address xgpe0_block;	/* 64-bit Extended General Purpose Event 0 Reg Blk address */
-	struct acpi_generic_address xgpe1_block;	/* 64-bit Extended General Purpose Event 1 Reg Blk address */
-	struct acpi_generic_address sleep_control;	/* 64-bit Sleep Control register (ACPI 5.0) */
-	struct acpi_generic_address sleep_status;	/* 64-bit Sleep Status register (ACPI 5.0) */
-	u64 hypervisor_id;	/* Hypervisor Vendor ID (ACPI 6.0) */
+	struct acpi_table_header header;	/* [V1] Common ACPI table header */
+	u32 facs;		/* [V1] 32-bit physical address of FACS */
+	u32 dsdt;		/* [V1] 32-bit physical address of DSDT */
+	u8 model;		/* [V1] System Interrupt Model (ACPI 1.0) - not used in ACPI 2.0+ */
+	u8 preferred_profile;	/* [V1] Conveys preferred power management profile to OSPM. */
+	u16 sci_interrupt;	/* [V1] System vector of SCI interrupt */
+	u32 smi_command;	/* [V1] 32-bit Port address of SMI command port */
+	u8 acpi_enable;		/* [V1] Value to write to SMI_CMD to enable ACPI */
+	u8 acpi_disable;	/* [V1] Value to write to SMI_CMD to disable ACPI */
+	u8 s4_bios_request;	/* [V1] Value to write to SMI_CMD to enter S4BIOS state */
+	u8 pstate_control;	/* [V1] Processor performance state control */
+	u32 pm1a_event_block;	/* [V1] 32-bit port address of Power Mgt 1a Event Reg Blk */
+	u32 pm1b_event_block;	/* [V1] 32-bit port address of Power Mgt 1b Event Reg Blk */
+	u32 pm1a_control_block;	/* [V1] 32-bit port address of Power Mgt 1a Control Reg Blk */
+	u32 pm1b_control_block;	/* [V1] 32-bit port address of Power Mgt 1b Control Reg Blk */
+	u32 pm2_control_block;	/* [V1] 32-bit port address of Power Mgt 2 Control Reg Blk */
+	u32 pm_timer_block;	/* [V1] 32-bit port address of Power Mgt Timer Ctrl Reg Blk */
+	u32 gpe0_block;		/* [V1] 32-bit port address of General Purpose Event 0 Reg Blk */
+	u32 gpe1_block;		/* [V1] 32-bit port address of General Purpose Event 1 Reg Blk */
+	u8 pm1_event_length;	/* [V1] Byte Length of ports at pm1x_event_block */
+	u8 pm1_control_length;	/* [V1] Byte Length of ports at pm1x_control_block */
+	u8 pm2_control_length;	/* [V1] Byte Length of ports at pm2_control_block */
+	u8 pm_timer_length;	/* [V1] Byte Length of ports at pm_timer_block */
+	u8 gpe0_block_length;	/* [V1] Byte Length of ports at gpe0_block */
+	u8 gpe1_block_length;	/* [V1] Byte Length of ports at gpe1_block */
+	u8 gpe1_base;		/* [V1] Offset in GPE number space where GPE1 events start */
+	u8 cst_control;		/* [V1] Support for the _CST object and C-States change notification */
+	u16 c2_latency;		/* [V1] Worst case HW latency to enter/exit C2 state */
+	u16 c3_latency;		/* [V1] Worst case HW latency to enter/exit C3 state */
+	u16 flush_size;		/* [V1] Processor memory cache line width, in bytes */
+	u16 flush_stride;	/* [V1] Number of flush strides that need to be read */
+	u8 duty_offset;		/* [V1] Processor duty cycle index in processor P_CNT reg */
+	u8 duty_width;		/* [V1] Processor duty cycle value bit width in P_CNT register */
+	u8 day_alarm;		/* [V1] Index to day-of-month alarm in RTC CMOS RAM */
+	u8 month_alarm;		/* [V1] Index to month-of-year alarm in RTC CMOS RAM */
+	u8 century;		/* [V1] Index to century in RTC CMOS RAM */
+	u16 boot_flags;		/* [V3] IA-PC Boot Architecture Flags (see below for individual flags) */
+	u8 reserved;		/* [V1] Reserved, must be zero */
+	u32 flags;		/* [V1] Miscellaneous flag bits (see below for individual flags) */
+	/* End of Version 1 FADT fields (ACPI 1.0) */
+
+	struct acpi_generic_address reset_register;	/* [V3] 64-bit address of the Reset register */
+	u8 reset_value;		/* [V3] Value to write to the reset_register port to reset the system */
+	u16 arm_boot_flags;	/* [V5] ARM-Specific Boot Flags (see below for individual flags) (ACPI 5.1) */
+	u8 minor_revision;	/* [V5] FADT Minor Revision (ACPI 5.1) */
+	u64 Xfacs;		/* [V3] 64-bit physical address of FACS */
+	u64 Xdsdt;		/* [V3] 64-bit physical address of DSDT */
+	struct acpi_generic_address xpm1a_event_block;	/* [V3] 64-bit Extended Power Mgt 1a Event Reg Blk address */
+	struct acpi_generic_address xpm1b_event_block;	/* [V3] 64-bit Extended Power Mgt 1b Event Reg Blk address */
+	struct acpi_generic_address xpm1a_control_block;	/* [V3] 64-bit Extended Power Mgt 1a Control Reg Blk address */
+	struct acpi_generic_address xpm1b_control_block;	/* [V3] 64-bit Extended Power Mgt 1b Control Reg Blk address */
+	struct acpi_generic_address xpm2_control_block;	/* [V3] 64-bit Extended Power Mgt 2 Control Reg Blk address */
+	struct acpi_generic_address xpm_timer_block;	/* [V3] 64-bit Extended Power Mgt Timer Ctrl Reg Blk address */
+	struct acpi_generic_address xgpe0_block;	/* [V3] 64-bit Extended General Purpose Event 0 Reg Blk address */
+	struct acpi_generic_address xgpe1_block;	/* [V3] 64-bit Extended General Purpose Event 1 Reg Blk address */
+	/* End of Version 3 FADT fields (ACPI 2.0) */
+
+	struct acpi_generic_address sleep_control;	/* [V4] 64-bit Sleep Control register (ACPI 5.0) */
+	/* End of Version 4 FADT fields (ACPI 3.0 and ACPI 4.0) (Field was originally reserved in ACPI 3.0) */
+
+	struct acpi_generic_address sleep_status;	/* [V5] 64-bit Sleep Status register (ACPI 5.0) */
+	/* End of Version 5 FADT fields (ACPI 5.0) */
+
+	u64 hypervisor_id;	/* [V6] Hypervisor Vendor ID (ACPI 6.0) */
+	/* End of Version 6 FADT fields (ACPI 6.0) */
+
 };
 
 /* Masks for FADT IA-PC Boot Architecture Flags (boot_flags) [Vx]=Introduced in this FADT revision */
@@ -301,8 +311,8 @@
 
 /* Masks for FADT ARM Boot Architecture Flags (arm_boot_flags) ACPI 5.1 */
 
-#define ACPI_FADT_PSCI_COMPLIANT    (1)	/* 00: [V5+] PSCI 0.2+ is implemented */
-#define ACPI_FADT_PSCI_USE_HVC      (1<<1)	/* 01: [V5+] HVC must be used instead of SMC as the PSCI conduit */
+#define ACPI_FADT_PSCI_COMPLIANT    (1)	/* 00: [V5] PSCI 0.2+ is implemented */
+#define ACPI_FADT_PSCI_USE_HVC      (1<<1)	/* 01: [V5] HVC must be used instead of SMC as the PSCI conduit */
 
 /* Masks for FADT flags */
 
@@ -399,20 +409,34 @@
  * match the expected length. In other words, the length of the
  * FADT is the bottom line as to what the version really is.
  *
- * For reference, the values below are as follows:
- *     FADT V1 size: 0x074
- *     FADT V2 size: 0x084
- *     FADT V3 size: 0x0F4
- *     FADT V4 size: 0x0F4
- *     FADT V5 size: 0x10C
- *     FADT V6 size: 0x114
+ * NOTE: There is no officialy released V2 of the FADT. This
+ * version was used only for prototyping and testing during the
+ * 32-bit to 64-bit transition. V3 was the first official 64-bit
+ * version of the FADT.
+ *
+ * Update this list of defines when a new version of the FADT is
+ * added to the ACPI specification. Note that the FADT version is
+ * only incremented when new fields are appended to the existing
+ * version. Therefore, the FADT version is competely independent
+ * from the version of the ACPI specification where it is
+ * defined.
+ *
+ * For reference, the various FADT lengths are as follows:
+ *     FADT V1 size: 0x074      ACPI 1.0
+ *     FADT V3 size: 0x0F4      ACPI 2.0
+ *     FADT V4 size: 0x100      ACPI 3.0 and ACPI 4.0
+ *     FADT V5 size: 0x10C      ACPI 5.0
+ *     FADT V6 size: 0x114      ACPI 6.0
  */
-#define ACPI_FADT_V1_SIZE       (u32) (ACPI_FADT_OFFSET (flags) + 4)
-#define ACPI_FADT_V2_SIZE       (u32) (ACPI_FADT_OFFSET (minor_revision) + 1)
-#define ACPI_FADT_V3_SIZE       (u32) (ACPI_FADT_OFFSET (sleep_control))
-#define ACPI_FADT_V5_SIZE       (u32) (ACPI_FADT_OFFSET (hypervisor_id))
-#define ACPI_FADT_V6_SIZE       (u32) (sizeof (struct acpi_table_fadt))
+#define ACPI_FADT_V1_SIZE       (u32) (ACPI_FADT_OFFSET (flags) + 4)	/* ACPI 1.0 */
+#define ACPI_FADT_V3_SIZE       (u32) (ACPI_FADT_OFFSET (sleep_control))	/* ACPI 2.0 */
+#define ACPI_FADT_V4_SIZE       (u32) (ACPI_FADT_OFFSET (sleep_status))	/* ACPI 3.0 and ACPI 4.0 */
+#define ACPI_FADT_V5_SIZE       (u32) (ACPI_FADT_OFFSET (hypervisor_id))	/* ACPI 5.0 */
+#define ACPI_FADT_V6_SIZE       (u32) (sizeof (struct acpi_table_fadt))	/* ACPI 6.0 */
 
+/* Update these when new FADT versions are added */
+
+#define ACPI_FADT_MAX_VERSION   6
 #define ACPI_FADT_CONFORMANCE   "ACPI 6.1 (FADT version 6)"
 
 #endif				/* __ACTBL_H__ */
diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h
index cb389ef..1d798ab 100644
--- a/include/acpi/actypes.h
+++ b/include/acpi/actypes.h
@@ -732,16 +732,17 @@
  * The encoding of acpi_event_status is illustrated below.
  * Note that a set bit (1) indicates the property is TRUE
  * (e.g. if bit 0 is set then the event is enabled).
- * +-------------+-+-+-+-+-+
- * |   Bits 31:5 |4|3|2|1|0|
- * +-------------+-+-+-+-+-+
- *          |     | | | | |
- *          |     | | | | +- Enabled?
- *          |     | | | +--- Enabled for wake?
- *          |     | | +----- Status bit set?
- *          |     | +------- Enable bit set?
- *          |     +--------- Has a handler?
- *          +--------------- <Reserved>
+ * +-------------+-+-+-+-+-+-+
+ * |   Bits 31:6 |5|4|3|2|1|0|
+ * +-------------+-+-+-+-+-+-+
+ *          |     | | | | | |
+ *          |     | | | | | +- Enabled?
+ *          |     | | | | +--- Enabled for wake?
+ *          |     | | | +----- Status bit set?
+ *          |     | | +------- Enable bit set?
+ *          |     | +--------- Has a handler?
+ *          |     +----------- Masked?
+ *          +----------------- <Reserved>
  */
 typedef u32 acpi_event_status;
 
@@ -751,6 +752,7 @@
 #define ACPI_EVENT_FLAG_STATUS_SET      (acpi_event_status) 0x04
 #define ACPI_EVENT_FLAG_ENABLE_SET      (acpi_event_status) 0x08
 #define ACPI_EVENT_FLAG_HAS_HANDLER     (acpi_event_status) 0x10
+#define ACPI_EVENT_FLAG_MASKED          (acpi_event_status) 0x20
 #define ACPI_EVENT_FLAG_SET             ACPI_EVENT_FLAG_STATUS_SET
 
 /* Actions for acpi_set_gpe, acpi_gpe_wakeup, acpi_hw_low_set_gpe */
@@ -761,14 +763,15 @@
 
 /*
  * GPE info flags - Per GPE
- * +-------+-+-+---+
- * |  7:5  |4|3|2:0|
- * +-------+-+-+---+
- *     |    | |  |
- *     |    | |  +-- Type of dispatch:to method, handler, notify, or none
- *     |    | +----- Interrupt type: edge or level triggered
- *     |    +------- Is a Wake GPE
- *     +------------ <Reserved>
+ * +---+-+-+-+---+
+ * |7:6|5|4|3|2:0|
+ * +---+-+-+-+---+
+ *   |  | | |  |
+ *   |  | | |  +-- Type of dispatch:to method, handler, notify, or none
+ *   |  | | +----- Interrupt type: edge or level triggered
+ *   |  | +------- Is a Wake GPE
+ *   |  +--------- Is GPE masked by the software GPE masking machanism
+ *   +------------ <Reserved>
  */
 #define ACPI_GPE_DISPATCH_NONE          (u8) 0x00
 #define ACPI_GPE_DISPATCH_METHOD        (u8) 0x01
@@ -1031,12 +1034,6 @@
 	u32 method_count;
 };
 
-/* Table Event Types */
-
-#define ACPI_TABLE_EVENT_LOAD           0x0
-#define ACPI_TABLE_EVENT_UNLOAD         0x1
-#define ACPI_NUM_TABLE_EVENTS           2
-
 /*
  * Types specific to the OS service interfaces
  */
@@ -1088,9 +1085,13 @@
 typedef
 acpi_status (*acpi_table_handler) (u32 event, void *table, void *context);
 
-#define ACPI_TABLE_LOAD             0x0
-#define ACPI_TABLE_UNLOAD           0x1
-#define ACPI_NUM_TABLE_EVENTS       2
+/* Table Event Types */
+
+#define ACPI_TABLE_EVENT_LOAD           0x0
+#define ACPI_TABLE_EVENT_UNLOAD         0x1
+#define ACPI_TABLE_EVENT_INSTALL        0x2
+#define ACPI_TABLE_EVENT_UNINSTALL      0x3
+#define ACPI_NUM_TABLE_EVENTS           4
 
 /* Address Spaces (For Operation Regions) */
 
@@ -1285,15 +1286,6 @@
 #define ACPI_OSI_WIN_8                  0x0C
 #define ACPI_OSI_WIN_10                 0x0D
 
-/* Definitions of file IO */
-
-#define ACPI_FILE_READING               0x01
-#define ACPI_FILE_WRITING               0x02
-#define ACPI_FILE_BINARY                0x04
-
-#define ACPI_FILE_BEGIN                 0x01
-#define ACPI_FILE_END                   0x02
-
 /* Definitions of getopt */
 
 #define ACPI_OPT_END                    -1
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 284965c..427a7c3 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -24,7 +24,9 @@
 #define CPPC_NUM_ENT	21
 #define CPPC_REV	2
 
-#define PCC_CMD_COMPLETE 1
+#define PCC_CMD_COMPLETE_MASK	(1 << 0)
+#define PCC_ERROR_MASK		(1 << 2)
+
 #define MAX_CPC_REG_ENT 19
 
 /* CPPC specific PCC commands. */
@@ -49,6 +51,7 @@
  */
 struct cpc_register_resource {
 	acpi_object_type type;
+	u64 __iomem *sys_mem_vaddr;
 	union {
 		struct cpc_reg reg;
 		u64 int_value;
@@ -60,8 +63,11 @@
 	int num_entries;
 	int version;
 	int cpu_id;
+	int write_cmd_status;
+	int write_cmd_id;
 	struct cpc_register_resource cpc_regs[MAX_CPC_REG_ENT];
 	struct acpi_psd_package domain_info;
+	struct kobject kobj;
 };
 
 /* These are indexes into the per-cpu cpc_regs[]. Order is important. */
@@ -96,7 +102,6 @@
 struct cppc_perf_caps {
 	u32 highest_perf;
 	u32 nominal_perf;
-	u32 reference_perf;
 	u32 lowest_perf;
 };
 
@@ -108,13 +113,13 @@
 
 struct cppc_perf_fb_ctrs {
 	u64 reference;
-	u64 prev_reference;
 	u64 delivered;
-	u64 prev_delivered;
+	u64 reference_perf;
+	u64 ctr_wrap_time;
 };
 
 /* Per CPU container for runtime CPPC management. */
-struct cpudata {
+struct cppc_cpudata {
 	int cpu;
 	struct cppc_perf_caps perf_caps;
 	struct cppc_perf_ctrls perf_ctrls;
@@ -127,6 +132,7 @@
 extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs);
 extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls);
 extern int cppc_get_perf_caps(int cpu, struct cppc_perf_caps *caps);
-extern int acpi_get_psd_map(struct cpudata **);
+extern int acpi_get_psd_map(struct cppc_cpudata **);
+extern unsigned int cppc_get_transition_latency(int cpu);
 
 #endif /* _CPPC_ACPI_H*/
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index 86b5a84..34cce72 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -78,6 +78,7 @@
 	(defined ACPI_EXAMPLE_APP)
 #define ACPI_APPLICATION
 #define ACPI_SINGLE_THREADED
+#define USE_NATIVE_ALLOCATE_ZEROED
 #endif
 
 /* iASL configuration */
@@ -124,7 +125,6 @@
 
 #ifdef ACPI_DUMP_APP
 #define ACPI_USE_NATIVE_MEMORY_MAPPING
-#define USE_NATIVE_ALLOCATE_ZEROED
 #endif
 
 /* acpi_names/Example configuration. Hardware disabled */
@@ -149,7 +149,6 @@
 /* Common for all ACPICA applications */
 
 #ifdef ACPI_APPLICATION
-#define ACPI_USE_SYSTEM_CLIBRARY
 #define ACPI_USE_LOCAL_CACHE
 #endif
 
@@ -167,10 +166,21 @@
 /******************************************************************************
  *
  * Host configuration files. The compiler configuration files are included
- * by the host files.
+ * first.
  *
  *****************************************************************************/
 
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#include <acpi/platform/acgcc.h>
+
+#elif defined(_MSC_VER)
+#include "acmsvc.h"
+
+#elif defined(__INTEL_COMPILER)
+#include "acintel.h"
+
+#endif
+
 #if defined(_LINUX) || defined(__linux__)
 #include <acpi/platform/aclinux.h>
 
@@ -210,18 +220,20 @@
 #elif defined(__OS2__)
 #include "acos2.h"
 
-#elif defined(_AED_EFI)
-#include "acefi.h"
-
-#elif defined(_GNU_EFI)
-#include "acefi.h"
-
 #elif defined(__HAIKU__)
 #include "achaiku.h"
 
 #elif defined(__QNX__)
 #include "acqnx.h"
 
+/*
+ * EFI applications can be built with -nostdlib, in this case, it must be
+ * included after including all other host environmental definitions, in
+ * order to override the definitions.
+ */
+#elif defined(_AED_EFI) || defined(_GNU_EFI) || defined(_EDK2_EFI)
+#include "acefi.h"
+
 #else
 
 /* Unknown environment */
@@ -326,7 +338,8 @@
  * ACPI_USE_SYSTEM_CLIBRARY - Define this if linking to an actual C library.
  *      Otherwise, local versions of string/memory functions will be used.
  * ACPI_USE_STANDARD_HEADERS - Define this if linking to a C library and
- *      the standard header files may be used.
+ *      the standard header files may be used. Defining this implies that
+ *      ACPI_USE_SYSTEM_CLIBRARY has been defined.
  *
  * The ACPICA subsystem only uses low level C library functions that do not
  * call operating system services and may therefore be inlined in the code.
@@ -334,7 +347,6 @@
  * It may be necessary to tailor these include files to the target
  * generation environment.
  */
-#ifdef ACPI_USE_SYSTEM_CLIBRARY
 
 /* Use the standard C library headers. We want to keep these to a minimum. */
 
@@ -342,57 +354,20 @@
 
 /* Use the standard headers from the standard locations */
 
-#include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
+#ifdef ACPI_APPLICATION
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <signal.h>
+#endif
 
 #endif				/* ACPI_USE_STANDARD_HEADERS */
 
-/* We will be linking to the standard Clib functions */
-
-#else
-
-/******************************************************************************
- *
- * Not using native C library, use local implementations
- *
- *****************************************************************************/
-
-/*
- * Use local definitions of C library macros and functions. These function
- * implementations may not be as efficient as an inline or assembly code
- * implementation provided by a native C library, but they are functionally
- * equivalent.
- */
-#ifndef va_arg
-
-#ifndef _VALIST
-#define _VALIST
-typedef char *va_list;
-#endif				/* _VALIST */
-
-/* Storage alignment properties */
-
-#define  _AUPBND                (sizeof (acpi_native_int) - 1)
-#define  _ADNBND                (sizeof (acpi_native_int) - 1)
-
-/* Variable argument list macro definitions */
-
-#define _bnd(X, bnd)            (((sizeof (X)) + (bnd)) & (~(bnd)))
-#define va_arg(ap, T)           (*(T *)(((ap) += (_bnd (T, _AUPBND))) - (_bnd (T,_ADNBND))))
-#define va_end(ap)              (ap = (va_list) NULL)
-#define va_start(ap, A)         (void) ((ap) = (((char *) &(A)) + (_bnd (A,_AUPBND))))
-
-#endif				/* va_arg */
-
-/* Use the local (ACPICA) definitions of the clib functions */
-
-#endif				/* ACPI_USE_SYSTEM_CLIBRARY */
-
-#ifndef ACPI_FILE
 #ifdef ACPI_APPLICATION
-#include <stdio.h>
 #define ACPI_FILE              FILE *
 #define ACPI_FILE_OUT          stdout
 #define ACPI_FILE_ERR          stderr
@@ -401,6 +376,9 @@
 #define ACPI_FILE_OUT          NULL
 #define ACPI_FILE_ERR          NULL
 #endif				/* ACPI_APPLICATION */
-#endif				/* ACPI_FILE */
+
+#ifndef ACPI_INIT_FUNCTION
+#define ACPI_INIT_FUNCTION
+#endif
 
 #endif				/* __ACENV_H__ */
diff --git a/include/acpi/platform/acenvex.h b/include/acpi/platform/acenvex.h
index 4f15c1d..b3171b9 100644
--- a/include/acpi/platform/acenvex.h
+++ b/include/acpi/platform/acenvex.h
@@ -56,18 +56,25 @@
 #if defined(_LINUX) || defined(__linux__)
 #include <acpi/platform/aclinuxex.h>
 
-#elif defined(WIN32)
-#include "acwinex.h"
-
-#elif defined(_AED_EFI)
-#include "acefiex.h"
-
-#elif defined(_GNU_EFI)
-#include "acefiex.h"
-
 #elif defined(__DragonFly__)
 #include "acdragonflyex.h"
 
+/*
+ * EFI applications can be built with -nostdlib, in this case, it must be
+ * included after including all other host environmental definitions, in
+ * order to override the definitions.
+ */
+#elif defined(_AED_EFI) || defined(_GNU_EFI) || defined(_EDK2_EFI)
+#include "acefiex.h"
+
+#endif
+
+#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
+#include "acgccex.h"
+
+#elif defined(_MSC_VER)
+#include "acmsvcex.h"
+
 #endif
 
 /*! [End] no source code translation !*/
diff --git a/include/acpi/platform/acgcc.h b/include/acpi/platform/acgcc.h
index c5a216c9..8f66aaa 100644
--- a/include/acpi/platform/acgcc.h
+++ b/include/acpi/platform/acgcc.h
@@ -44,6 +44,12 @@
 #ifndef __ACGCC_H__
 #define __ACGCC_H__
 
+/*
+ * Use compiler specific <stdarg.h> is a good practice for even when
+ * -nostdinc is specified (i.e., ACPI_USE_STANDARD_HEADERS undefined.
+ */
+#include <stdarg.h>
+
 #define ACPI_INLINE             __inline__
 
 /* Function name is used for debug output. Non-ANSI, compiler-dependent */
@@ -64,17 +70,6 @@
  */
 #define ACPI_UNUSED_VAR __attribute__ ((unused))
 
-/*
- * Some versions of gcc implement strchr() with a buggy macro. So,
- * undef it here. Prevents error messages of this form (usually from the
- * file getopt.c):
- *
- * error: logical '&&' with non-zero constant will always evaluate as true
- */
-#ifdef strchr
-#undef strchr
-#endif
-
 /* GCC supports __VA_ARGS__ in macros */
 
 #define COMPILER_VA_MACRO               1
diff --git a/include/acpi/platform/acgccex.h b/include/acpi/platform/acgccex.h
new file mode 100644
index 0000000..46ead2c
--- /dev/null
+++ b/include/acpi/platform/acgccex.h
@@ -0,0 +1,58 @@
+/******************************************************************************
+ *
+ * Name: acgccex.h - Extra GCC specific defines, etc.
+ *
+ *****************************************************************************/
+
+/*
+ * Copyright (C) 2000 - 2016, Intel Corp.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions, and the following disclaimer,
+ *    without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ *    substantially similar to the "NO WARRANTY" disclaimer below
+ *    ("Disclaimer") and any redistribution must be conditioned upon
+ *    including a substantially similar Disclaimer requirement for further
+ *    binary redistribution.
+ * 3. Neither the names of the above-listed copyright holders nor the names
+ *    of any contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ */
+
+#ifndef __ACGCCEX_H__
+#define __ACGCCEX_H__
+
+/*
+ * Some versions of gcc implement strchr() with a buggy macro. So,
+ * undef it here. Prevents error messages of this form (usually from the
+ * file getopt.c):
+ *
+ * error: logical '&&' with non-zero constant will always evaluate as true
+ */
+#ifdef strchr
+#undef strchr
+#endif
+
+#endif				/* __ACGCCEX_H__ */
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 93b61b1..a5d98d1 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -92,6 +92,8 @@
 #include <asm/acenv.h>
 #endif
 
+#define ACPI_INIT_FUNCTION __init
+
 #ifndef CONFIG_ACPI
 
 /* External globals for __KERNEL__, stubs is needed */
@@ -168,13 +170,21 @@
 #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_get_next_filename
 #define ACPI_USE_ALTERNATE_PROTOTYPE_acpi_os_close_directory
 
+#define ACPI_MSG_ERROR          KERN_ERR "ACPI Error: "
+#define ACPI_MSG_EXCEPTION      KERN_ERR "ACPI Exception: "
+#define ACPI_MSG_WARNING        KERN_WARNING "ACPI Warning: "
+#define ACPI_MSG_INFO           KERN_INFO "ACPI: "
+
+#define ACPI_MSG_BIOS_ERROR     KERN_ERR "ACPI BIOS Error (bug): "
+#define ACPI_MSG_BIOS_WARNING   KERN_WARNING "ACPI BIOS Warning (bug): "
+
 #else				/* !__KERNEL__ */
 
-#include <stdarg.h>
-#include <string.h>
-#include <stdlib.h>
-#include <ctype.h>
+#define ACPI_USE_STANDARD_HEADERS
+
+#ifdef ACPI_USE_STANDARD_HEADERS
 #include <unistd.h>
+#endif
 
 /* Define/disable kernel-specific declarators */
 
@@ -205,8 +215,4 @@
 
 #endif				/* __KERNEL__ */
 
-/* Linux uses GCC */
-
-#include <acpi/platform/acgcc.h>
-
 #endif				/* __ACLINUX_H__ */
diff --git a/include/acpi/platform/aclinuxex.h b/include/acpi/platform/aclinuxex.h
index f8bb0d8..a5509d8 100644
--- a/include/acpi/platform/aclinuxex.h
+++ b/include/acpi/platform/aclinuxex.h
@@ -71,7 +71,7 @@
 /*
  * Overrides for in-kernel ACPICA
  */
-acpi_status __init acpi_os_initialize(void);
+acpi_status ACPI_INIT_FUNCTION acpi_os_initialize(void);
 
 acpi_status acpi_os_terminate(void);
 
diff --git a/include/acpi/processor.h b/include/acpi/processor.h
index bfe6b2e..f3db11c 100644
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -359,7 +359,7 @@
  * onlined/offlined. In such case the flags.throttling will be updated.
  */
 extern void acpi_processor_reevaluate_tstate(struct acpi_processor *pr,
-			unsigned long action);
+			bool is_dead);
 extern const struct file_operations acpi_processor_throttling_fops;
 extern void acpi_processor_throttling_init(void);
 #else
@@ -380,7 +380,7 @@
 }
 
 static inline void acpi_processor_reevaluate_tstate(struct acpi_processor *pr,
-			unsigned long action) {}
+			bool is_dead) {}
 
 static inline void acpi_processor_throttling_init(void) {}
 #endif	/* CONFIG_ACPI_CPU_FREQ_PSS */
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 54a8e65..7d026bf 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -25,7 +25,20 @@
 #include <asm-generic/qrwlock_types.h>
 
 /*
- * Writer states & reader shift and bias
+ * Writer states & reader shift and bias.
+ *
+ *       | +0 | +1 | +2 | +3 |
+ *   ----+----+----+----+----+
+ *    LE | 78 | 56 | 34 | 12 | 0x12345678
+ *   ----+----+----+----+----+
+ *       | wr |      rd      |
+ *       +----+----+----+----+
+ *
+ *   ----+----+----+----+----+
+ *    BE | 12 | 34 | 56 | 78 | 0x12345678
+ *   ----+----+----+----+----+
+ *       |      rd      | wr |
+ *       +----+----+----+----+
  */
 #define	_QW_WAITING	1		/* A writer is waiting	   */
 #define	_QW_LOCKED	0xff		/* A writer holds the lock */
@@ -134,12 +147,22 @@
 }
 
 /**
+ * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock
+ * @lock : Pointer to queue rwlock structure
+ * Return: the write byte address of a queue rwlock
+ */
+static inline u8 *__qrwlock_write_byte(struct qrwlock *lock)
+{
+	return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
+}
+
+/**
  * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
-	smp_store_release((u8 *)&lock->cnts, 0);
+	smp_store_release(__qrwlock_write_byte(lock), 0);
 }
 
 /*
diff --git a/include/asm-generic/uaccess.h b/include/asm-generic/uaccess.h
index 1bfa602..6df9b07 100644
--- a/include/asm-generic/uaccess.h
+++ b/include/asm-generic/uaccess.h
@@ -72,6 +72,7 @@
 /* Returns 0 if exception not found and fixup otherwise.  */
 extern unsigned long search_exception_table(unsigned long);
 
+
 /*
  * architectures with an MMU should override these two
  */
@@ -230,14 +231,18 @@
 	might_fault();						\
 	access_ok(VERIFY_READ, __p, sizeof(*ptr)) ?		\
 		__get_user((x), (__typeof__(*(ptr)) *)__p) :	\
-		-EFAULT;					\
+		((x) = (__typeof__(*(ptr)))0,-EFAULT);		\
 })
 
 #ifndef __get_user_fn
 static inline int __get_user_fn(size_t size, const void __user *ptr, void *x)
 {
-	size = __copy_from_user(x, ptr, size);
-	return size ? -EFAULT : size;
+	size_t n = __copy_from_user(x, ptr, size);
+	if (unlikely(n)) {
+		memset(x + (size - n), 0, n);
+		return -EFAULT;
+	}
+	return 0;
 }
 
 #define __get_user_fn(sz, u, k)	__get_user_fn(sz, u, k)
@@ -257,11 +262,13 @@
 static inline long copy_from_user(void *to,
 		const void __user * from, unsigned long n)
 {
+	unsigned long res = n;
 	might_fault();
-	if (access_ok(VERIFY_READ, from, n))
-		return __copy_from_user(to, from, n);
-	else
-		return n;
+	if (likely(access_ok(VERIFY_READ, from, n)))
+		res = __copy_from_user(to, from, n);
+	if (unlikely(res))
+		memset(to + (n - res), 0, res);
+	return res;
 }
 
 static inline long copy_to_user(void __user *to,
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 4348d6d..99c6d01 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -962,6 +962,7 @@
  *
  * @bo: A pointer to a struct ttm_buffer_object.
  * @evict: 1: This is an eviction. Don't try to pipeline.
+ * @interruptible: Sleep interruptible if waiting.
  * @no_wait_gpu: Return immediately if the GPU is busy.
  * @new_mem: struct ttm_mem_reg indicating where to move.
  *
@@ -976,7 +977,7 @@
  */
 
 extern int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
-			   bool evict, bool no_wait_gpu,
+			   bool evict, bool interruptible, bool no_wait_gpu,
 			   struct ttm_mem_reg *new_mem);
 
 /**
diff --git a/include/dt-bindings/clock/exynos5410.h b/include/dt-bindings/clock/exynos5410.h
index 85b467b..6cb4e90 100644
--- a/include/dt-bindings/clock/exynos5410.h
+++ b/include/dt-bindings/clock/exynos5410.h
@@ -19,6 +19,7 @@
 #define CLK_FOUT_MPLL		4
 #define CLK_FOUT_BPLL		5
 #define CLK_FOUT_KPLL		6
+#define CLK_FOUT_EPLL		7
 
 /* gate for special clocks (sclk) */
 #define CLK_SCLK_UART0		128
@@ -55,6 +56,8 @@
 #define CLK_MMC0		351
 #define CLK_MMC1		352
 #define CLK_MMC2		353
+#define CLK_PDMA0		362
+#define CLK_PDMA1		363
 #define CLK_USBH20		365
 #define CLK_USBD300		366
 #define CLK_USBD301		367
diff --git a/include/dt-bindings/clock/exynos5420.h b/include/dt-bindings/clock/exynos5420.h
index 17ab839..6fd21c2 100644
--- a/include/dt-bindings/clock/exynos5420.h
+++ b/include/dt-bindings/clock/exynos5420.h
@@ -214,6 +214,9 @@
 #define CLK_MOUT_SW_ACLK400     651
 #define CLK_MOUT_USER_ACLK300_GSCL	652
 #define CLK_MOUT_SW_ACLK300_GSCL	653
+#define CLK_MOUT_MCLK_CDREX	654
+#define CLK_MOUT_BPLL		655
+#define CLK_MOUT_MX_MSPLL_CCORE	656
 
 /* divider clocks */
 #define CLK_DOUT_PIXEL		768
@@ -239,8 +242,14 @@
 #define CLK_DOUT_ACLK300_DISP1	788
 #define CLK_DOUT_ACLK300_GSCL	789
 #define CLK_DOUT_ACLK400_DISP1	790
+#define CLK_DOUT_PCLK_CDREX	791
+#define CLK_DOUT_SCLK_CDREX	792
+#define CLK_DOUT_ACLK_CDREX1	793
+#define CLK_DOUT_CCLK_DREX0	794
+#define CLK_DOUT_CLK2X_PHY0	795
+#define CLK_DOUT_PCLK_CORE_MEM	796
 
 /* must be greater than maximal clock id */
-#define CLK_NR_CLKS		791
+#define CLK_NR_CLKS		797
 
 #endif /* _DT_BINDINGS_CLOCK_EXYNOS_5420_H */
diff --git a/include/dt-bindings/clock/exynos5440.h b/include/dt-bindings/clock/exynos5440.h
index c66fc40..842cdc0 100644
--- a/include/dt-bindings/clock/exynos5440.h
+++ b/include/dt-bindings/clock/exynos5440.h
@@ -14,6 +14,8 @@
 
 #define CLK_XTAL		1
 #define CLK_ARM_CLK		2
+#define CLK_CPLLA		3
+#define CLK_CPLLB		4
 #define CLK_SPI_BAUD		16
 #define CLK_PB0_250		17
 #define CLK_PR0_250		18
diff --git a/include/dt-bindings/clock/gxbb-aoclkc.h b/include/dt-bindings/clock/gxbb-aoclkc.h
new file mode 100644
index 0000000..3175148
--- /dev/null
+++ b/include/dt-bindings/clock/gxbb-aoclkc.h
@@ -0,0 +1,66 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (c) 2016 BayLibre, SAS.
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016 BayLibre, SAS.
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DT_BINDINGS_CLOCK_AMLOGIC_MESON_GXBB_AOCLK
+#define DT_BINDINGS_CLOCK_AMLOGIC_MESON_GXBB_AOCLK
+
+#define CLKID_AO_REMOTE		0
+#define CLKID_AO_I2C_MASTER	1
+#define CLKID_AO_I2C_SLAVE	2
+#define CLKID_AO_UART1		3
+#define CLKID_AO_UART2		4
+#define CLKID_AO_IR_BLASTER	5
+
+#endif
diff --git a/include/dt-bindings/clock/gxbb-clkc.h b/include/dt-bindings/clock/gxbb-clkc.h
index f889d80..ce4ad63 100644
--- a/include/dt-bindings/clock/gxbb-clkc.h
+++ b/include/dt-bindings/clock/gxbb-clkc.h
@@ -6,7 +6,14 @@
 #define __GXBB_CLKC_H
 
 #define CLKID_CPUCLK		1
+#define CLKID_HDMI_PLL		2
+#define CLKID_FCLK_DIV2		4
+#define CLKID_FCLK_DIV3		5
+#define CLKID_FCLK_DIV4		6
 #define CLKID_CLK81		12
 #define CLKID_ETH		36
+#define CLKID_SD_EMMC_A		94
+#define CLKID_SD_EMMC_B		95
+#define CLKID_SD_EMMC_C		96
 
 #endif /* __GXBB_CLKC_H */
diff --git a/include/dt-bindings/clock/imx5-clock.h b/include/dt-bindings/clock/imx5-clock.h
index f4b7478..d382fc7 100644
--- a/include/dt-bindings/clock/imx5-clock.h
+++ b/include/dt-bindings/clock/imx5-clock.h
@@ -201,6 +201,19 @@
 #define IMX5_CLK_STEP_SEL		189
 #define IMX5_CLK_CPU_PODF_SEL		190
 #define IMX5_CLK_ARM			191
-#define IMX5_CLK_END			192
+#define IMX5_CLK_FIRI_PRED		192
+#define IMX5_CLK_FIRI_SEL		193
+#define IMX5_CLK_FIRI_PODF		194
+#define IMX5_CLK_FIRI_SERIAL_GATE	195
+#define IMX5_CLK_FIRI_IPG_GATE		196
+#define IMX5_CLK_CSI0_MCLK1_PRED	197
+#define IMX5_CLK_CSI0_MCLK1_SEL		198
+#define IMX5_CLK_CSI0_MCLK1_PODF	199
+#define IMX5_CLK_CSI0_MCLK1_GATE	200
+#define IMX5_CLK_IEEE1588_PRED		201
+#define IMX5_CLK_IEEE1588_SEL		202
+#define IMX5_CLK_IEEE1588_PODF		203
+#define IMX5_CLK_IEEE1588_GATE		204
+#define IMX5_CLK_END			205
 
 #endif /* __DT_BINDINGS_CLOCK_IMX5_H */
diff --git a/include/dt-bindings/clock/imx6qdl-clock.h b/include/dt-bindings/clock/imx6qdl-clock.h
index 2905033..da59fd9 100644
--- a/include/dt-bindings/clock/imx6qdl-clock.h
+++ b/include/dt-bindings/clock/imx6qdl-clock.h
@@ -269,6 +269,8 @@
 #define IMX6QDL_CLK_PRG0_APB			256
 #define IMX6QDL_CLK_PRG1_APB			257
 #define IMX6QDL_CLK_PRE_AXI			258
-#define IMX6QDL_CLK_END				259
+#define IMX6QDL_CLK_MLB_SEL			259
+#define IMX6QDL_CLK_MLB_PODF			260
+#define IMX6QDL_CLK_END				261
 
 #endif /* __DT_BINDINGS_CLOCK_IMX6QDL_H */
diff --git a/include/dt-bindings/clock/maxim,max77620.h b/include/dt-bindings/clock/maxim,max77620.h
new file mode 100644
index 0000000..82aba28
--- /dev/null
+++ b/include/dt-bindings/clock/maxim,max77620.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2016 NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Device Tree binding constants clocks for the Maxim 77620 PMIC.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_MAXIM_MAX77620_CLOCK_H
+#define _DT_BINDINGS_CLOCK_MAXIM_MAX77620_CLOCK_H
+
+/* Fixed rate clocks. */
+
+#define MAX77620_CLK_32K_OUT0		0
+
+/* Total number of clocks. */
+#define MAX77620_CLKS_NUM		(MAX77620_CLK_32K_OUT0 + 1)
+
+#endif /* _DT_BINDINGS_CLOCK_MAXIM_MAX77620_CLOCK_H */
diff --git a/include/dt-bindings/clock/meson8b-clkc.h b/include/dt-bindings/clock/meson8b-clkc.h
index 595a58d..a55ff8c 100644
--- a/include/dt-bindings/clock/meson8b-clkc.h
+++ b/include/dt-bindings/clock/meson8b-clkc.h
@@ -22,6 +22,4 @@
 #define CLKID_MPEG_SEL		14
 #define CLKID_MPEG_DIV		15
 
-#define CLK_NR_CLKS		(CLKID_MPEG_DIV + 1)
-
 #endif /* __MESON8B_CLKC_H */
diff --git a/include/dt-bindings/clock/mt2701-clk.h b/include/dt-bindings/clock/mt2701-clk.h
new file mode 100644
index 0000000..2062c67
--- /dev/null
+++ b/include/dt-bindings/clock/mt2701-clk.h
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 2014 MediaTek Inc.
+ * Author: Shunli Wang <shunli.wang@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_MT2701_H
+#define _DT_BINDINGS_CLK_MT2701_H
+
+/* TOPCKGEN */
+#define CLK_TOP_SYSPLL				1
+#define CLK_TOP_SYSPLL_D2			2
+#define CLK_TOP_SYSPLL_D3			3
+#define CLK_TOP_SYSPLL_D5			4
+#define CLK_TOP_SYSPLL_D7			5
+#define CLK_TOP_SYSPLL1_D2			6
+#define CLK_TOP_SYSPLL1_D4			7
+#define CLK_TOP_SYSPLL1_D8			8
+#define CLK_TOP_SYSPLL1_D16			9
+#define CLK_TOP_SYSPLL2_D2			10
+#define CLK_TOP_SYSPLL2_D4			11
+#define CLK_TOP_SYSPLL2_D8			12
+#define CLK_TOP_SYSPLL3_D2			13
+#define CLK_TOP_SYSPLL3_D4			14
+#define CLK_TOP_SYSPLL4_D2			15
+#define CLK_TOP_SYSPLL4_D4			16
+#define CLK_TOP_UNIVPLL				17
+#define CLK_TOP_UNIVPLL_D2			18
+#define CLK_TOP_UNIVPLL_D3			19
+#define CLK_TOP_UNIVPLL_D5			20
+#define CLK_TOP_UNIVPLL_D7			21
+#define CLK_TOP_UNIVPLL_D26			22
+#define CLK_TOP_UNIVPLL_D52			23
+#define CLK_TOP_UNIVPLL_D108			24
+#define CLK_TOP_USB_PHY48M			25
+#define CLK_TOP_UNIVPLL1_D2			26
+#define CLK_TOP_UNIVPLL1_D4			27
+#define CLK_TOP_UNIVPLL1_D8			28
+#define CLK_TOP_UNIVPLL2_D2			29
+#define CLK_TOP_UNIVPLL2_D4			30
+#define CLK_TOP_UNIVPLL2_D8			31
+#define CLK_TOP_UNIVPLL2_D16			32
+#define CLK_TOP_UNIVPLL2_D32			33
+#define CLK_TOP_UNIVPLL3_D2			34
+#define CLK_TOP_UNIVPLL3_D4			35
+#define CLK_TOP_UNIVPLL3_D8			36
+#define CLK_TOP_MSDCPLL				37
+#define CLK_TOP_MSDCPLL_D2			38
+#define CLK_TOP_MSDCPLL_D4			39
+#define CLK_TOP_MSDCPLL_D8			40
+#define CLK_TOP_MMPLL				41
+#define CLK_TOP_MMPLL_D2			42
+#define CLK_TOP_DMPLL				43
+#define CLK_TOP_DMPLL_D2			44
+#define CLK_TOP_DMPLL_D4			45
+#define CLK_TOP_DMPLL_X2			46
+#define CLK_TOP_TVDPLL				47
+#define CLK_TOP_TVDPLL_D2			48
+#define CLK_TOP_TVDPLL_D4			49
+#define CLK_TOP_TVD2PLL				50
+#define CLK_TOP_TVD2PLL_D2			51
+#define CLK_TOP_HADDS2PLL_98M			52
+#define CLK_TOP_HADDS2PLL_294M			53
+#define CLK_TOP_HADDS2_FB			54
+#define CLK_TOP_MIPIPLL_D2			55
+#define CLK_TOP_MIPIPLL_D4			56
+#define CLK_TOP_HDMIPLL				57
+#define CLK_TOP_HDMIPLL_D2			58
+#define CLK_TOP_HDMIPLL_D3			59
+#define CLK_TOP_HDMI_SCL_RX			60
+#define CLK_TOP_HDMI_0_PIX340M			61
+#define CLK_TOP_HDMI_0_DEEP340M			62
+#define CLK_TOP_HDMI_0_PLL340M			63
+#define CLK_TOP_AUD1PLL_98M			64
+#define CLK_TOP_AUD2PLL_90M			65
+#define CLK_TOP_AUDPLL				66
+#define CLK_TOP_AUDPLL_D4			67
+#define CLK_TOP_AUDPLL_D8			68
+#define CLK_TOP_AUDPLL_D16			69
+#define CLK_TOP_AUDPLL_D24			70
+#define CLK_TOP_ETHPLL_500M			71
+#define CLK_TOP_VDECPLL				72
+#define CLK_TOP_VENCPLL				73
+#define CLK_TOP_MIPIPLL				74
+#define CLK_TOP_ARMPLL_1P3G			75
+
+#define CLK_TOP_MM_SEL				76
+#define CLK_TOP_DDRPHYCFG_SEL			77
+#define CLK_TOP_MEM_SEL				78
+#define CLK_TOP_AXI_SEL				79
+#define CLK_TOP_CAMTG_SEL			80
+#define CLK_TOP_MFG_SEL				81
+#define CLK_TOP_VDEC_SEL			82
+#define CLK_TOP_PWM_SEL				83
+#define CLK_TOP_MSDC30_0_SEL			84
+#define CLK_TOP_USB20_SEL			85
+#define CLK_TOP_SPI0_SEL			86
+#define CLK_TOP_UART_SEL			87
+#define CLK_TOP_AUDINTBUS_SEL			88
+#define CLK_TOP_AUDIO_SEL			89
+#define CLK_TOP_MSDC30_2_SEL			90
+#define CLK_TOP_MSDC30_1_SEL			91
+#define CLK_TOP_DPI1_SEL			92
+#define CLK_TOP_DPI0_SEL			93
+#define CLK_TOP_SCP_SEL				94
+#define CLK_TOP_PMICSPI_SEL			95
+#define CLK_TOP_APLL_SEL			96
+#define CLK_TOP_HDMI_SEL			97
+#define CLK_TOP_TVE_SEL				98
+#define CLK_TOP_EMMC_HCLK_SEL			99
+#define CLK_TOP_NFI2X_SEL			100
+#define CLK_TOP_RTC_SEL				101
+#define CLK_TOP_OSD_SEL				102
+#define CLK_TOP_NR_SEL				103
+#define CLK_TOP_DI_SEL				104
+#define CLK_TOP_FLASH_SEL			105
+#define CLK_TOP_ASM_M_SEL			106
+#define CLK_TOP_ASM_I_SEL			107
+#define CLK_TOP_INTDIR_SEL			108
+#define CLK_TOP_HDMIRX_BIST_SEL			109
+#define CLK_TOP_ETHIF_SEL			110
+#define CLK_TOP_MS_CARD_SEL			111
+#define CLK_TOP_ASM_H_SEL			112
+#define CLK_TOP_SPI1_SEL			113
+#define CLK_TOP_CMSYS_SEL			114
+#define CLK_TOP_MSDC30_3_SEL			115
+#define CLK_TOP_HDMIRX26_24_SEL			116
+#define CLK_TOP_AUD2DVD_SEL			117
+#define CLK_TOP_8BDAC_SEL			118
+#define CLK_TOP_SPI2_SEL			119
+#define CLK_TOP_AUD_MUX1_SEL			120
+#define CLK_TOP_AUD_MUX2_SEL			121
+#define CLK_TOP_AUDPLL_MUX_SEL			122
+#define CLK_TOP_AUD_K1_SRC_SEL			123
+#define CLK_TOP_AUD_K2_SRC_SEL			124
+#define CLK_TOP_AUD_K3_SRC_SEL			125
+#define CLK_TOP_AUD_K4_SRC_SEL			126
+#define CLK_TOP_AUD_K5_SRC_SEL			127
+#define CLK_TOP_AUD_K6_SRC_SEL			128
+#define CLK_TOP_PADMCLK_SEL			129
+#define CLK_TOP_AUD_EXTCK1_DIV			130
+#define CLK_TOP_AUD_EXTCK2_DIV			131
+#define CLK_TOP_AUD_MUX1_DIV			132
+#define CLK_TOP_AUD_MUX2_DIV			133
+#define CLK_TOP_AUD_K1_SRC_DIV			134
+#define CLK_TOP_AUD_K2_SRC_DIV			135
+#define CLK_TOP_AUD_K3_SRC_DIV			136
+#define CLK_TOP_AUD_K4_SRC_DIV			137
+#define CLK_TOP_AUD_K5_SRC_DIV			138
+#define CLK_TOP_AUD_K6_SRC_DIV			139
+#define CLK_TOP_AUD_I2S1_MCLK			140
+#define CLK_TOP_AUD_I2S2_MCLK			141
+#define CLK_TOP_AUD_I2S3_MCLK			142
+#define CLK_TOP_AUD_I2S4_MCLK			143
+#define CLK_TOP_AUD_I2S5_MCLK			144
+#define CLK_TOP_AUD_I2S6_MCLK			145
+#define CLK_TOP_AUD_48K_TIMING			146
+#define CLK_TOP_AUD_44K_TIMING			147
+
+#define CLK_TOP_32K_INTERNAL			148
+#define CLK_TOP_32K_EXTERNAL			149
+#define CLK_TOP_CLK26M_D8			150
+#define CLK_TOP_8BDAC				151
+#define CLK_TOP_WBG_DIG_416M			152
+#define CLK_TOP_DPI				153
+#define CLK_TOP_HDMITX_CLKDIG_CTS		154
+#define CLK_TOP_DSI0_LNTC_DSI			155
+#define CLK_TOP_AUD_EXT1			156
+#define CLK_TOP_AUD_EXT2			157
+#define CLK_TOP_NFI1X_PAD			158
+#define CLK_TOP_NR				159
+
+/* APMIXEDSYS */
+
+#define CLK_APMIXED_ARMPLL			1
+#define CLK_APMIXED_MAINPLL			2
+#define CLK_APMIXED_UNIVPLL			3
+#define CLK_APMIXED_MMPLL			4
+#define CLK_APMIXED_MSDCPLL			5
+#define CLK_APMIXED_TVDPLL			6
+#define CLK_APMIXED_AUD1PLL			7
+#define CLK_APMIXED_TRGPLL			8
+#define CLK_APMIXED_ETHPLL			9
+#define CLK_APMIXED_VDECPLL			10
+#define CLK_APMIXED_HADDS2PLL			11
+#define CLK_APMIXED_AUD2PLL			12
+#define CLK_APMIXED_TVD2PLL			13
+#define CLK_APMIXED_NR				14
+
+/* DDRPHY */
+
+#define CLK_DDRPHY_VENCPLL			1
+#define CLK_DDRPHY_NR				2
+
+/* INFRACFG */
+
+#define CLK_INFRA_DBG				1
+#define CLK_INFRA_SMI				2
+#define CLK_INFRA_QAXI_CM4			3
+#define CLK_INFRA_AUD_SPLIN_B			4
+#define CLK_INFRA_AUDIO				5
+#define CLK_INFRA_EFUSE				6
+#define CLK_INFRA_L2C_SRAM			7
+#define CLK_INFRA_M4U				8
+#define CLK_INFRA_CONNMCU			9
+#define CLK_INFRA_TRNG				10
+#define CLK_INFRA_RAMBUFIF			11
+#define CLK_INFRA_CPUM				12
+#define CLK_INFRA_KP				13
+#define CLK_INFRA_CEC				14
+#define CLK_INFRA_IRRX				15
+#define CLK_INFRA_PMICSPI			16
+#define CLK_INFRA_PMICWRAP			17
+#define CLK_INFRA_DDCCI				18
+#define CLK_INFRA_CLK_13M			19
+#define CLK_INFRA_NR				20
+
+/* PERICFG */
+
+#define CLK_PERI_NFI				1
+#define CLK_PERI_THERM				2
+#define CLK_PERI_PWM1				3
+#define CLK_PERI_PWM2				4
+#define CLK_PERI_PWM3				5
+#define CLK_PERI_PWM4				6
+#define CLK_PERI_PWM5				7
+#define CLK_PERI_PWM6				8
+#define CLK_PERI_PWM7				9
+#define CLK_PERI_PWM				10
+#define CLK_PERI_USB0				11
+#define CLK_PERI_USB1				12
+#define CLK_PERI_AP_DMA				13
+#define CLK_PERI_MSDC30_0			14
+#define CLK_PERI_MSDC30_1			15
+#define CLK_PERI_MSDC30_2			16
+#define CLK_PERI_MSDC30_3			17
+#define CLK_PERI_MSDC50_3			18
+#define CLK_PERI_NLI				19
+#define CLK_PERI_UART0				20
+#define CLK_PERI_UART1				21
+#define CLK_PERI_UART2				22
+#define CLK_PERI_UART3				23
+#define CLK_PERI_BTIF				24
+#define CLK_PERI_I2C0				25
+#define CLK_PERI_I2C1				26
+#define CLK_PERI_I2C2				27
+#define CLK_PERI_I2C3				28
+#define CLK_PERI_AUXADC				29
+#define CLK_PERI_SPI0				30
+#define CLK_PERI_ETH				31
+#define CLK_PERI_USB0_MCU			32
+
+#define CLK_PERI_USB1_MCU			33
+#define CLK_PERI_USB_SLV			34
+#define CLK_PERI_GCPU				35
+#define CLK_PERI_NFI_ECC			36
+#define CLK_PERI_NFI_PAD			37
+#define CLK_PERI_FLASH				38
+#define CLK_PERI_HOST89_INT			39
+#define CLK_PERI_HOST89_SPI			40
+#define CLK_PERI_HOST89_DVD			41
+#define CLK_PERI_SPI1				42
+#define CLK_PERI_SPI2				43
+#define CLK_PERI_FCI				44
+
+#define CLK_PERI_UART0_SEL			45
+#define CLK_PERI_UART1_SEL			46
+#define CLK_PERI_UART2_SEL			47
+#define CLK_PERI_UART3_SEL			48
+#define CLK_PERI_NR				49
+
+/* AUDIO */
+
+#define CLK_AUD_AFE				1
+#define CLK_AUD_LRCK_DETECT			2
+#define CLK_AUD_I2S				3
+#define CLK_AUD_APLL_TUNER			4
+#define CLK_AUD_HDMI				5
+#define CLK_AUD_SPDF				6
+#define CLK_AUD_SPDF2				7
+#define CLK_AUD_APLL				8
+#define CLK_AUD_TML				9
+#define CLK_AUD_AHB_IDLE_EXT			10
+#define CLK_AUD_AHB_IDLE_INT			11
+
+#define CLK_AUD_I2SIN1				12
+#define CLK_AUD_I2SIN2				13
+#define CLK_AUD_I2SIN3				14
+#define CLK_AUD_I2SIN4				15
+#define CLK_AUD_I2SIN5				16
+#define CLK_AUD_I2SIN6				17
+#define CLK_AUD_I2SO1				18
+#define CLK_AUD_I2SO2				19
+#define CLK_AUD_I2SO3				20
+#define CLK_AUD_I2SO4				21
+#define CLK_AUD_I2SO5				22
+#define CLK_AUD_I2SO6				23
+#define CLK_AUD_ASRCI1				24
+#define CLK_AUD_ASRCI2				25
+#define CLK_AUD_ASRCO1				26
+#define CLK_AUD_ASRCO2				27
+#define CLK_AUD_ASRC11				28
+#define CLK_AUD_ASRC12				29
+#define CLK_AUD_HDMIRX				30
+#define CLK_AUD_INTDIR				31
+#define CLK_AUD_A1SYS				32
+#define CLK_AUD_A2SYS				33
+#define CLK_AUD_AFE_CONN			34
+#define CLK_AUD_AFE_PCMIF			35
+#define CLK_AUD_AFE_MRGIF			36
+
+#define CLK_AUD_MMIF_UL1			37
+#define CLK_AUD_MMIF_UL2			38
+#define CLK_AUD_MMIF_UL3			39
+#define CLK_AUD_MMIF_UL4			40
+#define CLK_AUD_MMIF_UL5			41
+#define CLK_AUD_MMIF_UL6			42
+#define CLK_AUD_MMIF_DL1			43
+#define CLK_AUD_MMIF_DL2			44
+#define CLK_AUD_MMIF_DL3			45
+#define CLK_AUD_MMIF_DL4			46
+#define CLK_AUD_MMIF_DL5			47
+#define CLK_AUD_MMIF_DL6			48
+#define CLK_AUD_MMIF_DLMCH			49
+#define CLK_AUD_MMIF_ARB1			50
+#define CLK_AUD_MMIF_AWB1			51
+#define CLK_AUD_MMIF_AWB2			52
+#define CLK_AUD_MMIF_DAI			53
+
+#define CLK_AUD_DMIC1				54
+#define CLK_AUD_DMIC2				55
+#define CLK_AUD_ASRCI3				56
+#define CLK_AUD_ASRCI4				57
+#define CLK_AUD_ASRCI5				58
+#define CLK_AUD_ASRCI6				59
+#define CLK_AUD_ASRCO3				60
+#define CLK_AUD_ASRCO4				61
+#define CLK_AUD_ASRCO5				62
+#define CLK_AUD_ASRCO6				63
+#define CLK_AUD_MEM_ASRC1			64
+#define CLK_AUD_MEM_ASRC2			65
+#define CLK_AUD_MEM_ASRC3			66
+#define CLK_AUD_MEM_ASRC4			67
+#define CLK_AUD_MEM_ASRC5			68
+#define CLK_AUD_DSD_ENC				69
+#define CLK_AUD_ASRC_BRG			70
+#define CLK_AUD_NR				71
+
+/* MMSYS */
+
+#define CLK_MM_SMI_COMMON			1
+#define CLK_MM_SMI_LARB0			2
+#define CLK_MM_CMDQ				3
+#define CLK_MM_MUTEX				4
+#define CLK_MM_DISP_COLOR			5
+#define CLK_MM_DISP_BLS				6
+#define CLK_MM_DISP_WDMA			7
+#define CLK_MM_DISP_RDMA			8
+#define CLK_MM_DISP_OVL				9
+#define CLK_MM_MDP_TDSHP			10
+#define CLK_MM_MDP_WROT				11
+#define CLK_MM_MDP_WDMA				12
+#define CLK_MM_MDP_RSZ1				13
+#define CLK_MM_MDP_RSZ0				14
+#define CLK_MM_MDP_RDMA				15
+#define CLK_MM_MDP_BLS_26M			16
+#define CLK_MM_CAM_MDP				17
+#define CLK_MM_FAKE_ENG				18
+#define CLK_MM_MUTEX_32K			19
+#define CLK_MM_DISP_RDMA1			20
+#define CLK_MM_DISP_UFOE			21
+
+#define CLK_MM_DSI_ENGINE			22
+#define CLK_MM_DSI_DIG				23
+#define CLK_MM_DPI_DIGL				24
+#define CLK_MM_DPI_ENGINE			25
+#define CLK_MM_DPI1_DIGL			26
+#define CLK_MM_DPI1_ENGINE			27
+#define CLK_MM_TVE_OUTPUT			28
+#define CLK_MM_TVE_INPUT			29
+#define CLK_MM_HDMI_PIXEL			30
+#define CLK_MM_HDMI_PLL				31
+#define CLK_MM_HDMI_AUDIO			32
+#define CLK_MM_HDMI_SPDIF			33
+#define CLK_MM_TVE_FMM				34
+#define CLK_MM_NR				35
+
+/* IMGSYS */
+
+#define CLK_IMG_SMI_COMM			1
+#define CLK_IMG_RESZ				2
+#define CLK_IMG_JPGDEC_SMI			3
+#define CLK_IMG_JPGDEC				4
+#define CLK_IMG_VENC_LT				5
+#define CLK_IMG_VENC				6
+#define CLK_IMG_NR				7
+
+/* VDEC */
+
+#define CLK_VDEC_CKGEN				1
+#define CLK_VDEC_LARB				2
+#define CLK_VDEC_NR				3
+
+/* HIFSYS */
+
+#define CLK_HIFSYS_USB0PHY			1
+#define CLK_HIFSYS_USB1PHY			2
+#define CLK_HIFSYS_PCIE0			3
+#define CLK_HIFSYS_PCIE1			4
+#define CLK_HIFSYS_PCIE2			5
+#define CLK_HIFSYS_NR				6
+
+/* ETHSYS */
+#define CLK_ETHSYS_HSDMA			1
+#define CLK_ETHSYS_ESW				2
+#define CLK_ETHSYS_GP2				3
+#define CLK_ETHSYS_GP1				4
+#define CLK_ETHSYS_PCM				5
+#define CLK_ETHSYS_GDMA				6
+#define CLK_ETHSYS_I2S				7
+#define CLK_ETHSYS_CRYPTO			8
+#define CLK_ETHSYS_NR				9
+
+/* BDP */
+
+#define CLK_BDP_BRG_BA				1
+#define CLK_BDP_BRG_DRAM			2
+#define CLK_BDP_LARB_DRAM			3
+#define CLK_BDP_WR_VDI_PXL			4
+#define CLK_BDP_WR_VDI_DRAM			5
+#define CLK_BDP_WR_B				6
+#define CLK_BDP_DGI_IN				7
+#define CLK_BDP_DGI_OUT				8
+#define CLK_BDP_FMT_MAST_27			9
+#define CLK_BDP_FMT_B				10
+#define CLK_BDP_OSD_B				11
+#define CLK_BDP_OSD_DRAM			12
+#define CLK_BDP_OSD_AGENT			13
+#define CLK_BDP_OSD_PXL				14
+#define CLK_BDP_RLE_B				15
+#define CLK_BDP_RLE_AGENT			16
+#define CLK_BDP_RLE_DRAM			17
+#define CLK_BDP_F27M				18
+#define CLK_BDP_F27M_VDOUT			19
+#define CLK_BDP_F27_74_74			20
+#define CLK_BDP_F2FS				21
+#define CLK_BDP_F2FS74_148			22
+#define CLK_BDP_FB				23
+#define CLK_BDP_VDO_DRAM			24
+#define CLK_BDP_VDO_2FS				25
+#define CLK_BDP_VDO_B				26
+#define CLK_BDP_WR_DI_PXL			27
+#define CLK_BDP_WR_DI_DRAM			28
+#define CLK_BDP_WR_DI_B				29
+#define CLK_BDP_NR_PXL				30
+#define CLK_BDP_NR_DRAM				31
+#define CLK_BDP_NR_B				32
+
+#define CLK_BDP_RX_F				33
+#define CLK_BDP_RX_X				34
+#define CLK_BDP_RXPDT				35
+#define CLK_BDP_RX_CSCL_N			36
+#define CLK_BDP_RX_CSCL				37
+#define CLK_BDP_RX_DDCSCL_N			38
+#define CLK_BDP_RX_DDCSCL			39
+#define CLK_BDP_RX_VCO				40
+#define CLK_BDP_RX_DP				41
+#define CLK_BDP_RX_P				42
+#define CLK_BDP_RX_M				43
+#define CLK_BDP_RX_PLL				44
+#define CLK_BDP_BRG_RT_B			45
+#define CLK_BDP_BRG_RT_DRAM			46
+#define CLK_BDP_LARBRT_DRAM			47
+#define CLK_BDP_TMDS_SYN			48
+#define CLK_BDP_HDMI_MON			49
+#define CLK_BDP_NR				50
+
+#endif /* _DT_BINDINGS_CLK_MT2701_H */
diff --git a/include/dt-bindings/clock/qcom,gcc-mdm9615.h b/include/dt-bindings/clock/qcom,gcc-mdm9615.h
new file mode 100644
index 0000000..9ab2c40
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,gcc-mdm9615.h
@@ -0,0 +1,327 @@
+/*
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) BayLibre, SAS.
+ * Author : Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_MDM_GCC_9615_H
+#define _DT_BINDINGS_CLK_MDM_GCC_9615_H
+
+#define AFAB_CLK_SRC				0
+#define AFAB_CORE_CLK				1
+#define SFAB_MSS_Q6_SW_A_CLK			2
+#define SFAB_MSS_Q6_FW_A_CLK			3
+#define QDSS_STM_CLK				4
+#define SCSS_A_CLK				5
+#define SCSS_H_CLK				6
+#define SCSS_XO_SRC_CLK				7
+#define AFAB_EBI1_CH0_A_CLK			8
+#define AFAB_EBI1_CH1_A_CLK			9
+#define AFAB_AXI_S0_FCLK			10
+#define AFAB_AXI_S1_FCLK			11
+#define AFAB_AXI_S2_FCLK			12
+#define AFAB_AXI_S3_FCLK			13
+#define AFAB_AXI_S4_FCLK			14
+#define SFAB_CORE_CLK				15
+#define SFAB_AXI_S0_FCLK			16
+#define SFAB_AXI_S1_FCLK			17
+#define SFAB_AXI_S2_FCLK			18
+#define SFAB_AXI_S3_FCLK			19
+#define SFAB_AXI_S4_FCLK			20
+#define SFAB_AHB_S0_FCLK			21
+#define SFAB_AHB_S1_FCLK			22
+#define SFAB_AHB_S2_FCLK			23
+#define SFAB_AHB_S3_FCLK			24
+#define SFAB_AHB_S4_FCLK			25
+#define SFAB_AHB_S5_FCLK			26
+#define SFAB_AHB_S6_FCLK			27
+#define SFAB_AHB_S7_FCLK			28
+#define QDSS_AT_CLK_SRC				29
+#define QDSS_AT_CLK				30
+#define QDSS_TRACECLKIN_CLK_SRC			31
+#define QDSS_TRACECLKIN_CLK			32
+#define QDSS_TSCTR_CLK_SRC			33
+#define QDSS_TSCTR_CLK				34
+#define SFAB_ADM0_M0_A_CLK			35
+#define SFAB_ADM0_M1_A_CLK			36
+#define SFAB_ADM0_M2_H_CLK			37
+#define ADM0_CLK				38
+#define ADM0_PBUS_CLK				39
+#define MSS_XPU_CLK				40
+#define IMEM0_A_CLK				41
+#define QDSS_H_CLK				42
+#define PCIE_A_CLK				43
+#define PCIE_AUX_CLK				44
+#define PCIE_PHY_REF_CLK			45
+#define PCIE_H_CLK				46
+#define SFAB_CLK_SRC				47
+#define MAHB0_CLK				48
+#define Q6SW_CLK_SRC				49
+#define Q6SW_CLK				50
+#define Q6FW_CLK_SRC				51
+#define Q6FW_CLK				52
+#define SFAB_MSS_M_A_CLK			53
+#define SFAB_USB3_M_A_CLK			54
+#define SFAB_LPASS_Q6_A_CLK			55
+#define SFAB_AFAB_M_A_CLK			56
+#define AFAB_SFAB_M0_A_CLK			57
+#define AFAB_SFAB_M1_A_CLK			58
+#define SFAB_SATA_S_H_CLK			59
+#define DFAB_CLK_SRC				60
+#define DFAB_CLK				61
+#define SFAB_DFAB_M_A_CLK			62
+#define DFAB_SFAB_M_A_CLK			63
+#define DFAB_SWAY0_H_CLK			64
+#define DFAB_SWAY1_H_CLK			65
+#define DFAB_ARB0_H_CLK				66
+#define DFAB_ARB1_H_CLK				67
+#define PPSS_H_CLK				68
+#define PPSS_PROC_CLK				69
+#define PPSS_TIMER0_CLK				70
+#define PPSS_TIMER1_CLK				71
+#define PMEM_A_CLK				72
+#define DMA_BAM_H_CLK				73
+#define SIC_H_CLK				74
+#define SPS_TIC_H_CLK				75
+#define SLIMBUS_H_CLK				76
+#define SLIMBUS_XO_SRC_CLK			77
+#define CFPB_2X_CLK_SRC				78
+#define CFPB_CLK				79
+#define CFPB0_H_CLK				80
+#define CFPB1_H_CLK				81
+#define CFPB2_H_CLK				82
+#define SFAB_CFPB_M_H_CLK			83
+#define CFPB_MASTER_H_CLK			84
+#define SFAB_CFPB_S_H_CLK			85
+#define CFPB_SPLITTER_H_CLK			86
+#define TSIF_H_CLK				87
+#define TSIF_INACTIVITY_TIMERS_CLK		88
+#define TSIF_REF_SRC				89
+#define TSIF_REF_CLK				90
+#define CE1_H_CLK				91
+#define CE1_CORE_CLK				92
+#define CE1_SLEEP_CLK				93
+#define CE2_H_CLK				94
+#define CE2_CORE_CLK				95
+#define SFPB_H_CLK_SRC				97
+#define SFPB_H_CLK				98
+#define SFAB_SFPB_M_H_CLK			99
+#define SFAB_SFPB_S_H_CLK			100
+#define RPM_PROC_CLK				101
+#define RPM_BUS_H_CLK				102
+#define RPM_SLEEP_CLK				103
+#define RPM_TIMER_CLK				104
+#define RPM_MSG_RAM_H_CLK			105
+#define PMIC_ARB0_H_CLK				106
+#define PMIC_ARB1_H_CLK				107
+#define PMIC_SSBI2_SRC				108
+#define PMIC_SSBI2_CLK				109
+#define SDC1_H_CLK				110
+#define SDC2_H_CLK				111
+#define SDC3_H_CLK				112
+#define SDC4_H_CLK				113
+#define SDC5_H_CLK				114
+#define SDC1_SRC				115
+#define SDC2_SRC				116
+#define SDC3_SRC				117
+#define SDC4_SRC				118
+#define SDC5_SRC				119
+#define SDC1_CLK				120
+#define SDC2_CLK				121
+#define SDC3_CLK				122
+#define SDC4_CLK				123
+#define SDC5_CLK				124
+#define DFAB_A2_H_CLK				125
+#define USB_HS1_H_CLK				126
+#define USB_HS1_XCVR_SRC			127
+#define USB_HS1_XCVR_CLK			128
+#define USB_HSIC_H_CLK				129
+#define USB_HSIC_XCVR_FS_SRC			130
+#define USB_HSIC_XCVR_FS_CLK			131
+#define USB_HSIC_SYSTEM_CLK_SRC			132
+#define USB_HSIC_SYSTEM_CLK			133
+#define CFPB0_C0_H_CLK				134
+#define CFPB0_C1_H_CLK				135
+#define CFPB0_D0_H_CLK				136
+#define CFPB0_D1_H_CLK				137
+#define USB_FS1_H_CLK				138
+#define USB_FS1_XCVR_FS_SRC			139
+#define USB_FS1_XCVR_FS_CLK			140
+#define USB_FS1_SYSTEM_CLK			141
+#define USB_FS2_H_CLK				142
+#define USB_FS2_XCVR_FS_SRC			143
+#define USB_FS2_XCVR_FS_CLK			144
+#define USB_FS2_SYSTEM_CLK			145
+#define GSBI_COMMON_SIM_SRC			146
+#define GSBI1_H_CLK				147
+#define GSBI2_H_CLK				148
+#define GSBI3_H_CLK				149
+#define GSBI4_H_CLK				150
+#define GSBI5_H_CLK				151
+#define GSBI6_H_CLK				152
+#define GSBI7_H_CLK				153
+#define GSBI8_H_CLK				154
+#define GSBI9_H_CLK				155
+#define GSBI10_H_CLK				156
+#define GSBI11_H_CLK				157
+#define GSBI12_H_CLK				158
+#define GSBI1_UART_SRC				159
+#define GSBI1_UART_CLK				160
+#define GSBI2_UART_SRC				161
+#define GSBI2_UART_CLK				162
+#define GSBI3_UART_SRC				163
+#define GSBI3_UART_CLK				164
+#define GSBI4_UART_SRC				165
+#define GSBI4_UART_CLK				166
+#define GSBI5_UART_SRC				167
+#define GSBI5_UART_CLK				168
+#define GSBI6_UART_SRC				169
+#define GSBI6_UART_CLK				170
+#define GSBI7_UART_SRC				171
+#define GSBI7_UART_CLK				172
+#define GSBI8_UART_SRC				173
+#define GSBI8_UART_CLK				174
+#define GSBI9_UART_SRC				175
+#define GSBI9_UART_CLK				176
+#define GSBI10_UART_SRC				177
+#define GSBI10_UART_CLK				178
+#define GSBI11_UART_SRC				179
+#define GSBI11_UART_CLK				180
+#define GSBI12_UART_SRC				181
+#define GSBI12_UART_CLK				182
+#define GSBI1_QUP_SRC				183
+#define GSBI1_QUP_CLK				184
+#define GSBI2_QUP_SRC				185
+#define GSBI2_QUP_CLK				186
+#define GSBI3_QUP_SRC				187
+#define GSBI3_QUP_CLK				188
+#define GSBI4_QUP_SRC				189
+#define GSBI4_QUP_CLK				190
+#define GSBI5_QUP_SRC				191
+#define GSBI5_QUP_CLK				192
+#define GSBI6_QUP_SRC				193
+#define GSBI6_QUP_CLK				194
+#define GSBI7_QUP_SRC				195
+#define GSBI7_QUP_CLK				196
+#define GSBI8_QUP_SRC				197
+#define GSBI8_QUP_CLK				198
+#define GSBI9_QUP_SRC				199
+#define GSBI9_QUP_CLK				200
+#define GSBI10_QUP_SRC				201
+#define GSBI10_QUP_CLK				202
+#define GSBI11_QUP_SRC				203
+#define GSBI11_QUP_CLK				204
+#define GSBI12_QUP_SRC				205
+#define GSBI12_QUP_CLK				206
+#define GSBI1_SIM_CLK				207
+#define GSBI2_SIM_CLK				208
+#define GSBI3_SIM_CLK				209
+#define GSBI4_SIM_CLK				210
+#define GSBI5_SIM_CLK				211
+#define GSBI6_SIM_CLK				212
+#define GSBI7_SIM_CLK				213
+#define GSBI8_SIM_CLK				214
+#define GSBI9_SIM_CLK				215
+#define GSBI10_SIM_CLK				216
+#define GSBI11_SIM_CLK				217
+#define GSBI12_SIM_CLK				218
+#define USB_HSIC_HSIC_CLK_SRC			219
+#define USB_HSIC_HSIC_CLK			220
+#define USB_HSIC_HSIO_CAL_CLK			221
+#define SPDM_CFG_H_CLK				222
+#define SPDM_MSTR_H_CLK				223
+#define SPDM_FF_CLK_SRC				224
+#define SPDM_FF_CLK				225
+#define SEC_CTRL_CLK				226
+#define SEC_CTRL_ACC_CLK_SRC			227
+#define SEC_CTRL_ACC_CLK			228
+#define TLMM_H_CLK				229
+#define TLMM_CLK				230
+#define SFAB_MSS_S_H_CLK			231
+#define MSS_SLP_CLK				232
+#define MSS_Q6SW_JTAG_CLK			233
+#define MSS_Q6FW_JTAG_CLK			234
+#define MSS_S_H_CLK				235
+#define MSS_CXO_SRC_CLK				236
+#define SATA_H_CLK				237
+#define SATA_CLK_SRC				238
+#define SATA_RXOOB_CLK				239
+#define SATA_PMALIVE_CLK			240
+#define SATA_PHY_REF_CLK			241
+#define TSSC_CLK_SRC				242
+#define TSSC_CLK				243
+#define PDM_SRC					244
+#define PDM_CLK					245
+#define GP0_SRC					246
+#define GP0_CLK					247
+#define GP1_SRC					248
+#define GP1_CLK					249
+#define GP2_SRC					250
+#define GP2_CLK					251
+#define MPM_CLK					252
+#define EBI1_CLK_SRC				253
+#define EBI1_CH0_CLK				254
+#define EBI1_CH1_CLK				255
+#define EBI1_2X_CLK				256
+#define EBI1_CH0_DQ_CLK				257
+#define EBI1_CH1_DQ_CLK				258
+#define EBI1_CH0_CA_CLK				259
+#define EBI1_CH1_CA_CLK				260
+#define EBI1_XO_CLK				261
+#define SFAB_SMPSS_S_H_CLK			262
+#define PRNG_SRC				263
+#define PRNG_CLK				264
+#define PXO_SRC					265
+#define LPASS_CXO_CLK				266
+#define LPASS_PXO_CLK				267
+#define SPDM_CY_PORT0_CLK			268
+#define SPDM_CY_PORT1_CLK			269
+#define SPDM_CY_PORT2_CLK			270
+#define SPDM_CY_PORT3_CLK			271
+#define SPDM_CY_PORT4_CLK			272
+#define SPDM_CY_PORT5_CLK			273
+#define SPDM_CY_PORT6_CLK			274
+#define SPDM_CY_PORT7_CLK			275
+#define PLL0					276
+#define PLL0_VOTE				277
+#define PLL3					278
+#define PLL3_VOTE				279
+#define PLL4_VOTE				280
+#define PLL5					281
+#define PLL5_VOTE				282
+#define PLL6					283
+#define PLL6_VOTE				284
+#define PLL7_VOTE				285
+#define PLL8					286
+#define PLL8_VOTE				287
+#define PLL9					288
+#define PLL10					289
+#define PLL11					290
+#define PLL12					291
+#define PLL13					292
+#define PLL14					293
+#define PLL14_VOTE				294
+#define USB_HS3_H_CLK				295
+#define USB_HS3_XCVR_SRC			296
+#define USB_HS3_XCVR_CLK			297
+#define USB_HS4_H_CLK				298
+#define USB_HS4_XCVR_SRC			299
+#define USB_HS4_XCVR_CLK			300
+#define SATA_PHY_CFG_CLK			301
+#define SATA_A_CLK				302
+#define CE3_SRC					303
+#define CE3_CORE_CLK				304
+#define CE3_H_CLK				305
+#define USB_HS1_SYSTEM_CLK_SRC			306
+#define USB_HS1_SYSTEM_CLK			307
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,gcc-msm8996.h b/include/dt-bindings/clock/qcom,gcc-msm8996.h
index 6f814db..1828723 100644
--- a/include/dt-bindings/clock/qcom,gcc-msm8996.h
+++ b/include/dt-bindings/clock/qcom,gcc-msm8996.h
@@ -335,6 +335,11 @@
 #define GCC_MSMPU_BCR						98
 #define GCC_MSS_Q6_BCR						99
 #define GCC_QREFS_VBG_CAL_BCR					100
+#define GCC_PCIE_PHY_COM_BCR					101
+#define GCC_PCIE_PHY_COM_NOCSR_BCR				102
+#define GCC_USB3_PHY_BCR					103
+#define GCC_USB3PHY_PHY_BCR					104
+
 
 /* Indexes for GDSCs */
 #define AGGRE0_NOC_GDSC			0
diff --git a/include/dt-bindings/clock/qcom,lcc-mdm9615.h b/include/dt-bindings/clock/qcom,lcc-mdm9615.h
new file mode 100644
index 0000000..cac963a
--- /dev/null
+++ b/include/dt-bindings/clock/qcom,lcc-mdm9615.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ * Copyright (c) BayLibre, SAS.
+ * Author : Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_CLK_LCC_MDM9615_H
+#define _DT_BINDINGS_CLK_LCC_MDM9615_H
+
+#define PLL4				0
+#define MI2S_OSR_SRC			1
+#define MI2S_OSR_CLK			2
+#define MI2S_DIV_CLK			3
+#define MI2S_BIT_DIV_CLK		4
+#define MI2S_BIT_CLK			5
+#define PCM_SRC				6
+#define PCM_CLK_OUT			7
+#define PCM_CLK				8
+#define SLIMBUS_SRC			9
+#define AUDIO_SLIMBUS_CLK		10
+#define SPS_SLIMBUS_CLK			11
+#define CODEC_I2S_MIC_OSR_SRC		12
+#define CODEC_I2S_MIC_OSR_CLK		13
+#define CODEC_I2S_MIC_DIV_CLK		14
+#define CODEC_I2S_MIC_BIT_DIV_CLK	15
+#define CODEC_I2S_MIC_BIT_CLK		16
+#define SPARE_I2S_MIC_OSR_SRC		17
+#define SPARE_I2S_MIC_OSR_CLK		18
+#define SPARE_I2S_MIC_DIV_CLK		19
+#define SPARE_I2S_MIC_BIT_DIV_CLK	20
+#define SPARE_I2S_MIC_BIT_CLK		21
+#define CODEC_I2S_SPKR_OSR_SRC		22
+#define CODEC_I2S_SPKR_OSR_CLK		23
+#define CODEC_I2S_SPKR_DIV_CLK		24
+#define CODEC_I2S_SPKR_BIT_DIV_CLK	25
+#define CODEC_I2S_SPKR_BIT_CLK		26
+#define SPARE_I2S_SPKR_OSR_SRC		27
+#define SPARE_I2S_SPKR_OSR_CLK		28
+#define SPARE_I2S_SPKR_DIV_CLK		29
+#define SPARE_I2S_SPKR_BIT_DIV_CLK	30
+#define SPARE_I2S_SPKR_BIT_CLK		31
+
+#endif
diff --git a/include/dt-bindings/clock/qcom,mmcc-msm8996.h b/include/dt-bindings/clock/qcom,mmcc-msm8996.h
index 7d3a7fa..5abc445 100644
--- a/include/dt-bindings/clock/qcom,mmcc-msm8996.h
+++ b/include/dt-bindings/clock/qcom,mmcc-msm8996.h
@@ -298,5 +298,6 @@
 #define FD_GDSC			12
 #define MDSS_GDSC		13
 #define GPU_GX_GDSC		14
+#define MMAGIC_BIMC_GDSC	15
 
 #endif
diff --git a/include/dt-bindings/clock/rk3399-cru.h b/include/dt-bindings/clock/rk3399-cru.h
index 50a44cf..220a60f 100644
--- a/include/dt-bindings/clock/rk3399-cru.h
+++ b/include/dt-bindings/clock/rk3399-cru.h
@@ -131,12 +131,15 @@
 #define SCLK_DPHY_RX0_CFG		165
 #define SCLK_RMII_SRC			166
 #define SCLK_PCIEPHY_REF100M		167
+#define SCLK_DDRC			168
 
 #define DCLK_VOP0			180
 #define DCLK_VOP1			181
 #define DCLK_VOP0_DIV			182
 #define DCLK_VOP1_DIV			183
 #define DCLK_M0_PERILP			184
+#define DCLK_VOP0_FRAC			185
+#define DCLK_VOP1_FRAC			186
 
 #define FCLK_CM0S			190
 
diff --git a/include/dt-bindings/clock/sun6i-a31-ccu.h b/include/dt-bindings/clock/sun6i-a31-ccu.h
new file mode 100644
index 0000000..4482530
--- /dev/null
+++ b/include/dt-bindings/clock/sun6i-a31-ccu.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2016 Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_CLK_SUN6I_A31_H_
+#define _DT_BINDINGS_CLK_SUN6I_A31_H_
+
+#define CLK_PLL_PERIPH		10
+
+#define CLK_CPU			18
+
+#define CLK_AHB1_MIPIDSI	23
+#define CLK_AHB1_SS		24
+#define CLK_AHB1_DMA		25
+#define CLK_AHB1_MMC0		26
+#define CLK_AHB1_MMC1		27
+#define CLK_AHB1_MMC2		28
+#define CLK_AHB1_MMC3		29
+#define CLK_AHB1_NAND1		30
+#define CLK_AHB1_NAND0		31
+#define CLK_AHB1_SDRAM		32
+#define CLK_AHB1_EMAC		33
+#define CLK_AHB1_TS		34
+#define CLK_AHB1_HSTIMER	35
+#define CLK_AHB1_SPI0		36
+#define CLK_AHB1_SPI1		37
+#define CLK_AHB1_SPI2		38
+#define CLK_AHB1_SPI3		39
+#define CLK_AHB1_OTG		40
+#define CLK_AHB1_EHCI0		41
+#define CLK_AHB1_EHCI1		42
+#define CLK_AHB1_OHCI0		43
+#define CLK_AHB1_OHCI1		44
+#define CLK_AHB1_OHCI2		45
+#define CLK_AHB1_VE		46
+#define CLK_AHB1_LCD0		47
+#define CLK_AHB1_LCD1		48
+#define CLK_AHB1_CSI		49
+#define CLK_AHB1_HDMI		50
+#define CLK_AHB1_BE0		51
+#define CLK_AHB1_BE1		52
+#define CLK_AHB1_FE0		53
+#define CLK_AHB1_FE1		54
+#define CLK_AHB1_MP		55
+#define CLK_AHB1_GPU		56
+#define CLK_AHB1_DEU0		57
+#define CLK_AHB1_DEU1		58
+#define CLK_AHB1_DRC0		59
+#define CLK_AHB1_DRC1		60
+
+#define CLK_APB1_CODEC		61
+#define CLK_APB1_SPDIF		62
+#define CLK_APB1_DIGITAL_MIC	63
+#define CLK_APB1_PIO		64
+#define CLK_APB1_DAUDIO0	65
+#define CLK_APB1_DAUDIO1	66
+
+#define CLK_APB2_I2C0		67
+#define CLK_APB2_I2C1		68
+#define CLK_APB2_I2C2		69
+#define CLK_APB2_I2C3		70
+#define CLK_APB2_UART0		71
+#define CLK_APB2_UART1		72
+#define CLK_APB2_UART2		73
+#define CLK_APB2_UART3		74
+#define CLK_APB2_UART4		75
+#define CLK_APB2_UART5		76
+
+#define CLK_NAND0		77
+#define CLK_NAND1		78
+#define CLK_MMC0		79
+#define CLK_MMC0_SAMPLE		80
+#define CLK_MMC0_OUTPUT		81
+#define CLK_MMC1		82
+#define CLK_MMC1_SAMPLE		83
+#define CLK_MMC1_OUTPUT		84
+#define CLK_MMC2		85
+#define CLK_MMC2_SAMPLE		86
+#define CLK_MMC2_OUTPUT		87
+#define CLK_MMC3		88
+#define CLK_MMC3_SAMPLE		89
+#define CLK_MMC3_OUTPUT		90
+#define CLK_TS			91
+#define CLK_SS			92
+#define CLK_SPI0		93
+#define CLK_SPI1		94
+#define CLK_SPI2		95
+#define CLK_SPI3		96
+#define CLK_DAUDIO0		97
+#define CLK_DAUDIO1		98
+#define CLK_SPDIF		99
+#define CLK_USB_PHY0		100
+#define CLK_USB_PHY1		101
+#define CLK_USB_PHY2		102
+#define CLK_USB_OHCI0		103
+#define CLK_USB_OHCI1		104
+#define CLK_USB_OHCI2		105
+
+#define CLK_DRAM_VE		110
+#define CLK_DRAM_CSI_ISP	111
+#define CLK_DRAM_TS		112
+#define CLK_DRAM_DRC0		113
+#define CLK_DRAM_DRC1		114
+#define CLK_DRAM_DEU0		115
+#define CLK_DRAM_DEU1		116
+#define CLK_DRAM_FE0		117
+#define CLK_DRAM_FE1		118
+#define CLK_DRAM_BE0		119
+#define CLK_DRAM_BE1		120
+#define CLK_DRAM_MP		121
+
+#define CLK_BE0			122
+#define CLK_BE1			123
+#define CLK_FE0			124
+#define CLK_FE1			125
+#define CLK_MP			126
+#define CLK_LCD0_CH0		127
+#define CLK_LCD1_CH0		128
+#define CLK_LCD0_CH1		129
+#define CLK_LCD1_CH1		130
+#define CLK_CSI0_SCLK		131
+#define CLK_CSI0_MCLK		132
+#define CLK_CSI1_MCLK		133
+#define CLK_VE			134
+#define CLK_CODEC		135
+#define CLK_AVS			136
+#define CLK_DIGITAL_MIC		137
+#define CLK_HDMI		138
+#define CLK_HDMI_DDC		139
+#define CLK_PS			140
+
+#define CLK_MIPI_DSI		143
+#define CLK_MIPI_DSI_DPHY	144
+#define CLK_MIPI_CSI_DPHY	145
+#define CLK_IEP_DRC0		146
+#define CLK_IEP_DRC1		147
+#define CLK_IEP_DEU0		148
+#define CLK_IEP_DEU1		149
+#define CLK_GPU_CORE		150
+#define CLK_GPU_MEMORY		151
+#define CLK_GPU_HYD		152
+#define CLK_ATS			153
+#define CLK_TRACE		154
+
+#define CLK_OUT_A		155
+#define CLK_OUT_B		156
+#define CLK_OUT_C		157
+
+#endif /* _DT_BINDINGS_CLK_SUN6I_A31_H_ */
diff --git a/include/dt-bindings/clock/sun8i-a23-a33-ccu.h b/include/dt-bindings/clock/sun8i-a23-a33-ccu.h
new file mode 100644
index 0000000..f8222b6
--- /dev/null
+++ b/include/dt-bindings/clock/sun8i-a23-a33-ccu.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2016 Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_CLK_SUN8I_A23_A33_H_
+#define _DT_BINDINGS_CLK_SUN8I_A23_A33_H_
+
+#define CLK_CPUX		18
+
+#define CLK_BUS_MIPI_DSI	23
+#define CLK_BUS_SS		24
+#define CLK_BUS_DMA		25
+#define CLK_BUS_MMC0		26
+#define CLK_BUS_MMC1		27
+#define CLK_BUS_MMC2		28
+#define CLK_BUS_NAND		29
+#define CLK_BUS_DRAM		30
+#define CLK_BUS_HSTIMER		31
+#define CLK_BUS_SPI0		32
+#define CLK_BUS_SPI1		33
+#define CLK_BUS_OTG		34
+#define CLK_BUS_EHCI		35
+#define CLK_BUS_OHCI		36
+#define CLK_BUS_VE		37
+#define CLK_BUS_LCD		38
+#define CLK_BUS_CSI		39
+#define CLK_BUS_DE_BE		40
+#define CLK_BUS_DE_FE		41
+#define CLK_BUS_GPU		42
+#define CLK_BUS_MSGBOX		43
+#define CLK_BUS_SPINLOCK	44
+#define CLK_BUS_DRC		45
+#define CLK_BUS_SAT		46
+#define CLK_BUS_CODEC		47
+#define CLK_BUS_PIO		48
+#define CLK_BUS_I2S0		49
+#define CLK_BUS_I2S1		50
+#define CLK_BUS_I2C0		51
+#define CLK_BUS_I2C1		52
+#define CLK_BUS_I2C2		53
+#define CLK_BUS_UART0		54
+#define CLK_BUS_UART1		55
+#define CLK_BUS_UART2		56
+#define CLK_BUS_UART3		57
+#define CLK_BUS_UART4		58
+#define CLK_NAND		59
+#define CLK_MMC0		60
+#define CLK_MMC0_SAMPLE		61
+#define CLK_MMC0_OUTPUT		62
+#define CLK_MMC1		63
+#define CLK_MMC1_SAMPLE		64
+#define CLK_MMC1_OUTPUT		65
+#define CLK_MMC2		66
+#define CLK_MMC2_SAMPLE		67
+#define CLK_MMC2_OUTPUT		68
+#define CLK_SS			69
+#define CLK_SPI0		70
+#define CLK_SPI1		71
+#define CLK_I2S0		72
+#define CLK_I2S1		73
+#define CLK_USB_PHY0		74
+#define CLK_USB_PHY1		75
+#define CLK_USB_HSIC		76
+#define CLK_USB_HSIC_12M	77
+#define CLK_USB_OHCI		78
+
+#define CLK_DRAM_VE		80
+#define CLK_DRAM_CSI		81
+#define CLK_DRAM_DRC		82
+#define CLK_DRAM_DE_FE		83
+#define CLK_DRAM_DE_BE		84
+#define CLK_DE_BE		85
+#define CLK_DE_FE		86
+#define CLK_LCD_CH0		87
+#define CLK_LCD_CH1		88
+#define CLK_CSI_SCLK		89
+#define CLK_CSI_MCLK		90
+#define CLK_VE			91
+#define CLK_AC_DIG		92
+#define CLK_AC_DIG_4X		93
+#define CLK_AVS			94
+
+#define CLK_DSI_SCLK		96
+#define CLK_DSI_DPHY		97
+#define CLK_DRC			98
+#define CLK_GPU			99
+#define CLK_ATS			100
+
+#endif /* _DT_BINDINGS_CLK_SUN8I_A23_A33_H_ */
diff --git a/include/dt-bindings/clock/zx296718-clock.h b/include/dt-bindings/clock/zx296718-clock.h
new file mode 100644
index 0000000..822d523
--- /dev/null
+++ b/include/dt-bindings/clock/zx296718-clock.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2015 - 2016 ZTE Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __DT_BINDINGS_CLOCK_ZX296718_H
+#define __DT_BINDINGS_CLOCK_ZX296718_H
+
+/* PLL */
+#define ZX296718_PLL_CPU	1
+#define ZX296718_PLL_MAC	2
+#define ZX296718_PLL_MM0	3
+#define ZX296718_PLL_MM1	4
+#define ZX296718_PLL_VGA	5
+#define ZX296718_PLL_DDR	6
+#define ZX296718_PLL_AUDIO	7
+#define ZX296718_PLL_HSIC	8
+#define CPU_DBG_GATE		9
+#define A72_GATE		10
+#define CPU_PERI_GATE		11
+#define A53_GATE		12
+#define DDR1_GATE		13
+#define DDR0_GATE		14
+#define SD1_WCLK		15
+#define SD1_AHB			16
+#define SD0_WCLK		17
+#define SD0_AHB			18
+#define EMMC_WCLK		19
+#define EMMC_NAND_AXI		20
+#define NAND_WCLK		21
+#define EMMC_NAND_AHB		22
+#define LSP1_148M5		23
+#define LSP1_99M		24
+#define LSP1_24M		25
+#define LSP0_74M25		26
+#define LSP0_32K		27
+#define LSP0_148M5		28
+#define LSP0_99M		29
+#define LSP0_24M		30
+#define DEMUX_AXI		31
+#define DEMUX_APB		32
+#define DEMUX_148M5		33
+#define DEMUX_108M		34
+#define AUDIO_APB		35
+#define AUDIO_99M		36
+#define AUDIO_24M		37
+#define AUDIO_16M384		38
+#define AUDIO_32K		39
+#define WDT_WCLK		40
+#define TIMER_WCLK		41
+#define VDE_ACLK		42
+#define VCE_ACLK		43
+#define HDE_ACLK		44
+#define GPU_ACLK		45
+#define SAPPU_ACLK		46
+#define SAPPU_WCLK		47
+#define VOU_ACLK		48
+#define VOU_MAIN_WCLK		49
+#define VOU_AUX_WCLK		50
+#define VOU_PPU_WCLK		51
+#define MIPI_CFG_CLK		52
+#define VGA_I2C_WCLK		53
+#define MIPI_REF_CLK		54
+#define HDMI_OSC_CEC		55
+#define HDMI_OSC_CLK		56
+#define HDMI_XCLK		57
+#define VIU_M0_ACLK		58
+#define VIU_M1_ACLK		59
+#define VIU_WCLK		60
+#define VIU_JPEG_WCLK		61
+#define VIU_CFG_CLK		62
+#define TS_SYS_WCLK		63
+#define TS_SYS_108M		64
+#define USB20_HCLK		65
+#define USB20_PHY_CLK		66
+#define USB21_HCLK		67
+#define USB21_PHY_CLK		68
+#define GMAC_RMIICLK		69
+#define GMAC_PCLK		70
+#define GMAC_ACLK		71
+#define GMAC_RFCLK		72
+#define TEMPSENSOR_GATE		73
+
+#define TOP_NR_CLKS		74
+
+
+#define LSP0_TIMER3_PCLK	1
+#define LSP0_TIMER3_WCLK	2
+#define LSP0_TIMER4_PCLK	3
+#define LSP0_TIMER4_WCLK	4
+#define LSP0_TIMER5_PCLK	5
+#define LSP0_TIMER5_WCLK	6
+#define LSP0_UART3_PCLK		7
+#define LSP0_UART3_WCLK		8
+#define LSP0_UART1_PCLK		9
+#define LSP0_UART1_WCLK		10
+#define LSP0_UART2_PCLK		11
+#define LSP0_UART2_WCLK		12
+#define LSP0_SPIFC0_PCLK	13
+#define LSP0_SPIFC0_WCLK	14
+#define LSP0_I2C4_PCLK		15
+#define LSP0_I2C4_WCLK		16
+#define LSP0_I2C5_PCLK		17
+#define LSP0_I2C5_WCLK		18
+#define LSP0_SSP0_PCLK		19
+#define LSP0_SSP0_WCLK		20
+#define LSP0_SSP1_PCLK		21
+#define LSP0_SSP1_WCLK		22
+#define LSP0_USIM_PCLK		23
+#define LSP0_USIM_WCLK		24
+#define LSP0_GPIO_PCLK		25
+#define LSP0_GPIO_WCLK		26
+#define LSP0_I2C3_PCLK		27
+#define LSP0_I2C3_WCLK		28
+
+#define LSP0_NR_CLKS		29
+
+
+#define LSP1_UART4_PCLK		1
+#define LSP1_UART4_WCLK		2
+#define LSP1_UART5_PCLK		3
+#define LSP1_UART5_WCLK		4
+#define LSP1_PWM_PCLK		5
+#define LSP1_PWM_WCLK		6
+#define LSP1_I2C2_PCLK		7
+#define LSP1_I2C2_WCLK		8
+#define LSP1_SSP2_PCLK		9
+#define LSP1_SSP2_WCLK		10
+#define LSP1_SSP3_PCLK		11
+#define LSP1_SSP3_WCLK		12
+#define LSP1_SSP4_PCLK		13
+#define LSP1_SSP4_WCLK		14
+#define LSP1_USIM1_PCLK		15
+#define LSP1_USIM1_WCLK		16
+
+#define LSP1_NR_CLKS		17
+
+
+#define AUDIO_I2S0_WCLK		1
+#define AUDIO_I2S0_PCLK		2
+#define AUDIO_I2S1_WCLK		3
+#define AUDIO_I2S1_PCLK		4
+#define AUDIO_I2S2_WCLK		5
+#define AUDIO_I2S2_PCLK		6
+#define AUDIO_I2S3_WCLK		7
+#define AUDIO_I2S3_PCLK		8
+#define AUDIO_I2C0_WCLK		9
+#define AUDIO_I2C0_PCLK		10
+#define AUDIO_SPDIF0_WCLK	11
+#define AUDIO_SPDIF0_PCLK	12
+#define AUDIO_SPDIF1_WCLK	13
+#define AUDIO_SPDIF1_PCLK	14
+#define AUDIO_TIMER_WCLK	15
+#define AUDIO_TIMER_PCLK	16
+#define AUDIO_TDM_WCLK		17
+#define AUDIO_TDM_PCLK		18
+#define AUDIO_TS_PCLK		19
+
+#define AUDIO_NR_CLKS		20
+
+#endif
diff --git a/include/dt-bindings/net/mscc-phy-vsc8531.h b/include/dt-bindings/net/mscc-phy-vsc8531.h
new file mode 100644
index 0000000..2383dd2
--- /dev/null
+++ b/include/dt-bindings/net/mscc-phy-vsc8531.h
@@ -0,0 +1,21 @@
+/*
+ * Device Tree constants for Microsemi VSC8531 PHY
+ *
+ * Author: Nagaraju Lakkaraju
+ *
+ * License: Dual MIT/GPL
+ * Copyright (c) 2016 Microsemi Corporation
+ */
+
+#ifndef _DT_BINDINGS_MSCC_VSC8531_H
+#define _DT_BINDINGS_MSCC_VSC8531_H
+
+/* MAC interface Edge rate control VDDMAC in milli Volts */
+#define MSCC_VDDMAC_3300		 3300
+#define MSCC_VDDMAC_2500		 2500
+#define MSCC_VDDMAC_1800		 1800
+#define MSCC_VDDMAC_1500		 1500
+#define MSCC_VDDMAC_MAX			 4
+#define MSCC_SLOWDOWN_MAX		 8
+
+#endif
diff --git a/include/dt-bindings/reset/gxbb-aoclkc.h b/include/dt-bindings/reset/gxbb-aoclkc.h
new file mode 100644
index 0000000..9e3fd60
--- /dev/null
+++ b/include/dt-bindings/reset/gxbb-aoclkc.h
@@ -0,0 +1,66 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright (c) 2016 BayLibre, SAS.
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * BSD LICENSE
+ *
+ * Copyright (c) 2016 BayLibre, SAS.
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DT_BINDINGS_RESET_AMLOGIC_MESON_GXBB_AOCLK
+#define DT_BINDINGS_RESET_AMLOGIC_MESON_GXBB_AOCLK
+
+#define RESET_AO_REMOTE		0
+#define RESET_AO_I2C_MASTER	1
+#define RESET_AO_I2C_SLAVE	2
+#define RESET_AO_UART1		3
+#define RESET_AO_UART2		4
+#define RESET_AO_IR_BLASTER	5
+
+#endif
diff --git a/include/dt-bindings/reset/mt2701-resets.h b/include/dt-bindings/reset/mt2701-resets.h
new file mode 100644
index 0000000..aaf0305
--- /dev/null
+++ b/include/dt-bindings/reset/mt2701-resets.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2015 MediaTek, Shunli Wang <shunli.wang@mediatek.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT2701
+#define _DT_BINDINGS_RESET_CONTROLLER_MT2701
+
+/* INFRACFG resets */
+#define MT2701_INFRA_EMI_REG_RST		0
+#define MT2701_INFRA_DRAMC0_A0_RST		1
+#define MT2701_INFRA_FHCTL_RST			2
+#define MT2701_INFRA_APCIRQ_EINT_RST		3
+#define MT2701_INFRA_APXGPT_RST			4
+#define MT2701_INFRA_SCPSYS_RST			5
+#define MT2701_INFRA_KP_RST			6
+#define MT2701_INFRA_PMIC_WRAP_RST		7
+#define MT2701_INFRA_MIPI_RST			8
+#define MT2701_INFRA_IRRX_RST			9
+#define MT2701_INFRA_CEC_RST			10
+#define MT2701_INFRA_EMI_RST			32
+#define MT2701_INFRA_DRAMC0_RST			34
+#define MT2701_INFRA_TRNG_RST			37
+#define MT2701_INFRA_SYSIRQ_RST			38
+
+/*  PERICFG resets */
+#define MT2701_PERI_UART0_SW_RST		0
+#define MT2701_PERI_UART1_SW_RST		1
+#define MT2701_PERI_UART2_SW_RST		2
+#define MT2701_PERI_UART3_SW_RST		3
+#define MT2701_PERI_GCPU_SW_RST			5
+#define MT2701_PERI_BTIF_SW_RST			6
+#define MT2701_PERI_PWM_SW_RST			8
+#define MT2701_PERI_AUXADC_SW_RST		10
+#define MT2701_PERI_DMA_SW_RST			11
+#define MT2701_PERI_NFI_SW_RST			14
+#define MT2701_PERI_NLI_SW_RST			15
+#define MT2701_PERI_THERM_SW_RST		16
+#define MT2701_PERI_MSDC2_SW_RST		17
+#define MT2701_PERI_MSDC0_SW_RST		19
+#define MT2701_PERI_MSDC1_SW_RST		20
+#define MT2701_PERI_I2C0_SW_RST			22
+#define MT2701_PERI_I2C1_SW_RST			23
+#define MT2701_PERI_I2C2_SW_RST			24
+#define MT2701_PERI_I2C3_SW_RST			25
+#define MT2701_PERI_USB_SW_RST			28
+#define MT2701_PERI_ETH_SW_RST			29
+#define MT2701_PERI_SPI0_SW_RST			33
+
+/* TOPRGU resets */
+#define MT2701_TOPRGU_INFRA_RST			0
+#define MT2701_TOPRGU_MM_RST			1
+#define MT2701_TOPRGU_MFG_RST			2
+#define MT2701_TOPRGU_ETHDMA_RST		3
+#define MT2701_TOPRGU_VDEC_RST			4
+#define MT2701_TOPRGU_VENC_IMG_RST		5
+#define MT2701_TOPRGU_DDRPHY_RST		6
+#define MT2701_TOPRGU_MD_RST			7
+#define MT2701_TOPRGU_INFRA_AO_RST		8
+#define MT2701_TOPRGU_CONN_RST			9
+#define MT2701_TOPRGU_APMIXED_RST		10
+#define MT2701_TOPRGU_HIFSYS_RST		11
+#define MT2701_TOPRGU_CONN_MCU_RST		12
+#define MT2701_TOPRGU_BDP_DISP_RST		13
+
+/* HIFSYS resets */
+#define MT2701_HIFSYS_UHOST0_RST		3
+#define MT2701_HIFSYS_UHOST1_RST		4
+#define MT2701_HIFSYS_UPHY0_RST			21
+#define MT2701_HIFSYS_UPHY1_RST			22
+#define MT2701_HIFSYS_PCIE0_RST			24
+#define MT2701_HIFSYS_PCIE1_RST			25
+#define MT2701_HIFSYS_PCIE2_RST			26
+
+#endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT2701 */
diff --git a/include/dt-bindings/reset/qcom,gcc-mdm9615.h b/include/dt-bindings/reset/qcom,gcc-mdm9615.h
new file mode 100644
index 0000000..7f86e9a
--- /dev/null
+++ b/include/dt-bindings/reset/qcom,gcc-mdm9615.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) BayLibre, SAS.
+ * Author : Neil Armstrong <narmstrong@baylibre.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _DT_BINDINGS_RESET_GCC_MDM9615_H
+#define _DT_BINDINGS_RESET_GCC_MDM9615_H
+
+#define SFAB_MSS_Q6_SW_RESET				0
+#define SFAB_MSS_Q6_FW_RESET				1
+#define QDSS_STM_RESET					2
+#define AFAB_SMPSS_S_RESET				3
+#define AFAB_SMPSS_M1_RESET				4
+#define AFAB_SMPSS_M0_RESET				5
+#define AFAB_EBI1_CH0_RESET				6
+#define AFAB_EBI1_CH1_RESET				7
+#define SFAB_ADM0_M0_RESET				8
+#define SFAB_ADM0_M1_RESET				9
+#define SFAB_ADM0_M2_RESET				10
+#define ADM0_C2_RESET					11
+#define ADM0_C1_RESET					12
+#define ADM0_C0_RESET					13
+#define ADM0_PBUS_RESET					14
+#define ADM0_RESET					15
+#define QDSS_CLKS_SW_RESET				16
+#define QDSS_POR_RESET					17
+#define QDSS_TSCTR_RESET				18
+#define QDSS_HRESET_RESET				19
+#define QDSS_AXI_RESET					20
+#define QDSS_DBG_RESET					21
+#define PCIE_A_RESET					22
+#define PCIE_AUX_RESET					23
+#define PCIE_H_RESET					24
+#define SFAB_PCIE_M_RESET				25
+#define SFAB_PCIE_S_RESET				26
+#define SFAB_MSS_M_RESET				27
+#define SFAB_USB3_M_RESET				28
+#define SFAB_RIVA_M_RESET				29
+#define SFAB_LPASS_RESET				30
+#define SFAB_AFAB_M_RESET				31
+#define AFAB_SFAB_M0_RESET				32
+#define AFAB_SFAB_M1_RESET				33
+#define SFAB_SATA_S_RESET				34
+#define SFAB_DFAB_M_RESET				35
+#define DFAB_SFAB_M_RESET				36
+#define DFAB_SWAY0_RESET				37
+#define DFAB_SWAY1_RESET				38
+#define DFAB_ARB0_RESET					39
+#define DFAB_ARB1_RESET					40
+#define PPSS_PROC_RESET					41
+#define PPSS_RESET					42
+#define DMA_BAM_RESET					43
+#define SPS_TIC_H_RESET					44
+#define SLIMBUS_H_RESET					45
+#define SFAB_CFPB_M_RESET				46
+#define SFAB_CFPB_S_RESET				47
+#define TSIF_H_RESET					48
+#define CE1_H_RESET					49
+#define CE1_CORE_RESET					50
+#define CE1_SLEEP_RESET					51
+#define CE2_H_RESET					52
+#define CE2_CORE_RESET					53
+#define SFAB_SFPB_M_RESET				54
+#define SFAB_SFPB_S_RESET				55
+#define RPM_PROC_RESET					56
+#define PMIC_SSBI2_RESET				57
+#define SDC1_RESET					58
+#define SDC2_RESET					59
+#define SDC3_RESET					60
+#define SDC4_RESET					61
+#define SDC5_RESET					62
+#define DFAB_A2_RESET					63
+#define USB_HS1_RESET					64
+#define USB_HSIC_RESET					65
+#define USB_FS1_XCVR_RESET				66
+#define USB_FS1_RESET					67
+#define USB_FS2_XCVR_RESET				68
+#define USB_FS2_RESET					69
+#define GSBI1_RESET					70
+#define GSBI2_RESET					71
+#define GSBI3_RESET					72
+#define GSBI4_RESET					73
+#define GSBI5_RESET					74
+#define GSBI6_RESET					75
+#define GSBI7_RESET					76
+#define GSBI8_RESET					77
+#define GSBI9_RESET					78
+#define GSBI10_RESET					79
+#define GSBI11_RESET					80
+#define GSBI12_RESET					81
+#define SPDM_RESET					82
+#define TLMM_H_RESET					83
+#define SFAB_MSS_S_RESET				84
+#define MSS_SLP_RESET					85
+#define MSS_Q6SW_JTAG_RESET				86
+#define MSS_Q6FW_JTAG_RESET				87
+#define MSS_RESET					88
+#define SATA_H_RESET					89
+#define SATA_RXOOB_RESE					90
+#define SATA_PMALIVE_RESET				91
+#define SATA_SFAB_M_RESET				92
+#define TSSC_RESET					93
+#define PDM_RESET					94
+#define MPM_H_RESET					95
+#define MPM_RESET					96
+#define SFAB_SMPSS_S_RESET				97
+#define PRNG_RESET					98
+#define RIVA_RESET					99
+#define USB_HS3_RESET					100
+#define USB_HS4_RESET					101
+#define CE3_RESET					102
+#define PCIE_EXT_PCI_RESET				103
+#define PCIE_PHY_RESET					104
+#define PCIE_PCI_RESET					105
+#define PCIE_POR_RESET					106
+#define PCIE_HCLK_RESET					107
+#define PCIE_ACLK_RESET					108
+#define CE3_H_RESET					109
+#define SFAB_CE3_M_RESET				110
+#define SFAB_CE3_S_RESET				111
+#define SATA_RESET					112
+#define CE3_SLEEP_RESET					113
+#define GSS_SLP_RESET					114
+#define GSS_RESET					115
+
+#endif
diff --git a/include/dt-bindings/reset/sun6i-a31-ccu.h b/include/dt-bindings/reset/sun6i-a31-ccu.h
new file mode 100644
index 0000000..fbff365
--- /dev/null
+++ b/include/dt-bindings/reset/sun6i-a31-ccu.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2016 Chen-Yu Tsai <wens@csie.org>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_RST_SUN6I_A31_H_
+#define _DT_BINDINGS_RST_SUN6I_A31_H_
+
+#define RST_USB_PHY0		0
+#define RST_USB_PHY1		1
+#define RST_USB_PHY2		2
+
+#define RST_AHB1_MIPI_DSI	3
+#define RST_AHB1_SS		4
+#define RST_AHB1_DMA		5
+#define RST_AHB1_MMC0		6
+#define RST_AHB1_MMC1		7
+#define RST_AHB1_MMC2		8
+#define RST_AHB1_MMC3		9
+#define RST_AHB1_NAND1		10
+#define RST_AHB1_NAND0		11
+#define RST_AHB1_SDRAM		12
+#define RST_AHB1_EMAC		13
+#define RST_AHB1_TS		14
+#define RST_AHB1_HSTIMER	15
+#define RST_AHB1_SPI0		16
+#define RST_AHB1_SPI1		17
+#define RST_AHB1_SPI2		18
+#define RST_AHB1_SPI3		19
+#define RST_AHB1_OTG		20
+#define RST_AHB1_EHCI0		21
+#define RST_AHB1_EHCI1		22
+#define RST_AHB1_OHCI0		23
+#define RST_AHB1_OHCI1		24
+#define RST_AHB1_OHCI2		25
+#define RST_AHB1_VE		26
+#define RST_AHB1_LCD0		27
+#define RST_AHB1_LCD1		28
+#define RST_AHB1_CSI		29
+#define RST_AHB1_HDMI		30
+#define RST_AHB1_BE0		31
+#define RST_AHB1_BE1		32
+#define RST_AHB1_FE0		33
+#define RST_AHB1_FE1		34
+#define RST_AHB1_MP		35
+#define RST_AHB1_GPU		36
+#define RST_AHB1_DEU0		37
+#define RST_AHB1_DEU1		38
+#define RST_AHB1_DRC0		39
+#define RST_AHB1_DRC1		40
+#define RST_AHB1_LVDS		41
+
+#define RST_APB1_CODEC		42
+#define RST_APB1_SPDIF		43
+#define RST_APB1_DIGITAL_MIC	44
+#define RST_APB1_DAUDIO0	45
+#define RST_APB1_DAUDIO1	46
+#define RST_APB2_I2C0		47
+#define RST_APB2_I2C1		48
+#define RST_APB2_I2C2		49
+#define RST_APB2_I2C3		50
+#define RST_APB2_UART0		51
+#define RST_APB2_UART1		52
+#define RST_APB2_UART2		53
+#define RST_APB2_UART3		54
+#define RST_APB2_UART4		55
+#define RST_APB2_UART5		56
+
+#endif /* _DT_BINDINGS_RST_SUN6I_A31_H_ */
diff --git a/include/dt-bindings/reset/sun8i-a23-a33-ccu.h b/include/dt-bindings/reset/sun8i-a23-a33-ccu.h
new file mode 100644
index 0000000..6121f2b
--- /dev/null
+++ b/include/dt-bindings/reset/sun8i-a23-a33-ccu.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2016 Maxime Ripard <maxime.ripard@free-electrons.com>
+ *
+ * This file is dual-licensed: you can use it either under the terms
+ * of the GPL or the X11 license, at your option. Note that this dual
+ * licensing only applies to this file, and not this project as a
+ * whole.
+ *
+ *  a) This file is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation; either version 2 of the
+ *     License, or (at your option) any later version.
+ *
+ *     This file is distributed in the hope that it will be useful,
+ *     but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *     GNU General Public License for more details.
+ *
+ * Or, alternatively,
+ *
+ *  b) Permission is hereby granted, free of charge, to any person
+ *     obtaining a copy of this software and associated documentation
+ *     files (the "Software"), to deal in the Software without
+ *     restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or
+ *     sell copies of the Software, and to permit persons to whom the
+ *     Software is furnished to do so, subject to the following
+ *     conditions:
+ *
+ *     The above copyright notice and this permission notice shall be
+ *     included in all copies or substantial portions of the Software.
+ *
+ *     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *     OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *     HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *     WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *     OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DT_BINDINGS_RST_SUN8I_A23_A33_H_
+#define _DT_BINDINGS_RST_SUN8I_A23_A33_H_
+
+#define RST_USB_PHY0		0
+#define RST_USB_PHY1		1
+#define RST_USB_HSIC		2
+#define RST_MBUS		3
+#define RST_BUS_MIPI_DSI	4
+#define RST_BUS_SS		5
+#define RST_BUS_DMA		6
+#define RST_BUS_MMC0		7
+#define RST_BUS_MMC1		8
+#define RST_BUS_MMC2		9
+#define RST_BUS_NAND		10
+#define RST_BUS_DRAM		11
+#define RST_BUS_HSTIMER		12
+#define RST_BUS_SPI0		13
+#define RST_BUS_SPI1		14
+#define RST_BUS_OTG		15
+#define RST_BUS_EHCI		16
+#define RST_BUS_OHCI		17
+#define RST_BUS_VE		18
+#define RST_BUS_LCD		19
+#define RST_BUS_CSI		20
+#define RST_BUS_DE_BE		21
+#define RST_BUS_DE_FE		22
+#define RST_BUS_GPU		23
+#define RST_BUS_MSGBOX		24
+#define RST_BUS_SPINLOCK	25
+#define RST_BUS_DRC		26
+#define RST_BUS_SAT		27
+#define RST_BUS_LVDS		28
+#define RST_BUS_CODEC		29
+#define RST_BUS_I2S0		30
+#define RST_BUS_I2S1		31
+#define RST_BUS_I2C0		32
+#define RST_BUS_I2C1		33
+#define RST_BUS_I2C2		34
+#define RST_BUS_UART0		35
+#define RST_BUS_UART1		36
+#define RST_BUS_UART2		37
+#define RST_BUS_UART3		38
+#define RST_BUS_UART4		39
+
+#endif /* _DT_BINDINGS_RST_SUN8I_A23_A33_H_ */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4d8452c..94afcb2c 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -85,6 +85,8 @@
 	return dev_name(&adev->dev);
 }
 
+struct device *acpi_get_first_physical_node(struct acpi_device *adev);
+
 enum acpi_irq_model_id {
 	ACPI_IRQ_MODEL_PIC = 0,
 	ACPI_IRQ_MODEL_IOAPIC,
@@ -267,12 +269,18 @@
 	return phys_id == PHYS_CPUID_INVALID;
 }
 
+/* Validate the processor object's proc_id */
+bool acpi_processor_validate_proc_id(int proc_id);
+
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
 int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu);
 int acpi_unmap_cpu(int cpu);
+int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
+void acpi_set_processor_mapping(void);
+
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
 int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr);
 #endif
@@ -634,6 +642,11 @@
 	return NULL;
 }
 
+static inline struct device *acpi_get_first_physical_node(struct acpi_device *adev)
+{
+	return NULL;
+}
+
 static inline void acpi_early_init(void) { }
 static inline void acpi_subsystem_init(void) { }
 
@@ -751,6 +764,12 @@
 
 #endif	/* !CONFIG_ACPI */
 
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+int acpi_ioapic_add(acpi_handle root);
+#else
+static inline int acpi_ioapic_add(acpi_handle root) { return 0; }
+#endif
+
 #ifdef CONFIG_ACPI
 void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
 			       u32 pm1a_ctrl,  u32 pm1b_ctrl));
@@ -1056,7 +1075,7 @@
 	return NULL;
 }
 
-#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, validate, data, fn) \
+#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \
 	static const void * __acpi_table_##name[]			\
 		__attribute__((unused))					\
 		 = { (void *) table_id,					\
@@ -1074,4 +1093,16 @@
 static inline void acpi_table_upgrade(void) { }
 #endif
 
+#if defined(CONFIG_ACPI) && defined(CONFIG_ACPI_WATCHDOG)
+extern bool acpi_has_watchdog(void);
+#else
+static inline bool acpi_has_watchdog(void) { return false; }
+#endif
+
+#ifdef CONFIG_ACPI_SPCR_TABLE
+int parse_spcr(bool earlycon);
+#else
+static inline int parse_spcr(bool earlycon) { return 0; }
+#endif
+
 #endif	/*_LINUX_ACPI_H*/
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
new file mode 100644
index 0000000..0e32dac
--- /dev/null
+++ b/include/linux/acpi_iort.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016, Semihalf
+ *	Author: Tomasz Nowicki <tn@semihalf.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __ACPI_IORT_H__
+#define __ACPI_IORT_H__
+
+#include <linux/acpi.h>
+#include <linux/fwnode.h>
+#include <linux/irqdomain.h>
+
+int iort_register_domain_token(int trans_id, struct fwnode_handle *fw_node);
+void iort_deregister_domain_token(int trans_id);
+struct fwnode_handle *iort_find_domain_token(int trans_id);
+#ifdef CONFIG_ACPI_IORT
+void acpi_iort_init(void);
+u32 iort_msi_map_rid(struct device *dev, u32 req_id);
+struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id);
+#else
+static inline void acpi_iort_init(void) { }
+static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id)
+{ return req_id; }
+static inline struct irq_domain *iort_get_device_domain(struct device *dev,
+							u32 req_id)
+{ return NULL; }
+#endif
+
+#endif /* __ACPI_IORT_H__ */
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 3d8dcdd..d143c13 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -53,8 +53,14 @@
 	AMBA_VENDOR_ST = 0x80,
 	AMBA_VENDOR_QCOM = 0x51,
 	AMBA_VENDOR_LSI = 0xb6,
+	AMBA_VENDOR_LINUX = 0xfe,	/* This value is not official */
 };
 
+/* This is used to generate pseudo-ID for AMBA device */
+#define AMBA_LINUX_ID(conf, rev, part) \
+	(((conf) & 0xff) << 24 | ((rev) & 0xf) << 20 | \
+	AMBA_VENDOR_LINUX << 12 | ((part) & 0xfff))
+
 extern struct bus_type amba_bustype;
 
 #define to_amba_device(d)	container_of(d, struct amba_device, dev)
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index d76a19b..ad0965e 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -104,6 +104,15 @@
 #define UART01x_FR_CTS 		0x001
 #define UART01x_FR_TMSK		(UART01x_FR_TXFF + UART01x_FR_BUSY)
 
+/*
+ * Some bits of Flag Register on ZTE device have different position from
+ * standard ones.
+ */
+#define ZX_UART01x_FR_BUSY	0x100
+#define ZX_UART01x_FR_DSR	0x008
+#define ZX_UART01x_FR_CTS	0x002
+#define ZX_UART011_FR_RI	0x001
+
 #define UART011_CR_CTSEN	0x8000	/* CTS hardware flow control */
 #define UART011_CR_RTSEN	0x4000	/* RTS hardware flow control */
 #define UART011_CR_OUT2		0x2000	/* OUT2 */
diff --git a/include/linux/atmel_serial.h b/include/linux/atmel_serial.h
index 5a4d664..bd25605 100644
--- a/include/linux/atmel_serial.h
+++ b/include/linux/atmel_serial.h
@@ -118,6 +118,8 @@
 
 #define ATMEL_US_BRGR		0x20	/* Baud Rate Generator Register */
 #define	ATMEL_US_CD		GENMASK(15, 0)	/* Clock Divider */
+#define ATMEL_US_FP_OFFSET	16	/* Fractional Part */
+#define ATMEL_US_FP_MASK	0x7
 
 #define ATMEL_US_RTOR		0x24	/* Receiver Time-out Register for USART */
 #define ATMEL_UA_RTOR		0x28	/* Receiver Time-out Register for UART */
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 3db25df..8eeedb2 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -205,6 +205,9 @@
 #define  BCMA_PKG_ID_BCM4709	0
 #define BCMA_CHIP_ID_BCM47094	53030
 #define BCMA_CHIP_ID_BCM53018	53018
+#define BCMA_CHIP_ID_BCM53573	53573
+#define  BCMA_PKG_ID_BCM53573	0
+#define  BCMA_PKG_ID_BCM47189	1
 
 /* Board types (on PCI usually equals to the subsystem dev id) */
 /* BCM4313 */
diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h
index ebd5c1f..9986f82 100644
--- a/include/linux/bcma/bcma_regs.h
+++ b/include/linux/bcma/bcma_regs.h
@@ -10,6 +10,7 @@
 #define  BCMA_CLKCTLST_HAVEALPREQ	0x00000008 /* ALP available request */
 #define  BCMA_CLKCTLST_HAVEHTREQ	0x00000010 /* HT available request */
 #define  BCMA_CLKCTLST_HWCROFF		0x00000020 /* Force HW clock request off */
+#define  BCMA_CLKCTLST_HQCLKREQ		0x00000040 /* HQ Clock */
 #define  BCMA_CLKCTLST_EXTRESREQ	0x00000700 /* Mask of external resource requests */
 #define  BCMA_CLKCTLST_EXTRESREQ_SHIFT	8
 #define  BCMA_CLKCTLST_HAVEALP		0x00010000 /* ALP available */
@@ -23,6 +24,7 @@
 #define  BCMA_CLKCTLST_4328A0_HAVEALP	0x00020000 /* 4328a0 has reversed bits */
 
 /* Agent registers (common for every core) */
+#define BCMA_OOB_SEL_OUT_A30		0x0100
 #define BCMA_IOCTL			0x0408 /* IO control */
 #define  BCMA_IOCTL_CLK			0x0001
 #define  BCMA_IOCTL_FGC			0x0002
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 59ffaa6..23ddf4b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -71,7 +71,8 @@
 {
 	if (bio &&
 	    bio->bi_iter.bi_size &&
-	    bio_op(bio) != REQ_OP_DISCARD)
+	    bio_op(bio) != REQ_OP_DISCARD &&
+	    bio_op(bio) != REQ_OP_SECURE_ERASE)
 		return true;
 
 	return false;
@@ -79,7 +80,9 @@
 
 static inline bool bio_no_advance_iter(struct bio *bio)
 {
-	return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_WRITE_SAME;
+	return bio_op(bio) == REQ_OP_DISCARD ||
+	       bio_op(bio) == REQ_OP_SECURE_ERASE ||
+	       bio_op(bio) == REQ_OP_WRITE_SAME;
 }
 
 static inline bool bio_is_rw(struct bio *bio)
@@ -199,6 +202,9 @@
 	if (bio_op(bio) == REQ_OP_DISCARD)
 		return 1;
 
+	if (bio_op(bio) == REQ_OP_SECURE_ERASE)
+		return 1;
+
 	if (bio_op(bio) == REQ_OP_WRITE_SAME)
 		return 1;
 
diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h
new file mode 100644
index 0000000..f6505d8
--- /dev/null
+++ b/include/linux/bitfield.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2014 Felix Fietkau <nbd@nbd.name>
+ * Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_BITFIELD_H
+#define _LINUX_BITFIELD_H
+
+#include <linux/bug.h>
+
+/*
+ * Bitfield access macros
+ *
+ * FIELD_{GET,PREP} macros take as first parameter shifted mask
+ * from which they extract the base mask and shift amount.
+ * Mask must be a compilation time constant.
+ *
+ * Example:
+ *
+ *  #define REG_FIELD_A  GENMASK(6, 0)
+ *  #define REG_FIELD_B  BIT(7)
+ *  #define REG_FIELD_C  GENMASK(15, 8)
+ *  #define REG_FIELD_D  GENMASK(31, 16)
+ *
+ * Get:
+ *  a = FIELD_GET(REG_FIELD_A, reg);
+ *  b = FIELD_GET(REG_FIELD_B, reg);
+ *
+ * Set:
+ *  reg = FIELD_PREP(REG_FIELD_A, 1) |
+ *	  FIELD_PREP(REG_FIELD_B, 0) |
+ *	  FIELD_PREP(REG_FIELD_C, c) |
+ *	  FIELD_PREP(REG_FIELD_D, 0x40);
+ *
+ * Modify:
+ *  reg &= ~REG_FIELD_C;
+ *  reg |= FIELD_PREP(REG_FIELD_C, c);
+ */
+
+#define __bf_shf(x) (__builtin_ffsll(x) - 1)
+
+#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx)			\
+	({								\
+		BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask),		\
+				 _pfx "mask is not constant");		\
+		BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero");	\
+		BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ?		\
+				 ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
+				 _pfx "value too large for the field"); \
+		BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull,		\
+				 _pfx "type of reg too small for mask"); \
+		__BUILD_BUG_ON_NOT_POWER_OF_2((_mask) +			\
+					      (1ULL << __bf_shf(_mask))); \
+	})
+
+/**
+ * FIELD_PREP() - prepare a bitfield element
+ * @_mask: shifted mask defining the field's length and position
+ * @_val:  value to put in the field
+ *
+ * FIELD_PREP() masks and shifts up the value.  The result should
+ * be combined with other fields of the bitfield using logical OR.
+ */
+#define FIELD_PREP(_mask, _val)						\
+	({								\
+		__BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: ");	\
+		((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask);	\
+	})
+
+/**
+ * FIELD_GET() - extract a bitfield element
+ * @_mask: shifted mask defining the field's length and position
+ * @_reg:  32bit value of entire bitfield
+ *
+ * FIELD_GET() extracts the field specified by @_mask from the
+ * bitfield passed in as @_reg by masking and shifting it down.
+ */
+#define FIELD_GET(_mask, _reg)						\
+	({								\
+		__BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: ");	\
+		(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask));	\
+	})
+
+#endif
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 598bc99..3b77588 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -339,6 +339,24 @@
 	return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits);
 }
 
+/*
+ * bitmap_from_u64 - Check and swap words within u64.
+ *  @mask: source bitmap
+ *  @dst:  destination bitmap
+ *
+ * In 32-bit Big Endian kernel, when using (u32 *)(&val)[*]
+ * to read u64 mask, we will get the wrong word.
+ * That is "(u32 *)(&val)[0]" gets the upper 32 bits,
+ * but we expect the lower 32-bits of u64.
+ */
+static inline void bitmap_from_u64(unsigned long *dst, u64 mask)
+{
+	dst[0] = mask & ULONG_MAX;
+
+	if (sizeof(mask) > sizeof(unsigned long))
+		dst[1] = mask >> 32;
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __LINUX_BITMAP_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2c210b6..e79055c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -882,7 +882,7 @@
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 						     int op)
 {
-	if (unlikely(op == REQ_OP_DISCARD))
+	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
 		return min(q->limits.max_discard_sectors, UINT_MAX >> 9);
 
 	if (unlikely(op == REQ_OP_WRITE_SAME))
@@ -913,7 +913,9 @@
 	if (unlikely(rq->cmd_type != REQ_TYPE_FS))
 		return q->limits.max_hw_sectors;
 
-	if (!q->limits.chunk_sectors || (req_op(rq) == REQ_OP_DISCARD))
+	if (!q->limits.chunk_sectors ||
+	    req_op(rq) == REQ_OP_DISCARD ||
+	    req_op(rq) == REQ_OP_SECURE_ERASE)
 		return blk_queue_get_max_sectors(q, req_op(rq));
 
 	return min(blk_max_size_offset(q, offset),
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1113423..c201017 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -96,6 +96,7 @@
 struct bpf_func_proto {
 	u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 	bool gpl_only;
+	bool pkt_access;
 	enum bpf_return_type ret_type;
 	enum bpf_arg_type arg1_type;
 	enum bpf_arg_type arg2_type;
@@ -138,6 +139,13 @@
 	 */
 	PTR_TO_PACKET,
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
+
+	/* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map
+	 * elem value.  We only allow this if we can statically verify that
+	 * access from this register are going to fall within the size of the
+	 * map element.
+	 */
+	PTR_TO_MAP_VALUE_ADJ,
 };
 
 struct bpf_prog;
@@ -151,7 +159,8 @@
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type,
 				enum bpf_reg_type *reg_type);
-
+	int (*gen_prologue)(struct bpf_insn *insn, bool direct_write,
+			    const struct bpf_prog *prog);
 	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
 				  int src_reg, int ctx_off,
 				  struct bpf_insn *insn, struct bpf_prog *prog);
@@ -297,6 +306,10 @@
 static inline void bpf_prog_put(struct bpf_prog *prog)
 {
 }
+static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 /* verifier prototypes for helper functions called from eBPF programs */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
new file mode 100644
index 0000000..7035b99
--- /dev/null
+++ b/include/linux/bpf_verifier.h
@@ -0,0 +1,102 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _LINUX_BPF_VERIFIER_H
+#define _LINUX_BPF_VERIFIER_H 1
+
+#include <linux/bpf.h> /* for enum bpf_reg_type */
+#include <linux/filter.h> /* for MAX_BPF_STACK */
+
+ /* Just some arbitrary values so we can safely do math without overflowing and
+  * are obviously wrong for any sort of memory access.
+  */
+#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024)
+#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024)
+
+struct bpf_reg_state {
+	enum bpf_reg_type type;
+	/*
+	 * Used to determine if any memory access using this register will
+	 * result in a bad access.
+	 */
+	u64 min_value, max_value;
+	union {
+		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
+		s64 imm;
+
+		/* valid when type == PTR_TO_PACKET* */
+		struct {
+			u32 id;
+			u16 off;
+			u16 range;
+		};
+
+		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
+		 *   PTR_TO_MAP_VALUE_OR_NULL
+		 */
+		struct bpf_map *map_ptr;
+	};
+};
+
+enum bpf_stack_slot_type {
+	STACK_INVALID,    /* nothing was stored in this stack slot */
+	STACK_SPILL,      /* register spilled into stack */
+	STACK_MISC	  /* BPF program wrote some data into this slot */
+};
+
+#define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
+
+/* state of the program:
+ * type of all registers and stack info
+ */
+struct bpf_verifier_state {
+	struct bpf_reg_state regs[MAX_BPF_REG];
+	u8 stack_slot_type[MAX_BPF_STACK];
+	struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
+};
+
+/* linked list of verifier states used to prune search */
+struct bpf_verifier_state_list {
+	struct bpf_verifier_state state;
+	struct bpf_verifier_state_list *next;
+};
+
+struct bpf_insn_aux_data {
+	enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
+};
+
+#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+
+struct bpf_verifier_env;
+struct bpf_ext_analyzer_ops {
+	int (*insn_hook)(struct bpf_verifier_env *env,
+			 int insn_idx, int prev_insn_idx);
+};
+
+/* single container for all structs
+ * one verifier_env per bpf_check() call
+ */
+struct bpf_verifier_env {
+	struct bpf_prog *prog;		/* eBPF program being verified */
+	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
+	int stack_size;			/* number of states to be processed */
+	struct bpf_verifier_state cur_state; /* current verifier state */
+	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
+	const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */
+	void *analyzer_priv; /* pointer to external analyzer's private data */
+	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
+	u32 used_map_cnt;		/* number of used maps */
+	u32 id_gen;			/* used to generate unique reg IDs */
+	bool allow_ptr_leaks;
+	bool seen_direct_write;
+	bool varlen_map_value_access;
+	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
+};
+
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+		 void *priv);
+
+#endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/include/linux/bug.h b/include/linux/bug.h
index e51b070..292d6a1 100644
--- a/include/linux/bug.h
+++ b/include/linux/bug.h
@@ -13,6 +13,7 @@
 struct pt_regs;
 
 #ifdef __CHECKER__
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0)
 #define BUILD_BUG_ON_ZERO(e) (0)
 #define BUILD_BUG_ON_NULL(e) ((void*)0)
@@ -24,6 +25,8 @@
 #else /* __CHECKER__ */
 
 /* Force a compilation error if a constant expression is not a power of 2 */
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n)	\
+	BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
 #define BUILD_BUG_ON_NOT_POWER_OF_2(n)			\
 	BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
 
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 701b64a..89b65b82 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -74,7 +74,8 @@
 		  "Attempted to advance past end of bvec iter\n");
 
 	while (bytes) {
-		unsigned len = min(bytes, bvec_iter_len(bv, *iter));
+		unsigned iter_len = bvec_iter_len(bv, *iter);
+		unsigned len = min(bytes, iter_len);
 
 		bytes -= len;
 		iter->bi_size -= len;
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 5261751..5f52709 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -32,6 +32,7 @@
  * CAN common private data
  */
 struct can_priv {
+	struct net_device *dev;
 	struct can_device_stats can_stats;
 
 	struct can_bittiming bittiming, data_bittiming;
@@ -47,7 +48,7 @@
 	u32 ctrlmode_static;	/* static enabled options for driver/hardware */
 
 	int restart_ms;
-	struct timer_list restart_timer;
+	struct delayed_work restart_work;
 
 	int (*do_set_bittiming)(struct net_device *dev);
 	int (*do_set_data_bittiming)(struct net_device *dev);
diff --git a/include/linux/cec-funcs.h b/include/linux/cec-funcs.h
index 82c3d3b..138bbf7 100644
--- a/include/linux/cec-funcs.h
+++ b/include/linux/cec-funcs.h
@@ -162,10 +162,11 @@
 
 
 /* One Touch Record Feature */
-static inline void cec_msg_record_off(struct cec_msg *msg)
+static inline void cec_msg_record_off(struct cec_msg *msg, bool reply)
 {
 	msg->len = 2;
 	msg->msg[1] = CEC_MSG_RECORD_OFF;
+	msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
 }
 
 struct cec_op_arib_data {
@@ -227,7 +228,7 @@
 	if (digital->service_id_method == CEC_OP_SERVICE_ID_METHOD_BY_CHANNEL) {
 		*msg++ = (digital->channel.channel_number_fmt << 2) |
 			 (digital->channel.major >> 8);
-		*msg++ = digital->channel.major && 0xff;
+		*msg++ = digital->channel.major & 0xff;
 		*msg++ = digital->channel.minor >> 8;
 		*msg++ = digital->channel.minor & 0xff;
 		*msg++ = 0;
@@ -323,6 +324,7 @@
 }
 
 static inline void cec_msg_record_on(struct cec_msg *msg,
+				     bool reply,
 				     const struct cec_op_record_src *rec_src)
 {
 	switch (rec_src->type) {
@@ -346,6 +348,7 @@
 					    rec_src->ext_phys_addr.phys_addr);
 		break;
 	}
+	msg->reply = reply ? CEC_MSG_RECORD_STATUS : 0;
 }
 
 static inline void cec_ops_record_on(const struct cec_msg *msg,
@@ -1141,6 +1144,75 @@
 	msg->reply = reply ? CEC_MSG_DEVICE_VENDOR_ID : 0;
 }
 
+static inline void cec_msg_vendor_command(struct cec_msg *msg,
+					  __u8 size, const __u8 *vendor_cmd)
+{
+	if (size > 14)
+		size = 14;
+	msg->len = 2 + size;
+	msg->msg[1] = CEC_MSG_VENDOR_COMMAND;
+	memcpy(msg->msg + 2, vendor_cmd, size);
+}
+
+static inline void cec_ops_vendor_command(const struct cec_msg *msg,
+					  __u8 *size,
+					  const __u8 **vendor_cmd)
+{
+	*size = msg->len - 2;
+
+	if (*size > 14)
+		*size = 14;
+	*vendor_cmd = msg->msg + 2;
+}
+
+static inline void cec_msg_vendor_command_with_id(struct cec_msg *msg,
+						  __u32 vendor_id, __u8 size,
+						  const __u8 *vendor_cmd)
+{
+	if (size > 11)
+		size = 11;
+	msg->len = 5 + size;
+	msg->msg[1] = CEC_MSG_VENDOR_COMMAND_WITH_ID;
+	msg->msg[2] = vendor_id >> 16;
+	msg->msg[3] = (vendor_id >> 8) & 0xff;
+	msg->msg[4] = vendor_id & 0xff;
+	memcpy(msg->msg + 5, vendor_cmd, size);
+}
+
+static inline void cec_ops_vendor_command_with_id(const struct cec_msg *msg,
+						  __u32 *vendor_id,  __u8 *size,
+						  const __u8 **vendor_cmd)
+{
+	*size = msg->len - 5;
+
+	if (*size > 11)
+		*size = 11;
+	*vendor_id = (msg->msg[2] << 16) | (msg->msg[3] << 8) | msg->msg[4];
+	*vendor_cmd = msg->msg + 5;
+}
+
+static inline void cec_msg_vendor_remote_button_down(struct cec_msg *msg,
+						     __u8 size,
+						     const __u8 *rc_code)
+{
+	if (size > 14)
+		size = 14;
+	msg->len = 2 + size;
+	msg->msg[1] = CEC_MSG_VENDOR_REMOTE_BUTTON_DOWN;
+	memcpy(msg->msg + 2, rc_code, size);
+}
+
+static inline void cec_ops_vendor_remote_button_down(const struct cec_msg *msg,
+						     __u8 *size,
+						     const __u8 **rc_code)
+{
+	*size = msg->len - 2;
+
+	if (*size > 14)
+		*size = 14;
+	*rc_code = msg->msg + 2;
+}
+
 static inline void cec_msg_vendor_remote_button_up(struct cec_msg *msg)
 {
 	msg->len = 2;
@@ -1277,7 +1349,7 @@
 		msg->len += 4;
 		msg->msg[3] = (ui_cmd->channel_identifier.channel_number_fmt << 2) |
 			      (ui_cmd->channel_identifier.major >> 8);
-		msg->msg[4] = ui_cmd->channel_identifier.major && 0xff;
+		msg->msg[4] = ui_cmd->channel_identifier.major & 0xff;
 		msg->msg[5] = ui_cmd->channel_identifier.minor >> 8;
 		msg->msg[6] = ui_cmd->channel_identifier.minor & 0xff;
 		break;
diff --git a/include/linux/cec.h b/include/linux/cec.h
index b3e2289..851968e 100644
--- a/include/linux/cec.h
+++ b/include/linux/cec.h
@@ -364,7 +364,7 @@
  * @num_log_addrs: how many logical addresses should be claimed. Set by the
  *	caller.
  * @vendor_id: the vendor ID of the device. Set by the caller.
- * @flags: set to 0.
+ * @flags: flags.
  * @osd_name: the OSD name of the device. Set by the caller.
  * @primary_device_type: the primary device type for each logical address.
  *	Set by the caller.
@@ -389,6 +389,9 @@
 	__u8 features[CEC_MAX_LOG_ADDRS][12];
 };
 
+/* Allow a fallback to unregistered */
+#define CEC_LOG_ADDRS_FL_ALLOW_UNREG_FALLBACK	(1 << 0)
+
 /* Events */
 
 /* Event that occurs when the adapter state changes */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 984f73b..a4414a1 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -497,6 +497,23 @@
 	return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
 }
 
+/**
+ * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
+ * @task: the task to be tested
+ * @ancestor: possible ancestor of @task's cgroup
+ *
+ * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
+ * It follows all the same rules as cgroup_is_descendant, and only applies
+ * to the default hierarchy.
+ */
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	struct css_set *cset = task_css_set(task);
+
+	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
+}
+
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_is_populated(struct cgroup *cgrp)
 {
@@ -557,6 +574,7 @@
 #else /* !CONFIG_CGROUPS */
 
 struct cgroup_subsys_state;
+struct cgroup;
 
 static inline void css_put(struct cgroup_subsys_state *css) {}
 static inline int cgroup_attach_task_all(struct task_struct *from,
@@ -574,6 +592,11 @@
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 
+static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
+					       struct cgroup *ancestor)
+{
+	return true;
+}
 #endif /* !CONFIG_CGROUPS */
 
 /*
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index a39c0c5..af59638 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -772,7 +772,7 @@
 };
 
 struct clk_hw_onecell_data {
-	size_t num;
+	unsigned int num;
 	struct clk_hw *hws[];
 };
 
@@ -780,6 +780,18 @@
 
 #define CLK_OF_DECLARE(name, compat, fn) OF_DECLARE_1(clk, name, compat, fn)
 
+/*
+ * Use this macro when you have a driver that requires two initialization
+ * routines, one at of_clk_init(), and one at platform device probe
+ */
+#define CLK_OF_DECLARE_DRIVER(name, compat, fn) \
+	static void name##_of_clk_init_driver(struct device_node *np)	\
+	{								\
+		of_node_clear_flag(np, OF_POPULATED);			\
+		fn(np);							\
+	}								\
+	OF_DECLARE_1(clk, name, compat, name##_of_clk_init_driver)
+
 #ifdef CONFIG_OF
 int of_clk_add_provider(struct device_node *np,
 			struct clk *(*clk_src_get)(struct of_phandle_args *args,
@@ -842,7 +854,7 @@
 {
 	return ERR_PTR(-ENOENT);
 }
-static inline int of_clk_get_parent_count(struct device_node *np)
+static inline unsigned int of_clk_get_parent_count(struct device_node *np)
 {
 	return 0;
 }
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index e294939..573c5a1 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -158,7 +158,7 @@
 #define __compiler_offsetof(a, b)					\
 	__builtin_offsetof(a, b)
 
-#if GCC_VERSION >= 40100 && GCC_VERSION < 40600
+#if GCC_VERSION >= 40100
 # define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 #endif
 
@@ -242,7 +242,11 @@
  */
 #define asm_volatile_goto(x...)	do { asm goto(x); asm (""); } while (0)
 
-#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP
+/*
+ * sparse (__CHECKER__) pretends to be gcc, but can't do constant
+ * folding in __builtin_bswap*() (yet), so don't set these for it.
+ */
+#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__)
 #if GCC_VERSION >= 40400
 #define __HAVE_BUILTIN_BSWAP32__
 #define __HAVE_BUILTIN_BSWAP64__
@@ -250,7 +254,7 @@
 #if GCC_VERSION >= 40800
 #define __HAVE_BUILTIN_BSWAP16__
 #endif
-#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */
 
 #if GCC_VERSION >= 50000
 #define KASAN_ABI_VERSION 4
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 1bb9548..6685698 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -527,13 +527,14 @@
  * object's lifetime is managed by something other than RCU.  That
  * "something other" might be reference counting or simple immortality.
  *
- * The seemingly unused void * variable is to validate @p is indeed a pointer
- * type. All pointer types silently cast to void *.
+ * The seemingly unused variable ___typecheck_p validates that @p is
+ * indeed a pointer type by using a pointer to typeof(*p) as the type.
+ * Taking a pointer to typeof(*p) again is needed in case p is void *.
  */
 #define lockless_dereference(p) \
 ({ \
 	typeof(p) _________p1 = READ_ONCE(p); \
-	__maybe_unused const void * const _________p2 = _________p1; \
+	typeof(*(p)) *___typecheck_p __maybe_unused; \
 	smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
 	(_________p1); \
 })
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 385d62e..2a5982c 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -232,8 +232,9 @@
 	int (*cpu_id)(struct coresight_device *csdev);
 	int (*trace_id)(struct coresight_device *csdev);
 	int (*enable)(struct coresight_device *csdev,
-		      struct perf_event_attr *attr,  u32 mode);
-	void (*disable)(struct coresight_device *csdev);
+		      struct perf_event *event,  u32 mode);
+	void (*disable)(struct coresight_device *csdev,
+			struct perf_event *event);
 };
 
 struct coresight_ops {
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 797d9c8..7572d9e 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -61,17 +61,8 @@
 #define CPU_DOWN_PREPARE	0x0005 /* CPU (unsigned)v going down */
 #define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
 #define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
-#define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
-					* not handling interrupts, soon dead.
-					* Called on the dying cpu, interrupts
-					* are already disabled. Must not
-					* sleep, must not fail */
 #define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
 					* lock is dropped */
-#define CPU_STARTING		0x000A /* CPU (unsigned)v soon running.
-					* Called on the new cpu, just before
-					* enabling interrupts. Must not sleep,
-					* must not fail */
 #define CPU_BROKEN		0x000B /* CPU (unsigned)v did not die properly,
 					* perhaps due to preemption. */
 
@@ -86,9 +77,6 @@
 #define CPU_DOWN_PREPARE_FROZEN	(CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
 #define CPU_DOWN_FAILED_FROZEN	(CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
 #define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
-#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
-#define CPU_STARTING_FROZEN	(CPU_STARTING | CPU_TASKS_FROZEN)
-
 
 #ifdef CONFIG_SMP
 extern bool cpuhp_tasks_frozen;
@@ -228,7 +216,11 @@
 #endif		/* CONFIG_HOTPLUG_CPU */
 
 #ifdef CONFIG_PM_SLEEP_SMP
-extern int disable_nonboot_cpus(void);
+extern int freeze_secondary_cpus(int primary);
+static inline int disable_nonboot_cpus(void)
+{
+	return freeze_secondary_cpus(0);
+}
 extern void enable_nonboot_cpus(void);
 #else /* !CONFIG_PM_SLEEP_SMP */
 static inline int disable_nonboot_cpus(void) { return 0; }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 242bf53..7b6c446 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -1,6 +1,8 @@
 #ifndef __CPUHOTPLUG_H
 #define __CPUHOTPLUG_H
 
+#include <linux/types.h>
+
 enum cpuhp_state {
 	CPUHP_OFFLINE,
 	CPUHP_CREATE_THREADS,
@@ -14,15 +16,40 @@
 	CPUHP_PERF_SUPERH,
 	CPUHP_X86_HPET_DEAD,
 	CPUHP_X86_APB_DEAD,
+	CPUHP_VIRT_NET_DEAD,
+	CPUHP_SLUB_DEAD,
+	CPUHP_MM_WRITEBACK_DEAD,
+	CPUHP_SOFTIRQ_DEAD,
+	CPUHP_NET_MVNETA_DEAD,
+	CPUHP_CPUIDLE_DEAD,
+	CPUHP_ARM64_FPSIMD_DEAD,
+	CPUHP_ARM_OMAP_WAKE_DEAD,
+	CPUHP_IRQ_POLL_DEAD,
+	CPUHP_BLOCK_SOFTIRQ_DEAD,
+	CPUHP_VIRT_SCSI_DEAD,
+	CPUHP_ACPI_CPUDRV_DEAD,
+	CPUHP_S390_PFAULT_DEAD,
+	CPUHP_BLK_MQ_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
 	CPUHP_PROFILE_PREPARE,
 	CPUHP_X2APIC_PREPARE,
 	CPUHP_SMPCFD_PREPARE,
+	CPUHP_RELAY_PREPARE,
+	CPUHP_SLAB_PREPARE,
+	CPUHP_MD_RAID5_PREPARE,
 	CPUHP_RCUTREE_PREP,
+	CPUHP_CPUIDLE_COUPLED_PREPARE,
+	CPUHP_POWERPC_PMAC_PREPARE,
+	CPUHP_POWERPC_MMU_CTX_PREPARE,
 	CPUHP_NOTIFY_PREPARE,
+	CPUHP_ARM_SHMOBILE_SCU_PREPARE,
+	CPUHP_SH_SH3X_PREPARE,
+	CPUHP_BLK_MQ_PREPARE,
 	CPUHP_TIMERS_DEAD,
+	CPUHP_NOTF_ERR_INJ_PREPARE,
+	CPUHP_MIPS_SOC_PREPARE,
 	CPUHP_BRINGUP_CPU,
 	CPUHP_AP_IDLE_DEAD,
 	CPUHP_AP_OFFLINE,
@@ -45,6 +72,8 @@
 	CPUHP_AP_PERF_METAG_STARTING,
 	CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
 	CPUHP_AP_ARM_VFP_STARTING,
+	CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
+	CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
 	CPUHP_AP_PERF_ARM_STARTING,
 	CPUHP_AP_ARM_L2X0_STARTING,
 	CPUHP_AP_ARM_ARCH_TIMER_STARTING,
@@ -68,7 +97,6 @@
 	CPUHP_AP_ARM64_ISNDEP_STARTING,
 	CPUHP_AP_SMPCFD_DYING,
 	CPUHP_AP_X86_TBOOT_DYING,
-	CPUHP_AP_NOTIFY_STARTING,
 	CPUHP_AP_ONLINE,
 	CPUHP_TEARDOWN_CPU,
 	CPUHP_AP_ONLINE_IDLE,
@@ -99,7 +127,7 @@
 
 int __cpuhp_setup_state(enum cpuhp_state state,	const char *name, bool invoke,
 			int (*startup)(unsigned int cpu),
-			int (*teardown)(unsigned int cpu));
+			int (*teardown)(unsigned int cpu), bool multi_instance);
 
 /**
  * cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
@@ -116,7 +144,7 @@
 				    int (*startup)(unsigned int cpu),
 				    int (*teardown)(unsigned int cpu))
 {
-	return __cpuhp_setup_state(state, name, true, startup, teardown);
+	return __cpuhp_setup_state(state, name, true, startup, teardown, false);
 }
 
 /**
@@ -135,7 +163,66 @@
 					    int (*startup)(unsigned int cpu),
 					    int (*teardown)(unsigned int cpu))
 {
-	return __cpuhp_setup_state(state, name, false, startup, teardown);
+	return __cpuhp_setup_state(state, name, false, startup, teardown,
+				   false);
+}
+
+/**
+ * cpuhp_setup_state_multi - Add callbacks for multi state
+ * @state:	The state for which the calls are installed
+ * @name:	Name of the callback.
+ * @startup:	startup callback function
+ * @teardown:	teardown callback function
+ *
+ * Sets the internal multi_instance flag and prepares a state to work as a multi
+ * instance callback. No callbacks are invoked at this point. The callbacks are
+ * invoked once an instance for this state are registered via
+ * @cpuhp_state_add_instance or @cpuhp_state_add_instance_nocalls.
+ */
+static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
+					  const char *name,
+					  int (*startup)(unsigned int cpu,
+							 struct hlist_node *node),
+					  int (*teardown)(unsigned int cpu,
+							  struct hlist_node *node))
+{
+	return __cpuhp_setup_state(state, name, false,
+				   (void *) startup,
+				   (void *) teardown, true);
+}
+
+int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+			       bool invoke);
+
+/**
+ * cpuhp_state_add_instance - Add an instance for a state and invoke startup
+ *                            callback.
+ * @state:	The state for which the instance is installed
+ * @node:	The node for this individual state.
+ *
+ * Installs the instance for the @state and invokes the startup callback on
+ * the present cpus which have already reached the @state. The @state must have
+ * been earlier marked as multi-instance by @cpuhp_setup_state_multi.
+ */
+static inline int cpuhp_state_add_instance(enum cpuhp_state state,
+					   struct hlist_node *node)
+{
+	return __cpuhp_state_add_instance(state, node, true);
+}
+
+/**
+ * cpuhp_state_add_instance_nocalls - Add an instance for a state without
+ *                                    invoking the startup callback.
+ * @state:	The state for which the instance is installed
+ * @node:	The node for this individual state.
+ *
+ * Installs the instance for the @state The @state must have been earlier
+ * marked as multi-instance by @cpuhp_setup_state_multi.
+ */
+static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
+						   struct hlist_node *node)
+{
+	return __cpuhp_state_add_instance(state, node, false);
 }
 
 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
@@ -162,6 +249,51 @@
 	__cpuhp_remove_state(state, false);
 }
 
+/**
+ * cpuhp_remove_multi_state - Remove hotplug multi state callback
+ * @state:	The state for which the calls are removed
+ *
+ * Removes the callback functions from a multi state. This is the reverse of
+ * cpuhp_setup_state_multi(). All instances should have been removed before
+ * invoking this function.
+ */
+static inline void cpuhp_remove_multi_state(enum cpuhp_state state)
+{
+	__cpuhp_remove_state(state, false);
+}
+
+int __cpuhp_state_remove_instance(enum cpuhp_state state,
+				  struct hlist_node *node, bool invoke);
+
+/**
+ * cpuhp_state_remove_instance - Remove hotplug instance from state and invoke
+ *                               the teardown callback
+ * @state:	The state from which the instance is removed
+ * @node:	The node for this individual state.
+ *
+ * Removes the instance and invokes the teardown callback on the present cpus
+ * which have already reached the @state.
+ */
+static inline int cpuhp_state_remove_instance(enum cpuhp_state state,
+					      struct hlist_node *node)
+{
+	return __cpuhp_state_remove_instance(state, node, true);
+}
+
+/**
+ * cpuhp_state_remove_instance_nocalls - Remove hotplug instance from state
+ *					 without invoking the reatdown callback
+ * @state:	The state from which the instance is removed
+ * @node:	The node for this individual state.
+ *
+ * Removes the instance without invoking the teardown callback.
+ */
+static inline int cpuhp_state_remove_instance_nocalls(enum cpuhp_state state,
+						      struct hlist_node *node)
+{
+	return __cpuhp_state_remove_instance(state, node, false);
+}
+
 #ifdef CONFIG_SMP
 void cpuhp_online_idle(enum cpuhp_state state);
 #else
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 1438e23..4d3f0d1 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -45,6 +45,23 @@
 
 extern struct srcu_struct debugfs_srcu;
 
+/**
+ * debugfs_real_fops - getter for the real file operation
+ * @filp: a pointer to a struct file
+ *
+ * Must only be called under the protection established by
+ * debugfs_use_file_start().
+ */
+static inline const struct file_operations *debugfs_real_fops(struct file *filp)
+	__must_hold(&debugfs_srcu)
+{
+	/*
+	 * Neither the pointer to the struct file_operations, nor its
+	 * contents ever change -- srcu_dereference() is not needed here.
+	 */
+	return filp->f_path.dentry->d_fsdata;
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 struct dentry *debugfs_create_file(const char *name, umode_t mode,
diff --git a/include/linux/devfreq-event.h b/include/linux/devfreq-event.h
index 0a83a1e..4db00b0 100644
--- a/include/linux/devfreq-event.h
+++ b/include/linux/devfreq-event.h
@@ -148,11 +148,6 @@
 	return -EINVAL;
 }
 
-static inline void *devfreq_event_get_drvdata(struct devfreq_event_dev *edev)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 static inline struct devfreq_event_dev *devfreq_event_get_edev_by_phandle(
 					struct device *dev, int index)
 {
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 66533e1..dc69df0 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -718,7 +718,7 @@
 #define dma_mmap_writecombine dma_mmap_wc
 #endif
 
-#ifdef CONFIG_NEED_DMA_MAP_STATE
+#if defined(CONFIG_NEED_DMA_MAP_STATE) || defined(CONFIG_DMA_API_DEBUG)
 #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME)        dma_addr_t ADDR_NAME
 #define DEFINE_DMA_UNMAP_LEN(LEN_NAME)          __u32 LEN_NAME
 #define dma_unmap_addr(PTR, ADDR_NAME)           ((PTR)->ADDR_NAME)
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index f2e538a..ccfd0c3 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -40,8 +40,13 @@
 };
 
 /* Export to the platform drivers */
+#if IS_ENABLED(CONFIG_DW_DMAC_CORE)
 int dw_dma_probe(struct dw_dma_chip *chip);
 int dw_dma_remove(struct dw_dma_chip *chip);
+#else
+static inline int dw_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; }
+static inline int dw_dma_remove(struct dw_dma_chip *chip) { return 0; }
+#endif /* CONFIG_DW_DMAC_CORE */
 
 /* DMA API extensions */
 struct dw_desc;
diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h
index aaff68e..197eec6 100644
--- a/include/linux/dma/hsu.h
+++ b/include/linux/dma/hsu.h
@@ -41,8 +41,7 @@
 /* Export to the internal users */
 int hsu_dma_get_status(struct hsu_dma_chip *chip, unsigned short nr,
 		       u32 *status);
-irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr,
-			   u32 status);
+int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr, u32 status);
 
 /* Export to the platform drivers */
 int hsu_dma_probe(struct hsu_dma_chip *chip);
@@ -53,10 +52,10 @@
 {
 	return 0;
 }
-static inline irqreturn_t hsu_dma_do_irq(struct hsu_dma_chip *chip,
-					 unsigned short nr, u32 status)
+static inline int hsu_dma_do_irq(struct hsu_dma_chip *chip, unsigned short nr,
+				 u32 status)
 {
-	return IRQ_NONE;
+	return 0;
 }
 static inline int hsu_dma_probe(struct hsu_dma_chip *chip) { return -ENODEV; }
 static inline int hsu_dma_remove(struct hsu_dma_chip *chip) { return 0; }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 7f5a582..2d08948 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -20,6 +20,7 @@
 #include <linux/ioport.h>
 #include <linux/pfn.h>
 #include <linux/pstore.h>
+#include <linux/range.h>
 #include <linux/reboot.h>
 #include <linux/uuid.h>
 #include <linux/screen_info.h>
@@ -37,6 +38,7 @@
 #define EFI_WRITE_PROTECTED	( 8 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_OUT_OF_RESOURCES	( 9 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_NOT_FOUND		(14 | (1UL << (BITS_PER_LONG-1)))
+#define EFI_ABORTED		(21 | (1UL << (BITS_PER_LONG-1)))
 #define EFI_SECURITY_VIOLATION	(26 | (1UL << (BITS_PER_LONG-1)))
 
 typedef unsigned long efi_status_t;
@@ -118,6 +120,15 @@
 	u32 imagesize;
 } efi_capsule_header_t;
 
+struct efi_boot_memmap {
+	efi_memory_desc_t	**map;
+	unsigned long		*map_size;
+	unsigned long		*desc_size;
+	u32			*desc_ver;
+	unsigned long		*key_ptr;
+	unsigned long		*buff_size;
+};
+
 /*
  * EFI capsule flags
  */
@@ -669,6 +680,18 @@
 	unsigned long tables;
 } efi_system_table_t;
 
+/*
+ * Architecture independent structure for describing a memory map for the
+ * benefit of efi_memmap_init_early(), saving us the need to pass four
+ * parameters.
+ */
+struct efi_memory_map_data {
+	phys_addr_t phys_map;
+	unsigned long size;
+	unsigned long desc_version;
+	unsigned long desc_size;
+};
+
 struct efi_memory_map {
 	phys_addr_t phys_map;
 	void *map;
@@ -676,6 +699,12 @@
 	int nr_map;
 	unsigned long desc_version;
 	unsigned long desc_size;
+	bool late;
+};
+
+struct efi_mem_range {
+	struct range range;
+	u64 attribute;
 };
 
 struct efi_fdt_params {
@@ -900,6 +929,16 @@
 }
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
+
+extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
+extern int __init efi_memmap_init_late(phys_addr_t addr, unsigned long size);
+extern void __init efi_memmap_unmap(void);
+extern int __init efi_memmap_install(phys_addr_t addr, unsigned int nr_map);
+extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
+					 struct range *range);
+extern void __init efi_memmap_insert(struct efi_memory_map *old_memmap,
+				     void *buf, struct efi_mem_range *mem);
+
 extern int efi_config_init(efi_config_table_type_t *arch_tables);
 #ifdef CONFIG_EFI_ESRT
 extern void __init efi_esrt_init(void);
@@ -915,6 +954,7 @@
 extern int __init efi_uart_console_only (void);
 extern u64 efi_mem_desc_end(efi_memory_desc_t *md);
 extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md);
+extern void efi_mem_reserve(phys_addr_t addr, u64 size);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
 extern void efi_reserve_boot_services(void);
@@ -946,7 +986,7 @@
 /* Iterate through an efi_memory_map */
 #define for_each_efi_memory_desc_in_map(m, md)				   \
 	for ((md) = (m)->map;						   \
-	     ((void *)(md) + (m)->desc_size) <= (m)->map_end;		   \
+	     (md) && ((void *)(md) + (m)->desc_size) <= (m)->map_end;	   \
 	     (md) = (void *)(md) + (m)->desc_size)
 
 /**
@@ -1127,12 +1167,6 @@
 };
 
 struct efivars {
-	/*
-	 * ->lock protects two things:
-	 * 1) efivarfs_list and efivars_sysfs_list
-	 * 2) ->ops calls
-	 */
-	spinlock_t lock;
 	struct kset *kset;
 	struct kobject *kobject;
 	const struct efivar_operations *ops;
@@ -1273,8 +1307,8 @@
 int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
 		void *data, bool duplicates, struct list_head *head);
 
-void efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
-void efivar_entry_remove(struct efivar_entry *entry);
+int efivar_entry_add(struct efivar_entry *entry, struct list_head *head);
+int efivar_entry_remove(struct efivar_entry *entry);
 
 int __efivar_entry_delete(struct efivar_entry *entry);
 int efivar_entry_delete(struct efivar_entry *entry);
@@ -1291,7 +1325,7 @@
 int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes,
 			  bool block, unsigned long size, void *data);
 
-void efivar_entry_iter_begin(void);
+int efivar_entry_iter_begin(void);
 void efivar_entry_iter_end(void);
 
 int __efivar_entry_iter(int (*func)(struct efivar_entry *, void *),
@@ -1327,7 +1361,6 @@
 
 #ifdef CONFIG_EFI_RUNTIME_MAP
 int efi_runtime_map_init(struct kobject *);
-void efi_runtime_map_setup(void *, int, u32);
 int efi_get_runtime_map_size(void);
 int efi_get_runtime_map_desc_size(void);
 int efi_runtime_map_copy(void *buf, size_t bufsz);
@@ -1337,9 +1370,6 @@
 	return 0;
 }
 
-static inline void
-efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {}
-
 static inline int efi_get_runtime_map_size(void)
 {
 	return 0;
@@ -1371,11 +1401,7 @@
 			  efi_loaded_image_t *image, int *cmd_line_len);
 
 efi_status_t efi_get_memory_map(efi_system_table_t *sys_table_arg,
-				efi_memory_desc_t **map,
-				unsigned long *map_size,
-				unsigned long *desc_size,
-				u32 *desc_ver,
-				unsigned long *key_ptr);
+				struct efi_boot_memmap *map);
 
 efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg,
 			   unsigned long size, unsigned long align,
@@ -1457,4 +1483,14 @@
 	arch_efi_call_virt_teardown();					\
 })
 
+typedef efi_status_t (*efi_exit_boot_map_processing)(
+	efi_system_table_t *sys_table_arg,
+	struct efi_boot_memmap *map,
+	void *priv);
+
+efi_status_t efi_exit_boot_services(efi_system_table_t *sys_table,
+				    void *handle,
+				    struct efi_boot_memmap *map,
+				    void *priv,
+				    efi_exit_boot_map_processing priv_func);
 #endif /* _LINUX_EFI_H */
diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 6100441..b871c0c 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -29,6 +29,15 @@
 #include <linux/device.h>
 
 /*
+ * Define the type of supported external connectors
+ */
+#define EXTCON_TYPE_USB		BIT(0)	/* USB connector */
+#define EXTCON_TYPE_CHG		BIT(1)	/* Charger connector */
+#define EXTCON_TYPE_JACK	BIT(2)	/* Jack connector */
+#define EXTCON_TYPE_DISP	BIT(3)	/* Display connector */
+#define EXTCON_TYPE_MISC	BIT(4)	/* Miscellaneous connector */
+
+/*
  * Define the unique id of supported external connectors
  */
 #define EXTCON_NONE		0
@@ -44,6 +53,7 @@
 #define EXTCON_CHG_USB_ACA	8	/* Accessory Charger Adapter */
 #define EXTCON_CHG_USB_FAST	9
 #define EXTCON_CHG_USB_SLOW	10
+#define EXTCON_CHG_WPT		11	/* Wireless Power Transfer */
 
 /* Jack external connector */
 #define EXTCON_JACK_MICROPHONE	20
@@ -60,6 +70,8 @@
 #define EXTCON_DISP_MHL		41	/* Mobile High-Definition Link */
 #define EXTCON_DISP_DVI		42	/* Digital Visual Interface */
 #define EXTCON_DISP_VGA		43	/* Video Graphics Array */
+#define EXTCON_DISP_DP		44	/* Display Port */
+#define EXTCON_DISP_HMD		45	/* Head-Mounted Display */
 
 /* Miscellaneous external connector */
 #define EXTCON_DOCK		60
@@ -68,6 +80,85 @@
 
 #define EXTCON_NUM		63
 
+/*
+ * Define the property of supported external connectors.
+ *
+ * When adding the new extcon property, they *must* have
+ * the type/value/default information. Also, you *have to*
+ * modify the EXTCON_PROP_[type]_START/END definitions
+ * which mean the range of the supported properties
+ * for each extcon type.
+ *
+ * The naming style of property
+ * : EXTCON_PROP_[type]_[property name]
+ *
+ * EXTCON_PROP_USB_[property name]	: USB property
+ * EXTCON_PROP_CHG_[property name]	: Charger property
+ * EXTCON_PROP_JACK_[property name]	: Jack property
+ * EXTCON_PROP_DISP_[property name]	: Display property
+ */
+
+/*
+ * Properties of EXTCON_TYPE_USB.
+ *
+ * - EXTCON_PROP_USB_VBUS
+ * @type:	integer (intval)
+ * @value:	0 (low) or 1 (high)
+ * @default:	0 (low)
+ * - EXTCON_PROP_USB_TYPEC_POLARITY
+ * @type:	integer (intval)
+ * @value:	0 (normal) or 1 (flip)
+ * @default:	0 (normal)
+ * - EXTCON_PROP_USB_SS (SuperSpeed)
+ * @type:       integer (intval)
+ * @value:      0 (USB/USB2) or 1 (USB3)
+ * @default:    0 (USB/USB2)
+ *
+ */
+#define EXTCON_PROP_USB_VBUS		0
+#define EXTCON_PROP_USB_TYPEC_POLARITY	1
+#define EXTCON_PROP_USB_SS		2
+
+#define EXTCON_PROP_USB_MIN		0
+#define EXTCON_PROP_USB_MAX		2
+#define EXTCON_PROP_USB_CNT	(EXTCON_PROP_USB_MAX - EXTCON_PROP_USB_MIN + 1)
+
+/* Properties of EXTCON_TYPE_CHG. */
+#define EXTCON_PROP_CHG_MIN		50
+#define EXTCON_PROP_CHG_MAX		50
+#define EXTCON_PROP_CHG_CNT	(EXTCON_PROP_CHG_MAX - EXTCON_PROP_CHG_MIN + 1)
+
+/* Properties of EXTCON_TYPE_JACK. */
+#define EXTCON_PROP_JACK_MIN		100
+#define EXTCON_PROP_JACK_MAX		100
+#define EXTCON_PROP_JACK_CNT (EXTCON_PROP_JACK_MAX - EXTCON_PROP_JACK_MIN + 1)
+
+/*
+ * Properties of EXTCON_TYPE_DISP.
+ *
+ * - EXTCON_PROP_DISP_HPD (Hot Plug Detect)
+ * @type:       integer (intval)
+ * @value:      0 (no hpd) or 1 (hpd)
+ * @default:    0 (no hpd)
+ *
+ */
+#define EXTCON_PROP_DISP_HPD		150
+
+/* Properties of EXTCON_TYPE_DISP. */
+#define EXTCON_PROP_DISP_MIN		150
+#define EXTCON_PROP_DISP_MAX		151
+#define EXTCON_PROP_DISP_CNT (EXTCON_PROP_DISP_MAX - EXTCON_PROP_DISP_MIN + 1)
+
+/*
+ * Define the type of property's value.
+ *
+ * Define the property's value as union type. Because each property
+ * would need the different data type to store it.
+ */
+union extcon_property_value {
+	int intval;	/* type : integer (intval) */
+};
+
 struct extcon_cable;
 
 /**
@@ -150,26 +241,42 @@
 extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev);
 
 /*
- * get/set/update_state access the 32b encoded state value, which represents
- * states of all possible cables of the multistate port. For example, if one
- * calls extcon_set_state(edev, 0x7), it may mean that all the three cables
- * are attached to the port.
- */
-static inline u32 extcon_get_state(struct extcon_dev *edev)
-{
-	return edev->state;
-}
-
-extern int extcon_set_state(struct extcon_dev *edev, u32 state);
-extern int extcon_update_state(struct extcon_dev *edev, u32 mask, u32 state);
-
-/*
- * get/set_cable_state access each bit of the 32b encoded state value.
+ * get/set_state access each bit of the 32b encoded state value.
  * They are used to access the status of each cable based on the cable id.
  */
-extern int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id);
-extern int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id,
+extern int extcon_get_state(struct extcon_dev *edev, unsigned int id);
+extern int extcon_set_state(struct extcon_dev *edev, unsigned int id,
 				   bool cable_state);
+extern int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id,
+				bool cable_state);
+/*
+ * Synchronize the state and property data for a specific external connector.
+ */
+extern int extcon_sync(struct extcon_dev *edev, unsigned int id);
+
+/*
+ * get/set_property access the property value of each external connector.
+ * They are used to access the property of each cable based on the property id.
+ */
+extern int extcon_get_property(struct extcon_dev *edev, unsigned int id,
+				unsigned int prop,
+				union extcon_property_value *prop_val);
+extern int extcon_set_property(struct extcon_dev *edev, unsigned int id,
+				unsigned int prop,
+				union extcon_property_value prop_val);
+extern int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id,
+				unsigned int prop,
+				union extcon_property_value prop_val);
+
+/*
+ * get/set_property_capability set the capability of the property for each
+ * external connector. They are used to set the capability of the property
+ * of each external connector based on the id and property.
+ */
+extern int extcon_get_property_capability(struct extcon_dev *edev,
+				unsigned int id, unsigned int prop);
+extern int extcon_set_property_capability(struct extcon_dev *edev,
+				unsigned int id, unsigned int prop);
 
 /*
  * Following APIs are to monitor every action of a notifier.
@@ -232,30 +339,57 @@
 
 static inline void devm_extcon_dev_free(struct extcon_dev *edev) { }
 
-static inline u32 extcon_get_state(struct extcon_dev *edev)
+
+static inline int extcon_get_state(struct extcon_dev *edev, unsigned int id)
 {
 	return 0;
 }
 
-static inline int extcon_set_state(struct extcon_dev *edev, u32 state)
+static inline int extcon_set_state(struct extcon_dev *edev, unsigned int id,
+				bool cable_state)
 {
 	return 0;
 }
 
-static inline int extcon_update_state(struct extcon_dev *edev, u32 mask,
-				       u32 state)
+static inline int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id,
+				bool cable_state)
 {
 	return 0;
 }
 
-static inline int extcon_get_cable_state_(struct extcon_dev *edev,
-					  unsigned int id)
+static inline int extcon_sync(struct extcon_dev *edev, unsigned int id)
 {
 	return 0;
 }
 
-static inline int extcon_set_cable_state_(struct extcon_dev *edev,
-					  unsigned int id, bool cable_state)
+static inline int extcon_get_property(struct extcon_dev *edev, unsigned int id,
+					unsigned int prop,
+					union extcon_property_value *prop_val)
+{
+	return 0;
+}
+static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id,
+					unsigned int prop,
+					union extcon_property_value prop_val)
+{
+	return 0;
+}
+
+static inline int extcon_set_property_sync(struct extcon_dev *edev,
+					unsigned int id, unsigned int prop,
+					union extcon_property_value prop_val)
+{
+	return 0;
+}
+
+static inline int extcon_get_property_capability(struct extcon_dev *edev,
+					unsigned int id, unsigned int prop)
+{
+	return 0;
+}
+
+static inline int extcon_set_property_capability(struct extcon_dev *edev,
+					unsigned int id, unsigned int prop)
 {
 	return 0;
 }
@@ -320,4 +454,15 @@
 {
 	return -EINVAL;
 }
+
+static inline int extcon_get_cable_state_(struct extcon_dev *edev, unsigned int id)
+{
+	return extcon_get_state(edev, id);
+}
+
+static inline int extcon_set_cable_state_(struct extcon_dev *edev, unsigned int id,
+				   bool cable_state)
+{
+	return extcon_set_state_sync(edev, id, cable_state);
+}
 #endif /* __LINUX_EXTCON_H__ */
diff --git a/include/linux/extcon/extcon-adc-jack.h b/include/linux/extcon/extcon-adc-jack.h
index ac85f20..a0e03b1 100644
--- a/include/linux/extcon/extcon-adc-jack.h
+++ b/include/linux/extcon/extcon-adc-jack.h
@@ -20,8 +20,8 @@
 
 /**
  * struct adc_jack_cond - condition to use an extcon state
- * @state:		the corresponding extcon state (if 0, this struct
  *			denotes the last adc_jack_cond element among the array)
+ * @id:			the unique id of each external connector
  * @min_adc:		min adc value for this condition
  * @max_adc:		max adc value for this condition
  *
@@ -33,7 +33,7 @@
  * because when no adc_jack_cond is met, state = 0 is automatically chosen.
  */
 struct adc_jack_cond {
-	u32 state;	/* extcon state value. 0 if invalid */
+	unsigned int id;
 	u32 min_adc;
 	u32 max_adc;
 };
diff --git a/include/linux/fence.h b/include/linux/fence.h
index 8cc719a6..2ac6fa5 100644
--- a/include/linux/fence.h
+++ b/include/linux/fence.h
@@ -49,8 +49,6 @@
  * @timestamp: Timestamp when the fence was signaled.
  * @status: Optional, only valid if < 0, must be set before calling
  * fence_signal, indicates that the fence has completed with an error.
- * @child_list: list of children fences
- * @active_list: list of active fences
  *
  * the flags member must be manipulated and read using the appropriate
  * atomic ops (bit_*), so taking the spinlock will not be needed most
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a16439b..1f09c52 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -314,6 +314,70 @@
 	bpf_size;						\
 })
 
+#define BPF_SIZEOF(type)					\
+	({							\
+		const int __size = bytes_to_bpf_size(sizeof(type)); \
+		BUILD_BUG_ON(__size < 0);			\
+		__size;						\
+	})
+
+#define BPF_FIELD_SIZEOF(type, field)				\
+	({							\
+		const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \
+		BUILD_BUG_ON(__size < 0);			\
+		__size;						\
+	})
+
+#define __BPF_MAP_0(m, v, ...) v
+#define __BPF_MAP_1(m, v, t, a, ...) m(t, a)
+#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__)
+#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__)
+#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__)
+#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__)
+
+#define __BPF_REG_0(...) __BPF_PAD(5)
+#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4)
+#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3)
+#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2)
+#define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1)
+#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__)
+
+#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__)
+#define __BPF_REG(n, ...) __BPF_REG_##n(__VA_ARGS__)
+
+#define __BPF_CAST(t, a)						       \
+	(__force t)							       \
+	(__force							       \
+	 typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long),      \
+				      (unsigned long)0, (t)0))) a
+#define __BPF_V void
+#define __BPF_N
+
+#define __BPF_DECL_ARGS(t, a) t   a
+#define __BPF_DECL_REGS(t, a) u64 a
+
+#define __BPF_PAD(n)							       \
+	__BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2,       \
+		  u64, __ur_3, u64, __ur_4, u64, __ur_5)
+
+#define BPF_CALL_x(x, name, ...)					       \
+	static __always_inline						       \
+	u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__));   \
+	u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__));	       \
+	u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__))	       \
+	{								       \
+		return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
+	}								       \
+	static __always_inline						       \
+	u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))
+
+#define BPF_CALL_0(name, ...)	BPF_CALL_x(0, name, __VA_ARGS__)
+#define BPF_CALL_1(name, ...)	BPF_CALL_x(1, name, __VA_ARGS__)
+#define BPF_CALL_2(name, ...)	BPF_CALL_x(2, name, __VA_ARGS__)
+#define BPF_CALL_3(name, ...)	BPF_CALL_x(3, name, __VA_ARGS__)
+#define BPF_CALL_4(name, ...)	BPF_CALL_x(4, name, __VA_ARGS__)
+#define BPF_CALL_5(name, ...)	BPF_CALL_x(5, name, __VA_ARGS__)
+
 #ifdef CONFIG_COMPAT
 /* A struct sock_filter is architecture independent. */
 struct compat_sock_fprog {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3523bf6..901e25d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -574,6 +574,7 @@
 
 struct posix_acl;
 #define ACL_NOT_CACHED ((void *)(-1))
+#define ACL_DONT_CACHE ((void *)(-3))
 
 static inline struct posix_acl *
 uncached_acl_sentinel(struct task_struct *task)
diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h
index cfa6cde..76cff18 100644
--- a/include/linux/fscrypto.h
+++ b/include/linux/fscrypto.h
@@ -274,8 +274,7 @@
 extern int fscrypt_zeroout_range(struct inode *, pgoff_t, sector_t,
 						unsigned int);
 /* policy.c */
-extern int fscrypt_process_policy(struct inode *,
-					const struct fscrypt_policy *);
+extern int fscrypt_process_policy(struct file *, const struct fscrypt_policy *);
 extern int fscrypt_get_policy(struct inode *, struct fscrypt_policy *);
 extern int fscrypt_has_permitted_context(struct inode *, struct inode *);
 extern int fscrypt_inherit_context(struct inode *, struct inode *,
@@ -345,7 +344,7 @@
 }
 
 /* policy.c */
-static inline int fscrypt_notsupp_process_policy(struct inode *i,
+static inline int fscrypt_notsupp_process_policy(struct file *f,
 				const struct fscrypt_policy *p)
 {
 	return -EOPNOTSUPP;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 58205f3..7268ed0 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -148,6 +148,7 @@
 	#define FS_PRIO_1	1 /* fanotify content based access control */
 	#define FS_PRIO_2	2 /* fanotify pre-content access */
 	unsigned int priority;
+	bool shutdown;		/* group is being shut down, don't queue more events */
 
 	/* stores all fastpath marks assoc with this group so they can be cleaned on unregister */
 	struct mutex mark_mutex;	/* protect marks_list */
@@ -179,7 +180,6 @@
 			spinlock_t access_lock;
 			struct list_head access_list;
 			wait_queue_head_t access_waitq;
-			atomic_t bypass_perm;
 #endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
 			int f_flags;
 			unsigned int max_marks;
@@ -292,6 +292,8 @@
 extern void fsnotify_get_group(struct fsnotify_group *group);
 /* drop reference on a group from fsnotify_alloc_group */
 extern void fsnotify_put_group(struct fsnotify_group *group);
+/* group destruction begins, stop queuing new events */
+extern void fsnotify_group_stop_queueing(struct fsnotify_group *group);
 /* destroy group */
 extern void fsnotify_destroy_group(struct fsnotify_group *group);
 /* fasync handler function */
@@ -304,8 +306,6 @@
 			      struct fsnotify_event *event,
 			      int (*merge)(struct list_head *,
 					   struct fsnotify_event *));
-/* Remove passed event from groups notification queue */
-extern void fsnotify_remove_event(struct fsnotify_group *group, struct fsnotify_event *event);
 /* true if the group notification queue is empty */
 extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
 /* return, but do not dequeue the first event on the notification queue */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7d565af..6f93ac4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -795,7 +795,12 @@
 	unsigned long func;
 	unsigned long long calltime;
 	unsigned long long subtime;
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	unsigned long fp;
+#endif
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+	unsigned long *retp;
+#endif
 };
 
 /*
@@ -807,7 +812,10 @@
 
 extern int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
-			 unsigned long frame_pointer);
+			 unsigned long frame_pointer, unsigned long *retp);
+
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+				    unsigned long ret, unsigned long *retp);
 
 /*
  * Sometimes we don't want to trace a function with the function
@@ -870,6 +878,13 @@
 	return -1;
 }
 
+static inline unsigned long
+ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret,
+		      unsigned long *retp)
+{
+	return ret;
+}
+
 static inline void pause_graph_tracing(void) { }
 static inline void unpause_graph_tracing(void) { }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index d2ba7d3..1ffbf2a 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -304,6 +304,8 @@
 
 struct tegra_mipi_device *tegra_mipi_request(struct device *device);
 void tegra_mipi_free(struct tegra_mipi_device *device);
+int tegra_mipi_enable(struct tegra_mipi_device *device);
+int tegra_mipi_disable(struct tegra_mipi_device *device);
 int tegra_mipi_calibrate(struct tegra_mipi_device *device);
 
 #endif
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 09354f6..9d2f8bde 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -14,9 +14,341 @@
 #ifndef _HWMON_H_
 #define _HWMON_H_
 
+#include <linux/bitops.h>
+
 struct device;
 struct attribute_group;
 
+enum hwmon_sensor_types {
+	hwmon_chip,
+	hwmon_temp,
+	hwmon_in,
+	hwmon_curr,
+	hwmon_power,
+	hwmon_energy,
+	hwmon_humidity,
+	hwmon_fan,
+	hwmon_pwm,
+};
+
+enum hwmon_chip_attributes {
+	hwmon_chip_temp_reset_history,
+	hwmon_chip_in_reset_history,
+	hwmon_chip_curr_reset_history,
+	hwmon_chip_power_reset_history,
+	hwmon_chip_register_tz,
+	hwmon_chip_update_interval,
+	hwmon_chip_alarms,
+};
+
+#define HWMON_C_TEMP_RESET_HISTORY	BIT(hwmon_chip_temp_reset_history)
+#define HWMON_C_IN_RESET_HISTORY	BIT(hwmon_chip_in_reset_history)
+#define HWMON_C_CURR_RESET_HISTORY	BIT(hwmon_chip_curr_reset_history)
+#define HWMON_C_POWER_RESET_HISTORY	BIT(hwmon_chip_power_reset_history)
+#define HWMON_C_REGISTER_TZ		BIT(hwmon_chip_register_tz)
+#define HWMON_C_UPDATE_INTERVAL		BIT(hwmon_chip_update_interval)
+#define HWMON_C_ALARMS			BIT(hwmon_chip_alarms)
+
+enum hwmon_temp_attributes {
+	hwmon_temp_input = 0,
+	hwmon_temp_type,
+	hwmon_temp_lcrit,
+	hwmon_temp_lcrit_hyst,
+	hwmon_temp_min,
+	hwmon_temp_min_hyst,
+	hwmon_temp_max,
+	hwmon_temp_max_hyst,
+	hwmon_temp_crit,
+	hwmon_temp_crit_hyst,
+	hwmon_temp_emergency,
+	hwmon_temp_emergency_hyst,
+	hwmon_temp_alarm,
+	hwmon_temp_lcrit_alarm,
+	hwmon_temp_min_alarm,
+	hwmon_temp_max_alarm,
+	hwmon_temp_crit_alarm,
+	hwmon_temp_emergency_alarm,
+	hwmon_temp_fault,
+	hwmon_temp_offset,
+	hwmon_temp_label,
+	hwmon_temp_lowest,
+	hwmon_temp_highest,
+	hwmon_temp_reset_history,
+};
+
+#define HWMON_T_INPUT		BIT(hwmon_temp_input)
+#define HWMON_T_TYPE		BIT(hwmon_temp_type)
+#define HWMON_T_LCRIT		BIT(hwmon_temp_lcrit)
+#define HWMON_T_LCRIT_HYST	BIT(hwmon_temp_lcrit_hyst)
+#define HWMON_T_MIN		BIT(hwmon_temp_min)
+#define HWMON_T_MIN_HYST	BIT(hwmon_temp_min_hyst)
+#define HWMON_T_MAX		BIT(hwmon_temp_max)
+#define HWMON_T_MAX_HYST	BIT(hwmon_temp_max_hyst)
+#define HWMON_T_CRIT		BIT(hwmon_temp_crit)
+#define HWMON_T_CRIT_HYST	BIT(hwmon_temp_crit_hyst)
+#define HWMON_T_EMERGENCY	BIT(hwmon_temp_emergency)
+#define HWMON_T_EMERGENCY_HYST	BIT(hwmon_temp_emergency_hyst)
+#define HWMON_T_MIN_ALARM	BIT(hwmon_temp_min_alarm)
+#define HWMON_T_MAX_ALARM	BIT(hwmon_temp_max_alarm)
+#define HWMON_T_CRIT_ALARM	BIT(hwmon_temp_crit_alarm)
+#define HWMON_T_EMERGENCY_ALARM	BIT(hwmon_temp_emergency_alarm)
+#define HWMON_T_FAULT		BIT(hwmon_temp_fault)
+#define HWMON_T_OFFSET		BIT(hwmon_temp_offset)
+#define HWMON_T_LABEL		BIT(hwmon_temp_label)
+#define HWMON_T_LOWEST		BIT(hwmon_temp_lowest)
+#define HWMON_T_HIGHEST		BIT(hwmon_temp_highest)
+#define HWMON_T_RESET_HISTORY	BIT(hwmon_temp_reset_history)
+
+enum hwmon_in_attributes {
+	hwmon_in_input,
+	hwmon_in_min,
+	hwmon_in_max,
+	hwmon_in_lcrit,
+	hwmon_in_crit,
+	hwmon_in_average,
+	hwmon_in_lowest,
+	hwmon_in_highest,
+	hwmon_in_reset_history,
+	hwmon_in_label,
+	hwmon_in_alarm,
+	hwmon_in_min_alarm,
+	hwmon_in_max_alarm,
+	hwmon_in_lcrit_alarm,
+	hwmon_in_crit_alarm,
+};
+
+#define HWMON_I_INPUT		BIT(hwmon_in_input)
+#define HWMON_I_MIN		BIT(hwmon_in_min)
+#define HWMON_I_MAX		BIT(hwmon_in_max)
+#define HWMON_I_LCRIT		BIT(hwmon_in_lcrit)
+#define HWMON_I_CRIT		BIT(hwmon_in_crit)
+#define HWMON_I_AVERAGE		BIT(hwmon_in_average)
+#define HWMON_I_LOWEST		BIT(hwmon_in_lowest)
+#define HWMON_I_HIGHEST		BIT(hwmon_in_highest)
+#define HWMON_I_RESET_HISTORY	BIT(hwmon_in_reset_history)
+#define HWMON_I_LABEL		BIT(hwmon_in_label)
+#define HWMON_I_ALARM		BIT(hwmon_in_alarm)
+#define HWMON_I_MIN_ALARM	BIT(hwmon_in_min_alarm)
+#define HWMON_I_MAX_ALARM	BIT(hwmon_in_max_alarm)
+#define HWMON_I_LCRIT_ALARM	BIT(hwmon_in_lcrit_alarm)
+#define HWMON_I_CRIT_ALARM	BIT(hwmon_in_crit_alarm)
+
+enum hwmon_curr_attributes {
+	hwmon_curr_input,
+	hwmon_curr_min,
+	hwmon_curr_max,
+	hwmon_curr_lcrit,
+	hwmon_curr_crit,
+	hwmon_curr_average,
+	hwmon_curr_lowest,
+	hwmon_curr_highest,
+	hwmon_curr_reset_history,
+	hwmon_curr_label,
+	hwmon_curr_alarm,
+	hwmon_curr_min_alarm,
+	hwmon_curr_max_alarm,
+	hwmon_curr_lcrit_alarm,
+	hwmon_curr_crit_alarm,
+};
+
+#define HWMON_C_INPUT		BIT(hwmon_curr_input)
+#define HWMON_C_MIN		BIT(hwmon_curr_min)
+#define HWMON_C_MAX		BIT(hwmon_curr_max)
+#define HWMON_C_LCRIT		BIT(hwmon_curr_lcrit)
+#define HWMON_C_CRIT		BIT(hwmon_curr_crit)
+#define HWMON_C_AVERAGE		BIT(hwmon_curr_average)
+#define HWMON_C_LOWEST		BIT(hwmon_curr_lowest)
+#define HWMON_C_HIGHEST		BIT(hwmon_curr_highest)
+#define HWMON_C_RESET_HISTORY	BIT(hwmon_curr_reset_history)
+#define HWMON_C_LABEL		BIT(hwmon_curr_label)
+#define HWMON_C_ALARM		BIT(hwmon_curr_alarm)
+#define HWMON_C_MIN_ALARM	BIT(hwmon_curr_min_alarm)
+#define HWMON_C_MAX_ALARM	BIT(hwmon_curr_max_alarm)
+#define HWMON_C_LCRIT_ALARM	BIT(hwmon_curr_lcrit_alarm)
+#define HWMON_C_CRIT_ALARM	BIT(hwmon_curr_crit_alarm)
+
+enum hwmon_power_attributes {
+	hwmon_power_average,
+	hwmon_power_average_interval,
+	hwmon_power_average_interval_max,
+	hwmon_power_average_interval_min,
+	hwmon_power_average_highest,
+	hwmon_power_average_lowest,
+	hwmon_power_average_max,
+	hwmon_power_average_min,
+	hwmon_power_input,
+	hwmon_power_input_highest,
+	hwmon_power_input_lowest,
+	hwmon_power_reset_history,
+	hwmon_power_accuracy,
+	hwmon_power_cap,
+	hwmon_power_cap_hyst,
+	hwmon_power_cap_max,
+	hwmon_power_cap_min,
+	hwmon_power_max,
+	hwmon_power_crit,
+	hwmon_power_label,
+	hwmon_power_alarm,
+	hwmon_power_cap_alarm,
+	hwmon_power_max_alarm,
+	hwmon_power_crit_alarm,
+};
+
+#define HWMON_P_AVERAGE			BIT(hwmon_power_average)
+#define HWMON_P_AVERAGE_INTERVAL	BIT(hwmon_power_average_interval)
+#define HWMON_P_AVERAGE_INTERVAL_MAX	BIT(hwmon_power_average_interval_max)
+#define HWMON_P_AVERAGE_INTERVAL_MIN	BIT(hwmon_power_average_interval_min)
+#define HWMON_P_AVERAGE_HIGHEST		BIT(hwmon_power_average_highest)
+#define HWMON_P_AVERAGE_LOWEST		BIT(hwmon_power_average_lowest)
+#define HWMON_P_AVERAGE_MAX		BIT(hwmon_power_average_max)
+#define HWMON_P_AVERAGE_MIN		BIT(hwmon_power_average_min)
+#define HWMON_P_INPUT			BIT(hwmon_power_input)
+#define HWMON_P_INPUT_HIGHEST		BIT(hwmon_power_input_highest)
+#define HWMON_P_INPUT_LOWEST		BIT(hwmon_power_input_lowest)
+#define HWMON_P_RESET_HISTORY		BIT(hwmon_power_reset_history)
+#define HWMON_P_ACCURACY		BIT(hwmon_power_accuracy)
+#define HWMON_P_CAP			BIT(hwmon_power_cap)
+#define HWMON_P_CAP_HYST		BIT(hwmon_power_cap_hyst)
+#define HWMON_P_CAP_MAX			BIT(hwmon_power_cap_max)
+#define HWMON_P_CAP_MIN			BIT(hwmon_power_cap_min)
+#define HWMON_P_MAX			BIT(hwmon_power_max)
+#define HWMON_P_CRIT			BIT(hwmon_power_crit)
+#define HWMON_P_LABEL			BIT(hwmon_power_label)
+#define HWMON_P_ALARM			BIT(hwmon_power_alarm)
+#define HWMON_P_CAP_ALARM		BIT(hwmon_power_cap_alarm)
+#define HWMON_P_MAX_ALARM		BIT(hwmon_power_max_alarm)
+#define HWMON_P_CRIT_ALARM		BIT(hwmon_power_crit_alarm)
+
+enum hwmon_energy_attributes {
+	hwmon_energy_input,
+	hwmon_energy_label,
+};
+
+#define HWMON_E_INPUT			BIT(hwmon_energy_input)
+#define HWMON_E_LABEL			BIT(hwmon_energy_label)
+
+enum hwmon_humidity_attributes {
+	hwmon_humidity_input,
+	hwmon_humidity_label,
+	hwmon_humidity_min,
+	hwmon_humidity_min_hyst,
+	hwmon_humidity_max,
+	hwmon_humidity_max_hyst,
+	hwmon_humidity_alarm,
+	hwmon_humidity_fault,
+};
+
+#define HWMON_H_INPUT			BIT(hwmon_humidity_input)
+#define HWMON_H_LABEL			BIT(hwmon_humidity_label)
+#define HWMON_H_MIN			BIT(hwmon_humidity_min)
+#define HWMON_H_MIN_HYST		BIT(hwmon_humidity_min_hyst)
+#define HWMON_H_MAX			BIT(hwmon_humidity_max)
+#define HWMON_H_MAX_HYST		BIT(hwmon_humidity_max_hyst)
+#define HWMON_H_ALARM			BIT(hwmon_humidity_alarm)
+#define HWMON_H_FAULT			BIT(hwmon_humidity_fault)
+
+enum hwmon_fan_attributes {
+	hwmon_fan_input,
+	hwmon_fan_label,
+	hwmon_fan_min,
+	hwmon_fan_max,
+	hwmon_fan_div,
+	hwmon_fan_pulses,
+	hwmon_fan_target,
+	hwmon_fan_alarm,
+	hwmon_fan_min_alarm,
+	hwmon_fan_max_alarm,
+	hwmon_fan_fault,
+};
+
+#define HWMON_F_INPUT			BIT(hwmon_fan_input)
+#define HWMON_F_LABEL			BIT(hwmon_fan_label)
+#define HWMON_F_MIN			BIT(hwmon_fan_min)
+#define HWMON_F_MAX			BIT(hwmon_fan_max)
+#define HWMON_F_DIV			BIT(hwmon_fan_div)
+#define HWMON_F_PULSES			BIT(hwmon_fan_pulses)
+#define HWMON_F_TARGET			BIT(hwmon_fan_target)
+#define HWMON_F_ALARM			BIT(hwmon_fan_alarm)
+#define HWMON_F_MIN_ALARM		BIT(hwmon_fan_min_alarm)
+#define HWMON_F_MAX_ALARM		BIT(hwmon_fan_max_alarm)
+#define HWMON_F_FAULT			BIT(hwmon_fan_fault)
+
+enum hwmon_pwm_attributes {
+	hwmon_pwm_input,
+	hwmon_pwm_enable,
+	hwmon_pwm_mode,
+	hwmon_pwm_freq,
+};
+
+#define HWMON_PWM_INPUT			BIT(hwmon_pwm_input)
+#define HWMON_PWM_ENABLE		BIT(hwmon_pwm_enable)
+#define HWMON_PWM_MODE			BIT(hwmon_pwm_mode)
+#define HWMON_PWM_FREQ			BIT(hwmon_pwm_freq)
+
+/**
+ * struct hwmon_ops - hwmon device operations
+ * @is_visible: Callback to return attribute visibility. Mandatory.
+ *		Parameters are:
+ *		@const void *drvdata:
+ *			Pointer to driver-private data structure passed
+ *			as argument to hwmon_device_register_with_info().
+ *		@type:	Sensor type
+ *		@attr:	Sensor attribute
+ *		@channel:
+ *			Channel number
+ *		The function returns the file permissions.
+ *		If the return value is 0, no attribute will be created.
+ * @read:       Read callback. Optional. If not provided, attributes
+ *		will not be readable.
+ *		Parameters are:
+ *		@dev:	Pointer to hardware monitoring device
+ *		@type:	Sensor type
+ *		@attr:	Sensor attribute
+ *		@channel:
+ *			Channel number
+ *		@val:	Pointer to returned value
+ *		The function returns 0 on success or a negative error number.
+ * @write:	Write callback. Optional. If not provided, attributes
+ *		will not be writable.
+ *		Parameters are:
+ *		@dev:	Pointer to hardware monitoring device
+ *		@type:	Sensor type
+ *		@attr:	Sensor attribute
+ *		@channel:
+ *			Channel number
+ *		@val:	Value to write
+ *		The function returns 0 on success or a negative error number.
+ */
+struct hwmon_ops {
+	umode_t (*is_visible)(const void *drvdata, enum hwmon_sensor_types type,
+			      u32 attr, int channel);
+	int (*read)(struct device *dev, enum hwmon_sensor_types type,
+		    u32 attr, int channel, long *val);
+	int (*write)(struct device *dev, enum hwmon_sensor_types type,
+		     u32 attr, int channel, long val);
+};
+
+/**
+ * Channel information
+ * @type:	Channel type.
+ * @config:	Pointer to NULL-terminated list of channel parameters.
+ *		Use for per-channel attributes.
+ */
+struct hwmon_channel_info {
+	enum hwmon_sensor_types type;
+	const u32 *config;
+};
+
+/**
+ * Chip configuration
+ * @ops:	Pointer to hwmon operations.
+ * @info:	Null-terminated list of channel information.
+ */
+struct hwmon_chip_info {
+	const struct hwmon_ops *ops;
+	const struct hwmon_channel_info **info;
+};
+
 struct device *hwmon_device_register(struct device *dev);
 struct device *
 hwmon_device_register_with_groups(struct device *dev, const char *name,
@@ -26,6 +358,16 @@
 devm_hwmon_device_register_with_groups(struct device *dev, const char *name,
 				       void *drvdata,
 				       const struct attribute_group **groups);
+struct device *
+hwmon_device_register_with_info(struct device *dev,
+				const char *name, void *drvdata,
+				const struct hwmon_chip_info *info,
+				const struct attribute_group **groups);
+struct device *
+devm_hwmon_device_register_with_info(struct device *dev,
+				     const char *name, void *drvdata,
+				     const struct hwmon_chip_info *info,
+				     const struct attribute_group **groups);
 
 void hwmon_device_unregister(struct device *dev);
 void devm_hwmon_device_unregister(struct device *dev);
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b10954a..6824556d 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -674,6 +674,11 @@
 	HV_SIGNAL_POLICY_EXPLICIT,
 };
 
+enum hv_numa_policy {
+	HV_BALANCED = 0,
+	HV_LOCALIZED,
+};
+
 enum vmbus_device_type {
 	HV_IDE = 0,
 	HV_SCSI,
@@ -701,9 +706,6 @@
 };
 
 struct vmbus_channel {
-	/* Unique channel id */
-	int id;
-
 	struct list_head listentry;
 
 	struct hv_device *device_obj;
@@ -850,6 +852,43 @@
 	 * ring lock to preserve the current behavior.
 	 */
 	bool acquire_ring_lock;
+	/*
+	 * For performance critical channels (storage, networking
+	 * etc,), Hyper-V has a mechanism to enhance the throughput
+	 * at the expense of latency:
+	 * When the host is to be signaled, we just set a bit in a shared page
+	 * and this bit will be inspected by the hypervisor within a certain
+	 * window and if the bit is set, the host will be signaled. The window
+	 * of time is the monitor latency - currently around 100 usecs. This
+	 * mechanism improves throughput by:
+	 *
+	 * A) Making the host more efficient - each time it wakes up,
+	 *    potentially it will process morev number of packets. The
+	 *    monitor latency allows a batch to build up.
+	 * B) By deferring the hypercall to signal, we will also minimize
+	 *    the interrupts.
+	 *
+	 * Clearly, these optimizations improve throughput at the expense of
+	 * latency. Furthermore, since the channel is shared for both
+	 * control and data messages, control messages currently suffer
+	 * unnecessary latency adversley impacting performance and boot
+	 * time. To fix this issue, permit tagging the channel as being
+	 * in "low latency" mode. In this mode, we will bypass the monitor
+	 * mechanism.
+	 */
+	bool low_latency;
+
+	/*
+	 * NUMA distribution policy:
+	 * We support teo policies:
+	 * 1) Balanced: Here all performance critical channels are
+	 *    distributed evenly amongst all the NUMA nodes.
+	 *    This policy will be the default policy.
+	 * 2) Localized: All channels of a given instance of a
+	 *    performance critical service will be assigned CPUs
+	 *    within a selected NUMA node.
+	 */
+	enum hv_numa_policy affinity_policy;
 
 };
 
@@ -870,6 +909,12 @@
 	c->signal_policy = policy;
 }
 
+static inline void set_channel_affinity_state(struct vmbus_channel *c,
+					      enum hv_numa_policy policy)
+{
+	c->affinity_policy = policy;
+}
+
 static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
 {
 	c->batched_reading = state;
@@ -891,6 +936,16 @@
 	c->outbound.ring_buffer->pending_send_sz = size;
 }
 
+static inline void set_low_latency_mode(struct vmbus_channel *c)
+{
+	c->low_latency = true;
+}
+
+static inline void clear_low_latency_mode(struct vmbus_channel *c)
+{
+	c->low_latency = false;
+}
+
 void vmbus_onmessage(void *context);
 
 int vmbus_request_offers(void);
@@ -1114,6 +1169,13 @@
 					 const char *mod_name);
 void vmbus_driver_unregister(struct hv_driver *hv_driver);
 
+static inline const char *vmbus_dev_name(const struct hv_device *device_obj)
+{
+	const struct kobject *kobj = &device_obj->device.kobj;
+
+	return kobj->name;
+}
+
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel);
 
 int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj,
@@ -1257,6 +1319,27 @@
 			0x80, 0x2e, 0x27, 0xed, 0xe1, 0x9f)
 
 /*
+ * Linux doesn't support the 3 devices: the first two are for
+ * Automatic Virtual Machine Activation, and the third is for
+ * Remote Desktop Virtualization.
+ * {f8e65716-3cb3-4a06-9a60-1889c5cccab5}
+ * {3375baf4-9e15-4b30-b765-67acb10d607b}
+ * {276aacf4-ac15-426c-98dd-7521ad3f01fe}
+ */
+
+#define HV_AVMA1_GUID \
+	.guid = UUID_LE(0xf8e65716, 0x3cb3, 0x4a06, 0x9a, 0x60, \
+			0x18, 0x89, 0xc5, 0xcc, 0xca, 0xb5)
+
+#define HV_AVMA2_GUID \
+	.guid = UUID_LE(0x3375baf4, 0x9e15, 0x4b30, 0xb7, 0x65, \
+			0x67, 0xac, 0xb1, 0x0d, 0x60, 0x7b)
+
+#define HV_RDV_GUID \
+	.guid = UUID_LE(0x276aacf4, 0xac15, 0x426c, 0x98, 0xdd, \
+			0x75, 0x21, 0xad, 0x3f, 0x01, 0xfe)
+
+/*
  * Common header for Hyper-V ICs
  */
 
@@ -1344,6 +1427,15 @@
 	u8 flags;
 } __packed;
 
+struct ictimesync_ref_data {
+	u64 parenttime;
+	u64 vmreferencetime;
+	u8 flags;
+	char leapflags;
+	char stratum;
+	u8 reserved[3];
+} __packed;
+
 struct hyperv_service_callback {
 	u8 msg_type;
 	char *log_msg;
@@ -1357,6 +1449,9 @@
 					struct icmsg_negotiate *, u8 *, int,
 					int);
 
+void hv_event_tasklet_disable(struct vmbus_channel *channel);
+void hv_event_tasklet_enable(struct vmbus_channel *channel);
+
 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
 
 /*
diff --git a/include/linux/hypervisor.h b/include/linux/hypervisor.h
new file mode 100644
index 0000000..3fa5ef2
--- /dev/null
+++ b/include/linux/hypervisor.h
@@ -0,0 +1,17 @@
+#ifndef __LINUX_HYPEVISOR_H
+#define __LINUX_HYPEVISOR_H
+
+/*
+ *	Generic Hypervisor support
+ *		Juergen Gross <jgross@suse.com>
+ */
+
+#ifdef CONFIG_HYPERVISOR_GUEST
+#include <asm/hypervisor.h>
+#else
+static inline void hypervisor_pin_vcpu(int cpu)
+{
+}
+#endif
+
+#endif /* __LINUX_HYPEVISOR_H */
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index dcb89e3..c6587c0 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -45,6 +45,7 @@
 #define BR_PROXYARP		BIT(8)
 #define BR_LEARNING_SYNC	BIT(9)
 #define BR_PROXYARP_WIFI	BIT(10)
+#define BR_MCAST_FLOOD		BIT(11)
 
 #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
 
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index f923d15..0b17c58 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -25,5 +25,6 @@
 	__u32 max_tx_rate;
 	__u32 rss_query_en;
 	__u32 trusted;
+	__be16 vlan_proto;
 };
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index a5f6ce6..3319d97 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -81,6 +81,7 @@
 #define skb_vlan_tag_present(__skb)	((__skb)->vlan_tci & VLAN_TAG_PRESENT)
 #define skb_vlan_tag_get(__skb)		((__skb)->vlan_tci & ~VLAN_TAG_PRESENT)
 #define skb_vlan_tag_get_id(__skb)	((__skb)->vlan_tci & VLAN_VID_MASK)
+#define skb_vlan_tag_get_prio(__skb)	((__skb)->vlan_tci & VLAN_PRIO_MASK)
 
 /**
  *	struct vlan_pcpu_stats - VLAN percpu rx/tx stats
@@ -271,6 +272,23 @@
 }
 #endif
 
+/**
+ * eth_type_vlan - check for valid vlan ether type.
+ * @ethertype: ether type to check
+ *
+ * Returns true if the ether type is a vlan ether type.
+ */
+static inline bool eth_type_vlan(__be16 ethertype)
+{
+	switch (ethertype) {
+	case htons(ETH_P_8021Q):
+	case htons(ETH_P_8021AD):
+		return true;
+	default:
+		return false;
+	}
+}
+
 static inline bool vlan_hw_offload_capable(netdev_features_t features,
 					   __be16 proto)
 {
@@ -424,8 +442,7 @@
 {
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data;
 
-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) &&
-	    veth->h_vlan_proto != htons(ETH_P_8021AD))
+	if (!eth_type_vlan(veth->h_vlan_proto))
 		return -EINVAL;
 
 	*vlan_tci = ntohs(veth->h_vlan_TCI);
@@ -487,7 +504,7 @@
 	 * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
 	 * ETH_HLEN otherwise
 	 */
-	if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
+	if (eth_type_vlan(type)) {
 		if (vlan_depth) {
 			if (WARN_ON(vlan_depth < VLAN_HLEN))
 				return 0;
@@ -505,8 +522,7 @@
 			vh = (struct vlan_hdr *)(skb->data + vlan_depth);
 			type = vh->h_vlan_encapsulated_proto;
 			vlan_depth += VLAN_HLEN;
-		} while (type == htons(ETH_P_8021Q) ||
-			 type == htons(ETH_P_8021AD));
+		} while (eth_type_vlan(type));
 	}
 
 	if (depth)
@@ -571,8 +587,7 @@
 static inline bool skb_vlan_tagged(const struct sk_buff *skb)
 {
 	if (!skb_vlan_tag_present(skb) &&
-	    likely(skb->protocol != htons(ETH_P_8021Q) &&
-		   skb->protocol != htons(ETH_P_8021AD)))
+	    likely(!eth_type_vlan(skb->protocol)))
 		return false;
 
 	return true;
@@ -592,15 +607,14 @@
 	if (!skb_vlan_tag_present(skb)) {
 		struct vlan_ethhdr *veh;
 
-		if (likely(protocol != htons(ETH_P_8021Q) &&
-			   protocol != htons(ETH_P_8021AD)))
+		if (likely(!eth_type_vlan(protocol)))
 			return false;
 
 		veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
 	}
 
-	if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD))
+	if (!eth_type_vlan(protocol))
 		return false;
 
 	return true;
diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h
index 5198f8e..c97eab6 100644
--- a/include/linux/iio/sw_trigger.h
+++ b/include/linux/iio/sw_trigger.h
@@ -62,7 +62,7 @@
 				  const char *name,
 				  struct config_item_type *type)
 {
-#ifdef CONFIG_CONFIGFS_FS
+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
 	config_group_init_type_name(&t->group, name, type);
 #endif
 }
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index feb04ea..65da430 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -37,7 +37,7 @@
 		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
 		      struct user_namespace *user_ns,
 		      u32 pid, u32 seq, u16 nlmsg_flags,
-		      const struct nlmsghdr *unlh);
+		      const struct nlmsghdr *unlh, bool net_admin);
 void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
 			 struct netlink_callback *cb,
 			 const struct inet_diag_req_v2 *r,
@@ -56,7 +56,7 @@
 
 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 			     struct inet_diag_msg *r, int ext,
-			     struct user_namespace *user_ns);
+			     struct user_namespace *user_ns, bool net_admin);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f8834f8..325f649 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -15,6 +15,8 @@
 #include <net/net_namespace.h>
 #include <linux/sched/rt.h>
 
+#include <asm/thread_info.h>
+
 #ifdef CONFIG_SMP
 # define INIT_PUSHABLE_TASKS(tsk)					\
 	.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
@@ -183,12 +185,21 @@
 # define INIT_KASAN(tsk)
 #endif
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+# define INIT_TASK_TI(tsk)			\
+	.thread_info = INIT_THREAD_INFO(tsk),	\
+	.stack_refcount = ATOMIC_INIT(1),
+#else
+# define INIT_TASK_TI(tsk)
+#endif
+
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
  */
 #define INIT_TASK(tsk)	\
 {									\
+	INIT_TASK_TI(tsk)						\
 	.state		= 0,						\
 	.stack		= init_stack,					\
 	.usage		= ATOMIC_INIT(2),				\
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index b6683f0..72f0721 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -278,7 +278,8 @@
 extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
-struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs);
+struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity, int nvec);
+int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec);
 
 #else /* CONFIG_SMP */
 
@@ -311,11 +312,18 @@
 	return 0;
 }
 
-static inline struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
+static inline struct cpumask *
+irq_create_affinity_masks(const struct cpumask *affinity, int nvec)
 {
-	*nr_vecs = 1;
 	return NULL;
 }
+
+static inline int
+irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+{
+	return maxvec;
+}
+
 #endif /* CONFIG_SMP */
 
 /*
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3267df4..3d70ece 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -19,6 +19,11 @@
 #define IOMAP_UNWRITTEN	0x04	/* blocks allocated @blkno in unwritten state */
 
 /*
+ * Flags for iomap mappings:
+ */
+#define IOMAP_F_MERGED	0x01	/* contains multiple blocks/extents */
+
+/*
  * Magic value for blkno:
  */
 #define IOMAP_NULL_BLOCK -1LL	/* blkno is not valid */
@@ -27,7 +32,8 @@
 	sector_t		blkno;	/* 1st sector of mapping, 512b units */
 	loff_t			offset;	/* file offset of mapping, bytes */
 	u64			length;	/* length of mapping, bytes */
-	int			type;	/* type of mapping */
+	u16			type;	/* type of mapping */
+	u16			flags;	/* flags for mapping */
 	struct block_device	*bdev;	/* block device for I/O */
 };
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index c6dbcd8..7e9a789 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -18,6 +18,7 @@
 	__s32		dad_transmits;
 	__s32		rtr_solicits;
 	__s32		rtr_solicit_interval;
+	__s32		rtr_solicit_max_interval;
 	__s32		rtr_solicit_delay;
 	__s32		force_mld_version;
 	__s32		mldv1_unsolicited_report_interval;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index b52424e..e798755 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -916,12 +916,20 @@
 			     unsigned int clr, unsigned int set);
 
 struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq);
-int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
-				   int num_ct, const char *name,
-				   irq_flow_handler_t handler,
-				   unsigned int clr, unsigned int set,
-				   enum irq_gc_flags flags);
 
+int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
+				     int num_ct, const char *name,
+				     irq_flow_handler_t handler,
+				     unsigned int clr, unsigned int set,
+				     enum irq_gc_flags flags);
+
+#define irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name,	\
+				       handler,	clr, set, flags)	\
+({									\
+	MAYBE_BUILD_BUG_ON(irqs_per_chip > 32);				\
+	__irq_alloc_domain_generic_chips(d, irqs_per_chip, num_ct, name,\
+					 handler, clr, set, flags);	\
+})
 
 static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d)
 {
@@ -945,6 +953,16 @@
 static inline void irq_gc_unlock(struct irq_chip_generic *gc) { }
 #endif
 
+/*
+ * The irqsave variants are for usage in non interrupt code. Do not use
+ * them in irq_chip callbacks. Use irq_gc_lock() instead.
+ */
+#define irq_gc_lock_irqsave(gc, flags)	\
+	raw_spin_lock_irqsave(&(gc)->lock, flags)
+
+#define irq_gc_unlock_irqrestore(gc, flags)	\
+	raw_spin_unlock_irqrestore(&(gc)->lock, flags)
+
 static inline void irq_reg_writel(struct irq_chip_generic *gc,
 				  u32 val, int reg_offset)
 {
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 56b0b7e..8361c8d 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -337,6 +337,7 @@
  */
 #define E_ITS_MOVI_UNMAPPED_INTERRUPT		0x010107
 #define E_ITS_MOVI_UNMAPPED_COLLECTION		0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT		0x010307
 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT		0x010507
 #define E_ITS_MAPD_DEVICE_OOR			0x010801
 #define E_ITS_MAPC_PROCNUM_OOR			0x010902
@@ -429,9 +430,9 @@
 };
 
 struct irq_domain;
-struct device_node;
+struct fwnode_handle;
 int its_cpu_init(void);
-int its_init(struct device_node *node, struct rdists *rdists,
+int its_init(struct fwnode_handle *handle, struct rdists *rdists,
 	     struct irq_domain *domain);
 
 static inline bool gic_enable_sre(void)
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index b51beeb..c9be579 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -2,6 +2,7 @@
 #define _LINUX_IRQDESC_H
 
 #include <linux/rcupdate.h>
+#include <linux/kobject.h>
 
 /*
  * Core internal functions to deal with irq descriptors
@@ -43,6 +44,7 @@
  * @force_resume_depth:	number of irqactions on a irq descriptor with
  *			IRQF_FORCE_RESUME set
  * @rcu:		rcu head for delayed free
+ * @kobj:		kobject used to represent this struct in sysfs
  * @dir:		/proc/irq/ procfs entry
  * @name:		flow handler name for /proc/interrupts output
  */
@@ -88,6 +90,7 @@
 #endif
 #ifdef CONFIG_SPARSE_IRQ
 	struct rcu_head		rcu;
+	struct kobject		kobj;
 #endif
 	int			parent_irq;
 	struct module		*owner;
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 661af56..a0547c5 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -21,6 +21,8 @@
  *
  * DEFINE_STATIC_KEY_TRUE(key);
  * DEFINE_STATIC_KEY_FALSE(key);
+ * DEFINE_STATIC_KEY_ARRAY_TRUE(keys, count);
+ * DEFINE_STATIC_KEY_ARRAY_FALSE(keys, count);
  * static_branch_likely()
  * static_branch_unlikely()
  *
@@ -267,9 +269,25 @@
 #define DEFINE_STATIC_KEY_TRUE(name)	\
 	struct static_key_true name = STATIC_KEY_TRUE_INIT
 
+#define DECLARE_STATIC_KEY_TRUE(name)	\
+	extern struct static_key_true name
+
 #define DEFINE_STATIC_KEY_FALSE(name)	\
 	struct static_key_false name = STATIC_KEY_FALSE_INIT
 
+#define DECLARE_STATIC_KEY_FALSE(name)	\
+	extern struct static_key_false name
+
+#define DEFINE_STATIC_KEY_ARRAY_TRUE(name, count)		\
+	struct static_key_true name[count] = {			\
+		[0 ... (count) - 1] = STATIC_KEY_TRUE_INIT,	\
+	}
+
+#define DEFINE_STATIC_KEY_ARRAY_FALSE(name, count)		\
+	struct static_key_false name[count] = {			\
+		[0 ... (count) - 1] = STATIC_KEY_FALSE_INIT,	\
+	}
+
 extern bool ____wrong_branch_error(void);
 
 #define static_key_enabled(x)							\
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a611..74fd6f0 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -259,17 +259,14 @@
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 __printf(1, 2)
-void panic(const char *fmt, ...)
-	__noreturn __cold;
+void panic(const char *fmt, ...) __noreturn __cold;
 void nmi_panic(struct pt_regs *regs, const char *msg);
 extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-void do_exit(long error_code)
-	__noreturn;
-void complete_and_exit(struct completion *, long)
-	__noreturn;
+void do_exit(long error_code) __noreturn;
+void complete_and_exit(struct completion *, long) __noreturn;
 
 /* Internal, do not use. */
 int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res);
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 2b6a204..0fb7ffb 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -64,6 +64,13 @@
 		({ (ktime_t){ .tv64 = (lhs).tv64 + (rhs).tv64 }; })
 
 /*
+ * Same as ktime_add(), but avoids undefined behaviour on overflow; however,
+ * this means that you must check the result for overflow yourself.
+ */
+#define ktime_add_unsafe(lhs, rhs) \
+		({ (ktime_t){ .tv64 = (u64) (lhs).tv64 + (rhs).tv64 }; })
+
+/*
  * Add a ktime_t variable and a scalar nanosecond value.
  * res = kt + nsval:
  */
@@ -231,6 +238,11 @@
 	return ktime_sub_ns(kt, usec * NSEC_PER_USEC);
 }
 
+static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec)
+{
+	return ktime_sub_ns(kt, msec * NSEC_PER_MSEC);
+}
+
 extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs);
 
 /**
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 01e908a..9c28b4d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1113,9 +1113,21 @@
 /* create, destroy, and name are mandatory */
 struct kvm_device_ops {
 	const char *name;
+
+	/*
+	 * create is called holding kvm->lock and any operations not suitable
+	 * to do while holding the lock should be deferred to init (see
+	 * below).
+	 */
 	int (*create)(struct kvm_device *dev, u32 type);
 
 	/*
+	 * init is called after create if create is successful and is called
+	 * outside of holding kvm->lock.
+	 */
+	void (*init)(struct kvm_device *dev);
+
+	/*
 	 * Destroy is responsible for freeing dev.
 	 *
 	 * Destroy may be called before or after destructors are called
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 8a3b5d2..ddfcb2d 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -359,6 +359,11 @@
 	struct led_info	*leds;
 };
 
+struct gpio_desc;
+typedef int (*gpio_blink_set_t)(struct gpio_desc *desc, int state,
+				unsigned long *delay_on,
+				unsigned long *delay_off);
+
 /* For the leds-gpio driver */
 struct gpio_led {
 	const char *name;
@@ -382,9 +387,7 @@
 #define GPIO_LED_NO_BLINK_LOW	0	/* No blink GPIO state low */
 #define GPIO_LED_NO_BLINK_HIGH	1	/* No blink GPIO state high */
 #define GPIO_LED_BLINK		2	/* Please, blink */
-	int		(*gpio_blink_set)(struct gpio_desc *desc, int state,
-					unsigned long *delay_on,
-					unsigned long *delay_off);
+	gpio_blink_set_t	gpio_blink_set;
 };
 
 #ifdef CONFIG_NEW_LEDS
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
deleted file mode 100644
index c92ebd1..0000000
--- a/include/linux/lglock.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Specialised local-global spinlock. Can only be declared as global variables
- * to avoid overhead and keep things simple (and we don't want to start using
- * these inside dynamically allocated structures).
- *
- * "local/global locks" (lglocks) can be used to:
- *
- * - Provide fast exclusive access to per-CPU data, with exclusive access to
- *   another CPU's data allowed but possibly subject to contention, and to
- *   provide very slow exclusive access to all per-CPU data.
- * - Or to provide very fast and scalable read serialisation, and to provide
- *   very slow exclusive serialisation of data (not necessarily per-CPU data).
- *
- * Brlocks are also implemented as a short-hand notation for the latter use
- * case.
- *
- * Copyright 2009, 2010, Nick Piggin, Novell Inc.
- */
-#ifndef __LINUX_LGLOCK_H
-#define __LINUX_LGLOCK_H
-
-#include <linux/spinlock.h>
-#include <linux/lockdep.h>
-#include <linux/percpu.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-
-#ifdef CONFIG_SMP
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#define LOCKDEP_INIT_MAP lockdep_init_map
-#else
-#define LOCKDEP_INIT_MAP(a, b, c, d)
-#endif
-
-struct lglock {
-	arch_spinlock_t __percpu *lock;
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lock_class_key lock_key;
-	struct lockdep_map    lock_dep_map;
-#endif
-};
-
-#define DEFINE_LGLOCK(name)						\
-	static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)		\
-	= __ARCH_SPIN_LOCK_UNLOCKED;					\
-	struct lglock name = { .lock = &name ## _lock }
-
-#define DEFINE_STATIC_LGLOCK(name)					\
-	static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)		\
-	= __ARCH_SPIN_LOCK_UNLOCKED;					\
-	static struct lglock name = { .lock = &name ## _lock }
-
-void lg_lock_init(struct lglock *lg, char *name);
-
-void lg_local_lock(struct lglock *lg);
-void lg_local_unlock(struct lglock *lg);
-void lg_local_lock_cpu(struct lglock *lg, int cpu);
-void lg_local_unlock_cpu(struct lglock *lg, int cpu);
-
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2);
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2);
-
-void lg_global_lock(struct lglock *lg);
-void lg_global_unlock(struct lglock *lg);
-
-#else
-/* When !CONFIG_SMP, map lglock to spinlock */
-#define lglock spinlock
-#define DEFINE_LGLOCK(name) DEFINE_SPINLOCK(name)
-#define DEFINE_STATIC_LGLOCK(name) static DEFINE_SPINLOCK(name)
-#define lg_lock_init(lg, name) spin_lock_init(lg)
-#define lg_local_lock spin_lock
-#define lg_local_unlock spin_unlock
-#define lg_local_lock_cpu(lg, cpu) spin_lock(lg)
-#define lg_local_unlock_cpu(lg, cpu) spin_unlock(lg)
-#define lg_global_lock spin_lock
-#define lg_global_unlock spin_unlock
-#endif
-
-#endif
diff --git a/include/linux/list.h b/include/linux/list.h
index 5183138..5809e9a2 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -381,8 +381,11 @@
  *
  * Note that if the list is empty, it returns NULL.
  */
-#define list_first_entry_or_null(ptr, type, member) \
-	(!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
+#define list_first_entry_or_null(ptr, type, member) ({ \
+	struct list_head *head__ = (ptr); \
+	struct list_head *pos__ = READ_ONCE(head__->next); \
+	pos__ != head__ ? list_entry(pos__, type, member) : NULL; \
+})
 
 /**
  * list_next_entry - get the next element in list
diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index ffb9c9d..e58e577 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -59,6 +59,7 @@
 #define LSM_AUDIT_DATA_INODE	9
 #define LSM_AUDIT_DATA_DENTRY	10
 #define LSM_AUDIT_DATA_IOCTL_OP	11
+#define LSM_AUDIT_DATA_FILE	12
 	union 	{
 		struct path path;
 		struct dentry *dentry;
@@ -75,6 +76,7 @@
 #endif
 		char *kmod_name;
 		struct lsm_ioctlop_audit *op;
+		struct file *file;
 	} u;
 	/* this union contains LSM specific data */
 	union {
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 101bf19..558adfa 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -151,6 +151,16 @@
  *	@name name of the last path component used to create file
  *	@ctx pointer to place the pointer to the resulting context in.
  *	@ctxlen point to place the length of the resulting context.
+ * @dentry_create_files_as:
+ *	Compute a context for a dentry as the inode is not yet available
+ *	and set that context in passed in creds so that new files are
+ *	created using that context. Context is calculated using the
+ *	passed in creds and not the creds of the caller.
+ *	@dentry dentry to use in calculating the context.
+ *	@mode mode used to determine resource type.
+ *	@name name of the last path component used to create file
+ *	@old creds which should be used for context calculation
+ *	@new creds to modify
  *
  *
  * Security hooks for inode operations.
@@ -401,6 +411,23 @@
  *	@inode contains a pointer to the inode.
  *	@secid contains a pointer to the location where result will be saved.
  *	In case of failure, @secid will be set to zero.
+ * @inode_copy_up:
+ *	A file is about to be copied up from lower layer to upper layer of
+ *	overlay filesystem. Security module can prepare a set of new creds
+ *	and modify as need be and return new creds. Caller will switch to
+ *	new creds temporarily to create new file and release newly allocated
+ *	creds.
+ *	@src indicates the union dentry of file that is being copied up.
+ *	@new pointer to pointer to return newly allocated creds.
+ *	Returns 0 on success or a negative error code on error.
+ * @inode_copy_up_xattr:
+ *	Filter the xattrs being copied up when a unioned file is copied
+ *	up from a lower layer to the union/overlay layer.
+ *	@name indicates the name of the xattr.
+ *	Returns 0 to accept the xattr, 1 to discard the xattr, -EOPNOTSUPP if
+ *	security module does not know about attribute or a negative error code
+ *	to abort the copy up. Note that the caller is responsible for reading
+ *	and writing the xattrs as this hook is merely a filter.
  *
  * Security hooks for file operations
  *
@@ -1358,6 +1385,10 @@
 	int (*dentry_init_security)(struct dentry *dentry, int mode,
 					const struct qstr *name, void **ctx,
 					u32 *ctxlen);
+	int (*dentry_create_files_as)(struct dentry *dentry, int mode,
+					struct qstr *name,
+					const struct cred *old,
+					struct cred *new);
 
 
 #ifdef CONFIG_SECURITY_PATH
@@ -1425,6 +1456,8 @@
 	int (*inode_listsecurity)(struct inode *inode, char *buffer,
 					size_t buffer_size);
 	void (*inode_getsecid)(struct inode *inode, u32 *secid);
+	int (*inode_copy_up)(struct dentry *src, struct cred **new);
+	int (*inode_copy_up_xattr)(const char *name);
 
 	int (*file_permission)(struct file *file, int mask);
 	int (*file_alloc_security)(struct file *file);
@@ -1455,7 +1488,6 @@
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*kernel_module_request)(char *kmod_name);
-	int (*kernel_module_from_file)(struct file *file);
 	int (*kernel_read_file)(struct file *file, enum kernel_read_file_id id);
 	int (*kernel_post_read_file)(struct file *file, char *buf, loff_t size,
 				     enum kernel_read_file_id id);
@@ -1656,6 +1688,7 @@
 	struct list_head sb_clone_mnt_opts;
 	struct list_head sb_parse_opts_str;
 	struct list_head dentry_init_security;
+	struct list_head dentry_create_files_as;
 #ifdef CONFIG_SECURITY_PATH
 	struct list_head path_unlink;
 	struct list_head path_mkdir;
@@ -1696,6 +1729,8 @@
 	struct list_head inode_setsecurity;
 	struct list_head inode_listsecurity;
 	struct list_head inode_getsecid;
+	struct list_head inode_copy_up;
+	struct list_head inode_copy_up_xattr;
 	struct list_head file_permission;
 	struct list_head file_alloc_security;
 	struct list_head file_free_security;
diff --git a/include/linux/mcb.h b/include/linux/mcb.h
index ead13d2..4097ac9 100644
--- a/include/linux/mcb.h
+++ b/include/linux/mcb.h
@@ -41,15 +41,17 @@
 	char name[CHAMELEON_FILENAME_LEN + 1];
 	int (*get_irq)(struct mcb_device *dev);
 };
-#define to_mcb_bus(b) container_of((b), struct mcb_bus, dev)
+
+static inline struct mcb_bus *to_mcb_bus(struct device *dev)
+{
+	return container_of(dev, struct mcb_bus, dev);
+}
 
 /**
  * struct mcb_device - MEN Chameleon Bus device
  *
- * @bus_list: internal list handling for bus code
  * @dev: device in kernel representation
  * @bus: mcb bus the device is plugged to
- * @subordinate: subordinate MCBus in case of bridge
  * @is_added: flag to check if device is added to bus
  * @driver: associated mcb_driver
  * @id: mcb device id
@@ -62,10 +64,8 @@
  * @memory: memory resource
  */
 struct mcb_device {
-	struct list_head bus_list;
 	struct device dev;
 	struct mcb_bus *bus;
-	struct mcb_bus *subordinate;
 	bool is_added;
 	struct mcb_driver *driver;
 	u16 id;
@@ -76,8 +76,13 @@
 	int rev;
 	struct resource irq;
 	struct resource mem;
+	struct device *dma_dev;
 };
-#define to_mcb_device(x) container_of((x), struct mcb_device, dev)
+
+static inline struct mcb_device *to_mcb_device(struct device *dev)
+{
+	return container_of(dev, struct mcb_device, dev);
+}
 
 /**
  * struct mcb_driver - MEN Chameleon Bus device driver
@@ -95,7 +100,11 @@
 	void (*remove)(struct mcb_device *mdev);
 	void (*shutdown)(struct mcb_device *mdev);
 };
-#define to_mcb_driver(x) container_of((x), struct mcb_driver, driver)
+
+static inline struct mcb_driver *to_mcb_driver(struct device_driver *drv)
+{
+	return container_of(drv, struct mcb_driver, driver);
+}
 
 static inline void *mcb_get_drvdata(struct mcb_device *dev)
 {
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 4429d25..5e5b296 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -195,6 +195,7 @@
 }
 
 extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
+extern void mpol_put_task_policy(struct task_struct *);
 
 #else
 
@@ -297,5 +298,8 @@
 	return -1; /* no node preference */
 }
 
+static inline void mpol_put_task_policy(struct task_struct *task)
+{
+}
 #endif /* CONFIG_NUMA */
 #endif
diff --git a/include/linux/mfd/da8xx-cfgchip.h b/include/linux/mfd/da8xx-cfgchip.h
new file mode 100644
index 0000000..304985e
--- /dev/null
+++ b/include/linux/mfd/da8xx-cfgchip.h
@@ -0,0 +1,153 @@
+/*
+ * TI DaVinci DA8xx CHIPCFGx registers for syscon consumers.
+ *
+ * Copyright (C) 2016 David Lechner <david@lechnology.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_MFD_DA8XX_CFGCHIP_H
+#define __LINUX_MFD_DA8XX_CFGCHIP_H
+
+#include <linux/bitops.h>
+
+/* register offset (32-bit registers) */
+#define CFGCHIP(n)				((n) * 4)
+
+/* CFGCHIP0 (PLL0/EDMA3_0) register bits */
+#define CFGCHIP0_PLL_MASTER_LOCK		BIT(4)
+#define CFGCHIP0_EDMA30TC1DBS(n)		((n) << 2)
+#define CFGCHIP0_EDMA30TC1DBS_MASK		CFGCHIP0_EDMA30TC1DBS(0x3)
+#define CFGCHIP0_EDMA30TC1DBS_16		CFGCHIP0_EDMA30TC1DBS(0x0)
+#define CFGCHIP0_EDMA30TC1DBS_32		CFGCHIP0_EDMA30TC1DBS(0x1)
+#define CFGCHIP0_EDMA30TC1DBS_64		CFGCHIP0_EDMA30TC1DBS(0x2)
+#define CFGCHIP0_EDMA30TC0DBS(n)		((n) << 0)
+#define CFGCHIP0_EDMA30TC0DBS_MASK		CFGCHIP0_EDMA30TC0DBS(0x3)
+#define CFGCHIP0_EDMA30TC0DBS_16		CFGCHIP0_EDMA30TC0DBS(0x0)
+#define CFGCHIP0_EDMA30TC0DBS_32		CFGCHIP0_EDMA30TC0DBS(0x1)
+#define CFGCHIP0_EDMA30TC0DBS_64		CFGCHIP0_EDMA30TC0DBS(0x2)
+
+/* CFGCHIP1 (eCAP/HPI/EDMA3_1/eHRPWM TBCLK/McASP0 AMUTEIN) register bits */
+#define CFGCHIP1_CAP2SRC(n)			((n) << 27)
+#define CFGCHIP1_CAP2SRC_MASK			CFGCHIP1_CAP2SRC(0x1f)
+#define CFGCHIP1_CAP2SRC_ECAP_PIN		CFGCHIP1_CAP2SRC(0x0)
+#define CFGCHIP1_CAP2SRC_MCASP0_TX		CFGCHIP1_CAP2SRC(0x1)
+#define CFGCHIP1_CAP2SRC_MCASP0_RX		CFGCHIP1_CAP2SRC(0x2)
+#define CFGCHIP1_CAP2SRC_EMAC_C0_RX_THRESHOLD	CFGCHIP1_CAP2SRC(0x7)
+#define CFGCHIP1_CAP2SRC_EMAC_C0_RX		CFGCHIP1_CAP2SRC(0x8)
+#define CFGCHIP1_CAP2SRC_EMAC_C0_TX		CFGCHIP1_CAP2SRC(0x9)
+#define CFGCHIP1_CAP2SRC_EMAC_C0_MISC		CFGCHIP1_CAP2SRC(0xa)
+#define CFGCHIP1_CAP2SRC_EMAC_C1_RX_THRESHOLD	CFGCHIP1_CAP2SRC(0xb)
+#define CFGCHIP1_CAP2SRC_EMAC_C1_RX		CFGCHIP1_CAP2SRC(0xc)
+#define CFGCHIP1_CAP2SRC_EMAC_C1_TX		CFGCHIP1_CAP2SRC(0xd)
+#define CFGCHIP1_CAP2SRC_EMAC_C1_MISC		CFGCHIP1_CAP2SRC(0xe)
+#define CFGCHIP1_CAP2SRC_EMAC_C2_RX_THRESHOLD	CFGCHIP1_CAP2SRC(0xf)
+#define CFGCHIP1_CAP2SRC_EMAC_C2_RX		CFGCHIP1_CAP2SRC(0x10)
+#define CFGCHIP1_CAP2SRC_EMAC_C2_TX		CFGCHIP1_CAP2SRC(0x11)
+#define CFGCHIP1_CAP2SRC_EMAC_C2_MISC		CFGCHIP1_CAP2SRC(0x12)
+#define CFGCHIP1_CAP1SRC(n)			((n) << 22)
+#define CFGCHIP1_CAP1SRC_MASK			CFGCHIP1_CAP1SRC(0x1f)
+#define CFGCHIP1_CAP1SRC_ECAP_PIN		CFGCHIP1_CAP1SRC(0x0)
+#define CFGCHIP1_CAP1SRC_MCASP0_TX		CFGCHIP1_CAP1SRC(0x1)
+#define CFGCHIP1_CAP1SRC_MCASP0_RX		CFGCHIP1_CAP1SRC(0x2)
+#define CFGCHIP1_CAP1SRC_EMAC_C0_RX_THRESHOLD	CFGCHIP1_CAP1SRC(0x7)
+#define CFGCHIP1_CAP1SRC_EMAC_C0_RX		CFGCHIP1_CAP1SRC(0x8)
+#define CFGCHIP1_CAP1SRC_EMAC_C0_TX		CFGCHIP1_CAP1SRC(0x9)
+#define CFGCHIP1_CAP1SRC_EMAC_C0_MISC		CFGCHIP1_CAP1SRC(0xa)
+#define CFGCHIP1_CAP1SRC_EMAC_C1_RX_THRESHOLD	CFGCHIP1_CAP1SRC(0xb)
+#define CFGCHIP1_CAP1SRC_EMAC_C1_RX		CFGCHIP1_CAP1SRC(0xc)
+#define CFGCHIP1_CAP1SRC_EMAC_C1_TX		CFGCHIP1_CAP1SRC(0xd)
+#define CFGCHIP1_CAP1SRC_EMAC_C1_MISC		CFGCHIP1_CAP1SRC(0xe)
+#define CFGCHIP1_CAP1SRC_EMAC_C2_RX_THRESHOLD	CFGCHIP1_CAP1SRC(0xf)
+#define CFGCHIP1_CAP1SRC_EMAC_C2_RX		CFGCHIP1_CAP1SRC(0x10)
+#define CFGCHIP1_CAP1SRC_EMAC_C2_TX		CFGCHIP1_CAP1SRC(0x11)
+#define CFGCHIP1_CAP1SRC_EMAC_C2_MISC		CFGCHIP1_CAP1SRC(0x12)
+#define CFGCHIP1_CAP0SRC(n)			((n) << 17)
+#define CFGCHIP1_CAP0SRC_MASK			CFGCHIP1_CAP0SRC(0x1f)
+#define CFGCHIP1_CAP0SRC_ECAP_PIN		CFGCHIP1_CAP0SRC(0x0)
+#define CFGCHIP1_CAP0SRC_MCASP0_TX		CFGCHIP1_CAP0SRC(0x1)
+#define CFGCHIP1_CAP0SRC_MCASP0_RX		CFGCHIP1_CAP0SRC(0x2)
+#define CFGCHIP1_CAP0SRC_EMAC_C0_RX_THRESHOLD	CFGCHIP1_CAP0SRC(0x7)
+#define CFGCHIP1_CAP0SRC_EMAC_C0_RX		CFGCHIP1_CAP0SRC(0x8)
+#define CFGCHIP1_CAP0SRC_EMAC_C0_TX		CFGCHIP1_CAP0SRC(0x9)
+#define CFGCHIP1_CAP0SRC_EMAC_C0_MISC		CFGCHIP1_CAP0SRC(0xa)
+#define CFGCHIP1_CAP0SRC_EMAC_C1_RX_THRESHOLD	CFGCHIP1_CAP0SRC(0xb)
+#define CFGCHIP1_CAP0SRC_EMAC_C1_RX		CFGCHIP1_CAP0SRC(0xc)
+#define CFGCHIP1_CAP0SRC_EMAC_C1_TX		CFGCHIP1_CAP0SRC(0xd)
+#define CFGCHIP1_CAP0SRC_EMAC_C1_MISC		CFGCHIP1_CAP0SRC(0xe)
+#define CFGCHIP1_CAP0SRC_EMAC_C2_RX_THRESHOLD	CFGCHIP1_CAP0SRC(0xf)
+#define CFGCHIP1_CAP0SRC_EMAC_C2_RX		CFGCHIP1_CAP0SRC(0x10)
+#define CFGCHIP1_CAP0SRC_EMAC_C2_TX		CFGCHIP1_CAP0SRC(0x11)
+#define CFGCHIP1_CAP0SRC_EMAC_C2_MISC		CFGCHIP1_CAP0SRC(0x12)
+#define CFGCHIP1_HPIBYTEAD			BIT(16)
+#define CFGCHIP1_HPIENA				BIT(15)
+#define CFGCHIP0_EDMA31TC0DBS(n)		((n) << 13)
+#define CFGCHIP0_EDMA31TC0DBS_MASK		CFGCHIP0_EDMA31TC0DBS(0x3)
+#define CFGCHIP0_EDMA31TC0DBS_16		CFGCHIP0_EDMA31TC0DBS(0x0)
+#define CFGCHIP0_EDMA31TC0DBS_32		CFGCHIP0_EDMA31TC0DBS(0x1)
+#define CFGCHIP0_EDMA31TC0DBS_64		CFGCHIP0_EDMA31TC0DBS(0x2)
+#define CFGCHIP1_TBCLKSYNC			BIT(12)
+#define CFGCHIP1_AMUTESEL0(n)			((n) << 0)
+#define CFGCHIP1_AMUTESEL0_MASK			CFGCHIP1_AMUTESEL0(0xf)
+#define CFGCHIP1_AMUTESEL0_LOW			CFGCHIP1_AMUTESEL0(0x0)
+#define CFGCHIP1_AMUTESEL0_BANK_0		CFGCHIP1_AMUTESEL0(0x1)
+#define CFGCHIP1_AMUTESEL0_BANK_1		CFGCHIP1_AMUTESEL0(0x2)
+#define CFGCHIP1_AMUTESEL0_BANK_2		CFGCHIP1_AMUTESEL0(0x3)
+#define CFGCHIP1_AMUTESEL0_BANK_3		CFGCHIP1_AMUTESEL0(0x4)
+#define CFGCHIP1_AMUTESEL0_BANK_4		CFGCHIP1_AMUTESEL0(0x5)
+#define CFGCHIP1_AMUTESEL0_BANK_5		CFGCHIP1_AMUTESEL0(0x6)
+#define CFGCHIP1_AMUTESEL0_BANK_6		CFGCHIP1_AMUTESEL0(0x7)
+#define CFGCHIP1_AMUTESEL0_BANK_7		CFGCHIP1_AMUTESEL0(0x8)
+
+/* CFGCHIP2 (USB PHY) register bits */
+#define CFGCHIP2_PHYCLKGD			BIT(17)
+#define CFGCHIP2_VBUSSENSE			BIT(16)
+#define CFGCHIP2_RESET				BIT(15)
+#define CFGCHIP2_OTGMODE(n)			((n) << 13)
+#define CFGCHIP2_OTGMODE_MASK			CFGCHIP2_OTGMODE(0x3)
+#define CFGCHIP2_OTGMODE_NO_OVERRIDE		CFGCHIP2_OTGMODE(0x0)
+#define CFGCHIP2_OTGMODE_FORCE_HOST		CFGCHIP2_OTGMODE(0x1)
+#define CFGCHIP2_OTGMODE_FORCE_DEVICE		CFGCHIP2_OTGMODE(0x2)
+#define CFGCHIP2_OTGMODE_FORCE_HOST_VBUS_LOW	CFGCHIP2_OTGMODE(0x3)
+#define CFGCHIP2_USB1PHYCLKMUX			BIT(12)
+#define CFGCHIP2_USB2PHYCLKMUX			BIT(11)
+#define CFGCHIP2_PHYPWRDN			BIT(10)
+#define CFGCHIP2_OTGPWRDN			BIT(9)
+#define CFGCHIP2_DATPOL				BIT(8)
+#define CFGCHIP2_USB1SUSPENDM			BIT(7)
+#define CFGCHIP2_PHY_PLLON			BIT(6)
+#define CFGCHIP2_SESENDEN			BIT(5)
+#define CFGCHIP2_VBDTCTEN			BIT(4)
+#define CFGCHIP2_REFFREQ(n)			((n) << 0)
+#define CFGCHIP2_REFFREQ_MASK			CFGCHIP2_REFFREQ(0xf)
+#define CFGCHIP2_REFFREQ_12MHZ			CFGCHIP2_REFFREQ(0x1)
+#define CFGCHIP2_REFFREQ_24MHZ			CFGCHIP2_REFFREQ(0x2)
+#define CFGCHIP2_REFFREQ_48MHZ			CFGCHIP2_REFFREQ(0x3)
+#define CFGCHIP2_REFFREQ_19_2MHZ		CFGCHIP2_REFFREQ(0x4)
+#define CFGCHIP2_REFFREQ_38_4MHZ		CFGCHIP2_REFFREQ(0x5)
+#define CFGCHIP2_REFFREQ_13MHZ			CFGCHIP2_REFFREQ(0x6)
+#define CFGCHIP2_REFFREQ_26MHZ			CFGCHIP2_REFFREQ(0x7)
+#define CFGCHIP2_REFFREQ_20MHZ			CFGCHIP2_REFFREQ(0x8)
+#define CFGCHIP2_REFFREQ_40MHZ			CFGCHIP2_REFFREQ(0x9)
+
+/* CFGCHIP3 (EMAC/uPP/PLL1/ASYNC3/PRU/DIV4.5/EMIFA) register bits */
+#define CFGCHIP3_RMII_SEL			BIT(8)
+#define CFGCHIP3_UPP_TX_CLKSRC			BIT(6)
+#define CFGCHIP3_PLL1_MASTER_LOCK		BIT(5)
+#define CFGCHIP3_ASYNC3_CLKSRC			BIT(4)
+#define CFGCHIP3_PRUEVTSEL			BIT(3)
+#define CFGCHIP3_DIV45PENA			BIT(2)
+#define CFGCHIP3_EMA_CLKSRC			BIT(1)
+
+/* CFGCHIP4 (McASP0 AMUNTEIN) register bits */
+#define CFGCHIP4_AMUTECLR0			BIT(0)
+
+#endif /* __LINUX_MFD_DA8XX_CFGCHIP_H */
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 2567a87..7f55b8b 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -138,16 +138,16 @@
 /*
  * time in us for processing a single channel, calculated as follows:
  *
- * num cycles = open delay + (sample delay + conv time) * averaging
+ * max num cycles = open delay + (sample delay + conv time) * averaging
  *
- * num cycles: 152 + (1 + 13) * 16 = 376
+ * max num cycles: 262143 + (255 + 13) * 16 = 266431
  *
  * clock frequency: 26MHz / 8 = 3.25MHz
  * clock period: 1 / 3.25MHz = 308ns
  *
- * processing time: 376 * 308ns = 116us
+ * max processing time: 266431 * 308ns = 83ms(approx)
  */
-#define IDLE_TIMEOUT 116 /* microsec */
+#define IDLE_TIMEOUT 83 /* milliseconds */
 
 #define TSCADC_CELLS		2
 
diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h
index 7fdf532..d1db952 100644
--- a/include/linux/mfd/tps65218.h
+++ b/include/linux/mfd/tps65218.h
@@ -63,6 +63,11 @@
 #define TPS65218_CHIPID_CHIP_MASK	0xF8
 #define TPS65218_CHIPID_REV_MASK	0x07
 
+#define TPS65218_REV_1_0		0x0
+#define TPS65218_REV_1_1		0x1
+#define TPS65218_REV_2_0		0x2
+#define TPS65218_REV_2_1		0x3
+
 #define TPS65218_INT1_VPRG		BIT(5)
 #define TPS65218_INT1_AC		BIT(4)
 #define TPS65218_INT1_PB		BIT(3)
@@ -267,6 +272,7 @@
 struct tps65218 {
 	struct device *dev;
 	unsigned int id;
+	u8 rev;
 
 	struct mutex tps_lock;		/* lock guarding the data structure */
 	/* IRQ Data */
diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 5430374..722698a 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -3,6 +3,7 @@
 #include <linux/major.h>
 #include <linux/list.h>
 #include <linux/types.h>
+#include <linux/device.h>
 
 /*
  *	These allocations are managed by device@lanana.org. If you use an
@@ -70,6 +71,13 @@
 extern int misc_register(struct miscdevice *misc);
 extern void misc_deregister(struct miscdevice *misc);
 
+/*
+ * Helper macro for drivers that don't do anything special in module init / exit
+ * call. This helps in eleminating of boilerplate code.
+ */
+#define module_misc_device(__misc_device) \
+	module_driver(__misc_device, misc_register, misc_deregister)
+
 #define MODULE_ALIAS_MISCDEV(minor)				\
 	MODULE_ALIAS("char-major-" __stringify(MISC_MAJOR)	\
 	"-" __stringify(minor))
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 116b284..1f35686 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -309,7 +309,8 @@
 		      struct ifla_vf_stats *vf_stats);
 u32 mlx4_comm_get_version(void);
 int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac);
-int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos);
+int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan,
+		     u8 qos, __be16 proto);
 int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate,
 		     int max_tx_rate);
 int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 42da355..59b50d3 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -221,6 +221,7 @@
 	MLX4_DEV_CAP_FLAG2_ROCE_V1_V2		= 1ULL <<  33,
 	MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER   = 1ULL <<  34,
 	MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT	= 1ULL <<  35,
+	MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP          = 1ULL <<  36,
 };
 
 enum {
@@ -1371,6 +1372,8 @@
 int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable);
 int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val);
 int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv);
+int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port,
+				      bool *vlan_offload_disabled);
 int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index deaa221..b4ee8f6 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -160,6 +160,7 @@
 
 enum { /* fl */
 	MLX4_FL_CV	= 1 << 6,
+	MLX4_FL_SV	= 1 << 5,
 	MLX4_FL_ETH_HIDE_CQE_VLAN	= 1 << 2,
 	MLX4_FL_ETH_SRC_CHECK_MC_LB	= 1 << 1,
 	MLX4_FL_ETH_SRC_CHECK_UC_LB	= 1 << 0,
@@ -267,6 +268,7 @@
 	MLX4_UPD_QP_PATH_MASK_FVL_RX			= 16 + 32,
 	MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB	= 18 + 32,
 	MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB	= 19 + 32,
+	MLX4_UPD_QP_PATH_MASK_SV			= 22 + 32,
 };
 
 enum { /* param3 */
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 2566f6d..7c3c0d3 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -170,12 +170,12 @@
 int mlx5_init_cq_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev);
 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-			struct mlx5_create_cq_mbox_in *in, int inlen);
+			u32 *in, int inlen);
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-		       struct mlx5_query_cq_mbox_out *out);
+		       u32 *out, int outlen);
 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-			struct mlx5_modify_cq_mbox_in *in, int in_sz);
+			u32 *in, int inlen);
 int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
 				   struct mlx5_core_cq *cq, u16 cq_period,
 				   u16 cq_max_count);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0b6d15c..77c1417 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -198,19 +198,6 @@
 };
 
 enum {
-	MLX5_ACCESS_MODE_PA	= 0,
-	MLX5_ACCESS_MODE_MTT	= 1,
-	MLX5_ACCESS_MODE_KLM	= 2
-};
-
-enum {
-	MLX5_MKEY_REMOTE_INVAL	= 1 << 24,
-	MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29,
-	MLX5_MKEY_BSF_EN	= 1 << 30,
-	MLX5_MKEY_LEN64		= 1 << 31,
-};
-
-enum {
 	MLX5_EN_RD	= (u64)1,
 	MLX5_EN_WR	= (u64)2
 };
@@ -411,33 +398,6 @@
 	MLX5_MAX_SGE_RD	= (512 - 16 - 16) / 16
 };
 
-struct mlx5_inbox_hdr {
-	__be16		opcode;
-	u8		rsvd[4];
-	__be16		opmod;
-};
-
-struct mlx5_outbox_hdr {
-	u8		status;
-	u8		rsvd[3];
-	__be32		syndrome;
-};
-
-struct mlx5_cmd_query_adapter_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_cmd_query_adapter_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[24];
-	u8			intapin;
-	u8			rsvd1[13];
-	__be16			vsd_vendor_id;
-	u8			vsd[208];
-	u8			vsd_psid[16];
-};
-
 enum mlx5_odp_transport_cap_bits {
 	MLX5_ODP_SUPPORT_SEND	 = 1 << 31,
 	MLX5_ODP_SUPPORT_RECV	 = 1 << 30,
@@ -455,30 +415,6 @@
 	char reserved2[0xe4];
 };
 
-struct mlx5_cmd_init_hca_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd0[2];
-	__be16			profile;
-	u8			rsvd1[4];
-};
-
-struct mlx5_cmd_init_hca_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_cmd_teardown_hca_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd0[2];
-	__be16			profile;
-	u8			rsvd1[4];
-};
-
-struct mlx5_cmd_teardown_hca_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
 struct mlx5_cmd_layout {
 	u8		type;
 	u8		rsvd0[3];
@@ -494,7 +430,6 @@
 	u8		status_own;
 };
 
-
 struct health_buffer {
 	__be32		assert_var[5];
 	__be32		rsvd0[3];
@@ -856,245 +791,15 @@
 	struct mlx5_cqe64	cqe64;
 };
 
-struct mlx5_srq_ctx {
-	u8			state_log_sz;
-	u8			rsvd0[3];
-	__be32			flags_xrcd;
-	__be32			pgoff_cqn;
-	u8			rsvd1[4];
-	u8			log_pg_sz;
-	u8			rsvd2[7];
-	__be32			pd;
-	__be16			lwm;
-	__be16			wqe_cnt;
-	u8			rsvd3[8];
-	__be64			db_record;
-};
-
-struct mlx5_create_srq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			input_srqn;
-	u8			rsvd0[4];
-	struct mlx5_srq_ctx	ctx;
-	u8			rsvd1[208];
-	__be64			pas[0];
-};
-
-struct mlx5_create_srq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			srqn;
-	u8			rsvd[4];
-};
-
-struct mlx5_destroy_srq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			srqn;
-	u8			rsvd[4];
-};
-
-struct mlx5_destroy_srq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_query_srq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			srqn;
-	u8			rsvd0[4];
-};
-
-struct mlx5_query_srq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[8];
-	struct mlx5_srq_ctx	ctx;
-	u8			rsvd1[32];
-	__be64			pas[0];
-};
-
-struct mlx5_arm_srq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			srqn;
-	__be16			rsvd;
-	__be16			lwm;
-};
-
-struct mlx5_arm_srq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_cq_context {
-	u8			status;
-	u8			cqe_sz_flags;
-	u8			st;
-	u8			rsvd3;
-	u8			rsvd4[6];
-	__be16			page_offset;
-	__be32			log_sz_usr_page;
-	__be16			cq_period;
-	__be16			cq_max_count;
-	__be16			rsvd20;
-	__be16			c_eqn;
-	u8			log_pg_sz;
-	u8			rsvd25[7];
-	__be32			last_notified_index;
-	__be32			solicit_producer_index;
-	__be32			consumer_counter;
-	__be32			producer_counter;
-	u8			rsvd48[8];
-	__be64			db_record_addr;
-};
-
-struct mlx5_create_cq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			input_cqn;
-	u8			rsvdx[4];
-	struct mlx5_cq_context	ctx;
-	u8			rsvd6[192];
-	__be64			pas[0];
-};
-
-struct mlx5_create_cq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			cqn;
-	u8			rsvd0[4];
-};
-
-struct mlx5_destroy_cq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			cqn;
-	u8			rsvd0[4];
-};
-
-struct mlx5_destroy_cq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[8];
-};
-
-struct mlx5_query_cq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			cqn;
-	u8			rsvd0[4];
-};
-
-struct mlx5_query_cq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[8];
-	struct mlx5_cq_context	ctx;
-	u8			rsvd6[16];
-	__be64			pas[0];
-};
-
-struct mlx5_modify_cq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			cqn;
-	__be32			field_select;
-	struct mlx5_cq_context	ctx;
-	u8			rsvd[192];
-	__be64			pas[0];
-};
-
-struct mlx5_modify_cq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_enable_hca_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_enable_hca_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_disable_hca_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_disable_hca_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_eq_context {
-	u8			status;
-	u8			ec_oi;
-	u8			st;
-	u8			rsvd2[7];
-	__be16			page_pffset;
-	__be32			log_sz_usr_page;
-	u8			rsvd3[7];
-	u8			intr;
-	u8			log_page_size;
-	u8			rsvd4[15];
-	__be32			consumer_counter;
-	__be32			produser_counter;
-	u8			rsvd5[16];
-};
-
-struct mlx5_create_eq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd0[3];
-	u8			input_eqn;
-	u8			rsvd1[4];
-	struct mlx5_eq_context	ctx;
-	u8			rsvd2[8];
-	__be64			events_mask;
-	u8			rsvd3[176];
-	__be64			pas[0];
-};
-
-struct mlx5_create_eq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[3];
-	u8			eq_number;
-	u8			rsvd1[4];
-};
-
-struct mlx5_destroy_eq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd0[3];
-	u8			eqn;
-	u8			rsvd1[4];
-};
-
-struct mlx5_destroy_eq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_map_eq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be64			mask;
-	u8			mu;
-	u8			rsvd0[2];
-	u8			eqn;
-	u8			rsvd1[24];
-};
-
-struct mlx5_map_eq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_query_eq_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd0[3];
-	u8			eqn;
-	u8			rsvd1[4];
-};
-
-struct mlx5_query_eq_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-	struct mlx5_eq_context	ctx;
+enum {
+	MLX5_MKEY_STATUS_FREE = 1 << 6,
 };
 
 enum {
-	MLX5_MKEY_STATUS_FREE = 1 << 6,
+	MLX5_MKEY_REMOTE_INVAL	= 1 << 24,
+	MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29,
+	MLX5_MKEY_BSF_EN	= 1 << 30,
+	MLX5_MKEY_LEN64		= 1 << 31,
 };
 
 struct mlx5_mkey_seg {
@@ -1119,134 +824,12 @@
 	u8		rsvd4[4];
 };
 
-struct mlx5_query_special_ctxs_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_query_special_ctxs_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			dump_fill_mkey;
-	__be32			reserved_lkey;
-};
-
-struct mlx5_create_mkey_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			input_mkey_index;
-	__be32			flags;
-	struct mlx5_mkey_seg	seg;
-	u8			rsvd1[16];
-	__be32			xlat_oct_act_size;
-	__be32			rsvd2;
-	u8			rsvd3[168];
-	__be64			pas[0];
-};
-
-struct mlx5_create_mkey_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			mkey;
-	u8			rsvd[4];
-};
-
-struct mlx5_destroy_mkey_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			mkey;
-	u8			rsvd[4];
-};
-
-struct mlx5_destroy_mkey_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_query_mkey_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			mkey;
-};
-
-struct mlx5_query_mkey_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be64			pas[0];
-};
-
-struct mlx5_modify_mkey_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			mkey;
-	__be64			pas[0];
-};
-
-struct mlx5_modify_mkey_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_dump_mkey_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-};
-
-struct mlx5_dump_mkey_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			mkey;
-};
-
-struct mlx5_mad_ifc_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be16			remote_lid;
-	u8			rsvd0;
-	u8			port;
-	u8			rsvd1[4];
-	u8			data[256];
-};
-
-struct mlx5_mad_ifc_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-	u8			data[256];
-};
-
-struct mlx5_access_reg_mbox_in {
-	struct mlx5_inbox_hdr		hdr;
-	u8				rsvd0[2];
-	__be16				register_id;
-	__be32				arg;
-	__be32				data[0];
-};
-
-struct mlx5_access_reg_mbox_out {
-	struct mlx5_outbox_hdr		hdr;
-	u8				rsvd[8];
-	__be32				data[0];
-};
-
 #define MLX5_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
 
 enum {
 	MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO	= 1 <<  0
 };
 
-struct mlx5_allocate_psv_in {
-	struct mlx5_inbox_hdr   hdr;
-	__be32			npsv_pd;
-	__be32			rsvd_psv0;
-};
-
-struct mlx5_allocate_psv_out {
-	struct mlx5_outbox_hdr  hdr;
-	u8			rsvd[8];
-	__be32			psv_idx[4];
-};
-
-struct mlx5_destroy_psv_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32                  psv_number;
-	u8                      rsvd[4];
-};
-
-struct mlx5_destroy_psv_out {
-	struct mlx5_outbox_hdr  hdr;
-	u8                      rsvd[8];
-};
-
 enum {
 	VPORT_STATE_DOWN		= 0x0,
 	VPORT_STATE_UP			= 0x1,
@@ -1381,6 +964,18 @@
 #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \
 	MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap)
 
+#define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \
+	MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap)
+
+#define MLX5_CAP_FLOWTABLE_SNIFFER_RX_MAX(mdev, cap) \
+	MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_sniffer.cap)
+
+#define MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) \
+	MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_sniffer.cap)
+
+#define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \
+	MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap)
+
 #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
 	MLX5_GET(flow_table_eswitch_cap, \
 		 mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ccea6fb..85c4786 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -49,10 +49,6 @@
 #include <linux/mlx5/srq.h>
 
 enum {
-	MLX5_RQ_BITMASK_VSD = 1 << 1,
-};
-
-enum {
 	MLX5_BOARD_ID_LEN = 64,
 	MLX5_MAX_NAME_LEN = 16,
 };
@@ -481,6 +477,7 @@
 };
 
 struct mlx5_eswitch;
+struct mlx5_lag;
 
 struct mlx5_rl_entry {
 	u32                     rate;
@@ -554,6 +551,7 @@
 	struct mlx5_flow_steering *steering;
 	struct mlx5_eswitch     *eswitch;
 	struct mlx5_core_sriov	sriov;
+	struct mlx5_lag		*lag;
 	unsigned long		pci_dev_data;
 	struct mlx5_fc_stats		fc_stats;
 	struct mlx5_rl_table            rl_table;
@@ -771,14 +769,15 @@
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_polling(struct mlx5_core_dev *dev);
-int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr);
-int mlx5_cmd_status_to_err_v2(void *ptr);
-int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
+
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size);
 int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size,
 		     void *out, int out_size, mlx5_cmd_cbk_t callback,
 		     void *context);
+void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
+
+int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari);
@@ -807,15 +806,18 @@
 		      u16 lwm, int is_srq);
 void mlx5_init_mkey_table(struct mlx5_core_dev *dev);
 void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev);
+int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev,
+			     struct mlx5_core_mkey *mkey,
+			     u32 *in, int inlen,
+			     u32 *out, int outlen,
+			     mlx5_cmd_cbk_t callback, void *context);
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 			  struct mlx5_core_mkey *mkey,
-			  struct mlx5_create_mkey_mbox_in *in, int inlen,
-			  mlx5_cmd_cbk_t callback, void *context,
-			  struct mlx5_create_mkey_mbox_out *out);
+			  u32 *in, int inlen);
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
 			   struct mlx5_core_mkey *mkey);
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
-			 struct mlx5_query_mkey_mbox_out *out, int outlen);
+			 u32 *out, int outlen);
 int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey,
 			     u32 *mkey);
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
@@ -826,8 +828,6 @@
 void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev);
 int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
-int mlx5_sriov_init(struct mlx5_core_dev *dev);
-int mlx5_sriov_cleanup(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
 				 s32 npages);
 int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
@@ -865,7 +865,7 @@
 int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq);
 int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-		       struct mlx5_query_eq_mbox_out *out, int outlen);
+		       u32 *out, int outlen);
 int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev);
 int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev);
@@ -930,6 +930,8 @@
 struct mlx5_interface {
 	void *			(*add)(struct mlx5_core_dev *dev);
 	void			(*remove)(struct mlx5_core_dev *dev, void *context);
+	int			(*attach)(struct mlx5_core_dev *dev, void *context);
+	void			(*detach)(struct mlx5_core_dev *dev, void *context);
 	void			(*event)(struct mlx5_core_dev *dev, void *context,
 					 enum mlx5_dev_event event, unsigned long param);
 	void *                  (*get_dev)(void *context);
@@ -942,6 +944,11 @@
 void mlx5_unregister_interface(struct mlx5_interface *intf);
 int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id);
 
+int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
+int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
+bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+
 struct mlx5_profile {
 	u64	mask;
 	u8	log_max_qp;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index e036d60..93ebc5e 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -54,6 +54,7 @@
 
 enum mlx5_flow_namespace_type {
 	MLX5_FLOW_NAMESPACE_BYPASS,
+	MLX5_FLOW_NAMESPACE_LAG,
 	MLX5_FLOW_NAMESPACE_OFFLOADS,
 	MLX5_FLOW_NAMESPACE_ETHTOOL,
 	MLX5_FLOW_NAMESPACE_KERNEL,
@@ -62,6 +63,8 @@
 	MLX5_FLOW_NAMESPACE_FDB,
 	MLX5_FLOW_NAMESPACE_ESW_EGRESS,
 	MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+	MLX5_FLOW_NAMESPACE_SNIFFER_RX,
+	MLX5_FLOW_NAMESPACE_SNIFFER_TX,
 };
 
 struct mlx5_flow_table;
@@ -106,6 +109,9 @@
 			     int prio,
 			     int num_flow_table_entries,
 			     u32 level, u16 vport);
+struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
+					       struct mlx5_flow_namespace *ns,
+					       int prio, u32 level);
 int mlx5_destroy_flow_table(struct mlx5_flow_table *ft);
 
 /* inbox should be set with the following values:
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 21bc455..6045d4d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -152,7 +152,7 @@
 	MLX5_CMD_OP_CONFIG_INT_MODERATION         = 0x804,
 	MLX5_CMD_OP_ACCESS_REG                    = 0x805,
 	MLX5_CMD_OP_ATTACH_TO_MCG                 = 0x806,
-	MLX5_CMD_OP_DETTACH_FROM_MCG              = 0x807,
+	MLX5_CMD_OP_DETACH_FROM_MCG               = 0x807,
 	MLX5_CMD_OP_GET_DROPPED_PACKET_LOG        = 0x80a,
 	MLX5_CMD_OP_MAD_IFC                       = 0x50d,
 	MLX5_CMD_OP_QUERY_MAD_DEMUX               = 0x80b,
@@ -174,6 +174,12 @@
 	MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY         = 0x82b,
 	MLX5_CMD_OP_SET_WOL_ROL                   = 0x830,
 	MLX5_CMD_OP_QUERY_WOL_ROL                 = 0x831,
+	MLX5_CMD_OP_CREATE_LAG                    = 0x840,
+	MLX5_CMD_OP_MODIFY_LAG                    = 0x841,
+	MLX5_CMD_OP_QUERY_LAG                     = 0x842,
+	MLX5_CMD_OP_DESTROY_LAG                   = 0x843,
+	MLX5_CMD_OP_CREATE_VPORT_LAG              = 0x844,
+	MLX5_CMD_OP_DESTROY_VPORT_LAG             = 0x845,
 	MLX5_CMD_OP_CREATE_TIR                    = 0x900,
 	MLX5_CMD_OP_MODIFY_TIR                    = 0x901,
 	MLX5_CMD_OP_DESTROY_TIR                   = 0x902,
@@ -212,6 +218,8 @@
 	MLX5_CMD_OP_DEALLOC_FLOW_COUNTER          = 0x93a,
 	MLX5_CMD_OP_QUERY_FLOW_COUNTER            = 0x93b,
 	MLX5_CMD_OP_MODIFY_FLOW_TABLE             = 0x93c,
+	MLX5_CMD_OP_ALLOC_ENCAP_HEADER            = 0x93d,
+	MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
 	MLX5_CMD_OP_MAX
 };
 
@@ -281,7 +289,9 @@
 	u8         modify_root[0x1];
 	u8         identified_miss_table_mode[0x1];
 	u8         flow_table_modify[0x1];
-	u8         reserved_at_7[0x19];
+	u8         encap[0x1];
+	u8         decap[0x1];
+	u8         reserved_at_9[0x17];
 
 	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
@@ -473,7 +483,9 @@
 
 struct mlx5_ifc_flow_table_nic_cap_bits {
 	u8         nic_rx_multi_path_tirs[0x1];
-	u8         reserved_at_1[0x1ff];
+	u8         nic_rx_multi_path_tirs_fts[0x1];
+	u8         allow_sniffer_and_nic_rx_shared_tir[0x1];
+	u8         reserved_at_3[0x1fd];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
 
@@ -512,7 +524,15 @@
 	u8         nic_vport_node_guid_modify[0x1];
 	u8         nic_vport_port_guid_modify[0x1];
 
-	u8         reserved_at_20[0x7e0];
+	u8         vxlan_encap_decap[0x1];
+	u8         nvgre_encap_decap[0x1];
+	u8         reserved_at_22[0x9];
+	u8         log_max_encap_headers[0x5];
+	u8         reserved_2b[0x6];
+	u8         max_encap_header_size[0xa];
+
+	u8         reserved_40[0x7c0];
+
 };
 
 struct mlx5_ifc_qos_cap_bits {
@@ -767,7 +787,9 @@
 	u8         out_of_seq_cnt[0x1];
 	u8         vport_counters[0x1];
 	u8         retransmission_q_counters[0x1];
-	u8         reserved_at_183[0x3];
+	u8         reserved_at_183[0x1];
+	u8         modify_rq_counter_set_id[0x1];
+	u8         reserved_at_185[0x1];
 	u8         max_qp_cnt[0xa];
 	u8         pkey_table_size[0x10];
 
@@ -870,7 +892,10 @@
 	u8         pad_tx_eth_packet[0x1];
 	u8         reserved_at_263[0x8];
 	u8         log_bf_reg_size[0x5];
-	u8         reserved_at_270[0x10];
+
+	u8         reserved_at_270[0xb];
+	u8         lag_master[0x1];
+	u8         num_lag_ports[0x4];
 
 	u8         reserved_at_280[0x10];
 	u8         max_wqe_sz_sq[0x10];
@@ -1904,7 +1929,7 @@
 
 struct mlx5_ifc_qpc_bits {
 	u8         state[0x4];
-	u8         reserved_at_4[0x4];
+	u8         lag_tx_port_affinity[0x4];
 	u8         st[0x8];
 	u8         reserved_at_10[0x3];
 	u8         pm_state[0x2];
@@ -1966,7 +1991,10 @@
 	u8         reserved_at_3e0[0x8];
 	u8         cqn_snd[0x18];
 
-	u8         reserved_at_400[0x40];
+	u8         reserved_at_400[0x8];
+	u8         deth_sqpn[0x18];
+
+	u8         reserved_at_420[0x20];
 
 	u8         reserved_at_440[0x8];
 	u8         last_acked_psn[0x18];
@@ -2064,6 +2092,8 @@
 	MLX5_FLOW_CONTEXT_ACTION_DROP      = 0x2,
 	MLX5_FLOW_CONTEXT_ACTION_FWD_DEST  = 0x4,
 	MLX5_FLOW_CONTEXT_ACTION_COUNT     = 0x8,
+	MLX5_FLOW_CONTEXT_ACTION_ENCAP     = 0x10,
+	MLX5_FLOW_CONTEXT_ACTION_DECAP     = 0x20,
 };
 
 struct mlx5_ifc_flow_context_bits {
@@ -2083,7 +2113,9 @@
 	u8         reserved_at_a0[0x8];
 	u8         flow_counter_list_size[0x18];
 
-	u8         reserved_at_c0[0x140];
+	u8         encap_id[0x20];
+
+	u8         reserved_at_e0[0x120];
 
 	struct mlx5_ifc_fte_match_param_bits match_value;
 
@@ -2146,7 +2178,11 @@
 };
 
 struct mlx5_ifc_tisc_bits {
-	u8         reserved_at_0[0xc];
+	u8         strict_lag_tx_port_affinity[0x1];
+	u8         reserved_at_1[0x3];
+	u8         lag_tx_port_affinity[0x04];
+
+	u8         reserved_at_8[0x4];
 	u8         prio[0x4];
 	u8         reserved_at_10[0x10];
 
@@ -2808,7 +2844,7 @@
 
 	struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context;
 
-	u8         reserved_at_180[0x180];
+	u8         reserved_at_180[0x200];
 
 	struct mlx5_ifc_wq_bits wq;
 };
@@ -3489,7 +3525,7 @@
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x20];
+	u8         dump_fill_mkey[0x20];
 
 	u8         resd_lkey[0x20];
 };
@@ -4213,6 +4249,85 @@
 	u8         reserved_at_60[0x20];
 };
 
+struct mlx5_ifc_encap_header_in_bits {
+	u8         reserved_at_0[0x5];
+	u8         header_type[0x3];
+	u8         reserved_at_8[0xe];
+	u8         encap_header_size[0xa];
+
+	u8         reserved_at_20[0x10];
+	u8         encap_header[2][0x8];
+
+	u8         more_encap_header[0][0x8];
+};
+
+struct mlx5_ifc_query_encap_header_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0xa0];
+
+	struct mlx5_ifc_encap_header_in_bits encap_header[0];
+};
+
+struct mlx5_ifc_query_encap_header_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         encap_id[0x20];
+
+	u8         reserved_at_60[0xa0];
+};
+
+struct mlx5_ifc_alloc_encap_header_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         encap_id[0x20];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_alloc_encap_header_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0xa0];
+
+	struct mlx5_ifc_encap_header_in_bits encap_header;
+};
+
+struct mlx5_ifc_dealloc_encap_header_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_dealloc_encap_header_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         encap_id[0x20];
+
+	u8         reserved_60[0x20];
+};
+
 struct mlx5_ifc_query_dct_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
@@ -4517,7 +4632,9 @@
 struct mlx5_ifc_modify_tis_bitmask_bits {
 	u8         reserved_at_0[0x20];
 
-	u8         reserved_at_20[0x1f];
+	u8         reserved_at_20[0x1d];
+	u8         lag_tx_port_affinity[0x1];
+	u8         strict_lag_tx_port_affinity[0x1];
 	u8         prio[0x1];
 };
 
@@ -4652,6 +4769,11 @@
 	u8         reserved_at_40[0x40];
 };
 
+enum {
+	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
+	MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3,
+};
+
 struct mlx5_ifc_modify_rq_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
@@ -4721,7 +4843,7 @@
 	u8         reserved_at_0[0x16];
 	u8         node_guid[0x1];
 	u8         port_guid[0x1];
-	u8         reserved_at_18[0x1];
+	u8         min_inline[0x1];
 	u8         mtu[0x1];
 	u8         change_event[0x1];
 	u8         promisc[0x1];
@@ -6099,7 +6221,9 @@
 
 	u8         reserved_at_a0[0x20];
 
-	u8         reserved_at_c0[0x4];
+	u8         encap_en[0x1];
+	u8         decap_en[0x1];
+	u8         reserved_at_c2[0x2];
 	u8         table_miss_mode[0x4];
 	u8         level[0x8];
 	u8         reserved_at_d0[0x8];
@@ -6108,7 +6232,10 @@
 	u8         reserved_at_e0[0x8];
 	u8         table_miss_id[0x18];
 
-	u8         reserved_at_100[0x100];
+	u8         reserved_at_100[0x8];
+	u8         lag_master_next_table_id[0x18];
+
+	u8         reserved_at_120[0x80];
 };
 
 struct mlx5_ifc_create_flow_group_out_bits {
@@ -6710,9 +6837,10 @@
 };
 
 struct mlx5_ifc_ptys_reg_bits {
-	u8         an_disable_cap[0x1];
+	u8         reserved_at_0[0x1];
 	u8         an_disable_admin[0x1];
-	u8         reserved_at_2[0x6];
+	u8         an_disable_cap[0x1];
+	u8         reserved_at_3[0x5];
 	u8         local_port[0x8];
 	u8         reserved_at_10[0xd];
 	u8         proto_mask[0x3];
@@ -7562,7 +7690,8 @@
 };
 
 enum {
-	MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1,
+	MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID     = (1UL << 0),
+	MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID = (1UL << 15),
 };
 
 struct mlx5_ifc_modify_flow_table_out_bits {
@@ -7601,7 +7730,10 @@
 	u8         reserved_at_e0[0x8];
 	u8         table_miss_id[0x18];
 
-	u8         reserved_at_100[0x100];
+	u8         reserved_at_100[0x8];
+	u8         lag_master_next_table_id[0x18];
+
+	u8         reserved_at_120[0x80];
 };
 
 struct mlx5_ifc_ets_tcn_config_reg_bits {
@@ -7709,4 +7841,134 @@
 	u8         error[0x8];
 	u8         reserved_at_a0[0x160];
 };
+
+struct mlx5_ifc_lagc_bits {
+	u8         reserved_at_0[0x1d];
+	u8         lag_state[0x3];
+
+	u8         reserved_at_20[0x14];
+	u8         tx_remap_affinity_2[0x4];
+	u8         reserved_at_38[0x4];
+	u8         tx_remap_affinity_1[0x4];
+};
+
+struct mlx5_ifc_create_lag_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_create_lag_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	struct mlx5_ifc_lagc_bits ctx;
+};
+
+struct mlx5_ifc_modify_lag_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_modify_lag_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x20];
+	u8         field_select[0x20];
+
+	struct mlx5_ifc_lagc_bits ctx;
+};
+
+struct mlx5_ifc_query_lag_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+
+	struct mlx5_ifc_lagc_bits ctx;
+};
+
+struct mlx5_ifc_query_lag_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_lag_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_lag_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_create_vport_lag_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_create_vport_lag_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_vport_lag_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         reserved_at_40[0x40];
+};
+
+struct mlx5_ifc_destroy_vport_lag_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+
+	u8         reserved_at_40[0x40];
+};
+
 #endif /* MLX5_IFC_H */
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index e3012cc..b3065ac 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -61,6 +61,39 @@
 #define MLX5_I2C_ADDR_HIGH		0x51
 #define MLX5_EEPROM_PAGE_LENGTH		256
 
+enum mlx5e_link_mode {
+	MLX5E_1000BASE_CX_SGMII	 = 0,
+	MLX5E_1000BASE_KX	 = 1,
+	MLX5E_10GBASE_CX4	 = 2,
+	MLX5E_10GBASE_KX4	 = 3,
+	MLX5E_10GBASE_KR	 = 4,
+	MLX5E_20GBASE_KR2	 = 5,
+	MLX5E_40GBASE_CR4	 = 6,
+	MLX5E_40GBASE_KR4	 = 7,
+	MLX5E_56GBASE_R4	 = 8,
+	MLX5E_10GBASE_CR	 = 12,
+	MLX5E_10GBASE_SR	 = 13,
+	MLX5E_10GBASE_ER	 = 14,
+	MLX5E_40GBASE_SR4	 = 15,
+	MLX5E_40GBASE_LR4	 = 16,
+	MLX5E_50GBASE_SR2	 = 18,
+	MLX5E_100GBASE_CR4	 = 20,
+	MLX5E_100GBASE_SR4	 = 21,
+	MLX5E_100GBASE_KR4	 = 22,
+	MLX5E_100GBASE_LR4	 = 23,
+	MLX5E_100BASE_TX	 = 24,
+	MLX5E_1000BASE_T	 = 25,
+	MLX5E_10GBASE_T		 = 26,
+	MLX5E_25GBASE_CR	 = 27,
+	MLX5E_25GBASE_KR	 = 28,
+	MLX5E_25GBASE_SR	 = 29,
+	MLX5E_50GBASE_CR2	 = 30,
+	MLX5E_50GBASE_KR2	 = 31,
+	MLX5E_LINK_MODES_NUMBER,
+};
+
+#define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
+
 int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps);
 int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys,
 			 int ptys_size, int proto_mask, u8 local_port);
@@ -70,9 +103,10 @@
 				u32 *proto_admin, int proto_mask);
 int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev,
 				    u8 *link_width_oper, u8 local_port);
-int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev,
-			       u8 *proto_oper, int proto_mask,
-			       u8 local_port);
+int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev,
+				  u8 *proto_oper, u8 local_port);
+int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev,
+				   u32 *proto_oper, u8 local_port);
 int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable,
 		       u32 proto_admin, int proto_mask);
 void mlx5_toggle_port_link(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 7879bf4..0aacb2a 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -123,12 +123,13 @@
 };
 
 enum {
-	MLX5_NON_ZERO_RQ	= 0 << 24,
-	MLX5_SRQ_RQ		= 1 << 24,
-	MLX5_CRQ_RQ		= 2 << 24,
-	MLX5_ZERO_LEN_RQ	= 3 << 24
+	MLX5_NON_ZERO_RQ	= 0x0,
+	MLX5_SRQ_RQ		= 0x1,
+	MLX5_CRQ_RQ		= 0x2,
+	MLX5_ZERO_LEN_RQ	= 0x3
 };
 
+/* TODO REM */
 enum {
 	/* params1 */
 	MLX5_QP_BIT_SRE				= 1 << 15,
@@ -178,12 +179,6 @@
 };
 
 enum {
-	MLX5_QP_LAT_SENSITIVE	= 1 << 28,
-	MLX5_QP_BLOCK_MCAST	= 1 << 30,
-	MLX5_QP_ENABLE_SIG	= 1 << 31,
-};
-
-enum {
 	MLX5_RCV_DBR	= 0,
 	MLX5_SND_DBR	= 1,
 };
@@ -484,6 +479,7 @@
 	u8			rmac[6];
 };
 
+/* FIXME: use mlx5_ifc.h qpc */
 struct mlx5_qp_context {
 	__be32			flags;
 	__be32			flags_pd;
@@ -525,99 +521,6 @@
 	u8			rsvd1[24];
 };
 
-struct mlx5_create_qp_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			input_qpn;
-	u8			rsvd0[4];
-	__be32			opt_param_mask;
-	u8			rsvd1[4];
-	struct mlx5_qp_context	ctx;
-	u8			rsvd3[16];
-	__be64			pas[0];
-};
-
-struct mlx5_create_qp_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			qpn;
-	u8			rsvd0[4];
-};
-
-struct mlx5_destroy_qp_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			qpn;
-	u8			rsvd0[4];
-};
-
-struct mlx5_destroy_qp_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[8];
-};
-
-struct mlx5_modify_qp_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			qpn;
-	u8			rsvd0[4];
-	__be32			optparam;
-	u8			rsvd1[4];
-	struct mlx5_qp_context	ctx;
-	u8			rsvd2[16];
-};
-
-struct mlx5_modify_qp_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[8];
-};
-
-struct mlx5_query_qp_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			qpn;
-	u8			rsvd[4];
-};
-
-struct mlx5_query_qp_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd1[8];
-	__be32			optparam;
-	u8			rsvd0[4];
-	struct mlx5_qp_context	ctx;
-	u8			rsvd2[16];
-	__be64			pas[0];
-};
-
-struct mlx5_conf_sqp_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			qpn;
-	u8			rsvd[3];
-	u8			type;
-};
-
-struct mlx5_conf_sqp_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_alloc_xrcd_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
-struct mlx5_alloc_xrcd_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	__be32			xrcdn;
-	u8			rsvd[4];
-};
-
-struct mlx5_dealloc_xrcd_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			xrcdn;
-	u8			rsvd[4];
-};
-
-struct mlx5_dealloc_xrcd_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
 static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn)
 {
 	return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
@@ -628,28 +531,17 @@
 	return radix_tree_lookup(&dev->priv.mkey_table.tree, key);
 }
 
-struct mlx5_page_fault_resume_mbox_in {
-	struct mlx5_inbox_hdr	hdr;
-	__be32			flags_qpn;
-	u8			reserved[4];
-};
-
-struct mlx5_page_fault_resume_mbox_out {
-	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd[8];
-};
-
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
-			struct mlx5_create_qp_mbox_in *in,
+			u32 *in,
 			int inlen);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation,
-			struct mlx5_modify_qp_mbox_in *in, int sqd_event,
+int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
+			u32 opt_param_mask, void *qpc,
 			struct mlx5_core_qp *qp);
 int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 			 struct mlx5_core_qp *qp);
 int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
-		       struct mlx5_query_qp_mbox_out *out, int outlen);
+		       u32 *out, int outlen);
 
 int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn);
 int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index e087b7d..451b0bd 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -45,6 +45,8 @@
 				     u16 vport, u8 *addr);
 void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 				     u8 *min_inline);
+int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+				     u16 vport, u8 min_inline);
 int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev,
 				      u16 vport, u8 *addr);
 int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 08ed53e..5f14534 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2014,10 +2014,13 @@
 
 extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
 extern struct file *get_mm_exe_file(struct mm_struct *mm);
+extern struct file *get_task_exe_file(struct task_struct *task);
 
 extern bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long npages);
 extern void vm_stat_account(struct mm_struct *, vm_flags_t, long npages);
 
+extern bool vma_is_special_mapping(const struct vm_area_struct *vma,
+				   const struct vm_special_mapping *sm);
 extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
 				   unsigned long addr, unsigned long len,
 				   unsigned long flags,
diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h
index 0d126ae..d43ef96 100644
--- a/include/linux/mmc/sdio_ids.h
+++ b/include/linux/mmc/sdio_ids.h
@@ -32,6 +32,7 @@
 #define SDIO_DEVICE_ID_BROADCOM_43340		0xa94c
 #define SDIO_DEVICE_ID_BROADCOM_43341		0xa94d
 #define SDIO_DEVICE_ID_BROADCOM_4335_4339	0x4335
+#define SDIO_DEVICE_ID_BROADCOM_4339		0x4339
 #define SDIO_DEVICE_ID_BROADCOM_43362		0xa962
 #define SDIO_DEVICE_ID_BROADCOM_43430		0xa9a6
 #define SDIO_DEVICE_ID_BROADCOM_4345		0x4345
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d572b78..7f2ae99 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -828,9 +828,21 @@
  */
 #define zone_idx(zone)		((zone) - (zone)->zone_pgdat->node_zones)
 
-static inline int populated_zone(struct zone *zone)
+/*
+ * Returns true if a zone has pages managed by the buddy allocator.
+ * All the reclaim decisions have to use this function rather than
+ * populated_zone(). If the whole zone is reserved then we can easily
+ * end up with populated_zone() && !managed_zone().
+ */
+static inline bool managed_zone(struct zone *zone)
 {
-	return (!!zone->present_pages);
+	return zone->managed_pages;
+}
+
+/* Returns true if a zone has memory */
+static inline bool populated_zone(struct zone *zone)
+{
+	return zone->present_pages;
 }
 
 extern int movable_zone;
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index d351fd3..e5fb813 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -120,5 +120,5 @@
 struct rtmsg;
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
 		   __be32 saddr, __be32 daddr,
-		   struct rtmsg *rtm, int nowait);
+		   struct rtmsg *rtm, int nowait, u32 portid);
 #endif
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 3987b64..19a1c0c 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -116,7 +116,7 @@
 
 struct rtmsg;
 extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
-			   struct rtmsg *rtm, int nowait);
+			   struct rtmsg *rtm, int nowait, u32 portid);
 
 #ifdef CONFIG_IPV6_MROUTE
 extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 4f0bfe5..0db320b 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -68,7 +68,7 @@
 	unsigned int			nvec_used;
 	struct device			*dev;
 	struct msi_msg			msg;
-	const struct cpumask		*affinity;
+	struct cpumask			*affinity;
 
 	union {
 		/* PCI MSI/X specific data */
@@ -123,7 +123,8 @@
 }
 #endif /* CONFIG_PCI_MSI */
 
-struct msi_desc *alloc_msi_entry(struct device *dev);
+struct msi_desc *alloc_msi_entry(struct device *dev, int nvec,
+				 const struct cpumask *affinity);
 void free_msi_entry(struct msi_desc *entry);
 void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
 void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg);
@@ -270,6 +271,8 @@
 	MSI_FLAG_MULTI_PCI_MSI		= (1 << 2),
 	/* Support PCI MSIX interrupts */
 	MSI_FLAG_PCI_MSIX		= (1 << 3),
+	/* Needs early activate, required for PCI */
+	MSI_FLAG_ACTIVATE_EARLY		= (1 << 4),
 };
 
 int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask,
diff --git a/include/linux/net.h b/include/linux/net.h
index b9f0ff4..cd0c8bd 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -25,6 +25,7 @@
 #include <linux/kmemcheck.h>
 #include <linux/rcupdate.h>
 #include <linux/once.h>
+#include <linux/fs.h>
 
 #include <uapi/linux/net.h>
 
@@ -128,6 +129,9 @@
 struct sockaddr;
 struct msghdr;
 struct module;
+struct sk_buff;
+typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
+			       unsigned int, size_t);
 
 struct proto_ops {
 	int		family;
@@ -186,6 +190,8 @@
 				       struct pipe_inode_info *pipe, size_t len, unsigned int flags);
 	int		(*set_peek_off)(struct sock *sk, int val);
 	int		(*peek_len)(struct socket *sock);
+	int		(*read_sock)(struct sock *sk, read_descriptor_t *desc,
+				     sk_read_actor_t recv_actor);
 };
 
 #define DECLARE_SOCKADDR(type, dst, src)	\
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 076df536..136ae6bb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@
 #include <uapi/linux/netdevice.h>
 #include <uapi/linux/if_bonding.h>
 #include <uapi/linux/pkt_cls.h>
+#include <linux/hashtable.h>
 
 struct netpoll_info;
 struct device;
@@ -788,6 +789,7 @@
 	TC_SETUP_CLSU32,
 	TC_SETUP_CLSFLOWER,
 	TC_SETUP_MATCHALL,
+	TC_SETUP_CLSBPF,
 };
 
 struct tc_cls_u32_offload;
@@ -799,6 +801,7 @@
 		struct tc_cls_u32_offload *cls_u32;
 		struct tc_cls_flower_offload *cls_flower;
 		struct tc_cls_matchall_offload *cls_mall;
+		struct tc_cls_bpf_offload *cls_bpf;
 	};
 };
 
@@ -923,6 +926,14 @@
  *	3. Update dev->stats asynchronously and atomically, and define
  *	   neither operation.
  *
+ * bool (*ndo_has_offload_stats)(int attr_id)
+ *	Return true if this device supports offload stats of this attr_id.
+ *
+ * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev,
+ *	void *attr_data)
+ *	Get statistics for offload operations by attr_id. Write it into the
+ *	attr_data pointer.
+ *
  * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid);
  *	If device supports VLAN filtering this function is called when a
  *	VLAN id is registered.
@@ -935,7 +946,8 @@
  *
  *	SR-IOV management functions.
  * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
- * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
+ * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan,
+ *			  u8 qos, __be16 proto);
  * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
  *			  int max_tx_rate);
  * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
@@ -1030,7 +1042,7 @@
  *	Deletes the FDB entry from dev coresponding to addr.
  * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb,
  *		       struct net_device *dev, struct net_device *filter_dev,
- *		       int idx)
+ *		       int *idx)
  *	Used to add FDB entries to dump requests. Implementers should add
  *	entries to skb and update idx with the number of entries.
  *
@@ -1154,6 +1166,10 @@
 
 	struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev,
 						     struct rtnl_link_stats64 *storage);
+	bool			(*ndo_has_offload_stats)(int attr_id);
+	int			(*ndo_get_offload_stats)(int attr_id,
+							 const struct net_device *dev,
+							 void *attr_data);
 	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 
 	int			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
@@ -1172,7 +1188,8 @@
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
 						  int queue, u8 *mac);
 	int			(*ndo_set_vf_vlan)(struct net_device *dev,
-						   int queue, u16 vlan, u8 qos);
+						   int queue, u16 vlan,
+						   u8 qos, __be16 proto);
 	int			(*ndo_set_vf_rate)(struct net_device *dev,
 						   int vf, int min_tx_rate,
 						   int max_tx_rate);
@@ -1262,7 +1279,7 @@
 						struct netlink_callback *cb,
 						struct net_device *dev,
 						struct net_device *filter_dev,
-						int idx);
+						int *idx);
 
 	int			(*ndo_bridge_setlink)(struct net_device *dev,
 						      struct nlmsghdr *nlh,
@@ -1561,8 +1578,6 @@
  *
  *	@xps_maps:	XXX: need comments on this one
  *
- *	@offload_fwd_mark:	Offload device fwding mark
- *
  *	@watchdog_timeo:	Represents the timeout that is used by
  *				the watchdog (see dev_watchdog())
  *	@watchdog_timer:	List of timers
@@ -1784,7 +1799,7 @@
 #endif
 	struct netdev_queue __rcu *ingress_queue;
 #ifdef CONFIG_NETFILTER_INGRESS
-	struct list_head	nf_hooks_ingress;
+	struct nf_hook_entry __rcu *nf_hooks_ingress;
 #endif
 
 	unsigned char		broadcast[MAX_ADDR_LEN];
@@ -1800,6 +1815,9 @@
 	unsigned int		num_tx_queues;
 	unsigned int		real_num_tx_queues;
 	struct Qdisc		*qdisc;
+#ifdef CONFIG_NET_SCHED
+	DECLARE_HASHTABLE	(qdisc_hash, 4);
+#endif
 	unsigned long		tx_queue_len;
 	spinlock_t		tx_global_lock;
 	int			watchdog_timeo;
@@ -1810,9 +1828,6 @@
 #ifdef CONFIG_NET_CLS_ACT
 	struct tcf_proto __rcu  *egress_cl_list;
 #endif
-#ifdef CONFIG_NET_SWITCHDEV
-	u32			offload_fwd_mark;
-#endif
 
 	/* These may be needed for future network-power-down code. */
 	struct timer_list	watchdog_timer;
@@ -3267,6 +3282,7 @@
 	napi->skb = NULL;
 }
 
+bool netdev_is_rx_handler_busy(struct net_device *dev);
 int netdev_rx_handler_register(struct net_device *dev,
 			       rx_handler_func_t *rx_handler,
 			       void *rx_handler_data);
@@ -3891,8 +3907,7 @@
 extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly;
 void netdev_rss_key_fill(void *buffer, size_t len);
 
-int dev_get_nest_level(struct net_device *dev,
-		       bool (*type_check)(const struct net_device *dev));
+int dev_get_nest_level(struct net_device *dev);
 int skb_checksum_help(struct sk_buff *skb);
 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 				  netdev_features_t features, bool tx_path);
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 9230f9a..abc7fdc 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -55,12 +55,34 @@
 	struct net_device *out;
 	struct sock *sk;
 	struct net *net;
-	struct list_head *hook_list;
+	struct nf_hook_entry __rcu *hook_entries;
 	int (*okfn)(struct net *, struct sock *, struct sk_buff *);
 };
 
+typedef unsigned int nf_hookfn(void *priv,
+			       struct sk_buff *skb,
+			       const struct nf_hook_state *state);
+struct nf_hook_ops {
+	struct list_head	list;
+
+	/* User fills in from here down. */
+	nf_hookfn		*hook;
+	struct net_device	*dev;
+	void			*priv;
+	u_int8_t		pf;
+	unsigned int		hooknum;
+	/* Hooks are ordered in ascending priority. */
+	int			priority;
+};
+
+struct nf_hook_entry {
+	struct nf_hook_entry __rcu	*next;
+	struct nf_hook_ops		ops;
+	const struct nf_hook_ops	*orig_ops;
+};
+
 static inline void nf_hook_state_init(struct nf_hook_state *p,
-				      struct list_head *hook_list,
+				      struct nf_hook_entry *hook_entry,
 				      unsigned int hook,
 				      int thresh, u_int8_t pf,
 				      struct net_device *indev,
@@ -76,26 +98,11 @@
 	p->out = outdev;
 	p->sk = sk;
 	p->net = net;
-	p->hook_list = hook_list;
+	RCU_INIT_POINTER(p->hook_entries, hook_entry);
 	p->okfn = okfn;
 }
 
-typedef unsigned int nf_hookfn(void *priv,
-			       struct sk_buff *skb,
-			       const struct nf_hook_state *state);
 
-struct nf_hook_ops {
-	struct list_head 	list;
-
-	/* User fills in from here down. */
-	nf_hookfn		*hook;
-	struct net_device	*dev;
-	void			*priv;
-	u_int8_t		pf;
-	unsigned int		hooknum;
-	/* Hooks are ordered in ascending priority. */
-	int			priority;
-};
 
 struct nf_sockopt_ops {
 	struct list_head list;
@@ -133,6 +140,8 @@
 void nf_unregister_hook(struct nf_hook_ops *reg);
 int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
 void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
+int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
+void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
 
 /* Functions to register get/setsockopt ranges (non-inclusive).  You
    need to check permissions yourself! */
@@ -161,7 +170,8 @@
 				 int (*okfn)(struct net *, struct sock *, struct sk_buff *),
 				 int thresh)
 {
-	struct list_head *hook_list;
+	struct nf_hook_entry *hook_head;
+	int ret = 1;
 
 #ifdef HAVE_JUMP_LABEL
 	if (__builtin_constant_p(pf) &&
@@ -170,16 +180,19 @@
 		return 1;
 #endif
 
-	hook_list = &net->nf.hooks[pf][hook];
-
-	if (!list_empty(hook_list)) {
+	rcu_read_lock();
+	hook_head = rcu_dereference(net->nf.hooks[pf][hook]);
+	if (hook_head) {
 		struct nf_hook_state state;
 
-		nf_hook_state_init(&state, hook_list, hook, thresh,
+		nf_hook_state_init(&state, hook_head, hook, thresh,
 				   pf, indev, outdev, sk, net, okfn);
-		return nf_hook_slow(skb, &state);
+
+		ret = nf_hook_slow(skb, &state);
 	}
-	return 1;
+	rcu_read_unlock();
+
+	return ret;
 }
 
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 2755057..1d1ef4e 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -4,13 +4,9 @@
 #include <uapi/linux/netfilter/nf_conntrack_common.h>
 
 struct ip_conntrack_stat {
-	unsigned int searched;
 	unsigned int found;
-	unsigned int new;
 	unsigned int invalid;
 	unsigned int ignore;
-	unsigned int delete;
-	unsigned int delete_list;
 	unsigned int insert;
 	unsigned int insert_failed;
 	unsigned int drop;
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index df78dc2..dee0acd 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -1,68 +1,8 @@
 #ifndef _CONNTRACK_PROTO_GRE_H
 #define _CONNTRACK_PROTO_GRE_H
 #include <asm/byteorder.h>
-
-/* GRE PROTOCOL HEADER */
-
-/* GRE Version field */
-#define GRE_VERSION_1701	0x0
-#define GRE_VERSION_PPTP	0x1
-
-/* GRE Protocol field */
-#define GRE_PROTOCOL_PPTP	0x880B
-
-/* GRE Flags */
-#define GRE_FLAG_C		0x80
-#define GRE_FLAG_R		0x40
-#define GRE_FLAG_K		0x20
-#define GRE_FLAG_S		0x10
-#define GRE_FLAG_A		0x80
-
-#define GRE_IS_C(f)	((f)&GRE_FLAG_C)
-#define GRE_IS_R(f)	((f)&GRE_FLAG_R)
-#define GRE_IS_K(f)	((f)&GRE_FLAG_K)
-#define GRE_IS_S(f)	((f)&GRE_FLAG_S)
-#define GRE_IS_A(f)	((f)&GRE_FLAG_A)
-
-/* GRE is a mess: Four different standards */
-struct gre_hdr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u16	rec:3,
-		srr:1,
-		seq:1,
-		key:1,
-		routing:1,
-		csum:1,
-		version:3,
-		reserved:4,
-		ack:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-	__u16	csum:1,
-		routing:1,
-		key:1,
-		seq:1,
-		srr:1,
-		rec:3,
-		ack:1,
-		reserved:4,
-		version:3;
-#else
-#error "Adjust your <asm/byteorder.h> defines"
-#endif
-	__be16	protocol;
-};
-
-/* modified GRE header for PPTP */
-struct gre_hdr_pptp {
-	__u8   flags;		/* bitfield */
-	__u8   version;		/* should be GRE_VERSION_PPTP */
-	__be16 protocol;	/* should be GRE_PROTOCOL_PPTP */
-	__be16 payload_len;	/* size of ppp payload, not inc. gre header */
-	__be16 call_id;		/* peer's call_id for this session */
-	__be32 seq;		/* sequence number.  Present if S==1 */
-	__be32 ack;		/* seq number of highest packet received by */
-				/*  sender in this session */
-};
+#include <net/gre.h>
+#include <net/pptp.h>
 
 struct nf_ct_gre {
 	unsigned int stream_timeout;
diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index 80ca889..664da00 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -15,6 +15,6 @@
 struct nf_acct *nfnl_acct_find_get(struct net *net, const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
 void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
-extern int nfnl_acct_overquota(const struct sk_buff *skb,
-			      struct nf_acct *nfacct);
+int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
+			struct nf_acct *nfacct);
 #endif /* _NFNL_ACCT_H */
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 5fcd375..33e37fb 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -11,22 +11,30 @@
 	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
 		return false;
 #endif
-	return !list_empty(&skb->dev->nf_hooks_ingress);
+	return rcu_access_pointer(skb->dev->nf_hooks_ingress);
 }
 
+/* caller must hold rcu_read_lock */
 static inline int nf_hook_ingress(struct sk_buff *skb)
 {
+	struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress);
 	struct nf_hook_state state;
 
-	nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
-			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
-			   skb->dev, NULL, NULL, dev_net(skb->dev), NULL);
+	/* Must recheck the ingress hook head, in the event it became NULL
+	 * after the check in nf_hook_ingress_active evaluated to true.
+	 */
+	if (unlikely(!e))
+		return 0;
+
+	nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN,
+			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
+			   dev_net(skb->dev), NULL);
 	return nf_hook_slow(skb, &state);
 }
 
 static inline void nf_hook_ingress_init(struct net_device *dev)
 {
-	INIT_LIST_HEAD(&dev->nf_hooks_ingress);
+	RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
 }
 #else /* CONFIG_NETFILTER_INGRESS */
 static inline int nf_hook_ingress_active(struct sk_buff *skb)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index d8b37ba..7676557 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -794,7 +794,7 @@
 };
 
 struct nvmf_connect_data {
-	uuid_le		hostid;
+	uuid_be		hostid;
 	__le16		cntlid;
 	char		resv4[238];
 	char		subsysnqn[NVMF_NQN_FIELD_LEN];
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 26c3302..4341f32 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -14,6 +14,7 @@
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/errno.h>
 
 /* Definitions used by the flattened device tree */
 #define OF_DT_HEADER		0xd00dfeed	/* marker */
@@ -66,6 +67,7 @@
 				     int depth, void *data);
 extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data);
+extern int early_init_dt_scan_chosen_stdout(void);
 extern void early_init_fdt_scan_reserved_mem(void);
 extern void early_init_fdt_reserve_self(void);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
@@ -94,6 +96,7 @@
 extern u64 of_flat_dt_translate_address(unsigned long node);
 extern void of_fdt_limit_memory(int limit);
 #else /* CONFIG_OF_FLATTREE */
+static inline int early_init_dt_scan_chosen_stdout(void) { return -ENODEV; }
 static inline void early_init_fdt_scan_reserved_mem(void) {}
 static inline void early_init_fdt_reserve_self(void) {}
 static inline const char *of_flat_dt_get_machine_name(void) { return NULL; }
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 113ee62..0f9e567 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -151,7 +151,7 @@
  * @flags: padata flags.
  */
 struct padata_instance {
-	struct notifier_block		 cpu_notifier;
+	struct hlist_node		 node;
 	struct workqueue_struct		*wq;
 	struct parallel_data		*pd;
 	struct padata_cpumask		cpumask;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 66a1260..01e8443 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -571,56 +571,57 @@
  */
 static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
 {
-	int ret = 0;
 	char __user *end = uaddr + size - 1;
 
 	if (unlikely(size == 0))
-		return ret;
+		return 0;
 
+	if (unlikely(uaddr > end))
+		return -EFAULT;
 	/*
 	 * Writing zeroes into userspace here is OK, because we know that if
 	 * the zero gets there, we'll be overwriting it.
 	 */
-	while (uaddr <= end) {
-		ret = __put_user(0, uaddr);
-		if (ret != 0)
-			return ret;
+	do {
+		if (unlikely(__put_user(0, uaddr) != 0))
+			return -EFAULT;
 		uaddr += PAGE_SIZE;
-	}
+	} while (uaddr <= end);
 
 	/* Check whether the range spilled into the next page. */
 	if (((unsigned long)uaddr & PAGE_MASK) ==
 			((unsigned long)end & PAGE_MASK))
-		ret = __put_user(0, end);
+		return __put_user(0, end);
 
-	return ret;
+	return 0;
 }
 
 static inline int fault_in_multipages_readable(const char __user *uaddr,
 					       int size)
 {
 	volatile char c;
-	int ret = 0;
 	const char __user *end = uaddr + size - 1;
 
 	if (unlikely(size == 0))
-		return ret;
+		return 0;
 
-	while (uaddr <= end) {
-		ret = __get_user(c, uaddr);
-		if (ret != 0)
-			return ret;
+	if (unlikely(uaddr > end))
+		return -EFAULT;
+
+	do {
+		if (unlikely(__get_user(c, uaddr) != 0))
+			return -EFAULT;
 		uaddr += PAGE_SIZE;
-	}
+	} while (uaddr <= end);
 
 	/* Check whether the range spilled into the next page. */
 	if (((unsigned long)uaddr & PAGE_MASK) ==
 			((unsigned long)end & PAGE_MASK)) {
-		ret = __get_user(c, end);
-		(void)c;
+		return __get_user(c, end);
 	}
 
-	return ret;
+	(void)c;
+	return 0;
 }
 
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2599a98..7cc0acb 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -683,15 +683,6 @@
 #define	to_pci_driver(drv) container_of(drv, struct pci_driver, driver)
 
 /**
- * DEFINE_PCI_DEVICE_TABLE - macro used to describe a pci device table
- * @_table: device table name
- *
- * This macro is deprecated and should not be used in new code.
- */
-#define DEFINE_PCI_DEVICE_TABLE(_table) \
-	const struct pci_device_id _table[]
-
-/**
  * PCI_DEVICE - macro used to describe a specific pci device
  * @vend: the 16 bit PCI Vendor ID
  * @dev: the 16 bit PCI Device ID
@@ -1135,6 +1126,7 @@
 int pci_enable_resources(struct pci_dev *, int mask);
 void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *),
 		    int (*)(const struct pci_dev *, u8, u8));
+struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res);
 #define HAVE_PCI_REQ_REGIONS	2
 int __must_check pci_request_regions(struct pci_dev *, const char *);
 int __must_check pci_request_regions_exclusive(struct pci_dev *, const char *);
@@ -1251,10 +1243,12 @@
 int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 		      unsigned int command_bits, u32 flags);
 
-#define PCI_IRQ_NOLEGACY	(1 << 0) /* don't use legacy interrupts */
-#define PCI_IRQ_NOMSI		(1 << 1) /* don't use MSI interrupts */
-#define PCI_IRQ_NOMSIX		(1 << 2) /* don't use MSI-X interrupts */
-#define PCI_IRQ_NOAFFINITY	(1 << 3) /* don't auto-assign affinity */
+#define PCI_IRQ_LEGACY		(1 << 0) /* allow legacy interrupts */
+#define PCI_IRQ_MSI		(1 << 1) /* allow MSI interrupts */
+#define PCI_IRQ_MSIX		(1 << 2) /* allow MSI-X interrupts */
+#define PCI_IRQ_AFFINITY	(1 << 3) /* auto-assign affinity */
+#define PCI_IRQ_ALL_TYPES \
+	(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
 
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
@@ -1307,6 +1301,7 @@
 		unsigned int max_vecs, unsigned int flags);
 void pci_free_irq_vectors(struct pci_dev *dev);
 int pci_irq_vector(struct pci_dev *dev, unsigned int nr);
+const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec);
 
 #else
 static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
@@ -1349,6 +1344,11 @@
 		return -EINVAL;
 	return dev->irq;
 }
+static inline const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev,
+		int vec)
+{
+	return cpu_possible_mask;
+}
 #endif
 
 #ifdef CONFIG_PCIEPORTBUS
@@ -1549,6 +1549,9 @@
 				  int enable)
 { return 0; }
 
+static inline struct resource *pci_find_resource(struct pci_dev *dev,
+						 struct resource *res)
+{ return NULL; }
 static inline int pci_request_regions(struct pci_dev *dev, const char *res_name)
 { return -EIO; }
 static inline void pci_release_regions(struct pci_dev *dev) { }
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index c2fa3ec..5b2e615 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -10,32 +10,122 @@
 
 struct percpu_rw_semaphore {
 	struct rcu_sync		rss;
-	unsigned int __percpu	*fast_read_ctr;
+	unsigned int __percpu	*read_count;
 	struct rw_semaphore	rw_sem;
-	atomic_t		slow_read_ctr;
-	wait_queue_head_t	write_waitq;
+	wait_queue_head_t	writer;
+	int			readers_block;
 };
 
-extern void percpu_down_read(struct percpu_rw_semaphore *);
-extern int  percpu_down_read_trylock(struct percpu_rw_semaphore *);
-extern void percpu_up_read(struct percpu_rw_semaphore *);
+#define DEFINE_STATIC_PERCPU_RWSEM(name)				\
+static DEFINE_PER_CPU(unsigned int, __percpu_rwsem_rc_##name);		\
+static struct percpu_rw_semaphore name = {				\
+	.rss = __RCU_SYNC_INITIALIZER(name.rss, RCU_SCHED_SYNC),	\
+	.read_count = &__percpu_rwsem_rc_##name,			\
+	.rw_sem = __RWSEM_INITIALIZER(name.rw_sem),			\
+	.writer = __WAIT_QUEUE_HEAD_INITIALIZER(name.writer),		\
+}
+
+extern int __percpu_down_read(struct percpu_rw_semaphore *, int);
+extern void __percpu_up_read(struct percpu_rw_semaphore *);
+
+static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore *sem)
+{
+	might_sleep();
+
+	rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 0, _RET_IP_);
+
+	preempt_disable();
+	/*
+	 * We are in an RCU-sched read-side critical section, so the writer
+	 * cannot both change sem->state from readers_fast and start checking
+	 * counters while we are here. So if we see !sem->state, we know that
+	 * the writer won't be checking until we're past the preempt_enable()
+	 * and that one the synchronize_sched() is done, the writer will see
+	 * anything we did within this RCU-sched read-size critical section.
+	 */
+	__this_cpu_inc(*sem->read_count);
+	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+		__percpu_down_read(sem, false); /* Unconditional memory barrier */
+	barrier();
+	/*
+	 * The barrier() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+}
+
+static inline void percpu_down_read(struct percpu_rw_semaphore *sem)
+{
+	percpu_down_read_preempt_disable(sem);
+	preempt_enable();
+}
+
+static inline int percpu_down_read_trylock(struct percpu_rw_semaphore *sem)
+{
+	int ret = 1;
+
+	preempt_disable();
+	/*
+	 * Same as in percpu_down_read().
+	 */
+	__this_cpu_inc(*sem->read_count);
+	if (unlikely(!rcu_sync_is_idle(&sem->rss)))
+		ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */
+	preempt_enable();
+	/*
+	 * The barrier() from preempt_enable() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+
+	if (ret)
+		rwsem_acquire_read(&sem->rw_sem.dep_map, 0, 1, _RET_IP_);
+
+	return ret;
+}
+
+static inline void percpu_up_read_preempt_enable(struct percpu_rw_semaphore *sem)
+{
+	/*
+	 * The barrier() prevents the compiler from
+	 * bleeding the critical section out.
+	 */
+	barrier();
+	/*
+	 * Same as in percpu_down_read().
+	 */
+	if (likely(rcu_sync_is_idle(&sem->rss)))
+		__this_cpu_dec(*sem->read_count);
+	else
+		__percpu_up_read(sem); /* Unconditional memory barrier */
+	preempt_enable();
+
+	rwsem_release(&sem->rw_sem.dep_map, 1, _RET_IP_);
+}
+
+static inline void percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+	preempt_disable();
+	percpu_up_read_preempt_enable(sem);
+}
 
 extern void percpu_down_write(struct percpu_rw_semaphore *);
 extern void percpu_up_write(struct percpu_rw_semaphore *);
 
 extern int __percpu_init_rwsem(struct percpu_rw_semaphore *,
 				const char *, struct lock_class_key *);
+
 extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
-#define percpu_init_rwsem(brw)	\
+#define percpu_init_rwsem(sem)					\
 ({								\
 	static struct lock_class_key rwsem_key;			\
-	__percpu_init_rwsem(brw, #brw, &rwsem_key);		\
+	__percpu_init_rwsem(sem, #sem, &rwsem_key);		\
 })
 
-
 #define percpu_rwsem_is_held(sem) lockdep_is_held(&(sem)->rw_sem)
 
+#define percpu_rwsem_assert_held(sem)				\
+	lockdep_assert_held(&(sem)->rw_sem)
+
 static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem,
 					bool read, unsigned long ip)
 {
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index e188438..8462da2 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -14,7 +14,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/perf_event.h>
-
+#include <linux/sysfs.h>
 #include <asm/cputype.h>
 
 /*
@@ -77,6 +77,13 @@
 	struct arm_pmu		*percpu_pmu;
 };
 
+enum armpmu_attr_groups {
+	ARMPMU_ATTR_GROUP_COMMON,
+	ARMPMU_ATTR_GROUP_EVENTS,
+	ARMPMU_ATTR_GROUP_FORMATS,
+	ARMPMU_NR_ATTR_GROUPS
+};
+
 struct arm_pmu {
 	struct pmu	pmu;
 	cpumask_t	active_irqs;
@@ -109,8 +116,10 @@
 	DECLARE_BITMAP(pmceid_bitmap, ARMV8_PMUV3_MAX_COMMON_EVENTS);
 	struct platform_device	*plat_device;
 	struct pmu_hw_events	__percpu *hw_events;
-	struct list_head	entry;
+	struct hlist_node	node;
 	struct notifier_block	cpu_pm_nb;
+	/* the attr_groups array must be NULL-terminated */
+	const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1];
 };
 
 #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
@@ -151,6 +160,8 @@
 			 const struct of_device_id *of_table,
 			 const struct pmu_probe_info *probe_table);
 
+#define ARMV8_PMU_PDEV_NAME "armv8-pmu"
+
 #endif /* CONFIG_ARM_PMU */
 
 #endif /* __ARM_PMU_H__ */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8ed43261..060d0ed 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -510,9 +510,15 @@
 					struct perf_sample_data *,
 					struct pt_regs *regs);
 
-enum perf_group_flag {
-	PERF_GROUP_SOFTWARE		= 0x1,
-};
+/*
+ * Event capabilities. For event_caps and groups caps.
+ *
+ * PERF_EV_CAP_SOFTWARE: Is a software event.
+ * PERF_EV_CAP_READ_ACTIVE_PKG: A CPU event (or cgroup event) that can be read
+ * from any CPU in the package where it is active.
+ */
+#define PERF_EV_CAP_SOFTWARE		BIT(0)
+#define PERF_EV_CAP_READ_ACTIVE_PKG	BIT(1)
 
 #define SWEVENT_HLIST_BITS		8
 #define SWEVENT_HLIST_SIZE		(1 << SWEVENT_HLIST_BITS)
@@ -568,7 +574,12 @@
 	struct hlist_node		hlist_entry;
 	struct list_head		active_entry;
 	int				nr_siblings;
-	int				group_flags;
+
+	/* Not serialized. Only written during event initialization. */
+	int				event_caps;
+	/* The cumulative AND of all event_caps for events in this group. */
+	int				group_caps;
+
 	struct perf_event		*group_leader;
 	struct pmu			*pmu;
 	void				*pmu_private;
@@ -679,6 +690,10 @@
 	u64				(*clock)(void);
 	perf_overflow_handler_t		overflow_handler;
 	void				*overflow_handler_context;
+#ifdef CONFIG_BPF_SYSCALL
+	perf_overflow_handler_t		orig_overflow_handler;
+	struct bpf_prog			*prog;
+#endif
 
 #ifdef CONFIG_EVENT_TRACING
 	struct trace_event_call		*tp_event;
@@ -743,7 +758,9 @@
 	u64				parent_gen;
 	u64				generation;
 	int				pin_count;
+#ifdef CONFIG_CGROUP_PERF
 	int				nr_cgroups;	 /* cgroup evts */
+#endif
 	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
 };
@@ -769,7 +786,12 @@
 	unsigned int			hrtimer_active;
 
 	struct pmu			*unique_pmu;
+#ifdef CONFIG_CGROUP_PERF
 	struct perf_cgroup		*cgrp;
+#endif
+
+	struct list_head		sched_cb_entry;
+	int				sched_cb_usage;
 };
 
 struct perf_output_handle {
@@ -784,6 +806,11 @@
 	int				page;
 };
 
+struct bpf_perf_event_data_kern {
+	struct pt_regs *regs;
+	struct perf_sample_data *data;
+};
+
 #ifdef CONFIG_CGROUP_PERF
 
 /*
@@ -981,7 +1008,7 @@
  */
 static inline int is_software_event(struct perf_event *event)
 {
-	return event->pmu->task_ctx_nr == perf_sw_context;
+	return event->event_caps & PERF_EV_CAP_SOFTWARE;
 }
 
 extern struct static_key perf_swevent_enabled[PERF_COUNT_SW_MAX];
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 2d24b28..e25f183 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -80,6 +80,7 @@
 	PHY_INTERFACE_MODE_XGMII,
 	PHY_INTERFACE_MODE_MOCA,
 	PHY_INTERFACE_MODE_QSGMII,
+	PHY_INTERFACE_MODE_TRGMII,
 	PHY_INTERFACE_MODE_MAX,
 } phy_interface_t;
 
@@ -123,6 +124,8 @@
 		return "moca";
 	case PHY_INTERFACE_MODE_QSGMII:
 		return "qsgmii";
+	case PHY_INTERFACE_MODE_TRGMII:
+		return "trgmii";
 	default:
 		return "unknown";
 	}
diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index f08b672..ee1bed7 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -36,6 +36,7 @@
  * @power_on: powering on the phy
  * @power_off: powering off the phy
  * @set_mode: set the mode of the phy
+ * @reset: resetting the phy
  * @owner: the module owner containing the ops
  */
 struct phy_ops {
@@ -44,6 +45,7 @@
 	int	(*power_on)(struct phy *phy);
 	int	(*power_off)(struct phy *phy);
 	int	(*set_mode)(struct phy *phy, enum phy_mode mode);
+	int	(*reset)(struct phy *phy);
 	struct module *owner;
 };
 
@@ -136,6 +138,7 @@
 int phy_power_on(struct phy *phy);
 int phy_power_off(struct phy *phy);
 int phy_set_mode(struct phy *phy, enum phy_mode mode);
+int phy_reset(struct phy *phy);
 static inline int phy_get_bus_width(struct phy *phy)
 {
 	return phy->attrs.bus_width;
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index d15d8ba..5f0e11e 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -23,6 +23,7 @@
  * @dst_id:	dst request line
  * @m_master:	memory master for transfers on allocated channel
  * @p_master:	peripheral master for transfers on allocated channel
+ * @hs_polarity:set active low polarity of handshake interface
  */
 struct dw_dma_slave {
 	struct device		*dma_dev;
@@ -30,6 +31,7 @@
 	u8			dst_id;
 	u8			m_master;
 	u8			p_master;
+	bool			hs_polarity;
 };
 
 /**
@@ -38,6 +40,7 @@
  * @is_private: The device channels should be marked as private and not for
  *	by the general purpose DMA channel allocator.
  * @is_memcpy: The device channels do support memory-to-memory transfers.
+ * @is_nollp: The device channels does not support multi block transfers.
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
@@ -49,6 +52,7 @@
 	unsigned int	nr_channels;
 	bool		is_private;
 	bool		is_memcpy;
+	bool		is_nollp;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
 	unsigned char	chan_allocation_order;
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 31fec85..a09fe5c 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -51,6 +51,8 @@
 	struct mutex lock;
 	struct dev_power_governor *gov;
 	struct work_struct power_off_work;
+	struct fwnode_handle *provider;	/* Identity of the domain provider */
+	bool has_provider;
 	const char *name;
 	atomic_t sd_count;	/* Number of subdomains with power "on" */
 	enum gpd_status status;	/* Current state of the domain */
@@ -116,7 +118,6 @@
 	return to_gpd_data(dev->power.subsys_data->domain_data);
 }
 
-extern struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev);
 extern int __pm_genpd_add_device(struct generic_pm_domain *genpd,
 				 struct device *dev,
 				 struct gpd_timing_data *td);
@@ -129,6 +130,7 @@
 				     struct generic_pm_domain *target);
 extern int pm_genpd_init(struct generic_pm_domain *genpd,
 			 struct dev_power_governor *gov, bool is_off);
+extern int pm_genpd_remove(struct generic_pm_domain *genpd);
 
 extern struct dev_power_governor simple_qos_governor;
 extern struct dev_power_governor pm_domain_always_on_gov;
@@ -138,10 +140,6 @@
 {
 	return ERR_PTR(-ENOSYS);
 }
-static inline struct generic_pm_domain *pm_genpd_lookup_dev(struct device *dev)
-{
-	return NULL;
-}
 static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd,
 					struct device *dev,
 					struct gpd_timing_data *td)
@@ -168,6 +166,10 @@
 {
 	return -ENOSYS;
 }
+static inline int pm_genpd_remove(struct generic_pm_domain *genpd)
+{
+	return -ENOTSUPP;
+}
 #endif
 
 static inline int pm_genpd_add_device(struct generic_pm_domain *genpd,
@@ -192,57 +194,57 @@
 	unsigned int num_domains;
 };
 
-typedef struct generic_pm_domain *(*genpd_xlate_t)(struct of_phandle_args *args,
-						void *data);
-
 #ifdef CONFIG_PM_GENERIC_DOMAINS_OF
-int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
-			void *data);
+int of_genpd_add_provider_simple(struct device_node *np,
+				 struct generic_pm_domain *genpd);
+int of_genpd_add_provider_onecell(struct device_node *np,
+				  struct genpd_onecell_data *data);
 void of_genpd_del_provider(struct device_node *np);
-struct generic_pm_domain *of_genpd_get_from_provider(
-			struct of_phandle_args *genpdspec);
-
-struct generic_pm_domain *__of_genpd_xlate_simple(
-					struct of_phandle_args *genpdspec,
-					void *data);
-struct generic_pm_domain *__of_genpd_xlate_onecell(
-					struct of_phandle_args *genpdspec,
-					void *data);
+extern int of_genpd_add_device(struct of_phandle_args *args,
+			       struct device *dev);
+extern int of_genpd_add_subdomain(struct of_phandle_args *parent,
+				  struct of_phandle_args *new_subdomain);
+extern struct generic_pm_domain *of_genpd_remove_last(struct device_node *np);
 
 int genpd_dev_pm_attach(struct device *dev);
 #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
-static inline int __of_genpd_add_provider(struct device_node *np,
-					genpd_xlate_t xlate, void *data)
+static inline int of_genpd_add_provider_simple(struct device_node *np,
+					struct generic_pm_domain *genpd)
 {
-	return 0;
+	return -ENOTSUPP;
 }
+
+static inline int of_genpd_add_provider_onecell(struct device_node *np,
+					struct genpd_onecell_data *data)
+{
+	return -ENOTSUPP;
+}
+
 static inline void of_genpd_del_provider(struct device_node *np) {}
 
-static inline struct generic_pm_domain *of_genpd_get_from_provider(
-			struct of_phandle_args *genpdspec)
+static inline int of_genpd_add_device(struct of_phandle_args *args,
+				      struct device *dev)
 {
-	return NULL;
+	return -ENODEV;
 }
 
-#define __of_genpd_xlate_simple		NULL
-#define __of_genpd_xlate_onecell	NULL
+static inline int of_genpd_add_subdomain(struct of_phandle_args *parent,
+					 struct of_phandle_args *new_subdomain)
+{
+	return -ENODEV;
+}
 
 static inline int genpd_dev_pm_attach(struct device *dev)
 {
 	return -ENODEV;
 }
-#endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
 
-static inline int of_genpd_add_provider_simple(struct device_node *np,
-					struct generic_pm_domain *genpd)
+static inline
+struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 {
-	return __of_genpd_add_provider(np, __of_genpd_xlate_simple, genpd);
+	return ERR_PTR(-ENOTSUPP);
 }
-static inline int of_genpd_add_provider_onecell(struct device_node *np,
-					struct genpd_onecell_data *data)
-{
-	return __of_genpd_add_provider(np, __of_genpd_xlate_onecell, data);
-}
+#endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
 
 #ifdef CONFIG_PM
 extern int dev_pm_domain_attach(struct device *dev, bool power_on);
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 8dc155d..696a56b 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -266,39 +266,21 @@
  * and other debug macros are compiled out unless either DEBUG is defined
  * or CONFIG_DYNAMIC_DEBUG is set.
  */
-
-#ifdef CONFIG_PRINTK
-
-asmlinkage __printf(1, 2) __cold void __pr_emerg(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_alert(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_crit(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_err(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_warn(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_notice(const char *fmt, ...);
-asmlinkage __printf(1, 2) __cold void __pr_info(const char *fmt, ...);
-
-#define pr_emerg(fmt, ...)	__pr_emerg(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert(fmt, ...)	__pr_alert(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit(fmt, ...)	__pr_crit(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err(fmt, ...)	__pr_err(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn(fmt, ...)	__pr_warn(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_notice(fmt, ...)	__pr_notice(pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info(fmt, ...)	__pr_info(pr_fmt(fmt), ##__VA_ARGS__)
-
-#else
-
-#define pr_emerg(fmt, ...)	printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_alert(fmt, ...)	printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_crit(fmt, ...)	printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_err(fmt, ...)	printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn(fmt, ...)	printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_notice(fmt, ...)	printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_info(fmt, ...)	printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-
-#endif
-
-#define pr_warning pr_warn
-
+#define pr_emerg(fmt, ...) \
+	printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_alert(fmt, ...) \
+	printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_crit(fmt, ...) \
+	printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_err(fmt, ...) \
+	printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+	printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warn pr_warning
+#define pr_notice(fmt, ...) \
+	printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+	printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 /*
  * Like KERN_CONT, pr_cont() should only be used when continuing
  * a line with no newline ('\n') enclosed. Otherwise it defaults
diff --git a/include/linux/property.h b/include/linux/property.h
index 3a2f9ae..856e50b 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -190,7 +190,7 @@
 	.length = ARRAY_SIZE(_val_) * sizeof(_type_),		\
 	.is_array = true,					\
 	.is_string = false,					\
-	{ .pointer = { _type_##_data = _val_ } },		\
+	{ .pointer = { ._type_##_data = _val_ } },		\
 }
 
 #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_)			\
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 6b15e16..5ad54fc 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -127,6 +127,11 @@
  *
  * @info:   Structure describing the new clock.
  * @parent: Pointer to the parent device of the new clock.
+ *
+ * Returns a valid pointer on success or PTR_ERR on failure.  If PHC
+ * support is missing at the configuration level, this function
+ * returns NULL, and drivers are expected to gracefully handle that
+ * case separately.
  */
 
 extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 2a097d1..2d6f0c39 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -83,7 +83,6 @@
 #define SSSR_RFS	(1 << 6)	/* Receive FIFO Service Request */
 #define SSSR_ROR	(1 << 7)	/* Receive FIFO Overrun */
 
-#ifdef CONFIG_ARCH_PXA
 #define RX_THRESH_DFLT	8
 #define TX_THRESH_DFLT	8
 
@@ -95,19 +94,16 @@
 #define SSCR1_RFT	(0x00003c00)	/* Receive FIFO Threshold (mask) */
 #define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..16] */
 
-#else
+#define RX_THRESH_CE4100_DFLT	2
+#define TX_THRESH_CE4100_DFLT	2
 
-#define RX_THRESH_DFLT	2
-#define TX_THRESH_DFLT	2
+#define CE4100_SSSR_TFL_MASK	(0x3 << 8)	/* Transmit FIFO Level mask */
+#define CE4100_SSSR_RFL_MASK	(0x3 << 12)	/* Receive FIFO Level mask */
 
-#define SSSR_TFL_MASK	(0x3 << 8)	/* Transmit FIFO Level mask */
-#define SSSR_RFL_MASK	(0x3 << 12)	/* Receive FIFO Level mask */
-
-#define SSCR1_TFT	(0x000000c0)	/* Transmit FIFO Threshold (mask) */
-#define SSCR1_TxTresh(x) (((x) - 1) << 6) /* level [1..4] */
-#define SSCR1_RFT	(0x00000c00)	/* Receive FIFO Threshold (mask) */
-#define SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..4] */
-#endif
+#define CE4100_SSCR1_TFT	(0x000000c0)	/* Transmit FIFO Threshold (mask) */
+#define CE4100_SSCR1_TxTresh(x) (((x) - 1) << 6)	/* level [1..4] */
+#define CE4100_SSCR1_RFT	(0x00000c00)	/* Receive FIFO Threshold (mask) */
+#define CE4100_SSCR1_RxTresh(x) (((x) - 1) << 10)	/* level [1..4] */
 
 /* QUARK_X1000 SSCR0 bit definition */
 #define QUARK_X1000_SSCR0_DSS	(0x1F)		/* Data Size Select (mask) */
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 40c0ada..734deb0 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -5,28 +5,77 @@
  * (GPL) Version 2, available from the file COPYING in the main directory of
  * this source tree.
  */
+#ifndef _COMMON_HSI_H
+#define _COMMON_HSI_H
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/bitops.h>
+#include <linux/slab.h>
+
+/* dma_addr_t manip */
+#define DMA_LO_LE(x)		cpu_to_le32(lower_32_bits(x))
+#define DMA_HI_LE(x)		cpu_to_le32(upper_32_bits(x))
+#define DMA_REGPAIR_LE(x, val)	do { \
+					(x).hi = DMA_HI_LE((val)); \
+					(x).lo = DMA_LO_LE((val)); \
+				} while (0)
+
+#define HILO_GEN(hi, lo, type)  ((((type)(hi)) << 32) + (lo))
+#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64)
+#define HILO_64_REGPAIR(regpair)        (HILO_64(regpair.hi, regpair.lo))
+#define HILO_DMA_REGPAIR(regpair)	((dma_addr_t)HILO_64_REGPAIR(regpair))
 
 #ifndef __COMMON_HSI__
 #define __COMMON_HSI__
 
-#define CORE_SPQE_PAGE_SIZE_BYTES                       4096
 
 #define X_FINAL_CLEANUP_AGG_INT 1
+
+#define EVENT_RING_PAGE_SIZE_BYTES          4096
+
 #define NUM_OF_GLOBAL_QUEUES                            128
+#define COMMON_QUEUE_ENTRY_MAX_BYTE_SIZE        64
+
+#define ISCSI_CDU_TASK_SEG_TYPE       0
+#define RDMA_CDU_TASK_SEG_TYPE        1
+
+#define FW_ASSERT_GENERAL_ATTN_IDX    32
+
+#define MAX_PINNED_CCFC                 32
 
 /* Queue Zone sizes in bytes */
 #define TSTORM_QZONE_SIZE 8
-#define MSTORM_QZONE_SIZE 0
+#define MSTORM_QZONE_SIZE 16
 #define USTORM_QZONE_SIZE 8
 #define XSTORM_QZONE_SIZE 8
 #define YSTORM_QZONE_SIZE 0
 #define PSTORM_QZONE_SIZE 0
 
-#define ETH_MAX_NUM_RX_QUEUES_PER_VF 16
+#define MSTORM_VF_ZONE_DEFAULT_SIZE_LOG	7
+#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DEFAULT	16
+#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DOUBLE	48
+#define ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD	112
+
+/********************************/
+/* CORE (LIGHT L2) FW CONSTANTS */
+/********************************/
+
+#define CORE_LL2_MAX_RAMROD_PER_CON	8
+#define CORE_LL2_TX_BD_PAGE_SIZE_BYTES	4096
+#define CORE_LL2_RX_BD_PAGE_SIZE_BYTES	4096
+#define CORE_LL2_RX_CQE_PAGE_SIZE_BYTES	4096
+#define CORE_LL2_RX_NUM_NEXT_PAGE_BDS	1
+
+#define CORE_LL2_TX_MAX_BDS_PER_PACKET	12
+
+#define CORE_SPQE_PAGE_SIZE_BYTES	4096
+
+#define MAX_NUM_LL2_RX_QUEUES		32
+#define MAX_NUM_LL2_TX_STATS_COUNTERS	32
 
 #define FW_MAJOR_VERSION	8
 #define FW_MINOR_VERSION	10
-#define FW_REVISION_VERSION	5
+#define FW_REVISION_VERSION	10
 #define FW_ENGINEERING_VERSION	0
 
 /***********************/
@@ -83,6 +132,20 @@
 #define NUM_OF_LCIDS		(320)
 #define NUM_OF_LTIDS		(320)
 
+/* Clock values */
+#define MASTER_CLK_FREQ_E4	(375e6)
+#define STORM_CLK_FREQ_E4	(1000e6)
+#define CLK25M_CLK_FREQ_E4	(25e6)
+
+/* Global PXP windows (GTT) */
+#define NUM_OF_GTT		19
+#define GTT_DWORD_SIZE_BITS	10
+#define GTT_BYTE_SIZE_BITS	(GTT_DWORD_SIZE_BITS + 2)
+#define GTT_DWORD_SIZE		BIT(GTT_DWORD_SIZE_BITS)
+
+/* Tools Version */
+#define TOOLS_VERSION 10
+
 /*****************/
 /* CDU CONSTANTS */
 /*****************/
@@ -90,6 +153,8 @@
 #define CDU_SEG_TYPE_OFFSET_REG_TYPE_SHIFT              (17)
 #define CDU_SEG_TYPE_OFFSET_REG_OFFSET_MASK             (0x1ffff)
 
+#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_TYPE_SHIFT	(12)
+#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_OFFSET_MASK	(0xfff)
 /*****************/
 /* DQ CONSTANTS  */
 /*****************/
@@ -115,6 +180,11 @@
 #define	DQ_XCM_ETH_TX_BD_CONS_CMD	DQ_XCM_AGG_VAL_SEL_WORD3
 #define	DQ_XCM_ETH_TX_BD_PROD_CMD	DQ_XCM_AGG_VAL_SEL_WORD4
 #define	DQ_XCM_ETH_GO_TO_BD_CONS_CMD	DQ_XCM_AGG_VAL_SEL_WORD5
+#define DQ_XCM_ISCSI_SQ_CONS_CMD	DQ_XCM_AGG_VAL_SEL_WORD3
+#define DQ_XCM_ISCSI_SQ_PROD_CMD	DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3
+#define DQ_XCM_ISCSI_EXP_STAT_SN_CMD	DQ_XCM_AGG_VAL_SEL_REG6
+#define DQ_XCM_ROCE_SQ_PROD_CMD	DQ_XCM_AGG_VAL_SEL_WORD4
 
 /* UCM agg val selection (HW) */
 #define	DQ_UCM_AGG_VAL_SEL_WORD0	0
@@ -159,13 +229,16 @@
 #define	DQ_XCM_AGG_FLG_SHIFT_CF23	7
 
 /* XCM agg counter flag selection */
-#define DQ_XCM_CORE_DQ_CF_CMD		(1 << DQ_XCM_AGG_FLG_SHIFT_CF18)
-#define DQ_XCM_CORE_TERMINATE_CMD	(1 << DQ_XCM_AGG_FLG_SHIFT_CF19)
-#define DQ_XCM_CORE_SLOW_PATH_CMD	(1 << DQ_XCM_AGG_FLG_SHIFT_CF22)
-#define DQ_XCM_ETH_DQ_CF_CMD		(1 << DQ_XCM_AGG_FLG_SHIFT_CF18)
-#define DQ_XCM_ETH_TERMINATE_CMD	(1 << DQ_XCM_AGG_FLG_SHIFT_CF19)
-#define DQ_XCM_ETH_SLOW_PATH_CMD	(1 << DQ_XCM_AGG_FLG_SHIFT_CF22)
-#define DQ_XCM_ETH_TPH_EN_CMD		(1 << DQ_XCM_AGG_FLG_SHIFT_CF23)
+#define DQ_XCM_CORE_DQ_CF_CMD		BIT(DQ_XCM_AGG_FLG_SHIFT_CF18)
+#define DQ_XCM_CORE_TERMINATE_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_CORE_SLOW_PATH_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ETH_DQ_CF_CMD		BIT(DQ_XCM_AGG_FLG_SHIFT_CF18)
+#define DQ_XCM_ETH_TERMINATE_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_ETH_SLOW_PATH_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ETH_TPH_EN_CMD		BIT(DQ_XCM_AGG_FLG_SHIFT_CF23)
+#define DQ_XCM_ISCSI_DQ_FLUSH_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_ISCSI_SLOW_PATH_CMD	BIT(DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23)
 
 /* UCM agg counter flag selection (HW) */
 #define	DQ_UCM_AGG_FLG_SHIFT_CF0	0
@@ -178,9 +251,45 @@
 #define	DQ_UCM_AGG_FLG_SHIFT_RULE1EN	7
 
 /* UCM agg counter flag selection (FW) */
-#define DQ_UCM_ETH_PMD_TX_ARM_CMD	(1 << DQ_UCM_AGG_FLG_SHIFT_CF4)
-#define DQ_UCM_ETH_PMD_RX_ARM_CMD	(1 << DQ_UCM_AGG_FLG_SHIFT_CF5)
+#define DQ_UCM_ETH_PMD_TX_ARM_CMD	BIT(DQ_UCM_AGG_FLG_SHIFT_CF4)
+#define DQ_UCM_ETH_PMD_RX_ARM_CMD	BIT(DQ_UCM_AGG_FLG_SHIFT_CF5)
+#define DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD	BIT(DQ_UCM_AGG_FLG_SHIFT_CF4)
+#define DQ_UCM_ROCE_CQ_ARM_CF_CMD	BIT(DQ_UCM_AGG_FLG_SHIFT_CF5)
 
+/* TCM agg counter flag selection (HW) */
+#define DQ_TCM_AGG_FLG_SHIFT_CF0	0
+#define DQ_TCM_AGG_FLG_SHIFT_CF1	1
+#define DQ_TCM_AGG_FLG_SHIFT_CF2	2
+#define DQ_TCM_AGG_FLG_SHIFT_CF3	3
+#define DQ_TCM_AGG_FLG_SHIFT_CF4	4
+#define DQ_TCM_AGG_FLG_SHIFT_CF5	5
+#define DQ_TCM_AGG_FLG_SHIFT_CF6	6
+#define DQ_TCM_AGG_FLG_SHIFT_CF7	7
+/* TCM agg counter flag selection (FW) */
+#define DQ_TCM_ISCSI_FLUSH_Q0_CMD	BIT(DQ_TCM_AGG_FLG_SHIFT_CF1)
+#define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD	BIT(DQ_TCM_AGG_FLG_SHIFT_CF3)
+
+/* PWM address mapping */
+#define DQ_PWM_OFFSET_DPM_BASE	0x0
+#define DQ_PWM_OFFSET_DPM_END	0x27
+#define DQ_PWM_OFFSET_XCM16_BASE	0x40
+#define DQ_PWM_OFFSET_XCM32_BASE	0x44
+#define DQ_PWM_OFFSET_UCM16_BASE	0x48
+#define DQ_PWM_OFFSET_UCM32_BASE	0x4C
+#define DQ_PWM_OFFSET_UCM16_4	0x50
+#define DQ_PWM_OFFSET_TCM16_BASE	0x58
+#define DQ_PWM_OFFSET_TCM32_BASE	0x5C
+#define DQ_PWM_OFFSET_XCM_FLAGS	0x68
+#define DQ_PWM_OFFSET_UCM_FLAGS	0x69
+#define DQ_PWM_OFFSET_TCM_FLAGS	0x6B
+
+#define DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD		(DQ_PWM_OFFSET_XCM16_BASE + 2)
+#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT	(DQ_PWM_OFFSET_UCM32_BASE)
+#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_16BIT	(DQ_PWM_OFFSET_UCM16_4)
+#define DQ_PWM_OFFSET_UCM_RDMA_INT_TIMEOUT	(DQ_PWM_OFFSET_UCM16_BASE + 2)
+#define DQ_PWM_OFFSET_UCM_RDMA_ARM_FLAGS	(DQ_PWM_OFFSET_UCM_FLAGS)
+#define DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD		(DQ_PWM_OFFSET_TCM16_BASE + 1)
+#define DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD		(DQ_PWM_OFFSET_TCM16_BASE + 3)
 #define	DQ_REGION_SHIFT	(12)
 
 /* DPM */
@@ -214,15 +323,17 @@
  */
 #define CM_TX_PQ_BASE	0x200
 
+/* number of global Vport/QCN rate limiters */
+#define MAX_QM_GLOBAL_RLS	256
 /* QM registers data */
 #define QM_LINE_CRD_REG_WIDTH		16
-#define QM_LINE_CRD_REG_SIGN_BIT	(1 << (QM_LINE_CRD_REG_WIDTH - 1))
+#define QM_LINE_CRD_REG_SIGN_BIT	BIT((QM_LINE_CRD_REG_WIDTH - 1))
 #define QM_BYTE_CRD_REG_WIDTH		24
-#define QM_BYTE_CRD_REG_SIGN_BIT	(1 << (QM_BYTE_CRD_REG_WIDTH - 1))
+#define QM_BYTE_CRD_REG_SIGN_BIT	BIT((QM_BYTE_CRD_REG_WIDTH - 1))
 #define QM_WFQ_CRD_REG_WIDTH		32
-#define QM_WFQ_CRD_REG_SIGN_BIT		(1 << (QM_WFQ_CRD_REG_WIDTH - 1))
+#define QM_WFQ_CRD_REG_SIGN_BIT		BIT((QM_WFQ_CRD_REG_WIDTH - 1))
 #define QM_RL_CRD_REG_WIDTH		32
-#define QM_RL_CRD_REG_SIGN_BIT		(1 << (QM_RL_CRD_REG_WIDTH - 1))
+#define QM_RL_CRD_REG_SIGN_BIT		BIT((QM_RL_CRD_REG_WIDTH - 1))
 
 /*****************/
 /* CAU CONSTANTS */
@@ -287,6 +398,17 @@
 /* PXP CONSTANTS */
 /*****************/
 
+/* Bars for Blocks */
+#define PXP_BAR_GRC	0
+#define PXP_BAR_TSDM	0
+#define PXP_BAR_USDM	0
+#define PXP_BAR_XSDM	0
+#define PXP_BAR_MSDM	0
+#define PXP_BAR_YSDM	0
+#define PXP_BAR_PSDM	0
+#define PXP_BAR_IGU	0
+#define PXP_BAR_DQ	1
+
 /* PTT and GTT */
 #define PXP_NUM_PF_WINDOWS		12
 #define PXP_PER_PF_ENTRY_SIZE		8
@@ -334,6 +456,52 @@
 	(PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + \
 	 PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1)
 
+/* PF BAR */
+#define PXP_BAR0_START_GRC	0x0000
+#define PXP_BAR0_GRC_LENGTH	0x1C00000
+#define PXP_BAR0_END_GRC	(PXP_BAR0_START_GRC + \
+				 PXP_BAR0_GRC_LENGTH - 1)
+
+#define PXP_BAR0_START_IGU	0x1C00000
+#define PXP_BAR0_IGU_LENGTH	0x10000
+#define PXP_BAR0_END_IGU	(PXP_BAR0_START_IGU + \
+				 PXP_BAR0_IGU_LENGTH - 1)
+
+#define PXP_BAR0_START_TSDM	0x1C80000
+#define PXP_BAR0_SDM_LENGTH	0x40000
+#define PXP_BAR0_SDM_RESERVED_LENGTH	0x40000
+#define PXP_BAR0_END_TSDM	(PXP_BAR0_START_TSDM + \
+				 PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_MSDM	0x1D00000
+#define PXP_BAR0_END_MSDM	(PXP_BAR0_START_MSDM + \
+				 PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_USDM	0x1D80000
+#define PXP_BAR0_END_USDM	(PXP_BAR0_START_USDM + \
+				 PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_XSDM	0x1E00000
+#define PXP_BAR0_END_XSDM	(PXP_BAR0_START_XSDM + \
+				 PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_YSDM	0x1E80000
+#define PXP_BAR0_END_YSDM	(PXP_BAR0_START_YSDM + \
+				 PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_START_PSDM	0x1F00000
+#define PXP_BAR0_END_PSDM	(PXP_BAR0_START_PSDM + \
+				 PXP_BAR0_SDM_LENGTH - 1)
+
+#define PXP_BAR0_FIRST_INVALID_ADDRESS	(PXP_BAR0_END_PSDM + 1)
+
+/* VF BAR */
+#define PXP_VF_BAR0	0
+
+#define PXP_VF_BAR0_START_GRC	0x3E00
+#define PXP_VF_BAR0_GRC_LENGTH	0x200
+#define PXP_VF_BAR0_END_GRC	(PXP_VF_BAR0_START_GRC + \
+				 PXP_VF_BAR0_GRC_LENGTH - 1)
 
 #define PXP_VF_BAR0_START_IGU                   0
 #define PXP_VF_BAR0_IGU_LENGTH                  0x3000
@@ -399,6 +567,20 @@
 #define PXP_NUM_ILT_RECORDS_BB 7600
 #define PXP_NUM_ILT_RECORDS_K2 11000
 #define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2)
+#define PXP_QUEUES_ZONE_MAX_NUM 320
+/*****************/
+/* PRM CONSTANTS */
+/*****************/
+#define PRM_DMA_PAD_BYTES_NUM	2
+/******************/
+/* SDMs CONSTANTS */
+/******************/
+#define SDM_OP_GEN_TRIG_NONE	0
+#define SDM_OP_GEN_TRIG_WAKE_THREAD	1
+#define SDM_OP_GEN_TRIG_AGG_INT	2
+#define SDM_OP_GEN_TRIG_LOADER	4
+#define SDM_OP_GEN_TRIG_INDICATE_ERROR	6
+#define SDM_OP_GEN_TRIG_RELEASE_THREAD	7
 
 #define SDM_COMP_TYPE_NONE              0
 #define SDM_COMP_TYPE_WAKE_THREAD       1
@@ -424,6 +606,8 @@
 /* PRS CONSTANTS */
 /*****************/
 
+#define PRS_GFT_CAM_LINES_NO_MATCH	31
+
 /* Async data KCQ CQE */
 struct async_data {
 	__le32	cid;
@@ -440,20 +624,6 @@
 #define	COALESCING_TIMESET_VALID_SHIFT		7
 };
 
-struct common_prs_pf_msg_info {
-	__le32 value;
-#define	COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_MASK	0x1
-#define	COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_SHIFT	0
-#define	COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_MASK		0x1
-#define	COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_SHIFT		1
-#define	COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_MASK		0x1
-#define	COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_SHIFT		2
-#define	COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_MASK		0x1
-#define	COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_SHIFT		3
-#define	COMMON_PRS_PF_MSG_INFO_RESERVED_MASK		0xFFFFFFF
-#define	COMMON_PRS_PF_MSG_INFO_RESERVED_SHIFT		4
-};
-
 struct common_queue_zone {
 	__le16 ring_drv_data_consumer;
 	__le16 reserved;
@@ -473,6 +643,19 @@
 	struct regpair msg_addr;
 };
 
+struct iscsi_eqe_data {
+	__le32 cid;
+	__le16 conn_id;
+	u8 error_code;
+	u8 error_pdu_opcode_reserved;
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_MASK		0x3F
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_SHIFT		0
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_MASK	0x1
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_SHIFT	 6
+#define ISCSI_EQE_DATA_RESERVED0_MASK			0x1
+#define ISCSI_EQE_DATA_RESERVED0_SHIFT			7
+};
+
 struct malicious_vf_eqe_data {
 	u8 vf_id;
 	u8 err_id;
@@ -488,8 +671,10 @@
 union event_ring_data {
 	u8 bytes[8];
 	struct vf_pf_channel_eqe_data vf_pf_channel;
+	struct iscsi_eqe_data iscsi_info;
 	struct malicious_vf_eqe_data malicious_vf;
 	struct initial_cleanup_eqe_data vf_init_cleanup;
+	struct regpair roce_handle;
 };
 
 /* Event Ring Entry */
@@ -616,6 +801,52 @@
 	MAX_DB_DEST
 };
 
+/* Enum of doorbell DPM types */
+enum db_dpm_type {
+	DPM_LEGACY,
+	DPM_ROCE,
+	DPM_L2_INLINE,
+	DPM_L2_BD,
+	MAX_DB_DPM_TYPE
+};
+
+/* Structure for doorbell data, in L2 DPM mode, for 1st db in a DPM burst */
+struct db_l2_dpm_data {
+	__le16 icid;
+	__le16 bd_prod;
+	__le32 params;
+#define DB_L2_DPM_DATA_SIZE_MASK	0x3F
+#define DB_L2_DPM_DATA_SIZE_SHIFT	0
+#define DB_L2_DPM_DATA_DPM_TYPE_MASK	0x3
+#define DB_L2_DPM_DATA_DPM_TYPE_SHIFT	6
+#define DB_L2_DPM_DATA_NUM_BDS_MASK	0xFF
+#define DB_L2_DPM_DATA_NUM_BDS_SHIFT	8
+#define DB_L2_DPM_DATA_PKT_SIZE_MASK	0x7FF
+#define DB_L2_DPM_DATA_PKT_SIZE_SHIFT	16
+#define DB_L2_DPM_DATA_RESERVED0_MASK	0x1
+#define DB_L2_DPM_DATA_RESERVED0_SHIFT 27
+#define DB_L2_DPM_DATA_SGE_NUM_MASK	0x7
+#define DB_L2_DPM_DATA_SGE_NUM_SHIFT	28
+#define DB_L2_DPM_DATA_RESERVED1_MASK	0x1
+#define DB_L2_DPM_DATA_RESERVED1_SHIFT 31
+};
+
+/* Structure for SGE in a DPM doorbell of type DPM_L2_BD */
+struct db_l2_dpm_sge {
+	struct regpair addr;
+	__le16 nbytes;
+	__le16 bitfields;
+#define DB_L2_DPM_SGE_TPH_ST_INDEX_MASK	0x1FF
+#define DB_L2_DPM_SGE_TPH_ST_INDEX_SHIFT 0
+#define DB_L2_DPM_SGE_RESERVED0_MASK	0x3
+#define DB_L2_DPM_SGE_RESERVED0_SHIFT	9
+#define DB_L2_DPM_SGE_ST_VALID_MASK	0x1
+#define DB_L2_DPM_SGE_ST_VALID_SHIFT	11
+#define DB_L2_DPM_SGE_RESERVED1_MASK	0xF
+#define DB_L2_DPM_SGE_RESERVED1_SHIFT	12
+	__le32 reserved2;
+};
+
 /* Structure for doorbell address, in legacy mode */
 struct db_legacy_addr {
 	__le32 addr;
@@ -627,6 +858,49 @@
 #define DB_LEGACY_ADDR_ICID_SHIFT      5
 };
 
+/* Structure for doorbell address, in PWM mode */
+struct db_pwm_addr {
+	__le32 addr;
+#define DB_PWM_ADDR_RESERVED0_MASK	0x7
+#define DB_PWM_ADDR_RESERVED0_SHIFT 0
+#define DB_PWM_ADDR_OFFSET_MASK	0x7F
+#define DB_PWM_ADDR_OFFSET_SHIFT	3
+#define DB_PWM_ADDR_WID_MASK	0x3
+#define DB_PWM_ADDR_WID_SHIFT	10
+#define DB_PWM_ADDR_DPI_MASK	0xFFFF
+#define DB_PWM_ADDR_DPI_SHIFT	12
+#define DB_PWM_ADDR_RESERVED1_MASK	0xF
+#define DB_PWM_ADDR_RESERVED1_SHIFT 28
+};
+
+/* Parameters to RoCE firmware, passed in EDPM doorbell */
+struct db_roce_dpm_params {
+	__le32 params;
+#define DB_ROCE_DPM_PARAMS_SIZE_MASK		0x3F
+#define DB_ROCE_DPM_PARAMS_SIZE_SHIFT		0
+#define DB_ROCE_DPM_PARAMS_DPM_TYPE_MASK	0x3
+#define DB_ROCE_DPM_PARAMS_DPM_TYPE_SHIFT	6
+#define DB_ROCE_DPM_PARAMS_OPCODE_MASK		0xFF
+#define DB_ROCE_DPM_PARAMS_OPCODE_SHIFT		8
+#define DB_ROCE_DPM_PARAMS_WQE_SIZE_MASK	0x7FF
+#define DB_ROCE_DPM_PARAMS_WQE_SIZE_SHIFT	16
+#define DB_ROCE_DPM_PARAMS_RESERVED0_MASK	0x1
+#define DB_ROCE_DPM_PARAMS_RESERVED0_SHIFT	27
+#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_MASK	0x1
+#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_SHIFT 28
+#define DB_ROCE_DPM_PARAMS_S_FLG_MASK		0x1
+#define DB_ROCE_DPM_PARAMS_S_FLG_SHIFT		29
+#define DB_ROCE_DPM_PARAMS_RESERVED1_MASK	0x3
+#define DB_ROCE_DPM_PARAMS_RESERVED1_SHIFT	30
+};
+
+/* Structure for doorbell data, in ROCE DPM mode, for 1st db in a DPM burst */
+struct db_roce_dpm_data {
+	__le16 icid;
+	__le16 prod_val;
+	struct db_roce_dpm_params params;
+};
+
 /* Igu interrupt command */
 enum igu_int_cmd {
 	IGU_INT_ENABLE	= 0,
@@ -764,6 +1038,19 @@
 	struct pxp_pretend_cmd	pretend;
 };
 
+/* VF Zone A Permission Register. */
+struct pxp_vf_zone_a_permission {
+	__le32 control;
+#define PXP_VF_ZONE_A_PERMISSION_VFID_MASK	0xFF
+#define PXP_VF_ZONE_A_PERMISSION_VFID_SHIFT	0
+#define PXP_VF_ZONE_A_PERMISSION_VALID_MASK	0x1
+#define PXP_VF_ZONE_A_PERMISSION_VALID_SHIFT	8
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_MASK	0x7F
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_SHIFT 9
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_MASK	0xFFFF
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_SHIFT 16
+};
+
 /* RSS hash type */
 struct rdif_task_context {
 	__le32 initial_ref_tag;
@@ -831,6 +1118,7 @@
 	__le32 reserved2;
 };
 
+/* RSS hash type */
 enum rss_hash_type {
 	RSS_HASH_TYPE_DEFAULT	= 0,
 	RSS_HASH_TYPE_IPV4	= 1,
@@ -942,7 +1230,7 @@
 };
 
 struct timers_context {
-	__le32 logical_client0;
+	__le32 logical_client_0;
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK     0xFFFFFFF
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT    0
 #define TIMERS_CONTEXT_VALIDLC0_MASK              0x1
@@ -951,7 +1239,7 @@
 #define TIMERS_CONTEXT_ACTIVELC0_SHIFT            29
 #define TIMERS_CONTEXT_RESERVED0_MASK             0x3
 #define TIMERS_CONTEXT_RESERVED0_SHIFT            30
-	__le32 logical_client1;
+	__le32 logical_client_1;
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK     0xFFFFFFF
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT    0
 #define TIMERS_CONTEXT_VALIDLC1_MASK              0x1
@@ -960,7 +1248,7 @@
 #define TIMERS_CONTEXT_ACTIVELC1_SHIFT            29
 #define TIMERS_CONTEXT_RESERVED1_MASK             0x3
 #define TIMERS_CONTEXT_RESERVED1_SHIFT            30
-	__le32 logical_client2;
+	__le32 logical_client_2;
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK     0xFFFFFFF
 #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT    0
 #define TIMERS_CONTEXT_VALIDLC2_MASK              0x1
@@ -978,3 +1266,4 @@
 #define TIMERS_CONTEXT_RESERVED3_SHIFT            29
 };
 #endif /* __COMMON_HSI__ */
+#endif
diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h
index b5ebc69..1aa0727 100644
--- a/include/linux/qed/eth_common.h
+++ b/include/linux/qed/eth_common.h
@@ -13,9 +13,12 @@
 /* ETH FW CONSTANTS */
 /********************/
 #define ETH_HSI_VER_MAJOR                   3
-#define ETH_HSI_VER_MINOR                   0
-#define ETH_CACHE_LINE_SIZE                 64
+#define ETH_HSI_VER_MINOR	10
 
+#define ETH_HSI_VER_NO_PKT_LEN_TUNN	5
+
+#define ETH_CACHE_LINE_SIZE                 64
+#define ETH_RX_CQE_GAP	32
 #define ETH_MAX_RAMROD_PER_CON                          8
 #define ETH_TX_BD_PAGE_SIZE_BYTES                       4096
 #define ETH_RX_BD_PAGE_SIZE_BYTES                       4096
@@ -24,15 +27,25 @@
 
 #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT                          1
 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET                       18
+#define ETH_TX_MAX_BDS_PER_LSO_PACKET	255
 #define ETH_TX_MAX_LSO_HDR_NBD                                          4
 #define ETH_TX_MIN_BDS_PER_LSO_PKT                                      3
 #define ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT       3
 #define ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT            2
 #define ETH_TX_MIN_BDS_PER_PKT_W_LOOPBACK_MODE          2
-#define ETH_TX_MAX_NON_LSO_PKT_LEN                  (9700 - (4 + 12 + 8))
+#define ETH_TX_MAX_NON_LSO_PKT_LEN	(9700 - (4 + 4 + 12 + 8))
 #define ETH_TX_MAX_LSO_HDR_BYTES                    510
+#define ETH_TX_LSO_WINDOW_BDS_NUM	(18 - 1)
+#define ETH_TX_LSO_WINDOW_MIN_LEN	9700
+#define ETH_TX_MAX_LSO_PAYLOAD_LEN	0xFE000
+#define ETH_TX_NUM_SAME_AS_LAST_ENTRIES	320
+#define ETH_TX_INACTIVE_SAME_AS_LAST	0xFFFF
 
 #define ETH_NUM_STATISTIC_COUNTERS                      MAX_NUM_VPORTS
+#define ETH_NUM_STATISTIC_COUNTERS_DOUBLE_VF_ZONE \
+	(ETH_NUM_STATISTIC_COUNTERS - MAX_NUM_VFS / 2)
+#define ETH_NUM_STATISTIC_COUNTERS_QUAD_VF_ZONE \
+	(ETH_NUM_STATISTIC_COUNTERS - 3 * MAX_NUM_VFS / 4)
 
 /* Maximum number of buffers, used for RX packet placement */
 #define ETH_RX_MAX_BUFF_PER_PKT             5
@@ -59,6 +72,8 @@
 #define ETH_TPA_CQE_CONT_LEN_LIST_SIZE    6
 #define ETH_TPA_CQE_END_LEN_LIST_SIZE     4
 
+/* Control frame check constants */
+#define ETH_CTL_FRAME_ETH_TYPE_NUM	4
 
 struct eth_tx_1st_bd_flags {
 	u8 bitfields;
@@ -82,10 +97,10 @@
 
 /* The parsing information data fo rthe first tx bd of a given packet. */
 struct eth_tx_data_1st_bd {
-	__le16				vlan;
-	u8				nbds;
-	struct eth_tx_1st_bd_flags	bd_flags;
-	__le16				bitfields;
+	__le16 vlan;
+	u8 nbds;
+	struct eth_tx_1st_bd_flags bd_flags;
+	__le16 bitfields;
 #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK  0x1
 #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT 0
 #define ETH_TX_DATA_1ST_BD_RESERVED0_MASK          0x1
@@ -96,7 +111,7 @@
 
 /* The parsing information data for the second tx bd of a given packet. */
 struct eth_tx_data_2nd_bd {
-	__le16	tunn_ip_size;
+	__le16 tunn_ip_size;
 	__le16	bitfields1;
 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK  0xF
 #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0
@@ -125,9 +140,14 @@
 #define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT                13
 };
 
+/* Firmware data for L2-EDPM packet. */
+struct eth_edpm_fw_data {
+	struct eth_tx_data_1st_bd data_1st_bd;
+	struct eth_tx_data_2nd_bd data_2nd_bd;
+	__le32 reserved;
+};
+
 struct eth_fast_path_cqe_fw_debug {
-	u8 reserved0;
-	u8 reserved1;
 	__le16 reserved2;
 };
 
@@ -148,6 +168,17 @@
 #define	ETH_TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT	7
 };
 
+/* PMD flow control bits */
+struct eth_pmd_flow_flags {
+	u8 flags;
+#define ETH_PMD_FLOW_FLAGS_VALID_MASK	0x1
+#define ETH_PMD_FLOW_FLAGS_VALID_SHIFT	0
+#define ETH_PMD_FLOW_FLAGS_TOGGLE_MASK	0x1
+#define ETH_PMD_FLOW_FLAGS_TOGGLE_SHIFT	1
+#define ETH_PMD_FLOW_FLAGS_RESERVED_MASK 0x3F
+#define ETH_PMD_FLOW_FLAGS_RESERVED_SHIFT 2
+};
+
 /* Regular ETH Rx FP CQE. */
 struct eth_fast_path_rx_reg_cqe {
 	u8 type;
@@ -166,64 +197,63 @@
 	u8 placement_offset;
 	struct eth_tunnel_parsing_flags tunnel_pars_flags;
 	u8 bd_num;
-	u8 reserved[7];
+	u8 reserved[9];
 	struct eth_fast_path_cqe_fw_debug fw_debug;
 	u8 reserved1[3];
-	u8 flags;
-#define ETH_FAST_PATH_RX_REG_CQE_VALID_MASK          0x1
-#define ETH_FAST_PATH_RX_REG_CQE_VALID_SHIFT         0
-#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_MASK   0x1
-#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_SHIFT  1
-#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_MASK      0x3F
-#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_SHIFT     2
+	struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* TPA-continue ETH Rx FP CQE. */
 struct eth_fast_path_rx_tpa_cont_cqe {
-	u8	type;
-	u8	tpa_agg_index;
-	__le16	len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE];
-	u8	reserved[5];
-	u8	reserved1;
-	__le16	reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE];
+	u8 type;
+	u8 tpa_agg_index;
+	__le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE];
+	u8 reserved;
+	u8 reserved1;
+	__le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE];
+	u8 reserved3[3];
+	struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* TPA-end ETH Rx FP CQE. */
 struct eth_fast_path_rx_tpa_end_cqe {
-	u8	type;
-	u8	tpa_agg_index;
-	__le16	total_packet_len;
-	u8	num_of_bds;
-	u8	end_reason;
-	__le16	num_of_coalesced_segs;
-	__le32	ts_delta;
-	__le16	len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE];
-	u8	reserved1[3];
-	u8	reserved2;
-	__le16	reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE];
+	u8 type;
+	u8 tpa_agg_index;
+	__le16 total_packet_len;
+	u8 num_of_bds;
+	u8 end_reason;
+	__le16 num_of_coalesced_segs;
+	__le32 ts_delta;
+	__le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE];
+	__le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE];
+	__le16 reserved1;
+	u8 reserved2;
+	struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* TPA-start ETH Rx FP CQE. */
 struct eth_fast_path_rx_tpa_start_cqe {
-	u8	type;
-	u8	bitfields;
+	u8 type;
+	u8 bitfields;
 #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_MASK  0x7
 #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_SHIFT 0
 #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_MASK             0xF
 #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_SHIFT            3
 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_MASK      0x1
 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_SHIFT     7
-	__le16	seg_len;
+	__le16 seg_len;
 	struct parsing_and_err_flags pars_flags;
-	__le16	vlan_tag;
-	__le32	rss_hash;
-	__le16	len_on_first_bd;
-	u8	placement_offset;
+	__le16 vlan_tag;
+	__le32 rss_hash;
+	__le16 len_on_first_bd;
+	u8 placement_offset;
 	struct eth_tunnel_parsing_flags tunnel_pars_flags;
-	u8	tpa_agg_index;
-	u8	header_len;
-	__le16	ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE];
+	u8 tpa_agg_index;
+	u8 header_len;
+	__le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE];
 	struct eth_fast_path_cqe_fw_debug fw_debug;
+	u8 reserved;
+	struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* The L4 pseudo checksum mode for Ethernet */
@@ -245,15 +275,7 @@
 	u8	reserved[25];
 	__le16	echo;
 	u8	reserved1;
-	u8	flags;
-/* for PMD mode - valid indication */
-#define ETH_SLOW_PATH_RX_CQE_VALID_MASK         0x1
-#define ETH_SLOW_PATH_RX_CQE_VALID_SHIFT        0
-/* for PMD mode - valid toggle indication */
-#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_MASK  0x1
-#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_SHIFT 1
-#define ETH_SLOW_PATH_RX_CQE_RESERVED2_MASK     0x3F
-#define ETH_SLOW_PATH_RX_CQE_RESERVED2_SHIFT    2
+	struct eth_pmd_flow_flags pmd_flags;
 };
 
 /* union for all ETH Rx CQE types */
@@ -276,6 +298,11 @@
 	MAX_ETH_RX_CQE_TYPE
 };
 
+struct eth_rx_pmd_cqe {
+	union eth_rx_cqe cqe;
+	u8 reserved[ETH_RX_CQE_GAP];
+};
+
 enum eth_rx_tunn_type {
 	ETH_RX_NO_TUNN,
 	ETH_RX_TUNN_GENEVE,
@@ -313,8 +340,8 @@
 
 /* The parsing information data for the third tx bd of a given packet. */
 struct eth_tx_data_3rd_bd {
-	__le16	lso_mss;
-	__le16	bitfields;
+	__le16 lso_mss;
+	__le16 bitfields;
 #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK  0xF
 #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT 0
 #define ETH_TX_DATA_3RD_BD_HDR_NBD_MASK         0xF
@@ -323,8 +350,8 @@
 #define ETH_TX_DATA_3RD_BD_START_BD_SHIFT       8
 #define ETH_TX_DATA_3RD_BD_RESERVED0_MASK       0x7F
 #define ETH_TX_DATA_3RD_BD_RESERVED0_SHIFT      9
-	u8	tunn_l4_hdr_start_offset_w;
-	u8	tunn_hdr_size_w;
+	u8 tunn_l4_hdr_start_offset_w;
+	u8 tunn_hdr_size_w;
 };
 
 /* The third tx bd of a given packet */
@@ -355,10 +382,10 @@
 };
 
 union eth_tx_bd_types {
-	struct eth_tx_1st_bd	first_bd;
-	struct eth_tx_2nd_bd	second_bd;
-	struct eth_tx_3rd_bd	third_bd;
-	struct eth_tx_bd	reg_bd;
+	struct eth_tx_1st_bd first_bd;
+	struct eth_tx_2nd_bd second_bd;
+	struct eth_tx_3rd_bd third_bd;
+	struct eth_tx_bd reg_bd;
 };
 
 /* Mstorm Queue Zone */
@@ -389,8 +416,8 @@
 #define ETH_DB_DATA_RESERVED_SHIFT    5
 #define ETH_DB_DATA_AGG_VAL_SEL_MASK  0x3
 #define ETH_DB_DATA_AGG_VAL_SEL_SHIFT 6
-	u8	agg_flags;
-	__le16	bd_prod;
+	u8 agg_flags;
+	__le16 bd_prod;
 };
 
 #endif /* __ETH_COMMON__ */
diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h
index b3c0feb..8f64b122 100644
--- a/include/linux/qed/iscsi_common.h
+++ b/include/linux/qed/iscsi_common.h
@@ -311,7 +311,7 @@
 #define ISCSI_LOGIN_REQ_HDR_DATA_SEG_LEN_SHIFT  0
 #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_MASK  0xFF
 #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_SHIFT 24
-	__le32 isid_TABC;
+	__le32 isid_tabc;
 	__le16 tsih;
 	__le16 isid_d;
 	__le32 itt;
@@ -464,7 +464,7 @@
 #define ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_SHIFT  0
 #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_MASK  0xFF
 #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24
-	__le32 isid_TABC;
+	__le32 isid_tabc;
 	__le16 tsih;
 	__le16 isid_d;
 	__le32 itt;
@@ -688,8 +688,7 @@
 enum iscsi_cqes_type {
 	ISCSI_CQE_TYPE_SOLICITED = 1,
 	ISCSI_CQE_TYPE_UNSOLICITED,
-	ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE
-	   ,
+	ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE,
 	ISCSI_CQE_TYPE_TASK_CLEANUP,
 	ISCSI_CQE_TYPE_DUMMY,
 	MAX_ISCSI_CQES_TYPE
@@ -769,9 +768,9 @@
 	ISCSI_EVENT_TYPE_UPDATE_CONN,
 	ISCSI_EVENT_TYPE_CLEAR_SQ,
 	ISCSI_EVENT_TYPE_TERMINATE_CONN,
+	ISCSI_EVENT_TYPE_MAC_UPDATE_CONN,
 	ISCSI_EVENT_TYPE_ASYN_CONNECT_COMPLETE,
 	ISCSI_EVENT_TYPE_ASYN_TERMINATE_DONE,
-	RESERVED8,
 	RESERVED9,
 	ISCSI_EVENT_TYPE_START_OF_ERROR_TYPES = 10,
 	ISCSI_EVENT_TYPE_ASYN_ABORT_RCVD,
@@ -867,6 +866,7 @@
 	ISCSI_RAMROD_CMD_ID_UPDATE_CONN = 4,
 	ISCSI_RAMROD_CMD_ID_TERMINATION_CONN = 5,
 	ISCSI_RAMROD_CMD_ID_CLEAR_SQ = 6,
+	ISCSI_RAMROD_CMD_ID_MAC_UPDATE = 7,
 	MAX_ISCSI_RAMROD_CMD_ID
 };
 
@@ -883,6 +883,16 @@
 	__le16 r2t_sn;
 };
 
+struct iscsi_spe_conn_mac_update {
+	struct iscsi_slow_path_hdr hdr;
+	__le16 conn_id;
+	__le32 fw_cid;
+	__le16 remote_mac_addr_lo;
+	__le16 remote_mac_addr_mid;
+	__le16 remote_mac_addr_hi;
+	u8 reserved0[2];
+};
+
 struct iscsi_spe_conn_offload {
 	struct iscsi_slow_path_hdr hdr;
 	__le16 conn_id;
@@ -1302,14 +1312,6 @@
 	struct regpair iscsi_rx_dropped_pdus_task_not_valid;
 };
 
-struct ooo_opaque {
-	__le32 cid;
-	u8 drop_isle;
-	u8 drop_size;
-	u8 ooo_opcode;
-	u8 ooo_isle;
-};
-
 struct pstorm_iscsi_stats_drv {
 	struct regpair iscsi_tx_bytes_cnt;
 	struct regpair iscsi_tx_packet_cnt;
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index 7e441bd..72d88cf 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -16,19 +16,6 @@
 #include <linux/slab.h>
 #include <linux/qed/common_hsi.h>
 
-/* dma_addr_t manip */
-#define DMA_LO_LE(x)            cpu_to_le32(lower_32_bits(x))
-#define DMA_HI_LE(x)            cpu_to_le32(upper_32_bits(x))
-#define DMA_REGPAIR_LE(x, val)  do { \
-					(x).hi = DMA_HI_LE((val)); \
-					(x).lo = DMA_LO_LE((val)); \
-				} while (0)
-
-#define HILO_GEN(hi, lo, type)  ((((type)(hi)) << 32) + (lo))
-#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64)
-#define HILO_64_REGPAIR(regpair)        (HILO_64(regpair.hi, regpair.lo))
-#define HILO_DMA_REGPAIR(regpair)	((dma_addr_t)HILO_64_REGPAIR(regpair))
-
 enum qed_chain_mode {
 	/* Each Page contains a next pointer at its end */
 	QED_CHAIN_MODE_NEXT_PTR,
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 4475a9d..33c24eb 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -23,6 +23,9 @@
 
 	u8	port_mac[ETH_ALEN];
 	u8	num_vlan_filters;
+
+	/* Legacy VF - this affects the datapath, so qede has to know */
+	bool is_legacy;
 };
 
 struct qed_update_vport_rss_params {
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index b1e3c57..f9ae903 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -34,6 +34,8 @@
 	DCBX_MAX_PROTOCOL_TYPE
 };
 
+#define QED_ROCE_PROTOCOL_INDEX (3)
+
 #ifdef CONFIG_DCB
 #define QED_LLDP_CHASSIS_ID_STAT_LEN 4
 #define QED_LLDP_PORT_ID_STAT_LEN 4
@@ -70,8 +72,16 @@
 	u8 max_tc;
 };
 
+enum qed_dcbx_sf_ieee_type {
+	QED_DCBX_SF_IEEE_ETHTYPE,
+	QED_DCBX_SF_IEEE_TCP_PORT,
+	QED_DCBX_SF_IEEE_UDP_PORT,
+	QED_DCBX_SF_IEEE_TCP_UDP_PORT
+};
+
 struct qed_app_entry {
 	bool ethtype;
+	enum qed_dcbx_sf_ieee_type sf_ieee;
 	bool enabled;
 	u8 prio;
 	u16 proto_id;
@@ -252,15 +262,15 @@
 	/* MFW version */
 	u32		mfw_rev;
 
-	bool rdma_supported;
-
 	u32		flash_size;
 	u8		mf_mode;
 	bool		tx_switching;
+	bool		rdma_supported;
 };
 
 enum qed_sb_type {
 	QED_SB_TYPE_L2_QUEUE,
+	QED_SB_TYPE_CNQ,
 };
 
 enum qed_protocol {
@@ -268,6 +278,21 @@
 	QED_PROTOCOL_ISCSI,
 };
 
+enum qed_link_mode_bits {
+	QED_LM_FIBRE_BIT = BIT(0),
+	QED_LM_Autoneg_BIT = BIT(1),
+	QED_LM_Asym_Pause_BIT = BIT(2),
+	QED_LM_Pause_BIT = BIT(3),
+	QED_LM_1000baseT_Half_BIT = BIT(4),
+	QED_LM_1000baseT_Full_BIT = BIT(5),
+	QED_LM_10000baseKR_Full_BIT = BIT(6),
+	QED_LM_25000baseKR_Full_BIT = BIT(7),
+	QED_LM_40000baseLR4_Full_BIT = BIT(8),
+	QED_LM_50000baseKR2_Full_BIT = BIT(9),
+	QED_LM_100000baseKR4_Full_BIT = BIT(10),
+	QED_LM_COUNT = 11
+};
+
 struct qed_link_params {
 	bool	link_up;
 
@@ -295,9 +320,11 @@
 struct qed_link_output {
 	bool	link_up;
 
-	u32	supported_caps;         /* In SUPPORTED defs */
-	u32	advertised_caps;        /* In ADVERTISED defs */
-	u32	lp_caps;                /* In ADVERTISED defs */
+	/* In QED_LM_* defs */
+	u32	supported_caps;
+	u32	advertised_caps;
+	u32	lp_caps;
+
 	u32	speed;                  /* In Mb/s */
 	u8	duplex;                 /* In DUPLEX defs */
 	u8	port;                   /* In PORT defs */
@@ -430,6 +457,10 @@
 	void		(*simd_handler_clean)(struct qed_dev *cdev,
 					      int index);
 
+	int (*dbg_all_data) (struct qed_dev *cdev, void *buffer);
+
+	int (*dbg_all_data_size) (struct qed_dev *cdev);
+
 /**
  * @brief can_link_change - can the instance change the link or not
  *
@@ -598,8 +629,9 @@
 	QED_MSG_SP	= 0x100000,
 	QED_MSG_STORAGE = 0x200000,
 	QED_MSG_CXT	= 0x800000,
+	QED_MSG_LL2	= 0x1000000,
 	QED_MSG_ILT	= 0x2000000,
-	QED_MSG_ROCE	= 0x4000000,
+	QED_MSG_RDMA	= 0x4000000,
 	QED_MSG_DEBUG	= 0x8000000,
 	/* to be added...up to 0x8000000 */
 };
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
new file mode 100644
index 0000000..fd75c26
--- /dev/null
+++ b/include/linux/qed/qed_ll2_if.h
@@ -0,0 +1,139 @@
+/* QLogic qed NIC Driver
+ *
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_LL2_IF_H
+#define _QED_LL2_IF_H
+
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/skbuff.h>
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/qed/qed_if.h>
+
+struct qed_ll2_stats {
+	u64 gsi_invalid_hdr;
+	u64 gsi_invalid_pkt_length;
+	u64 gsi_unsupported_pkt_typ;
+	u64 gsi_crcchksm_error;
+
+	u64 packet_too_big_discard;
+	u64 no_buff_discard;
+
+	u64 rcv_ucast_bytes;
+	u64 rcv_mcast_bytes;
+	u64 rcv_bcast_bytes;
+	u64 rcv_ucast_pkts;
+	u64 rcv_mcast_pkts;
+	u64 rcv_bcast_pkts;
+
+	u64 sent_ucast_bytes;
+	u64 sent_mcast_bytes;
+	u64 sent_bcast_bytes;
+	u64 sent_ucast_pkts;
+	u64 sent_mcast_pkts;
+	u64 sent_bcast_pkts;
+};
+
+#define QED_LL2_UNUSED_HANDLE   (0xff)
+
+struct qed_ll2_cb_ops {
+	int (*rx_cb)(void *, struct sk_buff *, u32, u32);
+	int (*tx_cb)(void *, struct sk_buff *, bool);
+};
+
+struct qed_ll2_params {
+	u16 mtu;
+	bool drop_ttl0_packets;
+	bool rx_vlan_stripping;
+	u8 tx_tc;
+	bool frags_mapped;
+	u8 ll2_mac_address[ETH_ALEN];
+};
+
+struct qed_ll2_ops {
+/**
+ * @brief start - initializes ll2
+ *
+ * @param cdev
+ * @param params - protocol driver configuration for the ll2.
+ *
+ * @return 0 on success, otherwise error value.
+ */
+	int (*start)(struct qed_dev *cdev, struct qed_ll2_params *params);
+
+/**
+ * @brief stop - stops the ll2
+ *
+ * @param cdev
+ *
+ * @return 0 on success, otherwise error value.
+ */
+	int (*stop)(struct qed_dev *cdev);
+
+/**
+ * @brief start_xmit - transmits an skb over the ll2 interface
+ *
+ * @param cdev
+ * @param skb
+ *
+ * @return 0 on success, otherwise error value.
+ */
+	int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb);
+
+/**
+ * @brief register_cb_ops - protocol driver register the callback for Rx/Tx
+ * packets. Should be called before `start'.
+ *
+ * @param cdev
+ * @param cookie - to be passed to the callback functions.
+ * @param ops - the callback functions to register for Rx / Tx.
+ *
+ * @return 0 on success, otherwise error value.
+ */
+	void (*register_cb_ops)(struct qed_dev *cdev,
+				const struct qed_ll2_cb_ops *ops,
+				void *cookie);
+
+/**
+ * @brief get LL2 related statistics
+ *
+ * @param cdev
+ * @param stats - pointer to struct that would be filled with stats
+ *
+ * @return 0 on success, error otherwise.
+ */
+	int (*get_stats)(struct qed_dev *cdev, struct qed_ll2_stats *stats);
+};
+
+#ifdef CONFIG_QED_LL2
+int qed_ll2_alloc_if(struct qed_dev *);
+void qed_ll2_dealloc_if(struct qed_dev *);
+#else
+static const struct qed_ll2_ops qed_ll2_ops_pass = {
+	.start = NULL,
+	.stop = NULL,
+	.start_xmit = NULL,
+	.register_cb_ops = NULL,
+	.get_stats = NULL,
+};
+
+static inline int qed_ll2_alloc_if(struct qed_dev *cdev)
+{
+	return 0;
+}
+
+static inline void qed_ll2_dealloc_if(struct qed_dev *cdev)
+{
+}
+#endif
+#endif
diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h
new file mode 100644
index 0000000..53047d3
--- /dev/null
+++ b/include/linux/qed/qed_roce_if.h
@@ -0,0 +1,604 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _QED_ROCE_IF_H
+#define _QED_ROCE_IF_H
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/qed/qed_if.h>
+#include <linux/qed/qed_ll2_if.h>
+#include <linux/qed/rdma_common.h>
+
+enum qed_roce_ll2_tx_dest {
+	/* Light L2 TX Destination to the Network */
+	QED_ROCE_LL2_TX_DEST_NW,
+
+	/* Light L2 TX Destination to the Loopback */
+	QED_ROCE_LL2_TX_DEST_LB,
+	QED_ROCE_LL2_TX_DEST_MAX
+};
+
+#define QED_RDMA_MAX_CNQ_SIZE               (0xFFFF)
+
+/* rdma interface */
+
+enum qed_roce_qp_state {
+	QED_ROCE_QP_STATE_RESET,
+	QED_ROCE_QP_STATE_INIT,
+	QED_ROCE_QP_STATE_RTR,
+	QED_ROCE_QP_STATE_RTS,
+	QED_ROCE_QP_STATE_SQD,
+	QED_ROCE_QP_STATE_ERR,
+	QED_ROCE_QP_STATE_SQE
+};
+
+enum qed_rdma_tid_type {
+	QED_RDMA_TID_REGISTERED_MR,
+	QED_RDMA_TID_FMR,
+	QED_RDMA_TID_MW_TYPE1,
+	QED_RDMA_TID_MW_TYPE2A
+};
+
+struct qed_rdma_events {
+	void *context;
+	void (*affiliated_event)(void *context, u8 fw_event_code,
+				 void *fw_handle);
+	void (*unaffiliated_event)(void *context, u8 event_code);
+};
+
+struct qed_rdma_device {
+	u32 vendor_id;
+	u32 vendor_part_id;
+	u32 hw_ver;
+	u64 fw_ver;
+
+	u64 node_guid;
+	u64 sys_image_guid;
+
+	u8 max_cnq;
+	u8 max_sge;
+	u8 max_srq_sge;
+	u16 max_inline;
+	u32 max_wqe;
+	u32 max_srq_wqe;
+	u8 max_qp_resp_rd_atomic_resc;
+	u8 max_qp_req_rd_atomic_resc;
+	u64 max_dev_resp_rd_atomic_resc;
+	u32 max_cq;
+	u32 max_qp;
+	u32 max_srq;
+	u32 max_mr;
+	u64 max_mr_size;
+	u32 max_cqe;
+	u32 max_mw;
+	u32 max_fmr;
+	u32 max_mr_mw_fmr_pbl;
+	u64 max_mr_mw_fmr_size;
+	u32 max_pd;
+	u32 max_ah;
+	u8 max_pkey;
+	u16 max_srq_wr;
+	u8 max_stats_queues;
+	u32 dev_caps;
+
+	/* Abilty to support RNR-NAK generation */
+
+#define QED_RDMA_DEV_CAP_RNR_NAK_MASK                           0x1
+#define QED_RDMA_DEV_CAP_RNR_NAK_SHIFT                  0
+	/* Abilty to support shutdown port */
+#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_MASK                     0x1
+#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_SHIFT                    1
+	/* Abilty to support port active event */
+#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_MASK         0x1
+#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT                2
+	/* Abilty to support port change event */
+#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_MASK         0x1
+#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_SHIFT                3
+	/* Abilty to support system image GUID */
+#define QED_RDMA_DEV_CAP_SYS_IMAGE_MASK                 0x1
+#define QED_RDMA_DEV_CAP_SYS_IMAGE_SHIFT                        4
+	/* Abilty to support bad P_Key counter support */
+#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_MASK                      0x1
+#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_SHIFT                     5
+	/* Abilty to support atomic operations */
+#define QED_RDMA_DEV_CAP_ATOMIC_OP_MASK                 0x1
+#define QED_RDMA_DEV_CAP_ATOMIC_OP_SHIFT                        6
+#define QED_RDMA_DEV_CAP_RESIZE_CQ_MASK                 0x1
+#define QED_RDMA_DEV_CAP_RESIZE_CQ_SHIFT                        7
+	/* Abilty to support modifying the maximum number of
+	 * outstanding work requests per QP
+	 */
+#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_MASK                     0x1
+#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_SHIFT                    8
+	/* Abilty to support automatic path migration */
+#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_MASK                     0x1
+#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_SHIFT                    9
+	/* Abilty to support the base memory management extensions */
+#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_MASK                   0x1
+#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_SHIFT          10
+#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_MASK                    0x1
+#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_SHIFT                   11
+	/* Abilty to support multipile page sizes per memory region */
+#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK             0x1
+#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT            12
+	/* Abilty to support block list physical buffer list */
+#define QED_RDMA_DEV_CAP_BLOCK_MODE_MASK                        0x1
+#define QED_RDMA_DEV_CAP_BLOCK_MODE_SHIFT                       13
+	/* Abilty to support zero based virtual addresses */
+#define QED_RDMA_DEV_CAP_ZBVA_MASK                              0x1
+#define QED_RDMA_DEV_CAP_ZBVA_SHIFT                             14
+	/* Abilty to support local invalidate fencing */
+#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_MASK                   0x1
+#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_SHIFT          15
+	/* Abilty to support Loopback on QP */
+#define QED_RDMA_DEV_CAP_LB_INDICATOR_MASK                      0x1
+#define QED_RDMA_DEV_CAP_LB_INDICATOR_SHIFT                     16
+	u64 page_size_caps;
+	u8 dev_ack_delay;
+	u32 reserved_lkey;
+	u32 bad_pkey_counter;
+	struct qed_rdma_events events;
+};
+
+enum qed_port_state {
+	QED_RDMA_PORT_UP,
+	QED_RDMA_PORT_DOWN,
+};
+
+enum qed_roce_capability {
+	QED_ROCE_V1 = 1 << 0,
+	QED_ROCE_V2 = 1 << 1,
+};
+
+struct qed_rdma_port {
+	enum qed_port_state port_state;
+	int link_speed;
+	u64 max_msg_size;
+	u8 source_gid_table_len;
+	void *source_gid_table_ptr;
+	u8 pkey_table_len;
+	void *pkey_table_ptr;
+	u32 pkey_bad_counter;
+	enum qed_roce_capability capability;
+};
+
+struct qed_rdma_cnq_params {
+	u8 num_pbl_pages;
+	u64 pbl_ptr;
+};
+
+/* The CQ Mode affects the CQ doorbell transaction size.
+ * 64/32 bit machines should configure to 32/16 bits respectively.
+ */
+enum qed_rdma_cq_mode {
+	QED_RDMA_CQ_MODE_16_BITS,
+	QED_RDMA_CQ_MODE_32_BITS,
+};
+
+struct qed_roce_dcqcn_params {
+	u8 notification_point;
+	u8 reaction_point;
+
+	/* fields for notification point */
+	u32 cnp_send_timeout;
+
+	/* fields for reaction point */
+	u32 rl_bc_rate;
+	u16 rl_max_rate;
+	u16 rl_r_ai;
+	u16 rl_r_hai;
+	u16 dcqcn_g;
+	u32 dcqcn_k_us;
+	u32 dcqcn_timeout_us;
+};
+
+struct qed_rdma_start_in_params {
+	struct qed_rdma_events *events;
+	struct qed_rdma_cnq_params cnq_pbl_list[128];
+	u8 desired_cnq;
+	enum qed_rdma_cq_mode cq_mode;
+	struct qed_roce_dcqcn_params dcqcn_params;
+	u16 max_mtu;
+	u8 mac_addr[ETH_ALEN];
+	u8 iwarp_flags;
+};
+
+struct qed_rdma_add_user_out_params {
+	u16 dpi;
+	u64 dpi_addr;
+	u64 dpi_phys_addr;
+	u32 dpi_size;
+};
+
+enum roce_mode {
+	ROCE_V1,
+	ROCE_V2_IPV4,
+	ROCE_V2_IPV6,
+	MAX_ROCE_MODE
+};
+
+union qed_gid {
+	u8 bytes[16];
+	u16 words[8];
+	u32 dwords[4];
+	u64 qwords[2];
+	u32 ipv4_addr;
+};
+
+struct qed_rdma_register_tid_in_params {
+	u32 itid;
+	enum qed_rdma_tid_type tid_type;
+	u8 key;
+	u16 pd;
+	bool local_read;
+	bool local_write;
+	bool remote_read;
+	bool remote_write;
+	bool remote_atomic;
+	bool mw_bind;
+	u64 pbl_ptr;
+	bool pbl_two_level;
+	u8 pbl_page_size_log;
+	u8 page_size_log;
+	u32 fbo;
+	u64 length;
+	u64 vaddr;
+	bool zbva;
+	bool phy_mr;
+	bool dma_mr;
+
+	bool dif_enabled;
+	u64 dif_error_addr;
+	u64 dif_runt_addr;
+};
+
+struct qed_rdma_create_cq_in_params {
+	u32 cq_handle_lo;
+	u32 cq_handle_hi;
+	u32 cq_size;
+	u16 dpi;
+	bool pbl_two_level;
+	u64 pbl_ptr;
+	u16 pbl_num_pages;
+	u8 pbl_page_size_log;
+	u8 cnq_id;
+	u16 int_timeout;
+};
+
+struct qed_rdma_create_srq_in_params {
+	u64 pbl_base_addr;
+	u64 prod_pair_addr;
+	u16 num_pages;
+	u16 pd_id;
+	u16 page_size;
+};
+
+struct qed_rdma_destroy_cq_in_params {
+	u16 icid;
+};
+
+struct qed_rdma_destroy_cq_out_params {
+	u16 num_cq_notif;
+};
+
+struct qed_rdma_create_qp_in_params {
+	u32 qp_handle_lo;
+	u32 qp_handle_hi;
+	u32 qp_handle_async_lo;
+	u32 qp_handle_async_hi;
+	bool use_srq;
+	bool signal_all;
+	bool fmr_and_reserved_lkey;
+	u16 pd;
+	u16 dpi;
+	u16 sq_cq_id;
+	u16 sq_num_pages;
+	u64 sq_pbl_ptr;
+	u8 max_sq_sges;
+	u16 rq_cq_id;
+	u16 rq_num_pages;
+	u64 rq_pbl_ptr;
+	u16 srq_id;
+	u8 stats_queue;
+};
+
+struct qed_rdma_create_qp_out_params {
+	u32 qp_id;
+	u16 icid;
+	void *rq_pbl_virt;
+	dma_addr_t rq_pbl_phys;
+	void *sq_pbl_virt;
+	dma_addr_t sq_pbl_phys;
+};
+
+struct qed_rdma_modify_qp_in_params {
+	u32 modify_flags;
+#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_MASK               0x1
+#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_SHIFT              0
+#define QED_ROCE_MODIFY_QP_VALID_PKEY_MASK                    0x1
+#define QED_ROCE_MODIFY_QP_VALID_PKEY_SHIFT                   1
+#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_MASK             0x1
+#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_SHIFT            2
+#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_MASK                 0x1
+#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_SHIFT                3
+#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_MASK          0x1
+#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_SHIFT         4
+#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_MASK                  0x1
+#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_SHIFT                 5
+#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_MASK                  0x1
+#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_SHIFT                 6
+#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_MASK       0x1
+#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_SHIFT      7
+#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_MASK      0x1
+#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_SHIFT     8
+#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_MASK             0x1
+#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_SHIFT            9
+#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_MASK               0x1
+#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_SHIFT              10
+#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_MASK           0x1
+#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_SHIFT          11
+#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_MASK       0x1
+#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_SHIFT      12
+#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_MASK     0x1
+#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_SHIFT    13
+#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_MASK               0x1
+#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_SHIFT              14
+
+	enum qed_roce_qp_state new_state;
+	u16 pkey;
+	bool incoming_rdma_read_en;
+	bool incoming_rdma_write_en;
+	bool incoming_atomic_en;
+	bool e2e_flow_control_en;
+	u32 dest_qp;
+	bool lb_indication;
+	u16 mtu;
+	u8 traffic_class_tos;
+	u8 hop_limit_ttl;
+	u32 flow_label;
+	union qed_gid sgid;
+	union qed_gid dgid;
+	u16 udp_src_port;
+
+	u16 vlan_id;
+
+	u32 rq_psn;
+	u32 sq_psn;
+	u8 max_rd_atomic_resp;
+	u8 max_rd_atomic_req;
+	u32 ack_timeout;
+	u8 retry_cnt;
+	u8 rnr_retry_cnt;
+	u8 min_rnr_nak_timer;
+	bool sqd_async;
+	u8 remote_mac_addr[6];
+	u8 local_mac_addr[6];
+	bool use_local_mac;
+	enum roce_mode roce_mode;
+};
+
+struct qed_rdma_query_qp_out_params {
+	enum qed_roce_qp_state state;
+	u32 rq_psn;
+	u32 sq_psn;
+	bool draining;
+	u16 mtu;
+	u32 dest_qp;
+	bool incoming_rdma_read_en;
+	bool incoming_rdma_write_en;
+	bool incoming_atomic_en;
+	bool e2e_flow_control_en;
+	union qed_gid sgid;
+	union qed_gid dgid;
+	u32 flow_label;
+	u8 hop_limit_ttl;
+	u8 traffic_class_tos;
+	u32 timeout;
+	u8 rnr_retry;
+	u8 retry_cnt;
+	u8 min_rnr_nak_timer;
+	u16 pkey_index;
+	u8 max_rd_atomic;
+	u8 max_dest_rd_atomic;
+	bool sqd_async;
+};
+
+struct qed_rdma_create_srq_out_params {
+	u16 srq_id;
+};
+
+struct qed_rdma_destroy_srq_in_params {
+	u16 srq_id;
+};
+
+struct qed_rdma_modify_srq_in_params {
+	u32 wqe_limit;
+	u16 srq_id;
+};
+
+struct qed_rdma_stats_out_params {
+	u64 sent_bytes;
+	u64 sent_pkts;
+	u64 rcv_bytes;
+	u64 rcv_pkts;
+};
+
+struct qed_rdma_counters_out_params {
+	u64 pd_count;
+	u64 max_pd;
+	u64 dpi_count;
+	u64 max_dpi;
+	u64 cq_count;
+	u64 max_cq;
+	u64 qp_count;
+	u64 max_qp;
+	u64 tid_count;
+	u64 max_tid;
+};
+
+#define QED_ROCE_TX_HEAD_FAILURE        (1)
+#define QED_ROCE_TX_FRAG_FAILURE        (2)
+
+struct qed_roce_ll2_header {
+	void *vaddr;
+	dma_addr_t baddr;
+	size_t len;
+};
+
+struct qed_roce_ll2_buffer {
+	dma_addr_t baddr;
+	size_t len;
+};
+
+struct qed_roce_ll2_packet {
+	struct qed_roce_ll2_header header;
+	int n_seg;
+	struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE];
+	int roce_mode;
+	enum qed_roce_ll2_tx_dest tx_dest;
+};
+
+struct qed_roce_ll2_tx_params {
+	int reserved;
+};
+
+struct qed_roce_ll2_rx_params {
+	u16 vlan_id;
+	u8 smac[ETH_ALEN];
+	int rc;
+};
+
+struct qed_roce_ll2_cbs {
+	void (*tx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt);
+
+	void (*rx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt,
+		      struct qed_roce_ll2_rx_params *params);
+};
+
+struct qed_roce_ll2_params {
+	u16 max_rx_buffers;
+	u16 max_tx_buffers;
+	u16 mtu;
+	u8 mac_address[ETH_ALEN];
+	struct qed_roce_ll2_cbs cbs;
+	void *cb_cookie;
+};
+
+struct qed_roce_ll2_info {
+	u8 handle;
+	struct qed_roce_ll2_cbs cbs;
+	u8 mac_address[ETH_ALEN];
+	void *cb_cookie;
+
+	/* Lock to protect ll2 */
+	struct mutex lock;
+};
+
+enum qed_rdma_type {
+	QED_RDMA_TYPE_ROCE,
+};
+
+struct qed_dev_rdma_info {
+	struct qed_dev_info common;
+	enum qed_rdma_type rdma_type;
+};
+
+struct qed_rdma_ops {
+	const struct qed_common_ops *common;
+
+	int (*fill_dev_info)(struct qed_dev *cdev,
+			     struct qed_dev_rdma_info *info);
+	void *(*rdma_get_rdma_ctx)(struct qed_dev *cdev);
+
+	int (*rdma_init)(struct qed_dev *dev,
+			 struct qed_rdma_start_in_params *iparams);
+
+	int (*rdma_add_user)(void *rdma_cxt,
+			     struct qed_rdma_add_user_out_params *oparams);
+
+	void (*rdma_remove_user)(void *rdma_cxt, u16 dpi);
+	int (*rdma_stop)(void *rdma_cxt);
+	struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt);
+	struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt);
+	int (*rdma_get_start_sb)(struct qed_dev *cdev);
+	int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev);
+	void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod);
+	int (*rdma_get_rdma_int)(struct qed_dev *cdev,
+				 struct qed_int_info *info);
+	int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt);
+	int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd);
+	void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd);
+	int (*rdma_create_cq)(void *rdma_cxt,
+			      struct qed_rdma_create_cq_in_params *params,
+			      u16 *icid);
+	int (*rdma_destroy_cq)(void *rdma_cxt,
+			       struct qed_rdma_destroy_cq_in_params *iparams,
+			       struct qed_rdma_destroy_cq_out_params *oparams);
+	struct qed_rdma_qp *
+	(*rdma_create_qp)(void *rdma_cxt,
+			  struct qed_rdma_create_qp_in_params *iparams,
+			  struct qed_rdma_create_qp_out_params *oparams);
+
+	int (*rdma_modify_qp)(void *roce_cxt, struct qed_rdma_qp *qp,
+			      struct qed_rdma_modify_qp_in_params *iparams);
+
+	int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp,
+			     struct qed_rdma_query_qp_out_params *oparams);
+	int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp);
+	int
+	(*rdma_register_tid)(void *rdma_cxt,
+			     struct qed_rdma_register_tid_in_params *iparams);
+	int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid);
+	int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid);
+	void (*rdma_free_tid)(void *rdma_cxt, u32 itid);
+	int (*roce_ll2_start)(struct qed_dev *cdev,
+			      struct qed_roce_ll2_params *params);
+	int (*roce_ll2_stop)(struct qed_dev *cdev);
+	int (*roce_ll2_tx)(struct qed_dev *cdev,
+			   struct qed_roce_ll2_packet *packet,
+			   struct qed_roce_ll2_tx_params *params);
+	int (*roce_ll2_post_rx_buffer)(struct qed_dev *cdev,
+				       struct qed_roce_ll2_buffer *buf,
+				       u64 cookie, u8 notify_fw);
+	int (*roce_ll2_set_mac_filter)(struct qed_dev *cdev,
+				       u8 *old_mac_address,
+				       u8 *new_mac_address);
+	int (*roce_ll2_stats)(struct qed_dev *cdev,
+			      struct qed_ll2_stats *stats);
+};
+
+const struct qed_rdma_ops *qed_get_rdma_ops(void);
+
+#endif
diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h
new file mode 100644
index 0000000..99fbe6d
--- /dev/null
+++ b/include/linux/qed/qede_roce.h
@@ -0,0 +1,88 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef QEDE_ROCE_H
+#define QEDE_ROCE_H
+
+struct qedr_dev;
+struct qed_dev;
+struct qede_dev;
+
+enum qede_roce_event {
+	QEDE_UP,
+	QEDE_DOWN,
+	QEDE_CHANGE_ADDR,
+	QEDE_CLOSE
+};
+
+struct qede_roce_event_work {
+	struct list_head list;
+	struct work_struct work;
+	void *ptr;
+	enum qede_roce_event event;
+};
+
+struct qedr_driver {
+	unsigned char name[32];
+
+	struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *,
+				struct net_device *);
+
+	void (*remove)(struct qedr_dev *);
+	void (*notify)(struct qedr_dev *, enum qede_roce_event);
+};
+
+/* APIs for RoCE driver to register callback handlers,
+ * which will be invoked when device is added, removed, ifup, ifdown
+ */
+int qede_roce_register_driver(struct qedr_driver *drv);
+void qede_roce_unregister_driver(struct qedr_driver *drv);
+
+bool qede_roce_supported(struct qede_dev *dev);
+
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+int qede_roce_dev_add(struct qede_dev *dev);
+void qede_roce_dev_event_open(struct qede_dev *dev);
+void qede_roce_dev_event_close(struct qede_dev *dev);
+void qede_roce_dev_remove(struct qede_dev *dev);
+void qede_roce_event_changeaddr(struct qede_dev *qedr);
+#else
+static inline int qede_roce_dev_add(struct qede_dev *dev)
+{
+	return 0;
+}
+
+static inline void qede_roce_dev_event_open(struct qede_dev *dev) {}
+static inline void qede_roce_dev_event_close(struct qede_dev *dev) {}
+static inline void qede_roce_dev_remove(struct qede_dev *dev) {}
+static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {}
+#endif
+#endif
diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h
index 187991c..7663725 100644
--- a/include/linux/qed/rdma_common.h
+++ b/include/linux/qed/rdma_common.h
@@ -28,6 +28,7 @@
 #define RDMA_MAX_PDS                            (64 * 1024)
 
 #define RDMA_NUM_STATISTIC_COUNTERS                     MAX_NUM_VPORTS
+#define RDMA_NUM_STATISTIC_COUNTERS_BB			MAX_NUM_VPORTS_BB
 
 #define RDMA_TASK_TYPE (PROTOCOLID_ROCE)
 
diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h
index accba0e..dc3889d 100644
--- a/include/linux/qed/tcp_common.h
+++ b/include/linux/qed/tcp_common.h
@@ -11,6 +11,14 @@
 
 #define TCP_INVALID_TIMEOUT_VAL -1
 
+struct ooo_opaque {
+	__le32 cid;
+	u8 drop_isle;
+	u8 drop_size;
+	u8 ooo_opcode;
+	u8 ooo_isle;
+};
+
 enum tcp_connect_mode {
 	TCP_CONNECT_ACTIVE,
 	TCP_CONNECT_PASSIVE,
@@ -18,14 +26,10 @@
 };
 
 struct tcp_init_params {
-	__le32 max_cwnd;
-	__le16 dup_ack_threshold;
+	__le32 two_msl_timer;
 	__le16 tx_sws_timer;
-	__le16 min_rto;
-	__le16 min_rto_rt;
-	__le16 max_rto;
 	u8 maxfinrt;
-	u8 reserved[1];
+	u8 reserved[9];
 };
 
 enum tcp_ip_version {
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 4c45105..52b97db 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -280,9 +280,9 @@
 			      struct radix_tree_node *node);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
 void *radix_tree_delete(struct radix_tree_root *, unsigned long);
-struct radix_tree_node *radix_tree_replace_clear_tags(
-				struct radix_tree_root *root,
-				unsigned long index, void *entry);
+void radix_tree_clear_tags(struct radix_tree_root *root,
+			   struct radix_tree_node *node,
+			   void **slot);
 unsigned int radix_tree_gang_lookup(struct radix_tree_root *root,
 			void **results, unsigned long first_index,
 			unsigned int max_items);
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index a0118d5..395a4c6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -103,6 +103,7 @@
 extern const struct raid6_calls raid6_avx2x2;
 extern const struct raid6_calls raid6_avx2x4;
 extern const struct raid6_calls raid6_tilegx8;
+extern const struct raid6_calls raid6_s390vx8;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
@@ -115,6 +116,7 @@
 extern const struct raid6_recov_calls raid6_recov_intx1;
 extern const struct raid6_recov_calls raid6_recov_ssse3;
 extern const struct raid6_recov_calls raid6_recov_avx2;
+extern const struct raid6_recov_calls raid6_recov_s390xc;
 
 extern const struct raid6_calls raid6_neonx1;
 extern const struct raid6_calls raid6_neonx2;
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
index a63a33e..ece7ed9 100644
--- a/include/linux/rcu_sync.h
+++ b/include/linux/rcu_sync.h
@@ -59,6 +59,7 @@
 }
 
 extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type);
+extern void rcu_sync_enter_start(struct rcu_sync *);
 extern void rcu_sync_enter(struct rcu_sync *);
 extern void rcu_sync_exit(struct rcu_sync *);
 extern void rcu_sync_dtor(struct rcu_sync *);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 1aa62e1..321f9ed 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -334,6 +334,7 @@
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
+void rcu_cpu_starting(unsigned int cpu);
 
 #ifndef CONFIG_TINY_RCU
 void rcu_end_inkernel_boot(void);
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 2c12cc5..9adc7b2 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -241,9 +241,9 @@
  *                register cache support).
  * @num_reg_defaults: Number of elements in reg_defaults.
  *
- * @read_flag_mask: Mask to be set in the top byte of the register when doing
+ * @read_flag_mask: Mask to be set in the top bytes of the register when doing
  *                  a read.
- * @write_flag_mask: Mask to be set in the top byte of the register when doing
+ * @write_flag_mask: Mask to be set in the top bytes of the register when doing
  *                   a write. If both read_flag_mask and write_flag_mask are
  *                   empty the regmap_bus default masks are used.
  * @use_single_rw: If set, converts the bulk read and write operations into
@@ -299,8 +299,8 @@
 	const void *reg_defaults_raw;
 	unsigned int num_reg_defaults_raw;
 
-	u8 read_flag_mask;
-	u8 write_flag_mask;
+	unsigned long read_flag_mask;
+	unsigned long write_flag_mask;
 
 	bool use_single_rw;
 	bool can_multi_write;
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index cae500b..6921082 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -140,8 +140,6 @@
  *
  * @supply:   The name of the supply.  Initialised by the user before
  *            using the bulk regulator APIs.
- * @optional: The supply should be considered optional. Initialised by the user
- *            before using the bulk regulator APIs.
  * @consumer: The regulator consumer for the supply.  This will be managed
  *            by the bulk API.
  *
@@ -151,7 +149,6 @@
  */
 struct regulator_bulk_data {
 	const char *supply;
-	bool optional;
 	struct regulator *consumer;
 
 	/* private: Internal use */
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index fcfa40a..37b5324 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -113,10 +113,14 @@
  *               stabilise after being enabled, in microseconds.
  * @set_ramp_delay: Set the ramp delay for the regulator. The driver should
  *		select ramp delay equal to or less than(closest) ramp_delay.
+ * @set_voltage_time: Time taken for the regulator voltage output voltage
+ *               to stabilise after being set to a new value, in microseconds.
+ *               The function receives the from and to voltage as input, it
+ *               should return the worst case.
  * @set_voltage_time_sel: Time taken for the regulator voltage output voltage
  *               to stabilise after being set to a new value, in microseconds.
- *               The function provides the from and to voltage selector, the
- *               function should return the worst case.
+ *               The function receives the from and to voltage selector as
+ *               input, it should return the worst case.
  * @set_soft_start: Enable soft start for the regulator.
  *
  * @set_suspend_voltage: Set the voltage for the regulator when the system
@@ -168,6 +172,8 @@
 	/* Time taken to enable or set voltage on the regulator */
 	int (*enable_time) (struct regulator_dev *);
 	int (*set_ramp_delay) (struct regulator_dev *, int ramp_delay);
+	int (*set_voltage_time) (struct regulator_dev *, int old_uV,
+				 int new_uV);
 	int (*set_voltage_time_sel) (struct regulator_dev *,
 				     unsigned int old_selector,
 				     unsigned int new_selector);
diff --git a/include/linux/relay.h b/include/linux/relay.h
index d7c8359..ecbb34a 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/poll.h>
 #include <linux/kref.h>
+#include <linux/percpu.h>
 
 /*
  * Tracks changes to rchan/rchan_buf structs
@@ -63,7 +64,7 @@
 	struct kref kref;		/* channel refcount */
 	void *private_data;		/* for user-defined data */
 	size_t last_toobig;		/* tried to log event > subbuf size */
-	struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */
+	struct rchan_buf ** __percpu buf; /* per-cpu channel buffers */
 	int is_global;			/* One global buffer ? */
 	struct list_head list;		/* for channel list */
 	struct dentry *parent;		/* parent dentry passed to open */
@@ -204,7 +205,7 @@
 	struct rchan_buf *buf;
 
 	local_irq_save(flags);
-	buf = chan->buf[smp_processor_id()];
+	buf = *this_cpu_ptr(chan->buf);
 	if (unlikely(buf->offset + length > chan->subbuf_size))
 		length = relay_switch_subbuf(buf, length);
 	memcpy(buf->data + buf->offset, data, length);
@@ -230,12 +231,12 @@
 {
 	struct rchan_buf *buf;
 
-	buf = chan->buf[get_cpu()];
+	buf = *get_cpu_ptr(chan->buf);
 	if (unlikely(buf->offset + length > buf->chan->subbuf_size))
 		length = relay_switch_subbuf(buf, length);
 	memcpy(buf->data + buf->offset, data, length);
 	buf->offset += length;
-	put_cpu();
+	put_cpu_ptr(chan->buf);
 }
 
 /**
@@ -251,17 +252,19 @@
  */
 static inline void *relay_reserve(struct rchan *chan, size_t length)
 {
-	void *reserved;
-	struct rchan_buf *buf = chan->buf[smp_processor_id()];
+	void *reserved = NULL;
+	struct rchan_buf *buf = *get_cpu_ptr(chan->buf);
 
 	if (unlikely(buf->offset + length > buf->chan->subbuf_size)) {
 		length = relay_switch_subbuf(buf, length);
 		if (!length)
-			return NULL;
+			goto end;
 	}
 	reserved = buf->data + buf->offset;
 	buf->offset += length;
 
+end:
+	put_cpu_ptr(chan->buf);
 	return reserved;
 }
 
@@ -285,5 +288,11 @@
  */
 extern const struct file_operations relay_file_operations;
 
+#ifdef CONFIG_RELAY
+int relay_prepare_cpu(unsigned int cpu);
+#else
+#define relay_prepare_cpu     NULL
+#endif
+
 #endif /* _LINUX_RELAY_H */
 
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 3eef080..5c132d3 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -1,7 +1,7 @@
 /*
  * Resizable, Scalable, Concurrent Hash Table
  *
- * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
+ * Copyright (c) 2015-2016 Herbert Xu <herbert@gondor.apana.org.au>
  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
  *
@@ -53,6 +53,11 @@
 	struct rhash_head __rcu		*next;
 };
 
+struct rhlist_head {
+	struct rhash_head		rhead;
+	struct rhlist_head __rcu	*next;
+};
+
 /**
  * struct bucket_table - Table of hash buckets
  * @size: Number of hash buckets
@@ -137,6 +142,7 @@
  * @key_len: Key length for hashfn
  * @elasticity: Maximum chain length before rehash
  * @p: Configuration parameters
+ * @rhlist: True if this is an rhltable
  * @run_work: Deferred worker to expand/shrink asynchronously
  * @mutex: Mutex to protect current/future table swapping
  * @lock: Spin lock to protect walker list
@@ -147,12 +153,21 @@
 	unsigned int			key_len;
 	unsigned int			elasticity;
 	struct rhashtable_params	p;
+	bool				rhlist;
 	struct work_struct		run_work;
 	struct mutex                    mutex;
 	spinlock_t			lock;
 };
 
 /**
+ * struct rhltable - Hash table with duplicate objects in a list
+ * @ht: Underlying rhtable
+ */
+struct rhltable {
+	struct rhashtable ht;
+};
+
+/**
  * struct rhashtable_walker - Hash table walker
  * @list: List entry on list of walkers
  * @tbl: The table that we were walking over
@@ -163,9 +178,10 @@
 };
 
 /**
- * struct rhashtable_iter - Hash table iterator, fits into netlink cb
+ * struct rhashtable_iter - Hash table iterator
  * @ht: Table to iterate through
  * @p: Current pointer
+ * @list: Current hash list pointer
  * @walker: Associated rhashtable walker
  * @slot: Current slot
  * @skip: Number of entries to skip in slot
@@ -173,7 +189,8 @@
 struct rhashtable_iter {
 	struct rhashtable *ht;
 	struct rhash_head *p;
-	struct rhashtable_walker *walker;
+	struct rhlist_head *list;
+	struct rhashtable_walker walker;
 	unsigned int slot;
 	unsigned int skip;
 };
@@ -339,15 +356,14 @@
 
 int rhashtable_init(struct rhashtable *ht,
 		    const struct rhashtable_params *params);
+int rhltable_init(struct rhltable *hlt,
+		  const struct rhashtable_params *params);
 
-struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
-					    const void *key,
-					    struct rhash_head *obj,
-					    struct bucket_table *old_tbl);
-int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
+void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+			     struct rhash_head *obj);
 
-int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
-			 gfp_t gfp);
+void rhashtable_walk_enter(struct rhashtable *ht,
+			   struct rhashtable_iter *iter);
 void rhashtable_walk_exit(struct rhashtable_iter *iter);
 int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
 void *rhashtable_walk_next(struct rhashtable_iter *iter);
@@ -506,6 +522,31 @@
 	rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
 					tbl, hash, member)
 
+/**
+ * rhl_for_each_rcu - iterate over rcu hash table list
+ * @pos:	the &struct rlist_head to use as a loop cursor.
+ * @list:	the head of the list
+ *
+ * This hash chain list-traversal primitive should be used on the
+ * list returned by rhltable_lookup.
+ */
+#define rhl_for_each_rcu(pos, list)					\
+	for (pos = list; pos; pos = rcu_dereference_raw(pos->next))
+
+/**
+ * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct rlist_head to use as a loop cursor.
+ * @list:	the head of the list
+ * @member:	name of the &struct rlist_head within the hashable struct.
+ *
+ * This hash chain list-traversal primitive should be used on the
+ * list returned by rhltable_lookup.
+ */
+#define rhl_for_each_entry_rcu(tpos, pos, list, member)			\
+	for (pos = list; pos && rht_entry(tpos, pos, member);		\
+	     pos = rcu_dereference_raw(pos->next))
+
 static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
 				     const void *obj)
 {
@@ -515,18 +556,8 @@
 	return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
 }
 
-/**
- * rhashtable_lookup_fast - search hash table, inlined version
- * @ht:		hash table
- * @key:	the pointer to the key
- * @params:	hash table parameters
- *
- * Computes the hash value for the key and traverses the bucket chain looking
- * for a entry with an identical key. The first matching entry is returned.
- *
- * Returns the first entry on which the compare function returned true.
- */
-static inline void *rhashtable_lookup_fast(
+/* Internal function, do not use. */
+static inline struct rhash_head *__rhashtable_lookup(
 	struct rhashtable *ht, const void *key,
 	const struct rhashtable_params params)
 {
@@ -538,8 +569,6 @@
 	struct rhash_head *he;
 	unsigned int hash;
 
-	rcu_read_lock();
-
 	tbl = rht_dereference_rcu(ht->tbl, ht);
 restart:
 	hash = rht_key_hashfn(ht, tbl, key, params);
@@ -548,8 +577,7 @@
 		    params.obj_cmpfn(&arg, rht_obj(ht, he)) :
 		    rhashtable_compare(&arg, rht_obj(ht, he)))
 			continue;
-		rcu_read_unlock();
-		return rht_obj(ht, he);
+		return he;
 	}
 
 	/* Ensure we see any new tables. */
@@ -558,89 +586,165 @@
 	tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 	if (unlikely(tbl))
 		goto restart;
-	rcu_read_unlock();
 
 	return NULL;
 }
 
-/* Internal function, please use rhashtable_insert_fast() instead */
-static inline int __rhashtable_insert_fast(
-	struct rhashtable *ht, const void *key, struct rhash_head *obj,
+/**
+ * rhashtable_lookup - search hash table
+ * @ht:		hash table
+ * @key:	the pointer to the key
+ * @params:	hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for a entry with an identical key. The first matching entry is returned.
+ *
+ * This must only be called under the RCU read lock.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+static inline void *rhashtable_lookup(
+	struct rhashtable *ht, const void *key,
 	const struct rhashtable_params params)
 {
+	struct rhash_head *he = __rhashtable_lookup(ht, key, params);
+
+	return he ? rht_obj(ht, he) : NULL;
+}
+
+/**
+ * rhashtable_lookup_fast - search hash table, without RCU read lock
+ * @ht:		hash table
+ * @key:	the pointer to the key
+ * @params:	hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for a entry with an identical key. The first matching entry is returned.
+ *
+ * Only use this function when you have other mechanisms guaranteeing
+ * that the object won't go away after the RCU read lock is released.
+ *
+ * Returns the first entry on which the compare function returned true.
+ */
+static inline void *rhashtable_lookup_fast(
+	struct rhashtable *ht, const void *key,
+	const struct rhashtable_params params)
+{
+	void *obj;
+
+	rcu_read_lock();
+	obj = rhashtable_lookup(ht, key, params);
+	rcu_read_unlock();
+
+	return obj;
+}
+
+/**
+ * rhltable_lookup - search hash list table
+ * @hlt:	hash table
+ * @key:	the pointer to the key
+ * @params:	hash table parameters
+ *
+ * Computes the hash value for the key and traverses the bucket chain looking
+ * for a entry with an identical key.  All matching entries are returned
+ * in a list.
+ *
+ * This must only be called under the RCU read lock.
+ *
+ * Returns the list of entries that match the given key.
+ */
+static inline struct rhlist_head *rhltable_lookup(
+	struct rhltable *hlt, const void *key,
+	const struct rhashtable_params params)
+{
+	struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params);
+
+	return he ? container_of(he, struct rhlist_head, rhead) : NULL;
+}
+
+/* Internal function, please use rhashtable_insert_fast() instead. This
+ * function returns the existing element already in hashes in there is a clash,
+ * otherwise it returns an error via ERR_PTR().
+ */
+static inline void *__rhashtable_insert_fast(
+	struct rhashtable *ht, const void *key, struct rhash_head *obj,
+	const struct rhashtable_params params, bool rhlist)
+{
 	struct rhashtable_compare_arg arg = {
 		.ht = ht,
 		.key = key,
 	};
-	struct bucket_table *tbl, *new_tbl;
+	struct rhash_head __rcu **pprev;
+	struct bucket_table *tbl;
 	struct rhash_head *head;
 	spinlock_t *lock;
-	unsigned int elasticity;
 	unsigned int hash;
-	int err;
+	int elasticity;
+	void *data;
 
-restart:
 	rcu_read_lock();
 
 	tbl = rht_dereference_rcu(ht->tbl, ht);
+	hash = rht_head_hashfn(ht, tbl, obj, params);
+	lock = rht_bucket_lock(tbl, hash);
+	spin_lock_bh(lock);
 
-	/* All insertions must grab the oldest table containing
-	 * the hashed bucket that is yet to be rehashed.
-	 */
-	for (;;) {
-		hash = rht_head_hashfn(ht, tbl, obj, params);
-		lock = rht_bucket_lock(tbl, hash);
-		spin_lock_bh(lock);
-
-		if (tbl->rehash <= hash)
-			break;
-
+	if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) {
+slow_path:
 		spin_unlock_bh(lock);
-		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+		rcu_read_unlock();
+		return rhashtable_insert_slow(ht, key, obj);
 	}
 
-	new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
-	if (unlikely(new_tbl)) {
-		tbl = rhashtable_insert_slow(ht, key, obj, new_tbl);
-		if (!IS_ERR_OR_NULL(tbl))
-			goto slow_path;
+	elasticity = ht->elasticity;
+	pprev = &tbl->buckets[hash];
+	rht_for_each(head, tbl, hash) {
+		struct rhlist_head *plist;
+		struct rhlist_head *list;
 
-		err = PTR_ERR(tbl);
-		goto out;
+		elasticity--;
+		if (!key ||
+		    (params.obj_cmpfn ?
+		     params.obj_cmpfn(&arg, rht_obj(ht, head)) :
+		     rhashtable_compare(&arg, rht_obj(ht, head))))
+			continue;
+
+		data = rht_obj(ht, head);
+
+		if (!rhlist)
+			goto out;
+
+
+		list = container_of(obj, struct rhlist_head, rhead);
+		plist = container_of(head, struct rhlist_head, rhead);
+
+		RCU_INIT_POINTER(list->next, plist);
+		head = rht_dereference_bucket(head->next, tbl, hash);
+		RCU_INIT_POINTER(list->rhead.next, head);
+		rcu_assign_pointer(*pprev, obj);
+
+		goto good;
 	}
 
-	err = -E2BIG;
+	if (elasticity <= 0)
+		goto slow_path;
+
+	data = ERR_PTR(-E2BIG);
 	if (unlikely(rht_grow_above_max(ht, tbl)))
 		goto out;
 
-	if (unlikely(rht_grow_above_100(ht, tbl))) {
-slow_path:
-		spin_unlock_bh(lock);
-		err = rhashtable_insert_rehash(ht, tbl);
-		rcu_read_unlock();
-		if (err)
-			return err;
-
-		goto restart;
-	}
-
-	err = -EEXIST;
-	elasticity = ht->elasticity;
-	rht_for_each(head, tbl, hash) {
-		if (key &&
-		    unlikely(!(params.obj_cmpfn ?
-			       params.obj_cmpfn(&arg, rht_obj(ht, head)) :
-			       rhashtable_compare(&arg, rht_obj(ht, head)))))
-			goto out;
-		if (!--elasticity)
-			goto slow_path;
-	}
-
-	err = 0;
+	if (unlikely(rht_grow_above_100(ht, tbl)))
+		goto slow_path;
 
 	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
+	if (rhlist) {
+		struct rhlist_head *list;
+
+		list = container_of(obj, struct rhlist_head, rhead);
+		RCU_INIT_POINTER(list->next, NULL);
+	}
 
 	rcu_assign_pointer(tbl->buckets[hash], obj);
 
@@ -648,11 +752,14 @@
 	if (rht_grow_above_75(ht, tbl))
 		schedule_work(&ht->run_work);
 
+good:
+	data = NULL;
+
 out:
 	spin_unlock_bh(lock);
 	rcu_read_unlock();
 
-	return err;
+	return data;
 }
 
 /**
@@ -675,7 +782,65 @@
 	struct rhashtable *ht, struct rhash_head *obj,
 	const struct rhashtable_params params)
 {
-	return __rhashtable_insert_fast(ht, NULL, obj, params);
+	void *ret;
+
+	ret = __rhashtable_insert_fast(ht, NULL, obj, params, false);
+	if (IS_ERR(ret))
+		return PTR_ERR(ret);
+
+	return ret == NULL ? 0 : -EEXIST;
+}
+
+/**
+ * rhltable_insert_key - insert object into hash list table
+ * @hlt:	hash list table
+ * @key:	the pointer to the key
+ * @list:	pointer to hash list head inside object
+ * @params:	hash table parameters
+ *
+ * Will take a per bucket spinlock to protect against mutual mutations
+ * on the same bucket. Multiple insertions may occur in parallel unless
+ * they map to the same bucket lock.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if the size grows
+ * beyond the watermark indicated by grow_decision() which can be passed
+ * to rhashtable_init().
+ */
+static inline int rhltable_insert_key(
+	struct rhltable *hlt, const void *key, struct rhlist_head *list,
+	const struct rhashtable_params params)
+{
+	return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead,
+						params, true));
+}
+
+/**
+ * rhltable_insert - insert object into hash list table
+ * @hlt:	hash list table
+ * @list:	pointer to hash list head inside object
+ * @params:	hash table parameters
+ *
+ * Will take a per bucket spinlock to protect against mutual mutations
+ * on the same bucket. Multiple insertions may occur in parallel unless
+ * they map to the same bucket lock.
+ *
+ * It is safe to call this function from atomic context.
+ *
+ * Will trigger an automatic deferred table resizing if the size grows
+ * beyond the watermark indicated by grow_decision() which can be passed
+ * to rhashtable_init().
+ */
+static inline int rhltable_insert(
+	struct rhltable *hlt, struct rhlist_head *list,
+	const struct rhashtable_params params)
+{
+	const char *key = rht_obj(&hlt->ht, &list->rhead);
+
+	key += params.key_offset;
+
+	return rhltable_insert_key(hlt, key, list, params);
 }
 
 /**
@@ -704,11 +869,16 @@
 	const struct rhashtable_params params)
 {
 	const char *key = rht_obj(ht, obj);
+	void *ret;
 
 	BUG_ON(ht->p.obj_hashfn);
 
-	return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj,
-					params);
+	ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params,
+				       false);
+	if (IS_ERR(ret))
+		return PTR_ERR(ret);
+
+	return ret == NULL ? 0 : -EEXIST;
 }
 
 /**
@@ -737,15 +907,42 @@
 	struct rhashtable *ht, const void *key, struct rhash_head *obj,
 	const struct rhashtable_params params)
 {
+	void *ret;
+
 	BUG_ON(!ht->p.obj_hashfn || !key);
 
-	return __rhashtable_insert_fast(ht, key, obj, params);
+	ret = __rhashtable_insert_fast(ht, key, obj, params, false);
+	if (IS_ERR(ret))
+		return PTR_ERR(ret);
+
+	return ret == NULL ? 0 : -EEXIST;
+}
+
+/**
+ * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
+ * @ht:		hash table
+ * @obj:	pointer to hash head inside object
+ * @params:	hash table parameters
+ * @data:	pointer to element data already in hashes
+ *
+ * Just like rhashtable_lookup_insert_key(), but this function returns the
+ * object if it exists, NULL if it does not and the insertion was successful,
+ * and an ERR_PTR otherwise.
+ */
+static inline void *rhashtable_lookup_get_insert_key(
+	struct rhashtable *ht, const void *key, struct rhash_head *obj,
+	const struct rhashtable_params params)
+{
+	BUG_ON(!ht->p.obj_hashfn || !key);
+
+	return __rhashtable_insert_fast(ht, key, obj, params, false);
 }
 
 /* Internal function, please use rhashtable_remove_fast() instead */
-static inline int __rhashtable_remove_fast(
+static inline int __rhashtable_remove_fast_one(
 	struct rhashtable *ht, struct bucket_table *tbl,
-	struct rhash_head *obj, const struct rhashtable_params params)
+	struct rhash_head *obj, const struct rhashtable_params params,
+	bool rhlist)
 {
 	struct rhash_head __rcu **pprev;
 	struct rhash_head *he;
@@ -760,18 +957,86 @@
 
 	pprev = &tbl->buckets[hash];
 	rht_for_each(he, tbl, hash) {
+		struct rhlist_head *list;
+
+		list = container_of(he, struct rhlist_head, rhead);
+
 		if (he != obj) {
+			struct rhlist_head __rcu **lpprev;
+
 			pprev = &he->next;
-			continue;
+
+			if (!rhlist)
+				continue;
+
+			do {
+				lpprev = &list->next;
+				list = rht_dereference_bucket(list->next,
+							      tbl, hash);
+			} while (list && obj != &list->rhead);
+
+			if (!list)
+				continue;
+
+			list = rht_dereference_bucket(list->next, tbl, hash);
+			RCU_INIT_POINTER(*lpprev, list);
+			err = 0;
+			break;
 		}
 
-		rcu_assign_pointer(*pprev, obj->next);
-		err = 0;
+		obj = rht_dereference_bucket(obj->next, tbl, hash);
+		err = 1;
+
+		if (rhlist) {
+			list = rht_dereference_bucket(list->next, tbl, hash);
+			if (list) {
+				RCU_INIT_POINTER(list->rhead.next, obj);
+				obj = &list->rhead;
+				err = 0;
+			}
+		}
+
+		rcu_assign_pointer(*pprev, obj);
 		break;
 	}
 
 	spin_unlock_bh(lock);
 
+	if (err > 0) {
+		atomic_dec(&ht->nelems);
+		if (unlikely(ht->p.automatic_shrinking &&
+			     rht_shrink_below_30(ht, tbl)))
+			schedule_work(&ht->run_work);
+		err = 0;
+	}
+
+	return err;
+}
+
+/* Internal function, please use rhashtable_remove_fast() instead */
+static inline int __rhashtable_remove_fast(
+	struct rhashtable *ht, struct rhash_head *obj,
+	const struct rhashtable_params params, bool rhlist)
+{
+	struct bucket_table *tbl;
+	int err;
+
+	rcu_read_lock();
+
+	tbl = rht_dereference_rcu(ht->tbl, ht);
+
+	/* Because we have already taken (and released) the bucket
+	 * lock in old_tbl, if we find that future_tbl is not yet
+	 * visible then that guarantees the entry to still be in
+	 * the old tbl if it exists.
+	 */
+	while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params,
+						   rhlist)) &&
+	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
+		;
+
+	rcu_read_unlock();
+
 	return err;
 }
 
@@ -794,34 +1059,29 @@
 	struct rhashtable *ht, struct rhash_head *obj,
 	const struct rhashtable_params params)
 {
-	struct bucket_table *tbl;
-	int err;
+	return __rhashtable_remove_fast(ht, obj, params, false);
+}
 
-	rcu_read_lock();
-
-	tbl = rht_dereference_rcu(ht->tbl, ht);
-
-	/* Because we have already taken (and released) the bucket
-	 * lock in old_tbl, if we find that future_tbl is not yet
-	 * visible then that guarantees the entry to still be in
-	 * the old tbl if it exists.
-	 */
-	while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) &&
-	       (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
-		;
-
-	if (err)
-		goto out;
-
-	atomic_dec(&ht->nelems);
-	if (unlikely(ht->p.automatic_shrinking &&
-		     rht_shrink_below_30(ht, tbl)))
-		schedule_work(&ht->run_work);
-
-out:
-	rcu_read_unlock();
-
-	return err;
+/**
+ * rhltable_remove - remove object from hash list table
+ * @hlt:	hash list table
+ * @list:	pointer to hash list head inside object
+ * @params:	hash table parameters
+ *
+ * Since the hash chain is single linked, the removal operation needs to
+ * walk the bucket chain upon removal. The removal operation is thus
+ * considerable slow if the hash table is not correctly sized.
+ *
+ * Will automatically shrink the table via rhashtable_expand() if the
+ * shrink_decision function specified at rhashtable_init() returns true.
+ *
+ * Returns zero on success, -ENOENT if the entry could not be found.
+ */
+static inline int rhltable_remove(
+	struct rhltable *hlt, struct rhlist_head *list,
+	const struct rhashtable_params params)
+{
+	return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true);
 }
 
 /* Internal function, please use rhashtable_replace_fast() instead */
@@ -906,4 +1166,59 @@
 	return err;
 }
 
+/* Obsolete function, do not use in new code. */
+static inline int rhashtable_walk_init(struct rhashtable *ht,
+				       struct rhashtable_iter *iter, gfp_t gfp)
+{
+	rhashtable_walk_enter(ht, iter);
+	return 0;
+}
+
+/**
+ * rhltable_walk_enter - Initialise an iterator
+ * @hlt:	Table to walk over
+ * @iter:	Hash table Iterator
+ *
+ * This function prepares a hash table walk.
+ *
+ * Note that if you restart a walk after rhashtable_walk_stop you
+ * may see the same object twice.  Also, you may miss objects if
+ * there are removals in between rhashtable_walk_stop and the next
+ * call to rhashtable_walk_start.
+ *
+ * For a completely stable walk you should construct your own data
+ * structure outside the hash table.
+ *
+ * This function may sleep so you must not call it from interrupt
+ * context or with spin locks held.
+ *
+ * You must call rhashtable_walk_exit after this function returns.
+ */
+static inline void rhltable_walk_enter(struct rhltable *hlt,
+				       struct rhashtable_iter *iter)
+{
+	return rhashtable_walk_enter(&hlt->ht, iter);
+}
+
+/**
+ * rhltable_free_and_destroy - free elements and destroy hash list table
+ * @hlt:	the hash list table to destroy
+ * @free_fn:	callback to release resources of element
+ * @arg:	pointer passed to free_fn
+ *
+ * See documentation for rhashtable_free_and_destroy.
+ */
+static inline void rhltable_free_and_destroy(struct rhltable *hlt,
+					     void (*free_fn)(void *ptr,
+							     void *arg),
+					     void *arg)
+{
+	return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg);
+}
+
+static inline void rhltable_destroy(struct rhltable *hlt)
+{
+	return rhltable_free_and_destroy(hlt, NULL, NULL);
+}
+
 #endif /* _LINUX_RHASHTABLE_H */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 2daece8..57e5484 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -105,7 +105,7 @@
 			     struct netlink_callback *cb,
 			     struct net_device *dev,
 			     struct net_device *filter_dev,
-			     int idx);
+			     int *idx);
 extern int ndo_dflt_fdb_add(struct ndmsg *ndm,
 			    struct nlattr *tb[],
 			    struct net_device *dev,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62c68e5..7543a47 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -448,6 +448,8 @@
 	io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
 }
 
+void __noreturn do_task_dead(void);
+
 struct nsproxy;
 struct user_namespace;
 
@@ -1022,7 +1024,8 @@
 #define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
 #define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu power */
+#define SD_ASYM_CPUCAPACITY	0x0040  /* Groups have different max cpu capacities */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
 #define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
@@ -1064,6 +1067,12 @@
 
 struct sched_group;
 
+struct sched_domain_shared {
+	atomic_t	ref;
+	atomic_t	nr_busy_cpus;
+	int		has_idle_cores;
+};
+
 struct sched_domain {
 	/* These fields must be setup */
 	struct sched_domain *parent;	/* top domain must be null terminated */
@@ -1094,6 +1103,8 @@
 	u64 max_newidle_lb_cost;
 	unsigned long next_decay_max_lb_cost;
 
+	u64 avg_scan_cost;		/* select_idle_sibling */
+
 #ifdef CONFIG_SCHEDSTATS
 	/* load_balance() stats */
 	unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1132,6 +1143,7 @@
 		void *private;		/* used during construction */
 		struct rcu_head rcu;	/* used during destruction */
 	};
+	struct sched_domain_shared *shared;
 
 	unsigned int span_weight;
 	/*
@@ -1165,6 +1177,7 @@
 
 struct sd_data {
 	struct sched_domain **__percpu sd;
+	struct sched_domain_shared **__percpu sds;
 	struct sched_group **__percpu sg;
 	struct sched_group_capacity **__percpu sgc;
 };
@@ -1458,6 +1471,13 @@
 };
 
 struct task_struct {
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	/*
+	 * For reasons of header soup (see current_thread_info()), this
+	 * must be the first element of task_struct.
+	 */
+	struct thread_info thread_info;
+#endif
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	void *stack;
 	atomic_t usage;
@@ -1467,6 +1487,9 @@
 #ifdef CONFIG_SMP
 	struct llist_node wake_entry;
 	int on_cpu;
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	unsigned int cpu;	/* current CPU */
+#endif
 	unsigned int wakee_flips;
 	unsigned long wakee_flip_decay_ts;
 	struct task_struct *last_wakee;
@@ -1923,6 +1946,13 @@
 #ifdef CONFIG_MMU
 	struct task_struct *oom_reaper_list;
 #endif
+#ifdef CONFIG_VMAP_STACK
+	struct vm_struct *stack_vm_area;
+#endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	/* A live task holds one reference. */
+	atomic_t stack_refcount;
+#endif
 /* CPU-specific state of this task */
 	struct thread_struct thread;
 /*
@@ -1939,6 +1969,18 @@
 # define arch_task_struct_size (sizeof(struct task_struct))
 #endif
 
+#ifdef CONFIG_VMAP_STACK
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+	return t->stack_vm_area;
+}
+#else
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+	return NULL;
+}
+#endif
+
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
 #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
 
@@ -2568,12 +2610,14 @@
 	return p->pid == 0;
 }
 extern struct task_struct *curr_task(int cpu);
-extern void set_curr_task(int cpu, struct task_struct *p);
+extern void ia64_set_curr_task(int cpu, struct task_struct *p);
 
 void yield(void);
 
 union thread_union {
+#ifndef CONFIG_THREAD_INFO_IN_TASK
 	struct thread_info thread_info;
+#endif
 	unsigned long stack[THREAD_SIZE/sizeof(long)];
 };
 
@@ -3061,10 +3105,34 @@
 	cgroup_threadgroup_change_end(tsk);
 }
 
-#ifndef __HAVE_THREAD_FUNCTIONS
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+
+static inline struct thread_info *task_thread_info(struct task_struct *task)
+{
+	return &task->thread_info;
+}
+
+/*
+ * When accessing the stack of a non-current task that might exit, use
+ * try_get_task_stack() instead.  task_stack_page will return a pointer
+ * that could get freed out from under you.
+ */
+static inline void *task_stack_page(const struct task_struct *task)
+{
+	return task->stack;
+}
+
+#define setup_thread_stack(new,old)	do { } while(0)
+
+static inline unsigned long *end_of_stack(const struct task_struct *task)
+{
+	return task->stack;
+}
+
+#elif !defined(__HAVE_THREAD_FUNCTIONS)
 
 #define task_thread_info(task)	((struct thread_info *)(task)->stack)
-#define task_stack_page(task)	((task)->stack)
+#define task_stack_page(task)	((void *)(task)->stack)
 
 static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
 {
@@ -3091,6 +3159,24 @@
 }
 
 #endif
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+	return atomic_inc_not_zero(&tsk->stack_refcount) ?
+		task_stack_page(tsk) : NULL;
+}
+
+extern void put_task_stack(struct task_struct *tsk);
+#else
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+	return task_stack_page(tsk);
+}
+
+static inline void put_task_stack(struct task_struct *tsk) {}
+#endif
+
 #define task_stack_end_corrupted(task) \
 		(*(end_of_stack(task)) != STACK_END_MAGIC)
 
@@ -3206,7 +3292,11 @@
  * cond_resched_lock() will drop the spinlock before scheduling,
  * cond_resched_softirq() will enable bhs before scheduling.
  */
+#ifndef CONFIG_PREEMPT
 extern int _cond_resched(void);
+#else
+static inline int _cond_resched(void) { return 0; }
+#endif
 
 #define cond_resched() ({			\
 	___might_sleep(__FILE__, __LINE__, 0);	\
@@ -3236,6 +3326,15 @@
 #endif
 }
 
+static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+	return p->preempt_disable_ip;
+#else
+	return 0;
+#endif
+}
+
 /*
  * Does a critical section need to be broken due to another
  * task waiting?: (technically does not depend on CONFIG_PREEMPT,
@@ -3364,7 +3463,11 @@
 
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	return p->cpu;
+#else
 	return task_thread_info(p)->cpu;
+#endif
 }
 
 static inline int task_node(const struct task_struct *p)
@@ -3469,15 +3572,20 @@
 	return task_rlimit_max(current, limit);
 }
 
+#define SCHED_CPUFREQ_RT	(1U << 0)
+#define SCHED_CPUFREQ_DL	(1U << 1)
+#define SCHED_CPUFREQ_IOWAIT	(1U << 2)
+
+#define SCHED_CPUFREQ_RT_DL	(SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
+
 #ifdef CONFIG_CPU_FREQ
 struct update_util_data {
-	void (*func)(struct update_util_data *data,
-		     u64 time, unsigned long util, unsigned long max);
+       void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
 };
 
 void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
-			void (*func)(struct update_util_data *data, u64 time,
-				     unsigned long util, unsigned long max));
+                       void (*func)(struct update_util_data *data, u64 time,
+				    unsigned int flags));
 void cpufreq_remove_update_util_hook(int cpu);
 #endif /* CONFIG_CPU_FREQ */
 
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index de1f643..fcb4c36 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -705,70 +705,6 @@
 	sctp_authhdr_t auth_hdr;
 } __packed sctp_auth_chunk_t;
 
-struct sctp_info {
-	__u32	sctpi_tag;
-	__u32	sctpi_state;
-	__u32	sctpi_rwnd;
-	__u16	sctpi_unackdata;
-	__u16	sctpi_penddata;
-	__u16	sctpi_instrms;
-	__u16	sctpi_outstrms;
-	__u32	sctpi_fragmentation_point;
-	__u32	sctpi_inqueue;
-	__u32	sctpi_outqueue;
-	__u32	sctpi_overall_error;
-	__u32	sctpi_max_burst;
-	__u32	sctpi_maxseg;
-	__u32	sctpi_peer_rwnd;
-	__u32	sctpi_peer_tag;
-	__u8	sctpi_peer_capable;
-	__u8	sctpi_peer_sack;
-	__u16	__reserved1;
-
-	/* assoc status info */
-	__u64	sctpi_isacks;
-	__u64	sctpi_osacks;
-	__u64	sctpi_opackets;
-	__u64	sctpi_ipackets;
-	__u64	sctpi_rtxchunks;
-	__u64	sctpi_outofseqtsns;
-	__u64	sctpi_idupchunks;
-	__u64	sctpi_gapcnt;
-	__u64	sctpi_ouodchunks;
-	__u64	sctpi_iuodchunks;
-	__u64	sctpi_oodchunks;
-	__u64	sctpi_iodchunks;
-	__u64	sctpi_octrlchunks;
-	__u64	sctpi_ictrlchunks;
-
-	/* primary transport info */
-	struct sockaddr_storage	sctpi_p_address;
-	__s32	sctpi_p_state;
-	__u32	sctpi_p_cwnd;
-	__u32	sctpi_p_srtt;
-	__u32	sctpi_p_rto;
-	__u32	sctpi_p_hbinterval;
-	__u32	sctpi_p_pathmaxrxt;
-	__u32	sctpi_p_sackdelay;
-	__u32	sctpi_p_sackfreq;
-	__u32	sctpi_p_ssthresh;
-	__u32	sctpi_p_partial_bytes_acked;
-	__u32	sctpi_p_flight_size;
-	__u16	sctpi_p_error;
-	__u16	__reserved2;
-
-	/* sctp sock info */
-	__u32	sctpi_s_autoclose;
-	__u32	sctpi_s_adaptation_ind;
-	__u32	sctpi_s_pd_point;
-	__u8	sctpi_s_nodelay;
-	__u8	sctpi_s_disable_fragments;
-	__u8	sctpi_s_v4mapped;
-	__u8	sctpi_s_frag_interleave;
-	__u32	sctpi_s_type;
-	__u32	__reserved3;
-};
-
 struct sctp_infox {
 	struct sctp_info *sctpinfo;
 	struct sctp_association *asoc;
diff --git a/include/linux/security.h b/include/linux/security.h
index 7831cd5..c2125e90 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -242,6 +242,10 @@
 int security_dentry_init_security(struct dentry *dentry, int mode,
 					const struct qstr *name, void **ctx,
 					u32 *ctxlen);
+int security_dentry_create_files_as(struct dentry *dentry, int mode,
+					struct qstr *name,
+					const struct cred *old,
+					struct cred *new);
 
 int security_inode_alloc(struct inode *inode);
 void security_inode_free(struct inode *inode);
@@ -282,6 +286,8 @@
 int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags);
 int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size);
 void security_inode_getsecid(struct inode *inode, u32 *secid);
+int security_inode_copy_up(struct dentry *src, struct cred **new);
+int security_inode_copy_up_xattr(const char *name);
 int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
 void security_file_free(struct file *file);
@@ -307,7 +313,6 @@
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
-int security_kernel_module_from_file(struct file *file);
 int security_kernel_read_file(struct file *file, enum kernel_read_file_id id);
 int security_kernel_post_read_file(struct file *file, char *buf, loff_t size,
 				   enum kernel_read_file_id id);
@@ -598,6 +603,14 @@
 	return -EOPNOTSUPP;
 }
 
+static inline int security_dentry_create_files_as(struct dentry *dentry,
+						  int mode, struct qstr *name,
+						  const struct cred *old,
+						  struct cred *new)
+{
+	return 0;
+}
+
 
 static inline int security_inode_init_security(struct inode *inode,
 						struct inode *dir,
@@ -758,6 +771,16 @@
 	*secid = 0;
 }
 
+static inline int security_inode_copy_up(struct dentry *src, struct cred **new)
+{
+	return 0;
+}
+
+static inline int security_inode_copy_up_xattr(const char *name)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int security_file_permission(struct file *file, int mask)
 {
 	return 0;
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index 923266c..48ec765 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -111,7 +111,6 @@
 						 *   if no_console_suspend
 						 */
 	unsigned char		probe;
-	struct mctrl_gpios	*gpios;
 #define UART_PROBE_RSA	(1 << 0)
 
 	/*
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 2f44e20..3442014 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -367,14 +367,21 @@
 
 #define EARLYCON_DECLARE(_name, fn)	OF_EARLYCON_DECLARE(_name, "", fn)
 
-extern int setup_earlycon(char *buf);
 extern int of_setup_earlycon(const struct earlycon_id *match,
 			     unsigned long node,
 			     const char *options);
 
+#ifdef CONFIG_SERIAL_EARLYCON
+extern bool earlycon_init_is_deferred __initdata;
+int setup_earlycon(char *buf);
+#else
+static const bool earlycon_init_is_deferred;
+static inline int setup_earlycon(char *buf) { return 0; }
+#endif
+
 struct uart_port *uart_get_console(struct uart_port *ports, int nr,
 				   struct console *c);
-int uart_parse_earlycon(char *p, unsigned char *iotype, unsigned long *addr,
+int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr,
 			char **options);
 void uart_parse_options(char *options, int *baud, int *parity, int *bits,
 			int *flow);
@@ -412,7 +419,7 @@
 static inline int uart_tx_stopped(struct uart_port *port)
 {
 	struct tty_struct *tty = port->state->port.tty;
-	if (tty->stopped || port->hw_stopped)
+	if ((tty && tty->stopped) || port->hw_stopped)
 		return 1;
 	return 0;
 }
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6f0b3e0..9bf60b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -612,7 +612,6 @@
  *	@no_fcs:  Request NIC to treat last 4 bytes as Ethernet FCS
   *	@napi_id: id of the NAPI struct this skb came from
  *	@secmark: security marking
- *	@offload_fwd_mark: fwding offload mark
  *	@mark: Generic packet mark
  *	@vlan_proto: vlan encapsulation protocol
  *	@vlan_tci: vlan tag control information
@@ -677,13 +676,23 @@
 	 */
 	kmemcheck_bitfield_begin(flags1);
 	__u16			queue_mapping;
+
+/* if you move cloned around you also must adapt those constants */
+#ifdef __BIG_ENDIAN_BITFIELD
+#define CLONED_MASK	(1 << 7)
+#else
+#define CLONED_MASK	1
+#endif
+#define CLONED_OFFSET()		offsetof(struct sk_buff, __cloned_offset)
+
+	__u8			__cloned_offset[0];
 	__u8			cloned:1,
 				nohdr:1,
 				fclone:2,
 				peeked:1,
 				head_frag:1,
-				xmit_more:1;
-	/* one bit hole */
+				xmit_more:1,
+				__unused:1; /* one bit hole */
 	kmemcheck_bitfield_end(flags1);
 
 	/* fields enclosed in headers_start/headers_end are copied
@@ -730,7 +739,10 @@
 	__u8			ipvs_property:1;
 	__u8			inner_protocol_type:1;
 	__u8			remcsum_offload:1;
-	/* 3 or 5 bit hole */
+#ifdef CONFIG_NET_SWITCHDEV
+	__u8			offload_fwd_mark:1;
+#endif
+	/* 2, 4 or 5 bit hole */
 
 #ifdef CONFIG_NET_SCHED
 	__u16			tc_index;	/* traffic control index */
@@ -757,14 +769,9 @@
 		unsigned int	sender_cpu;
 	};
 #endif
-	union {
 #ifdef CONFIG_NETWORK_SECMARK
-		__u32		secmark;
+	__u32		secmark;
 #endif
-#ifdef CONFIG_NET_SWITCHDEV
-		__u32		offload_fwd_mark;
-#endif
-	};
 
 	union {
 		__u32		mark;
@@ -2295,7 +2302,7 @@
 
 int ___pskb_trim(struct sk_buff *skb, unsigned int len);
 
-static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+static inline void __skb_set_length(struct sk_buff *skb, unsigned int len)
 {
 	if (unlikely(skb_is_nonlinear(skb))) {
 		WARN_ON(1);
@@ -2305,6 +2312,11 @@
 	skb_set_tail_pointer(skb, len);
 }
 
+static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
+{
+	__skb_set_length(skb, len);
+}
+
 void skb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
@@ -2335,6 +2347,20 @@
 	BUG_ON(err);
 }
 
+static inline int __skb_grow(struct sk_buff *skb, unsigned int len)
+{
+	unsigned int diff = len - skb->len;
+
+	if (skb_tailroom(skb) < diff) {
+		int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb),
+					   GFP_ATOMIC);
+		if (ret)
+			return ret;
+	}
+	__skb_set_length(skb, len);
+	return 0;
+}
+
 /**
  *	skb_orphan - orphan a buffer
  *	@skb: buffer to orphan
@@ -2386,6 +2412,8 @@
 		kfree_skb(skb);
 }
 
+void skb_rbtree_purge(struct rb_root *root);
+
 void *netdev_alloc_frag(unsigned int fragsz);
 
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
@@ -2847,6 +2875,18 @@
 	       __skb_linearize(skb) : 0;
 }
 
+static __always_inline void
+__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+		     unsigned int off)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_block_sub(skb->csum,
+					   csum_partial(start, len, 0), off);
+	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
+		 skb_checksum_start_offset(skb) < 0)
+		skb->ip_summed = CHECKSUM_NONE;
+}
+
 /**
  *	skb_postpull_rcsum - update checksum for received skb after pull
  *	@skb: buffer to update
@@ -2857,36 +2897,38 @@
  *	update the CHECKSUM_COMPLETE checksum, or set ip_summed to
  *	CHECKSUM_NONE so that it can be recomputed from scratch.
  */
-
 static inline void skb_postpull_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
 {
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
-	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
-		 skb_checksum_start_offset(skb) < 0)
-		skb->ip_summed = CHECKSUM_NONE;
+	__skb_postpull_rcsum(skb, start, len, 0);
 }
 
-unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+static __always_inline void
+__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
+		     unsigned int off)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_block_add(skb->csum,
+					   csum_partial(start, len, 0), off);
+}
 
+/**
+ *	skb_postpush_rcsum - update checksum for received skb after push
+ *	@skb: buffer to update
+ *	@start: start of data after push
+ *	@len: length of data pushed
+ *
+ *	After doing a push on a received packet, you need to call this to
+ *	update the CHECKSUM_COMPLETE checksum.
+ */
 static inline void skb_postpush_rcsum(struct sk_buff *skb,
 				      const void *start, unsigned int len)
 {
-	/* For performing the reverse operation to skb_postpull_rcsum(),
-	 * we can instead of ...
-	 *
-	 *   skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
-	 *
-	 * ... just use this equivalent version here to save a few
-	 * instructions. Feeding csum of 0 in csum_partial() and later
-	 * on adding skb->csum is equivalent to feed skb->csum in the
-	 * first place.
-	 */
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_partial(start, len, skb->csum);
+	__skb_postpush_rcsum(skb, start, len, 0);
 }
 
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+
 /**
  *	skb_push_rcsum - push skb and update receive checksum
  *	@skb: buffer to update
@@ -2924,6 +2966,21 @@
 	return __pskb_trim(skb, len);
 }
 
+static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+	__skb_trim(skb, len);
+	return 0;
+}
+
+static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+	return __skb_grow(skb, len);
+}
+
 #define skb_queue_walk(queue, skb) \
 		for (skb = (queue)->next;					\
 		     skb != (struct sk_buff *)(queue);				\
@@ -3028,6 +3085,7 @@
 struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
 struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
 int skb_ensure_writable(struct sk_buff *skb, int write_len);
+int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci);
 int skb_vlan_pop(struct sk_buff *skb);
 int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
 struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
@@ -3712,6 +3770,13 @@
 	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
 }
 
+static inline void skb_gso_reset(struct sk_buff *skb)
+{
+	skb_shinfo(skb)->gso_size = 0;
+	skb_shinfo(skb)->gso_segs = 0;
+	skb_shinfo(skb)->gso_type = 0;
+}
+
 void __skb_warn_lro_forwarding(const struct sk_buff *skb);
 
 static inline bool skb_warn_if_lro(const struct sk_buff *skb)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 4293808..084b12b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -650,4 +650,12 @@
 unsigned int kmem_cache_size(struct kmem_cache *s);
 void __init kmem_cache_init_late(void);
 
+#if defined(CONFIG_SMP) && defined(CONFIG_SLAB)
+int slab_prepare_cpu(unsigned int cpu);
+int slab_dead_cpu(unsigned int cpu);
+#else
+#define slab_prepare_cpu	NULL
+#define slab_dead_cpu		NULL
+#endif
+
 #endif	/* _LINUX_SLAB_H */
diff --git a/include/linux/smc91x.h b/include/linux/smc91x.h
index 76199b7..e302c44 100644
--- a/include/linux/smc91x.h
+++ b/include/linux/smc91x.h
@@ -1,6 +1,16 @@
 #ifndef __SMC91X_H__
 #define __SMC91X_H__
 
+/*
+ * These bits define which access sizes a platform can support, rather
+ * than the maximal access size.  So, if your platform can do 16-bit
+ * and 32-bit accesses to the SMC91x device, but not 8-bit, set both
+ * SMC91X_USE_16BIT and SMC91X_USE_32BIT.
+ *
+ * The SMC91x driver requires at least one of SMC91X_USE_8BIT or
+ * SMC91X_USE_16BIT to be supported - just setting SMC91X_USE_32BIT is
+ * an invalid configuration.
+ */
 #define SMC91X_USE_8BIT (1 << 0)
 #define SMC91X_USE_16BIT (1 << 1)
 #define SMC91X_USE_32BIT (1 << 2)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index eccae469..8e0cb7a 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -196,6 +196,9 @@
 
 void smp_setup_processor_id(void);
 
+int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par,
+		    bool phys);
+
 /* SMP core functions */
 int smpcfd_prepare_cpu(unsigned int cpu);
 int smpcfd_dead_cpu(unsigned int cpu);
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 072cb2aa..4b743ac 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -312,6 +312,8 @@
  * @flags: other constraints relevant to this driver
  * @max_transfer_size: function that returns the max transfer size for
  *	a &spi_device; may be %NULL, so the default %SIZE_MAX will be used.
+ * @max_message_size: function that returns the max message size for
+ *	a &spi_device; may be %NULL, so the default %SIZE_MAX will be used.
  * @io_mutex: mutex for physical bus access
  * @bus_lock_spinlock: spinlock for SPI bus locking
  * @bus_lock_mutex: mutex for exclusion of multiple callers
@@ -442,10 +444,11 @@
 #define SPI_MASTER_MUST_TX      BIT(4)		/* requires tx */
 
 	/*
-	 * on some hardware transfer size may be constrained
+	 * on some hardware transfer / message size may be constrained
 	 * the limit may depend on device transfer settings
 	 */
 	size_t (*max_transfer_size)(struct spi_device *spi);
+	size_t (*max_message_size)(struct spi_device *spi);
 
 	/* I/O mutex */
 	struct mutex		io_mutex;
@@ -905,12 +908,26 @@
 			    struct spi_message *message);
 
 static inline size_t
+spi_max_message_size(struct spi_device *spi)
+{
+	struct spi_master *master = spi->master;
+	if (!master->max_message_size)
+		return SIZE_MAX;
+	return master->max_message_size(spi);
+}
+
+static inline size_t
 spi_max_transfer_size(struct spi_device *spi)
 {
 	struct spi_master *master = spi->master;
-	if (!master->max_transfer_size)
-		return SIZE_MAX;
-	return master->max_transfer_size(spi);
+	size_t tr_max = SIZE_MAX;
+	size_t msg_max = spi_max_message_size(spi);
+
+	if (master->max_transfer_size)
+		tr_max = master->max_transfer_size(spi);
+
+	/* transfer size limit must not be greater than messsage size limit */
+	return min(tr_max, msg_max);
 }
 
 /*---------------------------------------------------------------------------*/
@@ -980,58 +997,6 @@
 extern int spi_bus_unlock(struct spi_master *master);
 
 /**
- * spi_write - SPI synchronous write
- * @spi: device to which data will be written
- * @buf: data buffer
- * @len: data buffer size
- * Context: can sleep
- *
- * This function writes the buffer @buf.
- * Callable only from contexts that can sleep.
- *
- * Return: zero on success, else a negative error code.
- */
-static inline int
-spi_write(struct spi_device *spi, const void *buf, size_t len)
-{
-	struct spi_transfer	t = {
-			.tx_buf		= buf,
-			.len		= len,
-		};
-	struct spi_message	m;
-
-	spi_message_init(&m);
-	spi_message_add_tail(&t, &m);
-	return spi_sync(spi, &m);
-}
-
-/**
- * spi_read - SPI synchronous read
- * @spi: device from which data will be read
- * @buf: data buffer
- * @len: data buffer size
- * Context: can sleep
- *
- * This function reads the buffer @buf.
- * Callable only from contexts that can sleep.
- *
- * Return: zero on success, else a negative error code.
- */
-static inline int
-spi_read(struct spi_device *spi, void *buf, size_t len)
-{
-	struct spi_transfer	t = {
-			.rx_buf		= buf,
-			.len		= len,
-		};
-	struct spi_message	m;
-
-	spi_message_init(&m);
-	spi_message_add_tail(&t, &m);
-	return spi_sync(spi, &m);
-}
-
-/**
  * spi_sync_transfer - synchronous SPI data transfer
  * @spi: device with which data will be exchanged
  * @xfers: An array of spi_transfers
@@ -1055,6 +1020,52 @@
 	return spi_sync(spi, &msg);
 }
 
+/**
+ * spi_write - SPI synchronous write
+ * @spi: device to which data will be written
+ * @buf: data buffer
+ * @len: data buffer size
+ * Context: can sleep
+ *
+ * This function writes the buffer @buf.
+ * Callable only from contexts that can sleep.
+ *
+ * Return: zero on success, else a negative error code.
+ */
+static inline int
+spi_write(struct spi_device *spi, const void *buf, size_t len)
+{
+	struct spi_transfer	t = {
+			.tx_buf		= buf,
+			.len		= len,
+		};
+
+	return spi_sync_transfer(spi, &t, 1);
+}
+
+/**
+ * spi_read - SPI synchronous read
+ * @spi: device from which data will be read
+ * @buf: data buffer
+ * @len: data buffer size
+ * Context: can sleep
+ *
+ * This function reads the buffer @buf.
+ * Callable only from contexts that can sleep.
+ *
+ * Return: zero on success, else a negative error code.
+ */
+static inline int
+spi_read(struct spi_device *spi, void *buf, size_t len)
+{
+	struct spi_transfer	t = {
+			.rx_buf		= buf,
+			.len		= len,
+		};
+
+	return spi_sync_transfer(spi, &t, 1);
+}
+
 /* this copies txbuf and rxbuf data; for small transfers only! */
 extern int spi_write_then_read(struct spi_device *spi,
 		const void *txbuf, unsigned n_tx,
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index b6810c9..5c02b06 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -195,6 +195,8 @@
 				struct rpc_xprt *,
 				void *),
 			void *data);
+void		rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+			unsigned long timeo);
 
 const char *rpc_proc_name(const struct rpc_task *task);
 #endif /* __KERNEL__ */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 5e3e1b6..a16070d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -218,7 +218,8 @@
 	struct work_struct	task_cleanup;
 	struct timer_list	timer;
 	unsigned long		last_used,
-				idle_timeout;
+				idle_timeout,
+				max_reconnect_timeout;
 
 	/*
 	 * Send stuff
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 7693e39..d971837 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -245,6 +245,7 @@
 	return unlikely(suspend_freeze_state == FREEZE_STATE_ENTER);
 }
 
+extern void __init pm_states_init(void);
 extern void freeze_set_ops(const struct platform_freeze_ops *ops);
 extern void freeze_wake(void);
 
@@ -279,6 +280,7 @@
 static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {}
 static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; }
 static inline bool idle_should_freeze(void) { return false; }
+static inline void __init pm_states_init(void) {}
 static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {}
 static inline void freeze_wake(void) {}
 #endif /* !CONFIG_SUSPEND */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b17cc48..e1d7614 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -257,6 +257,7 @@
 
 static inline void workingset_node_pages_dec(struct radix_tree_node *node)
 {
+	VM_WARN_ON_ONCE(!workingset_node_pages(node));
 	node->count--;
 }
 
@@ -272,6 +273,7 @@
 
 static inline void workingset_node_shadows_dec(struct radix_tree_node *node)
 {
+	VM_WARN_ON_ONCE(!workingset_node_shadows(node));
 	node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
 }
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 697e160..ecc3e07 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -25,6 +25,7 @@
 #include <linux/rcupdate.h>
 #include <linux/wait.h>
 #include <linux/rbtree.h>
+#include <linux/uidgid.h>
 #include <uapi/linux/sysctl.h>
 
 /* For the /proc/sys support */
@@ -42,6 +43,8 @@
 			 void __user *, size_t *, loff_t *);
 extern int proc_dointvec(struct ctl_table *, int,
 			 void __user *, size_t *, loff_t *);
+extern int proc_douintvec(struct ctl_table *, int,
+			 void __user *, size_t *, loff_t *);
 extern int proc_dointvec_minmax(struct ctl_table *, int,
 				void __user *, size_t *, loff_t *);
 extern int proc_dointvec_jiffies(struct ctl_table *, int,
@@ -157,6 +160,9 @@
 	struct ctl_table_set default_set;
 	struct ctl_table_set *(*lookup)(struct ctl_table_root *root,
 					   struct nsproxy *namespaces);
+	void (*set_ownership)(struct ctl_table_header *head,
+			      struct ctl_table *table,
+			      kuid_t *uid, kgid_t *gid);
 	int (*permissions)(struct ctl_table_header *head, struct ctl_table *table);
 };
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7be9b12..a17ae7b 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -19,6 +19,7 @@
 
 
 #include <linux/skbuff.h>
+#include <linux/win_minmax.h>
 #include <net/sock.h>
 #include <net/inet_connection_sock.h>
 #include <net/inet_timewait_sock.h>
@@ -212,7 +213,8 @@
 		u8 reord;    /* reordering detected */
 	} rack;
 	u16	advmss;		/* Advertised MSS			*/
-	u8	unused;
+	u8	rate_app_limited:1,  /* rate_{delivered,interval_us} limited? */
+		unused:7;
 	u8	nonagle     : 4,/* Disable Nagle algorithm?             */
 		thin_lto    : 1,/* Use linear timeouts for thin streams */
 		thin_dupack : 1,/* Fast retransmit on first dupack      */
@@ -234,9 +236,7 @@
 	u32	mdev_max_us;	/* maximal mdev for the last rtt period	*/
 	u32	rttvar_us;	/* smoothed mdev_max			*/
 	u32	rtt_seq;	/* sequence number to update rttvar	*/
-	struct rtt_meas {
-		u32 rtt, ts;	/* RTT in usec and sampling time in jiffies. */
-	} rtt_min[3];
+	struct  minmax rtt_min;
 
 	u32	packets_out;	/* Packets which are "in flight"	*/
 	u32	retrans_out;	/* Retransmitted packets out		*/
@@ -268,6 +268,12 @@
 				 * receiver in Recovery. */
 	u32	prr_out;	/* Total number of pkts sent during Recovery. */
 	u32	delivered;	/* Total data packets delivered incl. rexmits */
+	u32	lost;		/* Total data packets lost incl. rexmits */
+	u32	app_limited;	/* limited until "delivered" reaches this val */
+	struct skb_mstamp first_tx_mstamp;  /* start of window send phase */
+	struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */
+	u32	rate_delivered;    /* saved rate sample: packets delivered */
+	u32	rate_interval_us;  /* saved rate sample: time elapsed */
 
  	u32	rcv_wnd;	/* Current receiver window		*/
 	u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
@@ -281,10 +287,9 @@
 	struct sk_buff* lost_skb_hint;
 	struct sk_buff *retransmit_skb_hint;
 
-	/* OOO segments go in this list. Note that socket lock must be held,
-	 * as we do not use sk_buff_head lock.
-	 */
-	struct sk_buff_head	out_of_order_queue;
+	/* OOO segments go in this rbtree. Socket lock must be held. */
+	struct rb_root	out_of_order_queue;
+	struct sk_buff	*ooo_last_skb; /* cache rb_last(out_of_order_queue) */
 
 	/* SACKs data, these 2 need to be together (see tcp_options_write) */
 	struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index cbd8990..45f004e 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -13,6 +13,21 @@
 struct timespec;
 struct compat_timespec;
 
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+struct thread_info {
+	unsigned long		flags;		/* low level flags */
+};
+
+#define INIT_THREAD_INFO(tsk)			\
+{						\
+	.flags		= 0,			\
+}
+#endif
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+#define current_thread_info() ((struct thread_info *)current)
+#endif
+
 /*
  * System call restart block.
  */
@@ -118,10 +133,11 @@
 extern void __check_object_size(const void *ptr, unsigned long n,
 					bool to_user);
 
-static inline void check_object_size(const void *ptr, unsigned long n,
-				     bool to_user)
+static __always_inline void check_object_size(const void *ptr, unsigned long n,
+					      bool to_user)
 {
-	__check_object_size(ptr, n, to_user);
+	if (!__builtin_constant_p(n))
+		__check_object_size(ptr, n, to_user);
 }
 #else
 static inline void check_object_size(const void *ptr, unsigned long n,
diff --git a/include/linux/time64.h b/include/linux/time64.h
index 7e5d2fa..980c71b 100644
--- a/include/linux/time64.h
+++ b/include/linux/time64.h
@@ -5,6 +5,7 @@
 #include <linux/math64.h>
 
 typedef __s64 time64_t;
+typedef __u64 timeu64_t;
 
 /*
  * This wants to go into uapi/linux/time.h once we agreed about the
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 816b754..09168c5 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -1,7 +1,7 @@
 #ifndef _LINUX_TIMEKEEPING_H
 #define _LINUX_TIMEKEEPING_H
 
-#include <asm-generic/errno-base.h>
+#include <linux/errno.h>
 
 /* Included from linux/ktime.h */
 
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 6685a73..a45702e 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -43,7 +43,7 @@
 
 #define TORTURE_FLAG "-torture:"
 #define TOROUT_STRING(s) \
-	pr_alert("%s" TORTURE_FLAG s "\n", torture_type)
+	pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s)
 #define VERBOSE_TOROUT_STRING(s) \
 	do { if (verbose) pr_alert("%s" TORTURE_FLAG " %s\n", torture_type, s); } while (0)
 #define VERBOSE_TOROUT_ERRSTRING(s) \
diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index d3a2bb7..650f3dd 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -103,29 +103,40 @@
 #endif
 }
 
-static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
 {
 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
 	return read_seqcount_begin(&syncp->seq);
 #else
-#if BITS_PER_LONG==32
+	return 0;
+#endif
+}
+
+static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
+{
+#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
 	preempt_disable();
 #endif
-	return 0;
+	return __u64_stats_fetch_begin(syncp);
+}
+
+static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
+					 unsigned int start)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	return read_seqcount_retry(&syncp->seq, start);
+#else
+	return false;
 #endif
 }
 
 static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
 					 unsigned int start)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-	return read_seqcount_retry(&syncp->seq, start);
-#else
-#if BITS_PER_LONG==32
+#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
 	preempt_enable();
 #endif
-	return false;
-#endif
+	return __u64_stats_fetch_retry(syncp, start);
 }
 
 /*
@@ -136,27 +147,19 @@
  */
 static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-	return read_seqcount_begin(&syncp->seq);
-#else
-#if BITS_PER_LONG==32
+#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
 	local_irq_disable();
 #endif
-	return 0;
-#endif
+	return __u64_stats_fetch_begin(syncp);
 }
 
 static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
-					 unsigned int start)
+					     unsigned int start)
 {
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
-	return read_seqcount_retry(&syncp->seq, start);
-#else
-#if BITS_PER_LONG==32
+#if BITS_PER_LONG==32 && !defined(CONFIG_SMP)
 	local_irq_enable();
 #endif
-	return false;
-#endif
+	return __u64_stats_fetch_retry(syncp, start);
 }
 
 #endif /* _LINUX_U64_STATS_SYNC_H */
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 1b5d1cd..75b4aaf 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -76,7 +76,7 @@
 		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
-int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes);
+#define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
diff --git a/include/linux/ulpi/driver.h b/include/linux/ulpi/driver.h
index 388f6e0..a7af21a 100644
--- a/include/linux/ulpi/driver.h
+++ b/include/linux/ulpi/driver.h
@@ -15,7 +15,7 @@
  */
 struct ulpi {
 	struct ulpi_device_id id;
-	struct ulpi_ops *ops;
+	const struct ulpi_ops *ops;
 	struct device dev;
 };
 
@@ -47,7 +47,11 @@
 
 #define to_ulpi_driver(d) container_of(d, struct ulpi_driver, driver)
 
-int ulpi_register_driver(struct ulpi_driver *drv);
+/*
+ * use a macro to avoid include chaining to get THIS_MODULE
+ */
+#define ulpi_register_driver(drv) __ulpi_register_driver(drv, THIS_MODULE)
+int __ulpi_register_driver(struct ulpi_driver *drv, struct module *module);
 void ulpi_unregister_driver(struct ulpi_driver *drv);
 
 #define module_ulpi_driver(__ulpi_driver) \
diff --git a/include/linux/ulpi/interface.h b/include/linux/ulpi/interface.h
index 4de8ab4..a2011a9 100644
--- a/include/linux/ulpi/interface.h
+++ b/include/linux/ulpi/interface.h
@@ -4,20 +4,19 @@
 #include <linux/types.h>
 
 struct ulpi;
+struct device;
 
 /**
  * struct ulpi_ops - ULPI register access
- * @dev: the interface provider
  * @read: read operation for ULPI register access
  * @write: write operation for ULPI register access
  */
 struct ulpi_ops {
-	struct device *dev;
-	int (*read)(struct ulpi_ops *ops, u8 addr);
-	int (*write)(struct ulpi_ops *ops, u8 addr, u8 val);
+	int (*read)(struct device *dev, u8 addr);
+	int (*write)(struct device *dev, u8 addr, u8 val);
 };
 
-struct ulpi *ulpi_register_interface(struct device *, struct ulpi_ops *);
+struct ulpi *ulpi_register_interface(struct device *, const struct ulpi_ops *);
 void ulpi_unregister_interface(struct ulpi *);
 
 #endif /* __LINUX_ULPI_INTERFACE_H */
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 2b81b24..4616a49 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -220,7 +220,8 @@
 	int			(*setup)(struct usb_function *,
 					const struct usb_ctrlrequest *);
 	bool			(*req_match)(struct usb_function *,
-					const struct usb_ctrlrequest *);
+					const struct usb_ctrlrequest *,
+					bool config0);
 	void			(*suspend)(struct usb_function *);
 	void			(*resume)(struct usb_function *);
 
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index 612dbdf..8e81f9e 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -346,6 +346,8 @@
  *	or B-Peripheral wants to take host role.
  * @quirk_ep_out_aligned_size: epout requires buffer size to be aligned to
  *	MaxPacketSize.
+ * @quirk_avoids_skb_reserve: udc/platform wants to avoid skb_reserve() in
+ *	u_ether.c to improve performance.
  * @is_selfpowered: if the gadget is self-powered.
  * @deactivated: True if gadget is deactivated - in deactivated state it cannot
  *	be connected.
@@ -398,6 +400,7 @@
 	unsigned			quirk_altset_not_supp:1;
 	unsigned			quirk_stall_not_supp:1;
 	unsigned			quirk_zlp_not_supp:1;
+	unsigned			quirk_avoids_skb_reserve:1;
 	unsigned			is_selfpowered:1;
 	unsigned			deactivated:1;
 	unsigned			connected:1;
@@ -418,8 +421,20 @@
 	list_for_each_entry(tmp, &(gadget)->ep_list, ep_list)
 
 /**
+ * usb_ep_align - returns @len aligned to ep's maxpacketsize.
+ * @ep: the endpoint whose maxpacketsize is used to align @len
+ * @len: buffer size's length to align to @ep's maxpacketsize
+ *
+ * This helper is used to align buffer's size to an ep's maxpacketsize.
+ */
+static inline size_t usb_ep_align(struct usb_ep *ep, size_t len)
+{
+	return round_up(len, (size_t)le16_to_cpu(ep->desc->wMaxPacketSize));
+}
+
+/**
  * usb_ep_align_maybe - returns @len aligned to ep's maxpacketsize if gadget
- *	requires quirk_ep_out_aligned_size, otherwise reguens len.
+ *	requires quirk_ep_out_aligned_size, otherwise returns len.
  * @g: controller to check for quirk
  * @ep: the endpoint whose maxpacketsize is used to align @len
  * @len: buffer size's length to align to @ep's maxpacketsize
@@ -430,8 +445,7 @@
 static inline size_t
 usb_ep_align_maybe(struct usb_gadget *g, struct usb_ep *ep, size_t len)
 {
-	return !g->quirk_ep_out_aligned_size ? len :
-			round_up(len, (size_t)ep->desc->wMaxPacketSize);
+	return g->quirk_ep_out_aligned_size ? usb_ep_align(ep, len) : len;
 }
 
 /**
@@ -463,6 +477,16 @@
 }
 
 /**
+ * gadget_avoids_skb_reserve - return true iff the hardware would like to avoid
+ *	skb_reserve to improve performance.
+ * @g: controller to check for quirk
+ */
+static inline int gadget_avoids_skb_reserve(struct usb_gadget *g)
+{
+	return g->quirk_avoids_skb_reserve;
+}
+
+/**
  * gadget_is_dualspeed - return true iff the hardware handles high speed
  * @g: controller that might support both high and full speeds
  */
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 245f57d..0aae1b2 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -81,6 +81,8 @@
 		/* Sets max_sectors to 240 */			\
 	US_FLAG(NO_REPORT_LUNS,	0x10000000)			\
 		/* Cannot handle REPORT_LUNS */			\
+	US_FLAG(ALWAYS_SYNC, 0x20000000)			\
+		/* lies about caching, so always sync */	\
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
diff --git a/include/linux/vme.h b/include/linux/vme.h
index 71e4a6d..ea6095d 100644
--- a/include/linux/vme.h
+++ b/include/linux/vme.h
@@ -166,7 +166,7 @@
 int vme_lm_count(struct vme_resource *);
 int vme_lm_set(struct vme_resource *, unsigned long long, u32, u32);
 int vme_lm_get(struct vme_resource *, unsigned long long *, u32 *, u32 *);
-int vme_lm_attach(struct vme_resource *, int, void (*callback)(int));
+int vme_lm_attach(struct vme_resource *, int, void (*callback)(void *), void *);
 int vme_lm_detach(struct vme_resource *, int);
 void vme_lm_free(struct vme_resource *);
 
diff --git a/include/linux/wait.h b/include/linux/wait.h
index c3ff74d..2408e8d5 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -248,6 +248,8 @@
 	(!__builtin_constant_p(state) ||				\
 		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)	\
 
+extern void init_wait_entry(wait_queue_t *__wait, int flags);
+
 /*
  * The below macro ___wait_event() has an explicit shadow of the __ret
  * variable when used from the wait_event_*() macros.
@@ -266,12 +268,7 @@
 	wait_queue_t __wait;						\
 	long __ret = ret;	/* explicit shadow */			\
 									\
-	INIT_LIST_HEAD(&__wait.task_list);				\
-	if (exclusive)							\
-		__wait.flags = WQ_FLAG_EXCLUSIVE;			\
-	else								\
-		__wait.flags = 0;					\
-									\
+	init_wait_entry(&__wait, exclusive ? WQ_FLAG_EXCLUSIVE : 0);	\
 	for (;;) {							\
 		long __int = prepare_to_wait_event(&wq, &__wait, state);\
 									\
@@ -280,12 +277,7 @@
 									\
 		if (___wait_is_interruptible(state) && __int) {		\
 			__ret = __int;					\
-			if (exclusive) {				\
-				abort_exclusive_wait(&wq, &__wait,	\
-						     state, NULL);	\
-				goto __out;				\
-			}						\
-			break;						\
+			goto __out;					\
 		}							\
 									\
 		cmd;							\
@@ -989,7 +981,6 @@
 void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state);
 long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state);
 void finish_wait(wait_queue_head_t *q, wait_queue_t *wait);
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key);
 long wait_woken(wait_queue_t *wait, unsigned mode, long timeout);
 int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key);
diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h
new file mode 100644
index 0000000..5656960
--- /dev/null
+++ b/include/linux/win_minmax.h
@@ -0,0 +1,37 @@
+/**
+ * lib/minmax.c: windowed min/max tracker by Kathleen Nichols.
+ *
+ */
+#ifndef MINMAX_H
+#define MINMAX_H
+
+#include <linux/types.h>
+
+/* A single data point for our parameterized min-max tracker */
+struct minmax_sample {
+	u32	t;	/* time measurement was taken */
+	u32	v;	/* value measured */
+};
+
+/* State for the parameterized min-max tracker */
+struct minmax {
+	struct minmax_sample s[3];
+};
+
+static inline u32 minmax_get(const struct minmax *m)
+{
+	return m->s[0].v;
+}
+
+static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas)
+{
+	struct minmax_sample val = { .t = t, .v = meas };
+
+	m->s[2] = m->s[1] = m->s[0] = val;
+	return m->s[0].v;
+}
+
+u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas);
+u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas);
+
+#endif
diff --git a/include/media/cec.h b/include/media/cec.h
index dc7854b..fdb5d60 100644
--- a/include/media/cec.h
+++ b/include/media/cec.h
@@ -57,8 +57,8 @@
 	int minor;
 	bool registered;
 	bool unregistered;
-	struct mutex fhs_lock;
 	struct list_head fhs;
+	struct mutex lock;
 };
 
 struct cec_adapter;
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 41e6a24..82f3c91 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -176,8 +176,8 @@
 int tcf_unregister_action(struct tc_action_ops *a,
 			  struct pernet_operations *ops);
 int tcf_action_destroy(struct list_head *actions, int bind);
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
-		    struct tcf_result *res);
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+		    int nr_actions, struct tcf_result *res);
 int tcf_action_init(struct net *net, struct nlattr *nla,
 				  struct nlattr *est, char *n, int ovr,
 				  int bind, struct list_head *);
@@ -189,30 +189,17 @@
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
 
-#define tc_no_actions(_exts) \
-	(list_empty(&(_exts)->actions))
-
-#define tc_for_each_action(_a, _exts) \
-	list_for_each_entry(a, &(_exts)->actions, list)
-
-#define tc_single_action(_exts) \
-	(list_is_singular(&(_exts)->actions))
+#endif /* CONFIG_NET_CLS_ACT */
 
 static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
 					   u64 packets, u64 lastuse)
 {
+#ifdef CONFIG_NET_CLS_ACT
 	if (!a->ops->stats_update)
 		return;
 
 	a->ops->stats_update(a, bytes, packets, lastuse);
+#endif
 }
 
-#else /* CONFIG_NET_CLS_ACT */
-
-#define tc_no_actions(_exts) true
-#define tc_for_each_action(_a, _exts) while ((void)(_a), 0)
-#define tc_single_action(_exts) false
-#define tcf_action_stats_update(a, bytes, packets, lastuse)
-
-#endif /* CONFIG_NET_CLS_ACT */
 #endif
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 9826d3a..f2d0727 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -1,8 +1,9 @@
 #ifndef _ADDRCONF_H
 #define _ADDRCONF_H
 
-#define MAX_RTR_SOLICITATIONS		3
+#define MAX_RTR_SOLICITATIONS		-1		/* unlimited */
 #define RTR_SOLICITATION_INTERVAL	(4*HZ)
+#define RTR_SOLICITATION_MAX_INTERVAL	(3600*HZ)	/* 1 hour */
 
 #define MIN_VALID_LIFETIME		(2*3600)	/* 2 hours */
 
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index ac1bc3c..1061a47 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -12,42 +12,39 @@
 #ifndef _NET_RXRPC_H
 #define _NET_RXRPC_H
 
-#include <linux/skbuff.h>
 #include <linux/rxrpc.h>
 
+struct key;
+struct sock;
+struct socket;
 struct rxrpc_call;
 
-/*
- * the mark applied to socket buffers that may be intercepted
- */
-enum rxrpc_skb_mark {
-	RXRPC_SKB_MARK_DATA,		/* data message */
-	RXRPC_SKB_MARK_FINAL_ACK,	/* final ACK received message */
-	RXRPC_SKB_MARK_BUSY,		/* server busy message */
-	RXRPC_SKB_MARK_REMOTE_ABORT,	/* remote abort message */
-	RXRPC_SKB_MARK_LOCAL_ABORT,	/* local abort message */
-	RXRPC_SKB_MARK_NET_ERROR,	/* network error message */
-	RXRPC_SKB_MARK_LOCAL_ERROR,	/* local error message */
-	RXRPC_SKB_MARK_NEW_CALL,	/* local error message */
-};
+typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *,
+				  unsigned long);
+typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *,
+					unsigned long);
+typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long);
+typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long);
 
-typedef void (*rxrpc_interceptor_t)(struct sock *, unsigned long,
-				    struct sk_buff *);
-void rxrpc_kernel_intercept_rx_messages(struct socket *, rxrpc_interceptor_t);
+void rxrpc_kernel_new_call_notification(struct socket *,
+					rxrpc_notify_new_call_t,
+					rxrpc_discard_new_call_t);
 struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
 					   struct sockaddr_rxrpc *,
 					   struct key *,
 					   unsigned long,
-					   gfp_t);
-int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t);
-void rxrpc_kernel_abort_call(struct rxrpc_call *, u32);
-void rxrpc_kernel_end_call(struct rxrpc_call *);
-bool rxrpc_kernel_is_data_last(struct sk_buff *);
-u32 rxrpc_kernel_get_abort_code(struct sk_buff *);
-int rxrpc_kernel_get_error_number(struct sk_buff *);
-void rxrpc_kernel_data_delivered(struct sk_buff *);
-void rxrpc_kernel_free_skb(struct sk_buff *);
-struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long);
-int rxrpc_kernel_reject_call(struct socket *);
+					   gfp_t,
+					   rxrpc_notify_rx_t);
+int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
+			   struct msghdr *, size_t);
+int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
+			   void *, size_t, size_t *, bool, u32 *);
+void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
+			     u32, int, const char *);
+void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
+void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
+			   struct sockaddr_rxrpc *);
+int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
+			       rxrpc_user_attach_call_t, unsigned long, gfp_t);
 
 #endif /* _NET_RXRPC_H */
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 9b4c418..fd60ecc 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -52,7 +52,7 @@
 	struct sock		sk;
 	struct unix_address     *addr;
 	struct path		path;
-	struct mutex		readlock;
+	struct mutex		iolock, bindlock;
 	struct sock		*peer;
 	struct list_head	link;
 	atomic_long_t		inflight;
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index bfd1590..0a1e21d7 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -29,7 +29,8 @@
 #include <net/sock.h>
 #include <linux/seq_file.h>
 
-#define BT_SUBSYS_VERSION "2.21"
+#define BT_SUBSYS_VERSION	2
+#define BT_SUBSYS_REVISION	22
 
 #ifndef AF_BLUETOOTH
 #define AF_BLUETOOTH	31
@@ -371,6 +372,7 @@
 void hci_sock_clear_flag(struct sock *sk, int nr);
 int hci_sock_test_flag(struct sock *sk, int nr);
 unsigned short hci_sock_get_channel(struct sock *sk);
+u32 hci_sock_get_cookie(struct sock *sk);
 
 int hci_sock_init(void);
 void hci_sock_cleanup(void);
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 003b252..99aa5e5 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -63,6 +63,7 @@
 #define HCI_SDIO	6
 #define HCI_SPI		7
 #define HCI_I2C		8
+#define HCI_SMD		9
 
 /* HCI controller types */
 #define HCI_PRIMARY	0x00
@@ -207,7 +208,11 @@
 	HCI_MGMT_INDEX_EVENTS,
 	HCI_MGMT_UNCONF_INDEX_EVENTS,
 	HCI_MGMT_EXT_INDEX_EVENTS,
-	HCI_MGMT_GENERIC_EVENTS,
+	HCI_MGMT_EXT_INFO_EVENTS,
+	HCI_MGMT_OPTION_EVENTS,
+	HCI_MGMT_SETTING_EVENTS,
+	HCI_MGMT_DEV_CLASS_EVENTS,
+	HCI_MGMT_LOCAL_NAME_EVENTS,
 	HCI_MGMT_OOB_DATA_EVENTS,
 };
 
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ee7fc47..f00bf66 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -211,6 +211,7 @@
 	__u8		dev_name[HCI_MAX_NAME_LENGTH];
 	__u8		short_name[HCI_MAX_SHORT_NAME_LENGTH];
 	__u8		eir[HCI_MAX_EIR_LENGTH];
+	__u16		appearance;
 	__u8		dev_class[3];
 	__u8		major_class;
 	__u8		minor_class;
@@ -399,7 +400,9 @@
 	struct delayed_work	rpa_expired;
 	bdaddr_t		rpa;
 
+#if IS_ENABLED(CONFIG_BT_LEDS)
 	struct led_trigger	*power_led;
+#endif
 
 	int (*open)(struct hci_dev *hdev);
 	int (*close)(struct hci_dev *hdev);
@@ -1026,8 +1029,8 @@
 int hci_reset_dev(struct hci_dev *hdev);
 int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb);
 int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb);
-void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...);
-void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...);
+__printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...);
+__printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...);
 int hci_dev_open(__u16 dev);
 int hci_dev_close(__u16 dev);
 int hci_dev_do_close(struct hci_dev *hdev);
@@ -1404,6 +1407,9 @@
 void hci_send_to_channel(unsigned short channel, struct sk_buff *skb,
 			 int flag, struct sock *skip_sk);
 void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb);
+void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
+				 void *data, u16 data_len, ktime_t tstamp,
+				 int flag, struct sock *skip_sk);
 
 void hci_sock_dev_event(struct hci_dev *hdev, int event);
 
@@ -1449,6 +1455,7 @@
 #define DISCOV_BREDR_INQUIRY_LEN	0x08
 #define DISCOV_LE_RESTART_DELAY		msecs_to_jiffies(200)	/* msec */
 
+void mgmt_fill_version_info(void *ver);
 int mgmt_new_settings(struct hci_dev *hdev);
 void mgmt_index_added(struct hci_dev *hdev);
 void mgmt_index_removed(struct hci_dev *hdev);
diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h
index 587d013..240786b 100644
--- a/include/net/bluetooth/hci_mon.h
+++ b/include/net/bluetooth/hci_mon.h
@@ -45,6 +45,10 @@
 #define HCI_MON_VENDOR_DIAG	11
 #define HCI_MON_SYSTEM_NOTE	12
 #define HCI_MON_USER_LOGGING	13
+#define HCI_MON_CTRL_OPEN	14
+#define HCI_MON_CTRL_CLOSE	15
+#define HCI_MON_CTRL_COMMAND	16
+#define HCI_MON_CTRL_EVENT	17
 
 struct hci_mon_new_index {
 	__u8		type;
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 7647964..72a456b 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -586,6 +586,24 @@
 
 #define MGMT_OP_START_LIMITED_DISCOVERY	0x0041
 
+#define MGMT_OP_READ_EXT_INFO		0x0042
+#define MGMT_READ_EXT_INFO_SIZE		0
+struct mgmt_rp_read_ext_info {
+	bdaddr_t bdaddr;
+	__u8     version;
+	__le16   manufacturer;
+	__le32   supported_settings;
+	__le32   current_settings;
+	__le16   eir_len;
+	__u8     eir[0];
+} __packed;
+
+#define MGMT_OP_SET_APPEARANCE		0x0043
+struct mgmt_cp_set_appearance {
+	__u16	appearance;
+} __packed;
+#define MGMT_SET_APPEARANCE_SIZE	2
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
@@ -800,3 +818,9 @@
 struct mgmt_ev_advertising_removed {
 	__u8    instance;
 } __packed;
+
+#define MGMT_EV_EXT_INFO_CHANGED	0x0025
+struct mgmt_ev_ext_info_changed {
+	__le16	eir_len;
+	__u8	eir[0];
+} __packed;
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9c23f4d3..fe78f02 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5,7 +5,7 @@
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2015	Intel Deutschland GmbH
+ * Copyright 2015-2016	Intel Deutschland GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -593,6 +593,8 @@
 	s8 noise;
 };
 
+#define CFG80211_MAX_WEP_KEYS	4
+
 /**
  * struct cfg80211_crypto_settings - Crypto settings
  * @wpa_versions: indicates which, if any, WPA versions are enabled
@@ -610,6 +612,9 @@
  *	allowed through even on unauthorized ports
  * @control_port_no_encrypt: TRUE to prevent encryption of control port
  *	protocol frames.
+ * @wep_keys: static WEP keys, if not NULL points to an array of
+ *	CFG80211_MAX_WEP_KEYS WEP keys
+ * @wep_tx_key: key index (0..3) of the default TX static WEP key
  */
 struct cfg80211_crypto_settings {
 	u32 wpa_versions;
@@ -621,6 +626,8 @@
 	bool control_port;
 	__be16 control_port_ethertype;
 	bool control_port_no_encrypt;
+	struct key_params *wep_keys;
+	int wep_tx_key;
 };
 
 /**
@@ -676,6 +683,18 @@
 	struct mac_address mac_addrs[];
 };
 
+/*
+ * cfg80211_bitrate_mask - masks for bitrate control
+ */
+struct cfg80211_bitrate_mask {
+	struct {
+		u32 legacy;
+		u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
+		u16 vht_mcs[NL80211_VHT_NSS_MAX];
+		enum nl80211_txrate_gi gi;
+	} control[NUM_NL80211_BANDS];
+};
+
 /**
  * struct cfg80211_ap_settings - AP configuration
  *
@@ -700,6 +719,7 @@
  *	MAC address based access control
  * @pbss: If set, start as a PCP instead of AP. Relevant for DMG
  *	networks.
+ * @beacon_rate: bitrate to be used for beacons
  */
 struct cfg80211_ap_settings {
 	struct cfg80211_chan_def chandef;
@@ -719,6 +739,7 @@
 	bool p2p_opp_ps;
 	const struct cfg80211_acl_data *acl;
 	bool pbss;
+	struct cfg80211_bitrate_mask beacon_rate;
 };
 
 /**
@@ -1102,6 +1123,7 @@
 	struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1];
 };
 
+#if IS_ENABLED(CONFIG_CFG80211)
 /**
  * cfg80211_get_station - retrieve information about a given station
  * @dev: the device where the station is supposed to be connected to
@@ -1114,6 +1136,14 @@
  */
 int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr,
 			 struct station_info *sinfo);
+#else
+static inline int cfg80211_get_station(struct net_device *dev,
+				       const u8 *mac_addr,
+				       struct station_info *sinfo)
+{
+	return -ENOENT;
+}
+#endif
 
 /**
  * enum monitor_flags - monitor flags
@@ -1342,6 +1372,7 @@
  * @beacon_interval: beacon interval to use
  * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a]
  * @basic_rates: basic rates to use when creating the mesh
+ * @beacon_rate: bitrate to be used for beacons
  *
  * These parameters are fixed when the mesh is created.
  */
@@ -1362,6 +1393,7 @@
 	u16 beacon_interval;
 	int mcast_rate[NUM_NL80211_BANDS];
 	u32 basic_rates;
+	struct cfg80211_bitrate_mask beacon_rate;
 };
 
 /**
@@ -2001,17 +2033,6 @@
 	WIPHY_PARAM_DYN_ACK		= 1 << 5,
 };
 
-/*
- * cfg80211_bitrate_mask - masks for bitrate control
- */
-struct cfg80211_bitrate_mask {
-	struct {
-		u32 legacy;
-		u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
-		u16 vht_mcs[NL80211_VHT_NSS_MAX];
-		enum nl80211_txrate_gi gi;
-	} control[NUM_NL80211_BANDS];
-};
 /**
  * struct cfg80211_pmksa - PMK Security Association
  *
@@ -2293,6 +2314,98 @@
 };
 
 /**
+ * struct cfg80211_nan_conf - NAN configuration
+ *
+ * This struct defines NAN configuration parameters
+ *
+ * @master_pref: master preference (1 - 255)
+ * @dual: dual band operation mode, see &enum nl80211_nan_dual_band_conf
+ */
+struct cfg80211_nan_conf {
+	u8 master_pref;
+	u8 dual;
+};
+
+/**
+ * enum cfg80211_nan_conf_changes - indicates changed fields in NAN
+ * configuration
+ *
+ * @CFG80211_NAN_CONF_CHANGED_PREF: master preference
+ * @CFG80211_NAN_CONF_CHANGED_DUAL: dual band operation
+ */
+enum cfg80211_nan_conf_changes {
+	CFG80211_NAN_CONF_CHANGED_PREF = BIT(0),
+	CFG80211_NAN_CONF_CHANGED_DUAL = BIT(1),
+};
+
+/**
+ * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter
+ *
+ * @filter: the content of the filter
+ * @len: the length of the filter
+ */
+struct cfg80211_nan_func_filter {
+	const u8 *filter;
+	u8 len;
+};
+
+/**
+ * struct cfg80211_nan_func - a NAN function
+ *
+ * @type: &enum nl80211_nan_function_type
+ * @service_id: the service ID of the function
+ * @publish_type: &nl80211_nan_publish_type
+ * @close_range: if true, the range should be limited. Threshold is
+ *	implementation specific.
+ * @publish_bcast: if true, the solicited publish should be broadcasted
+ * @subscribe_active: if true, the subscribe is active
+ * @followup_id: the instance ID for follow up
+ * @followup_reqid: the requestor instance ID for follow up
+ * @followup_dest: MAC address of the recipient of the follow up
+ * @ttl: time to live counter in DW.
+ * @serv_spec_info: Service Specific Info
+ * @serv_spec_info_len: Service Specific Info length
+ * @srf_include: if true, SRF is inclusive
+ * @srf_bf: Bloom Filter
+ * @srf_bf_len: Bloom Filter length
+ * @srf_bf_idx: Bloom Filter index
+ * @srf_macs: SRF MAC addresses
+ * @srf_num_macs: number of MAC addresses in SRF
+ * @rx_filters: rx filters that are matched with corresponding peer's tx_filter
+ * @tx_filters: filters that should be transmitted in the SDF.
+ * @num_rx_filters: length of &rx_filters.
+ * @num_tx_filters: length of &tx_filters.
+ * @instance_id: driver allocated id of the function.
+ * @cookie: unique NAN function identifier.
+ */
+struct cfg80211_nan_func {
+	enum nl80211_nan_function_type type;
+	u8 service_id[NL80211_NAN_FUNC_SERVICE_ID_LEN];
+	u8 publish_type;
+	bool close_range;
+	bool publish_bcast;
+	bool subscribe_active;
+	u8 followup_id;
+	u8 followup_reqid;
+	struct mac_address followup_dest;
+	u32 ttl;
+	const u8 *serv_spec_info;
+	u8 serv_spec_info_len;
+	bool srf_include;
+	const u8 *srf_bf;
+	u8 srf_bf_len;
+	u8 srf_bf_idx;
+	struct mac_address *srf_macs;
+	int srf_num_macs;
+	struct cfg80211_nan_func_filter *rx_filters;
+	struct cfg80211_nan_func_filter *tx_filters;
+	u8 num_tx_filters;
+	u8 num_rx_filters;
+	u8 instance_id;
+	u64 cookie;
+};
+
+/**
  * struct cfg80211_ops - backend description for wireless configuration
  *
  * This struct is registered by fullmac card drivers and/or wireless stacks
@@ -2423,7 +2536,8 @@
  *	cases, the result of roaming is indicated with a call to
  *	cfg80211_roamed() or cfg80211_roamed_bss().
  *	(invoked with the wireless_dev mutex held)
- * @disconnect: Disconnect from the BSS/ESS.
+ * @disconnect: Disconnect from the BSS/ESS. Once done, call
+ *	cfg80211_disconnected().
  *	(invoked with the wireless_dev mutex held)
  *
  * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call
@@ -2579,6 +2693,19 @@
  *	and returning to the base channel for communication with the AP.
  * @tdls_cancel_channel_switch: Stop channel-switching with a TDLS peer. Both
  *	peers must be on the base channel when the call completes.
+ * @start_nan: Start the NAN interface.
+ * @stop_nan: Stop the NAN interface.
+ * @add_nan_func: Add a NAN function. Returns negative value on failure.
+ *	On success @nan_func ownership is transferred to the driver and
+ *	it may access it outside of the scope of this function. The driver
+ *	should free the @nan_func when no longer needed by calling
+ *	cfg80211_free_nan_func().
+ *	On success the driver should assign an instance_id in the
+ *	provided @nan_func.
+ * @del_nan_func: Delete a NAN function.
+ * @nan_change_conf: changes NAN configuration. The changed parameters must
+ *	be specified in @changes (using &enum cfg80211_nan_conf_changes);
+ *	All other parameters must be ignored.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2844,6 +2971,17 @@
 	void	(*tdls_cancel_channel_switch)(struct wiphy *wiphy,
 					      struct net_device *dev,
 					      const u8 *addr);
+	int	(*start_nan)(struct wiphy *wiphy, struct wireless_dev *wdev,
+			     struct cfg80211_nan_conf *conf);
+	void	(*stop_nan)(struct wiphy *wiphy, struct wireless_dev *wdev);
+	int	(*add_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev,
+				struct cfg80211_nan_func *nan_func);
+	void	(*del_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev,
+			       u64 cookie);
+	int	(*nan_change_conf)(struct wiphy *wiphy,
+				   struct wireless_dev *wdev,
+				   struct cfg80211_nan_conf *conf,
+				   u32 changes);
 };
 
 /*
@@ -2890,6 +3028,8 @@
  * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels.
  * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in
  *	beaconing mode (AP, IBSS, Mesh, ...).
+ * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation
+ *	before connection.
  */
 enum wiphy_flags {
 	/* use hole at 0 */
@@ -2915,6 +3055,7 @@
 	WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL	= BIT(21),
 	WIPHY_FLAG_SUPPORTS_5_10_MHZ		= BIT(22),
 	WIPHY_FLAG_HAS_CHANNEL_SWITCH		= BIT(23),
+	WIPHY_FLAG_HAS_STATIC_WEP		= BIT(24),
 };
 
 /**
@@ -3292,6 +3433,8 @@
  * @bss_select_support: bitmask indicating the BSS selection criteria supported
  *	by the driver in the .connect() callback. The bit position maps to the
  *	attribute indices defined in &enum nl80211_bss_select_attr.
+ *
+ * @cookie_counter: unique generic cookie counter, used to identify objects.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -3421,6 +3564,8 @@
 
 	u32 bss_select_support;
 
+	u64 cookie_counter;
+
 	char priv[0] __aligned(NETDEV_ALIGN);
 };
 
@@ -3601,6 +3746,7 @@
  *	beacons, 0 when not valid
  * @address: The address for this device, valid only if @netdev is %NULL
  * @p2p_started: true if this is a P2P Device that has been started
+ * @nan_started: true if this is a NAN interface that has been started
  * @cac_started: true if DFS channel availability check has been started
  * @cac_start_time: timestamp (jiffies) when the dfs state was entered.
  * @cac_time_ms: CAC time in ms
@@ -3632,7 +3778,7 @@
 
 	struct mutex mtx;
 
-	bool use_4addr, p2p_started;
+	bool use_4addr, p2p_started, nan_started;
 
 	u8 address[ETH_ALEN] __aligned(sizeof(u16));
 
@@ -3946,6 +4092,34 @@
 				    struct cfg80211_qos_map *qos_map);
 
 /**
+ * cfg80211_find_ie_match - match information element and byte array in data
+ *
+ * @eid: element ID
+ * @ies: data consisting of IEs
+ * @len: length of data
+ * @match: byte array to match
+ * @match_len: number of bytes in the match array
+ * @match_offset: offset in the IE where the byte array should match.
+ *	If match_len is zero, this must also be set to zero.
+ *	Otherwise this must be set to 2 or more, because the first
+ *	byte is the element id, which is already compared to eid, and
+ *	the second byte is the IE length.
+ *
+ * Return: %NULL if the element ID could not be found or if
+ * the element is invalid (claims to be longer than the given
+ * data) or if the byte array doesn't match, or a pointer to the first
+ * byte of the requested element, that is the byte containing the
+ * element ID.
+ *
+ * Note: There are no checks on the element length other than
+ * having to fit into the given data and being large enough for the
+ * byte array to match.
+ */
+const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len,
+				 const u8 *match, int match_len,
+				 int match_offset);
+
+/**
  * cfg80211_find_ie - find information element in data
  *
  * @eid: element ID
@@ -3960,7 +4134,10 @@
  * Note: There are no checks on the element length other than
  * having to fit into the given data.
  */
-const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len);
+static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len)
+{
+	return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0);
+}
 
 /**
  * cfg80211_find_vendor_ie - find vendor specific information element in data
@@ -5509,6 +5686,67 @@
 	return (ft_byte & BIT(ftidx % 8)) != 0;
 }
 
+/**
+ * cfg80211_free_nan_func - free NAN function
+ * @f: NAN function that should be freed
+ *
+ * Frees all the NAN function and all it's allocated members.
+ */
+void cfg80211_free_nan_func(struct cfg80211_nan_func *f);
+
+/**
+ * struct cfg80211_nan_match_params - NAN match parameters
+ * @type: the type of the function that triggered a match. If it is
+ *	 %NL80211_NAN_FUNC_SUBSCRIBE it means that we replied to a subscriber.
+ *	 If it is %NL80211_NAN_FUNC_PUBLISH, it means that we got a discovery
+ *	 result.
+ *	 If it is %NL80211_NAN_FUNC_FOLLOW_UP, we received a follow up.
+ * @inst_id: the local instance id
+ * @peer_inst_id: the instance id of the peer's function
+ * @addr: the MAC address of the peer
+ * @info_len: the length of the &info
+ * @info: the Service Specific Info from the peer (if any)
+ * @cookie: unique identifier of the corresponding function
+ */
+struct cfg80211_nan_match_params {
+	enum nl80211_nan_function_type type;
+	u8 inst_id;
+	u8 peer_inst_id;
+	const u8 *addr;
+	u8 info_len;
+	const u8 *info;
+	u64 cookie;
+};
+
+/**
+ * cfg80211_nan_match - report a match for a NAN function.
+ * @wdev: the wireless device reporting the match
+ * @match: match notification parameters
+ * @gfp: allocation flags
+ *
+ * This function reports that the a NAN function had a match. This
+ * can be a subscribe that had a match or a solicited publish that
+ * was sent. It can also be a follow up that was received.
+ */
+void cfg80211_nan_match(struct wireless_dev *wdev,
+			struct cfg80211_nan_match_params *match, gfp_t gfp);
+
+/**
+ * cfg80211_nan_func_terminated - notify about NAN function termination.
+ *
+ * @wdev: the wireless device reporting the match
+ * @inst_id: the local instance id
+ * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*)
+ * @cookie: unique NAN function identifier
+ * @gfp: allocation flags
+ *
+ * This function reports that the a NAN function is terminated.
+ */
+void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
+				  u8 inst_id,
+				  enum nl80211_nan_func_term_reason reason,
+				  u64 cookie, gfp_t gfp);
+
 /* ethtool helper */
 void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info);
 
diff --git a/include/net/devlink.h b/include/net/devlink.h
index c99ffe8..211bd3c 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -50,7 +50,6 @@
 };
 
 struct devlink_ops {
-	size_t priv_size;
 	int (*port_type_set)(struct devlink_port *devlink_port,
 			     enum devlink_port_type port_type);
 	int (*port_split)(struct devlink *devlink, unsigned int port_index,
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 2217a3f..b122196 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -26,6 +26,7 @@
 	DSA_TAG_PROTO_TRAILER,
 	DSA_TAG_PROTO_EDSA,
 	DSA_TAG_PROTO_BRCM,
+	DSA_TAG_PROTO_QCA,
 	DSA_TAG_LAST,		/* MUST BE LAST */
 };
 
@@ -142,6 +143,7 @@
 	struct net_device	*netdev;
 	struct device_node	*dn;
 	unsigned int		ageing_time;
+	u8			stp_state;
 };
 
 struct dsa_switch {
@@ -165,9 +167,9 @@
 	struct dsa_chip_data	*cd;
 
 	/*
-	 * The used switch driver.
+	 * The switch operations.
 	 */
-	struct dsa_switch_driver	*drv;
+	struct dsa_switch_ops	*ops;
 
 	/*
 	 * An array of which element [a] indicates which port on this
@@ -234,19 +236,21 @@
 struct switchdev_trans;
 struct switchdev_obj;
 struct switchdev_obj_port_fdb;
+struct switchdev_obj_port_mdb;
 struct switchdev_obj_port_vlan;
 
-struct dsa_switch_driver {
+struct dsa_switch_ops {
 	struct list_head	list;
 
-	enum dsa_tag_protocol	tag_protocol;
-
 	/*
 	 * Probing and setup.
 	 */
 	const char	*(*probe)(struct device *dsa_dev,
 				  struct device *host_dev, int sw_addr,
 				  void **priv);
+
+	enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds);
+
 	int	(*setup)(struct dsa_switch *ds);
 	int	(*set_addr)(struct dsa_switch *ds, u8 *addr);
 	u32	(*get_phy_flags)(struct dsa_switch *ds, int port);
@@ -336,6 +340,7 @@
 	void	(*port_bridge_leave)(struct dsa_switch *ds, int port);
 	void	(*port_stp_state_set)(struct dsa_switch *ds, int port,
 				      u8 state);
+	void	(*port_fast_age)(struct dsa_switch *ds, int port);
 
 	/*
 	 * VLAN support
@@ -368,17 +373,27 @@
 	int	(*port_fdb_dump)(struct dsa_switch *ds, int port,
 				 struct switchdev_obj_port_fdb *fdb,
 				 int (*cb)(struct switchdev_obj *obj));
+
+	/*
+	 * Multicast database
+	 */
+	int	(*port_mdb_prepare)(struct dsa_switch *ds, int port,
+				    const struct switchdev_obj_port_mdb *mdb,
+				    struct switchdev_trans *trans);
+	void	(*port_mdb_add)(struct dsa_switch *ds, int port,
+				const struct switchdev_obj_port_mdb *mdb,
+				struct switchdev_trans *trans);
+	int	(*port_mdb_del)(struct dsa_switch *ds, int port,
+				const struct switchdev_obj_port_mdb *mdb);
+	int	(*port_mdb_dump)(struct dsa_switch *ds, int port,
+				 struct switchdev_obj_port_mdb *mdb,
+				 int (*cb)(struct switchdev_obj *obj));
 };
 
-void register_switch_driver(struct dsa_switch_driver *type);
-void unregister_switch_driver(struct dsa_switch_driver *type);
+void register_switch_driver(struct dsa_switch_ops *type);
+void unregister_switch_driver(struct dsa_switch_ops *type);
 struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev);
 
-static inline void *ds_to_priv(struct dsa_switch *ds)
-{
-	return ds->priv;
-}
-
 static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst)
 {
 	return dst->rcv != NULL;
@@ -386,4 +401,18 @@
 
 void dsa_unregister_switch(struct dsa_switch *ds);
 int dsa_register_switch(struct dsa_switch *ds, struct device_node *np);
+#ifdef CONFIG_PM_SLEEP
+int dsa_switch_suspend(struct dsa_switch *ds);
+int dsa_switch_resume(struct dsa_switch *ds);
+#else
+static inline int dsa_switch_suspend(struct dsa_switch *ds)
+{
+	return 0;
+}
+static inline int dsa_switch_resume(struct dsa_switch *ds)
+{
+	return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
 #endif
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index 5db9f59..6965c8f 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -112,12 +112,13 @@
 	return &dst->u.tun_info;
 }
 
-static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
-						 __be16 flags,
-						 __be64 tunnel_id,
-						 int md_size)
+static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr,
+						    __be32 daddr,
+						    __u8 tos, __u8 ttl,
+						    __be16 flags,
+						    __be64 tunnel_id,
+						    int md_size)
 {
-	const struct iphdr *iph = ip_hdr(skb);
 	struct metadata_dst *tun_dst;
 
 	tun_dst = tun_rx_dst(md_size);
@@ -125,17 +126,30 @@
 		return NULL;
 
 	ip_tunnel_key_init(&tun_dst->u.tun_info.key,
-			   iph->saddr, iph->daddr, iph->tos, iph->ttl,
+			   saddr, daddr, tos, ttl,
 			   0, 0, 0, tunnel_id, flags);
 	return tun_dst;
 }
 
-static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
+static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
 						 __be16 flags,
 						 __be64 tunnel_id,
 						 int md_size)
 {
-	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	const struct iphdr *iph = ip_hdr(skb);
+
+	return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
+				flags, tunnel_id, md_size);
+}
+
+static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr,
+						      const struct in6_addr *daddr,
+						      __u8 tos, __u8 ttl,
+						      __be32 label,
+						      __be16 flags,
+						      __be64 tunnel_id,
+						      int md_size)
+{
 	struct metadata_dst *tun_dst;
 	struct ip_tunnel_info *info;
 
@@ -150,14 +164,26 @@
 	info->key.tp_src = 0;
 	info->key.tp_dst = 0;
 
-	info->key.u.ipv6.src = ip6h->saddr;
-	info->key.u.ipv6.dst = ip6h->daddr;
+	info->key.u.ipv6.src = *saddr;
+	info->key.u.ipv6.dst = *daddr;
 
-	info->key.tos = ipv6_get_dsfield(ip6h);
-	info->key.ttl = ip6h->hop_limit;
-	info->key.label = ip6_flowlabel(ip6h);
+	info->key.tos = tos;
+	info->key.ttl = ttl;
+	info->key.label = label;
 
 	return tun_dst;
 }
 
+static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb,
+						   __be16 flags,
+						   __be64 tunnel_id,
+						   int md_size)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+
+	return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr,
+				  ipv6_get_dsfield(ip6h), ip6h->hop_limit,
+				  ip6_flowlabel(ip6h), flags, tunnel_id,
+				  md_size);
+}
 #endif /* __NET_DST_METADATA_H */
diff --git a/include/net/flow.h b/include/net/flow.h
index d47ef4b..035aa77 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -34,8 +34,7 @@
 	__u8	flowic_flags;
 #define FLOWI_FLAG_ANYSRC		0x01
 #define FLOWI_FLAG_KNOWN_NH		0x02
-#define FLOWI_FLAG_L3MDEV_SRC		0x04
-#define FLOWI_FLAG_SKIP_NH_OIF		0x08
+#define FLOWI_FLAG_SKIP_NH_OIF		0x04
 	__u32	flowic_secid;
 	struct flowi_tunnel flowic_tun_key;
 };
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index d3d60dc..d953492 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -32,8 +32,13 @@
 };
 
 struct flow_dissector_key_tags {
-	u32	vlan_id:12,
-		flow_label:20;
+	u32	flow_label;
+};
+
+struct flow_dissector_key_vlan {
+	u16	vlan_id:12,
+		vlan_priority:3;
+	u16	padding;
 };
 
 struct flow_dissector_key_keyid {
@@ -119,7 +124,7 @@
 	FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */
 	FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */
 	FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */
-	FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */
+	FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */
 	FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */
 	FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */
 	FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */
@@ -148,6 +153,7 @@
 #define FLOW_KEYS_HASH_START_FIELD basic
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_tags tags;
+	struct flow_dissector_key_vlan vlan;
 	struct flow_dissector_key_keyid keyid;
 	struct flow_dissector_key_ports ports;
 	struct flow_dissector_key_addrs addrs;
@@ -177,7 +183,7 @@
 void make_flow_keys_digest(struct flow_keys_digest *digest,
 			   const struct flow_keys *flow);
 
-static inline bool flow_keys_have_l4(struct flow_keys *keys)
+static inline bool flow_keys_have_l4(const struct flow_keys *keys)
 {
 	return (keys->ports.ports || keys->tags.flow_label);
 }
diff --git a/include/net/fq.h b/include/net/fq.h
index 268b490..6d8521a 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -72,9 +72,12 @@
 	u32 flows_cnt;
 	u32 perturbation;
 	u32 limit;
+	u32 memory_limit;
+	u32 memory_usage;
 	u32 quantum;
 	u32 backlog;
 	u32 overlimit;
+	u32 overmemory;
 	u32 collisions;
 };
 
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index 163f3ed..4e6131c 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -29,6 +29,7 @@
 	tin->backlog_packets--;
 	flow->backlog -= skb->len;
 	fq->backlog--;
+	fq->memory_usage -= skb->truesize;
 
 	if (flow->backlog == 0) {
 		list_del_init(&flow->backlogchain);
@@ -154,6 +155,7 @@
 	flow->backlog += skb->len;
 	tin->backlog_bytes += skb->len;
 	tin->backlog_packets++;
+	fq->memory_usage += skb->truesize;
 	fq->backlog++;
 
 	fq_recalc_backlog(fq, tin, flow);
@@ -166,7 +168,7 @@
 
 	__skb_queue_tail(&flow->queue, skb);
 
-	if (fq->backlog > fq->limit) {
+	if (fq->backlog > fq->limit || fq->memory_usage > fq->memory_limit) {
 		flow = list_first_entry_or_null(&fq->backlogs,
 						struct fq_flow,
 						backlogchain);
@@ -181,6 +183,8 @@
 
 		flow->tin->overlimit++;
 		fq->overlimit++;
+		if (fq->memory_usage > fq->memory_limit)
+			fq->overmemory++;
 	}
 }
 
@@ -251,6 +255,7 @@
 	fq->perturbation = prandom_u32();
 	fq->quantum = 300;
 	fq->limit = 8192;
+	fq->memory_limit = 16 << 20; /* 16 MBytes */
 
 	fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
 	if (!fq->flows)
diff --git a/include/net/gre.h b/include/net/gre.h
index 7a54a31..d25d836 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -7,7 +7,15 @@
 struct gre_base_hdr {
 	__be16 flags;
 	__be16 protocol;
-};
+} __packed;
+
+struct gre_full_hdr {
+	struct gre_base_hdr fixed_header;
+	__be16 csum;
+	__be16 reserved1;
+	__be32 key;
+	__be32 seq;
+} __packed;
 #define GRE_HEADER_SECTION 4
 
 #define GREPROTO_CISCO		0
@@ -104,6 +112,7 @@
 
 	skb_push(skb, hdr_len);
 
+	skb_set_inner_protocol(skb, proto);
 	skb_reset_transport_header(skb);
 	greh = (struct gre_base_hdr *)skb->data;
 	greh->flags = gre_tnl_flags_to_gre_flags(flags);
diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h
index b0fd947..ba07b9d 100644
--- a/include/net/ieee80211_radiotap.h
+++ b/include/net/ieee80211_radiotap.h
@@ -190,6 +190,10 @@
  * IEEE80211_RADIOTAP_VHT	u16, u8, u8, u8[4], u8, u8, u16
  *
  *	Contains VHT information about this frame.
+ *
+ * IEEE80211_RADIOTAP_TIMESTAMP		u64, u16, u8, u8	variable
+ *
+ *	Contains timestamp information for this frame.
  */
 enum ieee80211_radiotap_type {
 	IEEE80211_RADIOTAP_TSFT = 0,
@@ -214,6 +218,7 @@
 	IEEE80211_RADIOTAP_MCS = 19,
 	IEEE80211_RADIOTAP_AMPDU_STATUS = 20,
 	IEEE80211_RADIOTAP_VHT = 21,
+	IEEE80211_RADIOTAP_TIMESTAMP = 22,
 
 	/* valid in every it_present bitmap, even vendor namespaces */
 	IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29,
@@ -321,6 +326,22 @@
 #define IEEE80211_RADIOTAP_CODING_LDPC_USER2			0x04
 #define IEEE80211_RADIOTAP_CODING_LDPC_USER3			0x08
 
+/* For IEEE80211_RADIOTAP_TIMESTAMP */
+#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK			0x000F
+#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MS			0x0000
+#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US			0x0001
+#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS			0x0003
+#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK			0x00F0
+#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU		0x0000
+#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU		0x0010
+#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU		0x0020
+#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ		0x0030
+#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN		0x00F0
+
+#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT			0x00
+#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT			0x01
+#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY		0x02
+
 /* helpers */
 static inline int ieee80211_get_radiotap_len(unsigned char *data)
 {
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 1c8b682..515352c 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -201,6 +201,7 @@
 	struct ipv6_devstat	stats;
 
 	struct timer_list	rs_timer;
+	__s32			rs_interval;	/* in jiffies */
 	__u8			rs_probes;
 
 	__u8			addr_gen_mode;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 49dcad4..197a30d 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -134,8 +134,8 @@
 	} icsk_mtup;
 	u32			  icsk_user_timeout;
 
-	u64			  icsk_ca_priv[64 / sizeof(u64)];
-#define ICSK_CA_PRIV_SIZE      (8 * sizeof(u64))
+	u64			  icsk_ca_priv[88 / sizeof(u64)];
+#define ICSK_CA_PRIV_SIZE      (11 * sizeof(u64))
 };
 
 #define ICSK_TIME_RETRANS	1	/* Retransmit timer */
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index 0dc0a51..dce2d58 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -128,7 +128,8 @@
 	to = from | htonl(INET_ECN_CE << 20);
 	*(__be32 *)iph = to;
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
-		skb->csum = csum_add(csum_sub(skb->csum, from), to);
+		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
+				     (__force __wsum)to);
 	return 1;
 }
 
diff --git a/include/net/ip.h b/include/net/ip.h
index 9742b92..bc43c0f 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -219,6 +219,29 @@
 }
 #endif
 
+#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \
+{ \
+	int i, c; \
+	for_each_possible_cpu(c) { \
+		for (i = 0; stats_list[i].name; i++) \
+			buff64[i] += snmp_get_cpu_field64( \
+					mib_statistic, \
+					c, stats_list[i].entry, \
+					offset); \
+	} \
+}
+
+#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \
+{ \
+	int i, c; \
+	for_each_possible_cpu(c) { \
+		for (i = 0; stats_list[i].name; i++) \
+			buff[i] += snmp_get_cpu_field( \
+						mib_statistic, \
+						c, stats_list[i].entry); \
+	} \
+}
+
 void inet_get_local_port_range(struct net *net, int *low, int *high);
 
 #ifdef CONFIG_SYSCTL
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index d97305d..e0cd318 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -64,6 +64,9 @@
 }
 
 void ip6_route_input(struct sk_buff *skb);
+struct dst_entry *ip6_route_input_lookup(struct net *net,
+					 struct net_device *dev,
+					 struct flowi6 *fl6, int flags);
 
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 					 struct flowi6 *fl6, int flags);
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index 43a5a0e..20ed969 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -23,6 +23,7 @@
 	__u8 proto;		/* tunnel protocol */
 	__u8 encap_limit;	/* encapsulation limit for tunnel */
 	__u8 hop_limit;		/* hop limit for tunnel */
+	bool collect_md;
 	__be32 flowinfo;	/* traffic class and flowlabel for tunnel */
 	__u32 flags;		/* tunnel flags */
 	struct in6_addr laddr;	/* local tunnel end-point address */
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 4079fc1..b9314b4 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -22,6 +22,7 @@
 #include <net/fib_rules.h>
 #include <net/inetpeer.h>
 #include <linux/percpu.h>
+#include <linux/notifier.h>
 
 struct fib_config {
 	u8			fc_dst_len;
@@ -111,6 +112,7 @@
 	unsigned char		fib_scope;
 	unsigned char		fib_type;
 	__be32			fib_prefsrc;
+	u32			fib_tb_id;
 	u32			fib_priority;
 	u32			*fib_metrics;
 #define fib_mtu fib_metrics[RTAX_MTU-1]
@@ -121,6 +123,7 @@
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	int			fib_weight;
 #endif
+	unsigned int		fib_offload_cnt;
 	struct rcu_head		rcu;
 	struct fib_nh		fib_nh[0];
 #define fib_dev		fib_nh[0].nh_dev
@@ -172,6 +175,18 @@
 
 __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
 
+static inline void fib_info_offload_inc(struct fib_info *fi)
+{
+	fi->fib_offload_cnt++;
+	fi->fib_flags |= RTNH_F_OFFLOAD;
+}
+
+static inline void fib_info_offload_dec(struct fib_info *fi)
+{
+	if (--fi->fib_offload_cnt == 0)
+		fi->fib_flags &= ~RTNH_F_OFFLOAD;
+}
+
 #define FIB_RES_SADDR(net, res)				\
 	((FIB_RES_NH(res).nh_saddr_genid ==		\
 	  atomic_read(&(net)->ipv4.dev_addr_genid)) ?	\
@@ -184,6 +199,33 @@
 #define FIB_RES_PREFSRC(net, res)	((res).fi->fib_prefsrc ? : \
 					 FIB_RES_SADDR(net, res))
 
+struct fib_notifier_info {
+	struct net *net;
+};
+
+struct fib_entry_notifier_info {
+	struct fib_notifier_info info; /* must be first */
+	u32 dst;
+	int dst_len;
+	struct fib_info *fi;
+	u8 tos;
+	u8 type;
+	u32 tb_id;
+	u32 nlflags;
+};
+
+enum fib_event_type {
+	FIB_EVENT_ENTRY_ADD,
+	FIB_EVENT_ENTRY_DEL,
+	FIB_EVENT_RULE_ADD,
+	FIB_EVENT_RULE_DEL,
+};
+
+int register_fib_notifier(struct notifier_block *nb);
+int unregister_fib_notifier(struct notifier_block *nb);
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+		       struct fib_notifier_info *info);
+
 struct fib_table {
 	struct hlist_node	tb_hlist;
 	u32			tb_id;
@@ -195,13 +237,12 @@
 
 int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 		     struct fib_result *res, int fib_flags);
-int fib_table_insert(struct fib_table *, struct fib_config *);
-int fib_table_delete(struct fib_table *, struct fib_config *);
+int fib_table_insert(struct net *, struct fib_table *, struct fib_config *);
+int fib_table_delete(struct net *, struct fib_table *, struct fib_config *);
 int fib_table_dump(struct fib_table *table, struct sk_buff *skb,
 		   struct netlink_callback *cb);
-int fib_table_flush(struct fib_table *table);
+int fib_table_flush(struct net *net, struct fib_table *table);
 struct fib_table *fib_trie_unmerge(struct fib_table *main_tb);
-void fib_table_flush_external(struct fib_table *table);
 void fib_free_table(struct fib_table *tb);
 
 #ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -314,12 +355,11 @@
 }
 #endif
 int fib_unmerge(struct net *net);
-void fib_flush_external(struct net *net);
 
 /* Exported by fib_semantics.c */
 int ip_fib_check_default(__be32 gw, struct net_device *dev);
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
-int fib_sync_down_addr(struct net *net, __be32 local);
+int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 
 extern u32 fib_multipath_secret __read_mostly;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index a5e7035..59557c0 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -222,6 +222,25 @@
 	return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
 }
 
+static inline __be64 key32_to_tunnel_id(__be32 key)
+{
+#ifdef __BIG_ENDIAN
+	return (__force __be64)key;
+#else
+	return (__force __be64)((__force u64)key << 32);
+#endif
+}
+
+/* Returns the least-significant 32 bits of a __be64. */
+static inline __be32 tunnel_id_to_key32(__be64 tun_id)
+{
+#ifdef __BIG_ENDIAN
+	return (__force __be32)tun_id;
+#else
+	return (__force __be32)((__force u64)tun_id >> 32);
+#endif
+}
+
 #ifdef CONFIG_INET
 
 int ip_tunnel_init(struct net_device *dev);
@@ -236,6 +255,8 @@
 
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, const u8 protocol);
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+		       const u8 proto);
 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
diff --git a/include/net/kcm.h b/include/net/kcm.h
index 2840b58..2a89658 100644
--- a/include/net/kcm.h
+++ b/include/net/kcm.h
@@ -13,6 +13,7 @@
 
 #include <linux/skbuff.h>
 #include <net/sock.h>
+#include <net/strparser.h>
 #include <uapi/linux/kcm.h>
 
 extern unsigned int kcm_net_id;
@@ -21,16 +22,8 @@
 #define KCM_STATS_INCR(stat) ((stat)++)
 
 struct kcm_psock_stats {
-	unsigned long long rx_msgs;
-	unsigned long long rx_bytes;
 	unsigned long long tx_msgs;
 	unsigned long long tx_bytes;
-	unsigned int rx_aborts;
-	unsigned int rx_mem_fail;
-	unsigned int rx_need_more_hdr;
-	unsigned int rx_msg_too_big;
-	unsigned int rx_msg_timeouts;
-	unsigned int rx_bad_hdr_len;
 	unsigned long long reserved;
 	unsigned long long unreserved;
 	unsigned int tx_aborts;
@@ -64,13 +57,6 @@
 	struct sk_buff *last_skb;
 };
 
-struct kcm_rx_msg {
-	int full_len;
-	int accum_len;
-	int offset;
-	int early_eaten;
-};
-
 /* Socket structure for KCM client sockets */
 struct kcm_sock {
 	struct sock sk;
@@ -87,6 +73,7 @@
 	struct work_struct tx_work;
 	struct list_head wait_psock_list;
 	struct sk_buff *seq_skb;
+	u32 tx_stopped : 1;
 
 	/* Don't use bit fields here, these are set under different locks */
 	bool tx_wait;
@@ -104,11 +91,11 @@
 /* Structure for an attached lower socket */
 struct kcm_psock {
 	struct sock *sk;
+	struct strparser strp;
 	struct kcm_mux *mux;
 	int index;
 
 	u32 tx_stopped : 1;
-	u32 rx_stopped : 1;
 	u32 done : 1;
 	u32 unattaching : 1;
 
@@ -121,18 +108,12 @@
 	struct kcm_psock_stats stats;
 
 	/* Receive */
-	struct sk_buff *rx_skb_head;
-	struct sk_buff **rx_skb_nextp;
-	struct sk_buff *ready_rx_msg;
 	struct list_head psock_ready_list;
-	struct work_struct rx_work;
-	struct delayed_work rx_delayed_work;
 	struct bpf_prog *bpf_prog;
 	struct kcm_sock *rx_kcm;
 	unsigned long long saved_rx_bytes;
 	unsigned long long saved_rx_msgs;
-	struct timer_list rx_msg_timer;
-	unsigned int rx_need_bytes;
+	struct sk_buff *ready_rx_msg;
 
 	/* Transmit */
 	struct kcm_sock *tx_kcm;
@@ -146,6 +127,7 @@
 	struct mutex mutex;
 	struct kcm_psock_stats aggregate_psock_stats;
 	struct kcm_mux_stats aggregate_mux_stats;
+	struct strp_aggr_stats aggregate_strp_stats;
 	struct list_head mux_list;
 	int count;
 };
@@ -163,6 +145,7 @@
 
 	struct kcm_mux_stats stats;
 	struct kcm_psock_stats aggregate_psock_stats;
+	struct strp_aggr_stats aggregate_strp_stats;
 
 	/* Receive */
 	spinlock_t rx_lock ____cacheline_aligned_in_smp;
@@ -190,14 +173,6 @@
 	/* Save psock statistics in the mux when psock is being unattached. */
 
 #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
-	SAVE_PSOCK_STATS(rx_msgs);
-	SAVE_PSOCK_STATS(rx_bytes);
-	SAVE_PSOCK_STATS(rx_aborts);
-	SAVE_PSOCK_STATS(rx_mem_fail);
-	SAVE_PSOCK_STATS(rx_need_more_hdr);
-	SAVE_PSOCK_STATS(rx_msg_too_big);
-	SAVE_PSOCK_STATS(rx_msg_timeouts);
-	SAVE_PSOCK_STATS(rx_bad_hdr_len);
 	SAVE_PSOCK_STATS(tx_msgs);
 	SAVE_PSOCK_STATS(tx_bytes);
 	SAVE_PSOCK_STATS(reserved);
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index e900950..b220dab 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -11,6 +11,7 @@
 #ifndef _NET_L3MDEV_H_
 #define _NET_L3MDEV_H_
 
+#include <net/dst.h>
 #include <net/fib_rules.h>
 
 /**
@@ -18,30 +19,24 @@
  *
  * @l3mdev_fib_table: Get FIB table id to use for lookups
  *
- * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
+ * @l3mdev_l3_rcv:    Hook in L3 receive path
  *
- * @l3mdev_get_saddr: Get source address for a flow
+ * @l3mdev_l3_out:    Hook in L3 output path
  *
- * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device
+ * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations
  */
 
 struct l3mdev_ops {
 	u32		(*l3mdev_fib_table)(const struct net_device *dev);
 	struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev,
 					  struct sk_buff *skb, u16 proto);
-
-	/* IPv4 ops */
-	struct rtable *	(*l3mdev_get_rtable)(const struct net_device *dev,
-					     const struct flowi4 *fl4);
-	int		(*l3mdev_get_saddr)(struct net_device *dev,
-					    struct flowi4 *fl4);
+	struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev,
+					  struct sock *sk, struct sk_buff *skb,
+					  u16 proto);
 
 	/* IPv6 ops */
-	struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev,
+	struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev,
 						 struct flowi6 *fl6);
-	int		   (*l3mdev_get_saddr6)(struct net_device *dev,
-						const struct sock *sk,
-						struct flowi6 *fl6);
 };
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
@@ -49,6 +44,8 @@
 int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
 			  struct fib_lookup_arg *arg);
 
+void l3mdev_update_flow(struct net *net, struct flowi *fl);
+
 int l3mdev_master_ifindex_rcu(const struct net_device *dev);
 static inline int l3mdev_master_ifindex(struct net_device *dev)
 {
@@ -80,7 +77,7 @@
 }
 
 static inline
-const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
+struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev)
 {
 	/* netdev_master_upper_dev_get_rcu calls
 	 * list_first_or_null_rcu to walk the upper dev list.
@@ -89,7 +86,7 @@
 	 * typecast to remove the const
 	 */
 	struct net_device *dev = (struct net_device *)_dev;
-	const struct net_device *master;
+	struct net_device *master;
 
 	if (!dev)
 		return NULL;
@@ -104,26 +101,6 @@
 	return master;
 }
 
-/* get index of an interface to use for FIB lookups. For devices
- * enslaved to an L3 master device FIB lookups are based on the
- * master index
- */
-static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
-{
-	return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex;
-}
-
-static inline int l3mdev_fib_oif(struct net_device *dev)
-{
-	int oif;
-
-	rcu_read_lock();
-	oif = l3mdev_fib_oif_rcu(dev);
-	rcu_read_unlock();
-
-	return oif;
-}
-
 u32 l3mdev_fib_table_rcu(const struct net_device *dev);
 u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
 static inline u32 l3mdev_fib_table(const struct net_device *dev)
@@ -137,39 +114,7 @@
 	return tb_id;
 }
 
-static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
-					       const struct flowi4 *fl4)
-{
-	if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable)
-		return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4);
-
-	return NULL;
-}
-
-static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
-{
-	struct net_device *dev;
-	bool rc = false;
-
-	if (ifindex == 0)
-		return false;
-
-	rcu_read_lock();
-
-	dev = dev_get_by_index_rcu(net, ifindex);
-	if (dev)
-		rc = netif_is_l3_master(dev);
-
-	rcu_read_unlock();
-
-	return rc;
-}
-
-int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4);
-
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6);
-int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
-		      struct flowi6 *fl6);
+struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6);
 
 static inline
 struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto)
@@ -199,6 +144,34 @@
 	return l3mdev_l3_rcv(skb, AF_INET6);
 }
 
+static inline
+struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto)
+{
+	struct net_device *dev = skb_dst(skb)->dev;
+
+	if (netif_is_l3_slave(dev)) {
+		struct net_device *master;
+
+		master = netdev_master_upper_dev_get_rcu(dev);
+		if (master && master->l3mdev_ops->l3mdev_l3_out)
+			skb = master->l3mdev_ops->l3mdev_l3_out(master, sk,
+								skb, proto);
+	}
+
+	return skb;
+}
+
+static inline
+struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
+{
+	return l3mdev_l3_out(sk, skb, AF_INET);
+}
+
+static inline
+struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
+{
+	return l3mdev_l3_out(sk, skb, AF_INET6);
+}
 #else
 
 static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev)
@@ -216,20 +189,11 @@
 }
 
 static inline
-const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
+struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev)
 {
 	return NULL;
 }
 
-static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
-{
-	return dev ? dev->ifindex : 0;
-}
-static inline int l3mdev_fib_oif(struct net_device *dev)
-{
-	return dev ? dev->ifindex : 0;
-}
-
 static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
 {
 	return 0;
@@ -243,35 +207,12 @@
 	return 0;
 }
 
-static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
-					       const struct flowi4 *fl4)
-{
-	return NULL;
-}
-
-static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
-{
-	return false;
-}
-
-static inline int l3mdev_get_saddr(struct net *net, int ifindex,
-				   struct flowi4 *fl4)
-{
-	return 0;
-}
-
 static inline
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6)
+struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6)
 {
 	return NULL;
 }
 
-static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
-				    struct flowi6 *fl6)
-{
-	return 0;
-}
-
 static inline
 struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb)
 {
@@ -285,11 +226,27 @@
 }
 
 static inline
+struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb)
+{
+	return skb;
+}
+
+static inline
+struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
+{
+	return skb;
+}
+
+static inline
 int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
 			  struct fib_lookup_arg *arg)
 {
 	return 1;
 }
+static inline
+void l3mdev_update_flow(struct net *net, struct flowi *fl)
+{
+}
 #endif
 
 #endif /* _NET_L3MDEV_H_ */
diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h
index e9f116e..ea3f80f 100644
--- a/include/net/lwtunnel.h
+++ b/include/net/lwtunnel.h
@@ -13,6 +13,13 @@
 /* lw tunnel state flags */
 #define LWTUNNEL_STATE_OUTPUT_REDIRECT	BIT(0)
 #define LWTUNNEL_STATE_INPUT_REDIRECT	BIT(1)
+#define LWTUNNEL_STATE_XMIT_REDIRECT	BIT(2)
+
+enum {
+	LWTUNNEL_XMIT_DONE,
+	LWTUNNEL_XMIT_CONTINUE,
+};
+
 
 struct lwtunnel_state {
 	__u16		type;
@@ -21,6 +28,7 @@
 	int		(*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb);
 	int		(*orig_input)(struct sk_buff *);
 	int             len;
+	__u16		headroom;
 	__u8            data[0];
 };
 
@@ -34,6 +42,7 @@
 			  struct lwtunnel_state *lwtstate);
 	int (*get_encap_size)(struct lwtunnel_state *lwtstate);
 	int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
+	int (*xmit)(struct sk_buff *skb);
 };
 
 #ifdef CONFIG_LWTUNNEL
@@ -75,6 +84,24 @@
 
 	return false;
 }
+
+static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
+{
+	if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT))
+		return true;
+
+	return false;
+}
+
+static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
+					     unsigned int mtu)
+{
+	if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu)
+		return lwtstate->headroom;
+
+	return 0;
+}
+
 int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
 			   unsigned int num);
 int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
@@ -90,6 +117,7 @@
 int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
 int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int lwtunnel_input(struct sk_buff *skb);
+int lwtunnel_xmit(struct sk_buff *skb);
 
 #else
 
@@ -117,6 +145,17 @@
 	return false;
 }
 
+static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
+{
+	return false;
+}
+
+static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
+					     unsigned int mtu)
+{
+	return 0;
+}
+
 static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
 					 unsigned int num)
 {
@@ -170,6 +209,11 @@
 	return -EOPNOTSUPP;
 }
 
+static inline int lwtunnel_xmit(struct sk_buff *skb)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* CONFIG_LWTUNNEL */
 
 #define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type))
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b4faadb..a810dfc 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -715,6 +715,7 @@
  *	frame (PS-Poll or uAPSD).
  * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information
  * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
+ * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path
  *
  * These flags are used in tx_info->control.flags.
  */
@@ -723,6 +724,7 @@
 	IEEE80211_TX_CTRL_PS_RESPONSE		= BIT(1),
 	IEEE80211_TX_CTRL_RATE_INJECT		= BIT(2),
 	IEEE80211_TX_CTRL_AMSDU			= BIT(3),
+	IEEE80211_TX_CTRL_FAST_XMIT		= BIT(4),
 };
 
 /*
@@ -1735,6 +1737,9 @@
  * @supp_rates: Bitmap of supported rates (per band)
  * @ht_cap: HT capabilities of this STA; restricted to our own capabilities
  * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities
+ * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU
+ *	that this station is allowed to transmit to us.
+ *	Can be modified by driver.
  * @wme: indicates whether the STA supports QoS/WME (if local devices does,
  *	otherwise always false)
  * @drv_priv: data area for driver use, will always be aligned to
@@ -1775,6 +1780,7 @@
 	u16 aid;
 	struct ieee80211_sta_ht_cap ht_cap;
 	struct ieee80211_sta_vht_cap vht_cap;
+	u8 max_rx_aggregation_subframes;
 	bool wme;
 	u8 uapsd_queues;
 	u8 max_sp;
@@ -2014,6 +2020,11 @@
  * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list
  *	skbs, needed for zero-copy software A-MSDU.
  *
+ * @IEEE80211_HW_REPORTS_LOW_ACK: The driver (or firmware) reports low ack event
+ *	by ieee80211_report_low_ack() based on its own algorithm. For such
+ *	drivers, mac80211 packet loss mechanism will not be triggered and driver
+ *	is completely depending on firmware event for station kickout.
+ *
  * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
  */
 enum ieee80211_hw_flags {
@@ -2054,6 +2065,7 @@
 	IEEE80211_HW_USES_RSS,
 	IEEE80211_HW_TX_AMSDU,
 	IEEE80211_HW_TX_FRAG_LIST,
+	IEEE80211_HW_REPORTS_LOW_ACK,
 
 	/* keep last, obviously */
 	NUM_IEEE80211_HW_FLAGS
@@ -2141,6 +2153,14 @@
  *	the default is _GI | _BANDWIDTH.
  *	Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values.
  *
+ * @radiotap_timestamp: Information for the radiotap timestamp field; if the
+ *	'units_pos' member is set to a non-negative value it must be set to
+ *	a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a
+ *	IEEE80211_RADIOTAP_TIMESTAMP_SPOS_* value, and then the timestamp
+ *	field will be added and populated from the &struct ieee80211_rx_status
+ *	device_timestamp. If the 'accuracy' member is non-negative, it's put
+ *	into the accuracy radiotap field and the accuracy known flag is set.
+ *
  * @netdev_features: netdev features to be set in each netdev created
  *	from this HW. Note that not all features are usable with mac80211,
  *	other features will be rejected during HW registration.
@@ -2159,6 +2179,8 @@
  * @n_cipher_schemes: a size of an array of cipher schemes definitions.
  * @cipher_schemes: a pointer to an array of cipher scheme definitions
  *	supported by HW.
+ * @max_nan_de_entries: maximum number of NAN DE functions supported by the
+ *	device.
  */
 struct ieee80211_hw {
 	struct ieee80211_conf conf;
@@ -2184,11 +2206,16 @@
 	u8 offchannel_tx_hw_queue;
 	u8 radiotap_mcs_details;
 	u16 radiotap_vht_details;
+	struct {
+		int units_pos;
+		s16 accuracy;
+	} radiotap_timestamp;
 	netdev_features_t netdev_features;
 	u8 uapsd_queues;
 	u8 uapsd_max_sp_len;
 	u8 n_cipher_schemes;
 	const struct ieee80211_cipher_scheme *cipher_schemes;
+	u8 max_nan_de_entries;
 };
 
 static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw,
@@ -3085,11 +3112,8 @@
  *
  * @sta_add_debugfs: Drivers can use this callback to add debugfs files
  *	when a station is added to mac80211's station list. This callback
- *	and @sta_remove_debugfs should be within a CONFIG_MAC80211_DEBUGFS
- *	conditional. This callback can sleep.
- *
- * @sta_remove_debugfs: Remove the debugfs files which were added using
- *	@sta_add_debugfs. This callback can sleep.
+ *	should be within a CONFIG_MAC80211_DEBUGFS conditional. This
+ *	callback can sleep.
  *
  * @sta_notify: Notifies low level driver about power state transition of an
  *	associated station, AP,  IBSS/WDS/mesh peer etc. For a VIF operating
@@ -3147,6 +3171,12 @@
  *	required function.
  *	The callback can sleep.
  *
+ * @offset_tsf: Offset the TSF timer by the specified value in the
+ *	firmware/hardware.  Preferred to set_tsf as it avoids delay between
+ *	calling set_tsf() and hardware getting programmed, which will show up
+ *	as TSF delay. Is not a required function.
+ *	The callback can sleep.
+ *
  * @reset_tsf: Reset the TSF timer and allow firmware/hardware to synchronize
  *	with other STAs in the IBSS. This is only used in IBSS mode. This
  *	function is optional if the firmware/hardware takes full care of
@@ -3401,6 +3431,21 @@
  *	synchronization which is needed in case driver has in its RSS queues
  *	pending frames that were received prior to the control path action
  *	currently taken (e.g. disassociation) but are not processed yet.
+ *
+ * @start_nan: join an existing NAN cluster, or create a new one.
+ * @stop_nan: leave the NAN cluster.
+ * @nan_change_conf: change NAN configuration. The data in cfg80211_nan_conf
+ *	contains full new configuration and changes specify which parameters
+ *	are changed with respect to the last NAN config.
+ *	The driver gets both full configuration and the changed parameters since
+ *	some devices may need the full configuration while others need only the
+ *	changed parameters.
+ * @add_nan_func: Add a NAN function. Returns 0 on success. The data in
+ *	cfg80211_nan_func must not be referenced outside the scope of
+ *	this call.
+ * @del_nan_func: Remove a NAN function. The driver must call
+ *	ieee80211_nan_func_terminated() with
+ *	NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal.
  */
 struct ieee80211_ops {
 	void (*tx)(struct ieee80211_hw *hw,
@@ -3485,10 +3530,6 @@
 				struct ieee80211_vif *vif,
 				struct ieee80211_sta *sta,
 				struct dentry *dir);
-	void (*sta_remove_debugfs)(struct ieee80211_hw *hw,
-				   struct ieee80211_vif *vif,
-				   struct ieee80211_sta *sta,
-				   struct dentry *dir);
 #endif
 	void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			enum sta_notify_cmd, struct ieee80211_sta *sta);
@@ -3516,6 +3557,8 @@
 	u64 (*get_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 	void (*set_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			u64 tsf);
+	void (*offset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			   s64 offset);
 	void (*reset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 	int (*tx_last_beacon)(struct ieee80211_hw *hw);
 	int (*ampdu_action)(struct ieee80211_hw *hw,
@@ -3620,7 +3663,8 @@
 
 	int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
 	void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif);
-	u32 (*get_expected_throughput)(struct ieee80211_sta *sta);
+	u32 (*get_expected_throughput)(struct ieee80211_hw *hw,
+				       struct ieee80211_sta *sta);
 	int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			   int *dbm);
 
@@ -3639,6 +3683,21 @@
 	void (*wake_tx_queue)(struct ieee80211_hw *hw,
 			      struct ieee80211_txq *txq);
 	void (*sync_rx_queues)(struct ieee80211_hw *hw);
+
+	int (*start_nan)(struct ieee80211_hw *hw,
+			 struct ieee80211_vif *vif,
+			 struct cfg80211_nan_conf *conf);
+	int (*stop_nan)(struct ieee80211_hw *hw,
+			struct ieee80211_vif *vif);
+	int (*nan_change_conf)(struct ieee80211_hw *hw,
+			       struct ieee80211_vif *vif,
+			       struct cfg80211_nan_conf *conf, u32 changes);
+	int (*add_nan_func)(struct ieee80211_hw *hw,
+			    struct ieee80211_vif *vif,
+			    const struct cfg80211_nan_func *nan_func);
+	void (*del_nan_func)(struct ieee80211_hw *hw,
+			    struct ieee80211_vif *vif,
+			    u8 instance_id);
 };
 
 /**
@@ -5712,4 +5771,36 @@
 void ieee80211_txq_get_depth(struct ieee80211_txq *txq,
 			     unsigned long *frame_cnt,
 			     unsigned long *byte_cnt);
+
+/**
+ * ieee80211_nan_func_terminated - notify about NAN function termination.
+ *
+ * This function is used to notify mac80211 about NAN function termination.
+ * Note that this function can't be called from hard irq.
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @inst_id: the local instance id
+ * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*)
+ * @gfp: allocation flags
+ */
+void ieee80211_nan_func_terminated(struct ieee80211_vif *vif,
+				   u8 inst_id,
+				   enum nl80211_nan_func_term_reason reason,
+				   gfp_t gfp);
+
+/**
+ * ieee80211_nan_func_match - notify about NAN function match event.
+ *
+ * This function is used to notify mac80211 about NAN function match. The
+ * cookie inside the match struct will be assigned by mac80211.
+ * Note that this function can't be called from hard irq.
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @match: match event information
+ * @gfp: allocation flags
+ */
+void ieee80211_nan_func_match(struct ieee80211_vif *vif,
+			      struct cfg80211_nan_match_params *match,
+			      gfp_t gfp);
+
 #endif /* MAC80211_H */
diff --git a/include/net/mpls.h b/include/net/mpls.h
index 5b3b5ad..1dbc669 100644
--- a/include/net/mpls.h
+++ b/include/net/mpls.h
@@ -19,21 +19,18 @@
 
 #define MPLS_HLEN 4
 
+struct mpls_shim_hdr {
+	__be32 label_stack_entry;
+};
+
 static inline bool eth_p_mpls(__be16 eth_type)
 {
 	return eth_type == htons(ETH_P_MPLS_UC) ||
 		eth_type == htons(ETH_P_MPLS_MC);
 }
 
-/*
- * For non-MPLS skbs this will correspond to the network header.
- * For MPLS skbs it will be before the network_header as the MPLS
- * label stack lies between the end of the mac header and the network
- * header. That is, for MPLS skbs the end of the mac header
- * is the top of the MPLS label stack.
- */
-static inline unsigned char *skb_mpls_header(struct sk_buff *skb)
+static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
 {
-	return skb_mac_header(skb) + skb->mac_len;
+	return (struct mpls_shim_hdr *)skb_network_header(skb);
 }
 #endif
diff --git a/include/net/ncsi.h b/include/net/ncsi.h
index 1dbf42f..68680ba 100644
--- a/include/net/ncsi.h
+++ b/include/net/ncsi.h
@@ -31,6 +31,7 @@
 struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 				   void (*notifier)(struct ncsi_dev *nd));
 int ncsi_start_dev(struct ncsi_dev *nd);
+void ncsi_stop_dev(struct ncsi_dev *nd);
 void ncsi_unregister_dev(struct ncsi_dev *nd);
 #else /* !CONFIG_NET_NCSI */
 static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
@@ -44,6 +45,10 @@
 	return -ENOTTY;
 }
 
+static void ncsi_stop_dev(struct ncsi_dev *nd)
+{
+}
+
 static inline void ncsi_unregister_dev(struct ncsi_dev *nd)
 {
 }
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index e8d1448..0b0c35c 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -15,6 +15,12 @@
 
 void nf_bridge_update_protocol(struct sk_buff *skb);
 
+int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk,
+		      struct sk_buff *skb, struct net_device *indev,
+		      struct net_device *outdev,
+		      int (*okfn)(struct net *, struct sock *,
+				  struct sk_buff *));
+
 static inline struct nf_bridge_info *
 nf_bridge_info_get(const struct sk_buff *skb)
 {
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 445b019..5041805 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -42,7 +42,6 @@
 
 #include <linux/types.h>
 #include <linux/skbuff.h>
-#include <linux/timer.h>
 
 #ifdef CONFIG_NETFILTER_DEBUG
 #define NF_CT_ASSERT(x)		WARN_ON(!(x))
@@ -73,7 +72,7 @@
 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
 
 struct nf_conn {
-	/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
+	/* Usage count in here is 1 for hash table, 1 per skb,
 	 * plus 1 for any connection(s) we are `master' for
 	 *
 	 * Hint, SKB address this struct and refcnt via skb->nfct and
@@ -96,8 +95,8 @@
 	/* Have we seen traffic both ways yet? (bitset) */
 	unsigned long status;
 
-	/* Timer function; drops refcnt when it goes off. */
-	struct timer_list timeout;
+	/* jiffies32 when this ct is considered dead */
+	u32 timeout;
 
 	possible_net_t ct_net;
 
@@ -220,21 +219,14 @@
 	__nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0);
 }
 
-bool __nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
-		       const struct sk_buff *skb, int do_acct);
-
 /* kill conntrack and do accounting */
-static inline bool nf_ct_kill_acct(struct nf_conn *ct,
-				   enum ip_conntrack_info ctinfo,
-				   const struct sk_buff *skb)
-{
-	return __nf_ct_kill_acct(ct, ctinfo, skb, 1);
-}
+bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+		     const struct sk_buff *skb);
 
 /* kill conntrack without accounting */
 static inline bool nf_ct_kill(struct nf_conn *ct)
 {
-	return __nf_ct_kill_acct(ct, 0, NULL, 0);
+	return nf_ct_delete(ct, 0, 0);
 }
 
 /* These are for NAT.  Icky. */
@@ -291,21 +283,55 @@
 	return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK;
 }
 
+#define nfct_time_stamp ((u32)(jiffies))
+
 /* jiffies until ct expires, 0 if already expired */
 static inline unsigned long nf_ct_expires(const struct nf_conn *ct)
 {
-	long timeout = (long)ct->timeout.expires - (long)jiffies;
+	s32 timeout = ct->timeout - nfct_time_stamp;
 
 	return timeout > 0 ? timeout : 0;
 }
 
+static inline bool nf_ct_is_expired(const struct nf_conn *ct)
+{
+	return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
+}
+
+/* use after obtaining a reference count */
+static inline bool nf_ct_should_gc(const struct nf_conn *ct)
+{
+	return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) &&
+	       !nf_ct_is_dying(ct);
+}
+
 struct kernel_param;
 
 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
 int nf_conntrack_hash_resize(unsigned int hashsize);
+
+extern struct hlist_nulls_head *nf_conntrack_hash;
 extern unsigned int nf_conntrack_htable_size;
+extern seqcount_t nf_conntrack_generation;
 extern unsigned int nf_conntrack_max;
 
+/* must be called with rcu read lock held */
+static inline void
+nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+{
+	struct hlist_nulls_head *hptr;
+	unsigned int sequence, hsz;
+
+	do {
+		sequence = read_seqcount_begin(&nf_conntrack_generation);
+		hsz = nf_conntrack_htable_size;
+		hptr = nf_conntrack_hash;
+	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+	*hash = hptr;
+	*hsize = hsz;
+}
+
 struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
 				 const struct nf_conntrack_zone *zone,
 				 gfp_t flags);
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 79d7ac5..62e17d1 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -51,8 +51,6 @@
 			const struct nf_conntrack_l3proto *l3proto,
 			const struct nf_conntrack_l4proto *l4proto);
 
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);
-
 /* Find a connection corresponding to a tuple. */
 struct nf_conntrack_tuple_hash *
 nf_conntrack_find_get(struct net *net,
@@ -83,7 +81,6 @@
 
 #define CONNTRACK_LOCKS 1024
 
-extern struct hlist_nulls_head *nf_conntrack_hash;
 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
 void nf_conntrack_lock(spinlock_t *lock);
 
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index fa36447..12d967b 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -12,12 +12,19 @@
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
 #include <net/netfilter/nf_conntrack_extend.h>
 
+enum nf_ct_ecache_state {
+	NFCT_ECACHE_UNKNOWN,		/* destroy event not sent */
+	NFCT_ECACHE_DESTROY_FAIL,	/* tried but failed to send destroy event */
+	NFCT_ECACHE_DESTROY_SENT,	/* sent destroy event after failure */
+};
+
 struct nf_conntrack_ecache {
-	unsigned long cache;	/* bitops want long */
-	unsigned long missed;	/* missed events */
-	u16 ctmask;		/* bitmask of ct events to be delivered */
-	u16 expmask;		/* bitmask of expect events to be delivered */
-	u32 portid;		/* netlink portid of destroyer */
+	unsigned long cache;		/* bitops want long */
+	unsigned long missed;		/* missed events */
+	u16 ctmask;			/* bitmask of ct events to be delivered */
+	u16 expmask;			/* bitmask of expect events to be delivered */
+	u32 portid;			/* netlink portid of destroyer */
+	enum nf_ct_ecache_state state;	/* ecache state */
 };
 
 static inline struct nf_conntrack_ecache *
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index cdc920b..8992e42 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -63,10 +63,6 @@
 
 	size_t nla_size;
 
-#ifdef CONFIG_SYSCTL
-	const char		*ctl_table_path;
-#endif /* CONFIG_SYSCTL */
-
 	/* Init l3proto pernet data */
 	int (*init_net)(struct net *net);
 
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 1a5fb36..de629f1 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -134,14 +134,6 @@
 int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto);
 void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto);
 
-static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn)
-{
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-	kfree(pn->ctl_compat_table);
-	pn->ctl_compat_table = NULL;
-#endif
-}
-
 /* Generic netlink helpers */
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
 			       const struct nf_conntrack_tuple *tuple);
diff --git a/include/net/netfilter/nf_conntrack_synproxy.h b/include/net/netfilter/nf_conntrack_synproxy.h
index 6793614..e693731 100644
--- a/include/net/netfilter/nf_conntrack_synproxy.h
+++ b/include/net/netfilter/nf_conntrack_synproxy.h
@@ -27,6 +27,20 @@
 #endif
 }
 
+static inline bool nf_ct_add_synproxy(struct nf_conn *ct,
+				      const struct nf_conn *tmpl)
+{
+	if (tmpl && nfct_synproxy(tmpl)) {
+		if (!nfct_seqadj_ext_add(ct))
+			return false;
+
+		if (!nfct_synproxy_ext_add(ct))
+			return false;
+	}
+
+	return true;
+}
+
 struct synproxy_stats {
 	unsigned int			syn_received;
 	unsigned int			cookie_invalid;
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index 83d855b..309cd26 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -2,15 +2,10 @@
 #define _NF_LOG_H
 
 #include <linux/netfilter.h>
+#include <linux/netfilter/nf_log.h>
 
-/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
- * disappear once iptables is replaced with pkttables.  Please DO NOT use them
- * for any new code! */
-#define NF_LOG_TCPSEQ		0x01	/* Log TCP sequence numbers */
-#define NF_LOG_TCPOPT		0x02	/* Log TCP options */
-#define NF_LOG_IPOPT		0x04	/* Log IP options */
-#define NF_LOG_UID		0x08	/* Log UID owning local socket */
-#define NF_LOG_MASK		0x0f
+/* Log tcp sequence, tcp options, ip options and uid owning local socket */
+#define NF_LOG_DEFAULT_MASK	0x0f
 
 /* This flag indicates that copy_len field in nf_loginfo is set */
 #define NF_LOG_F_COPY_LEN	0x1
@@ -60,8 +55,7 @@
 int nf_log_register(u_int8_t pf, struct nf_logger *logger);
 void nf_log_unregister(struct nf_logger *logger);
 
-void nf_log_set(struct net *net, u_int8_t pf,
-		const struct nf_logger *logger);
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger);
 void nf_log_unset(struct net *net, const struct nf_logger *logger);
 
 int nf_log_bind_pf(struct net *net, u_int8_t pf,
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 0dbce55..2280cfe 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -11,7 +11,6 @@
 	struct sk_buff		*skb;
 	unsigned int		id;
 
-	struct nf_hook_ops	*elem;
 	struct nf_hook_state	state;
 	u16			size; /* sizeof(entry) + saved route keys */
 
@@ -22,10 +21,10 @@
 
 /* Packet queuing */
 struct nf_queue_handler {
-	int			(*outfn)(struct nf_queue_entry *entry,
-					 unsigned int queuenum);
-	void			(*nf_hook_drop)(struct net *net,
-						struct nf_hook_ops *ops);
+	int		(*outfn)(struct nf_queue_entry *entry,
+				 unsigned int queuenum);
+	void		(*nf_hook_drop)(struct net *net,
+					const struct nf_hook_entry *hooks);
 };
 
 void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
@@ -41,23 +40,19 @@
 		*jhash_initval = prandom_u32();
 }
 
-static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval)
+static inline u32 hash_v4(const struct iphdr *iph, u32 initval)
 {
-	const struct iphdr *iph = ip_hdr(skb);
-
 	/* packets in either direction go into same queue */
 	if ((__force u32)iph->saddr < (__force u32)iph->daddr)
 		return jhash_3words((__force u32)iph->saddr,
-			(__force u32)iph->daddr, iph->protocol, jhash_initval);
+			(__force u32)iph->daddr, iph->protocol, initval);
 
 	return jhash_3words((__force u32)iph->daddr,
-			(__force u32)iph->saddr, iph->protocol, jhash_initval);
+			(__force u32)iph->saddr, iph->protocol, initval);
 }
 
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
+static inline u32 hash_v6(const struct ipv6hdr *ip6h, u32 initval)
 {
-	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 	u32 a, b, c;
 
 	if ((__force u32)ip6h->saddr.s6_addr32[3] <
@@ -75,20 +70,50 @@
 	else
 		c = (__force u32) ip6h->daddr.s6_addr32[1];
 
-	return jhash_3words(a, b, c, jhash_initval);
+	return jhash_3words(a, b, c, initval);
 }
-#endif
+
+static inline u32 hash_bridge(const struct sk_buff *skb, u32 initval)
+{
+	struct ipv6hdr *ip6h, _ip6h;
+	struct iphdr *iph, _iph;
+
+	switch (eth_hdr(skb)->h_proto) {
+	case htons(ETH_P_IP):
+		iph = skb_header_pointer(skb, skb_network_offset(skb),
+					 sizeof(*iph), &_iph);
+		if (iph)
+			return hash_v4(iph, initval);
+		break;
+	case htons(ETH_P_IPV6):
+		ip6h = skb_header_pointer(skb, skb_network_offset(skb),
+					  sizeof(*ip6h), &_ip6h);
+		if (ip6h)
+			return hash_v6(ip6h, initval);
+		break;
+	}
+
+	return 0;
+}
 
 static inline u32
 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
-	     u32 jhash_initval)
+	     u32 initval)
 {
-	if (family == NFPROTO_IPV4)
-		queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32;
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-	else if (family == NFPROTO_IPV6)
-		queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32;
-#endif
+	switch (family) {
+	case NFPROTO_IPV4:
+		queue += reciprocal_scale(hash_v4(ip_hdr(skb), initval),
+					  queues_total);
+		break;
+	case NFPROTO_IPV6:
+		queue += reciprocal_scale(hash_v6(ipv6_hdr(skb), initval),
+					  queues_total);
+		break;
+	case NFPROTO_BRIDGE:
+		queue += reciprocal_scale(hash_bridge(skb, initval),
+					  queues_total);
+		break;
+	}
 
 	return queue;
 }
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f2f1339..5031e07 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -19,6 +19,7 @@
 	const struct net_device		*out;
 	u8				pf;
 	u8				hook;
+	bool				tprot_set;
 	u8				tprot;
 	/* for x_tables compatibility */
 	struct xt_action_param		xt;
@@ -36,6 +37,23 @@
 	pkt->pf = pkt->xt.family = state->pf;
 }
 
+static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
+						struct sk_buff *skb)
+{
+	pkt->tprot_set = false;
+	pkt->tprot = 0;
+	pkt->xt.thoff = 0;
+	pkt->xt.fragoff = 0;
+}
+
+static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
+					  struct sk_buff *skb,
+					  const struct nf_hook_state *state)
+{
+	nft_set_pktinfo(pkt, skb, state);
+	nft_set_pktinfo_proto_unspec(pkt, skb);
+}
+
 /**
  * 	struct nft_verdict - nf_tables verdict
  *
@@ -127,6 +145,7 @@
 	return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
 }
 
+unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
 unsigned int nft_parse_register(const struct nlattr *attr);
 int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
 
@@ -251,7 +270,8 @@
 
 	int				(*insert)(const struct net *net,
 						  const struct nft_set *set,
-						  const struct nft_set_elem *elem);
+						  const struct nft_set_elem *elem,
+						  struct nft_set_ext **ext);
 	void				(*activate)(const struct net *net,
 						    const struct nft_set *set,
 						    const struct nft_set_elem *elem);
diff --git a/include/net/netfilter/nf_tables_bridge.h b/include/net/netfilter/nf_tables_bridge.h
deleted file mode 100644
index 511fb79..0000000
--- a/include/net/netfilter/nf_tables_bridge.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _NET_NF_TABLES_BRIDGE_H
-#define _NET_NF_TABLES_BRIDGE_H
-
-int nft_bridge_iphdr_validate(struct sk_buff *skb);
-int nft_bridge_ip6hdr_validate(struct sk_buff *skb);
-
-#endif /* _NET_NF_TABLES_BRIDGE_H */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index a9060dd..00f4f6b 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -28,6 +28,9 @@
 int nft_cmp_module_init(void);
 void nft_cmp_module_exit(void);
 
+int nft_range_module_init(void);
+void nft_range_module_exit(void);
+
 int nft_lookup_module_init(void);
 void nft_lookup_module_exit(void);
 
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index ca6ef6b..968f00b 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -14,11 +14,54 @@
 	nft_set_pktinfo(pkt, skb, state);
 
 	ip = ip_hdr(pkt->skb);
+	pkt->tprot_set = true;
 	pkt->tprot = ip->protocol;
 	pkt->xt.thoff = ip_hdrlen(pkt->skb);
 	pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 }
 
+static inline int
+__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+				struct sk_buff *skb,
+				const struct nf_hook_state *state)
+{
+	struct iphdr *iph, _iph;
+	u32 len, thoff;
+
+	iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
+				 &_iph);
+	if (!iph)
+		return -1;
+
+	iph = ip_hdr(skb);
+	if (iph->ihl < 5 || iph->version != 4)
+		return -1;
+
+	len = ntohs(iph->tot_len);
+	thoff = iph->ihl * 4;
+	if (skb->len < len)
+		return -1;
+	else if (len < thoff)
+		return -1;
+
+	pkt->tprot_set = true;
+	pkt->tprot = iph->protocol;
+	pkt->xt.thoff = thoff;
+	pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+
+	return 0;
+}
+
+static inline void
+nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+			      struct sk_buff *skb,
+			      const struct nf_hook_state *state)
+{
+	nft_set_pktinfo(pkt, skb, state);
+	if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0)
+		nft_set_pktinfo_proto_unspec(pkt, skb);
+}
+
 extern struct nft_af_info nft_af_ipv4;
 
 #endif
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index 8ad39a6..d150b50 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -4,7 +4,7 @@
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <net/ipv6.h>
 
-static inline int
+static inline void
 nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
 		     struct sk_buff *skb,
 		     const struct nf_hook_state *state)
@@ -15,15 +15,64 @@
 	nft_set_pktinfo(pkt, skb, state);
 
 	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
-	/* If malformed, drop it */
+	if (protohdr < 0) {
+		nft_set_pktinfo_proto_unspec(pkt, skb);
+		return;
+	}
+
+	pkt->tprot_set = true;
+	pkt->tprot = protohdr;
+	pkt->xt.thoff = thoff;
+	pkt->xt.fragoff = frag_off;
+}
+
+static inline int
+__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+				struct sk_buff *skb,
+				const struct nf_hook_state *state)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct ipv6hdr *ip6h, _ip6h;
+	unsigned int thoff = 0;
+	unsigned short frag_off;
+	int protohdr;
+	u32 pkt_len;
+
+	ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
+				  &_ip6h);
+	if (!ip6h)
+		return -1;
+
+	if (ip6h->version != 6)
+		return -1;
+
+	pkt_len = ntohs(ip6h->payload_len);
+	if (pkt_len + sizeof(*ip6h) > skb->len)
+		return -1;
+
+	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
 	if (protohdr < 0)
 		return -1;
 
+	pkt->tprot_set = true;
 	pkt->tprot = protohdr;
 	pkt->xt.thoff = thoff;
 	pkt->xt.fragoff = frag_off;
 
 	return 0;
+#else
+	return -1;
+#endif
+}
+
+static inline void
+nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+			      struct sk_buff *skb,
+			      const struct nf_hook_state *state)
+{
+	nft_set_pktinfo(pkt, skb, state);
+	if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0)
+		nft_set_pktinfo_proto_unspec(pkt, skb);
 }
 
 extern struct nft_af_info nft_af_ipv6;
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index d27588c..1139cde 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -36,4 +36,8 @@
 void nft_meta_set_destroy(const struct nft_ctx *ctx,
 			  const struct nft_expr *expr);
 
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nft_data **data);
+
 #endif
diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h
index 60fa153..02e28c5 100644
--- a/include/net/netfilter/nft_reject.h
+++ b/include/net/netfilter/nft_reject.h
@@ -8,6 +8,10 @@
 
 extern const struct nla_policy nft_reject_policy[];
 
+int nft_reject_validate(const struct nft_ctx *ctx,
+			const struct nft_expr *expr,
+			const struct nft_data **data);
+
 int nft_reject_init(const struct nft_ctx *ctx,
 		    const struct nft_expr *expr,
 		    const struct nlattr * const tb[]);
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 38b1a80..e469e85 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -15,10 +15,6 @@
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *ctl_table_header;
 	struct ctl_table        *ctl_table;
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	struct ctl_table_header *ctl_compat_header;
-	struct ctl_table        *ctl_compat_table;
-#endif
 #endif
 	unsigned int		users;
 };
@@ -58,10 +54,6 @@
 	struct nf_udp_net	udp;
 	struct nf_icmp_net	icmp;
 	struct nf_icmp_net	icmpv6;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-	struct ctl_table_header *ctl_table_header;
-	struct ctl_table	*ctl_table;
-#endif
 };
 
 struct ct_pcpu {
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index d061ffe..7adf438 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -40,7 +40,6 @@
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	struct fib_rules_ops	*rules_ops;
 	bool			fib_has_custom_rules;
-	struct fib_table __rcu	*fib_local;
 	struct fib_table __rcu	*fib_main;
 	struct fib_table __rcu	*fib_default;
 #endif
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 36d7235..58487b1 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -16,6 +16,6 @@
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header *nf_log_dir_header;
 #endif
-	struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+	struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 };
 #endif
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 24cd394..27bb963 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -11,7 +11,7 @@
 struct ctl_table_header;
 
 struct xfrm_policy_hash {
-	struct hlist_head	*table;
+	struct hlist_head	__rcu *table;
 	unsigned int		hmask;
 	u8			dbits4;
 	u8			sbits4;
@@ -38,14 +38,12 @@
 	 * mode. Also, it can be used by ah/esp icmp error handler to find
 	 * offending SA.
 	 */
-	struct hlist_head	*state_bydst;
-	struct hlist_head	*state_bysrc;
-	struct hlist_head	*state_byspi;
+	struct hlist_head	__rcu *state_bydst;
+	struct hlist_head	__rcu *state_bysrc;
+	struct hlist_head	__rcu *state_byspi;
 	unsigned int		state_hmask;
 	unsigned int		state_num;
 	struct work_struct	state_hash_work;
-	struct hlist_head	state_gc_list;
-	struct work_struct	state_gc_work;
 
 	struct list_head	policy_all;
 	struct hlist_head	*policy_byidx;
@@ -73,7 +71,7 @@
 	struct dst_ops		xfrm6_dst_ops;
 #endif
 	spinlock_t xfrm_state_lock;
-	rwlock_t xfrm_policy_lock;
+	spinlock_t xfrm_policy_lock;
 	struct mutex xfrm_cfg_mutex;
 
 	/* flow cache part */
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 6f8d653..767b03a 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -59,7 +59,8 @@
 struct tcf_exts {
 #ifdef CONFIG_NET_CLS_ACT
 	__u32	type; /* for backward compat(TCA_OLD_COMPAT) */
-	struct list_head actions;
+	int nr_actions;
+	struct tc_action **actions;
 #endif
 	/* Map to export classifier specific extension TLV types to the
 	 * generic extensions API. Unsupported extensions must be set to 0.
@@ -68,14 +69,19 @@
 	int police;
 };
 
-static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
+static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	exts->type = 0;
-	INIT_LIST_HEAD(&exts->actions);
+	exts->nr_actions = 0;
+	exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
+				GFP_KERNEL);
+	if (!exts->actions)
+		return -ENOMEM;
 #endif
 	exts->action = action;
 	exts->police = police;
+	return 0;
 }
 
 /**
@@ -89,7 +95,7 @@
 tcf_exts_is_predicative(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	return !list_empty(&exts->actions);
+	return exts->nr_actions;
 #else
 	return 0;
 #endif
@@ -108,6 +114,20 @@
 	return tcf_exts_is_predicative(exts);
 }
 
+static inline void tcf_exts_to_list(const struct tcf_exts *exts,
+				    struct list_head *actions)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	int i;
+
+	for (i = 0; i < exts->nr_actions; i++) {
+		struct tc_action *a = exts->actions[i];
+
+		list_add_tail(&a->list, actions);
+	}
+#endif
+}
+
 /**
  * tcf_exts_exec - execute tc filter extensions
  * @skb: socket buffer
@@ -124,12 +144,25 @@
 	       struct tcf_result *res)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	if (!list_empty(&exts->actions))
-		return tcf_action_exec(skb, &exts->actions, res);
+	if (exts->nr_actions)
+		return tcf_action_exec(skb, exts->actions, exts->nr_actions,
+				       res);
 #endif
 	return 0;
 }
 
+#ifdef CONFIG_NET_CLS_ACT
+
+#define tc_no_actions(_exts)  ((_exts)->nr_actions == 0)
+#define tc_single_action(_exts) ((_exts)->nr_actions == 1)
+
+#else /* CONFIG_NET_CLS_ACT */
+
+#define tc_no_actions(_exts) true
+#define tc_single_action(_exts) false
+
+#endif /* CONFIG_NET_CLS_ACT */
+
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 		      struct nlattr **tb, struct nlattr *rate_tlv,
 		      struct tcf_exts *exts, bool ovr);
@@ -453,4 +486,20 @@
 	unsigned long cookie;
 };
 
+enum tc_clsbpf_command {
+	TC_CLSBPF_ADD,
+	TC_CLSBPF_REPLACE,
+	TC_CLSBPF_DESTROY,
+	TC_CLSBPF_STATS,
+};
+
+struct tc_cls_bpf_offload {
+	enum tc_clsbpf_command command;
+	struct tcf_exts *exts;
+	struct bpf_prog *prog;
+	const char *name;
+	bool exts_integrated;
+	u32 gen_flags;
+};
+
 #endif
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 7caa99b..cd334c9 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -90,8 +90,8 @@
 void qdisc_get_default(char *id, size_t len);
 int qdisc_set_default(const char *id);
 
-void qdisc_list_add(struct Qdisc *q);
-void qdisc_list_del(struct Qdisc *q);
+void qdisc_hash_add(struct Qdisc *q);
+void qdisc_hash_del(struct Qdisc *q);
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
 struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
diff --git a/include/net/pptp.h b/include/net/pptp.h
new file mode 100644
index 0000000..92e9f1f
--- /dev/null
+++ b/include/net/pptp.h
@@ -0,0 +1,23 @@
+#ifndef _NET_PPTP_H
+#define _NET_PPTP_H
+
+#define PPP_LCP_ECHOREQ 0x09
+#define PPP_LCP_ECHOREP 0x0A
+#define SC_RCV_BITS     (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP)
+
+#define MISSING_WINDOW 20
+#define WRAPPED(curseq, lastseq)\
+	((((curseq) & 0xffffff00) == 0) &&\
+	(((lastseq) & 0xffffff00) == 0xffffff00))
+
+#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header))
+struct pptp_gre_header {
+	struct gre_base_hdr gre_hd;
+	__be16 payload_len;
+	__be16 call_id;
+	__be32 seq;
+	__be32 ack;
+} __packed;
+
+
+#endif
diff --git a/include/net/route.h b/include/net/route.h
index ad777d7..0429d47 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -29,7 +29,6 @@
 #include <net/flow.h>
 #include <net/inet_sock.h>
 #include <net/ip_fib.h>
-#include <net/l3mdev.h>
 #include <linux/in_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/rcupdate.h>
@@ -285,15 +284,6 @@
 	ip_route_connect_init(fl4, dst, src, tos, oif, protocol,
 			      sport, dport, sk);
 
-	if (!src && oif) {
-		int rc;
-
-		rc = l3mdev_get_saddr(net, oif, fl4);
-		if (rc < 0)
-			return ERR_PTR(rc);
-
-		src = fl4->saddr;
-	}
 	if (!dst || !src) {
 		rt = __ip_route_output_key(net, fl4);
 		if (IS_ERR(rt))
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 909aff2..e6aa0a2 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -36,6 +36,14 @@
 	u16			data[];
 };
 
+/* similar to sk_buff_head, but skb->prev pointer is undefined. */
+struct qdisc_skb_head {
+	struct sk_buff	*head;
+	struct sk_buff	*tail;
+	__u32		qlen;
+	spinlock_t	lock;
+};
+
 struct Qdisc {
 	int 			(*enqueue)(struct sk_buff *skb,
 					   struct Qdisc *sch,
@@ -61,7 +69,7 @@
 	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
-	struct list_head	list;
+	struct hlist_node       hash;
 	u32			handle;
 	u32			parent;
 	void			*u32_node;
@@ -76,7 +84,7 @@
 	 * For performance sake on SMP, we put highly modified fields at the end
 	 */
 	struct sk_buff		*gso_skb ____cacheline_aligned_in_smp;
-	struct sk_buff_head	q;
+	struct qdisc_skb_head	q;
 	struct gnet_stats_basic_packed bstats;
 	seqcount_t		running;
 	struct gnet_stats_queue	qstats;
@@ -592,7 +600,7 @@
 
 static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
 {
-	qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats));
+	this_cpu_inc(sch->cpu_qstats->drops);
 }
 
 static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
@@ -600,10 +608,27 @@
 	sch->qstats.overlimits++;
 }
 
-static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
-				       struct sk_buff_head *list)
+static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
 {
-	__skb_queue_tail(list, skb);
+	qh->head = NULL;
+	qh->tail = NULL;
+	qh->qlen = 0;
+}
+
+static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
+				       struct qdisc_skb_head *qh)
+{
+	struct sk_buff *last = qh->tail;
+
+	if (last) {
+		skb->next = NULL;
+		last->next = skb;
+		qh->tail = skb;
+	} else {
+		qh->tail = skb;
+		qh->head = skb;
+	}
+	qh->qlen++;
 	qdisc_qstats_backlog_inc(sch, skb);
 
 	return NET_XMIT_SUCCESS;
@@ -614,14 +639,16 @@
 	return __qdisc_enqueue_tail(skb, sch, &sch->q);
 }
 
-static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch,
-						   struct sk_buff_head *list)
+static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh)
 {
-	struct sk_buff *skb = __skb_dequeue(list);
+	struct sk_buff *skb = qh->head;
 
 	if (likely(skb != NULL)) {
-		qdisc_qstats_backlog_dec(sch, skb);
-		qdisc_bstats_update(sch, skb);
+		qh->head = skb->next;
+		qh->qlen--;
+		if (qh->head == NULL)
+			qh->tail = NULL;
+		skb->next = NULL;
 	}
 
 	return skb;
@@ -629,7 +656,14 @@
 
 static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
 {
-	return __qdisc_dequeue_head(sch, &sch->q);
+	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
+
+	if (likely(skb != NULL)) {
+		qdisc_qstats_backlog_dec(sch, skb);
+		qdisc_bstats_update(sch, skb);
+	}
+
+	return skb;
 }
 
 /* Instead of calling kfree_skb() while root qdisc lock is held,
@@ -642,10 +676,10 @@
 }
 
 static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
-						   struct sk_buff_head *list,
+						   struct qdisc_skb_head *qh,
 						   struct sk_buff **to_free)
 {
-	struct sk_buff *skb = __skb_dequeue(list);
+	struct sk_buff *skb = __qdisc_dequeue_head(qh);
 
 	if (likely(skb != NULL)) {
 		unsigned int len = qdisc_pkt_len(skb);
@@ -666,7 +700,9 @@
 
 static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
 {
-	return skb_peek(&sch->q);
+	const struct qdisc_skb_head *qh = &sch->q;
+
+	return qh->head;
 }
 
 /* generic pseudo peek method for non-work-conserving qdisc */
@@ -701,15 +737,19 @@
 	return skb;
 }
 
-static inline void __qdisc_reset_queue(struct sk_buff_head *list)
+static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh)
 {
 	/*
 	 * We do not know the backlog in bytes of this list, it
 	 * is up to the caller to correct it
 	 */
-	if (!skb_queue_empty(list)) {
-		rtnl_kfree_skbs(list->next, list->prev);
-		__skb_queue_head_init(list);
+	ASSERT_RTNL();
+	if (qh->qlen) {
+		rtnl_kfree_skbs(qh->head, qh->tail);
+
+		qh->head = NULL;
+		qh->tail = NULL;
+		qh->qlen = 0;
 	}
 }
 
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 632e205..87a7f42 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -83,9 +83,9 @@
 #endif
 
 /* Round an int up to the next multiple of 4.  */
-#define WORD_ROUND(s) (((s)+3)&~3)
+#define SCTP_PAD4(s) (((s)+3)&~3)
 /* Truncate to the previous multiple of 4.  */
-#define WORD_TRUNC(s) ((s)&~3)
+#define SCTP_TRUNC4(s) ((s)&~3)
 
 /*
  * Function declarations.
@@ -433,7 +433,7 @@
 	if (asoc->user_frag)
 		frag = min_t(int, frag, asoc->user_frag);
 
-	frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
+	frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN));
 
 	return frag;
 }
@@ -462,7 +462,7 @@
 for (pos.v = chunk->member;\
      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
      ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\
-     pos.v += WORD_ROUND(ntohs(pos.p->length)))
+     pos.v += SCTP_PAD4(ntohs(pos.p->length)))
 
 #define sctp_walk_errors(err, chunk_hdr)\
 _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
@@ -472,7 +472,7 @@
 	    sizeof(sctp_chunkhdr_t));\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
-     err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length))))
+     err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length))))
 
 #define sctp_walk_fwdtsn(pos, chunk)\
 _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk))
diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h
index efc0174..ca6c971 100644
--- a/include/net/sctp/sm.h
+++ b/include/net/sctp/sm.h
@@ -307,85 +307,27 @@
 }
 
 /* Compare two TSNs */
+#define TSN_lt(a,b)	\
+	(typecheck(__u32, a) && \
+	 typecheck(__u32, b) && \
+	 ((__s32)((a) - (b)) < 0))
 
-/* RFC 1982 - Serial Number Arithmetic
- *
- * 2. Comparison
- *  Then, s1 is said to be equal to s2 if and only if i1 is equal to i2,
- *  in all other cases, s1 is not equal to s2.
- *
- * s1 is said to be less than s2 if, and only if, s1 is not equal to s2,
- * and
- *
- *      (i1 < i2 and i2 - i1 < 2^(SERIAL_BITS - 1)) or
- *      (i1 > i2 and i1 - i2 > 2^(SERIAL_BITS - 1))
- *
- * s1 is said to be greater than s2 if, and only if, s1 is not equal to
- * s2, and
- *
- *      (i1 < i2 and i2 - i1 > 2^(SERIAL_BITS - 1)) or
- *      (i1 > i2 and i1 - i2 < 2^(SERIAL_BITS - 1))
- */
-
-/*
- * RFC 2960
- *  1.6 Serial Number Arithmetic
- *
- * Comparisons and arithmetic on TSNs in this document SHOULD use Serial
- * Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 32.
- */
-
-enum {
-	TSN_SIGN_BIT = (1<<31)
-};
-
-static inline int TSN_lt(__u32 s, __u32 t)
-{
-	return ((s) - (t)) & TSN_SIGN_BIT;
-}
-
-static inline int TSN_lte(__u32 s, __u32 t)
-{
-	return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT);
-}
+#define TSN_lte(a,b)	\
+	(typecheck(__u32, a) && \
+	 typecheck(__u32, b) && \
+	 ((__s32)((a) - (b)) <= 0))
 
 /* Compare two SSNs */
+#define SSN_lt(a,b)		\
+	(typecheck(__u16, a) && \
+	 typecheck(__u16, b) && \
+	 ((__s16)((a) - (b)) < 0))
 
-/*
- * RFC 2960
- *  1.6 Serial Number Arithmetic
- *
- * Comparisons and arithmetic on Stream Sequence Numbers in this document
- * SHOULD use Serial Number Arithmetic as defined in [RFC1982] where
- * SERIAL_BITS = 16.
- */
-enum {
-	SSN_SIGN_BIT = (1<<15)
-};
-
-static inline int SSN_lt(__u16 s, __u16 t)
-{
-	return ((s) - (t)) & SSN_SIGN_BIT;
-}
-
-static inline int SSN_lte(__u16 s, __u16 t)
-{
-	return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT);
-}
-
-/*
- * ADDIP 3.1.1
- * The valid range of Serial Number is from 0 to 4294967295 (2**32 - 1). Serial
- * Numbers wrap back to 0 after reaching 4294967295.
- */
-enum {
-	ADDIP_SERIAL_SIGN_BIT = (1<<31)
-};
-
-static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t)
-{
-	return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT);
-}
+/* ADDIP 3.1.1 */
+#define ADDIP_SERIAL_gte(a,b)	\
+	(typecheck(__u32, a) && \
+	 typecheck(__u32, b) && \
+	 ((__s32)((b) - (a)) <= 0))
 
 /* Check VTAG of the packet matches the sender's own tag. */
 static inline int
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ce93c4b..11c3bf2 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -537,6 +537,7 @@
 struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
 					    struct sctp_sndrcvinfo *,
 					    struct iov_iter *);
+void sctp_datamsg_free(struct sctp_datamsg *);
 void sctp_datamsg_put(struct sctp_datamsg *);
 void sctp_chunk_fail(struct sctp_chunk *, int error);
 int sctp_chunk_abandoned(struct sctp_chunk *);
@@ -554,6 +555,9 @@
 
 	atomic_t refcnt;
 
+	/* How many times this chunk have been sent, for prsctp RTX policy */
+	int sent_count;
+
 	/* This is our link to the per-transport transmitted list.  */
 	struct list_head transmitted_list;
 
@@ -603,16 +607,6 @@
 	/* This needs to be recoverable for SCTP_SEND_FAILED events. */
 	struct sctp_sndrcvinfo sinfo;
 
-	/* We use this field to record param for prsctp policies,
-	 * for TTL policy, it is the time_to_drop of this chunk,
-	 * for RTX policy, it is the max_sent_count of this chunk,
-	 * for PRIO policy, it is the priority of this chunk.
-	 */
-	unsigned long prsctp_param;
-
-	/* How many times this chunk have been sent, for prsctp RTX policy */
-	int sent_count;
-
 	/* Which association does this belong to?  */
 	struct sctp_association *asoc;
 
@@ -1076,7 +1070,7 @@
 void sctp_outq_init(struct sctp_association *, struct sctp_outq *);
 void sctp_outq_teardown(struct sctp_outq *);
 void sctp_outq_free(struct sctp_outq*);
-int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
+void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t);
 int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *);
 int sctp_outq_is_empty(const struct sctp_outq *);
 void sctp_outq_restart(struct sctp_outq *);
@@ -1084,7 +1078,7 @@
 void sctp_retransmit(struct sctp_outq *, struct sctp_transport *,
 		     sctp_retransmit_reason_t);
 void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8);
-int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
+void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp);
 void sctp_prsctp_prune(struct sctp_association *asoc,
 		       struct sctp_sndrcvinfo *sinfo, int msg_len);
 /* Uncork and flush an outqueue.  */
diff --git a/include/net/sock.h b/include/net/sock.h
index ff5be7e..ebf75db 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1020,7 +1020,6 @@
 	void			(*unhash)(struct sock *sk);
 	void			(*rehash)(struct sock *sk);
 	int			(*get_port)(struct sock *sk, unsigned short snum);
-	void			(*clear_sk)(struct sock *sk, int size);
 
 	/* Keeping track of sockets in use */
 #ifdef CONFIG_PROC_FS
@@ -1114,6 +1113,16 @@
 	       sk_stream_memory_free(sk);
 }
 
+static inline int sk_under_cgroup_hierarchy(struct sock *sk,
+					    struct cgroup *ancestor)
+{
+#ifdef CONFIG_SOCK_CGROUP_DATA
+	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data),
+				    ancestor);
+#else
+	return -ENOTSUPP;
+#endif
+}
 
 static inline bool sk_has_memory_pressure(const struct sock *sk)
 {
@@ -1232,8 +1241,6 @@
 	return sk->sk_prot->hash(sk);
 }
 
-void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
-
 /* About 10 seconds */
 #define SOCK_DESTROY_TIME (10*HZ)
 
@@ -1332,6 +1339,16 @@
 	if (!sk_has_account(sk))
 		return;
 	sk->sk_forward_alloc += size;
+
+	/* Avoid a possible overflow.
+	 * TCP send queues can make this happen, if sk_mem_reclaim()
+	 * is not called and more than 2 GBytes are released at once.
+	 *
+	 * If we reach 2 MBytes, reclaim 1 MBytes right now, there is
+	 * no need to hold that much forward allocation anyway.
+	 */
+	if (unlikely(sk->sk_forward_alloc >= 1 << 21))
+		__sk_mem_reclaim(sk, 1 << 20);
 }
 
 static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
diff --git a/include/net/strparser.h b/include/net/strparser.h
new file mode 100644
index 0000000..0c28ad9
--- /dev/null
+++ b/include/net/strparser.h
@@ -0,0 +1,142 @@
+/*
+ * Stream Parser
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __NET_STRPARSER_H_
+#define __NET_STRPARSER_H_
+
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+#define STRP_STATS_ADD(stat, count) ((stat) += (count))
+#define STRP_STATS_INCR(stat) ((stat)++)
+
+struct strp_stats {
+	unsigned long long rx_msgs;
+	unsigned long long rx_bytes;
+	unsigned int rx_mem_fail;
+	unsigned int rx_need_more_hdr;
+	unsigned int rx_msg_too_big;
+	unsigned int rx_msg_timeouts;
+	unsigned int rx_bad_hdr_len;
+};
+
+struct strp_aggr_stats {
+	unsigned long long rx_msgs;
+	unsigned long long rx_bytes;
+	unsigned int rx_mem_fail;
+	unsigned int rx_need_more_hdr;
+	unsigned int rx_msg_too_big;
+	unsigned int rx_msg_timeouts;
+	unsigned int rx_bad_hdr_len;
+	unsigned int rx_aborts;
+	unsigned int rx_interrupted;
+	unsigned int rx_unrecov_intr;
+};
+
+struct strparser;
+
+/* Callbacks are called with lock held for the attached socket */
+struct strp_callbacks {
+	int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
+	void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+	int (*read_sock_done)(struct strparser *strp, int err);
+	void (*abort_parser)(struct strparser *strp, int err);
+};
+
+struct strp_rx_msg {
+	int full_len;
+	int offset;
+};
+
+static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb)
+{
+	return (struct strp_rx_msg *)((void *)skb->cb +
+		offsetof(struct qdisc_skb_cb, data));
+}
+
+/* Structure for an attached lower socket */
+struct strparser {
+	struct sock *sk;
+
+	u32 rx_stopped : 1;
+	u32 rx_paused : 1;
+	u32 rx_aborted : 1;
+	u32 rx_interrupted : 1;
+	u32 rx_unrecov_intr : 1;
+
+	struct sk_buff **rx_skb_nextp;
+	struct timer_list rx_msg_timer;
+	struct sk_buff *rx_skb_head;
+	unsigned int rx_need_bytes;
+	struct delayed_work rx_delayed_work;
+	struct work_struct rx_work;
+	struct strp_stats stats;
+	struct strp_callbacks cb;
+};
+
+/* Must be called with lock held for attached socket */
+static inline void strp_pause(struct strparser *strp)
+{
+	strp->rx_paused = 1;
+}
+
+/* May be called without holding lock for attached socket */
+void strp_unpause(struct strparser *strp);
+
+static inline void save_strp_stats(struct strparser *strp,
+				   struct strp_aggr_stats *agg_stats)
+{
+	/* Save psock statistics in the mux when psock is being unattached. */
+
+#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat +=		\
+				 strp->stats._stat)
+	SAVE_PSOCK_STATS(rx_msgs);
+	SAVE_PSOCK_STATS(rx_bytes);
+	SAVE_PSOCK_STATS(rx_mem_fail);
+	SAVE_PSOCK_STATS(rx_need_more_hdr);
+	SAVE_PSOCK_STATS(rx_msg_too_big);
+	SAVE_PSOCK_STATS(rx_msg_timeouts);
+	SAVE_PSOCK_STATS(rx_bad_hdr_len);
+#undef SAVE_PSOCK_STATS
+
+	if (strp->rx_aborted)
+		agg_stats->rx_aborts++;
+	if (strp->rx_interrupted)
+		agg_stats->rx_interrupted++;
+	if (strp->rx_unrecov_intr)
+		agg_stats->rx_unrecov_intr++;
+}
+
+static inline void aggregate_strp_stats(struct strp_aggr_stats *stats,
+					struct strp_aggr_stats *agg_stats)
+{
+#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat)
+	SAVE_PSOCK_STATS(rx_msgs);
+	SAVE_PSOCK_STATS(rx_bytes);
+	SAVE_PSOCK_STATS(rx_mem_fail);
+	SAVE_PSOCK_STATS(rx_need_more_hdr);
+	SAVE_PSOCK_STATS(rx_msg_too_big);
+	SAVE_PSOCK_STATS(rx_msg_timeouts);
+	SAVE_PSOCK_STATS(rx_bad_hdr_len);
+	SAVE_PSOCK_STATS(rx_aborts);
+	SAVE_PSOCK_STATS(rx_interrupted);
+	SAVE_PSOCK_STATS(rx_unrecov_intr);
+#undef SAVE_PSOCK_STATS
+
+}
+
+void strp_done(struct strparser *strp);
+void strp_stop(struct strparser *strp);
+void strp_check_rcv(struct strparser *strp);
+int strp_init(struct strparser *strp, struct sock *csk,
+	      struct strp_callbacks *cb);
+void strp_data_ready(struct strparser *strp);
+
+#endif /* __NET_STRPARSER_H_ */
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 62f6a96..eba80c4 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -68,7 +68,6 @@
 enum switchdev_obj_id {
 	SWITCHDEV_OBJ_ID_UNDEFINED,
 	SWITCHDEV_OBJ_ID_PORT_VLAN,
-	SWITCHDEV_OBJ_ID_IPV4_FIB,
 	SWITCHDEV_OBJ_ID_PORT_FDB,
 	SWITCHDEV_OBJ_ID_PORT_MDB,
 };
@@ -92,21 +91,6 @@
 #define SWITCHDEV_OBJ_PORT_VLAN(obj) \
 	container_of(obj, struct switchdev_obj_port_vlan, obj)
 
-/* SWITCHDEV_OBJ_ID_IPV4_FIB */
-struct switchdev_obj_ipv4_fib {
-	struct switchdev_obj obj;
-	u32 dst;
-	int dst_len;
-	struct fib_info *fi;
-	u8 tos;
-	u8 type;
-	u32 nlflags;
-	u32 tb_id;
-};
-
-#define SWITCHDEV_OBJ_IPV4_FIB(obj) \
-	container_of(obj, struct switchdev_obj_ipv4_fib, obj)
-
 /* SWITCHDEV_OBJ_ID_PORT_FDB */
 struct switchdev_obj_port_fdb {
 	struct switchdev_obj obj;
@@ -209,11 +193,6 @@
 				  struct nlmsghdr *nlh, u16 flags);
 int switchdev_port_bridge_dellink(struct net_device *dev,
 				  struct nlmsghdr *nlh, u16 flags);
-int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
-			   u8 tos, u8 type, u32 nlflags, u32 tb_id);
-int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
-			   u8 tos, u8 type, u32 tb_id);
-void switchdev_fib_ipv4_abort(struct fib_info *fi);
 int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			   struct net_device *dev, const unsigned char *addr,
 			   u16 vid, u16 nlm_flags);
@@ -222,7 +201,7 @@
 			   u16 vid);
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 			    struct net_device *dev,
-			    struct net_device *filter_dev, int idx);
+			    struct net_device *filter_dev, int *idx);
 void switchdev_port_fwd_mark_set(struct net_device *dev,
 				 struct net_device *group_dev,
 				 bool joining);
@@ -304,25 +283,6 @@
 	return -EOPNOTSUPP;
 }
 
-static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len,
-					 struct fib_info *fi,
-					 u8 tos, u8 type,
-					 u32 nlflags, u32 tb_id)
-{
-	return 0;
-}
-
-static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len,
-					 struct fib_info *fi,
-					 u8 tos, u8 type, u32 tb_id)
-{
-	return 0;
-}
-
-static inline void switchdev_fib_ipv4_abort(struct fib_info *fi)
-{
-}
-
 static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 					 struct net_device *dev,
 					 const unsigned char *addr,
@@ -342,15 +302,9 @@
 					  struct netlink_callback *cb,
 					  struct net_device *dev,
 					  struct net_device *filter_dev,
-					  int idx)
+					  int *idx)
 {
-       return idx;
-}
-
-static inline void switchdev_port_fwd_mark_set(struct net_device *dev,
-					       struct net_device *group_dev,
-					       bool joining)
-{
+       return *idx;
 }
 
 static inline bool switchdev_port_same_parent_id(struct net_device *a,
diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index 5164bd7..9fd2bea0 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -50,9 +50,11 @@
 int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp);
 int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp);
 int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi);
+int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi);
 int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi);
 int ife_validate_meta_u32(void *val, int len);
 int ife_validate_meta_u16(void *val, int len);
+int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi);
 void ife_release_meta_gen(struct tcf_meta_info *mi);
 int register_ife_op(struct tcf_meta_ops *mops);
 int unregister_ife_op(struct tcf_meta_ops *mops);
diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h
new file mode 100644
index 0000000..644a211
--- /dev/null
+++ b/include/net/tc_act/tc_skbmod.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#ifndef __NET_TC_SKBMOD_H
+#define __NET_TC_SKBMOD_H
+
+#include <net/act_api.h>
+#include <linux/tc_act/tc_skbmod.h>
+
+struct tcf_skbmod_params {
+	struct rcu_head	rcu;
+	u64	flags; /*up to 64 types of operations; extend if needed */
+	u8	eth_dst[ETH_ALEN];
+	u16	eth_type;
+	u8	eth_src[ETH_ALEN];
+};
+
+struct tcf_skbmod {
+	struct tc_action	common;
+	struct tcf_skbmod_params __rcu *skbmod_p;
+};
+#define to_skbmod(a) ((struct tcf_skbmod *)a)
+
+#endif /* __NET_TC_SKBMOD_H */
diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h
new file mode 100644
index 0000000..253f8da
--- /dev/null
+++ b/include/net/tc_act/tc_tunnel_key.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __NET_TC_TUNNEL_KEY_H
+#define __NET_TC_TUNNEL_KEY_H
+
+#include <net/act_api.h>
+
+struct tcf_tunnel_key_params {
+	struct rcu_head		rcu;
+	int			tcft_action;
+	int			action;
+	struct metadata_dst     *tcft_enc_metadata;
+};
+
+struct tcf_tunnel_key {
+	struct tc_action	      common;
+	struct tcf_tunnel_key_params __rcu *params;
+};
+
+#define to_tunnel_key(a) ((struct tcf_tunnel_key *)a)
+
+#endif /* __NET_TC_TUNNEL_KEY_H */
diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h
index e29f52e..48cca32 100644
--- a/include/net/tc_act/tc_vlan.h
+++ b/include/net/tc_act/tc_vlan.h
@@ -11,6 +11,7 @@
 #define __NET_TC_VLAN_H
 
 #include <net/act_api.h>
+#include <linux/tc_act/tc_vlan.h>
 
 #define VLAN_F_POP		0x1
 #define VLAN_F_PUSH		0x2
@@ -20,7 +21,32 @@
 	int			tcfv_action;
 	u16			tcfv_push_vid;
 	__be16			tcfv_push_proto;
+	u8			tcfv_push_prio;
 };
 #define to_vlan(a) ((struct tcf_vlan *)a)
 
+static inline bool is_tcf_vlan(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	if (a->ops && a->ops->type == TCA_ACT_VLAN)
+		return true;
+#endif
+	return false;
+}
+
+static inline u32 tcf_vlan_action(const struct tc_action *a)
+{
+	return to_vlan(a)->tcfv_action;
+}
+
+static inline u16 tcf_vlan_push_vid(const struct tc_action *a)
+{
+	return to_vlan(a)->tcfv_push_vid;
+}
+
+static inline __be16 tcf_vlan_push_proto(const struct tc_action *a)
+{
+	return to_vlan(a)->tcfv_push_proto;
+}
+
 #endif /* __NET_TC_VLAN_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c00e7d5..f83b7f2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -227,10 +227,9 @@
 #define	TFO_SERVER_COOKIE_NOT_REQD	0x200
 
 /* Force enable TFO on all listeners, i.e., not requiring the
- * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen.
+ * TCP_FASTOPEN socket option.
  */
 #define	TFO_SERVER_WO_SOCKOPT1	0x400
-#define	TFO_SERVER_WO_SOCKOPT2	0x800
 
 extern struct inet_timewait_death_row tcp_death_row;
 
@@ -534,6 +533,8 @@
 #endif
 /* tcp_output.c */
 
+u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+		     int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 			       int nonagle);
 bool tcp_may_send_now(struct sock *sk);
@@ -604,8 +605,6 @@
 void tcp_get_info(struct sock *, struct tcp_info *);
 
 /* Read 'sendfile()'-style from a TCP socket */
-typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
-				unsigned int, size_t);
 int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		  sk_read_actor_t recv_actor);
 
@@ -643,7 +642,7 @@
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (skb_queue_empty(&tp->out_of_order_queue) &&
+	if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
 	    tp->rcv_wnd &&
 	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
 	    !tp->urg_data)
@@ -674,7 +673,7 @@
 /* Minimum RTT in usec. ~0 means not available. */
 static inline u32 tcp_min_rtt(const struct tcp_sock *tp)
 {
-	return tp->rtt_min[0].rtt;
+	return minmax_get(&tp->rtt_min);
 }
 
 /* Compute the actual receive window we are currently advertising.
@@ -766,8 +765,16 @@
 	__u32		ack_seq;	/* Sequence number ACK'd	*/
 	union {
 		struct {
-			/* There is space for up to 20 bytes */
-			__u32 in_flight;/* Bytes in flight when packet sent */
+			/* There is space for up to 24 bytes */
+			__u32 in_flight:30,/* Bytes in flight at transmit */
+			      is_app_limited:1, /* cwnd not fully used? */
+			      unused:1;
+			/* pkts S/ACKed so far upon tx of skb, incl retrans: */
+			__u32 delivered;
+			/* start of send pipeline phase */
+			struct skb_mstamp first_tx_mstamp;
+			/* when we reached the "delivered" count */
+			struct skb_mstamp delivered_mstamp;
 		} tx;   /* only used for outgoing skbs */
 		union {
 			struct inet_skb_parm	h4;
@@ -863,6 +870,27 @@
 	u32 in_flight;
 };
 
+/* A rate sample measures the number of (original/retransmitted) data
+ * packets delivered "delivered" over an interval of time "interval_us".
+ * The tcp_rate.c code fills in the rate sample, and congestion
+ * control modules that define a cong_control function to run at the end
+ * of ACK processing can optionally chose to consult this sample when
+ * setting cwnd and pacing rate.
+ * A sample is invalid if "delivered" or "interval_us" is negative.
+ */
+struct rate_sample {
+	struct	skb_mstamp prior_mstamp; /* starting timestamp for interval */
+	u32  prior_delivered;	/* tp->delivered at "prior_mstamp" */
+	s32  delivered;		/* number of packets delivered over interval */
+	long interval_us;	/* time for tp->delivered to incr "delivered" */
+	long rtt_us;		/* RTT of last (S)ACKed packet (or -1) */
+	int  losses;		/* number of packets marked lost upon ACK */
+	u32  acked_sacked;	/* number of packets newly (S)ACKed upon ACK */
+	u32  prior_in_flight;	/* in flight before this ACK */
+	bool is_app_limited;	/* is sample from packet with bubble in pipe? */
+	bool is_retrans;	/* is sample from retransmission? */
+};
+
 struct tcp_congestion_ops {
 	struct list_head	list;
 	u32 key;
@@ -887,6 +915,14 @@
 	u32  (*undo_cwnd)(struct sock *sk);
 	/* hook for packet ack accounting (optional) */
 	void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
+	/* suggest number of segments for each skb to transmit (optional) */
+	u32 (*tso_segs_goal)(struct sock *sk);
+	/* returns the multiplier used in tcp_sndbuf_expand (optional) */
+	u32 (*sndbuf_expand)(struct sock *sk);
+	/* call when packets are delivered to update cwnd and pacing rate,
+	 * after all the ca_state processing. (optional)
+	 */
+	void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
 	/* get info for inet_diag (optional) */
 	size_t (*get_info)(struct sock *sk, u32 ext, int *attr,
 			   union tcp_cc_info *info);
@@ -949,6 +985,14 @@
 		icsk->icsk_ca_ops->cwnd_event(sk, event);
 }
 
+/* From tcp_rate.c */
+void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb);
+void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
+			    struct rate_sample *rs);
+void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
+		  struct skb_mstamp *now, struct rate_sample *rs);
+void tcp_rate_check_app_limited(struct sock *sk);
+
 /* These functions determine how the current flow behaves in respect of SACK
  * handling. SACK is negotiated with the peer, and therefore it can vary
  * between different flows.
@@ -1164,6 +1208,7 @@
 }
 
 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
 
 #undef STATE_TRACE
 
@@ -1523,6 +1568,8 @@
 {
 	if (sk->sk_send_head == skb_unlinked)
 		sk->sk_send_head = NULL;
+	if (tcp_sk(sk)->highest_sack == skb_unlinked)
+		tcp_sk(sk)->highest_sack = NULL;
 }
 
 static inline void tcp_init_send_head(struct sock *sk)
@@ -1851,6 +1898,8 @@
 	return answ;
 }
 
+int tcp_peek_len(struct socket *sock);
+
 static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb)
 {
 	u16 segs_in;
diff --git a/include/net/udp.h b/include/net/udp.h
index 8894d71..ea53a87 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -251,6 +251,7 @@
 		 int (*saddr_cmp)(const struct sock *,
 				  const struct sock *));
 void udp_err(struct sk_buff *, u32);
+int udp_abort(struct sock *sk, int err);
 int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
 int udp_push_pending_frames(struct sock *sk);
 void udp_flush_pending_frames(struct sock *sk);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index b96d036..0255613 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -350,24 +350,6 @@
 #endif
 }
 
-static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id)
-{
-#if defined(__BIG_ENDIAN)
-	return (__force __be32)tun_id;
-#else
-	return (__force __be32)((__force u64)tun_id >> 32);
-#endif
-}
-
-static inline __be64 vxlan_vni_to_tun_id(__be32 vni)
-{
-#if defined(__BIG_ENDIAN)
-	return (__force __be64)vni;
-#else
-	return (__force __be64)((u64)(__force u32)vni << 32);
-#endif
-}
-
 static inline size_t vxlan_rco_start(__be32 vni_field)
 {
 	return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index adfebd6..31947b9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -187,7 +187,7 @@
 	struct xfrm_replay_state_esn *preplay_esn;
 
 	/* The functions for replay detection. */
-	struct xfrm_replay	*repl;
+	const struct xfrm_replay *repl;
 
 	/* internal flag that only holds state for delayed aevent at the
 	 * moment
@@ -1540,8 +1540,10 @@
 void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
 int xfrm6_extract_header(struct sk_buff *skb);
 int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi);
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+		  struct ip6_tnl *t);
 int xfrm6_transport_finish(struct sk_buff *skb, int async);
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
 int xfrm6_rcv(struct sk_buff *skb);
 int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 		     xfrm_address_t *saddr, u8 proto);
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
new file mode 100644
index 0000000..408439f
--- /dev/null
+++ b/include/rdma/ib_hdrs.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef IB_HDRS_H
+#define IB_HDRS_H
+
+#include <linux/types.h>
+#include <asm/unaligned.h>
+#include <rdma/ib_verbs.h>
+
+#define IB_SEQ_NAK	(3 << 29)
+
+/* AETH NAK opcode values */
+#define IB_RNR_NAK                      0x20
+#define IB_NAK_PSN_ERROR                0x60
+#define IB_NAK_INVALID_REQUEST          0x61
+#define IB_NAK_REMOTE_ACCESS_ERROR      0x62
+#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
+#define IB_NAK_INVALID_RD_REQUEST       0x64
+
+#define IB_BTH_REQ_ACK		BIT(31)
+#define IB_BTH_SOLICITED	BIT(23)
+#define IB_BTH_MIG_REQ		BIT(22)
+
+#define IB_GRH_VERSION		6
+#define IB_GRH_VERSION_MASK	0xF
+#define IB_GRH_VERSION_SHIFT	28
+#define IB_GRH_TCLASS_MASK	0xFF
+#define IB_GRH_TCLASS_SHIFT	20
+#define IB_GRH_FLOW_MASK	0xFFFFF
+#define IB_GRH_FLOW_SHIFT	0
+#define IB_GRH_NEXT_HDR		0x1B
+
+struct ib_reth {
+	__be64 vaddr;        /* potentially unaligned */
+	__be32 rkey;
+	__be32 length;
+} __packed;
+
+struct ib_atomic_eth {
+	__be64 vaddr;        /* potentially unaligned */
+	__be32 rkey;
+	__be64 swap_data;    /* potentially unaligned */
+	__be64 compare_data; /* potentially unaligned */
+} __packed;
+
+union ib_ehdrs {
+	struct {
+		__be32 deth[2];
+		__be32 imm_data;
+	} ud;
+	struct {
+		struct ib_reth reth;
+		__be32 imm_data;
+	} rc;
+	struct {
+		__be32 aeth;
+		__be64 atomic_ack_eth; /* potentially unaligned */
+	} __packed at;
+	__be32 imm_data;
+	__be32 aeth;
+	__be32 ieth;
+	struct ib_atomic_eth atomic_eth;
+}  __packed;
+
+struct ib_other_headers {
+	__be32 bth[3];
+	union ib_ehdrs u;
+} __packed;
+
+struct ib_header {
+	__be16 lrh[4];
+	union {
+		struct {
+			struct ib_grh grh;
+			struct ib_other_headers oth;
+		} l;
+		struct ib_other_headers oth;
+	} u;
+} __packed;
+
+/* accessors for unaligned __be64 items */
+
+static inline u64 ib_u64_get(__be64 *p)
+{
+	return get_unaligned_be64(p);
+}
+
+static inline void ib_u64_put(u64 val, __be64 *p)
+{
+	put_unaligned_be64(val, p);
+}
+
+static inline u64 get_ib_reth_vaddr(struct ib_reth *reth)
+{
+	return ib_u64_get(&reth->vaddr);
+}
+
+static inline void put_ib_reth_vaddr(u64 val, struct ib_reth *reth)
+{
+	ib_u64_put(val, &reth->vaddr);
+}
+
+static inline u64 get_ib_ateth_vaddr(struct ib_atomic_eth *ateth)
+{
+	return ib_u64_get(&ateth->vaddr);
+}
+
+static inline void put_ib_ateth_vaddr(u64 val, struct ib_atomic_eth *ateth)
+{
+	ib_u64_put(val, &ateth->vaddr);
+}
+
+static inline u64 get_ib_ateth_swap(struct ib_atomic_eth *ateth)
+{
+	return ib_u64_get(&ateth->swap_data);
+}
+
+static inline void put_ib_ateth_swap(u64 val, struct ib_atomic_eth *ateth)
+{
+	ib_u64_put(val, &ateth->swap_data);
+}
+
+static inline u64 get_ib_ateth_compare(struct ib_atomic_eth *ateth)
+{
+	return ib_u64_get(&ateth->compare_data);
+}
+
+static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth)
+{
+	ib_u64_put(val, &ateth->compare_data);
+}
+
+#endif                          /* IB_HDRS_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8e90dd2..e1f9673 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2115,22 +2115,17 @@
 				       size_t len)
 {
 	const void __user *p = udata->inbuf + offset;
-	bool ret = false;
+	bool ret;
 	u8 *buf;
 
 	if (len > USHRT_MAX)
 		return false;
 
-	buf = kmalloc(len, GFP_KERNEL);
-	if (!buf)
+	buf = memdup_user(p, len);
+	if (IS_ERR(buf))
 		return false;
 
-	if (copy_from_user(buf, p, len))
-		goto free;
-
 	ret = !memchr_inv(buf, 0, len);
-
-free:
 	kfree(buf);
 	return ret;
 }
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index bd34d0b..2c5183e 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -466,6 +466,25 @@
 }
 
 /**
+ * rvt_get_qp - get a QP reference
+ * @qp - the QP to hold
+ */
+static inline void rvt_get_qp(struct rvt_qp *qp)
+{
+	atomic_inc(&qp->refcount);
+}
+
+/**
+ * rvt_put_qp - release a QP reference
+ * @qp - the QP to release
+ */
+static inline void rvt_put_qp(struct rvt_qp *qp)
+{
+	if (qp && atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+
+/**
  * rvt_qp_wqe_reserve - reserve operation
  * @qp - the rvt qp
  * @wqe - the send wqe
diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h
index b201744..703a64b 100644
--- a/include/rxrpc/packet.h
+++ b/include/rxrpc/packet.h
@@ -24,6 +24,7 @@
  */
 struct rxrpc_wire_header {
 	__be32		epoch;		/* client boot timestamp */
+#define RXRPC_RANDOM_EPOCH	0x80000000	/* Random if set, date-based if not */
 
 	__be32		cid;		/* connection and channel ID */
 #define RXRPC_MAXCALLS		4			/* max active calls per conn */
@@ -33,8 +34,6 @@
 #define RXRPC_CID_INC		(1 << RXRPC_CIDSHIFT)	/* connection ID increment */
 
 	__be32		callNumber;	/* call ID (0 for connection-level packets) */
-#define RXRPC_PROCESS_MAXCALLS	(1<<2)	/* maximum number of active calls per conn (power of 2) */
-
 	__be32		seq;		/* sequence number of pkt in call stream */
 	__be32		serial;		/* serial number of pkt sent to network */
 
@@ -92,10 +91,14 @@
 struct rxrpc_jumbo_header {
 	uint8_t		flags;		/* packet flags (as per rxrpc_header) */
 	uint8_t		pad;
-	__be16		_rsvd;		/* reserved (used by kerberos security as cksum) */
+	union {
+		__be16	_rsvd;		/* reserved */
+		__be16	cksum;		/* kerberos security checksum */
+	};
 };
 
 #define RXRPC_JUMBO_DATALEN	1412	/* non-terminal jumbo packet data length */
+#define RXRPC_JUMBO_SUBPKTLEN	(RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header))
 
 /*****************************************************************************/
 /*
@@ -120,6 +123,7 @@
 #define RXRPC_ACK_PING_RESPONSE		7	/* response to RXRPC_ACK_PING */
 #define RXRPC_ACK_DELAY			8	/* nothing happened since received packet */
 #define RXRPC_ACK_IDLE			9	/* ACK due to fully received ACK window */
+#define RXRPC_ACK__INVALID		10	/* Representation of invalid ACK reason */
 
 	uint8_t		nAcks;		/* number of ACKs */
 #define RXRPC_MAXACKS	255
@@ -130,6 +134,13 @@
 
 } __packed;
 
+/* Some ACKs refer to specific packets and some are general and can be updated. */
+#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED)	|	\
+			      (1 << RXRPC_ACK_PING_RESPONSE)	|	\
+			      (1 << RXRPC_ACK_DELAY)		|	\
+			      (1 << RXRPC_ACK_IDLE))
+
+
 /*
  * ACK packets can have a further piece of information tagged on the end
  */
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 0dee7af..7e4cd53 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -771,12 +771,9 @@
 		shost->tmf_in_progress;
 }
 
-extern bool scsi_use_blk_mq;
-
 static inline bool shost_use_blk_mq(struct Scsi_Host *shost)
 {
-	return scsi_use_blk_mq;
-
+	return shost->use_blk_mq;
 }
 
 extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
index 13c0b2b..73d8709 100644
--- a/include/scsi/scsi_transport_sas.h
+++ b/include/scsi/scsi_transport_sas.h
@@ -11,12 +11,12 @@
 struct request;
 
 #if !IS_ENABLED(CONFIG_SCSI_SAS_ATTRS)
-static inline int is_sas_attached(struct scsi_device *sdev)
+static inline int scsi_is_sas_rphy(const struct device *sdev)
 {
 	return 0;
 }
 #else
-extern int is_sas_attached(struct scsi_device *sdev);
+extern int scsi_is_sas_rphy(const struct device *);
 #endif
 
 static inline int sas_protocol_ata(enum sas_protocol proto)
@@ -202,7 +202,6 @@
 extern void sas_rphy_remove(struct sas_rphy *);
 extern void sas_rphy_delete(struct sas_rphy *);
 extern void sas_rphy_unlink(struct sas_rphy *);
-extern int scsi_is_sas_rphy(const struct device *);
 
 struct sas_port *sas_port_alloc(struct device *, int);
 struct sas_port *sas_port_alloc_num(struct device *);
diff --git a/include/soc/at91/atmel-sfr.h b/include/soc/at91/atmel-sfr.h
index 2f9bb98..506ea8f 100644
--- a/include/soc/at91/atmel-sfr.h
+++ b/include/soc/at91/atmel-sfr.h
@@ -13,6 +13,20 @@
 #ifndef _LINUX_MFD_SYSCON_ATMEL_SFR_H
 #define _LINUX_MFD_SYSCON_ATMEL_SFR_H
 
+#define AT91_SFR_DDRCFG		0x04	/* DDR Configuration Register */
+/* 0x08 ~ 0x0c: Reserved */
+#define AT91_SFR_OHCIICR	0x10	/* OHCI INT Configuration Register */
+#define AT91_SFR_OHCIISR	0x14	/* OHCI INT Status Register */
 #define AT91_SFR_I2SCLKSEL	0x90	/* I2SC Register */
 
+/* Field definitions */
+#define AT91_OHCIICR_SUSPEND_A	BIT(8)
+#define AT91_OHCIICR_SUSPEND_B	BIT(9)
+#define AT91_OHCIICR_SUSPEND_C	BIT(10)
+
+#define AT91_OHCIICR_USB_SUSPEND	(AT91_OHCIICR_SUSPEND_A | \
+					 AT91_OHCIICR_SUSPEND_B | \
+					 AT91_OHCIICR_SUSPEND_C)
+
+
 #endif /* _LINUX_MFD_SYSCON_ATMEL_SFR_H */
diff --git a/include/soc/rockchip/rockchip_sip.h b/include/soc/rockchip/rockchip_sip.h
new file mode 100644
index 0000000..7e28092
--- /dev/null
+++ b/include/soc/rockchip/rockchip_sip.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd
+ * Author: Lin Huang <hl@rock-chips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#ifndef __SOC_ROCKCHIP_SIP_H
+#define __SOC_ROCKCHIP_SIP_H
+
+#define ROCKCHIP_SIP_DRAM_FREQ			0x82000008
+#define ROCKCHIP_SIP_CONFIG_DRAM_INIT		0x00
+#define ROCKCHIP_SIP_CONFIG_DRAM_SET_RATE	0x01
+#define ROCKCHIP_SIP_CONFIG_DRAM_ROUND_RATE	0x02
+#define ROCKCHIP_SIP_CONFIG_DRAM_SET_AT_SR	0x03
+#define ROCKCHIP_SIP_CONFIG_DRAM_GET_BW		0x04
+#define ROCKCHIP_SIP_CONFIG_DRAM_GET_RATE	0x05
+#define ROCKCHIP_SIP_CONFIG_DRAM_CLR_IRQ	0x06
+#define ROCKCHIP_SIP_CONFIG_DRAM_SET_PARAM	0x07
+
+#endif
diff --git a/include/trace/events/cpuhp.h b/include/trace/events/cpuhp.h
index a72bd93..996953d 100644
--- a/include/trace/events/cpuhp.h
+++ b/include/trace/events/cpuhp.h
@@ -33,6 +33,34 @@
 		  __entry->cpu, __entry->target, __entry->idx, __entry->fun)
 );
 
+TRACE_EVENT(cpuhp_multi_enter,
+
+	TP_PROTO(unsigned int cpu,
+		 int target,
+		 int idx,
+		 int (*fun)(unsigned int, struct hlist_node *),
+		 struct hlist_node *node),
+
+	TP_ARGS(cpu, target, idx, fun, node),
+
+	TP_STRUCT__entry(
+		__field( unsigned int,	cpu		)
+		__field( int,		target		)
+		__field( int,		idx		)
+		__field( void *,	fun		)
+	),
+
+	TP_fast_assign(
+		__entry->cpu	= cpu;
+		__entry->target	= target;
+		__entry->idx	= idx;
+		__entry->fun	= fun;
+	),
+
+	TP_printk("cpu: %04u target: %3d step: %3d (%pf)",
+		  __entry->cpu, __entry->target, __entry->idx, __entry->fun)
+);
+
 TRACE_EVENT(cpuhp_exit,
 
 	TP_PROTO(unsigned int cpu,
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h
index 4cbbcef..70f0214 100644
--- a/include/trace/events/mce.h
+++ b/include/trace/events/mce.h
@@ -20,6 +20,8 @@
 		__field(	u64,		status		)
 		__field(	u64,		addr		)
 		__field(	u64,		misc		)
+		__field(	u64,		synd		)
+		__field(	u64,		ipid		)
 		__field(	u64,		ip		)
 		__field(	u64,		tsc		)
 		__field(	u64,		walltime	)
@@ -38,6 +40,8 @@
 		__entry->status		= m->status;
 		__entry->addr		= m->addr;
 		__entry->misc		= m->misc;
+		__entry->synd		= m->synd;
+		__entry->ipid		= m->ipid;
 		__entry->ip		= m->ip;
 		__entry->tsc		= m->tsc;
 		__entry->walltime	= m->time;
@@ -50,11 +54,12 @@
 		__entry->cpuvendor	= m->cpuvendor;
 	),
 
-	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
+	TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
 		__entry->cpu,
 		__entry->mcgcap, __entry->mcgstatus,
 		__entry->bank, __entry->status,
-		__entry->addr, __entry->misc,
+		__entry->ipid,
+		__entry->addr, __entry->misc, __entry->synd,
 		__entry->cs, __entry->ip,
 		__entry->tsc,
 		__entry->cpuvendor, __entry->cpuid,
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 19e5030..54e3aad 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -69,7 +69,8 @@
 		u64 mperf,
 		u64 aperf,
 		u64 tsc,
-		u32 freq
+		u32 freq,
+		u32 io_boost
 		),
 
 	TP_ARGS(core_busy,
@@ -79,7 +80,8 @@
 		mperf,
 		aperf,
 		tsc,
-		freq
+		freq,
+		io_boost
 		),
 
 	TP_STRUCT__entry(
@@ -91,6 +93,7 @@
 		__field(u64, aperf)
 		__field(u64, tsc)
 		__field(u32, freq)
+		__field(u32, io_boost)
 		),
 
 	TP_fast_assign(
@@ -102,9 +105,10 @@
 		__entry->aperf = aperf;
 		__entry->tsc = tsc;
 		__entry->freq = freq;
+		__entry->io_boost = io_boost;
 		),
 
-	TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ",
+	TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu",
 		(unsigned long)__entry->core_busy,
 		(unsigned long)__entry->scaled_busy,
 		(unsigned long)__entry->from,
@@ -112,7 +116,8 @@
 		(unsigned long long)__entry->mperf,
 		(unsigned long long)__entry->aperf,
 		(unsigned long long)__entry->tsc,
-		(unsigned long)__entry->freq
+		(unsigned long)__entry->freq,
+		(unsigned long)__entry->io_boost
 		)
 
 );
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
new file mode 100644
index 0000000..0383e5e
--- /dev/null
+++ b/include/trace/events/rxrpc.h
@@ -0,0 +1,625 @@
+/* AF_RXRPC tracepoints
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rxrpc
+
+#if !defined(_TRACE_RXRPC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RXRPC_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(rxrpc_conn,
+	    TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op,
+		     int usage, const void *where),
+
+	    TP_ARGS(conn, op, usage, where),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_connection *,	conn		)
+		    __field(int,			op		)
+		    __field(int,			usage		)
+		    __field(const void *,		where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->conn = conn;
+		    __entry->op = op;
+		    __entry->usage = usage;
+		    __entry->where = where;
+			   ),
+
+	    TP_printk("C=%p %s u=%d sp=%pSR",
+		      __entry->conn,
+		      rxrpc_conn_traces[__entry->op],
+		      __entry->usage,
+		      __entry->where)
+	    );
+
+TRACE_EVENT(rxrpc_client,
+	    TP_PROTO(struct rxrpc_connection *conn, int channel,
+		     enum rxrpc_client_trace op),
+
+	    TP_ARGS(conn, channel, op),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_connection *,	conn		)
+		    __field(u32,			cid		)
+		    __field(int,			channel		)
+		    __field(int,			usage		)
+		    __field(enum rxrpc_client_trace,	op		)
+		    __field(enum rxrpc_conn_cache_state, cs		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->conn = conn;
+		    __entry->channel = channel;
+		    __entry->usage = atomic_read(&conn->usage);
+		    __entry->op = op;
+		    __entry->cid = conn->proto.cid;
+		    __entry->cs = conn->cache_state;
+			   ),
+
+	    TP_printk("C=%p h=%2d %s %s i=%08x u=%d",
+		      __entry->conn,
+		      __entry->channel,
+		      rxrpc_client_traces[__entry->op],
+		      rxrpc_conn_cache_states[__entry->cs],
+		      __entry->cid,
+		      __entry->usage)
+	    );
+
+TRACE_EVENT(rxrpc_call,
+	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op,
+		     int usage, const void *where, const void *aux),
+
+	    TP_ARGS(call, op, usage, where, aux),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(int,			op		)
+		    __field(int,			usage		)
+		    __field(const void *,		where		)
+		    __field(const void *,		aux		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->op = op;
+		    __entry->usage = usage;
+		    __entry->where = where;
+		    __entry->aux = aux;
+			   ),
+
+	    TP_printk("c=%p %s u=%d sp=%pSR a=%p",
+		      __entry->call,
+		      rxrpc_call_traces[__entry->op],
+		      __entry->usage,
+		      __entry->where,
+		      __entry->aux)
+	    );
+
+TRACE_EVENT(rxrpc_skb,
+	    TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op,
+		     int usage, int mod_count, const void *where),
+
+	    TP_ARGS(skb, op, usage, mod_count, where),
+
+	    TP_STRUCT__entry(
+		    __field(struct sk_buff *,		skb		)
+		    __field(enum rxrpc_skb_trace,	op		)
+		    __field(int,			usage		)
+		    __field(int,			mod_count	)
+		    __field(const void *,		where		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->skb = skb;
+		    __entry->op = op;
+		    __entry->usage = usage;
+		    __entry->mod_count = mod_count;
+		    __entry->where = where;
+			   ),
+
+	    TP_printk("s=%p %s u=%d m=%d p=%pSR",
+		      __entry->skb,
+		      rxrpc_skb_traces[__entry->op],
+		      __entry->usage,
+		      __entry->mod_count,
+		      __entry->where)
+	    );
+
+TRACE_EVENT(rxrpc_rx_packet,
+	    TP_PROTO(struct rxrpc_skb_priv *sp),
+
+	    TP_ARGS(sp),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct rxrpc_host_header,	hdr		)
+			     ),
+
+	    TP_fast_assign(
+		    memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr));
+			   ),
+
+	    TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s",
+		      __entry->hdr.epoch, __entry->hdr.cid,
+		      __entry->hdr.callNumber, __entry->hdr.serviceId,
+		      __entry->hdr.serial, __entry->hdr.seq,
+		      __entry->hdr.type, __entry->hdr.flags,
+		      __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK")
+	    );
+
+TRACE_EVENT(rxrpc_rx_done,
+	    TP_PROTO(int result, int abort_code),
+
+	    TP_ARGS(result, abort_code),
+
+	    TP_STRUCT__entry(
+		    __field(int,			result		)
+		    __field(int,			abort_code	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->result = result;
+		    __entry->abort_code = abort_code;
+			   ),
+
+	    TP_printk("r=%d a=%d", __entry->result, __entry->abort_code)
+	    );
+
+TRACE_EVENT(rxrpc_abort,
+	    TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq,
+		     int abort_code, int error),
+
+	    TP_ARGS(why, cid, call_id, seq, abort_code, error),
+
+	    TP_STRUCT__entry(
+		    __array(char,			why, 4		)
+		    __field(u32,			cid		)
+		    __field(u32,			call_id		)
+		    __field(rxrpc_seq_t,		seq		)
+		    __field(int,			abort_code	)
+		    __field(int,			error		)
+			     ),
+
+	    TP_fast_assign(
+		    memcpy(__entry->why, why, 4);
+		    __entry->cid = cid;
+		    __entry->call_id = call_id;
+		    __entry->abort_code = abort_code;
+		    __entry->error = error;
+		    __entry->seq = seq;
+			   ),
+
+	    TP_printk("%08x:%08x s=%u a=%d e=%d %s",
+		      __entry->cid, __entry->call_id, __entry->seq,
+		      __entry->abort_code, __entry->error, __entry->why)
+	    );
+
+TRACE_EVENT(rxrpc_transmit,
+	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_transmit_trace why),
+
+	    TP_ARGS(call, why),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(enum rxrpc_transmit_trace,	why		)
+		    __field(rxrpc_seq_t,		tx_hard_ack	)
+		    __field(rxrpc_seq_t,		tx_top		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->why = why;
+		    __entry->tx_hard_ack = call->tx_hard_ack;
+		    __entry->tx_top = call->tx_top;
+			   ),
+
+	    TP_printk("c=%p %s f=%08x n=%u",
+		      __entry->call,
+		      rxrpc_transmit_traces[__entry->why],
+		      __entry->tx_hard_ack + 1,
+		      __entry->tx_top - __entry->tx_hard_ack)
+	    );
+
+TRACE_EVENT(rxrpc_rx_ack,
+	    TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, u8 reason, u8 n_acks),
+
+	    TP_ARGS(call, first, reason, n_acks),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(rxrpc_seq_t,		first		)
+		    __field(u8,				reason		)
+		    __field(u8,				n_acks		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->first = first;
+		    __entry->reason = reason;
+		    __entry->n_acks = n_acks;
+			   ),
+
+	    TP_printk("c=%p %s f=%08x n=%u",
+		      __entry->call,
+		      rxrpc_ack_names[__entry->reason],
+		      __entry->first,
+		      __entry->n_acks)
+	    );
+
+TRACE_EVENT(rxrpc_tx_data,
+	    TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
+		     rxrpc_serial_t serial, u8 flags, bool retrans, bool lose),
+
+	    TP_ARGS(call, seq, serial, flags, retrans, lose),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(rxrpc_seq_t,		seq		)
+		    __field(rxrpc_serial_t,		serial		)
+		    __field(u8,				flags		)
+		    __field(bool,			retrans		)
+		    __field(bool,			lose		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->seq = seq;
+		    __entry->serial = serial;
+		    __entry->flags = flags;
+		    __entry->retrans = retrans;
+		    __entry->lose = lose;
+			   ),
+
+	    TP_printk("c=%p DATA %08x q=%08x fl=%02x%s%s",
+		      __entry->call,
+		      __entry->serial,
+		      __entry->seq,
+		      __entry->flags,
+		      __entry->retrans ? " *RETRANS*" : "",
+		      __entry->lose ? " *LOSE*" : "")
+	    );
+
+TRACE_EVENT(rxrpc_tx_ack,
+	    TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial,
+		     rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial,
+		     u8 reason, u8 n_acks),
+
+	    TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(rxrpc_serial_t,		serial		)
+		    __field(rxrpc_seq_t,		ack_first	)
+		    __field(rxrpc_serial_t,		ack_serial	)
+		    __field(u8,				reason		)
+		    __field(u8,				n_acks		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->serial = serial;
+		    __entry->ack_first = ack_first;
+		    __entry->ack_serial = ack_serial;
+		    __entry->reason = reason;
+		    __entry->n_acks = n_acks;
+			   ),
+
+	    TP_printk(" c=%p ACK  %08x %s f=%08x r=%08x n=%u",
+		      __entry->call,
+		      __entry->serial,
+		      rxrpc_ack_names[__entry->reason],
+		      __entry->ack_first,
+		      __entry->ack_serial,
+		      __entry->n_acks)
+	    );
+
+TRACE_EVENT(rxrpc_receive,
+	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_receive_trace why,
+		     rxrpc_serial_t serial, rxrpc_seq_t seq),
+
+	    TP_ARGS(call, why, serial, seq),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(enum rxrpc_receive_trace,	why		)
+		    __field(rxrpc_serial_t,		serial		)
+		    __field(rxrpc_seq_t,		seq		)
+		    __field(rxrpc_seq_t,		hard_ack	)
+		    __field(rxrpc_seq_t,		top		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->why = why;
+		    __entry->serial = serial;
+		    __entry->seq = seq;
+		    __entry->hard_ack = call->rx_hard_ack;
+		    __entry->top = call->rx_top;
+			   ),
+
+	    TP_printk("c=%p %s r=%08x q=%08x w=%08x-%08x",
+		      __entry->call,
+		      rxrpc_receive_traces[__entry->why],
+		      __entry->serial,
+		      __entry->seq,
+		      __entry->hard_ack,
+		      __entry->top)
+	    );
+
+TRACE_EVENT(rxrpc_recvmsg,
+	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why,
+		     rxrpc_seq_t seq, unsigned int offset, unsigned int len,
+		     int ret),
+
+	    TP_ARGS(call, why, seq, offset, len, ret),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(enum rxrpc_recvmsg_trace,	why		)
+		    __field(rxrpc_seq_t,		seq		)
+		    __field(unsigned int,		offset		)
+		    __field(unsigned int,		len		)
+		    __field(int,			ret		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->why = why;
+		    __entry->seq = seq;
+		    __entry->offset = offset;
+		    __entry->len = len;
+		    __entry->ret = ret;
+			   ),
+
+	    TP_printk("c=%p %s q=%08x o=%u l=%u ret=%d",
+		      __entry->call,
+		      rxrpc_recvmsg_traces[__entry->why],
+		      __entry->seq,
+		      __entry->offset,
+		      __entry->len,
+		      __entry->ret)
+	    );
+
+TRACE_EVENT(rxrpc_rtt_tx,
+	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_tx_trace why,
+		     rxrpc_serial_t send_serial),
+
+	    TP_ARGS(call, why, send_serial),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(enum rxrpc_rtt_tx_trace,	why		)
+		    __field(rxrpc_serial_t,		send_serial	)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->why = why;
+		    __entry->send_serial = send_serial;
+			   ),
+
+	    TP_printk("c=%p %s sr=%08x",
+		      __entry->call,
+		      rxrpc_rtt_tx_traces[__entry->why],
+		      __entry->send_serial)
+	    );
+
+TRACE_EVENT(rxrpc_rtt_rx,
+	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+		     rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+		     s64 rtt, u8 nr, s64 avg),
+
+	    TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(enum rxrpc_rtt_rx_trace,	why		)
+		    __field(u8,				nr		)
+		    __field(rxrpc_serial_t,		send_serial	)
+		    __field(rxrpc_serial_t,		resp_serial	)
+		    __field(s64,			rtt		)
+		    __field(u64,			avg		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->why = why;
+		    __entry->send_serial = send_serial;
+		    __entry->resp_serial = resp_serial;
+		    __entry->rtt = rtt;
+		    __entry->nr = nr;
+		    __entry->avg = avg;
+			   ),
+
+	    TP_printk("c=%p %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld",
+		      __entry->call,
+		      rxrpc_rtt_rx_traces[__entry->why],
+		      __entry->send_serial,
+		      __entry->resp_serial,
+		      __entry->rtt,
+		      __entry->nr,
+		      __entry->avg)
+	    );
+
+TRACE_EVENT(rxrpc_timer,
+	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why,
+		     ktime_t now, unsigned long now_j),
+
+	    TP_ARGS(call, why, now, now_j),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,		call		)
+		    __field(enum rxrpc_timer_trace,		why		)
+		    __field_struct(ktime_t,			now		)
+		    __field_struct(ktime_t,			expire_at	)
+		    __field_struct(ktime_t,			ack_at		)
+		    __field_struct(ktime_t,			resend_at	)
+		    __field(unsigned long,			now_j		)
+		    __field(unsigned long,			timer		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call	= call;
+		    __entry->why	= why;
+		    __entry->now	= now;
+		    __entry->expire_at	= call->expire_at;
+		    __entry->ack_at	= call->ack_at;
+		    __entry->resend_at	= call->resend_at;
+		    __entry->now_j	= now_j;
+		    __entry->timer	= call->timer.expires;
+			   ),
+
+	    TP_printk("c=%p %s x=%lld a=%lld r=%lld t=%ld",
+		      __entry->call,
+		      rxrpc_timer_traces[__entry->why],
+		      ktime_to_ns(ktime_sub(__entry->expire_at, __entry->now)),
+		      ktime_to_ns(ktime_sub(__entry->ack_at, __entry->now)),
+		      ktime_to_ns(ktime_sub(__entry->resend_at, __entry->now)),
+		      __entry->timer - __entry->now_j)
+	    );
+
+TRACE_EVENT(rxrpc_rx_lose,
+	    TP_PROTO(struct rxrpc_skb_priv *sp),
+
+	    TP_ARGS(sp),
+
+	    TP_STRUCT__entry(
+		    __field_struct(struct rxrpc_host_header,	hdr		)
+			     ),
+
+	    TP_fast_assign(
+		    memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr));
+			   ),
+
+	    TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s *LOSE*",
+		      __entry->hdr.epoch, __entry->hdr.cid,
+		      __entry->hdr.callNumber, __entry->hdr.serviceId,
+		      __entry->hdr.serial, __entry->hdr.seq,
+		      __entry->hdr.type, __entry->hdr.flags,
+		      __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK")
+	    );
+
+TRACE_EVENT(rxrpc_propose_ack,
+	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why,
+		     u8 ack_reason, rxrpc_serial_t serial, bool immediate,
+		     bool background, enum rxrpc_propose_ack_outcome outcome),
+
+	    TP_ARGS(call, why, ack_reason, serial, immediate, background,
+		    outcome),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,		call		)
+		    __field(enum rxrpc_propose_ack_trace,	why		)
+		    __field(rxrpc_serial_t,			serial		)
+		    __field(u8,					ack_reason	)
+		    __field(bool,				immediate	)
+		    __field(bool,				background	)
+		    __field(enum rxrpc_propose_ack_outcome,	outcome		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call	= call;
+		    __entry->why	= why;
+		    __entry->serial	= serial;
+		    __entry->ack_reason	= ack_reason;
+		    __entry->immediate	= immediate;
+		    __entry->background	= background;
+		    __entry->outcome	= outcome;
+			   ),
+
+	    TP_printk("c=%p %s %s r=%08x i=%u b=%u%s",
+		      __entry->call,
+		      rxrpc_propose_ack_traces[__entry->why],
+		      rxrpc_ack_names[__entry->ack_reason],
+		      __entry->serial,
+		      __entry->immediate,
+		      __entry->background,
+		      rxrpc_propose_ack_outcomes[__entry->outcome])
+	    );
+
+TRACE_EVENT(rxrpc_retransmit,
+	    TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, u8 annotation,
+		     s64 expiry),
+
+	    TP_ARGS(call, seq, annotation, expiry),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,	call		)
+		    __field(rxrpc_seq_t,		seq		)
+		    __field(u8,				annotation	)
+		    __field(s64,			expiry		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call = call;
+		    __entry->seq = seq;
+		    __entry->annotation = annotation;
+		    __entry->expiry = expiry;
+			   ),
+
+	    TP_printk("c=%p q=%x a=%02x xp=%lld",
+		      __entry->call,
+		      __entry->seq,
+		      __entry->annotation,
+		      __entry->expiry)
+	    );
+
+TRACE_EVENT(rxrpc_congest,
+	    TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary,
+		     rxrpc_serial_t ack_serial, enum rxrpc_congest_change change),
+
+	    TP_ARGS(call, summary, ack_serial, change),
+
+	    TP_STRUCT__entry(
+		    __field(struct rxrpc_call *,		call		)
+		    __field(enum rxrpc_congest_change,		change		)
+		    __field(rxrpc_seq_t,			hard_ack	)
+		    __field(rxrpc_seq_t,			top		)
+		    __field(rxrpc_seq_t,			lowest_nak	)
+		    __field(rxrpc_serial_t,			ack_serial	)
+		    __field_struct(struct rxrpc_ack_summary,	sum		)
+			     ),
+
+	    TP_fast_assign(
+		    __entry->call	= call;
+		    __entry->change	= change;
+		    __entry->hard_ack	= call->tx_hard_ack;
+		    __entry->top	= call->tx_top;
+		    __entry->lowest_nak	= call->acks_lowest_nak;
+		    __entry->ack_serial	= ack_serial;
+		    memcpy(&__entry->sum, summary, sizeof(__entry->sum));
+			   ),
+
+	    TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s",
+		      __entry->call,
+		      __entry->ack_serial,
+		      rxrpc_ack_names[__entry->sum.ack_reason],
+		      __entry->hard_ack,
+		      rxrpc_congest_modes[__entry->sum.mode],
+		      __entry->sum.cwnd,
+		      __entry->sum.ssthresh,
+		      __entry->sum.nr_acks, __entry->sum.nr_nacks,
+		      __entry->sum.nr_new_acks, __entry->sum.nr_new_nacks,
+		      __entry->sum.nr_rot_new_acks,
+		      __entry->top - __entry->hard_ack,
+		      __entry->sum.cumulative_acks,
+		      __entry->sum.dup_acks,
+		      __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "",
+		      rxrpc_congest_changes[__entry->change],
+		      __entry->sum.retrans_timeo ? " rTxTo" : "")
+	    );
+
+#endif /* _TRACE_RXRPC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 5144013..28c5da6 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -330,24 +330,32 @@
 #ifdef CONFIG_NO_HZ_COMMON
 
 #define TICK_DEP_NAMES					\
-		tick_dep_name(NONE)			\
+		tick_dep_mask_name(NONE)		\
 		tick_dep_name(POSIX_TIMER)		\
 		tick_dep_name(PERF_EVENTS)		\
 		tick_dep_name(SCHED)			\
 		tick_dep_name_end(CLOCK_UNSTABLE)
 
 #undef tick_dep_name
+#undef tick_dep_mask_name
 #undef tick_dep_name_end
 
-#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
-#define tick_dep_name_end(sdep)  TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+/* The MASK will convert to their bits and they need to be processed too */
+#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
+	TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+#define tick_dep_name_end(sdep)  TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \
+	TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
+/* NONE only has a mask defined for it */
+#define tick_dep_mask_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
 
 TICK_DEP_NAMES
 
 #undef tick_dep_name
+#undef tick_dep_mask_name
 #undef tick_dep_name_end
 
 #define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
+#define tick_dep_mask_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
 #define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
 
 #define show_tick_dep_name(val)				\
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 185f8ea..d0352a9 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -71,6 +71,7 @@
 header-y += blkpg.h
 header-y += blktrace_api.h
 header-y += bpf_common.h
+header-y += bpf_perf_event.h
 header-y += bpf.h
 header-y += bpqether.h
 header-y += bsg.h
diff --git a/include/uapi/linux/atm_zatm.h b/include/uapi/linux/atm_zatm.h
index 9c9c6ad..5cd4d4d 100644
--- a/include/uapi/linux/atm_zatm.h
+++ b/include/uapi/linux/atm_zatm.h
@@ -14,6 +14,7 @@
 
 #include <linux/atmapi.h>
 #include <linux/atmioc.h>
+#include <linux/time.h>
 
 #define ZATM_GETPOOL	_IOW('a',ATMIOC_SARPRV+1,struct atmif_sioc)
 						/* get pool statistics */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 82e8aa5..208df7b 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -329,9 +329,11 @@
 #define AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT	0x00000001
 #define AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME	0x00000002
 #define AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH	0x00000004
+#define AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND	0x00000008
 #define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
 				  AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
-				  AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH)
+				  AUDIT_FEATURE_BITMAP_EXECUTABLE_PATH | \
+				  AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND)
 
 /* deprecated: AUDIT_VERSION_* */
 #define AUDIT_VERSION_LATEST 		AUDIT_FEATURE_BITMAP_ALL
diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h
index 0fbf6fd..734fe83 100644
--- a/include/uapi/linux/batman_adv.h
+++ b/include/uapi/linux/batman_adv.h
@@ -23,6 +23,42 @@
 #define BATADV_NL_MCAST_GROUP_TPMETER	"tpmeter"
 
 /**
+ * enum batadv_tt_client_flags - TT client specific flags
+ * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
+ * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
+ *  update telling its new real location has not been received/sent yet
+ * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
+ *  This information is used by the "AP Isolation" feature
+ * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
+ *  information is used by the Extended Isolation feature
+ * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
+ * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
+ *  not been announced yet
+ * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
+ *  in the table for one more originator interval for consistency purposes
+ * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
+ *  the network but no nnode has already announced it
+ *
+ * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
+ * Bits from 8 to 15 are called _local flags_ because they are used for local
+ * computations only.
+ *
+ * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
+ * the other nodes in the network. To achieve this goal these flags are included
+ * in the TT CRC computation.
+ */
+enum batadv_tt_client_flags {
+	BATADV_TT_CLIENT_DEL     = (1 << 0),
+	BATADV_TT_CLIENT_ROAM    = (1 << 1),
+	BATADV_TT_CLIENT_WIFI    = (1 << 4),
+	BATADV_TT_CLIENT_ISOLA	 = (1 << 5),
+	BATADV_TT_CLIENT_NOPURGE = (1 << 8),
+	BATADV_TT_CLIENT_NEW     = (1 << 9),
+	BATADV_TT_CLIENT_PENDING = (1 << 10),
+	BATADV_TT_CLIENT_TEMP	 = (1 << 11),
+};
+
+/**
  * enum batadv_nl_attrs - batman-adv netlink attributes
  *
  * @BATADV_ATTR_UNSPEC: unspecified attribute to catch errors
@@ -40,6 +76,26 @@
  * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run
  * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session
  * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment
+ * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active
+ * @BATADV_ATTR_TT_ADDRESS: Client MAC address
+ * @BATADV_ATTR_TT_TTVN: Translation table version
+ * @BATADV_ATTR_TT_LAST_TTVN: Previous translation table version
+ * @BATADV_ATTR_TT_CRC32: CRC32 over translation table
+ * @BATADV_ATTR_TT_VID: VLAN ID
+ * @BATADV_ATTR_TT_FLAGS: Translation table client flags
+ * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best
+ * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen
+ * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address
+ * @BATADV_ATTR_TQ: TQ to neighbour
+ * @BATADV_ATTR_THROUGHPUT: Estimated throughput to Neighbour
+ * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth
+ * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth
+ * @BATADV_ATTR_ROUTER: Gateway router MAC address
+ * @BATADV_ATTR_BLA_OWN: Flag indicating own originator
+ * @BATADV_ATTR_BLA_ADDRESS: Bridge loop avoidance claim MAC address
+ * @BATADV_ATTR_BLA_VID: BLA VLAN ID
+ * @BATADV_ATTR_BLA_BACKBONE: BLA gateway originator MAC address
+ * @BATADV_ATTR_BLA_CRC: BLA CRC
  * @__BATADV_ATTR_AFTER_LAST: internal use
  * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available
  * @BATADV_ATTR_MAX: highest attribute number currently defined
@@ -60,6 +116,26 @@
 	BATADV_ATTR_TPMETER_BYTES,
 	BATADV_ATTR_TPMETER_COOKIE,
 	BATADV_ATTR_PAD,
+	BATADV_ATTR_ACTIVE,
+	BATADV_ATTR_TT_ADDRESS,
+	BATADV_ATTR_TT_TTVN,
+	BATADV_ATTR_TT_LAST_TTVN,
+	BATADV_ATTR_TT_CRC32,
+	BATADV_ATTR_TT_VID,
+	BATADV_ATTR_TT_FLAGS,
+	BATADV_ATTR_FLAG_BEST,
+	BATADV_ATTR_LAST_SEEN_MSECS,
+	BATADV_ATTR_NEIGH_ADDRESS,
+	BATADV_ATTR_TQ,
+	BATADV_ATTR_THROUGHPUT,
+	BATADV_ATTR_BANDWIDTH_UP,
+	BATADV_ATTR_BANDWIDTH_DOWN,
+	BATADV_ATTR_ROUTER,
+	BATADV_ATTR_BLA_OWN,
+	BATADV_ATTR_BLA_ADDRESS,
+	BATADV_ATTR_BLA_VID,
+	BATADV_ATTR_BLA_BACKBONE,
+	BATADV_ATTR_BLA_CRC,
 	/* add attributes above here, update the policy in netlink.c */
 	__BATADV_ATTR_AFTER_LAST,
 	NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST,
@@ -73,6 +149,15 @@
  * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device
  * @BATADV_CMD_TP_METER: Start a tp meter session
  * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session
+ * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms.
+ * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces
+ * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations
+ * @BATADV_CMD_GET_TRANSTABLE_GLOBAL Query list of global translations
+ * @BATADV_CMD_GET_ORIGINATORS: Query list of originators
+ * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours
+ * @BATADV_CMD_GET_GATEWAYS: Query list of gateways
+ * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims
+ * @BATADV_CMD_GET_BLA_BACKBONE: Query list of bridge loop avoidance backbones
  * @__BATADV_CMD_AFTER_LAST: internal use
  * @BATADV_CMD_MAX: highest used command number
  */
@@ -81,6 +166,15 @@
 	BATADV_CMD_GET_MESH_INFO,
 	BATADV_CMD_TP_METER,
 	BATADV_CMD_TP_METER_CANCEL,
+	BATADV_CMD_GET_ROUTING_ALGOS,
+	BATADV_CMD_GET_HARDIFS,
+	BATADV_CMD_GET_TRANSTABLE_LOCAL,
+	BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+	BATADV_CMD_GET_ORIGINATORS,
+	BATADV_CMD_GET_NEIGHBORS,
+	BATADV_CMD_GET_GATEWAYS,
+	BATADV_CMD_GET_BLA_CLAIM,
+	BATADV_CMD_GET_BLA_BACKBONE,
 	/* add new commands above here */
 	__BATADV_CMD_AFTER_LAST,
 	BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fe..f09c70b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -95,6 +95,7 @@
 	BPF_PROG_TYPE_SCHED_ACT,
 	BPF_PROG_TYPE_TRACEPOINT,
 	BPF_PROG_TYPE_XDP,
+	BPF_PROG_TYPE_PERF_EVENT,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
@@ -339,7 +340,7 @@
 	BPF_FUNC_skb_change_type,
 
 	/**
-	 * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+	 * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb
 	 * @skb: pointer to skb
 	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
 	 * @index: index of the cgroup in the bpf_map
@@ -348,7 +349,7 @@
 	 *   == 1 skb succeeded the cgroup2 descendant test
 	 *    < 0 error
 	 */
-	BPF_FUNC_skb_in_cgroup,
+	BPF_FUNC_skb_under_cgroup,
 
 	/**
 	 * bpf_get_hash_recalc(skb)
@@ -375,6 +376,56 @@
 	 */
 	BPF_FUNC_probe_write_user,
 
+	/**
+	 * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 current failed the cgroup2 descendant test
+	 *   == 1 current succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_current_task_under_cgroup,
+
+	/**
+	 * bpf_skb_change_tail(skb, len, flags)
+	 * The helper will resize the skb to the given new size,
+	 * to be used f.e. with control messages.
+	 * @skb: pointer to skb
+	 * @len: new skb length
+	 * @flags: reserved
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_tail,
+
+	/**
+	 * bpf_skb_pull_data(skb, len)
+	 * The helper will pull in non-linear data in case the
+	 * skb is non-linear and not all of len are part of the
+	 * linear section. Only needed for read/write with direct
+	 * packet access.
+	 * @skb: pointer to skb
+	 * @len: len to make read/writeable
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_pull_data,
+
+	/**
+	 * bpf_csum_update(skb, csum)
+	 * Adds csum into skb->csum in case of CHECKSUM_COMPLETE.
+	 * @skb: pointer to skb
+	 * @csum: csum to add
+	 * Return: csum on success or negative error
+	 */
+	BPF_FUNC_csum_update,
+
+	/**
+	 * bpf_set_hash_invalid(skb)
+	 * Invalidate current skb>hash.
+	 * @skb: pointer to skb
+	 */
+	BPF_FUNC_set_hash_invalid,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h
new file mode 100644
index 0000000..0674272
--- /dev/null
+++ b/include/uapi/linux/bpf_perf_event.h
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+	struct pt_regs regs;
+	__u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index b8f38e8..099a420 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1362,7 +1362,14 @@
 	ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT	= 37,
 	ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT	= 38,
 	ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT	= 39,
-	ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT         = 40,
+	ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT		= 40,
+	ETHTOOL_LINK_MODE_1000baseX_Full_BIT	= 41,
+	ETHTOOL_LINK_MODE_10000baseCR_Full_BIT	= 42,
+	ETHTOOL_LINK_MODE_10000baseSR_Full_BIT	= 43,
+	ETHTOOL_LINK_MODE_10000baseLR_Full_BIT	= 44,
+	ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT	= 45,
+	ETHTOOL_LINK_MODE_10000baseER_Full_BIT	= 46,
+
 
 	/* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit
 	 * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_*
@@ -1371,7 +1378,7 @@
 	 */
 
 	__ETHTOOL_LINK_MODE_LAST
-	  = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT,
+	  = ETHTOOL_LINK_MODE_10000baseER_Full_BIT,
 };
 
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name)	\
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index c186f64..ab92bca 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -140,7 +140,7 @@
 	__u64 tx_bytes;
 	__u64 tx_packets;
 	__u16 vid;
-	__u16 pad1;
+	__u16 flags;
 	__u32 pad2;
 };
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index a1b5202..b4fba66 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -318,6 +318,7 @@
 	IFLA_BRPORT_FLUSH,
 	IFLA_BRPORT_MULTICAST_ROUTER,
 	IFLA_BRPORT_PAD,
+	IFLA_BRPORT_MCAST_FLOOD,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -463,6 +464,7 @@
 enum ipvlan_mode {
 	IPVLAN_MODE_L2 = 0,
 	IPVLAN_MODE_L3,
+	IPVLAN_MODE_L3S,
 	IPVLAN_MODE_MAX
 };
 
@@ -617,7 +619,7 @@
 enum {
 	IFLA_VF_UNSPEC,
 	IFLA_VF_MAC,		/* Hardware queue specific attributes */
-	IFLA_VF_VLAN,
+	IFLA_VF_VLAN,		/* VLAN ID and QoS */
 	IFLA_VF_TX_RATE,	/* Max TX Bandwidth Allocation */
 	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
 	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
@@ -629,6 +631,7 @@
 	IFLA_VF_TRUST,		/* Trust VF */
 	IFLA_VF_IB_NODE_GUID,	/* VF Infiniband node GUID */
 	IFLA_VF_IB_PORT_GUID,	/* VF Infiniband port GUID */
+	IFLA_VF_VLAN_LIST,	/* nested list of vlans, option for QinQ */
 	__IFLA_VF_MAX,
 };
 
@@ -645,6 +648,22 @@
 	__u32 qos;
 };
 
+enum {
+	IFLA_VF_VLAN_INFO_UNSPEC,
+	IFLA_VF_VLAN_INFO,	/* VLAN ID, QoS and VLAN protocol */
+	__IFLA_VF_VLAN_INFO_MAX,
+};
+
+#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
+#define MAX_VLAN_LIST_LEN 1
+
+struct ifla_vf_vlan_info {
+	__u32 vf;
+	__u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+	__u32 qos;
+	__be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+};
+
 struct ifla_vf_tx_rate {
 	__u32 vf;
 	__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
@@ -825,6 +844,7 @@
 	IFLA_STATS_LINK_64,
 	IFLA_STATS_LINK_XSTATS,
 	IFLA_STATS_LINK_XSTATS_SLAVE,
+	IFLA_STATS_LINK_OFFLOAD_XSTATS,
 	__IFLA_STATS_MAX,
 };
 
@@ -844,6 +864,14 @@
 };
 #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
 
+/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */
+enum {
+	IFLA_OFFLOAD_XSTATS_UNSPEC,
+	IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+	__IFLA_OFFLOAD_XSTATS_MAX
+};
+#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+
 /* XDP section */
 
 enum {
diff --git a/include/uapi/linux/if_pppol2tp.h b/include/uapi/linux/if_pppol2tp.h
index 163e8ad..4bd1f55 100644
--- a/include/uapi/linux/if_pppol2tp.h
+++ b/include/uapi/linux/if_pppol2tp.h
@@ -16,7 +16,8 @@
 #define _UAPI__LINUX_IF_PPPOL2TP_H
 
 #include <linux/types.h>
-
+#include <linux/in.h>
+#include <linux/in6.h>
 
 /* Structure used to connect() the socket to a particular tunnel UDP
  * socket over IPv4.
diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h
index e128769..d37bbb1 100644
--- a/include/uapi/linux/if_pppox.h
+++ b/include/uapi/linux/if_pppox.h
@@ -21,8 +21,11 @@
 #include <asm/byteorder.h>
 
 #include <linux/socket.h>
+#include <linux/if.h>
 #include <linux/if_ether.h>
 #include <linux/if_pppol2tp.h>
+#include <linux/in.h>
+#include <linux/in6.h>
 
 /* For user-space programs to pick up these definitions
  * which they wouldn't get otherwise without defining __KERNEL__
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 1046f55..92f3c86 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -2,6 +2,9 @@
 #define _UAPI_IF_TUNNEL_H_
 
 #include <linux/types.h>
+#include <linux/if.h>
+#include <linux/ip.h>
+#include <linux/in6.h>
 #include <asm/byteorder.h>
 
 
@@ -24,9 +27,23 @@
 #define GRE_SEQ		__cpu_to_be16(0x1000)
 #define GRE_STRICT	__cpu_to_be16(0x0800)
 #define GRE_REC		__cpu_to_be16(0x0700)
-#define GRE_FLAGS	__cpu_to_be16(0x00F8)
+#define GRE_ACK		__cpu_to_be16(0x0080)
+#define GRE_FLAGS	__cpu_to_be16(0x0078)
 #define GRE_VERSION	__cpu_to_be16(0x0007)
 
+#define GRE_IS_CSUM(f)		((f) & GRE_CSUM)
+#define GRE_IS_ROUTING(f)	((f) & GRE_ROUTING)
+#define GRE_IS_KEY(f)		((f) & GRE_KEY)
+#define GRE_IS_SEQ(f)		((f) & GRE_SEQ)
+#define GRE_IS_STRICT(f)	((f) & GRE_STRICT)
+#define GRE_IS_REC(f)		((f) & GRE_REC)
+#define GRE_IS_ACK(f)		((f) & GRE_ACK)
+
+#define GRE_VERSION_0		__cpu_to_be16(0x0000)
+#define GRE_VERSION_1		__cpu_to_be16(0x0001)
+#define GRE_PROTO_PPP		__cpu_to_be16(0x880b)
+#define GRE_PPTP_KEY_MASK	__cpu_to_be32(0xffff)
+
 struct ip_tunnel_parm {
 	char			name[IFNAMSIZ];
 	int			link;
@@ -57,6 +74,7 @@
 	IFLA_IPTUN_ENCAP_FLAGS,
 	IFLA_IPTUN_ENCAP_SPORT,
 	IFLA_IPTUN_ENCAP_DPORT,
+	IFLA_IPTUN_COLLECT_METADATA,
 	__IFLA_IPTUN_MAX,
 };
 #define IFLA_IPTUN_MAX	(__IFLA_IPTUN_MAX - 1)
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index abbd1dc..509cd96 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -73,6 +73,7 @@
 	INET_DIAG_BC_S_COND,
 	INET_DIAG_BC_D_COND,
 	INET_DIAG_BC_DEV_COND,   /* u32 ifindex */
+	INET_DIAG_BC_MARK_COND,
 };
 
 struct inet_diag_hostcond {
@@ -82,6 +83,11 @@
 	__be32	addr[0];
 };
 
+struct inet_diag_markcond {
+	__u32 mark;
+	__u32 mask;
+};
+
 /* Base info structure. It contains socket identity (addrs/ports/cookie)
  * and, alas, the information shown by netstat. */
 struct inet_diag_msg {
@@ -117,6 +123,8 @@
 	INET_DIAG_LOCALS,
 	INET_DIAG_PEERS,
 	INET_DIAG_PAD,
+	INET_DIAG_MARK,
+	INET_DIAG_BBRINFO,
 	__INET_DIAG_MAX,
 };
 
@@ -150,8 +158,20 @@
 	__u32	dctcp_ab_tot;
 };
 
+/* INET_DIAG_BBRINFO */
+
+struct tcp_bbr_info {
+	/* u64 bw: max-filtered BW (app throughput) estimate in Byte per sec: */
+	__u32	bbr_bw_lo;		/* lower 32 bits of bw */
+	__u32	bbr_bw_hi;		/* upper 32 bits of bw */
+	__u32	bbr_min_rtt;		/* min-filtered RTT in uSec */
+	__u32	bbr_pacing_gain;	/* pacing gain shifted left 8 bits */
+	__u32	bbr_cwnd_gain;		/* cwnd gain shifted left 8 bits */
+};
+
 union tcp_cc_info {
 	struct tcpvegas_info	vegas;
 	struct tcp_dctcp_info	dctcp;
+	struct tcp_bbr_info	bbr;
 };
 #endif /* _UAPI_INET_DIAG_H_ */
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 3958760..8c27723 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -177,6 +177,7 @@
 	DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
 	DEVCONF_DROP_UNSOLICITED_NA,
 	DEVCONF_KEEP_ADDR_ON_DOWN,
+	DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
 	DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/ipx.h b/include/uapi/linux/ipx.h
index 3d48014..30f031d 100644
--- a/include/uapi/linux/ipx.h
+++ b/include/uapi/linux/ipx.h
@@ -1,11 +1,13 @@
 #ifndef _IPX_H_
 #define _IPX_H_
+#include <linux/libc-compat.h>	/* for compatibility with glibc netipx/ipx.h */
 #include <linux/types.h>
 #include <linux/sockios.h>
 #include <linux/socket.h>
 #define IPX_NODE_LEN	6
 #define IPX_MTU		576
 
+#if __UAPI_DEF_SOCKADDR_IPX
 struct sockaddr_ipx {
 	__kernel_sa_family_t sipx_family;
 	__be16		sipx_port;
@@ -14,6 +16,7 @@
 	__u8		sipx_type;
 	unsigned char	sipx_zero;	/* 16 byte fill */
 };
+#endif /* __UAPI_DEF_SOCKADDR_IPX */
 
 /*
  * So we can fit the extra info for SIOCSIFADDR into the address nicely
@@ -23,12 +26,15 @@
 #define IPX_DLTITF	0
 #define IPX_CRTITF	1
 
+#if __UAPI_DEF_IPX_ROUTE_DEFINITION
 struct ipx_route_definition {
 	__be32        ipx_network;
 	__be32        ipx_router_network;
 	unsigned char ipx_router_node[IPX_NODE_LEN];
 };
+#endif /* __UAPI_DEF_IPX_ROUTE_DEFINITION */
 
+#if __UAPI_DEF_IPX_INTERFACE_DEFINITION
 struct ipx_interface_definition {
 	__be32        ipx_network;
 	unsigned char ipx_device[16];
@@ -45,16 +51,20 @@
 #define IPX_INTERNAL		2
 	unsigned char ipx_node[IPX_NODE_LEN];
 };
-	
+#endif /* __UAPI_DEF_IPX_INTERFACE_DEFINITION */
+
+#if __UAPI_DEF_IPX_CONFIG_DATA
 struct ipx_config_data {
 	unsigned char	ipxcfg_auto_select_primary;
 	unsigned char	ipxcfg_auto_create_interfaces;
 };
+#endif /* __UAPI_DEF_IPX_CONFIG_DATA */
 
 /*
  * OLD Route Definition for backward compatibility.
  */
 
+#if __UAPI_DEF_IPX_ROUTE_DEF
 struct ipx_route_def {
 	__be32		ipx_network;
 	__be32		ipx_router_network;
@@ -67,6 +77,7 @@
 #define IPX_RT_BLUEBOOK		2
 #define IPX_RT_ROUTED		1
 };
+#endif /* __UAPI_DEF_IPX_ROUTE_DEF */
 
 #define SIOCAIPXITFCRT		(SIOCPROTOPRIVATE)
 #define SIOCAIPXPRISLT		(SIOCPROTOPRIVATE + 1)
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index e4f048e..44b8a6b 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -139,6 +139,25 @@
 
 #endif /* _NETINET_IN_H */
 
+/* Coordinate with glibc netipx/ipx.h header. */
+#if defined(__NETIPX_IPX_H)
+
+#define __UAPI_DEF_SOCKADDR_IPX			0
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION		0
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION	0
+#define __UAPI_DEF_IPX_CONFIG_DATA		0
+#define __UAPI_DEF_IPX_ROUTE_DEF		0
+
+#else /* defined(__NETIPX_IPX_H) */
+
+#define __UAPI_DEF_SOCKADDR_IPX			1
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION		1
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION	1
+#define __UAPI_DEF_IPX_CONFIG_DATA		1
+#define __UAPI_DEF_IPX_ROUTE_DEF		1
+
+#endif /* defined(__NETIPX_IPX_H) */
+
 /* Definitions for xattr.h */
 #if defined(_SYS_XATTR_H)
 #define __UAPI_DEF_XATTR		0
@@ -179,6 +198,13 @@
 #define __UAPI_DEF_IN6_PKTINFO		1
 #define __UAPI_DEF_IP6_MTUINFO		1
 
+/* Definitions for ipx.h */
+#define __UAPI_DEF_SOCKADDR_IPX			1
+#define __UAPI_DEF_IPX_ROUTE_DEFINITION		1
+#define __UAPI_DEF_IPX_INTERFACE_DEFINITION	1
+#define __UAPI_DEF_IPX_CONFIG_DATA		1
+#define __UAPI_DEF_IPX_ROUTE_DEF		1
+
 /* Definitions for xattr.h */
 #define __UAPI_DEF_XATTR		1
 
diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h
index 237fac4..15d8510 100644
--- a/include/uapi/linux/mii.h
+++ b/include/uapi/linux/mii.h
@@ -48,6 +48,7 @@
 #define BMCR_SPEED100		0x2000	/* Select 100Mbps              */
 #define BMCR_LOOPBACK		0x4000	/* TXD loopback bits           */
 #define BMCR_RESET		0x8000	/* Reset to default state      */
+#define BMCR_SPEED10		0x0000	/* Select 10Mbps               */
 
 /* Basic mode status register. */
 #define BMSR_ERCAP		0x0001	/* Ext-reg capability          */
diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h
new file mode 100644
index 0000000..8be21e0
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_log.h
@@ -0,0 +1,12 @@
+#ifndef _NETFILTER_NF_LOG_H
+#define _NETFILTER_NF_LOG_H
+
+#define NF_LOG_TCPSEQ		0x01	/* Log TCP sequence numbers */
+#define NF_LOG_TCPOPT		0x02	/* Log TCP options */
+#define NF_LOG_IPOPT		0x04	/* Log IP options */
+#define NF_LOG_UID		0x08	/* Log UID owning local socket */
+#define NF_LOG_NFLOG		0x10	/* Unsupported, don't reuse */
+#define NF_LOG_MACDECODE	0x20	/* Decode MAC header */
+#define NF_LOG_MASK		0x2f
+
+#endif /* _NETFILTER_NF_LOG_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 01751fa..c6c4477 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -24,7 +24,7 @@
 	__NFT_REG_MAX,
 
 	NFT_REG32_00	= 8,
-	MFT_REG32_01,
+	NFT_REG32_01,
 	NFT_REG32_02,
 	NFT_REG32_03,
 	NFT_REG32_04,
@@ -546,6 +546,35 @@
 };
 #define NFTA_CMP_MAX		(__NFTA_CMP_MAX - 1)
 
+/**
+ * enum nft_range_ops - nf_tables range operator
+ *
+ * @NFT_RANGE_EQ: equal
+ * @NFT_RANGE_NEQ: not equal
+ */
+enum nft_range_ops {
+	NFT_RANGE_EQ,
+	NFT_RANGE_NEQ,
+};
+
+/**
+ * enum nft_range_attributes - nf_tables range expression netlink attributes
+ *
+ * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers)
+ * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops)
+ * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes)
+ * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_range_attributes {
+	NFTA_RANGE_UNSPEC,
+	NFTA_RANGE_SREG,
+	NFTA_RANGE_OP,
+	NFTA_RANGE_FROM_DATA,
+	NFTA_RANGE_TO_DATA,
+	__NFTA_RANGE_MAX
+};
+#define NFTA_RANGE_MAX		(__NFTA_RANGE_MAX - 1)
+
 enum nft_lookup_flags {
 	NFT_LOOKUP_F_INV = (1 << 0),
 };
@@ -575,6 +604,10 @@
 	NFT_DYNSET_OP_UPDATE,
 };
 
+enum nft_dynset_flags {
+	NFT_DYNSET_F_INV	= (1 << 0),
+};
+
 /**
  * enum nft_dynset_attributes - dynset expression attributes
  *
@@ -585,6 +618,7 @@
  * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32)
  * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
  * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes)
+ * @NFTA_DYNSET_FLAGS: flags (NLA_U32)
  */
 enum nft_dynset_attributes {
 	NFTA_DYNSET_UNSPEC,
@@ -596,6 +630,7 @@
 	NFTA_DYNSET_TIMEOUT,
 	NFTA_DYNSET_EXPR,
 	NFTA_DYNSET_PAD,
+	NFTA_DYNSET_FLAGS,
 	__NFTA_DYNSET_MAX,
 };
 #define NFTA_DYNSET_MAX		(__NFTA_DYNSET_MAX - 1)
@@ -724,6 +759,28 @@
 };
 
 /**
+ * enum nft_hash_attributes - nf_tables hash expression netlink attributes
+ *
+ * @NFTA_HASH_SREG: source register (NLA_U32)
+ * @NFTA_HASH_DREG: destination register (NLA_U32)
+ * @NFTA_HASH_LEN: source data length (NLA_U32)
+ * @NFTA_HASH_MODULUS: modulus value (NLA_U32)
+ * @NFTA_HASH_SEED: seed value (NLA_U32)
+ * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32)
+ */
+enum nft_hash_attributes {
+	NFTA_HASH_UNSPEC,
+	NFTA_HASH_SREG,
+	NFTA_HASH_DREG,
+	NFTA_HASH_LEN,
+	NFTA_HASH_MODULUS,
+	NFTA_HASH_SEED,
+	NFTA_HASH_OFFSET,
+	__NFTA_HASH_MAX,
+};
+#define NFTA_HASH_MAX	(__NFTA_HASH_MAX - 1)
+
+/**
  * enum nft_meta_attributes - nf_tables meta expression netlink attributes
  *
  * @NFTA_META_DREG: destination register (NLA_U32)
@@ -866,12 +923,14 @@
  * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
  * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
  * @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
+ * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers)
  */
 enum nft_queue_attributes {
 	NFTA_QUEUE_UNSPEC,
 	NFTA_QUEUE_NUM,
 	NFTA_QUEUE_TOTAL,
 	NFTA_QUEUE_FLAGS,
+	NFTA_QUEUE_SREG_QNUM,
 	__NFTA_QUEUE_MAX
 };
 #define NFTA_QUEUE_MAX		(__NFTA_QUEUE_MAX - 1)
@@ -880,6 +939,25 @@
 #define NFT_QUEUE_FLAG_CPU_FANOUT	0x02 /* use current CPU (no hashing) */
 #define NFT_QUEUE_FLAG_MASK		0x03
 
+enum nft_quota_flags {
+	NFT_QUOTA_F_INV		= (1 << 0),
+};
+
+/**
+ * enum nft_quota_attributes - nf_tables quota expression netlink attributes
+ *
+ * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16)
+ * @NFTA_QUOTA_FLAGS: flags (NLA_U32)
+ */
+enum nft_quota_attributes {
+	NFTA_QUOTA_UNSPEC,
+	NFTA_QUOTA_BYTES,
+	NFTA_QUOTA_FLAGS,
+	NFTA_QUOTA_PAD,
+	__NFTA_QUOTA_MAX
+};
+#define NFTA_QUOTA_MAX		(__NFTA_QUOTA_MAX - 1)
+
 /**
  * enum nft_reject_types - nf_tables reject expression reject types
  *
@@ -1051,7 +1129,7 @@
  * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32)
  * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32)
  */
-enum nft_trace_attibutes {
+enum nft_trace_attributes {
 	NFTA_TRACE_UNSPEC,
 	NFTA_TRACE_TABLE,
 	NFTA_TRACE_CHAIN,
@@ -1082,4 +1160,30 @@
 	__NFT_TRACETYPE_MAX
 };
 #define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1)
+
+/**
+ * enum nft_ng_attributes - nf_tables number generator expression netlink attributes
+ *
+ * @NFTA_NG_DREG: destination register (NLA_U32)
+ * @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
+ * @NFTA_NG_TYPE: operation type (NLA_U32)
+ * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32)
+ */
+enum nft_ng_attributes {
+	NFTA_NG_UNSPEC,
+	NFTA_NG_DREG,
+	NFTA_NG_MODULUS,
+	NFTA_NG_TYPE,
+	NFTA_NG_OFFSET,
+	__NFTA_NG_MAX
+};
+#define NFTA_NG_MAX	(__NFTA_NG_MAX - 1)
+
+enum nft_ng_types {
+	NFT_NG_INCREMENTAL,
+	NFT_NG_RANDOM,
+	__NFT_NG_MAX
+};
+#define NFT_NG_MAX	(__NFT_NG_MAX - 1)
+
 #endif /* _LINUX_NF_TABLES_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 9df78970..6deb886 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -231,13 +231,13 @@
 
 enum ctattr_stats_cpu {
 	CTA_STATS_UNSPEC,
-	CTA_STATS_SEARCHED,
+	CTA_STATS_SEARCHED,	/* no longer used */
 	CTA_STATS_FOUND,
-	CTA_STATS_NEW,
+	CTA_STATS_NEW,		/* no longer used */
 	CTA_STATS_INVALID,
 	CTA_STATS_IGNORE,
-	CTA_STATS_DELETE,
-	CTA_STATS_DELETE_LIST,
+	CTA_STATS_DELETE,	/* no longer used */
+	CTA_STATS_DELETE_LIST,	/* no longer used */
 	CTA_STATS_INSERT,
 	CTA_STATS_INSERT_FAILED,
 	CTA_STATS_DROP,
diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h
index 6db9037..3efc0ca 100644
--- a/include/uapi/linux/netfilter/xt_hashlimit.h
+++ b/include/uapi/linux/netfilter/xt_hashlimit.h
@@ -6,6 +6,7 @@
 
 /* timings are in milliseconds. */
 #define XT_HASHLIMIT_SCALE 10000
+#define XT_HASHLIMIT_SCALE_v2 1000000llu
 /* 1/10,000 sec period => max of 10,000/sec.  Min rate is then 429490
  * seconds, or one packet every 59 hours.
  */
@@ -63,6 +64,20 @@
 	__u8 srcmask, dstmask;
 };
 
+struct hashlimit_cfg2 {
+	__u64 avg;		/* Average secs between packets * scale */
+	__u64 burst;		/* Period multiplier for upper limit. */
+	__u32 mode;		/* bitmask of XT_HASHLIMIT_HASH_* */
+
+	/* user specified */
+	__u32 size;		/* how many buckets */
+	__u32 max;		/* max number of entries */
+	__u32 gc_interval;	/* gc interval */
+	__u32 expire;		/* when do entries expire? */
+
+	__u8 srcmask, dstmask;
+};
+
 struct xt_hashlimit_mtinfo1 {
 	char name[IFNAMSIZ];
 	struct hashlimit_cfg1 cfg;
@@ -71,4 +86,12 @@
 	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
 };
 
+struct xt_hashlimit_mtinfo2 {
+	char name[NAME_MAX];
+	struct hashlimit_cfg2 cfg;
+
+	/* Used internally by the kernel */
+	struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
+};
+
 #endif /* _UAPI_XT_HASHLIMIT_H */
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 2206941..56368e9 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -48,6 +48,7 @@
 #define NL80211_MULTICAST_GROUP_REG		"regulatory"
 #define NL80211_MULTICAST_GROUP_MLME		"mlme"
 #define NL80211_MULTICAST_GROUP_VENDOR		"vendor"
+#define NL80211_MULTICAST_GROUP_NAN		"nan"
 #define NL80211_MULTICAST_GROUP_TESTMODE	"testmode"
 
 /**
@@ -838,6 +839,41 @@
  *	not running. The driver indicates the status of the scan through
  *	cfg80211_scan_done().
  *
+ * @NL80211_CMD_START_NAN: Start NAN operation, identified by its
+ *	%NL80211_ATTR_WDEV interface. This interface must have been previously
+ *	created with %NL80211_CMD_NEW_INTERFACE. After it has been started, the
+ *	NAN interface will create or join a cluster. This command must have a
+ *	valid %NL80211_ATTR_NAN_MASTER_PREF attribute and optional
+ *	%NL80211_ATTR_NAN_DUAL attributes.
+ *	After this command NAN functions can be added.
+ * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by
+ *	its %NL80211_ATTR_WDEV interface.
+ * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined
+ *	with %NL80211_ATTR_NAN_FUNC nested attribute. When called, this
+ *	operation returns the strictly positive and unique instance id
+ *	(%NL80211_ATTR_NAN_FUNC_INST_ID) and a cookie (%NL80211_ATTR_COOKIE)
+ *	of the function upon success.
+ *	Since instance ID's can be re-used, this cookie is the right
+ *	way to identify the function. This will avoid races when a termination
+ *	event is handled by the user space after it has already added a new
+ *	function that got the same instance id from the kernel as the one
+ *	which just terminated.
+ *	This cookie may be used in NAN events even before the command
+ *	returns, so userspace shouldn't process NAN events until it processes
+ *	the response to this command.
+ *	Look at %NL80211_ATTR_SOCKET_OWNER as well.
+ * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie.
+ *	This command is also used as a notification sent when a NAN function is
+ *	terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID
+ *	and %NL80211_ATTR_COOKIE attributes.
+ * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN configuration. NAN
+ *	must be operational (%NL80211_CMD_START_NAN was executed).
+ *	It must contain at least one of the following attributes:
+ *	%NL80211_ATTR_NAN_MASTER_PREF, %NL80211_ATTR_NAN_DUAL.
+ * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported.
+ *	This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and
+ *	%NL80211_ATTR_COOKIE.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1026,6 +1062,13 @@
 
 	NL80211_CMD_ABORT_SCAN,
 
+	NL80211_CMD_START_NAN,
+	NL80211_CMD_STOP_NAN,
+	NL80211_CMD_ADD_NAN_FUNCTION,
+	NL80211_CMD_DEL_NAN_FUNCTION,
+	NL80211_CMD_CHANGE_NAN_CONFIG,
+	NL80211_CMD_NAN_MATCH,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1343,7 +1386,13 @@
  *	enum nl80211_band value is used as the index (nla_type() of the nested
  *	data. If a band is not included, it will be configured to allow all
  *	rates based on negotiated supported rates information. This attribute
- *	is used with %NL80211_CMD_SET_TX_BITRATE_MASK.
+ *	is used with %NL80211_CMD_SET_TX_BITRATE_MASK and with starting AP,
+ *	and joining mesh networks (not IBSS yet). In the later case, it must
+ *	specify just a single bitrate, which is to be used for the beacon.
+ *	The driver must also specify support for this with the extended
+ *	features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
+ *	NL80211_EXT_FEATURE_BEACON_RATE_HT and
+ *	NL80211_EXT_FEATURE_BEACON_RATE_VHT.
  *
  * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain
  *	at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME.
@@ -1733,6 +1782,12 @@
  *	regulatory indoor configuration would be owned by the netlink socket
  *	that configured the indoor setting, and the indoor operation would be
  *	cleared when the socket is closed.
+ *	If set during NAN interface creation, the interface will be destroyed
+ *	if the socket is closed just like any other interface. Moreover, only
+ *	the netlink socket that created the interface will be allowed to add
+ *	and remove functions. NAN notifications will be sent in unicast to that
+ *	socket. Without this attribute, any socket can add functions and the
+ *	notifications will be sent to the %NL80211_MCGRP_NAN multicast group.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
@@ -1867,6 +1922,21 @@
  * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is
  *	used to pull the stored data for mesh peer in power save state.
  *
+ * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by
+ *	%NL80211_CMD_START_NAN and optionally with
+ *	%NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0.
+ *	Also, values 1 and 255 are reserved for certification purposes and
+ *	should not be used during a normal device operation.
+ * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see
+ *	&enum nl80211_nan_dual_band_conf). This attribute is used with
+ *	%NL80211_CMD_START_NAN and optionally with
+ *	%NL80211_CMD_CHANGE_NAN_CONFIG.
+ * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See
+ *	&enum nl80211_nan_func_attributes for description of this nested
+ *	attribute.
+ * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute.
+ *	See &enum nl80211_nan_match_attributes.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2261,6 +2331,11 @@
 
 	NL80211_ATTR_MESH_PEER_AID,
 
+	NL80211_ATTR_NAN_MASTER_PREF,
+	NL80211_ATTR_NAN_DUAL,
+	NL80211_ATTR_NAN_FUNC,
+	NL80211_ATTR_NAN_MATCH,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2339,6 +2414,7 @@
  *	commands to create and destroy one
  * @NL80211_IF_TYPE_OCB: Outside Context of a BSS
  *	This mode corresponds to the MIB variable dot11OCBActivated=true
+ * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev)
  * @NL80211_IFTYPE_MAX: highest interface type number currently defined
  * @NUM_NL80211_IFTYPES: number of defined interface types
  *
@@ -2359,6 +2435,7 @@
 	NL80211_IFTYPE_P2P_GO,
 	NL80211_IFTYPE_P2P_DEVICE,
 	NL80211_IFTYPE_OCB,
+	NL80211_IFTYPE_NAN,
 
 	/* keep last */
 	NUM_NL80211_IFTYPES,
@@ -4551,6 +4628,12 @@
  *	(if available).
  * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of
  *	channel dwell time.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate
+ *	configuration (AP/mesh), supporting a legacy (non HT/VHT) rate.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate
+ *	configuration (AP/mesh) with HT rates.
+ * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate
+ *	configuration (AP/mesh) with VHT rates.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4562,6 +4645,9 @@
 	NL80211_EXT_FEATURE_SCAN_START_TIME,
 	NL80211_EXT_FEATURE_BSS_PARENT_TSF,
 	NL80211_EXT_FEATURE_SET_SCAN_DWELL,
+	NL80211_EXT_FEATURE_BEACON_RATE_LEGACY,
+	NL80211_EXT_FEATURE_BEACON_RATE_HT,
+	NL80211_EXT_FEATURE_BEACON_RATE_VHT,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
@@ -4855,4 +4941,186 @@
 	NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1
 };
 
+/**
+ * enum nl80211_nan_dual_band_conf - NAN dual band configuration
+ *
+ * Defines the NAN dual band mode of operation
+ *
+ * @NL80211_NAN_BAND_DEFAULT: device default mode
+ * @NL80211_NAN_BAND_2GHZ: 2.4GHz mode
+ * @NL80211_NAN_BAND_5GHZ: 5GHz mode
+  */
+enum nl80211_nan_dual_band_conf {
+	NL80211_NAN_BAND_DEFAULT	= 1 << 0,
+	NL80211_NAN_BAND_2GHZ		= 1 << 1,
+	NL80211_NAN_BAND_5GHZ		= 1 << 2,
+};
+
+/**
+ * enum nl80211_nan_function_type - NAN function type
+ *
+ * Defines the function type of a NAN function
+ *
+ * @NL80211_NAN_FUNC_PUBLISH: function is publish
+ * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe
+ * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up
+ */
+enum nl80211_nan_function_type {
+	NL80211_NAN_FUNC_PUBLISH,
+	NL80211_NAN_FUNC_SUBSCRIBE,
+	NL80211_NAN_FUNC_FOLLOW_UP,
+
+	/* keep last */
+	__NL80211_NAN_FUNC_TYPE_AFTER_LAST,
+	NL80211_NAN_FUNC_MAX_TYPE = __NL80211_NAN_FUNC_TYPE_AFTER_LAST - 1,
+};
+
+/**
+ * enum nl80211_nan_publish_type - NAN publish tx type
+ *
+ * Defines how to send publish Service Discovery Frames
+ *
+ * @NL80211_NAN_SOLICITED_PUBLISH: publish function is solicited
+ * @NL80211_NAN_UNSOLICITED_PUBLISH: publish function is unsolicited
+ */
+enum nl80211_nan_publish_type {
+	NL80211_NAN_SOLICITED_PUBLISH = 1 << 0,
+	NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1,
+};
+
+/**
+ * enum nl80211_nan_func_term_reason - NAN functions termination reason
+ *
+ * Defines termination reasons of a NAN function
+ *
+ * @NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST: requested by user
+ * @NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED: timeout
+ * @NL80211_NAN_FUNC_TERM_REASON_ERROR: errored
+ */
+enum nl80211_nan_func_term_reason {
+	NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST,
+	NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED,
+	NL80211_NAN_FUNC_TERM_REASON_ERROR,
+};
+
+#define NL80211_NAN_FUNC_SERVICE_ID_LEN 6
+#define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff
+#define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff
+
+/**
+ * enum nl80211_nan_func_attributes - NAN function attributes
+ * @__NL80211_NAN_FUNC_INVALID: invalid
+ * @NL80211_NAN_FUNC_TYPE: &enum nl80211_nan_function_type (u8).
+ * @NL80211_NAN_FUNC_SERVICE_ID: 6 bytes of the service ID hash as
+ *	specified in NAN spec. This is a binary attribute.
+ * @NL80211_NAN_FUNC_PUBLISH_TYPE: relevant if the function's type is
+ *	publish. Defines the transmission type for the publish Service Discovery
+ *	Frame, see &enum nl80211_nan_publish_type. Its type is u8.
+ * @NL80211_NAN_FUNC_PUBLISH_BCAST: relevant if the function is a solicited
+ *	publish. Should the solicited publish Service Discovery Frame be sent to
+ *	the NAN Broadcast address. This is a flag.
+ * @NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE: relevant if the function's type is
+ *	subscribe. Is the subscribe active. This is a flag.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_ID: relevant if the function's type is follow up.
+ *	The instance ID for the follow up Service Discovery Frame. This is u8.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type
+ *	is follow up. This is a u8.
+ *	The requestor instance ID for the follow up Service Discovery Frame.
+ * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the
+ *	follow up Service Discovery Frame. This is a binary attribute.
+ * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a
+ *	close range. The range itself (RSSI) is defined by the device.
+ *	This is a flag.
+ * @NL80211_NAN_FUNC_TTL: strictly positive number of DWs this function should
+ *	stay active. If not present infinite TTL is assumed. This is a u32.
+ * @NL80211_NAN_FUNC_SERVICE_INFO: array of bytes describing the service
+ *	specific info. This is a binary attribute.
+ * @NL80211_NAN_FUNC_SRF: Service Receive Filter. This is a nested attribute.
+ *	See &enum nl80211_nan_srf_attributes.
+ * @NL80211_NAN_FUNC_RX_MATCH_FILTER: Receive Matching filter. This is a nested
+ *	attribute. It is a list of binary values.
+ * @NL80211_NAN_FUNC_TX_MATCH_FILTER: Transmit Matching filter. This is a
+ *	nested attribute. It is a list of binary values.
+ * @NL80211_NAN_FUNC_INSTANCE_ID: The instance ID of the function.
+ *	Its type is u8 and it cannot be 0.
+ * @NL80211_NAN_FUNC_TERM_REASON: NAN function termination reason.
+ *	See &enum nl80211_nan_func_term_reason.
+ *
+ * @NUM_NL80211_NAN_FUNC_ATTR: internal
+ * @NL80211_NAN_FUNC_ATTR_MAX: highest NAN function attribute
+ */
+enum nl80211_nan_func_attributes {
+	__NL80211_NAN_FUNC_INVALID,
+	NL80211_NAN_FUNC_TYPE,
+	NL80211_NAN_FUNC_SERVICE_ID,
+	NL80211_NAN_FUNC_PUBLISH_TYPE,
+	NL80211_NAN_FUNC_PUBLISH_BCAST,
+	NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE,
+	NL80211_NAN_FUNC_FOLLOW_UP_ID,
+	NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID,
+	NL80211_NAN_FUNC_FOLLOW_UP_DEST,
+	NL80211_NAN_FUNC_CLOSE_RANGE,
+	NL80211_NAN_FUNC_TTL,
+	NL80211_NAN_FUNC_SERVICE_INFO,
+	NL80211_NAN_FUNC_SRF,
+	NL80211_NAN_FUNC_RX_MATCH_FILTER,
+	NL80211_NAN_FUNC_TX_MATCH_FILTER,
+	NL80211_NAN_FUNC_INSTANCE_ID,
+	NL80211_NAN_FUNC_TERM_REASON,
+
+	/* keep last */
+	NUM_NL80211_NAN_FUNC_ATTR,
+	NL80211_NAN_FUNC_ATTR_MAX = NUM_NL80211_NAN_FUNC_ATTR - 1
+};
+
+/**
+ * enum nl80211_nan_srf_attributes - NAN Service Response filter attributes
+ * @__NL80211_NAN_SRF_INVALID: invalid
+ * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set.
+ *	This is a flag.
+ * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if
+ *	&NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary.
+ * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if
+ *	&NL80211_NAN_SRF_BF is present. This is a u8.
+ * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if
+ *	and only if &NL80211_NAN_SRF_BF isn't present. This is a nested
+ *	attribute. Each nested attribute is a MAC address.
+ * @NUM_NL80211_NAN_SRF_ATTR: internal
+ * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute
+ */
+enum nl80211_nan_srf_attributes {
+	__NL80211_NAN_SRF_INVALID,
+	NL80211_NAN_SRF_INCLUDE,
+	NL80211_NAN_SRF_BF,
+	NL80211_NAN_SRF_BF_IDX,
+	NL80211_NAN_SRF_MAC_ADDRS,
+
+	/* keep last */
+	NUM_NL80211_NAN_SRF_ATTR,
+	NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1,
+};
+
+/**
+ * enum nl80211_nan_match_attributes - NAN match attributes
+ * @__NL80211_NAN_MATCH_INVALID: invalid
+ * @NL80211_NAN_MATCH_FUNC_LOCAL: the local function that had the
+ *	match. This is a nested attribute.
+ *	See &enum nl80211_nan_func_attributes.
+ * @NL80211_NAN_MATCH_FUNC_PEER: the peer function
+ *	that caused the match. This is a nested attribute.
+ *	See &enum nl80211_nan_func_attributes.
+ *
+ * @NUM_NL80211_NAN_MATCH_ATTR: internal
+ * @NL80211_NAN_MATCH_ATTR_MAX: highest NAN match attribute
+ */
+enum nl80211_nan_match_attributes {
+	__NL80211_NAN_MATCH_INVALID,
+	NL80211_NAN_MATCH_FUNC_LOCAL,
+	NL80211_NAN_MATCH_FUNC_PEER,
+
+	/* keep last */
+	NUM_NL80211_NAN_MATCH_ATTR,
+	NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index d95a301..59ed399 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -583,7 +583,7 @@
 #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
 
 struct ovs_action_trunc {
-	uint32_t max_len; /* Max packet size in bytes. */
+	__u32 max_len; /* Max packet size in bytes. */
 };
 
 /**
@@ -605,13 +605,13 @@
  * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
  * (but it will not be set in the 802.1Q header that is pushed).
  *
- * The @vlan_tpid value is typically %ETH_P_8021Q.  The only acceptable TPID
- * values are those that the kernel module also parses as 802.1Q headers, to
- * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
- * from having surprising results.
+ * The @vlan_tpid value is typically %ETH_P_8021Q or %ETH_P_8021AD.
+ * The only acceptable TPID values are those that the kernel module also parses
+ * as 802.1Q or 802.1AD headers, to prevent %OVS_ACTION_ATTR_PUSH_VLAN followed
+ * by %OVS_ACTION_ATTR_POP_VLAN from having surprising results.
  */
 struct ovs_action_push_vlan {
-	__be16 vlan_tpid;	/* 802.1Q TPID. */
+	__be16 vlan_tpid;	/* 802.1Q or 802.1ad TPID. */
 	__be16 vlan_tci;	/* 802.1Q TCI (VLAN ID and priority). */
 };
 
@@ -632,8 +632,8 @@
  * @hash_basis: basis used for computing hash.
  */
 struct ovs_action_hash {
-	uint32_t  hash_alg;     /* One of ovs_hash_alg. */
-	uint32_t  hash_basis;
+	__u32  hash_alg;     /* One of ovs_hash_alg. */
+	__u32  hash_basis;
 };
 
 /**
@@ -721,9 +721,10 @@
  * is copied from the value to the packet header field, rest of the bits are
  * left unchanged.  The non-masked value bits must be passed in as zeroes.
  * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
- * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
- * packet.
- * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
+ * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header
+ * onto the packet.
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header
+ * from the packet.
  * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
  * the nested %OVS_SAMPLE_ATTR_* attributes.
  * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index d1c1cca..8fd715f 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -396,6 +396,7 @@
 	TCA_BPF_FD,
 	TCA_BPF_NAME,
 	TCA_BPF_FLAGS,
+	TCA_BPF_FLAGS_GEN,
 	__TCA_BPF_MAX,
 };
 
@@ -428,6 +429,24 @@
 	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
 
 	TCA_FLOWER_FLAGS,
+	TCA_FLOWER_KEY_VLAN_ID,		/* be16 */
+	TCA_FLOWER_KEY_VLAN_PRIO,	/* u8   */
+	TCA_FLOWER_KEY_VLAN_ETH_TYPE,	/* be16 */
+
+	TCA_FLOWER_KEY_ENC_KEY_ID,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST,	/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC,	/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST,	/* struct in6_addr */
+	TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */
+
+	TCA_FLOWER_KEY_TCP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_TCP_DST_MASK,	/* be16 */
+	TCA_FLOWER_KEY_UDP_SRC_MASK,	/* be16 */
+	TCA_FLOWER_KEY_UDP_DST_MASK,	/* be16 */
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 2382eed..df7451d 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -792,6 +792,8 @@
 
 	TCA_FQ_ORPHAN_MASK,	/* mask applied to orphaned skb hashes */
 
+	TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */
+
 	__TCA_FQ_MAX
 };
 
@@ -809,7 +811,7 @@
 	__u32	flows;
 	__u32	inactive_flows;
 	__u32	throttled_flows;
-	__u32	pad;
+	__u32	unthrottle_latency_ns;
 };
 
 /* Heavy-Hitter Filter */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index d304f4c..a406adc 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -944,4 +944,68 @@
 	__u16 pr_policy;
 };
 
+struct sctp_info {
+	__u32	sctpi_tag;
+	__u32	sctpi_state;
+	__u32	sctpi_rwnd;
+	__u16	sctpi_unackdata;
+	__u16	sctpi_penddata;
+	__u16	sctpi_instrms;
+	__u16	sctpi_outstrms;
+	__u32	sctpi_fragmentation_point;
+	__u32	sctpi_inqueue;
+	__u32	sctpi_outqueue;
+	__u32	sctpi_overall_error;
+	__u32	sctpi_max_burst;
+	__u32	sctpi_maxseg;
+	__u32	sctpi_peer_rwnd;
+	__u32	sctpi_peer_tag;
+	__u8	sctpi_peer_capable;
+	__u8	sctpi_peer_sack;
+	__u16	__reserved1;
+
+	/* assoc status info */
+	__u64	sctpi_isacks;
+	__u64	sctpi_osacks;
+	__u64	sctpi_opackets;
+	__u64	sctpi_ipackets;
+	__u64	sctpi_rtxchunks;
+	__u64	sctpi_outofseqtsns;
+	__u64	sctpi_idupchunks;
+	__u64	sctpi_gapcnt;
+	__u64	sctpi_ouodchunks;
+	__u64	sctpi_iuodchunks;
+	__u64	sctpi_oodchunks;
+	__u64	sctpi_iodchunks;
+	__u64	sctpi_octrlchunks;
+	__u64	sctpi_ictrlchunks;
+
+	/* primary transport info */
+	struct sockaddr_storage	sctpi_p_address;
+	__s32	sctpi_p_state;
+	__u32	sctpi_p_cwnd;
+	__u32	sctpi_p_srtt;
+	__u32	sctpi_p_rto;
+	__u32	sctpi_p_hbinterval;
+	__u32	sctpi_p_pathmaxrxt;
+	__u32	sctpi_p_sackdelay;
+	__u32	sctpi_p_sackfreq;
+	__u32	sctpi_p_ssthresh;
+	__u32	sctpi_p_partial_bytes_acked;
+	__u32	sctpi_p_flight_size;
+	__u16	sctpi_p_error;
+	__u16	__reserved2;
+
+	/* sctp sock info */
+	__u32	sctpi_s_autoclose;
+	__u32	sctpi_s_adaptation_ind;
+	__u32	sctpi_s_pd_point;
+	__u8	sctpi_s_nodelay;
+	__u8	sctpi_s_disable_fragments;
+	__u8	sctpi_s_v4mapped;
+	__u8	sctpi_s_frag_interleave;
+	__u32	sctpi_s_type;
+	__u32	__reserved3;
+};
+
 #endif /* _UAPI_SCTP_H */
diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
index 1e5ac4e7..b4c0484 100644
--- a/include/uapi/linux/serial_reg.h
+++ b/include/uapi/linux/serial_reg.h
@@ -376,5 +376,13 @@
 #define UART_EXAR_TXTRG		0x0a	/* Tx FIFO trigger level write-only */
 #define UART_EXAR_RXTRG		0x0b	/* Rx FIFO trigger level write-only */
 
+/*
+ * These are definitions for the Altera ALTR_16550_F32/F64/F128
+ * Normalized from 0x100 to 0x40 because of shift by 2 (32 bit regs).
+ */
+#define UART_ALTR_AFR		0x40	/* Additional Features Register */
+#define UART_ALTR_EN_TXFIFO_LW	0x01	/* Enable the TX FIFO Low Watermark */
+#define UART_ALTR_TX_LOW	0x41	/* Tx FIFO Low Watermark */
+
 #endif /* _LINUX_SERIAL_REG_H */
 
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 25a9ad8..e7a31f8 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -235,6 +235,7 @@
 	LINUX_MIB_TCPSPURIOUSRTOS,		/* TCPSpuriousRTOs */
 	LINUX_MIB_TCPMD5NOTFOUND,		/* TCPMD5NotFound */
 	LINUX_MIB_TCPMD5UNEXPECTED,		/* TCPMD5Unexpected */
+	LINUX_MIB_TCPMD5FAILURE,		/* TCPMD5Failure */
 	LINUX_MIB_SACKSHIFTED,
 	LINUX_MIB_SACKMERGED,
 	LINUX_MIB_SACKSHIFTFALLBACK,
diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h
index 4ece02a..cd18360 100644
--- a/include/uapi/linux/tc_act/tc_ife.h
+++ b/include/uapi/linux/tc_act/tc_ife.h
@@ -32,8 +32,9 @@
 #define IFE_META_HASHID 2
 #define	IFE_META_PRIO 3
 #define	IFE_META_QMAP 4
+#define	IFE_META_TCINDEX 5
 /*Can be overridden at runtime by module option*/
-#define	__IFE_META_MAX 5
+#define	__IFE_META_MAX 6
 #define IFE_META_MAX (__IFE_META_MAX - 1)
 
 #endif
diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
new file mode 100644
index 0000000..10fc07d
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Jamal Hadi Salim
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#ifndef __LINUX_TC_SKBMOD_H
+#define __LINUX_TC_SKBMOD_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_SKBMOD 15
+
+#define SKBMOD_F_DMAC	0x1
+#define SKBMOD_F_SMAC	0x2
+#define SKBMOD_F_ETYPE	0x4
+#define SKBMOD_F_SWAPMAC 0x8
+
+struct tc_skbmod {
+	tc_gen;
+	__u64 flags;
+};
+
+enum {
+	TCA_SKBMOD_UNSPEC,
+	TCA_SKBMOD_TM,
+	TCA_SKBMOD_PARMS,
+	TCA_SKBMOD_DMAC,
+	TCA_SKBMOD_SMAC,
+	TCA_SKBMOD_ETYPE,
+	TCA_SKBMOD_PAD,
+	__TCA_SKBMOD_MAX
+};
+#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1)
+
+#endif
diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
new file mode 100644
index 0000000..890106f
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_tunnel_key.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __LINUX_TC_TUNNEL_KEY_H
+#define __LINUX_TC_TUNNEL_KEY_H
+
+#include <linux/pkt_cls.h>
+
+#define TCA_ACT_TUNNEL_KEY 17
+
+#define TCA_TUNNEL_KEY_ACT_SET	    1
+#define TCA_TUNNEL_KEY_ACT_RELEASE  2
+
+struct tc_tunnel_key {
+	tc_gen;
+	int t_action;
+};
+
+enum {
+	TCA_TUNNEL_KEY_UNSPEC,
+	TCA_TUNNEL_KEY_TM,
+	TCA_TUNNEL_KEY_PARMS,
+	TCA_TUNNEL_KEY_ENC_IPV4_SRC,	/* be32 */
+	TCA_TUNNEL_KEY_ENC_IPV4_DST,	/* be32 */
+	TCA_TUNNEL_KEY_ENC_IPV6_SRC,	/* struct in6_addr */
+	TCA_TUNNEL_KEY_ENC_IPV6_DST,	/* struct in6_addr */
+	TCA_TUNNEL_KEY_ENC_KEY_ID,	/* be64 */
+	TCA_TUNNEL_KEY_PAD,
+	__TCA_TUNNEL_KEY_MAX,
+};
+
+#define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1)
+
+#endif
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index 31151ff..bddb272 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -16,6 +16,7 @@
 
 #define TCA_VLAN_ACT_POP	1
 #define TCA_VLAN_ACT_PUSH	2
+#define TCA_VLAN_ACT_MODIFY	3
 
 struct tc_vlan {
 	tc_gen;
@@ -29,6 +30,7 @@
 	TCA_VLAN_PUSH_VLAN_ID,
 	TCA_VLAN_PUSH_VLAN_PROTOCOL,
 	TCA_VLAN_PAD,
+	TCA_VLAN_PUSH_VLAN_PRIORITY,
 	__TCA_VLAN_MAX,
 };
 #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 482898f..73ac0db 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -167,6 +167,7 @@
 	__u8	tcpi_backoff;
 	__u8	tcpi_options;
 	__u8	tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
+	__u8	tcpi_delivery_rate_app_limited:1;
 
 	__u32	tcpi_rto;
 	__u32	tcpi_ato;
@@ -211,6 +212,8 @@
 	__u32	tcpi_min_rtt;
 	__u32	tcpi_data_segs_in;	/* RFC4898 tcpEStatsDataSegsIn */
 	__u32	tcpi_data_segs_out;	/* RFC4898 tcpEStatsDataSegsOut */
+
+	__u64   tcpi_delivery_rate;
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 5f3f6d0..f9edd20 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -59,6 +59,9 @@
 	TIPC_NL_MON_SET,
 	TIPC_NL_MON_GET,
 	TIPC_NL_MON_PEER_GET,
+	TIPC_NL_PEER_REMOVE,
+	TIPC_NL_BEARER_ADD,
+	TIPC_NL_UDP_GET_REMOTEIP,
 
 	__TIPC_NL_CMD_MAX,
 	TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
@@ -98,6 +101,7 @@
 	TIPC_NLA_UDP_UNSPEC,
 	TIPC_NLA_UDP_LOCAL,		/* sockaddr_storage */
 	TIPC_NLA_UDP_REMOTE,		/* sockaddr_storage */
+	TIPC_NLA_UDP_MULTI_REMOTEIP,	/* flag */
 
 	__TIPC_NLA_UDP_MAX,
 	TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1
diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h
index 108dd79..acc6369 100644
--- a/include/uapi/linux/usb/functionfs.h
+++ b/include/uapi/linux/usb/functionfs.h
@@ -21,6 +21,8 @@
 	FUNCTIONFS_HAS_MS_OS_DESC = 8,
 	FUNCTIONFS_VIRTUAL_ADDR = 16,
 	FUNCTIONFS_EVENTFD = 32,
+	FUNCTIONFS_ALL_CTRL_RECIP = 64,
+	FUNCTIONFS_CONFIG0_SETUP = 128,
 };
 
 /* Descriptor of an non-audio endpoint */
diff --git a/include/uapi/linux/virtio_vsock.h b/include/uapi/linux/virtio_vsock.h
index 6b011c1..1d57ed3 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -32,7 +32,7 @@
  */
 
 #ifndef _UAPI_LINUX_VIRTIO_VSOCK_H
-#define _UAPI_LINUX_VIRTIO_VOSCK_H
+#define _UAPI_LINUX_VIRTIO_VSOCK_H
 
 #include <linux/types.h>
 #include <linux/virtio_ids.h>
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 1433389..1fc62b2 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -298,7 +298,7 @@
 	XFRMA_ALG_AUTH_TRUNC,	/* struct xfrm_algo_auth */
 	XFRMA_MARK,		/* struct xfrm_mark */
 	XFRMA_TFCPAD,		/* __u32 */
-	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_esn */
+	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_state_esn */
 	XFRMA_SA_EXTRA_FLAGS,	/* __u32 */
 	XFRMA_PROTO,		/* __u8 */
 	XFRMA_ADDRESS_FILTER,	/* struct xfrm_address_filter */
diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h
index cbae529..180d526 100644
--- a/include/uapi/misc/cxl.h
+++ b/include/uapi/misc/cxl.h
@@ -136,8 +136,8 @@
 	 *
 	 * Of course the contents will be ABI, but that's up the AFU driver.
 	 */
-	size_t data_size;
-	u8 data[];
+	__u32 data_size;
+	__u8 data[];
 };
 
 struct cxl_event {
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
index f184909..a4c4d73 100644
--- a/include/xen/interface/sched.h
+++ b/include/xen/interface/sched.h
@@ -3,6 +3,24 @@
  *
  * Scheduler state interactions
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
  */
 
@@ -12,18 +30,30 @@
 #include <xen/interface/event_channel.h>
 
 /*
+ * Guest Scheduler Operations
+ *
+ * The SCHEDOP interface provides mechanisms for a guest to interact
+ * with the scheduler, including yield, blocking and shutting itself
+ * down.
+ */
+
+/*
  * The prototype for this hypercall is:
- *  long sched_op_new(int cmd, void *arg)
+ * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
+ *
  * @cmd == SCHEDOP_??? (scheduler operation).
  * @arg == Operation-specific extra argument(s), as described below.
+ * ...  == Additional Operation-specific extra arguments, described below.
  *
- * **NOTE**:
- * Versions of Xen prior to 3.0.2 provide only the following legacy version
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
  * of this hypercall, supporting only the commands yield, block and shutdown:
  *  long sched_op(int cmd, unsigned long arg)
  * @cmd == SCHEDOP_??? (scheduler operation).
  * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
  *      == SHUTDOWN_* code (SCHEDOP_shutdown)
+ *
+ * This legacy version is available to new guests as:
+ * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
  */
 
 /*
@@ -44,12 +74,17 @@
 /*
  * Halt execution of this domain (all VCPUs) and notify the system controller.
  * @arg == pointer to sched_shutdown structure.
+ *
+ * If the sched_shutdown_t reason is SHUTDOWN_suspend then
+ * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
+ * of the guest's start info page.  RDX/EDX is the third hypercall
+ * argument.
+ *
+ * In addition, which reason is SHUTDOWN_suspend this hypercall
+ * returns 1 if suspend was cancelled or the domain was merely
+ * checkpointed, and 0 if it is resuming in a new domain.
  */
 #define SCHEDOP_shutdown    2
-struct sched_shutdown {
-    unsigned int reason; /* SHUTDOWN_* */
-};
-DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
 
 /*
  * Poll a set of event-channel ports. Return when one or more are pending. An
@@ -57,12 +92,6 @@
  * @arg == pointer to sched_poll structure.
  */
 #define SCHEDOP_poll        3
-struct sched_poll {
-    GUEST_HANDLE(evtchn_port_t) ports;
-    unsigned int nr_ports;
-    uint64_t timeout;
-};
-DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
 
 /*
  * Declare a shutdown for another domain. The main use of this function is
@@ -71,15 +100,11 @@
  * @arg == pointer to sched_remote_shutdown structure.
  */
 #define SCHEDOP_remote_shutdown        4
-struct sched_remote_shutdown {
-    domid_t domain_id;         /* Remote domain ID */
-    unsigned int reason;       /* SHUTDOWN_xxx reason */
-};
 
 /*
  * Latch a shutdown code, so that when the domain later shuts down it
  * reports this code to the control tools.
- * @arg == as for SCHEDOP_shutdown.
+ * @arg == sched_shutdown, as for SCHEDOP_shutdown.
  */
 #define SCHEDOP_shutdown_code 5
 
@@ -92,10 +117,47 @@
  * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
  */
 #define SCHEDOP_watchdog    6
+
+/*
+ * Override the current vcpu affinity by pinning it to one physical cpu or
+ * undo this override restoring the previous affinity.
+ * @arg == pointer to sched_pin_override structure.
+ *
+ * A negative pcpu value will undo a previous pin override and restore the
+ * previous cpu affinity.
+ * This call is allowed for the hardware domain only and requires the cpu
+ * to be part of the domain's cpupool.
+ */
+#define SCHEDOP_pin_override 7
+
+struct sched_shutdown {
+    unsigned int reason; /* SHUTDOWN_* => shutdown reason */
+};
+DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
+
+struct sched_poll {
+    GUEST_HANDLE(evtchn_port_t) ports;
+    unsigned int nr_ports;
+    uint64_t timeout;
+};
+DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
+
+struct sched_remote_shutdown {
+    domid_t domain_id;         /* Remote domain ID */
+    unsigned int reason;       /* SHUTDOWN_* => shutdown reason */
+};
+DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown);
+
 struct sched_watchdog {
     uint32_t id;                /* watchdog ID */
     uint32_t timeout;           /* timeout */
 };
+DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog);
+
+struct sched_pin_override {
+    int32_t pcpu;
+};
+DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override);
 
 /*
  * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
@@ -107,6 +169,7 @@
 #define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
 #define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
 #define SHUTDOWN_watchdog   4  /* Restart because watchdog time expired.     */
+
 /*
  * Domain asked to perform 'soft reset' for it. The expected behavior is to
  * reset internal Xen state for the domain returning it to the point where it
@@ -115,5 +178,6 @@
  * interfaces again.
  */
 #define SHUTDOWN_soft_reset 5
+#define SHUTDOWN_MAX        5  /* Maximum valid shutdown reason.             */
 
 #endif /* __XEN_PUBLIC_SCHED_H__ */
diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h
index 9a37c541..b5486e6 100644
--- a/include/xen/xen-ops.h
+++ b/include/xen/xen-ops.h
@@ -9,8 +9,8 @@
 
 DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 
-DECLARE_PER_CPU(int, xen_vcpu_id);
-static inline int xen_vcpu_nr(int cpu)
+DECLARE_PER_CPU(uint32_t, xen_vcpu_id);
+static inline uint32_t xen_vcpu_nr(int cpu)
 {
 	return per_cpu(xen_vcpu_id, cpu);
 }
diff --git a/init/Kconfig b/init/Kconfig
index cac3f09..3b9a47f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -26,6 +26,16 @@
 config BUILDTIME_EXTABLE_SORT
 	bool
 
+config THREAD_INFO_IN_TASK
+	bool
+	help
+	  Select this to move thread_info off the stack into task_struct.  To
+	  make this work, an arch will need to remove all thread_info fields
+	  except flags and fix any runtime bugs.
+
+	  One subtle change that will be needed is to use try_get_task_stack()
+	  and put_task_stack() in save_thread_stack_tsk() and get_wchan().
+
 menu "General setup"
 
 config BROKEN
diff --git a/init/init_task.c b/init/init_task.c
index ba0a7f36..11f83be1 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -22,5 +22,8 @@
  * Initial thread structure. Alignment of this is handled by a special
  * linker map entry.
  */
-union thread_union init_thread_union __init_task_data =
-	{ INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data = {
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+	INIT_THREAD_INFO(init_task)
+#endif
+};
diff --git a/kernel/audit.c b/kernel/audit.c
index a8a91bd..f1ca116 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -877,6 +877,12 @@
 				return err;
 		}
 		if (s.mask & AUDIT_STATUS_PID) {
+			/* NOTE: we are using task_tgid_vnr() below because
+			 *       the s.pid value is relative to the namespace
+			 *       of the caller; at present this doesn't matter
+			 *       much since you can really only run auditd
+			 *       from the initial pid namespace, but something
+			 *       to keep in mind if this changes */
 			int new_pid = s.pid;
 			pid_t requesting_pid = task_tgid_vnr(current);
 
@@ -1917,7 +1923,7 @@
 			 " euid=%u suid=%u fsuid=%u"
 			 " egid=%u sgid=%u fsgid=%u tty=%s ses=%u",
 			 task_ppid_nr(tsk),
-			 task_pid_nr(tsk),
+			 task_tgid_nr(tsk),
 			 from_kuid(&init_user_ns, audit_get_loginuid(tsk)),
 			 from_kuid(&init_user_ns, cred->uid),
 			 from_kgid(&init_user_ns, cred->gid),
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d6709eb..0d302a8 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -19,6 +19,7 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/file.h>
 #include <linux/kernel.h>
 #include <linux/audit.h>
 #include <linux/kthread.h>
@@ -544,10 +545,11 @@
 	unsigned long ino;
 	dev_t dev;
 
-	rcu_read_lock();
-	exe_file = rcu_dereference(tsk->mm->exe_file);
+	exe_file = get_task_exe_file(tsk);
+	if (!exe_file)
+		return 0;
 	ino = exe_file->f_inode->i_ino;
 	dev = exe_file->f_inode->i_sb->s_dev;
-	rcu_read_unlock();
+	fput(exe_file);
 	return audit_mark_compare(mark, ino, dev);
 }
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 5abf1dc..2cd5256 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -457,7 +457,7 @@
 
 		switch (f->type) {
 		case AUDIT_PID:
-			pid = task_pid_nr(tsk);
+			pid = task_tgid_nr(tsk);
 			result = audit_comparator(pid, f->op, f->val);
 			break;
 		case AUDIT_PPID:
@@ -1993,7 +1993,7 @@
 	loginuid = from_kuid(&init_user_ns, kloginuid),
 	tty = audit_get_tty(current);
 
-	audit_log_format(ab, "pid=%d uid=%u", task_pid_nr(current), uid);
+	audit_log_format(ab, "pid=%d uid=%u", task_tgid_nr(current), uid);
 	audit_log_task_context(ab);
 	audit_log_format(ab, " old-auid=%u auid=%u tty=%s old-ses=%u ses=%u res=%d",
 			 oldloginuid, loginuid, tty ? tty_name(tty) : "(none)",
@@ -2220,7 +2220,7 @@
 {
 	struct audit_context *context = current->audit_context;
 
-	context->target_pid = task_pid_nr(t);
+	context->target_pid = task_tgid_nr(t);
 	context->target_auid = audit_get_loginuid(t);
 	context->target_uid = task_uid(t);
 	context->target_sessionid = audit_get_sessionid(t);
@@ -2245,7 +2245,7 @@
 
 	if (audit_pid && t->tgid == audit_pid) {
 		if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) {
-			audit_sig_pid = task_pid_nr(tsk);
+			audit_sig_pid = task_tgid_nr(tsk);
 			if (uid_valid(tsk->loginuid))
 				audit_sig_uid = tsk->loginuid;
 			else
@@ -2345,7 +2345,7 @@
 void __audit_log_capset(const struct cred *new, const struct cred *old)
 {
 	struct audit_context *context = current->audit_context;
-	context->capset.pid = task_pid_nr(current);
+	context->capset.pid = task_tgid_nr(current);
 	context->capset.cap.effective   = new->cap_effective;
 	context->capset.cap.inheritable = new->cap_effective;
 	context->capset.cap.permitted   = new->cap_permitted;
@@ -2377,7 +2377,7 @@
 			 from_kgid(&init_user_ns, gid),
 			 sessionid);
 	audit_log_task_context(ab);
-	audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
+	audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current));
 	audit_log_untrustedstring(ab, get_task_comm(comm, current));
 	audit_log_d_path_exe(ab, current->mm);
 }
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650..a2ac051 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@
 }
 late_initcall(register_perf_event_array_map);
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
 				     struct file *map_file /* not used */,
 				     int fd)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 03fd23d..aa6d981 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1018,7 +1018,7 @@
 	prandom_init_once(&bpf_user_rnd_state);
 }
 
-u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_user_rnd_u32)
 {
 	/* Should someone ever have the rather unwise idea to use some
 	 * of the registers passed into this function, then note that
@@ -1031,7 +1031,7 @@
 
 	state = &get_cpu_var(bpf_user_rnd_state);
 	res = prandom_u32_state(state);
-	put_cpu_var(state);
+	put_cpu_var(bpf_user_rnd_state);
 
 	return res;
 }
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index fff3650..570eeca 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -26,11 +26,18 @@
 	struct bucket *buckets;
 	void *elems;
 	struct pcpu_freelist freelist;
+	void __percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
+enum extra_elem_state {
+	HTAB_NOT_AN_EXTRA_ELEM = 0,
+	HTAB_EXTRA_ELEM_FREE,
+	HTAB_EXTRA_ELEM_USED
+};
+
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -38,7 +45,10 @@
 		struct bpf_htab *htab;
 		struct pcpu_freelist_node fnode;
 	};
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		enum extra_elem_state state;
+	};
 	u32 hash;
 	char key[0] __aligned(8);
 };
@@ -113,6 +123,23 @@
 	return err;
 }
 
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+	void __percpu *pptr;
+	int cpu;
+
+	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	if (!pptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+			HTAB_EXTRA_ELEM_FREE;
+	}
+	htab->extra_elems = pptr;
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
@@ -185,6 +212,8 @@
 	if (percpu)
 		cost += (u64) round_up(htab->map.value_size, 8) *
 			num_possible_cpus() * htab->map.max_entries;
+	else
+	       cost += (u64) htab->elem_size * num_possible_cpus();
 
 	if (cost >= U32_MAX - PAGE_SIZE)
 		/* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
-	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
-		err = prealloc_elems_and_freelist(htab);
+	if (!percpu) {
+		err = alloc_extra_elems(htab);
 		if (err)
 			goto free_buckets;
 	}
 
+	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
+		err = prealloc_elems_and_freelist(htab);
+		if (err)
+			goto free_extra_elems;
+	}
+
 	return &htab->map;
 
+free_extra_elems:
+	free_percpu(htab->extra_elems);
 free_buckets:
 	kvfree(htab->buckets);
 free_htab:
@@ -349,7 +386,6 @@
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
 	kfree(l);
-
 }
 
 static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	if (l->state == HTAB_EXTRA_ELEM_USED) {
+		l->state = HTAB_EXTRA_ELEM_FREE;
+		return;
+	}
+
 	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
@@ -381,25 +422,44 @@
 
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
-					 bool percpu, bool onallcpus)
+					 bool percpu, bool onallcpus,
+					 bool old_elem_exists)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 	struct htab_elem *l_new;
 	void __percpu *pptr;
+	int err = 0;
 
 	if (prealloc) {
 		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
 		if (!l_new)
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
 	} else {
 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
 			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
+		} else {
+			l_new = kmalloc(htab->elem_size,
+					GFP_ATOMIC | __GFP_NOWARN);
+			if (!l_new)
+				return ERR_PTR(-ENOMEM);
 		}
-		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+	}
+
+	if (err) {
+		if (!old_elem_exists)
+			return ERR_PTR(err);
+
+		/* if we're updating the existing element and the hash table
+		 * is full, use per-cpu extra elems
+		 */
+		l_new = this_cpu_ptr(htab->extra_elems);
+		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+			return ERR_PTR(-E2BIG);
+		l_new->state = HTAB_EXTRA_ELEM_USED;
+	} else {
+		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
 	}
 
 	memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@
 	if (ret)
 		goto err;
 
-	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+				!!l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus);
+					hash, true, onallcpus, false);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -652,6 +713,7 @@
 		htab_free_elems(htab);
 		pcpu_freelist_destroy(&htab->freelist);
 	}
+	free_percpu(htab->extra_elems);
 	kvfree(htab->buckets);
 	kfree(htab);
 }
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1ea3afb..3991840 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -16,6 +16,7 @@
 #include <linux/ktime.h>
 #include <linux/sched.h>
 #include <linux/uidgid.h>
+#include <linux/filter.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -26,48 +27,32 @@
  * if program is allowed to access maps, so check rcu_read_lock_held in
  * all three functions.
  */
-static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
 {
-	/* verifier checked that R1 contains a valid pointer to bpf_map
-	 * and R2 points to a program stack and map->key_size bytes were
-	 * initialized
-	 */
-	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
-	void *key = (void *) (unsigned long) r2;
-	void *value;
-
 	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	value = map->ops->map_lookup_elem(map, key);
-
-	/* lookup() returns either pointer to element value or NULL
-	 * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type
-	 */
-	return (unsigned long) value;
+	return (unsigned long) map->ops->map_lookup_elem(map, key);
 }
 
 const struct bpf_func_proto bpf_map_lookup_elem_proto = {
 	.func		= bpf_map_lookup_elem,
 	.gpl_only	= false,
+	.pkt_access	= true,
 	.ret_type	= RET_PTR_TO_MAP_VALUE_OR_NULL,
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
 };
 
-static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
+	   void *, value, u64, flags)
 {
-	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
-	void *key = (void *) (unsigned long) r2;
-	void *value = (void *) (unsigned long) r3;
-
 	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	return map->ops->map_update_elem(map, key, value, r4);
+	return map->ops->map_update_elem(map, key, value, flags);
 }
 
 const struct bpf_func_proto bpf_map_update_elem_proto = {
 	.func		= bpf_map_update_elem,
 	.gpl_only	= false,
+	.pkt_access	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
@@ -75,19 +60,16 @@
 	.arg4_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
 {
-	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
-	void *key = (void *) (unsigned long) r2;
-
 	WARN_ON_ONCE(!rcu_read_lock_held());
-
 	return map->ops->map_delete_elem(map, key);
 }
 
 const struct bpf_func_proto bpf_map_delete_elem_proto = {
 	.func		= bpf_map_delete_elem,
 	.gpl_only	= false,
+	.pkt_access	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_CONST_MAP_PTR,
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
@@ -99,7 +81,7 @@
 	.ret_type	= RET_INTEGER,
 };
 
-static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_smp_processor_id)
 {
 	return smp_processor_id();
 }
@@ -110,7 +92,7 @@
 	.ret_type	= RET_INTEGER,
 };
 
-static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_ktime_get_ns)
 {
 	/* NMI safe access to clock monotonic */
 	return ktime_get_mono_fast_ns();
@@ -122,11 +104,11 @@
 	.ret_type	= RET_INTEGER,
 };
 
-static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_pid_tgid)
 {
 	struct task_struct *task = current;
 
-	if (!task)
+	if (unlikely(!task))
 		return -EINVAL;
 
 	return (u64) task->tgid << 32 | task->pid;
@@ -138,18 +120,18 @@
 	.ret_type	= RET_INTEGER,
 };
 
-static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_uid_gid)
 {
 	struct task_struct *task = current;
 	kuid_t uid;
 	kgid_t gid;
 
-	if (!task)
+	if (unlikely(!task))
 		return -EINVAL;
 
 	current_uid_gid(&uid, &gid);
 	return (u64) from_kgid(&init_user_ns, gid) << 32 |
-		from_kuid(&init_user_ns, uid);
+		     from_kuid(&init_user_ns, uid);
 }
 
 const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
@@ -158,10 +140,9 @@
 	.ret_type	= RET_INTEGER,
 };
 
-static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size)
 {
 	struct task_struct *task = current;
-	char *buf = (char *) (long) r1;
 
 	if (unlikely(!task))
 		goto err_clear;
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index bf4495f..732ae16 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -116,10 +116,9 @@
 	return ERR_PTR(err);
 }
 
-u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
+	   u64, flags)
 {
-	struct pt_regs *regs = (struct pt_regs *) (long) r1;
-	struct bpf_map *map = (struct bpf_map *) (long) r2;
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
 	struct perf_callchain_entry *trace;
 	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index f72f23b..99a7e5b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
 #include <linux/filter.h>
 #include <net/netlink.h>
 #include <linux/file.h>
@@ -126,75 +127,16 @@
  * are set to NOT_INIT to indicate that they are no longer readable.
  */
 
-struct reg_state {
-	enum bpf_reg_type type;
-	union {
-		/* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */
-		s64 imm;
-
-		/* valid when type == PTR_TO_PACKET* */
-		struct {
-			u32 id;
-			u16 off;
-			u16 range;
-		};
-
-		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
-		 *   PTR_TO_MAP_VALUE_OR_NULL
-		 */
-		struct bpf_map *map_ptr;
-	};
-};
-
-enum bpf_stack_slot_type {
-	STACK_INVALID,    /* nothing was stored in this stack slot */
-	STACK_SPILL,      /* register spilled into stack */
-	STACK_MISC	  /* BPF program wrote some data into this slot */
-};
-
-#define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
-
-/* state of the program:
- * type of all registers and stack info
- */
-struct verifier_state {
-	struct reg_state regs[MAX_BPF_REG];
-	u8 stack_slot_type[MAX_BPF_STACK];
-	struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE];
-};
-
-/* linked list of verifier states used to prune search */
-struct verifier_state_list {
-	struct verifier_state state;
-	struct verifier_state_list *next;
-};
-
 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
-struct verifier_stack_elem {
+struct bpf_verifier_stack_elem {
 	/* verifer state is 'st'
 	 * before processing instruction 'insn_idx'
 	 * and after processing instruction 'prev_insn_idx'
 	 */
-	struct verifier_state st;
+	struct bpf_verifier_state st;
 	int insn_idx;
 	int prev_insn_idx;
-	struct verifier_stack_elem *next;
-};
-
-#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
-
-/* single container for all structs
- * one verifier_env per bpf_check() call
- */
-struct verifier_env {
-	struct bpf_prog *prog;		/* eBPF program being verified */
-	struct verifier_stack_elem *head; /* stack of verifier states to be processed */
-	int stack_size;			/* number of states to be processed */
-	struct verifier_state cur_state; /* current verifier state */
-	struct verifier_state_list **explored_states; /* search pruning optimization */
-	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
-	u32 used_map_cnt;		/* number of used maps */
-	bool allow_ptr_leaks;
+	struct bpf_verifier_stack_elem *next;
 };
 
 #define BPF_COMPLEXITY_LIMIT_INSNS	65536
@@ -203,6 +145,7 @@
 struct bpf_call_arg_meta {
 	struct bpf_map *map_ptr;
 	bool raw_mode;
+	bool pkt_access;
 	int regno;
 	int access_size;
 };
@@ -239,6 +182,7 @@
 	[CONST_PTR_TO_MAP]	= "map_ptr",
 	[PTR_TO_MAP_VALUE]	= "map_value",
 	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
+	[PTR_TO_MAP_VALUE_ADJ]	= "map_value_adj",
 	[FRAME_PTR]		= "fp",
 	[PTR_TO_STACK]		= "fp",
 	[CONST_IMM]		= "imm",
@@ -246,9 +190,9 @@
 	[PTR_TO_PACKET_END]	= "pkt_end",
 };
 
-static void print_verifier_state(struct verifier_state *state)
+static void print_verifier_state(struct bpf_verifier_state *state)
 {
-	struct reg_state *reg;
+	struct bpf_reg_state *reg;
 	enum bpf_reg_type t;
 	int i;
 
@@ -266,10 +210,17 @@
 		else if (t == UNKNOWN_VALUE && reg->imm)
 			verbose("%lld", reg->imm);
 		else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
-			 t == PTR_TO_MAP_VALUE_OR_NULL)
+			 t == PTR_TO_MAP_VALUE_OR_NULL ||
+			 t == PTR_TO_MAP_VALUE_ADJ)
 			verbose("(ks=%d,vs=%d)",
 				reg->map_ptr->key_size,
 				reg->map_ptr->value_size);
+		if (reg->min_value != BPF_REGISTER_MIN_RANGE)
+			verbose(",min_value=%llu",
+				(unsigned long long)reg->min_value);
+		if (reg->max_value != BPF_REGISTER_MAX_RANGE)
+			verbose(",max_value=%llu",
+				(unsigned long long)reg->max_value);
 	}
 	for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
 		if (state->stack_slot_type[i] == STACK_SPILL)
@@ -424,9 +375,9 @@
 	}
 }
 
-static int pop_stack(struct verifier_env *env, int *prev_insn_idx)
+static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
 {
-	struct verifier_stack_elem *elem;
+	struct bpf_verifier_stack_elem *elem;
 	int insn_idx;
 
 	if (env->head == NULL)
@@ -443,12 +394,12 @@
 	return insn_idx;
 }
 
-static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx,
-					 int prev_insn_idx)
+static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
+					     int insn_idx, int prev_insn_idx)
 {
-	struct verifier_stack_elem *elem;
+	struct bpf_verifier_stack_elem *elem;
 
-	elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL);
+	elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 	if (!elem)
 		goto err;
 
@@ -474,13 +425,15 @@
 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 };
 
-static void init_reg_state(struct reg_state *regs)
+static void init_reg_state(struct bpf_reg_state *regs)
 {
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++) {
 		regs[i].type = NOT_INIT;
 		regs[i].imm = 0;
+		regs[i].min_value = BPF_REGISTER_MIN_RANGE;
+		regs[i].max_value = BPF_REGISTER_MAX_RANGE;
 	}
 
 	/* frame pointer */
@@ -490,20 +443,26 @@
 	regs[BPF_REG_1].type = PTR_TO_CTX;
 }
 
-static void mark_reg_unknown_value(struct reg_state *regs, u32 regno)
+static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
 {
 	BUG_ON(regno >= MAX_BPF_REG);
 	regs[regno].type = UNKNOWN_VALUE;
 	regs[regno].imm = 0;
 }
 
+static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
+{
+	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
+	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+}
+
 enum reg_arg_type {
 	SRC_OP,		/* register is used as source operand */
 	DST_OP,		/* register is used as destination operand */
 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
 };
 
-static int check_reg_arg(struct reg_state *regs, u32 regno,
+static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
 			 enum reg_arg_type t)
 {
 	if (regno >= MAX_BPF_REG) {
@@ -563,8 +522,8 @@
 /* check_stack_read/write functions track spill/fill of registers,
  * stack boundary and alignment are checked in check_mem_access()
  */
-static int check_stack_write(struct verifier_state *state, int off, int size,
-			     int value_regno)
+static int check_stack_write(struct bpf_verifier_state *state, int off,
+			     int size, int value_regno)
 {
 	int i;
 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -589,7 +548,7 @@
 	} else {
 		/* regular write of data into stack */
 		state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
-			(struct reg_state) {};
+			(struct bpf_reg_state) {};
 
 		for (i = 0; i < size; i++)
 			state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
@@ -597,7 +556,7 @@
 	return 0;
 }
 
-static int check_stack_read(struct verifier_state *state, int off, int size,
+static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
 			    int value_regno)
 {
 	u8 *slot_type;
@@ -638,7 +597,7 @@
 }
 
 /* check read/write into map element returned by bpf_map_lookup_elem() */
-static int check_map_access(struct verifier_env *env, u32 regno, int off,
+static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 			    int size)
 {
 	struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
@@ -653,24 +612,31 @@
 
 #define MAX_PACKET_OFF 0xffff
 
-static bool may_write_pkt_data(enum bpf_prog_type type)
+static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
+				       const struct bpf_call_arg_meta *meta)
 {
-	switch (type) {
+	switch (env->prog->type) {
+	case BPF_PROG_TYPE_SCHED_CLS:
+	case BPF_PROG_TYPE_SCHED_ACT:
 	case BPF_PROG_TYPE_XDP:
+		if (meta)
+			return meta->pkt_access;
+
+		env->seen_direct_write = true;
 		return true;
 	default:
 		return false;
 	}
 }
 
-static int check_packet_access(struct verifier_env *env, u32 regno, int off,
+static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 			       int size)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *reg = &regs[regno];
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *reg = &regs[regno];
 
 	off += reg->off;
-	if (off < 0 || off + size > reg->range) {
+	if (off < 0 || size <= 0 || off + size > reg->range) {
 		verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
 			off, size, regno, reg->id, reg->off, reg->range);
 		return -EACCES;
@@ -679,9 +645,13 @@
 }
 
 /* check access to 'struct bpf_context' fields */
-static int check_ctx_access(struct verifier_env *env, int off, int size,
+static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
 {
+	/* for analyzer ctx accesses are already validated and converted */
+	if (env->analyzer_ops)
+		return 0;
+
 	if (env->prog->aux->ops->is_valid_access &&
 	    env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
 		/* remember the offset of last byte accessed in ctx */
@@ -694,7 +664,7 @@
 	return -EACCES;
 }
 
-static bool is_pointer_value(struct verifier_env *env, int regno)
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 {
 	if (env->allow_ptr_leaks)
 		return false;
@@ -708,28 +678,19 @@
 	}
 }
 
-static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg,
-			       int off, int size)
+static int check_ptr_alignment(struct bpf_verifier_env *env,
+			       struct bpf_reg_state *reg, int off, int size)
 {
-	if (reg->type != PTR_TO_PACKET) {
+	if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) {
 		if (off % size != 0) {
-			verbose("misaligned access off %d size %d\n", off, size);
+			verbose("misaligned access off %d size %d\n",
+				off, size);
 			return -EACCES;
 		} else {
 			return 0;
 		}
 	}
 
-	switch (env->prog->type) {
-	case BPF_PROG_TYPE_SCHED_CLS:
-	case BPF_PROG_TYPE_SCHED_ACT:
-	case BPF_PROG_TYPE_XDP:
-		break;
-	default:
-		verbose("verifier is misconfigured\n");
-		return -EACCES;
-	}
-
 	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
 		/* misaligned access to packet is ok on x86,arm,arm64 */
 		return 0;
@@ -740,7 +701,8 @@
 	}
 
 	/* skb->data is NET_IP_ALIGN-ed */
-	if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
+	if (reg->type == PTR_TO_PACKET &&
+	    (NET_IP_ALIGN + reg->off + off) % size != 0) {
 		verbose("misaligned packet access off %d+%d+%d size %d\n",
 			NET_IP_ALIGN, reg->off, off, size);
 		return -EACCES;
@@ -754,12 +716,12 @@
  * if t==write && value_regno==-1, some unknown value is stored into memory
  * if t==read && value_regno==-1, don't care what we read from memory
  */
-static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 			    int bpf_size, enum bpf_access_type t,
 			    int value_regno)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *reg = &state->regs[regno];
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *reg = &state->regs[regno];
 	int size, err = 0;
 
 	if (reg->type == PTR_TO_STACK)
@@ -773,12 +735,52 @@
 	if (err)
 		return err;
 
-	if (reg->type == PTR_TO_MAP_VALUE) {
+	if (reg->type == PTR_TO_MAP_VALUE ||
+	    reg->type == PTR_TO_MAP_VALUE_ADJ) {
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
 			verbose("R%d leaks addr into map\n", value_regno);
 			return -EACCES;
 		}
+
+		/* If we adjusted the register to this map value at all then we
+		 * need to change off and size to min_value and max_value
+		 * respectively to make sure our theoretical access will be
+		 * safe.
+		 */
+		if (reg->type == PTR_TO_MAP_VALUE_ADJ) {
+			if (log_level)
+				print_verifier_state(state);
+			env->varlen_map_value_access = true;
+			/* The minimum value is only important with signed
+			 * comparisons where we can't assume the floor of a
+			 * value is 0.  If we are using signed variables for our
+			 * index'es we need to make sure that whatever we use
+			 * will have a set floor within our range.
+			 */
+			if ((s64)reg->min_value < 0) {
+				verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+					regno);
+				return -EACCES;
+			}
+			err = check_map_access(env, regno, reg->min_value + off,
+					       size);
+			if (err) {
+				verbose("R%d min value is outside of the array range\n",
+					regno);
+				return err;
+			}
+
+			/* If we haven't set a max value then we need to bail
+			 * since we can't be sure we won't do bad things.
+			 */
+			if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
+				verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n",
+					regno);
+				return -EACCES;
+			}
+			off += reg->max_value;
+		}
 		err = check_map_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown_value(state->regs, value_regno);
@@ -794,9 +796,8 @@
 		err = check_ctx_access(env, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
 			mark_reg_unknown_value(state->regs, value_regno);
-			if (env->allow_ptr_leaks)
-				/* note that reg.[id|off|range] == 0 */
-				state->regs[value_regno].type = reg_type;
+			/* note that reg.[id|off|range] == 0 */
+			state->regs[value_regno].type = reg_type;
 		}
 
 	} else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
@@ -816,7 +817,7 @@
 			err = check_stack_read(state, off, size, value_regno);
 		}
 	} else if (state->regs[regno].type == PTR_TO_PACKET) {
-		if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) {
+		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) {
 			verbose("cannot write into packet\n");
 			return -EACCES;
 		}
@@ -845,9 +846,9 @@
 	return err;
 }
 
-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	int err;
 
 	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
@@ -881,12 +882,12 @@
  * bytes from that pointer, make sure that it's within stack boundary
  * and all elements of stack are initialized
  */
-static int check_stack_boundary(struct verifier_env *env, int regno,
+static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 				int access_size, bool zero_size_allowed,
 				struct bpf_call_arg_meta *meta)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *regs = state->regs;
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *regs = state->regs;
 	int off, i;
 
 	if (regs[regno].type != PTR_TO_STACK) {
@@ -925,18 +926,18 @@
 	return 0;
 }
 
-static int check_func_arg(struct verifier_env *env, u32 regno,
+static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			  enum bpf_arg_type arg_type,
 			  struct bpf_call_arg_meta *meta)
 {
-	struct reg_state *reg = env->cur_state.regs + regno;
-	enum bpf_reg_type expected_type;
+	struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
+	enum bpf_reg_type expected_type, type = reg->type;
 	int err = 0;
 
 	if (arg_type == ARG_DONTCARE)
 		return 0;
 
-	if (reg->type == NOT_INIT) {
+	if (type == NOT_INIT) {
 		verbose("R%d !read_ok\n", regno);
 		return -EACCES;
 	}
@@ -949,16 +950,29 @@
 		return 0;
 	}
 
+	if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) {
+		verbose("helper access to the packet is not allowed\n");
+		return -EACCES;
+	}
+
 	if (arg_type == ARG_PTR_TO_MAP_KEY ||
 	    arg_type == ARG_PTR_TO_MAP_VALUE) {
 		expected_type = PTR_TO_STACK;
+		if (type != PTR_TO_PACKET && type != expected_type)
+			goto err_type;
 	} else if (arg_type == ARG_CONST_STACK_SIZE ||
 		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
 		expected_type = CONST_IMM;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type == ARG_CONST_MAP_PTR) {
 		expected_type = CONST_PTR_TO_MAP;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type == ARG_PTR_TO_CTX) {
 		expected_type = PTR_TO_CTX;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type == ARG_PTR_TO_STACK ||
 		   arg_type == ARG_PTR_TO_RAW_STACK) {
 		expected_type = PTR_TO_STACK;
@@ -966,20 +980,16 @@
 		 * passed in as argument, it's a CONST_IMM type. Final test
 		 * happens during stack boundary checking.
 		 */
-		if (reg->type == CONST_IMM && reg->imm == 0)
-			expected_type = CONST_IMM;
+		if (type == CONST_IMM && reg->imm == 0)
+			/* final test in check_stack_boundary() */;
+		else if (type != PTR_TO_PACKET && type != expected_type)
+			goto err_type;
 		meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
 	} else {
 		verbose("unsupported arg_type %d\n", arg_type);
 		return -EFAULT;
 	}
 
-	if (reg->type != expected_type) {
-		verbose("R%d type=%s expected=%s\n", regno,
-			reg_type_str[reg->type], reg_type_str[expected_type]);
-		return -EACCES;
-	}
-
 	if (arg_type == ARG_CONST_MAP_PTR) {
 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
 		meta->map_ptr = reg->map_ptr;
@@ -997,8 +1007,13 @@
 			verbose("invalid map_ptr to access map->key\n");
 			return -EACCES;
 		}
-		err = check_stack_boundary(env, regno, meta->map_ptr->key_size,
-					   false, NULL);
+		if (type == PTR_TO_PACKET)
+			err = check_packet_access(env, regno, 0,
+						  meta->map_ptr->key_size);
+		else
+			err = check_stack_boundary(env, regno,
+						   meta->map_ptr->key_size,
+						   false, NULL);
 	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
 		 * check [value, value + map->value_size) validity
@@ -1008,9 +1023,13 @@
 			verbose("invalid map_ptr to access map->value\n");
 			return -EACCES;
 		}
-		err = check_stack_boundary(env, regno,
-					   meta->map_ptr->value_size,
-					   false, NULL);
+		if (type == PTR_TO_PACKET)
+			err = check_packet_access(env, regno, 0,
+						  meta->map_ptr->value_size);
+		else
+			err = check_stack_boundary(env, regno,
+						   meta->map_ptr->value_size,
+						   false, NULL);
 	} else if (arg_type == ARG_CONST_STACK_SIZE ||
 		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
 		bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
@@ -1024,11 +1043,18 @@
 			verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
 			return -EACCES;
 		}
-		err = check_stack_boundary(env, regno - 1, reg->imm,
-					   zero_size_allowed, meta);
+		if (regs[regno - 1].type == PTR_TO_PACKET)
+			err = check_packet_access(env, regno - 1, 0, reg->imm);
+		else
+			err = check_stack_boundary(env, regno - 1, reg->imm,
+						   zero_size_allowed, meta);
 	}
 
 	return err;
+err_type:
+	verbose("R%d type=%s expected=%s\n", regno,
+		reg_type_str[type], reg_type_str[expected_type]);
+	return -EACCES;
 }
 
 static int check_map_func_compatibility(struct bpf_map *map, int func_id)
@@ -1052,7 +1078,8 @@
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_in_cgroup)
+		if (func_id != BPF_FUNC_skb_under_cgroup &&
+		    func_id != BPF_FUNC_current_task_under_cgroup)
 			goto error;
 		break;
 	default:
@@ -1074,7 +1101,8 @@
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
-	case BPF_FUNC_skb_in_cgroup:
+	case BPF_FUNC_current_task_under_cgroup:
+	case BPF_FUNC_skb_under_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
 		break;
@@ -1107,10 +1135,10 @@
 	return count > 1 ? -EINVAL : 0;
 }
 
-static void clear_all_pkt_pointers(struct verifier_env *env)
+static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *regs = state->regs, *reg;
+	struct bpf_verifier_state *state = &env->cur_state;
+	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++)
@@ -1130,12 +1158,12 @@
 	}
 }
 
-static int check_call(struct verifier_env *env, int func_id)
+static int check_call(struct bpf_verifier_env *env, int func_id)
 {
-	struct verifier_state *state = &env->cur_state;
+	struct bpf_verifier_state *state = &env->cur_state;
 	const struct bpf_func_proto *fn = NULL;
-	struct reg_state *regs = state->regs;
-	struct reg_state *reg;
+	struct bpf_reg_state *regs = state->regs;
+	struct bpf_reg_state *reg;
 	struct bpf_call_arg_meta meta;
 	bool changes_data;
 	int i, err;
@@ -1163,6 +1191,7 @@
 	changes_data = bpf_helper_changes_skb_data(fn->func);
 
 	memset(&meta, 0, sizeof(meta));
+	meta.pkt_access = fn->pkt_access;
 
 	/* We only support one arg being in raw mode at the moment, which
 	 * is sufficient for the helper functions we have right now.
@@ -1213,6 +1242,7 @@
 		regs[BPF_REG_0].type = NOT_INIT;
 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
+		regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0;
 		/* remember map_ptr, so that check_map_access()
 		 * can check 'value_size' boundary of memory access
 		 * to map element returned from bpf_map_lookup_elem()
@@ -1237,12 +1267,13 @@
 	return 0;
 }
 
-static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn)
+static int check_packet_ptr_add(struct bpf_verifier_env *env,
+				struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *dst_reg = &regs[insn->dst_reg];
-	struct reg_state *src_reg = &regs[insn->src_reg];
-	struct reg_state tmp_reg;
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
+	struct bpf_reg_state tmp_reg;
 	s32 imm;
 
 	if (BPF_SRC(insn->code) == BPF_K) {
@@ -1301,7 +1332,7 @@
 		/* dst_reg stays as pkt_ptr type and since some positive
 		 * integer value was added to the pointer, increment its 'id'
 		 */
-		dst_reg->id++;
+		dst_reg->id = ++env->id_gen;
 
 		/* something was added to pkt_ptr, set range and off to zero */
 		dst_reg->off = 0;
@@ -1310,10 +1341,10 @@
 	return 0;
 }
 
-static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
 	u8 opcode = BPF_OP(insn->code);
 	s64 imm_log2;
 
@@ -1323,7 +1354,7 @@
 	 */
 
 	if (BPF_SRC(insn->code) == BPF_X) {
-		struct reg_state *src_reg = &regs[insn->src_reg];
+		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
 
 		if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
 		    dst_reg->imm && opcode == BPF_ADD) {
@@ -1412,11 +1443,12 @@
 	return 0;
 }
 
-static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn)
+static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
+				struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
-	struct reg_state *dst_reg = &regs[insn->dst_reg];
-	struct reg_state *src_reg = &regs[insn->src_reg];
+	struct bpf_reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
+	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
 	u8 opcode = BPF_OP(insn->code);
 
 	/* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn.
@@ -1432,10 +1464,110 @@
 	return 0;
 }
 
-/* check validity of 32-bit and 64-bit arithmetic operations */
-static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn)
+static void check_reg_overflow(struct bpf_reg_state *reg)
 {
-	struct reg_state *regs = env->cur_state.regs, *dst_reg;
+	if (reg->max_value > BPF_REGISTER_MAX_RANGE)
+		reg->max_value = BPF_REGISTER_MAX_RANGE;
+	if ((s64)reg->min_value < BPF_REGISTER_MIN_RANGE)
+		reg->min_value = BPF_REGISTER_MIN_RANGE;
+}
+
+static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
+				    struct bpf_insn *insn)
+{
+	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
+	u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE;
+	bool min_set = false, max_set = false;
+	u8 opcode = BPF_OP(insn->code);
+
+	dst_reg = &regs[insn->dst_reg];
+	if (BPF_SRC(insn->code) == BPF_X) {
+		check_reg_overflow(&regs[insn->src_reg]);
+		min_val = regs[insn->src_reg].min_value;
+		max_val = regs[insn->src_reg].max_value;
+
+		/* If the source register is a random pointer then the
+		 * min_value/max_value values represent the range of the known
+		 * accesses into that value, not the actual min/max value of the
+		 * register itself.  In this case we have to reset the reg range
+		 * values so we know it is not safe to look at.
+		 */
+		if (regs[insn->src_reg].type != CONST_IMM &&
+		    regs[insn->src_reg].type != UNKNOWN_VALUE) {
+			min_val = BPF_REGISTER_MIN_RANGE;
+			max_val = BPF_REGISTER_MAX_RANGE;
+		}
+	} else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
+		   (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
+		min_val = max_val = insn->imm;
+		min_set = max_set = true;
+	}
+
+	/* We don't know anything about what was done to this register, mark it
+	 * as unknown.
+	 */
+	if (min_val == BPF_REGISTER_MIN_RANGE &&
+	    max_val == BPF_REGISTER_MAX_RANGE) {
+		reset_reg_range_values(regs, insn->dst_reg);
+		return;
+	}
+
+	switch (opcode) {
+	case BPF_ADD:
+		dst_reg->min_value += min_val;
+		dst_reg->max_value += max_val;
+		break;
+	case BPF_SUB:
+		dst_reg->min_value -= min_val;
+		dst_reg->max_value -= max_val;
+		break;
+	case BPF_MUL:
+		dst_reg->min_value *= min_val;
+		dst_reg->max_value *= max_val;
+		break;
+	case BPF_AND:
+		/* & is special since it could end up with 0 bits set. */
+		dst_reg->min_value &= min_val;
+		dst_reg->max_value = max_val;
+		break;
+	case BPF_LSH:
+		/* Gotta have special overflow logic here, if we're shifting
+		 * more than MAX_RANGE then just assume we have an invalid
+		 * range.
+		 */
+		if (min_val > ilog2(BPF_REGISTER_MAX_RANGE))
+			dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
+		else
+			dst_reg->min_value <<= min_val;
+
+		if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
+			dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
+		else
+			dst_reg->max_value <<= max_val;
+		break;
+	case BPF_RSH:
+		dst_reg->min_value >>= min_val;
+		dst_reg->max_value >>= max_val;
+		break;
+	case BPF_MOD:
+		/* % is special since it is an unsigned modulus, so the floor
+		 * will always be 0.
+		 */
+		dst_reg->min_value = 0;
+		dst_reg->max_value = max_val - 1;
+		break;
+	default:
+		reset_reg_range_values(regs, insn->dst_reg);
+		break;
+	}
+
+	check_reg_overflow(dst_reg);
+}
+
+/* check validity of 32-bit and 64-bit arithmetic operations */
+static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+	struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
 	u8 opcode = BPF_OP(insn->code);
 	int err;
 
@@ -1495,6 +1627,11 @@
 		if (err)
 			return err;
 
+		/* we are setting our register to something new, we need to
+		 * reset its range values.
+		 */
+		reset_reg_range_values(regs, insn->dst_reg);
+
 		if (BPF_SRC(insn->code) == BPF_X) {
 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
 				/* case: R1 = R2
@@ -1516,6 +1653,8 @@
 			 */
 			regs[insn->dst_reg].type = CONST_IMM;
 			regs[insn->dst_reg].imm = insn->imm;
+			regs[insn->dst_reg].max_value = insn->imm;
+			regs[insn->dst_reg].min_value = insn->imm;
 		}
 
 	} else if (opcode > BPF_END) {
@@ -1568,6 +1707,9 @@
 
 		dst_reg = &regs[insn->dst_reg];
 
+		/* first we want to adjust our ranges. */
+		adjust_reg_min_max_vals(env, insn);
+
 		/* pattern match 'bpf_add Rx, imm' instruction */
 		if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
 		    dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) {
@@ -1602,28 +1744,58 @@
 			return -EACCES;
 		}
 
-		/* mark dest operand */
-		mark_reg_unknown_value(regs, insn->dst_reg);
+		/* If we did pointer math on a map value then just set it to our
+		 * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or
+		 * loads to this register appropriately, otherwise just mark the
+		 * register as unknown.
+		 */
+		if (env->allow_ptr_leaks &&
+		    (dst_reg->type == PTR_TO_MAP_VALUE ||
+		     dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
+			dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
+		else
+			mark_reg_unknown_value(regs, insn->dst_reg);
 	}
 
 	return 0;
 }
 
-static void find_good_pkt_pointers(struct verifier_env *env,
-				   struct reg_state *dst_reg)
+static void find_good_pkt_pointers(struct bpf_verifier_state *state,
+				   struct bpf_reg_state *dst_reg)
 {
-	struct verifier_state *state = &env->cur_state;
-	struct reg_state *regs = state->regs, *reg;
+	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
-	/* r2 = r3;
-	 * r2 += 8
-	 * if (r2 > pkt_end) goto somewhere
-	 * r2 == dst_reg, pkt_end == src_reg,
-	 * r2=pkt(id=n,off=8,r=0)
-	 * r3=pkt(id=n,off=0,r=0)
-	 * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
-	 * so that range of bytes [r3, r3 + 8) is safe to access
+
+	/* LLVM can generate two kind of checks:
+	 *
+	 * Type 1:
+	 *
+	 *   r2 = r3;
+	 *   r2 += 8;
+	 *   if (r2 > pkt_end) goto <handle exception>
+	 *   <access okay>
+	 *
+	 *   Where:
+	 *     r2 == dst_reg, pkt_end == src_reg
+	 *     r2=pkt(id=n,off=8,r=0)
+	 *     r3=pkt(id=n,off=0,r=0)
+	 *
+	 * Type 2:
+	 *
+	 *   r2 = r3;
+	 *   r2 += 8;
+	 *   if (pkt_end >= r2) goto <access okay>
+	 *   <handle exception>
+	 *
+	 *   Where:
+	 *     pkt_end == dst_reg, r2 == src_reg
+	 *     r2=pkt(id=n,off=8,r=0)
+	 *     r3=pkt(id=n,off=0,r=0)
+	 *
+	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
+	 * so that range of bytes [r3, r3 + 8) is safe to access.
 	 */
+
 	for (i = 0; i < MAX_BPF_REG; i++)
 		if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
 			regs[i].range = dst_reg->off;
@@ -1637,11 +1809,109 @@
 	}
 }
 
-static int check_cond_jmp_op(struct verifier_env *env,
+/* Adjusts the register min/max values in the case that the dst_reg is the
+ * variable register that we are working on, and src_reg is a constant or we're
+ * simply doing a BPF_K check.
+ */
+static void reg_set_min_max(struct bpf_reg_state *true_reg,
+			    struct bpf_reg_state *false_reg, u64 val,
+			    u8 opcode)
+{
+	switch (opcode) {
+	case BPF_JEQ:
+		/* If this is false then we know nothing Jon Snow, but if it is
+		 * true then we know for sure.
+		 */
+		true_reg->max_value = true_reg->min_value = val;
+		break;
+	case BPF_JNE:
+		/* If this is true we know nothing Jon Snow, but if it is false
+		 * we know the value for sure;
+		 */
+		false_reg->max_value = false_reg->min_value = val;
+		break;
+	case BPF_JGT:
+		/* Unsigned comparison, the minimum value is 0. */
+		false_reg->min_value = 0;
+	case BPF_JSGT:
+		/* If this is false then we know the maximum val is val,
+		 * otherwise we know the min val is val+1.
+		 */
+		false_reg->max_value = val;
+		true_reg->min_value = val + 1;
+		break;
+	case BPF_JGE:
+		/* Unsigned comparison, the minimum value is 0. */
+		false_reg->min_value = 0;
+	case BPF_JSGE:
+		/* If this is false then we know the maximum value is val - 1,
+		 * otherwise we know the mimimum value is val.
+		 */
+		false_reg->max_value = val - 1;
+		true_reg->min_value = val;
+		break;
+	default:
+		break;
+	}
+
+	check_reg_overflow(false_reg);
+	check_reg_overflow(true_reg);
+}
+
+/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
+ * is the variable reg.
+ */
+static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
+				struct bpf_reg_state *false_reg, u64 val,
+				u8 opcode)
+{
+	switch (opcode) {
+	case BPF_JEQ:
+		/* If this is false then we know nothing Jon Snow, but if it is
+		 * true then we know for sure.
+		 */
+		true_reg->max_value = true_reg->min_value = val;
+		break;
+	case BPF_JNE:
+		/* If this is true we know nothing Jon Snow, but if it is false
+		 * we know the value for sure;
+		 */
+		false_reg->max_value = false_reg->min_value = val;
+		break;
+	case BPF_JGT:
+		/* Unsigned comparison, the minimum value is 0. */
+		true_reg->min_value = 0;
+	case BPF_JSGT:
+		/*
+		 * If this is false, then the val is <= the register, if it is
+		 * true the register <= to the val.
+		 */
+		false_reg->min_value = val;
+		true_reg->max_value = val - 1;
+		break;
+	case BPF_JGE:
+		/* Unsigned comparison, the minimum value is 0. */
+		true_reg->min_value = 0;
+	case BPF_JSGE:
+		/* If this is false then constant < register, if it is true then
+		 * the register < constant.
+		 */
+		false_reg->min_value = val + 1;
+		true_reg->max_value = val;
+		break;
+	default:
+		break;
+	}
+
+	check_reg_overflow(false_reg);
+	check_reg_overflow(true_reg);
+}
+
+static int check_cond_jmp_op(struct bpf_verifier_env *env,
 			     struct bpf_insn *insn, int *insn_idx)
 {
-	struct reg_state *regs = env->cur_state.regs, *dst_reg;
-	struct verifier_state *other_branch;
+	struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
+	struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
 	u8 opcode = BPF_OP(insn->code);
 	int err;
 
@@ -1703,7 +1973,24 @@
 	if (!other_branch)
 		return -EFAULT;
 
-	/* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */
+	/* detect if we are comparing against a constant value so we can adjust
+	 * our min/max values for our dst register.
+	 */
+	if (BPF_SRC(insn->code) == BPF_X) {
+		if (regs[insn->src_reg].type == CONST_IMM)
+			reg_set_min_max(&other_branch->regs[insn->dst_reg],
+					dst_reg, regs[insn->src_reg].imm,
+					opcode);
+		else if (dst_reg->type == CONST_IMM)
+			reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
+					    &regs[insn->src_reg], dst_reg->imm,
+					    opcode);
+	} else {
+		reg_set_min_max(&other_branch->regs[insn->dst_reg],
+					dst_reg, insn->imm, opcode);
+	}
+
+	/* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
 	if (BPF_SRC(insn->code) == BPF_K &&
 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
 	    dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
@@ -1722,13 +2009,17 @@
 	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
 		   dst_reg->type == PTR_TO_PACKET &&
 		   regs[insn->src_reg].type == PTR_TO_PACKET_END) {
-		find_good_pkt_pointers(env, dst_reg);
+		find_good_pkt_pointers(this_branch, dst_reg);
+	} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+		   dst_reg->type == PTR_TO_PACKET_END &&
+		   regs[insn->src_reg].type == PTR_TO_PACKET) {
+		find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
 	} else if (is_pointer_value(env, insn->dst_reg)) {
 		verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
 		return -EACCES;
 	}
 	if (log_level)
-		print_verifier_state(&env->cur_state);
+		print_verifier_state(this_branch);
 	return 0;
 }
 
@@ -1741,9 +2032,9 @@
 }
 
 /* verify BPF_LD_IMM64 instruction */
-static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	int err;
 
 	if (BPF_SIZE(insn->code) != BPF_DW) {
@@ -1759,9 +2050,19 @@
 	if (err)
 		return err;
 
-	if (insn->src_reg == 0)
-		/* generic move 64-bit immediate into a register */
+	if (insn->src_reg == 0) {
+		/* generic move 64-bit immediate into a register,
+		 * only analyzer needs to collect the ld_imm value.
+		 */
+		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+
+		if (!env->analyzer_ops)
+			return 0;
+
+		regs[insn->dst_reg].type = CONST_IMM;
+		regs[insn->dst_reg].imm = imm;
 		return 0;
+	}
 
 	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
 	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
@@ -1798,11 +2099,11 @@
  * Output:
  *   R0 - 8/16/32-bit skb data converted to cpu endianness
  */
-static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
+static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-	struct reg_state *regs = env->cur_state.regs;
+	struct bpf_reg_state *regs = env->cur_state.regs;
 	u8 mode = BPF_MODE(insn->code);
-	struct reg_state *reg;
+	struct bpf_reg_state *reg;
 	int i, err;
 
 	if (!may_access_skb(env->prog->type)) {
@@ -1888,7 +2189,7 @@
 	BRANCH = 2,
 };
 
-#define STATE_LIST_MARK ((struct verifier_state_list *) -1L)
+#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
 
 static int *insn_stack;	/* stack of insns to process */
 static int cur_stack;	/* current stack index */
@@ -1899,7 +2200,7 @@
  * w - next instruction
  * e - edge
  */
-static int push_insn(int t, int w, int e, struct verifier_env *env)
+static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
 {
 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
 		return 0;
@@ -1940,7 +2241,7 @@
 /* non-recursive depth-first-search to detect loops in BPF program
  * loop == back-edge in directed graph
  */
-static int check_cfg(struct verifier_env *env)
+static int check_cfg(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insns = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -2049,7 +2350,8 @@
 /* the following conditions reduce the number of explored insns
  * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
  */
-static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur)
+static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
+				   struct bpf_reg_state *cur)
 {
 	if (old->id != cur->id)
 		return false;
@@ -2124,9 +2426,11 @@
  * whereas register type in current state is meaningful, it means that
  * the current state will reach 'bpf_exit' instruction safely
  */
-static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
+static bool states_equal(struct bpf_verifier_env *env,
+			 struct bpf_verifier_state *old,
+			 struct bpf_verifier_state *cur)
 {
-	struct reg_state *rold, *rcur;
+	struct bpf_reg_state *rold, *rcur;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++) {
@@ -2136,6 +2440,13 @@
 		if (memcmp(rold, rcur, sizeof(*rold)) == 0)
 			continue;
 
+		/* If the ranges were not the same, but everything else was and
+		 * we didn't do a variable access into a map then we are a-ok.
+		 */
+		if (!env->varlen_map_value_access &&
+		    rold->type == rcur->type && rold->imm == rcur->imm)
+			continue;
+
 		if (rold->type == NOT_INIT ||
 		    (rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT))
 			continue;
@@ -2166,9 +2477,9 @@
 			 * the same, check that stored pointers types
 			 * are the same as well.
 			 * Ex: explored safe path could have stored
-			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
 			 * but current path has stored:
-			 * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16}
+			 * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
 			 * such verifier states are not equivalent.
 			 * return false to continue verification of this path
 			 */
@@ -2179,10 +2490,10 @@
 	return true;
 }
 
-static int is_state_visited(struct verifier_env *env, int insn_idx)
+static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 {
-	struct verifier_state_list *new_sl;
-	struct verifier_state_list *sl;
+	struct bpf_verifier_state_list *new_sl;
+	struct bpf_verifier_state_list *sl;
 
 	sl = env->explored_states[insn_idx];
 	if (!sl)
@@ -2192,7 +2503,7 @@
 		return 0;
 
 	while (sl != STATE_LIST_MARK) {
-		if (states_equal(&sl->state, &env->cur_state))
+		if (states_equal(env, &sl->state, &env->cur_state))
 			/* reached equivalent register/stack state,
 			 * prune the search
 			 */
@@ -2206,7 +2517,7 @@
 	 * it will be rejected. Since there are no loops, we won't be
 	 * seeing this 'insn_idx' instruction again on the way to bpf_exit
 	 */
-	new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER);
+	new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
 	if (!new_sl)
 		return -ENOMEM;
 
@@ -2217,11 +2528,20 @@
 	return 0;
 }
 
-static int do_check(struct verifier_env *env)
+static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
+				  int insn_idx, int prev_insn_idx)
 {
-	struct verifier_state *state = &env->cur_state;
+	if (!env->analyzer_ops || !env->analyzer_ops->insn_hook)
+		return 0;
+
+	return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx);
+}
+
+static int do_check(struct bpf_verifier_env *env)
+{
+	struct bpf_verifier_state *state = &env->cur_state;
 	struct bpf_insn *insns = env->prog->insnsi;
-	struct reg_state *regs = state->regs;
+	struct bpf_reg_state *regs = state->regs;
 	int insn_cnt = env->prog->len;
 	int insn_idx, prev_insn_idx = 0;
 	int insn_processed = 0;
@@ -2229,6 +2549,7 @@
 
 	init_reg_state(regs);
 	insn_idx = 0;
+	env->varlen_map_value_access = false;
 	for (;;) {
 		struct bpf_insn *insn;
 		u8 class;
@@ -2275,13 +2596,17 @@
 			print_bpf_insn(insn);
 		}
 
+		err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
+		if (err)
+			return err;
+
 		if (class == BPF_ALU || class == BPF_ALU64) {
 			err = check_alu_op(env, insn);
 			if (err)
 				return err;
 
 		} else if (class == BPF_LDX) {
-			enum bpf_reg_type src_reg_type;
+			enum bpf_reg_type *prev_src_type, src_reg_type;
 
 			/* check for reserved fields is already done */
 
@@ -2305,21 +2630,25 @@
 			if (err)
 				return err;
 
-			if (BPF_SIZE(insn->code) != BPF_W) {
+			reset_reg_range_values(regs, insn->dst_reg);
+			if (BPF_SIZE(insn->code) != BPF_W &&
+			    BPF_SIZE(insn->code) != BPF_DW) {
 				insn_idx++;
 				continue;
 			}
 
-			if (insn->imm == 0) {
+			prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+			if (*prev_src_type == NOT_INIT) {
 				/* saw a valid insn
 				 * dst_reg = *(u32 *)(src_reg + off)
-				 * use reserved 'imm' field to mark this insn
+				 * save type to validate intersecting paths
 				 */
-				insn->imm = src_reg_type;
+				*prev_src_type = src_reg_type;
 
-			} else if (src_reg_type != insn->imm &&
+			} else if (src_reg_type != *prev_src_type &&
 				   (src_reg_type == PTR_TO_CTX ||
-				    insn->imm == PTR_TO_CTX)) {
+				    *prev_src_type == PTR_TO_CTX)) {
 				/* ABuser program is trying to use the same insn
 				 * dst_reg = *(u32*) (src_reg + off)
 				 * with different pointer types:
@@ -2332,7 +2661,7 @@
 			}
 
 		} else if (class == BPF_STX) {
-			enum bpf_reg_type dst_reg_type;
+			enum bpf_reg_type *prev_dst_type, dst_reg_type;
 
 			if (BPF_MODE(insn->code) == BPF_XADD) {
 				err = check_xadd(env, insn);
@@ -2360,11 +2689,13 @@
 			if (err)
 				return err;
 
-			if (insn->imm == 0) {
-				insn->imm = dst_reg_type;
-			} else if (dst_reg_type != insn->imm &&
+			prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
+
+			if (*prev_dst_type == NOT_INIT) {
+				*prev_dst_type = dst_reg_type;
+			} else if (dst_reg_type != *prev_dst_type &&
 				   (dst_reg_type == PTR_TO_CTX ||
-				    insn->imm == PTR_TO_CTX)) {
+				    *prev_dst_type == PTR_TO_CTX)) {
 				verbose("same insn cannot be used with different pointers\n");
 				return -EINVAL;
 			}
@@ -2470,6 +2801,7 @@
 				verbose("invalid BPF_LD mode\n");
 				return -EINVAL;
 			}
+			reset_reg_range_values(regs, insn->dst_reg);
 		} else {
 			verbose("unknown insn class %d\n", class);
 			return -EINVAL;
@@ -2482,14 +2814,28 @@
 	return 0;
 }
 
+static int check_map_prog_compatibility(struct bpf_map *map,
+					struct bpf_prog *prog)
+
+{
+	if (prog->type == BPF_PROG_TYPE_PERF_EVENT &&
+	    (map->map_type == BPF_MAP_TYPE_HASH ||
+	     map->map_type == BPF_MAP_TYPE_PERCPU_HASH) &&
+	    (map->map_flags & BPF_F_NO_PREALLOC)) {
+		verbose("perf_event programs can only use preallocated hash map\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /* look for pseudo eBPF instructions that access map FDs and
  * replace them with actual map pointers
  */
-static int replace_map_fd_with_map_ptr(struct verifier_env *env)
+static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
-	int i, j;
+	int i, j, err;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		if (BPF_CLASS(insn->code) == BPF_LDX &&
@@ -2533,6 +2879,12 @@
 				return PTR_ERR(map);
 			}
 
+			err = check_map_prog_compatibility(map, env->prog);
+			if (err) {
+				fdput(f);
+				return err;
+			}
+
 			/* store map pointer inside BPF_LD_IMM64 instruction */
 			insn[0].imm = (u32) (unsigned long) map;
 			insn[1].imm = ((u64) (unsigned long) map) >> 32;
@@ -2576,7 +2928,7 @@
 }
 
 /* drop refcnt of maps used by the rejected program */
-static void release_maps(struct verifier_env *env)
+static void release_maps(struct bpf_verifier_env *env)
 {
 	int i;
 
@@ -2585,7 +2937,7 @@
 }
 
 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
-static void convert_pseudo_ld_imm64(struct verifier_env *env)
+static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
 {
 	struct bpf_insn *insn = env->prog->insnsi;
 	int insn_cnt = env->prog->len;
@@ -2599,62 +2951,74 @@
 /* convert load instructions that access fields of 'struct __sk_buff'
  * into sequence of instructions that access fields of 'struct sk_buff'
  */
-static int convert_ctx_accesses(struct verifier_env *env)
+static int convert_ctx_accesses(struct bpf_verifier_env *env)
 {
-	struct bpf_insn *insn = env->prog->insnsi;
-	int insn_cnt = env->prog->len;
-	struct bpf_insn insn_buf[16];
+	const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+	const int insn_cnt = env->prog->len;
+	struct bpf_insn insn_buf[16], *insn;
 	struct bpf_prog *new_prog;
 	enum bpf_access_type type;
-	int i;
+	int i, cnt, delta = 0;
 
-	if (!env->prog->aux->ops->convert_ctx_access)
+	if (ops->gen_prologue) {
+		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
+					env->prog);
+		if (cnt >= ARRAY_SIZE(insn_buf)) {
+			verbose("bpf verifier is misconfigured\n");
+			return -EINVAL;
+		} else if (cnt) {
+			new_prog = bpf_patch_insn_single(env->prog, 0,
+							 insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+			env->prog = new_prog;
+			delta += cnt - 1;
+		}
+	}
+
+	if (!ops->convert_ctx_access)
 		return 0;
 
-	for (i = 0; i < insn_cnt; i++, insn++) {
-		u32 insn_delta, cnt;
+	insn = env->prog->insnsi + delta;
 
-		if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
 			type = BPF_READ;
-		else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+		else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
 			type = BPF_WRITE;
 		else
 			continue;
 
-		if (insn->imm != PTR_TO_CTX) {
-			/* clear internal mark */
-			insn->imm = 0;
+		if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
 			continue;
-		}
 
-		cnt = env->prog->aux->ops->
-			convert_ctx_access(type, insn->dst_reg, insn->src_reg,
-					   insn->off, insn_buf, env->prog);
+		cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg,
+					      insn->off, insn_buf, env->prog);
 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 			verbose("bpf verifier is misconfigured\n");
 			return -EINVAL;
 		}
 
-		new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt);
+		new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
+						 cnt);
 		if (!new_prog)
 			return -ENOMEM;
 
-		insn_delta = cnt - 1;
+		delta += cnt - 1;
 
 		/* keep walking new program and skip insns we just inserted */
 		env->prog = new_prog;
-		insn      = new_prog->insnsi + i + insn_delta;
-
-		insn_cnt += insn_delta;
-		i        += insn_delta;
+		insn      = new_prog->insnsi + i + delta;
 	}
 
 	return 0;
 }
 
-static void free_states(struct verifier_env *env)
+static void free_states(struct bpf_verifier_env *env)
 {
-	struct verifier_state_list *sl, *sln;
+	struct bpf_verifier_state_list *sl, *sln;
 	int i;
 
 	if (!env->explored_states)
@@ -2677,19 +3041,24 @@
 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 {
 	char __user *log_ubuf = NULL;
-	struct verifier_env *env;
+	struct bpf_verifier_env *env;
 	int ret = -EINVAL;
 
 	if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
 		return -E2BIG;
 
-	/* 'struct verifier_env' can be global, but since it's not small,
+	/* 'struct bpf_verifier_env' can be global, but since it's not small,
 	 * allocate/free it every time bpf_check() is called
 	 */
-	env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL);
+	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
 	if (!env)
 		return -ENOMEM;
 
+	env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+				     (*prog)->len);
+	ret = -ENOMEM;
+	if (!env->insn_aux_data)
+		goto err_free_env;
 	env->prog = *prog;
 
 	/* grab the mutex to protect few globals used by verifier */
@@ -2708,12 +3077,12 @@
 		/* log_* values have to be sane */
 		if (log_size < 128 || log_size > UINT_MAX >> 8 ||
 		    log_level == 0 || log_ubuf == NULL)
-			goto free_env;
+			goto err_unlock;
 
 		ret = -ENOMEM;
 		log_buf = vmalloc(log_size);
 		if (!log_buf)
-			goto free_env;
+			goto err_unlock;
 	} else {
 		log_level = 0;
 	}
@@ -2723,7 +3092,7 @@
 		goto skip_full_check;
 
 	env->explored_states = kcalloc(env->prog->len,
-				       sizeof(struct verifier_state_list *),
+				       sizeof(struct bpf_verifier_state_list *),
 				       GFP_USER);
 	ret = -ENOMEM;
 	if (!env->explored_states)
@@ -2782,14 +3151,67 @@
 free_log_buf:
 	if (log_level)
 		vfree(log_buf);
-free_env:
 	if (!env->prog->aux->used_maps)
 		/* if we didn't copy map pointers into bpf_prog_info, release
 		 * them now. Otherwise free_bpf_prog_info() will release them.
 		 */
 		release_maps(env);
 	*prog = env->prog;
-	kfree(env);
+err_unlock:
 	mutex_unlock(&bpf_verifier_lock);
+	vfree(env->insn_aux_data);
+err_free_env:
+	kfree(env);
 	return ret;
 }
+
+int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
+		 void *priv)
+{
+	struct bpf_verifier_env *env;
+	int ret;
+
+	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
+	if (!env)
+		return -ENOMEM;
+
+	env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
+				     prog->len);
+	ret = -ENOMEM;
+	if (!env->insn_aux_data)
+		goto err_free_env;
+	env->prog = prog;
+	env->analyzer_ops = ops;
+	env->analyzer_priv = priv;
+
+	/* grab the mutex to protect few globals used by verifier */
+	mutex_lock(&bpf_verifier_lock);
+
+	log_level = 0;
+
+	env->explored_states = kcalloc(env->prog->len,
+				       sizeof(struct bpf_verifier_state_list *),
+				       GFP_KERNEL);
+	ret = -ENOMEM;
+	if (!env->explored_states)
+		goto skip_full_check;
+
+	ret = check_cfg(env);
+	if (ret < 0)
+		goto skip_full_check;
+
+	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+
+	ret = do_check(env);
+
+skip_full_check:
+	while (pop_stack(env, NULL) >= 0);
+	free_states(env);
+
+	mutex_unlock(&bpf_verifier_lock);
+	vfree(env->insn_aux_data);
+err_free_env:
+	kfree(env);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bpf_analyzer);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7..9ba28310 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3446,9 +3446,28 @@
 	 * Except for the root, subtree_control must be zero for a cgroup
 	 * with tasks so that child cgroups don't compete against tasks.
 	 */
-	if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
-		ret = -EBUSY;
-		goto out_unlock;
+	if (enable && cgroup_parent(cgrp)) {
+		struct cgrp_cset_link *link;
+
+		/*
+		 * Because namespaces pin csets too, @cgrp->cset_links
+		 * might not be empty even when @cgrp is empty.  Walk and
+		 * verify each cset.
+		 */
+		spin_lock_irq(&css_set_lock);
+
+		ret = 0;
+		list_for_each_entry(link, &cgrp->cset_links, cset_link) {
+			if (css_set_populated(link->cset)) {
+				ret = -EBUSY;
+				break;
+			}
+		}
+
+		spin_unlock_irq(&css_set_lock);
+
+		if (ret)
+			goto out_unlock;
 	}
 
 	/* save and update control masks and prepare csses */
@@ -3899,7 +3918,9 @@
  * cgroup_task_count - count the number of tasks in a cgroup.
  * @cgrp: the cgroup in question
  *
- * Return the number of tasks in the cgroup.
+ * Return the number of tasks in the cgroup.  The returned number can be
+ * higher than the actual number of tasks due to css_set references from
+ * namespace roots and temporary usages.
  */
 static int cgroup_task_count(const struct cgroup *cgrp)
 {
@@ -5606,6 +5627,12 @@
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
 
+	/*
+	 * The latency of the synchronize_sched() is too high for cgroups,
+	 * avoid it at the cost of forcing all readers into the slow path.
+	 */
+	rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
+
 	get_user_ns(init_cgroup_ns.user_ns);
 
 	mutex_lock(&cgroup_mutex);
@@ -6270,6 +6297,12 @@
 	if (cgroup_sk_alloc_disabled)
 		return;
 
+	/* Socket clone path */
+	if (skcd->val) {
+		cgroup_get(sock_cgroup_ptr(skcd));
+		return;
+	}
+
 	rcu_read_lock();
 
 	while (true) {
diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config
index c2de56a..7fa0c4a 100644
--- a/kernel/configs/tiny.config
+++ b/kernel/configs/tiny.config
@@ -1,4 +1,12 @@
+# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_KERNEL_GZIP is not set
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_KERNEL_XZ=y
+# CONFIG_KERNEL_LZO is not set
+# CONFIG_KERNEL_LZ4 is not set
 CONFIG_OPTIMIZE_INLINING=y
+# CONFIG_SLAB is not set
+# CONFIG_SLUB is not set
 CONFIG_SLOB=y
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 341bf80..5df20d6 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -23,6 +23,8 @@
 #include <linux/tick.h>
 #include <linux/irq.h>
 #include <linux/smpboot.h>
+#include <linux/relay.h>
+#include <linux/slab.h>
 
 #include <trace/events/power.h>
 #define CREATE_TRACE_POINTS
@@ -37,8 +39,9 @@
  * @thread:	Pointer to the hotplug thread
  * @should_run:	Thread should execute
  * @rollback:	Perform a rollback
- * @cb_stat:	The state for a single callback (install/uninstall)
- * @cb:		Single callback function (install/uninstall)
+ * @single:	Single callback invocation
+ * @bringup:	Single callback bringup or teardown selector
+ * @cb_state:	The state for a single callback (install/uninstall)
  * @result:	Result of the operation
  * @done:	Signal completion to the issuer of the task
  */
@@ -49,8 +52,10 @@
 	struct task_struct	*thread;
 	bool			should_run;
 	bool			rollback;
+	bool			single;
+	bool			bringup;
+	struct hlist_node	*node;
 	enum cpuhp_state	cb_state;
-	int			(*cb)(unsigned int cpu);
 	int			result;
 	struct completion	done;
 #endif
@@ -68,35 +73,103 @@
  * @cant_stop:	Bringup/teardown can't be stopped at this step
  */
 struct cpuhp_step {
-	const char	*name;
-	int		(*startup)(unsigned int cpu);
-	int		(*teardown)(unsigned int cpu);
-	bool		skip_onerr;
-	bool		cant_stop;
+	const char		*name;
+	union {
+		int		(*single)(unsigned int cpu);
+		int		(*multi)(unsigned int cpu,
+					 struct hlist_node *node);
+	} startup;
+	union {
+		int		(*single)(unsigned int cpu);
+		int		(*multi)(unsigned int cpu,
+					 struct hlist_node *node);
+	} teardown;
+	struct hlist_head	list;
+	bool			skip_onerr;
+	bool			cant_stop;
+	bool			multi_instance;
 };
 
 static DEFINE_MUTEX(cpuhp_state_mutex);
 static struct cpuhp_step cpuhp_bp_states[];
 static struct cpuhp_step cpuhp_ap_states[];
 
+static bool cpuhp_is_ap_state(enum cpuhp_state state)
+{
+	/*
+	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
+	 * purposes as that state is handled explicitly in cpu_down.
+	 */
+	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
+}
+
+static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
+{
+	struct cpuhp_step *sp;
+
+	sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
+	return sp + state;
+}
+
 /**
  * cpuhp_invoke_callback _ Invoke the callbacks for a given state
  * @cpu:	The cpu for which the callback should be invoked
  * @step:	The step in the state machine
- * @cb:		The callback function to invoke
+ * @bringup:	True if the bringup callback should be invoked
  *
- * Called from cpu hotplug and from the state register machinery
+ * Called from cpu hotplug and from the state register machinery.
  */
-static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state step,
-				 int (*cb)(unsigned int))
+static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
+				 bool bringup, struct hlist_node *node)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
-	int ret = 0;
+	struct cpuhp_step *step = cpuhp_get_step(state);
+	int (*cbm)(unsigned int cpu, struct hlist_node *node);
+	int (*cb)(unsigned int cpu);
+	int ret, cnt;
 
-	if (cb) {
-		trace_cpuhp_enter(cpu, st->target, step, cb);
+	if (!step->multi_instance) {
+		cb = bringup ? step->startup.single : step->teardown.single;
+		if (!cb)
+			return 0;
+		trace_cpuhp_enter(cpu, st->target, state, cb);
 		ret = cb(cpu);
-		trace_cpuhp_exit(cpu, st->state, step, ret);
+		trace_cpuhp_exit(cpu, st->state, state, ret);
+		return ret;
+	}
+	cbm = bringup ? step->startup.multi : step->teardown.multi;
+	if (!cbm)
+		return 0;
+
+	/* Single invocation for instance add/remove */
+	if (node) {
+		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+		ret = cbm(cpu, node);
+		trace_cpuhp_exit(cpu, st->state, state, ret);
+		return ret;
+	}
+
+	/* State transition. Invoke on all instances */
+	cnt = 0;
+	hlist_for_each(node, &step->list) {
+		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
+		ret = cbm(cpu, node);
+		trace_cpuhp_exit(cpu, st->state, state, ret);
+		if (ret)
+			goto err;
+		cnt++;
+	}
+	return 0;
+err:
+	/* Rollback the instances if one failed */
+	cbm = !bringup ? step->startup.multi : step->teardown.multi;
+	if (!cbm)
+		return ret;
+
+	hlist_for_each(node, &step->list) {
+		if (!cnt--)
+			break;
+		cbm(cpu, node);
 	}
 	return ret;
 }
@@ -260,10 +333,17 @@
 }
 EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
 
+static void __cpu_hotplug_enable(void)
+{
+	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
+		return;
+	cpu_hotplug_disabled--;
+}
+
 void cpu_hotplug_enable(void)
 {
 	cpu_maps_update_begin();
-	WARN_ON(--cpu_hotplug_disabled < 0);
+	__cpu_hotplug_enable();
 	cpu_maps_update_done();
 }
 EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
@@ -330,12 +410,6 @@
 	return 0;
 }
 
-static int notify_starting(unsigned int cpu)
-{
-	cpu_notify(CPU_STARTING, cpu);
-	return 0;
-}
-
 static int bringup_wait_for_ap(unsigned int cpu)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -349,8 +423,16 @@
 	struct task_struct *idle = idle_thread_get(cpu);
 	int ret;
 
+	/*
+	 * Some architectures have to walk the irq descriptors to
+	 * setup the vector space for the cpu which comes online.
+	 * Prevent irq alloc/free across the bringup.
+	 */
+	irq_lock_sparse();
+
 	/* Arch-specific enabling code. */
 	ret = __cpu_up(cpu, idle);
+	irq_unlock_sparse();
 	if (ret) {
 		cpu_notify(CPU_UP_CANCELED, cpu);
 		return ret;
@@ -363,62 +445,55 @@
 /*
  * Hotplug state machine related functions
  */
-static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st,
-			  struct cpuhp_step *steps)
+static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
 {
 	for (st->state++; st->state < st->target; st->state++) {
-		struct cpuhp_step *step = steps + st->state;
+		struct cpuhp_step *step = cpuhp_get_step(st->state);
 
 		if (!step->skip_onerr)
-			cpuhp_invoke_callback(cpu, st->state, step->startup);
+			cpuhp_invoke_callback(cpu, st->state, true, NULL);
 	}
 }
 
 static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-				struct cpuhp_step *steps, enum cpuhp_state target)
+				enum cpuhp_state target)
 {
 	enum cpuhp_state prev_state = st->state;
 	int ret = 0;
 
 	for (; st->state > target; st->state--) {
-		struct cpuhp_step *step = steps + st->state;
-
-		ret = cpuhp_invoke_callback(cpu, st->state, step->teardown);
+		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
 		if (ret) {
 			st->target = prev_state;
-			undo_cpu_down(cpu, st, steps);
+			undo_cpu_down(cpu, st);
 			break;
 		}
 	}
 	return ret;
 }
 
-static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st,
-			struct cpuhp_step *steps)
+static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
 {
 	for (st->state--; st->state > st->target; st->state--) {
-		struct cpuhp_step *step = steps + st->state;
+		struct cpuhp_step *step = cpuhp_get_step(st->state);
 
 		if (!step->skip_onerr)
-			cpuhp_invoke_callback(cpu, st->state, step->teardown);
+			cpuhp_invoke_callback(cpu, st->state, false, NULL);
 	}
 }
 
 static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
-			      struct cpuhp_step *steps, enum cpuhp_state target)
+			      enum cpuhp_state target)
 {
 	enum cpuhp_state prev_state = st->state;
 	int ret = 0;
 
 	while (st->state < target) {
-		struct cpuhp_step *step;
-
 		st->state++;
-		step = steps + st->state;
-		ret = cpuhp_invoke_callback(cpu, st->state, step->startup);
+		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
 		if (ret) {
 			st->target = prev_state;
-			undo_cpu_up(cpu, st, steps);
+			undo_cpu_up(cpu, st);
 			break;
 		}
 	}
@@ -447,13 +522,13 @@
 {
 	enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);
 
-	return cpuhp_down_callbacks(cpu, st, cpuhp_ap_states, target);
+	return cpuhp_down_callbacks(cpu, st, target);
 }
 
 /* Execute the online startup callbacks. Used to be CPU_ONLINE */
 static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
 {
-	return cpuhp_up_callbacks(cpu, st, cpuhp_ap_states, st->target);
+	return cpuhp_up_callbacks(cpu, st, st->target);
 }
 
 /*
@@ -476,18 +551,20 @@
 	st->should_run = false;
 
 	/* Single callback invocation for [un]install ? */
-	if (st->cb) {
+	if (st->single) {
 		if (st->cb_state < CPUHP_AP_ONLINE) {
 			local_irq_disable();
-			ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+			ret = cpuhp_invoke_callback(cpu, st->cb_state,
+						    st->bringup, st->node);
 			local_irq_enable();
 		} else {
-			ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
+			ret = cpuhp_invoke_callback(cpu, st->cb_state,
+						    st->bringup, st->node);
 		}
 	} else if (st->rollback) {
 		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
 
-		undo_cpu_down(cpu, st, cpuhp_ap_states);
+		undo_cpu_down(cpu, st);
 		/*
 		 * This is a momentary workaround to keep the notifier users
 		 * happy. Will go away once we got rid of the notifiers.
@@ -509,8 +586,9 @@
 }
 
 /* Invoke a single callback on a remote cpu */
-static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state,
-				    int (*cb)(unsigned int))
+static int
+cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
+			 struct hlist_node *node)
 {
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 
@@ -522,10 +600,13 @@
 	 * we invoke the thread function directly.
 	 */
 	if (!st->thread)
-		return cpuhp_invoke_callback(cpu, state, cb);
+		return cpuhp_invoke_callback(cpu, state, bringup, node);
 
 	st->cb_state = state;
-	st->cb = cb;
+	st->single = true;
+	st->bringup = bringup;
+	st->node = node;
+
 	/*
 	 * Make sure the above stores are visible before should_run becomes
 	 * true. Paired with the mb() above in cpuhp_thread_fun()
@@ -541,7 +622,7 @@
 static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
 {
 	st->result = 0;
-	st->cb = NULL;
+	st->single = false;
 	/*
 	 * Make sure the above stores are visible before should_run becomes
 	 * true. Paired with the mb() above in cpuhp_thread_fun()
@@ -674,12 +755,6 @@
 	return err;
 }
 
-static int notify_dying(unsigned int cpu)
-{
-	cpu_notify(CPU_DYING, cpu);
-	return 0;
-}
-
 /* Take this CPU down. */
 static int take_cpu_down(void *_param)
 {
@@ -692,12 +767,16 @@
 	if (err < 0)
 		return err;
 
+	/*
+	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
+	 * do this step again.
+	 */
+	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
+	st->state--;
 	/* Invoke the former CPU_DYING callbacks */
-	for (; st->state > target; st->state--) {
-		struct cpuhp_step *step = cpuhp_ap_states + st->state;
+	for (; st->state > target; st->state--)
+		cpuhp_invoke_callback(cpu, st->state, false, NULL);
 
-		cpuhp_invoke_callback(cpu, st->state, step->teardown);
-	}
 	/* Give up timekeeping duties */
 	tick_handover_do_timer();
 	/* Park the stopper thread */
@@ -734,7 +813,7 @@
 	BUG_ON(cpu_online(cpu));
 
 	/*
-	 * The migration_call() CPU_DYING callback will have removed all
+	 * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
 	 * runnable tasks from the cpu, there's only the idle task left now
 	 * that the migration thread is done doing the stop_machine thing.
 	 *
@@ -787,7 +866,6 @@
 #define notify_down_prepare	NULL
 #define takedown_cpu		NULL
 #define notify_dead		NULL
-#define notify_dying		NULL
 #endif
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -836,7 +914,7 @@
 	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
 	 * to do the further cleanups.
 	 */
-	ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
+	ret = cpuhp_down_callbacks(cpu, st, target);
 	if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
 		st->target = prev_state;
 		st->rollback = true;
@@ -877,10 +955,9 @@
 #endif /*CONFIG_HOTPLUG_CPU*/
 
 /**
- * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
+ * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
  * @cpu: cpu that just started
  *
- * This function calls the cpu_chain notifiers with CPU_STARTING.
  * It must be called by the arch code on the new cpu, before the new cpu
  * enables interrupts and before the "boot" cpu returns from __cpu_up().
  */
@@ -889,12 +966,10 @@
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
 
+	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
 	while (st->state < target) {
-		struct cpuhp_step *step;
-
 		st->state++;
-		step = cpuhp_ap_states + st->state;
-		cpuhp_invoke_callback(cpu, st->state, step->startup);
+		cpuhp_invoke_callback(cpu, st->state, true, NULL);
 	}
 }
 
@@ -979,7 +1054,7 @@
 	 * responsible for bringing it up to the target state.
 	 */
 	target = min((int)target, CPUHP_BRINGUP_CPU);
-	ret = cpuhp_up_callbacks(cpu, st, cpuhp_bp_states, target);
+	ret = cpuhp_up_callbacks(cpu, st, target);
 out:
 	cpu_hotplug_done();
 	return ret;
@@ -1024,12 +1099,13 @@
 #ifdef CONFIG_PM_SLEEP_SMP
 static cpumask_var_t frozen_cpus;
 
-int disable_nonboot_cpus(void)
+int freeze_secondary_cpus(int primary)
 {
-	int cpu, first_cpu, error = 0;
+	int cpu, error = 0;
 
 	cpu_maps_update_begin();
-	first_cpu = cpumask_first(cpu_online_mask);
+	if (!cpu_online(primary))
+		primary = cpumask_first(cpu_online_mask);
 	/*
 	 * We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
@@ -1038,7 +1114,7 @@
 
 	pr_info("Disabling non-boot CPUs ...\n");
 	for_each_online_cpu(cpu) {
-		if (cpu == first_cpu)
+		if (cpu == primary)
 			continue;
 		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
 		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
@@ -1081,7 +1157,7 @@
 
 	/* Allow everyone to use the CPU hotplug again */
 	cpu_maps_update_begin();
-	WARN_ON(--cpu_hotplug_disabled < 0);
+	__cpu_hotplug_enable();
 	if (cpumask_empty(frozen_cpus))
 		goto out;
 
@@ -1170,40 +1246,50 @@
 static struct cpuhp_step cpuhp_bp_states[] = {
 	[CPUHP_OFFLINE] = {
 		.name			= "offline",
-		.startup		= NULL,
-		.teardown		= NULL,
+		.startup.single		= NULL,
+		.teardown.single	= NULL,
 	},
 #ifdef CONFIG_SMP
 	[CPUHP_CREATE_THREADS]= {
-		.name			= "threads:create",
-		.startup		= smpboot_create_threads,
-		.teardown		= NULL,
+		.name			= "threads:prepare",
+		.startup.single		= smpboot_create_threads,
+		.teardown.single	= NULL,
 		.cant_stop		= true,
 	},
 	[CPUHP_PERF_PREPARE] = {
-		.name = "perf prepare",
-		.startup = perf_event_init_cpu,
-		.teardown = perf_event_exit_cpu,
+		.name			= "perf:prepare",
+		.startup.single		= perf_event_init_cpu,
+		.teardown.single	= perf_event_exit_cpu,
 	},
 	[CPUHP_WORKQUEUE_PREP] = {
-		.name = "workqueue prepare",
-		.startup = workqueue_prepare_cpu,
-		.teardown = NULL,
+		.name			= "workqueue:prepare",
+		.startup.single		= workqueue_prepare_cpu,
+		.teardown.single	= NULL,
 	},
 	[CPUHP_HRTIMERS_PREPARE] = {
-		.name = "hrtimers prepare",
-		.startup = hrtimers_prepare_cpu,
-		.teardown = hrtimers_dead_cpu,
+		.name			= "hrtimers:prepare",
+		.startup.single		= hrtimers_prepare_cpu,
+		.teardown.single	= hrtimers_dead_cpu,
 	},
 	[CPUHP_SMPCFD_PREPARE] = {
-		.name = "SMPCFD prepare",
-		.startup = smpcfd_prepare_cpu,
-		.teardown = smpcfd_dead_cpu,
+		.name			= "smpcfd:prepare",
+		.startup.single		= smpcfd_prepare_cpu,
+		.teardown.single	= smpcfd_dead_cpu,
+	},
+	[CPUHP_RELAY_PREPARE] = {
+		.name			= "relay:prepare",
+		.startup.single		= relay_prepare_cpu,
+		.teardown.single	= NULL,
+	},
+	[CPUHP_SLAB_PREPARE] = {
+		.name			= "slab:prepare",
+		.startup.single		= slab_prepare_cpu,
+		.teardown.single	= slab_dead_cpu,
 	},
 	[CPUHP_RCUTREE_PREP] = {
-		.name = "RCU-tree prepare",
-		.startup = rcutree_prepare_cpu,
-		.teardown = rcutree_dead_cpu,
+		.name			= "RCU/tree:prepare",
+		.startup.single		= rcutree_prepare_cpu,
+		.teardown.single	= rcutree_dead_cpu,
 	},
 	/*
 	 * Preparatory and dead notifiers. Will be replaced once the notifiers
@@ -1211,8 +1297,8 @@
 	 */
 	[CPUHP_NOTIFY_PREPARE] = {
 		.name			= "notify:prepare",
-		.startup		= notify_prepare,
-		.teardown		= notify_dead,
+		.startup.single		= notify_prepare,
+		.teardown.single	= notify_dead,
 		.skip_onerr		= true,
 		.cant_stop		= true,
 	},
@@ -1222,20 +1308,21 @@
 	 * otherwise a RCU stall occurs.
 	 */
 	[CPUHP_TIMERS_DEAD] = {
-		.name = "timers dead",
-		.startup = NULL,
-		.teardown = timers_dead_cpu,
+		.name			= "timers:dead",
+		.startup.single		= NULL,
+		.teardown.single	= timers_dead_cpu,
 	},
 	/* Kicks the plugged cpu into life */
 	[CPUHP_BRINGUP_CPU] = {
 		.name			= "cpu:bringup",
-		.startup		= bringup_cpu,
-		.teardown		= NULL,
+		.startup.single		= bringup_cpu,
+		.teardown.single	= NULL,
 		.cant_stop		= true,
 	},
 	[CPUHP_AP_SMPCFD_DYING] = {
-		.startup = NULL,
-		.teardown = smpcfd_dying_cpu,
+		.name			= "smpcfd:dying",
+		.startup.single		= NULL,
+		.teardown.single	= smpcfd_dying_cpu,
 	},
 	/*
 	 * Handled on controll processor until the plugged processor manages
@@ -1243,8 +1330,8 @@
 	 */
 	[CPUHP_TEARDOWN_CPU] = {
 		.name			= "cpu:teardown",
-		.startup		= NULL,
-		.teardown		= takedown_cpu,
+		.startup.single		= NULL,
+		.teardown.single	= takedown_cpu,
 		.cant_stop		= true,
 	},
 #else
@@ -1270,24 +1357,13 @@
 	/* First state is scheduler control. Interrupts are disabled */
 	[CPUHP_AP_SCHED_STARTING] = {
 		.name			= "sched:starting",
-		.startup		= sched_cpu_starting,
-		.teardown		= sched_cpu_dying,
+		.startup.single		= sched_cpu_starting,
+		.teardown.single	= sched_cpu_dying,
 	},
 	[CPUHP_AP_RCUTREE_DYING] = {
-		.startup = NULL,
-		.teardown = rcutree_dying_cpu,
-	},
-	/*
-	 * Low level startup/teardown notifiers. Run with interrupts
-	 * disabled. Will be removed once the notifiers are converted to
-	 * states.
-	 */
-	[CPUHP_AP_NOTIFY_STARTING] = {
-		.name			= "notify:starting",
-		.startup		= notify_starting,
-		.teardown		= notify_dying,
-		.skip_onerr		= true,
-		.cant_stop		= true,
+		.name			= "RCU/tree:dying",
+		.startup.single		= NULL,
+		.teardown.single	= rcutree_dying_cpu,
 	},
 	/* Entry state on starting. Interrupts enabled from here on. Transient
 	 * state for synchronsization */
@@ -1296,24 +1372,24 @@
 	},
 	/* Handle smpboot threads park/unpark */
 	[CPUHP_AP_SMPBOOT_THREADS] = {
-		.name			= "smpboot:threads",
-		.startup		= smpboot_unpark_threads,
-		.teardown		= NULL,
+		.name			= "smpboot/threads:online",
+		.startup.single		= smpboot_unpark_threads,
+		.teardown.single	= NULL,
 	},
 	[CPUHP_AP_PERF_ONLINE] = {
-		.name = "perf online",
-		.startup = perf_event_init_cpu,
-		.teardown = perf_event_exit_cpu,
+		.name			= "perf:online",
+		.startup.single		= perf_event_init_cpu,
+		.teardown.single	= perf_event_exit_cpu,
 	},
 	[CPUHP_AP_WORKQUEUE_ONLINE] = {
-		.name = "workqueue online",
-		.startup = workqueue_online_cpu,
-		.teardown = workqueue_offline_cpu,
+		.name			= "workqueue:online",
+		.startup.single		= workqueue_online_cpu,
+		.teardown.single	= workqueue_offline_cpu,
 	},
 	[CPUHP_AP_RCUTREE_ONLINE] = {
-		.name = "RCU-tree online",
-		.startup = rcutree_online_cpu,
-		.teardown = rcutree_offline_cpu,
+		.name			= "RCU/tree:online",
+		.startup.single		= rcutree_online_cpu,
+		.teardown.single	= rcutree_offline_cpu,
 	},
 
 	/*
@@ -1322,8 +1398,8 @@
 	 */
 	[CPUHP_AP_NOTIFY_ONLINE] = {
 		.name			= "notify:online",
-		.startup		= notify_online,
-		.teardown		= notify_down_prepare,
+		.startup.single		= notify_online,
+		.teardown.single	= notify_down_prepare,
 		.skip_onerr		= true,
 	},
 #endif
@@ -1335,16 +1411,16 @@
 	/* Last state is scheduler control setting the cpu active */
 	[CPUHP_AP_ACTIVE] = {
 		.name			= "sched:active",
-		.startup		= sched_cpu_activate,
-		.teardown		= sched_cpu_deactivate,
+		.startup.single		= sched_cpu_activate,
+		.teardown.single	= sched_cpu_deactivate,
 	},
 #endif
 
 	/* CPU is fully up and running. */
 	[CPUHP_ONLINE] = {
 		.name			= "online",
-		.startup		= NULL,
-		.teardown		= NULL,
+		.startup.single		= NULL,
+		.teardown.single	= NULL,
 	},
 };
 
@@ -1356,54 +1432,42 @@
 	return 0;
 }
 
-static bool cpuhp_is_ap_state(enum cpuhp_state state)
-{
-	/*
-	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
-	 * purposes as that state is handled explicitely in cpu_down.
-	 */
-	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
-}
-
-static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
-{
-	struct cpuhp_step *sp;
-
-	sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
-	return sp + state;
-}
-
 static void cpuhp_store_callbacks(enum cpuhp_state state,
 				  const char *name,
 				  int (*startup)(unsigned int cpu),
-				  int (*teardown)(unsigned int cpu))
+				  int (*teardown)(unsigned int cpu),
+				  bool multi_instance)
 {
 	/* (Un)Install the callbacks for further cpu hotplug operations */
 	struct cpuhp_step *sp;
 
 	mutex_lock(&cpuhp_state_mutex);
 	sp = cpuhp_get_step(state);
-	sp->startup = startup;
-	sp->teardown = teardown;
+	sp->startup.single = startup;
+	sp->teardown.single = teardown;
 	sp->name = name;
+	sp->multi_instance = multi_instance;
+	INIT_HLIST_HEAD(&sp->list);
 	mutex_unlock(&cpuhp_state_mutex);
 }
 
 static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
 {
-	return cpuhp_get_step(state)->teardown;
+	return cpuhp_get_step(state)->teardown.single;
 }
 
 /*
  * Call the startup/teardown function for a step either on the AP or
  * on the current CPU.
  */
-static int cpuhp_issue_call(int cpu, enum cpuhp_state state,
-			    int (*cb)(unsigned int), bool bringup)
+static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
+			    struct hlist_node *node)
 {
+	struct cpuhp_step *sp = cpuhp_get_step(state);
 	int ret;
 
-	if (!cb)
+	if ((bringup && !sp->startup.single) ||
+	    (!bringup && !sp->teardown.single))
 		return 0;
 	/*
 	 * The non AP bound callbacks can fail on bringup. On teardown
@@ -1411,11 +1475,11 @@
 	 */
 #ifdef CONFIG_SMP
 	if (cpuhp_is_ap_state(state))
-		ret = cpuhp_invoke_ap_callback(cpu, state, cb);
+		ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
 	else
-		ret = cpuhp_invoke_callback(cpu, state, cb);
+		ret = cpuhp_invoke_callback(cpu, state, bringup, node);
 #else
-	ret = cpuhp_invoke_callback(cpu, state, cb);
+	ret = cpuhp_invoke_callback(cpu, state, bringup, node);
 #endif
 	BUG_ON(ret && !bringup);
 	return ret;
@@ -1427,13 +1491,10 @@
  * Note: The teardown callbacks for rollback are not allowed to fail!
  */
 static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
-				   int (*teardown)(unsigned int cpu))
+				   struct hlist_node *node)
 {
 	int cpu;
 
-	if (!teardown)
-		return;
-
 	/* Roll back the already executed steps on the other cpus */
 	for_each_present_cpu(cpu) {
 		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
@@ -1444,7 +1505,7 @@
 
 		/* Did we invoke the startup call on that cpu ? */
 		if (cpustate >= state)
-			cpuhp_issue_call(cpu, state, teardown, false);
+			cpuhp_issue_call(cpu, state, false, node);
 	}
 }
 
@@ -1471,6 +1532,52 @@
 	return -ENOSPC;
 }
 
+int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
+			       bool invoke)
+{
+	struct cpuhp_step *sp;
+	int cpu;
+	int ret;
+
+	sp = cpuhp_get_step(state);
+	if (sp->multi_instance == false)
+		return -EINVAL;
+
+	get_online_cpus();
+
+	if (!invoke || !sp->startup.multi)
+		goto add_node;
+
+	/*
+	 * Try to call the startup callback for each present cpu
+	 * depending on the hotplug state of the cpu.
+	 */
+	for_each_present_cpu(cpu) {
+		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+		int cpustate = st->state;
+
+		if (cpustate < state)
+			continue;
+
+		ret = cpuhp_issue_call(cpu, state, true, node);
+		if (ret) {
+			if (sp->teardown.multi)
+				cpuhp_rollback_install(cpu, state, node);
+			goto err;
+		}
+	}
+add_node:
+	ret = 0;
+	mutex_lock(&cpuhp_state_mutex);
+	hlist_add_head(node, &sp->list);
+	mutex_unlock(&cpuhp_state_mutex);
+
+err:
+	put_online_cpus();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
+
 /**
  * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
  * @state:	The state to setup
@@ -1484,7 +1591,8 @@
 int __cpuhp_setup_state(enum cpuhp_state state,
 			const char *name, bool invoke,
 			int (*startup)(unsigned int cpu),
-			int (*teardown)(unsigned int cpu))
+			int (*teardown)(unsigned int cpu),
+			bool multi_instance)
 {
 	int cpu, ret = 0;
 	int dyn_state = 0;
@@ -1503,7 +1611,7 @@
 		state = ret;
 	}
 
-	cpuhp_store_callbacks(state, name, startup, teardown);
+	cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
 
 	if (!invoke || !startup)
 		goto out;
@@ -1519,10 +1627,11 @@
 		if (cpustate < state)
 			continue;
 
-		ret = cpuhp_issue_call(cpu, state, startup, true);
+		ret = cpuhp_issue_call(cpu, state, true, NULL);
 		if (ret) {
-			cpuhp_rollback_install(cpu, state, teardown);
-			cpuhp_store_callbacks(state, NULL, NULL, NULL);
+			if (teardown)
+				cpuhp_rollback_install(cpu, state, NULL);
+			cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
 			goto out;
 		}
 	}
@@ -1534,6 +1643,42 @@
 }
 EXPORT_SYMBOL(__cpuhp_setup_state);
 
+int __cpuhp_state_remove_instance(enum cpuhp_state state,
+				  struct hlist_node *node, bool invoke)
+{
+	struct cpuhp_step *sp = cpuhp_get_step(state);
+	int cpu;
+
+	BUG_ON(cpuhp_cb_check(state));
+
+	if (!sp->multi_instance)
+		return -EINVAL;
+
+	get_online_cpus();
+	if (!invoke || !cpuhp_get_teardown_cb(state))
+		goto remove;
+	/*
+	 * Call the teardown callback for each present cpu depending
+	 * on the hotplug state of the cpu. This function is not
+	 * allowed to fail currently!
+	 */
+	for_each_present_cpu(cpu) {
+		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
+		int cpustate = st->state;
+
+		if (cpustate >= state)
+			cpuhp_issue_call(cpu, state, false, node);
+	}
+
+remove:
+	mutex_lock(&cpuhp_state_mutex);
+	hlist_del(node);
+	mutex_unlock(&cpuhp_state_mutex);
+	put_online_cpus();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
 /**
  * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
  * @state:	The state to remove
@@ -1545,14 +1690,21 @@
  */
 void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
 {
-	int (*teardown)(unsigned int cpu) = cpuhp_get_teardown_cb(state);
+	struct cpuhp_step *sp = cpuhp_get_step(state);
 	int cpu;
 
 	BUG_ON(cpuhp_cb_check(state));
 
 	get_online_cpus();
 
-	if (!invoke || !teardown)
+	if (sp->multi_instance) {
+		WARN(!hlist_empty(&sp->list),
+		     "Error: Removing state %d which has instances left.\n",
+		     state);
+		goto remove;
+	}
+
+	if (!invoke || !cpuhp_get_teardown_cb(state))
 		goto remove;
 
 	/*
@@ -1565,10 +1717,10 @@
 		int cpustate = st->state;
 
 		if (cpustate >= state)
-			cpuhp_issue_call(cpu, state, teardown, false);
+			cpuhp_issue_call(cpu, state, false, NULL);
 	}
 remove:
-	cpuhp_store_callbacks(state, NULL, NULL, NULL);
+	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
 	put_online_cpus();
 }
 EXPORT_SYMBOL(__cpuhp_remove_state);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c7fd277..2b4c20a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -325,8 +325,7 @@
 /*
  * Return in pmask the portion of a cpusets's cpus_allowed that
  * are online.  If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online cpus.  The top
- * cpuset always has some cpus online.
+ * until we find one that does have some online cpus.
  *
  * One way or another, we guarantee to return some non-empty subset
  * of cpu_online_mask.
@@ -335,8 +334,20 @@
  */
 static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
 {
-	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask))
+	while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
 		cs = parent_cs(cs);
+		if (unlikely(!cs)) {
+			/*
+			 * The top cpuset doesn't have any online cpu as a
+			 * consequence of a race between cpuset_hotplug_work
+			 * and cpu hotplug notifier.  But we know the top
+			 * cpuset's effective_cpus is on its way to to be
+			 * identical to cpu_online_mask.
+			 */
+			cpumask_copy(pmask, cpu_online_mask);
+			return;
+		}
+	}
 	cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
 }
 
@@ -2069,6 +2080,20 @@
 	mutex_unlock(&cpuset_mutex);
 }
 
+/*
+ * Make sure the new task conform to the current state of its parent,
+ * which could have been changed by cpuset just after it inherits the
+ * state from the parent and before it sits on the cgroup's task list.
+ */
+static void cpuset_fork(struct task_struct *task)
+{
+	if (task_css_is_root(task, cpuset_cgrp_id))
+		return;
+
+	set_cpus_allowed_ptr(task, &current->cpus_allowed);
+	task->mems_allowed = current->mems_allowed;
+}
+
 struct cgroup_subsys cpuset_cgrp_subsys = {
 	.css_alloc	= cpuset_css_alloc,
 	.css_online	= cpuset_css_online,
@@ -2079,6 +2104,7 @@
 	.attach		= cpuset_attach,
 	.post_attach	= cpuset_post_attach,
 	.bind		= cpuset_bind,
+	.fork		= cpuset_fork,
 	.legacy_cftypes	= files,
 	.early_init	= true,
 };
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a19550d..c6e47e9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -242,18 +242,6 @@
 	return ret;
 }
 
-static void event_function_local(struct perf_event *event, event_f func, void *data)
-{
-	struct event_function_struct efs = {
-		.event = event,
-		.func = func,
-		.data = data,
-	};
-
-	int ret = event_function(&efs);
-	WARN_ON_ONCE(ret);
-}
-
 static void event_function_call(struct perf_event *event, event_f func, void *data)
 {
 	struct perf_event_context *ctx = event->ctx;
@@ -303,6 +291,54 @@
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
+/*
+ * Similar to event_function_call() + event_function(), but hard assumes IRQs
+ * are already disabled and we're on the right CPU.
+ */
+static void event_function_local(struct perf_event *event, event_f func, void *data)
+{
+	struct perf_event_context *ctx = event->ctx;
+	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
+	struct task_struct *task = READ_ONCE(ctx->task);
+	struct perf_event_context *task_ctx = NULL;
+
+	WARN_ON_ONCE(!irqs_disabled());
+
+	if (task) {
+		if (task == TASK_TOMBSTONE)
+			return;
+
+		task_ctx = ctx;
+	}
+
+	perf_ctx_lock(cpuctx, task_ctx);
+
+	task = ctx->task;
+	if (task == TASK_TOMBSTONE)
+		goto unlock;
+
+	if (task) {
+		/*
+		 * We must be either inactive or active and the right task,
+		 * otherwise we're screwed, since we cannot IPI to somewhere
+		 * else.
+		 */
+		if (ctx->is_active) {
+			if (WARN_ON_ONCE(task != current))
+				goto unlock;
+
+			if (WARN_ON_ONCE(cpuctx->task_ctx != ctx))
+				goto unlock;
+		}
+	} else {
+		WARN_ON_ONCE(&cpuctx->ctx != ctx);
+	}
+
+	func(event, cpuctx, ctx, data);
+unlock:
+	perf_ctx_unlock(cpuctx, task_ctx);
+}
+
 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
 		       PERF_FLAG_FD_OUTPUT  |\
 		       PERF_FLAG_PID_CGROUP |\
@@ -843,6 +879,32 @@
 		}
 	}
 }
+
+/*
+ * Update cpuctx->cgrp so that it is set when first cgroup event is added and
+ * cleared when last cgroup event is removed.
+ */
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+			 struct perf_event_context *ctx, bool add)
+{
+	struct perf_cpu_context *cpuctx;
+
+	if (!is_cgroup_event(event))
+		return;
+
+	if (add && ctx->nr_cgroups++)
+		return;
+	else if (!add && --ctx->nr_cgroups)
+		return;
+	/*
+	 * Because cgroup events are always per-cpu events,
+	 * this will always be called from the right CPU.
+	 */
+	cpuctx = __get_cpu_context(ctx);
+	cpuctx->cgrp = add ? event->cgrp : NULL;
+}
+
 #else /* !CONFIG_CGROUP_PERF */
 
 static inline bool
@@ -920,6 +982,13 @@
 			 struct perf_event_context *ctx)
 {
 }
+
+static inline void
+list_update_cgroup_event(struct perf_event *event,
+			 struct perf_event_context *ctx, bool add)
+{
+}
+
 #endif
 
 /*
@@ -1392,6 +1461,7 @@
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
+
 	lockdep_assert_held(&ctx->lock);
 
 	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1405,15 +1475,13 @@
 	if (event->group_leader == event) {
 		struct list_head *list;
 
-		if (is_software_event(event))
-			event->group_flags |= PERF_GROUP_SOFTWARE;
+		event->group_caps = event->event_caps;
 
 		list = ctx_group_list(event, ctx);
 		list_add_tail(&event->group_entry, list);
 	}
 
-	if (is_cgroup_event(event))
-		ctx->nr_cgroups++;
+	list_update_cgroup_event(event, ctx, true);
 
 	list_add_rcu(&event->event_entry, &ctx->event_list);
 	ctx->nr_events++;
@@ -1561,9 +1629,7 @@
 
 	WARN_ON_ONCE(group_leader->ctx != event->ctx);
 
-	if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
-			!is_software_event(event))
-		group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+	group_leader->group_caps &= event->event_caps;
 
 	list_add_tail(&event->group_entry, &group_leader->sibling_list);
 	group_leader->nr_siblings++;
@@ -1581,8 +1647,6 @@
 static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-	struct perf_cpu_context *cpuctx;
-
 	WARN_ON_ONCE(event->ctx != ctx);
 	lockdep_assert_held(&ctx->lock);
 
@@ -1594,20 +1658,7 @@
 
 	event->attach_state &= ~PERF_ATTACH_CONTEXT;
 
-	if (is_cgroup_event(event)) {
-		ctx->nr_cgroups--;
-		/*
-		 * Because cgroup events are always per-cpu events, this will
-		 * always be called from the right CPU.
-		 */
-		cpuctx = __get_cpu_context(ctx);
-		/*
-		 * If there are no more cgroup events then clear cgrp to avoid
-		 * stale pointer in update_cgrp_time_from_cpuctx().
-		 */
-		if (!ctx->nr_cgroups)
-			cpuctx->cgrp = NULL;
-	}
+	list_update_cgroup_event(event, ctx, false);
 
 	ctx->nr_events--;
 	if (event->attr.inherit_stat)
@@ -1669,7 +1720,7 @@
 		sibling->group_leader = sibling;
 
 		/* Inherit group flags from the previous leader */
-		sibling->group_flags = event->group_flags;
+		sibling->group_caps = event->group_caps;
 
 		WARN_ON_ONCE(sibling->ctx != event->ctx);
 	}
@@ -1716,8 +1767,8 @@
 static inline int
 event_filter_match(struct perf_event *event)
 {
-	return (event->cpu == -1 || event->cpu == smp_processor_id())
-	    && perf_cgroup_match(event) && pmu_filter_match(event);
+	return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
+	       perf_cgroup_match(event) && pmu_filter_match(event);
 }
 
 static void
@@ -1737,8 +1788,8 @@
 	 * maintained, otherwise bogus information is return
 	 * via read() for time_enabled, time_running:
 	 */
-	if (event->state == PERF_EVENT_STATE_INACTIVE
-	    && !event_filter_match(event)) {
+	if (event->state == PERF_EVENT_STATE_INACTIVE &&
+	    !event_filter_match(event)) {
 		delta = tstamp - event->tstamp_stopped;
 		event->tstamp_running += delta;
 		event->tstamp_stopped = tstamp;
@@ -1778,6 +1829,8 @@
 	struct perf_event *event;
 	int state = group_event->state;
 
+	perf_pmu_disable(ctx->pmu);
+
 	event_sched_out(group_event, cpuctx, ctx);
 
 	/*
@@ -1786,6 +1839,8 @@
 	list_for_each_entry(event, &group_event->sibling_list, group_entry)
 		event_sched_out(event, cpuctx, ctx);
 
+	perf_pmu_enable(ctx->pmu);
+
 	if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
 		cpuctx->exclusive = 0;
 }
@@ -2091,7 +2146,7 @@
 	/*
 	 * Groups consisting entirely of software events can always go on.
 	 */
-	if (event->group_flags & PERF_GROUP_SOFTWARE)
+	if (event->group_caps & PERF_EV_CAP_SOFTWARE)
 		return 1;
 	/*
 	 * If an exclusive group is already on, no other hardware
@@ -2236,10 +2291,15 @@
 
 	lockdep_assert_held(&ctx->mutex);
 
-	event->ctx = ctx;
 	if (event->cpu != -1)
 		event->cpu = cpu;
 
+	/*
+	 * Ensures that if we can observe event->ctx, both the event and ctx
+	 * will be 'complete'. See perf_iterate_sb_cpu().
+	 */
+	smp_store_release(&event->ctx, ctx);
+
 	if (!task) {
 		cpu_function_call(cpu, __perf_install_in_context, event);
 		return;
@@ -2432,16 +2492,16 @@
 	 * while restarting.
 	 */
 	if (sd->restart)
-		event->pmu->start(event, PERF_EF_START);
+		event->pmu->start(event, 0);
 
 	return 0;
 }
 
-static int perf_event_restart(struct perf_event *event)
+static int perf_event_stop(struct perf_event *event, int restart)
 {
 	struct stop_event_data sd = {
 		.event		= event,
-		.restart	= 1,
+		.restart	= restart,
 	};
 	int ret = 0;
 
@@ -2778,19 +2838,36 @@
 	}
 }
 
+static DEFINE_PER_CPU(struct list_head, sched_cb_list);
+
 void perf_sched_cb_dec(struct pmu *pmu)
 {
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
 	this_cpu_dec(perf_sched_cb_usages);
+
+	if (!--cpuctx->sched_cb_usage)
+		list_del(&cpuctx->sched_cb_entry);
 }
 
+
 void perf_sched_cb_inc(struct pmu *pmu)
 {
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+
+	if (!cpuctx->sched_cb_usage++)
+		list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
+
 	this_cpu_inc(perf_sched_cb_usages);
 }
 
 /*
  * This function provides the context switch callback to the lower code
  * layer. It is invoked ONLY when the context switch callback is enabled.
+ *
+ * This callback is relevant even to per-cpu events; for example multi event
+ * PEBS requires this to provide PID/TID information. This requires we flush
+ * all queued PEBS records before we context switch to a new task.
  */
 static void perf_pmu_sched_task(struct task_struct *prev,
 				struct task_struct *next,
@@ -2798,34 +2875,24 @@
 {
 	struct perf_cpu_context *cpuctx;
 	struct pmu *pmu;
-	unsigned long flags;
 
 	if (prev == next)
 		return;
 
-	local_irq_save(flags);
+	list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+		pmu = cpuctx->unique_pmu; /* software PMUs will not have sched_task */
 
-	rcu_read_lock();
+		if (WARN_ON_ONCE(!pmu->sched_task))
+			continue;
 
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		if (pmu->sched_task) {
-			cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+		perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+		perf_pmu_disable(pmu);
 
-			perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+		pmu->sched_task(cpuctx->task_ctx, sched_in);
 
-			perf_pmu_disable(pmu);
-
-			pmu->sched_task(cpuctx->task_ctx, sched_in);
-
-			perf_pmu_enable(pmu);
-
-			perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-		}
+		perf_pmu_enable(pmu);
+		perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 	}
-
-	rcu_read_unlock();
-
-	local_irq_restore(flags);
 }
 
 static void perf_event_switch(struct task_struct *task,
@@ -3357,6 +3424,22 @@
 	int ret;
 };
 
+static int find_cpu_to_read(struct perf_event *event, int local_cpu)
+{
+	int event_cpu = event->oncpu;
+	u16 local_pkg, event_pkg;
+
+	if (event->group_caps & PERF_EV_CAP_READ_ACTIVE_PKG) {
+		event_pkg =  topology_physical_package_id(event_cpu);
+		local_pkg =  topology_physical_package_id(local_cpu);
+
+		if (event_pkg == local_pkg)
+			return local_cpu;
+	}
+
+	return event_cpu;
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -3478,7 +3561,7 @@
 
 static int perf_event_read(struct perf_event *event, bool group)
 {
-	int ret = 0;
+	int ret = 0, cpu_to_read, local_cpu;
 
 	/*
 	 * If event is enabled and currently active on a CPU, update the
@@ -3490,8 +3573,22 @@
 			.group = group,
 			.ret = 0,
 		};
-		smp_call_function_single(event->oncpu,
-					 __perf_event_read, &data, 1);
+
+		local_cpu = get_cpu();
+		cpu_to_read = find_cpu_to_read(event, local_cpu);
+		put_cpu();
+
+		/*
+		 * Purposely ignore the smp_call_function_single() return
+		 * value.
+		 *
+		 * If event->oncpu isn't a valid CPU it means the event got
+		 * scheduled out and that will have updated the event count.
+		 *
+		 * Therefore, either way, we'll have an up-to-date event count
+		 * after this.
+		 */
+		(void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
 		ret = data.ret;
 	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
 		struct perf_event_context *ctx = event->ctx;
@@ -3861,7 +3958,7 @@
 
 static bool exclusive_event_match(struct perf_event *e1, struct perf_event *e2)
 {
-	if ((e1->pmu->capabilities & PERF_PMU_CAP_EXCLUSIVE) &&
+	if ((e1->pmu == e2->pmu) &&
 	    (e1->cpu == e2->cpu ||
 	     e1->cpu == -1 ||
 	     e2->cpu == -1))
@@ -4777,6 +4874,19 @@
 		spin_unlock_irqrestore(&rb->event_lock, flags);
 	}
 
+	/*
+	 * Avoid racing with perf_mmap_close(AUX): stop the event
+	 * before swizzling the event::rb pointer; if it's getting
+	 * unmapped, its aux_mmap_count will be 0 and it won't
+	 * restart. See the comment in __perf_pmu_output_stop().
+	 *
+	 * Data will inevitably be lost when set_output is done in
+	 * mid-air, but then again, whoever does it like this is
+	 * not in for the data anyway.
+	 */
+	if (has_aux(event))
+		perf_event_stop(event, 0);
+
 	rcu_assign_pointer(event->rb, rb);
 
 	if (old_rb) {
@@ -5269,9 +5379,10 @@
 			struct pt_regs *regs, u64 mask)
 {
 	int bit;
+	DECLARE_BITMAP(_mask, 64);
 
-	for_each_set_bit(bit, (const unsigned long *) &mask,
-			 sizeof(mask) * BITS_PER_BYTE) {
+	bitmap_from_u64(_mask, mask);
+	for_each_set_bit(bit, _mask, sizeof(mask) * BITS_PER_BYTE) {
 		u64 val;
 
 		val = perf_reg_value(regs, bit);
@@ -5969,6 +6080,14 @@
 	struct perf_event *event;
 
 	list_for_each_entry_rcu(event, &pel->list, sb_list) {
+		/*
+		 * Skip events that are not fully formed yet; ensure that
+		 * if we observe event->ctx, both event and ctx will be
+		 * complete enough. See perf_install_in_context().
+		 */
+		if (!smp_load_acquire(&event->ctx))
+			continue;
+
 		if (event->state < PERF_EVENT_STATE_INACTIVE)
 			continue;
 		if (!event_filter_match(event))
@@ -6044,7 +6163,7 @@
 	raw_spin_unlock_irqrestore(&ifh->lock, flags);
 
 	if (restart)
-		perf_event_restart(event);
+		perf_event_stop(event, 1);
 }
 
 void perf_event_exec(void)
@@ -6088,7 +6207,13 @@
 
 	/*
 	 * In case of inheritance, it will be the parent that links to the
-	 * ring-buffer, but it will be the child that's actually using it:
+	 * ring-buffer, but it will be the child that's actually using it.
+	 *
+	 * We are using event::rb to determine if the event should be stopped,
+	 * however this may race with ring_buffer_attach() (through set_output),
+	 * which will make us skip the event that actually needs to be stopped.
+	 * So ring_buffer_attach() has to stop an aux event before re-assigning
+	 * its rb pointer.
 	 */
 	if (rcu_dereference(parent->rb) == rb)
 		ro->err = __perf_event_stop(&sd);
@@ -6098,7 +6223,7 @@
 {
 	struct perf_event *event = info;
 	struct pmu *pmu = event->pmu;
-	struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+	struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 	struct remote_output ro = {
 		.rb	= event->rb,
 	};
@@ -6553,15 +6678,6 @@
 }
 
 /*
- * Whether this @filter depends on a dynamic object which is not loaded
- * yet or its load addresses are not known.
- */
-static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter)
-{
-	return filter->filter && filter->inode;
-}
-
-/*
  * Check whether inode and address range match filter criteria.
  */
 static bool perf_addr_filter_match(struct perf_addr_filter *filter,
@@ -6611,7 +6727,7 @@
 	raw_spin_unlock_irqrestore(&ifh->lock, flags);
 
 	if (restart)
-		perf_event_restart(event);
+		perf_event_stop(event, 1);
 }
 
 /*
@@ -6622,6 +6738,13 @@
 	struct perf_event_context *ctx;
 	int ctxn;
 
+	/*
+	 * Data tracing isn't supported yet and as such there is no need
+	 * to keep track of anything that isn't related to executable code:
+	 */
+	if (!(vma->vm_flags & VM_EXEC))
+		return;
+
 	rcu_read_lock();
 	for_each_task_context_nr(ctxn) {
 		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
@@ -6956,7 +7079,7 @@
 		irq_work_queue(&event->pending);
 	}
 
-	event->overflow_handler(event, data, regs);
+	READ_ONCE(event->overflow_handler)(event, data, regs);
 
 	if (*perf_event_fasync(event) && event->pending_kill) {
 		event->pending_wakeup = 1;
@@ -7571,11 +7694,83 @@
 	ftrace_profile_free_filter(event);
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+static void bpf_overflow_handler(struct perf_event *event,
+				 struct perf_sample_data *data,
+				 struct pt_regs *regs)
+{
+	struct bpf_perf_event_data_kern ctx = {
+		.data = data,
+		.regs = regs,
+	};
+	int ret = 0;
+
+	preempt_disable();
+	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+		goto out;
+	rcu_read_lock();
+	ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+	rcu_read_unlock();
+out:
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();
+	if (!ret)
+		return;
+
+	event->orig_overflow_handler(event, data, regs);
+}
+
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+	struct bpf_prog *prog;
+
+	if (event->overflow_handler_context)
+		/* hw breakpoint or kernel counter */
+		return -EINVAL;
+
+	if (event->prog)
+		return -EEXIST;
+
+	prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	event->prog = prog;
+	event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+	WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+	return 0;
+}
+
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+	struct bpf_prog *prog = event->prog;
+
+	if (!prog)
+		return;
+
+	WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+	event->prog = NULL;
+	bpf_prog_put(prog);
+}
+#else
+static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+{
+	return -EOPNOTSUPP;
+}
+static void perf_event_free_bpf_handler(struct perf_event *event)
+{
+}
+#endif
+
 static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 {
 	bool is_kprobe, is_tracepoint;
 	struct bpf_prog *prog;
 
+	if (event->attr.type == PERF_TYPE_HARDWARE ||
+	    event->attr.type == PERF_TYPE_SOFTWARE)
+		return perf_event_set_bpf_handler(event, prog_fd);
+
 	if (event->attr.type != PERF_TYPE_TRACEPOINT)
 		return -EINVAL;
 
@@ -7616,6 +7811,8 @@
 {
 	struct bpf_prog *prog;
 
+	perf_event_free_bpf_handler(event);
+
 	if (!event->tp_event)
 		return;
 
@@ -7774,7 +7971,11 @@
 	list_for_each_entry(filter, &ifh->list, entry) {
 		event->addr_filters_offs[count] = 0;
 
-		if (perf_addr_filter_needs_mmap(filter))
+		/*
+		 * Adjust base offset if the filter is associated to a binary
+		 * that needs to be mapped:
+		 */
+		if (filter->inode)
 			event->addr_filters_offs[count] =
 				perf_addr_filter_apply(filter, mm);
 
@@ -7789,7 +7990,7 @@
 	mmput(mm);
 
 restart:
-	perf_event_restart(event);
+	perf_event_stop(event, 1);
 }
 
 /*
@@ -7905,8 +8106,10 @@
 					goto fail;
 			}
 
-			if (token == IF_SRC_FILE) {
-				filename = match_strdup(&args[2]);
+			if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
+				int fpos = filter->range ? 2 : 1;
+
+				filename = match_strdup(&args[fpos]);
 				if (!filename) {
 					ret = -ENOMEM;
 					goto fail;
@@ -8926,6 +9129,19 @@
 	if (!overflow_handler && parent_event) {
 		overflow_handler = parent_event->overflow_handler;
 		context = parent_event->overflow_handler_context;
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
+		if (overflow_handler == bpf_overflow_handler) {
+			struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+
+			if (IS_ERR(prog)) {
+				err = PTR_ERR(prog);
+				goto err_ns;
+			}
+			event->prog = prog;
+			event->orig_overflow_handler =
+				parent_event->orig_overflow_handler;
+		}
+#endif
 	}
 
 	if (overflow_handler) {
@@ -9406,6 +9622,9 @@
 			goto err_alloc;
 	}
 
+	if (pmu->task_ctx_nr == perf_sw_context)
+		event->event_caps |= PERF_EV_CAP_SOFTWARE;
+
 	if (group_leader &&
 	    (is_software_event(event) != is_software_event(group_leader))) {
 		if (is_software_event(event)) {
@@ -9419,7 +9638,7 @@
 			 */
 			pmu = group_leader->pmu;
 		} else if (is_software_event(group_leader) &&
-			   (group_leader->group_flags & PERF_GROUP_SOFTWARE)) {
+			   (group_leader->group_caps & PERF_EV_CAP_SOFTWARE)) {
 			/*
 			 * In case the group is a pure software group, and we
 			 * try to add a hardware event, move the whole group to
@@ -10354,6 +10573,8 @@
 
 		INIT_LIST_HEAD(&per_cpu(pmu_sb_events.list, cpu));
 		raw_spin_lock_init(&per_cpu(pmu_sb_events.lock, cpu));
+
+		INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
 	}
 }
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index ae9b90d..257fa46 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -330,15 +330,22 @@
 	if (!rb)
 		return NULL;
 
-	if (!rb_has_aux(rb) || !atomic_inc_not_zero(&rb->aux_refcount))
+	if (!rb_has_aux(rb))
 		goto err;
 
 	/*
-	 * If rb::aux_mmap_count is zero (and rb_has_aux() above went through),
-	 * the aux buffer is in perf_mmap_close(), about to get freed.
+	 * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(),
+	 * about to get freed, so we leave immediately.
+	 *
+	 * Checking rb::aux_mmap_count and rb::refcount has to be done in
+	 * the same order, see perf_mmap_close. Otherwise we end up freeing
+	 * aux pages in this path, which is a bug, because in_atomic().
 	 */
 	if (!atomic_read(&rb->aux_mmap_count))
-		goto err_put;
+		goto err;
+
+	if (!atomic_inc_not_zero(&rb->aux_refcount))
+		goto err;
 
 	/*
 	 * Nesting is not supported for AUX area, make sure nested
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index b7a525a..d4129bb 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -150,7 +150,7 @@
  * Returns 0 on success, -EFAULT on failure.
  */
 static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
-				struct page *page, struct page *kpage)
+				struct page *old_page, struct page *new_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
@@ -161,48 +161,49 @@
 	const unsigned long mmun_end   = addr + PAGE_SIZE;
 	struct mem_cgroup *memcg;
 
-	err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg,
+	err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
 			false);
 	if (err)
 		return err;
 
 	/* For try_to_free_swap() and munlock_vma_page() below */
-	lock_page(page);
+	lock_page(old_page);
 
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	err = -EAGAIN;
-	ptep = page_check_address(page, mm, addr, &ptl, 0);
-	if (!ptep)
+	ptep = page_check_address(old_page, mm, addr, &ptl, 0);
+	if (!ptep) {
+		mem_cgroup_cancel_charge(new_page, memcg, false);
 		goto unlock;
+	}
 
-	get_page(kpage);
-	page_add_new_anon_rmap(kpage, vma, addr, false);
-	mem_cgroup_commit_charge(kpage, memcg, false, false);
-	lru_cache_add_active_or_unevictable(kpage, vma);
+	get_page(new_page);
+	page_add_new_anon_rmap(new_page, vma, addr, false);
+	mem_cgroup_commit_charge(new_page, memcg, false, false);
+	lru_cache_add_active_or_unevictable(new_page, vma);
 
-	if (!PageAnon(page)) {
-		dec_mm_counter(mm, mm_counter_file(page));
+	if (!PageAnon(old_page)) {
+		dec_mm_counter(mm, mm_counter_file(old_page));
 		inc_mm_counter(mm, MM_ANONPAGES);
 	}
 
 	flush_cache_page(vma, addr, pte_pfn(*ptep));
 	ptep_clear_flush_notify(vma, addr, ptep);
-	set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+	set_pte_at_notify(mm, addr, ptep, mk_pte(new_page, vma->vm_page_prot));
 
-	page_remove_rmap(page, false);
-	if (!page_mapped(page))
-		try_to_free_swap(page);
+	page_remove_rmap(old_page, false);
+	if (!page_mapped(old_page))
+		try_to_free_swap(old_page);
 	pte_unmap_unlock(ptep, ptl);
 
 	if (vma->vm_flags & VM_LOCKED)
-		munlock_vma_page(page);
-	put_page(page);
+		munlock_vma_page(old_page);
+	put_page(old_page);
 
 	err = 0;
  unlock:
-	mem_cgroup_cancel_charge(kpage, memcg, false);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-	unlock_page(page);
+	unlock_page(old_page);
 	return err;
 }
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 2f974ae..1e1d913 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -725,7 +725,7 @@
 static inline void check_stack_usage(void) {}
 #endif
 
-void do_exit(long code)
+void __noreturn do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
@@ -848,12 +848,7 @@
 	TASKS_RCU(preempt_enable());
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
-#ifdef CONFIG_NUMA
-	task_lock(tsk);
-	mpol_put(tsk->mempolicy);
-	tsk->mempolicy = NULL;
-	task_unlock(tsk);
-#endif
+	mpol_put_task_policy(tsk);
 #ifdef CONFIG_FUTEX
 	if (unlikely(current->pi_state_cache))
 		kfree(current->pi_state_cache);
@@ -887,29 +882,7 @@
 	exit_rcu();
 	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
 
-	/*
-	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
-	 * when the following two conditions become true.
-	 *   - There is race condition of mmap_sem (It is acquired by
-	 *     exit_mm()), and
-	 *   - SMI occurs before setting TASK_RUNINNG.
-	 *     (or hypervisor of virtual machine switches to other guest)
-	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
-	 *
-	 * To avoid it, we have to wait for releasing tsk->pi_lock which
-	 * is held by try_to_wake_up()
-	 */
-	smp_mb();
-	raw_spin_unlock_wait(&tsk->pi_lock);
-
-	/* causes final put_task_struct in finish_task_switch(). */
-	tsk->state = TASK_DEAD;
-	tsk->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
-	schedule();
-	BUG();
-	/* Avoid "noreturn function does return".  */
-	for (;;)
-		cpu_relax();	/* For when BUG is null */
+	do_task_dead();
 }
 EXPORT_SYMBOL_GPL(do_exit);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 52e725d4..c060c7e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -158,19 +158,83 @@
  * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
  * kmemcache based allocator.
  */
-# if THREAD_SIZE >= PAGE_SIZE
-static unsigned long *alloc_thread_stack_node(struct task_struct *tsk,
-						  int node)
+# if THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK)
+
+#ifdef CONFIG_VMAP_STACK
+/*
+ * vmalloc() is a bit slow, and calling vfree() enough times will force a TLB
+ * flush.  Try to minimize the number of calls by caching stacks.
+ */
+#define NR_CACHED_STACKS 2
+static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+#endif
+
+static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
+#ifdef CONFIG_VMAP_STACK
+	void *stack;
+	int i;
+
+	local_irq_disable();
+	for (i = 0; i < NR_CACHED_STACKS; i++) {
+		struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+
+		if (!s)
+			continue;
+		this_cpu_write(cached_stacks[i], NULL);
+
+		tsk->stack_vm_area = s;
+		local_irq_enable();
+		return s->addr;
+	}
+	local_irq_enable();
+
+	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
+				     VMALLOC_START, VMALLOC_END,
+				     THREADINFO_GFP | __GFP_HIGHMEM,
+				     PAGE_KERNEL,
+				     0, node, __builtin_return_address(0));
+
+	/*
+	 * We can't call find_vm_area() in interrupt context, and
+	 * free_thread_stack() can be called in interrupt context,
+	 * so cache the vm_struct.
+	 */
+	if (stack)
+		tsk->stack_vm_area = find_vm_area(stack);
+	return stack;
+#else
 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
 					     THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
+#endif
 }
 
-static inline void free_thread_stack(unsigned long *stack)
+static inline void free_thread_stack(struct task_struct *tsk)
 {
-	__free_pages(virt_to_page(stack), THREAD_SIZE_ORDER);
+#ifdef CONFIG_VMAP_STACK
+	if (task_stack_vm_area(tsk)) {
+		unsigned long flags;
+		int i;
+
+		local_irq_save(flags);
+		for (i = 0; i < NR_CACHED_STACKS; i++) {
+			if (this_cpu_read(cached_stacks[i]))
+				continue;
+
+			this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
+			local_irq_restore(flags);
+			return;
+		}
+		local_irq_restore(flags);
+
+		vfree(tsk->stack);
+		return;
+	}
+#endif
+
+	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_stack_cache;
@@ -181,9 +245,9 @@
 	return kmem_cache_alloc_node(thread_stack_cache, THREADINFO_GFP, node);
 }
 
-static void free_thread_stack(unsigned long *stack)
+static void free_thread_stack(struct task_struct *tsk)
 {
-	kmem_cache_free(thread_stack_cache, stack);
+	kmem_cache_free(thread_stack_cache, tsk->stack);
 }
 
 void thread_stack_cache_init(void)
@@ -213,24 +277,76 @@
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
-static void account_kernel_stack(unsigned long *stack, int account)
+static void account_kernel_stack(struct task_struct *tsk, int account)
 {
-	/* All stack pages are in the same zone and belong to the same memcg. */
-	struct page *first_page = virt_to_page(stack);
+	void *stack = task_stack_page(tsk);
+	struct vm_struct *vm = task_stack_vm_area(tsk);
 
-	mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
-			    THREAD_SIZE / 1024 * account);
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_VMAP_STACK) && PAGE_SIZE % 1024 != 0);
 
-	memcg_kmem_update_page_stat(
-		first_page, MEMCG_KERNEL_STACK_KB,
-		account * (THREAD_SIZE / 1024));
+	if (vm) {
+		int i;
+
+		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
+
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+			mod_zone_page_state(page_zone(vm->pages[i]),
+					    NR_KERNEL_STACK_KB,
+					    PAGE_SIZE / 1024 * account);
+		}
+
+		/* All stack pages belong to the same memcg. */
+		memcg_kmem_update_page_stat(vm->pages[0], MEMCG_KERNEL_STACK_KB,
+					    account * (THREAD_SIZE / 1024));
+	} else {
+		/*
+		 * All stack pages are in the same zone and belong to the
+		 * same memcg.
+		 */
+		struct page *first_page = virt_to_page(stack);
+
+		mod_zone_page_state(page_zone(first_page), NR_KERNEL_STACK_KB,
+				    THREAD_SIZE / 1024 * account);
+
+		memcg_kmem_update_page_stat(first_page, MEMCG_KERNEL_STACK_KB,
+					    account * (THREAD_SIZE / 1024));
+	}
 }
 
+static void release_task_stack(struct task_struct *tsk)
+{
+	account_kernel_stack(tsk, -1);
+	arch_release_thread_stack(tsk->stack);
+	free_thread_stack(tsk);
+	tsk->stack = NULL;
+#ifdef CONFIG_VMAP_STACK
+	tsk->stack_vm_area = NULL;
+#endif
+}
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+void put_task_stack(struct task_struct *tsk)
+{
+	if (atomic_dec_and_test(&tsk->stack_refcount))
+		release_task_stack(tsk);
+}
+#endif
+
 void free_task(struct task_struct *tsk)
 {
-	account_kernel_stack(tsk->stack, -1);
-	arch_release_thread_stack(tsk->stack);
-	free_thread_stack(tsk->stack);
+#ifndef CONFIG_THREAD_INFO_IN_TASK
+	/*
+	 * The task is finally done with both the stack and thread_info,
+	 * so free both.
+	 */
+	release_task_stack(tsk);
+#else
+	/*
+	 * If the task had a separate stack allocation, it should be gone
+	 * by now.
+	 */
+	WARN_ON_ONCE(atomic_read(&tsk->stack_refcount) != 0);
+#endif
 	rt_mutex_debug_task_free(tsk);
 	ftrace_graph_exit_task(tsk);
 	put_seccomp_filter(tsk);
@@ -342,6 +458,7 @@
 {
 	struct task_struct *tsk;
 	unsigned long *stack;
+	struct vm_struct *stack_vm_area;
 	int err;
 
 	if (node == NUMA_NO_NODE)
@@ -354,11 +471,26 @@
 	if (!stack)
 		goto free_tsk;
 
+	stack_vm_area = task_stack_vm_area(tsk);
+
 	err = arch_dup_task_struct(tsk, orig);
+
+	/*
+	 * arch_dup_task_struct() clobbers the stack-related fields.  Make
+	 * sure they're properly initialized before using any stack-related
+	 * functions again.
+	 */
+	tsk->stack = stack;
+#ifdef CONFIG_VMAP_STACK
+	tsk->stack_vm_area = stack_vm_area;
+#endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	atomic_set(&tsk->stack_refcount, 1);
+#endif
+
 	if (err)
 		goto free_stack;
 
-	tsk->stack = stack;
 #ifdef CONFIG_SECCOMP
 	/*
 	 * We must handle setting up seccomp filters once we're under
@@ -390,14 +522,14 @@
 	tsk->task_frag.page = NULL;
 	tsk->wake_q.next = NULL;
 
-	account_kernel_stack(stack, 1);
+	account_kernel_stack(tsk, 1);
 
 	kcov_task_init(tsk);
 
 	return tsk;
 
 free_stack:
-	free_thread_stack(stack);
+	free_thread_stack(tsk);
 free_tsk:
 	free_task_struct(tsk);
 	return NULL;
@@ -799,6 +931,29 @@
 EXPORT_SYMBOL(get_mm_exe_file);
 
 /**
+ * get_task_exe_file - acquire a reference to the task's executable file
+ *
+ * Returns %NULL if task's mm (if any) has no associated executable file or
+ * this is a kernel thread with borrowed mm (see the comment above get_task_mm).
+ * User must release file via fput().
+ */
+struct file *get_task_exe_file(struct task_struct *task)
+{
+	struct file *exe_file = NULL;
+	struct mm_struct *mm;
+
+	task_lock(task);
+	mm = task->mm;
+	if (mm) {
+		if (!(task->flags & PF_KTHREAD))
+			exe_file = get_mm_exe_file(mm);
+	}
+	task_unlock(task);
+	return exe_file;
+}
+EXPORT_SYMBOL(get_task_exe_file);
+
+/**
  * get_task_mm - acquire a reference to the task's mm
  *
  * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
@@ -913,14 +1068,12 @@
 	deactivate_mm(tsk, mm);
 
 	/*
-	 * If we're exiting normally, clear a user-space tid field if
-	 * requested.  We leave this alone when dying by signal, to leave
-	 * the value intact in a core dump, and to save the unnecessary
-	 * trouble, say, a killed vfork parent shouldn't touch this mm.
-	 * Userland only wants this done for a sys_exit.
+	 * Signal userspace if we're not exiting with a core dump
+	 * because we want to leave the value intact for debugging
+	 * purposes.
 	 */
 	if (tsk->clear_child_tid) {
-		if (!(tsk->flags & PF_SIGNALED) &&
+		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
 		    atomic_read(&mm->mm_users) > 1) {
 			/*
 			 * We don't check the error code - if userspace has
@@ -1404,7 +1557,6 @@
 	p->real_start_time = ktime_get_boot_ns();
 	p->io_context = NULL;
 	p->audit_context = NULL;
-	threadgroup_change_begin(current);
 	cgroup_fork(p);
 #ifdef CONFIG_NUMA
 	p->mempolicy = mpol_dup(p->mempolicy);
@@ -1556,6 +1708,7 @@
 	INIT_LIST_HEAD(&p->thread_group);
 	p->task_works = NULL;
 
+	threadgroup_change_begin(current);
 	/*
 	 * Ensure that the cgroup subsystem policies allow the new process to be
 	 * forked. It should be noted the the new process's css_set can be changed
@@ -1656,6 +1809,7 @@
 bad_fork_cancel_cgroup:
 	cgroup_cancel_fork(p);
 bad_fork_free_pid:
+	threadgroup_change_end(current);
 	if (pid != &init_struct_pid)
 		free_pid(pid);
 bad_fork_cleanup_thread:
@@ -1688,12 +1842,12 @@
 	mpol_put(p->mempolicy);
 bad_fork_cleanup_threadgroup_lock:
 #endif
-	threadgroup_change_end(current);
 	delayacct_tsk_free(p);
 bad_fork_cleanup_count:
 	atomic_dec(&p->cred->user->processes);
 	exit_creds(p);
 bad_fork_free:
+	put_task_stack(p);
 	free_task(p);
 fork_out:
 	return ERR_PTR(retval);
diff --git a/kernel/futex.c b/kernel/futex.c
index 33664f7..2c4be46 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -179,7 +179,15 @@
  * Futex flags used to encode options to functions and preserve them across
  * restarts.
  */
-#define FLAGS_SHARED		0x01
+#ifdef CONFIG_MMU
+# define FLAGS_SHARED		0x01
+#else
+/*
+ * NOMMU does not have per process address space. Let the compiler optimize
+ * code away.
+ */
+# define FLAGS_SHARED		0x00
+#endif
 #define FLAGS_CLOCKRT		0x02
 #define FLAGS_HAS_TIMEOUT	0x04
 
@@ -373,8 +381,12 @@
 #endif
 }
 
-/*
- * We hash on the keys returned from get_futex_key (see below).
+/**
+ * hash_futex - Return the hash bucket in the global hash
+ * @key:	Pointer to the futex key for which the hash is calculated
+ *
+ * We hash on the keys returned from get_futex_key (see below) and return the
+ * corresponding hash bucket in the global hash.
  */
 static struct futex_hash_bucket *hash_futex(union futex_key *key)
 {
@@ -384,7 +396,12 @@
 	return &futex_queues[hash & (futex_hashsize - 1)];
 }
 
-/*
+
+/**
+ * match_futex - Check whether two futex keys are equal
+ * @key1:	Pointer to key1
+ * @key2:	Pointer to key2
+ *
  * Return 1 if two futex_keys are equal, 0 otherwise.
  */
 static inline int match_futex(union futex_key *key1, union futex_key *key2)
@@ -405,6 +422,16 @@
 	if (!key->both.ptr)
 		return;
 
+	/*
+	 * On MMU less systems futexes are always "private" as there is no per
+	 * process address space. We need the smp wmb nevertheless - yes,
+	 * arch/blackfin has MMU less SMP ...
+	 */
+	if (!IS_ENABLED(CONFIG_MMU)) {
+		smp_mb(); /* explicit smp_mb(); (B) */
+		return;
+	}
+
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
 		ihold(key->shared.inode); /* implies smp_mb(); (B) */
@@ -436,6 +463,9 @@
 		return;
 	}
 
+	if (!IS_ENABLED(CONFIG_MMU))
+		return;
+
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
 		iput(key->shared.inode);
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index d234022..432c3d7 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -117,7 +117,7 @@
 	pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
 		" disables this message.\n");
 	sched_show_task(t);
-	debug_show_held_locks(t);
+	debug_show_all_locks();
 
 	touch_nmi_watchdog();
 
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f689593..17f51d63 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -4,58 +4,151 @@
 #include <linux/slab.h>
 #include <linux/cpu.h>
 
-static int get_first_sibling(unsigned int cpu)
+static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
+				int cpus_per_vec)
 {
-	unsigned int ret;
+	const struct cpumask *siblmsk;
+	int cpu, sibl;
 
-	ret = cpumask_first(topology_sibling_cpumask(cpu));
-	if (ret < nr_cpu_ids)
-		return ret;
-	return cpu;
+	for ( ; cpus_per_vec > 0; ) {
+		cpu = cpumask_first(nmsk);
+
+		/* Should not happen, but I'm too lazy to think about it */
+		if (cpu >= nr_cpu_ids)
+			return;
+
+		cpumask_clear_cpu(cpu, nmsk);
+		cpumask_set_cpu(cpu, irqmsk);
+		cpus_per_vec--;
+
+		/* If the cpu has siblings, use them first */
+		siblmsk = topology_sibling_cpumask(cpu);
+		for (sibl = -1; cpus_per_vec > 0; ) {
+			sibl = cpumask_next(sibl, siblmsk);
+			if (sibl >= nr_cpu_ids)
+				break;
+			if (!cpumask_test_and_clear_cpu(sibl, nmsk))
+				continue;
+			cpumask_set_cpu(sibl, irqmsk);
+			cpus_per_vec--;
+		}
+	}
 }
 
-/*
- * Take a map of online CPUs and the number of available interrupt vectors
- * and generate an output cpumask suitable for spreading MSI/MSI-X vectors
- * so that they are distributed as good as possible around the CPUs.  If
- * more vectors than CPUs are available we'll map one to each CPU,
- * otherwise we map one to the first sibling of each socket.
- *
- * If there are more vectors than CPUs we will still only have one bit
- * set per CPU, but interrupt code will keep on assigning the vectors from
- * the start of the bitmap until we run out of vectors.
- */
-struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
+static int get_nodes_in_cpumask(const struct cpumask *mask, nodemask_t *nodemsk)
 {
-	struct cpumask *affinity_mask;
-	unsigned int max_vecs = *nr_vecs;
+	int n, nodes;
 
-	if (max_vecs == 1)
-		return NULL;
-
-	affinity_mask = kzalloc(cpumask_size(), GFP_KERNEL);
-	if (!affinity_mask) {
-		*nr_vecs = 1;
-		return NULL;
+	/* Calculate the number of nodes in the supplied affinity mask */
+	for (n = 0, nodes = 0; n < num_online_nodes(); n++) {
+		if (cpumask_intersects(mask, cpumask_of_node(n))) {
+			node_set(n, *nodemsk);
+			nodes++;
+		}
 	}
+	return nodes;
+}
 
-	if (max_vecs >= num_online_cpus()) {
-		cpumask_copy(affinity_mask, cpu_online_mask);
-		*nr_vecs = num_online_cpus();
-	} else {
-		unsigned int vecs = 0, cpu;
+/**
+ * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
+ * @affinity:		The affinity mask to spread. If NULL cpu_online_mask
+ *			is used
+ * @nvecs:		The number of vectors
+ *
+ * Returns the masks pointer or NULL if allocation failed.
+ */
+struct cpumask *irq_create_affinity_masks(const struct cpumask *affinity,
+					  int nvec)
+{
+	int n, nodes, vecs_per_node, cpus_per_vec, extra_vecs, curvec = 0;
+	nodemask_t nodemsk = NODE_MASK_NONE;
+	struct cpumask *masks;
+	cpumask_var_t nmsk;
 
-		for_each_online_cpu(cpu) {
-			if (cpu == get_first_sibling(cpu)) {
-				cpumask_set_cpu(cpu, affinity_mask);
-				vecs++;
-			}
+	if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
+		return NULL;
 
-			if (--max_vecs == 0)
+	masks = kzalloc(nvec * sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		goto out;
+
+	/* Stabilize the cpumasks */
+	get_online_cpus();
+	/* If the supplied affinity mask is NULL, use cpu online mask */
+	if (!affinity)
+		affinity = cpu_online_mask;
+
+	nodes = get_nodes_in_cpumask(affinity, &nodemsk);
+
+	/*
+	 * If the number of nodes in the mask is less than or equal the
+	 * number of vectors we just spread the vectors across the nodes.
+	 */
+	if (nvec <= nodes) {
+		for_each_node_mask(n, nodemsk) {
+			cpumask_copy(masks + curvec, cpumask_of_node(n));
+			if (++curvec == nvec)
 				break;
 		}
-		*nr_vecs = vecs;
+		goto outonl;
 	}
 
-	return affinity_mask;
+	/* Spread the vectors per node */
+	vecs_per_node = nvec / nodes;
+	/* Account for rounding errors */
+	extra_vecs = nvec - (nodes * vecs_per_node);
+
+	for_each_node_mask(n, nodemsk) {
+		int ncpus, v, vecs_to_assign = vecs_per_node;
+
+		/* Get the cpus on this node which are in the mask */
+		cpumask_and(nmsk, affinity, cpumask_of_node(n));
+
+		/* Calculate the number of cpus per vector */
+		ncpus = cpumask_weight(nmsk);
+
+		for (v = 0; curvec < nvec && v < vecs_to_assign; curvec++, v++) {
+			cpus_per_vec = ncpus / vecs_to_assign;
+
+			/* Account for extra vectors to compensate rounding errors */
+			if (extra_vecs) {
+				cpus_per_vec++;
+				if (!--extra_vecs)
+					vecs_per_node++;
+			}
+			irq_spread_init_one(masks + curvec, nmsk, cpus_per_vec);
+		}
+
+		if (curvec >= nvec)
+			break;
+	}
+
+outonl:
+	put_online_cpus();
+out:
+	free_cpumask_var(nmsk);
+	return masks;
+}
+
+/**
+ * irq_calc_affinity_vectors - Calculate to optimal number of vectors for a given affinity mask
+ * @affinity:		The affinity mask to spread. If NULL cpu_online_mask
+ *			is used
+ * @maxvec:		The maximum number of vectors available
+ */
+int irq_calc_affinity_vectors(const struct cpumask *affinity, int maxvec)
+{
+	int cpus, ret;
+
+	/* Stabilize the cpumasks */
+	get_online_cpus();
+	/* If the supplied affinity mask is NULL, use cpu online mask */
+	if (!affinity)
+		affinity = cpu_online_mask;
+
+	cpus = cpumask_weight(affinity);
+	ret = (cpus < maxvec) ? cpus : maxvec;
+
+	put_online_cpus();
+	return ret;
 }
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index b4c1bc7..be3c34e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -76,7 +76,6 @@
 	if (!desc)
 		return -EINVAL;
 
-	type &= IRQ_TYPE_SENSE_MASK;
 	ret = __irq_set_trigger(desc, type);
 	irq_put_desc_busunlock(desc, flags);
 	return ret;
@@ -756,7 +755,6 @@
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct irqaction *action = desc->action;
-	void *dev_id = raw_cpu_ptr(action->percpu_dev_id);
 	unsigned int irq = irq_desc_get_irq(desc);
 	irqreturn_t res;
 
@@ -765,15 +763,26 @@
 	if (chip->irq_ack)
 		chip->irq_ack(&desc->irq_data);
 
-	trace_irq_handler_entry(irq, action);
-	res = action->handler(irq, dev_id);
-	trace_irq_handler_exit(irq, action, res);
+	if (likely(action)) {
+		trace_irq_handler_entry(irq, action);
+		res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
+		trace_irq_handler_exit(irq, action, res);
+	} else {
+		unsigned int cpu = smp_processor_id();
+		bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled);
+
+		if (enabled)
+			irq_percpu_disable(desc, cpu);
+
+		pr_err_once("Spurious%s percpu IRQ%u on CPU%u\n",
+			    enabled ? " and unmasked" : "", irq, cpu);
+	}
 
 	if (chip->irq_eoi)
 		chip->irq_eoi(&desc->irq_data);
 }
 
-void
+static void
 __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
 		     int is_chained, const char *name)
 {
@@ -820,6 +829,21 @@
 	desc->name = name;
 
 	if (handle != handle_bad_irq && is_chained) {
+		unsigned int type = irqd_get_trigger_type(&desc->irq_data);
+
+		/*
+		 * We're about to start this interrupt immediately,
+		 * hence the need to set the trigger configuration.
+		 * But the .set_type callback may have overridden the
+		 * flow handler, ignoring that we're dealing with a
+		 * chained interrupt. Reset it immediately because we
+		 * do know better.
+		 */
+		if (type != IRQ_TYPE_NONE) {
+			__irq_set_trigger(desc, type);
+			desc->handle_irq = handle;
+		}
+
 		irq_settings_set_noprobe(desc);
 		irq_settings_set_norequest(desc);
 		irq_settings_set_nothread(desc);
diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c
index abd286a..ee32870 100644
--- a/kernel/irq/generic-chip.c
+++ b/kernel/irq/generic-chip.c
@@ -260,9 +260,9 @@
 }
 
 /**
- * irq_alloc_domain_generic_chip - Allocate generic chips for an irq domain
+ * __irq_alloc_domain_generic_chip - Allocate generic chips for an irq domain
  * @d:			irq domain for which to allocate chips
- * @irqs_per_chip:	Number of interrupts each chip handles
+ * @irqs_per_chip:	Number of interrupts each chip handles (max 32)
  * @num_ct:		Number of irq_chip_type instances associated with this
  * @name:		Name of the irq chip
  * @handler:		Default flow handler associated with these chips
@@ -270,11 +270,11 @@
  * @set:		IRQ_* bits to set in the mapping function
  * @gcflags:		Generic chip specific setup flags
  */
-int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
-				   int num_ct, const char *name,
-				   irq_flow_handler_t handler,
-				   unsigned int clr, unsigned int set,
-				   enum irq_gc_flags gcflags)
+int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
+				     int num_ct, const char *name,
+				     irq_flow_handler_t handler,
+				     unsigned int clr, unsigned int set,
+				     enum irq_gc_flags gcflags)
 {
 	struct irq_domain_chip_generic *dgc;
 	struct irq_chip_generic *gc;
@@ -326,7 +326,21 @@
 	d->name = name;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips);
+EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);
+
+static struct irq_chip_generic *
+__irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
+{
+	struct irq_domain_chip_generic *dgc = d->gc;
+	int idx;
+
+	if (!dgc)
+		return ERR_PTR(-ENODEV);
+	idx = hw_irq / dgc->irqs_per_chip;
+	if (idx >= dgc->num_chips)
+		return ERR_PTR(-EINVAL);
+	return dgc->gc[idx];
+}
 
 /**
  * irq_get_domain_generic_chip - Get a pointer to the generic chip of a hw_irq
@@ -336,15 +350,9 @@
 struct irq_chip_generic *
 irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq)
 {
-	struct irq_domain_chip_generic *dgc = d->gc;
-	int idx;
+	struct irq_chip_generic *gc = __irq_get_domain_generic_chip(d, hw_irq);
 
-	if (!dgc)
-		return NULL;
-	idx = hw_irq / dgc->irqs_per_chip;
-	if (idx >= dgc->num_chips)
-		return NULL;
-	return dgc->gc[idx];
+	return !IS_ERR(gc) ? gc : NULL;
 }
 EXPORT_SYMBOL_GPL(irq_get_domain_generic_chip);
 
@@ -368,13 +376,9 @@
 	unsigned long flags;
 	int idx;
 
-	if (!d->gc)
-		return -ENODEV;
-
-	idx = hw_irq / dgc->irqs_per_chip;
-	if (idx >= dgc->num_chips)
-		return -EINVAL;
-	gc = dgc->gc[idx];
+	gc = __irq_get_domain_generic_chip(d, hw_irq);
+	if (IS_ERR(gc))
+		return PTR_ERR(gc);
 
 	idx = hw_irq % dgc->irqs_per_chip;
 
@@ -409,10 +413,30 @@
 	irq_modify_status(virq, dgc->irq_flags_to_clear, dgc->irq_flags_to_set);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(irq_map_generic_chip);
+
+static void irq_unmap_generic_chip(struct irq_domain *d, unsigned int virq)
+{
+	struct irq_data *data = irq_domain_get_irq_data(d, virq);
+	struct irq_domain_chip_generic *dgc = d->gc;
+	unsigned int hw_irq = data->hwirq;
+	struct irq_chip_generic *gc;
+	int irq_idx;
+
+	gc = irq_get_domain_generic_chip(d, hw_irq);
+	if (!gc)
+		return;
+
+	irq_idx = hw_irq % dgc->irqs_per_chip;
+
+	clear_bit(irq_idx, &gc->installed);
+	irq_domain_set_info(d, virq, hw_irq, &no_irq_chip, NULL, NULL, NULL,
+			    NULL);
+
+}
 
 struct irq_domain_ops irq_generic_chip_ops = {
 	.map	= irq_map_generic_chip,
+	.unmap  = irq_unmap_generic_chip,
 	.xlate	= irq_domain_xlate_onetwocell,
 };
 EXPORT_SYMBOL_GPL(irq_generic_chip_ops);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a623b44..00bb0ae 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -15,6 +15,7 @@
 #include <linux/radix-tree.h>
 #include <linux/bitmap.h>
 #include <linux/irqdomain.h>
+#include <linux/sysfs.h>
 
 #include "internals.h"
 
@@ -123,6 +124,181 @@
 
 #ifdef CONFIG_SPARSE_IRQ
 
+static void irq_kobj_release(struct kobject *kobj);
+
+#ifdef CONFIG_SYSFS
+static struct kobject *irq_kobj_base;
+
+#define IRQ_ATTR_RO(_name) \
+static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+static ssize_t per_cpu_count_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	int cpu, irq = desc->irq_data.irq;
+	ssize_t ret = 0;
+	char *p = "";
+
+	for_each_possible_cpu(cpu) {
+		unsigned int c = kstat_irqs_cpu(irq, cpu);
+
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c);
+		p = ",";
+	}
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	return ret;
+}
+IRQ_ATTR_RO(per_cpu_count);
+
+static ssize_t chip_name_show(struct kobject *kobj,
+			      struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	ssize_t ret = 0;
+
+	raw_spin_lock_irq(&desc->lock);
+	if (desc->irq_data.chip && desc->irq_data.chip->name) {
+		ret = scnprintf(buf, PAGE_SIZE, "%s\n",
+				desc->irq_data.chip->name);
+	}
+	raw_spin_unlock_irq(&desc->lock);
+
+	return ret;
+}
+IRQ_ATTR_RO(chip_name);
+
+static ssize_t hwirq_show(struct kobject *kobj,
+			  struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	ssize_t ret = 0;
+
+	raw_spin_lock_irq(&desc->lock);
+	if (desc->irq_data.domain)
+		ret = sprintf(buf, "%d\n", (int)desc->irq_data.hwirq);
+	raw_spin_unlock_irq(&desc->lock);
+
+	return ret;
+}
+IRQ_ATTR_RO(hwirq);
+
+static ssize_t type_show(struct kobject *kobj,
+			 struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	ssize_t ret = 0;
+
+	raw_spin_lock_irq(&desc->lock);
+	ret = sprintf(buf, "%s\n",
+		      irqd_is_level_type(&desc->irq_data) ? "level" : "edge");
+	raw_spin_unlock_irq(&desc->lock);
+
+	return ret;
+
+}
+IRQ_ATTR_RO(type);
+
+static ssize_t name_show(struct kobject *kobj,
+			 struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	ssize_t ret = 0;
+
+	raw_spin_lock_irq(&desc->lock);
+	if (desc->name)
+		ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name);
+	raw_spin_unlock_irq(&desc->lock);
+
+	return ret;
+}
+IRQ_ATTR_RO(name);
+
+static ssize_t actions_show(struct kobject *kobj,
+			    struct kobj_attribute *attr, char *buf)
+{
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
+	struct irqaction *action;
+	ssize_t ret = 0;
+	char *p = "";
+
+	raw_spin_lock_irq(&desc->lock);
+	for (action = desc->action; action != NULL; action = action->next) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
+				 p, action->name);
+		p = ",";
+	}
+	raw_spin_unlock_irq(&desc->lock);
+
+	if (ret)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+	return ret;
+}
+IRQ_ATTR_RO(actions);
+
+static struct attribute *irq_attrs[] = {
+	&per_cpu_count_attr.attr,
+	&chip_name_attr.attr,
+	&hwirq_attr.attr,
+	&type_attr.attr,
+	&name_attr.attr,
+	&actions_attr.attr,
+	NULL
+};
+
+static struct kobj_type irq_kobj_type = {
+	.release	= irq_kobj_release,
+	.sysfs_ops	= &kobj_sysfs_ops,
+	.default_attrs	= irq_attrs,
+};
+
+static void irq_sysfs_add(int irq, struct irq_desc *desc)
+{
+	if (irq_kobj_base) {
+		/*
+		 * Continue even in case of failure as this is nothing
+		 * crucial.
+		 */
+		if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq))
+			pr_warn("Failed to add kobject for irq %d\n", irq);
+	}
+}
+
+static int __init irq_sysfs_init(void)
+{
+	struct irq_desc *desc;
+	int irq;
+
+	/* Prevent concurrent irq alloc/free */
+	irq_lock_sparse();
+
+	irq_kobj_base = kobject_create_and_add("irq", kernel_kobj);
+	if (!irq_kobj_base) {
+		irq_unlock_sparse();
+		return -ENOMEM;
+	}
+
+	/* Add the already allocated interrupts */
+	for_each_irq_desc(irq, desc)
+		irq_sysfs_add(irq, desc);
+	irq_unlock_sparse();
+
+	return 0;
+}
+postcore_initcall(irq_sysfs_init);
+
+#else /* !CONFIG_SYSFS */
+
+static struct kobj_type irq_kobj_type = {
+	.release	= irq_kobj_release,
+};
+
+static void irq_sysfs_add(int irq, struct irq_desc *desc) {}
+
+#endif /* CONFIG_SYSFS */
+
 static RADIX_TREE(irq_desc_tree, GFP_KERNEL);
 
 static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@ -187,6 +363,7 @@
 
 	desc_set_defaults(irq, desc, node, affinity, owner);
 	irqd_set(&desc->irq_data, flags);
+	kobject_init(&desc->kobj, &irq_kobj_type);
 
 	return desc;
 
@@ -197,15 +374,22 @@
 	return NULL;
 }
 
-static void delayed_free_desc(struct rcu_head *rhp)
+static void irq_kobj_release(struct kobject *kobj)
 {
-	struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
+	struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj);
 
 	free_masks(desc);
 	free_percpu(desc->kstat_irqs);
 	kfree(desc);
 }
 
+static void delayed_free_desc(struct rcu_head *rhp)
+{
+	struct irq_desc *desc = container_of(rhp, struct irq_desc, rcu);
+
+	kobject_put(&desc->kobj);
+}
+
 static void free_desc(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -217,8 +401,12 @@
 	 * kstat_irq_usr(). Once we deleted the descriptor from the
 	 * sparse tree we can free it. Access in proc will fail to
 	 * lookup the descriptor.
+	 *
+	 * The sysfs entry must be serialized against a concurrent
+	 * irq_sysfs_init() as well.
 	 */
 	mutex_lock(&sparse_irq_lock);
+	kobject_del(&desc->kobj);
 	delete_irq_desc(irq);
 	mutex_unlock(&sparse_irq_lock);
 
@@ -236,31 +424,31 @@
 	const struct cpumask *mask = NULL;
 	struct irq_desc *desc;
 	unsigned int flags;
-	int i, cpu = -1;
+	int i;
 
-	if (affinity && cpumask_empty(affinity))
-		return -EINVAL;
+	/* Validate affinity mask(s) */
+	if (affinity) {
+		for (i = 0, mask = affinity; i < cnt; i++, mask++) {
+			if (cpumask_empty(mask))
+				return -EINVAL;
+		}
+	}
 
 	flags = affinity ? IRQD_AFFINITY_MANAGED : 0;
+	mask = NULL;
 
 	for (i = 0; i < cnt; i++) {
 		if (affinity) {
-			cpu = cpumask_next(cpu, affinity);
-			if (cpu >= nr_cpu_ids)
-				cpu = cpumask_first(affinity);
-			node = cpu_to_node(cpu);
-
-			/*
-			 * For single allocations we use the caller provided
-			 * mask otherwise we use the mask of the target cpu
-			 */
-			mask = cnt == 1 ? affinity : cpumask_of(cpu);
+			node = cpu_to_node(cpumask_first(affinity));
+			mask = affinity;
+			affinity++;
 		}
 		desc = alloc_desc(start + i, node, flags, mask, owner);
 		if (!desc)
 			goto err;
 		mutex_lock(&sparse_irq_lock);
 		irq_insert_desc(start + i, desc);
+		irq_sysfs_add(start + i, desc);
 		mutex_unlock(&sparse_irq_lock);
 	}
 	return start;
@@ -481,9 +669,9 @@
  * @cnt:	Number of consecutive irqs to allocate.
  * @node:	Preferred node on which the irq descriptor should be allocated
  * @owner:	Owning module (can be NULL)
- * @affinity:	Optional pointer to an affinity mask which hints where the
- *		irq descriptors should be allocated and which default
- *		affinities to use
+ * @affinity:	Optional pointer to an affinity mask array of size @cnt which
+ *		hints where the irq descriptors should be allocated and which
+ *		default affinities to use
  *
  * Returns the first irq number or error code
  */
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 4752b43..8c0a0ae 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -80,7 +80,7 @@
 
 /**
  * __irq_domain_add() - Allocate a new irq_domain data structure
- * @of_node: optional device-tree node of the interrupt controller
+ * @fwnode: firmware node for the interrupt controller
  * @size: Size of linear map; 0 for radix mapping only
  * @hwirq_max: Maximum number of interrupts supported by controller
  * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
@@ -96,10 +96,8 @@
 				    const struct irq_domain_ops *ops,
 				    void *host_data)
 {
+	struct device_node *of_node = to_of_node(fwnode);
 	struct irq_domain *domain;
-	struct device_node *of_node;
-
-	of_node = to_of_node(fwnode);
 
 	domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
 			      GFP_KERNEL, of_node_to_nid(of_node));
@@ -868,7 +866,10 @@
 	if (WARN_ON(intsize < 1))
 		return -EINVAL;
 	*out_hwirq = intspec[0];
-	*out_type = (intsize > 1) ? intspec[1] : IRQ_TYPE_NONE;
+	if (intsize > 1)
+		*out_type = intspec[1] & IRQ_TYPE_SENSE_MASK;
+	else
+		*out_type = IRQ_TYPE_NONE;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 73a2b78..0c5f1a5 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -669,8 +669,6 @@
 		return 0;
 	}
 
-	flags &= IRQ_TYPE_SENSE_MASK;
-
 	if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
 		if (!irqd_irq_masked(&desc->irq_data))
 			mask_irq(desc);
@@ -678,7 +676,8 @@
 			unmask = 1;
 	}
 
-	/* caller masked out all except trigger mode flags */
+	/* Mask all flags except trigger mode */
+	flags &= IRQ_TYPE_SENSE_MASK;
 	ret = chip->irq_set_type(&desc->irq_data, flags);
 
 	switch (ret) {
@@ -1681,8 +1680,10 @@
 	action->dev_id = dev_id;
 
 	retval = irq_chip_pm_get(&desc->irq_data);
-	if (retval < 0)
+	if (retval < 0) {
+		kfree(action);
 		return retval;
+	}
 
 	chip_bus_lock(desc);
 	retval = __setup_irq(irq, desc, action);
@@ -1985,8 +1986,10 @@
 	action->percpu_dev_id = dev_id;
 
 	retval = irq_chip_pm_get(&desc->irq_data);
-	if (retval < 0)
+	if (retval < 0) {
+		kfree(action);
 		return retval;
+	}
 
 	chip_bus_lock(desc);
 	retval = __setup_irq(irq, desc, action);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 5499935..8a3e8727 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -18,20 +18,42 @@
 /* Temparory solution for building, will be removed later */
 #include <linux/pci.h>
 
-struct msi_desc *alloc_msi_entry(struct device *dev)
+/**
+ * alloc_msi_entry - Allocate an initialize msi_entry
+ * @dev:	Pointer to the device for which this is allocated
+ * @nvec:	The number of vectors used in this entry
+ * @affinity:	Optional pointer to an affinity mask array size of @nvec
+ *
+ * If @affinity is not NULL then a an affinity array[@nvec] is allocated
+ * and the affinity masks from @affinity are copied.
+ */
+struct msi_desc *
+alloc_msi_entry(struct device *dev, int nvec, const struct cpumask *affinity)
 {
-	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	struct msi_desc *desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
 	if (!desc)
 		return NULL;
 
 	INIT_LIST_HEAD(&desc->list);
 	desc->dev = dev;
+	desc->nvec_used = nvec;
+	if (affinity) {
+		desc->affinity = kmemdup(affinity,
+			nvec * sizeof(*desc->affinity), GFP_KERNEL);
+		if (!desc->affinity) {
+			kfree(desc);
+			return NULL;
+		}
+	}
 
 	return desc;
 }
 
 void free_msi_entry(struct msi_desc *entry)
 {
+	kfree(entry->affinity);
 	kfree(entry);
 }
 
@@ -359,6 +381,17 @@
 		else
 			dev_dbg(dev, "irq [%d-%d] for MSI\n",
 				virq, virq + desc->nvec_used - 1);
+		/*
+		 * This flag is set by the PCI layer as we need to activate
+		 * the MSI entries before the PCI layer enables MSI in the
+		 * card. Otherwise the card latches a random msi message.
+		 */
+		if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
+			struct irq_data *irq_data;
+
+			irq_data = irq_domain_get_irq_data(domain, desc->irq);
+			irq_domain_activate_irq(irq_data);
+		}
 	}
 
 	return 0;
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 503bc2d..037c321 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -887,7 +887,10 @@
 	return 0;
 out:
 	vfree(pi->sechdrs);
+	pi->sechdrs = NULL;
+
 	vfree(pi->purgatory_buf);
+	pi->purgatory_buf = NULL;
 	return ret;
 }
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 9ff173d..4ab4c37 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -64,7 +64,7 @@
 static struct kthread *to_live_kthread(struct task_struct *k)
 {
 	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
-	if (likely(vfork))
+	if (likely(vfork) && try_get_task_stack(k))
 		return __to_kthread(vfork);
 	return NULL;
 }
@@ -425,8 +425,10 @@
 {
 	struct kthread *kthread = to_live_kthread(k);
 
-	if (kthread)
+	if (kthread) {
 		__kthread_unpark(k, kthread);
+		put_task_stack(k);
+	}
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
 
@@ -455,6 +457,7 @@
 				wait_for_completion(&kthread->parked);
 			}
 		}
+		put_task_stack(k);
 		ret = 0;
 	}
 	return ret;
@@ -490,6 +493,7 @@
 		__kthread_unpark(k, kthread);
 		wake_up_process(k);
 		wait_for_completion(&kthread->exited);
+		put_task_stack(k);
 	}
 	ret = k->exit_code;
 	put_task_struct(k);
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 31322a4..6f88e35 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -18,7 +18,6 @@
 endif
 obj-$(CONFIG_SMP) += spinlock.o
 obj-$(CONFIG_LOCK_SPIN_ON_OWNER) += osq_lock.o
-obj-$(CONFIG_SMP) += lglock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
 obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o
 obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
deleted file mode 100644
index 951cfcd..0000000
--- a/kernel/locking/lglock.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/* See include/linux/lglock.h for description */
-#include <linux/module.h>
-#include <linux/lglock.h>
-#include <linux/cpu.h>
-#include <linux/string.h>
-
-/*
- * Note there is no uninit, so lglocks cannot be defined in
- * modules (but it's fine to use them from there)
- * Could be added though, just undo lg_lock_init
- */
-
-void lg_lock_init(struct lglock *lg, char *name)
-{
-	LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
-}
-EXPORT_SYMBOL(lg_lock_init);
-
-void lg_local_lock(struct lglock *lg)
-{
-	arch_spinlock_t *lock;
-
-	preempt_disable();
-	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
-	lock = this_cpu_ptr(lg->lock);
-	arch_spin_lock(lock);
-}
-EXPORT_SYMBOL(lg_local_lock);
-
-void lg_local_unlock(struct lglock *lg)
-{
-	arch_spinlock_t *lock;
-
-	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
-	lock = this_cpu_ptr(lg->lock);
-	arch_spin_unlock(lock);
-	preempt_enable();
-}
-EXPORT_SYMBOL(lg_local_unlock);
-
-void lg_local_lock_cpu(struct lglock *lg, int cpu)
-{
-	arch_spinlock_t *lock;
-
-	preempt_disable();
-	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
-	lock = per_cpu_ptr(lg->lock, cpu);
-	arch_spin_lock(lock);
-}
-EXPORT_SYMBOL(lg_local_lock_cpu);
-
-void lg_local_unlock_cpu(struct lglock *lg, int cpu)
-{
-	arch_spinlock_t *lock;
-
-	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
-	lock = per_cpu_ptr(lg->lock, cpu);
-	arch_spin_unlock(lock);
-	preempt_enable();
-}
-EXPORT_SYMBOL(lg_local_unlock_cpu);
-
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2)
-{
-	BUG_ON(cpu1 == cpu2);
-
-	/* lock in cpu order, just like lg_global_lock */
-	if (cpu2 < cpu1)
-		swap(cpu1, cpu2);
-
-	preempt_disable();
-	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
-	arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
-	arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
-}
-
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
-{
-	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
-	arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
-	arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
-	preempt_enable();
-}
-
-void lg_global_lock(struct lglock *lg)
-{
-	int i;
-
-	preempt_disable();
-	lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
-	for_each_possible_cpu(i) {
-		arch_spinlock_t *lock;
-		lock = per_cpu_ptr(lg->lock, i);
-		arch_spin_lock(lock);
-	}
-}
-EXPORT_SYMBOL(lg_global_lock);
-
-void lg_global_unlock(struct lglock *lg)
-{
-	int i;
-
-	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
-	for_each_possible_cpu(i) {
-		arch_spinlock_t *lock;
-		lock = per_cpu_ptr(lg->lock, i);
-		arch_spin_unlock(lock);
-	}
-	preempt_enable();
-}
-EXPORT_SYMBOL(lg_global_unlock);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index bec0b64..ce18259 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -8,152 +8,186 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 
-int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
+int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
 			const char *name, struct lock_class_key *rwsem_key)
 {
-	brw->fast_read_ctr = alloc_percpu(int);
-	if (unlikely(!brw->fast_read_ctr))
+	sem->read_count = alloc_percpu(int);
+	if (unlikely(!sem->read_count))
 		return -ENOMEM;
 
 	/* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
-	__init_rwsem(&brw->rw_sem, name, rwsem_key);
-	rcu_sync_init(&brw->rss, RCU_SCHED_SYNC);
-	atomic_set(&brw->slow_read_ctr, 0);
-	init_waitqueue_head(&brw->write_waitq);
+	rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
+	__init_rwsem(&sem->rw_sem, name, rwsem_key);
+	init_waitqueue_head(&sem->writer);
+	sem->readers_block = 0;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(__percpu_init_rwsem);
 
-void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
 {
 	/*
 	 * XXX: temporary kludge. The error path in alloc_super()
 	 * assumes that percpu_free_rwsem() is safe after kzalloc().
 	 */
-	if (!brw->fast_read_ctr)
+	if (!sem->read_count)
 		return;
 
-	rcu_sync_dtor(&brw->rss);
-	free_percpu(brw->fast_read_ctr);
-	brw->fast_read_ctr = NULL; /* catch use after free bugs */
+	rcu_sync_dtor(&sem->rss);
+	free_percpu(sem->read_count);
+	sem->read_count = NULL; /* catch use after free bugs */
 }
 EXPORT_SYMBOL_GPL(percpu_free_rwsem);
 
-/*
- * This is the fast-path for down_read/up_read. If it succeeds we rely
- * on the barriers provided by rcu_sync_enter/exit; see the comments in
- * percpu_down_write() and percpu_up_write().
- *
- * If this helper fails the callers rely on the normal rw_semaphore and
- * atomic_dec_and_test(), so in this case we have the necessary barriers.
- */
-static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
-{
-	bool success;
-
-	preempt_disable();
-	success = rcu_sync_is_idle(&brw->rss);
-	if (likely(success))
-		__this_cpu_add(*brw->fast_read_ctr, val);
-	preempt_enable();
-
-	return success;
-}
-
-/*
- * Like the normal down_read() this is not recursive, the writer can
- * come after the first percpu_down_read() and create the deadlock.
- *
- * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
- * percpu_up_read() does rwsem_release(). This pairs with the usage
- * of ->rw_sem in percpu_down/up_write().
- */
-void percpu_down_read(struct percpu_rw_semaphore *brw)
-{
-	might_sleep();
-	rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
-
-	if (likely(update_fast_ctr(brw, +1)))
-		return;
-
-	/* Avoid rwsem_acquire_read() and rwsem_release() */
-	__down_read(&brw->rw_sem);
-	atomic_inc(&brw->slow_read_ctr);
-	__up_read(&brw->rw_sem);
-}
-EXPORT_SYMBOL_GPL(percpu_down_read);
-
-int percpu_down_read_trylock(struct percpu_rw_semaphore *brw)
-{
-	if (unlikely(!update_fast_ctr(brw, +1))) {
-		if (!__down_read_trylock(&brw->rw_sem))
-			return 0;
-		atomic_inc(&brw->slow_read_ctr);
-		__up_read(&brw->rw_sem);
-	}
-
-	rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 1, _RET_IP_);
-	return 1;
-}
-
-void percpu_up_read(struct percpu_rw_semaphore *brw)
-{
-	rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
-
-	if (likely(update_fast_ctr(brw, -1)))
-		return;
-
-	/* false-positive is possible but harmless */
-	if (atomic_dec_and_test(&brw->slow_read_ctr))
-		wake_up_all(&brw->write_waitq);
-}
-EXPORT_SYMBOL_GPL(percpu_up_read);
-
-static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
-{
-	unsigned int sum = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu) {
-		sum += per_cpu(*brw->fast_read_ctr, cpu);
-		per_cpu(*brw->fast_read_ctr, cpu) = 0;
-	}
-
-	return sum;
-}
-
-void percpu_down_write(struct percpu_rw_semaphore *brw)
+int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
 {
 	/*
-	 * Make rcu_sync_is_idle() == F and thus disable the fast-path in
-	 * percpu_down_read() and percpu_up_read(), and wait for gp pass.
+	 * Due to having preemption disabled the decrement happens on
+	 * the same CPU as the increment, avoiding the
+	 * increment-on-one-CPU-and-decrement-on-another problem.
 	 *
-	 * The latter synchronises us with the preceding readers which used
-	 * the fast-past, so we can not miss the result of __this_cpu_add()
-	 * or anything else inside their criticial sections.
+	 * If the reader misses the writer's assignment of readers_block, then
+	 * the writer is guaranteed to see the reader's increment.
+	 *
+	 * Conversely, any readers that increment their sem->read_count after
+	 * the writer looks are guaranteed to see the readers_block value,
+	 * which in turn means that they are guaranteed to immediately
+	 * decrement their sem->read_count, so that it doesn't matter that the
+	 * writer missed them.
 	 */
-	rcu_sync_enter(&brw->rss);
 
-	/* exclude other writers, and block the new readers completely */
-	down_write(&brw->rw_sem);
+	smp_mb(); /* A matches D */
 
-	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
-	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+	/*
+	 * If !readers_block the critical section starts here, matched by the
+	 * release in percpu_up_write().
+	 */
+	if (likely(!smp_load_acquire(&sem->readers_block)))
+		return 1;
 
-	/* wait for all readers to complete their percpu_up_read() */
-	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+	/*
+	 * Per the above comment; we still have preemption disabled and
+	 * will thus decrement on the same CPU as we incremented.
+	 */
+	__percpu_up_read(sem);
+
+	if (try)
+		return 0;
+
+	/*
+	 * We either call schedule() in the wait, or we'll fall through
+	 * and reschedule on the preempt_enable() in percpu_down_read().
+	 */
+	preempt_enable_no_resched();
+
+	/*
+	 * Avoid lockdep for the down/up_read() we already have them.
+	 */
+	__down_read(&sem->rw_sem);
+	this_cpu_inc(*sem->read_count);
+	__up_read(&sem->rw_sem);
+
+	preempt_disable();
+	return 1;
+}
+EXPORT_SYMBOL_GPL(__percpu_down_read);
+
+void __percpu_up_read(struct percpu_rw_semaphore *sem)
+{
+	smp_mb(); /* B matches C */
+	/*
+	 * In other words, if they see our decrement (presumably to aggregate
+	 * zero, as that is the only time it matters) they will also see our
+	 * critical section.
+	 */
+	__this_cpu_dec(*sem->read_count);
+
+	/* Prod writer to recheck readers_active */
+	wake_up(&sem->writer);
+}
+EXPORT_SYMBOL_GPL(__percpu_up_read);
+
+#define per_cpu_sum(var)						\
+({									\
+	typeof(var) __sum = 0;						\
+	int cpu;							\
+	compiletime_assert_atomic_type(__sum);				\
+	for_each_possible_cpu(cpu)					\
+		__sum += per_cpu(var, cpu);				\
+	__sum;								\
+})
+
+/*
+ * Return true if the modular sum of the sem->read_count per-CPU variable is
+ * zero.  If this sum is zero, then it is stable due to the fact that if any
+ * newly arriving readers increment a given counter, they will immediately
+ * decrement that same counter.
+ */
+static bool readers_active_check(struct percpu_rw_semaphore *sem)
+{
+	if (per_cpu_sum(*sem->read_count) != 0)
+		return false;
+
+	/*
+	 * If we observed the decrement; ensure we see the entire critical
+	 * section.
+	 */
+
+	smp_mb(); /* C matches B */
+
+	return true;
+}
+
+void percpu_down_write(struct percpu_rw_semaphore *sem)
+{
+	/* Notify readers to take the slow path. */
+	rcu_sync_enter(&sem->rss);
+
+	down_write(&sem->rw_sem);
+
+	/*
+	 * Notify new readers to block; up until now, and thus throughout the
+	 * longish rcu_sync_enter() above, new readers could still come in.
+	 */
+	WRITE_ONCE(sem->readers_block, 1);
+
+	smp_mb(); /* D matches A */
+
+	/*
+	 * If they don't see our writer of readers_block, then we are
+	 * guaranteed to see their sem->read_count increment, and therefore
+	 * will wait for them.
+	 */
+
+	/* Wait for all now active readers to complete. */
+	wait_event(sem->writer, readers_active_check(sem));
 }
 EXPORT_SYMBOL_GPL(percpu_down_write);
 
-void percpu_up_write(struct percpu_rw_semaphore *brw)
+void percpu_up_write(struct percpu_rw_semaphore *sem)
 {
-	/* release the lock, but the readers can't use the fast-path */
-	up_write(&brw->rw_sem);
 	/*
-	 * Enable the fast-path in percpu_down_read() and percpu_up_read()
-	 * but only after another gp pass; this adds the necessary barrier
-	 * to ensure the reader can't miss the changes done by us.
+	 * Signal the writer is done, no fast path yet.
+	 *
+	 * One reason that we cannot just immediately flip to readers_fast is
+	 * that new readers might fail to see the results of this writer's
+	 * critical section.
+	 *
+	 * Therefore we force it through the slow path which guarantees an
+	 * acquire and thereby guarantees the critical section's consistency.
 	 */
-	rcu_sync_exit(&brw->rss);
+	smp_store_release(&sem->readers_block, 0);
+
+	/*
+	 * Release the write lock, this will allow readers back in the game.
+	 */
+	up_write(&sem->rw_sem);
+
+	/*
+	 * Once this completes (at least one RCU-sched grace period hence) the
+	 * reader fast path will be available again. Safe to use outside the
+	 * exclusive write lock because its counting.
+	 */
+	rcu_sync_exit(&sem->rss);
 }
 EXPORT_SYMBOL_GPL(percpu_up_write);
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 37649e6..e3b5520 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -70,11 +70,14 @@
 static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
 {
 	struct __qspinlock *l = (void *)lock;
-	int ret = !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
-		   (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
 
-	qstat_inc(qstat_pv_lock_stealing, ret);
-	return ret;
+	if (!(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+	    (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0)) {
+		qstat_inc(qstat_pv_lock_stealing, true);
+		return true;
+	}
+
+	return false;
 }
 
 /*
@@ -257,7 +260,6 @@
 static inline bool
 pv_wait_early(struct pv_node *prev, int loop)
 {
-
 	if ((loop & PV_PREV_CHECK_MASK) != 0)
 		return false;
 
@@ -286,12 +288,10 @@
 {
 	struct pv_node *pn = (struct pv_node *)node;
 	struct pv_node *pp = (struct pv_node *)prev;
-	int waitcnt = 0;
 	int loop;
 	bool wait_early;
 
-	/* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
-	for (;; waitcnt++) {
+	for (;;) {
 		for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
 			if (READ_ONCE(node->locked))
 				return;
@@ -315,7 +315,6 @@
 
 		if (!READ_ONCE(node->locked)) {
 			qstat_inc(qstat_pv_wait_node, true);
-			qstat_inc(qstat_pv_wait_again, waitcnt);
 			qstat_inc(qstat_pv_wait_early, wait_early);
 			pv_wait(&pn->state, vcpu_halted);
 		}
@@ -450,18 +449,15 @@
 				goto gotlock;
 			}
 		}
-		WRITE_ONCE(pn->state, vcpu_halted);
+		WRITE_ONCE(pn->state, vcpu_hashed);
 		qstat_inc(qstat_pv_wait_head, true);
 		qstat_inc(qstat_pv_wait_again, waitcnt);
 		pv_wait(&l->locked, _Q_SLOW_VAL);
 
 		/*
-		 * The unlocker should have freed the lock before kicking the
-		 * CPU. So if the lock is still not free, it is a spurious
-		 * wakeup or another vCPU has stolen the lock. The current
-		 * vCPU should spin again.
+		 * Because of lock stealing, the queue head vCPU may not be
+		 * able to acquire the lock before it has to wait again.
 		 */
-		qstat_inc(qstat_pv_spurious_wakeup, READ_ONCE(l->locked));
 	}
 
 	/*
@@ -544,7 +540,7 @@
 	 * unhash. Otherwise it would be possible to have multiple @lock
 	 * entries, which would be BAD.
 	 */
-	locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+	locked = cmpxchg_release(&l->locked, _Q_LOCKED_VAL, 0);
 	if (likely(locked == _Q_LOCKED_VAL))
 		return;
 
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 22e0253..eb0a599 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -24,8 +24,8 @@
  *   pv_latency_wake	- average latency (ns) from vCPU kick to wakeup
  *   pv_lock_slowpath	- # of locking operations via the slowpath
  *   pv_lock_stealing	- # of lock stealing operations
- *   pv_spurious_wakeup	- # of spurious wakeups
- *   pv_wait_again	- # of vCPU wait's that happened after a vCPU kick
+ *   pv_spurious_wakeup	- # of spurious wakeups in non-head vCPUs
+ *   pv_wait_again	- # of wait's after a queue head vCPU kick
  *   pv_wait_early	- # of early vCPU wait's
  *   pv_wait_head	- # of vCPU wait's at the queue head
  *   pv_wait_node	- # of vCPU wait's at a non-head queue node
@@ -153,7 +153,6 @@
 		 */
 		if ((counter == qstat_pv_latency_kick) ||
 		    (counter == qstat_pv_latency_wake)) {
-			stat = 0;
 			if (kicks)
 				stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
 		}
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 447e08d..2337b4b 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -121,16 +121,19 @@
  * - woken process blocks are discarded from the list after having task zeroed
  * - writers are only marked woken if downgrading is false
  */
-static struct rw_semaphore *
-__rwsem_mark_wake(struct rw_semaphore *sem,
-		  enum rwsem_wake_type wake_type, struct wake_q_head *wake_q)
+static void __rwsem_mark_wake(struct rw_semaphore *sem,
+			      enum rwsem_wake_type wake_type,
+			      struct wake_q_head *wake_q)
 {
-	struct rwsem_waiter *waiter;
-	struct task_struct *tsk;
-	struct list_head *next;
-	long oldcount, woken, loop, adjustment;
+	struct rwsem_waiter *waiter, *tmp;
+	long oldcount, woken = 0, adjustment = 0;
 
-	waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
+	/*
+	 * Take a peek at the queue head waiter such that we can determine
+	 * the wakeup(s) to perform.
+	 */
+	waiter = list_first_entry(&sem->wait_list, struct rwsem_waiter, list);
+
 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
 		if (wake_type == RWSEM_WAKE_ANY) {
 			/*
@@ -142,19 +145,19 @@
 			 */
 			wake_q_add(wake_q, waiter->task);
 		}
-		goto out;
+
+		return;
 	}
 
-	/* Writers might steal the lock before we grant it to the next reader.
+	/*
+	 * Writers might steal the lock before we grant it to the next reader.
 	 * We prefer to do the first reader grant before counting readers
 	 * so we can bail out early if a writer stole the lock.
 	 */
-	adjustment = 0;
 	if (wake_type != RWSEM_WAKE_READ_OWNED) {
 		adjustment = RWSEM_ACTIVE_READ_BIAS;
  try_reader_grant:
 		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
-
 		if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
 			/*
 			 * If the count is still less than RWSEM_WAITING_BIAS
@@ -164,7 +167,8 @@
 			 */
 			if (atomic_long_add_return(-adjustment, &sem->count) <
 			    RWSEM_WAITING_BIAS)
-				goto out;
+				return;
+
 			/* Last active locker left. Retry waking readers. */
 			goto try_reader_grant;
 		}
@@ -176,38 +180,23 @@
 		rwsem_set_reader_owned(sem);
 	}
 
-	/* Grant an infinite number of read locks to the readers at the front
-	 * of the queue.  Note we increment the 'active part' of the count by
-	 * the number of readers before waking any processes up.
+	/*
+	 * Grant an infinite number of read locks to the readers at the front
+	 * of the queue. We know that woken will be at least 1 as we accounted
+	 * for above. Note we increment the 'active part' of the count by the
+	 * number of readers before waking any processes up.
 	 */
-	woken = 0;
-	do {
-		woken++;
+	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
+		struct task_struct *tsk;
 
-		if (waiter->list.next == &sem->wait_list)
+		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
 			break;
 
-		waiter = list_entry(waiter->list.next,
-					struct rwsem_waiter, list);
-
-	} while (waiter->type != RWSEM_WAITING_FOR_WRITE);
-
-	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
-	if (waiter->type != RWSEM_WAITING_FOR_WRITE)
-		/* hit end of list above */
-		adjustment -= RWSEM_WAITING_BIAS;
-
-	if (adjustment)
-		atomic_long_add(adjustment, &sem->count);
-
-	next = sem->wait_list.next;
-	loop = woken;
-	do {
-		waiter = list_entry(next, struct rwsem_waiter, list);
-		next = waiter->list.next;
+		woken++;
 		tsk = waiter->task;
 
 		wake_q_add(wake_q, tsk);
+		list_del(&waiter->list);
 		/*
 		 * Ensure that the last operation is setting the reader
 		 * waiter to nil such that rwsem_down_read_failed() cannot
@@ -215,13 +204,16 @@
 		 * to the task to wakeup.
 		 */
 		smp_store_release(&waiter->task, NULL);
-	} while (--loop);
+	}
 
-	sem->wait_list.next = next;
-	next->prev = &sem->wait_list;
+	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+	if (list_empty(&sem->wait_list)) {
+		/* hit end of list above */
+		adjustment -= RWSEM_WAITING_BIAS;
+	}
 
- out:
-	return sem;
+	if (adjustment)
+		atomic_long_add(adjustment, &sem->count);
 }
 
 /*
@@ -235,7 +227,6 @@
 	struct task_struct *tsk = current;
 	WAKE_Q(wake_q);
 
-	/* set up my own style of waitqueue */
 	waiter.task = tsk;
 	waiter.type = RWSEM_WAITING_FOR_READ;
 
@@ -247,7 +238,8 @@
 	/* we're now waiting on the lock, but no longer actively locking */
 	count = atomic_long_add_return(adjustment, &sem->count);
 
-	/* If there are no active locks, wake the front queued process(es).
+	/*
+	 * If there are no active locks, wake the front queued process(es).
 	 *
 	 * If there are no writers and we are first in the queue,
 	 * wake our own waiter to join the existing active readers !
@@ -255,7 +247,7 @@
 	if (count == RWSEM_WAITING_BIAS ||
 	    (count > RWSEM_WAITING_BIAS &&
 	     adjustment != -RWSEM_ACTIVE_READ_BIAS))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irq(&sem->wait_lock);
 	wake_up_q(&wake_q);
@@ -505,7 +497,7 @@
 		if (count > RWSEM_WAITING_BIAS) {
 			WAKE_Q(wake_q);
 
-			sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
+			__rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
 			/*
 			 * The wakeup is normally called _after_ the wait_lock
 			 * is released, but given that we are proactively waking
@@ -614,9 +606,8 @@
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 locked:
 
-	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 	wake_up_q(&wake_q);
@@ -638,9 +629,8 @@
 
 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-	/* do nothing if list empty */
 	if (!list_empty(&sem->wait_list))
-		sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
+		__rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
 
 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
 	wake_up_q(&wake_q);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 251d16b..b501e39 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -247,6 +247,7 @@
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(resource_size(res), SECTION_SIZE);
 	arch_remove_memory(align_start, align_size);
+	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
 	pgmap_radix_release(res);
 	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
 			"%s: failed to free all reserved pages\n", __func__);
@@ -282,6 +283,7 @@
 		struct percpu_ref *ref, struct vmem_altmap *altmap)
 {
 	resource_size_t key, align_start, align_size, align_end;
+	pgprot_t pgprot = PAGE_KERNEL;
 	struct dev_pagemap *pgmap;
 	struct page_map *page_map;
 	int error, nid, is_ram;
@@ -351,6 +353,11 @@
 	if (nid < 0)
 		nid = numa_mem_id();
 
+	error = track_pfn_remap(NULL, &pgprot, PHYS_PFN(align_start), 0,
+			align_size);
+	if (error)
+		goto err_pfn_remap;
+
 	error = arch_add_memory(nid, align_start, align_size, true);
 	if (error)
 		goto err_add_memory;
@@ -371,6 +378,8 @@
 	return __va(res->start);
 
  err_add_memory:
+	untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
+ err_pfn_remap:
  err_radix:
 	pgmap_radix_release(res);
 	devres_free(page_map);
diff --git a/kernel/padata.c b/kernel/padata.c
index 9932788..7848f05 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -30,6 +30,7 @@
 #include <linux/slab.h>
 #include <linux/sysfs.h>
 #include <linux/rcupdate.h>
+#include <linux/module.h>
 
 #define MAX_OBJ_NUM 1000
 
@@ -769,52 +770,43 @@
 		cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
 }
 
-
-static int padata_cpu_callback(struct notifier_block *nfb,
-			       unsigned long action, void *hcpu)
+static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
 {
-	int err;
 	struct padata_instance *pinst;
-	int cpu = (unsigned long)hcpu;
+	int ret;
 
-	pinst = container_of(nfb, struct padata_instance, cpu_notifier);
+	pinst = hlist_entry_safe(node, struct padata_instance, node);
+	if (!pinst_has_cpu(pinst, cpu))
+		return 0;
 
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-		if (!pinst_has_cpu(pinst, cpu))
-			break;
-		mutex_lock(&pinst->lock);
-		err = __padata_add_cpu(pinst, cpu);
-		mutex_unlock(&pinst->lock);
-		if (err)
-			return notifier_from_errno(err);
-		break;
-
-	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		if (!pinst_has_cpu(pinst, cpu))
-			break;
-		mutex_lock(&pinst->lock);
-		err = __padata_remove_cpu(pinst, cpu);
-		mutex_unlock(&pinst->lock);
-		if (err)
-			return notifier_from_errno(err);
-		break;
-	}
-
-	return NOTIFY_OK;
+	mutex_lock(&pinst->lock);
+	ret = __padata_add_cpu(pinst, cpu);
+	mutex_unlock(&pinst->lock);
+	return ret;
 }
+
+static int padata_cpu_prep_down(unsigned int cpu, struct hlist_node *node)
+{
+	struct padata_instance *pinst;
+	int ret;
+
+	pinst = hlist_entry_safe(node, struct padata_instance, node);
+	if (!pinst_has_cpu(pinst, cpu))
+		return 0;
+
+	mutex_lock(&pinst->lock);
+	ret = __padata_remove_cpu(pinst, cpu);
+	mutex_unlock(&pinst->lock);
+	return ret;
+}
+
+static enum cpuhp_state hp_online;
 #endif
 
 static void __padata_free(struct padata_instance *pinst)
 {
 #ifdef CONFIG_HOTPLUG_CPU
-	unregister_hotcpu_notifier(&pinst->cpu_notifier);
+	cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
 #endif
 
 	padata_stop(pinst);
@@ -1012,11 +1004,8 @@
 	mutex_init(&pinst->lock);
 
 #ifdef CONFIG_HOTPLUG_CPU
-	pinst->cpu_notifier.notifier_call = padata_cpu_callback;
-	pinst->cpu_notifier.priority = 0;
-	register_hotcpu_notifier(&pinst->cpu_notifier);
+	cpuhp_state_add_instance_nocalls(hp_online, &pinst->node);
 #endif
-
 	return pinst;
 
 err_free_masks:
@@ -1039,3 +1028,26 @@
 	kobject_put(&pinst->kobj);
 }
 EXPORT_SYMBOL(padata_free);
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+static __init int padata_driver_init(void)
+{
+	int ret;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
+				      padata_cpu_online,
+				      padata_cpu_prep_down);
+	if (ret < 0)
+		return ret;
+	hp_online = ret;
+	return 0;
+}
+module_init(padata_driver_init);
+
+static __exit void padata_driver_exit(void)
+{
+	cpuhp_remove_multi_state(hp_online);
+}
+module_exit(padata_driver_exit);
+#endif
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 68d3ebc..e8517b6 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -186,7 +186,7 @@
 
 config DPM_WATCHDOG
 	bool "Device suspend/resume watchdog"
-	depends on PM_DEBUG && PSTORE
+	depends on PM_DEBUG && PSTORE && EXPERT
 	---help---
 	  Sets up a watchdog timer to capture drivers that are
 	  locked up attempting to suspend/resume a device.
@@ -197,7 +197,7 @@
 config DPM_WATCHDOG_TIMEOUT
 	int "Watchdog timeout in seconds"
 	range 1 120
-	default 60
+	default 120
 	depends on DPM_WATCHDOG
 
 config PM_TRACE
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a881c6a..b26dbc4 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -300,14 +300,16 @@
 	save_processor_state();
 	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
 	error = swsusp_arch_suspend();
+	/* Restore control flow magically appears here */
+	restore_processor_state();
 	trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
 	if (error)
 		printk(KERN_ERR "PM: Error %d creating hibernation image\n",
 			error);
-	/* Restore control flow magically appears here */
-	restore_processor_state();
-	if (!in_suspend)
+	if (!in_suspend) {
 		events_check_enabled = false;
+		clear_free_pages();
+	}
 
 	platform_leave(platform_mode);
 
@@ -1189,22 +1191,6 @@
 	return 1;
 }
 
-static int __init page_poison_nohibernate_setup(char *str)
-{
-#ifdef CONFIG_PAGE_POISONING_ZERO
-	/*
-	 * The zeroing option for page poison skips the checks on alloc.
-	 * since hibernation doesn't save free pages there's no way to
-	 * guarantee the pages will still be zeroed.
-	 */
-	if (!strcmp(str, "on")) {
-		pr_info("Disabling hibernation due to page poisoning\n");
-		return nohibernate_setup(str);
-	}
-#endif
-	return 1;
-}
-
 __setup("noresume", noresume_setup);
 __setup("resume_offset=", resume_offset_setup);
 __setup("resume=", resume_setup);
@@ -1212,4 +1198,3 @@
 __setup("resumewait", resumewait_setup);
 __setup("resumedelay=", resumedelay_setup);
 __setup("nohibernate", nohibernate_setup);
-__setup("page_poison=", page_poison_nohibernate_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 5ea50b1..281a697 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -644,6 +644,7 @@
 		return error;
 	hibernate_image_size_init();
 	hibernate_reserved_size_init();
+	pm_states_init();
 	power_kobj = kobject_create_and_add("power", NULL);
 	if (!power_kobj)
 		return -ENOMEM;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 242d8b8..56d1d0d 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -110,6 +110,8 @@
 extern void free_basic_memory_bitmaps(void);
 extern int hibernate_preallocate_memory(void);
 
+extern void clear_free_pages(void);
+
 /**
  *	Auxiliary structure used for reading the snapshot image data and
  *	metadata from and writing them to the list of page backup entries
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 97b0df7..168ff44 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -482,7 +482,16 @@
 		return;
 	}
 
-	cancel_delayed_work_sync(&req->work);
+	/*
+	 * This function may be called very early during boot, for example,
+	 * from of_clk_init(), where irq needs to stay disabled.
+	 * cancel_delayed_work_sync() assumes that irq is enabled on
+	 * invocation and re-enables it on return.  Avoid calling it until
+	 * workqueue is initialized.
+	 */
+	if (keventd_up())
+		cancel_delayed_work_sync(&req->work);
+
 	__pm_qos_update_request(req, new_value);
 }
 EXPORT_SYMBOL_GPL(pm_qos_update_request);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 9a0178c..4f0f060 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -835,9 +835,9 @@
  */
 static bool rtree_next_node(struct memory_bitmap *bm)
 {
-	bm->cur.node = list_entry(bm->cur.node->list.next,
-				  struct rtree_node, list);
-	if (&bm->cur.node->list != &bm->cur.zone->leaves) {
+	if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
+		bm->cur.node = list_entry(bm->cur.node->list.next,
+					  struct rtree_node, list);
 		bm->cur.node_pfn += BM_BITS_PER_BLOCK;
 		bm->cur.node_bit  = 0;
 		touch_softlockup_watchdog();
@@ -845,9 +845,9 @@
 	}
 
 	/* No more nodes, goto next zone */
-	bm->cur.zone = list_entry(bm->cur.zone->list.next,
+	if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
+		bm->cur.zone = list_entry(bm->cur.zone->list.next,
 				  struct mem_zone_bm_rtree, list);
-	if (&bm->cur.zone->list != &bm->zones) {
 		bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 					  struct rtree_node, list);
 		bm->cur.node_pfn = 0;
@@ -1132,6 +1132,28 @@
 	pr_debug("PM: Basic memory bitmaps freed\n");
 }
 
+void clear_free_pages(void)
+{
+#ifdef CONFIG_PAGE_POISONING_ZERO
+	struct memory_bitmap *bm = free_pages_map;
+	unsigned long pfn;
+
+	if (WARN_ON(!(free_pages_map)))
+		return;
+
+	memory_bm_position_reset(bm);
+	pfn = memory_bm_next_pfn(bm);
+	while (pfn != BM_END_OF_MAP) {
+		if (pfn_valid(pfn))
+			clear_highpage(pfn_to_page(pfn));
+
+		pfn = memory_bm_next_pfn(bm);
+	}
+	memory_bm_position_reset(bm);
+	pr_info("PM: free pages cleared after restore\n");
+#endif /* PAGE_POISONING_ZERO */
+}
+
 /**
  * snapshot_additional_pages - Estimate the number of extra pages needed.
  * @zone: Memory zone to carry out the computation for.
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 0acab9d..1e7f5da 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -118,10 +118,18 @@
  */
 static bool relative_states;
 
+void __init pm_states_init(void)
+{
+	/*
+	 * freeze state should be supported even without any suspend_ops,
+	 * initialize pm_states accordingly here
+	 */
+	pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2];
+}
+
 static int __init sleep_states_setup(char *str)
 {
 	relative_states = !strncmp(str, "1", 1);
-	pm_states[PM_SUSPEND_FREEZE] = pm_labels[relative_states ? 0 : 2];
 	return 1;
 }
 
@@ -211,7 +219,7 @@
 {
 	if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin)
 		return freeze_ops->begin();
-	else if (suspend_ops->begin)
+	else if (suspend_ops && suspend_ops->begin)
 		return suspend_ops->begin(state);
 	else
 		return 0;
@@ -221,7 +229,7 @@
 {
 	if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end)
 		freeze_ops->end();
-	else if (suspend_ops->end)
+	else if (suspend_ops && suspend_ops->end)
 		suspend_ops->end();
 }
 
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
index 276762f..d5760c4 100644
--- a/kernel/printk/braille.c
+++ b/kernel/printk/braille.c
@@ -9,10 +9,10 @@
 
 char *_braille_console_setup(char **str, char **brl_options)
 {
-	if (!memcmp(*str, "brl,", 4)) {
+	if (!strncmp(*str, "brl,", 4)) {
 		*brl_options = "";
 		*str += 4;
-	} else if (!memcmp(str, "brl=", 4)) {
+	} else if (!strncmp(*str, "brl=", 4)) {
 		*brl_options = *str + 4;
 		*str = strchr(*brl_options, ',');
 		if (!*str)
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 5d4505f..7fd2838 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -16,11 +16,9 @@
  */
 #include <linux/percpu.h>
 
-typedef __printf(2, 0) int (*printk_func_t)(int level, const char *fmt,
-					    va_list args);
+typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
 
-__printf(2, 0)
-int vprintk_default(int level, const char *fmt, va_list args);
+int __printf(1, 0) vprintk_default(const char *fmt, va_list args);
 
 #ifdef CONFIG_PRINTK_NMI
 
@@ -33,10 +31,9 @@
  * via per-CPU variable.
  */
 DECLARE_PER_CPU(printk_func_t, printk_func);
-__printf(2, 0)
-static inline int vprintk_func(int level, const char *fmt, va_list args)
+static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
 {
-	return this_cpu_read(printk_func)(level, fmt, args);
+	return this_cpu_read(printk_func)(fmt, args);
 }
 
 extern atomic_t nmi_message_lost;
@@ -47,10 +44,9 @@
 
 #else /* CONFIG_PRINTK_NMI */
 
-__printf(2, 0)
-static inline int vprintk_func(int level, const char *fmt, va_list args)
+static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
 {
-	return vprintk_default(level, fmt, args);
+	return vprintk_default(fmt, args);
 }
 
 static inline int get_nmi_message_lost(void)
diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c
index bc3eeb1..16bab47 100644
--- a/kernel/printk/nmi.c
+++ b/kernel/printk/nmi.c
@@ -58,7 +58,7 @@
  * one writer running. But the buffer might get flushed from another
  * CPU, so we need to be careful.
  */
-static int vprintk_nmi(int level, const char *fmt, va_list args)
+static int vprintk_nmi(const char *fmt, va_list args)
 {
 	struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
 	int add = 0;
@@ -79,16 +79,7 @@
 	if (!len)
 		smp_rmb();
 
-	if (level != LOGLEVEL_DEFAULT) {
-		add = snprintf(s->buffer + len, sizeof(s->buffer) - len,
-				KERN_SOH "%c", '0' + level);
-		add += vsnprintf(s->buffer + len + add,
-				 sizeof(s->buffer) - len - add,
-				 fmt, args);
-	} else {
-		add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len,
-				fmt, args);
-	}
+	add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);
 
 	/*
 	 * Do it once again if the buffer has been flushed in the meantime.
@@ -108,27 +99,33 @@
 	return add;
 }
 
-/*
- * printk one line from the temporary buffer from @start index until
- * and including the @end index.
- */
-static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end)
+static void printk_nmi_flush_line(const char *text, int len)
 {
-	const char *buf = s->buffer + start;
-
 	/*
 	 * The buffers are flushed in NMI only on panic.  The messages must
 	 * go only into the ring buffer at this stage.  Consoles will get
 	 * explicitly called later when a crashdump is not generated.
 	 */
 	if (in_nmi())
-		printk_deferred("%.*s", (end - start) + 1, buf);
+		printk_deferred("%.*s", len, text);
 	else
-		printk("%.*s", (end - start) + 1, buf);
+		printk("%.*s", len, text);
 
 }
 
 /*
+ * printk one line from the temporary buffer from @start index until
+ * and including the @end index.
+ */
+static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s,
+					int start, int end)
+{
+	const char *buf = s->buffer + start;
+
+	printk_nmi_flush_line(buf, (end - start) + 1);
+}
+
+/*
  * Flush data from the associated per_CPU buffer. The function
  * can be called either via IRQ work or independently.
  */
@@ -159,9 +156,11 @@
 	 * the buffer an unexpected way. If we printed something then
 	 * @len must only increase.
 	 */
-	if (i && i >= len)
-		pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n",
-		       i, len);
+	if (i && i >= len) {
+		const char *msg = "printk_nmi_flush: internal error\n";
+
+		printk_nmi_flush_line(msg, strlen(msg));
+	}
 
 	if (!len)
 		goto out; /* Someone else has already flushed the buffer. */
@@ -175,14 +174,14 @@
 	/* Print line by line. */
 	for (; i < size; i++) {
 		if (s->buffer[i] == '\n') {
-			print_nmi_seq_line(s, last_i, i);
+			printk_nmi_flush_seq_line(s, last_i, i);
 			last_i = i + 1;
 		}
 	}
 	/* Check if there was a partial line. */
 	if (last_i < size) {
-		print_nmi_seq_line(s, last_i, size - 1);
-		pr_cont("\n");
+		printk_nmi_flush_seq_line(s, last_i, size - 1);
+		printk_nmi_flush_line("\n", strlen("\n"));
 	}
 
 	/*
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a37fc8c..eea6dbc 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1930,26 +1930,7 @@
 }
 EXPORT_SYMBOL(printk_emit);
 
-#define define_pr_level(func, loglevel)				\
-asmlinkage __visible void func(const char *fmt, ...)		\
-{								\
-	va_list args;						\
-								\
-	va_start(args, fmt);					\
-	vprintk_default(loglevel, fmt, args);			\
-	va_end(args);						\
-}								\
-EXPORT_SYMBOL(func)
-
-define_pr_level(__pr_emerg, LOGLEVEL_EMERG);
-define_pr_level(__pr_alert, LOGLEVEL_ALERT);
-define_pr_level(__pr_crit, LOGLEVEL_CRIT);
-define_pr_level(__pr_err, LOGLEVEL_ERR);
-define_pr_level(__pr_warn, LOGLEVEL_WARNING);
-define_pr_level(__pr_notice, LOGLEVEL_NOTICE);
-define_pr_level(__pr_info, LOGLEVEL_INFO);
-
-int vprintk_default(int level, const char *fmt, va_list args)
+int vprintk_default(const char *fmt, va_list args)
 {
 	int r;
 
@@ -1959,7 +1940,7 @@
 		return r;
 	}
 #endif
-	r = vprintk_emit(0, level, NULL, 0, fmt, args);
+	r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
 
 	return r;
 }
@@ -1992,7 +1973,7 @@
 	int r;
 
 	va_start(args, fmt);
-	r = vprintk_func(LOGLEVEL_DEFAULT, fmt, args);
+	r = vprintk_func(fmt, args);
 	va_end(args);
 
 	return r;
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index d38ab08..123ccbd 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -52,7 +52,7 @@
 
 #define PERF_FLAG "-perf:"
 #define PERFOUT_STRING(s) \
-	pr_alert("%s" PERF_FLAG s "\n", perf_type)
+	pr_alert("%s" PERF_FLAG " %s\n", perf_type, s)
 #define VERBOSE_PERFOUT_STRING(s) \
 	do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
 #define VERBOSE_PERFOUT_ERRSTRING(s) \
@@ -400,9 +400,8 @@
 			sp.sched_priority = 0;
 			sched_setscheduler_nocheck(current,
 						   SCHED_NORMAL, &sp);
-			pr_alert("%s" PERF_FLAG
-				 "rcu_perf_writer %ld has %d measurements\n",
-				 perf_type, me, MIN_MEAS);
+			pr_alert("%s%s rcu_perf_writer %ld has %d measurements\n",
+				 perf_type, PERF_FLAG, me, MIN_MEAS);
 			if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
 			    nrealwriters) {
 				schedule_timeout_interruptible(10);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 971e2b1..bf08fee 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1238,6 +1238,7 @@
 	long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 	long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 	static unsigned long rtcv_snap = ULONG_MAX;
+	struct task_struct *wtp;
 
 	for_each_possible_cpu(cpu) {
 		for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
@@ -1258,8 +1259,9 @@
 		atomic_read(&n_rcu_torture_alloc),
 		atomic_read(&n_rcu_torture_alloc_fail),
 		atomic_read(&n_rcu_torture_free));
-	pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ",
+	pr_cont("rtmbe: %d rtbe: %ld rtbke: %ld rtbre: %ld ",
 		atomic_read(&n_rcu_torture_mberror),
+		n_rcu_torture_barrier_error,
 		n_rcu_torture_boost_ktrerror,
 		n_rcu_torture_boost_rterror);
 	pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
@@ -1312,10 +1314,12 @@
 
 		rcutorture_get_gp_data(cur_ops->ttype,
 				       &flags, &gpnum, &completed);
-		pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x\n",
+		wtp = READ_ONCE(writer_task);
+		pr_alert("??? Writer stall state %s(%d) g%lu c%lu f%#x ->state %#lx\n",
 			 rcu_torture_writer_state_getname(),
 			 rcu_torture_writer_state,
-			 gpnum, completed, flags);
+			 gpnum, completed, flags,
+			 wtp == NULL ? ~0UL : wtp->state);
 		show_rcu_gp_kthreads();
 		rcu_ftrace_dump(DUMP_ALL);
 	}
@@ -1362,12 +1366,12 @@
 		 onoff_interval, onoff_holdoff);
 }
 
-static void rcutorture_booster_cleanup(int cpu)
+static int rcutorture_booster_cleanup(unsigned int cpu)
 {
 	struct task_struct *t;
 
 	if (boost_tasks[cpu] == NULL)
-		return;
+		return 0;
 	mutex_lock(&boost_mutex);
 	t = boost_tasks[cpu];
 	boost_tasks[cpu] = NULL;
@@ -1375,9 +1379,10 @@
 
 	/* This must be outside of the mutex, otherwise deadlock! */
 	torture_stop_kthread(rcu_torture_boost, t);
+	return 0;
 }
 
-static int rcutorture_booster_init(int cpu)
+static int rcutorture_booster_init(unsigned int cpu)
 {
 	int retval;
 
@@ -1577,28 +1582,7 @@
 	}
 }
 
-static int rcutorture_cpu_notify(struct notifier_block *self,
-				 unsigned long action, void *hcpu)
-{
-	long cpu = (long)hcpu;
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		(void)rcutorture_booster_init(cpu);
-		break;
-	case CPU_DOWN_PREPARE:
-		rcutorture_booster_cleanup(cpu);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block rcutorture_cpu_nb = {
-	.notifier_call = rcutorture_cpu_notify,
-};
+static enum cpuhp_state rcutor_hp;
 
 static void
 rcu_torture_cleanup(void)
@@ -1638,11 +1622,8 @@
 	for (i = 0; i < ncbflooders; i++)
 		torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]);
 	if ((test_boost == 1 && cur_ops->can_boost) ||
-	    test_boost == 2) {
-		unregister_cpu_notifier(&rcutorture_cpu_nb);
-		for_each_possible_cpu(i)
-			rcutorture_booster_cleanup(i);
-	}
+	    test_boost == 2)
+		cpuhp_remove_state(rcutor_hp);
 
 	/*
 	 * Wait for all RCU callbacks to fire, then do flavor-specific
@@ -1869,14 +1850,13 @@
 	    test_boost == 2) {
 
 		boost_starttime = jiffies + test_boost_interval * HZ;
-		register_cpu_notifier(&rcutorture_cpu_nb);
-		for_each_possible_cpu(i) {
-			if (cpu_is_offline(i))
-				continue;  /* Heuristic: CPU can go offline. */
-			firsterr = rcutorture_booster_init(i);
-			if (firsterr)
-				goto unwind;
-		}
+
+		firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE",
+					     rcutorture_booster_init,
+					     rcutorture_booster_cleanup);
+		if (firsterr < 0)
+			goto unwind;
+		rcutor_hp = firsterr;
 	}
 	firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
 	if (firsterr)
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
index be922c9..50d1861 100644
--- a/kernel/rcu/sync.c
+++ b/kernel/rcu/sync.c
@@ -68,6 +68,8 @@
 	RCU_LOCKDEP_WARN(!gp_ops[rsp->gp_type].held(),
 			 "suspicious rcu_sync_is_idle() usage");
 }
+
+EXPORT_SYMBOL_GPL(rcu_sync_lockdep_assert);
 #endif
 
 /**
@@ -83,6 +85,18 @@
 }
 
 /**
+ * Must be called after rcu_sync_init() and before first use.
+ *
+ * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}()
+ * pairs turn into NO-OPs.
+ */
+void rcu_sync_enter_start(struct rcu_sync *rsp)
+{
+	rsp->gp_count++;
+	rsp->gp_state = GP_PASSED;
+}
+
+/**
  * rcu_sync_enter() - Force readers onto slowpath
  * @rsp: Pointer to rcu_sync structure to use for synchronization
  *
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 5d80925..7e2e038 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -41,7 +41,6 @@
 #include <linux/export.h>
 #include <linux/completion.h>
 #include <linux/moduleparam.h>
-#include <linux/module.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
@@ -60,7 +59,6 @@
 #include "tree.h"
 #include "rcu.h"
 
-MODULE_ALIAS("rcutree");
 #ifdef MODULE_PARAM_PREFIX
 #undef MODULE_PARAM_PREFIX
 #endif
@@ -1848,6 +1846,7 @@
 			      struct rcu_data *rdp)
 {
 	bool ret;
+	bool need_gp;
 
 	/* Handle the ends of any preceding grace periods first. */
 	if (rdp->completed == rnp->completed &&
@@ -1874,9 +1873,10 @@
 		 */
 		rdp->gpnum = rnp->gpnum;
 		trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpustart"));
-		rdp->cpu_no_qs.b.norm = true;
+		need_gp = !!(rnp->qsmask & rdp->grpmask);
+		rdp->cpu_no_qs.b.norm = need_gp;
 		rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
-		rdp->core_needs_qs = !!(rnp->qsmask & rdp->grpmask);
+		rdp->core_needs_qs = need_gp;
 		zero_cpu_stall_ticks(rdp);
 		WRITE_ONCE(rdp->gpwrap, false);
 	}
@@ -2344,7 +2344,7 @@
 	WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
 	raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
-	swake_up(&rsp->gp_wq);  /* Memory barrier implied by swake_up() path. */
+	rcu_gp_kthread_wake(rsp);
 }
 
 /*
@@ -2970,7 +2970,7 @@
 	}
 	WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
 	raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
-	swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
+	rcu_gp_kthread_wake(rsp);
 }
 
 /*
@@ -3792,8 +3792,6 @@
 	rnp = rdp->mynode;
 	mask = rdp->grpmask;
 	raw_spin_lock_rcu_node(rnp);		/* irqs already disabled. */
-	rnp->qsmaskinitnext |= mask;
-	rnp->expmaskinitnext |= mask;
 	if (!rdp->beenonline)
 		WRITE_ONCE(rsp->ncpus, READ_ONCE(rsp->ncpus) + 1);
 	rdp->beenonline = true;	 /* We have now been online. */
@@ -3860,6 +3858,32 @@
 	return 0;
 }
 
+/*
+ * Mark the specified CPU as being online so that subsequent grace periods
+ * (both expedited and normal) will wait on it.  Note that this means that
+ * incoming CPUs are not allowed to use RCU read-side critical sections
+ * until this function is called.  Failing to observe this restriction
+ * will result in lockdep splats.
+ */
+void rcu_cpu_starting(unsigned int cpu)
+{
+	unsigned long flags;
+	unsigned long mask;
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp) {
+		rdp = this_cpu_ptr(rsp->rda);
+		rnp = rdp->mynode;
+		mask = rdp->grpmask;
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+		rnp->qsmaskinitnext |= mask;
+		rnp->expmaskinitnext |= mask;
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * The CPU is exiting the idle loop into the arch_cpu_idle_dead()
@@ -4209,8 +4233,10 @@
 	 * or the scheduler are operational.
 	 */
 	pm_notifier(rcu_pm_notify, 0);
-	for_each_online_cpu(cpu)
+	for_each_online_cpu(cpu) {
 		rcutree_prepare_cpu(cpu);
+		rcu_cpu_starting(cpu);
+	}
 }
 
 #include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f714f87..e99a523 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -400,6 +400,7 @@
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
+	atomic_long_t exp_workdone0;	/* # done by workqueue. */
 	atomic_long_t exp_workdone1;	/* # done by others #1. */
 	atomic_long_t exp_workdone2;	/* # done by others #2. */
 	atomic_long_t exp_workdone3;	/* # done by others #3. */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 6d86ab6..24343eb 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -359,7 +359,8 @@
 			struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
 			if (raw_smp_processor_id() == cpu ||
-			    !(atomic_add_return(0, &rdtp->dynticks) & 0x1))
+			    !(atomic_add_return(0, &rdtp->dynticks) & 0x1) ||
+			    !(rnp->qsmaskinitnext & rdp->grpmask))
 				mask_ofl_test |= rdp->grpmask;
 		}
 		mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
@@ -384,17 +385,16 @@
 				mask_ofl_ipi &= ~mask;
 				continue;
 			}
-			/* Failed, raced with offline. */
+			/* Failed, raced with CPU hotplug operation. */
 			raw_spin_lock_irqsave_rcu_node(rnp, flags);
-			if (cpu_online(cpu) &&
+			if ((rnp->qsmaskinitnext & mask) &&
 			    (rnp->expmask & mask)) {
+				/* Online, so delay for a bit and try again. */
 				raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 				schedule_timeout_uninterruptible(1);
-				if (cpu_online(cpu) &&
-				    (rnp->expmask & mask))
-					goto retry_ipi;
-				raw_spin_lock_irqsave_rcu_node(rnp, flags);
+				goto retry_ipi;
 			}
+			/* CPU really is offline, so we can ignore it. */
 			if (!(rnp->expmask & mask))
 				mask_ofl_ipi &= ~mask;
 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -427,12 +427,10 @@
 				jiffies_stall);
 		if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
 			return;
-		if (ret < 0) {
-			/* Hit a signal, disable CPU stall warnings. */
-			swait_event(rsp->expedited_wq,
-				   sync_rcu_preempt_exp_done(rnp_root));
-			return;
-		}
+		WARN_ON(ret < 0);  /* workqueues should not be signaled. */
+		if (rcu_cpu_stall_suppress)
+			continue;
+		panic_on_rcu_stall();
 		pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
 		       rsp->name);
 		ndetected = 0;
@@ -500,7 +498,6 @@
 	 * next GP, to proceed.
 	 */
 	mutex_lock(&rsp->exp_wake_mutex);
-	mutex_unlock(&rsp->exp_mutex);
 
 	rcu_for_each_node_breadth_first(rsp, rnp) {
 		if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
@@ -516,6 +513,70 @@
 	mutex_unlock(&rsp->exp_wake_mutex);
 }
 
+/* Let the workqueue handler know what it is supposed to do. */
+struct rcu_exp_work {
+	smp_call_func_t rew_func;
+	struct rcu_state *rew_rsp;
+	unsigned long rew_s;
+	struct work_struct rew_work;
+};
+
+/*
+ * Work-queue handler to drive an expedited grace period forward.
+ */
+static void wait_rcu_exp_gp(struct work_struct *wp)
+{
+	struct rcu_exp_work *rewp;
+
+	/* Initialize the rcu_node tree in preparation for the wait. */
+	rewp = container_of(wp, struct rcu_exp_work, rew_work);
+	sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func);
+
+	/* Wait and clean up, including waking everyone. */
+	rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s);
+}
+
+/*
+ * Given an rcu_state pointer and a smp_call_function() handler, kick
+ * off the specified flavor of expedited grace period.
+ */
+static void _synchronize_rcu_expedited(struct rcu_state *rsp,
+				       smp_call_func_t func)
+{
+	struct rcu_data *rdp;
+	struct rcu_exp_work rew;
+	struct rcu_node *rnp;
+	unsigned long s;
+
+	/* If expedited grace periods are prohibited, fall back to normal. */
+	if (rcu_gp_is_normal()) {
+		wait_rcu_gp(rsp->call);
+		return;
+	}
+
+	/* Take a snapshot of the sequence number.  */
+	s = rcu_exp_gp_seq_snap(rsp);
+	if (exp_funnel_lock(rsp, s))
+		return;  /* Someone else did our work for us. */
+
+	/* Marshall arguments and schedule the expedited grace period. */
+	rew.rew_func = func;
+	rew.rew_rsp = rsp;
+	rew.rew_s = s;
+	INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
+	schedule_work(&rew.rew_work);
+
+	/* Wait for expedited grace period to complete. */
+	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
+	rnp = rcu_get_root(rsp);
+	wait_event(rnp->exp_wq[(s >> 1) & 0x3],
+		   sync_exp_work_done(rsp,
+				      &rdp->exp_workdone0, s));
+
+	/* Let the next expedited grace period start. */
+	mutex_unlock(&rsp->exp_mutex);
+}
+
 /**
  * synchronize_sched_expedited - Brute-force RCU-sched grace period
  *
@@ -534,29 +595,13 @@
  */
 void synchronize_sched_expedited(void)
 {
-	unsigned long s;
 	struct rcu_state *rsp = &rcu_sched_state;
 
 	/* If only one CPU, this is automatically a grace period. */
 	if (rcu_blocking_is_gp())
 		return;
 
-	/* If expedited grace periods are prohibited, fall back to normal. */
-	if (rcu_gp_is_normal()) {
-		wait_rcu_gp(call_rcu_sched);
-		return;
-	}
-
-	/* Take a snapshot of the sequence number.  */
-	s = rcu_exp_gp_seq_snap(rsp);
-	if (exp_funnel_lock(rsp, s))
-		return;  /* Someone else did our work for us. */
-
-	/* Initialize the rcu_node tree in preparation for the wait. */
-	sync_rcu_exp_select_cpus(rsp, sync_sched_exp_handler);
-
-	/* Wait and clean up, including waking everyone. */
-	rcu_exp_wait_wake(rsp, s);
+	_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
 }
 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
 
@@ -620,23 +665,8 @@
 void synchronize_rcu_expedited(void)
 {
 	struct rcu_state *rsp = rcu_state_p;
-	unsigned long s;
 
-	/* If expedited grace periods are prohibited, fall back to normal. */
-	if (rcu_gp_is_normal()) {
-		wait_rcu_gp(call_rcu);
-		return;
-	}
-
-	s = rcu_exp_gp_seq_snap(rsp);
-	if (exp_funnel_lock(rsp, s))
-		return;  /* Someone else did our work for us. */
-
-	/* Initialize the rcu_node tree in preparation for the wait. */
-	sync_rcu_exp_select_cpus(rsp, sync_rcu_exp_handler);
-
-	/* Wait for ->blkd_tasks lists to drain, then wake everyone up. */
-	rcu_exp_wait_wake(rsp, s);
+	_synchronize_rcu_expedited(rsp, sync_rcu_exp_handler);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0082fce..85c5a88 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2173,6 +2173,7 @@
 				cl++;
 			c++;
 			local_bh_enable();
+			cond_resched_rcu_qs();
 			list = next;
 		}
 		trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 86782f9a..b1f2897 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -185,16 +185,17 @@
 	int cpu;
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 	struct rcu_data *rdp;
-	unsigned long s1 = 0, s2 = 0, s3 = 0;
+	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
 
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
+		s0 += atomic_long_read(&rdp->exp_workdone0);
 		s1 += atomic_long_read(&rdp->exp_workdone1);
 		s2 += atomic_long_read(&rdp->exp_workdone2);
 		s3 += atomic_long_read(&rdp->exp_workdone3);
 	}
-	seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s1, s2, s3,
+	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+		   rsp->expedited_sequence, s0, s1, s2, s3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index f0d8322..f19271d 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -46,7 +46,7 @@
 #include <linux/export.h>
 #include <linux/hardirq.h>
 #include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/kthread.h>
 #include <linux/tick.h>
 
@@ -54,7 +54,6 @@
 
 #include "rcu.h"
 
-MODULE_ALIAS("rcupdate");
 #ifdef MODULE_PARAM_PREFIX
 #undef MODULE_PARAM_PREFIX
 #endif
diff --git a/kernel/relay.c b/kernel/relay.c
index d797502..fc9b4a4 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -214,7 +214,7 @@
 			__free_page(buf->page_array[i]);
 		relay_free_page_array(buf->page_array);
 	}
-	chan->buf[buf->cpu] = NULL;
+	*per_cpu_ptr(chan->buf, buf->cpu) = NULL;
 	kfree(buf->padding);
 	kfree(buf);
 	kref_put(&chan->kref, relay_destroy_channel);
@@ -382,20 +382,21 @@
  */
 void relay_reset(struct rchan *chan)
 {
+	struct rchan_buf *buf;
 	unsigned int i;
 
 	if (!chan)
 		return;
 
-	if (chan->is_global && chan->buf[0]) {
-		__relay_reset(chan->buf[0], 0);
+	if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
+		__relay_reset(buf, 0);
 		return;
 	}
 
 	mutex_lock(&relay_channels_mutex);
 	for_each_possible_cpu(i)
-		if (chan->buf[i])
-			__relay_reset(chan->buf[i], 0);
+		if ((buf = *per_cpu_ptr(chan->buf, i)))
+			__relay_reset(buf, 0);
 	mutex_unlock(&relay_channels_mutex);
 }
 EXPORT_SYMBOL_GPL(relay_reset);
@@ -440,7 +441,7 @@
 	struct dentry *dentry;
 
  	if (chan->is_global)
-		return chan->buf[0];
+		return *per_cpu_ptr(chan->buf, 0);
 
 	buf = relay_create_buf(chan);
 	if (!buf)
@@ -464,7 +465,7 @@
  	__relay_reset(buf, 1);
 
  	if(chan->is_global) {
- 		chan->buf[0] = buf;
+		*per_cpu_ptr(chan->buf, 0) = buf;
  		buf->cpu = 0;
   	}
 
@@ -512,46 +513,25 @@
 	chan->cb = cb;
 }
 
-/**
- * 	relay_hotcpu_callback - CPU hotplug callback
- * 	@nb: notifier block
- * 	@action: hotplug action to take
- * 	@hcpu: CPU number
- *
- * 	Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
- */
-static int relay_hotcpu_callback(struct notifier_block *nb,
-				unsigned long action,
-				void *hcpu)
+int relay_prepare_cpu(unsigned int cpu)
 {
-	unsigned int hotcpu = (unsigned long)hcpu;
 	struct rchan *chan;
+	struct rchan_buf *buf;
 
-	switch(action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		mutex_lock(&relay_channels_mutex);
-		list_for_each_entry(chan, &relay_channels, list) {
-			if (chan->buf[hotcpu])
-				continue;
-			chan->buf[hotcpu] = relay_open_buf(chan, hotcpu);
-			if(!chan->buf[hotcpu]) {
-				printk(KERN_ERR
-					"relay_hotcpu_callback: cpu %d buffer "
-					"creation failed\n", hotcpu);
-				mutex_unlock(&relay_channels_mutex);
-				return notifier_from_errno(-ENOMEM);
-			}
+	mutex_lock(&relay_channels_mutex);
+	list_for_each_entry(chan, &relay_channels, list) {
+		if ((buf = *per_cpu_ptr(chan->buf, cpu)))
+			continue;
+		buf = relay_open_buf(chan, cpu);
+		if (!buf) {
+			pr_err("relay: cpu %d buffer creation failed\n", cpu);
+			mutex_unlock(&relay_channels_mutex);
+			return -ENOMEM;
 		}
-		mutex_unlock(&relay_channels_mutex);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		/* No need to flush the cpu : will be flushed upon
-		 * final relay_flush() call. */
-		break;
+		*per_cpu_ptr(chan->buf, cpu) = buf;
 	}
-	return NOTIFY_OK;
+	mutex_unlock(&relay_channels_mutex);
+	return 0;
 }
 
 /**
@@ -583,6 +563,7 @@
 {
 	unsigned int i;
 	struct rchan *chan;
+	struct rchan_buf *buf;
 
 	if (!(subbuf_size && n_subbufs))
 		return NULL;
@@ -593,6 +574,7 @@
 	if (!chan)
 		return NULL;
 
+	chan->buf = alloc_percpu(struct rchan_buf *);
 	chan->version = RELAYFS_CHANNEL_VERSION;
 	chan->n_subbufs = n_subbufs;
 	chan->subbuf_size = subbuf_size;
@@ -608,9 +590,10 @@
 
 	mutex_lock(&relay_channels_mutex);
 	for_each_online_cpu(i) {
-		chan->buf[i] = relay_open_buf(chan, i);
-		if (!chan->buf[i])
+		buf = relay_open_buf(chan, i);
+		if (!buf)
 			goto free_bufs;
+		*per_cpu_ptr(chan->buf, i) = buf;
 	}
 	list_add(&chan->list, &relay_channels);
 	mutex_unlock(&relay_channels_mutex);
@@ -619,8 +602,8 @@
 
 free_bufs:
 	for_each_possible_cpu(i) {
-		if (chan->buf[i])
-			relay_close_buf(chan->buf[i]);
+		if ((buf = *per_cpu_ptr(chan->buf, i)))
+			relay_close_buf(buf);
 	}
 
 	kref_put(&chan->kref, relay_destroy_channel);
@@ -666,6 +649,7 @@
 	unsigned int i, curr_cpu;
 	unsigned long flags;
 	struct dentry *dentry;
+	struct rchan_buf *buf;
 	struct rchan_percpu_buf_dispatcher disp;
 
 	if (!chan || !base_filename)
@@ -684,10 +668,11 @@
 
 	if (chan->is_global) {
 		err = -EINVAL;
-		if (!WARN_ON_ONCE(!chan->buf[0])) {
-			dentry = relay_create_buf_file(chan, chan->buf[0], 0);
+		buf = *per_cpu_ptr(chan->buf, 0);
+		if (!WARN_ON_ONCE(!buf)) {
+			dentry = relay_create_buf_file(chan, buf, 0);
 			if (dentry && !WARN_ON_ONCE(!chan->is_global)) {
-				relay_set_buf_dentry(chan->buf[0], dentry);
+				relay_set_buf_dentry(buf, dentry);
 				err = 0;
 			}
 		}
@@ -702,13 +687,14 @@
 	 * on all currently online CPUs.
 	 */
 	for_each_online_cpu(i) {
-		if (unlikely(!chan->buf[i])) {
+		buf = *per_cpu_ptr(chan->buf, i);
+		if (unlikely(!buf)) {
 			WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n");
 			err = -EINVAL;
 			break;
 		}
 
-		dentry = relay_create_buf_file(chan, chan->buf[i], i);
+		dentry = relay_create_buf_file(chan, buf, i);
 		if (unlikely(!dentry)) {
 			err = -EINVAL;
 			break;
@@ -716,10 +702,10 @@
 
 		if (curr_cpu == i) {
 			local_irq_save(flags);
-			relay_set_buf_dentry(chan->buf[i], dentry);
+			relay_set_buf_dentry(buf, dentry);
 			local_irq_restore(flags);
 		} else {
-			disp.buf = chan->buf[i];
+			disp.buf = buf;
 			disp.dentry = dentry;
 			smp_mb();
 			/* relay_channels_mutex must be held, so wait. */
@@ -822,11 +808,10 @@
 	if (!chan)
 		return;
 
-	if (cpu >= NR_CPUS || !chan->buf[cpu] ||
-					subbufs_consumed > chan->n_subbufs)
+	buf = *per_cpu_ptr(chan->buf, cpu);
+	if (cpu >= NR_CPUS || !buf || subbufs_consumed > chan->n_subbufs)
 		return;
 
-	buf = chan->buf[cpu];
 	if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed)
 		buf->subbufs_consumed = buf->subbufs_produced;
 	else
@@ -842,18 +827,19 @@
  */
 void relay_close(struct rchan *chan)
 {
+	struct rchan_buf *buf;
 	unsigned int i;
 
 	if (!chan)
 		return;
 
 	mutex_lock(&relay_channels_mutex);
-	if (chan->is_global && chan->buf[0])
-		relay_close_buf(chan->buf[0]);
+	if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0)))
+		relay_close_buf(buf);
 	else
 		for_each_possible_cpu(i)
-			if (chan->buf[i])
-				relay_close_buf(chan->buf[i]);
+			if ((buf = *per_cpu_ptr(chan->buf, i)))
+				relay_close_buf(buf);
 
 	if (chan->last_toobig)
 		printk(KERN_WARNING "relay: one or more items not logged "
@@ -874,20 +860,21 @@
  */
 void relay_flush(struct rchan *chan)
 {
+	struct rchan_buf *buf;
 	unsigned int i;
 
 	if (!chan)
 		return;
 
-	if (chan->is_global && chan->buf[0]) {
-		relay_switch_subbuf(chan->buf[0], 0);
+	if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) {
+		relay_switch_subbuf(buf, 0);
 		return;
 	}
 
 	mutex_lock(&relay_channels_mutex);
 	for_each_possible_cpu(i)
-		if (chan->buf[i])
-			relay_switch_subbuf(chan->buf[i], 0);
+		if ((buf = *per_cpu_ptr(chan->buf, i)))
+			relay_switch_subbuf(buf, 0);
 	mutex_unlock(&relay_channels_mutex);
 }
 EXPORT_SYMBOL_GPL(relay_flush);
@@ -1377,12 +1364,3 @@
 	.splice_read	= relay_file_splice_read,
 };
 EXPORT_SYMBOL_GPL(relay_file_operations);
-
-static __init int relay_init(void)
-{
-
-	hotcpu_notifier(relay_hotcpu_callback, 0);
-	return 0;
-}
-
-early_initcall(relay_init);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5c883fe..94732d1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
 #include <linux/context_tracking.h>
 #include <linux/compiler.h>
 #include <linux/frame.h>
+#include <linux/prefetch.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -580,6 +581,8 @@
 	 * If needed we can still optimize that later with an
 	 * empty IRQ.
 	 */
+	if (cpu_is_offline(cpu))
+		return true;  /* Don't try to wake offline CPUs. */
 	if (tick_nohz_full_cpu(cpu)) {
 		if (cpu != smp_processor_id() ||
 		    tick_nohz_tick_stopped())
@@ -590,6 +593,11 @@
 	return false;
 }
 
+/*
+ * Wake up the specified CPU.  If the CPU is going offline, it is the
+ * caller's responsibility to deal with the lost wakeup, for example,
+ * by hooking into the CPU_DEAD notifier like timers and hrtimers do.
+ */
 void wake_up_nohz_cpu(int cpu)
 {
 	if (!wake_up_full_nohz_cpu(cpu))
@@ -1062,8 +1070,12 @@
 	 * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
 	 * we're holding p->pi_lock.
 	 */
-	if (task_rq(p) == rq && task_on_rq_queued(p))
-		rq = __migrate_task(rq, p, arg->dest_cpu);
+	if (task_rq(p) == rq) {
+		if (task_on_rq_queued(p))
+			rq = __migrate_task(rq, p, arg->dest_cpu);
+		else
+			p->wake_cpu = arg->dest_cpu;
+	}
 	raw_spin_unlock(&rq->lock);
 	raw_spin_unlock(&p->pi_lock);
 
@@ -1104,10 +1116,10 @@
 
 	p->sched_class->set_cpus_allowed(p, new_mask);
 
-	if (running)
-		p->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, p, ENQUEUE_RESTORE);
+	if (running)
+		set_curr_task(rq, p);
 }
 
 /*
@@ -1264,7 +1276,7 @@
 		/*
 		 * Task isn't running anymore; make it appear like we migrated
 		 * it before it went to sleep. This means on wakeup we make the
-		 * previous cpu our targer instead of where it really is.
+		 * previous cpu our target instead of where it really is.
 		 */
 		p->wake_cpu = cpu;
 	}
@@ -1628,23 +1640,25 @@
 static void
 ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 {
-#ifdef CONFIG_SCHEDSTATS
-	struct rq *rq = this_rq();
+	struct rq *rq;
+
+	if (!schedstat_enabled())
+		return;
+
+	rq = this_rq();
 
 #ifdef CONFIG_SMP
-	int this_cpu = smp_processor_id();
-
-	if (cpu == this_cpu) {
-		schedstat_inc(rq, ttwu_local);
-		schedstat_inc(p, se.statistics.nr_wakeups_local);
+	if (cpu == rq->cpu) {
+		schedstat_inc(rq->ttwu_local);
+		schedstat_inc(p->se.statistics.nr_wakeups_local);
 	} else {
 		struct sched_domain *sd;
 
-		schedstat_inc(p, se.statistics.nr_wakeups_remote);
+		schedstat_inc(p->se.statistics.nr_wakeups_remote);
 		rcu_read_lock();
-		for_each_domain(this_cpu, sd) {
+		for_each_domain(rq->cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-				schedstat_inc(sd, ttwu_wake_remote);
+				schedstat_inc(sd->ttwu_wake_remote);
 				break;
 			}
 		}
@@ -1652,17 +1666,14 @@
 	}
 
 	if (wake_flags & WF_MIGRATED)
-		schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-
+		schedstat_inc(p->se.statistics.nr_wakeups_migrate);
 #endif /* CONFIG_SMP */
 
-	schedstat_inc(rq, ttwu_count);
-	schedstat_inc(p, se.statistics.nr_wakeups);
+	schedstat_inc(rq->ttwu_count);
+	schedstat_inc(p->se.statistics.nr_wakeups);
 
 	if (wake_flags & WF_SYNC)
-		schedstat_inc(p, se.statistics.nr_wakeups_sync);
-
-#endif /* CONFIG_SCHEDSTATS */
+		schedstat_inc(p->se.statistics.nr_wakeups_sync);
 }
 
 static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
@@ -2015,6 +2026,28 @@
 	success = 1; /* we're going to change ->state */
 	cpu = task_cpu(p);
 
+	/*
+	 * Ensure we load p->on_rq _after_ p->state, otherwise it would
+	 * be possible to, falsely, observe p->on_rq == 0 and get stuck
+	 * in smp_cond_load_acquire() below.
+	 *
+	 * sched_ttwu_pending()                 try_to_wake_up()
+	 *   [S] p->on_rq = 1;                  [L] P->state
+	 *       UNLOCK rq->lock  -----.
+	 *                              \
+	 *				 +---   RMB
+	 * schedule()                   /
+	 *       LOCK rq->lock    -----'
+	 *       UNLOCK rq->lock
+	 *
+	 * [task p]
+	 *   [S] p->state = UNINTERRUPTIBLE     [L] p->on_rq
+	 *
+	 * Pairs with the UNLOCK+LOCK on rq->lock from the
+	 * last wakeup of our task and the schedule that got our task
+	 * current.
+	 */
+	smp_rmb();
 	if (p->on_rq && ttwu_remote(p, wake_flags))
 		goto stat;
 
@@ -2061,8 +2094,7 @@
 
 	ttwu_queue(p, cpu, wake_flags);
 stat:
-	if (schedstat_enabled())
-		ttwu_stat(p, cpu, wake_flags);
+	ttwu_stat(p, cpu, wake_flags);
 out:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
@@ -2072,6 +2104,7 @@
 /**
  * try_to_wake_up_local - try to wake up a local task with rq lock held
  * @p: the thread to be awakened
+ * @cookie: context's cookie for pinning
  *
  * Put @p on the run-queue if it's not already there. The caller must
  * ensure that this_rq() is locked, @p is bound to this_rq() and not
@@ -2110,8 +2143,7 @@
 		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
 	ttwu_do_wakeup(rq, p, 0, cookie);
-	if (schedstat_enabled())
-		ttwu_stat(p, smp_processor_id(), 0);
+	ttwu_stat(p, smp_processor_id(), 0);
 out:
 	raw_spin_unlock(&p->pi_lock);
 }
@@ -2749,6 +2781,10 @@
 		 * task and put them back on the free list.
 		 */
 		kprobe_flush_task(prev);
+
+		/* Task is done with its stack. */
+		put_task_stack(prev);
+
 		put_task_struct(prev);
 	}
 
@@ -2972,6 +3008,23 @@
 EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
 
 /*
+ * The function fair_sched_class.update_curr accesses the struct curr
+ * and its field curr->exec_start; when called from task_sched_runtime(),
+ * we observe a high rate of cache misses in practice.
+ * Prefetching this data results in improved performance.
+ */
+static inline void prefetch_curr_exec_start(struct task_struct *p)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	struct sched_entity *curr = (&p->se)->cfs_rq->curr;
+#else
+	struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
+#endif
+	prefetch(curr);
+	prefetch(&curr->exec_start);
+}
+
+/*
  * Return accounted runtime for the task.
  * In case the task is currently running, return the runtime plus current's
  * pending runtime that have not been accounted yet.
@@ -3005,6 +3058,7 @@
 	 * thread, breaking clock_gettime().
 	 */
 	if (task_current(rq, p) && task_on_rq_queued(p)) {
+		prefetch_curr_exec_start(p);
 		update_rq_clock(rq);
 		p->sched_class->update_curr(rq);
 	}
@@ -3151,6 +3205,9 @@
  */
 static noinline void __schedule_bug(struct task_struct *prev)
 {
+	/* Save this before calling printk(), since that will clobber it */
+	unsigned long preempt_disable_ip = get_preempt_disable_ip(current);
+
 	if (oops_in_progress)
 		return;
 
@@ -3161,13 +3218,12 @@
 	print_modules();
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
-#ifdef CONFIG_DEBUG_PREEMPT
-	if (in_atomic_preempt_off()) {
+	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+	    && in_atomic_preempt_off()) {
 		pr_err("Preemption disabled at:");
-		print_ip_sym(current->preempt_disable_ip);
+		print_ip_sym(preempt_disable_ip);
 		pr_cont("\n");
 	}
-#endif
 	if (panic_on_warn)
 		panic("scheduling while atomic\n");
 
@@ -3193,7 +3249,7 @@
 
 	profile_hit(SCHED_PROFILING, __builtin_return_address(0));
 
-	schedstat_inc(this_rq(), sched_count);
+	schedstat_inc(this_rq()->sched_count);
 }
 
 /*
@@ -3286,17 +3342,6 @@
 	rq = cpu_rq(cpu);
 	prev = rq->curr;
 
-	/*
-	 * do_exit() calls schedule() with preemption disabled as an exception;
-	 * however we must fix that up, otherwise the next task will see an
-	 * inconsistent (higher) preempt count.
-	 *
-	 * It also avoids the below schedule_debug() test from complaining
-	 * about this.
-	 */
-	if (unlikely(prev->state == TASK_DEAD))
-		preempt_enable_no_resched_notrace();
-
 	schedule_debug(prev);
 
 	if (sched_feat(HRTICK))
@@ -3362,7 +3407,33 @@
 
 	balance_callback(rq);
 }
-STACK_FRAME_NON_STANDARD(__schedule); /* switch_to() */
+
+void __noreturn do_task_dead(void)
+{
+	/*
+	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
+	 * when the following two conditions become true.
+	 *   - There is race condition of mmap_sem (It is acquired by
+	 *     exit_mm()), and
+	 *   - SMI occurs before setting TASK_RUNINNG.
+	 *     (or hypervisor of virtual machine switches to other guest)
+	 *  As a result, we may become TASK_RUNNING after becoming TASK_DEAD
+	 *
+	 * To avoid it, we have to wait for releasing tsk->pi_lock which
+	 * is held by try_to_wake_up()
+	 */
+	smp_mb();
+	raw_spin_unlock_wait(&current->pi_lock);
+
+	/* causes final put_task_struct in finish_task_switch(). */
+	__set_current_state(TASK_DEAD);
+	current->flags |= PF_NOFREEZE;	/* tell freezer to ignore us */
+	__schedule(false);
+	BUG();
+	/* Avoid "noreturn function does return".  */
+	for (;;)
+		cpu_relax();	/* For when BUG is null */
+}
 
 static inline void sched_submit_work(struct task_struct *tsk)
 {
@@ -3646,10 +3717,10 @@
 
 	p->prio = prio;
 
-	if (running)
-		p->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, p, queue_flag);
+	if (running)
+		set_curr_task(rq, p);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
@@ -3663,7 +3734,8 @@
 
 void set_user_nice(struct task_struct *p, long nice)
 {
-	int old_prio, delta, queued;
+	bool queued, running;
+	int old_prio, delta;
 	struct rq_flags rf;
 	struct rq *rq;
 
@@ -3685,8 +3757,11 @@
 		goto out_unlock;
 	}
 	queued = task_on_rq_queued(p);
+	running = task_current(rq, p);
 	if (queued)
 		dequeue_task(rq, p, DEQUEUE_SAVE);
+	if (running)
+		put_prev_task(rq, p);
 
 	p->static_prio = NICE_TO_PRIO(nice);
 	set_load_weight(p);
@@ -3703,6 +3778,8 @@
 		if (delta < 0 || (delta > 0 && task_running(rq, p)))
 			resched_curr(rq);
 	}
+	if (running)
+		set_curr_task(rq, p);
 out_unlock:
 	task_rq_unlock(rq, p, &rf);
 }
@@ -4202,8 +4279,6 @@
 	prev_class = p->sched_class;
 	__setscheduler(rq, p, attr, pi);
 
-	if (running)
-		p->sched_class->set_curr_task(rq);
 	if (queued) {
 		/*
 		 * We enqueue to tail when the priority of a task is
@@ -4214,6 +4289,8 @@
 
 		enqueue_task(rq, p, queue_flags);
 	}
+	if (running)
+		set_curr_task(rq, p);
 
 	check_class_changed(rq, p, prev_class, oldprio);
 	preempt_disable(); /* avoid rq from going away on us */
@@ -4805,7 +4882,7 @@
 {
 	struct rq *rq = this_rq_lock();
 
-	schedstat_inc(rq, yld_count);
+	schedstat_inc(rq->yld_count);
 	current->sched_class->yield_task(rq);
 
 	/*
@@ -4822,6 +4899,7 @@
 	return 0;
 }
 
+#ifndef CONFIG_PREEMPT
 int __sched _cond_resched(void)
 {
 	if (should_resched(0)) {
@@ -4831,6 +4909,7 @@
 	return 0;
 }
 EXPORT_SYMBOL(_cond_resched);
+#endif
 
 /*
  * __cond_resched_lock() - if a reschedule is pending, drop the given lock,
@@ -4956,7 +5035,7 @@
 
 	yielded = curr->sched_class->yield_to_task(rq, p, preempt);
 	if (yielded) {
-		schedstat_inc(rq, yld_count);
+		schedstat_inc(rq->yld_count);
 		/*
 		 * Make p's CPU reschedule; pick_next_entity takes care of
 		 * fairness.
@@ -5376,10 +5455,10 @@
 
 	p->numa_preferred_nid = nid;
 
-	if (running)
-		p->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, p, ENQUEUE_RESTORE);
+	if (running)
+		set_curr_task(rq, p);
 	task_rq_unlock(rq, p, &rf);
 }
 #endif /* CONFIG_NUMA_BALANCING */
@@ -5676,6 +5755,8 @@
 	}
 }
 #else /* !CONFIG_SCHED_DEBUG */
+
+# define sched_debug_enabled 0
 # define sched_domain_debug(sd, cpu) do { } while (0)
 static inline bool sched_debug(void)
 {
@@ -5694,6 +5775,7 @@
 			 SD_BALANCE_FORK |
 			 SD_BALANCE_EXEC |
 			 SD_SHARE_CPUCAPACITY |
+			 SD_ASYM_CPUCAPACITY |
 			 SD_SHARE_PKG_RESOURCES |
 			 SD_SHARE_POWERDOMAIN)) {
 		if (sd->groups != sd->groups->next)
@@ -5724,6 +5806,7 @@
 				SD_BALANCE_NEWIDLE |
 				SD_BALANCE_FORK |
 				SD_BALANCE_EXEC |
+				SD_ASYM_CPUCAPACITY |
 				SD_SHARE_CPUCAPACITY |
 				SD_SHARE_PKG_RESOURCES |
 				SD_PREFER_SIBLING |
@@ -5868,10 +5951,8 @@
 	} while (sg != first);
 }
 
-static void free_sched_domain(struct rcu_head *rcu)
+static void destroy_sched_domain(struct sched_domain *sd)
 {
-	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-
 	/*
 	 * If its an overlapping domain it has private groups, iterate and
 	 * nuke them all.
@@ -5882,18 +5963,26 @@
 		kfree(sd->groups->sgc);
 		kfree(sd->groups);
 	}
+	if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
+		kfree(sd->shared);
 	kfree(sd);
 }
 
-static void destroy_sched_domain(struct sched_domain *sd, int cpu)
+static void destroy_sched_domains_rcu(struct rcu_head *rcu)
 {
-	call_rcu(&sd->rcu, free_sched_domain);
+	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
+
+	while (sd) {
+		struct sched_domain *parent = sd->parent;
+		destroy_sched_domain(sd);
+		sd = parent;
+	}
 }
 
-static void destroy_sched_domains(struct sched_domain *sd, int cpu)
+static void destroy_sched_domains(struct sched_domain *sd)
 {
-	for (; sd; sd = sd->parent)
-		destroy_sched_domain(sd, cpu);
+	if (sd)
+		call_rcu(&sd->rcu, destroy_sched_domains_rcu);
 }
 
 /*
@@ -5908,14 +5997,14 @@
 DEFINE_PER_CPU(struct sched_domain *, sd_llc);
 DEFINE_PER_CPU(int, sd_llc_size);
 DEFINE_PER_CPU(int, sd_llc_id);
+DEFINE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DEFINE_PER_CPU(struct sched_domain *, sd_numa);
-DEFINE_PER_CPU(struct sched_domain *, sd_busy);
 DEFINE_PER_CPU(struct sched_domain *, sd_asym);
 
 static void update_top_cache_domain(int cpu)
 {
+	struct sched_domain_shared *sds = NULL;
 	struct sched_domain *sd;
-	struct sched_domain *busy_sd = NULL;
 	int id = cpu;
 	int size = 1;
 
@@ -5923,13 +6012,13 @@
 	if (sd) {
 		id = cpumask_first(sched_domain_span(sd));
 		size = cpumask_weight(sched_domain_span(sd));
-		busy_sd = sd->parent; /* sd_busy */
+		sds = sd->shared;
 	}
-	rcu_assign_pointer(per_cpu(sd_busy, cpu), busy_sd);
 
 	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
 	per_cpu(sd_llc_size, cpu) = size;
 	per_cpu(sd_llc_id, cpu) = id;
+	rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
 
 	sd = lowest_flag_domain(cpu, SD_NUMA);
 	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
@@ -5965,7 +6054,7 @@
 			 */
 			if (parent->flags & SD_PREFER_SIBLING)
 				tmp->flags |= SD_PREFER_SIBLING;
-			destroy_sched_domain(parent, cpu);
+			destroy_sched_domain(parent);
 		} else
 			tmp = tmp->parent;
 	}
@@ -5973,7 +6062,7 @@
 	if (sd && sd_degenerate(sd)) {
 		tmp = sd;
 		sd = sd->parent;
-		destroy_sched_domain(tmp, cpu);
+		destroy_sched_domain(tmp);
 		if (sd)
 			sd->child = NULL;
 	}
@@ -5983,7 +6072,7 @@
 	rq_attach_root(rq, rd);
 	tmp = rq->sd;
 	rcu_assign_pointer(rq->sd, sd);
-	destroy_sched_domains(tmp, cpu);
+	destroy_sched_domains(tmp);
 
 	update_top_cache_domain(cpu);
 }
@@ -6226,7 +6315,6 @@
 		return;
 
 	update_group_capacity(sd, cpu);
-	atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
 /*
@@ -6314,6 +6402,9 @@
 	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
 	*per_cpu_ptr(sdd->sd, cpu) = NULL;
 
+	if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
+		*per_cpu_ptr(sdd->sds, cpu) = NULL;
+
 	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
 
@@ -6333,26 +6424,37 @@
 /*
  * SD_flags allowed in topology descriptions.
  *
- * SD_SHARE_CPUCAPACITY      - describes SMT topologies
- * SD_SHARE_PKG_RESOURCES - describes shared caches
- * SD_NUMA                - describes NUMA topologies
- * SD_SHARE_POWERDOMAIN   - describes shared power domain
+ * These flags are purely descriptive of the topology and do not prescribe
+ * behaviour. Behaviour is artificial and mapped in the below sd_init()
+ * function:
  *
- * Odd one out:
- * SD_ASYM_PACKING        - describes SMT quirks
+ *   SD_SHARE_CPUCAPACITY   - describes SMT topologies
+ *   SD_SHARE_PKG_RESOURCES - describes shared caches
+ *   SD_NUMA                - describes NUMA topologies
+ *   SD_SHARE_POWERDOMAIN   - describes shared power domain
+ *   SD_ASYM_CPUCAPACITY    - describes mixed capacity topologies
+ *
+ * Odd one out, which beside describing the topology has a quirk also
+ * prescribes the desired behaviour that goes along with it:
+ *
+ *   SD_ASYM_PACKING        - describes SMT quirks
  */
 #define TOPOLOGY_SD_FLAGS		\
 	(SD_SHARE_CPUCAPACITY |		\
 	 SD_SHARE_PKG_RESOURCES |	\
 	 SD_NUMA |			\
 	 SD_ASYM_PACKING |		\
+	 SD_ASYM_CPUCAPACITY |		\
 	 SD_SHARE_POWERDOMAIN)
 
 static struct sched_domain *
-sd_init(struct sched_domain_topology_level *tl, int cpu)
+sd_init(struct sched_domain_topology_level *tl,
+	const struct cpumask *cpu_map,
+	struct sched_domain *child, int cpu)
 {
-	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-	int sd_weight, sd_flags = 0;
+	struct sd_data *sdd = &tl->data;
+	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+	int sd_id, sd_weight, sd_flags = 0;
 
 #ifdef CONFIG_NUMA
 	/*
@@ -6401,15 +6503,26 @@
 		.smt_gain		= 0,
 		.max_newidle_lb_cost	= 0,
 		.next_decay_max_lb_cost	= jiffies,
+		.child			= child,
 #ifdef CONFIG_SCHED_DEBUG
 		.name			= tl->name,
 #endif
 	};
 
+	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
+	sd_id = cpumask_first(sched_domain_span(sd));
+
 	/*
 	 * Convert topological properties into behaviour.
 	 */
 
+	if (sd->flags & SD_ASYM_CPUCAPACITY) {
+		struct sched_domain *t = sd;
+
+		for_each_lower_domain(t)
+			t->flags |= SD_BALANCE_WAKE;
+	}
+
 	if (sd->flags & SD_SHARE_CPUCAPACITY) {
 		sd->flags |= SD_PREFER_SIBLING;
 		sd->imbalance_pct = 110;
@@ -6441,7 +6554,17 @@
 		sd->idle_idx = 1;
 	}
 
-	sd->private = &tl->data;
+	/*
+	 * For all levels sharing cache; connect a sched_domain_shared
+	 * instance.
+	 */
+	if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+		sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
+		atomic_inc(&sd->shared->ref);
+		atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
+	}
+
+	sd->private = sdd;
 
 	return sd;
 }
@@ -6468,6 +6591,9 @@
 
 void set_sched_topology(struct sched_domain_topology_level *tl)
 {
+	if (WARN_ON_ONCE(sched_smp_initialized))
+		return;
+
 	sched_domain_topology = tl;
 }
 
@@ -6748,6 +6874,10 @@
 		if (!sdd->sd)
 			return -ENOMEM;
 
+		sdd->sds = alloc_percpu(struct sched_domain_shared *);
+		if (!sdd->sds)
+			return -ENOMEM;
+
 		sdd->sg = alloc_percpu(struct sched_group *);
 		if (!sdd->sg)
 			return -ENOMEM;
@@ -6758,6 +6888,7 @@
 
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
+			struct sched_domain_shared *sds;
 			struct sched_group *sg;
 			struct sched_group_capacity *sgc;
 
@@ -6768,6 +6899,13 @@
 
 			*per_cpu_ptr(sdd->sd, j) = sd;
 
+			sds = kzalloc_node(sizeof(struct sched_domain_shared),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sds)
+				return -ENOMEM;
+
+			*per_cpu_ptr(sdd->sds, j) = sds;
+
 			sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
 			if (!sg)
@@ -6807,6 +6945,8 @@
 				kfree(*per_cpu_ptr(sdd->sd, j));
 			}
 
+			if (sdd->sds)
+				kfree(*per_cpu_ptr(sdd->sds, j));
 			if (sdd->sg)
 				kfree(*per_cpu_ptr(sdd->sg, j));
 			if (sdd->sgc)
@@ -6814,6 +6954,8 @@
 		}
 		free_percpu(sdd->sd);
 		sdd->sd = NULL;
+		free_percpu(sdd->sds);
+		sdd->sds = NULL;
 		free_percpu(sdd->sg);
 		sdd->sg = NULL;
 		free_percpu(sdd->sgc);
@@ -6825,16 +6967,12 @@
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
 		struct sched_domain *child, int cpu)
 {
-	struct sched_domain *sd = sd_init(tl, cpu);
-	if (!sd)
-		return child;
+	struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
 
-	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
 	if (child) {
 		sd->level = child->level + 1;
 		sched_domain_level_max = max(sched_domain_level_max, sd->level);
 		child->parent = sd;
-		sd->child = child;
 
 		if (!cpumask_subset(sched_domain_span(child),
 				    sched_domain_span(sd))) {
@@ -6865,6 +7003,7 @@
 	enum s_alloc alloc_state;
 	struct sched_domain *sd;
 	struct s_data d;
+	struct rq *rq = NULL;
 	int i, ret = -ENOMEM;
 
 	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
@@ -6915,11 +7054,22 @@
 	/* Attach the domains */
 	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
+		rq = cpu_rq(i);
 		sd = *per_cpu_ptr(d.sd, i);
+
+		/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
+		if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
+			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
+
 		cpu_attach_domain(sd, d.rd, i);
 	}
 	rcu_read_unlock();
 
+	if (rq && sched_debug_enabled) {
+		pr_info("span: %*pbl (max cpu_capacity = %lu)\n",
+			cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
+	}
+
 	ret = 0;
 error:
 	__free_domain_allocs(&d, alloc_state, cpu_map);
@@ -7278,6 +7428,22 @@
 }
 #endif
 
+#ifdef CONFIG_SCHED_SMT
+DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+
+static void sched_init_smt(void)
+{
+	/*
+	 * We've enumerated all CPUs and will assume that if any CPU
+	 * has SMT siblings, CPU0 will too.
+	 */
+	if (cpumask_weight(cpu_smt_mask(0)) > 1)
+		static_branch_enable(&sched_smt_present);
+}
+#else
+static inline void sched_init_smt(void) { }
+#endif
+
 void __init sched_init_smp(void)
 {
 	cpumask_var_t non_isolated_cpus;
@@ -7307,6 +7473,9 @@
 
 	init_sched_rt_class();
 	init_sched_dl_class();
+
+	sched_init_smt();
+
 	sched_smp_initialized = true;
 }
 
@@ -7344,6 +7513,7 @@
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
+DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
 
 void __init sched_init(void)
 {
@@ -7380,6 +7550,8 @@
 	for_each_possible_cpu(i) {
 		per_cpu(load_balance_mask, i) = (cpumask_var_t)kzalloc_node(
 			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
+		per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
+			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
 	}
 #endif /* CONFIG_CPUMASK_OFFSTACK */
 
@@ -7482,10 +7654,6 @@
 
 	set_load_weight(&init_task);
 
-#ifdef CONFIG_PREEMPT_NOTIFIERS
-	INIT_HLIST_HEAD(&init_task.preempt_notifiers);
-#endif
-
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
 	 */
@@ -7493,11 +7661,6 @@
 	enter_lazy_tlb(&init_mm, current);
 
 	/*
-	 * During early bootup we pretend to be a normal task:
-	 */
-	current->sched_class = &fair_sched_class;
-
-	/*
 	 * Make us the idle thread. Technically, schedule() should not be
 	 * called from this thread, however somewhere below it might be,
 	 * but because we are the idle thread, we just pick up running again
@@ -7551,6 +7714,7 @@
 void ___might_sleep(const char *file, int line, int preempt_offset)
 {
 	static unsigned long prev_jiffy;	/* ratelimiting */
+	unsigned long preempt_disable_ip;
 
 	rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
@@ -7561,6 +7725,9 @@
 		return;
 	prev_jiffy = jiffies;
 
+	/* Save this before calling printk(), since that will clobber it */
+	preempt_disable_ip = get_preempt_disable_ip(current);
+
 	printk(KERN_ERR
 		"BUG: sleeping function called from invalid context at %s:%d\n",
 			file, line);
@@ -7575,14 +7742,14 @@
 	debug_show_held_locks(current);
 	if (irqs_disabled())
 		print_irqtrace_events(current);
-#ifdef CONFIG_DEBUG_PREEMPT
-	if (!preempt_count_equals(preempt_offset)) {
+	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT)
+	    && !preempt_count_equals(preempt_offset)) {
 		pr_err("Preemption disabled at:");
-		print_ip_sym(current->preempt_disable_ip);
+		print_ip_sym(preempt_disable_ip);
 		pr_cont("\n");
 	}
-#endif
 	dump_stack();
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
 EXPORT_SYMBOL(___might_sleep);
 #endif
@@ -7603,12 +7770,10 @@
 		if (p->flags & PF_KTHREAD)
 			continue;
 
-		p->se.exec_start		= 0;
-#ifdef CONFIG_SCHEDSTATS
-		p->se.statistics.wait_start	= 0;
-		p->se.statistics.sleep_start	= 0;
-		p->se.statistics.block_start	= 0;
-#endif
+		p->se.exec_start = 0;
+		schedstat_set(p->se.statistics.wait_start,  0);
+		schedstat_set(p->se.statistics.sleep_start, 0);
+		schedstat_set(p->se.statistics.block_start, 0);
 
 		if (!dl_task(p) && !rt_task(p)) {
 			/*
@@ -7669,7 +7834,7 @@
  *
  * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
  */
-void set_curr_task(int cpu, struct task_struct *p)
+void ia64_set_curr_task(int cpu, struct task_struct *p)
 {
 	cpu_curr(cpu) = p;
 }
@@ -7800,10 +7965,10 @@
 
 	sched_change_group(tsk, TASK_MOVE_GROUP);
 
-	if (unlikely(running))
-		tsk->sched_class->set_curr_task(rq);
 	if (queued)
 		enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
+	if (unlikely(running))
+		set_curr_task(rq, tsk);
 
 	task_rq_unlock(rq, tsk, &rf);
 }
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5be5882..e731190 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -31,56 +31,81 @@
 	return (i << 1) + 2;
 }
 
-static void cpudl_exchange(struct cpudl *cp, int a, int b)
+static void cpudl_heapify_down(struct cpudl *cp, int idx)
 {
-	int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
+	int l, r, largest;
 
-	swap(cp->elements[a].cpu, cp->elements[b].cpu);
-	swap(cp->elements[a].dl , cp->elements[b].dl );
+	int orig_cpu = cp->elements[idx].cpu;
+	u64 orig_dl = cp->elements[idx].dl;
 
-	swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx);
+	if (left_child(idx) >= cp->size)
+		return;
+
+	/* adapted from lib/prio_heap.c */
+	while(1) {
+		u64 largest_dl;
+		l = left_child(idx);
+		r = right_child(idx);
+		largest = idx;
+		largest_dl = orig_dl;
+
+		if ((l < cp->size) && dl_time_before(orig_dl,
+						cp->elements[l].dl)) {
+			largest = l;
+			largest_dl = cp->elements[l].dl;
+		}
+		if ((r < cp->size) && dl_time_before(largest_dl,
+						cp->elements[r].dl))
+			largest = r;
+
+		if (largest == idx)
+			break;
+
+		/* pull largest child onto idx */
+		cp->elements[idx].cpu = cp->elements[largest].cpu;
+		cp->elements[idx].dl = cp->elements[largest].dl;
+		cp->elements[cp->elements[idx].cpu].idx = idx;
+		idx = largest;
+	}
+	/* actual push down of saved original values orig_* */
+	cp->elements[idx].cpu = orig_cpu;
+	cp->elements[idx].dl = orig_dl;
+	cp->elements[cp->elements[idx].cpu].idx = idx;
+}
+
+static void cpudl_heapify_up(struct cpudl *cp, int idx)
+{
+	int p;
+
+	int orig_cpu = cp->elements[idx].cpu;
+	u64 orig_dl = cp->elements[idx].dl;
+
+	if (idx == 0)
+		return;
+
+	do {
+		p = parent(idx);
+		if (dl_time_before(orig_dl, cp->elements[p].dl))
+			break;
+		/* pull parent onto idx */
+		cp->elements[idx].cpu = cp->elements[p].cpu;
+		cp->elements[idx].dl = cp->elements[p].dl;
+		cp->elements[cp->elements[idx].cpu].idx = idx;
+		idx = p;
+	} while (idx != 0);
+	/* actual push up of saved original values orig_* */
+	cp->elements[idx].cpu = orig_cpu;
+	cp->elements[idx].dl = orig_dl;
+	cp->elements[cp->elements[idx].cpu].idx = idx;
 }
 
 static void cpudl_heapify(struct cpudl *cp, int idx)
 {
-	int l, r, largest;
-
-	/* adapted from lib/prio_heap.c */
-	while(1) {
-		l = left_child(idx);
-		r = right_child(idx);
-		largest = idx;
-
-		if ((l < cp->size) && dl_time_before(cp->elements[idx].dl,
-							cp->elements[l].dl))
-			largest = l;
-		if ((r < cp->size) && dl_time_before(cp->elements[largest].dl,
-							cp->elements[r].dl))
-			largest = r;
-		if (largest == idx)
-			break;
-
-		/* Push idx down the heap one level and bump one up */
-		cpudl_exchange(cp, largest, idx);
-		idx = largest;
-	}
-}
-
-static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl)
-{
-	WARN_ON(idx == IDX_INVALID || !cpu_present(idx));
-
-	if (dl_time_before(new_dl, cp->elements[idx].dl)) {
-		cp->elements[idx].dl = new_dl;
-		cpudl_heapify(cp, idx);
-	} else {
-		cp->elements[idx].dl = new_dl;
-		while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
-					cp->elements[idx].dl)) {
-			cpudl_exchange(cp, idx, parent(idx));
-			idx = parent(idx);
-		}
-	}
+	if (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
+				cp->elements[idx].dl))
+		cpudl_heapify_up(cp, idx);
+	else
+		cpudl_heapify_down(cp, idx);
 }
 
 static inline int cpudl_maximum(struct cpudl *cp)
@@ -120,6 +145,45 @@
 }
 
 /*
+ * cpudl_clear - remove a cpu from the cpudl max-heap
+ * @cp: the cpudl max-heap context
+ * @cpu: the target cpu
+ *
+ * Notes: assumes cpu_rq(cpu)->lock is locked
+ *
+ * Returns: (void)
+ */
+void cpudl_clear(struct cpudl *cp, int cpu)
+{
+	int old_idx, new_cpu;
+	unsigned long flags;
+
+	WARN_ON(!cpu_present(cpu));
+
+	raw_spin_lock_irqsave(&cp->lock, flags);
+
+	old_idx = cp->elements[cpu].idx;
+	if (old_idx == IDX_INVALID) {
+		/*
+		 * Nothing to remove if old_idx was invalid.
+		 * This could happen if a rq_offline_dl is
+		 * called for a CPU without -dl tasks running.
+		 */
+	} else {
+		new_cpu = cp->elements[cp->size - 1].cpu;
+		cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
+		cp->elements[old_idx].cpu = new_cpu;
+		cp->size--;
+		cp->elements[new_cpu].idx = old_idx;
+		cp->elements[cpu].idx = IDX_INVALID;
+		cpudl_heapify(cp, old_idx);
+
+		cpumask_set_cpu(cpu, cp->free_cpus);
+	}
+	raw_spin_unlock_irqrestore(&cp->lock, flags);
+}
+
+/*
  * cpudl_set - update the cpudl max-heap
  * @cp: the cpudl max-heap context
  * @cpu: the target cpu
@@ -129,55 +193,28 @@
  *
  * Returns: (void)
  */
-void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
+void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
 {
-	int old_idx, new_cpu;
+	int old_idx;
 	unsigned long flags;
 
 	WARN_ON(!cpu_present(cpu));
 
 	raw_spin_lock_irqsave(&cp->lock, flags);
+
 	old_idx = cp->elements[cpu].idx;
-	if (!is_valid) {
-		/* remove item */
-		if (old_idx == IDX_INVALID) {
-			/*
-			 * Nothing to remove if old_idx was invalid.
-			 * This could happen if a rq_offline_dl is
-			 * called for a CPU without -dl tasks running.
-			 */
-			goto out;
-		}
-		new_cpu = cp->elements[cp->size - 1].cpu;
-		cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
-		cp->elements[old_idx].cpu = new_cpu;
-		cp->size--;
-		cp->elements[new_cpu].idx = old_idx;
-		cp->elements[cpu].idx = IDX_INVALID;
-		while (old_idx > 0 && dl_time_before(
-				cp->elements[parent(old_idx)].dl,
-				cp->elements[old_idx].dl)) {
-			cpudl_exchange(cp, old_idx, parent(old_idx));
-			old_idx = parent(old_idx);
-		}
-		cpumask_set_cpu(cpu, cp->free_cpus);
-                cpudl_heapify(cp, old_idx);
-
-		goto out;
-	}
-
 	if (old_idx == IDX_INVALID) {
-		cp->size++;
-		cp->elements[cp->size - 1].dl = 0;
-		cp->elements[cp->size - 1].cpu = cpu;
-		cp->elements[cpu].idx = cp->size - 1;
-		cpudl_change_key(cp, cp->size - 1, dl);
+		int new_idx = cp->size++;
+		cp->elements[new_idx].dl = dl;
+		cp->elements[new_idx].cpu = cpu;
+		cp->elements[cpu].idx = new_idx;
+		cpudl_heapify_up(cp, new_idx);
 		cpumask_clear_cpu(cpu, cp->free_cpus);
 	} else {
-		cpudl_change_key(cp, old_idx, dl);
+		cp->elements[old_idx].dl = dl;
+		cpudl_heapify(cp, old_idx);
 	}
 
-out:
 	raw_spin_unlock_irqrestore(&cp->lock, flags);
 }
 
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
index fcbdf83..f7da8c5 100644
--- a/kernel/sched/cpudeadline.h
+++ b/kernel/sched/cpudeadline.h
@@ -23,7 +23,8 @@
 #ifdef CONFIG_SMP
 int cpudl_find(struct cpudl *cp, struct task_struct *p,
 	       struct cpumask *later_mask);
-void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
+void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
+void cpudl_clear(struct cpudl *cp, int cpu);
 int cpudl_init(struct cpudl *cp);
 void cpudl_set_freecpu(struct cpudl *cp, int cpu);
 void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 1141954..dbc5144 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -33,7 +33,7 @@
  */
 void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
 			void (*func)(struct update_util_data *data, u64 time,
-				     unsigned long util, unsigned long max))
+				     unsigned int flags))
 {
 	if (WARN_ON(!data || !func))
 		return;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a84641b..69e0689 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -12,7 +12,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/cpufreq.h>
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <trace/events/power.h>
 
@@ -48,11 +47,14 @@
 	struct sugov_policy *sg_policy;
 
 	unsigned int cached_raw_freq;
+	unsigned long iowait_boost;
+	unsigned long iowait_boost_max;
+	u64 last_update;
 
 	/* The fields below are only needed when sharing a policy. */
 	unsigned long util;
 	unsigned long max;
-	u64 last_update;
+	unsigned int flags;
 };
 
 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -144,24 +146,75 @@
 	return cpufreq_driver_resolve_freq(policy, freq);
 }
 
+static void sugov_get_util(unsigned long *util, unsigned long *max)
+{
+	struct rq *rq = this_rq();
+	unsigned long cfs_max;
+
+	cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
+
+	*util = min(rq->cfs.avg.util_avg, cfs_max);
+	*max = cfs_max;
+}
+
+static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
+				   unsigned int flags)
+{
+	if (flags & SCHED_CPUFREQ_IOWAIT) {
+		sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+	} else if (sg_cpu->iowait_boost) {
+		s64 delta_ns = time - sg_cpu->last_update;
+
+		/* Clear iowait_boost if the CPU apprears to have been idle. */
+		if (delta_ns > TICK_NSEC)
+			sg_cpu->iowait_boost = 0;
+	}
+}
+
+static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
+			       unsigned long *max)
+{
+	unsigned long boost_util = sg_cpu->iowait_boost;
+	unsigned long boost_max = sg_cpu->iowait_boost_max;
+
+	if (!boost_util)
+		return;
+
+	if (*util * boost_max < *max * boost_util) {
+		*util = boost_util;
+		*max = boost_max;
+	}
+	sg_cpu->iowait_boost >>= 1;
+}
+
 static void sugov_update_single(struct update_util_data *hook, u64 time,
-				unsigned long util, unsigned long max)
+				unsigned int flags)
 {
 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
+	unsigned long util, max;
 	unsigned int next_f;
 
+	sugov_set_iowait_boost(sg_cpu, time, flags);
+	sg_cpu->last_update = time;
+
 	if (!sugov_should_update_freq(sg_policy, time))
 		return;
 
-	next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
-			get_next_freq(sg_cpu, util, max);
+	if (flags & SCHED_CPUFREQ_RT_DL) {
+		next_f = policy->cpuinfo.max_freq;
+	} else {
+		sugov_get_util(&util, &max);
+		sugov_iowait_boost(sg_cpu, &util, &max);
+		next_f = get_next_freq(sg_cpu, util, max);
+	}
 	sugov_update_commit(sg_policy, time, next_f);
 }
 
 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
-					   unsigned long util, unsigned long max)
+					   unsigned long util, unsigned long max,
+					   unsigned int flags)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
@@ -169,9 +222,11 @@
 	u64 last_freq_update_time = sg_policy->last_freq_update_time;
 	unsigned int j;
 
-	if (util == ULONG_MAX)
+	if (flags & SCHED_CPUFREQ_RT_DL)
 		return max_f;
 
+	sugov_iowait_boost(sg_cpu, &util, &max);
+
 	for_each_cpu(j, policy->cpus) {
 		struct sugov_cpu *j_sg_cpu;
 		unsigned long j_util, j_max;
@@ -186,41 +241,50 @@
 		 * frequency update and the time elapsed between the last update
 		 * of the CPU utilization and the last frequency update is long
 		 * enough, don't take the CPU into account as it probably is
-		 * idle now.
+		 * idle now (and clear iowait_boost for it).
 		 */
 		delta_ns = last_freq_update_time - j_sg_cpu->last_update;
-		if (delta_ns > TICK_NSEC)
+		if (delta_ns > TICK_NSEC) {
+			j_sg_cpu->iowait_boost = 0;
 			continue;
-
-		j_util = j_sg_cpu->util;
-		if (j_util == ULONG_MAX)
+		}
+		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
 			return max_f;
 
+		j_util = j_sg_cpu->util;
 		j_max = j_sg_cpu->max;
 		if (j_util * max > j_max * util) {
 			util = j_util;
 			max = j_max;
 		}
+
+		sugov_iowait_boost(j_sg_cpu, &util, &max);
 	}
 
 	return get_next_freq(sg_cpu, util, max);
 }
 
 static void sugov_update_shared(struct update_util_data *hook, u64 time,
-				unsigned long util, unsigned long max)
+				unsigned int flags)
 {
 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+	unsigned long util, max;
 	unsigned int next_f;
 
+	sugov_get_util(&util, &max);
+
 	raw_spin_lock(&sg_policy->update_lock);
 
 	sg_cpu->util = util;
 	sg_cpu->max = max;
+	sg_cpu->flags = flags;
+
+	sugov_set_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		next_f = sugov_next_freq_shared(sg_cpu, util, max);
+		next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
 		sugov_update_commit(sg_policy, time, next_f);
 	}
 
@@ -444,10 +508,13 @@
 
 		sg_cpu->sg_policy = sg_policy;
 		if (policy_is_shared(policy)) {
-			sg_cpu->util = ULONG_MAX;
+			sg_cpu->util = 0;
 			sg_cpu->max = 0;
+			sg_cpu->flags = SCHED_CPUFREQ_RT;
 			sg_cpu->last_update = 0;
 			sg_cpu->cached_raw_freq = 0;
+			sg_cpu->iowait_boost = 0;
+			sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
 			cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
 						     sugov_update_shared);
 		} else {
@@ -495,28 +562,15 @@
 	.limits = sugov_limits,
 };
 
-static int __init sugov_module_init(void)
-{
-	return cpufreq_register_governor(&schedutil_gov);
-}
-
-static void __exit sugov_module_exit(void)
-{
-	cpufreq_unregister_governor(&schedutil_gov);
-}
-
-MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
-MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
-MODULE_LICENSE("GPL");
-
 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
 struct cpufreq_governor *cpufreq_default_governor(void)
 {
 	return &schedutil_gov;
 }
-
-fs_initcall(sugov_module_init);
-#else
-module_init(sugov_module_init);
 #endif
-module_exit(sugov_module_exit);
+
+static int __init sugov_register(void)
+{
+	return cpufreq_register_governor(&schedutil_gov);
+}
+fs_initcall(sugov_register);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1934f65..5ebee31 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -23,10 +23,8 @@
  * task when irq is in progress while we read rq->clock. That is a worthy
  * compromise in place of having locks on each irq in account_system_time.
  */
-DEFINE_PER_CPU(u64, cpu_hardirq_time);
-DEFINE_PER_CPU(u64, cpu_softirq_time);
+DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
 
-static DEFINE_PER_CPU(u64, irq_start_time);
 static int sched_clock_irqtime;
 
 void enable_sched_clock_irqtime(void)
@@ -39,16 +37,13 @@
 	sched_clock_irqtime = 0;
 }
 
-#ifndef CONFIG_64BIT
-DEFINE_PER_CPU(seqcount_t, irq_time_seq);
-#endif /* CONFIG_64BIT */
-
 /*
  * Called before incrementing preempt_count on {soft,}irq_enter
  * and before decrementing preempt_count on {soft,}irq_exit.
  */
 void irqtime_account_irq(struct task_struct *curr)
 {
+	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
 	s64 delta;
 	int cpu;
 
@@ -56,10 +51,10 @@
 		return;
 
 	cpu = smp_processor_id();
-	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
-	__this_cpu_add(irq_start_time, delta);
+	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
+	irqtime->irq_start_time += delta;
 
-	irq_time_write_begin();
+	u64_stats_update_begin(&irqtime->sync);
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
@@ -67,42 +62,36 @@
 	 * that do not consume any time, but still wants to run.
 	 */
 	if (hardirq_count())
-		__this_cpu_add(cpu_hardirq_time, delta);
+		irqtime->hardirq_time += delta;
 	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
-		__this_cpu_add(cpu_softirq_time, delta);
+		irqtime->softirq_time += delta;
 
-	irq_time_write_end();
+	u64_stats_update_end(&irqtime->sync);
 }
 EXPORT_SYMBOL_GPL(irqtime_account_irq);
 
-static cputime_t irqtime_account_hi_update(cputime_t maxtime)
+static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
 {
 	u64 *cpustat = kcpustat_this_cpu->cpustat;
-	unsigned long flags;
 	cputime_t irq_cputime;
 
-	local_irq_save(flags);
-	irq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_hardirq_time)) -
-		      cpustat[CPUTIME_IRQ];
+	irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
 	irq_cputime = min(irq_cputime, maxtime);
-	cpustat[CPUTIME_IRQ] += irq_cputime;
-	local_irq_restore(flags);
+	cpustat[idx] += irq_cputime;
+
 	return irq_cputime;
 }
 
+static cputime_t irqtime_account_hi_update(cputime_t maxtime)
+{
+	return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time),
+				      CPUTIME_IRQ, maxtime);
+}
+
 static cputime_t irqtime_account_si_update(cputime_t maxtime)
 {
-	u64 *cpustat = kcpustat_this_cpu->cpustat;
-	unsigned long flags;
-	cputime_t softirq_cputime;
-
-	local_irq_save(flags);
-	softirq_cputime = nsecs_to_cputime64(this_cpu_read(cpu_softirq_time)) -
-			  cpustat[CPUTIME_SOFTIRQ];
-	softirq_cputime = min(softirq_cputime, maxtime);
-	cpustat[CPUTIME_SOFTIRQ] += softirq_cputime;
-	local_irq_restore(flags);
-	return softirq_cputime;
+	return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time),
+				      CPUTIME_SOFTIRQ, maxtime);
 }
 
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
@@ -263,6 +252,11 @@
 		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
 }
 
+/*
+ * When a guest is interrupted for a longer amount of time, missed clock
+ * ticks are not redelivered later. Due to that, this function may on
+ * occasion account more time than the calling functions think elapsed.
+ */
 static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
 {
 #ifdef CONFIG_PARAVIRT
@@ -290,6 +284,9 @@
 {
 	cputime_t accounted;
 
+	/* Shall be converted to a lockdep-enabled lightweight check */
+	WARN_ON_ONCE(!irqs_disabled());
+
 	accounted = steal_account_process_time(max);
 
 	if (accounted < max)
@@ -301,6 +298,26 @@
 	return accounted;
 }
 
+#ifdef CONFIG_64BIT
+static inline u64 read_sum_exec_runtime(struct task_struct *t)
+{
+	return t->se.sum_exec_runtime;
+}
+#else
+static u64 read_sum_exec_runtime(struct task_struct *t)
+{
+	u64 ns;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(t, &rf);
+	ns = t->se.sum_exec_runtime;
+	task_rq_unlock(rq, t, &rf);
+
+	return ns;
+}
+#endif
+
 /*
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
@@ -313,6 +330,17 @@
 	unsigned int seq, nextseq;
 	unsigned long flags;
 
+	/*
+	 * Update current task runtime to account pending time since last
+	 * scheduler action or thread_group_cputime() call. This thread group
+	 * might have other running tasks on different CPUs, but updating
+	 * their runtime can affect syscall performance, so we skip account
+	 * those pending times and rely only on values updated on tick or
+	 * other scheduler action.
+	 */
+	if (same_thread_group(current, tsk))
+		(void) task_sched_runtime(current);
+
 	rcu_read_lock();
 	/* Attempt a lockless read on the first round. */
 	nextseq = 0;
@@ -327,7 +355,7 @@
 			task_cputime(t, &utime, &stime);
 			times->utime += utime;
 			times->stime += stime;
-			times->sum_exec_runtime += task_sched_runtime(t);
+			times->sum_exec_runtime += read_sum_exec_runtime(t);
 		}
 		/* If lockless access failed, take the lock. */
 		nextseq = 1;
@@ -371,7 +399,7 @@
 	 * idle, or potentially user or system time. Due to rounding,
 	 * other time can exceed ticks occasionally.
 	 */
-	other = account_other_time(cputime);
+	other = account_other_time(ULONG_MAX);
 	if (other >= cputime)
 		return;
 	cputime -= other;
@@ -486,7 +514,7 @@
 	}
 
 	cputime = cputime_one_jiffy;
-	steal = steal_account_process_time(cputime);
+	steal = steal_account_process_time(ULONG_MAX);
 
 	if (steal >= cputime)
 		return;
@@ -508,13 +536,21 @@
  */
 void account_idle_ticks(unsigned long ticks)
 {
+	cputime_t cputime, steal;
 
 	if (sched_clock_irqtime) {
 		irqtime_account_idle_ticks(ticks);
 		return;
 	}
 
-	account_idle_time(jiffies_to_cputime(ticks));
+	cputime = jiffies_to_cputime(ticks);
+	steal = steal_account_process_time(ULONG_MAX);
+
+	if (steal >= cputime)
+		return;
+
+	cputime -= steal;
+	account_idle_time(cputime);
 }
 
 /*
@@ -606,19 +642,25 @@
 	stime = curr->stime;
 	utime = curr->utime;
 
-	if (utime == 0) {
-		stime = rtime;
+	/*
+	 * If either stime or both stime and utime are 0, assume all runtime is
+	 * userspace. Once a task gets some ticks, the monotonicy code at
+	 * 'update' will ensure things converge to the observed ratio.
+	 */
+	if (stime == 0) {
+		utime = rtime;
 		goto update;
 	}
 
-	if (stime == 0) {
-		utime = rtime;
+	if (utime == 0) {
+		stime = rtime;
 		goto update;
 	}
 
 	stime = scale_stime((__force u64)stime, (__force u64)rtime,
 			    (__force u64)(stime + utime));
 
+update:
 	/*
 	 * Make sure stime doesn't go backwards; this preserves monotonicity
 	 * for utime because rtime is monotonic.
@@ -641,7 +683,6 @@
 		stime = rtime - utime;
 	}
 
-update:
 	prev->stime = stime;
 	prev->utime = utime;
 out:
@@ -686,6 +727,13 @@
 	unsigned long now = READ_ONCE(jiffies);
 	cputime_t delta, other;
 
+	/*
+	 * Unlike tick based timing, vtime based timing never has lost
+	 * ticks, and no need for steal time accounting to make up for
+	 * lost ticks. Vtime accounts a rounded version of actual
+	 * elapsed time. Limit account_other_time to prevent rounding
+	 * errors from causing elapsed vtime to go negative.
+	 */
 	delta = jiffies_to_cputime(now - tsk->vtime_snap);
 	other = account_other_time(delta);
 	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fcb7f02..37e2449 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -243,10 +243,8 @@
 static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
 {
 	struct rq *later_rq = NULL;
-	bool fallback = false;
 
 	later_rq = find_lock_later_rq(p, rq);
-
 	if (!later_rq) {
 		int cpu;
 
@@ -254,7 +252,6 @@
 		 * If we cannot preempt any rq, fall back to pick any
 		 * online cpu.
 		 */
-		fallback = true;
 		cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
 		if (cpu >= nr_cpu_ids) {
 			/*
@@ -274,16 +271,7 @@
 		double_lock_balance(rq, later_rq);
 	}
 
-	/*
-	 * By now the task is replenished and enqueued; migrate it.
-	 */
-	deactivate_task(rq, p, 0);
 	set_task_cpu(p, later_rq->cpu);
-	activate_task(later_rq, p, 0);
-
-	if (!fallback)
-		resched_curr(later_rq);
-
 	double_unlock_balance(later_rq, rq);
 
 	return later_rq;
@@ -346,12 +334,12 @@
  * one, and to (try to!) reconcile itself with its own scheduling
  * parameters.
  */
-static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
-				       struct sched_dl_entity *pi_se)
+static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
+	WARN_ON(dl_se->dl_boosted);
 	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
 
 	/*
@@ -367,8 +355,8 @@
 	 * future; in fact, we must consider execution overheads (time
 	 * spent on hardirq context, etc.).
 	 */
-	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-	dl_se->runtime = pi_se->dl_runtime;
+	dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
+	dl_se->runtime = dl_se->dl_runtime;
 }
 
 /*
@@ -641,6 +629,24 @@
 		goto unlock;
 	}
 
+#ifdef CONFIG_SMP
+	if (unlikely(!rq->online)) {
+		/*
+		 * If the runqueue is no longer available, migrate the
+		 * task elsewhere. This necessarily changes rq.
+		 */
+		lockdep_unpin_lock(&rq->lock, rf.cookie);
+		rq = dl_task_offline_migration(rq, p);
+		rf.cookie = lockdep_pin_lock(&rq->lock);
+
+		/*
+		 * Now that the task has been migrated to the new RQ and we
+		 * have that locked, proceed as normal and enqueue the task
+		 * there.
+		 */
+	}
+#endif
+
 	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
 	if (dl_task(rq->curr))
 		check_preempt_curr_dl(rq, p, 0);
@@ -649,19 +655,6 @@
 
 #ifdef CONFIG_SMP
 	/*
-	 * Perform balancing operations here; after the replenishments.  We
-	 * cannot drop rq->lock before this, otherwise the assertion in
-	 * start_dl_timer() about not missing updates is not true.
-	 *
-	 * If we find that the rq the task was on is no longer available, we
-	 * need to select a new rq.
-	 *
-	 * XXX figure out if select_task_rq_dl() deals with offline cpus.
-	 */
-	if (unlikely(!rq->online))
-		rq = dl_task_offline_migration(rq, p);
-
-	/*
 	 * Queueing this task back might have overloaded rq, check if we need
 	 * to kick someone away.
 	 */
@@ -732,9 +725,8 @@
 		return;
 	}
 
-	/* kick cpufreq (see the comment in linux/cpufreq.h). */
-	if (cpu_of(rq) == smp_processor_id())
-		cpufreq_trigger_update(rq_clock(rq));
+	/* kick cpufreq (see the comment in kernel/sched/sched.h). */
+	cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
@@ -795,7 +787,7 @@
 	if (dl_rq->earliest_dl.curr == 0 ||
 	    dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
 		dl_rq->earliest_dl.curr = deadline;
-		cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
+		cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
 	}
 }
 
@@ -810,14 +802,14 @@
 	if (!dl_rq->dl_nr_running) {
 		dl_rq->earliest_dl.curr = 0;
 		dl_rq->earliest_dl.next = 0;
-		cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+		cpudl_clear(&rq->rd->cpudl, rq->cpu);
 	} else {
 		struct rb_node *leftmost = dl_rq->rb_leftmost;
 		struct sched_dl_entity *entry;
 
 		entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
 		dl_rq->earliest_dl.curr = entry->deadline;
-		cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
+		cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
 	}
 }
 
@@ -1668,7 +1660,7 @@
 
 	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
 	if (rq->dl.dl_nr_running > 0)
-		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
+		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
 }
 
 /* Assumes rq->lock is held */
@@ -1677,7 +1669,7 @@
 	if (rq->dl.overloaded)
 		dl_clear_overload(rq);
 
-	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
+	cpudl_clear(&rq->rd->cpudl, rq->cpu);
 	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
 }
 
@@ -1720,10 +1712,20 @@
  */
 static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
-	if (dl_time_before(p->dl.deadline, rq_clock(rq)))
-		setup_new_dl_entity(&p->dl, &p->dl);
 
-	if (task_on_rq_queued(p) && rq->curr != p) {
+	/* If p is not queued we will update its parameters at next wakeup. */
+	if (!task_on_rq_queued(p))
+		return;
+
+	/*
+	 * If p is boosted we already updated its params in
+	 * rt_mutex_setprio()->enqueue_task(..., ENQUEUE_REPLENISH),
+	 * p's deadline being now already after rq_clock(rq).
+	 */
+	if (dl_time_before(p->dl.deadline, rq_clock(rq)))
+		setup_new_dl_entity(&p->dl);
+
+	if (rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (tsk_nr_cpus_allowed(p) > 1 && rq->dl.overloaded)
 			queue_push_tasks(rq);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2a0a999..1393588 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -369,8 +369,12 @@
 
 #define P(F) \
 	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
+#define P_SCHEDSTAT(F) \
+	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)schedstat_val(F))
 #define PN(F) \
 	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+#define PN_SCHEDSTAT(F) \
+	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
 
 	if (!se)
 		return;
@@ -378,26 +382,27 @@
 	PN(se->exec_start);
 	PN(se->vruntime);
 	PN(se->sum_exec_runtime);
-#ifdef CONFIG_SCHEDSTATS
 	if (schedstat_enabled()) {
-		PN(se->statistics.wait_start);
-		PN(se->statistics.sleep_start);
-		PN(se->statistics.block_start);
-		PN(se->statistics.sleep_max);
-		PN(se->statistics.block_max);
-		PN(se->statistics.exec_max);
-		PN(se->statistics.slice_max);
-		PN(se->statistics.wait_max);
-		PN(se->statistics.wait_sum);
-		P(se->statistics.wait_count);
+		PN_SCHEDSTAT(se->statistics.wait_start);
+		PN_SCHEDSTAT(se->statistics.sleep_start);
+		PN_SCHEDSTAT(se->statistics.block_start);
+		PN_SCHEDSTAT(se->statistics.sleep_max);
+		PN_SCHEDSTAT(se->statistics.block_max);
+		PN_SCHEDSTAT(se->statistics.exec_max);
+		PN_SCHEDSTAT(se->statistics.slice_max);
+		PN_SCHEDSTAT(se->statistics.wait_max);
+		PN_SCHEDSTAT(se->statistics.wait_sum);
+		P_SCHEDSTAT(se->statistics.wait_count);
 	}
-#endif
 	P(se->load.weight);
 #ifdef CONFIG_SMP
 	P(se->avg.load_avg);
 	P(se->avg.util_avg);
 #endif
+
+#undef PN_SCHEDSTAT
 #undef PN
+#undef P_SCHEDSTAT
 #undef P
 }
 #endif
@@ -429,9 +434,9 @@
 		p->prio);
 
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(schedstat_val(p, se.statistics.wait_sum)),
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val(p, se.statistics.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
 
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@ -626,9 +631,7 @@
 #undef P64
 #endif
 
-#ifdef CONFIG_SCHEDSTATS
-#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
-
+#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
 	if (schedstat_enabled()) {
 		P(yld_count);
 		P(sched_count);
@@ -636,9 +639,8 @@
 		P(ttwu_count);
 		P(ttwu_local);
 	}
-
 #undef P
-#endif
+
 	spin_lock_irqsave(&sched_debug_lock, flags);
 	print_cfs_stats(m, cpu);
 	print_rt_stats(m, cpu);
@@ -868,10 +870,14 @@
 	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
 #define P(F) \
 	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
+#define P_SCHEDSTAT(F) \
+	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
 #define __PN(F) \
 	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
 #define PN(F) \
 	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
+#define PN_SCHEDSTAT(F) \
+	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
 
 	PN(se.exec_start);
 	PN(se.vruntime);
@@ -881,37 +887,36 @@
 
 	P(se.nr_migrations);
 
-#ifdef CONFIG_SCHEDSTATS
 	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;
 
-		PN(se.statistics.sum_sleep_runtime);
-		PN(se.statistics.wait_start);
-		PN(se.statistics.sleep_start);
-		PN(se.statistics.block_start);
-		PN(se.statistics.sleep_max);
-		PN(se.statistics.block_max);
-		PN(se.statistics.exec_max);
-		PN(se.statistics.slice_max);
-		PN(se.statistics.wait_max);
-		PN(se.statistics.wait_sum);
-		P(se.statistics.wait_count);
-		PN(se.statistics.iowait_sum);
-		P(se.statistics.iowait_count);
-		P(se.statistics.nr_migrations_cold);
-		P(se.statistics.nr_failed_migrations_affine);
-		P(se.statistics.nr_failed_migrations_running);
-		P(se.statistics.nr_failed_migrations_hot);
-		P(se.statistics.nr_forced_migrations);
-		P(se.statistics.nr_wakeups);
-		P(se.statistics.nr_wakeups_sync);
-		P(se.statistics.nr_wakeups_migrate);
-		P(se.statistics.nr_wakeups_local);
-		P(se.statistics.nr_wakeups_remote);
-		P(se.statistics.nr_wakeups_affine);
-		P(se.statistics.nr_wakeups_affine_attempts);
-		P(se.statistics.nr_wakeups_passive);
-		P(se.statistics.nr_wakeups_idle);
+		PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
+		PN_SCHEDSTAT(se.statistics.wait_start);
+		PN_SCHEDSTAT(se.statistics.sleep_start);
+		PN_SCHEDSTAT(se.statistics.block_start);
+		PN_SCHEDSTAT(se.statistics.sleep_max);
+		PN_SCHEDSTAT(se.statistics.block_max);
+		PN_SCHEDSTAT(se.statistics.exec_max);
+		PN_SCHEDSTAT(se.statistics.slice_max);
+		PN_SCHEDSTAT(se.statistics.wait_max);
+		PN_SCHEDSTAT(se.statistics.wait_sum);
+		P_SCHEDSTAT(se.statistics.wait_count);
+		PN_SCHEDSTAT(se.statistics.iowait_sum);
+		P_SCHEDSTAT(se.statistics.iowait_count);
+		P_SCHEDSTAT(se.statistics.nr_migrations_cold);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
+		P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
+		P_SCHEDSTAT(se.statistics.nr_forced_migrations);
+		P_SCHEDSTAT(se.statistics.nr_wakeups);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_local);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
+		P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
 
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
@@ -930,7 +935,7 @@
 		__PN(avg_atom);
 		__PN(avg_per_cpu);
 	}
-#endif
+
 	__P(nr_switches);
 	SEQ_printf(m, "%-45s:%21Ld\n",
 		   "nr_voluntary_switches", (long long)p->nvcsw);
@@ -947,8 +952,10 @@
 #endif
 	P(policy);
 	P(prio);
+#undef PN_SCHEDSTAT
 #undef PN
 #undef __PN
+#undef P_SCHEDSTAT
 #undef P
 #undef __P
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4088eed..502e95a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -114,6 +114,12 @@
 unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
 #endif
 
+/*
+ * The margin used when comparing utilization with CPU capacity:
+ * util * 1024 < capacity * margin
+ */
+unsigned int capacity_margin = 1280; /* ~20% */
+
 static inline void update_load_add(struct load_weight *lw, unsigned long inc)
 {
 	lw->weight += inc;
@@ -256,9 +262,7 @@
 
 static inline struct task_struct *task_of(struct sched_entity *se)
 {
-#ifdef CONFIG_SCHED_DEBUG
-	WARN_ON_ONCE(!entity_is_task(se));
-#endif
+	SCHED_WARN_ON(!entity_is_task(se));
 	return container_of(se, struct task_struct, se);
 }
 
@@ -456,17 +460,23 @@
 
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
+	struct sched_entity *curr = cfs_rq->curr;
+
 	u64 vruntime = cfs_rq->min_vruntime;
 
-	if (cfs_rq->curr)
-		vruntime = cfs_rq->curr->vruntime;
+	if (curr) {
+		if (curr->on_rq)
+			vruntime = curr->vruntime;
+		else
+			curr = NULL;
+	}
 
 	if (cfs_rq->rb_leftmost) {
 		struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
 						   struct sched_entity,
 						   run_node);
 
-		if (!cfs_rq->curr)
+		if (!curr)
 			vruntime = se->vruntime;
 		else
 			vruntime = min_vruntime(vruntime, se->vruntime);
@@ -656,7 +666,7 @@
 }
 
 #ifdef CONFIG_SMP
-static int select_idle_sibling(struct task_struct *p, int cpu);
+static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
 
 /*
@@ -726,7 +736,6 @@
 	struct sched_avg *sa = &se->avg;
 	long cap = (long)(SCHED_CAPACITY_SCALE - cfs_rq->avg.util_avg) / 2;
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int tg_update;
 
 	if (cap > 0) {
 		if (cfs_rq->avg.util_avg != 0) {
@@ -759,10 +768,9 @@
 		}
 	}
 
-	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+	update_cfs_rq_load_avg(now, cfs_rq, false);
 	attach_entity_load_avg(cfs_rq, se);
-	if (tg_update)
-		update_tg_load_avg(cfs_rq, false);
+	update_tg_load_avg(cfs_rq, false);
 }
 
 #else /* !CONFIG_SMP */
@@ -799,7 +807,7 @@
 		      max(delta_exec, curr->statistics.exec_max));
 
 	curr->sum_exec_runtime += delta_exec;
-	schedstat_add(cfs_rq, exec_clock, delta_exec);
+	schedstat_add(cfs_rq->exec_clock, delta_exec);
 
 	curr->vruntime += calc_delta_fair(delta_exec, curr);
 	update_min_vruntime(cfs_rq);
@@ -820,26 +828,34 @@
 	update_curr(cfs_rq_of(&rq->curr->se));
 }
 
-#ifdef CONFIG_SCHEDSTATS
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u64 wait_start = rq_clock(rq_of(cfs_rq));
+	u64 wait_start, prev_wait_start;
+
+	if (!schedstat_enabled())
+		return;
+
+	wait_start = rq_clock(rq_of(cfs_rq));
+	prev_wait_start = schedstat_val(se->statistics.wait_start);
 
 	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
-	    likely(wait_start > se->statistics.wait_start))
-		wait_start -= se->statistics.wait_start;
+	    likely(wait_start > prev_wait_start))
+		wait_start -= prev_wait_start;
 
-	se->statistics.wait_start = wait_start;
+	schedstat_set(se->statistics.wait_start, wait_start);
 }
 
-static void
+static inline void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	struct task_struct *p;
 	u64 delta;
 
-	delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
+	if (!schedstat_enabled())
+		return;
+
+	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -849,35 +865,114 @@
 			 * time stamp can be adjusted to accumulate wait time
 			 * prior to migration.
 			 */
-			se->statistics.wait_start = delta;
+			schedstat_set(se->statistics.wait_start, delta);
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
 	}
 
-	se->statistics.wait_max = max(se->statistics.wait_max, delta);
-	se->statistics.wait_count++;
-	se->statistics.wait_sum += delta;
-	se->statistics.wait_start = 0;
+	schedstat_set(se->statistics.wait_max,
+		      max(schedstat_val(se->statistics.wait_max), delta));
+	schedstat_inc(se->statistics.wait_count);
+	schedstat_add(se->statistics.wait_sum, delta);
+	schedstat_set(se->statistics.wait_start, 0);
+}
+
+static inline void
+update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	struct task_struct *tsk = NULL;
+	u64 sleep_start, block_start;
+
+	if (!schedstat_enabled())
+		return;
+
+	sleep_start = schedstat_val(se->statistics.sleep_start);
+	block_start = schedstat_val(se->statistics.block_start);
+
+	if (entity_is_task(se))
+		tsk = task_of(se);
+
+	if (sleep_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - sleep_start;
+
+		if ((s64)delta < 0)
+			delta = 0;
+
+		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
+			schedstat_set(se->statistics.sleep_max, delta);
+
+		schedstat_set(se->statistics.sleep_start, 0);
+		schedstat_add(se->statistics.sum_sleep_runtime, delta);
+
+		if (tsk) {
+			account_scheduler_latency(tsk, delta >> 10, 1);
+			trace_sched_stat_sleep(tsk, delta);
+		}
+	}
+	if (block_start) {
+		u64 delta = rq_clock(rq_of(cfs_rq)) - block_start;
+
+		if ((s64)delta < 0)
+			delta = 0;
+
+		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
+			schedstat_set(se->statistics.block_max, delta);
+
+		schedstat_set(se->statistics.block_start, 0);
+		schedstat_add(se->statistics.sum_sleep_runtime, delta);
+
+		if (tsk) {
+			if (tsk->in_iowait) {
+				schedstat_add(se->statistics.iowait_sum, delta);
+				schedstat_inc(se->statistics.iowait_count);
+				trace_sched_stat_iowait(tsk, delta);
+			}
+
+			trace_sched_stat_blocked(tsk, delta);
+
+			/*
+			 * Blocking time is in units of nanosecs, so shift by
+			 * 20 to get a milliseconds-range estimation of the
+			 * amount of time that the task spent sleeping:
+			 */
+			if (unlikely(prof_on == SLEEP_PROFILING)) {
+				profile_hits(SLEEP_PROFILING,
+						(void *)get_wchan(tsk),
+						delta >> 20);
+			}
+			account_scheduler_latency(tsk, delta >> 10, 0);
+		}
+	}
 }
 
 /*
  * Task is being enqueued - update stats:
  */
 static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+	if (!schedstat_enabled())
+		return;
+
 	/*
 	 * Are we enqueueing a waiting task? (for current tasks
 	 * a dequeue/enqueue event is a NOP)
 	 */
 	if (se != cfs_rq->curr)
 		update_stats_wait_start(cfs_rq, se);
+
+	if (flags & ENQUEUE_WAKEUP)
+		update_stats_enqueue_sleeper(cfs_rq, se);
 }
 
 static inline void
 update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
+
+	if (!schedstat_enabled())
+		return;
+
 	/*
 	 * Mark the end of the wait period if dequeueing a
 	 * waiting task:
@@ -885,39 +980,17 @@
 	if (se != cfs_rq->curr)
 		update_stats_wait_end(cfs_rq, se);
 
-	if (flags & DEQUEUE_SLEEP) {
-		if (entity_is_task(se)) {
-			struct task_struct *tsk = task_of(se);
+	if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) {
+		struct task_struct *tsk = task_of(se);
 
-			if (tsk->state & TASK_INTERRUPTIBLE)
-				se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
-			if (tsk->state & TASK_UNINTERRUPTIBLE)
-				se->statistics.block_start = rq_clock(rq_of(cfs_rq));
-		}
+		if (tsk->state & TASK_INTERRUPTIBLE)
+			schedstat_set(se->statistics.sleep_start,
+				      rq_clock(rq_of(cfs_rq)));
+		if (tsk->state & TASK_UNINTERRUPTIBLE)
+			schedstat_set(se->statistics.block_start,
+				      rq_clock(rq_of(cfs_rq)));
 	}
-
 }
-#else
-static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-}
-
-static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
-{
-}
-#endif
 
 /*
  * We are picking a new current task - update its stats:
@@ -1513,8 +1586,16 @@
 	 * One idle CPU per node is evaluated for a task numa move.
 	 * Call select_idle_sibling to maybe find a better one.
 	 */
-	if (!cur)
-		env->dst_cpu = select_idle_sibling(env->p, env->dst_cpu);
+	if (!cur) {
+		/*
+		 * select_idle_siblings() uses an per-cpu cpumask that
+		 * can be used from IRQ context.
+		 */
+		local_irq_disable();
+		env->dst_cpu = select_idle_sibling(env->p, env->src_cpu,
+						   env->dst_cpu);
+		local_irq_enable();
+	}
 
 assign:
 	task_numa_assign(env, cur, imp);
@@ -2292,7 +2373,7 @@
 	unsigned long nr_pte_updates = 0;
 	long pages, virtpages;
 
-	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
+	SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));
 
 	work->next = work; /* protect against double add */
 	/*
@@ -2803,9 +2884,21 @@
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-/*
- * Updating tg's load_avg is necessary before update_cfs_share (which is done)
- * and effective_load (which is not done because it is too costly).
+/**
+ * update_tg_load_avg - update the tg's load avg
+ * @cfs_rq: the cfs_rq whose avg changed
+ * @force: update regardless of how small the difference
+ *
+ * This function 'ensures': tg->load_avg := \Sum tg->cfs_rq[]->avg.load.
+ * However, because tg->load_avg is a global value there are performance
+ * considerations.
+ *
+ * In order to avoid having to look at the other cfs_rq's, we use a
+ * differential update where we store the last value we propagated. This in
+ * turn allows skipping updates if the differential is 'small'.
+ *
+ * Updating tg's load_avg is necessary before update_cfs_share() (which is
+ * done) and effective_load() (which is not done because it is too costly).
  */
 static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 {
@@ -2875,12 +2968,7 @@
 
 static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 {
-	struct rq *rq = rq_of(cfs_rq);
-	int cpu = cpu_of(rq);
-
-	if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
-		unsigned long max = rq->cpu_capacity_orig;
-
+	if (&this_rq()->cfs == cfs_rq) {
 		/*
 		 * There are a few boundary cases this might miss but it should
 		 * get called often enough that that should (hopefully) not be
@@ -2897,8 +2985,7 @@
 		 *
 		 * See cpu_util().
 		 */
-		cpufreq_update_util(rq_clock(rq),
-				    min(cfs_rq->avg.util_avg, max), max);
+		cpufreq_update_util(rq_of(cfs_rq), 0);
 	}
 }
 
@@ -2931,10 +3018,10 @@
  *
  * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
  *
- * Returns true if the load decayed or we removed utilization. It is expected
- * that one calls update_tg_load_avg() on this condition, but after you've
- * modified the cfs_rq avg (attach/detach), such that we propagate the new
- * avg up.
+ * Returns true if the load decayed or we removed load.
+ *
+ * Since both these conditions indicate a changed cfs_rq->avg.load we should
+ * call update_tg_load_avg() when this function returns true.
  */
 static inline int
 update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
@@ -3159,10 +3246,7 @@
 
 static inline void update_load_avg(struct sched_entity *se, int not_used)
 {
-	struct cfs_rq *cfs_rq = cfs_rq_of(se);
-	struct rq *rq = rq_of(cfs_rq);
-
-	cpufreq_trigger_update(rq_clock(rq));
+	cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
 }
 
 static inline void
@@ -3183,68 +3267,6 @@
 
 #endif /* CONFIG_SMP */
 
-static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-#ifdef CONFIG_SCHEDSTATS
-	struct task_struct *tsk = NULL;
-
-	if (entity_is_task(se))
-		tsk = task_of(se);
-
-	if (se->statistics.sleep_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
-
-		if ((s64)delta < 0)
-			delta = 0;
-
-		if (unlikely(delta > se->statistics.sleep_max))
-			se->statistics.sleep_max = delta;
-
-		se->statistics.sleep_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
-
-		if (tsk) {
-			account_scheduler_latency(tsk, delta >> 10, 1);
-			trace_sched_stat_sleep(tsk, delta);
-		}
-	}
-	if (se->statistics.block_start) {
-		u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
-
-		if ((s64)delta < 0)
-			delta = 0;
-
-		if (unlikely(delta > se->statistics.block_max))
-			se->statistics.block_max = delta;
-
-		se->statistics.block_start = 0;
-		se->statistics.sum_sleep_runtime += delta;
-
-		if (tsk) {
-			if (tsk->in_iowait) {
-				se->statistics.iowait_sum += delta;
-				se->statistics.iowait_count++;
-				trace_sched_stat_iowait(tsk, delta);
-			}
-
-			trace_sched_stat_blocked(tsk, delta);
-
-			/*
-			 * Blocking time is in units of nanosecs, so shift by
-			 * 20 to get a milliseconds-range estimation of the
-			 * amount of time that the task spent sleeping:
-			 */
-			if (unlikely(prof_on == SLEEP_PROFILING)) {
-				profile_hits(SLEEP_PROFILING,
-						(void *)get_wchan(tsk),
-						delta >> 20);
-			}
-			account_scheduler_latency(tsk, delta >> 10, 0);
-		}
-	}
-#endif
-}
-
 static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 #ifdef CONFIG_SCHED_DEBUG
@@ -3254,7 +3276,7 @@
 		d = -d;
 
 	if (d > 3*sysctl_sched_latency)
-		schedstat_inc(cfs_rq, nr_spread_over);
+		schedstat_inc(cfs_rq->nr_spread_over);
 #endif
 }
 
@@ -3371,17 +3393,12 @@
 	account_entity_enqueue(cfs_rq, se);
 	update_cfs_shares(cfs_rq);
 
-	if (flags & ENQUEUE_WAKEUP) {
+	if (flags & ENQUEUE_WAKEUP)
 		place_entity(cfs_rq, se, 0);
-		if (schedstat_enabled())
-			enqueue_sleeper(cfs_rq, se);
-	}
 
 	check_schedstat_required();
-	if (schedstat_enabled()) {
-		update_stats_enqueue(cfs_rq, se);
-		check_spread(cfs_rq, se);
-	}
+	update_stats_enqueue(cfs_rq, se, flags);
+	check_spread(cfs_rq, se);
 	if (!curr)
 		__enqueue_entity(cfs_rq, se);
 	se->on_rq = 1;
@@ -3448,8 +3465,7 @@
 	update_curr(cfs_rq);
 	dequeue_entity_load_avg(cfs_rq, se);
 
-	if (schedstat_enabled())
-		update_stats_dequeue(cfs_rq, se, flags);
+	update_stats_dequeue(cfs_rq, se, flags);
 
 	clear_buddies(cfs_rq, se);
 
@@ -3459,9 +3475,10 @@
 	account_entity_dequeue(cfs_rq, se);
 
 	/*
-	 * Normalize the entity after updating the min_vruntime because the
-	 * update can refer to the ->curr item and we need to reflect this
-	 * movement in our normalized position.
+	 * Normalize after update_curr(); which will also have moved
+	 * min_vruntime if @se is the one holding it back. But before doing
+	 * update_min_vruntime() again, which will discount @se's position and
+	 * can move min_vruntime forward still more.
 	 */
 	if (!(flags & DEQUEUE_SLEEP))
 		se->vruntime -= cfs_rq->min_vruntime;
@@ -3469,8 +3486,16 @@
 	/* return excess runtime on last dequeue */
 	return_cfs_rq_runtime(cfs_rq);
 
-	update_min_vruntime(cfs_rq);
 	update_cfs_shares(cfs_rq);
+
+	/*
+	 * Now advance min_vruntime if @se was the entity holding it back,
+	 * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
+	 * put back on, and if we advance min_vruntime, we'll be placed back
+	 * further than we started -- ie. we'll be penalized.
+	 */
+	if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
+		update_min_vruntime(cfs_rq);
 }
 
 /*
@@ -3523,25 +3548,25 @@
 		 * a CPU. So account for the time it spent waiting on the
 		 * runqueue.
 		 */
-		if (schedstat_enabled())
-			update_stats_wait_end(cfs_rq, se);
+		update_stats_wait_end(cfs_rq, se);
 		__dequeue_entity(cfs_rq, se);
 		update_load_avg(se, 1);
 	}
 
 	update_stats_curr_start(cfs_rq, se);
 	cfs_rq->curr = se;
-#ifdef CONFIG_SCHEDSTATS
+
 	/*
 	 * Track our maximum slice length, if the CPU's load is at
 	 * least twice that of our own weight (i.e. dont track it
 	 * when there are only lesser-weight tasks around):
 	 */
 	if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
-		se->statistics.slice_max = max(se->statistics.slice_max,
-			se->sum_exec_runtime - se->prev_sum_exec_runtime);
+		schedstat_set(se->statistics.slice_max,
+			max((u64)schedstat_val(se->statistics.slice_max),
+			    se->sum_exec_runtime - se->prev_sum_exec_runtime));
 	}
-#endif
+
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
@@ -3620,13 +3645,10 @@
 	/* throttle cfs_rqs exceeding runtime */
 	check_cfs_rq_runtime(cfs_rq);
 
-	if (schedstat_enabled()) {
-		check_spread(cfs_rq, prev);
-		if (prev->on_rq)
-			update_stats_wait_start(cfs_rq, prev);
-	}
+	check_spread(cfs_rq, prev);
 
 	if (prev->on_rq) {
+		update_stats_wait_start(cfs_rq, prev);
 		/* Put 'current' back into the tree. */
 		__enqueue_entity(cfs_rq, prev);
 		/* in !on_rq case, update occurred at dequeue */
@@ -4269,7 +4291,7 @@
 	pcfs_rq = tg->parent->cfs_rq[cpu];
 
 	cfs_rq->throttle_count = pcfs_rq->throttle_count;
-	pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
+	cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
 }
 
 /* conditionally throttle active cfs_rq's from put_prev_entity() */
@@ -4456,9 +4478,9 @@
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
-	WARN_ON(task_rq(p) != rq);
+	SCHED_WARN_ON(task_rq(p) != rq);
 
-	if (cfs_rq->nr_running > 1) {
+	if (rq->cfs.h_nr_running > 1) {
 		u64 slice = sched_slice(cfs_rq, se);
 		u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
 		s64 delta = slice - ran;
@@ -4509,6 +4531,14 @@
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 
+	/*
+	 * If in_iowait is set, the code below may not trigger any cpufreq
+	 * utilization updates, so do it here explicitly with the IOWAIT flag
+	 * passed.
+	 */
+	if (p->in_iowait)
+		cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
+
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
@@ -4605,6 +4635,11 @@
 }
 
 #ifdef CONFIG_SMP
+
+/* Working cpumask for: load_balance, load_balance_newidle. */
+DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
+DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
+
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * per rq 'load' arrray crap; XXX kill this.
@@ -5006,9 +5041,9 @@
 		 * wl = S * s'_i; see (2)
 		 */
 		if (W > 0 && w < W)
-			wl = (w * (long)tg->shares) / W;
+			wl = (w * (long)scale_load_down(tg->shares)) / W;
 		else
-			wl = tg->shares;
+			wl = scale_load_down(tg->shares);
 
 		/*
 		 * Per the above, wl is the new se->load.weight value; since
@@ -5091,18 +5126,18 @@
 	return 1;
 }
 
-static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p,
+		       int prev_cpu, int sync)
 {
 	s64 this_load, load;
 	s64 this_eff_load, prev_eff_load;
-	int idx, this_cpu, prev_cpu;
+	int idx, this_cpu;
 	struct task_group *tg;
 	unsigned long weight;
 	int balanced;
 
 	idx	  = sd->wake_idx;
 	this_cpu  = smp_processor_id();
-	prev_cpu  = task_cpu(p);
 	load	  = source_load(prev_cpu, idx);
 	this_load = target_load(this_cpu, idx);
 
@@ -5146,13 +5181,13 @@
 
 	balanced = this_eff_load <= prev_eff_load;
 
-	schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
 
 	if (!balanced)
 		return 0;
 
-	schedstat_inc(sd, ttwu_move_affine);
-	schedstat_inc(p, se.statistics.nr_wakeups_affine);
+	schedstat_inc(sd->ttwu_move_affine);
+	schedstat_inc(p->se.statistics.nr_wakeups_affine);
 
 	return 1;
 }
@@ -5228,6 +5263,10 @@
 	int shallowest_idle_cpu = -1;
 	int i;
 
+	/* Check if we have any choice: */
+	if (group->group_weight == 1)
+		return cpumask_first(sched_group_cpus(group));
+
 	/* Traverse only the allowed CPUs */
 	for_each_cpu_and(i, sched_group_cpus(group), tsk_cpus_allowed(p)) {
 		if (idle_cpu(i)) {
@@ -5265,64 +5304,237 @@
 }
 
 /*
- * Try and locate an idle CPU in the sched_domain.
+ * Implement a for_each_cpu() variant that starts the scan at a given cpu
+ * (@start), and wraps around.
+ *
+ * This is used to scan for idle CPUs; such that not all CPUs looking for an
+ * idle CPU find the same CPU. The down-side is that tasks tend to cycle
+ * through the LLC domain.
+ *
+ * Especially tbench is found sensitive to this.
  */
-static int select_idle_sibling(struct task_struct *p, int target)
+
+static int cpumask_next_wrap(int n, const struct cpumask *mask, int start, int *wrapped)
+{
+	int next;
+
+again:
+	next = find_next_bit(cpumask_bits(mask), nr_cpumask_bits, n+1);
+
+	if (*wrapped) {
+		if (next >= start)
+			return nr_cpumask_bits;
+	} else {
+		if (next >= nr_cpumask_bits) {
+			*wrapped = 1;
+			n = -1;
+			goto again;
+		}
+	}
+
+	return next;
+}
+
+#define for_each_cpu_wrap(cpu, mask, start, wrap)				\
+	for ((wrap) = 0, (cpu) = (start)-1;					\
+		(cpu) = cpumask_next_wrap((cpu), (mask), (start), &(wrap)),	\
+		(cpu) < nr_cpumask_bits; )
+
+#ifdef CONFIG_SCHED_SMT
+
+static inline void set_idle_cores(int cpu, int val)
+{
+	struct sched_domain_shared *sds;
+
+	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (sds)
+		WRITE_ONCE(sds->has_idle_cores, val);
+}
+
+static inline bool test_idle_cores(int cpu, bool def)
+{
+	struct sched_domain_shared *sds;
+
+	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (sds)
+		return READ_ONCE(sds->has_idle_cores);
+
+	return def;
+}
+
+/*
+ * Scans the local SMT mask to see if the entire core is idle, and records this
+ * information in sd_llc_shared->has_idle_cores.
+ *
+ * Since SMT siblings share all cache levels, inspecting this limited remote
+ * state should be fairly cheap.
+ */
+void __update_idle_core(struct rq *rq)
+{
+	int core = cpu_of(rq);
+	int cpu;
+
+	rcu_read_lock();
+	if (test_idle_cores(core, true))
+		goto unlock;
+
+	for_each_cpu(cpu, cpu_smt_mask(core)) {
+		if (cpu == core)
+			continue;
+
+		if (!idle_cpu(cpu))
+			goto unlock;
+	}
+
+	set_idle_cores(core, 1);
+unlock:
+	rcu_read_unlock();
+}
+
+/*
+ * Scan the entire LLC domain for idle cores; this dynamically switches off if
+ * there are no idle cores left in the system; tracked through
+ * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
+ */
+static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+	int core, cpu, wrap;
+
+	if (!static_branch_likely(&sched_smt_present))
+		return -1;
+
+	if (!test_idle_cores(target, false))
+		return -1;
+
+	cpumask_and(cpus, sched_domain_span(sd), tsk_cpus_allowed(p));
+
+	for_each_cpu_wrap(core, cpus, target, wrap) {
+		bool idle = true;
+
+		for_each_cpu(cpu, cpu_smt_mask(core)) {
+			cpumask_clear_cpu(cpu, cpus);
+			if (!idle_cpu(cpu))
+				idle = false;
+		}
+
+		if (idle)
+			return core;
+	}
+
+	/*
+	 * Failed to find an idle core; stop looking for one.
+	 */
+	set_idle_cores(target, 0);
+
+	return -1;
+}
+
+/*
+ * Scan the local SMT mask for idle CPUs.
+ */
+static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	int cpu;
+
+	if (!static_branch_likely(&sched_smt_present))
+		return -1;
+
+	for_each_cpu(cpu, cpu_smt_mask(target)) {
+		if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+			continue;
+		if (idle_cpu(cpu))
+			return cpu;
+	}
+
+	return -1;
+}
+
+#else /* CONFIG_SCHED_SMT */
+
+static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	return -1;
+}
+
+static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	return -1;
+}
+
+#endif /* CONFIG_SCHED_SMT */
+
+/*
+ * Scan the LLC domain for idle CPUs; this is dynamically regulated by
+ * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
+ * average idle time for this rq (as found in rq->avg_idle).
+ */
+static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
+{
+	struct sched_domain *this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
+	u64 avg_idle = this_rq()->avg_idle;
+	u64 avg_cost = this_sd->avg_scan_cost;
+	u64 time, cost;
+	s64 delta;
+	int cpu, wrap;
+
+	/*
+	 * Due to large variance we need a large fuzz factor; hackbench in
+	 * particularly is sensitive here.
+	 */
+	if ((avg_idle / 512) < avg_cost)
+		return -1;
+
+	time = local_clock();
+
+	for_each_cpu_wrap(cpu, sched_domain_span(sd), target, wrap) {
+		if (!cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
+			continue;
+		if (idle_cpu(cpu))
+			break;
+	}
+
+	time = local_clock() - time;
+	cost = this_sd->avg_scan_cost;
+	delta = (s64)(time - cost) / 8;
+	this_sd->avg_scan_cost += delta;
+
+	return cpu;
+}
+
+/*
+ * Try and locate an idle core/thread in the LLC cache domain.
+ */
+static int select_idle_sibling(struct task_struct *p, int prev, int target)
 {
 	struct sched_domain *sd;
-	struct sched_group *sg;
-	int i = task_cpu(p);
+	int i;
 
 	if (idle_cpu(target))
 		return target;
 
 	/*
-	 * If the prevous cpu is cache affine and idle, don't be stupid.
+	 * If the previous cpu is cache affine and idle, don't be stupid.
 	 */
-	if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
+	if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
+		return prev;
+
+	sd = rcu_dereference(per_cpu(sd_llc, target));
+	if (!sd)
+		return target;
+
+	i = select_idle_core(p, sd, target);
+	if ((unsigned)i < nr_cpumask_bits)
 		return i;
 
-	/*
-	 * Otherwise, iterate the domains and find an eligible idle cpu.
-	 *
-	 * A completely idle sched group at higher domains is more
-	 * desirable than an idle group at a lower level, because lower
-	 * domains have smaller groups and usually share hardware
-	 * resources which causes tasks to contend on them, e.g. x86
-	 * hyperthread siblings in the lowest domain (SMT) can contend
-	 * on the shared cpu pipeline.
-	 *
-	 * However, while we prefer idle groups at higher domains
-	 * finding an idle cpu at the lowest domain is still better than
-	 * returning 'target', which we've already established, isn't
-	 * idle.
-	 */
-	sd = rcu_dereference(per_cpu(sd_llc, target));
-	for_each_lower_domain(sd) {
-		sg = sd->groups;
-		do {
-			if (!cpumask_intersects(sched_group_cpus(sg),
-						tsk_cpus_allowed(p)))
-				goto next;
+	i = select_idle_cpu(p, sd, target);
+	if ((unsigned)i < nr_cpumask_bits)
+		return i;
 
-			/* Ensure the entire group is idle */
-			for_each_cpu(i, sched_group_cpus(sg)) {
-				if (i == target || !idle_cpu(i))
-					goto next;
-			}
+	i = select_idle_smt(p, sd, target);
+	if ((unsigned)i < nr_cpumask_bits)
+		return i;
 
-			/*
-			 * It doesn't matter which cpu we pick, the
-			 * whole group is idle.
-			 */
-			target = cpumask_first_and(sched_group_cpus(sg),
-					tsk_cpus_allowed(p));
-			goto done;
-next:
-			sg = sg->next;
-		} while (sg != sd->groups);
-	}
-done:
 	return target;
 }
 
@@ -5360,6 +5572,32 @@
 	return (util >= capacity) ? capacity : util;
 }
 
+static inline int task_util(struct task_struct *p)
+{
+	return p->se.avg.util_avg;
+}
+
+/*
+ * Disable WAKE_AFFINE in the case where task @p doesn't fit in the
+ * capacity of either the waking CPU @cpu or the previous CPU @prev_cpu.
+ *
+ * In that case WAKE_AFFINE doesn't make sense and we'll let
+ * BALANCE_WAKE sort things out.
+ */
+static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
+{
+	long min_cap, max_cap;
+
+	min_cap = min(capacity_orig_of(prev_cpu), capacity_orig_of(cpu));
+	max_cap = cpu_rq(cpu)->rd->max_cpu_capacity;
+
+	/* Minimum capacity is close to max, no need to abort wake_affine */
+	if (max_cap - min_cap < max_cap >> 3)
+		return 0;
+
+	return min_cap * 1024 < task_util(p) * capacity_margin;
+}
+
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
  * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -5383,7 +5621,8 @@
 
 	if (sd_flag & SD_BALANCE_WAKE) {
 		record_wakee(p);
-		want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
+		want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
+			      && cpumask_test_cpu(cpu, tsk_cpus_allowed(p));
 	}
 
 	rcu_read_lock();
@@ -5409,13 +5648,13 @@
 
 	if (affine_sd) {
 		sd = NULL; /* Prefer wake_affine over balance flags */
-		if (cpu != prev_cpu && wake_affine(affine_sd, p, sync))
+		if (cpu != prev_cpu && wake_affine(affine_sd, p, prev_cpu, sync))
 			new_cpu = cpu;
 	}
 
 	if (!sd) {
 		if (sd_flag & SD_BALANCE_WAKE) /* XXX always ? */
-			new_cpu = select_idle_sibling(p, new_cpu);
+			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
 	} else while (sd) {
 		struct sched_group *group;
@@ -5939,7 +6178,7 @@
  *
  * The adjacency matrix of the resulting graph is given by:
  *
- *             log_2 n     
+ *             log_2 n
  *   A_i,j = \Union     (i % 2^k == 0) && i / 2^(k+1) == j / 2^(k+1)  (6)
  *             k = 0
  *
@@ -5985,7 +6224,7 @@
  *
  * [XXX write more on how we solve this.. _after_ merging pjt's patches that
  *      rewrite all of this once again.]
- */ 
+ */
 
 static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
@@ -6133,7 +6372,7 @@
 	if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
 		int cpu;
 
-		schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+		schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
 
 		env->flags |= LBF_SOME_PINNED;
 
@@ -6164,7 +6403,7 @@
 	env->flags &= ~LBF_ALL_PINNED;
 
 	if (task_running(env->src_rq, p)) {
-		schedstat_inc(p, se.statistics.nr_failed_migrations_running);
+		schedstat_inc(p->se.statistics.nr_failed_migrations_running);
 		return 0;
 	}
 
@@ -6181,13 +6420,13 @@
 	if (tsk_cache_hot <= 0 ||
 	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 		if (tsk_cache_hot == 1) {
-			schedstat_inc(env->sd, lb_hot_gained[env->idle]);
-			schedstat_inc(p, se.statistics.nr_forced_migrations);
+			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
+			schedstat_inc(p->se.statistics.nr_forced_migrations);
 		}
 		return 1;
 	}
 
-	schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
+	schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
 	return 0;
 }
 
@@ -6227,7 +6466,7 @@
 		 * so we can safely collect stats here rather than
 		 * inside detach_tasks().
 		 */
-		schedstat_inc(env->sd, lb_gained[env->idle]);
+		schedstat_inc(env->sd->lb_gained[env->idle]);
 		return p;
 	}
 	return NULL;
@@ -6319,7 +6558,7 @@
 	 * so we can safely collect detach_one_task() stats here rather
 	 * than inside detach_one_task().
 	 */
-	schedstat_add(env->sd, lb_gained[env->idle], detached);
+	schedstat_add(env->sd->lb_gained[env->idle], detached);
 
 	return detached;
 }
@@ -6647,7 +6886,7 @@
 		/*
 		 * !SD_OVERLAP domains can assume that child groups
 		 * span the current group.
-		 */ 
+		 */
 
 		group = child->groups;
 		do {
@@ -7147,7 +7386,7 @@
 		load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE;
 		if (load_above_capacity > busiest->group_capacity) {
 			load_above_capacity -= busiest->group_capacity;
-			load_above_capacity *= NICE_0_LOAD;
+			load_above_capacity *= scale_load_down(NICE_0_LOAD);
 			load_above_capacity /= busiest->group_capacity;
 		} else
 			load_above_capacity = ~0UL;
@@ -7354,9 +7593,6 @@
  */
 #define MAX_PINNED_INTERVAL	512
 
-/* Working cpumask for load_balance and load_balance_newidle. */
-DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
-
 static int need_active_balance(struct lb_env *env)
 {
 	struct sched_domain *sd = env->sd;
@@ -7460,7 +7696,7 @@
 
 	cpumask_copy(cpus, cpu_active_mask);
 
-	schedstat_inc(sd, lb_count[idle]);
+	schedstat_inc(sd->lb_count[idle]);
 
 redo:
 	if (!should_we_balance(&env)) {
@@ -7470,19 +7706,19 @@
 
 	group = find_busiest_group(&env);
 	if (!group) {
-		schedstat_inc(sd, lb_nobusyg[idle]);
+		schedstat_inc(sd->lb_nobusyg[idle]);
 		goto out_balanced;
 	}
 
 	busiest = find_busiest_queue(&env, group);
 	if (!busiest) {
-		schedstat_inc(sd, lb_nobusyq[idle]);
+		schedstat_inc(sd->lb_nobusyq[idle]);
 		goto out_balanced;
 	}
 
 	BUG_ON(busiest == env.dst_rq);
 
-	schedstat_add(sd, lb_imbalance[idle], env.imbalance);
+	schedstat_add(sd->lb_imbalance[idle], env.imbalance);
 
 	env.src_cpu = busiest->cpu;
 	env.src_rq = busiest;
@@ -7589,7 +7825,7 @@
 	}
 
 	if (!ld_moved) {
-		schedstat_inc(sd, lb_failed[idle]);
+		schedstat_inc(sd->lb_failed[idle]);
 		/*
 		 * Increment the failure counter only on periodic balance.
 		 * We do not want newidle balance, which can be very
@@ -7672,7 +7908,7 @@
 	 * we can't migrate them. Let the imbalance flag set so parent level
 	 * can try to migrate them.
 	 */
-	schedstat_inc(sd, lb_balanced[idle]);
+	schedstat_inc(sd->lb_balanced[idle]);
 
 	sd->nr_balance_failed = 0;
 
@@ -7704,11 +7940,12 @@
 }
 
 static inline void
-update_next_balance(struct sched_domain *sd, int cpu_busy, unsigned long *next_balance)
+update_next_balance(struct sched_domain *sd, unsigned long *next_balance)
 {
 	unsigned long interval, next;
 
-	interval = get_sd_balance_interval(sd, cpu_busy);
+	/* used by idle balance, so cpu_busy = 0 */
+	interval = get_sd_balance_interval(sd, 0);
 	next = sd->last_balance + interval;
 
 	if (time_after(*next_balance, next))
@@ -7738,7 +7975,7 @@
 		rcu_read_lock();
 		sd = rcu_dereference_check_sched_domain(this_rq->sd);
 		if (sd)
-			update_next_balance(sd, 0, &next_balance);
+			update_next_balance(sd, &next_balance);
 		rcu_read_unlock();
 
 		goto out;
@@ -7756,7 +7993,7 @@
 			continue;
 
 		if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
-			update_next_balance(sd, 0, &next_balance);
+			update_next_balance(sd, &next_balance);
 			break;
 		}
 
@@ -7774,7 +8011,7 @@
 			curr_cost += domain_cost;
 		}
 
-		update_next_balance(sd, 0, &next_balance);
+		update_next_balance(sd, &next_balance);
 
 		/*
 		 * Stop searching for tasks to pull if there are
@@ -7864,15 +8101,15 @@
 			.idle		= CPU_IDLE,
 		};
 
-		schedstat_inc(sd, alb_count);
+		schedstat_inc(sd->alb_count);
 
 		p = detach_one_task(&env);
 		if (p) {
-			schedstat_inc(sd, alb_pushed);
+			schedstat_inc(sd->alb_pushed);
 			/* Active balancing done, reset the failure counter. */
 			sd->nr_balance_failed = 0;
 		} else {
-			schedstat_inc(sd, alb_failed);
+			schedstat_inc(sd->alb_failed);
 		}
 	}
 	rcu_read_unlock();
@@ -7964,13 +8201,13 @@
 	int cpu = smp_processor_id();
 
 	rcu_read_lock();
-	sd = rcu_dereference(per_cpu(sd_busy, cpu));
+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
 
 	if (!sd || !sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 0;
 
-	atomic_inc(&sd->groups->sgc->nr_busy_cpus);
+	atomic_inc(&sd->shared->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -7981,13 +8218,13 @@
 	int cpu = smp_processor_id();
 
 	rcu_read_lock();
-	sd = rcu_dereference(per_cpu(sd_busy, cpu));
+	sd = rcu_dereference(per_cpu(sd_llc, cpu));
 
 	if (!sd || sd->nohz_idle)
 		goto unlock;
 	sd->nohz_idle = 1;
 
-	atomic_dec(&sd->groups->sgc->nr_busy_cpus);
+	atomic_dec(&sd->shared->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -8214,8 +8451,8 @@
 static inline bool nohz_kick_needed(struct rq *rq)
 {
 	unsigned long now = jiffies;
+	struct sched_domain_shared *sds;
 	struct sched_domain *sd;
-	struct sched_group_capacity *sgc;
 	int nr_busy, cpu = rq->cpu;
 	bool kick = false;
 
@@ -8243,11 +8480,13 @@
 		return true;
 
 	rcu_read_lock();
-	sd = rcu_dereference(per_cpu(sd_busy, cpu));
-	if (sd) {
-		sgc = sd->groups->sgc;
-		nr_busy = atomic_read(&sgc->nr_busy_cpus);
-
+	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
+	if (sds) {
+		/*
+		 * XXX: write a coherent comment on why we do this.
+		 * See also: http://lkml.kernel.org/r/20111202010832.602203411@sbsiddha-desk.sc.intel.com
+		 */
+		nr_busy = atomic_read(&sds->nr_busy_cpus);
 		if (nr_busy > 1) {
 			kick = true;
 			goto unlock;
@@ -8441,7 +8680,6 @@
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int tg_update;
 
 	if (!vruntime_normalized(p)) {
 		/*
@@ -8453,10 +8691,9 @@
 	}
 
 	/* Catch up with the cfs_rq and remove our load when we leave */
-	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+	update_cfs_rq_load_avg(now, cfs_rq, false);
 	detach_entity_load_avg(cfs_rq, se);
-	if (tg_update)
-		update_tg_load_avg(cfs_rq, false);
+	update_tg_load_avg(cfs_rq, false);
 }
 
 static void attach_task_cfs_rq(struct task_struct *p)
@@ -8464,7 +8701,6 @@
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 now = cfs_rq_clock_task(cfs_rq);
-	int tg_update;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/*
@@ -8475,10 +8711,9 @@
 #endif
 
 	/* Synchronize task with its cfs_rq */
-	tg_update = update_cfs_rq_load_avg(now, cfs_rq, false);
+	update_cfs_rq_load_avg(now, cfs_rq, false);
 	attach_entity_load_avg(cfs_rq, se);
-	if (tg_update)
-		update_tg_load_avg(cfs_rq, false);
+	update_tg_load_avg(cfs_rq, false);
 
 	if (!vruntime_normalized(p))
 		se->vruntime += cfs_rq->min_vruntime;
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 2ce5458..5405d3f 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -27,8 +27,8 @@
 pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie)
 {
 	put_prev_task(rq, prev);
-
-	schedstat_inc(rq, sched_goidle);
+	update_idle_core(rq);
+	schedstat_inc(rq->sched_goidle);
 	return rq->idle;
 }
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d5690b7..2516b8d 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -957,9 +957,8 @@
 	if (unlikely((s64)delta_exec <= 0))
 		return;
 
-	/* Kick cpufreq (see the comment in linux/cpufreq.h). */
-	if (cpu_of(rq) == smp_processor_id())
-		cpufreq_trigger_update(rq_clock(rq));
+	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+	cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c64fc51..055f935 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2,6 +2,7 @@
 #include <linux/sched.h>
 #include <linux/sched/sysctl.h>
 #include <linux/sched/rt.h>
+#include <linux/u64_stats_sync.h>
 #include <linux/sched/deadline.h>
 #include <linux/binfmts.h>
 #include <linux/mutex.h>
@@ -15,6 +16,12 @@
 #include "cpudeadline.h"
 #include "cpuacct.h"
 
+#ifdef CONFIG_SCHED_DEBUG
+#define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
+#else
+#define SCHED_WARN_ON(x)	((void)(x))
+#endif
+
 struct rq;
 struct cpuidle_state;
 
@@ -565,6 +572,8 @@
 	 */
 	cpumask_var_t rto_mask;
 	struct cpupri cpupri;
+
+	unsigned long max_cpu_capacity;
 };
 
 extern struct root_domain def_root_domain;
@@ -597,7 +606,6 @@
 #ifdef CONFIG_SMP
 	unsigned long last_load_update_tick;
 #endif /* CONFIG_SMP */
-	u64 nohz_stamp;
 	unsigned long nohz_flags;
 #endif /* CONFIG_NO_HZ_COMMON */
 #ifdef CONFIG_NO_HZ_FULL
@@ -723,6 +731,23 @@
 #endif
 }
 
+
+#ifdef CONFIG_SCHED_SMT
+
+extern struct static_key_false sched_smt_present;
+
+extern void __update_idle_core(struct rq *rq);
+
+static inline void update_idle_core(struct rq *rq)
+{
+	if (static_branch_unlikely(&sched_smt_present))
+		__update_idle_core(rq);
+}
+
+#else
+static inline void update_idle_core(struct rq *rq) { }
+#endif
+
 DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
 #define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
@@ -857,8 +882,8 @@
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_size);
 DECLARE_PER_CPU(int, sd_llc_id);
+DECLARE_PER_CPU(struct sched_domain_shared *, sd_llc_shared);
 DECLARE_PER_CPU(struct sched_domain *, sd_numa);
-DECLARE_PER_CPU(struct sched_domain *, sd_busy);
 DECLARE_PER_CPU(struct sched_domain *, sd_asym);
 
 struct sched_group_capacity {
@@ -870,10 +895,6 @@
 	unsigned int capacity;
 	unsigned long next_update;
 	int imbalance; /* XXX unrelated to capacity but shared group state */
-	/*
-	 * Number of busy cpus in this group.
-	 */
-	atomic_t nr_busy_cpus;
 
 	unsigned long cpumask[0]; /* iteration mask */
 };
@@ -1000,7 +1021,11 @@
 	 * per-task data have been completed by this moment.
 	 */
 	smp_wmb();
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+	p->cpu = cpu;
+#else
 	task_thread_info(p)->cpu = cpu;
+#endif
 	p->wake_cpu = cpu;
 #endif
 }
@@ -1260,6 +1285,11 @@
 	prev->sched_class->put_prev_task(rq, prev);
 }
 
+static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
+{
+	curr->sched_class->set_curr_task(rq);
+}
+
 #define sched_class_highest (&stop_sched_class)
 #define for_each_class(class) \
    for (class = sched_class_highest; class; class = class->next)
@@ -1290,7 +1320,7 @@
 
 static inline struct cpuidle_state *idle_get_state(struct rq *rq)
 {
-	WARN_ON(!rcu_read_lock_held());
+	SCHED_WARN_ON(!rcu_read_lock_held());
 	return rq->idle_state;
 }
 #else
@@ -1710,52 +1740,28 @@
 #endif
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
+struct irqtime {
+	u64			hardirq_time;
+	u64			softirq_time;
+	u64			irq_start_time;
+	struct u64_stats_sync	sync;
+};
 
-DECLARE_PER_CPU(u64, cpu_hardirq_time);
-DECLARE_PER_CPU(u64, cpu_softirq_time);
-
-#ifndef CONFIG_64BIT
-DECLARE_PER_CPU(seqcount_t, irq_time_seq);
-
-static inline void irq_time_write_begin(void)
-{
-	__this_cpu_inc(irq_time_seq.sequence);
-	smp_wmb();
-}
-
-static inline void irq_time_write_end(void)
-{
-	smp_wmb();
-	__this_cpu_inc(irq_time_seq.sequence);
-}
+DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
 
 static inline u64 irq_time_read(int cpu)
 {
-	u64 irq_time;
-	unsigned seq;
+	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+	unsigned int seq;
+	u64 total;
 
 	do {
-		seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
-		irq_time = per_cpu(cpu_softirq_time, cpu) +
-			   per_cpu(cpu_hardirq_time, cpu);
-	} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
+		seq = __u64_stats_fetch_begin(&irqtime->sync);
+		total = irqtime->softirq_time + irqtime->hardirq_time;
+	} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
 
-	return irq_time;
+	return total;
 }
-#else /* CONFIG_64BIT */
-static inline void irq_time_write_begin(void)
-{
-}
-
-static inline void irq_time_write_end(void)
-{
-}
-
-static inline u64 irq_time_read(int cpu)
-{
-	return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
-}
-#endif /* CONFIG_64BIT */
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
 #ifdef CONFIG_CPU_FREQ
@@ -1763,27 +1769,13 @@
 
 /**
  * cpufreq_update_util - Take a note about CPU utilization changes.
- * @time: Current time.
- * @util: Current utilization.
- * @max: Utilization ceiling.
+ * @rq: Runqueue to carry out the update for.
+ * @flags: Update reason flags.
  *
- * This function is called by the scheduler on every invocation of
- * update_load_avg() on the CPU whose utilization is being updated.
+ * This function is called by the scheduler on the CPU whose utilization is
+ * being updated.
  *
  * It can only be called from RCU-sched read-side critical sections.
- */
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
-{
-       struct update_util_data *data;
-
-       data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
-       if (data)
-               data->func(data, time, util, max);
-}
-
-/**
- * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
- * @time: Current time.
  *
  * The way cpufreq is currently arranged requires it to evaluate the CPU
  * performance state (frequency/voltage) on a regular basis to prevent it from
@@ -1797,13 +1789,23 @@
  * but that really is a band-aid.  Going forward it should be replaced with
  * solutions targeted more specifically at RT and DL tasks.
  */
-static inline void cpufreq_trigger_update(u64 time)
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
 {
-	cpufreq_update_util(time, ULONG_MAX, 0);
+	struct update_util_data *data;
+
+	data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+	if (data)
+		data->func(data, rq_clock(rq), flags);
+}
+
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
+{
+	if (cpu_of(rq) == smp_processor_id())
+		cpufreq_update_util(rq, flags);
 }
 #else
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
-static inline void cpufreq_trigger_update(u64 time) {}
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef arch_scale_freq_capacity
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 78955cb..34659a8 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -29,11 +29,12 @@
 	if (rq)
 		rq->rq_sched_info.run_delay += delta;
 }
-# define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
-# define schedstat_inc(rq, field)	do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
-# define schedstat_add(rq, field, amt)	do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
-# define schedstat_set(var, val)	do { if (schedstat_enabled()) { var = (val); } } while (0)
-# define schedstat_val(rq, field)	((schedstat_enabled()) ? (rq)->field : 0)
+#define schedstat_enabled()		static_branch_unlikely(&sched_schedstats)
+#define schedstat_inc(var)		do { if (schedstat_enabled()) { var++; } } while (0)
+#define schedstat_add(var, amt)		do { if (schedstat_enabled()) { var += (amt); } } while (0)
+#define schedstat_set(var, val)		do { if (schedstat_enabled()) { var = (val); } } while (0)
+#define schedstat_val(var)		(var)
+#define schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
 
 #else /* !CONFIG_SCHEDSTATS */
 static inline void
@@ -45,12 +46,13 @@
 static inline void
 rq_sched_info_depart(struct rq *rq, unsigned long long delta)
 {}
-# define schedstat_enabled()		0
-# define schedstat_inc(rq, field)	do { } while (0)
-# define schedstat_add(rq, field, amt)	do { } while (0)
-# define schedstat_set(var, val)	do { } while (0)
-# define schedstat_val(rq, field)	0
-#endif
+#define schedstat_enabled()		0
+#define schedstat_inc(var)		do { } while (0)
+#define schedstat_add(var, amt)		do { } while (0)
+#define schedstat_set(var, val)		do { } while (0)
+#define schedstat_val(var)		0
+#define schedstat_val_or_zero(var)	0
+#endif /* CONFIG_SCHEDSTATS */
 
 #ifdef CONFIG_SCHED_INFO
 static inline void sched_info_reset_dequeued(struct task_struct *t)
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index f15d6b6..4f70535 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -196,27 +196,48 @@
 }
 EXPORT_SYMBOL(prepare_to_wait_exclusive);
 
+void init_wait_entry(wait_queue_t *wait, int flags)
+{
+	wait->flags = flags;
+	wait->private = current;
+	wait->func = autoremove_wake_function;
+	INIT_LIST_HEAD(&wait->task_list);
+}
+EXPORT_SYMBOL(init_wait_entry);
+
 long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state)
 {
 	unsigned long flags;
-
-	if (signal_pending_state(state, current))
-		return -ERESTARTSYS;
-
-	wait->private = current;
-	wait->func = autoremove_wake_function;
+	long ret = 0;
 
 	spin_lock_irqsave(&q->lock, flags);
-	if (list_empty(&wait->task_list)) {
-		if (wait->flags & WQ_FLAG_EXCLUSIVE)
-			__add_wait_queue_tail(q, wait);
-		else
-			__add_wait_queue(q, wait);
+	if (unlikely(signal_pending_state(state, current))) {
+		/*
+		 * Exclusive waiter must not fail if it was selected by wakeup,
+		 * it should "consume" the condition we were waiting for.
+		 *
+		 * The caller will recheck the condition and return success if
+		 * we were already woken up, we can not miss the event because
+		 * wakeup locks/unlocks the same q->lock.
+		 *
+		 * But we need to ensure that set-condition + wakeup after that
+		 * can't see us, it should wake up another exclusive waiter if
+		 * we fail.
+		 */
+		list_del_init(&wait->task_list);
+		ret = -ERESTARTSYS;
+	} else {
+		if (list_empty(&wait->task_list)) {
+			if (wait->flags & WQ_FLAG_EXCLUSIVE)
+				__add_wait_queue_tail(q, wait);
+			else
+				__add_wait_queue(q, wait);
+		}
+		set_current_state(state);
 	}
-	set_current_state(state);
 	spin_unlock_irqrestore(&q->lock, flags);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(prepare_to_wait_event);
 
@@ -255,39 +276,6 @@
 }
 EXPORT_SYMBOL(finish_wait);
 
-/**
- * abort_exclusive_wait - abort exclusive waiting in a queue
- * @q: waitqueue waited on
- * @wait: wait descriptor
- * @mode: runstate of the waiter to be woken
- * @key: key to identify a wait bit queue or %NULL
- *
- * Sets current thread back to running state and removes
- * the wait descriptor from the given waitqueue if still
- * queued.
- *
- * Wakes up the next waiter if the caller is concurrently
- * woken up through the queue.
- *
- * This prevents waiter starvation where an exclusive waiter
- * aborts and is woken up concurrently and no one wakes up
- * the next waiter.
- */
-void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
-			unsigned int mode, void *key)
-{
-	unsigned long flags;
-
-	__set_current_state(TASK_RUNNING);
-	spin_lock_irqsave(&q->lock, flags);
-	if (!list_empty(&wait->task_list))
-		list_del_init(&wait->task_list);
-	else if (waitqueue_active(q))
-		__wake_up_locked_key(q, mode, key);
-	spin_unlock_irqrestore(&q->lock, flags);
-}
-EXPORT_SYMBOL(abort_exclusive_wait);
-
 int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
 	int ret = default_wake_function(wait, mode, sync, key);
@@ -425,20 +413,29 @@
 __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q,
 			wait_bit_action_f *action, unsigned mode)
 {
-	do {
-		int ret;
+	int ret = 0;
 
+	for (;;) {
 		prepare_to_wait_exclusive(wq, &q->wait, mode);
-		if (!test_bit(q->key.bit_nr, q->key.flags))
-			continue;
-		ret = action(&q->key, mode);
-		if (!ret)
-			continue;
-		abort_exclusive_wait(wq, &q->wait, mode, &q->key);
-		return ret;
-	} while (test_and_set_bit(q->key.bit_nr, q->key.flags));
-	finish_wait(wq, &q->wait);
-	return 0;
+		if (test_bit(q->key.bit_nr, q->key.flags)) {
+			ret = action(&q->key, mode);
+			/*
+			 * See the comment in prepare_to_wait_event().
+			 * finish_wait() does not necessarily takes wq->lock,
+			 * but test_and_set_bit() implies mb() which pairs with
+			 * smp_mb__after_atomic() before wake_up_page().
+			 */
+			if (ret)
+				finish_wait(wq, &q->wait);
+		}
+		if (!test_and_set_bit(q->key.bit_nr, q->key.flags)) {
+			if (!ret)
+				finish_wait(wq, &q->wait);
+			return 0;
+		} else if (ret) {
+			return ret;
+		}
+	}
 }
 EXPORT_SYMBOL(__wait_on_bit_lock);
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ef6c6c3..0db7c8a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -605,12 +605,16 @@
 		ptrace_event(PTRACE_EVENT_SECCOMP, data);
 		/*
 		 * The delivery of a fatal signal during event
-		 * notification may silently skip tracer notification.
-		 * Terminating the task now avoids executing a system
-		 * call that may not be intended.
+		 * notification may silently skip tracer notification,
+		 * which could leave us with a potentially unmodified
+		 * syscall that the tracer would have liked to have
+		 * changed. Since the process is about to die, we just
+		 * force the syscall to be skipped and let the signal
+		 * kill the process and correctly handle any tracer exit
+		 * notifications.
 		 */
 		if (fatal_signal_pending(current))
-			do_exit(SIGSYS);
+			goto skip;
 		/* Check if the tracer forced the syscall to be skipped. */
 		this_syscall = syscall_get_nr(current, task_pt_regs(current));
 		if (this_syscall < 0)
diff --git a/kernel/signal.c b/kernel/signal.c
index af21afc..75761ac 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3044,6 +3044,11 @@
 }
 EXPORT_SYMBOL(kernel_sigaction);
 
+void __weak sigaction_compat_abi(struct k_sigaction *act,
+		struct k_sigaction *oact)
+{
+}
+
 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct task_struct *p = current, *t;
@@ -3059,6 +3064,8 @@
 	if (oact)
 		*oact = *k;
 
+	sigaction_compat_abi(act, oact);
+
 	if (act) {
 		sigdelsetmask(&act->sa.sa_mask,
 			      sigmask(SIGKILL) | sigmask(SIGSTOP));
diff --git a/kernel/smp.c b/kernel/smp.c
index 3aa642d..bba3b20 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -14,6 +14,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/sched.h>
+#include <linux/hypervisor.h>
 
 #include "smpboot.h"
 
@@ -724,3 +725,54 @@
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);
+
+/**
+ * smp_call_on_cpu - Call a function on a specific cpu
+ *
+ * Used to call a function on a specific cpu and wait for it to return.
+ * Optionally make sure the call is done on a specified physical cpu via vcpu
+ * pinning in order to support virtualized environments.
+ */
+struct smp_call_on_cpu_struct {
+	struct work_struct	work;
+	struct completion	done;
+	int			(*func)(void *);
+	void			*data;
+	int			ret;
+	int			cpu;
+};
+
+static void smp_call_on_cpu_callback(struct work_struct *work)
+{
+	struct smp_call_on_cpu_struct *sscs;
+
+	sscs = container_of(work, struct smp_call_on_cpu_struct, work);
+	if (sscs->cpu >= 0)
+		hypervisor_pin_vcpu(sscs->cpu);
+	sscs->ret = sscs->func(sscs->data);
+	if (sscs->cpu >= 0)
+		hypervisor_pin_vcpu(-1);
+
+	complete(&sscs->done);
+}
+
+int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
+{
+	struct smp_call_on_cpu_struct sscs = {
+		.done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
+		.func = func,
+		.data = par,
+		.cpu  = phys ? cpu : -1,
+	};
+
+	INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);
+
+	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
+		return -ENXIO;
+
+	queue_work_on(cpu, system_wq, &sscs.work);
+	wait_for_completion(&sscs.done);
+
+	return sscs.ret;
+}
+EXPORT_SYMBOL_GPL(smp_call_on_cpu);
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 13bc43d..fc0d8270 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -122,12 +122,12 @@
 
 		if (kthread_should_park()) {
 			__set_current_state(TASK_RUNNING);
-			preempt_enable();
 			if (ht->park && td->status == HP_THREAD_ACTIVE) {
 				BUG_ON(td->cpu != smp_processor_id());
 				ht->park(td->cpu);
 				td->status = HP_THREAD_PARKED;
 			}
+			preempt_enable();
 			kthread_parkme();
 			/* We might have been woken for stop */
 			continue;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 17caf4b..6676264 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -78,6 +78,17 @@
 }
 
 /*
+ * If ksoftirqd is scheduled, we do not want to process pending softirqs
+ * right now. Let ksoftirqd handle this at its own rate, to get fairness.
+ */
+static bool ksoftirqd_running(void)
+{
+	struct task_struct *tsk = __this_cpu_read(ksoftirqd);
+
+	return tsk && (tsk->state == TASK_RUNNING);
+}
+
+/*
  * preempt_count and SOFTIRQ_OFFSET usage:
  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
  *   softirq processing.
@@ -313,7 +324,7 @@
 
 	pending = local_softirq_pending();
 
-	if (pending)
+	if (pending && !ksoftirqd_running())
 		do_softirq_own_stack();
 
 	local_irq_restore(flags);
@@ -340,6 +351,9 @@
 
 static inline void invoke_softirq(void)
 {
+	if (ksoftirqd_running())
+		return;
+
 	if (!force_irqthreads) {
 #ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
 		/*
@@ -700,7 +714,7 @@
 	BUG();
 }
 
-static void takeover_tasklets(unsigned int cpu)
+static int takeover_tasklets(unsigned int cpu)
 {
 	/* CPU is dead, so no lock needed. */
 	local_irq_disable();
@@ -723,27 +737,12 @@
 	raise_softirq_irqoff(HI_SOFTIRQ);
 
 	local_irq_enable();
+	return 0;
 }
+#else
+#define takeover_tasklets	NULL
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int cpu_callback(struct notifier_block *nfb, unsigned long action,
-			void *hcpu)
-{
-	switch (action) {
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		takeover_tasklets((unsigned long)hcpu);
-		break;
-#endif /* CONFIG_HOTPLUG_CPU */
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block cpu_nfb = {
-	.notifier_call = cpu_callback
-};
-
 static struct smp_hotplug_thread softirq_threads = {
 	.store			= &ksoftirqd,
 	.thread_should_run	= ksoftirqd_should_run,
@@ -753,8 +752,8 @@
 
 static __init int spawn_ksoftirqd(void)
 {
-	register_cpu_notifier(&cpu_nfb);
-
+	cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
+				  takeover_tasklets);
 	BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
 
 	return 0;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 4a1ca5f..ec9ab2f 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,7 +20,6 @@
 #include <linux/kallsyms.h>
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
-#include <linux/lglock.h>
 #include <linux/nmi.h>
 
 /*
@@ -47,13 +46,9 @@
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static bool stop_machine_initialized = false;
 
-/*
- * Avoids a race between stop_two_cpus and global stop_cpus, where
- * the stoppers could get queued up in reverse order, leading to
- * system deadlock. Using an lglock means stop_two_cpus remains
- * relatively cheap.
- */
-DEFINE_STATIC_LGLOCK(stop_cpus_lock);
+/* static data for stop_cpus */
+static DEFINE_MUTEX(stop_cpus_mutex);
+static bool stop_cpus_in_progress;
 
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
 {
@@ -126,6 +121,11 @@
 	cpu_stop_init_done(&done, 1);
 	if (!cpu_stop_queue_work(cpu, &work))
 		return -ENOENT;
+	/*
+	 * In case @cpu == smp_proccessor_id() we can avoid a sleep+wakeup
+	 * cycle by doing a preemption:
+	 */
+	cond_resched();
 	wait_for_completion(&done.completion);
 	return done.ret;
 }
@@ -230,14 +230,26 @@
 	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
 	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
 	int err;
-
-	lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
+retry:
 	spin_lock_irq(&stopper1->lock);
 	spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
 
 	err = -ENOENT;
 	if (!stopper1->enabled || !stopper2->enabled)
 		goto unlock;
+	/*
+	 * Ensure that if we race with __stop_cpus() the stoppers won't get
+	 * queued up in reverse order leading to system deadlock.
+	 *
+	 * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has
+	 * queued a work on cpu1 but not on cpu2, we hold both locks.
+	 *
+	 * It can be falsely true but it is safe to spin until it is cleared,
+	 * queue_stop_cpus_work() does everything under preempt_disable().
+	 */
+	err = -EDEADLK;
+	if (unlikely(stop_cpus_in_progress))
+			goto unlock;
 
 	err = 0;
 	__cpu_stop_queue_work(stopper1, work1);
@@ -245,8 +257,12 @@
 unlock:
 	spin_unlock(&stopper2->lock);
 	spin_unlock_irq(&stopper1->lock);
-	lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
 
+	if (unlikely(err == -EDEADLK)) {
+		while (stop_cpus_in_progress)
+			cpu_relax();
+		goto retry;
+	}
 	return err;
 }
 /**
@@ -316,9 +332,6 @@
 	return cpu_stop_queue_work(cpu, work_buf);
 }
 
-/* static data for stop_cpus */
-static DEFINE_MUTEX(stop_cpus_mutex);
-
 static bool queue_stop_cpus_work(const struct cpumask *cpumask,
 				 cpu_stop_fn_t fn, void *arg,
 				 struct cpu_stop_done *done)
@@ -332,7 +345,8 @@
 	 * preempted by a stopper which might wait for other stoppers
 	 * to enter @fn which can lead to deadlock.
 	 */
-	lg_global_lock(&stop_cpus_lock);
+	preempt_disable();
+	stop_cpus_in_progress = true;
 	for_each_cpu(cpu, cpumask) {
 		work = &per_cpu(cpu_stopper.stop_work, cpu);
 		work->fn = fn;
@@ -341,7 +355,8 @@
 		if (cpu_stop_queue_work(cpu, work))
 			queued = true;
 	}
-	lg_global_unlock(&stop_cpus_lock);
+	stop_cpus_in_progress = false;
+	preempt_enable();
 
 	return queued;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b43d0b2..a13bbda 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2140,6 +2140,21 @@
 	return 0;
 }
 
+static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
+				 int *valp,
+				 int write, void *data)
+{
+	if (write) {
+		if (*negp)
+			return -EINVAL;
+		*valp = *lvalp;
+	} else {
+		unsigned int val = *valp;
+		*lvalp = (unsigned long)val;
+	}
+	return 0;
+}
+
 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
 
 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
@@ -2259,8 +2274,27 @@
 int proc_dointvec(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-    return do_proc_dointvec(table,write,buffer,lenp,ppos,
-		    	    NULL,NULL);
+	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
+}
+
+/**
+ * proc_douintvec - read a vector of unsigned integers
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return do_proc_dointvec(table, write, buffer, lenp, ppos,
+				do_proc_douintvec_conv, NULL);
 }
 
 /*
@@ -2858,6 +2892,12 @@
 	return -ENOSYS;
 }
 
+int proc_douintvec(struct ctl_table *table, int write,
+		  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
 int proc_dointvec_minmax(struct ctl_table *table, int write,
 		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -2903,6 +2943,7 @@
  * exception granted :-)
  */
 EXPORT_SYMBOL(proc_dointvec);
+EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_jiffies);
 EXPORT_SYMBOL(proc_dointvec_minmax);
 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 6a5a310..7e4fad7 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -600,9 +600,18 @@
 		 */
 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
 			/* Override clocksource cannot be used. */
-			pr_warn("Override clocksource %s is not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
-				cs->name);
-			override_name[0] = 0;
+			if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
+				pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
+					cs->name);
+				override_name[0] = 0;
+			} else {
+				/*
+				 * The override cannot be currently verified.
+				 * Deferring to let the watchdog check.
+				 */
+				pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
+					cs->name);
+			}
 		} else
 			/* Override clocksource can be used. */
 			best = cs;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 9ba7c82..bb5ec42 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -307,7 +307,7 @@
  */
 ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
 {
-	ktime_t res = ktime_add(lhs, rhs);
+	ktime_t res = ktime_add_unsafe(lhs, rhs);
 
 	/*
 	 * We use KTIME_SEC_MAX here, the maximum timeout which we can
@@ -703,7 +703,7 @@
 static DECLARE_WORK(hrtimer_work, clock_was_set_work);
 
 /*
- * Called from timekeeping and resume code to reprogramm the hrtimer
+ * Called from timekeeping and resume code to reprogram the hrtimer
  * interrupt device on all cpus.
  */
 void clock_was_set_delayed(void)
@@ -1241,7 +1241,7 @@
 
 	/*
 	 * Note: We clear the running state after enqueue_hrtimer and
-	 * we do not reprogramm the event hardware. Happens either in
+	 * we do not reprogram the event hardware. Happens either in
 	 * hrtimer_start_range_ns() or in hrtimer_interrupt()
 	 *
 	 * Note: Because we dropped the cpu_base->lock above,
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 204fdc8..3bcb61b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -186,10 +186,13 @@
 	return false;
 }
 
-static bool can_stop_full_tick(struct tick_sched *ts)
+static bool can_stop_full_tick(int cpu, struct tick_sched *ts)
 {
 	WARN_ON_ONCE(!irqs_disabled());
 
+	if (unlikely(!cpu_online(cpu)))
+		return false;
+
 	if (check_tick_dependency(&tick_dep_mask))
 		return false;
 
@@ -843,7 +846,7 @@
 	if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
 		return;
 
-	if (can_stop_full_tick(ts))
+	if (can_stop_full_tick(cpu, ts))
 		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
 	else if (ts->tick_stopped)
 		tick_nohz_restart_sched_tick(ts, ktime_get());
@@ -908,10 +911,11 @@
 	ktime_t now, expires;
 	int cpu = smp_processor_id();
 
+	now = tick_nohz_start_idle(ts);
+
 	if (can_stop_idle_tick(cpu, ts)) {
 		int was_stopped = ts->tick_stopped;
 
-		now = tick_nohz_start_idle(ts);
 		ts->idle_calls++;
 
 		expires = tick_nohz_stop_sched_tick(ts, now, cpu);
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 667b9335..bd62fb8 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -780,7 +780,7 @@
 {
 	struct timespec64 res;
 
-	set_normalized_timespec64(&res, lhs.tv_sec + rhs.tv_sec,
+	set_normalized_timespec64(&res, (timeu64_t) lhs.tv_sec + rhs.tv_sec,
 			lhs.tv_nsec + rhs.tv_nsec);
 
 	if (unlikely(res.tv_sec < lhs.tv_sec || res.tv_sec < rhs.tv_sec)) {
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3b65746..e07fb09 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -401,7 +401,10 @@
 	do {
 		seq = raw_read_seqcount_latch(&tkf->seq);
 		tkr = tkf->base + (seq & 0x01);
-		now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
+		now = ktime_to_ns(tkr->base);
+
+		now += clocksource_delta(tkr->read(tkr->clock),
+					 tkr->cycle_last, tkr->mask);
 	} while (read_seqcount_retry(&tkf->seq, seq));
 
 	return now;
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index f6bd652..ca9fb80 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -23,7 +23,9 @@
 
 #include "timekeeping_internal.h"
 
-static unsigned int sleep_time_bin[32] = {0};
+#define NUM_BINS 32
+
+static unsigned int sleep_time_bin[NUM_BINS] = {0};
 
 static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
 {
@@ -69,6 +71,11 @@
 
 void tk_debug_account_sleep_time(struct timespec64 *t)
 {
-	sleep_time_bin[fls(t->tv_sec)]++;
+	/* Cap bin index so we don't overflow the array */
+	int bin = min(fls(t->tv_sec), NUM_BINS-1);
+
+	sleep_time_bin[bin]++;
+	pr_info("Suspended for %lld.%03lu seconds\n", (s64)t->tv_sec,
+			t->tv_nsec / NSEC_PER_MSEC);
 }
 
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 555670a..32bf6f7 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1496,6 +1496,7 @@
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 	u64 expires = KTIME_MAX;
 	unsigned long nextevt;
+	bool is_max_delta;
 
 	/*
 	 * Pretend that there is no timer pending if the cpu is offline.
@@ -1506,6 +1507,7 @@
 
 	spin_lock(&base->lock);
 	nextevt = __next_timer_interrupt(base);
+	is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
 	base->next_expiry = nextevt;
 	/*
 	 * We have a fresh next event. Check whether we can forward the base:
@@ -1519,7 +1521,8 @@
 		expires = basem;
 		base->is_idle = false;
 	} else {
-		expires = basem + (nextevt - basej) * TICK_NSEC;
+		if (!is_max_delta)
+			expires = basem + (nextevt - basej) * TICK_NSEC;
 		/*
 		 * If we expect to sleep more than a tick, mark the base idle:
 		 */
diff --git a/kernel/torture.c b/kernel/torture.c
index 75961b3..0d887eb 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -43,6 +43,7 @@
 #include <linux/stat.h>
 #include <linux/slab.h>
 #include <linux/trace_clock.h>
+#include <linux/ktime.h>
 #include <asm/byteorder.h>
 #include <linux/torture.h>
 
@@ -446,9 +447,8 @@
  * Variables for auto-shutdown.  This allows "lights out" torture runs
  * to be fully scripted.
  */
-static int shutdown_secs;		/* desired test duration in seconds. */
 static struct task_struct *shutdown_task;
-static unsigned long shutdown_time;	/* jiffies to system shutdown. */
+static ktime_t shutdown_time;		/* time to system shutdown. */
 static void (*torture_shutdown_hook)(void);
 
 /*
@@ -471,20 +471,20 @@
  */
 static int torture_shutdown(void *arg)
 {
-	long delta;
-	unsigned long jiffies_snap;
+	ktime_t ktime_snap;
 
 	VERBOSE_TOROUT_STRING("torture_shutdown task started");
-	jiffies_snap = jiffies;
-	while (ULONG_CMP_LT(jiffies_snap, shutdown_time) &&
+	ktime_snap = ktime_get();
+	while (ktime_before(ktime_snap, shutdown_time) &&
 	       !torture_must_stop()) {
-		delta = shutdown_time - jiffies_snap;
 		if (verbose)
 			pr_alert("%s" TORTURE_FLAG
-				 "torture_shutdown task: %lu jiffies remaining\n",
-				 torture_type, delta);
-		schedule_timeout_interruptible(delta);
-		jiffies_snap = jiffies;
+				 "torture_shutdown task: %llu ms remaining\n",
+				 torture_type,
+				 ktime_ms_delta(shutdown_time, ktime_snap));
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_hrtimeout(&shutdown_time, HRTIMER_MODE_ABS);
+		ktime_snap = ktime_get();
 	}
 	if (torture_must_stop()) {
 		torture_kthread_stopping("torture_shutdown");
@@ -511,10 +511,9 @@
 {
 	int ret = 0;
 
-	shutdown_secs = ssecs;
 	torture_shutdown_hook = cleanup;
-	if (shutdown_secs > 0) {
-		shutdown_time = jiffies + shutdown_secs * HZ;
+	if (ssecs > 0) {
+		shutdown_time = ktime_add(ktime_get(), ktime_set(ssecs, 0));
 		ret = torture_create_kthread(torture_shutdown, NULL,
 					     shutdown_task);
 	}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f4b86e8..ba33267 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -24,11 +24,6 @@
 	help
 	  See Documentation/trace/ftrace-design.txt
 
-config HAVE_FUNCTION_GRAPH_FP_TEST
-	bool
-	help
-	  See Documentation/trace/ftrace-design.txt
-
 config HAVE_DYNAMIC_FTRACE
 	bool
 	help
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7598e6c..dbafc5d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -223,7 +223,7 @@
 	what |= MASK_TC_BIT(op_flags, META);
 	what |= MASK_TC_BIT(op_flags, PREFLUSH);
 	what |= MASK_TC_BIT(op_flags, FUA);
-	if (op == REQ_OP_DISCARD)
+	if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
 		what |= BLK_TC_ACT(BLK_TC_DISCARD);
 	if (op == REQ_OP_FLUSH)
 		what |= BLK_TC_ACT(BLK_TC_FLUSH);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438f..5dcb992 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1,4 +1,5 @@
 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -8,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bpf.h>
+#include <linux/bpf_perf_event.h>
 #include <linux/filter.h>
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
@@ -59,11 +61,9 @@
 }
 EXPORT_SYMBOL_GPL(trace_call_bpf);
 
-static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
-	void *dst = (void *) (long) r1;
-	int ret, size = (int) r2;
-	void *unsafe_ptr = (void *) (long) r3;
+	int ret;
 
 	ret = probe_kernel_read(dst, unsafe_ptr, size);
 	if (unlikely(ret < 0))
@@ -81,12 +81,9 @@
 	.arg3_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
+	   u32, size)
 {
-	void *unsafe_ptr = (void *) (long) r1;
-	void *src = (void *) (long) r2;
-	int size = (int) r3;
-
 	/*
 	 * Ensure we're in user context which is safe for the helper to
 	 * run. This helper has no business in a kthread.
@@ -128,9 +125,9 @@
  * limited trace_printk()
  * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
  */
-static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+	   u64, arg2, u64, arg3)
 {
-	char *fmt = (char *) (long) r1;
 	bool str_seen = false;
 	int mod[3] = {};
 	int fmt_cnt = 0;
@@ -176,16 +173,16 @@
 
 				switch (fmt_cnt) {
 				case 1:
-					unsafe_addr = r3;
-					r3 = (long) buf;
+					unsafe_addr = arg1;
+					arg1 = (long) buf;
 					break;
 				case 2:
-					unsafe_addr = r4;
-					r4 = (long) buf;
+					unsafe_addr = arg2;
+					arg2 = (long) buf;
 					break;
 				case 3:
-					unsafe_addr = r5;
-					r5 = (long) buf;
+					unsafe_addr = arg3;
+					arg3 = (long) buf;
 					break;
 				}
 				buf[0] = 0;
@@ -207,9 +204,9 @@
 	}
 
 	return __trace_printk(1/* fake ip will not be printed */, fmt,
-			      mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
-			      mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
-			      mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
+			      mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1,
+			      mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2,
+			      mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3);
 }
 
 static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -231,9 +228,8 @@
 	return &bpf_trace_printk_proto;
 }
 
-static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
 {
-	struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	unsigned int cpu = smp_processor_id();
 	u64 index = flags & BPF_F_INDEX_MASK;
@@ -310,11 +306,9 @@
 	return 0;
 }
 
-static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
+	   u64, flags, void *, data, u64, size)
 {
-	struct pt_regs *regs = (struct pt_regs *)(long) r1;
-	struct bpf_map *map  = (struct bpf_map *)(long) r2;
-	void *data = (void *)(long) r4;
 	struct perf_raw_record raw = {
 		.frag = {
 			.size = size,
@@ -365,7 +359,7 @@
 	return __bpf_perf_event_output(regs, map, flags, &raw);
 }
 
-static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_0(bpf_get_current_task)
 {
 	return (long) current;
 }
@@ -376,6 +370,31 @@
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct cgroup *cgrp;
+
+	if (unlikely(in_interrupt()))
+		return -EINVAL;
+	if (unlikely(idx >= array->map.max_entries))
+		return -E2BIG;
+
+	cgrp = READ_ONCE(array->ptrs[idx]);
+	if (unlikely(!cgrp))
+		return -EAGAIN;
+
+	return task_under_cgroup_hierarchy(current, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
+	.func           = bpf_current_task_under_cgroup,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_CONST_MAP_PTR,
+	.arg2_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -407,6 +426,10 @@
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_probe_write_user:
 		return bpf_get_probe_write_proto();
+	case BPF_FUNC_current_task_under_cgroup:
+		return &bpf_current_task_under_cgroup_proto;
+	case BPF_FUNC_get_prandom_u32:
+		return &bpf_get_prandom_u32_proto;
 	default:
 		return NULL;
 	}
@@ -447,16 +470,17 @@
 	.type	= BPF_PROG_TYPE_KPROBE,
 };
 
-static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
+BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
+	   u64, flags, void *, data, u64, size)
 {
+	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
+
 	/*
 	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
 	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
-	 * from there and call the same bpf_perf_event_output() helper
+	 * from there and call the same bpf_perf_event_output() helper inline.
 	 */
-	u64 ctx = *(long *)(uintptr_t)r1;
-
-	return bpf_perf_event_output(ctx, r2, index, r4, size);
+	return ____bpf_perf_event_output(regs, map, flags, data, size);
 }
 
 static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
@@ -470,11 +494,18 @@
 	.arg5_type	= ARG_CONST_STACK_SIZE,
 };
 
-static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
+	   u64, flags)
 {
-	u64 ctx = *(long *)(uintptr_t)r1;
+	struct pt_regs *regs = *(struct pt_regs **)tp_buff;
 
-	return bpf_get_stackid(ctx, r2, r3, r4, r5);
+	/*
+	 * Same comment as in bpf_perf_event_output_tp(), only that this time
+	 * the other helper's function body cannot be inlined due to being
+	 * external, thus we need to call raw helper function.
+	 */
+	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
+			       flags, 0, 0);
 }
 
 static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
@@ -520,10 +551,69 @@
 	.type	= BPF_PROG_TYPE_TRACEPOINT,
 };
 
+static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
+				    enum bpf_reg_type *reg_type)
+{
+	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
+		return false;
+	if (type != BPF_READ)
+		return false;
+	if (off % size != 0)
+		return false;
+	if (off == offsetof(struct bpf_perf_event_data, sample_period)) {
+		if (size != sizeof(u64))
+			return false;
+	} else {
+		if (size != sizeof(long))
+			return false;
+	}
+	return true;
+}
+
+static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+				      int src_reg, int ctx_off,
+				      struct bpf_insn *insn_buf,
+				      struct bpf_prog *prog)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (ctx_off) {
+	case offsetof(struct bpf_perf_event_data, sample_period):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64));
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+						       data), dst_reg, src_reg,
+				      offsetof(struct bpf_perf_event_data_kern, data));
+		*insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg,
+				      offsetof(struct perf_sample_data, period));
+		break;
+	default:
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
+						       regs), dst_reg, src_reg,
+				      offsetof(struct bpf_perf_event_data_kern, regs));
+		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off);
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
+static const struct bpf_verifier_ops perf_event_prog_ops = {
+	.get_func_proto		= tp_prog_func_proto,
+	.is_valid_access	= pe_prog_is_valid_access,
+	.convert_ctx_access	= pe_prog_convert_ctx_access,
+};
+
+static struct bpf_prog_type_list perf_event_tl = {
+	.ops	= &perf_event_prog_ops,
+	.type	= BPF_PROG_TYPE_PERF_EVENT,
+};
+
 static int __init register_kprobe_prog_ops(void)
 {
 	bpf_register_prog_type(&kprobe_tl);
 	bpf_register_prog_type(&tracepoint_tl);
+	bpf_register_prog_type(&perf_event_tl);
 	return 0;
 }
 late_initcall(register_kprobe_prog_ops);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dade4c9..37824d9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4123,6 +4123,30 @@
 	"\t\t\t  traces\n"
 #endif
 #endif /* CONFIG_STACK_TRACER */
+#ifdef CONFIG_KPROBE_EVENT
+	"  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
+	"\t\t\t  Write into this file to define/undefine new trace events.\n"
+#endif
+#ifdef CONFIG_UPROBE_EVENT
+	"  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
+	"\t\t\t  Write into this file to define/undefine new trace events.\n"
+#endif
+#if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
+	"\t  accepts: event-definitions (one definition per line)\n"
+	"\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
+	"\t           -:[<group>/]<event>\n"
+#ifdef CONFIG_KPROBE_EVENT
+	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
+#endif
+#ifdef CONFIG_UPROBE_EVENT
+	"\t    place: <path>:<offset>\n"
+#endif
+	"\t     args: <name>=fetcharg[:type]\n"
+	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
+	"\t           $stack<index>, $stack, $retval, $comm\n"
+	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
+	"\t           b<bit-width>@<bit-offset>/<container-size>\n"
+#endif
 	"  events/\t\t- Directory containing all trace event subsystems:\n"
 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
@@ -5124,19 +5148,20 @@
 	struct trace_iterator *iter = filp->private_data;
 	ssize_t sret;
 
-	/* return any leftover data */
-	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
-	if (sret != -EBUSY)
-		return sret;
-
-	trace_seq_init(&iter->seq);
-
 	/*
 	 * Avoid more than one consumer on a single file descriptor
 	 * This is just a matter of traces coherency, the ring buffer itself
 	 * is protected.
 	 */
 	mutex_lock(&iter->mutex);
+
+	/* return any leftover data */
+	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+	if (sret != -EBUSY)
+		goto out;
+
+	trace_seq_init(&iter->seq);
+
 	if (iter->trace->read) {
 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
 		if (sret)
@@ -6163,9 +6188,6 @@
 		return -EBUSY;
 #endif
 
-	if (splice_grow_spd(pipe, &spd))
-		return -ENOMEM;
-
 	if (*ppos & (PAGE_SIZE - 1))
 		return -EINVAL;
 
@@ -6175,6 +6197,9 @@
 		len &= PAGE_MASK;
 	}
 
+	if (splice_grow_spd(pipe, &spd))
+		return -ENOMEM;
+
  again:
 	trace_access_lock(iter->cpu_file);
 	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
@@ -6232,19 +6257,21 @@
 	/* did we read anything? */
 	if (!spd.nr_pages) {
 		if (ret)
-			return ret;
+			goto out;
 
+		ret = -EAGAIN;
 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
-			return -EAGAIN;
+			goto out;
 
 		ret = wait_on_pipe(iter, true);
 		if (ret)
-			return ret;
+			goto out;
 
 		goto again;
 	}
 
 	ret = splice_to_pipe(pipe, &spd);
+out:
 	splice_shrink_spd(&spd);
 
 	return ret;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 7363ccf..0cbe38a 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -119,7 +119,7 @@
 /* Add a function return address to the trace stack on thread info.*/
 int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
-			 unsigned long frame_pointer)
+			 unsigned long frame_pointer, unsigned long *retp)
 {
 	unsigned long long calltime;
 	int index;
@@ -171,7 +171,12 @@
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
 	current->ret_stack[index].subtime = 0;
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	current->ret_stack[index].fp = frame_pointer;
+#endif
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+	current->ret_stack[index].retp = retp;
+#endif
 	*depth = current->curr_ret_stack;
 
 	return 0;
@@ -204,7 +209,7 @@
 		return;
 	}
 
-#if defined(CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST) && !defined(CC_USING_FENTRY)
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	/*
 	 * The arch may choose to record the frame pointer used
 	 * and check it here to make sure that it is what we expect it
@@ -279,6 +284,64 @@
 	return ret;
 }
 
+/**
+ * ftrace_graph_ret_addr - convert a potentially modified stack return address
+ *			   to its original value
+ *
+ * This function can be called by stack unwinding code to convert a found stack
+ * return address ('ret') to its original value, in case the function graph
+ * tracer has modified it to be 'return_to_handler'.  If the address hasn't
+ * been modified, the unchanged value of 'ret' is returned.
+ *
+ * 'idx' is a state variable which should be initialized by the caller to zero
+ * before the first call.
+ *
+ * 'retp' is a pointer to the return address on the stack.  It's ignored if
+ * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined.
+ */
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+				    unsigned long ret, unsigned long *retp)
+{
+	int index = task->curr_ret_stack;
+	int i;
+
+	if (ret != (unsigned long)return_to_handler)
+		return ret;
+
+	if (index < -1)
+		index += FTRACE_NOTRACE_DEPTH;
+
+	if (index < 0)
+		return ret;
+
+	for (i = 0; i <= index; i++)
+		if (task->ret_stack[i].retp == retp)
+			return task->ret_stack[i].ret;
+
+	return ret;
+}
+#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
+				    unsigned long ret, unsigned long *retp)
+{
+	int task_idx;
+
+	if (ret != (unsigned long)return_to_handler)
+		return ret;
+
+	task_idx = task->curr_ret_stack;
+
+	if (!task->ret_stack || task_idx < *idx)
+		return ret;
+
+	task_idx -= *idx;
+	(*idx)++;
+
+	return task->ret_stack[task_idx].ret;
+}
+#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
+
 int __trace_graph_entry(struct trace_array *tr,
 				struct ftrace_graph_ent *trace,
 				unsigned long flags,
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 9aedb0b..eb6c9f1 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -253,6 +253,10 @@
 	ASSIGN_FETCH_TYPE(s16, u16, 1),
 	ASSIGN_FETCH_TYPE(s32, u32, 1),
 	ASSIGN_FETCH_TYPE(s64, u64, 1),
+	ASSIGN_FETCH_TYPE_ALIAS(x8,  u8,  u8,  0),
+	ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
+	ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
+	ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
 
 	ASSIGN_FETCH_TYPE_END
 };
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 74e80a5..8c0553d 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -36,24 +36,28 @@
 };
 
 /* Printing  in basic type function template */
-#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt)				\
-int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name,	\
+#define DEFINE_BASIC_PRINT_TYPE_FUNC(tname, type, fmt)			\
+int PRINT_TYPE_FUNC_NAME(tname)(struct trace_seq *s, const char *name,	\
 				void *data, void *ent)			\
 {									\
 	trace_seq_printf(s, " %s=" fmt, name, *(type *)data);		\
 	return !trace_seq_has_overflowed(s);				\
 }									\
-const char PRINT_TYPE_FMT_NAME(type)[] = fmt;				\
-NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
+const char PRINT_TYPE_FMT_NAME(tname)[] = fmt;				\
+NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(tname));
 
-DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
-DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
-DEFINE_BASIC_PRINT_TYPE_FUNC(u32, "0x%x")
-DEFINE_BASIC_PRINT_TYPE_FUNC(u64, "0x%Lx")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s8,  "%d")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s16, "%d")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d")
-DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u8,  u8,  "%u")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u16, u16, "%u")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u32, u32, "%u")
+DEFINE_BASIC_PRINT_TYPE_FUNC(u64, u64, "%Lu")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s8,  s8,  "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s16, s16, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s32, s32, "%d")
+DEFINE_BASIC_PRINT_TYPE_FUNC(s64, s64, "%Ld")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x8,  u8,  "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x16, u16, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x32, u32, "0x%x")
+DEFINE_BASIC_PRINT_TYPE_FUNC(x64, u64, "0x%Lx")
 
 /* Print type function for string type */
 int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 45400ca..0c0ae54 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -149,6 +149,11 @@
 DECLARE_BASIC_PRINT_TYPE_FUNC(s16);
 DECLARE_BASIC_PRINT_TYPE_FUNC(s32);
 DECLARE_BASIC_PRINT_TYPE_FUNC(s64);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x8);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x16);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x32);
+DECLARE_BASIC_PRINT_TYPE_FUNC(x64);
+
 DECLARE_BASIC_PRINT_TYPE_FUNC(string);
 
 #define FETCH_FUNC_NAME(method, type)	fetch_##method##_##type
@@ -203,7 +208,7 @@
 DEFINE_FETCH_##method(u64)
 
 /* Default (unsigned long) fetch type */
-#define __DEFAULT_FETCH_TYPE(t) u##t
+#define __DEFAULT_FETCH_TYPE(t) x##t
 #define _DEFAULT_FETCH_TYPE(t) __DEFAULT_FETCH_TYPE(t)
 #define DEFAULT_FETCH_TYPE _DEFAULT_FETCH_TYPE(BITS_PER_LONG)
 #define DEFAULT_FETCH_TYPE_STR __stringify(DEFAULT_FETCH_TYPE)
@@ -234,6 +239,10 @@
 #define ASSIGN_FETCH_TYPE(ptype, ftype, sign)			\
 	__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #ptype)
 
+/* If ptype is an alias of atype, use this macro (show atype in format) */
+#define ASSIGN_FETCH_TYPE_ALIAS(ptype, atype, ftype, sign)		\
+	__ASSIGN_FETCH_TYPE(#ptype, ptype, ftype, sizeof(ftype), sign, #atype)
+
 #define ASSIGN_FETCH_TYPE_END {}
 
 #define FETCH_TYPE_STRING	0
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index c534854..7a68732 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -211,6 +211,10 @@
 	ASSIGN_FETCH_TYPE(s16, u16, 1),
 	ASSIGN_FETCH_TYPE(s32, u32, 1),
 	ASSIGN_FETCH_TYPE(s64, u64, 1),
+	ASSIGN_FETCH_TYPE_ALIAS(x8,  u8,  u8,  0),
+	ASSIGN_FETCH_TYPE_ALIAS(x16, u16, u16, 0),
+	ASSIGN_FETCH_TYPE_ALIAS(x32, u32, u32, 0),
+	ASSIGN_FETCH_TYPE_ALIAS(x64, u64, u64, 0),
 
 	ASSIGN_FETCH_TYPE_END
 };
diff --git a/kernel/up.c b/kernel/up.c
index 1760bf3..ee81ac9 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -6,6 +6,7 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/smp.h>
+#include <linux/hypervisor.h>
 
 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 				int wait)
@@ -82,3 +83,20 @@
 	preempt_enable();
 }
 EXPORT_SYMBOL(on_each_cpu_cond);
+
+int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
+{
+	int ret;
+
+	if (cpu != 0)
+		return -ENXIO;
+
+	if (phys)
+		hypervisor_pin_vcpu(0);
+	ret = func(par);
+	if (phys)
+		hypervisor_pin_vcpu(-1);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(smp_call_on_cpu);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2307d7c..cab7405 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -821,7 +821,7 @@
 	help
 	  Say Y here to enable the kernel to detect "hung tasks",
 	  which are bugs that cause the task to be stuck in
-	  uninterruptible "D" state indefinitiley.
+	  uninterruptible "D" state indefinitely.
 
 	  When a hung task is detected, the kernel will print the
 	  current stack trace (which you should report), but the
@@ -1686,24 +1686,6 @@
 	  Enable this option if you want to use the LatencyTOP tool
 	  to find out which userspace is blocking on what kernel operations.
 
-config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
-	bool
-
-config DEBUG_STRICT_USER_COPY_CHECKS
-	bool "Strict user copy size checks"
-	depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
-	depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
-	help
-	  Enabling this option turns a certain set of sanity checks for user
-	  copy operations into compile time failures.
-
-	  The copy_from_user() etc checks are there to help test if there
-	  are sufficient security checks on the length argument of
-	  the copy operation, by having gcc prove that the argument is
-	  within bounds.
-
-	  If unsure, say N.
-
 source kernel/trace/Kconfig
 
 menu "Runtime Testing"
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 39494af..bc6e651 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,6 +1,9 @@
 config ARCH_HAS_UBSAN_SANITIZE_ALL
 	bool
 
+config ARCH_WANTS_UBSAN_NO_NULL
+	def_bool n
+
 config UBSAN
 	bool "Undefined behaviour sanity checker"
 	help
@@ -34,3 +37,11 @@
 	  This option enables detection of unaligned memory accesses.
 	  Enabling this option on architectures that support unaligned
 	  accesses may produce a lot of false positives.
+
+config UBSAN_NULL
+	bool "Enable checking of null pointers"
+	depends on UBSAN
+	default y if !ARCH_WANTS_UBSAN_NO_NULL
+	help
+	  This option enables detection of memory accesses via a
+	  null pointer.
diff --git a/lib/Makefile b/lib/Makefile
index cfa68eb..df747e5 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,9 +22,8 @@
 	 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
+	 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o
 
-obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 lib-$(CONFIG_HAS_DMA) += dma-noop.o
diff --git a/lib/cpu-notifier-error-inject.c b/lib/cpu-notifier-error-inject.c
index 707ca24..0e2c9a1 100644
--- a/lib/cpu-notifier-error-inject.c
+++ b/lib/cpu-notifier-error-inject.c
@@ -8,16 +8,47 @@
 module_param(priority, int, 0);
 MODULE_PARM_DESC(priority, "specify cpu notifier priority");
 
+#define UP_PREPARE 0
+#define UP_PREPARE_FROZEN 0
+#define DOWN_PREPARE 0
+#define DOWN_PREPARE_FROZEN 0
+
 static struct notifier_err_inject cpu_notifier_err_inject = {
 	.actions = {
-		{ NOTIFIER_ERR_INJECT_ACTION(CPU_UP_PREPARE) },
-		{ NOTIFIER_ERR_INJECT_ACTION(CPU_UP_PREPARE_FROZEN) },
-		{ NOTIFIER_ERR_INJECT_ACTION(CPU_DOWN_PREPARE) },
-		{ NOTIFIER_ERR_INJECT_ACTION(CPU_DOWN_PREPARE_FROZEN) },
+		{ NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE) },
+		{ NOTIFIER_ERR_INJECT_ACTION(UP_PREPARE_FROZEN) },
+		{ NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE) },
+		{ NOTIFIER_ERR_INJECT_ACTION(DOWN_PREPARE_FROZEN) },
 		{}
 	}
 };
 
+static int notf_err_handle(struct notifier_err_inject_action *action)
+{
+	int ret;
+
+	ret = action->error;
+	if (ret)
+		pr_info("Injecting error (%d) to %s\n", ret, action->name);
+	return ret;
+}
+
+static int notf_err_inj_up_prepare(unsigned int cpu)
+{
+	if (!cpuhp_tasks_frozen)
+		return notf_err_handle(&cpu_notifier_err_inject.actions[0]);
+	else
+		return notf_err_handle(&cpu_notifier_err_inject.actions[1]);
+}
+
+static int notf_err_inj_dead(unsigned int cpu)
+{
+	if (!cpuhp_tasks_frozen)
+		return notf_err_handle(&cpu_notifier_err_inject.actions[2]);
+	else
+		return notf_err_handle(&cpu_notifier_err_inject.actions[3]);
+}
+
 static struct dentry *dir;
 
 static int err_inject_init(void)
@@ -29,7 +60,10 @@
 	if (IS_ERR(dir))
 		return PTR_ERR(dir);
 
-	err = register_hotcpu_notifier(&cpu_notifier_err_inject.nb);
+	err = cpuhp_setup_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE,
+					"cpu-err-notif:prepare",
+					notf_err_inj_up_prepare,
+					notf_err_inj_dead);
 	if (err)
 		debugfs_remove_recursive(dir);
 
@@ -38,7 +72,7 @@
 
 static void err_inject_exit(void)
 {
-	unregister_hotcpu_notifier(&cpu_notifier_err_inject.nb);
+	cpuhp_remove_state_nocalls(CPUHP_NOTF_ERR_INJ_PREPARE);
 	debugfs_remove_recursive(dir);
 }
 
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index fcfa193..06f02f6 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -22,6 +22,7 @@
 #include <linux/stacktrace.h>
 #include <linux/dma-debug.h>
 #include <linux/spinlock.h>
+#include <linux/vmalloc.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/export.h>
@@ -1164,11 +1165,32 @@
 	put_hash_bucket(bucket, &flags);
 }
 
-static void check_for_stack(struct device *dev, void *addr)
+static void check_for_stack(struct device *dev,
+			    struct page *page, size_t offset)
 {
-	if (object_is_on_stack(addr))
-		err_printk(dev, NULL, "DMA-API: device driver maps memory from "
-				"stack [addr=%p]\n", addr);
+	void *addr;
+	struct vm_struct *stack_vm_area = task_stack_vm_area(current);
+
+	if (!stack_vm_area) {
+		/* Stack is direct-mapped. */
+		if (PageHighMem(page))
+			return;
+		addr = page_address(page) + offset;
+		if (object_is_on_stack(addr))
+			err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
+	} else {
+		/* Stack is vmalloced. */
+		int i;
+
+		for (i = 0; i < stack_vm_area->nr_pages; i++) {
+			if (page != stack_vm_area->pages[i])
+				continue;
+
+			addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
+			err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
+			break;
+		}
+	}
 }
 
 static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
@@ -1291,10 +1313,11 @@
 	if (map_single)
 		entry->type = dma_debug_single;
 
+	check_for_stack(dev, page, offset);
+
 	if (!PageHighMem(page)) {
 		void *addr = page_address(page) + offset;
 
-		check_for_stack(dev, addr);
 		check_for_illegal_area(dev, addr, size);
 	}
 
@@ -1386,8 +1409,9 @@
 		entry->sg_call_ents   = nents;
 		entry->sg_mapped_ents = mapped_ents;
 
+		check_for_stack(dev, sg_page(s), s->offset);
+
 		if (!PageHighMem(sg_page(s))) {
-			check_for_stack(dev, sg_virt(s));
 			check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
 		}
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 9e8c738..7e3138c 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -291,33 +291,13 @@
 }
 
 /*
- * Fault in the first iovec of the given iov_iter, to a maximum length
- * of bytes. Returns 0 on success, or non-zero if the memory could not be
- * accessed (ie. because it is an invalid address).
- *
- * writev-intensive code may want this to prefault several iovecs -- that
- * would be possible (callers must not rely on the fact that _only_ the
- * first iovec will be faulted with the current implementation).
- */
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
-{
-	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
-		char __user *buf = i->iov->iov_base + i->iov_offset;
-		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-		return fault_in_pages_readable(buf, bytes);
-	}
-	return 0;
-}
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
-
-/*
  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
  * bytes.  For each iovec, fault in each page that constitutes the iovec.
  *
  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
  * because it is an invalid address).
  */
-int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes)
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
 	size_t skip = i->iov_offset;
 	const struct iovec *iov;
@@ -334,7 +314,7 @@
 	}
 	return 0;
 }
-EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable);
+EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
 void iov_iter_init(struct iov_iter *i, int direction,
 			const struct iovec *iov, unsigned long nr_segs,
diff --git a/lib/irq_poll.c b/lib/irq_poll.c
index 836f7db..2be5569 100644
--- a/lib/irq_poll.c
+++ b/lib/irq_poll.c
@@ -184,30 +184,21 @@
 }
 EXPORT_SYMBOL(irq_poll_init);
 
-static int irq_poll_cpu_notify(struct notifier_block *self,
-				 unsigned long action, void *hcpu)
+static int irq_poll_cpu_dead(unsigned int cpu)
 {
 	/*
 	 * If a CPU goes away, splice its entries to the current CPU
 	 * and trigger a run of the softirq
 	 */
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		int cpu = (unsigned long) hcpu;
+	local_irq_disable();
+	list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+			 this_cpu_ptr(&blk_cpu_iopoll));
+	__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
+	local_irq_enable();
 
-		local_irq_disable();
-		list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
-				 this_cpu_ptr(&blk_cpu_iopoll));
-		__raise_softirq_irqoff(IRQ_POLL_SOFTIRQ);
-		local_irq_enable();
-	}
-
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block irq_poll_cpu_notifier = {
-	.notifier_call	= irq_poll_cpu_notify,
-};
-
 static __init int irq_poll_setup(void)
 {
 	int i;
@@ -216,7 +207,8 @@
 		INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
 
 	open_softirq(IRQ_POLL_SOFTIRQ, irq_poll_softirq);
-	register_hotcpu_notifier(&irq_poll_cpu_notifier);
+	cpuhp_setup_state_nocalls(CPUHP_IRQ_POLL_DEAD, "irq_poll:dead", NULL,
+				  irq_poll_cpu_dead);
 	return 0;
 }
 subsys_initcall(irq_poll_setup);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1b7bf73..8e6d552 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -105,10 +105,10 @@
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
 	if (radix_tree_is_internal_node(entry)) {
-		unsigned long siboff = get_slot_offset(parent, entry);
-		if (siboff < RADIX_TREE_MAP_SIZE) {
-			offset = siboff;
-			entry = rcu_dereference_raw(parent->slots[offset]);
+		if (is_sibling_entry(parent, entry)) {
+			void **sibentry = (void **) entry_to_node(entry);
+			offset = get_slot_offset(parent, sibentry);
+			entry = rcu_dereference_raw(*sibentry);
 		}
 	}
 #endif
@@ -1583,15 +1583,10 @@
 }
 EXPORT_SYMBOL(radix_tree_delete);
 
-struct radix_tree_node *radix_tree_replace_clear_tags(
-			struct radix_tree_root *root,
-			unsigned long index, void *entry)
+void radix_tree_clear_tags(struct radix_tree_root *root,
+			   struct radix_tree_node *node,
+			   void **slot)
 {
-	struct radix_tree_node *node;
-	void **slot;
-
-	__radix_tree_lookup(root, index, &node, &slot);
-
 	if (node) {
 		unsigned int tag, offset = get_slot_offset(node, slot);
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
@@ -1600,9 +1595,6 @@
 		/* Clear root node tags */
 		root->gfp_mask &= __GFP_BITS_MASK;
 	}
-
-	radix_tree_replace_slot(slot, entry);
-	return node;
 }
 
 /**
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 0a7e494..f01b1cb 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -3,3 +3,4 @@
 int*.c
 tables.c
 neon?.c
+s390vx?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3b10a48..29f503e 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -7,6 +7,7 @@
 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
+raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
 
 hostprogs-y	+= mktables
 
@@ -116,6 +117,11 @@
 $(obj)/tilegx8.c:   $(src)/tilegx.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
+targets += s390vx8.c
+$(obj)/s390vx8.c:   UNROLL := 8
+$(obj)/s390vx8.c:   $(src)/s390vx.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 quiet_cmd_mktable = TABLE   $@
       cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
 
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 975c6e0..592ff49 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -69,6 +69,9 @@
 #if defined(CONFIG_TILEGX)
 	&raid6_tilegx8,
 #endif
+#if defined(CONFIG_S390)
+	&raid6_s390vx8,
+#endif
 	&raid6_intx1,
 	&raid6_intx2,
 	&raid6_intx4,
@@ -95,6 +98,9 @@
 #ifdef CONFIG_AS_SSSE3
 	&raid6_recov_ssse3,
 #endif
+#ifdef CONFIG_S390
+	&raid6_recov_s390xc,
+#endif
 	&raid6_recov_intx1,
 	NULL
 };
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c
new file mode 100644
index 0000000..b042dac
--- /dev/null
+++ b/lib/raid6/recov_s390xc.c
@@ -0,0 +1,116 @@
+/*
+ * RAID-6 data recovery in dual failure mode based on the XC instruction.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/raid/pq.h>
+
+static inline void xor_block(u8 *p1, u8 *p2)
+{
+	typedef struct { u8 _[256]; } addrtype;
+
+	asm volatile(
+		"	xc	0(256,%[p1]),0(%[p2])\n"
+		: "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2),
+		  [p1] "a" (p1), [p2] "a" (p2) : "cc");
+}
+
+/* Recover two failed data blocks. */
+static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila,
+		int failb, void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+	int i;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	   Use the dead data pages as temporary storage for
+	   delta p and delta q */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dp;
+	ptrs[failb]   = dq;
+	ptrs[disks-2] = p;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+	/* Now do it... */
+	while (bytes) {
+		xor_block(dp, p);
+		xor_block(dq, q);
+		for (i = 0; i < 256; i++)
+			dq[i] = pbmul[dp[i]] ^ qmul[dq[i]];
+		xor_block(dp, dq);
+		p += 256;
+		q += 256;
+		dp += 256;
+		dq += 256;
+		bytes -= 256;
+	}
+}
+
+/* Recover failure of one data block plus the P block */
+static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila,
+		void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+	int i;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	   Use the dead data page as temporary storage for delta q */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dq;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	/* Now do it... */
+	while (bytes) {
+		xor_block(dq, q);
+		for (i = 0; i < 256; i++)
+			dq[i] = qmul[dq[i]];
+		xor_block(p, dq);
+		p += 256;
+		q += 256;
+		dq += 256;
+		bytes -= 256;
+	}
+}
+
+
+const struct raid6_recov_calls raid6_recov_s390xc = {
+	.data2 = raid6_2data_recov_s390xc,
+	.datap = raid6_datap_recov_s390xc,
+	.valid = NULL,
+	.name = "s390xc",
+	.priority = 1,
+};
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
new file mode 100644
index 0000000..7b45191
--- /dev/null
+++ b/lib/raid6/s390vx.uc
@@ -0,0 +1,168 @@
+/*
+ * raid6_vx$#.c
+ *
+ * $#-way unrolled RAID6 gen/xor functions for s390
+ * based on the vector facility
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This file is postprocessed using unroll.awk.
+ */
+
+#include <linux/raid/pq.h>
+#include <asm/fpu/api.h>
+
+asm(".include \"asm/vx-insn.h\"\n");
+
+#define NSIZE 16
+
+static inline void LOAD_CONST(void)
+{
+	asm volatile("VREPIB %v24,7");
+	asm volatile("VREPIB %v25,0x1d");
+}
+
+/*
+ * The SHLBYTE() operation shifts each of the 16 bytes in
+ * vector register y left by 1 bit and stores the result in
+ * vector register x.
+ */
+static inline void SHLBYTE(int x, int y)
+{
+	asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
+}
+
+/*
+ * For each of the 16 bytes in the vector register y the MASK()
+ * operation returns 0xFF if the high bit of the byte is 1,
+ * or 0x00 if the high bit is 0. The result is stored in vector
+ * register x.
+ */
+static inline void MASK(int x, int y)
+{
+	asm volatile ("VESRAVB	%0,%1,24" : : "i" (x), "i" (y));
+}
+
+static inline void AND(int x, int y, int z)
+{
+	asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
+}
+
+static inline void XOR(int x, int y, int z)
+{
+	asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
+}
+
+static inline void LOAD_DATA(int x, int n, u8 *ptr)
+{
+	typedef struct { u8 _[16*n]; } addrtype;
+	register addrtype *__ptr asm("1") = (addrtype *) ptr;
+
+	asm volatile ("VLM %2,%3,0,%r1"
+		      : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
+}
+
+static inline void STORE_DATA(int x, int n, u8 *ptr)
+{
+	typedef struct { u8 _[16*n]; } addrtype;
+	register addrtype *__ptr asm("1") = (addrtype *) ptr;
+
+	asm volatile ("VSTM %2,%3,0,1"
+		      : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
+}
+
+static inline void COPY_VEC(int x, int y)
+{
+	asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
+}
+
+static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	struct kernel_fpu vxstate;
+	u8 **dptr, *p, *q;
+	int d, z, z0;
+
+	kernel_fpu_begin(&vxstate, KERNEL_VXR);
+	LOAD_CONST();
+
+	dptr = (u8 **) ptrs;
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0 + 1];	/* XOR parity */
+	q = dptr[z0 + 2];	/* RS syndrome */
+
+	for (d = 0; d < bytes; d += $#*NSIZE) {
+		LOAD_DATA(0,$#,&dptr[z0][d]);
+		COPY_VEC(8+$$,0+$$);
+		for (z = z0 - 1; z >= 0; z--) {
+			MASK(16+$$,8+$$);
+			AND(16+$$,16+$$,25);
+			SHLBYTE(8+$$,8+$$);
+			XOR(8+$$,8+$$,16+$$);
+			LOAD_DATA(16,$#,&dptr[z][d]);
+			XOR(0+$$,0+$$,16+$$);
+			XOR(8+$$,8+$$,16+$$);
+		}
+		STORE_DATA(0,$#,&p[d]);
+		STORE_DATA(8,$#,&q[d]);
+	}
+	kernel_fpu_end(&vxstate, KERNEL_VXR);
+}
+
+static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
+					size_t bytes, void **ptrs)
+{
+	struct kernel_fpu vxstate;
+	u8 **dptr, *p, *q;
+	int d, z, z0;
+
+	dptr = (u8 **) ptrs;
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks - 2];	/* XOR parity */
+	q = dptr[disks - 1];	/* RS syndrome */
+
+	kernel_fpu_begin(&vxstate, KERNEL_VXR);
+	LOAD_CONST();
+
+	for (d = 0; d < bytes; d += $#*NSIZE) {
+		/* P/Q data pages */
+		LOAD_DATA(0,$#,&dptr[z0][d]);
+		COPY_VEC(8+$$,0+$$);
+		for (z = z0 - 1; z >= start; z--) {
+			MASK(16+$$,8+$$);
+			AND(16+$$,16+$$,25);
+			SHLBYTE(8+$$,8+$$);
+			XOR(8+$$,8+$$,16+$$);
+			LOAD_DATA(16,$#,&dptr[z][d]);
+			XOR(0+$$,0+$$,16+$$);
+			XOR(8+$$,8+$$,16+$$);
+		}
+		/* P/Q left side optimization */
+		for (z = start - 1; z >= 0; z--) {
+			MASK(16+$$,8+$$);
+			AND(16+$$,16+$$,25);
+			SHLBYTE(8+$$,8+$$);
+			XOR(8+$$,8+$$,16+$$);
+		}
+		LOAD_DATA(16,$#,&p[d]);
+		XOR(16+$$,16+$$,0+$$);
+		STORE_DATA(16,$#,&p[d]);
+		LOAD_DATA(16,$#,&q[d]);
+		XOR(16+$$,16+$$,8+$$);
+		STORE_DATA(16,$#,&q[d]);
+	}
+	kernel_fpu_end(&vxstate, KERNEL_VXR);
+}
+
+static int raid6_s390vx$#_valid(void)
+{
+	return MACHINE_HAS_VX;
+}
+
+const struct raid6_calls raid6_s390vx$# = {
+	raid6_s390vx$#_gen_syndrome,
+	raid6_s390vx$#_xor_syndrome,
+	raid6_s390vx$#_valid,
+	"vx128x$#",
+	1
+};
diff --git a/lib/random32.c b/lib/random32.c
index 69ed593..915982b 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -81,7 +81,7 @@
 	u32 res;
 
 	res = prandom_u32_state(state);
-	put_cpu_var(state);
+	put_cpu_var(net_rand_state);
 
 	return res;
 }
@@ -128,7 +128,7 @@
 	struct rnd_state *state = &get_cpu_var(net_rand_state);
 
 	prandom_bytes_state(state, buf, bytes);
-	put_cpu_var(state);
+	put_cpu_var(net_rand_state);
 }
 EXPORT_SYMBOL(prandom_bytes);
 
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 5d845ff..32d0ad0 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -30,7 +30,7 @@
 
 #define HASH_DEFAULT_SIZE	64UL
 #define HASH_MIN_SIZE		4U
-#define BUCKET_LOCKS_PER_CPU   128UL
+#define BUCKET_LOCKS_PER_CPU	32UL
 
 static u32 head_hashfn(struct rhashtable *ht,
 		       const struct bucket_table *tbl,
@@ -70,21 +70,25 @@
 	unsigned int nr_pcpus = num_possible_cpus();
 #endif
 
-	nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
+	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
 	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);
 
 	/* Never allocate more than 0.5 locks per bucket */
 	size = min_t(unsigned int, size, tbl->size >> 1);
 
 	if (sizeof(spinlock_t) != 0) {
+		tbl->locks = NULL;
 #ifdef CONFIG_NUMA
 		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
 		    gfp == GFP_KERNEL)
 			tbl->locks = vmalloc(size * sizeof(spinlock_t));
-		else
 #endif
-		tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
-					   gfp);
+		if (gfp != GFP_KERNEL)
+			gfp |= __GFP_NOWARN | __GFP_NORETRY;
+
+		if (!tbl->locks)
+			tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
+						   gfp);
 		if (!tbl->locks)
 			return -ENOMEM;
 		for (i = 0; i < size; i++)
@@ -321,12 +325,14 @@
 static int rhashtable_shrink(struct rhashtable *ht)
 {
 	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
-	unsigned int size;
+	unsigned int nelems = atomic_read(&ht->nelems);
+	unsigned int size = 0;
 	int err;
 
 	ASSERT_RHT_MUTEX(ht);
 
-	size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2);
+	if (nelems)
+		size = roundup_pow_of_two(nelems * 3 / 2);
 	if (size < ht->p.min_size)
 		size = ht->p.min_size;
 
@@ -372,22 +378,8 @@
 		schedule_work(&ht->run_work);
 }
 
-static bool rhashtable_check_elasticity(struct rhashtable *ht,
-					struct bucket_table *tbl,
-					unsigned int hash)
-{
-	unsigned int elasticity = ht->elasticity;
-	struct rhash_head *head;
-
-	rht_for_each(head, tbl, hash)
-		if (!--elasticity)
-			return true;
-
-	return false;
-}
-
-int rhashtable_insert_rehash(struct rhashtable *ht,
-			     struct bucket_table *tbl)
+static int rhashtable_insert_rehash(struct rhashtable *ht,
+				    struct bucket_table *tbl)
 {
 	struct bucket_table *old_tbl;
 	struct bucket_table *new_tbl;
@@ -433,61 +425,172 @@
 
 	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);
 
-struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
-					    const void *key,
-					    struct rhash_head *obj,
-					    struct bucket_table *tbl)
+static void *rhashtable_lookup_one(struct rhashtable *ht,
+				   struct bucket_table *tbl, unsigned int hash,
+				   const void *key, struct rhash_head *obj)
 {
+	struct rhashtable_compare_arg arg = {
+		.ht = ht,
+		.key = key,
+	};
+	struct rhash_head __rcu **pprev;
 	struct rhash_head *head;
-	unsigned int hash;
-	int err;
+	int elasticity;
 
-	tbl = rhashtable_last_table(ht, tbl);
-	hash = head_hashfn(ht, tbl, obj);
-	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
+	elasticity = ht->elasticity;
+	pprev = &tbl->buckets[hash];
+	rht_for_each(head, tbl, hash) {
+		struct rhlist_head *list;
+		struct rhlist_head *plist;
 
-	err = -EEXIST;
-	if (key && rhashtable_lookup_fast(ht, key, ht->p))
-		goto exit;
+		elasticity--;
+		if (!key ||
+		    (ht->p.obj_cmpfn ?
+		     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
+		     rhashtable_compare(&arg, rht_obj(ht, head))))
+			continue;
 
-	err = -E2BIG;
+		if (!ht->rhlist)
+			return rht_obj(ht, head);
+
+		list = container_of(obj, struct rhlist_head, rhead);
+		plist = container_of(head, struct rhlist_head, rhead);
+
+		RCU_INIT_POINTER(list->next, plist);
+		head = rht_dereference_bucket(head->next, tbl, hash);
+		RCU_INIT_POINTER(list->rhead.next, head);
+		rcu_assign_pointer(*pprev, obj);
+
+		return NULL;
+	}
+
+	if (elasticity <= 0)
+		return ERR_PTR(-EAGAIN);
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
+						  struct bucket_table *tbl,
+						  unsigned int hash,
+						  struct rhash_head *obj,
+						  void *data)
+{
+	struct bucket_table *new_tbl;
+	struct rhash_head *head;
+
+	if (!IS_ERR_OR_NULL(data))
+		return ERR_PTR(-EEXIST);
+
+	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
+		return ERR_CAST(data);
+
+	new_tbl = rcu_dereference(tbl->future_tbl);
+	if (new_tbl)
+		return new_tbl;
+
+	if (PTR_ERR(data) != -ENOENT)
+		return ERR_CAST(data);
+
 	if (unlikely(rht_grow_above_max(ht, tbl)))
-		goto exit;
+		return ERR_PTR(-E2BIG);
 
-	err = -EAGAIN;
-	if (rhashtable_check_elasticity(ht, tbl, hash) ||
-	    rht_grow_above_100(ht, tbl))
-		goto exit;
-
-	err = 0;
+	if (unlikely(rht_grow_above_100(ht, tbl)))
+		return ERR_PTR(-EAGAIN);
 
 	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 
 	RCU_INIT_POINTER(obj->next, head);
+	if (ht->rhlist) {
+		struct rhlist_head *list;
+
+		list = container_of(obj, struct rhlist_head, rhead);
+		RCU_INIT_POINTER(list->next, NULL);
+	}
 
 	rcu_assign_pointer(tbl->buckets[hash], obj);
 
 	atomic_inc(&ht->nelems);
+	if (rht_grow_above_75(ht, tbl))
+		schedule_work(&ht->run_work);
 
-exit:
-	spin_unlock(rht_bucket_lock(tbl, hash));
+	return NULL;
+}
 
-	if (err == 0)
-		return NULL;
-	else if (err == -EAGAIN)
-		return tbl;
-	else
-		return ERR_PTR(err);
+static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
+				   struct rhash_head *obj)
+{
+	struct bucket_table *new_tbl;
+	struct bucket_table *tbl;
+	unsigned int hash;
+	spinlock_t *lock;
+	void *data;
+
+	tbl = rcu_dereference(ht->tbl);
+
+	/* All insertions must grab the oldest table containing
+	 * the hashed bucket that is yet to be rehashed.
+	 */
+	for (;;) {
+		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+		lock = rht_bucket_lock(tbl, hash);
+		spin_lock_bh(lock);
+
+		if (tbl->rehash <= hash)
+			break;
+
+		spin_unlock_bh(lock);
+		tbl = rcu_dereference(tbl->future_tbl);
+	}
+
+	data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
+	new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
+	if (PTR_ERR(new_tbl) != -EEXIST)
+		data = ERR_CAST(new_tbl);
+
+	while (!IS_ERR_OR_NULL(new_tbl)) {
+		tbl = new_tbl;
+		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+		spin_lock_nested(rht_bucket_lock(tbl, hash),
+				 SINGLE_DEPTH_NESTING);
+
+		data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
+		new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
+		if (PTR_ERR(new_tbl) != -EEXIST)
+			data = ERR_CAST(new_tbl);
+
+		spin_unlock(rht_bucket_lock(tbl, hash));
+	}
+
+	spin_unlock_bh(lock);
+
+	if (PTR_ERR(data) == -EAGAIN)
+		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
+			       -EAGAIN);
+
+	return data;
+}
+
+void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+			     struct rhash_head *obj)
+{
+	void *data;
+
+	do {
+		rcu_read_lock();
+		data = rhashtable_try_insert(ht, key, obj);
+		rcu_read_unlock();
+	} while (PTR_ERR(data) == -EAGAIN);
+
+	return data;
 }
 EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
 
 /**
- * rhashtable_walk_init - Initialise an iterator
+ * rhashtable_walk_enter - Initialise an iterator
  * @ht:		Table to walk over
  * @iter:	Hash table Iterator
- * @gfp:	GFP flags for allocations
  *
  * This function prepares a hash table walk.
  *
@@ -502,30 +605,22 @@
  * This function may sleep so you must not call it from interrupt
  * context or with spin locks held.
  *
- * You must call rhashtable_walk_exit if this function returns
- * successfully.
+ * You must call rhashtable_walk_exit after this function returns.
  */
-int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
-			 gfp_t gfp)
+void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
 {
 	iter->ht = ht;
 	iter->p = NULL;
 	iter->slot = 0;
 	iter->skip = 0;
 
-	iter->walker = kmalloc(sizeof(*iter->walker), gfp);
-	if (!iter->walker)
-		return -ENOMEM;
-
 	spin_lock(&ht->lock);
-	iter->walker->tbl =
+	iter->walker.tbl =
 		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
-	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
+	list_add(&iter->walker.list, &iter->walker.tbl->walkers);
 	spin_unlock(&ht->lock);
-
-	return 0;
 }
-EXPORT_SYMBOL_GPL(rhashtable_walk_init);
+EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
 
 /**
  * rhashtable_walk_exit - Free an iterator
@@ -536,10 +631,9 @@
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
 	spin_lock(&iter->ht->lock);
-	if (iter->walker->tbl)
-		list_del(&iter->walker->list);
+	if (iter->walker.tbl)
+		list_del(&iter->walker.list);
 	spin_unlock(&iter->ht->lock);
-	kfree(iter->walker);
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
 
@@ -565,12 +659,12 @@
 	rcu_read_lock();
 
 	spin_lock(&ht->lock);
-	if (iter->walker->tbl)
-		list_del(&iter->walker->list);
+	if (iter->walker.tbl)
+		list_del(&iter->walker.list);
 	spin_unlock(&ht->lock);
 
-	if (!iter->walker->tbl) {
-		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
+	if (!iter->walker.tbl) {
+		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
 		return -EAGAIN;
 	}
 
@@ -592,12 +686,17 @@
  */
 void *rhashtable_walk_next(struct rhashtable_iter *iter)
 {
-	struct bucket_table *tbl = iter->walker->tbl;
+	struct bucket_table *tbl = iter->walker.tbl;
+	struct rhlist_head *list = iter->list;
 	struct rhashtable *ht = iter->ht;
 	struct rhash_head *p = iter->p;
+	bool rhlist = ht->rhlist;
 
 	if (p) {
-		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
+		if (!rhlist || !(list = rcu_dereference(list->next))) {
+			p = rcu_dereference(p->next);
+			list = container_of(p, struct rhlist_head, rhead);
+		}
 		goto next;
 	}
 
@@ -605,6 +704,18 @@
 		int skip = iter->skip;
 
 		rht_for_each_rcu(p, tbl, iter->slot) {
+			if (rhlist) {
+				list = container_of(p, struct rhlist_head,
+						    rhead);
+				do {
+					if (!skip)
+						goto next;
+					skip--;
+					list = rcu_dereference(list->next);
+				} while (list);
+
+				continue;
+			}
 			if (!skip)
 				break;
 			skip--;
@@ -614,7 +725,8 @@
 		if (!rht_is_a_nulls(p)) {
 			iter->skip++;
 			iter->p = p;
-			return rht_obj(ht, p);
+			iter->list = list;
+			return rht_obj(ht, rhlist ? &list->rhead : p);
 		}
 
 		iter->skip = 0;
@@ -625,8 +737,8 @@
 	/* Ensure we see any new tables. */
 	smp_rmb();
 
-	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
-	if (iter->walker->tbl) {
+	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (iter->walker.tbl) {
 		iter->slot = 0;
 		iter->skip = 0;
 		return ERR_PTR(-EAGAIN);
@@ -646,7 +758,7 @@
 	__releases(RCU)
 {
 	struct rhashtable *ht;
-	struct bucket_table *tbl = iter->walker->tbl;
+	struct bucket_table *tbl = iter->walker.tbl;
 
 	if (!tbl)
 		goto out;
@@ -655,9 +767,9 @@
 
 	spin_lock(&ht->lock);
 	if (tbl->rehash < tbl->size)
-		list_add(&iter->walker->list, &tbl->walkers);
+		list_add(&iter->walker.list, &tbl->walkers);
 	else
-		iter->walker->tbl = NULL;
+		iter->walker.tbl = NULL;
 	spin_unlock(&ht->lock);
 
 	iter->p = NULL;
@@ -803,6 +915,48 @@
 EXPORT_SYMBOL_GPL(rhashtable_init);
 
 /**
+ * rhltable_init - initialize a new hash list table
+ * @hlt:	hash list table to be initialized
+ * @params:	configuration parameters
+ *
+ * Initializes a new hash list table.
+ *
+ * See documentation for rhashtable_init.
+ */
+int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
+{
+	int err;
+
+	/* No rhlist NULLs marking for now. */
+	if (params->nulls_base)
+		return -EINVAL;
+
+	err = rhashtable_init(&hlt->ht, params);
+	hlt->ht.rhlist = true;
+	return err;
+}
+EXPORT_SYMBOL_GPL(rhltable_init);
+
+static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
+				void (*free_fn)(void *ptr, void *arg),
+				void *arg)
+{
+	struct rhlist_head *list;
+
+	if (!ht->rhlist) {
+		free_fn(rht_obj(ht, obj), arg);
+		return;
+	}
+
+	list = container_of(obj, struct rhlist_head, rhead);
+	do {
+		obj = &list->rhead;
+		list = rht_dereference(list->next, ht);
+		free_fn(rht_obj(ht, obj), arg);
+	} while (list);
+}
+
+/**
  * rhashtable_free_and_destroy - free elements and destroy hash table
  * @ht:		the hash table to destroy
  * @free_fn:	callback to release resources of element
@@ -839,7 +993,7 @@
 			     pos = next,
 			     next = !rht_is_a_nulls(pos) ?
 					rht_dereference(pos->next, ht) : NULL)
-				free_fn(rht_obj(ht, pos), arg);
+				rhashtable_free_one(ht, pos, free_fn, arg);
 		}
 	}
 
diff --git a/lib/syscall.c b/lib/syscall.c
index e30e039..63239e0 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -7,9 +7,19 @@
 			   unsigned long args[6], unsigned int maxargs,
 			   unsigned long *sp, unsigned long *pc)
 {
-	struct pt_regs *regs = task_pt_regs(target);
-	if (unlikely(!regs))
+	struct pt_regs *regs;
+
+	if (!try_get_task_stack(target)) {
+		/* Task has no stack, so the task isn't in a syscall. */
+		*callno = -1;
+		return 0;
+	}
+
+	regs = task_pt_regs(target);
+	if (unlikely(!regs)) {
+		put_task_stack(target);
 		return -EAGAIN;
+	}
 
 	*sp = user_stack_pointer(regs);
 	*pc = instruction_pointer(regs);
@@ -18,6 +28,7 @@
 	if (*callno != -1L && maxargs > 0)
 		syscall_get_arguments(target, regs, 0, maxargs, args);
 
+	put_task_stack(target);
 	return 0;
 }
 
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 93f4501..94346b4 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5485,6 +5485,7 @@
 	skb->hash = SKB_HASH;
 	skb->queue_mapping = SKB_QUEUE_MAP;
 	skb->vlan_tci = SKB_VLAN_TCI;
+	skb->vlan_proto = htons(ETH_P_IP);
 	skb->dev = &dev;
 	skb->dev->ifindex = SKB_DEV_IFINDEX;
 	skb->dev->type = SKB_DEV_TYPE;
diff --git a/lib/test_hash.c b/lib/test_hash.c
index 66c5fc8..cac20c5 100644
--- a/lib/test_hash.c
+++ b/lib/test_hash.c
@@ -143,7 +143,7 @@
 test_hash_init(void)
 {
 	char buf[SIZE+1];
-	u32 string_or = 0, hash_or[2][33] = { 0 };
+	u32 string_or = 0, hash_or[2][33] = { { 0, } };
 	unsigned tests = 0;
 	unsigned long long h64 = 0;
 	int i, j;
@@ -219,21 +219,27 @@
 	}
 
 	/* Issue notices about skipped tests. */
-#ifndef HAVE_ARCH__HASH_32
-	pr_info("__hash_32() has no arch implementation to test.");
-#elif HAVE_ARCH__HASH_32 != 1
+#ifdef HAVE_ARCH__HASH_32
+#if HAVE_ARCH__HASH_32 != 1
 	pr_info("__hash_32() is arch-specific; not compared to generic.");
 #endif
-#ifndef HAVE_ARCH_HASH_32
-	pr_info("hash_32() has no arch implementation to test.");
-#elif HAVE_ARCH_HASH_32 != 1
+#else
+	pr_info("__hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_32
+#if HAVE_ARCH_HASH_32 != 1
 	pr_info("hash_32() is arch-specific; not compared to generic.");
 #endif
-#ifndef HAVE_ARCH_HASH_64
-	pr_info("hash_64() has no arch implementation to test.");
-#elif HAVE_ARCH_HASH_64 != 1
+#else
+	pr_info("hash_32() has no arch implementation to test.");
+#endif
+#ifdef HAVE_ARCH_HASH_64
+#if HAVE_ARCH_HASH_64 != 1
 	pr_info("hash_64() is arch-specific; not compared to generic.");
 #endif
+#else
+	pr_info("hash_64() has no arch implementation to test.");
+#endif
 
 	pr_notice("%u tests passed.", tests);
 
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 297fdb5..64e899b 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -38,7 +38,7 @@
 
 static int max_size = 0;
 module_param(max_size, int, 0);
-MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)");
+MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)");
 
 static bool shrinking = false;
 module_param(shrinking, bool, 0);
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c
index f0b323a..ae8d249 100644
--- a/lib/ucs2_string.c
+++ b/lib/ucs2_string.c
@@ -56,7 +56,7 @@
 	unsigned long i;
 	unsigned long j = 0;
 
-	for (i = 0; i < ucs2_strlen(src); i++) {
+	for (i = 0; src[i]; i++) {
 		u16 c = src[i];
 
 		if (c >= 0x800)
diff --git a/lib/usercopy.c b/lib/usercopy.c
deleted file mode 100644
index 4f5b1dd..0000000
--- a/lib/usercopy.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <linux/export.h>
-#include <linux/bug.h>
-#include <linux/uaccess.h>
-
-void copy_from_user_overflow(void)
-{
-	WARN(1, "Buffer overflow detected!\n");
-}
-EXPORT_SYMBOL(copy_from_user_overflow);
diff --git a/lib/win_minmax.c b/lib/win_minmax.c
new file mode 100644
index 0000000..c8420d4
--- /dev/null
+++ b/lib/win_minmax.c
@@ -0,0 +1,98 @@
+/**
+ * lib/minmax.c: windowed min/max tracker
+ *
+ * Kathleen Nichols' algorithm for tracking the minimum (or maximum)
+ * value of a data stream over some fixed time interval.  (E.g.,
+ * the minimum RTT over the past five minutes.) It uses constant
+ * space and constant time per update yet almost always delivers
+ * the same minimum as an implementation that has to keep all the
+ * data in the window.
+ *
+ * The algorithm keeps track of the best, 2nd best & 3rd best min
+ * values, maintaining an invariant that the measurement time of
+ * the n'th best >= n-1'th best. It also makes sure that the three
+ * values are widely separated in the time window since that bounds
+ * the worse case error when that data is monotonically increasing
+ * over the window.
+ *
+ * Upon getting a new min, we can forget everything earlier because
+ * it has no value - the new min is <= everything else in the window
+ * by definition and it's the most recent. So we restart fresh on
+ * every new min and overwrites 2nd & 3rd choices. The same property
+ * holds for 2nd & 3rd best.
+ */
+#include <linux/module.h>
+#include <linux/win_minmax.h>
+
+/* As time advances, update the 1st, 2nd, and 3rd choices. */
+static u32 minmax_subwin_update(struct minmax *m, u32 win,
+				const struct minmax_sample *val)
+{
+	u32 dt = val->t - m->s[0].t;
+
+	if (unlikely(dt > win)) {
+		/*
+		 * Passed entire window without a new val so make 2nd
+		 * choice the new val & 3rd choice the new 2nd choice.
+		 * we may have to iterate this since our 2nd choice
+		 * may also be outside the window (we checked on entry
+		 * that the third choice was in the window).
+		 */
+		m->s[0] = m->s[1];
+		m->s[1] = m->s[2];
+		m->s[2] = *val;
+		if (unlikely(val->t - m->s[0].t > win)) {
+			m->s[0] = m->s[1];
+			m->s[1] = m->s[2];
+			m->s[2] = *val;
+		}
+	} else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) {
+		/*
+		 * We've passed a quarter of the window without a new val
+		 * so take a 2nd choice from the 2nd quarter of the window.
+		 */
+		m->s[2] = m->s[1] = *val;
+	} else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) {
+		/*
+		 * We've passed half the window without finding a new val
+		 * so take a 3rd choice from the last half of the window
+		 */
+		m->s[2] = *val;
+	}
+	return m->s[0].v;
+}
+
+/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */
+u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas)
+{
+	struct minmax_sample val = { .t = t, .v = meas };
+
+	if (unlikely(val.v >= m->s[0].v) ||	  /* found new max? */
+	    unlikely(val.t - m->s[2].t > win))	  /* nothing left in window? */
+		return minmax_reset(m, t, meas);  /* forget earlier samples */
+
+	if (unlikely(val.v >= m->s[1].v))
+		m->s[2] = m->s[1] = val;
+	else if (unlikely(val.v >= m->s[2].v))
+		m->s[2] = val;
+
+	return minmax_subwin_update(m, win, &val);
+}
+EXPORT_SYMBOL(minmax_running_max);
+
+/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */
+u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas)
+{
+	struct minmax_sample val = { .t = t, .v = meas };
+
+	if (unlikely(val.v <= m->s[0].v) ||	  /* found new min? */
+	    unlikely(val.t - m->s[2].t > win))	  /* nothing left in window? */
+		return minmax_reset(m, t, meas);  /* forget earlier samples */
+
+	if (unlikely(val.v <= m->s[1].v))
+		m->s[2] = m->s[1] = val;
+	else if (unlikely(val.v <= m->s[2].v))
+		m->s[2] = val;
+
+	return minmax_subwin_update(m, win, &val);
+}
diff --git a/mm/Kconfig b/mm/Kconfig
index 78a23c5..be0ee11 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -262,7 +262,14 @@
 	select MIGRATION
 	depends on MMU
 	help
-	  Allows the compaction of memory for the allocation of huge pages.
+          Compaction is the only memory management component to form
+          high order (larger physically contiguous) memory blocks
+          reliably. The page allocator relies on compaction heavily and
+          the lack of the feature can lead to unexpected OOM killer
+          invocations for high order memory requests. You shouldn't
+          disable this option unless there really is a strong reason for
+          it and then we would be really interested to hear about that at
+          linux-mm@kvack.org.
 
 #
 # support for page migration
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 22f4cd9..afcc550 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -76,8 +76,6 @@
 	   no longer necessary to write zeros when GFP_ZERO is used on
 	   allocation.
 
-	   Enabling page poisoning with this option will disable hibernation
-
 	   If unsure, say N
 	bool
 
diff --git a/mm/debug.c b/mm/debug.c
index 8865bfb..74c7cae 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -42,9 +42,11 @@
 
 void __dump_page(struct page *page, const char *reason)
 {
+	int mapcount = PageSlab(page) ? 0 : page_mapcount(page);
+
 	pr_emerg("page:%p count:%d mapcount:%d mapping:%p index:%#lx",
-		  page, page_ref_count(page), page_mapcount(page),
-		  page->mapping, page->index);
+		  page, page_ref_count(page), mapcount,
+		  page->mapping, page_to_pgoff(page));
 	if (PageCompound(page))
 		pr_cont(" compound_mapcount: %d", compound_mapcount(page));
 	pr_cont("\n");
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a287df..4bad32d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -110,36 +110,94 @@
  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
+static int page_cache_tree_insert(struct address_space *mapping,
+				  struct page *page, void **shadowp)
+{
+	struct radix_tree_node *node;
+	void **slot;
+	int error;
+
+	error = __radix_tree_create(&mapping->page_tree, page->index, 0,
+				    &node, &slot);
+	if (error)
+		return error;
+	if (*slot) {
+		void *p;
+
+		p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
+		if (!radix_tree_exceptional_entry(p))
+			return -EEXIST;
+
+		mapping->nrexceptional--;
+		if (!dax_mapping(mapping)) {
+			if (shadowp)
+				*shadowp = p;
+			if (node)
+				workingset_node_shadows_dec(node);
+		} else {
+			/* DAX can replace empty locked entry with a hole */
+			WARN_ON_ONCE(p !=
+				(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
+					 RADIX_DAX_ENTRY_LOCK));
+			/* DAX accounts exceptional entries as normal pages */
+			if (node)
+				workingset_node_pages_dec(node);
+			/* Wakeup waiters for exceptional entry lock */
+			dax_wake_mapping_entry_waiter(mapping, page->index,
+						      false);
+		}
+	}
+	radix_tree_replace_slot(slot, page);
+	mapping->nrpages++;
+	if (node) {
+		workingset_node_pages_inc(node);
+		/*
+		 * Don't track node that contains actual pages.
+		 *
+		 * Avoid acquiring the list_lru lock if already
+		 * untracked.  The list_empty() test is safe as
+		 * node->private_list is protected by
+		 * mapping->tree_lock.
+		 */
+		if (!list_empty(&node->private_list))
+			list_lru_del(&workingset_shadow_nodes,
+				     &node->private_list);
+	}
+	return 0;
+}
+
 static void page_cache_tree_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
-	struct radix_tree_node *node;
 	int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageTail(page), page);
 	VM_BUG_ON_PAGE(nr != 1 && shadow, page);
 
-	if (shadow) {
-		mapping->nrexceptional += nr;
-		/*
-		 * Make sure the nrexceptional update is committed before
-		 * the nrpages update so that final truncate racing
-		 * with reclaim does not see both counters 0 at the
-		 * same time and miss a shadow entry.
-		 */
-		smp_wmb();
-	}
-	mapping->nrpages -= nr;
-
 	for (i = 0; i < nr; i++) {
-		node = radix_tree_replace_clear_tags(&mapping->page_tree,
-				page->index + i, shadow);
+		struct radix_tree_node *node;
+		void **slot;
+
+		__radix_tree_lookup(&mapping->page_tree, page->index + i,
+				    &node, &slot);
+
+		radix_tree_clear_tags(&mapping->page_tree, node, slot);
+
 		if (!node) {
 			VM_BUG_ON_PAGE(nr != 1, page);
-			return;
+			/*
+			 * We need a node to properly account shadow
+			 * entries. Don't plant any without. XXX
+			 */
+			shadow = NULL;
 		}
 
+		radix_tree_replace_slot(slot, shadow);
+
+		if (!node)
+			break;
+
 		workingset_node_pages_dec(node);
 		if (shadow)
 			workingset_node_shadows_inc(node);
@@ -163,6 +221,18 @@
 					&node->private_list);
 		}
 	}
+
+	if (shadow) {
+		mapping->nrexceptional += nr;
+		/*
+		 * Make sure the nrexceptional update is committed before
+		 * the nrpages update so that final truncate racing
+		 * with reclaim does not see both counters 0 at the
+		 * same time and miss a shadow entry.
+		 */
+		smp_wmb();
+	}
+	mapping->nrpages -= nr;
 }
 
 /*
@@ -561,9 +631,8 @@
 
 		spin_lock_irqsave(&mapping->tree_lock, flags);
 		__delete_from_page_cache(old, NULL);
-		error = radix_tree_insert(&mapping->page_tree, offset, new);
+		error = page_cache_tree_insert(mapping, new, NULL);
 		BUG_ON(error);
-		mapping->nrpages++;
 
 		/*
 		 * hugetlb pages do not participate in page cache accounting.
@@ -584,62 +653,6 @@
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_page);
 
-static int page_cache_tree_insert(struct address_space *mapping,
-				  struct page *page, void **shadowp)
-{
-	struct radix_tree_node *node;
-	void **slot;
-	int error;
-
-	error = __radix_tree_create(&mapping->page_tree, page->index, 0,
-				    &node, &slot);
-	if (error)
-		return error;
-	if (*slot) {
-		void *p;
-
-		p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
-		if (!radix_tree_exceptional_entry(p))
-			return -EEXIST;
-
-		mapping->nrexceptional--;
-		if (!dax_mapping(mapping)) {
-			if (shadowp)
-				*shadowp = p;
-			if (node)
-				workingset_node_shadows_dec(node);
-		} else {
-			/* DAX can replace empty locked entry with a hole */
-			WARN_ON_ONCE(p !=
-				(void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
-					 RADIX_DAX_ENTRY_LOCK));
-			/* DAX accounts exceptional entries as normal pages */
-			if (node)
-				workingset_node_pages_dec(node);
-			/* Wakeup waiters for exceptional entry lock */
-			dax_wake_mapping_entry_waiter(mapping, page->index,
-						      false);
-		}
-	}
-	radix_tree_replace_slot(slot, page);
-	mapping->nrpages++;
-	if (node) {
-		workingset_node_pages_inc(node);
-		/*
-		 * Don't track node that contains actual pages.
-		 *
-		 * Avoid acquiring the list_lru lock if already
-		 * untracked.  The list_empty() test is safe as
-		 * node->private_list is protected by
-		 * mapping->tree_lock.
-		 */
-		if (!list_empty(&node->private_list))
-			list_lru_del(&workingset_shadow_nodes,
-				     &node->private_list);
-	}
-	return 0;
-}
-
 static int __add_to_page_cache_locked(struct page *page,
 				      struct address_space *mapping,
 				      pgoff_t offset, gfp_t gfp_mask,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2373f0a..283583f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1078,7 +1078,7 @@
 		goto out;
 
 	page = pmd_page(*pmd);
-	VM_BUG_ON_PAGE(!PageHead(page), page);
+	VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
 	if (flags & FOLL_TOUCH)
 		touch_pmd(vma, addr, pmd);
 	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
@@ -1116,7 +1116,7 @@
 	}
 skip_mlock:
 	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
-	VM_BUG_ON_PAGE(!PageCompound(page), page);
+	VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
 	if (flags & FOLL_GET)
 		get_page(page);
 
@@ -1138,9 +1138,6 @@
 	bool was_writable;
 	int flags = 0;
 
-	/* A PROT_NONE fault should not end up here */
-	BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
 	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 	if (unlikely(!pmd_same(pmd, *fe->pmd)))
 		goto out_unlock;
@@ -1168,7 +1165,7 @@
 	}
 
 	/* See similar comment in do_numa_page for explanation */
-	if (!(vma->vm_flags & VM_WRITE))
+	if (!pmd_write(pmd))
 		flags |= TNF_NO_GROUP;
 
 	/*
@@ -1512,7 +1509,7 @@
 	struct page *page;
 	pgtable_t pgtable;
 	pmd_t _pmd;
-	bool young, write, dirty;
+	bool young, write, dirty, soft_dirty;
 	unsigned long addr;
 	int i;
 
@@ -1546,6 +1543,7 @@
 	write = pmd_write(*pmd);
 	young = pmd_young(*pmd);
 	dirty = pmd_dirty(*pmd);
+	soft_dirty = pmd_soft_dirty(*pmd);
 
 	pmdp_huge_split_prepare(vma, haddr, pmd);
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1562,6 +1560,8 @@
 			swp_entry_t swp_entry;
 			swp_entry = make_migration_entry(page + i, write);
 			entry = swp_entry_to_pte(swp_entry);
+			if (soft_dirty)
+				entry = pte_swp_mksoft_dirty(entry);
 		} else {
 			entry = mk_pte(page + i, vma->vm_page_prot);
 			entry = maybe_mkwrite(entry, vma);
@@ -1569,6 +1569,8 @@
 				entry = pte_wrprotect(entry);
 			if (!young)
 				entry = pte_mkold(entry);
+			if (soft_dirty)
+				entry = pte_mksoft_dirty(entry);
 		}
 		if (dirty)
 			SetPageDirty(page + i);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b9aa1b0..87e11d8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1448,6 +1448,7 @@
 		list_del(&page->lru);
 		h->free_huge_pages--;
 		h->free_huge_pages_node[nid]--;
+		h->max_huge_pages--;
 		update_and_free_page(h, page);
 	}
 	spin_unlock(&hugetlb_lock);
diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c
index b6728a3..baabaad 100644
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -217,11 +217,8 @@
 	new_quarantine_size = (READ_ONCE(totalram_pages) << PAGE_SHIFT) /
 		QUARANTINE_FRACTION;
 	percpu_quarantines = QUARANTINE_PERCPU_SIZE * num_online_cpus();
-	if (WARN_ONCE(new_quarantine_size < percpu_quarantines,
-		"Too little memory, disabling global KASAN quarantine.\n"))
-		new_quarantine_size = 0;
-	else
-		new_quarantine_size -= percpu_quarantines;
+	new_quarantine_size = (new_quarantine_size < percpu_quarantines) ?
+		0 : new_quarantine_size - percpu_quarantines;
 	WRITE_ONCE(quarantine_size, new_quarantine_size);
 
 	last = global_quarantine.head;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 79c52d0..728d779 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -838,7 +838,8 @@
  * value (scan code).
  */
 
-static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
+static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
+		struct vm_area_struct **vmap)
 {
 	struct vm_area_struct *vma;
 	unsigned long hstart, hend;
@@ -846,7 +847,7 @@
 	if (unlikely(khugepaged_test_exit(mm)))
 		return SCAN_ANY_PROCESS;
 
-	vma = find_vma(mm, address);
+	*vmap = vma = find_vma(mm, address);
 	if (!vma)
 		return SCAN_VMA_NULL;
 
@@ -881,6 +882,11 @@
 		.pmd = pmd,
 	};
 
+	/* we only decide to swapin, if there is enough young ptes */
+	if (referenced < HPAGE_PMD_NR/2) {
+		trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+		return false;
+	}
 	fe.pte = pte_offset_map(pmd, address);
 	for (; fe.address < address + HPAGE_PMD_NR*PAGE_SIZE;
 			fe.pte++, fe.address += PAGE_SIZE) {
@@ -888,17 +894,12 @@
 		if (!is_swap_pte(pteval))
 			continue;
 		swapped_in++;
-		/* we only decide to swapin, if there is enough young ptes */
-		if (referenced < HPAGE_PMD_NR/2) {
-			trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
-			return false;
-		}
 		ret = do_swap_page(&fe, pteval);
 
 		/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
 		if (ret & VM_FAULT_RETRY) {
 			down_read(&mm->mmap_sem);
-			if (hugepage_vma_revalidate(mm, address)) {
+			if (hugepage_vma_revalidate(mm, address, &fe.vma)) {
 				/* vma is no longer available, don't continue to swapin */
 				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
@@ -923,7 +924,6 @@
 static void collapse_huge_page(struct mm_struct *mm,
 				   unsigned long address,
 				   struct page **hpage,
-				   struct vm_area_struct *vma,
 				   int node, int referenced)
 {
 	pmd_t *pmd, _pmd;
@@ -933,6 +933,7 @@
 	spinlock_t *pmd_ptl, *pte_ptl;
 	int isolated = 0, result = 0;
 	struct mem_cgroup *memcg;
+	struct vm_area_struct *vma;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 	gfp_t gfp;
@@ -961,7 +962,7 @@
 	}
 
 	down_read(&mm->mmap_sem);
-	result = hugepage_vma_revalidate(mm, address);
+	result = hugepage_vma_revalidate(mm, address, &vma);
 	if (result) {
 		mem_cgroup_cancel_charge(new_page, memcg, true);
 		up_read(&mm->mmap_sem);
@@ -994,7 +995,7 @@
 	 * handled by the anon_vma lock + PG_lock.
 	 */
 	down_write(&mm->mmap_sem);
-	result = hugepage_vma_revalidate(mm, address);
+	result = hugepage_vma_revalidate(mm, address, &vma);
 	if (result)
 		goto out;
 	/* check if the pmd is still valid */
@@ -1202,7 +1203,7 @@
 	if (ret) {
 		node = khugepaged_find_target_node();
 		/* collapse_huge_page will return with the mmap_sem released */
-		collapse_huge_page(mm, address, hpage, vma, node, referenced);
+		collapse_huge_page(mm, address, hpage, node, referenced);
 	}
 out:
 	trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
diff --git a/mm/ksm.c b/mm/ksm.c
index 73d43ba..5048083 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -283,7 +283,8 @@
 {
 	struct rmap_item *rmap_item;
 
-	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL);
+	rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL |
+						__GFP_NORETRY | __GFP_NOWARN);
 	if (rmap_item)
 		ksm_rmap_items++;
 	return rmap_item;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 66beca1..4be518d4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1740,17 +1740,22 @@
 static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	struct memcg_stock_pcp *stock;
+	unsigned long flags;
 	bool ret = false;
 
 	if (nr_pages > CHARGE_BATCH)
 		return ret;
 
-	stock = &get_cpu_var(memcg_stock);
+	local_irq_save(flags);
+
+	stock = this_cpu_ptr(&memcg_stock);
 	if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
 		stock->nr_pages -= nr_pages;
 		ret = true;
 	}
-	put_cpu_var(memcg_stock);
+
+	local_irq_restore(flags);
+
 	return ret;
 }
 
@@ -1771,15 +1776,18 @@
 	stock->cached = NULL;
 }
 
-/*
- * This must be called under preempt disabled or must be called by
- * a thread which is pinned to local cpu.
- */
 static void drain_local_stock(struct work_struct *dummy)
 {
-	struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+	struct memcg_stock_pcp *stock;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	stock = this_cpu_ptr(&memcg_stock);
 	drain_stock(stock);
 	clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+	local_irq_restore(flags);
 }
 
 /*
@@ -1788,14 +1796,19 @@
  */
 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-	struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+	struct memcg_stock_pcp *stock;
+	unsigned long flags;
 
+	local_irq_save(flags);
+
+	stock = this_cpu_ptr(&memcg_stock);
 	if (stock->cached != memcg) { /* reset if necessary */
 		drain_stock(stock);
 		stock->cached = memcg;
 	}
 	stock->nr_pages += nr_pages;
-	put_cpu_var(memcg_stock);
+
+	local_irq_restore(flags);
 }
 
 /*
@@ -2337,8 +2350,11 @@
 		return 0;
 
 	memcg = get_mem_cgroup_from_mm(current->mm);
-	if (!mem_cgroup_is_root(memcg))
+	if (!mem_cgroup_is_root(memcg)) {
 		ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
+		if (!ret)
+			__SetPageKmemcg(page);
+	}
 	css_put(&memcg->css);
 	return ret;
 }
@@ -2365,6 +2381,11 @@
 		page_counter_uncharge(&memcg->memsw, nr_pages);
 
 	page->mem_cgroup = NULL;
+
+	/* slab pages do not have PageKmemcg flag set */
+	if (PageKmemcg(page))
+		__ClearPageKmemcg(page);
+
 	css_put_many(&memcg->css, nr_pages);
 }
 #endif /* !CONFIG_SLOB */
@@ -4069,14 +4090,14 @@
 
 static DEFINE_IDR(mem_cgroup_idr);
 
-static void mem_cgroup_id_get(struct mem_cgroup *memcg)
+static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
-	atomic_inc(&memcg->id.ref);
+	atomic_add(n, &memcg->id.ref);
 }
 
-static void mem_cgroup_id_put(struct mem_cgroup *memcg)
+static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
-	if (atomic_dec_and_test(&memcg->id.ref)) {
+	if (atomic_sub_and_test(n, &memcg->id.ref)) {
 		idr_remove(&mem_cgroup_idr, memcg->id.id);
 		memcg->id.id = 0;
 
@@ -4085,6 +4106,16 @@
 	}
 }
 
+static inline void mem_cgroup_id_get(struct mem_cgroup *memcg)
+{
+	mem_cgroup_id_get_many(memcg, 1);
+}
+
+static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
+{
+	mem_cgroup_id_put_many(memcg, 1);
+}
+
 /**
  * mem_cgroup_from_id - look up a memcg from a memcg id
  * @id: the memcg id to look up
@@ -4719,6 +4750,8 @@
 		if (!mem_cgroup_is_root(mc.from))
 			page_counter_uncharge(&mc.from->memsw, mc.moved_swap);
 
+		mem_cgroup_id_put_many(mc.from, mc.moved_swap);
+
 		/*
 		 * we charged both to->memory and to->memsw, so we
 		 * should uncharge to->memory.
@@ -4726,9 +4759,9 @@
 		if (!mem_cgroup_is_root(mc.to))
 			page_counter_uncharge(&mc.to->memory, mc.moved_swap);
 
-		css_put_many(&mc.from->css, mc.moved_swap);
+		mem_cgroup_id_get_many(mc.to, mc.moved_swap);
+		css_put_many(&mc.to->css, mc.moved_swap);
 
-		/* we've already done css_get(mc.to) */
 		mc.moved_swap = 0;
 	}
 	memcg_oom_recover(from);
@@ -5537,8 +5570,10 @@
 			else
 				nr_file += nr_pages;
 			pgpgout++;
-		} else
+		} else {
 			nr_kmem += 1 << compound_order(page);
+			__ClearPageKmemcg(page);
+		}
 
 		page->mem_cgroup = NULL;
 	} while (next != page_list);
@@ -5781,6 +5816,24 @@
 subsys_initcall(mem_cgroup_init);
 
 #ifdef CONFIG_MEMCG_SWAP
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+{
+	while (!atomic_inc_not_zero(&memcg->id.ref)) {
+		/*
+		 * The root cgroup cannot be destroyed, so it's refcount must
+		 * always be >= 1.
+		 */
+		if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+			VM_BUG_ON(1);
+			break;
+		}
+		memcg = parent_mem_cgroup(memcg);
+		if (!memcg)
+			memcg = root_mem_cgroup;
+	}
+	return memcg;
+}
+
 /**
  * mem_cgroup_swapout - transfer a memsw charge to swap
  * @page: page whose memsw charge to transfer
@@ -5790,7 +5843,7 @@
  */
 void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
 {
-	struct mem_cgroup *memcg;
+	struct mem_cgroup *memcg, *swap_memcg;
 	unsigned short oldid;
 
 	VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -5805,16 +5858,27 @@
 	if (!memcg)
 		return;
 
-	mem_cgroup_id_get(memcg);
-	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+	/*
+	 * In case the memcg owning these pages has been offlined and doesn't
+	 * have an ID allocated to it anymore, charge the closest online
+	 * ancestor for the swap instead and transfer the memory+swap charge.
+	 */
+	swap_memcg = mem_cgroup_id_get_online(memcg);
+	oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg));
 	VM_BUG_ON_PAGE(oldid, page);
-	mem_cgroup_swap_statistics(memcg, true);
+	mem_cgroup_swap_statistics(swap_memcg, true);
 
 	page->mem_cgroup = NULL;
 
 	if (!mem_cgroup_is_root(memcg))
 		page_counter_uncharge(&memcg->memory, 1);
 
+	if (memcg != swap_memcg) {
+		if (!mem_cgroup_is_root(swap_memcg))
+			page_counter_charge(&swap_memcg->memsw, 1);
+		page_counter_uncharge(&memcg->memsw, 1);
+	}
+
 	/*
 	 * Interrupts should be disabled here because the caller holds the
 	 * mapping->tree_lock lock which is taken with interrupts-off. It is
@@ -5853,11 +5917,14 @@
 	if (!memcg)
 		return 0;
 
-	if (!mem_cgroup_is_root(memcg) &&
-	    !page_counter_try_charge(&memcg->swap, 1, &counter))
-		return -ENOMEM;
+	memcg = mem_cgroup_id_get_online(memcg);
 
-	mem_cgroup_id_get(memcg);
+	if (!mem_cgroup_is_root(memcg) &&
+	    !page_counter_try_charge(&memcg->swap, 1, &counter)) {
+		mem_cgroup_id_put(memcg);
+		return -ENOMEM;
+	}
+
 	oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
 	VM_BUG_ON_PAGE(oldid, page);
 	mem_cgroup_swap_statistics(memcg, true);
diff --git a/mm/memory.c b/mm/memory.c
index 83be99d..f1a6804 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3351,9 +3351,6 @@
 	bool was_writable = pte_write(pte);
 	int flags = 0;
 
-	/* A PROT_NONE fault should not end up here */
-	BUG_ON(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)));
-
 	/*
 	* The "pte" at this point cannot be used safely without
 	* validation through pte_unmap_same(). It's of NUMA type but
@@ -3398,7 +3395,7 @@
 	 * pte_dirty has unpredictable behaviour between PTE scan updates,
 	 * background writeback, dirty balancing and application behaviour.
 	 */
-	if (!(vma->vm_flags & VM_WRITE))
+	if (!pte_write(pte))
 		flags |= TNF_NO_GROUP;
 
 	/*
@@ -3458,6 +3455,11 @@
 	return VM_FAULT_FALLBACK;
 }
 
+static inline bool vma_is_accessible(struct vm_area_struct *vma)
+{
+	return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE);
+}
+
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -3524,7 +3526,7 @@
 	if (!pte_present(entry))
 		return do_swap_page(fe, entry);
 
-	if (pte_protnone(entry))
+	if (pte_protnone(entry) && vma_is_accessible(fe->vma))
 		return do_numa_page(fe, entry);
 
 	fe->ptl = pte_lockptr(fe->vma->vm_mm, fe->pmd);
@@ -3590,7 +3592,7 @@
 
 		barrier();
 		if (pmd_trans_huge(orig_pmd) || pmd_devmap(orig_pmd)) {
-			if (pmd_protnone(orig_pmd))
+			if (pmd_protnone(orig_pmd) && vma_is_accessible(vma))
 				return do_huge_pmd_numa_page(&fe, orig_pmd);
 
 			if ((fe.flags & FAULT_FLAG_WRITE) &&
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3894b65..9d29ba0 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1219,6 +1219,7 @@
 
 	/* init node's zones as empty zones, we don't have any present pages.*/
 	free_area_init_node(nid, zones_size, start_pfn, zholes_size);
+	pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat);
 
 	/*
 	 * The node we allocated has no zone fallback lists. For avoiding
@@ -1249,6 +1250,7 @@
 static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
 {
 	arch_refresh_nodedata(nid, NULL);
+	free_percpu(pgdat->per_cpu_nodestats);
 	arch_free_nodedata(pgdat);
 	return;
 }
@@ -1553,8 +1555,8 @@
 {
 	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
 	int nid = page_to_nid(page);
-	nodemask_t nmask = node_online_map;
-	struct page *new_page;
+	nodemask_t nmask = node_states[N_MEMORY];
+	struct page *new_page = NULL;
 
 	/*
 	 * TODO: allocate a destination hugepage from a nearest neighbor node,
@@ -1566,11 +1568,13 @@
 					next_node_in(nid, nmask));
 
 	node_clear(nid, nmask);
+
 	if (PageHighMem(page)
 	    || (zone_idx(page_zone(page)) == ZONE_MOVABLE))
 		gfp_mask |= __GFP_HIGHMEM;
 
-	new_page = __alloc_pages_nodemask(gfp_mask, 0,
+	if (!nodes_empty(nmask))
+		new_page = __alloc_pages_nodemask(gfp_mask, 0,
 					node_zonelist(nid, gfp_mask), &nmask);
 	if (!new_page)
 		new_page = __alloc_pages(gfp_mask, 0,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d8c4e38..2da72a5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2336,6 +2336,23 @@
 	return ret;
 }
 
+/*
+ * Drop the (possibly final) reference to task->mempolicy.  It needs to be
+ * dropped after task->mempolicy is set to NULL so that any allocation done as
+ * part of its kmem_cache_free(), such as by KASAN, doesn't reference a freed
+ * policy.
+ */
+void mpol_put_task_policy(struct task_struct *task)
+{
+	struct mempolicy *pol;
+
+	task_lock(task);
+	pol = task->mempolicy;
+	task->mempolicy = NULL;
+	task_unlock(task);
+	mpol_put(pol);
+}
+
 static void sp_delete(struct shared_policy *sp, struct sp_node *n)
 {
 	pr_debug("deleting %lx-l%lx\n", n->start, n->end);
diff --git a/mm/mmap.c b/mm/mmap.c
index ca9d91b..7a0707a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -88,6 +88,11 @@
  *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
  *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
  *
+ * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
+ * MAP_PRIVATE:
+ *								r: (no) no
+ *								w: (no) no
+ *								x: (yes) yes
  */
 pgprot_t protection_map[16] = {
 	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
@@ -3063,6 +3068,14 @@
 	return ERR_PTR(ret);
 }
 
+bool vma_is_special_mapping(const struct vm_area_struct *vma,
+	const struct vm_special_mapping *sm)
+{
+	return vma->vm_private_data == sm &&
+		(vma->vm_ops == &special_mapping_vmops ||
+		 vma->vm_ops == &legacy_special_mapping_vmops);
+}
+
 /*
  * Called with mm->mmap_sem held for writing.
  * Insert a new vma covering the given region, with the given flags.
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 7d0a275..d53a9aa 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -764,7 +764,7 @@
 {
 	struct mm_struct *mm = task->mm;
 	struct task_struct *p;
-	bool ret;
+	bool ret = true;
 
 	/*
 	 * Skip tasks without mm because it might have passed its exit_mm and
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f4cd7d8..28d6f36 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2080,26 +2080,12 @@
 		ratelimit_pages = 16;
 }
 
-static int
-ratelimit_handler(struct notifier_block *self, unsigned long action,
-		  void *hcpu)
+static int page_writeback_cpu_online(unsigned int cpu)
 {
-
-	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
-	case CPU_DEAD:
-		writeback_set_ratelimit();
-		return NOTIFY_OK;
-	default:
-		return NOTIFY_DONE;
-	}
+	writeback_set_ratelimit();
+	return 0;
 }
 
-static struct notifier_block ratelimit_nb = {
-	.notifier_call	= ratelimit_handler,
-	.next		= NULL,
-};
-
 /*
  * Called early on to tune the page writeback dirty limits.
  *
@@ -2122,8 +2108,10 @@
 {
 	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
 
-	writeback_set_ratelimit();
-	register_cpu_notifier(&ratelimit_nb);
+	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mm/writeback:online",
+			  page_writeback_cpu_online, NULL);
+	cpuhp_setup_state(CPUHP_MM_WRITEBACK_DEAD, "mm/writeback:dead", NULL,
+			  page_writeback_cpu_online);
 }
 
 /**
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fb975ce..a2214c6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1008,10 +1008,8 @@
 	}
 	if (PageMappingFlags(page))
 		page->mapping = NULL;
-	if (memcg_kmem_enabled() && PageKmemcg(page)) {
+	if (memcg_kmem_enabled() && PageKmemcg(page))
 		memcg_kmem_uncharge(page, order);
-		__ClearPageKmemcg(page);
-	}
 	if (check_free)
 		bad += free_pages_check(page);
 	if (bad)
@@ -3139,54 +3137,6 @@
 	return NULL;
 }
 
-static inline bool
-should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
-		     enum compact_result compact_result,
-		     enum compact_priority *compact_priority,
-		     int compaction_retries)
-{
-	int max_retries = MAX_COMPACT_RETRIES;
-
-	if (!order)
-		return false;
-
-	/*
-	 * compaction considers all the zone as desperately out of memory
-	 * so it doesn't really make much sense to retry except when the
-	 * failure could be caused by insufficient priority
-	 */
-	if (compaction_failed(compact_result)) {
-		if (*compact_priority > MIN_COMPACT_PRIORITY) {
-			(*compact_priority)--;
-			return true;
-		}
-		return false;
-	}
-
-	/*
-	 * make sure the compaction wasn't deferred or didn't bail out early
-	 * due to locks contention before we declare that we should give up.
-	 * But do not retry if the given zonelist is not suitable for
-	 * compaction.
-	 */
-	if (compaction_withdrawn(compact_result))
-		return compaction_zonelist_suitable(ac, order, alloc_flags);
-
-	/*
-	 * !costly requests are much more important than __GFP_REPEAT
-	 * costly ones because they are de facto nofail and invoke OOM
-	 * killer to move on while costly can fail and users are ready
-	 * to cope with that. 1/4 retries is rather arbitrary but we
-	 * would need much more detailed feedback from compaction to
-	 * make a better decision.
-	 */
-	if (order > PAGE_ALLOC_COSTLY_ORDER)
-		max_retries /= 4;
-	if (compaction_retries <= max_retries)
-		return true;
-
-	return false;
-}
 #else
 static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
@@ -3197,6 +3147,8 @@
 	return NULL;
 }
 
+#endif /* CONFIG_COMPACTION */
+
 static inline bool
 should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
 		     enum compact_result compact_result,
@@ -3223,7 +3175,6 @@
 	}
 	return false;
 }
-#endif /* CONFIG_COMPACTION */
 
 /* Perform direct synchronous page reclaim */
 static int
@@ -3756,12 +3707,10 @@
 	}
 
 out:
-	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
-		if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
-			__free_pages(page, order);
-			page = NULL;
-		} else
-			__SetPageKmemcg(page);
+	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
+	    unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
+		__free_pages(page, order);
+		page = NULL;
 	}
 
 	if (kmemcheck_enabled && page)
@@ -4064,7 +4013,7 @@
 	int lru;
 
 	for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
-		pages[lru] = global_page_state(NR_LRU_BASE + lru);
+		pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
 
 	for_each_zone(zone)
 		wmark_low += zone->watermark[WMARK_LOW];
@@ -4411,7 +4360,7 @@
 	do {
 		zone_type--;
 		zone = pgdat->node_zones + zone_type;
-		if (populated_zone(zone)) {
+		if (managed_zone(zone)) {
 			zoneref_set_zone(zone,
 				&zonelist->_zonerefs[nr_zones++]);
 			check_highest_zone(zone_type);
@@ -4649,7 +4598,7 @@
 		for (j = 0; j < nr_nodes; j++) {
 			node = node_order[j];
 			z = &NODE_DATA(node)->node_zones[zone_type];
-			if (populated_zone(z)) {
+			if (managed_zone(z)) {
 				zoneref_set_zone(z,
 					&zonelist->_zonerefs[pos++]);
 				check_highest_zone(zone_type);
@@ -4761,6 +4710,8 @@
 }
 #endif
 
+static void setup_min_unmapped_ratio(void);
+static void setup_min_slab_ratio(void);
 #else	/* CONFIG_NUMA */
 
 static void set_zonelist_order(void)
@@ -5882,9 +5833,6 @@
 		zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
 #ifdef CONFIG_NUMA
 		zone->node = nid;
-		pgdat->min_unmapped_pages += (freesize*sysctl_min_unmapped_ratio)
-						/ 100;
-		pgdat->min_slab_pages += (freesize * sysctl_min_slab_ratio) / 100;
 #endif
 		zone->name = zone_names[j];
 		zone->zone_pgdat = pgdat;
@@ -6805,6 +6753,12 @@
 	setup_per_zone_wmarks();
 	refresh_zone_stat_thresholds();
 	setup_per_zone_lowmem_reserve();
+
+#ifdef CONFIG_NUMA
+	setup_min_unmapped_ratio();
+	setup_min_slab_ratio();
+#endif
+
 	return 0;
 }
 core_initcall(init_per_zone_wmark_min)
@@ -6846,43 +6800,58 @@
 }
 
 #ifdef CONFIG_NUMA
-int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+static void setup_min_unmapped_ratio(void)
 {
-	struct pglist_data *pgdat;
+	pg_data_t *pgdat;
 	struct zone *zone;
-	int rc;
-
-	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
-	if (rc)
-		return rc;
 
 	for_each_online_pgdat(pgdat)
-		pgdat->min_slab_pages = 0;
+		pgdat->min_unmapped_pages = 0;
 
 	for_each_zone(zone)
 		zone->zone_pgdat->min_unmapped_pages += (zone->managed_pages *
 				sysctl_min_unmapped_ratio) / 100;
-	return 0;
 }
 
-int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+
+int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
-	struct pglist_data *pgdat;
-	struct zone *zone;
 	int rc;
 
 	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
 	if (rc)
 		return rc;
 
+	setup_min_unmapped_ratio();
+
+	return 0;
+}
+
+static void setup_min_slab_ratio(void)
+{
+	pg_data_t *pgdat;
+	struct zone *zone;
+
 	for_each_online_pgdat(pgdat)
 		pgdat->min_slab_pages = 0;
 
 	for_each_zone(zone)
 		zone->zone_pgdat->min_slab_pages += (zone->managed_pages *
 				sysctl_min_slab_ratio) / 100;
+}
+
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
+	void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int rc;
+
+	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (rc)
+		return rc;
+
+	setup_min_slab_ratio();
+
 	return 0;
 }
 #endif
diff --git a/mm/page_io.c b/mm/page_io.c
index 16bd82fa..eafe5dd 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -264,6 +264,7 @@
 	int ret;
 	struct swap_info_struct *sis = page_swap_info(page);
 
+	BUG_ON(!PageSwapCache(page));
 	if (sis->flags & SWP_FILE) {
 		struct kiocb kiocb;
 		struct file *swap_file = sis->swap_file;
@@ -337,6 +338,7 @@
 	int ret = 0;
 	struct swap_info_struct *sis = page_swap_info(page);
 
+	BUG_ON(!PageSwapCache(page));
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageUptodate(page), page);
 	if (frontswap_load(page) == 0) {
@@ -386,6 +388,7 @@
 
 	if (sis->flags & SWP_FILE) {
 		struct address_space *mapping = sis->swap_file->f_mapping;
+		BUG_ON(!PageSwapCache(page));
 		return mapping->a_ops->set_page_dirty(page);
 	} else {
 		return __set_page_dirty_no_writeback(page);
diff --git a/mm/readahead.c b/mm/readahead.c
index 65ec288..c8a955b 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/dax.h>
 #include <linux/gfp.h>
 #include <linux/export.h>
 #include <linux/blkdev.h>
@@ -544,6 +545,14 @@
 	if (!mapping || !mapping->a_ops)
 		return -EINVAL;
 
+	/*
+	 * Readahead doesn't make sense for DAX inodes, but we don't want it
+	 * to report a failure either.  Instead, we just return success and
+	 * don't do any work.
+	 */
+	if (dax_mapping(mapping))
+		return 0;
+
 	return force_page_cache_readahead(mapping, filp, index, nr);
 }
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 709bc83..1ef3640 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1284,8 +1284,9 @@
 		VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
 		__inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
 	} else {
-		if (PageTransCompound(page)) {
-			VM_BUG_ON_PAGE(!PageLocked(page), page);
+		if (PageTransCompound(page) && page_mapping(page)) {
+			VM_WARN_ON_ONCE(!PageLocked(page));
+
 			SetPageDoubleMap(compound_head(page));
 			if (PageMlocked(page))
 				clear_page_mlock(compound_head(page));
@@ -1303,7 +1304,7 @@
 {
 	int i, nr = 1;
 
-	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
+	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
 	lock_page_memcg(page);
 
 	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
diff --git a/mm/shmem.c b/mm/shmem.c
index 7f7748a..971fc83 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -270,7 +270,7 @@
 		info->alloced -= pages;
 		shmem_recalc_inode(inode);
 		spin_unlock_irqrestore(&info->lock, flags);
-
+		shmem_unacct_blocks(info->flags, pages);
 		return false;
 	}
 	percpu_counter_add(&sbinfo->used_blocks, pages);
@@ -291,6 +291,7 @@
 
 	if (sbinfo->max_blocks)
 		percpu_counter_sub(&sbinfo->used_blocks, pages);
+	shmem_unacct_blocks(info->flags, pages);
 }
 
 /*
@@ -1980,7 +1981,7 @@
 				return addr;
 			sb = shm_mnt->mnt_sb;
 		}
-		if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
+		if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER)
 			return addr;
 	}
 
@@ -3975,7 +3976,9 @@
 
 struct kobj_attribute shmem_enabled_attr =
 	__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
 
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
 bool shmem_huge_enabled(struct vm_area_struct *vma)
 {
 	struct inode *inode = file_inode(vma->vm_file);
@@ -4006,7 +4009,7 @@
 			return false;
 	}
 }
-#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE && CONFIG_SYSFS */
+#endif /* CONFIG_TRANSPARENT_HUGE_PAGECACHE */
 
 #else /* !CONFIG_SHMEM */
 
diff --git a/mm/slab.c b/mm/slab.c
index b672710..090fb26 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -886,6 +886,7 @@
 	return 0;
 }
 
+#if (defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)) || defined(CONFIG_SMP)
 /*
  * Allocates and initializes node for a node on each slab cache, used for
  * either memory or cpu hotplug.  If memory is being hot-added, the kmem_cache_node
@@ -908,6 +909,7 @@
 
 	return 0;
 }
+#endif
 
 static int setup_kmem_cache_node(struct kmem_cache *cachep,
 				int node, gfp_t gfp, bool force_change)
@@ -975,6 +977,8 @@
 	return ret;
 }
 
+#ifdef CONFIG_SMP
+
 static void cpuup_canceled(long cpu)
 {
 	struct kmem_cache *cachep;
@@ -1075,65 +1079,54 @@
 	return -ENOMEM;
 }
 
-static int cpuup_callback(struct notifier_block *nfb,
-				    unsigned long action, void *hcpu)
+int slab_prepare_cpu(unsigned int cpu)
 {
-	long cpu = (long)hcpu;
-	int err = 0;
+	int err;
 
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		mutex_lock(&slab_mutex);
-		err = cpuup_prepare(cpu);
-		mutex_unlock(&slab_mutex);
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		start_cpu_timer(cpu);
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
-  	case CPU_DOWN_PREPARE:
-  	case CPU_DOWN_PREPARE_FROZEN:
-		/*
-		 * Shutdown cache reaper. Note that the slab_mutex is
-		 * held so that if cache_reap() is invoked it cannot do
-		 * anything expensive but will only modify reap_work
-		 * and reschedule the timer.
-		*/
-		cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
-		/* Now the cache_reaper is guaranteed to be not running. */
-		per_cpu(slab_reap_work, cpu).work.func = NULL;
-  		break;
-  	case CPU_DOWN_FAILED:
-  	case CPU_DOWN_FAILED_FROZEN:
-		start_cpu_timer(cpu);
-  		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		/*
-		 * Even if all the cpus of a node are down, we don't free the
-		 * kmem_cache_node of any cache. This to avoid a race between
-		 * cpu_down, and a kmalloc allocation from another cpu for
-		 * memory from the node of the cpu going down.  The node
-		 * structure is usually allocated from kmem_cache_create() and
-		 * gets destroyed at kmem_cache_destroy().
-		 */
-		/* fall through */
-#endif
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		mutex_lock(&slab_mutex);
-		cpuup_canceled(cpu);
-		mutex_unlock(&slab_mutex);
-		break;
-	}
-	return notifier_from_errno(err);
+	mutex_lock(&slab_mutex);
+	err = cpuup_prepare(cpu);
+	mutex_unlock(&slab_mutex);
+	return err;
 }
 
-static struct notifier_block cpucache_notifier = {
-	&cpuup_callback, NULL, 0
-};
+/*
+ * This is called for a failed online attempt and for a successful
+ * offline.
+ *
+ * Even if all the cpus of a node are down, we don't free the
+ * kmem_list3 of any cache. This to avoid a race between cpu_down, and
+ * a kmalloc allocation from another cpu for memory from the node of
+ * the cpu going down.  The list3 structure is usually allocated from
+ * kmem_cache_create() and gets destroyed at kmem_cache_destroy().
+ */
+int slab_dead_cpu(unsigned int cpu)
+{
+	mutex_lock(&slab_mutex);
+	cpuup_canceled(cpu);
+	mutex_unlock(&slab_mutex);
+	return 0;
+}
+#endif
+
+static int slab_online_cpu(unsigned int cpu)
+{
+	start_cpu_timer(cpu);
+	return 0;
+}
+
+static int slab_offline_cpu(unsigned int cpu)
+{
+	/*
+	 * Shutdown cache reaper. Note that the slab_mutex is held so
+	 * that if cache_reap() is invoked it cannot do anything
+	 * expensive but will only modify reap_work and reschedule the
+	 * timer.
+	 */
+	cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
+	/* Now the cache_reaper is guaranteed to be not running. */
+	per_cpu(slab_reap_work, cpu).work.func = NULL;
+	return 0;
+}
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
 /*
@@ -1336,12 +1329,6 @@
 	/* Done! */
 	slab_state = FULL;
 
-	/*
-	 * Register a cpu startup notifier callback that initializes
-	 * cpu_cache_get for all new cpus
-	 */
-	register_cpu_notifier(&cpucache_notifier);
-
 #ifdef CONFIG_NUMA
 	/*
 	 * Register a memory hotplug callback that initializes and frees
@@ -1358,13 +1345,14 @@
 
 static int __init cpucache_init(void)
 {
-	int cpu;
+	int ret;
 
 	/*
 	 * Register the timers that return unneeded pages to the page allocator
 	 */
-	for_each_online_cpu(cpu)
-		start_cpu_timer(cpu);
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "SLAB online",
+				slab_online_cpu, slab_offline_cpu);
+	WARN_ON(ret < 0);
 
 	/* Done! */
 	slab_state = FULL;
diff --git a/mm/slub.c b/mm/slub.c
index cead063..2b3e740 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -194,10 +194,6 @@
 #define __OBJECT_POISON		0x80000000UL /* Poison object */
 #define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */
 
-#ifdef CONFIG_SMP
-static struct notifier_block slab_notifier;
-#endif
-
 /*
  * Tracking user of a slab.
  */
@@ -2305,6 +2301,25 @@
 }
 
 /*
+ * Use the cpu notifier to insure that the cpu slabs are flushed when
+ * necessary.
+ */
+static int slub_cpu_dead(unsigned int cpu)
+{
+	struct kmem_cache *s;
+	unsigned long flags;
+
+	mutex_lock(&slab_mutex);
+	list_for_each_entry(s, &slab_caches, list) {
+		local_irq_save(flags);
+		__flush_cpu_slab(s, cpu);
+		local_irq_restore(flags);
+	}
+	mutex_unlock(&slab_mutex);
+	return 0;
+}
+
+/*
  * Check if the objects in a per cpu structure fit numa
  * locality expectations.
  */
@@ -3629,6 +3644,7 @@
  */
 static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 {
+	LIST_HEAD(discard);
 	struct page *page, *h;
 
 	BUG_ON(irqs_disabled());
@@ -3636,13 +3652,16 @@
 	list_for_each_entry_safe(page, h, &n->partial, lru) {
 		if (!page->inuse) {
 			remove_partial(n, page);
-			discard_slab(s, page);
+			list_add(&page->lru, &discard);
 		} else {
 			list_slab_objects(s, page,
 			"Objects remaining in %s on __kmem_cache_shutdown()");
 		}
 	}
 	spin_unlock_irq(&n->list_lock);
+
+	list_for_each_entry_safe(page, h, &discard, lru)
+		discard_slab(s, page);
 }
 
 /*
@@ -4140,9 +4159,8 @@
 	/* Setup random freelists for each cache */
 	init_freelist_randomization();
 
-#ifdef CONFIG_SMP
-	register_cpu_notifier(&slab_notifier);
-#endif
+	cpuhp_setup_state_nocalls(CPUHP_SLUB_DEAD, "slub:dead", NULL,
+				  slub_cpu_dead);
 
 	pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n",
 		cache_line_size(),
@@ -4206,43 +4224,6 @@
 	return err;
 }
 
-#ifdef CONFIG_SMP
-/*
- * Use the cpu notifier to insure that the cpu slabs are flushed when
- * necessary.
- */
-static int slab_cpuup_callback(struct notifier_block *nfb,
-		unsigned long action, void *hcpu)
-{
-	long cpu = (long)hcpu;
-	struct kmem_cache *s;
-	unsigned long flags;
-
-	switch (action) {
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		mutex_lock(&slab_mutex);
-		list_for_each_entry(s, &slab_caches, list) {
-			local_irq_save(flags);
-			__flush_cpu_slab(s, cpu);
-			local_irq_restore(flags);
-		}
-		mutex_unlock(&slab_mutex);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block slab_notifier = {
-	.notifier_call = slab_cpuup_callback
-};
-
-#endif
-
 void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 {
 	struct kmem_cache *s;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 78cfa29..2657acc 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2724,7 +2724,6 @@
 struct swap_info_struct *page_swap_info(struct page *page)
 {
 	swp_entry_t swap = { .val = page_private(page) };
-	BUG_ON(!PageSwapCache(page));
 	return swap_info[swp_type(swap)];
 }
 
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 8ebae91..3c8da0a 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -83,7 +83,7 @@
 	unsigned long check_high = check_low + n;
 
 	/* Does not overlap if entirely above or entirely below. */
-	if (check_low >= high || check_high < low)
+	if (check_low >= high || check_high <= low)
 		return false;
 
 	return true;
@@ -124,7 +124,7 @@
 static inline const char *check_bogus_address(const void *ptr, unsigned long n)
 {
 	/* Reject if object wraps past end of memory. */
-	if (ptr + n < ptr)
+	if ((unsigned long)ptr + n < (unsigned long)ptr)
 		return "<wrapped address>";
 
 	/* Reject if NULL or ZERO-allocation. */
@@ -134,31 +134,16 @@
 	return NULL;
 }
 
-static inline const char *check_heap_object(const void *ptr, unsigned long n,
-					    bool to_user)
+/* Checks for allocs that are marked in some way as spanning multiple pages. */
+static inline const char *check_page_span(const void *ptr, unsigned long n,
+					  struct page *page, bool to_user)
 {
-	struct page *page, *endpage;
+#ifdef CONFIG_HARDENED_USERCOPY_PAGESPAN
 	const void *end = ptr + n - 1;
+	struct page *endpage;
 	bool is_reserved, is_cma;
 
 	/*
-	 * Some architectures (arm64) return true for virt_addr_valid() on
-	 * vmalloced addresses. Work around this by checking for vmalloc
-	 * first.
-	 */
-	if (is_vmalloc_addr(ptr))
-		return NULL;
-
-	if (!virt_addr_valid(ptr))
-		return NULL;
-
-	page = virt_to_head_page(ptr);
-
-	/* Check slab allocator for flags and size. */
-	if (PageSlab(page))
-		return __check_heap_object(ptr, n, page);
-
-	/*
 	 * Sometimes the kernel data regions are not marked Reserved (see
 	 * check below). And sometimes [_sdata,_edata) does not cover
 	 * rodata and/or bss, so check each range explicitly.
@@ -186,7 +171,7 @@
 		   ((unsigned long)end & (unsigned long)PAGE_MASK)))
 		return NULL;
 
-	/* Allow if start and end are inside the same compound page. */
+	/* Allow if fully inside the same compound (__GFP_COMP) page. */
 	endpage = virt_to_head_page(end);
 	if (likely(endpage == page))
 		return NULL;
@@ -199,20 +184,47 @@
 	is_reserved = PageReserved(page);
 	is_cma = is_migrate_cma_page(page);
 	if (!is_reserved && !is_cma)
-		goto reject;
+		return "<spans multiple pages>";
 
 	for (ptr += PAGE_SIZE; ptr <= end; ptr += PAGE_SIZE) {
 		page = virt_to_head_page(ptr);
 		if (is_reserved && !PageReserved(page))
-			goto reject;
+			return "<spans Reserved and non-Reserved pages>";
 		if (is_cma && !is_migrate_cma_page(page))
-			goto reject;
+			return "<spans CMA and non-CMA pages>";
 	}
+#endif
 
 	return NULL;
+}
 
-reject:
-	return "<spans multiple pages>";
+static inline const char *check_heap_object(const void *ptr, unsigned long n,
+					    bool to_user)
+{
+	struct page *page;
+
+	/*
+	 * Some architectures (arm64) return true for virt_addr_valid() on
+	 * vmalloced addresses. Work around this by checking for vmalloc
+	 * first.
+	 *
+	 * We also need to check for module addresses explicitly since we
+	 * may copy static data from modules to userspace
+	 */
+	if (is_vmalloc_or_module_addr(ptr))
+		return NULL;
+
+	if (!virt_addr_valid(ptr))
+		return NULL;
+
+	page = virt_to_head_page(ptr);
+
+	/* Check slab allocator for flags and size. */
+	if (PageSlab(page))
+		return __check_heap_object(ptr, n, page);
+
+	/* Verify object does not incorrectly span multiple pages. */
+	return check_page_span(ptr, n, page, to_user);
 }
 
 /*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 374d95d..0fe8b71 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1665,7 +1665,7 @@
 
 	for (zid = sc->reclaim_idx; zid >= 0; zid--) {
 		zone = &pgdat->node_zones[zid];
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		if (zone_page_state_snapshot(zone, NR_ZONE_LRU_BASE +
@@ -2036,7 +2036,7 @@
 		struct zone *zone = &pgdat->node_zones[zid];
 		unsigned long inactive_zone, active_zone;
 
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		inactive_zone = zone_page_state(zone,
@@ -2171,7 +2171,7 @@
 
 		for (z = 0; z < MAX_NR_ZONES; z++) {
 			struct zone *zone = &pgdat->node_zones[z];
-			if (!populated_zone(zone))
+			if (!managed_zone(zone))
 				continue;
 
 			total_high_wmark += high_wmark_pages(zone);
@@ -2303,23 +2303,6 @@
 	}
 }
 
-#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
-static void init_tlb_ubc(void)
-{
-	/*
-	 * This deliberately does not clear the cpumask as it's expensive
-	 * and unnecessary. If there happens to be data in there then the
-	 * first SWAP_CLUSTER_MAX pages will send an unnecessary IPI and
-	 * then will be cleared.
-	 */
-	current->tlb_ubc.flush_required = false;
-}
-#else
-static inline void init_tlb_ubc(void)
-{
-}
-#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
-
 /*
  * This is a basic per-node page freer.  Used by both kswapd and direct reclaim.
  */
@@ -2355,8 +2338,6 @@
 	scan_adjusted = (global_reclaim(sc) && !current_is_kswapd() &&
 			 sc->priority == DEF_PRIORITY);
 
-	init_tlb_ubc();
-
 	blk_start_plug(&plug);
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
@@ -2510,7 +2491,7 @@
 	/* If compaction would go ahead or the allocation would succeed, stop */
 	for (z = 0; z <= sc->reclaim_idx; z++) {
 		struct zone *zone = &pgdat->node_zones[z];
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
@@ -2840,7 +2821,7 @@
 
 	for (i = 0; i <= ZONE_NORMAL; i++) {
 		zone = &pgdat->node_zones[i];
-		if (!populated_zone(zone) ||
+		if (!managed_zone(zone) ||
 		    pgdat_reclaimable_pages(pgdat) == 0)
 			continue;
 
@@ -3141,7 +3122,7 @@
 	for (i = 0; i <= classzone_idx; i++) {
 		struct zone *zone = pgdat->node_zones + i;
 
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		if (!zone_balanced(zone, order, classzone_idx))
@@ -3169,7 +3150,7 @@
 	sc->nr_to_reclaim = 0;
 	for (z = 0; z <= sc->reclaim_idx; z++) {
 		zone = pgdat->node_zones + z;
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		sc->nr_to_reclaim += max(high_wmark_pages(zone), SWAP_CLUSTER_MAX);
@@ -3242,7 +3223,7 @@
 		if (buffer_heads_over_limit) {
 			for (i = MAX_NR_ZONES - 1; i >= 0; i--) {
 				zone = pgdat->node_zones + i;
-				if (!populated_zone(zone))
+				if (!managed_zone(zone))
 					continue;
 
 				sc.reclaim_idx = i;
@@ -3262,7 +3243,7 @@
 		 */
 		for (i = classzone_idx; i >= 0; i--) {
 			zone = pgdat->node_zones + i;
-			if (!populated_zone(zone))
+			if (!managed_zone(zone))
 				continue;
 
 			if (zone_balanced(zone, sc.order, classzone_idx))
@@ -3508,7 +3489,7 @@
 	pg_data_t *pgdat;
 	int z;
 
-	if (!populated_zone(zone))
+	if (!managed_zone(zone))
 		return;
 
 	if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
@@ -3522,7 +3503,7 @@
 	/* Only wake kswapd if all zones are unbalanced */
 	for (z = 0; z <= classzone_idx; z++) {
 		zone = pgdat->node_zones + z;
-		if (!populated_zone(zone))
+		if (!managed_zone(zone))
 			continue;
 
 		if (zone_balanced(zone, order, classzone_idx))
diff --git a/mm/workingset.c b/mm/workingset.c
index 69551cf..617475f 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -418,21 +418,19 @@
 	 * no pages, so we expect to be able to remove them all and
 	 * delete and free the empty node afterwards.
 	 */
-
-	BUG_ON(!node->count);
-	BUG_ON(node->count & RADIX_TREE_COUNT_MASK);
+	BUG_ON(!workingset_node_shadows(node));
+	BUG_ON(workingset_node_pages(node));
 
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		if (node->slots[i]) {
 			BUG_ON(!radix_tree_exceptional_entry(node->slots[i]));
 			node->slots[i] = NULL;
-			BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT));
-			node->count -= 1U << RADIX_TREE_COUNT_SHIFT;
+			workingset_node_shadows_dec(node);
 			BUG_ON(!mapping->nrexceptional);
 			mapping->nrexceptional--;
 		}
 	}
-	BUG_ON(node->count);
+	BUG_ON(workingset_node_shadows(node));
 	inc_node_state(page_pgdat(virt_to_page(node)), WORKINGSET_NODERECLAIM);
 	if (!__radix_tree_delete_node(&mapping->page_tree, node))
 		BUG();
diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c
index 86450b7..941df2f 100644
--- a/net/6lowpan/ndisc.c
+++ b/net/6lowpan/ndisc.c
@@ -101,8 +101,6 @@
 		ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short);
 		if (!lowpan_802154_is_valid_src_short_addr(neigh->short_addr))
 			neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
-	} else {
-		neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
 	}
 	write_unlock_bh(&n->lock);
 }
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 82a116b..8de138d 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -169,7 +169,7 @@
 	if (err < 0)
 		goto out_uninit_mvrp;
 
-	vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1;
+	vlan->nest_level = dev_get_nest_level(real_dev) + 1;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto out_uninit_mvrp;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 4acb1d5..f24b25c 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -507,8 +507,8 @@
 		/* wakeup anybody waiting for slots to pin pages */
 		wake_up(&vp_wq);
 	}
-	kfree(in_pages);
-	kfree(out_pages);
+	kvfree(in_pages);
+	kvfree(out_pages);
 	return err;
 }
 
diff --git a/net/Kconfig b/net/Kconfig
index c2cdbce..7b6cd34 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -369,6 +369,7 @@
 source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
 source "net/kcm/Kconfig"
+source "net/strparser/Kconfig"
 
 config FIB_RULES
 	bool
diff --git a/net/Makefile b/net/Makefile
index 9bd20bb..4cafaa2 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -35,6 +35,7 @@
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
 obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_AF_KCM)		+= kcm/
+obj-$(CONFIG_STREAM_PARSER)	+= strparser/
 obj-$(CONFIG_ATM)		+= atm/
 obj-$(CONFIG_L2TP)		+= l2tp/
 obj-$(CONFIG_DECNET)		+= decnet/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index f066781..10d2bdc 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1278,7 +1278,7 @@
 	return err;
 }
 
-#if defined(CONFIG_IPDDP) || defined(CONFIG_IPDDP_MODULE)
+#if IS_ENABLED(CONFIG_IPDDP)
 static __inline__ int is_ip_over_ddp(struct sk_buff *skb)
 {
 	return skb->data[12] == 22;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index e574a7e..5d26938 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -31,7 +31,7 @@
 #include <linux/atmlec.h>
 
 /* Proxy LEC knows about bridging */
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+#if IS_ENABLED(CONFIG_BRIDGE)
 #include "../bridge/br_private.h"
 
 static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 };
@@ -121,7 +121,7 @@
 /* Device structures */
 static struct net_device *dev_lec[MAX_LEC_ITF];
 
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+#if IS_ENABLED(CONFIG_BRIDGE)
 static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
 {
 	char *buff;
@@ -155,7 +155,7 @@
 		sk->sk_data_ready(sk);
 	}
 }
-#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
+#endif /* IS_ENABLED(CONFIG_BRIDGE) */
 
 /*
  * Open/initialize the netdevice. This is called (in the current kernel)
@@ -222,7 +222,7 @@
 	pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n",
 		 (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb),
 		 (long)skb_end_pointer(skb));
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+#if IS_ENABLED(CONFIG_BRIDGE)
 	if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0)
 		lec_handle_bridge(skb, dev);
 #endif
@@ -426,7 +426,7 @@
 		    (unsigned short)(0xffff & mesg->content.normal.flag);
 		break;
 	case l_should_bridge:
-#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
+#if IS_ENABLED(CONFIG_BRIDGE)
 	{
 		pr_debug("%s: bridge zeppelin asks about %pM\n",
 			 dev->name, mesg->content.proxy.mac_addr);
@@ -452,7 +452,7 @@
 			sk->sk_data_ready(sk);
 		}
 	}
-#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
+#endif /* IS_ENABLED(CONFIG_BRIDGE) */
 		break;
 	default:
 		pr_info("%s: Unknown message type %d\n", dev->name, mesg->type);
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 0e98222..3b3b1a292 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1007,7 +1007,7 @@
 	if (!net_eq(dev_net(dev), &init_net))
 		return NOTIFY_DONE;
 
-	if (dev->name == NULL || strncmp(dev->name, "lec", 3))
+	if (strncmp(dev->name, "lec", 3))
 		return NOTIFY_DONE; /* we are only interested in lec:s */
 
 	switch (event) {
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 833bb14..f20742c 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -73,10 +73,21 @@
 	  reduce the air overhead while improving the reliability of
 	  multicast messages.
 
-config BATMAN_ADV_DEBUG
-	bool "B.A.T.M.A.N. debugging"
+config BATMAN_ADV_DEBUGFS
+	bool "batman-adv debugfs entries"
 	depends on BATMAN_ADV
 	depends on DEBUG_FS
+	default y
+	help
+	  Enable this to export routing related debug tables via debugfs.
+	  The information for each soft-interface and used hard-interface can be
+	  found under batman_adv/
+
+	  If unsure, say Y.
+
+config BATMAN_ADV_DEBUG
+	bool "B.A.T.M.A.N. debugging"
+	depends on BATMAN_ADV_DEBUGFS
 	help
 	  This is an option for use by developers; most people should
 	  say N here. This enables compilation of support for
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index a83fc6c..f724d3c 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -24,14 +24,14 @@
 batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_ogm.o
 batman-adv-y += bitarray.o
 batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o
-batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o
+batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += debugfs.o
 batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o
 batman-adv-y += fragmentation.o
 batman-adv-y += gateway_client.o
 batman-adv-y += gateway_common.o
 batman-adv-y += hard-interface.o
 batman-adv-y += hash.o
-batman-adv-y += icmp_socket.o
+batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += icmp_socket.o
 batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
 batman-adv-y += main.o
 batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c
index 81dbbf5..623d043 100644
--- a/net/batman-adv/bat_algo.c
+++ b/net/batman-adv/bat_algo.c
@@ -20,12 +20,18 @@
 #include <linux/errno.h>
 #include <linux/list.h>
 #include <linux/moduleparam.h>
+#include <linux/netlink.h>
 #include <linux/printk.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
+#include "netlink.h"
 
 char batadv_routing_algo[20] = "BATMAN_IV";
 static struct hlist_head batadv_algo_list;
@@ -95,6 +101,7 @@
 	return 0;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct batadv_algo_ops *bat_algo_ops;
@@ -107,6 +114,7 @@
 
 	return 0;
 }
+#endif
 
 static int batadv_param_set_ra(const char *val, const struct kernel_param *kp)
 {
@@ -138,3 +146,65 @@
 
 module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra,
 		0644);
+
+/**
+ * batadv_algo_dump_entry - fill in information about one supported routing
+ *  algorithm
+ * @msg: netlink message to be sent back
+ * @portid: Port to reply to
+ * @seq: Sequence number of message
+ * @bat_algo_ops: Algorithm to be dumped
+ *
+ * Return: Error number, or 0 on success
+ */
+static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				  struct batadv_algo_ops *bat_algo_ops)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_algo_dump - fill in information about supported routing
+ *  algorithms
+ * @msg: netlink message to be sent back
+ * @cb: Parameters to the netlink request
+ *
+ * Return: Length of reply message.
+ */
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct batadv_algo_ops *bat_algo_ops;
+	int skip = cb->args[0];
+	int i = 0;
+
+	hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
+		if (i++ < skip)
+			continue;
+
+		if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+					   bat_algo_ops)) {
+			i--;
+			break;
+		}
+	}
+
+	cb->args[0] = i;
+
+	return msg->len;
+}
diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h
index 860d773..3b5b69c 100644
--- a/net/batman-adv/bat_algo.h
+++ b/net/batman-adv/bat_algo.h
@@ -22,7 +22,9 @@
 
 #include <linux/types.h>
 
+struct netlink_callback;
 struct seq_file;
+struct sk_buff;
 
 extern char batadv_routing_algo[];
 extern struct list_head batadv_hardif_list;
@@ -31,5 +33,6 @@
 int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops);
 int batadv_algo_select(struct batadv_priv *bat_priv, char *name);
 int batadv_algo_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb);
 
 #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 19b0abd..e2d18d0 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/pkt_sched.h>
 #include <linux/printk.h>
 #include <linux/random.h>
@@ -48,12 +49,17 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bitarray.h"
+#include "gateway_client.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "originator.h"
 #include "packet.h"
@@ -318,17 +324,18 @@
 	if (!orig_node->bat_iv.bcast_own_sum)
 		goto free_orig_node;
 
+	kref_get(&orig_node->refcount);
 	hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
 				     batadv_choose_orig, orig_node,
 				     &orig_node->hash_entry);
 	if (hash_added != 0)
-		goto free_orig_node;
+		goto free_orig_node_hash;
 
 	return orig_node;
 
-free_orig_node:
-	/* free twice, as batadv_orig_node_new sets refcount to 2 */
+free_orig_node_hash:
 	batadv_orig_node_put(orig_node);
+free_orig_node:
 	batadv_orig_node_put(orig_node);
 
 	return NULL;
@@ -528,36 +535,25 @@
 static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
 {
 	struct net_device *soft_iface;
-	struct batadv_priv *bat_priv;
-	struct batadv_hard_iface *primary_if = NULL;
 
 	if (!forw_packet->if_incoming) {
 		pr_err("Error - can't forward packet: incoming iface not specified\n");
-		goto out;
+		return;
 	}
 
 	soft_iface = forw_packet->if_incoming->soft_iface;
-	bat_priv = netdev_priv(soft_iface);
 
 	if (WARN_ON(!forw_packet->if_outgoing))
-		goto out;
+		return;
 
 	if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
-		goto out;
+		return;
 
 	if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
-		goto out;
-
-	primary_if = batadv_primary_if_get_selected(bat_priv);
-	if (!primary_if)
-		goto out;
+		return;
 
 	/* only for one specific outgoing interface */
 	batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing);
-
-out:
-	if (primary_if)
-		batadv_hardif_put(primary_if);
 }
 
 /**
@@ -685,19 +681,12 @@
 	struct batadv_forw_packet *forw_packet_aggr;
 	unsigned char *skb_buff;
 	unsigned int skb_size;
+	atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left;
 
-	/* own packet should always be scheduled */
-	if (!own_packet) {
-		if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) {
-			batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-				   "batman packet queue full\n");
-			return;
-		}
-	}
-
-	forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC);
+	forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing,
+						    queue_left, bat_priv);
 	if (!forw_packet_aggr)
-		goto out_nomem;
+		return;
 
 	if (atomic_read(&bat_priv->aggregated_ogms) &&
 	    packet_len < BATADV_MAX_AGGREGATION_BYTES)
@@ -708,8 +697,11 @@
 	skb_size += ETH_HLEN;
 
 	forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size);
-	if (!forw_packet_aggr->skb)
-		goto out_free_forw_packet;
+	if (!forw_packet_aggr->skb) {
+		batadv_forw_packet_free(forw_packet_aggr);
+		return;
+	}
+
 	forw_packet_aggr->skb->priority = TC_PRIO_CONTROL;
 	skb_reserve(forw_packet_aggr->skb, ETH_HLEN);
 
@@ -717,12 +709,7 @@
 	forw_packet_aggr->packet_len = packet_len;
 	memcpy(skb_buff, packet_buff, packet_len);
 
-	kref_get(&if_incoming->refcount);
-	kref_get(&if_outgoing->refcount);
 	forw_packet_aggr->own = own_packet;
-	forw_packet_aggr->if_incoming = if_incoming;
-	forw_packet_aggr->if_outgoing = if_outgoing;
-	forw_packet_aggr->num_packets = 0;
 	forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS;
 	forw_packet_aggr->send_time = send_time;
 
@@ -741,13 +728,6 @@
 	queue_delayed_work(batadv_event_workqueue,
 			   &forw_packet_aggr->delayed_work,
 			   send_time - jiffies);
-
-	return;
-out_free_forw_packet:
-	kfree(forw_packet_aggr);
-out_nomem:
-	if (!own_packet)
-		atomic_inc(&bat_priv->batman_queue_left);
 }
 
 /* aggregate a new packet into the existing ogm packet */
@@ -1830,10 +1810,6 @@
 		batadv_iv_ogm_schedule(forw_packet->if_incoming);
 
 out:
-	/* don't count own packet */
-	if (!forw_packet->own)
-		atomic_inc(&bat_priv->batman_queue_left);
-
 	batadv_forw_packet_free(forw_packet);
 }
 
@@ -1879,6 +1855,7 @@
 	return NET_RX_SUCCESS;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table
  * @orig_node: the orig_node for which the neighbors are printed
@@ -1976,8 +1953,239 @@
 	if (batman_count == 0)
 		seq_puts(seq, "No batman nodes in range ...\n");
 }
+#endif
 
 /**
+ * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a
+ *  given outgoing interface.
+ * @neigh_node: Neighbour of interest
+ * @if_outgoing: Outgoing interface of interest
+ * @tq_avg: Pointer of where to store the TQ average
+ *
+ * Return: False if no average TQ available, otherwise true.
+ */
+static bool
+batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node,
+			       struct batadv_hard_iface *if_outgoing,
+			       u8 *tq_avg)
+{
+	struct batadv_neigh_ifinfo *n_ifinfo;
+
+	n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+	if (!n_ifinfo)
+		return false;
+
+	*tq_avg = n_ifinfo->bat_iv.tq_avg;
+	batadv_neigh_ifinfo_put(n_ifinfo);
+
+	return true;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hops neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+				 struct batadv_priv *bat_priv,
+				 struct batadv_hard_iface *if_outgoing,
+				 struct batadv_orig_node *orig_node,
+				 struct batadv_neigh_node *neigh_node,
+				 bool best)
+{
+	void *hdr;
+	u8 tq_avg;
+	unsigned int last_seen_msecs;
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
+	if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg))
+		return 0;
+
+	if (if_outgoing != BATADV_IF_DEFAULT &&
+	    if_outgoing != neigh_node->if_incoming)
+		return 0;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    orig_node->orig) ||
+	    nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    neigh_node->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			neigh_node->if_incoming->net_dev->ifindex) ||
+	    nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs))
+		goto nla_put_failure;
+
+	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			      struct batadv_priv *bat_priv,
+			      struct batadv_hard_iface *if_outgoing,
+			      struct batadv_orig_node *orig_node, int *sub_s)
+{
+	struct batadv_neigh_node *neigh_node_best;
+	struct batadv_neigh_node *neigh_node;
+	int sub = 0;
+	bool best;
+	u8 tq_avg_best;
+
+	neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+	if (!neigh_node_best)
+		goto out;
+
+	if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing,
+					    &tq_avg_best))
+		goto out;
+
+	if (tq_avg_best == 0)
+		goto out;
+
+	hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (neigh_node == neigh_node_best);
+
+		if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq,
+						     bat_priv, if_outgoing,
+						     orig_node, neigh_node,
+						     best)) {
+			batadv_neigh_node_put(neigh_node_best);
+
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+ out:
+	if (neigh_node_best)
+		batadv_neigh_node_put(neigh_node_best);
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_priv *bat_priv,
+			       struct batadv_hard_iface *if_outgoing,
+			       struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_orig_node *orig_node;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv,
+						  if_outgoing, orig_node,
+						  sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_iv_ogm_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			struct batadv_priv *bat_priv,
+			struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_iv_ogm_orig_dump_bucket(msg, portid,
+						   cb->nlh->nlmsg_seq,
+						   bat_priv, if_outgoing, head,
+						   &idx, &sub))
+			break;
+
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/**
  * batadv_iv_hardif_neigh_print - print a single hop neighbour node
  * @seq: neighbour table seq_file struct
  * @hardif_neigh: hardif neighbour information
@@ -2027,6 +2235,182 @@
 	if (batman_count == 0)
 		seq_puts(seq, "No batman nodes in range ...\n");
 }
+#endif
+
+/**
+ * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors
+ * @neigh1: the first neighbor object of the comparison
+ * @if_outgoing1: outgoing interface for the first neighbor
+ * @neigh2: the second neighbor object of the comparison
+ * @if_outgoing2: outgoing interface for the second neighbor
+ * @diff: pointer to integer receiving the calculated difference
+ *
+ * The content of *@diff is only valid when this function returns true.
+ * It is less, equal to or greater than 0 if the metric via neigh1 is lower,
+ * the same as or higher than the metric via neigh2
+ *
+ * Return: true when the difference could be calculated, false otherwise
+ */
+static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1,
+				     struct batadv_hard_iface *if_outgoing1,
+				     struct batadv_neigh_node *neigh2,
+				     struct batadv_hard_iface *if_outgoing2,
+				     int *diff)
+{
+	struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
+	u8 tq1, tq2;
+	bool ret = true;
+
+	neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
+	neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
+
+	if (!neigh1_ifinfo || !neigh2_ifinfo) {
+		ret = false;
+		goto out;
+	}
+
+	tq1 = neigh1_ifinfo->bat_iv.tq_avg;
+	tq2 = neigh2_ifinfo->bat_iv.tq_avg;
+	*diff = (int)tq1 - (int)tq2;
+
+out:
+	if (neigh1_ifinfo)
+		batadv_neigh_ifinfo_put(neigh1_ifinfo);
+	if (neigh2_ifinfo)
+		batadv_neigh_ifinfo_put(neigh2_ifinfo);
+
+	return ret;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_hardif_neigh_node *hardif_neigh)
+{
+	void *hdr;
+	unsigned int last_seen_msecs;
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    hardif_neigh->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			hardif_neigh->if_incoming->net_dev->ifindex) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface
+ *  into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: Hard interface to dump the neighbours for
+ * @idx_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+				struct batadv_priv *bat_priv,
+				struct batadv_hard_iface *hard_iface,
+				int *idx_s)
+{
+	struct batadv_hardif_neigh_node *hardif_neigh;
+	int idx = 0;
+
+	hlist_for_each_entry_rcu(hardif_neigh,
+				 &hard_iface->neigh_list, list) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq,
+						   hardif_neigh)) {
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*idx_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dump to this hard interfaace
+ */
+static void
+batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			 struct batadv_priv *bat_priv,
+			 struct batadv_hard_iface *single_hardif)
+{
+	struct batadv_hard_iface *hard_iface;
+	int i_hardif = 0;
+	int i_hardif_s = cb->args[0];
+	int idx = cb->args[1];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	rcu_read_lock();
+	if (single_hardif) {
+		if (i_hardif_s == 0) {
+			if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+							    cb->nlh->nlmsg_seq,
+							    bat_priv,
+							    single_hardif,
+							    &idx) == 0)
+				i_hardif++;
+		}
+	} else {
+		list_for_each_entry_rcu(hard_iface, &batadv_hardif_list,
+					list) {
+			if (hard_iface->soft_iface != bat_priv->soft_iface)
+				continue;
+
+			if (i_hardif++ < i_hardif_s)
+				continue;
+
+			if (batadv_iv_ogm_neigh_dump_hardif(msg, portid,
+							    cb->nlh->nlmsg_seq,
+							    bat_priv,
+							    hard_iface, &idx)) {
+				i_hardif--;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	cb->args[0] = i_hardif;
+	cb->args[1] = idx;
+}
 
 /**
  * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors
@@ -2043,27 +2427,13 @@
 				   struct batadv_neigh_node *neigh2,
 				   struct batadv_hard_iface *if_outgoing2)
 {
-	struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
-	u8 tq1, tq2;
+	bool ret;
 	int diff;
 
-	neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
-	neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
-
-	if (!neigh1_ifinfo || !neigh2_ifinfo) {
-		diff = 0;
-		goto out;
-	}
-
-	tq1 = neigh1_ifinfo->bat_iv.tq_avg;
-	tq2 = neigh2_ifinfo->bat_iv.tq_avg;
-	diff = tq1 - tq2;
-
-out:
-	if (neigh1_ifinfo)
-		batadv_neigh_ifinfo_put(neigh1_ifinfo);
-	if (neigh2_ifinfo)
-		batadv_neigh_ifinfo_put(neigh2_ifinfo);
+	ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2,
+				       if_outgoing2, &diff);
+	if (!ret)
+		return 0;
 
 	return diff;
 }
@@ -2085,29 +2455,15 @@
 			   struct batadv_neigh_node *neigh2,
 			   struct batadv_hard_iface *if_outgoing2)
 {
-	struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo;
-	u8 tq1, tq2;
 	bool ret;
+	int diff;
 
-	neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1);
-	neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2);
+	ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2,
+				       if_outgoing2, &diff);
+	if (!ret)
+		return false;
 
-	/* we can't say that the metric is better */
-	if (!neigh1_ifinfo || !neigh2_ifinfo) {
-		ret = false;
-		goto out;
-	}
-
-	tq1 = neigh1_ifinfo->bat_iv.tq_avg;
-	tq2 = neigh2_ifinfo->bat_iv.tq_avg;
-	ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD;
-
-out:
-	if (neigh1_ifinfo)
-		batadv_neigh_ifinfo_put(neigh1_ifinfo);
-	if (neigh2_ifinfo)
-		batadv_neigh_ifinfo_put(neigh2_ifinfo);
-
+	ret = diff > -BATADV_TQ_SIMILARITY_THRESHOLD;
 	return ret;
 }
 
@@ -2117,6 +2473,325 @@
 	batadv_iv_ogm_schedule(hard_iface);
 }
 
+static struct batadv_gw_node *
+batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
+{
+	struct batadv_neigh_node *router;
+	struct batadv_neigh_ifinfo *router_ifinfo;
+	struct batadv_gw_node *gw_node, *curr_gw = NULL;
+	u64 max_gw_factor = 0;
+	u64 tmp_gw_factor = 0;
+	u8 max_tq = 0;
+	u8 tq_avg;
+	struct batadv_orig_node *orig_node;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		orig_node = gw_node->orig_node;
+		router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+		if (!router)
+			continue;
+
+		router_ifinfo = batadv_neigh_ifinfo_get(router,
+							BATADV_IF_DEFAULT);
+		if (!router_ifinfo)
+			goto next;
+
+		if (!kref_get_unless_zero(&gw_node->refcount))
+			goto next;
+
+		tq_avg = router_ifinfo->bat_iv.tq_avg;
+
+		switch (atomic_read(&bat_priv->gw.sel_class)) {
+		case 1: /* fast connection */
+			tmp_gw_factor = tq_avg * tq_avg;
+			tmp_gw_factor *= gw_node->bandwidth_down;
+			tmp_gw_factor *= 100 * 100;
+			tmp_gw_factor >>= 18;
+
+			if ((tmp_gw_factor > max_gw_factor) ||
+			    ((tmp_gw_factor == max_gw_factor) &&
+			     (tq_avg > max_tq))) {
+				if (curr_gw)
+					batadv_gw_node_put(curr_gw);
+				curr_gw = gw_node;
+				kref_get(&curr_gw->refcount);
+			}
+			break;
+
+		default: /* 2:  stable connection (use best statistic)
+			  * 3:  fast-switch (use best statistic but change as
+			  *     soon as a better gateway appears)
+			  * XX: late-switch (use best statistic but change as
+			  *     soon as a better gateway appears which has
+			  *     $routing_class more tq points)
+			  */
+			if (tq_avg > max_tq) {
+				if (curr_gw)
+					batadv_gw_node_put(curr_gw);
+				curr_gw = gw_node;
+				kref_get(&curr_gw->refcount);
+			}
+			break;
+		}
+
+		if (tq_avg > max_tq)
+			max_tq = tq_avg;
+
+		if (tmp_gw_factor > max_gw_factor)
+			max_gw_factor = tmp_gw_factor;
+
+		batadv_gw_node_put(gw_node);
+
+next:
+		batadv_neigh_node_put(router);
+		if (router_ifinfo)
+			batadv_neigh_ifinfo_put(router_ifinfo);
+	}
+	rcu_read_unlock();
+
+	return curr_gw;
+}
+
+static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv,
+				     struct batadv_orig_node *curr_gw_orig,
+				     struct batadv_orig_node *orig_node)
+{
+	struct batadv_neigh_ifinfo *router_orig_ifinfo = NULL;
+	struct batadv_neigh_ifinfo *router_gw_ifinfo = NULL;
+	struct batadv_neigh_node *router_gw = NULL;
+	struct batadv_neigh_node *router_orig = NULL;
+	u8 gw_tq_avg, orig_tq_avg;
+	bool ret = false;
+
+	/* dynamic re-election is performed only on fast or late switch */
+	if (atomic_read(&bat_priv->gw.sel_class) <= 2)
+		return false;
+
+	router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT);
+	if (!router_gw) {
+		ret = true;
+		goto out;
+	}
+
+	router_gw_ifinfo = batadv_neigh_ifinfo_get(router_gw,
+						   BATADV_IF_DEFAULT);
+	if (!router_gw_ifinfo) {
+		ret = true;
+		goto out;
+	}
+
+	router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+	if (!router_orig)
+		goto out;
+
+	router_orig_ifinfo = batadv_neigh_ifinfo_get(router_orig,
+						     BATADV_IF_DEFAULT);
+	if (!router_orig_ifinfo)
+		goto out;
+
+	gw_tq_avg = router_gw_ifinfo->bat_iv.tq_avg;
+	orig_tq_avg = router_orig_ifinfo->bat_iv.tq_avg;
+
+	/* the TQ value has to be better */
+	if (orig_tq_avg < gw_tq_avg)
+		goto out;
+
+	/* if the routing class is greater than 3 the value tells us how much
+	 * greater the TQ value of the new gateway must be
+	 */
+	if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
+	    (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
+		goto out;
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n",
+		   gw_tq_avg, orig_tq_avg);
+
+	ret = true;
+out:
+	if (router_gw_ifinfo)
+		batadv_neigh_ifinfo_put(router_gw_ifinfo);
+	if (router_orig_ifinfo)
+		batadv_neigh_ifinfo_put(router_orig_ifinfo);
+	if (router_gw)
+		batadv_neigh_node_put(router_gw);
+	if (router_orig)
+		batadv_neigh_node_put(router_orig);
+
+	return ret;
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/* fails if orig_node has no router */
+static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv,
+					  struct seq_file *seq,
+					  const struct batadv_gw_node *gw_node)
+{
+	struct batadv_gw_node *curr_gw;
+	struct batadv_neigh_node *router;
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	int ret = -1;
+
+	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+	seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
+		   (curr_gw == gw_node ? "=>" : "  "),
+		   gw_node->orig_node->orig,
+		   router_ifinfo->bat_iv.tq_avg, router->addr,
+		   router->if_incoming->net_dev->name,
+		   gw_node->bandwidth_down / 10,
+		   gw_node->bandwidth_down % 10,
+		   gw_node->bandwidth_up / 10,
+		   gw_node->bandwidth_up % 10);
+	ret = seq_has_overflowed(seq) ? -1 : 0;
+
+	if (curr_gw)
+		batadv_gw_node_put(curr_gw);
+out:
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+	if (router)
+		batadv_neigh_node_put(router);
+	return ret;
+}
+
+static void batadv_iv_gw_print(struct batadv_priv *bat_priv,
+			       struct seq_file *seq)
+{
+	struct batadv_gw_node *gw_node;
+	int gw_count = 0;
+
+	seq_puts(seq,
+		 "      Gateway      (#/255)           Nexthop [outgoingIF]: advertised uplink bandwidth\n");
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		/* fails if orig_node has no router */
+		if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
+			continue;
+
+		gw_count++;
+	}
+	rcu_read_unlock();
+
+	if (gw_count == 0)
+		seq_puts(seq, "No gateways in range ...\n");
+}
+#endif
+
+/**
+ * batadv_iv_gw_dump_entry - Dump a gateway into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @gw_node: Gateway to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				   struct batadv_priv *bat_priv,
+				   struct batadv_gw_node *gw_node)
+{
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	struct batadv_neigh_node *router;
+	struct batadv_gw_node *curr_gw;
+	int ret = -EINVAL;
+	void *hdr;
+
+	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	ret = -EMSGSIZE;
+
+	if (curr_gw == gw_node)
+		if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    gw_node->orig_node->orig) ||
+	    nla_put_u8(msg, BATADV_ATTR_TQ, router_ifinfo->bat_iv.tq_avg) ||
+	    nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN,
+		    router->addr) ||
+	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   router->if_incoming->net_dev->name) ||
+	    nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
+			gw_node->bandwidth_down) ||
+	    nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP,
+			gw_node->bandwidth_up)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+	if (router)
+		batadv_neigh_node_put(router);
+	return ret;
+}
+
+/**
+ * batadv_iv_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ */
+static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			      struct batadv_priv *bat_priv)
+{
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct batadv_gw_node *gw_node;
+	int idx_skip = cb->args[0];
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		if (idx++ < idx_skip)
+			continue;
+
+		if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+					    bat_priv, gw_node)) {
+			idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+
+	cb->args[0] = idx_skip;
+}
+
 static struct batadv_algo_ops batadv_batman_iv __read_mostly = {
 	.name = "BATMAN_IV",
 	.iface = {
@@ -2129,14 +2804,28 @@
 	.neigh = {
 		.cmp = batadv_iv_ogm_neigh_cmp,
 		.is_similar_or_better = batadv_iv_ogm_neigh_is_sob,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 		.print = batadv_iv_neigh_print,
+#endif
+		.dump = batadv_iv_ogm_neigh_dump,
 	},
 	.orig = {
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 		.print = batadv_iv_ogm_orig_print,
+#endif
+		.dump = batadv_iv_ogm_orig_dump,
 		.free = batadv_iv_ogm_orig_free,
 		.add_if = batadv_iv_ogm_orig_add_if,
 		.del_if = batadv_iv_ogm_orig_del_if,
 	},
+	.gw = {
+		.get_best_gw_node = batadv_iv_gw_get_best_gw_node,
+		.is_eligible = batadv_iv_gw_is_eligible,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+		.print = batadv_iv_gw_print,
+#endif
+		.dump = batadv_iv_gw_dump,
+	},
 };
 
 int __init batadv_iv_init(void)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 0366cbf..e79f6f0 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -21,24 +21,38 @@
 #include <linux/atomic.h>
 #include <linux/bug.h>
 #include <linux/cache.h>
+#include <linux/errno.h>
+#include <linux/if_ether.h>
 #include <linux/init.h>
 #include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "bat_v_elp.h"
 #include "bat_v_ogm.h"
+#include "gateway_client.h"
+#include "gateway_common.h"
 #include "hard-interface.h"
 #include "hash.h"
+#include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 
+struct sk_buff;
+
 static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
@@ -115,6 +129,7 @@
 		  batadv_v_elp_throughput_metric_update);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_v_orig_print_neigh - print neighbors for the originator table
  * @orig_node: the orig_node for which the neighbors are printed
@@ -198,8 +213,142 @@
 	if (batman_count == 0)
 		seq_puts(seq, "No batman nodes in range ...\n");
 }
+#endif
 
 /**
+ * batadv_v_neigh_dump_neigh - Dump a neighbour into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hardif_neigh: Neighbour to dump
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq,
+			  struct batadv_hardif_neigh_node *hardif_neigh)
+{
+	void *hdr;
+	unsigned int last_seen_msecs;
+	u32 throughput;
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen);
+	throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput);
+	throughput = throughput * 100;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_NEIGHBORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    hardif_neigh->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			hardif_neigh->if_incoming->net_dev->ifindex) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs) ||
+	    nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_v_neigh_dump_hardif - Dump the  neighbours of a hard interface  into
+ *  a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @hard_iface: The hard interface to be dumped
+ * @idx_s: Entries to be skipped
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq,
+			   struct batadv_priv *bat_priv,
+			   struct batadv_hard_iface *hard_iface,
+			   int *idx_s)
+{
+	struct batadv_hardif_neigh_node *hardif_neigh;
+	int idx = 0;
+
+	hlist_for_each_entry_rcu(hardif_neigh,
+				 &hard_iface->neigh_list, list) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) {
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*idx_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_v_neigh_dump - Dump the neighbours of a hard interface  into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @single_hardif: Limit dumping to this hard interface
+ */
+static void
+batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb,
+		    struct batadv_priv *bat_priv,
+		    struct batadv_hard_iface *single_hardif)
+{
+	struct batadv_hard_iface *hard_iface;
+	int i_hardif = 0;
+	int i_hardif_s = cb->args[0];
+	int idx = cb->args[1];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	rcu_read_lock();
+	if (single_hardif) {
+		if (i_hardif_s == 0) {
+			if (batadv_v_neigh_dump_hardif(msg, portid,
+						       cb->nlh->nlmsg_seq,
+						       bat_priv, single_hardif,
+						       &idx) == 0)
+				i_hardif++;
+		}
+	} else {
+		list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+			if (hard_iface->soft_iface != bat_priv->soft_iface)
+				continue;
+
+			if (i_hardif++ < i_hardif_s)
+				continue;
+
+			if (batadv_v_neigh_dump_hardif(msg, portid,
+						       cb->nlh->nlmsg_seq,
+						       bat_priv, hard_iface,
+						       &idx)) {
+				i_hardif--;
+				break;
+			}
+		}
+	}
+	rcu_read_unlock();
+
+	cb->args[0] = i_hardif;
+	cb->args[1] = idx;
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/**
  * batadv_v_orig_print - print the originator table
  * @bat_priv: the bat priv with all the soft interface information
  * @seq: debugfs table seq_file struct
@@ -265,6 +414,205 @@
 	if (batman_count == 0)
 		seq_puts(seq, "No batman nodes in range ...\n");
 }
+#endif
+
+/**
+ * batadv_v_orig_dump_subentry - Dump an originator subentry into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @neigh_node: Single hops neighbour
+ * @best: Is the best originator
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct batadv_hard_iface *if_outgoing,
+			    struct batadv_orig_node *orig_node,
+			    struct batadv_neigh_node *neigh_node,
+			    bool best)
+{
+	struct batadv_neigh_ifinfo *n_ifinfo;
+	unsigned int last_seen_msecs;
+	u32 throughput;
+	void *hdr;
+
+	n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing);
+	if (!n_ifinfo)
+		return 0;
+
+	throughput = n_ifinfo->bat_v.throughput * 100;
+
+	batadv_neigh_ifinfo_put(n_ifinfo);
+
+	last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen);
+
+	if (if_outgoing != BATADV_IF_DEFAULT &&
+	    if_outgoing != neigh_node->if_incoming)
+		return 0;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_ORIGINATORS);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) ||
+	    nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN,
+		    neigh_node->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			neigh_node->if_incoming->net_dev->ifindex) ||
+	    nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS,
+			last_seen_msecs))
+		goto nla_put_failure;
+
+	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_v_orig_dump_entry - Dump an originator entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @orig_node: Originator to dump
+ * @sub_s: Number of sub entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			 struct batadv_priv *bat_priv,
+			 struct batadv_hard_iface *if_outgoing,
+			 struct batadv_orig_node *orig_node, int *sub_s)
+{
+	struct batadv_neigh_node *neigh_node_best;
+	struct batadv_neigh_node *neigh_node;
+	int sub = 0;
+	bool best;
+
+	neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing);
+	if (!neigh_node_best)
+		goto out;
+
+	hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (neigh_node == neigh_node_best);
+
+		if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv,
+						if_outgoing, orig_node,
+						neigh_node, best)) {
+			batadv_neigh_node_put(neigh_node_best);
+
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+ out:
+	if (neigh_node_best)
+		batadv_neigh_node_put(neigh_node_best);
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_v_orig_dump_bucket - Dump an originator bucket into a
+ *  message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ * @head: Bucket to be dumped
+ * @idx_s: Number of entries to be skipped
+ * @sub: Number of sub entries to be skipped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			  struct batadv_priv *bat_priv,
+			  struct batadv_hard_iface *if_outgoing,
+			  struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_orig_node *orig_node;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv,
+					     if_outgoing, orig_node, sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_v_orig_dump - Dump the originators into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ * @if_outgoing: Limit dump to entries with this outgoing interface
+ */
+static void
+batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb,
+		   struct batadv_priv *bat_priv,
+		   struct batadv_hard_iface *if_outgoing)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_v_orig_dump_bucket(msg, portid,
+					      cb->nlh->nlmsg_seq,
+					      bat_priv, if_outgoing, head, &idx,
+					      &sub))
+			break;
+
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+}
 
 static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1,
 			      struct batadv_hard_iface *if_outgoing1,
@@ -320,6 +668,365 @@
 	return ret;
 }
 
+static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv,
+					char *buff, size_t count)
+{
+	u32 old_class, class;
+
+	if (!batadv_parse_throughput(bat_priv->soft_iface, buff,
+				     "B.A.T.M.A.N. V GW selection class",
+				     &class))
+		return -EINVAL;
+
+	old_class = atomic_read(&bat_priv->gw.sel_class);
+	atomic_set(&bat_priv->gw.sel_class, class);
+
+	if (old_class != class)
+		batadv_gw_reselect(bat_priv);
+
+	return count;
+}
+
+static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff)
+{
+	u32 class = atomic_read(&bat_priv->gw.sel_class);
+
+	return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10);
+}
+
+/**
+ * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW
+ * @gw_node: the GW to retrieve the metric for
+ * @bw: the pointer where the metric will be stored. The metric is computed as
+ *  the minimum between the GW advertised throughput and the path throughput to
+ *  it in the mesh
+ *
+ * Return: 0 on success, -1 on failure
+ */
+static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw)
+{
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	struct batadv_orig_node *orig_node;
+	struct batadv_neigh_node *router;
+	int ret = -1;
+
+	orig_node = gw_node->orig_node;
+	router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	/* the GW metric is computed as the minimum between the path throughput
+	 * to reach the GW itself and the advertised bandwidth.
+	 * This gives us an approximation of the effective throughput that the
+	 * client can expect via this particular GW node
+	 */
+	*bw = router_ifinfo->bat_v.throughput;
+	*bw = min_t(u32, *bw, gw_node->bandwidth_down);
+
+	ret = 0;
+out:
+	if (router)
+		batadv_neigh_node_put(router);
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+
+	return ret;
+}
+
+/**
+ * batadv_v_gw_get_best_gw_node - retrieve the best GW node
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Return: the GW node having the best GW-metric, NULL if no GW is known
+ */
+static struct batadv_gw_node *
+batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
+{
+	struct batadv_gw_node *gw_node, *curr_gw = NULL;
+	u32 max_bw = 0, bw;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		if (!kref_get_unless_zero(&gw_node->refcount))
+			continue;
+
+		if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
+			goto next;
+
+		if (curr_gw && (bw <= max_bw))
+			goto next;
+
+		if (curr_gw)
+			batadv_gw_node_put(curr_gw);
+
+		curr_gw = gw_node;
+		kref_get(&curr_gw->refcount);
+		max_bw = bw;
+
+next:
+		batadv_gw_node_put(gw_node);
+	}
+	rcu_read_unlock();
+
+	return curr_gw;
+}
+
+/**
+ * batadv_v_gw_is_eligible - check if a originator would be selected as GW
+ * @bat_priv: the bat priv with all the soft interface information
+ * @curr_gw_orig: originator representing the currently selected GW
+ * @orig_node: the originator representing the new candidate
+ *
+ * Return: true if orig_node can be selected as current GW, false otherwise
+ */
+static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv,
+				    struct batadv_orig_node *curr_gw_orig,
+				    struct batadv_orig_node *orig_node)
+{
+	struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL;
+	u32 gw_throughput, orig_throughput, threshold;
+	bool ret = false;
+
+	threshold = atomic_read(&bat_priv->gw.sel_class);
+
+	curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig);
+	if (!curr_gw) {
+		ret = true;
+		goto out;
+	}
+
+	if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) {
+		ret = true;
+		goto out;
+	}
+
+	orig_gw = batadv_gw_node_get(bat_priv, orig_node);
+	if (!orig_node)
+		goto out;
+
+	if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0)
+		goto out;
+
+	if (orig_throughput < gw_throughput)
+		goto out;
+
+	if ((orig_throughput - gw_throughput) < threshold)
+		goto out;
+
+	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+		   "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n",
+		   gw_throughput, orig_throughput);
+
+	ret = true;
+out:
+	if (curr_gw)
+		batadv_gw_node_put(curr_gw);
+	if (orig_gw)
+		batadv_gw_node_put(orig_gw);
+
+	return ret;
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/* fails if orig_node has no router */
+static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv,
+					 struct seq_file *seq,
+					 const struct batadv_gw_node *gw_node)
+{
+	struct batadv_gw_node *curr_gw;
+	struct batadv_neigh_node *router;
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	int ret = -1;
+
+	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+	seq_printf(seq, "%s %pM (%9u.%1u) %pM [%10s]: %u.%u/%u.%u MBit\n",
+		   (curr_gw == gw_node ? "=>" : "  "),
+		   gw_node->orig_node->orig,
+		   router_ifinfo->bat_v.throughput / 10,
+		   router_ifinfo->bat_v.throughput % 10, router->addr,
+		   router->if_incoming->net_dev->name,
+		   gw_node->bandwidth_down / 10,
+		   gw_node->bandwidth_down % 10,
+		   gw_node->bandwidth_up / 10,
+		   gw_node->bandwidth_up % 10);
+	ret = seq_has_overflowed(seq) ? -1 : 0;
+
+	if (curr_gw)
+		batadv_gw_node_put(curr_gw);
+out:
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+	if (router)
+		batadv_neigh_node_put(router);
+	return ret;
+}
+
+/**
+ * batadv_v_gw_print - print the gateway list
+ * @bat_priv: the bat priv with all the soft interface information
+ * @seq: gateway table seq_file struct
+ */
+static void batadv_v_gw_print(struct batadv_priv *bat_priv,
+			      struct seq_file *seq)
+{
+	struct batadv_gw_node *gw_node;
+	int gw_count = 0;
+
+	seq_puts(seq,
+		 "      Gateway        ( throughput)           Nexthop [outgoingIF]: advertised uplink bandwidth\n");
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		/* fails if orig_node has no router */
+		if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0)
+			continue;
+
+		gw_count++;
+	}
+	rcu_read_unlock();
+
+	if (gw_count == 0)
+		seq_puts(seq, "No gateways in range ...\n");
+}
+#endif
+
+/**
+ * batadv_v_gw_dump_entry - Dump a gateway into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @gw_node: Gateway to be dumped
+ *
+ * Return: Error code, or 0 on success
+ */
+static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				  struct batadv_priv *bat_priv,
+				  struct batadv_gw_node *gw_node)
+{
+	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
+	struct batadv_neigh_node *router;
+	struct batadv_gw_node *curr_gw;
+	int ret = -EINVAL;
+	void *hdr;
+
+	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
+	if (!router)
+		goto out;
+
+	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
+	if (!router_ifinfo)
+		goto out;
+
+	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	ret = -EMSGSIZE;
+
+	if (curr_gw == gw_node) {
+		if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+	}
+
+	if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    gw_node->orig_node->orig)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT,
+			router_ifinfo->bat_v.throughput)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   router->if_incoming->net_dev->name)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN,
+			gw_node->bandwidth_down)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	if (router_ifinfo)
+		batadv_neigh_ifinfo_put(router_ifinfo);
+	if (router)
+		batadv_neigh_node_put(router);
+	return ret;
+}
+
+/**
+ * batadv_v_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ * @bat_priv: The bat priv with all the soft interface information
+ */
+static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb,
+			     struct batadv_priv *bat_priv)
+{
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct batadv_gw_node *gw_node;
+	int idx_skip = cb->args[0];
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
+		if (idx++ < idx_skip)
+			continue;
+
+		if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq,
+					   bat_priv, gw_node)) {
+			idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+
+	cb->args[0] = idx_skip;
+}
+
 static struct batadv_algo_ops batadv_batman_v __read_mostly = {
 	.name = "BATMAN_V",
 	.iface = {
@@ -333,10 +1040,26 @@
 		.hardif_init = batadv_v_hardif_neigh_init,
 		.cmp = batadv_v_neigh_cmp,
 		.is_similar_or_better = batadv_v_neigh_is_sob,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 		.print = batadv_v_neigh_print,
+#endif
+		.dump = batadv_v_neigh_dump,
 	},
 	.orig = {
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 		.print = batadv_v_orig_print,
+#endif
+		.dump = batadv_v_orig_dump,
+	},
+	.gw = {
+		.store_sel_class = batadv_v_store_sel_class,
+		.show_sel_class = batadv_v_show_sel_class,
+		.get_best_gw_node = batadv_v_gw_get_best_gw_node,
+		.is_eligible = batadv_v_gw_is_eligible,
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+		.print = batadv_v_gw_print,
+#endif
+		.dump = batadv_v_gw_dump,
 	},
 };
 
@@ -363,7 +1086,16 @@
  */
 int batadv_v_mesh_init(struct batadv_priv *bat_priv)
 {
-	return batadv_v_ogm_init(bat_priv);
+	int ret = 0;
+
+	ret = batadv_v_ogm_init(bat_priv);
+	if (ret < 0)
+		return ret;
+
+	/* set default throughput difference threshold to 5Mbps */
+	atomic_set(&bat_priv->gw.sel_class, 50);
+
+	return 0;
 }
 
 /**
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 7d170010..ee08540 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -335,7 +335,7 @@
 		goto out;
 
 	skb_reserve(hard_iface->bat_v.elp_skb, ETH_HLEN + NET_IP_ALIGN);
-	elp_buff = skb_push(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
+	elp_buff = skb_put(hard_iface->bat_v.elp_skb, BATADV_ELP_HLEN);
 	elp_packet = (struct batadv_elp_packet *)elp_buff;
 	memset(elp_packet, 0, BATADV_ELP_HLEN);
 
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 6fbba4e..1aeeadca 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -73,13 +73,12 @@
 	if (!orig_node)
 		return NULL;
 
+	kref_get(&orig_node->refcount);
 	hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
 				     batadv_choose_orig, orig_node,
 				     &orig_node->hash_entry);
 	if (hash_added != 0) {
-		/* orig_node->refcounter is initialised to 2 by
-		 * batadv_orig_node_new()
-		 */
+		/* remove refcnt for newly created orig_node and hash entry */
 		batadv_orig_node_put(orig_node);
 		batadv_orig_node_put(orig_node);
 		orig_node = NULL;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index ad2ffe1..e7f690b 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -35,6 +35,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
@@ -45,12 +46,18 @@
 #include <linux/string.h>
 #include <linux/workqueue.h>
 #include <net/arp.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
+#include "soft-interface.h"
 #include "sysfs.h"
 #include "translation-table.h"
 
@@ -519,11 +526,9 @@
 	atomic_set(&entry->wait_periods, 0);
 	ether_addr_copy(entry->orig, orig);
 	INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report);
-
-	/* one for the hash, one for returning */
 	kref_init(&entry->refcount);
-	kref_get(&entry->refcount);
 
+	kref_get(&entry->refcount);
 	hash_added = batadv_hash_add(bat_priv->bla.backbone_hash,
 				     batadv_compare_backbone_gw,
 				     batadv_choose_backbone_gw, entry,
@@ -711,12 +716,13 @@
 		claim->lasttime = jiffies;
 		kref_get(&backbone_gw->refcount);
 		claim->backbone_gw = backbone_gw;
-
 		kref_init(&claim->refcount);
-		kref_get(&claim->refcount);
+
 		batadv_dbg(BATADV_DBG_BLA, bat_priv,
 			   "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n",
 			   mac, BATADV_PRINT_VID(vid));
+
+		kref_get(&claim->refcount);
 		hash_added = batadv_hash_add(bat_priv->bla.claim_hash,
 					     batadv_compare_claim,
 					     batadv_choose_claim, claim,
@@ -1148,7 +1154,7 @@
 
 	/* Let the loopdetect frames on the mesh in any case. */
 	if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT)
-		return 0;
+		return false;
 
 	/* check if it is a claim frame. */
 	ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst,
@@ -1990,6 +1996,7 @@
 	return ret;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file
  * @seq: seq file to print on
@@ -2050,8 +2057,172 @@
 		batadv_hardif_put(primary_if);
 	return 0;
 }
+#endif
 
 /**
+ * batadv_bla_claim_dump_entry - dump one entry of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @claim: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_hard_iface *primary_if,
+			    struct batadv_bla_claim *claim)
+{
+	u8 *primary_addr = primary_if->net_dev->dev_addr;
+	u16 backbone_crc;
+	bool is_own;
+	void *hdr;
+	int ret = -EINVAL;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	is_own = batadv_compare_eth(claim->backbone_gw->orig,
+				    primary_addr);
+
+	spin_lock_bh(&claim->backbone_gw->crc_lock);
+	backbone_crc = claim->backbone_gw->crc;
+	spin_unlock_bh(&claim->backbone_gw->crc_lock);
+
+	if (is_own)
+		if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+
+	if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) ||
+	    nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+		    claim->backbone_gw->orig) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+			backbone_crc)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * batadv_bla_claim_dump_bucket - dump one bucket of the claim table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			     struct batadv_hard_iface *primary_if,
+			     struct hlist_head *head, int *idx_skip)
+{
+	struct batadv_bla_claim *claim;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(claim, head, hash_entry) {
+		if (idx++ < *idx_skip)
+			continue;
+		if (batadv_bla_claim_dump_entry(msg, portid, seq,
+						primary_if, claim)) {
+			*idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	*idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+	return 0;
+}
+
+/**
+ * batadv_bla_claim_dump - dump claim table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hashtable *hash;
+	struct batadv_priv *bat_priv;
+	int bucket = cb->args[0];
+	struct hlist_head *head;
+	int idx = cb->args[1];
+	int ifindex;
+	int ret = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	hash = bat_priv->bla.claim_hash;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_bla_claim_dump_bucket(msg, portid,
+						 cb->nlh->nlmsg_seq,
+						 primary_if, head, &idx))
+			break;
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	ret = msg->len;
+
+out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+/**
  * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq
  *  file
  * @seq: seq file to print on
@@ -2114,3 +2285,168 @@
 		batadv_hardif_put(primary_if);
 	return 0;
 }
+#endif
+
+/**
+ * batadv_bla_backbone_dump_entry - dump one entry of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @backbone_gw: entry to dump
+ *
+ * Return: 0 or error code.
+ */
+static int
+batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_hard_iface *primary_if,
+			       struct batadv_bla_backbone_gw *backbone_gw)
+{
+	u8 *primary_addr = primary_if->net_dev->dev_addr;
+	u16 backbone_crc;
+	bool is_own;
+	int msecs;
+	void *hdr;
+	int ret = -EINVAL;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE);
+	if (!hdr) {
+		ret = -ENOBUFS;
+		goto out;
+	}
+
+	is_own = batadv_compare_eth(backbone_gw->orig, primary_addr);
+
+	spin_lock_bh(&backbone_gw->crc_lock);
+	backbone_crc = backbone_gw->crc;
+	spin_unlock_bh(&backbone_gw->crc_lock);
+
+	msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime);
+
+	if (is_own)
+		if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) {
+			genlmsg_cancel(msg, hdr);
+			goto out;
+		}
+
+	if (nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN,
+		    backbone_gw->orig) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_VID, backbone_gw->vid) ||
+	    nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+			backbone_crc) ||
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) {
+		genlmsg_cancel(msg, hdr);
+		goto out;
+	}
+
+	genlmsg_end(msg, hdr);
+	ret = 0;
+
+out:
+	return ret;
+}
+
+/**
+ * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table
+ * to a netlink socket
+ * @msg: buffer for the message
+ * @portid: netlink port
+ * @seq: Sequence number of netlink message
+ * @primary_if: primary interface
+ * @head: bucket to dump
+ * @idx_skip: How many entries to skip
+ *
+ * Return: always 0.
+ */
+static int
+batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+				struct batadv_hard_iface *primary_if,
+				struct hlist_head *head, int *idx_skip)
+{
+	struct batadv_bla_backbone_gw *backbone_gw;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) {
+		if (idx++ < *idx_skip)
+			continue;
+		if (batadv_bla_backbone_dump_entry(msg, portid, seq,
+						   primary_if, backbone_gw)) {
+			*idx_skip = idx - 1;
+			goto unlock;
+		}
+	}
+
+	*idx_skip = idx;
+unlock:
+	rcu_read_unlock();
+	return 0;
+}
+
+/**
+ * batadv_bla_backbone_dump - dump backbone table to a netlink socket
+ * @msg: buffer for the message
+ * @cb: callback structure containing arguments
+ *
+ * Return: message length.
+ */
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	int portid = NETLINK_CB(cb->skb).portid;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hashtable *hash;
+	struct batadv_priv *bat_priv;
+	int bucket = cb->args[0];
+	struct hlist_head *head;
+	int idx = cb->args[1];
+	int ifindex;
+	int ret = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+	hash = bat_priv->bla.backbone_hash;
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_bla_backbone_dump_bucket(msg, portid,
+						    cb->nlh->nlmsg_seq,
+						    primary_if, head, &idx))
+			break;
+		bucket++;
+	}
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	ret = msg->len;
+
+out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h
index 0f01dae..1ae93e4 100644
--- a/net/batman-adv/bridge_loop_avoidance.h
+++ b/net/batman-adv/bridge_loop_avoidance.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 struct net_device;
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -35,8 +36,10 @@
 			       struct batadv_orig_node *orig_node,
 			       int hdr_size);
 int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq,
 					     void *offset);
+int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb);
 bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig,
 				    unsigned short vid);
 bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv,
@@ -47,7 +50,7 @@
 void batadv_bla_status_update(struct net_device *net_dev);
 int batadv_bla_init(struct batadv_priv *bat_priv);
 void batadv_bla_free(struct batadv_priv *bat_priv);
-
+int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb);
 #define BATADV_BLA_CRC_INIT	0
 #else /* ifdef CONFIG_BATMAN_ADV_BLA */
 
@@ -112,6 +115,18 @@
 {
 }
 
+static inline int batadv_bla_claim_dump(struct sk_buff *msg,
+					struct netlink_callback *cb)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int batadv_bla_backbone_dump(struct sk_buff *msg,
+					   struct netlink_callback *cb)
+{
+	return -EOPNOTSUPP;
+}
+
 #endif /* ifdef CONFIG_BATMAN_ADV_BLA */
 
 #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 1d68b6e..b4ffba7 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -31,6 +31,7 @@
 #include <linux/stddef.h>
 #include <linux/stringify.h>
 #include <linux/sysfs.h>
+#include <net/net_namespace.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
@@ -305,12 +306,16 @@
  */
 int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
 {
+	struct net *net = dev_net(hard_iface->net_dev);
 	struct batadv_debuginfo **bat_debug;
 	struct dentry *file;
 
 	if (!batadv_debugfs)
 		goto out;
 
+	if (net != &init_net)
+		return 0;
+
 	hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name,
 						   batadv_debugfs);
 	if (!hard_iface->debug_dir)
@@ -341,6 +346,11 @@
  */
 void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
 {
+	struct net *net = dev_net(hard_iface->net_dev);
+
+	if (net != &init_net)
+		return;
+
 	if (batadv_debugfs) {
 		debugfs_remove_recursive(hard_iface->debug_dir);
 		hard_iface->debug_dir = NULL;
@@ -351,11 +361,15 @@
 {
 	struct batadv_priv *bat_priv = netdev_priv(dev);
 	struct batadv_debuginfo **bat_debug;
+	struct net *net = dev_net(dev);
 	struct dentry *file;
 
 	if (!batadv_debugfs)
 		goto out;
 
+	if (net != &init_net)
+		return 0;
+
 	bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs);
 	if (!bat_priv->debug_dir)
 		goto out;
@@ -392,6 +406,10 @@
 void batadv_debugfs_del_meshif(struct net_device *dev)
 {
 	struct batadv_priv *bat_priv = netdev_priv(dev);
+	struct net *net = dev_net(dev);
+
+	if (net != &init_net)
+		return;
 
 	batadv_debug_log_cleanup(bat_priv);
 
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 1ab4e2e6..c68ff3d 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -26,7 +26,7 @@
 
 #define BATADV_DEBUGFS_SUBDIR "batman_adv"
 
-#if IS_ENABLED(CONFIG_DEBUG_FS)
+#if IS_ENABLED(CONFIG_BATMAN_ADV_DEBUGFS)
 
 void batadv_debugfs_init(void);
 void batadv_debugfs_destroy(void);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b1cc8bf..e257efd 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -343,8 +343,8 @@
 	ether_addr_copy(dat_entry->mac_addr, mac_addr);
 	dat_entry->last_update = jiffies;
 	kref_init(&dat_entry->refcount);
-	kref_get(&dat_entry->refcount);
 
+	kref_get(&dat_entry->refcount);
 	hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat,
 				     batadv_hash_dat, dat_entry,
 				     &dat_entry->hash_entry);
@@ -795,6 +795,7 @@
 	batadv_dat_hash_free(bat_priv);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_dat_cache_seq_print_text - print the local DAT hash table
  * @seq: seq file to print on
@@ -846,6 +847,7 @@
 		batadv_hardif_put(primary_if);
 	return 0;
 }
+#endif
 
 /**
  * batadv_arp_get_type - parse an ARP packet and gets the type
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 63a805d..de055d6 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -20,6 +20,7 @@
 
 #include <linux/atomic.h>
 #include <linux/byteorder/generic.h>
+#include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
 #include <linux/if_ether.h>
@@ -31,6 +32,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
@@ -39,13 +41,17 @@
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/udp.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "gateway_common.h"
 #include "hard-interface.h"
 #include "log.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 #include "routing.h"
+#include "soft-interface.h"
 #include "sysfs.h"
 #include "translation-table.h"
 
@@ -80,12 +86,12 @@
  * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it
  * @gw_node: gateway node to free
  */
-static void batadv_gw_node_put(struct batadv_gw_node *gw_node)
+void batadv_gw_node_put(struct batadv_gw_node *gw_node)
 {
 	kref_put(&gw_node->refcount, batadv_gw_node_release);
 }
 
-static struct batadv_gw_node *
+struct batadv_gw_node *
 batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv)
 {
 	struct batadv_gw_node *gw_node;
@@ -164,86 +170,6 @@
 	atomic_set(&bat_priv->gw.reselect, 1);
 }
 
-static struct batadv_gw_node *
-batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
-{
-	struct batadv_neigh_node *router;
-	struct batadv_neigh_ifinfo *router_ifinfo;
-	struct batadv_gw_node *gw_node, *curr_gw = NULL;
-	u64 max_gw_factor = 0;
-	u64 tmp_gw_factor = 0;
-	u8 max_tq = 0;
-	u8 tq_avg;
-	struct batadv_orig_node *orig_node;
-
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
-		orig_node = gw_node->orig_node;
-		router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
-		if (!router)
-			continue;
-
-		router_ifinfo = batadv_neigh_ifinfo_get(router,
-							BATADV_IF_DEFAULT);
-		if (!router_ifinfo)
-			goto next;
-
-		if (!kref_get_unless_zero(&gw_node->refcount))
-			goto next;
-
-		tq_avg = router_ifinfo->bat_iv.tq_avg;
-
-		switch (atomic_read(&bat_priv->gw.sel_class)) {
-		case 1: /* fast connection */
-			tmp_gw_factor = tq_avg * tq_avg;
-			tmp_gw_factor *= gw_node->bandwidth_down;
-			tmp_gw_factor *= 100 * 100;
-			tmp_gw_factor >>= 18;
-
-			if ((tmp_gw_factor > max_gw_factor) ||
-			    ((tmp_gw_factor == max_gw_factor) &&
-			     (tq_avg > max_tq))) {
-				if (curr_gw)
-					batadv_gw_node_put(curr_gw);
-				curr_gw = gw_node;
-				kref_get(&curr_gw->refcount);
-			}
-			break;
-
-		default: /* 2:  stable connection (use best statistic)
-			  * 3:  fast-switch (use best statistic but change as
-			  *     soon as a better gateway appears)
-			  * XX: late-switch (use best statistic but change as
-			  *     soon as a better gateway appears which has
-			  *     $routing_class more tq points)
-			  */
-			if (tq_avg > max_tq) {
-				if (curr_gw)
-					batadv_gw_node_put(curr_gw);
-				curr_gw = gw_node;
-				kref_get(&curr_gw->refcount);
-			}
-			break;
-		}
-
-		if (tq_avg > max_tq)
-			max_tq = tq_avg;
-
-		if (tmp_gw_factor > max_gw_factor)
-			max_gw_factor = tmp_gw_factor;
-
-		batadv_gw_node_put(gw_node);
-
-next:
-		batadv_neigh_node_put(router);
-		if (router_ifinfo)
-			batadv_neigh_ifinfo_put(router_ifinfo);
-	}
-	rcu_read_unlock();
-
-	return curr_gw;
-}
-
 /**
  * batadv_gw_check_client_stop - check if client mode has been switched off
  * @bat_priv: the bat priv with all the soft interface information
@@ -287,12 +213,19 @@
 	if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT)
 		goto out;
 
+	if (!bat_priv->algo_ops->gw.get_best_gw_node)
+		goto out;
+
 	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
 
 	if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw)
 		goto out;
 
-	next_gw = batadv_gw_get_best_gw_node(bat_priv);
+	/* if gw.reselect is set to 1 it means that a previous call to
+	 * gw.is_eligible() said that we have a new best GW, therefore it can
+	 * now be picked from the list and selected
+	 */
+	next_gw = bat_priv->algo_ops->gw.get_best_gw_node(bat_priv);
 
 	if (curr_gw == next_gw)
 		goto out;
@@ -360,70 +293,31 @@
 void batadv_gw_check_election(struct batadv_priv *bat_priv,
 			      struct batadv_orig_node *orig_node)
 {
-	struct batadv_neigh_ifinfo *router_orig_tq = NULL;
-	struct batadv_neigh_ifinfo *router_gw_tq = NULL;
 	struct batadv_orig_node *curr_gw_orig;
-	struct batadv_neigh_node *router_gw = NULL;
-	struct batadv_neigh_node *router_orig = NULL;
-	u8 gw_tq_avg, orig_tq_avg;
+
+	/* abort immediately if the routing algorithm does not support gateway
+	 * election
+	 */
+	if (!bat_priv->algo_ops->gw.is_eligible)
+		return;
 
 	curr_gw_orig = batadv_gw_get_selected_orig(bat_priv);
 	if (!curr_gw_orig)
 		goto reselect;
 
-	router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT);
-	if (!router_gw)
-		goto reselect;
-
-	router_gw_tq = batadv_neigh_ifinfo_get(router_gw,
-					       BATADV_IF_DEFAULT);
-	if (!router_gw_tq)
-		goto reselect;
-
 	/* this node already is the gateway */
 	if (curr_gw_orig == orig_node)
 		goto out;
 
-	router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT);
-	if (!router_orig)
+	if (!bat_priv->algo_ops->gw.is_eligible(bat_priv, curr_gw_orig,
+						orig_node))
 		goto out;
 
-	router_orig_tq = batadv_neigh_ifinfo_get(router_orig,
-						 BATADV_IF_DEFAULT);
-	if (!router_orig_tq)
-		goto out;
-
-	gw_tq_avg = router_gw_tq->bat_iv.tq_avg;
-	orig_tq_avg = router_orig_tq->bat_iv.tq_avg;
-
-	/* the TQ value has to be better */
-	if (orig_tq_avg < gw_tq_avg)
-		goto out;
-
-	/* if the routing class is greater than 3 the value tells us how much
-	 * greater the TQ value of the new gateway must be
-	 */
-	if ((atomic_read(&bat_priv->gw.sel_class) > 3) &&
-	    (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class)))
-		goto out;
-
-	batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-		   "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n",
-		   gw_tq_avg, orig_tq_avg);
-
 reselect:
 	batadv_gw_reselect(bat_priv);
 out:
 	if (curr_gw_orig)
 		batadv_orig_node_put(curr_gw_orig);
-	if (router_gw)
-		batadv_neigh_node_put(router_gw);
-	if (router_orig)
-		batadv_neigh_node_put(router_orig);
-	if (router_gw_tq)
-		batadv_neigh_ifinfo_put(router_gw_tq);
-	if (router_orig_tq)
-		batadv_neigh_ifinfo_put(router_orig_tq);
 }
 
 /**
@@ -445,14 +339,15 @@
 	if (!gw_node)
 		return;
 
-	kref_get(&orig_node->refcount);
+	kref_init(&gw_node->refcount);
 	INIT_HLIST_NODE(&gw_node->list);
+	kref_get(&orig_node->refcount);
 	gw_node->orig_node = orig_node;
 	gw_node->bandwidth_down = ntohl(gateway->bandwidth_down);
 	gw_node->bandwidth_up = ntohl(gateway->bandwidth_up);
-	kref_init(&gw_node->refcount);
 
 	spin_lock_bh(&bat_priv->gw.list_lock);
+	kref_get(&gw_node->refcount);
 	hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list);
 	spin_unlock_bh(&bat_priv->gw.list_lock);
 
@@ -463,6 +358,9 @@
 		   ntohl(gateway->bandwidth_down) % 10,
 		   ntohl(gateway->bandwidth_up) / 10,
 		   ntohl(gateway->bandwidth_up) % 10);
+
+	/* don't return reference to new gw_node */
+	batadv_gw_node_put(gw_node);
 }
 
 /**
@@ -472,9 +370,8 @@
  *
  * Return: gateway node if found or NULL otherwise.
  */
-static struct batadv_gw_node *
-batadv_gw_node_get(struct batadv_priv *bat_priv,
-		   struct batadv_orig_node *orig_node)
+struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
+					  struct batadv_orig_node *orig_node)
 {
 	struct batadv_gw_node *gw_node_tmp, *gw_node = NULL;
 
@@ -585,81 +482,87 @@
 	spin_unlock_bh(&bat_priv->gw.list_lock);
 }
 
-/* fails if orig_node has no router */
-static int batadv_write_buffer_text(struct batadv_priv *bat_priv,
-				    struct seq_file *seq,
-				    const struct batadv_gw_node *gw_node)
-{
-	struct batadv_gw_node *curr_gw;
-	struct batadv_neigh_node *router;
-	struct batadv_neigh_ifinfo *router_ifinfo = NULL;
-	int ret = -1;
-
-	router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT);
-	if (!router)
-		goto out;
-
-	router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT);
-	if (!router_ifinfo)
-		goto out;
-
-	curr_gw = batadv_gw_get_selected_gw_node(bat_priv);
-
-	seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n",
-		   (curr_gw == gw_node ? "=>" : "  "),
-		   gw_node->orig_node->orig,
-		   router_ifinfo->bat_iv.tq_avg, router->addr,
-		   router->if_incoming->net_dev->name,
-		   gw_node->bandwidth_down / 10,
-		   gw_node->bandwidth_down % 10,
-		   gw_node->bandwidth_up / 10,
-		   gw_node->bandwidth_up % 10);
-	ret = seq_has_overflowed(seq) ? -1 : 0;
-
-	if (curr_gw)
-		batadv_gw_node_put(curr_gw);
-out:
-	if (router_ifinfo)
-		batadv_neigh_ifinfo_put(router_ifinfo);
-	if (router)
-		batadv_neigh_node_put(router);
-	return ret;
-}
-
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct net_device *net_dev = (struct net_device *)seq->private;
 	struct batadv_priv *bat_priv = netdev_priv(net_dev);
 	struct batadv_hard_iface *primary_if;
-	struct batadv_gw_node *gw_node;
-	int gw_count = 0;
 
 	primary_if = batadv_seq_print_text_primary_if_get(seq);
 	if (!primary_if)
-		goto out;
+		return 0;
 
-	seq_printf(seq,
-		   "      Gateway      (#/255)           Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n",
+	seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n",
 		   BATADV_SOURCE_VERSION, primary_if->net_dev->name,
-		   primary_if->net_dev->dev_addr, net_dev->name);
+		   primary_if->net_dev->dev_addr, net_dev->name,
+		   bat_priv->algo_ops->name);
 
-	rcu_read_lock();
-	hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
-		/* fails if orig_node has no router */
-		if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0)
-			continue;
+	batadv_hardif_put(primary_if);
 
-		gw_count++;
+	if (!bat_priv->algo_ops->gw.print) {
+		seq_puts(seq,
+			 "No printing function for this routing protocol\n");
+		return 0;
 	}
-	rcu_read_unlock();
 
-	if (gw_count == 0)
-		seq_puts(seq, "No gateways in range ...\n");
+	bat_priv->algo_ops->gw.print(bat_priv, seq);
+
+	return 0;
+}
+#endif
+
+/**
+ * batadv_gw_dump - Dump gateways into a message
+ * @msg: Netlink message to dump into
+ * @cb: Control block containing additional options
+ *
+ * Return: Error code, or length of message
+ */
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct batadv_hard_iface *primary_if = NULL;
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	int ifindex;
+	int ret;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	if (!bat_priv->algo_ops->gw.dump) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	bat_priv->algo_ops->gw.dump(msg, cb, bat_priv);
+
+	ret = msg->len;
 
 out:
 	if (primary_if)
 		batadv_hardif_put(primary_if);
-	return 0;
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
 }
 
 /**
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 582dd8c..859166d 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 struct batadv_tvlv_gateway_data;
+struct netlink_callback;
 struct seq_file;
 struct sk_buff;
 
@@ -39,10 +40,16 @@
 void batadv_gw_node_delete(struct batadv_priv *bat_priv,
 			   struct batadv_orig_node *orig_node);
 void batadv_gw_node_free(struct batadv_priv *bat_priv);
+void batadv_gw_node_put(struct batadv_gw_node *gw_node);
+struct batadv_gw_node *
+batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv);
 int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb);
 bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb);
 enum batadv_dhcp_recipient
 batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len,
 			     u8 *chaddr);
+struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv,
+					  struct batadv_orig_node *orig_node);
 
 #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index d7bc6a8..2118481 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -241,10 +241,9 @@
 
 	batadv_gw_node_update(bat_priv, orig, &gateway);
 
-	/* restart gateway selection if fast or late switching was enabled */
+	/* restart gateway selection */
 	if ((gateway.bandwidth_down != 0) &&
-	    (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) &&
-	    (atomic_read(&bat_priv->gw.sel_class) > 2))
+	    (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
 		batadv_gw_check_election(bat_priv, orig);
 }
 
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 1f90808..08ce361 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -35,7 +35,8 @@
 #include <linux/rtnetlink.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/workqueue.h>
+#include <net/net_namespace.h>
+#include <net/rtnetlink.h>
 
 #include "bat_v.h"
 #include "bridge_loop_avoidance.h"
@@ -85,25 +86,55 @@
 }
 
 /**
+ * batadv_getlink_net - return link net namespace (of use fallback)
+ * @netdev: net_device to check
+ * @fallback_net: return in case get_link_net is not available for @netdev
+ *
+ * Return: result of rtnl_link_ops->get_link_net or @fallback_net
+ */
+static const struct net *batadv_getlink_net(const struct net_device *netdev,
+					    const struct net *fallback_net)
+{
+	if (!netdev->rtnl_link_ops)
+		return fallback_net;
+
+	if (!netdev->rtnl_link_ops->get_link_net)
+		return fallback_net;
+
+	return netdev->rtnl_link_ops->get_link_net(netdev);
+}
+
+/**
  * batadv_mutual_parents - check if two devices are each others parent
- * @dev1: 1st net_device
- * @dev2: 2nd net_device
+ * @dev1: 1st net dev
+ * @net1: 1st devices netns
+ * @dev2: 2nd net dev
+ * @net2: 2nd devices netns
  *
  * veth devices come in pairs and each is the parent of the other!
  *
  * Return: true if the devices are each others parent, otherwise false
  */
 static bool batadv_mutual_parents(const struct net_device *dev1,
-				  const struct net_device *dev2)
+				  const struct net *net1,
+				  const struct net_device *dev2,
+				  const struct net *net2)
 {
 	int dev1_parent_iflink = dev_get_iflink(dev1);
 	int dev2_parent_iflink = dev_get_iflink(dev2);
+	const struct net *dev1_parent_net;
+	const struct net *dev2_parent_net;
+
+	dev1_parent_net = batadv_getlink_net(dev1, net1);
+	dev2_parent_net = batadv_getlink_net(dev2, net2);
 
 	if (!dev1_parent_iflink || !dev2_parent_iflink)
 		return false;
 
 	return (dev1_parent_iflink == dev2->ifindex) &&
-	       (dev2_parent_iflink == dev1->ifindex);
+	       (dev2_parent_iflink == dev1->ifindex) &&
+	       net_eq(dev1_parent_net, net2) &&
+	       net_eq(dev2_parent_net, net1);
 }
 
 /**
@@ -121,8 +152,9 @@
  */
 static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
 {
-	struct net_device *parent_dev;
 	struct net *net = dev_net(net_dev);
+	struct net_device *parent_dev;
+	const struct net *parent_net;
 	bool ret;
 
 	/* check if this is a batman-adv mesh interface */
@@ -134,13 +166,16 @@
 	    dev_get_iflink(net_dev) == net_dev->ifindex)
 		return false;
 
+	parent_net = batadv_getlink_net(net_dev, net);
+
 	/* recurse over the parent device */
-	parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev));
+	parent_dev = __dev_get_by_index((struct net *)parent_net,
+					dev_get_iflink(net_dev));
 	/* if we got a NULL parent_dev there is something broken.. */
 	if (WARN(!parent_dev, "Cannot find parent device"))
 		return false;
 
-	if (batadv_mutual_parents(net_dev, parent_dev))
+	if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net))
 		return false;
 
 	ret = batadv_is_on_batman_iface(parent_dev);
@@ -625,25 +660,6 @@
 		batadv_hardif_put(primary_if);
 }
 
-/**
- * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif
- * @work: work queue item
- *
- * Free the parts of the hard interface which can not be removed under
- * rtnl lock (to prevent deadlock situations).
- */
-static void batadv_hardif_remove_interface_finish(struct work_struct *work)
-{
-	struct batadv_hard_iface *hard_iface;
-
-	hard_iface = container_of(work, struct batadv_hard_iface,
-				  cleanup_work);
-
-	batadv_debugfs_del_hardif(hard_iface);
-	batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
-	batadv_hardif_put(hard_iface);
-}
-
 static struct batadv_hard_iface *
 batadv_hardif_add_interface(struct net_device *net_dev)
 {
@@ -676,10 +692,9 @@
 
 	INIT_LIST_HEAD(&hard_iface->list);
 	INIT_HLIST_HEAD(&hard_iface->neigh_list);
-	INIT_WORK(&hard_iface->cleanup_work,
-		  batadv_hardif_remove_interface_finish);
 
 	spin_lock_init(&hard_iface->neigh_list_lock);
+	kref_init(&hard_iface->refcount);
 
 	hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT;
 	if (batadv_is_wifi_netdev(net_dev))
@@ -687,11 +702,8 @@
 
 	batadv_v_hardif_init(hard_iface);
 
-	/* extra reference for return */
-	kref_init(&hard_iface->refcount);
-	kref_get(&hard_iface->refcount);
-
 	batadv_check_known_mac_addr(hard_iface->net_dev);
+	kref_get(&hard_iface->refcount);
 	list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list);
 
 	return hard_iface;
@@ -713,13 +725,15 @@
 	/* first deactivate interface */
 	if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
 		batadv_hardif_disable_interface(hard_iface,
-						BATADV_IF_CLEANUP_AUTO);
+						BATADV_IF_CLEANUP_KEEP);
 
 	if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
 		return;
 
 	hard_iface->if_status = BATADV_IF_TO_BE_REMOVED;
-	queue_work(batadv_event_workqueue, &hard_iface->cleanup_work);
+	batadv_debugfs_del_hardif(hard_iface);
+	batadv_sysfs_del_hardif(&hard_iface->hardif_obj);
+	batadv_hardif_put(hard_iface);
 }
 
 void batadv_hardif_remove_interfaces(void)
diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h
index 618d5de..e44a7da 100644
--- a/net/batman-adv/icmp_socket.h
+++ b/net/batman-adv/icmp_socket.h
@@ -26,9 +26,25 @@
 
 #define BATADV_ICMP_SOCKET "socket"
 
-void batadv_socket_init(void);
 int batadv_socket_setup(struct batadv_priv *bat_priv);
+
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+
+void batadv_socket_init(void);
 void batadv_socket_receive_packet(struct batadv_icmp_header *icmph,
 				  size_t icmp_len);
 
+#else
+
+static inline void batadv_socket_init(void)
+{
+}
+
+static inline void
+batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len)
+{
+}
+
+#endif
+
 #endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fe4c5e2..2c017ab 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -82,6 +82,12 @@
 
 static int __init batadv_init(void)
 {
+	int ret;
+
+	ret = batadv_tt_cache_init();
+	if (ret < 0)
+		return ret;
+
 	INIT_LIST_HEAD(&batadv_hardif_list);
 	batadv_algo_init();
 
@@ -93,9 +99,8 @@
 	batadv_tp_meter_init();
 
 	batadv_event_workqueue = create_singlethread_workqueue("bat_events");
-
 	if (!batadv_event_workqueue)
-		return -ENOMEM;
+		goto err_create_wq;
 
 	batadv_socket_init();
 	batadv_debugfs_init();
@@ -108,6 +113,11 @@
 		BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
 
 	return 0;
+
+err_create_wq:
+	batadv_tt_cache_destroy();
+
+	return -ENOMEM;
 }
 
 static void __exit batadv_exit(void)
@@ -123,6 +133,8 @@
 	batadv_event_workqueue = NULL;
 
 	rcu_barrier();
+
+	batadv_tt_cache_destroy();
 }
 
 int batadv_mesh_init(struct net_device *soft_iface)
@@ -270,6 +282,7 @@
 	return is_my_mac;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_seq_print_text_primary_if_get - called from debugfs table printing
  *  function that requires the primary interface
@@ -305,6 +318,7 @@
 out:
 	return primary_if;
 }
+#endif
 
 /**
  * batadv_max_header_len - calculate maximum encapsulation overhead for a
@@ -638,3 +652,4 @@
 MODULE_DESCRIPTION(BATADV_DRIVER_DESC);
 MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE);
 MODULE_VERSION(BATADV_SOURCE_VERSION);
+MODULE_ALIAS_RTNL_LINK("batadv");
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 06a8608..09af21e 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2016.3"
+#define BATADV_SOURCE_VERSION "2016.4"
 #endif
 
 /* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index cc91507..13661f4 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -528,7 +528,7 @@
 	}
 
 	return !(mcast_data.flags &
-		 (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6));
+		 (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6));
 }
 
 /**
@@ -1134,6 +1134,7 @@
 				     BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_mcast_flags_print_header - print own mcast flags to debugfs table
  * @bat_priv: the bat priv with all the soft interface information
@@ -1234,6 +1235,7 @@
 
 	return 0;
 }
+#endif
 
 /**
  * batadv_mcast_free - free the multicast optimizations structures
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 231f8ea..64cb6ac 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -18,6 +18,8 @@
 #include "netlink.h"
 #include "main.h"
 
+#include <linux/atomic.h>
+#include <linux/byteorder/generic.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/genetlink.h>
@@ -26,24 +28,33 @@
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/printk.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <net/genetlink.h>
 #include <net/netlink.h>
+#include <net/sock.h>
 #include <uapi/linux/batman_adv.h>
 
+#include "bat_algo.h"
+#include "bridge_loop_avoidance.h"
+#include "gateway_client.h"
 #include "hard-interface.h"
+#include "originator.h"
+#include "packet.h"
 #include "soft-interface.h"
 #include "tp_meter.h"
+#include "translation-table.h"
 
-struct sk_buff;
-
-static struct genl_family batadv_netlink_family = {
+struct genl_family batadv_netlink_family = {
 	.id = GENL_ID_GENERATE,
 	.hdrsize = 0,
 	.name = BATADV_NL_NAME,
 	.version = 1,
 	.maxattr = BATADV_ATTR_MAX,
+	.netnsok = true,
 };
 
 /* multicast groups */
@@ -51,11 +62,11 @@
 	BATADV_NL_MCGRP_TPMETER,
 };
 
-static struct genl_multicast_group batadv_netlink_mcgrps[] = {
+static const struct genl_multicast_group batadv_netlink_mcgrps[] = {
 	[BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER },
 };
 
-static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
+static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = {
 	[BATADV_ATTR_VERSION]		= { .type = NLA_STRING },
 	[BATADV_ATTR_ALGO_NAME]		= { .type = NLA_STRING },
 	[BATADV_ATTR_MESH_IFINDEX]	= { .type = NLA_U32 },
@@ -69,9 +80,44 @@
 	[BATADV_ATTR_TPMETER_TEST_TIME]	= { .type = NLA_U32 },
 	[BATADV_ATTR_TPMETER_BYTES]	= { .type = NLA_U64 },
 	[BATADV_ATTR_TPMETER_COOKIE]	= { .type = NLA_U32 },
+	[BATADV_ATTR_ACTIVE]		= { .type = NLA_FLAG },
+	[BATADV_ATTR_TT_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_TT_TTVN]		= { .type = NLA_U8 },
+	[BATADV_ATTR_TT_LAST_TTVN]	= { .type = NLA_U8 },
+	[BATADV_ATTR_TT_CRC32]		= { .type = NLA_U32 },
+	[BATADV_ATTR_TT_VID]		= { .type = NLA_U16 },
+	[BATADV_ATTR_TT_FLAGS]		= { .type = NLA_U32 },
+	[BATADV_ATTR_FLAG_BEST]		= { .type = NLA_FLAG },
+	[BATADV_ATTR_LAST_SEEN_MSECS]	= { .type = NLA_U32 },
+	[BATADV_ATTR_NEIGH_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_TQ]		= { .type = NLA_U8 },
+	[BATADV_ATTR_THROUGHPUT]	= { .type = NLA_U32 },
+	[BATADV_ATTR_BANDWIDTH_UP]	= { .type = NLA_U32 },
+	[BATADV_ATTR_BANDWIDTH_DOWN]	= { .type = NLA_U32 },
+	[BATADV_ATTR_ROUTER]		= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_OWN]		= { .type = NLA_FLAG },
+	[BATADV_ATTR_BLA_ADDRESS]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_VID]		= { .type = NLA_U16 },
+	[BATADV_ATTR_BLA_BACKBONE]	= { .len = ETH_ALEN },
+	[BATADV_ATTR_BLA_CRC]		= { .type = NLA_U16 },
 };
 
 /**
+ * batadv_netlink_get_ifindex - Extract an interface index from a message
+ * @nlh: Message header
+ * @attrtype: Attribute which holds an interface index
+ *
+ * Return: interface index, or 0.
+ */
+int
+batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype)
+{
+	struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype);
+
+	return attr ? nla_get_u32(attr) : 0;
+}
+
+/**
  * batadv_netlink_mesh_info_put - fill in generic information about mesh
  *  interface
  * @msg: netlink message to be sent back
@@ -93,9 +139,17 @@
 	    nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) ||
 	    nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) ||
 	    nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN,
-		    soft_iface->dev_addr))
+		    soft_iface->dev_addr) ||
+	    nla_put_u8(msg, BATADV_ATTR_TT_TTVN,
+		       (u8)atomic_read(&bat_priv->tt.vn)))
 		goto out;
 
+#ifdef CONFIG_BATMAN_ADV_BLA
+	if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC,
+			ntohs(bat_priv->bla.claim_dest.group)))
+		goto out;
+#endif
+
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) {
 		hard_iface = primary_if->net_dev;
@@ -380,6 +434,106 @@
 	return ret;
 }
 
+/**
+ * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @hard_iface: Hard interface to dump
+ *
+ * Return: error code, or 0 on success
+ */
+static int
+batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq,
+				 struct batadv_hard_iface *hard_iface)
+{
+	struct net_device *net_dev = hard_iface->net_dev;
+	void *hdr;
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI,
+			  BATADV_CMD_GET_HARDIFS);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX,
+			net_dev->ifindex) ||
+	    nla_put_string(msg, BATADV_ATTR_HARD_IFNAME,
+			   net_dev->name) ||
+	    nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN,
+		    net_dev->dev_addr))
+		goto nla_put_failure;
+
+	if (hard_iface->if_status == BATADV_IF_ACTIVE) {
+		if (nla_put_flag(msg, BATADV_ATTR_ACTIVE))
+			goto nla_put_failure;
+	}
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_netlink_dump_hardifs - Dump all hard interface into a messages
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: error code, or length of reply message on success
+ */
+static int
+batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_hard_iface *hard_iface;
+	int ifindex;
+	int portid = NETLINK_CB(cb->skb).portid;
+	int seq = cb->nlh->nlmsg_seq;
+	int skip = cb->args[0];
+	int i = 0;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh,
+					     BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface)
+		return -ENODEV;
+
+	if (!batadv_softif_is_valid(soft_iface)) {
+		dev_put(soft_iface);
+		return -ENODEV;
+	}
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+		if (hard_iface->soft_iface != soft_iface)
+			continue;
+
+		if (i++ < skip)
+			continue;
+
+		if (batadv_netlink_dump_hardif_entry(msg, portid, seq,
+						     hard_iface)) {
+			i--;
+			break;
+		}
+	}
+
+	rcu_read_unlock();
+
+	dev_put(soft_iface);
+
+	cb->args[0] = i;
+
+	return msg->len;
+}
+
 static struct genl_ops batadv_netlink_ops[] = {
 	{
 		.cmd = BATADV_CMD_GET_MESH_INFO,
@@ -399,6 +553,61 @@
 		.policy = batadv_netlink_policy,
 		.doit = batadv_netlink_tp_meter_cancel,
 	},
+	{
+		.cmd = BATADV_CMD_GET_ROUTING_ALGOS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_algo_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_HARDIFS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_netlink_dump_hardifs,
+	},
+	{
+		.cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_tt_local_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_tt_global_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_ORIGINATORS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_orig_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_NEIGHBORS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_hardif_neigh_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_GATEWAYS,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_gw_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_BLA_CLAIM,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_bla_claim_dump,
+	},
+	{
+		.cmd = BATADV_CMD_GET_BLA_BACKBONE,
+		.flags = GENL_ADMIN_PERM,
+		.policy = batadv_netlink_policy,
+		.dumpit = batadv_bla_backbone_dump,
+	},
+
 };
 
 /**
diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h
index 945653a..52eb162 100644
--- a/net/batman-adv/netlink.h
+++ b/net/batman-adv/netlink.h
@@ -21,12 +21,18 @@
 #include "main.h"
 
 #include <linux/types.h>
+#include <net/genetlink.h>
+
+struct nlmsghdr;
 
 void batadv_netlink_register(void);
 void batadv_netlink_unregister(void);
+int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype);
 
 int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst,
 				  u8 result, u32 test_time, u64 total_bytes,
 				  u32 cookie);
 
+extern struct genl_family batadv_netlink_family;
+
 #endif /* _NET_BATMAN_ADV_NETLINK_H_ */
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 293ef4f..e3baf69 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -856,14 +856,12 @@
 	if (!nc_node)
 		return NULL;
 
-	kref_get(&orig_neigh_node->refcount);
-
 	/* Initialize nc_node */
 	INIT_LIST_HEAD(&nc_node->list);
-	ether_addr_copy(nc_node->addr, orig_node->orig);
-	nc_node->orig_node = orig_neigh_node;
 	kref_init(&nc_node->refcount);
-	kref_get(&nc_node->refcount);
+	ether_addr_copy(nc_node->addr, orig_node->orig);
+	kref_get(&orig_neigh_node->refcount);
+	nc_node->orig_node = orig_neigh_node;
 
 	/* Select ingoing or outgoing coding node */
 	if (in_coding) {
@@ -879,6 +877,7 @@
 
 	/* Add nc_node to orig_node */
 	spin_lock_bh(lock);
+	kref_get(&nc_node->refcount);
 	list_add_tail_rcu(&nc_node->list, list);
 	spin_unlock_bh(lock);
 
@@ -979,7 +978,6 @@
 	INIT_LIST_HEAD(&nc_path->packet_list);
 	spin_lock_init(&nc_path->packet_list_lock);
 	kref_init(&nc_path->refcount);
-	kref_get(&nc_path->refcount);
 	nc_path->last_valid = jiffies;
 	ether_addr_copy(nc_path->next_hop, dst);
 	ether_addr_copy(nc_path->prev_hop, src);
@@ -989,6 +987,7 @@
 		   nc_path->next_hop);
 
 	/* Add nc_path to hash table */
+	kref_get(&nc_path->refcount);
 	hash_added = batadv_hash_add(hash, batadv_nc_hash_compare,
 				     batadv_nc_hash_choose, &nc_path_key,
 				     &nc_path->hash_entry);
@@ -1882,6 +1881,7 @@
 	batadv_hash_destroy(bat_priv->nc.decoding_hash);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_nc_nodes_seq_print_text - print the nc node information
  * @seq: seq file to print on
@@ -1981,3 +1981,4 @@
 out:
 	return -ENOMEM;
 }
+#endif
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 3940b5d..5f3bfc4 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -28,11 +28,15 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bat_algo.h"
 #include "distributed-arp-table.h"
@@ -42,8 +46,10 @@
 #include "hash.h"
 #include "log.h"
 #include "multicast.h"
+#include "netlink.h"
 #include "network-coding.h"
 #include "routing.h"
+#include "soft-interface.h"
 #include "translation-table.h"
 
 /* hash class keys */
@@ -127,9 +133,9 @@
 		goto out;
 
 	kref_init(&vlan->refcount);
-	kref_get(&vlan->refcount);
 	vlan->vid = vid;
 
+	kref_get(&vlan->refcount);
 	hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list);
 
 out:
@@ -380,6 +386,7 @@
 	orig_ifinfo->if_outgoing = if_outgoing;
 	INIT_HLIST_NODE(&orig_ifinfo->list);
 	kref_init(&orig_ifinfo->refcount);
+
 	kref_get(&orig_ifinfo->refcount);
 	hlist_add_head_rcu(&orig_ifinfo->list,
 			   &orig_node->ifinfo_list);
@@ -453,9 +460,9 @@
 
 	INIT_HLIST_NODE(&neigh_ifinfo->list);
 	kref_init(&neigh_ifinfo->refcount);
-	kref_get(&neigh_ifinfo->refcount);
 	neigh_ifinfo->if_outgoing = if_outgoing;
 
+	kref_get(&neigh_ifinfo->refcount);
 	hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list);
 
 out:
@@ -647,8 +654,8 @@
 
 	/* extra reference for return */
 	kref_init(&neigh_node->refcount);
-	kref_get(&neigh_node->refcount);
 
+	kref_get(&neigh_node->refcount);
 	hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
 
 	batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv,
@@ -686,6 +693,7 @@
 	return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list
  * @seq: neighbour table seq_file struct
@@ -719,6 +727,84 @@
 	bat_priv->algo_ops->neigh.print(bat_priv, seq);
 	return 0;
 }
+#endif
+
+/**
+ * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific
+ *  outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: 0 or error value
+ */
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct net_device *hard_iface = NULL;
+	struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	int ret;
+	int ifindex, hard_ifindex;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+						  BATADV_ATTR_HARD_IFINDEX);
+	if (hard_ifindex) {
+		hard_iface = dev_get_by_index(net, hard_ifindex);
+		if (hard_iface)
+			hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+		if (!hardif) {
+			ret = -ENODEV;
+			goto out;
+		}
+
+		if (hardif->soft_iface != soft_iface) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (!bat_priv->algo_ops->neigh.dump) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif);
+
+	ret = msg->len;
+
+ out:
+	if (hardif)
+		batadv_hardif_put(hardif);
+	if (hard_iface)
+		dev_put(hard_iface);
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
 
 /**
  * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for
@@ -905,7 +991,6 @@
 
 	/* extra reference for return */
 	kref_init(&orig_node->refcount);
-	kref_get(&orig_node->refcount);
 
 	orig_node->bat_priv = bat_priv;
 	ether_addr_copy(orig_node->orig, addr);
@@ -1256,6 +1341,7 @@
 	_batadv_purge_orig(bat_priv);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1329,6 +1415,84 @@
 		batadv_hardif_put(hard_iface);
 	return 0;
 }
+#endif
+
+/**
+ * batadv_orig_dump - Dump to netlink the originator infos for a specific
+ *  outgoing interface
+ * @msg: message to dump into
+ * @cb: parameters for the dump
+ *
+ * Return: 0 or error value
+ */
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct net_device *hard_iface = NULL;
+	struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	int ret;
+	int ifindex, hard_ifindex;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hard_ifindex = batadv_netlink_get_ifindex(cb->nlh,
+						  BATADV_ATTR_HARD_IFINDEX);
+	if (hard_ifindex) {
+		hard_iface = dev_get_by_index(net, hard_ifindex);
+		if (hard_iface)
+			hardif = batadv_hardif_get_by_netdev(hard_iface);
+
+		if (!hardif) {
+			ret = -ENODEV;
+			goto out;
+		}
+
+		if (hardif->soft_iface != soft_iface) {
+			ret = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (!bat_priv->algo_ops->orig.dump) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif);
+
+	ret = msg->len;
+
+ out:
+	if (hardif)
+		batadv_hardif_put(hardif);
+	if (hard_iface)
+		dev_put(hard_iface);
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	return ret;
+}
 
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 			    int max_if_num)
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 566306b..ebc5618 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -31,7 +31,9 @@
 
 #include "hash.h"
 
+struct netlink_callback;
 struct seq_file;
+struct sk_buff;
 
 bool batadv_compare_orig(const struct hlist_node *node, const void *data2);
 int batadv_originator_init(struct batadv_priv *bat_priv);
@@ -61,6 +63,7 @@
 			struct batadv_hard_iface *if_outgoing);
 void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo);
 
+int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset);
 
 struct batadv_orig_ifinfo *
@@ -72,6 +75,7 @@
 void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo);
 
 int batadv_orig_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb);
 int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface,
 			    int max_if_num);
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index 6b011ff..6afc0b8 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -129,42 +129,6 @@
 };
 
 /**
- * enum batadv_tt_client_flags - TT client specific flags
- * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table
- * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new
- *  update telling its new real location has not been received/sent yet
- * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface.
- *  This information is used by the "AP Isolation" feature
- * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This
- *  information is used by the Extended Isolation feature
- * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table
- * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has
- *  not been announced yet
- * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept
- *  in the table for one more originator interval for consistency purposes
- * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of
- *  the network but no nnode has already announced it
- *
- * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire.
- * Bits from 8 to 15 are called _local flags_ because they are used for local
- * computations only.
- *
- * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with
- * the other nodes in the network. To achieve this goal these flags are included
- * in the TT CRC computation.
- */
-enum batadv_tt_client_flags {
-	BATADV_TT_CLIENT_DEL     = BIT(0),
-	BATADV_TT_CLIENT_ROAM    = BIT(1),
-	BATADV_TT_CLIENT_WIFI    = BIT(4),
-	BATADV_TT_CLIENT_ISOLA	 = BIT(5),
-	BATADV_TT_CLIENT_NOPURGE = BIT(8),
-	BATADV_TT_CLIENT_NEW     = BIT(9),
-	BATADV_TT_CLIENT_PENDING = BIT(10),
-	BATADV_TT_CLIENT_TEMP	 = BIT(11),
-};
-
-/**
  * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field
  * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not
  */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 7602c00..7e8dc64 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -74,11 +74,23 @@
 	if (!orig_ifinfo)
 		return;
 
-	rcu_read_lock();
-	curr_router = rcu_dereference(orig_ifinfo->router);
-	if (curr_router && !kref_get_unless_zero(&curr_router->refcount))
-		curr_router = NULL;
-	rcu_read_unlock();
+	spin_lock_bh(&orig_node->neigh_list_lock);
+	/* curr_router used earlier may not be the current orig_ifinfo->router
+	 * anymore because it was dereferenced outside of the neigh_list_lock
+	 * protected region. After the new best neighbor has replace the current
+	 * best neighbor the reference counter needs to decrease. Consequently,
+	 * the code needs to ensure the curr_router variable contains a pointer
+	 * to the replaced best neighbor.
+	 */
+	curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
+	/* increase refcount of new best neighbor */
+	if (neigh_node)
+		kref_get(&neigh_node->refcount);
+
+	rcu_assign_pointer(orig_ifinfo->router, neigh_node);
+	spin_unlock_bh(&orig_node->neigh_list_lock);
+	batadv_orig_ifinfo_put(orig_ifinfo);
 
 	/* route deleted */
 	if ((curr_router) && (!neigh_node)) {
@@ -100,27 +112,6 @@
 			   curr_router->addr);
 	}
 
-	if (curr_router)
-		batadv_neigh_node_put(curr_router);
-
-	spin_lock_bh(&orig_node->neigh_list_lock);
-	/* curr_router used earlier may not be the current orig_ifinfo->router
-	 * anymore because it was dereferenced outside of the neigh_list_lock
-	 * protected region. After the new best neighbor has replace the current
-	 * best neighbor the reference counter needs to decrease. Consequently,
-	 * the code needs to ensure the curr_router variable contains a pointer
-	 * to the replaced best neighbor.
-	 */
-	curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
-
-	/* increase refcount of new best neighbor */
-	if (neigh_node)
-		kref_get(&neigh_node->refcount);
-
-	rcu_assign_pointer(orig_ifinfo->router, neigh_node);
-	spin_unlock_bh(&orig_node->neigh_list_lock);
-	batadv_orig_ifinfo_put(orig_ifinfo);
-
 	/* decrease refcount of previous best neighbor */
 	if (curr_router)
 		batadv_neigh_node_put(curr_router);
@@ -470,6 +461,29 @@
 }
 
 /**
+ * batadv_last_bonding_get - Get last_bonding_candidate of orig_node
+ * @orig_node: originator node whose last bonding candidate should be retrieved
+ *
+ * Return: last bonding candidate of router or NULL if not found
+ *
+ * The object is returned with refcounter increased by 1.
+ */
+static struct batadv_orig_ifinfo *
+batadv_last_bonding_get(struct batadv_orig_node *orig_node)
+{
+	struct batadv_orig_ifinfo *last_bonding_candidate;
+
+	spin_lock_bh(&orig_node->neigh_list_lock);
+	last_bonding_candidate = orig_node->last_bonding_candidate;
+
+	if (last_bonding_candidate)
+		kref_get(&last_bonding_candidate->refcount);
+	spin_unlock_bh(&orig_node->neigh_list_lock);
+
+	return last_bonding_candidate;
+}
+
+/**
  * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node
  * @orig_node: originator node whose bonding candidates should be replaced
  * @new_candidate: new bonding candidate or NULL
@@ -539,7 +553,7 @@
 	 * router - obviously there are no other candidates.
 	 */
 	rcu_read_lock();
-	last_candidate = orig_node->last_bonding_candidate;
+	last_candidate = batadv_last_bonding_get(orig_node);
 	if (last_candidate)
 		last_cand_router = rcu_dereference(last_candidate->router);
 
@@ -631,6 +645,9 @@
 		batadv_orig_ifinfo_put(next_candidate);
 	}
 
+	if (last_candidate)
+		batadv_orig_ifinfo_put(last_candidate);
+
 	return router;
 }
 
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 6191159..8d4e1f5 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -315,8 +315,7 @@
  *
  * Wrap the given skb into a batman-adv unicast or unicast-4addr header
  * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied
- * as packet_type. Then send this frame to the given orig_node and release a
- * reference to this orig_node.
+ * as packet_type. Then send this frame to the given orig_node.
  *
  * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
  */
@@ -370,8 +369,6 @@
 		ret = NET_XMIT_SUCCESS;
 
 out:
-	if (orig_node)
-		batadv_orig_node_put(orig_node);
 	if (ret == NET_XMIT_DROP)
 		kfree_skb(skb);
 	return ret;
@@ -403,6 +400,7 @@
 	struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
 	struct batadv_orig_node *orig_node;
 	u8 *src, *dst;
+	int ret;
 
 	src = ethhdr->h_source;
 	dst = ethhdr->h_dest;
@@ -414,8 +412,13 @@
 	}
 	orig_node = batadv_transtable_search(bat_priv, src, dst, vid);
 
-	return batadv_send_skb_unicast(bat_priv, skb, packet_type,
-				       packet_subtype, orig_node, vid);
+	ret = batadv_send_skb_unicast(bat_priv, skb, packet_type,
+				      packet_subtype, orig_node, vid);
+
+	if (orig_node)
+		batadv_orig_node_put(orig_node);
+
+	return ret;
 }
 
 /**
@@ -433,12 +436,25 @@
 			   unsigned short vid)
 {
 	struct batadv_orig_node *orig_node;
+	int ret;
 
 	orig_node = batadv_gw_get_selected_orig(bat_priv);
-	return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
-				       BATADV_P_DATA, orig_node, vid);
+	ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR,
+				      BATADV_P_DATA, orig_node, vid);
+
+	if (orig_node)
+		batadv_orig_node_put(orig_node);
+
+	return ret;
 }
 
+/**
+ * batadv_forw_packet_free - free a forwarding packet
+ * @forw_packet: The packet to free
+ *
+ * This frees a forwarding packet and releases any resources it might
+ * have claimed.
+ */
 void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet)
 {
 	kfree_skb(forw_packet->skb);
@@ -446,9 +462,73 @@
 		batadv_hardif_put(forw_packet->if_incoming);
 	if (forw_packet->if_outgoing)
 		batadv_hardif_put(forw_packet->if_outgoing);
+	if (forw_packet->queue_left)
+		atomic_inc(forw_packet->queue_left);
 	kfree(forw_packet);
 }
 
+/**
+ * batadv_forw_packet_alloc - allocate a forwarding packet
+ * @if_incoming: The (optional) if_incoming to be grabbed
+ * @if_outgoing: The (optional) if_outgoing to be grabbed
+ * @queue_left: The (optional) queue counter to decrease
+ * @bat_priv: The bat_priv for the mesh of this forw_packet
+ *
+ * Allocates a forwarding packet and tries to get a reference to the
+ * (optional) if_incoming, if_outgoing and queue_left. If queue_left
+ * is NULL then bat_priv is optional, too.
+ *
+ * Return: An allocated forwarding packet on success, NULL otherwise.
+ */
+struct batadv_forw_packet *
+batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
+			 struct batadv_hard_iface *if_outgoing,
+			 atomic_t *queue_left,
+			 struct batadv_priv *bat_priv)
+{
+	struct batadv_forw_packet *forw_packet;
+	const char *qname;
+
+	if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) {
+		qname = "unknown";
+
+		if (queue_left == &bat_priv->bcast_queue_left)
+			qname = "bcast";
+
+		if (queue_left == &bat_priv->batman_queue_left)
+			qname = "batman";
+
+		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+			   "%s queue is full\n", qname);
+
+		return NULL;
+	}
+
+	forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC);
+	if (!forw_packet)
+		goto err;
+
+	if (if_incoming)
+		kref_get(&if_incoming->refcount);
+
+	if (if_outgoing)
+		kref_get(&if_outgoing->refcount);
+
+	forw_packet->skb = NULL;
+	forw_packet->queue_left = queue_left;
+	forw_packet->if_incoming = if_incoming;
+	forw_packet->if_outgoing = if_outgoing;
+	forw_packet->num_packets = 0;
+
+	return forw_packet;
+
+err:
+	if (queue_left)
+		atomic_inc(queue_left);
+
+	return NULL;
+}
+
 static void
 _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv,
 				 struct batadv_forw_packet *forw_packet,
@@ -487,24 +567,20 @@
 	struct batadv_bcast_packet *bcast_packet;
 	struct sk_buff *newskb;
 
-	if (!batadv_atomic_dec_not_zero(&bat_priv->bcast_queue_left)) {
-		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
-			   "bcast packet queue full\n");
-		goto out;
-	}
-
 	primary_if = batadv_primary_if_get_selected(bat_priv);
 	if (!primary_if)
-		goto out_and_inc;
+		goto err;
 
-	forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC);
-
+	forw_packet = batadv_forw_packet_alloc(primary_if, NULL,
+					       &bat_priv->bcast_queue_left,
+					       bat_priv);
+	batadv_hardif_put(primary_if);
 	if (!forw_packet)
-		goto out_and_inc;
+		goto err;
 
 	newskb = skb_copy(skb, GFP_ATOMIC);
 	if (!newskb)
-		goto packet_free;
+		goto err_packet_free;
 
 	/* as we have a copy now, it is safe to decrease the TTL */
 	bcast_packet = (struct batadv_bcast_packet *)newskb->data;
@@ -513,11 +589,6 @@
 	skb_reset_mac_header(newskb);
 
 	forw_packet->skb = newskb;
-	forw_packet->if_incoming = primary_if;
-	forw_packet->if_outgoing = NULL;
-
-	/* how often did we send the bcast packet ? */
-	forw_packet->num_packets = 0;
 
 	INIT_DELAYED_WORK(&forw_packet->delayed_work,
 			  batadv_send_outstanding_bcast_packet);
@@ -525,13 +596,9 @@
 	_batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay);
 	return NETDEV_TX_OK;
 
-packet_free:
-	kfree(forw_packet);
-out_and_inc:
-	atomic_inc(&bat_priv->bcast_queue_left);
-out:
-	if (primary_if)
-		batadv_hardif_put(primary_if);
+err_packet_free:
+	batadv_forw_packet_free(forw_packet);
+err:
 	return NETDEV_TX_BUSY;
 }
 
@@ -592,7 +659,6 @@
 
 out:
 	batadv_forw_packet_free(forw_packet);
-	atomic_inc(&bat_priv->bcast_queue_left);
 }
 
 void
@@ -633,9 +699,6 @@
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
-			if (!forw_packet->own)
-				atomic_inc(&bat_priv->bcast_queue_left);
-
 			batadv_forw_packet_free(forw_packet);
 		}
 	}
@@ -663,9 +726,6 @@
 
 		if (pending) {
 			hlist_del(&forw_packet->list);
-			if (!forw_packet->own)
-				atomic_inc(&bat_priv->batman_queue_left);
-
 			batadv_forw_packet_free(forw_packet);
 		}
 	}
diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h
index 7cecb75..999f786 100644
--- a/net/batman-adv/send.h
+++ b/net/batman-adv/send.h
@@ -28,6 +28,12 @@
 struct sk_buff;
 
 void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet);
+struct batadv_forw_packet *
+batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming,
+			 struct batadv_hard_iface *if_outgoing,
+			 atomic_t *queue_left,
+			 struct batadv_priv *bat_priv);
+
 int batadv_send_skb_to_orig(struct sk_buff *skb,
 			    struct batadv_orig_node *orig_node,
 			    struct batadv_hard_iface *recv_if);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 7527c06..49e16b6 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -39,6 +39,7 @@
 #include <linux/random.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/socket.h>
@@ -46,7 +47,6 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <linux/workqueue.h>
 
 #include "bat_algo.h"
 #include "bridge_loop_avoidance.h"
@@ -57,6 +57,7 @@
 #include "hard-interface.h"
 #include "multicast.h"
 #include "network-coding.h"
+#include "originator.h"
 #include "packet.h"
 #include "send.h"
 #include "sysfs.h"
@@ -377,6 +378,8 @@
 dropped_freed:
 	batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
 end:
+	if (mcast_single_orig)
+		batadv_orig_node_put(mcast_single_orig);
 	if (primary_if)
 		batadv_hardif_put(primary_if);
 	return NETDEV_TX_OK;
@@ -591,6 +594,7 @@
 	}
 
 	spin_lock_bh(&bat_priv->softif_vlan_list_lock);
+	kref_get(&vlan->refcount);
 	hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list);
 	spin_unlock_bh(&bat_priv->softif_vlan_list_lock);
 
@@ -601,6 +605,9 @@
 			    bat_priv->soft_iface->dev_addr, vid,
 			    BATADV_NULL_IFINDEX, BATADV_NO_MARK);
 
+	/* don't return reference to new softif_vlan */
+	batadv_softif_vlan_put(vlan);
+
 	return 0;
 }
 
@@ -747,34 +754,6 @@
 }
 
 /**
- * batadv_softif_destroy_finish - cleans up the remains of a softif
- * @work: work queue item
- *
- * Free the parts of the soft interface which can not be removed under
- * rtnl lock (to prevent deadlock situations).
- */
-static void batadv_softif_destroy_finish(struct work_struct *work)
-{
-	struct batadv_softif_vlan *vlan;
-	struct batadv_priv *bat_priv;
-	struct net_device *soft_iface;
-
-	bat_priv = container_of(work, struct batadv_priv,
-				cleanup_work);
-	soft_iface = bat_priv->soft_iface;
-
-	/* destroy the "untagged" VLAN */
-	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
-	if (vlan) {
-		batadv_softif_destroy_vlan(bat_priv, vlan);
-		batadv_softif_vlan_put(vlan);
-	}
-
-	batadv_sysfs_del_meshif(soft_iface);
-	unregister_netdev(soft_iface);
-}
-
-/**
  * batadv_softif_init_late - late stage initialization of soft interface
  * @dev: registered network device to modify
  *
@@ -791,7 +770,6 @@
 
 	bat_priv = netdev_priv(dev);
 	bat_priv->soft_iface = dev;
-	INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);
 
 	/* batadv_interface_stats() needs to be available as soon as
 	 * register_netdevice() has been called
@@ -1028,8 +1006,19 @@
 void batadv_softif_destroy_sysfs(struct net_device *soft_iface)
 {
 	struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+	struct batadv_softif_vlan *vlan;
 
-	queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);
+	ASSERT_RTNL();
+
+	/* destroy the "untagged" VLAN */
+	vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS);
+	if (vlan) {
+		batadv_softif_destroy_vlan(bat_priv, vlan);
+		batadv_softif_vlan_put(vlan);
+	}
+
+	batadv_sysfs_del_meshif(soft_iface);
+	unregister_netdevice(soft_iface);
 }
 
 /**
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index fe9ca94..02d96f2 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -37,6 +37,7 @@
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/stringify.h>
+#include <linux/workqueue.h>
 
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
@@ -428,6 +429,13 @@
 	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
 	int bytes_written;
 
+	/* GW mode is not available if the routing algorithm in use does not
+	 * implement the GW API
+	 */
+	if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+	    !bat_priv->algo_ops->gw.is_eligible)
+		return -ENOENT;
+
 	switch (atomic_read(&bat_priv->gw.mode)) {
 	case BATADV_GW_MODE_CLIENT:
 		bytes_written = sprintf(buff, "%s\n",
@@ -455,6 +463,13 @@
 	char *curr_gw_mode_str;
 	int gw_mode_tmp = -1;
 
+	/* toggling GW mode is allowed only if the routing algorithm in use
+	 * provides the GW API
+	 */
+	if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+	    !bat_priv->algo_ops->gw.is_eligible)
+		return -EINVAL;
+
 	if (buff[count - 1] == '\n')
 		buff[count - 1] = '\0';
 
@@ -514,6 +529,50 @@
 	return count;
 }
 
+static ssize_t batadv_show_gw_sel_class(struct kobject *kobj,
+					struct attribute *attr, char *buff)
+{
+	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
+	/* GW selection class is not available if the routing algorithm in use
+	 * does not implement the GW API
+	 */
+	if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+	    !bat_priv->algo_ops->gw.is_eligible)
+		return -ENOENT;
+
+	if (bat_priv->algo_ops->gw.show_sel_class)
+		return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff);
+
+	return sprintf(buff, "%i\n", atomic_read(&bat_priv->gw.sel_class));
+}
+
+static ssize_t batadv_store_gw_sel_class(struct kobject *kobj,
+					 struct attribute *attr, char *buff,
+					 size_t count)
+{
+	struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj);
+
+	/* setting the GW selection class is allowed only if the routing
+	 * algorithm in use implements the GW API
+	 */
+	if (!bat_priv->algo_ops->gw.get_best_gw_node ||
+	    !bat_priv->algo_ops->gw.is_eligible)
+		return -EINVAL;
+
+	if (buff[count - 1] == '\n')
+		buff[count - 1] = '\0';
+
+	if (bat_priv->algo_ops->gw.store_sel_class)
+		return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff,
+							      count);
+
+	return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE,
+					batadv_post_gw_reselect, attr,
+					&bat_priv->gw.sel_class,
+					bat_priv->soft_iface);
+}
+
 static ssize_t batadv_show_gw_bwidth(struct kobject *kobj,
 				     struct attribute *attr, char *buff)
 {
@@ -625,8 +684,8 @@
 		     2 * BATADV_JITTER, INT_MAX, NULL);
 BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0,
 		     BATADV_TQ_MAX_VALUE, NULL);
-BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1,
-		     BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect);
+static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class,
+		   batadv_store_gw_sel_class);
 static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,
 		   batadv_store_gw_bwidth);
 #ifdef CONFIG_BATMAN_ADV_MCAST
@@ -712,6 +771,8 @@
 	for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr)
 		sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
 
+	kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE);
+	kobject_del(bat_priv->mesh_obj);
 	kobject_put(bat_priv->mesh_obj);
 	bat_priv->mesh_obj = NULL;
 out:
@@ -726,6 +787,8 @@
 	for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr)
 		sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr));
 
+	kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE);
+	kobject_del(bat_priv->mesh_obj);
 	kobject_put(bat_priv->mesh_obj);
 	bat_priv->mesh_obj = NULL;
 }
@@ -781,6 +844,10 @@
 	for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
 		sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
 
+	if (vlan->kobj != bat_priv->mesh_obj) {
+		kobject_uevent(vlan->kobj, KOBJ_REMOVE);
+		kobject_del(vlan->kobj);
+	}
 	kobject_put(vlan->kobj);
 	vlan->kobj = NULL;
 out:
@@ -800,6 +867,10 @@
 	for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr)
 		sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr));
 
+	if (vlan->kobj != bat_priv->mesh_obj) {
+		kobject_uevent(vlan->kobj, KOBJ_REMOVE);
+		kobject_del(vlan->kobj);
+	}
 	kobject_put(vlan->kobj);
 	vlan->kobj = NULL;
 }
@@ -828,31 +899,31 @@
 	return length;
 }
 
-static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
-				       struct attribute *attr, char *buff,
-				       size_t count)
+/**
+ * batadv_store_mesh_iface_finish - store new hardif mesh_iface state
+ * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
+ * @ifname: name of soft-interface to modify
+ *
+ * Changes the parts of the hard+soft interface which can not be modified under
+ * sysfs lock (to prevent deadlock situations).
+ *
+ * Return: 0 on success, 0 < on failure
+ */
+static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
+					  char ifname[IFNAMSIZ])
 {
-	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
 	struct net *net = dev_net(net_dev);
 	struct batadv_hard_iface *hard_iface;
-	int status_tmp = -1;
-	int ret = count;
+	int status_tmp;
+	int ret = 0;
+
+	ASSERT_RTNL();
 
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);
 	if (!hard_iface)
-		return count;
+		return 0;
 
-	if (buff[count - 1] == '\n')
-		buff[count - 1] = '\0';
-
-	if (strlen(buff) >= IFNAMSIZ) {
-		pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
-		       buff);
-		batadv_hardif_put(hard_iface);
-		return -EINVAL;
-	}
-
-	if (strncmp(buff, "none", 4) == 0)
+	if (strncmp(ifname, "none", 4) == 0)
 		status_tmp = BATADV_IF_NOT_IN_USE;
 	else
 		status_tmp = BATADV_IF_I_WANT_YOU;
@@ -861,15 +932,13 @@
 		goto out;
 
 	if ((hard_iface->soft_iface) &&
-	    (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0))
+	    (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
 		goto out;
 
-	rtnl_lock();
-
 	if (status_tmp == BATADV_IF_NOT_IN_USE) {
 		batadv_hardif_disable_interface(hard_iface,
 						BATADV_IF_CLEANUP_AUTO);
-		goto unlock;
+		goto out;
 	}
 
 	/* if the interface already is in use */
@@ -877,15 +946,71 @@
 		batadv_hardif_disable_interface(hard_iface,
 						BATADV_IF_CLEANUP_AUTO);
 
-	ret = batadv_hardif_enable_interface(hard_iface, net, buff);
-
-unlock:
-	rtnl_unlock();
+	ret = batadv_hardif_enable_interface(hard_iface, net, ifname);
 out:
 	batadv_hardif_put(hard_iface);
 	return ret;
 }
 
+/**
+ * batadv_store_mesh_iface_work - store new hardif mesh_iface state
+ * @work: work queue item
+ *
+ * Changes the parts of the hard+soft interface which can not be modified under
+ * sysfs lock (to prevent deadlock situations).
+ */
+static void batadv_store_mesh_iface_work(struct work_struct *work)
+{
+	struct batadv_store_mesh_work *store_work;
+	int ret;
+
+	store_work = container_of(work, struct batadv_store_mesh_work, work);
+
+	rtnl_lock();
+	ret = batadv_store_mesh_iface_finish(store_work->net_dev,
+					     store_work->soft_iface_name);
+	rtnl_unlock();
+
+	if (ret < 0)
+		pr_err("Failed to store new mesh_iface state %s for %s: %d\n",
+		       store_work->soft_iface_name, store_work->net_dev->name,
+		       ret);
+
+	dev_put(store_work->net_dev);
+	kfree(store_work);
+}
+
+static ssize_t batadv_store_mesh_iface(struct kobject *kobj,
+				       struct attribute *attr, char *buff,
+				       size_t count)
+{
+	struct net_device *net_dev = batadv_kobj_to_netdev(kobj);
+	struct batadv_store_mesh_work *store_work;
+
+	if (buff[count - 1] == '\n')
+		buff[count - 1] = '\0';
+
+	if (strlen(buff) >= IFNAMSIZ) {
+		pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n",
+		       buff);
+		return -EINVAL;
+	}
+
+	store_work = kmalloc(sizeof(*store_work), GFP_KERNEL);
+	if (!store_work)
+		return -ENOMEM;
+
+	dev_hold(net_dev);
+	INIT_WORK(&store_work->work, batadv_store_mesh_iface_work);
+	store_work->net_dev = net_dev;
+	strlcpy(store_work->soft_iface_name, buff,
+		sizeof(store_work->soft_iface_name));
+
+	queue_work(batadv_event_workqueue, &store_work->work);
+
+	return count;
+}
+
 static ssize_t batadv_show_iface_status(struct kobject *kobj,
 					struct attribute *attr, char *buff)
 {
@@ -1048,6 +1173,8 @@
 
 void batadv_sysfs_del_hardif(struct kobject **hardif_obj)
 {
+	kobject_uevent(*hardif_obj, KOBJ_REMOVE);
+	kobject_del(*hardif_obj);
 	kobject_put(*hardif_obj);
 	*hardif_obj = NULL;
 }
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 7e6df7a..7f66309 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -22,12 +22,14 @@
 #include <linux/bitops.h>
 #include <linux/bug.h>
 #include <linux/byteorder/generic.h>
+#include <linux/cache.h>
 #include <linux/compiler.h>
 #include <linux/crc32c.h>
 #include <linux/errno.h>
 #include <linux/etherdevice.h>
 #include <linux/fs.h>
 #include <linux/if_ether.h>
+#include <linux/init.h>
 #include <linux/jhash.h>
 #include <linux/jiffies.h>
 #include <linux/kernel.h>
@@ -35,25 +37,39 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/rculist.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
+#include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/stddef.h>
 #include <linux/string.h>
 #include <linux/workqueue.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <uapi/linux/batman_adv.h>
 
 #include "bridge_loop_avoidance.h"
 #include "hard-interface.h"
 #include "hash.h"
 #include "log.h"
 #include "multicast.h"
+#include "netlink.h"
 #include "originator.h"
 #include "packet.h"
 #include "soft-interface.h"
 #include "tvlv.h"
 
+static struct kmem_cache *batadv_tl_cache __read_mostly;
+static struct kmem_cache *batadv_tg_cache __read_mostly;
+static struct kmem_cache *batadv_tt_orig_cache __read_mostly;
+static struct kmem_cache *batadv_tt_change_cache __read_mostly;
+static struct kmem_cache *batadv_tt_req_cache __read_mostly;
+static struct kmem_cache *batadv_tt_roam_cache __read_mostly;
+
 /* hash class keys */
 static struct lock_class_key batadv_tt_local_hash_lock_class_key;
 static struct lock_class_key batadv_tt_global_hash_lock_class_key;
@@ -205,6 +221,20 @@
 }
 
 /**
+ * batadv_tt_local_entry_free_rcu - free the tt_local_entry
+ * @rcu: rcu pointer of the tt_local_entry
+ */
+static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu)
+{
+	struct batadv_tt_local_entry *tt_local_entry;
+
+	tt_local_entry = container_of(rcu, struct batadv_tt_local_entry,
+				      common.rcu);
+
+	kmem_cache_free(batadv_tl_cache, tt_local_entry);
+}
+
+/**
  * batadv_tt_local_entry_release - release tt_local_entry from lists and queue
  *  for free after rcu grace period
  * @ref: kref pointer of the nc_node
@@ -218,7 +248,7 @@
 
 	batadv_softif_vlan_put(tt_local_entry->vlan);
 
-	kfree_rcu(tt_local_entry, common.rcu);
+	call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu);
 }
 
 /**
@@ -234,6 +264,20 @@
 }
 
 /**
+ * batadv_tt_global_entry_free_rcu - free the tt_global_entry
+ * @rcu: rcu pointer of the tt_global_entry
+ */
+static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu)
+{
+	struct batadv_tt_global_entry *tt_global_entry;
+
+	tt_global_entry = container_of(rcu, struct batadv_tt_global_entry,
+				       common.rcu);
+
+	kmem_cache_free(batadv_tg_cache, tt_global_entry);
+}
+
+/**
  * batadv_tt_global_entry_release - release tt_global_entry from lists and queue
  *  for free after rcu grace period
  * @ref: kref pointer of the nc_node
@@ -246,7 +290,8 @@
 				       common.refcount);
 
 	batadv_tt_global_del_orig_list(tt_global_entry);
-	kfree_rcu(tt_global_entry, common.rcu);
+
+	call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu);
 }
 
 /**
@@ -384,6 +429,19 @@
 }
 
 /**
+ * batadv_tt_orig_list_entry_free_rcu - free the orig_entry
+ * @rcu: rcu pointer of the orig_entry
+ */
+static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu)
+{
+	struct batadv_tt_orig_list_entry *orig_entry;
+
+	orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu);
+
+	kmem_cache_free(batadv_tt_orig_cache, orig_entry);
+}
+
+/**
  * batadv_tt_orig_list_entry_release - release tt orig entry from lists and
  *  queue for free after rcu grace period
  * @ref: kref pointer of the tt orig entry
@@ -396,7 +454,7 @@
 				  refcount);
 
 	batadv_orig_node_put(orig_entry->orig_node);
-	kfree_rcu(orig_entry, rcu);
+	call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu);
 }
 
 /**
@@ -426,7 +484,7 @@
 	bool event_removed = false;
 	bool del_op_requested, del_op_entry;
 
-	tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC);
+	tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC);
 	if (!tt_change_node)
 		return;
 
@@ -467,8 +525,8 @@
 		continue;
 del:
 		list_del(&entry->list);
-		kfree(entry);
-		kfree(tt_change_node);
+		kmem_cache_free(batadv_tt_change_cache, entry);
+		kmem_cache_free(batadv_tt_change_cache, tt_change_node);
 		event_removed = true;
 		goto unlock;
 	}
@@ -646,7 +704,7 @@
 		goto out;
 	}
 
-	tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC);
+	tt_local = kmem_cache_alloc(batadv_tl_cache, GFP_ATOMIC);
 	if (!tt_local)
 		goto out;
 
@@ -656,7 +714,7 @@
 		net_ratelimited_function(batadv_info, soft_iface,
 					 "adding TT local entry %pM to non-existent VLAN %d\n",
 					 addr, BATADV_PRINT_VID(vid));
-		kfree(tt_local);
+		kmem_cache_free(batadv_tl_cache, tt_local);
 		tt_local = NULL;
 		goto out;
 	}
@@ -676,7 +734,6 @@
 	if (batadv_is_wifi_netdev(in_dev))
 		tt_local->common.flags |= BATADV_TT_CLIENT_WIFI;
 	kref_init(&tt_local->common.refcount);
-	kref_get(&tt_local->common.refcount);
 	tt_local->last_seen = jiffies;
 	tt_local->common.added_at = tt_local->last_seen;
 	tt_local->vlan = vlan;
@@ -688,6 +745,7 @@
 	    is_multicast_ether_addr(addr))
 		tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE;
 
+	kref_get(&tt_local->common.refcount);
 	hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt,
 				     batadv_choose_tt, &tt_local->common,
 				     &tt_local->common.hash_entry);
@@ -959,7 +1017,7 @@
 			tt_diff_entries_count++;
 		}
 		list_del(&entry->list);
-		kfree(entry);
+		kmem_cache_free(batadv_tt_change_cache, entry);
 	}
 	spin_unlock_bh(&bat_priv->tt.changes_list_lock);
 
@@ -989,6 +1047,7 @@
 	kfree(tt_data);
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct net_device *net_dev = (struct net_device *)seq->private;
@@ -1056,6 +1115,165 @@
 		batadv_hardif_put(primary_if);
 	return 0;
 }
+#endif
+
+/**
+ * batadv_tt_local_dump_entry - Dump one TT local entry into a message
+ * @msg :Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			   struct batadv_priv *bat_priv,
+			   struct batadv_tt_common_entry *common)
+{
+	void *hdr;
+	struct batadv_softif_vlan *vlan;
+	struct batadv_tt_local_entry *local;
+	unsigned int last_seen_msecs;
+	u32 crc;
+
+	local = container_of(common, struct batadv_tt_local_entry, common);
+	last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen);
+
+	vlan = batadv_softif_vlan_get(bat_priv, common->vid);
+	if (!vlan)
+		return 0;
+
+	crc = vlan->tt.crc;
+
+	batadv_softif_vlan_put(vlan);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI,
+			  BATADV_CMD_GET_TRANSTABLE_LOCAL);
+	if (!hdr)
+		return -ENOBUFS;
+
+	if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+	    nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+		goto nla_put_failure;
+
+	if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) &&
+	    nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the local tt entries
+ * @idx_s: Number of entries to skip
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct hlist_head *head, int *idx_s)
+{
+	struct batadv_tt_common_entry *common;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(common, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv,
+					       common)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_local_dump - Dump TT local entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or 0 on success
+ */
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_hashtable *hash;
+	struct hlist_head *head;
+	int ret;
+	int ifindex;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hash = bat_priv->tt.local_hash;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq,
+						bat_priv, head, &idx))
+			break;
+
+		bucket++;
+	}
+
+	ret = msg->len;
+
+ out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+
+	return ret;
+}
 
 static void
 batadv_tt_local_set_pending(struct batadv_priv *bat_priv,
@@ -1259,7 +1477,7 @@
 	list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list,
 				 list) {
 		list_del(&entry->list);
-		kfree(entry);
+		kmem_cache_free(batadv_tt_change_cache, entry);
 	}
 
 	atomic_set(&bat_priv->tt.local_changes, 0);
@@ -1341,7 +1559,7 @@
 		goto out;
 	}
 
-	orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC);
+	orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC);
 	if (!orig_entry)
 		goto out;
 
@@ -1351,9 +1569,9 @@
 	orig_entry->orig_node = orig_node;
 	orig_entry->ttvn = ttvn;
 	kref_init(&orig_entry->refcount);
-	kref_get(&orig_entry->refcount);
 
 	spin_lock_bh(&tt_global->list_lock);
+	kref_get(&orig_entry->refcount);
 	hlist_add_head_rcu(&orig_entry->list,
 			   &tt_global->orig_list);
 	spin_unlock_bh(&tt_global->list_lock);
@@ -1411,7 +1629,8 @@
 		goto out;
 
 	if (!tt_global_entry) {
-		tt_global_entry = kzalloc(sizeof(*tt_global_entry), GFP_ATOMIC);
+		tt_global_entry = kmem_cache_zalloc(batadv_tg_cache,
+						    GFP_ATOMIC);
 		if (!tt_global_entry)
 			goto out;
 
@@ -1428,13 +1647,13 @@
 		if (flags & BATADV_TT_CLIENT_ROAM)
 			tt_global_entry->roam_at = jiffies;
 		kref_init(&common->refcount);
-		kref_get(&common->refcount);
 		common->added_at = jiffies;
 
 		INIT_HLIST_HEAD(&tt_global_entry->orig_list);
 		atomic_set(&tt_global_entry->orig_list_count, 0);
 		spin_lock_init(&tt_global_entry->list_lock);
 
+		kref_get(&common->refcount);
 		hash_added = batadv_hash_add(bat_priv->tt.global_hash,
 					     batadv_compare_tt,
 					     batadv_choose_tt, common,
@@ -1579,6 +1798,7 @@
 	return best_entry;
 }
 
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 /**
  * batadv_tt_global_print_entry - print all orig nodes who announce the address
  *  for this global entry
@@ -1702,6 +1922,219 @@
 		batadv_hardif_put(primary_if);
 	return 0;
 }
+#endif
+
+/**
+ * batadv_tt_global_dump_subentry - Dump all TT local entries into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @common: tt local & tt global common data
+ * @orig: Originator node announcing a non-mesh client
+ * @best: Is the best originator for the TT entry
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
+			       struct batadv_tt_common_entry *common,
+			       struct batadv_tt_orig_list_entry *orig,
+			       bool best)
+{
+	void *hdr;
+	struct batadv_orig_node_vlan *vlan;
+	u8 last_ttvn;
+	u32 crc;
+
+	vlan = batadv_orig_node_vlan_get(orig->orig_node,
+					 common->vid);
+	if (!vlan)
+		return 0;
+
+	crc = vlan->tt.crc;
+
+	batadv_orig_node_vlan_put(vlan);
+
+	hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family,
+			  NLM_F_MULTI,
+			  BATADV_CMD_GET_TRANSTABLE_GLOBAL);
+	if (!hdr)
+		return -ENOBUFS;
+
+	last_ttvn = atomic_read(&orig->orig_node->last_ttvn);
+
+	if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) ||
+	    nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN,
+		    orig->orig_node->orig) ||
+	    nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) ||
+	    nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
+	    nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
+	    nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+		goto nla_put_failure;
+
+	if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/**
+ * batadv_tt_global_dump_entry - Dump one TT global entry into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @common: tt local & tt global common data
+ * @sub_s: Number of entries to skip
+ *
+ * This function assumes the caller holds rcu_read_lock().
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
+			    struct batadv_priv *bat_priv,
+			    struct batadv_tt_common_entry *common, int *sub_s)
+{
+	struct batadv_tt_orig_list_entry *orig_entry, *best_entry;
+	struct batadv_tt_global_entry *global;
+	struct hlist_head *head;
+	int sub = 0;
+	bool best;
+
+	global = container_of(common, struct batadv_tt_global_entry, common);
+	best_entry = batadv_transtable_best_orig(bat_priv, global);
+	head = &global->orig_list;
+
+	hlist_for_each_entry_rcu(orig_entry, head, list) {
+		if (sub++ < *sub_s)
+			continue;
+
+		best = (orig_entry == best_entry);
+
+		if (batadv_tt_global_dump_subentry(msg, portid, seq, common,
+						   orig_entry, best)) {
+			*sub_s = sub - 1;
+			return -EMSGSIZE;
+		}
+	}
+
+	*sub_s = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_global_dump_bucket - Dump one TT local bucket into a message
+ * @msg: Netlink message to dump into
+ * @portid: Port making netlink request
+ * @seq: Sequence number of netlink message
+ * @bat_priv: The bat priv with all the soft interface information
+ * @head: Pointer to the list containing the global tt entries
+ * @idx_s: Number of entries to skip
+ * @sub: Number of entries to skip
+ *
+ * Return: Error code, or 0 on success
+ */
+static int
+batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq,
+			     struct batadv_priv *bat_priv,
+			     struct hlist_head *head, int *idx_s, int *sub)
+{
+	struct batadv_tt_common_entry *common;
+	int idx = 0;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(common, head, hash_entry) {
+		if (idx++ < *idx_s)
+			continue;
+
+		if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv,
+						common, sub)) {
+			rcu_read_unlock();
+			*idx_s = idx - 1;
+			return -EMSGSIZE;
+		}
+	}
+	rcu_read_unlock();
+
+	*idx_s = 0;
+	*sub = 0;
+	return 0;
+}
+
+/**
+ * batadv_tt_global_dump -  Dump TT global entries into a message
+ * @msg: Netlink message to dump into
+ * @cb: Parameters from query
+ *
+ * Return: Error code, or length of message on success
+ */
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(cb->skb->sk);
+	struct net_device *soft_iface;
+	struct batadv_priv *bat_priv;
+	struct batadv_hard_iface *primary_if = NULL;
+	struct batadv_hashtable *hash;
+	struct hlist_head *head;
+	int ret;
+	int ifindex;
+	int bucket = cb->args[0];
+	int idx = cb->args[1];
+	int sub = cb->args[2];
+	int portid = NETLINK_CB(cb->skb).portid;
+
+	ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX);
+	if (!ifindex)
+		return -EINVAL;
+
+	soft_iface = dev_get_by_index(net, ifindex);
+	if (!soft_iface || !batadv_softif_is_valid(soft_iface)) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	bat_priv = netdev_priv(soft_iface);
+
+	primary_if = batadv_primary_if_get_selected(bat_priv);
+	if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	hash = bat_priv->tt.global_hash;
+
+	while (bucket < hash->size) {
+		head = &hash->table[bucket];
+
+		if (batadv_tt_global_dump_bucket(msg, portid,
+						 cb->nlh->nlmsg_seq, bat_priv,
+						 head, &idx, &sub))
+			break;
+
+		bucket++;
+	}
+
+	ret = msg->len;
+
+ out:
+	if (primary_if)
+		batadv_hardif_put(primary_if);
+	if (soft_iface)
+		dev_put(soft_iface);
+
+	cb->args[0] = bucket;
+	cb->args[1] = idx;
+	cb->args[2] = sub;
+
+	return ret;
+}
 
 /**
  * _batadv_tt_global_del_orig_entry - remove and free an orig_entry
@@ -2280,7 +2713,7 @@
 
 	tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount);
 
-	kfree(tt_req_node);
+	kmem_cache_free(batadv_tt_req_cache, tt_req_node);
 }
 
 /**
@@ -2367,7 +2800,7 @@
 			goto unlock;
 	}
 
-	tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC);
+	tt_req_node = kmem_cache_alloc(batadv_tt_req_cache, GFP_ATOMIC);
 	if (!tt_req_node)
 		goto unlock;
 
@@ -3104,7 +3537,7 @@
 
 	list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) {
 		list_del(&node->list);
-		kfree(node);
+		kmem_cache_free(batadv_tt_roam_cache, node);
 	}
 
 	spin_unlock_bh(&bat_priv->tt.roam_list_lock);
@@ -3121,7 +3554,7 @@
 			continue;
 
 		list_del(&node->list);
-		kfree(node);
+		kmem_cache_free(batadv_tt_roam_cache, node);
 	}
 	spin_unlock_bh(&bat_priv->tt.roam_list_lock);
 }
@@ -3162,7 +3595,8 @@
 	}
 
 	if (!ret) {
-		tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC);
+		tt_roam_node = kmem_cache_alloc(batadv_tt_roam_cache,
+						GFP_ATOMIC);
 		if (!tt_roam_node)
 			goto unlock;
 
@@ -3865,3 +4299,85 @@
 
 	return ret;
 }
+
+/**
+ * batadv_tt_cache_init - Initialize tt memory object cache
+ *
+ * Return: 0 on success or negative error number in case of failure.
+ */
+int __init batadv_tt_cache_init(void)
+{
+	size_t tl_size = sizeof(struct batadv_tt_local_entry);
+	size_t tg_size = sizeof(struct batadv_tt_global_entry);
+	size_t tt_orig_size = sizeof(struct batadv_tt_orig_list_entry);
+	size_t tt_change_size = sizeof(struct batadv_tt_change_node);
+	size_t tt_req_size = sizeof(struct batadv_tt_req_node);
+	size_t tt_roam_size = sizeof(struct batadv_tt_roam_node);
+
+	batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0,
+					    SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tl_cache)
+		return -ENOMEM;
+
+	batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0,
+					    SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tg_cache)
+		goto err_tt_tl_destroy;
+
+	batadv_tt_orig_cache = kmem_cache_create("batadv_tt_orig_cache",
+						 tt_orig_size, 0,
+						 SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tt_orig_cache)
+		goto err_tt_tg_destroy;
+
+	batadv_tt_change_cache = kmem_cache_create("batadv_tt_change_cache",
+						   tt_change_size, 0,
+						   SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tt_change_cache)
+		goto err_tt_orig_destroy;
+
+	batadv_tt_req_cache = kmem_cache_create("batadv_tt_req_cache",
+						tt_req_size, 0,
+						SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tt_req_cache)
+		goto err_tt_change_destroy;
+
+	batadv_tt_roam_cache = kmem_cache_create("batadv_tt_roam_cache",
+						 tt_roam_size, 0,
+						 SLAB_HWCACHE_ALIGN, NULL);
+	if (!batadv_tt_roam_cache)
+		goto err_tt_req_destroy;
+
+	return 0;
+
+err_tt_req_destroy:
+	kmem_cache_destroy(batadv_tt_req_cache);
+	batadv_tt_req_cache = NULL;
+err_tt_change_destroy:
+	kmem_cache_destroy(batadv_tt_change_cache);
+	batadv_tt_change_cache = NULL;
+err_tt_orig_destroy:
+	kmem_cache_destroy(batadv_tt_orig_cache);
+	batadv_tt_orig_cache = NULL;
+err_tt_tg_destroy:
+	kmem_cache_destroy(batadv_tg_cache);
+	batadv_tg_cache = NULL;
+err_tt_tl_destroy:
+	kmem_cache_destroy(batadv_tl_cache);
+	batadv_tl_cache = NULL;
+
+	return -ENOMEM;
+}
+
+/**
+ * batadv_tt_cache_destroy - Destroy tt memory object cache
+ */
+void batadv_tt_cache_destroy(void)
+{
+	kmem_cache_destroy(batadv_tl_cache);
+	kmem_cache_destroy(batadv_tg_cache);
+	kmem_cache_destroy(batadv_tt_orig_cache);
+	kmem_cache_destroy(batadv_tt_change_cache);
+	kmem_cache_destroy(batadv_tt_req_cache);
+	kmem_cache_destroy(batadv_tt_roam_cache);
+}
diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h
index 7c7e2c0..783fdba 100644
--- a/net/batman-adv/translation-table.h
+++ b/net/batman-adv/translation-table.h
@@ -22,8 +22,10 @@
 
 #include <linux/types.h>
 
+struct netlink_callback;
 struct net_device;
 struct seq_file;
+struct sk_buff;
 
 int batadv_tt_init(struct batadv_priv *bat_priv);
 bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr,
@@ -33,6 +35,8 @@
 			   const char *message, bool roaming);
 int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset);
 int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset);
+int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb);
+int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb);
 void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
 			       struct batadv_orig_node *orig_node,
 			       s32 match_vid, const char *message);
@@ -59,4 +63,7 @@
 bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv,
 				  const u8 *addr, unsigned short vid);
 
+int batadv_tt_cache_init(void);
+void batadv_tt_cache_destroy(void);
+
 #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 3d1cf0f..77654f0 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -257,8 +257,13 @@
 	spin_lock_bh(&bat_priv->tvlv.container_list_lock);
 	tvlv_old = batadv_tvlv_container_get(bat_priv, type, version);
 	batadv_tvlv_container_remove(bat_priv, tvlv_old);
+
+	kref_get(&tvlv_new->refcount);
 	hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list);
 	spin_unlock_bh(&bat_priv->tvlv.container_list_lock);
+
+	/* don't return reference to new tvlv_container */
+	batadv_tvlv_container_put(tvlv_new);
 }
 
 /**
@@ -542,8 +547,12 @@
 	INIT_HLIST_NODE(&tvlv_handler->list);
 
 	spin_lock_bh(&bat_priv->tvlv.handler_list_lock);
+	kref_get(&tvlv_handler->refcount);
 	hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list);
 	spin_unlock_bh(&bat_priv->tvlv.handler_list_lock);
+
+	/* don't return reference to new tvlv_handler */
+	batadv_tvlv_handler_put(tvlv_handler);
 }
 
 /**
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index a64522c..b3dd1a3 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -28,6 +28,7 @@
 #include <linux/if_ether.h>
 #include <linux/kref.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/sched.h> /* for linux/wait.h */
 #include <linux/spinlock.h>
 #include <linux/types.h>
@@ -132,7 +133,6 @@
  * @rcu: struct used for freeing in an RCU-safe manner
  * @bat_iv: per hard-interface B.A.T.M.A.N. IV data
  * @bat_v: per hard-interface B.A.T.M.A.N. V data
- * @cleanup_work: work queue callback item for hard-interface deinit
  * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs
  * @neigh_list: list of unique single hop neighbors via this interface
  * @neigh_list_lock: lock protecting neigh_list
@@ -152,7 +152,6 @@
 #ifdef CONFIG_BATMAN_ADV_BATMAN_V
 	struct batadv_hard_iface_bat_v bat_v;
 #endif
-	struct work_struct cleanup_work;
 	struct dentry *debug_dir;
 	struct hlist_head neigh_list;
 	/* neigh_list_lock protects: neigh_list */
@@ -1015,7 +1014,6 @@
  * @forw_bcast_list_lock: lock protecting forw_bcast_list
  * @tp_list_lock: spinlock protecting @tp_list
  * @orig_work: work queue callback item for orig node purging
- * @cleanup_work: work queue callback item for soft-interface deinit
  * @primary_if: one of the hard-interfaces assigned to this mesh interface
  *  becomes the primary interface
  * @algo_ops: routing algorithm used by this mesh interface
@@ -1074,7 +1072,6 @@
 	spinlock_t tp_list_lock; /* protects tp_list */
 	atomic_t tp_num;
 	struct delayed_work orig_work;
-	struct work_struct cleanup_work;
 	struct batadv_hard_iface __rcu *primary_if;  /* rcu protected pointer */
 	struct batadv_algo_ops *algo_ops;
 	struct hlist_head softif_vlan_list;
@@ -1379,6 +1376,7 @@
  *  locally generated packet
  * @if_outgoing: packet where the packet should be sent to, or NULL if
  *  unspecified
+ * @queue_left: The queue (counter) this packet was applied to
  */
 struct batadv_forw_packet {
 	struct hlist_node list;
@@ -1391,11 +1389,13 @@
 	struct delayed_work delayed_work;
 	struct batadv_hard_iface *if_incoming;
 	struct batadv_hard_iface *if_outgoing;
+	atomic_t *queue_left;
 };
 
 /**
  * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific)
  * @activate: start routing mechanisms when hard-interface is brought up
+ *  (optional)
  * @enable: init routing info when hard-interface is enabled
  * @disable: de-init routing info when hard-interface is disabled
  * @update_mac: (re-)init mac addresses of the protocol information
@@ -1413,11 +1413,13 @@
 /**
  * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific)
  * @hardif_init: called on creation of single hop entry
+ *  (optional)
  * @cmp: compare the metrics of two neighbors for their respective outgoing
  *  interfaces
  * @is_similar_or_better: check if neigh1 is equally similar or better than
  *  neigh2 for their respective outgoing interface from the metric prospective
  * @print: print the single hop neighbor list (optional)
+ * @dump: dump neighbors to a netlink socket (optional)
  */
 struct batadv_algo_neigh_ops {
 	void (*hardif_init)(struct batadv_hardif_neigh_node *neigh);
@@ -1429,26 +1431,64 @@
 				     struct batadv_hard_iface *if_outgoing1,
 				     struct batadv_neigh_node *neigh2,
 				     struct batadv_hard_iface *if_outgoing2);
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 	void (*print)(struct batadv_priv *priv, struct seq_file *seq);
+#endif
+	void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+		     struct batadv_priv *priv,
+		     struct batadv_hard_iface *hard_iface);
 };
 
 /**
  * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific)
  * @free: free the resources allocated by the routing algorithm for an orig_node
- *  object
+ *  object (optional)
  * @add_if: ask the routing algorithm to apply the needed changes to the
- *  orig_node due to a new hard-interface being added into the mesh
+ *  orig_node due to a new hard-interface being added into the mesh (optional)
  * @del_if: ask the routing algorithm to apply the needed changes to the
- *  orig_node due to an hard-interface being removed from the mesh
+ *  orig_node due to an hard-interface being removed from the mesh (optional)
  * @print: print the originator table (optional)
+ * @dump: dump originators to a netlink socket (optional)
  */
 struct batadv_algo_orig_ops {
 	void (*free)(struct batadv_orig_node *orig_node);
 	int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num);
 	int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num,
 		      int del_if_num);
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
 	void (*print)(struct batadv_priv *priv, struct seq_file *seq,
 		      struct batadv_hard_iface *hard_iface);
+#endif
+	void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+		     struct batadv_priv *priv,
+		     struct batadv_hard_iface *hard_iface);
+};
+
+/**
+ * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific)
+ * @store_sel_class: parse and stores a new GW selection class (optional)
+ * @show_sel_class: prints the current GW selection class (optional)
+ * @get_best_gw_node: select the best GW from the list of available nodes
+ *  (optional)
+ * @is_eligible: check if a newly discovered GW is a potential candidate for
+ *  the election as best GW (optional)
+ * @print: print the gateway table (optional)
+ * @dump: dump gateways to a netlink socket (optional)
+ */
+struct batadv_algo_gw_ops {
+	ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff,
+				   size_t count);
+	ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff);
+	struct batadv_gw_node *(*get_best_gw_node)
+		(struct batadv_priv *bat_priv);
+	bool (*is_eligible)(struct batadv_priv *bat_priv,
+			    struct batadv_orig_node *curr_gw_orig,
+			    struct batadv_orig_node *orig_node);
+#ifdef CONFIG_BATMAN_ADV_DEBUGFS
+	void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq);
+#endif
+	void (*dump)(struct sk_buff *msg, struct netlink_callback *cb,
+		     struct batadv_priv *priv);
 };
 
 /**
@@ -1458,6 +1498,7 @@
  * @iface: callbacks related to interface handling
  * @neigh: callbacks related to neighbors handling
  * @orig: callbacks related to originators handling
+ * @gw: callbacks related to GW mode
  */
 struct batadv_algo_ops {
 	struct hlist_node list;
@@ -1465,6 +1506,7 @@
 	struct batadv_algo_iface_ops iface;
 	struct batadv_algo_neigh_ops neigh;
 	struct batadv_algo_orig_ops orig;
+	struct batadv_algo_gw_ops gw;
 };
 
 /**
@@ -1564,4 +1606,17 @@
 	BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2),
 };
 
+/**
+ * struct batadv_store_mesh_work - Work queue item to detach add/del interface
+ *  from sysfs locks
+ * @net_dev: netdevice to add/remove to/from batman-adv soft-interface
+ * @soft_iface_name: name of soft-interface to modify
+ * @work: work queue item
+ */
+struct batadv_store_mesh_work {
+	struct net_device *net_dev;
+	char soft_iface_name[IFNAMSIZ];
+	struct work_struct work;
+};
+
 #endif /* _NET_BATMAN_ADV_TYPES_H_ */
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index ece45e0..1aff2da 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -26,11 +26,13 @@
 
 #include <linux/module.h>
 #include <linux/debugfs.h>
+#include <linux/stringify.h>
 #include <asm/ioctls.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <linux/proc_fs.h>
 
+#include "leds.h"
 #include "selftest.h"
 
 /* Bluetooth sockets */
@@ -250,7 +252,7 @@
 
 	skb_free_datagram(sk, skb);
 
-	if (msg->msg_flags & MSG_TRUNC)
+	if (flags & MSG_TRUNC)
 		copied = skblen;
 
 	return err ? : copied;
@@ -712,13 +714,16 @@
 struct dentry *bt_debugfs;
 EXPORT_SYMBOL_GPL(bt_debugfs);
 
+#define VERSION __stringify(BT_SUBSYS_VERSION) "." \
+		__stringify(BT_SUBSYS_REVISION)
+
 static int __init bt_init(void)
 {
 	int err;
 
 	sock_skb_cb_check_size(sizeof(struct bt_skb_cb));
 
-	BT_INFO("Core ver %s", BT_SUBSYS_VERSION);
+	BT_INFO("Core ver %s", VERSION);
 
 	err = bt_selftest();
 	if (err < 0)
@@ -726,6 +731,8 @@
 
 	bt_debugfs = debugfs_create_dir("bluetooth", NULL);
 
+	bt_leds_init();
+
 	err = bt_sysfs_init();
 	if (err < 0)
 		return err;
@@ -785,6 +792,8 @@
 
 	bt_sysfs_cleanup();
 
+	bt_leds_cleanup();
+
 	debugfs_remove_recursive(bt_debugfs);
 }
 
@@ -792,7 +801,7 @@
 module_exit(bt_exit);
 
 MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
-MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION);
-MODULE_VERSION(BT_SUBSYS_VERSION);
+MODULE_DESCRIPTION("Bluetooth Core ver " VERSION);
+MODULE_VERSION(VERSION);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(PF_BLUETOOTH);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index ddf8432..3ac89e9 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1562,6 +1562,7 @@
 	auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF);
 
 	if (!auto_off && hdev->dev_type == HCI_PRIMARY &&
+	    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
 	    hci_dev_test_flag(hdev, HCI_MGMT))
 		__mgmt_power_off(hdev);
 
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index c045b3c..c813568 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -262,6 +262,8 @@
 		break;
 	}
 
+	kfree_skb(hdev->req_skb);
+	hdev->req_skb = NULL;
 	hdev->req_status = hdev->req_result = 0;
 
 	BT_DBG("%s end: err %d", hdev->name, err);
@@ -969,14 +971,14 @@
 	hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable);
 }
 
-static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
+static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len)
 {
-	u8 ad_len = 0;
 	size_t name_len;
+	int max_len;
 
+	max_len = HCI_MAX_AD_LENGTH - ad_len - 2;
 	name_len = strlen(hdev->dev_name);
-	if (name_len > 0) {
-		size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2;
+	if (name_len > 0 && max_len > 0) {
 
 		if (name_len > max_len) {
 			name_len = max_len;
@@ -995,22 +997,42 @@
 	return ad_len;
 }
 
+static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr)
+{
+	return append_local_name(hdev, ptr, 0);
+}
+
 static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance,
 					u8 *ptr)
 {
 	struct adv_info *adv_instance;
+	u32 instance_flags;
+	u8 scan_rsp_len = 0;
 
 	adv_instance = hci_find_adv_instance(hdev, instance);
 	if (!adv_instance)
 		return 0;
 
-	/* TODO: Set the appropriate entries based on advertising instance flags
-	 * here once flags other than 0 are supported.
-	 */
+	instance_flags = adv_instance->flags;
+
+	if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) {
+		ptr[0] = 3;
+		ptr[1] = EIR_APPEARANCE;
+		put_unaligned_le16(hdev->appearance, ptr + 2);
+		scan_rsp_len += 4;
+		ptr += 4;
+	}
+
 	memcpy(ptr, adv_instance->scan_rsp_data,
 	       adv_instance->scan_rsp_len);
 
-	return adv_instance->scan_rsp_len;
+	scan_rsp_len += adv_instance->scan_rsp_len;
+	ptr += adv_instance->scan_rsp_len;
+
+	if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME)
+		scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len);
+
+	return scan_rsp_len;
 }
 
 void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
@@ -1192,7 +1214,7 @@
 
 	hci_req_init(&req, hdev);
 
-	hci_req_clear_adv_instance(hdev, &req, instance, false);
+	hci_req_clear_adv_instance(hdev, NULL, &req, instance, false);
 
 	if (list_empty(&hdev->adv_instances))
 		__hci_req_disable_advertising(&req);
@@ -1282,8 +1304,9 @@
  *   setting.
  * - force == false: Only instances that have a timeout will be removed.
  */
-void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req,
-				u8 instance, bool force)
+void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
+				struct hci_request *req, u8 instance,
+				bool force)
 {
 	struct adv_info *adv_instance, *n, *next_instance = NULL;
 	int err;
@@ -1309,7 +1332,7 @@
 			rem_inst = adv_instance->instance;
 			err = hci_remove_adv_instance(hdev, rem_inst);
 			if (!err)
-				mgmt_advertising_removed(NULL, hdev, rem_inst);
+				mgmt_advertising_removed(sk, hdev, rem_inst);
 		}
 	} else {
 		adv_instance = hci_find_adv_instance(hdev, instance);
@@ -1323,7 +1346,7 @@
 
 			err = hci_remove_adv_instance(hdev, instance);
 			if (!err)
-				mgmt_advertising_removed(NULL, hdev, instance);
+				mgmt_advertising_removed(sk, hdev, instance);
 		}
 	}
 
@@ -1714,7 +1737,7 @@
 			 * function. To be safe hard-code one of the
 			 * values that's suitable for SCO.
 			 */
-			rej.reason = HCI_ERROR_REMOTE_LOW_RESOURCES;
+			rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES;
 
 			hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ,
 				    sizeof(rej), &rej);
diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h
index b2d044b..ac1e110 100644
--- a/net/bluetooth/hci_request.h
+++ b/net/bluetooth/hci_request.h
@@ -73,8 +73,9 @@
 
 int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance,
 				    bool force);
-void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req,
-				u8 instance, bool force);
+void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk,
+				struct hci_request *req, u8 instance,
+				bool force);
 
 void __hci_req_update_class(struct hci_request *req);
 
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 6ef8a01..48f9471 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -26,6 +26,7 @@
 
 #include <linux/export.h>
 #include <linux/utsname.h>
+#include <linux/sched.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -38,6 +39,8 @@
 static LIST_HEAD(mgmt_chan_list);
 static DEFINE_MUTEX(mgmt_chan_list_lock);
 
+static DEFINE_IDA(sock_cookie_ida);
+
 static atomic_t monitor_promisc = ATOMIC_INIT(0);
 
 /* ----- HCI socket interface ----- */
@@ -52,6 +55,8 @@
 	__u32             cmsg_mask;
 	unsigned short    channel;
 	unsigned long     flags;
+	__u32             cookie;
+	char              comm[TASK_COMM_LEN];
 };
 
 void hci_sock_set_flag(struct sock *sk, int nr)
@@ -74,6 +79,38 @@
 	return hci_pi(sk)->channel;
 }
 
+u32 hci_sock_get_cookie(struct sock *sk)
+{
+	return hci_pi(sk)->cookie;
+}
+
+static bool hci_sock_gen_cookie(struct sock *sk)
+{
+	int id = hci_pi(sk)->cookie;
+
+	if (!id) {
+		id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL);
+		if (id < 0)
+			id = 0xffffffff;
+
+		hci_pi(sk)->cookie = id;
+		get_task_comm(hci_pi(sk)->comm, current);
+		return true;
+	}
+
+	return false;
+}
+
+static void hci_sock_free_cookie(struct sock *sk)
+{
+	int id = hci_pi(sk)->cookie;
+
+	if (id) {
+		hci_pi(sk)->cookie = 0xffffffff;
+		ida_simple_remove(&sock_cookie_ida, id);
+	}
+}
+
 static inline int hci_test_bit(int nr, const void *addr)
 {
 	return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31));
@@ -305,6 +342,60 @@
 	kfree_skb(skb_copy);
 }
 
+void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event,
+				 void *data, u16 data_len, ktime_t tstamp,
+				 int flag, struct sock *skip_sk)
+{
+	struct sock *sk;
+	__le16 index;
+
+	if (hdev)
+		index = cpu_to_le16(hdev->id);
+	else
+		index = cpu_to_le16(MGMT_INDEX_NONE);
+
+	read_lock(&hci_sk_list.lock);
+
+	sk_for_each(sk, &hci_sk_list.head) {
+		struct hci_mon_hdr *hdr;
+		struct sk_buff *skb;
+
+		if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL)
+			continue;
+
+		/* Ignore socket without the flag set */
+		if (!hci_sock_test_flag(sk, flag))
+			continue;
+
+		/* Skip the original socket */
+		if (sk == skip_sk)
+			continue;
+
+		skb = bt_skb_alloc(6 + data_len, GFP_ATOMIC);
+		if (!skb)
+			continue;
+
+		put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
+		put_unaligned_le16(event, skb_put(skb, 2));
+
+		if (data)
+			memcpy(skb_put(skb, data_len), data, data_len);
+
+		skb->tstamp = tstamp;
+
+		hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+		hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT);
+		hdr->index = index;
+		hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
+
+		hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+				    HCI_SOCK_TRUSTED, NULL);
+		kfree_skb(skb);
+	}
+
+	read_unlock(&hci_sk_list.lock);
+}
+
 static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
 {
 	struct hci_mon_hdr *hdr;
@@ -384,6 +475,129 @@
 	return skb;
 }
 
+static struct sk_buff *create_monitor_ctrl_open(struct sock *sk)
+{
+	struct hci_mon_hdr *hdr;
+	struct sk_buff *skb;
+	u16 format;
+	u8 ver[3];
+	u32 flags;
+
+	/* No message needed when cookie is not present */
+	if (!hci_pi(sk)->cookie)
+		return NULL;
+
+	switch (hci_pi(sk)->channel) {
+	case HCI_CHANNEL_RAW:
+		format = 0x0000;
+		ver[0] = BT_SUBSYS_VERSION;
+		put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1);
+		break;
+	case HCI_CHANNEL_USER:
+		format = 0x0001;
+		ver[0] = BT_SUBSYS_VERSION;
+		put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1);
+		break;
+	case HCI_CHANNEL_CONTROL:
+		format = 0x0002;
+		mgmt_fill_version_info(ver);
+		break;
+	default:
+		/* No message for unsupported format */
+		return NULL;
+	}
+
+	skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0;
+
+	put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
+	put_unaligned_le16(format, skb_put(skb, 2));
+	memcpy(skb_put(skb, sizeof(ver)), ver, sizeof(ver));
+	put_unaligned_le32(flags, skb_put(skb, 4));
+	*skb_put(skb, 1) = TASK_COMM_LEN;
+	memcpy(skb_put(skb, TASK_COMM_LEN), hci_pi(sk)->comm, TASK_COMM_LEN);
+
+	__net_timestamp(skb);
+
+	hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+	hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN);
+	if (hci_pi(sk)->hdev)
+		hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id);
+	else
+		hdr->index = cpu_to_le16(HCI_DEV_NONE);
+	hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
+
+	return skb;
+}
+
+static struct sk_buff *create_monitor_ctrl_close(struct sock *sk)
+{
+	struct hci_mon_hdr *hdr;
+	struct sk_buff *skb;
+
+	/* No message needed when cookie is not present */
+	if (!hci_pi(sk)->cookie)
+		return NULL;
+
+	switch (hci_pi(sk)->channel) {
+	case HCI_CHANNEL_RAW:
+	case HCI_CHANNEL_USER:
+	case HCI_CHANNEL_CONTROL:
+		break;
+	default:
+		/* No message for unsupported format */
+		return NULL;
+	}
+
+	skb = bt_skb_alloc(4, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
+
+	__net_timestamp(skb);
+
+	hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+	hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE);
+	if (hci_pi(sk)->hdev)
+		hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id);
+	else
+		hdr->index = cpu_to_le16(HCI_DEV_NONE);
+	hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
+
+	return skb;
+}
+
+static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index,
+						   u16 opcode, u16 len,
+						   const void *buf)
+{
+	struct hci_mon_hdr *hdr;
+	struct sk_buff *skb;
+
+	skb = bt_skb_alloc(6 + len, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4));
+	put_unaligned_le16(opcode, skb_put(skb, 2));
+
+	if (buf)
+		memcpy(skb_put(skb, len), buf, len);
+
+	__net_timestamp(skb);
+
+	hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+	hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND);
+	hdr->index = cpu_to_le16(index);
+	hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
+
+	return skb;
+}
+
 static void __printf(2, 3)
 send_monitor_note(struct sock *sk, const char *fmt, ...)
 {
@@ -458,6 +672,26 @@
 	read_unlock(&hci_dev_list_lock);
 }
 
+static void send_monitor_control_replay(struct sock *mon_sk)
+{
+	struct sock *sk;
+
+	read_lock(&hci_sk_list.lock);
+
+	sk_for_each(sk, &hci_sk_list.head) {
+		struct sk_buff *skb;
+
+		skb = create_monitor_ctrl_open(sk);
+		if (!skb)
+			continue;
+
+		if (sock_queue_rcv_skb(mon_sk, skb))
+			kfree_skb(skb);
+	}
+
+	read_unlock(&hci_sk_list.lock);
+}
+
 /* Generate internal stack event */
 static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
 {
@@ -585,6 +819,7 @@
 {
 	struct sock *sk = sock->sk;
 	struct hci_dev *hdev;
+	struct sk_buff *skb;
 
 	BT_DBG("sock %p sk %p", sock, sk);
 
@@ -593,8 +828,24 @@
 
 	hdev = hci_pi(sk)->hdev;
 
-	if (hci_pi(sk)->channel == HCI_CHANNEL_MONITOR)
+	switch (hci_pi(sk)->channel) {
+	case HCI_CHANNEL_MONITOR:
 		atomic_dec(&monitor_promisc);
+		break;
+	case HCI_CHANNEL_RAW:
+	case HCI_CHANNEL_USER:
+	case HCI_CHANNEL_CONTROL:
+		/* Send event to monitor */
+		skb = create_monitor_ctrl_close(sk);
+		if (skb) {
+			hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+					    HCI_SOCK_TRUSTED, NULL);
+			kfree_skb(skb);
+		}
+
+		hci_sock_free_cookie(sk);
+		break;
+	}
 
 	bt_sock_unlink(&hci_sk_list, sk);
 
@@ -721,6 +972,27 @@
 		goto done;
 	}
 
+	/* When calling an ioctl on an unbound raw socket, then ensure
+	 * that the monitor gets informed. Ensure that the resulting event
+	 * is only send once by checking if the cookie exists or not. The
+	 * socket cookie will be only ever generated once for the lifetime
+	 * of a given socket.
+	 */
+	if (hci_sock_gen_cookie(sk)) {
+		struct sk_buff *skb;
+
+		if (capable(CAP_NET_ADMIN))
+			hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
+
+		/* Send event to monitor */
+		skb = create_monitor_ctrl_open(sk);
+		if (skb) {
+			hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+					    HCI_SOCK_TRUSTED, NULL);
+			kfree_skb(skb);
+		}
+	}
+
 	release_sock(sk);
 
 	switch (cmd) {
@@ -784,6 +1056,7 @@
 	struct sockaddr_hci haddr;
 	struct sock *sk = sock->sk;
 	struct hci_dev *hdev = NULL;
+	struct sk_buff *skb;
 	int len, err = 0;
 
 	BT_DBG("sock %p sk %p", sock, sk);
@@ -822,7 +1095,35 @@
 			atomic_inc(&hdev->promisc);
 		}
 
+		hci_pi(sk)->channel = haddr.hci_channel;
+
+		if (!hci_sock_gen_cookie(sk)) {
+			/* In the case when a cookie has already been assigned,
+			 * then there has been already an ioctl issued against
+			 * an unbound socket and with that triggerd an open
+			 * notification. Send a close notification first to
+			 * allow the state transition to bounded.
+			 */
+			skb = create_monitor_ctrl_close(sk);
+			if (skb) {
+				hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+						    HCI_SOCK_TRUSTED, NULL);
+				kfree_skb(skb);
+			}
+		}
+
+		if (capable(CAP_NET_ADMIN))
+			hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
+
 		hci_pi(sk)->hdev = hdev;
+
+		/* Send event to monitor */
+		skb = create_monitor_ctrl_open(sk);
+		if (skb) {
+			hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+					    HCI_SOCK_TRUSTED, NULL);
+			kfree_skb(skb);
+		}
 		break;
 
 	case HCI_CHANNEL_USER:
@@ -884,9 +1185,38 @@
 			}
 		}
 
-		atomic_inc(&hdev->promisc);
+		hci_pi(sk)->channel = haddr.hci_channel;
+
+		if (!hci_sock_gen_cookie(sk)) {
+			/* In the case when a cookie has already been assigned,
+			 * this socket will transition from a raw socket into
+			 * an user channel socket. For a clean transition, send
+			 * the close notification first.
+			 */
+			skb = create_monitor_ctrl_close(sk);
+			if (skb) {
+				hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+						    HCI_SOCK_TRUSTED, NULL);
+				kfree_skb(skb);
+			}
+		}
+
+		/* The user channel is restricted to CAP_NET_ADMIN
+		 * capabilities and with that implicitly trusted.
+		 */
+		hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
 
 		hci_pi(sk)->hdev = hdev;
+
+		/* Send event to monitor */
+		skb = create_monitor_ctrl_open(sk);
+		if (skb) {
+			hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+					    HCI_SOCK_TRUSTED, NULL);
+			kfree_skb(skb);
+		}
+
+		atomic_inc(&hdev->promisc);
 		break;
 
 	case HCI_CHANNEL_MONITOR:
@@ -900,6 +1230,8 @@
 			goto done;
 		}
 
+		hci_pi(sk)->channel = haddr.hci_channel;
+
 		/* The monitor interface is restricted to CAP_NET_RAW
 		 * capabilities and with that implicitly trusted.
 		 */
@@ -908,9 +1240,10 @@
 		send_monitor_note(sk, "Linux version %s (%s)",
 				  init_utsname()->release,
 				  init_utsname()->machine);
-		send_monitor_note(sk, "Bluetooth subsystem version %s",
-				  BT_SUBSYS_VERSION);
+		send_monitor_note(sk, "Bluetooth subsystem version %u.%u",
+				  BT_SUBSYS_VERSION, BT_SUBSYS_REVISION);
 		send_monitor_replay(sk);
+		send_monitor_control_replay(sk);
 
 		atomic_inc(&monitor_promisc);
 		break;
@@ -925,6 +1258,8 @@
 			err = -EPERM;
 			goto done;
 		}
+
+		hci_pi(sk)->channel = haddr.hci_channel;
 		break;
 
 	default:
@@ -946,6 +1281,8 @@
 		if (capable(CAP_NET_ADMIN))
 			hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
 
+		hci_pi(sk)->channel = haddr.hci_channel;
+
 		/* At the moment the index and unconfigured index events
 		 * are enabled unconditionally. Setting them on each
 		 * socket when binding keeps this functionality. They
@@ -956,16 +1293,40 @@
 		 * received by untrusted users. Example for such events
 		 * are changes to settings, class of device, name etc.
 		 */
-		if (haddr.hci_channel == HCI_CHANNEL_CONTROL) {
+		if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) {
+			if (!hci_sock_gen_cookie(sk)) {
+				/* In the case when a cookie has already been
+				 * assigned, this socket will transtion from
+				 * a raw socket into a control socket. To
+				 * allow for a clean transtion, send the
+				 * close notification first.
+				 */
+				skb = create_monitor_ctrl_close(sk);
+				if (skb) {
+					hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+							    HCI_SOCK_TRUSTED, NULL);
+					kfree_skb(skb);
+				}
+			}
+
+			/* Send event to monitor */
+			skb = create_monitor_ctrl_open(sk);
+			if (skb) {
+				hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+						    HCI_SOCK_TRUSTED, NULL);
+				kfree_skb(skb);
+			}
+
 			hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS);
 			hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS);
-			hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS);
+			hci_sock_set_flag(sk, HCI_MGMT_OPTION_EVENTS);
+			hci_sock_set_flag(sk, HCI_MGMT_SETTING_EVENTS);
+			hci_sock_set_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS);
+			hci_sock_set_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS);
 		}
 		break;
 	}
 
-
-	hci_pi(sk)->channel = haddr.hci_channel;
 	sk->sk_state = BT_BOUND;
 
 done:
@@ -1091,7 +1452,7 @@
 
 	skb_free_datagram(sk, skb);
 
-	if (msg->msg_flags & MSG_TRUNC)
+	if (flags & MSG_TRUNC)
 		copied = skblen;
 
 	return err ? : copied;
@@ -1133,6 +1494,19 @@
 		goto done;
 	}
 
+	if (chan->channel == HCI_CHANNEL_CONTROL) {
+		struct sk_buff *skb;
+
+		/* Send event to monitor */
+		skb = create_monitor_ctrl_command(sk, index, opcode, len,
+						  buf + sizeof(*hdr));
+		if (skb) {
+			hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
+					    HCI_SOCK_TRUSTED, NULL);
+			kfree_skb(skb);
+		}
+	}
+
 	if (opcode >= chan->handler_count ||
 	    chan->handlers[opcode].func == NULL) {
 		BT_DBG("Unknown op %u", opcode);
@@ -1440,6 +1814,9 @@
 
 	BT_DBG("sk %p, opt %d", sk, optname);
 
+	if (level != SOL_HCI)
+		return -ENOPROTOOPT;
+
 	lock_sock(sk);
 
 	if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) {
@@ -1523,6 +1900,9 @@
 
 	BT_DBG("sk %p, opt %d", sk, optname);
 
+	if (level != SOL_HCI)
+		return -ENOPROTOOPT;
+
 	if (get_user(len, optlen))
 		return -EFAULT;
 
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 54ceb1f..d4cad29b0 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -32,6 +32,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/crc16.h>
+#include <linux/filter.h>
 
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
@@ -5835,6 +5836,9 @@
 		if (chan->sdu)
 			break;
 
+		if (!pskb_may_pull(skb, L2CAP_SDULEN_SIZE))
+			break;
+
 		chan->sdu_len = get_unaligned_le16(skb->data);
 		skb_pull(skb, L2CAP_SDULEN_SIZE);
 
@@ -6610,6 +6614,10 @@
 		goto drop;
 	}
 
+	if ((chan->mode == L2CAP_MODE_ERTM ||
+	     chan->mode == L2CAP_MODE_STREAMING) && sk_filter(chan->data, skb))
+		goto drop;
+
 	if (!control->sframe) {
 		int err;
 
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1842141..a8ba752 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1019,7 +1019,7 @@
 		goto done;
 
 	if (pi->rx_busy_skb) {
-		if (!sock_queue_rcv_skb(sk, pi->rx_busy_skb))
+		if (!__sock_queue_rcv_skb(sk, pi->rx_busy_skb))
 			pi->rx_busy_skb = NULL;
 		else
 			goto done;
@@ -1270,7 +1270,17 @@
 		goto done;
 	}
 
-	err = sock_queue_rcv_skb(sk, skb);
+	if (chan->mode != L2CAP_MODE_ERTM &&
+	    chan->mode != L2CAP_MODE_STREAMING) {
+		/* Even if no filter is attached, we could potentially
+		 * get errors from security modules, etc.
+		 */
+		err = sk_filter(sk, skb);
+		if (err)
+			goto done;
+	}
+
+	err = __sock_queue_rcv_skb(sk, skb);
 
 	/* For ERTM, handle one skb that doesn't fit into the recv
 	 * buffer.  This is important to do because the data frames
diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c
index 8319c84..cb670b5 100644
--- a/net/bluetooth/leds.c
+++ b/net/bluetooth/leds.c
@@ -11,6 +11,8 @@
 
 #include "leds.h"
 
+DEFINE_LED_TRIGGER(bt_power_led_trigger);
+
 struct hci_basic_led_trigger {
 	struct led_trigger	led_trigger;
 	struct hci_dev		*hdev;
@@ -24,6 +26,21 @@
 	if (hdev->power_led)
 		led_trigger_event(hdev->power_led,
 				  enabled ? LED_FULL : LED_OFF);
+
+	if (!enabled) {
+		struct hci_dev *d;
+
+		read_lock(&hci_dev_list_lock);
+
+		list_for_each_entry(d, &hci_dev_list, list) {
+			if (test_bit(HCI_UP, &d->flags))
+				enabled = true;
+		}
+
+		read_unlock(&hci_dev_list_lock);
+	}
+
+	led_trigger_event(bt_power_led_trigger, enabled ? LED_FULL : LED_OFF);
 }
 
 static void power_activate(struct led_classdev *led_cdev)
@@ -72,3 +89,13 @@
 	/* initialize power_led */
 	hdev->power_led = led_allocate_basic(hdev, power_activate, "power");
 }
+
+void bt_leds_init(void)
+{
+	led_trigger_register_simple("bluetooth-power", &bt_power_led_trigger);
+}
+
+void bt_leds_cleanup(void)
+{
+	led_trigger_unregister_simple(bt_power_led_trigger);
+}
diff --git a/net/bluetooth/leds.h b/net/bluetooth/leds.h
index a9c4d6e..08725a2 100644
--- a/net/bluetooth/leds.h
+++ b/net/bluetooth/leds.h
@@ -7,10 +7,20 @@
  */
 
 #if IS_ENABLED(CONFIG_BT_LEDS)
+
 void hci_leds_update_powered(struct hci_dev *hdev, bool enabled);
 void hci_leds_init(struct hci_dev *hdev);
+
+void bt_leds_init(void);
+void bt_leds_cleanup(void);
+
 #else
+
 static inline void hci_leds_update_powered(struct hci_dev *hdev,
 					   bool enabled) {}
 static inline void hci_leds_init(struct hci_dev *hdev) {}
+
+static inline void bt_leds_init(void) {}
+static inline void bt_leds_cleanup(void) {}
+
 #endif
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7639290..19b8a5e 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
 #include "mgmt_util.h"
 
 #define MGMT_VERSION	1
-#define MGMT_REVISION	13
+#define MGMT_REVISION	14
 
 static const u16 mgmt_commands[] = {
 	MGMT_OP_READ_INDEX_LIST,
@@ -104,6 +104,8 @@
 	MGMT_OP_REMOVE_ADVERTISING,
 	MGMT_OP_GET_ADV_SIZE_INFO,
 	MGMT_OP_START_LIMITED_DISCOVERY,
+	MGMT_OP_READ_EXT_INFO,
+	MGMT_OP_SET_APPEARANCE,
 };
 
 static const u16 mgmt_events[] = {
@@ -141,6 +143,7 @@
 	MGMT_EV_LOCAL_OOB_DATA_UPDATED,
 	MGMT_EV_ADVERTISING_ADDED,
 	MGMT_EV_ADVERTISING_REMOVED,
+	MGMT_EV_EXT_INFO_CHANGED,
 };
 
 static const u16 mgmt_untrusted_commands[] = {
@@ -149,6 +152,7 @@
 	MGMT_OP_READ_UNCONF_INDEX_LIST,
 	MGMT_OP_READ_CONFIG_INFO,
 	MGMT_OP_READ_EXT_INDEX_LIST,
+	MGMT_OP_READ_EXT_INFO,
 };
 
 static const u16 mgmt_untrusted_events[] = {
@@ -162,6 +166,7 @@
 	MGMT_EV_NEW_CONFIG_OPTIONS,
 	MGMT_EV_EXT_INDEX_ADDED,
 	MGMT_EV_EXT_INDEX_REMOVED,
+	MGMT_EV_EXT_INFO_CHANGED,
 };
 
 #define CACHE_TIMEOUT	msecs_to_jiffies(2 * 1000)
@@ -256,13 +261,6 @@
 			       flag, skip_sk);
 }
 
-static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data,
-			      u16 len, struct sock *skip_sk)
-{
-	return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len,
-			       HCI_MGMT_GENERIC_EVENTS, skip_sk);
-}
-
 static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len,
 		      struct sock *skip_sk)
 {
@@ -278,6 +276,14 @@
 		return ADDR_LE_DEV_RANDOM;
 }
 
+void mgmt_fill_version_info(void *ver)
+{
+	struct mgmt_rp_read_version *rp = ver;
+
+	rp->version = MGMT_VERSION;
+	rp->revision = cpu_to_le16(MGMT_REVISION);
+}
+
 static int read_version(struct sock *sk, struct hci_dev *hdev, void *data,
 			u16 data_len)
 {
@@ -285,8 +291,7 @@
 
 	BT_DBG("sock %p", sk);
 
-	rp.version = MGMT_VERSION;
-	rp.revision = cpu_to_le16(MGMT_REVISION);
+	mgmt_fill_version_info(&rp);
 
 	return mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0,
 				 &rp, sizeof(rp));
@@ -572,8 +577,8 @@
 {
 	__le32 options = get_missing_options(hdev);
 
-	return mgmt_generic_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options,
-				  sizeof(options), skip);
+	return mgmt_limited_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options,
+				  sizeof(options), HCI_MGMT_OPTION_EVENTS, skip);
 }
 
 static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev)
@@ -862,6 +867,107 @@
 				 sizeof(rp));
 }
 
+static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data,
+				  u8 data_len)
+{
+	eir[eir_len++] = sizeof(type) + data_len;
+	eir[eir_len++] = type;
+	memcpy(&eir[eir_len], data, data_len);
+	eir_len += data_len;
+
+	return eir_len;
+}
+
+static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data)
+{
+	eir[eir_len++] = sizeof(type) + sizeof(data);
+	eir[eir_len++] = type;
+	put_unaligned_le16(data, &eir[eir_len]);
+	eir_len += sizeof(data);
+
+	return eir_len;
+}
+
+static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir)
+{
+	u16 eir_len = 0;
+	size_t name_len;
+
+	if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
+		eir_len = eir_append_data(eir, eir_len, EIR_CLASS_OF_DEV,
+					  hdev->dev_class, 3);
+
+	if (hci_dev_test_flag(hdev, HCI_LE_ENABLED))
+		eir_len = eir_append_le16(eir, eir_len, EIR_APPEARANCE,
+					  hdev->appearance);
+
+	name_len = strlen(hdev->dev_name);
+	eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE,
+				  hdev->dev_name, name_len);
+
+	name_len = strlen(hdev->short_name);
+	eir_len = eir_append_data(eir, eir_len, EIR_NAME_SHORT,
+				  hdev->short_name, name_len);
+
+	return eir_len;
+}
+
+static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev,
+				    void *data, u16 data_len)
+{
+	char buf[512];
+	struct mgmt_rp_read_ext_info *rp = (void *)buf;
+	u16 eir_len;
+
+	BT_DBG("sock %p %s", sk, hdev->name);
+
+	memset(&buf, 0, sizeof(buf));
+
+	hci_dev_lock(hdev);
+
+	bacpy(&rp->bdaddr, &hdev->bdaddr);
+
+	rp->version = hdev->hci_ver;
+	rp->manufacturer = cpu_to_le16(hdev->manufacturer);
+
+	rp->supported_settings = cpu_to_le32(get_supported_settings(hdev));
+	rp->current_settings = cpu_to_le32(get_current_settings(hdev));
+
+
+	eir_len = append_eir_data_to_buf(hdev, rp->eir);
+	rp->eir_len = cpu_to_le16(eir_len);
+
+	hci_dev_unlock(hdev);
+
+	/* If this command is called at least once, then the events
+	 * for class of device and local name changes are disabled
+	 * and only the new extended controller information event
+	 * is used.
+	 */
+	hci_sock_set_flag(sk, HCI_MGMT_EXT_INFO_EVENTS);
+	hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS);
+	hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS);
+
+	return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, rp,
+				 sizeof(*rp) + eir_len);
+}
+
+static int ext_info_changed(struct hci_dev *hdev, struct sock *skip)
+{
+	char buf[512];
+	struct mgmt_ev_ext_info_changed *ev = (void *)buf;
+	u16 eir_len;
+
+	memset(buf, 0, sizeof(buf));
+
+	eir_len = append_eir_data_to_buf(hdev, ev->eir);
+	ev->eir_len = cpu_to_le16(eir_len);
+
+	return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, ev,
+				  sizeof(*ev) + eir_len,
+				  HCI_MGMT_EXT_INFO_EVENTS, skip);
+}
+
 static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev)
 {
 	__le32 settings = cpu_to_le32(get_current_settings(hdev));
@@ -922,7 +1028,7 @@
 		hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
 	}
 
-	hci_req_clear_adv_instance(hdev, NULL, 0x00, false);
+	hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, false);
 
 	if (hci_dev_test_flag(hdev, HCI_LE_ADV))
 		__hci_req_disable_advertising(&req);
@@ -1000,8 +1106,8 @@
 {
 	__le32 ev = cpu_to_le32(get_current_settings(hdev));
 
-	return mgmt_generic_event(MGMT_EV_NEW_SETTINGS, hdev, &ev,
-				  sizeof(ev), skip);
+	return mgmt_limited_event(MGMT_EV_NEW_SETTINGS, hdev, &ev,
+				  sizeof(ev), HCI_MGMT_SETTING_EVENTS, skip);
 }
 
 int mgmt_new_settings(struct hci_dev *hdev)
@@ -1690,7 +1796,7 @@
 	enabled = lmp_host_le_capable(hdev);
 
 	if (!val)
-		hci_req_clear_adv_instance(hdev, NULL, 0x00, true);
+		hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, true);
 
 	if (!hdev_is_powered(hdev) || val == enabled) {
 		bool changed = false;
@@ -2435,6 +2541,8 @@
 	if (!cmd)
 		return -ENOMEM;
 
+	cmd->cmd_complete = addr_cmd_complete;
+
 	err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY,
 			   sizeof(cp->addr.bdaddr), &cp->addr.bdaddr);
 	if (err < 0)
@@ -2513,8 +2621,8 @@
 	BT_DBG("");
 
 	if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY)
-		return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY,
-					 MGMT_STATUS_INVALID_PARAMS, NULL, 0);
+		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY,
+				       MGMT_STATUS_INVALID_PARAMS);
 
 	hci_dev_lock(hdev);
 
@@ -2932,6 +3040,35 @@
 				 HCI_OP_USER_PASSKEY_NEG_REPLY, 0);
 }
 
+static void adv_expire(struct hci_dev *hdev, u32 flags)
+{
+	struct adv_info *adv_instance;
+	struct hci_request req;
+	int err;
+
+	adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance);
+	if (!adv_instance)
+		return;
+
+	/* stop if current instance doesn't need to be changed */
+	if (!(adv_instance->flags & flags))
+		return;
+
+	cancel_adv_timeout(hdev);
+
+	adv_instance = hci_get_next_instance(hdev, adv_instance->instance);
+	if (!adv_instance)
+		return;
+
+	hci_req_init(&req, hdev);
+	err = __hci_req_schedule_adv_instance(&req, adv_instance->instance,
+					      true);
+	if (err)
+		return;
+
+	hci_req_run(&req, NULL);
+}
+
 static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 {
 	struct mgmt_cp_set_local_name *cp;
@@ -2947,13 +3084,17 @@
 
 	cp = cmd->param;
 
-	if (status)
+	if (status) {
 		mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME,
 			        mgmt_status(status));
-	else
+	} else {
 		mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0,
 				  cp, sizeof(*cp));
 
+		if (hci_dev_test_flag(hdev, HCI_LE_ADV))
+			adv_expire(hdev, MGMT_ADV_FLAG_LOCAL_NAME);
+	}
+
 	mgmt_pending_remove(cmd);
 
 unlock:
@@ -2993,8 +3134,9 @@
 		if (err < 0)
 			goto failed;
 
-		err = mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev,
-					 data, len, sk);
+		err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data,
+					 len, HCI_MGMT_LOCAL_NAME_EVENTS, sk);
+		ext_info_changed(hdev, sk);
 
 		goto failed;
 	}
@@ -3017,7 +3159,7 @@
 	/* The name is stored in the scan response data and so
 	 * no need to udpate the advertising data here.
 	 */
-	if (lmp_le_capable(hdev))
+	if (lmp_le_capable(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING))
 		__hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance);
 
 	err = hci_req_run(&req, set_name_complete);
@@ -3029,6 +3171,40 @@
 	return err;
 }
 
+static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data,
+			  u16 len)
+{
+	struct mgmt_cp_set_appearance *cp = data;
+	u16 apperance;
+	int err;
+
+	BT_DBG("");
+
+	if (!lmp_le_capable(hdev))
+		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_APPEARANCE,
+				       MGMT_STATUS_NOT_SUPPORTED);
+
+	apperance = le16_to_cpu(cp->appearance);
+
+	hci_dev_lock(hdev);
+
+	if (hdev->appearance != apperance) {
+		hdev->appearance = apperance;
+
+		if (hci_dev_test_flag(hdev, HCI_LE_ADV))
+			adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE);
+
+		ext_info_changed(hdev, sk);
+	}
+
+	err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL,
+				0);
+
+	hci_dev_unlock(hdev);
+
+	return err;
+}
+
 static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
 				         u16 opcode, struct sk_buff *skb)
 {
@@ -4869,7 +5045,7 @@
 	int err;
 
 	memset(&rp, 0, sizeof(rp));
-	memcpy(&rp.addr, &cmd->param, sizeof(rp.addr));
+	memcpy(&rp.addr, cmd->param, sizeof(rp.addr));
 
 	if (status)
 		goto complete;
@@ -5501,17 +5677,6 @@
 	return err;
 }
 
-static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data,
-				  u8 data_len)
-{
-	eir[eir_len++] = sizeof(type) + data_len;
-	eir[eir_len++] = type;
-	memcpy(&eir[eir_len], data, data_len);
-	eir_len += data_len;
-
-	return eir_len;
-}
-
 static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status,
 					     u16 opcode, struct sk_buff *skb)
 {
@@ -5815,6 +5980,8 @@
 	flags |= MGMT_ADV_FLAG_DISCOV;
 	flags |= MGMT_ADV_FLAG_LIMITED_DISCOV;
 	flags |= MGMT_ADV_FLAG_MANAGED_FLAGS;
+	flags |= MGMT_ADV_FLAG_APPEARANCE;
+	flags |= MGMT_ADV_FLAG_LOCAL_NAME;
 
 	if (hdev->adv_tx_power != HCI_TX_POWER_INVALID)
 		flags |= MGMT_ADV_FLAG_TX_POWER;
@@ -5871,28 +6038,59 @@
 	return err;
 }
 
-static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
-			      u8 len, bool is_adv_data)
+static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data)
 {
 	u8 max_len = HCI_MAX_AD_LENGTH;
-	int i, cur_len;
-	bool flags_managed = false;
-	bool tx_power_managed = false;
 
 	if (is_adv_data) {
 		if (adv_flags & (MGMT_ADV_FLAG_DISCOV |
 				 MGMT_ADV_FLAG_LIMITED_DISCOV |
-				 MGMT_ADV_FLAG_MANAGED_FLAGS)) {
-			flags_managed = true;
+				 MGMT_ADV_FLAG_MANAGED_FLAGS))
 			max_len -= 3;
-		}
 
-		if (adv_flags & MGMT_ADV_FLAG_TX_POWER) {
-			tx_power_managed = true;
+		if (adv_flags & MGMT_ADV_FLAG_TX_POWER)
 			max_len -= 3;
-		}
+	} else {
+		/* at least 1 byte of name should fit in */
+		if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME)
+			max_len -= 3;
+
+		if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE))
+			max_len -= 4;
 	}
 
+	return max_len;
+}
+
+static bool flags_managed(u32 adv_flags)
+{
+	return adv_flags & (MGMT_ADV_FLAG_DISCOV |
+			    MGMT_ADV_FLAG_LIMITED_DISCOV |
+			    MGMT_ADV_FLAG_MANAGED_FLAGS);
+}
+
+static bool tx_power_managed(u32 adv_flags)
+{
+	return adv_flags & MGMT_ADV_FLAG_TX_POWER;
+}
+
+static bool name_managed(u32 adv_flags)
+{
+	return adv_flags & MGMT_ADV_FLAG_LOCAL_NAME;
+}
+
+static bool appearance_managed(u32 adv_flags)
+{
+	return adv_flags & MGMT_ADV_FLAG_APPEARANCE;
+}
+
+static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data)
+{
+	int i, cur_len;
+	u8 max_len;
+
+	max_len = tlv_data_max_len(adv_flags, is_adv_data);
+
 	if (len > max_len)
 		return false;
 
@@ -5900,10 +6098,21 @@
 	for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) {
 		cur_len = data[i];
 
-		if (flags_managed && data[i + 1] == EIR_FLAGS)
+		if (data[i + 1] == EIR_FLAGS &&
+		    (!is_adv_data || flags_managed(adv_flags)))
 			return false;
 
-		if (tx_power_managed && data[i + 1] == EIR_TX_POWER)
+		if (data[i + 1] == EIR_TX_POWER && tx_power_managed(adv_flags))
+			return false;
+
+		if (data[i + 1] == EIR_NAME_COMPLETE && name_managed(adv_flags))
+			return false;
+
+		if (data[i + 1] == EIR_NAME_SHORT && name_managed(adv_flags))
+			return false;
+
+		if (data[i + 1] == EIR_APPEARANCE &&
+		    appearance_managed(adv_flags))
 			return false;
 
 		/* If the current field length would exceed the total data
@@ -6027,8 +6236,8 @@
 		goto unlock;
 	}
 
-	if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) ||
-	    !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len,
+	if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) ||
+	    !tlv_data_is_valid(flags, cp->data + cp->adv_data_len,
 			       cp->scan_rsp_len, false)) {
 		err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING,
 				      MGMT_STATUS_INVALID_PARAMS);
@@ -6175,7 +6384,7 @@
 
 	hci_req_init(&req, hdev);
 
-	hci_req_clear_adv_instance(hdev, &req, cp->instance, true);
+	hci_req_clear_adv_instance(hdev, sk, &req, cp->instance, true);
 
 	if (list_empty(&hdev->adv_instances))
 		__hci_req_disable_advertising(&req);
@@ -6211,23 +6420,6 @@
 	return err;
 }
 
-static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data)
-{
-	u8 max_len = HCI_MAX_AD_LENGTH;
-
-	if (is_adv_data) {
-		if (adv_flags & (MGMT_ADV_FLAG_DISCOV |
-				 MGMT_ADV_FLAG_LIMITED_DISCOV |
-				 MGMT_ADV_FLAG_MANAGED_FLAGS))
-			max_len -= 3;
-
-		if (adv_flags & MGMT_ADV_FLAG_TX_POWER)
-			max_len -= 3;
-	}
-
-	return max_len;
-}
-
 static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
 			     void *data, u16 data_len)
 {
@@ -6356,6 +6548,9 @@
 	{ remove_advertising,	   MGMT_REMOVE_ADVERTISING_SIZE },
 	{ get_adv_size_info,       MGMT_GET_ADV_SIZE_INFO_SIZE },
 	{ start_limited_discovery, MGMT_START_DISCOVERY_SIZE },
+	{ read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE,
+						HCI_MGMT_UNTRUSTED },
+	{ set_appearance,	   MGMT_SET_APPEARANCE_SIZE },
 };
 
 void mgmt_index_added(struct hci_dev *hdev)
@@ -6494,9 +6689,12 @@
 
 	mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
 
-	if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
-		mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
-				   zero_cod, sizeof(zero_cod), NULL);
+	if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) {
+		mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
+				   zero_cod, sizeof(zero_cod),
+				   HCI_MGMT_DEV_CLASS_EVENTS, NULL);
+		ext_info_changed(hdev, NULL);
+	}
 
 	new_settings(hdev, match.sk);
 
@@ -7092,9 +7290,11 @@
 	mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match);
 	mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);
 
-	if (!status)
-		mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
-				   dev_class, 3, NULL);
+	if (!status) {
+		mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class,
+				   3, HCI_MGMT_DEV_CLASS_EVENTS, NULL);
+		ext_info_changed(hdev, NULL);
+	}
 
 	if (match.sk)
 		sock_put(match.sk);
@@ -7123,8 +7323,9 @@
 			return;
 	}
 
-	mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev),
-			   cmd ? cmd->sk : NULL);
+	mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev),
+			   HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL);
+	ext_info_changed(hdev, cmd ? cmd->sk : NULL);
 }
 
 static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16])
diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c
index 8c30c7e..c933bd0 100644
--- a/net/bluetooth/mgmt_util.c
+++ b/net/bluetooth/mgmt_util.c
@@ -21,12 +21,41 @@
    SOFTWARE IS DISCLAIMED.
 */
 
+#include <asm/unaligned.h>
+
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
+#include <net/bluetooth/hci_mon.h>
 #include <net/bluetooth/mgmt.h>
 
 #include "mgmt_util.h"
 
+static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie,
+						 u16 opcode, u16 len, void *buf)
+{
+	struct hci_mon_hdr *hdr;
+	struct sk_buff *skb;
+
+	skb = bt_skb_alloc(6 + len, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	put_unaligned_le32(cookie, skb_put(skb, 4));
+	put_unaligned_le16(opcode, skb_put(skb, 2));
+
+	if (buf)
+		memcpy(skb_put(skb, len), buf, len);
+
+	__net_timestamp(skb);
+
+	hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE);
+	hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT);
+	hdr->index = index;
+	hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE);
+
+	return skb;
+}
+
 int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel,
 		    void *data, u16 data_len, int flag, struct sock *skip_sk)
 {
@@ -52,14 +81,18 @@
 	__net_timestamp(skb);
 
 	hci_send_to_channel(channel, skb, flag, skip_sk);
-	kfree_skb(skb);
 
+	if (channel == HCI_CHANNEL_CONTROL)
+		hci_send_monitor_ctrl_event(hdev, event, data, data_len,
+					    skb_get_ktime(skb), flag, skip_sk);
+
+	kfree_skb(skb);
 	return 0;
 }
 
 int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status)
 {
-	struct sk_buff *skb;
+	struct sk_buff *skb, *mskb;
 	struct mgmt_hdr *hdr;
 	struct mgmt_ev_cmd_status *ev;
 	int err;
@@ -80,17 +113,30 @@
 	ev->status = status;
 	ev->opcode = cpu_to_le16(cmd);
 
+	mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk),
+					 MGMT_EV_CMD_STATUS, sizeof(*ev), ev);
+	if (mskb)
+		skb->tstamp = mskb->tstamp;
+	else
+		__net_timestamp(skb);
+
 	err = sock_queue_rcv_skb(sk, skb);
 	if (err < 0)
 		kfree_skb(skb);
 
+	if (mskb) {
+		hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb,
+				    HCI_SOCK_TRUSTED, NULL);
+		kfree_skb(mskb);
+	}
+
 	return err;
 }
 
 int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status,
 		      void *rp, size_t rp_len)
 {
-	struct sk_buff *skb;
+	struct sk_buff *skb, *mskb;
 	struct mgmt_hdr *hdr;
 	struct mgmt_ev_cmd_complete *ev;
 	int err;
@@ -114,10 +160,24 @@
 	if (rp)
 		memcpy(ev->data, rp, rp_len);
 
+	mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk),
+					 MGMT_EV_CMD_COMPLETE,
+					 sizeof(*ev) + rp_len, ev);
+	if (mskb)
+		skb->tstamp = mskb->tstamp;
+	else
+		__net_timestamp(skb);
+
 	err = sock_queue_rcv_skb(sk, skb);
 	if (err < 0)
 		kfree_skb(skb);
 
+	if (mskb) {
+		hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb,
+				    HCI_SOCK_TRUSTED, NULL);
+		kfree_skb(mskb);
+	}
+
 	return err;
 }
 
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 4c1a16a..43faf2a 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -3387,7 +3387,10 @@
 	if (!lmp_sc_capable(hdev)) {
 		debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs,
 				    hdev, &force_bredr_smp_fops);
-		return 0;
+
+		/* Flag can be already set here (due to power toggle) */
+		if (!hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP))
+			return 0;
 	}
 
 	if (WARN_ON(hdev->smp_bredr_data)) {
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index a1cda5d4..0aefc01 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -20,4 +20,6 @@
 
 bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
 
+bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
+
 obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 3addc05..889e564 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -227,9 +227,11 @@
 	br_fdb_test_addr_hook = br_fdb_test_addr;
 #endif
 
-	pr_info("bridge: automatic filtering via arp/ip/ip6tables has been "
-		"deprecated. Update your scripts to load br_netfilter if you "
+#if IS_MODULE(CONFIG_BRIDGE_NETFILTER)
+	pr_info("bridge: filtering via arp/ip/ip6tables is no longer available "
+		"by default. Update your scripts to load br_netfilter if you "
 		"need this.\n");
+#endif
 
 	return 0;
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 09f2694..89a687f 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -62,10 +62,10 @@
 		goto out;
 
 	if (is_broadcast_ether_addr(dest)) {
-		br_flood(br, skb, false, false, true);
+		br_flood(br, skb, BR_PKT_BROADCAST, false, true);
 	} else if (is_multicast_ether_addr(dest)) {
 		if (unlikely(netpoll_tx_running(dev))) {
-			br_flood(br, skb, false, false, true);
+			br_flood(br, skb, BR_PKT_MULTICAST, false, true);
 			goto out;
 		}
 		if (br_multicast_rcv(br, NULL, skb, vid)) {
@@ -78,11 +78,11 @@
 		    br_multicast_querier_exists(br, eth_hdr(skb)))
 			br_multicast_flood(mdst, skb, false, true);
 		else
-			br_flood(br, skb, false, false, true);
+			br_flood(br, skb, BR_PKT_MULTICAST, false, true);
 	} else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) {
 		br_forward(dst->dst, skb, false, true);
 	} else {
-		br_flood(br, skb, true, false, true);
+		br_flood(br, skb, BR_PKT_UNICAST, false, true);
 	}
 out:
 	rcu_read_unlock();
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c18080a..6b43c8c 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -267,7 +267,7 @@
 
 	/* If old entry was unassociated with any port, then delete it. */
 	f = __br_fdb_get(br, br->dev->dev_addr, 0);
-	if (f && f->is_local && !f->dst)
+	if (f && f->is_local && !f->dst && !f->added_by_user)
 		fdb_delete_local(br, NULL, f);
 
 	fdb_insert(br, NULL, newaddr, 0);
@@ -282,7 +282,7 @@
 		if (!br_vlan_should_use(v))
 			continue;
 		f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
-		if (f && f->is_local && !f->dst)
+		if (f && f->is_local && !f->dst && !f->added_by_user)
 			fdb_delete_local(br, NULL, f);
 		fdb_insert(br, NULL, newaddr, v->vid);
 	}
@@ -710,24 +710,27 @@
 		struct netlink_callback *cb,
 		struct net_device *dev,
 		struct net_device *filter_dev,
-		int idx)
+		int *idx)
 {
 	struct net_bridge *br = netdev_priv(dev);
+	int err = 0;
 	int i;
 
 	if (!(dev->priv_flags & IFF_EBRIDGE))
 		goto out;
 
-	if (!filter_dev)
-		idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
+	if (!filter_dev) {
+		err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
+		if (err < 0)
+			goto out;
+	}
 
 	for (i = 0; i < BR_HASH_SIZE; i++) {
 		struct net_bridge_fdb_entry *f;
 
 		hlist_for_each_entry_rcu(f, &br->hash[i], hlist) {
-			int err;
 
-			if (idx < cb->args[0])
+			if (*idx < cb->args[2])
 				goto skip;
 
 			if (filter_dev &&
@@ -750,34 +753,37 @@
 					    cb->nlh->nlmsg_seq,
 					    RTM_NEWNEIGH,
 					    NLM_F_MULTI);
-			if (err < 0) {
-				cb->args[1] = err;
-				break;
-			}
+			if (err < 0)
+				goto out;
 skip:
-			++idx;
+			*idx += 1;
 		}
 	}
 
 out:
-	return idx;
+	return err;
 }
 
 /* Update (create or replace) forwarding database entry */
-static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
-			 __u16 state, __u16 flags, __u16 vid)
+static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
+			 const __u8 *addr, __u16 state, __u16 flags, __u16 vid)
 {
-	struct net_bridge *br = source->br;
 	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
 	struct net_bridge_fdb_entry *fdb;
 	bool modified = false;
 
 	/* If the port cannot learn allow only local and static entries */
-	if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
+	if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
 	    !(source->state == BR_STATE_LEARNING ||
 	      source->state == BR_STATE_FORWARDING))
 		return -EPERM;
 
+	if (!source && !(state & NUD_PERMANENT)) {
+		pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n",
+			br->dev->name);
+		return -EINVAL;
+	}
+
 	fdb = fdb_find(head, addr, vid);
 	if (fdb == NULL) {
 		if (!(flags & NLM_F_CREATE))
@@ -832,22 +838,28 @@
 	return 0;
 }
 
-static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
-	       const unsigned char *addr, u16 nlh_flags, u16 vid)
+static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
+			struct net_bridge_port *p, const unsigned char *addr,
+			u16 nlh_flags, u16 vid)
 {
 	int err = 0;
 
 	if (ndm->ndm_flags & NTF_USE) {
+		if (!p) {
+			pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n",
+				br->dev->name);
+			return -EINVAL;
+		}
 		local_bh_disable();
 		rcu_read_lock();
-		br_fdb_update(p->br, p, addr, vid, true);
+		br_fdb_update(br, p, addr, vid, true);
 		rcu_read_unlock();
 		local_bh_enable();
 	} else {
-		spin_lock_bh(&p->br->hash_lock);
-		err = fdb_add_entry(p, addr, ndm->ndm_state,
+		spin_lock_bh(&br->hash_lock);
+		err = fdb_add_entry(br, p, addr, ndm->ndm_state,
 				    nlh_flags, vid);
-		spin_unlock_bh(&p->br->hash_lock);
+		spin_unlock_bh(&br->hash_lock);
 	}
 
 	return err;
@@ -884,6 +896,7 @@
 				dev->name);
 			return -EINVAL;
 		}
+		br = p->br;
 		vg = nbp_vlan_group(p);
 	}
 
@@ -895,15 +908,9 @@
 		}
 
 		/* VID was specified, so use it. */
-		if (dev->priv_flags & IFF_EBRIDGE)
-			err = br_fdb_insert(br, NULL, addr, vid);
-		else
-			err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+		err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid);
 	} else {
-		if (dev->priv_flags & IFF_EBRIDGE)
-			err = br_fdb_insert(br, NULL, addr, 0);
-		else
-			err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+		err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0);
 		if (err || !vg || !vg->num_vlans)
 			goto out;
 
@@ -914,11 +921,7 @@
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			if (!br_vlan_should_use(v))
 				continue;
-			if (dev->priv_flags & IFF_EBRIDGE)
-				err = br_fdb_insert(br, NULL, addr, v->vid);
-			else
-				err = __br_fdb_add(ndm, p, addr, nlh_flags,
-						   v->vid);
+			err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid);
 			if (err)
 				goto out;
 		}
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 63a83d8..7cb41ae 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -29,7 +29,8 @@
 
 	vg = nbp_vlan_group_rcu(p);
 	return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
-		br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING;
+		br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING &&
+		nbp_switchdev_allowed_egress(p, skb);
 }
 
 int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -175,7 +176,7 @@
 
 /* called under rcu_read_lock */
 void br_flood(struct net_bridge *br, struct sk_buff *skb,
-	      bool unicast, bool local_rcv, bool local_orig)
+	      enum br_pkt_type pkt_type, bool local_rcv, bool local_orig)
 {
 	u8 igmp_type = br_multicast_igmp_type(skb);
 	struct net_bridge_port *prev = NULL;
@@ -183,7 +184,10 @@
 
 	list_for_each_entry_rcu(p, &br->port_list, list) {
 		/* Do not flood unicast traffic to ports that turn it off */
-		if (unicast && !(p->flags & BR_FLOOD))
+		if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD))
+			continue;
+		if (pkt_type == BR_PKT_MULTICAST &&
+		    !(p->flags & BR_MCAST_FLOOD))
 			continue;
 
 		/* Do not flood to ports that enable proxy ARP */
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f2fede0..ed0dd33 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -362,7 +362,7 @@
 	p->path_cost = port_cost(dev);
 	p->priority = 0x8000 >> BR_PORT_BITS;
 	p->port_no = index;
-	p->flags = BR_LEARNING | BR_FLOOD;
+	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD;
 	br_init_port(p);
 	br_set_state(p, BR_STATE_DISABLED);
 	br_stp_port_timer_init(p);
@@ -545,6 +545,10 @@
 	if (err)
 		goto err5;
 
+	err = nbp_switchdev_mark_set(p);
+	if (err)
+		goto err6;
+
 	dev_disable_lro(dev);
 
 	list_add_rcu(&p->list, &br->port_list);
@@ -566,7 +570,7 @@
 	err = nbp_vlan_init(p);
 	if (err) {
 		netdev_err(dev, "failed to initialize vlan filtering on this port\n");
-		goto err6;
+		goto err7;
 	}
 
 	spin_lock_bh(&br->lock);
@@ -589,12 +593,12 @@
 
 	return 0;
 
-err6:
+err7:
 	list_del_rcu(&p->list);
 	br_fdb_delete_by_port(br, p, 0, 1);
 	nbp_update_port_count(br);
+err6:
 	netdev_upper_dev_unlink(dev, br->dev);
-
 err5:
 	dev->priv_flags &= ~IFF_BRIDGE_PORT;
 	netdev_rx_handler_unregister(dev);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8e48620..855b72f 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -80,13 +80,10 @@
 
 	BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
 
-	if (dev->flags & IFF_NOARP)
+	if ((dev->flags & IFF_NOARP) ||
+	    !pskb_may_pull(skb, arp_hdr_len(dev)))
 		return;
 
-	if (!pskb_may_pull(skb, arp_hdr_len(dev))) {
-		dev->stats.tx_dropped++;
-		return;
-	}
 	parp = arp_hdr(skb);
 
 	if (parp->ar_pro != htons(ETH_P_IP) ||
@@ -131,11 +128,12 @@
 /* note: already called with rcu_read_lock */
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	bool local_rcv = false, mcast_hit = false, unicast = true;
 	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
+	enum br_pkt_type pkt_type = BR_PKT_UNICAST;
 	struct net_bridge_fdb_entry *dst = NULL;
 	struct net_bridge_mdb_entry *mdst;
+	bool local_rcv, mcast_hit = false;
 	struct net_bridge *br;
 	u16 vid = 0;
 
@@ -145,29 +143,36 @@
 	if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
 		goto out;
 
+	nbp_switchdev_frame_mark(p, skb);
+
 	/* insert into forwarding database after filtering to avoid spoofing */
 	br = p->br;
 	if (p->flags & BR_LEARNING)
 		br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
 
-	if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
-	    br_multicast_rcv(br, p, skb, vid))
-		goto drop;
+	local_rcv = !!(br->dev->flags & IFF_PROMISC);
+	if (is_multicast_ether_addr(dest)) {
+		/* by definition the broadcast is also a multicast address */
+		if (is_broadcast_ether_addr(dest)) {
+			pkt_type = BR_PKT_BROADCAST;
+			local_rcv = true;
+		} else {
+			pkt_type = BR_PKT_MULTICAST;
+			if (br_multicast_rcv(br, p, skb, vid))
+				goto drop;
+		}
+	}
 
 	if (p->state == BR_STATE_LEARNING)
 		goto drop;
 
 	BR_INPUT_SKB_CB(skb)->brdev = br->dev;
 
-	local_rcv = !!(br->dev->flags & IFF_PROMISC);
-
 	if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
 		br_do_proxy_arp(skb, br, vid, p);
 
-	if (is_broadcast_ether_addr(dest)) {
-		local_rcv = true;
-		unicast = false;
-	} else if (is_multicast_ether_addr(dest)) {
+	switch (pkt_type) {
+	case BR_PKT_MULTICAST:
 		mdst = br_mdb_get(br, skb, vid);
 		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
 		    br_multicast_querier_exists(br, eth_hdr(skb))) {
@@ -181,18 +186,22 @@
 			local_rcv = true;
 			br->dev->stats.multicast++;
 		}
-		unicast = false;
-	} else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) {
-		/* Do not forward the packet since it's local. */
-		return br_pass_frame_up(skb);
+		break;
+	case BR_PKT_UNICAST:
+		dst = __br_fdb_get(br, dest, vid);
+	default:
+		break;
 	}
 
 	if (dst) {
+		if (dst->is_local)
+			return br_pass_frame_up(skb);
+
 		dst->used = jiffies;
 		br_forward(dst->dst, skb, local_rcv, false);
 	} else {
 		if (!mcast_hit)
-			br_flood(br, skb, unicast, local_rcv, false);
+			br_flood(br, skb, pkt_type, local_rcv, false);
 		else
 			br_multicast_flood(mdst, skb, local_rcv, false);
 	}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index a5423a1..c5fea93 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1138,7 +1138,7 @@
 		} else {
 			err = br_ip6_multicast_add_group(br, port,
 							 &grec->grec_mca, vid);
-			if (!err)
+			if (err)
 				break;
 		}
 	}
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 77e7f69..2fe9345 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -30,6 +30,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_arp.h>
 #include <linux/in_route.h>
+#include <linux/rculist.h>
 #include <linux/inetdevice.h>
 
 #include <net/ip.h>
@@ -395,11 +396,10 @@
 				skb->dev = nf_bridge->physindev;
 				nf_bridge_update_protocol(skb);
 				nf_bridge_push_encap_header(skb);
-				NF_HOOK_THRESH(NFPROTO_BRIDGE,
-					       NF_BR_PRE_ROUTING,
-					       net, sk, skb, skb->dev, NULL,
-					       br_nf_pre_routing_finish_bridge,
-					       1);
+				br_nf_hook_thresh(NF_BR_PRE_ROUTING,
+						  net, sk, skb, skb->dev,
+						  NULL,
+						  br_nf_pre_routing_finish);
 				return 0;
 			}
 			ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
@@ -417,10 +417,8 @@
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_update_protocol(skb);
 	nf_bridge_push_encap_header(skb);
-	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
-		       skb->dev, NULL,
-		       br_handle_frame_finish, 1);
-
+	br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
+			  br_handle_frame_finish);
 	return 0;
 }
 
@@ -992,6 +990,43 @@
 	.notifier_call = brnf_device_event,
 };
 
+/* recursively invokes nf_hook_slow (again), skipping already-called
+ * hooks (< NF_BR_PRI_BRNF).
+ *
+ * Called with rcu read lock held.
+ */
+int br_nf_hook_thresh(unsigned int hook, struct net *net,
+		      struct sock *sk, struct sk_buff *skb,
+		      struct net_device *indev,
+		      struct net_device *outdev,
+		      int (*okfn)(struct net *, struct sock *,
+				  struct sk_buff *))
+{
+	struct nf_hook_entry *elem;
+	struct nf_hook_state state;
+	int ret;
+
+	elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+
+	while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF))
+		elem = rcu_dereference(elem->next);
+
+	if (!elem)
+		return okfn(net, sk, skb);
+
+	/* We may already have this, but read-locks nest anyway */
+	rcu_read_lock();
+	nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1,
+			   NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
+
+	ret = nf_hook_slow(skb, &state);
+	rcu_read_unlock();
+	if (ret == 1)
+		ret = okfn(net, sk, skb);
+
+	return ret;
+}
+
 #ifdef CONFIG_SYSCTL
 static
 int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 5e59a84..5989661 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -187,10 +187,9 @@
 			skb->dev = nf_bridge->physindev;
 			nf_bridge_update_protocol(skb);
 			nf_bridge_push_encap_header(skb);
-			NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
-				       net, sk, skb, skb->dev, NULL,
-				       br_nf_pre_routing_finish_bridge,
-				       1);
+			br_nf_hook_thresh(NF_BR_PRE_ROUTING,
+					  net, sk, skb, skb->dev, NULL,
+					  br_nf_pre_routing_finish_bridge);
 			return 0;
 		}
 		ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
@@ -207,9 +206,8 @@
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_update_protocol(skb);
 	nf_bridge_push_encap_header(skb);
-	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
-		       skb->dev, NULL,
-		       br_handle_frame_finish, 1);
+	br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb,
+			  skb->dev, NULL, br_handle_frame_finish);
 
 	return 0;
 }
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index f2a29e4..e99037c 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -169,10 +169,15 @@
 	    nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) ||
 	    nla_put_u8(skb, IFLA_BRPORT_MODE, mode) ||
 	    nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_PROTECT,
+		       !!(p->flags & BR_ROOT_BLOCK)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE,
+		       !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD,
+		       !!(p->flags & BR_FLOOD)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_MCAST_FLOOD,
+		       !!(p->flags & BR_MCAST_FLOOD)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI,
 		       !!(p->flags & BR_PROXYARP_WIFI)) ||
@@ -630,6 +635,7 @@
 	br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK);
 	br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
 	br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
+	br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD);
 	br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
 	br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
 
@@ -1245,14 +1251,30 @@
 	return 0;
 }
 
-static size_t bridge_get_linkxstats_size(const struct net_device *dev)
+static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
 {
-	struct net_bridge *br = netdev_priv(dev);
+	struct net_bridge_port *p = NULL;
 	struct net_bridge_vlan_group *vg;
 	struct net_bridge_vlan *v;
+	struct net_bridge *br;
 	int numvls = 0;
 
-	vg = br_vlan_group(br);
+	switch (attr) {
+	case IFLA_STATS_LINK_XSTATS:
+		br = netdev_priv(dev);
+		vg = br_vlan_group(br);
+		break;
+	case IFLA_STATS_LINK_XSTATS_SLAVE:
+		p = br_port_get_rtnl(dev);
+		if (!p)
+			return 0;
+		br = p->br;
+		vg = nbp_vlan_group(p);
+		break;
+	default:
+		return 0;
+	}
+
 	if (vg) {
 		/* we need to count all, even placeholder entries */
 		list_for_each_entry(v, &vg->vlan_list, vlist)
@@ -1264,45 +1286,42 @@
 	       nla_total_size(0);
 }
 
-static size_t brport_get_linkxstats_size(const struct net_device *dev)
+static int br_fill_linkxstats(struct sk_buff *skb,
+			      const struct net_device *dev,
+			      int *prividx, int attr)
 {
-	return nla_total_size(sizeof(struct br_mcast_stats)) +
-	       nla_total_size(0);
-}
-
-static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
-{
-	size_t retsize = 0;
+	struct nlattr *nla __maybe_unused;
+	struct net_bridge_port *p = NULL;
+	struct net_bridge_vlan_group *vg;
+	struct net_bridge_vlan *v;
+	struct net_bridge *br;
+	struct nlattr *nest;
+	int vl_idx = 0;
 
 	switch (attr) {
 	case IFLA_STATS_LINK_XSTATS:
-		retsize = bridge_get_linkxstats_size(dev);
+		br = netdev_priv(dev);
+		vg = br_vlan_group(br);
 		break;
 	case IFLA_STATS_LINK_XSTATS_SLAVE:
-		retsize = brport_get_linkxstats_size(dev);
+		p = br_port_get_rtnl(dev);
+		if (!p)
+			return 0;
+		br = p->br;
+		vg = nbp_vlan_group(p);
 		break;
+	default:
+		return -EINVAL;
 	}
 
-	return retsize;
-}
-
-static int bridge_fill_linkxstats(struct sk_buff *skb,
-				  const struct net_device *dev,
-				  int *prividx)
-{
-	struct net_bridge *br = netdev_priv(dev);
-	struct nlattr *nla __maybe_unused;
-	struct net_bridge_vlan_group *vg;
-	struct net_bridge_vlan *v;
-	struct nlattr *nest;
-	int vl_idx = 0;
-
 	nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
 	if (!nest)
 		return -EMSGSIZE;
 
-	vg = br_vlan_group(br);
 	if (vg) {
+		u16 pvid;
+
+		pvid = br_get_pvid(vg);
 		list_for_each_entry(v, &vg->vlan_list, vlist) {
 			struct bridge_vlan_xstats vxi;
 			struct br_vlan_stats stats;
@@ -1311,6 +1330,9 @@
 				continue;
 			memset(&vxi, 0, sizeof(vxi));
 			vxi.vid = v->vid;
+			vxi.flags = v->flags;
+			if (v->vid == pvid)
+				vxi.flags |= BRIDGE_VLAN_INFO_PVID;
 			br_vlan_get_stats(v, &stats);
 			vxi.rx_bytes = stats.rx_bytes;
 			vxi.rx_packets = stats.rx_packets;
@@ -1329,7 +1351,7 @@
 					BRIDGE_XSTATS_PAD);
 		if (!nla)
 			goto nla_put_failure;
-		br_multicast_get_stats(br, NULL, nla_data(nla));
+		br_multicast_get_stats(br, p, nla_data(nla));
 	}
 #endif
 	nla_nest_end(skb, nest);
@@ -1344,52 +1366,6 @@
 	return -EMSGSIZE;
 }
 
-static int brport_fill_linkxstats(struct sk_buff *skb,
-				  const struct net_device *dev,
-				  int *prividx)
-{
-	struct net_bridge_port *p = br_port_get_rtnl(dev);
-	struct nlattr *nla __maybe_unused;
-	struct nlattr *nest;
-
-	if (!p)
-		return 0;
-
-	nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE);
-	if (!nest)
-		return -EMSGSIZE;
-#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
-	nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST,
-				sizeof(struct br_mcast_stats),
-				BRIDGE_XSTATS_PAD);
-	if (!nla) {
-		nla_nest_end(skb, nest);
-		return -EMSGSIZE;
-	}
-	br_multicast_get_stats(p->br, p, nla_data(nla));
-#endif
-	nla_nest_end(skb, nest);
-
-	return 0;
-}
-
-static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev,
-			      int *prividx, int attr)
-{
-	int ret = -EINVAL;
-
-	switch (attr) {
-	case IFLA_STATS_LINK_XSTATS:
-		ret = bridge_fill_linkxstats(skb, dev, prividx);
-		break;
-	case IFLA_STATS_LINK_XSTATS_SLAVE:
-		ret = brport_fill_linkxstats(skb, dev, prividx);
-		break;
-	}
-
-	return ret;
-}
-
 static struct rtnl_af_ops br_af_ops __read_mostly = {
 	.family			= AF_BRIDGE,
 	.get_link_af_size	= br_get_link_af_size_filtered,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index aac2a6e..1b63177 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -251,6 +251,9 @@
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	struct net_bridge_vlan_group	__rcu *vlgrp;
 #endif
+#ifdef CONFIG_NET_SWITCHDEV
+	int				offload_fwd_mark;
+#endif
 };
 
 #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -359,6 +362,11 @@
 	struct timer_list		gc_timer;
 	struct kobject			*ifobj;
 	u32				auto_cnt;
+
+#ifdef CONFIG_NET_SWITCHDEV
+	int offload_fwd_mark;
+#endif
+
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	struct net_bridge_vlan_group	__rcu *vlgrp;
 	u8				vlan_enabled;
@@ -381,6 +389,10 @@
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	bool vlan_filtered;
 #endif
+
+#ifdef CONFIG_NET_SWITCHDEV
+	int offload_fwd_mark;
+#endif
 };
 
 #define BR_INPUT_SKB_CB(__skb)	((struct br_input_skb_cb *)(__skb)->cb)
@@ -496,7 +508,7 @@
 int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev,
 	       const unsigned char *addr, u16 vid, u16 nlh_flags);
 int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
-		struct net_device *dev, struct net_device *fdev, int idx);
+		struct net_device *dev, struct net_device *fdev, int *idx);
 int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p);
 void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p);
 int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
@@ -505,12 +517,17 @@
 			      const unsigned char *addr, u16 vid);
 
 /* br_forward.c */
+enum br_pkt_type {
+	BR_PKT_UNICAST,
+	BR_PKT_MULTICAST,
+	BR_PKT_BROADCAST
+};
 int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb);
 void br_forward(const struct net_bridge_port *to, struct sk_buff *skb,
 		bool local_rcv, bool local_orig);
 int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
 void br_flood(struct net_bridge *br, struct sk_buff *skb,
-	      bool unicast, bool local_rcv, bool local_orig);
+	      enum br_pkt_type pkt_type, bool local_rcv, bool local_orig);
 
 /* br_if.c */
 void br_port_carrier_check(struct net_bridge_port *p);
@@ -1034,4 +1051,29 @@
 static inline void br_sysfs_delbr(struct net_device *dev) { return; }
 #endif /* CONFIG_SYSFS */
 
+/* br_switchdev.c */
+#ifdef CONFIG_NET_SWITCHDEV
+int nbp_switchdev_mark_set(struct net_bridge_port *p);
+void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+			      struct sk_buff *skb);
+bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+				  const struct sk_buff *skb);
+#else
+static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
+{
+	return 0;
+}
+
+static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+					    struct sk_buff *skb)
+{
+}
+
+static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+						const struct sk_buff *skb)
+{
+	return true;
+}
+#endif /* CONFIG_NET_SWITCHDEV */
+
 #endif
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 341caa0..d8ad73b 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -134,17 +134,36 @@
 		br_become_root_bridge(br);
 }
 
+static int br_stp_call_user(struct net_bridge *br, char *arg)
+{
+	char *argv[] = { BR_STP_PROG, br->dev->name, arg, NULL };
+	char *envp[] = { NULL };
+	int rc;
+
+	/* call userspace STP and report program errors */
+	rc = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+	if (rc > 0) {
+		if (rc & 0xff)
+			br_debug(br, BR_STP_PROG " received signal %d\n",
+				 rc & 0x7f);
+		else
+			br_debug(br, BR_STP_PROG " exited with code %d\n",
+				 (rc >> 8) & 0xff);
+	}
+
+	return rc;
+}
+
 static void br_stp_start(struct net_bridge *br)
 {
-	int r;
-	char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
-	char *envp[] = { NULL };
 	struct net_bridge_port *p;
+	int err = -ENOENT;
 
 	if (net_eq(dev_net(br->dev), &init_net))
-		r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
-	else
-		r = -ENOENT;
+		err = br_stp_call_user(br, "start");
+
+	if (err && err != -ENOENT)
+		br_err(br, "failed to start userspace STP (%d)\n", err);
 
 	spin_lock_bh(&br->lock);
 
@@ -153,9 +172,10 @@
 	else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY)
 		__br_set_forward_delay(br, BR_MAX_FORWARD_DELAY);
 
-	if (r == 0) {
+	if (!err) {
 		br->stp_enabled = BR_USER_STP;
 		br_debug(br, "userspace STP started\n");
+
 		/* Stop hello and hold timers */
 		del_timer(&br->hello_timer);
 		list_for_each_entry(p, &br->port_list, list)
@@ -173,14 +193,13 @@
 
 static void br_stp_stop(struct net_bridge *br)
 {
-	int r;
-	char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL };
-	char *envp[] = { NULL };
 	struct net_bridge_port *p;
+	int err;
 
 	if (br->stp_enabled == BR_USER_STP) {
-		r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
-		br_info(br, "userspace STP stopped, return code %d\n", r);
+		err = br_stp_call_user(br, "stop");
+		if (err)
+			br_err(br, "failed to stop userspace STP (%d)\n", err);
 
 		/* To start timers on any ports left in blocking */
 		mod_timer(&br->hello_timer, jiffies + br->hello_time);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
new file mode 100644
index 0000000..f4097b9
--- /dev/null
+++ b/net/bridge/br_switchdev.c
@@ -0,0 +1,57 @@
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/switchdev.h>
+
+#include "br_private.h"
+
+static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
+{
+	struct net_bridge_port *p;
+
+	/* dev is yet to be added to the port list. */
+	list_for_each_entry(p, &br->port_list, list) {
+		if (switchdev_port_same_parent_id(dev, p->dev))
+			return p->offload_fwd_mark;
+	}
+
+	return ++br->offload_fwd_mark;
+}
+
+int nbp_switchdev_mark_set(struct net_bridge_port *p)
+{
+	struct switchdev_attr attr = {
+		.orig_dev = p->dev,
+		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
+	};
+	int err;
+
+	ASSERT_RTNL();
+
+	err = switchdev_port_attr_get(p->dev, &attr);
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+
+	p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev);
+
+	return 0;
+}
+
+void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+			      struct sk_buff *skb)
+{
+	if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark))
+		BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark;
+}
+
+bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+				  const struct sk_buff *skb)
+{
+	return !skb->offload_fwd_mark ||
+	       BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
+}
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 1e04d4d..e657258 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -171,6 +171,7 @@
 BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
 BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
 BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
+BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 152300d..9a11086 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -91,7 +91,7 @@
 	if (loginfo->type == NF_LOG_TYPE_LOG)
 		bitmask = loginfo->u.log.logflags;
 	else
-		bitmask = NF_LOG_MASK;
+		bitmask = NF_LOG_DEFAULT_MASK;
 
 	if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto ==
 	   htons(ETH_P_IP)) {
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 20396499..2e7c4f9 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -24,7 +24,7 @@
 		return EBT_DROP;
 
 	if (par->hooknum != NF_BR_BROUTING)
-		/* rcu_read_lock()ed by nf_hook_slow */
+		/* rcu_read_lock()ed by nf_hook_thresh */
 		ether_addr_copy(eth_hdr(skb)->h_dest,
 				br_port_get_rcu(par->in)->br->dev->dev_addr);
 	else
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index cceac5b..f5c11bb 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -146,7 +146,7 @@
 		return 1;
 	if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out)))
 		return 1;
-	/* rcu_read_lock()ed by nf_hook_slow */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	if (in && (p = br_port_get_rcu(in)) != NULL &&
 	    NF_INVF(e, EBT_ILOGICALIN,
 		    ebt_dev_check(e->logical_in, p->br->dev)))
@@ -368,6 +368,8 @@
 
 	match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
 	if (IS_ERR(match) || match->family != NFPROTO_BRIDGE) {
+		if (!IS_ERR(match))
+			module_put(match->me);
 		request_module("ebt_%s", m->u.name);
 		match = xt_find_match(NFPROTO_BRIDGE, m->u.name, 0);
 	}
diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c
index 5d9953a..1663df5 100644
--- a/net/bridge/netfilter/nf_log_bridge.c
+++ b/net/bridge/netfilter/nf_log_bridge.c
@@ -50,8 +50,7 @@
 
 static int __net_init nf_log_bridge_net_init(struct net *net)
 {
-	nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
-	return 0;
+	return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger);
 }
 
 static void __net_exit nf_log_bridge_net_exit(struct net *net)
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index a78c4e2..97afdc0 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -13,79 +13,11 @@
 #include <linux/module.h>
 #include <linux/netfilter_bridge.h>
 #include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_bridge.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <net/netfilter/nf_tables_ipv4.h>
 #include <net/netfilter/nf_tables_ipv6.h>
 
-int nft_bridge_iphdr_validate(struct sk_buff *skb)
-{
-	struct iphdr *iph;
-	u32 len;
-
-	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
-		return 0;
-
-	iph = ip_hdr(skb);
-	if (iph->ihl < 5 || iph->version != 4)
-		return 0;
-
-	len = ntohs(iph->tot_len);
-	if (skb->len < len)
-		return 0;
-	else if (len < (iph->ihl*4))
-		return 0;
-
-	if (!pskb_may_pull(skb, iph->ihl*4))
-		return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(nft_bridge_iphdr_validate);
-
-int nft_bridge_ip6hdr_validate(struct sk_buff *skb)
-{
-	struct ipv6hdr *hdr;
-	u32 pkt_len;
-
-	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
-		return 0;
-
-	hdr = ipv6_hdr(skb);
-	if (hdr->version != 6)
-		return 0;
-
-	pkt_len = ntohs(hdr->payload_len);
-	if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
-		return 0;
-
-	return 1;
-}
-EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate);
-
-static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
-					       struct sk_buff *skb,
-					       const struct nf_hook_state *state)
-{
-	if (nft_bridge_iphdr_validate(skb))
-		nft_set_pktinfo_ipv4(pkt, skb, state);
-	else
-		nft_set_pktinfo(pkt, skb, state);
-}
-
-static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
-					       struct sk_buff *skb,
-					       const struct nf_hook_state *state)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	if (nft_bridge_ip6hdr_validate(skb) &&
-	    nft_set_pktinfo_ipv6(pkt, skb, state) == 0)
-		return;
-#endif
-	nft_set_pktinfo(pkt, skb, state);
-}
-
 static unsigned int
 nft_do_chain_bridge(void *priv,
 		    struct sk_buff *skb,
@@ -95,13 +27,13 @@
 
 	switch (eth_hdr(skb)->h_proto) {
 	case htons(ETH_P_IP):
-		nft_bridge_set_pktinfo_ipv4(&pkt, skb, state);
+		nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
 		break;
 	case htons(ETH_P_IPV6):
-		nft_bridge_set_pktinfo_ipv6(&pkt, skb, state);
+		nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
 		break;
 	default:
-		nft_set_pktinfo(&pkt, skb, state);
+		nft_set_pktinfo_unspec(&pkt, skb, state);
 		break;
 	}
 
@@ -207,12 +139,20 @@
 	int ret;
 
 	nf_register_afinfo(&nf_br_afinfo);
-	nft_register_chain_type(&filter_bridge);
+	ret = nft_register_chain_type(&filter_bridge);
+	if (ret < 0)
+		goto err1;
+
 	ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
-	if (ret < 0) {
-		nft_unregister_chain_type(&filter_bridge);
-		nf_unregister_afinfo(&nf_br_afinfo);
-	}
+	if (ret < 0)
+		goto err2;
+
+	return ret;
+
+err2:
+	nft_unregister_chain_type(&filter_bridge);
+err1:
+	nf_unregister_afinfo(&nf_br_afinfo);
 	return ret;
 }
 
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 4b901d9..ad47a92 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -86,6 +86,7 @@
 	.init		= nft_meta_set_init,
 	.destroy	= nft_meta_set_destroy,
 	.dump		= nft_meta_set_dump,
+	.validate	= nft_meta_set_validate,
 };
 
 static const struct nft_expr_ops *
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 0b77ffb..4b3df6b 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -14,7 +14,6 @@
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nft_reject.h>
-#include <net/netfilter/nf_tables_bridge.h>
 #include <net/netfilter/ipv4/nf_reject.h>
 #include <net/netfilter/ipv6/nf_reject.h>
 #include <linux/ip.h>
@@ -37,6 +36,30 @@
 	skb_pull(nskb, ETH_HLEN);
 }
 
+static int nft_bridge_iphdr_validate(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	u32 len;
+
+	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+		return 0;
+
+	iph = ip_hdr(skb);
+	if (iph->ihl < 5 || iph->version != 4)
+		return 0;
+
+	len = ntohs(iph->tot_len);
+	if (skb->len < len)
+		return 0;
+	else if (len < (iph->ihl*4))
+		return 0;
+
+	if (!pskb_may_pull(skb, iph->ihl*4))
+		return 0;
+
+	return 1;
+}
+
 /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT)
  * or the bridge port (NF_BRIDGE PREROUTING).
  */
@@ -143,6 +166,25 @@
 	br_forward(br_port_get_rcu(dev), nskb, false, true);
 }
 
+static int nft_bridge_ip6hdr_validate(struct sk_buff *skb)
+{
+	struct ipv6hdr *hdr;
+	u32 pkt_len;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		return 0;
+
+	hdr = ipv6_hdr(skb);
+	if (hdr->version != 6)
+		return 0;
+
+	pkt_len = ntohs(hdr->payload_len);
+	if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
+		return 0;
+
+	return 1;
+}
+
 static void nft_reject_br_send_v6_tcp_reset(struct net *net,
 					    struct sk_buff *oldskb,
 					    const struct net_device *dev,
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index c83326c..ef34a02 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -574,7 +574,7 @@
 	put_generic_request(req);
 }
 
-void cancel_generic_request(struct ceph_mon_generic_request *req)
+static void cancel_generic_request(struct ceph_mon_generic_request *req)
 {
 	struct ceph_mon_client *monc = req->monc;
 	struct ceph_mon_generic_request *lookup_req;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b5ec096..a97e7b5 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -4220,7 +4220,7 @@
 
 		pages = ceph_alloc_page_vector(calc_pages_for(0, data_len),
 					       GFP_NOIO);
-		if (!pages) {
+		if (IS_ERR(pages)) {
 			ceph_msg_put(m);
 			return NULL;
 		}
diff --git a/net/ceph/string_table.c b/net/ceph/string_table.c
index ca53c83..22fb96e 100644
--- a/net/ceph/string_table.c
+++ b/net/ceph/string_table.c
@@ -84,12 +84,6 @@
 }
 EXPORT_SYMBOL(ceph_find_or_create_string);
 
-static void ceph_free_string(struct rcu_head *head)
-{
-	struct ceph_string *cs = container_of(head, struct ceph_string, rcu);
-	kfree(cs);
-}
-
 void ceph_release_string(struct kref *ref)
 {
 	struct ceph_string *cs = container_of(ref, struct ceph_string, kref);
@@ -101,7 +95,7 @@
 	}
 	spin_unlock(&string_tree_lock);
 
-	call_rcu(&cs->rcu, ceph_free_string);
+	kfree_rcu(cs, rcu);
 }
 EXPORT_SYMBOL(ceph_release_string);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 4ce07dc..f1fe26f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3355,16 +3355,6 @@
 	else
 		skb_dst_force(skb);
 
-#ifdef CONFIG_NET_SWITCHDEV
-	/* Don't forward if offload device already forwarded */
-	if (skb->offload_fwd_mark &&
-	    skb->offload_fwd_mark == dev->offload_fwd_mark) {
-		consume_skb(skb);
-		rc = NET_XMIT_SUCCESS;
-		goto out;
-	}
-#endif
-
 	txq = netdev_pick_tx(dev, skb, accel_priv);
 	q = rcu_dereference_bh(txq->qdisc);
 
@@ -3914,8 +3904,7 @@
 	}
 }
 
-#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
-    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
+#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
 /* This hook is defined here for ATM LANE */
 int (*br_fdb_test_addr_hook)(struct net_device *dev,
 			     unsigned char *addr) __read_mostly;
@@ -3975,6 +3964,22 @@
 }
 
 /**
+ *	netdev_is_rx_handler_busy - check if receive handler is registered
+ *	@dev: device to check
+ *
+ *	Check if a receive handler is already registered for a given device.
+ *	Return true if there one.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+bool netdev_is_rx_handler_busy(struct net_device *dev)
+{
+	ASSERT_RTNL();
+	return dev && rtnl_dereference(dev->rx_handler);
+}
+EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
+
+/**
  *	netdev_rx_handler_register - register receive handler
  *	@dev: device to register a handler for
  *	@rx_handler: receive handler to register
@@ -4050,12 +4055,17 @@
 {
 #ifdef CONFIG_NETFILTER_INGRESS
 	if (nf_hook_ingress_active(skb)) {
+		int ingress_retval;
+
 		if (*pt_prev) {
 			*ret = deliver_skb(skb, *pt_prev, orig_dev);
 			*pt_prev = NULL;
 		}
 
-		return nf_hook_ingress(skb);
+		rcu_read_lock();
+		ingress_retval = nf_hook_ingress(skb);
+		rcu_read_unlock();
+		return ingress_retval;
 	}
 #endif /* CONFIG_NETFILTER_INGRESS */
 	return 0;
@@ -4292,32 +4302,53 @@
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
-/* Network device is going away, flush any packets still pending
- * Called with irqs disabled.
- */
-static void flush_backlog(void *arg)
-{
-	struct net_device *dev = arg;
-	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
-	struct sk_buff *skb, *tmp;
+DEFINE_PER_CPU(struct work_struct, flush_works);
 
+/* Network device is going away, flush any packets still pending */
+static void flush_backlog(struct work_struct *work)
+{
+	struct sk_buff *skb, *tmp;
+	struct softnet_data *sd;
+
+	local_bh_disable();
+	sd = this_cpu_ptr(&softnet_data);
+
+	local_irq_disable();
 	rps_lock(sd);
 	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
-		if (skb->dev == dev) {
+		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
 			input_queue_head_incr(sd);
 		}
 	}
 	rps_unlock(sd);
+	local_irq_enable();
 
 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
-		if (skb->dev == dev) {
+		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->process_queue);
 			kfree_skb(skb);
 			input_queue_head_incr(sd);
 		}
 	}
+	local_bh_enable();
+}
+
+static void flush_all_backlogs(void)
+{
+	unsigned int cpu;
+
+	get_online_cpus();
+
+	for_each_online_cpu(cpu)
+		queue_work_on(cpu, system_highpri_wq,
+			      per_cpu_ptr(&flush_works, cpu));
+
+	for_each_online_cpu(cpu)
+		flush_work(per_cpu_ptr(&flush_works, cpu));
+
+	put_online_cpus();
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -4805,8 +4836,9 @@
 
 static int process_backlog(struct napi_struct *napi, int quota)
 {
-	int work = 0;
 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
+	bool again = true;
+	int work = 0;
 
 	/* Check if we have pending ipi, its better to send them now,
 	 * not waiting net_rx_action() end.
@@ -4817,23 +4849,20 @@
 	}
 
 	napi->weight = weight_p;
-	local_irq_disable();
-	while (1) {
+	while (again) {
 		struct sk_buff *skb;
 
 		while ((skb = __skb_dequeue(&sd->process_queue))) {
 			rcu_read_lock();
-			local_irq_enable();
 			__netif_receive_skb(skb);
 			rcu_read_unlock();
-			local_irq_disable();
 			input_queue_head_incr(sd);
-			if (++work >= quota) {
-				local_irq_enable();
+			if (++work >= quota)
 				return work;
-			}
+
 		}
 
+		local_irq_disable();
 		rps_lock(sd);
 		if (skb_queue_empty(&sd->input_pkt_queue)) {
 			/*
@@ -4845,16 +4874,14 @@
 			 * and we dont need an smp_mb() memory barrier.
 			 */
 			napi->state = 0;
-			rps_unlock(sd);
-
-			break;
+			again = false;
+		} else {
+			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+						   &sd->process_queue);
 		}
-
-		skb_queue_splice_tail_init(&sd->input_pkt_queue,
-					   &sd->process_queue);
 		rps_unlock(sd);
+		local_irq_enable();
 	}
-	local_irq_enable();
 
 	return work;
 }
@@ -5562,6 +5589,7 @@
 
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					struct net_device *adj_dev,
+					u16 ref_nr,
 					struct list_head *dev_list,
 					void *private, bool master)
 {
@@ -5571,7 +5599,7 @@
 	adj = __netdev_find_adj(adj_dev, dev_list);
 
 	if (adj) {
-		adj->ref_nr++;
+		adj->ref_nr += ref_nr;
 		return 0;
 	}
 
@@ -5581,7 +5609,7 @@
 
 	adj->dev = adj_dev;
 	adj->master = master;
-	adj->ref_nr = 1;
+	adj->ref_nr = ref_nr;
 	adj->private = private;
 	dev_hold(adj_dev);
 
@@ -5620,6 +5648,7 @@
 
 static void __netdev_adjacent_dev_remove(struct net_device *dev,
 					 struct net_device *adj_dev,
+					 u16 ref_nr,
 					 struct list_head *dev_list)
 {
 	struct netdev_adjacent *adj;
@@ -5632,10 +5661,10 @@
 		BUG();
 	}
 
-	if (adj->ref_nr > 1) {
-		pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
-			 adj->ref_nr-1);
-		adj->ref_nr--;
+	if (adj->ref_nr > ref_nr) {
+		pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name,
+			 ref_nr, adj->ref_nr-ref_nr);
+		adj->ref_nr -= ref_nr;
 		return;
 	}
 
@@ -5654,21 +5683,22 @@
 
 static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
 					    struct net_device *upper_dev,
+					    u16 ref_nr,
 					    struct list_head *up_list,
 					    struct list_head *down_list,
 					    void *private, bool master)
 {
 	int ret;
 
-	ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
-					   master);
+	ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list,
+					   private, master);
 	if (ret)
 		return ret;
 
-	ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
-					   false);
+	ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list,
+					   private, false);
 	if (ret) {
-		__netdev_adjacent_dev_remove(dev, upper_dev, up_list);
+		__netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
 		return ret;
 	}
 
@@ -5676,9 +5706,10 @@
 }
 
 static int __netdev_adjacent_dev_link(struct net_device *dev,
-				      struct net_device *upper_dev)
+				      struct net_device *upper_dev,
+				      u16 ref_nr)
 {
-	return __netdev_adjacent_dev_link_lists(dev, upper_dev,
+	return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr,
 						&dev->all_adj_list.upper,
 						&upper_dev->all_adj_list.lower,
 						NULL, false);
@@ -5686,17 +5717,19 @@
 
 static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
 					       struct net_device *upper_dev,
+					       u16 ref_nr,
 					       struct list_head *up_list,
 					       struct list_head *down_list)
 {
-	__netdev_adjacent_dev_remove(dev, upper_dev, up_list);
-	__netdev_adjacent_dev_remove(upper_dev, dev, down_list);
+	__netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
+	__netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
 }
 
 static void __netdev_adjacent_dev_unlink(struct net_device *dev,
-					 struct net_device *upper_dev)
+					 struct net_device *upper_dev,
+					 u16 ref_nr)
 {
-	__netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+	__netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr,
 					   &dev->all_adj_list.upper,
 					   &upper_dev->all_adj_list.lower);
 }
@@ -5705,17 +5738,17 @@
 						struct net_device *upper_dev,
 						void *private, bool master)
 {
-	int ret = __netdev_adjacent_dev_link(dev, upper_dev);
+	int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1);
 
 	if (ret)
 		return ret;
 
-	ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
+	ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1,
 					       &dev->adj_list.upper,
 					       &upper_dev->adj_list.lower,
 					       private, master);
 	if (ret) {
-		__netdev_adjacent_dev_unlink(dev, upper_dev);
+		__netdev_adjacent_dev_unlink(dev, upper_dev, 1);
 		return ret;
 	}
 
@@ -5725,8 +5758,8 @@
 static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
 						   struct net_device *upper_dev)
 {
-	__netdev_adjacent_dev_unlink(dev, upper_dev);
-	__netdev_adjacent_dev_unlink_lists(dev, upper_dev,
+	__netdev_adjacent_dev_unlink(dev, upper_dev, 1);
+	__netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
 					   &dev->adj_list.upper,
 					   &upper_dev->adj_list.lower);
 }
@@ -5779,7 +5812,7 @@
 		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
 			pr_debug("Interlinking %s with %s, non-neighbour\n",
 				 i->dev->name, j->dev->name);
-			ret = __netdev_adjacent_dev_link(i->dev, j->dev);
+			ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr);
 			if (ret)
 				goto rollback_mesh;
 		}
@@ -5789,7 +5822,7 @@
 	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
 		pr_debug("linking %s's upper device %s with %s\n",
 			 upper_dev->name, i->dev->name, dev->name);
-		ret = __netdev_adjacent_dev_link(dev, i->dev);
+		ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr);
 		if (ret)
 			goto rollback_upper_mesh;
 	}
@@ -5798,7 +5831,7 @@
 	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
 		pr_debug("linking %s's lower device %s with %s\n", dev->name,
 			 i->dev->name, upper_dev->name);
-		ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
+		ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr);
 		if (ret)
 			goto rollback_lower_mesh;
 	}
@@ -5816,7 +5849,7 @@
 	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
 		if (i == to_i)
 			break;
-		__netdev_adjacent_dev_unlink(i->dev, upper_dev);
+		__netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
 	}
 
 	i = NULL;
@@ -5826,7 +5859,7 @@
 	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
 		if (i == to_i)
 			break;
-		__netdev_adjacent_dev_unlink(dev, i->dev);
+		__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
 	}
 
 	i = j = NULL;
@@ -5838,7 +5871,7 @@
 		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
 			if (i == to_i && j == to_j)
 				break;
-			__netdev_adjacent_dev_unlink(i->dev, j->dev);
+			__netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
 		}
 		if (i == to_i)
 			break;
@@ -5918,16 +5951,16 @@
 	 */
 	list_for_each_entry(i, &dev->all_adj_list.lower, list)
 		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
-			__netdev_adjacent_dev_unlink(i->dev, j->dev);
+			__netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
 
 	/* remove also the devices itself from lower/upper device
 	 * list
 	 */
 	list_for_each_entry(i, &dev->all_adj_list.lower, list)
-		__netdev_adjacent_dev_unlink(i->dev, upper_dev);
+		__netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
 
 	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
-		__netdev_adjacent_dev_unlink(dev, i->dev);
+		__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
 
 	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
 				      &changeupper_info.info);
@@ -6045,8 +6078,7 @@
 EXPORT_SYMBOL(netdev_lower_dev_get_private);
 
 
-int dev_get_nest_level(struct net_device *dev,
-		       bool (*type_check)(const struct net_device *dev))
+int dev_get_nest_level(struct net_device *dev)
 {
 	struct net_device *lower = NULL;
 	struct list_head *iter;
@@ -6056,15 +6088,12 @@
 	ASSERT_RTNL();
 
 	netdev_for_each_lower_dev(dev, lower, iter) {
-		nest = dev_get_nest_level(lower, type_check);
+		nest = dev_get_nest_level(lower);
 		if (max_nest < nest)
 			max_nest = nest;
 	}
 
-	if (type_check(dev))
-		max_nest++;
-
-	return max_nest;
+	return max_nest + 1;
 }
 EXPORT_SYMBOL(dev_get_nest_level);
 
@@ -6711,8 +6740,8 @@
 		unlist_netdevice(dev);
 
 		dev->reg_state = NETREG_UNREGISTERING;
-		on_each_cpu(flush_backlog, dev, 1);
 	}
+	flush_all_backlogs();
 
 	synchronize_net();
 
@@ -7629,6 +7658,9 @@
 	INIT_LIST_HEAD(&dev->all_adj_list.lower);
 	INIT_LIST_HEAD(&dev->ptype_all);
 	INIT_LIST_HEAD(&dev->ptype_specific);
+#ifdef CONFIG_NET_SCHED
+	hash_init(dev->qdisc_hash);
+#endif
 	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
 
@@ -8274,8 +8306,11 @@
 	 */
 
 	for_each_possible_cpu(i) {
+		struct work_struct *flush = per_cpu_ptr(&flush_works, i);
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
+		INIT_WORK(flush, flush_backlog);
+
 		skb_queue_head_init(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
 		INIT_LIST_HEAD(&sd->poll_list);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d6b3b57..72cfb0c 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -105,7 +105,7 @@
 	return skb;
 }
 
-static struct genl_multicast_group dropmon_mcgrps[] = {
+static const struct genl_multicast_group dropmon_mcgrps[] = {
 	{ .name = "events", },
 };
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 5708999..00351cd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -94,14 +94,13 @@
 }
 EXPORT_SYMBOL(sk_filter_trim_cap);
 
-static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
 {
-	return skb_get_poff((struct sk_buff *)(unsigned long) ctx);
+	return skb_get_poff(skb);
 }
 
-static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
 {
-	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
 	struct nlattr *nla;
 
 	if (skb_is_nonlinear(skb))
@@ -120,9 +119,8 @@
 	return 0;
 }
 
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
 {
-	struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx;
 	struct nlattr *nla;
 
 	if (skb_is_nonlinear(skb))
@@ -145,7 +143,7 @@
 	return 0;
 }
 
-static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
+BPF_CALL_0(__get_raw_cpu_id)
 {
 	return raw_smp_processor_id();
 }
@@ -233,9 +231,8 @@
 	case SKF_AD_OFF + SKF_AD_HATYPE:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
-		BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
 
-		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
 				      BPF_REG_TMP, BPF_REG_CTX,
 				      offsetof(struct sk_buff, dev));
 		/* if (tmp != 0) goto pc + 1 */
@@ -1350,61 +1347,58 @@
 
 static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
 
+static inline int __bpf_try_make_writable(struct sk_buff *skb,
+					  unsigned int write_len)
+{
+	return skb_ensure_writable(skb, write_len);
+}
+
 static inline int bpf_try_make_writable(struct sk_buff *skb,
 					unsigned int write_len)
 {
-	int err;
+	int err = __bpf_try_make_writable(skb, write_len);
 
-	if (!skb_cloned(skb))
-		return 0;
-	if (skb_clone_writable(skb, write_len))
-		return 0;
-	err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-	if (!err)
-		bpf_compute_data_end(skb);
+	bpf_compute_data_end(skb);
 	return err;
 }
 
-static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
+static int bpf_try_make_head_writable(struct sk_buff *skb)
 {
-	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	int offset = (int) r2;
-	void *from = (void *) (long) r3;
-	unsigned int len = (unsigned int) r4;
+	return bpf_try_make_writable(skb, skb_headlen(skb));
+}
+
+static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
+{
+	if (skb_at_tc_ingress(skb))
+		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
+static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
+{
+	if (skb_at_tc_ingress(skb))
+		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
+}
+
+BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
+	   const void *, from, u32, len, u64, flags)
+{
 	void *ptr;
 
 	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
 		return -EINVAL;
-
-	/* bpf verifier guarantees that:
-	 * 'from' pointer points to bpf program stack
-	 * 'len' bytes of it were initialized
-	 * 'len' > 0
-	 * 'skb' is a valid pointer to 'struct sk_buff'
-	 *
-	 * so check for invalid 'offset' and too large 'len'
-	 */
-	if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff)))
+	if (unlikely(offset > 0xffff))
 		return -EFAULT;
 	if (unlikely(bpf_try_make_writable(skb, offset + len)))
 		return -EFAULT;
 
-	ptr = skb_header_pointer(skb, offset, len, sp->buff);
-	if (unlikely(!ptr))
-		return -EFAULT;
-
+	ptr = skb->data + offset;
 	if (flags & BPF_F_RECOMPUTE_CSUM)
-		skb_postpull_rcsum(skb, ptr, len);
+		__skb_postpull_rcsum(skb, ptr, len, offset);
 
 	memcpy(ptr, from, len);
 
-	if (ptr == sp->buff)
-		/* skb_store_bits cannot return -EFAULT here */
-		skb_store_bits(skb, offset, ptr, len);
-
 	if (flags & BPF_F_RECOMPUTE_CSUM)
-		skb_postpush_rcsum(skb, ptr, len);
+		__skb_postpush_rcsum(skb, ptr, len, offset);
 	if (flags & BPF_F_INVALIDATE_HASH)
 		skb_clear_hash(skb);
 
@@ -1422,15 +1416,12 @@
 	.arg5_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
+	   void *, to, u32, len)
 {
-	const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1;
-	int offset = (int) r2;
-	void *to = (void *)(unsigned long) r3;
-	unsigned int len = (unsigned int) r4;
 	void *ptr;
 
-	if (unlikely((u32) offset > 0xffff))
+	if (unlikely(offset > 0xffff))
 		goto err_clear;
 
 	ptr = skb_header_pointer(skb, offset, len, to);
@@ -1455,23 +1446,41 @@
 	.arg4_type	= ARG_CONST_STACK_SIZE,
 };
 
-static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	int offset = (int) r2;
-	__sum16 sum, *ptr;
+	/* Idea is the following: should the needed direct read/write
+	 * test fail during runtime, we can pull in more data and redo
+	 * again, since implicitly, we invalidate previous checks here.
+	 *
+	 * Or, since we know how much we need to make read/writeable,
+	 * this can be done once at the program beginning for direct
+	 * access case. By this we overcome limitations of only current
+	 * headroom being accessible.
+	 */
+	return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto bpf_skb_pull_data_proto = {
+	.func		= bpf_skb_pull_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
+	   u64, from, u64, to, u64, flags)
+{
+	__sum16 *ptr;
 
 	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
 		return -EINVAL;
-	if (unlikely((u32) offset > 0xffff))
+	if (unlikely(offset > 0xffff || offset & 1))
 		return -EFAULT;
-	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
+	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
 		return -EFAULT;
 
-	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
-	if (unlikely(!ptr))
-		return -EFAULT;
-
+	ptr = (__sum16 *)(skb->data + offset);
 	switch (flags & BPF_F_HDR_FIELD_MASK) {
 	case 0:
 		if (unlikely(from != 0))
@@ -1489,10 +1498,6 @@
 		return -EINVAL;
 	}
 
-	if (ptr == &sum)
-		/* skb_store_bits guaranteed to not return -EFAULT here */
-		skb_store_bits(skb, offset, ptr, sizeof(sum));
-
 	return 0;
 }
 
@@ -1507,25 +1512,22 @@
 	.arg5_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
+BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
+	   u64, from, u64, to, u64, flags)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
 	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
-	int offset = (int) r2;
-	__sum16 sum, *ptr;
+	__sum16 *ptr;
 
 	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR |
 			       BPF_F_HDR_FIELD_MASK)))
 		return -EINVAL;
-	if (unlikely((u32) offset > 0xffff))
+	if (unlikely(offset > 0xffff || offset & 1))
 		return -EFAULT;
-	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum))))
+	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
 		return -EFAULT;
 
-	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
-	if (unlikely(!ptr))
-		return -EFAULT;
+	ptr = (__sum16 *)(skb->data + offset);
 	if (is_mmzero && !*ptr)
 		return 0;
 
@@ -1548,10 +1550,6 @@
 
 	if (is_mmzero && !*ptr)
 		*ptr = CSUM_MANGLED_0;
-	if (ptr == &sum)
-		/* skb_store_bits guaranteed to not return -EFAULT here */
-		skb_store_bits(skb, offset, ptr, sizeof(sum));
-
 	return 0;
 }
 
@@ -1566,12 +1564,11 @@
 	.arg5_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed)
+BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
+	   __be32 *, to, u32, to_size, __wsum, seed)
 {
 	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
-	u64 diff_size = from_size + to_size;
-	__be32 *from = (__be32 *) (long) r1;
-	__be32 *to   = (__be32 *) (long) r3;
+	u32 diff_size = from_size + to_size;
 	int i, j = 0;
 
 	/* This is quite flexible, some examples:
@@ -1597,6 +1594,7 @@
 static const struct bpf_func_proto bpf_csum_diff_proto = {
 	.func		= bpf_csum_diff,
 	.gpl_only	= false,
+	.pkt_access	= true,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_STACK,
 	.arg2_type	= ARG_CONST_STACK_SIZE_OR_ZERO,
@@ -1605,11 +1603,28 @@
 	.arg5_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
+{
+	/* The interface is to be used in combination with bpf_csum_diff()
+	 * for direct packet writes. csum rotation for alignment as well
+	 * as emulating csum_sub() can be done from the eBPF program.
+	 */
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		return (skb->csum = csum_add(skb->csum, csum));
+
+	return -ENOTSUPP;
+}
+
+static const struct bpf_func_proto bpf_csum_update_proto = {
+	.func		= bpf_csum_update,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_at_tc_ingress(skb))
-		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
-
 	return dev_forward_skb(dev, skb);
 }
 
@@ -1632,10 +1647,11 @@
 	return ret;
 }
 
-static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	struct net_device *dev;
+	struct sk_buff *clone;
+	int ret;
 
 	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return -EINVAL;
@@ -1644,12 +1660,25 @@
 	if (unlikely(!dev))
 		return -EINVAL;
 
-	skb = skb_clone(skb, GFP_ATOMIC);
-	if (unlikely(!skb))
+	clone = skb_clone(skb, GFP_ATOMIC);
+	if (unlikely(!clone))
 		return -ENOMEM;
 
+	/* For direct write, we need to keep the invariant that the skbs
+	 * we're dealing with need to be uncloned. Should uncloning fail
+	 * here, we need to free the just generated clone to unclone once
+	 * again.
+	 */
+	ret = bpf_try_make_head_writable(skb);
+	if (unlikely(ret)) {
+		kfree_skb(clone);
+		return -ENOMEM;
+	}
+
+	bpf_push_mac_rcsum(clone);
+
 	return flags & BPF_F_INGRESS ?
-	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
+	       __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone);
 }
 
 static const struct bpf_func_proto bpf_clone_redirect_proto = {
@@ -1668,7 +1697,7 @@
 
 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
 
-static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
 {
 	struct redirect_info *ri = this_cpu_ptr(&redirect_info);
 
@@ -1693,6 +1722,8 @@
 		return -EINVAL;
 	}
 
+	bpf_push_mac_rcsum(skb);
+
 	return ri->flags & BPF_F_INGRESS ?
 	       __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
 }
@@ -1705,9 +1736,9 @@
 	.arg2_type      = ARG_ANYTHING,
 };
 
-static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
-	return task_get_classid((struct sk_buff *) (unsigned long) r1);
+	return task_get_classid(skb);
 }
 
 static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
@@ -1717,9 +1748,9 @@
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
-static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
 {
-	return dst_tclassid((struct sk_buff *) (unsigned long) r1);
+	return dst_tclassid(skb);
 }
 
 static const struct bpf_func_proto bpf_get_route_realm_proto = {
@@ -1729,14 +1760,14 @@
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
-static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
 {
 	/* If skb_clear_hash() was called due to mangling, we can
 	 * trigger SW recalculation here. Later access to hash
 	 * can then use the inline skb->hash via context directly
 	 * instead of calling this helper again.
 	 */
-	return skb_get_hash((struct sk_buff *) (unsigned long) r1);
+	return skb_get_hash(skb);
 }
 
 static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
@@ -1746,17 +1777,35 @@
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
-static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
+BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	__be16 vlan_proto = (__force __be16) r2;
+	/* After all direct packet write, this can be used once for
+	 * triggering a lazy recalc on next skb_get_hash() invocation.
+	 */
+	skb_clear_hash(skb);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
+	.func		= bpf_set_hash_invalid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
+BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
+	   u16, vlan_tci)
+{
 	int ret;
 
 	if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
 		     vlan_proto != htons(ETH_P_8021AD)))
 		vlan_proto = htons(ETH_P_8021Q);
 
+	bpf_push_mac_rcsum(skb);
 	ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
+	bpf_pull_mac_rcsum(skb);
+
 	bpf_compute_data_end(skb);
 	return ret;
 }
@@ -1771,12 +1820,14 @@
 };
 EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
 
-static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	int ret;
 
+	bpf_push_mac_rcsum(skb);
 	ret = skb_vlan_pop(skb);
+	bpf_pull_mac_rcsum(skb);
+
 	bpf_compute_data_end(skb);
 	return ret;
 }
@@ -1945,10 +1996,9 @@
 	return -ENOTSUPP;
 }
 
-static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
+	   u64, flags)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	__be16 proto = (__force __be16) r2;
 	int ret;
 
 	if (unlikely(flags))
@@ -1985,14 +2035,11 @@
 	.arg3_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	u32 pkt_type = r2;
-
 	/* We only allow a restricted subset to be changed for now. */
-	if (unlikely(skb->pkt_type > PACKET_OTHERHOST ||
-		     pkt_type > PACKET_OTHERHOST))
+	if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
+		     !skb_pkt_type_ok(pkt_type)))
 		return -EINVAL;
 
 	skb->pkt_type = pkt_type;
@@ -2007,19 +2054,100 @@
 	.arg2_type	= ARG_ANYTHING,
 };
 
+static u32 __bpf_skb_min_len(const struct sk_buff *skb)
+{
+	u32 min_len = skb_network_offset(skb);
+
+	if (skb_transport_header_was_set(skb))
+		min_len = skb_transport_offset(skb);
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		min_len = skb_checksum_start_offset(skb) +
+			  skb->csum_offset + sizeof(__sum16);
+	return min_len;
+}
+
+static u32 __bpf_skb_max_len(const struct sk_buff *skb)
+{
+	return skb->dev->mtu + skb->dev->hard_header_len;
+}
+
+static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+	unsigned int old_len = skb->len;
+	int ret;
+
+	ret = __skb_grow_rcsum(skb, new_len);
+	if (!ret)
+		memset(skb->data + old_len, 0, new_len - old_len);
+	return ret;
+}
+
+static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
+{
+	return __skb_trim_rcsum(skb, new_len);
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+	   u64, flags)
+{
+	u32 max_len = __bpf_skb_max_len(skb);
+	u32 min_len = __bpf_skb_min_len(skb);
+	int ret;
+
+	if (unlikely(flags || new_len > max_len || new_len < min_len))
+		return -EINVAL;
+	if (skb->encapsulation)
+		return -ENOTSUPP;
+
+	/* The basic idea of this helper is that it's performing the
+	 * needed work to either grow or trim an skb, and eBPF program
+	 * rewrites the rest via helpers like bpf_skb_store_bytes(),
+	 * bpf_lX_csum_replace() and others rather than passing a raw
+	 * buffer here. This one is a slow path helper and intended
+	 * for replies with control messages.
+	 *
+	 * Like in bpf_skb_change_proto(), we want to keep this rather
+	 * minimal and without protocol specifics so that we are able
+	 * to separate concerns as in bpf_skb_store_bytes() should only
+	 * be the one responsible for writing buffers.
+	 *
+	 * It's really expected to be a slow path operation here for
+	 * control message replies, so we're implicitly linearizing,
+	 * uncloning and drop offloads from the skb by this.
+	 */
+	ret = __bpf_try_make_writable(skb, skb->len);
+	if (!ret) {
+		if (new_len > skb->len)
+			ret = bpf_skb_grow_rcsum(skb, new_len);
+		else if (new_len < skb->len)
+			ret = bpf_skb_trim_rcsum(skb, new_len);
+		if (!ret && skb_is_gso(skb))
+			skb_gso_reset(skb);
+	}
+
+	bpf_compute_data_end(skb);
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_skb_change_tail_proto = {
+	.func		= bpf_skb_change_tail,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+
 bool bpf_helper_changes_skb_data(void *func)
 {
-	if (func == bpf_skb_vlan_push)
-		return true;
-	if (func == bpf_skb_vlan_pop)
-		return true;
-	if (func == bpf_skb_store_bytes)
-		return true;
-	if (func == bpf_skb_change_proto)
-		return true;
-	if (func == bpf_l3_csum_replace)
-		return true;
-	if (func == bpf_l4_csum_replace)
+	if (func == bpf_skb_vlan_push ||
+	    func == bpf_skb_vlan_pop ||
+	    func == bpf_skb_store_bytes ||
+	    func == bpf_skb_change_proto ||
+	    func == bpf_skb_change_tail ||
+	    func == bpf_skb_pull_data ||
+	    func == bpf_l3_csum_replace ||
+	    func == bpf_l4_csum_replace)
 		return true;
 
 	return false;
@@ -2038,13 +2166,10 @@
 	return 0;
 }
 
-static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4,
-				u64 meta_size)
+BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
+	   u64, flags, void *, meta, u64, meta_size)
 {
-	struct sk_buff *skb = (struct sk_buff *)(long) r1;
-	struct bpf_map *map = (struct bpf_map *)(long) r2;
 	u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
-	void *meta = (void *)(long) r4;
 
 	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
 		return -EINVAL;
@@ -2071,10 +2196,9 @@
 	return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
 }
 
-static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
+	   u32, size, u64, flags)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2;
 	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
 	u8 compat[sizeof(struct bpf_tunnel_key)];
 	void *to_orig = to;
@@ -2139,10 +2263,8 @@
 	.arg4_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	u8 *to = (u8 *) (long) r2;
 	const struct ip_tunnel_info *info = skb_tunnel_info(skb);
 	int err;
 
@@ -2177,10 +2299,9 @@
 
 static struct metadata_dst __percpu *md_dst;
 
-static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
+	   const struct bpf_tunnel_key *, from, u32, size, u64, flags)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2;
 	struct metadata_dst *md = this_cpu_ptr(md_dst);
 	u8 compat[sizeof(struct bpf_tunnel_key)];
 	struct ip_tunnel_info *info;
@@ -2198,7 +2319,7 @@
 			 */
 			memcpy(compat, from, size);
 			memset(compat + size, 0, sizeof(compat) - size);
-			from = (struct bpf_tunnel_key *)compat;
+			from = (const struct bpf_tunnel_key *) compat;
 			break;
 		default:
 			return -EINVAL;
@@ -2248,10 +2369,9 @@
 	.arg4_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
+	   const u8 *, from, u32, size)
 {
-	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	u8 *from = (u8 *) (long) r2;
 	struct ip_tunnel_info *info = skb_tunnel_info(skb);
 	const struct metadata_dst *md = this_cpu_ptr(md_dst);
 
@@ -2297,39 +2417,66 @@
 	}
 }
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
-static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
+	   u32, idx)
 {
-	struct sk_buff *skb = (struct sk_buff *)(long)r1;
-	struct bpf_map *map = (struct bpf_map *)(long)r2;
 	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct cgroup *cgrp;
 	struct sock *sk;
-	u32 i = (u32)r3;
 
-	sk = skb->sk;
+	sk = skb_to_full_sk(skb);
 	if (!sk || !sk_fullsock(sk))
 		return -ENOENT;
-
-	if (unlikely(i >= array->map.max_entries))
+	if (unlikely(idx >= array->map.max_entries))
 		return -E2BIG;
 
-	cgrp = READ_ONCE(array->ptrs[i]);
+	cgrp = READ_ONCE(array->ptrs[idx]);
 	if (unlikely(!cgrp))
 		return -EAGAIN;
 
-	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
+	return sk_under_cgroup_hierarchy(sk, cgrp);
 }
 
-static const struct bpf_func_proto bpf_skb_in_cgroup_proto = {
-	.func		= bpf_skb_in_cgroup,
+static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
+	.func		= bpf_skb_under_cgroup,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
 	.arg2_type	= ARG_CONST_MAP_PTR,
 	.arg3_type	= ARG_ANYTHING,
 };
-#endif
+
+static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
+				  unsigned long off, unsigned long len)
+{
+	memcpy(dst_buff, src_buff + off, len);
+	return 0;
+}
+
+BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
+	   u64, flags, void *, meta, u64, meta_size)
+{
+	u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
+
+	if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
+		return -EINVAL;
+	if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+		return -EFAULT;
+
+	return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size,
+				bpf_xdp_copy);
+}
+
+static const struct bpf_func_proto bpf_xdp_event_output_proto = {
+	.func		= bpf_xdp_event_output,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_STACK,
+	.arg5_type	= ARG_CONST_STACK_SIZE,
+};
 
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
@@ -2365,8 +2512,12 @@
 		return &bpf_skb_store_bytes_proto;
 	case BPF_FUNC_skb_load_bytes:
 		return &bpf_skb_load_bytes_proto;
+	case BPF_FUNC_skb_pull_data:
+		return &bpf_skb_pull_data_proto;
 	case BPF_FUNC_csum_diff:
 		return &bpf_csum_diff_proto;
+	case BPF_FUNC_csum_update:
+		return &bpf_csum_update_proto;
 	case BPF_FUNC_l3_csum_replace:
 		return &bpf_l3_csum_replace_proto;
 	case BPF_FUNC_l4_csum_replace:
@@ -2383,6 +2534,8 @@
 		return &bpf_skb_change_proto_proto;
 	case BPF_FUNC_skb_change_type:
 		return &bpf_skb_change_type_proto;
+	case BPF_FUNC_skb_change_tail:
+		return &bpf_skb_change_tail_proto;
 	case BPF_FUNC_skb_get_tunnel_key:
 		return &bpf_skb_get_tunnel_key_proto;
 	case BPF_FUNC_skb_set_tunnel_key:
@@ -2397,14 +2550,14 @@
 		return &bpf_get_route_realm_proto;
 	case BPF_FUNC_get_hash_recalc:
 		return &bpf_get_hash_recalc_proto;
+	case BPF_FUNC_set_hash_invalid:
+		return &bpf_set_hash_invalid_proto;
 	case BPF_FUNC_perf_event_output:
 		return &bpf_skb_event_output_proto;
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
-#ifdef CONFIG_SOCK_CGROUP_DATA
-	case BPF_FUNC_skb_in_cgroup:
-		return &bpf_skb_in_cgroup_proto;
-#endif
+	case BPF_FUNC_skb_under_cgroup:
+		return &bpf_skb_under_cgroup_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
@@ -2413,7 +2566,14 @@
 static const struct bpf_func_proto *
 xdp_func_proto(enum bpf_func_id func_id)
 {
-	return sk_filter_func_proto(func_id);
+	switch (func_id) {
+	case BPF_FUNC_perf_event_output:
+		return &bpf_xdp_event_output_proto;
+	case BPF_FUNC_get_smp_processor_id:
+		return &bpf_get_smp_processor_id_proto;
+	default:
+		return sk_filter_func_proto(func_id);
+	}
 }
 
 static bool __is_valid_access(int off, int size, enum bpf_access_type type)
@@ -2453,6 +2613,45 @@
 	return __is_valid_access(off, size, type);
 }
 
+static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
+			       const struct bpf_prog *prog)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	if (!direct_write)
+		return 0;
+
+	/* if (!skb->cloned)
+	 *       goto start;
+	 *
+	 * (Fast-path, otherwise approximation that we might be
+	 *  a clone, do the rest in helper.)
+	 */
+	*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
+	*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
+	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
+
+	/* ret = bpf_skb_pull_data(skb, 0); */
+	*insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+	*insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
+	*insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			       BPF_FUNC_skb_pull_data);
+	/* if (!ret)
+	 *      goto restore;
+	 * return TC_ACT_SHOT;
+	 */
+	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
+	*insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT);
+	*insn++ = BPF_EXIT_INSN();
+
+	/* restore: */
+	*insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+	/* start: */
+	*insn++ = prog->insnsi[0];
+
+	return insn - insn_buf;
+}
+
 static bool tc_cls_act_is_valid_access(int off, int size,
 				       enum bpf_access_type type,
 				       enum bpf_reg_type *reg_type)
@@ -2490,7 +2689,7 @@
 		return false;
 	if (off % size != 0)
 		return false;
-	if (size != 4)
+	if (size != sizeof(__u32))
 		return false;
 
 	return true;
@@ -2521,10 +2720,10 @@
 }
 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
 
-static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
-				      int src_reg, int ctx_off,
-				      struct bpf_insn *insn_buf,
-				      struct bpf_prog *prog)
+static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+					int src_reg, int ctx_off,
+					struct bpf_insn *insn_buf,
+					struct bpf_prog *prog)
 {
 	struct bpf_insn *insn = insn_buf;
 
@@ -2571,7 +2770,7 @@
 	case offsetof(struct __sk_buff, ifindex):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 
-		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
 				      dst_reg, src_reg,
 				      offsetof(struct sk_buff, dev));
 		*insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
@@ -2612,7 +2811,7 @@
 					  dst_reg, src_reg, insn);
 
 	case offsetof(struct __sk_buff, cb[0]) ...
-		offsetof(struct __sk_buff, cb[4]):
+	     offsetof(struct __sk_buff, cb[4]):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
 
 		prog->cb_access = 1;
@@ -2636,7 +2835,7 @@
 		break;
 
 	case offsetof(struct __sk_buff, data):
-		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
 				      dst_reg, src_reg,
 				      offsetof(struct sk_buff, data));
 		break;
@@ -2645,8 +2844,8 @@
 		ctx_off -= offsetof(struct __sk_buff, data_end);
 		ctx_off += offsetof(struct sk_buff, cb);
 		ctx_off += offsetof(struct bpf_skb_data_end, data_end);
-		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)),
-				      dst_reg, src_reg, ctx_off);
+		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg,
+				      ctx_off);
 		break;
 
 	case offsetof(struct __sk_buff, tc_index):
@@ -2672,6 +2871,31 @@
 	return insn - insn_buf;
 }
 
+static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+					 int src_reg, int ctx_off,
+					 struct bpf_insn *insn_buf,
+					 struct bpf_prog *prog)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (ctx_off) {
+	case offsetof(struct __sk_buff, ifindex):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
+				      dst_reg, src_reg,
+				      offsetof(struct sk_buff, dev));
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+				      offsetof(struct net_device, ifindex));
+		break;
+	default:
+		return sk_filter_convert_ctx_access(type, dst_reg, src_reg,
+						    ctx_off, insn_buf, prog);
+	}
+
+	return insn - insn_buf;
+}
+
 static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg,
 				  int src_reg, int ctx_off,
 				  struct bpf_insn *insn_buf,
@@ -2681,12 +2905,12 @@
 
 	switch (ctx_off) {
 	case offsetof(struct xdp_md, data):
-		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data)),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
 				      dst_reg, src_reg,
 				      offsetof(struct xdp_buff, data));
 		break;
 	case offsetof(struct xdp_md, data_end):
-		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data_end)),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
 				      dst_reg, src_reg,
 				      offsetof(struct xdp_buff, data_end));
 		break;
@@ -2698,13 +2922,14 @@
 static const struct bpf_verifier_ops sk_filter_ops = {
 	.get_func_proto		= sk_filter_func_proto,
 	.is_valid_access	= sk_filter_is_valid_access,
-	.convert_ctx_access	= bpf_net_convert_ctx_access,
+	.convert_ctx_access	= sk_filter_convert_ctx_access,
 };
 
 static const struct bpf_verifier_ops tc_cls_act_ops = {
 	.get_func_proto		= tc_cls_act_func_proto,
 	.is_valid_access	= tc_cls_act_is_valid_access,
-	.convert_ctx_access	= bpf_net_convert_ctx_access,
+	.convert_ctx_access	= tc_cls_act_convert_ctx_access,
+	.gen_prologue		= tc_cls_act_prologue,
 };
 
 static const struct bpf_verifier_ops xdp_ops = {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 61ad43f..1a7b80f 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -6,6 +6,8 @@
 #include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/gre.h>
+#include <net/pptp.h>
 #include <linux/igmp.h>
 #include <linux/icmp.h>
 #include <linux/sctp.h>
@@ -116,13 +118,16 @@
 	struct flow_dissector_key_addrs *key_addrs;
 	struct flow_dissector_key_ports *key_ports;
 	struct flow_dissector_key_tags *key_tags;
+	struct flow_dissector_key_vlan *key_vlan;
 	struct flow_dissector_key_keyid *key_keyid;
+	bool skip_vlan = false;
 	u8 ip_proto = 0;
 	bool ret = false;
 
 	if (!data) {
 		data = skb->data;
-		proto = skb->protocol;
+		proto = skb_vlan_tag_present(skb) ?
+			 skb->vlan_proto : skb->protocol;
 		nhoff = skb_network_offset(skb);
 		hlen = skb_headlen(skb);
 	}
@@ -241,23 +246,45 @@
 	case htons(ETH_P_8021AD):
 	case htons(ETH_P_8021Q): {
 		const struct vlan_hdr *vlan;
-		struct vlan_hdr _vlan;
 
-		vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan);
-		if (!vlan)
-			goto out_bad;
+		if (skb_vlan_tag_present(skb))
+			proto = skb->protocol;
 
-		if (dissector_uses_key(flow_dissector,
-				       FLOW_DISSECTOR_KEY_VLANID)) {
-			key_tags = skb_flow_dissector_target(flow_dissector,
-							     FLOW_DISSECTOR_KEY_VLANID,
-							     target_container);
+		if (!skb_vlan_tag_present(skb) ||
+		    proto == cpu_to_be16(ETH_P_8021Q) ||
+		    proto == cpu_to_be16(ETH_P_8021AD)) {
+			struct vlan_hdr _vlan;
 
-			key_tags->vlan_id = skb_vlan_tag_get_id(skb);
+			vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan),
+						    data, hlen, &_vlan);
+			if (!vlan)
+				goto out_bad;
+			proto = vlan->h_vlan_encapsulated_proto;
+			nhoff += sizeof(*vlan);
+			if (skip_vlan)
+				goto again;
 		}
 
-		proto = vlan->h_vlan_encapsulated_proto;
-		nhoff += sizeof(*vlan);
+		skip_vlan = true;
+		if (dissector_uses_key(flow_dissector,
+				       FLOW_DISSECTOR_KEY_VLAN)) {
+			key_vlan = skb_flow_dissector_target(flow_dissector,
+							     FLOW_DISSECTOR_KEY_VLAN,
+							     target_container);
+
+			if (skb_vlan_tag_present(skb)) {
+				key_vlan->vlan_id = skb_vlan_tag_get_id(skb);
+				key_vlan->vlan_priority =
+					(skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT);
+			} else {
+				key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) &
+					VLAN_VID_MASK;
+				key_vlan->vlan_priority =
+					(ntohs(vlan->h_vlan_TCI) &
+					 VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+			}
+		}
+
 		goto again;
 	}
 	case htons(ETH_P_PPP_SES): {
@@ -338,32 +365,42 @@
 ip_proto_again:
 	switch (ip_proto) {
 	case IPPROTO_GRE: {
-		struct gre_hdr {
-			__be16 flags;
-			__be16 proto;
-		} *hdr, _hdr;
+		struct gre_base_hdr *hdr, _hdr;
+		u16 gre_ver;
+		int offset = 0;
 
 		hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr);
 		if (!hdr)
 			goto out_bad;
-		/*
-		 * Only look inside GRE if version zero and no
-		 * routing
-		 */
-		if (hdr->flags & (GRE_VERSION | GRE_ROUTING))
+
+		/* Only look inside GRE without routing */
+		if (hdr->flags & GRE_ROUTING)
 			break;
 
-		proto = hdr->proto;
-		nhoff += 4;
+		/* Only look inside GRE for version 0 and 1 */
+		gre_ver = ntohs(hdr->flags & GRE_VERSION);
+		if (gre_ver > 1)
+			break;
+
+		proto = hdr->protocol;
+		if (gre_ver) {
+			/* Version1 must be PPTP, and check the flags */
+			if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY)))
+				break;
+		}
+
+		offset += sizeof(struct gre_base_hdr);
+
 		if (hdr->flags & GRE_CSUM)
-			nhoff += 4;
+			offset += sizeof(((struct gre_full_hdr *)0)->csum) +
+				  sizeof(((struct gre_full_hdr *)0)->reserved1);
+
 		if (hdr->flags & GRE_KEY) {
 			const __be32 *keyid;
 			__be32 _keyid;
 
-			keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid),
+			keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid),
 						     data, hlen, &_keyid);
-
 			if (!keyid)
 				goto out_bad;
 
@@ -372,32 +409,65 @@
 				key_keyid = skb_flow_dissector_target(flow_dissector,
 								      FLOW_DISSECTOR_KEY_GRE_KEYID,
 								      target_container);
-				key_keyid->keyid = *keyid;
+				if (gre_ver == 0)
+					key_keyid->keyid = *keyid;
+				else
+					key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
 			}
-			nhoff += 4;
+			offset += sizeof(((struct gre_full_hdr *)0)->key);
 		}
+
 		if (hdr->flags & GRE_SEQ)
-			nhoff += 4;
-		if (proto == htons(ETH_P_TEB)) {
-			const struct ethhdr *eth;
-			struct ethhdr _eth;
+			offset += sizeof(((struct pptp_gre_header *)0)->seq);
 
-			eth = __skb_header_pointer(skb, nhoff,
-						   sizeof(_eth),
-						   data, hlen, &_eth);
-			if (!eth)
+		if (gre_ver == 0) {
+			if (proto == htons(ETH_P_TEB)) {
+				const struct ethhdr *eth;
+				struct ethhdr _eth;
+
+				eth = __skb_header_pointer(skb, nhoff + offset,
+							   sizeof(_eth),
+							   data, hlen, &_eth);
+				if (!eth)
+					goto out_bad;
+				proto = eth->h_proto;
+				offset += sizeof(*eth);
+
+				/* Cap headers that we access via pointers at the
+				 * end of the Ethernet header as our maximum alignment
+				 * at that point is only 2 bytes.
+				 */
+				if (NET_IP_ALIGN)
+					hlen = (nhoff + offset);
+			}
+		} else { /* version 1, must be PPTP */
+			u8 _ppp_hdr[PPP_HDRLEN];
+			u8 *ppp_hdr;
+
+			if (hdr->flags & GRE_ACK)
+				offset += sizeof(((struct pptp_gre_header *)0)->ack);
+
+			ppp_hdr = skb_header_pointer(skb, nhoff + offset,
+						     sizeof(_ppp_hdr), _ppp_hdr);
+			if (!ppp_hdr)
 				goto out_bad;
-			proto = eth->h_proto;
-			nhoff += sizeof(*eth);
 
-			/* Cap headers that we access via pointers at the
-			 * end of the Ethernet header as our maximum alignment
-			 * at that point is only 2 bytes.
-			 */
-			if (NET_IP_ALIGN)
-				hlen = nhoff;
+			switch (PPP_PROTOCOL(ppp_hdr)) {
+			case PPP_IP:
+				proto = htons(ETH_P_IP);
+				break;
+			case PPP_IPV6:
+				proto = htons(ETH_P_IPV6);
+				break;
+			default:
+				/* Could probably catch some more like MPLS */
+				break;
+			}
+
+			offset += PPP_HDRLEN;
 		}
 
+		nhoff += offset;
 		key_control->flags |= FLOW_DIS_ENCAPSULATION;
 		if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP)
 			goto out_good;
@@ -680,11 +750,13 @@
 void __skb_get_hash(struct sk_buff *skb)
 {
 	struct flow_keys keys;
+	u32 hash;
 
 	__flow_hash_secret_init();
 
-	__skb_set_sw_hash(skb, ___skb_get_hash(skb, &keys, hashrnd),
-			  flow_keys_have_l4(&keys));
+	hash = ___skb_get_hash(skb, &keys, hashrnd);
+
+	__skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
 }
 EXPORT_SYMBOL(__skb_get_hash);
 
@@ -872,8 +944,8 @@
 		.offset = offsetof(struct flow_keys, ports),
 	},
 	{
-		.key_id = FLOW_DISSECTOR_KEY_VLANID,
-		.offset = offsetof(struct flow_keys, tags),
+		.key_id = FLOW_DISSECTOR_KEY_VLAN,
+		.offset = offsetof(struct flow_keys, vlan),
 	},
 	{
 		.key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL,
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 669ecc9..e5f84c2 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -251,6 +251,41 @@
 }
 EXPORT_SYMBOL(lwtunnel_output);
 
+int lwtunnel_xmit(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	const struct lwtunnel_encap_ops *ops;
+	struct lwtunnel_state *lwtstate;
+	int ret = -EINVAL;
+
+	if (!dst)
+		goto drop;
+
+	lwtstate = dst->lwtstate;
+
+	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
+		return 0;
+
+	ret = -EOPNOTSUPP;
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(ops && ops->xmit))
+		ret = ops->xmit(skb);
+	rcu_read_unlock();
+
+	if (ret == -EOPNOTSUPP)
+		goto drop;
+
+	return ret;
+
+drop:
+	kfree_skb(skb);
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_xmit);
+
 int lwtunnel_input(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index cf26e04c4..2ae929f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1148,7 +1148,8 @@
 			} else
 				goto out;
 		} else {
-			if (lladdr == neigh->ha && new == NUD_STALE)
+			if (lladdr == neigh->ha && new == NUD_STALE &&
+			    !(flags & NEIGH_UPDATE_F_ADMIN))
 				new = old;
 		}
 	}
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2c2eb1b..42bdda0 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -37,6 +37,8 @@
 };
 EXPORT_SYMBOL(init_net);
 
+static bool init_net_initialized;
+
 #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
 
 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
@@ -213,31 +215,29 @@
  */
 int peernet2id_alloc(struct net *net, struct net *peer)
 {
-	unsigned long flags;
 	bool alloc;
 	int id;
 
-	spin_lock_irqsave(&net->nsid_lock, flags);
+	spin_lock_bh(&net->nsid_lock);
 	alloc = atomic_read(&peer->count) == 0 ? false : true;
 	id = __peernet2id_alloc(net, peer, &alloc);
-	spin_unlock_irqrestore(&net->nsid_lock, flags);
+	spin_unlock_bh(&net->nsid_lock);
 	if (alloc && id >= 0)
 		rtnl_net_notifyid(net, RTM_NEWNSID, id);
 	return id;
 }
-EXPORT_SYMBOL(peernet2id_alloc);
 
 /* This function returns, if assigned, the id of a peer netns. */
 int peernet2id(struct net *net, struct net *peer)
 {
-	unsigned long flags;
 	int id;
 
-	spin_lock_irqsave(&net->nsid_lock, flags);
+	spin_lock_bh(&net->nsid_lock);
 	id = __peernet2id(net, peer);
-	spin_unlock_irqrestore(&net->nsid_lock, flags);
+	spin_unlock_bh(&net->nsid_lock);
 	return id;
 }
+EXPORT_SYMBOL(peernet2id);
 
 /* This function returns true is the peer netns has an id assigned into the
  * current netns.
@@ -249,18 +249,17 @@
 
 struct net *get_net_ns_by_id(struct net *net, int id)
 {
-	unsigned long flags;
 	struct net *peer;
 
 	if (id < 0)
 		return NULL;
 
 	rcu_read_lock();
-	spin_lock_irqsave(&net->nsid_lock, flags);
+	spin_lock_bh(&net->nsid_lock);
 	peer = idr_find(&net->netns_ids, id);
 	if (peer)
 		get_net(peer);
-	spin_unlock_irqrestore(&net->nsid_lock, flags);
+	spin_unlock_bh(&net->nsid_lock);
 	rcu_read_unlock();
 
 	return peer;
@@ -404,17 +403,17 @@
 		for_each_net(tmp) {
 			int id;
 
-			spin_lock_irq(&tmp->nsid_lock);
+			spin_lock_bh(&tmp->nsid_lock);
 			id = __peernet2id(tmp, net);
 			if (id >= 0)
 				idr_remove(&tmp->netns_ids, id);
-			spin_unlock_irq(&tmp->nsid_lock);
+			spin_unlock_bh(&tmp->nsid_lock);
 			if (id >= 0)
 				rtnl_net_notifyid(tmp, RTM_DELNSID, id);
 		}
-		spin_lock_irq(&net->nsid_lock);
+		spin_lock_bh(&net->nsid_lock);
 		idr_destroy(&net->netns_ids);
-		spin_unlock_irq(&net->nsid_lock);
+		spin_unlock_bh(&net->nsid_lock);
 
 	}
 	rtnl_unlock();
@@ -531,7 +530,7 @@
 	.exit = net_ns_net_exit,
 };
 
-static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
+static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
 	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
 	[NETNSA_NSID]		= { .type = NLA_S32 },
 	[NETNSA_PID]		= { .type = NLA_U32 },
@@ -542,7 +541,6 @@
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *tb[NETNSA_MAX + 1];
-	unsigned long flags;
 	struct net *peer;
 	int nsid, err;
 
@@ -563,15 +561,15 @@
 	if (IS_ERR(peer))
 		return PTR_ERR(peer);
 
-	spin_lock_irqsave(&net->nsid_lock, flags);
+	spin_lock_bh(&net->nsid_lock);
 	if (__peernet2id(net, peer) >= 0) {
-		spin_unlock_irqrestore(&net->nsid_lock, flags);
+		spin_unlock_bh(&net->nsid_lock);
 		err = -EEXIST;
 		goto out;
 	}
 
 	err = alloc_netid(net, peer, nsid);
-	spin_unlock_irqrestore(&net->nsid_lock, flags);
+	spin_unlock_bh(&net->nsid_lock);
 	if (err >= 0) {
 		rtnl_net_notifyid(net, RTM_NEWNSID, err);
 		err = 0;
@@ -693,11 +691,10 @@
 		.idx = 0,
 		.s_idx = cb->args[0],
 	};
-	unsigned long flags;
 
-	spin_lock_irqsave(&net->nsid_lock, flags);
+	spin_lock_bh(&net->nsid_lock);
 	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
-	spin_unlock_irqrestore(&net->nsid_lock, flags);
+	spin_unlock_bh(&net->nsid_lock);
 
 	cb->args[0] = net_cb.idx;
 	return skb->len;
@@ -750,6 +747,8 @@
 	if (setup_net(&init_net, &init_user_ns))
 		panic("Could not setup the initial network namespace");
 
+	init_net_initialized = true;
+
 	rtnl_lock();
 	list_add_tail_rcu(&init_net.list, &net_namespace_list);
 	rtnl_unlock();
@@ -811,15 +810,24 @@
 static int __register_pernet_operations(struct list_head *list,
 					struct pernet_operations *ops)
 {
+	if (!init_net_initialized) {
+		list_add_tail(&ops->list, list);
+		return 0;
+	}
+
 	return ops_init(ops, &init_net);
 }
 
 static void __unregister_pernet_operations(struct pernet_operations *ops)
 {
-	LIST_HEAD(net_exit_list);
-	list_add(&init_net.exit_list, &net_exit_list);
-	ops_exit_list(ops, &net_exit_list);
-	ops_free_list(ops, &net_exit_list);
+	if (!init_net_initialized) {
+		list_del(&ops->list);
+	} else {
+		LIST_HEAD(net_exit_list);
+		list_add(&init_net.exit_list, &net_exit_list);
+		ops_exit_list(ops, &net_exit_list);
+		ops_free_list(ops, &net_exit_list);
+	}
 }
 
 #endif /* CONFIG_NET_NS */
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index bbd118b..5219a9e 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2286,7 +2286,7 @@
 
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
 {
-	pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev);
+	pkt_dev->pkt_overhead = 0;
 	pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32);
 	pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
 	pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
@@ -2777,13 +2777,13 @@
 }
 
 static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
-					struct pktgen_dev *pkt_dev,
-					unsigned int extralen)
+					struct pktgen_dev *pkt_dev)
 {
+	unsigned int extralen = LL_RESERVED_SPACE(dev);
 	struct sk_buff *skb = NULL;
-	unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen +
-			    pkt_dev->pkt_overhead;
+	unsigned int size;
 
+	size = pkt_dev->cur_pkt_size + 64 + extralen + pkt_dev->pkt_overhead;
 	if (pkt_dev->flags & F_NODE) {
 		int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id();
 
@@ -2796,8 +2796,9 @@
 		 skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
 	}
 
+	/* the caller pre-fetches from skb->data and reserves for the mac hdr */
 	if (likely(skb))
-		skb_reserve(skb, LL_RESERVED_SPACE(dev));
+		skb_reserve(skb, extralen - 16);
 
 	return skb;
 }
@@ -2830,16 +2831,14 @@
 	mod_cur_headers(pkt_dev);
 	queue_map = pkt_dev->cur_queue_map;
 
-	datalen = (odev->hard_header_len + 16) & ~0xf;
-
-	skb = pktgen_alloc_skb(odev, pkt_dev, datalen);
+	skb = pktgen_alloc_skb(odev, pkt_dev);
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
 	}
 
 	prefetchw(skb->data);
-	skb_reserve(skb, datalen);
+	skb_reserve(skb, 16);
 
 	/*  Reserve for ethernet and IP header  */
 	eth = (__u8 *) skb_push(skb, 14);
@@ -2959,7 +2958,7 @@
 	mod_cur_headers(pkt_dev);
 	queue_map = pkt_dev->cur_queue_map;
 
-	skb = pktgen_alloc_skb(odev, pkt_dev, 16);
+	skb = pktgen_alloc_skb(odev, pkt_dev);
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 189cc78..b06d2f4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -704,6 +704,8 @@
 			} else if (i == RTAX_FEATURES - 1) {
 				u32 user_features = metrics[i] & RTAX_FEATURE_MASK;
 
+				if (!user_features)
+					continue;
 				BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK);
 				if (nla_put_u32(skb, i + 1, user_features))
 					goto nla_put_failure;
@@ -841,7 +843,10 @@
 		size += nla_total_size(num_vfs * sizeof(struct nlattr));
 		size += num_vfs *
 			(nla_total_size(sizeof(struct ifla_vf_mac)) +
-			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
+			 nla_total_size(MAX_VLAN_LIST_LEN *
+					sizeof(struct nlattr)) +
+			 nla_total_size(MAX_VLAN_LIST_LEN *
+					sizeof(struct ifla_vf_vlan_info)) +
 			 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
 			 nla_total_size(sizeof(struct ifla_vf_rate)) +
 			 nla_total_size(sizeof(struct ifla_vf_link_state)) +
@@ -1109,14 +1114,15 @@
 					       struct nlattr *vfinfo)
 {
 	struct ifla_vf_rss_query_en vf_rss_query_en;
+	struct nlattr *vf, *vfstats, *vfvlanlist;
 	struct ifla_vf_link_state vf_linkstate;
+	struct ifla_vf_vlan_info vf_vlan_info;
 	struct ifla_vf_spoofchk vf_spoofchk;
 	struct ifla_vf_tx_rate vf_tx_rate;
 	struct ifla_vf_stats vf_stats;
 	struct ifla_vf_trust vf_trust;
 	struct ifla_vf_vlan vf_vlan;
 	struct ifla_vf_rate vf_rate;
-	struct nlattr *vf, *vfstats;
 	struct ifla_vf_mac vf_mac;
 	struct ifla_vf_info ivi;
 
@@ -1133,11 +1139,14 @@
 	 * IFLA_VF_LINK_STATE_AUTO which equals zero
 	 */
 	ivi.linkstate = 0;
+	/* VLAN Protocol by default is 802.1Q */
+	ivi.vlan_proto = htons(ETH_P_8021Q);
 	if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi))
 		return 0;
 
 	vf_mac.vf =
 		vf_vlan.vf =
+		vf_vlan_info.vf =
 		vf_rate.vf =
 		vf_tx_rate.vf =
 		vf_spoofchk.vf =
@@ -1148,6 +1157,9 @@
 	memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 	vf_vlan.vlan = ivi.vlan;
 	vf_vlan.qos = ivi.qos;
+	vf_vlan_info.vlan = ivi.vlan;
+	vf_vlan_info.qos = ivi.qos;
+	vf_vlan_info.vlan_proto = ivi.vlan_proto;
 	vf_tx_rate.rate = ivi.max_tx_rate;
 	vf_rate.min_tx_rate = ivi.min_tx_rate;
 	vf_rate.max_tx_rate = ivi.max_tx_rate;
@@ -1156,10 +1168,8 @@
 	vf_rss_query_en.setting = ivi.rss_query_en;
 	vf_trust.setting = ivi.trusted;
 	vf = nla_nest_start(skb, IFLA_VF_INFO);
-	if (!vf) {
-		nla_nest_cancel(skb, vfinfo);
-		return -EMSGSIZE;
-	}
+	if (!vf)
+		goto nla_put_vfinfo_failure;
 	if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
 	    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
 	    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
@@ -1175,17 +1185,23 @@
 		    &vf_rss_query_en) ||
 	    nla_put(skb, IFLA_VF_TRUST,
 		    sizeof(vf_trust), &vf_trust))
-		return -EMSGSIZE;
+		goto nla_put_vf_failure;
+	vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST);
+	if (!vfvlanlist)
+		goto nla_put_vf_failure;
+	if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info),
+		    &vf_vlan_info)) {
+		nla_nest_cancel(skb, vfvlanlist);
+		goto nla_put_vf_failure;
+	}
+	nla_nest_end(skb, vfvlanlist);
 	memset(&vf_stats, 0, sizeof(vf_stats));
 	if (dev->netdev_ops->ndo_get_vf_stats)
 		dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
 						&vf_stats);
 	vfstats = nla_nest_start(skb, IFLA_VF_STATS);
-	if (!vfstats) {
-		nla_nest_cancel(skb, vf);
-		nla_nest_cancel(skb, vfinfo);
-		return -EMSGSIZE;
-	}
+	if (!vfstats)
+		goto nla_put_vf_failure;
 	if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
 			      vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
 	    nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
@@ -1197,11 +1213,19 @@
 	    nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
 			      vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
 	    nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
-			      vf_stats.multicast, IFLA_VF_STATS_PAD))
-		return -EMSGSIZE;
+			      vf_stats.multicast, IFLA_VF_STATS_PAD)) {
+		nla_nest_cancel(skb, vfstats);
+		goto nla_put_vf_failure;
+	}
 	nla_nest_end(skb, vfstats);
 	nla_nest_end(skb, vf);
 	return 0;
+
+nla_put_vf_failure:
+	nla_nest_cancel(skb, vf);
+nla_put_vfinfo_failure:
+	nla_nest_cancel(skb, vfinfo);
+	return -EMSGSIZE;
 }
 
 static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
@@ -1446,6 +1470,7 @@
 static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_MAC]		= { .len = sizeof(struct ifla_vf_mac) },
 	[IFLA_VF_VLAN]		= { .len = sizeof(struct ifla_vf_vlan) },
+	[IFLA_VF_VLAN_LIST]     = { .type = NLA_NESTED },
 	[IFLA_VF_TX_RATE]	= { .len = sizeof(struct ifla_vf_tx_rate) },
 	[IFLA_VF_SPOOFCHK]	= { .len = sizeof(struct ifla_vf_spoofchk) },
 	[IFLA_VF_RATE]		= { .len = sizeof(struct ifla_vf_rate) },
@@ -1702,7 +1727,37 @@
 		err = -EOPNOTSUPP;
 		if (ops->ndo_set_vf_vlan)
 			err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
-						   ivv->qos);
+						   ivv->qos,
+						   htons(ETH_P_8021Q));
+		if (err < 0)
+			return err;
+	}
+
+	if (tb[IFLA_VF_VLAN_LIST]) {
+		struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN];
+		struct nlattr *attr;
+		int rem, len = 0;
+
+		err = -EOPNOTSUPP;
+		if (!ops->ndo_set_vf_vlan)
+			return err;
+
+		nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
+			if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
+			    nla_len(attr) < NLA_HDRLEN) {
+				return -EINVAL;
+			}
+			if (len >= MAX_VLAN_LIST_LEN)
+				return -EOPNOTSUPP;
+			ivvl[len] = nla_data(attr);
+
+			len++;
+		}
+		if (len == 0)
+			return -EINVAL;
+
+		err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
+					   ivvl[0]->qos, ivvl[0]->vlan_proto);
 		if (err < 0)
 			return err;
 	}
@@ -3066,7 +3121,7 @@
 	seq = cb->nlh->nlmsg_seq;
 
 	list_for_each_entry(ha, &list->list, list) {
-		if (*idx < cb->args[0])
+		if (*idx < cb->args[2])
 			goto skip;
 
 		err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0,
@@ -3093,19 +3148,18 @@
 		      struct netlink_callback *cb,
 		      struct net_device *dev,
 		      struct net_device *filter_dev,
-		      int idx)
+		      int *idx)
 {
 	int err;
 
 	netif_addr_lock_bh(dev);
-	err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc);
+	err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc);
 	if (err)
 		goto out;
-	nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc);
+	nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc);
 out:
 	netif_addr_unlock_bh(dev);
-	cb->args[1] = err;
-	return idx;
+	return err;
 }
 EXPORT_SYMBOL(ndo_dflt_fdb_dump);
 
@@ -3118,9 +3172,13 @@
 	const struct net_device_ops *cops = NULL;
 	struct ifinfomsg *ifm = nlmsg_data(cb->nlh);
 	struct net *net = sock_net(skb->sk);
+	struct hlist_head *head;
 	int brport_idx = 0;
 	int br_idx = 0;
-	int idx = 0;
+	int h, s_h;
+	int idx = 0, s_idx;
+	int err = 0;
+	int fidx = 0;
 
 	if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX,
 			ifla_policy) == 0) {
@@ -3138,49 +3196,71 @@
 		ops = br_dev->netdev_ops;
 	}
 
-	cb->args[1] = 0;
-	for_each_netdev(net, dev) {
-		if (brport_idx && (dev->ifindex != brport_idx))
-			continue;
+	s_h = cb->args[0];
+	s_idx = cb->args[1];
 
-		if (!br_idx) { /* user did not specify a specific bridge */
-			if (dev->priv_flags & IFF_BRIDGE_PORT) {
-				br_dev = netdev_master_upper_dev_get(dev);
-				cops = br_dev->netdev_ops;
+	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+		idx = 0;
+		head = &net->dev_index_head[h];
+		hlist_for_each_entry(dev, head, index_hlist) {
+
+			if (brport_idx && (dev->ifindex != brport_idx))
+				continue;
+
+			if (!br_idx) { /* user did not specify a specific bridge */
+				if (dev->priv_flags & IFF_BRIDGE_PORT) {
+					br_dev = netdev_master_upper_dev_get(dev);
+					cops = br_dev->netdev_ops;
+				}
+			} else {
+				if (dev != br_dev &&
+				    !(dev->priv_flags & IFF_BRIDGE_PORT))
+					continue;
+
+				if (br_dev != netdev_master_upper_dev_get(dev) &&
+				    !(dev->priv_flags & IFF_EBRIDGE))
+					continue;
+				cops = ops;
 			}
 
-		} else {
-			if (dev != br_dev &&
-			    !(dev->priv_flags & IFF_BRIDGE_PORT))
-				continue;
+			if (idx < s_idx)
+				goto cont;
 
-			if (br_dev != netdev_master_upper_dev_get(dev) &&
-			    !(dev->priv_flags & IFF_EBRIDGE))
-				continue;
+			if (dev->priv_flags & IFF_BRIDGE_PORT) {
+				if (cops && cops->ndo_fdb_dump) {
+					err = cops->ndo_fdb_dump(skb, cb,
+								br_dev, dev,
+								&fidx);
+					if (err == -EMSGSIZE)
+						goto out;
+				}
+			}
 
-			cops = ops;
+			if (dev->netdev_ops->ndo_fdb_dump)
+				err = dev->netdev_ops->ndo_fdb_dump(skb, cb,
+								    dev, NULL,
+								    &fidx);
+			else
+				err = ndo_dflt_fdb_dump(skb, cb, dev, NULL,
+							&fidx);
+			if (err == -EMSGSIZE)
+				goto out;
+
+			cops = NULL;
+
+			/* reset fdb offset to 0 for rest of the interfaces */
+			cb->args[2] = 0;
+			fidx = 0;
+cont:
+			idx++;
 		}
-
-		if (dev->priv_flags & IFF_BRIDGE_PORT) {
-			if (cops && cops->ndo_fdb_dump)
-				idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev,
-							 idx);
-		}
-		if (cb->args[1] == -EMSGSIZE)
-			break;
-
-		if (dev->netdev_ops->ndo_fdb_dump)
-			idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL,
-							    idx);
-		else
-			idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
-		if (cb->args[1] == -EMSGSIZE)
-			break;
-
-		cops = NULL;
 	}
 
-	cb->args[0] = idx;
+out:
+	cb->args[0] = h;
+	cb->args[1] = idx;
+	cb->args[2] = fidx;
+
 	return skb->len;
 }
 
@@ -3550,6 +3630,91 @@
 	       (!idxattr || idxattr == attrid);
 }
 
+#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1)
+static int rtnl_get_offload_stats_attr_size(int attr_id)
+{
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		return sizeof(struct rtnl_link_stats64);
+	}
+
+	return 0;
+}
+
+static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev,
+				  int *prividx)
+{
+	struct nlattr *attr = NULL;
+	int attr_id, size;
+	void *attr_data;
+	int err;
+
+	if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
+	      dev->netdev_ops->ndo_get_offload_stats))
+		return -ENODATA;
+
+	for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
+	     attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
+		if (attr_id < *prividx)
+			continue;
+
+		size = rtnl_get_offload_stats_attr_size(attr_id);
+		if (!size)
+			continue;
+
+		if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+			continue;
+
+		attr = nla_reserve_64bit(skb, attr_id, size,
+					 IFLA_OFFLOAD_XSTATS_UNSPEC);
+		if (!attr)
+			goto nla_put_failure;
+
+		attr_data = nla_data(attr);
+		memset(attr_data, 0, size);
+		err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev,
+							     attr_data);
+		if (err)
+			goto get_offload_stats_failure;
+	}
+
+	if (!attr)
+		return -ENODATA;
+
+	*prividx = 0;
+	return 0;
+
+nla_put_failure:
+	err = -EMSGSIZE;
+get_offload_stats_failure:
+	*prividx = attr_id;
+	return err;
+}
+
+static int rtnl_get_offload_stats_size(const struct net_device *dev)
+{
+	int nla_size = 0;
+	int attr_id;
+	int size;
+
+	if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats &&
+	      dev->netdev_ops->ndo_get_offload_stats))
+		return 0;
+
+	for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST;
+	     attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) {
+		if (!dev->netdev_ops->ndo_has_offload_stats(attr_id))
+			continue;
+		size = rtnl_get_offload_stats_attr_size(attr_id);
+		nla_size += nla_total_size_64bit(size);
+	}
+
+	if (nla_size != 0)
+		nla_size += nla_total_size(0);
+
+	return nla_size;
+}
+
 static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
 			       int type, u32 pid, u32 seq, u32 change,
 			       unsigned int flags, unsigned int filter_mask,
@@ -3559,6 +3724,7 @@
 	struct nlmsghdr *nlh;
 	struct nlattr *attr;
 	int s_prividx = *prividx;
+	int err;
 
 	ASSERT_RTNL();
 
@@ -3587,8 +3753,6 @@
 		const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
 
 		if (ops && ops->fill_linkxstats) {
-			int err;
-
 			*idxattr = IFLA_STATS_LINK_XSTATS;
 			attr = nla_nest_start(skb,
 					      IFLA_STATS_LINK_XSTATS);
@@ -3612,8 +3776,6 @@
 		if (master)
 			ops = master->rtnl_link_ops;
 		if (ops && ops->fill_linkxstats) {
-			int err;
-
 			*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
 			attr = nla_nest_start(skb,
 					      IFLA_STATS_LINK_XSTATS_SLAVE);
@@ -3628,6 +3790,24 @@
 		}
 	}
 
+	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
+			     *idxattr)) {
+		*idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
+		attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS);
+		if (!attr)
+			goto nla_put_failure;
+
+		err = rtnl_get_offload_stats(skb, dev, prividx);
+		if (err == -ENODATA)
+			nla_nest_cancel(skb, attr);
+		else
+			nla_nest_end(skb, attr);
+
+		if (err && err != -ENODATA)
+			goto nla_put_failure;
+		*idxattr = 0;
+	}
+
 	nlmsg_end(skb, nlh);
 
 	return 0;
@@ -3642,10 +3822,6 @@
 	return -EMSGSIZE;
 }
 
-static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
-	[IFLA_STATS_LINK_64]	= { .len = sizeof(struct rtnl_link_stats64) },
-};
-
 static size_t if_nlmsg_stats_size(const struct net_device *dev,
 				  u32 filter_mask)
 {
@@ -3685,6 +3861,9 @@
 		}
 	}
 
+	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0))
+		size += rtnl_get_offload_stats_size(dev);
+
 	return size;
 }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3864b4b6..cbd19d2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2445,6 +2445,25 @@
 EXPORT_SYMBOL(skb_queue_purge);
 
 /**
+ *	skb_rbtree_purge - empty a skb rbtree
+ *	@root: root of the rbtree to empty
+ *
+ *	Delete all buffers on an &sk_buff rbtree. Each buffer is removed from
+ *	the list and one reference dropped. This function does not take
+ *	any lock. Synchronization should be handled by the caller (e.g., TCP
+ *	out-of-order queue is protected by the socket lock).
+ */
+void skb_rbtree_purge(struct rb_root *root)
+{
+	struct sk_buff *skb, *next;
+
+	rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
+		kfree_skb(skb);
+
+	*root = RB_ROOT;
+}
+
+/**
  *	skb_queue_head - queue a buffer at the list head
  *	@list: list to use
  *	@newsk: buffer to queue
@@ -3078,11 +3097,31 @@
 	sg = !!(features & NETIF_F_SG);
 	csum = !!can_checksum_protocol(features, proto);
 
-	/* GSO partial only requires that we trim off any excess that
-	 * doesn't fit into an MSS sized block, so take care of that
-	 * now.
-	 */
-	if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) {
+	if (sg && csum && (mss != GSO_BY_FRAGS))  {
+		if (!(features & NETIF_F_GSO_PARTIAL)) {
+			struct sk_buff *iter;
+
+			if (!list_skb ||
+			    !net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
+				goto normal;
+
+			/* Split the buffer at the frag_list pointer.
+			 * This is based on the assumption that all
+			 * buffers in the chain excluding the last
+			 * containing the same amount of data.
+			 */
+			skb_walk_frags(head_skb, iter) {
+				if (skb_headlen(iter))
+					goto normal;
+
+				len -= iter->len;
+			}
+		}
+
+		/* GSO partial only requires that we trim off any excess that
+		 * doesn't fit into an MSS sized block, so take care of that
+		 * now.
+		 */
 		partial_segs = len / mss;
 		if (partial_segs > 1)
 			mss *= partial_segs;
@@ -3090,6 +3129,7 @@
 			partial_segs = 0;
 	}
 
+normal:
 	headroom = skb_headroom(head_skb);
 	pos = skb_headlen(head_skb);
 
@@ -3281,21 +3321,29 @@
 	 */
 	segs->prev = tail;
 
-	/* Update GSO info on first skb in partial sequence. */
 	if (partial_segs) {
+		struct sk_buff *iter;
 		int type = skb_shinfo(head_skb)->gso_type;
+		unsigned short gso_size = skb_shinfo(head_skb)->gso_size;
 
 		/* Update type to add partial and then remove dodgy if set */
-		type |= SKB_GSO_PARTIAL;
+		type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL;
 		type &= ~SKB_GSO_DODGY;
 
 		/* Update GSO info and prepare to start updating headers on
 		 * our way back down the stack of protocols.
 		 */
-		skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size;
-		skb_shinfo(segs)->gso_segs = partial_segs;
-		skb_shinfo(segs)->gso_type = type;
-		SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset;
+		for (iter = segs; iter; iter = iter->next) {
+			skb_shinfo(iter)->gso_size = gso_size;
+			skb_shinfo(iter)->gso_segs = partial_segs;
+			skb_shinfo(iter)->gso_type = type;
+			SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset;
+		}
+
+		if (tail->len - doffset <= gso_size)
+			skb_shinfo(tail)->gso_size = 0;
+		else if (tail != segs)
+			skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size);
 	}
 
 	/* Following permits correct backpressure, for protocols
@@ -4474,17 +4522,24 @@
 }
 EXPORT_SYMBOL(skb_ensure_writable);
 
-/* remove VLAN header from packet and update csum accordingly. */
-static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
+/* remove VLAN header from packet and update csum accordingly.
+ * expects a non skb_vlan_tag_present skb with a vlan tag payload
+ */
+int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
 {
 	struct vlan_hdr *vhdr;
-	unsigned int offset = skb->data - skb_mac_header(skb);
+	int offset = skb->data - skb_mac_header(skb);
 	int err;
 
-	__skb_push(skb, offset);
+	if (WARN_ONCE(offset,
+		      "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n",
+		      offset)) {
+		return -EINVAL;
+	}
+
 	err = skb_ensure_writable(skb, VLAN_ETH_HLEN);
 	if (unlikely(err))
-		goto pull;
+		return err;
 
 	skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
 
@@ -4501,12 +4556,14 @@
 		skb_set_network_header(skb, ETH_HLEN);
 
 	skb_reset_mac_len(skb);
-pull:
-	__skb_pull(skb, offset);
 
 	return err;
 }
+EXPORT_SYMBOL(__skb_vlan_pop);
 
+/* Pop a vlan tag either from hwaccel or from payload.
+ * Expects skb->data at mac header.
+ */
 int skb_vlan_pop(struct sk_buff *skb)
 {
 	u16 vlan_tci;
@@ -4516,9 +4573,7 @@
 	if (likely(skb_vlan_tag_present(skb))) {
 		skb->vlan_tci = 0;
 	} else {
-		if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
-			      skb->protocol != htons(ETH_P_8021AD)) ||
-			     skb->len < VLAN_ETH_HLEN))
+		if (unlikely(!eth_type_vlan(skb->protocol)))
 			return 0;
 
 		err = __skb_vlan_pop(skb, &vlan_tci);
@@ -4526,9 +4581,7 @@
 			return err;
 	}
 	/* move next vlan tag to hw accel tag */
-	if (likely((skb->protocol != htons(ETH_P_8021Q) &&
-		    skb->protocol != htons(ETH_P_8021AD)) ||
-		   skb->len < VLAN_ETH_HLEN))
+	if (likely(!eth_type_vlan(skb->protocol)))
 		return 0;
 
 	vlan_proto = skb->protocol;
@@ -4541,29 +4594,30 @@
 }
 EXPORT_SYMBOL(skb_vlan_pop);
 
+/* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present).
+ * Expects skb->data at mac header.
+ */
 int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
 {
 	if (skb_vlan_tag_present(skb)) {
-		unsigned int offset = skb->data - skb_mac_header(skb);
+		int offset = skb->data - skb_mac_header(skb);
 		int err;
 
-		/* __vlan_insert_tag expect skb->data pointing to mac header.
-		 * So change skb->data before calling it and change back to
-		 * original position later
-		 */
-		__skb_push(skb, offset);
+		if (WARN_ONCE(offset,
+			      "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n",
+			      offset)) {
+			return -EINVAL;
+		}
+
 		err = __vlan_insert_tag(skb, skb->vlan_proto,
 					skb_vlan_tag_get(skb));
-		if (err) {
-			__skb_pull(skb, offset);
+		if (err)
 			return err;
-		}
 
 		skb->protocol = skb->vlan_proto;
 		skb->mac_len += VLAN_HLEN;
 
 		skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN);
-		__skb_pull(skb, offset);
 	}
 	__vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
 	return 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 25dab8b..038e660 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1315,24 +1315,6 @@
 #endif
 }
 
-void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
-{
-	unsigned long nulls1, nulls2;
-
-	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
-	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
-	if (nulls1 > nulls2)
-		swap(nulls1, nulls2);
-
-	if (nulls1 != 0)
-		memset((char *)sk, 0, nulls1);
-	memset((char *)sk + nulls1 + sizeof(void *), 0,
-	       nulls2 - nulls1 - sizeof(void *));
-	memset((char *)sk + nulls2 + sizeof(void *), 0,
-	       size - nulls2 - sizeof(void *));
-}
-EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
-
 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
 		int family)
 {
@@ -1344,12 +1326,8 @@
 		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
 		if (!sk)
 			return sk;
-		if (priority & __GFP_ZERO) {
-			if (prot->clear_sk)
-				prot->clear_sk(sk, prot->obj_size);
-			else
-				sk_prot_clear_nulls(sk, prot->obj_size);
-		}
+		if (priority & __GFP_ZERO)
+			sk_prot_clear_nulls(sk, prot->obj_size);
 	} else
 		sk = kmalloc(prot->obj_size, priority);
 
@@ -1362,7 +1340,6 @@
 		if (!try_module_get(prot->owner))
 			goto out_free_sec;
 		sk_tx_queue_clear(sk);
-		cgroup_sk_alloc(&sk->sk_cgrp_data);
 	}
 
 	return sk;
@@ -1422,6 +1399,7 @@
 		sock_net_set(sk, net);
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
+		cgroup_sk_alloc(&sk->sk_cgrp_data);
 		sock_update_classid(&sk->sk_cgrp_data);
 		sock_update_netprioidx(&sk->sk_cgrp_data);
 	}
@@ -1566,6 +1544,9 @@
 		newsk->sk_priority = 0;
 		newsk->sk_incoming_cpu = raw_smp_processor_id();
 		atomic64_set(&newsk->sk_cookie, 0);
+
+		cgroup_sk_alloc(&newsk->sk_cgrp_data);
+
 		/*
 		 * Before updating sk_refcnt, we must commit prior changes to memory
 		 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/core/stream.c b/net/core/stream.c
index 159516a..1086c8b 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -43,7 +43,6 @@
 		rcu_read_unlock();
 	}
 }
-EXPORT_SYMBOL(sk_stream_write_space);
 
 /**
  * sk_stream_wait_connect - Wait for a socket to get into the connected state
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index ff7736f..96e47c5 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -38,4 +38,7 @@
 config NET_DSA_TAG_TRAILER
 	bool
 
+config NET_DSA_TAG_QCA
+	bool
+
 endif
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 8af4ded..a3380ed 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -7,3 +7,4 @@
 dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
 dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
 dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
+dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 7e68bc6..a6902c1 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -54,6 +54,9 @@
 #ifdef CONFIG_NET_DSA_TAG_BRCM
 	[DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops,
 #endif
+#ifdef CONFIG_NET_DSA_TAG_QCA
+	[DSA_TAG_PROTO_QCA] = &qca_netdev_ops,
+#endif
 	[DSA_TAG_PROTO_NONE] = &none_ops,
 };
 
@@ -61,27 +64,27 @@
 static DEFINE_MUTEX(dsa_switch_drivers_mutex);
 static LIST_HEAD(dsa_switch_drivers);
 
-void register_switch_driver(struct dsa_switch_driver *drv)
+void register_switch_driver(struct dsa_switch_ops *ops)
 {
 	mutex_lock(&dsa_switch_drivers_mutex);
-	list_add_tail(&drv->list, &dsa_switch_drivers);
+	list_add_tail(&ops->list, &dsa_switch_drivers);
 	mutex_unlock(&dsa_switch_drivers_mutex);
 }
 EXPORT_SYMBOL_GPL(register_switch_driver);
 
-void unregister_switch_driver(struct dsa_switch_driver *drv)
+void unregister_switch_driver(struct dsa_switch_ops *ops)
 {
 	mutex_lock(&dsa_switch_drivers_mutex);
-	list_del_init(&drv->list);
+	list_del_init(&ops->list);
 	mutex_unlock(&dsa_switch_drivers_mutex);
 }
 EXPORT_SYMBOL_GPL(unregister_switch_driver);
 
-static struct dsa_switch_driver *
+static struct dsa_switch_ops *
 dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr,
 		 const char **_name, void **priv)
 {
-	struct dsa_switch_driver *ret;
+	struct dsa_switch_ops *ret;
 	struct list_head *list;
 	const char *name;
 
@@ -90,13 +93,13 @@
 
 	mutex_lock(&dsa_switch_drivers_mutex);
 	list_for_each(list, &dsa_switch_drivers) {
-		struct dsa_switch_driver *drv;
+		struct dsa_switch_ops *ops;
 
-		drv = list_entry(list, struct dsa_switch_driver, list);
+		ops = list_entry(list, struct dsa_switch_ops, list);
 
-		name = drv->probe(parent, host_dev, sw_addr, priv);
+		name = ops->probe(parent, host_dev, sw_addr, priv);
 		if (name != NULL) {
-			ret = drv;
+			ret = ops;
 			break;
 		}
 	}
@@ -117,7 +120,7 @@
 	struct dsa_switch *ds = dev_get_drvdata(dev);
 	int temp, ret;
 
-	ret = ds->drv->get_temp(ds, &temp);
+	ret = ds->ops->get_temp(ds, &temp);
 	if (ret < 0)
 		return ret;
 
@@ -131,7 +134,7 @@
 	struct dsa_switch *ds = dev_get_drvdata(dev);
 	int temp, ret;
 
-	ret = ds->drv->get_temp_limit(ds, &temp);
+	ret = ds->ops->get_temp_limit(ds, &temp);
 	if (ret < 0)
 		return ret;
 
@@ -149,7 +152,7 @@
 	if (ret < 0)
 		return ret;
 
-	ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000));
+	ret = ds->ops->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000));
 	if (ret < 0)
 		return ret;
 
@@ -164,7 +167,7 @@
 	bool alarm;
 	int ret;
 
-	ret = ds->drv->get_temp_alarm(ds, &alarm);
+	ret = ds->ops->get_temp_alarm(ds, &alarm);
 	if (ret < 0)
 		return ret;
 
@@ -184,15 +187,15 @@
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
 	struct dsa_switch *ds = dev_get_drvdata(dev);
-	struct dsa_switch_driver *drv = ds->drv;
+	struct dsa_switch_ops *ops = ds->ops;
 	umode_t mode = attr->mode;
 
 	if (index == 1) {
-		if (!drv->get_temp_limit)
+		if (!ops->get_temp_limit)
 			mode = 0;
-		else if (!drv->set_temp_limit)
+		else if (!ops->set_temp_limit)
 			mode &= ~S_IWUSR;
-	} else if (index == 2 && !drv->get_temp_alarm) {
+	} else if (index == 2 && !ops->get_temp_alarm) {
 		mode = 0;
 	}
 	return mode;
@@ -228,8 +231,8 @@
 
 		genphy_config_init(phydev);
 		genphy_read_status(phydev);
-		if (ds->drv->adjust_link)
-			ds->drv->adjust_link(ds, port, phydev);
+		if (ds->ops->adjust_link)
+			ds->ops->adjust_link(ds, port, phydev);
 	}
 
 	return 0;
@@ -303,7 +306,7 @@
 
 static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
 {
-	struct dsa_switch_driver *drv = ds->drv;
+	struct dsa_switch_ops *ops = ds->ops;
 	struct dsa_switch_tree *dst = ds->dst;
 	struct dsa_chip_data *cd = ds->cd;
 	bool valid_name_found = false;
@@ -354,7 +357,10 @@
 	 * switch.
 	 */
 	if (dst->cpu_switch == index) {
-		dst->tag_ops = dsa_resolve_tag_protocol(drv->tag_protocol);
+		enum dsa_tag_protocol tag_protocol;
+
+		tag_protocol = ops->get_tag_protocol(ds);
+		dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
 		if (IS_ERR(dst->tag_ops)) {
 			ret = PTR_ERR(dst->tag_ops);
 			goto out;
@@ -368,15 +374,17 @@
 	/*
 	 * Do basic register setup.
 	 */
-	ret = drv->setup(ds);
+	ret = ops->setup(ds);
 	if (ret < 0)
 		goto out;
 
-	ret = drv->set_addr(ds, dst->master_netdev->dev_addr);
-	if (ret < 0)
-		goto out;
+	if (ops->set_addr) {
+		ret = ops->set_addr(ds, dst->master_netdev->dev_addr);
+		if (ret < 0)
+			goto out;
+	}
 
-	if (!ds->slave_mii_bus && drv->phy_read) {
+	if (!ds->slave_mii_bus && ops->phy_read) {
 		ds->slave_mii_bus = devm_mdiobus_alloc(parent);
 		if (!ds->slave_mii_bus) {
 			ret = -ENOMEM;
@@ -423,7 +431,7 @@
 	 * register with hardware monitoring subsystem.
 	 * Treat registration error as non-fatal and ignore it.
 	 */
-	if (drv->get_temp) {
+	if (ops->get_temp) {
 		const char *netname = netdev_name(dst->master_netdev);
 		char hname[IFNAMSIZ + 1];
 		int i, j;
@@ -454,7 +462,7 @@
 		 struct device *parent, struct device *host_dev)
 {
 	struct dsa_chip_data *cd = dst->pd->chip + index;
-	struct dsa_switch_driver *drv;
+	struct dsa_switch_ops *ops;
 	struct dsa_switch *ds;
 	int ret;
 	const char *name;
@@ -463,8 +471,8 @@
 	/*
 	 * Probe for switch model.
 	 */
-	drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
-	if (drv == NULL) {
+	ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv);
+	if (!ops) {
 		netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n",
 			   index);
 		return ERR_PTR(-EINVAL);
@@ -483,7 +491,7 @@
 	ds->dst = dst;
 	ds->index = index;
 	ds->cd = cd;
-	ds->drv = drv;
+	ds->ops = ops;
 	ds->priv = priv;
 	ds->dev = parent;
 
@@ -538,12 +546,12 @@
 		ds->dsa_port_mask |= ~(1 << port);
 	}
 
-	if (ds->slave_mii_bus && ds->drv->phy_read)
+	if (ds->slave_mii_bus && ds->ops->phy_read)
 		mdiobus_unregister(ds->slave_mii_bus);
 }
 
 #ifdef CONFIG_PM_SLEEP
-static int dsa_switch_suspend(struct dsa_switch *ds)
+int dsa_switch_suspend(struct dsa_switch *ds)
 {
 	int i, ret = 0;
 
@@ -557,18 +565,19 @@
 			return ret;
 	}
 
-	if (ds->drv->suspend)
-		ret = ds->drv->suspend(ds);
+	if (ds->ops->suspend)
+		ret = ds->ops->suspend(ds);
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(dsa_switch_suspend);
 
-static int dsa_switch_resume(struct dsa_switch *ds)
+int dsa_switch_resume(struct dsa_switch *ds)
 {
 	int i, ret = 0;
 
-	if (ds->drv->resume)
-		ret = ds->drv->resume(ds);
+	if (ds->ops->resume)
+		ret = ds->ops->resume(ds);
 
 	if (ret)
 		return ret;
@@ -585,6 +594,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(dsa_switch_resume);
 #endif
 
 /* platform driver init and cleanup *****************************************/
@@ -1086,7 +1096,6 @@
 static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume);
 
 static const struct of_device_id dsa_of_match_table[] = {
-	{ .compatible = "brcm,bcm7445-switch-v4.0" },
 	{ .compatible = "marvell,dsa", },
 	{}
 };
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index f30bad9..f8a7d9a 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -294,25 +294,23 @@
 	int err;
 
 	/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
-	 * driver and before drv->setup() has run, since the switch drivers and
+	 * driver and before ops->setup() has run, since the switch drivers and
 	 * the slave MDIO bus driver rely on these values for probing PHY
 	 * devices or not
 	 */
 	ds->phys_mii_mask = ds->enabled_port_mask;
 
-	err = ds->drv->setup(ds);
+	err = ds->ops->setup(ds);
 	if (err < 0)
 		return err;
 
-	err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
-	if (err < 0)
-		return err;
+	if (ds->ops->set_addr) {
+		err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr);
+		if (err < 0)
+			return err;
+	}
 
-	err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr);
-	if (err < 0)
-		return err;
-
-	if (!ds->slave_mii_bus && ds->drv->phy_read) {
+	if (!ds->slave_mii_bus && ds->ops->phy_read) {
 		ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
 		if (!ds->slave_mii_bus)
 			return -ENOMEM;
@@ -374,7 +372,7 @@
 		dsa_user_port_unapply(port, index, ds);
 	}
 
-	if (ds->slave_mii_bus && ds->drv->phy_read)
+	if (ds->slave_mii_bus && ds->ops->phy_read)
 		mdiobus_unregister(ds->slave_mii_bus);
 }
 
@@ -443,6 +441,7 @@
 			 struct dsa_switch_tree *dst,
 			 struct dsa_switch *ds)
 {
+	enum dsa_tag_protocol tag_protocol;
 	struct net_device *ethernet_dev;
 	struct device_node *ethernet;
 
@@ -465,7 +464,8 @@
 		dst->cpu_port = index;
 	}
 
-	dst->tag_ops = dsa_resolve_tag_protocol(ds->drv->tag_protocol);
+	tag_protocol = ds->ops->get_tag_protocol(ds);
+	dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
 	if (IS_ERR(dst->tag_ops)) {
 		dev_warn(ds->dev, "No tagger for this switch\n");
 		return PTR_ERR(dst->tag_ops);
@@ -541,7 +541,7 @@
 
 		ds->ports[reg].dn = port;
 
-		/* Initialize enabled_port_mask now for drv->setup()
+		/* Initialize enabled_port_mask now for ops->setup()
 		 * to have access to a correct value, just like what
 		 * net/dsa/dsa.c::dsa_switch_setup_one does.
 		 */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 00077a9..6cfd738 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -81,5 +81,7 @@
 /* tag_brcm.c */
 extern const struct dsa_device_ops brcm_netdev_ops;
 
+/* tag_qca.c */
+extern const struct dsa_device_ops qca_netdev_ops;
 
 #endif
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index fc91967..6b1282c 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -28,7 +28,7 @@
 	struct dsa_switch *ds = bus->priv;
 
 	if (ds->phys_mii_mask & (1 << addr))
-		return ds->drv->phy_read(ds, addr, reg);
+		return ds->ops->phy_read(ds, addr, reg);
 
 	return 0xffff;
 }
@@ -38,7 +38,7 @@
 	struct dsa_switch *ds = bus->priv;
 
 	if (ds->phys_mii_mask & (1 << addr))
-		return ds->drv->phy_write(ds, addr, reg, val);
+		return ds->ops->phy_write(ds, addr, reg, val);
 
 	return 0;
 }
@@ -69,6 +69,30 @@
 	return !!p->bridge_dev;
 }
 
+static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state)
+{
+	struct dsa_port *dp = &ds->ports[port];
+
+	if (ds->ops->port_stp_state_set)
+		ds->ops->port_stp_state_set(ds, port, state);
+
+	if (ds->ops->port_fast_age) {
+		/* Fast age FDB entries or flush appropriate forwarding database
+		 * for the given port, if we are moving it from Learning or
+		 * Forwarding state, to Disabled or Blocking or Listening state.
+		 */
+
+		if ((dp->stp_state == BR_STATE_LEARNING ||
+		     dp->stp_state == BR_STATE_FORWARDING) &&
+		    (state == BR_STATE_DISABLED ||
+		     state == BR_STATE_BLOCKING ||
+		     state == BR_STATE_LISTENING))
+			ds->ops->port_fast_age(ds, port);
+	}
+
+	dp->stp_state = state;
+}
+
 static int dsa_slave_open(struct net_device *dev)
 {
 	struct dsa_slave_priv *p = netdev_priv(dev);
@@ -98,14 +122,13 @@
 			goto clear_allmulti;
 	}
 
-	if (ds->drv->port_enable) {
-		err = ds->drv->port_enable(ds, p->port, p->phy);
+	if (ds->ops->port_enable) {
+		err = ds->ops->port_enable(ds, p->port, p->phy);
 		if (err)
 			goto clear_promisc;
 	}
 
-	if (ds->drv->port_stp_state_set)
-		ds->drv->port_stp_state_set(ds, p->port, stp_state);
+	dsa_port_set_stp_state(ds, p->port, stp_state);
 
 	if (p->phy)
 		phy_start(p->phy);
@@ -144,11 +167,10 @@
 	if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
 		dev_uc_del(master, dev->dev_addr);
 
-	if (ds->drv->port_disable)
-		ds->drv->port_disable(ds, p->port, p->phy);
+	if (ds->ops->port_disable)
+		ds->ops->port_disable(ds, p->port, p->phy);
 
-	if (ds->drv->port_stp_state_set)
-		ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED);
+	dsa_port_set_stp_state(ds, p->port, BR_STATE_DISABLED);
 
 	return 0;
 }
@@ -209,13 +231,13 @@
 	struct dsa_switch *ds = p->parent;
 
 	if (switchdev_trans_ph_prepare(trans)) {
-		if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add)
+		if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add)
 			return -EOPNOTSUPP;
 
-		return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans);
+		return ds->ops->port_vlan_prepare(ds, p->port, vlan, trans);
 	}
 
-	ds->drv->port_vlan_add(ds, p->port, vlan, trans);
+	ds->ops->port_vlan_add(ds, p->port, vlan, trans);
 
 	return 0;
 }
@@ -226,10 +248,10 @@
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 
-	if (!ds->drv->port_vlan_del)
+	if (!ds->ops->port_vlan_del)
 		return -EOPNOTSUPP;
 
-	return ds->drv->port_vlan_del(ds, p->port, vlan);
+	return ds->ops->port_vlan_del(ds, p->port, vlan);
 }
 
 static int dsa_slave_port_vlan_dump(struct net_device *dev,
@@ -239,8 +261,8 @@
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 
-	if (ds->drv->port_vlan_dump)
-		return ds->drv->port_vlan_dump(ds, p->port, vlan, cb);
+	if (ds->ops->port_vlan_dump)
+		return ds->ops->port_vlan_dump(ds, p->port, vlan, cb);
 
 	return -EOPNOTSUPP;
 }
@@ -253,13 +275,13 @@
 	struct dsa_switch *ds = p->parent;
 
 	if (switchdev_trans_ph_prepare(trans)) {
-		if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add)
+		if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add)
 			return -EOPNOTSUPP;
 
-		return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans);
+		return ds->ops->port_fdb_prepare(ds, p->port, fdb, trans);
 	}
 
-	ds->drv->port_fdb_add(ds, p->port, fdb, trans);
+	ds->ops->port_fdb_add(ds, p->port, fdb, trans);
 
 	return 0;
 }
@@ -271,8 +293,8 @@
 	struct dsa_switch *ds = p->parent;
 	int ret = -EOPNOTSUPP;
 
-	if (ds->drv->port_fdb_del)
-		ret = ds->drv->port_fdb_del(ds, p->port, fdb);
+	if (ds->ops->port_fdb_del)
+		ret = ds->ops->port_fdb_del(ds, p->port, fdb);
 
 	return ret;
 }
@@ -284,8 +306,52 @@
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 
-	if (ds->drv->port_fdb_dump)
-		return ds->drv->port_fdb_dump(ds, p->port, fdb, cb);
+	if (ds->ops->port_fdb_dump)
+		return ds->ops->port_fdb_dump(ds, p->port, fdb, cb);
+
+	return -EOPNOTSUPP;
+}
+
+static int dsa_slave_port_mdb_add(struct net_device *dev,
+				  const struct switchdev_obj_port_mdb *mdb,
+				  struct switchdev_trans *trans)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	if (switchdev_trans_ph_prepare(trans)) {
+		if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add)
+			return -EOPNOTSUPP;
+
+		return ds->ops->port_mdb_prepare(ds, p->port, mdb, trans);
+	}
+
+	ds->ops->port_mdb_add(ds, p->port, mdb, trans);
+
+	return 0;
+}
+
+static int dsa_slave_port_mdb_del(struct net_device *dev,
+				  const struct switchdev_obj_port_mdb *mdb)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	if (ds->ops->port_mdb_del)
+		return ds->ops->port_mdb_del(ds, p->port, mdb);
+
+	return -EOPNOTSUPP;
+}
+
+static int dsa_slave_port_mdb_dump(struct net_device *dev,
+				   struct switchdev_obj_port_mdb *mdb,
+				   switchdev_obj_dump_cb_t *cb)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	struct dsa_switch *ds = p->parent;
+
+	if (ds->ops->port_mdb_dump)
+		return ds->ops->port_mdb_dump(ds, p->port, mdb, cb);
 
 	return -EOPNOTSUPP;
 }
@@ -308,9 +374,9 @@
 	struct dsa_switch *ds = p->parent;
 
 	if (switchdev_trans_ph_prepare(trans))
-		return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP;
+		return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP;
 
-	ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state);
+	dsa_port_set_stp_state(ds, p->port, attr->u.stp_state);
 
 	return 0;
 }
@@ -326,8 +392,8 @@
 	if (switchdev_trans_ph_prepare(trans))
 		return 0;
 
-	if (ds->drv->port_vlan_filtering)
-		return ds->drv->port_vlan_filtering(ds, p->port,
+	if (ds->ops->port_vlan_filtering)
+		return ds->ops->port_vlan_filtering(ds, p->port,
 						    attr->u.vlan_filtering);
 
 	return 0;
@@ -365,8 +431,8 @@
 	ds->ports[p->port].ageing_time = ageing_time;
 	ageing_time = dsa_fastest_ageing_time(ds, ageing_time);
 
-	if (ds->drv->set_ageing_time)
-		return ds->drv->set_ageing_time(ds, ageing_time);
+	if (ds->ops->set_ageing_time)
+		return ds->ops->set_ageing_time(ds, ageing_time);
 
 	return 0;
 }
@@ -412,6 +478,10 @@
 					     SWITCHDEV_OBJ_PORT_FDB(obj),
 					     trans);
 		break;
+	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
+					     trans);
+		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		err = dsa_slave_port_vlan_add(dev,
 					      SWITCHDEV_OBJ_PORT_VLAN(obj),
@@ -435,6 +505,9 @@
 		err = dsa_slave_port_fdb_del(dev,
 					     SWITCHDEV_OBJ_PORT_FDB(obj));
 		break;
+	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj));
+		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		err = dsa_slave_port_vlan_del(dev,
 					      SWITCHDEV_OBJ_PORT_VLAN(obj));
@@ -459,6 +532,10 @@
 					      SWITCHDEV_OBJ_PORT_FDB(obj),
 					      cb);
 		break;
+	case SWITCHDEV_OBJ_ID_PORT_MDB:
+		err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj),
+					      cb);
+		break;
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		err = dsa_slave_port_vlan_dump(dev,
 					       SWITCHDEV_OBJ_PORT_VLAN(obj),
@@ -481,8 +558,8 @@
 
 	p->bridge_dev = br;
 
-	if (ds->drv->port_bridge_join)
-		ret = ds->drv->port_bridge_join(ds, p->port, br);
+	if (ds->ops->port_bridge_join)
+		ret = ds->ops->port_bridge_join(ds, p->port, br);
 
 	return ret == -EOPNOTSUPP ? 0 : ret;
 }
@@ -493,16 +570,15 @@
 	struct dsa_switch *ds = p->parent;
 
 
-	if (ds->drv->port_bridge_leave)
-		ds->drv->port_bridge_leave(ds, p->port);
+	if (ds->ops->port_bridge_leave)
+		ds->ops->port_bridge_leave(ds, p->port);
 
 	p->bridge_dev = NULL;
 
 	/* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer,
 	 * so allow it to be in BR_STATE_FORWARDING to be kept functional
 	 */
-	if (ds->drv->port_stp_state_set)
-		ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING);
+	dsa_port_set_stp_state(ds, p->port, BR_STATE_FORWARDING);
 }
 
 static int dsa_slave_port_attr_get(struct net_device *dev,
@@ -605,8 +681,8 @@
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 
-	if (ds->drv->get_regs_len)
-		return ds->drv->get_regs_len(ds, p->port);
+	if (ds->ops->get_regs_len)
+		return ds->ops->get_regs_len(ds, p->port);
 
 	return -EOPNOTSUPP;
 }
@@ -617,8 +693,8 @@
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 
-	if (ds->drv->get_regs)
-		ds->drv->get_regs(ds, p->port, regs, _p);
+	if (ds->ops->get_regs)
+		ds->ops->get_regs(ds, p->port, regs, _p);
 }
 
 static int dsa_slave_nway_reset(struct net_device *dev)
@@ -651,8 +727,8 @@
 	if (ds->cd && ds->cd->eeprom_len)
 		return ds->cd->eeprom_len;
 
-	if (ds->drv->get_eeprom_len)
-		return ds->drv->get_eeprom_len(ds);
+	if (ds->ops->get_eeprom_len)
+		return ds->ops->get_eeprom_len(ds);
 
 	return 0;
 }
@@ -663,8 +739,8 @@
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 
-	if (ds->drv->get_eeprom)
-		return ds->drv->get_eeprom(ds, eeprom, data);
+	if (ds->ops->get_eeprom)
+		return ds->ops->get_eeprom(ds, eeprom, data);
 
 	return -EOPNOTSUPP;
 }
@@ -675,8 +751,8 @@
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 
-	if (ds->drv->set_eeprom)
-		return ds->drv->set_eeprom(ds, eeprom, data);
+	if (ds->ops->set_eeprom)
+		return ds->ops->set_eeprom(ds, eeprom, data);
 
 	return -EOPNOTSUPP;
 }
@@ -694,8 +770,8 @@
 		strncpy(data + len, "tx_bytes", len);
 		strncpy(data + 2 * len, "rx_packets", len);
 		strncpy(data + 3 * len, "rx_bytes", len);
-		if (ds->drv->get_strings != NULL)
-			ds->drv->get_strings(ds, p->port, data + 4 * len);
+		if (ds->ops->get_strings)
+			ds->ops->get_strings(ds, p->port, data + 4 * len);
 	}
 }
 
@@ -714,8 +790,8 @@
 		dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data);
 	}
 
-	if (ds->drv->get_ethtool_stats)
-		ds->drv->get_ethtool_stats(ds, cpu_port, data + count);
+	if (ds->ops->get_ethtool_stats)
+		ds->ops->get_ethtool_stats(ds, cpu_port, data + count);
 }
 
 static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
@@ -727,8 +803,8 @@
 	if (dst->master_ethtool_ops.get_sset_count)
 		count += dst->master_ethtool_ops.get_sset_count(dev, sset);
 
-	if (sset == ETH_SS_STATS && ds->drv->get_sset_count)
-		count += ds->drv->get_sset_count(ds);
+	if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
+		count += ds->ops->get_sset_count(ds);
 
 	return count;
 }
@@ -755,14 +831,14 @@
 		dst->master_ethtool_ops.get_strings(dev, stringset, data);
 	}
 
-	if (stringset == ETH_SS_STATS && ds->drv->get_strings) {
+	if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
 		ndata = data + mcount * len;
 		/* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
 		 * the output after to prepend our CPU port prefix we
 		 * constructed earlier
 		 */
-		ds->drv->get_strings(ds, cpu_port, ndata);
-		count = ds->drv->get_sset_count(ds);
+		ds->ops->get_strings(ds, cpu_port, ndata);
+		count = ds->ops->get_sset_count(ds);
 		for (i = 0; i < count; i++) {
 			memmove(ndata + (i * len + sizeof(pfx)),
 				ndata + i * len, len - sizeof(pfx));
@@ -782,8 +858,8 @@
 	data[1] = dev->stats.tx_bytes;
 	data[2] = dev->stats.rx_packets;
 	data[3] = dev->stats.rx_bytes;
-	if (ds->drv->get_ethtool_stats != NULL)
-		ds->drv->get_ethtool_stats(ds, p->port, data + 4);
+	if (ds->ops->get_ethtool_stats)
+		ds->ops->get_ethtool_stats(ds, p->port, data + 4);
 }
 
 static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
@@ -795,8 +871,8 @@
 		int count;
 
 		count = 4;
-		if (ds->drv->get_sset_count != NULL)
-			count += ds->drv->get_sset_count(ds);
+		if (ds->ops->get_sset_count)
+			count += ds->ops->get_sset_count(ds);
 
 		return count;
 	}
@@ -809,8 +885,8 @@
 	struct dsa_slave_priv *p = netdev_priv(dev);
 	struct dsa_switch *ds = p->parent;
 
-	if (ds->drv->get_wol)
-		ds->drv->get_wol(ds, p->port, w);
+	if (ds->ops->get_wol)
+		ds->ops->get_wol(ds, p->port, w);
 }
 
 static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
@@ -819,8 +895,8 @@
 	struct dsa_switch *ds = p->parent;
 	int ret = -EOPNOTSUPP;
 
-	if (ds->drv->set_wol)
-		ret = ds->drv->set_wol(ds, p->port, w);
+	if (ds->ops->set_wol)
+		ret = ds->ops->set_wol(ds, p->port, w);
 
 	return ret;
 }
@@ -831,10 +907,10 @@
 	struct dsa_switch *ds = p->parent;
 	int ret;
 
-	if (!ds->drv->set_eee)
+	if (!ds->ops->set_eee)
 		return -EOPNOTSUPP;
 
-	ret = ds->drv->set_eee(ds, p->port, p->phy, e);
+	ret = ds->ops->set_eee(ds, p->port, p->phy, e);
 	if (ret)
 		return ret;
 
@@ -850,10 +926,10 @@
 	struct dsa_switch *ds = p->parent;
 	int ret;
 
-	if (!ds->drv->get_eee)
+	if (!ds->ops->get_eee)
 		return -EOPNOTSUPP;
 
-	ret = ds->drv->get_eee(ds, p->port, e);
+	ret = ds->ops->get_eee(ds, p->port, e);
 	if (ret)
 		return ret;
 
@@ -988,8 +1064,8 @@
 		p->old_pause = p->phy->pause;
 	}
 
-	if (ds->drv->adjust_link && status_changed)
-		ds->drv->adjust_link(ds, p->port, p->phy);
+	if (ds->ops->adjust_link && status_changed)
+		ds->ops->adjust_link(ds, p->port, p->phy);
 
 	if (status_changed)
 		phy_print_status(p->phy);
@@ -1004,8 +1080,8 @@
 	if (dev) {
 		p = netdev_priv(dev);
 		ds = p->parent;
-		if (ds->drv->fixed_link_update)
-			ds->drv->fixed_link_update(ds, p->port, status);
+		if (ds->ops->fixed_link_update)
+			ds->ops->fixed_link_update(ds, p->port, status);
 	}
 
 	return 0;
@@ -1062,8 +1138,8 @@
 		phy_dn = port_dn;
 	}
 
-	if (ds->drv->get_phy_flags)
-		phy_flags = ds->drv->get_phy_flags(ds, p->port);
+	if (ds->ops->get_phy_flags)
+		phy_flags = ds->ops->get_phy_flags(ds, p->port);
 
 	if (phy_dn) {
 		int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
new file mode 100644
index 0000000..0c90cac
--- /dev/null
+++ b/net/dsa/tag_qca.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/etherdevice.h>
+#include "dsa_priv.h"
+
+#define QCA_HDR_LEN	2
+#define QCA_HDR_VERSION	0x2
+
+#define QCA_HDR_RECV_VERSION_MASK	GENMASK(15, 14)
+#define QCA_HDR_RECV_VERSION_S		14
+#define QCA_HDR_RECV_PRIORITY_MASK	GENMASK(13, 11)
+#define QCA_HDR_RECV_PRIORITY_S		11
+#define QCA_HDR_RECV_TYPE_MASK		GENMASK(10, 6)
+#define QCA_HDR_RECV_TYPE_S		6
+#define QCA_HDR_RECV_FRAME_IS_TAGGED	BIT(3)
+#define QCA_HDR_RECV_SOURCE_PORT_MASK	GENMASK(2, 0)
+
+#define QCA_HDR_XMIT_VERSION_MASK	GENMASK(15, 14)
+#define QCA_HDR_XMIT_VERSION_S		14
+#define QCA_HDR_XMIT_PRIORITY_MASK	GENMASK(13, 11)
+#define QCA_HDR_XMIT_PRIORITY_S		11
+#define QCA_HDR_XMIT_CONTROL_MASK	GENMASK(10, 8)
+#define QCA_HDR_XMIT_CONTROL_S		8
+#define QCA_HDR_XMIT_FROM_CPU		BIT(7)
+#define QCA_HDR_XMIT_DP_BIT_MASK	GENMASK(6, 0)
+
+static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct dsa_slave_priv *p = netdev_priv(dev);
+	u16 *phdr, hdr;
+
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
+
+	if (skb_cow_head(skb, 0) < 0)
+		goto out_free;
+
+	skb_push(skb, QCA_HDR_LEN);
+
+	memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN);
+	phdr = (u16 *)(skb->data + 2 * ETH_ALEN);
+
+	/* Set the version field, and set destination port information */
+	hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
+		QCA_HDR_XMIT_FROM_CPU |
+		BIT(p->port);
+
+	*phdr = htons(hdr);
+
+	return skb;
+
+out_free:
+	kfree_skb(skb);
+	return NULL;
+}
+
+static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
+		       struct packet_type *pt, struct net_device *orig_dev)
+{
+	struct dsa_switch_tree *dst = dev->dsa_ptr;
+	struct dsa_switch *ds;
+	u8 ver;
+	int port;
+	__be16 *phdr, hdr;
+
+	if (unlikely(!dst))
+		goto out_drop;
+
+	skb = skb_unshare(skb, GFP_ATOMIC);
+	if (!skb)
+		goto out;
+
+	if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN)))
+		goto out_drop;
+
+	/* The QCA header is added by the switch between src addr and Ethertype
+	 * At this point, skb->data points to ethertype so header should be
+	 * right before
+	 */
+	phdr = (__be16 *)(skb->data - 2);
+	hdr = ntohs(*phdr);
+
+	/* Make sure the version is correct */
+	ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S;
+	if (unlikely(ver != QCA_HDR_VERSION))
+		goto out_drop;
+
+	/* Remove QCA tag and recalculate checksum */
+	skb_pull_rcsum(skb, QCA_HDR_LEN);
+	memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
+		ETH_HLEN - QCA_HDR_LEN);
+
+	/* This protocol doesn't support cascading multiple switches so it's
+	 * safe to assume the switch is first in the tree
+	 */
+	ds = dst->ds[0];
+	if (!ds)
+		goto out_drop;
+
+	/* Get source port information */
+	port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
+	if (!ds->ports[port].netdev)
+		goto out_drop;
+
+	/* Update skb & forward the frame accordingly */
+	skb_push(skb, ETH_HLEN);
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = ds->ports[port].netdev;
+	skb->protocol = eth_type_trans(skb, skb->dev);
+
+	skb->dev->stats.rx_packets++;
+	skb->dev->stats.rx_bytes += skb->len;
+
+	netif_receive_skb(skb);
+
+	return 0;
+
+out_drop:
+	kfree_skb(skb);
+out:
+	return 0;
+}
+
+const struct dsa_device_ops qca_netdev_ops = {
+	.xmit	= qca_tag_xmit,
+	.rcv	= qca_tag_rcv,
+};
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 50d6a9b..300b068 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -640,6 +640,21 @@
 	  D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
 	  delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg
 
+config TCP_CONG_BBR
+	tristate "BBR TCP"
+	default n
+	---help---
+
+	BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to
+	maximize network utilization and minimize queues. It builds an explicit
+	model of the the bottleneck delivery rate and path round-trip
+	propagation delay. It tolerates packet loss and delay unrelated to
+	congestion. It can operate over LAN, WAN, cellular, wifi, or cable
+	modem links. It can coexist with flows that use loss-based congestion
+	control, and can operate with shallow buffers, deep buffers,
+	bufferbloat, policers, or AQM schemes that do not provide a delay
+	signal. It requires the fq ("Fair Queue") pacing packet scheduler.
+
 choice
 	prompt "Default TCP congestion control"
 	default DEFAULT_CUBIC
@@ -674,6 +689,9 @@
 	config DEFAULT_CDG
 		bool "CDG" if TCP_CONG_CDG=y
 
+	config DEFAULT_BBR
+		bool "BBR" if TCP_CONG_BBR=y
+
 	config DEFAULT_RENO
 		bool "Reno"
 endchoice
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 24629b6..bc6a6c8 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -8,7 +8,7 @@
 	     inet_timewait_sock.o inet_connection_sock.o \
 	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
 	     tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
-	     tcp_recovery.o \
+	     tcp_rate.o tcp_recovery.o \
 	     tcp_offload.o datagram.o raw.o udp.o udplite.o \
 	     udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
 	     fib_frontend.o fib_semantics.o fib_trie.o \
@@ -41,6 +41,7 @@
 obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
 obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o
 obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
+obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o
 obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
 obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o
 obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 55513e6..1effc98 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -211,24 +211,19 @@
 	 * we can only allow the backlog to be adjusted.
 	 */
 	if (old_state != TCP_LISTEN) {
-		/* Check special setups for testing purpose to enable TFO w/o
-		 * requiring TCP_FASTOPEN sockopt.
+		/* Enable TFO w/o requiring TCP_FASTOPEN socket option.
 		 * Note that only TCP sockets (SOCK_STREAM) will reach here.
-		 * Also fastopenq may already been allocated because this
-		 * socket was in TCP_LISTEN state previously but was
-		 * shutdown() (rather than close()).
+		 * Also fastopen backlog may already been set via the option
+		 * because the socket was in TCP_LISTEN state previously but
+		 * was shutdown() rather than close().
 		 */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
+		if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+		    (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
 		    !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
-			if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
-				fastopen_queue_tune(sk, backlog);
-			else if ((sysctl_tcp_fastopen &
-				  TFO_SERVER_WO_SOCKOPT2) != 0)
-				fastopen_queue_tune(sk,
-				    ((uint)sysctl_tcp_fastopen) >> 16);
-
+			fastopen_queue_tune(sk, backlog);
 			tcp_fastopen_init_key_once(true);
 		}
+
 		err = inet_csk_listen_start(sk, backlog);
 		if (err)
 			goto out;
@@ -921,6 +916,8 @@
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
+	.read_sock	   = tcp_read_sock,
+	.peek_len	   = tcp_peek_len,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
@@ -1195,7 +1192,7 @@
 struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 				 netdev_features_t features)
 {
-	bool udpfrag = false, fixedid = false, encap;
+	bool udpfrag = false, fixedid = false, gso_partial, encap;
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	const struct net_offload *ops;
 	unsigned int offset = 0;
@@ -1248,6 +1245,8 @@
 	if (IS_ERR_OR_NULL(segs))
 		goto out;
 
+	gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);
+
 	skb = segs;
 	do {
 		iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
@@ -1262,9 +1261,13 @@
 				iph->id = htons(id);
 				id += skb_shinfo(skb)->gso_segs;
 			}
-			tot_len = skb_shinfo(skb)->gso_size +
-				  SKB_GSO_CB(skb)->data_offset +
-				  skb->head - (unsigned char *)iph;
+
+			if (gso_partial)
+				tot_len = skb_shinfo(skb)->gso_size +
+					  SKB_GSO_CB(skb)->data_offset +
+					  skb->head - (unsigned char *)iph;
+			else
+				tot_len = skb->len - nhoff;
 		} else {
 			if (!fixedid)
 				iph->id = htons(id++);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 415e117..062a67c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2232,7 +2232,7 @@
 };
 
 static int __devinet_sysctl_register(struct net *net, char *dev_name,
-					struct ipv4_devconf *p)
+				     int ifindex, struct ipv4_devconf *p)
 {
 	int i;
 	struct devinet_sysctl_table *t;
@@ -2255,6 +2255,8 @@
 		goto free;
 
 	p->sysctl = t;
+
+	inet_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
 	return 0;
 
 free:
@@ -2286,7 +2288,7 @@
 	if (err)
 		return err;
 	err = __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
-					&idev->cnf);
+					idev->dev->ifindex, &idev->cnf);
 	if (err)
 		neigh_sysctl_unregister(idev->arp_parms);
 	return err;
@@ -2347,11 +2349,12 @@
 	}
 
 #ifdef CONFIG_SYSCTL
-	err = __devinet_sysctl_register(net, "all", all);
+	err = __devinet_sysctl_register(net, "all", NETCONFA_IFINDEX_ALL, all);
 	if (err < 0)
 		goto err_reg_all;
 
-	err = __devinet_sysctl_register(net, "default", dflt);
+	err = __devinet_sysctl_register(net, "default",
+					NETCONFA_IFINDEX_DEFAULT, dflt);
 	if (err < 0)
 		goto err_reg_dflt;
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ef2ebeb..c3b8047 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -93,9 +93,6 @@
 		return NULL;
 
 	switch (id) {
-	case RT_TABLE_LOCAL:
-		rcu_assign_pointer(net->ipv4.fib_local, tb);
-		break;
 	case RT_TABLE_MAIN:
 		rcu_assign_pointer(net->ipv4.fib_main, tb);
 		break;
@@ -137,9 +134,6 @@
 {
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	switch (new->tb_id) {
-	case RT_TABLE_LOCAL:
-		rcu_assign_pointer(net->ipv4.fib_local, new);
-		break;
 	case RT_TABLE_MAIN:
 		rcu_assign_pointer(net->ipv4.fib_main, new);
 		break;
@@ -188,26 +182,13 @@
 		struct fib_table *tb;
 
 		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist)
-			flushed += fib_table_flush(tb);
+			flushed += fib_table_flush(net, tb);
 	}
 
 	if (flushed)
 		rt_cache_flush(net);
 }
 
-void fib_flush_external(struct net *net)
-{
-	struct fib_table *tb;
-	struct hlist_head *head;
-	unsigned int h;
-
-	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
-		head = &net->ipv4.fib_table_hash[h];
-		hlist_for_each_entry(tb, head, tb_hlist)
-			fib_table_flush_external(tb);
-	}
-}
-
 /*
  * Find address type as if only "dev" was present in the system. If
  * on_dev is NULL then all interfaces are taken into consideration.
@@ -509,6 +490,7 @@
 		if (!dev)
 			return -ENODEV;
 		cfg->fc_oif = dev->ifindex;
+		cfg->fc_table = l3mdev_fib_table(dev);
 		if (colon) {
 			struct in_ifaddr *ifa;
 			struct in_device *in_dev = __in_dev_get_rtnl(dev);
@@ -595,13 +577,13 @@
 			if (cmd == SIOCDELRT) {
 				tb = fib_get_table(net, cfg.fc_table);
 				if (tb)
-					err = fib_table_delete(tb, &cfg);
+					err = fib_table_delete(net, tb, &cfg);
 				else
 					err = -ESRCH;
 			} else {
 				tb = fib_new_table(net, cfg.fc_table);
 				if (tb)
-					err = fib_table_insert(tb, &cfg);
+					err = fib_table_insert(net, tb, &cfg);
 				else
 					err = -ENOBUFS;
 			}
@@ -724,7 +706,7 @@
 		goto errout;
 	}
 
-	err = fib_table_delete(tb, &cfg);
+	err = fib_table_delete(net, tb, &cfg);
 errout:
 	return err;
 }
@@ -746,7 +728,7 @@
 		goto errout;
 	}
 
-	err = fib_table_insert(tb, &cfg);
+	err = fib_table_insert(net, tb, &cfg);
 errout:
 	return err;
 }
@@ -833,9 +815,9 @@
 		cfg.fc_scope = RT_SCOPE_HOST;
 
 	if (cmd == RTM_NEWROUTE)
-		fib_table_insert(tb, &cfg);
+		fib_table_insert(net, tb, &cfg);
 	else
-		fib_table_delete(tb, &cfg);
+		fib_table_delete(net, tb, &cfg);
 }
 
 void fib_add_ifaddr(struct in_ifaddr *ifa)
@@ -1027,7 +1009,7 @@
 			 * First of all, we scan fib_info list searching
 			 * for stray nexthop entries, then ignite fib_flush.
 			 */
-			if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
+			if (fib_sync_down_addr(dev, ifa->ifa_local))
 				fib_flush(dev_net(dev));
 		}
 	}
@@ -1249,7 +1231,6 @@
 
 	rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
-	RCU_INIT_POINTER(net->ipv4.fib_local, NULL);
 	RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
 	RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
 #endif
@@ -1260,7 +1241,7 @@
 
 		hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) {
 			hlist_del(&tb->tb_hlist);
-			fib_table_flush(tb);
+			fib_table_flush(net, tb);
 			fib_free_table(tb);
 		}
 	}
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 6e9ea69..2e50062 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -56,6 +56,9 @@
 	};
 	int err;
 
+	/* update flow if oif or iif point to device enslaved to l3mdev */
+	l3mdev_update_flow(net, flowi4_to_flowi(flp));
+
 	err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg);
 #ifdef CONFIG_IP_ROUTE_CLASSID
 	if (arg.rule)
@@ -161,6 +164,14 @@
 	return NULL;
 }
 
+static int call_fib_rule_notifiers(struct net *net,
+				   enum fib_event_type event_type)
+{
+	struct fib_notifier_info info;
+
+	return call_fib_notifiers(net, event_type, &info);
+}
+
 static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
 	FRA_GENERIC_POLICY,
 	[FRA_FLOW]	= { .type = NLA_U32 },
@@ -217,7 +228,7 @@
 	rule4->tos = frh->tos;
 
 	net->ipv4.fib_has_custom_rules = true;
-	fib_flush_external(rule->fr_net);
+	call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD);
 
 	err = 0;
 errout:
@@ -239,7 +250,7 @@
 		net->ipv4.fib_num_tclassid_users--;
 #endif
 	net->ipv4.fib_has_custom_rules = true;
-	fib_flush_external(rule->fr_net);
+	call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL);
 errout:
 	return err;
 }
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 539fa26..388d3e2 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1057,6 +1057,7 @@
 	fi->fib_priority = cfg->fc_priority;
 	fi->fib_prefsrc = cfg->fc_prefsrc;
 	fi->fib_type = cfg->fc_type;
+	fi->fib_tb_id = cfg->fc_table;
 
 	fi->fib_nhs = nhs;
 	change_nexthops(fi) {
@@ -1337,18 +1338,21 @@
  *   referring to it.
  * - device went down -> we must shutdown all nexthops going via it.
  */
-int fib_sync_down_addr(struct net *net, __be32 local)
+int fib_sync_down_addr(struct net_device *dev, __be32 local)
 {
 	int ret = 0;
 	unsigned int hash = fib_laddr_hashfn(local);
 	struct hlist_head *head = &fib_info_laddrhash[hash];
+	struct net *net = dev_net(dev);
+	int tb_id = l3mdev_fib_table(dev);
 	struct fib_info *fi;
 
 	if (!fib_info_laddrhash || local == 0)
 		return 0;
 
 	hlist_for_each_entry(fi, head, fib_lhash) {
-		if (!net_eq(fi->fib_net, net))
+		if (!net_eq(fi->fib_net, net) ||
+		    fi->fib_tb_id != tb_id)
 			continue;
 		if (fi->fib_prefsrc == local) {
 			fi->fib_flags |= RTNH_F_DEAD;
@@ -1576,7 +1580,8 @@
 
 		rcu_read_lock_bh();
 
-		n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw);
+		n = __ipv4_neigh_lookup_noref(nh->nh_dev,
+					      (__force u32)nh->nh_gw);
 		if (n)
 			state = n->nud_state;
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d07fc07..31cef36 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -73,6 +73,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 #include <linux/vmalloc.h>
+#include <linux/notifier.h>
 #include <net/net_namespace.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -80,10 +81,47 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <net/ip_fib.h>
-#include <net/switchdev.h>
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
+static BLOCKING_NOTIFIER_HEAD(fib_chain);
+
+int register_fib_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&fib_chain, nb);
+}
+EXPORT_SYMBOL(register_fib_notifier);
+
+int unregister_fib_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&fib_chain, nb);
+}
+EXPORT_SYMBOL(unregister_fib_notifier);
+
+int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
+		       struct fib_notifier_info *info)
+{
+	info->net = net;
+	return blocking_notifier_call_chain(&fib_chain, event_type, info);
+}
+
+static int call_fib_entry_notifiers(struct net *net,
+				    enum fib_event_type event_type, u32 dst,
+				    int dst_len, struct fib_info *fi,
+				    u8 tos, u8 type, u32 tb_id, u32 nlflags)
+{
+	struct fib_entry_notifier_info info = {
+		.dst = dst,
+		.dst_len = dst_len,
+		.fi = fi,
+		.tos = tos,
+		.type = type,
+		.tb_id = tb_id,
+		.nlflags = nlflags,
+	};
+	return call_fib_notifiers(net, event_type, &info.info);
+}
+
 #define MAX_STAT_DEPTH 32
 
 #define KEYLENGTH	(8*sizeof(t_key))
@@ -249,7 +287,7 @@
  * index into the parent's child array. That is, they will be used to find
  * 'n' among tp's children.
  *
- * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits
+ * The bits from (n->pos + n->bits) to (tp->pos - 1) - "S" - are skipped bits
  * for the node n.
  *
  * All the bits we have seen so far are significant to the node n. The rest
@@ -258,7 +296,7 @@
  * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
  * n's child array, and will of course be different for each child.
  *
- * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown
+ * The rest of the bits, from 0 to (n->pos -1) - "u" - are completely unknown
  * at this point.
  */
 
@@ -1076,12 +1114,13 @@
 }
 
 /* Caller must hold RTNL. */
-int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
+int fib_table_insert(struct net *net, struct fib_table *tb,
+		     struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *)tb->tb_data;
 	struct fib_alias *fa, *new_fa;
 	struct key_vector *l, *tp;
-	unsigned int nlflags = 0;
+	u16 nlflags = NLM_F_EXCL;
 	struct fib_info *fi;
 	u8 plen = cfg->fc_dst_len;
 	u8 slen = KEYLENGTH - plen;
@@ -1126,6 +1165,8 @@
 		if (cfg->fc_nlflags & NLM_F_EXCL)
 			goto out;
 
+		nlflags &= ~NLM_F_EXCL;
+
 		/* We have 2 goals:
 		 * 1. Find exact match for type, scope, fib_info to avoid
 		 * duplicate routes
@@ -1151,6 +1192,7 @@
 			struct fib_info *fi_drop;
 			u8 state;
 
+			nlflags |= NLM_F_REPLACE;
 			fa = fa_first;
 			if (fa_match) {
 				if (fa == fa_match)
@@ -1172,17 +1214,6 @@
 			new_fa->tb_id = tb->tb_id;
 			new_fa->fa_default = -1;
 
-			err = switchdev_fib_ipv4_add(key, plen, fi,
-						     new_fa->fa_tos,
-						     cfg->fc_type,
-						     cfg->fc_nlflags,
-						     tb->tb_id);
-			if (err) {
-				switchdev_fib_ipv4_abort(fi);
-				kmem_cache_free(fn_alias_kmem, new_fa);
-				goto out;
-			}
-
 			hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list);
 
 			alias_free_mem_rcu(fa);
@@ -1190,8 +1221,13 @@
 			fib_release_info(fi_drop);
 			if (state & FA_S_ACCESSED)
 				rt_cache_flush(cfg->fc_nlinfo.nl_net);
+
+			call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
+						 key, plen, fi,
+						 new_fa->fa_tos, cfg->fc_type,
+						 tb->tb_id, cfg->fc_nlflags);
 			rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
-				tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
+				tb->tb_id, &cfg->fc_nlinfo, nlflags);
 
 			goto succeeded;
 		}
@@ -1203,7 +1239,7 @@
 			goto out;
 
 		if (cfg->fc_nlflags & NLM_F_APPEND)
-			nlflags = NLM_F_APPEND;
+			nlflags |= NLM_F_APPEND;
 		else
 			fa = fa_first;
 	}
@@ -1211,6 +1247,7 @@
 	if (!(cfg->fc_nlflags & NLM_F_CREATE))
 		goto out;
 
+	nlflags |= NLM_F_CREATE;
 	err = -ENOBUFS;
 	new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
 	if (!new_fa)
@@ -1224,30 +1261,22 @@
 	new_fa->tb_id = tb->tb_id;
 	new_fa->fa_default = -1;
 
-	/* (Optionally) offload fib entry to switch hardware. */
-	err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type,
-				     cfg->fc_nlflags, tb->tb_id);
-	if (err) {
-		switchdev_fib_ipv4_abort(fi);
-		goto out_free_new_fa;
-	}
-
 	/* Insert new entry to the list. */
 	err = fib_insert_alias(t, tp, l, new_fa, fa, key);
 	if (err)
-		goto out_sw_fib_del;
+		goto out_free_new_fa;
 
 	if (!plen)
 		tb->tb_num_default++;
 
 	rt_cache_flush(cfg->fc_nlinfo.nl_net);
+	call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos,
+				 cfg->fc_type, tb->tb_id, cfg->fc_nlflags);
 	rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
 		  &cfg->fc_nlinfo, nlflags);
 succeeded:
 	return 0;
 
-out_sw_fib_del:
-	switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id);
 out_free_new_fa:
 	kmem_cache_free(fn_alias_kmem, new_fa);
 out:
@@ -1486,7 +1515,8 @@
 }
 
 /* Caller must hold RTNL. */
-int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
+int fib_table_delete(struct net *net, struct fib_table *tb,
+		     struct fib_config *cfg)
 {
 	struct trie *t = (struct trie *) tb->tb_data;
 	struct fib_alias *fa, *fa_to_delete;
@@ -1539,9 +1569,9 @@
 	if (!fa_to_delete)
 		return -ESRCH;
 
-	switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos,
-			       cfg->fc_type, tb->tb_id);
-
+	call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
+				 fa_to_delete->fa_info, tos, cfg->fc_type,
+				 tb->tb_id, 0);
 	rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
 		  &cfg->fc_nlinfo, 0);
 
@@ -1730,82 +1760,8 @@
 	return NULL;
 }
 
-/* Caller must hold RTNL */
-void fib_table_flush_external(struct fib_table *tb)
-{
-	struct trie *t = (struct trie *)tb->tb_data;
-	struct key_vector *pn = t->kv;
-	unsigned long cindex = 1;
-	struct hlist_node *tmp;
-	struct fib_alias *fa;
-
-	/* walk trie in reverse order */
-	for (;;) {
-		unsigned char slen = 0;
-		struct key_vector *n;
-
-		if (!(cindex--)) {
-			t_key pkey = pn->key;
-
-			/* cannot resize the trie vector */
-			if (IS_TRIE(pn))
-				break;
-
-			/* resize completed node */
-			pn = resize(t, pn);
-			cindex = get_index(pkey, pn);
-
-			continue;
-		}
-
-		/* grab the next available node */
-		n = get_child(pn, cindex);
-		if (!n)
-			continue;
-
-		if (IS_TNODE(n)) {
-			/* record pn and cindex for leaf walking */
-			pn = n;
-			cindex = 1ul << n->bits;
-
-			continue;
-		}
-
-		hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) {
-			struct fib_info *fi = fa->fa_info;
-
-			/* if alias was cloned to local then we just
-			 * need to remove the local copy from main
-			 */
-			if (tb->tb_id != fa->tb_id) {
-				hlist_del_rcu(&fa->fa_list);
-				alias_free_mem_rcu(fa);
-				continue;
-			}
-
-			/* record local slen */
-			slen = fa->fa_slen;
-
-			if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD))
-				continue;
-
-			switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
-					       fi, fa->fa_tos, fa->fa_type,
-					       tb->tb_id);
-		}
-
-		/* update leaf slen */
-		n->slen = slen;
-
-		if (hlist_empty(&n->leaf)) {
-			put_child_root(pn, n->key, NULL);
-			node_free(n);
-		}
-	}
-}
-
 /* Caller must hold RTNL. */
-int fib_table_flush(struct fib_table *tb)
+int fib_table_flush(struct net *net, struct fib_table *tb)
 {
 	struct trie *t = (struct trie *)tb->tb_data;
 	struct key_vector *pn = t->kv;
@@ -1854,9 +1810,11 @@
 				continue;
 			}
 
-			switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen,
-					       fi, fa->fa_tos, fa->fa_type,
-					       tb->tb_id);
+			call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
+						 n->key,
+						 KEYLENGTH - fa->fa_slen,
+						 fi, fa->fa_tos, fa->fa_type,
+						 tb->tb_id, 0);
 			hlist_del_rcu(&fa->fa_list);
 			fib_release_info(fa->fa_info);
 			alias_free_mem_rcu(fa);
@@ -2452,9 +2410,7 @@
 static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter,
 					    loff_t pos)
 {
-	struct fib_table *tb = iter->main_tb;
 	struct key_vector *l, **tp = &iter->tnode;
-	struct trie *t;
 	t_key key;
 
 	/* use cache location of next-to-find key */
@@ -2462,8 +2418,6 @@
 		pos -= iter->pos;
 		key = iter->key;
 	} else {
-		t = (struct trie *)tb->tb_data;
-		iter->tnode = t->kv;
 		iter->pos = 0;
 		key = 0;
 	}
@@ -2504,12 +2458,12 @@
 		return NULL;
 
 	iter->main_tb = tb;
+	t = (struct trie *)tb->tb_data;
+	iter->tnode = t->kv;
 
 	if (*pos != 0)
 		return fib_route_get_idx(iter, *pos);
 
-	t = (struct trie *)tb->tb_data;
-	iter->tnode = t->kv;
 	iter->pos = 0;
 	iter->key = 0;
 
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 321d57f..cf50f7e 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -631,7 +631,7 @@
 	.netnsok	= true,
 };
 
-static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
+static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
 	[FOU_ATTR_PORT] = { .type = NLA_U16, },
 	[FOU_ATTR_AF] = { .type = NLA_U8, },
 	[FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index ecd1e09..96e0efe 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -24,7 +24,7 @@
 	__be16 protocol = skb->protocol;
 	u16 mac_len = skb->mac_len;
 	int gre_offset, outer_hlen;
-	bool need_csum, ufo;
+	bool need_csum, ufo, gso_partial;
 
 	if (!skb->encapsulation)
 		goto out;
@@ -69,6 +69,8 @@
 		goto out;
 	}
 
+	gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);
+
 	outer_hlen = skb_tnl_header_len(skb);
 	gre_offset = outer_hlen - tnl_hlen;
 	skb = segs;
@@ -96,7 +98,7 @@
 		greh = (struct gre_base_hdr *)skb_transport_header(skb);
 		pcsum = (__sum16 *)(greh + 1);
 
-		if (skb_is_gso(skb)) {
+		if (gso_partial) {
 			unsigned int partial_adj;
 
 			/* Adjust checksum to account for the fact that
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 9b4ca87..606cc3e 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -472,6 +472,15 @@
 			continue;
 		}
 
+		/* Based on RFC3376 5.1. Should not send source-list change
+		 * records when there is a filter mode change.
+		 */
+		if (((gdeleted && pmc->sfmode == MCAST_EXCLUDE) ||
+		     (!gdeleted && pmc->crcount)) &&
+		    (type == IGMPV3_ALLOW_NEW_SOURCES ||
+		     type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount)
+			goto decrease_sf_crcount;
+
 		/* clear marks on query responses */
 		if (isquery)
 			psf->sf_gsresp = 0;
@@ -499,6 +508,7 @@
 		scount++; stotal++;
 		if ((type == IGMPV3_ALLOW_NEW_SOURCES ||
 		     type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+decrease_sf_crcount:
 			psf->sf_crcount--;
 			if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
 				if (psf_prev)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 38c2c47..e4d16fc 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -45,6 +45,7 @@
 	u16 family;
 	u16 userlocks;
 	u32 ifindex;
+	u32 mark;
 };
 
 static DEFINE_MUTEX(inet_diag_table_mutex);
@@ -98,6 +99,7 @@
 		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
 		+ nla_total_size(1) /* INET_DIAG_TOS */
 		+ nla_total_size(1) /* INET_DIAG_TCLASS */
+		+ nla_total_size(4) /* INET_DIAG_MARK */
 		+ nla_total_size(sizeof(struct inet_diag_meminfo))
 		+ nla_total_size(sizeof(struct inet_diag_msg))
 		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
@@ -108,7 +110,8 @@
 
 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 			     struct inet_diag_msg *r, int ext,
-			     struct user_namespace *user_ns)
+			     struct user_namespace *user_ns,
+			     bool net_admin)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 
@@ -135,6 +138,9 @@
 	}
 #endif
 
+	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+		goto errout;
+
 	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
 	r->idiag_inode = sock_i_ino(sk);
 
@@ -148,7 +154,8 @@
 		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
 		      struct user_namespace *user_ns,
 		      u32 portid, u32 seq, u16 nlmsg_flags,
-		      const struct nlmsghdr *unlh)
+		      const struct nlmsghdr *unlh,
+		      bool net_admin)
 {
 	const struct tcp_congestion_ops *ca_ops;
 	const struct inet_diag_handler *handler;
@@ -174,7 +181,7 @@
 	r->idiag_timer = 0;
 	r->idiag_retrans = 0;
 
-	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
+	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
 		goto errout;
 
 	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
@@ -273,10 +280,11 @@
 			      const struct inet_diag_req_v2 *req,
 			      struct user_namespace *user_ns,
 			      u32 portid, u32 seq, u16 nlmsg_flags,
-			      const struct nlmsghdr *unlh)
+			      const struct nlmsghdr *unlh,
+			      bool net_admin)
 {
-	return inet_sk_diag_fill(sk, inet_csk(sk), skb, req,
-				 user_ns, portid, seq, nlmsg_flags, unlh);
+	return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns,
+				 portid, seq, nlmsg_flags, unlh, net_admin);
 }
 
 static int inet_twsk_diag_fill(struct sock *sk,
@@ -318,8 +326,9 @@
 
 static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
 			      u32 portid, u32 seq, u16 nlmsg_flags,
-			      const struct nlmsghdr *unlh)
+			      const struct nlmsghdr *unlh, bool net_admin)
 {
+	struct request_sock *reqsk = inet_reqsk(sk);
 	struct inet_diag_msg *r;
 	struct nlmsghdr *nlh;
 	long tmo;
@@ -333,7 +342,7 @@
 	inet_diag_msg_common_fill(r, sk);
 	r->idiag_state = TCP_SYN_RECV;
 	r->idiag_timer = 1;
-	r->idiag_retrans = inet_reqsk(sk)->num_retrans;
+	r->idiag_retrans = reqsk->num_retrans;
 
 	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
 		     offsetof(struct sock, sk_cookie));
@@ -345,6 +354,10 @@
 	r->idiag_uid	= 0;
 	r->idiag_inode	= 0;
 
+	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
+				     inet_rsk(reqsk)->ir_mark))
+		return -EMSGSIZE;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 }
@@ -353,7 +366,7 @@
 			const struct inet_diag_req_v2 *r,
 			struct user_namespace *user_ns,
 			u32 portid, u32 seq, u16 nlmsg_flags,
-			const struct nlmsghdr *unlh)
+			const struct nlmsghdr *unlh, bool net_admin)
 {
 	if (sk->sk_state == TCP_TIME_WAIT)
 		return inet_twsk_diag_fill(sk, skb, portid, seq,
@@ -361,10 +374,10 @@
 
 	if (sk->sk_state == TCP_NEW_SYN_RECV)
 		return inet_req_diag_fill(sk, skb, portid, seq,
-					  nlmsg_flags, unlh);
+					  nlmsg_flags, unlh, net_admin);
 
 	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
-				  nlmsg_flags, unlh);
+				  nlmsg_flags, unlh, net_admin);
 }
 
 struct sock *inet_diag_find_one_icsk(struct net *net,
@@ -434,7 +447,8 @@
 	err = sk_diag_fill(sk, rep, req,
 			   sk_user_ns(NETLINK_CB(in_skb).sk),
 			   NETLINK_CB(in_skb).portid,
-			   nlh->nlmsg_seq, 0, nlh);
+			   nlh->nlmsg_seq, 0, nlh,
+			   netlink_net_capable(in_skb, CAP_NET_ADMIN));
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
 		nlmsg_free(rep);
@@ -580,6 +594,14 @@
 				yes = 0;
 			break;
 		}
+		case INET_DIAG_BC_MARK_COND: {
+			struct inet_diag_markcond *cond;
+
+			cond = (struct inet_diag_markcond *)(op + 1);
+			if ((entry->mark & cond->mask) != cond->mark)
+				yes = 0;
+			break;
+		}
 		}
 
 		if (yes) {
@@ -624,6 +646,12 @@
 	entry.dport = ntohs(inet->inet_dport);
 	entry.ifindex = sk->sk_bound_dev_if;
 	entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
+	if (sk_fullsock(sk))
+		entry.mark = sk->sk_mark;
+	else if (sk->sk_state == TCP_NEW_SYN_RECV)
+		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+	else
+		entry.mark = 0;
 
 	return inet_diag_bc_run(bc, &entry);
 }
@@ -706,10 +734,25 @@
 	return true;
 }
 
-static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
+static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
+			   int *min_len)
 {
-	const void *bc = bytecode;
-	int  len = bytecode_len;
+	*min_len += sizeof(struct inet_diag_markcond);
+	return len >= *min_len;
+}
+
+static int inet_diag_bc_audit(const struct nlattr *attr,
+			      const struct sk_buff *skb)
+{
+	bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
+	const void *bytecode, *bc;
+	int bytecode_len, len;
+
+	if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
+		return -EINVAL;
+
+	bytecode = bc = nla_data(attr);
+	len = bytecode_len = nla_len(attr);
 
 	while (len > 0) {
 		int min_len = sizeof(struct inet_diag_bc_op);
@@ -732,6 +775,12 @@
 			if (!valid_port_comparison(bc, len, &min_len))
 				return -EINVAL;
 			break;
+		case INET_DIAG_BC_MARK_COND:
+			if (!net_admin)
+				return -EPERM;
+			if (!valid_markcond(bc, len, &min_len))
+				return -EINVAL;
+			break;
 		case INET_DIAG_BC_AUTO:
 		case INET_DIAG_BC_JMP:
 		case INET_DIAG_BC_NOP:
@@ -760,7 +809,8 @@
 			      struct sk_buff *skb,
 			      struct netlink_callback *cb,
 			      const struct inet_diag_req_v2 *r,
-			      const struct nlattr *bc)
+			      const struct nlattr *bc,
+			      bool net_admin)
 {
 	if (!inet_diag_bc_sk(bc, sk))
 		return 0;
@@ -768,7 +818,8 @@
 	return inet_csk_diag_fill(sk, skb, r,
 				  sk_user_ns(NETLINK_CB(cb->skb).sk),
 				  NETLINK_CB(cb->skb).portid,
-				  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh,
+				  net_admin);
 }
 
 static void twsk_build_assert(void)
@@ -804,6 +855,7 @@
 	struct net *net = sock_net(skb->sk);
 	int i, num, s_i, s_num;
 	u32 idiag_states = r->idiag_states;
+	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
 
 	if (idiag_states & TCPF_SYN_RECV)
 		idiag_states |= TCPF_NEW_SYN_RECV;
@@ -844,7 +896,8 @@
 				    cb->args[3] > 0)
 					goto next_listen;
 
-				if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
+				if (inet_csk_diag_dump(sk, skb, cb, r,
+						       bc, net_admin) < 0) {
 					spin_unlock_bh(&ilb->lock);
 					goto done;
 				}
@@ -912,7 +965,7 @@
 					   sk_user_ns(NETLINK_CB(cb->skb).sk),
 					   NETLINK_CB(cb->skb).portid,
 					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					   cb->nlh);
+					   cb->nlh, net_admin);
 			if (res < 0) {
 				spin_unlock_bh(lock);
 				goto done;
@@ -1020,13 +1073,13 @@
 	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 		if (nlmsg_attrlen(nlh, hdrlen)) {
 			struct nlattr *attr;
+			int err;
 
 			attr = nlmsg_find_attr(nlh, hdrlen,
 					       INET_DIAG_REQ_BYTECODE);
-			if (!attr ||
-			    nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
-			    inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
-				return -EINVAL;
+			err = inet_diag_bc_audit(attr, skb);
+			if (err)
+				return err;
 		}
 		{
 			struct netlink_dump_control c = {
@@ -1051,13 +1104,13 @@
 	    h->nlmsg_flags & NLM_F_DUMP) {
 		if (nlmsg_attrlen(h, hdrlen)) {
 			struct nlattr *attr;
+			int err;
 
 			attr = nlmsg_find_attr(h, hdrlen,
 					       INET_DIAG_REQ_BYTECODE);
-			if (!attr ||
-			    nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
-			    inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
-				return -EINVAL;
+			err = inet_diag_bc_audit(attr, skb);
+			if (err)
+				return err;
 		}
 		{
 			struct netlink_dump_control c = {
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5b1481b..576f705 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -246,25 +246,6 @@
 	ipgre_err(skb, info, &tpi);
 }
 
-static __be64 key_to_tunnel_id(__be32 key)
-{
-#ifdef __BIG_ENDIAN
-	return (__force __be64)((__force u32)key);
-#else
-	return (__force __be64)((__force u64)key << 32);
-#endif
-}
-
-/* Returns the least-significant 32 bits of a __be64. */
-static __be32 tunnel_id_to_key(__be64 x)
-{
-#ifdef __BIG_ENDIAN
-	return (__force __be32)x;
-#else
-	return (__force __be32)((__force u64)x >> 32);
-#endif
-}
-
 static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
 {
@@ -290,7 +271,7 @@
 			__be64 tun_id;
 
 			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
-			tun_id = key_to_tunnel_id(tpi->key);
+			tun_id = key32_to_tunnel_id(tpi->key);
 			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
 			if (!tun_dst)
 				return PACKET_REJECT;
@@ -370,7 +351,6 @@
 			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
 			 htonl(tunnel->o_seqno));
 
-	skb_set_inner_protocol(skb, proto);
 	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 }
 
@@ -447,7 +427,7 @@
 
 	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
 	gre_build_header(skb, tunnel_hlen, flags, proto,
-			 tunnel_id_to_key(tun_info->key.tun_id), 0);
+			 tunnel_id_to_key32(tun_info->key.tun_id), 0);
 
 	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 4b351af..d6feabb 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -312,6 +312,7 @@
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
+	struct net_device *dev = skb->dev;
 
 	/* if ingress device is enslaved to an L3 master device pass the
 	 * skb to its handler for processing
@@ -341,7 +342,7 @@
 	 */
 	if (!skb_valid_dst(skb)) {
 		int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
-					       iph->tos, skb->dev);
+					       iph->tos, dev);
 		if (unlikely(err)) {
 			if (err == -EXDEV)
 				__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
@@ -370,7 +371,7 @@
 		__IP_UPD_PO_STATS(net, IPSTATS_MIB_INBCAST, skb->len);
 	} else if (skb->pkt_type == PACKET_BROADCAST ||
 		   skb->pkt_type == PACKET_MULTICAST) {
-		struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+		struct in_device *in_dev = __in_dev_get_rcu(dev);
 
 		/* RFC 1122 3.3.6:
 		 *
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index dde37fb..05d1058 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -73,6 +73,7 @@
 #include <net/icmp.h>
 #include <net/checksum.h>
 #include <net/inetpeer.h>
+#include <net/lwtunnel.h>
 #include <linux/igmp.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_bridge.h>
@@ -98,6 +99,14 @@
 
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
+
+	/* if egress device is enslaved to an L3 master device pass the
+	 * skb to its handler for processing
+	 */
+	skb = l3mdev_ip_out(sk, skb);
+	if (unlikely(!skb))
+		return 0;
+
 	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
 		       net, sk, skb, NULL, skb_dst(skb)->dev,
 		       dst_output);
@@ -197,6 +206,13 @@
 		skb = skb2;
 	}
 
+	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
+		int res = lwtunnel_xmit(skb);
+
+		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+			return res;
+	}
+
 	rcu_read_lock_bh();
 	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
 	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
@@ -482,7 +498,7 @@
 	to->tc_index = from->tc_index;
 #endif
 	nf_copy(to, from);
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
+#if IS_ENABLED(CONFIG_IP_VS)
 	to->ipvs_property = from->ipvs_property;
 #endif
 	skb_copy_secmark(to, from);
@@ -1566,8 +1582,7 @@
 	}
 
 	oif = arg->bound_dev_if;
-	if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
-		oif = skb->skb_iif;
+	oif = oif ? : skb->skb_iif;
 
 	flowi4_init_output(&fl4, oif,
 			   IP4_REPLY_MARK(net, skb->mark),
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 71a52f4d..af49197 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -284,9 +284,12 @@
 			ipc->ttl = val;
 			break;
 		case IP_TOS:
-			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+			if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
+				val = *(int *)CMSG_DATA(cmsg);
+			else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
+				val = *(u8 *)CMSG_DATA(cmsg);
+			else
 				return -EINVAL;
-			val = *(int *)CMSG_DATA(cmsg);
 			if (val < 0 || val > 255)
 				return -EINVAL;
 			ipc->tos = val;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 95649eb..5719d6b 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -55,6 +55,7 @@
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 #include <net/udp.h>
+#include <net/dst_metadata.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
@@ -546,6 +547,81 @@
 	return 0;
 }
 
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	u32 headroom = sizeof(struct iphdr);
+	struct ip_tunnel_info *tun_info;
+	const struct ip_tunnel_key *key;
+	const struct iphdr *inner_iph;
+	struct rtable *rt;
+	struct flowi4 fl4;
+	__be16 df = 0;
+	u8 tos, ttl;
+
+	tun_info = skb_tunnel_info(skb);
+	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+		     ip_tunnel_info_af(tun_info) != AF_INET))
+		goto tx_error;
+	key = &tun_info->key;
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+	tos = key->tos;
+	if (tos == 1) {
+		if (skb->protocol == htons(ETH_P_IP))
+			tos = inner_iph->tos;
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
+	}
+	init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
+			 RT_TOS(tos), tunnel->parms.link);
+	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
+		goto tx_error;
+	rt = ip_route_output_key(tunnel->net, &fl4);
+	if (IS_ERR(rt)) {
+		dev->stats.tx_carrier_errors++;
+		goto tx_error;
+	}
+	if (rt->dst.dev == dev) {
+		ip_rt_put(rt);
+		dev->stats.collisions++;
+		goto tx_error;
+	}
+	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
+	ttl = key->ttl;
+	if (ttl == 0) {
+		if (skb->protocol == htons(ETH_P_IP))
+			ttl = inner_iph->ttl;
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
+		else
+			ttl = ip4_dst_hoplimit(&rt->dst);
+	}
+	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
+		df = htons(IP_DF);
+	else if (skb->protocol == htons(ETH_P_IP))
+		df = inner_iph->frag_off & htons(IP_DF);
+	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+	if (headroom > dev->needed_headroom)
+		dev->needed_headroom = headroom;
+
+	if (skb_cow_head(skb, dev->needed_headroom)) {
+		ip_rt_put(rt);
+		goto tx_dropped;
+	}
+	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
+		      key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+	return;
+tx_error:
+	dev->stats.tx_errors++;
+	goto kfree;
+tx_dropped:
+	dev->stats.tx_dropped++;
+kfree:
+	kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
+
 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, u8 protocol)
 {
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 9d847c3..777bc18 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -69,13 +69,15 @@
 
 	skb_scrub_packet(skb, xnet);
 
-	skb_clear_hash(skb);
+	skb_clear_hash_if_not_l4(skb);
 	skb_dst_set(skb, &rt->dst);
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 
-	if (skb_iif && proto == IPPROTO_UDP) {
-		/* Arrived from an ingress interface and got udp encapuslated.
-		 * The encapsulated network segment length may exceed dst mtu.
+	if (skb_iif && !(df & htons(IP_DF))) {
+		/* Arrived from an ingress interface, got encapsulated, with
+		 * fragmentation of encapulating frames allowed.
+		 * If skb is gso, the resulting encapsulated network segments
+		 * may exceed dst mtu.
 		 * Allow IP Fragmentation of segments.
 		 */
 		IPCB(skb)->flags |= IPSKB_FRAG_SEGS;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index a917903..5d7944f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -88,6 +88,7 @@
 	struct net_device *dev;
 	struct pcpu_sw_netstats *tstats;
 	struct xfrm_state *x;
+	struct xfrm_mode *inner_mode;
 	struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
 	u32 orig_mark = skb->mark;
 	int ret;
@@ -105,7 +106,19 @@
 	}
 
 	x = xfrm_input_state(skb);
-	family = x->inner_mode->afinfo->family;
+
+	inner_mode = x->inner_mode;
+
+	if (x->sel.family == AF_UNSPEC) {
+		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+		if (inner_mode == NULL) {
+			XFRM_INC_STATS(dev_net(skb->dev),
+				       LINUX_MIB_XFRMINSTATEMODEERROR);
+			return -EINVAL;
+		}
+	}
+
+	family = inner_mode->afinfo->family;
 
 	skb->mark = be32_to_cpu(tunnel->parms.i_key);
 	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
@@ -557,6 +570,33 @@
 	.get_link_net	= ip_tunnel_get_link_net,
 };
 
+static bool is_vti_tunnel(const struct net_device *dev)
+{
+	return dev->netdev_ops == &vti_netdev_ops;
+}
+
+static int vti_device_event(struct notifier_block *unused,
+			    unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+
+	if (!is_vti_tunnel(dev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_DOWN:
+		if (!net_eq(tunnel->net, dev_net(dev)))
+			xfrm_garbage_collect(tunnel->net);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vti_notifier_block __read_mostly = {
+	.notifier_call = vti_device_event,
+};
+
 static int __init vti_init(void)
 {
 	const char *msg;
@@ -564,6 +604,8 @@
 
 	pr_info("IPv4 over IPsec tunneling driver\n");
 
+	register_netdevice_notifier(&vti_notifier_block);
+
 	msg = "tunnel device";
 	err = register_pernet_device(&vti_net_ops);
 	if (err < 0)
@@ -596,6 +638,7 @@
 xfrm_proto_esp_failed:
 	unregister_pernet_device(&vti_net_ops);
 pernet_dev_failed:
+	unregister_netdevice_notifier(&vti_notifier_block);
 	pr_err("vti init: failed to register %s\n", msg);
 	return err;
 }
@@ -607,6 +650,7 @@
 	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
 	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti_net_ops);
+	unregister_netdevice_notifier(&vti_notifier_block);
 }
 
 module_init(vti_init);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 1d71c40..071a785 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -85,7 +85,6 @@
 /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
 #define CONF_OPEN_RETRIES 	2	/* (Re)open devices twice */
 #define CONF_SEND_RETRIES 	6	/* Send six requests per open */
-#define CONF_INTER_TIMEOUT	(HZ)	/* Inter-device timeout: 1 second */
 #define CONF_BASE_TIMEOUT	(HZ*2)	/* Initial timeout: 2 seconds */
 #define CONF_TIMEOUT_RANDOM	(HZ)	/* Maximum amount of randomization */
 #define CONF_TIMEOUT_MULT	*7/4	/* Rate of timeout growth */
@@ -188,7 +187,7 @@
 };
 
 static struct ic_device *ic_first_dev __initdata;	/* List of open device */
-static struct net_device *ic_dev __initdata;		/* Selected device */
+static struct ic_device *ic_dev __initdata;		/* Selected device */
 
 static bool __init ic_is_init_dev(struct net_device *dev)
 {
@@ -307,7 +306,7 @@
 	while ((d = next)) {
 		next = d->next;
 		dev = d->dev;
-		if (dev != ic_dev && !netdev_uses_dsa(dev)) {
+		if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) {
 			pr_debug("IP-Config: Downing %s\n", dev->name);
 			dev_change_flags(dev, d->flags);
 		}
@@ -372,7 +371,7 @@
 	int err;
 
 	memset(&ir, 0, sizeof(ir));
-	strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name);
+	strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name);
 	set_sockaddr(sin, ic_myaddr, 0);
 	if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) {
 		pr_err("IP-Config: Unable to set interface address (%d)\n",
@@ -396,7 +395,7 @@
 	 * out, we'll try to muddle along.
 	 */
 	if (ic_dev_mtu != 0) {
-		strcpy(ir.ifr_name, ic_dev->name);
+		strcpy(ir.ifr_name, ic_dev->dev->name);
 		ir.ifr_mtu = ic_dev_mtu;
 		if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0)
 			pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n",
@@ -568,7 +567,7 @@
 		goto drop_unlock;
 
 	/* We have a winner! */
-	ic_dev = dev;
+	ic_dev = d;
 	if (ic_myaddr == NONE)
 		ic_myaddr = tip;
 	ic_servaddr = sip;
@@ -655,8 +654,6 @@
 	.func =	ic_bootp_recv,
 };
 
-static __be32 ic_dev_xid;		/* Device under configuration */
-
 /*
  *  Initialize DHCP/BOOTP extension fields in the request.
  */
@@ -666,14 +663,14 @@
 #ifdef IPCONFIG_DHCP
 
 static void __init
-ic_dhcp_init_options(u8 *options)
+ic_dhcp_init_options(u8 *options, struct ic_device *d)
 {
 	u8 mt = ((ic_servaddr == NONE)
 		 ? DHCPDISCOVER : DHCPREQUEST);
 	u8 *e = options;
 	int len;
 
-	pr_debug("DHCP: Sending message type %d\n", mt);
+	pr_debug("DHCP: Sending message type %d (%s)\n", mt, d->dev->name);
 
 	memcpy(e, ic_bootp_cookie, 4);	/* RFC1048 Magic Cookie */
 	e += 4;
@@ -857,7 +854,7 @@
 	/* add DHCP options or BOOTP extensions */
 #ifdef IPCONFIG_DHCP
 	if (ic_proto_enabled & IC_USE_DHCP)
-		ic_dhcp_init_options(b->exten);
+		ic_dhcp_init_options(b->exten, d);
 	else
 #endif
 		ic_bootp_init_ext(b->exten);
@@ -1033,14 +1030,8 @@
 	/* Is it a reply to our BOOTP request? */
 	if (b->op != BOOTP_REPLY ||
 	    b->xid != d->xid) {
-		net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n",
-				    b->op, b->xid);
-		goto drop_unlock;
-	}
-
-	/* Is it a reply for the device we are configuring? */
-	if (b->xid != ic_dev_xid) {
-		net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n");
+		net_err_ratelimited("DHCP/BOOTP: Reply not for us on %s, op[%x] xid[%x]\n",
+				    d->dev->name, b->op, b->xid);
 		goto drop_unlock;
 	}
 
@@ -1075,7 +1066,7 @@
 				}
 			}
 
-			pr_debug("DHCP: Got message type %d\n", mt);
+			pr_debug("DHCP: Got message type %d (%s)\n", mt, d->dev->name);
 
 			switch (mt) {
 			case DHCPOFFER:
@@ -1130,7 +1121,7 @@
 	}
 
 	/* We have a winner! */
-	ic_dev = dev;
+	ic_dev = d;
 	ic_myaddr = b->your_ip;
 	ic_servaddr = b->server_ip;
 	ic_addrservaddr = b->iph.saddr;
@@ -1225,9 +1216,6 @@
 	timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM);
 	for (;;) {
 #ifdef IPCONFIG_BOOTP
-		/* Track the device we are configuring */
-		ic_dev_xid = d->xid;
-
 		if (do_bootp && (d->able & IC_BOOTP))
 			ic_bootp_send_if(d, jiffies - start_jiffies);
 #endif
@@ -1236,15 +1224,19 @@
 			ic_rarp_send_if(d);
 #endif
 
-		jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout);
-		while (time_before(jiffies, jiff) && !ic_got_reply)
-			schedule_timeout_uninterruptible(1);
+		if (!d->next) {
+			jiff = jiffies + timeout;
+			while (time_before(jiffies, jiff) && !ic_got_reply)
+				schedule_timeout_uninterruptible(1);
+		}
 #ifdef IPCONFIG_DHCP
 		/* DHCP isn't done until we get a DHCPACK. */
 		if ((ic_got_reply & IC_BOOTP) &&
 		    (ic_proto_enabled & IC_USE_DHCP) &&
 		    ic_dhcp_msgtype != DHCPACK) {
 			ic_got_reply = 0;
+			/* continue on device that got the reply */
+			d = ic_dev;
 			pr_cont(",");
 			continue;
 		}
@@ -1487,7 +1479,7 @@
 #endif /* IPCONFIG_DYNAMIC */
 	} else {
 		/* Device selected manually or only one device -> use it */
-		ic_dev = ic_first_dev->dev;
+		ic_dev = ic_first_dev;
 	}
 
 	addr = root_nfs_parse_addr(root_server_path);
@@ -1501,14 +1493,6 @@
 		return -1;
 
 	/*
-	 * Close all network devices except the device we've
-	 * autoconfigured and set up routes.
-	 */
-	ic_close_devs();
-	if (ic_setup_if() < 0 || ic_setup_routes() < 0)
-		return -1;
-
-	/*
 	 * Record which protocol was actually used.
 	 */
 #ifdef IPCONFIG_DYNAMIC
@@ -1522,7 +1506,7 @@
 	pr_info("IP-Config: Complete:\n");
 
 	pr_info("     device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n",
-		ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr,
+		ic_dev->dev->name, ic_dev->dev->addr_len, ic_dev->dev->dev_addr,
 		&ic_myaddr, &ic_netmask, &ic_gateway);
 	pr_info("     host=%s, domain=%s, nis-domain=%s\n",
 		utsname()->nodename, ic_domain, utsname()->domainname);
@@ -1542,7 +1526,18 @@
 	pr_cont("\n");
 #endif /* !SILENT */
 
-	return 0;
+	/*
+	 * Close all network devices except the device we've
+	 * autoconfigured and set up routes.
+	 */
+	if (ic_setup_if() < 0 || ic_setup_routes() < 0)
+		err = -1;
+	else
+		err = 0;
+
+	ic_close_devs();
+
+	return err;
 }
 
 late_initcall(ip_auto_config);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 4ae3f8e..c939258 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -115,6 +115,7 @@
 #include <net/xfrm.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/dst_metadata.h>
 
 static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
@@ -193,6 +194,7 @@
 {
 	struct net *net = dev_net(skb->dev);
 	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+	struct metadata_dst *tun_dst = NULL;
 	struct ip_tunnel *tunnel;
 	const struct iphdr *iph;
 
@@ -216,7 +218,12 @@
 			tpi = &ipip_tpi;
 		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 			goto drop;
-		return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+		if (tunnel->collect_md) {
+			tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
+			if (!tun_dst)
+				return 0;
+		}
+		return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 	}
 
 	return -1;
@@ -270,7 +277,10 @@
 
 	skb_set_inner_ipproto(skb, ipproto);
 
-	ip_tunnel_xmit(skb, dev, tiph, ipproto);
+	if (tunnel->collect_md)
+		ip_md_tunnel_xmit(skb, dev, ipproto);
+	else
+		ip_tunnel_xmit(skb, dev, tiph, ipproto);
 	return NETDEV_TX_OK;
 
 tx_error:
@@ -380,13 +390,14 @@
 }
 
 static void ipip_netlink_parms(struct nlattr *data[],
-			       struct ip_tunnel_parm *parms)
+			       struct ip_tunnel_parm *parms, bool *collect_md)
 {
 	memset(parms, 0, sizeof(*parms));
 
 	parms->iph.version = 4;
 	parms->iph.protocol = IPPROTO_IPIP;
 	parms->iph.ihl = 5;
+	*collect_md = false;
 
 	if (!data)
 		return;
@@ -414,6 +425,9 @@
 
 	if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
 		parms->iph.frag_off = htons(IP_DF);
+
+	if (data[IFLA_IPTUN_COLLECT_METADATA])
+		*collect_md = true;
 }
 
 /* This function returns true when ENCAP attributes are present in the nl msg */
@@ -453,18 +467,18 @@
 static int ipip_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
+	struct ip_tunnel *t = netdev_priv(dev);
 	struct ip_tunnel_parm p;
 	struct ip_tunnel_encap ipencap;
 
 	if (ipip_netlink_encap_parms(data, &ipencap)) {
-		struct ip_tunnel *t = netdev_priv(dev);
 		int err = ip_tunnel_encap_setup(t, &ipencap);
 
 		if (err < 0)
 			return err;
 	}
 
-	ipip_netlink_parms(data, &p);
+	ipip_netlink_parms(data, &p, &t->collect_md);
 	return ip_tunnel_newlink(dev, tb, &p);
 }
 
@@ -473,6 +487,7 @@
 {
 	struct ip_tunnel_parm p;
 	struct ip_tunnel_encap ipencap;
+	bool collect_md;
 
 	if (ipip_netlink_encap_parms(data, &ipencap)) {
 		struct ip_tunnel *t = netdev_priv(dev);
@@ -482,7 +497,9 @@
 			return err;
 	}
 
-	ipip_netlink_parms(data, &p);
+	ipip_netlink_parms(data, &p, &collect_md);
+	if (collect_md)
+		return -EINVAL;
 
 	if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
 	    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
@@ -516,6 +533,8 @@
 		nla_total_size(2) +
 		/* IFLA_IPTUN_ENCAP_DPORT */
 		nla_total_size(2) +
+		/* IFLA_IPTUN_COLLECT_METADATA */
+		nla_total_size(0) +
 		0;
 }
 
@@ -544,6 +563,9 @@
 			tunnel->encap.flags))
 		goto nla_put_failure;
 
+	if (tunnel->collect_md)
+		if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
+			goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
@@ -562,6 +584,7 @@
 	[IFLA_IPTUN_ENCAP_FLAGS]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_SPORT]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
+	[IFLA_IPTUN_COLLECT_METADATA]	= { .type = NLA_FLAG },
 };
 
 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2625332..5f006e1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2076,6 +2076,7 @@
 	struct rta_mfc_stats mfcs;
 	struct nlattr *mp_attr;
 	struct rtnexthop *nhp;
+	unsigned long lastuse;
 	int ct;
 
 	/* If cache is unresolved, don't try to parse IIF and OIF */
@@ -2105,12 +2106,14 @@
 
 	nla_nest_end(skb, mp_attr);
 
+	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
 	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-	    nla_put_u64_64bit(skb, RTA_EXPIRES,
-			      jiffies_to_clock_t(c->mfc_un.res.lastuse),
+	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
 			      RTA_PAD))
 		return -EMSGSIZE;
 
@@ -2120,7 +2123,7 @@
 
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
 		   __be32 saddr, __be32 daddr,
-		   struct rtmsg *rtm, int nowait)
+		   struct rtmsg *rtm, int nowait, u32 portid)
 {
 	struct mfc_cache *cache;
 	struct mr_table *mrt;
@@ -2165,6 +2168,7 @@
 			return -ENOMEM;
 		}
 
+		NETLINK_CB(skb2).portid = portid;
 		skb_push(skb2, sizeof(struct iphdr));
 		skb_reset_network_header(skb2);
 		iph = ip_hdr(skb2);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index c187c60..d613309 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -25,17 +25,6 @@
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-config NF_CONNTRACK_PROC_COMPAT
-	bool "proc/sysctl compatibility with old connection tracking"
-	depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4
-	default y
-	help
-	  This option enables /proc and sysctl compatibility with the old
-	  layer 3 dependent connection tracking. This is needed to keep
-	  old programs that have not been adapted to the new names working.
-
-	  If unsure, say Y.
-
 if NF_TABLES
 
 config NF_TABLES_IPV4
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 87b073d..853328f 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -4,11 +4,6 @@
 
 # objects for l3 independent conntrack
 nf_conntrack_ipv4-y	:=  nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
-ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
-ifeq ($(CONFIG_PROC_FS),y)
-nf_conntrack_ipv4-objs	+= nf_conntrack_l3proto_ipv4_compat.o
-endif
-endif
 
 # connection tracking
 obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f993545..7c00ce9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -156,7 +156,7 @@
 	.u = {
 		.log = {
 			.level = 4,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index ae1a71a..713c09a 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -110,7 +110,7 @@
 	if (!help)
 		return NF_ACCEPT;
 
-	/* rcu_read_lock()ed by nf_hook_slow */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	helper = rcu_dereference(help->helper);
 	if (!helper)
 		return NF_ACCEPT;
@@ -202,47 +202,6 @@
 	},
 };
 
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
-
-static struct ctl_table ip_ct_sysctl_table[] = {
-	{
-		.procname	= "ip_conntrack_max",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "ip_conntrack_count",
-		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "ip_conntrack_buckets",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "ip_conntrack_checksum",
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "ip_conntrack_log_invalid",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &log_invalid_proto_min,
-		.extra2		= &log_invalid_proto_max,
-	},
-	{ }
-};
-#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
-
 /* Fast function for those who don't want to parse /proc (and I don't
    blame them). */
 /* Reversing the socket's dst/src point of view gives us the reply
@@ -350,20 +309,6 @@
 
 static int ipv4_init_net(struct net *net)
 {
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-	struct nf_ip_net *in = &net->ct.nf_ct_proto;
-	in->ctl_table = kmemdup(ip_ct_sysctl_table,
-				sizeof(ip_ct_sysctl_table),
-				GFP_KERNEL);
-	if (!in->ctl_table)
-		return -ENOMEM;
-
-	in->ctl_table[0].data = &nf_conntrack_max;
-	in->ctl_table[1].data = &net->ct.count;
-	in->ctl_table[2].data = &nf_conntrack_htable_size;
-	in->ctl_table[3].data = &net->ct.sysctl_checksum;
-	in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
-#endif
 	return 0;
 }
 
@@ -380,9 +325,6 @@
 	.nlattr_to_tuple = ipv4_nlattr_to_tuple,
 	.nla_policy	 = ipv4_nla_policy,
 #endif
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-	.ctl_table_path  = "net/ipv4/netfilter",
-#endif
 	.init_net	 = ipv4_init_net,
 	.me		 = THIS_MODULE,
 };
@@ -492,16 +434,7 @@
 		goto cleanup_icmpv4;
 	}
 
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-	ret = nf_conntrack_ipv4_compat_init();
-	if (ret < 0)
-		goto cleanup_proto;
-#endif
 	return ret;
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- cleanup_proto:
-	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
-#endif
  cleanup_icmpv4:
 	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
  cleanup_udp4:
@@ -520,9 +453,6 @@
 static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 {
 	synchronize_net();
-#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-	nf_conntrack_ipv4_compat_fini();
-#endif
 	nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
 	nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
deleted file mode 100644
index 6392371..0000000
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ /dev/null
@@ -1,492 +0,0 @@
-/* ip_conntrack proc compat - based on ip_conntrack_standalone.c
- *
- * (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/types.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/percpu.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <net/netfilter/nf_conntrack_acct.h>
-#include <linux/rculist_nulls.h>
-#include <linux/export.h>
-
-struct ct_iter_state {
-	struct seq_net_private p;
-	struct hlist_nulls_head *hash;
-	unsigned int htable_size;
-	unsigned int bucket;
-};
-
-static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
-{
-	struct ct_iter_state *st = seq->private;
-	struct hlist_nulls_node *n;
-
-	for (st->bucket = 0;
-	     st->bucket < st->htable_size;
-	     st->bucket++) {
-		n = rcu_dereference(
-			hlist_nulls_first_rcu(&st->hash[st->bucket]));
-		if (!is_a_nulls(n))
-			return n;
-	}
-	return NULL;
-}
-
-static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
-				      struct hlist_nulls_node *head)
-{
-	struct ct_iter_state *st = seq->private;
-
-	head = rcu_dereference(hlist_nulls_next_rcu(head));
-	while (is_a_nulls(head)) {
-		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= st->htable_size)
-				return NULL;
-		}
-		head = rcu_dereference(
-			hlist_nulls_first_rcu(&st->hash[st->bucket]));
-	}
-	return head;
-}
-
-static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct hlist_nulls_node *head = ct_get_first(seq);
-
-	if (head)
-		while (pos && (head = ct_get_next(seq, head)))
-			pos--;
-	return pos ? NULL : head;
-}
-
-static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
-{
-	struct ct_iter_state *st = seq->private;
-
-	rcu_read_lock();
-
-	nf_conntrack_get_ht(&st->hash, &st->htable_size);
-	return ct_get_idx(seq, *pos);
-}
-
-static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return ct_get_next(s, v);
-}
-
-static void ct_seq_stop(struct seq_file *s, void *v)
-	__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-#ifdef CONFIG_NF_CONNTRACK_SECMARK
-static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
-{
-	int ret;
-	u32 len;
-	char *secctx;
-
-	ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
-	if (ret)
-		return;
-
-	seq_printf(s, "secctx=%s ", secctx);
-
-	security_release_secctx(secctx, len);
-}
-#else
-static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
-{
-}
-#endif
-
-static bool ct_seq_should_skip(const struct nf_conn *ct,
-			       const struct net *net,
-			       const struct nf_conntrack_tuple_hash *hash)
-{
-	/* we only want to print DIR_ORIGINAL */
-	if (NF_CT_DIRECTION(hash))
-		return true;
-
-	if (nf_ct_l3num(ct) != AF_INET)
-		return true;
-
-	if (!net_eq(nf_ct_net(ct), net))
-		return true;
-
-	return false;
-}
-
-static int ct_seq_show(struct seq_file *s, void *v)
-{
-	struct nf_conntrack_tuple_hash *hash = v;
-	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
-	const struct nf_conntrack_l3proto *l3proto;
-	const struct nf_conntrack_l4proto *l4proto;
-	int ret = 0;
-
-	NF_CT_ASSERT(ct);
-	if (ct_seq_should_skip(ct, seq_file_net(s), hash))
-		return 0;
-
-	if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
-		return 0;
-
-	/* check if we raced w. object reuse */
-	if (!nf_ct_is_confirmed(ct) ||
-	    ct_seq_should_skip(ct, seq_file_net(s), hash))
-		goto release;
-
-	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
-	NF_CT_ASSERT(l3proto);
-	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-	NF_CT_ASSERT(l4proto);
-
-	ret = -ENOSPC;
-	seq_printf(s, "%-8s %u %ld ",
-		   l4proto->name, nf_ct_protonum(ct),
-		   timer_pending(&ct->timeout)
-		   ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
-
-	if (l4proto->print_conntrack)
-		l4proto->print_conntrack(s, ct);
-
-	if (seq_has_overflowed(s))
-		goto release;
-
-	print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-		    l3proto, l4proto);
-
-	if (seq_has_overflowed(s))
-		goto release;
-
-	if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
-		goto release;
-
-	if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
-		seq_printf(s, "[UNREPLIED] ");
-
-	print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-		    l3proto, l4proto);
-
-	if (seq_has_overflowed(s))
-		goto release;
-
-	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
-		goto release;
-
-	if (test_bit(IPS_ASSURED_BIT, &ct->status))
-		seq_printf(s, "[ASSURED] ");
-
-#ifdef CONFIG_NF_CONNTRACK_MARK
-	seq_printf(s, "mark=%u ", ct->mark);
-#endif
-
-	ct_show_secctx(s, ct);
-
-	seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
-
-	if (seq_has_overflowed(s))
-		goto release;
-
-	ret = 0;
-release:
-	nf_ct_put(ct);
-	return ret;
-}
-
-static const struct seq_operations ct_seq_ops = {
-	.start = ct_seq_start,
-	.next  = ct_seq_next,
-	.stop  = ct_seq_stop,
-	.show  = ct_seq_show
-};
-
-static int ct_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ct_seq_ops,
-			    sizeof(struct ct_iter_state));
-}
-
-static const struct file_operations ct_file_ops = {
-	.owner   = THIS_MODULE,
-	.open    = ct_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-/* expects */
-struct ct_expect_iter_state {
-	struct seq_net_private p;
-	unsigned int bucket;
-};
-
-static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
-{
-	struct ct_expect_iter_state *st = seq->private;
-	struct hlist_node *n;
-
-	for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
-		n = rcu_dereference(
-			hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
-		if (n)
-			return n;
-	}
-	return NULL;
-}
-
-static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
-					     struct hlist_node *head)
-{
-	struct ct_expect_iter_state *st = seq->private;
-
-	head = rcu_dereference(hlist_next_rcu(head));
-	while (head == NULL) {
-		if (++st->bucket >= nf_ct_expect_hsize)
-			return NULL;
-		head = rcu_dereference(
-			hlist_first_rcu(&nf_ct_expect_hash[st->bucket]));
-	}
-	return head;
-}
-
-static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
-{
-	struct hlist_node *head = ct_expect_get_first(seq);
-
-	if (head)
-		while (pos && (head = ct_expect_get_next(seq, head)))
-			pos--;
-	return pos ? NULL : head;
-}
-
-static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(RCU)
-{
-	rcu_read_lock();
-	return ct_expect_get_idx(seq, *pos);
-}
-
-static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return ct_expect_get_next(seq, v);
-}
-
-static void exp_seq_stop(struct seq_file *seq, void *v)
-	__releases(RCU)
-{
-	rcu_read_unlock();
-}
-
-static int exp_seq_show(struct seq_file *s, void *v)
-{
-	struct nf_conntrack_expect *exp;
-	const struct hlist_node *n = v;
-
-	exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
-
-	if (!net_eq(nf_ct_net(exp->master), seq_file_net(s)))
-		return 0;
-
-	if (exp->tuple.src.l3num != AF_INET)
-		return 0;
-
-	if (exp->timeout.function)
-		seq_printf(s, "%ld ", timer_pending(&exp->timeout)
-			   ? (long)(exp->timeout.expires - jiffies)/HZ : 0);
-	else
-		seq_printf(s, "- ");
-
-	seq_printf(s, "proto=%u ", exp->tuple.dst.protonum);
-
-	print_tuple(s, &exp->tuple,
-		    __nf_ct_l3proto_find(exp->tuple.src.l3num),
-		    __nf_ct_l4proto_find(exp->tuple.src.l3num,
-					 exp->tuple.dst.protonum));
-	seq_putc(s, '\n');
-
-	return 0;
-}
-
-static const struct seq_operations exp_seq_ops = {
-	.start = exp_seq_start,
-	.next = exp_seq_next,
-	.stop = exp_seq_stop,
-	.show = exp_seq_show
-};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &exp_seq_ops,
-			    sizeof(struct ct_expect_iter_state));
-}
-
-static const struct file_operations ip_exp_file_ops = {
-	.owner   = THIS_MODULE,
-	.open    = exp_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
-{
-	struct net *net = seq_file_net(seq);
-	int cpu;
-
-	if (*pos == 0)
-		return SEQ_START_TOKEN;
-
-	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
-		if (!cpu_possible(cpu))
-			continue;
-		*pos = cpu+1;
-		return per_cpu_ptr(net->ct.stat, cpu);
-	}
-
-	return NULL;
-}
-
-static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	struct net *net = seq_file_net(seq);
-	int cpu;
-
-	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
-		if (!cpu_possible(cpu))
-			continue;
-		*pos = cpu+1;
-		return per_cpu_ptr(net->ct.stat, cpu);
-	}
-
-	return NULL;
-}
-
-static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int ct_cpu_seq_show(struct seq_file *seq, void *v)
-{
-	struct net *net = seq_file_net(seq);
-	unsigned int nr_conntracks = atomic_read(&net->ct.count);
-	const struct ip_conntrack_stat *st = v;
-
-	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
-		return 0;
-	}
-
-	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
-			"%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
-		   nr_conntracks,
-		   st->searched,
-		   st->found,
-		   st->new,
-		   st->invalid,
-		   st->ignore,
-		   st->delete,
-		   st->delete_list,
-		   st->insert,
-		   st->insert_failed,
-		   st->drop,
-		   st->early_drop,
-		   st->error,
-
-		   st->expect_new,
-		   st->expect_create,
-		   st->expect_delete,
-		   st->search_restart
-		);
-	return 0;
-}
-
-static const struct seq_operations ct_cpu_seq_ops = {
-	.start  = ct_cpu_seq_start,
-	.next   = ct_cpu_seq_next,
-	.stop   = ct_cpu_seq_stop,
-	.show   = ct_cpu_seq_show,
-};
-
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &ct_cpu_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations ct_cpu_seq_fops = {
-	.owner   = THIS_MODULE,
-	.open    = ct_cpu_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-static int __net_init ip_conntrack_net_init(struct net *net)
-{
-	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
-
-	proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops);
-	if (!proc)
-		goto err1;
-
-	proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net,
-			       &ip_exp_file_ops);
-	if (!proc_exp)
-		goto err2;
-
-	proc_stat = proc_create("ip_conntrack", S_IRUGO,
-				net->proc_net_stat, &ct_cpu_seq_fops);
-	if (!proc_stat)
-		goto err3;
-	return 0;
-
-err3:
-	remove_proc_entry("ip_conntrack_expect", net->proc_net);
-err2:
-	remove_proc_entry("ip_conntrack", net->proc_net);
-err1:
-	return -ENOMEM;
-}
-
-static void __net_exit ip_conntrack_net_exit(struct net *net)
-{
-	remove_proc_entry("ip_conntrack", net->proc_net_stat);
-	remove_proc_entry("ip_conntrack_expect", net->proc_net);
-	remove_proc_entry("ip_conntrack", net->proc_net);
-}
-
-static struct pernet_operations ip_conntrack_net_ops = {
-	.init = ip_conntrack_net_init,
-	.exit = ip_conntrack_net_exit,
-};
-
-int __init nf_conntrack_ipv4_compat_init(void)
-{
-	return register_pernet_subsys(&ip_conntrack_net_ops);
-}
-
-void __exit nf_conntrack_ipv4_compat_fini(void)
-{
-	unregister_pernet_subsys(&ip_conntrack_net_ops);
-}
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index c567e1b..d075b3c 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -149,7 +149,7 @@
 		return -NF_ACCEPT;
 	}
 
-	/* rcu_read_lock()ed by nf_hook_slow */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
 
 	/* Ordinarily, we'd expect the inverted tupleproto, but it's
@@ -327,17 +327,6 @@
 	},
 	{ }
 };
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table icmp_compat_sysctl_table[] = {
-	{
-		.procname	= "ip_conntrack_icmp_timeout",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
 static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -355,40 +344,14 @@
 	return 0;
 }
 
-static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-					    struct nf_icmp_net *in)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
-				       sizeof(icmp_compat_sysctl_table),
-				       GFP_KERNEL);
-	if (!pn->ctl_compat_table)
-		return -ENOMEM;
-
-	pn->ctl_compat_table[0].data = &in->timeout;
-#endif
-#endif
-	return 0;
-}
-
 static int icmp_init_net(struct net *net, u_int16_t proto)
 {
-	int ret;
 	struct nf_icmp_net *in = icmp_pernet(net);
 	struct nf_proto_net *pn = &in->pn;
 
 	in->timeout = nf_ct_icmp_timeout;
 
-	ret = icmp_kmemdup_compat_sysctl_table(pn, in);
-	if (ret < 0)
-		return ret;
-
-	ret = icmp_kmemdup_sysctl_table(pn, in);
-	if (ret < 0)
-		nf_ct_kfree_compat_sysctl_table(pn);
-
-	return ret;
+	return icmp_kmemdup_sysctl_table(pn, in);
 }
 
 static struct nf_proto_net *icmp_get_net_proto(struct net *net)
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index ceb1873..cf986e1 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -74,21 +74,19 @@
 	nf_conntrack_get(skb->nfct);
 #endif
 	/*
-	 * If we are in PREROUTING/INPUT, the checksum must be recalculated
-	 * since the length could have changed as a result of defragmentation.
-	 *
-	 * We also decrease the TTL to mitigate potential loops between two
-	 * hosts.
+	 * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential
+	 * loops between two hosts.
 	 *
 	 * Set %IP_DF so that the original source is notified of a potentially
 	 * decreased MTU on the clone route. IPv6 does this too.
+	 *
+	 * IP header checksum will be recalculated at ip_local_out.
 	 */
 	iph = ip_hdr(skb);
 	iph->frag_off |= htons(IP_DF);
 	if (hooknum == NF_INET_PRE_ROUTING ||
 	    hooknum == NF_INET_LOCAL_IN)
 		--iph->ttl;
-	ip_send_check(iph);
 
 	if (nf_dup_ipv4_route(net, skb, gw, oif)) {
 		__this_cpu_write(nf_skb_duplicated, true);
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index e7ad950..b24795e 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -30,7 +30,7 @@
 	.u = {
 		.log = {
 			.level	  = LOGLEVEL_NOTICE,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
@@ -62,7 +62,7 @@
 	/* If it's for Ethernet and the lengths are OK, then log the ARP
 	 * payload.
 	 */
-	if (ah->ar_hrd != htons(1) ||
+	if (ah->ar_hrd != htons(ARPHRD_ETHER) ||
 	    ah->ar_hln != ETH_ALEN ||
 	    ah->ar_pln != sizeof(__be32))
 		return;
@@ -111,8 +111,7 @@
 
 static int __net_init nf_log_arp_net_init(struct net *net)
 {
-	nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
-	return 0;
+	return nf_log_set(net, NFPROTO_ARP, &nf_arp_logger);
 }
 
 static void __net_exit nf_log_arp_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 076aadd..8566489 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -29,7 +29,7 @@
 	.u = {
 		.log = {
 			.level	  = LOGLEVEL_NOTICE,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
@@ -46,7 +46,7 @@
 	if (info->type == NF_LOG_TYPE_LOG)
 		logflags = info->u.log.logflags;
 	else
-		logflags = NF_LOG_MASK;
+		logflags = NF_LOG_DEFAULT_MASK;
 
 	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
 	if (ih == NULL) {
@@ -76,7 +76,7 @@
 	if (ntohs(ih->frag_off) & IP_OFFSET)
 		nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
 
-	if ((logflags & XT_LOG_IPOPT) &&
+	if ((logflags & NF_LOG_IPOPT) &&
 	    ih->ihl * 4 > sizeof(struct iphdr)) {
 		const unsigned char *op;
 		unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
@@ -250,7 +250,7 @@
 	}
 
 	/* Max length: 15 "UID=4294967295 " */
-	if ((logflags & XT_LOG_UID) && !iphoff)
+	if ((logflags & NF_LOG_UID) && !iphoff)
 		nf_log_dump_sk_uid_gid(m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
@@ -282,7 +282,7 @@
 	if (info->type == NF_LOG_TYPE_LOG)
 		logflags = info->u.log.logflags;
 
-	if (!(logflags & XT_LOG_MACDECODE))
+	if (!(logflags & NF_LOG_MACDECODE))
 		goto fallback;
 
 	switch (dev->type) {
@@ -347,8 +347,7 @@
 
 static int __net_init nf_log_ipv4_net_init(struct net *net)
 {
-	nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
-	return 0;
+	return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger);
 }
 
 static void __net_exit nf_log_ipv4_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 9414923..edf0500 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -88,8 +88,8 @@
 	      const struct nf_conntrack_tuple *tuple,
 	      enum nf_nat_manip_type maniptype)
 {
-	const struct gre_hdr *greh;
-	struct gre_hdr_pptp *pgreh;
+	const struct gre_base_hdr *greh;
+	struct pptp_gre_header *pgreh;
 
 	/* pgreh includes two optional 32bit fields which are not required
 	 * to be there.  That's where the magic '8' comes from */
@@ -97,18 +97,19 @@
 		return false;
 
 	greh = (void *)skb->data + hdroff;
-	pgreh = (struct gre_hdr_pptp *)greh;
+	pgreh = (struct pptp_gre_header *)greh;
 
 	/* we only have destination manip of a packet, since 'source key'
 	 * is not present in the packet itself */
 	if (maniptype != NF_NAT_MANIP_DST)
 		return true;
-	switch (greh->version) {
-	case GRE_VERSION_1701:
+
+	switch (greh->flags & GRE_VERSION) {
+	case GRE_VERSION_0:
 		/* We do not currently NAT any GREv0 packets.
 		 * Try to behave like "nf_nat_proto_unknown" */
 		break;
-	case GRE_VERSION_PPTP:
+	case GRE_VERSION_1:
 		pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
 		pgreh->call_id = tuple->dst.u.gre.key;
 		break;
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index cd84d42..805c8dd 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -21,7 +21,7 @@
 {
 	struct nft_pktinfo pkt;
 
-	nft_set_pktinfo(&pkt, skb, state);
+	nft_set_pktinfo_unspec(&pkt, skb, state);
 
 	return nft_do_chain(&pkt, priv);
 }
@@ -80,7 +80,10 @@
 {
 	int ret;
 
-	nft_register_chain_type(&filter_arp);
+	ret = nft_register_chain_type(&filter_arp);
+	if (ret < 0)
+		return ret;
+
 	ret = register_pernet_subsys(&nf_tables_arp_net_ops);
 	if (ret < 0)
 		nft_unregister_chain_type(&filter_arp);
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index e44ba3b..2840a29 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -103,7 +103,10 @@
 {
 	int ret;
 
-	nft_register_chain_type(&filter_ipv4);
+	ret = nft_register_chain_type(&filter_ipv4);
+	if (ret < 0)
+		return ret;
+
 	ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
 	if (ret < 0)
 		nft_unregister_chain_type(&filter_ipv4);
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 2375b0a..30493be 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -31,6 +31,7 @@
 	__be32 saddr, daddr;
 	u_int8_t tos;
 	const struct iphdr *iph;
+	int err;
 
 	/* root is playing with raw sockets. */
 	if (skb->len < sizeof(struct iphdr) ||
@@ -46,15 +47,17 @@
 	tos = iph->tos;
 
 	ret = nft_do_chain(&pkt, priv);
-	if (ret != NF_DROP && ret != NF_QUEUE) {
+	if (ret != NF_DROP && ret != NF_STOLEN) {
 		iph = ip_hdr(skb);
 
 		if (iph->saddr != saddr ||
 		    iph->daddr != daddr ||
 		    skb->mark != mark ||
-		    iph->tos != tos)
-			if (ip_route_me_harder(state->net, skb, RTN_UNSPEC))
-				ret = NF_DROP;
+		    iph->tos != tos) {
+			err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
 	}
 	return ret;
 }
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index c24f41c..2c2553b 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -46,6 +46,7 @@
 	.eval		= nft_reject_ipv4_eval,
 	.init		= nft_reject_init,
 	.dump		= nft_reject_dump,
+	.validate	= nft_reject_validate,
 };
 
 static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 9f665b6..7143ca1 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -46,6 +46,8 @@
 #include <net/sock.h>
 #include <net/raw.h>
 
+#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX)
+
 /*
  *	Report socket allocation statistics [mea@utu.fi]
  */
@@ -257,6 +259,7 @@
 	SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
 	SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND),
 	SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED),
+	SNMP_MIB_ITEM("TCPMD5Failure", LINUX_MIB_TCPMD5FAILURE),
 	SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED),
 	SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED),
 	SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
@@ -355,22 +358,22 @@
 	atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
 
 	seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
-	for (i = 0; icmpmibmap[i].name != NULL; i++)
+	for (i = 0; icmpmibmap[i].name; i++)
 		seq_printf(seq, " In%s", icmpmibmap[i].name);
 	seq_puts(seq, " OutMsgs OutErrors");
-	for (i = 0; icmpmibmap[i].name != NULL; i++)
+	for (i = 0; icmpmibmap[i].name; i++)
 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
 	seq_printf(seq, "\nIcmp: %lu %lu %lu",
 		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS),
 		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS),
 		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
-	for (i = 0; icmpmibmap[i].name != NULL; i++)
+	for (i = 0; icmpmibmap[i].name; i++)
 		seq_printf(seq, " %lu",
 			   atomic_long_read(ptr + icmpmibmap[i].index));
 	seq_printf(seq, " %lu %lu",
 		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
 		snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
-	for (i = 0; icmpmibmap[i].name != NULL; i++)
+	for (i = 0; icmpmibmap[i].name; i++)
 		seq_printf(seq, " %lu",
 			   atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
 }
@@ -378,14 +381,16 @@
 /*
  *	Called from the PROCfs module. This outputs /proc/net/snmp.
  */
-static int snmp_seq_show(struct seq_file *seq, void *v)
+static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
 {
-	int i;
 	struct net *net = seq->private;
+	u64 buff64[IPSTATS_MIB_MAX];
+	int i;
+
+	memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64));
 
 	seq_puts(seq, "Ip: Forwarding DefaultTTL");
-
-	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
+	for (i = 0; snmp4_ipstats_list[i].name; i++)
 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
 
 	seq_printf(seq, "\nIp: %d %d",
@@ -393,54 +398,74 @@
 		   net->ipv4.sysctl_ip_default_ttl);
 
 	BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
-	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
-		seq_printf(seq, " %llu",
-			   snmp_fold_field64(net->mib.ip_statistics,
-					     snmp4_ipstats_list[i].entry,
-					     offsetof(struct ipstats_mib, syncp)));
+	snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
+				   net->mib.ip_statistics,
+				   offsetof(struct ipstats_mib, syncp));
+	for (i = 0; snmp4_ipstats_list[i].name; i++)
+		seq_printf(seq, " %llu", buff64[i]);
+
+	return 0;
+}
+
+static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
+{
+	unsigned long buff[TCPUDP_MIB_MAX];
+	struct net *net = seq->private;
+	int i;
+
+	memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+
+	seq_puts(seq, "\nTcp:");
+	for (i = 0; snmp4_tcp_list[i].name; i++)
+		seq_printf(seq, " %s", snmp4_tcp_list[i].name);
+
+	seq_puts(seq, "\nTcp:");
+	snmp_get_cpu_field_batch(buff, snmp4_tcp_list,
+				 net->mib.tcp_statistics);
+	for (i = 0; snmp4_tcp_list[i].name; i++) {
+		/* MaxConn field is signed, RFC 2012 */
+		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
+			seq_printf(seq, " %ld", buff[i]);
+		else
+			seq_printf(seq, " %lu", buff[i]);
+	}
+
+	memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+
+	snmp_get_cpu_field_batch(buff, snmp4_udp_list,
+				 net->mib.udp_statistics);
+	seq_puts(seq, "\nUdp:");
+	for (i = 0; snmp4_udp_list[i].name; i++)
+		seq_printf(seq, " %s", snmp4_udp_list[i].name);
+	seq_puts(seq, "\nUdp:");
+	for (i = 0; snmp4_udp_list[i].name; i++)
+		seq_printf(seq, " %lu", buff[i]);
+
+	memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+
+	/* the UDP and UDP-Lite MIBs are the same */
+	seq_puts(seq, "\nUdpLite:");
+	snmp_get_cpu_field_batch(buff, snmp4_udp_list,
+				 net->mib.udplite_statistics);
+	for (i = 0; snmp4_udp_list[i].name; i++)
+		seq_printf(seq, " %s", snmp4_udp_list[i].name);
+	seq_puts(seq, "\nUdpLite:");
+	for (i = 0; snmp4_udp_list[i].name; i++)
+		seq_printf(seq, " %lu", buff[i]);
+
+	seq_putc(seq, '\n');
+	return 0;
+}
+
+static int snmp_seq_show(struct seq_file *seq, void *v)
+{
+	snmp_seq_show_ipstats(seq, v);
 
 	icmp_put(seq);	/* RFC 2011 compatibility */
 	icmpmsg_put(seq);
 
-	seq_puts(seq, "\nTcp:");
-	for (i = 0; snmp4_tcp_list[i].name != NULL; i++)
-		seq_printf(seq, " %s", snmp4_tcp_list[i].name);
+	snmp_seq_show_tcp_udp(seq, v);
 
-	seq_puts(seq, "\nTcp:");
-	for (i = 0; snmp4_tcp_list[i].name != NULL; i++) {
-		/* MaxConn field is signed, RFC 2012 */
-		if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
-			seq_printf(seq, " %ld",
-				   snmp_fold_field(net->mib.tcp_statistics,
-						   snmp4_tcp_list[i].entry));
-		else
-			seq_printf(seq, " %lu",
-				   snmp_fold_field(net->mib.tcp_statistics,
-						   snmp4_tcp_list[i].entry));
-	}
-
-	seq_puts(seq, "\nUdp:");
-	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
-		seq_printf(seq, " %s", snmp4_udp_list[i].name);
-
-	seq_puts(seq, "\nUdp:");
-	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field(net->mib.udp_statistics,
-					   snmp4_udp_list[i].entry));
-
-	/* the UDP and UDP-Lite MIBs are the same */
-	seq_puts(seq, "\nUdpLite:");
-	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
-		seq_printf(seq, " %s", snmp4_udp_list[i].name);
-
-	seq_puts(seq, "\nUdpLite:");
-	for (i = 0; snmp4_udp_list[i].name != NULL; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field(net->mib.udplite_statistics,
-					   snmp4_udp_list[i].entry));
-
-	seq_putc(seq, '\n');
 	return 0;
 }
 
@@ -468,21 +493,21 @@
 	struct net *net = seq->private;
 
 	seq_puts(seq, "TcpExt:");
-	for (i = 0; snmp4_net_list[i].name != NULL; i++)
+	for (i = 0; snmp4_net_list[i].name; i++)
 		seq_printf(seq, " %s", snmp4_net_list[i].name);
 
 	seq_puts(seq, "\nTcpExt:");
-	for (i = 0; snmp4_net_list[i].name != NULL; i++)
+	for (i = 0; snmp4_net_list[i].name; i++)
 		seq_printf(seq, " %lu",
 			   snmp_fold_field(net->mib.net_statistics,
 					   snmp4_net_list[i].entry));
 
 	seq_puts(seq, "\nIpExt:");
-	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
+	for (i = 0; snmp4_ipextstats_list[i].name; i++)
 		seq_printf(seq, " %s", snmp4_ipextstats_list[i].name);
 
 	seq_puts(seq, "\nIpExt:");
-	for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++)
+	for (i = 0; snmp4_ipextstats_list[i].name; i++)
 		seq_printf(seq, " %llu",
 			   snmp_fold_field64(net->mib.ip_statistics,
 					     snmp4_ipextstats_list[i].entry,
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 438f50c..90a85c9 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -606,12 +606,6 @@
 			    (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
 			   daddr, saddr, 0, 0);
 
-	if (!saddr && ipc.oif) {
-		err = l3mdev_get_saddr(net, ipc.oif, &fl4);
-		if (err < 0)
-			goto done;
-	}
-
 	if (!inet->hdrincl) {
 		rfv.msg = msg;
 		rfv.hlen = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a1f2830..f2be689 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -476,12 +476,18 @@
 	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
 	u32 old = ACCESS_ONCE(*p_tstamp);
 	u32 now = (u32)jiffies;
-	u32 delta = 0;
+	u32 new, delta = 0;
 
 	if (old != now && cmpxchg(p_tstamp, old, now) == old)
 		delta = prandom_u32_max(now - old);
 
-	return atomic_add_return(segs + delta, p_id) - segs;
+	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
+	do {
+		old = (u32)atomic_read(p_id);
+		new = old + delta + segs;
+	} while (atomic_cmpxchg(p_id, old, new) != old);
+
+	return new - segs;
 }
 EXPORT_SYMBOL(ip_idents_reserve);
 
@@ -1246,7 +1252,9 @@
 			mtu = 576;
 	}
 
-	return min_t(unsigned int, mtu, IP_MAX_MTU);
+	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
+
+	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
 static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
@@ -1829,7 +1837,7 @@
 	 *	Now we are ready to route packet.
 	 */
 	fl4.flowi4_oif = 0;
-	fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
+	fl4.flowi4_iif = dev->ifindex;
 	fl4.flowi4_mark = skb->mark;
 	fl4.flowi4_tos = tos;
 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@ -2016,7 +2024,9 @@
 		return ERR_PTR(-EINVAL);
 
 	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
-		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+		if (ipv4_is_loopback(fl4->saddr) &&
+		    !(dev_out->flags & IFF_LOOPBACK) &&
+		    !netif_is_l3_master(dev_out))
 			return ERR_PTR(-EINVAL);
 
 	if (ipv4_is_lbcast(fl4->daddr))
@@ -2146,7 +2156,6 @@
 	unsigned int flags = 0;
 	struct fib_result res;
 	struct rtable *rth;
-	int master_idx;
 	int orig_oif;
 	int err = -ENETUNREACH;
 
@@ -2156,9 +2165,6 @@
 
 	orig_oif = fl4->flowi4_oif;
 
-	master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
-	if (master_idx)
-		fl4->flowi4_oif = master_idx;
 	fl4->flowi4_iif = LOOPBACK_IFINDEX;
 	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
 	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
@@ -2242,10 +2248,6 @@
 				fl4->saddr = inet_select_addr(dev_out, 0,
 							      RT_SCOPE_HOST);
 		}
-
-		rth = l3mdev_get_rtable(dev_out, fl4);
-		if (rth)
-			goto out;
 	}
 
 	if (!fl4->daddr) {
@@ -2263,8 +2265,7 @@
 	if (err) {
 		res.fi = NULL;
 		res.table = NULL;
-		if (fl4->flowi4_oif &&
-		    !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+		if (fl4->flowi4_oif) {
 			/* Apparently, routing tables are wrong. Assume,
 			   that the destination is on link.
 
@@ -2300,7 +2301,9 @@
 			else
 				fl4->saddr = fl4->daddr;
 		}
-		dev_out = net->loopback_dev;
+
+		/* L3 master device is the loopback for that domain */
+		dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
 		fl4->flowi4_oif = dev_out->ifindex;
 		flags |= RTCF_LOCAL;
 		goto make_route;
@@ -2497,7 +2500,8 @@
 		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
 			int err = ipmr_get_route(net, skb,
 						 fl4->saddr, fl4->daddr,
-						 r, nowait);
+						 r, nowait, portid);
+
 			if (err <= 0) {
 				if (!nowait) {
 					if (err == 0)
@@ -2575,9 +2579,6 @@
 	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
 	fl4.flowi4_mark = mark;
 
-	if (netif_index_is_l3_master(net, fl4.flowi4_oif))
-		fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
-
 	if (iif) {
 		struct net_device *dev;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 032a96d..f253e5019d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -380,14 +380,14 @@
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	__skb_queue_head_init(&tp->out_of_order_queue);
+	tp->out_of_order_queue = RB_ROOT;
 	tcp_init_xmit_timers(sk);
 	tcp_prequeue_init(tp);
 	INIT_LIST_HEAD(&tp->tsq_node);
 
 	icsk->icsk_rto = TCP_TIMEOUT_INIT;
 	tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
-	tp->rtt_min[0].rtt = ~0U;
+	minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U);
 
 	/* So many TCP implementations out there (incorrectly) count the
 	 * initial SYN frame in their delayed-ACK and congestion control
@@ -396,6 +396,9 @@
 	 */
 	tp->snd_cwnd = TCP_INIT_CWND;
 
+	/* There's a bubble in the pipe until at least the first ACK. */
+	tp->app_limited = ~0U;
+
 	/* See draft-stevens-tcpca-spec-01 for discussion of the
 	 * initialization of these values.
 	 */
@@ -1014,23 +1017,40 @@
 					flags);
 
 	lock_sock(sk);
+
+	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
+
 	res = do_tcp_sendpages(sk, page, offset, size, flags);
 	release_sock(sk);
 	return res;
 }
 EXPORT_SYMBOL(tcp_sendpage);
 
-static inline int select_size(const struct sock *sk, bool sg)
+/* Do not bother using a page frag for very small frames.
+ * But use this heuristic only for the first skb in write queue.
+ *
+ * Having no payload in skb->head allows better SACK shifting
+ * in tcp_shift_skb_data(), reducing sack/rack overhead, because
+ * write queue has less skbs.
+ * Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB.
+ * This also speeds up tso_fragment(), since it wont fallback
+ * to tcp_fragment().
+ */
+static int linear_payload_sz(bool first_skb)
+{
+	if (first_skb)
+		return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
+	return 0;
+}
+
+static int select_size(const struct sock *sk, bool sg, bool first_skb)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	int tmp = tp->mss_cache;
 
 	if (sg) {
 		if (sk_can_gso(sk)) {
-			/* Small frames wont use a full page:
-			 * Payload will immediately follow tcp header.
-			 */
-			tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
+			tmp = linear_payload_sz(first_skb);
 		} else {
 			int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
 
@@ -1101,6 +1121,8 @@
 
 	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
+	tcp_rate_check_app_limited(sk);  /* is sending application-limited? */
+
 	/* Wait for a connection to finish. One exception is TCP Fast Open
 	 * (passive side) where data is allowed to be sent before a connection
 	 * is fully established.
@@ -1161,6 +1183,8 @@
 		}
 
 		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
+			bool first_skb;
+
 new_segment:
 			/* Allocate new segment. If the interface is SG,
 			 * allocate skb fitting to single page.
@@ -1172,10 +1196,11 @@
 				process_backlog = false;
 				goto restart;
 			}
+			first_skb = skb_queue_empty(&sk->sk_write_queue);
 			skb = sk_stream_alloc_skb(sk,
-						  select_size(sk, sg),
+						  select_size(sk, sg, first_skb),
 						  sk->sk_allocation,
-						  skb_queue_empty(&sk->sk_write_queue));
+						  first_skb);
 			if (!skb)
 				goto wait_for_memory;
 
@@ -1570,6 +1595,12 @@
 }
 EXPORT_SYMBOL(tcp_read_sock);
 
+int tcp_peek_len(struct socket *sock)
+{
+	return tcp_inq(sock->sk);
+}
+EXPORT_SYMBOL(tcp_peek_len);
+
 /*
  *	This routine copies from a sock struct into the user buffer.
  *
@@ -2237,7 +2268,7 @@
 	tcp_clear_xmit_timers(sk);
 	__skb_queue_purge(&sk->sk_receive_queue);
 	tcp_write_queue_purge(sk);
-	__skb_queue_purge(&tp->out_of_order_queue);
+	skb_rbtree_purge(&tp->out_of_order_queue);
 
 	inet->inet_dport = 0;
 
@@ -2681,7 +2712,7 @@
 {
 	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	u32 now = tcp_time_stamp;
+	u32 now = tcp_time_stamp, intv;
 	unsigned int start;
 	int notsent_bytes;
 	u64 rate64;
@@ -2771,6 +2802,15 @@
 	info->tcpi_min_rtt = tcp_min_rtt(tp);
 	info->tcpi_data_segs_in = tp->data_segs_in;
 	info->tcpi_data_segs_out = tp->data_segs_out;
+
+	info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
+	rate = READ_ONCE(tp->rate_delivered);
+	intv = READ_ONCE(tp->rate_interval_us);
+	if (rate && intv) {
+		rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
+		do_div(rate64, intv);
+		put_unaligned(rate64, &info->tcpi_delivery_rate);
+	}
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
@@ -3092,23 +3132,6 @@
 }
 EXPORT_SYMBOL(tcp_get_md5sig_pool);
 
-int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
-			const struct tcphdr *th)
-{
-	struct scatterlist sg;
-	struct tcphdr hdr;
-
-	/* We are not allowed to change tcphdr, make a local copy */
-	memcpy(&hdr, th, sizeof(hdr));
-	hdr.check = 0;
-
-	/* options aren't included in the hash */
-	sg_init_one(&sg, &hdr, sizeof(hdr));
-	ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr));
-	return crypto_ahash_update(hp->md5_req);
-}
-EXPORT_SYMBOL(tcp_md5_hash_header);
-
 int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
 			  const struct sk_buff *skb, unsigned int header_len)
 {
@@ -3193,7 +3216,6 @@
 			local_bh_enable();
 			return 0;
 		}
-		sock_gen_put(sk);
 		return -EOPNOTSUPP;
 	}
 
@@ -3222,7 +3244,6 @@
 	bh_unlock_sock(sk);
 	local_bh_enable();
 	release_sock(sk);
-	sock_put(sk);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(tcp_abort);
@@ -3257,11 +3278,12 @@
 
 void __init tcp_init(void)
 {
-	unsigned long limit;
 	int max_rshare, max_wshare, cnt;
+	unsigned long limit;
 	unsigned int i;
 
-	sock_skb_cb_check_size(sizeof(struct tcp_skb_cb));
+	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
+		     FIELD_SIZEOF(struct sk_buff, cb));
 
 	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
 	percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
new file mode 100644
index 0000000..0ea66c2
--- /dev/null
+++ b/net/ipv4/tcp_bbr.c
@@ -0,0 +1,896 @@
+/* Bottleneck Bandwidth and RTT (BBR) congestion control
+ *
+ * BBR congestion control computes the sending rate based on the delivery
+ * rate (throughput) estimated from ACKs. In a nutshell:
+ *
+ *   On each ACK, update our model of the network path:
+ *      bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips)
+ *      min_rtt = windowed_min(rtt, 10 seconds)
+ *   pacing_rate = pacing_gain * bottleneck_bandwidth
+ *   cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4)
+ *
+ * The core algorithm does not react directly to packet losses or delays,
+ * although BBR may adjust the size of next send per ACK when loss is
+ * observed, or adjust the sending rate if it estimates there is a
+ * traffic policer, in order to keep the drop rate reasonable.
+ *
+ * BBR is described in detail in:
+ *   "BBR: Congestion-Based Congestion Control",
+ *   Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh,
+ *   Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016.
+ *
+ * There is a public e-mail list for discussing BBR development and testing:
+ *   https://groups.google.com/forum/#!forum/bbr-dev
+ *
+ * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled,
+ * since pacing is integral to the BBR design and implementation.
+ * BBR without pacing would not function properly, and may incur unnecessary
+ * high packet loss rates.
+ */
+#include <linux/module.h>
+#include <net/tcp.h>
+#include <linux/inet_diag.h>
+#include <linux/inet.h>
+#include <linux/random.h>
+#include <linux/win_minmax.h>
+
+/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth
+ * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps.
+ * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32.
+ * Since the minimum window is >=4 packets, the lower bound isn't
+ * an issue. The upper bound isn't an issue with existing technologies.
+ */
+#define BW_SCALE 24
+#define BW_UNIT (1 << BW_SCALE)
+
+#define BBR_SCALE 8	/* scaling factor for fractions in BBR (e.g. gains) */
+#define BBR_UNIT (1 << BBR_SCALE)
+
+/* BBR has the following modes for deciding how fast to send: */
+enum bbr_mode {
+	BBR_STARTUP,	/* ramp up sending rate rapidly to fill pipe */
+	BBR_DRAIN,	/* drain any queue created during startup */
+	BBR_PROBE_BW,	/* discover, share bw: pace around estimated bw */
+	BBR_PROBE_RTT,	/* cut cwnd to min to probe min_rtt */
+};
+
+/* BBR congestion control block */
+struct bbr {
+	u32	min_rtt_us;	        /* min RTT in min_rtt_win_sec window */
+	u32	min_rtt_stamp;	        /* timestamp of min_rtt_us */
+	u32	probe_rtt_done_stamp;   /* end time for BBR_PROBE_RTT mode */
+	struct minmax bw;	/* Max recent delivery rate in pkts/uS << 24 */
+	u32	rtt_cnt;	    /* count of packet-timed rounds elapsed */
+	u32     next_rtt_delivered; /* scb->tx.delivered at end of round */
+	struct skb_mstamp cycle_mstamp;  /* time of this cycle phase start */
+	u32     mode:3,		     /* current bbr_mode in state machine */
+		prev_ca_state:3,     /* CA state on previous ACK */
+		packet_conservation:1,  /* use packet conservation? */
+		restore_cwnd:1,	     /* decided to revert cwnd to old value */
+		round_start:1,	     /* start of packet-timed tx->ack round? */
+		tso_segs_goal:7,     /* segments we want in each skb we send */
+		idle_restart:1,	     /* restarting after idle? */
+		probe_rtt_round_done:1,  /* a BBR_PROBE_RTT round at 4 pkts? */
+		unused:5,
+		lt_is_sampling:1,    /* taking long-term ("LT") samples now? */
+		lt_rtt_cnt:7,	     /* round trips in long-term interval */
+		lt_use_bw:1;	     /* use lt_bw as our bw estimate? */
+	u32	lt_bw;		     /* LT est delivery rate in pkts/uS << 24 */
+	u32	lt_last_delivered;   /* LT intvl start: tp->delivered */
+	u32	lt_last_stamp;	     /* LT intvl start: tp->delivered_mstamp */
+	u32	lt_last_lost;	     /* LT intvl start: tp->lost */
+	u32	pacing_gain:10,	/* current gain for setting pacing rate */
+		cwnd_gain:10,	/* current gain for setting cwnd */
+		full_bw_cnt:3,	/* number of rounds without large bw gains */
+		cycle_idx:3,	/* current index in pacing_gain cycle array */
+		unused_b:6;
+	u32	prior_cwnd;	/* prior cwnd upon entering loss recovery */
+	u32	full_bw;	/* recent bw, to estimate if pipe is full */
+};
+
+#define CYCLE_LEN	8	/* number of phases in a pacing gain cycle */
+
+/* Window length of bw filter (in rounds): */
+static const int bbr_bw_rtts = CYCLE_LEN + 2;
+/* Window length of min_rtt filter (in sec): */
+static const u32 bbr_min_rtt_win_sec = 10;
+/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */
+static const u32 bbr_probe_rtt_mode_ms = 200;
+/* Skip TSO below the following bandwidth (bits/sec): */
+static const int bbr_min_tso_rate = 1200000;
+
+/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain
+ * that will allow a smoothly increasing pacing rate that will double each RTT
+ * and send the same number of packets per RTT that an un-paced, slow-starting
+ * Reno or CUBIC flow would:
+ */
+static const int bbr_high_gain  = BBR_UNIT * 2885 / 1000 + 1;
+/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain
+ * the queue created in BBR_STARTUP in a single round:
+ */
+static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885;
+/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */
+static const int bbr_cwnd_gain  = BBR_UNIT * 2;
+/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */
+static const int bbr_pacing_gain[] = {
+	BBR_UNIT * 5 / 4,	/* probe for more available bw */
+	BBR_UNIT * 3 / 4,	/* drain queue and/or yield bw to other flows */
+	BBR_UNIT, BBR_UNIT, BBR_UNIT,	/* cruise at 1.0*bw to utilize pipe, */
+	BBR_UNIT, BBR_UNIT, BBR_UNIT	/* without creating excess queue... */
+};
+/* Randomize the starting gain cycling phase over N phases: */
+static const u32 bbr_cycle_rand = 7;
+
+/* Try to keep at least this many packets in flight, if things go smoothly. For
+ * smooth functioning, a sliding window protocol ACKing every other packet
+ * needs at least 4 packets in flight:
+ */
+static const u32 bbr_cwnd_min_target = 4;
+
+/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */
+/* If bw has increased significantly (1.25x), there may be more bw available: */
+static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4;
+/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */
+static const u32 bbr_full_bw_cnt = 3;
+
+/* "long-term" ("LT") bandwidth estimator parameters... */
+/* The minimum number of rounds in an LT bw sampling interval: */
+static const u32 bbr_lt_intvl_min_rtts = 4;
+/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */
+static const u32 bbr_lt_loss_thresh = 50;
+/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */
+static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8;
+/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */
+static const u32 bbr_lt_bw_diff = 4000 / 8;
+/* If we estimate we're policed, use lt_bw for this many round trips: */
+static const u32 bbr_lt_bw_max_rtts = 48;
+
+/* Do we estimate that STARTUP filled the pipe? */
+static bool bbr_full_bw_reached(const struct sock *sk)
+{
+	const struct bbr *bbr = inet_csk_ca(sk);
+
+	return bbr->full_bw_cnt >= bbr_full_bw_cnt;
+}
+
+/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */
+static u32 bbr_max_bw(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return minmax_get(&bbr->bw);
+}
+
+/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */
+static u32 bbr_bw(const struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk);
+}
+
+/* Return rate in bytes per second, optionally with a gain.
+ * The order here is chosen carefully to avoid overflow of u64. This should
+ * work for input rates of up to 2.9Tbit/sec and gain of 2.89x.
+ */
+static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
+{
+	rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
+	rate *= gain;
+	rate >>= BBR_SCALE;
+	rate *= USEC_PER_SEC;
+	return rate >> BW_SCALE;
+}
+
+/* Pace using current bw estimate and a gain factor. In order to help drive the
+ * network toward lower queues while maintaining high utilization and low
+ * latency, the average pacing rate aims to be slightly (~1%) lower than the
+ * estimated bandwidth. This is an important aspect of the design. In this
+ * implementation this slightly lower pacing rate is achieved implicitly by not
+ * including link-layer headers in the packet size used for the pacing rate.
+ */
+static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 rate = bw;
+
+	rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+	rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+	if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
+		sk->sk_pacing_rate = rate;
+}
+
+/* Return count of segments we want in the skbs we send, or 0 for default. */
+static u32 bbr_tso_segs_goal(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	return bbr->tso_segs_goal;
+}
+
+static void bbr_set_tso_segs_goal(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 min_segs;
+
+	min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
+	bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs),
+				 0x7FU);
+}
+
+/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */
+static void bbr_save_cwnd(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT)
+		bbr->prior_cwnd = tp->snd_cwnd;  /* this cwnd is good enough */
+	else  /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */
+		bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd);
+}
+
+static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (event == CA_EVENT_TX_START && tp->app_limited) {
+		bbr->idle_restart = 1;
+		/* Avoid pointless buffer overflows: pace at est. bw if we don't
+		 * need more speed (we're restarting from idle and app-limited).
+		 */
+		if (bbr->mode == BBR_PROBE_BW)
+			bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT);
+	}
+}
+
+/* Find target cwnd. Right-size the cwnd based on min RTT and the
+ * estimated bottleneck bandwidth:
+ *
+ * cwnd = bw * min_rtt * gain = BDP * gain
+ *
+ * The key factor, gain, controls the amount of queue. While a small gain
+ * builds a smaller queue, it becomes more vulnerable to noise in RTT
+ * measurements (e.g., delayed ACKs or other ACK compression effects). This
+ * noise may cause BBR to under-estimate the rate.
+ *
+ * To achieve full performance in high-speed paths, we budget enough cwnd to
+ * fit full-sized skbs in-flight on both end hosts to fully utilize the path:
+ *   - one skb in sending host Qdisc,
+ *   - one skb in sending host TSO/GSO engine
+ *   - one skb being received by receiver host LRO/GRO/delayed-ACK engine
+ * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because
+ * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets,
+ * which allows 2 outstanding 2-packet sequences, to try to keep pipe
+ * full even with ACK-every-other-packet delayed ACKs.
+ */
+static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 cwnd;
+	u64 w;
+
+	/* If we've never had a valid RTT sample, cap cwnd at the initial
+	 * default. This should only happen when the connection is not using TCP
+	 * timestamps and has retransmitted all of the SYN/SYNACK/data packets
+	 * ACKed so far. In this case, an RTO can cut cwnd to 1, in which
+	 * case we need to slow-start up toward something safe: TCP_INIT_CWND.
+	 */
+	if (unlikely(bbr->min_rtt_us == ~0U))	 /* no valid RTT samples yet? */
+		return TCP_INIT_CWND;  /* be safe: cap at default initial cwnd*/
+
+	w = (u64)bw * bbr->min_rtt_us;
+
+	/* Apply a gain to the given value, then remove the BW_SCALE shift. */
+	cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT;
+
+	/* Allow enough full-sized skbs in flight to utilize end systems. */
+	cwnd += 3 * bbr->tso_segs_goal;
+
+	/* Reduce delayed ACKs by rounding up cwnd to the next even number. */
+	cwnd = (cwnd + 1) & ~1U;
+
+	return cwnd;
+}
+
+/* An optimization in BBR to reduce losses: On the first round of recovery, we
+ * follow the packet conservation principle: send P packets per P packets acked.
+ * After that, we slow-start and send at most 2*P packets per P packets acked.
+ * After recovery finishes, or upon undo, we restore the cwnd we had when
+ * recovery started (capped by the target cwnd based on estimated BDP).
+ *
+ * TODO(ycheng/ncardwell): implement a rate-based approach.
+ */
+static bool bbr_set_cwnd_to_recover_or_restore(
+	struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state;
+	u32 cwnd = tp->snd_cwnd;
+
+	/* An ACK for P pkts should release at most 2*P packets. We do this
+	 * in two steps. First, here we deduct the number of lost packets.
+	 * Then, in bbr_set_cwnd() we slow start up toward the target cwnd.
+	 */
+	if (rs->losses > 0)
+		cwnd = max_t(s32, cwnd - rs->losses, 1);
+
+	if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) {
+		/* Starting 1st round of Recovery, so do packet conservation. */
+		bbr->packet_conservation = 1;
+		bbr->next_rtt_delivered = tp->delivered;  /* start round now */
+		/* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */
+		cwnd = tcp_packets_in_flight(tp) + acked;
+	} else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) {
+		/* Exiting loss recovery; restore cwnd saved before recovery. */
+		bbr->restore_cwnd = 1;
+		bbr->packet_conservation = 0;
+	}
+	bbr->prev_ca_state = state;
+
+	if (bbr->restore_cwnd) {
+		/* Restore cwnd after exiting loss recovery or PROBE_RTT. */
+		cwnd = max(cwnd, bbr->prior_cwnd);
+		bbr->restore_cwnd = 0;
+	}
+
+	if (bbr->packet_conservation) {
+		*new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked);
+		return true;	/* yes, using packet conservation */
+	}
+	*new_cwnd = cwnd;
+	return false;
+}
+
+/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss
+ * has drawn us down below target), or snap down to target if we're above it.
+ */
+static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs,
+			 u32 acked, u32 bw, int gain)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 cwnd = 0, target_cwnd = 0;
+
+	if (!acked)
+		return;
+
+	if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd))
+		goto done;
+
+	/* If we're below target cwnd, slow start cwnd toward target cwnd. */
+	target_cwnd = bbr_target_cwnd(sk, bw, gain);
+	if (bbr_full_bw_reached(sk))  /* only cut cwnd if we filled the pipe */
+		cwnd = min(cwnd + acked, target_cwnd);
+	else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND)
+		cwnd = cwnd + acked;
+	cwnd = max(cwnd, bbr_cwnd_min_target);
+
+done:
+	tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);	/* apply global cap */
+	if (bbr->mode == BBR_PROBE_RTT)  /* drain queue, refresh min_rtt */
+		tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target);
+}
+
+/* End cycle phase if it's time and/or we hit the phase's in-flight target. */
+static bool bbr_is_next_cycle_phase(struct sock *sk,
+				    const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	bool is_full_length =
+		skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) >
+		bbr->min_rtt_us;
+	u32 inflight, bw;
+
+	/* The pacing_gain of 1.0 paces at the estimated bw to try to fully
+	 * use the pipe without increasing the queue.
+	 */
+	if (bbr->pacing_gain == BBR_UNIT)
+		return is_full_length;		/* just use wall clock time */
+
+	inflight = rs->prior_in_flight;  /* what was in-flight before ACK? */
+	bw = bbr_max_bw(sk);
+
+	/* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at
+	 * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is
+	 * small (e.g. on a LAN). We do not persist if packets are lost, since
+	 * a path with small buffers may not hold that much.
+	 */
+	if (bbr->pacing_gain > BBR_UNIT)
+		return is_full_length &&
+			(rs->losses ||  /* perhaps pacing_gain*BDP won't fit */
+			 inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain));
+
+	/* A pacing_gain < 1.0 tries to drain extra queue we added if bw
+	 * probing didn't find more bw. If inflight falls to match BDP then we
+	 * estimate queue is drained; persisting would underutilize the pipe.
+	 */
+	return is_full_length ||
+		inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT);
+}
+
+static void bbr_advance_cycle_phase(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1);
+	bbr->cycle_mstamp = tp->delivered_mstamp;
+	bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx];
+}
+
+/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */
+static void bbr_update_cycle_phase(struct sock *sk,
+				   const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw &&
+	    bbr_is_next_cycle_phase(sk, rs))
+		bbr_advance_cycle_phase(sk);
+}
+
+static void bbr_reset_startup_mode(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->mode = BBR_STARTUP;
+	bbr->pacing_gain = bbr_high_gain;
+	bbr->cwnd_gain	 = bbr_high_gain;
+}
+
+static void bbr_reset_probe_bw_mode(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->mode = BBR_PROBE_BW;
+	bbr->pacing_gain = BBR_UNIT;
+	bbr->cwnd_gain = bbr_cwnd_gain;
+	bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand);
+	bbr_advance_cycle_phase(sk);	/* flip to next phase of gain cycle */
+}
+
+static void bbr_reset_mode(struct sock *sk)
+{
+	if (!bbr_full_bw_reached(sk))
+		bbr_reset_startup_mode(sk);
+	else
+		bbr_reset_probe_bw_mode(sk);
+}
+
+/* Start a new long-term sampling interval. */
+static void bbr_reset_lt_bw_sampling_interval(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies;
+	bbr->lt_last_delivered = tp->delivered;
+	bbr->lt_last_lost = tp->lost;
+	bbr->lt_rtt_cnt = 0;
+}
+
+/* Completely reset long-term bandwidth sampling. */
+static void bbr_reset_lt_bw_sampling(struct sock *sk)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	bbr->lt_bw = 0;
+	bbr->lt_use_bw = 0;
+	bbr->lt_is_sampling = false;
+	bbr_reset_lt_bw_sampling_interval(sk);
+}
+
+/* Long-term bw sampling interval is done. Estimate whether we're policed. */
+static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 diff;
+
+	if (bbr->lt_bw) {  /* do we have bw from a previous interval? */
+		/* Is new bw close to the lt_bw from the previous interval? */
+		diff = abs(bw - bbr->lt_bw);
+		if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) ||
+		    (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <=
+		     bbr_lt_bw_diff)) {
+			/* All criteria are met; estimate we're policed. */
+			bbr->lt_bw = (bw + bbr->lt_bw) >> 1;  /* avg 2 intvls */
+			bbr->lt_use_bw = 1;
+			bbr->pacing_gain = BBR_UNIT;  /* try to avoid drops */
+			bbr->lt_rtt_cnt = 0;
+			return;
+		}
+	}
+	bbr->lt_bw = bw;
+	bbr_reset_lt_bw_sampling_interval(sk);
+}
+
+/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of
+ * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and
+ * explicitly models their policed rate, to reduce unnecessary losses. We
+ * estimate that we're policed if we see 2 consecutive sampling intervals with
+ * consistent throughput and high packet loss. If we think we're being policed,
+ * set lt_bw to the "long-term" average delivery rate from those 2 intervals.
+ */
+static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 lost, delivered;
+	u64 bw;
+	s32 t;
+
+	if (bbr->lt_use_bw) {	/* already using long-term rate, lt_bw? */
+		if (bbr->mode == BBR_PROBE_BW && bbr->round_start &&
+		    ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) {
+			bbr_reset_lt_bw_sampling(sk);    /* stop using lt_bw */
+			bbr_reset_probe_bw_mode(sk);  /* restart gain cycling */
+		}
+		return;
+	}
+
+	/* Wait for the first loss before sampling, to let the policer exhaust
+	 * its tokens and estimate the steady-state rate allowed by the policer.
+	 * Starting samples earlier includes bursts that over-estimate the bw.
+	 */
+	if (!bbr->lt_is_sampling) {
+		if (!rs->losses)
+			return;
+		bbr_reset_lt_bw_sampling_interval(sk);
+		bbr->lt_is_sampling = true;
+	}
+
+	/* To avoid underestimates, reset sampling if we run out of data. */
+	if (rs->is_app_limited) {
+		bbr_reset_lt_bw_sampling(sk);
+		return;
+	}
+
+	if (bbr->round_start)
+		bbr->lt_rtt_cnt++;	/* count round trips in this interval */
+	if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts)
+		return;		/* sampling interval needs to be longer */
+	if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) {
+		bbr_reset_lt_bw_sampling(sk);  /* interval is too long */
+		return;
+	}
+
+	/* End sampling interval when a packet is lost, so we estimate the
+	 * policer tokens were exhausted. Stopping the sampling before the
+	 * tokens are exhausted under-estimates the policed rate.
+	 */
+	if (!rs->losses)
+		return;
+
+	/* Calculate packets lost and delivered in sampling interval. */
+	lost = tp->lost - bbr->lt_last_lost;
+	delivered = tp->delivered - bbr->lt_last_delivered;
+	/* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */
+	if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered)
+		return;
+
+	/* Find average delivery rate in this sampling interval. */
+	t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp);
+	if (t < 1)
+		return;		/* interval is less than one jiffy, so wait */
+	t = jiffies_to_usecs(t);
+	/* Interval long enough for jiffies_to_usecs() to return a bogus 0? */
+	if (t < 1) {
+		bbr_reset_lt_bw_sampling(sk);  /* interval too long; reset */
+		return;
+	}
+	bw = (u64)delivered * BW_UNIT;
+	do_div(bw, t);
+	bbr_lt_bw_interval_done(sk, bw);
+}
+
+/* Estimate the bandwidth based on how fast packets are delivered */
+static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 bw;
+
+	bbr->round_start = 0;
+	if (rs->delivered < 0 || rs->interval_us <= 0)
+		return; /* Not a valid observation */
+
+	/* See if we've reached the next RTT */
+	if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) {
+		bbr->next_rtt_delivered = tp->delivered;
+		bbr->rtt_cnt++;
+		bbr->round_start = 1;
+		bbr->packet_conservation = 0;
+	}
+
+	bbr_lt_bw_sampling(sk, rs);
+
+	/* Divide delivered by the interval to find a (lower bound) bottleneck
+	 * bandwidth sample. Delivered is in packets and interval_us in uS and
+	 * ratio will be <<1 for most connections. So delivered is first scaled.
+	 */
+	bw = (u64)rs->delivered * BW_UNIT;
+	do_div(bw, rs->interval_us);
+
+	/* If this sample is application-limited, it is likely to have a very
+	 * low delivered count that represents application behavior rather than
+	 * the available network rate. Such a sample could drag down estimated
+	 * bw, causing needless slow-down. Thus, to continue to send at the
+	 * last measured network rate, we filter out app-limited samples unless
+	 * they describe the path bw at least as well as our bw model.
+	 *
+	 * So the goal during app-limited phase is to proceed with the best
+	 * network rate no matter how long. We automatically leave this
+	 * phase when app writes faster than the network can deliver :)
+	 */
+	if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) {
+		/* Incorporate new sample into our max bw filter. */
+		minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw);
+	}
+}
+
+/* Estimate when the pipe is full, using the change in delivery rate: BBR
+ * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by
+ * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited
+ * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the
+ * higher rwin, 3: we get higher delivery rate samples. Or transient
+ * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar
+ * design goal, but uses delay and inter-ACK spacing instead of bandwidth.
+ */
+static void bbr_check_full_bw_reached(struct sock *sk,
+				      const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 bw_thresh;
+
+	if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited)
+		return;
+
+	bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE;
+	if (bbr_max_bw(sk) >= bw_thresh) {
+		bbr->full_bw = bbr_max_bw(sk);
+		bbr->full_bw_cnt = 0;
+		return;
+	}
+	++bbr->full_bw_cnt;
+}
+
+/* If pipe is probably full, drain the queue and then enter steady-state. */
+static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) {
+		bbr->mode = BBR_DRAIN;	/* drain queue we created */
+		bbr->pacing_gain = bbr_drain_gain;	/* pace slow to drain */
+		bbr->cwnd_gain = bbr_high_gain;	/* maintain cwnd */
+	}	/* fall through to check if in-flight is already small: */
+	if (bbr->mode == BBR_DRAIN &&
+	    tcp_packets_in_flight(tcp_sk(sk)) <=
+	    bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT))
+		bbr_reset_probe_bw_mode(sk);  /* we estimate queue is drained */
+}
+
+/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and
+ * periodically drain the bottleneck queue, to converge to measure the true
+ * min_rtt (unloaded propagation delay). This allows the flows to keep queues
+ * small (reducing queuing delay and packet loss) and achieve fairness among
+ * BBR flows.
+ *
+ * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires,
+ * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets.
+ * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed
+ * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and
+ * re-enter the previous mode. BBR uses 200ms to approximately bound the
+ * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s).
+ *
+ * Note that flows need only pay 2% if they are busy sending over the last 10
+ * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have
+ * natural silences or low-rate periods within 10 seconds where the rate is low
+ * enough for long enough to drain its queue in the bottleneck. We pick up
+ * these min RTT measurements opportunistically with our min_rtt filter. :-)
+ */
+static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	bool filter_expired;
+
+	/* Track min RTT seen in the min_rtt_win_sec filter window: */
+	filter_expired = after(tcp_time_stamp,
+			       bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
+	if (rs->rtt_us >= 0 &&
+	    (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) {
+		bbr->min_rtt_us = rs->rtt_us;
+		bbr->min_rtt_stamp = tcp_time_stamp;
+	}
+
+	if (bbr_probe_rtt_mode_ms > 0 && filter_expired &&
+	    !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) {
+		bbr->mode = BBR_PROBE_RTT;  /* dip, drain queue */
+		bbr->pacing_gain = BBR_UNIT;
+		bbr->cwnd_gain = BBR_UNIT;
+		bbr_save_cwnd(sk);  /* note cwnd so we can restore it */
+		bbr->probe_rtt_done_stamp = 0;
+	}
+
+	if (bbr->mode == BBR_PROBE_RTT) {
+		/* Ignore low rate samples during this mode. */
+		tp->app_limited =
+			(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
+		/* Maintain min packets in flight for max(200 ms, 1 round). */
+		if (!bbr->probe_rtt_done_stamp &&
+		    tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) {
+			bbr->probe_rtt_done_stamp = tcp_time_stamp +
+				msecs_to_jiffies(bbr_probe_rtt_mode_ms);
+			bbr->probe_rtt_round_done = 0;
+			bbr->next_rtt_delivered = tp->delivered;
+		} else if (bbr->probe_rtt_done_stamp) {
+			if (bbr->round_start)
+				bbr->probe_rtt_round_done = 1;
+			if (bbr->probe_rtt_round_done &&
+			    after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) {
+				bbr->min_rtt_stamp = tcp_time_stamp;
+				bbr->restore_cwnd = 1;  /* snap to prior_cwnd */
+				bbr_reset_mode(sk);
+			}
+		}
+	}
+	bbr->idle_restart = 0;
+}
+
+static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
+{
+	bbr_update_bw(sk, rs);
+	bbr_update_cycle_phase(sk, rs);
+	bbr_check_full_bw_reached(sk, rs);
+	bbr_check_drain(sk, rs);
+	bbr_update_min_rtt(sk, rs);
+}
+
+static void bbr_main(struct sock *sk, const struct rate_sample *rs)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+	u32 bw;
+
+	bbr_update_model(sk, rs);
+
+	bw = bbr_bw(sk);
+	bbr_set_pacing_rate(sk, bw, bbr->pacing_gain);
+	bbr_set_tso_segs_goal(sk);
+	bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
+}
+
+static void bbr_init(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct bbr *bbr = inet_csk_ca(sk);
+	u64 bw;
+
+	bbr->prior_cwnd = 0;
+	bbr->tso_segs_goal = 0;	 /* default segs per skb until first ACK */
+	bbr->rtt_cnt = 0;
+	bbr->next_rtt_delivered = 0;
+	bbr->prev_ca_state = TCP_CA_Open;
+	bbr->packet_conservation = 0;
+
+	bbr->probe_rtt_done_stamp = 0;
+	bbr->probe_rtt_round_done = 0;
+	bbr->min_rtt_us = tcp_min_rtt(tp);
+	bbr->min_rtt_stamp = tcp_time_stamp;
+
+	minmax_reset(&bbr->bw, bbr->rtt_cnt, 0);  /* init max bw to 0 */
+
+	/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+	bw = (u64)tp->snd_cwnd * BW_UNIT;
+	do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
+	sk->sk_pacing_rate = 0;		/* force an update of sk_pacing_rate */
+	bbr_set_pacing_rate(sk, bw, bbr_high_gain);
+
+	bbr->restore_cwnd = 0;
+	bbr->round_start = 0;
+	bbr->idle_restart = 0;
+	bbr->full_bw = 0;
+	bbr->full_bw_cnt = 0;
+	bbr->cycle_mstamp.v64 = 0;
+	bbr->cycle_idx = 0;
+	bbr_reset_lt_bw_sampling(sk);
+	bbr_reset_startup_mode(sk);
+}
+
+static u32 bbr_sndbuf_expand(struct sock *sk)
+{
+	/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
+	return 3;
+}
+
+/* In theory BBR does not need to undo the cwnd since it does not
+ * always reduce cwnd on losses (see bbr_main()). Keep it for now.
+ */
+static u32 bbr_undo_cwnd(struct sock *sk)
+{
+	return tcp_sk(sk)->snd_cwnd;
+}
+
+/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
+static u32 bbr_ssthresh(struct sock *sk)
+{
+	bbr_save_cwnd(sk);
+	return TCP_INFINITE_SSTHRESH;	 /* BBR does not use ssthresh */
+}
+
+static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
+			   union tcp_cc_info *info)
+{
+	if (ext & (1 << (INET_DIAG_BBRINFO - 1)) ||
+	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
+		struct tcp_sock *tp = tcp_sk(sk);
+		struct bbr *bbr = inet_csk_ca(sk);
+		u64 bw = bbr_bw(sk);
+
+		bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE;
+		memset(&info->bbr, 0, sizeof(info->bbr));
+		info->bbr.bbr_bw_lo		= (u32)bw;
+		info->bbr.bbr_bw_hi		= (u32)(bw >> 32);
+		info->bbr.bbr_min_rtt		= bbr->min_rtt_us;
+		info->bbr.bbr_pacing_gain	= bbr->pacing_gain;
+		info->bbr.bbr_cwnd_gain		= bbr->cwnd_gain;
+		*attr = INET_DIAG_BBRINFO;
+		return sizeof(info->bbr);
+	}
+	return 0;
+}
+
+static void bbr_set_state(struct sock *sk, u8 new_state)
+{
+	struct bbr *bbr = inet_csk_ca(sk);
+
+	if (new_state == TCP_CA_Loss) {
+		struct rate_sample rs = { .losses = 1 };
+
+		bbr->prev_ca_state = TCP_CA_Loss;
+		bbr->full_bw = 0;
+		bbr->round_start = 1;	/* treat RTO like end of a round */
+		bbr_lt_bw_sampling(sk, &rs);
+	}
+}
+
+static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
+	.flags		= TCP_CONG_NON_RESTRICTED,
+	.name		= "bbr",
+	.owner		= THIS_MODULE,
+	.init		= bbr_init,
+	.cong_control	= bbr_main,
+	.sndbuf_expand	= bbr_sndbuf_expand,
+	.undo_cwnd	= bbr_undo_cwnd,
+	.cwnd_event	= bbr_cwnd_event,
+	.ssthresh	= bbr_ssthresh,
+	.tso_segs_goal	= bbr_tso_segs_goal,
+	.get_info	= bbr_get_info,
+	.set_state	= bbr_set_state,
+};
+
+static int __init bbr_register(void)
+{
+	BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
+	return tcp_register_congestion_control(&tcp_bbr_cong_ops);
+}
+
+static void __exit bbr_unregister(void)
+{
+	tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
+}
+
+module_init(bbr_register);
+module_exit(bbr_unregister);
+
+MODULE_AUTHOR("Van Jacobson <vanj@google.com>");
+MODULE_AUTHOR("Neal Cardwell <ncardwell@google.com>");
+MODULE_AUTHOR("Yuchung Cheng <ycheng@google.com>");
+MODULE_AUTHOR("Soheil Hassas Yeganeh <soheil@google.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)");
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 03725b2..35b2803 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -56,7 +56,7 @@
 module_param(use_tolerance, bool, 0644);
 MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic");
 
-struct minmax {
+struct cdg_minmax {
 	union {
 		struct {
 			s32 min;
@@ -74,10 +74,10 @@
 };
 
 struct cdg {
-	struct minmax rtt;
-	struct minmax rtt_prev;
-	struct minmax *gradients;
-	struct minmax gsum;
+	struct cdg_minmax rtt;
+	struct cdg_minmax rtt_prev;
+	struct cdg_minmax *gradients;
+	struct cdg_minmax gsum;
 	bool gfilled;
 	u8  tail;
 	u8  state;
@@ -353,7 +353,7 @@
 {
 	struct cdg *ca = inet_csk_ca(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct minmax *gradients;
+	struct cdg_minmax *gradients;
 
 	switch (ev) {
 	case CA_EVENT_CWND_RESTART:
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 882caa4..1294af4 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -69,7 +69,7 @@
 	int ret = 0;
 
 	/* all algorithms must implement ssthresh and cong_avoid ops */
-	if (!ca->ssthresh || !ca->cong_avoid) {
+	if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) {
 		pr_err("%s does not implement required ops\n", ca->name);
 		return -EINVAL;
 	}
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 4d61093..a748c74 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -54,11 +54,16 @@
 {
 	struct net *net = sock_net(in_skb->sk);
 	struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req);
+	int err;
 
 	if (IS_ERR(sk))
 		return PTR_ERR(sk);
 
-	return sock_diag_destroy(sk, ECONNABORTED);
+	err = sock_diag_destroy(sk, ECONNABORTED);
+
+	sock_gen_put(sk);
+
+	return err;
 }
 #endif
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 54d9f9b..4e777a3 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -150,6 +150,7 @@
 	tp->segs_in = 0;
 	tcp_segs_in(tp, skb);
 	__skb_pull(skb, tcp_hdrlen(skb));
+	sk_forced_mem_schedule(sk, skb->truesize);
 	skb_set_owner_r(skb, sk);
 
 	TCP_SKB_CB(skb)->seq++;
@@ -226,6 +227,7 @@
 	tcp_fastopen_add_skb(child, skb);
 
 	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt;
+	tp->rcv_wup = tp->rcv_nxt;
 	/* tcp_conn_request() is sending the SYNACK,
 	 * and queues the child into listener accept queue.
 	 */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3ebf45b..a27b9c0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -289,6 +289,7 @@
 static void tcp_sndbuf_expand(struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
 	int sndmem, per_mss;
 	u32 nr_segs;
 
@@ -309,7 +310,8 @@
 	 * Cubic needs 1.7 factor, rounded to 2 to include
 	 * extra cushion (application might react slowly to POLLOUT)
 	 */
-	sndmem = 2 * nr_segs * per_mss;
+	sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
+	sndmem *= nr_segs * per_mss;
 
 	if (sk->sk_sndbuf < sndmem)
 		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
@@ -899,12 +901,29 @@
 		tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
 }
 
+/* Sum the number of packets on the wire we have marked as lost.
+ * There are two cases we care about here:
+ * a) Packet hasn't been marked lost (nor retransmitted),
+ *    and this is the first loss.
+ * b) Packet has been marked both lost and retransmitted,
+ *    and this means we think it was lost again.
+ */
+static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	__u8 sacked = TCP_SKB_CB(skb)->sacked;
+
+	if (!(sacked & TCPCB_LOST) ||
+	    ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS)))
+		tp->lost += tcp_skb_pcount(skb);
+}
+
 static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
 		tcp_verify_retransmit_hint(tp, skb);
 
 		tp->lost_out += tcp_skb_pcount(skb);
+		tcp_sum_lost(tp, skb);
 		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 	}
 }
@@ -913,6 +932,7 @@
 {
 	tcp_verify_retransmit_hint(tp, skb);
 
+	tcp_sum_lost(tp, skb);
 	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
 		tp->lost_out += tcp_skb_pcount(skb);
 		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1094,6 +1114,7 @@
 	 */
 	struct skb_mstamp first_sackt;
 	struct skb_mstamp last_sackt;
+	struct rate_sample *rate;
 	int	flag;
 };
 
@@ -1261,6 +1282,7 @@
 	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
 			start_seq, end_seq, dup_sack, pcount,
 			&skb->skb_mstamp);
+	tcp_rate_skb_delivered(sk, skb, state->rate);
 
 	if (skb == tp->lost_skb_hint)
 		tp->lost_cnt_hint += pcount;
@@ -1311,6 +1333,9 @@
 		tcp_advance_highest_sack(sk, skb);
 
 	tcp_skb_collapse_tstamp(prev, skb);
+	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
+		TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
+
 	tcp_unlink_write_queue(skb, sk);
 	sk_wmem_free_skb(sk, skb);
 
@@ -1540,6 +1565,7 @@
 						dup_sack,
 						tcp_skb_pcount(skb),
 						&skb->skb_mstamp);
+			tcp_rate_skb_delivered(sk, skb, state->rate);
 
 			if (!before(TCP_SKB_CB(skb)->seq,
 				    tcp_highest_sack_seq(tp)))
@@ -1622,8 +1648,10 @@
 
 	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
 					 num_sacks, prior_snd_una);
-	if (found_dup_sack)
+	if (found_dup_sack) {
 		state->flag |= FLAG_DSACKING_ACK;
+		tp->delivered++; /* A spurious retransmission is delivered */
+	}
 
 	/* Eliminate too old ACKs, but take into
 	 * account more or less fresh ones, they can
@@ -1890,6 +1918,7 @@
 	struct sk_buff *skb;
 	bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
 	bool is_reneg;			/* is receiver reneging on SACKs? */
+	bool mark_lost;
 
 	/* Reduce ssthresh if it has not yet been made inside this window. */
 	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
@@ -1923,8 +1952,12 @@
 		if (skb == tcp_send_head(sk))
 			break;
 
+		mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
+			     is_reneg);
+		if (mark_lost)
+			tcp_sum_lost(tp, skb);
 		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
-		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
+		if (mark_lost) {
 			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
 			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 			tp->lost_out += tcp_skb_pcount(skb);
@@ -2329,10 +2362,9 @@
 	}
 #if IS_ENABLED(CONFIG_IPV6)
 	else if (sk->sk_family == AF_INET6) {
-		struct ipv6_pinfo *np = inet6_sk(sk);
 		pr_debug("Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
 			 msg,
-			 &np->daddr, ntohs(inet->inet_dport),
+			 &sk->sk_v6_daddr, ntohs(inet->inet_dport),
 			 tp->snd_cwnd, tcp_left_out(tp),
 			 tp->snd_ssthresh, tp->prior_ssthresh,
 			 tp->packets_out);
@@ -2503,6 +2535,9 @@
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
+	if (inet_csk(sk)->icsk_ca_ops->cong_control)
+		return;
+
 	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
 	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
 	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
@@ -2879,67 +2914,13 @@
 	*rexmit = REXMIT_LOST;
 }
 
-/* Kathleen Nichols' algorithm for tracking the minimum value of
- * a data stream over some fixed time interval. (E.g., the minimum
- * RTT over the past five minutes.) It uses constant space and constant
- * time per update yet almost always delivers the same minimum as an
- * implementation that has to keep all the data in the window.
- *
- * The algorithm keeps track of the best, 2nd best & 3rd best min
- * values, maintaining an invariant that the measurement time of the
- * n'th best >= n-1'th best. It also makes sure that the three values
- * are widely separated in the time window since that bounds the worse
- * case error when that data is monotonically increasing over the window.
- *
- * Upon getting a new min, we can forget everything earlier because it
- * has no value - the new min is <= everything else in the window by
- * definition and it's the most recent. So we restart fresh on every new min
- * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd
- * best.
- */
 static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
 {
-	const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
-	struct rtt_meas *m = tcp_sk(sk)->rtt_min;
-	struct rtt_meas rttm = {
-		.rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
-		.ts = now,
-	};
-	u32 elapsed;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
 
-	/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
-	if (unlikely(rttm.rtt <= m[0].rtt))
-		m[0] = m[1] = m[2] = rttm;
-	else if (rttm.rtt <= m[1].rtt)
-		m[1] = m[2] = rttm;
-	else if (rttm.rtt <= m[2].rtt)
-		m[2] = rttm;
-
-	elapsed = now - m[0].ts;
-	if (unlikely(elapsed > wlen)) {
-		/* Passed entire window without a new min so make 2nd choice
-		 * the new min & 3rd choice the new 2nd. So forth and so on.
-		 */
-		m[0] = m[1];
-		m[1] = m[2];
-		m[2] = rttm;
-		if (now - m[0].ts > wlen) {
-			m[0] = m[1];
-			m[1] = rttm;
-			if (now - m[0].ts > wlen)
-				m[0] = rttm;
-		}
-	} else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
-		/* Passed a quarter of the window without a new min so
-		 * take 2nd choice from the 2nd quarter of the window.
-		 */
-		m[2] = m[1] = rttm;
-	} else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
-		/* Passed half the window without a new min so take the 3rd
-		 * choice from the last half of the window.
-		 */
-		m[2] = rttm;
-	}
+	minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
+			   rtt_us ? : jiffies_to_usecs(1));
 }
 
 static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
@@ -3102,10 +3083,11 @@
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 			       u32 prior_snd_una, int *acked,
-			       struct tcp_sacktag_state *sack)
+			       struct tcp_sacktag_state *sack,
+			       struct skb_mstamp *now)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	struct skb_mstamp first_ackt, last_ackt, now;
+	struct skb_mstamp first_ackt, last_ackt;
 	struct tcp_sock *tp = tcp_sk(sk);
 	u32 prior_sacked = tp->sacked_out;
 	u32 reord = tp->packets_out;
@@ -3137,7 +3119,6 @@
 			acked_pcount = tcp_tso_acked(sk, skb);
 			if (!acked_pcount)
 				break;
-
 			fully_acked = false;
 		} else {
 			/* Speedup tcp_unlink_write_queue() and next loop */
@@ -3173,6 +3154,7 @@
 
 		tp->packets_out -= acked_pcount;
 		pkts_acked += acked_pcount;
+		tcp_rate_skb_delivered(sk, skb, sack->rate);
 
 		/* Initial outgoing SYN's get put onto the write_queue
 		 * just like anything else we transmit.  It is not
@@ -3205,16 +3187,15 @@
 	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
 		flag |= FLAG_SACK_RENEGING;
 
-	skb_mstamp_get(&now);
 	if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
-		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
-		ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+		seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
+		ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
 	}
 	if (sack->first_sackt.v64) {
-		sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
-		ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
+		sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
+		ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
 	}
-
+	sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
 	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
 					ca_rtt_us);
 
@@ -3242,7 +3223,7 @@
 		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
 	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
-		   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
+		   sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
 		/* Do not re-arm RTO if the sack RTT is measured from data sent
 		 * after when the head was last (re)transmitted. Otherwise the
 		 * timeout may continue to extend in loss recovery.
@@ -3333,8 +3314,15 @@
  * information. All transmission or retransmission are delayed afterwards.
  */
 static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
-			     int flag)
+			     int flag, const struct rate_sample *rs)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
+
+	if (icsk->icsk_ca_ops->cong_control) {
+		icsk->icsk_ca_ops->cong_control(sk, rs);
+		return;
+	}
+
 	if (tcp_in_cwnd_reduction(sk)) {
 		/* Reduce cwnd if state mandates */
 		tcp_cwnd_reduction(sk, acked_sacked, flag);
@@ -3579,17 +3567,21 @@
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_sacktag_state sack_state;
+	struct rate_sample rs = { .prior_delivered = 0 };
 	u32 prior_snd_una = tp->snd_una;
 	u32 ack_seq = TCP_SKB_CB(skb)->seq;
 	u32 ack = TCP_SKB_CB(skb)->ack_seq;
 	bool is_dupack = false;
 	u32 prior_fackets;
 	int prior_packets = tp->packets_out;
-	u32 prior_delivered = tp->delivered;
+	u32 delivered = tp->delivered;
+	u32 lost = tp->lost;
 	int acked = 0; /* Number of packets newly acked */
 	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+	struct skb_mstamp now;
 
 	sack_state.first_sackt.v64 = 0;
+	sack_state.rate = &rs;
 
 	/* We very likely will need to access write queue head. */
 	prefetchw(sk->sk_write_queue.next);
@@ -3612,6 +3604,8 @@
 	if (after(ack, tp->snd_nxt))
 		goto invalid_ack;
 
+	skb_mstamp_get(&now);
+
 	if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
@@ -3622,6 +3616,7 @@
 	}
 
 	prior_fackets = tp->fackets_out;
+	rs.prior_in_flight = tcp_packets_in_flight(tp);
 
 	/* ts_recent update must be made after we are sure that the packet
 	 * is in window.
@@ -3677,7 +3672,7 @@
 
 	/* See if we can take anything off of the retransmit queue. */
 	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
-				    &sack_state);
+				    &sack_state, &now);
 
 	if (tcp_ack_is_dubious(sk, flag)) {
 		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
@@ -3694,7 +3689,10 @@
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
 		tcp_schedule_loss_probe(sk);
-	tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
+	delivered = tp->delivered - delivered;	/* freshly ACKed or SACKed */
+	lost = tp->lost - lost;			/* freshly marked lost */
+	tcp_rate_gen(sk, delivered, lost, &now, &rs);
+	tcp_cong_control(sk, ack, delivered, flag, &rs);
 	tcp_xmit_recovery(sk, rexmit);
 	return 1;
 
@@ -4108,7 +4106,7 @@
 	/* It _is_ possible, that we have something out-of-order _after_ FIN.
 	 * Probably, we should reset in this case. For now drop them.
 	 */
-	__skb_queue_purge(&tp->out_of_order_queue);
+	skb_rbtree_purge(&tp->out_of_order_queue);
 	if (tcp_is_sack(tp))
 		tcp_sack_reset(&tp->rx_opt);
 	sk_mem_reclaim(sk);
@@ -4268,7 +4266,7 @@
 	int this_sack;
 
 	/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
-	if (skb_queue_empty(&tp->out_of_order_queue)) {
+	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
 		tp->rx_opt.num_sacks = 0;
 		return;
 	}
@@ -4344,10 +4342,13 @@
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 dsack_high = tp->rcv_nxt;
+	bool fin, fragstolen, eaten;
 	struct sk_buff *skb, *tail;
-	bool fragstolen, eaten;
+	struct rb_node *p;
 
-	while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
+	p = rb_first(&tp->out_of_order_queue);
+	while (p) {
+		skb = rb_entry(p, struct sk_buff, rbnode);
 		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
 			break;
 
@@ -4357,9 +4358,10 @@
 				dsack_high = TCP_SKB_CB(skb)->end_seq;
 			tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
 		}
+		p = rb_next(p);
+		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
 
-		__skb_unlink(skb, &tp->out_of_order_queue);
-		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
 			SOCK_DEBUG(sk, "ofo packet was already received\n");
 			tcp_drop(sk, skb);
 			continue;
@@ -4371,12 +4373,19 @@
 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
 		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
+		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
 		if (!eaten)
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
-		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
-			tcp_fin(sk);
-		if (eaten)
+		else
 			kfree_skb_partial(skb, fragstolen);
+
+		if (unlikely(fin)) {
+			tcp_fin(sk);
+			/* tcp_fin() purges tp->out_of_order_queue,
+			 * so we must end this loop right now.
+			 */
+			break;
+		}
 	}
 }
 
@@ -4392,12 +4401,9 @@
 		if (tcp_prune_queue(sk) < 0)
 			return -1;
 
-		if (!sk_rmem_schedule(sk, skb, size)) {
+		while (!sk_rmem_schedule(sk, skb, size)) {
 			if (!tcp_prune_ofo_queue(sk))
 				return -1;
-
-			if (!sk_rmem_schedule(sk, skb, size))
-				return -1;
 		}
 	}
 	return 0;
@@ -4406,8 +4412,10 @@
 static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct rb_node **p, *q, *parent;
 	struct sk_buff *skb1;
 	u32 seq, end_seq;
+	bool fragstolen;
 
 	tcp_ecn_check_ce(tp, skb);
 
@@ -4422,88 +4430,92 @@
 	inet_csk_schedule_ack(sk);
 
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
+	seq = TCP_SKB_CB(skb)->seq;
+	end_seq = TCP_SKB_CB(skb)->end_seq;
 	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
-		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+		   tp->rcv_nxt, seq, end_seq);
 
-	skb1 = skb_peek_tail(&tp->out_of_order_queue);
-	if (!skb1) {
+	p = &tp->out_of_order_queue.rb_node;
+	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
 		/* Initial out of order segment, build 1 SACK. */
 		if (tcp_is_sack(tp)) {
 			tp->rx_opt.num_sacks = 1;
-			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
-			tp->selective_acks[0].end_seq =
-						TCP_SKB_CB(skb)->end_seq;
+			tp->selective_acks[0].start_seq = seq;
+			tp->selective_acks[0].end_seq = end_seq;
 		}
-		__skb_queue_head(&tp->out_of_order_queue, skb);
+		rb_link_node(&skb->rbnode, NULL, p);
+		rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
+		tp->ooo_last_skb = skb;
 		goto end;
 	}
 
-	seq = TCP_SKB_CB(skb)->seq;
-	end_seq = TCP_SKB_CB(skb)->end_seq;
-
-	if (seq == TCP_SKB_CB(skb1)->end_seq) {
-		bool fragstolen;
-
-		if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
-			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-		} else {
-			tcp_grow_window(sk, skb);
-			kfree_skb_partial(skb, fragstolen);
-			skb = NULL;
-		}
-
-		if (!tp->rx_opt.num_sacks ||
-		    tp->selective_acks[0].end_seq != seq)
-			goto add_sack;
-
-		/* Common case: data arrive in order after hole. */
-		tp->selective_acks[0].end_seq = end_seq;
-		goto end;
+	/* In the typical case, we are adding an skb to the end of the list.
+	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
+	 */
+	if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+coalesce_done:
+		tcp_grow_window(sk, skb);
+		kfree_skb_partial(skb, fragstolen);
+		skb = NULL;
+		goto add_sack;
+	}
+	/* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
+	if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
+		parent = &tp->ooo_last_skb->rbnode;
+		p = &parent->rb_right;
+		goto insert;
 	}
 
-	/* Find place to insert this segment. */
-	while (1) {
-		if (!after(TCP_SKB_CB(skb1)->seq, seq))
-			break;
-		if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
-			skb1 = NULL;
-			break;
+	/* Find place to insert this segment. Handle overlaps on the way. */
+	parent = NULL;
+	while (*p) {
+		parent = *p;
+		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		if (before(seq, TCP_SKB_CB(skb1)->seq)) {
+			p = &parent->rb_left;
+			continue;
 		}
-		skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
+		if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+				/* All the bits are present. Drop. */
+				NET_INC_STATS(sock_net(sk),
+					      LINUX_MIB_TCPOFOMERGE);
+				__kfree_skb(skb);
+				skb = NULL;
+				tcp_dsack_set(sk, seq, end_seq);
+				goto add_sack;
+			}
+			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+				/* Partial overlap. */
+				tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
+			} else {
+				/* skb's seq == skb1's seq and skb covers skb1.
+				 * Replace skb1 with skb.
+				 */
+				rb_replace_node(&skb1->rbnode, &skb->rbnode,
+						&tp->out_of_order_queue);
+				tcp_dsack_extend(sk,
+						 TCP_SKB_CB(skb1)->seq,
+						 TCP_SKB_CB(skb1)->end_seq);
+				NET_INC_STATS(sock_net(sk),
+					      LINUX_MIB_TCPOFOMERGE);
+				__kfree_skb(skb1);
+				goto merge_right;
+			}
+		} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+			goto coalesce_done;
+		}
+		p = &parent->rb_right;
 	}
+insert:
+	/* Insert segment into RB tree. */
+	rb_link_node(&skb->rbnode, parent, p);
+	rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
 
-	/* Do skb overlap to previous one? */
-	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
-		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
-			/* All the bits are present. Drop. */
-			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
-			tcp_drop(sk, skb);
-			skb = NULL;
-			tcp_dsack_set(sk, seq, end_seq);
-			goto add_sack;
-		}
-		if (after(seq, TCP_SKB_CB(skb1)->seq)) {
-			/* Partial overlap. */
-			tcp_dsack_set(sk, seq,
-				      TCP_SKB_CB(skb1)->end_seq);
-		} else {
-			if (skb_queue_is_first(&tp->out_of_order_queue,
-					       skb1))
-				skb1 = NULL;
-			else
-				skb1 = skb_queue_prev(
-					&tp->out_of_order_queue,
-					skb1);
-		}
-	}
-	if (!skb1)
-		__skb_queue_head(&tp->out_of_order_queue, skb);
-	else
-		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
-
-	/* And clean segments covered by new one as whole. */
-	while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
-		skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+merge_right:
+	/* Remove other segments covered by skb. */
+	while ((q = rb_next(&skb->rbnode)) != NULL) {
+		skb1 = rb_entry(q, struct sk_buff, rbnode);
 
 		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
 			break;
@@ -4512,12 +4524,15 @@
 					 end_seq);
 			break;
 		}
-		__skb_unlink(skb1, &tp->out_of_order_queue);
+		rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
 		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
 				 TCP_SKB_CB(skb1)->end_seq);
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
 		tcp_drop(sk, skb1);
 	}
+	/* If there is no skb after us, we are the last_skb ! */
+	if (!q)
+		tp->ooo_last_skb = skb;
 
 add_sack:
 	if (tcp_is_sack(tp))
@@ -4654,13 +4669,13 @@
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 			tcp_fin(sk);
 
-		if (!skb_queue_empty(&tp->out_of_order_queue)) {
+		if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
 			tcp_ofo_queue(sk);
 
 			/* RFC2581. 4.2. SHOULD send immediate ACK, when
 			 * gap in queue is filled.
 			 */
-			if (skb_queue_empty(&tp->out_of_order_queue))
+			if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
 				inet_csk(sk)->icsk_ack.pingpong = 0;
 		}
 
@@ -4714,48 +4729,76 @@
 	tcp_data_queue_ofo(sk, skb);
 }
 
-static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
-					struct sk_buff_head *list)
+static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
 {
-	struct sk_buff *next = NULL;
+	if (list)
+		return !skb_queue_is_last(list, skb) ? skb->next : NULL;
 
-	if (!skb_queue_is_last(list, skb))
-		next = skb_queue_next(list, skb);
+	return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+}
 
-	__skb_unlink(skb, list);
+static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
+					struct sk_buff_head *list,
+					struct rb_root *root)
+{
+	struct sk_buff *next = tcp_skb_next(skb, list);
+
+	if (list)
+		__skb_unlink(skb, list);
+	else
+		rb_erase(&skb->rbnode, root);
+
 	__kfree_skb(skb);
 	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
 
 	return next;
 }
 
+/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
+static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+{
+	struct rb_node **p = &root->rb_node;
+	struct rb_node *parent = NULL;
+	struct sk_buff *skb1;
+
+	while (*p) {
+		parent = *p;
+		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	rb_link_node(&skb->rbnode, parent, p);
+	rb_insert_color(&skb->rbnode, root);
+}
+
 /* Collapse contiguous sequence of skbs head..tail with
  * sequence numbers start..end.
  *
- * If tail is NULL, this means until the end of the list.
+ * If tail is NULL, this means until the end of the queue.
  *
  * Segments with FIN/SYN are not collapsed (only because this
  * simplifies code)
  */
 static void
-tcp_collapse(struct sock *sk, struct sk_buff_head *list,
-	     struct sk_buff *head, struct sk_buff *tail,
-	     u32 start, u32 end)
+tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
+	     struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
 {
-	struct sk_buff *skb, *n;
+	struct sk_buff *skb = head, *n;
+	struct sk_buff_head tmp;
 	bool end_of_skbs;
 
 	/* First, check that queue is collapsible and find
-	 * the point where collapsing can be useful. */
-	skb = head;
+	 * the point where collapsing can be useful.
+	 */
 restart:
-	end_of_skbs = true;
-	skb_queue_walk_from_safe(list, skb, n) {
-		if (skb == tail)
-			break;
+	for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
+		n = tcp_skb_next(skb, list);
+
 		/* No new bits? It is possible on ofo queue. */
 		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-			skb = tcp_collapse_one(sk, skb, list);
+			skb = tcp_collapse_one(sk, skb, list, root);
 			if (!skb)
 				break;
 			goto restart;
@@ -4773,13 +4816,10 @@
 			break;
 		}
 
-		if (!skb_queue_is_last(list, skb)) {
-			struct sk_buff *next = skb_queue_next(list, skb);
-			if (next != tail &&
-			    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
-				end_of_skbs = false;
-				break;
-			}
+		if (n && n != tail &&
+		    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
+			end_of_skbs = false;
+			break;
 		}
 
 		/* Decided to skip this, advance start seq. */
@@ -4789,17 +4829,22 @@
 	    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
 		return;
 
+	__skb_queue_head_init(&tmp);
+
 	while (before(start, end)) {
 		int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
 		struct sk_buff *nskb;
 
 		nskb = alloc_skb(copy, GFP_ATOMIC);
 		if (!nskb)
-			return;
+			break;
 
 		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
 		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
-		__skb_queue_before(list, skb, nskb);
+		if (list)
+			__skb_queue_before(list, skb, nskb);
+		else
+			__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
 		skb_set_owner_r(nskb, sk);
 
 		/* Copy data, releasing collapsed skbs. */
@@ -4817,14 +4862,17 @@
 				start += size;
 			}
 			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
-				skb = tcp_collapse_one(sk, skb, list);
+				skb = tcp_collapse_one(sk, skb, list, root);
 				if (!skb ||
 				    skb == tail ||
 				    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
-					return;
+					goto end;
 			}
 		}
 	}
+end:
+	skb_queue_walk_safe(&tmp, skb, n)
+		tcp_rbtree_insert(root, skb);
 }
 
 /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
@@ -4833,70 +4881,86 @@
 static void tcp_collapse_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
-	struct sk_buff *head;
+	struct sk_buff *skb, *head;
+	struct rb_node *p;
 	u32 start, end;
 
-	if (!skb)
+	p = rb_first(&tp->out_of_order_queue);
+	skb = rb_entry_safe(p, struct sk_buff, rbnode);
+new_range:
+	if (!skb) {
+		p = rb_last(&tp->out_of_order_queue);
+		/* Note: This is possible p is NULL here. We do not
+		 * use rb_entry_safe(), as ooo_last_skb is valid only
+		 * if rbtree is not empty.
+		 */
+		tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
 		return;
-
+	}
 	start = TCP_SKB_CB(skb)->seq;
 	end = TCP_SKB_CB(skb)->end_seq;
-	head = skb;
 
-	for (;;) {
-		struct sk_buff *next = NULL;
+	for (head = skb;;) {
+		skb = tcp_skb_next(skb, NULL);
 
-		if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
-			next = skb_queue_next(&tp->out_of_order_queue, skb);
-		skb = next;
-
-		/* Segment is terminated when we see gap or when
-		 * we are at the end of all the queue. */
+		/* Range is terminated when we see a gap or when
+		 * we are at the queue end.
+		 */
 		if (!skb ||
 		    after(TCP_SKB_CB(skb)->seq, end) ||
 		    before(TCP_SKB_CB(skb)->end_seq, start)) {
-			tcp_collapse(sk, &tp->out_of_order_queue,
+			tcp_collapse(sk, NULL, &tp->out_of_order_queue,
 				     head, skb, start, end);
-			head = skb;
-			if (!skb)
-				break;
-			/* Start new segment */
-			start = TCP_SKB_CB(skb)->seq;
-			end = TCP_SKB_CB(skb)->end_seq;
-		} else {
-			if (before(TCP_SKB_CB(skb)->seq, start))
-				start = TCP_SKB_CB(skb)->seq;
-			if (after(TCP_SKB_CB(skb)->end_seq, end))
-				end = TCP_SKB_CB(skb)->end_seq;
+			goto new_range;
 		}
+
+		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
+			start = TCP_SKB_CB(skb)->seq;
+		if (after(TCP_SKB_CB(skb)->end_seq, end))
+			end = TCP_SKB_CB(skb)->end_seq;
 	}
 }
 
 /*
- * Purge the out-of-order queue.
- * Return true if queue was pruned.
+ * Clean the out-of-order queue to make room.
+ * We drop high sequences packets to :
+ * 1) Let a chance for holes to be filled.
+ * 2) not add too big latencies if thousands of packets sit there.
+ *    (But if application shrinks SO_RCVBUF, we could still end up
+ *     freeing whole queue here)
+ *
+ * Return true if queue has shrunk.
  */
 static bool tcp_prune_ofo_queue(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	bool res = false;
+	struct rb_node *node, *prev;
 
-	if (!skb_queue_empty(&tp->out_of_order_queue)) {
-		NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
-		__skb_queue_purge(&tp->out_of_order_queue);
+	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
+		return false;
 
-		/* Reset SACK state.  A conforming SACK implementation will
-		 * do the same at a timeout based retransmit.  When a connection
-		 * is in a sad state like this, we care only about integrity
-		 * of the connection not performance.
-		 */
-		if (tp->rx_opt.sack_ok)
-			tcp_sack_reset(&tp->rx_opt);
+	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+	node = &tp->ooo_last_skb->rbnode;
+	do {
+		prev = rb_prev(node);
+		rb_erase(node, &tp->out_of_order_queue);
+		tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
 		sk_mem_reclaim(sk);
-		res = true;
-	}
-	return res;
+		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+		    !tcp_under_memory_pressure(sk))
+			break;
+		node = prev;
+	} while (node);
+	tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+
+	/* Reset SACK state.  A conforming SACK implementation will
+	 * do the same at a timeout based retransmit.  When a connection
+	 * is in a sad state like this, we care only about integrity
+	 * of the connection not performance.
+	 */
+	if (tp->rx_opt.sack_ok)
+		tcp_sack_reset(&tp->rx_opt);
+	return true;
 }
 
 /* Reduce allocated memory if we can, trying to get
@@ -4921,7 +4985,7 @@
 
 	tcp_collapse_ofo_queue(sk);
 	if (!skb_queue_empty(&sk->sk_receive_queue))
-		tcp_collapse(sk, &sk->sk_receive_queue,
+		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
 			     skb_peek(&sk->sk_receive_queue),
 			     NULL,
 			     tp->copied_seq, tp->rcv_nxt);
@@ -5026,7 +5090,7 @@
 	    /* We ACK each frame or... */
 	    tcp_in_quickack_mode(sk) ||
 	    /* We have out of order data. */
-	    (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
+	    (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
 		/* Then ack it now */
 		tcp_send_ack(sk);
 	} else {
@@ -5885,7 +5949,7 @@
 		 * so release it.
 		 */
 		if (req) {
-			tp->total_retrans = req->num_retrans;
+			inet_csk(sk)->icsk_retransmits = 0;
 			reqsk_fastopen_remove(sk, req, false);
 		} else {
 			/* Make sure socket is routed, for correct metrics. */
@@ -5927,7 +5991,8 @@
 		} else
 			tcp_init_metrics(sk);
 
-		tcp_update_pacing_rate(sk);
+		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
+			tcp_update_pacing_rate(sk);
 
 		/* Prevent spurious tcp_cwnd_restart() on first data packet */
 		tp->lsndtime = tcp_time_stamp;
@@ -6260,6 +6325,7 @@
 
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb, sk);
+	inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
 
 	/* Note: tcp_v6_init_req() might override ir_iif for link locals */
 	inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 32b048e..7ac37c3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -814,8 +814,14 @@
 	u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
 					     tcp_sk(sk)->snd_nxt;
 
+	/* RFC 7323 2.3
+	 * The window field (SEG.WND) of every outgoing segment, with the
+	 * exception of <SYN> segments, MUST be right-shifted by
+	 * Rcv.Wind.Shift bits:
+	 */
 	tcp_v4_send_ack(sock_net(sk), skb, seq,
-			tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+			tcp_rsk(req)->rcv_nxt,
+			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 			tcp_time_stamp,
 			req->ts_recent,
 			0,
@@ -1169,6 +1175,7 @@
 				      NULL, skb);
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
 		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
 				     &iph->saddr, ntohs(th->source),
 				     &iph->daddr, ntohs(th->dest),
@@ -1189,7 +1196,6 @@
 
 	sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
 	sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
-	ireq->no_srccheck = inet_sk(sk_listener)->transparent;
 	ireq->opt = tcp_v4_save_options(skb);
 }
 
@@ -1531,6 +1537,34 @@
 }
 EXPORT_SYMBOL(tcp_prequeue);
 
+bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
+{
+	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
+
+	/* Only socket owner can try to collapse/prune rx queues
+	 * to reduce memory overhead, so add a little headroom here.
+	 * Few sockets backlog are possibly concurrently non empty.
+	 */
+	limit += 64*1024;
+
+	/* In case all data was pulled from skb frags (in __pskb_pull_tail()),
+	 * we can fix skb->truesize to its real value to avoid future drops.
+	 * This is valid because skb is not yet charged to the socket.
+	 * It has been noticed pure SACK packets were sometimes dropped
+	 * (if cooked by drivers without copybreak feature).
+	 */
+	if (!skb->data_len)
+		skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+
+	if (unlikely(sk_add_backlog(sk, skb, limit))) {
+		bh_unlock_sock(sk);
+		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP);
+		return true;
+	}
+	return false;
+}
+EXPORT_SYMBOL(tcp_add_backlog);
+
 /*
  *	From tcp_input.c
  */
@@ -1602,6 +1636,7 @@
 
 		sk = req->rsk_listener;
 		if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+			sk_drops_add(sk, skb);
 			reqsk_put(req);
 			goto discard_it;
 		}
@@ -1660,10 +1695,7 @@
 	if (!sock_owned_by_user(sk)) {
 		if (!tcp_prequeue(sk, skb))
 			ret = tcp_v4_do_rcv(sk, skb);
-	} else if (unlikely(sk_add_backlog(sk, skb,
-					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
-		bh_unlock_sock(sk);
-		__NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
+	} else if (tcp_add_backlog(sk, skb)) {
 		goto discard_and_relse;
 	}
 	bh_unlock_sock(sk);
@@ -1812,7 +1844,7 @@
 	tcp_write_queue_purge(sk);
 
 	/* Cleans up our, hopefully empty, out_of_order_queue. */
-	__skb_queue_purge(&tp->out_of_order_queue);
+	skb_rbtree_purge(&tp->out_of_order_queue);
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Clean up the MD5 key list, if any */
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index b617826..bf1f3b2 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -751,7 +751,7 @@
 	.netnsok	= true,
 };
 
-static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
+static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
 	[TCP_METRICS_ATTR_ADDR_IPV4]	= { .type = NLA_U32, },
 	[TCP_METRICS_ATTR_ADDR_IPV6]	= { .type = NLA_BINARY,
 					    .len = sizeof(struct in6_addr), },
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4b95ec4..6234eba 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -464,7 +464,7 @@
 
 		newtp->srtt_us = 0;
 		newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
-		newtp->rtt_min[0].rtt = ~0U;
+		minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U);
 		newicsk->icsk_rto = TCP_TIMEOUT_INIT;
 
 		newtp->packets_out = 0;
@@ -487,8 +487,10 @@
 		newtp->snd_cwnd = TCP_INIT_CWND;
 		newtp->snd_cwnd_cnt = 0;
 
+		/* There's a bubble in the pipe until at least the first ACK. */
+		newtp->app_limited = ~0U;
+
 		tcp_init_xmit_timers(newsk);
-		__skb_queue_head_init(&newtp->out_of_order_queue);
 		newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
 
 		newtp->rx_opt.saw_tstamp = 0;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 5c59649..bc68da3 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -90,12 +90,6 @@
 		goto out;
 	}
 
-	/* GSO partial only requires splitting the frame into an MSS
-	 * multiple and possibly a remainder.  So update the mss now.
-	 */
-	if (features & NETIF_F_GSO_PARTIAL)
-		mss = skb->len - (skb->len % mss);
-
 	copy_destructor = gso_skb->destructor == tcp_wfree;
 	ooo_okay = gso_skb->ooo_okay;
 	/* All segments but the first should have ooo_okay cleared */
@@ -108,6 +102,13 @@
 	/* Only first segment might have ooo_okay set */
 	segs->ooo_okay = ooo_okay;
 
+	/* GSO partial and frag_list segmentation only requires splitting
+	 * the frame into an MSS multiple and possibly a remainder, both
+	 * cases return a GSO skb. So update the mss now.
+	 */
+	if (skb_is_gso(segs))
+		mss *= skb_shinfo(segs)->gso_segs;
+
 	delta = htonl(oldlen + (thlen + mss));
 
 	skb = segs;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bdaef7f..896e9df 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -734,9 +734,16 @@
 {
 	if ((1 << sk->sk_state) &
 	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
-	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
-		tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK)) {
+		struct tcp_sock *tp = tcp_sk(sk);
+
+		if (tp->lost_out > tp->retrans_out &&
+		    tp->snd_cwnd > tcp_packets_in_flight(tp))
+			tcp_xmit_retransmit_queue(sk);
+
+		tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
 			       0, GFP_ATOMIC);
+	}
 }
 /*
  * One tasklet per cpu tries to send more skbs.
@@ -918,6 +925,7 @@
 		skb_mstamp_get(&skb->skb_mstamp);
 		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
 			- tp->snd_una;
+		tcp_rate_skb_sent(sk, skb);
 
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
@@ -1213,6 +1221,9 @@
 	tcp_set_skb_tso_segs(skb, mss_now);
 	tcp_set_skb_tso_segs(buff, mss_now);
 
+	/* Update delivered info for the new segment */
+	TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx;
+
 	/* If this packet has been sent out already, we must
 	 * adjust the various packet counters.
 	 */
@@ -1358,6 +1369,7 @@
 	}
 	return mtu;
 }
+EXPORT_SYMBOL(tcp_mss_to_mtu);
 
 /* MTU probing init per socket */
 void tcp_mtup_init(struct sock *sk)
@@ -1545,7 +1557,8 @@
 /* Return how many segs we'd like on a TSO packet,
  * to send one TSO packet per ms
  */
-static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
+u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+		     int min_tso_segs)
 {
 	u32 bytes, segs;
 
@@ -1557,10 +1570,23 @@
 	 * This preserves ACK clocking and is consistent
 	 * with tcp_tso_should_defer() heuristic.
 	 */
-	segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
+	segs = max_t(u32, bytes / mss_now, min_tso_segs);
 
 	return min_t(u32, segs, sk->sk_gso_max_segs);
 }
+EXPORT_SYMBOL(tcp_tso_autosize);
+
+/* Return the number of segments we want in the skb we are transmitting.
+ * See if congestion control module wants to decide; otherwise, autosize.
+ */
+static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+{
+	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+	u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+
+	return tso_segs ? :
+		tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+}
 
 /* Returns the portion of skb which can be sent right away */
 static unsigned int tcp_mss_split_point(const struct sock *sk,
@@ -1966,12 +1992,14 @@
 	len = 0;
 	tcp_for_write_queue_from_safe(skb, next, sk) {
 		copy = min_t(int, skb->len, probe_size - len);
-		if (nskb->ip_summed)
+		if (nskb->ip_summed) {
 			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
-		else
-			nskb->csum = skb_copy_and_csum_bits(skb, 0,
-							    skb_put(nskb, copy),
-							    copy, nskb->csum);
+		} else {
+			__wsum csum = skb_copy_and_csum_bits(skb, 0,
+							     skb_put(nskb, copy),
+							     copy, 0);
+			nskb->csum = csum_block_add(nskb->csum, csum, len);
+		}
 
 		if (skb->len <= copy) {
 			/* We've eaten all the data from this skb.
@@ -2020,6 +2048,39 @@
 	return -1;
 }
 
+/* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * (These limits are doubled for retransmits)
+ * This allows for :
+ *  - better RTT estimation and ACK scheduling
+ *  - faster recovery
+ *  - high rates
+ * Alas, some drivers / subsystems require a fair amount
+ * of queued bytes to ensure line rate.
+ * One example is wifi aggregation (802.11 AMPDU)
+ */
+static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+				  unsigned int factor)
+{
+	unsigned int limit;
+
+	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+	limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+	limit <<= factor;
+
+	if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+		set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+		/* It is possible TX completion already happened
+		 * before we set TSQ_THROTTLED, so we must
+		 * test again the condition.
+		 */
+		smp_mb__after_atomic();
+		if (atomic_read(&sk->sk_wmem_alloc) > limit)
+			return true;
+	}
+	return false;
+}
+
 /* This routine writes packets to the network.  It advances the
  * send_head.  This happens as incoming acks open up the remote
  * window for us.
@@ -2057,7 +2118,7 @@
 		}
 	}
 
-	max_segs = tcp_tso_autosize(sk, mss_now);
+	max_segs = tcp_tso_segs(sk, mss_now);
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
@@ -2106,29 +2167,8 @@
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
 			break;
 
-		/* TCP Small Queues :
-		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
-		 * This allows for :
-		 *  - better RTT estimation and ACK scheduling
-		 *  - faster recovery
-		 *  - high rates
-		 * Alas, some drivers / subsystems require a fair amount
-		 * of queued bytes to ensure line rate.
-		 * One example is wifi aggregation (802.11 AMPDU)
-		 */
-		limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
-		limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
-
-		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
-			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
-			/* It is possible TX completion already happened
-			 * before we set TSQ_THROTTLED, so we must
-			 * test again the condition.
-			 */
-			smp_mb__after_atomic();
-			if (atomic_read(&sk->sk_wmem_alloc) > limit)
-				break;
-		}
+		if (tcp_small_queue_check(sk, skb, 0))
+			break;
 
 		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 			break;
@@ -2605,7 +2645,8 @@
 	 * copying overhead: fragmentation, tunneling, mangling etc.
 	 */
 	if (atomic_read(&sk->sk_wmem_alloc) >
-	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
+	    min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
+		  sk->sk_sndbuf))
 		return -EAGAIN;
 
 	if (skb_still_in_host_queue(sk, skb))
@@ -2774,9 +2815,9 @@
 		last_lost = tp->snd_una;
 	}
 
-	max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
+	max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
 	tcp_for_write_queue_from(skb, sk) {
-		__u8 sacked = TCP_SKB_CB(skb)->sacked;
+		__u8 sacked;
 		int segs;
 
 		if (skb == tcp_send_head(sk))
@@ -2788,6 +2829,7 @@
 		segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
 		if (segs <= 0)
 			return;
+		sacked = TCP_SKB_CB(skb)->sacked;
 		/* In case tcp_shift_skb_data() have aggregated large skbs,
 		 * we need to make sure not sending too bigs TSO packets
 		 */
@@ -2827,10 +2869,13 @@
 		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
 			continue;
 
+		if (tcp_small_queue_check(sk, skb, 1))
+			return;
+
 		if (tcp_retransmit_skb(sk, skb, segs))
 			return;
 
-		NET_INC_STATS(sock_net(sk), mib_idx);
+		NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
 
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += tcp_skb_pcount(skb);
@@ -3567,6 +3612,8 @@
 	if (!res) {
 		__TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
 		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+		if (unlikely(tcp_passive_fastopen(sk)))
+			tcp_sk(sk)->total_retrans++;
 	}
 	return res;
 }
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
new file mode 100644
index 0000000..9be1581
--- /dev/null
+++ b/net/ipv4/tcp_rate.c
@@ -0,0 +1,186 @@
+#include <net/tcp.h>
+
+/* The bandwidth estimator estimates the rate at which the network
+ * can currently deliver outbound data packets for this flow. At a high
+ * level, it operates by taking a delivery rate sample for each ACK.
+ *
+ * A rate sample records the rate at which the network delivered packets
+ * for this flow, calculated over the time interval between the transmission
+ * of a data packet and the acknowledgment of that packet.
+ *
+ * Specifically, over the interval between each transmit and corresponding ACK,
+ * the estimator generates a delivery rate sample. Typically it uses the rate
+ * at which packets were acknowledged. However, the approach of using only the
+ * acknowledgment rate faces a challenge under the prevalent ACK decimation or
+ * compression: packets can temporarily appear to be delivered much quicker
+ * than the bottleneck rate. Since it is physically impossible to do that in a
+ * sustained fashion, when the estimator notices that the ACK rate is faster
+ * than the transmit rate, it uses the latter:
+ *
+ *    send_rate = #pkts_delivered/(last_snd_time - first_snd_time)
+ *    ack_rate  = #pkts_delivered/(last_ack_time - first_ack_time)
+ *    bw = min(send_rate, ack_rate)
+ *
+ * Notice the estimator essentially estimates the goodput, not always the
+ * network bottleneck link rate when the sending or receiving is limited by
+ * other factors like applications or receiver window limits.  The estimator
+ * deliberately avoids using the inter-packet spacing approach because that
+ * approach requires a large number of samples and sophisticated filtering.
+ *
+ * TCP flows can often be application-limited in request/response workloads.
+ * The estimator marks a bandwidth sample as application-limited if there
+ * was some moment during the sampled window of packets when there was no data
+ * ready to send in the write queue.
+ */
+
+/* Snapshot the current delivery information in the skb, to generate
+ * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
+ */
+void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	 /* In general we need to start delivery rate samples from the
+	  * time we received the most recent ACK, to ensure we include
+	  * the full time the network needs to deliver all in-flight
+	  * packets. If there are no packets in flight yet, then we
+	  * know that any ACKs after now indicate that the network was
+	  * able to deliver those packets completely in the sampling
+	  * interval between now and the next ACK.
+	  *
+	  * Note that we use packets_out instead of tcp_packets_in_flight(tp)
+	  * because the latter is a guess based on RTO and loss-marking
+	  * heuristics. We don't want spurious RTOs or loss markings to cause
+	  * a spuriously small time interval, causing a spuriously high
+	  * bandwidth estimate.
+	  */
+	if (!tp->packets_out) {
+		tp->first_tx_mstamp  = skb->skb_mstamp;
+		tp->delivered_mstamp = skb->skb_mstamp;
+	}
+
+	TCP_SKB_CB(skb)->tx.first_tx_mstamp	= tp->first_tx_mstamp;
+	TCP_SKB_CB(skb)->tx.delivered_mstamp	= tp->delivered_mstamp;
+	TCP_SKB_CB(skb)->tx.delivered		= tp->delivered;
+	TCP_SKB_CB(skb)->tx.is_app_limited	= tp->app_limited ? 1 : 0;
+}
+
+/* When an skb is sacked or acked, we fill in the rate sample with the (prior)
+ * delivery information when the skb was last transmitted.
+ *
+ * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is
+ * called multiple times. We favor the information from the most recently
+ * sent skb, i.e., the skb with the highest prior_delivered count.
+ */
+void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb,
+			    struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+
+	if (!scb->tx.delivered_mstamp.v64)
+		return;
+
+	if (!rs->prior_delivered ||
+	    after(scb->tx.delivered, rs->prior_delivered)) {
+		rs->prior_delivered  = scb->tx.delivered;
+		rs->prior_mstamp     = scb->tx.delivered_mstamp;
+		rs->is_app_limited   = scb->tx.is_app_limited;
+		rs->is_retrans	     = scb->sacked & TCPCB_RETRANS;
+
+		/* Find the duration of the "send phase" of this window: */
+		rs->interval_us      = skb_mstamp_us_delta(
+						&skb->skb_mstamp,
+						&scb->tx.first_tx_mstamp);
+
+		/* Record send time of most recently ACKed packet: */
+		tp->first_tx_mstamp  = skb->skb_mstamp;
+	}
+	/* Mark off the skb delivered once it's sacked to avoid being
+	 * used again when it's cumulatively acked. For acked packets
+	 * we don't need to reset since it'll be freed soon.
+	 */
+	if (scb->sacked & TCPCB_SACKED_ACKED)
+		scb->tx.delivered_mstamp.v64 = 0;
+}
+
+/* Update the connection delivery information and generate a rate sample. */
+void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
+		  struct skb_mstamp *now, struct rate_sample *rs)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 snd_us, ack_us;
+
+	/* Clear app limited if bubble is acked and gone. */
+	if (tp->app_limited && after(tp->delivered, tp->app_limited))
+		tp->app_limited = 0;
+
+	/* TODO: there are multiple places throughout tcp_ack() to get
+	 * current time. Refactor the code using a new "tcp_acktag_state"
+	 * to carry current time, flags, stats like "tcp_sacktag_state".
+	 */
+	if (delivered)
+		tp->delivered_mstamp = *now;
+
+	rs->acked_sacked = delivered;	/* freshly ACKed or SACKed */
+	rs->losses = lost;		/* freshly marked lost */
+	/* Return an invalid sample if no timing information is available. */
+	if (!rs->prior_mstamp.v64) {
+		rs->delivered = -1;
+		rs->interval_us = -1;
+		return;
+	}
+	rs->delivered   = tp->delivered - rs->prior_delivered;
+
+	/* Model sending data and receiving ACKs as separate pipeline phases
+	 * for a window. Usually the ACK phase is longer, but with ACK
+	 * compression the send phase can be longer. To be safe we use the
+	 * longer phase.
+	 */
+	snd_us = rs->interval_us;				/* send phase */
+	ack_us = skb_mstamp_us_delta(now, &rs->prior_mstamp);	/* ack phase */
+	rs->interval_us = max(snd_us, ack_us);
+
+	/* Normally we expect interval_us >= min-rtt.
+	 * Note that rate may still be over-estimated when a spuriously
+	 * retransmistted skb was first (s)acked because "interval_us"
+	 * is under-estimated (up to an RTT). However continuously
+	 * measuring the delivery rate during loss recovery is crucial
+	 * for connections suffer heavy or prolonged losses.
+	 */
+	if (unlikely(rs->interval_us < tcp_min_rtt(tp))) {
+		if (!rs->is_retrans)
+			pr_debug("tcp rate: %ld %d %u %u %u\n",
+				 rs->interval_us, rs->delivered,
+				 inet_csk(sk)->icsk_ca_state,
+				 tp->rx_opt.sack_ok, tcp_min_rtt(tp));
+		rs->interval_us = -1;
+		return;
+	}
+
+	/* Record the last non-app-limited or the highest app-limited bw */
+	if (!rs->is_app_limited ||
+	    ((u64)rs->delivered * tp->rate_interval_us >=
+	     (u64)tp->rate_delivered * rs->interval_us)) {
+		tp->rate_delivered = rs->delivered;
+		tp->rate_interval_us = rs->interval_us;
+		tp->rate_app_limited = rs->is_app_limited;
+	}
+}
+
+/* If a gap is detected between sends, mark the socket application-limited. */
+void tcp_rate_check_app_limited(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (/* We have less than one packet to send. */
+	    tp->write_seq - tp->snd_nxt < tp->mss_cache &&
+	    /* Nothing in sending host's qdisc queues or NIC tx queue. */
+	    sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) &&
+	    /* We are not limited by CWND. */
+	    tcp_packets_in_flight(tp) < tp->snd_cwnd &&
+	    /* All lost packets have been retransmitted. */
+	    tp->lost_out <= tp->retrans_out)
+		tp->app_limited =
+			(tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
+}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index d84930b..3ea1cf8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -192,6 +192,8 @@
 			if (tp->syn_data && icsk->icsk_retransmits == 1)
 				NET_INC_STATS(sock_net(sk),
 					      LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+		} else if (!tp->syn_data && !tp->syn_fastopen) {
+			sk_rethink_txhash(sk);
 		}
 		retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
 		syn_set = true;
@@ -213,6 +215,8 @@
 			tcp_mtu_probing(icsk, sk);
 
 			dst_negative_advice(sk);
+		} else {
+			sk_rethink_txhash(sk);
 		}
 
 		retry_until = net->ipv4.sysctl_tcp_retries2;
@@ -384,6 +388,7 @@
 	 */
 	inet_rtx_syn_ack(sk, req);
 	req->num_timeout++;
+	icsk->icsk_retransmits++;
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 			  TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
 }
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 028eb04..9c5fc97 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -76,7 +76,7 @@
 	if (!tcp_is_cwnd_limited(sk))
 		return;
 
-	if (tp->snd_cwnd <= tp->snd_ssthresh)
+	if (tcp_in_slow_start(tp))
 		tcp_slow_start(tp, acked);
 
 	else if (!yeah->doing_reno_now) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index e61f7cd..7d96dc2 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,6 +114,7 @@
 #include <net/busy_poll.h>
 #include "udp_impl.h"
 #include <net/sock_reuseport.h>
+#include <net/addrconf.h>
 
 struct udp_table udp_table __read_mostly;
 EXPORT_SYMBOL(udp_table);
@@ -1020,12 +1021,6 @@
 				   flow_flags,
 				   faddr, saddr, dport, inet->inet_sport);
 
-		if (!saddr && ipc.oif) {
-			err = l3mdev_get_saddr(net, ipc.oif, fl4);
-			if (err < 0)
-				goto out;
-		}
-
 		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
 		rt = ip_route_output_flow(net, fl4, sk);
 		if (IS_ERR(rt)) {
@@ -1182,13 +1177,13 @@
  *	@sk: socket
  *
  *	Drops all bad checksum frames, until a valid one is found.
- *	Returns the length of found skb, or 0 if none is found.
+ *	Returns the length of found skb, or -1 if none is found.
  */
-static unsigned int first_packet_length(struct sock *sk)
+static int first_packet_length(struct sock *sk)
 {
 	struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
 	struct sk_buff *skb;
-	unsigned int res;
+	int res;
 
 	__skb_queue_head_init(&list_kill);
 
@@ -1203,7 +1198,7 @@
 		__skb_unlink(skb, rcvq);
 		__skb_queue_tail(&list_kill, skb);
 	}
-	res = skb ? skb->len : 0;
+	res = skb ? skb->len : -1;
 	spin_unlock_bh(&rcvq->lock);
 
 	if (!skb_queue_empty(&list_kill)) {
@@ -1232,7 +1227,7 @@
 
 	case SIOCINQ:
 	{
-		unsigned int amount = first_packet_length(sk);
+		int amount = max_t(int, 0, first_packet_length(sk));
 
 		return put_user(amount, (int __user *)arg);
 	}
@@ -2184,7 +2179,7 @@
 
 	/* Check for false positives due to checksum errors */
 	if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
-	    !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
+	    !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
 		mask &= ~(POLLIN | POLLRDNORM);
 
 	return mask;
@@ -2192,6 +2187,20 @@
 }
 EXPORT_SYMBOL(udp_poll);
 
+int udp_abort(struct sock *sk, int err)
+{
+	lock_sock(sk);
+
+	sk->sk_err = err;
+	sk->sk_error_report(sk);
+	udp_disconnect(sk, 0);
+
+	release_sock(sk);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(udp_abort);
+
 struct proto udp_prot = {
 	.name		   = "UDP",
 	.owner		   = THIS_MODULE,
@@ -2216,13 +2225,12 @@
 	.sysctl_wmem	   = &sysctl_udp_wmem_min,
 	.sysctl_rmem	   = &sysctl_udp_rmem_min,
 	.obj_size	   = sizeof(struct udp_sock),
-	.slab_flags	   = SLAB_DESTROY_BY_RCU,
 	.h.udp_table	   = &udp_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udp_setsockopt,
 	.compat_getsockopt = compat_udp_getsockopt,
 #endif
-	.clear_sk	   = sk_prot_clear_portaddr_nulls,
+	.diag_destroy	   = udp_abort,
 };
 EXPORT_SYMBOL(udp_prot);
 
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index 3d5ccf4..9a89c10 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -20,7 +20,7 @@
 static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
 			struct netlink_callback *cb,
 			const struct inet_diag_req_v2 *req,
-			struct nlattr *bc)
+			struct nlattr *bc, bool net_admin)
 {
 	if (!inet_diag_bc_sk(bc, sk))
 		return 0;
@@ -28,7 +28,7 @@
 	return inet_sk_diag_fill(sk, NULL, skb, req,
 			sk_user_ns(NETLINK_CB(cb->skb).sk),
 			NETLINK_CB(cb->skb).portid,
-			cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
+			cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin);
 }
 
 static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
@@ -76,7 +76,8 @@
 	err = inet_sk_diag_fill(sk, NULL, rep, req,
 			   sk_user_ns(NETLINK_CB(in_skb).sk),
 			   NETLINK_CB(in_skb).portid,
-			   nlh->nlmsg_seq, 0, nlh);
+			   nlh->nlmsg_seq, 0, nlh,
+			   netlink_net_capable(in_skb, CAP_NET_ADMIN));
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
 		kfree_skb(rep);
@@ -97,6 +98,7 @@
 		     struct netlink_callback *cb,
 		     const struct inet_diag_req_v2 *r, struct nlattr *bc)
 {
+	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
 	struct net *net = sock_net(skb->sk);
 	int num, s_num, slot, s_slot;
 
@@ -132,7 +134,7 @@
 			    r->id.idiag_dport)
 				goto next;
 
-			if (sk_diag_dump(sk, skb, cb, r, bc) < 0) {
+			if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) {
 				spin_unlock_bh(&hslot->lock);
 				goto done;
 			}
@@ -165,12 +167,88 @@
 	r->idiag_wqueue = sk_wmem_alloc_get(sk);
 }
 
+#ifdef CONFIG_INET_DIAG_DESTROY
+static int __udp_diag_destroy(struct sk_buff *in_skb,
+			      const struct inet_diag_req_v2 *req,
+			      struct udp_table *tbl)
+{
+	struct net *net = sock_net(in_skb->sk);
+	struct sock *sk;
+	int err;
+
+	rcu_read_lock();
+
+	if (req->sdiag_family == AF_INET)
+		sk = __udp4_lib_lookup(net,
+				req->id.idiag_dst[0], req->id.idiag_dport,
+				req->id.idiag_src[0], req->id.idiag_sport,
+				req->id.idiag_if, tbl, NULL);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (req->sdiag_family == AF_INET6) {
+		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
+		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
+			sk = __udp4_lib_lookup(net,
+					req->id.idiag_dst[3], req->id.idiag_dport,
+					req->id.idiag_src[3], req->id.idiag_sport,
+					req->id.idiag_if, tbl, NULL);
+
+		else
+			sk = __udp6_lib_lookup(net,
+					(struct in6_addr *)req->id.idiag_dst,
+					req->id.idiag_dport,
+					(struct in6_addr *)req->id.idiag_src,
+					req->id.idiag_sport,
+					req->id.idiag_if, tbl, NULL);
+	}
+#endif
+	else {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+		sk = NULL;
+
+	rcu_read_unlock();
+
+	if (!sk)
+		return -ENOENT;
+
+	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
+		sock_put(sk);
+		return -ENOENT;
+	}
+
+	err = sock_diag_destroy(sk, ECONNABORTED);
+
+	sock_put(sk);
+
+	return err;
+}
+
+static int udp_diag_destroy(struct sk_buff *in_skb,
+			    const struct inet_diag_req_v2 *req)
+{
+	return __udp_diag_destroy(in_skb, req, &udp_table);
+}
+
+static int udplite_diag_destroy(struct sk_buff *in_skb,
+				const struct inet_diag_req_v2 *req)
+{
+	return __udp_diag_destroy(in_skb, req, &udplite_table);
+}
+
+#endif
+
 static const struct inet_diag_handler udp_diag_handler = {
 	.dump		 = udp_diag_dump,
 	.dump_one	 = udp_diag_dump_one,
 	.idiag_get_info  = udp_diag_get_info,
 	.idiag_type	 = IPPROTO_UDP,
 	.idiag_info_size = 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+	.destroy	 = udp_diag_destroy,
+#endif
 };
 
 static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
@@ -192,6 +270,9 @@
 	.idiag_get_info  = udp_diag_get_info,
 	.idiag_type	 = IPPROTO_UDPLITE,
 	.idiag_info_size = 0,
+#ifdef CONFIG_INET_DIAG_DESTROY
+	.destroy	 = udplite_diag_destroy,
+#endif
 };
 
 static int __init udp_diag_init(void)
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 81f253b..f9333c9 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -21,7 +21,7 @@
 	__be16 new_protocol, bool is_ipv6)
 {
 	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
-	bool remcsum, need_csum, offload_csum, ufo;
+	bool remcsum, need_csum, offload_csum, ufo, gso_partial;
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct udphdr *uh = udp_hdr(skb);
 	u16 mac_offset = skb->mac_header;
@@ -88,6 +88,8 @@
 		goto out;
 	}
 
+	gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);
+
 	outer_hlen = skb_tnl_header_len(skb);
 	udp_offset = outer_hlen - tnl_hlen;
 	skb = segs;
@@ -117,7 +119,7 @@
 		 * will be using a length value equal to only one MSS sized
 		 * segment instead of the entire frame.
 		 */
-		if (skb_is_gso(skb)) {
+		if (gso_partial) {
 			uh->len = htons(skb_shinfo(skb)->gso_size +
 					SKB_GSO_CB(skb)->data_offset +
 					skb->head - (unsigned char *)uh);
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 3b3efbd..af81715 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -55,13 +55,11 @@
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v4_get_port,
 	.obj_size	   = sizeof(struct udp_sock),
-	.slab_flags	   = SLAB_DESTROY_BY_RCU,
 	.h.udp_table	   = &udplite_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udp_setsockopt,
 	.compat_getsockopt = compat_udp_getsockopt,
 #endif
-	.clear_sk	   = sk_prot_clear_portaddr_nulls,
 };
 EXPORT_SYMBOL(udplite_prot);
 
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index b644a23..6a7ff69 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -29,7 +29,7 @@
 	memset(fl4, 0, sizeof(*fl4));
 	fl4->daddr = daddr->a4;
 	fl4->flowi4_tos = tos;
-	fl4->flowi4_oif = oif;
+	fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif);
 	if (saddr)
 		fl4->saddr = saddr->a4;
 
@@ -112,7 +112,7 @@
 	int oif = 0;
 
 	if (skb_dst(skb))
-		oif = l3mdev_fib_oif(skb_dst(skb)->dev);
+		oif = skb_dst(skb)->dev->ifindex;
 
 	memset(fl4, 0, sizeof(struct flowi4));
 	fl4->flowi4_mark = skb->mark;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ab3e796..cbd9343 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -112,6 +112,27 @@
 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
 }
 
+static inline s32 rfc3315_s14_backoff_init(s32 irt)
+{
+	/* multiply 'initial retransmission time' by 0.9 .. 1.1 */
+	u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt;
+	do_div(tmp, 1000000);
+	return (s32)tmp;
+}
+
+static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt)
+{
+	/* multiply 'retransmission timeout' by 1.9 .. 2.1 */
+	u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt;
+	do_div(tmp, 1000000);
+	if ((s32)tmp > mrt) {
+		/* multiply 'maximum retransmission time' by 0.9 .. 1.1 */
+		tmp = (900000 + prandom_u32() % 200001) * (u64)mrt;
+		do_div(tmp, 1000000);
+	}
+	return (s32)tmp;
+}
+
 #ifdef CONFIG_SYSCTL
 static int addrconf_sysctl_register(struct inet6_dev *idev);
 static void addrconf_sysctl_unregister(struct inet6_dev *idev);
@@ -187,6 +208,7 @@
 	.dad_transmits		= 1,
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
+	.rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
 	.use_tempaddr		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
@@ -232,6 +254,7 @@
 	.dad_transmits		= 1,
 	.rtr_solicits		= MAX_RTR_SOLICITATIONS,
 	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,
+	.rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL,
 	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY,
 	.use_tempaddr		= 0,
 	.temp_valid_lft		= TEMP_VALID_LIFETIME,
@@ -778,7 +801,14 @@
 	}
 
 	if (p == &net->ipv6.devconf_all->forwarding) {
+		int old_dflt = net->ipv6.devconf_dflt->forwarding;
+
 		net->ipv6.devconf_dflt->forwarding = newf;
+		if ((!newf) ^ (!old_dflt))
+			inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
+						     NETCONFA_IFINDEX_DEFAULT,
+						     net->ipv6.devconf_dflt);
+
 		addrconf_forward_change(net, newf);
 		if ((!newf) ^ (!old))
 			inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING,
@@ -1872,7 +1902,6 @@
 
 void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 {
-	struct in6_addr addr;
 	struct inet6_dev *idev = ifp->idev;
 	struct net *net = dev_net(ifp->idev->dev);
 
@@ -1934,18 +1963,6 @@
 		in6_ifa_put(ifp2);
 lock_errdad:
 		spin_lock_bh(&ifp->lock);
-	} else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
-		addr.s6_addr32[0] = htonl(0xfe800000);
-		addr.s6_addr32[1] = 0;
-
-		if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
-		    ipv6_addr_equal(&ifp->addr, &addr)) {
-			/* DAD failed for link-local based on MAC address */
-			idev->cnf.disable_ipv6 = 1;
-
-			pr_info("%s: IPv6 being disabled!\n",
-				ifp->idev->dev->name);
-		}
 	}
 
 errdad:
@@ -1954,6 +1971,7 @@
 	spin_unlock_bh(&ifp->lock);
 
 	addrconf_mod_dad_work(ifp, 0);
+	in6_ifa_put(ifp);
 }
 
 /* Join to solicited addr multicast group.
@@ -3543,7 +3561,7 @@
 	/* combine the user config with event to determine if permanent
 	 * addresses are to be removed from address hash table
 	 */
-	keep_addr = !(how || _keep_addr <= 0);
+	keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6);
 
 	/* Step 2: clear hash table */
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
@@ -3599,7 +3617,7 @@
 	/* re-combine the user config with event to determine if permanent
 	 * addresses are to be removed from the interface list
 	 */
-	keep_addr = (!how && _keep_addr > 0);
+	keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
 
 	INIT_LIST_HEAD(&del_list);
 	list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
@@ -3692,7 +3710,7 @@
 	if (idev->if_flags & IF_RA_RCVD)
 		goto out;
 
-	if (idev->rs_probes++ < idev->cnf.rtr_solicits) {
+	if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) {
 		write_unlock(&idev->lock);
 		if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE))
 			ndisc_send_rs(dev, &lladdr,
@@ -3701,11 +3719,13 @@
 			goto put;
 
 		write_lock(&idev->lock);
+		idev->rs_interval = rfc3315_s14_backoff_update(
+			idev->rs_interval, idev->cnf.rtr_solicit_max_interval);
 		/* The wait after the last probe can be shorter */
 		addrconf_mod_rs_timer(idev, (idev->rs_probes ==
 					     idev->cnf.rtr_solicits) ?
 				      idev->cnf.rtr_solicit_delay :
-				      idev->cnf.rtr_solicit_interval);
+				      idev->rs_interval);
 	} else {
 		/*
 		 * Note: we do not support deprecated "all on-link"
@@ -3821,6 +3841,7 @@
 						dad_work);
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr mcaddr;
+	bool disable_ipv6 = false;
 
 	enum {
 		DAD_PROCESS,
@@ -3837,6 +3858,24 @@
 	} else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
 		action = DAD_ABORT;
 		ifp->state = INET6_IFADDR_STATE_POSTDAD;
+
+		if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 &&
+		    !(ifp->flags & IFA_F_STABLE_PRIVACY)) {
+			struct in6_addr addr;
+
+			addr.s6_addr32[0] = htonl(0xfe800000);
+			addr.s6_addr32[1] = 0;
+
+			if (!ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) &&
+			    ipv6_addr_equal(&ifp->addr, &addr)) {
+				/* DAD failed for link-local based on MAC */
+				idev->cnf.disable_ipv6 = 1;
+
+				pr_info("%s: IPv6 being disabled!\n",
+					ifp->idev->dev->name);
+				disable_ipv6 = true;
+			}
+		}
 	}
 	spin_unlock_bh(&ifp->lock);
 
@@ -3844,7 +3883,10 @@
 		addrconf_dad_begin(ifp);
 		goto out;
 	} else if (action == DAD_ABORT) {
+		in6_ifa_hold(ifp);
 		addrconf_dad_stop(ifp, 1);
+		if (disable_ipv6)
+			addrconf_ifdown(idev->dev, 0);
 		goto out;
 	}
 
@@ -3932,7 +3974,7 @@
 	send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);
 	send_rs = send_mld &&
 		  ipv6_accept_ra(ifp->idev) &&
-		  ifp->idev->cnf.rtr_solicits > 0 &&
+		  ifp->idev->cnf.rtr_solicits != 0 &&
 		  (dev->flags&IFF_LOOPBACK) == 0;
 	read_unlock_bh(&ifp->idev->lock);
 
@@ -3954,10 +3996,11 @@
 
 		write_lock_bh(&ifp->idev->lock);
 		spin_lock(&ifp->lock);
+		ifp->idev->rs_interval = rfc3315_s14_backoff_init(
+			ifp->idev->cnf.rtr_solicit_interval);
 		ifp->idev->rs_probes = 1;
 		ifp->idev->if_flags |= IF_RS_SENT;
-		addrconf_mod_rs_timer(ifp->idev,
-				      ifp->idev->cnf.rtr_solicit_interval);
+		addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval);
 		spin_unlock(&ifp->lock);
 		write_unlock_bh(&ifp->idev->lock);
 	}
@@ -4874,6 +4917,8 @@
 	array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits;
 	array[DEVCONF_RTR_SOLICIT_INTERVAL] =
 		jiffies_to_msecs(cnf->rtr_solicit_interval);
+	array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] =
+		jiffies_to_msecs(cnf->rtr_solicit_max_interval);
 	array[DEVCONF_RTR_SOLICIT_DELAY] =
 		jiffies_to_msecs(cnf->rtr_solicit_delay);
 	array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version;
@@ -4944,18 +4989,18 @@
 }
 
 static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,
-				      int items, int bytes)
+					int bytes)
 {
 	int i;
-	int pad = bytes - sizeof(u64) * items;
+	int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX;
 	BUG_ON(pad < 0);
 
 	/* Use put_unaligned() because stats may not be aligned for u64. */
-	put_unaligned(items, &stats[0]);
-	for (i = 1; i < items; i++)
+	put_unaligned(ICMP6_MIB_MAX, &stats[0]);
+	for (i = 1; i < ICMP6_MIB_MAX; i++)
 		put_unaligned(atomic_long_read(&mib[i]), &stats[i]);
 
-	memset(&stats[items], 0, pad);
+	memset(&stats[ICMP6_MIB_MAX], 0, pad);
 }
 
 static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,
@@ -4988,7 +5033,7 @@
 				     offsetof(struct ipstats_mib, syncp));
 		break;
 	case IFLA_INET6_ICMP6STATS:
-		__snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes);
+		__snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes);
 		break;
 	}
 }
@@ -5082,7 +5127,7 @@
 		return -EINVAL;
 	if (!ipv6_accept_ra(idev))
 		return -EINVAL;
-	if (idev->cnf.rtr_solicits <= 0)
+	if (idev->cnf.rtr_solicits == 0)
 		return -EINVAL;
 
 	write_lock_bh(&idev->lock);
@@ -5111,8 +5156,10 @@
 
 	if (update_rs) {
 		idev->if_flags |= IF_RS_SENT;
+		idev->rs_interval = rfc3315_s14_backoff_init(
+			idev->cnf.rtr_solicit_interval);
 		idev->rs_probes = 1;
-		addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval);
+		addrconf_mod_rs_timer(idev, idev->rs_interval);
 	}
 
 	/* Well, that's kinda nasty ... */
@@ -5450,20 +5497,6 @@
 }
 
 static
-int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write,
-                              void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	struct ctl_table lctl;
-	int min_hl = 1, max_hl = 255;
-
-	lctl = *ctl;
-	lctl.extra1 = &min_hl;
-	lctl.extra2 = &max_hl;
-
-	return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos);
-}
-
-static
 int addrconf_sysctl_mtu(struct ctl_table *ctl, int write,
 			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -5696,6 +5729,9 @@
 	return ret;
 }
 
+static const int one = 1;
+static const int two_five_five = 255;
+
 static const struct ctl_table addrconf_sysctl[] = {
 	{
 		.procname	= "forwarding",
@@ -5709,7 +5745,9 @@
 		.data		= &ipv6_devconf.hop_limit,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= addrconf_sysctl_hop_limit,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+		.extra2		= (void *)&two_five_five,
 	},
 	{
 		.procname	= "mtu",
@@ -5761,6 +5799,13 @@
 		.proc_handler	= proc_dointvec_jiffies,
 	},
 	{
+		.procname	= "router_solicitation_max_interval",
+		.data		= &ipv6_devconf.rtr_solicit_max_interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
 		.procname	= "router_solicitation_delay",
 		.data		= &ipv6_devconf.rtr_solicit_delay,
 		.maxlen		= sizeof(int),
@@ -6017,7 +6062,7 @@
 static int __addrconf_sysctl_register(struct net *net, char *dev_name,
 		struct inet6_dev *idev, struct ipv6_devconf *p)
 {
-	int i;
+	int i, ifindex;
 	struct ctl_table *table;
 	char path[sizeof("net/ipv6/conf/") + IFNAMSIZ];
 
@@ -6027,8 +6072,14 @@
 
 	for (i = 0; table[i].data; i++) {
 		table[i].data += (char *)p - (char *)&ipv6_devconf;
-		table[i].extra1 = idev; /* embedded; no ref */
-		table[i].extra2 = net;
+		/* If one of these is already set, then it is not safe to
+		 * overwrite either of them: this makes proc_dointvec_minmax
+		 * usable.
+		 */
+		if (!table[i].extra1 && !table[i].extra2) {
+			table[i].extra1 = idev; /* embedded; no ref */
+			table[i].extra2 = net;
+		}
 	}
 
 	snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name);
@@ -6037,6 +6088,13 @@
 	if (!p->sysctl_header)
 		goto free;
 
+	if (!strcmp(dev_name, "all"))
+		ifindex = NETCONFA_IFINDEX_ALL;
+	else if (!strcmp(dev_name, "default"))
+		ifindex = NETCONFA_IFINDEX_DEFAULT;
+	else
+		ifindex = idev->dev->ifindex;
+	inet6_netconf_notify_devconf(net, NETCONFA_ALL, ifindex, p);
 	return 0;
 
 free:
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b454055..46ad699 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -545,6 +545,8 @@
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
 	.splice_read	   = tcp_splice_read,
+	.read_sock	   = tcp_read_sock,
+	.peek_len	   = tcp_peek_len,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index c53b92c..37ac9de 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -952,8 +952,10 @@
 		memcpy(new, hop, start);
 	ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def,
 				 secattr);
-	if (ret_val < 0)
+	if (ret_val < 0) {
+		kfree(new);
 		return ERR_PTR(ret_val);
+	}
 
 	buf_len = start + ret_val;
 	/* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 5857c1f..eea23b5 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -38,6 +38,9 @@
 		.flags = FIB_LOOKUP_NOREF,
 	};
 
+	/* update flow if oif or iif point to device enslaved to l3mdev */
+	l3mdev_update_flow(net, flowi6_to_flowi(fl6));
+
 	fib_rules_lookup(net->ipv6.fib6_rules_ops,
 			 flowi6_to_flowi(fl6), flags, &arg);
 
diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c
index ec9efbc..aba0998 100644
--- a/net/ipv6/ila/ila_common.c
+++ b/net/ipv6/ila/ila_common.c
@@ -172,6 +172,5 @@
 
 module_init(ila_init);
 module_exit(ila_fini);
-MODULE_ALIAS_RTNL_LWT(ILA);
 MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
 MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c
index c8314c6..e50c27a 100644
--- a/net/ipv6/ila/ila_lwt.c
+++ b/net/ipv6/ila/ila_lwt.c
@@ -51,7 +51,7 @@
 	return -EINVAL;
 }
 
-static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
 	[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
 	[ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, },
 };
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index e6eca5f..e604013 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -128,7 +128,7 @@
 	.parallel_ops	= true,
 };
 
-static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
 	[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
 	[ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, },
 	[ILA_ATTR_IFINDEX] = { .type = NLA_U32, },
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 771be1f..ef54852 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -743,6 +743,7 @@
 		   (info->nlh->nlmsg_flags & NLM_F_CREATE));
 	int found = 0;
 	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+	u16 nlflags = NLM_F_EXCL;
 	int err;
 
 	ins = &fn->leaf;
@@ -759,6 +760,8 @@
 			if (info->nlh &&
 			    (info->nlh->nlmsg_flags & NLM_F_EXCL))
 				return -EEXIST;
+
+			nlflags &= ~NLM_F_EXCL;
 			if (replace) {
 				if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
 					found++;
@@ -856,6 +859,7 @@
 			pr_warn("NLM_F_CREATE should be set when creating new route\n");
 
 add:
+		nlflags |= NLM_F_CREATE;
 		err = fib6_commit_metrics(&rt->dst, mxc);
 		if (err)
 			return err;
@@ -864,7 +868,7 @@
 		*ins = rt;
 		rt->rt6i_node = fn;
 		atomic_inc(&rt->rt6i_ref);
-		inet6_rt_notify(RTM_NEWROUTE, rt, info, 0);
+		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
 		info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
 
 		if (!(fn->fn_flags & RTN_RTINFO)) {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 776d145..d7d6d3a 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -61,12 +61,12 @@
 module_param(log_ecn_error, bool, 0644);
 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 
-#define HASH_SIZE_SHIFT  5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_GRE_HASH_SIZE_SHIFT  5
+#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT)
 
 static int ip6gre_net_id __read_mostly;
 struct ip6gre_net {
-	struct ip6_tnl __rcu *tunnels[4][HASH_SIZE];
+	struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
 
 	struct net_device *fb_tunnel_dev;
 };
@@ -96,12 +96,12 @@
    will match fallback tunnel.
  */
 
-#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1))
+#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1))
 static u32 HASH_ADDR(const struct in6_addr *addr)
 {
 	u32 hash = ipv6_addr_hash(addr);
 
-	return hash_32(hash, HASH_SIZE_SHIFT);
+	return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT);
 }
 
 #define tunnels_r_l	tunnels[3]
@@ -519,8 +519,6 @@
 	gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
 			 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
 
-	skb_set_inner_protocol(skb, protocol);
-
 	return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
 			    NEXTHDR_GRE);
 }
@@ -650,7 +648,6 @@
 		encap_limit = t->parms.encap_limit;
 
 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-	fl6.flowi6_proto = skb->protocol;
 
 	err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
 	if (err)
@@ -1089,7 +1086,7 @@
 
 	for (prio = 0; prio < 4; prio++) {
 		int h;
-		for (h = 0; h < HASH_SIZE; h++) {
+		for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
 			struct ip6_tnl *t;
 
 			t = rtnl_dereference(ign->tunnels[prio][h]);
@@ -1241,7 +1238,7 @@
 		parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]);
 
 	if (data[IFLA_GRE_FLOWINFO])
-		parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]);
+		parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]);
 
 	if (data[IFLA_GRE_FLAGS])
 		parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 22e90e5..e7bfd55 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -69,6 +69,7 @@
 	int offset = 0;
 	bool encap, udpfrag;
 	int nhoff;
+	bool gso_partial;
 
 	skb_reset_network_header(skb);
 	nhoff = skb_network_header(skb) - skb_mac_header(skb);
@@ -101,9 +102,11 @@
 	if (IS_ERR(segs))
 		goto out;
 
+	gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL);
+
 	for (skb = segs; skb; skb = skb->next) {
 		ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
-		if (skb_is_gso(skb))
+		if (gso_partial)
 			payload_len = skb_shinfo(skb)->gso_size +
 				      SKB_GSO_CB(skb)->data_offset +
 				      skb->head - (unsigned char *)(ipv6h + 1);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1dfc402..6001e78 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,6 +56,7 @@
 #include <net/checksum.h>
 #include <linux/mroute6.h>
 #include <net/l3mdev.h>
+#include <net/lwtunnel.h>
 
 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
@@ -104,6 +105,13 @@
 		}
 	}
 
+	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
+		int res = lwtunnel_xmit(skb);
+
+		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+			return res;
+	}
+
 	rcu_read_lock_bh();
 	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
 	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
@@ -228,6 +236,14 @@
 	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_OUT, skb->len);
+
+		/* if egress device is enslaved to an L3 master device pass the
+		 * skb to its handler for processing
+		 */
+		skb = l3mdev_ip6_out((struct sock *)sk, skb);
+		if (unlikely(!skb))
+			return 0;
+
 		/* hooks should never assume socket lock is held.
 		 * we promote our socket to non const
 		 */
@@ -910,13 +926,6 @@
 	int err;
 	int flags = 0;
 
-	if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif &&
-	    (!*dst || !(*dst)->error)) {
-		err = l3mdev_get_saddr6(net, sk, fl6);
-		if (err)
-			goto out_err;
-	}
-
 	/* The correct way to handle this would be to do
 	 * ip6_route_get_saddr, and then ip6_route_output; however,
 	 * the route-specific preferred source forces the
@@ -1008,7 +1017,7 @@
 out_err_release:
 	dst_release(*dst);
 	*dst = NULL;
-out_err:
+
 	if (err == -ENETUNREACH)
 		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
 	return err;
@@ -1054,8 +1063,6 @@
 		return ERR_PTR(err);
 	if (final_dst)
 		fl6->daddr = *final_dst;
-	if (!fl6->flowi6_oif)
-		fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
 
 	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
 }
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 7b0481e..6a66adb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -57,6 +57,7 @@
 #include <net/inet_ecn.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <net/dst_metadata.h>
 
 MODULE_AUTHOR("Ville Nuorvala");
 MODULE_DESCRIPTION("IPv6 tunneling device");
@@ -64,8 +65,8 @@
 MODULE_ALIAS_RTNL_LINK("ip6tnl");
 MODULE_ALIAS_NETDEV("ip6tnl0");
 
-#define HASH_SIZE_SHIFT  5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_TUNNEL_HASH_SIZE_SHIFT  5
+#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT)
 
 static bool log_ecn_error = true;
 module_param(log_ecn_error, bool, 0644);
@@ -75,7 +76,7 @@
 {
 	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
 
-	return hash_32(hash, HASH_SIZE_SHIFT);
+	return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
 }
 
 static int ip6_tnl_dev_init(struct net_device *dev);
@@ -87,9 +88,10 @@
 	/* the IPv6 tunnel fallback device */
 	struct net_device *fb_tnl_dev;
 	/* lists for storing tunnels in use */
-	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+	struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE];
 	struct ip6_tnl __rcu *tnls_wc[1];
 	struct ip6_tnl __rcu **tnls[2];
+	struct ip6_tnl __rcu *collect_md_tun;
 };
 
 static struct net_device_stats *ip6_get_stats(struct net_device *dev)
@@ -166,6 +168,10 @@
 			return t;
 	}
 
+	t = rcu_dereference(ip6n->collect_md_tun);
+	if (t)
+		return t;
+
 	t = rcu_dereference(ip6n->tnls_wc[0]);
 	if (t && (t->dev->flags & IFF_UP))
 		return t;
@@ -209,6 +215,8 @@
 {
 	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
 
+	if (t->parms.collect_md)
+		rcu_assign_pointer(ip6n->collect_md_tun, t);
 	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
 	rcu_assign_pointer(*tp, t);
 }
@@ -224,6 +232,9 @@
 	struct ip6_tnl __rcu **tp;
 	struct ip6_tnl *iter;
 
+	if (t->parms.collect_md)
+		rcu_assign_pointer(ip6n->collect_md_tun, NULL);
+
 	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
 	     (iter = rtnl_dereference(*tp)) != NULL;
 	     tp = &iter->next) {
@@ -829,6 +840,9 @@
 
 	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
 
+	if (tun_dst)
+		skb_dst_set(skb, (struct dst_entry *)tun_dst);
+
 	gro_cells_receive(&tunnel->gro_cells, skb);
 	return 0;
 
@@ -865,6 +879,7 @@
 {
 	struct ip6_tnl *t;
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+	struct metadata_dst *tun_dst = NULL;
 	int ret = -1;
 
 	rcu_read_lock();
@@ -881,7 +896,12 @@
 			goto drop;
 		if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 			goto drop;
-		ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate,
+		if (t->parms.collect_md) {
+			tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0);
+			if (!tun_dst)
+				return 0;
+		}
+		ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate,
 				    log_ecn_error);
 	}
 
@@ -1012,8 +1032,16 @@
 	int mtu;
 	unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
 	unsigned int max_headroom = psh_hlen;
+	u8 hop_limit;
 	int err = -1;
 
+	if (t->parms.collect_md) {
+		hop_limit = skb_tunnel_info(skb)->key.ttl;
+		goto route_lookup;
+	} else {
+		hop_limit = t->parms.hop_limit;
+	}
+
 	/* NBMA tunnel */
 	if (ipv6_addr_any(&t->parms.raddr)) {
 		struct in6_addr *addr6;
@@ -1043,6 +1071,7 @@
 		goto tx_err_link_failure;
 
 	if (!dst) {
+route_lookup:
 		dst = ip6_route_output(net, NULL, fl6);
 
 		if (dst->error)
@@ -1053,6 +1082,10 @@
 			dst = NULL;
 			goto tx_err_link_failure;
 		}
+		if (t->parms.collect_md &&
+		    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
+				       &fl6->daddr, 0, &fl6->saddr))
+			goto tx_err_link_failure;
 		ndst = dst;
 	}
 
@@ -1071,7 +1104,7 @@
 	}
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
-	if (skb_dst(skb))
+	if (skb_dst(skb) && !t->parms.collect_md)
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
 	if (skb->len > mtu && !skb_is_gso(skb)) {
 		*pmtu = mtu;
@@ -1111,8 +1144,13 @@
 		skb = new_skb;
 	}
 
-	if (!fl6->flowi6_mark && ndst)
-		dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
+	if (t->parms.collect_md) {
+		if (t->encap.type != TUNNEL_ENCAP_NONE)
+			goto tx_err_dst_release;
+	} else {
+		if (!fl6->flowi6_mark && ndst)
+			dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr);
+	}
 	skb_dst_set(skb, dst);
 
 	if (encap_limit >= 0) {
@@ -1137,7 +1175,7 @@
 	ipv6h = ipv6_hdr(skb);
 	ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
 		     ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
-	ipv6h->hop_limit = t->parms.hop_limit;
+	ipv6h->hop_limit = hop_limit;
 	ipv6h->nexthdr = proto;
 	ipv6h->saddr = fl6->saddr;
 	ipv6h->daddr = fl6->daddr;
@@ -1170,18 +1208,34 @@
 	if (tproto != IPPROTO_IPIP && tproto != 0)
 		return -1;
 
-	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
-		encap_limit = t->parms.encap_limit;
-
-	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
 	dsfield = ipv4_get_dsfield(iph);
 
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
-					  & IPV6_TCLASS_MASK;
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
-		fl6.flowi6_mark = skb->mark;
+	if (t->parms.collect_md) {
+		struct ip_tunnel_info *tun_info;
+		const struct ip_tunnel_key *key;
+
+		tun_info = skb_tunnel_info(skb);
+		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+			     ip_tunnel_info_af(tun_info) != AF_INET6))
+			return -1;
+		key = &tun_info->key;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_IPIP;
+		fl6.daddr = key->u.ipv6.dst;
+		fl6.flowlabel = key->label;
+	} else {
+		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+			encap_limit = t->parms.encap_limit;
+
+		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_IPIP;
+
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+			fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
+					 & IPV6_TCLASS_MASK;
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+			fl6.flowi6_mark = skb->mark;
+	}
 
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
@@ -1219,28 +1273,47 @@
 	    ip6_tnl_addr_conflict(t, ipv6h))
 		return -1;
 
-	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
-	if (offset > 0) {
-		struct ipv6_tlv_tnl_enc_lim *tel;
-		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
-		if (tel->encap_limit == 0) {
-			icmpv6_send(skb, ICMPV6_PARAMPROB,
-				    ICMPV6_HDR_FIELD, offset + 2);
-			return -1;
-		}
-		encap_limit = tel->encap_limit - 1;
-	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
-		encap_limit = t->parms.encap_limit;
-
-	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
 	dsfield = ipv6_get_dsfield(ipv6h);
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
-		fl6.flowlabel |= ip6_flowlabel(ipv6h);
-	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
-		fl6.flowi6_mark = skb->mark;
+
+	if (t->parms.collect_md) {
+		struct ip_tunnel_info *tun_info;
+		const struct ip_tunnel_key *key;
+
+		tun_info = skb_tunnel_info(skb);
+		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+			     ip_tunnel_info_af(tun_info) != AF_INET6))
+			return -1;
+		key = &tun_info->key;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_IPV6;
+		fl6.daddr = key->u.ipv6.dst;
+		fl6.flowlabel = key->label;
+	} else {
+		offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+		if (offset > 0) {
+			struct ipv6_tlv_tnl_enc_lim *tel;
+
+			tel = (void *)&skb_network_header(skb)[offset];
+			if (tel->encap_limit == 0) {
+				icmpv6_send(skb, ICMPV6_PARAMPROB,
+					    ICMPV6_HDR_FIELD, offset + 2);
+				return -1;
+			}
+			encap_limit = tel->encap_limit - 1;
+		} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+			encap_limit = t->parms.encap_limit;
+		}
+
+		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_IPV6;
+
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+			fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK);
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+			fl6.flowlabel |= ip6_flowlabel(ipv6h);
+		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+			fl6.flowi6_mark = skb->mark;
+	}
 
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
@@ -1739,6 +1812,10 @@
 	if (err)
 		return err;
 	ip6_tnl_link_config(t);
+	if (t->parms.collect_md) {
+		dev->features |= NETIF_F_NETNS_LOCAL;
+		netif_keep_dst(dev);
+	}
 	return 0;
 }
 
@@ -1809,6 +1886,9 @@
 
 	if (data[IFLA_IPTUN_PROTO])
 		parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
+
+	if (data[IFLA_IPTUN_COLLECT_METADATA])
+		parms->collect_md = true;
 }
 
 static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
@@ -1848,6 +1928,7 @@
 			   struct nlattr *tb[], struct nlattr *data[])
 {
 	struct net *net = dev_net(dev);
+	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 	struct ip6_tnl *nt, *t;
 	struct ip_tunnel_encap ipencap;
 
@@ -1862,9 +1943,14 @@
 
 	ip6_tnl_netlink_parms(data, &nt->parms);
 
-	t = ip6_tnl_locate(net, &nt->parms, 0);
-	if (!IS_ERR(t))
-		return -EEXIST;
+	if (nt->parms.collect_md) {
+		if (rtnl_dereference(ip6n->collect_md_tun))
+			return -EEXIST;
+	} else {
+		t = ip6_tnl_locate(net, &nt->parms, 0);
+		if (!IS_ERR(t))
+			return -EEXIST;
+	}
 
 	return ip6_tnl_create2(dev);
 }
@@ -1888,6 +1974,8 @@
 			return err;
 	}
 	ip6_tnl_netlink_parms(data, &p);
+	if (p.collect_md)
+		return -EINVAL;
 
 	t = ip6_tnl_locate(net, &p, 0);
 	if (!IS_ERR(t)) {
@@ -1935,6 +2023,8 @@
 		nla_total_size(2) +
 		/* IFLA_IPTUN_ENCAP_DPORT */
 		nla_total_size(2) +
+		/* IFLA_IPTUN_COLLECT_METADATA */
+		nla_total_size(0) +
 		0;
 }
 
@@ -1953,16 +2043,15 @@
 	    nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto))
 		goto nla_put_failure;
 
-	if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
-			tunnel->encap.type) ||
-	nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
-		     tunnel->encap.sport) ||
-	nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
-		     tunnel->encap.dport) ||
-	nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
-		    tunnel->encap.flags))
+	if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
+	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
+	    nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
+	    nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags))
 		goto nla_put_failure;
 
+	if (parm->collect_md)
+		if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
+			goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
@@ -1990,6 +2079,7 @@
 	[IFLA_IPTUN_ENCAP_FLAGS]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_SPORT]	= { .type = NLA_U16 },
 	[IFLA_IPTUN_ENCAP_DPORT]	= { .type = NLA_U16 },
+	[IFLA_IPTUN_COLLECT_METADATA]	= { .type = NLA_FLAG },
 };
 
 static struct rtnl_link_ops ip6_link_ops __read_mostly = {
@@ -2031,7 +2121,7 @@
 		if (dev->rtnl_link_ops == &ip6_link_ops)
 			unregister_netdevice_queue(dev, &list);
 
-	for (h = 0; h < HASH_SIZE; h++) {
+	for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
 		t = rtnl_dereference(ip6n->tnls_r_l[h]);
 		while (t) {
 			/* If dev is in the same netns, it has already
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index d90a11f..8a02ca8 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -50,14 +50,14 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 
-#define HASH_SIZE_SHIFT  5
-#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+#define IP6_VTI_HASH_SIZE_SHIFT  5
+#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT)
 
 static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
 {
 	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
 
-	return hash_32(hash, HASH_SIZE_SHIFT);
+	return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT);
 }
 
 static int vti6_dev_init(struct net_device *dev);
@@ -69,7 +69,7 @@
 	/* the vti6 tunnel fallback device */
 	struct net_device *fb_tnl_dev;
 	/* lists for storing tunnels in use */
-	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+	struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE];
 	struct ip6_tnl __rcu *tnls_wc[1];
 	struct ip6_tnl __rcu **tnls[2];
 };
@@ -321,11 +321,9 @@
 			goto discard;
 		}
 
-		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
-
 		rcu_read_unlock();
 
-		return xfrm6_rcv(skb);
+		return xfrm6_rcv_tnl(skb, t);
 	}
 	rcu_read_unlock();
 	return -EINVAL;
@@ -340,6 +338,7 @@
 	struct net_device *dev;
 	struct pcpu_sw_netstats *tstats;
 	struct xfrm_state *x;
+	struct xfrm_mode *inner_mode;
 	struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
 	u32 orig_mark = skb->mark;
 	int ret;
@@ -357,7 +356,19 @@
 	}
 
 	x = xfrm_input_state(skb);
-	family = x->inner_mode->afinfo->family;
+
+	inner_mode = x->inner_mode;
+
+	if (x->sel.family == AF_UNSPEC) {
+		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
+		if (inner_mode == NULL) {
+			XFRM_INC_STATS(dev_net(skb->dev),
+				       LINUX_MIB_XFRMINSTATEMODEERROR);
+			return -EINVAL;
+		}
+	}
+
+	family = inner_mode->afinfo->family;
 
 	skb->mark = be32_to_cpu(t->parms.i_key);
 	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
@@ -1040,7 +1051,7 @@
 	struct ip6_tnl *t;
 	LIST_HEAD(list);
 
-	for (h = 0; h < HASH_SIZE; h++) {
+	for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
 		t = rtnl_dereference(ip6n->tnls_r_l[h]);
 		while (t) {
 			unregister_netdevice_queue(t->dev, &list);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 6122f9c..7f4265b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2239,6 +2239,7 @@
 	struct rta_mfc_stats mfcs;
 	struct nlattr *mp_attr;
 	struct rtnexthop *nhp;
+	unsigned long lastuse;
 	int ct;
 
 	/* If cache is unresolved, don't try to parse IIF and OIF */
@@ -2269,12 +2270,14 @@
 
 	nla_nest_end(skb, mp_attr);
 
+	lastuse = READ_ONCE(c->mfc_un.res.lastuse);
+	lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
+
 	mfcs.mfcs_packets = c->mfc_un.res.pkt;
 	mfcs.mfcs_bytes = c->mfc_un.res.bytes;
 	mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
 	if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
-	    nla_put_u64_64bit(skb, RTA_EXPIRES,
-			      jiffies_to_clock_t(c->mfc_un.res.lastuse),
+	    nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
 			      RTA_PAD))
 		return -EMSGSIZE;
 
@@ -2282,8 +2285,8 @@
 	return 1;
 }
 
-int ip6mr_get_route(struct net *net,
-		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
+		    int nowait, u32 portid)
 {
 	int err;
 	struct mr6_table *mrt;
@@ -2328,6 +2331,7 @@
 			return -ENOMEM;
 		}
 
+		NETLINK_CB(skb2).portid = portid;
 		skb_reset_transport_header(skb2);
 
 		skb_put(skb2, sizeof(struct ipv6hdr));
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index d64ee7e..75c1fc5 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1739,6 +1739,15 @@
 			continue;
 		}
 
+		/* Based on RFC3810 6.1. Should not send source-list change
+		 * records when there is a filter mode change.
+		 */
+		if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) ||
+		     (!gdeleted && pmc->mca_crcount)) &&
+		    (type == MLD2_ALLOW_NEW_SOURCES ||
+		     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount)
+			goto decrease_sf_crcount;
+
 		/* clear marks on query responses */
 		if (isquery)
 			psf->sf_gsresp = 0;
@@ -1766,6 +1775,7 @@
 		scount++; stotal++;
 		if ((type == MLD2_ALLOW_NEW_SOURCES ||
 		     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+decrease_sf_crcount:
 			psf->sf_crcount--;
 			if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
 				if (psf_prev)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index fe65cdc..d8e6714 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -67,7 +67,6 @@
 #include <net/flow.h>
 #include <net/ip6_checksum.h>
 #include <net/inet_common.h>
-#include <net/l3mdev.h>
 #include <linux/proc_fs.h>
 
 #include <linux/netfilter.h>
@@ -457,11 +456,9 @@
 
 	if (!dst) {
 		struct flowi6 fl6;
-		int oif = l3mdev_fib_oif(skb->dev);
+		int oif = skb->dev->ifindex;
 
 		icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
-		if (oif != skb->dev->ifindex)
-			fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
 		dst = icmp6_dst_alloc(skb->dev, &fl6);
 		if (IS_ERR(dst)) {
 			kfree_skb(skb);
@@ -1538,7 +1535,6 @@
 	int rd_len;
 	u8 ha_buf[MAX_ADDR_LEN], *ha = NULL,
 	   ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
-	int oif = l3mdev_fib_oif(dev);
 	bool ret;
 
 	if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
@@ -1555,10 +1551,7 @@
 	}
 
 	icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
-			 &saddr_buf, &ipv6_hdr(skb)->saddr, oif);
-
-	if (oif != skb->dev->ifindex)
-		fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
+			 &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
 
 	dst = ip6_route_output(net, NULL, &fl6);
 	if (dst->error) {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 552fac2..55aacea 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -190,7 +190,7 @@
 	.u = {
 		.log = {
 			.level = LOGLEVEL_WARNING,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 1aa5848..963ee38 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -115,7 +115,7 @@
 	help = nfct_help(ct);
 	if (!help)
 		return NF_ACCEPT;
-	/* rcu_read_lock()ed by nf_hook_slow */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	helper = rcu_dereference(help->helper);
 	if (!helper)
 		return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 660bc10..f5a61bc 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -165,7 +165,7 @@
 		return -NF_ACCEPT;
 	}
 
-	/* rcu_read_lock()ed by nf_hook_slow */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
 
 	/* Ordinarily, we'd expect the inverted tupleproto, but it's
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index 8dd8696..57d8606 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -30,7 +30,7 @@
 	.u = {
 		.log = {
 			.level	  = LOGLEVEL_NOTICE,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 		},
 	},
 };
@@ -52,7 +52,7 @@
 	if (info->type == NF_LOG_TYPE_LOG)
 		logflags = info->u.log.logflags;
 	else
-		logflags = NF_LOG_MASK;
+		logflags = NF_LOG_DEFAULT_MASK;
 
 	ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
 	if (ih == NULL) {
@@ -84,7 +84,7 @@
 		}
 
 		/* Max length: 48 "OPT (...) " */
-		if (logflags & XT_LOG_IPOPT)
+		if (logflags & NF_LOG_IPOPT)
 			nf_log_buf_add(m, "OPT ( ");
 
 		switch (currenthdr) {
@@ -121,7 +121,7 @@
 		case IPPROTO_ROUTING:
 		case IPPROTO_HOPOPTS:
 			if (fragment) {
-				if (logflags & XT_LOG_IPOPT)
+				if (logflags & NF_LOG_IPOPT)
 					nf_log_buf_add(m, ")");
 				return;
 			}
@@ -129,7 +129,7 @@
 			break;
 		/* Max Length */
 		case IPPROTO_AH:
-			if (logflags & XT_LOG_IPOPT) {
+			if (logflags & NF_LOG_IPOPT) {
 				struct ip_auth_hdr _ahdr;
 				const struct ip_auth_hdr *ah;
 
@@ -161,7 +161,7 @@
 			hdrlen = (hp->hdrlen+2)<<2;
 			break;
 		case IPPROTO_ESP:
-			if (logflags & XT_LOG_IPOPT) {
+			if (logflags & NF_LOG_IPOPT) {
 				struct ip_esp_hdr _esph;
 				const struct ip_esp_hdr *eh;
 
@@ -194,7 +194,7 @@
 			nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
 			return;
 		}
-		if (logflags & XT_LOG_IPOPT)
+		if (logflags & NF_LOG_IPOPT)
 			nf_log_buf_add(m, ") ");
 
 		currenthdr = hp->nexthdr;
@@ -277,7 +277,7 @@
 	}
 
 	/* Max length: 15 "UID=4294967295 " */
-	if ((logflags & XT_LOG_UID) && recurse)
+	if ((logflags & NF_LOG_UID) && recurse)
 		nf_log_dump_sk_uid_gid(m, skb->sk);
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
@@ -295,7 +295,7 @@
 	if (info->type == NF_LOG_TYPE_LOG)
 		logflags = info->u.log.logflags;
 
-	if (!(logflags & XT_LOG_MACDECODE))
+	if (!(logflags & NF_LOG_MACDECODE))
 		goto fallback;
 
 	switch (dev->type) {
@@ -379,8 +379,7 @@
 
 static int __net_init nf_log_ipv6_net_init(struct net *net)
 {
-	nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
-	return 0;
+	return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger);
 }
 
 static void __net_exit nf_log_ipv6_net_exit(struct net *net)
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index 30b22f4..d6e4ba5 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -22,9 +22,7 @@
 {
 	struct nft_pktinfo pkt;
 
-	/* malformed packet, drop it */
-	if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
-		return NF_DROP;
+	nft_set_pktinfo_ipv6(&pkt, skb, state);
 
 	return nft_do_chain(&pkt, priv);
 }
@@ -102,7 +100,10 @@
 {
 	int ret;
 
-	nft_register_chain_type(&filter_ipv6);
+	ret = nft_register_chain_type(&filter_ipv6);
+	if (ret < 0)
+		return ret;
+
 	ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
 	if (ret < 0)
 		nft_unregister_chain_type(&filter_ipv6);
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 71d995f..f272747 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -31,10 +31,9 @@
 	struct in6_addr saddr, daddr;
 	u_int8_t hop_limit;
 	u32 mark, flowlabel;
+	int err;
 
-	/* malformed packet, drop it */
-	if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
-		return NF_DROP;
+	nft_set_pktinfo_ipv6(&pkt, skb, state);
 
 	/* save source/dest address, mark, hoplimit, flowlabel, priority */
 	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
@@ -46,13 +45,16 @@
 	flowlabel = *((u32 *)ipv6_hdr(skb));
 
 	ret = nft_do_chain(&pkt, priv);
-	if (ret != NF_DROP && ret != NF_QUEUE &&
+	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
 	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
 	     skb->mark != mark ||
 	     ipv6_hdr(skb)->hop_limit != hop_limit ||
-	     flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
-		return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP;
+	     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
+		err = ip6_route_me_harder(state->net, skb);
+		if (err < 0)
+			ret = NF_DROP_ERR(err);
+	}
 
 	return ret;
 }
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index 533cd57..92bda99 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -47,6 +47,7 @@
 	.eval		= nft_reject_ipv6_eval,
 	.init		= nft_reject_init,
 	.dump		= nft_reject_dump,
+	.validate	= nft_reject_validate,
 };
 
 static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 462f2a76b..7cca8ac 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -148,6 +148,13 @@
 	ipv6_hdr(skb)->payload_len = htons(len);
 	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
+	/* if egress device is enslaved to an L3 master device pass the
+	 * skb to its handler for processing
+	 */
+	skb = l3mdev_ip6_out(sk, skb);
+	if (unlikely(!skb))
+		return 0;
+
 	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 		       net, sk, skb, NULL, skb_dst(skb)->dev,
 		       dst_output);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index fed40d1..0e983b6 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -55,7 +55,7 @@
 	struct icmp6hdr user_icmph;
 	int addr_type;
 	struct in6_addr *daddr;
-	int iif = 0;
+	int oif = 0;
 	struct flowi6 fl6;
 	int err;
 	struct dst_entry *dst;
@@ -78,25 +78,30 @@
 		if (u->sin6_family != AF_INET6) {
 			return -EAFNOSUPPORT;
 		}
-		if (sk->sk_bound_dev_if &&
-		    sk->sk_bound_dev_if != u->sin6_scope_id) {
-			return -EINVAL;
-		}
 		daddr = &(u->sin6_addr);
-		iif = u->sin6_scope_id;
+		if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr)))
+			oif = u->sin6_scope_id;
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -EDESTADDRREQ;
 		daddr = &sk->sk_v6_daddr;
 	}
 
-	if (!iif)
-		iif = sk->sk_bound_dev_if;
+	if (!oif)
+		oif = sk->sk_bound_dev_if;
+
+	if (!oif)
+		oif = np->sticky_pktinfo.ipi6_ifindex;
+
+	if (!oif && ipv6_addr_is_multicast(daddr))
+		oif = np->mcast_oif;
+	else if (!oif)
+		oif = np->ucast_oif;
 
 	addr_type = ipv6_addr_type(daddr);
-	if (__ipv6_addr_needs_scope_id(addr_type) && !iif)
-		return -EINVAL;
-	if (addr_type & IPV6_ADDR_MAPPED)
+	if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
+	    (addr_type & IPV6_ADDR_MAPPED) ||
+	    (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
 		return -EINVAL;
 
 	/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
@@ -106,16 +111,12 @@
 	fl6.flowi6_proto = IPPROTO_ICMPV6;
 	fl6.saddr = np->saddr;
 	fl6.daddr = *daddr;
+	fl6.flowi6_oif = oif;
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_icmp_type = user_icmph.icmp6_type;
 	fl6.fl6_icmp_code = user_icmph.icmp6_code;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
-		fl6.flowi6_oif = np->mcast_oif;
-	else if (!fl6.flowi6_oif)
-		fl6.flowi6_oif = np->ucast_oif;
-
 	ipc6.tclass = np->tclass;
 	fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
 
@@ -125,8 +126,10 @@
 	rt = (struct rt6_info *) dst;
 
 	np = inet6_sk(sk);
-	if (!np)
-		return -EBADF;
+	if (!np) {
+		err = -EBADF;
+		goto dst_err_out;
+	}
 
 	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
 		fl6.flowi6_oif = np->mcast_oif;
@@ -162,6 +165,9 @@
 	}
 	release_sock(sk);
 
+dst_err_out:
+	dst_release(dst);
+
 	if (err)
 		return err;
 
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 679253d0..cc8e3ae 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -30,6 +30,11 @@
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 
+#define MAX4(a, b, c, d) \
+	max_t(u32, max_t(u32, a, b), max_t(u32, c, d))
+#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \
+			IPSTATS_MIB_MAX, ICMP_MIB_MAX)
+
 static int sockstat6_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq->private;
@@ -191,25 +196,34 @@
 				atomic_long_t *smib,
 				const struct snmp_mib *itemlist)
 {
+	unsigned long buff[SNMP_MIB_MAX];
 	int i;
-	unsigned long val;
 
-	for (i = 0; itemlist[i].name; i++) {
-		val = pcpumib ?
-			snmp_fold_field(pcpumib, itemlist[i].entry) :
-			atomic_long_read(smib + itemlist[i].entry);
-		seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val);
+	if (pcpumib) {
+		memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+
+		snmp_get_cpu_field_batch(buff, itemlist, pcpumib);
+		for (i = 0; itemlist[i].name; i++)
+			seq_printf(seq, "%-32s\t%lu\n",
+				   itemlist[i].name, buff[i]);
+	} else {
+		for (i = 0; itemlist[i].name; i++)
+			seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name,
+				   atomic_long_read(smib + itemlist[i].entry));
 	}
 }
 
 static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,
 				  const struct snmp_mib *itemlist, size_t syncpoff)
 {
+	u64 buff64[SNMP_MIB_MAX];
 	int i;
 
+	memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX);
+
+	snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff);
 	for (i = 0; itemlist[i].name; i++)
-		seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name,
-			   snmp_fold_field64(mib, itemlist[i].entry, syncpoff));
+		seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]);
 }
 
 static int snmp6_seq_show(struct seq_file *seq, void *v)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 590dd1f..54404f0 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -653,6 +653,13 @@
 	if (err)
 		goto error_fault;
 
+	/* if egress device is enslaved to an L3 master device pass the
+	 * skb to its handler for processing
+	 */
+	skb = l3mdev_ip6_out(sk, skb);
+	if (unlikely(!skb))
+		return 0;
+
 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
 		      NULL, rt->dst.dev, dst_output);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4981755..bdbc38e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1147,15 +1147,16 @@
 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 }
 
-static struct dst_entry *ip6_route_input_lookup(struct net *net,
-						struct net_device *dev,
-						struct flowi6 *fl6, int flags)
+struct dst_entry *ip6_route_input_lookup(struct net *net,
+					 struct net_device *dev,
+					 struct flowi6 *fl6, int flags)
 {
 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
 		flags |= RT6_LOOKUP_F_IFACE;
 
 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
 }
+EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
 
 void ip6_route_input(struct sk_buff *skb)
 {
@@ -1164,7 +1165,7 @@
 	int flags = RT6_LOOKUP_F_HAS_SADDR;
 	struct ip_tunnel_info *tun_info;
 	struct flowi6 fl6 = {
-		.flowi6_iif = l3mdev_fib_oif(skb->dev),
+		.flowi6_iif = skb->dev->ifindex,
 		.daddr = iph->daddr,
 		.saddr = iph->saddr,
 		.flowlabel = ip6_flowinfo(iph),
@@ -1188,12 +1189,15 @@
 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
 					 struct flowi6 *fl6, int flags)
 {
-	struct dst_entry *dst;
 	bool any_src;
 
-	dst = l3mdev_get_rt6_dst(net, fl6);
-	if (dst)
-		return dst;
+	if (rt6_need_strict(&fl6->daddr)) {
+		struct dst_entry *dst;
+
+		dst = l3mdev_link_scope_lookup(net, fl6);
+		if (dst)
+			return dst;
+	}
 
 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
 
@@ -1604,7 +1608,9 @@
 	rcu_read_unlock();
 
 out:
-	return min_t(unsigned int, mtu, IP6_MAX_MTU);
+	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
 }
 
 static struct dst_entry *icmp6_dst_gc_list;
@@ -1986,9 +1992,18 @@
 			if (!(gwa_type & IPV6_ADDR_UNICAST))
 				goto out;
 
-			if (cfg->fc_table)
+			if (cfg->fc_table) {
 				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
 
+				if (grt) {
+					if (grt->rt6i_flags & RTF_GATEWAY ||
+					    (dev && dev != grt->dst.dev)) {
+						ip6_rt_put(grt);
+						grt = NULL;
+					}
+				}
+			}
+
 			if (!grt)
 				grt = rt6_lookup(net, gw_addr, NULL,
 						 cfg->fc_ifindex, 1);
@@ -2556,8 +2571,16 @@
 {
 	u32 tb_id;
 	struct net *net = dev_net(idev->dev);
-	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
-					    DST_NOCOUNT);
+	struct net_device *dev = net->loopback_dev;
+	struct rt6_info *rt;
+
+	/* use L3 Master device as loopback for host routes if device
+	 * is enslaved and address is not link local or multicast
+	 */
+	if (!rt6_need_strict(addr))
+		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
+
+	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
 	if (!rt)
 		return ERR_PTR(-ENOMEM);
 
@@ -3193,7 +3216,9 @@
 	if (iif) {
 #ifdef CONFIG_IPV6_MROUTE
 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
-			int err = ip6mr_get_route(net, skb, rtm, nowait);
+			int err = ip6mr_get_route(net, skb, rtm, nowait,
+						  portid);
+
 			if (err <= 0) {
 				if (!nowait) {
 					if (err == 0)
@@ -3336,11 +3361,6 @@
 	} else {
 		fl6.flowi6_oif = oif;
 
-		if (netif_index_is_l3_master(net, oif)) {
-			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
-					   FLOWI_FLAG_SKIP_NH_OIF;
-		}
-
 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
 	}
 
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 182b6a9..b1cdf80 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -62,7 +62,7 @@
    For comments look at net/ipv4/ip_gre.c --ANK
  */
 
-#define HASH_SIZE  16
+#define IP6_SIT_HASH_SIZE  16
 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 
 static bool log_ecn_error = true;
@@ -78,9 +78,9 @@
 
 static int sit_net_id __read_mostly;
 struct sit_net {
-	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
-	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
-	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE];
+	struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE];
 	struct ip_tunnel __rcu *tunnels_wc[1];
 	struct ip_tunnel __rcu **tunnels[4];
 
@@ -1126,7 +1126,7 @@
 }
 #endif
 
-bool ipip6_valid_ip_proto(u8 ipproto)
+static bool ipip6_valid_ip_proto(u8 ipproto)
 {
 	return ipproto == IPPROTO_IPV6 ||
 		ipproto == IPPROTO_IPIP ||
@@ -1783,7 +1783,7 @@
 
 	for (prio = 1; prio < 4; prio++) {
 		int h;
-		for (h = 0; h < HASH_SIZE; h++) {
+		for (h = 0; h < IP6_SIT_HASH_SIZE; h++) {
 			struct ip_tunnel *t;
 
 			t = rtnl_dereference(sitn->tunnels[prio][h]);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 33df8b8..54cf719 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -671,6 +671,7 @@
 				      NULL, skb);
 
 	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
 		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
 				     genhash ? "failed" : "mismatch",
 				     &ip6h->saddr, ntohs(th->source),
@@ -817,12 +818,8 @@
 	fl6.flowi6_proto = IPPROTO_TCP;
 	if (rt6_need_strict(&fl6.daddr) && !oif)
 		fl6.flowi6_oif = tcp_v6_iif(skb);
-	else {
-		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
-			oif = skb->skb_iif;
-
-		fl6.flowi6_oif = oif;
-	}
+	else
+		fl6.flowi6_oif = oif ? : skb->skb_iif;
 
 	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);
 	fl6.fl6_dport = t1->dest;
@@ -944,9 +941,15 @@
 	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
 	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
 	 */
+	/* RFC 7323 2.3
+	 * The window field (SEG.WND) of every outgoing segment, with the
+	 * exception of <SYN> segments, MUST be right-shifted by
+	 * Rcv.Wind.Shift bits:
+	 */
 	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
 			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
-			tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
+			tcp_rsk(req)->rcv_nxt,
+			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
 			tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
 			0, 0);
@@ -1409,6 +1412,7 @@
 		sk = req->rsk_listener;
 		tcp_v6_fill_cb(skb, hdr, th);
 		if (tcp_v6_inbound_md5_hash(sk, skb)) {
+			sk_drops_add(sk, skb);
 			reqsk_put(req);
 			goto discard_it;
 		}
@@ -1465,10 +1469,7 @@
 	if (!sock_owned_by_user(sk)) {
 		if (!tcp_prequeue(sk, skb))
 			ret = tcp_v6_do_rcv(sk, skb);
-	} else if (unlikely(sk_add_backlog(sk, skb,
-					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
-		bh_unlock_sock(sk);
-		__NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
+	} else if (tcp_add_backlog(sk, skb)) {
 		goto discard_and_relse;
 	}
 	bh_unlock_sock(sk);
@@ -1862,17 +1863,6 @@
 }
 #endif
 
-static void tcp_v6_clear_sk(struct sock *sk, int size)
-{
-	struct inet_sock *inet = inet_sk(sk);
-
-	/* we do not want to clear pinet6 field, because of RCU lookups */
-	sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6));
-
-	size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
-	memset(&inet->pinet6 + 1, 0, size);
-}
-
 struct proto tcpv6_prot = {
 	.name			= "TCPv6",
 	.owner			= THIS_MODULE,
@@ -1914,7 +1904,6 @@
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
-	.clear_sk		= tcp_v6_clear_sk,
 	.diag_destroy		= tcp_abort,
 };
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 81e2f98..9aa7c1c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1424,17 +1424,6 @@
 }
 #endif /* CONFIG_PROC_FS */
 
-void udp_v6_clear_sk(struct sock *sk, int size)
-{
-	struct inet_sock *inet = inet_sk(sk);
-
-	/* we do not want to clear pinet6 field, because of RCU lookups */
-	sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6));
-
-	size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6);
-	memset(&inet->pinet6 + 1, 0, size);
-}
-
 /* ------------------------------------------------------------------------ */
 
 struct proto udpv6_prot = {
@@ -1460,13 +1449,12 @@
 	.sysctl_wmem	   = &sysctl_udp_wmem_min,
 	.sysctl_rmem	   = &sysctl_udp_rmem_min,
 	.obj_size	   = sizeof(struct udp6_sock),
-	.slab_flags	   = SLAB_DESTROY_BY_RCU,
 	.h.udp_table	   = &udp_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udpv6_setsockopt,
 	.compat_getsockopt = compat_udpv6_getsockopt,
 #endif
-	.clear_sk	   = udp_v6_clear_sk,
+	.diag_destroy      = udp_abort,
 };
 
 static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 0682c03..f6eb1ab 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -29,8 +29,6 @@
 int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 void udpv6_destroy_sock(struct sock *sk);
 
-void udp_v6_clear_sk(struct sock *sk, int size);
-
 #ifdef CONFIG_PROC_FS
 int udp6_seq_show(struct seq_file *seq, void *v);
 #endif
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 9cf097e..47d0d2b 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -50,13 +50,11 @@
 	.unhash		   = udp_lib_unhash,
 	.get_port	   = udp_v6_get_port,
 	.obj_size	   = sizeof(struct udp6_sock),
-	.slab_flags	   = SLAB_DESTROY_BY_RCU,
 	.h.udp_table	   = &udplite_table,
 #ifdef CONFIG_COMPAT
 	.compat_setsockopt = compat_udpv6_setsockopt,
 	.compat_getsockopt = compat_udpv6_getsockopt,
 #endif
-	.clear_sk	   = udp_v6_clear_sk,
 };
 
 static struct inet_protosw udplite6_protosw = {
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 0eaab1f..b578956 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -21,8 +21,10 @@
 	return xfrm6_extract_header(skb);
 }
 
-int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
+int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
+		  struct ip6_tnl *t)
 {
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
 	XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
 	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
 	return xfrm_input(skb, nexthdr, spi, 0);
@@ -48,13 +50,18 @@
 	return -1;
 }
 
-int xfrm6_rcv(struct sk_buff *skb)
+int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
 {
 	return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
-			     0);
+			     0, t);
+}
+EXPORT_SYMBOL(xfrm6_rcv_tnl);
+
+int xfrm6_rcv(struct sk_buff *skb)
+{
+	return xfrm6_rcv_tnl(skb, NULL);
 }
 EXPORT_SYMBOL(xfrm6_rcv);
-
 int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
 		     xfrm_address_t *saddr, u8 proto)
 {
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 6cc9700..e0f71c0 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -36,7 +36,7 @@
 	int err;
 
 	memset(&fl6, 0, sizeof(fl6));
-	fl6.flowi6_oif = oif;
+	fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif);
 	fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
 	memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
 	if (saddr)
@@ -134,7 +134,7 @@
 	nexthdr = nh[nhoff];
 
 	if (skb_dst(skb))
-		oif = l3mdev_fib_oif(skb_dst(skb)->dev);
+		oif = skb_dst(skb)->dev->ifindex;
 
 	memset(fl6, 0, sizeof(struct flowi6));
 	fl6->flowi6_mark = skb->mark;
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 5743044..e1c0bbe 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -236,7 +236,7 @@
 	__be32 spi;
 
 	spi = xfrm6_tunnel_spi_lookup(net, (const xfrm_address_t *)&iph->saddr);
-	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi);
+	return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL);
 }
 
 static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 8d2f7c9b..391c3cb 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -832,7 +832,7 @@
 	struct sock *sk = sock->sk;
 	struct irda_sock *new, *self = irda_sk(sk);
 	struct sock *newsk;
-	struct sk_buff *skb;
+	struct sk_buff *skb = NULL;
 	int err;
 
 	err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
@@ -845,9 +845,6 @@
 	if (sock->state != SS_UNCONNECTED)
 		goto out;
 
-	if ((sk = sock->sk) == NULL)
-		goto out;
-
 	err = -EOPNOTSUPP;
 	if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
 	    (sk->sk_type != SOCK_DGRAM))
@@ -900,7 +897,6 @@
 	err = -EPERM; /* value does not seem to make sense. -arnd */
 	if (!new->tsap) {
 		pr_debug("%s(), dup failed!\n", __func__);
-		kfree_skb(skb);
 		goto out;
 	}
 
@@ -919,7 +915,6 @@
 	/* Clean up the original one to keep it in listen state */
 	irttp_listen(self->tsap);
 
-	kfree_skb(skb);
 	sk->sk_ack_backlog--;
 
 	newsock->state = SS_CONNECTED;
@@ -927,6 +922,7 @@
 	irda_connect_response(new);
 	err = 0;
 out:
+	kfree_skb(skb);
 	release_sock(sk);
 	return err;
 }
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 4a7ae32a..1138eaf 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -185,8 +185,12 @@
 
 	self->magic = IAS_MAGIC;
 	self->mode = mode;
-	if (mode == IAS_CLIENT)
-		iriap_register_lsap(self, slsap_sel, mode);
+	if (mode == IAS_CLIENT) {
+		if (iriap_register_lsap(self, slsap_sel, mode)) {
+			kfree(self);
+			return NULL;
+		}
+	}
 
 	self->confirm = callback;
 	self->priv = priv;
diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig
index 5db94d9..87fca36 100644
--- a/net/kcm/Kconfig
+++ b/net/kcm/Kconfig
@@ -3,6 +3,7 @@
 	tristate "KCM sockets"
 	depends on INET
 	select BPF_SYSCALL
+	select STREAM_PARSER
 	---help---
 	  KCM (Kernel Connection Multiplexor) sockets provide a method
 	  for multiplexing messages of a message based application
diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c
index 16c2e03..bf75c92 100644
--- a/net/kcm/kcmproc.c
+++ b/net/kcm/kcmproc.c
@@ -155,8 +155,8 @@
 	seq_printf(seq,
 		   "   psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ",
 		   psock->index,
-		   psock->stats.rx_msgs,
-		   psock->stats.rx_bytes,
+		   psock->strp.stats.rx_msgs,
+		   psock->strp.stats.rx_bytes,
 		   psock->stats.tx_msgs,
 		   psock->stats.tx_bytes,
 		   psock->sk->sk_receive_queue.qlen,
@@ -170,14 +170,27 @@
 	if (psock->tx_stopped)
 		seq_puts(seq, "TxStop ");
 
-	if (psock->rx_stopped)
+	if (psock->strp.rx_stopped)
 		seq_puts(seq, "RxStop ");
 
 	if (psock->tx_kcm)
 		seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index);
 
-	if (psock->ready_rx_msg)
-		seq_puts(seq, "RdyRx ");
+	if (!psock->strp.rx_paused && !psock->ready_rx_msg) {
+		if (psock->sk->sk_receive_queue.qlen) {
+			if (psock->strp.rx_need_bytes)
+				seq_printf(seq, "RxWait=%u ",
+					   psock->strp.rx_need_bytes);
+			else
+				seq_printf(seq, "RxWait ");
+		}
+	} else  {
+		if (psock->strp.rx_paused)
+			seq_puts(seq, "RxPause ");
+
+		if (psock->ready_rx_msg)
+			seq_puts(seq, "RdyRx ");
+	}
 
 	seq_puts(seq, "\n");
 }
@@ -275,6 +288,7 @@
 {
 	struct kcm_psock_stats psock_stats;
 	struct kcm_mux_stats mux_stats;
+	struct strp_aggr_stats strp_stats;
 	struct kcm_mux *mux;
 	struct kcm_psock *psock;
 	struct net *net = seq->private;
@@ -282,20 +296,28 @@
 
 	memset(&mux_stats, 0, sizeof(mux_stats));
 	memset(&psock_stats, 0, sizeof(psock_stats));
+	memset(&strp_stats, 0, sizeof(strp_stats));
 
 	mutex_lock(&knet->mutex);
 
 	aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats);
 	aggregate_psock_stats(&knet->aggregate_psock_stats,
 			      &psock_stats);
+	aggregate_strp_stats(&knet->aggregate_strp_stats,
+			     &strp_stats);
 
 	list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) {
 		spin_lock_bh(&mux->lock);
 		aggregate_mux_stats(&mux->stats, &mux_stats);
 		aggregate_psock_stats(&mux->aggregate_psock_stats,
 				      &psock_stats);
-		list_for_each_entry(psock, &mux->psocks, psock_list)
+		aggregate_strp_stats(&mux->aggregate_strp_stats,
+				     &strp_stats);
+		list_for_each_entry(psock, &mux->psocks, psock_list) {
 			aggregate_psock_stats(&psock->stats, &psock_stats);
+			save_strp_stats(&psock->strp, &strp_stats);
+		}
+
 		spin_unlock_bh(&mux->lock);
 	}
 
@@ -328,7 +350,7 @@
 		   mux_stats.rx_ready_drops);
 
 	seq_printf(seq,
-		   "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
+		   "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n",
 		   "Psock",
 		   "RX-Msgs",
 		   "RX-Bytes",
@@ -337,6 +359,8 @@
 		   "Reserved",
 		   "Unreserved",
 		   "RX-Aborts",
+		   "RX-Intr",
+		   "RX-Unrecov",
 		   "RX-MemFail",
 		   "RX-NeedMor",
 		   "RX-BadLen",
@@ -345,20 +369,22 @@
 		   "TX-Aborts");
 
 	seq_printf(seq,
-		   "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
+		   "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n",
 		   "",
-		   psock_stats.rx_msgs,
-		   psock_stats.rx_bytes,
+		   strp_stats.rx_msgs,
+		   strp_stats.rx_bytes,
 		   psock_stats.tx_msgs,
 		   psock_stats.tx_bytes,
 		   psock_stats.reserved,
 		   psock_stats.unreserved,
-		   psock_stats.rx_aborts,
-		   psock_stats.rx_mem_fail,
-		   psock_stats.rx_need_more_hdr,
-		   psock_stats.rx_bad_hdr_len,
-		   psock_stats.rx_msg_too_big,
-		   psock_stats.rx_msg_timeouts,
+		   strp_stats.rx_aborts,
+		   strp_stats.rx_interrupted,
+		   strp_stats.rx_unrecov_intr,
+		   strp_stats.rx_mem_fail,
+		   strp_stats.rx_need_more_hdr,
+		   strp_stats.rx_bad_hdr_len,
+		   strp_stats.rx_msg_too_big,
+		   strp_stats.rx_msg_timeouts,
 		   psock_stats.tx_aborts);
 
 	return 0;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index cb39e05..b7f869a8 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1,3 +1,13 @@
+/*
+ * Kernel Connection Multiplexor
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
 #include <linux/bpf.h>
 #include <linux/errno.h>
 #include <linux/errqueue.h>
@@ -13,10 +23,10 @@
 #include <linux/socket.h>
 #include <linux/uaccess.h>
 #include <linux/workqueue.h>
+#include <linux/syscalls.h>
 #include <net/kcm.h>
 #include <net/netns/generic.h>
 #include <net/sock.h>
-#include <net/tcp.h>
 #include <uapi/linux/kcm.h>
 
 unsigned int kcm_net_id;
@@ -35,38 +45,12 @@
 	return (struct kcm_tx_msg *)skb->cb;
 }
 
-static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb)
-{
-	return (struct kcm_rx_msg *)((void *)skb->cb +
-				     offsetof(struct qdisc_skb_cb, data));
-}
-
 static void report_csk_error(struct sock *csk, int err)
 {
 	csk->sk_err = EPIPE;
 	csk->sk_error_report(csk);
 }
 
-/* Callback lock held */
-static void kcm_abort_rx_psock(struct kcm_psock *psock, int err,
-			       struct sk_buff *skb)
-{
-	struct sock *csk = psock->sk;
-
-	/* Unrecoverable error in receive */
-
-	del_timer(&psock->rx_msg_timer);
-
-	if (psock->rx_stopped)
-		return;
-
-	psock->rx_stopped = 1;
-	KCM_STATS_INCR(psock->stats.rx_aborts);
-
-	/* Report an error on the lower socket */
-	report_csk_error(csk, err);
-}
-
 static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
 			       bool wakeup_kcm)
 {
@@ -109,12 +93,13 @@
 static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
 				    struct kcm_psock *psock)
 {
-	KCM_STATS_ADD(mux->stats.rx_bytes,
-		      psock->stats.rx_bytes - psock->saved_rx_bytes);
+	STRP_STATS_ADD(mux->stats.rx_bytes,
+		       psock->strp.stats.rx_bytes -
+		       psock->saved_rx_bytes);
 	mux->stats.rx_msgs +=
-		psock->stats.rx_msgs - psock->saved_rx_msgs;
-	psock->saved_rx_msgs = psock->stats.rx_msgs;
-	psock->saved_rx_bytes = psock->stats.rx_bytes;
+		psock->strp.stats.rx_msgs - psock->saved_rx_msgs;
+	psock->saved_rx_msgs = psock->strp.stats.rx_msgs;
+	psock->saved_rx_bytes = psock->strp.stats.rx_bytes;
 }
 
 static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
@@ -167,11 +152,11 @@
 		 */
 		list_del(&psock->psock_ready_list);
 		psock->ready_rx_msg = NULL;
-
 		/* Commit clearing of ready_rx_msg for queuing work */
 		smp_mb();
 
-		queue_work(kcm_wq, &psock->rx_work);
+		strp_unpause(&psock->strp);
+		strp_check_rcv(&psock->strp);
 	}
 
 	/* Buffer limit is okay now, add to ready list */
@@ -285,6 +270,7 @@
 
 	if (list_empty(&mux->kcm_rx_waiters)) {
 		psock->ready_rx_msg = head;
+		strp_pause(&psock->strp);
 		list_add_tail(&psock->psock_ready_list,
 			      &mux->psocks_ready);
 		spin_unlock_bh(&mux->rx_lock);
@@ -353,346 +339,60 @@
 	spin_unlock_bh(&mux->rx_lock);
 }
 
-static void kcm_start_rx_timer(struct kcm_psock *psock)
-{
-	if (psock->sk->sk_rcvtimeo)
-		mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo);
-}
-
-/* Macro to invoke filter function. */
-#define KCM_RUN_FILTER(prog, ctx) \
-	(*prog->bpf_func)(ctx, prog->insnsi)
-
-/* Lower socket lock held */
-static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
-			unsigned int orig_offset, size_t orig_len)
-{
-	struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data;
-	struct kcm_rx_msg *rxm;
-	struct kcm_sock *kcm;
-	struct sk_buff *head, *skb;
-	size_t eaten = 0, cand_len;
-	ssize_t extra;
-	int err;
-	bool cloned_orig = false;
-
-	if (psock->ready_rx_msg)
-		return 0;
-
-	head = psock->rx_skb_head;
-	if (head) {
-		/* Message already in progress */
-
-		rxm = kcm_rx_msg(head);
-		if (unlikely(rxm->early_eaten)) {
-			/* Already some number of bytes on the receive sock
-			 * data saved in rx_skb_head, just indicate they
-			 * are consumed.
-			 */
-			eaten = orig_len <= rxm->early_eaten ?
-				orig_len : rxm->early_eaten;
-			rxm->early_eaten -= eaten;
-
-			return eaten;
-		}
-
-		if (unlikely(orig_offset)) {
-			/* Getting data with a non-zero offset when a message is
-			 * in progress is not expected. If it does happen, we
-			 * need to clone and pull since we can't deal with
-			 * offsets in the skbs for a message expect in the head.
-			 */
-			orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
-			if (!orig_skb) {
-				KCM_STATS_INCR(psock->stats.rx_mem_fail);
-				desc->error = -ENOMEM;
-				return 0;
-			}
-			if (!pskb_pull(orig_skb, orig_offset)) {
-				KCM_STATS_INCR(psock->stats.rx_mem_fail);
-				kfree_skb(orig_skb);
-				desc->error = -ENOMEM;
-				return 0;
-			}
-			cloned_orig = true;
-			orig_offset = 0;
-		}
-
-		if (!psock->rx_skb_nextp) {
-			/* We are going to append to the frags_list of head.
-			 * Need to unshare the frag_list.
-			 */
-			err = skb_unclone(head, GFP_ATOMIC);
-			if (err) {
-				KCM_STATS_INCR(psock->stats.rx_mem_fail);
-				desc->error = err;
-				return 0;
-			}
-
-			if (unlikely(skb_shinfo(head)->frag_list)) {
-				/* We can't append to an sk_buff that already
-				 * has a frag_list. We create a new head, point
-				 * the frag_list of that to the old head, and
-				 * then are able to use the old head->next for
-				 * appending to the message.
-				 */
-				if (WARN_ON(head->next)) {
-					desc->error = -EINVAL;
-					return 0;
-				}
-
-				skb = alloc_skb(0, GFP_ATOMIC);
-				if (!skb) {
-					KCM_STATS_INCR(psock->stats.rx_mem_fail);
-					desc->error = -ENOMEM;
-					return 0;
-				}
-				skb->len = head->len;
-				skb->data_len = head->len;
-				skb->truesize = head->truesize;
-				*kcm_rx_msg(skb) = *kcm_rx_msg(head);
-				psock->rx_skb_nextp = &head->next;
-				skb_shinfo(skb)->frag_list = head;
-				psock->rx_skb_head = skb;
-				head = skb;
-			} else {
-				psock->rx_skb_nextp =
-				    &skb_shinfo(head)->frag_list;
-			}
-		}
-	}
-
-	while (eaten < orig_len) {
-		/* Always clone since we will consume something */
-		skb = skb_clone(orig_skb, GFP_ATOMIC);
-		if (!skb) {
-			KCM_STATS_INCR(psock->stats.rx_mem_fail);
-			desc->error = -ENOMEM;
-			break;
-		}
-
-		cand_len = orig_len - eaten;
-
-		head = psock->rx_skb_head;
-		if (!head) {
-			head = skb;
-			psock->rx_skb_head = head;
-			/* Will set rx_skb_nextp on next packet if needed */
-			psock->rx_skb_nextp = NULL;
-			rxm = kcm_rx_msg(head);
-			memset(rxm, 0, sizeof(*rxm));
-			rxm->offset = orig_offset + eaten;
-		} else {
-			/* Unclone since we may be appending to an skb that we
-			 * already share a frag_list with.
-			 */
-			err = skb_unclone(skb, GFP_ATOMIC);
-			if (err) {
-				KCM_STATS_INCR(psock->stats.rx_mem_fail);
-				desc->error = err;
-				break;
-			}
-
-			rxm = kcm_rx_msg(head);
-			*psock->rx_skb_nextp = skb;
-			psock->rx_skb_nextp = &skb->next;
-			head->data_len += skb->len;
-			head->len += skb->len;
-			head->truesize += skb->truesize;
-		}
-
-		if (!rxm->full_len) {
-			ssize_t len;
-
-			len = KCM_RUN_FILTER(psock->bpf_prog, head);
-
-			if (!len) {
-				/* Need more header to determine length */
-				if (!rxm->accum_len) {
-					/* Start RX timer for new message */
-					kcm_start_rx_timer(psock);
-				}
-				rxm->accum_len += cand_len;
-				eaten += cand_len;
-				KCM_STATS_INCR(psock->stats.rx_need_more_hdr);
-				WARN_ON(eaten != orig_len);
-				break;
-			} else if (len > psock->sk->sk_rcvbuf) {
-				/* Message length exceeds maximum allowed */
-				KCM_STATS_INCR(psock->stats.rx_msg_too_big);
-				desc->error = -EMSGSIZE;
-				psock->rx_skb_head = NULL;
-				kcm_abort_rx_psock(psock, EMSGSIZE, head);
-				break;
-			} else if (len <= (ssize_t)head->len -
-					  skb->len - rxm->offset) {
-				/* Length must be into new skb (and also
-				 * greater than zero)
-				 */
-				KCM_STATS_INCR(psock->stats.rx_bad_hdr_len);
-				desc->error = -EPROTO;
-				psock->rx_skb_head = NULL;
-				kcm_abort_rx_psock(psock, EPROTO, head);
-				break;
-			}
-
-			rxm->full_len = len;
-		}
-
-		extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len;
-
-		if (extra < 0) {
-			/* Message not complete yet. */
-			if (rxm->full_len - rxm->accum_len >
-			    tcp_inq(psock->sk)) {
-				/* Don't have the whole messages in the socket
-				 * buffer. Set psock->rx_need_bytes to wait for
-				 * the rest of the message. Also, set "early
-				 * eaten" since we've already buffered the skb
-				 * but don't consume yet per tcp_read_sock.
-				 */
-
-				if (!rxm->accum_len) {
-					/* Start RX timer for new message */
-					kcm_start_rx_timer(psock);
-				}
-
-				psock->rx_need_bytes = rxm->full_len -
-						       rxm->accum_len;
-				rxm->accum_len += cand_len;
-				rxm->early_eaten = cand_len;
-				KCM_STATS_ADD(psock->stats.rx_bytes, cand_len);
-				desc->count = 0; /* Stop reading socket */
-				break;
-			}
-			rxm->accum_len += cand_len;
-			eaten += cand_len;
-			WARN_ON(eaten != orig_len);
-			break;
-		}
-
-		/* Positive extra indicates ore bytes than needed for the
-		 * message
-		 */
-
-		WARN_ON(extra > cand_len);
-
-		eaten += (cand_len - extra);
-
-		/* Hurray, we have a new message! */
-		del_timer(&psock->rx_msg_timer);
-		psock->rx_skb_head = NULL;
-		KCM_STATS_INCR(psock->stats.rx_msgs);
-
-try_queue:
-		kcm = reserve_rx_kcm(psock, head);
-		if (!kcm) {
-			/* Unable to reserve a KCM, message is held in psock. */
-			break;
-		}
-
-		if (kcm_queue_rcv_skb(&kcm->sk, head)) {
-			/* Should mean socket buffer full */
-			unreserve_rx_kcm(psock, false);
-			goto try_queue;
-		}
-	}
-
-	if (cloned_orig)
-		kfree_skb(orig_skb);
-
-	KCM_STATS_ADD(psock->stats.rx_bytes, eaten);
-
-	return eaten;
-}
-
-/* Called with lock held on lower socket */
-static int psock_tcp_read_sock(struct kcm_psock *psock)
-{
-	read_descriptor_t desc;
-
-	desc.arg.data = psock;
-	desc.error = 0;
-	desc.count = 1; /* give more than one skb per call */
-
-	/* sk should be locked here, so okay to do tcp_read_sock */
-	tcp_read_sock(psock->sk, &desc, kcm_tcp_recv);
-
-	unreserve_rx_kcm(psock, true);
-
-	return desc.error;
-}
-
 /* Lower sock lock held */
-static void psock_tcp_data_ready(struct sock *sk)
+static void psock_data_ready(struct sock *sk)
 {
 	struct kcm_psock *psock;
 
 	read_lock_bh(&sk->sk_callback_lock);
 
 	psock = (struct kcm_psock *)sk->sk_user_data;
-	if (unlikely(!psock || psock->rx_stopped))
-		goto out;
+	if (likely(psock))
+		strp_data_ready(&psock->strp);
 
-	if (psock->ready_rx_msg)
-		goto out;
-
-	if (psock->rx_need_bytes) {
-		if (tcp_inq(sk) >= psock->rx_need_bytes)
-			psock->rx_need_bytes = 0;
-		else
-			goto out;
-	}
-
-	if (psock_tcp_read_sock(psock) == -ENOMEM)
-		queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
-
-out:
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
-static void do_psock_rx_work(struct kcm_psock *psock)
+/* Called with lower sock held */
+static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
 {
-	read_descriptor_t rd_desc;
-	struct sock *csk = psock->sk;
+	struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+	struct kcm_sock *kcm;
 
-	/* We need the read lock to synchronize with psock_tcp_data_ready. We
-	 * need the socket lock for calling tcp_read_sock.
-	 */
-	lock_sock(csk);
-	read_lock_bh(&csk->sk_callback_lock);
+try_queue:
+	kcm = reserve_rx_kcm(psock, skb);
+	if (!kcm) {
+		 /* Unable to reserve a KCM, message is held in psock and strp
+		  * is paused.
+		  */
+		return;
+	}
 
-	if (unlikely(csk->sk_user_data != psock))
-		goto out;
-
-	if (unlikely(psock->rx_stopped))
-		goto out;
-
-	if (psock->ready_rx_msg)
-		goto out;
-
-	rd_desc.arg.data = psock;
-
-	if (psock_tcp_read_sock(psock) == -ENOMEM)
-		queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0);
-
-out:
-	read_unlock_bh(&csk->sk_callback_lock);
-	release_sock(csk);
+	if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
+		/* Should mean socket buffer full */
+		unreserve_rx_kcm(psock, false);
+		goto try_queue;
+	}
 }
 
-static void psock_rx_work(struct work_struct *w)
+static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
 {
-	do_psock_rx_work(container_of(w, struct kcm_psock, rx_work));
+	struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+	struct bpf_prog *prog = psock->bpf_prog;
+
+	return (*prog->bpf_func)(skb, prog->insnsi);
 }
 
-static void psock_rx_delayed_work(struct work_struct *w)
+static int kcm_read_sock_done(struct strparser *strp, int err)
 {
-	do_psock_rx_work(container_of(w, struct kcm_psock,
-				      rx_delayed_work.work));
+	struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
+
+	unreserve_rx_kcm(psock, true);
+
+	return err;
 }
 
-static void psock_tcp_state_change(struct sock *sk)
+static void psock_state_change(struct sock *sk)
 {
 	/* TCP only does a POLLIN for a half close. Do a POLLHUP here
 	 * since application will normally not poll with POLLIN
@@ -702,7 +402,7 @@
 	report_csk_error(sk, EPIPE);
 }
 
-static void psock_tcp_write_space(struct sock *sk)
+static void psock_write_space(struct sock *sk)
 {
 	struct kcm_psock *psock;
 	struct kcm_mux *mux;
@@ -713,14 +413,13 @@
 	psock = (struct kcm_psock *)sk->sk_user_data;
 	if (unlikely(!psock))
 		goto out;
-
 	mux = psock->mux;
 
 	spin_lock_bh(&mux->lock);
 
 	/* Check if the socket is reserved so someone is waiting for sending. */
 	kcm = psock->tx_kcm;
-	if (kcm)
+	if (kcm && !unlikely(kcm->tx_stopped))
 		queue_work(kcm_wq, &kcm->tx_work);
 
 	spin_unlock_bh(&mux->lock);
@@ -1411,7 +1110,7 @@
 	struct kcm_sock *kcm = kcm_sk(sk);
 	int err = 0;
 	long timeo;
-	struct kcm_rx_msg *rxm;
+	struct strp_rx_msg *rxm;
 	int copied = 0;
 	struct sk_buff *skb;
 
@@ -1425,7 +1124,7 @@
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = kcm_rx_msg(skb);
+	rxm = strp_rx_msg(skb);
 
 	if (len > rxm->full_len)
 		len = rxm->full_len;
@@ -1481,7 +1180,7 @@
 	struct sock *sk = sock->sk;
 	struct kcm_sock *kcm = kcm_sk(sk);
 	long timeo;
-	struct kcm_rx_msg *rxm;
+	struct strp_rx_msg *rxm;
 	int err = 0;
 	ssize_t copied;
 	struct sk_buff *skb;
@@ -1498,7 +1197,7 @@
 
 	/* Okay, have a message on the receive queue */
 
-	rxm = kcm_rx_msg(skb);
+	rxm = strp_rx_msg(skb);
 
 	if (len > rxm->full_len)
 		len = rxm->full_len;
@@ -1674,15 +1373,6 @@
 	spin_unlock_bh(&mux->rx_lock);
 }
 
-static void kcm_rx_msg_timeout(unsigned long arg)
-{
-	struct kcm_psock *psock = (struct kcm_psock *)arg;
-
-	/* Message assembly timed out */
-	KCM_STATS_INCR(psock->stats.rx_msg_timeouts);
-	kcm_abort_rx_psock(psock, ETIMEDOUT, NULL);
-}
-
 static int kcm_attach(struct socket *sock, struct socket *csock,
 		      struct bpf_prog *prog)
 {
@@ -1692,19 +1382,13 @@
 	struct kcm_psock *psock = NULL, *tpsock;
 	struct list_head *head;
 	int index = 0;
-
-	if (csock->ops->family != PF_INET &&
-	    csock->ops->family != PF_INET6)
-		return -EINVAL;
+	struct strp_callbacks cb;
+	int err;
 
 	csk = csock->sk;
 	if (!csk)
 		return -EINVAL;
 
-	/* Only support TCP for now */
-	if (csk->sk_protocol != IPPROTO_TCP)
-		return -EINVAL;
-
 	psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
 	if (!psock)
 		return -ENOMEM;
@@ -1713,11 +1397,16 @@
 	psock->sk = csk;
 	psock->bpf_prog = prog;
 
-	setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout,
-		    (unsigned long)psock);
+	cb.rcv_msg = kcm_rcv_strparser;
+	cb.abort_parser = NULL;
+	cb.parse_msg = kcm_parse_func_strparser;
+	cb.read_sock_done = kcm_read_sock_done;
 
-	INIT_WORK(&psock->rx_work, psock_rx_work);
-	INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work);
+	err = strp_init(&psock->strp, csk, &cb);
+	if (err) {
+		kmem_cache_free(kcm_psockp, psock);
+		return err;
+	}
 
 	sock_hold(csk);
 
@@ -1726,9 +1415,9 @@
 	psock->save_write_space = csk->sk_write_space;
 	psock->save_state_change = csk->sk_state_change;
 	csk->sk_user_data = psock;
-	csk->sk_data_ready = psock_tcp_data_ready;
-	csk->sk_write_space = psock_tcp_write_space;
-	csk->sk_state_change = psock_tcp_state_change;
+	csk->sk_data_ready = psock_data_ready;
+	csk->sk_write_space = psock_write_space;
+	csk->sk_state_change = psock_state_change;
 	write_unlock_bh(&csk->sk_callback_lock);
 
 	/* Finished initialization, now add the psock to the MUX. */
@@ -1750,7 +1439,7 @@
 	spin_unlock_bh(&mux->lock);
 
 	/* Schedule RX work in case there are already bytes queued */
-	queue_work(kcm_wq, &psock->rx_work);
+	strp_check_rcv(&psock->strp);
 
 	return 0;
 }
@@ -1790,6 +1479,8 @@
 	struct sock *csk = psock->sk;
 	struct kcm_mux *mux = psock->mux;
 
+	lock_sock(csk);
+
 	/* Stop getting callbacks from TCP socket. After this there should
 	 * be no way to reserve a kcm for this psock.
 	 */
@@ -1798,7 +1489,7 @@
 	csk->sk_data_ready = psock->save_data_ready;
 	csk->sk_write_space = psock->save_write_space;
 	csk->sk_state_change = psock->save_state_change;
-	psock->rx_stopped = 1;
+	strp_stop(&psock->strp);
 
 	if (WARN_ON(psock->rx_kcm)) {
 		write_unlock_bh(&csk->sk_callback_lock);
@@ -1821,18 +1512,17 @@
 
 	write_unlock_bh(&csk->sk_callback_lock);
 
-	del_timer_sync(&psock->rx_msg_timer);
-	cancel_work_sync(&psock->rx_work);
-	cancel_delayed_work_sync(&psock->rx_delayed_work);
+	/* Call strp_done without sock lock */
+	release_sock(csk);
+	strp_done(&psock->strp);
+	lock_sock(csk);
 
 	bpf_prog_put(psock->bpf_prog);
 
-	kfree_skb(psock->rx_skb_head);
-	psock->rx_skb_head = NULL;
-
 	spin_lock_bh(&mux->lock);
 
 	aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
+	save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
 
 	KCM_STATS_INCR(mux->stats.psock_unattach);
 
@@ -1875,6 +1565,8 @@
 		fput(csk->sk_socket->file);
 		kmem_cache_free(kcm_psockp, psock);
 	}
+
+	release_sock(csk);
 }
 
 static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
@@ -1915,6 +1607,7 @@
 
 		spin_unlock_bh(&mux->lock);
 
+		/* Lower socket lock should already be held */
 		kcm_unattach(psock);
 
 		err = 0;
@@ -2029,7 +1722,7 @@
 			if (copy_to_user((void __user *)arg, &info,
 					 sizeof(info))) {
 				err = -EFAULT;
-				sock_release(newsock);
+				sys_close(info.fd);
 			}
 		}
 
@@ -2072,6 +1765,8 @@
 	aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
 	aggregate_psock_stats(&mux->aggregate_psock_stats,
 			      &knet->aggregate_psock_stats);
+	aggregate_strp_stats(&mux->aggregate_strp_stats,
+			     &knet->aggregate_strp_stats);
 	list_del_rcu(&mux->kcm_mux_list);
 	knet->count--;
 	mutex_unlock(&knet->mutex);
@@ -2151,6 +1846,13 @@
 	 * it will just return.
 	 */
 	__skb_queue_purge(&sk->sk_write_queue);
+
+	/* Set tx_stopped. This is checked when psock is bound to a kcm and we
+	 * get a writespace callback. This prevents further work being queued
+	 * from the callback (unbinding the psock occurs after canceling work.
+	 */
+	kcm->tx_stopped = 1;
+
 	release_sock(sk);
 
 	spin_lock_bh(&mux->lock);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1e40dac..a2ed3bd 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1855,6 +1855,9 @@
 		(void)l2tp_tunnel_delete(tunnel);
 	}
 	rcu_read_unlock_bh();
+
+	flush_workqueue(l2tp_wq);
+	rcu_barrier();
 }
 
 static struct pernet_operations l2tp_net_ops = {
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 5871537a..2599af6 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -139,7 +139,7 @@
 	void (*session_close)(struct l2tp_session *session);
 	void (*ref)(struct l2tp_session *session);
 	void (*deref)(struct l2tp_session *session);
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
 	void (*show)(struct seq_file *m, void *priv);
 #endif
 	uint8_t			priv[0];	/* private data */
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 57fc5a4..965f7e3 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -121,7 +121,7 @@
 }
 
 
-static struct net_device_ops l2tp_eth_netdev_ops = {
+static const struct net_device_ops l2tp_eth_netdev_ops = {
 	.ndo_init		= l2tp_eth_dev_init,
 	.ndo_uninit		= l2tp_eth_dev_uninit,
 	.ndo_start_xmit		= l2tp_eth_dev_xmit,
@@ -195,7 +195,7 @@
 	}
 }
 
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
 static void l2tp_eth_show(struct seq_file *m, void *arg)
 {
 	struct l2tp_session *session = arg;
@@ -268,7 +268,7 @@
 	priv->tunnel_sock = tunnel->sock;
 	session->recv_skb = l2tp_eth_dev_recv;
 	session->session_close = l2tp_eth_delete;
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
 	session->show = l2tp_eth_show;
 #endif
 
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 1d02e8d..bf31177 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -867,7 +867,7 @@
 	return skb->len;
 }
 
-static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = {
+static const struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = {
 	[L2TP_ATTR_NONE]		= { .type = NLA_UNSPEC, },
 	[L2TP_ATTR_PW_TYPE]		= { .type = NLA_U16, },
 	[L2TP_ATTR_ENCAP_TYPE]		= { .type = NLA_U16, },
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d9560aa..41d47bf 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -177,7 +177,7 @@
 	if (!pskb_may_pull(skb, 2))
 		return 1;
 
-	if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+	if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI))
 		skb_pull(skb, 2);
 
 	return 0;
@@ -282,7 +282,6 @@
 static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m,
 			    size_t total_len)
 {
-	static const unsigned char ppph[2] = { 0xff, 0x03 };
 	struct sock *sk = sock->sk;
 	struct sk_buff *skb;
 	int error;
@@ -312,7 +311,7 @@
 	error = -ENOMEM;
 	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
 			   uhlen + session->hdr_len +
-			   sizeof(ppph) + total_len,
+			   2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
 			   0, GFP_KERNEL);
 	if (!skb)
 		goto error_put_sess_tun;
@@ -325,8 +324,8 @@
 	skb_reserve(skb, uhlen);
 
 	/* Add PPP header */
-	skb->data[0] = ppph[0];
-	skb->data[1] = ppph[1];
+	skb->data[0] = PPP_ALLSTATIONS;
+	skb->data[1] = PPP_UI;
 	skb_put(skb, 2);
 
 	/* Copy user data into skb */
@@ -369,7 +368,6 @@
  */
 static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 {
-	static const u8 ppph[2] = { 0xff, 0x03 };
 	struct sock *sk = (struct sock *) chan->private;
 	struct sock *sk_tun;
 	struct l2tp_session *session;
@@ -398,14 +396,14 @@
 		   sizeof(struct iphdr) + /* IP header */
 		   uhlen +		/* UDP header (if L2TP_ENCAPTYPE_UDP) */
 		   session->hdr_len +	/* L2TP header */
-		   sizeof(ppph);	/* PPP header */
+		   2;			/* 2 bytes for PPP_ALLSTATIONS & PPP_UI */
 	if (skb_cow_head(skb, headroom))
 		goto abort_put_sess_tun;
 
 	/* Setup PPP header */
-	__skb_push(skb, sizeof(ppph));
-	skb->data[0] = ppph[0];
-	skb->data[1] = ppph[1];
+	__skb_push(skb, 2);
+	skb->data[0] = PPP_ALLSTATIONS;
+	skb->data[1] = PPP_UI;
 
 	local_bh_disable();
 	l2tp_xmit_skb(session, skb, session->hdr_len);
@@ -440,7 +438,7 @@
 	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
 
 	if (sock) {
-		inet_shutdown(sock, 2);
+		inet_shutdown(sock, SEND_SHUTDOWN);
 		/* Don't let the session go away before our socket does */
 		l2tp_session_inc_refcount(session);
 	}
@@ -554,7 +552,7 @@
 	return error;
 }
 
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
 static void pppol2tp_show(struct seq_file *m, void *arg)
 {
 	struct l2tp_session *session = arg;
@@ -725,7 +723,7 @@
 
 	session->recv_skb	= pppol2tp_recv;
 	session->session_close	= pppol2tp_session_close;
-#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+#if IS_ENABLED(CONFIG_L2TP_DEBUGFS)
 	session->show		= pppol2tp_show;
 #endif
 
@@ -856,7 +854,7 @@
 	error = -ENOTCONN;
 	if (sk == NULL)
 		goto end;
-	if (sk->sk_state != PPPOX_CONNECTED)
+	if (!(sk->sk_state & PPPOX_CONNECTED))
 		goto end;
 
 	error = -EBADF;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index c4a1c3e..8da86ce 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -100,15 +100,14 @@
 EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
 
 /**
- *	l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns
- *			     cached route for L3 master device if relevant
- *			     to flow
+ *	l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link
+ *			     local and multicast addresses
  *	@net: network namespace for device index lookup
  *	@fl6: IPv6 flow struct for lookup
  */
 
-struct dst_entry *l3mdev_get_rt6_dst(struct net *net,
-				     struct flowi6 *fl6)
+struct dst_entry *l3mdev_link_scope_lookup(struct net *net,
+					   struct flowi6 *fl6)
 {
 	struct dst_entry *dst = NULL;
 	struct net_device *dev;
@@ -121,70 +120,15 @@
 			dev = netdev_master_upper_dev_get_rcu(dev);
 
 		if (dev && netif_is_l3_master(dev) &&
-		    dev->l3mdev_ops->l3mdev_get_rt6_dst)
-			dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6);
+		    dev->l3mdev_ops->l3mdev_link_scope_lookup)
+			dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6);
 
 		rcu_read_unlock();
 	}
 
 	return dst;
 }
-EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst);
-
-/**
- *	l3mdev_get_saddr - get source address for a flow based on an interface
- *			   enslaved to an L3 master device
- *	@net: network namespace for device index lookup
- *	@ifindex: Interface index
- *	@fl4: IPv4 flow struct
- */
-
-int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4)
-{
-	struct net_device *dev;
-	int rc = 0;
-
-	if (ifindex) {
-		rcu_read_lock();
-
-		dev = dev_get_by_index_rcu(net, ifindex);
-		if (dev && netif_is_l3_slave(dev))
-			dev = netdev_master_upper_dev_get_rcu(dev);
-
-		if (dev && netif_is_l3_master(dev) &&
-		    dev->l3mdev_ops->l3mdev_get_saddr)
-			rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4);
-
-		rcu_read_unlock();
-	}
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(l3mdev_get_saddr);
-
-int l3mdev_get_saddr6(struct net *net, const struct sock *sk,
-		      struct flowi6 *fl6)
-{
-	struct net_device *dev;
-	int rc = 0;
-
-	if (fl6->flowi6_oif) {
-		rcu_read_lock();
-
-		dev = dev_get_by_index_rcu(net, fl6->flowi6_oif);
-		if (dev && netif_is_l3_slave(dev))
-			dev = netdev_master_upper_dev_get_rcu(dev);
-
-		if (dev && netif_is_l3_master(dev) &&
-		    dev->l3mdev_ops->l3mdev_get_saddr6)
-			rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6);
-
-		rcu_read_unlock();
-	}
-
-	return rc;
-}
-EXPORT_SYMBOL_GPL(l3mdev_get_saddr6);
+EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup);
 
 /**
  *	l3mdev_fib_rule_match - Determine if flowi references an
@@ -222,3 +166,38 @@
 
 	return rc;
 }
+
+void l3mdev_update_flow(struct net *net, struct flowi *fl)
+{
+	struct net_device *dev;
+	int ifindex;
+
+	rcu_read_lock();
+
+	if (fl->flowi_oif) {
+		dev = dev_get_by_index_rcu(net, fl->flowi_oif);
+		if (dev) {
+			ifindex = l3mdev_master_ifindex_rcu(dev);
+			if (ifindex) {
+				fl->flowi_oif = ifindex;
+				fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+				goto out;
+			}
+		}
+	}
+
+	if (fl->flowi_iif) {
+		dev = dev_get_by_index_rcu(net, fl->flowi_iif);
+		if (dev) {
+			ifindex = l3mdev_master_ifindex_rcu(dev);
+			if (ifindex) {
+				fl->flowi_iif = ifindex;
+				fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF;
+			}
+		}
+	}
+
+out:
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(l3mdev_update_flow);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 8ae3ed9..db916cf 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -38,7 +38,7 @@
 static struct sockaddr_llc llc_ui_addrnull;
 static const struct proto_ops llc_ui_ops;
 
-static long llc_ui_wait_for_conn(struct sock *sk, long timeout);
+static bool llc_ui_wait_for_conn(struct sock *sk, long timeout);
 static int llc_ui_wait_for_disc(struct sock *sk, long timeout);
 static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout);
 
@@ -551,7 +551,7 @@
 	return rc;
 }
 
-static long llc_ui_wait_for_conn(struct sock *sk, long timeout)
+static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
 {
 	DEFINE_WAIT(wait);
 
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index a9aff60..f6749dc 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -261,10 +261,16 @@
 		.timeout = timeout,
 		.ssn = start_seq_num,
 	};
-
 	int i, ret = -EOPNOTSUPP;
 	u16 status = WLAN_STATUS_REQUEST_DECLINED;
 
+	if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
+		ht_dbg(sta->sdata,
+		       "STA %pM requests BA session on unsupported tid %d\n",
+		       sta->sta.addr, tid);
+		goto end_no_lock;
+	}
+
 	if (!sta->sta.ht_cap.ht_supported) {
 		ht_dbg(sta->sdata,
 		       "STA %pM erroneously requests BA session on tid %d w/o QoS\n",
@@ -298,10 +304,13 @@
 		buf_size = IEEE80211_MAX_AMPDU_BUF;
 
 	/* make sure the size doesn't exceed the maximum supported by the hw */
-	if (buf_size > local->hw.max_rx_aggregation_subframes)
-		buf_size = local->hw.max_rx_aggregation_subframes;
+	if (buf_size > sta->sta.max_rx_aggregation_subframes)
+		buf_size = sta->sta.max_rx_aggregation_subframes;
 	params.buf_size = buf_size;
 
+	ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n",
+	       buf_size, sta->sta.addr);
+
 	/* examine state machine */
 	mutex_lock(&sta->ampdu_mlme.mtx);
 
@@ -406,8 +415,10 @@
 	}
 
 end:
-	if (status == WLAN_STATUS_SUCCESS)
+	if (status == WLAN_STATUS_SUCCESS) {
 		__set_bit(tid, sta->ampdu_mlme.agg_session_valid);
+		__clear_bit(tid, sta->ampdu_mlme.unexpected_agg);
+	}
 	mutex_unlock(&sta->ampdu_mlme.mtx);
 
 end_no_lock:
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 5650c46..45319cc 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -584,6 +584,9 @@
 	    ieee80211_hw_check(&local->hw, TX_AMPDU_SETUP_IN_HW))
 		return -EINVAL;
 
+	if (WARN_ON(tid >= IEEE80211_FIRST_TSPEC_TSID))
+		return -EINVAL;
+
 	ht_dbg(sdata, "Open BA session requested for %pM tid %u\n",
 	       pubsta->addr, tid);
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 47e99ab8..fd6541f 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3,6 +3,7 @@
  *
  * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2015  Intel Mobile Communications GmbH
+ * Copyright (C) 2015-2016 Intel Deutschland GmbH
  *
  * This file is GPLv2 as found in COPYING.
  */
@@ -39,7 +40,7 @@
 
 	if (type == NL80211_IFTYPE_MONITOR && flags) {
 		sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
-		sdata->u.mntr_flags = *flags;
+		sdata->u.mntr.flags = *flags;
 	}
 
 	return wdev;
@@ -73,8 +74,29 @@
 		sdata->u.mgd.use_4addr = params->use_4addr;
 	}
 
-	if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) {
+	if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
 		struct ieee80211_local *local = sdata->local;
+		struct ieee80211_sub_if_data *monitor_sdata;
+		u32 mu_mntr_cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER;
+
+		monitor_sdata = rtnl_dereference(local->monitor_sdata);
+		if (monitor_sdata &&
+		    wiphy_ext_feature_isset(wiphy, mu_mntr_cap_flag)) {
+			memcpy(monitor_sdata->vif.bss_conf.mu_group.membership,
+			       params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN);
+			memcpy(monitor_sdata->vif.bss_conf.mu_group.position,
+			       params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN,
+			       WLAN_USER_POSITION_LEN);
+			monitor_sdata->vif.mu_mimo_owner = true;
+			ieee80211_bss_info_change_notify(monitor_sdata,
+							 BSS_CHANGED_MU_GROUPS);
+
+			ether_addr_copy(monitor_sdata->u.mntr.mu_follow_addr,
+					params->macaddr);
+		}
+
+		if (!flags)
+			return 0;
 
 		if (ieee80211_sdata_running(sdata)) {
 			u32 mask = MONITOR_FLAG_COOK_FRAMES |
@@ -89,11 +111,11 @@
 			 *	cooked_mntrs, monitor and all fif_* counters
 			 *	reconfigure hardware
 			 */
-			if ((*flags & mask) != (sdata->u.mntr_flags & mask))
+			if ((*flags & mask) != (sdata->u.mntr.flags & mask))
 				return -EBUSY;
 
 			ieee80211_adjust_monitor_flags(sdata, -1);
-			sdata->u.mntr_flags = *flags;
+			sdata->u.mntr.flags = *flags;
 			ieee80211_adjust_monitor_flags(sdata, 1);
 
 			ieee80211_configure_filter(local);
@@ -103,7 +125,7 @@
 			 * and ieee80211_do_open take care of "everything"
 			 * mentioned in the comment above.
 			 */
-			sdata->u.mntr_flags = *flags;
+			sdata->u.mntr.flags = *flags;
 		}
 	}
 
@@ -131,6 +153,149 @@
 	ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev));
 }
 
+static int ieee80211_start_nan(struct wiphy *wiphy,
+			       struct wireless_dev *wdev,
+			       struct cfg80211_nan_conf *conf)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+	int ret;
+
+	mutex_lock(&sdata->local->chanctx_mtx);
+	ret = ieee80211_check_combinations(sdata, NULL, 0, 0);
+	mutex_unlock(&sdata->local->chanctx_mtx);
+	if (ret < 0)
+		return ret;
+
+	ret = ieee80211_do_open(wdev, true);
+	if (ret)
+		return ret;
+
+	ret = drv_start_nan(sdata->local, sdata, conf);
+	if (ret)
+		ieee80211_sdata_stop(sdata);
+
+	sdata->u.nan.conf = *conf;
+
+	return ret;
+}
+
+static void ieee80211_stop_nan(struct wiphy *wiphy,
+			       struct wireless_dev *wdev)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+
+	drv_stop_nan(sdata->local, sdata);
+	ieee80211_sdata_stop(sdata);
+}
+
+static int ieee80211_nan_change_conf(struct wiphy *wiphy,
+				     struct wireless_dev *wdev,
+				     struct cfg80211_nan_conf *conf,
+				     u32 changes)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+	struct cfg80211_nan_conf new_conf;
+	int ret = 0;
+
+	if (sdata->vif.type != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (!ieee80211_sdata_running(sdata))
+		return -ENETDOWN;
+
+	new_conf = sdata->u.nan.conf;
+
+	if (changes & CFG80211_NAN_CONF_CHANGED_PREF)
+		new_conf.master_pref = conf->master_pref;
+
+	if (changes & CFG80211_NAN_CONF_CHANGED_DUAL)
+		new_conf.dual = conf->dual;
+
+	ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes);
+	if (!ret)
+		sdata->u.nan.conf = new_conf;
+
+	return ret;
+}
+
+static int ieee80211_add_nan_func(struct wiphy *wiphy,
+				  struct wireless_dev *wdev,
+				  struct cfg80211_nan_func *nan_func)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+	int ret;
+
+	if (sdata->vif.type != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (!ieee80211_sdata_running(sdata))
+		return -ENETDOWN;
+
+	spin_lock_bh(&sdata->u.nan.func_lock);
+
+	ret = idr_alloc(&sdata->u.nan.function_inst_ids,
+			nan_func, 1, sdata->local->hw.max_nan_de_entries + 1,
+			GFP_ATOMIC);
+	spin_unlock_bh(&sdata->u.nan.func_lock);
+
+	if (ret < 0)
+		return ret;
+
+	nan_func->instance_id = ret;
+
+	WARN_ON(nan_func->instance_id == 0);
+
+	ret = drv_add_nan_func(sdata->local, sdata, nan_func);
+	if (ret) {
+		spin_lock_bh(&sdata->u.nan.func_lock);
+		idr_remove(&sdata->u.nan.function_inst_ids,
+			   nan_func->instance_id);
+		spin_unlock_bh(&sdata->u.nan.func_lock);
+	}
+
+	return ret;
+}
+
+static struct cfg80211_nan_func *
+ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata,
+				  u64 cookie)
+{
+	struct cfg80211_nan_func *func;
+	int id;
+
+	lockdep_assert_held(&sdata->u.nan.func_lock);
+
+	idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) {
+		if (func->cookie == cookie)
+			return func;
+	}
+
+	return NULL;
+}
+
+static void ieee80211_del_nan_func(struct wiphy *wiphy,
+				  struct wireless_dev *wdev, u64 cookie)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+	struct cfg80211_nan_func *func;
+	u8 instance_id = 0;
+
+	if (sdata->vif.type != NL80211_IFTYPE_NAN ||
+	    !ieee80211_sdata_running(sdata))
+		return;
+
+	spin_lock_bh(&sdata->u.nan.func_lock);
+
+	func = ieee80211_find_nan_func_by_cookie(sdata, cookie);
+	if (func)
+		instance_id = func->instance_id;
+
+	spin_unlock_bh(&sdata->u.nan.func_lock);
+
+	if (instance_id)
+		drv_del_nan_func(sdata->local, sdata, instance_id);
+}
+
 static int ieee80211_set_noack_map(struct wiphy *wiphy,
 				  struct net_device *dev,
 				  u16 noack_map)
@@ -236,6 +401,7 @@
 	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_MONITOR:
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NUM_NL80211_IFTYPES:
 	case NL80211_IFTYPE_P2P_CLIENT:
@@ -869,7 +1035,7 @@
 
 	/* free all potentially still buffered bcast frames */
 	local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf);
-	skb_queue_purge(&sdata->u.ap.ps.bc_buf);
+	ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf);
 
 	mutex_lock(&local->mtx);
 	ieee80211_vif_copy_chanctx_to_vlans(sdata, true);
@@ -2015,6 +2181,7 @@
 		     !(req->flags & NL80211_SCAN_FLAG_AP)))
 			return -EOPNOTSUPP;
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -2940,10 +3107,6 @@
 	}
 
 	chanctx = container_of(conf, struct ieee80211_chanctx, conf);
-	if (!chanctx) {
-		err = -EBUSY;
-		goto out;
-	}
 
 	ch_switch.timestamp = 0;
 	ch_switch.device_timestamp = 0;
@@ -3360,6 +3523,63 @@
 	return -ENOENT;
 }
 
+void ieee80211_nan_func_terminated(struct ieee80211_vif *vif,
+				   u8 inst_id,
+				   enum nl80211_nan_func_term_reason reason,
+				   gfp_t gfp)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct cfg80211_nan_func *func;
+	u64 cookie;
+
+	if (WARN_ON(vif->type != NL80211_IFTYPE_NAN))
+		return;
+
+	spin_lock_bh(&sdata->u.nan.func_lock);
+
+	func = idr_find(&sdata->u.nan.function_inst_ids, inst_id);
+	if (WARN_ON(!func)) {
+		spin_unlock_bh(&sdata->u.nan.func_lock);
+		return;
+	}
+
+	cookie = func->cookie;
+	idr_remove(&sdata->u.nan.function_inst_ids, inst_id);
+
+	spin_unlock_bh(&sdata->u.nan.func_lock);
+
+	cfg80211_free_nan_func(func);
+
+	cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id,
+				     reason, cookie, gfp);
+}
+EXPORT_SYMBOL(ieee80211_nan_func_terminated);
+
+void ieee80211_nan_func_match(struct ieee80211_vif *vif,
+			      struct cfg80211_nan_match_params *match,
+			      gfp_t gfp)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct cfg80211_nan_func *func;
+
+	if (WARN_ON(vif->type != NL80211_IFTYPE_NAN))
+		return;
+
+	spin_lock_bh(&sdata->u.nan.func_lock);
+
+	func = idr_find(&sdata->u.nan.function_inst_ids,  match->inst_id);
+	if (WARN_ON(!func)) {
+		spin_unlock_bh(&sdata->u.nan.func_lock);
+		return;
+	}
+	match->cookie = func->cookie;
+
+	spin_unlock_bh(&sdata->u.nan.func_lock);
+
+	cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp);
+}
+EXPORT_SYMBOL(ieee80211_nan_func_match);
+
 const struct cfg80211_ops mac80211_config_ops = {
 	.add_virtual_intf = ieee80211_add_iface,
 	.del_virtual_intf = ieee80211_del_iface,
@@ -3445,4 +3665,9 @@
 	.set_ap_chanwidth = ieee80211_set_ap_chanwidth,
 	.add_tx_ts = ieee80211_add_tx_ts,
 	.del_tx_ts = ieee80211_del_tx_ts,
+	.start_nan = ieee80211_start_nan,
+	.stop_nan = ieee80211_stop_nan,
+	.nan_change_conf = ieee80211_nan_change_conf,
+	.add_nan_func = ieee80211_add_nan_func,
+	.del_nan_func = ieee80211_del_nan_func,
 };
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 74142d0..e75cbf6 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -274,6 +274,7 @@
 				    ieee80211_get_max_required_bw(sdata));
 			break;
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_NAN:
 			continue;
 		case NL80211_IFTYPE_ADHOC:
 		case NL80211_IFTYPE_WDS:
@@ -646,6 +647,9 @@
 	struct ieee80211_chanctx *curr_ctx = NULL;
 	int ret = 0;
 
+	if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN))
+		return -ENOTSUPP;
+
 	conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
 					 lockdep_is_held(&local->chanctx_mtx));
 
@@ -718,6 +722,7 @@
 
 		switch (sdata->vif.type) {
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_NAN:
 			continue;
 		case NL80211_IFTYPE_STATION:
 			if (!sdata->u.mgd.associated)
@@ -980,6 +985,7 @@
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_P2P_GO:
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 	case NUM_NL80211_IFTYPES:
 		WARN_ON(1);
 		break;
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 2906c10..f56e2f4 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -71,138 +71,45 @@
 DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s",
 	local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver");
 
-struct aqm_info {
-	struct ieee80211_local *local;
-	size_t size;
-	size_t len;
-	unsigned char buf[0];
-};
-
-#define AQM_HDR_LEN 200
-#define AQM_HW_ENTRY_LEN 40
-#define AQM_TXQ_ENTRY_LEN 110
-
-static int aqm_open(struct inode *inode, struct file *file)
-{
-	struct ieee80211_local *local = inode->i_private;
-	struct ieee80211_sub_if_data *sdata;
-	struct sta_info *sta;
-	struct txq_info *txqi;
-	struct fq *fq = &local->fq;
-	struct aqm_info *info = NULL;
-	int len = 0;
-	int i;
-
-	if (!local->ops->wake_tx_queue)
-		return -EOPNOTSUPP;
-
-	len += AQM_HDR_LEN;
-	len += 6 * AQM_HW_ENTRY_LEN;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(sdata, &local->interfaces, list)
-		len += AQM_TXQ_ENTRY_LEN;
-	list_for_each_entry_rcu(sta, &local->sta_list, list)
-		len += AQM_TXQ_ENTRY_LEN * ARRAY_SIZE(sta->sta.txq);
-	rcu_read_unlock();
-
-	info = vmalloc(len);
-	if (!info)
-		return -ENOMEM;
-
-	spin_lock_bh(&local->fq.lock);
-	rcu_read_lock();
-
-	file->private_data = info;
-	info->local = local;
-	info->size = len;
-	len = 0;
-
-	len += scnprintf(info->buf + len, info->size - len,
-			 "* hw\n"
-			 "access name value\n"
-			 "R fq_flows_cnt %u\n"
-			 "R fq_backlog %u\n"
-			 "R fq_overlimit %u\n"
-			 "R fq_collisions %u\n"
-			 "RW fq_limit %u\n"
-			 "RW fq_quantum %u\n",
-			 fq->flows_cnt,
-			 fq->backlog,
-			 fq->overlimit,
-			 fq->collisions,
-			 fq->limit,
-			 fq->quantum);
-
-	len += scnprintf(info->buf + len,
-			 info->size - len,
-			 "* vif\n"
-			 "ifname addr ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n");
-
-	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
-		txqi = to_txq_info(sdata->vif.txq);
-		len += scnprintf(info->buf + len, info->size - len,
-				 "%s %pM %u %u %u %u %u %u %u %u\n",
-				 sdata->name,
-				 sdata->vif.addr,
-				 txqi->txq.ac,
-				 txqi->tin.backlog_bytes,
-				 txqi->tin.backlog_packets,
-				 txqi->tin.flows,
-				 txqi->tin.overlimit,
-				 txqi->tin.collisions,
-				 txqi->tin.tx_bytes,
-				 txqi->tin.tx_packets);
-	}
-
-	len += scnprintf(info->buf + len,
-			 info->size - len,
-			 "* sta\n"
-			 "ifname addr tid ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n");
-
-	list_for_each_entry_rcu(sta, &local->sta_list, list) {
-		sdata = sta->sdata;
-		for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
-			txqi = to_txq_info(sta->sta.txq[i]);
-			len += scnprintf(info->buf + len, info->size - len,
-					 "%s %pM %d %d %u %u %u %u %u %u %u\n",
-					 sdata->name,
-					 sta->sta.addr,
-					 txqi->txq.tid,
-					 txqi->txq.ac,
-					 txqi->tin.backlog_bytes,
-					 txqi->tin.backlog_packets,
-					 txqi->tin.flows,
-					 txqi->tin.overlimit,
-					 txqi->tin.collisions,
-					 txqi->tin.tx_bytes,
-					 txqi->tin.tx_packets);
-		}
-	}
-
-	info->len = len;
-
-	rcu_read_unlock();
-	spin_unlock_bh(&local->fq.lock);
-
-	return 0;
-}
-
-static int aqm_release(struct inode *inode, struct file *file)
-{
-	vfree(file->private_data);
-	return 0;
-}
-
 static ssize_t aqm_read(struct file *file,
 			char __user *user_buf,
 			size_t count,
 			loff_t *ppos)
 {
-	struct aqm_info *info = file->private_data;
+	struct ieee80211_local *local = file->private_data;
+	struct fq *fq = &local->fq;
+	char buf[200];
+	int len = 0;
+
+	spin_lock_bh(&local->fq.lock);
+	rcu_read_lock();
+
+	len = scnprintf(buf, sizeof(buf),
+			"access name value\n"
+			"R fq_flows_cnt %u\n"
+			"R fq_backlog %u\n"
+			"R fq_overlimit %u\n"
+			"R fq_overmemory %u\n"
+			"R fq_collisions %u\n"
+			"R fq_memory_usage %u\n"
+			"RW fq_memory_limit %u\n"
+			"RW fq_limit %u\n"
+			"RW fq_quantum %u\n",
+			fq->flows_cnt,
+			fq->backlog,
+			fq->overmemory,
+			fq->overlimit,
+			fq->collisions,
+			fq->memory_usage,
+			fq->memory_limit,
+			fq->limit,
+			fq->quantum);
+
+	rcu_read_unlock();
+	spin_unlock_bh(&local->fq.lock);
 
 	return simple_read_from_buffer(user_buf, count, ppos,
-				       info->buf, info->len);
+				       buf, len);
 }
 
 static ssize_t aqm_write(struct file *file,
@@ -210,8 +117,7 @@
 			 size_t count,
 			 loff_t *ppos)
 {
-	struct aqm_info *info = file->private_data;
-	struct ieee80211_local *local = info->local;
+	struct ieee80211_local *local = file->private_data;
 	char buf[100];
 	size_t len;
 
@@ -228,6 +134,8 @@
 
 	if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1)
 		return count;
+	else if (sscanf(buf, "fq_memory_limit %u", &local->fq.memory_limit) == 1)
+		return count;
 	else if (sscanf(buf, "fq_quantum %u", &local->fq.quantum) == 1)
 		return count;
 
@@ -237,8 +145,7 @@
 static const struct file_operations aqm_ops = {
 	.write = aqm_write,
 	.read = aqm_read,
-	.open = aqm_open,
-	.release = aqm_release,
+	.open = simple_open,
 	.llseek = default_llseek,
 };
 
@@ -302,6 +209,7 @@
 	FLAG(USES_RSS),
 	FLAG(TX_AMSDU),
 	FLAG(TX_FRAG_LIST),
+	FLAG(REPORTS_LOW_ACK),
 #undef FLAG
 };
 
@@ -428,7 +336,9 @@
 	DEBUGFS_ADD(hwflags);
 	DEBUGFS_ADD(user_power);
 	DEBUGFS_ADD(power);
-	DEBUGFS_ADD_MODE(aqm, 0600);
+
+	if (local->ops->wake_tx_queue)
+		DEBUGFS_ADD_MODE(aqm, 0600);
 
 	statsd = debugfs_create_dir("statistics", phyd);
 
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index a5ba739..bcec124 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -30,7 +30,7 @@
 	size_t count, loff_t *ppos,
 	ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int))
 {
-	char buf[70];
+	char buf[200];
 	ssize_t ret = -EINVAL;
 
 	read_lock(&dev_base_lock);
@@ -486,6 +486,38 @@
 }
 IEEE80211_IF_FILE_R(num_buffered_multicast);
 
+static ssize_t ieee80211_if_fmt_aqm(
+	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct txq_info *txqi = to_txq_info(sdata->vif.txq);
+	int len;
+
+	spin_lock_bh(&local->fq.lock);
+	rcu_read_lock();
+
+	len = scnprintf(buf,
+			buflen,
+			"ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n"
+			"%u %u %u %u %u %u %u %u %u %u\n",
+			txqi->txq.ac,
+			txqi->tin.backlog_bytes,
+			txqi->tin.backlog_packets,
+			txqi->tin.flows,
+			txqi->cstats.drop_count,
+			txqi->cstats.ecn_mark,
+			txqi->tin.overlimit,
+			txqi->tin.collisions,
+			txqi->tin.tx_bytes,
+			txqi->tin.tx_packets);
+
+	rcu_read_unlock();
+	spin_unlock_bh(&local->fq.lock);
+
+	return len;
+}
+IEEE80211_IF_FILE_R(aqm);
+
 /* IBSS attributes */
 static ssize_t ieee80211_if_fmt_tsf(
 	const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
@@ -524,9 +556,15 @@
 		ret = kstrtoull(buf, 10, &tsf);
 		if (ret < 0)
 			return ret;
-		if (tsf_is_delta)
-			tsf = drv_get_tsf(local, sdata) + tsf_is_delta * tsf;
-		if (local->ops->set_tsf) {
+		if (tsf_is_delta && local->ops->offset_tsf) {
+			drv_offset_tsf(local, sdata, tsf_is_delta * tsf);
+			wiphy_info(local->hw.wiphy,
+				   "debugfs offset TSF by %018lld\n",
+				   tsf_is_delta * tsf);
+		} else if (local->ops->set_tsf) {
+			if (tsf_is_delta)
+				tsf = drv_get_tsf(local, sdata) +
+				      tsf_is_delta * tsf;
 			drv_set_tsf(local, sdata, tsf);
 			wiphy_info(local->hw.wiphy,
 				   "debugfs set TSF to %#018llx\n", tsf);
@@ -618,6 +656,9 @@
 	DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_2ghz);
 	DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz);
 	DEBUGFS_ADD(hw_queues);
+
+	if (sdata->local->ops->wake_tx_queue)
+		DEBUGFS_ADD(aqm);
 }
 
 static void add_sta_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index fd33413..a2fcdb4 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -133,6 +133,55 @@
 }
 STA_OPS(last_seq_ctrl);
 
+#define AQM_TXQ_ENTRY_LEN 130
+
+static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
+			size_t count, loff_t *ppos)
+{
+	struct sta_info *sta = file->private_data;
+	struct ieee80211_local *local = sta->local;
+	size_t bufsz = AQM_TXQ_ENTRY_LEN*(IEEE80211_NUM_TIDS+1);
+	char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
+	struct txq_info *txqi;
+	ssize_t rv;
+	int i;
+
+	if (!buf)
+		return -ENOMEM;
+
+	spin_lock_bh(&local->fq.lock);
+	rcu_read_lock();
+
+	p += scnprintf(p,
+		       bufsz+buf-p,
+		       "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n");
+
+	for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
+		txqi = to_txq_info(sta->sta.txq[i]);
+		p += scnprintf(p, bufsz+buf-p,
+			       "%d %d %u %u %u %u %u %u %u %u %u\n",
+			       txqi->txq.tid,
+			       txqi->txq.ac,
+			       txqi->tin.backlog_bytes,
+			       txqi->tin.backlog_packets,
+			       txqi->tin.flows,
+			       txqi->cstats.drop_count,
+			       txqi->cstats.ecn_mark,
+			       txqi->tin.overlimit,
+			       txqi->tin.collisions,
+			       txqi->tin.tx_bytes,
+			       txqi->tin.tx_packets);
+	}
+
+	rcu_read_unlock();
+	spin_unlock_bh(&local->fq.lock);
+
+	rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+	kfree(buf);
+	return rv;
+}
+STA_OPS(aqm);
+
 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
@@ -478,6 +527,9 @@
 	DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments);
 	DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered);
 
+	if (local->ops->wake_tx_queue)
+		DEBUGFS_ADD(aqm);
+
 	if (sizeof(sta->driver_buffered_tids) == sizeof(u32))
 		debugfs_create_x32("driver_buffered_tids", 0400,
 				   sta->debugfs_dir,
@@ -492,10 +544,6 @@
 
 void ieee80211_sta_debugfs_remove(struct sta_info *sta)
 {
-	struct ieee80211_local *local = sta->local;
-	struct ieee80211_sub_if_data *sdata = sta->sdata;
-
-	drv_sta_remove_debugfs(local, sdata, &sta->sta, sta->debugfs_dir);
 	debugfs_remove_recursive(sta->debugfs_dir);
 	sta->debugfs_dir = NULL;
 }
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index c258f10..bb886e7 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -62,7 +62,7 @@
 	if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
 		    (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
 		     !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
-		     !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))))
+		     !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))))
 		return -EINVAL;
 
 	trace_drv_add_interface(local, sdata);
@@ -215,6 +215,21 @@
 	trace_drv_return_void(local);
 }
 
+void drv_offset_tsf(struct ieee80211_local *local,
+		    struct ieee80211_sub_if_data *sdata,
+		    s64 offset)
+{
+	might_sleep();
+
+	if (!check_sdata_in_driver(sdata))
+		return;
+
+	trace_drv_offset_tsf(local, sdata, offset);
+	if (local->ops->offset_tsf)
+		local->ops->offset_tsf(&local->hw, &sdata->vif, offset);
+	trace_drv_return_void(local);
+}
+
 void drv_reset_tsf(struct ieee80211_local *local,
 		   struct ieee80211_sub_if_data *sdata)
 {
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 184473c..09f77e4 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -162,7 +162,9 @@
 		return;
 
 	if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE ||
-			 sdata->vif.type == NL80211_IFTYPE_MONITOR))
+			 sdata->vif.type == NL80211_IFTYPE_NAN ||
+			 (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+			  !sdata->vif.mu_mimo_owner)))
 		return;
 
 	if (!check_sdata_in_driver(sdata))
@@ -498,21 +500,6 @@
 		local->ops->sta_add_debugfs(&local->hw, &sdata->vif,
 					    sta, dir);
 }
-
-static inline void drv_sta_remove_debugfs(struct ieee80211_local *local,
-					  struct ieee80211_sub_if_data *sdata,
-					  struct ieee80211_sta *sta,
-					  struct dentry *dir)
-{
-	might_sleep();
-
-	sdata = get_bss_sdata(sdata);
-	check_sdata_in_driver(sdata);
-
-	if (local->ops->sta_remove_debugfs)
-		local->ops->sta_remove_debugfs(&local->hw, &sdata->vif,
-					       sta, dir);
-}
 #endif
 
 static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local,
@@ -582,6 +569,9 @@
 void drv_set_tsf(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata,
 		 u64 tsf);
+void drv_offset_tsf(struct ieee80211_local *local,
+		    struct ieee80211_sub_if_data *sdata,
+		    s64 offset);
 void drv_reset_tsf(struct ieee80211_local *local,
 		   struct ieee80211_sub_if_data *sdata);
 
@@ -1088,13 +1078,13 @@
 }
 
 static inline u32 drv_get_expected_throughput(struct ieee80211_local *local,
-					      struct ieee80211_sta *sta)
+					      struct sta_info *sta)
 {
 	u32 ret = 0;
 
-	trace_drv_get_expected_throughput(sta);
-	if (local->ops->get_expected_throughput)
-		ret = local->ops->get_expected_throughput(sta);
+	trace_drv_get_expected_throughput(&sta->sta);
+	if (local->ops->get_expected_throughput && sta->uploaded)
+		ret = local->ops->get_expected_throughput(&local->hw, &sta->sta);
 	trace_drv_return_u32(local, ret);
 
 	return ret;
@@ -1179,4 +1169,83 @@
 	local->ops->wake_tx_queue(&local->hw, &txq->txq);
 }
 
+static inline int drv_start_nan(struct ieee80211_local *local,
+				struct ieee80211_sub_if_data *sdata,
+				struct cfg80211_nan_conf *conf)
+{
+	int ret;
+
+	might_sleep();
+	check_sdata_in_driver(sdata);
+
+	trace_drv_start_nan(local, sdata, conf);
+	ret = local->ops->start_nan(&local->hw, &sdata->vif, conf);
+	trace_drv_return_int(local, ret);
+	return ret;
+}
+
+static inline void drv_stop_nan(struct ieee80211_local *local,
+				struct ieee80211_sub_if_data *sdata)
+{
+	might_sleep();
+	check_sdata_in_driver(sdata);
+
+	trace_drv_stop_nan(local, sdata);
+	local->ops->stop_nan(&local->hw, &sdata->vif);
+	trace_drv_return_void(local);
+}
+
+static inline int drv_nan_change_conf(struct ieee80211_local *local,
+				       struct ieee80211_sub_if_data *sdata,
+				       struct cfg80211_nan_conf *conf,
+				       u32 changes)
+{
+	int ret;
+
+	might_sleep();
+	check_sdata_in_driver(sdata);
+
+	if (!local->ops->nan_change_conf)
+		return -EOPNOTSUPP;
+
+	trace_drv_nan_change_conf(local, sdata, conf, changes);
+	ret = local->ops->nan_change_conf(&local->hw, &sdata->vif, conf,
+					  changes);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
+static inline int drv_add_nan_func(struct ieee80211_local *local,
+				   struct ieee80211_sub_if_data *sdata,
+				   const struct cfg80211_nan_func *nan_func)
+{
+	int ret;
+
+	might_sleep();
+	check_sdata_in_driver(sdata);
+
+	if (!local->ops->add_nan_func)
+		return -EOPNOTSUPP;
+
+	trace_drv_add_nan_func(local, sdata, nan_func);
+	ret = local->ops->add_nan_func(&local->hw, &sdata->vif, nan_func);
+	trace_drv_return_int(local, ret);
+
+	return ret;
+}
+
+static inline void drv_del_nan_func(struct ieee80211_local *local,
+				   struct ieee80211_sub_if_data *sdata,
+				   u8 instance_id)
+{
+	might_sleep();
+	check_sdata_in_driver(sdata);
+
+	trace_drv_del_nan_func(local, sdata, instance_id);
+	if (local->ops->del_nan_func)
+		local->ops->del_nan_func(&local->hw, &sdata->vif, instance_id);
+	trace_drv_return_void(local);
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f56d342..34c2add 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -3,7 +3,7 @@
  * Copyright 2005, Devicescape Software, Inc.
  * Copyright 2006-2007	Jiri Benc <jbenc@suse.cz>
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
- * Copyright 2013-2014  Intel Mobile Communications GmbH
+ * Copyright 2013-2015  Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -86,6 +86,8 @@
 
 #define IEEE80211_DEAUTH_FRAME_LEN	(24 /* hdr */ + 2 /* reason */)
 
+#define IEEE80211_MAX_NAN_INSTANCE_ID 255
+
 struct ieee80211_fragment_entry {
 	struct sk_buff_head skb_list;
 	unsigned long first_frag_time;
@@ -813,17 +815,39 @@
  * @def_flow: used as a fallback flow when a packet destined to @tin hashes to
  *	a fq_flow which is already owned by a different tin
  * @def_cvars: codel vars for @def_flow
+ * @frags: used to keep fragments created after dequeue
  */
 struct txq_info {
 	struct fq_tin tin;
 	struct fq_flow def_flow;
 	struct codel_vars def_cvars;
+	struct codel_stats cstats;
+	struct sk_buff_head frags;
 	unsigned long flags;
 
 	/* keep last! */
 	struct ieee80211_txq txq;
 };
 
+struct ieee80211_if_mntr {
+	u32 flags;
+	u8 mu_follow_addr[ETH_ALEN] __aligned(2);
+};
+
+/**
+ * struct ieee80211_if_nan - NAN state
+ *
+ * @conf: current NAN configuration
+ * @func_ids: a bitmap of available instance_id's
+ */
+struct ieee80211_if_nan {
+	struct cfg80211_nan_conf conf;
+
+	/* protects function_inst_ids */
+	spinlock_t func_lock;
+	struct idr function_inst_ids;
+};
+
 struct ieee80211_sub_if_data {
 	struct list_head list;
 
@@ -922,7 +946,8 @@
 		struct ieee80211_if_ibss ibss;
 		struct ieee80211_if_mesh mesh;
 		struct ieee80211_if_ocb ocb;
-		u32 mntr_flags;
+		struct ieee80211_if_mntr mntr;
+		struct ieee80211_if_nan nan;
 	} u;
 
 #ifdef CONFIG_MAC80211_DEBUGFS
@@ -1112,7 +1137,6 @@
 	struct fq fq;
 	struct codel_vars *cvars;
 	struct codel_params cparams;
-	struct codel_stats cstats;
 
 	const struct ieee80211_ops *ops;
 
@@ -1208,7 +1232,7 @@
 	spinlock_t tim_lock;
 	unsigned long num_sta;
 	struct list_head sta_list;
-	struct rhashtable sta_hash;
+	struct rhltable sta_hash;
 	struct timer_list sta_cleanup;
 	int sta_generation;
 
@@ -1476,6 +1500,13 @@
 	return container_of(txq, struct txq_info, txq);
 }
 
+static inline bool txq_has_queue(struct ieee80211_txq *txq)
+{
+	struct txq_info *txqi = to_txq_info(txq);
+
+	return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets);
+}
+
 static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
 {
 	return ether_addr_equal(raddr, addr) ||
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b123a9e..638ec07 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -43,6 +43,8 @@
  * by either the RTNL, the iflist_mtx or RCU.
  */
 
+static void ieee80211_iface_work(struct work_struct *work);
+
 bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
 {
 	struct ieee80211_chanctx_conf *chanctx_conf;
@@ -188,7 +190,7 @@
 			continue;
 
 		if (iter->vif.type == NL80211_IFTYPE_MONITOR &&
-		    !(iter->u.mntr_flags & MONITOR_FLAG_ACTIVE))
+		    !(iter->u.mntr.flags & MONITOR_FLAG_ACTIVE))
 			continue;
 
 		m = iter->vif.addr;
@@ -217,7 +219,7 @@
 		return -EBUSY;
 
 	if (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
-	    !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
+	    !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
 		check_dup = false;
 
 	ret = ieee80211_verify_mac(sdata, sa->sa_data, check_dup);
@@ -325,6 +327,9 @@
 	int n_queues = sdata->local->hw.queues;
 	int i;
 
+	if (iftype == NL80211_IFTYPE_NAN)
+		return 0;
+
 	if (iftype != NL80211_IFTYPE_P2P_DEVICE) {
 		for (i = 0; i < IEEE80211_NUM_ACS; i++) {
 			if (WARN_ON_ONCE(sdata->vif.hw_queue[i] ==
@@ -357,7 +362,7 @@
 				    const int offset)
 {
 	struct ieee80211_local *local = sdata->local;
-	u32 flags = sdata->u.mntr_flags;
+	u32 flags = sdata->u.mntr.flags;
 
 #define ADJUST(_f, _s)	do {					\
 	if (flags & MONITOR_FLAG_##_f)				\
@@ -448,6 +453,9 @@
 		return ret;
 	}
 
+	skb_queue_head_init(&sdata->skb_queue);
+	INIT_WORK(&sdata->work, ieee80211_iface_work);
+
 	return 0;
 }
 
@@ -540,6 +548,7 @@
 	case NL80211_IFTYPE_ADHOC:
 	case NL80211_IFTYPE_P2P_DEVICE:
 	case NL80211_IFTYPE_OCB:
+	case NL80211_IFTYPE_NAN:
 		/* no special treatment */
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
@@ -589,12 +598,12 @@
 		}
 		break;
 	case NL80211_IFTYPE_MONITOR:
-		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) {
+		if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) {
 			local->cooked_mntrs++;
 			break;
 		}
 
-		if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) {
+		if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
 			res = drv_add_interface(local, sdata);
 			if (res)
 				goto err_stop;
@@ -641,7 +650,8 @@
 			local->fif_probe_req++;
 		}
 
-		if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE)
+		if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+		    sdata->vif.type != NL80211_IFTYPE_NAN)
 			changed |= ieee80211_reset_erp_info(sdata);
 		ieee80211_bss_info_change_notify(sdata, changed);
 
@@ -655,6 +665,7 @@
 			break;
 		case NL80211_IFTYPE_WDS:
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_NAN:
 			break;
 		default:
 			/* not reached */
@@ -787,6 +798,7 @@
 	struct ps_data *ps;
 	struct cfg80211_chan_def chandef;
 	bool cancel_scan;
+	struct cfg80211_nan_func *func;
 
 	clear_bit(SDATA_STATE_RUNNING, &sdata->state);
 
@@ -926,7 +938,7 @@
 		/* no need to tell driver */
 		break;
 	case NL80211_IFTYPE_MONITOR:
-		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) {
+		if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) {
 			local->cooked_mntrs--;
 			break;
 		}
@@ -939,6 +951,18 @@
 
 		ieee80211_adjust_monitor_flags(sdata, -1);
 		break;
+	case NL80211_IFTYPE_NAN:
+		/* clean all the functions */
+		spin_lock_bh(&sdata->u.nan.func_lock);
+
+		idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, i) {
+			idr_remove(&sdata->u.nan.function_inst_ids, i);
+			cfg80211_free_nan_func(func);
+		}
+		idr_destroy(&sdata->u.nan.function_inst_ids);
+
+		spin_unlock_bh(&sdata->u.nan.func_lock);
+		break;
 	case NL80211_IFTYPE_P2P_DEVICE:
 		/* relies on synchronize_rcu() below */
 		RCU_INIT_POINTER(local->p2p_sdata, NULL);
@@ -1012,7 +1036,7 @@
 		ieee80211_recalc_idle(local);
 		mutex_unlock(&local->mtx);
 
-		if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
+		if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
 			break;
 
 		/* fall through */
@@ -1444,12 +1468,17 @@
 	case NL80211_IFTYPE_MONITOR:
 		sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP;
 		sdata->dev->netdev_ops = &ieee80211_monitorif_ops;
-		sdata->u.mntr_flags = MONITOR_FLAG_CONTROL |
+		sdata->u.mntr.flags = MONITOR_FLAG_CONTROL |
 				      MONITOR_FLAG_OTHER_BSS;
 		break;
 	case NL80211_IFTYPE_WDS:
 		sdata->vif.bss_conf.bssid = NULL;
 		break;
+	case NL80211_IFTYPE_NAN:
+		idr_init(&sdata->u.nan.function_inst_ids);
+		spin_lock_init(&sdata->u.nan.func_lock);
+		sdata->vif.bss_conf.bssid = sdata->vif.addr;
+		break;
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_DEVICE:
 		sdata->vif.bss_conf.bssid = sdata->vif.addr;
@@ -1717,7 +1746,7 @@
 
 	ASSERT_RTNL();
 
-	if (type == NL80211_IFTYPE_P2P_DEVICE) {
+	if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) {
 		struct wireless_dev *wdev;
 
 		sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size,
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index d00ea9b..1075ac2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -660,6 +660,9 @@
 
 	ieee80211_roc_setup(local);
 
+	local->hw.radiotap_timestamp.units_pos = -1;
+	local->hw.radiotap_timestamp.accuracy = -1;
+
 	return &local->hw;
  err_free:
 	wiphy_free(wiphy);
@@ -818,6 +821,11 @@
 	     !local->ops->tdls_recv_channel_switch))
 		return -EOPNOTSUPP;
 
+	if (WARN_ON(local->hw.wiphy->interface_modes &
+			BIT(NL80211_IFTYPE_NAN) &&
+		    (!local->ops->start_nan || !local->ops->stop_nan)))
+		return -EINVAL;
+
 #ifdef CONFIG_PM
 	if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume))
 		return -EINVAL;
@@ -1055,6 +1063,9 @@
 
 	local->dynamic_ps_forced_timeout = -1;
 
+	if (!local->hw.max_nan_de_entries)
+		local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID;
+
 	result = ieee80211_wep_init(local);
 	if (result < 0)
 		wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index c66411d..42120d9 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -881,20 +881,22 @@
 
 	netif_carrier_off(sdata->dev);
 
+	/* flush STAs and mpaths on this iface */
+	sta_info_flush(sdata);
+	mesh_path_flush_by_iface(sdata);
+
 	/* stop the beacon */
 	ifmsh->mesh_id_len = 0;
 	sdata->vif.bss_conf.enable_beacon = false;
 	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+
+	/* remove beacon */
 	bcn = rcu_dereference_protected(ifmsh->beacon,
 					lockdep_is_held(&sdata->wdev.mtx));
 	RCU_INIT_POINTER(ifmsh->beacon, NULL);
 	kfree_rcu(bcn, rcu_head);
 
-	/* flush STAs and mpaths on this iface */
-	sta_info_flush(sdata);
-	mesh_path_flush_by_iface(sdata);
-
 	/* free all potentially still buffered group-addressed frames */
 	local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf);
 	skb_queue_purge(&ifmsh->ps.bc_buf);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 8f9c3bd..b747c96 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -326,22 +326,33 @@
 	u32 tx_time, estimated_retx;
 	u64 result;
 
-	if (sta->mesh->fail_avg >= 100)
-		return MAX_METRIC;
+	/* Try to get rate based on HW/SW RC algorithm.
+	 * Rate is returned in units of Kbps, correct this
+	 * to comply with airtime calculation units
+	 * Round up in case we get rate < 100Kbps
+	 */
+	rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100);
 
-	sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
-	rate = cfg80211_calculate_bitrate(&rinfo);
-	if (WARN_ON(!rate))
-		return MAX_METRIC;
+	if (rate) {
+		err = 0;
+	} else {
+		if (sta->mesh->fail_avg >= 100)
+			return MAX_METRIC;
 
-	err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+		sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
+		rate = cfg80211_calculate_bitrate(&rinfo);
+		if (WARN_ON(!rate))
+			return MAX_METRIC;
+
+		err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+	}
 
 	/* bitrate is in units of 100 Kbps, while we need rate in units of
 	 * 1Mbps. This will be corrected on tx_time computation.
 	 */
 	tx_time = (device_constant + 10 * test_frame_len / rate);
 	estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
-	result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ;
+	result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
 	return (u32)result;
 }
 
@@ -746,6 +757,7 @@
 		sta = next_hop_deref_protected(mpath);
 		if (mpath->flags & MESH_PATH_ACTIVE &&
 		    ether_addr_equal(ta, sta->sta.addr) &&
+		    !(mpath->flags & MESH_PATH_FIXED) &&
 		    (!(mpath->flags & MESH_PATH_SN_VALID) ||
 		    SN_GT(target_sn, mpath->sn)  || target_sn == 0)) {
 			mpath->flags &= ~MESH_PATH_ACTIVE;
@@ -1012,7 +1024,7 @@
 		goto enddiscovery;
 
 	spin_lock_bh(&mpath->state_lock);
-	if (mpath->flags & MESH_PATH_DELETED) {
+	if (mpath->flags & (MESH_PATH_DELETED | MESH_PATH_FIXED)) {
 		spin_unlock_bh(&mpath->state_lock);
 		goto enddiscovery;
 	}
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 6db2ddf..f0e6175 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -826,7 +826,7 @@
 	mpath->metric = 0;
 	mpath->hop_count = 0;
 	mpath->exp_time = 0;
-	mpath->flags |= MESH_PATH_FIXED;
+	mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID;
 	mesh_path_activate(mpath);
 	spin_unlock_bh(&mpath->state_lock);
 	mesh_path_tx_pending(mpath);
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 64bc22a..faca22c 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -28,7 +28,7 @@
  * could be, for instance, in case a neighbor is restarted and its TSF counter
  * reset.
  */
-#define TOFFSET_MAXIMUM_ADJUSTMENT 30000		/* 30 ms */
+#define TOFFSET_MAXIMUM_ADJUSTMENT 800		/* 0.8 ms */
 
 struct sync_method {
 	u8 method;
@@ -70,9 +70,13 @@
 	}
 	spin_unlock_bh(&ifmsh->sync_offset_lock);
 
-	tsf = drv_get_tsf(local, sdata);
-	if (tsf != -1ULL)
-		drv_set_tsf(local, sdata, tsf + tsfdelta);
+	if (local->ops->offset_tsf) {
+		drv_offset_tsf(local, sdata, tsfdelta);
+	} else {
+		tsf = drv_get_tsf(local, sdata);
+		if (tsf != -1ULL)
+			drv_set_tsf(local, sdata, tsf + tsfdelta);
+	}
 }
 
 static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 8d426f6..7486f2d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1672,11 +1672,15 @@
 			     non_acm_ac++)
 				if (!(sdata->wmm_acm & BIT(7 - 2 * non_acm_ac)))
 					break;
-			/* The loop will result in using BK even if it requires
-			 * admission control, such configuration makes no sense
-			 * and we have to transmit somehow - the AC selection
-			 * does the same thing.
+			/* Usually the loop will result in using BK even if it
+			 * requires admission control, but such a configuration
+			 * makes no sense and we have to transmit somehow - the
+			 * AC selection does the same thing.
+			 * If we started out trying to downgrade from BK, then
+			 * the extra condition here might be needed.
 			 */
+			if (non_acm_ac >= IEEE80211_NUM_ACS)
+				non_acm_ac = IEEE80211_AC_BK;
 			if (drv_conf_tx(local, sdata, ac,
 					&sdata->tx_conf[non_acm_ac]))
 				sdata_err(sdata,
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index 55a9c5b..c3f610b 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -128,7 +128,8 @@
 		if (!ieee80211_sdata_running(sdata))
 			continue;
 
-		if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE)
+		if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE ||
+		    sdata->vif.type == NL80211_IFTYPE_NAN)
 			continue;
 
 		if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
@@ -838,6 +839,7 @@
 	case NL80211_IFTYPE_P2P_DEVICE:
 		need_offchan = true;
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 00a43a7..28a3a09 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -178,8 +178,7 @@
 	WARN_ON(!list_empty(&local->chanctx_list));
 
 	/* stop hardware - this must stop RX */
-	if (local->open_count)
-		ieee80211_stop_device(local);
+	ieee80211_stop_device(local);
 
  suspend:
 	local->suspended = true;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2e8a902..6175db3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -180,6 +180,11 @@
 		len += 12;
 	}
 
+	if (local->hw.radiotap_timestamp.units_pos >= 0) {
+		len = ALIGN(len, 8);
+		len += 12;
+	}
+
 	if (status->chains) {
 		/* antenna and antenna signal fields */
 		len += 2 * hweight8(status->chains);
@@ -447,6 +452,31 @@
 		pos += 2;
 	}
 
+	if (local->hw.radiotap_timestamp.units_pos >= 0) {
+		u16 accuracy = 0;
+		u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT;
+
+		rthdr->it_present |=
+			cpu_to_le32(1 << IEEE80211_RADIOTAP_TIMESTAMP);
+
+		/* ensure 8 byte alignment */
+		while ((pos - (u8 *)rthdr) & 7)
+			pos++;
+
+		put_unaligned_le64(status->device_timestamp, pos);
+		pos += sizeof(u64);
+
+		if (local->hw.radiotap_timestamp.accuracy >= 0) {
+			accuracy = local->hw.radiotap_timestamp.accuracy;
+			flags |= IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY;
+		}
+		put_unaligned_le16(accuracy, pos);
+		pos += sizeof(u16);
+
+		*pos++ = local->hw.radiotap_timestamp.units_pos;
+		*pos++ = flags;
+	}
+
 	for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) {
 		*pos++ = status->chain_signal[chain];
 		*pos++ = chain;
@@ -485,6 +515,9 @@
 	struct net_device *prev_dev = NULL;
 	int present_fcs_len = 0;
 	unsigned int rtap_vendor_space = 0;
+	struct ieee80211_mgmt *mgmt;
+	struct ieee80211_sub_if_data *monitor_sdata =
+		rcu_dereference(local->monitor_sdata);
 
 	if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) {
 		struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data;
@@ -567,7 +600,7 @@
 		if (sdata->vif.type != NL80211_IFTYPE_MONITOR)
 			continue;
 
-		if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)
+		if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)
 			continue;
 
 		if (!ieee80211_sdata_running(sdata))
@@ -585,6 +618,23 @@
 		ieee80211_rx_stats(sdata->dev, skb->len);
 	}
 
+	mgmt = (void *)skb->data;
+	if (monitor_sdata &&
+	    skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN &&
+	    ieee80211_is_action(mgmt->frame_control) &&
+	    mgmt->u.action.category == WLAN_CATEGORY_VHT &&
+	    mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT &&
+	    is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) &&
+	    ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) {
+		struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC);
+
+		if (mu_skb) {
+			mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
+			skb_queue_tail(&monitor_sdata->skb_queue, mu_skb);
+			ieee80211_queue_work(&local->hw, &monitor_sdata->work);
+		}
+	}
+
 	if (prev_dev) {
 		skb->dev = prev_dev;
 		netif_receive_skb(skb);
@@ -1072,8 +1122,15 @@
 	tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
 
 	tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
-	if (!tid_agg_rx)
+	if (!tid_agg_rx) {
+		if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
+		    !test_bit(tid, rx->sta->ampdu_mlme.agg_session_valid) &&
+		    !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg))
+			ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid,
+					     WLAN_BACK_RECIPIENT,
+					     WLAN_REASON_QSTA_REQUIRE_SETUP);
 		goto dont_reorder;
+	}
 
 	/* qos null data frames are excluded */
 	if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC)))
@@ -1266,9 +1323,7 @@
 		return;
 
 	for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
-		struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
-
-		if (!txqi->tin.backlog_packets)
+		if (txq_has_queue(sta->sta.txq[tid]))
 			set_bit(tid, &sta->txq_buffered_tids);
 		else
 			clear_bit(tid, &sta->txq_buffered_tids);
@@ -2535,6 +2590,12 @@
 
 		tid = le16_to_cpu(bar_data.control) >> 12;
 
+		if (!test_bit(tid, rx->sta->ampdu_mlme.agg_session_valid) &&
+		    !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg))
+			ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid,
+					     WLAN_BACK_RECIPIENT,
+					     WLAN_REASON_QSTA_REQUIRE_SETUP);
+
 		tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]);
 		if (!tid_agg_rx)
 			return RX_DROP_MONITOR;
@@ -3147,7 +3208,7 @@
 			continue;
 
 		if (sdata->vif.type != NL80211_IFTYPE_MONITOR ||
-		    !(sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES))
+		    !(sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES))
 			continue;
 
 		if (prev_dev) {
@@ -3523,6 +3584,9 @@
 		       ieee80211_is_probe_req(hdr->frame_control) ||
 		       ieee80211_is_probe_resp(hdr->frame_control) ||
 		       ieee80211_is_beacon(hdr->frame_control);
+	case NL80211_IFTYPE_NAN:
+		/* Currently no frames on NAN interface are allowed */
+		return false;
 	default:
 		break;
 	}
@@ -3940,7 +4004,7 @@
 	__le16 fc;
 	struct ieee80211_rx_data rx;
 	struct ieee80211_sub_if_data *prev;
-	struct rhash_head *tmp;
+	struct rhlist_head *tmp;
 	int err = 0;
 
 	fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
@@ -3983,13 +4047,10 @@
 		goto out;
 	} else if (ieee80211_is_data(fc)) {
 		struct sta_info *sta, *prev_sta;
-		const struct bucket_table *tbl;
 
 		prev_sta = NULL;
 
-		tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
-
-		for_each_sta_info(local, tbl, hdr->addr2, sta, tmp) {
+		for_each_sta_info(local, hdr->addr2, sta, tmp) {
 			if (!prev_sta) {
 				prev_sta = sta;
 				continue;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 070b40f..23d8ac8 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -420,7 +420,7 @@
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
-	trace_api_scan_completed(local, info);
+	trace_api_scan_completed(local, info->aborted);
 
 	set_bit(SCAN_COMPLETED, &local->scanning);
 	if (info->aborted)
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 76b737d..78e9ecb 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -67,12 +67,10 @@
 
 static const struct rhashtable_params sta_rht_params = {
 	.nelem_hint = 3, /* start small */
-	.insecure_elasticity = true, /* Disable chain-length checks. */
 	.automatic_shrinking = true,
 	.head_offset = offsetof(struct sta_info, hash_node),
 	.key_offset = offsetof(struct sta_info, addr),
 	.key_len = ETH_ALEN,
-	.hashfn = sta_addr_hash,
 	.max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
 };
 
@@ -80,8 +78,8 @@
 static int sta_info_hash_del(struct ieee80211_local *local,
 			     struct sta_info *sta)
 {
-	return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node,
-				      sta_rht_params);
+	return rhltable_remove(&local->sta_hash, &sta->hash_node,
+			       sta_rht_params);
 }
 
 static void __cleanup_single_sta(struct sta_info *sta)
@@ -157,19 +155,22 @@
 	sta_info_free(local, sta);
 }
 
+struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local,
+					 const u8 *addr)
+{
+	return rhltable_lookup(&local->sta_hash, addr, sta_rht_params);
+}
+
 /* protected by RCU */
 struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
 			      const u8 *addr)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct rhlist_head *tmp;
 	struct sta_info *sta;
-	struct rhash_head *tmp;
-	const struct bucket_table *tbl;
 
 	rcu_read_lock();
-	tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
-
-	for_each_sta_info(local, tbl, addr, sta, tmp) {
+	for_each_sta_info(local, addr, sta, tmp) {
 		if (sta->sdata == sdata) {
 			rcu_read_unlock();
 			/* this is safe as the caller must already hold
@@ -190,14 +191,11 @@
 				  const u8 *addr)
 {
 	struct ieee80211_local *local = sdata->local;
+	struct rhlist_head *tmp;
 	struct sta_info *sta;
-	struct rhash_head *tmp;
-	const struct bucket_table *tbl;
 
 	rcu_read_lock();
-	tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
-
-	for_each_sta_info(local, tbl, addr, sta, tmp) {
+	for_each_sta_info(local, addr, sta, tmp) {
 		if (sta->sdata == sdata ||
 		    (sta->sdata->bss && sta->sdata->bss == sdata->bss)) {
 			rcu_read_unlock();
@@ -263,8 +261,8 @@
 static int sta_info_hash_add(struct ieee80211_local *local,
 			     struct sta_info *sta)
 {
-	return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
-				      sta_rht_params);
+	return rhltable_insert(&local->sta_hash, &sta->hash_node,
+			       sta_rht_params);
 }
 
 static void sta_deliver_ps_frames(struct work_struct *wk)
@@ -340,6 +338,9 @@
 
 	memcpy(sta->addr, addr, ETH_ALEN);
 	memcpy(sta->sta.addr, addr, ETH_ALEN);
+	sta->sta.max_rx_aggregation_subframes =
+		local->hw.max_rx_aggregation_subframes;
+
 	sta->local = local;
 	sta->sdata = sdata;
 	sta->rx_stats.last_rx = jiffies;
@@ -450,9 +451,9 @@
 		    is_multicast_ether_addr(sta->sta.addr)))
 		return -EINVAL;
 
-	/* Strictly speaking this isn't necessary as we hold the mutex, but
-	 * the rhashtable code can't really deal with that distinction. We
-	 * do require the mutex for correctness though.
+	/* The RCU read lock is required by rhashtable due to
+	 * asynchronous resize/rehash.  We also require the mutex
+	 * for correctness.
 	 */
 	rcu_read_lock();
 	lockdep_assert_held(&sdata->local->sta_mtx);
@@ -687,7 +688,7 @@
 	}
 
 	/* No need to do anything if the driver does all */
-	if (ieee80211_hw_check(&local->hw, AP_LINK_PS))
+	if (!local->ops->set_tim)
 		return;
 
 	if (sta->dead)
@@ -1040,16 +1041,11 @@
 		  round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL));
 }
 
-u32 sta_addr_hash(const void *key, u32 length, u32 seed)
-{
-	return jhash(key, ETH_ALEN, seed);
-}
-
 int sta_info_init(struct ieee80211_local *local)
 {
 	int err;
 
-	err = rhashtable_init(&local->sta_hash, &sta_rht_params);
+	err = rhltable_init(&local->sta_hash, &sta_rht_params);
 	if (err)
 		return err;
 
@@ -1065,7 +1061,7 @@
 void sta_info_stop(struct ieee80211_local *local)
 {
 	del_timer_sync(&local->sta_cleanup);
-	rhashtable_destroy(&local->sta_hash);
+	rhltable_destroy(&local->sta_hash);
 }
 
 
@@ -1135,17 +1131,14 @@
 						   const u8 *localaddr)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
+	struct rhlist_head *tmp;
 	struct sta_info *sta;
-	struct rhash_head *tmp;
-	const struct bucket_table *tbl;
-
-	tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
 
 	/*
 	 * Just return a random station if localaddr is NULL
 	 * ... first in list.
 	 */
-	for_each_sta_info(local, tbl, addr, sta, tmp) {
+	for_each_sta_info(local, addr, sta, tmp) {
 		if (localaddr &&
 		    !ether_addr_equal(sta->sdata->vif.addr, localaddr))
 			continue;
@@ -1209,12 +1202,10 @@
 
 	if (sta->sta.txq[0]) {
 		for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
-			struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
-
-			if (!txqi->tin.backlog_packets)
+			if (!txq_has_queue(sta->sta.txq[i]))
 				continue;
 
-			drv_wake_tx_queue(local, txqi);
+			drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i]));
 		}
 	}
 
@@ -1616,7 +1607,6 @@
 
 		sta_info_recalc_tim(sta);
 	} else {
-		unsigned long tids = sta->txq_buffered_tids & driver_release_tids;
 		int tid;
 
 		/*
@@ -1646,9 +1636,8 @@
 			return;
 
 		for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
-			struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
-
-			if (!(tids & BIT(tid)) || txqi->tin.backlog_packets)
+			if (!(driver_release_tids & BIT(tid)) ||
+			    txq_has_queue(sta->sta.txq[tid]))
 				continue;
 
 			sta_info_recalc_tim(sta);
@@ -2279,11 +2268,7 @@
 	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
 		sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
 
-	/* check if the driver has a SW RC implementation */
-	if (ref && ref->ops->get_expected_throughput)
-		thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
-	else
-		thr = drv_get_expected_throughput(local, &sta->sta);
+	thr = sta_get_expected_throughput(sta);
 
 	if (thr != 0) {
 		sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
@@ -2291,6 +2276,25 @@
 	}
 }
 
+u32 sta_get_expected_throughput(struct sta_info *sta)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sdata->local;
+	struct rate_control_ref *ref = NULL;
+	u32 thr = 0;
+
+	if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+		ref = local->rate_ctrl;
+
+	/* check if the driver has a SW RC implementation */
+	if (ref && ref->ops->get_expected_throughput)
+		thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+	else
+		thr = drv_get_expected_throughput(local, sta);
+
+	return thr;
+}
+
 unsigned long ieee80211_sta_last_active(struct sta_info *sta)
 {
 	struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 78b0ef3..ed5fcb9 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -230,6 +230,8 @@
  * @tid_rx_stop_requested:  bitmap indicating which BA sessions per TID the
  *	driver requested to close until the work for it runs
  * @agg_session_valid: bitmap indicating which TID has a rx BA session open on
+ * @unexpected_agg: bitmap indicating which TID already sent a delBA due to
+ *	unexpected aggregation related frames outside a session
  * @work: work struct for starting/stopping aggregation
  * @tid_tx: aggregation info for Tx per TID
  * @tid_start_tx: sessions where start was requested
@@ -244,6 +246,7 @@
 	unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
 	unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
 	unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
+	unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)];
 	/* tx */
 	struct work_struct work;
 	struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS];
@@ -452,7 +455,7 @@
 	/* General information, mostly static */
 	struct list_head list, free_list;
 	struct rcu_head rcu_head;
-	struct rhash_head hash_node;
+	struct rhlist_head hash_node;
 	u8 addr[ETH_ALEN];
 	struct ieee80211_local *local;
 	struct ieee80211_sub_if_data *sdata;
@@ -635,6 +638,9 @@
  */
 #define STA_INFO_CLEANUP_INTERVAL (10 * HZ)
 
+struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local,
+					 const u8 *addr);
+
 /*
  * Get a STA info, must be under RCU read lock.
  */
@@ -644,17 +650,9 @@
 struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
 				  const u8 *addr);
 
-u32 sta_addr_hash(const void *key, u32 length, u32 seed);
-
-#define _sta_bucket_idx(_tbl, _a)					\
-	rht_bucket_index(_tbl, sta_addr_hash(_a, ETH_ALEN, (_tbl)->hash_rnd))
-
-#define for_each_sta_info(local, tbl, _addr, _sta, _tmp)		\
-	rht_for_each_entry_rcu(_sta, _tmp, tbl, 			\
-			       _sta_bucket_idx(tbl, _addr),		\
-			       hash_node)				\
-	/* compare address and run code only if it matches */		\
-	if (ether_addr_equal(_sta->addr, (_addr)))
+#define for_each_sta_info(local, _addr, _sta, _tmp)			\
+	rhl_for_each_entry_rcu(_sta, _tmp,				\
+			       sta_info_hash_lookup(local, _addr), hash_node)
 
 /*
  * Get STA info by index, BROKEN!
@@ -712,6 +710,8 @@
 			  struct rate_info *rinfo);
 void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo);
 
+u32 sta_get_expected_throughput(struct sta_info *sta);
+
 void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
 			  unsigned long exp_time);
 u8 sta_info_tx_streams(struct sta_info *sta);
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index c6d5c72..ddf71c6 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -557,6 +557,12 @@
 static void ieee80211_lost_packet(struct sta_info *sta,
 				  struct ieee80211_tx_info *info)
 {
+	/* If driver relies on its own algorithm for station kickout, skip
+	 * mac80211 packet loss mechanism.
+	 */
+	if (ieee80211_hw_check(&sta->local->hw, REPORTS_LOW_ACK))
+		return;
+
 	/* This packet was aggregated but doesn't carry status info */
 	if ((info->flags & IEEE80211_TX_CTL_AMPDU) &&
 	    !(info->flags & IEEE80211_TX_STAT_AMPDU))
@@ -709,7 +715,7 @@
 			if (!ieee80211_sdata_running(sdata))
 				continue;
 
-			if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) &&
+			if ((sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) &&
 			    !send_to_cooked)
 				continue;
 
@@ -740,8 +746,8 @@
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	__le16 fc;
 	struct ieee80211_supported_band *sband;
+	struct rhlist_head *tmp;
 	struct sta_info *sta;
-	struct rhash_head *tmp;
 	int retry_count;
 	int rates_idx;
 	bool send_to_cooked;
@@ -749,7 +755,6 @@
 	struct ieee80211_bar *bar;
 	int shift = 0;
 	int tid = IEEE80211_NUM_TIDS;
-	const struct bucket_table *tbl;
 
 	rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count);
 
@@ -758,9 +763,7 @@
 	sband = local->hw.wiphy->bands[info->band];
 	fc = hdr->frame_control;
 
-	tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
-
-	for_each_sta_info(local, tbl, hdr->addr1, sta, tmp) {
+	for_each_sta_info(local, hdr->addr1, sta, tmp) {
 		/* skip wrong virtual interface */
 		if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr))
 			continue;
@@ -771,6 +774,13 @@
 			clear_sta_flag(sta, WLAN_STA_SP);
 
 		acked = !!(info->flags & IEEE80211_TX_STAT_ACK);
+
+		/* mesh Peer Service Period support */
+		if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
+		    ieee80211_is_data_qos(fc))
+			ieee80211_mpsp_trigger_process(
+				ieee80211_get_qos_ctl(hdr), sta, true, acked);
+
 		if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) {
 			/*
 			 * The STA is in power save mode, so assume
@@ -781,13 +791,6 @@
 			return;
 		}
 
-		/* mesh Peer Service Period support */
-		if (ieee80211_vif_is_mesh(&sta->sdata->vif) &&
-		    ieee80211_is_data_qos(fc))
-			ieee80211_mpsp_trigger_process(
-					ieee80211_get_qos_ctl(hdr),
-					sta, true, acked);
-
 		if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) &&
 		    (ieee80211_is_data(hdr->frame_control)) &&
 		    (rates_idx != -1))
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index b5d28f1..afca7d1 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -333,10 +333,11 @@
 	if (!uc.center_freq1)
 		return;
 
-	/* proceed to downgrade the chandef until usable or the same */
+	/* proceed to downgrade the chandef until usable or the same as AP BW */
 	while (uc.width > max_width ||
-	       !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
-					      sdata->wdev.iftype))
+	       (uc.width > sta->tdls_chandef.width &&
+		!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
+					       sdata->wdev.iftype)))
 		ieee80211_chandef_downgrade(&uc);
 
 	if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 77e4c53..92a47af 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -984,6 +984,32 @@
 	)
 );
 
+TRACE_EVENT(drv_offset_tsf,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 s64 offset),
+
+	TP_ARGS(local, sdata, offset),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		__field(s64, tsf_offset)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		__entry->tsf_offset = offset;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT  VIF_PR_FMT  " tsf offset:%lld",
+		LOCAL_PR_ARG, VIF_PR_ARG,
+		(unsigned long long)__entry->tsf_offset
+	)
+);
+
 DEFINE_EVENT(local_sdata_evt, drv_reset_tsf,
 	TP_PROTO(struct ieee80211_local *local,
 		 struct ieee80211_sub_if_data *sdata),
@@ -1700,6 +1726,139 @@
 	)
 );
 
+TRACE_EVENT(drv_start_nan,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct cfg80211_nan_conf *conf),
+
+	TP_ARGS(local, sdata, conf),
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		__field(u8, master_pref)
+		__field(u8, dual)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		__entry->master_pref = conf->master_pref;
+		__entry->dual = conf->dual;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT  VIF_PR_FMT
+		", master preference: %u, dual: %d",
+		LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref,
+		__entry->dual
+	)
+);
+
+TRACE_EVENT(drv_stop_nan,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata),
+
+	TP_ARGS(local, sdata),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT  VIF_PR_FMT,
+		LOCAL_PR_ARG, VIF_PR_ARG
+	)
+);
+
+TRACE_EVENT(drv_nan_change_conf,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 struct cfg80211_nan_conf *conf,
+		 u32 changes),
+
+	TP_ARGS(local, sdata, conf, changes),
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		__field(u8, master_pref)
+		__field(u8, dual)
+		__field(u32, changes)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		__entry->master_pref = conf->master_pref;
+		__entry->dual = conf->dual;
+		__entry->changes = changes;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT  VIF_PR_FMT
+		", master preference: %u, dual: %d, changes: 0x%x",
+		LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref,
+		__entry->dual, __entry->changes
+	)
+);
+
+TRACE_EVENT(drv_add_nan_func,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 const struct cfg80211_nan_func *func),
+
+	TP_ARGS(local, sdata, func),
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		__field(u8, type)
+		__field(u8, inst_id)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		__entry->type = func->type;
+		__entry->inst_id = func->instance_id;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT  VIF_PR_FMT
+		", type: %u, inst_id: %u",
+		LOCAL_PR_ARG, VIF_PR_ARG, __entry->type, __entry->inst_id
+	)
+);
+
+TRACE_EVENT(drv_del_nan_func,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
+		 u8 instance_id),
+
+	TP_ARGS(local, sdata, instance_id),
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		VIF_ENTRY
+		__field(u8, instance_id)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		VIF_ASSIGN;
+		__entry->instance_id = instance_id;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT  VIF_PR_FMT
+		", instance_id: %u",
+		LOCAL_PR_ARG, VIF_PR_ARG, __entry->instance_id
+	)
+);
+
 /*
  * Tracing for API calls that drivers call.
  */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 91461c4..1c56abc 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -368,7 +368,7 @@
 		skb = skb_dequeue(&ps->bc_buf);
 		if (skb) {
 			purged++;
-			dev_kfree_skb(skb);
+			ieee80211_free_txskb(&local->hw, skb);
 		}
 		total += skb_queue_len(&ps->bc_buf);
 	}
@@ -451,7 +451,7 @@
 	if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) {
 		ps_dbg(tx->sdata,
 		       "BC TX buffer full - dropping the oldest frame\n");
-		dev_kfree_skb(skb_dequeue(&ps->bc_buf));
+		ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf));
 	} else
 		tx->local->total_ps_buffered++;
 
@@ -853,8 +853,7 @@
 	tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
 	tx->sta->tx_stats.msdu[tid]++;
 
-	if (!tx->sta->sta.txq[0])
-		hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
+	hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
 
 	return TX_CONTINUE;
 }
@@ -1343,7 +1342,7 @@
 	local = container_of(fq, struct ieee80211_local, fq);
 	txqi = container_of(tin, struct txq_info, tin);
 	cparams = &local->cparams;
-	cstats = &local->cstats;
+	cstats = &txqi->cstats;
 
 	if (flow == &txqi->def_flow)
 		cvars = &txqi->def_cvars;
@@ -1403,6 +1402,8 @@
 	fq_tin_init(&txqi->tin);
 	fq_flow_init(&txqi->def_flow);
 	codel_vars_init(&txqi->def_cvars);
+	codel_stats_init(&txqi->cstats);
+	__skb_queue_head_init(&txqi->frags);
 
 	txqi->txq.vif = &sdata->vif;
 
@@ -1425,6 +1426,7 @@
 	struct fq_tin *tin = &txqi->tin;
 
 	fq_tin_reset(fq, tin, fq_skb_free_func);
+	ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
 }
 
 int ieee80211_txq_setup_flows(struct ieee80211_local *local)
@@ -1432,6 +1434,8 @@
 	struct fq *fq = &local->fq;
 	int ret;
 	int i;
+	bool supp_vht = false;
+	enum nl80211_band band;
 
 	if (!local->ops->wake_tx_queue)
 		return 0;
@@ -1440,8 +1444,24 @@
 	if (ret)
 		return ret;
 
+	/*
+	 * If the hardware doesn't support VHT, it is safe to limit the maximum
+	 * queue size. 4 Mbytes is 64 max-size aggregates in 802.11n.
+	 */
+	for (band = 0; band < NUM_NL80211_BANDS; band++) {
+		struct ieee80211_supported_band *sband;
+
+		sband = local->hw.wiphy->bands[band];
+		if (!sband)
+			continue;
+
+		supp_vht = supp_vht || sband->vht_cap.vht_supported;
+	}
+
+	if (!supp_vht)
+		fq->memory_limit = 4 << 20; /* 4 Mbytes */
+
 	codel_params_init(&local->cparams);
-	codel_stats_init(&local->cstats);
 	local->cparams.interval = MS2TIME(100);
 	local->cparams.target = MS2TIME(20);
 	local->cparams.ecn = true;
@@ -1476,50 +1496,46 @@
 	spin_unlock_bh(&fq->lock);
 }
 
-struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
-				     struct ieee80211_txq *txq)
+static bool ieee80211_queue_skb(struct ieee80211_local *local,
+				struct ieee80211_sub_if_data *sdata,
+				struct sta_info *sta,
+				struct sk_buff *skb)
 {
-	struct ieee80211_local *local = hw_to_local(hw);
-	struct txq_info *txqi = container_of(txq, struct txq_info, txq);
-	struct ieee80211_hdr *hdr;
-	struct sk_buff *skb = NULL;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct fq *fq = &local->fq;
-	struct fq_tin *tin = &txqi->tin;
+	struct ieee80211_vif *vif;
+	struct txq_info *txqi;
+	struct ieee80211_sta *pubsta;
+
+	if (!local->ops->wake_tx_queue ||
+	    sdata->vif.type == NL80211_IFTYPE_MONITOR)
+		return false;
+
+	if (sta && sta->uploaded)
+		pubsta = &sta->sta;
+	else
+		pubsta = NULL;
+
+	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+		sdata = container_of(sdata->bss,
+				     struct ieee80211_sub_if_data, u.ap);
+
+	vif = &sdata->vif;
+	txqi = ieee80211_get_txq(local, vif, pubsta, skb);
+
+	if (!txqi)
+		return false;
+
+	info->control.vif = vif;
 
 	spin_lock_bh(&fq->lock);
-
-	if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
-		goto out;
-
-	skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
-	if (!skb)
-		goto out;
-
-	ieee80211_set_skb_vif(skb, txqi);
-
-	hdr = (struct ieee80211_hdr *)skb->data;
-	if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
-		struct sta_info *sta = container_of(txq->sta, struct sta_info,
-						    sta);
-		struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-
-		hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid);
-		if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
-			info->flags |= IEEE80211_TX_CTL_AMPDU;
-		else
-			info->flags &= ~IEEE80211_TX_CTL_AMPDU;
-	}
-
-out:
+	ieee80211_txq_enqueue(local, txqi, skb);
 	spin_unlock_bh(&fq->lock);
 
-	if (skb && skb_has_frag_list(skb) &&
-	    !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
-		skb_linearize(skb);
+	drv_wake_tx_queue(local, txqi);
 
-	return skb;
+	return true;
 }
-EXPORT_SYMBOL(ieee80211_tx_dequeue);
 
 static bool ieee80211_tx_frags(struct ieee80211_local *local,
 			       struct ieee80211_vif *vif,
@@ -1528,9 +1544,7 @@
 			       bool txpending)
 {
 	struct ieee80211_tx_control control = {};
-	struct fq *fq = &local->fq;
 	struct sk_buff *skb, *tmp;
-	struct txq_info *txqi;
 	unsigned long flags;
 
 	skb_queue_walk_safe(skbs, skb, tmp) {
@@ -1545,21 +1559,6 @@
 		}
 #endif
 
-		txqi = ieee80211_get_txq(local, vif, sta, skb);
-		if (txqi) {
-			info->control.vif = vif;
-
-			__skb_unlink(skb, skbs);
-
-			spin_lock_bh(&fq->lock);
-			ieee80211_txq_enqueue(local, txqi, skb);
-			spin_unlock_bh(&fq->lock);
-
-			drv_wake_tx_queue(local, txqi);
-
-			continue;
-		}
-
 		spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
 		if (local->queue_stop_reasons[q] ||
 		    (!txpending && !skb_queue_empty(&local->pending[q]))) {
@@ -1643,7 +1642,7 @@
 
 	switch (sdata->vif.type) {
 	case NL80211_IFTYPE_MONITOR:
-		if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) {
+		if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
 			vif = &sdata->vif;
 			break;
 		}
@@ -1680,10 +1679,13 @@
 /*
  * Invoke TX handlers, return 0 on success and non-zero if the
  * frame was dropped or queued.
+ *
+ * The handlers are split into an early and late part. The latter is everything
+ * that can be sensitive to reordering, and will be deferred to after packets
+ * are dequeued from the intermediate queues (when they are enabled).
  */
-static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx)
 {
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	ieee80211_tx_result res = TX_DROP;
 
 #define CALL_TXH(txh) \
@@ -1701,6 +1703,31 @@
 	if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
 		CALL_TXH(ieee80211_tx_h_rate_ctrl);
 
+ txh_done:
+	if (unlikely(res == TX_DROP)) {
+		I802_DEBUG_INC(tx->local->tx_handlers_drop);
+		if (tx->skb)
+			ieee80211_free_txskb(&tx->local->hw, tx->skb);
+		else
+			ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs);
+		return -1;
+	} else if (unlikely(res == TX_QUEUED)) {
+		I802_DEBUG_INC(tx->local->tx_handlers_queued);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Late handlers can be called while the sta lock is held. Handlers that can
+ * cause packets to be generated will cause deadlock!
+ */
+static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+	ieee80211_tx_result res = TX_CONTINUE;
+
 	if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) {
 		__skb_queue_tail(&tx->skbs, tx->skb);
 		tx->skb = NULL;
@@ -1733,6 +1760,15 @@
 	return 0;
 }
 
+static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+{
+	int r = invoke_tx_handlers_early(tx);
+
+	if (r)
+		return r;
+	return invoke_tx_handlers_late(tx);
+}
+
 bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
 			      struct ieee80211_vif *vif, struct sk_buff *skb,
 			      int band, struct ieee80211_sta **sta)
@@ -1807,7 +1843,13 @@
 		info->hw_queue =
 			sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
 
-	if (!invoke_tx_handlers(&tx))
+	if (invoke_tx_handlers_early(&tx))
+		return false;
+
+	if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb))
+		return true;
+
+	if (!invoke_tx_handlers_late(&tx))
 		result = __ieee80211_tx(local, &tx.skbs, led_len,
 					tx.sta, txpending);
 
@@ -2263,15 +2305,9 @@
 	case NL80211_IFTYPE_STATION:
 		if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) {
 			sta = sta_info_get(sdata, skb->data);
-			if (sta) {
-				bool tdls_peer, tdls_auth;
-
-				tdls_peer = test_sta_flag(sta,
-							  WLAN_STA_TDLS_PEER);
-				tdls_auth = test_sta_flag(sta,
-						WLAN_STA_TDLS_PEER_AUTH);
-
-				if (tdls_peer && tdls_auth) {
+			if (sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+				if (test_sta_flag(sta,
+						  WLAN_STA_TDLS_PEER_AUTH)) {
 					*sta_out = sta;
 					return 0;
 				}
@@ -2283,8 +2319,7 @@
 				 * after a TDLS sta is removed due to being
 				 * unreachable.
 				 */
-				if (tdls_peer && !tdls_auth &&
-				    !ieee80211_is_tdls_setup(skb))
+				if (!ieee80211_is_tdls_setup(skb))
 					return -EINVAL;
 			}
 
@@ -2334,7 +2369,6 @@
 	struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL;
 	const u8 *encaps_data;
 	int encaps_len, skip_header_bytes;
-	int nh_pos, h_pos;
 	bool wme_sta = false, authorized = false;
 	bool tdls_peer;
 	bool multicast;
@@ -2640,13 +2674,7 @@
 		encaps_len = 0;
 	}
 
-	nh_pos = skb_network_header(skb) - skb->data;
-	h_pos = skb_transport_header(skb) - skb->data;
-
 	skb_pull(skb, skip_header_bytes);
-	nh_pos -= skip_header_bytes;
-	h_pos -= skip_header_bytes;
-
 	head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
 
 	/*
@@ -2672,18 +2700,12 @@
 		}
 	}
 
-	if (encaps_data) {
+	if (encaps_data)
 		memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
-		nh_pos += encaps_len;
-		h_pos += encaps_len;
-	}
 
 #ifdef CONFIG_MAC80211_MESH
-	if (meshhdrlen > 0) {
+	if (meshhdrlen > 0)
 		memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen);
-		nh_pos += meshhdrlen;
-		h_pos += meshhdrlen;
-	}
 #endif
 
 	if (ieee80211_is_data_qos(fc)) {
@@ -2699,15 +2721,7 @@
 	} else
 		memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
 
-	nh_pos += hdrlen;
-	h_pos += hdrlen;
-
-	/* Update skb pointers to various headers since this modified frame
-	 * is going to go through Linux networking code that may potentially
-	 * need things like pointer to IP header. */
 	skb_reset_mac_header(skb);
-	skb_set_network_header(skb, nh_pos);
-	skb_set_transport_header(skb, h_pos);
 
 	info = IEEE80211_SKB_CB(skb);
 	memset(info, 0, sizeof(*info));
@@ -3179,8 +3193,71 @@
 	return ret;
 }
 
+/*
+ * Can be called while the sta lock is held. Anything that can cause packets to
+ * be generated will cause deadlock!
+ */
+static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
+				       struct sta_info *sta, u8 pn_offs,
+				       struct ieee80211_key *key,
+				       struct sk_buff *skb)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hdr *hdr = (void *)skb->data;
+	u8 tid = IEEE80211_NUM_TIDS;
+
+	if (key)
+		info->control.hw_key = &key->conf;
+
+	ieee80211_tx_stats(skb->dev, skb->len);
+
+	if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+		tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+		*ieee80211_get_qos_ctl(hdr) = tid;
+		hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
+	} else {
+		info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+		hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
+		sdata->sequence_number += 0x10;
+	}
+
+	if (skb_shinfo(skb)->gso_size)
+		sta->tx_stats.msdu[tid] +=
+			DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
+	else
+		sta->tx_stats.msdu[tid]++;
+
+	info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+
+	/* statistics normally done by ieee80211_tx_h_stats (but that
+	 * has to consider fragmentation, so is more complex)
+	 */
+	sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
+	sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+
+	if (pn_offs) {
+		u64 pn;
+		u8 *crypto_hdr = skb->data + pn_offs;
+
+		switch (key->conf.cipher) {
+		case WLAN_CIPHER_SUITE_CCMP:
+		case WLAN_CIPHER_SUITE_CCMP_256:
+		case WLAN_CIPHER_SUITE_GCMP:
+		case WLAN_CIPHER_SUITE_GCMP_256:
+			pn = atomic64_inc_return(&key->conf.tx_pn);
+			crypto_hdr[0] = pn;
+			crypto_hdr[1] = pn >> 8;
+			crypto_hdr[4] = pn >> 16;
+			crypto_hdr[5] = pn >> 24;
+			crypto_hdr[6] = pn >> 32;
+			crypto_hdr[7] = pn >> 40;
+			break;
+		}
+	}
+}
+
 static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
-				struct net_device *dev, struct sta_info *sta,
+				struct sta_info *sta,
 				struct ieee80211_fast_tx *fast_tx,
 				struct sk_buff *skb)
 {
@@ -3231,8 +3308,6 @@
 			return true;
 	}
 
-	ieee80211_tx_stats(dev, skb->len + extra_head);
-
 	if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
 	    ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
 		return true;
@@ -3261,24 +3336,7 @@
 	info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
 		      IEEE80211_TX_CTL_DONTFRAG |
 		      (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
-
-	if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
-		*ieee80211_get_qos_ctl(hdr) = tid;
-		if (!sta->sta.txq[0])
-			hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
-	} else {
-		info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
-		hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
-		sdata->sequence_number += 0x10;
-	}
-
-	if (skb_shinfo(skb)->gso_size)
-		sta->tx_stats.msdu[tid] +=
-			DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
-	else
-		sta->tx_stats.msdu[tid]++;
-
-	info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+	info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT;
 
 	__skb_queue_head_init(&tx.skbs);
 
@@ -3288,9 +3346,6 @@
 	tx.sta = sta;
 	tx.key = fast_tx->key;
 
-	if (fast_tx->key)
-		info->control.hw_key = &fast_tx->key->conf;
-
 	if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
 		tx.skb = skb;
 		r = ieee80211_tx_h_rate_ctrl(&tx);
@@ -3304,31 +3359,11 @@
 		}
 	}
 
-	/* statistics normally done by ieee80211_tx_h_stats (but that
-	 * has to consider fragmentation, so is more complex)
-	 */
-	sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
-	sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
+	if (ieee80211_queue_skb(local, sdata, sta, skb))
+		return true;
 
-	if (fast_tx->pn_offs) {
-		u64 pn;
-		u8 *crypto_hdr = skb->data + fast_tx->pn_offs;
-
-		switch (fast_tx->key->conf.cipher) {
-		case WLAN_CIPHER_SUITE_CCMP:
-		case WLAN_CIPHER_SUITE_CCMP_256:
-		case WLAN_CIPHER_SUITE_GCMP:
-		case WLAN_CIPHER_SUITE_GCMP_256:
-			pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn);
-			crypto_hdr[0] = pn;
-			crypto_hdr[1] = pn >> 8;
-			crypto_hdr[4] = pn >> 16;
-			crypto_hdr[5] = pn >> 24;
-			crypto_hdr[6] = pn >> 32;
-			crypto_hdr[7] = pn >> 40;
-			break;
-		}
-	}
+	ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
+				   fast_tx->key, skb);
 
 	if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
 		sdata = container_of(sdata->bss,
@@ -3339,6 +3374,94 @@
 	return true;
 }
 
+struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
+				     struct ieee80211_txq *txq)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct txq_info *txqi = container_of(txq, struct txq_info, txq);
+	struct ieee80211_hdr *hdr;
+	struct sk_buff *skb = NULL;
+	struct fq *fq = &local->fq;
+	struct fq_tin *tin = &txqi->tin;
+	struct ieee80211_tx_info *info;
+	struct ieee80211_tx_data tx;
+	ieee80211_tx_result r;
+
+	spin_lock_bh(&fq->lock);
+
+	if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
+		goto out;
+
+	/* Make sure fragments stay together. */
+	skb = __skb_dequeue(&txqi->frags);
+	if (skb)
+		goto out;
+
+begin:
+	skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
+	if (!skb)
+		goto out;
+
+	ieee80211_set_skb_vif(skb, txqi);
+
+	hdr = (struct ieee80211_hdr *)skb->data;
+	info = IEEE80211_SKB_CB(skb);
+
+	memset(&tx, 0, sizeof(tx));
+	__skb_queue_head_init(&tx.skbs);
+	tx.local = local;
+	tx.skb = skb;
+	tx.sdata = vif_to_sdata(info->control.vif);
+
+	if (txq->sta)
+		tx.sta = container_of(txq->sta, struct sta_info, sta);
+
+	/*
+	 * The key can be removed while the packet was queued, so need to call
+	 * this here to get the current key.
+	 */
+	r = ieee80211_tx_h_select_key(&tx);
+	if (r != TX_CONTINUE) {
+		ieee80211_free_txskb(&local->hw, skb);
+		goto begin;
+	}
+
+	if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
+		struct sta_info *sta = container_of(txq->sta, struct sta_info,
+						    sta);
+		u8 pn_offs = 0;
+
+		if (tx.key &&
+		    (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
+			pn_offs = ieee80211_hdrlen(hdr->frame_control);
+
+		ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
+					   tx.key, skb);
+	} else {
+		if (invoke_tx_handlers_late(&tx))
+			goto begin;
+
+		skb = __skb_dequeue(&tx.skbs);
+
+		if (!skb_queue_empty(&tx.skbs))
+			skb_queue_splice_tail(&tx.skbs, &txqi->frags);
+	}
+
+	if (skb && skb_has_frag_list(skb) &&
+	    !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
+		if (skb_linearize(skb)) {
+			ieee80211_free_txskb(&local->hw, skb);
+			goto begin;
+		}
+	}
+
+out:
+	spin_unlock_bh(&fq->lock);
+
+	return skb;
+}
+EXPORT_SYMBOL(ieee80211_tx_dequeue);
+
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
 				  u32 info_flags)
@@ -3363,7 +3486,7 @@
 		fast_tx = rcu_dereference(sta->fast_tx);
 
 		if (fast_tx &&
-		    ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb))
+		    ieee80211_xmit_fast(sdata, sta, fast_tx, skb))
 			goto out;
 	}
 
@@ -4275,7 +4398,7 @@
 			sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev);
 		if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb))
 			break;
-		dev_kfree_skb_any(skb);
+		ieee80211_free_txskb(hw, skb);
 	}
 
 	info = IEEE80211_SKB_CB(skb);
@@ -4390,9 +4513,6 @@
 	int ac = ieee802_1d_to_ac[tid & 7];
 
 	skb_reset_mac_header(skb);
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-
 	skb_set_queue_mapping(skb, ac);
 	skb->priority = tid;
 
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 42bf0b6..545c79a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -598,7 +598,7 @@
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
 		switch (sdata->vif.type) {
 		case NL80211_IFTYPE_MONITOR:
-			if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))
+			if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE))
 				continue;
 			break;
 		case NL80211_IFTYPE_AP_VLAN:
@@ -1209,7 +1209,8 @@
 	}
 
 	if (sdata->vif.type != NL80211_IFTYPE_MONITOR &&
-	    sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) {
+	    sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
+	    sdata->vif.type != NL80211_IFTYPE_NAN) {
 		sdata->vif.bss_conf.qos = enable_qos;
 		if (bss_notify)
 			ieee80211_bss_info_change_notify(sdata,
@@ -1748,6 +1749,46 @@
 	mutex_unlock(&local->sta_mtx);
 }
 
+static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata)
+{
+	struct cfg80211_nan_func *func, **funcs;
+	int res, id, i = 0;
+
+	res = drv_start_nan(sdata->local, sdata,
+			    &sdata->u.nan.conf);
+	if (WARN_ON(res))
+		return res;
+
+	funcs = kzalloc((sdata->local->hw.max_nan_de_entries + 1) *
+			sizeof(*funcs), GFP_KERNEL);
+	if (!funcs)
+		return -ENOMEM;
+
+	/* Add all the functions:
+	 * This is a little bit ugly. We need to call a potentially sleeping
+	 * callback for each NAN function, so we can't hold the spinlock.
+	 */
+	spin_lock_bh(&sdata->u.nan.func_lock);
+
+	idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id)
+		funcs[i++] = func;
+
+	spin_unlock_bh(&sdata->u.nan.func_lock);
+
+	for (i = 0; funcs[i]; i++) {
+		res = drv_add_nan_func(sdata->local, sdata, funcs[i]);
+		if (WARN_ON(res))
+			ieee80211_nan_func_terminated(&sdata->vif,
+						      funcs[i]->instance_id,
+						      NL80211_NAN_FUNC_TERM_REASON_ERROR,
+						      GFP_KERNEL);
+	}
+
+	kfree(funcs);
+
+	return 0;
+}
+
 int ieee80211_reconfig(struct ieee80211_local *local)
 {
 	struct ieee80211_hw *hw = &local->hw;
@@ -1971,6 +2012,13 @@
 				ieee80211_bss_info_change_notify(sdata, changed);
 			}
 			break;
+		case NL80211_IFTYPE_NAN:
+			res = ieee80211_reconfig_nan(sdata);
+			if (res < 0) {
+				ieee80211_handle_reconfig_failure(local);
+				return res;
+			}
+			break;
 		case NL80211_IFTYPE_WDS:
 		case NL80211_IFTYPE_AP_VLAN:
 		case NL80211_IFTYPE_MONITOR:
@@ -2555,7 +2603,6 @@
 
 		if (need_basic && basic_rates & BIT(i))
 			basic = 0x80;
-		rate = sband->bitrates[i].bitrate;
 		rate = DIV_ROUND_UP(sband->bitrates[i].bitrate,
 				    5 * (1 << shift));
 		*pos++ = basic | (u8) rate;
@@ -3394,11 +3441,18 @@
 			     unsigned long *byte_cnt)
 {
 	struct txq_info *txqi = to_txq_info(txq);
+	u32 frag_cnt = 0, frag_bytes = 0;
+	struct sk_buff *skb;
+
+	skb_queue_walk(&txqi->frags, skb) {
+		frag_cnt++;
+		frag_bytes += skb->len;
+	}
 
 	if (frame_cnt)
-		*frame_cnt = txqi->tin.backlog_packets;
+		*frame_cnt = txqi->tin.backlog_packets + frag_cnt;
 
 	if (byte_cnt)
-		*byte_cnt = txqi->tin.backlog_bytes;
+		*byte_cnt = txqi->tin.backlog_bytes + frag_bytes;
 }
 EXPORT_SYMBOL(ieee80211_txq_get_depth);
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 7079cd3..06019db 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -663,6 +663,7 @@
 
 	/* TODO check this */
 	SET_NETDEV_DEV(ndev, &local->phy->dev);
+	dev_net_set(ndev, wpan_phy_net(local->hw.phy));
 	sdata = netdev_priv(ndev);
 	ndev->ieee802154_ptr = &sdata->wpan_dev;
 	memcpy(sdata->name, ndev->name, IFNAMSIZ);
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 446e130..4dcf6e1 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -101,11 +101,16 @@
 	sdata->dev->stats.rx_bytes += skb->len;
 
 	switch (mac_cb(skb)->type) {
+	case IEEE802154_FC_TYPE_BEACON:
+	case IEEE802154_FC_TYPE_ACK:
+	case IEEE802154_FC_TYPE_MAC_CMD:
+		goto fail;
+
 	case IEEE802154_FC_TYPE_DATA:
 		return ieee802154_deliver_skb(skb);
 	default:
-		pr_warn("ieee802154: bad frame received (type = %d)\n",
-			mac_cb(skb)->type);
+		pr_warn_ratelimited("ieee802154: bad frame received "
+				    "(type = %d)\n", mac_cb(skb)->type);
 		goto fail;
 	}
 
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 5c161e7..0e4334c 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -961,9 +961,6 @@
 				RCU_INIT_POINTER(nh->nh_dev, NULL);
 		} endfor_nexthops(rt);
 	}
-
-
-	return;
 }
 
 static void mpls_ifup(struct net_device *dev, unsigned int nh_flags)
@@ -997,8 +994,6 @@
 
 		ACCESS_ONCE(rt->rt_nhn_alive) = alive;
 	}
-
-	return;
 }
 
 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 732a5c1..bdfef6c 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -1,9 +1,6 @@
 #ifndef MPLS_INTERNAL_H
 #define MPLS_INTERNAL_H
-
-struct mpls_shim_hdr {
-	__be32 label_stack_entry;
-};
+#include <net/mpls.h>
 
 struct mpls_entry_decoded {
 	u32 label;
@@ -93,11 +90,6 @@
 
 #define endfor_nexthops(rt) }
 
-static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
-{
-	return (struct mpls_shim_hdr *)skb_network_header(skb);
-}
-
 static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos)
 {
 	struct mpls_shim_hdr result;
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 2055e57..b4da6d8 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -23,32 +23,50 @@
 				       netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
+	u16 mac_offset = skb->mac_header;
 	netdev_features_t mpls_features;
+	u16 mac_len = skb->mac_len;
 	__be16 mpls_protocol;
+	unsigned int mpls_hlen;
+
+	skb_reset_network_header(skb);
+	mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb);
+	if (unlikely(!pskb_may_pull(skb, mpls_hlen)))
+		goto out;
 
 	/* Setup inner SKB. */
 	mpls_protocol = skb->protocol;
 	skb->protocol = skb->inner_protocol;
 
-	/* Push back the mac header that skb_mac_gso_segment() has pulled.
-	 * It will be re-pulled by the call to skb_mac_gso_segment() below
-	 */
-	__skb_push(skb, skb->mac_len);
+	__skb_pull(skb, mpls_hlen);
+
+	skb->mac_len = 0;
+	skb_reset_mac_header(skb);
 
 	/* Segment inner packet. */
 	mpls_features = skb->dev->mpls_features & features;
 	segs = skb_mac_gso_segment(skb, mpls_features);
+	if (IS_ERR_OR_NULL(segs)) {
+		skb_gso_error_unwind(skb, mpls_protocol, mpls_hlen, mac_offset,
+				     mac_len);
+		goto out;
+	}
+	skb = segs;
 
+	mpls_hlen += mac_len;
+	do {
+		skb->mac_len = mac_len;
+		skb->protocol = mpls_protocol;
 
-	/* Restore outer protocol. */
-	skb->protocol = mpls_protocol;
+		skb_reset_inner_network_header(skb);
 
-	/* Re-pull the mac header that the call to skb_mac_gso_segment()
-	 * above pulled.  It will be re-pushed after returning
-	 * skb_mac_gso_segment(), an indirect caller of this function.
-	 */
-	__skb_pull(skb, skb->data - skb_mac_header(skb));
+		__skb_push(skb, mpls_hlen);
 
+		skb_reset_mac_header(skb);
+		skb_set_network_header(skb, mac_len);
+	} while ((skb = skb->next));
+
+out:
 	return segs;
 }
 
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 644a8da..cf52cf3 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -37,7 +37,7 @@
 	return en->labels * sizeof(struct mpls_shim_hdr);
 }
 
-static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int mpls_xmit(struct sk_buff *skb)
 {
 	struct mpls_iptunnel_encap *tun_encap_info;
 	struct mpls_shim_hdr *hdr;
@@ -90,7 +90,11 @@
 	if (skb_cow(skb, hh_len + new_header_size))
 		goto drop;
 
+	skb_set_inner_protocol(skb, skb->protocol);
+	skb_reset_inner_network_header(skb);
+
 	skb_push(skb, new_header_size);
+
 	skb_reset_network_header(skb);
 
 	skb->dev = out_dev;
@@ -115,7 +119,7 @@
 		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
 				    __func__, err);
 
-	return 0;
+	return LWTUNNEL_XMIT_DONE;
 
 drop:
 	kfree_skb(skb);
@@ -153,7 +157,8 @@
 	if (ret)
 		goto errout;
 	newts->type = LWTUNNEL_ENCAP_MPLS;
-	newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+	newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
+	newts->headroom = mpls_encap_size(tun_encap_info);
 
 	*ts = newts;
 
@@ -209,7 +214,7 @@
 
 static const struct lwtunnel_encap_ops mpls_iptun_ops = {
 	.build_state = mpls_build_state,
-	.output = mpls_output,
+	.xmit = mpls_xmit,
 	.fill_encap = mpls_fill_encap_info,
 	.get_encap_size = mpls_encap_nlsize,
 	.cmp_encap = mpls_encap_cmp,
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 33738c0..13290a70 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -170,6 +170,7 @@
 
 #define NCSI_PACKAGE_SHIFT	5
 #define NCSI_PACKAGE_INDEX(c)	(((c) >> NCSI_PACKAGE_SHIFT) & 0x7)
+#define NCSI_RESERVED_CHANNEL	0x1f
 #define NCSI_CHANNEL_INDEX(c)	((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1))
 #define NCSI_TO_CHANNEL(p, c)	(((p) << NCSI_PACKAGE_SHIFT) | (c))
 
@@ -186,9 +187,15 @@
 	struct ncsi_channel_mode    modes[NCSI_MODE_MAX];
 	struct ncsi_channel_filter  *filters[NCSI_FILTER_MAX];
 	struct ncsi_channel_stats   stats;
-	struct timer_list           timer;	/* Link monitor timer  */
-	bool                        enabled;	/* Timer is enabled    */
-	unsigned int                timeout;	/* Times of timeout    */
+	struct {
+		struct timer_list   timer;
+		bool                enabled;
+		unsigned int        state;
+#define NCSI_CHANNEL_MONITOR_START	0
+#define NCSI_CHANNEL_MONITOR_RETRY	1
+#define NCSI_CHANNEL_MONITOR_WAIT	2
+#define NCSI_CHANNEL_MONITOR_WAIT_MAX	5
+	} monitor;
 	struct list_head            node;
 	struct list_head            link;
 };
@@ -206,7 +213,8 @@
 struct ncsi_request {
 	unsigned char        id;      /* Request ID - 0 to 255           */
 	bool                 used;    /* Request that has been assigned  */
-	bool                 driven;  /* Drive state machine             */
+	unsigned int         flags;   /* NCSI request property           */
+#define NCSI_REQ_FLAG_EVENT_DRIVEN	1
 	struct ncsi_dev_priv *ndp;    /* Associated NCSI device          */
 	struct sk_buff       *cmd;    /* Associated NCSI command packet  */
 	struct sk_buff       *rsp;    /* Associated NCSI response packet */
@@ -258,6 +266,7 @@
 	struct list_head    packages;        /* List of packages           */
 	struct ncsi_request requests[256];   /* Request table              */
 	unsigned int        request_id;      /* Last used request ID       */
+#define NCSI_REQ_START_IDX	1
 	unsigned int        pending_req_num; /* Number of pending requests */
 	struct ncsi_package *active_package; /* Currently handled package  */
 	struct ncsi_channel *active_channel; /* Currently handled channel  */
@@ -274,7 +283,7 @@
 	unsigned char        package;     /* Destination package ID        */
 	unsigned char        channel;     /* Detination channel ID or 0x1f */
 	unsigned short       payload;     /* Command packet payload length */
-	bool                 driven;      /* Drive the state machine?      */
+	unsigned int         req_flags;   /* NCSI request properties       */
 	union {
 		unsigned char  bytes[16]; /* Command packet specific data  */
 		unsigned short words[8];
@@ -313,7 +322,8 @@
 				   unsigned char id,
 				   struct ncsi_package **np,
 				   struct ncsi_channel **nc);
-struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven);
+struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp,
+					unsigned int req_flags);
 void ncsi_free_request(struct ncsi_request *nr);
 struct ncsi_dev *ncsi_find_dev(struct net_device *dev);
 int ncsi_process_next_channel(struct ncsi_dev_priv *ndp);
diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c
index d463468..b41a661 100644
--- a/net/ncsi/ncsi-aen.c
+++ b/net/ncsi/ncsi-aen.c
@@ -53,7 +53,9 @@
 	struct ncsi_aen_lsc_pkt *lsc;
 	struct ncsi_channel *nc;
 	struct ncsi_channel_mode *ncm;
-	unsigned long old_data;
+	bool chained;
+	int state;
+	unsigned long old_data, data;
 	unsigned long flags;
 
 	/* Find the NCSI channel */
@@ -62,20 +64,27 @@
 		return -ENODEV;
 
 	/* Update the link status */
-	ncm = &nc->modes[NCSI_MODE_LINK];
 	lsc = (struct ncsi_aen_lsc_pkt *)h;
+
+	spin_lock_irqsave(&nc->lock, flags);
+	ncm = &nc->modes[NCSI_MODE_LINK];
 	old_data = ncm->data[2];
-	ncm->data[2] = ntohl(lsc->status);
+	data = ntohl(lsc->status);
+	ncm->data[2] = data;
 	ncm->data[4] = ntohl(lsc->oem_status);
-	if (!((old_data ^ ncm->data[2]) & 0x1) ||
-	    !list_empty(&nc->link))
+
+	chained = !list_empty(&nc->link);
+	state = nc->state;
+	spin_unlock_irqrestore(&nc->lock, flags);
+
+	if (!((old_data ^ data) & 0x1) || chained)
 		return 0;
-	if (!(nc->state == NCSI_CHANNEL_INACTIVE && (ncm->data[2] & 0x1)) &&
-	    !(nc->state == NCSI_CHANNEL_ACTIVE && !(ncm->data[2] & 0x1)))
+	if (!(state == NCSI_CHANNEL_INACTIVE && (data & 0x1)) &&
+	    !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1)))
 		return 0;
 
 	if (!(ndp->flags & NCSI_DEV_HWA) &&
-	    nc->state == NCSI_CHANNEL_ACTIVE)
+	    state == NCSI_CHANNEL_ACTIVE)
 		ndp->flags |= NCSI_DEV_RESHUFFLE;
 
 	ncsi_stop_channel_monitor(nc);
@@ -97,13 +106,21 @@
 	if (!nc)
 		return -ENODEV;
 
+	spin_lock_irqsave(&nc->lock, flags);
 	if (!list_empty(&nc->link) ||
-	    nc->state != NCSI_CHANNEL_ACTIVE)
+	    nc->state != NCSI_CHANNEL_ACTIVE) {
+		spin_unlock_irqrestore(&nc->lock, flags);
 		return 0;
+	}
+	spin_unlock_irqrestore(&nc->lock, flags);
 
 	ncsi_stop_channel_monitor(nc);
+	spin_lock_irqsave(&nc->lock, flags);
+	nc->state = NCSI_CHANNEL_INVISIBLE;
+	spin_unlock_irqrestore(&nc->lock, flags);
+
 	spin_lock_irqsave(&ndp->lock, flags);
-	xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+	nc->state = NCSI_CHANNEL_INACTIVE;
 	list_add_tail_rcu(&nc->link, &ndp->channel_queue);
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 21057a8..db7083b 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -272,7 +272,7 @@
 	struct sk_buff *skb;
 	struct ncsi_request *nr;
 
-	nr = ncsi_alloc_request(ndp, nca->driven);
+	nr = ncsi_alloc_request(ndp, nca->req_flags);
 	if (!nr)
 		return NULL;
 
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index ef017b8..5e509e5 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -132,6 +132,7 @@
 	struct ncsi_dev *nd = &ndp->ndev;
 	struct ncsi_package *np;
 	struct ncsi_channel *nc;
+	unsigned long flags;
 
 	nd->state = ncsi_dev_state_functional;
 	if (force_down) {
@@ -142,14 +143,21 @@
 	nd->link_up = 0;
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			spin_lock_irqsave(&nc->lock, flags);
+
 			if (!list_empty(&nc->link) ||
-			    nc->state != NCSI_CHANNEL_ACTIVE)
+			    nc->state != NCSI_CHANNEL_ACTIVE) {
+				spin_unlock_irqrestore(&nc->lock, flags);
 				continue;
+			}
 
 			if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) {
+				spin_unlock_irqrestore(&nc->lock, flags);
 				nd->link_up = 1;
 				goto report;
 			}
+
+			spin_unlock_irqrestore(&nc->lock, flags);
 		}
 	}
 
@@ -163,43 +171,55 @@
 	struct ncsi_package *np = nc->package;
 	struct ncsi_dev_priv *ndp = np->ndp;
 	struct ncsi_cmd_arg nca;
-	bool enabled;
-	unsigned int timeout;
+	bool enabled, chained;
+	unsigned int monitor_state;
 	unsigned long flags;
-	int ret;
+	int state, ret;
 
 	spin_lock_irqsave(&nc->lock, flags);
-	timeout = nc->timeout;
-	enabled = nc->enabled;
+	state = nc->state;
+	chained = !list_empty(&nc->link);
+	enabled = nc->monitor.enabled;
+	monitor_state = nc->monitor.state;
 	spin_unlock_irqrestore(&nc->lock, flags);
 
-	if (!enabled || !list_empty(&nc->link))
+	if (!enabled || chained)
 		return;
-	if (nc->state != NCSI_CHANNEL_INACTIVE &&
-	    nc->state != NCSI_CHANNEL_ACTIVE)
+	if (state != NCSI_CHANNEL_INACTIVE &&
+	    state != NCSI_CHANNEL_ACTIVE)
 		return;
 
-	if (!(timeout % 2)) {
+	switch (monitor_state) {
+	case NCSI_CHANNEL_MONITOR_START:
+	case NCSI_CHANNEL_MONITOR_RETRY:
 		nca.ndp = ndp;
 		nca.package = np->id;
 		nca.channel = nc->id;
 		nca.type = NCSI_PKT_CMD_GLS;
-		nca.driven = false;
+		nca.req_flags = 0;
 		ret = ncsi_xmit_cmd(&nca);
 		if (ret) {
 			netdev_err(ndp->ndev.dev, "Error %d sending GLS\n",
 				   ret);
 			return;
 		}
-	}
 
-	if (timeout + 1 >= 3) {
+		break;
+	case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX:
+		break;
+	default:
 		if (!(ndp->flags & NCSI_DEV_HWA) &&
-		    nc->state == NCSI_CHANNEL_ACTIVE)
+		    state == NCSI_CHANNEL_ACTIVE) {
 			ncsi_report_link(ndp, true);
+			ndp->flags |= NCSI_DEV_RESHUFFLE;
+		}
+
+		spin_lock_irqsave(&nc->lock, flags);
+		nc->state = NCSI_CHANNEL_INVISIBLE;
+		spin_unlock_irqrestore(&nc->lock, flags);
 
 		spin_lock_irqsave(&ndp->lock, flags);
-		xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+		nc->state = NCSI_CHANNEL_INACTIVE;
 		list_add_tail_rcu(&nc->link, &ndp->channel_queue);
 		spin_unlock_irqrestore(&ndp->lock, flags);
 		ncsi_process_next_channel(ndp);
@@ -207,10 +227,9 @@
 	}
 
 	spin_lock_irqsave(&nc->lock, flags);
-	nc->timeout = timeout + 1;
-	nc->enabled = true;
+	nc->monitor.state++;
 	spin_unlock_irqrestore(&nc->lock, flags);
-	mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2)));
+	mod_timer(&nc->monitor.timer, jiffies + HZ);
 }
 
 void ncsi_start_channel_monitor(struct ncsi_channel *nc)
@@ -218,12 +237,12 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&nc->lock, flags);
-	WARN_ON_ONCE(nc->enabled);
-	nc->timeout = 0;
-	nc->enabled = true;
+	WARN_ON_ONCE(nc->monitor.enabled);
+	nc->monitor.enabled = true;
+	nc->monitor.state = NCSI_CHANNEL_MONITOR_START;
 	spin_unlock_irqrestore(&nc->lock, flags);
 
-	mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2)));
+	mod_timer(&nc->monitor.timer, jiffies + HZ);
 }
 
 void ncsi_stop_channel_monitor(struct ncsi_channel *nc)
@@ -231,14 +250,14 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&nc->lock, flags);
-	if (!nc->enabled) {
+	if (!nc->monitor.enabled) {
 		spin_unlock_irqrestore(&nc->lock, flags);
 		return;
 	}
-	nc->enabled = false;
+	nc->monitor.enabled = false;
 	spin_unlock_irqrestore(&nc->lock, flags);
 
-	del_timer_sync(&nc->timer);
+	del_timer_sync(&nc->monitor.timer);
 }
 
 struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np,
@@ -267,8 +286,9 @@
 	nc->id = id;
 	nc->package = np;
 	nc->state = NCSI_CHANNEL_INACTIVE;
-	nc->enabled = false;
-	setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned long)nc);
+	nc->monitor.enabled = false;
+	setup_timer(&nc->monitor.timer,
+		    ncsi_channel_monitor, (unsigned long)nc);
 	spin_lock_init(&nc->lock);
 	INIT_LIST_HEAD(&nc->link);
 	for (index = 0; index < NCSI_CAP_MAX; index++)
@@ -405,7 +425,8 @@
  * be same. Otherwise, the bogus response might be replied. So
  * the available IDs are allocated in round-robin fashion.
  */
-struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven)
+struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp,
+					unsigned int req_flags)
 {
 	struct ncsi_request *nr = NULL;
 	int i, limit = ARRAY_SIZE(ndp->requests);
@@ -413,30 +434,31 @@
 
 	/* Check if there is one available request until the ceiling */
 	spin_lock_irqsave(&ndp->lock, flags);
-	for (i = ndp->request_id; !nr && i < limit; i++) {
+	for (i = ndp->request_id; i < limit; i++) {
 		if (ndp->requests[i].used)
 			continue;
 
 		nr = &ndp->requests[i];
 		nr->used = true;
-		nr->driven = driven;
-		if (++ndp->request_id >= limit)
-			ndp->request_id = 0;
+		nr->flags = req_flags;
+		ndp->request_id = i + 1;
+		goto found;
 	}
 
 	/* Fail back to check from the starting cursor */
-	for (i = 0; !nr && i < ndp->request_id; i++) {
+	for (i = NCSI_REQ_START_IDX; i < ndp->request_id; i++) {
 		if (ndp->requests[i].used)
 			continue;
 
 		nr = &ndp->requests[i];
 		nr->used = true;
-		nr->driven = driven;
-		if (++ndp->request_id >= limit)
-			ndp->request_id = 0;
+		nr->flags = req_flags;
+		ndp->request_id = i + 1;
+		goto found;
 	}
-	spin_unlock_irqrestore(&ndp->lock, flags);
 
+found:
+	spin_unlock_irqrestore(&ndp->lock, flags);
 	return nr;
 }
 
@@ -458,7 +480,7 @@
 	nr->cmd = NULL;
 	nr->rsp = NULL;
 	nr->used = false;
-	driven = nr->driven;
+	driven = !!(nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN);
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
 	if (driven && cmd && --ndp->pending_req_num == 0)
@@ -508,10 +530,11 @@
 	struct ncsi_package *np = ndp->active_package;
 	struct ncsi_channel *nc = ndp->active_channel;
 	struct ncsi_cmd_arg nca;
+	unsigned long flags;
 	int ret;
 
 	nca.ndp = ndp;
-	nca.driven = true;
+	nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN;
 	switch (nd->state) {
 	case ncsi_dev_state_suspend:
 		nd->state = ncsi_dev_state_suspend_select;
@@ -527,7 +550,7 @@
 		nca.package = np->id;
 		if (nd->state == ncsi_dev_state_suspend_select) {
 			nca.type = NCSI_PKT_CMD_SP;
-			nca.channel = 0x1f;
+			nca.channel = NCSI_RESERVED_CHANNEL;
 			if (ndp->flags & NCSI_DEV_HWA)
 				nca.bytes[0] = 0;
 			else
@@ -544,7 +567,7 @@
 			nd->state = ncsi_dev_state_suspend_deselect;
 		} else if (nd->state == ncsi_dev_state_suspend_deselect) {
 			nca.type = NCSI_PKT_CMD_DP;
-			nca.channel = 0x1f;
+			nca.channel = NCSI_RESERVED_CHANNEL;
 			nd->state = ncsi_dev_state_suspend_done;
 		}
 
@@ -556,7 +579,9 @@
 
 		break;
 	case ncsi_dev_state_suspend_done:
-		xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+		spin_lock_irqsave(&nc->lock, flags);
+		nc->state = NCSI_CHANNEL_INACTIVE;
+		spin_unlock_irqrestore(&nc->lock, flags);
 		ncsi_process_next_channel(ndp);
 
 		break;
@@ -574,10 +599,11 @@
 	struct ncsi_channel *nc = ndp->active_channel;
 	struct ncsi_cmd_arg nca;
 	unsigned char index;
+	unsigned long flags;
 	int ret;
 
 	nca.ndp = ndp;
-	nca.driven = true;
+	nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN;
 	switch (nd->state) {
 	case ncsi_dev_state_config:
 	case ncsi_dev_state_config_sp:
@@ -590,7 +616,7 @@
 		else
 			nca.bytes[0] = 1;
 		nca.package = np->id;
-		nca.channel = 0x1f;
+		nca.channel = NCSI_RESERVED_CHANNEL;
 		ret = ncsi_xmit_cmd(&nca);
 		if (ret)
 			goto error;
@@ -675,10 +701,12 @@
 			goto error;
 		break;
 	case ncsi_dev_state_config_done:
+		spin_lock_irqsave(&nc->lock, flags);
 		if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1)
-			xchg(&nc->state, NCSI_CHANNEL_ACTIVE);
+			nc->state = NCSI_CHANNEL_ACTIVE;
 		else
-			xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
+			nc->state = NCSI_CHANNEL_INACTIVE;
+		spin_unlock_irqrestore(&nc->lock, flags);
 
 		ncsi_start_channel_monitor(nc);
 		ncsi_process_next_channel(ndp);
@@ -707,18 +735,25 @@
 	found = NULL;
 	NCSI_FOR_EACH_PACKAGE(ndp, np) {
 		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			spin_lock_irqsave(&nc->lock, flags);
+
 			if (!list_empty(&nc->link) ||
-			    nc->state != NCSI_CHANNEL_INACTIVE)
+			    nc->state != NCSI_CHANNEL_INACTIVE) {
+				spin_unlock_irqrestore(&nc->lock, flags);
 				continue;
+			}
 
 			if (!found)
 				found = nc;
 
 			ncm = &nc->modes[NCSI_MODE_LINK];
 			if (ncm->data[2] & 0x1) {
+				spin_unlock_irqrestore(&nc->lock, flags);
 				found = nc;
 				goto out;
 			}
+
+			spin_unlock_irqrestore(&nc->lock, flags);
 		}
 	}
 
@@ -797,7 +832,7 @@
 	int ret;
 
 	nca.ndp = ndp;
-	nca.driven = true;
+	nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN;
 	switch (nd->state) {
 	case ncsi_dev_state_probe:
 		nd->state = ncsi_dev_state_probe_deselect;
@@ -807,7 +842,7 @@
 
 		/* Deselect all possible packages */
 		nca.type = NCSI_PKT_CMD_DP;
-		nca.channel = 0x1f;
+		nca.channel = NCSI_RESERVED_CHANNEL;
 		for (index = 0; index < 8; index++) {
 			nca.package = index;
 			ret = ncsi_xmit_cmd(&nca);
@@ -823,7 +858,7 @@
 		/* Select all possible packages */
 		nca.type = NCSI_PKT_CMD_SP;
 		nca.bytes[0] = 1;
-		nca.channel = 0x1f;
+		nca.channel = NCSI_RESERVED_CHANNEL;
 		for (index = 0; index < 8; index++) {
 			nca.package = index;
 			ret = ncsi_xmit_cmd(&nca);
@@ -876,7 +911,7 @@
 		nca.type = NCSI_PKT_CMD_SP;
 		nca.bytes[0] = 1;
 		nca.package = ndp->active_package->id;
-		nca.channel = 0x1f;
+		nca.channel = NCSI_RESERVED_CHANNEL;
 		ret = ncsi_xmit_cmd(&nca);
 		if (ret)
 			goto error;
@@ -884,12 +919,12 @@
 		nd->state = ncsi_dev_state_probe_cis;
 		break;
 	case ncsi_dev_state_probe_cis:
-		ndp->pending_req_num = 32;
+		ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
 
 		/* Clear initial state */
 		nca.type = NCSI_PKT_CMD_CIS;
 		nca.package = ndp->active_package->id;
-		for (index = 0; index < 0x20; index++) {
+		for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) {
 			nca.channel = index;
 			ret = ncsi_xmit_cmd(&nca);
 			if (ret)
@@ -933,7 +968,7 @@
 		/* Deselect the active package */
 		nca.type = NCSI_PKT_CMD_DP;
 		nca.package = ndp->active_package->id;
-		nca.channel = 0x1f;
+		nca.channel = NCSI_RESERVED_CHANNEL;
 		ret = ncsi_xmit_cmd(&nca);
 		if (ret)
 			goto error;
@@ -987,11 +1022,14 @@
 		goto out;
 	}
 
-	old_state = xchg(&nc->state, NCSI_CHANNEL_INVISIBLE);
 	list_del_init(&nc->link);
-
 	spin_unlock_irqrestore(&ndp->lock, flags);
 
+	spin_lock_irqsave(&nc->lock, flags);
+	old_state = nc->state;
+	nc->state = NCSI_CHANNEL_INVISIBLE;
+	spin_unlock_irqrestore(&nc->lock, flags);
+
 	ndp->active_channel = nc;
 	ndp->active_package = nc->package;
 
@@ -1006,7 +1044,7 @@
 		break;
 	default:
 		netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n",
-			   nc->state, nc->package->id, nc->id);
+			   old_state, nc->package->id, nc->id);
 		ncsi_report_link(ndp, false);
 		return -EINVAL;
 	}
@@ -1070,7 +1108,7 @@
 		return NOTIFY_OK;
 
 	nca.ndp = ndp;
-	nca.driven = false;
+	nca.req_flags = 0;
 	nca.package = np->id;
 	nca.channel = nc->id;
 	nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap;
@@ -1118,7 +1156,7 @@
 	/* Initialize private NCSI device */
 	spin_lock_init(&ndp->lock);
 	INIT_LIST_HEAD(&ndp->packages);
-	ndp->request_id = 0;
+	ndp->request_id = NCSI_REQ_START_IDX;
 	for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) {
 		ndp->requests[i].id = i;
 		ndp->requests[i].ndp = ndp;
@@ -1149,9 +1187,7 @@
 int ncsi_start_dev(struct ncsi_dev *nd)
 {
 	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
-	struct ncsi_package *np;
-	struct ncsi_channel *nc;
-	int old_state, ret;
+	int ret;
 
 	if (nd->state != ncsi_dev_state_registered &&
 	    nd->state != ncsi_dev_state_functional)
@@ -1163,15 +1199,6 @@
 		return 0;
 	}
 
-	/* Reset channel's state and start over */
-	NCSI_FOR_EACH_PACKAGE(ndp, np) {
-		NCSI_FOR_EACH_CHANNEL(np, nc) {
-			old_state = xchg(&nc->state, NCSI_CHANNEL_INACTIVE);
-			WARN_ON_ONCE(!list_empty(&nc->link) ||
-				     old_state == NCSI_CHANNEL_INVISIBLE);
-		}
-	}
-
 	if (ndp->flags & NCSI_DEV_HWA)
 		ret = ncsi_enable_hwa(ndp);
 	else
@@ -1181,6 +1208,35 @@
 }
 EXPORT_SYMBOL_GPL(ncsi_start_dev);
 
+void ncsi_stop_dev(struct ncsi_dev *nd)
+{
+	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
+	struct ncsi_package *np;
+	struct ncsi_channel *nc;
+	bool chained;
+	int old_state;
+	unsigned long flags;
+
+	/* Stop the channel monitor and reset channel's state */
+	NCSI_FOR_EACH_PACKAGE(ndp, np) {
+		NCSI_FOR_EACH_CHANNEL(np, nc) {
+			ncsi_stop_channel_monitor(nc);
+
+			spin_lock_irqsave(&nc->lock, flags);
+			chained = !list_empty(&nc->link);
+			old_state = nc->state;
+			nc->state = NCSI_CHANNEL_INACTIVE;
+			spin_unlock_irqrestore(&nc->lock, flags);
+
+			WARN_ON_ONCE(chained ||
+				     old_state == NCSI_CHANNEL_INVISIBLE);
+		}
+	}
+
+	ncsi_report_link(ndp, true);
+}
+EXPORT_SYMBOL_GPL(ncsi_stop_dev);
+
 void ncsi_unregister_dev(struct ncsi_dev *nd)
 {
 	struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index af84389..087db77 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -317,12 +317,12 @@
 	ncm->data[3] = ntohl(rsp->other);
 	ncm->data[4] = ntohl(rsp->oem_status);
 
-	if (nr->driven)
+	if (nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN)
 		return 0;
 
 	/* Reset the channel monitor if it has been enabled */
 	spin_lock_irqsave(&nc->lock, flags);
-	nc->timeout = 0;
+	nc->monitor.state = NCSI_CHANNEL_MONITOR_START;
 	spin_unlock_irqrestore(&nc->lock, flags);
 
 	return 0;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 9266cee..e8d56d9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -474,6 +474,12 @@
 	  This option adds the "meta" expression that you can use to match and
 	  to set packet metainformation such as the packet mark.
 
+config NFT_NUMGEN
+	tristate "Netfilter nf_tables number generator module"
+	help
+	  This option adds the number generator expression used to perform
+	  incremental counting and random numbers bound to a upper limit.
+
 config NFT_CT
 	depends on NF_CONNTRACK
 	tristate "Netfilter nf_tables conntrack module"
@@ -481,13 +487,13 @@
 	  This option adds the "meta" expression that you can use to match
 	  connection tracking information such as the flow state.
 
-config NFT_RBTREE
+config NFT_SET_RBTREE
 	tristate "Netfilter nf_tables rbtree set module"
 	help
 	  This option adds the "rbtree" set type (Red Black tree) that is used
 	  to build interval-based sets.
 
-config NFT_HASH
+config NFT_SET_HASH
 	tristate "Netfilter nf_tables hash set module"
 	help
 	  This option adds the "hash" set type that is used to build one-way
@@ -542,6 +548,12 @@
 	  This is required if you intend to use the userspace queueing
 	  infrastructure (also known as NFQUEUE) from nftables.
 
+config NFT_QUOTA
+	tristate "Netfilter nf_tables quota module"
+	help
+	  This option adds the "quota" expression that you can use to match
+	  enforce bytes quotas.
+
 config NFT_REJECT
 	default m if NETFILTER_ADVANCED=n
 	tristate "Netfilter nf_tables reject support"
@@ -563,6 +575,12 @@
 	  x_tables match/target extensions over the nf_tables
 	  framework.
 
+config NFT_HASH
+	tristate "Netfilter nf_tables hash module"
+	help
+	  This option adds the "hash" expression that you can use to perform
+	  a hash operation on registers.
+
 if NF_TABLES_NETDEV
 
 config NF_DUP_NETDEV
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6913454..c23c3c8 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -71,8 +71,9 @@
 
 # nf_tables
 nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o
 nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+nf_tables-objs += nft_lookup.o nft_dynset.o
 
 obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
 obj-$(CONFIG_NF_TABLES_INET)	+= nf_tables_inet.o
@@ -80,18 +81,21 @@
 obj-$(CONFIG_NFT_COMPAT)	+= nft_compat.o
 obj-$(CONFIG_NFT_EXTHDR)	+= nft_exthdr.o
 obj-$(CONFIG_NFT_META)		+= nft_meta.o
+obj-$(CONFIG_NFT_NUMGEN)	+= nft_numgen.o
 obj-$(CONFIG_NFT_CT)		+= nft_ct.o
 obj-$(CONFIG_NFT_LIMIT)		+= nft_limit.o
 obj-$(CONFIG_NFT_NAT)		+= nft_nat.o
 obj-$(CONFIG_NFT_QUEUE)		+= nft_queue.o
+obj-$(CONFIG_NFT_QUOTA)		+= nft_quota.o
 obj-$(CONFIG_NFT_REJECT) 	+= nft_reject.o
 obj-$(CONFIG_NFT_REJECT_INET)	+= nft_reject_inet.o
-obj-$(CONFIG_NFT_RBTREE)	+= nft_rbtree.o
-obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
+obj-$(CONFIG_NFT_SET_RBTREE)	+= nft_set_rbtree.o
+obj-$(CONFIG_NFT_SET_HASH)	+= nft_set_hash.o
 obj-$(CONFIG_NFT_COUNTER)	+= nft_counter.o
 obj-$(CONFIG_NFT_LOG)		+= nft_log.o
 obj-$(CONFIG_NFT_MASQ)		+= nft_masq.o
 obj-$(CONFIG_NFT_REDIR)		+= nft_redir.o
+obj-$(CONFIG_NFT_HASH)		+= nft_hash.o
 
 # nf_tables netdev
 obj-$(CONFIG_NFT_DUP_NETDEV)	+= nft_dup_netdev.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index f39276d..fa6715d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -22,6 +22,7 @@
 #include <linux/proc_fs.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/rcupdate.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
@@ -61,33 +62,55 @@
 #endif
 
 static DEFINE_MUTEX(nf_hook_mutex);
+#define nf_entry_dereference(e) \
+	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
-static struct list_head *nf_find_hook_list(struct net *net,
-					   const struct nf_hook_ops *reg)
+static struct nf_hook_entry *nf_hook_entry_head(struct net *net,
+						const struct nf_hook_ops *reg)
 {
-	struct list_head *hook_list = NULL;
+	struct nf_hook_entry *hook_head = NULL;
 
 	if (reg->pf != NFPROTO_NETDEV)
-		hook_list = &net->nf.hooks[reg->pf][reg->hooknum];
+		hook_head = nf_entry_dereference(net->nf.hooks[reg->pf]
+						 [reg->hooknum]);
 	else if (reg->hooknum == NF_NETDEV_INGRESS) {
 #ifdef CONFIG_NETFILTER_INGRESS
 		if (reg->dev && dev_net(reg->dev) == net)
-			hook_list = &reg->dev->nf_hooks_ingress;
+			hook_head =
+				nf_entry_dereference(
+					reg->dev->nf_hooks_ingress);
 #endif
 	}
-	return hook_list;
+	return hook_head;
 }
 
-struct nf_hook_entry {
-	const struct nf_hook_ops	*orig_ops;
-	struct nf_hook_ops		ops;
-};
+/* must hold nf_hook_mutex */
+static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg,
+			      struct nf_hook_entry *entry)
+{
+	switch (reg->pf) {
+	case NFPROTO_NETDEV:
+		/* We already checked in nf_register_net_hook() that this is
+		 * used from ingress.
+		 */
+		rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry);
+		break;
+	default:
+		rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum],
+				   entry);
+		break;
+	}
+}
 
 int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-	struct list_head *hook_list;
+	struct nf_hook_entry *hooks_entry;
 	struct nf_hook_entry *entry;
-	struct nf_hook_ops *elem;
+
+	if (reg->pf == NFPROTO_NETDEV &&
+	    (reg->hooknum != NF_NETDEV_INGRESS ||
+	     !reg->dev || dev_net(reg->dev) != net))
+		return -EINVAL;
 
 	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
 	if (!entry)
@@ -95,19 +118,30 @@
 
 	entry->orig_ops	= reg;
 	entry->ops	= *reg;
-
-	hook_list = nf_find_hook_list(net, reg);
-	if (!hook_list) {
-		kfree(entry);
-		return -ENOENT;
-	}
+	entry->next	= NULL;
 
 	mutex_lock(&nf_hook_mutex);
-	list_for_each_entry(elem, hook_list, list) {
-		if (reg->priority < elem->priority)
-			break;
+	hooks_entry = nf_hook_entry_head(net, reg);
+
+	if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) {
+		/* This is the case where we need to insert at the head */
+		entry->next = hooks_entry;
+		hooks_entry = NULL;
 	}
-	list_add_rcu(&entry->ops.list, elem->list.prev);
+
+	while (hooks_entry &&
+		reg->priority >= hooks_entry->orig_ops->priority &&
+		nf_entry_dereference(hooks_entry->next)) {
+		hooks_entry = nf_entry_dereference(hooks_entry->next);
+	}
+
+	if (hooks_entry) {
+		entry->next = nf_entry_dereference(hooks_entry->next);
+		rcu_assign_pointer(hooks_entry->next, entry);
+	} else {
+		nf_set_hooks_head(net, reg, entry);
+	}
+
 	mutex_unlock(&nf_hook_mutex);
 #ifdef CONFIG_NETFILTER_INGRESS
 	if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
@@ -122,24 +156,33 @@
 
 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 {
-	struct list_head *hook_list;
-	struct nf_hook_entry *entry;
-	struct nf_hook_ops *elem;
-
-	hook_list = nf_find_hook_list(net, reg);
-	if (!hook_list)
-		return;
+	struct nf_hook_entry *hooks_entry;
 
 	mutex_lock(&nf_hook_mutex);
-	list_for_each_entry(elem, hook_list, list) {
-		entry = container_of(elem, struct nf_hook_entry, ops);
-		if (entry->orig_ops == reg) {
-			list_del_rcu(&entry->ops.list);
-			break;
-		}
+	hooks_entry = nf_hook_entry_head(net, reg);
+	if (hooks_entry->orig_ops == reg) {
+		nf_set_hooks_head(net, reg,
+				  nf_entry_dereference(hooks_entry->next));
+		goto unlock;
 	}
+	while (hooks_entry && nf_entry_dereference(hooks_entry->next)) {
+		struct nf_hook_entry *next =
+			nf_entry_dereference(hooks_entry->next);
+		struct nf_hook_entry *nnext;
+
+		if (next->orig_ops != reg) {
+			hooks_entry = next;
+			continue;
+		}
+		nnext = nf_entry_dereference(next->next);
+		rcu_assign_pointer(hooks_entry->next, nnext);
+		hooks_entry = next;
+		break;
+	}
+
+unlock:
 	mutex_unlock(&nf_hook_mutex);
-	if (&elem->list == hook_list) {
+	if (!hooks_entry) {
 		WARN(1, "nf_unregister_net_hook: hook not found!\n");
 		return;
 	}
@@ -151,10 +194,10 @@
 	static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
 	synchronize_net();
-	nf_queue_nf_hook_drop(net, &entry->ops);
+	nf_queue_nf_hook_drop(net, hooks_entry);
 	/* other cpu might still process nfqueue verdict that used reg */
 	synchronize_net();
-	kfree(entry);
+	kfree(hooks_entry);
 }
 EXPORT_SYMBOL(nf_unregister_net_hook);
 
@@ -188,19 +231,17 @@
 
 static LIST_HEAD(nf_hook_list);
 
-int nf_register_hook(struct nf_hook_ops *reg)
+static int _nf_register_hook(struct nf_hook_ops *reg)
 {
 	struct net *net, *last;
 	int ret;
 
-	rtnl_lock();
 	for_each_net(net) {
 		ret = nf_register_net_hook(net, reg);
 		if (ret && ret != -ENOENT)
 			goto rollback;
 	}
 	list_add_tail(&reg->list, &nf_hook_list);
-	rtnl_unlock();
 
 	return 0;
 rollback:
@@ -210,19 +251,34 @@
 			break;
 		nf_unregister_net_hook(net, reg);
 	}
+	return ret;
+}
+
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+	int ret;
+
+	rtnl_lock();
+	ret = _nf_register_hook(reg);
 	rtnl_unlock();
+
 	return ret;
 }
 EXPORT_SYMBOL(nf_register_hook);
 
-void nf_unregister_hook(struct nf_hook_ops *reg)
+static void _nf_unregister_hook(struct nf_hook_ops *reg)
 {
 	struct net *net;
 
-	rtnl_lock();
 	list_del(&reg->list);
 	for_each_net(net)
 		nf_unregister_net_hook(net, reg);
+}
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+	rtnl_lock();
+	_nf_unregister_hook(reg);
 	rtnl_unlock();
 }
 EXPORT_SYMBOL(nf_unregister_hook);
@@ -246,6 +302,26 @@
 }
 EXPORT_SYMBOL(nf_register_hooks);
 
+/* Caller MUST take rtnl_lock() */
+int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < n; i++) {
+		err = _nf_register_hook(&reg[i]);
+		if (err)
+			goto err;
+	}
+	return err;
+
+err:
+	if (i > 0)
+		_nf_unregister_hooks(reg, i);
+	return err;
+}
+EXPORT_SYMBOL(_nf_register_hooks);
+
 void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
 {
 	while (n-- > 0)
@@ -253,10 +329,17 @@
 }
 EXPORT_SYMBOL(nf_unregister_hooks);
 
-unsigned int nf_iterate(struct list_head *head,
-			struct sk_buff *skb,
+/* Caller MUST take rtnl_lock */
+void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
+{
+	while (n-- > 0)
+		_nf_unregister_hook(&reg[n]);
+}
+EXPORT_SYMBOL(_nf_unregister_hooks);
+
+unsigned int nf_iterate(struct sk_buff *skb,
 			struct nf_hook_state *state,
-			struct nf_hook_ops **elemp)
+			struct nf_hook_entry **entryp)
 {
 	unsigned int verdict;
 
@@ -264,20 +347,23 @@
 	 * The caller must not block between calls to this
 	 * function because of risk of continuing from deleted element.
 	 */
-	list_for_each_entry_continue_rcu((*elemp), head, list) {
-		if (state->thresh > (*elemp)->priority)
+	while (*entryp) {
+		if (state->thresh > (*entryp)->ops.priority) {
+			*entryp = rcu_dereference((*entryp)->next);
 			continue;
+		}
 
 		/* Optimization: we don't need to hold module
 		   reference here, since function can't sleep. --RR */
 repeat:
-		verdict = (*elemp)->hook((*elemp)->priv, skb, state);
+		verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
 		if (verdict != NF_ACCEPT) {
 #ifdef CONFIG_NETFILTER_DEBUG
 			if (unlikely((verdict & NF_VERDICT_MASK)
 							> NF_MAX_VERDICT)) {
 				NFDEBUG("Evil return from %p(%u).\n",
-					(*elemp)->hook, state->hook);
+					(*entryp)->ops.hook, state->hook);
+				*entryp = rcu_dereference((*entryp)->next);
 				continue;
 			}
 #endif
@@ -285,25 +371,23 @@
 				return verdict;
 			goto repeat;
 		}
+		*entryp = rcu_dereference((*entryp)->next);
 	}
 	return NF_ACCEPT;
 }
 
 
 /* Returns 1 if okfn() needs to be executed by the caller,
- * -EPERM for NF_DROP, 0 otherwise. */
+ * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
 {
-	struct nf_hook_ops *elem;
+	struct nf_hook_entry *entry;
 	unsigned int verdict;
 	int ret = 0;
 
-	/* We may already have this, but read-locks nest anyway */
-	rcu_read_lock();
-
-	elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
+	entry = rcu_dereference(state->hook_entries);
 next_hook:
-	verdict = nf_iterate(state->hook_list, skb, state, &elem);
+	verdict = nf_iterate(skb, state, &entry);
 	if (verdict == NF_ACCEPT || verdict == NF_STOP) {
 		ret = 1;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
@@ -312,8 +396,10 @@
 		if (ret == 0)
 			ret = -EPERM;
 	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
-		int err = nf_queue(skb, elem, state,
-				   verdict >> NF_VERDICT_QBITS);
+		int err;
+
+		RCU_INIT_POINTER(state->hook_entries, entry);
+		err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
 		if (err < 0) {
 			if (err == -ESRCH &&
 			   (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
@@ -321,7 +407,6 @@
 			kfree_skb(skb);
 		}
 	}
-	rcu_read_unlock();
 	return ret;
 }
 EXPORT_SYMBOL(nf_hook_slow);
@@ -441,7 +526,7 @@
 
 	for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
 		for (h = 0; h < NF_MAX_HOOKS; h++)
-			INIT_LIST_HEAD(&net->nf.hooks[i][h]);
+			RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
 	}
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index f04fd8d..fc230d9 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -281,13 +281,10 @@
 	h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
 	if (h) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
-		/* Show what happens instead of calling nf_ct_kill() */
-		if (del_timer(&ct->timeout)) {
-			IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
+		if (nf_ct_kill(ct)) {
+			IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple="
 				FMT_TUPLE "\n",
 				__func__, ct, ARG_TUPLE(&tuple));
-			if (ct->timeout.function)
-				ct->timeout.function(ct->timeout.data);
 		} else {
 			IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
 				FMT_TUPLE "\n",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index dd2c43ab..ba6a1d4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -72,12 +72,24 @@
 struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_hash);
 
+struct conntrack_gc_work {
+	struct delayed_work	dwork;
+	u32			last_bucket;
+	bool			exiting;
+};
+
 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
 static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
-static __read_mostly seqcount_t nf_conntrack_generation;
 static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;
 
+#define GC_MAX_BUCKETS_DIV	64u
+#define GC_MAX_BUCKETS		8192u
+#define GC_INTERVAL		(5 * HZ)
+#define GC_MAX_EVICTS		256u
+
+static struct conntrack_gc_work conntrack_gc_work;
+
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
 {
 	spin_lock(lock);
@@ -164,7 +176,7 @@
 EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
 
 unsigned int nf_conntrack_max __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_max);
+seqcount_t nf_conntrack_generation __read_mostly;
 
 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
@@ -367,12 +379,10 @@
 destroy_conntrack(struct nf_conntrack *nfct)
 {
 	struct nf_conn *ct = (struct nf_conn *)nfct;
-	struct net *net = nf_ct_net(ct);
 	struct nf_conntrack_l4proto *l4proto;
 
 	pr_debug("destroy_conntrack(%p)\n", ct);
 	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
-	NF_CT_ASSERT(!timer_pending(&ct->timeout));
 
 	if (unlikely(nf_ct_is_template(ct))) {
 		nf_ct_tmpl_free(ct);
@@ -395,7 +405,6 @@
 
 	nf_ct_del_from_dying_or_unconfirmed_list(ct);
 
-	NF_CT_STAT_INC(net, delete);
 	local_bh_enable();
 
 	if (ct->master)
@@ -427,7 +436,6 @@
 
 	nf_ct_add_to_dying_list(ct);
 
-	NF_CT_STAT_INC(net, delete_list);
 	local_bh_enable();
 }
 
@@ -435,35 +443,30 @@
 {
 	struct nf_conn_tstamp *tstamp;
 
+	if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+		return false;
+
 	tstamp = nf_conn_tstamp_find(ct);
 	if (tstamp && tstamp->stop == 0)
 		tstamp->stop = ktime_get_real_ns();
 
-	if (nf_ct_is_dying(ct))
-		goto delete;
-
 	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
 				    portid, report) < 0) {
-		/* destroy event was not delivered */
+		/* destroy event was not delivered. nf_ct_put will
+		 * be done by event cache worker on redelivery.
+		 */
 		nf_ct_delete_from_lists(ct);
 		nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
 		return false;
 	}
 
 	nf_conntrack_ecache_work(nf_ct_net(ct));
-	set_bit(IPS_DYING_BIT, &ct->status);
- delete:
 	nf_ct_delete_from_lists(ct);
 	nf_ct_put(ct);
 	return true;
 }
 EXPORT_SYMBOL_GPL(nf_ct_delete);
 
-static void death_by_timeout(unsigned long ul_conntrack)
-{
-	nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
-}
-
 static inline bool
 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
 		const struct nf_conntrack_tuple *tuple,
@@ -481,22 +484,17 @@
 	       net_eq(net, nf_ct_net(ct));
 }
 
-/* must be called with rcu read lock held */
-void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+/* caller must hold rcu readlock and none of the nf_conntrack_locks */
+static void nf_ct_gc_expired(struct nf_conn *ct)
 {
-	struct hlist_nulls_head *hptr;
-	unsigned int sequence, hsz;
+	if (!atomic_inc_not_zero(&ct->ct_general.use))
+		return;
 
-	do {
-		sequence = read_seqcount_begin(&nf_conntrack_generation);
-		hsz = nf_conntrack_htable_size;
-		hptr = nf_conntrack_hash;
-	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+	if (nf_ct_should_gc(ct))
+		nf_ct_kill(ct);
 
-	*hash = hptr;
-	*hsize = hsz;
+	nf_ct_put(ct);
 }
-EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
 
 /*
  * Warning :
@@ -510,21 +508,26 @@
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_head *ct_hash;
 	struct hlist_nulls_node *n;
-	unsigned int bucket, sequence;
+	unsigned int bucket, hsize;
 
 begin:
-	do {
-		sequence = read_seqcount_begin(&nf_conntrack_generation);
-		bucket = scale_hash(hash);
-		ct_hash = nf_conntrack_hash;
-	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+	nf_conntrack_get_ht(&ct_hash, &hsize);
+	bucket = reciprocal_scale(hash, hsize);
 
 	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
-		if (nf_ct_key_equal(h, tuple, zone, net)) {
-			NF_CT_STAT_INC_ATOMIC(net, found);
-			return h;
+		struct nf_conn *ct;
+
+		ct = nf_ct_tuplehash_to_ctrack(h);
+		if (nf_ct_is_expired(ct)) {
+			nf_ct_gc_expired(ct);
+			continue;
 		}
-		NF_CT_STAT_INC_ATOMIC(net, searched);
+
+		if (nf_ct_is_dying(ct))
+			continue;
+
+		if (nf_ct_key_equal(h, tuple, zone, net))
+			return h;
 	}
 	/*
 	 * if the nulls value we got at the end of this lookup is
@@ -618,7 +621,6 @@
 				    zone, net))
 			goto out;
 
-	add_timer(&ct->timeout);
 	smp_wmb();
 	/* The caller holds a reference to this object */
 	atomic_set(&ct->ct_general.use, 2);
@@ -771,8 +773,7 @@
 	/* Timer relative to confirmation time, not original
 	   setting time, otherwise we'd get timer wrap in
 	   weird delay cases. */
-	ct->timeout.expires += jiffies;
-	add_timer(&ct->timeout);
+	ct->timeout += nfct_time_stamp;
 	atomic_inc(&ct->ct_general.use);
 	ct->status |= IPS_CONFIRMED;
 
@@ -791,7 +792,6 @@
 	 */
 	__nf_conntrack_hash_insert(ct, hash, reply_hash);
 	nf_conntrack_double_unlock(hash, reply_hash);
-	NF_CT_STAT_INC(net, insert);
 	local_bh_enable();
 
 	help = nfct_help(ct);
@@ -823,29 +823,40 @@
 	const struct nf_conntrack_zone *zone;
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_head *ct_hash;
-	unsigned int hash, sequence;
+	unsigned int hash, hsize;
 	struct hlist_nulls_node *n;
 	struct nf_conn *ct;
 
 	zone = nf_ct_zone(ignored_conntrack);
 
 	rcu_read_lock();
-	do {
-		sequence = read_seqcount_begin(&nf_conntrack_generation);
-		hash = hash_conntrack(net, tuple);
-		ct_hash = nf_conntrack_hash;
-	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ begin:
+	nf_conntrack_get_ht(&ct_hash, &hsize);
+	hash = __hash_conntrack(net, tuple, hsize);
 
 	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
 		ct = nf_ct_tuplehash_to_ctrack(h);
-		if (ct != ignored_conntrack &&
-		    nf_ct_key_equal(h, tuple, zone, net)) {
+
+		if (ct == ignored_conntrack)
+			continue;
+
+		if (nf_ct_is_expired(ct)) {
+			nf_ct_gc_expired(ct);
+			continue;
+		}
+
+		if (nf_ct_key_equal(h, tuple, zone, net)) {
 			NF_CT_STAT_INC_ATOMIC(net, found);
 			rcu_read_unlock();
 			return 1;
 		}
-		NF_CT_STAT_INC_ATOMIC(net, searched);
 	}
+
+	if (get_nulls_value(n) != hash) {
+		NF_CT_STAT_INC_ATOMIC(net, search_restart);
+		goto begin;
+	}
+
 	rcu_read_unlock();
 
 	return 0;
@@ -867,6 +878,11 @@
 	hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
 		tmp = nf_ct_tuplehash_to_ctrack(h);
 
+		if (nf_ct_is_expired(tmp)) {
+			nf_ct_gc_expired(tmp);
+			continue;
+		}
+
 		if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
 		    !net_eq(nf_ct_net(tmp), net) ||
 		    nf_ct_is_dying(tmp))
@@ -884,7 +900,6 @@
 		 */
 		if (net_eq(nf_ct_net(tmp), net) &&
 		    nf_ct_is_confirmed(tmp) &&
-		    del_timer(&tmp->timeout) &&
 		    nf_ct_delete(tmp, 0, 0))
 			drops++;
 
@@ -900,14 +915,11 @@
 
 	for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
 		struct hlist_nulls_head *ct_hash;
-		unsigned hash, sequence, drops;
+		unsigned int hash, hsize, drops;
 
 		rcu_read_lock();
-		do {
-			sequence = read_seqcount_begin(&nf_conntrack_generation);
-			hash = scale_hash(_hash++);
-			ct_hash = nf_conntrack_hash;
-		} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+		nf_conntrack_get_ht(&ct_hash, &hsize);
+		hash = reciprocal_scale(_hash++, hsize);
 
 		drops = early_drop_list(net, &ct_hash[hash]);
 		rcu_read_unlock();
@@ -921,6 +933,69 @@
 	return false;
 }
 
+static void gc_worker(struct work_struct *work)
+{
+	unsigned int i, goal, buckets = 0, expired_count = 0;
+	unsigned long next_run = GC_INTERVAL;
+	unsigned int ratio, scanned = 0;
+	struct conntrack_gc_work *gc_work;
+
+	gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+
+	goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+	i = gc_work->last_bucket;
+
+	do {
+		struct nf_conntrack_tuple_hash *h;
+		struct hlist_nulls_head *ct_hash;
+		struct hlist_nulls_node *n;
+		unsigned int hashsz;
+		struct nf_conn *tmp;
+
+		i++;
+		rcu_read_lock();
+
+		nf_conntrack_get_ht(&ct_hash, &hashsz);
+		if (i >= hashsz)
+			i = 0;
+
+		hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+			tmp = nf_ct_tuplehash_to_ctrack(h);
+
+			scanned++;
+			if (nf_ct_is_expired(tmp)) {
+				nf_ct_gc_expired(tmp);
+				expired_count++;
+				continue;
+			}
+		}
+
+		/* could check get_nulls_value() here and restart if ct
+		 * was moved to another chain.  But given gc is best-effort
+		 * we will just continue with next hash slot.
+		 */
+		rcu_read_unlock();
+		cond_resched_rcu_qs();
+	} while (++buckets < goal &&
+		 expired_count < GC_MAX_EVICTS);
+
+	if (gc_work->exiting)
+		return;
+
+	ratio = scanned ? expired_count * 100 / scanned : 0;
+	if (ratio >= 90)
+		next_run = 0;
+
+	gc_work->last_bucket = i;
+	schedule_delayed_work(&gc_work->dwork, next_run);
+}
+
+static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+{
+	INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+	gc_work->exiting = false;
+}
+
 static struct nf_conn *
 __nf_conntrack_alloc(struct net *net,
 		     const struct nf_conntrack_zone *zone,
@@ -957,8 +1032,6 @@
 	/* save hash for reusing when confirming */
 	*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
 	ct->status = 0;
-	/* Don't set timer yet: wait for confirmation */
-	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
 	write_pnet(&ct->ct_net, net);
 	memset(&ct->__nfct_init_offset[0], 0,
 	       offsetof(struct nf_conn, proto) -
@@ -1035,9 +1108,9 @@
 	if (IS_ERR(ct))
 		return (struct nf_conntrack_tuple_hash *)ct;
 
-	if (tmpl && nfct_synproxy(tmpl)) {
-		nfct_seqadj_ext_add(ct);
-		nfct_synproxy_ext_add(ct);
+	if (!nf_ct_add_synproxy(ct, tmpl)) {
+		nf_conntrack_free(ct);
+		return ERR_PTR(-ENOMEM);
 	}
 
 	timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
@@ -1096,10 +1169,8 @@
 		}
 		spin_unlock(&nf_conntrack_expect_lock);
 	}
-	if (!exp) {
+	if (!exp)
 		__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
-		NF_CT_STAT_INC(net, new);
-	}
 
 	/* Now it is inserted into the unconfirmed list, bump refcount */
 	nf_conntrack_get(&ct->ct_general);
@@ -1204,7 +1275,7 @@
 		skb->nfct = NULL;
 	}
 
-	/* rcu_read_lock()ed by nf_hook_slow */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	l3proto = __nf_ct_l3proto_find(pf);
 	ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
 				   &dataoff, &protonum);
@@ -1332,7 +1403,6 @@
 			  unsigned long extra_jiffies,
 			  int do_acct)
 {
-	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
 	NF_CT_ASSERT(skb);
 
 	/* Only update if this is not a fixed timeout */
@@ -1340,39 +1410,25 @@
 		goto acct;
 
 	/* If not in hash table, timer will not be active yet */
-	if (!nf_ct_is_confirmed(ct)) {
-		ct->timeout.expires = extra_jiffies;
-	} else {
-		unsigned long newtime = jiffies + extra_jiffies;
+	if (nf_ct_is_confirmed(ct))
+		extra_jiffies += nfct_time_stamp;
 
-		/* Only update the timeout if the new timeout is at least
-		   HZ jiffies from the old timeout. Need del_timer for race
-		   avoidance (may already be dying). */
-		if (newtime - ct->timeout.expires >= HZ)
-			mod_timer_pending(&ct->timeout, newtime);
-	}
-
+	ct->timeout = extra_jiffies;
 acct:
 	if (do_acct)
 		nf_ct_acct_update(ct, ctinfo, skb->len);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
-bool __nf_ct_kill_acct(struct nf_conn *ct,
-		       enum ip_conntrack_info ctinfo,
-		       const struct sk_buff *skb,
-		       int do_acct)
+bool nf_ct_kill_acct(struct nf_conn *ct,
+		     enum ip_conntrack_info ctinfo,
+		     const struct sk_buff *skb)
 {
-	if (do_acct)
-		nf_ct_acct_update(ct, ctinfo, skb->len);
+	nf_ct_acct_update(ct, ctinfo, skb->len);
 
-	if (del_timer(&ct->timeout)) {
-		ct->timeout.function((unsigned long)ct);
-		return true;
-	}
-	return false;
+	return nf_ct_delete(ct, 0, 0);
 }
-EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
 
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
 
@@ -1505,11 +1561,8 @@
 
 	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
 		/* Time to push up daises... */
-		if (del_timer(&ct->timeout))
-			nf_ct_delete(ct, portid, report);
 
-		/* ... else the timer will get him soon. */
-
+		nf_ct_delete(ct, portid, report);
 		nf_ct_put(ct);
 		cond_resched();
 	}
@@ -1545,6 +1598,7 @@
 
 void nf_conntrack_cleanup_start(void)
 {
+	conntrack_gc_work.exiting = true;
 	RCU_INIT_POINTER(ip_ct_attach, NULL);
 }
 
@@ -1554,6 +1608,7 @@
 	while (untrack_refs() > 0)
 		schedule();
 
+	cancel_delayed_work_sync(&conntrack_gc_work.dwork);
 	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
 
 	nf_conntrack_proto_fini();
@@ -1828,6 +1883,10 @@
 	}
 	/*  - and look it like as a confirmed connection */
 	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
+
+	conntrack_gc_work_init(&conntrack_gc_work);
+	schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+
 	return 0;
 
 err_proto:
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index d28011b..da9df2d 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -49,8 +49,13 @@
 
 	hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
 		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+		struct nf_conntrack_ecache *e;
 
-		if (nf_ct_is_dying(ct))
+		if (!nf_ct_is_confirmed(ct))
+			continue;
+
+		e = nf_ct_ecache_find(ct);
+		if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
 			continue;
 
 		if (nf_conntrack_event(IPCT_DESTROY, ct)) {
@@ -58,8 +63,7 @@
 			break;
 		}
 
-		/* we've got the event delivered, now it's dying */
-		set_bit(IPS_DYING_BIT, &ct->status);
+		e->state = NFCT_ECACHE_DESTROY_SENT;
 		refs[evicted] = ct;
 
 		if (++evicted >= ARRAY_SIZE(refs)) {
@@ -130,7 +134,7 @@
 	if (!e)
 		goto out_unlock;
 
-	if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
+	if (nf_ct_is_confirmed(ct)) {
 		struct nf_ct_event item = {
 			.ct	= ct,
 			.portid	= e->portid ? e->portid : portid,
@@ -150,11 +154,13 @@
 				 * triggered by a process, we store the PORTID
 				 * to include it in the retransmission.
 				 */
-				if (eventmask & (1 << IPCT_DESTROY) &&
-				    e->portid == 0 && portid != 0)
-					e->portid = portid;
-				else
+				if (eventmask & (1 << IPCT_DESTROY)) {
+					if (e->portid == 0 && portid != 0)
+						e->portid = portid;
+					e->state = NFCT_ECACHE_DESTROY_FAIL;
+				} else {
 					e->missed |= eventmask;
+				}
 			} else {
 				e->missed &= ~missed;
 			}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 9e36931..f8dbacf 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -574,7 +574,7 @@
 	helper = rcu_dereference(nfct_help(expect->master)->helper);
 	if (helper) {
 		seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
-		if (helper->expect_policy[expect->class].name)
+		if (helper->expect_policy[expect->class].name[0])
 			seq_printf(s, "/%s",
 				   helper->expect_policy[expect->class].name);
 	}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 4314700..e3ed200 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -237,7 +237,7 @@
 	}
 	delim = data[0];
 	if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) {
-		pr_debug("try_eprt: invalid delimitter.\n");
+		pr_debug("try_eprt: invalid delimiter.\n");
 		return 0;
 	}
 
@@ -301,8 +301,6 @@
 	size_t i = plen;
 
 	pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen);
-	if (dlen == 0)
-		return 0;
 
 	if (dlen <= plen) {
 		/* Short packet: try for partial? */
@@ -311,19 +309,8 @@
 		else return 0;
 	}
 
-	if (strncasecmp(data, pattern, plen) != 0) {
-#if 0
-		size_t i;
-
-		pr_debug("ftp: string mismatch\n");
-		for (i = 0; i < plen; i++) {
-			pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
-				 i, data[i], data[i],
-				 pattern[i], pattern[i]);
-		}
-#endif
+	if (strncasecmp(data, pattern, plen) != 0)
 		return 0;
-	}
 
 	pr_debug("Pattern matches!\n");
 	/* Now we've found the constant string, try to skip
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index bb77a97..f65d9363 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -736,7 +736,7 @@
 	const struct nf_afinfo *afinfo;
 	int ret = 0;
 
-	/* rcu_read_lock()ed by nf_hook_slow() */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	afinfo = nf_get_afinfo(family);
 	if (!afinfo)
 		return 0;
@@ -1473,7 +1473,8 @@
 				 "timeout to %u seconds for",
 				 info->timeout);
 			nf_ct_dump_tuple(&exp->tuple);
-			mod_timer(&exp->timeout, jiffies + info->timeout * HZ);
+			mod_timer_pending(&exp->timeout,
+					  jiffies + info->timeout * HZ);
 		}
 		spin_unlock_bh(&nf_conntrack_expect_lock);
 	}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index b989b81..336e215 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -189,7 +189,6 @@
 	struct nf_conntrack_helper *helper = NULL;
 	struct nf_conn_help *help;
 	struct net *net = nf_ct_net(ct);
-	int ret = 0;
 
 	/* We already got a helper explicitly attached. The function
 	 * nf_conntrack_alter_reply - in case NAT is in use - asks for looking
@@ -223,15 +222,13 @@
 	if (helper == NULL) {
 		if (help)
 			RCU_INIT_POINTER(help->helper, NULL);
-		goto out;
+		return 0;
 	}
 
 	if (help == NULL) {
 		help = nf_ct_helper_ext_add(ct, helper, flags);
-		if (help == NULL) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (help == NULL)
+			return -ENOMEM;
 	} else {
 		/* We only allow helper re-assignment of the same sort since
 		 * we cannot reallocate the helper extension area.
@@ -240,13 +237,13 @@
 
 		if (tmp && tmp->help != helper->help) {
 			RCU_INIT_POINTER(help->helper, NULL);
-			goto out;
+			return 0;
 		}
 	}
 
 	rcu_assign_pointer(help->helper, helper);
-out:
-	return ret;
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
 
@@ -349,7 +346,7 @@
 	/* Called from the helper function, this call never fails */
 	help = nfct_help(ct);
 
-	/* rcu_read_lock()ed by nf_hook_slow */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	helper = rcu_dereference(help->helper);
 
 	nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 050bb34..2754045 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -149,10 +149,7 @@
 
 static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
 {
-	long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ;
-
-	if (timeout < 0)
-		timeout = 0;
+	long timeout = nf_ct_expires(ct) / HZ;
 
 	if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout)))
 		goto nla_put_failure;
@@ -818,14 +815,23 @@
 	struct hlist_nulls_node *n;
 	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
 	u_int8_t l3proto = nfmsg->nfgen_family;
-	int res;
+	struct nf_conn *nf_ct_evict[8];
+	int res, i;
 	spinlock_t *lockp;
 
 	last = (struct nf_conn *)cb->args[1];
+	i = 0;
 
 	local_bh_disable();
 	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
 restart:
+		while (i) {
+			i--;
+			if (nf_ct_should_gc(nf_ct_evict[i]))
+				nf_ct_kill(nf_ct_evict[i]);
+			nf_ct_put(nf_ct_evict[i]);
+		}
+
 		lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS];
 		nf_conntrack_lock(lockp);
 		if (cb->args[0] >= nf_conntrack_htable_size) {
@@ -837,6 +843,13 @@
 			if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
 				continue;
 			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (nf_ct_is_expired(ct)) {
+				if (i < ARRAY_SIZE(nf_ct_evict) &&
+				    atomic_inc_not_zero(&ct->ct_general.use))
+					nf_ct_evict[i++] = ct;
+				continue;
+			}
+
 			if (!net_eq(net, nf_ct_net(ct)))
 				continue;
 
@@ -878,6 +891,13 @@
 	if (last)
 		nf_ct_put(last);
 
+	while (i) {
+		i--;
+		if (nf_ct_should_gc(nf_ct_evict[i]))
+			nf_ct_kill(nf_ct_evict[i]);
+		nf_ct_put(nf_ct_evict[i]);
+	}
+
 	return skb->len;
 }
 
@@ -1147,9 +1167,7 @@
 		}
 	}
 
-	if (del_timer(&ct->timeout))
-		nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
-
+	nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh));
 	nf_ct_put(ct);
 
 	return 0;
@@ -1517,11 +1535,10 @@
 {
 	u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
 
-	if (!del_timer(&ct->timeout))
-		return -ETIME;
+	ct->timeout = nfct_time_stamp + timeout * HZ;
 
-	ct->timeout.expires = jiffies + timeout * HZ;
-	add_timer(&ct->timeout);
+	if (test_bit(IPS_DYING_BIT, &ct->status))
+		return -ETIME;
 
 	return 0;
 }
@@ -1719,9 +1736,8 @@
 
 	if (!cda[CTA_TIMEOUT])
 		goto err1;
-	ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
 
-	ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+	ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ;
 
 	rcu_read_lock();
  	if (cda[CTA_HELP]) {
@@ -1894,6 +1910,8 @@
 
 			if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
 				return -EINVAL;
+			if (otuple.dst.protonum != rtuple.dst.protonum)
+				return -EINVAL;
 
 			ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
 							&rtuple, u3);
@@ -1966,13 +1984,9 @@
 	nfmsg->version      = NFNETLINK_V0;
 	nfmsg->res_id	    = htons(cpu);
 
-	if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) ||
-	    nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
-	    nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) ||
+	if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
 	    nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
 	    nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) ||
-	    nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) ||
-	    nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) ||
 	    nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
 	    nla_put_be32(skb, CTA_STATS_INSERT_FAILED,
 				htonl(st->insert_failed)) ||
@@ -2362,12 +2376,8 @@
 		return PTR_ERR(exp);
 
 	err = nf_ct_expect_related_report(exp, portid, report);
-	if (err < 0) {
-		nf_ct_expect_put(exp);
-		return err;
-	}
-
-	return 0;
+	nf_ct_expect_put(exp);
+	return err;
 }
 
 static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 5588c7a..f60a475 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -157,8 +157,7 @@
 		pr_debug("setting timeout of conntrack %p to 0\n", sibling);
 		sibling->proto.gre.timeout	  = 0;
 		sibling->proto.gre.stream_timeout = 0;
-		if (del_timer(&sibling->timeout))
-			sibling->timeout.function((unsigned long)sibling);
+		nf_ct_kill(sibling);
 		nf_ct_put(sibling);
 		return 1;
 	} else {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index b65d586..8d2c7d8 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -159,54 +159,6 @@
 	       nf_ct_l3num(i) == l4proto->l3proto;
 }
 
-static struct nf_ip_net *nf_ct_l3proto_net(struct net *net,
-					   struct nf_conntrack_l3proto *l3proto)
-{
-	if (l3proto->l3proto == PF_INET)
-		return &net->ct.nf_ct_proto;
-	else
-		return NULL;
-}
-
-static int nf_ct_l3proto_register_sysctl(struct net *net,
-					 struct nf_conntrack_l3proto *l3proto)
-{
-	int err = 0;
-	struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
-	/* nf_conntrack_l3proto_ipv6 doesn't support sysctl */
-	if (in == NULL)
-		return 0;
-
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-	if (in->ctl_table != NULL) {
-		err = nf_ct_register_sysctl(net,
-					    &in->ctl_table_header,
-					    l3proto->ctl_table_path,
-					    in->ctl_table);
-		if (err < 0) {
-			kfree(in->ctl_table);
-			in->ctl_table = NULL;
-		}
-	}
-#endif
-	return err;
-}
-
-static void nf_ct_l3proto_unregister_sysctl(struct net *net,
-					    struct nf_conntrack_l3proto *l3proto)
-{
-	struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto);
-
-	if (in == NULL)
-		return;
-#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
-	if (in->ctl_table_header != NULL)
-		nf_ct_unregister_sysctl(&in->ctl_table_header,
-					&in->ctl_table,
-					0);
-#endif
-}
-
 int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto)
 {
 	int ret = 0;
@@ -241,7 +193,7 @@
 int nf_ct_l3proto_pernet_register(struct net *net,
 				  struct nf_conntrack_l3proto *proto)
 {
-	int ret = 0;
+	int ret;
 
 	if (proto->init_net) {
 		ret = proto->init_net(net);
@@ -249,7 +201,7 @@
 			return ret;
 	}
 
-	return nf_ct_l3proto_register_sysctl(net, proto);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register);
 
@@ -272,8 +224,6 @@
 void nf_ct_l3proto_pernet_unregister(struct net *net,
 				     struct nf_conntrack_l3proto *proto)
 {
-	nf_ct_l3proto_unregister_sysctl(net, proto);
-
 	/* Remove all contrack entries for this protocol */
 	nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0);
 }
@@ -312,26 +262,6 @@
 			}
 		}
 	}
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) {
-		if (err < 0) {
-			nf_ct_kfree_compat_sysctl_table(pn);
-			goto out;
-		}
-		err = nf_ct_register_sysctl(net,
-					    &pn->ctl_compat_header,
-					    "net/ipv4/netfilter",
-					    pn->ctl_compat_table);
-		if (err == 0)
-			goto out;
-
-		nf_ct_kfree_compat_sysctl_table(pn);
-		nf_ct_unregister_sysctl(&pn->ctl_table_header,
-					&pn->ctl_table,
-					pn->users);
-	}
-out:
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 	return err;
 }
@@ -346,13 +276,6 @@
 		nf_ct_unregister_sysctl(&pn->ctl_table_header,
 					&pn->ctl_table,
 					pn->users);
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL)
-		nf_ct_unregister_sysctl(&pn->ctl_compat_header,
-					&pn->ctl_compat_table,
-					0);
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 }
 
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 399a38f..a45bee5 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -402,7 +402,8 @@
 {
 	struct dccp_hdr _hdr, *dh;
 
-	dh = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	/* Actually only need first 4 bytes to get ports. */
+	dh = skb_header_pointer(skb, dataoff, 4, &_hdr);
 	if (dh == NULL)
 		return false;
 
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 86dc752..d5868ba 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -151,17 +151,6 @@
 	},
 	{ }
 };
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table generic_compat_sysctl_table[] = {
-	{
-		.procname	= "ip_conntrack_generic_timeout",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
 static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -179,40 +168,14 @@
 	return 0;
 }
 
-static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-					       struct nf_generic_net *gn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table,
-				       sizeof(generic_compat_sysctl_table),
-				       GFP_KERNEL);
-	if (!pn->ctl_compat_table)
-		return -ENOMEM;
-
-	pn->ctl_compat_table[0].data = &gn->timeout;
-#endif
-#endif
-	return 0;
-}
-
 static int generic_init_net(struct net *net, u_int16_t proto)
 {
-	int ret;
 	struct nf_generic_net *gn = generic_pernet(net);
 	struct nf_proto_net *pn = &gn->pn;
 
 	gn->timeout = nf_ct_generic_timeout;
 
-	ret = generic_kmemdup_compat_sysctl_table(pn, gn);
-	if (ret < 0)
-		return ret;
-
-	ret = generic_kmemdup_sysctl_table(pn, gn);
-	if (ret < 0)
-		nf_ct_kfree_compat_sysctl_table(pn);
-
-	return ret;
+	return generic_kmemdup_sysctl_table(pn, gn);
 }
 
 static struct nf_proto_net *generic_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a96451a..9a715f8 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -192,15 +192,15 @@
 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
 			     struct net *net, struct nf_conntrack_tuple *tuple)
 {
-	const struct gre_hdr_pptp *pgrehdr;
-	struct gre_hdr_pptp _pgrehdr;
+	const struct pptp_gre_header *pgrehdr;
+	struct pptp_gre_header _pgrehdr;
 	__be16 srckey;
-	const struct gre_hdr *grehdr;
-	struct gre_hdr _grehdr;
+	const struct gre_base_hdr *grehdr;
+	struct gre_base_hdr _grehdr;
 
 	/* first only delinearize old RFC1701 GRE header */
 	grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
-	if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+	if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) {
 		/* try to behave like "nf_conntrack_proto_generic" */
 		tuple->src.u.all = 0;
 		tuple->dst.u.all = 0;
@@ -212,8 +212,8 @@
 	if (!pgrehdr)
 		return true;
 
-	if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
-		pr_debug("GRE_VERSION_PPTP but unknown proto\n");
+	if (grehdr->protocol != GRE_PROTO_PPP) {
+		pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol));
 		return false;
 	}
 
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 1d7ab96..982ea62 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -161,8 +161,8 @@
 	const struct sctphdr *hp;
 	struct sctphdr _hdr;
 
-	/* Actually only need first 8 bytes. */
-	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+	/* Actually only need first 4 bytes to get ports. */
+	hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
 	if (hp == NULL)
 		return false;
 
@@ -705,54 +705,6 @@
 	},
 	{ }
 };
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table sctp_compat_sysctl_table[] = {
-	{
-		.procname	= "ip_conntrack_sctp_timeout_closed",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_sctp_timeout_cookie_wait",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_sctp_timeout_cookie_echoed",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_sctp_timeout_established",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_sctp_timeout_shutdown_sent",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_sctp_timeout_shutdown_recd",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_sctp_timeout_shutdown_ack_sent",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif
 
 static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -781,32 +733,8 @@
 	return 0;
 }
 
-static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-					    struct sctp_net *sn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table,
-				       sizeof(sctp_compat_sysctl_table),
-				       GFP_KERNEL);
-	if (!pn->ctl_compat_table)
-		return -ENOMEM;
-
-	pn->ctl_compat_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED];
-	pn->ctl_compat_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT];
-	pn->ctl_compat_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED];
-	pn->ctl_compat_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED];
-	pn->ctl_compat_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
-	pn->ctl_compat_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
-	pn->ctl_compat_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
-#endif
-#endif
-	return 0;
-}
-
 static int sctp_init_net(struct net *net, u_int16_t proto)
 {
-	int ret;
 	struct sctp_net *sn = sctp_pernet(net);
 	struct nf_proto_net *pn = &sn->pn;
 
@@ -817,18 +745,7 @@
 			sn->timeouts[i] = sctp_timeouts[i];
 	}
 
-	if (proto == AF_INET) {
-		ret = sctp_kmemdup_compat_sysctl_table(pn, sn);
-		if (ret < 0)
-			return ret;
-
-		ret = sctp_kmemdup_sysctl_table(pn, sn);
-		if (ret < 0)
-			nf_ct_kfree_compat_sysctl_table(pn);
-	} else
-		ret = sctp_kmemdup_sysctl_table(pn, sn);
-
-	return ret;
+	return sctp_kmemdup_sysctl_table(pn, sn);
 }
 
 static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 70c8381..69f6877 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -282,8 +282,8 @@
 	const struct tcphdr *hp;
 	struct tcphdr _hdr;
 
-	/* Actually only need first 8 bytes. */
-	hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
+	/* Actually only need first 4 bytes to get ports. */
+	hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
 	if (hp == NULL)
 		return false;
 
@@ -1481,90 +1481,6 @@
 	},
 	{ }
 };
-
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table tcp_compat_sysctl_table[] = {
-	{
-		.procname	= "ip_conntrack_tcp_timeout_syn_sent",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_timeout_syn_sent2",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_timeout_syn_recv",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_timeout_established",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_timeout_fin_wait",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_timeout_close_wait",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_timeout_last_ack",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_timeout_time_wait",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_timeout_close",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_timeout_max_retrans",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_loose",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_be_liberal",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "ip_conntrack_tcp_max_retrans",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{ }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
 static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -1597,38 +1513,8 @@
 	return 0;
 }
 
-static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-					   struct nf_tcp_net *tn)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
-				       sizeof(tcp_compat_sysctl_table),
-				       GFP_KERNEL);
-	if (!pn->ctl_compat_table)
-		return -ENOMEM;
-
-	pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
-	pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2];
-	pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
-	pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
-	pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
-	pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
-	pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
-	pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
-	pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
-	pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
-	pn->ctl_compat_table[10].data = &tn->tcp_loose;
-	pn->ctl_compat_table[11].data = &tn->tcp_be_liberal;
-	pn->ctl_compat_table[12].data = &tn->tcp_max_retrans;
-#endif
-#endif
-	return 0;
-}
-
 static int tcp_init_net(struct net *net, u_int16_t proto)
 {
-	int ret;
 	struct nf_tcp_net *tn = tcp_pernet(net);
 	struct nf_proto_net *pn = &tn->pn;
 
@@ -1643,18 +1529,7 @@
 		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
 	}
 
-	if (proto == AF_INET) {
-		ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
-		if (ret < 0)
-			return ret;
-
-		ret = tcp_kmemdup_sysctl_table(pn, tn);
-		if (ret < 0)
-			nf_ct_kfree_compat_sysctl_table(pn);
-	} else
-		ret = tcp_kmemdup_sysctl_table(pn, tn);
-
-	return ret;
+	return tcp_kmemdup_sysctl_table(pn, tn);
 }
 
 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 4fd0405..20f35ed 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -44,8 +44,8 @@
 	const struct udphdr *hp;
 	struct udphdr _hdr;
 
-	/* Actually only need first 8 bytes. */
-	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	/* Actually only need first 4 bytes to get ports. */
+	hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
 	if (hp == NULL)
 		return false;
 
@@ -218,23 +218,6 @@
 	},
 	{ }
 };
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-static struct ctl_table udp_compat_sysctl_table[] = {
-	{
-		.procname	= "ip_conntrack_udp_timeout",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "ip_conntrack_udp_timeout_stream",
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{ }
-};
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
 #endif /* CONFIG_SYSCTL */
 
 static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
@@ -254,27 +237,8 @@
 	return 0;
 }
 
-static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
-					   struct nf_udp_net *un)
-{
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
-	pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table,
-				       sizeof(udp_compat_sysctl_table),
-				       GFP_KERNEL);
-	if (!pn->ctl_compat_table)
-		return -ENOMEM;
-
-	pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED];
-	pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED];
-#endif
-#endif
-	return 0;
-}
-
 static int udp_init_net(struct net *net, u_int16_t proto)
 {
-	int ret;
 	struct nf_udp_net *un = udp_pernet(net);
 	struct nf_proto_net *pn = &un->pn;
 
@@ -285,18 +249,7 @@
 			un->timeouts[i] = udp_timeouts[i];
 	}
 
-	if (proto == AF_INET) {
-		ret = udp_kmemdup_compat_sysctl_table(pn, un);
-		if (ret < 0)
-			return ret;
-
-		ret = udp_kmemdup_sysctl_table(pn, un);
-		if (ret < 0)
-			nf_ct_kfree_compat_sysctl_table(pn);
-	} else
-		ret = udp_kmemdup_sysctl_table(pn, un);
-
-	return ret;
+	return udp_kmemdup_sysctl_table(pn, un);
 }
 
 static struct nf_proto_net *udp_get_net_proto(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 9d692f5a..029206e8 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -54,7 +54,8 @@
 	const struct udphdr *hp;
 	struct udphdr _hdr;
 
-	hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
+	/* Actually only need first 4 bytes to get ports. */
+	hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
 	if (hp == NULL)
 		return false;
 
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..ef7063e 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -169,7 +169,7 @@
 	s32 seqoff, ackoff;
 	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
 	struct nf_ct_seqadj *this_way, *other_way;
-	int res;
+	int res = 1;
 
 	this_way  = &seqadj->seq[dir];
 	other_way = &seqadj->seq[!dir];
@@ -184,27 +184,31 @@
 	else
 		seqoff = this_way->offset_before;
 
+	newseq = htonl(ntohl(tcph->seq) + seqoff);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
+	pr_debug("Adjusting sequence number from %u->%u\n",
+		 ntohl(tcph->seq), ntohl(newseq));
+	tcph->seq = newseq;
+
+	if (!tcph->ack)
+		goto out;
+
 	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
 		  other_way->correction_pos))
 		ackoff = other_way->offset_after;
 	else
 		ackoff = other_way->offset_before;
 
-	newseq = htonl(ntohl(tcph->seq) + seqoff);
 	newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
-	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
 	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack,
 				 false);
-
-	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+	pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n",
 		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
 		 ntohl(newack));
-
-	tcph->seq = newseq;
 	tcph->ack_seq = newack;
 
 	res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+out:
 	spin_unlock_bh(&ct->lock);
 
 	return res;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 8d9db9d..621b81c 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -83,9 +83,10 @@
 static int iswordc(const char c)
 {
 	if (isalnum(c) || c == '!' || c == '"' || c == '%' ||
-	    (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' ||
+	    (c >= '(' && c <= '+') || c == ':' || c == '<' || c == '>' ||
 	    c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' ||
-	    c == '{' || c == '}' || c == '~')
+	    c == '{' || c == '}' || c == '~' || (c >= '-' && c <= '/') ||
+	    c == '\'')
 		return 1;
 	return 0;
 }
@@ -329,13 +330,12 @@
 static const char *sip_skip_whitespace(const char *dptr, const char *limit)
 {
 	for (; dptr < limit; dptr++) {
-		if (*dptr == ' ')
+		if (*dptr == ' ' || *dptr == '\t')
 			continue;
 		if (*dptr != '\r' && *dptr != '\n')
 			break;
 		dptr = sip_follow_continuation(dptr, limit);
-		if (dptr == NULL)
-			return NULL;
+		break;
 	}
 	return dptr;
 }
@@ -1383,7 +1383,7 @@
 		return NF_DROP;
 	}
 	cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
-	if (!cseq) {
+	if (!cseq && *(*dptr + matchoff) != '0') {
 		nf_ct_helper_log(skb, ct, "cannot get cseq");
 		return NF_DROP;
 	}
@@ -1446,7 +1446,7 @@
 			return NF_DROP;
 		}
 		cseq = simple_strtoul(*dptr + matchoff, NULL, 10);
-		if (!cseq) {
+		if (!cseq && *(*dptr + matchoff) != '0') {
 			nf_ct_helper_log(skb, ct, "cannot get cseq");
 			return NF_DROP;
 		}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 958a145..5f446cd 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -205,16 +205,25 @@
 	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
 	const struct nf_conntrack_l3proto *l3proto;
 	const struct nf_conntrack_l4proto *l4proto;
+	struct net *net = seq_file_net(s);
 	int ret = 0;
 
 	NF_CT_ASSERT(ct);
 	if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
 		return 0;
 
+	if (nf_ct_should_gc(ct)) {
+		nf_ct_kill(ct);
+		goto release;
+	}
+
 	/* we only want to print DIR_ORIGINAL */
 	if (NF_CT_DIRECTION(hash))
 		goto release;
 
+	if (!net_eq(nf_ct_net(ct), net))
+		goto release;
+
 	l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
 	NF_CT_ASSERT(l3proto);
 	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
@@ -224,8 +233,7 @@
 	seq_printf(s, "%-8s %u %-8s %u %ld ",
 		   l3proto->name, nf_ct_l3num(ct),
 		   l4proto->name, nf_ct_protonum(ct),
-		   timer_pending(&ct->timeout)
-		   ? (long)(ct->timeout.expires - jiffies)/HZ : 0);
+		   nf_ct_expires(ct)  / HZ);
 
 	if (l4proto->print_conntrack)
 		l4proto->print_conntrack(s, ct);
@@ -349,13 +357,13 @@
 	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
 			"%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
 		   nr_conntracks,
-		   st->searched,
+		   0,
 		   st->found,
-		   st->new,
+		   0,
 		   st->invalid,
 		   st->ignore,
-		   st->delete,
-		   st->delete_list,
+		   0,
+		   0,
 		   st->insert,
 		   st->insert_failed,
 		   st->drop,
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 0655225..e0adb59 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -13,13 +13,13 @@
 
 
 /* core.c */
-unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
-			struct nf_hook_state *state, struct nf_hook_ops **elemp);
+unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state,
+			struct nf_hook_entry **entryp);
 
 /* nf_queue.c */
-int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
-	     struct nf_hook_state *state, unsigned int queuenum);
-void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops);
+int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+	     unsigned int queuenum);
+void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry);
 int __init netfilter_queue_init(void);
 
 /* nf_log.c */
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index aa5847a..30a17d6 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -39,12 +39,12 @@
 	return NULL;
 }
 
-void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
+int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
 {
 	const struct nf_logger *log;
 
-	if (pf == NFPROTO_UNSPEC)
-		return;
+	if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers))
+		return -EOPNOTSUPP;
 
 	mutex_lock(&nf_log_mutex);
 	log = nft_log_dereference(net->nf.nf_loggers[pf]);
@@ -52,6 +52,8 @@
 		rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
 
 	mutex_unlock(&nf_log_mutex);
+
+	return 0;
 }
 EXPORT_SYMBOL(nf_log_set);
 
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index a5aa596..119fe1c 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -77,7 +77,7 @@
 	nf_log_buf_add(m, "SPT=%u DPT=%u ",
 		       ntohs(th->source), ntohs(th->dest));
 	/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
-	if (logflags & XT_LOG_TCPSEQ) {
+	if (logflags & NF_LOG_TCPSEQ) {
 		nf_log_buf_add(m, "SEQ=%u ACK=%u ",
 			       ntohl(th->seq), ntohl(th->ack_seq));
 	}
@@ -107,7 +107,7 @@
 	/* Max length: 11 "URGP=65535 " */
 	nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr));
 
-	if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
+	if ((logflags & NF_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
 		u_int8_t _opt[60 - sizeof(struct tcphdr)];
 		const u_int8_t *op;
 		unsigned int i;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index de31818..bbb8f3d 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -441,7 +441,8 @@
 			ct->status |= IPS_DST_NAT;
 
 		if (nfct_help(ct))
-			nfct_seqadj_ext_add(ct);
+			if (!nfct_seqadj_ext_add(ct))
+				return NF_DROP;
 	}
 
 	if (maniptype == NF_NAT_MANIP_SRC) {
@@ -565,16 +566,10 @@
 	 * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
 	 * will delete entry from already-freed table.
 	 */
-	if (!del_timer(&ct->timeout))
-		return 1;
-
 	ct->status &= ~IPS_NAT_DONE_MASK;
-
 	rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
 			       nf_nat_bysource_params);
 
-	add_timer(&ct->timeout);
-
 	/* don't delete conntrack.  Although that would make things a lot
 	 * simpler, we'd end up flushing all conntracks on nat rmmod.
 	 */
@@ -807,7 +802,7 @@
 	if (err < 0)
 		return err;
 
-	return nf_nat_setup_info(ct, &range, manip);
+	return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
 }
 #else
 static int
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index b19ad20..96964a0 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -96,14 +96,14 @@
 }
 EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
 
-void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
+void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
 {
 	const struct nf_queue_handler *qh;
 
 	rcu_read_lock();
 	qh = rcu_dereference(net->nf.queue_handler);
 	if (qh)
-		qh->nf_hook_drop(net, ops);
+		qh->nf_hook_drop(net, entry);
 	rcu_read_unlock();
 }
 
@@ -112,7 +112,6 @@
  * through nf_reinject().
  */
 int nf_queue(struct sk_buff *skb,
-	     struct nf_hook_ops *elem,
 	     struct nf_hook_state *state,
 	     unsigned int queuenum)
 {
@@ -141,7 +140,6 @@
 
 	*entry = (struct nf_queue_entry) {
 		.skb	= skb,
-		.elem	= elem,
 		.state	= *state,
 		.size	= sizeof(*entry) + afinfo->route_key_size,
 	};
@@ -165,11 +163,15 @@
 
 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 {
+	struct nf_hook_entry *hook_entry;
 	struct sk_buff *skb = entry->skb;
-	struct nf_hook_ops *elem = entry->elem;
 	const struct nf_afinfo *afinfo;
+	struct nf_hook_ops *elem;
 	int err;
 
+	hook_entry = rcu_dereference(entry->state.hook_entries);
+	elem = &hook_entry->ops;
+
 	nf_queue_entry_release_refs(entry);
 
 	/* Continue traversal iff userspace said ok... */
@@ -186,8 +188,7 @@
 
 	if (verdict == NF_ACCEPT) {
 	next_hook:
-		verdict = nf_iterate(entry->state.hook_list,
-				     skb, &entry->state, &elem);
+		verdict = nf_iterate(skb, &entry->state, &hook_entry);
 	}
 
 	switch (verdict & NF_VERDICT_MASK) {
@@ -198,7 +199,8 @@
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
-		err = nf_queue(skb, elem, &entry->state,
+		RCU_INIT_POINTER(entry->state.hook_entries, hook_entry);
+		err = nf_queue(skb, &entry->state,
 			       verdict >> NF_VERDICT_QBITS);
 		if (err < 0) {
 			if (err == -ESRCH &&
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 7e1c876..b70d3ea 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1196,6 +1196,83 @@
 	}
 }
 
+struct nft_chain_hook {
+	u32				num;
+	u32				priority;
+	const struct nf_chain_type	*type;
+	struct net_device		*dev;
+};
+
+static int nft_chain_parse_hook(struct net *net,
+				const struct nlattr * const nla[],
+				struct nft_af_info *afi,
+				struct nft_chain_hook *hook, bool create)
+{
+	struct nlattr *ha[NFTA_HOOK_MAX + 1];
+	const struct nf_chain_type *type;
+	struct net_device *dev;
+	int err;
+
+	err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
+			       nft_hook_policy);
+	if (err < 0)
+		return err;
+
+	if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
+	    ha[NFTA_HOOK_PRIORITY] == NULL)
+		return -EINVAL;
+
+	hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
+	if (hook->num >= afi->nhooks)
+		return -EINVAL;
+
+	hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
+
+	type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT];
+	if (nla[NFTA_CHAIN_TYPE]) {
+		type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE],
+						   create);
+		if (IS_ERR(type))
+			return PTR_ERR(type);
+	}
+	if (!(type->hook_mask & (1 << hook->num)))
+		return -EOPNOTSUPP;
+	if (!try_module_get(type->owner))
+		return -ENOENT;
+
+	hook->type = type;
+
+	hook->dev = NULL;
+	if (afi->flags & NFT_AF_NEEDS_DEV) {
+		char ifname[IFNAMSIZ];
+
+		if (!ha[NFTA_HOOK_DEV]) {
+			module_put(type->owner);
+			return -EOPNOTSUPP;
+		}
+
+		nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
+		dev = dev_get_by_name(net, ifname);
+		if (!dev) {
+			module_put(type->owner);
+			return -ENOENT;
+		}
+		hook->dev = dev;
+	} else if (ha[NFTA_HOOK_DEV]) {
+		module_put(type->owner);
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static void nft_chain_release_hook(struct nft_chain_hook *hook)
+{
+	module_put(hook->type->owner);
+	if (hook->dev != NULL)
+		dev_put(hook->dev);
+}
+
 static int nf_tables_newchain(struct net *net, struct sock *nlsk,
 			      struct sk_buff *skb, const struct nlmsghdr *nlh,
 			      const struct nlattr * const nla[])
@@ -1206,10 +1283,8 @@
 	struct nft_table *table;
 	struct nft_chain *chain;
 	struct nft_base_chain *basechain = NULL;
-	struct nlattr *ha[NFTA_HOOK_MAX + 1];
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
-	struct net_device *dev = NULL;
 	u8 policy = NF_ACCEPT;
 	u64 handle = 0;
 	unsigned int i;
@@ -1273,6 +1348,37 @@
 		if (nlh->nlmsg_flags & NLM_F_REPLACE)
 			return -EOPNOTSUPP;
 
+		if (nla[NFTA_CHAIN_HOOK]) {
+			struct nft_base_chain *basechain;
+			struct nft_chain_hook hook;
+			struct nf_hook_ops *ops;
+
+			if (!(chain->flags & NFT_BASE_CHAIN))
+				return -EBUSY;
+
+			err = nft_chain_parse_hook(net, nla, afi, &hook,
+						   create);
+			if (err < 0)
+				return err;
+
+			basechain = nft_base_chain(chain);
+			if (basechain->type != hook.type) {
+				nft_chain_release_hook(&hook);
+				return -EBUSY;
+			}
+
+			for (i = 0; i < afi->nops; i++) {
+				ops = &basechain->ops[i];
+				if (ops->hooknum != hook.num ||
+				    ops->priority != hook.priority ||
+				    ops->dev != hook.dev) {
+					nft_chain_release_hook(&hook);
+					return -EBUSY;
+				}
+			}
+			nft_chain_release_hook(&hook);
+		}
+
 		if (nla[NFTA_CHAIN_HANDLE] && name) {
 			struct nft_chain *chain2;
 
@@ -1320,102 +1426,53 @@
 		return -EOVERFLOW;
 
 	if (nla[NFTA_CHAIN_HOOK]) {
-		const struct nf_chain_type *type;
+		struct nft_chain_hook hook;
 		struct nf_hook_ops *ops;
 		nf_hookfn *hookfn;
-		u32 hooknum, priority;
 
-		type = chain_type[family][NFT_CHAIN_T_DEFAULT];
-		if (nla[NFTA_CHAIN_TYPE]) {
-			type = nf_tables_chain_type_lookup(afi,
-							   nla[NFTA_CHAIN_TYPE],
-							   create);
-			if (IS_ERR(type))
-				return PTR_ERR(type);
-		}
-
-		err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
-				       nft_hook_policy);
+		err = nft_chain_parse_hook(net, nla, afi, &hook, create);
 		if (err < 0)
 			return err;
-		if (ha[NFTA_HOOK_HOOKNUM] == NULL ||
-		    ha[NFTA_HOOK_PRIORITY] == NULL)
-			return -EINVAL;
-
-		hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM]));
-		if (hooknum >= afi->nhooks)
-			return -EINVAL;
-		priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
-
-		if (!(type->hook_mask & (1 << hooknum)))
-			return -EOPNOTSUPP;
-		if (!try_module_get(type->owner))
-			return -ENOENT;
-		hookfn = type->hooks[hooknum];
-
-		if (afi->flags & NFT_AF_NEEDS_DEV) {
-			char ifname[IFNAMSIZ];
-
-			if (!ha[NFTA_HOOK_DEV]) {
-				module_put(type->owner);
-				return -EOPNOTSUPP;
-			}
-
-			nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
-			dev = dev_get_by_name(net, ifname);
-			if (!dev) {
-				module_put(type->owner);
-				return -ENOENT;
-			}
-		} else if (ha[NFTA_HOOK_DEV]) {
-			module_put(type->owner);
-			return -EOPNOTSUPP;
-		}
 
 		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
 		if (basechain == NULL) {
-			module_put(type->owner);
-			if (dev != NULL)
-				dev_put(dev);
+			nft_chain_release_hook(&hook);
 			return -ENOMEM;
 		}
 
-		if (dev != NULL)
-			strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+		if (hook.dev != NULL)
+			strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ);
 
 		if (nla[NFTA_CHAIN_COUNTERS]) {
 			stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
 			if (IS_ERR(stats)) {
-				module_put(type->owner);
+				nft_chain_release_hook(&hook);
 				kfree(basechain);
-				if (dev != NULL)
-					dev_put(dev);
 				return PTR_ERR(stats);
 			}
 			basechain->stats = stats;
 		} else {
 			stats = netdev_alloc_pcpu_stats(struct nft_stats);
 			if (stats == NULL) {
-				module_put(type->owner);
+				nft_chain_release_hook(&hook);
 				kfree(basechain);
-				if (dev != NULL)
-					dev_put(dev);
 				return -ENOMEM;
 			}
 			rcu_assign_pointer(basechain->stats, stats);
 		}
 
-		basechain->type = type;
+		hookfn = hook.type->hooks[hook.num];
+		basechain->type = hook.type;
 		chain = &basechain->chain;
 
 		for (i = 0; i < afi->nops; i++) {
 			ops = &basechain->ops[i];
 			ops->pf		= family;
-			ops->hooknum	= hooknum;
-			ops->priority	= priority;
+			ops->hooknum	= hook.num;
+			ops->priority	= hook.priority;
 			ops->priv	= chain;
 			ops->hook	= afi->hooks[ops->hooknum];
-			ops->dev	= dev;
+			ops->dev	= hook.dev;
 			if (hookfn)
 				ops->hook = hookfn;
 			if (afi->hook_ops_init)
@@ -3426,12 +3483,12 @@
 }
 
 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
-			    const struct nlattr *attr)
+			    const struct nlattr *attr, u32 nlmsg_flags)
 {
 	struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
 	struct nft_data_desc d1, d2;
 	struct nft_set_ext_tmpl tmpl;
-	struct nft_set_ext *ext;
+	struct nft_set_ext *ext, *ext2;
 	struct nft_set_elem elem;
 	struct nft_set_binding *binding;
 	struct nft_userdata *udata;
@@ -3558,9 +3615,19 @@
 		goto err4;
 
 	ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
-	err = set->ops->insert(ctx->net, set, &elem);
-	if (err < 0)
+	err = set->ops->insert(ctx->net, set, &elem, &ext2);
+	if (err) {
+		if (err == -EEXIST) {
+			if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
+			    nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
+			    memcmp(nft_set_ext_data(ext),
+				   nft_set_ext_data(ext2), set->dlen) != 0)
+				err = -EBUSY;
+			else if (!(nlmsg_flags & NLM_F_EXCL))
+				err = 0;
+		}
 		goto err5;
+	}
 
 	nft_trans_elem(trans) = elem;
 	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
@@ -3616,7 +3683,7 @@
 		    !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
 			return -ENFILE;
 
-		err = nft_add_set_elem(&ctx, set, attr);
+		err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
 		if (err < 0) {
 			atomic_dec(&set->nelems);
 			break;
@@ -4343,6 +4410,31 @@
 }
 
 /**
+ *	nft_parse_u32_check - fetch u32 attribute and check for maximum value
+ *
+ *	@attr: netlink attribute to fetch value from
+ *	@max: maximum value to be stored in dest
+ *	@dest: pointer to the variable
+ *
+ *	Parse, check and store a given u32 netlink attribute into variable.
+ *	This function returns -ERANGE if the value goes over maximum value.
+ *	Otherwise a 0 is returned and the attribute value is stored in the
+ *	destination variable.
+ */
+unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+{
+	int val;
+
+	val = ntohl(nla_get_be32(attr));
+	if (val > max)
+		return -ERANGE;
+
+	*dest = val;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_u32_check);
+
+/**
  *	nft_parse_register - parse a register value from a netlink attribute
  *
  *	@attr: netlink attribute
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index fb8b589..0dd5c69 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -34,7 +34,7 @@
 	.u = {
 		.log = {
 			.level = LOGLEVEL_WARNING,
-			.logflags = NF_LOG_MASK,
+			.logflags = NF_LOG_DEFAULT_MASK,
 	        },
 	},
 };
@@ -93,12 +93,15 @@
 
 	if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
 		ptr = skb_network_header(skb);
-	else
+	else {
+		if (!pkt->tprot_set)
+			return false;
 		ptr = skb_network_header(skb) + pkt->xt.thoff;
+	}
 
 	ptr += priv->offset;
 
-	if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
+	if (unlikely(ptr + priv->len > skb_tail_pointer(skb)))
 		return false;
 
 	*dest = 0;
@@ -260,8 +263,13 @@
 	if (err < 0)
 		goto err7;
 
-	return 0;
+	err = nft_range_module_init();
+	if (err < 0)
+		goto err8;
 
+	return 0;
+err8:
+	nft_dynset_module_exit();
 err7:
 	nft_payload_module_exit();
 err6:
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index 6b5f762..f713cc2 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -82,7 +82,10 @@
 {
 	int ret;
 
-	nft_register_chain_type(&filter_inet);
+	ret = nft_register_chain_type(&filter_inet);
+	if (ret < 0)
+		return ret;
+
 	ret = register_pernet_subsys(&nf_tables_inet_net_ops);
 	if (ret < 0)
 		nft_unregister_chain_type(&filter_inet);
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 5eefe4a..9e2ae42 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -15,79 +15,6 @@
 #include <net/netfilter/nf_tables_ipv4.h>
 #include <net/netfilter/nf_tables_ipv6.h>
 
-static inline void
-nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
-			    struct sk_buff *skb,
-			    const struct nf_hook_state *state)
-{
-	struct iphdr *iph, _iph;
-	u32 len, thoff;
-
-	nft_set_pktinfo(pkt, skb, state);
-
-	iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
-				 &_iph);
-	if (!iph)
-		return;
-
-	iph = ip_hdr(skb);
-	if (iph->ihl < 5 || iph->version != 4)
-		return;
-
-	len = ntohs(iph->tot_len);
-	thoff = iph->ihl * 4;
-	if (skb->len < len)
-		return;
-	else if (len < thoff)
-		return;
-
-	pkt->tprot = iph->protocol;
-	pkt->xt.thoff = thoff;
-	pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
-}
-
-static inline void
-__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
-			      struct sk_buff *skb,
-			      const struct nf_hook_state *state)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	struct ipv6hdr *ip6h, _ip6h;
-	unsigned int thoff = 0;
-	unsigned short frag_off;
-	int protohdr;
-	u32 pkt_len;
-
-	ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
-				  &_ip6h);
-	if (!ip6h)
-		return;
-
-	if (ip6h->version != 6)
-		return;
-
-	pkt_len = ntohs(ip6h->payload_len);
-	if (pkt_len + sizeof(*ip6h) > skb->len)
-		return;
-
-	protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
-	if (protohdr < 0)
-                return;
-
-	pkt->tprot = protohdr;
-	pkt->xt.thoff = thoff;
-	pkt->xt.fragoff = frag_off;
-#endif
-}
-
-static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
-					       struct sk_buff *skb,
-					       const struct nf_hook_state *state)
-{
-	nft_set_pktinfo(pkt, skb, state);
-	__nft_netdev_set_pktinfo_ipv6(pkt, skb, state);
-}
-
 static unsigned int
 nft_do_chain_netdev(void *priv, struct sk_buff *skb,
 		    const struct nf_hook_state *state)
@@ -96,13 +23,13 @@
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		nft_netdev_set_pktinfo_ipv4(&pkt, skb, state);
+		nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
 		break;
 	case htons(ETH_P_IPV6):
-		nft_netdev_set_pktinfo_ipv6(&pkt, skb, state);
+		nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
 		break;
 	default:
-		nft_set_pktinfo(&pkt, skb, state);
+		nft_set_pktinfo_unspec(&pkt, skb, state);
 		break;
 	}
 
@@ -222,14 +149,25 @@
 {
 	int ret;
 
-	nft_register_chain_type(&nft_filter_chain_netdev);
-	ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
-	if (ret < 0) {
-		nft_unregister_chain_type(&nft_filter_chain_netdev);
+	ret = nft_register_chain_type(&nft_filter_chain_netdev);
+	if (ret)
 		return ret;
-	}
-	register_netdevice_notifier(&nf_tables_netdev_notifier);
+
+	ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
+	if (ret)
+		goto err1;
+
+	ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
+	if (ret)
+		goto err2;
+
 	return 0;
+
+err2:
+	unregister_pernet_subsys(&nf_tables_netdev_net_ops);
+err1:
+	nft_unregister_chain_type(&nft_filter_chain_netdev);
+	return ret;
 }
 
 static void __exit nf_tables_netdev_exit(void)
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 39eb1cc..ab695f8 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -113,20 +113,22 @@
 				  const struct nft_pktinfo *pkt)
 {
 	const struct sk_buff *skb = pkt->skb;
-	unsigned int len = min_t(unsigned int,
-				 pkt->xt.thoff - skb_network_offset(skb),
-				 NFT_TRACETYPE_NETWORK_HSIZE);
 	int off = skb_network_offset(skb);
+	unsigned int len, nh_end;
 
+	nh_end = pkt->tprot_set ? pkt->xt.thoff : skb->len;
+	len = min_t(unsigned int, nh_end - skb_network_offset(skb),
+		    NFT_TRACETYPE_NETWORK_HSIZE);
 	if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
 		return -1;
 
-	len = min_t(unsigned int, skb->len - pkt->xt.thoff,
-		    NFT_TRACETYPE_TRANSPORT_HSIZE);
-
-	if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
-			      pkt->xt.thoff, len))
-		return -1;
+	if (pkt->tprot_set) {
+		len = min_t(unsigned int, skb->len - pkt->xt.thoff,
+			    NFT_TRACETYPE_TRANSPORT_HSIZE);
+		if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
+				      pkt->xt.thoff, len))
+			return -1;
+	}
 
 	if (!skb_mac_header_was_set(skb))
 		return 0;
@@ -237,7 +239,7 @@
 		break;
 	case NFT_TRACETYPE_POLICY:
 		if (nla_put_be32(skb, NFTA_TRACE_POLICY,
-				 info->basechain->policy))
+				 htonl(info->basechain->policy)))
 			goto nla_put_failure;
 		break;
 	}
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 1b4de4b..d44d89b 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -326,14 +326,14 @@
 {
 	int ret = 0;
 
-	/* we want to avoid races with nfnl_acct_find_get. */
-	if (atomic_dec_and_test(&cur->refcnt)) {
+	/* We want to avoid races with nfnl_acct_put. So only when the current
+	 * refcnt is 1, we decrease it to 0.
+	 */
+	if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) {
 		/* We are protected by nfnl mutex. */
 		list_del_rcu(&cur->head);
 		kfree_rcu(cur, rcu_head);
 	} else {
-		/* still in use, restore reference counter. */
-		atomic_inc(&cur->refcnt);
 		ret = -EBUSY;
 	}
 	return ret;
@@ -343,12 +343,12 @@
 			 struct sk_buff *skb, const struct nlmsghdr *nlh,
 			 const struct nlattr * const tb[])
 {
-	char *acct_name;
-	struct nf_acct *cur;
+	struct nf_acct *cur, *tmp;
 	int ret = -ENOENT;
+	char *acct_name;
 
 	if (!tb[NFACCT_NAME]) {
-		list_for_each_entry(cur, &net->nfnl_acct_list, head)
+		list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head)
 			nfnl_acct_try_del(cur);
 
 		return 0;
@@ -443,7 +443,7 @@
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_update);
 
-static void nfnl_overquota_report(struct nf_acct *nfacct)
+static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
 {
 	int ret;
 	struct sk_buff *skb;
@@ -458,11 +458,12 @@
 		kfree_skb(skb);
 		return;
 	}
-	netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
+	netlink_broadcast(net->nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
 			  GFP_ATOMIC);
 }
 
-int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
+			struct nf_acct *nfacct)
 {
 	u64 now;
 	u64 *quota;
@@ -480,7 +481,7 @@
 
 	if (now >= *quota &&
 	    !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
-		nfnl_overquota_report(nfacct);
+		nfnl_overquota_report(net, nfacct);
 	}
 
 	return ret;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index e924e95..3b79f34 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -43,7 +43,7 @@
 	if (help == NULL)
 		return NF_DROP;
 
-	/* rcu_read_lock()ed by nf_hook_slow */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	helper = rcu_dereference(help->helper);
 	if (helper == NULL)
 		return NF_DROP;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 4cdcd96..139e086 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -98,31 +98,28 @@
 		break;
 	}
 
-	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
-
-	/* This protocol is not supportted, skip. */
-	if (l4proto->l4proto != l4num) {
-		ret = -EOPNOTSUPP;
-		goto err_proto_put;
-	}
-
 	if (matching) {
 		if (nlh->nlmsg_flags & NLM_F_REPLACE) {
 			/* You cannot replace one timeout policy by another of
 			 * different kind, sorry.
 			 */
 			if (matching->l3num != l3num ||
-			    matching->l4proto->l4proto != l4num) {
-				ret = -EINVAL;
-				goto err_proto_put;
-			}
+			    matching->l4proto->l4proto != l4num)
+				return -EINVAL;
 
-			ret = ctnl_timeout_parse_policy(&matching->data,
-							l4proto, net,
-							cda[CTA_TIMEOUT_DATA]);
-			return ret;
+			return ctnl_timeout_parse_policy(&matching->data,
+							 matching->l4proto, net,
+							 cda[CTA_TIMEOUT_DATA]);
 		}
-		ret = -EBUSY;
+
+		return -EBUSY;
+	}
+
+	l4proto = nf_ct_l4proto_find_get(l3num, l4num);
+
+	/* This protocol is not supportted, skip. */
+	if (l4proto->l4proto != l4num) {
+		ret = -EOPNOTSUPP;
 		goto err_proto_put;
 	}
 
@@ -305,7 +302,16 @@
 	const struct hlist_nulls_node *nn;
 	unsigned int last_hsize;
 	spinlock_t *lock;
-	int i;
+	int i, cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
+
+		spin_lock_bh(&pcpu->lock);
+		hlist_nulls_for_each_entry(h, nn, &pcpu->unconfirmed, hnnode)
+			untimeout(h, timeout);
+		spin_unlock_bh(&pcpu->lock);
+	}
 
 	local_bh_disable();
 restart:
@@ -330,16 +336,16 @@
 {
 	int ret = 0;
 
-	/* we want to avoid races with nf_ct_timeout_find_get. */
-	if (atomic_dec_and_test(&timeout->refcnt)) {
+	/* We want to avoid races with ctnl_timeout_put. So only when the
+	 * current refcnt is 1, we decrease it to 0.
+	 */
+	if (atomic_cmpxchg(&timeout->refcnt, 1, 0) == 1) {
 		/* We are protected by nfnl mutex. */
 		list_del_rcu(&timeout->head);
 		nf_ct_l4proto_put(timeout->l4proto);
 		ctnl_untimeout(net, timeout);
 		kfree_rcu(timeout, rcu_head);
 	} else {
-		/* still in use, restore reference counter. */
-		atomic_inc(&timeout->refcnt);
 		ret = -EBUSY;
 	}
 	return ret;
@@ -350,12 +356,13 @@
 				 const struct nlmsghdr *nlh,
 				 const struct nlattr * const cda[])
 {
-	struct ctnl_timeout *cur;
+	struct ctnl_timeout *cur, *tmp;
 	int ret = -ENOENT;
 	char *name;
 
 	if (!cda[CTA_TIMEOUT_NAME]) {
-		list_for_each_entry(cur, &net->nfct_timeout_list, head)
+		list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list,
+					 head)
 			ctnl_timeout_try_del(net, cur);
 
 		return 0;
@@ -543,7 +550,9 @@
 
 static void ctnl_timeout_put(struct ctnl_timeout *timeout)
 {
-	atomic_dec(&timeout->refcnt);
+	if (atomic_dec_and_test(&timeout->refcnt))
+		kfree_rcu(timeout, rcu_head);
+
 	module_put(THIS_MODULE);
 }
 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
@@ -591,7 +600,9 @@
 	list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
 		list_del_rcu(&cur->head);
 		nf_ct_l4proto_put(cur->l4proto);
-		kfree_rcu(cur, rcu_head);
+
+		if (atomic_dec_and_test(&cur->refcnt))
+			kfree_rcu(cur, rcu_head);
 	}
 }
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index cbcfdfb..eb086a1 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -442,7 +442,9 @@
 			if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
 					 htonl(indev->ifindex)) ||
 			/* this is the bridge group "brX" */
-			/* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
+			/* rcu_read_lock()ed by nf_hook_thresh or
+			 * nf_log_packet.
+			 */
 			    nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
 					 htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
 				goto nla_put_failure;
@@ -477,7 +479,9 @@
 			if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
 					 htonl(outdev->ifindex)) ||
 			/* this is the bridge group "brX" */
-			/* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
+			/* rcu_read_lock()ed by nf_hook_thresh or
+			 * nf_log_packet.
+			 */
 			    nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
 					 htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
 				goto nla_put_failure;
@@ -1147,6 +1151,7 @@
 MODULE_ALIAS_NF_LOGGER(AF_INET, 1);
 MODULE_ALIAS_NF_LOGGER(AF_INET6, 1);
 MODULE_ALIAS_NF_LOGGER(AF_BRIDGE, 1);
+MODULE_ALIAS_NF_LOGGER(3, 1); /* NFPROTO_ARP */
 
 module_init(nfnetlink_log_init);
 module_exit(nfnetlink_log_fini);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 5d36a09..af832c5 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -740,7 +740,7 @@
 	struct net *net = entry->state.net;
 	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
 
-	/* rcu_read_lock()ed by nf_hook_slow() */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	queue = instance_lookup(q, queuenum);
 	if (!queue)
 		return -ESRCH;
@@ -917,12 +917,14 @@
 	.notifier_call	= nfqnl_rcv_dev_event,
 };
 
-static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr)
+static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr)
 {
-	return entry->elem == (struct nf_hook_ops *)ops_ptr;
+	return rcu_access_pointer(entry->state.hook_entries) ==
+		(struct nf_hook_entry *)entry_ptr;
 }
 
-static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook)
+static void nfqnl_nf_hook_drop(struct net *net,
+			       const struct nf_hook_entry *hook)
 {
 	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
 	int i;
@@ -1145,10 +1147,8 @@
 	struct nfnl_queue_net *q = nfnl_queue_pernet(net);
 	int err;
 
-	queue = instance_lookup(q, queue_num);
-	if (!queue)
-		queue = verdict_instance_lookup(q, queue_num,
-						NETLINK_CB(skb).portid);
+	queue = verdict_instance_lookup(q, queue_num,
+					NETLINK_CB(skb).portid);
 	if (IS_ERR(queue))
 		return PTR_ERR(queue);
 
@@ -1524,9 +1524,16 @@
 		goto cleanup_netlink_notifier;
 	}
 
-	register_netdevice_notifier(&nfqnl_dev_notifier);
+	status = register_netdevice_notifier(&nfqnl_dev_notifier);
+	if (status < 0) {
+		pr_err("nf_queue: failed to register netdevice notifier\n");
+		goto cleanup_netlink_subsys;
+	}
+
 	return status;
 
+cleanup_netlink_subsys:
+	nfnetlink_subsys_unregister(&nfqnl_subsys);
 cleanup_netlink_notifier:
 	netlink_unregister_notifier(&nfqnl_rtnl_notifier);
 	unregister_pernet_subsys(&nfnl_queue_net_ops);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index d71cc18..31c15ed 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -52,6 +52,7 @@
 {
 	struct nft_bitwise *priv = nft_expr_priv(expr);
 	struct nft_data_desc d1, d2;
+	u32 len;
 	int err;
 
 	if (tb[NFTA_BITWISE_SREG] == NULL ||
@@ -61,7 +62,12 @@
 	    tb[NFTA_BITWISE_XOR] == NULL)
 		return -EINVAL;
 
-	priv->len  = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+	err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len);
+	if (err < 0)
+		return err;
+
+	priv->len = len;
+
 	priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
 	err = nft_validate_register_load(priv->sreg, priv->len);
 	if (err < 0)
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index b78c28b..ee63d98 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -99,6 +99,7 @@
 			      const struct nlattr * const tb[])
 {
 	struct nft_byteorder *priv = nft_expr_priv(expr);
+	u32 size, len;
 	int err;
 
 	if (tb[NFTA_BYTEORDER_SREG] == NULL ||
@@ -117,7 +118,12 @@
 		return -EINVAL;
 	}
 
-	priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
+	err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size);
+	if (err < 0)
+		return err;
+
+	priv->size = size;
+
 	switch (priv->size) {
 	case 2:
 	case 4:
@@ -128,7 +134,12 @@
 	}
 
 	priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]);
-	priv->len  = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+	err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len);
+	if (err < 0)
+		return err;
+
+	priv->len = len;
+
 	err = nft_validate_register_load(priv->sreg, priv->len);
 	if (err < 0)
 		return err;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index e25b35d..2e53739 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -84,6 +84,9 @@
 	if (err < 0)
 		return err;
 
+	if (desc.len > U8_MAX)
+		return -ERANGE;
+
 	priv->op  = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
 	priv->len = desc.len;
 	return 0;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 51e180f..d7b0d171 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -128,15 +128,18 @@
 		memcpy(dest, &count, sizeof(count));
 		return;
 	}
+	case NFT_CT_L3PROTOCOL:
+		*dest = nf_ct_l3num(ct);
+		return;
+	case NFT_CT_PROTOCOL:
+		*dest = nf_ct_protonum(ct);
+		return;
 	default:
 		break;
 	}
 
 	tuple = &ct->tuplehash[priv->dir].tuple;
 	switch (priv->key) {
-	case NFT_CT_L3PROTOCOL:
-		*dest = nf_ct_l3num(ct);
-		return;
 	case NFT_CT_SRC:
 		memcpy(dest, tuple->src.u3.all,
 		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
@@ -145,9 +148,6 @@
 		memcpy(dest, tuple->dst.u3.all,
 		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
 		return;
-	case NFT_CT_PROTOCOL:
-		*dest = nf_ct_protonum(ct);
-		return;
 	case NFT_CT_PROTO_SRC:
 		*dest = (__force __u16)tuple->src.u.all;
 		return;
@@ -283,8 +283,9 @@
 
 	case NFT_CT_L3PROTOCOL:
 	case NFT_CT_PROTOCOL:
-		if (tb[NFTA_CT_DIRECTION] == NULL)
-			return -EINVAL;
+		/* For compatibility, do not report error if NFTA_CT_DIRECTION
+		 * attribute is specified.
+		 */
 		len = sizeof(u8);
 		break;
 	case NFT_CT_SRC:
@@ -363,6 +364,8 @@
 	switch (priv->key) {
 #ifdef CONFIG_NF_CONNTRACK_MARK
 	case NFT_CT_MARK:
+		if (tb[NFTA_CT_DIRECTION])
+			return -EINVAL;
 		len = FIELD_SIZEOF(struct nf_conn, mark);
 		break;
 #endif
@@ -432,8 +435,6 @@
 		goto nla_put_failure;
 
 	switch (priv->key) {
-	case NFT_CT_L3PROTOCOL:
-	case NFT_CT_PROTOCOL:
 	case NFT_CT_SRC:
 	case NFT_CT_DST:
 	case NFT_CT_PROTO_SRC:
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 0af2669..e3b83c3 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -22,6 +22,7 @@
 	enum nft_dynset_ops		op:8;
 	enum nft_registers		sreg_key:8;
 	enum nft_registers		sreg_data:8;
+	bool				invert;
 	u64				timeout;
 	struct nft_expr			*expr;
 	struct nft_set_binding		binding;
@@ -82,10 +83,14 @@
 
 		if (sexpr != NULL)
 			sexpr->ops->eval(sexpr, regs, pkt);
+
+		if (priv->invert)
+			regs->verdict.code = NFT_BREAK;
 		return;
 	}
 out:
-	regs->verdict.code = NFT_BREAK;
+	if (!priv->invert)
+		regs->verdict.code = NFT_BREAK;
 }
 
 static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
@@ -96,6 +101,7 @@
 	[NFTA_DYNSET_SREG_DATA]	= { .type = NLA_U32 },
 	[NFTA_DYNSET_TIMEOUT]	= { .type = NLA_U64 },
 	[NFTA_DYNSET_EXPR]	= { .type = NLA_NESTED },
+	[NFTA_DYNSET_FLAGS]	= { .type = NLA_U32 },
 };
 
 static int nft_dynset_init(const struct nft_ctx *ctx,
@@ -113,6 +119,15 @@
 	    tb[NFTA_DYNSET_SREG_KEY] == NULL)
 		return -EINVAL;
 
+	if (tb[NFTA_DYNSET_FLAGS]) {
+		u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
+
+		if (flags & ~NFT_DYNSET_F_INV)
+			return -EINVAL;
+		if (flags & NFT_DYNSET_F_INV)
+			priv->invert = true;
+	}
+
 	set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
 				   genmask);
 	if (IS_ERR(set)) {
@@ -220,6 +235,7 @@
 static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_dynset *priv = nft_expr_priv(expr);
+	u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0;
 
 	if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key))
 		goto nla_put_failure;
@@ -235,6 +251,8 @@
 		goto nla_put_failure;
 	if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
 		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags)))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index ba7aed1..a84cf3d 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -59,6 +59,7 @@
 			   const struct nlattr * const tb[])
 {
 	struct nft_exthdr *priv = nft_expr_priv(expr);
+	u32 offset, len, err;
 
 	if (tb[NFTA_EXTHDR_DREG] == NULL ||
 	    tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -66,9 +67,17 @@
 	    tb[NFTA_EXTHDR_LEN] == NULL)
 		return -EINVAL;
 
+	err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+	if (err < 0)
+		return err;
+
+	err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+	if (err < 0)
+		return err;
+
 	priv->type   = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
-	priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
-	priv->len    = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+	priv->offset = offset;
+	priv->len    = len;
 	priv->dreg   = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
 
 	return nft_validate_register_store(ctx, priv->dreg, NULL,
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 564fa79..09473b4 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -1,395 +1,145 @@
 /*
- * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
  */
 
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/list.h>
-#include <linux/log2.h>
-#include <linux/jhash.h>
 #include <linux/netlink.h>
-#include <linux/workqueue.h>
-#include <linux/rhashtable.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
 #include <net/netfilter/nf_tables.h>
-
-/* We target a hash table size of 4, element hint is 75% of final size */
-#define NFT_HASH_ELEMENT_HINT 3
+#include <net/netfilter/nf_tables_core.h>
+#include <linux/jhash.h>
 
 struct nft_hash {
-	struct rhashtable		ht;
-	struct delayed_work		gc_work;
+	enum nft_registers      sreg:8;
+	enum nft_registers      dreg:8;
+	u8			len;
+	u32			modulus;
+	u32			seed;
+	u32			offset;
 };
 
-struct nft_hash_elem {
-	struct rhash_head		node;
-	struct nft_set_ext		ext;
+static void nft_hash_eval(const struct nft_expr *expr,
+			  struct nft_regs *regs,
+			  const struct nft_pktinfo *pkt)
+{
+	struct nft_hash *priv = nft_expr_priv(expr);
+	const void *data = &regs->data[priv->sreg];
+	u32 h;
+
+	h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus);
+	regs->data[priv->dreg] = h + priv->offset;
+}
+
+static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
+	[NFTA_HASH_SREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_DREG]	= { .type = NLA_U32 },
+	[NFTA_HASH_LEN]		= { .type = NLA_U32 },
+	[NFTA_HASH_MODULUS]	= { .type = NLA_U32 },
+	[NFTA_HASH_SEED]	= { .type = NLA_U32 },
 };
 
-struct nft_hash_cmp_arg {
-	const struct nft_set		*set;
-	const u32			*key;
-	u8				genmask;
-};
-
-static const struct rhashtable_params nft_hash_params;
-
-static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
-{
-	const struct nft_hash_cmp_arg *arg = data;
-
-	return jhash(arg->key, len, seed);
-}
-
-static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
-{
-	const struct nft_hash_elem *he = data;
-
-	return jhash(nft_set_ext_key(&he->ext), len, seed);
-}
-
-static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
-			       const void *ptr)
-{
-	const struct nft_hash_cmp_arg *x = arg->key;
-	const struct nft_hash_elem *he = ptr;
-
-	if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
-		return 1;
-	if (nft_set_elem_expired(&he->ext))
-		return 1;
-	if (!nft_set_elem_active(&he->ext, x->genmask))
-		return 1;
-	return 0;
-}
-
-static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
-			    const u32 *key, const struct nft_set_ext **ext)
-{
-	struct nft_hash *priv = nft_set_priv(set);
-	const struct nft_hash_elem *he;
-	struct nft_hash_cmp_arg arg = {
-		.genmask = nft_genmask_cur(net),
-		.set	 = set,
-		.key	 = key,
-	};
-
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
-	if (he != NULL)
-		*ext = &he->ext;
-
-	return !!he;
-}
-
-static bool nft_hash_update(struct nft_set *set, const u32 *key,
-			    void *(*new)(struct nft_set *,
-					 const struct nft_expr *,
-					 struct nft_regs *regs),
-			    const struct nft_expr *expr,
-			    struct nft_regs *regs,
-			    const struct nft_set_ext **ext)
-{
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he;
-	struct nft_hash_cmp_arg arg = {
-		.genmask = NFT_GENMASK_ANY,
-		.set	 = set,
-		.key	 = key,
-	};
-
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
-	if (he != NULL)
-		goto out;
-
-	he = new(set, expr, regs);
-	if (he == NULL)
-		goto err1;
-	if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
-					 nft_hash_params))
-		goto err2;
-out:
-	*ext = &he->ext;
-	return true;
-
-err2:
-	nft_set_elem_destroy(set, he);
-err1:
-	return false;
-}
-
-static int nft_hash_insert(const struct net *net, const struct nft_set *set,
-			   const struct nft_set_elem *elem)
-{
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he = elem->priv;
-	struct nft_hash_cmp_arg arg = {
-		.genmask = nft_genmask_next(net),
-		.set	 = set,
-		.key	 = elem->key.val.data,
-	};
-
-	return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
-					    nft_hash_params);
-}
-
-static void nft_hash_activate(const struct net *net, const struct nft_set *set,
-			      const struct nft_set_elem *elem)
-{
-	struct nft_hash_elem *he = elem->priv;
-
-	nft_set_elem_change_active(net, set, &he->ext);
-	nft_set_elem_clear_busy(&he->ext);
-}
-
-static void *nft_hash_deactivate(const struct net *net,
-				 const struct nft_set *set,
-				 const struct nft_set_elem *elem)
-{
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he;
-	struct nft_hash_cmp_arg arg = {
-		.genmask = nft_genmask_next(net),
-		.set	 = set,
-		.key	 = elem->key.val.data,
-	};
-
-	rcu_read_lock();
-	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
-	if (he != NULL) {
-		if (!nft_set_elem_mark_busy(&he->ext) ||
-		    !nft_is_active(net, &he->ext))
-			nft_set_elem_change_active(net, set, &he->ext);
-		else
-			he = NULL;
-	}
-	rcu_read_unlock();
-
-	return he;
-}
-
-static void nft_hash_remove(const struct nft_set *set,
-			    const struct nft_set_elem *elem)
-{
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he = elem->priv;
-
-	rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
-}
-
-static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
-			  struct nft_set_iter *iter)
-{
-	struct nft_hash *priv = nft_set_priv(set);
-	struct nft_hash_elem *he;
-	struct rhashtable_iter hti;
-	struct nft_set_elem elem;
-	int err;
-
-	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
-	iter->err = err;
-	if (err)
-		return;
-
-	err = rhashtable_walk_start(&hti);
-	if (err && err != -EAGAIN) {
-		iter->err = err;
-		goto out;
-	}
-
-	while ((he = rhashtable_walk_next(&hti))) {
-		if (IS_ERR(he)) {
-			err = PTR_ERR(he);
-			if (err != -EAGAIN) {
-				iter->err = err;
-				goto out;
-			}
-
-			continue;
-		}
-
-		if (iter->count < iter->skip)
-			goto cont;
-		if (nft_set_elem_expired(&he->ext))
-			goto cont;
-		if (!nft_set_elem_active(&he->ext, iter->genmask))
-			goto cont;
-
-		elem.priv = he;
-
-		iter->err = iter->fn(ctx, set, iter, &elem);
-		if (iter->err < 0)
-			goto out;
-
-cont:
-		iter->count++;
-	}
-
-out:
-	rhashtable_walk_stop(&hti);
-	rhashtable_walk_exit(&hti);
-}
-
-static void nft_hash_gc(struct work_struct *work)
-{
-	struct nft_set *set;
-	struct nft_hash_elem *he;
-	struct nft_hash *priv;
-	struct nft_set_gc_batch *gcb = NULL;
-	struct rhashtable_iter hti;
-	int err;
-
-	priv = container_of(work, struct nft_hash, gc_work.work);
-	set  = nft_set_container_of(priv);
-
-	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
-	if (err)
-		goto schedule;
-
-	err = rhashtable_walk_start(&hti);
-	if (err && err != -EAGAIN)
-		goto out;
-
-	while ((he = rhashtable_walk_next(&hti))) {
-		if (IS_ERR(he)) {
-			if (PTR_ERR(he) != -EAGAIN)
-				goto out;
-			continue;
-		}
-
-		if (!nft_set_elem_expired(&he->ext))
-			continue;
-		if (nft_set_elem_mark_busy(&he->ext))
-			continue;
-
-		gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-		if (gcb == NULL)
-			goto out;
-		rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
-		atomic_dec(&set->nelems);
-		nft_set_gc_batch_add(gcb, he);
-	}
-out:
-	rhashtable_walk_stop(&hti);
-	rhashtable_walk_exit(&hti);
-
-	nft_set_gc_batch_complete(gcb);
-schedule:
-	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-			   nft_set_gc_interval(set));
-}
-
-static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
-{
-	return sizeof(struct nft_hash);
-}
-
-static const struct rhashtable_params nft_hash_params = {
-	.head_offset		= offsetof(struct nft_hash_elem, node),
-	.hashfn			= nft_hash_key,
-	.obj_hashfn		= nft_hash_obj,
-	.obj_cmpfn		= nft_hash_cmp,
-	.automatic_shrinking	= true,
-};
-
-static int nft_hash_init(const struct nft_set *set,
-			 const struct nft_set_desc *desc,
+static int nft_hash_init(const struct nft_ctx *ctx,
+			 const struct nft_expr *expr,
 			 const struct nlattr * const tb[])
 {
-	struct nft_hash *priv = nft_set_priv(set);
-	struct rhashtable_params params = nft_hash_params;
-	int err;
+	struct nft_hash *priv = nft_expr_priv(expr);
+	u32 len;
 
-	params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
-	params.key_len	  = set->klen;
+	if (!tb[NFTA_HASH_SREG] ||
+	    !tb[NFTA_HASH_DREG] ||
+	    !tb[NFTA_HASH_LEN]  ||
+	    !tb[NFTA_HASH_SEED] ||
+	    !tb[NFTA_HASH_MODULUS])
+		return -EINVAL;
 
-	err = rhashtable_init(&priv->ht, &params);
-	if (err < 0)
-		return err;
+	if (tb[NFTA_HASH_OFFSET])
+		priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
 
-	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
-	if (set->flags & NFT_SET_TIMEOUT)
-		queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-				   nft_set_gc_interval(set));
+	priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
+	priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
+
+	len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN]));
+	if (len == 0 || len > U8_MAX)
+		return -ERANGE;
+
+	priv->len = len;
+
+	priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
+	if (priv->modulus <= 1)
+		return -ERANGE;
+
+	if (priv->offset + priv->modulus - 1 < priv->offset)
+		return -EOVERFLOW;
+
+	priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
+
+	return nft_validate_register_load(priv->sreg, len) &&
+	       nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_hash_dump(struct sk_buff *skb,
+			 const struct nft_expr *expr)
+{
+	const struct nft_hash *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg))
+		goto nla_put_failure;
+	if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
+		goto nla_put_failure;
+	if (priv->offset != 0)
+		if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+			goto nla_put_failure;
 	return 0;
+
+nla_put_failure:
+	return -1;
 }
 
-static void nft_hash_elem_destroy(void *ptr, void *arg)
-{
-	nft_set_elem_destroy((const struct nft_set *)arg, ptr);
-}
-
-static void nft_hash_destroy(const struct nft_set *set)
-{
-	struct nft_hash *priv = nft_set_priv(set);
-
-	cancel_delayed_work_sync(&priv->gc_work);
-	rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
-				    (void *)set);
-}
-
-static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
-			      struct nft_set_estimate *est)
-{
-	unsigned int esize;
-
-	esize = sizeof(struct nft_hash_elem);
-	if (desc->size) {
-		est->size = sizeof(struct nft_hash) +
-			    roundup_pow_of_two(desc->size * 4 / 3) *
-			    sizeof(struct nft_hash_elem *) +
-			    desc->size * esize;
-	} else {
-		/* Resizing happens when the load drops below 30% or goes
-		 * above 75%. The average of 52.5% load (approximated by 50%)
-		 * is used for the size estimation of the hash buckets,
-		 * meaning we calculate two buckets per element.
-		 */
-		est->size = esize + 2 * sizeof(struct nft_hash_elem *);
-	}
-
-	est->class = NFT_SET_CLASS_O_1;
-
-	return true;
-}
-
-static struct nft_set_ops nft_hash_ops __read_mostly = {
-	.privsize       = nft_hash_privsize,
-	.elemsize	= offsetof(struct nft_hash_elem, ext),
-	.estimate	= nft_hash_estimate,
+static struct nft_expr_type nft_hash_type;
+static const struct nft_expr_ops nft_hash_ops = {
+	.type		= &nft_hash_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_hash)),
+	.eval		= nft_hash_eval,
 	.init		= nft_hash_init,
-	.destroy	= nft_hash_destroy,
-	.insert		= nft_hash_insert,
-	.activate	= nft_hash_activate,
-	.deactivate	= nft_hash_deactivate,
-	.remove		= nft_hash_remove,
-	.lookup		= nft_hash_lookup,
-	.update		= nft_hash_update,
-	.walk		= nft_hash_walk,
-	.features	= NFT_SET_MAP | NFT_SET_TIMEOUT,
+	.dump		= nft_hash_dump,
+};
+
+static struct nft_expr_type nft_hash_type __read_mostly = {
+	.name		= "hash",
+	.ops		= &nft_hash_ops,
+	.policy		= nft_hash_policy,
+	.maxattr	= NFTA_HASH_MAX,
 	.owner		= THIS_MODULE,
 };
 
 static int __init nft_hash_module_init(void)
 {
-	return nft_register_set(&nft_hash_ops);
+	return nft_register_expr(&nft_hash_type);
 }
 
 static void __exit nft_hash_module_exit(void)
 {
-	nft_unregister_set(&nft_hash_ops);
+	nft_unregister_expr(&nft_hash_type);
 }
 
 module_init(nft_hash_module_init);
 module_exit(nft_hash_module_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("hash");
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index db3b746..d17018f 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -53,6 +53,10 @@
 			    tb[NFTA_IMMEDIATE_DATA]);
 	if (err < 0)
 		return err;
+
+	if (desc.len > U8_MAX)
+		return -ERANGE;
+
 	priv->dlen = desc.len;
 
 	priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 24a73bb..1b01404 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -58,8 +58,11 @@
 	if (tb[NFTA_LOG_LEVEL] != NULL &&
 	    tb[NFTA_LOG_GROUP] != NULL)
 		return -EINVAL;
-	if (tb[NFTA_LOG_GROUP] != NULL)
+	if (tb[NFTA_LOG_GROUP] != NULL) {
 		li->type = NF_LOG_TYPE_ULOG;
+		if (tb[NFTA_LOG_FLAGS] != NULL)
+			return -EINVAL;
+	}
 
 	nla = tb[NFTA_LOG_PREFIX];
 	if (nla != NULL) {
@@ -87,6 +90,10 @@
 		if (tb[NFTA_LOG_FLAGS] != NULL) {
 			li->u.log.logflags =
 				ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
+			if (li->u.log.logflags & ~NF_LOG_MASK) {
+				err = -EINVAL;
+				goto err1;
+			}
 		}
 		break;
 	case NF_LOG_TYPE_ULOG:
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index e164325..8166b69 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -43,7 +43,7 @@
 		return;
 	}
 
-	if (found && set->flags & NFT_SET_MAP)
+	if (set->flags & NFT_SET_MAP)
 		nft_data_copy(&regs->data[priv->dreg],
 			      nft_set_ext_data(ext), set->dlen);
 
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 2863f34..6c1e024 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -52,6 +52,8 @@
 		*dest = pkt->pf;
 		break;
 	case NFT_META_L4PROTO:
+		if (!pkt->tprot_set)
+			goto err;
 		*dest = pkt->tprot;
 		break;
 	case NFT_META_PRIORITY:
@@ -291,10 +293,16 @@
 }
 EXPORT_SYMBOL_GPL(nft_meta_get_init);
 
-static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
+int nft_meta_set_validate(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nft_data **data)
 {
+	struct nft_meta *priv = nft_expr_priv(expr);
 	unsigned int hooks;
 
+	if (priv->key != NFT_META_PKTTYPE)
+		return 0;
+
 	switch (ctx->afi->family) {
 	case NFPROTO_BRIDGE:
 		hooks = 1 << NF_BR_PRE_ROUTING;
@@ -308,6 +316,7 @@
 
 	return nft_chain_validate_hooks(ctx->chain, hooks);
 }
+EXPORT_SYMBOL_GPL(nft_meta_set_validate);
 
 int nft_meta_set_init(const struct nft_ctx *ctx,
 		      const struct nft_expr *expr,
@@ -327,15 +336,16 @@
 		len = sizeof(u8);
 		break;
 	case NFT_META_PKTTYPE:
-		err = nft_meta_set_init_pkttype(ctx);
-		if (err)
-			return err;
 		len = sizeof(u8);
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
 
+	err = nft_meta_set_validate(ctx, expr, NULL);
+	if (err < 0)
+		return err;
+
 	priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
 	err = nft_validate_register_load(priv->sreg, len);
 	if (err < 0)
@@ -407,6 +417,7 @@
 	.init		= nft_meta_set_init,
 	.destroy	= nft_meta_set_destroy,
 	.dump		= nft_meta_set_dump,
+	.validate	= nft_meta_set_validate,
 };
 
 static const struct nft_expr_ops *
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
new file mode 100644
index 0000000..55bc5ab
--- /dev/null
+++ b/net/netfilter/nft_numgen.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016 Laura Garcia <nevola@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <linux/static_key.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
+
+struct nft_ng_inc {
+	enum nft_registers      dreg:8;
+	u32			modulus;
+	atomic_t		counter;
+	u32			offset;
+};
+
+static void nft_ng_inc_eval(const struct nft_expr *expr,
+			    struct nft_regs *regs,
+			    const struct nft_pktinfo *pkt)
+{
+	struct nft_ng_inc *priv = nft_expr_priv(expr);
+	u32 nval, oval;
+
+	do {
+		oval = atomic_read(&priv->counter);
+		nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
+	} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
+
+	regs->data[priv->dreg] = nval + priv->offset;
+}
+
+static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
+	[NFTA_NG_DREG]		= { .type = NLA_U32 },
+	[NFTA_NG_MODULUS]	= { .type = NLA_U32 },
+	[NFTA_NG_TYPE]		= { .type = NLA_U32 },
+	[NFTA_NG_OFFSET]	= { .type = NLA_U32 },
+};
+
+static int nft_ng_inc_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_NG_OFFSET])
+		priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
+	priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+	if (priv->modulus == 0)
+		return -ERANGE;
+
+	if (priv->offset + priv->modulus - 1 < priv->offset)
+		return -EOVERFLOW;
+
+	priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+	atomic_set(&priv->counter, 0);
+
+	return nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
+		       u32 modulus, enum nft_ng_types type, u32 offset)
+{
+	if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NG_MODULUS, htonl(modulus)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_NG_OFFSET, htonl(offset)))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_ng_inc *priv = nft_expr_priv(expr);
+
+	return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL,
+			   priv->offset);
+}
+
+struct nft_ng_random {
+	enum nft_registers      dreg:8;
+	u32			modulus;
+	u32			offset;
+};
+
+static void nft_ng_random_eval(const struct nft_expr *expr,
+			       struct nft_regs *regs,
+			       const struct nft_pktinfo *pkt)
+{
+	struct nft_ng_random *priv = nft_expr_priv(expr);
+	struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
+	u32 val;
+
+	val = reciprocal_scale(prandom_u32_state(state), priv->modulus);
+	regs->data[priv->dreg] = val + priv->offset;
+}
+
+static int nft_ng_random_init(const struct nft_ctx *ctx,
+			      const struct nft_expr *expr,
+			      const struct nlattr * const tb[])
+{
+	struct nft_ng_random *priv = nft_expr_priv(expr);
+
+	if (tb[NFTA_NG_OFFSET])
+		priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
+	priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+	if (priv->modulus == 0)
+		return -ERANGE;
+
+	if (priv->offset + priv->modulus - 1 < priv->offset)
+		return -EOVERFLOW;
+
+	prandom_init_once(&nft_numgen_prandom_state);
+
+	priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
+
+	return nft_validate_register_store(ctx, priv->dreg, NULL,
+					   NFT_DATA_VALUE, sizeof(u32));
+}
+
+static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_ng_random *priv = nft_expr_priv(expr);
+
+	return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM,
+			   priv->offset);
+}
+
+static struct nft_expr_type nft_ng_type;
+static const struct nft_expr_ops nft_ng_inc_ops = {
+	.type		= &nft_ng_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)),
+	.eval		= nft_ng_inc_eval,
+	.init		= nft_ng_inc_init,
+	.dump		= nft_ng_inc_dump,
+};
+
+static const struct nft_expr_ops nft_ng_random_ops = {
+	.type		= &nft_ng_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_ng_random)),
+	.eval		= nft_ng_random_eval,
+	.init		= nft_ng_random_init,
+	.dump		= nft_ng_random_dump,
+};
+
+static const struct nft_expr_ops *
+nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[])
+{
+	u32 type;
+
+	if (!tb[NFTA_NG_DREG]	 ||
+	    !tb[NFTA_NG_MODULUS] ||
+	    !tb[NFTA_NG_TYPE])
+		return ERR_PTR(-EINVAL);
+
+	type = ntohl(nla_get_be32(tb[NFTA_NG_TYPE]));
+
+	switch (type) {
+	case NFT_NG_INCREMENTAL:
+		return &nft_ng_inc_ops;
+	case NFT_NG_RANDOM:
+		return &nft_ng_random_ops;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static struct nft_expr_type nft_ng_type __read_mostly = {
+	.name		= "numgen",
+	.select_ops	= &nft_ng_select_ops,
+	.policy		= nft_ng_policy,
+	.maxattr	= NFTA_NG_MAX,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_ng_module_init(void)
+{
+	return nft_register_expr(&nft_ng_type);
+}
+
+static void __exit nft_ng_module_exit(void)
+{
+	nft_unregister_expr(&nft_ng_type);
+}
+
+module_init(nft_ng_module_init);
+module_exit(nft_ng_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Laura Garcia <nevola@gmail.com>");
+MODULE_ALIAS_NFT_EXPR("numgen");
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 12cd4bf..b2f8861 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -92,6 +92,8 @@
 		offset = skb_network_offset(skb);
 		break;
 	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		if (!pkt->tprot_set)
+			goto err;
 		offset = pkt->xt.thoff;
 		break;
 	default:
@@ -184,6 +186,8 @@
 		offset = skb_network_offset(skb);
 		break;
 	case NFT_PAYLOAD_TRANSPORT_HEADER:
+		if (!pkt->tprot_set)
+			goto err;
 		offset = pkt->xt.thoff;
 		break;
 	default:
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 61d216e..393d359 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -22,9 +22,10 @@
 static u32 jhash_initval __read_mostly;
 
 struct nft_queue {
-	u16	queuenum;
-	u16	queues_total;
-	u16	flags;
+	enum nft_registers	sreg_qnum:8;
+	u16			queuenum;
+	u16			queues_total;
+	u16			flags;
 };
 
 static void nft_queue_eval(const struct nft_expr *expr,
@@ -54,27 +55,51 @@
 	regs->verdict.code = ret;
 }
 
+static void nft_queue_sreg_eval(const struct nft_expr *expr,
+				struct nft_regs *regs,
+				const struct nft_pktinfo *pkt)
+{
+	struct nft_queue *priv = nft_expr_priv(expr);
+	u32 queue, ret;
+
+	queue = regs->data[priv->sreg_qnum];
+
+	ret = NF_QUEUE_NR(queue);
+	if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
+		ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+	regs->verdict.code = ret;
+}
+
 static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
 	[NFTA_QUEUE_NUM]	= { .type = NLA_U16 },
 	[NFTA_QUEUE_TOTAL]	= { .type = NLA_U16 },
 	[NFTA_QUEUE_FLAGS]	= { .type = NLA_U16 },
+	[NFTA_QUEUE_SREG_QNUM]	= { .type = NLA_U32 },
 };
 
 static int nft_queue_init(const struct nft_ctx *ctx,
-			   const struct nft_expr *expr,
-			   const struct nlattr * const tb[])
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
 {
 	struct nft_queue *priv = nft_expr_priv(expr);
+	u32 maxid;
 
-	if (tb[NFTA_QUEUE_NUM] == NULL)
-		return -EINVAL;
-
-	init_hashrandom(&jhash_initval);
 	priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
 
-	if (tb[NFTA_QUEUE_TOTAL] != NULL)
+	if (tb[NFTA_QUEUE_TOTAL])
 		priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL]));
-	if (tb[NFTA_QUEUE_FLAGS] != NULL) {
+	else
+		priv->queues_total = 1;
+
+	if (priv->queues_total == 0)
+		return -EINVAL;
+
+	maxid = priv->queues_total - 1 + priv->queuenum;
+	if (maxid > U16_MAX)
+		return -ERANGE;
+
+	if (tb[NFTA_QUEUE_FLAGS]) {
 		priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
 		if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
 			return -EINVAL;
@@ -82,6 +107,29 @@
 	return 0;
 }
 
+static int nft_queue_sreg_init(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr,
+			       const struct nlattr * const tb[])
+{
+	struct nft_queue *priv = nft_expr_priv(expr);
+	int err;
+
+	priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]);
+	err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32));
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_QUEUE_FLAGS]) {
+		priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
+		if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
+			return -EINVAL;
+		if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT)
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
 static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_queue *priv = nft_expr_priv(expr);
@@ -97,6 +145,21 @@
 	return -1;
 }
 
+static int
+nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_queue *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_QUEUE_SREG_QNUM, priv->sreg_qnum) ||
+	    nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags)))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
 static struct nft_expr_type nft_queue_type;
 static const struct nft_expr_ops nft_queue_ops = {
 	.type		= &nft_queue_type,
@@ -106,9 +169,35 @@
 	.dump		= nft_queue_dump,
 };
 
+static const struct nft_expr_ops nft_queue_sreg_ops = {
+	.type		= &nft_queue_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_queue)),
+	.eval		= nft_queue_sreg_eval,
+	.init		= nft_queue_sreg_init,
+	.dump		= nft_queue_sreg_dump,
+};
+
+static const struct nft_expr_ops *
+nft_queue_select_ops(const struct nft_ctx *ctx,
+		     const struct nlattr * const tb[])
+{
+	if (tb[NFTA_QUEUE_NUM] && tb[NFTA_QUEUE_SREG_QNUM])
+		return ERR_PTR(-EINVAL);
+
+	init_hashrandom(&jhash_initval);
+
+	if (tb[NFTA_QUEUE_NUM])
+		return &nft_queue_ops;
+
+	if (tb[NFTA_QUEUE_SREG_QNUM])
+		return &nft_queue_sreg_ops;
+
+	return ERR_PTR(-EINVAL);
+}
+
 static struct nft_expr_type nft_queue_type __read_mostly = {
 	.name		= "queue",
-	.ops		= &nft_queue_ops,
+	.select_ops	= &nft_queue_select_ops,
 	.policy		= nft_queue_policy,
 	.maxattr	= NFTA_QUEUE_MAX,
 	.owner		= THIS_MODULE,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
new file mode 100644
index 0000000..c00104c
--- /dev/null
+++ b/net/netfilter/nft_quota.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/atomic.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_quota {
+	u64		quota;
+	bool		invert;
+	atomic64_t	remain;
+};
+
+static inline bool nft_overquota(struct nft_quota *priv,
+				 const struct nft_pktinfo *pkt)
+{
+	return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0;
+}
+
+static void nft_quota_eval(const struct nft_expr *expr,
+			   struct nft_regs *regs,
+			   const struct nft_pktinfo *pkt)
+{
+	struct nft_quota *priv = nft_expr_priv(expr);
+
+	if (nft_overquota(priv, pkt) ^ priv->invert)
+		regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = {
+	[NFTA_QUOTA_BYTES]	= { .type = NLA_U64 },
+	[NFTA_QUOTA_FLAGS]	= { .type = NLA_U32 },
+};
+
+static int nft_quota_init(const struct nft_ctx *ctx,
+			  const struct nft_expr *expr,
+			  const struct nlattr * const tb[])
+{
+	struct nft_quota *priv = nft_expr_priv(expr);
+	u32 flags = 0;
+	u64 quota;
+
+	if (!tb[NFTA_QUOTA_BYTES])
+		return -EINVAL;
+
+	quota = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES]));
+	if (quota > S64_MAX)
+		return -EOVERFLOW;
+
+	if (tb[NFTA_QUOTA_FLAGS]) {
+		flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS]));
+		if (flags & ~NFT_QUOTA_F_INV)
+			return -EINVAL;
+	}
+
+	priv->quota = quota;
+	priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false;
+	atomic64_set(&priv->remain, quota);
+
+	return 0;
+}
+
+static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_quota *priv = nft_expr_priv(expr);
+	u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0;
+
+	if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota),
+			 NFTA_QUOTA_PAD) ||
+	    nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_quota_type;
+static const struct nft_expr_ops nft_quota_ops = {
+	.type		= &nft_quota_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_quota)),
+	.eval		= nft_quota_eval,
+	.init		= nft_quota_init,
+	.dump		= nft_quota_dump,
+};
+
+static struct nft_expr_type nft_quota_type __read_mostly = {
+	.name		= "quota",
+	.ops		= &nft_quota_ops,
+	.policy		= nft_quota_policy,
+	.maxattr	= NFTA_QUOTA_MAX,
+	.flags		= NFT_EXPR_STATEFUL,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_quota_module_init(void)
+{
+        return nft_register_expr(&nft_quota_type);
+}
+
+static void __exit nft_quota_module_exit(void)
+{
+        nft_unregister_expr(&nft_quota_type);
+}
+
+module_init(nft_quota_module_init);
+module_exit(nft_quota_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_EXPR("quota");
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
new file mode 100644
index 0000000..c6d5358
--- /dev/null
+++ b/net/netfilter/nft_range.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_range_expr {
+	struct nft_data		data_from;
+	struct nft_data		data_to;
+	enum nft_registers	sreg:8;
+	u8			len;
+	enum nft_range_ops	op:8;
+};
+
+static void nft_range_eval(const struct nft_expr *expr,
+			 struct nft_regs *regs,
+			 const struct nft_pktinfo *pkt)
+{
+	const struct nft_range_expr *priv = nft_expr_priv(expr);
+	bool mismatch;
+	int d1, d2;
+
+	d1 = memcmp(&regs->data[priv->sreg], &priv->data_from, priv->len);
+	d2 = memcmp(&regs->data[priv->sreg], &priv->data_to, priv->len);
+	switch (priv->op) {
+	case NFT_RANGE_EQ:
+		mismatch = (d1 < 0 || d2 > 0);
+		break;
+	case NFT_RANGE_NEQ:
+		mismatch = (d1 >= 0 && d2 <= 0);
+		break;
+	}
+
+	if (mismatch)
+		regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = {
+	[NFTA_RANGE_SREG]		= { .type = NLA_U32 },
+	[NFTA_RANGE_OP]			= { .type = NLA_U32 },
+	[NFTA_RANGE_FROM_DATA]		= { .type = NLA_NESTED },
+	[NFTA_RANGE_TO_DATA]		= { .type = NLA_NESTED },
+};
+
+static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+			const struct nlattr * const tb[])
+{
+	struct nft_range_expr *priv = nft_expr_priv(expr);
+	struct nft_data_desc desc_from, desc_to;
+	int err;
+
+	err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
+			    &desc_from, tb[NFTA_RANGE_FROM_DATA]);
+	if (err < 0)
+		return err;
+
+	err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
+			    &desc_to, tb[NFTA_RANGE_TO_DATA]);
+	if (err < 0)
+		goto err1;
+
+	if (desc_from.len != desc_to.len) {
+		err = -EINVAL;
+		goto err2;
+	}
+
+	priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]);
+	err = nft_validate_register_load(priv->sreg, desc_from.len);
+	if (err < 0)
+		goto err2;
+
+	priv->op  = ntohl(nla_get_be32(tb[NFTA_RANGE_OP]));
+	priv->len = desc_from.len;
+	return 0;
+err2:
+	nft_data_uninit(&priv->data_to, desc_to.type);
+err1:
+	nft_data_uninit(&priv->data_from, desc_from.type);
+	return err;
+}
+
+static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_range_expr *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op)))
+		goto nla_put_failure;
+
+	if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from,
+			  NFT_DATA_VALUE, priv->len) < 0 ||
+	    nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to,
+			  NFT_DATA_VALUE, priv->len) < 0)
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_range_type;
+static const struct nft_expr_ops nft_range_ops = {
+	.type		= &nft_range_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_range_expr)),
+	.eval		= nft_range_eval,
+	.init		= nft_range_init,
+	.dump		= nft_range_dump,
+};
+
+static struct nft_expr_type nft_range_type __read_mostly = {
+	.name		= "range",
+	.ops		= &nft_range_ops,
+	.policy		= nft_range_policy,
+	.maxattr	= NFTA_RANGE_MAX,
+	.owner		= THIS_MODULE,
+};
+
+int __init nft_range_module_init(void)
+{
+	return nft_register_expr(&nft_range_type);
+}
+
+void nft_range_module_exit(void)
+{
+	nft_unregister_expr(&nft_range_type);
+}
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 0522fc9..c64de3f7 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -26,11 +26,27 @@
 };
 EXPORT_SYMBOL_GPL(nft_reject_policy);
 
+int nft_reject_validate(const struct nft_ctx *ctx,
+			const struct nft_expr *expr,
+			const struct nft_data **data)
+{
+	return nft_chain_validate_hooks(ctx->chain,
+					(1 << NF_INET_LOCAL_IN) |
+					(1 << NF_INET_FORWARD) |
+					(1 << NF_INET_LOCAL_OUT));
+}
+EXPORT_SYMBOL_GPL(nft_reject_validate);
+
 int nft_reject_init(const struct nft_ctx *ctx,
 		    const struct nft_expr *expr,
 		    const struct nlattr * const tb[])
 {
 	struct nft_reject *priv = nft_expr_priv(expr);
+	int err;
+
+	err = nft_reject_validate(ctx, expr, NULL);
+	if (err < 0)
+		return err;
 
 	if (tb[NFTA_REJECT_TYPE] == NULL)
 		return -EINVAL;
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 759ca52..e79d9ca 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -66,7 +66,11 @@
 				const struct nlattr * const tb[])
 {
 	struct nft_reject *priv = nft_expr_priv(expr);
-	int icmp_code;
+	int icmp_code, err;
+
+	err = nft_reject_validate(ctx, expr, NULL);
+	if (err < 0)
+		return err;
 
 	if (tb[NFTA_REJECT_TYPE] == NULL)
 		return -EINVAL;
@@ -124,6 +128,7 @@
 	.eval		= nft_reject_inet_eval,
 	.init		= nft_reject_inet_init,
 	.dump		= nft_reject_inet_dump,
+	.validate	= nft_reject_validate,
 };
 
 static struct nft_expr_type nft_reject_inet_type __read_mostly = {
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
new file mode 100644
index 0000000..3794cb2
--- /dev/null
+++ b/net/netfilter/nft_set_hash.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/jhash.h>
+#include <linux/netlink.h>
+#include <linux/workqueue.h>
+#include <linux/rhashtable.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+
+/* We target a hash table size of 4, element hint is 75% of final size */
+#define NFT_HASH_ELEMENT_HINT 3
+
+struct nft_hash {
+	struct rhashtable		ht;
+	struct delayed_work		gc_work;
+};
+
+struct nft_hash_elem {
+	struct rhash_head		node;
+	struct nft_set_ext		ext;
+};
+
+struct nft_hash_cmp_arg {
+	const struct nft_set		*set;
+	const u32			*key;
+	u8				genmask;
+};
+
+static const struct rhashtable_params nft_hash_params;
+
+static inline u32 nft_hash_key(const void *data, u32 len, u32 seed)
+{
+	const struct nft_hash_cmp_arg *arg = data;
+
+	return jhash(arg->key, len, seed);
+}
+
+static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed)
+{
+	const struct nft_hash_elem *he = data;
+
+	return jhash(nft_set_ext_key(&he->ext), len, seed);
+}
+
+static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
+			       const void *ptr)
+{
+	const struct nft_hash_cmp_arg *x = arg->key;
+	const struct nft_hash_elem *he = ptr;
+
+	if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
+		return 1;
+	if (nft_set_elem_expired(&he->ext))
+		return 1;
+	if (!nft_set_elem_active(&he->ext, x->genmask))
+		return 1;
+	return 0;
+}
+
+static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+			    const u32 *key, const struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	const struct nft_hash_elem *he;
+	struct nft_hash_cmp_arg arg = {
+		.genmask = nft_genmask_cur(net),
+		.set	 = set,
+		.key	 = key,
+	};
+
+	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+	if (he != NULL)
+		*ext = &he->ext;
+
+	return !!he;
+}
+
+static bool nft_hash_update(struct nft_set *set, const u32 *key,
+			    void *(*new)(struct nft_set *,
+					 const struct nft_expr *,
+					 struct nft_regs *regs),
+			    const struct nft_expr *expr,
+			    struct nft_regs *regs,
+			    const struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	struct nft_hash_cmp_arg arg = {
+		.genmask = NFT_GENMASK_ANY,
+		.set	 = set,
+		.key	 = key,
+	};
+
+	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+	if (he != NULL)
+		goto out;
+
+	he = new(set, expr, regs);
+	if (he == NULL)
+		goto err1;
+	if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+					 nft_hash_params))
+		goto err2;
+out:
+	*ext = &he->ext;
+	return true;
+
+err2:
+	nft_set_elem_destroy(set, he);
+err1:
+	return false;
+}
+
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
+			   const struct nft_set_elem *elem,
+			   struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he = elem->priv;
+	struct nft_hash_cmp_arg arg = {
+		.genmask = nft_genmask_next(net),
+		.set	 = set,
+		.key	 = elem->key.val.data,
+	};
+	struct nft_hash_elem *prev;
+
+	prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+					       nft_hash_params);
+	if (IS_ERR(prev))
+		return PTR_ERR(prev);
+	if (prev) {
+		*ext = &prev->ext;
+		return -EEXIST;
+	}
+	return 0;
+}
+
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
+			      const struct nft_set_elem *elem)
+{
+	struct nft_hash_elem *he = elem->priv;
+
+	nft_set_elem_change_active(net, set, &he->ext);
+	nft_set_elem_clear_busy(&he->ext);
+}
+
+static void *nft_hash_deactivate(const struct net *net,
+				 const struct nft_set *set,
+				 const struct nft_set_elem *elem)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	struct nft_hash_cmp_arg arg = {
+		.genmask = nft_genmask_next(net),
+		.set	 = set,
+		.key	 = elem->key.val.data,
+	};
+
+	rcu_read_lock();
+	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+	if (he != NULL) {
+		if (!nft_set_elem_mark_busy(&he->ext) ||
+		    !nft_is_active(net, &he->ext))
+			nft_set_elem_change_active(net, set, &he->ext);
+		else
+			he = NULL;
+	}
+	rcu_read_unlock();
+
+	return he;
+}
+
+static void nft_hash_remove(const struct nft_set *set,
+			    const struct nft_set_elem *elem)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he = elem->priv;
+
+	rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+}
+
+static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
+			  struct nft_set_iter *iter)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	struct rhashtable_iter hti;
+	struct nft_set_elem elem;
+	int err;
+
+	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+	iter->err = err;
+	if (err)
+		return;
+
+	err = rhashtable_walk_start(&hti);
+	if (err && err != -EAGAIN) {
+		iter->err = err;
+		goto out;
+	}
+
+	while ((he = rhashtable_walk_next(&hti))) {
+		if (IS_ERR(he)) {
+			err = PTR_ERR(he);
+			if (err != -EAGAIN) {
+				iter->err = err;
+				goto out;
+			}
+
+			continue;
+		}
+
+		if (iter->count < iter->skip)
+			goto cont;
+		if (nft_set_elem_expired(&he->ext))
+			goto cont;
+		if (!nft_set_elem_active(&he->ext, iter->genmask))
+			goto cont;
+
+		elem.priv = he;
+
+		iter->err = iter->fn(ctx, set, iter, &elem);
+		if (iter->err < 0)
+			goto out;
+
+cont:
+		iter->count++;
+	}
+
+out:
+	rhashtable_walk_stop(&hti);
+	rhashtable_walk_exit(&hti);
+}
+
+static void nft_hash_gc(struct work_struct *work)
+{
+	struct nft_set *set;
+	struct nft_hash_elem *he;
+	struct nft_hash *priv;
+	struct nft_set_gc_batch *gcb = NULL;
+	struct rhashtable_iter hti;
+	int err;
+
+	priv = container_of(work, struct nft_hash, gc_work.work);
+	set  = nft_set_container_of(priv);
+
+	err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL);
+	if (err)
+		goto schedule;
+
+	err = rhashtable_walk_start(&hti);
+	if (err && err != -EAGAIN)
+		goto out;
+
+	while ((he = rhashtable_walk_next(&hti))) {
+		if (IS_ERR(he)) {
+			if (PTR_ERR(he) != -EAGAIN)
+				goto out;
+			continue;
+		}
+
+		if (!nft_set_elem_expired(&he->ext))
+			continue;
+		if (nft_set_elem_mark_busy(&he->ext))
+			continue;
+
+		gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+		if (gcb == NULL)
+			goto out;
+		rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+		atomic_dec(&set->nelems);
+		nft_set_gc_batch_add(gcb, he);
+	}
+out:
+	rhashtable_walk_stop(&hti);
+	rhashtable_walk_exit(&hti);
+
+	nft_set_gc_batch_complete(gcb);
+schedule:
+	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+			   nft_set_gc_interval(set));
+}
+
+static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
+{
+	return sizeof(struct nft_hash);
+}
+
+static const struct rhashtable_params nft_hash_params = {
+	.head_offset		= offsetof(struct nft_hash_elem, node),
+	.hashfn			= nft_hash_key,
+	.obj_hashfn		= nft_hash_obj,
+	.obj_cmpfn		= nft_hash_cmp,
+	.automatic_shrinking	= true,
+};
+
+static int nft_hash_init(const struct nft_set *set,
+			 const struct nft_set_desc *desc,
+			 const struct nlattr * const tb[])
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct rhashtable_params params = nft_hash_params;
+	int err;
+
+	params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
+	params.key_len	  = set->klen;
+
+	err = rhashtable_init(&priv->ht, &params);
+	if (err < 0)
+		return err;
+
+	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+	if (set->flags & NFT_SET_TIMEOUT)
+		queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+				   nft_set_gc_interval(set));
+	return 0;
+}
+
+static void nft_hash_elem_destroy(void *ptr, void *arg)
+{
+	nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+}
+
+static void nft_hash_destroy(const struct nft_set *set)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+
+	cancel_delayed_work_sync(&priv->gc_work);
+	rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
+				    (void *)set);
+}
+
+static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features,
+			      struct nft_set_estimate *est)
+{
+	unsigned int esize;
+
+	esize = sizeof(struct nft_hash_elem);
+	if (desc->size) {
+		est->size = sizeof(struct nft_hash) +
+			    roundup_pow_of_two(desc->size * 4 / 3) *
+			    sizeof(struct nft_hash_elem *) +
+			    desc->size * esize;
+	} else {
+		/* Resizing happens when the load drops below 30% or goes
+		 * above 75%. The average of 52.5% load (approximated by 50%)
+		 * is used for the size estimation of the hash buckets,
+		 * meaning we calculate two buckets per element.
+		 */
+		est->size = esize + 2 * sizeof(struct nft_hash_elem *);
+	}
+
+	est->class = NFT_SET_CLASS_O_1;
+
+	return true;
+}
+
+static struct nft_set_ops nft_hash_ops __read_mostly = {
+	.privsize       = nft_hash_privsize,
+	.elemsize	= offsetof(struct nft_hash_elem, ext),
+	.estimate	= nft_hash_estimate,
+	.init		= nft_hash_init,
+	.destroy	= nft_hash_destroy,
+	.insert		= nft_hash_insert,
+	.activate	= nft_hash_activate,
+	.deactivate	= nft_hash_deactivate,
+	.remove		= nft_hash_remove,
+	.lookup		= nft_hash_lookup,
+	.update		= nft_hash_update,
+	.walk		= nft_hash_walk,
+	.features	= NFT_SET_MAP | NFT_SET_TIMEOUT,
+	.owner		= THIS_MODULE,
+};
+
+static int __init nft_hash_module_init(void)
+{
+	return nft_register_set(&nft_hash_ops);
+}
+
+static void __exit nft_hash_module_exit(void)
+{
+	nft_unregister_set(&nft_hash_ops);
+}
+
+module_init(nft_hash_module_init);
+module_exit(nft_hash_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_set_rbtree.c
similarity index 94%
rename from net/netfilter/nft_rbtree.c
rename to net/netfilter/nft_set_rbtree.c
index 6473936..38b5bda 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -70,7 +70,6 @@
 		} else if (d > 0)
 			parent = parent->rb_right;
 		else {
-found:
 			if (!nft_set_elem_active(&rbe->ext, genmask)) {
 				parent = parent->rb_left;
 				continue;
@@ -84,9 +83,12 @@
 		}
 	}
 
-	if (set->flags & NFT_SET_INTERVAL && interval != NULL) {
-		rbe = interval;
-		goto found;
+	if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
+	    nft_set_elem_active(&interval->ext, genmask) &&
+	    !nft_rbtree_interval_end(interval)) {
+		spin_unlock_bh(&nft_rbtree_lock);
+		*ext = &interval->ext;
+		return true;
 	}
 out:
 	spin_unlock_bh(&nft_rbtree_lock);
@@ -94,7 +96,8 @@
 }
 
 static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
-			       struct nft_rbtree_elem *new)
+			       struct nft_rbtree_elem *new,
+			       struct nft_set_ext **ext)
 {
 	struct nft_rbtree *priv = nft_set_priv(set);
 	u8 genmask = nft_genmask_next(net);
@@ -122,8 +125,10 @@
 				else if (!nft_rbtree_interval_end(rbe) &&
 					 nft_rbtree_interval_end(new))
 					p = &parent->rb_right;
-				else
+				else {
+					*ext = &rbe->ext;
 					return -EEXIST;
+				}
 			}
 		}
 	}
@@ -133,13 +138,14 @@
 }
 
 static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
-			     const struct nft_set_elem *elem)
+			     const struct nft_set_elem *elem,
+			     struct nft_set_ext **ext)
 {
 	struct nft_rbtree_elem *rbe = elem->priv;
 	int err;
 
 	spin_lock_bh(&nft_rbtree_lock);
-	err = __nft_rbtree_insert(net, set, rbe);
+	err = __nft_rbtree_insert(net, set, rbe, ext);
 	spin_unlock_bh(&nft_rbtree_lock);
 
 	return err;
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 515131f..dbd6c4a 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -24,7 +24,6 @@
 #define RATEEST_HSIZE	16
 static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
 static unsigned int jhash_rnd __read_mostly;
-static bool rnd_inited __read_mostly;
 
 static unsigned int xt_rateest_hash(const char *name)
 {
@@ -99,10 +98,7 @@
 	} cfg;
 	int ret;
 
-	if (unlikely(!rnd_inited)) {
-		get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
-		rnd_inited = true;
-	}
+	net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
 
 	est = xt_rateest_lookup(info->name);
 	if (est) {
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index e118397..872db2d 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -110,18 +110,14 @@
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
 		struct net *net = par->net;
 		unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
+		unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu);
 
-		if (dst_mtu(skb_dst(skb)) <= minlen) {
+		if (min_mtu <= minlen) {
 			net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
-					    dst_mtu(skb_dst(skb)));
+					    min_mtu);
 			return -1;
 		}
-		if (in_mtu <= minlen) {
-			net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
-					    in_mtu);
-			return -1;
-		}
-		newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
+		newmss = min_mtu - minlen;
 	} else
 		newmss = info->mss;
 
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 6e57a39..0471db4 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -89,6 +89,8 @@
 		return -EINVAL;
 
 	if (info->oif[0]) {
+		int ret;
+
 		if (info->oif[sizeof(info->oif)-1] != '\0')
 			return -EINVAL;
 
@@ -101,7 +103,11 @@
 		priv->notifier.notifier_call = tee_netdev_event;
 		info->priv    = priv;
 
-		register_netdevice_notifier(&priv->notifier);
+		ret = register_netdevice_notifier(&priv->notifier);
+		if (ret) {
+			kfree(priv);
+			return ret;
+		}
 	} else
 		info->priv = NULL;
 
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 7f4414d..663c4c3 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -127,6 +127,8 @@
 						    daddr, dport,
 						    in->ifindex);
 
+			if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+				sk = NULL;
 			/* NOTE: we return listeners even if bound to
 			 * 0.0.0.0, those are filtered out in
 			 * xt_socket, since xt_TPROXY needs 0 bound
@@ -195,6 +197,8 @@
 						   daddr, ntohs(dport),
 						   in->ifindex);
 
+			if (sk && !atomic_inc_not_zero(&sk->sk_refcnt))
+				sk = NULL;
 			/* NOTE: we return listeners even if bound to
 			 * 0.0.0.0, those are filtered out in
 			 * xt_socket, since xt_TPROXY needs 0 bound
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 99bbc82..b6dc322 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -366,14 +366,8 @@
 	unsigned int i;
 	int ret;
 
-	if (unlikely(!connlimit_rnd)) {
-		u_int32_t rand;
+	net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd));
 
-		do {
-			get_random_bytes(&rand, sizeof(rand));
-		} while (!rand);
-		cmpxchg(&connlimit_rnd, 0, rand);
-	}
 	ret = nf_ct_l3proto_try_module_get(par->family);
 	if (ret < 0) {
 		pr_info("cannot load conntrack support for "
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 188404b9..a3b8f69 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -233,10 +233,8 @@
 		return false;
 
 	if (info->match_flags & XT_CONNTRACK_EXPIRES) {
-		unsigned long expires = 0;
+		unsigned long expires = nf_ct_expires(ct) / HZ;
 
-		if (timer_pending(&ct->timeout))
-			expires = (ct->timeout.expires - jiffies) / HZ;
 		if ((expires >= info->expires_min &&
 		    expires <= info->expires_max) ^
 		    !(info->invert_flags & XT_CONNTRACK_EXPIRES))
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 1786968..44a095e 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -56,6 +56,7 @@
 }
 
 /* need to declare this at the top */
+static const struct file_operations dl_file_ops_v1;
 static const struct file_operations dl_file_ops;
 
 /* hash table crap */
@@ -86,8 +87,8 @@
 	unsigned long expires;		/* precalculated expiry time */
 	struct {
 		unsigned long prev;	/* last modification */
-		u_int32_t credit;
-		u_int32_t credit_cap, cost;
+		u_int64_t credit;
+		u_int64_t credit_cap, cost;
 	} rateinfo;
 	struct rcu_head rcu;
 };
@@ -98,7 +99,7 @@
 	u_int8_t family;
 	bool rnd_initialized;
 
-	struct hashlimit_cfg1 cfg;	/* config */
+	struct hashlimit_cfg2 cfg;	/* config */
 
 	/* used internally */
 	spinlock_t lock;		/* lock for list_head */
@@ -114,6 +115,30 @@
 	struct hlist_head hash[0];	/* hashtable itself */
 };
 
+static int
+cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+{
+	if (revision == 1) {
+		struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
+
+		to->mode = cfg->mode;
+		to->avg = cfg->avg;
+		to->burst = cfg->burst;
+		to->size = cfg->size;
+		to->max = cfg->max;
+		to->gc_interval = cfg->gc_interval;
+		to->expire = cfg->expire;
+		to->srcmask = cfg->srcmask;
+		to->dstmask = cfg->dstmask;
+	} else if (revision == 2) {
+		memcpy(to, from, sizeof(struct hashlimit_cfg2));
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static DEFINE_MUTEX(hashlimit_mutex);	/* protects htables list */
 static struct kmem_cache *hashlimit_cachep __read_mostly;
 
@@ -215,16 +240,18 @@
 }
 static void htable_gc(struct work_struct *work);
 
-static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
-			 u_int8_t family)
+static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+			 const char *name, u_int8_t family,
+			 struct xt_hashlimit_htable **out_hinfo,
+			 int revision)
 {
 	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
 	struct xt_hashlimit_htable *hinfo;
-	unsigned int size;
-	unsigned int i;
+	unsigned int size, i;
+	int ret;
 
-	if (minfo->cfg.size) {
-		size = minfo->cfg.size;
+	if (cfg->size) {
+		size = cfg->size;
 	} else {
 		size = (totalram_pages << PAGE_SHIFT) / 16384 /
 		       sizeof(struct list_head);
@@ -238,10 +265,14 @@
 	                sizeof(struct list_head) * size);
 	if (hinfo == NULL)
 		return -ENOMEM;
-	minfo->hinfo = hinfo;
+	*out_hinfo = hinfo;
 
 	/* copy match config into hashtable config */
-	memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
+	ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+
+	if (ret)
+		return ret;
+
 	hinfo->cfg.size = size;
 	if (hinfo->cfg.max == 0)
 		hinfo->cfg.max = 8 * hinfo->cfg.size;
@@ -255,17 +286,18 @@
 	hinfo->count = 0;
 	hinfo->family = family;
 	hinfo->rnd_initialized = false;
-	hinfo->name = kstrdup(minfo->name, GFP_KERNEL);
+	hinfo->name = kstrdup(name, GFP_KERNEL);
 	if (!hinfo->name) {
 		vfree(hinfo);
 		return -ENOMEM;
 	}
 	spin_lock_init(&hinfo->lock);
 
-	hinfo->pde = proc_create_data(minfo->name, 0,
+	hinfo->pde = proc_create_data(name, 0,
 		(family == NFPROTO_IPV4) ?
 		hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
-		&dl_file_ops, hinfo);
+		(revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
+		hinfo);
 	if (hinfo->pde == NULL) {
 		kfree(hinfo->name);
 		vfree(hinfo);
@@ -398,7 +430,8 @@
    (slowest userspace tool allows), which means
    CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
 */
-#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24))
 
 /* Repeated shift and or gives us all 1s, final shift and add 1 gives
  * us the power of 2 below the theoretical max, so GCC simply does a
@@ -408,9 +441,12 @@
 #define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
 #define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
 #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define _POW2_BELOW64(x) (_POW2_BELOW32(x)|_POW2_BELOW32((x)>>32))
 #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+#define POW2_BELOW64(x) ((_POW2_BELOW64(x)>>1) + 1)
 
-#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+#define CREDITS_PER_JIFFY POW2_BELOW64(MAX_CPJ)
+#define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1)
 
 /* in byte mode, the lowest possible rate is one packet/second.
  * credit_cap is used as a counter that tells us how many times we can
@@ -425,14 +461,24 @@
 }
 
 /* Precision saver. */
-static u32 user2credits(u32 user)
+static u64 user2credits(u64 user, int revision)
 {
-	/* If multiplying would overflow... */
-	if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
-		/* Divide first. */
-		return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+	if (revision == 1) {
+		/* If multiplying would overflow... */
+		if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1))
+			/* Divide first. */
+			return (user / XT_HASHLIMIT_SCALE) *\
+						HZ * CREDITS_PER_JIFFY_v1;
 
-	return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
+		return (user * HZ * CREDITS_PER_JIFFY_v1) \
+						/ XT_HASHLIMIT_SCALE;
+	} else {
+		if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+			return (user / XT_HASHLIMIT_SCALE_v2) *\
+						HZ * CREDITS_PER_JIFFY;
+
+		return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2;
+	}
 }
 
 static u32 user2credits_byte(u32 user)
@@ -442,10 +488,11 @@
 	return (u32) (us >> 32);
 }
 
-static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
+static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
+			    u32 mode, int revision)
 {
 	unsigned long delta = now - dh->rateinfo.prev;
-	u32 cap;
+	u64 cap, cpj;
 
 	if (delta == 0)
 		return;
@@ -453,7 +500,7 @@
 	dh->rateinfo.prev = now;
 
 	if (mode & XT_HASHLIMIT_BYTES) {
-		u32 tmp = dh->rateinfo.credit;
+		u64 tmp = dh->rateinfo.credit;
 		dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta;
 		cap = CREDITS_PER_JIFFY_BYTES * HZ;
 		if (tmp >= dh->rateinfo.credit) {/* overflow */
@@ -461,7 +508,9 @@
 			return;
 		}
 	} else {
-		dh->rateinfo.credit += delta * CREDITS_PER_JIFFY;
+		cpj = (revision == 1) ?
+			CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY;
+		dh->rateinfo.credit += delta * cpj;
 		cap = dh->rateinfo.credit_cap;
 	}
 	if (dh->rateinfo.credit > cap)
@@ -469,7 +518,7 @@
 }
 
 static void rateinfo_init(struct dsthash_ent *dh,
-			  struct xt_hashlimit_htable *hinfo)
+			  struct xt_hashlimit_htable *hinfo, int revision)
 {
 	dh->rateinfo.prev = jiffies;
 	if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
@@ -478,8 +527,8 @@
 		dh->rateinfo.credit_cap = hinfo->cfg.burst;
 	} else {
 		dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
-						   hinfo->cfg.burst);
-		dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
+						   hinfo->cfg.burst, revision);
+		dh->rateinfo.cost = user2credits(hinfo->cfg.avg, revision);
 		dh->rateinfo.credit_cap = dh->rateinfo.credit;
 	}
 }
@@ -603,15 +652,15 @@
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
+		    struct xt_hashlimit_htable *hinfo,
+		    const struct hashlimit_cfg2 *cfg, int revision)
 {
-	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
-	struct xt_hashlimit_htable *hinfo = info->hinfo;
 	unsigned long now = jiffies;
 	struct dsthash_ent *dh;
 	struct dsthash_dst dst;
 	bool race = false;
-	u32 cost;
+	u64 cost;
 
 	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 		goto hotdrop;
@@ -626,18 +675,18 @@
 		} else if (race) {
 			/* Already got an entry, update expiration timeout */
 			dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-			rateinfo_recalc(dh, now, hinfo->cfg.mode);
+			rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
 		} else {
 			dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
-			rateinfo_init(dh, hinfo);
+			rateinfo_init(dh, hinfo, revision);
 		}
 	} else {
 		/* update expiration timeout */
 		dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-		rateinfo_recalc(dh, now, hinfo->cfg.mode);
+		rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
 	}
 
-	if (info->cfg.mode & XT_HASHLIMIT_BYTES)
+	if (cfg->mode & XT_HASHLIMIT_BYTES)
 		cost = hashlimit_byte_cost(skb->len, dh);
 	else
 		cost = dh->rateinfo.cost;
@@ -647,73 +696,136 @@
 		dh->rateinfo.credit -= cost;
 		spin_unlock(&dh->lock);
 		rcu_read_unlock_bh();
-		return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
+		return !(cfg->mode & XT_HASHLIMIT_INVERT);
 	}
 
 	spin_unlock(&dh->lock);
 	rcu_read_unlock_bh();
 	/* default match is underlimit - so over the limit, we need to invert */
-	return info->cfg.mode & XT_HASHLIMIT_INVERT;
+	return cfg->mode & XT_HASHLIMIT_INVERT;
 
  hotdrop:
 	par->hotdrop = true;
 	return false;
 }
 
-static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+static bool
+hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	struct net *net = par->net;
-	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+	struct xt_hashlimit_htable *hinfo = info->hinfo;
+	struct hashlimit_cfg2 cfg = {};
 	int ret;
 
-	if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
-		return -EINVAL;
-	if (info->name[sizeof(info->name)-1] != '\0')
+	ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+
+	if (ret)
+		return ret;
+
+	return hashlimit_mt_common(skb, par, hinfo, &cfg, 1);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+	struct xt_hashlimit_htable *hinfo = info->hinfo;
+
+	return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+}
+
+static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
+				     struct xt_hashlimit_htable **hinfo,
+				     struct hashlimit_cfg2 *cfg,
+				     const char *name, int revision)
+{
+	struct net *net = par->net;
+	int ret;
+
+	if (cfg->gc_interval == 0 || cfg->expire == 0)
 		return -EINVAL;
 	if (par->family == NFPROTO_IPV4) {
-		if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
+		if (cfg->srcmask > 32 || cfg->dstmask > 32)
 			return -EINVAL;
 	} else {
-		if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128)
+		if (cfg->srcmask > 128 || cfg->dstmask > 128)
 			return -EINVAL;
 	}
 
-	if (info->cfg.mode & ~XT_HASHLIMIT_ALL) {
+	if (cfg->mode & ~XT_HASHLIMIT_ALL) {
 		pr_info("Unknown mode mask %X, kernel too old?\n",
-						info->cfg.mode);
+						cfg->mode);
 		return -EINVAL;
 	}
 
 	/* Check for overflow. */
-	if (info->cfg.mode & XT_HASHLIMIT_BYTES) {
-		if (user2credits_byte(info->cfg.avg) == 0) {
-			pr_info("overflow, rate too high: %u\n", info->cfg.avg);
+	if (cfg->mode & XT_HASHLIMIT_BYTES) {
+		if (user2credits_byte(cfg->avg) == 0) {
+			pr_info("overflow, rate too high: %llu\n", cfg->avg);
 			return -EINVAL;
 		}
-	} else if (info->cfg.burst == 0 ||
-		    user2credits(info->cfg.avg * info->cfg.burst) <
-		    user2credits(info->cfg.avg)) {
-			pr_info("overflow, try lower: %u/%u\n",
-				info->cfg.avg, info->cfg.burst);
+	} else if (cfg->burst == 0 ||
+		    user2credits(cfg->avg * cfg->burst, revision) <
+		    user2credits(cfg->avg, revision)) {
+			pr_info("overflow, try lower: %llu/%llu\n",
+				cfg->avg, cfg->burst);
 			return -ERANGE;
 	}
 
 	mutex_lock(&hashlimit_mutex);
-	info->hinfo = htable_find_get(net, info->name, par->family);
-	if (info->hinfo == NULL) {
-		ret = htable_create(net, info, par->family);
+	*hinfo = htable_find_get(net, name, par->family);
+	if (*hinfo == NULL) {
+		ret = htable_create(net, cfg, name, par->family,
+				    hinfo, revision);
 		if (ret < 0) {
 			mutex_unlock(&hashlimit_mutex);
 			return ret;
 		}
 	}
 	mutex_unlock(&hashlimit_mutex);
+
 	return 0;
 }
 
+static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
+{
+	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+	struct hashlimit_cfg2 cfg = {};
+	int ret;
+
+	if (info->name[sizeof(info->name) - 1] != '\0')
+		return -EINVAL;
+
+	ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+
+	if (ret)
+		return ret;
+
+	return hashlimit_mt_check_common(par, &info->hinfo,
+					 &cfg, info->name, 1);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+	struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+	if (info->name[sizeof(info->name) - 1] != '\0')
+		return -EINVAL;
+
+	return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
+					 info->name, 2);
+}
+
+static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+
+	htable_put(info->hinfo);
+}
+
 static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+	const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
 
 	htable_put(info->hinfo);
 }
@@ -723,8 +835,18 @@
 		.name           = "hashlimit",
 		.revision       = 1,
 		.family         = NFPROTO_IPV4,
-		.match          = hashlimit_mt,
+		.match          = hashlimit_mt_v1,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
+		.checkentry     = hashlimit_mt_check_v1,
+		.destroy        = hashlimit_mt_destroy_v1,
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "hashlimit",
+		.revision       = 2,
+		.family         = NFPROTO_IPV4,
+		.match          = hashlimit_mt,
+		.matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
 		.checkentry     = hashlimit_mt_check,
 		.destroy        = hashlimit_mt_destroy,
 		.me             = THIS_MODULE,
@@ -734,8 +856,18 @@
 		.name           = "hashlimit",
 		.revision       = 1,
 		.family         = NFPROTO_IPV6,
-		.match          = hashlimit_mt,
+		.match          = hashlimit_mt_v1,
 		.matchsize      = sizeof(struct xt_hashlimit_mtinfo1),
+		.checkentry     = hashlimit_mt_check_v1,
+		.destroy        = hashlimit_mt_destroy_v1,
+		.me             = THIS_MODULE,
+	},
+	{
+		.name           = "hashlimit",
+		.revision       = 2,
+		.family         = NFPROTO_IPV6,
+		.match          = hashlimit_mt,
+		.matchsize      = sizeof(struct xt_hashlimit_mtinfo2),
 		.checkentry     = hashlimit_mt_check,
 		.destroy        = hashlimit_mt_destroy,
 		.me             = THIS_MODULE,
@@ -786,18 +918,12 @@
 	spin_unlock_bh(&htable->lock);
 }
 
-static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
-				   struct seq_file *s)
+static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
+			 struct seq_file *s)
 {
-	const struct xt_hashlimit_htable *ht = s->private;
-
-	spin_lock(&ent->lock);
-	/* recalculate to show accurate numbers */
-	rateinfo_recalc(ent, jiffies, ht->cfg.mode);
-
 	switch (family) {
 	case NFPROTO_IPV4:
-		seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n",
+		seq_printf(s, "%ld %pI4:%u->%pI4:%u %llu %llu %llu\n",
 			   (long)(ent->expires - jiffies)/HZ,
 			   &ent->dst.ip.src,
 			   ntohs(ent->dst.src_port),
@@ -808,7 +934,7 @@
 		break;
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
 	case NFPROTO_IPV6:
-		seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
+		seq_printf(s, "%ld %pI6:%u->%pI6:%u %llu %llu %llu\n",
 			   (long)(ent->expires - jiffies)/HZ,
 			   &ent->dst.ip6.src,
 			   ntohs(ent->dst.src_port),
@@ -821,10 +947,52 @@
 	default:
 		BUG();
 	}
+}
+
+static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
+			       struct seq_file *s)
+{
+	const struct xt_hashlimit_htable *ht = s->private;
+
+	spin_lock(&ent->lock);
+	/* recalculate to show accurate numbers */
+	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 1);
+
+	dl_seq_print(ent, family, s);
+
 	spin_unlock(&ent->lock);
 	return seq_has_overflowed(s);
 }
 
+static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
+			    struct seq_file *s)
+{
+	const struct xt_hashlimit_htable *ht = s->private;
+
+	spin_lock(&ent->lock);
+	/* recalculate to show accurate numbers */
+	rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+	dl_seq_print(ent, family, s);
+
+	spin_unlock(&ent->lock);
+	return seq_has_overflowed(s);
+}
+
+static int dl_seq_show_v1(struct seq_file *s, void *v)
+{
+	struct xt_hashlimit_htable *htable = s->private;
+	unsigned int *bucket = (unsigned int *)v;
+	struct dsthash_ent *ent;
+
+	if (!hlist_empty(&htable->hash[*bucket])) {
+		hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+			if (dl_seq_real_show_v1(ent, htable->family, s))
+				return -1;
+	}
+	return 0;
+}
+
 static int dl_seq_show(struct seq_file *s, void *v)
 {
 	struct xt_hashlimit_htable *htable = s->private;
@@ -839,6 +1007,13 @@
 	return 0;
 }
 
+static const struct seq_operations dl_seq_ops_v1 = {
+	.start = dl_seq_start,
+	.next  = dl_seq_next,
+	.stop  = dl_seq_stop,
+	.show  = dl_seq_show_v1
+};
+
 static const struct seq_operations dl_seq_ops = {
 	.start = dl_seq_start,
 	.next  = dl_seq_next,
@@ -846,9 +1021,9 @@
 	.show  = dl_seq_show
 };
 
-static int dl_proc_open(struct inode *inode, struct file *file)
+static int dl_proc_open_v1(struct inode *inode, struct file *file)
 {
-	int ret = seq_open(file, &dl_seq_ops);
+	int ret = seq_open(file, &dl_seq_ops_v1);
 
 	if (!ret) {
 		struct seq_file *sf = file->private_data;
@@ -857,6 +1032,26 @@
 	return ret;
 }
 
+static int dl_proc_open(struct inode *inode, struct file *file)
+{
+	int ret = seq_open(file, &dl_seq_ops);
+
+	if (!ret) {
+		struct seq_file *sf = file->private_data;
+
+		sf->private = PDE_DATA(inode);
+	}
+	return ret;
+}
+
+static const struct file_operations dl_file_ops_v1 = {
+	.owner   = THIS_MODULE,
+	.open    = dl_proc_open_v1,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
 static const struct file_operations dl_file_ops = {
 	.owner   = THIS_MODULE,
 	.open    = dl_proc_open,
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 9f4ab00..f679dd4 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -41,7 +41,7 @@
 	if (!master_help)
 		return ret;
 
-	/* rcu_read_lock()ed by nf_hook_slow */
+	/* rcu_read_lock()ed by nf_hook_thresh */
 	helper = rcu_dereference(master_help->helper);
 	if (!helper)
 		return ret;
@@ -65,7 +65,7 @@
 			par->family);
 		return ret;
 	}
-	info->name[29] = '\0';
+	info->name[sizeof(info->name) - 1] = '\0';
 	return 0;
 }
 
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 3048a7e..cf32759 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -26,7 +26,7 @@
 
 	nfnl_acct_update(skb, info->nfacct);
 
-	overquota = nfnl_acct_overquota(skb, info->nfacct);
+	overquota = nfnl_acct_overquota(par->net, skb, info->nfacct);
 
 	return overquota == NFACCT_UNDERQUOTA ? false : true;
 }
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index e5f18988..bb33598 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -107,8 +107,8 @@
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
 	    par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
 	    (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
-		pr_info("using --physdev-out and --physdev-is-out are only"
-			"supported in the FORWARD and POSTROUTING chains with"
+		pr_info("using --physdev-out and --physdev-is-out are only "
+			"supported in the FORWARD and POSTROUTING chains with "
 			"bridged traffic.\n");
 		if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
 			return -EINVAL;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index d725a27..e3b7a09 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -110,7 +110,6 @@
 #endif
 
 static u_int32_t hash_rnd __read_mostly;
-static bool hash_rnd_inited __read_mostly;
 
 static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
 {
@@ -340,10 +339,8 @@
 	int ret = -EINVAL;
 	size_t sz;
 
-	if (unlikely(!hash_rnd_inited)) {
-		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
-		hash_rnd_inited = true;
-	}
+	net_get_random_once(&hash_rnd, sizeof(hash_rnd));
+
 	if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
 		pr_info("Unsupported user space flags (%08x)\n",
 			info->check_set);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index ef36a56..4dedb96 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -68,7 +68,7 @@
 			 ++i, offset, sch->type, htons(sch->length),
 			 sch->flags);
 #endif
-		offset += WORD_ROUND(ntohs(sch->length));
+		offset += SCTP_PAD4(ntohs(sch->length));
 
 		pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
 
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 8dd836a..b2f0e98 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -63,43 +63,74 @@
 static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 				int protocol, int s_num)
 {
+	struct rhashtable_iter *hti = (void *)cb->args[2];
 	struct netlink_table *tbl = &nl_table[protocol];
-	struct rhashtable *ht = &tbl->hash;
-	const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht);
 	struct net *net = sock_net(skb->sk);
 	struct netlink_diag_req *req;
 	struct netlink_sock *nlsk;
 	struct sock *sk;
-	int ret = 0, num = 0, i;
+	int num = 2;
+	int ret = 0;
 
 	req = nlmsg_data(cb->nlh);
 
-	for (i = 0; i < htbl->size; i++) {
-		struct rhash_head *pos;
+	if (s_num > 1)
+		goto mc_list;
 
-		rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) {
-			sk = (struct sock *)nlsk;
+	num--;
 
-			if (!net_eq(sock_net(sk), net))
-				continue;
-			if (num < s_num) {
-				num++;
+	if (!hti) {
+		hti = kmalloc(sizeof(*hti), GFP_KERNEL);
+		if (!hti)
+			return -ENOMEM;
+
+		cb->args[2] = (long)hti;
+	}
+
+	if (!s_num)
+		rhashtable_walk_enter(&tbl->hash, hti);
+
+	ret = rhashtable_walk_start(hti);
+	if (ret == -EAGAIN)
+		ret = 0;
+	if (ret)
+		goto stop;
+
+	while ((nlsk = rhashtable_walk_next(hti))) {
+		if (IS_ERR(nlsk)) {
+			ret = PTR_ERR(nlsk);
+			if (ret == -EAGAIN) {
+				ret = 0;
 				continue;
 			}
+			break;
+		}
 
-			if (sk_diag_fill(sk, skb, req,
-					 NETLINK_CB(cb->skb).portid,
-					 cb->nlh->nlmsg_seq,
-					 NLM_F_MULTI,
-					 sock_i_ino(sk)) < 0) {
-				ret = 1;
-				goto done;
-			}
+		sk = (struct sock *)nlsk;
 
-			num++;
+		if (!net_eq(sock_net(sk), net))
+			continue;
+
+		if (sk_diag_fill(sk, skb, req,
+				 NETLINK_CB(cb->skb).portid,
+				 cb->nlh->nlmsg_seq,
+				 NLM_F_MULTI,
+				 sock_i_ino(sk)) < 0) {
+			ret = 1;
+			break;
 		}
 	}
 
+stop:
+	rhashtable_walk_stop(hti);
+	if (ret)
+		goto done;
+
+	rhashtable_walk_exit(hti);
+	num++;
+
+mc_list:
+	read_lock(&nl_table_lock);
 	sk_for_each_bound(sk, &tbl->mc_list) {
 		if (sk_hashed(sk))
 			continue;
@@ -116,13 +147,14 @@
 				 NLM_F_MULTI,
 				 sock_i_ino(sk)) < 0) {
 			ret = 1;
-			goto done;
+			break;
 		}
 		num++;
 	}
+	read_unlock(&nl_table_lock);
+
 done:
 	cb->args[0] = num;
-	cb->args[1] = protocol;
 
 	return ret;
 }
@@ -131,20 +163,20 @@
 {
 	struct netlink_diag_req *req;
 	int s_num = cb->args[0];
+	int err = 0;
 
 	req = nlmsg_data(cb->nlh);
 
-	rcu_read_lock();
-	read_lock(&nl_table_lock);
-
 	if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
 		int i;
 
 		for (i = cb->args[1]; i < MAX_LINKS; i++) {
-			if (__netlink_diag_dump(skb, cb, i, s_num))
+			err = __netlink_diag_dump(skb, cb, i, s_num);
+			if (err)
 				break;
 			s_num = 0;
 		}
+		cb->args[1] = i;
 	} else {
 		if (req->sdiag_protocol >= MAX_LINKS) {
 			read_unlock(&nl_table_lock);
@@ -152,13 +184,22 @@
 			return -ENOENT;
 		}
 
-		__netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
+		err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
 	}
 
-	read_unlock(&nl_table_lock);
-	rcu_read_unlock();
+	return err < 0 ? err : skb->len;
+}
 
-	return skb->len;
+static int netlink_diag_dump_done(struct netlink_callback *cb)
+{
+	struct rhashtable_iter *hti = (void *)cb->args[2];
+
+	if (cb->args[0] == 1)
+		rhashtable_walk_exit(hti);
+
+	kfree(hti);
+
+	return 0;
 }
 
 static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
@@ -172,6 +213,7 @@
 	if (h->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = netlink_diag_dump,
+			.done = netlink_diag_dump_done,
 		};
 		return netlink_dump_start(net->diag_nlsk, skb, h, &c);
 	} else
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a09132a..23cc126 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -977,7 +977,7 @@
 	return 0;
 }
 
-static struct genl_ops genl_ctrl_ops[] = {
+static const struct genl_ops genl_ctrl_ops[] = {
 	{
 		.cmd		= CTRL_CMD_GETFAMILY,
 		.doit		= ctrl_getfamily,
@@ -986,7 +986,7 @@
 	},
 };
 
-static struct genl_multicast_group genl_ctrl_groups[] = {
+static const struct genl_multicast_group genl_ctrl_groups[] = {
 	{ .name = "notify", },
 };
 
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 1ecbd77..4e03f64 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -71,6 +71,8 @@
 static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage);
 
 #define DEFERRED_ACTION_FIFO_SIZE 10
+#define OVS_RECURSION_LIMIT 5
+#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2)
 struct action_fifo {
 	int head;
 	int tail;
@@ -78,7 +80,12 @@
 	struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
 };
 
+struct recirc_keys {
+	struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD];
+};
+
 static struct action_fifo __percpu *action_fifos;
+static struct recirc_keys __percpu *recirc_keys;
 static DEFINE_PER_CPU(int, exec_actions_level);
 
 static void action_fifo_init(struct action_fifo *fifo)
@@ -153,7 +160,7 @@
 static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
 		     const struct ovs_action_push_mpls *mpls)
 {
-	__be32 *new_mpls_lse;
+	struct mpls_shim_hdr *new_mpls_lse;
 
 	/* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
 	if (skb->encapsulation)
@@ -162,19 +169,23 @@
 	if (skb_cow_head(skb, MPLS_HLEN) < 0)
 		return -ENOMEM;
 
+	if (!skb->inner_protocol) {
+		skb_set_inner_network_header(skb, skb->mac_len);
+		skb_set_inner_protocol(skb, skb->protocol);
+	}
+
 	skb_push(skb, MPLS_HLEN);
 	memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
 		skb->mac_len);
 	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, skb->mac_len);
 
-	new_mpls_lse = (__be32 *)skb_mpls_header(skb);
-	*new_mpls_lse = mpls->mpls_lse;
+	new_mpls_lse = mpls_hdr(skb);
+	new_mpls_lse->label_stack_entry = mpls->mpls_lse;
 
 	skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
 
 	update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
-	if (!skb->inner_protocol)
-		skb_set_inner_protocol(skb, skb->protocol);
 	skb->protocol = mpls->mpls_ethertype;
 
 	invalidate_flow_key(key);
@@ -191,18 +202,19 @@
 	if (unlikely(err))
 		return err;
 
-	skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN);
+	skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN);
 
 	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
 		skb->mac_len);
 
 	__skb_pull(skb, MPLS_HLEN);
 	skb_reset_mac_header(skb);
+	skb_set_network_header(skb, skb->mac_len);
 
-	/* skb_mpls_header() is used to locate the ethertype
-	 * field correctly in the presence of VLAN tags.
+	/* mpls_hdr() is used to locate the ethertype field correctly in the
+	 * presence of VLAN tags.
 	 */
-	hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
+	hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN);
 	update_ethertype(skb, hdr, ethertype);
 	if (eth_p_mpls(skb->protocol))
 		skb->protocol = ethertype;
@@ -214,7 +226,7 @@
 static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
 		    const __be32 *mpls_lse, const __be32 *mask)
 {
-	__be32 *stack;
+	struct mpls_shim_hdr *stack;
 	__be32 lse;
 	int err;
 
@@ -222,16 +234,16 @@
 	if (unlikely(err))
 		return err;
 
-	stack = (__be32 *)skb_mpls_header(skb);
-	lse = OVS_MASKED(*stack, *mpls_lse, *mask);
+	stack = mpls_hdr(skb);
+	lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask);
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
-		__be32 diff[] = { ~(*stack), lse };
+		__be32 diff[] = { ~(stack->label_stack_entry), lse };
 
 		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
 					  ~skb->csum);
 	}
 
-	*stack = lse;
+	stack->label_stack_entry = lse;
 	flow_key->mpls.top_lse = lse;
 	return 0;
 }
@@ -241,20 +253,24 @@
 	int err;
 
 	err = skb_vlan_pop(skb);
-	if (skb_vlan_tag_present(skb))
+	if (skb_vlan_tag_present(skb)) {
 		invalidate_flow_key(key);
-	else
-		key->eth.tci = 0;
+	} else {
+		key->eth.vlan.tci = 0;
+		key->eth.vlan.tpid = 0;
+	}
 	return err;
 }
 
 static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
 		     const struct ovs_action_push_vlan *vlan)
 {
-	if (skb_vlan_tag_present(skb))
+	if (skb_vlan_tag_present(skb)) {
 		invalidate_flow_key(key);
-	else
-		key->eth.tci = vlan->vlan_tci;
+	} else {
+		key->eth.vlan.tci = vlan->vlan_tci;
+		key->eth.vlan.tpid = vlan->vlan_tpid;
+	}
 	return skb_vlan_push(skb, vlan->vlan_tpid,
 			     ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
 }
@@ -1011,6 +1027,7 @@
 			  const struct nlattr *a, int rem)
 {
 	struct deferred_action *da;
+	int level;
 
 	if (!is_flow_key_valid(key)) {
 		int err;
@@ -1034,6 +1051,18 @@
 			return 0;
 	}
 
+	level = this_cpu_read(exec_actions_level);
+	if (level <= OVS_DEFERRED_ACTION_THRESHOLD) {
+		struct recirc_keys *rks = this_cpu_ptr(recirc_keys);
+		struct sw_flow_key *recirc_key = &rks->key[level - 1];
+
+		*recirc_key = *key;
+		recirc_key->recirc_id = nla_get_u32(a);
+		ovs_dp_process_packet(skb, recirc_key);
+
+		return 0;
+	}
+
 	da = add_deferred_actions(skb, key, NULL);
 	if (da) {
 		da->pkt_key.recirc_id = nla_get_u32(a);
@@ -1200,11 +1229,10 @@
 			const struct sw_flow_actions *acts,
 			struct sw_flow_key *key)
 {
-	static const int ovs_recursion_limit = 5;
 	int err, level;
 
 	level = __this_cpu_inc_return(exec_actions_level);
-	if (unlikely(level > ovs_recursion_limit)) {
+	if (unlikely(level > OVS_RECURSION_LIMIT)) {
 		net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
 				     ovs_dp_name(dp));
 		kfree_skb(skb);
@@ -1229,10 +1257,17 @@
 	if (!action_fifos)
 		return -ENOMEM;
 
+	recirc_keys = alloc_percpu(struct recirc_keys);
+	if (!recirc_keys) {
+		free_percpu(action_fifos);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
 void action_fifos_exit(void)
 {
 	free_percpu(action_fifos);
+	free_percpu(recirc_keys);
 }
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c644c78..31045ef 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -433,7 +433,6 @@
 	struct nf_conntrack_l4proto *l4proto;
 	struct nf_conntrack_tuple tuple;
 	struct nf_conntrack_tuple_hash *h;
-	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
 	unsigned int dataoff;
 	u8 protonum;
@@ -458,13 +457,8 @@
 
 	ct = nf_ct_tuplehash_to_ctrack(h);
 
-	ctinfo = ovs_ct_get_info(h);
-	if (ctinfo == IP_CT_NEW) {
-		/* This should not happen. */
-		WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct);
-	}
 	skb->nfct = &ct->ct_general;
-	skb->nfctinfo = ctinfo;
+	skb->nfctinfo = ovs_ct_get_info(h);
 	return ct;
 }
 
@@ -1373,7 +1367,7 @@
 	if (ct_info->helper)
 		module_put(ct_info->helper->me);
 	if (ct_info->ct)
-		nf_ct_put(ct_info->ct);
+		nf_ct_tmpl_free(ct_info->ct);
 }
 
 void ovs_ct_init(struct net *net)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 524c0fd..4d67ea8 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -928,7 +928,6 @@
 	struct sw_flow_mask mask;
 	struct sk_buff *reply;
 	struct datapath *dp;
-	struct sw_flow_key key;
 	struct sw_flow_actions *acts;
 	struct sw_flow_match match;
 	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
@@ -956,20 +955,24 @@
 	}
 
 	/* Extract key. */
-	ovs_match_init(&match, &key, &mask);
+	ovs_match_init(&match, &new_flow->key, false, &mask);
 	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
 				  a[OVS_FLOW_ATTR_MASK], log);
 	if (error)
 		goto err_kfree_flow;
 
-	ovs_flow_mask_key(&new_flow->key, &key, true, &mask);
-
 	/* Extract flow identifier. */
 	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
-				       &key, log);
+				       &new_flow->key, log);
 	if (error)
 		goto err_kfree_flow;
 
+	/* unmasked key is needed to match when ufid is not used. */
+	if (ovs_identifier_is_key(&new_flow->id))
+		match.key = new_flow->id.unmasked_key;
+
+	ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
+
 	/* Validate actions. */
 	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
 				     &new_flow->key, &acts, log);
@@ -996,7 +999,7 @@
 	if (ovs_identifier_is_ufid(&new_flow->id))
 		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
 	if (!flow)
-		flow = ovs_flow_tbl_lookup(&dp->table, &key);
+		flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
 	if (likely(!flow)) {
 		rcu_assign_pointer(new_flow->sf_acts, acts);
 
@@ -1121,7 +1124,7 @@
 
 	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
 	if (a[OVS_FLOW_ATTR_KEY]) {
-		ovs_match_init(&match, &key, &mask);
+		ovs_match_init(&match, &key, true, &mask);
 		error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
 					  a[OVS_FLOW_ATTR_MASK], log);
 	} else if (!ufid_present) {
@@ -1238,7 +1241,7 @@
 
 	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
 	if (a[OVS_FLOW_ATTR_KEY]) {
-		ovs_match_init(&match, &key, NULL);
+		ovs_match_init(&match, &key, true, NULL);
 		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
 					log);
 	} else if (!ufid_present) {
@@ -1297,7 +1300,7 @@
 
 	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
 	if (a[OVS_FLOW_ATTR_KEY]) {
-		ovs_match_init(&match, &key, NULL);
+		ovs_match_init(&match, &key, true, NULL);
 		err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
 					NULL, log);
 		if (unlikely(err))
@@ -2437,3 +2440,7 @@
 
 MODULE_DESCRIPTION("Open vSwitch switching datapath");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
+MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 0ea128e..c8c82e1 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/in.h>
 #include <linux/rcupdate.h>
+#include <linux/cpumask.h>
 #include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
@@ -72,32 +73,33 @@
 {
 	struct flow_stats *stats;
 	int node = numa_node_id();
+	int cpu = smp_processor_id();
 	int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
 
-	stats = rcu_dereference(flow->stats[node]);
+	stats = rcu_dereference(flow->stats[cpu]);
 
-	/* Check if already have node-specific stats. */
+	/* Check if already have CPU-specific stats. */
 	if (likely(stats)) {
 		spin_lock(&stats->lock);
 		/* Mark if we write on the pre-allocated stats. */
-		if (node == 0 && unlikely(flow->stats_last_writer != node))
-			flow->stats_last_writer = node;
+		if (cpu == 0 && unlikely(flow->stats_last_writer != cpu))
+			flow->stats_last_writer = cpu;
 	} else {
 		stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
 		spin_lock(&stats->lock);
 
-		/* If the current NUMA-node is the only writer on the
+		/* If the current CPU is the only writer on the
 		 * pre-allocated stats keep using them.
 		 */
-		if (unlikely(flow->stats_last_writer != node)) {
+		if (unlikely(flow->stats_last_writer != cpu)) {
 			/* A previous locker may have already allocated the
-			 * stats, so we need to check again.  If node-specific
+			 * stats, so we need to check again.  If CPU-specific
 			 * stats were already allocated, we update the pre-
 			 * allocated stats as we have already locked them.
 			 */
-			if (likely(flow->stats_last_writer != NUMA_NO_NODE)
-			    && likely(!rcu_access_pointer(flow->stats[node]))) {
-				/* Try to allocate node-specific stats. */
+			if (likely(flow->stats_last_writer != -1) &&
+			    likely(!rcu_access_pointer(flow->stats[cpu]))) {
+				/* Try to allocate CPU-specific stats. */
 				struct flow_stats *new_stats;
 
 				new_stats =
@@ -114,12 +116,12 @@
 					new_stats->tcp_flags = tcp_flags;
 					spin_lock_init(&new_stats->lock);
 
-					rcu_assign_pointer(flow->stats[node],
+					rcu_assign_pointer(flow->stats[cpu],
 							   new_stats);
 					goto unlock;
 				}
 			}
-			flow->stats_last_writer = node;
+			flow->stats_last_writer = cpu;
 		}
 	}
 
@@ -136,14 +138,15 @@
 			struct ovs_flow_stats *ovs_stats,
 			unsigned long *used, __be16 *tcp_flags)
 {
-	int node;
+	int cpu;
 
 	*used = 0;
 	*tcp_flags = 0;
 	memset(ovs_stats, 0, sizeof(*ovs_stats));
 
-	for_each_node(node) {
-		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]);
+	/* We open code this to make sure cpu 0 is always considered */
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+		struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
 
 		if (stats) {
 			/* Local CPU may write on non-local stats, so we must
@@ -163,10 +166,11 @@
 /* Called with ovs_mutex. */
 void ovs_flow_stats_clear(struct sw_flow *flow)
 {
-	int node;
+	int cpu;
 
-	for_each_node(node) {
-		struct flow_stats *stats = ovsl_dereference(flow->stats[node]);
+	/* We open code this to make sure cpu 0 is always considered */
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) {
+		struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
 
 		if (stats) {
 			spin_lock_bh(&stats->lock);
@@ -302,24 +306,57 @@
 				  sizeof(struct icmp6hdr));
 }
 
-static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+/**
+ * Parse vlan tag from vlan header.
+ * Returns ERROR on memory error.
+ * Returns 0 if it encounters a non-vlan or incomplete packet.
+ * Returns 1 after successfully parsing vlan tag.
+ */
+static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh)
 {
-	struct qtag_prefix {
-		__be16 eth_type; /* ETH_P_8021Q */
-		__be16 tci;
-	};
-	struct qtag_prefix *qp;
+	struct vlan_head *vh = (struct vlan_head *)skb->data;
 
-	if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
+	if (likely(!eth_type_vlan(vh->tpid)))
 		return 0;
 
-	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
-					 sizeof(__be16))))
+	if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16)))
+		return 0;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) +
+				 sizeof(__be16))))
 		return -ENOMEM;
 
-	qp = (struct qtag_prefix *) skb->data;
-	key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
-	__skb_pull(skb, sizeof(struct qtag_prefix));
+	vh = (struct vlan_head *)skb->data;
+	key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT);
+	key_vh->tpid = vh->tpid;
+
+	__skb_pull(skb, sizeof(struct vlan_head));
+	return 1;
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	int res;
+
+	key->eth.vlan.tci = 0;
+	key->eth.vlan.tpid = 0;
+	key->eth.cvlan.tci = 0;
+	key->eth.cvlan.tpid = 0;
+
+	if (likely(skb_vlan_tag_present(skb))) {
+		key->eth.vlan.tci = htons(skb->vlan_tci);
+		key->eth.vlan.tpid = skb->vlan_proto;
+	} else {
+		/* Parse outer vlan tag in the non-accelerated case. */
+		res = parse_vlan_tag(skb, &key->eth.vlan);
+		if (res <= 0)
+			return res;
+	}
+
+	/* Parse inner vlan tag. */
+	res = parse_vlan_tag(skb, &key->eth.cvlan);
+	if (res <= 0)
+		return res;
 
 	return 0;
 }
@@ -480,12 +517,8 @@
 	 * update skb->csum here.
 	 */
 
-	key->eth.tci = 0;
-	if (skb_vlan_tag_present(skb))
-		key->eth.tci = htons(skb->vlan_tci);
-	else if (eth->h_proto == htons(ETH_P_8021Q))
-		if (unlikely(parse_vlan(skb, key)))
-			return -ENOMEM;
+	if (unlikely(parse_vlan(skb, key)))
+		return -ENOMEM;
 
 	key->eth.type = parse_ethertype(skb);
 	if (unlikely(key->eth.type == htons(0)))
@@ -600,12 +633,7 @@
 	} else if (eth_p_mpls(key->eth.type)) {
 		size_t stack_len = MPLS_HLEN;
 
-		/* In the presence of an MPLS label stack the end of the L2
-		 * header and the beginning of the L3 header differ.
-		 *
-		 * Advance network_header to the beginning of the L3
-		 * header. mac_len corresponds to the end of the L2 header.
-		 */
+		skb_set_inner_network_header(skb, skb->mac_len);
 		while (1) {
 			__be32 lse;
 
@@ -613,12 +641,12 @@
 			if (unlikely(error))
 				return 0;
 
-			memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
+			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
 
 			if (stack_len == MPLS_HLEN)
 				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
 
-			skb_set_network_header(skb, skb->mac_len + stack_len);
+			skb_set_inner_network_header(skb, skb->mac_len + stack_len);
 			if (lse & htonl(MPLS_LS_S_MASK))
 				break;
 
@@ -734,8 +762,6 @@
 {
 	int err;
 
-	memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE);
-
 	/* Extract metadata from netlink attributes. */
 	err = ovs_nla_get_flow_metadata(net, attr, key, log);
 	if (err)
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 03378e7..ae783f5 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -50,6 +50,11 @@
 	struct metadata_dst	*tun_dst;
 };
 
+struct vlan_head {
+	__be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/
+	__be16 tci;  /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+};
+
 #define OVS_SW_FLOW_KEY_METADATA_SIZE			\
 	(offsetof(struct sw_flow_key, recirc_id) +	\
 	FIELD_SIZEOF(struct sw_flow_key, recirc_id))
@@ -69,7 +74,8 @@
 	struct {
 		u8     src[ETH_ALEN];	/* Ethernet source address. */
 		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
-		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+		struct vlan_head vlan;
+		struct vlan_head cvlan;
 		__be16 type;		/* Ethernet frame type. */
 	} eth;
 	union {
@@ -172,14 +178,14 @@
 		struct hlist_node node[2];
 		u32 hash;
 	} flow_table, ufid_table;
-	int stats_last_writer;		/* NUMA-node id of the last writer on
+	int stats_last_writer;		/* CPU id of the last writer on
 					 * 'stats[0]'.
 					 */
 	struct sw_flow_key key;
 	struct sw_flow_id id;
 	struct sw_flow_mask *mask;
 	struct sw_flow_actions __rcu *sf_acts;
-	struct flow_stats __rcu *stats[]; /* One for each NUMA node.  First one
+	struct flow_stats __rcu *stats[]; /* One for each CPU.  First one
 					   * is allocated at flow creation time,
 					   * the rest are allocated on demand
 					   * while holding the 'stats[0].lock'.
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index c78a6a1..ae25ded 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -808,6 +808,167 @@
 				  ip_tunnel_info_af(tun_info));
 }
 
+static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
+				    const struct nlattr *a[],
+				    bool is_mask, bool inner)
+{
+	__be16 tci = 0;
+	__be16 tpid = 0;
+
+	if (a[OVS_KEY_ATTR_VLAN])
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+	if (a[OVS_KEY_ATTR_ETHERTYPE])
+		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+	if (likely(!inner)) {
+		SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
+		SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
+	} else {
+		SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
+		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
+	}
+	return 0;
+}
+
+static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
+				      u64 key_attrs, bool inner,
+				      const struct nlattr **a, bool log)
+{
+	__be16 tci = 0;
+
+	if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
+	      (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
+	       eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
+		/* Not a VLAN. */
+		return 0;
+	}
+
+	if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
+	      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
+		OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
+		return -EINVAL;
+	}
+
+	if (a[OVS_KEY_ATTR_VLAN])
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+	if (!(tci & htons(VLAN_TAG_PRESENT))) {
+		if (tci) {
+			OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
+				  (inner) ? "C-VLAN" : "VLAN");
+			return -EINVAL;
+		} else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
+			/* Corner case for truncated VLAN header. */
+			OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
+				  (inner) ? "C-VLAN" : "VLAN");
+			return -EINVAL;
+		}
+	}
+
+	return 1;
+}
+
+static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
+					   u64 key_attrs, bool inner,
+					   const struct nlattr **a, bool log)
+{
+	__be16 tci = 0;
+	__be16 tpid = 0;
+	bool encap_valid = !!(match->key->eth.vlan.tci &
+			      htons(VLAN_TAG_PRESENT));
+	bool i_encap_valid = !!(match->key->eth.cvlan.tci &
+				htons(VLAN_TAG_PRESENT));
+
+	if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
+		/* Not a VLAN. */
+		return 0;
+	}
+
+	if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
+		OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
+			  (inner) ? "C-VLAN" : "VLAN");
+		return -EINVAL;
+	}
+
+	if (a[OVS_KEY_ATTR_VLAN])
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+
+	if (a[OVS_KEY_ATTR_ETHERTYPE])
+		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+
+	if (tpid != htons(0xffff)) {
+		OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
+			  (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
+		return -EINVAL;
+	}
+	if (!(tci & htons(VLAN_TAG_PRESENT))) {
+		OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
+			  (inner) ? "C-VLAN" : "VLAN");
+		return -EINVAL;
+	}
+
+	return 1;
+}
+
+static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
+				     u64 *key_attrs, bool inner,
+				     const struct nlattr **a, bool is_mask,
+				     bool log)
+{
+	int err;
+	const struct nlattr *encap;
+
+	if (!is_mask)
+		err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
+						 a, log);
+	else
+		err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
+						      a, log);
+	if (err <= 0)
+		return err;
+
+	err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
+	if (err)
+		return err;
+
+	*key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
+	*key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+	*key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+
+	encap = a[OVS_KEY_ATTR_ENCAP];
+
+	if (!is_mask)
+		err = parse_flow_nlattrs(encap, a, key_attrs, log);
+	else
+		err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
+
+	return err;
+}
+
+static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
+				   u64 *key_attrs, const struct nlattr **a,
+				   bool is_mask, bool log)
+{
+	int err;
+	bool encap_valid = false;
+
+	err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
+					is_mask, log);
+	if (err)
+		return err;
+
+	encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
+	if (encap_valid) {
+		err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
+						is_mask, log);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
 				 u64 *attrs, const struct nlattr **a,
 				 bool is_mask, bool log)
@@ -923,20 +1084,11 @@
 	}
 
 	if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
-		__be16 tci;
-
-		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-		if (!(tci & htons(VLAN_TAG_PRESENT))) {
-			if (is_mask)
-				OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit.");
-			else
-				OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set.");
-
-			return -EINVAL;
-		}
-
-		SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask);
-		attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
+		/* VLAN attribute is always parsed before getting here since it
+		 * may occur multiple times.
+		 */
+		OVS_NLERR(log, "VLAN attribute unexpected.");
+		return -EINVAL;
 	}
 
 	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
@@ -1182,49 +1334,18 @@
 		      bool log)
 {
 	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
-	const struct nlattr *encap;
 	struct nlattr *newmask = NULL;
 	u64 key_attrs = 0;
 	u64 mask_attrs = 0;
-	bool encap_valid = false;
 	int err;
 
 	err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
 	if (err)
 		return err;
 
-	if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
-	    (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
-	    (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) {
-		__be16 tci;
-
-		if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
-		      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
-			OVS_NLERR(log, "Invalid Vlan frame.");
-			return -EINVAL;
-		}
-
-		key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
-		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-		encap = a[OVS_KEY_ATTR_ENCAP];
-		key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
-		encap_valid = true;
-
-		if (tci & htons(VLAN_TAG_PRESENT)) {
-			err = parse_flow_nlattrs(encap, a, &key_attrs, log);
-			if (err)
-				return err;
-		} else if (!tci) {
-			/* Corner case for truncated 802.1Q header. */
-			if (nla_len(encap)) {
-				OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute.");
-				return -EINVAL;
-			}
-		} else {
-			OVS_NLERR(log, "Encap attr is set for non-VLAN frame");
-			return  -EINVAL;
-		}
-	}
+	err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
+	if (err)
+		return err;
 
 	err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
 	if (err)
@@ -1265,46 +1386,12 @@
 			goto free_newmask;
 
 		/* Always match on tci. */
-		SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true);
+		SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
+		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
 
-		if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) {
-			__be16 eth_type = 0;
-			__be16 tci = 0;
-
-			if (!encap_valid) {
-				OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame.");
-				err = -EINVAL;
-				goto free_newmask;
-			}
-
-			mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
-			if (a[OVS_KEY_ATTR_ETHERTYPE])
-				eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-
-			if (eth_type == htons(0xffff)) {
-				mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
-				encap = a[OVS_KEY_ATTR_ENCAP];
-				err = parse_flow_mask_nlattrs(encap, a,
-							      &mask_attrs, log);
-				if (err)
-					goto free_newmask;
-			} else {
-				OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).",
-					  ntohs(eth_type));
-				err = -EINVAL;
-				goto free_newmask;
-			}
-
-			if (a[OVS_KEY_ATTR_VLAN])
-				tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
-
-			if (!(tci & htons(VLAN_TAG_PRESENT))) {
-				OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).",
-					  ntohs(tci));
-				err = -EINVAL;
-				goto free_newmask;
-			}
-		}
+		err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
+		if (err)
+			goto free_newmask;
 
 		err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
 					   log);
@@ -1410,12 +1497,25 @@
 	return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
 }
 
+static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
+			    bool is_mask)
+{
+	__be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
+
+	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
+	    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
+		return -EMSGSIZE;
+	return 0;
+}
+
 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 			     const struct sw_flow_key *output, bool is_mask,
 			     struct sk_buff *skb)
 {
 	struct ovs_key_ethernet *eth_key;
-	struct nlattr *nla, *encap;
+	struct nlattr *nla;
+	struct nlattr *encap = NULL;
+	struct nlattr *in_encap = NULL;
 
 	if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
 		goto nla_put_failure;
@@ -1464,17 +1564,21 @@
 	ether_addr_copy(eth_key->eth_src, output->eth.src);
 	ether_addr_copy(eth_key->eth_dst, output->eth.dst);
 
-	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
-		__be16 eth_type;
-		eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff);
-		if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
-		    nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci))
+	if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
+		if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
 			goto nla_put_failure;
 		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
-		if (!swkey->eth.tci)
+		if (!swkey->eth.vlan.tci)
 			goto unencap;
-	} else
-		encap = NULL;
+
+		if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
+			if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
+				goto nla_put_failure;
+			in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+			if (!swkey->eth.cvlan.tci)
+				goto unencap;
+		}
+	}
 
 	if (swkey->eth.type == htons(ETH_P_802_2)) {
 		/*
@@ -1493,6 +1597,14 @@
 	if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
 		goto nla_put_failure;
 
+	if (eth_type_vlan(swkey->eth.type)) {
+		/* There are 3 VLAN tags, we don't know anything about the rest
+		 * of the packet, so truncate here.
+		 */
+		WARN_ON_ONCE(!(encap && in_encap));
+		goto unencap;
+	}
+
 	if (swkey->eth.type == htons(ETH_P_IP)) {
 		struct ovs_key_ipv4 *ipv4_key;
 
@@ -1619,6 +1731,8 @@
 	}
 
 unencap:
+	if (in_encap)
+		nla_nest_end(skb, in_encap);
 	if (encap)
 		nla_nest_end(skb, encap);
 
@@ -1882,13 +1996,15 @@
 
 void ovs_match_init(struct sw_flow_match *match,
 		    struct sw_flow_key *key,
+		    bool reset_key,
 		    struct sw_flow_mask *mask)
 {
 	memset(match, 0, sizeof(*match));
 	match->key = key;
 	match->mask = mask;
 
-	memset(key, 0, sizeof(*key));
+	if (reset_key)
+		memset(key, 0, sizeof(*key));
 
 	if (mask) {
 		memset(&mask->key, 0, sizeof(mask->key));
@@ -1935,7 +2051,7 @@
 	struct nlattr *a;
 	int err = 0, start, opts_type;
 
-	ovs_match_init(&match, &key, NULL);
+	ovs_match_init(&match, &key, true, NULL);
 	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
 	if (opts_type < 0)
 		return opts_type;
@@ -2283,7 +2399,7 @@
 
 		case OVS_ACTION_ATTR_PUSH_VLAN:
 			vlan = nla_data(a);
-			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+			if (!eth_type_vlan(vlan->vlan_tpid))
 				return -EINVAL;
 			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
 				return -EINVAL;
@@ -2388,7 +2504,7 @@
 
 	(*sfa)->orig_len = nla_len(attr);
 	err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type,
-				     key->eth.tci, log);
+				     key->eth.vlan.tci, log);
 	if (err)
 		ovs_nla_free_flow_actions(*sfa);
 
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 47dd142..45f9769 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -41,7 +41,8 @@
 size_t ovs_key_attr_size(void);
 
 void ovs_match_init(struct sw_flow_match *match,
-		    struct sw_flow_key *key, struct sw_flow_mask *mask);
+		    struct sw_flow_key *key, bool reset_key,
+		    struct sw_flow_mask *mask);
 
 int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *,
 		    int attr, bool is_mask, struct sk_buff *);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index d073fff..ea7a807 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -32,6 +32,7 @@
 #include <linux/module.h>
 #include <linux/in.h>
 #include <linux/rcupdate.h>
+#include <linux/cpumask.h>
 #include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
@@ -79,17 +80,12 @@
 {
 	struct sw_flow *flow;
 	struct flow_stats *stats;
-	int node;
 
-	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+	flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
 	if (!flow)
 		return ERR_PTR(-ENOMEM);
 
-	flow->sf_acts = NULL;
-	flow->mask = NULL;
-	flow->id.unmasked_key = NULL;
-	flow->id.ufid_len = 0;
-	flow->stats_last_writer = NUMA_NO_NODE;
+	flow->stats_last_writer = -1;
 
 	/* Initialize the default stat node. */
 	stats = kmem_cache_alloc_node(flow_stats_cache,
@@ -102,10 +98,6 @@
 
 	RCU_INIT_POINTER(flow->stats[0], stats);
 
-	for_each_node(node)
-		if (node != 0)
-			RCU_INIT_POINTER(flow->stats[node], NULL);
-
 	return flow;
 err:
 	kmem_cache_free(flow_cache, flow);
@@ -142,16 +134,17 @@
 
 static void flow_free(struct sw_flow *flow)
 {
-	int node;
+	int cpu;
 
 	if (ovs_identifier_is_key(&flow->id))
 		kfree(flow->id.unmasked_key);
 	if (flow->sf_acts)
 		ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
-	for_each_node(node)
-		if (flow->stats[node])
+	/* We open code this to make sure cpu 0 is always considered */
+	for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask))
+		if (flow->stats[cpu])
 			kmem_cache_free(flow_stats_cache,
-					(struct flow_stats __force *)flow->stats[node]);
+					(struct flow_stats __force *)flow->stats[cpu]);
 	kmem_cache_free(flow_cache, flow);
 }
 
@@ -756,7 +749,7 @@
 	BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
 
 	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
-				       + (nr_node_ids
+				       + (nr_cpu_ids
 					  * sizeof(struct flow_stats *)),
 				       0, 0, NULL);
 	if (flow_cache == NULL)
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 1a1fcec..5aaf3ba 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -93,7 +93,14 @@
 		return ERR_CAST(dev);
 	}
 
-	dev_change_flags(dev, dev->flags | IFF_UP);
+	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	if (err < 0) {
+		rtnl_delete_link(dev);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		goto error;
+	}
+
 	rtnl_unlock();
 	return vport;
 error:
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 7f8897f..0e72d95 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -54,6 +54,7 @@
 	struct net *net = ovs_dp_get_net(parms->dp);
 	struct net_device *dev;
 	struct vport *vport;
+	int err;
 
 	vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
 	if (IS_ERR(vport))
@@ -67,9 +68,15 @@
 		return ERR_CAST(dev);
 	}
 
-	dev_change_flags(dev, dev->flags | IFF_UP);
-	rtnl_unlock();
+	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	if (err < 0) {
+		rtnl_delete_link(dev);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		return ERR_PTR(err);
+	}
 
+	rtnl_unlock();
 	return vport;
 }
 
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 434e04c..95c3614 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -140,7 +140,7 @@
 
 static void internal_set_rx_headroom(struct net_device *dev, int new_hr)
 {
-	dev->needed_headroom = new_hr;
+	dev->needed_headroom = new_hr < 0 ? 0 : new_hr;
 }
 
 static const struct net_device_ops internal_dev_netdev_ops = {
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 5eb7694..7eb955e 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -130,7 +130,14 @@
 		return ERR_CAST(dev);
 	}
 
-	dev_change_flags(dev, dev->flags | IFF_UP);
+	err = dev_change_flags(dev, dev->flags | IFF_UP);
+	if (err < 0) {
+		rtnl_delete_link(dev);
+		rtnl_unlock();
+		ovs_vport_free(vport);
+		goto error;
+	}
+
 	rtnl_unlock();
 	return vport;
 error:
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 6b21fd0..8f19843 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -485,9 +485,14 @@
 {
 	unsigned int length = skb->len - ETH_HLEN;
 
-	if (skb->protocol == htons(ETH_P_8021Q))
+	if (skb_vlan_tagged(skb))
 		length -= VLAN_HLEN;
 
+	/* Don't subtract for multiple VLAN tags. Most (all?) drivers allow
+	 * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none
+	 * account for 802.1ad. e.g. is_skb_forwardable().
+	 */
+
 	return length;
 }
 
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 046f750..45ac8e8 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -333,6 +333,7 @@
 void rds_ib_state_change(struct sock *sk);
 int rds_ib_listen_init(void);
 void rds_ib_listen_stop(void);
+__printf(2, 3)
 void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
 int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
 			     struct rdma_cm_event *event);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index b2d17f0..fd0bccb 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -688,6 +688,7 @@
 #define rds_conn_error(conn, fmt...) \
 	__rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
 
+__printf(2, 3)
 void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...);
 #define rds_conn_path_error(cp, fmt...) \
 	__rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt)
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 784c531..86f8853 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -19,6 +19,20 @@
 
 	  See Documentation/networking/rxrpc.txt.
 
+config AF_RXRPC_IPV6
+	bool "IPv6 support for RxRPC"
+	depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC)
+	help
+	  Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as
+	  its network transport.
+
+config AF_RXRPC_INJECT_LOSS
+	bool "Inject packet loss into RxRPC packet stream"
+	depends on AF_RXRPC
+	help
+	  Say Y here to inject packet loss by discarding some received and some
+	  transmitted packets.
+
 
 config AF_RXRPC_DEBUG
 	bool "RxRPC dynamic debugging"
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 10f3f48..8fc6ea3 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -22,6 +22,7 @@
 	peer_object.o \
 	recvmsg.o \
 	security.o \
+	sendmsg.o \
 	skbuff.o \
 	utils.o
 
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 88effad..44c9c2b 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -16,12 +16,14 @@
 #include <linux/net.h>
 #include <linux/slab.h>
 #include <linux/skbuff.h>
+#include <linux/random.h>
 #include <linux/poll.h>
 #include <linux/proc_fs.h>
 #include <linux/key-type.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
+#define CREATE_TRACE_POINTS
 #include "ar-internal.h"
 
 MODULE_DESCRIPTION("RxRPC network protocol");
@@ -43,7 +45,7 @@
 atomic_t rxrpc_debug_id;
 
 /* count of skbs currently in use */
-atomic_t rxrpc_n_skbs;
+atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
 
 struct workqueue_struct *rxrpc_workqueue;
 
@@ -104,19 +106,25 @@
 	case AF_INET:
 		if (srx->transport_len < sizeof(struct sockaddr_in))
 			return -EINVAL;
-		_debug("INET: %x @ %pI4",
-		       ntohs(srx->transport.sin.sin_port),
-		       &srx->transport.sin.sin_addr);
 		tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad);
 		break;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case AF_INET6:
+		if (srx->transport_len < sizeof(struct sockaddr_in6))
+			return -EINVAL;
+		tail = offsetof(struct sockaddr_rxrpc, transport) +
+			sizeof(struct sockaddr_in6);
+		break;
+#endif
+
 	default:
 		return -EAFNOSUPPORT;
 	}
 
 	if (tail < len)
 		memset((void *)srx + tail, 0, len - tail);
+	_debug("INET: %pISp", &srx->transport);
 	return 0;
 }
 
@@ -128,7 +136,8 @@
 	struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
 	struct sock *sk = sock->sk;
 	struct rxrpc_local *local;
-	struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	u16 service_id = srx->srx_service;
 	int ret;
 
 	_enter("%p,%p,%d", rx, saddr, len);
@@ -152,16 +161,13 @@
 		goto error_unlock;
 	}
 
-	if (rx->srx.srx_service) {
-		write_lock_bh(&local->services_lock);
-		list_for_each_entry(prx, &local->services, listen_link) {
-			if (prx->srx.srx_service == rx->srx.srx_service)
-				goto service_in_use;
-		}
-
+	if (service_id) {
+		write_lock(&local->services_lock);
+		if (rcu_access_pointer(local->service))
+			goto service_in_use;
 		rx->local = local;
-		list_add_tail(&rx->listen_link, &local->services);
-		write_unlock_bh(&local->services_lock);
+		rcu_assign_pointer(local->service, rx);
+		write_unlock(&local->services_lock);
 
 		rx->sk.sk_state = RXRPC_SERVER_BOUND;
 	} else {
@@ -174,7 +180,7 @@
 	return 0;
 
 service_in_use:
-	write_unlock_bh(&local->services_lock);
+	write_unlock(&local->services_lock);
 	rxrpc_put_local(local);
 	ret = -EADDRINUSE;
 error_unlock:
@@ -191,7 +197,7 @@
 {
 	struct sock *sk = sock->sk;
 	struct rxrpc_sock *rx = rxrpc_sk(sk);
-	unsigned int max;
+	unsigned int max, old;
 	int ret;
 
 	_enter("%p,%d", rx, backlog);
@@ -210,9 +216,13 @@
 			backlog = max;
 		else if (backlog < 0 || backlog > max)
 			break;
+		old = sk->sk_max_ack_backlog;
 		sk->sk_max_ack_backlog = backlog;
-		rx->sk.sk_state = RXRPC_SERVER_LISTENING;
-		ret = 0;
+		ret = rxrpc_service_prealloc(rx, GFP_KERNEL);
+		if (ret == 0)
+			rx->sk.sk_state = RXRPC_SERVER_LISTENING;
+		else
+			sk->sk_max_ack_backlog = old;
 		break;
 	default:
 		ret = -EBUSY;
@@ -230,6 +240,8 @@
  * @srx: The address of the peer to contact
  * @key: The security context to use (defaults to socket setting)
  * @user_call_ID: The ID to use
+ * @gfp: The allocation constraints
+ * @notify_rx: Where to send notifications instead of socket queue
  *
  * Allow a kernel service to begin a call on the nominated socket.  This just
  * sets up all the internal tracking structures and allocates connection and
@@ -242,7 +254,8 @@
 					   struct sockaddr_rxrpc *srx,
 					   struct key *key,
 					   unsigned long user_call_ID,
-					   gfp_t gfp)
+					   gfp_t gfp,
+					   rxrpc_notify_rx_t notify_rx)
 {
 	struct rxrpc_conn_parameters cp;
 	struct rxrpc_call *call;
@@ -269,6 +282,8 @@
 	cp.exclusive		= false;
 	cp.service_id		= srx->srx_service;
 	call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
+	if (!IS_ERR(call))
+		call->notify_rx = notify_rx;
 
 	release_sock(&rx->sk);
 	_leave(" = %p", call);
@@ -278,40 +293,39 @@
 
 /**
  * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
+ * @sock: The socket the call is on
  * @call: The call to end
  *
  * Allow a kernel service to end a call it was using.  The call must be
  * complete before this is called (the call should be aborted if necessary).
  */
-void rxrpc_kernel_end_call(struct rxrpc_call *call)
+void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
 {
 	_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
-	rxrpc_remove_user_ID(call->socket, call);
-	rxrpc_put_call(call);
+	rxrpc_release_call(rxrpc_sk(sock->sk), call);
+	rxrpc_put_call(call, rxrpc_call_put_kernel);
 }
 EXPORT_SYMBOL(rxrpc_kernel_end_call);
 
 /**
- * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages
+ * rxrpc_kernel_new_call_notification - Get notifications of new calls
  * @sock: The socket to intercept received messages on
- * @interceptor: The function to pass the messages to
+ * @notify_new_call: Function to be called when new calls appear
+ * @discard_new_call: Function to discard preallocated calls
  *
- * Allow a kernel service to intercept messages heading for the Rx queue on an
- * RxRPC socket.  They get passed to the specified function instead.
- * @interceptor should free the socket buffers it is given.  @interceptor is
- * called with the socket receive queue spinlock held and softirqs disabled -
- * this ensures that the messages will be delivered in the right order.
+ * Allow a kernel service to be given notifications about new calls.
  */
-void rxrpc_kernel_intercept_rx_messages(struct socket *sock,
-					rxrpc_interceptor_t interceptor)
+void rxrpc_kernel_new_call_notification(
+	struct socket *sock,
+	rxrpc_notify_new_call_t notify_new_call,
+	rxrpc_discard_new_call_t discard_new_call)
 {
 	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
 
-	_enter("");
-	rx->interceptor = interceptor;
+	rx->notify_new_call = notify_new_call;
+	rx->discard_new_call = discard_new_call;
 }
-
-EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
+EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
 
 /*
  * connect an RxRPC socket
@@ -391,6 +405,23 @@
 
 	switch (rx->sk.sk_state) {
 	case RXRPC_UNBOUND:
+		rx->srx.srx_family = AF_RXRPC;
+		rx->srx.srx_service = 0;
+		rx->srx.transport_type = SOCK_DGRAM;
+		rx->srx.transport.family = rx->family;
+		switch (rx->family) {
+		case AF_INET:
+			rx->srx.transport_len = sizeof(struct sockaddr_in);
+			break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+		case AF_INET6:
+			rx->srx.transport_len = sizeof(struct sockaddr_in6);
+			break;
+#endif
+		default:
+			ret = -EAFNOSUPPORT;
+			goto error_unlock;
+		}
 		local = rxrpc_lookup_local(&rx->srx);
 		if (IS_ERR(local)) {
 			ret = PTR_ERR(local);
@@ -505,15 +536,16 @@
 static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
 			       poll_table *wait)
 {
-	unsigned int mask;
 	struct sock *sk = sock->sk;
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	unsigned int mask;
 
 	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* the socket is readable if there are any messages waiting on the Rx
 	 * queue */
-	if (!skb_queue_empty(&sk->sk_receive_queue))
+	if (!list_empty(&rx->recvmsg_q))
 		mask |= POLLIN | POLLRDNORM;
 
 	/* the socket is writable if there is space to add new data to the
@@ -540,7 +572,8 @@
 		return -EAFNOSUPPORT;
 
 	/* we support transport protocol UDP/UDP6 only */
-	if (protocol != PF_INET)
+	if (protocol != PF_INET &&
+	    IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6)
 		return -EPROTONOSUPPORT;
 
 	if (sock->type != SOCK_DGRAM)
@@ -554,6 +587,7 @@
 		return -ENOMEM;
 
 	sock_init_data(sock, sk);
+	sock_set_flag(sk, SOCK_RCU_FREE);
 	sk->sk_state		= RXRPC_UNBOUND;
 	sk->sk_write_space	= rxrpc_write_space;
 	sk->sk_max_ack_backlog	= 0;
@@ -563,9 +597,11 @@
 	rx->family = protocol;
 	rx->calls = RB_ROOT;
 
-	INIT_LIST_HEAD(&rx->listen_link);
-	INIT_LIST_HEAD(&rx->secureq);
-	INIT_LIST_HEAD(&rx->acceptq);
+	spin_lock_init(&rx->incoming_lock);
+	INIT_LIST_HEAD(&rx->sock_calls);
+	INIT_LIST_HEAD(&rx->to_be_accepted);
+	INIT_LIST_HEAD(&rx->recvmsg_q);
+	rwlock_init(&rx->recvmsg_lock);
 	rwlock_init(&rx->call_lock);
 	memset(&rx->srx, 0, sizeof(rx->srx));
 
@@ -574,6 +610,39 @@
 }
 
 /*
+ * Kill all the calls on a socket and shut it down.
+ */
+static int rxrpc_shutdown(struct socket *sock, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct rxrpc_sock *rx = rxrpc_sk(sk);
+	int ret = 0;
+
+	_enter("%p,%d", sk, flags);
+
+	if (flags != SHUT_RDWR)
+		return -EOPNOTSUPP;
+	if (sk->sk_state == RXRPC_CLOSE)
+		return -ESHUTDOWN;
+
+	lock_sock(sk);
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	if (sk->sk_state < RXRPC_CLOSE) {
+		sk->sk_state = RXRPC_CLOSE;
+		sk->sk_shutdown = SHUTDOWN_MASK;
+	} else {
+		ret = -ESHUTDOWN;
+	}
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+	rxrpc_discard_prealloc(rx);
+
+	release_sock(sk);
+	return ret;
+}
+
+/*
  * RxRPC socket destructor
  */
 static void rxrpc_sock_destructor(struct sock *sk)
@@ -609,15 +678,14 @@
 	sk->sk_state = RXRPC_CLOSE;
 	spin_unlock_bh(&sk->sk_receive_queue.lock);
 
-	ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
-
-	if (!list_empty(&rx->listen_link)) {
-		write_lock_bh(&rx->local->services_lock);
-		list_del(&rx->listen_link);
-		write_unlock_bh(&rx->local->services_lock);
+	if (rx->local && rx->local->service == rx) {
+		write_lock(&rx->local->services_lock);
+		rx->local->service = NULL;
+		write_unlock(&rx->local->services_lock);
 	}
 
 	/* try to flush out this socket */
+	rxrpc_discard_prealloc(rx);
 	rxrpc_release_calls_on_socket(rx);
 	flush_workqueue(rxrpc_workqueue);
 	rxrpc_purge_queue(&sk->sk_receive_queue);
@@ -666,7 +734,7 @@
 	.poll		= rxrpc_poll,
 	.ioctl		= sock_no_ioctl,
 	.listen		= rxrpc_listen,
-	.shutdown	= sock_no_shutdown,
+	.shutdown	= rxrpc_shutdown,
 	.setsockopt	= rxrpc_setsockopt,
 	.getsockopt	= sock_no_getsockopt,
 	.sendmsg	= rxrpc_sendmsg,
@@ -697,7 +765,13 @@
 
 	BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
 
-	rxrpc_epoch = get_seconds();
+	get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch));
+	rxrpc_epoch |= RXRPC_RANDOM_EPOCH;
+	get_random_bytes(&rxrpc_client_conn_ids.cur,
+			 sizeof(rxrpc_client_conn_ids.cur));
+	rxrpc_client_conn_ids.cur &= 0x3fffffff;
+	if (rxrpc_client_conn_ids.cur == 0)
+		rxrpc_client_conn_ids.cur = 1;
 
 	ret = -ENOMEM;
 	rxrpc_call_jar = kmem_cache_create(
@@ -788,7 +862,8 @@
 	proto_unregister(&rxrpc_proto);
 	rxrpc_destroy_all_calls();
 	rxrpc_destroy_all_connections();
-	ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
+	ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0);
+	ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0);
 	rxrpc_destroy_all_locals();
 
 	remove_proc_entry("rxrpc_conns", init_net.proc_net);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 1bb9e7a..d38dffd 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -35,11 +35,23 @@
 #define rxrpc_queue_delayed_work(WS,D)	\
 	queue_delayed_work(rxrpc_workqueue, (WS), (D))
 
-#define rxrpc_queue_call(CALL)	rxrpc_queue_work(&(CALL)->processor)
-
 struct rxrpc_connection;
 
 /*
+ * Mark applied to socket buffers.
+ */
+enum rxrpc_skb_mark {
+	RXRPC_SKB_MARK_DATA,		/* data message */
+	RXRPC_SKB_MARK_FINAL_ACK,	/* final ACK received message */
+	RXRPC_SKB_MARK_BUSY,		/* server busy message */
+	RXRPC_SKB_MARK_REMOTE_ABORT,	/* remote abort message */
+	RXRPC_SKB_MARK_LOCAL_ABORT,	/* local abort message */
+	RXRPC_SKB_MARK_NET_ERROR,	/* network error message */
+	RXRPC_SKB_MARK_LOCAL_ERROR,	/* local error message */
+	RXRPC_SKB_MARK_NEW_CALL,	/* local error message */
+};
+
+/*
  * sk_state for RxRPC sockets
  */
 enum {
@@ -52,19 +64,44 @@
 };
 
 /*
+ * Service backlog preallocation.
+ *
+ * This contains circular buffers of preallocated peers, connections and calls
+ * for incoming service calls and their head and tail pointers.  This allows
+ * calls to be set up in the data_ready handler, thereby avoiding the need to
+ * shuffle packets around so much.
+ */
+struct rxrpc_backlog {
+	unsigned short		peer_backlog_head;
+	unsigned short		peer_backlog_tail;
+	unsigned short		conn_backlog_head;
+	unsigned short		conn_backlog_tail;
+	unsigned short		call_backlog_head;
+	unsigned short		call_backlog_tail;
+#define RXRPC_BACKLOG_MAX	32
+	struct rxrpc_peer	*peer_backlog[RXRPC_BACKLOG_MAX];
+	struct rxrpc_connection	*conn_backlog[RXRPC_BACKLOG_MAX];
+	struct rxrpc_call	*call_backlog[RXRPC_BACKLOG_MAX];
+};
+
+/*
  * RxRPC socket definition
  */
 struct rxrpc_sock {
 	/* WARNING: sk has to be the first member */
 	struct sock		sk;
-	rxrpc_interceptor_t	interceptor;	/* kernel service Rx interceptor function */
+	rxrpc_notify_new_call_t	notify_new_call; /* Func to notify of new call */
+	rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */
 	struct rxrpc_local	*local;		/* local endpoint */
-	struct list_head	listen_link;	/* link in the local endpoint's listen list */
-	struct list_head	secureq;	/* calls awaiting connection security clearance */
-	struct list_head	acceptq;	/* calls awaiting acceptance */
+	struct rxrpc_backlog	*backlog;	/* Preallocation for services */
+	spinlock_t		incoming_lock;	/* Incoming call vs service shutdown lock */
+	struct list_head	sock_calls;	/* List of calls owned by this socket */
+	struct list_head	to_be_accepted;	/* calls awaiting acceptance */
+	struct list_head	recvmsg_q;	/* Calls awaiting recvmsg's attention  */
+	rwlock_t		recvmsg_lock;	/* Lock for recvmsg_q */
 	struct key		*key;		/* security for this socket */
 	struct key		*securities;	/* list of server security descriptors */
-	struct rb_root		calls;		/* outstanding calls on this socket */
+	struct rb_root		calls;		/* User ID -> call mapping */
 	unsigned long		flags;
 #define RXRPC_SOCK_CONNECTED		0	/* connect_srx is set */
 	rwlock_t		call_lock;	/* lock for calls */
@@ -103,13 +140,11 @@
  * - max 48 bytes (struct sk_buff::cb)
  */
 struct rxrpc_skb_priv {
-	struct rxrpc_call	*call;		/* call with which associated */
-	unsigned long		resend_at;	/* time in jiffies at which to resend */
 	union {
-		unsigned int	offset;		/* offset into buffer of next read */
+		u8		nr_jumbo;	/* Number of jumbo subpackets */
+	};
+	union {
 		int		remain;		/* amount of space remaining for next write */
-		u32		error;		/* network error code */
-		bool		need_resend;	/* T if needs resending */
 	};
 
 	struct rxrpc_host_header hdr;		/* RxRPC packet header from this packet */
@@ -117,13 +152,6 @@
 
 #define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
 
-enum rxrpc_command {
-	RXRPC_CMD_SEND_DATA,		/* send data message */
-	RXRPC_CMD_SEND_ABORT,		/* request abort generation */
-	RXRPC_CMD_ACCEPT,		/* [server] accept incoming call */
-	RXRPC_CMD_REJECT_BUSY,		/* [server] reject a call as busy */
-};
-
 /*
  * RxRPC security module interface
  */
@@ -150,7 +178,12 @@
 			     void *);
 
 	/* verify the security on a received packet */
-	int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *);
+	int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
+			     unsigned int, unsigned int, rxrpc_seq_t, u16);
+
+	/* Locate the data in a received packet that has been verified. */
+	void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
+			    unsigned int *, unsigned int *);
 
 	/* issue a challenge */
 	int (*issue_challenge)(struct rxrpc_connection *);
@@ -180,9 +213,8 @@
 	struct list_head	link;
 	struct socket		*socket;	/* my UDP socket */
 	struct work_struct	processor;
-	struct list_head	services;	/* services listening on this endpoint */
+	struct rxrpc_sock __rcu	*service;	/* Service(s) listening on this endpoint */
 	struct rw_semaphore	defrag_sem;	/* control re-enablement of IP DF bit */
-	struct sk_buff_head	accept_queue;	/* incoming calls awaiting acceptance */
 	struct sk_buff_head	reject_queue;	/* packets awaiting rejection */
 	struct sk_buff_head	event_queue;	/* endpoint event packets awaiting processing */
 	struct rb_root		client_conns;	/* Client connections by socket params */
@@ -220,10 +252,12 @@
 
 	/* calculated RTT cache */
 #define RXRPC_RTT_CACHE_SIZE 32
-	suseconds_t		rtt;		/* current RTT estimate (in uS) */
-	unsigned int		rtt_point;	/* next entry at which to insert */
-	unsigned int		rtt_usage;	/* amount of cache actually used */
-	suseconds_t		rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
+	ktime_t			rtt_last_req;	/* Time of last RTT request */
+	u64			rtt;		/* Current RTT estimate (in nS) */
+	u64			rtt_sum;	/* Sum of cache contents */
+	u64			rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */
+	u8			rtt_cursor;	/* next entry at which to insert */
+	u8			rtt_usage;	/* amount of cache actually used */
 };
 
 /*
@@ -255,6 +289,9 @@
 	RXRPC_CONN_HAS_IDR,		/* Has a client conn ID assigned */
 	RXRPC_CONN_IN_SERVICE_CONNS,	/* Conn is in peer->service_conns */
 	RXRPC_CONN_IN_CLIENT_CONNS,	/* Conn is in local->client_conns */
+	RXRPC_CONN_EXPOSED,		/* Conn has extra ref for exposure */
+	RXRPC_CONN_DONT_REUSE,		/* Don't reuse this connection */
+	RXRPC_CONN_COUNTED,		/* Counted by rxrpc_nr_client_conns */
 };
 
 /*
@@ -265,17 +302,29 @@
 };
 
 /*
+ * The connection cache state.
+ */
+enum rxrpc_conn_cache_state {
+	RXRPC_CONN_CLIENT_INACTIVE,	/* Conn is not yet listed */
+	RXRPC_CONN_CLIENT_WAITING,	/* Conn is on wait list, waiting for capacity */
+	RXRPC_CONN_CLIENT_ACTIVE,	/* Conn is on active list, doing calls */
+	RXRPC_CONN_CLIENT_CULLED,	/* Conn is culled and delisted, doing calls */
+	RXRPC_CONN_CLIENT_IDLE,		/* Conn is on idle list, doing mostly nothing */
+	RXRPC_CONN__NR_CACHE_STATES
+};
+
+/*
  * The connection protocol state.
  */
 enum rxrpc_conn_proto_state {
 	RXRPC_CONN_UNUSED,		/* Connection not yet attempted */
 	RXRPC_CONN_CLIENT,		/* Client connection */
+	RXRPC_CONN_SERVICE_PREALLOC,	/* Service connection preallocation */
 	RXRPC_CONN_SERVICE_UNSECURED,	/* Service unsecured connection */
 	RXRPC_CONN_SERVICE_CHALLENGING,	/* Service challenging for security */
 	RXRPC_CONN_SERVICE,		/* Service secured connection */
 	RXRPC_CONN_REMOTELY_ABORTED,	/* Conn aborted by peer */
 	RXRPC_CONN_LOCALLY_ABORTED,	/* Conn aborted locally */
-	RXRPC_CONN_NETWORK_ERROR,	/* Conn terminated by network error */
 	RXRPC_CONN__NR_STATES
 };
 
@@ -288,23 +337,33 @@
 	struct rxrpc_conn_proto	proto;
 	struct rxrpc_conn_parameters params;
 
-	spinlock_t		channel_lock;
+	atomic_t		usage;
+	struct rcu_head		rcu;
+	struct list_head	cache_link;
 
+	spinlock_t		channel_lock;
+	unsigned char		active_chans;	/* Mask of active channels */
+#define RXRPC_ACTIVE_CHANS_MASK	((1 << RXRPC_MAXCALLS) - 1)
+	struct list_head	waiting_calls;	/* Calls waiting for channels */
 	struct rxrpc_channel {
 		struct rxrpc_call __rcu	*call;		/* Active call */
 		u32			call_id;	/* ID of current call */
 		u32			call_counter;	/* Call ID counter */
 		u32			last_call;	/* ID of last call */
-		u32			last_result;	/* Result of last call (0/abort) */
+		u8			last_type;	/* Type of last packet */
+		u16			last_service_id;
+		union {
+			u32		last_seq;
+			u32		last_abort;
+		};
 	} channels[RXRPC_MAXCALLS];
-	wait_queue_head_t	channel_wq;	/* queue to wait for channel to become available */
 
-	struct rcu_head		rcu;
 	struct work_struct	processor;	/* connection event processor */
 	union {
 		struct rb_node	client_node;	/* Node in local->client_conns */
 		struct rb_node	service_node;	/* Node in peer->service_conns */
 	};
+	struct list_head	proc_link;	/* link in procfs list */
 	struct list_head	link;		/* link in master connection list */
 	struct sk_buff_head	rx_queue;	/* received conn-level packets */
 	const struct rxrpc_security *security;	/* applied security module */
@@ -313,21 +372,18 @@
 	struct rxrpc_crypt	csum_iv;	/* packet checksum base */
 	unsigned long		flags;
 	unsigned long		events;
-	unsigned long		put_time;	/* Time at which last put */
+	unsigned long		idle_timestamp;	/* Time at which last became idle */
 	spinlock_t		state_lock;	/* state-change lock */
-	atomic_t		usage;
-	enum rxrpc_conn_proto_state state : 8;	/* current state of connection */
+	enum rxrpc_conn_cache_state cache_state;
+	enum rxrpc_conn_proto_state state;	/* current state of connection */
 	u32			local_abort;	/* local abort code */
 	u32			remote_abort;	/* remote abort code */
-	int			error;		/* local error incurred */
 	int			debug_id;	/* debug ID for printks */
 	atomic_t		serial;		/* packet serial number counter */
-	atomic_t		hi_serial;	/* highest serial number received */
-	atomic_t		avail_chans;	/* number of channels available */
-	u8			size_align;	/* data size alignment (for security) */
-	u8			header_size;	/* rxrpc + security header size */
-	u8			security_size;	/* security header size */
+	unsigned int		hi_serial;	/* highest serial number received */
 	u32			security_nonce;	/* response re-use preventer */
+	u8			size_align;	/* data size alignment (for security) */
+	u8			security_size;	/* security header size */
 	u8			security_ix;	/* security type */
 	u8			out_clientflag;	/* RXRPC_CLIENT_INITIATED if we are client */
 };
@@ -337,37 +393,23 @@
  */
 enum rxrpc_call_flag {
 	RXRPC_CALL_RELEASED,		/* call has been released - no more message to userspace */
-	RXRPC_CALL_TERMINAL_MSG,	/* call has given the socket its final message */
-	RXRPC_CALL_RCVD_LAST,		/* all packets received */
-	RXRPC_CALL_RUN_RTIMER,		/* Tx resend timer started */
-	RXRPC_CALL_TX_SOFT_ACK,		/* sent some soft ACKs */
-	RXRPC_CALL_PROC_BUSY,		/* the processor is busy */
-	RXRPC_CALL_INIT_ACCEPT,		/* acceptance was initiated */
 	RXRPC_CALL_HAS_USERID,		/* has a user ID attached */
-	RXRPC_CALL_EXPECT_OOS,		/* expect out of sequence packets */
+	RXRPC_CALL_IS_SERVICE,		/* Call is service call */
+	RXRPC_CALL_EXPOSED,		/* The call was exposed to the world */
+	RXRPC_CALL_RX_LAST,		/* Received the last packet (at rxtx_top) */
+	RXRPC_CALL_TX_LAST,		/* Last packet in Tx buffer (at rxtx_top) */
+	RXRPC_CALL_PINGING,		/* Ping in process */
+	RXRPC_CALL_RETRANS_TIMEOUT,	/* Retransmission due to timeout occurred */
 };
 
 /*
  * Events that can be raised on a call.
  */
 enum rxrpc_call_event {
-	RXRPC_CALL_EV_RCVD_ACKALL,	/* ACKALL or reply received */
-	RXRPC_CALL_EV_RCVD_BUSY,	/* busy packet received */
-	RXRPC_CALL_EV_RCVD_ABORT,	/* abort packet received */
-	RXRPC_CALL_EV_RCVD_ERROR,	/* network error received */
-	RXRPC_CALL_EV_ACK_FINAL,	/* need to generate final ACK (and release call) */
 	RXRPC_CALL_EV_ACK,		/* need to generate ACK */
-	RXRPC_CALL_EV_REJECT_BUSY,	/* need to generate busy message */
 	RXRPC_CALL_EV_ABORT,		/* need to generate abort */
-	RXRPC_CALL_EV_CONN_ABORT,	/* local connection abort generated */
-	RXRPC_CALL_EV_RESEND_TIMER,	/* Tx resend timer expired */
+	RXRPC_CALL_EV_TIMER,		/* Timer expired */
 	RXRPC_CALL_EV_RESEND,		/* Tx resend required */
-	RXRPC_CALL_EV_DRAIN_RX_OOS,	/* drain the Rx out of sequence queue */
-	RXRPC_CALL_EV_LIFE_TIMER,	/* call's lifetimer ran out */
-	RXRPC_CALL_EV_ACCEPTED,		/* incoming call accepted by userspace app */
-	RXRPC_CALL_EV_SECURED,		/* incoming call's connection is now secure */
-	RXRPC_CALL_EV_POST_ACCEPT,	/* need to post an "accept?" message to the app */
-	RXRPC_CALL_EV_RELEASE,		/* need to release the call's resources */
 };
 
 /*
@@ -379,20 +421,38 @@
 	RXRPC_CALL_CLIENT_SEND_REQUEST,	/* - client sending request phase */
 	RXRPC_CALL_CLIENT_AWAIT_REPLY,	/* - client awaiting reply */
 	RXRPC_CALL_CLIENT_RECV_REPLY,	/* - client receiving reply phase */
-	RXRPC_CALL_CLIENT_FINAL_ACK,	/* - client sending final ACK phase */
+	RXRPC_CALL_SERVER_PREALLOC,	/* - service preallocation */
 	RXRPC_CALL_SERVER_SECURING,	/* - server securing request connection */
 	RXRPC_CALL_SERVER_ACCEPTING,	/* - server accepting request */
 	RXRPC_CALL_SERVER_RECV_REQUEST,	/* - server receiving request */
 	RXRPC_CALL_SERVER_ACK_REQUEST,	/* - server pending ACK of request */
 	RXRPC_CALL_SERVER_SEND_REPLY,	/* - server sending reply */
 	RXRPC_CALL_SERVER_AWAIT_ACK,	/* - server awaiting final ACK */
-	RXRPC_CALL_COMPLETE,		/* - call completed */
-	RXRPC_CALL_SERVER_BUSY,		/* - call rejected by busy server */
+	RXRPC_CALL_COMPLETE,		/* - call complete */
+	NR__RXRPC_CALL_STATES
+};
+
+/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+	RXRPC_CALL_SUCCEEDED,		/* - Normal termination */
 	RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
 	RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
+	RXRPC_CALL_LOCAL_ERROR,		/* - call failed due to local error */
 	RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
-	RXRPC_CALL_DEAD,		/* - call is dead */
-	NR__RXRPC_CALL_STATES
+	NR__RXRPC_CALL_COMPLETIONS
+};
+
+/*
+ * Call Tx congestion management modes.
+ */
+enum rxrpc_congest_mode {
+	RXRPC_CALL_SLOW_START,
+	RXRPC_CALL_CONGEST_AVOIDANCE,
+	RXRPC_CALL_PACKET_LOSS,
+	RXRPC_CALL_FAST_RETRANSMIT,
+	NR__RXRPC_CONGEST_MODES
 };
 
 /*
@@ -402,91 +462,329 @@
 struct rxrpc_call {
 	struct rcu_head		rcu;
 	struct rxrpc_connection	*conn;		/* connection carrying call */
-	struct rxrpc_sock	*socket;	/* socket responsible */
-	struct timer_list	lifetimer;	/* lifetime remaining on call */
-	struct timer_list	deadspan;	/* reap timer for re-ACK'ing, etc  */
-	struct timer_list	ack_timer;	/* ACK generation timer */
-	struct timer_list	resend_timer;	/* Tx resend timer */
-	struct work_struct	destroyer;	/* call destroyer */
-	struct work_struct	processor;	/* packet processor and ACK generator */
+	struct rxrpc_peer	*peer;		/* Peer record for remote address */
+	struct rxrpc_sock __rcu	*socket;	/* socket responsible */
+	ktime_t			ack_at;		/* When deferred ACK needs to happen */
+	ktime_t			resend_at;	/* When next resend needs to happen */
+	ktime_t			expire_at;	/* When the call times out */
+	struct timer_list	timer;		/* Combined event timer */
+	struct work_struct	processor;	/* Event processor */
+	rxrpc_notify_rx_t	notify_rx;	/* kernel service Rx notification function */
 	struct list_head	link;		/* link in master call list */
+	struct list_head	chan_wait_link;	/* Link in conn->waiting_calls */
 	struct hlist_node	error_link;	/* link in error distribution list */
-	struct list_head	accept_link;	/* calls awaiting acceptance */
-	struct rb_node		sock_node;	/* node in socket call tree */
-	struct sk_buff_head	rx_queue;	/* received packets */
-	struct sk_buff_head	rx_oos_queue;	/* packets received out of sequence */
+	struct list_head	accept_link;	/* Link in rx->acceptq */
+	struct list_head	recvmsg_link;	/* Link in rx->recvmsg_q */
+	struct list_head	sock_link;	/* Link in rx->sock_calls */
+	struct rb_node		sock_node;	/* Node in rx->calls */
 	struct sk_buff		*tx_pending;	/* Tx socket buffer being filled */
-	wait_queue_head_t	tx_waitq;	/* wait for Tx window space to become available */
+	wait_queue_head_t	waitq;		/* Wait queue for channel or Tx */
 	__be32			crypto_buf[2];	/* Temporary packet crypto buffer */
 	unsigned long		user_call_ID;	/* user-defined call ID */
-	unsigned long		creation_jif;	/* time of call creation */
 	unsigned long		flags;
 	unsigned long		events;
 	spinlock_t		lock;
 	rwlock_t		state_lock;	/* lock for state transition */
-	atomic_t		usage;
-	atomic_t		sequence;	/* Tx data packet sequence counter */
-	u32			local_abort;	/* local abort code */
-	u32			remote_abort;	/* remote abort code */
-	int			error_report;	/* Network error (ICMP/local transport) */
+	u32			abort_code;	/* Local/remote abort code */
 	int			error;		/* Local error incurred */
-	enum rxrpc_call_state	state : 8;	/* current state of call */
-	int			debug_id;	/* debug ID for printks */
-	u8			channel;	/* connection channel occupied by this call */
-
-	/* transmission-phase ACK management */
-	u8			acks_head;	/* offset into window of first entry */
-	u8			acks_tail;	/* offset into window of last entry */
-	u8			acks_winsz;	/* size of un-ACK'd window */
-	u8			acks_unacked;	/* lowest unacked packet in last ACK received */
-	int			acks_latest;	/* serial number of latest ACK received */
-	rxrpc_seq_t		acks_hard;	/* highest definitively ACK'd msg seq */
-	unsigned long		*acks_window;	/* sent packet window
-						 * - elements are pointers with LSB set if ACK'd
-						 */
-
-	/* receive-phase ACK management */
-	rxrpc_seq_t		rx_data_expect;	/* next data seq ID expected to be received */
-	rxrpc_seq_t		rx_data_post;	/* next data seq ID expected to be posted */
-	rxrpc_seq_t		rx_data_recv;	/* last data seq ID encountered by recvmsg */
-	rxrpc_seq_t		rx_data_eaten;	/* last data seq ID consumed by recvmsg */
-	rxrpc_seq_t		rx_first_oos;	/* first packet in rx_oos_queue (or 0) */
-	rxrpc_seq_t		ackr_win_top;	/* top of ACK window (rx_data_eaten is bottom) */
-	rxrpc_seq_t		ackr_prev_seq;	/* previous sequence number received */
-	u8			ackr_reason;	/* reason to ACK */
-	rxrpc_serial_t		ackr_serial;	/* serial of packet being ACK'd */
-	atomic_t		ackr_not_idle;	/* number of packets in Rx queue */
-
-	/* received packet records, 1 bit per record */
-#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
-	unsigned long		ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
-
-	u8			in_clientflag;	/* Copy of conn->in_clientflag */
-	struct rxrpc_local	*local;		/* Local endpoint. */
+	enum rxrpc_call_state	state;		/* current state of call */
+	enum rxrpc_call_completion completion;	/* Call completion condition */
+	atomic_t		usage;
+	u16			service_id;	/* service ID */
+	u8			security_ix;	/* Security type */
 	u32			call_id;	/* call ID on connection  */
 	u32			cid;		/* connection ID plus channel index */
-	u32			epoch;		/* epoch of this connection */
-	u16			service_id;	/* service ID */
+	int			debug_id;	/* debug ID for printks */
+	unsigned short		rx_pkt_offset;	/* Current recvmsg packet offset */
+	unsigned short		rx_pkt_len;	/* Current recvmsg packet len */
+
+	/* Rx/Tx circular buffer, depending on phase.
+	 *
+	 * In the Rx phase, packets are annotated with 0 or the number of the
+	 * segment of a jumbo packet each buffer refers to.  There can be up to
+	 * 47 segments in a maximum-size UDP packet.
+	 *
+	 * In the Tx phase, packets are annotated with which buffers have been
+	 * acked.
+	 */
+#define RXRPC_RXTX_BUFF_SIZE	64
+#define RXRPC_RXTX_BUFF_MASK	(RXRPC_RXTX_BUFF_SIZE - 1)
+#define RXRPC_INIT_RX_WINDOW_SIZE 32
+	struct sk_buff		**rxtx_buffer;
+	u8			*rxtx_annotations;
+#define RXRPC_TX_ANNO_ACK	0
+#define RXRPC_TX_ANNO_UNACK	1
+#define RXRPC_TX_ANNO_NAK	2
+#define RXRPC_TX_ANNO_RETRANS	3
+#define RXRPC_TX_ANNO_MASK	0x03
+#define RXRPC_TX_ANNO_LAST	0x04
+#define RXRPC_TX_ANNO_RESENT	0x08
+
+#define RXRPC_RX_ANNO_JUMBO	0x3f		/* Jumbo subpacket number + 1 if not zero */
+#define RXRPC_RX_ANNO_JLAST	0x40		/* Set if last element of a jumbo packet */
+#define RXRPC_RX_ANNO_VERIFIED	0x80		/* Set if verified and decrypted */
+	rxrpc_seq_t		tx_hard_ack;	/* Dead slot in buffer; the first transmitted but
+						 * not hard-ACK'd packet follows this.
+						 */
+	rxrpc_seq_t		tx_top;		/* Highest Tx slot allocated. */
+
+	/* TCP-style slow-start congestion control [RFC5681].  Since the SMSS
+	 * is fixed, we keep these numbers in terms of segments (ie. DATA
+	 * packets) rather than bytes.
+	 */
+#define RXRPC_TX_SMSS		RXRPC_JUMBO_DATALEN
+	u8			cong_cwnd;	/* Congestion window size */
+	u8			cong_extra;	/* Extra to send for congestion management */
+	u8			cong_ssthresh;	/* Slow-start threshold */
+	enum rxrpc_congest_mode	cong_mode:8;	/* Congestion management mode */
+	u8			cong_dup_acks;	/* Count of ACKs showing missing packets */
+	u8			cong_cumul_acks; /* Cumulative ACK count */
+	ktime_t			cong_tstamp;	/* Last time cwnd was changed */
+
+	rxrpc_seq_t		rx_hard_ack;	/* Dead slot in buffer; the first received but not
+						 * consumed packet follows this.
+						 */
+	rxrpc_seq_t		rx_top;		/* Highest Rx slot allocated. */
+	rxrpc_seq_t		rx_expect_next;	/* Expected next packet sequence number */
+	u8			rx_winsize;	/* Size of Rx window */
+	u8			tx_winsize;	/* Maximum size of Tx window */
+	bool			tx_phase;	/* T if transmission phase, F if receive phase */
+	u8			nr_jumbo_bad;	/* Number of jumbo dups/exceeds-windows */
+
+	/* receive-phase ACK management */
+	u8			ackr_reason;	/* reason to ACK */
+	u16			ackr_skew;	/* skew on packet being ACK'd */
+	rxrpc_serial_t		ackr_serial;	/* serial of packet being ACK'd */
+	rxrpc_seq_t		ackr_prev_seq;	/* previous sequence number received */
+	rxrpc_seq_t		ackr_consumed;	/* Highest packet shown consumed */
+	rxrpc_seq_t		ackr_seen;	/* Highest packet shown seen */
+	rxrpc_serial_t		ackr_ping;	/* Last ping sent */
+	ktime_t			ackr_ping_time;	/* Time last ping sent */
+
+	/* transmission-phase ACK management */
+	ktime_t			acks_latest_ts;	/* Timestamp of latest ACK received */
+	rxrpc_serial_t		acks_latest;	/* serial number of latest ACK received */
+	rxrpc_seq_t		acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
 };
 
 /*
- * locally abort an RxRPC call
+ * Summary of a new ACK and the changes it made to the Tx buffer packet states.
  */
-static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
-{
-	write_lock_bh(&call->state_lock);
-	if (call->state < RXRPC_CALL_COMPLETE) {
-		call->local_abort = abort_code;
-		call->state = RXRPC_CALL_LOCALLY_ABORTED;
-		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-	}
-	write_unlock_bh(&call->state_lock);
-}
+struct rxrpc_ack_summary {
+	u8			ack_reason;
+	u8			nr_acks;		/* Number of ACKs in packet */
+	u8			nr_nacks;		/* Number of NACKs in packet */
+	u8			nr_new_acks;		/* Number of new ACKs in packet */
+	u8			nr_new_nacks;		/* Number of new NACKs in packet */
+	u8			nr_rot_new_acks;	/* Number of rotated new ACKs */
+	bool			new_low_nack;		/* T if new low NACK found */
+	bool			retrans_timeo;		/* T if reTx due to timeout happened */
+	u8			flight_size;		/* Number of unreceived transmissions */
+	/* Place to stash values for tracing */
+	enum rxrpc_congest_mode	mode:8;
+	u8			cwnd;
+	u8			ssthresh;
+	u8			dup_acks;
+	u8			cumulative_acks;
+};
+
+enum rxrpc_skb_trace {
+	rxrpc_skb_rx_cleaned,
+	rxrpc_skb_rx_freed,
+	rxrpc_skb_rx_got,
+	rxrpc_skb_rx_lost,
+	rxrpc_skb_rx_received,
+	rxrpc_skb_rx_rotated,
+	rxrpc_skb_rx_purged,
+	rxrpc_skb_rx_seen,
+	rxrpc_skb_tx_cleaned,
+	rxrpc_skb_tx_freed,
+	rxrpc_skb_tx_got,
+	rxrpc_skb_tx_new,
+	rxrpc_skb_tx_rotated,
+	rxrpc_skb_tx_seen,
+	rxrpc_skb__nr_trace
+};
+
+extern const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7];
+
+enum rxrpc_conn_trace {
+	rxrpc_conn_new_client,
+	rxrpc_conn_new_service,
+	rxrpc_conn_queued,
+	rxrpc_conn_seen,
+	rxrpc_conn_got,
+	rxrpc_conn_put_client,
+	rxrpc_conn_put_service,
+	rxrpc_conn__nr_trace
+};
+
+extern const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4];
+
+enum rxrpc_client_trace {
+	rxrpc_client_activate_chans,
+	rxrpc_client_alloc,
+	rxrpc_client_chan_activate,
+	rxrpc_client_chan_disconnect,
+	rxrpc_client_chan_pass,
+	rxrpc_client_chan_unstarted,
+	rxrpc_client_cleanup,
+	rxrpc_client_count,
+	rxrpc_client_discard,
+	rxrpc_client_duplicate,
+	rxrpc_client_exposed,
+	rxrpc_client_replace,
+	rxrpc_client_to_active,
+	rxrpc_client_to_culled,
+	rxrpc_client_to_idle,
+	rxrpc_client_to_inactive,
+	rxrpc_client_to_waiting,
+	rxrpc_client_uncount,
+	rxrpc_client__nr_trace
+};
+
+extern const char rxrpc_client_traces[rxrpc_client__nr_trace][7];
+extern const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5];
+
+enum rxrpc_call_trace {
+	rxrpc_call_new_client,
+	rxrpc_call_new_service,
+	rxrpc_call_queued,
+	rxrpc_call_queued_ref,
+	rxrpc_call_seen,
+	rxrpc_call_connected,
+	rxrpc_call_release,
+	rxrpc_call_got,
+	rxrpc_call_got_userid,
+	rxrpc_call_got_kernel,
+	rxrpc_call_put,
+	rxrpc_call_put_userid,
+	rxrpc_call_put_kernel,
+	rxrpc_call_put_noqueue,
+	rxrpc_call_error,
+	rxrpc_call__nr_trace
+};
+
+extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4];
+
+enum rxrpc_transmit_trace {
+	rxrpc_transmit_wait,
+	rxrpc_transmit_queue,
+	rxrpc_transmit_queue_last,
+	rxrpc_transmit_rotate,
+	rxrpc_transmit_rotate_last,
+	rxrpc_transmit_await_reply,
+	rxrpc_transmit_end,
+	rxrpc_transmit__nr_trace
+};
+
+extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4];
+
+enum rxrpc_receive_trace {
+	rxrpc_receive_incoming,
+	rxrpc_receive_queue,
+	rxrpc_receive_queue_last,
+	rxrpc_receive_front,
+	rxrpc_receive_rotate,
+	rxrpc_receive_end,
+	rxrpc_receive__nr_trace
+};
+
+extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4];
+
+enum rxrpc_recvmsg_trace {
+	rxrpc_recvmsg_enter,
+	rxrpc_recvmsg_wait,
+	rxrpc_recvmsg_dequeue,
+	rxrpc_recvmsg_hole,
+	rxrpc_recvmsg_next,
+	rxrpc_recvmsg_cont,
+	rxrpc_recvmsg_full,
+	rxrpc_recvmsg_data_return,
+	rxrpc_recvmsg_terminal,
+	rxrpc_recvmsg_to_be_accepted,
+	rxrpc_recvmsg_return,
+	rxrpc_recvmsg__nr_trace
+};
+
+extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5];
+
+enum rxrpc_rtt_tx_trace {
+	rxrpc_rtt_tx_ping,
+	rxrpc_rtt_tx_data,
+	rxrpc_rtt_tx__nr_trace
+};
+
+extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5];
+
+enum rxrpc_rtt_rx_trace {
+	rxrpc_rtt_rx_ping_response,
+	rxrpc_rtt_rx_requested_ack,
+	rxrpc_rtt_rx__nr_trace
+};
+
+extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5];
+
+enum rxrpc_timer_trace {
+	rxrpc_timer_begin,
+	rxrpc_timer_init_for_reply,
+	rxrpc_timer_expired,
+	rxrpc_timer_set_for_ack,
+	rxrpc_timer_set_for_resend,
+	rxrpc_timer_set_for_send,
+	rxrpc_timer__nr_trace
+};
+
+extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8];
+
+enum rxrpc_propose_ack_trace {
+	rxrpc_propose_ack_client_tx_end,
+	rxrpc_propose_ack_input_data,
+	rxrpc_propose_ack_ping_for_lost_ack,
+	rxrpc_propose_ack_ping_for_lost_reply,
+	rxrpc_propose_ack_ping_for_params,
+	rxrpc_propose_ack_respond_to_ack,
+	rxrpc_propose_ack_respond_to_ping,
+	rxrpc_propose_ack_retry_tx,
+	rxrpc_propose_ack_rotate_rx,
+	rxrpc_propose_ack_terminal_ack,
+	rxrpc_propose_ack__nr_trace
+};
+
+enum rxrpc_propose_ack_outcome {
+	rxrpc_propose_ack_use,
+	rxrpc_propose_ack_update,
+	rxrpc_propose_ack_subsume,
+	rxrpc_propose_ack__nr_outcomes
+};
+
+extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8];
+extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes];
+
+enum rxrpc_congest_change {
+	rxrpc_cong_begin_retransmission,
+	rxrpc_cong_cleared_nacks,
+	rxrpc_cong_new_low_nack,
+	rxrpc_cong_no_change,
+	rxrpc_cong_progress,
+	rxrpc_cong_retransmit_again,
+	rxrpc_cong_rtt_window_end,
+	rxrpc_cong_saw_nack,
+	rxrpc_congest__nr_change
+};
+
+extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10];
+extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9];
+
+extern const char *const rxrpc_pkts[];
+extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4];
+
+#include <trace/events/rxrpc.h>
 
 /*
  * af_rxrpc.c
  */
-extern atomic_t rxrpc_n_skbs;
+extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
 extern u32 rxrpc_epoch;
 extern atomic_t rxrpc_debug_id;
 extern struct workqueue_struct *rxrpc_workqueue;
@@ -494,70 +792,178 @@
 /*
  * call_accept.c
  */
+int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
+void rxrpc_discard_prealloc(struct rxrpc_sock *);
+struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *,
+					   struct rxrpc_connection *,
+					   struct sk_buff *);
 void rxrpc_accept_incoming_calls(struct rxrpc_local *);
-struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long);
+struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long,
+				     rxrpc_notify_rx_t);
 int rxrpc_reject_call(struct rxrpc_sock *);
 
 /*
  * call_event.c
  */
-void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
-void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
+void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
+void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
+		       enum rxrpc_propose_ack_trace);
 void rxrpc_process_call(struct work_struct *);
 
 /*
  * call_object.c
  */
+extern const char *const rxrpc_call_states[];
+extern const char *const rxrpc_call_completions[];
 extern unsigned int rxrpc_max_call_lifetime;
-extern unsigned int rxrpc_dead_call_expiry;
 extern struct kmem_cache *rxrpc_call_jar;
 extern struct list_head rxrpc_calls;
 extern rwlock_t rxrpc_call_lock;
 
 struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
+struct rxrpc_call *rxrpc_alloc_call(gfp_t);
 struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
 					 struct rxrpc_conn_parameters *,
 					 struct sockaddr_rxrpc *,
 					 unsigned long, gfp_t);
-struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
-				       struct rxrpc_connection *,
-				       struct sk_buff *);
-void rxrpc_release_call(struct rxrpc_call *);
+void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
+			 struct sk_buff *);
+void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
 void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
-void __rxrpc_put_call(struct rxrpc_call *);
+bool __rxrpc_queue_call(struct rxrpc_call *);
+bool rxrpc_queue_call(struct rxrpc_call *);
+void rxrpc_see_call(struct rxrpc_call *);
+void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
+void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
+void rxrpc_cleanup_call(struct rxrpc_call *);
 void __exit rxrpc_destroy_all_calls(void);
 
+static inline bool rxrpc_is_service_call(const struct rxrpc_call *call)
+{
+	return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags);
+}
+
+static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
+{
+	return !rxrpc_is_service_call(call);
+}
+
+/*
+ * Transition a call to the complete state.
+ */
+static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call,
+					       enum rxrpc_call_completion compl,
+					       u32 abort_code,
+					       int error)
+{
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		call->abort_code = abort_code;
+		call->error = error;
+		call->completion = compl,
+		call->state = RXRPC_CALL_COMPLETE;
+		wake_up(&call->waitq);
+		return true;
+	}
+	return false;
+}
+
+static inline bool rxrpc_set_call_completion(struct rxrpc_call *call,
+					     enum rxrpc_call_completion compl,
+					     u32 abort_code,
+					     int error)
+{
+	bool ret;
+
+	write_lock_bh(&call->state_lock);
+	ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
+	write_unlock_bh(&call->state_lock);
+	return ret;
+}
+
+/*
+ * Record that a call successfully completed.
+ */
+static inline bool __rxrpc_call_completed(struct rxrpc_call *call)
+{
+	return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
+}
+
+static inline bool rxrpc_call_completed(struct rxrpc_call *call)
+{
+	bool ret;
+
+	write_lock_bh(&call->state_lock);
+	ret = __rxrpc_call_completed(call);
+	write_unlock_bh(&call->state_lock);
+	return ret;
+}
+
+/*
+ * Record that a call is locally aborted.
+ */
+static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+				      rxrpc_seq_t seq,
+				      u32 abort_code, int error)
+{
+	trace_rxrpc_abort(why, call->cid, call->call_id, seq,
+			  abort_code, error);
+	return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
+					   abort_code, error);
+}
+
+static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+				    rxrpc_seq_t seq, u32 abort_code, int error)
+{
+	bool ret;
+
+	write_lock_bh(&call->state_lock);
+	ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
+	write_unlock_bh(&call->state_lock);
+	return ret;
+}
+
 /*
  * conn_client.c
  */
+extern unsigned int rxrpc_max_client_connections;
+extern unsigned int rxrpc_reap_client_connections;
+extern unsigned int rxrpc_conn_idle_client_expiry;
+extern unsigned int rxrpc_conn_idle_client_fast_expiry;
 extern struct idr rxrpc_client_conn_ids;
 
 void rxrpc_destroy_client_conn_ids(void);
 int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
 		       struct sockaddr_rxrpc *, gfp_t);
-void rxrpc_unpublish_client_conn(struct rxrpc_connection *);
+void rxrpc_expose_client_call(struct rxrpc_call *);
+void rxrpc_disconnect_client_call(struct rxrpc_call *);
+void rxrpc_put_client_conn(struct rxrpc_connection *);
+void __exit rxrpc_destroy_all_client_connections(void);
 
 /*
  * conn_event.c
  */
 void rxrpc_process_connection(struct work_struct *);
-void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
-void rxrpc_reject_packets(struct rxrpc_local *);
 
 /*
  * conn_object.c
  */
 extern unsigned int rxrpc_connection_expiry;
 extern struct list_head rxrpc_connections;
+extern struct list_head rxrpc_connection_proc_list;
 extern rwlock_t rxrpc_connection_lock;
 
 int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
 struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
 struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
 						   struct sk_buff *);
-void __rxrpc_disconnect_call(struct rxrpc_call *);
+void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *);
 void rxrpc_disconnect_call(struct rxrpc_call *);
-void rxrpc_put_connection(struct rxrpc_connection *);
+void rxrpc_kill_connection(struct rxrpc_connection *);
+bool rxrpc_queue_conn(struct rxrpc_connection *);
+void rxrpc_see_connection(struct rxrpc_connection *);
+void rxrpc_get_connection(struct rxrpc_connection *);
+struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *);
+void rxrpc_put_service_conn(struct rxrpc_connection *);
 void __exit rxrpc_destroy_all_connections(void);
 
 static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
@@ -570,24 +976,15 @@
 	return !rxrpc_conn_is_client(conn);
 }
 
-static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
+static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
 {
-	atomic_inc(&conn->usage);
-}
+	if (!conn)
+		return;
 
-static inline
-struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
-{
-	return atomic_inc_not_zero(&conn->usage) ? conn : NULL;
-}
-
-static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
-{
-	if (!rxrpc_get_connection_maybe(conn))
-		return false;
-	if (!rxrpc_queue_work(&conn->processor))
-		rxrpc_put_connection(conn);
-	return true;
+	if (rxrpc_conn_is_client(conn))
+		rxrpc_put_client_conn(conn);
+	else
+		rxrpc_put_service_conn(conn);
 }
 
 /*
@@ -595,17 +992,14 @@
  */
 struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
 						     struct sk_buff *);
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
-						   struct sockaddr_rxrpc *,
-						   struct sk_buff *);
+struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t);
+void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *);
 void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
 
 /*
  * input.c
  */
 void rxrpc_data_ready(struct sock *);
-int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool);
-void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
 
 /*
  * insecure.c
@@ -667,25 +1061,24 @@
 extern unsigned int rxrpc_rx_window_size;
 extern unsigned int rxrpc_rx_mtu;
 extern unsigned int rxrpc_rx_jumbo_max;
+extern unsigned int rxrpc_resend_timeout;
 
-extern const char *const rxrpc_pkts[];
 extern const s8 rxrpc_ack_priority[];
 
-extern const char *rxrpc_acks(u8 reason);
-
 /*
  * output.c
  */
-extern unsigned int rxrpc_resend_timeout;
-
-int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *);
-int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
+int rxrpc_send_call_packet(struct rxrpc_call *, u8);
+int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
+void rxrpc_reject_packets(struct rxrpc_local *);
 
 /*
  * peer_event.c
  */
 void rxrpc_error_report(struct sock *);
 void rxrpc_peer_error_distributor(struct work_struct *);
+void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
+			rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
 
 /*
  * peer_object.c
@@ -695,10 +1088,13 @@
 struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
 				     struct sockaddr_rxrpc *, gfp_t);
 struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
+struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *,
+					      struct rxrpc_peer *);
 
-static inline void rxrpc_get_peer(struct rxrpc_peer *peer)
+static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
 {
 	atomic_inc(&peer->usage);
+	return peer;
 }
 
 static inline
@@ -717,14 +1113,13 @@
 /*
  * proc.c
  */
-extern const char *const rxrpc_call_states[];
 extern const struct file_operations rxrpc_call_seq_fops;
 extern const struct file_operations rxrpc_connection_seq_fops;
 
 /*
  * recvmsg.c
  */
-void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
+void rxrpc_notify_socket(struct rxrpc_call *);
 int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
 
 /*
@@ -743,9 +1138,21 @@
 int rxrpc_init_server_conn_security(struct rxrpc_connection *);
 
 /*
+ * sendmsg.c
+ */
+int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
+
+/*
  * skbuff.c
  */
+void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
 void rxrpc_packet_destructor(struct sk_buff *);
+void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_purge_queue(struct sk_buff_head *);
 
 /*
  * sysctl.c
@@ -763,6 +1170,23 @@
  */
 int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
 
+static inline bool before(u32 seq1, u32 seq2)
+{
+        return (s32)(seq1 - seq2) < 0;
+}
+static inline bool before_eq(u32 seq1, u32 seq2)
+{
+        return (s32)(seq1 - seq2) <= 0;
+}
+static inline bool after(u32 seq1, u32 seq2)
+{
+        return (s32)(seq1 - seq2) > 0;
+}
+static inline bool after_eq(u32 seq1, u32 seq2)
+{
+        return (s32)(seq1 - seq2) >= 0;
+}
+
 /*
  * debug tracing
  */
@@ -845,11 +1269,12 @@
 
 #define ASSERTCMP(X, OP, Y)						\
 do {									\
-	unsigned long _x = (unsigned long)(X);				\
-	unsigned long _y = (unsigned long)(Y);				\
+	__typeof__(X) _x = (X);						\
+	__typeof__(Y) _y = (__typeof__(X))(Y);				\
 	if (unlikely(!(_x OP _y))) {					\
-		pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n",			\
-		       _x, _x, #OP, _y, _y);				\
+		pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
+		       (unsigned long)_x, (unsigned long)_x, #OP,	\
+		       (unsigned long)_y, (unsigned long)_y);		\
 		BUG();							\
 	}								\
 } while (0)
@@ -864,11 +1289,12 @@
 
 #define ASSERTIFCMP(C, X, OP, Y)					\
 do {									\
-	unsigned long _x = (unsigned long)(X);				\
-	unsigned long _y = (unsigned long)(Y);				\
+	__typeof__(X) _x = (X);						\
+	__typeof__(Y) _y = (__typeof__(X))(Y);				\
 	if (unlikely((C) && !(_x OP _y))) {				\
 		pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
-		       _x, _x, #OP, _y, _y);				\
+		       (unsigned long)_x, (unsigned long)_x, #OP,	\
+		       (unsigned long)_y, (unsigned long)_y);		\
 		BUG();							\
 	}								\
 } while (0)
@@ -892,54 +1318,3 @@
 } while (0)
 
 #endif /* __KDEBUGALL */
-
-/*
- * socket buffer accounting / leak finding
- */
-static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn)
-{
-	//_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
-	//atomic_inc(&rxrpc_n_skbs);
-}
-
-#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__)
-
-static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn)
-{
-	//_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
-	//atomic_dec(&rxrpc_n_skbs);
-}
-
-#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__)
-
-static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn)
-{
-	if (skb) {
-		CHECK_SLAB_OKAY(&skb->users);
-		//_net("free skb %p %s [%d]",
-		//     skb, fn, atomic_read(&rxrpc_n_skbs));
-		//atomic_dec(&rxrpc_n_skbs);
-		kfree_skb(skb);
-	}
-}
-
-#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__)
-
-static inline void rxrpc_purge_queue(struct sk_buff_head *list)
-{
-	struct sk_buff *skb;
-	while ((skb = skb_dequeue((list))) != NULL)
-		rxrpc_free_skb(skb);
-}
-
-#define rxrpc_get_call(CALL)				\
-do {							\
-	CHECK_SLAB_OKAY(&(CALL)->usage);		\
-	if (atomic_inc_return(&(CALL)->usage) == 1)	\
-		BUG();					\
-} while (0)
-
-#define rxrpc_put_call(CALL)				\
-do {							\
-	__rxrpc_put_call(CALL);				\
-} while (0)
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 0b28321..3cac231 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -20,264 +20,409 @@
 #include <linux/in6.h>
 #include <linux/icmp.h>
 #include <linux/gfp.h>
+#include <linux/circ_buf.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <net/ip.h>
 #include "ar-internal.h"
 
 /*
- * generate a connection-level abort
+ * Preallocate a single service call, connection and peer and, if possible,
+ * give them a user ID and attach the user's side of the ID to them.
  */
-static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
-		      struct rxrpc_wire_header *whdr)
+static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
+				      struct rxrpc_backlog *b,
+				      rxrpc_notify_rx_t notify_rx,
+				      rxrpc_user_attach_call_t user_attach_call,
+				      unsigned long user_call_ID, gfp_t gfp)
 {
-	struct msghdr msg;
-	struct kvec iov[1];
-	size_t len;
-	int ret;
+	const void *here = __builtin_return_address(0);
+	struct rxrpc_call *call;
+	int max, tmp;
+	unsigned int size = RXRPC_BACKLOG_MAX;
+	unsigned int head, tail, call_head, call_tail;
 
-	_enter("%d,,", local->debug_id);
+	max = rx->sk.sk_max_ack_backlog;
+	tmp = rx->sk.sk_ack_backlog;
+	if (tmp >= max) {
+		_leave(" = -ENOBUFS [full %u]", max);
+		return -ENOBUFS;
+	}
+	max -= tmp;
 
-	whdr->type	= RXRPC_PACKET_TYPE_BUSY;
-	whdr->serial	= htonl(1);
+	/* We don't need more conns and peers than we have calls, but on the
+	 * other hand, we shouldn't ever use more peers than conns or conns
+	 * than calls.
+	 */
+	call_head = b->call_backlog_head;
+	call_tail = READ_ONCE(b->call_backlog_tail);
+	tmp = CIRC_CNT(call_head, call_tail, size);
+	if (tmp >= max) {
+		_leave(" = -ENOBUFS [enough %u]", tmp);
+		return -ENOBUFS;
+	}
+	max = tmp + 1;
 
-	msg.msg_name	= &srx->transport.sin;
-	msg.msg_namelen	= sizeof(srx->transport.sin);
-	msg.msg_control	= NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags	= 0;
-
-	iov[0].iov_base	= whdr;
-	iov[0].iov_len	= sizeof(*whdr);
-
-	len = iov[0].iov_len;
-
-	_proto("Tx BUSY %%1");
-
-	ret = kernel_sendmsg(local->socket, &msg, iov, 1, len);
-	if (ret < 0) {
-		_leave(" = -EAGAIN [sendmsg failed: %d]", ret);
-		return -EAGAIN;
+	head = b->peer_backlog_head;
+	tail = READ_ONCE(b->peer_backlog_tail);
+	if (CIRC_CNT(head, tail, size) < max) {
+		struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp);
+		if (!peer)
+			return -ENOMEM;
+		b->peer_backlog[head] = peer;
+		smp_store_release(&b->peer_backlog_head,
+				  (head + 1) & (size - 1));
 	}
 
-	_leave(" = 0");
+	head = b->conn_backlog_head;
+	tail = READ_ONCE(b->conn_backlog_tail);
+	if (CIRC_CNT(head, tail, size) < max) {
+		struct rxrpc_connection *conn;
+
+		conn = rxrpc_prealloc_service_connection(gfp);
+		if (!conn)
+			return -ENOMEM;
+		b->conn_backlog[head] = conn;
+		smp_store_release(&b->conn_backlog_head,
+				  (head + 1) & (size - 1));
+
+		trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+				 atomic_read(&conn->usage), here);
+	}
+
+	/* Now it gets complicated, because calls get registered with the
+	 * socket here, particularly if a user ID is preassigned by the user.
+	 */
+	call = rxrpc_alloc_call(gfp);
+	if (!call)
+		return -ENOMEM;
+	call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
+	call->state = RXRPC_CALL_SERVER_PREALLOC;
+
+	trace_rxrpc_call(call, rxrpc_call_new_service,
+			 atomic_read(&call->usage),
+			 here, (const void *)user_call_ID);
+
+	write_lock(&rx->call_lock);
+	if (user_attach_call) {
+		struct rxrpc_call *xcall;
+		struct rb_node *parent, **pp;
+
+		/* Check the user ID isn't already in use */
+		pp = &rx->calls.rb_node;
+		parent = NULL;
+		while (*pp) {
+			parent = *pp;
+			xcall = rb_entry(parent, struct rxrpc_call, sock_node);
+			if (user_call_ID < call->user_call_ID)
+				pp = &(*pp)->rb_left;
+			else if (user_call_ID > call->user_call_ID)
+				pp = &(*pp)->rb_right;
+			else
+				goto id_in_use;
+		}
+
+		call->user_call_ID = user_call_ID;
+		call->notify_rx = notify_rx;
+		rxrpc_get_call(call, rxrpc_call_got_kernel);
+		user_attach_call(call, user_call_ID);
+		rxrpc_get_call(call, rxrpc_call_got_userid);
+		rb_link_node(&call->sock_node, parent, pp);
+		rb_insert_color(&call->sock_node, &rx->calls);
+		set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+	}
+
+	list_add(&call->sock_link, &rx->sock_calls);
+
+	write_unlock(&rx->call_lock);
+
+	write_lock(&rxrpc_call_lock);
+	list_add_tail(&call->link, &rxrpc_calls);
+	write_unlock(&rxrpc_call_lock);
+
+	b->call_backlog[call_head] = call;
+	smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
+	_leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID);
+	return 0;
+
+id_in_use:
+	write_unlock(&rx->call_lock);
+	rxrpc_cleanup_call(call);
+	_leave(" = -EBADSLT");
+	return -EBADSLT;
+}
+
+/*
+ * Preallocate sufficient service connections, calls and peers to cover the
+ * entire backlog of a socket.  When a new call comes in, if we don't have
+ * sufficient of each available, the call gets rejected as busy or ignored.
+ *
+ * The backlog is replenished when a connection is accepted or rejected.
+ */
+int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp)
+{
+	struct rxrpc_backlog *b = rx->backlog;
+
+	if (!b) {
+		b = kzalloc(sizeof(struct rxrpc_backlog), gfp);
+		if (!b)
+			return -ENOMEM;
+		rx->backlog = b;
+	}
+
+	if (rx->discard_new_call)
+		return 0;
+
+	while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0)
+		;
+
 	return 0;
 }
 
 /*
- * accept an incoming call that needs peer, transport and/or connection setting
- * up
+ * Discard the preallocation on a service.
  */
-static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
-				      struct rxrpc_sock *rx,
-				      struct sk_buff *skb,
-				      struct sockaddr_rxrpc *srx)
+void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
 {
-	struct rxrpc_connection *conn;
-	struct rxrpc_skb_priv *sp, *nsp;
+	struct rxrpc_backlog *b = rx->backlog;
+	unsigned int size = RXRPC_BACKLOG_MAX, head, tail;
+
+	if (!b)
+		return;
+	rx->backlog = NULL;
+
+	/* Make sure that there aren't any incoming calls in progress before we
+	 * clear the preallocation buffers.
+	 */
+	spin_lock_bh(&rx->incoming_lock);
+	spin_unlock_bh(&rx->incoming_lock);
+
+	head = b->peer_backlog_head;
+	tail = b->peer_backlog_tail;
+	while (CIRC_CNT(head, tail, size) > 0) {
+		struct rxrpc_peer *peer = b->peer_backlog[tail];
+		kfree(peer);
+		tail = (tail + 1) & (size - 1);
+	}
+
+	head = b->conn_backlog_head;
+	tail = b->conn_backlog_tail;
+	while (CIRC_CNT(head, tail, size) > 0) {
+		struct rxrpc_connection *conn = b->conn_backlog[tail];
+		write_lock(&rxrpc_connection_lock);
+		list_del(&conn->link);
+		list_del(&conn->proc_link);
+		write_unlock(&rxrpc_connection_lock);
+		kfree(conn);
+		tail = (tail + 1) & (size - 1);
+	}
+
+	head = b->call_backlog_head;
+	tail = b->call_backlog_tail;
+	while (CIRC_CNT(head, tail, size) > 0) {
+		struct rxrpc_call *call = b->call_backlog[tail];
+		if (rx->discard_new_call) {
+			_debug("discard %lx", call->user_call_ID);
+			rx->discard_new_call(call, call->user_call_ID);
+			rxrpc_put_call(call, rxrpc_call_put_kernel);
+		}
+		rxrpc_call_completed(call);
+		rxrpc_release_call(rx, call);
+		rxrpc_put_call(call, rxrpc_call_put);
+		tail = (tail + 1) & (size - 1);
+	}
+
+	kfree(b);
+}
+
+/*
+ * Allocate a new incoming call from the prealloc pool, along with a connection
+ * and a peer as necessary.
+ */
+static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
+						    struct rxrpc_local *local,
+						    struct rxrpc_connection *conn,
+						    struct sk_buff *skb)
+{
+	struct rxrpc_backlog *b = rx->backlog;
+	struct rxrpc_peer *peer, *xpeer;
 	struct rxrpc_call *call;
-	struct sk_buff *notification;
-	int ret;
+	unsigned short call_head, conn_head, peer_head;
+	unsigned short call_tail, conn_tail, peer_tail;
+	unsigned short call_count, conn_count;
+
+	/* #calls >= #conns >= #peers must hold true. */
+	call_head = smp_load_acquire(&b->call_backlog_head);
+	call_tail = b->call_backlog_tail;
+	call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX);
+	conn_head = smp_load_acquire(&b->conn_backlog_head);
+	conn_tail = b->conn_backlog_tail;
+	conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX);
+	ASSERTCMP(conn_count, >=, call_count);
+	peer_head = smp_load_acquire(&b->peer_backlog_head);
+	peer_tail = b->peer_backlog_tail;
+	ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=,
+		  conn_count);
+
+	if (call_count == 0)
+		return NULL;
+
+	if (!conn) {
+		/* No connection.  We're going to need a peer to start off
+		 * with.  If one doesn't yet exist, use a spare from the
+		 * preallocation set.  We dump the address into the spare in
+		 * anticipation - and to save on stack space.
+		 */
+		xpeer = b->peer_backlog[peer_tail];
+		if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0)
+			return NULL;
+
+		peer = rxrpc_lookup_incoming_peer(local, xpeer);
+		if (peer == xpeer) {
+			b->peer_backlog[peer_tail] = NULL;
+			smp_store_release(&b->peer_backlog_tail,
+					  (peer_tail + 1) &
+					  (RXRPC_BACKLOG_MAX - 1));
+		}
+
+		/* Now allocate and set up the connection */
+		conn = b->conn_backlog[conn_tail];
+		b->conn_backlog[conn_tail] = NULL;
+		smp_store_release(&b->conn_backlog_tail,
+				  (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
+		rxrpc_get_local(local);
+		conn->params.local = local;
+		conn->params.peer = peer;
+		rxrpc_see_connection(conn);
+		rxrpc_new_incoming_connection(conn, skb);
+	} else {
+		rxrpc_get_connection(conn);
+	}
+
+	/* And now we can allocate and set up a new call */
+	call = b->call_backlog[call_tail];
+	b->call_backlog[call_tail] = NULL;
+	smp_store_release(&b->call_backlog_tail,
+			  (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
+
+	rxrpc_see_call(call);
+	call->conn = conn;
+	call->peer = rxrpc_get_peer(conn->params.peer);
+	return call;
+}
+
+/*
+ * Set up a new incoming call.  Called in BH context with the RCU read lock
+ * held.
+ *
+ * If this is for a kernel service, when we allocate the call, it will have
+ * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the
+ * retainer ref obtained from the backlog buffer.  Prealloc calls for userspace
+ * services only have the ref from the backlog buffer.  We want to pass this
+ * ref to non-BH context to dispose of.
+ *
+ * If we want to report an error, we mark the skb with the packet type and
+ * abort code and return NULL.
+ */
+struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
+					   struct rxrpc_connection *conn,
+					   struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_sock *rx;
+	struct rxrpc_call *call;
+	u16 service_id = sp->hdr.serviceId;
 
 	_enter("");
 
-	sp = rxrpc_skb(skb);
+	/* Get the socket providing the service */
+	rx = rcu_dereference(local->service);
+	if (service_id == rx->srx.srx_service)
+		goto found_service;
 
-	/* get a notification message to send to the server app */
-	notification = alloc_skb(0, GFP_NOFS);
-	if (!notification) {
-		_debug("no memory");
-		ret = -ENOMEM;
-		goto error_nofree;
-	}
-	rxrpc_new_skb(notification);
-	notification->mark = RXRPC_SKB_MARK_NEW_CALL;
-
-	conn = rxrpc_incoming_connection(local, srx, skb);
-	if (IS_ERR(conn)) {
-		_debug("no conn");
-		ret = PTR_ERR(conn);
-		goto error;
-	}
-
-	call = rxrpc_incoming_call(rx, conn, skb);
-	rxrpc_put_connection(conn);
-	if (IS_ERR(call)) {
-		_debug("no call");
-		ret = PTR_ERR(call);
-		goto error;
-	}
-
-	/* attach the call to the socket */
-	read_lock_bh(&local->services_lock);
-	if (rx->sk.sk_state == RXRPC_CLOSE)
-		goto invalid_service;
-
-	write_lock(&rx->call_lock);
-	if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) {
-		rxrpc_get_call(call);
-
-		spin_lock(&call->conn->state_lock);
-		if (sp->hdr.securityIndex > 0 &&
-		    call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
-			_debug("await conn sec");
-			list_add_tail(&call->accept_link, &rx->secureq);
-			call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
-			set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
-			rxrpc_queue_conn(call->conn);
-		} else {
-			_debug("conn ready");
-			call->state = RXRPC_CALL_SERVER_ACCEPTING;
-			list_add_tail(&call->accept_link, &rx->acceptq);
-			rxrpc_get_call(call);
-			nsp = rxrpc_skb(notification);
-			nsp->call = call;
-
-			ASSERTCMP(atomic_read(&call->usage), >=, 3);
-
-			_debug("notify");
-			spin_lock(&call->lock);
-			ret = rxrpc_queue_rcv_skb(call, notification, true,
-						  false);
-			spin_unlock(&call->lock);
-			notification = NULL;
-			BUG_ON(ret < 0);
-		}
-		spin_unlock(&call->conn->state_lock);
-
-		_debug("queued");
-	}
-	write_unlock(&rx->call_lock);
-
-	_debug("process");
-	rxrpc_fast_process_packet(call, skb);
-
-	_debug("done");
-	read_unlock_bh(&local->services_lock);
-	rxrpc_free_skb(notification);
-	rxrpc_put_call(call);
-	_leave(" = 0");
-	return 0;
-
-invalid_service:
-	_debug("invalid");
-	read_unlock_bh(&local->services_lock);
-
-	read_lock_bh(&call->state_lock);
-	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-	    !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
-		rxrpc_get_call(call);
-		rxrpc_queue_call(call);
-	}
-	read_unlock_bh(&call->state_lock);
-	rxrpc_put_call(call);
-	ret = -ECONNREFUSED;
-error:
-	rxrpc_free_skb(notification);
-error_nofree:
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * accept incoming calls that need peer, transport and/or connection setting up
- * - the packets we get are all incoming client DATA packets that have seq == 1
- */
-void rxrpc_accept_incoming_calls(struct rxrpc_local *local)
-{
-	struct rxrpc_skb_priv *sp;
-	struct sockaddr_rxrpc srx;
-	struct rxrpc_sock *rx;
-	struct rxrpc_wire_header whdr;
-	struct sk_buff *skb;
-	int ret;
-
-	_enter("%d", local->debug_id);
-
-	skb = skb_dequeue(&local->accept_queue);
-	if (!skb) {
-		_leave("\n");
-		return;
-	}
-
-	_net("incoming call skb %p", skb);
-
-	sp = rxrpc_skb(skb);
-
-	/* Set up a response packet header in case we need it */
-	whdr.epoch	= htonl(sp->hdr.epoch);
-	whdr.cid	= htonl(sp->hdr.cid);
-	whdr.callNumber	= htonl(sp->hdr.callNumber);
-	whdr.seq	= htonl(sp->hdr.seq);
-	whdr.serial	= 0;
-	whdr.flags	= 0;
-	whdr.type	= 0;
-	whdr.userStatus	= 0;
-	whdr.securityIndex = sp->hdr.securityIndex;
-	whdr._rsvd	= 0;
-	whdr.serviceId	= htons(sp->hdr.serviceId);
-
-	if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
-		goto drop;
-
-	/* get the socket providing the service */
-	read_lock_bh(&local->services_lock);
-	list_for_each_entry(rx, &local->services, listen_link) {
-		if (rx->srx.srx_service == sp->hdr.serviceId &&
-		    rx->sk.sk_state != RXRPC_CLOSE)
-			goto found_service;
-	}
-	read_unlock_bh(&local->services_lock);
-	goto invalid_service;
+	trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+			  RX_INVALID_OPERATION, EOPNOTSUPP);
+	skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
+	skb->priority = RX_INVALID_OPERATION;
+	_leave(" = NULL [service]");
+	return NULL;
 
 found_service:
-	_debug("found service %hd", rx->srx.srx_service);
-	if (sk_acceptq_is_full(&rx->sk))
-		goto backlog_full;
-	sk_acceptq_added(&rx->sk);
-	sock_hold(&rx->sk);
-	read_unlock_bh(&local->services_lock);
+	spin_lock(&rx->incoming_lock);
+	if (rx->sk.sk_state == RXRPC_CLOSE) {
+		trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber,
+				  sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
+		skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
+		skb->priority = RX_INVALID_OPERATION;
+		_leave(" = NULL [close]");
+		call = NULL;
+		goto out;
+	}
 
-	ret = rxrpc_accept_incoming_call(local, rx, skb, &srx);
-	if (ret < 0)
-		sk_acceptq_removed(&rx->sk);
-	sock_put(&rx->sk);
-	switch (ret) {
-	case -ECONNRESET: /* old calls are ignored */
-	case -ECONNABORTED: /* aborted calls are reaborted or ignored */
-	case 0:
-		return;
-	case -ECONNREFUSED:
-		goto invalid_service;
-	case -EBUSY:
-		goto busy;
-	case -EKEYREJECTED:
-		goto security_mismatch;
+	call = rxrpc_alloc_incoming_call(rx, local, conn, skb);
+	if (!call) {
+		skb->mark = RXRPC_SKB_MARK_BUSY;
+		_leave(" = NULL [busy]");
+		call = NULL;
+		goto out;
+	}
+
+	trace_rxrpc_receive(call, rxrpc_receive_incoming,
+			    sp->hdr.serial, sp->hdr.seq);
+
+	/* Make the call live. */
+	rxrpc_incoming_call(rx, call, skb);
+	conn = call->conn;
+
+	if (rx->notify_new_call)
+		rx->notify_new_call(&rx->sk, call, call->user_call_ID);
+	else
+		sk_acceptq_added(&rx->sk);
+
+	spin_lock(&conn->state_lock);
+	switch (conn->state) {
+	case RXRPC_CONN_SERVICE_UNSECURED:
+		conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
+		set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
+		rxrpc_queue_conn(call->conn);
+		break;
+
+	case RXRPC_CONN_SERVICE:
+		write_lock(&call->state_lock);
+		if (rx->discard_new_call)
+			call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+		else
+			call->state = RXRPC_CALL_SERVER_ACCEPTING;
+		write_unlock(&call->state_lock);
+		break;
+
+	case RXRPC_CONN_REMOTELY_ABORTED:
+		rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+					  conn->remote_abort, ECONNABORTED);
+		break;
+	case RXRPC_CONN_LOCALLY_ABORTED:
+		rxrpc_abort_call("CON", call, sp->hdr.seq,
+				 conn->local_abort, ECONNABORTED);
+		break;
 	default:
 		BUG();
 	}
+	spin_unlock(&conn->state_lock);
 
-backlog_full:
-	read_unlock_bh(&local->services_lock);
-busy:
-	rxrpc_busy(local, &srx, &whdr);
-	rxrpc_free_skb(skb);
-	return;
+	if (call->state == RXRPC_CALL_SERVER_ACCEPTING)
+		rxrpc_notify_socket(call);
 
-drop:
-	rxrpc_free_skb(skb);
-	return;
+	/* We have to discard the prealloc queue's ref here and rely on a
+	 * combination of the RCU read lock and refs held either by the socket
+	 * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel
+	 * service to prevent the call from being deallocated too early.
+	 */
+	rxrpc_put_call(call, rxrpc_call_put);
 
-invalid_service:
-	skb->priority = RX_INVALID_OPERATION;
-	rxrpc_reject_packet(local, skb);
-	return;
-
-	/* can't change connection security type mid-flow */
-security_mismatch:
-	skb->priority = RX_PROTOCOL_ERROR;
-	rxrpc_reject_packet(local, skb);
-	return;
+	_leave(" = %p{%d}", call, call->debug_id);
+out:
+	spin_unlock(&rx->incoming_lock);
+	return call;
 }
 
 /*
@@ -285,7 +430,8 @@
  * - assign the user call ID to the call at the front of the queue
  */
 struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
-				     unsigned long user_call_ID)
+				     unsigned long user_call_ID,
+				     rxrpc_notify_rx_t notify_rx)
 {
 	struct rxrpc_call *call;
 	struct rb_node *parent, **pp;
@@ -297,12 +443,13 @@
 
 	write_lock(&rx->call_lock);
 
-	ret = -ENODATA;
-	if (list_empty(&rx->acceptq))
-		goto out;
+	if (list_empty(&rx->to_be_accepted)) {
+		write_unlock(&rx->call_lock);
+		kleave(" = -ENODATA [empty]");
+		return ERR_PTR(-ENODATA);
+	}
 
 	/* check the user ID isn't already in use */
-	ret = -EBADSLT;
 	pp = &rx->calls.rb_node;
 	parent = NULL;
 	while (*pp) {
@@ -314,62 +461,59 @@
 		else if (user_call_ID > call->user_call_ID)
 			pp = &(*pp)->rb_right;
 		else
-			goto out;
+			goto id_in_use;
 	}
 
-	/* dequeue the first call and check it's still valid */
-	call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+	/* Dequeue the first call and check it's still valid.  We gain
+	 * responsibility for the queue's reference.
+	 */
+	call = list_entry(rx->to_be_accepted.next,
+			  struct rxrpc_call, accept_link);
 	list_del_init(&call->accept_link);
 	sk_acceptq_removed(&rx->sk);
+	rxrpc_see_call(call);
 
 	write_lock_bh(&call->state_lock);
 	switch (call->state) {
 	case RXRPC_CALL_SERVER_ACCEPTING:
 		call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
 		break;
-	case RXRPC_CALL_REMOTELY_ABORTED:
-	case RXRPC_CALL_LOCALLY_ABORTED:
-		ret = -ECONNABORTED;
+	case RXRPC_CALL_COMPLETE:
+		ret = call->error;
 		goto out_release;
-	case RXRPC_CALL_NETWORK_ERROR:
-		ret = call->conn->error;
-		goto out_release;
-	case RXRPC_CALL_DEAD:
-		ret = -ETIME;
-		goto out_discard;
 	default:
 		BUG();
 	}
 
 	/* formalise the acceptance */
+	call->notify_rx = notify_rx;
 	call->user_call_ID = user_call_ID;
+	rxrpc_get_call(call, rxrpc_call_got_userid);
 	rb_link_node(&call->sock_node, parent, pp);
 	rb_insert_color(&call->sock_node, &rx->calls);
 	if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
 		BUG();
-	if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events))
-		BUG();
-	rxrpc_queue_call(call);
 
-	rxrpc_get_call(call);
 	write_unlock_bh(&call->state_lock);
 	write_unlock(&rx->call_lock);
+	rxrpc_notify_socket(call);
+	rxrpc_service_prealloc(rx, GFP_KERNEL);
 	_leave(" = %p{%d}", call, call->debug_id);
 	return call;
 
-	/* if the call is already dying or dead, then we leave the socket's ref
-	 * on it to be released by rxrpc_dead_call_expired() as induced by
-	 * rxrpc_release_call() */
 out_release:
 	_debug("release %p", call);
-	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-	    !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-		rxrpc_queue_call(call);
-out_discard:
 	write_unlock_bh(&call->state_lock);
-	_debug("discard %p", call);
-out:
 	write_unlock(&rx->call_lock);
+	rxrpc_release_call(rx, call);
+	rxrpc_put_call(call, rxrpc_call_put);
+	goto out;
+
+id_in_use:
+	ret = -EBADSLT;
+	write_unlock(&rx->call_lock);
+out:
+	rxrpc_service_prealloc(rx, GFP_KERNEL);
 	_leave(" = %d", ret);
 	return ERR_PTR(ret);
 }
@@ -381,6 +525,7 @@
 int rxrpc_reject_call(struct rxrpc_sock *rx)
 {
 	struct rxrpc_call *call;
+	bool abort = false;
 	int ret;
 
 	_enter("");
@@ -389,88 +534,73 @@
 
 	write_lock(&rx->call_lock);
 
-	ret = -ENODATA;
-	if (list_empty(&rx->acceptq))
-		goto out;
+	if (list_empty(&rx->to_be_accepted)) {
+		write_unlock(&rx->call_lock);
+		return -ENODATA;
+	}
 
-	/* dequeue the first call and check it's still valid */
-	call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+	/* Dequeue the first call and check it's still valid.  We gain
+	 * responsibility for the queue's reference.
+	 */
+	call = list_entry(rx->to_be_accepted.next,
+			  struct rxrpc_call, accept_link);
 	list_del_init(&call->accept_link);
 	sk_acceptq_removed(&rx->sk);
+	rxrpc_see_call(call);
 
 	write_lock_bh(&call->state_lock);
 	switch (call->state) {
 	case RXRPC_CALL_SERVER_ACCEPTING:
-		call->state = RXRPC_CALL_SERVER_BUSY;
-		if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events))
-			rxrpc_queue_call(call);
-		ret = 0;
-		goto out_release;
-	case RXRPC_CALL_REMOTELY_ABORTED:
-	case RXRPC_CALL_LOCALLY_ABORTED:
-		ret = -ECONNABORTED;
-		goto out_release;
-	case RXRPC_CALL_NETWORK_ERROR:
-		ret = call->conn->error;
-		goto out_release;
-	case RXRPC_CALL_DEAD:
-		ret = -ETIME;
+		__rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED);
+		abort = true;
+		/* fall through */
+	case RXRPC_CALL_COMPLETE:
+		ret = call->error;
 		goto out_discard;
 	default:
 		BUG();
 	}
 
-	/* if the call is already dying or dead, then we leave the socket's ref
-	 * on it to be released by rxrpc_dead_call_expired() as induced by
-	 * rxrpc_release_call() */
-out_release:
-	_debug("release %p", call);
-	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-	    !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-		rxrpc_queue_call(call);
 out_discard:
 	write_unlock_bh(&call->state_lock);
-	_debug("discard %p", call);
-out:
 	write_unlock(&rx->call_lock);
+	if (abort) {
+		rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+		rxrpc_release_call(rx, call);
+		rxrpc_put_call(call, rxrpc_call_put);
+	}
+	rxrpc_service_prealloc(rx, GFP_KERNEL);
 	_leave(" = %d", ret);
 	return ret;
 }
 
-/**
- * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call
- * @sock: The socket on which the impending call is waiting
- * @user_call_ID: The tag to attach to the call
+/*
+ * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls
+ * @sock: The socket on which to preallocate
+ * @notify_rx: Event notification function for the call
+ * @user_attach_call: Func to attach call to user_call_ID
+ * @user_call_ID: The tag to attach to the preallocated call
+ * @gfp: The allocation conditions.
  *
- * Allow a kernel service to accept an incoming call, assuming the incoming
- * call is still valid.
- */
-struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
-					    unsigned long user_call_ID)
-{
-	struct rxrpc_call *call;
-
-	_enter(",%lx", user_call_ID);
-	call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID);
-	_leave(" = %p", call);
-	return call;
-}
-EXPORT_SYMBOL(rxrpc_kernel_accept_call);
-
-/**
- * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call
- * @sock: The socket on which the impending call is waiting
+ * Charge up the socket with preallocated calls, each with a user ID.  A
+ * function should be provided to effect the attachment from the user's side.
+ * The user is given a ref to hold on the call.
  *
- * Allow a kernel service to reject an incoming call with a BUSY message,
- * assuming the incoming call is still valid.
+ * Note that the call may be come connected before this function returns.
  */
-int rxrpc_kernel_reject_call(struct socket *sock)
+int rxrpc_kernel_charge_accept(struct socket *sock,
+			       rxrpc_notify_rx_t notify_rx,
+			       rxrpc_user_attach_call_t user_attach_call,
+			       unsigned long user_call_ID, gfp_t gfp)
 {
-	int ret;
+	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+	struct rxrpc_backlog *b = rx->backlog;
 
-	_enter("");
-	ret = rxrpc_reject_call(rxrpc_sk(sock->sk));
-	_leave(" = %d", ret);
-	return ret;
+	if (sock->sk->sk_state == RXRPC_CLOSE)
+		return -ESHUTDOWN;
+
+	return rxrpc_service_prealloc_one(rx, b, notify_rx,
+					  user_attach_call, user_call_ID,
+					  gfp);
 }
-EXPORT_SYMBOL(rxrpc_kernel_reject_call);
+EXPORT_SYMBOL(rxrpc_kernel_charge_accept);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index fc32aa5..4f00476 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -22,1274 +22,351 @@
 #include "ar-internal.h"
 
 /*
+ * Set the timer
+ */
+void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
+		     ktime_t now)
+{
+	unsigned long t_j, now_j = jiffies;
+	ktime_t t;
+	bool queue = false;
+
+	read_lock_bh(&call->state_lock);
+
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		t = call->expire_at;
+		if (!ktime_after(t, now))
+			goto out;
+
+		if (!ktime_after(call->resend_at, now)) {
+			call->resend_at = call->expire_at;
+			if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+				queue = true;
+		} else if (ktime_before(call->resend_at, t)) {
+			t = call->resend_at;
+		}
+
+		if (!ktime_after(call->ack_at, now)) {
+			call->ack_at = call->expire_at;
+			if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
+				queue = true;
+		} else if (ktime_before(call->ack_at, t)) {
+			t = call->ack_at;
+		}
+
+		t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now)));
+		t_j += jiffies;
+
+		/* We have to make sure that the calculated jiffies value falls
+		 * at or after the nsec value, or we may loop ceaselessly
+		 * because the timer times out, but we haven't reached the nsec
+		 * timeout yet.
+		 */
+		t_j++;
+
+		if (call->timer.expires != t_j || !timer_pending(&call->timer)) {
+			mod_timer(&call->timer, t_j);
+			trace_rxrpc_timer(call, why, now, now_j);
+		}
+
+		if (queue)
+			rxrpc_queue_call(call);
+	}
+
+out:
+	read_unlock_bh(&call->state_lock);
+}
+
+/*
  * propose an ACK be sent
  */
-void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
-			 u32 serial, bool immediate)
+static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
+				u16 skew, u32 serial, bool immediate,
+				bool background,
+				enum rxrpc_propose_ack_trace why)
 {
-	unsigned long expiry;
+	enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
+	unsigned int expiry = rxrpc_soft_ack_delay;
+	ktime_t now, ack_at;
 	s8 prior = rxrpc_ack_priority[ack_reason];
 
-	ASSERTCMP(prior, >, 0);
-
-	_enter("{%d},%s,%%%x,%u",
-	       call->debug_id, rxrpc_acks(ack_reason), serial, immediate);
-
-	if (prior < rxrpc_ack_priority[call->ackr_reason]) {
-		if (immediate)
-			goto cancel_timer;
-		return;
-	}
-
-	/* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
-	 * numbers */
-	if (prior == rxrpc_ack_priority[call->ackr_reason]) {
-		if (prior <= 4)
+	/* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
+	 * numbers, but we don't alter the timeout.
+	 */
+	_debug("prior %u %u vs %u %u",
+	       ack_reason, prior,
+	       call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]);
+	if (ack_reason == call->ackr_reason) {
+		if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) {
+			outcome = rxrpc_propose_ack_update;
 			call->ackr_serial = serial;
-		if (immediate)
-			goto cancel_timer;
-		return;
+			call->ackr_skew = skew;
+		}
+		if (!immediate)
+			goto trace;
+	} else if (prior > rxrpc_ack_priority[call->ackr_reason]) {
+		call->ackr_reason = ack_reason;
+		call->ackr_serial = serial;
+		call->ackr_skew = skew;
+	} else {
+		outcome = rxrpc_propose_ack_subsume;
 	}
 
-	call->ackr_reason = ack_reason;
-	call->ackr_serial = serial;
-
 	switch (ack_reason) {
-	case RXRPC_ACK_DELAY:
-		_debug("run delay timer");
-		expiry = rxrpc_soft_ack_delay;
-		goto run_timer;
-
-	case RXRPC_ACK_IDLE:
-		if (!immediate) {
-			_debug("run defer timer");
-			expiry = rxrpc_idle_ack_delay;
-			goto run_timer;
-		}
-		goto cancel_timer;
-
 	case RXRPC_ACK_REQUESTED:
-		expiry = rxrpc_requested_ack_delay;
-		if (!expiry)
-			goto cancel_timer;
-		if (!immediate || serial == 1) {
-			_debug("run defer timer");
-			goto run_timer;
-		}
+		if (rxrpc_requested_ack_delay < expiry)
+			expiry = rxrpc_requested_ack_delay;
+		if (serial == 1)
+			immediate = false;
+		break;
+
+	case RXRPC_ACK_DELAY:
+		if (rxrpc_soft_ack_delay < expiry)
+			expiry = rxrpc_soft_ack_delay;
+		break;
+
+	case RXRPC_ACK_PING:
+	case RXRPC_ACK_IDLE:
+		if (rxrpc_idle_ack_delay < expiry)
+			expiry = rxrpc_idle_ack_delay;
+		break;
 
 	default:
-		_debug("immediate ACK");
-		goto cancel_timer;
+		immediate = true;
+		break;
 	}
 
-run_timer:
-	expiry += jiffies;
-	if (!timer_pending(&call->ack_timer) ||
-	    time_after(call->ack_timer.expires, expiry))
-		mod_timer(&call->ack_timer, expiry);
-	return;
+	if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
+		_debug("already scheduled");
+	} else if (immediate || expiry == 0) {
+		_debug("immediate ACK %lx", call->events);
+		if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) &&
+		    background)
+			rxrpc_queue_call(call);
+	} else {
+		now = ktime_get_real();
+		ack_at = ktime_add_ms(now, expiry);
+		if (ktime_before(ack_at, call->ack_at)) {
+			call->ack_at = ack_at;
+			rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now);
+		}
+	}
 
-cancel_timer:
-	_debug("cancel timer %%%u", serial);
-	try_to_del_timer_sync(&call->ack_timer);
-	read_lock_bh(&call->state_lock);
-	if (call->state <= RXRPC_CALL_COMPLETE &&
-	    !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
-		rxrpc_queue_call(call);
-	read_unlock_bh(&call->state_lock);
+trace:
+	trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate,
+				background, outcome);
 }
 
 /*
  * propose an ACK be sent, locking the call structure
  */
 void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
-		       u32 serial, bool immediate)
+		       u16 skew, u32 serial, bool immediate, bool background,
+		       enum rxrpc_propose_ack_trace why)
 {
-	s8 prior = rxrpc_ack_priority[ack_reason];
-
-	if (prior > rxrpc_ack_priority[call->ackr_reason]) {
-		spin_lock_bh(&call->lock);
-		__rxrpc_propose_ACK(call, ack_reason, serial, immediate);
-		spin_unlock_bh(&call->lock);
-	}
+	spin_lock_bh(&call->lock);
+	__rxrpc_propose_ACK(call, ack_reason, skew, serial,
+			    immediate, background, why);
+	spin_unlock_bh(&call->lock);
 }
 
 /*
- * set the resend timer
+ * Handle congestion being detected by the retransmit timeout.
  */
-static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
-			     unsigned long resend_at)
+static void rxrpc_congestion_timeout(struct rxrpc_call *call)
 {
-	read_lock_bh(&call->state_lock);
-	if (call->state >= RXRPC_CALL_COMPLETE)
-		resend = 0;
-
-	if (resend & 1) {
-		_debug("SET RESEND");
-		set_bit(RXRPC_CALL_EV_RESEND, &call->events);
-	}
-
-	if (resend & 2) {
-		_debug("MODIFY RESEND TIMER");
-		set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-		mod_timer(&call->resend_timer, resend_at);
-	} else {
-		_debug("KILL RESEND TIMER");
-		del_timer_sync(&call->resend_timer);
-		clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
-		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-	}
-	read_unlock_bh(&call->state_lock);
+	set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags);
 }
 
 /*
- * resend packets
+ * Perform retransmission of NAK'd and unack'd packets.
  */
-static void rxrpc_resend(struct rxrpc_call *call)
+static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
 {
-	struct rxrpc_wire_header *whdr;
 	struct rxrpc_skb_priv *sp;
-	struct sk_buff *txb;
-	unsigned long *p_txb, resend_at;
-	bool stop;
-	int loop;
-	u8 resend;
+	struct sk_buff *skb;
+	rxrpc_seq_t cursor, seq, top;
+	ktime_t max_age, oldest, ack_ts;
+	int ix;
+	u8 annotation, anno_type, retrans = 0, unacked = 0;
 
-	_enter("{%d,%d,%d,%d},",
-	       call->acks_hard, call->acks_unacked,
-	       atomic_read(&call->sequence),
-	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
+	_enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
 
-	stop = false;
-	resend = 0;
-	resend_at = 0;
+	max_age = ktime_sub_ms(now, rxrpc_resend_timeout);
 
-	for (loop = call->acks_tail;
-	     loop != call->acks_head || stop;
-	     loop = (loop + 1) &  (call->acks_winsz - 1)
-	     ) {
-		p_txb = call->acks_window + loop;
-		smp_read_barrier_depends();
-		if (*p_txb & 1)
+	spin_lock_bh(&call->lock);
+
+	cursor = call->tx_hard_ack;
+	top = call->tx_top;
+	ASSERT(before_eq(cursor, top));
+	if (cursor == top)
+		goto out_unlock;
+
+	/* Scan the packet list without dropping the lock and decide which of
+	 * the packets in the Tx buffer we're going to resend and what the new
+	 * resend timeout will be.
+	 */
+	oldest = now;
+	for (seq = cursor + 1; before_eq(seq, top); seq++) {
+		ix = seq & RXRPC_RXTX_BUFF_MASK;
+		annotation = call->rxtx_annotations[ix];
+		anno_type = annotation & RXRPC_TX_ANNO_MASK;
+		annotation &= ~RXRPC_TX_ANNO_MASK;
+		if (anno_type == RXRPC_TX_ANNO_ACK)
 			continue;
 
-		txb = (struct sk_buff *) *p_txb;
-		sp = rxrpc_skb(txb);
+		skb = call->rxtx_buffer[ix];
+		rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
+		sp = rxrpc_skb(skb);
 
-		if (sp->need_resend) {
-			sp->need_resend = false;
-
-			/* each Tx packet has a new serial number */
-			sp->hdr.serial = atomic_inc_return(&call->conn->serial);
-
-			whdr = (struct rxrpc_wire_header *)txb->head;
-			whdr->serial = htonl(sp->hdr.serial);
-
-			_proto("Tx DATA %%%u { #%d }",
-			       sp->hdr.serial, sp->hdr.seq);
-			if (rxrpc_send_data_packet(call->conn, txb) < 0) {
-				stop = true;
-				sp->resend_at = jiffies + 3;
-			} else {
-				sp->resend_at =
-					jiffies + rxrpc_resend_timeout;
+		if (anno_type == RXRPC_TX_ANNO_UNACK) {
+			if (ktime_after(skb->tstamp, max_age)) {
+				if (ktime_before(skb->tstamp, oldest))
+					oldest = skb->tstamp;
+				continue;
 			}
+			if (!(annotation & RXRPC_TX_ANNO_RESENT))
+				unacked++;
 		}
 
-		if (time_after_eq(jiffies + 1, sp->resend_at)) {
-			sp->need_resend = true;
-			resend |= 1;
-		} else if (resend & 2) {
-			if (time_before(sp->resend_at, resend_at))
-				resend_at = sp->resend_at;
-		} else {
-			resend_at = sp->resend_at;
-			resend |= 2;
-		}
+		/* Okay, we need to retransmit a packet. */
+		call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation;
+		retrans++;
+		trace_rxrpc_retransmit(call, seq, annotation | anno_type,
+				       ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
 	}
 
-	rxrpc_set_resend(call, resend, resend_at);
+	call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout);
+
+	if (unacked)
+		rxrpc_congestion_timeout(call);
+
+	/* If there was nothing that needed retransmission then it's likely
+	 * that an ACK got lost somewhere.  Send a ping to find out instead of
+	 * retransmitting data.
+	 */
+	if (!retrans) {
+		rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
+		spin_unlock_bh(&call->lock);
+		ack_ts = ktime_sub(now, call->acks_latest_ts);
+		if (ktime_to_ns(ack_ts) < call->peer->rtt)
+			goto out;
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+				  rxrpc_propose_ack_ping_for_lost_ack);
+		rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+		goto out;
+	}
+
+	/* Now go through the Tx window and perform the retransmissions.  We
+	 * have to drop the lock for each send.  If an ACK comes in whilst the
+	 * lock is dropped, it may clear some of the retransmission markers for
+	 * packets that it soft-ACKs.
+	 */
+	for (seq = cursor + 1; before_eq(seq, top); seq++) {
+		ix = seq & RXRPC_RXTX_BUFF_MASK;
+		annotation = call->rxtx_annotations[ix];
+		anno_type = annotation & RXRPC_TX_ANNO_MASK;
+		if (anno_type != RXRPC_TX_ANNO_RETRANS)
+			continue;
+
+		skb = call->rxtx_buffer[ix];
+		rxrpc_get_skb(skb, rxrpc_skb_tx_got);
+		spin_unlock_bh(&call->lock);
+
+		if (rxrpc_send_data_packet(call, skb, true) < 0) {
+			rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+			return;
+		}
+
+		if (rxrpc_is_client_call(call))
+			rxrpc_expose_client_call(call);
+
+		rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+		spin_lock_bh(&call->lock);
+
+		/* We need to clear the retransmit state, but there are two
+		 * things we need to be aware of: A new ACK/NAK might have been
+		 * received and the packet might have been hard-ACK'd (in which
+		 * case it will no longer be in the buffer).
+		 */
+		if (after(seq, call->tx_hard_ack)) {
+			annotation = call->rxtx_annotations[ix];
+			anno_type = annotation & RXRPC_TX_ANNO_MASK;
+			if (anno_type == RXRPC_TX_ANNO_RETRANS ||
+			    anno_type == RXRPC_TX_ANNO_NAK) {
+				annotation &= ~RXRPC_TX_ANNO_MASK;
+				annotation |= RXRPC_TX_ANNO_UNACK;
+			}
+			annotation |= RXRPC_TX_ANNO_RESENT;
+			call->rxtx_annotations[ix] = annotation;
+		}
+
+		if (after(call->tx_hard_ack, seq))
+			seq = call->tx_hard_ack;
+	}
+
+out_unlock:
+	spin_unlock_bh(&call->lock);
+out:
 	_leave("");
 }
 
 /*
- * handle resend timer expiry
- */
-static void rxrpc_resend_timer(struct rxrpc_call *call)
-{
-	struct rxrpc_skb_priv *sp;
-	struct sk_buff *txb;
-	unsigned long *p_txb, resend_at;
-	int loop;
-	u8 resend;
-
-	_enter("%d,%d,%d",
-	       call->acks_tail, call->acks_unacked, call->acks_head);
-
-	if (call->state >= RXRPC_CALL_COMPLETE)
-		return;
-
-	resend = 0;
-	resend_at = 0;
-
-	for (loop = call->acks_unacked;
-	     loop != call->acks_head;
-	     loop = (loop + 1) &  (call->acks_winsz - 1)
-	     ) {
-		p_txb = call->acks_window + loop;
-		smp_read_barrier_depends();
-		txb = (struct sk_buff *) (*p_txb & ~1);
-		sp = rxrpc_skb(txb);
-
-		ASSERT(!(*p_txb & 1));
-
-		if (sp->need_resend) {
-			;
-		} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
-			sp->need_resend = true;
-			resend |= 1;
-		} else if (resend & 2) {
-			if (time_before(sp->resend_at, resend_at))
-				resend_at = sp->resend_at;
-		} else {
-			resend_at = sp->resend_at;
-			resend |= 2;
-		}
-	}
-
-	rxrpc_set_resend(call, resend, resend_at);
-	_leave("");
-}
-
-/*
- * process soft ACKs of our transmitted packets
- * - these indicate packets the peer has or has not received, but hasn't yet
- *   given to the consumer, and so can still be discarded and re-requested
- */
-static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
-				   struct rxrpc_ackpacket *ack,
-				   struct sk_buff *skb)
-{
-	struct rxrpc_skb_priv *sp;
-	struct sk_buff *txb;
-	unsigned long *p_txb, resend_at;
-	int loop;
-	u8 sacks[RXRPC_MAXACKS], resend;
-
-	_enter("{%d,%d},{%d},",
-	       call->acks_hard,
-	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz),
-	       ack->nAcks);
-
-	if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0)
-		goto protocol_error;
-
-	resend = 0;
-	resend_at = 0;
-	for (loop = 0; loop < ack->nAcks; loop++) {
-		p_txb = call->acks_window;
-		p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1);
-		smp_read_barrier_depends();
-		txb = (struct sk_buff *) (*p_txb & ~1);
-		sp = rxrpc_skb(txb);
-
-		switch (sacks[loop]) {
-		case RXRPC_ACK_TYPE_ACK:
-			sp->need_resend = false;
-			*p_txb |= 1;
-			break;
-		case RXRPC_ACK_TYPE_NACK:
-			sp->need_resend = true;
-			*p_txb &= ~1;
-			resend = 1;
-			break;
-		default:
-			_debug("Unsupported ACK type %d", sacks[loop]);
-			goto protocol_error;
-		}
-	}
-
-	smp_mb();
-	call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1);
-
-	/* anything not explicitly ACK'd is implicitly NACK'd, but may just not
-	 * have been received or processed yet by the far end */
-	for (loop = call->acks_unacked;
-	     loop != call->acks_head;
-	     loop = (loop + 1) &  (call->acks_winsz - 1)
-	     ) {
-		p_txb = call->acks_window + loop;
-		smp_read_barrier_depends();
-		txb = (struct sk_buff *) (*p_txb & ~1);
-		sp = rxrpc_skb(txb);
-
-		if (*p_txb & 1) {
-			/* packet must have been discarded */
-			sp->need_resend = true;
-			*p_txb &= ~1;
-			resend |= 1;
-		} else if (sp->need_resend) {
-			;
-		} else if (time_after_eq(jiffies + 1, sp->resend_at)) {
-			sp->need_resend = true;
-			resend |= 1;
-		} else if (resend & 2) {
-			if (time_before(sp->resend_at, resend_at))
-				resend_at = sp->resend_at;
-		} else {
-			resend_at = sp->resend_at;
-			resend |= 2;
-		}
-	}
-
-	rxrpc_set_resend(call, resend, resend_at);
-	_leave(" = 0");
-	return 0;
-
-protocol_error:
-	_leave(" = -EPROTO");
-	return -EPROTO;
-}
-
-/*
- * discard hard-ACK'd packets from the Tx window
- */
-static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
-{
-	unsigned long _skb;
-	int tail = call->acks_tail, old_tail;
-	int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
-
-	_enter("{%u,%u},%u", call->acks_hard, win, hard);
-
-	ASSERTCMP(hard - call->acks_hard, <=, win);
-
-	while (call->acks_hard < hard) {
-		smp_read_barrier_depends();
-		_skb = call->acks_window[tail] & ~1;
-		rxrpc_free_skb((struct sk_buff *) _skb);
-		old_tail = tail;
-		tail = (tail + 1) & (call->acks_winsz - 1);
-		call->acks_tail = tail;
-		if (call->acks_unacked == old_tail)
-			call->acks_unacked = tail;
-		call->acks_hard++;
-	}
-
-	wake_up(&call->tx_waitq);
-}
-
-/*
- * clear the Tx window in the event of a failure
- */
-static void rxrpc_clear_tx_window(struct rxrpc_call *call)
-{
-	rxrpc_rotate_tx_window(call, atomic_read(&call->sequence));
-}
-
-/*
- * drain the out of sequence received packet queue into the packet Rx queue
- */
-static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
-{
-	struct rxrpc_skb_priv *sp;
-	struct sk_buff *skb;
-	bool terminal;
-	int ret;
-
-	_enter("{%d,%d}", call->rx_data_post, call->rx_first_oos);
-
-	spin_lock_bh(&call->lock);
-
-	ret = -ECONNRESET;
-	if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
-		goto socket_unavailable;
-
-	skb = skb_dequeue(&call->rx_oos_queue);
-	if (skb) {
-		sp = rxrpc_skb(skb);
-
-		_debug("drain OOS packet %d [%d]",
-		       sp->hdr.seq, call->rx_first_oos);
-
-		if (sp->hdr.seq != call->rx_first_oos) {
-			skb_queue_head(&call->rx_oos_queue, skb);
-			call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
-			_debug("requeue %p {%u}", skb, call->rx_first_oos);
-		} else {
-			skb->mark = RXRPC_SKB_MARK_DATA;
-			terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
-				!(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
-			ret = rxrpc_queue_rcv_skb(call, skb, true, terminal);
-			BUG_ON(ret < 0);
-			_debug("drain #%u", call->rx_data_post);
-			call->rx_data_post++;
-
-			/* find out what the next packet is */
-			skb = skb_peek(&call->rx_oos_queue);
-			if (skb)
-				call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
-			else
-				call->rx_first_oos = 0;
-			_debug("peek %p {%u}", skb, call->rx_first_oos);
-		}
-	}
-
-	ret = 0;
-socket_unavailable:
-	spin_unlock_bh(&call->lock);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * insert an out of sequence packet into the buffer
- */
-static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
-				    struct sk_buff *skb)
-{
-	struct rxrpc_skb_priv *sp, *psp;
-	struct sk_buff *p;
-	u32 seq;
-
-	sp = rxrpc_skb(skb);
-	seq = sp->hdr.seq;
-	_enter(",,{%u}", seq);
-
-	skb->destructor = rxrpc_packet_destructor;
-	ASSERTCMP(sp->call, ==, NULL);
-	sp->call = call;
-	rxrpc_get_call(call);
-
-	/* insert into the buffer in sequence order */
-	spin_lock_bh(&call->lock);
-
-	skb_queue_walk(&call->rx_oos_queue, p) {
-		psp = rxrpc_skb(p);
-		if (psp->hdr.seq > seq) {
-			_debug("insert oos #%u before #%u", seq, psp->hdr.seq);
-			skb_insert(p, skb, &call->rx_oos_queue);
-			goto inserted;
-		}
-	}
-
-	_debug("append oos #%u", seq);
-	skb_queue_tail(&call->rx_oos_queue, skb);
-inserted:
-
-	/* we might now have a new front to the queue */
-	if (call->rx_first_oos == 0 || seq < call->rx_first_oos)
-		call->rx_first_oos = seq;
-
-	read_lock(&call->state_lock);
-	if (call->state < RXRPC_CALL_COMPLETE &&
-	    call->rx_data_post == call->rx_first_oos) {
-		_debug("drain rx oos now");
-		set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
-	}
-	read_unlock(&call->state_lock);
-
-	spin_unlock_bh(&call->lock);
-	_leave(" [stored #%u]", call->rx_first_oos);
-}
-
-/*
- * clear the Tx window on final ACK reception
- */
-static void rxrpc_zap_tx_window(struct rxrpc_call *call)
-{
-	struct rxrpc_skb_priv *sp;
-	struct sk_buff *skb;
-	unsigned long _skb, *acks_window;
-	u8 winsz = call->acks_winsz;
-	int tail;
-
-	acks_window = call->acks_window;
-	call->acks_window = NULL;
-
-	while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) {
-		tail = call->acks_tail;
-		smp_read_barrier_depends();
-		_skb = acks_window[tail] & ~1;
-		smp_mb();
-		call->acks_tail = (call->acks_tail + 1) & (winsz - 1);
-
-		skb = (struct sk_buff *) _skb;
-		sp = rxrpc_skb(skb);
-		_debug("+++ clear Tx %u", sp->hdr.seq);
-		rxrpc_free_skb(skb);
-	}
-
-	kfree(acks_window);
-}
-
-/*
- * process the extra information that may be appended to an ACK packet
- */
-static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
-				  unsigned int latest, int nAcks)
-{
-	struct rxrpc_ackinfo ackinfo;
-	struct rxrpc_peer *peer;
-	unsigned int mtu;
-
-	if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) {
-		_leave(" [no ackinfo]");
-		return;
-	}
-
-	_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
-	       latest,
-	       ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU),
-	       ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max));
-
-	mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU));
-
-	peer = call->conn->params.peer;
-	if (mtu < peer->maxdata) {
-		spin_lock_bh(&peer->lock);
-		peer->maxdata = mtu;
-		peer->mtu = mtu + peer->hdrsize;
-		spin_unlock_bh(&peer->lock);
-		_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
-	}
-}
-
-/*
- * process packets in the reception queue
- */
-static int rxrpc_process_rx_queue(struct rxrpc_call *call,
-				  u32 *_abort_code)
-{
-	struct rxrpc_ackpacket ack;
-	struct rxrpc_skb_priv *sp;
-	struct sk_buff *skb;
-	bool post_ACK;
-	int latest;
-	u32 hard, tx;
-
-	_enter("");
-
-process_further:
-	skb = skb_dequeue(&call->rx_queue);
-	if (!skb)
-		return -EAGAIN;
-
-	_net("deferred skb %p", skb);
-
-	sp = rxrpc_skb(skb);
-
-	_debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state);
-
-	post_ACK = false;
-
-	switch (sp->hdr.type) {
-		/* data packets that wind up here have been received out of
-		 * order, need security processing or are jumbo packets */
-	case RXRPC_PACKET_TYPE_DATA:
-		_proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
-
-		/* secured packets must be verified and possibly decrypted */
-		if (call->conn->security->verify_packet(call, skb,
-							_abort_code) < 0)
-			goto protocol_error;
-
-		rxrpc_insert_oos_packet(call, skb);
-		goto process_further;
-
-		/* partial ACK to process */
-	case RXRPC_PACKET_TYPE_ACK:
-		if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) {
-			_debug("extraction failure");
-			goto protocol_error;
-		}
-		if (!skb_pull(skb, sizeof(ack)))
-			BUG();
-
-		latest = sp->hdr.serial;
-		hard = ntohl(ack.firstPacket);
-		tx = atomic_read(&call->sequence);
-
-		_proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
-		       latest,
-		       ntohs(ack.maxSkew),
-		       hard,
-		       ntohl(ack.previousPacket),
-		       ntohl(ack.serial),
-		       rxrpc_acks(ack.reason),
-		       ack.nAcks);
-
-		rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks);
-
-		if (ack.reason == RXRPC_ACK_PING) {
-			_proto("Rx ACK %%%u PING Request", latest);
-			rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
-					  sp->hdr.serial, true);
-		}
-
-		/* discard any out-of-order or duplicate ACKs */
-		if (latest - call->acks_latest <= 0) {
-			_debug("discard ACK %d <= %d",
-			       latest, call->acks_latest);
-			goto discard;
-		}
-		call->acks_latest = latest;
-
-		if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
-		    call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY &&
-		    call->state != RXRPC_CALL_SERVER_SEND_REPLY &&
-		    call->state != RXRPC_CALL_SERVER_AWAIT_ACK)
-			goto discard;
-
-		_debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state);
-
-		if (hard > 0) {
-			if (hard - 1 > tx) {
-				_debug("hard-ACK'd packet %d not transmitted"
-				       " (%d top)",
-				       hard - 1, tx);
-				goto protocol_error;
-			}
-
-			if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
-			     call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
-			    hard > tx) {
-				call->acks_hard = tx;
-				goto all_acked;
-			}
-
-			smp_rmb();
-			rxrpc_rotate_tx_window(call, hard - 1);
-		}
-
-		if (ack.nAcks > 0) {
-			if (hard - 1 + ack.nAcks > tx) {
-				_debug("soft-ACK'd packet %d+%d not"
-				       " transmitted (%d top)",
-				       hard - 1, ack.nAcks, tx);
-				goto protocol_error;
-			}
-
-			if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0)
-				goto protocol_error;
-		}
-		goto discard;
-
-		/* complete ACK to process */
-	case RXRPC_PACKET_TYPE_ACKALL:
-		goto all_acked;
-
-		/* abort and busy are handled elsewhere */
-	case RXRPC_PACKET_TYPE_BUSY:
-	case RXRPC_PACKET_TYPE_ABORT:
-		BUG();
-
-		/* connection level events - also handled elsewhere */
-	case RXRPC_PACKET_TYPE_CHALLENGE:
-	case RXRPC_PACKET_TYPE_RESPONSE:
-	case RXRPC_PACKET_TYPE_DEBUG:
-		BUG();
-	}
-
-	/* if we've had a hard ACK that covers all the packets we've sent, then
-	 * that ends that phase of the operation */
-all_acked:
-	write_lock_bh(&call->state_lock);
-	_debug("ack all %d", call->state);
-
-	switch (call->state) {
-	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
-		call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
-		break;
-	case RXRPC_CALL_SERVER_AWAIT_ACK:
-		_debug("srv complete");
-		call->state = RXRPC_CALL_COMPLETE;
-		post_ACK = true;
-		break;
-	case RXRPC_CALL_CLIENT_SEND_REQUEST:
-	case RXRPC_CALL_SERVER_RECV_REQUEST:
-		goto protocol_error_unlock; /* can't occur yet */
-	default:
-		write_unlock_bh(&call->state_lock);
-		goto discard; /* assume packet left over from earlier phase */
-	}
-
-	write_unlock_bh(&call->state_lock);
-
-	/* if all the packets we sent are hard-ACK'd, then we can discard
-	 * whatever we've got left */
-	_debug("clear Tx %d",
-	       CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
-
-	del_timer_sync(&call->resend_timer);
-	clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-	clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
-
-	if (call->acks_window)
-		rxrpc_zap_tx_window(call);
-
-	if (post_ACK) {
-		/* post the final ACK message for userspace to pick up */
-		_debug("post ACK");
-		skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
-		sp->call = call;
-		rxrpc_get_call(call);
-		spin_lock_bh(&call->lock);
-		if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
-			BUG();
-		spin_unlock_bh(&call->lock);
-		goto process_further;
-	}
-
-discard:
-	rxrpc_free_skb(skb);
-	goto process_further;
-
-protocol_error_unlock:
-	write_unlock_bh(&call->state_lock);
-protocol_error:
-	rxrpc_free_skb(skb);
-	_leave(" = -EPROTO");
-	return -EPROTO;
-}
-
-/*
- * post a message to the socket Rx queue for recvmsg() to pick up
- */
-static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
-			      bool fatal)
-{
-	struct rxrpc_skb_priv *sp;
-	struct sk_buff *skb;
-	int ret;
-
-	_enter("{%d,%lx},%u,%u,%d",
-	       call->debug_id, call->flags, mark, error, fatal);
-
-	/* remove timers and things for fatal messages */
-	if (fatal) {
-		del_timer_sync(&call->resend_timer);
-		del_timer_sync(&call->ack_timer);
-		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-	}
-
-	if (mark != RXRPC_SKB_MARK_NEW_CALL &&
-	    !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
-		_leave("[no userid]");
-		return 0;
-	}
-
-	if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
-		skb = alloc_skb(0, GFP_NOFS);
-		if (!skb)
-			return -ENOMEM;
-
-		rxrpc_new_skb(skb);
-
-		skb->mark = mark;
-
-		sp = rxrpc_skb(skb);
-		memset(sp, 0, sizeof(*sp));
-		sp->error = error;
-		sp->call = call;
-		rxrpc_get_call(call);
-
-		spin_lock_bh(&call->lock);
-		ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
-		spin_unlock_bh(&call->lock);
-		BUG_ON(ret < 0);
-	}
-
-	return 0;
-}
-
-/*
- * handle background processing of incoming call packets and ACK / abort
- * generation
+ * Handle retransmission and deferred ACK/abort generation.
  */
 void rxrpc_process_call(struct work_struct *work)
 {
 	struct rxrpc_call *call =
 		container_of(work, struct rxrpc_call, processor);
-	struct rxrpc_wire_header whdr;
-	struct rxrpc_ackpacket ack;
-	struct rxrpc_ackinfo ackinfo;
-	struct msghdr msg;
-	struct kvec iov[5];
-	enum rxrpc_call_event genbit;
-	unsigned long bits;
-	__be32 data, pad;
-	size_t len;
-	int loop, nbit, ioc, ret, mtu;
-	u32 serial, abort_code = RX_PROTOCOL_ERROR;
-	u8 *acks = NULL;
+	ktime_t now;
+
+	rxrpc_see_call(call);
 
 	//printk("\n--------------------\n");
-	_enter("{%d,%s,%lx} [%lu]",
-	       call->debug_id, rxrpc_call_states[call->state], call->events,
-	       (jiffies - call->creation_jif) / (HZ / 10));
+	_enter("{%d,%s,%lx}",
+	       call->debug_id, rxrpc_call_states[call->state], call->events);
 
-	if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
-		_debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
-		return;
+recheck_state:
+	if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
+		rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+		goto recheck_state;
 	}
 
-	/* there's a good chance we're going to have to send a message, so set
-	 * one up in advance */
-	msg.msg_name	= &call->conn->params.peer->srx.transport;
-	msg.msg_namelen	= call->conn->params.peer->srx.transport_len;
-	msg.msg_control	= NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags	= 0;
-
-	whdr.epoch	= htonl(call->conn->proto.epoch);
-	whdr.cid	= htonl(call->cid);
-	whdr.callNumber	= htonl(call->call_id);
-	whdr.seq	= 0;
-	whdr.type	= RXRPC_PACKET_TYPE_ACK;
-	whdr.flags	= call->conn->out_clientflag;
-	whdr.userStatus	= 0;
-	whdr.securityIndex = call->conn->security_ix;
-	whdr._rsvd	= 0;
-	whdr.serviceId	= htons(call->service_id);
-
-	memset(iov, 0, sizeof(iov));
-	iov[0].iov_base	= &whdr;
-	iov[0].iov_len	= sizeof(whdr);
-
-	/* deal with events of a final nature */
-	if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
-		enum rxrpc_skb_mark mark;
-		int error;
-
-		clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
-		clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
-		clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
-
-		error = call->error_report;
-		if (error < RXRPC_LOCAL_ERROR_OFFSET) {
-			mark = RXRPC_SKB_MARK_NET_ERROR;
-			_debug("post net error %d", error);
-		} else {
-			mark = RXRPC_SKB_MARK_LOCAL_ERROR;
-			error -= RXRPC_LOCAL_ERROR_OFFSET;
-			_debug("post net local error %d", error);
-		}
-
-		if (rxrpc_post_message(call, mark, error, true) < 0)
-			goto no_mem;
-		clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
-		goto kill_ACKs;
+	if (call->state == RXRPC_CALL_COMPLETE) {
+		del_timer_sync(&call->timer);
+		goto out_put;
 	}
 
-	if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) {
-		ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
-
-		clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
-		clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
-
-		_debug("post conn abort");
-
-		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
-				       call->conn->error, true) < 0)
-			goto no_mem;
-		clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
-		goto kill_ACKs;
+	now = ktime_get_real();
+	if (ktime_before(call->expire_at, now)) {
+		rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
+		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+		goto recheck_state;
 	}
 
-	if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) {
-		whdr.type = RXRPC_PACKET_TYPE_BUSY;
-		genbit = RXRPC_CALL_EV_REJECT_BUSY;
-		goto send_message;
-	}
-
-	if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
-		ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
-
-		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
-				       ECONNABORTED, true) < 0)
-			goto no_mem;
-		whdr.type = RXRPC_PACKET_TYPE_ABORT;
-		data = htonl(call->local_abort);
-		iov[1].iov_base = &data;
-		iov[1].iov_len = sizeof(data);
-		genbit = RXRPC_CALL_EV_ABORT;
-		goto send_message;
-	}
-
-	if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) {
-		genbit = RXRPC_CALL_EV_ACK_FINAL;
-
-		ack.bufferSpace	= htons(8);
-		ack.maxSkew	= 0;
-		ack.serial	= 0;
-		ack.reason	= RXRPC_ACK_IDLE;
-		ack.nAcks	= 0;
-		call->ackr_reason = 0;
-
-		spin_lock_bh(&call->lock);
-		ack.serial	= htonl(call->ackr_serial);
-		ack.previousPacket = htonl(call->ackr_prev_seq);
-		ack.firstPacket	= htonl(call->rx_data_eaten + 1);
-		spin_unlock_bh(&call->lock);
-
-		pad = 0;
-
-		iov[1].iov_base = &ack;
-		iov[1].iov_len	= sizeof(ack);
-		iov[2].iov_base = &pad;
-		iov[2].iov_len	= 3;
-		iov[3].iov_base = &ackinfo;
-		iov[3].iov_len	= sizeof(ackinfo);
-		goto send_ACK;
-	}
-
-	if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) |
-			    (1 << RXRPC_CALL_EV_RCVD_ABORT))
-	    ) {
-		u32 mark;
-
-		if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events))
-			mark = RXRPC_SKB_MARK_REMOTE_ABORT;
-		else
-			mark = RXRPC_SKB_MARK_BUSY;
-
-		_debug("post abort/busy");
-		rxrpc_clear_tx_window(call);
-		if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
-			goto no_mem;
-
-		clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
-		clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
-		goto kill_ACKs;
-	}
-
-	if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) {
-		_debug("do implicit ackall");
-		rxrpc_clear_tx_window(call);
-	}
-
-	if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) {
-		write_lock_bh(&call->state_lock);
-		if (call->state <= RXRPC_CALL_COMPLETE) {
-			call->state = RXRPC_CALL_LOCALLY_ABORTED;
-			call->local_abort = RX_CALL_TIMEOUT;
-			set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-		}
-		write_unlock_bh(&call->state_lock);
-
-		_debug("post timeout");
-		if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
-				       ETIME, true) < 0)
-			goto no_mem;
-
-		clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
-		goto kill_ACKs;
-	}
-
-	/* deal with assorted inbound messages */
-	if (!skb_queue_empty(&call->rx_queue)) {
-		switch (rxrpc_process_rx_queue(call, &abort_code)) {
-		case 0:
-		case -EAGAIN:
-			break;
-		case -ENOMEM:
-			goto no_mem;
-		case -EKEYEXPIRED:
-		case -EKEYREJECTED:
-		case -EPROTO:
-			rxrpc_abort_call(call, abort_code);
-			goto kill_ACKs;
+	if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) {
+		call->ack_at = call->expire_at;
+		if (call->ackr_reason) {
+			rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+			goto recheck_state;
 		}
 	}
 
-	/* handle resending */
-	if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
-		rxrpc_resend_timer(call);
-	if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events))
-		rxrpc_resend(call);
-
-	/* consider sending an ordinary ACK */
-	if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
-		_debug("send ACK: window: %d - %d { %lx }",
-		       call->rx_data_eaten, call->ackr_win_top,
-		       call->ackr_window[0]);
-
-		if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
-		    call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
-			/* ACK by sending reply DATA packet in this state */
-			clear_bit(RXRPC_CALL_EV_ACK, &call->events);
-			goto maybe_reschedule;
-		}
-
-		genbit = RXRPC_CALL_EV_ACK;
-
-		acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
-			       GFP_NOFS);
-		if (!acks)
-			goto no_mem;
-
-		//hdr.flags	= RXRPC_SLOW_START_OK;
-		ack.bufferSpace	= htons(8);
-		ack.maxSkew	= 0;
-
-		spin_lock_bh(&call->lock);
-		ack.reason	= call->ackr_reason;
-		ack.serial	= htonl(call->ackr_serial);
-		ack.previousPacket = htonl(call->ackr_prev_seq);
-		ack.firstPacket = htonl(call->rx_data_eaten + 1);
-
-		ack.nAcks = 0;
-		for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
-			nbit = loop * BITS_PER_LONG;
-			for (bits = call->ackr_window[loop]; bits; bits >>= 1
-			     ) {
-				_debug("- l=%d n=%d b=%lx", loop, nbit, bits);
-				if (bits & 1) {
-					acks[nbit] = RXRPC_ACK_TYPE_ACK;
-					ack.nAcks = nbit + 1;
-				}
-				nbit++;
-			}
-		}
-		call->ackr_reason = 0;
-		spin_unlock_bh(&call->lock);
-
-		pad = 0;
-
-		iov[1].iov_base = &ack;
-		iov[1].iov_len	= sizeof(ack);
-		iov[2].iov_base = acks;
-		iov[2].iov_len	= ack.nAcks;
-		iov[3].iov_base = &pad;
-		iov[3].iov_len	= 3;
-		iov[4].iov_base = &ackinfo;
-		iov[4].iov_len	= sizeof(ackinfo);
-
-		switch (ack.reason) {
-		case RXRPC_ACK_REQUESTED:
-		case RXRPC_ACK_DUPLICATE:
-		case RXRPC_ACK_OUT_OF_SEQUENCE:
-		case RXRPC_ACK_EXCEEDS_WINDOW:
-		case RXRPC_ACK_NOSPACE:
-		case RXRPC_ACK_PING:
-		case RXRPC_ACK_PING_RESPONSE:
-			goto send_ACK_with_skew;
-		case RXRPC_ACK_DELAY:
-		case RXRPC_ACK_IDLE:
-			goto send_ACK;
-		}
+	if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) {
+		rxrpc_resend(call, now);
+		goto recheck_state;
 	}
 
-	/* handle completion of security negotiations on an incoming
-	 * connection */
-	if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) {
-		_debug("secured");
-		spin_lock_bh(&call->lock);
-
-		if (call->state == RXRPC_CALL_SERVER_SECURING) {
-			_debug("securing");
-			write_lock(&call->socket->call_lock);
-			if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-			    !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
-				_debug("not released");
-				call->state = RXRPC_CALL_SERVER_ACCEPTING;
-				list_move_tail(&call->accept_link,
-					       &call->socket->acceptq);
-			}
-			write_unlock(&call->socket->call_lock);
-			read_lock(&call->state_lock);
-			if (call->state < RXRPC_CALL_COMPLETE)
-				set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
-			read_unlock(&call->state_lock);
-		}
-
-		spin_unlock_bh(&call->lock);
-		if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events))
-			goto maybe_reschedule;
-	}
-
-	/* post a notification of an acceptable connection to the app */
-	if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) {
-		_debug("post accept");
-		if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
-				       0, false) < 0)
-			goto no_mem;
-		clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
-		goto maybe_reschedule;
-	}
-
-	/* handle incoming call acceptance */
-	if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) {
-		_debug("accepted");
-		ASSERTCMP(call->rx_data_post, ==, 0);
-		call->rx_data_post = 1;
-		read_lock_bh(&call->state_lock);
-		if (call->state < RXRPC_CALL_COMPLETE)
-			set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
-		read_unlock_bh(&call->state_lock);
-	}
-
-	/* drain the out of sequence received packet queue into the packet Rx
-	 * queue */
-	if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) {
-		while (call->rx_data_post == call->rx_first_oos)
-			if (rxrpc_drain_rx_oos_queue(call) < 0)
-				break;
-		goto maybe_reschedule;
-	}
-
-	if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
-		rxrpc_release_call(call);
-		clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
-	}
+	rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
 
 	/* other events may have been raised since we started checking */
-	goto maybe_reschedule;
-
-send_ACK_with_skew:
-	ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
-			    ntohl(ack.serial));
-send_ACK:
-	mtu = call->conn->params.peer->if_mtu;
-	mtu -= call->conn->params.peer->hdrsize;
-	ackinfo.maxMTU	= htonl(mtu);
-	ackinfo.rwind	= htonl(rxrpc_rx_window_size);
-
-	/* permit the peer to send us jumbo packets if it wants to */
-	ackinfo.rxMTU	= htonl(rxrpc_rx_mtu);
-	ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);
-
-	serial = atomic_inc_return(&call->conn->serial);
-	whdr.serial = htonl(serial);
-	_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
-	       serial,
-	       ntohs(ack.maxSkew),
-	       ntohl(ack.firstPacket),
-	       ntohl(ack.previousPacket),
-	       ntohl(ack.serial),
-	       rxrpc_acks(ack.reason),
-	       ack.nAcks);
-
-	del_timer_sync(&call->ack_timer);
-	if (ack.nAcks > 0)
-		set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags);
-	goto send_message_2;
-
-send_message:
-	_debug("send message");
-
-	serial = atomic_inc_return(&call->conn->serial);
-	whdr.serial = htonl(serial);
-	_proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial);
-send_message_2:
-
-	len = iov[0].iov_len;
-	ioc = 1;
-	if (iov[4].iov_len) {
-		ioc = 5;
-		len += iov[4].iov_len;
-		len += iov[3].iov_len;
-		len += iov[2].iov_len;
-		len += iov[1].iov_len;
-	} else if (iov[3].iov_len) {
-		ioc = 4;
-		len += iov[3].iov_len;
-		len += iov[2].iov_len;
-		len += iov[1].iov_len;
-	} else if (iov[2].iov_len) {
-		ioc = 3;
-		len += iov[2].iov_len;
-		len += iov[1].iov_len;
-	} else if (iov[1].iov_len) {
-		ioc = 2;
-		len += iov[1].iov_len;
+	if (call->events && call->state < RXRPC_CALL_COMPLETE) {
+		__rxrpc_queue_call(call);
+		goto out;
 	}
 
-	ret = kernel_sendmsg(call->conn->params.local->socket,
-			     &msg, iov, ioc, len);
-	if (ret < 0) {
-		_debug("sendmsg failed: %d", ret);
-		read_lock_bh(&call->state_lock);
-		if (call->state < RXRPC_CALL_DEAD)
-			rxrpc_queue_call(call);
-		read_unlock_bh(&call->state_lock);
-		goto error;
-	}
-
-	switch (genbit) {
-	case RXRPC_CALL_EV_ABORT:
-		clear_bit(genbit, &call->events);
-		clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
-		goto kill_ACKs;
-
-	case RXRPC_CALL_EV_ACK_FINAL:
-		write_lock_bh(&call->state_lock);
-		if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
-			call->state = RXRPC_CALL_COMPLETE;
-		write_unlock_bh(&call->state_lock);
-		goto kill_ACKs;
-
-	default:
-		clear_bit(genbit, &call->events);
-		switch (call->state) {
-		case RXRPC_CALL_CLIENT_AWAIT_REPLY:
-		case RXRPC_CALL_CLIENT_RECV_REPLY:
-		case RXRPC_CALL_SERVER_RECV_REQUEST:
-		case RXRPC_CALL_SERVER_ACK_REQUEST:
-			_debug("start ACK timer");
-			rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
-					  call->ackr_serial, false);
-		default:
-			break;
-		}
-		goto maybe_reschedule;
-	}
-
-kill_ACKs:
-	del_timer_sync(&call->ack_timer);
-	if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events))
-		rxrpc_put_call(call);
-	clear_bit(RXRPC_CALL_EV_ACK, &call->events);
-
-maybe_reschedule:
-	if (call->events || !skb_queue_empty(&call->rx_queue)) {
-		read_lock_bh(&call->state_lock);
-		if (call->state < RXRPC_CALL_DEAD)
-			rxrpc_queue_call(call);
-		read_unlock_bh(&call->state_lock);
-	}
-
-	/* don't leave aborted connections on the accept queue */
-	if (call->state >= RXRPC_CALL_COMPLETE &&
-	    !list_empty(&call->accept_link)) {
-		_debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
-		       call, call->events, call->flags, call->conn->proto.cid);
-
-		read_lock_bh(&call->state_lock);
-		if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-		    !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-			rxrpc_queue_call(call);
-		read_unlock_bh(&call->state_lock);
-	}
-
-error:
-	clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
-	kfree(acks);
-
-	/* because we don't want two CPUs both processing the work item for one
-	 * call at the same time, we use a flag to note when it's busy; however
-	 * this means there's a race between clearing the flag and setting the
-	 * work pending bit and the work item being processed again */
-	if (call->events && !work_pending(&call->processor)) {
-		_debug("jumpstart %x", call->conn->proto.cid);
-		rxrpc_queue_call(call);
-	}
-
+out_put:
+	rxrpc_put_call(call, rxrpc_call_put);
+out:
 	_leave("");
-	return;
-
-no_mem:
-	_debug("out of memory");
-	goto maybe_reschedule;
 }
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 91287c9..364b42d 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -19,23 +19,13 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-/*
- * Maximum lifetime of a call (in jiffies).
- */
-unsigned int rxrpc_max_call_lifetime = 60 * HZ;
-
-/*
- * Time till dead call expires after last use (in jiffies).
- */
-unsigned int rxrpc_dead_call_expiry = 2 * HZ;
-
 const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
-	[RXRPC_CALL_UNINITIALISED]		= "Uninit",
+	[RXRPC_CALL_UNINITIALISED]		= "Uninit  ",
 	[RXRPC_CALL_CLIENT_AWAIT_CONN]		= "ClWtConn",
 	[RXRPC_CALL_CLIENT_SEND_REQUEST]	= "ClSndReq",
 	[RXRPC_CALL_CLIENT_AWAIT_REPLY]		= "ClAwtRpl",
 	[RXRPC_CALL_CLIENT_RECV_REPLY]		= "ClRcvRpl",
-	[RXRPC_CALL_CLIENT_FINAL_ACK]		= "ClFnlACK",
+	[RXRPC_CALL_SERVER_PREALLOC]		= "SvPrealc",
 	[RXRPC_CALL_SERVER_SECURING]		= "SvSecure",
 	[RXRPC_CALL_SERVER_ACCEPTING]		= "SvAccept",
 	[RXRPC_CALL_SERVER_RECV_REQUEST]	= "SvRcvReq",
@@ -43,22 +33,47 @@
 	[RXRPC_CALL_SERVER_SEND_REPLY]		= "SvSndRpl",
 	[RXRPC_CALL_SERVER_AWAIT_ACK]		= "SvAwtACK",
 	[RXRPC_CALL_COMPLETE]			= "Complete",
-	[RXRPC_CALL_SERVER_BUSY]		= "SvBusy  ",
+};
+
+const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = {
+	[RXRPC_CALL_SUCCEEDED]			= "Complete",
 	[RXRPC_CALL_REMOTELY_ABORTED]		= "RmtAbort",
 	[RXRPC_CALL_LOCALLY_ABORTED]		= "LocAbort",
+	[RXRPC_CALL_LOCAL_ERROR]		= "LocError",
 	[RXRPC_CALL_NETWORK_ERROR]		= "NetError",
-	[RXRPC_CALL_DEAD]			= "Dead    ",
+};
+
+const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = {
+	[rxrpc_call_new_client]		= "NWc",
+	[rxrpc_call_new_service]	= "NWs",
+	[rxrpc_call_queued]		= "QUE",
+	[rxrpc_call_queued_ref]		= "QUR",
+	[rxrpc_call_connected]		= "CON",
+	[rxrpc_call_release]		= "RLS",
+	[rxrpc_call_seen]		= "SEE",
+	[rxrpc_call_got]		= "GOT",
+	[rxrpc_call_got_userid]		= "Gus",
+	[rxrpc_call_got_kernel]		= "Gke",
+	[rxrpc_call_put]		= "PUT",
+	[rxrpc_call_put_userid]		= "Pus",
+	[rxrpc_call_put_kernel]		= "Pke",
+	[rxrpc_call_put_noqueue]	= "PNQ",
+	[rxrpc_call_error]		= "*E*",
 };
 
 struct kmem_cache *rxrpc_call_jar;
 LIST_HEAD(rxrpc_calls);
 DEFINE_RWLOCK(rxrpc_call_lock);
 
-static void rxrpc_destroy_call(struct work_struct *work);
-static void rxrpc_call_life_expired(unsigned long _call);
-static void rxrpc_dead_call_expired(unsigned long _call);
-static void rxrpc_ack_time_expired(unsigned long _call);
-static void rxrpc_resend_time_expired(unsigned long _call);
+static void rxrpc_call_timer_expired(unsigned long _call)
+{
+	struct rxrpc_call *call = (struct rxrpc_call *)_call;
+
+	_enter("%d", call->debug_id);
+
+	if (call->state < RXRPC_CALL_COMPLETE)
+		rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real());
+}
 
 /*
  * find an extant server call
@@ -91,7 +106,7 @@
 	return NULL;
 
 found_extant_call:
-	rxrpc_get_call(call);
+	rxrpc_get_call(call, rxrpc_call_got);
 	read_unlock(&rx->call_lock);
 	_leave(" = %p [%d]", call, atomic_read(&call->usage));
 	return call;
@@ -100,7 +115,7 @@
 /*
  * allocate a new call
  */
-static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 {
 	struct rxrpc_call *call;
 
@@ -108,29 +123,25 @@
 	if (!call)
 		return NULL;
 
-	call->acks_winsz = 16;
-	call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long),
+	call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE,
+				    sizeof(struct sk_buff *),
 				    gfp);
-	if (!call->acks_window) {
-		kmem_cache_free(rxrpc_call_jar, call);
-		return NULL;
-	}
+	if (!call->rxtx_buffer)
+		goto nomem;
 
-	setup_timer(&call->lifetimer, &rxrpc_call_life_expired,
-		    (unsigned long) call);
-	setup_timer(&call->deadspan, &rxrpc_dead_call_expired,
-		    (unsigned long) call);
-	setup_timer(&call->ack_timer, &rxrpc_ack_time_expired,
-		    (unsigned long) call);
-	setup_timer(&call->resend_timer, &rxrpc_resend_time_expired,
-		    (unsigned long) call);
-	INIT_WORK(&call->destroyer, &rxrpc_destroy_call);
+	call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp);
+	if (!call->rxtx_annotations)
+		goto nomem_2;
+
+	setup_timer(&call->timer, rxrpc_call_timer_expired,
+		    (unsigned long)call);
 	INIT_WORK(&call->processor, &rxrpc_process_call);
 	INIT_LIST_HEAD(&call->link);
+	INIT_LIST_HEAD(&call->chan_wait_link);
 	INIT_LIST_HEAD(&call->accept_link);
-	skb_queue_head_init(&call->rx_queue);
-	skb_queue_head_init(&call->rx_oos_queue);
-	init_waitqueue_head(&call->tx_waitq);
+	INIT_LIST_HEAD(&call->recvmsg_link);
+	INIT_LIST_HEAD(&call->sock_link);
+	init_waitqueue_head(&call->waitq);
 	spin_lock_init(&call->lock);
 	rwlock_init(&call->state_lock);
 	atomic_set(&call->usage, 1);
@@ -138,70 +149,65 @@
 
 	memset(&call->sock_node, 0xed, sizeof(call->sock_node));
 
-	call->rx_data_expect = 1;
-	call->rx_data_eaten = 0;
-	call->rx_first_oos = 0;
-	call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size;
-	call->creation_jif = jiffies;
+	/* Leave space in the ring to handle a maxed-out jumbo packet */
+	call->rx_winsize = rxrpc_rx_window_size;
+	call->tx_winsize = 16;
+	call->rx_expect_next = 1;
+
+	if (RXRPC_TX_SMSS > 2190)
+		call->cong_cwnd = 2;
+	else if (RXRPC_TX_SMSS > 1095)
+		call->cong_cwnd = 3;
+	else
+		call->cong_cwnd = 4;
+	call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1;
 	return call;
+
+nomem_2:
+	kfree(call->rxtx_buffer);
+nomem:
+	kmem_cache_free(rxrpc_call_jar, call);
+	return NULL;
 }
 
 /*
  * Allocate a new client call.
  */
-static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
-						  struct sockaddr_rxrpc *srx,
+static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
 						  gfp_t gfp)
 {
 	struct rxrpc_call *call;
+	ktime_t now;
 
 	_enter("");
 
-	ASSERT(rx->local != NULL);
-
 	call = rxrpc_alloc_call(gfp);
 	if (!call)
 		return ERR_PTR(-ENOMEM);
 	call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
-
-	sock_hold(&rx->sk);
-	call->socket = rx;
-	call->rx_data_post = 1;
-
-	call->local = rx->local;
 	call->service_id = srx->srx_service;
-	call->in_clientflag = 0;
+	call->tx_phase = true;
+	now = ktime_get_real();
+	call->acks_latest_ts = now;
+	call->cong_tstamp = now;
 
 	_leave(" = %p", call);
 	return call;
 }
 
 /*
- * Begin client call.
+ * Initiate the call ack/resend/expiry timer.
  */
-static int rxrpc_begin_client_call(struct rxrpc_call *call,
-				   struct rxrpc_conn_parameters *cp,
-				   struct sockaddr_rxrpc *srx,
-				   gfp_t gfp)
+static void rxrpc_start_call_timer(struct rxrpc_call *call)
 {
-	int ret;
+	ktime_t now = ktime_get_real(), expire_at;
 
-	/* Set up or get a connection record and set the protocol parameters,
-	 * including channel number and call ID.
-	 */
-	ret = rxrpc_connect_call(call, cp, srx, gfp);
-	if (ret < 0)
-		return ret;
-
-	call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
-
-	spin_lock(&call->conn->params.peer->lock);
-	hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets);
-	spin_unlock(&call->conn->params.peer->lock);
-
-	call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
-	add_timer(&call->lifetimer);
-	return 0;
+	expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime);
+	call->expire_at = expire_at;
+	call->ack_at = expire_at;
+	call->resend_at = expire_at;
+	call->timer.expires = jiffies + LONG_MAX / 2;
+	rxrpc_set_timer(call, rxrpc_timer_begin, now);
 }
 
 /*
@@ -216,20 +222,21 @@
 {
 	struct rxrpc_call *call, *xcall;
 	struct rb_node *parent, **pp;
+	const void *here = __builtin_return_address(0);
 	int ret;
 
 	_enter("%p,%lx", rx, user_call_ID);
 
-	call = rxrpc_alloc_client_call(rx, srx, gfp);
+	call = rxrpc_alloc_client_call(srx, gfp);
 	if (IS_ERR(call)) {
 		_leave(" = %ld", PTR_ERR(call));
 		return call;
 	}
 
-	/* Publish the call, even though it is incompletely set up as yet */
-	call->user_call_ID = user_call_ID;
-	__set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+	trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
+			 here, (const void *)user_call_ID);
 
+	/* Publish the call, even though it is incompletely set up as yet */
 	write_lock(&rx->call_lock);
 
 	pp = &rx->calls.rb_node;
@@ -243,372 +250,285 @@
 		else if (user_call_ID > xcall->user_call_ID)
 			pp = &(*pp)->rb_right;
 		else
-			goto found_user_ID_now_present;
+			goto error_dup_user_ID;
 	}
 
-	rxrpc_get_call(call);
-
+	rcu_assign_pointer(call->socket, rx);
+	call->user_call_ID = user_call_ID;
+	__set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+	rxrpc_get_call(call, rxrpc_call_got_userid);
 	rb_link_node(&call->sock_node, parent, pp);
 	rb_insert_color(&call->sock_node, &rx->calls);
+	list_add(&call->sock_link, &rx->sock_calls);
+
 	write_unlock(&rx->call_lock);
 
-	write_lock_bh(&rxrpc_call_lock);
+	write_lock(&rxrpc_call_lock);
 	list_add_tail(&call->link, &rxrpc_calls);
-	write_unlock_bh(&rxrpc_call_lock);
+	write_unlock(&rxrpc_call_lock);
 
-	ret = rxrpc_begin_client_call(call, cp, srx, gfp);
+	/* Set up or get a connection record and set the protocol parameters,
+	 * including channel number and call ID.
+	 */
+	ret = rxrpc_connect_call(call, cp, srx, gfp);
 	if (ret < 0)
 		goto error;
 
+	trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
+			 here, ERR_PTR(ret));
+
+	spin_lock_bh(&call->conn->params.peer->lock);
+	hlist_add_head(&call->error_link,
+		       &call->conn->params.peer->error_targets);
+	spin_unlock_bh(&call->conn->params.peer->lock);
+
+	rxrpc_start_call_timer(call);
+
 	_net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
 
 	_leave(" = %p [new]", call);
 	return call;
 
-error:
-	write_lock(&rx->call_lock);
-	rb_erase(&call->sock_node, &rx->calls);
-	write_unlock(&rx->call_lock);
-	rxrpc_put_call(call);
-
-	write_lock_bh(&rxrpc_call_lock);
-	list_del_init(&call->link);
-	write_unlock_bh(&rxrpc_call_lock);
-
-	call->state = RXRPC_CALL_DEAD;
-	rxrpc_put_call(call);
-	_leave(" = %d", ret);
-	return ERR_PTR(ret);
-
 	/* We unexpectedly found the user ID in the list after taking
 	 * the call_lock.  This shouldn't happen unless the user races
 	 * with itself and tries to add the same user ID twice at the
 	 * same time in different threads.
 	 */
-found_user_ID_now_present:
+error_dup_user_ID:
 	write_unlock(&rx->call_lock);
-	call->state = RXRPC_CALL_DEAD;
-	rxrpc_put_call(call);
-	_leave(" = -EEXIST [%p]", call);
-	return ERR_PTR(-EEXIST);
+	ret = -EEXIST;
+
+error:
+	__rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+				    RX_CALL_DEAD, ret);
+	trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
+			 here, ERR_PTR(ret));
+	rxrpc_release_call(rx, call);
+	rxrpc_put_call(call, rxrpc_call_put);
+	_leave(" = %d", ret);
+	return ERR_PTR(ret);
 }
 
 /*
- * set up an incoming call
- * - called in process context with IRQs enabled
+ * Set up an incoming call.  call->conn points to the connection.
+ * This is called in BH context and isn't allowed to fail.
  */
-struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
-				       struct rxrpc_connection *conn,
-				       struct sk_buff *skb)
+void rxrpc_incoming_call(struct rxrpc_sock *rx,
+			 struct rxrpc_call *call,
+			 struct sk_buff *skb)
 {
+	struct rxrpc_connection *conn = call->conn;
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	struct rxrpc_call *call, *candidate;
-	u32 call_id, chan;
+	u32 chan;
 
-	_enter(",%d", conn->debug_id);
+	_enter(",%d", call->conn->debug_id);
 
-	ASSERT(rx != NULL);
+	rcu_assign_pointer(call->socket, rx);
+	call->call_id		= sp->hdr.callNumber;
+	call->service_id	= sp->hdr.serviceId;
+	call->cid		= sp->hdr.cid;
+	call->state		= RXRPC_CALL_SERVER_ACCEPTING;
+	if (sp->hdr.securityIndex > 0)
+		call->state	= RXRPC_CALL_SERVER_SECURING;
+	call->cong_tstamp	= skb->tstamp;
 
-	candidate = rxrpc_alloc_call(GFP_NOIO);
-	if (!candidate)
-		return ERR_PTR(-EBUSY);
-
-	chan = sp->hdr.cid & RXRPC_CHANNELMASK;
-	candidate->socket	= rx;
-	candidate->conn		= conn;
-	candidate->cid		= sp->hdr.cid;
-	candidate->call_id	= sp->hdr.callNumber;
-	candidate->channel	= chan;
-	candidate->rx_data_post	= 0;
-	candidate->state	= RXRPC_CALL_SERVER_ACCEPTING;
-	if (conn->security_ix > 0)
-		candidate->state = RXRPC_CALL_SERVER_SECURING;
-
-	spin_lock(&conn->channel_lock);
-
-	/* set the channel for this call */
-	call = rcu_dereference_protected(conn->channels[chan].call,
-					 lockdep_is_held(&conn->channel_lock));
-
-	_debug("channel[%u] is %p", candidate->channel, call);
-	if (call && call->call_id == sp->hdr.callNumber) {
-		/* already set; must've been a duplicate packet */
-		_debug("extant call [%d]", call->state);
-		ASSERTCMP(call->conn, ==, conn);
-
-		read_lock(&call->state_lock);
-		switch (call->state) {
-		case RXRPC_CALL_LOCALLY_ABORTED:
-			if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
-				rxrpc_queue_call(call);
-		case RXRPC_CALL_REMOTELY_ABORTED:
-			read_unlock(&call->state_lock);
-			goto aborted_call;
-		default:
-			rxrpc_get_call(call);
-			read_unlock(&call->state_lock);
-			goto extant_call;
-		}
-	}
-
-	if (call) {
-		/* it seems the channel is still in use from the previous call
-		 * - ditch the old binding if its call is now complete */
-		_debug("CALL: %u { %s }",
-		       call->debug_id, rxrpc_call_states[call->state]);
-
-		if (call->state >= RXRPC_CALL_COMPLETE) {
-			__rxrpc_disconnect_call(call);
-		} else {
-			spin_unlock(&conn->channel_lock);
-			kmem_cache_free(rxrpc_call_jar, candidate);
-			_leave(" = -EBUSY");
-			return ERR_PTR(-EBUSY);
-		}
-	}
-
-	/* check the call number isn't duplicate */
-	_debug("check dup");
-	call_id = sp->hdr.callNumber;
-
-	/* We just ignore calls prior to the current call ID.  Terminated calls
-	 * are handled via the connection.
+	/* Set the channel for this call.  We don't get channel_lock as we're
+	 * only defending against the data_ready handler (which we're called
+	 * from) and the RESPONSE packet parser (which is only really
+	 * interested in call_counter and can cope with a disagreement with the
+	 * call pointer).
 	 */
-	if (call_id <= conn->channels[chan].call_counter)
-		goto old_call; /* TODO: Just drop packet */
-
-	/* make the call available */
-	_debug("new call");
-	call = candidate;
-	candidate = NULL;
-	conn->channels[chan].call_counter = call_id;
+	chan = sp->hdr.cid & RXRPC_CHANNELMASK;
+	conn->channels[chan].call_counter = call->call_id;
+	conn->channels[chan].call_id = call->call_id;
 	rcu_assign_pointer(conn->channels[chan].call, call);
-	sock_hold(&rx->sk);
-	rxrpc_get_connection(conn);
-	spin_unlock(&conn->channel_lock);
 
 	spin_lock(&conn->params.peer->lock);
 	hlist_add_head(&call->error_link, &conn->params.peer->error_targets);
 	spin_unlock(&conn->params.peer->lock);
 
-	write_lock_bh(&rxrpc_call_lock);
-	list_add_tail(&call->link, &rxrpc_calls);
-	write_unlock_bh(&rxrpc_call_lock);
-
-	call->local = conn->params.local;
-	call->epoch = conn->proto.epoch;
-	call->service_id = conn->params.service_id;
-	call->in_clientflag = RXRPC_CLIENT_INITIATED;
-
 	_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
 
-	call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
-	add_timer(&call->lifetimer);
-	_leave(" = %p {%d} [new]", call, call->debug_id);
-	return call;
-
-extant_call:
-	spin_unlock(&conn->channel_lock);
-	kmem_cache_free(rxrpc_call_jar, candidate);
-	_leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
-	return call;
-
-aborted_call:
-	spin_unlock(&conn->channel_lock);
-	kmem_cache_free(rxrpc_call_jar, candidate);
-	_leave(" = -ECONNABORTED");
-	return ERR_PTR(-ECONNABORTED);
-
-old_call:
-	spin_unlock(&conn->channel_lock);
-	kmem_cache_free(rxrpc_call_jar, candidate);
-	_leave(" = -ECONNRESET [old]");
-	return ERR_PTR(-ECONNRESET);
+	rxrpc_start_call_timer(call);
+	_leave("");
 }
 
 /*
- * detach a call from a socket and set up for release
+ * Queue a call's work processor, getting a ref to pass to the work queue.
  */
-void rxrpc_release_call(struct rxrpc_call *call)
+bool rxrpc_queue_call(struct rxrpc_call *call)
 {
-	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_sock *rx = call->socket;
+	const void *here = __builtin_return_address(0);
+	int n = __atomic_add_unless(&call->usage, 1, 0);
+	if (n == 0)
+		return false;
+	if (rxrpc_queue_work(&call->processor))
+		trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL);
+	else
+		rxrpc_put_call(call, rxrpc_call_put_noqueue);
+	return true;
+}
 
-	_enter("{%d,%d,%d,%d}",
-	       call->debug_id, atomic_read(&call->usage),
-	       atomic_read(&call->ackr_not_idle),
-	       call->rx_first_oos);
+/*
+ * Queue a call's work processor, passing the callers ref to the work queue.
+ */
+bool __rxrpc_queue_call(struct rxrpc_call *call)
+{
+	const void *here = __builtin_return_address(0);
+	int n = atomic_read(&call->usage);
+	ASSERTCMP(n, >=, 1);
+	if (rxrpc_queue_work(&call->processor))
+		trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL);
+	else
+		rxrpc_put_call(call, rxrpc_call_put_noqueue);
+	return true;
+}
+
+/*
+ * Note the re-emergence of a call.
+ */
+void rxrpc_see_call(struct rxrpc_call *call)
+{
+	const void *here = __builtin_return_address(0);
+	if (call) {
+		int n = atomic_read(&call->usage);
+
+		trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL);
+	}
+}
+
+/*
+ * Note the addition of a ref on a call.
+ */
+void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+{
+	const void *here = __builtin_return_address(0);
+	int n = atomic_inc_return(&call->usage);
+
+	trace_rxrpc_call(call, op, n, here, NULL);
+}
+
+/*
+ * Detach a call from its owning socket.
+ */
+void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+	const void *here = __builtin_return_address(0);
+	struct rxrpc_connection *conn = call->conn;
+	bool put = false;
+	int i;
+
+	_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
+
+	trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+			 here, (const void *)call->flags);
+
+	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
 
 	spin_lock_bh(&call->lock);
 	if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
 		BUG();
 	spin_unlock_bh(&call->lock);
 
-	/* dissociate from the socket
-	 * - the socket's ref on the call is passed to the death timer
-	 */
-	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+	del_timer_sync(&call->timer);
 
-	spin_lock(&conn->params.peer->lock);
-	hlist_del_init(&call->error_link);
-	spin_unlock(&conn->params.peer->lock);
+	/* Make sure we don't get any more notifications */
+	write_lock_bh(&rx->recvmsg_lock);
 
-	write_lock_bh(&rx->call_lock);
-	if (!list_empty(&call->accept_link)) {
+	if (!list_empty(&call->recvmsg_link)) {
 		_debug("unlinking once-pending call %p { e=%lx f=%lx }",
 		       call, call->events, call->flags);
-		ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
-		list_del_init(&call->accept_link);
-		sk_acceptq_removed(&rx->sk);
-	} else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+		list_del(&call->recvmsg_link);
+		put = true;
+	}
+
+	/* list_empty() must return false in rxrpc_notify_socket() */
+	call->recvmsg_link.next = NULL;
+	call->recvmsg_link.prev = NULL;
+
+	write_unlock_bh(&rx->recvmsg_lock);
+	if (put)
+		rxrpc_put_call(call, rxrpc_call_put);
+
+	write_lock(&rx->call_lock);
+
+	if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
 		rb_erase(&call->sock_node, &rx->calls);
 		memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
-		clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
-	}
-	write_unlock_bh(&rx->call_lock);
-
-	/* free up the channel for reuse */
-	write_lock_bh(&call->state_lock);
-
-	if (call->state < RXRPC_CALL_COMPLETE &&
-	    call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
-		_debug("+++ ABORTING STATE %d +++\n", call->state);
-		call->state = RXRPC_CALL_LOCALLY_ABORTED;
-		call->local_abort = RX_CALL_DEAD;
-	}
-	write_unlock_bh(&call->state_lock);
-
-	rxrpc_disconnect_call(call);
-
-	/* clean up the Rx queue */
-	if (!skb_queue_empty(&call->rx_queue) ||
-	    !skb_queue_empty(&call->rx_oos_queue)) {
-		struct rxrpc_skb_priv *sp;
-		struct sk_buff *skb;
-
-		_debug("purge Rx queues");
-
-		spin_lock_bh(&call->lock);
-		while ((skb = skb_dequeue(&call->rx_queue)) ||
-		       (skb = skb_dequeue(&call->rx_oos_queue))) {
-			sp = rxrpc_skb(skb);
-			if (sp->call) {
-				ASSERTCMP(sp->call, ==, call);
-				rxrpc_put_call(call);
-				sp->call = NULL;
-			}
-			skb->destructor = NULL;
-			spin_unlock_bh(&call->lock);
-
-			_debug("- zap %s %%%u #%u",
-			       rxrpc_pkts[sp->hdr.type],
-			       sp->hdr.serial, sp->hdr.seq);
-			rxrpc_free_skb(skb);
-			spin_lock_bh(&call->lock);
-		}
-		spin_unlock_bh(&call->lock);
-
-		ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE);
+		rxrpc_put_call(call, rxrpc_call_put_userid);
 	}
 
-	del_timer_sync(&call->resend_timer);
-	del_timer_sync(&call->ack_timer);
-	del_timer_sync(&call->lifetimer);
-	call->deadspan.expires = jiffies + rxrpc_dead_call_expiry;
-	add_timer(&call->deadspan);
+	list_del(&call->sock_link);
+	write_unlock(&rx->call_lock);
+
+	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+
+	if (conn)
+		rxrpc_disconnect_call(call);
+
+	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
+		rxrpc_free_skb(call->rxtx_buffer[i],
+			       (call->tx_phase ? rxrpc_skb_tx_cleaned :
+				rxrpc_skb_rx_cleaned));
+		call->rxtx_buffer[i] = NULL;
+	}
 
 	_leave("");
 }
 
 /*
- * handle a dead call being ready for reaping
- */
-static void rxrpc_dead_call_expired(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	_enter("{%d}", call->debug_id);
-
-	write_lock_bh(&call->state_lock);
-	call->state = RXRPC_CALL_DEAD;
-	write_unlock_bh(&call->state_lock);
-	rxrpc_put_call(call);
-}
-
-/*
- * mark a call as to be released, aborting it if it's still in progress
- * - called with softirqs disabled
- */
-static void rxrpc_mark_call_released(struct rxrpc_call *call)
-{
-	bool sched;
-
-	write_lock(&call->state_lock);
-	if (call->state < RXRPC_CALL_DEAD) {
-		sched = false;
-		if (call->state < RXRPC_CALL_COMPLETE) {
-			_debug("abort call %p", call);
-			call->state = RXRPC_CALL_LOCALLY_ABORTED;
-			call->local_abort = RX_CALL_DEAD;
-			if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
-				sched = true;
-		}
-		if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-			sched = true;
-		if (sched)
-			rxrpc_queue_call(call);
-	}
-	write_unlock(&call->state_lock);
-}
-
-/*
  * release all the calls associated with a socket
  */
 void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
 {
 	struct rxrpc_call *call;
-	struct rb_node *p;
 
 	_enter("%p", rx);
 
-	read_lock_bh(&rx->call_lock);
-
-	/* mark all the calls as no longer wanting incoming packets */
-	for (p = rb_first(&rx->calls); p; p = rb_next(p)) {
-		call = rb_entry(p, struct rxrpc_call, sock_node);
-		rxrpc_mark_call_released(call);
+	while (!list_empty(&rx->to_be_accepted)) {
+		call = list_entry(rx->to_be_accepted.next,
+				  struct rxrpc_call, accept_link);
+		list_del(&call->accept_link);
+		rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET);
+		rxrpc_put_call(call, rxrpc_call_put);
 	}
 
-	/* kill the not-yet-accepted incoming calls */
-	list_for_each_entry(call, &rx->secureq, accept_link) {
-		rxrpc_mark_call_released(call);
+	while (!list_empty(&rx->sock_calls)) {
+		call = list_entry(rx->sock_calls.next,
+				  struct rxrpc_call, sock_link);
+		rxrpc_get_call(call, rxrpc_call_got);
+		rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET);
+		rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+		rxrpc_release_call(rx, call);
+		rxrpc_put_call(call, rxrpc_call_put);
 	}
 
-	list_for_each_entry(call, &rx->acceptq, accept_link) {
-		rxrpc_mark_call_released(call);
-	}
-
-	read_unlock_bh(&rx->call_lock);
 	_leave("");
 }
 
 /*
  * release a call
  */
-void __rxrpc_put_call(struct rxrpc_call *call)
+void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
 {
+	const void *here = __builtin_return_address(0);
+	int n;
+
 	ASSERT(call != NULL);
 
-	_enter("%p{u=%d}", call, atomic_read(&call->usage));
-
-	ASSERTCMP(atomic_read(&call->usage), >, 0);
-
-	if (atomic_dec_and_test(&call->usage)) {
+	n = atomic_dec_return(&call->usage);
+	trace_rxrpc_call(call, op, n, here, NULL);
+	ASSERTCMP(n, >=, 0);
+	if (n == 0) {
 		_debug("call %d dead", call->debug_id);
-		ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
-		rxrpc_queue_work(&call->destroyer);
+		ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+
+		write_lock(&rxrpc_call_lock);
+		list_del_init(&call->link);
+		write_unlock(&rxrpc_call_lock);
+
+		rxrpc_cleanup_call(call);
 	}
-	_leave("");
 }
 
 /*
@@ -618,187 +538,70 @@
 {
 	struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
 
-	rxrpc_purge_queue(&call->rx_queue);
+	rxrpc_put_peer(call->peer);
+	kfree(call->rxtx_buffer);
+	kfree(call->rxtx_annotations);
 	kmem_cache_free(rxrpc_call_jar, call);
 }
 
 /*
  * clean up a call
  */
-static void rxrpc_cleanup_call(struct rxrpc_call *call)
+void rxrpc_cleanup_call(struct rxrpc_call *call)
 {
-	_net("DESTROY CALL %d", call->debug_id);
+	int i;
 
-	ASSERT(call->socket);
+	_net("DESTROY CALL %d", call->debug_id);
 
 	memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
 
-	del_timer_sync(&call->lifetimer);
-	del_timer_sync(&call->deadspan);
-	del_timer_sync(&call->ack_timer);
-	del_timer_sync(&call->resend_timer);
+	del_timer_sync(&call->timer);
 
+	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
 	ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
-	ASSERTCMP(call->events, ==, 0);
-	if (work_pending(&call->processor)) {
-		_debug("defer destroy");
-		rxrpc_queue_work(&call->destroyer);
-		return;
-	}
-
 	ASSERTCMP(call->conn, ==, NULL);
 
-	if (call->acks_window) {
-		_debug("kill Tx window %d",
-		       CIRC_CNT(call->acks_head, call->acks_tail,
-				call->acks_winsz));
-		smp_mb();
-		while (CIRC_CNT(call->acks_head, call->acks_tail,
-				call->acks_winsz) > 0) {
-			struct rxrpc_skb_priv *sp;
-			unsigned long _skb;
+	/* Clean up the Rx/Tx buffer */
+	for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++)
+		rxrpc_free_skb(call->rxtx_buffer[i],
+			       (call->tx_phase ? rxrpc_skb_tx_cleaned :
+				rxrpc_skb_rx_cleaned));
 
-			_skb = call->acks_window[call->acks_tail] & ~1;
-			sp = rxrpc_skb((struct sk_buff *)_skb);
-			_debug("+++ clear Tx %u", sp->hdr.seq);
-			rxrpc_free_skb((struct sk_buff *)_skb);
-			call->acks_tail =
-				(call->acks_tail + 1) & (call->acks_winsz - 1);
-		}
+	rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned);
 
-		kfree(call->acks_window);
-	}
-
-	rxrpc_free_skb(call->tx_pending);
-
-	rxrpc_purge_queue(&call->rx_queue);
-	ASSERT(skb_queue_empty(&call->rx_oos_queue));
-	sock_put(&call->socket->sk);
 	call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
 }
 
 /*
- * destroy a call
- */
-static void rxrpc_destroy_call(struct work_struct *work)
-{
-	struct rxrpc_call *call =
-		container_of(work, struct rxrpc_call, destroyer);
-
-	_enter("%p{%d,%d,%p}",
-	       call, atomic_read(&call->usage), call->channel, call->conn);
-
-	ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
-
-	write_lock_bh(&rxrpc_call_lock);
-	list_del_init(&call->link);
-	write_unlock_bh(&rxrpc_call_lock);
-
-	rxrpc_cleanup_call(call);
-	_leave("");
-}
-
-/*
- * preemptively destroy all the call records from a transport endpoint rather
- * than waiting for them to time out
+ * Make sure that all calls are gone.
  */
 void __exit rxrpc_destroy_all_calls(void)
 {
 	struct rxrpc_call *call;
 
 	_enter("");
-	write_lock_bh(&rxrpc_call_lock);
+
+	if (list_empty(&rxrpc_calls))
+		return;
+
+	write_lock(&rxrpc_call_lock);
 
 	while (!list_empty(&rxrpc_calls)) {
 		call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
 		_debug("Zapping call %p", call);
 
+		rxrpc_see_call(call);
 		list_del_init(&call->link);
 
-		switch (atomic_read(&call->usage)) {
-		case 0:
-			ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
-			break;
-		case 1:
-			if (del_timer_sync(&call->deadspan) != 0 &&
-			    call->state != RXRPC_CALL_DEAD)
-				rxrpc_dead_call_expired((unsigned long) call);
-			if (call->state != RXRPC_CALL_DEAD)
-				break;
-		default:
-			pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
-			       call, atomic_read(&call->usage),
-			       atomic_read(&call->ackr_not_idle),
-			       rxrpc_call_states[call->state],
-			       call->flags, call->events);
-			if (!skb_queue_empty(&call->rx_queue))
-				pr_err("Rx queue occupied\n");
-			if (!skb_queue_empty(&call->rx_oos_queue))
-				pr_err("OOS queue occupied\n");
-			break;
-		}
+		pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
+		       call, atomic_read(&call->usage),
+		       rxrpc_call_states[call->state],
+		       call->flags, call->events);
 
-		write_unlock_bh(&rxrpc_call_lock);
+		write_unlock(&rxrpc_call_lock);
 		cond_resched();
-		write_lock_bh(&rxrpc_call_lock);
+		write_lock(&rxrpc_call_lock);
 	}
 
-	write_unlock_bh(&rxrpc_call_lock);
-	_leave("");
-}
-
-/*
- * handle call lifetime being exceeded
- */
-static void rxrpc_call_life_expired(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	if (call->state >= RXRPC_CALL_COMPLETE)
-		return;
-
-	_enter("{%d}", call->debug_id);
-	read_lock_bh(&call->state_lock);
-	if (call->state < RXRPC_CALL_COMPLETE) {
-		set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
-		rxrpc_queue_call(call);
-	}
-	read_unlock_bh(&call->state_lock);
-}
-
-/*
- * handle resend timer expiry
- * - may not take call->state_lock as this can deadlock against del_timer_sync()
- */
-static void rxrpc_resend_time_expired(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	_enter("{%d}", call->debug_id);
-
-	if (call->state >= RXRPC_CALL_COMPLETE)
-		return;
-
-	clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-	if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
-		rxrpc_queue_call(call);
-}
-
-/*
- * handle ACK timer expiry
- */
-static void rxrpc_ack_time_expired(unsigned long _call)
-{
-	struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
-	_enter("{%d}", call->debug_id);
-
-	if (call->state >= RXRPC_CALL_COMPLETE)
-		return;
-
-	read_lock_bh(&call->state_lock);
-	if (call->state < RXRPC_CALL_COMPLETE &&
-	    !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
-		rxrpc_queue_call(call);
-	read_unlock_bh(&call->state_lock);
+	write_unlock(&rxrpc_call_lock);
 }
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 9e91f27..60ef960 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -7,6 +7,68 @@
  * modify it under the terms of the GNU General Public Licence
  * as published by the Free Software Foundation; either version
  * 2 of the Licence, or (at your option) any later version.
+ *
+ *
+ * Client connections need to be cached for a little while after they've made a
+ * call so as to handle retransmitted DATA packets in case the server didn't
+ * receive the final ACK or terminating ABORT we sent it.
+ *
+ * Client connections can be in one of a number of cache states:
+ *
+ *  (1) INACTIVE - The connection is not held in any list and may not have been
+ *      exposed to the world.  If it has been previously exposed, it was
+ *      discarded from the idle list after expiring.
+ *
+ *  (2) WAITING - The connection is waiting for the number of client conns to
+ *      drop below the maximum capacity.  Calls may be in progress upon it from
+ *      when it was active and got culled.
+ *
+ *	The connection is on the rxrpc_waiting_client_conns list which is kept
+ *	in to-be-granted order.  Culled conns with waiters go to the back of
+ *	the queue just like new conns.
+ *
+ *  (3) ACTIVE - The connection has at least one call in progress upon it, it
+ *      may freely grant available channels to new calls and calls may be
+ *      waiting on it for channels to become available.
+ *
+ *	The connection is on the rxrpc_active_client_conns list which is kept
+ *	in activation order for culling purposes.
+ *
+ *	rxrpc_nr_active_client_conns is held incremented also.
+ *
+ *  (4) CULLED - The connection got summarily culled to try and free up
+ *      capacity.  Calls currently in progress on the connection are allowed to
+ *      continue, but new calls will have to wait.  There can be no waiters in
+ *      this state - the conn would have to go to the WAITING state instead.
+ *
+ *  (5) IDLE - The connection has no calls in progress upon it and must have
+ *      been exposed to the world (ie. the EXPOSED flag must be set).  When it
+ *      expires, the EXPOSED flag is cleared and the connection transitions to
+ *      the INACTIVE state.
+ *
+ *	The connection is on the rxrpc_idle_client_conns list which is kept in
+ *	order of how soon they'll expire.
+ *
+ * There are flags of relevance to the cache:
+ *
+ *  (1) EXPOSED - The connection ID got exposed to the world.  If this flag is
+ *      set, an extra ref is added to the connection preventing it from being
+ *      reaped when it has no calls outstanding.  This flag is cleared and the
+ *      ref dropped when a conn is discarded from the idle list.
+ *
+ *      This allows us to move terminal call state retransmission to the
+ *      connection and to discard the call immediately we think it is done
+ *      with.  It also give us a chance to reuse the connection.
+ *
+ *  (2) DONT_REUSE - The connection should be discarded as soon as possible and
+ *      should not be reused.  This is set when an exclusive connection is used
+ *      or a call ID counter overflows.
+ *
+ * The caching state may only be changed if the cache lock is held.
+ *
+ * There are two idle client connection expiry durations.  If the total number
+ * of connections is below the reap threshold, we use the normal duration; if
+ * it's above, we use the fast duration.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -16,27 +78,50 @@
 #include <linux/timer.h>
 #include "ar-internal.h"
 
+__read_mostly unsigned int rxrpc_max_client_connections = 1000;
+__read_mostly unsigned int rxrpc_reap_client_connections = 900;
+__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
+__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
+
+static unsigned int rxrpc_nr_client_conns;
+static unsigned int rxrpc_nr_active_client_conns;
+static __read_mostly bool rxrpc_kill_all_client_conns;
+
+static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock);
+static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex);
+static LIST_HEAD(rxrpc_waiting_client_conns);
+static LIST_HEAD(rxrpc_active_client_conns);
+static LIST_HEAD(rxrpc_idle_client_conns);
+
 /*
  * We use machine-unique IDs for our client connections.
  */
 DEFINE_IDR(rxrpc_client_conn_ids);
 static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
 
+static void rxrpc_cull_active_client_conns(void);
+static void rxrpc_discard_expired_client_conns(struct work_struct *);
+
+static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
+			    rxrpc_discard_expired_client_conns);
+
+const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5] = {
+	[RXRPC_CONN_CLIENT_INACTIVE]	= "Inac",
+	[RXRPC_CONN_CLIENT_WAITING]	= "Wait",
+	[RXRPC_CONN_CLIENT_ACTIVE]	= "Actv",
+	[RXRPC_CONN_CLIENT_CULLED]	= "Cull",
+	[RXRPC_CONN_CLIENT_IDLE]	= "Idle",
+};
+
 /*
  * Get a connection ID and epoch for a client connection from the global pool.
  * The connection struct pointer is then recorded in the idr radix tree.  The
- * epoch is changed if this wraps.
- *
- * TODO: The IDR tree gets very expensive on memory if the connection IDs are
- * widely scattered throughout the number space, so we shall need to retire
- * connections that have, say, an ID more than four times the maximum number of
- * client conns away from the current allocation point to try and keep the IDs
- * concentrated.  We will also need to retire connections from an old epoch.
+ * epoch doesn't change until the client is rebooted (or, at least, unless the
+ * module is unloaded).
  */
 static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
 					  gfp_t gfp)
 {
-	u32 epoch;
 	int id;
 
 	_enter("");
@@ -44,34 +129,18 @@
 	idr_preload(gfp);
 	spin_lock(&rxrpc_conn_id_lock);
 
-	epoch = rxrpc_epoch;
-
-	/* We could use idr_alloc_cyclic() here, but we really need to know
-	 * when the thing wraps so that we can advance the epoch.
-	 */
-	if (rxrpc_client_conn_ids.cur == 0)
-		rxrpc_client_conn_ids.cur = 1;
-	id = idr_alloc(&rxrpc_client_conn_ids, conn,
-		       rxrpc_client_conn_ids.cur, 0x40000000, GFP_NOWAIT);
-	if (id < 0) {
-		if (id != -ENOSPC)
-			goto error;
-		id = idr_alloc(&rxrpc_client_conn_ids, conn,
-			       1, 0x40000000, GFP_NOWAIT);
-		if (id < 0)
-			goto error;
-		epoch++;
-		rxrpc_epoch = epoch;
-	}
-	rxrpc_client_conn_ids.cur = id + 1;
+	id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn,
+			      1, 0x40000000, GFP_NOWAIT);
+	if (id < 0)
+		goto error;
 
 	spin_unlock(&rxrpc_conn_id_lock);
 	idr_preload_end();
 
-	conn->proto.epoch = epoch;
+	conn->proto.epoch = rxrpc_epoch;
 	conn->proto.cid = id << RXRPC_CIDSHIFT;
 	set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
-	_leave(" [CID %x:%x]", epoch, conn->proto.cid);
+	_leave(" [CID %x]", conn->proto.cid);
 	return 0;
 
 error:
@@ -114,8 +183,7 @@
 }
 
 /*
- * Allocate a client connection.  The caller must take care to clear any
- * padding bytes in *cp.
+ * Allocate a client connection.
  */
 static struct rxrpc_connection *
 rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
@@ -131,6 +199,10 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
+	atomic_set(&conn->usage, 1);
+	if (cp->exclusive)
+		__set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+
 	conn->params		= *cp;
 	conn->out_clientflag	= RXRPC_CLIENT_INITIATED;
 	conn->state		= RXRPC_CONN_CLIENT;
@@ -148,7 +220,7 @@
 		goto error_2;
 
 	write_lock(&rxrpc_connection_lock);
-	list_add_tail(&conn->link, &rxrpc_connections);
+	list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
 	write_unlock(&rxrpc_connection_lock);
 
 	/* We steal the caller's peer ref. */
@@ -156,6 +228,9 @@
 	rxrpc_get_local(conn->params.local);
 	key_get(conn->params.key);
 
+	trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage),
+			 __builtin_return_address(0));
+	trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
 	_leave(" = %p", conn);
 	return conn;
 
@@ -170,32 +245,68 @@
 }
 
 /*
- * find a connection for a call
- * - called in process context with IRQs enabled
+ * Determine if a connection may be reused.
  */
-int rxrpc_connect_call(struct rxrpc_call *call,
-		       struct rxrpc_conn_parameters *cp,
-		       struct sockaddr_rxrpc *srx,
-		       gfp_t gfp)
+static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
+{
+	int id_cursor, id, distance, limit;
+
+	if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
+		goto dont_reuse;
+
+	if (conn->proto.epoch != rxrpc_epoch)
+		goto mark_dont_reuse;
+
+	/* The IDR tree gets very expensive on memory if the connection IDs are
+	 * widely scattered throughout the number space, so we shall want to
+	 * kill off connections that, say, have an ID more than about four
+	 * times the maximum number of client conns away from the current
+	 * allocation point to try and keep the IDs concentrated.
+	 */
+	id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur);
+	id = conn->proto.cid >> RXRPC_CIDSHIFT;
+	distance = id - id_cursor;
+	if (distance < 0)
+		distance = -distance;
+	limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4;
+	if (distance > limit)
+		goto mark_dont_reuse;
+
+	return true;
+
+mark_dont_reuse:
+	set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+dont_reuse:
+	return false;
+}
+
+/*
+ * Create or find a client connection to use for a call.
+ *
+ * If we return with a connection, the call will be on its waiting list.  It's
+ * left to the caller to assign a channel and wake up the call.
+ */
+static int rxrpc_get_client_conn(struct rxrpc_call *call,
+				 struct rxrpc_conn_parameters *cp,
+				 struct sockaddr_rxrpc *srx,
+				 gfp_t gfp)
 {
 	struct rxrpc_connection *conn, *candidate = NULL;
 	struct rxrpc_local *local = cp->local;
 	struct rb_node *p, **pp, *parent;
 	long diff;
-	int chan;
-
-	DECLARE_WAITQUEUE(myself, current);
+	int ret = -ENOMEM;
 
 	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
 
 	cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
 	if (!cp->peer)
-		return -ENOMEM;
+		goto error;
 
+	/* If the connection is not meant to be exclusive, search the available
+	 * connections to see if the connection we want to use already exists.
+	 */
 	if (!cp->exclusive) {
-		/* Search for a existing client connection unless this is going
-		 * to be a connection that's used exclusively for a single call.
-		 */
 		_debug("search 1");
 		spin_lock(&local->client_conns_lock);
 		p = local->client_conns.rb_node;
@@ -206,39 +317,56 @@
 			diff = (cmp(peer) ?:
 				cmp(key) ?:
 				cmp(security_level));
-			if (diff < 0)
+#undef cmp
+			if (diff < 0) {
 				p = p->rb_left;
-			else if (diff > 0)
+			} else if (diff > 0) {
 				p = p->rb_right;
-			else
-				goto found_extant_conn;
+			} else {
+				if (rxrpc_may_reuse_conn(conn) &&
+				    rxrpc_get_connection_maybe(conn))
+					goto found_extant_conn;
+				/* The connection needs replacing.  It's better
+				 * to effect that when we have something to
+				 * replace it with so that we don't have to
+				 * rebalance the tree twice.
+				 */
+				break;
+			}
 		}
 		spin_unlock(&local->client_conns_lock);
 	}
 
-	/* We didn't find a connection or we want an exclusive one. */
-	_debug("get new conn");
+	/* There wasn't a connection yet or we need an exclusive connection.
+	 * We need to create a candidate and then potentially redo the search
+	 * in case we're racing with another thread also trying to connect on a
+	 * shareable connection.
+	 */
+	_debug("new conn");
 	candidate = rxrpc_alloc_client_connection(cp, gfp);
-	if (!candidate) {
-		_leave(" = -ENOMEM");
-		return -ENOMEM;
+	if (IS_ERR(candidate)) {
+		ret = PTR_ERR(candidate);
+		goto error_peer;
 	}
 
+	/* Add the call to the new connection's waiting list in case we're
+	 * going to have to wait for the connection to come live.  It's our
+	 * connection, so we want first dibs on the channel slots.  We would
+	 * normally have to take channel_lock but we do this before anyone else
+	 * can see the connection.
+	 */
+	list_add_tail(&call->chan_wait_link, &candidate->waiting_calls);
+
 	if (cp->exclusive) {
-		/* Assign the call on an exclusive connection to channel 0 and
-		 * don't add the connection to the endpoint's shareable conn
-		 * lookup tree.
-		 */
-		_debug("exclusive chan 0");
-		conn = candidate;
-		atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
-		spin_lock(&conn->channel_lock);
-		chan = 0;
-		goto found_channel;
+		call->conn = candidate;
+		call->security_ix = candidate->security_ix;
+		_leave(" = 0 [exclusive %d]", candidate->debug_id);
+		return 0;
 	}
 
-	/* We need to redo the search before attempting to add a new connection
-	 * lest we race with someone else adding a conflicting instance.
+	/* Publish the new connection for userspace to find.  We need to redo
+	 * the search before doing this lest we race with someone else adding a
+	 * conflicting instance.
 	 */
 	_debug("search 2");
 	spin_lock(&local->client_conns_lock);
@@ -249,124 +377,711 @@
 		parent = *pp;
 		conn = rb_entry(parent, struct rxrpc_connection, client_node);
 
+#define cmp(X) ((long)conn->params.X - (long)candidate->params.X)
 		diff = (cmp(peer) ?:
 			cmp(key) ?:
 			cmp(security_level));
-		if (diff < 0)
+#undef cmp
+		if (diff < 0) {
 			pp = &(*pp)->rb_left;
-		else if (diff > 0)
+		} else if (diff > 0) {
 			pp = &(*pp)->rb_right;
-		else
-			goto found_extant_conn;
+		} else {
+			if (rxrpc_may_reuse_conn(conn) &&
+			    rxrpc_get_connection_maybe(conn))
+				goto found_extant_conn;
+			/* The old connection is from an outdated epoch. */
+			_debug("replace conn");
+			clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
+			rb_replace_node(&conn->client_node,
+					&candidate->client_node,
+					&local->client_conns);
+			trace_rxrpc_client(conn, -1, rxrpc_client_replace);
+			goto candidate_published;
+		}
 	}
 
-	/* The second search also failed; simply add the new connection with
-	 * the new call in channel 0.  Note that we need to take the channel
-	 * lock before dropping the client conn lock.
-	 */
 	_debug("new conn");
-	set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
 	rb_link_node(&candidate->client_node, parent, pp);
 	rb_insert_color(&candidate->client_node, &local->client_conns);
-attached:
-	conn = candidate;
-	candidate = NULL;
 
-	atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
-	spin_lock(&conn->channel_lock);
+candidate_published:
+	set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+	call->conn = candidate;
+	call->security_ix = candidate->security_ix;
 	spin_unlock(&local->client_conns_lock);
-	chan = 0;
-
-found_channel:
-	_debug("found chan");
-	call->conn	= conn;
-	call->channel	= chan;
-	call->epoch	= conn->proto.epoch;
-	call->cid	= conn->proto.cid | chan;
-	call->call_id	= ++conn->channels[chan].call_counter;
-	conn->channels[chan].call_id = call->call_id;
-	rcu_assign_pointer(conn->channels[chan].call, call);
-
-	_net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
-
-	spin_unlock(&conn->channel_lock);
-	rxrpc_put_peer(cp->peer);
-	cp->peer = NULL;
-	_leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
+	_leave(" = 0 [new %d]", candidate->debug_id);
 	return 0;
 
-	/* We found a potentially suitable connection already in existence.  If
-	 * we can reuse it (ie. its usage count hasn't been reduced to 0 by the
-	 * reaper), discard any candidate we may have allocated, and try to get
-	 * a channel on this one, otherwise we have to replace it.
+	/* We come here if we found a suitable connection already in existence.
+	 * Discard any candidate we may have allocated, and try to get a
+	 * channel on this one.
 	 */
 found_extant_conn:
 	_debug("found conn");
-	if (!rxrpc_get_connection_maybe(conn)) {
-		set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
-		rb_replace_node(&conn->client_node,
-				&candidate->client_node,
-				&local->client_conns);
-		clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
-		goto attached;
-	}
-
 	spin_unlock(&local->client_conns_lock);
 
-	rxrpc_put_connection(candidate);
-
-	if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
-		if (!gfpflags_allow_blocking(gfp)) {
-			rxrpc_put_connection(conn);
-			_leave(" = -EAGAIN");
-			return -EAGAIN;
-		}
-
-		add_wait_queue(&conn->channel_wq, &myself);
-		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (atomic_add_unless(&conn->avail_chans, -1, 0))
-				break;
-			if (signal_pending(current))
-				goto interrupted;
-			schedule();
-		}
-		remove_wait_queue(&conn->channel_wq, &myself);
-		__set_current_state(TASK_RUNNING);
+	if (candidate) {
+		trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate);
+		rxrpc_put_connection(candidate);
+		candidate = NULL;
 	}
 
-	/* The connection allegedly now has a free channel and we can now
-	 * attach the call to it.
-	 */
 	spin_lock(&conn->channel_lock);
+	call->conn = conn;
+	call->security_ix = conn->security_ix;
+	list_add(&call->chan_wait_link, &conn->waiting_calls);
+	spin_unlock(&conn->channel_lock);
+	_leave(" = 0 [extant %d]", conn->debug_id);
+	return 0;
 
-	for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
-		if (!conn->channels[chan].call)
-			goto found_channel;
-	BUG();
-
-interrupted:
-	remove_wait_queue(&conn->channel_wq, &myself);
-	__set_current_state(TASK_RUNNING);
-	rxrpc_put_connection(conn);
+error_peer:
 	rxrpc_put_peer(cp->peer);
 	cp->peer = NULL;
-	_leave(" = -ERESTARTSYS");
-	return -ERESTARTSYS;
+error:
+	_leave(" = %d", ret);
+	return ret;
 }
 
 /*
- * Remove a client connection from the local endpoint's tree, thereby removing
- * it as a target for reuse for new client calls.
+ * Activate a connection.
  */
-void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn)
+static void rxrpc_activate_conn(struct rxrpc_connection *conn)
 {
-	struct rxrpc_local *local = conn->params.local;
+	trace_rxrpc_client(conn, -1, rxrpc_client_to_active);
+	conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+	rxrpc_nr_active_client_conns++;
+	list_move_tail(&conn->cache_link, &rxrpc_active_client_conns);
+}
 
-	spin_lock(&local->client_conns_lock);
-	if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags))
-		rb_erase(&conn->client_node, &local->client_conns);
-	spin_unlock(&local->client_conns_lock);
+/*
+ * Attempt to animate a connection for a new call.
+ *
+ * If it's not exclusive, the connection is in the endpoint tree, and we're in
+ * the conn's list of those waiting to grab a channel.  There is, however, a
+ * limit on the number of live connections allowed at any one time, so we may
+ * have to wait for capacity to become available.
+ *
+ * Note that a connection on the waiting queue might *also* have active
+ * channels if it has been culled to make space and then re-requested by a new
+ * call.
+ */
+static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
+{
+	unsigned int nr_conns;
+
+	_enter("%d,%d", conn->debug_id, conn->cache_state);
+
+	if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE)
+		goto out;
+
+	spin_lock(&rxrpc_client_conn_cache_lock);
+
+	nr_conns = rxrpc_nr_client_conns;
+	if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
+		trace_rxrpc_client(conn, -1, rxrpc_client_count);
+		rxrpc_nr_client_conns = nr_conns + 1;
+	}
+
+	switch (conn->cache_state) {
+	case RXRPC_CONN_CLIENT_ACTIVE:
+	case RXRPC_CONN_CLIENT_WAITING:
+		break;
+
+	case RXRPC_CONN_CLIENT_INACTIVE:
+	case RXRPC_CONN_CLIENT_CULLED:
+	case RXRPC_CONN_CLIENT_IDLE:
+		if (nr_conns >= rxrpc_max_client_connections)
+			goto wait_for_capacity;
+		goto activate_conn;
+
+	default:
+		BUG();
+	}
+
+out_unlock:
+	spin_unlock(&rxrpc_client_conn_cache_lock);
+out:
+	_leave(" [%d]", conn->cache_state);
+	return;
+
+activate_conn:
+	_debug("activate");
+	rxrpc_activate_conn(conn);
+	goto out_unlock;
+
+wait_for_capacity:
+	_debug("wait");
+	trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
+	conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
+	list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns);
+	goto out_unlock;
+}
+
+/*
+ * Deactivate a channel.
+ */
+static void rxrpc_deactivate_one_channel(struct rxrpc_connection *conn,
+					 unsigned int channel)
+{
+	struct rxrpc_channel *chan = &conn->channels[channel];
+
+	rcu_assign_pointer(chan->call, NULL);
+	conn->active_chans &= ~(1 << channel);
+}
+
+/*
+ * Assign a channel to the call at the front of the queue and wake the call up.
+ * We don't increment the callNumber counter until this number has been exposed
+ * to the world.
+ */
+static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
+				       unsigned int channel)
+{
+	struct rxrpc_channel *chan = &conn->channels[channel];
+	struct rxrpc_call *call = list_entry(conn->waiting_calls.next,
+					     struct rxrpc_call, chan_wait_link);
+	u32 call_id = chan->call_counter + 1;
+
+	trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
+
+	write_lock_bh(&call->state_lock);
+	call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+	write_unlock_bh(&call->state_lock);
+
+	rxrpc_see_call(call);
+	list_del_init(&call->chan_wait_link);
+	conn->active_chans |= 1 << channel;
+	call->peer	= rxrpc_get_peer(conn->params.peer);
+	call->cid	= conn->proto.cid | channel;
+	call->call_id	= call_id;
+
+	_net("CONNECT call %08x:%08x as call %d on conn %d",
+	     call->cid, call->call_id, call->debug_id, conn->debug_id);
+
+	/* Paired with the read barrier in rxrpc_wait_for_channel().  This
+	 * orders cid and epoch in the connection wrt to call_id without the
+	 * need to take the channel_lock.
+	 *
+	 * We provisionally assign a callNumber at this point, but we don't
+	 * confirm it until the call is about to be exposed.
+	 *
+	 * TODO: Pair with a barrier in the data_ready handler when that looks
+	 * at the call ID through a connection channel.
+	 */
+	smp_wmb();
+	chan->call_id	= call_id;
+	rcu_assign_pointer(chan->call, call);
+	wake_up(&call->waitq);
+}
+
+/*
+ * Assign channels and callNumbers to waiting calls with channel_lock
+ * held by caller.
+ */
+static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn)
+{
+	u8 avail, mask;
+
+	switch (conn->cache_state) {
+	case RXRPC_CONN_CLIENT_ACTIVE:
+		mask = RXRPC_ACTIVE_CHANS_MASK;
+		break;
+	default:
+		return;
+	}
+
+	while (!list_empty(&conn->waiting_calls) &&
+	       (avail = ~conn->active_chans,
+		avail &= mask,
+		avail != 0))
+		rxrpc_activate_one_channel(conn, __ffs(avail));
+}
+
+/*
+ * Assign channels and callNumbers to waiting calls.
+ */
+static void rxrpc_activate_channels(struct rxrpc_connection *conn)
+{
+	_enter("%d", conn->debug_id);
+
+	trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans);
+
+	if (conn->active_chans == RXRPC_ACTIVE_CHANS_MASK)
+		return;
+
+	spin_lock(&conn->channel_lock);
+	rxrpc_activate_channels_locked(conn);
+	spin_unlock(&conn->channel_lock);
+	_leave("");
+}
+
+/*
+ * Wait for a callNumber and a channel to be granted to a call.
+ */
+static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp)
+{
+	int ret = 0;
+
+	_enter("%d", call->debug_id);
+
+	if (!call->call_id) {
+		DECLARE_WAITQUEUE(myself, current);
+
+		if (!gfpflags_allow_blocking(gfp)) {
+			ret = -EAGAIN;
+			goto out;
+		}
+
+		add_wait_queue_exclusive(&call->waitq, &myself);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (call->call_id)
+				break;
+			if (signal_pending(current)) {
+				ret = -ERESTARTSYS;
+				break;
+			}
+			schedule();
+		}
+		remove_wait_queue(&call->waitq, &myself);
+		__set_current_state(TASK_RUNNING);
+	}
+
+	/* Paired with the write barrier in rxrpc_activate_one_channel(). */
+	smp_rmb();
+
+out:
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_call *call,
+		       struct rxrpc_conn_parameters *cp,
+		       struct sockaddr_rxrpc *srx,
+		       gfp_t gfp)
+{
+	int ret;
+
+	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+	rxrpc_discard_expired_client_conns(NULL);
+	rxrpc_cull_active_client_conns();
+
+	ret = rxrpc_get_client_conn(call, cp, srx, gfp);
+	if (ret < 0)
+		return ret;
+
+	rxrpc_animate_client_conn(call->conn);
+	rxrpc_activate_channels(call->conn);
+
+	ret = rxrpc_wait_for_channel(call, gfp);
+	if (ret < 0)
+		rxrpc_disconnect_client_call(call);
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Note that a connection is about to be exposed to the world.  Once it is
+ * exposed, we maintain an extra ref on it that stops it from being summarily
+ * discarded before it's (a) had a chance to deal with retransmission and (b)
+ * had a chance at re-use (the per-connection security negotiation is
+ * expensive).
+ */
+static void rxrpc_expose_client_conn(struct rxrpc_connection *conn,
+				     unsigned int channel)
+{
+	if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) {
+		trace_rxrpc_client(conn, channel, rxrpc_client_exposed);
+		rxrpc_get_connection(conn);
+	}
+}
+
+/*
+ * Note that a call, and thus a connection, is about to be exposed to the
+ * world.
+ */
+void rxrpc_expose_client_call(struct rxrpc_call *call)
+{
+	unsigned int channel = call->cid & RXRPC_CHANNELMASK;
+	struct rxrpc_connection *conn = call->conn;
+	struct rxrpc_channel *chan = &conn->channels[channel];
+
+	if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+		/* Mark the call ID as being used.  If the callNumber counter
+		 * exceeds ~2 billion, we kill the connection after its
+		 * outstanding calls have finished so that the counter doesn't
+		 * wrap.
+		 */
+		chan->call_counter++;
+		if (chan->call_counter >= INT_MAX)
+			set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+		rxrpc_expose_client_conn(conn, channel);
+	}
+}
+
+/*
+ * Disconnect a client call.
+ */
+void rxrpc_disconnect_client_call(struct rxrpc_call *call)
+{
+	unsigned int channel = call->cid & RXRPC_CHANNELMASK;
+	struct rxrpc_connection *conn = call->conn;
+	struct rxrpc_channel *chan = &conn->channels[channel];
+
+	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
+	call->conn = NULL;
+
+	spin_lock(&conn->channel_lock);
+
+	/* Calls that have never actually been assigned a channel can simply be
+	 * discarded.  If the conn didn't get used either, it will follow
+	 * immediately unless someone else grabs it in the meantime.
+	 */
+	if (!list_empty(&call->chan_wait_link)) {
+		_debug("call is waiting");
+		ASSERTCMP(call->call_id, ==, 0);
+		ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
+		list_del_init(&call->chan_wait_link);
+
+		trace_rxrpc_client(conn, channel, rxrpc_client_chan_unstarted);
+
+		/* We must deactivate or idle the connection if it's now
+		 * waiting for nothing.
+		 */
+		spin_lock(&rxrpc_client_conn_cache_lock);
+		if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING &&
+		    list_empty(&conn->waiting_calls) &&
+		    !conn->active_chans)
+			goto idle_connection;
+		goto out;
+	}
+
+	ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+
+	/* If a client call was exposed to the world, we save the result for
+	 * retransmission.
+	 *
+	 * We use a barrier here so that the call number and abort code can be
+	 * read without needing to take a lock.
+	 *
+	 * TODO: Make the incoming packet handler check this and handle
+	 * terminal retransmission without requiring access to the call.
+	 */
+	if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+		_debug("exposed %u,%u", call->call_id, call->abort_code);
+		__rxrpc_disconnect_call(conn, call);
+	}
+
+	/* See if we can pass the channel directly to another call. */
+	if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE &&
+	    !list_empty(&conn->waiting_calls)) {
+		trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass);
+		rxrpc_activate_one_channel(conn, channel);
+		goto out_2;
+	}
+
+	/* Things are more complex and we need the cache lock.  We might be
+	 * able to simply idle the conn or it might now be lurking on the wait
+	 * list.  It might even get moved back to the active list whilst we're
+	 * waiting for the lock.
+	 */
+	spin_lock(&rxrpc_client_conn_cache_lock);
+
+	switch (conn->cache_state) {
+	case RXRPC_CONN_CLIENT_ACTIVE:
+		if (list_empty(&conn->waiting_calls)) {
+			rxrpc_deactivate_one_channel(conn, channel);
+			if (!conn->active_chans) {
+				rxrpc_nr_active_client_conns--;
+				goto idle_connection;
+			}
+			goto out;
+		}
+
+		trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass);
+		rxrpc_activate_one_channel(conn, channel);
+		goto out;
+
+	case RXRPC_CONN_CLIENT_CULLED:
+		rxrpc_deactivate_one_channel(conn, channel);
+		ASSERT(list_empty(&conn->waiting_calls));
+		if (!conn->active_chans)
+			goto idle_connection;
+		goto out;
+
+	case RXRPC_CONN_CLIENT_WAITING:
+		rxrpc_deactivate_one_channel(conn, channel);
+		goto out;
+
+	default:
+		BUG();
+	}
+
+out:
+	spin_unlock(&rxrpc_client_conn_cache_lock);
+out_2:
+	spin_unlock(&conn->channel_lock);
+	rxrpc_put_connection(conn);
+	_leave("");
+	return;
+
+idle_connection:
+	/* As no channels remain active, the connection gets deactivated
+	 * immediately or moved to the idle list for a short while.
+	 */
+	if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) {
+		trace_rxrpc_client(conn, channel, rxrpc_client_to_idle);
+		conn->idle_timestamp = jiffies;
+		conn->cache_state = RXRPC_CONN_CLIENT_IDLE;
+		list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns);
+		if (rxrpc_idle_client_conns.next == &conn->cache_link &&
+		    !rxrpc_kill_all_client_conns)
+			queue_delayed_work(rxrpc_workqueue,
+					   &rxrpc_client_conn_reap,
+					   rxrpc_conn_idle_client_expiry);
+	} else {
+		trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive);
+		conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+		list_del_init(&conn->cache_link);
+	}
+	goto out;
+}
+
+/*
+ * Clean up a dead client connection.
+ */
+static struct rxrpc_connection *
+rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
+{
+	struct rxrpc_connection *next = NULL;
+	struct rxrpc_local *local = conn->params.local;
+	unsigned int nr_conns;
+
+	trace_rxrpc_client(conn, -1, rxrpc_client_cleanup);
+
+	if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) {
+		spin_lock(&local->client_conns_lock);
+		if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS,
+				       &conn->flags))
+			rb_erase(&conn->client_node, &local->client_conns);
+		spin_unlock(&local->client_conns_lock);
+	}
 
 	rxrpc_put_client_connection_id(conn);
+
+	ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE);
+
+	if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
+		trace_rxrpc_client(conn, -1, rxrpc_client_uncount);
+		spin_lock(&rxrpc_client_conn_cache_lock);
+		nr_conns = --rxrpc_nr_client_conns;
+
+		if (nr_conns < rxrpc_max_client_connections &&
+		    !list_empty(&rxrpc_waiting_client_conns)) {
+			next = list_entry(rxrpc_waiting_client_conns.next,
+					  struct rxrpc_connection, cache_link);
+			rxrpc_get_connection(next);
+			rxrpc_activate_conn(next);
+		}
+
+		spin_unlock(&rxrpc_client_conn_cache_lock);
+	}
+
+	rxrpc_kill_connection(conn);
+	if (next)
+		rxrpc_activate_channels(next);
+
+	/* We need to get rid of the temporary ref we took upon next, but we
+	 * can't call rxrpc_put_connection() recursively.
+	 */
+	return next;
+}
+
+/*
+ * Clean up a dead client connections.
+ */
+void rxrpc_put_client_conn(struct rxrpc_connection *conn)
+{
+	const void *here = __builtin_return_address(0);
+	int n;
+
+	do {
+		n = atomic_dec_return(&conn->usage);
+		trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here);
+		if (n > 0)
+			return;
+		ASSERTCMP(n, >=, 0);
+
+		conn = rxrpc_put_one_client_conn(conn);
+	} while (conn);
+}
+
+/*
+ * Kill the longest-active client connections to make room for new ones.
+ */
+static void rxrpc_cull_active_client_conns(void)
+{
+	struct rxrpc_connection *conn;
+	unsigned int nr_conns = rxrpc_nr_client_conns;
+	unsigned int nr_active, limit;
+
+	_enter("");
+
+	ASSERTCMP(nr_conns, >=, 0);
+	if (nr_conns < rxrpc_max_client_connections) {
+		_leave(" [ok]");
+		return;
+	}
+	limit = rxrpc_reap_client_connections;
+
+	spin_lock(&rxrpc_client_conn_cache_lock);
+	nr_active = rxrpc_nr_active_client_conns;
+
+	while (nr_active > limit) {
+		ASSERT(!list_empty(&rxrpc_active_client_conns));
+		conn = list_entry(rxrpc_active_client_conns.next,
+				  struct rxrpc_connection, cache_link);
+		ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE);
+
+		if (list_empty(&conn->waiting_calls)) {
+			trace_rxrpc_client(conn, -1, rxrpc_client_to_culled);
+			conn->cache_state = RXRPC_CONN_CLIENT_CULLED;
+			list_del_init(&conn->cache_link);
+		} else {
+			trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
+			conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
+			list_move_tail(&conn->cache_link,
+				       &rxrpc_waiting_client_conns);
+		}
+
+		nr_active--;
+	}
+
+	rxrpc_nr_active_client_conns = nr_active;
+	spin_unlock(&rxrpc_client_conn_cache_lock);
+	ASSERTCMP(nr_active, >=, 0);
+	_leave(" [culled]");
+}
+
+/*
+ * Discard expired client connections from the idle list.  Each conn in the
+ * idle list has been exposed and holds an extra ref because of that.
+ *
+ * This may be called from conn setup or from a work item so cannot be
+ * considered non-reentrant.
+ */
+static void rxrpc_discard_expired_client_conns(struct work_struct *work)
+{
+	struct rxrpc_connection *conn;
+	unsigned long expiry, conn_expires_at, now;
+	unsigned int nr_conns;
+	bool did_discard = false;
+
+	_enter("%c", work ? 'w' : 'n');
+
+	if (list_empty(&rxrpc_idle_client_conns)) {
+		_leave(" [empty]");
+		return;
+	}
+
+	/* Don't double up on the discarding */
+	if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) {
+		_leave(" [already]");
+		return;
+	}
+
+	/* We keep an estimate of what the number of conns ought to be after
+	 * we've discarded some so that we don't overdo the discarding.
+	 */
+	nr_conns = rxrpc_nr_client_conns;
+
+next:
+	spin_lock(&rxrpc_client_conn_cache_lock);
+
+	if (list_empty(&rxrpc_idle_client_conns))
+		goto out;
+
+	conn = list_entry(rxrpc_idle_client_conns.next,
+			  struct rxrpc_connection, cache_link);
+	ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags));
+
+	if (!rxrpc_kill_all_client_conns) {
+		/* If the number of connections is over the reap limit, we
+		 * expedite discard by reducing the expiry timeout.  We must,
+		 * however, have at least a short grace period to be able to do
+		 * final-ACK or ABORT retransmission.
+		 */
+		expiry = rxrpc_conn_idle_client_expiry;
+		if (nr_conns > rxrpc_reap_client_connections)
+			expiry = rxrpc_conn_idle_client_fast_expiry;
+
+		conn_expires_at = conn->idle_timestamp + expiry;
+
+		now = READ_ONCE(jiffies);
+		if (time_after(conn_expires_at, now))
+			goto not_yet_expired;
+	}
+
+	trace_rxrpc_client(conn, -1, rxrpc_client_discard);
+	if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags))
+		BUG();
+	conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+	list_del_init(&conn->cache_link);
+
+	spin_unlock(&rxrpc_client_conn_cache_lock);
+
+	/* When we cleared the EXPOSED flag, we took on responsibility for the
+	 * reference that that had on the usage count.  We deal with that here.
+	 * If someone re-sets the flag and re-gets the ref, that's fine.
+	 */
+	rxrpc_put_connection(conn);
+	did_discard = true;
+	nr_conns--;
+	goto next;
+
+not_yet_expired:
+	/* The connection at the front of the queue hasn't yet expired, so
+	 * schedule the work item for that point if we discarded something.
+	 *
+	 * We don't worry if the work item is already scheduled - it can look
+	 * after rescheduling itself at a later time.  We could cancel it, but
+	 * then things get messier.
+	 */
+	_debug("not yet");
+	if (!rxrpc_kill_all_client_conns)
+		queue_delayed_work(rxrpc_workqueue,
+				   &rxrpc_client_conn_reap,
+				   conn_expires_at - now);
+
+out:
+	spin_unlock(&rxrpc_client_conn_cache_lock);
+	spin_unlock(&rxrpc_client_conn_discard_mutex);
+	_leave("");
+}
+
+/*
+ * Preemptively destroy all the client connection records rather than waiting
+ * for them to time out
+ */
+void __exit rxrpc_destroy_all_client_connections(void)
+{
+	_enter("");
+
+	spin_lock(&rxrpc_client_conn_cache_lock);
+	rxrpc_kill_all_client_conns = true;
+	spin_unlock(&rxrpc_client_conn_cache_lock);
+
+	cancel_delayed_work(&rxrpc_client_conn_reap);
+
+	if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0))
+		_debug("destroy: queue failed");
+
+	_leave("");
 }
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index cee0f35..3f9d8d7 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -15,20 +15,128 @@
 #include <linux/net.h>
 #include <linux/skbuff.h>
 #include <linux/errqueue.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <net/ip.h>
 #include "ar-internal.h"
 
 /*
+ * Retransmit terminal ACK or ABORT of the previous call.
+ */
+static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
+				       struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_channel *chan;
+	struct msghdr msg;
+	struct kvec iov;
+	struct {
+		struct rxrpc_wire_header whdr;
+		union {
+			struct {
+				__be32 code;
+			} abort;
+			struct {
+				struct rxrpc_ackpacket ack;
+				u8 padding[3];
+				struct rxrpc_ackinfo info;
+			};
+		};
+	} __attribute__((packed)) pkt;
+	size_t len;
+	u32 serial, mtu, call_id;
+
+	_enter("%d", conn->debug_id);
+
+	chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK];
+
+	/* If the last call got moved on whilst we were waiting to run, just
+	 * ignore this packet.
+	 */
+	call_id = READ_ONCE(chan->last_call);
+	/* Sync with __rxrpc_disconnect_call() */
+	smp_rmb();
+	if (call_id != sp->hdr.callNumber)
+		return;
+
+	msg.msg_name	= &conn->params.peer->srx.transport;
+	msg.msg_namelen	= conn->params.peer->srx.transport_len;
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	pkt.whdr.epoch		= htonl(sp->hdr.epoch);
+	pkt.whdr.cid		= htonl(sp->hdr.cid);
+	pkt.whdr.callNumber	= htonl(sp->hdr.callNumber);
+	pkt.whdr.seq		= 0;
+	pkt.whdr.type		= chan->last_type;
+	pkt.whdr.flags		= conn->out_clientflag;
+	pkt.whdr.userStatus	= 0;
+	pkt.whdr.securityIndex	= conn->security_ix;
+	pkt.whdr._rsvd		= 0;
+	pkt.whdr.serviceId	= htons(chan->last_service_id);
+
+	len = sizeof(pkt.whdr);
+	switch (chan->last_type) {
+	case RXRPC_PACKET_TYPE_ABORT:
+		pkt.abort.code	= htonl(chan->last_abort);
+		len += sizeof(pkt.abort);
+		break;
+
+	case RXRPC_PACKET_TYPE_ACK:
+		mtu = conn->params.peer->if_mtu;
+		mtu -= conn->params.peer->hdrsize;
+		pkt.ack.bufferSpace	= 0;
+		pkt.ack.maxSkew		= htons(skb->priority);
+		pkt.ack.firstPacket	= htonl(chan->last_seq);
+		pkt.ack.previousPacket	= htonl(chan->last_seq - 1);
+		pkt.ack.serial		= htonl(sp->hdr.serial);
+		pkt.ack.reason		= RXRPC_ACK_DUPLICATE;
+		pkt.ack.nAcks		= 0;
+		pkt.info.rxMTU		= htonl(rxrpc_rx_mtu);
+		pkt.info.maxMTU		= htonl(mtu);
+		pkt.info.rwind		= htonl(rxrpc_rx_window_size);
+		pkt.info.jumbo_max	= htonl(rxrpc_rx_jumbo_max);
+		pkt.whdr.flags		|= RXRPC_SLOW_START_OK;
+		len += sizeof(pkt.ack) + sizeof(pkt.info);
+		break;
+	}
+
+	/* Resync with __rxrpc_disconnect_call() and check that the last call
+	 * didn't get advanced whilst we were filling out the packets.
+	 */
+	smp_rmb();
+	if (READ_ONCE(chan->last_call) != call_id)
+		return;
+
+	iov.iov_base	= &pkt;
+	iov.iov_len	= len;
+
+	serial = atomic_inc_return(&conn->serial);
+	pkt.whdr.serial = htonl(serial);
+
+	switch (chan->last_type) {
+	case RXRPC_PACKET_TYPE_ABORT:
+		_proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort);
+		break;
+	case RXRPC_PACKET_TYPE_ACK:
+		trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0,
+				   RXRPC_ACK_DUPLICATE, 0);
+		_proto("Tx ACK %%%u [re]", serial);
+		break;
+	}
+
+	kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
+	_leave("");
+	return;
+}
+
+/*
  * pass a connection-level abort onto all calls on that connection
  */
-static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
-			      u32 abort_code)
+static void rxrpc_abort_calls(struct rxrpc_connection *conn,
+			      enum rxrpc_call_completion compl,
+			      u32 abort_code, int error)
 {
 	struct rxrpc_call *call;
 	int i;
@@ -41,19 +149,15 @@
 		call = rcu_dereference_protected(
 			conn->channels[i].call,
 			lockdep_is_held(&conn->channel_lock));
-		write_lock_bh(&call->state_lock);
-		if (call->state <= RXRPC_CALL_COMPLETE) {
-			call->state = state;
-			if (state == RXRPC_CALL_LOCALLY_ABORTED) {
-				call->local_abort = conn->local_abort;
-				set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
-			} else {
-				call->remote_abort = conn->remote_abort;
-				set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
-			}
-			rxrpc_queue_call(call);
+		if (call) {
+			if (compl == RXRPC_CALL_LOCALLY_ABORTED)
+				trace_rxrpc_abort("CON", call->cid,
+						  call->call_id, 0,
+						  abort_code, error);
+			if (rxrpc_set_call_completion(call, compl,
+						      abort_code, error))
+				rxrpc_notify_socket(call);
 		}
-		write_unlock_bh(&call->state_lock);
 	}
 
 	spin_unlock(&conn->channel_lock);
@@ -78,17 +182,16 @@
 
 	/* generate a connection-level abort */
 	spin_lock_bh(&conn->state_lock);
-	if (conn->state < RXRPC_CONN_REMOTELY_ABORTED) {
-		conn->state = RXRPC_CONN_LOCALLY_ABORTED;
-		conn->error = error;
-		spin_unlock_bh(&conn->state_lock);
-	} else {
+	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
 		spin_unlock_bh(&conn->state_lock);
 		_leave(" = 0 [already dead]");
 		return 0;
 	}
 
-	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code);
+	conn->state = RXRPC_CONN_LOCALLY_ABORTED;
+	spin_unlock_bh(&conn->state_lock);
+
+	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error);
 
 	msg.msg_name	= &conn->params.peer->srx.transport;
 	msg.msg_namelen	= conn->params.peer->srx.transport_len;
@@ -132,17 +235,18 @@
 
 /*
  * mark a call as being on a now-secured channel
- * - must be called with softirqs disabled
+ * - must be called with BH's disabled.
  */
 static void rxrpc_call_is_secure(struct rxrpc_call *call)
 {
 	_enter("%p", call);
 	if (call) {
-		read_lock(&call->state_lock);
-		if (call->state < RXRPC_CALL_COMPLETE &&
-		    !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events))
-			rxrpc_queue_call(call);
-		read_unlock(&call->state_lock);
+		write_lock_bh(&call->state_lock);
+		if (call->state == RXRPC_CALL_SERVER_SECURING) {
+			call->state = RXRPC_CALL_SERVER_ACCEPTING;
+			rxrpc_notify_socket(call);
+		}
+		write_unlock_bh(&call->state_lock);
 	}
 }
 
@@ -159,22 +263,28 @@
 	int loop, ret;
 
 	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
-		kleave(" = -ECONNABORTED [%u]", conn->state);
+		_leave(" = -ECONNABORTED [%u]", conn->state);
 		return -ECONNABORTED;
 	}
 
 	_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
 
 	switch (sp->hdr.type) {
+	case RXRPC_PACKET_TYPE_DATA:
+	case RXRPC_PACKET_TYPE_ACK:
+		rxrpc_conn_retransmit_call(conn, skb);
+		return 0;
+
 	case RXRPC_PACKET_TYPE_ABORT:
-		if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
+		if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+				  &wtmp, sizeof(wtmp)) < 0)
 			return -EPROTO;
 		abort_code = ntohl(wtmp);
 		_proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
 
 		conn->state = RXRPC_CONN_REMOTELY_ABORTED;
 		rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
-				  abort_code);
+				  abort_code, ECONNABORTED);
 		return -ECONNABORTED;
 
 	case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -199,14 +309,16 @@
 
 		if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
 			conn->state = RXRPC_CONN_SERVICE;
+			spin_unlock(&conn->state_lock);
 			for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
 				rxrpc_call_is_secure(
 					rcu_dereference_protected(
 						conn->channels[loop].call,
 						lockdep_is_held(&conn->channel_lock)));
+		} else {
+			spin_unlock(&conn->state_lock);
 		}
 
-		spin_unlock(&conn->state_lock);
 		spin_unlock(&conn->channel_lock);
 		return 0;
 
@@ -269,7 +381,7 @@
 	u32 abort_code = RX_PROTOCOL_ERROR;
 	int ret;
 
-	_enter("{%d}", conn->debug_id);
+	rxrpc_see_connection(conn);
 
 	if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
 		rxrpc_secure_connection(conn);
@@ -277,6 +389,7 @@
 	/* go through the conn-level event packets, releasing the ref on this
 	 * connection that each one has when we've finished with it */
 	while ((skb = skb_dequeue(&conn->rx_queue))) {
+		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
 		ret = rxrpc_process_event(conn, skb, &abort_code);
 		switch (ret) {
 		case -EPROTO:
@@ -287,7 +400,7 @@
 			goto requeue_and_leave;
 		case -ECONNABORTED:
 		default:
-			rxrpc_free_skb(skb);
+			rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 			break;
 		}
 	}
@@ -304,91 +417,7 @@
 protocol_error:
 	if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
 		goto requeue_and_leave;
-	rxrpc_free_skb(skb);
+	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 	_leave(" [EPROTO]");
 	goto out;
 }
-
-/*
- * put a packet up for transport-level abort
- */
-void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
-{
-	CHECK_SLAB_OKAY(&local->usage);
-
-	skb_queue_tail(&local->reject_queue, skb);
-	rxrpc_queue_local(local);
-}
-
-/*
- * reject packets through the local endpoint
- */
-void rxrpc_reject_packets(struct rxrpc_local *local)
-{
-	union {
-		struct sockaddr sa;
-		struct sockaddr_in sin;
-	} sa;
-	struct rxrpc_skb_priv *sp;
-	struct rxrpc_wire_header whdr;
-	struct sk_buff *skb;
-	struct msghdr msg;
-	struct kvec iov[2];
-	size_t size;
-	__be32 code;
-
-	_enter("%d", local->debug_id);
-
-	iov[0].iov_base = &whdr;
-	iov[0].iov_len = sizeof(whdr);
-	iov[1].iov_base = &code;
-	iov[1].iov_len = sizeof(code);
-	size = sizeof(whdr) + sizeof(code);
-
-	msg.msg_name = &sa;
-	msg.msg_control = NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags = 0;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.sa.sa_family = local->srx.transport.family;
-	switch (sa.sa.sa_family) {
-	case AF_INET:
-		msg.msg_namelen = sizeof(sa.sin);
-		break;
-	default:
-		msg.msg_namelen = 0;
-		break;
-	}
-
-	memset(&whdr, 0, sizeof(whdr));
-	whdr.type = RXRPC_PACKET_TYPE_ABORT;
-
-	while ((skb = skb_dequeue(&local->reject_queue))) {
-		sp = rxrpc_skb(skb);
-		switch (sa.sa.sa_family) {
-		case AF_INET:
-			sa.sin.sin_port = udp_hdr(skb)->source;
-			sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
-			code = htonl(skb->priority);
-
-			whdr.epoch	= htonl(sp->hdr.epoch);
-			whdr.cid	= htonl(sp->hdr.cid);
-			whdr.callNumber	= htonl(sp->hdr.callNumber);
-			whdr.serviceId	= htons(sp->hdr.serviceId);
-			whdr.flags	= sp->hdr.flags;
-			whdr.flags	^= RXRPC_CLIENT_INITIATED;
-			whdr.flags	&= RXRPC_CLIENT_INITIATED;
-
-			kernel_sendmsg(local->socket, &msg, iov, 2, size);
-			break;
-
-		default:
-			break;
-		}
-
-		rxrpc_free_skb(skb);
-	}
-
-	_leave("");
-}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 896d844..e1e83af 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -1,6 +1,6 @@
-/* RxRPC virtual connection handler
+/* RxRPC virtual connection handler, common bits.
  *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -15,8 +15,6 @@
 #include <linux/slab.h>
 #include <linux/net.h>
 #include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
 /*
@@ -27,9 +25,12 @@
 static void rxrpc_connection_reaper(struct work_struct *work);
 
 LIST_HEAD(rxrpc_connections);
+LIST_HEAD(rxrpc_connection_proc_list);
 DEFINE_RWLOCK(rxrpc_connection_lock);
 static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
 
+static void rxrpc_destroy_connection(struct rcu_head *);
+
 /*
  * allocate a new connection
  */
@@ -41,21 +42,18 @@
 
 	conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
 	if (conn) {
+		INIT_LIST_HEAD(&conn->cache_link);
 		spin_lock_init(&conn->channel_lock);
-		init_waitqueue_head(&conn->channel_wq);
+		INIT_LIST_HEAD(&conn->waiting_calls);
 		INIT_WORK(&conn->processor, &rxrpc_process_connection);
+		INIT_LIST_HEAD(&conn->proc_link);
 		INIT_LIST_HEAD(&conn->link);
 		skb_queue_head_init(&conn->rx_queue);
 		conn->security = &rxrpc_no_security;
 		spin_lock_init(&conn->state_lock);
-		/* We maintain an extra ref on the connection whilst it is
-		 * on the rxrpc_connections list.
-		 */
-		atomic_set(&conn->usage, 2);
 		conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
-		atomic_set(&conn->avail_chans, RXRPC_MAXCALLS);
 		conn->size_align = 4;
-		conn->header_size = sizeof(struct rxrpc_wire_header);
+		conn->idle_timestamp = jiffies;
 	}
 
 	_leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
@@ -135,6 +133,16 @@
 			    srx.transport.sin.sin_addr.s_addr)
 				goto not_found;
 			break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+		case AF_INET6:
+			if (peer->srx.transport.sin6.sin6_port !=
+			    srx.transport.sin6.sin6_port ||
+			    memcmp(&peer->srx.transport.sin6.sin6_addr,
+				   &srx.transport.sin6.sin6_addr,
+				   sizeof(struct in6_addr)) != 0)
+				goto not_found;
+			break;
+#endif
 		default:
 			BUG();
 		}
@@ -153,25 +161,32 @@
  * terminates.  The caller must hold the channel_lock and must release the
  * call's ref on the connection.
  */
-void __rxrpc_disconnect_call(struct rxrpc_call *call)
+void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
+			     struct rxrpc_call *call)
 {
-	struct rxrpc_connection *conn = call->conn;
-	struct rxrpc_channel *chan = &conn->channels[call->channel];
+	struct rxrpc_channel *chan =
+		&conn->channels[call->cid & RXRPC_CHANNELMASK];
 
-	_enter("%d,%d", conn->debug_id, call->channel);
+	_enter("%d,%x", conn->debug_id, call->cid);
 
 	if (rcu_access_pointer(chan->call) == call) {
 		/* Save the result of the call so that we can repeat it if necessary
 		 * through the channel, whilst disposing of the actual call record.
 		 */
-		chan->last_result = call->local_abort;
+		chan->last_service_id = call->service_id;
+		if (call->abort_code) {
+			chan->last_abort = call->abort_code;
+			chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+		} else {
+			chan->last_seq = call->rx_hard_ack;
+			chan->last_type = RXRPC_PACKET_TYPE_ACK;
+		}
+		/* Sync with rxrpc_conn_retransmit(). */
 		smp_wmb();
 		chan->last_call = chan->call_id;
 		chan->call_id = chan->call_counter;
 
 		rcu_assign_pointer(chan->call, NULL);
-		atomic_inc(&conn->avail_chans);
-		wake_up(&conn->channel_wq);
 	}
 
 	_leave("");
@@ -185,34 +200,122 @@
 {
 	struct rxrpc_connection *conn = call->conn;
 
+	spin_lock_bh(&conn->params.peer->lock);
+	hlist_del_init(&call->error_link);
+	spin_unlock_bh(&conn->params.peer->lock);
+
+	if (rxrpc_is_client_call(call))
+		return rxrpc_disconnect_client_call(call);
+
 	spin_lock(&conn->channel_lock);
-	__rxrpc_disconnect_call(call);
+	__rxrpc_disconnect_call(conn, call);
 	spin_unlock(&conn->channel_lock);
 
 	call->conn = NULL;
+	conn->idle_timestamp = jiffies;
 	rxrpc_put_connection(conn);
 }
 
 /*
- * release a virtual connection
+ * Kill off a connection.
  */
-void rxrpc_put_connection(struct rxrpc_connection *conn)
+void rxrpc_kill_connection(struct rxrpc_connection *conn)
 {
-	if (!conn)
-		return;
+	ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
+	       !rcu_access_pointer(conn->channels[1].call) &&
+	       !rcu_access_pointer(conn->channels[2].call) &&
+	       !rcu_access_pointer(conn->channels[3].call));
+	ASSERT(list_empty(&conn->cache_link));
 
-	_enter("%p{u=%d,d=%d}",
-	       conn, atomic_read(&conn->usage), conn->debug_id);
+	write_lock(&rxrpc_connection_lock);
+	list_del_init(&conn->proc_link);
+	write_unlock(&rxrpc_connection_lock);
 
-	ASSERTCMP(atomic_read(&conn->usage), >, 1);
+	/* Drain the Rx queue.  Note that even though we've unpublished, an
+	 * incoming packet could still be being added to our Rx queue, so we
+	 * will need to drain it again in the RCU cleanup handler.
+	 */
+	rxrpc_purge_queue(&conn->rx_queue);
 
-	conn->put_time = ktime_get_seconds();
-	if (atomic_dec_return(&conn->usage) == 1) {
-		_debug("zombie");
-		rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+	/* Leave final destruction to RCU.  The connection processor work item
+	 * must carry a ref on the connection to prevent us getting here whilst
+	 * it is queued or running.
+	 */
+	call_rcu(&conn->rcu, rxrpc_destroy_connection);
+}
+
+/*
+ * Queue a connection's work processor, getting a ref to pass to the work
+ * queue.
+ */
+bool rxrpc_queue_conn(struct rxrpc_connection *conn)
+{
+	const void *here = __builtin_return_address(0);
+	int n = __atomic_add_unless(&conn->usage, 1, 0);
+	if (n == 0)
+		return false;
+	if (rxrpc_queue_work(&conn->processor))
+		trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here);
+	else
+		rxrpc_put_connection(conn);
+	return true;
+}
+
+/*
+ * Note the re-emergence of a connection.
+ */
+void rxrpc_see_connection(struct rxrpc_connection *conn)
+{
+	const void *here = __builtin_return_address(0);
+	if (conn) {
+		int n = atomic_read(&conn->usage);
+
+		trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here);
 	}
+}
 
-	_leave("");
+/*
+ * Get a ref on a connection.
+ */
+void rxrpc_get_connection(struct rxrpc_connection *conn)
+{
+	const void *here = __builtin_return_address(0);
+	int n = atomic_inc_return(&conn->usage);
+
+	trace_rxrpc_conn(conn, rxrpc_conn_got, n, here);
+}
+
+/*
+ * Try to get a ref on a connection.
+ */
+struct rxrpc_connection *
+rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+{
+	const void *here = __builtin_return_address(0);
+
+	if (conn) {
+		int n = __atomic_add_unless(&conn->usage, 1, 0);
+		if (n > 0)
+			trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
+		else
+			conn = NULL;
+	}
+	return conn;
+}
+
+/*
+ * Release a service connection
+ */
+void rxrpc_put_service_conn(struct rxrpc_connection *conn)
+{
+	const void *here = __builtin_return_address(0);
+	int n;
+
+	n = atomic_dec_return(&conn->usage);
+	trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
+	ASSERTCMP(n, >=, 0);
+	if (n == 0)
+		rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
 }
 
 /*
@@ -242,19 +345,19 @@
 }
 
 /*
- * reap dead connections
+ * reap dead service connections
  */
 static void rxrpc_connection_reaper(struct work_struct *work)
 {
 	struct rxrpc_connection *conn, *_p;
-	unsigned long reap_older_than, earliest, put_time, now;
+	unsigned long reap_older_than, earliest, idle_timestamp, now;
 
 	LIST_HEAD(graveyard);
 
 	_enter("");
 
-	now = ktime_get_seconds();
-	reap_older_than =  now - rxrpc_connection_expiry;
+	now = jiffies;
+	reap_older_than = now - rxrpc_connection_expiry * HZ;
 	earliest = ULONG_MAX;
 
 	write_lock(&rxrpc_connection_lock);
@@ -262,11 +365,17 @@
 		ASSERTCMP(atomic_read(&conn->usage), >, 0);
 		if (likely(atomic_read(&conn->usage) > 1))
 			continue;
+		if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
+			continue;
 
-		put_time = READ_ONCE(conn->put_time);
-		if (time_after(put_time, reap_older_than)) {
-			if (time_before(put_time, earliest))
-				earliest = put_time;
+		idle_timestamp = READ_ONCE(conn->idle_timestamp);
+		_debug("reap CONN %d { u=%d,t=%ld }",
+		       conn->debug_id, atomic_read(&conn->usage),
+		       (long)reap_older_than - (long)idle_timestamp);
+
+		if (time_after(idle_timestamp, reap_older_than)) {
+			if (time_before(idle_timestamp, earliest))
+				earliest = idle_timestamp;
 			continue;
 		}
 
@@ -277,7 +386,7 @@
 			continue;
 
 		if (rxrpc_conn_is_client(conn))
-			rxrpc_unpublish_client_conn(conn);
+			BUG();
 		else
 			rxrpc_unpublish_service_conn(conn);
 
@@ -287,9 +396,9 @@
 
 	if (earliest != ULONG_MAX) {
 		_debug("reschedule reaper %ld", (long) earliest - now);
-		ASSERTCMP(earliest, >, now);
+		ASSERT(time_after(earliest, now));
 		rxrpc_queue_delayed_work(&rxrpc_connection_reap,
-					 (earliest - now) * HZ);
+					 earliest - now);
 	}
 
 	while (!list_empty(&graveyard)) {
@@ -298,16 +407,15 @@
 		list_del_init(&conn->link);
 
 		ASSERTCMP(atomic_read(&conn->usage), ==, 0);
-		skb_queue_purge(&conn->rx_queue);
-		call_rcu(&conn->rcu, rxrpc_destroy_connection);
+		rxrpc_kill_connection(conn);
 	}
 
 	_leave("");
 }
 
 /*
- * preemptively destroy all the connection records rather than waiting for them
- * to time out
+ * preemptively destroy all the service connection records rather than
+ * waiting for them to time out
  */
 void __exit rxrpc_destroy_all_connections(void)
 {
@@ -316,6 +424,8 @@
 
 	_enter("");
 
+	rxrpc_destroy_all_client_connections();
+
 	rxrpc_connection_expiry = 0;
 	cancel_delayed_work(&rxrpc_connection_reap);
 	rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
@@ -330,6 +440,8 @@
 	write_unlock(&rxrpc_connection_lock);
 	BUG_ON(leak);
 
+	ASSERT(list_empty(&rxrpc_connection_proc_list));
+
 	/* Make sure the local and peer records pinned by any dying connections
 	 * are released.
 	 */
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index fd9027c..eef551f 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -65,9 +65,8 @@
  * Insert a service connection into a peer's tree, thereby making it a target
  * for incoming packets.
  */
-static struct rxrpc_connection *
-rxrpc_publish_service_conn(struct rxrpc_peer *peer,
-			   struct rxrpc_connection *conn)
+static void rxrpc_publish_service_conn(struct rxrpc_peer *peer,
+				       struct rxrpc_connection *conn)
 {
 	struct rxrpc_connection *cursor = NULL;
 	struct rxrpc_conn_proto k = conn->proto;
@@ -96,7 +95,7 @@
 	set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
 	write_sequnlock_bh(&peer->service_conn_lock);
 	_leave(" = %d [new]", conn->debug_id);
-	return conn;
+	return;
 
 found_extant_conn:
 	if (atomic_read(&cursor->usage) == 0)
@@ -119,100 +118,58 @@
 }
 
 /*
- * get a record of an incoming connection
+ * Preallocate a service connection.  The connection is placed on the proc and
+ * reap lists so that we don't have to get the lock from BH context.
  */
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
-						   struct sockaddr_rxrpc *srx,
-						   struct sk_buff *skb)
+struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp)
 {
-	struct rxrpc_connection *conn;
+	struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp);
+
+	if (conn) {
+		/* We maintain an extra ref on the connection whilst it is on
+		 * the rxrpc_connections list.
+		 */
+		conn->state = RXRPC_CONN_SERVICE_PREALLOC;
+		atomic_set(&conn->usage, 2);
+
+		write_lock(&rxrpc_connection_lock);
+		list_add_tail(&conn->link, &rxrpc_connections);
+		list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
+		write_unlock(&rxrpc_connection_lock);
+
+		trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+				 atomic_read(&conn->usage),
+				 __builtin_return_address(0));
+	}
+
+	return conn;
+}
+
+/*
+ * Set up an incoming connection.  This is called in BH context with the RCU
+ * read lock held.
+ */
+void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
+				   struct sk_buff *skb)
+{
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	struct rxrpc_peer *peer;
-	const char *new = "old";
 
 	_enter("");
 
-	peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
-	if (!peer) {
-		_debug("no peer");
-		return ERR_PTR(-EBUSY);
-	}
-
-	ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
-
-	rcu_read_lock();
-	peer = rxrpc_lookup_peer_rcu(local, srx);
-	if (peer) {
-		conn = rxrpc_find_service_conn_rcu(peer, skb);
-		if (conn) {
-			if (sp->hdr.securityIndex != conn->security_ix)
-				goto security_mismatch_rcu;
-			if (rxrpc_get_connection_maybe(conn))
-				goto found_extant_connection_rcu;
-
-			/* The conn has expired but we can't remove it without
-			 * the appropriate lock, so we attempt to replace it
-			 * when we have a new candidate.
-			 */
-		}
-
-		if (!rxrpc_get_peer_maybe(peer))
-			peer = NULL;
-	}
-	rcu_read_unlock();
-
-	if (!peer) {
-		peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
-		if (!peer)
-			goto enomem;
-	}
-
-	/* We don't have a matching record yet. */
-	conn = rxrpc_alloc_connection(GFP_NOIO);
-	if (!conn)
-		goto enomem_peer;
-
 	conn->proto.epoch	= sp->hdr.epoch;
 	conn->proto.cid		= sp->hdr.cid & RXRPC_CIDMASK;
-	conn->params.local	= local;
-	conn->params.peer	= peer;
 	conn->params.service_id	= sp->hdr.serviceId;
 	conn->security_ix	= sp->hdr.securityIndex;
 	conn->out_clientflag	= 0;
-	conn->state		= RXRPC_CONN_SERVICE;
-	if (conn->params.service_id)
+	if (conn->security_ix)
 		conn->state	= RXRPC_CONN_SERVICE_UNSECURED;
-
-	rxrpc_get_local(local);
-
-	write_lock(&rxrpc_connection_lock);
-	list_add_tail(&conn->link, &rxrpc_connections);
-	write_unlock(&rxrpc_connection_lock);
+	else
+		conn->state	= RXRPC_CONN_SERVICE;
 
 	/* Make the connection a target for incoming packets. */
-	rxrpc_publish_service_conn(peer, conn);
+	rxrpc_publish_service_conn(conn->params.peer, conn);
 
-	new = "new";
-
-success:
-	_net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
-	_leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
-	return conn;
-
-found_extant_connection_rcu:
-	rcu_read_unlock();
-	goto success;
-
-security_mismatch_rcu:
-	rcu_read_unlock();
-	_leave(" = -EKEYREJECTED");
-	return ERR_PTR(-EKEYREJECTED);
-
-enomem_peer:
-	rxrpc_put_peer(peer);
-enomem:
-	_leave(" = -ENOMEM");
-	return ERR_PTR(-ENOMEM);
+	_net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid);
 }
 
 /*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 991a20d..3ad9f75 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1,6 +1,6 @@
 /* RxRPC packet reception
  *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
  *
  * This program is free software; you can redistribute it and/or
@@ -27,561 +27,928 @@
 #include <net/net_namespace.h>
 #include "ar-internal.h"
 
-/*
- * queue a packet for recvmsg to pass to userspace
- * - the caller must hold a lock on call->lock
- * - must not be called with interrupts disabled (sk_filter() disables BH's)
- * - eats the packet whether successful or not
- * - there must be just one reference to the packet, which the caller passes to
- *   this function
- */
-int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
-			bool force, bool terminal)
+static void rxrpc_proto_abort(const char *why,
+			      struct rxrpc_call *call, rxrpc_seq_t seq)
 {
-	struct rxrpc_skb_priv *sp;
-	struct rxrpc_sock *rx = call->socket;
-	struct sock *sk;
-	int ret;
-
-	_enter(",,%d,%d", force, terminal);
-
-	ASSERT(!irqs_disabled());
-
-	sp = rxrpc_skb(skb);
-	ASSERTCMP(sp->call, ==, call);
-
-	/* if we've already posted the terminal message for a call, then we
-	 * don't post any more */
-	if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
-		_debug("already terminated");
-		ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
-		skb->destructor = NULL;
-		sp->call = NULL;
-		rxrpc_put_call(call);
-		rxrpc_free_skb(skb);
-		return 0;
+	if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) {
+		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+		rxrpc_queue_call(call);
 	}
-
-	sk = &rx->sk;
-
-	if (!force) {
-		/* cast skb->rcvbuf to unsigned...  It's pointless, but
-		 * reduces number of warnings when compiling with -W
-		 * --ANK */
-//		ret = -ENOBUFS;
-//		if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-//		    (unsigned int) sk->sk_rcvbuf)
-//			goto out;
-
-		ret = sk_filter(sk, skb);
-		if (ret < 0)
-			goto out;
-	}
-
-	spin_lock_bh(&sk->sk_receive_queue.lock);
-	if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) &&
-	    !test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-	    call->socket->sk.sk_state != RXRPC_CLOSE) {
-		skb->destructor = rxrpc_packet_destructor;
-		skb->dev = NULL;
-		skb->sk = sk;
-		atomic_add(skb->truesize, &sk->sk_rmem_alloc);
-
-		if (terminal) {
-			_debug("<<<< TERMINAL MESSAGE >>>>");
-			set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags);
-		}
-
-		/* allow interception by a kernel service */
-		if (rx->interceptor) {
-			rx->interceptor(sk, call->user_call_ID, skb);
-			spin_unlock_bh(&sk->sk_receive_queue.lock);
-		} else {
-			_net("post skb %p", skb);
-			__skb_queue_tail(&sk->sk_receive_queue, skb);
-			spin_unlock_bh(&sk->sk_receive_queue.lock);
-
-			if (!sock_flag(sk, SOCK_DEAD))
-				sk->sk_data_ready(sk);
-		}
-		skb = NULL;
-	} else {
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-	}
-	ret = 0;
-
-out:
-	/* release the socket buffer */
-	if (skb) {
-		skb->destructor = NULL;
-		sp->call = NULL;
-		rxrpc_put_call(call);
-		rxrpc_free_skb(skb);
-	}
-
-	_leave(" = %d", ret);
-	return ret;
 }
 
 /*
- * process a DATA packet, posting the packet to the appropriate queue
- * - eats the packet if successful
+ * Do TCP-style congestion management [RFC 5681].
  */
-static int rxrpc_fast_process_data(struct rxrpc_call *call,
-				   struct sk_buff *skb, u32 seq)
+static void rxrpc_congestion_management(struct rxrpc_call *call,
+					struct sk_buff *skb,
+					struct rxrpc_ack_summary *summary,
+					rxrpc_serial_t acked_serial)
 {
-	struct rxrpc_skb_priv *sp;
-	bool terminal;
-	int ret, ackbit, ack;
+	enum rxrpc_congest_change change = rxrpc_cong_no_change;
+	unsigned int cumulative_acks = call->cong_cumul_acks;
+	unsigned int cwnd = call->cong_cwnd;
+	bool resend = false;
 
-	_enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
+	summary->flight_size =
+		(call->tx_top - call->tx_hard_ack) - summary->nr_acks;
 
-	sp = rxrpc_skb(skb);
-	ASSERTCMP(sp->call, ==, NULL);
-
-	spin_lock(&call->lock);
-
-	if (call->state > RXRPC_CALL_COMPLETE)
-		goto discard;
-
-	ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post);
-	ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv);
-	ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten);
-
-	if (seq < call->rx_data_post) {
-		_debug("dup #%u [-%u]", seq, call->rx_data_post);
-		ack = RXRPC_ACK_DUPLICATE;
-		ret = -ENOBUFS;
-		goto discard_and_ack;
-	}
-
-	/* we may already have the packet in the out of sequence queue */
-	ackbit = seq - (call->rx_data_eaten + 1);
-	ASSERTCMP(ackbit, >=, 0);
-	if (__test_and_set_bit(ackbit, call->ackr_window)) {
-		_debug("dup oos #%u [%u,%u]",
-		       seq, call->rx_data_eaten, call->rx_data_post);
-		ack = RXRPC_ACK_DUPLICATE;
-		goto discard_and_ack;
-	}
-
-	if (seq >= call->ackr_win_top) {
-		_debug("exceed #%u [%u]", seq, call->ackr_win_top);
-		__clear_bit(ackbit, call->ackr_window);
-		ack = RXRPC_ACK_EXCEEDS_WINDOW;
-		goto discard_and_ack;
-	}
-
-	if (seq == call->rx_data_expect) {
-		clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags);
-		call->rx_data_expect++;
-	} else if (seq > call->rx_data_expect) {
-		_debug("oos #%u [%u]", seq, call->rx_data_expect);
-		call->rx_data_expect = seq + 1;
-		if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) {
-			ack = RXRPC_ACK_OUT_OF_SEQUENCE;
-			goto enqueue_and_ack;
+	if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) {
+		summary->retrans_timeo = true;
+		call->cong_ssthresh = max_t(unsigned int,
+					    summary->flight_size / 2, 2);
+		cwnd = 1;
+		if (cwnd >= call->cong_ssthresh &&
+		    call->cong_mode == RXRPC_CALL_SLOW_START) {
+			call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+			call->cong_tstamp = skb->tstamp;
+			cumulative_acks = 0;
 		}
-		goto enqueue_packet;
 	}
 
-	if (seq != call->rx_data_post) {
-		_debug("ahead #%u [%u]", seq, call->rx_data_post);
-		goto enqueue_packet;
-	}
+	cumulative_acks += summary->nr_new_acks;
+	cumulative_acks += summary->nr_rot_new_acks;
+	if (cumulative_acks > 255)
+		cumulative_acks = 255;
 
-	if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags))
-		goto protocol_error;
+	summary->mode = call->cong_mode;
+	summary->cwnd = call->cong_cwnd;
+	summary->ssthresh = call->cong_ssthresh;
+	summary->cumulative_acks = cumulative_acks;
+	summary->dup_acks = call->cong_dup_acks;
 
-	/* if the packet need security things doing to it, then it goes down
-	 * the slow path */
-	if (call->conn->security_ix)
-		goto enqueue_packet;
-
-	sp->call = call;
-	rxrpc_get_call(call);
-	terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
-		    !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
-	ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
-	if (ret < 0) {
-		if (ret == -ENOMEM || ret == -ENOBUFS) {
-			__clear_bit(ackbit, call->ackr_window);
-			ack = RXRPC_ACK_NOSPACE;
-			goto discard_and_ack;
+	switch (call->cong_mode) {
+	case RXRPC_CALL_SLOW_START:
+		if (summary->nr_nacks > 0)
+			goto packet_loss_detected;
+		if (summary->cumulative_acks > 0)
+			cwnd += 1;
+		if (cwnd >= call->cong_ssthresh) {
+			call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+			call->cong_tstamp = skb->tstamp;
 		}
 		goto out;
+
+	case RXRPC_CALL_CONGEST_AVOIDANCE:
+		if (summary->nr_nacks > 0)
+			goto packet_loss_detected;
+
+		/* We analyse the number of packets that get ACK'd per RTT
+		 * period and increase the window if we managed to fill it.
+		 */
+		if (call->peer->rtt_usage == 0)
+			goto out;
+		if (ktime_before(skb->tstamp,
+				 ktime_add_ns(call->cong_tstamp,
+					      call->peer->rtt)))
+			goto out_no_clear_ca;
+		change = rxrpc_cong_rtt_window_end;
+		call->cong_tstamp = skb->tstamp;
+		if (cumulative_acks >= cwnd)
+			cwnd++;
+		goto out;
+
+	case RXRPC_CALL_PACKET_LOSS:
+		if (summary->nr_nacks == 0)
+			goto resume_normality;
+
+		if (summary->new_low_nack) {
+			change = rxrpc_cong_new_low_nack;
+			call->cong_dup_acks = 1;
+			if (call->cong_extra > 1)
+				call->cong_extra = 1;
+			goto send_extra_data;
+		}
+
+		call->cong_dup_acks++;
+		if (call->cong_dup_acks < 3)
+			goto send_extra_data;
+
+		change = rxrpc_cong_begin_retransmission;
+		call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT;
+		call->cong_ssthresh = max_t(unsigned int,
+					    summary->flight_size / 2, 2);
+		cwnd = call->cong_ssthresh + 3;
+		call->cong_extra = 0;
+		call->cong_dup_acks = 0;
+		resend = true;
+		goto out;
+
+	case RXRPC_CALL_FAST_RETRANSMIT:
+		if (!summary->new_low_nack) {
+			if (summary->nr_new_acks == 0)
+				cwnd += 1;
+			call->cong_dup_acks++;
+			if (call->cong_dup_acks == 2) {
+				change = rxrpc_cong_retransmit_again;
+				call->cong_dup_acks = 0;
+				resend = true;
+			}
+		} else {
+			change = rxrpc_cong_progress;
+			cwnd = call->cong_ssthresh;
+			if (summary->nr_nacks == 0)
+				goto resume_normality;
+		}
+		goto out;
+
+	default:
+		BUG();
+		goto out;
 	}
 
-	skb = NULL;
-
-	_debug("post #%u", seq);
-	ASSERTCMP(call->rx_data_post, ==, seq);
-	call->rx_data_post++;
-
-	if (sp->hdr.flags & RXRPC_LAST_PACKET)
-		set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
-
-	/* if we've reached an out of sequence packet then we need to drain
-	 * that queue into the socket Rx queue now */
-	if (call->rx_data_post == call->rx_first_oos) {
-		_debug("drain rx oos now");
-		read_lock(&call->state_lock);
-		if (call->state < RXRPC_CALL_COMPLETE &&
-		    !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events))
-			rxrpc_queue_call(call);
-		read_unlock(&call->state_lock);
-	}
-
-	spin_unlock(&call->lock);
-	atomic_inc(&call->ackr_not_idle);
-	rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false);
-	_leave(" = 0 [posted]");
-	return 0;
-
-protocol_error:
-	ret = -EBADMSG;
+resume_normality:
+	change = rxrpc_cong_cleared_nacks;
+	call->cong_dup_acks = 0;
+	call->cong_extra = 0;
+	call->cong_tstamp = skb->tstamp;
+	if (cwnd < call->cong_ssthresh)
+		call->cong_mode = RXRPC_CALL_SLOW_START;
+	else
+		call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
 out:
-	spin_unlock(&call->lock);
-	_leave(" = %d", ret);
-	return ret;
-
-discard_and_ack:
-	_debug("discard and ACK packet %p", skb);
-	__rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
-discard:
-	spin_unlock(&call->lock);
-	rxrpc_free_skb(skb);
-	_leave(" = 0 [discarded]");
-	return 0;
-
-enqueue_and_ack:
-	__rxrpc_propose_ACK(call, ack, sp->hdr.serial, true);
-enqueue_packet:
-	_net("defer skb %p", skb);
-	spin_unlock(&call->lock);
-	skb_queue_tail(&call->rx_queue, skb);
-	atomic_inc(&call->ackr_not_idle);
-	read_lock(&call->state_lock);
-	if (call->state < RXRPC_CALL_DEAD)
+	cumulative_acks = 0;
+out_no_clear_ca:
+	if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1)
+		cwnd = RXRPC_RXTX_BUFF_SIZE - 1;
+	call->cong_cwnd = cwnd;
+	call->cong_cumul_acks = cumulative_acks;
+	trace_rxrpc_congest(call, summary, acked_serial, change);
+	if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
 		rxrpc_queue_call(call);
-	read_unlock(&call->state_lock);
-	_leave(" = 0 [queued]");
-	return 0;
+	return;
+
+packet_loss_detected:
+	change = rxrpc_cong_saw_nack;
+	call->cong_mode = RXRPC_CALL_PACKET_LOSS;
+	call->cong_dup_acks = 0;
+	goto send_extra_data;
+
+send_extra_data:
+	/* Send some previously unsent DATA if we have some to advance the ACK
+	 * state.
+	 */
+	if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
+	    RXRPC_TX_ANNO_LAST ||
+	    summary->nr_acks != call->tx_top - call->tx_hard_ack) {
+		call->cong_extra++;
+		wake_up(&call->waitq);
+	}
+	goto out_no_clear_ca;
 }
 
 /*
- * assume an implicit ACKALL of the transmission phase of a client socket upon
- * reception of the first reply packet
+ * Ping the other end to fill our RTT cache and to retrieve the rwind
+ * and MTU parameters.
  */
-static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
+static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb,
+			    int skew)
 {
-	write_lock_bh(&call->state_lock);
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	ktime_t now = skb->tstamp;
+
+	if (call->peer->rtt_usage < 3 ||
+	    ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+				  true, true,
+				  rxrpc_propose_ack_ping_for_params);
+}
+
+/*
+ * Apply a hard ACK by advancing the Tx window.
+ */
+static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
+				   struct rxrpc_ack_summary *summary)
+{
+	struct sk_buff *skb, *list = NULL;
+	int ix;
+	u8 annotation;
+
+	if (call->acks_lowest_nak == call->tx_hard_ack) {
+		call->acks_lowest_nak = to;
+	} else if (before_eq(call->acks_lowest_nak, to)) {
+		summary->new_low_nack = true;
+		call->acks_lowest_nak = to;
+	}
+
+	spin_lock(&call->lock);
+
+	while (before(call->tx_hard_ack, to)) {
+		call->tx_hard_ack++;
+		ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK;
+		skb = call->rxtx_buffer[ix];
+		annotation = call->rxtx_annotations[ix];
+		rxrpc_see_skb(skb, rxrpc_skb_tx_rotated);
+		call->rxtx_buffer[ix] = NULL;
+		call->rxtx_annotations[ix] = 0;
+		skb->next = list;
+		list = skb;
+
+		if (annotation & RXRPC_TX_ANNO_LAST)
+			set_bit(RXRPC_CALL_TX_LAST, &call->flags);
+		if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK)
+			summary->nr_rot_new_acks++;
+	}
+
+	spin_unlock(&call->lock);
+
+	trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ?
+				    rxrpc_transmit_rotate_last :
+				    rxrpc_transmit_rotate));
+	wake_up(&call->waitq);
+
+	while (list) {
+		skb = list;
+		list = skb->next;
+		skb->next = NULL;
+		rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+	}
+}
+
+/*
+ * End the transmission phase of a call.
+ *
+ * This occurs when we get an ACKALL packet, the first DATA packet of a reply,
+ * or a final ACK packet.
+ */
+static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
+			       const char *abort_why)
+{
+
+	ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
+
+	write_lock(&call->state_lock);
 
 	switch (call->state) {
+	case RXRPC_CALL_CLIENT_SEND_REQUEST:
 	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
-		call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
-		call->acks_latest = serial;
+		if (reply_begun)
+			call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+		else
+			call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+		break;
 
-		_debug("implicit ACKALL %%%u", call->acks_latest);
-		set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events);
-		write_unlock_bh(&call->state_lock);
-
-		if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
-			clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
-			clear_bit(RXRPC_CALL_EV_RESEND, &call->events);
-			clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-		}
+	case RXRPC_CALL_SERVER_AWAIT_ACK:
+		__rxrpc_call_completed(call);
+		rxrpc_notify_socket(call);
 		break;
 
 	default:
-		write_unlock_bh(&call->state_lock);
-		break;
+		goto bad_state;
+	}
+
+	write_unlock(&call->state_lock);
+	if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
+		rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
+				  rxrpc_propose_ack_client_tx_end);
+		trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
+	} else {
+		trace_rxrpc_transmit(call, rxrpc_transmit_end);
+	}
+	_leave(" = ok");
+	return true;
+
+bad_state:
+	write_unlock(&call->state_lock);
+	kdebug("end_tx %s", rxrpc_call_states[call->state]);
+	rxrpc_proto_abort(abort_why, call, call->tx_top);
+	return false;
+}
+
+/*
+ * Begin the reply reception phase of a call.
+ */
+static bool rxrpc_receiving_reply(struct rxrpc_call *call)
+{
+	struct rxrpc_ack_summary summary = { 0 };
+	rxrpc_seq_t top = READ_ONCE(call->tx_top);
+
+	if (call->ackr_reason) {
+		spin_lock_bh(&call->lock);
+		call->ackr_reason = 0;
+		call->resend_at = call->expire_at;
+		call->ack_at = call->expire_at;
+		spin_unlock_bh(&call->lock);
+		rxrpc_set_timer(call, rxrpc_timer_init_for_reply,
+				ktime_get_real());
+	}
+
+	if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+		rxrpc_rotate_tx_window(call, top, &summary);
+	if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
+		rxrpc_proto_abort("TXL", call, top);
+		return false;
+	}
+	if (!rxrpc_end_tx_phase(call, true, "ETD"))
+		return false;
+	call->tx_phase = false;
+	return true;
+}
+
+/*
+ * Scan a jumbo packet to validate its structure and to work out how many
+ * subpackets it contains.
+ *
+ * A jumbo packet is a collection of consecutive packets glued together with
+ * little headers between that indicate how to change the initial header for
+ * each subpacket.
+ *
+ * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but
+ * the last are RXRPC_JUMBO_DATALEN in size.  The last subpacket may be of any
+ * size.
+ */
+static bool rxrpc_validate_jumbo(struct sk_buff *skb)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	unsigned int offset = sizeof(struct rxrpc_wire_header);
+	unsigned int len = skb->len;
+	int nr_jumbo = 1;
+	u8 flags = sp->hdr.flags;
+
+	do {
+		nr_jumbo++;
+		if (len - offset < RXRPC_JUMBO_SUBPKTLEN)
+			goto protocol_error;
+		if (flags & RXRPC_LAST_PACKET)
+			goto protocol_error;
+		offset += RXRPC_JUMBO_DATALEN;
+		if (skb_copy_bits(skb, offset, &flags, 1) < 0)
+			goto protocol_error;
+		offset += sizeof(struct rxrpc_jumbo_header);
+	} while (flags & RXRPC_JUMBO_PACKET);
+
+	sp->nr_jumbo = nr_jumbo;
+	return true;
+
+protocol_error:
+	return false;
+}
+
+/*
+ * Handle reception of a duplicate packet.
+ *
+ * We have to take care to avoid an attack here whereby we're given a series of
+ * jumbograms, each with a sequence number one before the preceding one and
+ * filled up to maximum UDP size.  If they never send us the first packet in
+ * the sequence, they can cause us to have to hold on to around 2MiB of kernel
+ * space until the call times out.
+ *
+ * We limit the space usage by only accepting three duplicate jumbo packets per
+ * call.  After that, we tell the other side we're no longer accepting jumbos
+ * (that information is encoded in the ACK packet).
+ */
+static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
+				 u8 annotation, bool *_jumbo_bad)
+{
+	/* Discard normal packets that are duplicates. */
+	if (annotation == 0)
+		return;
+
+	/* Skip jumbo subpackets that are duplicates.  When we've had three or
+	 * more partially duplicate jumbo packets, we refuse to take any more
+	 * jumbos for this call.
+	 */
+	if (!*_jumbo_bad) {
+		call->nr_jumbo_bad++;
+		*_jumbo_bad = true;
 	}
 }
 
 /*
- * post an incoming packet to the nominated call to deal with
- * - must get rid of the sk_buff, either by freeing it or by queuing it
+ * Process a DATA packet, adding the packet to the Rx ring.
  */
-void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
+static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
+			     u16 skew)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	unsigned int offset = sizeof(struct rxrpc_wire_header);
+	unsigned int ix;
+	rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
+	rxrpc_seq_t seq = sp->hdr.seq, hard_ack;
+	bool immediate_ack = false, jumbo_bad = false, queued;
+	u16 len;
+	u8 ack = 0, flags, annotation = 0;
+
+	_enter("{%u,%u},{%u,%u}",
+	       call->rx_hard_ack, call->rx_top, skb->len, seq);
+
+	_proto("Rx DATA %%%u { #%u f=%02x }",
+	       sp->hdr.serial, seq, sp->hdr.flags);
+
+	if (call->state >= RXRPC_CALL_COMPLETE)
+		return;
+
+	/* Received data implicitly ACKs all of the request packets we sent
+	 * when we're acting as a client.
+	 */
+	if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+	     call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
+	    !rxrpc_receiving_reply(call))
+		return;
+
+	call->ackr_prev_seq = seq;
+
+	hard_ack = READ_ONCE(call->rx_hard_ack);
+	if (after(seq, hard_ack + call->rx_winsize)) {
+		ack = RXRPC_ACK_EXCEEDS_WINDOW;
+		ack_serial = serial;
+		goto ack;
+	}
+
+	flags = sp->hdr.flags;
+	if (flags & RXRPC_JUMBO_PACKET) {
+		if (call->nr_jumbo_bad > 3) {
+			ack = RXRPC_ACK_NOSPACE;
+			ack_serial = serial;
+			goto ack;
+		}
+		annotation = 1;
+	}
+
+next_subpacket:
+	queued = false;
+	ix = seq & RXRPC_RXTX_BUFF_MASK;
+	len = skb->len;
+	if (flags & RXRPC_JUMBO_PACKET)
+		len = RXRPC_JUMBO_DATALEN;
+
+	if (flags & RXRPC_LAST_PACKET) {
+		if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+		    seq != call->rx_top)
+			return rxrpc_proto_abort("LSN", call, seq);
+	} else {
+		if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+		    after_eq(seq, call->rx_top))
+			return rxrpc_proto_abort("LSA", call, seq);
+	}
+
+	if (before_eq(seq, hard_ack)) {
+		ack = RXRPC_ACK_DUPLICATE;
+		ack_serial = serial;
+		goto skip;
+	}
+
+	if (flags & RXRPC_REQUEST_ACK && !ack) {
+		ack = RXRPC_ACK_REQUESTED;
+		ack_serial = serial;
+	}
+
+	if (call->rxtx_buffer[ix]) {
+		rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad);
+		if (ack != RXRPC_ACK_DUPLICATE) {
+			ack = RXRPC_ACK_DUPLICATE;
+			ack_serial = serial;
+		}
+		immediate_ack = true;
+		goto skip;
+	}
+
+	/* Queue the packet.  We use a couple of memory barriers here as need
+	 * to make sure that rx_top is perceived to be set after the buffer
+	 * pointer and that the buffer pointer is set after the annotation and
+	 * the skb data.
+	 *
+	 * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
+	 * and also rxrpc_fill_out_ack().
+	 */
+	rxrpc_get_skb(skb, rxrpc_skb_rx_got);
+	call->rxtx_annotations[ix] = annotation;
+	smp_wmb();
+	call->rxtx_buffer[ix] = skb;
+	if (after(seq, call->rx_top)) {
+		smp_store_release(&call->rx_top, seq);
+	} else if (before(seq, call->rx_top)) {
+		/* Send an immediate ACK if we fill in a hole */
+		if (!ack) {
+			ack = RXRPC_ACK_DELAY;
+			ack_serial = serial;
+		}
+		immediate_ack = true;
+	}
+	if (flags & RXRPC_LAST_PACKET) {
+		set_bit(RXRPC_CALL_RX_LAST, &call->flags);
+		trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq);
+	} else {
+		trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq);
+	}
+	queued = true;
+
+	if (after_eq(seq, call->rx_expect_next)) {
+		if (after(seq, call->rx_expect_next)) {
+			_net("OOS %u > %u", seq, call->rx_expect_next);
+			ack = RXRPC_ACK_OUT_OF_SEQUENCE;
+			ack_serial = serial;
+		}
+		call->rx_expect_next = seq + 1;
+	}
+
+skip:
+	offset += len;
+	if (flags & RXRPC_JUMBO_PACKET) {
+		if (skb_copy_bits(skb, offset, &flags, 1) < 0)
+			return rxrpc_proto_abort("XJF", call, seq);
+		offset += sizeof(struct rxrpc_jumbo_header);
+		seq++;
+		serial++;
+		annotation++;
+		if (flags & RXRPC_JUMBO_PACKET)
+			annotation |= RXRPC_RX_ANNO_JLAST;
+		if (after(seq, hard_ack + call->rx_winsize)) {
+			ack = RXRPC_ACK_EXCEEDS_WINDOW;
+			ack_serial = serial;
+			if (!jumbo_bad) {
+				call->nr_jumbo_bad++;
+				jumbo_bad = true;
+			}
+			goto ack;
+		}
+
+		_proto("Rx DATA Jumbo %%%u", serial);
+		goto next_subpacket;
+	}
+
+	if (queued && flags & RXRPC_LAST_PACKET && !ack) {
+		ack = RXRPC_ACK_DELAY;
+		ack_serial = serial;
+	}
+
+ack:
+	if (ack)
+		rxrpc_propose_ACK(call, ack, skew, ack_serial,
+				  immediate_ack, true,
+				  rxrpc_propose_ack_input_data);
+
+	if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1)
+		rxrpc_notify_socket(call);
+	_leave(" [queued]");
+}
+
+/*
+ * Process a requested ACK.
+ */
+static void rxrpc_input_requested_ack(struct rxrpc_call *call,
+				      ktime_t resp_time,
+				      rxrpc_serial_t orig_serial,
+				      rxrpc_serial_t ack_serial)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	ktime_t sent_at;
+	int ix;
+
+	for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) {
+		skb = call->rxtx_buffer[ix];
+		if (!skb)
+			continue;
+
+		sp = rxrpc_skb(skb);
+		if (sp->hdr.serial != orig_serial)
+			continue;
+		smp_rmb();
+		sent_at = skb->tstamp;
+		goto found;
+	}
+	return;
+
+found:
+	rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack,
+			   orig_serial, ack_serial, sent_at, resp_time);
+}
+
+/*
+ * Process a ping response.
+ */
+static void rxrpc_input_ping_response(struct rxrpc_call *call,
+				      ktime_t resp_time,
+				      rxrpc_serial_t orig_serial,
+				      rxrpc_serial_t ack_serial)
+{
+	rxrpc_serial_t ping_serial;
+	ktime_t ping_time;
+
+	ping_time = call->ackr_ping_time;
+	smp_rmb();
+	ping_serial = call->ackr_ping;
+
+	if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
+	    before(orig_serial, ping_serial))
+		return;
+	clear_bit(RXRPC_CALL_PINGING, &call->flags);
+	if (after(orig_serial, ping_serial))
+		return;
+
+	rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response,
+			   orig_serial, ack_serial, ping_time, resp_time);
+}
+
+/*
+ * Process the extra information that may be appended to an ACK packet
+ */
+static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
+				struct rxrpc_ackinfo *ackinfo)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct rxrpc_peer *peer;
+	unsigned int mtu;
+	u32 rwind = ntohl(ackinfo->rwind);
+
+	_proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
+	       sp->hdr.serial,
+	       ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
+	       rwind, ntohl(ackinfo->jumbo_max));
+
+	if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+		rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+	call->tx_winsize = rwind;
+	if (call->cong_ssthresh > rwind)
+		call->cong_ssthresh = rwind;
+
+	mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));
+
+	peer = call->peer;
+	if (mtu < peer->maxdata) {
+		spin_lock_bh(&peer->lock);
+		peer->maxdata = mtu;
+		peer->mtu = mtu + peer->hdrsize;
+		spin_unlock_bh(&peer->lock);
+		_net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
+	}
+}
+
+/*
+ * Process individual soft ACKs.
+ *
+ * Each ACK in the array corresponds to one packet and can be either an ACK or
+ * a NAK.  If we get find an explicitly NAK'd packet we resend immediately;
+ * packets that lie beyond the end of the ACK list are scheduled for resend by
+ * the timer on the basis that the peer might just not have processed them at
+ * the time the ACK was sent.
+ */
+static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
+				  rxrpc_seq_t seq, int nr_acks,
+				  struct rxrpc_ack_summary *summary)
+{
+	int ix;
+	u8 annotation, anno_type;
+
+	for (; nr_acks > 0; nr_acks--, seq++) {
+		ix = seq & RXRPC_RXTX_BUFF_MASK;
+		annotation = call->rxtx_annotations[ix];
+		anno_type = annotation & RXRPC_TX_ANNO_MASK;
+		annotation &= ~RXRPC_TX_ANNO_MASK;
+		switch (*acks++) {
+		case RXRPC_ACK_TYPE_ACK:
+			summary->nr_acks++;
+			if (anno_type == RXRPC_TX_ANNO_ACK)
+				continue;
+			summary->nr_new_acks++;
+			call->rxtx_annotations[ix] =
+				RXRPC_TX_ANNO_ACK | annotation;
+			break;
+		case RXRPC_ACK_TYPE_NACK:
+			if (!summary->nr_nacks &&
+			    call->acks_lowest_nak != seq) {
+				call->acks_lowest_nak = seq;
+				summary->new_low_nack = true;
+			}
+			summary->nr_nacks++;
+			if (anno_type == RXRPC_TX_ANNO_NAK)
+				continue;
+			summary->nr_new_nacks++;
+			if (anno_type == RXRPC_TX_ANNO_RETRANS)
+				continue;
+			call->rxtx_annotations[ix] =
+				RXRPC_TX_ANNO_NAK | annotation;
+			break;
+		default:
+			return rxrpc_proto_abort("SFT", call, 0);
+		}
+	}
+}
+
+/*
+ * Process an ACK packet.
+ *
+ * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet
+ * in the ACK array.  Anything before that is hard-ACK'd and may be discarded.
+ *
+ * A hard-ACK means that a packet has been processed and may be discarded; a
+ * soft-ACK means that the packet may be discarded and retransmission
+ * requested.  A phase is complete when all packets are hard-ACK'd.
+ */
+static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
+			    u16 skew)
+{
+	struct rxrpc_ack_summary summary = { 0 };
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	union {
+		struct rxrpc_ackpacket ack;
+		struct rxrpc_ackinfo info;
+		u8 acks[RXRPC_MAXACKS];
+	} buf;
+	rxrpc_serial_t acked_serial;
+	rxrpc_seq_t first_soft_ack, hard_ack;
+	int nr_acks, offset, ioffset;
+
+	_enter("");
+
+	offset = sizeof(struct rxrpc_wire_header);
+	if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) {
+		_debug("extraction failure");
+		return rxrpc_proto_abort("XAK", call, 0);
+	}
+	offset += sizeof(buf.ack);
+
+	acked_serial = ntohl(buf.ack.serial);
+	first_soft_ack = ntohl(buf.ack.firstPacket);
+	hard_ack = first_soft_ack - 1;
+	nr_acks = buf.ack.nAcks;
+	summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
+			      buf.ack.reason : RXRPC_ACK__INVALID);
+
+	trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks);
+
+	_proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+	       sp->hdr.serial,
+	       ntohs(buf.ack.maxSkew),
+	       first_soft_ack,
+	       ntohl(buf.ack.previousPacket),
+	       acked_serial,
+	       rxrpc_ack_names[summary.ack_reason],
+	       buf.ack.nAcks);
+
+	if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE)
+		rxrpc_input_ping_response(call, skb->tstamp, acked_serial,
+					  sp->hdr.serial);
+	if (buf.ack.reason == RXRPC_ACK_REQUESTED)
+		rxrpc_input_requested_ack(call, skb->tstamp, acked_serial,
+					  sp->hdr.serial);
+
+	if (buf.ack.reason == RXRPC_ACK_PING) {
+		_proto("Rx ACK %%%u PING Request", sp->hdr.serial);
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
+				  skew, sp->hdr.serial, true, true,
+				  rxrpc_propose_ack_respond_to_ping);
+	} else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
+		rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
+				  skew, sp->hdr.serial, true, true,
+				  rxrpc_propose_ack_respond_to_ack);
+	}
+
+	ioffset = offset + nr_acks + 3;
+	if (skb->len >= ioffset + sizeof(buf.info)) {
+		if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
+			return rxrpc_proto_abort("XAI", call, 0);
+		rxrpc_input_ackinfo(call, skb, &buf.info);
+	}
+
+	if (first_soft_ack == 0)
+		return rxrpc_proto_abort("AK0", call, 0);
+
+	/* Ignore ACKs unless we are or have just been transmitting. */
+	switch (call->state) {
+	case RXRPC_CALL_CLIENT_SEND_REQUEST:
+	case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+	case RXRPC_CALL_SERVER_SEND_REPLY:
+	case RXRPC_CALL_SERVER_AWAIT_ACK:
+		break;
+	default:
+		return;
+	}
+
+	/* Discard any out-of-order or duplicate ACKs. */
+	if (before_eq(sp->hdr.serial, call->acks_latest)) {
+		_debug("discard ACK %d <= %d",
+		       sp->hdr.serial, call->acks_latest);
+		return;
+	}
+	call->acks_latest_ts = skb->tstamp;
+	call->acks_latest = sp->hdr.serial;
+
+	if (before(hard_ack, call->tx_hard_ack) ||
+	    after(hard_ack, call->tx_top))
+		return rxrpc_proto_abort("AKW", call, 0);
+	if (nr_acks > call->tx_top - hard_ack)
+		return rxrpc_proto_abort("AKN", call, 0);
+
+	if (after(hard_ack, call->tx_hard_ack))
+		rxrpc_rotate_tx_window(call, hard_ack, &summary);
+
+	if (nr_acks > 0) {
+		if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0)
+			return rxrpc_proto_abort("XSA", call, 0);
+		rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks,
+				      &summary);
+	}
+
+	if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
+		rxrpc_end_tx_phase(call, false, "ETA");
+		return;
+	}
+
+	if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
+	    RXRPC_TX_ANNO_LAST &&
+	    summary.nr_acks == call->tx_top - hard_ack)
+		rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+				  false, true,
+				  rxrpc_propose_ack_ping_for_lost_reply);
+
+	return rxrpc_congestion_management(call, skb, &summary, acked_serial);
+}
+
+/*
+ * Process an ACKALL packet.
+ */
+static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
+{
+	struct rxrpc_ack_summary summary = { 0 };
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+	_proto("Rx ACKALL %%%u", sp->hdr.serial);
+
+	rxrpc_rotate_tx_window(call, call->tx_top, &summary);
+	if (test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+		rxrpc_end_tx_phase(call, false, "ETL");
+}
+
+/*
+ * Process an ABORT packet.
+ */
+static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	__be32 wtmp;
-	u32 hi_serial, abort_code;
+	u32 abort_code = RX_CALL_DEAD;
+
+	_enter("");
+
+	if (skb->len >= 4 &&
+	    skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+			  &wtmp, sizeof(wtmp)) >= 0)
+		abort_code = ntohl(wtmp);
+
+	_proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
+
+	if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+				      abort_code, ECONNABORTED))
+		rxrpc_notify_socket(call);
+}
+
+/*
+ * Process an incoming call packet.
+ */
+static void rxrpc_input_call_packet(struct rxrpc_call *call,
+				    struct sk_buff *skb, u16 skew)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
 	_enter("%p,%p", call, skb);
 
-	ASSERT(!irqs_disabled());
-
-#if 0 // INJECT RX ERROR
-	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
-		static int skip = 0;
-		if (++skip == 3) {
-			printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n");
-			skip = 0;
-			goto free_packet;
-		}
-	}
-#endif
-
-	/* track the latest serial number on this connection for ACK packet
-	 * information */
-	hi_serial = atomic_read(&call->conn->hi_serial);
-	while (sp->hdr.serial > hi_serial)
-		hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
-					   sp->hdr.serial);
-
-	/* request ACK generation for any ACK or DATA packet that requests
-	 * it */
-	if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
-		_proto("ACK Requested on %%%u", sp->hdr.serial);
-		rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
-	}
-
 	switch (sp->hdr.type) {
-	case RXRPC_PACKET_TYPE_ABORT:
-		_debug("abort");
+	case RXRPC_PACKET_TYPE_DATA:
+		rxrpc_input_data(call, skb, skew);
+		break;
 
-		if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
-			goto protocol_error;
-
-		abort_code = ntohl(wtmp);
-		_proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
-
-		write_lock_bh(&call->state_lock);
-		if (call->state < RXRPC_CALL_COMPLETE) {
-			call->state = RXRPC_CALL_REMOTELY_ABORTED;
-			call->remote_abort = abort_code;
-			set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
-			rxrpc_queue_call(call);
-		}
-		goto free_packet_unlock;
+	case RXRPC_PACKET_TYPE_ACK:
+		rxrpc_input_ack(call, skb, skew);
+		break;
 
 	case RXRPC_PACKET_TYPE_BUSY:
 		_proto("Rx BUSY %%%u", sp->hdr.serial);
 
-		if (rxrpc_conn_is_service(call->conn))
-			goto protocol_error;
+		/* Just ignore BUSY packets from the server; the retry and
+		 * lifespan timers will take care of business.  BUSY packets
+		 * from the client don't make sense.
+		 */
+		break;
 
-		write_lock_bh(&call->state_lock);
-		switch (call->state) {
-		case RXRPC_CALL_CLIENT_SEND_REQUEST:
-			call->state = RXRPC_CALL_SERVER_BUSY;
-			set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
-			rxrpc_queue_call(call);
-		case RXRPC_CALL_SERVER_BUSY:
-			goto free_packet_unlock;
-		default:
-			goto protocol_error_locked;
-		}
+	case RXRPC_PACKET_TYPE_ABORT:
+		rxrpc_input_abort(call, skb);
+		break;
+
+	case RXRPC_PACKET_TYPE_ACKALL:
+		rxrpc_input_ackall(call, skb);
+		break;
 
 	default:
 		_proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial);
-		goto protocol_error;
-
-	case RXRPC_PACKET_TYPE_DATA:
-		_proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
-
-		if (sp->hdr.seq == 0)
-			goto protocol_error;
-
-		call->ackr_prev_seq = sp->hdr.seq;
-
-		/* received data implicitly ACKs all of the request packets we
-		 * sent when we're acting as a client */
-		if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
-			rxrpc_assume_implicit_ackall(call, sp->hdr.serial);
-
-		switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) {
-		case 0:
-			skb = NULL;
-			goto done;
-
-		default:
-			BUG();
-
-			/* data packet received beyond the last packet */
-		case -EBADMSG:
-			goto protocol_error;
-		}
-
-	case RXRPC_PACKET_TYPE_ACKALL:
-	case RXRPC_PACKET_TYPE_ACK:
-		/* ACK processing is done in process context */
-		read_lock_bh(&call->state_lock);
-		if (call->state < RXRPC_CALL_DEAD) {
-			skb_queue_tail(&call->rx_queue, skb);
-			rxrpc_queue_call(call);
-			skb = NULL;
-		}
-		read_unlock_bh(&call->state_lock);
-		goto free_packet;
-	}
-
-protocol_error:
-	_debug("protocol error");
-	write_lock_bh(&call->state_lock);
-protocol_error_locked:
-	if (call->state <= RXRPC_CALL_COMPLETE) {
-		call->state = RXRPC_CALL_LOCALLY_ABORTED;
-		call->local_abort = RX_PROTOCOL_ERROR;
-		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-		rxrpc_queue_call(call);
-	}
-free_packet_unlock:
-	write_unlock_bh(&call->state_lock);
-free_packet:
-	rxrpc_free_skb(skb);
-done:
-	_leave("");
-}
-
-/*
- * split up a jumbo data packet
- */
-static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
-				       struct sk_buff *jumbo)
-{
-	struct rxrpc_jumbo_header jhdr;
-	struct rxrpc_skb_priv *sp;
-	struct sk_buff *part;
-
-	_enter(",{%u,%u}", jumbo->data_len, jumbo->len);
-
-	sp = rxrpc_skb(jumbo);
-
-	do {
-		sp->hdr.flags &= ~RXRPC_JUMBO_PACKET;
-
-		/* make a clone to represent the first subpacket in what's left
-		 * of the jumbo packet */
-		part = skb_clone(jumbo, GFP_ATOMIC);
-		if (!part) {
-			/* simply ditch the tail in the event of ENOMEM */
-			pskb_trim(jumbo, RXRPC_JUMBO_DATALEN);
-			break;
-		}
-		rxrpc_new_skb(part);
-
-		pskb_trim(part, RXRPC_JUMBO_DATALEN);
-
-		if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN))
-			goto protocol_error;
-
-		if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0)
-			goto protocol_error;
-		if (!pskb_pull(jumbo, sizeof(jhdr)))
-			BUG();
-
-		sp->hdr.seq	+= 1;
-		sp->hdr.serial	+= 1;
-		sp->hdr.flags	= jhdr.flags;
-		sp->hdr._rsvd	= ntohs(jhdr._rsvd);
-
-		_proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1);
-
-		rxrpc_fast_process_packet(call, part);
-		part = NULL;
-
-	} while (sp->hdr.flags & RXRPC_JUMBO_PACKET);
-
-	rxrpc_fast_process_packet(call, jumbo);
-	_leave("");
-	return;
-
-protocol_error:
-	_debug("protocol error");
-	rxrpc_free_skb(part);
-	rxrpc_free_skb(jumbo);
-	write_lock_bh(&call->state_lock);
-	if (call->state <= RXRPC_CALL_COMPLETE) {
-		call->state = RXRPC_CALL_LOCALLY_ABORTED;
-		call->local_abort = RX_PROTOCOL_ERROR;
-		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-		rxrpc_queue_call(call);
-	}
-	write_unlock_bh(&call->state_lock);
-	_leave("");
-}
-
-/*
- * post an incoming packet to the appropriate call/socket to deal with
- * - must get rid of the sk_buff, either by freeing it or by queuing it
- */
-static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
-				      struct sk_buff *skb)
-{
-	struct rxrpc_skb_priv *sp;
-
-	_enter("%p,%p", call, skb);
-
-	sp = rxrpc_skb(skb);
-
-	_debug("extant call [%d]", call->state);
-
-	read_lock(&call->state_lock);
-	switch (call->state) {
-	case RXRPC_CALL_LOCALLY_ABORTED:
-		if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
-			rxrpc_queue_call(call);
-			goto free_unlock;
-		}
-	case RXRPC_CALL_REMOTELY_ABORTED:
-	case RXRPC_CALL_NETWORK_ERROR:
-	case RXRPC_CALL_DEAD:
-		goto dead_call;
-	case RXRPC_CALL_COMPLETE:
-	case RXRPC_CALL_CLIENT_FINAL_ACK:
-		/* complete server call */
-		if (rxrpc_conn_is_service(call->conn))
-			goto dead_call;
-		/* resend last packet of a completed call */
-		_debug("final ack again");
-		rxrpc_get_call(call);
-		set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
-		rxrpc_queue_call(call);
-		goto free_unlock;
-	default:
 		break;
 	}
 
-	read_unlock(&call->state_lock);
-	rxrpc_get_call(call);
-
-	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
-	    sp->hdr.flags & RXRPC_JUMBO_PACKET)
-		rxrpc_process_jumbo_packet(call, skb);
-	else
-		rxrpc_fast_process_packet(call, skb);
-
-	rxrpc_put_call(call);
-	goto done;
-
-dead_call:
-	if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
-		skb->priority = RX_CALL_DEAD;
-		rxrpc_reject_packet(call->conn->params.local, skb);
-		goto unlock;
-	}
-free_unlock:
-	rxrpc_free_skb(skb);
-unlock:
-	read_unlock(&call->state_lock);
-done:
 	_leave("");
 }
 
 /*
  * post connection-level events to the connection
- * - this includes challenges, responses and some aborts
+ * - this includes challenges, responses, some aborts and call terminal packet
+ *   retransmission.
  */
-static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
+static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
 				      struct sk_buff *skb)
 {
 	_enter("%p,%p", conn, skb);
 
 	skb_queue_tail(&conn->rx_queue, skb);
-	return rxrpc_queue_conn(conn);
+	rxrpc_queue_conn(conn);
 }
 
 /*
@@ -598,6 +965,17 @@
 }
 
 /*
+ * put a packet up for transport-level abort
+ */
+static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+{
+	CHECK_SLAB_OKAY(&local->usage);
+
+	skb_queue_tail(&local->reject_queue, skb);
+	rxrpc_queue_local(local);
+}
+
+/*
  * Extract the wire header from a packet and translate the byte order.
  */
 static noinline
@@ -608,8 +986,6 @@
 	/* dig out the RxRPC connection details */
 	if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
 		return -EBADMSG;
-	if (!pskb_pull(skb, sizeof(whdr)))
-		BUG();
 
 	memset(sp, 0, sizeof(*sp));
 	sp->hdr.epoch		= ntohl(whdr.epoch);
@@ -634,19 +1010,22 @@
  * shut down and the local endpoint from going away, thus sk_user_data will not
  * be cleared until this function returns.
  */
-void rxrpc_data_ready(struct sock *sk)
+void rxrpc_data_ready(struct sock *udp_sk)
 {
 	struct rxrpc_connection *conn;
+	struct rxrpc_channel *chan;
+	struct rxrpc_call *call;
 	struct rxrpc_skb_priv *sp;
-	struct rxrpc_local *local = sk->sk_user_data;
+	struct rxrpc_local *local = udp_sk->sk_user_data;
 	struct sk_buff *skb;
-	int ret;
+	unsigned int channel;
+	int ret, skew;
 
-	_enter("%p", sk);
+	_enter("%p", udp_sk);
 
 	ASSERT(!irqs_disabled());
 
-	skb = skb_recv_datagram(sk, 0, 1, &ret);
+	skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
 	if (!skb) {
 		if (ret == -EAGAIN)
 			return;
@@ -654,13 +1033,13 @@
 		return;
 	}
 
-	rxrpc_new_skb(skb);
+	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
 
 	_net("recv skb %p", skb);
 
 	/* we'll probably need to checksum it (didn't call sock_recvmsg) */
 	if (skb_checksum_complete(skb)) {
-		rxrpc_free_skb(skb);
+		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 		__UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0);
 		_leave(" [CSUM failed]");
 		return;
@@ -674,13 +1053,21 @@
 	skb_orphan(skb);
 	sp = rxrpc_skb(skb);
 
-	_net("Rx UDP packet from %08x:%04hu",
-	     ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
-
 	/* dig out the RxRPC connection details */
 	if (rxrpc_extract_header(sp, skb) < 0)
 		goto bad_message;
 
+	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
+		static int lose;
+		if ((lose++ & 7) == 7) {
+			trace_rxrpc_rx_lose(sp);
+			rxrpc_lose_skb(skb, rxrpc_skb_rx_lost);
+			return;
+		}
+	}
+
+	trace_rxrpc_rx_packet(sp);
+
 	_net("Rx RxRPC %s ep=%x call=%x:%x",
 	     sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
 	     sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber);
@@ -691,70 +1078,125 @@
 		goto bad_message;
 	}
 
-	if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) {
+	switch (sp->hdr.type) {
+	case RXRPC_PACKET_TYPE_VERSION:
 		rxrpc_post_packet_to_local(local, skb);
 		goto out;
-	}
 
-	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
-	    (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
-		goto bad_message;
+	case RXRPC_PACKET_TYPE_BUSY:
+		if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
+			goto discard;
+
+	case RXRPC_PACKET_TYPE_DATA:
+		if (sp->hdr.callNumber == 0)
+			goto bad_message;
+		if (sp->hdr.flags & RXRPC_JUMBO_PACKET &&
+		    !rxrpc_validate_jumbo(skb))
+			goto bad_message;
+		break;
+	}
 
 	rcu_read_lock();
 
-retry_find_conn:
 	conn = rxrpc_find_connection_rcu(local, skb);
-	if (!conn)
-		goto cant_route_call;
+	if (conn) {
+		if (sp->hdr.securityIndex != conn->security_ix)
+			goto wrong_security;
 
-	if (sp->hdr.callNumber == 0) {
-		/* Connection-level packet */
-		_debug("CONN %p {%d}", conn, conn->debug_id);
-		if (!rxrpc_post_packet_to_conn(conn, skb))
-			goto retry_find_conn;
-	} else {
-		/* Call-bound packets are routed by connection channel. */
-		unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
-		struct rxrpc_channel *chan = &conn->channels[channel];
-		struct rxrpc_call *call = rcu_dereference(chan->call);
-
-		if (!call || atomic_read(&call->usage) == 0)
-			goto cant_route_call;
-
-		rxrpc_post_packet_to_call(call, skb);
-	}
-
-	rcu_read_unlock();
-out:
-	return;
-
-cant_route_call:
-	rcu_read_unlock();
-
-	_debug("can't route call");
-	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
-	    sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
-		if (sp->hdr.seq == 1) {
-			_debug("first packet");
-			skb_queue_tail(&local->accept_queue, skb);
-			rxrpc_queue_work(&local->processor);
-			_leave(" [incoming]");
-			return;
+		if (sp->hdr.callNumber == 0) {
+			/* Connection-level packet */
+			_debug("CONN %p {%d}", conn, conn->debug_id);
+			rxrpc_post_packet_to_conn(conn, skb);
+			goto out_unlock;
 		}
-		skb->priority = RX_INVALID_OPERATION;
+
+		/* Note the serial number skew here */
+		skew = (int)sp->hdr.serial - (int)conn->hi_serial;
+		if (skew >= 0) {
+			if (skew > 0)
+				conn->hi_serial = sp->hdr.serial;
+		} else {
+			skew = -skew;
+			skew = min(skew, 65535);
+		}
+
+		/* Call-bound packets are routed by connection channel. */
+		channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+		chan = &conn->channels[channel];
+
+		/* Ignore really old calls */
+		if (sp->hdr.callNumber < chan->last_call)
+			goto discard_unlock;
+
+		if (sp->hdr.callNumber == chan->last_call) {
+			/* For the previous service call, if completed successfully, we
+			 * discard all further packets.
+			 */
+			if (rxrpc_conn_is_service(conn) &&
+			    (chan->last_type == RXRPC_PACKET_TYPE_ACK ||
+			     sp->hdr.type == RXRPC_PACKET_TYPE_ABORT))
+				goto discard_unlock;
+
+			/* But otherwise we need to retransmit the final packet from
+			 * data cached in the connection record.
+			 */
+			rxrpc_post_packet_to_conn(conn, skb);
+			goto out_unlock;
+		}
+
+		call = rcu_dereference(chan->call);
 	} else {
-		skb->priority = RX_CALL_DEAD;
+		skew = 0;
+		call = NULL;
 	}
 
-	if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
-		_debug("reject type %d",sp->hdr.type);
-		rxrpc_reject_packet(local, skb);
+	if (!call || atomic_read(&call->usage) == 0) {
+		if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) ||
+		    sp->hdr.callNumber == 0 ||
+		    sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+			goto bad_message_unlock;
+		if (sp->hdr.seq != 1)
+			goto discard_unlock;
+		call = rxrpc_new_incoming_call(local, conn, skb);
+		if (!call) {
+			rcu_read_unlock();
+			goto reject_packet;
+		}
+		rxrpc_send_ping(call, skb, skew);
 	}
-	_leave(" [no call]");
+
+	rxrpc_input_call_packet(call, skb, skew);
+	goto discard_unlock;
+
+discard_unlock:
+	rcu_read_unlock();
+discard:
+	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+out:
+	trace_rxrpc_rx_done(0, 0);
 	return;
 
+out_unlock:
+	rcu_read_unlock();
+	goto out;
+
+wrong_security:
+	rcu_read_unlock();
+	trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+			  RXKADINCONSISTENCY, EBADMSG);
+	skb->priority = RXKADINCONSISTENCY;
+	goto post_abort;
+
+bad_message_unlock:
+	rcu_read_unlock();
 bad_message:
+	trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+			  RX_PROTOCOL_ERROR, EBADMSG);
 	skb->priority = RX_PROTOCOL_ERROR;
+post_abort:
+	skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
+reject_packet:
+	trace_rxrpc_rx_done(skb->mark, skb->priority);
 	rxrpc_reject_packet(local, skb);
 	_leave(" [badmsg]");
 }
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index c21ad21..7d4375e 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -23,31 +23,36 @@
 }
 
 static int none_secure_packet(struct rxrpc_call *call,
-			       struct sk_buff *skb,
-			       size_t data_size,
-			       void *sechdr)
+			      struct sk_buff *skb,
+			      size_t data_size,
+			      void *sechdr)
 {
 	return 0;
 }
 
-static int none_verify_packet(struct rxrpc_call *call,
-			       struct sk_buff *skb,
-			       u32 *_abort_code)
+static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
+			      unsigned int offset, unsigned int len,
+			      rxrpc_seq_t seq, u16 expected_cksum)
 {
 	return 0;
 }
 
+static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+			     unsigned int *_offset, unsigned int *_len)
+{
+}
+
 static int none_respond_to_challenge(struct rxrpc_connection *conn,
-				      struct sk_buff *skb,
-				      u32 *_abort_code)
+				     struct sk_buff *skb,
+				     u32 *_abort_code)
 {
 	*_abort_code = RX_PROTOCOL_ERROR;
 	return -EPROTO;
 }
 
 static int none_verify_response(struct rxrpc_connection *conn,
-				 struct sk_buff *skb,
-				 u32 *_abort_code)
+				struct sk_buff *skb,
+				u32 *_abort_code)
 {
 	*_abort_code = RX_PROTOCOL_ERROR;
 	return -EPROTO;
@@ -78,6 +83,7 @@
 	.prime_packet_security		= none_prime_packet_security,
 	.secure_packet			= none_secure_packet,
 	.verify_packet			= none_verify_packet,
+	.locate_data			= none_locate_data,
 	.respond_to_challenge		= none_respond_to_challenge,
 	.verify_response		= none_verify_response,
 	.clear				= none_clear,
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 31a3f86..540d395 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -15,8 +15,6 @@
 #include <linux/net.h>
 #include <linux/skbuff.h>
 #include <linux/slab.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <generated/utsrelease.h>
@@ -33,7 +31,7 @@
 {
 	struct rxrpc_wire_header whdr;
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	struct sockaddr_in sin;
+	struct sockaddr_rxrpc srx;
 	struct msghdr msg;
 	struct kvec iov[2];
 	size_t len;
@@ -41,12 +39,11 @@
 
 	_enter("");
 
-	sin.sin_family = AF_INET;
-	sin.sin_port = udp_hdr(skb)->source;
-	sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+	if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+		return;
 
-	msg.msg_name	= &sin;
-	msg.msg_namelen	= sizeof(sin);
+	msg.msg_name	= &srx.transport;
+	msg.msg_namelen	= srx.transport_len;
 	msg.msg_control	= NULL;
 	msg.msg_controllen = 0;
 	msg.msg_flags	= 0;
@@ -93,11 +90,13 @@
 	if (skb) {
 		struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
+		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
 		_debug("{%d},{%u}", local->debug_id, sp->hdr.type);
 
 		switch (sp->hdr.type) {
 		case RXRPC_PACKET_TYPE_VERSION:
-			if (skb_copy_bits(skb, 0, &v, 1) < 0)
+			if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+					  &v, 1) < 0)
 				return;
 			_proto("Rx VERSION { %02x }", v);
 			if (v == 0)
@@ -109,7 +108,7 @@
 			break;
 		}
 
-		rxrpc_free_skb(skb);
+		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 	}
 
 	_leave("");
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index a753796..ff4864d5 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -58,6 +58,17 @@
 			memcmp(&local->srx.transport.sin.sin_addr,
 			       &srx->transport.sin.sin_addr,
 			       sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+	case AF_INET6:
+		/* If the choice of UDP6 port is left up to the transport, then
+		 * the endpoint record doesn't match.
+		 */
+		return ((u16 __force)local->srx.transport.sin6.sin6_port -
+			(u16 __force)srx->transport.sin6.sin6_port) ?:
+			memcmp(&local->srx.transport.sin6.sin6_addr,
+			       &srx->transport.sin6.sin6_addr,
+			       sizeof(struct in6_addr));
+#endif
 	default:
 		BUG();
 	}
@@ -75,9 +86,7 @@
 		atomic_set(&local->usage, 1);
 		INIT_LIST_HEAD(&local->link);
 		INIT_WORK(&local->processor, rxrpc_local_processor);
-		INIT_LIST_HEAD(&local->services);
 		init_rwsem(&local->defrag_sem);
-		skb_queue_head_init(&local->accept_queue);
 		skb_queue_head_init(&local->reject_queue);
 		skb_queue_head_init(&local->event_queue);
 		local->client_conns = RB_ROOT;
@@ -101,11 +110,12 @@
 	struct sock *sock;
 	int ret, opt;
 
-	_enter("%p{%d}", local, local->srx.transport_type);
+	_enter("%p{%d,%d}",
+	       local, local->srx.transport_type, local->srx.transport.family);
 
 	/* create a socket to represent the local endpoint */
-	ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type,
-			       IPPROTO_UDP, &local->socket);
+	ret = sock_create_kern(&init_net, local->srx.transport.family,
+			       local->srx.transport_type, 0, &local->socket);
 	if (ret < 0) {
 		_leave(" = %d [socket]", ret);
 		return ret;
@@ -170,18 +180,8 @@
 	long diff;
 	int ret;
 
-	if (srx->transport.family == AF_INET) {
-		_enter("{%d,%u,%pI4+%hu}",
-		       srx->transport_type,
-		       srx->transport.family,
-		       &srx->transport.sin.sin_addr,
-		       ntohs(srx->transport.sin.sin_port));
-	} else {
-		_enter("{%d,%u}",
-		       srx->transport_type,
-		       srx->transport.family);
-		return ERR_PTR(-EAFNOSUPPORT);
-	}
+	_enter("{%d,%d,%pISp}",
+	       srx->transport_type, srx->transport.family, &srx->transport);
 
 	mutex_lock(&rxrpc_local_mutex);
 
@@ -234,13 +234,8 @@
 found:
 	mutex_unlock(&rxrpc_local_mutex);
 
-	_net("LOCAL %s %d {%d,%u,%pI4+%hu}",
-	     age,
-	     local->debug_id,
-	     local->srx.transport_type,
-	     local->srx.transport.family,
-	     &local->srx.transport.sin.sin_addr,
-	     ntohs(local->srx.transport.sin.sin_port));
+	_net("LOCAL %s %d {%pISp}",
+	     age, local->debug_id, &local->srx.transport);
 
 	_leave(" = %p", local);
 	return local;
@@ -296,7 +291,7 @@
 	mutex_unlock(&rxrpc_local_mutex);
 
 	ASSERT(RB_EMPTY_ROOT(&local->client_conns));
-	ASSERT(list_empty(&local->services));
+	ASSERT(!local->service);
 
 	if (socket) {
 		local->socket = NULL;
@@ -308,7 +303,6 @@
 	/* At this point, there should be no more packets coming in to the
 	 * local endpoint.
 	 */
-	rxrpc_purge_queue(&local->accept_queue);
 	rxrpc_purge_queue(&local->reject_queue);
 	rxrpc_purge_queue(&local->event_queue);
 
@@ -332,11 +326,6 @@
 		if (atomic_read(&local->usage) == 0)
 			return rxrpc_local_destroyer(local);
 
-		if (!skb_queue_empty(&local->accept_queue)) {
-			rxrpc_accept_incoming_calls(local);
-			again = true;
-		}
-
 		if (!skb_queue_empty(&local->reject_queue)) {
 			rxrpc_reject_packets(local);
 			again = true;
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index bdc5e42..9d1c721 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -21,28 +21,33 @@
 unsigned int rxrpc_max_backlog __read_mostly = 10;
 
 /*
+ * Maximum lifetime of a call (in mx).
+ */
+unsigned int rxrpc_max_call_lifetime = 60 * 1000;
+
+/*
  * How long to wait before scheduling ACK generation after seeing a
- * packet with RXRPC_REQUEST_ACK set (in jiffies).
+ * packet with RXRPC_REQUEST_ACK set (in ms).
  */
 unsigned int rxrpc_requested_ack_delay = 1;
 
 /*
- * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ * How long to wait before scheduling an ACK with subtype DELAY (in ms).
  *
  * We use this when we've received new data packets.  If those packets aren't
  * all consumed within this time we will send a DELAY ACK if an ACK was not
  * requested to let the sender know it doesn't need to resend.
  */
-unsigned int rxrpc_soft_ack_delay = 1 * HZ;
+unsigned int rxrpc_soft_ack_delay = 1 * 1000;
 
 /*
- * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ * How long to wait before scheduling an ACK with subtype IDLE (in ms).
  *
  * We use this when we've consumed some previously soft-ACK'd packets when
  * further packets aren't immediately received to decide when to send an IDLE
  * ACK let the other end know that it can free up its Tx buffer space.
  */
-unsigned int rxrpc_idle_ack_delay = 0.5 * HZ;
+unsigned int rxrpc_idle_ack_delay = 0.5 * 1000;
 
 /*
  * Receive window size in packets.  This indicates the maximum number of
@@ -50,7 +55,10 @@
  * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
  * packets.
  */
-unsigned int rxrpc_rx_window_size = 32;
+unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE;
+#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE
+#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE
+#endif
 
 /*
  * Maximum Rx MTU size.  This indicates to the sender the size of jumbo packet
@@ -64,6 +72,11 @@
  */
 unsigned int rxrpc_rx_jumbo_max = 4;
 
+/*
+ * Time till packet resend (in milliseconds).
+ */
+unsigned int rxrpc_resend_timeout = 4 * 1000;
+
 const char *const rxrpc_pkts[] = {
 	"?00",
 	"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
@@ -75,21 +88,152 @@
 	[RXRPC_ACK_DELAY]		= 1,
 	[RXRPC_ACK_REQUESTED]		= 2,
 	[RXRPC_ACK_IDLE]		= 3,
-	[RXRPC_ACK_PING_RESPONSE]	= 4,
-	[RXRPC_ACK_DUPLICATE]		= 5,
-	[RXRPC_ACK_OUT_OF_SEQUENCE]	= 6,
-	[RXRPC_ACK_EXCEEDS_WINDOW]	= 7,
-	[RXRPC_ACK_NOSPACE]		= 8,
+	[RXRPC_ACK_DUPLICATE]		= 4,
+	[RXRPC_ACK_OUT_OF_SEQUENCE]	= 5,
+	[RXRPC_ACK_EXCEEDS_WINDOW]	= 6,
+	[RXRPC_ACK_NOSPACE]		= 7,
+	[RXRPC_ACK_PING_RESPONSE]	= 8,
+	[RXRPC_ACK_PING]		= 9,
 };
 
-const char *rxrpc_acks(u8 reason)
-{
-	static const char *const str[] = {
-		"---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY",
-		"IDL", "-?-"
-	};
+const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = {
+	"---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY",
+	"IDL", "-?-"
+};
 
-	if (reason >= ARRAY_SIZE(str))
-		reason = ARRAY_SIZE(str) - 1;
-	return str[reason];
-}
+const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = {
+	[rxrpc_skb_rx_cleaned]		= "Rx CLN",
+	[rxrpc_skb_rx_freed]		= "Rx FRE",
+	[rxrpc_skb_rx_got]		= "Rx GOT",
+	[rxrpc_skb_rx_lost]		= "Rx *L*",
+	[rxrpc_skb_rx_received]		= "Rx RCV",
+	[rxrpc_skb_rx_purged]		= "Rx PUR",
+	[rxrpc_skb_rx_rotated]		= "Rx ROT",
+	[rxrpc_skb_rx_seen]		= "Rx SEE",
+	[rxrpc_skb_tx_cleaned]		= "Tx CLN",
+	[rxrpc_skb_tx_freed]		= "Tx FRE",
+	[rxrpc_skb_tx_got]		= "Tx GOT",
+	[rxrpc_skb_tx_new]		= "Tx NEW",
+	[rxrpc_skb_tx_rotated]		= "Tx ROT",
+	[rxrpc_skb_tx_seen]		= "Tx SEE",
+};
+
+const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = {
+	[rxrpc_conn_new_client]		= "NWc",
+	[rxrpc_conn_new_service]	= "NWs",
+	[rxrpc_conn_queued]		= "QUE",
+	[rxrpc_conn_seen]		= "SEE",
+	[rxrpc_conn_got]		= "GOT",
+	[rxrpc_conn_put_client]		= "PTc",
+	[rxrpc_conn_put_service]	= "PTs",
+};
+
+const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = {
+	[rxrpc_client_activate_chans]	= "Activa",
+	[rxrpc_client_alloc]		= "Alloc ",
+	[rxrpc_client_chan_activate]	= "ChActv",
+	[rxrpc_client_chan_disconnect]	= "ChDisc",
+	[rxrpc_client_chan_pass]	= "ChPass",
+	[rxrpc_client_chan_unstarted]	= "ChUnst",
+	[rxrpc_client_cleanup]		= "Clean ",
+	[rxrpc_client_count]		= "Count ",
+	[rxrpc_client_discard]		= "Discar",
+	[rxrpc_client_duplicate]	= "Duplic",
+	[rxrpc_client_exposed]		= "Expose",
+	[rxrpc_client_replace]		= "Replac",
+	[rxrpc_client_to_active]	= "->Actv",
+	[rxrpc_client_to_culled]	= "->Cull",
+	[rxrpc_client_to_idle]		= "->Idle",
+	[rxrpc_client_to_inactive]	= "->Inac",
+	[rxrpc_client_to_waiting]	= "->Wait",
+	[rxrpc_client_uncount]		= "Uncoun",
+};
+
+const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = {
+	[rxrpc_transmit_wait]		= "WAI",
+	[rxrpc_transmit_queue]		= "QUE",
+	[rxrpc_transmit_queue_last]	= "QLS",
+	[rxrpc_transmit_rotate]		= "ROT",
+	[rxrpc_transmit_rotate_last]	= "RLS",
+	[rxrpc_transmit_await_reply]	= "AWR",
+	[rxrpc_transmit_end]		= "END",
+};
+
+const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = {
+	[rxrpc_receive_incoming]	= "INC",
+	[rxrpc_receive_queue]		= "QUE",
+	[rxrpc_receive_queue_last]	= "QLS",
+	[rxrpc_receive_front]		= "FRN",
+	[rxrpc_receive_rotate]		= "ROT",
+	[rxrpc_receive_end]		= "END",
+};
+
+const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = {
+	[rxrpc_recvmsg_enter]		= "ENTR",
+	[rxrpc_recvmsg_wait]		= "WAIT",
+	[rxrpc_recvmsg_dequeue]		= "DEQU",
+	[rxrpc_recvmsg_hole]		= "HOLE",
+	[rxrpc_recvmsg_next]		= "NEXT",
+	[rxrpc_recvmsg_cont]		= "CONT",
+	[rxrpc_recvmsg_full]		= "FULL",
+	[rxrpc_recvmsg_data_return]	= "DATA",
+	[rxrpc_recvmsg_terminal]	= "TERM",
+	[rxrpc_recvmsg_to_be_accepted]	= "TBAC",
+	[rxrpc_recvmsg_return]		= "RETN",
+};
+
+const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = {
+	[rxrpc_rtt_tx_ping]		= "PING",
+	[rxrpc_rtt_tx_data]		= "DATA",
+};
+
+const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = {
+	[rxrpc_rtt_rx_ping_response]	= "PONG",
+	[rxrpc_rtt_rx_requested_ack]	= "RACK",
+};
+
+const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = {
+	[rxrpc_timer_begin]			= "Begin ",
+	[rxrpc_timer_expired]			= "*EXPR*",
+	[rxrpc_timer_init_for_reply]		= "IniRpl",
+	[rxrpc_timer_set_for_ack]		= "SetAck",
+	[rxrpc_timer_set_for_send]		= "SetTx ",
+	[rxrpc_timer_set_for_resend]		= "SetRTx",
+};
+
+const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = {
+	[rxrpc_propose_ack_client_tx_end]	= "ClTxEnd",
+	[rxrpc_propose_ack_input_data]		= "DataIn ",
+	[rxrpc_propose_ack_ping_for_lost_ack]	= "LostAck",
+	[rxrpc_propose_ack_ping_for_lost_reply]	= "LostRpl",
+	[rxrpc_propose_ack_ping_for_params]	= "Params ",
+	[rxrpc_propose_ack_respond_to_ack]	= "Rsp2Ack",
+	[rxrpc_propose_ack_respond_to_ping]	= "Rsp2Png",
+	[rxrpc_propose_ack_retry_tx]		= "RetryTx",
+	[rxrpc_propose_ack_rotate_rx]		= "RxAck  ",
+	[rxrpc_propose_ack_terminal_ack]	= "ClTerm ",
+};
+
+const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = {
+	[rxrpc_propose_ack_use]			= "",
+	[rxrpc_propose_ack_update]		= " Update",
+	[rxrpc_propose_ack_subsume]		= " Subsume",
+};
+
+const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10] = {
+	[RXRPC_CALL_SLOW_START]		= "SlowStart",
+	[RXRPC_CALL_CONGEST_AVOIDANCE]	= "CongAvoid",
+	[RXRPC_CALL_PACKET_LOSS]	= "PktLoss  ",
+	[RXRPC_CALL_FAST_RETRANSMIT]	= "FastReTx ",
+};
+
+const char rxrpc_congest_changes[rxrpc_congest__nr_change][9] = {
+	[rxrpc_cong_begin_retransmission]	= " Retrans",
+	[rxrpc_cong_cleared_nacks]		= " Cleared",
+	[rxrpc_cong_new_low_nack]		= " NewLowN",
+	[rxrpc_cong_no_change]			= "",
+	[rxrpc_cong_progress]			= " Progres",
+	[rxrpc_cong_retransmit_again]		= " ReTxAgn",
+	[rxrpc_cong_rtt_window_end]		= " RttWinE",
+	[rxrpc_cong_saw_nack]			= " SawNack",
+};
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f4bda06..0d47db8 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -14,336 +14,326 @@
 #include <linux/net.h>
 #include <linux/gfp.h>
 #include <linux/skbuff.h>
-#include <linux/circ_buf.h>
 #include <linux/export.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-/*
- * Time till packet resend (in jiffies).
- */
-unsigned int rxrpc_resend_timeout = 4 * HZ;
-
-static int rxrpc_send_data(struct rxrpc_sock *rx,
-			   struct rxrpc_call *call,
-			   struct msghdr *msg, size_t len);
+struct rxrpc_pkt_buffer {
+	struct rxrpc_wire_header whdr;
+	union {
+		struct {
+			struct rxrpc_ackpacket ack;
+			u8 acks[255];
+			u8 pad[3];
+		};
+		__be32 abort_code;
+	};
+	struct rxrpc_ackinfo ackinfo;
+};
 
 /*
- * extract control messages from the sendmsg() control buffer
+ * Fill out an ACK packet.
  */
-static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
-			      unsigned long *user_call_ID,
-			      enum rxrpc_command *command,
-			      u32 *abort_code,
-			      bool *_exclusive)
+static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
+				 struct rxrpc_pkt_buffer *pkt,
+				 rxrpc_seq_t *_hard_ack,
+				 rxrpc_seq_t *_top)
 {
-	struct cmsghdr *cmsg;
-	bool got_user_ID = false;
-	int len;
+	rxrpc_serial_t serial;
+	rxrpc_seq_t hard_ack, top, seq;
+	int ix;
+	u32 mtu, jmax;
+	u8 *ackp = pkt->acks;
 
-	*command = RXRPC_CMD_SEND_DATA;
+	/* Barrier against rxrpc_input_data(). */
+	serial = call->ackr_serial;
+	hard_ack = READ_ONCE(call->rx_hard_ack);
+	top = smp_load_acquire(&call->rx_top);
+	*_hard_ack = hard_ack;
+	*_top = top;
 
-	if (msg->msg_controllen == 0)
-		return -EINVAL;
+	pkt->ack.bufferSpace	= htons(8);
+	pkt->ack.maxSkew	= htons(call->ackr_skew);
+	pkt->ack.firstPacket	= htonl(hard_ack + 1);
+	pkt->ack.previousPacket	= htonl(call->ackr_prev_seq);
+	pkt->ack.serial		= htonl(serial);
+	pkt->ack.reason		= call->ackr_reason;
+	pkt->ack.nAcks		= top - hard_ack;
 
-	for_each_cmsghdr(cmsg, msg) {
-		if (!CMSG_OK(msg, cmsg))
-			return -EINVAL;
+	if (pkt->ack.reason == RXRPC_ACK_PING)
+		pkt->whdr.flags |= RXRPC_REQUEST_ACK;
 
-		len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
-		_debug("CMSG %d, %d, %d",
-		       cmsg->cmsg_level, cmsg->cmsg_type, len);
+	if (after(top, hard_ack)) {
+		seq = hard_ack + 1;
+		do {
+			ix = seq & RXRPC_RXTX_BUFF_MASK;
+			if (call->rxtx_buffer[ix])
+				*ackp++ = RXRPC_ACK_TYPE_ACK;
+			else
+				*ackp++ = RXRPC_ACK_TYPE_NACK;
+			seq++;
+		} while (before_eq(seq, top));
+	}
 
-		if (cmsg->cmsg_level != SOL_RXRPC)
-			continue;
+	mtu = call->conn->params.peer->if_mtu;
+	mtu -= call->conn->params.peer->hdrsize;
+	jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
+	pkt->ackinfo.rxMTU	= htonl(rxrpc_rx_mtu);
+	pkt->ackinfo.maxMTU	= htonl(mtu);
+	pkt->ackinfo.rwind	= htonl(call->rx_winsize);
+	pkt->ackinfo.jumbo_max	= htonl(jmax);
 
-		switch (cmsg->cmsg_type) {
-		case RXRPC_USER_CALL_ID:
-			if (msg->msg_flags & MSG_CMSG_COMPAT) {
-				if (len != sizeof(u32))
-					return -EINVAL;
-				*user_call_ID = *(u32 *) CMSG_DATA(cmsg);
-			} else {
-				if (len != sizeof(unsigned long))
-					return -EINVAL;
-				*user_call_ID = *(unsigned long *)
-					CMSG_DATA(cmsg);
-			}
-			_debug("User Call ID %lx", *user_call_ID);
-			got_user_ID = true;
-			break;
+	*ackp++ = 0;
+	*ackp++ = 0;
+	*ackp++ = 0;
+	return top - hard_ack + 3;
+}
 
-		case RXRPC_ABORT:
-			if (*command != RXRPC_CMD_SEND_DATA)
-				return -EINVAL;
-			*command = RXRPC_CMD_SEND_ABORT;
-			if (len != sizeof(*abort_code))
-				return -EINVAL;
-			*abort_code = *(unsigned int *) CMSG_DATA(cmsg);
-			_debug("Abort %x", *abort_code);
-			if (*abort_code == 0)
-				return -EINVAL;
-			break;
+/*
+ * Send an ACK or ABORT call packet.
+ */
+int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
+{
+	struct rxrpc_connection *conn = NULL;
+	struct rxrpc_pkt_buffer *pkt;
+	struct msghdr msg;
+	struct kvec iov[2];
+	rxrpc_serial_t serial;
+	rxrpc_seq_t hard_ack, top;
+	size_t len, n;
+	bool ping = false;
+	int ioc, ret;
+	u32 abort_code;
 
-		case RXRPC_ACCEPT:
-			if (*command != RXRPC_CMD_SEND_DATA)
-				return -EINVAL;
-			*command = RXRPC_CMD_ACCEPT;
-			if (len != 0)
-				return -EINVAL;
-			break;
+	_enter("%u,%s", call->debug_id, rxrpc_pkts[type]);
 
-		case RXRPC_EXCLUSIVE_CALL:
-			*_exclusive = true;
-			if (len != 0)
-				return -EINVAL;
-			break;
-		default:
-			return -EINVAL;
+	spin_lock_bh(&call->lock);
+	if (call->conn)
+		conn = rxrpc_get_connection_maybe(call->conn);
+	spin_unlock_bh(&call->lock);
+	if (!conn)
+		return -ECONNRESET;
+
+	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+	if (!pkt) {
+		rxrpc_put_connection(conn);
+		return -ENOMEM;
+	}
+
+	msg.msg_name	= &call->peer->srx.transport;
+	msg.msg_namelen	= call->peer->srx.transport_len;
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	pkt->whdr.epoch		= htonl(conn->proto.epoch);
+	pkt->whdr.cid		= htonl(call->cid);
+	pkt->whdr.callNumber	= htonl(call->call_id);
+	pkt->whdr.seq		= 0;
+	pkt->whdr.type		= type;
+	pkt->whdr.flags		= conn->out_clientflag;
+	pkt->whdr.userStatus	= 0;
+	pkt->whdr.securityIndex	= call->security_ix;
+	pkt->whdr._rsvd		= 0;
+	pkt->whdr.serviceId	= htons(call->service_id);
+
+	iov[0].iov_base	= pkt;
+	iov[0].iov_len	= sizeof(pkt->whdr);
+	len = sizeof(pkt->whdr);
+
+	switch (type) {
+	case RXRPC_PACKET_TYPE_ACK:
+		spin_lock_bh(&call->lock);
+		if (!call->ackr_reason) {
+			spin_unlock_bh(&call->lock);
+			ret = 0;
+			goto out;
+		}
+		ping = (call->ackr_reason == RXRPC_ACK_PING);
+		n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top);
+		call->ackr_reason = 0;
+
+		spin_unlock_bh(&call->lock);
+
+
+		pkt->whdr.flags |= RXRPC_SLOW_START_OK;
+
+		iov[0].iov_len += sizeof(pkt->ack) + n;
+		iov[1].iov_base = &pkt->ackinfo;
+		iov[1].iov_len	= sizeof(pkt->ackinfo);
+		len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo);
+		ioc = 2;
+		break;
+
+	case RXRPC_PACKET_TYPE_ABORT:
+		abort_code = call->abort_code;
+		pkt->abort_code = htonl(abort_code);
+		iov[0].iov_len += sizeof(pkt->abort_code);
+		len += sizeof(pkt->abort_code);
+		ioc = 1;
+		break;
+
+	default:
+		BUG();
+		ret = -ENOANO;
+		goto out;
+	}
+
+	serial = atomic_inc_return(&conn->serial);
+	pkt->whdr.serial = htonl(serial);
+	switch (type) {
+	case RXRPC_PACKET_TYPE_ACK:
+		trace_rxrpc_tx_ack(call, serial,
+				   ntohl(pkt->ack.firstPacket),
+				   ntohl(pkt->ack.serial),
+				   pkt->ack.reason, pkt->ack.nAcks);
+		break;
+	}
+
+	if (ping) {
+		call->ackr_ping = serial;
+		smp_wmb();
+		/* We need to stick a time in before we send the packet in case
+		 * the reply gets back before kernel_sendmsg() completes - but
+		 * asking UDP to send the packet can take a relatively long
+		 * time, so we update the time after, on the assumption that
+		 * the packet transmission is more likely to happen towards the
+		 * end of the kernel_sendmsg() call.
+		 */
+		call->ackr_ping_time = ktime_get_real();
+		set_bit(RXRPC_CALL_PINGING, &call->flags);
+		trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
+	}
+	ret = kernel_sendmsg(conn->params.local->socket,
+			     &msg, iov, ioc, len);
+	if (ping)
+		call->ackr_ping_time = ktime_get_real();
+
+	if (type == RXRPC_PACKET_TYPE_ACK &&
+	    call->state < RXRPC_CALL_COMPLETE) {
+		if (ret < 0) {
+			clear_bit(RXRPC_CALL_PINGING, &call->flags);
+			rxrpc_propose_ACK(call, pkt->ack.reason,
+					  ntohs(pkt->ack.maxSkew),
+					  ntohl(pkt->ack.serial),
+					  true, true,
+					  rxrpc_propose_ack_retry_tx);
+		} else {
+			spin_lock_bh(&call->lock);
+			if (after(hard_ack, call->ackr_consumed))
+				call->ackr_consumed = hard_ack;
+			if (after(top, call->ackr_seen))
+				call->ackr_seen = top;
+			spin_unlock_bh(&call->lock);
 		}
 	}
 
-	if (!got_user_ID)
-		return -EINVAL;
-	_leave(" = 0");
-	return 0;
-}
-
-/*
- * abort a call, sending an ABORT packet to the peer
- */
-static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
-{
-	write_lock_bh(&call->state_lock);
-
-	if (call->state <= RXRPC_CALL_COMPLETE) {
-		call->state = RXRPC_CALL_LOCALLY_ABORTED;
-		call->local_abort = abort_code;
-		set_bit(RXRPC_CALL_EV_ABORT, &call->events);
-		del_timer_sync(&call->resend_timer);
-		del_timer_sync(&call->ack_timer);
-		clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
-		clear_bit(RXRPC_CALL_EV_ACK, &call->events);
-		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-		rxrpc_queue_call(call);
-	}
-
-	write_unlock_bh(&call->state_lock);
-}
-
-/*
- * Create a new client call for sendmsg().
- */
-static struct rxrpc_call *
-rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
-				  unsigned long user_call_ID, bool exclusive)
-{
-	struct rxrpc_conn_parameters cp;
-	struct rxrpc_call *call;
-	struct key *key;
-
-	DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
-
-	_enter("");
-
-	if (!msg->msg_name)
-		return ERR_PTR(-EDESTADDRREQ);
-
-	key = rx->key;
-	if (key && !rx->key->payload.data[0])
-		key = NULL;
-
-	memset(&cp, 0, sizeof(cp));
-	cp.local		= rx->local;
-	cp.key			= rx->key;
-	cp.security_level	= rx->min_sec_level;
-	cp.exclusive		= rx->exclusive | exclusive;
-	cp.service_id		= srx->srx_service;
-	call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
-
-	_leave(" = %p\n", call);
-	return call;
-}
-
-/*
- * send a message forming part of a client call through an RxRPC socket
- * - caller holds the socket locked
- * - the socket may be either a client socket or a server socket
- */
-int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
-{
-	enum rxrpc_command cmd;
-	struct rxrpc_call *call;
-	unsigned long user_call_ID = 0;
-	bool exclusive = false;
-	u32 abort_code = 0;
-	int ret;
-
-	_enter("");
-
-	ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
-				 &exclusive);
-	if (ret < 0)
-		return ret;
-
-	if (cmd == RXRPC_CMD_ACCEPT) {
-		if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
-			return -EINVAL;
-		call = rxrpc_accept_call(rx, user_call_ID);
-		if (IS_ERR(call))
-			return PTR_ERR(call);
-		rxrpc_put_call(call);
-		return 0;
-	}
-
-	call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
-	if (!call) {
-		if (cmd != RXRPC_CMD_SEND_DATA)
-			return -EBADSLT;
-		call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
-							 exclusive);
-		if (IS_ERR(call))
-			return PTR_ERR(call);
-	}
-
-	_debug("CALL %d USR %lx ST %d on CONN %p",
-	       call->debug_id, call->user_call_ID, call->state, call->conn);
-
-	if (call->state >= RXRPC_CALL_COMPLETE) {
-		/* it's too late for this call */
-		ret = -ECONNRESET;
-	} else if (cmd == RXRPC_CMD_SEND_ABORT) {
-		rxrpc_send_abort(call, abort_code);
-		ret = 0;
-	} else if (cmd != RXRPC_CMD_SEND_DATA) {
-		ret = -EINVAL;
-	} else if (!call->in_clientflag &&
-		   call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
-		/* request phase complete for this client call */
-		ret = -EPROTO;
-	} else if (call->in_clientflag &&
-		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
-		/* Reply phase not begun or not complete for service call. */
-		ret = -EPROTO;
-	} else {
-		ret = rxrpc_send_data(rx, call, msg, len);
-	}
-
-	rxrpc_put_call(call);
-	_leave(" = %d", ret);
+out:
+	rxrpc_put_connection(conn);
+	kfree(pkt);
 	return ret;
 }
 
-/**
- * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
- * @call: The call to send data through
- * @msg: The data to send
- * @len: The amount of data to send
- *
- * Allow a kernel service to send data on a call.  The call must be in an state
- * appropriate to sending data.  No control data should be supplied in @msg,
- * nor should an address be supplied.  MSG_MORE should be flagged if there's
- * more data to come, otherwise this data will end the transmission phase.
- */
-int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
-			   size_t len)
-{
-	int ret;
-
-	_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
-
-	ASSERTCMP(msg->msg_name, ==, NULL);
-	ASSERTCMP(msg->msg_control, ==, NULL);
-
-	lock_sock(&call->socket->sk);
-
-	_debug("CALL %d USR %lx ST %d on CONN %p",
-	       call->debug_id, call->user_call_ID, call->state, call->conn);
-
-	if (call->state >= RXRPC_CALL_COMPLETE) {
-		ret = -ESHUTDOWN; /* it's too late for this call */
-	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
-		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
-		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
-		ret = -EPROTO; /* request phase complete for this client call */
-	} else {
-		ret = rxrpc_send_data(call->socket, call, msg, len);
-	}
-
-	release_sock(&call->socket->sk);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_send_data);
-
-/**
- * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
- * @call: The call to be aborted
- * @abort_code: The abort code to stick into the ABORT packet
- *
- * Allow a kernel service to abort a call, if it's still in an abortable state.
- */
-void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
-{
-	_enter("{%d},%d", call->debug_id, abort_code);
-
-	lock_sock(&call->socket->sk);
-
-	_debug("CALL %d USR %lx ST %d on CONN %p",
-	       call->debug_id, call->user_call_ID, call->state, call->conn);
-
-	if (call->state < RXRPC_CALL_COMPLETE)
-		rxrpc_send_abort(call, abort_code);
-
-	release_sock(&call->socket->sk);
-	_leave("");
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_abort_call);
-
 /*
  * send a packet through the transport endpoint
  */
-int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
+			   bool retrans)
 {
-	struct kvec iov[1];
+	struct rxrpc_connection *conn = call->conn;
+	struct rxrpc_wire_header whdr;
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct msghdr msg;
+	struct kvec iov[2];
+	rxrpc_serial_t serial;
+	size_t len;
+	bool lost = false;
 	int ret, opt;
 
 	_enter(",{%d}", skb->len);
 
-	iov[0].iov_base = skb->head;
-	iov[0].iov_len = skb->len;
+	/* Each transmission of a Tx packet needs a new serial number */
+	serial = atomic_inc_return(&conn->serial);
 
-	msg.msg_name = &conn->params.peer->srx.transport;
-	msg.msg_namelen = conn->params.peer->srx.transport_len;
+	whdr.epoch	= htonl(conn->proto.epoch);
+	whdr.cid	= htonl(call->cid);
+	whdr.callNumber	= htonl(call->call_id);
+	whdr.seq	= htonl(sp->hdr.seq);
+	whdr.serial	= htonl(serial);
+	whdr.type	= RXRPC_PACKET_TYPE_DATA;
+	whdr.flags	= sp->hdr.flags;
+	whdr.userStatus	= 0;
+	whdr.securityIndex = call->security_ix;
+	whdr._rsvd	= htons(sp->hdr._rsvd);
+	whdr.serviceId	= htons(call->service_id);
+
+	iov[0].iov_base = &whdr;
+	iov[0].iov_len = sizeof(whdr);
+	iov[1].iov_base = skb->head;
+	iov[1].iov_len = skb->len;
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	msg.msg_name = &call->peer->srx.transport;
+	msg.msg_namelen = call->peer->srx.transport_len;
 	msg.msg_control = NULL;
 	msg.msg_controllen = 0;
 	msg.msg_flags = 0;
 
+	/* If our RTT cache needs working on, request an ACK.  Also request
+	 * ACKs if a DATA packet appears to have been lost.
+	 */
+	if (retrans ||
+	    call->cong_mode == RXRPC_CALL_SLOW_START ||
+	    (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
+	    ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+			 ktime_get_real()))
+		whdr.flags |= RXRPC_REQUEST_ACK;
+
+	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
+		static int lose;
+		if ((lose++ & 7) == 7) {
+			ret = 0;
+			lost = true;
+			goto done;
+		}
+	}
+
+	_proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);
+
 	/* send the packet with the don't fragment bit set if we currently
 	 * think it's small enough */
-	if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) {
-		down_read(&conn->params.local->defrag_sem);
-		/* send the packet by UDP
-		 * - returns -EMSGSIZE if UDP would have to fragment the packet
-		 *   to go out of the interface
-		 *   - in which case, we'll have processed the ICMP error
-		 *     message and update the peer record
-		 */
-		ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1,
-				     iov[0].iov_len);
+	if (iov[1].iov_len >= call->peer->maxdata)
+		goto send_fragmentable;
 
-		up_read(&conn->params.local->defrag_sem);
-		if (ret == -EMSGSIZE)
-			goto send_fragmentable;
+	down_read(&conn->params.local->defrag_sem);
+	/* send the packet by UDP
+	 * - returns -EMSGSIZE if UDP would have to fragment the packet
+	 *   to go out of the interface
+	 *   - in which case, we'll have processed the ICMP error
+	 *     message and update the peer record
+	 */
+	ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
 
-		_leave(" = %d [%u]", ret, conn->params.peer->maxdata);
-		return ret;
+	up_read(&conn->params.local->defrag_sem);
+	if (ret == -EMSGSIZE)
+		goto send_fragmentable;
+
+done:
+	trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
+			    retrans, lost);
+	if (ret >= 0) {
+		ktime_t now = ktime_get_real();
+		skb->tstamp = now;
+		smp_wmb();
+		sp->hdr.serial = serial;
+		if (whdr.flags & RXRPC_REQUEST_ACK) {
+			call->peer->rtt_last_req = now;
+			trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
+		}
 	}
+	_leave(" = %d [%u]", ret, call->peer->maxdata);
+	return ret;
 
 send_fragmentable:
 	/* attempt to send this message with fragmentation enabled */
@@ -358,8 +348,8 @@
 					SOL_IP, IP_MTU_DISCOVER,
 					(char *)&opt, sizeof(opt));
 		if (ret == 0) {
-			ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1,
-					     iov[0].iov_len);
+			ret = kernel_sendmsg(conn->params.local->socket, &msg,
+					     iov, 2, len);
 
 			opt = IP_PMTUDISC_DO;
 			kernel_setsockopt(conn->params.local->socket, SOL_IP,
@@ -367,355 +357,82 @@
 					  (char *)&opt, sizeof(opt));
 		}
 		break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+	case AF_INET6:
+		opt = IPV6_PMTUDISC_DONT;
+		ret = kernel_setsockopt(conn->params.local->socket,
+					SOL_IPV6, IPV6_MTU_DISCOVER,
+					(char *)&opt, sizeof(opt));
+		if (ret == 0) {
+			ret = kernel_sendmsg(conn->params.local->socket, &msg,
+					     iov, 1, iov[0].iov_len);
+
+			opt = IPV6_PMTUDISC_DO;
+			kernel_setsockopt(conn->params.local->socket,
+					  SOL_IPV6, IPV6_MTU_DISCOVER,
+					  (char *)&opt, sizeof(opt));
+		}
+		break;
+#endif
 	}
 
 	up_write(&conn->params.local->defrag_sem);
-	_leave(" = %d [frag %u]", ret, conn->params.peer->maxdata);
-	return ret;
+	goto done;
 }
 
 /*
- * wait for space to appear in the transmit/ACK window
- * - caller holds the socket locked
+ * reject packets through the local endpoint
  */
-static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
-				    struct rxrpc_call *call,
-				    long *timeo)
+void rxrpc_reject_packets(struct rxrpc_local *local)
 {
-	DECLARE_WAITQUEUE(myself, current);
-	int ret;
+	struct sockaddr_rxrpc srx;
+	struct rxrpc_skb_priv *sp;
+	struct rxrpc_wire_header whdr;
+	struct sk_buff *skb;
+	struct msghdr msg;
+	struct kvec iov[2];
+	size_t size;
+	__be32 code;
 
-	_enter(",{%d},%ld",
-	       CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
-			  call->acks_winsz),
-	       *timeo);
+	_enter("%d", local->debug_id);
 
-	add_wait_queue(&call->tx_waitq, &myself);
+	iov[0].iov_base = &whdr;
+	iov[0].iov_len = sizeof(whdr);
+	iov[1].iov_base = &code;
+	iov[1].iov_len = sizeof(code);
+	size = sizeof(whdr) + sizeof(code);
 
-	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		ret = 0;
-		if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
-			       call->acks_winsz) > 0)
-			break;
-		if (signal_pending(current)) {
-			ret = sock_intr_errno(*timeo);
-			break;
+	msg.msg_name = &srx.transport;
+	msg.msg_control = NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags = 0;
+
+	memset(&whdr, 0, sizeof(whdr));
+	whdr.type = RXRPC_PACKET_TYPE_ABORT;
+
+	while ((skb = skb_dequeue(&local->reject_queue))) {
+		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		sp = rxrpc_skb(skb);
+
+		if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+			msg.msg_namelen = srx.transport_len;
+
+			code = htonl(skb->priority);
+
+			whdr.epoch	= htonl(sp->hdr.epoch);
+			whdr.cid	= htonl(sp->hdr.cid);
+			whdr.callNumber	= htonl(sp->hdr.callNumber);
+			whdr.serviceId	= htons(sp->hdr.serviceId);
+			whdr.flags	= sp->hdr.flags;
+			whdr.flags	^= RXRPC_CLIENT_INITIATED;
+			whdr.flags	&= RXRPC_CLIENT_INITIATED;
+
+			kernel_sendmsg(local->socket, &msg, iov, 2, size);
 		}
 
-		release_sock(&rx->sk);
-		*timeo = schedule_timeout(*timeo);
-		lock_sock(&rx->sk);
-	}
-
-	remove_wait_queue(&call->tx_waitq, &myself);
-	set_current_state(TASK_RUNNING);
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * attempt to schedule an instant Tx resend
- */
-static inline void rxrpc_instant_resend(struct rxrpc_call *call)
-{
-	read_lock_bh(&call->state_lock);
-	if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
-		clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
-		if (call->state < RXRPC_CALL_COMPLETE &&
-		    !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
-			rxrpc_queue_call(call);
-	}
-	read_unlock_bh(&call->state_lock);
-}
-
-/*
- * queue a packet for transmission, set the resend timer and attempt
- * to send the packet immediately
- */
-static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
-			       bool last)
-{
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	int ret;
-
-	_net("queue skb %p [%d]", skb, call->acks_head);
-
-	ASSERT(call->acks_window != NULL);
-	call->acks_window[call->acks_head] = (unsigned long) skb;
-	smp_wmb();
-	call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);
-
-	if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
-		_debug("________awaiting reply/ACK__________");
-		write_lock_bh(&call->state_lock);
-		switch (call->state) {
-		case RXRPC_CALL_CLIENT_SEND_REQUEST:
-			call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
-			break;
-		case RXRPC_CALL_SERVER_ACK_REQUEST:
-			call->state = RXRPC_CALL_SERVER_SEND_REPLY;
-			if (!last)
-				break;
-		case RXRPC_CALL_SERVER_SEND_REPLY:
-			call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
-			break;
-		default:
-			break;
-		}
-		write_unlock_bh(&call->state_lock);
-	}
-
-	_proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
-
-	sp->need_resend = false;
-	sp->resend_at = jiffies + rxrpc_resend_timeout;
-	if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
-		_debug("run timer");
-		call->resend_timer.expires = sp->resend_at;
-		add_timer(&call->resend_timer);
-	}
-
-	/* attempt to cancel the rx-ACK timer, deferring reply transmission if
-	 * we're ACK'ing the request phase of an incoming call */
-	ret = -EAGAIN;
-	if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
-		/* the packet may be freed by rxrpc_process_call() before this
-		 * returns */
-		ret = rxrpc_send_data_packet(call->conn, skb);
-		_net("sent skb %p", skb);
-	} else {
-		_debug("failed to delete ACK timer");
-	}
-
-	if (ret < 0) {
-		_debug("need instant resend %d", ret);
-		sp->need_resend = true;
-		rxrpc_instant_resend(call);
+		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 	}
 
 	_leave("");
 }
-
-/*
- * Convert a host-endian header into a network-endian header.
- */
-static void rxrpc_insert_header(struct sk_buff *skb)
-{
-	struct rxrpc_wire_header whdr;
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
-	whdr.epoch	= htonl(sp->hdr.epoch);
-	whdr.cid	= htonl(sp->hdr.cid);
-	whdr.callNumber	= htonl(sp->hdr.callNumber);
-	whdr.seq	= htonl(sp->hdr.seq);
-	whdr.serial	= htonl(sp->hdr.serial);
-	whdr.type	= sp->hdr.type;
-	whdr.flags	= sp->hdr.flags;
-	whdr.userStatus	= sp->hdr.userStatus;
-	whdr.securityIndex = sp->hdr.securityIndex;
-	whdr._rsvd	= htons(sp->hdr._rsvd);
-	whdr.serviceId	= htons(sp->hdr.serviceId);
-
-	memcpy(skb->head, &whdr, sizeof(whdr));
-}
-
-/*
- * send data through a socket
- * - must be called in process context
- * - caller holds the socket locked
- */
-static int rxrpc_send_data(struct rxrpc_sock *rx,
-			   struct rxrpc_call *call,
-			   struct msghdr *msg, size_t len)
-{
-	struct rxrpc_skb_priv *sp;
-	struct sk_buff *skb;
-	struct sock *sk = &rx->sk;
-	long timeo;
-	bool more;
-	int ret, copied;
-
-	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-
-	/* this should be in poll */
-	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
-	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
-		return -EPIPE;
-
-	more = msg->msg_flags & MSG_MORE;
-
-	skb = call->tx_pending;
-	call->tx_pending = NULL;
-
-	copied = 0;
-	do {
-		if (!skb) {
-			size_t size, chunk, max, space;
-
-			_debug("alloc");
-
-			if (CIRC_SPACE(call->acks_head,
-				       ACCESS_ONCE(call->acks_tail),
-				       call->acks_winsz) <= 0) {
-				ret = -EAGAIN;
-				if (msg->msg_flags & MSG_DONTWAIT)
-					goto maybe_error;
-				ret = rxrpc_wait_for_tx_window(rx, call,
-							       &timeo);
-				if (ret < 0)
-					goto maybe_error;
-			}
-
-			max = call->conn->params.peer->maxdata;
-			max -= call->conn->security_size;
-			max &= ~(call->conn->size_align - 1UL);
-
-			chunk = max;
-			if (chunk > msg_data_left(msg) && !more)
-				chunk = msg_data_left(msg);
-
-			space = chunk + call->conn->size_align;
-			space &= ~(call->conn->size_align - 1UL);
-
-			size = space + call->conn->header_size;
-
-			_debug("SIZE: %zu/%zu/%zu", chunk, space, size);
-
-			/* create a buffer that we can retain until it's ACK'd */
-			skb = sock_alloc_send_skb(
-				sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
-			if (!skb)
-				goto maybe_error;
-
-			rxrpc_new_skb(skb);
-
-			_debug("ALLOC SEND %p", skb);
-
-			ASSERTCMP(skb->mark, ==, 0);
-
-			_debug("HS: %u", call->conn->header_size);
-			skb_reserve(skb, call->conn->header_size);
-			skb->len += call->conn->header_size;
-
-			sp = rxrpc_skb(skb);
-			sp->remain = chunk;
-			if (sp->remain > skb_tailroom(skb))
-				sp->remain = skb_tailroom(skb);
-
-			_net("skb: hr %d, tr %d, hl %d, rm %d",
-			       skb_headroom(skb),
-			       skb_tailroom(skb),
-			       skb_headlen(skb),
-			       sp->remain);
-
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
-		}
-
-		_debug("append");
-		sp = rxrpc_skb(skb);
-
-		/* append next segment of data to the current buffer */
-		if (msg_data_left(msg) > 0) {
-			int copy = skb_tailroom(skb);
-			ASSERTCMP(copy, >, 0);
-			if (copy > msg_data_left(msg))
-				copy = msg_data_left(msg);
-			if (copy > sp->remain)
-				copy = sp->remain;
-
-			_debug("add");
-			ret = skb_add_data(skb, &msg->msg_iter, copy);
-			_debug("added");
-			if (ret < 0)
-				goto efault;
-			sp->remain -= copy;
-			skb->mark += copy;
-			copied += copy;
-		}
-
-		/* check for the far side aborting the call or a network error
-		 * occurring */
-		if (call->state > RXRPC_CALL_COMPLETE)
-			goto call_aborted;
-
-		/* add the packet to the send queue if it's now full */
-		if (sp->remain <= 0 ||
-		    (msg_data_left(msg) == 0 && !more)) {
-			struct rxrpc_connection *conn = call->conn;
-			uint32_t seq;
-			size_t pad;
-
-			/* pad out if we're using security */
-			if (conn->security_ix) {
-				pad = conn->security_size + skb->mark;
-				pad = conn->size_align - pad;
-				pad &= conn->size_align - 1;
-				_debug("pad %zu", pad);
-				if (pad)
-					memset(skb_put(skb, pad), 0, pad);
-			}
-
-			seq = atomic_inc_return(&call->sequence);
-
-			sp->hdr.epoch	= conn->proto.epoch;
-			sp->hdr.cid	= call->cid;
-			sp->hdr.callNumber = call->call_id;
-			sp->hdr.seq	= seq;
-			sp->hdr.serial	= atomic_inc_return(&conn->serial);
-			sp->hdr.type	= RXRPC_PACKET_TYPE_DATA;
-			sp->hdr.userStatus = 0;
-			sp->hdr.securityIndex = conn->security_ix;
-			sp->hdr._rsvd	= 0;
-			sp->hdr.serviceId = call->service_id;
-
-			sp->hdr.flags = conn->out_clientflag;
-			if (msg_data_left(msg) == 0 && !more)
-				sp->hdr.flags |= RXRPC_LAST_PACKET;
-			else if (CIRC_SPACE(call->acks_head,
-					    ACCESS_ONCE(call->acks_tail),
-					    call->acks_winsz) > 1)
-				sp->hdr.flags |= RXRPC_MORE_PACKETS;
-			if (more && seq & 1)
-				sp->hdr.flags |= RXRPC_REQUEST_ACK;
-
-			ret = conn->security->secure_packet(
-				call, skb, skb->mark,
-				skb->head + sizeof(struct rxrpc_wire_header));
-			if (ret < 0)
-				goto out;
-
-			rxrpc_insert_header(skb);
-			rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
-			skb = NULL;
-		}
-	} while (msg_data_left(msg) > 0);
-
-success:
-	ret = copied;
-out:
-	call->tx_pending = skb;
-	_leave(" = %d", ret);
-	return ret;
-
-call_aborted:
-	rxrpc_free_skb(skb);
-	if (call->state == RXRPC_CALL_NETWORK_ERROR)
-		ret = call->error_report < RXRPC_LOCAL_ERROR_OFFSET ?
-			call->error_report :
-			call->error_report - RXRPC_LOCAL_ERROR_OFFSET;
-	else
-		ret = -ECONNABORTED;
-	_leave(" = %d", ret);
-	return ret;
-
-maybe_error:
-	if (copied)
-		goto success;
-	goto out;
-
-efault:
-	ret = -EFAULT;
-	goto out;
-}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 8940674..bf13b84 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -66,6 +66,32 @@
 		}
 		break;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
+	case AF_INET6:
+		srx.transport.sin6.sin6_port = serr->port;
+		srx.transport_len = sizeof(struct sockaddr_in6);
+		switch (serr->ee.ee_origin) {
+		case SO_EE_ORIGIN_ICMP6:
+			_net("Rx ICMP6");
+			memcpy(&srx.transport.sin6.sin6_addr,
+			       skb_network_header(skb) + serr->addr_offset,
+			       sizeof(struct in6_addr));
+			break;
+		case SO_EE_ORIGIN_ICMP:
+			_net("Rx ICMP on v6 sock");
+			memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
+			       skb_network_header(skb) + serr->addr_offset,
+			       sizeof(struct in_addr));
+			break;
+		default:
+			memcpy(&srx.transport.sin6.sin6_addr,
+			       &ipv6_hdr(skb)->saddr,
+			       sizeof(struct in6_addr));
+			break;
+		}
+		break;
+#endif
+
 	default:
 		BUG();
 	}
@@ -129,22 +155,21 @@
 		_leave("UDP socket errqueue empty");
 		return;
 	}
+	rxrpc_new_skb(skb, rxrpc_skb_rx_received);
 	serr = SKB_EXT_ERR(skb);
 	if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
 		_leave("UDP empty message");
-		kfree_skb(skb);
+		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 		return;
 	}
 
-	rxrpc_new_skb(skb);
-
 	rcu_read_lock();
 	peer = rxrpc_lookup_peer_icmp_rcu(local, skb);
 	if (peer && !rxrpc_get_peer_maybe(peer))
 		peer = NULL;
 	if (!peer) {
 		rcu_read_unlock();
-		rxrpc_free_skb(skb);
+		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 		_leave(" [no peer]");
 		return;
 	}
@@ -154,7 +179,7 @@
 	     serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
 		rxrpc_adjust_mtu(peer, serr);
 		rcu_read_unlock();
-		rxrpc_free_skb(skb);
+		rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 		rxrpc_put_peer(peer);
 		_leave(" [MTU update]");
 		return;
@@ -162,7 +187,7 @@
 
 	rxrpc_store_error(peer, serr);
 	rcu_read_unlock();
-	rxrpc_free_skb(skb);
+	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
 
 	/* The ref we obtained is passed off to the work item */
 	rxrpc_queue_work(&peer->error_distributor);
@@ -248,13 +273,20 @@
 	struct rxrpc_peer *peer =
 		container_of(work, struct rxrpc_peer, error_distributor);
 	struct rxrpc_call *call;
-	int error_report;
+	enum rxrpc_call_completion compl;
+	int error;
 
 	_enter("");
 
-	error_report = READ_ONCE(peer->error_report);
+	error = READ_ONCE(peer->error_report);
+	if (error < RXRPC_LOCAL_ERROR_OFFSET) {
+		compl = RXRPC_CALL_NETWORK_ERROR;
+	} else {
+		compl = RXRPC_CALL_LOCAL_ERROR;
+		error -= RXRPC_LOCAL_ERROR_OFFSET;
+	}
 
-	_debug("ISSUE ERROR %d", error_report);
+	_debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error);
 
 	spin_lock_bh(&peer->lock);
 
@@ -262,16 +294,10 @@
 		call = hlist_entry(peer->error_targets.first,
 				   struct rxrpc_call, error_link);
 		hlist_del_init(&call->error_link);
+		rxrpc_see_call(call);
 
-		write_lock(&call->state_lock);
-		if (call->state != RXRPC_CALL_COMPLETE &&
-		    call->state < RXRPC_CALL_NETWORK_ERROR) {
-			call->error_report = error_report;
-			call->state = RXRPC_CALL_NETWORK_ERROR;
-			set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
-			rxrpc_queue_call(call);
-		}
-		write_unlock(&call->state_lock);
+		if (rxrpc_set_call_completion(call, compl, 0, error))
+			rxrpc_notify_socket(call);
 	}
 
 	spin_unlock_bh(&peer->lock);
@@ -279,3 +305,44 @@
 	rxrpc_put_peer(peer);
 	_leave("");
 }
+
+/*
+ * Add RTT information to cache.  This is called in softirq mode and has
+ * exclusive access to the peer RTT data.
+ */
+void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+			rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+			ktime_t send_time, ktime_t resp_time)
+{
+	struct rxrpc_peer *peer = call->peer;
+	s64 rtt;
+	u64 sum = peer->rtt_sum, avg;
+	u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage;
+
+	rtt = ktime_to_ns(ktime_sub(resp_time, send_time));
+	if (rtt < 0)
+		return;
+
+	/* Replace the oldest datum in the RTT buffer */
+	sum -= peer->rtt_cache[cursor];
+	sum += rtt;
+	peer->rtt_cache[cursor] = rtt;
+	peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1);
+	peer->rtt_sum = sum;
+	if (usage < RXRPC_RTT_CACHE_SIZE) {
+		usage++;
+		peer->rtt_usage = usage;
+	}
+
+	/* Now recalculate the average */
+	if (usage == RXRPC_RTT_CACHE_SIZE) {
+		avg = sum / RXRPC_RTT_CACHE_SIZE;
+	} else {
+		avg = sum;
+		do_div(avg, usage);
+	}
+
+	peer->rtt = avg;
+	trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
+			   usage, avg);
+}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 538e983..941b724 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -16,12 +16,14 @@
 #include <linux/skbuff.h>
 #include <linux/udp.h>
 #include <linux/in.h>
+#include <linux/in6.h>
 #include <linux/slab.h>
 #include <linux/hashtable.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include <net/ip.h>
 #include <net/route.h>
+#include <net/ip6_route.h>
 #include "ar-internal.h"
 
 static DEFINE_HASHTABLE(rxrpc_peer_hash, 10);
@@ -50,6 +52,13 @@
 		size = sizeof(srx->transport.sin.sin_addr);
 		p = (u16 *)&srx->transport.sin.sin_addr;
 		break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+	case AF_INET6:
+		hash_key += (u16 __force)srx->transport.sin.sin_port;
+		size = sizeof(srx->transport.sin6.sin6_addr);
+		p = (u16 *)&srx->transport.sin6.sin6_addr;
+		break;
+#endif
 	default:
 		WARN(1, "AF_RXRPC: Unsupported transport address family\n");
 		return 0;
@@ -93,6 +102,14 @@
 			memcmp(&peer->srx.transport.sin.sin_addr,
 			       &srx->transport.sin.sin_addr,
 			       sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+	case AF_INET6:
+		return ((u16 __force)peer->srx.transport.sin6.sin6_port -
+			(u16 __force)srx->transport.sin6.sin6_port) ?:
+			memcmp(&peer->srx.transport.sin6.sin6_addr,
+			       &srx->transport.sin6.sin6_addr,
+			       sizeof(struct in6_addr));
+#endif
 	default:
 		BUG();
 	}
@@ -130,17 +147,7 @@
 
 	peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
 	if (peer) {
-		switch (srx->transport.family) {
-		case AF_INET:
-			_net("PEER %d {%d,%u,%pI4+%hu}",
-			     peer->debug_id,
-			     peer->srx.transport_type,
-			     peer->srx.transport.family,
-			     &peer->srx.transport.sin.sin_addr,
-			     ntohs(peer->srx.transport.sin.sin_port));
-			break;
-		}
-
+		_net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
 		_leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
 	}
 	return peer;
@@ -152,22 +159,53 @@
  */
 static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
 {
+	struct dst_entry *dst;
 	struct rtable *rt;
-	struct flowi4 fl4;
+	struct flowi fl;
+	struct flowi4 *fl4 = &fl.u.ip4;
+#ifdef CONFIG_AF_RXRPC_IPV6
+	struct flowi6 *fl6 = &fl.u.ip6;
+#endif
 
 	peer->if_mtu = 1500;
 
-	rt = ip_route_output_ports(&init_net, &fl4, NULL,
-				   peer->srx.transport.sin.sin_addr.s_addr, 0,
-				   htons(7000), htons(7001),
-				   IPPROTO_UDP, 0, 0);
-	if (IS_ERR(rt)) {
-		_leave(" [route err %ld]", PTR_ERR(rt));
-		return;
+	memset(&fl, 0, sizeof(fl));
+	switch (peer->srx.transport.family) {
+	case AF_INET:
+		rt = ip_route_output_ports(
+			&init_net, fl4, NULL,
+			peer->srx.transport.sin.sin_addr.s_addr, 0,
+			htons(7000), htons(7001), IPPROTO_UDP, 0, 0);
+		if (IS_ERR(rt)) {
+			_leave(" [route err %ld]", PTR_ERR(rt));
+			return;
+		}
+		dst = &rt->dst;
+		break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+	case AF_INET6:
+		fl6->flowi6_iif = LOOPBACK_IFINDEX;
+		fl6->flowi6_scope = RT_SCOPE_UNIVERSE;
+		fl6->flowi6_proto = IPPROTO_UDP;
+		memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr,
+		       sizeof(struct in6_addr));
+		fl6->fl6_dport = htons(7001);
+		fl6->fl6_sport = htons(7000);
+		dst = ip6_route_output(&init_net, NULL, fl6);
+		if (IS_ERR(dst)) {
+			_leave(" [route err %ld]", PTR_ERR(dst));
+			return;
+		}
+		break;
+#endif
+
+	default:
+		BUG();
 	}
 
-	peer->if_mtu = dst_mtu(&rt->dst);
-	dst_release(&rt->dst);
+	peer->if_mtu = dst_mtu(dst);
+	dst_release(dst);
 
 	_leave(" [if_mtu %u]", peer->if_mtu);
 }
@@ -199,6 +237,41 @@
 }
 
 /*
+ * Initialise peer record.
+ */
+static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key)
+{
+	peer->hash_key = hash_key;
+	rxrpc_assess_MTU_size(peer);
+	peer->mtu = peer->if_mtu;
+	peer->rtt_last_req = ktime_get_real();
+
+	switch (peer->srx.transport.family) {
+	case AF_INET:
+		peer->hdrsize = sizeof(struct iphdr);
+		break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+	case AF_INET6:
+		peer->hdrsize = sizeof(struct ipv6hdr);
+		break;
+#endif
+	default:
+		BUG();
+	}
+
+	switch (peer->srx.transport_type) {
+	case SOCK_DGRAM:
+		peer->hdrsize += sizeof(struct udphdr);
+		break;
+	default:
+		BUG();
+	}
+
+	peer->hdrsize += sizeof(struct rxrpc_wire_header);
+	peer->maxdata = peer->mtu - peer->hdrsize;
+}
+
+/*
  * Set up a new peer.
  */
 static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
@@ -212,28 +285,8 @@
 
 	peer = rxrpc_alloc_peer(local, gfp);
 	if (peer) {
-		peer->hash_key = hash_key;
 		memcpy(&peer->srx, srx, sizeof(*srx));
-
-		rxrpc_assess_MTU_size(peer);
-		peer->mtu = peer->if_mtu;
-
-		if (srx->transport.family == AF_INET) {
-			peer->hdrsize = sizeof(struct iphdr);
-			switch (srx->transport_type) {
-			case SOCK_DGRAM:
-				peer->hdrsize += sizeof(struct udphdr);
-				break;
-			default:
-				BUG();
-				break;
-			}
-		} else {
-			BUG();
-		}
-
-		peer->hdrsize += sizeof(struct rxrpc_wire_header);
-		peer->maxdata = peer->mtu - peer->hdrsize;
+		rxrpc_init_peer(peer, hash_key);
 	}
 
 	_leave(" = %p", peer);
@@ -241,6 +294,35 @@
 }
 
 /*
+ * Set up a new incoming peer.  The address is prestored in the preallocated
+ * peer.
+ */
+struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
+					      struct rxrpc_peer *prealloc)
+{
+	struct rxrpc_peer *peer;
+	unsigned long hash_key;
+
+	hash_key = rxrpc_peer_hash_key(local, &prealloc->srx);
+	prealloc->local = local;
+	rxrpc_init_peer(prealloc, hash_key);
+
+	spin_lock(&rxrpc_peer_hash_lock);
+
+	/* Need to check that we aren't racing with someone else */
+	peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key);
+	if (peer && !rxrpc_get_peer_maybe(peer))
+		peer = NULL;
+	if (!peer) {
+		peer = prealloc;
+		hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key);
+	}
+
+	spin_unlock(&rxrpc_peer_hash_lock);
+	return peer;
+}
+
+/*
  * obtain a remote transport endpoint for the specified address
  */
 struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
@@ -249,11 +331,7 @@
 	struct rxrpc_peer *peer, *candidate;
 	unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
 
-	_enter("{%d,%d,%pI4+%hu}",
-	       srx->transport_type,
-	       srx->transport_len,
-	       &srx->transport.sin.sin_addr,
-	       ntohs(srx->transport.sin.sin_port));
+	_enter("{%pISp}", &srx->transport);
 
 	/* search the peer list first */
 	rcu_read_lock();
@@ -272,7 +350,7 @@
 			return NULL;
 		}
 
-		spin_lock(&rxrpc_peer_hash_lock);
+		spin_lock_bh(&rxrpc_peer_hash_lock);
 
 		/* Need to check that we aren't racing with someone else */
 		peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
@@ -282,7 +360,7 @@
 			hash_add_rcu(rxrpc_peer_hash,
 				     &candidate->hash_link, hash_key);
 
-		spin_unlock(&rxrpc_peer_hash_lock);
+		spin_unlock_bh(&rxrpc_peer_hash_lock);
 
 		if (peer)
 			kfree(candidate);
@@ -290,11 +368,7 @@
 			peer = candidate;
 	}
 
-	_net("PEER %d {%d,%pI4+%hu}",
-	     peer->debug_id,
-	     peer->srx.transport_type,
-	     &peer->srx.transport.sin.sin_addr,
-	     ntohs(peer->srx.transport.sin.sin_port));
+	_net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
 
 	_leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
 	return peer;
@@ -307,9 +381,24 @@
 {
 	ASSERT(hlist_empty(&peer->error_targets));
 
-	spin_lock(&rxrpc_peer_hash_lock);
+	spin_lock_bh(&rxrpc_peer_hash_lock);
 	hash_del_rcu(&peer->hash_link);
-	spin_unlock(&rxrpc_peer_hash_lock);
+	spin_unlock_bh(&rxrpc_peer_hash_lock);
 
 	kfree_rcu(peer, rcu);
 }
+
+/**
+ * rxrpc_kernel_get_peer - Get the peer address of a call
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ * @_srx: Where to place the result
+ *
+ * Get the address of the remote peer in a call.
+ */
+void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
+			   struct sockaddr_rxrpc *_srx)
+{
+	*_srx = call->peer->srx;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_peer);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index ced5f07..65cd980 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -17,12 +17,12 @@
 static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
 	[RXRPC_CONN_UNUSED]			= "Unused  ",
 	[RXRPC_CONN_CLIENT]			= "Client  ",
+	[RXRPC_CONN_SERVICE_PREALLOC]		= "SvPrealc",
 	[RXRPC_CONN_SERVICE_UNSECURED]		= "SvUnsec ",
 	[RXRPC_CONN_SERVICE_CHALLENGING]	= "SvChall ",
 	[RXRPC_CONN_SERVICE]			= "SvSecure",
 	[RXRPC_CONN_REMOTELY_ABORTED]		= "RmtAbort",
 	[RXRPC_CONN_LOCALLY_ABORTED]		= "LocAbort",
-	[RXRPC_CONN_NETWORK_ERROR]		= "NetError",
 };
 
 /*
@@ -30,6 +30,7 @@
  */
 static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
 {
+	rcu_read_lock();
 	read_lock(&rxrpc_call_lock);
 	return seq_list_start_head(&rxrpc_calls, *_pos);
 }
@@ -42,17 +43,21 @@
 static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
 {
 	read_unlock(&rxrpc_call_lock);
+	rcu_read_unlock();
 }
 
 static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
 {
-	struct rxrpc_connection *conn;
+	struct rxrpc_local *local;
+	struct rxrpc_sock *rx;
+	struct rxrpc_peer *peer;
 	struct rxrpc_call *call;
-	char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+	char lbuff[50], rbuff[50];
 
 	if (v == &rxrpc_calls) {
 		seq_puts(seq,
-			 "Proto Local                  Remote                "
+			 "Proto Local                                          "
+			 " Remote                                         "
 			 " SvID ConnID   CallID   End Use State    Abort   "
 			 " UserID\n");
 		return 0;
@@ -60,30 +65,35 @@
 
 	call = list_entry(v, struct rxrpc_call, link);
 
-	sprintf(lbuff, "%pI4:%u",
-		&call->local->srx.transport.sin.sin_addr,
-		ntohs(call->local->srx.transport.sin.sin_port));
+	rx = rcu_dereference(call->socket);
+	if (rx) {
+		local = READ_ONCE(rx->local);
+		if (local)
+			sprintf(lbuff, "%pISpc", &local->srx.transport);
+		else
+			strcpy(lbuff, "no_local");
+	} else {
+		strcpy(lbuff, "no_socket");
+	}
 
-	conn = call->conn;
-	if (conn)
-		sprintf(rbuff, "%pI4:%u",
-			&conn->params.peer->srx.transport.sin.sin_addr,
-			ntohs(conn->params.peer->srx.transport.sin.sin_port));
+	peer = call->peer;
+	if (peer)
+		sprintf(rbuff, "%pISpc", &peer->srx.transport);
 	else
 		strcpy(rbuff, "no_connection");
 
 	seq_printf(seq,
-		   "UDP   %-22.22s %-22.22s %4x %08x %08x %s %3u"
+		   "UDP   %-47.47s %-47.47s %4x %08x %08x %s %3u"
 		   " %-8.8s %08x %lx\n",
 		   lbuff,
 		   rbuff,
 		   call->service_id,
 		   call->cid,
 		   call->call_id,
-		   call->in_clientflag ? "Svc" : "Clt",
+		   rxrpc_is_service_call(call) ? "Svc" : "Clt",
 		   atomic_read(&call->usage),
 		   rxrpc_call_states[call->state],
-		   call->remote_abort ?: call->local_abort,
+		   call->abort_code,
 		   call->user_call_ID);
 
 	return 0;
@@ -115,13 +125,13 @@
 static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
 {
 	read_lock(&rxrpc_connection_lock);
-	return seq_list_start_head(&rxrpc_connections, *_pos);
+	return seq_list_start_head(&rxrpc_connection_proc_list, *_pos);
 }
 
 static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
 				       loff_t *pos)
 {
-	return seq_list_next(v, &rxrpc_connections, pos);
+	return seq_list_next(v, &rxrpc_connection_proc_list, pos);
 }
 
 static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
@@ -132,29 +142,31 @@
 static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 {
 	struct rxrpc_connection *conn;
-	char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+	char lbuff[50], rbuff[50];
 
-	if (v == &rxrpc_connections) {
+	if (v == &rxrpc_connection_proc_list) {
 		seq_puts(seq,
-			 "Proto Local                  Remote                "
+			 "Proto Local                                          "
+			 " Remote                                         "
 			 " SvID ConnID   End Use State    Key     "
 			 " Serial   ISerial\n"
 			 );
 		return 0;
 	}
 
-	conn = list_entry(v, struct rxrpc_connection, link);
+	conn = list_entry(v, struct rxrpc_connection, proc_link);
+	if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) {
+		strcpy(lbuff, "no_local");
+		strcpy(rbuff, "no_connection");
+		goto print;
+	}
 
-	sprintf(lbuff, "%pI4:%u",
-		&conn->params.local->srx.transport.sin.sin_addr,
-		ntohs(conn->params.local->srx.transport.sin.sin_port));
+	sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport);
 
-	sprintf(rbuff, "%pI4:%u",
-		&conn->params.peer->srx.transport.sin.sin_addr,
-		ntohs(conn->params.peer->srx.transport.sin.sin_port));
-
+	sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport);
+print:
 	seq_printf(seq,
-		   "UDP   %-22.22s %-22.22s %4x %08x %s %3u"
+		   "UDP   %-47.47s %-47.47s %4x %08x %s %3u"
 		   " %s %08x %08x %08x\n",
 		   lbuff,
 		   rbuff,
@@ -165,7 +177,7 @@
 		   rxrpc_conn_states[conn->state],
 		   key_serial(conn->params.key),
 		   atomic_read(&conn->serial),
-		   atomic_read(&conn->hi_serial));
+		   conn->hi_serial);
 
 	return 0;
 }
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index a3fa2ed..f05ea0a 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -19,418 +19,645 @@
 #include "ar-internal.h"
 
 /*
- * removal a call's user ID from the socket tree to make the user ID available
- * again and so that it won't be seen again in association with that call
+ * Post a call for attention by the socket or kernel service.  Further
+ * notifications are suppressed by putting recvmsg_link on a dummy queue.
  */
-void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
+void rxrpc_notify_socket(struct rxrpc_call *call)
 {
-	_debug("RELEASE CALL %d", call->debug_id);
+	struct rxrpc_sock *rx;
+	struct sock *sk;
 
-	if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
-		write_lock_bh(&rx->call_lock);
-		rb_erase(&call->sock_node, &call->socket->calls);
-		clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
-		write_unlock_bh(&rx->call_lock);
+	_enter("%d", call->debug_id);
+
+	if (!list_empty(&call->recvmsg_link))
+		return;
+
+	rcu_read_lock();
+
+	rx = rcu_dereference(call->socket);
+	sk = &rx->sk;
+	if (rx && sk->sk_state < RXRPC_CLOSE) {
+		if (call->notify_rx) {
+			call->notify_rx(sk, call, call->user_call_ID);
+		} else {
+			write_lock_bh(&rx->recvmsg_lock);
+			if (list_empty(&call->recvmsg_link)) {
+				rxrpc_get_call(call, rxrpc_call_got);
+				list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
+			}
+			write_unlock_bh(&rx->recvmsg_lock);
+
+			if (!sock_flag(sk, SOCK_DEAD)) {
+				_debug("call %ps", sk->sk_data_ready);
+				sk->sk_data_ready(sk);
+			}
+		}
 	}
 
-	read_lock_bh(&call->state_lock);
-	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
-	    !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
-		rxrpc_queue_call(call);
-	read_unlock_bh(&call->state_lock);
+	rcu_read_unlock();
+	_leave("");
 }
 
 /*
- * receive a message from an RxRPC socket
+ * Pass a call terminating message to userspace.
+ */
+static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
+{
+	u32 tmp = 0;
+	int ret;
+
+	switch (call->completion) {
+	case RXRPC_CALL_SUCCEEDED:
+		ret = 0;
+		if (rxrpc_is_service_call(call))
+			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp);
+		break;
+	case RXRPC_CALL_REMOTELY_ABORTED:
+		tmp = call->abort_code;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
+		break;
+	case RXRPC_CALL_LOCALLY_ABORTED:
+		tmp = call->abort_code;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
+		break;
+	case RXRPC_CALL_NETWORK_ERROR:
+		tmp = call->error;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp);
+		break;
+	case RXRPC_CALL_LOCAL_ERROR:
+		tmp = call->error;
+		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
+		break;
+	default:
+		pr_err("Invalid terminal call state %u\n", call->state);
+		BUG();
+		break;
+	}
+
+	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack,
+			    call->rx_pkt_offset, call->rx_pkt_len, ret);
+	return ret;
+}
+
+/*
+ * Pass back notification of a new call.  The call is added to the
+ * to-be-accepted list.  This means that the next call to be accepted might not
+ * be the last call seen awaiting acceptance, but unless we leave this on the
+ * front of the queue and block all other messages until someone gives us a
+ * user_ID for it, there's not a lot we can do.
+ */
+static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx,
+				  struct rxrpc_call *call,
+				  struct msghdr *msg, int flags)
+{
+	int tmp = 0, ret;
+
+	ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp);
+
+	if (ret == 0 && !(flags & MSG_PEEK)) {
+		_debug("to be accepted");
+		write_lock_bh(&rx->recvmsg_lock);
+		list_del_init(&call->recvmsg_link);
+		write_unlock_bh(&rx->recvmsg_lock);
+
+		rxrpc_get_call(call, rxrpc_call_got);
+		write_lock(&rx->call_lock);
+		list_add_tail(&call->accept_link, &rx->to_be_accepted);
+		write_unlock(&rx->call_lock);
+	}
+
+	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret);
+	return ret;
+}
+
+/*
+ * End the packet reception phase.
+ */
+static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
+{
+	_enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]);
+
+	trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top);
+	ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
+
+	if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
+		rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
+				  rxrpc_propose_ack_terminal_ack);
+		rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+	}
+
+	write_lock_bh(&call->state_lock);
+
+	switch (call->state) {
+	case RXRPC_CALL_CLIENT_RECV_REPLY:
+		__rxrpc_call_completed(call);
+		break;
+
+	case RXRPC_CALL_SERVER_RECV_REQUEST:
+		call->tx_phase = true;
+		call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
+		break;
+	default:
+		break;
+	}
+
+	write_unlock_bh(&call->state_lock);
+}
+
+/*
+ * Discard a packet we've used up and advance the Rx window by one.
+ */
+static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	rxrpc_serial_t serial;
+	rxrpc_seq_t hard_ack, top;
+	u8 flags;
+	int ix;
+
+	_enter("%d", call->debug_id);
+
+	hard_ack = call->rx_hard_ack;
+	top = smp_load_acquire(&call->rx_top);
+	ASSERT(before(hard_ack, top));
+
+	hard_ack++;
+	ix = hard_ack & RXRPC_RXTX_BUFF_MASK;
+	skb = call->rxtx_buffer[ix];
+	rxrpc_see_skb(skb, rxrpc_skb_rx_rotated);
+	sp = rxrpc_skb(skb);
+	flags = sp->hdr.flags;
+	serial = sp->hdr.serial;
+	if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO)
+		serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1;
+
+	call->rxtx_buffer[ix] = NULL;
+	call->rxtx_annotations[ix] = 0;
+	/* Barrier against rxrpc_input_data(). */
+	smp_store_release(&call->rx_hard_ack, hard_ack);
+
+	rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+
+	_debug("%u,%u,%02x", hard_ack, top, flags);
+	trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack);
+	if (flags & RXRPC_LAST_PACKET) {
+		rxrpc_end_rx_phase(call, serial);
+	} else {
+		/* Check to see if there's an ACK that needs sending. */
+		if (after_eq(hard_ack, call->ackr_consumed + 2) ||
+		    after_eq(top, call->ackr_seen + 2) ||
+		    (hard_ack == top && after(hard_ack, call->ackr_consumed)))
+			rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial,
+					  true, false,
+					  rxrpc_propose_ack_rotate_rx);
+		if (call->ackr_reason)
+			rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+	}
+}
+
+/*
+ * Decrypt and verify a (sub)packet.  The packet's length may be changed due to
+ * padding, but if this is the case, the packet length will be resident in the
+ * socket buffer.  Note that we can't modify the master skb info as the skb may
+ * be the home to multiple subpackets.
+ */
+static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
+			       u8 annotation,
+			       unsigned int offset, unsigned int len)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	rxrpc_seq_t seq = sp->hdr.seq;
+	u16 cksum = sp->hdr.cksum;
+
+	_enter("");
+
+	/* For all but the head jumbo subpacket, the security checksum is in a
+	 * jumbo header immediately prior to the data.
+	 */
+	if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) {
+		__be16 tmp;
+		if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0)
+			BUG();
+		cksum = ntohs(tmp);
+		seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1;
+	}
+
+	return call->conn->security->verify_packet(call, skb, offset, len,
+						   seq, cksum);
+}
+
+/*
+ * Locate the data within a packet.  This is complicated by:
+ *
+ * (1) An skb may contain a jumbo packet - so we have to find the appropriate
+ *     subpacket.
+ *
+ * (2) The (sub)packets may be encrypted and, if so, the encrypted portion
+ *     contains an extra header which includes the true length of the data,
+ *     excluding any encrypted padding.
+ */
+static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+			     u8 *_annotation,
+			     unsigned int *_offset, unsigned int *_len)
+{
+	unsigned int offset = sizeof(struct rxrpc_wire_header);
+	unsigned int len = *_len;
+	int ret;
+	u8 annotation = *_annotation;
+
+	/* Locate the subpacket */
+	len = skb->len - offset;
+	if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) {
+		offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) *
+			   RXRPC_JUMBO_SUBPKTLEN);
+		len = (annotation & RXRPC_RX_ANNO_JLAST) ?
+			skb->len - offset : RXRPC_JUMBO_SUBPKTLEN;
+	}
+
+	if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) {
+		ret = rxrpc_verify_packet(call, skb, annotation, offset, len);
+		if (ret < 0)
+			return ret;
+		*_annotation |= RXRPC_RX_ANNO_VERIFIED;
+	}
+
+	*_offset = offset;
+	*_len = len;
+	call->conn->security->locate_data(call, skb, _offset, _len);
+	return 0;
+}
+
+/*
+ * Deliver messages to a call.  This keeps processing packets until the buffer
+ * is filled and we find either more DATA (returns 0) or the end of the DATA
+ * (returns 1).  If more packets are required, it returns -EAGAIN.
+ */
+static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
+			      struct msghdr *msg, struct iov_iter *iter,
+			      size_t len, int flags, size_t *_offset)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	rxrpc_seq_t hard_ack, top, seq;
+	size_t remain;
+	bool last;
+	unsigned int rx_pkt_offset, rx_pkt_len;
+	int ix, copy, ret = -EAGAIN, ret2;
+
+	rx_pkt_offset = call->rx_pkt_offset;
+	rx_pkt_len = call->rx_pkt_len;
+
+	if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
+		seq = call->rx_hard_ack;
+		ret = 1;
+		goto done;
+	}
+
+	/* Barriers against rxrpc_input_data(). */
+	hard_ack = call->rx_hard_ack;
+	top = smp_load_acquire(&call->rx_top);
+	for (seq = hard_ack + 1; before_eq(seq, top); seq++) {
+		ix = seq & RXRPC_RXTX_BUFF_MASK;
+		skb = call->rxtx_buffer[ix];
+		if (!skb) {
+			trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq,
+					    rx_pkt_offset, rx_pkt_len, 0);
+			break;
+		}
+		smp_rmb();
+		rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+		sp = rxrpc_skb(skb);
+
+		if (!(flags & MSG_PEEK))
+			trace_rxrpc_receive(call, rxrpc_receive_front,
+					    sp->hdr.serial, seq);
+
+		if (msg)
+			sock_recv_timestamp(msg, sock->sk, skb);
+
+		if (rx_pkt_offset == 0) {
+			ret2 = rxrpc_locate_data(call, skb,
+						 &call->rxtx_annotations[ix],
+						 &rx_pkt_offset, &rx_pkt_len);
+			trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq,
+					    rx_pkt_offset, rx_pkt_len, ret2);
+			if (ret2 < 0) {
+				ret = ret2;
+				goto out;
+			}
+		} else {
+			trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq,
+					    rx_pkt_offset, rx_pkt_len, 0);
+		}
+
+		/* We have to handle short, empty and used-up DATA packets. */
+		remain = len - *_offset;
+		copy = rx_pkt_len;
+		if (copy > remain)
+			copy = remain;
+		if (copy > 0) {
+			ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter,
+						      copy);
+			if (ret2 < 0) {
+				ret = ret2;
+				goto out;
+			}
+
+			/* handle piecemeal consumption of data packets */
+			rx_pkt_offset += copy;
+			rx_pkt_len -= copy;
+			*_offset += copy;
+		}
+
+		if (rx_pkt_len > 0) {
+			trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq,
+					    rx_pkt_offset, rx_pkt_len, 0);
+			ASSERTCMP(*_offset, ==, len);
+			ret = 0;
+			break;
+		}
+
+		/* The whole packet has been transferred. */
+		last = sp->hdr.flags & RXRPC_LAST_PACKET;
+		if (!(flags & MSG_PEEK))
+			rxrpc_rotate_rx_window(call);
+		rx_pkt_offset = 0;
+		rx_pkt_len = 0;
+
+		if (last) {
+			ASSERTCMP(seq, ==, READ_ONCE(call->rx_top));
+			ret = 1;
+			goto out;
+		}
+	}
+
+out:
+	if (!(flags & MSG_PEEK)) {
+		call->rx_pkt_offset = rx_pkt_offset;
+		call->rx_pkt_len = rx_pkt_len;
+	}
+done:
+	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq,
+			    rx_pkt_offset, rx_pkt_len, ret);
+	return ret;
+}
+
+/*
+ * Receive a message from an RxRPC socket
  * - we need to be careful about two or more threads calling recvmsg
  *   simultaneously
  */
 int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		  int flags)
 {
-	struct rxrpc_skb_priv *sp;
-	struct rxrpc_call *call = NULL, *continue_call = NULL;
+	struct rxrpc_call *call;
 	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
-	struct sk_buff *skb;
+	struct list_head *l;
+	size_t copied = 0;
 	long timeo;
-	int copy, ret, ullen, offset, copied = 0;
-	u32 abort_code;
+	int ret;
 
 	DEFINE_WAIT(wait);
 
-	_enter(",,,%zu,%d", len, flags);
+	trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0);
 
 	if (flags & (MSG_OOB | MSG_TRUNC))
 		return -EOPNOTSUPP;
 
-	ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long);
-
 	timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
-	msg->msg_flags |= MSG_MORE;
 
+try_again:
 	lock_sock(&rx->sk);
 
-	for (;;) {
-		/* return immediately if a client socket has no outstanding
-		 * calls */
-		if (RB_EMPTY_ROOT(&rx->calls)) {
-			if (copied)
-				goto out;
-			if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
-				release_sock(&rx->sk);
-				if (continue_call)
-					rxrpc_put_call(continue_call);
-				return -ENODATA;
-			}
-		}
-
-		/* get the next message on the Rx queue */
-		skb = skb_peek(&rx->sk.sk_receive_queue);
-		if (!skb) {
-			/* nothing remains on the queue */
-			if (copied &&
-			    (flags & MSG_PEEK || timeo == 0))
-				goto out;
-
-			/* wait for a message to turn up */
-			release_sock(&rx->sk);
-			prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
-						  TASK_INTERRUPTIBLE);
-			ret = sock_error(&rx->sk);
-			if (ret)
-				goto wait_error;
-
-			if (skb_queue_empty(&rx->sk.sk_receive_queue)) {
-				if (signal_pending(current))
-					goto wait_interrupted;
-				timeo = schedule_timeout(timeo);
-			}
-			finish_wait(sk_sleep(&rx->sk), &wait);
-			lock_sock(&rx->sk);
-			continue;
-		}
-
-	peek_next_packet:
-		sp = rxrpc_skb(skb);
-		call = sp->call;
-		ASSERT(call != NULL);
-
-		_debug("next pkt %s", rxrpc_pkts[sp->hdr.type]);
-
-		/* make sure we wait for the state to be updated in this call */
-		spin_lock_bh(&call->lock);
-		spin_unlock_bh(&call->lock);
-
-		if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
-			_debug("packet from released call");
-			if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
-				BUG();
-			rxrpc_free_skb(skb);
-			continue;
-		}
-
-		/* determine whether to continue last data receive */
-		if (continue_call) {
-			_debug("maybe cont");
-			if (call != continue_call ||
-			    skb->mark != RXRPC_SKB_MARK_DATA) {
-				release_sock(&rx->sk);
-				rxrpc_put_call(continue_call);
-				_leave(" = %d [noncont]", copied);
-				return copied;
-			}
-		}
-
-		rxrpc_get_call(call);
-
-		/* copy the peer address and timestamp */
-		if (!continue_call) {
-			if (msg->msg_name) {
-				size_t len =
-					sizeof(call->conn->params.peer->srx);
-				memcpy(msg->msg_name,
-				       &call->conn->params.peer->srx, len);
-				msg->msg_namelen = len;
-			}
-			sock_recv_timestamp(msg, &rx->sk, skb);
-		}
-
-		/* receive the message */
-		if (skb->mark != RXRPC_SKB_MARK_DATA)
-			goto receive_non_data_message;
-
-		_debug("recvmsg DATA #%u { %d, %d }",
-		       sp->hdr.seq, skb->len, sp->offset);
-
-		if (!continue_call) {
-			/* only set the control data once per recvmsg() */
-			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
-				       ullen, &call->user_call_ID);
-			if (ret < 0)
-				goto copy_error;
-			ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
-		}
-
-		ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
-		ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
-		call->rx_data_recv = sp->hdr.seq;
-
-		ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
-
-		offset = sp->offset;
-		copy = skb->len - offset;
-		if (copy > len - copied)
-			copy = len - copied;
-
-		ret = skb_copy_datagram_msg(skb, offset, msg, copy);
-
-		if (ret < 0)
-			goto copy_error;
-
-		/* handle piecemeal consumption of data packets */
-		_debug("copied %d+%d", copy, copied);
-
-		offset += copy;
-		copied += copy;
-
-		if (!(flags & MSG_PEEK))
-			sp->offset = offset;
-
-		if (sp->offset < skb->len) {
-			_debug("buffer full");
-			ASSERTCMP(copied, ==, len);
-			break;
-		}
-
-		/* we transferred the whole data packet */
-		if (sp->hdr.flags & RXRPC_LAST_PACKET) {
-			_debug("last");
-			if (rxrpc_conn_is_client(call->conn)) {
-				 /* last byte of reply received */
-				ret = copied;
-				goto terminal_message;
-			}
-
-			/* last bit of request received */
-			if (!(flags & MSG_PEEK)) {
-				_debug("eat packet");
-				if (skb_dequeue(&rx->sk.sk_receive_queue) !=
-				    skb)
-					BUG();
-				rxrpc_free_skb(skb);
-			}
-			msg->msg_flags &= ~MSG_MORE;
-			break;
-		}
-
-		/* move on to the next data message */
-		_debug("next");
-		if (!continue_call)
-			continue_call = sp->call;
-		else
-			rxrpc_put_call(call);
-		call = NULL;
-
-		if (flags & MSG_PEEK) {
-			_debug("peek next");
-			skb = skb->next;
-			if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue)
-				break;
-			goto peek_next_packet;
-		}
-
-		_debug("eat packet");
-		if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
-			BUG();
-		rxrpc_free_skb(skb);
+	/* Return immediately if a client socket has no outstanding calls */
+	if (RB_EMPTY_ROOT(&rx->calls) &&
+	    list_empty(&rx->recvmsg_q) &&
+	    rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
+		release_sock(&rx->sk);
+		return -ENODATA;
 	}
 
-	/* end of non-terminal data packet reception for the moment */
-	_debug("end rcv data");
-out:
-	release_sock(&rx->sk);
-	if (call)
-		rxrpc_put_call(call);
-	if (continue_call)
-		rxrpc_put_call(continue_call);
-	_leave(" = %d [data]", copied);
-	return copied;
-
-	/* handle non-DATA messages such as aborts, incoming connections and
-	 * final ACKs */
-receive_non_data_message:
-	_debug("non-data");
-
-	if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) {
-		_debug("RECV NEW CALL");
-		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code);
-		if (ret < 0)
-			goto copy_error;
-		if (!(flags & MSG_PEEK)) {
-			if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
-				BUG();
-			rxrpc_free_skb(skb);
+	if (list_empty(&rx->recvmsg_q)) {
+		ret = -EWOULDBLOCK;
+		if (timeo == 0) {
+			call = NULL;
+			goto error_no_call;
 		}
-		goto out;
+
+		release_sock(&rx->sk);
+
+		/* Wait for something to happen */
+		prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
+					  TASK_INTERRUPTIBLE);
+		ret = sock_error(&rx->sk);
+		if (ret)
+			goto wait_error;
+
+		if (list_empty(&rx->recvmsg_q)) {
+			if (signal_pending(current))
+				goto wait_interrupted;
+			trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait,
+					    0, 0, 0, 0);
+			timeo = schedule_timeout(timeo);
+		}
+		finish_wait(sk_sleep(&rx->sk), &wait);
+		goto try_again;
 	}
 
-	ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
-		       ullen, &call->user_call_ID);
-	if (ret < 0)
-		goto copy_error;
-	ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+	/* Find the next call and dequeue it if we're not just peeking.  If we
+	 * do dequeue it, that comes with a ref that we will need to release.
+	 */
+	write_lock_bh(&rx->recvmsg_lock);
+	l = rx->recvmsg_q.next;
+	call = list_entry(l, struct rxrpc_call, recvmsg_link);
+	if (!(flags & MSG_PEEK))
+		list_del_init(&call->recvmsg_link);
+	else
+		rxrpc_get_call(call, rxrpc_call_got);
+	write_unlock_bh(&rx->recvmsg_lock);
 
-	switch (skb->mark) {
-	case RXRPC_SKB_MARK_DATA:
+	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0);
+
+	if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
 		BUG();
-	case RXRPC_SKB_MARK_FINAL_ACK:
-		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code);
+
+	if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+		if (flags & MSG_CMSG_COMPAT) {
+			unsigned int id32 = call->user_call_ID;
+
+			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+				       sizeof(unsigned int), &id32);
+		} else {
+			ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+				       sizeof(unsigned long),
+				       &call->user_call_ID);
+		}
+		if (ret < 0)
+			goto error;
+	}
+
+	if (msg->msg_name) {
+		size_t len = sizeof(call->conn->params.peer->srx);
+		memcpy(msg->msg_name, &call->conn->params.peer->srx, len);
+		msg->msg_namelen = len;
+	}
+
+	switch (call->state) {
+	case RXRPC_CALL_SERVER_ACCEPTING:
+		ret = rxrpc_recvmsg_new_call(rx, call, msg, flags);
 		break;
-	case RXRPC_SKB_MARK_BUSY:
-		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code);
-		break;
-	case RXRPC_SKB_MARK_REMOTE_ABORT:
-		abort_code = call->remote_abort;
-		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
-		break;
-	case RXRPC_SKB_MARK_LOCAL_ABORT:
-		abort_code = call->local_abort;
-		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
-		break;
-	case RXRPC_SKB_MARK_NET_ERROR:
-		_debug("RECV NET ERROR %d", sp->error);
-		abort_code = sp->error;
-		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code);
-		break;
-	case RXRPC_SKB_MARK_LOCAL_ERROR:
-		_debug("RECV LOCAL ERROR %d", sp->error);
-		abort_code = sp->error;
-		ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4,
-			       &abort_code);
+	case RXRPC_CALL_CLIENT_RECV_REPLY:
+	case RXRPC_CALL_SERVER_RECV_REQUEST:
+	case RXRPC_CALL_SERVER_ACK_REQUEST:
+		ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
+					 flags, &copied);
+		if (ret == -EAGAIN)
+			ret = 0;
+
+		if (after(call->rx_top, call->rx_hard_ack) &&
+		    call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK])
+			rxrpc_notify_socket(call);
 		break;
 	default:
-		pr_err("Unknown packet mark %u\n", skb->mark);
-		BUG();
+		ret = 0;
 		break;
 	}
 
 	if (ret < 0)
-		goto copy_error;
+		goto error;
 
-terminal_message:
-	_debug("terminal");
-	msg->msg_flags &= ~MSG_MORE;
-	msg->msg_flags |= MSG_EOR;
-
-	if (!(flags & MSG_PEEK)) {
-		_net("free terminal skb %p", skb);
-		if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
-			BUG();
-		rxrpc_free_skb(skb);
-		rxrpc_remove_user_ID(rx, call);
+	if (call->state == RXRPC_CALL_COMPLETE) {
+		ret = rxrpc_recvmsg_term(call, msg);
+		if (ret < 0)
+			goto error;
+		if (!(flags & MSG_PEEK))
+			rxrpc_release_call(rx, call);
+		msg->msg_flags |= MSG_EOR;
+		ret = 1;
 	}
 
-	release_sock(&rx->sk);
-	rxrpc_put_call(call);
-	if (continue_call)
-		rxrpc_put_call(continue_call);
-	_leave(" = %d", ret);
-	return ret;
+	if (ret == 0)
+		msg->msg_flags |= MSG_MORE;
+	else
+		msg->msg_flags &= ~MSG_MORE;
+	ret = copied;
 
-copy_error:
-	_debug("copy error");
+error:
+	rxrpc_put_call(call, rxrpc_call_put);
+error_no_call:
 	release_sock(&rx->sk);
-	rxrpc_put_call(call);
-	if (continue_call)
-		rxrpc_put_call(continue_call);
-	_leave(" = %d", ret);
+	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
 	return ret;
 
 wait_interrupted:
 	ret = sock_intr_errno(timeo);
 wait_error:
 	finish_wait(sk_sleep(&rx->sk), &wait);
-	if (continue_call)
-		rxrpc_put_call(continue_call);
-	if (copied)
-		copied = ret;
-	_leave(" = %d [waitfail %d]", copied, ret);
-	return copied;
-
+	call = NULL;
+	goto error_no_call;
 }
 
 /**
- * rxrpc_kernel_data_delivered - Record delivery of data message
- * @skb: Message holding data
+ * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
+ * @sock: The socket that the call exists on
+ * @call: The call to send data through
+ * @buf: The buffer to receive into
+ * @size: The size of the buffer, including data already read
+ * @_offset: The running offset into the buffer.
+ * @want_more: True if more data is expected to be read
+ * @_abort: Where the abort code is stored if -ECONNABORTED is returned
  *
- * Record the delivery of a data message.  This permits RxRPC to keep its
- * tracking correct.  The socket buffer will be deleted.
- */
-void rxrpc_kernel_data_delivered(struct sk_buff *skb)
-{
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	struct rxrpc_call *call = sp->call;
-
-	ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
-	ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
-	call->rx_data_recv = sp->hdr.seq;
-
-	ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
-	rxrpc_free_skb(skb);
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_data_delivered);
-
-/**
- * rxrpc_kernel_is_data_last - Determine if data message is last one
- * @skb: Message holding data
+ * Allow a kernel service to receive data and pick up information about the
+ * state of a call.  Returns 0 if got what was asked for and there's more
+ * available, 1 if we got what was asked for and we're at the end of the data
+ * and -EAGAIN if we need more data.
  *
- * Determine if data message is last one for the parent call.
- */
-bool rxrpc_kernel_is_data_last(struct sk_buff *skb)
-{
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
-	ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA);
-
-	return sp->hdr.flags & RXRPC_LAST_PACKET;
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_is_data_last);
-
-/**
- * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message
- * @skb: Message indicating an abort
+ * Note that we may return -EAGAIN to drain empty packets at the end of the
+ * data, even if we've already copied over the requested data.
  *
- * Get the abort code from an RxRPC abort message.
+ * This function adds the amount it transfers to *_offset, so this should be
+ * precleared as appropriate.  Note that the amount remaining in the buffer is
+ * taken to be size - *_offset.
+ *
+ * *_abort should also be initialised to 0.
  */
-u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb)
+int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
+			   void *buf, size_t size, size_t *_offset,
+			   bool want_more, u32 *_abort)
 {
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	struct iov_iter iter;
+	struct kvec iov;
+	int ret;
 
-	switch (skb->mark) {
-	case RXRPC_SKB_MARK_REMOTE_ABORT:
-		return sp->call->remote_abort;
-	case RXRPC_SKB_MARK_LOCAL_ABORT:
-		return sp->call->local_abort;
+	_enter("{%d,%s},%zu/%zu,%d",
+	       call->debug_id, rxrpc_call_states[call->state],
+	       *_offset, size, want_more);
+
+	ASSERTCMP(*_offset, <=, size);
+	ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING);
+
+	iov.iov_base = buf + *_offset;
+	iov.iov_len = size - *_offset;
+	iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset);
+
+	lock_sock(sock->sk);
+
+	switch (call->state) {
+	case RXRPC_CALL_CLIENT_RECV_REPLY:
+	case RXRPC_CALL_SERVER_RECV_REQUEST:
+	case RXRPC_CALL_SERVER_ACK_REQUEST:
+		ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0,
+					 _offset);
+		if (ret < 0)
+			goto out;
+
+		/* We can only reach here with a partially full buffer if we
+		 * have reached the end of the data.  We must otherwise have a
+		 * full buffer or have been given -EAGAIN.
+		 */
+		if (ret == 1) {
+			if (*_offset < size)
+				goto short_data;
+			if (!want_more)
+				goto read_phase_complete;
+			ret = 0;
+			goto out;
+		}
+
+		if (!want_more)
+			goto excess_data;
+		goto out;
+
+	case RXRPC_CALL_COMPLETE:
+		goto call_complete;
+
 	default:
-		BUG();
+		ret = -EINPROGRESS;
+		goto out;
 	}
+
+read_phase_complete:
+	ret = 1;
+out:
+	release_sock(sock->sk);
+	_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
+	return ret;
+
+short_data:
+	ret = -EBADMSG;
+	goto out;
+excess_data:
+	ret = -EMSGSIZE;
+	goto out;
+call_complete:
+	*_abort = call->abort_code;
+	ret = call->error;
+	if (call->completion == RXRPC_CALL_SUCCEEDED) {
+		ret = 1;
+		if (size > 0)
+			ret = -ECONNRESET;
+	}
+	goto out;
 }
-
-EXPORT_SYMBOL(rxrpc_kernel_get_abort_code);
-
-/**
- * rxrpc_kernel_get_error - Get the error number from an RxRPC error message
- * @skb: Message indicating an error
- *
- * Get the error number from an RxRPC error message.
- */
-int rxrpc_kernel_get_error_number(struct sk_buff *skb)
-{
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
-	return sp->error;
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_get_error_number);
+EXPORT_SYMBOL(rxrpc_kernel_recv_data);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 63afa9e..627abed 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -80,12 +80,10 @@
 	case RXRPC_SECURITY_AUTH:
 		conn->size_align = 8;
 		conn->security_size = sizeof(struct rxkad_level1_hdr);
-		conn->header_size += sizeof(struct rxkad_level1_hdr);
 		break;
 	case RXRPC_SECURITY_ENCRYPT:
 		conn->size_align = 8;
 		conn->security_size = sizeof(struct rxkad_level2_hdr);
-		conn->header_size += sizeof(struct rxkad_level2_hdr);
 		break;
 	default:
 		ret = -EKEYREJECTED;
@@ -161,7 +159,7 @@
 
 	_enter("");
 
-	check = sp->hdr.seq ^ sp->hdr.callNumber;
+	check = sp->hdr.seq ^ call->call_id;
 	data_size |= (u32)check << 16;
 
 	hdr.data_size = htonl(data_size);
@@ -205,7 +203,7 @@
 
 	_enter("");
 
-	check = sp->hdr.seq ^ sp->hdr.callNumber;
+	check = sp->hdr.seq ^ call->call_id;
 
 	rxkhdr.data_size = htonl(data_size | (u32)check << 16);
 	rxkhdr.checksum = 0;
@@ -275,9 +273,9 @@
 	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 
 	/* calculate the security checksum */
-	x = call->channel << (32 - RXRPC_CIDSHIFT);
+	x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
 	x |= sp->hdr.seq & 0x3fffffff;
-	call->crypto_buf[0] = htonl(sp->hdr.callNumber);
+	call->crypto_buf[0] = htonl(call->call_id);
 	call->crypto_buf[1] = htonl(x);
 
 	sg_init_one(&sg, call->crypto_buf, 8);
@@ -316,12 +314,11 @@
 /*
  * decrypt partial encryption on a packet (level 1 security)
  */
-static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
-				    struct sk_buff *skb,
-				    u32 *_abort_code)
+static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
+				 unsigned int offset, unsigned int len,
+				 rxrpc_seq_t seq)
 {
 	struct rxkad_level1_hdr sechdr;
-	struct rxrpc_skb_priv *sp;
 	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
 	struct rxrpc_crypt iv;
 	struct scatterlist sg[16];
@@ -332,15 +329,20 @@
 
 	_enter("");
 
-	sp = rxrpc_skb(skb);
+	if (len < 8) {
+		rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO);
+		goto protocol_error;
+	}
 
-	/* we want to decrypt the skbuff in-place */
+	/* Decrypt the skbuff in-place.  TODO: We really want to decrypt
+	 * directly into the target buffer.
+	 */
 	nsg = skb_cow_data(skb, 0, &trailer);
 	if (nsg < 0 || nsg > 16)
 		goto nomem;
 
 	sg_init_table(sg, nsg);
-	skb_to_sgvec(skb, sg, 0, 8);
+	skb_to_sgvec(skb, sg, offset, 8);
 
 	/* start the decryption afresh */
 	memset(&iv, 0, sizeof(iv));
@@ -351,35 +353,35 @@
 	crypto_skcipher_decrypt(req);
 	skcipher_request_zero(req);
 
-	/* remove the decrypted packet length */
-	if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
-		goto datalen_error;
-	if (!skb_pull(skb, sizeof(sechdr)))
-		BUG();
+	/* Extract the decrypted packet length */
+	if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
+		rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO);
+		goto protocol_error;
+	}
+	offset += sizeof(sechdr);
+	len -= sizeof(sechdr);
 
 	buf = ntohl(sechdr.data_size);
 	data_size = buf & 0xffff;
 
 	check = buf >> 16;
-	check ^= sp->hdr.seq ^ sp->hdr.callNumber;
+	check ^= seq ^ call->call_id;
 	check &= 0xffff;
 	if (check != 0) {
-		*_abort_code = RXKADSEALEDINCON;
+		rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO);
 		goto protocol_error;
 	}
 
-	/* shorten the packet to remove the padding */
-	if (data_size > skb->len)
-		goto datalen_error;
-	else if (data_size < skb->len)
-		skb->len = data_size;
+	if (data_size > len) {
+		rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO);
+		goto protocol_error;
+	}
 
 	_leave(" = 0 [dlen=%x]", data_size);
 	return 0;
 
-datalen_error:
-	*_abort_code = RXKADDATALEN;
 protocol_error:
+	rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
 	_leave(" = -EPROTO");
 	return -EPROTO;
 
@@ -391,13 +393,12 @@
 /*
  * wholly decrypt a packet (level 2 security)
  */
-static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
-				       struct sk_buff *skb,
-				       u32 *_abort_code)
+static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
+				 unsigned int offset, unsigned int len,
+				 rxrpc_seq_t seq)
 {
 	const struct rxrpc_key_token *token;
 	struct rxkad_level2_hdr sechdr;
-	struct rxrpc_skb_priv *sp;
 	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
 	struct rxrpc_crypt iv;
 	struct scatterlist _sg[4], *sg;
@@ -408,9 +409,14 @@
 
 	_enter(",{%d}", skb->len);
 
-	sp = rxrpc_skb(skb);
+	if (len < 8) {
+		rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO);
+		goto protocol_error;
+	}
 
-	/* we want to decrypt the skbuff in-place */
+	/* Decrypt the skbuff in-place.  TODO: We really want to decrypt
+	 * directly into the target buffer.
+	 */
 	nsg = skb_cow_data(skb, 0, &trailer);
 	if (nsg < 0)
 		goto nomem;
@@ -423,7 +429,7 @@
 	}
 
 	sg_init_table(sg, nsg);
-	skb_to_sgvec(skb, sg, 0, skb->len);
+	skb_to_sgvec(skb, sg, offset, len);
 
 	/* decrypt from the session key */
 	token = call->conn->params.key->payload.data[0];
@@ -431,41 +437,41 @@
 
 	skcipher_request_set_tfm(req, call->conn->cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
-	skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
+	skcipher_request_set_crypt(req, sg, sg, len, iv.x);
 	crypto_skcipher_decrypt(req);
 	skcipher_request_zero(req);
 	if (sg != _sg)
 		kfree(sg);
 
-	/* remove the decrypted packet length */
-	if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
-		goto datalen_error;
-	if (!skb_pull(skb, sizeof(sechdr)))
-		BUG();
+	/* Extract the decrypted packet length */
+	if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
+		rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO);
+		goto protocol_error;
+	}
+	offset += sizeof(sechdr);
+	len -= sizeof(sechdr);
 
 	buf = ntohl(sechdr.data_size);
 	data_size = buf & 0xffff;
 
 	check = buf >> 16;
-	check ^= sp->hdr.seq ^ sp->hdr.callNumber;
+	check ^= seq ^ call->call_id;
 	check &= 0xffff;
 	if (check != 0) {
-		*_abort_code = RXKADSEALEDINCON;
+		rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO);
 		goto protocol_error;
 	}
 
-	/* shorten the packet to remove the padding */
-	if (data_size > skb->len)
-		goto datalen_error;
-	else if (data_size < skb->len)
-		skb->len = data_size;
+	if (data_size > len) {
+		rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO);
+		goto protocol_error;
+	}
 
 	_leave(" = 0 [dlen=%x]", data_size);
 	return 0;
 
-datalen_error:
-	*_abort_code = RXKADDATALEN;
 protocol_error:
+	rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
 	_leave(" = -EPROTO");
 	return -EPROTO;
 
@@ -475,40 +481,31 @@
 }
 
 /*
- * verify the security on a received packet
+ * Verify the security on a received packet or subpacket (if part of a
+ * jumbo packet).
  */
-static int rxkad_verify_packet(struct rxrpc_call *call,
-			       struct sk_buff *skb,
-			       u32 *_abort_code)
+static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
+			       unsigned int offset, unsigned int len,
+			       rxrpc_seq_t seq, u16 expected_cksum)
 {
 	SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
-	struct rxrpc_skb_priv *sp;
 	struct rxrpc_crypt iv;
 	struct scatterlist sg;
 	u16 cksum;
 	u32 x, y;
-	int ret;
-
-	sp = rxrpc_skb(skb);
 
 	_enter("{%d{%x}},{#%u}",
-	       call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq);
+	       call->debug_id, key_serial(call->conn->params.key), seq);
 
 	if (!call->conn->cipher)
 		return 0;
 
-	if (sp->hdr.securityIndex != RXRPC_SECURITY_RXKAD) {
-		*_abort_code = RXKADINCONSISTENCY;
-		_leave(" = -EPROTO [not rxkad]");
-		return -EPROTO;
-	}
-
 	/* continue encrypting from where we left off */
 	memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
 
 	/* validate the security checksum */
-	x = call->channel << (32 - RXRPC_CIDSHIFT);
-	x |= sp->hdr.seq & 0x3fffffff;
+	x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
+	x |= seq & 0x3fffffff;
 	call->crypto_buf[0] = htonl(call->call_id);
 	call->crypto_buf[1] = htonl(x);
 
@@ -524,29 +521,69 @@
 	if (cksum == 0)
 		cksum = 1; /* zero checksums are not permitted */
 
-	if (sp->hdr.cksum != cksum) {
-		*_abort_code = RXKADSEALEDINCON;
+	if (cksum != expected_cksum) {
+		rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO);
+		rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
 		_leave(" = -EPROTO [csum failed]");
 		return -EPROTO;
 	}
 
 	switch (call->conn->params.security_level) {
 	case RXRPC_SECURITY_PLAIN:
-		ret = 0;
-		break;
+		return 0;
 	case RXRPC_SECURITY_AUTH:
-		ret = rxkad_verify_packet_auth(call, skb, _abort_code);
-		break;
+		return rxkad_verify_packet_1(call, skb, offset, len, seq);
 	case RXRPC_SECURITY_ENCRYPT:
-		ret = rxkad_verify_packet_encrypt(call, skb, _abort_code);
-		break;
+		return rxkad_verify_packet_2(call, skb, offset, len, seq);
 	default:
-		ret = -ENOANO;
-		break;
+		return -ENOANO;
 	}
+}
 
-	_leave(" = %d", ret);
-	return ret;
+/*
+ * Locate the data contained in a packet that was partially encrypted.
+ */
+static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb,
+				unsigned int *_offset, unsigned int *_len)
+{
+	struct rxkad_level1_hdr sechdr;
+
+	if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0)
+		BUG();
+	*_offset += sizeof(sechdr);
+	*_len = ntohl(sechdr.data_size) & 0xffff;
+}
+
+/*
+ * Locate the data contained in a packet that was completely encrypted.
+ */
+static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb,
+				unsigned int *_offset, unsigned int *_len)
+{
+	struct rxkad_level2_hdr sechdr;
+
+	if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0)
+		BUG();
+	*_offset += sizeof(sechdr);
+	*_len = ntohl(sechdr.data_size) & 0xffff;
+}
+
+/*
+ * Locate the data contained in an already decrypted packet.
+ */
+static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+			      unsigned int *_offset, unsigned int *_len)
+{
+	switch (call->conn->params.security_level) {
+	case RXRPC_SECURITY_AUTH:
+		rxkad_locate_data_1(call, skb, _offset, _len);
+		return;
+	case RXRPC_SECURITY_ENCRYPT:
+		rxkad_locate_data_2(call, skb, _offset, _len);
+		return;
+	default:
+		return;
+	}
 }
 
 /*
@@ -716,7 +753,7 @@
 	struct rxkad_challenge challenge;
 	struct rxkad_response resp
 		__attribute__((aligned(8))); /* must be aligned for crypto */
-	struct rxrpc_skb_priv *sp;
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	u32 version, nonce, min_level, abort_code;
 	int ret;
 
@@ -734,8 +771,8 @@
 	}
 
 	abort_code = RXKADPACKETSHORT;
-	sp = rxrpc_skb(skb);
-	if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0)
+	if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+			  &challenge, sizeof(challenge)) < 0)
 		goto protocol_error;
 
 	version = ntohl(challenge.version);
@@ -981,7 +1018,7 @@
 {
 	struct rxkad_response response
 		__attribute__((aligned(8))); /* must be aligned for crypto */
-	struct rxrpc_skb_priv *sp;
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_crypt session_key;
 	time_t expiry;
 	void *ticket;
@@ -992,7 +1029,8 @@
 	_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
 
 	abort_code = RXKADPACKETSHORT;
-	if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0)
+	if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+			  &response, sizeof(response)) < 0)
 		goto protocol_error;
 	if (!pskb_pull(skb, sizeof(response)))
 		BUG();
@@ -1000,7 +1038,6 @@
 	version = ntohl(response.version);
 	ticket_len = ntohl(response.ticket_len);
 	kvno = ntohl(response.kvno);
-	sp = rxrpc_skb(skb);
 	_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
 	       sp->hdr.serial, version, kvno, ticket_len);
 
@@ -1022,7 +1059,8 @@
 		return -ENOMEM;
 
 	abort_code = RXKADPACKETSHORT;
-	if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0)
+	if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+			  ticket, ticket_len) < 0)
 		goto protocol_error_free;
 
 	ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
@@ -1147,6 +1185,7 @@
 	.prime_packet_security		= rxkad_prime_packet_security,
 	.secure_packet			= rxkad_secure_packet,
 	.verify_packet			= rxkad_verify_packet,
+	.locate_data			= rxkad_locate_data,
 	.issue_challenge		= rxkad_issue_challenge,
 	.respond_to_challenge		= rxkad_respond_to_challenge,
 	.verify_response		= rxkad_verify_response,
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 814d285..7d921e5 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -130,20 +130,20 @@
 	}
 
 	/* find the service */
-	read_lock_bh(&local->services_lock);
-	list_for_each_entry(rx, &local->services, listen_link) {
-		if (rx->srx.srx_service == conn->params.service_id)
-			goto found_service;
-	}
+	read_lock(&local->services_lock);
+	rx = rcu_dereference_protected(local->service,
+				       lockdep_is_held(&local->services_lock));
+	if (rx && rx->srx.srx_service == conn->params.service_id)
+		goto found_service;
 
 	/* the service appears to have died */
-	read_unlock_bh(&local->services_lock);
+	read_unlock(&local->services_lock);
 	_leave(" = -ENOENT");
 	return -ENOENT;
 
 found_service:
 	if (!rx->securities) {
-		read_unlock_bh(&local->services_lock);
+		read_unlock(&local->services_lock);
 		_leave(" = -ENOKEY");
 		return -ENOKEY;
 	}
@@ -152,13 +152,13 @@
 	kref = keyring_search(make_key_ref(rx->securities, 1UL),
 			      &key_type_rxrpc_s, kdesc);
 	if (IS_ERR(kref)) {
-		read_unlock_bh(&local->services_lock);
+		read_unlock(&local->services_lock);
 		_leave(" = %ld [search]", PTR_ERR(kref));
 		return PTR_ERR(kref);
 	}
 
 	key = key_ref_to_ptr(kref);
-	read_unlock_bh(&local->services_lock);
+	read_unlock(&local->services_lock);
 
 	conn->server_key = key;
 	conn->security = sec;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
new file mode 100644
index 0000000..3322543
--- /dev/null
+++ b/net/rxrpc/sendmsg.c
@@ -0,0 +1,606 @@
+/* AF_RXRPC sendmsg() implementation.
+ *
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/export.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+enum rxrpc_command {
+	RXRPC_CMD_SEND_DATA,		/* send data message */
+	RXRPC_CMD_SEND_ABORT,		/* request abort generation */
+	RXRPC_CMD_ACCEPT,		/* [server] accept incoming call */
+	RXRPC_CMD_REJECT_BUSY,		/* [server] reject a call as busy */
+};
+
+/*
+ * wait for space to appear in the transmit/ACK window
+ * - caller holds the socket locked
+ */
+static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
+				    struct rxrpc_call *call,
+				    long *timeo)
+{
+	DECLARE_WAITQUEUE(myself, current);
+	int ret;
+
+	_enter(",{%u,%u,%u}",
+	       call->tx_hard_ack, call->tx_top, call->tx_winsize);
+
+	add_wait_queue(&call->waitq, &myself);
+
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		ret = 0;
+		if (call->tx_top - call->tx_hard_ack <
+		    min_t(unsigned int, call->tx_winsize,
+			  call->cong_cwnd + call->cong_extra))
+			break;
+		if (call->state >= RXRPC_CALL_COMPLETE) {
+			ret = -call->error;
+			break;
+		}
+		if (signal_pending(current)) {
+			ret = sock_intr_errno(*timeo);
+			break;
+		}
+
+		trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+		release_sock(&rx->sk);
+		*timeo = schedule_timeout(*timeo);
+		lock_sock(&rx->sk);
+	}
+
+	remove_wait_queue(&call->waitq, &myself);
+	set_current_state(TASK_RUNNING);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/*
+ * Schedule an instant Tx resend.
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
+{
+	spin_lock_bh(&call->lock);
+
+	if (call->state < RXRPC_CALL_COMPLETE) {
+		call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS;
+		if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+			rxrpc_queue_call(call);
+	}
+
+	spin_unlock_bh(&call->lock);
+}
+
+/*
+ * Queue a DATA packet for transmission, set the resend timeout and send the
+ * packet immediately
+ */
+static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
+			       bool last)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+	rxrpc_seq_t seq = sp->hdr.seq;
+	int ret, ix;
+	u8 annotation = RXRPC_TX_ANNO_UNACK;
+
+	_net("queue skb %p [%d]", skb, seq);
+
+	ASSERTCMP(seq, ==, call->tx_top + 1);
+
+	if (last)
+		annotation |= RXRPC_TX_ANNO_LAST;
+
+	/* We have to set the timestamp before queueing as the retransmit
+	 * algorithm can see the packet as soon as we queue it.
+	 */
+	skb->tstamp = ktime_get_real();
+
+	ix = seq & RXRPC_RXTX_BUFF_MASK;
+	rxrpc_get_skb(skb, rxrpc_skb_tx_got);
+	call->rxtx_annotations[ix] = annotation;
+	smp_wmb();
+	call->rxtx_buffer[ix] = skb;
+	call->tx_top = seq;
+	if (last)
+		trace_rxrpc_transmit(call, rxrpc_transmit_queue_last);
+	else
+		trace_rxrpc_transmit(call, rxrpc_transmit_queue);
+
+	if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
+		_debug("________awaiting reply/ACK__________");
+		write_lock_bh(&call->state_lock);
+		switch (call->state) {
+		case RXRPC_CALL_CLIENT_SEND_REQUEST:
+			call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+			break;
+		case RXRPC_CALL_SERVER_ACK_REQUEST:
+			call->state = RXRPC_CALL_SERVER_SEND_REPLY;
+			if (!last)
+				break;
+		case RXRPC_CALL_SERVER_SEND_REPLY:
+			call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+			break;
+		default:
+			break;
+		}
+		write_unlock_bh(&call->state_lock);
+	}
+
+	if (seq == 1 && rxrpc_is_client_call(call))
+		rxrpc_expose_client_call(call);
+
+	ret = rxrpc_send_data_packet(call, skb, false);
+	if (ret < 0) {
+		_debug("need instant resend %d", ret);
+		rxrpc_instant_resend(call, ix);
+	} else {
+		ktime_t now = ktime_get_real(), resend_at;
+
+		resend_at = ktime_add_ms(now, rxrpc_resend_timeout);
+
+		if (ktime_before(resend_at, call->resend_at)) {
+			call->resend_at = resend_at;
+			rxrpc_set_timer(call, rxrpc_timer_set_for_send, now);
+		}
+	}
+
+	rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+	_leave("");
+}
+
+/*
+ * send data through a socket
+ * - must be called in process context
+ * - caller holds the socket locked
+ */
+static int rxrpc_send_data(struct rxrpc_sock *rx,
+			   struct rxrpc_call *call,
+			   struct msghdr *msg, size_t len)
+{
+	struct rxrpc_skb_priv *sp;
+	struct sk_buff *skb;
+	struct sock *sk = &rx->sk;
+	long timeo;
+	bool more;
+	int ret, copied;
+
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+	/* this should be in poll */
+	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+		return -EPIPE;
+
+	more = msg->msg_flags & MSG_MORE;
+
+	skb = call->tx_pending;
+	call->tx_pending = NULL;
+	rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
+
+	copied = 0;
+	do {
+		/* Check to see if there's a ping ACK to reply to. */
+		if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
+			rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+
+		if (!skb) {
+			size_t size, chunk, max, space;
+
+			_debug("alloc");
+
+			if (call->tx_top - call->tx_hard_ack >=
+			    min_t(unsigned int, call->tx_winsize,
+				  call->cong_cwnd + call->cong_extra)) {
+				ret = -EAGAIN;
+				if (msg->msg_flags & MSG_DONTWAIT)
+					goto maybe_error;
+				ret = rxrpc_wait_for_tx_window(rx, call,
+							       &timeo);
+				if (ret < 0)
+					goto maybe_error;
+			}
+
+			max = RXRPC_JUMBO_DATALEN;
+			max -= call->conn->security_size;
+			max &= ~(call->conn->size_align - 1UL);
+
+			chunk = max;
+			if (chunk > msg_data_left(msg) && !more)
+				chunk = msg_data_left(msg);
+
+			space = chunk + call->conn->size_align;
+			space &= ~(call->conn->size_align - 1UL);
+
+			size = space + call->conn->security_size;
+
+			_debug("SIZE: %zu/%zu/%zu", chunk, space, size);
+
+			/* create a buffer that we can retain until it's ACK'd */
+			skb = sock_alloc_send_skb(
+				sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
+			if (!skb)
+				goto maybe_error;
+
+			rxrpc_new_skb(skb, rxrpc_skb_tx_new);
+
+			_debug("ALLOC SEND %p", skb);
+
+			ASSERTCMP(skb->mark, ==, 0);
+
+			_debug("HS: %u", call->conn->security_size);
+			skb_reserve(skb, call->conn->security_size);
+			skb->len += call->conn->security_size;
+
+			sp = rxrpc_skb(skb);
+			sp->remain = chunk;
+			if (sp->remain > skb_tailroom(skb))
+				sp->remain = skb_tailroom(skb);
+
+			_net("skb: hr %d, tr %d, hl %d, rm %d",
+			       skb_headroom(skb),
+			       skb_tailroom(skb),
+			       skb_headlen(skb),
+			       sp->remain);
+
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		}
+
+		_debug("append");
+		sp = rxrpc_skb(skb);
+
+		/* append next segment of data to the current buffer */
+		if (msg_data_left(msg) > 0) {
+			int copy = skb_tailroom(skb);
+			ASSERTCMP(copy, >, 0);
+			if (copy > msg_data_left(msg))
+				copy = msg_data_left(msg);
+			if (copy > sp->remain)
+				copy = sp->remain;
+
+			_debug("add");
+			ret = skb_add_data(skb, &msg->msg_iter, copy);
+			_debug("added");
+			if (ret < 0)
+				goto efault;
+			sp->remain -= copy;
+			skb->mark += copy;
+			copied += copy;
+		}
+
+		/* check for the far side aborting the call or a network error
+		 * occurring */
+		if (call->state == RXRPC_CALL_COMPLETE)
+			goto call_terminated;
+
+		/* add the packet to the send queue if it's now full */
+		if (sp->remain <= 0 ||
+		    (msg_data_left(msg) == 0 && !more)) {
+			struct rxrpc_connection *conn = call->conn;
+			uint32_t seq;
+			size_t pad;
+
+			/* pad out if we're using security */
+			if (conn->security_ix) {
+				pad = conn->security_size + skb->mark;
+				pad = conn->size_align - pad;
+				pad &= conn->size_align - 1;
+				_debug("pad %zu", pad);
+				if (pad)
+					memset(skb_put(skb, pad), 0, pad);
+			}
+
+			seq = call->tx_top + 1;
+
+			sp->hdr.seq	= seq;
+			sp->hdr._rsvd	= 0;
+			sp->hdr.flags	= conn->out_clientflag;
+
+			if (msg_data_left(msg) == 0 && !more)
+				sp->hdr.flags |= RXRPC_LAST_PACKET;
+			else if (call->tx_top - call->tx_hard_ack <
+				 call->tx_winsize)
+				sp->hdr.flags |= RXRPC_MORE_PACKETS;
+
+			ret = conn->security->secure_packet(
+				call, skb, skb->mark, skb->head);
+			if (ret < 0)
+				goto out;
+
+			rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
+			skb = NULL;
+		}
+	} while (msg_data_left(msg) > 0);
+
+success:
+	ret = copied;
+out:
+	call->tx_pending = skb;
+	_leave(" = %d", ret);
+	return ret;
+
+call_terminated:
+	rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+	_leave(" = %d", -call->error);
+	return -call->error;
+
+maybe_error:
+	if (copied)
+		goto success;
+	goto out;
+
+efault:
+	ret = -EFAULT;
+	goto out;
+}
+
+/*
+ * extract control messages from the sendmsg() control buffer
+ */
+static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
+			      unsigned long *user_call_ID,
+			      enum rxrpc_command *command,
+			      u32 *abort_code,
+			      bool *_exclusive)
+{
+	struct cmsghdr *cmsg;
+	bool got_user_ID = false;
+	int len;
+
+	*command = RXRPC_CMD_SEND_DATA;
+
+	if (msg->msg_controllen == 0)
+		return -EINVAL;
+
+	for_each_cmsghdr(cmsg, msg) {
+		if (!CMSG_OK(msg, cmsg))
+			return -EINVAL;
+
+		len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+		_debug("CMSG %d, %d, %d",
+		       cmsg->cmsg_level, cmsg->cmsg_type, len);
+
+		if (cmsg->cmsg_level != SOL_RXRPC)
+			continue;
+
+		switch (cmsg->cmsg_type) {
+		case RXRPC_USER_CALL_ID:
+			if (msg->msg_flags & MSG_CMSG_COMPAT) {
+				if (len != sizeof(u32))
+					return -EINVAL;
+				*user_call_ID = *(u32 *) CMSG_DATA(cmsg);
+			} else {
+				if (len != sizeof(unsigned long))
+					return -EINVAL;
+				*user_call_ID = *(unsigned long *)
+					CMSG_DATA(cmsg);
+			}
+			_debug("User Call ID %lx", *user_call_ID);
+			got_user_ID = true;
+			break;
+
+		case RXRPC_ABORT:
+			if (*command != RXRPC_CMD_SEND_DATA)
+				return -EINVAL;
+			*command = RXRPC_CMD_SEND_ABORT;
+			if (len != sizeof(*abort_code))
+				return -EINVAL;
+			*abort_code = *(unsigned int *) CMSG_DATA(cmsg);
+			_debug("Abort %x", *abort_code);
+			if (*abort_code == 0)
+				return -EINVAL;
+			break;
+
+		case RXRPC_ACCEPT:
+			if (*command != RXRPC_CMD_SEND_DATA)
+				return -EINVAL;
+			*command = RXRPC_CMD_ACCEPT;
+			if (len != 0)
+				return -EINVAL;
+			break;
+
+		case RXRPC_EXCLUSIVE_CALL:
+			*_exclusive = true;
+			if (len != 0)
+				return -EINVAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (!got_user_ID)
+		return -EINVAL;
+	_leave(" = 0");
+	return 0;
+}
+
+/*
+ * Create a new client call for sendmsg().
+ */
+static struct rxrpc_call *
+rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+				  unsigned long user_call_ID, bool exclusive)
+{
+	struct rxrpc_conn_parameters cp;
+	struct rxrpc_call *call;
+	struct key *key;
+
+	DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
+
+	_enter("");
+
+	if (!msg->msg_name)
+		return ERR_PTR(-EDESTADDRREQ);
+
+	key = rx->key;
+	if (key && !rx->key->payload.data[0])
+		key = NULL;
+
+	memset(&cp, 0, sizeof(cp));
+	cp.local		= rx->local;
+	cp.key			= rx->key;
+	cp.security_level	= rx->min_sec_level;
+	cp.exclusive		= rx->exclusive | exclusive;
+	cp.service_id		= srx->srx_service;
+	call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
+
+	_leave(" = %p\n", call);
+	return call;
+}
+
+/*
+ * send a message forming part of a client call through an RxRPC socket
+ * - caller holds the socket locked
+ * - the socket may be either a client socket or a server socket
+ */
+int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+{
+	enum rxrpc_command cmd;
+	struct rxrpc_call *call;
+	unsigned long user_call_ID = 0;
+	bool exclusive = false;
+	u32 abort_code = 0;
+	int ret;
+
+	_enter("");
+
+	ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
+				 &exclusive);
+	if (ret < 0)
+		return ret;
+
+	if (cmd == RXRPC_CMD_ACCEPT) {
+		if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
+			return -EINVAL;
+		call = rxrpc_accept_call(rx, user_call_ID, NULL);
+		if (IS_ERR(call))
+			return PTR_ERR(call);
+		rxrpc_put_call(call, rxrpc_call_put);
+		return 0;
+	}
+
+	call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
+	if (!call) {
+		if (cmd != RXRPC_CMD_SEND_DATA)
+			return -EBADSLT;
+		call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
+							 exclusive);
+		if (IS_ERR(call))
+			return PTR_ERR(call);
+	}
+
+	_debug("CALL %d USR %lx ST %d on CONN %p",
+	       call->debug_id, call->user_call_ID, call->state, call->conn);
+
+	if (call->state >= RXRPC_CALL_COMPLETE) {
+		/* it's too late for this call */
+		ret = -ESHUTDOWN;
+	} else if (cmd == RXRPC_CMD_SEND_ABORT) {
+		ret = 0;
+		if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED))
+			ret = rxrpc_send_call_packet(call,
+						     RXRPC_PACKET_TYPE_ABORT);
+	} else if (cmd != RXRPC_CMD_SEND_DATA) {
+		ret = -EINVAL;
+	} else if (rxrpc_is_client_call(call) &&
+		   call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+		/* request phase complete for this client call */
+		ret = -EPROTO;
+	} else if (rxrpc_is_service_call(call) &&
+		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+		/* Reply phase not begun or not complete for service call. */
+		ret = -EPROTO;
+	} else {
+		ret = rxrpc_send_data(rx, call, msg, len);
+	}
+
+	rxrpc_put_call(call, rxrpc_call_put);
+	_leave(" = %d", ret);
+	return ret;
+}
+
+/**
+ * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
+ * @sock: The socket the call is on
+ * @call: The call to send data through
+ * @msg: The data to send
+ * @len: The amount of data to send
+ *
+ * Allow a kernel service to send data on a call.  The call must be in an state
+ * appropriate to sending data.  No control data should be supplied in @msg,
+ * nor should an address be supplied.  MSG_MORE should be flagged if there's
+ * more data to come, otherwise this data will end the transmission phase.
+ */
+int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+			   struct msghdr *msg, size_t len)
+{
+	int ret;
+
+	_enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+
+	ASSERTCMP(msg->msg_name, ==, NULL);
+	ASSERTCMP(msg->msg_control, ==, NULL);
+
+	lock_sock(sock->sk);
+
+	_debug("CALL %d USR %lx ST %d on CONN %p",
+	       call->debug_id, call->user_call_ID, call->state, call->conn);
+
+	if (call->state >= RXRPC_CALL_COMPLETE) {
+		ret = -ESHUTDOWN; /* it's too late for this call */
+	} else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+		   call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+		   call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+		ret = -EPROTO; /* request phase complete for this client call */
+	} else {
+		ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+	}
+
+	release_sock(sock->sk);
+	_leave(" = %d", ret);
+	return ret;
+}
+EXPORT_SYMBOL(rxrpc_kernel_send_data);
+
+/**
+ * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
+ * @sock: The socket the call is on
+ * @call: The call to be aborted
+ * @abort_code: The abort code to stick into the ABORT packet
+ * @error: Local error value
+ * @why: 3-char string indicating why.
+ *
+ * Allow a kernel service to abort a call, if it's still in an abortable state.
+ */
+void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
+			     u32 abort_code, int error, const char *why)
+{
+	_enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
+
+	lock_sock(sock->sk);
+
+	if (rxrpc_abort_call(why, call, 0, abort_code, error))
+		rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+
+	release_sock(sock->sk);
+	_leave("");
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_abort_call);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index eee0cfd9..67b02c4 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -18,121 +18,82 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
+#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs)
+
 /*
- * set up for the ACK at the end of the receive phase when we discard the final
- * receive phase data packet
- * - called with softirqs disabled
+ * Note the allocation or reception of a socket buffer.
  */
-static void rxrpc_request_final_ACK(struct rxrpc_call *call)
+void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
-	/* the call may be aborted before we have a chance to ACK it */
-	write_lock(&call->state_lock);
-
-	switch (call->state) {
-	case RXRPC_CALL_CLIENT_RECV_REPLY:
-		call->state = RXRPC_CALL_CLIENT_FINAL_ACK;
-		_debug("request final ACK");
-
-		/* get an extra ref on the call for the final-ACK generator to
-		 * release */
-		rxrpc_get_call(call);
-		set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
-		if (try_to_del_timer_sync(&call->ack_timer) >= 0)
-			rxrpc_queue_call(call);
-		break;
-
-	case RXRPC_CALL_SERVER_RECV_REQUEST:
-		call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
-	default:
-		break;
-	}
-
-	write_unlock(&call->state_lock);
+	const void *here = __builtin_return_address(0);
+	int n = atomic_inc_return(select_skb_count(op));
+	trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
 }
 
 /*
- * drop the bottom ACK off of the call ACK window and advance the window
+ * Note the re-emergence of a socket buffer from a queue or buffer.
  */
-static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
-				struct rxrpc_skb_priv *sp)
+void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
-	int loop;
-	u32 seq;
-
-	spin_lock_bh(&call->lock);
-
-	_debug("hard ACK #%u", sp->hdr.seq);
-
-	for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
-		call->ackr_window[loop] >>= 1;
-		call->ackr_window[loop] |=
-			call->ackr_window[loop + 1] << (BITS_PER_LONG - 1);
+	const void *here = __builtin_return_address(0);
+	if (skb) {
+		int n = atomic_read(select_skb_count(op));
+		trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
 	}
-
-	seq = sp->hdr.seq;
-	ASSERTCMP(seq, ==, call->rx_data_eaten + 1);
-	call->rx_data_eaten = seq;
-
-	if (call->ackr_win_top < UINT_MAX)
-		call->ackr_win_top++;
-
-	ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
-		    call->rx_data_post, >=, call->rx_data_recv);
-	ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
-		    call->rx_data_recv, >=, call->rx_data_eaten);
-
-	if (sp->hdr.flags & RXRPC_LAST_PACKET) {
-		rxrpc_request_final_ACK(call);
-	} else if (atomic_dec_and_test(&call->ackr_not_idle) &&
-		   test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
-		/* We previously soft-ACK'd some received packets that have now
-		 * been consumed, so send a hard-ACK if no more packets are
-		 * immediately forthcoming to allow the transmitter to free up
-		 * its Tx bufferage.
-		 */
-		_debug("send Rx idle ACK");
-		__rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
-				    false);
-	}
-
-	spin_unlock_bh(&call->lock);
 }
 
 /*
- * destroy a packet that has an RxRPC control buffer
- * - advance the hard-ACK state of the parent call (done here in case something
- *   in the kernel bypasses recvmsg() and steals the packet directly off of the
- *   socket receive queue)
+ * Note the addition of a ref on a socket buffer.
  */
-void rxrpc_packet_destructor(struct sk_buff *skb)
+void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	struct rxrpc_call *call = sp->call;
+	const void *here = __builtin_return_address(0);
+	int n = atomic_inc_return(select_skb_count(op));
+	trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+	skb_get(skb);
+}
 
-	_enter("%p{%p}", skb, call);
-
-	if (call) {
-		/* send the final ACK on a client call */
-		if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA)
-			rxrpc_hard_ACK_data(call, sp);
-		rxrpc_put_call(call);
-		sp->call = NULL;
+/*
+ * Note the destruction of a socket buffer.
+ */
+void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+{
+	const void *here = __builtin_return_address(0);
+	if (skb) {
+		int n;
+		CHECK_SLAB_OKAY(&skb->users);
+		n = atomic_dec_return(select_skb_count(op));
+		trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+		kfree_skb(skb);
 	}
-
-	if (skb->sk)
-		sock_rfree(skb);
-	_leave("");
 }
 
-/**
- * rxrpc_kernel_free_skb - Free an RxRPC socket buffer
- * @skb: The socket buffer to be freed
- *
- * Let RxRPC free its own socket buffer, permitting it to maintain debug
- * accounting.
+/*
+ * Note the injected loss of a socket buffer.
  */
-void rxrpc_kernel_free_skb(struct sk_buff *skb)
+void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
 {
-	rxrpc_free_skb(skb);
+	const void *here = __builtin_return_address(0);
+	if (skb) {
+		int n;
+		CHECK_SLAB_OKAY(&skb->users);
+		n = atomic_dec_return(select_skb_count(op));
+		trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+		kfree_skb(skb);
+	}
 }
-EXPORT_SYMBOL(rxrpc_kernel_free_skb);
+
+/*
+ * Clear a queue of socket buffers.
+ */
+void rxrpc_purge_queue(struct sk_buff_head *list)
+{
+	const void *here = __builtin_return_address(0);
+	struct sk_buff *skb;
+	while ((skb = skb_dequeue((list))) != NULL) {
+		int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged));
+		trace_rxrpc_skb(skb, rxrpc_skb_rx_purged,
+				atomic_read(&skb->users), n, here);
+		kfree_skb(skb);
+	}
+}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 03ad087..34c706d 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -20,7 +20,7 @@
 static const unsigned int four = 4;
 static const unsigned int thirtytwo = 32;
 static const unsigned int n_65535 = 65535;
-static const unsigned int n_max_acks = RXRPC_MAXACKS;
+static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
 
 /*
  * RxRPC operating parameters.
@@ -35,7 +35,7 @@
 		.data		= &rxrpc_requested_ack_delay,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_ms_jiffies,
+		.proc_handler	= proc_dointvec,
 		.extra1		= (void *)&zero,
 	},
 	{
@@ -43,7 +43,7 @@
 		.data		= &rxrpc_soft_ack_delay,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_ms_jiffies,
+		.proc_handler	= proc_dointvec,
 		.extra1		= (void *)&one,
 	},
 	{
@@ -51,7 +51,7 @@
 		.data		= &rxrpc_idle_ack_delay,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_ms_jiffies,
+		.proc_handler	= proc_dointvec,
 		.extra1		= (void *)&one,
 	},
 	{
@@ -59,6 +59,22 @@
 		.data		= &rxrpc_resend_timeout,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "idle_conn_expiry",
+		.data		= &rxrpc_conn_idle_client_expiry,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "idle_conn_fast_expiry",
+		.data		= &rxrpc_conn_idle_client_fast_expiry,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
 		.proc_handler	= proc_dointvec_ms_jiffies,
 		.extra1		= (void *)&one,
 	},
@@ -69,30 +85,29 @@
 		.data		= &rxrpc_max_call_lifetime,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-		.extra1		= (void *)&one,
-	},
-	{
-		.procname	= "dead_call_expiry",
-		.data		= &rxrpc_dead_call_expiry,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-		.extra1		= (void *)&one,
-	},
-
-	/* Values measured in seconds */
-	{
-		.procname	= "connection_expiry",
-		.data		= &rxrpc_connection_expiry,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
+		.proc_handler	= proc_dointvec,
 		.extra1		= (void *)&one,
 	},
 
 	/* Non-time values */
 	{
+		.procname	= "max_client_conns",
+		.data		= &rxrpc_max_client_connections,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&rxrpc_reap_client_connections,
+	},
+	{
+		.procname	= "reap_client_conns",
+		.data		= &rxrpc_reap_client_connections,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+		.extra2		= (void *)&rxrpc_max_client_connections,
+	},
+	{
 		.procname	= "max_backlog",
 		.data		= &rxrpc_max_backlog,
 		.maxlen		= sizeof(unsigned int),
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index b88914d..ff7af71 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -30,6 +30,7 @@
 		srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
 		return 0;
 
+#ifdef CONFIG_AF_RXRPC_IPV6
 	case ETH_P_IPV6:
 		srx->transport_type = SOCK_DGRAM;
 		srx->transport_len = sizeof(srx->transport.sin6);
@@ -37,6 +38,7 @@
 		srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
 		srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
 		return 0;
+#endif
 
 	default:
 		pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index ccf931b..87956a7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -749,6 +749,17 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called act_connmark.
 
+config NET_ACT_SKBMOD
+        tristate "skb data modification action"
+        depends on NET_CLS_ACT
+        ---help---
+         Say Y here to allow modification of skb data
+
+         If unsure, say N.
+
+         To compile this code as a module, choose M here: the
+         module will be called act_skbmod.
+
 config NET_ACT_IFE
         tristate "Inter-FE action based on IETF ForCES InterFE LFB"
         depends on NET_CLS_ACT
@@ -761,6 +772,17 @@
 	  To compile this code as a module, choose M here: the
 	  module will be called act_ife.
 
+config NET_ACT_TUNNEL_KEY
+        tristate "IP tunnel metadata manipulation"
+        depends on NET_CLS_ACT
+        ---help---
+	  Say Y here to set/release ip tunnel metadata.
+
+	  If unsure, say N.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called act_tunnel_key.
+
 config NET_IFE_SKBMARK
         tristate "Support to encoding decoding skb mark on IFE action"
         depends on NET_ACT_IFE
@@ -771,6 +793,11 @@
         depends on NET_ACT_IFE
         ---help---
 
+config NET_IFE_SKBTCINDEX
+        tristate "Support to encoding decoding skb tcindex on IFE action"
+        depends on NET_ACT_IFE
+        ---help---
+
 config NET_CLS_IND
 	bool "Incoming device classification"
 	depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index ae088a5..4bdda36 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -19,9 +19,12 @@
 obj-$(CONFIG_NET_ACT_VLAN)	+= act_vlan.o
 obj-$(CONFIG_NET_ACT_BPF)	+= act_bpf.o
 obj-$(CONFIG_NET_ACT_CONNMARK)	+= act_connmark.o
+obj-$(CONFIG_NET_ACT_SKBMOD)	+= act_skbmod.o
 obj-$(CONFIG_NET_ACT_IFE)	+= act_ife.o
 obj-$(CONFIG_NET_IFE_SKBMARK)	+= act_meta_mark.o
 obj-$(CONFIG_NET_IFE_SKBPRIO)	+= act_meta_skbprio.o
+obj-$(CONFIG_NET_IFE_SKBTCINDEX)	+= act_meta_skbtcindex.o
+obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index e4a5f26..c910217 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -64,7 +64,6 @@
 		if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) {
 			if (p->ops->cleanup)
 				p->ops->cleanup(p, bind);
-			list_del(&p->list);
 			tcf_hash_destroy(p->hinfo, p);
 			ret = ACT_P_DELETED;
 		}
@@ -421,18 +420,19 @@
 	return res;
 }
 
-int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
-		    struct tcf_result *res)
+int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
+		    int nr_actions, struct tcf_result *res)
 {
-	const struct tc_action *a;
-	int ret = -1;
+	int ret = -1, i;
 
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
 		ret = TC_ACT_OK;
 		goto exec_done;
 	}
-	list_for_each_entry(a, actions, list) {
+	for (i = 0; i < nr_actions; i++) {
+		const struct tc_action *a = actions[i];
+
 repeat:
 		ret = a->ops->act(skb, a, res);
 		if (ret == TC_ACT_REPEAT)
@@ -592,9 +592,19 @@
 	return ERR_PTR(err);
 }
 
-int tcf_action_init(struct net *net, struct nlattr *nla,
-				  struct nlattr *est, char *name, int ovr,
-				  int bind, struct list_head *actions)
+static void cleanup_a(struct list_head *actions, int ovr)
+{
+	struct tc_action *a;
+
+	if (!ovr)
+		return;
+
+	list_for_each_entry(a, actions, list)
+		a->tcfa_refcnt--;
+}
+
+int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est,
+		    char *name, int ovr, int bind, struct list_head *actions)
 {
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
 	struct tc_action *act;
@@ -612,8 +622,15 @@
 			goto err;
 		}
 		act->order = i;
+		if (ovr)
+			act->tcfa_refcnt++;
 		list_add_tail(&act->list, actions);
 	}
+
+	/* Remove the temp refcnt which was necessary to protect against
+	 * destroying an existing action which was being replaced
+	 */
+	cleanup_a(actions, ovr);
 	return 0;
 
 err:
@@ -754,16 +771,6 @@
 	return ERR_PTR(err);
 }
 
-static void cleanup_a(struct list_head *actions)
-{
-	struct tc_action *a, *tmp;
-
-	list_for_each_entry_safe(a, tmp, actions, list) {
-		list_del(&a->list);
-		kfree(a);
-	}
-}
-
 static int tca_action_flush(struct net *net, struct nlattr *nla,
 			    struct nlmsghdr *n, u32 portid)
 {
@@ -893,6 +900,8 @@
 			goto err;
 		}
 		act->order = i;
+		if (event == RTM_GETACTION)
+			act->tcfa_refcnt++;
 		list_add_tail(&act->list, &actions);
 	}
 
@@ -905,7 +914,7 @@
 		return ret;
 	}
 err:
-	cleanup_a(&actions);
+	tcf_action_destroy(&actions, 0);
 	return ret;
 }
 
@@ -933,24 +942,17 @@
 	return err;
 }
 
-static int
-tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
-	       u32 portid, int ovr)
+static int tcf_action_add(struct net *net, struct nlattr *nla,
+			  struct nlmsghdr *n, u32 portid, int ovr)
 {
 	int ret = 0;
 	LIST_HEAD(actions);
 
 	ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
 	if (ret)
-		goto done;
+		return ret;
 
-	/* dump then free all the actions after update; inserted policy
-	 * stays intact
-	 */
-	ret = tcf_add_notify(net, n, &actions, portid);
-	cleanup_a(&actions);
-done:
-	return ret;
+	return tcf_add_notify(net, n, &actions, portid);
 }
 
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
@@ -1004,8 +1006,7 @@
 	return ret;
 }
 
-static struct nlattr *
-find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
 {
 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -1032,8 +1033,7 @@
 	return kind;
 }
 
-static int
-tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
+static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlmsghdr *nlh;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index bfa8707..1d39600 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -39,13 +39,10 @@
 static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 		   struct tcf_result *res)
 {
+	bool at_ingress = skb_at_tc_ingress(skb);
 	struct tcf_bpf *prog = to_bpf(act);
 	struct bpf_prog *filter;
 	int action, filter_res;
-	bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
-
-	if (unlikely(!skb_mac_header_was_set(skb)))
-		return TC_ACT_UNSPEC;
 
 	tcf_lastuse_update(&prog->tcf_tm);
 	bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index b5dbf63..e0defce 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -116,8 +116,8 @@
 		return (void *)(skb_network_header(skb) + ihl);
 }
 
-static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
-			      unsigned int ihl, unsigned int ipl)
+static int tcf_csum_ipv4_icmp(struct sk_buff *skb, unsigned int ihl,
+			      unsigned int ipl)
 {
 	struct icmphdr *icmph;
 
@@ -152,8 +152,8 @@
 	return 1;
 }
 
-static int tcf_csum_ipv6_icmp(struct sk_buff *skb,
-			      unsigned int ihl, unsigned int ipl)
+static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl,
+			      unsigned int ipl)
 {
 	struct icmp6hdr *icmp6h;
 	const struct ipv6hdr *ip6h;
@@ -174,8 +174,8 @@
 	return 1;
 }
 
-static int tcf_csum_ipv4_tcp(struct sk_buff *skb,
-			     unsigned int ihl, unsigned int ipl)
+static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl,
+			     unsigned int ipl)
 {
 	struct tcphdr *tcph;
 	const struct iphdr *iph;
@@ -195,8 +195,8 @@
 	return 1;
 }
 
-static int tcf_csum_ipv6_tcp(struct sk_buff *skb,
-			     unsigned int ihl, unsigned int ipl)
+static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl,
+			     unsigned int ipl)
 {
 	struct tcphdr *tcph;
 	const struct ipv6hdr *ip6h;
@@ -217,8 +217,8 @@
 	return 1;
 }
 
-static int tcf_csum_ipv4_udp(struct sk_buff *skb,
-			     unsigned int ihl, unsigned int ipl, int udplite)
+static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl,
+			     unsigned int ipl, int udplite)
 {
 	struct udphdr *udph;
 	const struct iphdr *iph;
@@ -270,8 +270,8 @@
 	return 1;
 }
 
-static int tcf_csum_ipv6_udp(struct sk_buff *skb,
-			     unsigned int ihl, unsigned int ipl, int udplite)
+static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl,
+			     unsigned int ipl, int udplite)
 {
 	struct udphdr *udph;
 	const struct ipv6hdr *ip6h;
@@ -380,8 +380,8 @@
 	return 0;
 }
 
-static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
-				 unsigned int ixhl, unsigned int *pl)
+static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, unsigned int ixhl,
+				 unsigned int *pl)
 {
 	int off, len, optlen;
 	unsigned char *xh = (void *)ip6xh;
@@ -494,8 +494,8 @@
 	return 0;
 }
 
-static int tcf_csum(struct sk_buff *skb,
-		    const struct tc_action *a, struct tcf_result *res)
+static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
+		    struct tcf_result *res)
 {
 	struct tcf_csum *p = to_tcf_csum(a);
 	int action;
@@ -531,8 +531,8 @@
 	return TC_ACT_SHOT;
 }
 
-static int tcf_csum_dump(struct sk_buff *skb,
-			 struct tc_action *a, int bind, int ref)
+static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
+			 int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_csum *p = to_tcf_csum(a);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e24a409..e0aa30f 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -156,7 +156,8 @@
 	int action = READ_ONCE(gact->tcf_action);
 	struct tcf_t *tm = &gact->tcf_tm;
 
-	_bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, packets);
+	_bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes,
+			   packets);
 	if (action == TC_ACT_SHOT)
 		this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
 
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 141a06e..95c463c 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -53,7 +53,7 @@
 	u32 *tlv = (u32 *)(skbdata);
 	u16 totlen = nla_total_size(dlen);	/*alignment + hdr */
 	char *dptr = (char *)tlv + NLA_HDRLEN;
-	u32 htlv = attrtype << 16 | totlen;
+	u32 htlv = attrtype << 16 | (dlen + NLA_HDRLEN);
 
 	*tlv = htonl(htlv);
 	memset(dptr, 0, totlen - NLA_HDRLEN);
@@ -63,6 +63,23 @@
 }
 EXPORT_SYMBOL_GPL(ife_tlv_meta_encode);
 
+int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi)
+{
+	u16 edata = 0;
+
+	if (mi->metaval)
+		edata = *(u16 *)mi->metaval;
+	else if (metaval)
+		edata = metaval;
+
+	if (!edata) /* will not encode */
+		return 0;
+
+	edata = htons(edata);
+	return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata);
+}
+EXPORT_SYMBOL_GPL(ife_encode_meta_u16);
+
 int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi)
 {
 	if (mi->metaval)
@@ -81,6 +98,15 @@
 }
 EXPORT_SYMBOL_GPL(ife_check_meta_u32);
 
+int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi)
+{
+	if (metaval || mi->metaval)
+		return 8; /* T+L+(V) == 2+2+(2+2bytepad) */
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ife_check_meta_u16);
+
 int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi)
 {
 	u32 edata = metaval;
@@ -135,7 +161,7 @@
 
 int ife_validate_meta_u32(void *val, int len)
 {
-	if (len == 4)
+	if (len == sizeof(u32))
 		return 0;
 
 	return -EINVAL;
@@ -144,8 +170,8 @@
 
 int ife_validate_meta_u16(void *val, int len)
 {
-	/* length will include padding */
-	if (len == NLA_ALIGN(2))
+	/* length will not include padding */
+	if (len == sizeof(u16))
 		return 0;
 
 	return -EINVAL;
@@ -627,7 +653,7 @@
 	struct tcf_ife_info *ife = to_ife(a);
 	int action = ife->tcf_action;
 	struct ifeheadr *ifehdr = (struct ifeheadr *)skb->data;
-	u16 ifehdrln = ifehdr->metalen;
+	int ifehdrln = (int)ifehdr->metalen;
 	struct meta_tlvhdr *tlv = (struct meta_tlvhdr *)(ifehdr->tlv_data);
 
 	spin_lock(&ife->tcf_lock);
@@ -652,12 +678,14 @@
 		u8 *tlvdata = (u8 *)tlv;
 		u16 mtype = tlv->type;
 		u16 mlen = tlv->len;
+		u16 alen;
 
 		mtype = ntohs(mtype);
 		mlen = ntohs(mlen);
+		alen = NLA_ALIGN(mlen);
 
-		if (find_decode_metaid(skb, ife, mtype, (mlen - 4),
-				       (void *)(tlvdata + 4))) {
+		if (find_decode_metaid(skb, ife, mtype, (mlen - NLA_HDRLEN),
+				       (void *)(tlvdata + NLA_HDRLEN))) {
 			/* abuse overlimits to count when we receive metadata
 			 * but dont have an ops for it
 			 */
@@ -666,8 +694,8 @@
 			ife->tcf_qstats.overlimits++;
 		}
 
-		tlvdata += mlen;
-		ifehdrln -= mlen;
+		tlvdata += alen;
+		ifehdrln -= alen;
 		tlv = (struct meta_tlvhdr *)tlvdata;
 	}
 
@@ -738,8 +766,6 @@
 		return TC_ACT_SHOT;
 	}
 
-	iethh = eth_hdr(skb);
-
 	err = skb_cow_head(skb, hdrm);
 	if (unlikely(err)) {
 		ife->tcf_qstats.drops++;
@@ -750,6 +776,7 @@
 	if (!(at & AT_EGRESS))
 		skb_push(skb, skb->dev->hard_header_len);
 
+	iethh = (struct ethhdr *)skb->data;
 	__skb_push(skb, hdrm);
 	memcpy(skb->data, iethh, skb->mac_len);
 	skb_reset_mac_header(skb);
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
new file mode 100644
index 0000000..3b35774
--- /dev/null
+++ b/net/sched/act_meta_skbtcindex.c
@@ -0,0 +1,79 @@
+/*
+ * net/sched/act_meta_tc_index.c IFE skb->tc_index metadata module
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * copyright Jamal Hadi Salim (2016)
+ *
+*/
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <uapi/linux/tc_act/tc_ife.h>
+#include <net/tc_act/tc_ife.h>
+#include <linux/rtnetlink.h>
+
+static int skbtcindex_encode(struct sk_buff *skb, void *skbdata,
+			     struct tcf_meta_info *e)
+{
+	u32 ifetc_index = skb->tc_index;
+
+	return ife_encode_meta_u16(ifetc_index, skbdata, e);
+}
+
+static int skbtcindex_decode(struct sk_buff *skb, void *data, u16 len)
+{
+	u16 ifetc_index = *(u16 *)data;
+
+	skb->tc_index = ntohs(ifetc_index);
+	return 0;
+}
+
+static int skbtcindex_check(struct sk_buff *skb, struct tcf_meta_info *e)
+{
+	return ife_check_meta_u16(skb->tc_index, e);
+}
+
+static struct tcf_meta_ops ife_skbtcindex_ops = {
+	.metaid = IFE_META_TCINDEX,
+	.metatype = NLA_U16,
+	.name = "tc_index",
+	.synopsis = "skb tc_index 16 bit metadata",
+	.check_presence = skbtcindex_check,
+	.encode = skbtcindex_encode,
+	.decode = skbtcindex_decode,
+	.get = ife_get_meta_u16,
+	.alloc = ife_alloc_meta_u16,
+	.release = ife_release_meta_gen,
+	.validate = ife_validate_meta_u16,
+	.owner = THIS_MODULE,
+};
+
+static int __init ifetc_index_init_module(void)
+{
+	return register_ife_op(&ife_skbtcindex_ops);
+}
+
+static void __exit ifetc_index_cleanup_module(void)
+{
+	unregister_ife_op(&ife_skbtcindex_ops);
+}
+
+module_init(ifetc_index_init_module);
+module_exit(ifetc_index_cleanup_module);
+
+MODULE_AUTHOR("Jamal Hadi Salim(2016)");
+MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6038c85..667dc38 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -204,7 +204,15 @@
 	return retval;
 }
 
-static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+			     u64 lastuse)
+{
+	tcf_lastuse_update(&a->tcfa_tm);
+	_bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+}
+
+static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
+			   int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_mirred *m = to_mirred(a);
@@ -280,6 +288,7 @@
 	.type		=	TCA_ACT_MIRRED,
 	.owner		=	THIS_MODULE,
 	.act		=	tcf_mirred,
+	.stats_update	=	tcf_stats_update,
 	.dump		=	tcf_mirred_dump,
 	.cleanup	=	tcf_mirred_release,
 	.init		=	tcf_mirred_init,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index b3c7e97..d1bd248 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -63,49 +63,8 @@
 				 const struct tc_action_ops *ops)
 {
 	struct tc_action_net *tn = net_generic(net, police_net_id);
-	struct tcf_hashinfo *hinfo = tn->hinfo;
-	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
-	struct nlattr *nest;
 
-	spin_lock_bh(&hinfo->lock);
-
-	s_i = cb->args[0];
-
-	for (i = 0; i < (POL_TAB_MASK + 1); i++) {
-		struct hlist_head *head;
-		struct tc_action *p;
-
-		head = &hinfo->htab[tcf_hash(i, POL_TAB_MASK)];
-
-		hlist_for_each_entry_rcu(p, head, tcfa_head) {
-			index++;
-			if (index < s_i)
-				continue;
-			nest = nla_nest_start(skb, index);
-			if (nest == NULL)
-				goto nla_put_failure;
-			if (type == RTM_DELACTION)
-				err = tcf_action_dump_1(skb, p, 0, 1);
-			else
-				err = tcf_action_dump_1(skb, p, 0, 0);
-			if (err < 0) {
-				index--;
-				nla_nest_cancel(skb, nest);
-				goto done;
-			}
-			nla_nest_end(skb, nest);
-			n_i++;
-		}
-	}
-done:
-	spin_unlock_bh(&hinfo->lock);
-	if (n_i)
-		cb->args[0] += n_i;
-	return n_i;
-
-nla_put_failure:
-	nla_nest_cancel(skb, nest);
-	goto done;
+	return tcf_generic_walker(tn, skb, cb, type, ops);
 }
 
 static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
@@ -125,6 +84,7 @@
 	struct tcf_police *police;
 	struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
 	struct tc_action_net *tn = net_generic(net, police_net_id);
+	bool exists = false;
 	int size;
 
 	if (nla == NULL)
@@ -139,24 +99,24 @@
 	size = nla_len(tb[TCA_POLICE_TBF]);
 	if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat))
 		return -EINVAL;
-	parm = nla_data(tb[TCA_POLICE_TBF]);
 
-	if (parm->index) {
-		if (tcf_hash_check(tn, parm->index, a, bind)) {
-			if (ovr)
-				goto override;
-			/* not replacing */
-			return -EEXIST;
-		}
-	} else {
+	parm = nla_data(tb[TCA_POLICE_TBF]);
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (!exists) {
 		ret = tcf_hash_create(tn, parm->index, NULL, a,
 				      &act_police_ops, bind, false);
 		if (ret)
 			return ret;
 		ret = ACT_P_CREATED;
+	} else {
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
 	}
 
-override:
 	police = to_police(*a);
 	if (parm->rate.rate) {
 		err = -ENOMEM;
@@ -289,6 +249,8 @@
 			police->tcfp_t_c = now;
 			police->tcfp_toks = toks;
 			police->tcfp_ptoks = ptoks;
+			if (police->tcfp_result == TC_ACT_SHOT)
+				police->tcf_qstats.drops++;
 			spin_unlock(&police->tcf_lock);
 			return police->tcfp_result;
 		}
@@ -301,8 +263,8 @@
 	return police->tcf_action;
 }
 
-static int
-tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a,
+			       int bind, int ref)
 {
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tcf_police *police = to_police(a);
@@ -387,14 +349,12 @@
 	.size = sizeof(struct tc_action_net),
 };
 
-static int __init
-police_init_module(void)
+static int __init police_init_module(void)
 {
 	return tcf_register_action(&act_police_ops, &police_net_ops);
 }
 
-static void __exit
-police_cleanup_module(void)
+static void __exit police_cleanup_module(void)
 {
 	tcf_unregister_action(&act_police_ops, &police_net_ops);
 }
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
new file mode 100644
index 0000000..e7d9638
--- /dev/null
+++ b/net/sched/act_skbmod.c
@@ -0,0 +1,301 @@
+/*
+ * net/sched/act_skbmod.c  skb data modifier
+ *
+ * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+*/
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbmod.h>
+#include <net/tc_act/tc_skbmod.h>
+
+#define SKBMOD_TAB_MASK     15
+
+static int skbmod_net_id;
+static struct tc_action_ops act_skbmod_ops;
+
+#define MAX_EDIT_LEN ETH_HLEN
+static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	int action;
+	struct tcf_skbmod_params *p;
+	u64 flags;
+	int err;
+
+	tcf_lastuse_update(&d->tcf_tm);
+	bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+
+	/* XXX: if you are going to edit more fields beyond ethernet header
+	 * (example when you add IP header replacement or vlan swap)
+	 * then MAX_EDIT_LEN needs to change appropriately
+	*/
+	err = skb_ensure_writable(skb, MAX_EDIT_LEN);
+	if (unlikely(err)) { /* best policy is to drop on the floor */
+		qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+		return TC_ACT_SHOT;
+	}
+
+	rcu_read_lock();
+	action = READ_ONCE(d->tcf_action);
+	if (unlikely(action == TC_ACT_SHOT)) {
+		qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats));
+		rcu_read_unlock();
+		return action;
+	}
+
+	p = rcu_dereference(d->skbmod_p);
+	flags = p->flags;
+	if (flags & SKBMOD_F_DMAC)
+		ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst);
+	if (flags & SKBMOD_F_SMAC)
+		ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src);
+	if (flags & SKBMOD_F_ETYPE)
+		eth_hdr(skb)->h_proto = p->eth_type;
+	rcu_read_unlock();
+
+	if (flags & SKBMOD_F_SWAPMAC) {
+		u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */
+		/*XXX: I am sure we can come up with more efficient swapping*/
+		ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest);
+		ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source);
+		ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr);
+	}
+
+	return action;
+}
+
+static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
+	[TCA_SKBMOD_PARMS]		= { .len = sizeof(struct tc_skbmod) },
+	[TCA_SKBMOD_DMAC]		= { .len = ETH_ALEN },
+	[TCA_SKBMOD_SMAC]		= { .len = ETH_ALEN },
+	[TCA_SKBMOD_ETYPE]		= { .type = NLA_U16 },
+};
+
+static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+	struct nlattr *tb[TCA_SKBMOD_MAX + 1];
+	struct tcf_skbmod_params *p, *p_old;
+	struct tc_skbmod *parm;
+	struct tcf_skbmod *d;
+	bool exists = false;
+	u8 *daddr = NULL;
+	u8 *saddr = NULL;
+	u16 eth_type = 0;
+	u32 lflags = 0;
+	int ret = 0, err;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_SKBMOD_PARMS])
+		return -EINVAL;
+
+	if (tb[TCA_SKBMOD_DMAC]) {
+		daddr = nla_data(tb[TCA_SKBMOD_DMAC]);
+		lflags |= SKBMOD_F_DMAC;
+	}
+
+	if (tb[TCA_SKBMOD_SMAC]) {
+		saddr = nla_data(tb[TCA_SKBMOD_SMAC]);
+		lflags |= SKBMOD_F_SMAC;
+	}
+
+	if (tb[TCA_SKBMOD_ETYPE]) {
+		eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]);
+		lflags |= SKBMOD_F_ETYPE;
+	}
+
+	parm = nla_data(tb[TCA_SKBMOD_PARMS]);
+	if (parm->flags & SKBMOD_F_SWAPMAC)
+		lflags = SKBMOD_F_SWAPMAC;
+
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	if (!lflags)
+		return -EINVAL;
+
+	if (!exists) {
+		ret = tcf_hash_create(tn, parm->index, est, a,
+				      &act_skbmod_ops, bind, true);
+		if (ret)
+			return ret;
+
+		ret = ACT_P_CREATED;
+	} else {
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+	}
+
+	d = to_skbmod(*a);
+
+	ASSERT_RTNL();
+	p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
+	if (unlikely(!p)) {
+		if (ovr)
+			tcf_hash_release(*a, bind);
+		return -ENOMEM;
+	}
+
+	p->flags = lflags;
+	d->tcf_action = parm->action;
+
+	p_old = rtnl_dereference(d->skbmod_p);
+
+	if (ovr)
+		spin_lock_bh(&d->tcf_lock);
+
+	if (lflags & SKBMOD_F_DMAC)
+		ether_addr_copy(p->eth_dst, daddr);
+	if (lflags & SKBMOD_F_SMAC)
+		ether_addr_copy(p->eth_src, saddr);
+	if (lflags & SKBMOD_F_ETYPE)
+		p->eth_type = htons(eth_type);
+
+	rcu_assign_pointer(d->skbmod_p, p);
+	if (ovr)
+		spin_unlock_bh(&d->tcf_lock);
+
+	if (p_old)
+		kfree_rcu(p_old, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(tn, *a);
+	return ret;
+}
+
+static void tcf_skbmod_cleanup(struct tc_action *a, int bind)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	struct tcf_skbmod_params  *p;
+
+	p = rcu_dereference_protected(d->skbmod_p, 1);
+	kfree_rcu(p, rcu);
+}
+
+static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	struct tcf_skbmod *d = to_skbmod(a);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_skbmod_params  *p = rtnl_dereference(d->skbmod_p);
+	struct tc_skbmod opt = {
+		.index   = d->tcf_index,
+		.refcnt  = d->tcf_refcnt - ref,
+		.bindcnt = d->tcf_bindcnt - bind,
+		.action  = d->tcf_action,
+	};
+	struct tcf_t t;
+
+	opt.flags  = p->flags;
+	if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_DMAC) &&
+	    nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_SMAC) &&
+	    nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src))
+		goto nla_put_failure;
+	if ((p->flags & SKBMOD_F_ETYPE) &&
+	    nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type)))
+		goto nla_put_failure;
+
+	tcf_tm_dump(&t, &d->tcf_tm);
+	if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD))
+		goto nla_put_failure;
+
+	return skb->len;
+nla_put_failure:
+	rcu_read_unlock();
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_skbmod_ops = {
+	.kind		=	"skbmod",
+	.type		=	TCA_ACT_SKBMOD,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_skbmod_run,
+	.dump		=	tcf_skbmod_dump,
+	.init		=	tcf_skbmod_init,
+	.cleanup	=	tcf_skbmod_cleanup,
+	.walk		=	tcf_skbmod_walker,
+	.lookup		=	tcf_skbmod_search,
+	.size		=	sizeof(struct tcf_skbmod),
+};
+
+static __net_init int skbmod_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK);
+}
+
+static void __net_exit skbmod_exit_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+	tc_action_net_exit(tn);
+}
+
+static struct pernet_operations skbmod_net_ops = {
+	.init = skbmod_init_net,
+	.exit = skbmod_exit_net,
+	.id   = &skbmod_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>");
+MODULE_DESCRIPTION("SKB data mod-ing");
+MODULE_LICENSE("GPL");
+
+static int __init skbmod_init_module(void)
+{
+	return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+static void __exit skbmod_cleanup_module(void)
+{
+	tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops);
+}
+
+module_init(skbmod_init_module);
+module_exit(skbmod_cleanup_module);
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
new file mode 100644
index 0000000..af47bdf
--- /dev/null
+++ b/net/sched/act_tunnel_key.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/dst.h>
+#include <net/dst_metadata.h>
+
+#include <linux/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_tunnel_key.h>
+
+#define TUNNEL_KEY_TAB_MASK     15
+
+static int tunnel_key_net_id;
+static struct tc_action_ops act_tunnel_key_ops;
+
+static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
+			  struct tcf_result *res)
+{
+	struct tcf_tunnel_key *t = to_tunnel_key(a);
+	struct tcf_tunnel_key_params *params;
+	int action;
+
+	rcu_read_lock();
+
+	params = rcu_dereference(t->params);
+
+	tcf_lastuse_update(&t->tcf_tm);
+	bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
+	action = params->action;
+
+	switch (params->tcft_action) {
+	case TCA_TUNNEL_KEY_ACT_RELEASE:
+		skb_dst_drop(skb);
+		break;
+	case TCA_TUNNEL_KEY_ACT_SET:
+		skb_dst_drop(skb);
+		skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst));
+		break;
+	default:
+		WARN_ONCE(1, "Bad tunnel_key action %d.\n",
+			  params->tcft_action);
+		break;
+	}
+
+	rcu_read_unlock();
+
+	return action;
+}
+
+static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
+	[TCA_TUNNEL_KEY_PARMS]	    = { .len = sizeof(struct tc_tunnel_key) },
+	[TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
+	[TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 },
+	[TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+	[TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+	[TCA_TUNNEL_KEY_ENC_KEY_ID]   = { .type = NLA_U32 },
+};
+
+static int tunnel_key_init(struct net *net, struct nlattr *nla,
+			   struct nlattr *est, struct tc_action **a,
+			   int ovr, int bind)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+	struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
+	struct tcf_tunnel_key_params *params_old;
+	struct tcf_tunnel_key_params *params_new;
+	struct metadata_dst *metadata = NULL;
+	struct tc_tunnel_key *parm;
+	struct tcf_tunnel_key *t;
+	bool exists = false;
+	__be64 key_id;
+	int ret = 0;
+	int err;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_TUNNEL_KEY_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
+	exists = tcf_hash_check(tn, parm->index, a, bind);
+	if (exists && bind)
+		return 0;
+
+	switch (parm->t_action) {
+	case TCA_TUNNEL_KEY_ACT_RELEASE:
+		break;
+	case TCA_TUNNEL_KEY_ACT_SET:
+		if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
+			ret = -EINVAL;
+			goto err_out;
+		}
+
+		key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]));
+
+		if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
+		    tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
+			__be32 saddr;
+			__be32 daddr;
+
+			saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
+			daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
+
+			metadata = __ip_tun_set_dst(saddr, daddr, 0, 0,
+						    TUNNEL_KEY, key_id, 0);
+		} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
+			   tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
+			struct in6_addr saddr;
+			struct in6_addr daddr;
+
+			saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
+			daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
+
+			metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0,
+						      TUNNEL_KEY, key_id, 0);
+		}
+
+		if (!metadata) {
+			ret = -EINVAL;
+			goto err_out;
+		}
+
+		metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
+		break;
+	default:
+		goto err_out;
+	}
+
+	if (!exists) {
+		ret = tcf_hash_create(tn, parm->index, est, a,
+				      &act_tunnel_key_ops, bind, true);
+		if (ret)
+			return ret;
+
+		ret = ACT_P_CREATED;
+	} else {
+		tcf_hash_release(*a, bind);
+		if (!ovr)
+			return -EEXIST;
+	}
+
+	t = to_tunnel_key(*a);
+
+	ASSERT_RTNL();
+	params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
+	if (unlikely(!params_new)) {
+		if (ret == ACT_P_CREATED)
+			tcf_hash_release(*a, bind);
+		return -ENOMEM;
+	}
+
+	params_old = rtnl_dereference(t->params);
+
+	params_new->action = parm->action;
+	params_new->tcft_action = parm->t_action;
+	params_new->tcft_enc_metadata = metadata;
+
+	rcu_assign_pointer(t->params, params_new);
+
+	if (params_old)
+		kfree_rcu(params_old, rcu);
+
+	if (ret == ACT_P_CREATED)
+		tcf_hash_insert(tn, *a);
+
+	return ret;
+
+err_out:
+	if (exists)
+		tcf_hash_release(*a, bind);
+	return ret;
+}
+
+static void tunnel_key_release(struct tc_action *a, int bind)
+{
+	struct tcf_tunnel_key *t = to_tunnel_key(a);
+	struct tcf_tunnel_key_params *params;
+
+	params = rcu_dereference_protected(t->params, 1);
+
+	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+		dst_release(&params->tcft_enc_metadata->dst);
+
+	kfree_rcu(params, rcu);
+}
+
+static int tunnel_key_dump_addresses(struct sk_buff *skb,
+				     const struct ip_tunnel_info *info)
+{
+	unsigned short family = ip_tunnel_info_af(info);
+
+	if (family == AF_INET) {
+		__be32 saddr = info->key.u.ipv4.src;
+		__be32 daddr = info->key.u.ipv4.dst;
+
+		if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) &&
+		    !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr))
+			return 0;
+	}
+
+	if (family == AF_INET6) {
+		const struct in6_addr *saddr6 = &info->key.u.ipv6.src;
+		const struct in6_addr *daddr6 = &info->key.u.ipv6.dst;
+
+		if (!nla_put_in6_addr(skb,
+				      TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) &&
+		    !nla_put_in6_addr(skb,
+				      TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6))
+			return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
+			   int bind, int ref)
+{
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_tunnel_key *t = to_tunnel_key(a);
+	struct tcf_tunnel_key_params *params;
+	struct tc_tunnel_key opt = {
+		.index    = t->tcf_index,
+		.refcnt   = t->tcf_refcnt - ref,
+		.bindcnt  = t->tcf_bindcnt - bind,
+	};
+	struct tcf_t tm;
+
+	params = rtnl_dereference(t->params);
+
+	opt.t_action = params->tcft_action;
+	opt.action = params->action;
+
+	if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
+		struct ip_tunnel_key *key =
+			&params->tcft_enc_metadata->u.tun_info.key;
+		__be32 key_id = tunnel_id_to_key32(key->tun_id);
+
+		if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) ||
+		    tunnel_key_dump_addresses(skb,
+					      &params->tcft_enc_metadata->u.tun_info))
+			goto nla_put_failure;
+	}
+
+	tcf_tm_dump(&tm, &t->tcf_tm);
+	if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm),
+			  &tm, TCA_TUNNEL_KEY_PAD))
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
+			     struct netlink_callback *cb, int type,
+			     const struct tc_action_ops *ops)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tcf_generic_walker(tn, skb, cb, type, ops);
+}
+
+static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tcf_hash_search(tn, a, index);
+}
+
+static struct tc_action_ops act_tunnel_key_ops = {
+	.kind		=	"tunnel_key",
+	.type		=	TCA_ACT_TUNNEL_KEY,
+	.owner		=	THIS_MODULE,
+	.act		=	tunnel_key_act,
+	.dump		=	tunnel_key_dump,
+	.init		=	tunnel_key_init,
+	.cleanup	=	tunnel_key_release,
+	.walk		=	tunnel_key_walker,
+	.lookup		=	tunnel_key_search,
+	.size		=	sizeof(struct tcf_tunnel_key),
+};
+
+static __net_init int tunnel_key_init_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK);
+}
+
+static void __net_exit tunnel_key_exit_net(struct net *net)
+{
+	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+	tc_action_net_exit(tn);
+}
+
+static struct pernet_operations tunnel_key_net_ops = {
+	.init = tunnel_key_init_net,
+	.exit = tunnel_key_exit_net,
+	.id   = &tunnel_key_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+static int __init tunnel_key_init_module(void)
+{
+	return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
+}
+
+static void __exit tunnel_key_cleanup_module(void)
+{
+	tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
+}
+
+module_init(tunnel_key_init_module);
+module_exit(tunnel_key_cleanup_module);
+
+MODULE_AUTHOR("Amir Vadai <amir@vadai.me>");
+MODULE_DESCRIPTION("ip tunnel manipulation actions");
+MODULE_LICENSE("GPL v2");
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 691409d..b57fcbc 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -30,12 +30,19 @@
 	struct tcf_vlan *v = to_vlan(a);
 	int action;
 	int err;
+	u16 tci;
 
 	spin_lock(&v->tcf_lock);
 	tcf_lastuse_update(&v->tcf_tm);
 	bstats_update(&v->tcf_bstats, skb);
 	action = v->tcf_action;
 
+	/* Ensure 'data' points at mac_header prior calling vlan manipulating
+	 * functions.
+	 */
+	if (skb_at_tc_ingress(skb))
+		skb_push_rcsum(skb, skb->mac_len);
+
 	switch (v->tcfv_action) {
 	case TCA_VLAN_ACT_POP:
 		err = skb_vlan_pop(skb);
@@ -43,10 +50,35 @@
 			goto drop;
 		break;
 	case TCA_VLAN_ACT_PUSH:
-		err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid);
+		err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid |
+				    (v->tcfv_push_prio << VLAN_PRIO_SHIFT));
 		if (err)
 			goto drop;
 		break;
+	case TCA_VLAN_ACT_MODIFY:
+		/* No-op if no vlan tag (either hw-accel or in-payload) */
+		if (!skb_vlan_tagged(skb))
+			goto unlock;
+		/* extract existing tag (and guarantee no hw-accel tag) */
+		if (skb_vlan_tag_present(skb)) {
+			tci = skb_vlan_tag_get(skb);
+			skb->vlan_tci = 0;
+		} else {
+			/* in-payload vlan tag, pop it */
+			err = __skb_vlan_pop(skb, &tci);
+			if (err)
+				goto drop;
+		}
+		/* replace the vid */
+		tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid;
+		/* replace prio bits, if tcfv_push_prio specified */
+		if (v->tcfv_push_prio) {
+			tci &= ~VLAN_PRIO_MASK;
+			tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT;
+		}
+		/* put updated tci as hwaccel tag */
+		__vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci);
+		break;
 	default:
 		BUG();
 	}
@@ -57,6 +89,9 @@
 	action = TC_ACT_SHOT;
 	v->tcf_qstats.drops++;
 unlock:
+	if (skb_at_tc_ingress(skb))
+		skb_pull_rcsum(skb, skb->mac_len);
+
 	spin_unlock(&v->tcf_lock);
 	return action;
 }
@@ -65,6 +100,7 @@
 	[TCA_VLAN_PARMS]		= { .len = sizeof(struct tc_vlan) },
 	[TCA_VLAN_PUSH_VLAN_ID]		= { .type = NLA_U16 },
 	[TCA_VLAN_PUSH_VLAN_PROTOCOL]	= { .type = NLA_U16 },
+	[TCA_VLAN_PUSH_VLAN_PRIORITY]	= { .type = NLA_U8 },
 };
 
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
@@ -78,6 +114,7 @@
 	int action;
 	__be16 push_vid = 0;
 	__be16 push_proto = 0;
+	u8 push_prio = 0;
 	bool exists = false;
 	int ret = 0, err;
 
@@ -99,6 +136,7 @@
 	case TCA_VLAN_ACT_POP:
 		break;
 	case TCA_VLAN_ACT_PUSH:
+	case TCA_VLAN_ACT_MODIFY:
 		if (!tb[TCA_VLAN_PUSH_VLAN_ID]) {
 			if (exists)
 				tcf_hash_release(*a, bind);
@@ -123,6 +161,9 @@
 		} else {
 			push_proto = htons(ETH_P_8021Q);
 		}
+
+		if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY])
+			push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]);
 		break;
 	default:
 		if (exists)
@@ -150,6 +191,7 @@
 
 	v->tcfv_action = action;
 	v->tcfv_push_vid = push_vid;
+	v->tcfv_push_prio = push_prio;
 	v->tcfv_push_proto = push_proto;
 
 	v->tcf_action = parm->action;
@@ -178,10 +220,13 @@
 	if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
-	if (v->tcfv_action == TCA_VLAN_ACT_PUSH &&
+	if ((v->tcfv_action == TCA_VLAN_ACT_PUSH ||
+	     v->tcfv_action == TCA_VLAN_ACT_MODIFY) &&
 	    (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) ||
 	     nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL,
-			  v->tcfv_push_proto)))
+			  v->tcfv_push_proto) ||
+	     (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY,
+					      v->tcfv_push_prio))))
 		goto nla_put_failure;
 
 	tcf_tm_dump(&t, &v->tcf_tm);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 843a716..11da7da 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -344,13 +344,15 @@
 			if (err == 0) {
 				struct tcf_proto *next = rtnl_dereference(tp->next);
 
-				tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
+				tfilter_notify(net, skb, n, tp, fh,
+					       RTM_DELTFILTER);
 				if (tcf_destroy(tp, false))
 					RCU_INIT_POINTER(*back, next);
 			}
 			goto errout;
 		case RTM_GETTFILTER:
-			err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
+			err = tfilter_notify(net, skb, n, tp, fh,
+					     RTM_NEWTFILTER);
 			goto errout;
 		default:
 			err = -EINVAL;
@@ -448,7 +450,8 @@
 	struct net *net = sock_net(a->skb->sk);
 
 	return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
-			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER);
+			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
+			     RTM_NEWTFILTER);
 }
 
 /* called with RTNL */
@@ -541,36 +544,44 @@
 void tcf_exts_destroy(struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND);
-	INIT_LIST_HEAD(&exts->actions);
+	LIST_HEAD(actions);
+
+	tcf_exts_to_list(exts, &actions);
+	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+	kfree(exts->actions);
+	exts->nr_actions = 0;
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_destroy);
 
 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
-		  struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
+		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	{
 		struct tc_action *act;
 
-		INIT_LIST_HEAD(&exts->actions);
 		if (exts->police && tb[exts->police]) {
 			act = tcf_action_init_1(net, tb[exts->police], rate_tlv,
-						"police", ovr,
-						TCA_ACT_BIND);
+						"police", ovr, TCA_ACT_BIND);
 			if (IS_ERR(act))
 				return PTR_ERR(act);
 
 			act->type = exts->type = TCA_OLD_COMPAT;
-			list_add(&act->list, &exts->actions);
+			exts->actions[0] = act;
+			exts->nr_actions = 1;
 		} else if (exts->action && tb[exts->action]) {
-			int err;
+			LIST_HEAD(actions);
+			int err, i = 0;
+
 			err = tcf_action_init(net, tb[exts->action], rate_tlv,
-					      NULL, ovr,
-					      TCA_ACT_BIND, &exts->actions);
+					      NULL, ovr, TCA_ACT_BIND,
+					      &actions);
 			if (err)
 				return err;
+			list_for_each_entry(act, &actions, list)
+				exts->actions[i++] = act;
+			exts->nr_actions = i;
 		}
 	}
 #else
@@ -587,37 +598,49 @@
 		     struct tcf_exts *src)
 {
 #ifdef CONFIG_NET_CLS_ACT
-	LIST_HEAD(tmp);
+	struct tcf_exts old = *dst;
+
 	tcf_tree_lock(tp);
-	list_splice_init(&dst->actions, &tmp);
-	list_splice(&src->actions, &dst->actions);
+	dst->nr_actions = src->nr_actions;
+	dst->actions = src->actions;
 	dst->type = src->type;
 	tcf_tree_unlock(tp);
-	tcf_action_destroy(&tmp, TCA_ACT_UNBIND);
+
+	tcf_exts_destroy(&old);
 #endif
 }
 EXPORT_SYMBOL(tcf_exts_change);
 
-#define tcf_exts_first_act(ext)					\
-	list_first_entry_or_null(&(exts)->actions,		\
-				 struct tc_action, list)
+#ifdef CONFIG_NET_CLS_ACT
+static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
+{
+	if (exts->nr_actions == 0)
+		return NULL;
+	else
+		return exts->actions[0];
+}
+#endif
 
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 {
 #ifdef CONFIG_NET_CLS_ACT
 	struct nlattr *nest;
 
-	if (exts->action && !list_empty(&exts->actions)) {
+	if (exts->action && exts->nr_actions) {
 		/*
 		 * again for backward compatible mode - we want
 		 * to work with both old and new modes of entering
 		 * tc data even if iproute2  was newer - jhs
 		 */
 		if (exts->type != TCA_OLD_COMPAT) {
+			LIST_HEAD(actions);
+
 			nest = nla_nest_start(skb, exts->action);
 			if (nest == NULL)
 				goto nla_put_failure;
-			if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0)
+
+			tcf_exts_to_list(exts, &actions);
+			if (tcf_action_dump(skb, &actions, 0, 0) < 0)
 				goto nla_put_failure;
 			nla_nest_end(skb, nest);
 		} else if (exts->police) {
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0b8c3ac..eb219b7 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -138,10 +138,12 @@
 	struct tcf_exts e;
 	struct tcf_ematch_tree t;
 
-	tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE);
 	if (err < 0)
 		return err;
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	if (err < 0)
+		goto errout;
 
 	err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t);
 	if (err < 0)
@@ -189,7 +191,10 @@
 	if (!fnew)
 		return -ENOBUFS;
 
-	tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+	err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+	if (err < 0)
+		goto errout;
+
 	err = -EINVAL;
 	if (handle) {
 		fnew->handle = handle;
@@ -226,6 +231,7 @@
 
 	return 0;
 errout:
+	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);
 	return err;
 }
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index c3002c2..bb1d5a4 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -27,6 +27,8 @@
 MODULE_DESCRIPTION("TC BPF based classifier");
 
 #define CLS_BPF_NAME_LEN	256
+#define CLS_BPF_SUPPORTED_GEN_FLAGS		\
+	(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)
 
 struct cls_bpf_head {
 	struct list_head plist;
@@ -39,6 +41,8 @@
 	struct list_head link;
 	struct tcf_result res;
 	bool exts_integrated;
+	bool offloaded;
+	u32 gen_flags;
 	struct tcf_exts exts;
 	u32 handle;
 	union {
@@ -54,8 +58,10 @@
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
 	[TCA_BPF_FLAGS]		= { .type = NLA_U32 },
+	[TCA_BPF_FLAGS_GEN]	= { .type = NLA_U32 },
 	[TCA_BPF_FD]		= { .type = NLA_U32 },
-	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
+	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING,
+				    .len = CLS_BPF_NAME_LEN },
 	[TCA_BPF_OPS_LEN]	= { .type = NLA_U16 },
 	[TCA_BPF_OPS]		= { .type = NLA_BINARY,
 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
@@ -83,9 +89,6 @@
 	struct cls_bpf_prog *prog;
 	int ret = -1;
 
-	if (unlikely(!skb_mac_header_was_set(skb)))
-		return -1;
-
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
 	list_for_each_entry_rcu(prog, &head->plist, link) {
@@ -93,7 +96,9 @@
 
 		qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
 
-		if (at_ingress) {
+		if (tc_skip_sw(prog->gen_flags)) {
+			filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0;
+		} else if (at_ingress) {
 			/* It is safe to push/pull even if skb_shared() */
 			__skb_push(skb, skb->mac_len);
 			bpf_compute_data_end(skb);
@@ -140,6 +145,91 @@
 	return !prog->bpf_ops;
 }
 
+static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+			       enum tc_clsbpf_command cmd)
+{
+	struct net_device *dev = tp->q->dev_queue->dev;
+	struct tc_cls_bpf_offload bpf_offload = {};
+	struct tc_to_netdev offload;
+
+	offload.type = TC_SETUP_CLSBPF;
+	offload.cls_bpf = &bpf_offload;
+
+	bpf_offload.command = cmd;
+	bpf_offload.exts = &prog->exts;
+	bpf_offload.prog = prog->filter;
+	bpf_offload.name = prog->bpf_name;
+	bpf_offload.exts_integrated = prog->exts_integrated;
+	bpf_offload.gen_flags = prog->gen_flags;
+
+	return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle,
+					     tp->protocol, &offload);
+}
+
+static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
+			   struct cls_bpf_prog *oldprog)
+{
+	struct net_device *dev = tp->q->dev_queue->dev;
+	struct cls_bpf_prog *obj = prog;
+	enum tc_clsbpf_command cmd;
+	bool skip_sw;
+	int ret;
+
+	skip_sw = tc_skip_sw(prog->gen_flags) ||
+		(oldprog && tc_skip_sw(oldprog->gen_flags));
+
+	if (oldprog && oldprog->offloaded) {
+		if (tc_should_offload(dev, tp, prog->gen_flags)) {
+			cmd = TC_CLSBPF_REPLACE;
+		} else if (!tc_skip_sw(prog->gen_flags)) {
+			obj = oldprog;
+			cmd = TC_CLSBPF_DESTROY;
+		} else {
+			return -EINVAL;
+		}
+	} else {
+		if (!tc_should_offload(dev, tp, prog->gen_flags))
+			return skip_sw ? -EINVAL : 0;
+		cmd = TC_CLSBPF_ADD;
+	}
+
+	ret = cls_bpf_offload_cmd(tp, obj, cmd);
+	if (ret)
+		return skip_sw ? ret : 0;
+
+	obj->offloaded = true;
+	if (oldprog)
+		oldprog->offloaded = false;
+
+	return 0;
+}
+
+static void cls_bpf_stop_offload(struct tcf_proto *tp,
+				 struct cls_bpf_prog *prog)
+{
+	int err;
+
+	if (!prog->offloaded)
+		return;
+
+	err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+	if (err) {
+		pr_err("Stopping hardware offload failed: %d\n", err);
+		return;
+	}
+
+	prog->offloaded = false;
+}
+
+static void cls_bpf_offload_update_stats(struct tcf_proto *tp,
+					 struct cls_bpf_prog *prog)
+{
+	if (!prog->offloaded)
+		return;
+
+	cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS);
+}
+
 static int cls_bpf_init(struct tcf_proto *tp)
 {
 	struct cls_bpf_head *head;
@@ -179,6 +269,7 @@
 {
 	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg;
 
+	cls_bpf_stop_offload(tp, prog);
 	list_del_rcu(&prog->link);
 	tcf_unbind_filter(tp, &prog->res);
 	call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -195,6 +286,7 @@
 		return false;
 
 	list_for_each_entry_safe(prog, tmp, &head->plist, link) {
+		cls_bpf_stop_offload(tp, prog);
 		list_del_rcu(&prog->link);
 		tcf_unbind_filter(tp, &prog->res);
 		call_rcu(&prog->rcu, __cls_bpf_delete_prog);
@@ -304,6 +396,7 @@
 {
 	bool is_bpf, is_ebpf, have_exts = false;
 	struct tcf_exts exts;
+	u32 gen_flags = 0;
 	int ret;
 
 	is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
@@ -311,30 +404,39 @@
 	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
 		return -EINVAL;
 
-	tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
-	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+	ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
 	if (ret < 0)
 		return ret;
+	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+	if (ret < 0)
+		goto errout;
 
 	if (tb[TCA_BPF_FLAGS]) {
 		u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
 
 		if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
-			tcf_exts_destroy(&exts);
-			return -EINVAL;
+			ret = -EINVAL;
+			goto errout;
 		}
 
 		have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
 	}
+	if (tb[TCA_BPF_FLAGS_GEN]) {
+		gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+		if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+		    !tc_flags_valid(gen_flags)) {
+			ret = -EINVAL;
+			goto errout;
+		}
+	}
 
 	prog->exts_integrated = have_exts;
+	prog->gen_flags = gen_flags;
 
 	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
 		       cls_bpf_prog_from_efd(tb, prog, tp);
-	if (ret < 0) {
-		tcf_exts_destroy(&exts);
-		return ret;
-	}
+	if (ret < 0)
+		goto errout;
 
 	if (tb[TCA_BPF_CLASSID]) {
 		prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
@@ -343,6 +445,10 @@
 
 	tcf_exts_change(tp, &prog->exts, &exts);
 	return 0;
+
+errout:
+	tcf_exts_destroy(&exts);
+	return ret;
 }
 
 static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -388,7 +494,9 @@
 	if (!prog)
 		return -ENOBUFS;
 
-	tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+	ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+	if (ret < 0)
+		goto errout;
 
 	if (oldprog) {
 		if (handle && oldprog->handle != handle) {
@@ -406,10 +514,17 @@
 		goto errout;
 	}
 
-	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
+	ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE],
+				      ovr);
 	if (ret < 0)
 		goto errout;
 
+	ret = cls_bpf_offload(tp, prog, oldprog);
+	if (ret) {
+		cls_bpf_delete_prog(tp, prog);
+		return ret;
+	}
+
 	if (oldprog) {
 		list_replace_rcu(&oldprog->link, &prog->link);
 		tcf_unbind_filter(tp, &oldprog->res);
@@ -420,9 +535,10 @@
 
 	*arg = (unsigned long) prog;
 	return 0;
-errout:
-	kfree(prog);
 
+errout:
+	tcf_exts_destroy(&prog->exts);
+	kfree(prog);
 	return ret;
 }
 
@@ -470,6 +586,8 @@
 
 	tm->tcm_handle = prog->handle;
 
+	cls_bpf_offload_update_stats(tp, prog);
+
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
@@ -492,6 +610,9 @@
 		bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
 	if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
 		goto nla_put_failure;
+	if (prog->gen_flags &&
+	    nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags))
+		goto nla_put_failure;
 
 	nla_nest_end(skb, nest);
 
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 4c85bd3..85233c47 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -93,7 +93,9 @@
 	if (!new)
 		return -ENOBUFS;
 
-	tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+	err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+	if (err < 0)
+		goto errout;
 	new->handle = handle;
 	new->tp = tp;
 	err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
@@ -101,10 +103,14 @@
 	if (err < 0)
 		goto errout;
 
-	tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+	err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
 	if (err < 0)
 		goto errout;
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+	if (err < 0) {
+		tcf_exts_destroy(&e);
+		goto errout;
+	}
 
 	err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
 	if (err < 0) {
@@ -120,6 +126,7 @@
 		call_rcu(&head->rcu, cls_cgroup_destroy_rcu);
 	return 0;
 errout:
+	tcf_exts_destroy(&new->exts);
 	kfree(new);
 	return err;
 }
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index fbfec6a..e396723 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -29,7 +29,7 @@
 #include <net/route.h>
 #include <net/flow_dissector.h>
 
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #include <net/netfilter/nf_conntrack.h>
 #endif
 
@@ -87,12 +87,14 @@
 	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
 }
 
-static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_proto(const struct sk_buff *skb,
+			  const struct flow_keys *flow)
 {
 	return flow->basic.ip_proto;
 }
 
-static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_proto_src(const struct sk_buff *skb,
+			      const struct flow_keys *flow)
 {
 	if (flow->ports.ports)
 		return ntohs(flow->ports.src);
@@ -100,7 +102,8 @@
 	return addr_fold(skb->sk);
 }
 
-static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_proto_dst(const struct sk_buff *skb,
+			      const struct flow_keys *flow)
 {
 	if (flow->ports.ports)
 		return ntohs(flow->ports.dst);
@@ -125,14 +128,14 @@
 
 static u32 flow_get_nfct(const struct sk_buff *skb)
 {
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	return addr_fold(skb->nfct);
 #else
 	return 0;
 #endif
 }
 
-#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 #define CTTUPLE(skb, member)						\
 ({									\
 	enum ip_conntrack_info ctinfo;					\
@@ -149,7 +152,8 @@
 })
 #endif
 
-static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_nfct_src(const struct sk_buff *skb,
+			     const struct flow_keys *flow)
 {
 	switch (tc_skb_protocol(skb)) {
 	case htons(ETH_P_IP):
@@ -161,7 +165,8 @@
 	return flow_get_src(skb, flow);
 }
 
-static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_nfct_dst(const struct sk_buff *skb,
+			     const struct flow_keys *flow)
 {
 	switch (tc_skb_protocol(skb)) {
 	case htons(ETH_P_IP):
@@ -173,14 +178,16 @@
 	return flow_get_dst(skb, flow);
 }
 
-static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_nfct_proto_src(const struct sk_buff *skb,
+				   const struct flow_keys *flow)
 {
 	return ntohs(CTTUPLE(skb, src.u.all));
 fallback:
 	return flow_get_proto_src(skb, flow);
 }
 
-static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow)
+static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb,
+				   const struct flow_keys *flow)
 {
 	return ntohs(CTTUPLE(skb, dst.u.all));
 fallback:
@@ -418,10 +425,12 @@
 			return -EOPNOTSUPP;
 	}
 
-	tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	if (err < 0)
+		goto err1;
 	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
 	if (err < 0)
-		return err;
+		goto err1;
 
 	err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t);
 	if (err < 0)
@@ -432,13 +441,15 @@
 	if (!fnew)
 		goto err2;
 
-	tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+	if (err < 0)
+		goto err3;
 
 	fold = (struct flow_filter *)*arg;
 	if (fold) {
 		err = -EINVAL;
 		if (fold->handle != handle && handle)
-			goto err2;
+			goto err3;
 
 		/* Copy fold into fnew */
 		fnew->tp = fold->tp;
@@ -458,31 +469,31 @@
 		if (tb[TCA_FLOW_MODE])
 			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
 		if (mode != FLOW_MODE_HASH && nkeys > 1)
-			goto err2;
+			goto err3;
 
 		if (mode == FLOW_MODE_HASH)
 			perturb_period = fold->perturb_period;
 		if (tb[TCA_FLOW_PERTURB]) {
 			if (mode != FLOW_MODE_HASH)
-				goto err2;
+				goto err3;
 			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
 		}
 	} else {
 		err = -EINVAL;
 		if (!handle)
-			goto err2;
+			goto err3;
 		if (!tb[TCA_FLOW_KEYS])
-			goto err2;
+			goto err3;
 
 		mode = FLOW_MODE_MAP;
 		if (tb[TCA_FLOW_MODE])
 			mode = nla_get_u32(tb[TCA_FLOW_MODE]);
 		if (mode != FLOW_MODE_HASH && nkeys > 1)
-			goto err2;
+			goto err3;
 
 		if (tb[TCA_FLOW_PERTURB]) {
 			if (mode != FLOW_MODE_HASH)
-				goto err2;
+				goto err3;
 			perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
 		}
 
@@ -542,6 +553,8 @@
 		call_rcu(&fold->rcu, flow_destroy_filter);
 	return 0;
 
+err3:
+	tcf_exts_destroy(&fnew->exts);
 err2:
 	tcf_em_tree_destroy(&t);
 	kfree(fnew);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 5060801..f6f40fb 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -23,17 +23,26 @@
 #include <net/ip.h>
 #include <net/flow_dissector.h>
 
+#include <net/dst.h>
+#include <net/dst_metadata.h>
+
 struct fl_flow_key {
 	int	indev_ifindex;
 	struct flow_dissector_key_control control;
+	struct flow_dissector_key_control enc_control;
 	struct flow_dissector_key_basic basic;
 	struct flow_dissector_key_eth_addrs eth;
-	struct flow_dissector_key_addrs ipaddrs;
+	struct flow_dissector_key_vlan vlan;
 	union {
 		struct flow_dissector_key_ipv4_addrs ipv4;
 		struct flow_dissector_key_ipv6_addrs ipv6;
 	};
 	struct flow_dissector_key_ports tp;
+	struct flow_dissector_key_keyid enc_key_id;
+	union {
+		struct flow_dissector_key_ipv4_addrs enc_ipv4;
+		struct flow_dissector_key_ipv6_addrs enc_ipv6;
+	};
 } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
 
 struct fl_flow_mask_range {
@@ -123,11 +132,31 @@
 	struct cls_fl_filter *f;
 	struct fl_flow_key skb_key;
 	struct fl_flow_key skb_mkey;
+	struct ip_tunnel_info *info;
 
 	if (!atomic_read(&head->ht.nelems))
 		return -1;
 
 	fl_clear_masked_range(&skb_key, &head->mask);
+
+	info = skb_tunnel_info(skb);
+	if (info) {
+		struct ip_tunnel_key *key = &info->key;
+
+		switch (ip_tunnel_info_af(info)) {
+		case AF_INET:
+			skb_key.enc_ipv4.src = key->u.ipv4.src;
+			skb_key.enc_ipv4.dst = key->u.ipv4.dst;
+			break;
+		case AF_INET6:
+			skb_key.enc_ipv6.src = key->u.ipv6.src;
+			skb_key.enc_ipv6.dst = key->u.ipv6.dst;
+			break;
+		}
+
+		skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
+	}
+
 	skb_key.indev_ifindex = skb->skb_iif;
 	/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
 	 * so do it rather here.
@@ -212,7 +241,8 @@
 	tc.type = TC_SETUP_CLSFLOWER;
 	tc.cls_flower = &offload;
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc);
+	err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol,
+					    &tc);
 
 	if (tc_skip_sw(flags))
 		return err;
@@ -293,6 +323,22 @@
 	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_UDP_SRC]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_UDP_DST]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_VLAN_ID]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_VLAN_PRIO]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_VLAN_ETH_TYPE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_ENC_KEY_ID]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV4_SRC]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV4_DST]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_ENC_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_ENC_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_TCP_SRC_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_DST_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_UDP_SRC_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_UDP_DST_MASK]	= { .type = NLA_U16 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -308,9 +354,29 @@
 		memcpy(mask, nla_data(tb[mask_type]), len);
 }
 
+static void fl_set_key_vlan(struct nlattr **tb,
+			    struct flow_dissector_key_vlan *key_val,
+			    struct flow_dissector_key_vlan *key_mask)
+{
+#define VLAN_PRIORITY_MASK	0x7
+
+	if (tb[TCA_FLOWER_KEY_VLAN_ID]) {
+		key_val->vlan_id =
+			nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK;
+		key_mask->vlan_id = VLAN_VID_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) {
+		key_val->vlan_priority =
+			nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) &
+			VLAN_PRIORITY_MASK;
+		key_mask->vlan_priority = VLAN_PRIORITY_MASK;
+	}
+}
+
 static int fl_set_key(struct net *net, struct nlattr **tb,
 		      struct fl_flow_key *key, struct fl_flow_key *mask)
 {
+	__be16 ethertype;
 #ifdef CONFIG_NET_CLS_IND
 	if (tb[TCA_FLOWER_INDEV]) {
 		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
@@ -328,9 +394,20 @@
 		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
 		       sizeof(key->eth.src));
 
-	fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
-		       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
-		       sizeof(key->basic.n_proto));
+	if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
+		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);
+
+		if (ethertype == htons(ETH_P_8021Q)) {
+			fl_set_key_vlan(tb, &key->vlan, &mask->vlan);
+			fl_set_key_val(tb, &key->basic.n_proto,
+				       TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+				       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+				       sizeof(key->basic.n_proto));
+		} else {
+			key->basic.n_proto = ethertype;
+			mask->basic.n_proto = cpu_to_be16(~0);
+		}
+	}
 
 	if (key->basic.n_proto == htons(ETH_P_IP) ||
 	    key->basic.n_proto == htons(ETH_P_IPV6)) {
@@ -359,20 +436,54 @@
 
 	if (key->basic.ip_proto == IPPROTO_TCP) {
 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
-			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
 			       sizeof(key->tp.src));
 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
-			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
 			       sizeof(key->tp.dst));
 	} else if (key->basic.ip_proto == IPPROTO_UDP) {
 		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
-			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
 			       sizeof(key->tp.src));
 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
-			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
 			       sizeof(key->tp.dst));
 	}
 
+	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
+	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
+		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+		fl_set_key_val(tb, &key->enc_ipv4.src,
+			       TCA_FLOWER_KEY_ENC_IPV4_SRC,
+			       &mask->enc_ipv4.src,
+			       TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+			       sizeof(key->enc_ipv4.src));
+		fl_set_key_val(tb, &key->enc_ipv4.dst,
+			       TCA_FLOWER_KEY_ENC_IPV4_DST,
+			       &mask->enc_ipv4.dst,
+			       TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+			       sizeof(key->enc_ipv4.dst));
+	}
+
+	if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
+	    tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
+		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+		fl_set_key_val(tb, &key->enc_ipv6.src,
+			       TCA_FLOWER_KEY_ENC_IPV6_SRC,
+			       &mask->enc_ipv6.src,
+			       TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+			       sizeof(key->enc_ipv6.src));
+		fl_set_key_val(tb, &key->enc_ipv6.dst,
+			       TCA_FLOWER_KEY_ENC_IPV6_DST,
+			       &mask->enc_ipv6.dst,
+			       TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+			       sizeof(key->enc_ipv6.dst));
+	}
+
+	fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
+		       &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
+		       sizeof(key->enc_key_id.keyid));
+
 	return 0;
 }
 
@@ -404,12 +515,10 @@
 
 #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
 #define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
-#define FL_KEY_MEMBER_END_OFFSET(member)					\
-	(FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
 
-#define FL_KEY_IN_RANGE(mask, member)						\
-        (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end &&			\
-         FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+#define FL_KEY_IS_MASKED(mask, member)						\
+	memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member),		\
+		   0, FL_KEY_MEMBER_SIZE(member))				\
 
 #define FL_KEY_SET(keys, cnt, id, member)					\
 	do {									\
@@ -418,9 +527,9 @@
 		cnt++;								\
 	} while(0);
 
-#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member)			\
+#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member)			\
 	do {									\
-		if (FL_KEY_IN_RANGE(mask, member))				\
+		if (FL_KEY_IS_MASKED(mask, member))				\
 			FL_KEY_SET(keys, cnt, id, member);			\
 	} while(0);
 
@@ -432,14 +541,16 @@
 
 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
 	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
-	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
-			       FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
-	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
-			       FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
-	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
-			       FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
-	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
-			       FLOW_DISSECTOR_KEY_PORTS, tp);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_PORTS, tp);
+	FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt,
+			     FLOW_DISSECTOR_KEY_VLAN, vlan);
 
 	skb_flow_dissector_init(&head->dissector, keys, cnt);
 }
@@ -478,10 +589,12 @@
 	struct tcf_exts e;
 	int err;
 
-	tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
 	if (err < 0)
 		return err;
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	if (err < 0)
+		goto errout;
 
 	if (tb[TCA_FLOWER_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
@@ -550,7 +663,9 @@
 	if (!fnew)
 		return -ENOBUFS;
 
-	tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+	err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+	if (err < 0)
+		goto errout;
 
 	if (!handle) {
 		handle = fl_grab_new_handle(tp, head);
@@ -614,6 +729,7 @@
 	return 0;
 
 errout:
+	tcf_exts_destroy(&fnew->exts);
 	kfree(fnew);
 	return err;
 }
@@ -668,6 +784,29 @@
 	return 0;
 }
 
+static int fl_dump_key_vlan(struct sk_buff *skb,
+			    struct flow_dissector_key_vlan *vlan_key,
+			    struct flow_dissector_key_vlan *vlan_mask)
+{
+	int err;
+
+	if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
+		return 0;
+	if (vlan_mask->vlan_id) {
+		err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID,
+				  vlan_key->vlan_id);
+		if (err)
+			return err;
+	}
+	if (vlan_mask->vlan_priority) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO,
+				 vlan_key->vlan_priority);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 		   struct sk_buff *skb, struct tcmsg *t)
 {
@@ -712,6 +851,10 @@
 			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
 			    sizeof(key->basic.n_proto)))
 		goto nla_put_failure;
+
+	if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan))
+		goto nla_put_failure;
+
 	if ((key->basic.n_proto == htons(ETH_P_IP) ||
 	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
 	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
@@ -738,21 +881,48 @@
 
 	if (key->basic.ip_proto == IPPROTO_TCP &&
 	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
-			     &mask->tp.src, TCA_FLOWER_UNSPEC,
+			     &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
 			     sizeof(key->tp.src)) ||
 	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
-			     &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			     &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
 			     sizeof(key->tp.dst))))
 		goto nla_put_failure;
 	else if (key->basic.ip_proto == IPPROTO_UDP &&
 		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
-				  &mask->tp.src, TCA_FLOWER_UNSPEC,
+				  &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
 				  sizeof(key->tp.src)) ||
 		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
-				  &mask->tp.dst, TCA_FLOWER_UNSPEC,
+				  &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
 				  sizeof(key->tp.dst))))
 		goto nla_put_failure;
 
+	if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
+	    (fl_dump_key_val(skb, &key->enc_ipv4.src,
+			    TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
+			    TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+			    sizeof(key->enc_ipv4.src)) ||
+	     fl_dump_key_val(skb, &key->enc_ipv4.dst,
+			     TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst,
+			     TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+			     sizeof(key->enc_ipv4.dst))))
+		goto nla_put_failure;
+	else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
+		 (fl_dump_key_val(skb, &key->enc_ipv6.src,
+			    TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src,
+			    TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+			    sizeof(key->enc_ipv6.src)) ||
+		 fl_dump_key_val(skb, &key->enc_ipv6.dst,
+				 TCA_FLOWER_KEY_ENC_IPV6_DST,
+				 &mask->enc_ipv6.dst,
+				 TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+			    sizeof(key->enc_ipv6.dst))))
+		goto nla_put_failure;
+
+	if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
+			    &mask->enc_key_id, TCA_FLOWER_UNSPEC,
+			    sizeof(key->enc_key_id)))
+		goto nla_put_failure;
+
 	nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags);
 
 	if (tcf_exts_dump(skb, &f->exts))
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index f23a3b6..9dc63d54 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -57,7 +57,7 @@
 }
 
 static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
-			  struct tcf_result *res)
+		       struct tcf_result *res)
 {
 	struct fw_head *head = rcu_dereference_bh(tp->root);
 	struct fw_filter *f;
@@ -188,17 +188,20 @@
 
 static int
 fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f,
-	struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr)
+		struct nlattr **tb, struct nlattr **tca, unsigned long base,
+		bool ovr)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct tcf_exts e;
 	u32 mask;
 	int err;
 
-	tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+	err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE);
 	if (err < 0)
 		return err;
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+	if (err < 0)
+		goto errout;
 
 	if (tb[TCA_FW_CLASSID]) {
 		f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
@@ -235,9 +238,8 @@
 
 static int fw_change(struct net *net, struct sk_buff *in_skb,
 		     struct tcf_proto *tp, unsigned long base,
-		     u32 handle,
-		     struct nlattr **tca,
-		     unsigned long *arg, bool ovr)
+		     u32 handle, struct nlattr **tca, unsigned long *arg,
+		     bool ovr)
 {
 	struct fw_head *head = rtnl_dereference(tp->root);
 	struct fw_filter *f = (struct fw_filter *) *arg;
@@ -270,10 +272,15 @@
 #endif /* CONFIG_NET_CLS_IND */
 		fnew->tp = f->tp;
 
-		tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+		err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+		if (err < 0) {
+			kfree(fnew);
+			return err;
+		}
 
 		err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr);
 		if (err < 0) {
+			tcf_exts_destroy(&fnew->exts);
 			kfree(fnew);
 			return err;
 		}
@@ -313,7 +320,9 @@
 	if (f == NULL)
 		return -ENOBUFS;
 
-	tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+	err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+	if (err < 0)
+		goto errout;
 	f->id = handle;
 	f->tp = tp;
 
@@ -328,6 +337,7 @@
 	return 0;
 
 errout:
+	tcf_exts_destroy(&f->exts);
 	kfree(f);
 	return err;
 }
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 08a3b0a..455fc8f 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -268,8 +268,7 @@
 	return 0;
 }
 
-static void
-route4_delete_filter(struct rcu_head *head)
+static void route4_delete_filter(struct rcu_head *head)
 {
 	struct route4_filter *f = container_of(head, struct route4_filter, rcu);
 
@@ -383,17 +382,19 @@
 			    struct nlattr **tb, struct nlattr *est, int new,
 			    bool ovr)
 {
-	int err;
 	u32 id = 0, to = 0, nhandle = 0x8000;
 	struct route4_filter *fp;
 	unsigned int h1;
 	struct route4_bucket *b;
 	struct tcf_exts e;
+	int err;
 
-	tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
 	if (err < 0)
 		return err;
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	if (err < 0)
+		goto errout;
 
 	err = -EINVAL;
 	if (tb[TCA_ROUTE4_TO]) {
@@ -472,10 +473,8 @@
 }
 
 static int route4_change(struct net *net, struct sk_buff *in_skb,
-		       struct tcf_proto *tp, unsigned long base,
-		       u32 handle,
-		       struct nlattr **tca,
-		       unsigned long *arg, bool ovr)
+			 struct tcf_proto *tp, unsigned long base, u32 handle,
+			 struct nlattr **tca, unsigned long *arg, bool ovr)
 {
 	struct route4_head *head = rtnl_dereference(tp->root);
 	struct route4_filter __rcu **fp;
@@ -503,7 +502,10 @@
 	if (!f)
 		goto errout;
 
-	tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+	err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+	if (err < 0)
+		goto errout;
+
 	if (fold) {
 		f->id = fold->id;
 		f->iif = fold->iif;
@@ -557,6 +559,8 @@
 	return 0;
 
 errout:
+	if (f)
+		tcf_exts_destroy(&f->exts);
 	kfree(f);
 	return err;
 }
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index f9c9fc0..4f05a19 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -487,10 +487,12 @@
 	if (err < 0)
 		return err;
 
-	tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
-	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+	err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
 	if (err < 0)
 		return err;
+	err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
+	if (err < 0)
+		goto errout2;
 
 	f = (struct rsvp_filter *)*arg;
 	if (f) {
@@ -506,7 +508,11 @@
 			goto errout2;
 		}
 
-		tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+		err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+		if (err < 0) {
+			kfree(n);
+			goto errout2;
+		}
 
 		if (tb[TCA_RSVP_CLASSID]) {
 			n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
@@ -530,7 +536,9 @@
 	if (f == NULL)
 		goto errout2;
 
-	tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+	err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+	if (err < 0)
+		goto errout;
 	h2 = 16;
 	if (tb[TCA_RSVP_SRC]) {
 		memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
@@ -627,6 +635,7 @@
 	goto insert;
 
 errout:
+	tcf_exts_destroy(&f->exts);
 	kfree(f);
 errout2:
 	tcf_exts_destroy(&e);
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 944c8ff..96144bd 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -50,14 +50,13 @@
 	struct rcu_head rcu;
 };
 
-static inline int
-tcindex_filter_is_set(struct tcindex_filter_result *r)
+static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
 {
 	return tcf_exts_is_predicative(&r->exts) || r->res.classid;
 }
 
-static struct tcindex_filter_result *
-tcindex_lookup(struct tcindex_data *p, u16 key)
+static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
+						    u16 key)
 {
 	if (p->perfect) {
 		struct tcindex_filter_result *f = p->perfect + key;
@@ -144,7 +143,8 @@
 
 static void tcindex_destroy_fexts(struct rcu_head *head)
 {
-	struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu);
+	struct tcindex_filter *f = container_of(head, struct tcindex_filter,
+						rcu);
 
 	tcf_exts_destroy(&f->result.exts);
 	kfree(f);
@@ -219,10 +219,10 @@
 	[TCA_TCINDEX_CLASSID]		= { .type = NLA_U32 },
 };
 
-static void tcindex_filter_result_init(struct tcindex_filter_result *r)
+static int tcindex_filter_result_init(struct tcindex_filter_result *r)
 {
 	memset(r, 0, sizeof(*r));
-	tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+	return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 }
 
 static void __tcindex_partial_destroy(struct rcu_head *head)
@@ -233,23 +233,57 @@
 	kfree(p);
 }
 
+static void tcindex_free_perfect_hash(struct tcindex_data *cp)
+{
+	int i;
+
+	for (i = 0; i < cp->hash; i++)
+		tcf_exts_destroy(&cp->perfect[i].exts);
+	kfree(cp->perfect);
+}
+
+static int tcindex_alloc_perfect_hash(struct tcindex_data *cp)
+{
+	int i, err = 0;
+
+	cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
+			      GFP_KERNEL);
+	if (!cp->perfect)
+		return -ENOMEM;
+
+	for (i = 0; i < cp->hash; i++) {
+		err = tcf_exts_init(&cp->perfect[i].exts,
+				    TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+		if (err < 0)
+			goto errout;
+	}
+
+	return 0;
+
+errout:
+	tcindex_free_perfect_hash(cp);
+	return err;
+}
+
 static int
 tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
 		  u32 handle, struct tcindex_data *p,
 		  struct tcindex_filter_result *r, struct nlattr **tb,
 		  struct nlattr *est, bool ovr)
 {
-	int err, balloc = 0;
 	struct tcindex_filter_result new_filter_result, *old_r = r;
 	struct tcindex_filter_result cr;
-	struct tcindex_data *cp, *oldp;
+	struct tcindex_data *cp = NULL, *oldp;
 	struct tcindex_filter *f = NULL; /* make gcc behave */
+	int err, balloc = 0;
 	struct tcf_exts e;
 
-	tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
 	if (err < 0)
 		return err;
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	if (err < 0)
+		goto errout;
 
 	err = -ENOMEM;
 	/* tcindex_data attributes must look atomic to classifier/lookup so
@@ -270,19 +304,20 @@
 	if (p->perfect) {
 		int i;
 
-		cp->perfect = kmemdup(p->perfect,
-				      sizeof(*r) * cp->hash, GFP_KERNEL);
-		if (!cp->perfect)
+		if (tcindex_alloc_perfect_hash(cp) < 0)
 			goto errout;
 		for (i = 0; i < cp->hash; i++)
-			tcf_exts_init(&cp->perfect[i].exts,
-				      TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+			cp->perfect[i].res = p->perfect[i].res;
 		balloc = 1;
 	}
 	cp->h = p->h;
 
-	tcindex_filter_result_init(&new_filter_result);
-	tcindex_filter_result_init(&cr);
+	err = tcindex_filter_result_init(&new_filter_result);
+	if (err < 0)
+		goto errout1;
+	err = tcindex_filter_result_init(&cr);
+	if (err < 0)
+		goto errout1;
 	if (old_r)
 		cr.res = r->res;
 
@@ -338,15 +373,8 @@
 	err = -ENOMEM;
 	if (!cp->perfect && !cp->h) {
 		if (valid_perfect_hash(cp)) {
-			int i;
-
-			cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL);
-			if (!cp->perfect)
+			if (tcindex_alloc_perfect_hash(cp) < 0)
 				goto errout_alloc;
-			for (i = 0; i < cp->hash; i++)
-				tcf_exts_init(&cp->perfect[i].exts,
-					      TCA_TCINDEX_ACT,
-					      TCA_TCINDEX_POLICE);
 			balloc = 1;
 		} else {
 			struct tcindex_filter __rcu **hash;
@@ -373,8 +401,12 @@
 		if (!f)
 			goto errout_alloc;
 		f->key = handle;
-		tcindex_filter_result_init(&f->result);
 		f->next = NULL;
+		err = tcindex_filter_result_init(&f->result);
+		if (err < 0) {
+			kfree(f);
+			goto errout_alloc;
+		}
 	}
 
 	if (tb[TCA_TCINDEX_CLASSID]) {
@@ -387,8 +419,13 @@
 	else
 		tcf_exts_change(tp, &cr.exts, &e);
 
-	if (old_r && old_r != r)
-		tcindex_filter_result_init(old_r);
+	if (old_r && old_r != r) {
+		err = tcindex_filter_result_init(old_r);
+		if (err < 0) {
+			kfree(f);
+			goto errout_alloc;
+		}
+	}
 
 	oldp = p;
 	r->res = cr.res;
@@ -415,9 +452,12 @@
 
 errout_alloc:
 	if (balloc == 1)
-		kfree(cp->perfect);
+		tcindex_free_perfect_hash(cp);
 	else if (balloc == 2)
 		kfree(cp->h);
+errout1:
+	tcf_exts_destroy(&cr.exts);
+	tcf_exts_destroy(&new_filter_result.exts);
 errout:
 	kfree(cp);
 	tcf_exts_destroy(&e);
@@ -510,7 +550,7 @@
 
 
 static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
-    struct sk_buff *skb, struct tcmsg *t)
+			struct sk_buff *skb, struct tcmsg *t)
 {
 	struct tcindex_data *p = rtnl_dereference(tp->root);
 	struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ffe593e..ae83c3ae 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -104,7 +104,8 @@
 	return h;
 }
 
-static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res)
+static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+			struct tcf_result *res)
 {
 	struct {
 		struct tc_u_knode *knode;
@@ -256,8 +257,7 @@
 	return -1;
 }
 
-static struct tc_u_hnode *
-u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
+static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
 {
 	struct tc_u_hnode *ht;
 
@@ -270,8 +270,7 @@
 	return ht;
 }
 
-static struct tc_u_knode *
-u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
+static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
 {
 	unsigned int sel;
 	struct tc_u_knode *n = NULL;
@@ -360,8 +359,7 @@
 	return 0;
 }
 
-static int u32_destroy_key(struct tcf_proto *tp,
-			   struct tc_u_knode *n,
+static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
 			   bool free_pf)
 {
 	tcf_exts_destroy(&n->exts);
@@ -448,9 +446,8 @@
 	}
 }
 
-static int u32_replace_hw_hnode(struct tcf_proto *tp,
-				 struct tc_u_hnode *h,
-				 u32 flags)
+static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
+				u32 flags)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
 	struct tc_cls_u32_offload u32_offload = {0};
@@ -496,9 +493,8 @@
 	}
 }
 
-static int u32_replace_hw_knode(struct tcf_proto *tp,
-				 struct tc_u_knode *n,
-				 u32 flags)
+static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
+				u32 flags)
 {
 	struct net_device *dev = tp->q->dev_queue->dev;
 	struct tc_cls_u32_offload u32_offload = {0};
@@ -709,13 +705,15 @@
 			 struct tc_u_knode *n, struct nlattr **tb,
 			 struct nlattr *est, bool ovr)
 {
-	int err;
 	struct tcf_exts e;
+	int err;
 
-	tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
-	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE);
 	if (err < 0)
 		return err;
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	if (err < 0)
+		goto errout;
 
 	err = -EINVAL;
 	if (tb[TCA_U32_LINK]) {
@@ -761,8 +759,7 @@
 	return err;
 }
 
-static void u32_replace_knode(struct tcf_proto *tp,
-			      struct tc_u_common *tp_c,
+static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
 			      struct tc_u_knode *n)
 {
 	struct tc_u_knode __rcu **ins;
@@ -833,15 +830,17 @@
 	new->tp = tp;
 	memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
 
-	tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE);
+	if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
+		kfree(new);
+		return NULL;
+	}
 
 	return new;
 }
 
 static int u32_change(struct net *net, struct sk_buff *in_skb,
 		      struct tcf_proto *tp, unsigned long base, u32 handle,
-		      struct nlattr **tca,
-		      unsigned long *arg, bool ovr)
+		      struct nlattr **tca, unsigned long *arg, bool ovr)
 {
 	struct tc_u_common *tp_c = tp->data;
 	struct tc_u_hnode *ht;
@@ -985,9 +984,12 @@
 	n->handle = handle;
 	n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
 	n->flags = flags;
-	tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
 	n->tp = tp;
 
+	err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+	if (err < 0)
+		goto errout;
+
 #ifdef CONFIG_CLS_U32_MARK
 	n->pcpu_success = alloc_percpu(u32);
 	if (!n->pcpu_success) {
@@ -1028,9 +1030,10 @@
 errhw:
 #ifdef CONFIG_CLS_U32_MARK
 	free_percpu(n->pcpu_success);
-errout:
 #endif
 
+errout:
+	tcf_exts_destroy(&n->exts);
 #ifdef CONFIG_CLS_U32_PERF
 	free_percpu(n->pf);
 #endif
@@ -1079,7 +1082,7 @@
 }
 
 static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
-		     struct sk_buff *skb, struct tcmsg *t)
+		    struct sk_buff *skb, struct tcmsg *t)
 {
 	struct tc_u_knode *n = (struct tc_u_knode *)fh;
 	struct tc_u_hnode *ht_up, *ht_down;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 12ebde8..206dc24 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -29,6 +29,7 @@
 #include <linux/hrtimer.h>
 #include <linux/lockdep.h>
 #include <linux/slab.h>
+#include <linux/hashtable.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -259,37 +260,40 @@
 {
 	struct Qdisc *q;
 
+	if (!qdisc_dev(root))
+		return (root->handle == handle ? root : NULL);
+
 	if (!(root->flags & TCQ_F_BUILTIN) &&
 	    root->handle == handle)
 		return root;
 
-	list_for_each_entry_rcu(q, &root->list, list) {
+	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
 		if (q->handle == handle)
 			return q;
 	}
 	return NULL;
 }
 
-void qdisc_list_add(struct Qdisc *q)
+void qdisc_hash_add(struct Qdisc *q)
 {
 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
 		struct Qdisc *root = qdisc_dev(q)->qdisc;
 
 		WARN_ON_ONCE(root == &noop_qdisc);
 		ASSERT_RTNL();
-		list_add_tail_rcu(&q->list, &root->list);
+		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
 	}
 }
-EXPORT_SYMBOL(qdisc_list_add);
+EXPORT_SYMBOL(qdisc_hash_add);
 
-void qdisc_list_del(struct Qdisc *q)
+void qdisc_hash_del(struct Qdisc *q)
 {
 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
 		ASSERT_RTNL();
-		list_del_rcu(&q->list);
+		hash_del_rcu(&q->hash);
 	}
 }
-EXPORT_SYMBOL(qdisc_list_del);
+EXPORT_SYMBOL(qdisc_hash_del);
 
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
 {
@@ -385,7 +389,8 @@
 
 static struct qdisc_rate_table *qdisc_rtab_list;
 
-struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
+struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
+					struct nlattr *tab)
 {
 	struct qdisc_rate_table *rtab;
 
@@ -537,7 +542,8 @@
 	return -1;
 }
 
-void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
+void __qdisc_calculate_pkt_len(struct sk_buff *skb,
+			       const struct qdisc_size_table *stab)
 {
 	int pkt_len, slot;
 
@@ -884,10 +890,10 @@
    Parameters are passed via opt.
  */
 
-static struct Qdisc *
-qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
-	     struct Qdisc *p, u32 parent, u32 handle,
-	     struct nlattr **tca, int *errp)
+static struct Qdisc *qdisc_create(struct net_device *dev,
+				  struct netdev_queue *dev_queue,
+				  struct Qdisc *p, u32 parent, u32 handle,
+				  struct nlattr **tca, int *errp)
 {
 	int err;
 	struct nlattr *kind = tca[TCA_KIND];
@@ -998,7 +1004,7 @@
 				goto err_out4;
 		}
 
-		qdisc_list_add(sch);
+		qdisc_hash_add(sch);
 
 		return sch;
 	}
@@ -1069,7 +1075,8 @@
 	int			depth;
 };
 
-static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
+static int check_loop_fn(struct Qdisc *q, unsigned long cl,
+			 struct qdisc_walker *w);
 
 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
 {
@@ -1431,10 +1438,11 @@
 
 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
 			      struct netlink_callback *cb,
-			      int *q_idx_p, int s_q_idx)
+			      int *q_idx_p, int s_q_idx, bool recur)
 {
 	int ret = 0, q_idx = *q_idx_p;
 	struct Qdisc *q;
+	int b;
 
 	if (!root)
 		return 0;
@@ -1445,18 +1453,30 @@
 	} else {
 		if (!tc_qdisc_dump_ignore(q) &&
 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
-				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				  RTM_NEWQDISC) <= 0)
 			goto done;
 		q_idx++;
 	}
-	list_for_each_entry(q, &root->list, list) {
+
+	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
+	 * itself has already been dumped.
+	 *
+	 * If we've already dumped the top-level (ingress) qdisc above and the global
+	 * qdisc hashtable, we don't want to hit it again
+	 */
+	if (!qdisc_dev(root) || !recur)
+		goto out;
+
+	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
 		if (q_idx < s_q_idx) {
 			q_idx++;
 			continue;
 		}
 		if (!tc_qdisc_dump_ignore(q) &&
 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
-				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
+				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				  RTM_NEWQDISC) <= 0)
 			goto done;
 		q_idx++;
 	}
@@ -1490,13 +1510,14 @@
 			s_q_idx = 0;
 		q_idx = 0;
 
-		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
+		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
+				       true) < 0)
 			goto done;
 
 		dev_queue = dev_ingress_queue(dev);
 		if (dev_queue &&
 		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
-				       &q_idx, s_q_idx) < 0)
+				       &q_idx, s_q_idx, false) < 0)
 			goto done;
 
 cont:
@@ -1625,7 +1646,8 @@
 			if (cops->delete)
 				err = cops->delete(q, cl);
 			if (err == 0)
-				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
+				tclass_notify(net, skb, n, q, cl,
+					      RTM_DELTCLASS);
 			goto out;
 		case RTM_GETTCLASS:
 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
@@ -1723,12 +1745,14 @@
 	struct netlink_callback	*cb;
 };
 
-static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
+static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
+			    struct qdisc_walker *arg)
 {
 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
 
 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
-			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
+			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
+			      RTM_NEWTCLASS);
 }
 
 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
@@ -1765,6 +1789,7 @@
 			       int *t_p, int s_t)
 {
 	struct Qdisc *q;
+	int b;
 
 	if (!root)
 		return 0;
@@ -1772,7 +1797,10 @@
 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
 		return -1;
 
-	list_for_each_entry(q, &root->list, list) {
+	if (!qdisc_dev(root))
+		return 0;
+
+	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
 			return -1;
 	}
@@ -1957,10 +1985,12 @@
 
 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
+		      NULL);
 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
-	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);
+	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
+		      NULL);
 
 	return 0;
 }
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index 4002df3..5bfa79e 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -69,7 +69,7 @@
 static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx)
 {
 	struct Qdisc *sch = ctx;
-	struct sk_buff *skb = __skb_dequeue(&sch->q);
+	struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
 
 	if (skb)
 		sch->qstats.backlog -= qdisc_pkt_len(skb);
@@ -172,7 +172,7 @@
 
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > sch->limit) {
-		struct sk_buff *skb = __skb_dequeue(&sch->q);
+		struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
 
 		dropped += qdisc_pkt_len(skb);
 		qdisc_qstats_backlog_dec(sch, skb);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index baeed6a..1e37247 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -31,7 +31,7 @@
 static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 			 struct sk_buff **to_free)
 {
-	if (likely(skb_queue_len(&sch->q) < sch->limit))
+	if (likely(sch->q.qlen < sch->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	return qdisc_drop(skb, sch, to_free);
@@ -42,7 +42,7 @@
 {
 	unsigned int prev_backlog;
 
-	if (likely(skb_queue_len(&sch->q) < sch->limit))
+	if (likely(sch->q.qlen < sch->limit))
 		return qdisc_enqueue_tail(skb, sch);
 
 	prev_backlog = sch->qstats.backlog;
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index e5458b9..18e7524 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -86,6 +86,7 @@
 
 	struct rb_root	delayed;	/* for rate limited flows */
 	u64		time_next_delayed_flow;
+	unsigned long	unthrottle_latency_ns;
 
 	struct fq_flow	internal;	/* for non classified or high prio packets */
 	u32		quantum;
@@ -94,6 +95,7 @@
 	u32		flow_max_rate;	/* optional max rate per flow */
 	u32		flow_plimit;	/* max packets per flow */
 	u32		orphan_mask;	/* mask for orphaned skb */
+	u32		low_rate_threshold;
 	struct rb_root	*fq_root;
 	u8		rate_enable;
 	u8		fq_trees_log;
@@ -407,11 +409,19 @@
 
 static void fq_check_throttled(struct fq_sched_data *q, u64 now)
 {
+	unsigned long sample;
 	struct rb_node *p;
 
 	if (q->time_next_delayed_flow > now)
 		return;
 
+	/* Update unthrottle latency EWMA.
+	 * This is cheap and can help diagnosing timer/latency problems.
+	 */
+	sample = (unsigned long)(now - q->time_next_delayed_flow);
+	q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3;
+	q->unthrottle_latency_ns += sample >> 3;
+
 	q->time_next_delayed_flow = ~0ULL;
 	while ((p = rb_first(&q->delayed)) != NULL) {
 		struct fq_flow *f = container_of(p, struct fq_flow, rate_node);
@@ -433,7 +443,7 @@
 	struct fq_flow_head *head;
 	struct sk_buff *skb;
 	struct fq_flow *f;
-	u32 rate;
+	u32 rate, plen;
 
 	skb = fq_dequeue_head(sch, &q->internal);
 	if (skb)
@@ -482,7 +492,7 @@
 	prefetch(&skb->end);
 	f->credit -= qdisc_pkt_len(skb);
 
-	if (f->credit > 0 || !q->rate_enable)
+	if (!q->rate_enable)
 		goto out;
 
 	/* Do not pace locally generated ack packets */
@@ -493,8 +503,15 @@
 	if (skb->sk)
 		rate = min(skb->sk->sk_pacing_rate, rate);
 
+	if (rate <= q->low_rate_threshold) {
+		f->credit = 0;
+		plen = qdisc_pkt_len(skb);
+	} else {
+		plen = max(qdisc_pkt_len(skb), q->quantum);
+		if (f->credit > 0)
+			goto out;
+	}
 	if (rate != ~0U) {
-		u32 plen = max(qdisc_pkt_len(skb), q->quantum);
 		u64 len = (u64)plen * NSEC_PER_SEC;
 
 		if (likely(rate))
@@ -507,7 +524,12 @@
 			len = NSEC_PER_SEC;
 			q->stat_pkts_too_long++;
 		}
-
+		/* Account for schedule/timers drifts.
+		 * f->time_next_packet was set when prior packet was sent,
+		 * and current time (@now) can be too late by tens of us.
+		 */
+		if (f->time_next_packet)
+			len -= min(len/2, now - f->time_next_packet);
 		f->time_next_packet = now + len;
 	}
 out:
@@ -662,6 +684,7 @@
 	[TCA_FQ_FLOW_MAX_RATE]		= { .type = NLA_U32 },
 	[TCA_FQ_BUCKETS_LOG]		= { .type = NLA_U32 },
 	[TCA_FQ_FLOW_REFILL_DELAY]	= { .type = NLA_U32 },
+	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt)
@@ -716,6 +739,10 @@
 	if (tb[TCA_FQ_FLOW_MAX_RATE])
 		q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]);
 
+	if (tb[TCA_FQ_LOW_RATE_THRESHOLD])
+		q->low_rate_threshold =
+			nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]);
+
 	if (tb[TCA_FQ_RATE_ENABLE]) {
 		u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]);
 
@@ -774,6 +801,7 @@
 	q->initial_quantum	= 10 * psched_mtu(qdisc_dev(sch));
 	q->flow_refill_delay	= msecs_to_jiffies(40);
 	q->flow_max_rate	= ~0U;
+	q->time_next_delayed_flow = ~0ULL;
 	q->rate_enable		= 1;
 	q->new_flows.first	= NULL;
 	q->old_flows.first	= NULL;
@@ -781,6 +809,7 @@
 	q->fq_root		= NULL;
 	q->fq_trees_log		= ilog2(1024);
 	q->orphan_mask		= 1024 - 1;
+	q->low_rate_threshold	= 550000 / 8;
 	qdisc_watchdog_init(&q->watchdog, sch);
 
 	if (opt)
@@ -811,6 +840,8 @@
 	    nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY,
 			jiffies_to_usecs(q->flow_refill_delay)) ||
 	    nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) ||
+	    nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD,
+			q->low_rate_threshold) ||
 	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log))
 		goto nla_put_failure;
 
@@ -823,20 +854,24 @@
 static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 {
 	struct fq_sched_data *q = qdisc_priv(sch);
-	u64 now = ktime_get_ns();
-	struct tc_fq_qd_stats st = {
-		.gc_flows		= q->stat_gc_flows,
-		.highprio_packets	= q->stat_internal_packets,
-		.tcp_retrans		= q->stat_tcp_retrans,
-		.throttled		= q->stat_throttled,
-		.flows_plimit		= q->stat_flows_plimit,
-		.pkts_too_long		= q->stat_pkts_too_long,
-		.allocation_errors	= q->stat_allocation_errors,
-		.flows			= q->flows,
-		.inactive_flows		= q->inactive_flows,
-		.throttled_flows	= q->throttled_flows,
-		.time_next_delayed_flow	= q->time_next_delayed_flow - now,
-	};
+	struct tc_fq_qd_stats st;
+
+	sch_tree_lock(sch);
+
+	st.gc_flows		  = q->stat_gc_flows;
+	st.highprio_packets	  = q->stat_internal_packets;
+	st.tcp_retrans		  = q->stat_tcp_retrans;
+	st.throttled		  = q->stat_throttled;
+	st.flows_plimit		  = q->stat_flows_plimit;
+	st.pkts_too_long	  = q->stat_pkts_too_long;
+	st.allocation_errors	  = q->stat_allocation_errors;
+	st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns();
+	st.flows		  = q->flows;
+	st.inactive_flows	  = q->inactive_flows;
+	st.throttled_flows	  = q->throttled_flows;
+	st.unthrottle_latency_ns  = min_t(unsigned long,
+					  q->unthrottle_latency_ns, ~0U);
+	sch_tree_unlock(sch);
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index e95b67c..6cfb6e9 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -423,7 +423,6 @@
 	.dequeue	=	noop_dequeue,
 	.flags		=	TCQ_F_BUILTIN,
 	.ops		=	&noop_qdisc_ops,
-	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
 	.running	=	SEQCNT_ZERO(noop_qdisc.running),
@@ -467,7 +466,7 @@
  */
 struct pfifo_fast_priv {
 	u32 bitmap;
-	struct sk_buff_head q[PFIFO_FAST_BANDS];
+	struct qdisc_skb_head q[PFIFO_FAST_BANDS];
 };
 
 /*
@@ -478,7 +477,7 @@
  */
 static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0};
 
-static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
+static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv,
 					     int band)
 {
 	return priv->q + band;
@@ -487,10 +486,10 @@
 static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
 			      struct sk_buff **to_free)
 {
-	if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
+	if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) {
 		int band = prio2band[skb->priority & TC_PRIO_MAX];
 		struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
-		struct sk_buff_head *list = band2list(priv, band);
+		struct qdisc_skb_head *list = band2list(priv, band);
 
 		priv->bitmap |= (1 << band);
 		qdisc->q.qlen++;
@@ -506,11 +505,16 @@
 	int band = bitmap2band[priv->bitmap];
 
 	if (likely(band >= 0)) {
-		struct sk_buff_head *list = band2list(priv, band);
-		struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list);
+		struct qdisc_skb_head *qh = band2list(priv, band);
+		struct sk_buff *skb = __qdisc_dequeue_head(qh);
+
+		if (likely(skb != NULL)) {
+			qdisc_qstats_backlog_dec(qdisc, skb);
+			qdisc_bstats_update(qdisc, skb);
+		}
 
 		qdisc->q.qlen--;
-		if (skb_queue_empty(list))
+		if (qh->qlen == 0)
 			priv->bitmap &= ~(1 << band);
 
 		return skb;
@@ -525,9 +529,9 @@
 	int band = bitmap2band[priv->bitmap];
 
 	if (band >= 0) {
-		struct sk_buff_head *list = band2list(priv, band);
+		struct qdisc_skb_head *qh = band2list(priv, band);
 
-		return skb_peek(list);
+		return qh->head;
 	}
 
 	return NULL;
@@ -565,7 +569,7 @@
 	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
 
 	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
-		__skb_queue_head_init(band2list(priv, prio));
+		qdisc_skb_head_init(band2list(priv, prio));
 
 	/* Can by-pass the queue discipline */
 	qdisc->flags |= TCQ_F_CAN_BYPASS;
@@ -613,8 +617,8 @@
 		sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
 		sch->padded = (char *) sch - (char *) p;
 	}
-	INIT_LIST_HEAD(&sch->list);
-	skb_queue_head_init(&sch->q);
+	qdisc_skb_head_init(&sch->q);
+	spin_lock_init(&sch->q.lock);
 
 	spin_lock_init(&sch->busylock);
 	lockdep_set_class(&sch->busylock,
@@ -643,18 +647,19 @@
 	struct Qdisc *sch;
 
 	if (!try_module_get(ops->owner))
-		goto errout;
+		return NULL;
 
 	sch = qdisc_alloc(dev_queue, ops);
-	if (IS_ERR(sch))
-		goto errout;
+	if (IS_ERR(sch)) {
+		module_put(ops->owner);
+		return NULL;
+	}
 	sch->parent = parentid;
 
 	if (!ops->init || ops->init(sch, NULL) == 0)
 		return sch;
 
 	qdisc_destroy(sch);
-errout:
 	return NULL;
 }
 EXPORT_SYMBOL(qdisc_create_dflt);
@@ -700,7 +705,7 @@
 		return;
 
 #ifdef CONFIG_NET_SCHED
-	qdisc_list_del(qdisc);
+	qdisc_hash_del(qdisc);
 
 	qdisc_put_stab(rtnl_dereference(qdisc->stab));
 #endif
@@ -788,6 +793,10 @@
 			qdisc->ops->attach(qdisc);
 		}
 	}
+#ifdef CONFIG_NET_SCHED
+	if (dev->qdisc)
+		qdisc_hash_add(dev->qdisc);
+#endif
 }
 
 static void transition_one_qdisc(struct net_device *dev,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3ddc7bd..000f1d3 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -142,8 +142,6 @@
 					   link-sharing, max(myf, cfmin) */
 	u64	cl_myf;			/* my fit-time (calculated from this
 					   class's own upperlimit curve) */
-	u64	cl_myfadj;		/* my fit-time adjustment (to cancel
-					   history dependence) */
 	u64	cl_cfmin;		/* earliest children's fit-time (used
 					   with cl_myf to obtain cl_f) */
 	u64	cl_cvtmin;		/* minimal virtual time among the
@@ -151,11 +149,8 @@
 					   (monotonic within a period) */
 	u64	cl_vtadj;		/* intra-period cumulative vt
 					   adjustment */
-	u64	cl_vtoff;		/* inter-period cumulative vt offset */
-	u64	cl_cvtmax;		/* max child's vt in the last period */
-	u64	cl_cvtoff;		/* cumulative cvtmax of all periods */
-	u64	cl_pcvtoff;		/* parent's cvtoff at initialization
-					   time */
+	u64	cl_cvtoff;		/* largest virtual time seen among
+					   the children */
 
 	struct internal_sc cl_rsc;	/* internal real-time service curve */
 	struct internal_sc cl_fsc;	/* internal fair service curve */
@@ -701,28 +696,16 @@
 			} else {
 				/*
 				 * first child for a new parent backlog period.
-				 * add parent's cvtmax to cvtoff to make a new
-				 * vt (vtoff + vt) larger than the vt in the
-				 * last period for all children.
+				 * initialize cl_vt to the highest value seen
+				 * among the siblings. this is analogous to
+				 * what cur_time would provide in realtime case.
 				 */
-				vt = cl->cl_parent->cl_cvtmax;
-				cl->cl_parent->cl_cvtoff += vt;
-				cl->cl_parent->cl_cvtmax = 0;
+				cl->cl_vt = cl->cl_parent->cl_cvtoff;
 				cl->cl_parent->cl_cvtmin = 0;
-				cl->cl_vt = 0;
 			}
 
-			cl->cl_vtoff = cl->cl_parent->cl_cvtoff -
-							cl->cl_pcvtoff;
-
 			/* update the virtual curve */
-			vt = cl->cl_vt + cl->cl_vtoff;
-			rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
-						      cl->cl_total);
-			if (cl->cl_virtual.x == vt) {
-				cl->cl_virtual.x -= cl->cl_vtoff;
-				cl->cl_vtoff = 0;
-			}
+			rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
 			cl->cl_vtadj = 0;
 
 			cl->cl_vtperiod++;  /* increment vt period */
@@ -745,7 +728,6 @@
 				/* compute myf */
 				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
 						      cl->cl_total);
-				cl->cl_myfadj = 0;
 			}
 		}
 
@@ -779,8 +761,7 @@
 			go_passive = 0;
 
 		/* update vt */
-		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
-			    - cl->cl_vtoff + cl->cl_vtadj;
+		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj;
 
 		/*
 		 * if vt of the class is smaller than cvtmin,
@@ -795,9 +776,9 @@
 		if (go_passive) {
 			/* no more active child, going passive */
 
-			/* update cvtmax of the parent class */
-			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
-				cl->cl_parent->cl_cvtmax = cl->cl_vt;
+			/* update cvtoff of the parent class */
+			if (cl->cl_vt > cl->cl_parent->cl_cvtoff)
+				cl->cl_parent->cl_cvtoff = cl->cl_vt;
 
 			/* remove this class from the vt tree */
 			vttree_remove(cl);
@@ -813,9 +794,10 @@
 
 		/* update f */
 		if (cl->cl_flags & HFSC_USC) {
+			cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
+#if 0
 			cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
 							      cl->cl_total);
-#if 0
 			/*
 			 * This code causes classes to stay way under their
 			 * limit when multiple classes are used at gigabit
@@ -940,7 +922,7 @@
 hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
 {
 	sc2isc(fsc, &cl->cl_fsc);
-	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total);
+	rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
 	cl->cl_flags |= HFSC_FSC;
 }
 
@@ -1094,7 +1076,6 @@
 	if (parent->level == 0)
 		hfsc_purge_queue(sch, parent);
 	hfsc_adjust_levels(parent);
-	cl->cl_pcvtoff = parent->cl_cvtoff;
 	sch_tree_unlock(sch);
 
 	qdisc_class_hash_grow(sch, &q->clhash);
@@ -1482,16 +1463,12 @@
 	cl->cl_e            = 0;
 	cl->cl_vt           = 0;
 	cl->cl_vtadj        = 0;
-	cl->cl_vtoff        = 0;
 	cl->cl_cvtmin       = 0;
-	cl->cl_cvtmax       = 0;
 	cl->cl_cvtoff       = 0;
-	cl->cl_pcvtoff      = 0;
 	cl->cl_vtperiod     = 0;
 	cl->cl_parentperiod = 0;
 	cl->cl_f            = 0;
 	cl->cl_myf          = 0;
-	cl->cl_myfadj       = 0;
 	cl->cl_cfmin        = 0;
 	cl->cl_nactive      = 0;
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 53dbfa1..c798d0d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -162,7 +162,7 @@
 	struct work_struct	work;
 
 	/* non shaped skbs; let them go directly thru */
-	struct sk_buff_head	direct_queue;
+	struct qdisc_skb_head	direct_queue;
 	long			direct_pkts;
 
 	struct qdisc_watchdog	watchdog;
@@ -570,6 +570,22 @@
 	list_del_init(&cl->un.leaf.drop_list);
 }
 
+static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch,
+			     struct qdisc_skb_head *qh)
+{
+	struct sk_buff *last = qh->tail;
+
+	if (last) {
+		skb->next = NULL;
+		last->next = skb;
+		qh->tail = skb;
+	} else {
+		qh->tail = skb;
+		qh->head = skb;
+	}
+	qh->qlen++;
+}
+
 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		       struct sk_buff **to_free)
 {
@@ -580,7 +596,7 @@
 	if (cl == HTB_DIRECT) {
 		/* enqueue to helper queue */
 		if (q->direct_queue.qlen < q->direct_qlen) {
-			__skb_queue_tail(&q->direct_queue, skb);
+			htb_enqueue_tail(skb, sch, &q->direct_queue);
 			q->direct_pkts++;
 		} else {
 			return qdisc_drop(skb, sch, to_free);
@@ -888,7 +904,7 @@
 	unsigned long start_at;
 
 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
-	skb = __skb_dequeue(&q->direct_queue);
+	skb = __qdisc_dequeue_head(&q->direct_queue);
 	if (skb != NULL) {
 ok:
 		qdisc_bstats_update(sch, skb);
@@ -1019,7 +1035,7 @@
 
 	qdisc_watchdog_init(&q->watchdog, sch);
 	INIT_WORK(&q->work, htb_work_func);
-	__skb_queue_head_init(&q->direct_queue);
+	qdisc_skb_head_init(&q->direct_queue);
 
 	if (tb[TCA_HTB_DIRECT_QLEN])
 		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index b943982..2bc8d7f 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -88,7 +88,7 @@
 			qdisc_destroy(old);
 #ifdef CONFIG_NET_SCHED
 		if (ntx < dev->real_num_tx_queues)
-			qdisc_list_add(qdisc);
+			qdisc_hash_add(qdisc);
 #endif
 
 	}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 549c663..b5c502c 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -182,7 +182,7 @@
 		if (old)
 			qdisc_destroy(old);
 		if (ntx < dev->real_num_tx_queues)
-			qdisc_list_add(qdisc);
+			qdisc_hash_add(qdisc);
 	}
 	kfree(priv->qdiscs);
 	priv->qdiscs = NULL;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index aaaf021..9f7b380 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -413,6 +413,16 @@
 	return segs;
 }
 
+static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb)
+{
+	skb->next = qh->head;
+
+	if (!qh->head)
+		qh->tail = skb;
+	qh->head = skb;
+	qh->qlen++;
+}
+
 /*
  * Insert one skb into qdisc.
  * Note: parent depends on return value to account for queue length.
@@ -502,7 +512,7 @@
 			1<<(prandom_u32() % 8);
 	}
 
-	if (unlikely(skb_queue_len(&sch->q) >= sch->limit))
+	if (unlikely(sch->q.qlen >= sch->limit))
 		return qdisc_drop(skb, sch, to_free);
 
 	qdisc_qstats_backlog_inc(sch, skb);
@@ -522,8 +532,8 @@
 		if (q->rate) {
 			struct sk_buff *last;
 
-			if (!skb_queue_empty(&sch->q))
-				last = skb_peek_tail(&sch->q);
+			if (sch->q.qlen)
+				last = sch->q.tail;
 			else
 				last = netem_rb_to_skb(rb_last(&q->t_root));
 			if (last) {
@@ -552,7 +562,7 @@
 		cb->time_to_send = psched_get_time();
 		q->counter = 0;
 
-		__skb_queue_head(&sch->q, skb);
+		netem_enqueue_skb_head(&sch->q, skb);
 		sch->qstats.requeues++;
 	}
 
@@ -587,7 +597,7 @@
 	struct rb_node *p;
 
 tfifo_dequeue:
-	skb = __skb_dequeue(&sch->q);
+	skb = __qdisc_dequeue_head(&sch->q);
 	if (skb) {
 		qdisc_qstats_backlog_dec(sch, skb);
 deliver:
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index a570b0b..5c3a99d 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -231,7 +231,7 @@
 	/* Drop excess packets if new limit is lower */
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > sch->limit) {
-		struct sk_buff *skb = __skb_dequeue(&sch->q);
+		struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
 
 		dropped += qdisc_pkt_len(skb);
 		qdisc_qstats_backlog_dec(sch, skb);
@@ -511,7 +511,7 @@
 static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
 {
 	struct sk_buff *skb;
-	skb = __qdisc_dequeue_head(sch, &sch->q);
+	skb = qdisc_dequeue_head(sch);
 
 	if (!skb)
 		return NULL;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f27ffee..ca0516e 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -1153,6 +1153,7 @@
 	if (!skb)
 		return NULL;
 
+	qdisc_qstats_backlog_dec(sch, skb);
 	sch->q.qlen--;
 	qdisc_bstats_update(sch, skb);
 
@@ -1256,6 +1257,7 @@
 	}
 
 	bstats_update(&cl->bstats, skb);
+	qdisc_qstats_backlog_inc(sch, skb);
 	++sch->q.qlen;
 
 	agg = cl->agg;
@@ -1476,6 +1478,7 @@
 			qdisc_reset(cl->qdisc);
 		}
 	}
+	sch->qstats.backlog = 0;
 	sch->q.qlen = 0;
 }
 
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index add3cc7..20a350b 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -400,6 +400,7 @@
 enqueue:
 	ret = qdisc_enqueue(skb, child, to_free);
 	if (likely(ret == NET_XMIT_SUCCESS)) {
+		qdisc_qstats_backlog_inc(sch, skb);
 		sch->q.qlen++;
 		increment_qlen(skb, q);
 	} else if (net_xmit_drop_count(ret)) {
@@ -428,6 +429,7 @@
 
 	if (skb) {
 		qdisc_bstats_update(sch, skb);
+		qdisc_qstats_backlog_dec(sch, skb);
 		sch->q.qlen--;
 		decrement_qlen(skb, q);
 	}
@@ -450,6 +452,7 @@
 	struct sfb_sched_data *q = qdisc_priv(sch);
 
 	qdisc_reset(q->qdisc);
+	sch->qstats.backlog = 0;
 	sch->q.qlen = 0;
 	q->slot = 0;
 	q->double_buffering = false;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 1c23060..f10d339 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1408,7 +1408,7 @@
 				transports) {
 		if (t->pmtu_pending && t->dst) {
 			sctp_transport_update_pmtu(sk, t,
-						   WORD_TRUNC(dst_mtu(t->dst)));
+						   SCTP_TRUNC4(dst_mtu(t->dst)));
 			t->pmtu_pending = 0;
 		}
 		if (!pmtu || (t->pathmtu < pmtu))
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 912eb16..f99d485 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -48,7 +48,7 @@
 		/* id 2 is reserved as well */
 		.hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2,
 	},
-#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE)
+#if IS_ENABLED(CONFIG_CRYPTO_SHA256)
 	{
 		.hmac_id = SCTP_AUTH_HMAC_ID_SHA256,
 		.hmac_name = "hmac(sha256)",
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index a55e547..7a1cdf4 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -70,6 +70,19 @@
 	return msg;
 }
 
+void sctp_datamsg_free(struct sctp_datamsg *msg)
+{
+	struct sctp_chunk *chunk;
+
+	/* This doesn't have to be a _safe vairant because
+	 * sctp_chunk_free() only drops the refs.
+	 */
+	list_for_each_entry(chunk, &msg->chunks, frag_list)
+		sctp_chunk_free(chunk);
+
+	sctp_datamsg_put(msg);
+}
+
 /* Final destructruction of datamsg memory. */
 static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
 {
@@ -179,12 +192,18 @@
 			 msg, msg->expires_at, jiffies);
 	}
 
+	if (asoc->peer.prsctp_capable &&
+	    SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
+		msg->expires_at =
+			jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
+
 	/* This is the biggest possible DATA chunk that can fit into
 	 * the packet
 	 */
-	max_data = (asoc->pathmtu -
-		sctp_sk(asoc->base.sk)->pf->af->net_header_len -
-		sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3;
+	max_data = asoc->pathmtu -
+		   sctp_sk(asoc->base.sk)->pf->af->net_header_len -
+		   sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk);
+	max_data = SCTP_TRUNC4(max_data);
 
 	max = asoc->frag_point;
 	/* If the the peer requested that we authenticate DATA chunks
@@ -195,8 +214,8 @@
 		struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc);
 
 		if (hmac_desc)
-			max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) +
-					    hmac_desc->hmac_len);
+			max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) +
+					      hmac_desc->hmac_len);
 	}
 
 	/* Now, check if we need to reduce our max */
@@ -216,7 +235,7 @@
 	    asoc->outqueue.out_qlen == 0 &&
 	    list_empty(&asoc->outqueue.retransmit) &&
 	    msg_len > max)
-		max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t));
+		max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t));
 
 	/* Encourage Cookie-ECHO bundling. */
 	if (asoc->state < SCTP_STATE_COOKIE_ECHOED)
@@ -335,7 +354,7 @@
 /* Check whether this message has expired. */
 int sctp_chunk_abandoned(struct sctp_chunk *chunk)
 {
-	if (!chunk->asoc->prsctp_enable ||
+	if (!chunk->asoc->peer.prsctp_capable ||
 	    !SCTP_PR_POLICY(chunk->sinfo.sinfo_flags)) {
 		struct sctp_datamsg *msg = chunk->msg;
 
@@ -349,14 +368,14 @@
 	}
 
 	if (SCTP_PR_TTL_ENABLED(chunk->sinfo.sinfo_flags) &&
-	    time_after(jiffies, chunk->prsctp_param)) {
+	    time_after(jiffies, chunk->msg->expires_at)) {
 		if (chunk->sent_count)
 			chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
 		else
 			chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
 		return 1;
 	} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
-		   chunk->sent_count > chunk->prsctp_param) {
+		   chunk->sent_count > chunk->sinfo.sinfo_timetolive) {
 		chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
 		return 1;
 	}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index c182db7..a2ea1d1 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -119,7 +119,13 @@
 		       skb_transport_offset(skb))
 		goto discard_it;
 
-	if (!pskb_may_pull(skb, sizeof(struct sctphdr)))
+	/* If the packet is fragmented and we need to do crc checking,
+	 * it's better to just linearize it otherwise crc computing
+	 * takes longer.
+	 */
+	if ((!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) &&
+	     skb_linearize(skb)) ||
+	    !pskb_may_pull(skb, sizeof(struct sctphdr)))
 		goto discard_it;
 
 	/* Pull up the IP header. */
@@ -599,7 +605,7 @@
 		/* PMTU discovery (RFC1191) */
 		if (ICMP_FRAG_NEEDED == code) {
 			sctp_icmp_frag_needed(sk, asoc, transport,
-					      WORD_TRUNC(info));
+					      SCTP_TRUNC4(info));
 			goto out_unlock;
 		} else {
 			if (ICMP_PROT_UNREACH == code) {
@@ -667,7 +673,7 @@
 		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
 			break;
 
-		ch_end = offset + WORD_ROUND(ntohs(ch->length));
+		ch_end = offset + SCTP_PAD4(ntohs(ch->length));
 		if (ch_end > skb->len)
 			break;
 
@@ -790,27 +796,34 @@
 static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
 				const void *ptr)
 {
+	struct sctp_transport *t = (struct sctp_transport *)ptr;
 	const struct sctp_hash_cmp_arg *x = arg->key;
-	const struct sctp_transport *t = ptr;
-	struct sctp_association *asoc = t->asoc;
-	const struct net *net = x->net;
+	struct sctp_association *asoc;
+	int err = 1;
 
 	if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
-		return 1;
-	if (!net_eq(sock_net(asoc->base.sk), net))
-		return 1;
+		return err;
+	if (!sctp_transport_hold(t))
+		return err;
+
+	asoc = t->asoc;
+	if (!net_eq(sock_net(asoc->base.sk), x->net))
+		goto out;
 	if (x->ep) {
 		if (x->ep != asoc->ep)
-			return 1;
+			goto out;
 	} else {
 		if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
-			return 1;
+			goto out;
 		if (!sctp_bind_addr_match(&asoc->base.bind_addr,
 					  x->laddr, sctp_sk(asoc->base.sk)))
-			return 1;
+			goto out;
 	}
 
-	return 0;
+	err = 0;
+out:
+	sctp_transport_put(t);
+	return err;
 }
 
 static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
@@ -1115,7 +1128,7 @@
 		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
 			break;
 
-		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
 		if (ch_end > skb_tail_pointer(skb))
 			break;
 
@@ -1177,9 +1190,6 @@
 	if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP)
 		return NULL;
 
-	if (skb_linearize(skb))
-		return NULL;
-
 	ch = (sctp_chunkhdr_t *) skb->data;
 
 	/* The code below will attempt to walk the chunk and extract
@@ -1187,7 +1197,7 @@
 	 * that the chunk length doesn't cause overflow.  Otherwise, we'll
 	 * walk off the end.
 	 */
-	if (WORD_ROUND(ntohs(ch->length)) > skb->len)
+	if (SCTP_PAD4(ntohs(ch->length)) > skb->len)
 		return NULL;
 
 	/* If this is INIT/INIT-ACK look inside the chunk too. */
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index c30ddb0..f731de3e8 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -170,19 +170,6 @@
 
 		chunk = list_entry(entry, struct sctp_chunk, list);
 
-		/* Linearize if it's not GSO */
-		if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP &&
-		    skb_is_nonlinear(chunk->skb)) {
-			if (skb_linearize(chunk->skb)) {
-				__SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS);
-				sctp_chunk_free(chunk);
-				goto next_chunk;
-			}
-
-			/* Update sctp_hdr as it probably changed */
-			chunk->sctp_hdr = sctp_hdr(chunk->skb);
-		}
-
 		if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) {
 			/* GSO-marked skbs but without frags, handle
 			 * them normally
@@ -226,7 +213,7 @@
 	}
 
 	chunk->chunk_hdr = ch;
-	chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+	chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
 	skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t));
 	chunk->subh.v = NULL; /* Subheader is no longer valid.  */
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 1f1682b..2a5c189 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -180,7 +180,6 @@
 				       int one_packet, gfp_t gfp)
 {
 	sctp_xmit_t retval;
-	int error = 0;
 
 	pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__,
 		 packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
@@ -188,6 +187,8 @@
 	switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
 	case SCTP_XMIT_PMTU_FULL:
 		if (!packet->has_cookie_echo) {
+			int error = 0;
+
 			error = sctp_packet_transmit(packet, gfp);
 			if (error < 0)
 				chunk->skb->sk->sk_err = -error;
@@ -296,7 +297,7 @@
 					      struct sctp_chunk *chunk)
 {
 	sctp_xmit_t retval = SCTP_XMIT_OK;
-	__u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length));
+	__u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length));
 
 	/* Check to see if this chunk will fit into the packet */
 	retval = sctp_packet_will_fit(packet, chunk, chunk_len);
@@ -441,14 +442,14 @@
 			 * time. Application may notice this error.
 			 */
 			pr_err_once("Trying to GSO but underlying device doesn't support it.");
-			goto nomem;
+			goto err;
 		}
 	} else {
 		pkt_size = packet->size;
 	}
 	head = alloc_skb(pkt_size + MAX_HEADER, gfp);
 	if (!head)
-		goto nomem;
+		goto err;
 	if (gso) {
 		NAPI_GRO_CB(head)->last = head;
 		skb_shinfo(head)->gso_type = sk->sk_gso_type;
@@ -469,8 +470,12 @@
 		}
 	}
 	dst = dst_clone(tp->dst);
-	if (!dst)
-		goto no_route;
+	if (!dst) {
+		if (asoc)
+			IP_INC_STATS(sock_net(asoc->base.sk),
+				     IPSTATS_MIB_OUTNOROUTES);
+		goto nodst;
+	}
 	skb_dst_set(head, dst);
 
 	/* Build the SCTP header.  */
@@ -503,7 +508,7 @@
 		if (gso) {
 			pkt_size = packet->overhead;
 			list_for_each_entry(chunk, &packet->chunk_list, list) {
-				int padded = WORD_ROUND(chunk->skb->len);
+				int padded = SCTP_PAD4(chunk->skb->len);
 
 				if (pkt_size + padded > tp->pathmtu)
 					break;
@@ -533,7 +538,7 @@
 		 * included in the chunk length field.  The sender should
 		 * never pad with more than 3 bytes.
 		 *
-		 * [This whole comment explains WORD_ROUND() below.]
+		 * [This whole comment explains SCTP_PAD4() below.]
 		 */
 
 		pkt_size -= packet->overhead;
@@ -555,7 +560,7 @@
 				has_data = 1;
 			}
 
-			padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
+			padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len;
 			if (padding)
 				memset(skb_put(chunk->skb, padding), 0, padding);
 
@@ -582,7 +587,7 @@
 			 * acknowledged or have failed.
 			 * Re-queue auth chunks if needed.
 			 */
-			pkt_size -= WORD_ROUND(chunk->skb->len);
+			pkt_size -= SCTP_PAD4(chunk->skb->len);
 
 			if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
 				sctp_chunk_free(chunk);
@@ -621,8 +626,10 @@
 		if (!gso)
 			break;
 
-		if (skb_gro_receive(&head, nskb))
+		if (skb_gro_receive(&head, nskb)) {
+			kfree_skb(nskb);
 			goto nomem;
+		}
 		nskb = NULL;
 		if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
 				 sk->sk_gso_max_segs))
@@ -716,18 +723,13 @@
 	}
 	head->ignore_df = packet->ipfragok;
 	tp->af_specific->sctp_xmit(head, tp);
+	goto out;
 
-out:
-	sctp_packet_reset(packet);
-	return err;
-no_route:
-	kfree_skb(head);
-	if (nskb != head)
-		kfree_skb(nskb);
+nomem:
+	if (packet->auth && list_empty(&packet->auth->list))
+		sctp_chunk_free(packet->auth);
 
-	if (asoc)
-		IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
-
+nodst:
 	/* FIXME: Returning the 'err' will effect all the associations
 	 * associated with a socket, although only one of the paths of the
 	 * association is unreachable.
@@ -736,22 +738,18 @@
 	 * required.
 	 */
 	 /* err = -EHOSTUNREACH; */
-err:
-	/* Control chunks are unreliable so just drop them.  DATA chunks
-	 * will get resent or dropped later.
-	 */
+	kfree_skb(head);
 
+err:
 	list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
 		list_del_init(&chunk->list);
 		if (!sctp_chunk_is_data(chunk))
 			sctp_chunk_free(chunk);
 	}
-	goto out;
-nomem:
-	if (packet->auth && list_empty(&packet->auth->list))
-		sctp_chunk_free(packet->auth);
-	err = -ENOMEM;
-	goto err;
+
+out:
+	sctp_packet_reset(packet);
+	return err;
 }
 
 /********************************************************************
@@ -878,7 +876,7 @@
 					struct sctp_chunk *chunk,
 					u16 chunk_len)
 {
-	size_t psize, pmtu;
+	size_t psize, pmtu, maxsize;
 	sctp_xmit_t retval = SCTP_XMIT_OK;
 
 	psize = packet->size;
@@ -906,6 +904,17 @@
 			goto out;
 		}
 
+		/* Similarly, if this chunk was built before a PMTU
+		 * reduction, we have to fragment it at IP level now. So
+		 * if the packet already contains something, we need to
+		 * flush.
+		 */
+		maxsize = pmtu - packet->overhead;
+		if (packet->auth)
+			maxsize -= SCTP_PAD4(packet->auth->skb->len);
+		if (chunk_len > maxsize)
+			retval = SCTP_XMIT_PMTU_FULL;
+
 		/* It is also okay to fragment if the chunk we are
 		 * adding is a control chunk, but only if current packet
 		 * is not a GSO one otherwise it causes fragmentation of
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 72e54a4..5825853 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -68,7 +68,7 @@
 
 static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
 
-static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
+static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
 
 /* Add data to the front of the queue. */
 static inline void sctp_outq_head_data(struct sctp_outq *q,
@@ -285,10 +285,9 @@
 }
 
 /* Put a new chunk in an sctp_outq.  */
-int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
+void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
 {
 	struct net *net = sock_net(q->asoc->base.sk);
-	int error = 0;
 
 	pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk,
 		 chunk && chunk->chunk_hdr ?
@@ -299,54 +298,26 @@
 	 * immediately.
 	 */
 	if (sctp_chunk_is_data(chunk)) {
-		/* Is it OK to queue data chunks?  */
-		/* From 9. Termination of Association
-		 *
-		 * When either endpoint performs a shutdown, the
-		 * association on each peer will stop accepting new
-		 * data from its user and only deliver data in queue
-		 * at the time of sending or receiving the SHUTDOWN
-		 * chunk.
-		 */
-		switch (q->asoc->state) {
-		case SCTP_STATE_CLOSED:
-		case SCTP_STATE_SHUTDOWN_PENDING:
-		case SCTP_STATE_SHUTDOWN_SENT:
-		case SCTP_STATE_SHUTDOWN_RECEIVED:
-		case SCTP_STATE_SHUTDOWN_ACK_SENT:
-			/* Cannot send after transport endpoint shutdown */
-			error = -ESHUTDOWN;
-			break;
+		pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
+			 __func__, q, chunk, chunk && chunk->chunk_hdr ?
+			 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
+			 "illegal chunk");
 
-		default:
-			pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
-				 __func__, q, chunk, chunk && chunk->chunk_hdr ?
-				 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
-				 "illegal chunk");
-
-			sctp_chunk_hold(chunk);
-			sctp_outq_tail_data(q, chunk);
-			if (chunk->asoc->prsctp_enable &&
-			    SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
-				chunk->asoc->sent_cnt_removable++;
-			if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
-				SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
-			else
-				SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
-			break;
-		}
+		sctp_outq_tail_data(q, chunk);
+		if (chunk->asoc->peer.prsctp_capable &&
+		    SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
+			chunk->asoc->sent_cnt_removable++;
+		if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
+			SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
+		else
+			SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
 	} else {
 		list_add_tail(&chunk->list, &q->control_chunk_list);
 		SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
 	}
 
-	if (error < 0)
-		return error;
-
 	if (!q->cork)
-		error = sctp_outq_flush(q, 0, gfp);
-
-	return error;
+		sctp_outq_flush(q, 0, gfp);
 }
 
 /* Insert a chunk into the sorted list based on the TSNs.  The retransmit list
@@ -383,7 +354,7 @@
 
 	list_for_each_entry_safe(chk, temp, queue, transmitted_list) {
 		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-		    chk->prsctp_param <= sinfo->sinfo_timetolive)
+		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
 			continue;
 
 		list_del_init(&chk->transmitted_list);
@@ -418,7 +389,7 @@
 
 	list_for_each_entry_safe(chk, temp, queue, list) {
 		if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
-		    chk->prsctp_param <= sinfo->sinfo_timetolive)
+		    chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
 			continue;
 
 		list_del_init(&chk->list);
@@ -442,7 +413,7 @@
 {
 	struct sctp_transport *transport;
 
-	if (!asoc->prsctp_enable || !asoc->sent_cnt_removable)
+	if (!asoc->peer.prsctp_capable || !asoc->sent_cnt_removable)
 		return;
 
 	msg_len = sctp_prsctp_prune_sent(asoc, sinfo,
@@ -559,7 +530,6 @@
 		     sctp_retransmit_reason_t reason)
 {
 	struct net *net = sock_net(q->asoc->base.sk);
-	int error = 0;
 
 	switch (reason) {
 	case SCTP_RTXR_T3_RTX:
@@ -603,10 +573,7 @@
 	 * will be flushed at the end.
 	 */
 	if (reason != SCTP_RTXR_FAST_RTX)
-		error = sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
-
-	if (error)
-		q->asoc->base.sk->sk_err = -error;
+		sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC);
 }
 
 /*
@@ -778,12 +745,12 @@
 }
 
 /* Cork the outqueue so queued chunks are really queued. */
-int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
+void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp)
 {
 	if (q->cork)
 		q->cork = 0;
 
-	return sctp_outq_flush(q, 0, gfp);
+	sctp_outq_flush(q, 0, gfp);
 }
 
 
@@ -796,7 +763,7 @@
  * locking concerns must be made.  Today we use the sock lock to protect
  * this function.
  */
-static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
+static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
 {
 	struct sctp_packet *packet;
 	struct sctp_packet singleton;
@@ -919,8 +886,10 @@
 			sctp_packet_config(&singleton, vtag, 0);
 			sctp_packet_append_chunk(&singleton, chunk);
 			error = sctp_packet_transmit(&singleton, gfp);
-			if (error < 0)
-				return error;
+			if (error < 0) {
+				asoc->base.sk->sk_err = -error;
+				return;
+			}
 			break;
 
 		case SCTP_CID_ABORT:
@@ -1018,6 +987,8 @@
 		retran:
 			error = sctp_outq_flush_rtx(q, packet,
 						    rtx_timeout, &start_timer);
+			if (error < 0)
+				asoc->base.sk->sk_err = -error;
 
 			if (start_timer) {
 				sctp_transport_reset_t3_rtx(transport);
@@ -1055,7 +1026,7 @@
 
 				/* Mark as failed send. */
 				sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
-				if (asoc->prsctp_enable &&
+				if (asoc->peer.prsctp_capable &&
 				    SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
 					asoc->sent_cnt_removable--;
 				sctp_chunk_free(chunk);
@@ -1192,14 +1163,15 @@
 						      struct sctp_transport,
 						      send_ready);
 		packet = &t->packet;
-		if (!sctp_packet_empty(packet))
+		if (!sctp_packet_empty(packet)) {
 			error = sctp_packet_transmit(packet, gfp);
+			if (error < 0)
+				asoc->base.sk->sk_err = -error;
+		}
 
 		/* Clear the burst limited state, if any */
 		sctp_transport_burst_reset(t);
 	}
-
-	return error;
 }
 
 /* Update unack_data based on the incoming SACK chunk */
@@ -1347,7 +1319,7 @@
 		tsn = ntohl(tchunk->subh.data_hdr->tsn);
 		if (TSN_lte(tsn, ctsn)) {
 			list_del_init(&tchunk->transmitted_list);
-			if (asoc->prsctp_enable &&
+			if (asoc->peer.prsctp_capable &&
 			    SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
 				asoc->sent_cnt_removable--;
 			sctp_chunk_free(tchunk);
@@ -1747,7 +1719,7 @@
 {
 	int i;
 	sctp_sack_variable_t *frags;
-	__u16 gap;
+	__u16 tsn_offset, blocks;
 	__u32 ctsn = ntohl(sack->cum_tsn_ack);
 
 	if (TSN_lte(tsn, ctsn))
@@ -1766,10 +1738,11 @@
 	 */
 
 	frags = sack->variable;
-	gap = tsn - ctsn;
-	for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) {
-		if (TSN_lte(ntohs(frags[i].gab.start), gap) &&
-		    TSN_lte(gap, ntohs(frags[i].gab.end)))
+	blocks = ntohs(sack->num_gap_ack_blocks);
+	tsn_offset = tsn - ctsn;
+	for (i = 0; i < blocks; ++i) {
+		if (tsn_offset >= ntohs(frags[i].gab.start) &&
+		    tsn_offset <= ntohs(frags[i].gab.end))
 			goto pass;
 	}
 
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 4cb5aed..206377f 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -73,13 +73,17 @@
 /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */
 static int sctp_snmp_seq_show(struct seq_file *seq, void *v)
 {
+	unsigned long buff[SCTP_MIB_MAX];
 	struct net *net = seq->private;
 	int i;
 
-	for (i = 0; sctp_snmp_list[i].name != NULL; i++)
+	memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX);
+
+	snmp_get_cpu_field_batch(buff, sctp_snmp_list,
+				 net->sctp.sctp_statistics);
+	for (i = 0; sctp_snmp_list[i].name; i++)
 		seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name,
-			   snmp_fold_field(net->sctp.sctp_statistics,
-				      sctp_snmp_list[i].entry));
+						buff[i]);
 
 	return 0;
 }
@@ -293,6 +297,7 @@
 		return ERR_PTR(err);
 	}
 
+	iter->start_fail = 0;
 	return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
 }
 
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index f69edcf..048954e 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -13,6 +13,7 @@
 {
 	union sctp_addr laddr, paddr;
 	struct dst_entry *dst;
+	struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer;
 
 	laddr = list_entry(asoc->base.bind_addr.address_list.next,
 			   struct sctp_sockaddr_entry, list)->a;
@@ -40,10 +41,15 @@
 	}
 
 	r->idiag_state = asoc->state;
-	r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
-	r->idiag_retrans = asoc->rtx_data_chunks;
-	r->idiag_expires = jiffies_to_msecs(
-		asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies);
+	if (timer_pending(t3_rtx)) {
+		r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
+		r->idiag_retrans = asoc->rtx_data_chunks;
+		r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
+	} else {
+		r->idiag_timer = 0;
+		r->idiag_retrans = 0;
+		r->idiag_expires = 0;
+	}
 }
 
 static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
@@ -100,7 +106,8 @@
 			       const struct inet_diag_req_v2 *req,
 			       struct user_namespace *user_ns,
 			       int portid, u32 seq, u16 nlmsg_flags,
-			       const struct nlmsghdr *unlh)
+			       const struct nlmsghdr *unlh,
+			       bool net_admin)
 {
 	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct list_head *addr_list;
@@ -127,7 +134,7 @@
 		r->idiag_retrans = 0;
 	}
 
-	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns))
+	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
 		goto errout;
 
 	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) {
@@ -197,6 +204,7 @@
 	struct netlink_callback *cb;
 	const struct inet_diag_req_v2 *r;
 	const struct nlmsghdr *nlh;
+	bool net_admin;
 };
 
 static size_t inet_assoc_attr_size(struct sctp_association *asoc)
@@ -213,6 +221,7 @@
 		+ nla_total_size(1) /* INET_DIAG_SHUTDOWN */
 		+ nla_total_size(1) /* INET_DIAG_TOS */
 		+ nla_total_size(1) /* INET_DIAG_TCLASS */
+		+ nla_total_size(4) /* INET_DIAG_MARK */
 		+ nla_total_size(addrlen * asoc->peer.transport_count)
 		+ nla_total_size(addrlen * addrcnt)
 		+ nla_total_size(sizeof(struct inet_diag_meminfo))
@@ -250,7 +259,8 @@
 	err = inet_sctp_diag_fill(sk, assoc, rep, req,
 				  sk_user_ns(NETLINK_CB(in_skb).sk),
 				  NETLINK_CB(in_skb).portid,
-				  nlh->nlmsg_seq, 0, nlh);
+				  nlh->nlmsg_seq, 0, nlh,
+				  commp->net_admin);
 	release_sock(sk);
 	if (err < 0) {
 		WARN_ON(err == -EMSGSIZE);
@@ -266,28 +276,17 @@
 	return err;
 }
 
-static int sctp_tsp_dump(struct sctp_transport *tsp, void *p)
+static int sctp_sock_dump(struct sock *sk, void *p)
 {
-	struct sctp_endpoint *ep = tsp->asoc->ep;
+	struct sctp_endpoint *ep = sctp_sk(sk)->ep;
 	struct sctp_comm_param *commp = p;
-	struct sock *sk = ep->base.sk;
 	struct sk_buff *skb = commp->skb;
 	struct netlink_callback *cb = commp->cb;
 	const struct inet_diag_req_v2 *r = commp->r;
-	struct sctp_association *assoc =
-		list_entry(ep->asocs.next, struct sctp_association, asocs);
+	struct sctp_association *assoc;
 	int err = 0;
 
-	/* find the ep only once through the transports by this condition */
-	if (tsp->asoc != assoc)
-		goto out;
-
-	if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
-		goto out;
-
 	lock_sock(sk);
-	if (sk != assoc->base.sk)
-		goto release;
 	list_for_each_entry(assoc, &ep->asocs, asocs) {
 		if (cb->args[4] < cb->args[1])
 			goto next;
@@ -304,9 +303,10 @@
 					sk_user_ns(NETLINK_CB(cb->skb).sk),
 					NETLINK_CB(cb->skb).portid,
 					cb->nlh->nlmsg_seq,
-					NLM_F_MULTI, cb->nlh) < 0) {
+					NLM_F_MULTI, cb->nlh,
+					commp->net_admin) < 0) {
 			cb->args[3] = 1;
-			err = 2;
+			err = 1;
 			goto release;
 		}
 		cb->args[3] = 1;
@@ -314,8 +314,9 @@
 		if (inet_sctp_diag_fill(sk, assoc, skb, r,
 					sk_user_ns(NETLINK_CB(cb->skb).sk),
 					NETLINK_CB(cb->skb).portid,
-					cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) {
-			err = 2;
+					cb->nlh->nlmsg_seq, 0, cb->nlh,
+					commp->net_admin) < 0) {
+			err = 1;
 			goto release;
 		}
 next:
@@ -327,10 +328,35 @@
 	cb->args[4] = 0;
 release:
 	release_sock(sk);
+	sock_put(sk);
 	return err;
+}
+
+static int sctp_get_sock(struct sctp_transport *tsp, void *p)
+{
+	struct sctp_endpoint *ep = tsp->asoc->ep;
+	struct sctp_comm_param *commp = p;
+	struct sock *sk = ep->base.sk;
+	struct netlink_callback *cb = commp->cb;
+	const struct inet_diag_req_v2 *r = commp->r;
+	struct sctp_association *assoc =
+		list_entry(ep->asocs.next, struct sctp_association, asocs);
+
+	/* find the ep only once through the transports by this condition */
+	if (tsp->asoc != assoc)
+		goto out;
+
+	if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
+		goto out;
+
+	sock_hold(sk);
+	cb->args[5] = (long)sk;
+
+	return 1;
+
 out:
 	cb->args[2]++;
-	return err;
+	return 0;
 }
 
 static int sctp_ep_dump(struct sctp_endpoint *ep, void *p)
@@ -350,7 +376,7 @@
 	if (cb->args[4] < cb->args[1])
 		goto next;
 
-	if ((r->idiag_states & ~TCPF_LISTEN) && !list_empty(&ep->asocs))
+	if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs))
 		goto next;
 
 	if (r->sdiag_family != AF_UNSPEC &&
@@ -369,7 +395,7 @@
 				sk_user_ns(NETLINK_CB(cb->skb).sk),
 				NETLINK_CB(cb->skb).portid,
 				cb->nlh->nlmsg_seq, NLM_F_MULTI,
-				cb->nlh) < 0) {
+				cb->nlh, commp->net_admin) < 0) {
 		err = 2;
 		goto out;
 	}
@@ -406,6 +432,7 @@
 		.skb = in_skb,
 		.r = req,
 		.nlh = nlh,
+		.net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN),
 	};
 
 	if (req->sdiag_family == AF_INET) {
@@ -418,11 +445,13 @@
 		paddr.v4.sin_family = AF_INET;
 	} else {
 		laddr.v6.sin6_port = req->id.idiag_sport;
-		memcpy(&laddr.v6.sin6_addr, req->id.idiag_src, 64);
+		memcpy(&laddr.v6.sin6_addr, req->id.idiag_src,
+		       sizeof(laddr.v6.sin6_addr));
 		laddr.v6.sin6_family = AF_INET6;
 
 		paddr.v6.sin6_port = req->id.idiag_dport;
-		memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst, 64);
+		memcpy(&paddr.v6.sin6_addr, req->id.idiag_dst,
+		       sizeof(paddr.v6.sin6_addr));
 		paddr.v6.sin6_family = AF_INET6;
 	}
 
@@ -439,6 +468,7 @@
 		.skb = skb,
 		.cb = cb,
 		.r = r,
+		.net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN),
 	};
 
 	/* eps hashtable dumps
@@ -464,10 +494,18 @@
 	 * 2 : to record the transport pos of this time's traversal
 	 * 3 : to mark if we have dumped the ep info of the current asoc
 	 * 4 : to work as a temporary variable to traversal list
+	 * 5 : to save the sk we get from travelsing the tsp list.
 	 */
-	if (!(idiag_states & ~TCPF_LISTEN))
+	if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
 		goto done;
-	sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp);
+
+next:
+	cb->args[5] = 0;
+	sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp);
+
+	if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp))
+		goto next;
+
 done:
 	cb->args[1] = cb->args[4];
 	cb->args[4] = 0;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 8c77b87..9e9690b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -253,7 +253,7 @@
 	num_types = sp->pf->supported_addrs(sp, types);
 
 	chunksize = sizeof(init) + addrs_len;
-	chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types));
+	chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types));
 	chunksize += sizeof(ecap_param);
 
 	if (asoc->prsctp_enable)
@@ -283,14 +283,14 @@
 		/* Add HMACS parameter length if any were defined */
 		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
-			chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
 		/* Add CHUNKS parameter length */
 		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
-			chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
 			auth_chunks = NULL;
 
@@ -300,8 +300,8 @@
 
 	/* If we have any extensions to report, account for that */
 	if (num_ext)
-		chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
-					num_ext);
+		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+				       num_ext);
 
 	/* RFC 2960 3.3.2 Initiation (INIT) (1)
 	 *
@@ -443,13 +443,13 @@
 
 		auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs;
 		if (auth_hmacs->length)
-			chunksize += WORD_ROUND(ntohs(auth_hmacs->length));
+			chunksize += SCTP_PAD4(ntohs(auth_hmacs->length));
 		else
 			auth_hmacs = NULL;
 
 		auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks;
 		if (auth_chunks->length)
-			chunksize += WORD_ROUND(ntohs(auth_chunks->length));
+			chunksize += SCTP_PAD4(ntohs(auth_chunks->length));
 		else
 			auth_chunks = NULL;
 
@@ -458,8 +458,8 @@
 	}
 
 	if (num_ext)
-		chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) +
-					num_ext);
+		chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) +
+				       num_ext);
 
 	/* Now allocate and fill out the chunk.  */
 	retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp);
@@ -706,20 +706,6 @@
 	return retval;
 }
 
-static void sctp_set_prsctp_policy(struct sctp_chunk *chunk,
-				   const struct sctp_sndrcvinfo *sinfo)
-{
-	if (!chunk->asoc->prsctp_enable)
-		return;
-
-	if (SCTP_PR_TTL_ENABLED(sinfo->sinfo_flags))
-		chunk->prsctp_param =
-			jiffies + msecs_to_jiffies(sinfo->sinfo_timetolive);
-	else if (SCTP_PR_RTX_ENABLED(sinfo->sinfo_flags) ||
-		 SCTP_PR_PRIO_ENABLED(sinfo->sinfo_flags))
-		chunk->prsctp_param = sinfo->sinfo_timetolive;
-}
-
 /* Make a DATA chunk for the given association from the provided
  * parameters.  However, do not populate the data payload.
  */
@@ -753,7 +739,6 @@
 
 	retval->subh.data_hdr = sctp_addto_chunk(retval, sizeof(dp), &dp);
 	memcpy(&retval->sinfo, sinfo, sizeof(struct sctp_sndrcvinfo));
-	sctp_set_prsctp_policy(retval, sinfo);
 
 nodata:
 	return retval;
@@ -1390,7 +1375,7 @@
 	struct sock *sk;
 
 	/* No need to allocate LL here, as this is only a chunk. */
-	skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp);
+	skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp);
 	if (!skb)
 		goto nodata;
 
@@ -1482,7 +1467,7 @@
 	void *target;
 	void *padding;
 	int chunklen = ntohs(chunk->chunk_hdr->length);
-	int padlen = WORD_ROUND(chunklen) - chunklen;
+	int padlen = SCTP_PAD4(chunklen) - chunklen;
 
 	padding = skb_put(chunk->skb, padlen);
 	target = skb_put(chunk->skb, len);
@@ -1900,7 +1885,7 @@
 	struct __sctp_missing report;
 	__u16 len;
 
-	len = WORD_ROUND(sizeof(report));
+	len = SCTP_PAD4(sizeof(report));
 
 	/* Make an ERROR chunk, preparing enough room for
 	 * returning multiple unknown parameters.
@@ -2098,9 +2083,9 @@
 
 		if (*errp) {
 			if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM,
-					WORD_ROUND(ntohs(param.p->length))))
+					SCTP_PAD4(ntohs(param.p->length))))
 				sctp_addto_chunk_fixed(*errp,
-						WORD_ROUND(ntohs(param.p->length)),
+						SCTP_PAD4(ntohs(param.p->length)),
 						param.v);
 		} else {
 			/* If there is no memory for generating the ERROR
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 12d4519..c345bf1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1020,19 +1020,13 @@
  * This way the whole message is queued up and bundling if
  * encouraged for small fragments.
  */
-static int sctp_cmd_send_msg(struct sctp_association *asoc,
-				struct sctp_datamsg *msg, gfp_t gfp)
+static void sctp_cmd_send_msg(struct sctp_association *asoc,
+			      struct sctp_datamsg *msg, gfp_t gfp)
 {
 	struct sctp_chunk *chunk;
-	int error = 0;
 
-	list_for_each_entry(chunk, &msg->chunks, frag_list) {
-		error = sctp_outq_tail(&asoc->outqueue, chunk, gfp);
-		if (error)
-			break;
-	}
-
-	return error;
+	list_for_each_entry(chunk, &msg->chunks, frag_list)
+		sctp_outq_tail(&asoc->outqueue, chunk, gfp);
 }
 
 
@@ -1427,8 +1421,7 @@
 				local_cork = 1;
 			}
 			/* Send a chunk to our peer.  */
-			error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk,
-					       gfp);
+			sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, gfp);
 			break;
 
 		case SCTP_CMD_SEND_PKT:
@@ -1682,7 +1675,7 @@
 		case SCTP_CMD_FORCE_PRIM_RETRAN:
 			t = asoc->peer.retran_path;
 			asoc->peer.retran_path = asoc->peer.primary_path;
-			error = sctp_outq_uncork(&asoc->outqueue, gfp);
+			sctp_outq_uncork(&asoc->outqueue, gfp);
 			local_cork = 0;
 			asoc->peer.retran_path = t;
 			break;
@@ -1709,7 +1702,7 @@
 				sctp_outq_cork(&asoc->outqueue);
 				local_cork = 1;
 			}
-			error = sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
+			sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp);
 			break;
 		case SCTP_CMD_SEND_NEXT_ASCONF:
 			sctp_cmd_send_asconf(asoc);
@@ -1739,9 +1732,9 @@
 	 */
 	if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) {
 		if (chunk->end_of_packet || chunk->singleton)
-			error = sctp_outq_uncork(&asoc->outqueue, gfp);
+			sctp_outq_uncork(&asoc->outqueue, gfp);
 	} else if (local_cork)
-		error = sctp_outq_uncork(&asoc->outqueue, gfp);
+		sctp_outq_uncork(&asoc->outqueue, gfp);
 
 	if (sp->data_ready_signalled)
 		sp->data_ready_signalled = 0;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d88bb2b..026e3bc 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -3454,7 +3454,7 @@
 		}
 
 		/* Report violation if chunk len overflows */
-		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
 		if (ch_end > skb_tail_pointer(skb))
 			return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
 						  commands);
@@ -4185,7 +4185,7 @@
 		hdr = unk_chunk->chunk_hdr;
 		err_chunk = sctp_make_op_error(asoc, unk_chunk,
 					       SCTP_ERROR_UNKNOWN_CHUNK, hdr,
-					       WORD_ROUND(ntohs(hdr->length)),
+					       SCTP_PAD4(ntohs(hdr->length)),
 					       0);
 		if (err_chunk) {
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
@@ -4203,7 +4203,7 @@
 		hdr = unk_chunk->chunk_hdr;
 		err_chunk = sctp_make_op_error(asoc, unk_chunk,
 					       SCTP_ERROR_UNKNOWN_CHUNK, hdr,
-					       WORD_ROUND(ntohs(hdr->length)),
+					       SCTP_PAD4(ntohs(hdr->length)),
 					       0);
 		if (err_chunk) {
 			sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9fc417a..fb02c70 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1958,6 +1958,8 @@
 
 	/* Now send the (possibly) fragmented message. */
 	list_for_each_entry(chunk, &datamsg->chunks, frag_list) {
+		sctp_chunk_hold(chunk);
+
 		/* Do accounting for the write space.  */
 		sctp_set_owner_w(chunk);
 
@@ -1970,13 +1972,15 @@
 	 * breaks.
 	 */
 	err = sctp_primitive_SEND(net, asoc, datamsg);
-	sctp_datamsg_put(datamsg);
 	/* Did the lower layer accept the chunk? */
-	if (err)
+	if (err) {
+		sctp_datamsg_free(datamsg);
 		goto out_free;
+	}
 
 	pr_debug("%s: we sent primitively\n", __func__);
 
+	sctp_datamsg_put(datamsg);
 	err = msg_len;
 
 	if (unlikely(wait_connect)) {
@@ -4469,17 +4473,21 @@
 				  const union sctp_addr *paddr, void *p)
 {
 	struct sctp_transport *transport;
-	int err = 0;
+	int err = -ENOENT;
 
 	rcu_read_lock();
 	transport = sctp_addrs_lookup_transport(net, laddr, paddr);
 	if (!transport || !sctp_transport_hold(transport))
 		goto out;
-	err = cb(transport, p);
+
+	sctp_association_hold(transport->asoc);
 	sctp_transport_put(transport);
 
-out:
 	rcu_read_unlock();
+	err = cb(transport, p);
+	sctp_association_put(transport->asoc);
+
+out:
 	return err;
 }
 EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 81b8667..ce54dce 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -233,7 +233,7 @@
 	}
 
 	if (transport->dst) {
-		transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
+		transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
 	} else
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
@@ -287,7 +287,7 @@
 		return;
 	}
 	if (transport->dst) {
-		transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst));
+		transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst));
 
 		/* Initialize sk->sk_rcv_saddr, if the transport is the
 		 * association's active path for getsockname().
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 1bc4f71..bea0005 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -383,7 +383,7 @@
 
 	ch = (sctp_errhdr_t *)(chunk->skb->data);
 	cause = ch->cause;
-	elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
+	elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t);
 
 	/* Pull off the ERROR header.  */
 	skb_pull(chunk->skb, sizeof(sctp_errhdr_t));
@@ -688,7 +688,7 @@
 	 * MUST ignore the padding bytes.
 	 */
 	len = ntohs(chunk->chunk_hdr->length);
-	padding = WORD_ROUND(len) - len;
+	padding = SCTP_PAD4(len) - len;
 
 	/* Fixup cloned skb with just this chunks data.  */
 	skb_trim(skb, chunk->chunk_end - padding - skb->data);
@@ -702,14 +702,14 @@
 	 */
 	sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff));
 
-	sctp_ulpevent_receive_data(event, asoc);
-
 	/* And hold the chunk as we need it for getting the IP headers
 	 * later in recvmsg
 	 */
 	sctp_chunk_hold(chunk);
 	event->chunk = chunk;
 
+	sctp_ulpevent_receive_data(event, asoc);
+
 	event->stream = ntohs(chunk->subh.data_hdr->stream);
 	event->ssn = ntohs(chunk->subh.data_hdr->ssn);
 	event->ppid = chunk->subh.data_hdr->ppid;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 877e550..84d0fda 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -140,11 +140,8 @@
 		 * we can go ahead and clear out the lobby in one shot
 		 */
 		if (!skb_queue_empty(&sp->pd_lobby)) {
-			struct list_head *list;
 			skb_queue_splice_tail_init(&sp->pd_lobby,
 						   &sk->sk_receive_queue);
-			list = (struct list_head *)&sctp_sk(sk)->pd_lobby;
-			INIT_LIST_HEAD(list);
 			return 1;
 		}
 	} else {
diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig
new file mode 100644
index 0000000..6cff3f6
--- /dev/null
+++ b/net/strparser/Kconfig
@@ -0,0 +1,4 @@
+
+config STREAM_PARSER
+	tristate
+	default n
diff --git a/net/strparser/Makefile b/net/strparser/Makefile
new file mode 100644
index 0000000..858a126
--- /dev/null
+++ b/net/strparser/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_STREAM_PARSER) += strparser.o
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
new file mode 100644
index 0000000..5c7549b
--- /dev/null
+++ b/net/strparser/strparser.c
@@ -0,0 +1,510 @@
+/*
+ * Stream Parser
+ *
+ * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/errno.h>
+#include <linux/errqueue.h>
+#include <linux/file.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/poll.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/uaccess.h>
+#include <linux/workqueue.h>
+#include <net/strparser.h>
+#include <net/netns/generic.h>
+#include <net/sock.h>
+
+static struct workqueue_struct *strp_wq;
+
+struct _strp_rx_msg {
+	/* Internal cb structure. struct strp_rx_msg must be first for passing
+	 * to upper layer.
+	 */
+	struct strp_rx_msg strp;
+	int accum_len;
+	int early_eaten;
+};
+
+static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb)
+{
+	return (struct _strp_rx_msg *)((void *)skb->cb +
+		offsetof(struct qdisc_skb_cb, data));
+}
+
+/* Lower lock held */
+static void strp_abort_rx_strp(struct strparser *strp, int err)
+{
+	struct sock *csk = strp->sk;
+
+	/* Unrecoverable error in receive */
+
+	del_timer(&strp->rx_msg_timer);
+
+	if (strp->rx_stopped)
+		return;
+
+	strp->rx_stopped = 1;
+
+	/* Report an error on the lower socket */
+	csk->sk_err = err;
+	csk->sk_error_report(csk);
+}
+
+static void strp_start_rx_timer(struct strparser *strp)
+{
+	if (strp->sk->sk_rcvtimeo)
+		mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo);
+}
+
+/* Lower lock held */
+static void strp_parser_err(struct strparser *strp, int err,
+			    read_descriptor_t *desc)
+{
+	desc->error = err;
+	kfree_skb(strp->rx_skb_head);
+	strp->rx_skb_head = NULL;
+	strp->cb.abort_parser(strp, err);
+}
+
+static inline int strp_peek_len(struct strparser *strp)
+{
+	struct socket *sock = strp->sk->sk_socket;
+
+	return sock->ops->peek_len(sock);
+}
+
+/* Lower socket lock held */
+static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
+		     unsigned int orig_offset, size_t orig_len)
+{
+	struct strparser *strp = (struct strparser *)desc->arg.data;
+	struct _strp_rx_msg *rxm;
+	struct sk_buff *head, *skb;
+	size_t eaten = 0, cand_len;
+	ssize_t extra;
+	int err;
+	bool cloned_orig = false;
+
+	if (strp->rx_paused)
+		return 0;
+
+	head = strp->rx_skb_head;
+	if (head) {
+		/* Message already in progress */
+
+		rxm = _strp_rx_msg(head);
+		if (unlikely(rxm->early_eaten)) {
+			/* Already some number of bytes on the receive sock
+			 * data saved in rx_skb_head, just indicate they
+			 * are consumed.
+			 */
+			eaten = orig_len <= rxm->early_eaten ?
+				orig_len : rxm->early_eaten;
+			rxm->early_eaten -= eaten;
+
+			return eaten;
+		}
+
+		if (unlikely(orig_offset)) {
+			/* Getting data with a non-zero offset when a message is
+			 * in progress is not expected. If it does happen, we
+			 * need to clone and pull since we can't deal with
+			 * offsets in the skbs for a message expect in the head.
+			 */
+			orig_skb = skb_clone(orig_skb, GFP_ATOMIC);
+			if (!orig_skb) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				desc->error = -ENOMEM;
+				return 0;
+			}
+			if (!pskb_pull(orig_skb, orig_offset)) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				kfree_skb(orig_skb);
+				desc->error = -ENOMEM;
+				return 0;
+			}
+			cloned_orig = true;
+			orig_offset = 0;
+		}
+
+		if (!strp->rx_skb_nextp) {
+			/* We are going to append to the frags_list of head.
+			 * Need to unshare the frag_list.
+			 */
+			err = skb_unclone(head, GFP_ATOMIC);
+			if (err) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				desc->error = err;
+				return 0;
+			}
+
+			if (unlikely(skb_shinfo(head)->frag_list)) {
+				/* We can't append to an sk_buff that already
+				 * has a frag_list. We create a new head, point
+				 * the frag_list of that to the old head, and
+				 * then are able to use the old head->next for
+				 * appending to the message.
+				 */
+				if (WARN_ON(head->next)) {
+					desc->error = -EINVAL;
+					return 0;
+				}
+
+				skb = alloc_skb(0, GFP_ATOMIC);
+				if (!skb) {
+					STRP_STATS_INCR(strp->stats.rx_mem_fail);
+					desc->error = -ENOMEM;
+					return 0;
+				}
+				skb->len = head->len;
+				skb->data_len = head->len;
+				skb->truesize = head->truesize;
+				*_strp_rx_msg(skb) = *_strp_rx_msg(head);
+				strp->rx_skb_nextp = &head->next;
+				skb_shinfo(skb)->frag_list = head;
+				strp->rx_skb_head = skb;
+				head = skb;
+			} else {
+				strp->rx_skb_nextp =
+				    &skb_shinfo(head)->frag_list;
+			}
+		}
+	}
+
+	while (eaten < orig_len) {
+		/* Always clone since we will consume something */
+		skb = skb_clone(orig_skb, GFP_ATOMIC);
+		if (!skb) {
+			STRP_STATS_INCR(strp->stats.rx_mem_fail);
+			desc->error = -ENOMEM;
+			break;
+		}
+
+		cand_len = orig_len - eaten;
+
+		head = strp->rx_skb_head;
+		if (!head) {
+			head = skb;
+			strp->rx_skb_head = head;
+			/* Will set rx_skb_nextp on next packet if needed */
+			strp->rx_skb_nextp = NULL;
+			rxm = _strp_rx_msg(head);
+			memset(rxm, 0, sizeof(*rxm));
+			rxm->strp.offset = orig_offset + eaten;
+		} else {
+			/* Unclone since we may be appending to an skb that we
+			 * already share a frag_list with.
+			 */
+			err = skb_unclone(skb, GFP_ATOMIC);
+			if (err) {
+				STRP_STATS_INCR(strp->stats.rx_mem_fail);
+				desc->error = err;
+				break;
+			}
+
+			rxm = _strp_rx_msg(head);
+			*strp->rx_skb_nextp = skb;
+			strp->rx_skb_nextp = &skb->next;
+			head->data_len += skb->len;
+			head->len += skb->len;
+			head->truesize += skb->truesize;
+		}
+
+		if (!rxm->strp.full_len) {
+			ssize_t len;
+
+			len = (*strp->cb.parse_msg)(strp, head);
+
+			if (!len) {
+				/* Need more header to determine length */
+				if (!rxm->accum_len) {
+					/* Start RX timer for new message */
+					strp_start_rx_timer(strp);
+				}
+				rxm->accum_len += cand_len;
+				eaten += cand_len;
+				STRP_STATS_INCR(strp->stats.rx_need_more_hdr);
+				WARN_ON(eaten != orig_len);
+				break;
+			} else if (len < 0) {
+				if (len == -ESTRPIPE && rxm->accum_len) {
+					len = -ENODATA;
+					strp->rx_unrecov_intr = 1;
+				} else {
+					strp->rx_interrupted = 1;
+				}
+				strp_parser_err(strp, err, desc);
+				break;
+			} else if (len > strp->sk->sk_rcvbuf) {
+				/* Message length exceeds maximum allowed */
+				STRP_STATS_INCR(strp->stats.rx_msg_too_big);
+				strp_parser_err(strp, -EMSGSIZE, desc);
+				break;
+			} else if (len <= (ssize_t)head->len -
+					  skb->len - rxm->strp.offset) {
+				/* Length must be into new skb (and also
+				 * greater than zero)
+				 */
+				STRP_STATS_INCR(strp->stats.rx_bad_hdr_len);
+				strp_parser_err(strp, -EPROTO, desc);
+				break;
+			}
+
+			rxm->strp.full_len = len;
+		}
+
+		extra = (ssize_t)(rxm->accum_len + cand_len) -
+			rxm->strp.full_len;
+
+		if (extra < 0) {
+			/* Message not complete yet. */
+			if (rxm->strp.full_len - rxm->accum_len >
+			    strp_peek_len(strp)) {
+				/* Don't have the whole messages in the socket
+				 * buffer. Set strp->rx_need_bytes to wait for
+				 * the rest of the message. Also, set "early
+				 * eaten" since we've already buffered the skb
+				 * but don't consume yet per strp_read_sock.
+				 */
+
+				if (!rxm->accum_len) {
+					/* Start RX timer for new message */
+					strp_start_rx_timer(strp);
+				}
+
+				strp->rx_need_bytes = rxm->strp.full_len -
+						       rxm->accum_len;
+				rxm->accum_len += cand_len;
+				rxm->early_eaten = cand_len;
+				STRP_STATS_ADD(strp->stats.rx_bytes, cand_len);
+				desc->count = 0; /* Stop reading socket */
+				break;
+			}
+			rxm->accum_len += cand_len;
+			eaten += cand_len;
+			WARN_ON(eaten != orig_len);
+			break;
+		}
+
+		/* Positive extra indicates ore bytes than needed for the
+		 * message
+		 */
+
+		WARN_ON(extra > cand_len);
+
+		eaten += (cand_len - extra);
+
+		/* Hurray, we have a new message! */
+		del_timer(&strp->rx_msg_timer);
+		strp->rx_skb_head = NULL;
+		STRP_STATS_INCR(strp->stats.rx_msgs);
+
+		/* Give skb to upper layer */
+		strp->cb.rcv_msg(strp, head);
+
+		if (unlikely(strp->rx_paused)) {
+			/* Upper layer paused strp */
+			break;
+		}
+	}
+
+	if (cloned_orig)
+		kfree_skb(orig_skb);
+
+	STRP_STATS_ADD(strp->stats.rx_bytes, eaten);
+
+	return eaten;
+}
+
+static int default_read_sock_done(struct strparser *strp, int err)
+{
+	return err;
+}
+
+/* Called with lock held on lower socket */
+static int strp_read_sock(struct strparser *strp)
+{
+	struct socket *sock = strp->sk->sk_socket;
+	read_descriptor_t desc;
+
+	desc.arg.data = strp;
+	desc.error = 0;
+	desc.count = 1; /* give more than one skb per call */
+
+	/* sk should be locked here, so okay to do read_sock */
+	sock->ops->read_sock(strp->sk, &desc, strp_recv);
+
+	desc.error = strp->cb.read_sock_done(strp, desc.error);
+
+	return desc.error;
+}
+
+/* Lower sock lock held */
+void strp_data_ready(struct strparser *strp)
+{
+	if (unlikely(strp->rx_stopped))
+		return;
+
+	/* This check is needed to synchronize with do_strp_rx_work.
+	 * do_strp_rx_work acquires a process lock (lock_sock) whereas
+	 * the lock held here is bh_lock_sock. The two locks can be
+	 * held by different threads at the same time, but bh_lock_sock
+	 * allows a thread in BH context to safely check if the process
+	 * lock is held. In this case, if the lock is held, queue work.
+	 */
+	if (sock_owned_by_user(strp->sk)) {
+		queue_work(strp_wq, &strp->rx_work);
+		return;
+	}
+
+	if (strp->rx_paused)
+		return;
+
+	if (strp->rx_need_bytes) {
+		if (strp_peek_len(strp) >= strp->rx_need_bytes)
+			strp->rx_need_bytes = 0;
+		else
+			return;
+	}
+
+	if (strp_read_sock(strp) == -ENOMEM)
+		queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_data_ready);
+
+static void do_strp_rx_work(struct strparser *strp)
+{
+	read_descriptor_t rd_desc;
+	struct sock *csk = strp->sk;
+
+	/* We need the read lock to synchronize with strp_data_ready. We
+	 * need the socket lock for calling strp_read_sock.
+	 */
+	lock_sock(csk);
+
+	if (unlikely(strp->rx_stopped))
+		goto out;
+
+	if (strp->rx_paused)
+		goto out;
+
+	rd_desc.arg.data = strp;
+
+	if (strp_read_sock(strp) == -ENOMEM)
+		queue_work(strp_wq, &strp->rx_work);
+
+out:
+	release_sock(csk);
+}
+
+static void strp_rx_work(struct work_struct *w)
+{
+	do_strp_rx_work(container_of(w, struct strparser, rx_work));
+}
+
+static void strp_rx_msg_timeout(unsigned long arg)
+{
+	struct strparser *strp = (struct strparser *)arg;
+
+	/* Message assembly timed out */
+	STRP_STATS_INCR(strp->stats.rx_msg_timeouts);
+	lock_sock(strp->sk);
+	strp->cb.abort_parser(strp, ETIMEDOUT);
+	release_sock(strp->sk);
+}
+
+int strp_init(struct strparser *strp, struct sock *csk,
+	      struct strp_callbacks *cb)
+{
+	struct socket *sock = csk->sk_socket;
+
+	if (!cb || !cb->rcv_msg || !cb->parse_msg)
+		return -EINVAL;
+
+	if (!sock->ops->read_sock || !sock->ops->peek_len)
+		return -EAFNOSUPPORT;
+
+	memset(strp, 0, sizeof(*strp));
+
+	strp->sk = csk;
+
+	setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout,
+		    (unsigned long)strp);
+
+	INIT_WORK(&strp->rx_work, strp_rx_work);
+
+	strp->cb.rcv_msg = cb->rcv_msg;
+	strp->cb.parse_msg = cb->parse_msg;
+	strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done;
+	strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(strp_init);
+
+void strp_unpause(struct strparser *strp)
+{
+	strp->rx_paused = 0;
+
+	/* Sync setting rx_paused with RX work */
+	smp_mb();
+
+	queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_unpause);
+
+/* strp must already be stopped so that strp_recv will no longer be called.
+ * Note that strp_done is not called with the lower socket held.
+ */
+void strp_done(struct strparser *strp)
+{
+	WARN_ON(!strp->rx_stopped);
+
+	del_timer_sync(&strp->rx_msg_timer);
+	cancel_work_sync(&strp->rx_work);
+
+	if (strp->rx_skb_head) {
+		kfree_skb(strp->rx_skb_head);
+		strp->rx_skb_head = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(strp_done);
+
+void strp_stop(struct strparser *strp)
+{
+	strp->rx_stopped = 1;
+}
+EXPORT_SYMBOL_GPL(strp_stop);
+
+void strp_check_rcv(struct strparser *strp)
+{
+	queue_work(strp_wq, &strp->rx_work);
+}
+EXPORT_SYMBOL_GPL(strp_check_rcv);
+
+static int __init strp_mod_init(void)
+{
+	strp_wq = create_singlethread_workqueue("kstrp");
+
+	return 0;
+}
+
+static void __exit strp_mod_exit(void)
+{
+}
+module_init(strp_mod_init);
+module_exit(strp_mod_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 23c8e7c..976c781 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -340,12 +340,14 @@
 }
 
 static struct gss_upcall_msg *
-__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid)
+__gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth)
 {
 	struct gss_upcall_msg *pos;
 	list_for_each_entry(pos, &pipe->in_downcall, list) {
 		if (!uid_eq(pos->uid, uid))
 			continue;
+		if (auth && pos->auth->service != auth->service)
+			continue;
 		atomic_inc(&pos->count);
 		dprintk("RPC:       %s found msg %p\n", __func__, pos);
 		return pos;
@@ -365,7 +367,7 @@
 	struct gss_upcall_msg *old;
 
 	spin_lock(&pipe->lock);
-	old = __gss_find_upcall(pipe, gss_msg->uid);
+	old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth);
 	if (old == NULL) {
 		atomic_inc(&gss_msg->count);
 		list_add(&gss_msg->list, &pipe->in_downcall);
@@ -714,7 +716,7 @@
 	err = -ENOENT;
 	/* Find a matching upcall */
 	spin_lock(&pipe->lock);
-	gss_msg = __gss_find_upcall(pipe, uid);
+	gss_msg = __gss_find_upcall(pipe, uid, NULL);
 	if (gss_msg == NULL) {
 		spin_unlock(&pipe->lock);
 		goto err_put_ctx;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1d28181..d858202 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -569,9 +569,10 @@
 	struct rsc *found;
 
 	memset(&rsci, 0, sizeof(rsci));
-	rsci.handle.data = handle->data;
-	rsci.handle.len = handle->len;
+	if (dup_to_netobj(&rsci.handle, handle->data, handle->len))
+		return NULL;
 	found = rsc_lookup(cd, &rsci);
+	rsc_free(&rsci);
 	if (!found)
 		return NULL;
 	if (cache_check(cd, &found->h, NULL))
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index cb49898..66f23b3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -453,7 +453,7 @@
 	struct rpc_xprt_switch *xps;
 
 	if (args->bc_xprt && args->bc_xprt->xpt_bc_xps) {
-		WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+		WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
 		xps = args->bc_xprt->xpt_bc_xps;
 		xprt_switch_get(xps);
 	} else {
@@ -520,7 +520,7 @@
 	char servername[48];
 
 	if (args->bc_xprt) {
-		WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+		WARN_ON_ONCE(!(args->protocol & XPRT_TRANSPORT_BC));
 		xprt = args->bc_xprt->xpt_bc_xprt;
 		if (xprt) {
 			xprt_get(xprt);
@@ -2638,6 +2638,7 @@
 {
 	struct rpc_xprt_switch *xps;
 	struct rpc_xprt *xprt;
+	unsigned long reconnect_timeout;
 	unsigned char resvport;
 	int ret = 0;
 
@@ -2649,6 +2650,7 @@
 		return -EAGAIN;
 	}
 	resvport = xprt->resvport;
+	reconnect_timeout = xprt->max_reconnect_timeout;
 	rcu_read_unlock();
 
 	xprt = xprt_create_transport(xprtargs);
@@ -2657,6 +2659,7 @@
 		goto out_put_switch;
 	}
 	xprt->resvport = resvport;
+	xprt->max_reconnect_timeout = reconnect_timeout;
 
 	rpc_xprt_switch_set_roundrobin(xps);
 	if (setup) {
@@ -2673,6 +2676,27 @@
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
 
+static int
+rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+		struct rpc_xprt *xprt,
+		void *data)
+{
+	unsigned long timeout = *((unsigned long *)data);
+
+	if (timeout < xprt->max_reconnect_timeout)
+		xprt->max_reconnect_timeout = timeout;
+	return 0;
+}
+
+void
+rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
+{
+	rpc_clnt_iterate_for_each_xprt(clnt,
+			rpc_xprt_cap_max_reconnect_timeout,
+			&timeo);
+}
+EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 static void rpc_show_header(void)
 {
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8313960..ea244b2 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -680,6 +680,20 @@
 	spin_unlock_bh(&xprt->transport_lock);
 }
 
+static bool
+xprt_has_timer(const struct rpc_xprt *xprt)
+{
+	return xprt->idle_timeout != 0;
+}
+
+static void
+xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
+	__must_hold(&xprt->transport_lock)
+{
+	if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+		mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
+}
+
 static void
 xprt_init_autodisconnect(unsigned long data)
 {
@@ -688,6 +702,8 @@
 	spin_lock(&xprt->transport_lock);
 	if (!list_empty(&xprt->recv))
 		goto out_abort;
+	/* Reset xprt->last_used to avoid connect/autodisconnect cycling */
+	xprt->last_used = jiffies;
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		goto out_abort;
 	spin_unlock(&xprt->transport_lock);
@@ -725,6 +741,7 @@
 		goto out;
 	xprt->snd_task =NULL;
 	xprt->ops->release_xprt(xprt, NULL);
+	xprt_schedule_autodisconnect(xprt);
 out:
 	spin_unlock_bh(&xprt->transport_lock);
 	wake_up_bit(&xprt->state, XPRT_LOCKED);
@@ -888,11 +905,6 @@
 	spin_unlock_bh(&xprt->transport_lock);
 }
 
-static inline int xprt_has_timer(struct rpc_xprt *xprt)
-{
-	return xprt->idle_timeout != 0;
-}
-
 /**
  * xprt_prepare_transmit - reserve the transport before sending a request
  * @task: RPC task about to send a request
@@ -1280,9 +1292,7 @@
 	if (!list_empty(&req->rq_list))
 		list_del(&req->rq_list);
 	xprt->last_used = jiffies;
-	if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
-		mod_timer(&xprt->timer,
-				xprt->last_used + xprt->idle_timeout);
+	xprt_schedule_autodisconnect(xprt);
 	spin_unlock_bh(&xprt->transport_lock);
 	if (req->rq_buffer)
 		xprt->ops->buf_free(req->rq_buffer);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 536d0be..799cce6 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -51,6 +51,7 @@
 #include <linux/slab.h>
 #include <linux/prefetch.h>
 #include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/svc_rdma.h>
 #include <asm/bitops.h>
 #include <linux/module.h> /* try_module_get()/module_put() */
 
@@ -923,7 +924,7 @@
 	}
 
 	INIT_LIST_HEAD(&buf->rb_recv_bufs);
-	for (i = 0; i < buf->rb_max_requests; i++) {
+	for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
 		struct rpcrdma_rep *rep;
 
 		rep = rpcrdma_create_rep(r_xprt);
@@ -1018,6 +1019,7 @@
 		rep = rpcrdma_buffer_get_rep_locked(buf);
 		rpcrdma_destroy_rep(ia, rep);
 	}
+	buf->rb_send_count = 0;
 
 	spin_lock(&buf->rb_reqslock);
 	while (!list_empty(&buf->rb_allreqs)) {
@@ -1032,6 +1034,7 @@
 		spin_lock(&buf->rb_reqslock);
 	}
 	spin_unlock(&buf->rb_reqslock);
+	buf->rb_recv_count = 0;
 
 	rpcrdma_destroy_mrs(buf);
 }
@@ -1074,8 +1077,27 @@
 	spin_unlock(&buf->rb_mwlock);
 }
 
+static struct rpcrdma_rep *
+rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers)
+{
+	/* If an RPC previously completed without a reply (say, a
+	 * credential problem or a soft timeout occurs) then hold off
+	 * on supplying more Receive buffers until the number of new
+	 * pending RPCs catches up to the number of posted Receives.
+	 */
+	if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
+		return NULL;
+
+	if (unlikely(list_empty(&buffers->rb_recv_bufs)))
+		return NULL;
+	buffers->rb_recv_count++;
+	return rpcrdma_buffer_get_rep_locked(buffers);
+}
+
 /*
  * Get a set of request/reply buffers.
+ *
+ * Reply buffer (if available) is attached to send buffer upon return.
  */
 struct rpcrdma_req *
 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
@@ -1085,21 +1107,15 @@
 	spin_lock(&buffers->rb_lock);
 	if (list_empty(&buffers->rb_send_bufs))
 		goto out_reqbuf;
+	buffers->rb_send_count++;
 	req = rpcrdma_buffer_get_req_locked(buffers);
-	if (list_empty(&buffers->rb_recv_bufs))
-		goto out_repbuf;
-	req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+	req->rl_reply = rpcrdma_buffer_get_rep(buffers);
 	spin_unlock(&buffers->rb_lock);
 	return req;
 
 out_reqbuf:
 	spin_unlock(&buffers->rb_lock);
-	pr_warn("rpcrdma: out of request buffers (%p)\n", buffers);
-	return NULL;
-out_repbuf:
-	list_add(&req->rl_free, &buffers->rb_send_bufs);
-	spin_unlock(&buffers->rb_lock);
-	pr_warn("rpcrdma: out of reply buffers (%p)\n", buffers);
+	pr_warn("RPC:       %s: out of request buffers\n", __func__);
 	return NULL;
 }
 
@@ -1117,9 +1133,12 @@
 	req->rl_reply = NULL;
 
 	spin_lock(&buffers->rb_lock);
+	buffers->rb_send_count--;
 	list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
-	if (rep)
+	if (rep) {
+		buffers->rb_recv_count--;
 		list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+	}
 	spin_unlock(&buffers->rb_lock);
 }
 
@@ -1133,8 +1152,7 @@
 	struct rpcrdma_buffer *buffers = req->rl_buffer;
 
 	spin_lock(&buffers->rb_lock);
-	if (!list_empty(&buffers->rb_recv_bufs))
-		req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+	req->rl_reply = rpcrdma_buffer_get_rep(buffers);
 	spin_unlock(&buffers->rb_lock);
 }
 
@@ -1148,6 +1166,7 @@
 	struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
 
 	spin_lock(&buffers->rb_lock);
+	buffers->rb_recv_count--;
 	list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
 	spin_unlock(&buffers->rb_lock);
 }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 670fad5..a71b0f5 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -321,6 +321,7 @@
 	char			*rb_pool;
 
 	spinlock_t		rb_lock;	/* protect buf lists */
+	int			rb_send_count, rb_recv_count;
 	struct list_head	rb_send_bufs;
 	struct list_head	rb_recv_bufs;
 	u32			rb_max_requests;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 111767a..bf16883 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -177,7 +177,6 @@
  * increase over time if the server is down or not responding.
  */
 #define XS_TCP_INIT_REEST_TO	(3U * HZ)
-#define XS_TCP_MAX_REEST_TO	(5U * 60 * HZ)
 
 /*
  * TCP idle timeout; client drops the transport socket if it is idle
@@ -1075,7 +1074,7 @@
 		skb = skb_recv_datagram(sk, 0, 1, &err);
 		if (skb != NULL) {
 			xs_udp_data_read_skb(&transport->xprt, sk, skb);
-			skb_free_datagram(sk, skb);
+			skb_free_datagram_locked(sk, skb);
 			continue;
 		}
 		if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
@@ -2173,6 +2172,8 @@
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
 	xs_udp_do_set_buffer_size(xprt);
+
+	xprt->stat.connect_start = jiffies;
 }
 
 static void xs_udp_setup_socket(struct work_struct *work)
@@ -2236,6 +2237,7 @@
 		unsigned int keepcnt = xprt->timeout->to_retries + 1;
 		unsigned int opt_on = 1;
 		unsigned int timeo;
+		unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
 
 		/* TCP Keepalive options */
 		kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2247,6 +2249,16 @@
 		kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
 				(char *)&keepcnt, sizeof(keepcnt));
 
+		/* Avoid temporary address, they are bad for long-lived
+		 * connections such as NFS mounts.
+		 * RFC4941, section 3.6 suggests that:
+		 *    Individual applications, which have specific
+		 *    knowledge about the normal duration of connections,
+		 *    MAY override this as appropriate.
+		 */
+		kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
+				(char *)&addr_pref, sizeof(addr_pref));
+
 		/* TCP user timeout (see RFC5482) */
 		timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
 			(xprt->timeout->to_retries + 1);
@@ -2295,6 +2307,10 @@
 		/* SYN_SENT! */
 		if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
 			xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+		break;
+	case -EADDRNOTAVAIL:
+		/* Source port number is unavailable. Try a new one! */
+		transport->srcport = 0;
 	}
 out:
 	return ret;
@@ -2369,6 +2385,25 @@
 	xprt_wake_pending_tasks(xprt, status);
 }
 
+static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
+{
+	unsigned long start, now = jiffies;
+
+	start = xprt->stat.connect_start + xprt->reestablish_timeout;
+	if (time_after(start, now))
+		return start - now;
+	return 0;
+}
+
+static void xs_reconnect_backoff(struct rpc_xprt *xprt)
+{
+	xprt->reestablish_timeout <<= 1;
+	if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+		xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+	if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+		xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+}
+
 /**
  * xs_connect - connect a socket to a remote endpoint
  * @xprt: pointer to transport structure
@@ -2386,6 +2421,7 @@
 static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+	unsigned long delay = 0;
 
 	WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
 
@@ -2397,19 +2433,15 @@
 		/* Start by resetting any existing state */
 		xs_reset_transport(transport);
 
-		queue_delayed_work(xprtiod_workqueue,
-				   &transport->connect_worker,
-				   xprt->reestablish_timeout);
-		xprt->reestablish_timeout <<= 1;
-		if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
-			xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
-		if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
-			xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
-	} else {
+		delay = xs_reconnect_delay(xprt);
+		xs_reconnect_backoff(xprt);
+
+	} else
 		dprintk("RPC:       xs_connect scheduled xprt %p\n", xprt);
-		queue_delayed_work(xprtiod_workqueue,
-				   &transport->connect_worker, 0);
-	}
+
+	queue_delayed_work(xprtiod_workqueue,
+			&transport->connect_worker,
+			delay);
 }
 
 /**
@@ -2961,6 +2993,8 @@
 	xprt->ops = &xs_tcp_ops;
 	xprt->timeout = &xs_tcp_default_timeout;
 
+	xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+
 	INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
 	INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
 
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index a5fc9dd..02beb35 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -21,7 +21,6 @@
 #include <linux/workqueue.h>
 #include <linux/if_vlan.h>
 #include <linux/rtnetlink.h>
-#include <net/ip_fib.h>
 #include <net/switchdev.h>
 
 /**
@@ -344,8 +343,6 @@
 	switch (obj->id) {
 	case SWITCHDEV_OBJ_ID_PORT_VLAN:
 		return sizeof(struct switchdev_obj_port_vlan);
-	case SWITCHDEV_OBJ_ID_IPV4_FIB:
-		return sizeof(struct switchdev_obj_ipv4_fib);
 	case SWITCHDEV_OBJ_ID_PORT_FDB:
 		return sizeof(struct switchdev_obj_port_fdb);
 	case SWITCHDEV_OBJ_ID_PORT_MDB:
@@ -1042,7 +1039,7 @@
 	struct nlmsghdr *nlh;
 	struct ndmsg *ndm;
 
-	if (dump->idx < dump->cb->args[0])
+	if (dump->idx < dump->cb->args[2])
 		goto skip;
 
 	nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH,
@@ -1089,7 +1086,7 @@
  */
 int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 			    struct net_device *dev,
-			    struct net_device *filter_dev, int idx)
+			    struct net_device *filter_dev, int *idx)
 {
 	struct switchdev_fdb_dump dump = {
 		.fdb.obj.orig_dev = dev,
@@ -1097,207 +1094,27 @@
 		.dev = dev,
 		.skb = skb,
 		.cb = cb,
-		.idx = idx,
+		.idx = *idx,
 	};
 	int err;
 
 	err = switchdev_port_obj_dump(dev, &dump.fdb.obj,
 				      switchdev_port_fdb_dump_cb);
-	cb->args[1] = err;
-	return dump.idx;
+	*idx = dump.idx;
+	return err;
 }
 EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
 
-static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
-{
-	const struct switchdev_ops *ops = dev->switchdev_ops;
-	struct net_device *lower_dev;
-	struct net_device *port_dev;
-	struct list_head *iter;
-
-	/* Recusively search down until we find a sw port dev.
-	 * (A sw port dev supports switchdev_port_attr_get).
-	 */
-
-	if (ops && ops->switchdev_port_attr_get)
-		return dev;
-
-	netdev_for_each_lower_dev(dev, lower_dev, iter) {
-		port_dev = switchdev_get_lowest_dev(lower_dev);
-		if (port_dev)
-			return port_dev;
-	}
-
-	return NULL;
-}
-
-static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
-{
-	struct switchdev_attr attr = {
-		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-	};
-	struct switchdev_attr prev_attr;
-	struct net_device *dev = NULL;
-	int nhsel;
-
-	ASSERT_RTNL();
-
-	/* For this route, all nexthop devs must be on the same switch. */
-
-	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
-		const struct fib_nh *nh = &fi->fib_nh[nhsel];
-
-		if (!nh->nh_dev)
-			return NULL;
-
-		dev = switchdev_get_lowest_dev(nh->nh_dev);
-		if (!dev)
-			return NULL;
-
-		attr.orig_dev = dev;
-		if (switchdev_port_attr_get(dev, &attr))
-			return NULL;
-
-		if (nhsel > 0 &&
-		    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
-				return NULL;
-
-		prev_attr = attr;
-	}
-
-	return dev;
-}
-
-/**
- *	switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry
- *
- *	@dst: route's IPv4 destination address
- *	@dst_len: destination address length (prefix length)
- *	@fi: route FIB info structure
- *	@tos: route TOS
- *	@type: route type
- *	@nlflags: netlink flags passed in (NLM_F_*)
- *	@tb_id: route table ID
- *
- *	Add/modify switch IPv4 route entry.
- */
-int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
-			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
-{
-	struct switchdev_obj_ipv4_fib ipv4_fib = {
-		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
-		.dst = dst,
-		.dst_len = dst_len,
-		.fi = fi,
-		.tos = tos,
-		.type = type,
-		.nlflags = nlflags,
-		.tb_id = tb_id,
-	};
-	struct net_device *dev;
-	int err = 0;
-
-	/* Don't offload route if using custom ip rules or if
-	 * IPv4 FIB offloading has been disabled completely.
-	 */
-
-#ifdef CONFIG_IP_MULTIPLE_TABLES
-	if (fi->fib_net->ipv4.fib_has_custom_rules)
-		return 0;
-#endif
-
-	if (fi->fib_net->ipv4.fib_offload_disabled)
-		return 0;
-
-	dev = switchdev_get_dev_by_nhs(fi);
-	if (!dev)
-		return 0;
-
-	ipv4_fib.obj.orig_dev = dev;
-	err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
-	if (!err)
-		fi->fib_flags |= RTNH_F_OFFLOAD;
-
-	return err == -EOPNOTSUPP ? 0 : err;
-}
-EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
-
-/**
- *	switchdev_fib_ipv4_del - Delete IPv4 route entry from switch
- *
- *	@dst: route's IPv4 destination address
- *	@dst_len: destination address length (prefix length)
- *	@fi: route FIB info structure
- *	@tos: route TOS
- *	@type: route type
- *	@tb_id: route table ID
- *
- *	Delete IPv4 route entry from switch device.
- */
-int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
-			   u8 tos, u8 type, u32 tb_id)
-{
-	struct switchdev_obj_ipv4_fib ipv4_fib = {
-		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
-		.dst = dst,
-		.dst_len = dst_len,
-		.fi = fi,
-		.tos = tos,
-		.type = type,
-		.nlflags = 0,
-		.tb_id = tb_id,
-	};
-	struct net_device *dev;
-	int err = 0;
-
-	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
-		return 0;
-
-	dev = switchdev_get_dev_by_nhs(fi);
-	if (!dev)
-		return 0;
-
-	ipv4_fib.obj.orig_dev = dev;
-	err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
-	if (!err)
-		fi->fib_flags &= ~RTNH_F_OFFLOAD;
-
-	return err == -EOPNOTSUPP ? 0 : err;
-}
-EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del);
-
-/**
- *	switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation
- *
- *	@fi: route FIB info structure
- */
-void switchdev_fib_ipv4_abort(struct fib_info *fi)
-{
-	/* There was a problem installing this route to the offload
-	 * device.  For now, until we come up with more refined
-	 * policy handling, abruptly end IPv4 fib offloading for
-	 * for entire net by flushing offload device(s) of all
-	 * IPv4 routes, and mark IPv4 fib offloading broken from
-	 * this point forward.
-	 */
-
-	fib_flush_external(fi->fib_net);
-	fi->fib_net->ipv4.fib_offload_disabled = true;
-}
-EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
-
 bool switchdev_port_same_parent_id(struct net_device *a,
 				   struct net_device *b)
 {
 	struct switchdev_attr a_attr = {
 		.orig_dev = a,
 		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-		.flags = SWITCHDEV_F_NO_RECURSE,
 	};
 	struct switchdev_attr b_attr = {
 		.orig_dev = b,
 		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
-		.flags = SWITCHDEV_F_NO_RECURSE,
 	};
 
 	if (switchdev_port_attr_get(a, &a_attr) ||
@@ -1306,89 +1123,4 @@
 
 	return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
 }
-
-static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
-				       struct net_device *group_dev)
-{
-	struct net_device *lower_dev;
-	struct list_head *iter;
-
-	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
-		if (lower_dev == dev)
-			continue;
-		if (switchdev_port_same_parent_id(dev, lower_dev))
-			return lower_dev->offload_fwd_mark;
-		return switchdev_port_fwd_mark_get(dev, lower_dev);
-	}
-
-	return dev->ifindex;
-}
 EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
-
-static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
-					  u32 old_mark, u32 *reset_mark)
-{
-	struct net_device *lower_dev;
-	struct list_head *iter;
-
-	netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
-		if (lower_dev->offload_fwd_mark == old_mark) {
-			if (!*reset_mark)
-				*reset_mark = lower_dev->ifindex;
-			lower_dev->offload_fwd_mark = *reset_mark;
-		}
-		switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
-	}
-}
-
-/**
- *	switchdev_port_fwd_mark_set - Set port offload forwarding mark
- *
- *	@dev: port device
- *	@group_dev: containing device
- *	@joining: true if dev is joining group; false if leaving group
- *
- *	An ungrouped port's offload mark is just its ifindex.  A grouped
- *	port's (member of a bridge, for example) offload mark is the ifindex
- *	of one of the ports in the group with the same parent (switch) ID.
- *	Ports on the same device in the same group will have the same mark.
- *
- *	Example:
- *
- *		br0		ifindex=9
- *		  sw1p1		ifindex=2	mark=2
- *		  sw1p2		ifindex=3	mark=2
- *		  sw2p1		ifindex=4	mark=5
- *		  sw2p2		ifindex=5	mark=5
- *
- *	If sw2p2 leaves the bridge, we'll have:
- *
- *		br0		ifindex=9
- *		  sw1p1		ifindex=2	mark=2
- *		  sw1p2		ifindex=3	mark=2
- *		  sw2p1		ifindex=4	mark=4
- *		sw2p2		ifindex=5	mark=5
- */
-void switchdev_port_fwd_mark_set(struct net_device *dev,
-				 struct net_device *group_dev,
-				 bool joining)
-{
-	u32 mark = dev->ifindex;
-	u32 reset_mark = 0;
-
-	if (group_dev) {
-		ASSERT_RTNL();
-		if (joining)
-			mark = switchdev_port_fwd_mark_get(dev, group_dev);
-		else if (dev->offload_fwd_mark == mark)
-			/* Ohoh, this port was the mark reference port,
-			 * but it's leaving the group, so reset the
-			 * mark for the remaining ports in the group.
-			 */
-			switchdev_port_fwd_mark_reset(group_dev, mark,
-						      &reset_mark);
-	}
-
-	dev->offload_fwd_mark = mark;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 46a71c7..e0c71bd 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -42,26 +42,37 @@
 			       struct ctl_table *table)
 {
 	struct net *net = container_of(head->set, struct net, sysctls);
-	kuid_t root_uid = make_kuid(net->user_ns, 0);
-	kgid_t root_gid = make_kgid(net->user_ns, 0);
 
 	/* Allow network administrator to have same access as root. */
-	if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) ||
-	    uid_eq(root_uid, current_euid())) {
+	if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN)) {
 		int mode = (table->mode >> 6) & 7;
 		return (mode << 6) | (mode << 3) | mode;
 	}
-	/* Allow netns root group to have the same access as the root group */
-	if (in_egroup_p(root_gid)) {
-		int mode = (table->mode >> 3) & 7;
-		return (mode << 3) | mode;
-	}
+
 	return table->mode;
 }
 
+static void net_ctl_set_ownership(struct ctl_table_header *head,
+				  struct ctl_table *table,
+				  kuid_t *uid, kgid_t *gid)
+{
+	struct net *net = container_of(head->set, struct net, sysctls);
+	kuid_t ns_root_uid;
+	kgid_t ns_root_gid;
+
+	ns_root_uid = make_kuid(net->user_ns, 0);
+	if (uid_valid(ns_root_uid))
+		*uid = ns_root_uid;
+
+	ns_root_gid = make_kgid(net->user_ns, 0);
+	if (gid_valid(ns_root_gid))
+		*gid = ns_root_gid;
+}
+
 static struct ctl_table_root net_sysctl_root = {
 	.lookup = net_ctl_header_lookup,
 	.permissions = net_ctl_permissions,
+	.set_ownership = net_ctl_set_ownership,
 };
 
 static int __net_init sysctl_net_init(struct net *net)
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index ae469b3..753f774 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -269,18 +269,19 @@
  *
  * RCU is locked, no other locks set
  */
-void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
-			 struct tipc_msg *hdr)
+int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+			struct tipc_msg *hdr)
 {
 	struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
 	struct sk_buff_head xmitq;
+	int rc = 0;
 
 	__skb_queue_head_init(&xmitq);
 
 	tipc_bcast_lock(net);
 	if (msg_type(hdr) == STATE_MSG) {
 		tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
-		tipc_link_bc_sync_rcv(l, hdr, &xmitq);
+		rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq);
 	} else {
 		tipc_link_bc_init_rcv(l, hdr);
 	}
@@ -291,6 +292,7 @@
 	/* Any socket wakeup messages ? */
 	if (!skb_queue_empty(inputq))
 		tipc_sk_rcv(net, inputq);
+	return rc;
 }
 
 /* tipc_bcast_add_peer - add a peer node to broadcast link and bearer
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index d5e79b3..5ffe344 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -56,8 +56,8 @@
 int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list);
 int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
 void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked);
-void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
-			 struct tipc_msg *hdr);
+int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
+			struct tipc_msg *hdr);
 int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
 int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
 int tipc_bclink_reset_stats(struct net *net);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 65b1bbf..975dbeb 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -42,6 +42,7 @@
 #include "monitor.h"
 #include "bcast.h"
 #include "netlink.h"
+#include "udp_media.h"
 
 #define MAX_ADDR_STR 60
 
@@ -56,6 +57,13 @@
 	NULL
 };
 
+static struct tipc_bearer *bearer_get(struct net *net, int bearer_id)
+{
+	struct tipc_net *tn = tipc_net(net);
+
+	return rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+}
+
 static void bearer_disable(struct net *net, struct tipc_bearer *b);
 
 /**
@@ -323,6 +331,7 @@
 	b->domain = disc_domain;
 	b->net_plane = bearer_id + 'A';
 	b->priority = priority;
+	test_and_set_bit_lock(0, &b->up);
 
 	res = tipc_disc_create(net, b, &b->bcast_addr, &skb);
 	if (res) {
@@ -360,15 +369,24 @@
  */
 void tipc_bearer_reset_all(struct net *net)
 {
-	struct tipc_net *tn = tipc_net(net);
 	struct tipc_bearer *b;
 	int i;
 
 	for (i = 0; i < MAX_BEARERS; i++) {
-		b = rcu_dereference_rtnl(tn->bearer_list[i]);
+		b = bearer_get(net, i);
+		if (b)
+			clear_bit_unlock(0, &b->up);
+	}
+	for (i = 0; i < MAX_BEARERS; i++) {
+		b = bearer_get(net, i);
 		if (b)
 			tipc_reset_bearer(net, b);
 	}
+	for (i = 0; i < MAX_BEARERS; i++) {
+		b = bearer_get(net, i);
+		if (b)
+			test_and_set_bit_lock(0, &b->up);
+	}
 }
 
 /**
@@ -382,8 +400,9 @@
 	int bearer_id = b->identity;
 
 	pr_info("Disabling bearer <%s>\n", b->name);
-	b->media->disable_media(b);
+	clear_bit_unlock(0, &b->up);
 	tipc_node_delete_links(net, bearer_id);
+	b->media->disable_media(b);
 	RCU_INIT_POINTER(b->media_ptr, NULL);
 	if (b->link_req)
 		tipc_disc_delete(b->link_req);
@@ -440,22 +459,16 @@
 {
 	struct net_device *dev;
 	int delta;
-	void *tipc_ptr;
 
 	dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr);
 	if (!dev)
 		return 0;
 
-	/* Send RESET message even if bearer is detached from device */
-	tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr);
-	if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb))))
-		goto drop;
-
-	delta = dev->hard_header_len - skb_headroom(skb);
-	if ((delta > 0) &&
-	    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
-		goto drop;
-
+	delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb));
+	if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) {
+		kfree_skb(skb);
+		return 0;
+	}
 	skb_reset_network_header(skb);
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_TIPC);
@@ -463,9 +476,6 @@
 			dev->dev_addr, skb->len);
 	dev_queue_xmit(skb);
 	return 0;
-drop:
-	kfree_skb(skb);
-	return 0;
 }
 
 int tipc_bearer_mtu(struct net *net, u32 bearer_id)
@@ -487,12 +497,12 @@
 			  struct sk_buff *skb,
 			  struct tipc_media_addr *dest)
 {
-	struct tipc_net *tn = tipc_net(net);
+	struct tipc_msg *hdr = buf_msg(skb);
 	struct tipc_bearer *b;
 
 	rcu_read_lock();
-	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
-	if (likely(b))
+	b = bearer_get(net, bearer_id);
+	if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr))))
 		b->media->send_msg(net, skb, b, dest);
 	else
 		kfree_skb(skb);
@@ -505,7 +515,6 @@
 		      struct sk_buff_head *xmitq,
 		      struct tipc_media_addr *dst)
 {
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_bearer *b;
 	struct sk_buff *skb, *tmp;
 
@@ -513,12 +522,15 @@
 		return;
 
 	rcu_read_lock();
-	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+	b = bearer_get(net, bearer_id);
 	if (unlikely(!b))
 		__skb_queue_purge(xmitq);
 	skb_queue_walk_safe(xmitq, skb, tmp) {
 		__skb_dequeue(xmitq);
-		b->media->send_msg(net, skb, b, dst);
+		if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb))))
+			b->media->send_msg(net, skb, b, dst);
+		else
+			kfree_skb(skb);
 	}
 	rcu_read_unlock();
 }
@@ -535,8 +547,8 @@
 	struct tipc_msg *hdr;
 
 	rcu_read_lock();
-	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
-	if (unlikely(!b))
+	b = bearer_get(net, bearer_id);
+	if (unlikely(!b || !test_bit(0, &b->up)))
 		__skb_queue_purge(xmitq);
 	skb_queue_walk_safe(xmitq, skb, tmp) {
 		hdr = buf_msg(skb);
@@ -566,7 +578,8 @@
 
 	rcu_read_lock();
 	b = rcu_dereference_rtnl(dev->tipc_ptr);
-	if (likely(b && (skb->pkt_type <= PACKET_BROADCAST))) {
+	if (likely(b && test_bit(0, &b->up) &&
+		   (skb->pkt_type <= PACKET_BROADCAST))) {
 		skb->next = NULL;
 		tipc_rcv(dev_net(dev), skb, b);
 		rcu_read_unlock();
@@ -591,18 +604,9 @@
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct net *net = dev_net(dev);
-	struct tipc_net *tn = tipc_net(net);
 	struct tipc_bearer *b;
-	int i;
 
 	b = rtnl_dereference(dev->tipc_ptr);
-	if (!b) {
-		for (i = 0; i < MAX_BEARERS; b = NULL, i++) {
-			b = rtnl_dereference(tn->bearer_list[i]);
-			if (b && (b->media_ptr == dev))
-				break;
-		}
-	}
 	if (!b)
 		return NOTIFY_DONE;
 
@@ -613,11 +617,10 @@
 		if (netif_carrier_ok(dev))
 			break;
 	case NETDEV_UP:
-		rcu_assign_pointer(dev->tipc_ptr, b);
+		test_and_set_bit_lock(0, &b->up);
 		break;
 	case NETDEV_GOING_DOWN:
-		RCU_INIT_POINTER(dev->tipc_ptr, NULL);
-		synchronize_net();
+		clear_bit_unlock(0, &b->up);
 		tipc_reset_bearer(net, b);
 		break;
 	case NETDEV_CHANGEMTU:
@@ -709,6 +712,14 @@
 		goto prop_msg_full;
 
 	nla_nest_end(msg->skb, prop);
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+	if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) {
+		if (tipc_udp_nl_add_bearer_data(msg, bearer))
+			goto attr_msg_full;
+	}
+#endif
+
 	nla_nest_end(msg->skb, attrs);
 	genlmsg_end(msg->skb, hdr);
 
@@ -895,6 +906,49 @@
 	return 0;
 }
 
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
+{
+	int err;
+	char *name;
+	struct tipc_bearer *b;
+	struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+	struct net *net = sock_net(skb->sk);
+
+	if (!info->attrs[TIPC_NLA_BEARER])
+		return -EINVAL;
+
+	err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
+			       info->attrs[TIPC_NLA_BEARER],
+			       tipc_nl_bearer_policy);
+	if (err)
+		return err;
+
+	if (!attrs[TIPC_NLA_BEARER_NAME])
+		return -EINVAL;
+	name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
+
+	rtnl_lock();
+	b = tipc_bearer_find(net, name);
+	if (!b) {
+		rtnl_unlock();
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+	if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+		err = tipc_udp_nl_bearer_add(b,
+					     attrs[TIPC_NLA_BEARER_UDP_OPTS]);
+		if (err) {
+			rtnl_unlock();
+			return err;
+		}
+	}
+#endif
+	rtnl_unlock();
+
+	return 0;
+}
+
 int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
 {
 	int err;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 43757f1..78892e2f 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -150,6 +150,7 @@
 	u32 identity;
 	struct tipc_link_req *link_req;
 	char net_plane;
+	unsigned long up;
 };
 
 struct tipc_bearer_names {
@@ -180,6 +181,7 @@
 int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
 
 int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
 int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 877d94f..b36e16c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -181,7 +181,10 @@
 	u16 acked;
 	struct tipc_link *bc_rcvlink;
 	struct tipc_link *bc_sndlink;
-	int nack_state;
+	unsigned long prev_retr;
+	u16 prev_from;
+	u16 prev_to;
+	u8 nack_state;
 	bool bc_peer_is_up;
 
 	/* Statistics */
@@ -202,6 +205,8 @@
 	BC_NACK_SND_SUPPRESS,
 };
 
+#define TIPC_BC_RETR_LIMIT 10   /* [ms] */
+
 /*
  * Interval between NACKs when packets arrive out of order
  */
@@ -237,8 +242,8 @@
 				      u16 rcvgap, int tolerance, int priority,
 				      struct sk_buff_head *xmitq);
 static void link_print(struct tipc_link *l, const char *str);
-static void tipc_link_build_nack_msg(struct tipc_link *l,
-				     struct sk_buff_head *xmitq);
+static int tipc_link_build_nack_msg(struct tipc_link *l,
+				    struct sk_buff_head *xmitq);
 static void tipc_link_build_bc_init_msg(struct tipc_link *l,
 					struct sk_buff_head *xmitq);
 static bool tipc_link_release_pkts(struct tipc_link *l, u16 to);
@@ -367,6 +372,18 @@
 	return l->ackers;
 }
 
+u16 link_bc_rcv_gap(struct tipc_link *l)
+{
+	struct sk_buff *skb = skb_peek(&l->deferdq);
+	u16 gap = 0;
+
+	if (more(l->snd_nxt, l->rcv_nxt))
+		gap = l->snd_nxt - l->rcv_nxt;
+	if (skb)
+		gap = buf_seqno(skb) - l->rcv_nxt;
+	return gap;
+}
+
 void tipc_link_set_mtu(struct tipc_link *l, int mtu)
 {
 	l->mtu = mtu;
@@ -807,7 +824,7 @@
 
 	skb_queue_walk_safe(&l->wakeupq, skb, tmp) {
 		imp = TIPC_SKB_CB(skb)->chain_imp;
-		lim = l->window + l->backlog[imp].limit;
+		lim = l->backlog[imp].limit;
 		pnd[imp] += TIPC_SKB_CB(skb)->chain_sz;
 		if ((pnd[imp] + l->backlog[imp].len) >= lim)
 			break;
@@ -873,9 +890,11 @@
 	struct sk_buff *skb, *_skb, *bskb;
 
 	/* Match msg importance against this and all higher backlog limits: */
-	for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
-		if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
-			return link_schedule_user(l, list);
+	if (!skb_queue_empty(backlogq)) {
+		for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
+			if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
+				return link_schedule_user(l, list);
+		}
 	}
 	if (unlikely(msg_size(hdr) > mtu)) {
 		skb_queue_purge(list);
@@ -1133,7 +1152,10 @@
 		if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf)
 			return 0;
 		l->rcv_unacked = 0;
-		return TIPC_LINK_SND_BC_ACK;
+
+		/* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */
+		l->snd_nxt = l->rcv_nxt;
+		return TIPC_LINK_SND_STATE;
 	}
 
 	/* Unicast ACK */
@@ -1162,17 +1184,26 @@
 }
 
 /* tipc_link_build_nack_msg: prepare link nack message for transmission
+ * Note that sending of broadcast NACK is coordinated among nodes, to
+ * reduce the risk of NACK storms towards the sender
  */
-static void tipc_link_build_nack_msg(struct tipc_link *l,
-				     struct sk_buff_head *xmitq)
+static int tipc_link_build_nack_msg(struct tipc_link *l,
+				    struct sk_buff_head *xmitq)
 {
 	u32 def_cnt = ++l->stats.deferred_recv;
+	int match1, match2;
 
-	if (link_is_bc_rcvlink(l))
-		return;
+	if (link_is_bc_rcvlink(l)) {
+		match1 = def_cnt & 0xf;
+		match2 = tipc_own_addr(l->net) & 0xf;
+		if (match1 == match2)
+			return TIPC_LINK_SND_STATE;
+		return 0;
+	}
 
 	if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
 		tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+	return 0;
 }
 
 /* tipc_link_rcv - process TIPC packets/messages arriving from off-node
@@ -1223,7 +1254,7 @@
 		/* Defer delivery if sequence gap */
 		if (unlikely(seqno != rcv_nxt)) {
 			__tipc_skb_queue_sorted(defq, seqno, skb);
-			tipc_link_build_nack_msg(l, xmitq);
+			rc |= tipc_link_build_nack_msg(l, xmitq);
 			break;
 		}
 
@@ -1234,7 +1265,7 @@
 			rc |= tipc_link_input(l, skb, l->inputq);
 		if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
 			rc |= tipc_link_build_state_msg(l, xmitq);
-		if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK))
+		if (unlikely(rc & ~TIPC_LINK_SND_STATE))
 			break;
 	} while ((skb = __skb_dequeue(defq)));
 
@@ -1248,10 +1279,11 @@
 				      u16 rcvgap, int tolerance, int priority,
 				      struct sk_buff_head *xmitq)
 {
+	struct tipc_link *bcl = l->bc_rcvlink;
 	struct sk_buff *skb;
 	struct tipc_msg *hdr;
 	struct sk_buff_head *dfq = &l->deferdq;
-	bool node_up = link_is_up(l->bc_rcvlink);
+	bool node_up = link_is_up(bcl);
 	struct tipc_mon_state *mstate = &l->mon_state;
 	int dlen = 0;
 	void *data;
@@ -1279,7 +1311,7 @@
 	msg_set_net_plane(hdr, l->net_plane);
 	msg_set_next_sent(hdr, l->snd_nxt);
 	msg_set_ack(hdr, l->rcv_nxt - 1);
-	msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1);
+	msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
 	msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1);
 	msg_set_link_tolerance(hdr, tolerance);
 	msg_set_linkprio(hdr, priority);
@@ -1289,6 +1321,7 @@
 
 	if (mtyp == STATE_MSG) {
 		msg_set_seq_gap(hdr, rcvgap);
+		msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
 		msg_set_probe(hdr, probe);
 		tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id);
 		msg_set_size(hdr, INT_H_SIZE + dlen);
@@ -1571,51 +1604,107 @@
 		l->rcv_nxt = peers_snd_nxt;
 }
 
+/* link_bc_retr eval()- check if the indicated range can be retransmitted now
+ * - Adjust permitted range if there is overlap with previous retransmission
+ */
+static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to)
+{
+	unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr);
+
+	if (less(*to, *from))
+		return false;
+
+	/* New retransmission request */
+	if ((elapsed > TIPC_BC_RETR_LIMIT) ||
+	    less(*to, l->prev_from) || more(*from, l->prev_to)) {
+		l->prev_from = *from;
+		l->prev_to = *to;
+		l->prev_retr = jiffies;
+		return true;
+	}
+
+	/* Inside range of previous retransmit */
+	if (!less(*from, l->prev_from) && !more(*to, l->prev_to))
+		return false;
+
+	/* Fully or partially outside previous range => exclude overlap */
+	if (less(*from, l->prev_from)) {
+		*to = l->prev_from - 1;
+		l->prev_from = *from;
+	}
+	if (more(*to, l->prev_to)) {
+		*from = l->prev_to + 1;
+		l->prev_to = *to;
+	}
+	l->prev_retr = jiffies;
+	return true;
+}
+
 /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state
  */
-void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
-			   struct sk_buff_head *xmitq)
+int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
+			  struct sk_buff_head *xmitq)
 {
+	struct tipc_link *snd_l = l->bc_sndlink;
 	u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
+	u16 from = msg_bcast_ack(hdr) + 1;
+	u16 to = from + msg_bc_gap(hdr) - 1;
+	int rc = 0;
 
 	if (!link_is_up(l))
-		return;
+		return rc;
 
 	if (!msg_peer_node_is_up(hdr))
-		return;
+		return rc;
 
 	/* Open when peer ackowledges our bcast init msg (pkt #1) */
 	if (msg_ack(hdr))
 		l->bc_peer_is_up = true;
 
 	if (!l->bc_peer_is_up)
-		return;
+		return rc;
+
+	l->stats.recv_nacks++;
 
 	/* Ignore if peers_snd_nxt goes beyond receive window */
 	if (more(peers_snd_nxt, l->rcv_nxt + l->window))
-		return;
+		return rc;
+
+	if (link_bc_retr_eval(snd_l, &from, &to))
+		rc = tipc_link_retrans(snd_l, from, to, xmitq);
+
+	l->snd_nxt = peers_snd_nxt;
+	if (link_bc_rcv_gap(l))
+		rc |= TIPC_LINK_SND_STATE;
+
+	/* Return now if sender supports nack via STATE messages */
+	if (l->peer_caps & TIPC_BCAST_STATE_NACK)
+		return rc;
+
+	/* Otherwise, be backwards compatible */
 
 	if (!more(peers_snd_nxt, l->rcv_nxt)) {
 		l->nack_state = BC_NACK_SND_CONDITIONAL;
-		return;
+		return 0;
 	}
 
 	/* Don't NACK if one was recently sent or peeked */
 	if (l->nack_state == BC_NACK_SND_SUPPRESS) {
 		l->nack_state = BC_NACK_SND_UNCONDITIONAL;
-		return;
+		return 0;
 	}
 
 	/* Conditionally delay NACK sending until next synch rcv */
 	if (l->nack_state == BC_NACK_SND_CONDITIONAL) {
 		l->nack_state = BC_NACK_SND_UNCONDITIONAL;
 		if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN)
-			return;
+			return 0;
 	}
 
 	/* Send NACK now but suppress next one */
 	tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq);
 	l->nack_state = BC_NACK_SND_SUPPRESS;
+	return 0;
 }
 
 void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
@@ -1652,6 +1741,8 @@
 }
 
 /* tipc_link_bc_nack_rcv(): receive broadcast nack message
+ * This function is here for backwards compatibility, since
+ * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5.
  */
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 			  struct sk_buff_head *xmitq)
@@ -1692,10 +1783,10 @@
 	int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE);
 
 	l->window = win;
-	l->backlog[TIPC_LOW_IMPORTANCE].limit      = win / 2;
-	l->backlog[TIPC_MEDIUM_IMPORTANCE].limit   = win;
-	l->backlog[TIPC_HIGH_IMPORTANCE].limit     = win / 2 * 3;
-	l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2;
+	l->backlog[TIPC_LOW_IMPORTANCE].limit      = max_t(u16, 50, win);
+	l->backlog[TIPC_MEDIUM_IMPORTANCE].limit   = max_t(u16, 100, win * 2);
+	l->backlog[TIPC_HIGH_IMPORTANCE].limit     = max_t(u16, 150, win * 3);
+	l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4);
 	l->backlog[TIPC_SYSTEM_IMPORTANCE].limit   = max_bulk;
 }
 
diff --git a/net/tipc/link.h b/net/tipc/link.h
index d7e9d42..d1bd178 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -63,7 +63,7 @@
 enum {
 	TIPC_LINK_UP_EVT       = 1,
 	TIPC_LINK_DOWN_EVT     = (1 << 1),
-	TIPC_LINK_SND_BC_ACK   = (1 << 2)
+	TIPC_LINK_SND_STATE    = (1 << 2)
 };
 
 /* Starting value for maximum packet size negotiation on unicast links
@@ -138,8 +138,8 @@
 void tipc_link_build_bc_sync_msg(struct tipc_link *l,
 				 struct sk_buff_head *xmitq);
 void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
-void tipc_link_bc_sync_rcv(struct tipc_link *l,   struct tipc_msg *hdr,
-			   struct sk_buff_head *xmitq);
+int tipc_link_bc_sync_rcv(struct tipc_link *l,   struct tipc_msg *hdr,
+			  struct sk_buff_head *xmitq);
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 			  struct sk_buff_head *xmitq);
 #endif
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index b62caa1..ed97a58 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -728,12 +728,13 @@
 			     u32 bearer_id, u32 *prev_node)
 {
 	struct tipc_monitor *mon = tipc_monitor(net, bearer_id);
-	struct tipc_peer *peer = mon->self;
+	struct tipc_peer *peer;
 
 	if (!mon)
 		return -EINVAL;
 
 	read_lock_bh(&mon->lock);
+	peer = mon->self;
 	do {
 		if (*prev_node) {
 			if (peer->addr == *prev_node)
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 7cf52fb..c3832cd 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -719,6 +719,16 @@
 	return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET];
 }
 
+static inline u32 msg_bc_gap(struct tipc_msg *m)
+{
+	return msg_bits(m, 8, 0, 0x3ff);
+}
+
+static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n)
+{
+	msg_set_bits(m, 8, 0, 0x3ff, n);
+}
+
 /*
  * Word 9
  */
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 6b626a6..a04fe9b 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -62,6 +62,8 @@
 
 /**
  * named_prepare_buf - allocate & initialize a publication message
+ *
+ * The buffer returned is of size INT_H_SIZE + payload size
  */
 static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size,
 					 u32 dest)
@@ -141,9 +143,9 @@
 	struct publication *publ;
 	struct sk_buff *skb = NULL;
 	struct distr_item *item = NULL;
-	uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) *
-			ITEM_SIZE;
-	uint msg_rem = msg_dsz;
+	u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) /
+			ITEM_SIZE) * ITEM_SIZE;
+	u32 msg_rem = msg_dsz;
 
 	list_for_each_entry(publ, pls, local_list) {
 		/* Prepare next buffer: */
diff --git a/net/tipc/net.h b/net/tipc/net.h
index 77a7a11..c7c2549 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -39,6 +39,8 @@
 
 #include <net/genetlink.h>
 
+extern const struct nla_policy tipc_nl_net_policy[];
+
 int tipc_net_start(struct net *net, u32 addr);
 
 void tipc_net_stop(struct net *net);
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index a84daec..3200059 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -41,6 +41,7 @@
 #include "link.h"
 #include "node.h"
 #include "net.h"
+#include "udp_media.h"
 #include <net/genetlink.h>
 
 static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = {
@@ -161,6 +162,11 @@
 		.policy = tipc_nl_policy,
 	},
 	{
+		.cmd	= TIPC_NL_BEARER_ADD,
+		.doit	= tipc_nl_bearer_add,
+		.policy = tipc_nl_policy,
+	},
+	{
 		.cmd	= TIPC_NL_BEARER_SET,
 		.doit	= tipc_nl_bearer_set,
 		.policy = tipc_nl_policy,
@@ -238,6 +244,18 @@
 		.dumpit	= tipc_nl_node_dump_monitor_peer,
 		.policy = tipc_nl_policy,
 	},
+	{
+		.cmd	= TIPC_NL_PEER_REMOVE,
+		.doit	= tipc_nl_peer_rm,
+		.policy = tipc_nl_policy,
+	},
+#ifdef CONFIG_TIPC_MEDIA_UDP
+	{
+		.cmd	= TIPC_NL_UDP_GET_REMOTEIP,
+		.dumpit	= tipc_udp_nl_dump_remoteip,
+		.policy = tipc_nl_policy,
+	},
+#endif
 };
 
 int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2197419..7ef14e2 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1262,6 +1262,34 @@
 	kfree_skb(skb);
 }
 
+static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
+				  int bearer_id, struct sk_buff_head *xmitq)
+{
+	struct tipc_link *ucl;
+	int rc;
+
+	rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
+
+	if (rc & TIPC_LINK_DOWN_EVT) {
+		tipc_bearer_reset_all(n->net);
+		return;
+	}
+
+	if (!(rc & TIPC_LINK_SND_STATE))
+		return;
+
+	/* If probe message, a STATE response will be sent anyway */
+	if (msg_probe(hdr))
+		return;
+
+	/* Produce a STATE message carrying broadcast NACK */
+	tipc_node_read_lock(n);
+	ucl = n->links[bearer_id].link;
+	if (ucl)
+		tipc_link_build_state_msg(ucl, xmitq);
+	tipc_node_read_unlock(n);
+}
+
 /**
  * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node
  * @net: the applicable net namespace
@@ -1298,7 +1326,7 @@
 	rc = tipc_bcast_rcv(net, be->link, skb);
 
 	/* Broadcast ACKs are sent on a unicast link */
-	if (rc & TIPC_LINK_SND_BC_ACK) {
+	if (rc & TIPC_LINK_SND_STATE) {
 		tipc_node_read_lock(n);
 		tipc_link_build_state_msg(le->link, &xmitq);
 		tipc_node_read_unlock(n);
@@ -1505,7 +1533,7 @@
 
 	/* Ensure broadcast reception is in synch with peer's send state */
 	if (unlikely(usr == LINK_PROTOCOL))
-		tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr);
+		tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
 	else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
 		tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack);
 
@@ -1553,6 +1581,69 @@
 	kfree_skb(skb);
 }
 
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = sock_net(skb->sk);
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct nlattr *attrs[TIPC_NLA_NET_MAX + 1];
+	struct tipc_node *peer;
+	u32 addr;
+	int err;
+	int i;
+
+	/* We identify the peer by its net */
+	if (!info->attrs[TIPC_NLA_NET])
+		return -EINVAL;
+
+	err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX,
+			       info->attrs[TIPC_NLA_NET],
+			       tipc_nl_net_policy);
+	if (err)
+		return err;
+
+	if (!attrs[TIPC_NLA_NET_ADDR])
+		return -EINVAL;
+
+	addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]);
+
+	if (in_own_node(net, addr))
+		return -ENOTSUPP;
+
+	spin_lock_bh(&tn->node_list_lock);
+	peer = tipc_node_find(net, addr);
+	if (!peer) {
+		spin_unlock_bh(&tn->node_list_lock);
+		return -ENXIO;
+	}
+
+	tipc_node_write_lock(peer);
+	if (peer->state != SELF_DOWN_PEER_DOWN &&
+	    peer->state != SELF_DOWN_PEER_LEAVING) {
+		tipc_node_write_unlock(peer);
+		err = -EBUSY;
+		goto err_out;
+	}
+
+	for (i = 0; i < MAX_BEARERS; i++) {
+		struct tipc_link_entry *le = &peer->links[i];
+
+		if (le->link) {
+			kfree(le->link);
+			le->link = NULL;
+			peer->link_cnt--;
+		}
+	}
+	tipc_node_write_unlock(peer);
+	tipc_node_delete(peer);
+
+	err = 0;
+err_out:
+	tipc_node_put(peer);
+	spin_unlock_bh(&tn->node_list_lock);
+
+	return err;
+}
+
 int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int err;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index d69fdfc..39ef54c 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -1,7 +1,7 @@
 /*
  * net/tipc/node.h: Include file for TIPC node management routines
  *
- * Copyright (c) 2000-2006, 2014-2015, Ericsson AB
+ * Copyright (c) 2000-2006, 2014-2016, Ericsson AB
  * Copyright (c) 2005, 2010-2014, Wind River Systems
  * All rights reserved.
  *
@@ -45,11 +45,14 @@
 /* Optional capabilities supported by this code version
  */
 enum {
-	TIPC_BCAST_SYNCH   = (1 << 1),
-	TIPC_BLOCK_FLOWCTL = (2 << 1)
+	TIPC_BCAST_SYNCH      = (1 << 1),
+	TIPC_BCAST_STATE_NACK = (1 << 2),
+	TIPC_BLOCK_FLOWCTL    = (1 << 3)
 };
 
-#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL)
+#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
+				TIPC_BCAST_STATE_NACK | \
+				TIPC_BLOCK_FLOWCTL)
 #define INVALID_BEARER_ID -1
 
 void tipc_node_stop(struct net *net);
@@ -77,6 +80,7 @@
 int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info);
 
 int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c49b8df..f9f5f3c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2180,7 +2180,8 @@
 					      TIPC_CONN_MSG, SHORT_H_SIZE,
 					      0, dnode, onode, dport, oport,
 					      TIPC_CONN_SHUTDOWN);
-			tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
+			if (skb)
+				tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
 		}
 		tsk->connected = 0;
 		sock->state = SS_DISCONNECTING;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index b016c01..d80cd3f 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -49,6 +49,7 @@
 #include "core.h"
 #include "bearer.h"
 #include "netlink.h"
+#include "msg.h"
 
 /* IANA assigned UDP port */
 #define UDP_PORT_DEFAULT	6118
@@ -70,6 +71,13 @@
 	};
 };
 
+/* struct udp_replicast - container for UDP remote addresses */
+struct udp_replicast {
+	struct udp_media_addr addr;
+	struct rcu_head rcu;
+	struct list_head list;
+};
+
 /**
  * struct udp_bearer - ip/udp bearer data structure
  * @bearer:	associated generic tipc bearer
@@ -82,8 +90,20 @@
 	struct socket *ubsock;
 	u32 ifindex;
 	struct work_struct work;
+	struct udp_replicast rcast;
 };
 
+static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr)
+{
+	if (ntohs(addr->proto) == ETH_P_IP)
+		return ipv4_is_multicast(addr->ipv4.s_addr);
+#if IS_ENABLED(CONFIG_IPV6)
+	else
+		return ipv6_addr_is_multicast(&addr->ipv6);
+#endif
+	return 0;
+}
+
 /* udp_media_addr_set - convert a ip/udp address to a TIPC media address */
 static void tipc_udp_media_addr_set(struct tipc_media_addr *addr,
 				    struct udp_media_addr *ua)
@@ -91,15 +111,9 @@
 	memset(addr, 0, sizeof(struct tipc_media_addr));
 	addr->media_id = TIPC_MEDIA_TYPE_UDP;
 	memcpy(addr->value, ua, sizeof(struct udp_media_addr));
-	if (ntohs(ua->proto) == ETH_P_IP) {
-		if (ipv4_is_multicast(ua->ipv4.s_addr))
-			addr->broadcast = 1;
-	} else if (ntohs(ua->proto) == ETH_P_IPV6) {
-		if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST)
-			addr->broadcast = 1;
-	} else {
-		pr_err("Invalid UDP media address\n");
-	}
+
+	if (tipc_udp_is_mcast_addr(ua))
+		addr->broadcast = 1;
 }
 
 /* tipc_udp_addr2str - convert ip/udp address to string */
@@ -140,28 +154,13 @@
 }
 
 /* tipc_send_msg - enqueue a send request */
-static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
-			     struct tipc_bearer *b,
-			     struct tipc_media_addr *dest)
+static int tipc_udp_xmit(struct net *net, struct sk_buff *skb,
+			 struct udp_bearer *ub, struct udp_media_addr *src,
+			 struct udp_media_addr *dst)
 {
 	int ttl, err = 0;
-	struct udp_bearer *ub;
-	struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
-	struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
 	struct rtable *rt;
 
-	if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
-		err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
-		if (err)
-			goto tx_error;
-	}
-
-	skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
-	ub = rcu_dereference_rtnl(b->media_ptr);
-	if (!ub) {
-		err = -ENODEV;
-		goto tx_error;
-	}
 	if (dst->proto == htons(ETH_P_IP)) {
 		struct flowi4 fl = {
 			.daddr = dst->ipv4.s_addr,
@@ -207,29 +206,178 @@
 	return err;
 }
 
+static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
+			     struct tipc_bearer *b,
+			     struct tipc_media_addr *addr)
+{
+	struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+	struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value;
+	struct udp_replicast *rcast;
+	struct udp_bearer *ub;
+	int err = 0;
+
+	if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+		err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+		if (err)
+			goto out;
+	}
+
+	skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	if (!addr->broadcast || list_empty(&ub->rcast.list))
+		return tipc_udp_xmit(net, skb, ub, src, dst);
+
+	/* Replicast, send an skb to each configured IP address */
+	list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
+		struct sk_buff *_skb;
+
+		_skb = pskb_copy(skb, GFP_ATOMIC);
+		if (!_skb) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
+		if (err) {
+			kfree_skb(_skb);
+			goto out;
+		}
+	}
+	err = 0;
+out:
+	kfree_skb(skb);
+	return err;
+}
+
+static bool tipc_udp_is_known_peer(struct tipc_bearer *b,
+				   struct udp_media_addr *addr)
+{
+	struct udp_replicast *rcast, *tmp;
+	struct udp_bearer *ub;
+
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub) {
+		pr_err_ratelimited("UDP bearer instance not found\n");
+		return false;
+	}
+
+	list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+		if (!memcmp(&rcast->addr, addr, sizeof(struct udp_media_addr)))
+			return true;
+	}
+
+	return false;
+}
+
+static int tipc_udp_rcast_add(struct tipc_bearer *b,
+			      struct udp_media_addr *addr)
+{
+	struct udp_replicast *rcast;
+	struct udp_bearer *ub;
+
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub)
+		return -ENODEV;
+
+	rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC);
+	if (!rcast)
+		return -ENOMEM;
+
+	memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
+
+	if (ntohs(addr->proto) == ETH_P_IP)
+		pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4);
+#if IS_ENABLED(CONFIG_IPV6)
+	else if (ntohs(addr->proto) == ETH_P_IPV6)
+		pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6);
+#endif
+
+	list_add_rcu(&rcast->list, &ub->rcast.list);
+	return 0;
+}
+
+static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb)
+{
+	struct udp_media_addr src = {0};
+	struct udp_media_addr *dst;
+
+	dst = (struct udp_media_addr *)&b->bcast_addr.value;
+	if (tipc_udp_is_mcast_addr(dst))
+		return 0;
+
+	src.port = udp_hdr(skb)->source;
+
+	if (ip_hdr(skb)->version == 4) {
+		struct iphdr *iphdr = ip_hdr(skb);
+
+		src.proto = htons(ETH_P_IP);
+		src.ipv4.s_addr = iphdr->saddr;
+		if (ipv4_is_multicast(iphdr->daddr))
+			return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (ip_hdr(skb)->version == 6) {
+		struct ipv6hdr *iphdr = ipv6_hdr(skb);
+
+		src.proto = htons(ETH_P_IPV6);
+		src.ipv6 = iphdr->saddr;
+		if (ipv6_addr_is_multicast(&iphdr->daddr))
+			return 0;
+#endif
+	} else {
+		return 0;
+	}
+
+	if (likely(tipc_udp_is_known_peer(b, &src)))
+		return 0;
+
+	return tipc_udp_rcast_add(b, &src);
+}
+
 /* tipc_udp_recv - read data from bearer socket */
 static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct udp_bearer *ub;
 	struct tipc_bearer *b;
+	struct tipc_msg *hdr;
+	int err;
 
 	ub = rcu_dereference_sk_user_data(sk);
 	if (!ub) {
 		pr_err_ratelimited("Failed to get UDP bearer reference");
-		kfree_skb(skb);
-		return 0;
+		goto out;
 	}
-
 	skb_pull(skb, sizeof(struct udphdr));
+	hdr = buf_msg(skb);
+
 	rcu_read_lock();
 	b = rcu_dereference_rtnl(ub->bearer);
+	if (!b)
+		goto rcu_out;
 
-	if (b) {
+	if (b && test_bit(0, &b->up)) {
 		tipc_rcv(sock_net(sk), skb, b);
 		rcu_read_unlock();
 		return 0;
 	}
+
+	if (unlikely(msg_user(hdr) == LINK_CONFIG)) {
+		err = tipc_udp_rcast_disc(b, skb);
+		if (err)
+			goto rcu_out;
+	}
+
+	tipc_rcv(sock_net(sk), skb, b);
 	rcu_read_unlock();
+	return 0;
+
+rcu_out:
+	rcu_read_unlock();
+out:
 	kfree_skb(skb);
 	return 0;
 }
@@ -241,15 +389,11 @@
 	struct sock *sk = ub->ubsock->sk;
 
 	if (ntohs(remote->proto) == ETH_P_IP) {
-		if (!ipv4_is_multicast(remote->ipv4.s_addr))
-			return 0;
 		mreqn.imr_multiaddr = remote->ipv4;
 		mreqn.imr_ifindex = ub->ifindex;
 		err = ip_mc_join_group(sk, &mreqn);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
-		if (!ipv6_addr_is_multicast(&remote->ipv6))
-			return 0;
 		err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex,
 						   &remote->ipv6);
 #endif
@@ -257,75 +401,234 @@
 	return err;
 }
 
-/**
- * parse_options - build local/remote addresses from configuration
- * @attrs:	netlink config data
- * @ub:		UDP bearer instance
- * @local:	local bearer IP address/port
- * @remote:	peer or multicast IP/port
- */
-static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
-			 struct udp_media_addr *local,
-			 struct udp_media_addr *remote)
+static int __tipc_nl_add_udp_addr(struct sk_buff *skb,
+				  struct udp_media_addr *addr, int nla_t)
 {
-	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
-	struct sockaddr_storage sa_local, sa_remote;
+	if (ntohs(addr->proto) == ETH_P_IP) {
+		struct sockaddr_in ip4;
 
-	if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
-		goto err;
-	if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
-			     attrs[TIPC_NLA_BEARER_UDP_OPTS],
-			     tipc_nl_udp_policy))
-		goto err;
-	if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) {
-		nla_memcpy(&sa_local, opts[TIPC_NLA_UDP_LOCAL],
-			   sizeof(sa_local));
-		nla_memcpy(&sa_remote, opts[TIPC_NLA_UDP_REMOTE],
-			   sizeof(sa_remote));
+		ip4.sin_family = AF_INET;
+		ip4.sin_port = addr->port;
+		ip4.sin_addr.s_addr = addr->ipv4.s_addr;
+		if (nla_put(skb, nla_t, sizeof(ip4), &ip4))
+			return -EMSGSIZE;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (ntohs(addr->proto) == ETH_P_IPV6) {
+		struct sockaddr_in6 ip6;
+
+		ip6.sin6_family = AF_INET6;
+		ip6.sin6_port  = addr->port;
+		memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr));
+		if (nla_put(skb, nla_t, sizeof(ip6), &ip6))
+			return -EMSGSIZE;
+#endif
+	}
+
+	return 0;
+}
+
+int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	u32 bid = cb->args[0];
+	u32 skip_cnt = cb->args[1];
+	u32 portid = NETLINK_CB(cb->skb).portid;
+	struct udp_replicast *rcast, *tmp;
+	struct tipc_bearer *b;
+	struct udp_bearer *ub;
+	void *hdr;
+	int err;
+	int i;
+
+	if (!bid && !skip_cnt) {
+		struct net *net = sock_net(skb->sk);
+		struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1];
+		struct nlattr **attrs;
+		char *bname;
+
+		err = tipc_nlmsg_parse(cb->nlh, &attrs);
+		if (err)
+			return err;
+
+		if (!attrs[TIPC_NLA_BEARER])
+			return -EINVAL;
+
+		err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX,
+				       attrs[TIPC_NLA_BEARER],
+				       tipc_nl_bearer_policy);
+		if (err)
+			return err;
+
+		if (!battrs[TIPC_NLA_BEARER_NAME])
+			return -EINVAL;
+
+		bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]);
+
+		rtnl_lock();
+		b = tipc_bearer_find(net, bname);
+		if (!b) {
+			rtnl_unlock();
+			return -EINVAL;
+		}
+		bid = b->identity;
 	} else {
-err:
-		pr_err("Invalid UDP bearer configuration");
+		struct net *net = sock_net(skb->sk);
+		struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+		rtnl_lock();
+		b = rtnl_dereference(tn->bearer_list[bid]);
+		if (!b) {
+			rtnl_unlock();
+			return -EINVAL;
+		}
+	}
+
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub) {
+		rtnl_unlock();
 		return -EINVAL;
 	}
-	if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET) {
-		struct sockaddr_in *ip4;
 
-		ip4 = (struct sockaddr_in *)&sa_local;
-		local->proto = htons(ETH_P_IP);
-		local->port = ip4->sin_port;
-		local->ipv4.s_addr = ip4->sin_addr.s_addr;
+	i = 0;
+	list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+		if (i < skip_cnt)
+			goto count;
 
-		ip4 = (struct sockaddr_in *)&sa_remote;
-		remote->proto = htons(ETH_P_IP);
-		remote->port = ip4->sin_port;
-		remote->ipv4.s_addr = ip4->sin_addr.s_addr;
+		hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq,
+				  &tipc_genl_family, NLM_F_MULTI,
+				  TIPC_NL_BEARER_GET);
+		if (!hdr)
+			goto done;
+
+		err = __tipc_nl_add_udp_addr(skb, &rcast->addr,
+					     TIPC_NLA_UDP_REMOTE);
+		if (err) {
+			genlmsg_cancel(skb, hdr);
+			goto done;
+		}
+		genlmsg_end(skb, hdr);
+count:
+		i++;
+	}
+done:
+	rtnl_unlock();
+	cb->args[0] = bid;
+	cb->args[1] = i;
+
+	return skb->len;
+}
+
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b)
+{
+	struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+	struct udp_media_addr *dst;
+	struct udp_bearer *ub;
+	struct nlattr *nest;
+
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub)
+		return -ENODEV;
+
+	nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS);
+	if (!nest)
+		goto msg_full;
+
+	if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL))
+		goto msg_full;
+
+	dst = (struct udp_media_addr *)&b->bcast_addr.value;
+	if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE))
+		goto msg_full;
+
+	if (!list_empty(&ub->rcast.list)) {
+		if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP))
+			goto msg_full;
+	}
+
+	nla_nest_end(msg->skb, nest);
+	return 0;
+msg_full:
+	nla_nest_cancel(msg->skb, nest);
+	return -EMSGSIZE;
+}
+
+/**
+ * tipc_parse_udp_addr - build udp media address from netlink data
+ * @nlattr:	netlink attribute containing sockaddr storage aligned address
+ * @addr:	tipc media address to fill with address, port and protocol type
+ * @scope_id:	IPv6 scope id pointer, not NULL indicates it's required
+ */
+
+static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr,
+			       u32 *scope_id)
+{
+	struct sockaddr_storage sa;
+
+	nla_memcpy(&sa, nla, sizeof(sa));
+	if (sa.ss_family == AF_INET) {
+		struct sockaddr_in *ip4 = (struct sockaddr_in *)&sa;
+
+		addr->proto = htons(ETH_P_IP);
+		addr->port = ip4->sin_port;
+		addr->ipv4.s_addr = ip4->sin_addr.s_addr;
 		return 0;
 
 #if IS_ENABLED(CONFIG_IPV6)
-	} else if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET6) {
-		int atype;
-		struct sockaddr_in6 *ip6;
+	} else if (sa.ss_family == AF_INET6) {
+		struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)&sa;
 
-		ip6 = (struct sockaddr_in6 *)&sa_local;
-		atype = ipv6_addr_type(&ip6->sin6_addr);
-		if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id)
-			return -EINVAL;
+		addr->proto = htons(ETH_P_IPV6);
+		addr->port = ip6->sin6_port;
+		memcpy(&addr->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
 
-		local->proto = htons(ETH_P_IPV6);
-		local->port = ip6->sin6_port;
-		memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
-		ub->ifindex = ip6->sin6_scope_id;
+		/* Scope ID is only interesting for local addresses */
+		if (scope_id) {
+			int atype;
 
-		ip6 = (struct sockaddr_in6 *)&sa_remote;
-		remote->proto = htons(ETH_P_IPV6);
-		remote->port = ip6->sin6_port;
-		memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr));
+			atype = ipv6_addr_type(&ip6->sin6_addr);
+			if (__ipv6_addr_needs_scope_id(atype) &&
+			    !ip6->sin6_scope_id) {
+				return -EINVAL;
+			}
+
+			*scope_id = ip6->sin6_scope_id ? : 0;
+		}
+
 		return 0;
 #endif
 	}
 	return -EADDRNOTAVAIL;
 }
 
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
+{
+	int err;
+	struct udp_media_addr addr = {0};
+	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+	struct udp_media_addr *dst;
+
+	if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy))
+		return -EINVAL;
+
+	if (!opts[TIPC_NLA_UDP_REMOTE])
+		return -EINVAL;
+
+	err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL);
+	if (err)
+		return err;
+
+	dst = (struct udp_media_addr *)&b->bcast_addr.value;
+	if (tipc_udp_is_mcast_addr(dst)) {
+		pr_err("Can't add remote ip to TIPC UDP multicast bearer\n");
+		return -EINVAL;
+	}
+
+	if (tipc_udp_is_known_peer(b, &addr))
+		return 0;
+
+	return tipc_udp_rcast_add(b, &addr);
+}
+
 /**
  * tipc_udp_enable - callback to create a new udp bearer instance
  * @net:	network namespace
@@ -340,18 +643,38 @@
 {
 	int err = -EINVAL;
 	struct udp_bearer *ub;
-	struct udp_media_addr *remote;
+	struct udp_media_addr remote = {0};
 	struct udp_media_addr local = {0};
 	struct udp_port_cfg udp_conf = {0};
 	struct udp_tunnel_sock_cfg tuncfg = {NULL};
+	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
 
 	ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
 	if (!ub)
 		return -ENOMEM;
 
-	remote = (struct udp_media_addr *)&b->bcast_addr.value;
-	memset(remote, 0, sizeof(struct udp_media_addr));
-	err = parse_options(attrs, ub, &local, remote);
+	INIT_LIST_HEAD(&ub->rcast.list);
+
+	if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
+		goto err;
+
+	if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
+			     attrs[TIPC_NLA_BEARER_UDP_OPTS],
+			     tipc_nl_udp_policy))
+		goto err;
+
+	if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) {
+		pr_err("Invalid UDP bearer configuration");
+		err = -EINVAL;
+		goto err;
+	}
+
+	err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local,
+				  &ub->ifindex);
+	if (err)
+		goto err;
+
+	err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL);
 	if (err)
 		goto err;
 
@@ -396,10 +719,22 @@
 	tuncfg.encap_destroy = NULL;
 	setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
 
-	if (enable_mcast(ub, remote))
+	/**
+	 * The bcast media address port is used for all peers and the ip
+	 * is used if it's a multicast address.
+	 */
+	memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
+	if (tipc_udp_is_mcast_addr(&remote))
+		err = enable_mcast(ub, &remote);
+	else
+		err = tipc_udp_rcast_add(b, &remote);
+	if (err)
 		goto err;
+
 	return 0;
 err:
+	if (ub->ubsock)
+		udp_tunnel_sock_release(ub->ubsock);
 	kfree(ub);
 	return err;
 }
@@ -408,6 +743,12 @@
 static void cleanup_bearer(struct work_struct *work)
 {
 	struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
+	struct udp_replicast *rcast, *tmp;
+
+	list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
+		list_del_rcu(&rcast->list);
+		kfree_rcu(rcast, rcu);
+	}
 
 	if (ub->ubsock)
 		udp_tunnel_sock_release(ub->ubsock);
diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
new file mode 100644
index 0000000..281bbae
--- /dev/null
+++ b/net/tipc/udp_media.h
@@ -0,0 +1,46 @@
+/*
+ * net/tipc/udp_media.h: Include file for UDP bearer media
+ *
+ * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef CONFIG_TIPC_MEDIA_UDP
+#ifndef _TIPC_UDP_MEDIA_H
+#define _TIPC_UDP_MEDIA_H
+
+int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
+int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b);
+int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb);
+
+#endif
+#endif
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f1dffe8..8309687 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -661,11 +661,11 @@
 {
 	struct unix_sock *u = unix_sk(sk);
 
-	if (mutex_lock_interruptible(&u->readlock))
+	if (mutex_lock_interruptible(&u->iolock))
 		return -EINTR;
 
 	sk->sk_peek_off = val;
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 
 	return 0;
 }
@@ -779,7 +779,8 @@
 	spin_lock_init(&u->lock);
 	atomic_long_set(&u->inflight, 0);
 	INIT_LIST_HEAD(&u->link);
-	mutex_init(&u->readlock); /* single task reading lock */
+	mutex_init(&u->iolock); /* single task reading lock */
+	mutex_init(&u->bindlock); /* single task binding lock */
 	init_waitqueue_head(&u->peer_wait);
 	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
@@ -848,7 +849,7 @@
 	int err;
 	unsigned int retries = 0;
 
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->bindlock);
 	if (err)
 		return err;
 
@@ -895,7 +896,7 @@
 	spin_unlock(&unix_table_lock);
 	err = 0;
 
-out:	mutex_unlock(&u->readlock);
+out:	mutex_unlock(&u->bindlock);
 	return err;
 }
 
@@ -954,20 +955,32 @@
 	return NULL;
 }
 
-static int unix_mknod(struct dentry *dentry, const struct path *path, umode_t mode,
-		      struct path *res)
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 {
-	int err;
+	struct dentry *dentry;
+	struct path path;
+	int err = 0;
+	/*
+	 * Get the parent directory, calculate the hash for last
+	 * component.
+	 */
+	dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+	err = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		return err;
 
-	err = security_path_mknod(path, dentry, mode, 0);
+	/*
+	 * All right, let's create it.
+	 */
+	err = security_path_mknod(&path, dentry, mode, 0);
 	if (!err) {
-		err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
+		err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 		if (!err) {
-			res->mnt = mntget(path->mnt);
+			res->mnt = mntget(path.mnt);
 			res->dentry = dget(dentry);
 		}
 	}
-
+	done_path_create(&path, dentry);
 	return err;
 }
 
@@ -978,12 +991,10 @@
 	struct unix_sock *u = unix_sk(sk);
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 	char *sun_path = sunaddr->sun_path;
-	int err, name_err;
+	int err;
 	unsigned int hash;
 	struct unix_address *addr;
 	struct hlist_head *list;
-	struct path path;
-	struct dentry *dentry;
 
 	err = -EINVAL;
 	if (sunaddr->sun_family != AF_UNIX)
@@ -999,34 +1010,14 @@
 		goto out;
 	addr_len = err;
 
-	name_err = 0;
-	dentry = NULL;
-	if (sun_path[0]) {
-		/* Get the parent directory, calculate the hash for last
-		 * component.
-		 */
-		dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
-
-		if (IS_ERR(dentry)) {
-			/* delay report until after 'already bound' check */
-			name_err = PTR_ERR(dentry);
-			dentry = NULL;
-		}
-	}
-
-	err = mutex_lock_interruptible(&u->readlock);
+	err = mutex_lock_interruptible(&u->bindlock);
 	if (err)
-		goto out_path;
+		goto out;
 
 	err = -EINVAL;
 	if (u->addr)
 		goto out_up;
 
-	if (name_err) {
-		err = name_err == -EEXIST ? -EADDRINUSE : name_err;
-		goto out_up;
-	}
-
 	err = -ENOMEM;
 	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 	if (!addr)
@@ -1037,11 +1028,11 @@
 	addr->hash = hash ^ sk->sk_type;
 	atomic_set(&addr->refcnt, 1);
 
-	if (dentry) {
-		struct path u_path;
+	if (sun_path[0]) {
+		struct path path;
 		umode_t mode = S_IFSOCK |
 		       (SOCK_INODE(sock)->i_mode & ~current_umask());
-		err = unix_mknod(dentry, &path, mode, &u_path);
+		err = unix_mknod(sun_path, mode, &path);
 		if (err) {
 			if (err == -EEXIST)
 				err = -EADDRINUSE;
@@ -1049,9 +1040,9 @@
 			goto out_up;
 		}
 		addr->hash = UNIX_HASH_SIZE;
-		hash = d_real_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
+		hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
 		spin_lock(&unix_table_lock);
-		u->path = u_path;
+		u->path = path;
 		list = &unix_socket_table[hash];
 	} else {
 		spin_lock(&unix_table_lock);
@@ -1073,11 +1064,7 @@
 out_unlock:
 	spin_unlock(&unix_table_lock);
 out_up:
-	mutex_unlock(&u->readlock);
-out_path:
-	if (dentry)
-		done_path_create(&path, dentry);
-
+	mutex_unlock(&u->bindlock);
 out:
 	return err;
 }
@@ -1969,17 +1956,17 @@
 	if (false) {
 alloc_skb:
 		unix_state_unlock(other);
-		mutex_unlock(&unix_sk(other)->readlock);
+		mutex_unlock(&unix_sk(other)->iolock);
 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
 					      &err, 0);
 		if (!newskb)
 			goto err;
 	}
 
-	/* we must acquire readlock as we modify already present
+	/* we must acquire iolock as we modify already present
 	 * skbs in the sk_receive_queue and mess with skb->len
 	 */
-	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
+	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
 	if (err) {
 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
 		goto err;
@@ -2046,7 +2033,7 @@
 	}
 
 	unix_state_unlock(other);
-	mutex_unlock(&unix_sk(other)->readlock);
+	mutex_unlock(&unix_sk(other)->iolock);
 
 	other->sk_data_ready(other);
 	scm_destroy(&scm);
@@ -2055,7 +2042,7 @@
 err_state_unlock:
 	unix_state_unlock(other);
 err_unlock:
-	mutex_unlock(&unix_sk(other)->readlock);
+	mutex_unlock(&unix_sk(other)->iolock);
 err:
 	kfree_skb(newskb);
 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
@@ -2123,7 +2110,7 @@
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
 	do {
-		mutex_lock(&u->readlock);
+		mutex_lock(&u->iolock);
 
 		skip = sk_peek_offset(sk, flags);
 		skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
@@ -2131,14 +2118,14 @@
 		if (skb)
 			break;
 
-		mutex_unlock(&u->readlock);
+		mutex_unlock(&u->iolock);
 
 		if (err != -EAGAIN)
 			break;
 	} while (timeo &&
 		 !__skb_wait_for_more_packets(sk, &err, &timeo, last));
 
-	if (!skb) { /* implies readlock unlocked */
+	if (!skb) { /* implies iolock unlocked */
 		unix_state_lock(sk);
 		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
 		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
@@ -2203,7 +2190,7 @@
 
 out_free:
 	skb_free_datagram(sk, skb);
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 out:
 	return err;
 }
@@ -2298,7 +2285,7 @@
 	/* Lock the socket to prevent queue disordering
 	 * while sleeps in memcpy_tomsg
 	 */
-	mutex_lock(&u->readlock);
+	mutex_lock(&u->iolock);
 
 	if (flags & MSG_PEEK)
 		skip = sk_peek_offset(sk, flags);
@@ -2340,7 +2327,7 @@
 				break;
 			}
 
-			mutex_unlock(&u->readlock);
+			mutex_unlock(&u->iolock);
 
 			timeo = unix_stream_data_wait(sk, timeo, last,
 						      last_len);
@@ -2351,7 +2338,7 @@
 				goto out;
 			}
 
-			mutex_lock(&u->readlock);
+			mutex_lock(&u->iolock);
 			goto redo;
 unlock:
 			unix_state_unlock(sk);
@@ -2454,7 +2441,7 @@
 		}
 	} while (size);
 
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 	if (state->msg)
 		scm_recv(sock, state->msg, &scm, flags);
 	else
@@ -2495,9 +2482,9 @@
 	int ret;
 	struct unix_sock *u = unix_sk(sk);
 
-	mutex_unlock(&u->readlock);
+	mutex_unlock(&u->iolock);
 	ret = splice_to_pipe(pipe, spd);
-	mutex_lock(&u->readlock);
+	mutex_lock(&u->iolock);
 
 	return ret;
 }
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 17dbbe6..8a398b3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -465,6 +465,8 @@
 
 	if (vsock_is_pending(sk)) {
 		vsock_remove_pending(listener, sk);
+
+		listener->sk_ack_backlog--;
 	} else if (!vsk->rejected) {
 		/* We are not on the pending list and accept() did not reject
 		 * us, so we must have been accepted by our user process.  We
@@ -475,8 +477,6 @@
 		goto out;
 	}
 
-	listener->sk_ack_backlog--;
-
 	/* We need to remove ourself from the global connected sockets list so
 	 * incoming packets can't find this socket, and to reduce the reference
 	 * count.
@@ -2010,5 +2010,5 @@
 
 MODULE_AUTHOR("VMware, Inc.");
 MODULE_DESCRIPTION("VMware Virtual Socket Family");
-MODULE_VERSION("1.0.1.0-k");
+MODULE_VERSION("1.0.2.0-k");
 MODULE_LICENSE("GPL v2");
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 699dfab..936d7ee 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -87,9 +87,6 @@
 
 	vq = vsock->vqs[VSOCK_VQ_TX];
 
-	/* Avoid unnecessary interrupts while we're processing the ring */
-	virtqueue_disable_cb(vq);
-
 	for (;;) {
 		struct virtio_vsock_pkt *pkt;
 		struct scatterlist hdr, buf, *sgs[2];
@@ -99,7 +96,6 @@
 		spin_lock_bh(&vsock->send_pkt_list_lock);
 		if (list_empty(&vsock->send_pkt_list)) {
 			spin_unlock_bh(&vsock->send_pkt_list_lock);
-			virtqueue_enable_cb(vq);
 			break;
 		}
 
@@ -118,13 +114,13 @@
 		}
 
 		ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
+		/* Usually this means that there is no more space available in
+		 * the vq
+		 */
 		if (ret < 0) {
 			spin_lock_bh(&vsock->send_pkt_list_lock);
 			list_add(&pkt->list, &vsock->send_pkt_list);
 			spin_unlock_bh(&vsock->send_pkt_list_lock);
-
-			if (!virtqueue_enable_cb(vq) && ret == -ENOSPC)
-				continue; /* retry now that we have more space */
 			break;
 		}
 
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index b0e11b6..5497d022 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -372,6 +372,7 @@
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 		break;
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NUM_NL80211_IFTYPES:
@@ -513,6 +514,7 @@
 		r = cfg80211_get_chans_dfs_available(wiphy,
 						     chandef->center_freq2,
 						     width);
+		break;
 	default:
 		WARN_ON(chandef->center_freq2);
 		break;
@@ -945,6 +947,7 @@
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_WDS:
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 		/* these interface types don't really have a channel */
 		return;
 	case NL80211_IFTYPE_UNSPECIFIED:
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7645e97..8201e6d 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -225,6 +225,23 @@
 	}
 }
 
+void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
+		       struct wireless_dev *wdev)
+{
+	ASSERT_RTNL();
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN))
+		return;
+
+	if (!wdev->nan_started)
+		return;
+
+	rdev_stop_nan(rdev, wdev);
+	wdev->nan_started = false;
+
+	rdev->opencount--;
+}
+
 void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -242,6 +259,9 @@
 		case NL80211_IFTYPE_P2P_DEVICE:
 			cfg80211_stop_p2p_device(rdev, wdev);
 			break;
+		case NL80211_IFTYPE_NAN:
+			cfg80211_stop_nan(rdev, wdev);
+			break;
 		default:
 			break;
 		}
@@ -537,6 +557,11 @@
 				    c->limits[j].max > 1))
 				return -EINVAL;
 
+			/* Only a single NAN can be allowed */
+			if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) &&
+				    c->limits[j].max > 1))
+				return -EINVAL;
+
 			cnt += c->limits[j].max;
 			/*
 			 * Don't advertise an unsupported type
@@ -579,6 +604,11 @@
 		     !rdev->ops->tdls_cancel_channel_switch)))
 		return -EINVAL;
 
+	if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) &&
+		    (!rdev->ops->start_nan || !rdev->ops->stop_nan ||
+		     !rdev->ops->add_nan_func || !rdev->ops->del_nan_func)))
+		return -EINVAL;
+
 	/*
 	 * if a wiphy has unsupported modes for regulatory channel enforcement,
 	 * opt-out of enforcement checking
@@ -589,6 +619,7 @@
 				       BIT(NL80211_IFTYPE_P2P_GO) |
 				       BIT(NL80211_IFTYPE_ADHOC) |
 				       BIT(NL80211_IFTYPE_P2P_DEVICE) |
+				       BIT(NL80211_IFTYPE_NAN) |
 				       BIT(NL80211_IFTYPE_AP_VLAN) |
 				       BIT(NL80211_IFTYPE_MONITOR)))
 		wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF;
@@ -906,6 +937,8 @@
 	if (WARN_ON(wdev->netdev))
 		return;
 
+	nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
+
 	list_del_rcu(&wdev->list);
 	rdev->devlist_generation++;
 
@@ -914,6 +947,9 @@
 		cfg80211_mlme_purge_registrations(wdev);
 		cfg80211_stop_p2p_device(rdev, wdev);
 		break;
+	case NL80211_IFTYPE_NAN:
+		cfg80211_stop_nan(rdev, wdev);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		break;
@@ -977,6 +1013,7 @@
 		/* must be handled by mac80211/driver, has no APIs */
 		break;
 	case NL80211_IFTYPE_P2P_DEVICE:
+	case NL80211_IFTYPE_NAN:
 		/* cannot happen, has no netdev */
 		break;
 	case NL80211_IFTYPE_AP_VLAN:
@@ -1079,6 +1116,8 @@
 		     wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
 		     wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
 			dev->priv_flags |= IFF_DONT_BRIDGE;
+
+		nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
 		break;
 	case NETDEV_GOING_DOWN:
 		cfg80211_leave(rdev, wdev);
@@ -1157,6 +1196,8 @@
 		 * remove and clean it up.
 		 */
 		if (!list_empty(&wdev->list)) {
+			nl80211_notify_iface(rdev, wdev,
+					     NL80211_CMD_DEL_INTERFACE);
 			sysfs_remove_link(&dev->dev.kobj, "phy80211");
 			list_del_rcu(&wdev->list);
 			rdev->devlist_generation++;
@@ -1246,7 +1287,7 @@
 	if (err)
 		goto out_fail_reg;
 
-	cfg80211_wq = create_singlethread_workqueue("cfg80211");
+	cfg80211_wq = alloc_ordered_workqueue("cfg80211", WQ_MEM_RECLAIM);
 	if (!cfg80211_wq) {
 		err = -ENOMEM;
 		goto out_fail_wq;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index eee9144..08d2e94 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -249,9 +249,9 @@
 };
 
 struct cfg80211_cached_keys {
-	struct key_params params[6];
-	u8 data[6][WLAN_MAX_KEY_LEN];
-	int def, defmgmt;
+	struct key_params params[CFG80211_MAX_WEP_KEYS];
+	u8 data[CFG80211_MAX_WEP_KEYS][WLAN_KEY_LEN_WEP104];
+	int def;
 };
 
 enum cfg80211_chan_mode {
@@ -488,6 +488,9 @@
 void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,
 			      struct wireless_dev *wdev);
 
+void cfg80211_stop_nan(struct cfg80211_registered_device *rdev,
+		       struct wireless_dev *wdev);
+
 #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10
 
 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 4a4dda5..364f900 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -43,7 +43,8 @@
 	cfg80211_hold_bss(bss_from_pub(bss));
 	wdev->current_bss = bss_from_pub(bss);
 
-	cfg80211_upload_connect_keys(wdev);
+	if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
+		cfg80211_upload_connect_keys(wdev);
 
 	nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid,
 				GFP_KERNEL);
@@ -114,6 +115,9 @@
 		}
 	}
 
+	if (WARN_ON(connkeys && connkeys->def < 0))
+		return -EINVAL;
+
 	if (WARN_ON(wdev->connect_keys))
 		kzfree(wdev->connect_keys);
 	wdev->connect_keys = connkeys;
@@ -284,18 +288,16 @@
 	if (!netif_running(wdev->netdev))
 		return 0;
 
-	if (wdev->wext.keys) {
+	if (wdev->wext.keys)
 		wdev->wext.keys->def = wdev->wext.default_key;
-		wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key;
-	}
 
 	wdev->wext.ibss.privacy = wdev->wext.default_key != -1;
 
-	if (wdev->wext.keys) {
+	if (wdev->wext.keys && wdev->wext.keys->def != -1) {
 		ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
 		if (!ck)
 			return -ENOMEM;
-		for (i = 0; i < 6; i++)
+		for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
 			ck->params[i].key = ck->data[i];
 	}
 	err = __cfg80211_join_ibss(rdev, wdev->netdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index c284d88..cbb48e2 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -222,7 +222,7 @@
 	ASSERT_WDEV_LOCK(wdev);
 
 	if (auth_type == NL80211_AUTHTYPE_SHARED_KEY)
-		if (!key || !key_len || key_idx < 0 || key_idx > 4)
+		if (!key || !key_len || key_idx < 0 || key_idx > 3)
 			return -EINVAL;
 
 	if (wdev->current_bss &&
@@ -634,6 +634,7 @@
 			 * fall through, P2P device only supports
 			 * public action frames
 			 */
+		case NL80211_IFTYPE_NAN:
 		default:
 			err = -EOPNOTSUPP;
 			break;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 46417f9..c510810 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -56,6 +56,7 @@
 	NL80211_MCGRP_REGULATORY,
 	NL80211_MCGRP_MLME,
 	NL80211_MCGRP_VENDOR,
+	NL80211_MCGRP_NAN,
 	NL80211_MCGRP_TESTMODE /* keep last - ifdef! */
 };
 
@@ -65,6 +66,7 @@
 	[NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG },
 	[NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME },
 	[NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR },
+	[NL80211_MCGRP_NAN] = { .name = NL80211_MULTICAST_GROUP_NAN },
 #ifdef CONFIG_NL80211_TESTMODE
 	[NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE }
 #endif
@@ -409,6 +411,9 @@
 		.len = VHT_MUMIMO_GROUPS_DATA_LEN
 	},
 	[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN },
+	[NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 },
+	[NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 },
+	[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
 };
 
 /* policy for the key attributes */
@@ -502,6 +507,39 @@
 	},
 };
 
+/* policy for NAN function attributes */
+static const struct nla_policy
+nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = {
+	[NL80211_NAN_FUNC_TYPE] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_SERVICE_ID] = { .type = NLA_BINARY,
+				    .len = NL80211_NAN_FUNC_SERVICE_ID_LEN },
+	[NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG },
+	[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG },
+	[NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_FOLLOW_UP_DEST] = { .len = ETH_ALEN },
+	[NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG },
+	[NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 },
+	[NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY,
+			.len = NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN },
+	[NL80211_NAN_FUNC_SRF] = { .type = NLA_NESTED },
+	[NL80211_NAN_FUNC_RX_MATCH_FILTER] = { .type = NLA_NESTED },
+	[NL80211_NAN_FUNC_TX_MATCH_FILTER] = { .type = NLA_NESTED },
+	[NL80211_NAN_FUNC_INSTANCE_ID] = { .type = NLA_U8 },
+	[NL80211_NAN_FUNC_TERM_REASON] = { .type = NLA_U8 },
+};
+
+/* policy for Service Response Filter attributes */
+static const struct nla_policy
+nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = {
+	[NL80211_NAN_SRF_INCLUDE] = { .type = NLA_FLAG },
+	[NL80211_NAN_SRF_BF] = { .type = NLA_BINARY,
+				 .len =  NL80211_NAN_FUNC_SRF_MAX_LEN },
+	[NL80211_NAN_SRF_BF_IDX] = { .type = NLA_U8 },
+	[NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED },
+};
+
 static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
 				     struct netlink_callback *cb,
 				     struct cfg80211_registered_device **rdev,
@@ -848,13 +886,21 @@
 	struct nlattr *key;
 	struct cfg80211_cached_keys *result;
 	int rem, err, def = 0;
+	bool have_key = false;
+
+	nla_for_each_nested(key, keys, rem) {
+		have_key = true;
+		break;
+	}
+
+	if (!have_key)
+		return NULL;
 
 	result = kzalloc(sizeof(*result), GFP_KERNEL);
 	if (!result)
 		return ERR_PTR(-ENOMEM);
 
 	result->def = -1;
-	result->defmgmt = -1;
 
 	nla_for_each_nested(key, keys, rem) {
 		memset(&parse, 0, sizeof(parse));
@@ -866,7 +912,7 @@
 		err = -EINVAL;
 		if (!parse.p.key)
 			goto error;
-		if (parse.idx < 0 || parse.idx > 4)
+		if (parse.idx < 0 || parse.idx > 3)
 			goto error;
 		if (parse.def) {
 			if (def)
@@ -881,16 +927,24 @@
 						     parse.idx, false, NULL);
 		if (err)
 			goto error;
+		if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 &&
+		    parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) {
+			err = -EINVAL;
+			goto error;
+		}
 		result->params[parse.idx].cipher = parse.p.cipher;
 		result->params[parse.idx].key_len = parse.p.key_len;
 		result->params[parse.idx].key = result->data[parse.idx];
 		memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len);
 
-		if (parse.p.cipher == WLAN_CIPHER_SUITE_WEP40 ||
-		    parse.p.cipher == WLAN_CIPHER_SUITE_WEP104) {
-			if (no_ht)
-				*no_ht = true;
-		}
+		/* must be WEP key if we got here */
+		if (no_ht)
+			*no_ht = true;
+	}
+
+	if (result->def < 0) {
+		err = -EINVAL;
+		goto error;
 	}
 
 	return result;
@@ -918,6 +972,7 @@
 	case NL80211_IFTYPE_UNSPECIFIED:
 	case NL80211_IFTYPE_OCB:
 	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_NAN:
 	case NL80211_IFTYPE_P2P_DEVICE:
 	case NL80211_IFTYPE_WDS:
 	case NUM_NL80211_IFTYPES:
@@ -2525,10 +2580,35 @@
 	int if_idx = 0;
 	int wp_start = cb->args[0];
 	int if_start = cb->args[1];
+	int filter_wiphy = -1;
 	struct cfg80211_registered_device *rdev;
 	struct wireless_dev *wdev;
 
 	rtnl_lock();
+	if (!cb->args[2]) {
+		struct nl80211_dump_wiphy_state state = {
+			.filter_wiphy = -1,
+		};
+		int ret;
+
+		ret = nl80211_dump_wiphy_parse(skb, cb, &state);
+		if (ret)
+			return ret;
+
+		filter_wiphy = state.filter_wiphy;
+
+		/*
+		 * if filtering, set cb->args[2] to +1 since 0 is the default
+		 * value needed to determine that parsing is necessary.
+		 */
+		if (filter_wiphy >= 0)
+			cb->args[2] = filter_wiphy + 1;
+		else
+			cb->args[2] = -1;
+	} else if (cb->args[2] > 0) {
+		filter_wiphy = cb->args[2] - 1;
+	}
+
 	list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
 		if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk)))
 			continue;
@@ -2536,6 +2616,10 @@
 			wp_idx++;
 			continue;
 		}
+
+		if (filter_wiphy >= 0 && filter_wiphy != rdev->wiphy_idx)
+			continue;
+
 		if_idx = 0;
 
 		list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
@@ -2751,7 +2835,7 @@
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct vif_params params;
 	struct wireless_dev *wdev;
-	struct sk_buff *msg, *event;
+	struct sk_buff *msg;
 	int err;
 	enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
 	u32 flags;
@@ -2774,7 +2858,7 @@
 	    !(rdev->wiphy.interface_modes & (1 << type)))
 		return -EOPNOTSUPP;
 
-	if ((type == NL80211_IFTYPE_P2P_DEVICE ||
+	if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN ||
 	     rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) &&
 	    info->attrs[NL80211_ATTR_MAC]) {
 		nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC],
@@ -2830,9 +2914,10 @@
 		       wdev->mesh_id_up_len);
 		wdev_unlock(wdev);
 		break;
+	case NL80211_IFTYPE_NAN:
 	case NL80211_IFTYPE_P2P_DEVICE:
 		/*
-		 * P2P Device doesn't have a netdev, so doesn't go
+		 * P2P Device and NAN do not have a netdev, so don't go
 		 * through the netdev notifier and must be added here
 		 */
 		mutex_init(&wdev->mtx);
@@ -2855,20 +2940,15 @@
 		return -ENOBUFS;
 	}
 
-	event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (event) {
-		if (nl80211_send_iface(event, 0, 0, 0,
-				       rdev, wdev, false) < 0) {
-			nlmsg_free(event);
-			goto out;
-		}
+	/*
+	 * For wdevs which have no associated netdev object (e.g. of type
+	 * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here.
+	 * For all other types, the event will be generated from the
+	 * netdev notifier
+	 */
+	if (!wdev->netdev)
+		nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
 
-		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
-					event, 0, NL80211_MCGRP_CONFIG,
-					GFP_KERNEL);
-	}
-
-out:
 	return genlmsg_reply(msg, info);
 }
 
@@ -2876,18 +2956,10 @@
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct wireless_dev *wdev = info->user_ptr[1];
-	struct sk_buff *msg;
-	int status;
 
 	if (!rdev->ops->del_virtual_intf)
 		return -EOPNOTSUPP;
 
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) {
-		nlmsg_free(msg);
-		msg = NULL;
-	}
-
 	/*
 	 * If we remove a wireless device without a netdev then clear
 	 * user_ptr[1] so that nl80211_post_doit won't dereference it
@@ -2898,15 +2970,7 @@
 	if (!wdev->netdev)
 		info->user_ptr[1] = NULL;
 
-	status = rdev_del_virtual_intf(rdev, wdev);
-	if (status >= 0 && msg)
-		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
-					msg, 0, NL80211_MCGRP_CONFIG,
-					GFP_KERNEL);
-	else
-		nlmsg_free(msg);
-
-	return status;
+	return rdev_del_virtual_intf(rdev, wdev);
 }
 
 static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
@@ -3316,6 +3380,291 @@
 	return err;
 }
 
+static u32 rateset_to_mask(struct ieee80211_supported_band *sband,
+			   u8 *rates, u8 rates_len)
+{
+	u8 i;
+	u32 mask = 0;
+
+	for (i = 0; i < rates_len; i++) {
+		int rate = (rates[i] & 0x7f) * 5;
+		int ridx;
+
+		for (ridx = 0; ridx < sband->n_bitrates; ridx++) {
+			struct ieee80211_rate *srate =
+				&sband->bitrates[ridx];
+			if (rate == srate->bitrate) {
+				mask |= 1 << ridx;
+				break;
+			}
+		}
+		if (ridx == sband->n_bitrates)
+			return 0; /* rate not found */
+	}
+
+	return mask;
+}
+
+static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband,
+			       u8 *rates, u8 rates_len,
+			       u8 mcs[IEEE80211_HT_MCS_MASK_LEN])
+{
+	u8 i;
+
+	memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN);
+
+	for (i = 0; i < rates_len; i++) {
+		int ridx, rbit;
+
+		ridx = rates[i] / 8;
+		rbit = BIT(rates[i] % 8);
+
+		/* check validity */
+		if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN))
+			return false;
+
+		/* check availability */
+		if (sband->ht_cap.mcs.rx_mask[ridx] & rbit)
+			mcs[ridx] |= rbit;
+		else
+			return false;
+	}
+
+	return true;
+}
+
+static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map)
+{
+	u16 mcs_mask = 0;
+
+	switch (vht_mcs_map) {
+	case IEEE80211_VHT_MCS_NOT_SUPPORTED:
+		break;
+	case IEEE80211_VHT_MCS_SUPPORT_0_7:
+		mcs_mask = 0x00FF;
+		break;
+	case IEEE80211_VHT_MCS_SUPPORT_0_8:
+		mcs_mask = 0x01FF;
+		break;
+	case IEEE80211_VHT_MCS_SUPPORT_0_9:
+		mcs_mask = 0x03FF;
+		break;
+	default:
+		break;
+	}
+
+	return mcs_mask;
+}
+
+static void vht_build_mcs_mask(u16 vht_mcs_map,
+			       u16 vht_mcs_mask[NL80211_VHT_NSS_MAX])
+{
+	u8 nss;
+
+	for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) {
+		vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03);
+		vht_mcs_map >>= 2;
+	}
+}
+
+static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband,
+			     struct nl80211_txrate_vht *txrate,
+			     u16 mcs[NL80211_VHT_NSS_MAX])
+{
+	u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
+	u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {};
+	u8 i;
+
+	if (!sband->vht_cap.vht_supported)
+		return false;
+
+	memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX);
+
+	/* Build vht_mcs_mask from VHT capabilities */
+	vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask);
+
+	for (i = 0; i < NL80211_VHT_NSS_MAX; i++) {
+		if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i])
+			mcs[i] = txrate->mcs[i];
+		else
+			return false;
+	}
+
+	return true;
+}
+
+static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
+	[NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY,
+				    .len = NL80211_MAX_SUPP_RATES },
+	[NL80211_TXRATE_HT] = { .type = NLA_BINARY,
+				.len = NL80211_MAX_SUPP_HT_RATES },
+	[NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)},
+	[NL80211_TXRATE_GI] = { .type = NLA_U8 },
+};
+
+static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
+					 struct cfg80211_bitrate_mask *mask)
+{
+	struct nlattr *tb[NL80211_TXRATE_MAX + 1];
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	int rem, i;
+	struct nlattr *tx_rates;
+	struct ieee80211_supported_band *sband;
+	u16 vht_tx_mcs_map;
+
+	memset(mask, 0, sizeof(*mask));
+	/* Default to all rates enabled */
+	for (i = 0; i < NUM_NL80211_BANDS; i++) {
+		sband = rdev->wiphy.bands[i];
+
+		if (!sband)
+			continue;
+
+		mask->control[i].legacy = (1 << sband->n_bitrates) - 1;
+		memcpy(mask->control[i].ht_mcs,
+		       sband->ht_cap.mcs.rx_mask,
+		       sizeof(mask->control[i].ht_mcs));
+
+		if (!sband->vht_cap.vht_supported)
+			continue;
+
+		vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
+		vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs);
+	}
+
+	/* if no rates are given set it back to the defaults */
+	if (!info->attrs[NL80211_ATTR_TX_RATES])
+		goto out;
+
+	/* The nested attribute uses enum nl80211_band as the index. This maps
+	 * directly to the enum nl80211_band values used in cfg80211.
+	 */
+	BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8);
+	nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) {
+		enum nl80211_band band = nla_type(tx_rates);
+		int err;
+
+		if (band < 0 || band >= NUM_NL80211_BANDS)
+			return -EINVAL;
+		sband = rdev->wiphy.bands[band];
+		if (sband == NULL)
+			return -EINVAL;
+		err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
+				nla_len(tx_rates), nl80211_txattr_policy);
+		if (err)
+			return err;
+		if (tb[NL80211_TXRATE_LEGACY]) {
+			mask->control[band].legacy = rateset_to_mask(
+				sband,
+				nla_data(tb[NL80211_TXRATE_LEGACY]),
+				nla_len(tb[NL80211_TXRATE_LEGACY]));
+			if ((mask->control[band].legacy == 0) &&
+			    nla_len(tb[NL80211_TXRATE_LEGACY]))
+				return -EINVAL;
+		}
+		if (tb[NL80211_TXRATE_HT]) {
+			if (!ht_rateset_to_mask(
+					sband,
+					nla_data(tb[NL80211_TXRATE_HT]),
+					nla_len(tb[NL80211_TXRATE_HT]),
+					mask->control[band].ht_mcs))
+				return -EINVAL;
+		}
+		if (tb[NL80211_TXRATE_VHT]) {
+			if (!vht_set_mcs_mask(
+					sband,
+					nla_data(tb[NL80211_TXRATE_VHT]),
+					mask->control[band].vht_mcs))
+				return -EINVAL;
+		}
+		if (tb[NL80211_TXRATE_GI]) {
+			mask->control[band].gi =
+				nla_get_u8(tb[NL80211_TXRATE_GI]);
+			if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI)
+				return -EINVAL;
+		}
+
+		if (mask->control[band].legacy == 0) {
+			/* don't allow empty legacy rates if HT or VHT
+			 * are not even supported.
+			 */
+			if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported ||
+			      rdev->wiphy.bands[band]->vht_cap.vht_supported))
+				return -EINVAL;
+
+			for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
+				if (mask->control[band].ht_mcs[i])
+					goto out;
+
+			for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
+				if (mask->control[band].vht_mcs[i])
+					goto out;
+
+			/* legacy and mcs rates may not be both empty */
+			return -EINVAL;
+		}
+	}
+
+out:
+	return 0;
+}
+
+static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev,
+				   enum nl80211_band band,
+				   struct cfg80211_bitrate_mask *beacon_rate)
+{
+	u32 count_ht, count_vht, i;
+	u32 rate = beacon_rate->control[band].legacy;
+
+	/* Allow only one rate */
+	if (hweight32(rate) > 1)
+		return -EINVAL;
+
+	count_ht = 0;
+	for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) {
+		if (hweight8(beacon_rate->control[band].ht_mcs[i]) > 1) {
+			return -EINVAL;
+		} else if (beacon_rate->control[band].ht_mcs[i]) {
+			count_ht++;
+			if (count_ht > 1)
+				return -EINVAL;
+		}
+		if (count_ht && rate)
+			return -EINVAL;
+	}
+
+	count_vht = 0;
+	for (i = 0; i < NL80211_VHT_NSS_MAX; i++) {
+		if (hweight16(beacon_rate->control[band].vht_mcs[i]) > 1) {
+			return -EINVAL;
+		} else if (beacon_rate->control[band].vht_mcs[i]) {
+			count_vht++;
+			if (count_vht > 1)
+				return -EINVAL;
+		}
+		if (count_vht && rate)
+			return -EINVAL;
+	}
+
+	if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht))
+		return -EINVAL;
+
+	if (rate &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_BEACON_RATE_LEGACY))
+		return -EINVAL;
+	if (count_ht &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_BEACON_RATE_HT))
+		return -EINVAL;
+	if (count_vht &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_BEACON_RATE_VHT))
+		return -EINVAL;
+
+	return 0;
+}
+
 static int nl80211_parse_beacon(struct nlattr *attrs[],
 				struct cfg80211_beacon_data *bcn)
 {
@@ -3545,6 +3894,17 @@
 					   wdev->iftype))
 		return -EINVAL;
 
+	if (info->attrs[NL80211_ATTR_TX_RATES]) {
+		err = nl80211_parse_tx_bitrate_mask(info, &params.beacon_rate);
+		if (err)
+			return err;
+
+		err = validate_beacon_tx_rate(rdev, params.chandef.chan->band,
+					      &params.beacon_rate);
+		if (err)
+			return err;
+	}
+
 	if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
 		params.smps_mode =
 			nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
@@ -5374,12 +5734,25 @@
 	return 0;
 }
 
+static int nl80211_check_power_mode(const struct nlattr *nla,
+				    enum nl80211_mesh_power_mode min,
+				    enum nl80211_mesh_power_mode max,
+				    enum nl80211_mesh_power_mode *out)
+{
+	u32 val = nla_get_u32(nla);
+	if (val < min || val > max)
+		return -EINVAL;
+	*out = val;
+	return 0;
+}
+
 static int nl80211_parse_mesh_config(struct genl_info *info,
 				     struct mesh_config *cfg,
 				     u32 *mask_out)
 {
 	struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
 	u32 mask = 0;
+	u16 ht_opmode;
 
 #define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \
 do {									    \
@@ -5471,9 +5844,36 @@
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
 				  mask, NL80211_MESHCONF_RSSI_THRESHOLD,
 				  nl80211_check_s32);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16,
-				  mask, NL80211_MESHCONF_HT_OPMODE,
-				  nl80211_check_u16);
+	/*
+	 * Check HT operation mode based on
+	 * IEEE 802.11 2012 8.4.2.59 HT Operation element.
+	 */
+	if (tb[NL80211_MESHCONF_HT_OPMODE]) {
+		ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]);
+
+		if (ht_opmode & ~(IEEE80211_HT_OP_MODE_PROTECTION |
+				  IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT |
+				  IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+			return -EINVAL;
+
+		if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) &&
+		    (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+			return -EINVAL;
+
+		switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) {
+		case IEEE80211_HT_OP_MODE_PROTECTION_NONE:
+		case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
+			if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)
+				return -EINVAL;
+			break;
+		case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER:
+		case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED:
+			if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
+				return -EINVAL;
+			break;
+		}
+		cfg->ht_opmode = ht_opmode;
+	}
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout,
 				  1, 65535, mask,
 				  NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT,
@@ -5490,7 +5890,7 @@
 				  NL80211_MESH_POWER_ACTIVE,
 				  NL80211_MESH_POWER_MAX,
 				  mask, NL80211_MESHCONF_POWER_MODE,
-				  nl80211_check_u32);
+				  nl80211_check_power_mode);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
 				  0, 65535, mask,
 				  NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
@@ -6074,6 +6474,9 @@
 
 	wiphy = &rdev->wiphy;
 
+	if (wdev->iftype == NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
 	if (!rdev->ops->scan)
 		return -EOPNOTSUPP;
 
@@ -6950,7 +7353,7 @@
 
 		params.n_counter_offsets_presp = len / sizeof(u16);
 		if (rdev->wiphy.max_num_csa_counters &&
-		    (params.n_counter_offsets_beacon >
+		    (params.n_counter_offsets_presp >
 		     rdev->wiphy.max_num_csa_counters))
 			return -EINVAL;
 
@@ -7340,7 +7743,7 @@
 		    (key.p.cipher != WLAN_CIPHER_SUITE_WEP104 ||
 		     key.p.key_len != WLAN_KEY_LEN_WEP104))
 			return -EINVAL;
-		if (key.idx > 4)
+		if (key.idx > 3)
 			return -EINVAL;
 	} else {
 		key.p.key_len = 0;
@@ -7745,12 +8148,13 @@
 
 	ibss.beacon_interval = 100;
 
-	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
+	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL])
 		ibss.beacon_interval =
 			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
-		if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000)
-			return -EINVAL;
-	}
+
+	err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval);
+	if (err)
+		return err;
 
 	if (!rdev->ops->join_ibss)
 		return -EOPNOTSUPP;
@@ -7957,6 +8361,8 @@
 	}
 
 	data = nla_nest_start(skb, attr);
+	if (!data)
+		goto nla_put_failure;
 
 	((void **)skb->cb)[0] = rdev;
 	((void **)skb->cb)[1] = hdr;
@@ -8574,238 +8980,21 @@
 	return rdev_cancel_remain_on_channel(rdev, wdev, cookie);
 }
 
-static u32 rateset_to_mask(struct ieee80211_supported_band *sband,
-			   u8 *rates, u8 rates_len)
-{
-	u8 i;
-	u32 mask = 0;
-
-	for (i = 0; i < rates_len; i++) {
-		int rate = (rates[i] & 0x7f) * 5;
-		int ridx;
-
-		for (ridx = 0; ridx < sband->n_bitrates; ridx++) {
-			struct ieee80211_rate *srate =
-				&sband->bitrates[ridx];
-			if (rate == srate->bitrate) {
-				mask |= 1 << ridx;
-				break;
-			}
-		}
-		if (ridx == sband->n_bitrates)
-			return 0; /* rate not found */
-	}
-
-	return mask;
-}
-
-static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband,
-			       u8 *rates, u8 rates_len,
-			       u8 mcs[IEEE80211_HT_MCS_MASK_LEN])
-{
-	u8 i;
-
-	memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN);
-
-	for (i = 0; i < rates_len; i++) {
-		int ridx, rbit;
-
-		ridx = rates[i] / 8;
-		rbit = BIT(rates[i] % 8);
-
-		/* check validity */
-		if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN))
-			return false;
-
-		/* check availability */
-		if (sband->ht_cap.mcs.rx_mask[ridx] & rbit)
-			mcs[ridx] |= rbit;
-		else
-			return false;
-	}
-
-	return true;
-}
-
-static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map)
-{
-	u16 mcs_mask = 0;
-
-	switch (vht_mcs_map) {
-	case IEEE80211_VHT_MCS_NOT_SUPPORTED:
-		break;
-	case IEEE80211_VHT_MCS_SUPPORT_0_7:
-		mcs_mask = 0x00FF;
-		break;
-	case IEEE80211_VHT_MCS_SUPPORT_0_8:
-		mcs_mask = 0x01FF;
-		break;
-	case IEEE80211_VHT_MCS_SUPPORT_0_9:
-		mcs_mask = 0x03FF;
-		break;
-	default:
-		break;
-	}
-
-	return mcs_mask;
-}
-
-static void vht_build_mcs_mask(u16 vht_mcs_map,
-			       u16 vht_mcs_mask[NL80211_VHT_NSS_MAX])
-{
-	u8 nss;
-
-	for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) {
-		vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03);
-		vht_mcs_map >>= 2;
-	}
-}
-
-static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband,
-			     struct nl80211_txrate_vht *txrate,
-			     u16 mcs[NL80211_VHT_NSS_MAX])
-{
-	u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
-	u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {};
-	u8 i;
-
-	if (!sband->vht_cap.vht_supported)
-		return false;
-
-	memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX);
-
-	/* Build vht_mcs_mask from VHT capabilities */
-	vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask);
-
-	for (i = 0; i < NL80211_VHT_NSS_MAX; i++) {
-		if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i])
-			mcs[i] = txrate->mcs[i];
-		else
-			return false;
-	}
-
-	return true;
-}
-
-static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
-	[NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY,
-				    .len = NL80211_MAX_SUPP_RATES },
-	[NL80211_TXRATE_HT] = { .type = NLA_BINARY,
-				.len = NL80211_MAX_SUPP_HT_RATES },
-	[NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)},
-	[NL80211_TXRATE_GI] = { .type = NLA_U8 },
-};
-
 static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
 				       struct genl_info *info)
 {
-	struct nlattr *tb[NL80211_TXRATE_MAX + 1];
-	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct cfg80211_bitrate_mask mask;
-	int rem, i;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
-	struct nlattr *tx_rates;
-	struct ieee80211_supported_band *sband;
-	u16 vht_tx_mcs_map;
+	int err;
 
 	if (!rdev->ops->set_bitrate_mask)
 		return -EOPNOTSUPP;
 
-	memset(&mask, 0, sizeof(mask));
-	/* Default to all rates enabled */
-	for (i = 0; i < NUM_NL80211_BANDS; i++) {
-		sband = rdev->wiphy.bands[i];
+	err = nl80211_parse_tx_bitrate_mask(info, &mask);
+	if (err)
+		return err;
 
-		if (!sband)
-			continue;
-
-		mask.control[i].legacy = (1 << sband->n_bitrates) - 1;
-		memcpy(mask.control[i].ht_mcs,
-		       sband->ht_cap.mcs.rx_mask,
-		       sizeof(mask.control[i].ht_mcs));
-
-		if (!sband->vht_cap.vht_supported)
-			continue;
-
-		vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map);
-		vht_build_mcs_mask(vht_tx_mcs_map, mask.control[i].vht_mcs);
-	}
-
-	/* if no rates are given set it back to the defaults */
-	if (!info->attrs[NL80211_ATTR_TX_RATES])
-		goto out;
-
-	/*
-	 * The nested attribute uses enum nl80211_band as the index. This maps
-	 * directly to the enum nl80211_band values used in cfg80211.
-	 */
-	BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8);
-	nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) {
-		enum nl80211_band band = nla_type(tx_rates);
-		int err;
-
-		if (band < 0 || band >= NUM_NL80211_BANDS)
-			return -EINVAL;
-		sband = rdev->wiphy.bands[band];
-		if (sband == NULL)
-			return -EINVAL;
-		err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
-				nla_len(tx_rates), nl80211_txattr_policy);
-		if (err)
-			return err;
-		if (tb[NL80211_TXRATE_LEGACY]) {
-			mask.control[band].legacy = rateset_to_mask(
-				sband,
-				nla_data(tb[NL80211_TXRATE_LEGACY]),
-				nla_len(tb[NL80211_TXRATE_LEGACY]));
-			if ((mask.control[band].legacy == 0) &&
-			    nla_len(tb[NL80211_TXRATE_LEGACY]))
-				return -EINVAL;
-		}
-		if (tb[NL80211_TXRATE_HT]) {
-			if (!ht_rateset_to_mask(
-					sband,
-					nla_data(tb[NL80211_TXRATE_HT]),
-					nla_len(tb[NL80211_TXRATE_HT]),
-					mask.control[band].ht_mcs))
-				return -EINVAL;
-		}
-		if (tb[NL80211_TXRATE_VHT]) {
-			if (!vht_set_mcs_mask(
-					sband,
-					nla_data(tb[NL80211_TXRATE_VHT]),
-					mask.control[band].vht_mcs))
-				return -EINVAL;
-		}
-		if (tb[NL80211_TXRATE_GI]) {
-			mask.control[band].gi =
-				nla_get_u8(tb[NL80211_TXRATE_GI]);
-			if (mask.control[band].gi > NL80211_TXRATE_FORCE_LGI)
-				return -EINVAL;
-		}
-
-		if (mask.control[band].legacy == 0) {
-			/* don't allow empty legacy rates if HT or VHT
-			 * are not even supported.
-			 */
-			if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported ||
-			      rdev->wiphy.bands[band]->vht_cap.vht_supported))
-				return -EINVAL;
-
-			for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
-				if (mask.control[band].ht_mcs[i])
-					goto out;
-
-			for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
-				if (mask.control[band].vht_mcs[i])
-					goto out;
-
-			/* legacy and mcs rates may not be both empty */
-			return -EINVAL;
-		}
-	}
-
-out:
 	return rdev_set_bitrate_mask(rdev, dev, NULL, &mask);
 }
 
@@ -8831,6 +9020,7 @@
 	case NL80211_IFTYPE_P2P_GO:
 	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -8876,6 +9066,7 @@
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_P2P_GO:
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -8992,6 +9183,7 @@
 	case NL80211_IFTYPE_P2P_GO:
 	case NL80211_IFTYPE_P2P_DEVICE:
 		break;
+	case NL80211_IFTYPE_NAN:
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -9224,9 +9416,10 @@
 	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
 		setup.beacon_interval =
 			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
-		if (setup.beacon_interval < 10 ||
-		    setup.beacon_interval > 10000)
-			return -EINVAL;
+
+		err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval);
+		if (err)
+			return err;
 	}
 
 	if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
@@ -9272,6 +9465,17 @@
 			return err;
 	}
 
+	if (info->attrs[NL80211_ATTR_TX_RATES]) {
+		err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate);
+		if (err)
+			return err;
+
+		err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
+					      &setup.beacon_rate);
+		if (err)
+			return err;
+	}
+
 	return cfg80211_join_mesh(rdev, dev, &setup, &cfg);
 }
 
@@ -9385,18 +9589,27 @@
 	if (!freqs)
 		return -ENOBUFS;
 
-	for (i = 0; i < req->n_channels; i++)
-		nla_put_u32(msg, i, req->channels[i]->center_freq);
+	for (i = 0; i < req->n_channels; i++) {
+		if (nla_put_u32(msg, i, req->channels[i]->center_freq))
+			return -ENOBUFS;
+	}
 
 	nla_nest_end(msg, freqs);
 
 	if (req->n_match_sets) {
 		matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH);
+		if (!matches)
+			return -ENOBUFS;
+
 		for (i = 0; i < req->n_match_sets; i++) {
 			match = nla_nest_start(msg, i);
-			nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
-				req->match_sets[i].ssid.ssid_len,
-				req->match_sets[i].ssid.ssid);
+			if (!match)
+				return -ENOBUFS;
+
+			if (nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
+				    req->match_sets[i].ssid.ssid_len,
+				    req->match_sets[i].ssid.ssid))
+				return -ENOBUFS;
 			nla_nest_end(msg, match);
 		}
 		nla_nest_end(msg, matches);
@@ -9408,6 +9621,9 @@
 
 	for (i = 0; i < req->n_scan_plans; i++) {
 		scan_plan = nla_nest_start(msg, i + 1);
+		if (!scan_plan)
+			return -ENOBUFS;
+
 		if (!scan_plan ||
 		    nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
 				req->scan_plans[i].interval) ||
@@ -10334,6 +10550,549 @@
 	return 0;
 }
 
+static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	struct cfg80211_nan_conf conf = {};
+	int err;
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (wdev->nan_started)
+		return -EEXIST;
+
+	if (rfkill_blocked(rdev->rfkill))
+		return -ERFKILL;
+
+	if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF])
+		return -EINVAL;
+
+	if (!info->attrs[NL80211_ATTR_NAN_DUAL])
+		return -EINVAL;
+
+	conf.master_pref =
+		nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);
+	if (!conf.master_pref)
+		return -EINVAL;
+
+	conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]);
+
+	err = rdev_start_nan(rdev, wdev, &conf);
+	if (err)
+		return err;
+
+	wdev->nan_started = true;
+	rdev->opencount++;
+
+	return 0;
+}
+
+static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	cfg80211_stop_nan(rdev, wdev);
+
+	return 0;
+}
+
+static int validate_nan_filter(struct nlattr *filter_attr)
+{
+	struct nlattr *attr;
+	int len = 0, n_entries = 0, rem;
+
+	nla_for_each_nested(attr, filter_attr, rem) {
+		len += nla_len(attr);
+		n_entries++;
+	}
+
+	if (len >= U8_MAX)
+		return -EINVAL;
+
+	return n_entries;
+}
+
+static int handle_nan_filter(struct nlattr *attr_filter,
+			     struct cfg80211_nan_func *func,
+			     bool tx)
+{
+	struct nlattr *attr;
+	int n_entries, rem, i;
+	struct cfg80211_nan_func_filter *filter;
+
+	n_entries = validate_nan_filter(attr_filter);
+	if (n_entries < 0)
+		return n_entries;
+
+	BUILD_BUG_ON(sizeof(*func->rx_filters) != sizeof(*func->tx_filters));
+
+	filter = kcalloc(n_entries, sizeof(*func->rx_filters), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	i = 0;
+	nla_for_each_nested(attr, attr_filter, rem) {
+		filter[i].filter = kmemdup(nla_data(attr), nla_len(attr),
+					   GFP_KERNEL);
+		filter[i].len = nla_len(attr);
+		i++;
+	}
+	if (tx) {
+		func->num_tx_filters = n_entries;
+		func->tx_filters = filter;
+	} else {
+		func->num_rx_filters = n_entries;
+		func->rx_filters = filter;
+	}
+
+	return 0;
+}
+
+static int nl80211_nan_add_func(struct sk_buff *skb,
+				struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	struct nlattr *tb[NUM_NL80211_NAN_FUNC_ATTR], *func_attr;
+	struct cfg80211_nan_func *func;
+	struct sk_buff *msg = NULL;
+	void *hdr = NULL;
+	int err = 0;
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (!wdev->nan_started)
+		return -ENOTCONN;
+
+	if (!info->attrs[NL80211_ATTR_NAN_FUNC])
+		return -EINVAL;
+
+	if (wdev->owner_nlportid &&
+	    wdev->owner_nlportid != info->snd_portid)
+		return -ENOTCONN;
+
+	err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX,
+			nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]),
+			nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]),
+			nl80211_nan_func_policy);
+	if (err)
+		return err;
+
+	func = kzalloc(sizeof(*func), GFP_KERNEL);
+	if (!func)
+		return -ENOMEM;
+
+	func->cookie = wdev->wiphy->cookie_counter++;
+
+	if (!tb[NL80211_NAN_FUNC_TYPE] ||
+	    nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) {
+		err = -EINVAL;
+		goto out;
+	}
+
+
+	func->type = nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]);
+
+	if (!tb[NL80211_NAN_FUNC_SERVICE_ID]) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	memcpy(func->service_id, nla_data(tb[NL80211_NAN_FUNC_SERVICE_ID]),
+	       sizeof(func->service_id));
+
+	func->close_range =
+		nla_get_flag(tb[NL80211_NAN_FUNC_CLOSE_RANGE]);
+
+	if (tb[NL80211_NAN_FUNC_SERVICE_INFO]) {
+		func->serv_spec_info_len =
+			nla_len(tb[NL80211_NAN_FUNC_SERVICE_INFO]);
+		func->serv_spec_info =
+			kmemdup(nla_data(tb[NL80211_NAN_FUNC_SERVICE_INFO]),
+				func->serv_spec_info_len,
+				GFP_KERNEL);
+		if (!func->serv_spec_info) {
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	if (tb[NL80211_NAN_FUNC_TTL])
+		func->ttl = nla_get_u32(tb[NL80211_NAN_FUNC_TTL]);
+
+	switch (func->type) {
+	case NL80211_NAN_FUNC_PUBLISH:
+		if (!tb[NL80211_NAN_FUNC_PUBLISH_TYPE]) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		func->publish_type =
+			nla_get_u8(tb[NL80211_NAN_FUNC_PUBLISH_TYPE]);
+		func->publish_bcast =
+			nla_get_flag(tb[NL80211_NAN_FUNC_PUBLISH_BCAST]);
+
+		if ((!(func->publish_type & NL80211_NAN_SOLICITED_PUBLISH)) &&
+			func->publish_bcast) {
+			err = -EINVAL;
+			goto out;
+		}
+		break;
+	case NL80211_NAN_FUNC_SUBSCRIBE:
+		func->subscribe_active =
+			nla_get_flag(tb[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE]);
+		break;
+	case NL80211_NAN_FUNC_FOLLOW_UP:
+		if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
+		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		func->followup_id =
+			nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_ID]);
+		func->followup_reqid =
+			nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]);
+		memcpy(func->followup_dest.addr,
+		       nla_data(tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]),
+		       sizeof(func->followup_dest.addr));
+		if (func->ttl) {
+			err = -EINVAL;
+			goto out;
+		}
+		break;
+	default:
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (tb[NL80211_NAN_FUNC_SRF]) {
+		struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR];
+
+		err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX,
+				nla_data(tb[NL80211_NAN_FUNC_SRF]),
+				nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL);
+		if (err)
+			goto out;
+
+		func->srf_include =
+			nla_get_flag(srf_tb[NL80211_NAN_SRF_INCLUDE]);
+
+		if (srf_tb[NL80211_NAN_SRF_BF]) {
+			if (srf_tb[NL80211_NAN_SRF_MAC_ADDRS] ||
+			    !srf_tb[NL80211_NAN_SRF_BF_IDX]) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			func->srf_bf_len =
+				nla_len(srf_tb[NL80211_NAN_SRF_BF]);
+			func->srf_bf =
+				kmemdup(nla_data(srf_tb[NL80211_NAN_SRF_BF]),
+					func->srf_bf_len, GFP_KERNEL);
+			if (!func->srf_bf) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			func->srf_bf_idx =
+				nla_get_u8(srf_tb[NL80211_NAN_SRF_BF_IDX]);
+		} else {
+			struct nlattr *attr, *mac_attr =
+				srf_tb[NL80211_NAN_SRF_MAC_ADDRS];
+			int n_entries, rem, i = 0;
+
+			if (!mac_attr) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			n_entries = validate_acl_mac_addrs(mac_attr);
+			if (n_entries <= 0) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			func->srf_num_macs = n_entries;
+			func->srf_macs =
+				kzalloc(sizeof(*func->srf_macs) * n_entries,
+					GFP_KERNEL);
+			if (!func->srf_macs) {
+				err = -ENOMEM;
+				goto out;
+			}
+
+			nla_for_each_nested(attr, mac_attr, rem)
+				memcpy(func->srf_macs[i++].addr, nla_data(attr),
+				       sizeof(*func->srf_macs));
+		}
+	}
+
+	if (tb[NL80211_NAN_FUNC_TX_MATCH_FILTER]) {
+		err = handle_nan_filter(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER],
+					func, true);
+		if (err)
+			goto out;
+	}
+
+	if (tb[NL80211_NAN_FUNC_RX_MATCH_FILTER]) {
+		err = handle_nan_filter(tb[NL80211_NAN_FUNC_RX_MATCH_FILTER],
+					func, false);
+		if (err)
+			goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
+			     NL80211_CMD_ADD_NAN_FUNCTION);
+	/* This can't really happen - we just allocated 4KB */
+	if (WARN_ON(!hdr)) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = rdev_add_nan_func(rdev, wdev, func);
+out:
+	if (err < 0) {
+		cfg80211_free_nan_func(func);
+		nlmsg_free(msg);
+		return err;
+	}
+
+	/* propagate the instance id and cookie to userspace  */
+	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, func->cookie,
+			      NL80211_ATTR_PAD))
+		goto nla_put_failure;
+
+	func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC);
+	if (!func_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID,
+		       func->instance_id))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, func_attr);
+
+	genlmsg_end(msg, hdr);
+	return genlmsg_reply(msg, info);
+
+nla_put_failure:
+	nlmsg_free(msg);
+	return -ENOBUFS;
+}
+
+static int nl80211_nan_del_func(struct sk_buff *skb,
+			       struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	u64 cookie;
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (!wdev->nan_started)
+		return -ENOTCONN;
+
+	if (!info->attrs[NL80211_ATTR_COOKIE])
+		return -EINVAL;
+
+	if (wdev->owner_nlportid &&
+	    wdev->owner_nlportid != info->snd_portid)
+		return -ENOTCONN;
+
+	cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
+
+	rdev_del_nan_func(rdev, wdev, cookie);
+
+	return 0;
+}
+
+static int nl80211_nan_change_config(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	struct cfg80211_nan_conf conf = {};
+	u32 changed = 0;
+
+	if (wdev->iftype != NL80211_IFTYPE_NAN)
+		return -EOPNOTSUPP;
+
+	if (!wdev->nan_started)
+		return -ENOTCONN;
+
+	if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) {
+		conf.master_pref =
+			nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);
+		if (conf.master_pref <= 1 || conf.master_pref == 255)
+			return -EINVAL;
+
+		changed |= CFG80211_NAN_CONF_CHANGED_PREF;
+	}
+
+	if (info->attrs[NL80211_ATTR_NAN_DUAL]) {
+		conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]);
+		changed |= CFG80211_NAN_CONF_CHANGED_DUAL;
+	}
+
+	if (!changed)
+		return -EINVAL;
+
+	return rdev_nan_change_conf(rdev, wdev, &conf, changed);
+}
+
+void cfg80211_nan_match(struct wireless_dev *wdev,
+			struct cfg80211_nan_match_params *match, gfp_t gfp)
+{
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct nlattr *match_attr, *local_func_attr, *peer_func_attr;
+	struct sk_buff *msg;
+	void *hdr;
+
+	if (WARN_ON(!match->inst_id || !match->peer_inst_id || !match->addr))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_MATCH);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
+					 wdev->netdev->ifindex)) ||
+	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+			      NL80211_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, match->cookie,
+			      NL80211_ATTR_PAD) ||
+	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr))
+		goto nla_put_failure;
+
+	match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH);
+	if (!match_attr)
+		goto nla_put_failure;
+
+	local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL);
+	if (!local_func_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->inst_id))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, local_func_attr);
+
+	peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER);
+	if (!peer_func_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL80211_NAN_FUNC_TYPE, match->type) ||
+	    nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->peer_inst_id))
+		goto nla_put_failure;
+
+	if (match->info && match->info_len &&
+	    nla_put(msg, NL80211_NAN_FUNC_SERVICE_INFO, match->info_len,
+		    match->info))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, peer_func_attr);
+	nla_nest_end(msg, match_attr);
+	genlmsg_end(msg, hdr);
+
+	if (!wdev->owner_nlportid)
+		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+					msg, 0, NL80211_MCGRP_NAN, gfp);
+	else
+		genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+				wdev->owner_nlportid);
+
+	return;
+
+nla_put_failure:
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_nan_match);
+
+void cfg80211_nan_func_terminated(struct wireless_dev *wdev,
+				  u8 inst_id,
+				  enum nl80211_nan_func_term_reason reason,
+				  u64 cookie, gfp_t gfp)
+{
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+	struct sk_buff *msg;
+	struct nlattr *func_attr;
+	void *hdr;
+
+	if (WARN_ON(!inst_id))
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_NAN_FUNCTION);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
+					 wdev->netdev->ifindex)) ||
+	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+			      NL80211_ATTR_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
+			      NL80211_ATTR_PAD))
+		goto nla_put_failure;
+
+	func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC);
+	if (!func_attr)
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, inst_id) ||
+	    nla_put_u8(msg, NL80211_NAN_FUNC_TERM_REASON, reason))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, func_attr);
+	genlmsg_end(msg, hdr);
+
+	if (!wdev->owner_nlportid)
+		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
+					msg, 0, NL80211_MCGRP_NAN, gfp);
+	else
+		genlmsg_unicast(wiphy_net(&rdev->wiphy), msg,
+				wdev->owner_nlportid);
+
+	return;
+
+nla_put_failure:
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_nan_func_terminated);
+
 static int nl80211_get_protocol_features(struct sk_buff *skb,
 					 struct genl_info *info)
 {
@@ -11035,7 +11794,14 @@
 
 			dev_hold(dev);
 		} else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) {
-			if (!wdev->p2p_started) {
+			if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE &&
+			    !wdev->p2p_started) {
+				if (rtnl)
+					rtnl_unlock();
+				return -ENETDOWN;
+			}
+			if (wdev->iftype == NL80211_IFTYPE_NAN &&
+			    !wdev->nan_started) {
 				if (rtnl)
 					rtnl_unlock();
 				return -ENETDOWN;
@@ -11669,6 +12435,46 @@
 				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
+		.cmd = NL80211_CMD_START_NAN,
+		.doit = nl80211_start_nan,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_STOP_NAN,
+		.doit = nl80211_stop_nan,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_ADD_NAN_FUNCTION,
+		.doit = nl80211_nan_add_func,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_DEL_NAN_FUNCTION,
+		.doit = nl80211_nan_del_func,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_CHANGE_NAN_CONFIG,
+		.doit = nl80211_nan_change_config,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
 		.cmd = NL80211_CMD_SET_MCAST_RATE,
 		.doit = nl80211_set_mcast_rate,
 		.policy = nl80211_policy,
@@ -11819,6 +12625,29 @@
 				NL80211_MCGRP_CONFIG, GFP_KERNEL);
 }
 
+void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+				struct wireless_dev *wdev,
+				enum nl80211_commands cmd)
+{
+	struct sk_buff *msg;
+
+	WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
+		cmd != NL80211_CMD_DEL_INTERFACE);
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev,
+			       cmd == NL80211_CMD_DEL_INTERFACE) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+				NL80211_MCGRP_CONFIG, GFP_KERNEL);
+}
+
 static int nl80211_add_scan_req(struct sk_buff *msg,
 				struct cfg80211_registered_device *rdev)
 {
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index a63f402..7e3821d 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -7,6 +7,9 @@
 void nl80211_exit(void);
 void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev,
 			  enum nl80211_commands cmd);
+void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+			  struct wireless_dev *wdev,
+			  enum nl80211_commands cmd);
 void nl80211_send_scan_start(struct cfg80211_registered_device *rdev,
 			     struct wireless_dev *wdev);
 struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 85ff30b..11cf83c 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -887,6 +887,64 @@
 	trace_rdev_return_void(&rdev->wiphy);
 }
 
+static inline int rdev_start_nan(struct cfg80211_registered_device *rdev,
+				 struct wireless_dev *wdev,
+				 struct cfg80211_nan_conf *conf)
+{
+	int ret;
+
+	trace_rdev_start_nan(&rdev->wiphy, wdev, conf);
+	ret = rdev->ops->start_nan(&rdev->wiphy, wdev, conf);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev,
+				 struct wireless_dev *wdev)
+{
+	trace_rdev_stop_nan(&rdev->wiphy, wdev);
+	rdev->ops->stop_nan(&rdev->wiphy, wdev);
+	trace_rdev_return_void(&rdev->wiphy);
+}
+
+static inline int
+rdev_add_nan_func(struct cfg80211_registered_device *rdev,
+		  struct wireless_dev *wdev,
+		  struct cfg80211_nan_func *nan_func)
+{
+	int ret;
+
+	trace_rdev_add_nan_func(&rdev->wiphy, wdev, nan_func);
+	ret = rdev->ops->add_nan_func(&rdev->wiphy, wdev, nan_func);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev,
+				    struct wireless_dev *wdev, u64 cookie)
+{
+	trace_rdev_del_nan_func(&rdev->wiphy, wdev, cookie);
+	rdev->ops->del_nan_func(&rdev->wiphy, wdev, cookie);
+	trace_rdev_return_void(&rdev->wiphy);
+}
+
+static inline int
+rdev_nan_change_conf(struct cfg80211_registered_device *rdev,
+		     struct wireless_dev *wdev,
+		     struct cfg80211_nan_conf *conf, u32 changes)
+{
+	int ret;
+
+	trace_rdev_nan_change_conf(&rdev->wiphy, wdev, conf, changes);
+	if (rdev->ops->nan_change_conf)
+		ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf,
+						 changes);
+	else
+		ret = -ENOTSUPP;
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
 static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
 				   struct net_device *dev,
 				   struct cfg80211_acl_data *params)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 0358e12..b5bd58d 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -352,52 +352,48 @@
 	__cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE);
 }
 
-const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len)
+const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len,
+				 const u8 *match, int match_len,
+				 int match_offset)
 {
-	while (len > 2 && ies[0] != eid) {
+	/* match_offset can't be smaller than 2, unless match_len is
+	 * zero, in which case match_offset must be zero as well.
+	 */
+	if (WARN_ON((match_len && match_offset < 2) ||
+		    (!match_len && match_offset)))
+		return NULL;
+
+	while (len >= 2 && len >= ies[1] + 2) {
+		if ((ies[0] == eid) &&
+		    (ies[1] + 2 >= match_offset + match_len) &&
+		    !memcmp(ies + match_offset, match, match_len))
+			return ies;
+
 		len -= ies[1] + 2;
 		ies += ies[1] + 2;
 	}
-	if (len < 2)
-		return NULL;
-	if (len < 2 + ies[1])
-		return NULL;
-	return ies;
+
+	return NULL;
 }
-EXPORT_SYMBOL(cfg80211_find_ie);
+EXPORT_SYMBOL(cfg80211_find_ie_match);
 
 const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type,
 				  const u8 *ies, int len)
 {
-	struct ieee80211_vendor_ie *ie;
-	const u8 *pos = ies, *end = ies + len;
-	int ie_oui;
+	const u8 *ie;
+	u8 match[] = { oui >> 16, oui >> 8, oui, oui_type };
+	int match_len = (oui_type < 0) ? 3 : sizeof(match);
 
 	if (WARN_ON(oui_type > 0xff))
 		return NULL;
 
-	while (pos < end) {
-		pos = cfg80211_find_ie(WLAN_EID_VENDOR_SPECIFIC, pos,
-				       end - pos);
-		if (!pos)
-			return NULL;
+	ie = cfg80211_find_ie_match(WLAN_EID_VENDOR_SPECIFIC, ies, len,
+				    match, match_len, 2);
 
-		ie = (struct ieee80211_vendor_ie *)pos;
+	if (ie && (ie[1] < 4))
+		return NULL;
 
-		/* make sure we can access ie->len */
-		BUILD_BUG_ON(offsetof(struct ieee80211_vendor_ie, len) != 1);
-
-		if (ie->len < sizeof(*ie))
-			goto cont;
-
-		ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2];
-		if (ie_oui == oui &&
-		    (oui_type < 0 || ie->oui_type == oui_type))
-			return pos;
-cont:
-		pos += 2 + ie->len;
-	}
-	return NULL;
+	return ie;
 }
 EXPORT_SYMBOL(cfg80211_find_vendor_ie);
 
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index add6824..a77db33 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -726,7 +726,8 @@
 
 	wdev->current_bss = bss_from_pub(bss);
 
-	cfg80211_upload_connect_keys(wdev);
+	if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
+		cfg80211_upload_connect_keys(wdev);
 
 	rcu_read_lock();
 	country_ie = ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY);
@@ -1043,6 +1044,12 @@
 				connect->crypto.ciphers_pairwise[0] = cipher;
 			}
 		}
+
+		connect->crypto.wep_keys = connkeys->params;
+		connect->crypto.wep_tx_key = connkeys->def;
+	} else {
+		if (WARN_ON(connkeys))
+			return -EINVAL;
 	}
 
 	wdev->connect_keys = connkeys;
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index e46469b..0082f4b 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -57,7 +57,7 @@
 		return sprintf(buf, "%pM\n", wiphy->perm_addr);
 
 	for (i = 0; i < wiphy->n_addresses; i++)
-		buf += sprintf(buf, "%pM\n", &wiphy->addresses[i].addr);
+		buf += sprintf(buf, "%pM\n", wiphy->addresses[i].addr);
 
 	return buf - start;
 }
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 72b5255..a3d0a91b 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1889,6 +1889,96 @@
 	TP_ARGS(wiphy, wdev)
 );
 
+TRACE_EVENT(rdev_start_nan,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 struct cfg80211_nan_conf *conf),
+	TP_ARGS(wiphy, wdev, conf),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u8, master_pref)
+		__field(u8, dual);
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->master_pref = conf->master_pref;
+		__entry->dual = conf->dual;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT
+		  ", master preference: %u, dual: %d",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref,
+		  __entry->dual)
+);
+
+TRACE_EVENT(rdev_nan_change_conf,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 struct cfg80211_nan_conf *conf, u32 changes),
+	TP_ARGS(wiphy, wdev, conf, changes),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u8, master_pref)
+		__field(u8, dual);
+		__field(u32, changes);
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->master_pref = conf->master_pref;
+		__entry->dual = conf->dual;
+		__entry->changes = changes;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT
+		  ", master preference: %u, dual: %d, changes: %x",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref,
+		  __entry->dual, __entry->changes)
+);
+
+DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+	TP_ARGS(wiphy, wdev)
+);
+
+TRACE_EVENT(rdev_add_nan_func,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 const struct cfg80211_nan_func *func),
+	TP_ARGS(wiphy, wdev, func),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u8, func_type)
+		__field(u64, cookie)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->func_type = func->type;
+		__entry->cookie = func->cookie
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type=%u, cookie=%llu",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->func_type,
+		  __entry->cookie)
+);
+
+TRACE_EVENT(rdev_del_nan_func,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 u64 cookie),
+	TP_ARGS(wiphy, wdev, cookie),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u64, cookie)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->cookie = cookie;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie=%llu",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie)
+);
+
 TRACE_EVENT(rdev_set_mac_acl,
 	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
 		 struct cfg80211_acl_data *params),
diff --git a/net/wireless/util.c b/net/wireless/util.c
index b7d1592..8edce22 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -218,7 +218,7 @@
 				   struct key_params *params, int key_idx,
 				   bool pairwise, const u8 *mac_addr)
 {
-	if (key_idx > 5)
+	if (key_idx < 0 || key_idx > 5)
 		return -EINVAL;
 
 	if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
@@ -249,7 +249,13 @@
 		/* Disallow BIP (group-only) cipher as pairwise cipher */
 		if (pairwise)
 			return -EINVAL;
+		if (key_idx < 4)
+			return -EINVAL;
 		break;
+	case WLAN_CIPHER_SUITE_WEP40:
+	case WLAN_CIPHER_SUITE_WEP104:
+		if (key_idx > 3)
+			return -EINVAL;
 	default:
 		break;
 	}
@@ -906,7 +912,7 @@
 	if (!wdev->connect_keys)
 		return;
 
-	for (i = 0; i < 6; i++) {
+	for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) {
 		if (!wdev->connect_keys->params[i].cipher)
 			continue;
 		if (rdev_add_key(rdev, dev, i, false, NULL,
@@ -919,9 +925,6 @@
 				netdev_err(dev, "failed to set defkey %d\n", i);
 				continue;
 			}
-		if (wdev->connect_keys->defmgmt == i)
-			if (rdev_set_default_mgmt_key(rdev, dev, i))
-				netdev_err(dev, "failed to set mgtdef %d\n", i);
 	}
 
 	kzfree(wdev->connect_keys);
@@ -1005,8 +1008,9 @@
 	if (otype == NL80211_IFTYPE_AP_VLAN)
 		return -EOPNOTSUPP;
 
-	/* cannot change into P2P device type */
-	if (ntype == NL80211_IFTYPE_P2P_DEVICE)
+	/* cannot change into P2P device or NAN */
+	if (ntype == NL80211_IFTYPE_P2P_DEVICE ||
+	    ntype == NL80211_IFTYPE_NAN)
 		return -EOPNOTSUPP;
 
 	if (!rdev->ops->change_virtual_intf ||
@@ -1085,6 +1089,7 @@
 			/* not happening */
 			break;
 		case NL80211_IFTYPE_P2P_DEVICE:
+		case NL80211_IFTYPE_NAN:
 			WARN_ON(1);
 			break;
 		}
@@ -1559,7 +1564,7 @@
 	struct wireless_dev *wdev;
 	int res = 0;
 
-	if (!beacon_int)
+	if (beacon_int < 10 || beacon_int > 10000)
 		return -EINVAL;
 
 	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
@@ -1757,6 +1762,28 @@
 }
 EXPORT_SYMBOL(cfg80211_get_station);
 
+void cfg80211_free_nan_func(struct cfg80211_nan_func *f)
+{
+	int i;
+
+	if (!f)
+		return;
+
+	kfree(f->serv_spec_info);
+	kfree(f->srf_bf);
+	kfree(f->srf_macs);
+	for (i = 0; i < f->num_rx_filters; i++)
+		kfree(f->rx_filters[i].filter);
+
+	for (i = 0; i < f->num_tx_filters; i++)
+		kfree(f->tx_filters[i].filter);
+
+	kfree(f->rx_filters);
+	kfree(f->tx_filters);
+	kfree(f);
+}
+EXPORT_SYMBOL(cfg80211_free_nan_func);
+
 /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */
 /* Ethernet-II snap header (RFC1042 for most EtherTypes) */
 const unsigned char rfc1042_header[] __aligned(2) =
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 9f27221..a220156 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -406,12 +406,16 @@
 	if (pairwise && !addr)
 		return -EINVAL;
 
+	/*
+	 * In many cases we won't actually need this, but it's better
+	 * to do it first in case the allocation fails. Don't use wext.
+	 */
 	if (!wdev->wext.keys) {
 		wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys),
-					      GFP_KERNEL);
+					  GFP_KERNEL);
 		if (!wdev->wext.keys)
 			return -ENOMEM;
-		for (i = 0; i < 6; i++)
+		for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
 			wdev->wext.keys->params[i].key =
 				wdev->wext.keys->data[i];
 	}
@@ -460,7 +464,7 @@
 		if (err == -ENOENT)
 			err = 0;
 		if (!err) {
-			if (!addr) {
+			if (!addr && idx < 4) {
 				memset(wdev->wext.keys->data[idx], 0,
 				       sizeof(wdev->wext.keys->data[idx]));
 				wdev->wext.keys->params[idx].key_len = 0;
@@ -487,10 +491,19 @@
 	err = 0;
 	if (wdev->current_bss)
 		err = rdev_add_key(rdev, dev, idx, pairwise, addr, params);
+	else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 &&
+		 params->cipher != WLAN_CIPHER_SUITE_WEP104)
+		return -EINVAL;
 	if (err)
 		return err;
 
-	if (!addr) {
+	/*
+	 * We only need to store WEP keys, since they're the only keys that
+	 * can be be set before a connection is established and persist after
+	 * disconnecting.
+	 */
+	if (!addr && (params->cipher == WLAN_CIPHER_SUITE_WEP40 ||
+		      params->cipher == WLAN_CIPHER_SUITE_WEP104)) {
 		wdev->wext.keys->params[idx] = *params;
 		memcpy(wdev->wext.keys->data[idx],
 			params->key, params->key_len);
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index dbb2738e..6250b1c 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -958,29 +958,8 @@
 			return private(dev, iwr, cmd, info, handler);
 	}
 	/* Old driver API : call driver ioctl handler */
-	if (dev->netdev_ops->ndo_do_ioctl) {
-#ifdef CONFIG_COMPAT
-		if (info->flags & IW_REQUEST_FLAG_COMPAT) {
-			int ret = 0;
-			struct iwreq iwr_lcl;
-			struct compat_iw_point *iwp_compat = (void *) &iwr->u.data;
-
-			memcpy(&iwr_lcl, iwr, sizeof(struct iwreq));
-			iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer);
-			iwr_lcl.u.data.length = iwp_compat->length;
-			iwr_lcl.u.data.flags = iwp_compat->flags;
-
-			ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd);
-
-			iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer);
-			iwp_compat->length = iwr_lcl.u.data.length;
-			iwp_compat->flags = iwr_lcl.u.data.flags;
-
-			return ret;
-		} else
-#endif
-			return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
-	}
+	if (dev->netdev_ops->ndo_do_ioctl)
+		return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
 	return -EOPNOTSUPP;
 }
 
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index a4e8af3..9951638 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -35,7 +35,6 @@
 
 	if (wdev->wext.keys) {
 		wdev->wext.keys->def = wdev->wext.default_key;
-		wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key;
 		if (wdev->wext.default_key != -1)
 			wdev->wext.connect.privacy = true;
 	}
@@ -43,11 +42,11 @@
 	if (!wdev->wext.connect.ssid_len)
 		return 0;
 
-	if (wdev->wext.keys) {
+	if (wdev->wext.keys && wdev->wext.keys->def != -1) {
 		ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
 		if (!ck)
 			return -ENOMEM;
-		for (i = 0; i < 6; i++)
+		for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
 			ck->params[i].key = ck->data[i];
 	}
 
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index a750f33..f83b74d 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1500,12 +1500,8 @@
 			goto out_dtefac_release;
 		if (dtefacs.calling_len > X25_MAX_AE_LEN)
 			goto out_dtefac_release;
-		if (dtefacs.calling_ae == NULL)
-			goto out_dtefac_release;
 		if (dtefacs.called_len > X25_MAX_AE_LEN)
 			goto out_dtefac_release;
-		if (dtefacs.called_ae == NULL)
-			goto out_dtefac_release;
 		x25->dte_facilities = dtefacs;
 		rc = 0;
 out_dtefac_release:
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 250e567..44ac85f 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -17,7 +17,7 @@
 #include <linux/crypto.h>
 #include <linux/scatterlist.h>
 #include <net/xfrm.h>
-#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
+#if IS_ENABLED(CONFIG_INET_ESP) || IS_ENABLED(CONFIG_INET6_ESP)
 #include <net/esp.h>
 #endif
 
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 1c4ad47..6e3f025 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -207,15 +207,15 @@
 	family = XFRM_SPI_SKB_CB(skb)->family;
 
 	/* if tunnel is present override skb->mark value with tunnel i_key */
-	if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) {
-		switch (family) {
-		case AF_INET:
+	switch (family) {
+	case AF_INET:
+		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4)
 			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
-			break;
-		case AF_INET6:
+		break;
+	case AF_INET6:
+		if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6)
 			mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
-			break;
-		}
+		break;
 	}
 
 	/* Allocate new secpath or COW existing one. */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index b5e665b..fd69866 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -49,6 +49,7 @@
 						__read_mostly;
 
 static struct kmem_cache *xfrm_dst_cache __read_mostly;
+static __read_mostly seqcount_t xfrm_policy_hash_generation;
 
 static void xfrm_init_pmtu(struct dst_entry *dst);
 static int stale_bundle(struct dst_entry *dst);
@@ -59,6 +60,11 @@
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
 
+static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
+{
+	return atomic_inc_not_zero(&policy->refcnt);
+}
+
 static inline bool
 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
 {
@@ -385,9 +391,11 @@
 	__get_hash_thresh(net, family, dir, &dbits, &sbits);
 	hash = __sel_hash(sel, family, hmask, dbits, sbits);
 
-	return (hash == hmask + 1 ?
-		&net->xfrm.policy_inexact[dir] :
-		net->xfrm.policy_bydst[dir].table + hash);
+	if (hash == hmask + 1)
+		return &net->xfrm.policy_inexact[dir];
+
+	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
+		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
 }
 
 static struct hlist_head *policy_hash_direct(struct net *net,
@@ -403,7 +411,8 @@
 	__get_hash_thresh(net, family, dir, &dbits, &sbits);
 	hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
 
-	return net->xfrm.policy_bydst[dir].table + hash;
+	return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
+		     lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
 }
 
 static void xfrm_dst_hash_transfer(struct net *net,
@@ -426,14 +435,14 @@
 		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
 				pol->family, nhashmask, dbits, sbits);
 		if (!entry0) {
-			hlist_del(&pol->bydst);
-			hlist_add_head(&pol->bydst, ndsttable+h);
+			hlist_del_rcu(&pol->bydst);
+			hlist_add_head_rcu(&pol->bydst, ndsttable + h);
 			h0 = h;
 		} else {
 			if (h != h0)
 				continue;
-			hlist_del(&pol->bydst);
-			hlist_add_behind(&pol->bydst, entry0);
+			hlist_del_rcu(&pol->bydst);
+			hlist_add_behind_rcu(&pol->bydst, entry0);
 		}
 		entry0 = &pol->bydst;
 	}
@@ -468,22 +477,32 @@
 	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
-	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
 	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
+	struct hlist_head *odst;
 	int i;
 
 	if (!ndst)
 		return;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
+	write_seqcount_begin(&xfrm_policy_hash_generation);
+
+	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
+				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
+
+	odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
+				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
 
 	for (i = hmask; i >= 0; i--)
 		xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
 
-	net->xfrm.policy_bydst[dir].table = ndst;
+	rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
 	net->xfrm.policy_bydst[dir].hmask = nhashmask;
 
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	write_seqcount_end(&xfrm_policy_hash_generation);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
+
+	synchronize_rcu();
 
 	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
 }
@@ -500,7 +519,7 @@
 	if (!nidx)
 		return;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
 	for (i = hmask; i >= 0; i--)
 		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
@@ -508,7 +527,7 @@
 	net->xfrm.policy_byidx = nidx;
 	net->xfrm.policy_idx_hmask = nhashmask;
 
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
 }
@@ -541,7 +560,6 @@
 
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
 {
-	read_lock_bh(&net->xfrm.xfrm_policy_lock);
 	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
 	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
 	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
@@ -550,7 +568,6 @@
 	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
 	si->spdhcnt = net->xfrm.policy_idx_hmask;
 	si->spdhmcnt = xfrm_policy_hashmax;
-	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 }
 EXPORT_SYMBOL(xfrm_spd_getinfo);
 
@@ -600,7 +617,7 @@
 		rbits6 = net->xfrm.policy_hthresh.rbits6;
 	} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
 	/* reset the bydst and inexact table in all directions */
 	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
@@ -626,6 +643,10 @@
 
 	/* re-insert all policies by order of creation */
 	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
+		if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
+			/* skip socket policies */
+			continue;
+		}
 		newpos = NULL;
 		chain = policy_hash_bysel(net, &policy->selector,
 					  policy->family,
@@ -642,7 +663,7 @@
 			hlist_add_head(&policy->bydst, chain);
 	}
 
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	mutex_unlock(&hash_resize_mutex);
 }
@@ -753,7 +774,7 @@
 	struct hlist_head *chain;
 	struct hlist_node *newpos;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
 	delpol = NULL;
 	newpos = NULL;
@@ -764,7 +785,7 @@
 		    xfrm_sec_ctx_match(pol->security, policy->security) &&
 		    !WARN_ON(delpol)) {
 			if (excl) {
-				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+				spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 				return -EEXIST;
 			}
 			delpol = pol;
@@ -800,7 +821,7 @@
 	policy->curlft.use_time = 0;
 	if (!mod_timer(&policy->timer, jiffies + HZ))
 		xfrm_pol_hold(policy);
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (delpol)
 		xfrm_policy_kill(delpol);
@@ -820,7 +841,7 @@
 	struct hlist_head *chain;
 
 	*err = 0;
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	chain = policy_hash_bysel(net, sel, sel->family, dir);
 	ret = NULL;
 	hlist_for_each_entry(pol, chain, bydst) {
@@ -833,7 +854,7 @@
 				*err = security_xfrm_policy_delete(
 								pol->security);
 				if (*err) {
-					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+					spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 					return pol;
 				}
 				__xfrm_policy_unlink(pol, dir);
@@ -842,7 +863,7 @@
 			break;
 		}
 	}
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (ret && delete)
 		xfrm_policy_kill(ret);
@@ -861,7 +882,7 @@
 		return NULL;
 
 	*err = 0;
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	chain = net->xfrm.policy_byidx + idx_hash(net, id);
 	ret = NULL;
 	hlist_for_each_entry(pol, chain, byidx) {
@@ -872,7 +893,7 @@
 				*err = security_xfrm_policy_delete(
 								pol->security);
 				if (*err) {
-					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+					spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 					return pol;
 				}
 				__xfrm_policy_unlink(pol, dir);
@@ -881,7 +902,7 @@
 			break;
 		}
 	}
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (ret && delete)
 		xfrm_policy_kill(ret);
@@ -939,7 +960,7 @@
 {
 	int dir, err = 0, cnt = 0;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 
 	err = xfrm_policy_flush_secctx_check(net, type, task_valid);
 	if (err)
@@ -955,14 +976,14 @@
 			if (pol->type != type)
 				continue;
 			__xfrm_policy_unlink(pol, dir);
-			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+			spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 			cnt++;
 
 			xfrm_audit_policy_delete(pol, 1, task_valid);
 
 			xfrm_policy_kill(pol);
 
-			write_lock_bh(&net->xfrm.xfrm_policy_lock);
+			spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 			goto again1;
 		}
 
@@ -974,13 +995,13 @@
 				if (pol->type != type)
 					continue;
 				__xfrm_policy_unlink(pol, dir);
-				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+				spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 				cnt++;
 
 				xfrm_audit_policy_delete(pol, 1, task_valid);
 				xfrm_policy_kill(pol);
 
-				write_lock_bh(&net->xfrm.xfrm_policy_lock);
+				spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 				goto again2;
 			}
 		}
@@ -989,7 +1010,7 @@
 	if (!cnt)
 		err = -ESRCH;
 out:
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	return err;
 }
 EXPORT_SYMBOL(xfrm_policy_flush);
@@ -1009,7 +1030,7 @@
 	if (list_empty(&walk->walk.all) && walk->seq != 0)
 		return 0;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	if (list_empty(&walk->walk.all))
 		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
 	else
@@ -1037,7 +1058,7 @@
 	}
 	list_del_init(&walk->walk.all);
 out:
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	return error;
 }
 EXPORT_SYMBOL(xfrm_policy_walk);
@@ -1056,9 +1077,9 @@
 	if (list_empty(&walk->walk.all))
 		return;
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
 	list_del(&walk->walk.all);
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 }
 EXPORT_SYMBOL(xfrm_policy_walk_done);
 
@@ -1096,17 +1117,24 @@
 	struct xfrm_policy *pol, *ret;
 	const xfrm_address_t *daddr, *saddr;
 	struct hlist_head *chain;
-	u32 priority = ~0U;
+	unsigned int sequence;
+	u32 priority;
 
 	daddr = xfrm_flowi_daddr(fl, family);
 	saddr = xfrm_flowi_saddr(fl, family);
 	if (unlikely(!daddr || !saddr))
 		return NULL;
 
-	read_lock_bh(&net->xfrm.xfrm_policy_lock);
-	chain = policy_hash_direct(net, daddr, saddr, family, dir);
+	rcu_read_lock();
+ retry:
+	do {
+		sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
+		chain = policy_hash_direct(net, daddr, saddr, family, dir);
+	} while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
+
+	priority = ~0U;
 	ret = NULL;
-	hlist_for_each_entry(pol, chain, bydst) {
+	hlist_for_each_entry_rcu(pol, chain, bydst) {
 		err = xfrm_policy_match(pol, fl, type, family, dir);
 		if (err) {
 			if (err == -ESRCH)
@@ -1122,7 +1150,7 @@
 		}
 	}
 	chain = &net->xfrm.policy_inexact[dir];
-	hlist_for_each_entry(pol, chain, bydst) {
+	hlist_for_each_entry_rcu(pol, chain, bydst) {
 		if ((pol->priority >= priority) && ret)
 			break;
 
@@ -1140,9 +1168,13 @@
 		}
 	}
 
-	xfrm_pol_hold(ret);
+	if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
+		goto retry;
+
+	if (ret && !xfrm_pol_hold_rcu(ret))
+		goto retry;
 fail:
-	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -1219,10 +1251,9 @@
 						 const struct flowi *fl)
 {
 	struct xfrm_policy *pol;
-	struct net *net = sock_net(sk);
 
 	rcu_read_lock();
-	read_lock_bh(&net->xfrm.xfrm_policy_lock);
+ again:
 	pol = rcu_dereference(sk->sk_policy[dir]);
 	if (pol != NULL) {
 		bool match = xfrm_selector_match(&pol->selector, fl,
@@ -1237,8 +1268,8 @@
 			err = security_xfrm_policy_lookup(pol->security,
 						      fl->flowi_secid,
 						      policy_to_flow_dir(dir));
-			if (!err)
-				xfrm_pol_hold(pol);
+			if (!err && !xfrm_pol_hold_rcu(pol))
+				goto again;
 			else if (err == -ESRCH)
 				pol = NULL;
 			else
@@ -1247,7 +1278,6 @@
 			pol = NULL;
 	}
 out:
-	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	rcu_read_unlock();
 	return pol;
 }
@@ -1271,7 +1301,7 @@
 
 	/* Socket policies are not hashed. */
 	if (!hlist_unhashed(&pol->bydst)) {
-		hlist_del(&pol->bydst);
+		hlist_del_rcu(&pol->bydst);
 		hlist_del(&pol->byidx);
 	}
 
@@ -1295,9 +1325,9 @@
 {
 	struct net *net = xp_net(pol);
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	pol = __xfrm_policy_unlink(pol, dir);
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 	if (pol) {
 		xfrm_policy_kill(pol);
 		return 0;
@@ -1316,7 +1346,7 @@
 		return -EINVAL;
 #endif
 
-	write_lock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	old_pol = rcu_dereference_protected(sk->sk_policy[dir],
 				lockdep_is_held(&net->xfrm.xfrm_policy_lock));
 	if (pol) {
@@ -1334,7 +1364,7 @@
 		 */
 		xfrm_sk_policy_unlink(old_pol, dir);
 	}
-	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	if (old_pol) {
 		xfrm_policy_kill(old_pol);
@@ -1364,9 +1394,9 @@
 		newp->type = old->type;
 		memcpy(newp->xfrm_vec, old->xfrm_vec,
 		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
-		write_lock_bh(&net->xfrm.xfrm_policy_lock);
+		spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 		xfrm_sk_policy_link(newp, dir);
-		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
+		spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 		xfrm_pol_put(newp);
 	}
 	return newp;
@@ -3048,7 +3078,7 @@
 
 	/* Initialize the per-net locks here */
 	spin_lock_init(&net->xfrm.xfrm_state_lock);
-	rwlock_init(&net->xfrm.xfrm_policy_lock);
+	spin_lock_init(&net->xfrm.xfrm_policy_lock);
 	mutex_init(&net->xfrm.xfrm_cfg_mutex);
 
 	return 0;
@@ -3082,6 +3112,7 @@
 void __init xfrm_init(void)
 {
 	register_pernet_subsys(&xfrm_net_ops);
+	seqcount_init(&xfrm_policy_hash_generation);
 	xfrm_input_init();
 }
 
@@ -3179,7 +3210,7 @@
 	struct hlist_head *chain;
 	u32 priority = ~0U;
 
-	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
+	spin_lock_bh(&net->xfrm.xfrm_policy_lock);
 	chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
 	hlist_for_each_entry(pol, chain, bydst) {
 		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
@@ -3203,7 +3234,7 @@
 
 	xfrm_pol_hold(ret);
 
-	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
+	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
 
 	return ret;
 }
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index 9c4fbd8..ba2b539 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -50,12 +50,18 @@
 
 static int xfrm_statistics_seq_show(struct seq_file *seq, void *v)
 {
+	unsigned long buff[LINUX_MIB_XFRMMAX];
 	struct net *net = seq->private;
 	int i;
+
+	memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX);
+
+	snmp_get_cpu_field_batch(buff, xfrm_mib_list,
+				 net->mib.xfrm_statistics);
 	for (i = 0; xfrm_mib_list[i].name; i++)
 		seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name,
-			   snmp_fold_field(net->mib.xfrm_statistics,
-					   xfrm_mib_list[i].entry));
+						buff[i]);
+
 	return 0;
 }
 
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index 4fd725a..cdc2e2e 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -558,7 +558,7 @@
 		x->repl->notify(x, XFRM_REPLAY_UPDATE);
 }
 
-static struct xfrm_replay xfrm_replay_legacy = {
+static const struct xfrm_replay xfrm_replay_legacy = {
 	.advance	= xfrm_replay_advance,
 	.check		= xfrm_replay_check,
 	.recheck	= xfrm_replay_check,
@@ -566,7 +566,7 @@
 	.overflow	= xfrm_replay_overflow,
 };
 
-static struct xfrm_replay xfrm_replay_bmp = {
+static const struct xfrm_replay xfrm_replay_bmp = {
 	.advance	= xfrm_replay_advance_bmp,
 	.check		= xfrm_replay_check_bmp,
 	.recheck	= xfrm_replay_check_bmp,
@@ -574,7 +574,7 @@
 	.overflow	= xfrm_replay_overflow_bmp,
 };
 
-static struct xfrm_replay xfrm_replay_esn = {
+static const struct xfrm_replay xfrm_replay_esn = {
 	.advance	= xfrm_replay_advance_esn,
 	.check		= xfrm_replay_check_esn,
 	.recheck	= xfrm_replay_recheck_esn,
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9895a8c..419bf5d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -28,6 +28,11 @@
 
 #include "xfrm_hash.h"
 
+#define xfrm_state_deref_prot(table, net) \
+	rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+
+static void xfrm_state_gc_task(struct work_struct *work);
+
 /* Each xfrm_state may be linked to two tables:
 
    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
@@ -36,6 +41,15 @@
  */
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
+
+static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
+static HLIST_HEAD(xfrm_state_gc_list);
+
+static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+{
+	return atomic_inc_not_zero(&x->refcnt);
+}
 
 static inline unsigned int xfrm_dst_hash(struct net *net,
 					 const xfrm_address_t *daddr,
@@ -76,18 +90,18 @@
 		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
 				    x->props.reqid, x->props.family,
 				    nhashmask);
-		hlist_add_head(&x->bydst, ndsttable+h);
+		hlist_add_head_rcu(&x->bydst, ndsttable + h);
 
 		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
 				    x->props.family,
 				    nhashmask);
-		hlist_add_head(&x->bysrc, nsrctable+h);
+		hlist_add_head_rcu(&x->bysrc, nsrctable + h);
 
 		if (x->id.spi) {
 			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
 					    x->id.proto, x->props.family,
 					    nhashmask);
-			hlist_add_head(&x->byspi, nspitable+h);
+			hlist_add_head_rcu(&x->byspi, nspitable + h);
 		}
 	}
 }
@@ -122,25 +136,29 @@
 	}
 
 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	write_seqcount_begin(&xfrm_state_hash_generation);
 
 	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+	odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
 	for (i = net->xfrm.state_hmask; i >= 0; i--)
-		xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
-				   nhashmask);
+		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
 
-	odst = net->xfrm.state_bydst;
-	osrc = net->xfrm.state_bysrc;
-	ospi = net->xfrm.state_byspi;
+	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
+	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
 	ohashmask = net->xfrm.state_hmask;
 
-	net->xfrm.state_bydst = ndst;
-	net->xfrm.state_bysrc = nsrc;
-	net->xfrm.state_byspi = nspi;
+	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
 	net->xfrm.state_hmask = nhashmask;
 
+	write_seqcount_end(&xfrm_state_hash_generation);
 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 	osize = (ohashmask + 1) * sizeof(struct hlist_head);
+
+	synchronize_rcu();
+
 	xfrm_hash_free(odst, osize);
 	xfrm_hash_free(osrc, osize);
 	xfrm_hash_free(ospi, osize);
@@ -332,6 +350,7 @@
 {
 	tasklet_hrtimer_cancel(&x->mtimer);
 	del_timer_sync(&x->rtimer);
+	kfree(x->aead);
 	kfree(x->aalg);
 	kfree(x->ealg);
 	kfree(x->calg);
@@ -355,15 +374,16 @@
 
 static void xfrm_state_gc_task(struct work_struct *work)
 {
-	struct net *net = container_of(work, struct net, xfrm.state_gc_work);
 	struct xfrm_state *x;
 	struct hlist_node *tmp;
 	struct hlist_head gc_list;
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
+	hlist_move_list(&xfrm_state_gc_list, &gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
 
+	synchronize_rcu();
+
 	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
 		xfrm_state_gc_destroy(x);
 }
@@ -500,14 +520,12 @@
 
 void __xfrm_state_destroy(struct xfrm_state *x)
 {
-	struct net *net = xs_net(x);
-
 	WARN_ON(x->km.state != XFRM_STATE_DEAD);
 
 	spin_lock_bh(&xfrm_state_gc_lock);
-	hlist_add_head(&x->gclist, &net->xfrm.state_gc_list);
+	hlist_add_head(&x->gclist, &xfrm_state_gc_list);
 	spin_unlock_bh(&xfrm_state_gc_lock);
-	schedule_work(&net->xfrm.state_gc_work);
+	schedule_work(&xfrm_state_gc_work);
 }
 EXPORT_SYMBOL(__xfrm_state_destroy);
 
@@ -520,10 +538,10 @@
 		x->km.state = XFRM_STATE_DEAD;
 		spin_lock(&net->xfrm.xfrm_state_lock);
 		list_del(&x->km.all);
-		hlist_del(&x->bydst);
-		hlist_del(&x->bysrc);
+		hlist_del_rcu(&x->bydst);
+		hlist_del_rcu(&x->bysrc);
 		if (x->id.spi)
-			hlist_del(&x->byspi);
+			hlist_del_rcu(&x->byspi);
 		net->xfrm.state_num--;
 		spin_unlock(&net->xfrm.xfrm_state_lock);
 
@@ -659,7 +677,7 @@
 	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
 	struct xfrm_state *x;
 
-	hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
 		if (x->props.family != family ||
 		    x->id.spi       != spi ||
 		    x->id.proto     != proto ||
@@ -668,7 +686,8 @@
 
 		if ((mark & x->mark.m) != x->mark.v)
 			continue;
-		xfrm_state_hold(x);
+		if (!xfrm_state_hold_rcu(x))
+			continue;
 		return x;
 	}
 
@@ -683,7 +702,7 @@
 	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
 	struct xfrm_state *x;
 
-	hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
 		if (x->props.family != family ||
 		    x->id.proto     != proto ||
 		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@ -692,7 +711,8 @@
 
 		if ((mark & x->mark.m) != x->mark.v)
 			continue;
-		xfrm_state_hold(x);
+		if (!xfrm_state_hold_rcu(x))
+			continue;
 		return x;
 	}
 
@@ -775,13 +795,16 @@
 	struct xfrm_state *best = NULL;
 	u32 mark = pol->mark.v & pol->mark.m;
 	unsigned short encap_family = tmpl->encap_family;
+	unsigned int sequence;
 	struct km_event c;
 
 	to_put = NULL;
 
-	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+
+	rcu_read_lock();
 	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
-	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -797,7 +820,7 @@
 		goto found;
 
 	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
-	hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
+	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
 		if (x->props.family == encap_family &&
 		    x->props.reqid == tmpl->reqid &&
 		    (mark & x->mark.m) == x->mark.v &&
@@ -850,19 +873,21 @@
 		}
 
 		if (km_query(x, tmpl, pol) == 0) {
+			spin_lock_bh(&net->xfrm.xfrm_state_lock);
 			x->km.state = XFRM_STATE_ACQ;
 			list_add(&x->km.all, &net->xfrm.state_all);
-			hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+			hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 			h = xfrm_src_hash(net, daddr, saddr, encap_family);
-			hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+			hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 			if (x->id.spi) {
 				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
-				hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+				hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 			}
 			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
 			tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
 			net->xfrm.state_num++;
 			xfrm_hash_grow_check(net, x->bydst.next != NULL);
+			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 		} else {
 			x->km.state = XFRM_STATE_DEAD;
 			to_put = x;
@@ -871,13 +896,26 @@
 		}
 	}
 out:
-	if (x)
-		xfrm_state_hold(x);
-	else
+	if (x) {
+		if (!xfrm_state_hold_rcu(x)) {
+			*err = -EAGAIN;
+			x = NULL;
+		}
+	} else {
 		*err = acquire_in_progress ? -EAGAIN : error;
-	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	}
+	rcu_read_unlock();
 	if (to_put)
 		xfrm_state_put(to_put);
+
+	if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+		*err = -EAGAIN;
+		if (x) {
+			xfrm_state_put(x);
+			x = NULL;
+		}
+	}
+
 	return x;
 }
 
@@ -945,16 +983,16 @@
 
 	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
 			  x->props.reqid, x->props.family);
-	hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+	hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 
 	h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
-	hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+	hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
 	if (x->id.spi) {
 		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
 				  x->props.family);
 
-		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 	}
 
 	tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
@@ -1063,9 +1101,9 @@
 		xfrm_state_hold(x);
 		tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
 		list_add(&x->km.all, &net->xfrm.state_all);
-		hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+		hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
 		h = xfrm_src_hash(net, daddr, saddr, family);
-		hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+		hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
 
 		net->xfrm.state_num++;
 
@@ -1394,9 +1432,9 @@
 {
 	struct xfrm_state *x;
 
-	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+	rcu_read_lock();
 	x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
-	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+	rcu_read_unlock();
 	return x;
 }
 EXPORT_SYMBOL(xfrm_state_lookup);
@@ -1581,7 +1619,7 @@
 	if (x->id.spi) {
 		spin_lock_bh(&net->xfrm.xfrm_state_lock);
 		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
-		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
 		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
 
 		err = 0;
@@ -2099,8 +2137,6 @@
 
 	net->xfrm.state_num = 0;
 	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
-	INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
-	INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
 	spin_lock_init(&net->xfrm.xfrm_state_lock);
 	return 0;
 
@@ -2118,7 +2154,7 @@
 
 	flush_work(&net->xfrm.state_hash_work);
 	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
-	flush_work(&net->xfrm.state_gc_work);
+	flush_work(&xfrm_state_gc_work);
 
 	WARN_ON(!list_empty(&net->xfrm.state_all));
 
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 05a6e3d..35a7e79 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -17,13 +17,13 @@
 		.procname	= "xfrm_aevent_etime",
 		.maxlen		= sizeof(u32),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_douintvec
 	},
 	{
 		.procname	= "xfrm_aevent_rseqth",
 		.maxlen		= sizeof(u32),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_douintvec
 	},
 	{
 		.procname	= "xfrm_larval_drop",
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d516845..0889209 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -581,9 +581,12 @@
 	if (err)
 		goto error;
 
-	if (attrs[XFRMA_SEC_CTX] &&
-	    security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX])))
-		goto error;
+	if (attrs[XFRMA_SEC_CTX]) {
+		err = security_xfrm_state_alloc(x,
+						nla_data(attrs[XFRMA_SEC_CTX]));
+		if (err)
+			goto error;
+	}
 
 	if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
 					       attrs[XFRMA_REPLAY_ESN_VAL])))
@@ -896,7 +899,8 @@
 	struct sock *sk = cb->skb->sk;
 	struct net *net = sock_net(sk);
 
-	xfrm_state_walk_done(walk, net);
+	if (cb->args[0])
+		xfrm_state_walk_done(walk, net);
 	return 0;
 }
 
@@ -921,8 +925,6 @@
 		u8 proto = 0;
 		int err;
 
-		cb->args[0] = 1;
-
 		err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
 				  xfrma_policy);
 		if (err < 0)
@@ -939,6 +941,7 @@
 			proto = nla_get_u8(attrs[XFRMA_PROTO]);
 
 		xfrm_state_walk_init(walk, proto, filter);
+		cb->args[0] = 1;
 	}
 
 	(void) xfrm_state_walk(net, walk, dump_one_state, &info);
@@ -2051,9 +2054,6 @@
 	if (up->hard) {
 		xfrm_policy_delete(xp, p->dir);
 		xfrm_audit_policy_delete(xp, 1, true);
-	} else {
-		// reset the timers here?
-		WARN(1, "Don't know what to do with soft policy expire\n");
 	}
 	km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid);
 
@@ -2117,7 +2117,7 @@
 
 	err = verify_newpolicy_info(&ua->policy);
 	if (err)
-		goto bad_policy;
+		goto free_state;
 
 	/*   build an XP */
 	xp = xfrm_policy_construct(net, &ua->policy, attrs, &err);
@@ -2149,8 +2149,6 @@
 
 	return 0;
 
-bad_policy:
-	WARN(1, "BAD policy passed\n");
 free_state:
 	kfree(x);
 nomem:
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 90ebf7d..12b7304 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -24,6 +24,9 @@
 hostprogs-y += test_cgrp2_array_pin
 hostprogs-y += xdp1
 hostprogs-y += xdp2
+hostprogs-y += test_current_task_under_cgroup
+hostprogs-y += trace_event
+hostprogs-y += sampleip
 
 test_verifier-objs := test_verifier.o libbpf.o
 test_maps-objs := test_maps.o libbpf.o
@@ -49,6 +52,10 @@
 xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
 # reuse xdp1 source intentionally
 xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
+test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
+				       test_current_task_under_cgroup_user.o
+trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
+sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -64,6 +71,7 @@
 always += test_probe_write_user_kern.o
 always += trace_output_kern.o
 always += tcbpf1_kern.o
+always += tcbpf2_kern.o
 always += lathist_kern.o
 always += offwaketime_kern.o
 always += spintest_kern.o
@@ -74,6 +82,9 @@
 always += test_cgrp2_tc_kern.o
 always += xdp1_kern.o
 always += xdp2_kern.o
+always += test_current_task_under_cgroup_kern.o
+always += trace_event_kern.o
+always += sampleip_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
@@ -97,6 +108,9 @@
 HOSTLOADLIBES_test_overhead += -lelf -lrt
 HOSTLOADLIBES_xdp1 += -lelf
 HOSTLOADLIBES_xdp2 += -lelf
+HOSTLOADLIBES_test_current_task_under_cgroup += -lelf
+HOSTLOADLIBES_trace_event += -lelf
+HOSTLOADLIBES_sampleip += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 217c8d507..90f44bd 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -37,12 +37,26 @@
 	(void *) BPF_FUNC_clone_redirect;
 static int (*bpf_redirect)(int ifindex, int flags) =
 	(void *) BPF_FUNC_redirect;
-static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) =
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+				    unsigned long long flags, void *data,
+				    int size) =
 	(void *) BPF_FUNC_perf_event_output;
 static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
 	(void *) BPF_FUNC_get_stackid;
 static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
 	(void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+	(void *) BPF_FUNC_current_task_under_cgroup;
+static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
+	(void *) BPF_FUNC_skb_get_tunnel_key;
+static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
+	(void *) BPF_FUNC_skb_set_tunnel_key;
+static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
+	(void *) BPF_FUNC_skb_get_tunnel_opt;
+static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
+	(void *) BPF_FUNC_skb_set_tunnel_opt;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+	(void *) BPF_FUNC_get_prandom_u32;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -72,8 +86,8 @@
 	(void *) BPF_FUNC_l3_csum_replace;
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
 	(void *) BPF_FUNC_l4_csum_replace;
-static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) =
-	(void *) BPF_FUNC_skb_in_cgroup;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+	(void *) BPF_FUNC_skb_under_cgroup;
 
 #if defined(__x86_64__)
 
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 0cfda232..97913e1 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -51,6 +51,7 @@
 	bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0;
 	bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
 	bool is_xdp = strncmp(event, "xdp", 3) == 0;
+	bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
 	enum bpf_prog_type prog_type;
 	char buf[256];
 	int fd, efd, err, id;
@@ -69,6 +70,8 @@
 		prog_type = BPF_PROG_TYPE_TRACEPOINT;
 	} else if (is_xdp) {
 		prog_type = BPF_PROG_TYPE_XDP;
+	} else if (is_perf_event) {
+		prog_type = BPF_PROG_TYPE_PERF_EVENT;
 	} else {
 		printf("Unknown event '%s'\n", event);
 		return -1;
@@ -82,7 +85,7 @@
 
 	prog_fd[prog_cnt++] = fd;
 
-	if (is_xdp)
+	if (is_xdp || is_perf_event)
 		return 0;
 
 	if (is_socket) {
@@ -326,6 +329,7 @@
 			    memcmp(shname_prog, "kretprobe/", 10) == 0 ||
 			    memcmp(shname_prog, "tracepoint/", 11) == 0 ||
 			    memcmp(shname_prog, "xdp", 3) == 0 ||
+			    memcmp(shname_prog, "perf_event", 10) == 0 ||
 			    memcmp(shname_prog, "socket", 6) == 0)
 				load_and_attach(shname_prog, insns, data_prog->d_size);
 		}
@@ -344,6 +348,7 @@
 		    memcmp(shname, "kretprobe/", 10) == 0 ||
 		    memcmp(shname, "tracepoint/", 11) == 0 ||
 		    memcmp(shname, "xdp", 3) == 0 ||
+		    memcmp(shname, "perf_event", 10) == 0 ||
 		    memcmp(shname, "socket", 6) == 0)
 			load_and_attach(shname, data->d_buf, data->d_size);
 	}
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index 364582b..ac6edb6 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -85,6 +85,14 @@
 		.off   = 0,					\
 		.imm   = IMM })
 
+#define BPF_MOV32_IMM(DST, IMM)					\
+	((struct bpf_insn) {					\
+		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
 /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
 #define BPF_LD_IMM64(DST, IMM)					\
 	BPF_LD_IMM64_RAW(DST, 0, IMM)
diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
new file mode 100644
index 0000000..774a681
--- /dev/null
+++ b/samples/bpf/sampleip_kern.c
@@ -0,0 +1,38 @@
+/* Copyright 2016 Netflix, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/version.h>
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/bpf_perf_event.h>
+#include "bpf_helpers.h"
+
+#define MAX_IPS		8192
+
+struct bpf_map_def SEC("maps") ip_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(u64),
+	.value_size = sizeof(u32),
+	.max_entries = MAX_IPS,
+};
+
+SEC("perf_event")
+int do_sample(struct bpf_perf_event_data *ctx)
+{
+	u64 ip;
+	u32 *value, init_val = 1;
+
+	ip = ctx->regs.ip;
+	value = bpf_map_lookup_elem(&ip_map, &ip);
+	if (value)
+		*value += 1;
+	else
+		/* E2BIG not tested for this example only */
+		bpf_map_update_elem(&ip_map, &ip, &init_val, BPF_NOEXIST);
+
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
new file mode 100644
index 0000000..260a6bd
--- /dev/null
+++ b/samples/bpf/sampleip_user.c
@@ -0,0 +1,196 @@
+/*
+ * sampleip: sample instruction pointer and frequency count in a BPF map.
+ *
+ * Copyright 2016 Netflix, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+#include <linux/perf_event.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define DEFAULT_FREQ	99
+#define DEFAULT_SECS	5
+#define MAX_IPS		8192
+#define PAGE_OFFSET	0xffff880000000000
+
+static int nr_cpus;
+
+static void usage(void)
+{
+	printf("USAGE: sampleip [-F freq] [duration]\n");
+	printf("       -F freq    # sample frequency (Hertz), default 99\n");
+	printf("       duration   # sampling duration (seconds), default 5\n");
+}
+
+static int sampling_start(int *pmu_fd, int freq)
+{
+	int i;
+
+	struct perf_event_attr pe_sample_attr = {
+		.type = PERF_TYPE_SOFTWARE,
+		.freq = 1,
+		.sample_period = freq,
+		.config = PERF_COUNT_SW_CPU_CLOCK,
+		.inherit = 1,
+	};
+
+	for (i = 0; i < nr_cpus; i++) {
+		pmu_fd[i] = perf_event_open(&pe_sample_attr, -1 /* pid */, i,
+					    -1 /* group_fd */, 0 /* flags */);
+		if (pmu_fd[i] < 0) {
+			fprintf(stderr, "ERROR: Initializing perf sampling\n");
+			return 1;
+		}
+		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF,
+			     prog_fd[0]) == 0);
+		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+	}
+
+	return 0;
+}
+
+static void sampling_end(int *pmu_fd)
+{
+	int i;
+
+	for (i = 0; i < nr_cpus; i++)
+		close(pmu_fd[i]);
+}
+
+struct ipcount {
+	__u64 ip;
+	__u32 count;
+};
+
+/* used for sorting */
+struct ipcount counts[MAX_IPS];
+
+static int count_cmp(const void *p1, const void *p2)
+{
+	return ((struct ipcount *)p1)->count - ((struct ipcount *)p2)->count;
+}
+
+static void print_ip_map(int fd)
+{
+	struct ksym *sym;
+	__u64 key, next_key;
+	__u32 value;
+	int i, max;
+
+	printf("%-19s %-32s %s\n", "ADDR", "KSYM", "COUNT");
+
+	/* fetch IPs and counts */
+	key = 0, i = 0;
+	while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+		bpf_lookup_elem(fd, &next_key, &value);
+		counts[i].ip = next_key;
+		counts[i++].count = value;
+		key = next_key;
+	}
+	max = i;
+
+	/* sort and print */
+	qsort(counts, max, sizeof(struct ipcount), count_cmp);
+	for (i = 0; i < max; i++) {
+		if (counts[i].ip > PAGE_OFFSET) {
+			sym = ksym_search(counts[i].ip);
+			printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name,
+			       counts[i].count);
+		} else {
+			printf("0x%-17llx %-32s %u\n", counts[i].ip, "(user)",
+			       counts[i].count);
+		}
+	}
+
+	if (max == MAX_IPS) {
+		printf("WARNING: IP hash was full (max %d entries); ", max);
+		printf("may have dropped samples\n");
+	}
+}
+
+static void int_exit(int sig)
+{
+	printf("\n");
+	print_ip_map(map_fd[0]);
+	exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	char filename[256];
+	int *pmu_fd, opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS;
+
+	/* process arguments */
+	while ((opt = getopt(argc, argv, "F:h")) != -1) {
+		switch (opt) {
+		case 'F':
+			freq = atoi(optarg);
+			break;
+		case 'h':
+		default:
+			usage();
+			return 0;
+		}
+	}
+	if (argc - optind == 1)
+		secs = atoi(argv[optind]);
+	if (freq == 0 || secs == 0) {
+		usage();
+		return 1;
+	}
+
+	/* initialize kernel symbol translation */
+	if (load_kallsyms()) {
+		fprintf(stderr, "ERROR: loading /proc/kallsyms\n");
+		return 2;
+	}
+
+	/* create perf FDs for each CPU */
+	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	pmu_fd = malloc(nr_cpus * sizeof(int));
+	if (pmu_fd == NULL) {
+		fprintf(stderr, "ERROR: malloc of pmu_fd\n");
+		return 1;
+	}
+
+	/* load BPF program */
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	if (load_bpf_file(filename)) {
+		fprintf(stderr, "ERROR: loading BPF program (errno %d):\n",
+			errno);
+		if (strcmp(bpf_log_buf, "") == 0)
+			fprintf(stderr, "Try: ulimit -l unlimited\n");
+		else
+			fprintf(stderr, "%s", bpf_log_buf);
+		return 1;
+	}
+	signal(SIGINT, int_exit);
+
+	/* do sampling */
+	printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n",
+	       freq, secs);
+	if (sampling_start(pmu_fd, freq) != 0)
+		return 1;
+	sleep(secs);
+	sampling_end(pmu_fd);
+	free(pmu_fd);
+
+	/* output sample counts */
+	print_ip_map(map_fd[0]);
+
+	return 0;
+}
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index ba0e177..44e5846 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -14,7 +14,7 @@
 	__be16 h_vlan_encapsulated_proto;
 };
 
-struct flow_keys {
+struct bpf_flow_keys {
 	__be32 src;
 	__be32 dst;
 	union {
@@ -59,7 +59,7 @@
 }
 
 static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
-			     struct flow_keys *flow)
+			     struct bpf_flow_keys *flow)
 {
 	__u64 verlen;
 
@@ -83,7 +83,7 @@
 }
 
 static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
-			       struct flow_keys *flow)
+			       struct bpf_flow_keys *flow)
 {
 	*ip_proto = load_byte(skb,
 			      nhoff + offsetof(struct ipv6hdr, nexthdr));
@@ -96,7 +96,7 @@
 	return nhoff;
 }
 
-static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow)
+static inline bool flow_dissector(struct __sk_buff *skb, struct bpf_flow_keys *flow)
 {
 	__u64 nhoff = ETH_HLEN;
 	__u64 ip_proto;
@@ -198,7 +198,7 @@
 SEC("socket2")
 int bpf_prog2(struct __sk_buff *skb)
 {
-	struct flow_keys flow;
+	struct bpf_flow_keys flow;
 	struct pair *value;
 	u32 key;
 
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 41ae2fd..95907f8 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -61,7 +61,7 @@
 	__be16 h_vlan_encapsulated_proto;
 };
 
-struct flow_keys {
+struct bpf_flow_keys {
 	__be32 src;
 	__be32 dst;
 	union {
@@ -88,7 +88,7 @@
 }
 
 struct globals {
-	struct flow_keys flow;
+	struct bpf_flow_keys flow;
 };
 
 struct bpf_map_def SEC("maps") percpu_map = {
@@ -114,14 +114,14 @@
 
 struct bpf_map_def SEC("maps") hash_map = {
 	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct flow_keys),
+	.key_size = sizeof(struct bpf_flow_keys),
 	.value_size = sizeof(struct pair),
 	.max_entries = 1024,
 };
 
 static void update_stats(struct __sk_buff *skb, struct globals *g)
 {
-	struct flow_keys key = g->flow;
+	struct bpf_flow_keys key = g->flow;
 	struct pair *value;
 
 	value = bpf_map_lookup_elem(&hash_map, &key);
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index d4184ab..3fcfd8c 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -7,7 +7,7 @@
 #include <arpa/inet.h>
 #include <sys/resource.h>
 
-struct flow_keys {
+struct bpf_flow_keys {
 	__be32 src;
 	__be32 dst;
 	union {
@@ -49,7 +49,7 @@
 	(void) f;
 
 	for (i = 0; i < 5; i++) {
-		struct flow_keys key = {}, next_key;
+		struct bpf_flow_keys key = {}, next_key;
 		struct pair value;
 
 		sleep(1);
diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c
new file mode 100644
index 0000000..3303bb8
--- /dev/null
+++ b/samples/bpf/tcbpf2_kern.c
@@ -0,0 +1,381 @@
+/* Copyright (c) 2016 VMware
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+#include <uapi/linux/pkt_cls.h>
+#include <net/ipv6.h>
+#include "bpf_helpers.h"
+
+#define _htonl __builtin_bswap32
+#define ERROR(ret) do {\
+		char fmt[] = "ERROR line:%d ret:%d\n";\
+		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
+	} while(0)
+
+struct geneve_opt {
+	__be16	opt_class;
+	u8	type;
+	u8	length:5;
+	u8	r3:1;
+	u8	r2:1;
+	u8	r1:1;
+	u8	opt_data[8]; /* hard-coded to 8 byte */
+};
+
+struct vxlan_metadata {
+	u32     gbp;
+};
+
+SEC("gre_set_tunnel")
+int _gre_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("gre_get_tunnel")
+int _gre_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "key %d remote ip 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+	return TC_ACT_OK;
+}
+
+SEC("vxlan_set_tunnel")
+int _vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("vxlan_get_tunnel")
+int _vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+	char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.gbp);
+
+	return TC_ACT_OK;
+}
+
+SEC("geneve_set_tunnel")
+int _geneve_set_tunnel(struct __sk_buff *skb)
+{
+	int ret, ret2;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	__builtin_memset(&gopt, 0x0, sizeof(gopt));
+	gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */
+	gopt.type = 0x08;
+	gopt.r1 = 1;
+	gopt.r2 = 0;
+	gopt.r3 = 1;
+	gopt.length = 2; /* 4-byte multiple */
+	*(int *) &gopt.opt_data = 0xdeadbeef;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("geneve_get_tunnel")
+int _geneve_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+	return TC_ACT_OK;
+}
+
+SEC("ipip_set_tunnel")
+int _ipip_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.tunnel_ttl = 64;
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	} else {
+		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		if (tcp->dest == htons(5200))
+			key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+		else if (tcp->dest == htons(5201))
+			key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
+		else
+			return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip_get_tunnel")
+int _ipip_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+	return TC_ACT_OK;
+}
+
+SEC("ipip6_set_tunnel")
+int _ipip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.remote_ipv6[0] = _htonl(0x2401db00);
+	key.tunnel_ttl = 64;
+
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv6[3] = _htonl(1);
+	} else {
+		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) {
+			ERROR(iph->protocol);
+			return TC_ACT_SHOT;
+		}
+
+		if (tcp->dest == htons(5200)) {
+			key.remote_ipv6[3] = _htonl(1);
+		} else if (tcp->dest == htons(5201)) {
+			key.remote_ipv6[3] = _htonl(2);
+		} else {
+			ERROR(tcp->dest);
+			return TC_ACT_SHOT;
+		}
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip6_get_tunnel")
+int _ipip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
+			 _htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_set_tunnel")
+int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct ipv6hdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.remote_ipv6[0] = _htonl(0x2401db00);
+	key.tunnel_ttl = 64;
+
+	if (iph->nexthdr == NEXTHDR_ICMP) {
+		key.remote_ipv6[3] = _htonl(1);
+	} else {
+		if (iph->nexthdr != NEXTHDR_TCP) {
+			ERROR(iph->nexthdr);
+			return TC_ACT_SHOT;
+		}
+
+		if (tcp->dest == htons(5200)) {
+			key.remote_ipv6[3] = _htonl(1);
+		} else if (tcp->dest == htons(5201)) {
+			key.remote_ipv6[3] = _htonl(2);
+		} else {
+			ERROR(tcp->dest);
+			return TC_ACT_SHOT;
+		}
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_get_tunnel")
+int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]),
+			 _htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c
index 2732c37..10ff734 100644
--- a/samples/bpf/test_cgrp2_tc_kern.c
+++ b/samples/bpf/test_cgrp2_tc_kern.c
@@ -57,7 +57,7 @@
 		bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
 				 eth->h_proto, ip6h->nexthdr);
 		return TC_ACT_OK;
-	} else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
+	} else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) {
 		bpf_trace_printk(pass_msg, sizeof(pass_msg));
 		return TC_ACT_OK;
 	} else {
diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c
new file mode 100644
index 0000000..86b28d7
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_kern.c
@@ -0,0 +1,43 @@
+/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/ptrace.h>
+#include <uapi/linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+#include <uapi/linux/utsname.h>
+
+struct bpf_map_def SEC("maps") cgroup_map = {
+	.type			= BPF_MAP_TYPE_CGROUP_ARRAY,
+	.key_size		= sizeof(u32),
+	.value_size		= sizeof(u32),
+	.max_entries	= 1,
+};
+
+struct bpf_map_def SEC("maps") perf_map = {
+	.type			= BPF_MAP_TYPE_ARRAY,
+	.key_size		= sizeof(u32),
+	.value_size		= sizeof(u64),
+	.max_entries	= 1,
+};
+
+/* Writes the last PID that called sync to a map at index 0 */
+SEC("kprobe/sys_sync")
+int bpf_prog1(struct pt_regs *ctx)
+{
+	u64 pid = bpf_get_current_pid_tgid();
+	int idx = 0;
+
+	if (!bpf_current_task_under_cgroup(&cgroup_map, 0))
+		return 0;
+
+	bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c
new file mode 100644
index 0000000..30b0bce
--- /dev/null
+++ b/samples/bpf/test_current_task_under_cgroup_user.c
@@ -0,0 +1,145 @@
+/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+
+#define CGROUP_MOUNT_PATH	"/mnt"
+#define CGROUP_PATH		"/mnt/my-cgroup"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+static int join_cgroup(char *path)
+{
+	int fd, rc = 0;
+	pid_t pid = getpid();
+	char cgroup_path[PATH_MAX + 1];
+
+	snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path);
+
+	fd = open(cgroup_path, O_WRONLY);
+	if (fd < 0) {
+		log_err("Opening Cgroup");
+		return 1;
+	}
+
+	if (dprintf(fd, "%d\n", pid) < 0) {
+		log_err("Joining Cgroup");
+		rc = 1;
+	}
+	close(fd);
+	return rc;
+}
+
+int main(int argc, char **argv)
+{
+	char filename[256];
+	int cg2, idx = 0;
+	pid_t remote_pid, local_pid = getpid();
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	/*
+	 * This is to avoid interfering with existing cgroups. Unfortunately,
+	 * most people don't have cgroupv2 enabled at this point in time.
+	 * It's easier to create our own mount namespace and manage it
+	 * ourselves.
+	 */
+	if (unshare(CLONE_NEWNS)) {
+		log_err("unshare");
+		return 1;
+	}
+
+	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+		log_err("mount fakeroot");
+		return 1;
+	}
+
+	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
+		log_err("mount cgroup2");
+		return 1;
+	}
+
+	if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup");
+		return 1;
+	}
+
+	cg2 = open(CGROUP_PATH, O_RDONLY);
+	if (cg2 < 0) {
+		log_err("opening target cgroup");
+		goto cleanup_cgroup_err;
+	}
+
+	if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
+		log_err("Adding target cgroup to map");
+		goto cleanup_cgroup_err;
+	}
+	if (join_cgroup("/mnt/my-cgroup")) {
+		log_err("Leaving target cgroup");
+		goto cleanup_cgroup_err;
+	}
+
+	/*
+	 * The installed helper program catched the sync call, and should
+	 * write it to the map.
+	 */
+
+	sync();
+	bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+	if (local_pid != remote_pid) {
+		fprintf(stderr,
+			"BPF Helper didn't write correct PID to map, but: %d\n",
+			remote_pid);
+		goto leave_cgroup_err;
+	}
+
+	/* Verify the negative scenario; leave the cgroup */
+	if (join_cgroup(CGROUP_MOUNT_PATH))
+		goto leave_cgroup_err;
+
+	remote_pid = 0;
+	bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);
+
+	sync();
+	bpf_lookup_elem(map_fd[1], &idx, &remote_pid);
+
+	if (local_pid == remote_pid) {
+		fprintf(stderr, "BPF cgroup negative test did not work\n");
+		goto cleanup_cgroup_err;
+	}
+
+	rmdir(CGROUP_PATH);
+	return 0;
+
+	/* Error condition, cleanup */
+leave_cgroup_err:
+	join_cgroup(CGROUP_MOUNT_PATH);
+cleanup_cgroup_err:
+	rmdir(CGROUP_PATH);
+	return 1;
+}
diff --git a/samples/bpf/test_ipip.sh b/samples/bpf/test_ipip.sh
new file mode 100755
index 0000000..1969254
--- /dev/null
+++ b/samples/bpf/test_ipip.sh
@@ -0,0 +1,178 @@
+#!/bin/bash
+
+function config_device {
+	ip netns add at_ns0
+	ip netns add at_ns1
+	ip netns add at_ns2
+	ip link add veth0 type veth peer name veth0b
+	ip link add veth1 type veth peer name veth1b
+	ip link add veth2 type veth peer name veth2b
+	ip link set veth0b up
+	ip link set veth1b up
+	ip link set veth2b up
+	ip link set dev veth0b mtu 1500
+	ip link set dev veth1b mtu 1500
+	ip link set dev veth2b mtu 1500
+	ip link set veth0 netns at_ns0
+	ip link set veth1 netns at_ns1
+	ip link set veth2 netns at_ns2
+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+	ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1
+	ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad
+	ip netns exec at_ns1 ip link set dev veth1 up
+	ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2
+	ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad
+	ip netns exec at_ns2 ip link set dev veth2 up
+	ip link add br0 type bridge
+	ip link set br0 up
+	ip link set dev br0 mtu 1500
+	ip link set veth0b master br0
+	ip link set veth1b master br0
+	ip link set veth2b master br0
+}
+
+function add_ipip_tunnel {
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type ipip local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns1 \
+		ip link add dev $DEV_NS type ipip local 172.16.1.101 remote 172.16.1.200
+	ip netns exec at_ns1 ip link set dev $DEV_NS up
+	# same inner IP address in at_ns0 and at_ns1
+	ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	ip netns exec at_ns2 ip link add dev $DEV type ipip external
+	ip netns exec at_ns2 ip link set dev $DEV up
+	ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
+}
+
+function add_ipip6_tunnel {
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns1 \
+		ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64
+	ip netns exec at_ns1 ip link set dev $DEV_NS up
+	# same inner IP address in at_ns0 and at_ns1
+	ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ipip6 external
+	ip netns exec at_ns2 ip link set dev $DEV up
+	ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24
+}
+
+function add_ip6ip6_tunnel {
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 2601:646::1/64
+	ip netns exec at_ns1 \
+		ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64
+	ip netns exec at_ns1 ip link set dev $DEV_NS up
+	# same inner IP address in at_ns0 and at_ns1
+	ip netns exec at_ns1 ip addr add dev $DEV_NS 2601:646::1/64
+
+	ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ip6ip6 external
+	ip netns exec at_ns2 ip link set dev $DEV up
+	ip netns exec at_ns2 ip addr add dev $DEV 2601:646::2/64
+}
+
+function attach_bpf {
+	DEV=$1
+	SET_TUNNEL=$2
+	GET_TUNNEL=$3
+	ip netns exec at_ns2 tc qdisc add dev $DEV clsact
+	ip netns exec at_ns2 tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
+	ip netns exec at_ns2 tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
+}
+
+function test_ipip {
+	DEV_NS=ipip_std
+	DEV=ipip_bpf
+	config_device
+#	tcpdump -nei br0 &
+	cat /sys/kernel/debug/tracing/trace_pipe &
+
+	add_ipip_tunnel
+	attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	ip netns exec at_ns2 ping -c 1 10.1.1.100
+	ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
+	ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
+	sleep 0.2
+	# tcp check _same_ IP over different tunnels
+	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
+	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
+	cleanup
+}
+
+# IPv4 over IPv6 tunnel
+function test_ipip6 {
+	DEV_NS=ipip_std
+	DEV=ipip_bpf
+	config_device
+#	tcpdump -nei br0 &
+	cat /sys/kernel/debug/tracing/trace_pipe &
+
+	add_ipip6_tunnel
+	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	ip netns exec at_ns2 ping -c 1 10.1.1.100
+	ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
+	ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null
+	sleep 0.2
+	# tcp check _same_ IP over different tunnels
+	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200
+	ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201
+	cleanup
+}
+
+# IPv6 over IPv6 tunnel
+function test_ip6ip6 {
+	DEV_NS=ipip_std
+	DEV=ipip_bpf
+	config_device
+#	tcpdump -nei br0 &
+	cat /sys/kernel/debug/tracing/trace_pipe &
+
+	add_ip6ip6_tunnel
+	attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
+
+	ip netns exec at_ns0 ping -6 -c 1 2601:646::2
+	ip netns exec at_ns2 ping -6 -c 1 2601:646::1
+	ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null
+	ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null
+	sleep 0.2
+	# tcp check _same_ IP over different tunnels
+	ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200
+	ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201
+	cleanup
+}
+
+function cleanup {
+	set +ex
+	pkill iperf
+	ip netns delete at_ns0
+	ip netns delete at_ns1
+	ip netns delete at_ns2
+	ip link del veth0
+	ip link del veth1
+	ip link del veth2
+	ip link del br0
+	pkill tcpdump
+	pkill cat
+	set -ex
+}
+
+cleanup
+echo "Testing IP tunnels..."
+test_ipip
+test_ipip6
+test_ip6ip6
+echo "*** PASS ***"
diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c
index 47bf085..cce2b59 100644
--- a/samples/bpf/test_maps.c
+++ b/samples/bpf/test_maps.c
@@ -68,7 +68,16 @@
 	assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 &&
 	       errno == E2BIG);
 
+	/* update existing element, thought the map is full */
+	key = 1;
+	assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+	key = 2;
+	assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+	key = 1;
+	assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0);
+
 	/* check that key = 0 doesn't exist */
+	key = 0;
 	assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT);
 
 	/* iterate over two elements */
@@ -413,10 +422,12 @@
 
 	for (i = fn; i < MAP_SIZE; i += TASKS) {
 		key = value = i;
-		if (do_update)
+		if (do_update) {
 			assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0);
-		else
+			assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0);
+		} else {
 			assert(bpf_delete_elem(map_fd, &key) == 0);
+		}
 	}
 }
 
diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh
new file mode 100755
index 0000000..1ff634f
--- /dev/null
+++ b/samples/bpf/test_tunnel_bpf.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# In Namespace 0 (at_ns0) using native tunnel
+# Overlay IP: 10.1.1.100
+# local 192.16.1.100 remote 192.16.1.200
+# veth0 IP: 172.16.1.100, tunnel dev <type>00
+
+# Out of Namespace using BPF set/get on lwtunnel
+# Overlay IP: 10.1.1.200
+# local 172.16.1.200 remote 172.16.1.100
+# veth1 IP: 172.16.1.200, tunnel dev <type>11
+
+function config_device {
+	ip netns add at_ns0
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 netns at_ns0
+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip link set dev veth1 up mtu 1500
+	ip addr add dev veth1 172.16.1.200/24
+}
+
+function add_gre_tunnel {
+	# in namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# out of namespace
+	ip link add dev $DEV type $TYPE key 2 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+function add_vxlan_tunnel {
+	# Set static ARP entry here because iptables set-mark works
+	# on L3 packet, as a result not applying to ARP packets,
+	# causing errors at get_tunnel_{key/opt}.
+
+	# in namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 2 dstport 4789 gbp remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+	ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+	# out of namespace
+	ip link add dev $DEV type $TYPE external gbp dstport 4789
+	ip link set dev $DEV address 52:54:00:d9:02:00 up
+	ip addr add dev $DEV 10.1.1.200/24
+	arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+function add_geneve_tunnel {
+	# in namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 2 dstport 6081 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# out of namespace
+	ip link add dev $DEV type $TYPE dstport 6081 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+function add_ipip_tunnel {
+	# in namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# out of namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+function attach_bpf {
+	DEV=$1
+	SET_TUNNEL=$2
+	GET_TUNNEL=$3
+	tc qdisc add dev $DEV clsact
+	tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL
+	tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL
+}
+
+function test_gre {
+	TYPE=gretap
+	DEV_NS=gretap00
+	DEV=gretap11
+	config_device
+	add_gre_tunnel
+	attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+	ping -c 1 10.1.1.100
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	cleanup
+}
+
+function test_vxlan {
+	TYPE=vxlan
+	DEV_NS=vxlan00
+	DEV=vxlan11
+	config_device
+	add_vxlan_tunnel
+	attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+	ping -c 1 10.1.1.100
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	cleanup
+}
+
+function test_geneve {
+	TYPE=geneve
+	DEV_NS=geneve00
+	DEV=geneve11
+	config_device
+	add_geneve_tunnel
+	attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+	ping -c 1 10.1.1.100
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	cleanup
+}
+
+function test_ipip {
+	TYPE=ipip
+	DEV_NS=ipip00
+	DEV=ipip11
+	config_device
+	tcpdump -nei veth1 &
+	cat /sys/kernel/debug/tracing/trace_pipe &
+	add_ipip_tunnel
+	ethtool -K veth1 gso off gro off rx off tx off
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+	ping -c 1 10.1.1.100
+	ip netns exec at_ns0 ping -c 1 10.1.1.200
+	ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null
+	sleep 0.2
+	iperf -c 10.1.1.100 -n 5k -p 5200
+	cleanup
+}
+
+function cleanup {
+	set +ex
+	pkill iperf
+	ip netns delete at_ns0
+	ip link del veth1
+	ip link del ipip11
+	ip link del gretap11
+	ip link del geneve11
+	pkill tcpdump
+	pkill cat
+	set -ex
+}
+
+cleanup
+echo "Testing GRE tunnel..."
+test_gre
+echo "Testing VXLAN tunnel..."
+test_vxlan
+echo "Testing GENEVE tunnel..."
+test_geneve
+echo "Testing IPIP tunnel..."
+test_ipip
+echo "*** PASS ***"
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index fe2fcec..369ffaa 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -29,6 +29,7 @@
 	struct bpf_insn	insns[MAX_INSNS];
 	int fixup[MAX_FIXUPS];
 	int prog_array_fixup[MAX_FIXUPS];
+	int test_val_map_fixup[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
 	enum {
@@ -39,6 +40,19 @@
 	enum bpf_prog_type prog_type;
 };
 
+/* Note we want this to be 64 bit aligned so that the end of our array is
+ * actually the end of the structure.
+ */
+#define MAX_ENTRIES 11
+struct test_val {
+	unsigned index;
+	int foo[MAX_ENTRIES];
+};
+
+struct other_val {
+	unsigned int action[32];
+};
+
 static struct bpf_test tests[] = {
 	{
 		"add+sub+mul",
@@ -291,6 +305,29 @@
 		.result = REJECT,
 	},
 	{
+		"invalid argument register",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"non-invalid argument register",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
 		"check valid spill/fill",
 		.insns = {
 			/* spill R1(ctx) into stack */
@@ -1210,6 +1247,54 @@
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
+		"raw_stack: skb_load_bytes, negative len",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid stack type R3",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, negative len 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, ~0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid stack type R3",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, zero len",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid stack type R3",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
 		"raw_stack: skb_load_bytes, no init",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_2, 4),
@@ -1449,7 +1534,7 @@
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"pkt: test1",
+		"direct packet access: test1",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -1466,7 +1551,7 @@
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"pkt: test2",
+		"direct packet access: test2",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_0, 1),
 			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
@@ -1499,7 +1584,7 @@
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"pkt: test3",
+		"direct packet access: test3",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -1511,7 +1596,7 @@
 		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
 	},
 	{
-		"pkt: test4",
+		"direct packet access: test4 (write)",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -1524,10 +1609,780 @@
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "cannot write",
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test5 (pkt_end >= reg, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test6 (pkt_end >= reg, bad access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid access to packet",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"direct packet access: test7 (pkt_end >= reg, both accesses)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test8 (double test, variant 1)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test9 (double test, variant 2)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test10 (write invalid)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test1, valid packet_ptr range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {5},
+		.result_unpriv = ACCEPT,
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test2, unchecked packet_ptr",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {1},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test3, variable add",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+					offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+					offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+			BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {11},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test4, packet_ptr with bad range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {7},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test5, packet_ptr with too short range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {6},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test6, cls valid packet_ptr range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {5},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test7, cls unchecked packet_ptr",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {1},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test8, cls variable add",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+					offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+					offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+			BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {11},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test9, cls packet_ptr with bad range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {7},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test10, cls packet_ptr with too short range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup = {6},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test11, cls unsuitable helper 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 42),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "helper access to the packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test12, cls unsuitable helper 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 4),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "helper access to the packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test13, cls helper ok",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test14, cls helper fail sub",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "type=inv expected=fp",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test15, cls helper fail range 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test16, cls helper fail range 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, -9),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test17, cls helper fail range 3",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, ~0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test18, cls helper fail range zero",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test19, pkt end as input",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R1 type=pkt_end expected=fp",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test20, wrong reg",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"valid map access into an array with a constant",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3},
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"valid map access into an array with a register",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3},
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"valid map access into an array with a variable",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3},
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"valid map access into an array with a signed variable",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3},
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"invalid map access into an array with a constant",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3},
+		.errstr = "invalid access to map value, value_size=48 off=48 size=8",
+		.result = REJECT,
+	},
+	{
+		"invalid map access into an array with a register",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3},
+		.errstr = "R0 min value is outside of the array range",
+		.result = REJECT,
+	},
+	{
+		"invalid map access into an array with a variable",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3},
+		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.result = REJECT,
+	},
+	{
+		"invalid map access into an array with no floor check",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3},
+		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.result = REJECT,
+	},
+	{
+		"invalid map access into an array with a invalid max check",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3},
+		.errstr = "invalid access to map value, value_size=48 off=44 size=8",
+		.result = REJECT,
+	},
+	{
+		"invalid map access into an array with a invalid max check",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.test_val_map_fixup = {3, 11},
+		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.result = REJECT,
+	},
 };
 
 static int probe_filter_length(struct bpf_insn *fp)
@@ -1541,12 +2396,12 @@
 	return len + 1;
 }
 
-static int create_map(void)
+static int create_map(size_t val_size, int num)
 {
 	int map_fd;
 
 	map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
-				sizeof(long long), sizeof(long long), 1024, 0);
+				sizeof(long long), val_size, num, 0);
 	if (map_fd < 0)
 		printf("failed to create map '%s'\n", strerror(errno));
 
@@ -1576,12 +2431,13 @@
 		int prog_len = probe_filter_length(prog);
 		int *fixup = tests[i].fixup;
 		int *prog_array_fixup = tests[i].prog_array_fixup;
+		int *test_val_map_fixup = tests[i].test_val_map_fixup;
 		int expected_result;
 		const char *expected_errstr;
-		int map_fd = -1, prog_array_fd = -1;
+		int map_fd = -1, prog_array_fd = -1, test_val_map_fd = -1;
 
 		if (*fixup) {
-			map_fd = create_map();
+			map_fd = create_map(sizeof(long long), 1024);
 
 			do {
 				prog[*fixup].imm = map_fd;
@@ -1596,6 +2452,18 @@
 				prog_array_fixup++;
 			} while (*prog_array_fixup);
 		}
+		if (*test_val_map_fixup) {
+			/* Unprivileged can't create a hash map.*/
+			if (unpriv)
+				continue;
+			test_val_map_fd = create_map(sizeof(struct test_val),
+						     256);
+			do {
+				prog[*test_val_map_fixup].imm = test_val_map_fd;
+				test_val_map_fixup++;
+			} while (*test_val_map_fixup);
+		}
+
 		printf("#%d %s ", i, tests[i].descr);
 
 		prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER,
@@ -1642,6 +2510,8 @@
 			close(map_fd);
 		if (prog_array_fd >= 0)
 			close(prog_array_fd);
+		if (test_val_map_fd >= 0)
+			close(test_val_map_fd);
 		close(prog_fd);
 
 	}
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
new file mode 100644
index 0000000..71a8ed3
--- /dev/null
+++ b/samples/bpf/trace_event_kern.c
@@ -0,0 +1,65 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/ptrace.h>
+#include <linux/version.h>
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/bpf_perf_event.h>
+#include <uapi/linux/perf_event.h>
+#include "bpf_helpers.h"
+
+struct key_t {
+	char comm[TASK_COMM_LEN];
+	u32 kernstack;
+	u32 userstack;
+};
+
+struct bpf_map_def SEC("maps") counts = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct key_t),
+	.value_size = sizeof(u64),
+	.max_entries = 10000,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.key_size = sizeof(u32),
+	.value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
+	.max_entries = 10000,
+};
+
+#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
+#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK)
+
+SEC("perf_event")
+int bpf_prog1(struct bpf_perf_event_data *ctx)
+{
+	char fmt[] = "CPU-%d period %lld ip %llx";
+	u32 cpu = bpf_get_smp_processor_id();
+	struct key_t key;
+	u64 *val, one = 1;
+
+	if (ctx->sample_period < 10000)
+		/* ignore warmup */
+		return 0;
+	bpf_get_current_comm(&key.comm, sizeof(key.comm));
+	key.kernstack = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS);
+	key.userstack = bpf_get_stackid(ctx, &stackmap, USER_STACKID_FLAGS);
+	if ((int)key.kernstack < 0 && (int)key.userstack < 0) {
+		bpf_trace_printk(fmt, sizeof(fmt), cpu, ctx->sample_period,
+				 ctx->regs.ip);
+		return 0;
+	}
+
+	val = bpf_map_lookup_elem(&counts, &key);
+	if (val)
+		(*val)++;
+	else
+		bpf_map_update_elem(&counts, &key, &one, BPF_NOEXIST);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
new file mode 100644
index 0000000..9a130d31
--- /dev/null
+++ b/samples/bpf/trace_event_user.c
@@ -0,0 +1,213 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#include <linux/bpf.h>
+#include <signal.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/resource.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define SAMPLE_FREQ 50
+
+static bool sys_read_seen, sys_write_seen;
+
+static void print_ksym(__u64 addr)
+{
+	struct ksym *sym;
+
+	if (!addr)
+		return;
+	sym = ksym_search(addr);
+	printf("%s;", sym->name);
+	if (!strcmp(sym->name, "sys_read"))
+		sys_read_seen = true;
+	else if (!strcmp(sym->name, "sys_write"))
+		sys_write_seen = true;
+}
+
+static void print_addr(__u64 addr)
+{
+	if (!addr)
+		return;
+	printf("%llx;", addr);
+}
+
+#define TASK_COMM_LEN 16
+
+struct key_t {
+	char comm[TASK_COMM_LEN];
+	__u32 kernstack;
+	__u32 userstack;
+};
+
+static void print_stack(struct key_t *key, __u64 count)
+{
+	__u64 ip[PERF_MAX_STACK_DEPTH] = {};
+	static bool warned;
+	int i;
+
+	printf("%3lld %s;", count, key->comm);
+	if (bpf_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) {
+		printf("---;");
+	} else {
+		for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
+			print_ksym(ip[i]);
+	}
+	printf("-;");
+	if (bpf_lookup_elem(map_fd[1], &key->userstack, ip) != 0) {
+		printf("---;");
+	} else {
+		for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--)
+			print_addr(ip[i]);
+	}
+	printf("\n");
+
+	if (key->kernstack == -EEXIST && !warned) {
+		printf("stackmap collisions seen. Consider increasing size\n");
+		warned = true;
+	} else if ((int)key->kernstack < 0 && (int)key->userstack < 0) {
+		printf("err stackid %d %d\n", key->kernstack, key->userstack);
+	}
+}
+
+static void int_exit(int sig)
+{
+	kill(0, SIGKILL);
+	exit(0);
+}
+
+static void print_stacks(void)
+{
+	struct key_t key = {}, next_key;
+	__u64 value;
+	__u32 stackid = 0, next_id;
+	int fd = map_fd[0], stack_map = map_fd[1];
+
+	sys_read_seen = sys_write_seen = false;
+	while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+		bpf_lookup_elem(fd, &next_key, &value);
+		print_stack(&next_key, value);
+		bpf_delete_elem(fd, &next_key);
+		key = next_key;
+	}
+
+	if (!sys_read_seen || !sys_write_seen) {
+		printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n");
+		int_exit(0);
+	}
+
+	/* clear stack map */
+	while (bpf_get_next_key(stack_map, &stackid, &next_id) == 0) {
+		bpf_delete_elem(stack_map, &next_id);
+		stackid = next_id;
+	}
+}
+
+static void test_perf_event_all_cpu(struct perf_event_attr *attr)
+{
+	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+	int *pmu_fd = malloc(nr_cpus * sizeof(int));
+	int i;
+
+	/* open perf_event on all cpus */
+	for (i = 0; i < nr_cpus; i++) {
+		pmu_fd[i] = perf_event_open(attr, -1, i, -1, 0);
+		if (pmu_fd[i] < 0) {
+			printf("perf_event_open failed\n");
+			goto all_cpu_err;
+		}
+		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
+		assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0);
+	}
+	system("dd if=/dev/zero of=/dev/null count=5000k");
+	print_stacks();
+all_cpu_err:
+	for (i--; i >= 0; i--)
+		close(pmu_fd[i]);
+	free(pmu_fd);
+}
+
+static void test_perf_event_task(struct perf_event_attr *attr)
+{
+	int pmu_fd;
+
+	/* open task bound event */
+	pmu_fd = perf_event_open(attr, 0, -1, -1, 0);
+	if (pmu_fd < 0) {
+		printf("perf_event_open failed\n");
+		return;
+	}
+	assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0);
+	assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0);
+	system("dd if=/dev/zero of=/dev/null count=5000k");
+	print_stacks();
+	close(pmu_fd);
+}
+
+static void test_bpf_perf_event(void)
+{
+	struct perf_event_attr attr_type_hw = {
+		.sample_freq = SAMPLE_FREQ,
+		.freq = 1,
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+		.inherit = 1,
+	};
+	struct perf_event_attr attr_type_sw = {
+		.sample_freq = SAMPLE_FREQ,
+		.freq = 1,
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_CPU_CLOCK,
+		.inherit = 1,
+	};
+
+	test_perf_event_all_cpu(&attr_type_hw);
+	test_perf_event_task(&attr_type_hw);
+	test_perf_event_all_cpu(&attr_type_sw);
+	test_perf_event_task(&attr_type_sw);
+}
+
+
+int main(int argc, char **argv)
+{
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	char filename[256];
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	setrlimit(RLIMIT_MEMLOCK, &r);
+
+	signal(SIGINT, int_exit);
+
+	if (load_kallsyms()) {
+		printf("failed to process /proc/kallsyms\n");
+		return 1;
+	}
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 2;
+	}
+
+	if (fork() == 0) {
+		read_trace_pipe();
+		return 0;
+	}
+	test_bpf_perf_event();
+
+	int_exit(0);
+	return 0;
+}
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index f95f232..fd12d71 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -19,20 +19,18 @@
 	.max_entries = 1024,
 };
 
-SEC("kprobe/seccomp_phase1")
+SEC("kprobe/__seccomp_filter")
 int bpf_prog1(struct pt_regs *ctx)
 {
-	struct seccomp_data sd;
-
-	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+	int sc_nr = (int)PT_REGS_PARM1(ctx);
 
 	/* dispatch into next BPF program depending on syscall number */
-	bpf_tail_call(ctx, &progs, sd.nr);
+	bpf_tail_call(ctx, &progs, sc_nr);
 
 	/* fall through -> unknown syscall */
-	if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) {
+	if (sc_nr >= __NR_getuid && sc_nr <= __NR_getsid) {
 		char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n";
-		bpf_trace_printk(fmt, sizeof(fmt), sd.nr);
+		bpf_trace_printk(fmt, sizeof(fmt), sc_nr);
 	}
 	return 0;
 }
@@ -42,7 +40,7 @@
 {
 	struct seccomp_data sd;
 
-	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
 	if (sd.args[2] == 512) {
 		char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
 		bpf_trace_printk(fmt, sizeof(fmt),
@@ -55,7 +53,7 @@
 {
 	struct seccomp_data sd;
 
-	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
+	bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx));
 	if (sd.args[2] > 128 && sd.args[2] <= 1024) {
 		char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
 		bpf_trace_printk(fmt, sizeof(fmt),
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index a04dd3c..36b5925b 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -6,6 +6,7 @@
 #include <sys/prctl.h>
 #include "libbpf.h"
 #include "bpf_load.h"
+#include <sys/resource.h>
 
 /* install fake seccomp program to enable seccomp code path inside the kernel,
  * so that our kprobe attached to seccomp_phase1() can be triggered
@@ -27,8 +28,10 @@
 {
 	FILE *f;
 	char filename[256];
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	setrlimit(RLIMIT_MEMLOCK, &r);
 
 	if (load_bpf_file(filename)) {
 		printf("%s", bpf_log_buf);
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 15b196f..1792198 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -108,16 +108,20 @@
 as-instr = $(call try-run,\
 	printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
 
+# Do not attempt to build with gcc plugins during cc-option tests.
+# (And this uses delayed resolution so the flags will be up to date.)
+CC_OPTION_CFLAGS = $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS))
+
 # cc-option
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
 
 cc-option = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
 
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
 cc-option-yn = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
+	$(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
 
 # cc-option-align
 # Prefix align with either -falign or -malign
@@ -127,7 +131,7 @@
 # cc-disable-warning
 # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
 cc-disable-warning = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+	$(CC) $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
 # cc-name
 # Expands to either gcc or clang
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index 5e22b60..61f0e6d 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -19,25 +19,42 @@
     endif
   endif
 
-  GCC_PLUGINS_CFLAGS := $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y))
+  GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
 
-  export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN SANCOV_PLUGIN
+  export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR SANCOV_PLUGIN
 
-  ifeq ($(PLUGINCC),)
-    ifneq ($(GCC_PLUGINS_CFLAGS),)
-      ifeq ($(call cc-ifversion, -ge, 0405, y), y)
-        PLUGINCC := $(shell $(CONFIG_SHELL) -x $(srctree)/scripts/gcc-plugin.sh "$(__PLUGINCC)" "$(HOSTCXX)" "$(CC)")
-        $(warning warning: your gcc installation does not support plugins, perhaps the necessary headers are missing?)
-      else
-        $(warning warning: your gcc version does not support plugins, you should upgrade it to gcc 4.5 at least)
-      endif
-    endif
-  else
+  ifneq ($(PLUGINCC),)
     # SANCOV_PLUGIN can be only in CFLAGS_KCOV because avoid duplication.
     GCC_PLUGINS_CFLAGS := $(filter-out $(SANCOV_PLUGIN), $(GCC_PLUGINS_CFLAGS))
   endif
 
   KBUILD_CFLAGS += $(GCC_PLUGINS_CFLAGS)
   GCC_PLUGIN := $(gcc-plugin-y)
-
+  GCC_PLUGIN_SUBDIR := $(gcc-plugin-subdir-y)
 endif
+
+# If plugins aren't supported, abort the build before hard-to-read compiler
+# errors start getting spewed by the main build.
+PHONY += gcc-plugins-check
+gcc-plugins-check: FORCE
+ifdef CONFIG_GCC_PLUGINS
+  ifeq ($(PLUGINCC),)
+    ifneq ($(GCC_PLUGINS_CFLAGS),)
+      ifeq ($(call cc-ifversion, -ge, 0405, y), y)
+	$(Q)$(srctree)/scripts/gcc-plugin.sh --show-error "$(__PLUGINCC)" "$(HOSTCXX)" "$(CC)" || true
+	@echo "Cannot use CONFIG_GCC_PLUGINS: your gcc installation does not support plugins, perhaps the necessary headers are missing?" >&2 && exit 1
+      else
+	@echo "Cannot use CONFIG_GCC_PLUGINS: your gcc version does not support plugins, you should upgrade it to at least gcc 4.5" >&2 && exit 1
+      endif
+    endif
+  endif
+endif
+	@:
+
+# Actually do the build, if requested.
+PHONY += gcc-plugins
+gcc-plugins: scripts_basic gcc-plugins-check
+ifdef CONFIG_GCC_PLUGINS
+	$(Q)$(MAKE) $(build)=scripts/gcc-plugins
+endif
+	@:
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 8ab6867..dd779c4 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -3,7 +3,6 @@
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=vla-bound)
-      CFLAGS_UBSAN += $(call cc-option, -fsanitize=null)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size)
@@ -14,4 +13,8 @@
 ifdef CONFIG_UBSAN_ALIGNMENT
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment)
 endif
+
+ifdef CONFIG_UBSAN_NULL
+      CFLAGS_UBSAN += $(call cc-option, -fsanitize=null)
+endif
 endif
diff --git a/scripts/checkkconfigsymbols.py b/scripts/checkkconfigsymbols.py
index df643f6..a32e4da 100755
--- a/scripts/checkkconfigsymbols.py
+++ b/scripts/checkkconfigsymbols.py
@@ -1,98 +1,99 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 
 """Find Kconfig symbols that are referenced but not defined."""
 
-# (c) 2014-2015 Valentin Rothberg <valentinrothberg@gmail.com>
+# (c) 2014-2016 Valentin Rothberg <valentinrothberg@gmail.com>
 # (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
 #
 # Licensed under the terms of the GNU GPL License version 2
 
 
+import argparse
 import difflib
 import os
 import re
 import signal
+import subprocess
 import sys
 from multiprocessing import Pool, cpu_count
-from optparse import OptionParser
-from subprocess import Popen, PIPE, STDOUT
 
 
 # regex expressions
 OPERATORS = r"&|\(|\)|\||\!"
-FEATURE = r"(?:\w*[A-Z0-9]\w*){2,}"
-DEF = r"^\s*(?:menu){,1}config\s+(" + FEATURE + r")\s*"
-EXPR = r"(?:" + OPERATORS + r"|\s|" + FEATURE + r")+"
+SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
+DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
+EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
 DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
 STMT = r"^\s*(?:if|select|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
-SOURCE_FEATURE = r"(?:\W|\b)+[D]{,1}CONFIG_(" + FEATURE + r")"
+SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"
 
 # regex objects
 REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
-REGEX_FEATURE = re.compile(r'(?!\B)' + FEATURE + r'(?!\B)')
-REGEX_SOURCE_FEATURE = re.compile(SOURCE_FEATURE)
+REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
+REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
 REGEX_KCONFIG_DEF = re.compile(DEF)
 REGEX_KCONFIG_EXPR = re.compile(EXPR)
 REGEX_KCONFIG_STMT = re.compile(STMT)
 REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$")
-REGEX_FILTER_FEATURES = re.compile(r"[A-Za-z0-9]$")
+REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
 REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
 REGEX_QUOTES = re.compile("(\"(.*?)\")")
 
 
 def parse_options():
     """The user interface of this module."""
-    usage = "%prog [options]\n\n"                                              \
-            "Run this tool to detect Kconfig symbols that are referenced but " \
-            "not defined in\nKconfig.  The output of this tool has the "       \
-            "format \'Undefined symbol\\tFile list\'\n\n"                      \
-            "If no option is specified, %prog will default to check your\n"    \
-            "current tree.  Please note that specifying commits will "         \
-            "\'git reset --hard\'\nyour current tree!  You may save "          \
-            "uncommitted changes to avoid losing data."
+    usage = "Run this tool to detect Kconfig symbols that are referenced but " \
+            "not defined in Kconfig.  If no option is specified, "             \
+            "checkkconfigsymbols defaults to check your current tree.  "       \
+            "Please note that specifying commits will 'git reset --hard\' "    \
+            "your current tree!  You may save uncommitted changes to avoid "   \
+            "losing data."
 
-    parser = OptionParser(usage=usage)
+    parser = argparse.ArgumentParser(description=usage)
 
-    parser.add_option('-c', '--commit', dest='commit', action='store',
-                      default="",
-                      help="Check if the specified commit (hash) introduces "
-                           "undefined Kconfig symbols.")
+    parser.add_argument('-c', '--commit', dest='commit', action='store',
+                        default="",
+                        help="check if the specified commit (hash) introduces "
+                             "undefined Kconfig symbols")
 
-    parser.add_option('-d', '--diff', dest='diff', action='store',
-                      default="",
-                      help="Diff undefined symbols between two commits.  The "
-                           "input format bases on Git log's "
-                           "\'commmit1..commit2\'.")
+    parser.add_argument('-d', '--diff', dest='diff', action='store',
+                        default="",
+                        help="diff undefined symbols between two commits "
+                             "(e.g., -d commmit1..commit2)")
 
-    parser.add_option('-f', '--find', dest='find', action='store_true',
-                      default=False,
-                      help="Find and show commits that may cause symbols to be "
-                           "missing.  Required to run with --diff.")
+    parser.add_argument('-f', '--find', dest='find', action='store_true',
+                        default=False,
+                        help="find and show commits that may cause symbols to be "
+                             "missing (required to run with --diff)")
 
-    parser.add_option('-i', '--ignore', dest='ignore', action='store',
-                      default="",
-                      help="Ignore files matching this pattern.  Note that "
-                           "the pattern needs to be a Python regex.  To "
-                           "ignore defconfigs, specify -i '.*defconfig'.")
+    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
+                        default="",
+                        help="ignore files matching this Python regex "
+                             "(e.g., -i '.*defconfig')")
 
-    parser.add_option('-s', '--sim', dest='sim', action='store', default="",
-                      help="Print a list of maximum 10 string-similar symbols.")
+    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
+                        help="print a list of max. 10 string-similar symbols")
 
-    parser.add_option('', '--force', dest='force', action='store_true',
-                      default=False,
-                      help="Reset current Git tree even when it's dirty.")
+    parser.add_argument('--force', dest='force', action='store_true',
+                        default=False,
+                        help="reset current Git tree even when it's dirty")
 
-    (opts, _) = parser.parse_args()
+    parser.add_argument('--no-color', dest='color', action='store_false',
+                        default=True,
+                        help="don't print colored output (default when not "
+                             "outputting to a terminal)")
 
-    if opts.commit and opts.diff:
+    args = parser.parse_args()
+
+    if args.commit and args.diff:
         sys.exit("Please specify only one option at once.")
 
-    if opts.diff and not re.match(r"^[\w\-\.]+\.\.[\w\-\.]+$", opts.diff):
+    if args.diff and not re.match(r"^[\w\-\.]+\.\.[\w\-\.]+$", args.diff):
         sys.exit("Please specify valid input in the following format: "
                  "\'commit1..commit2\'")
 
-    if opts.commit or opts.diff:
-        if not opts.force and tree_is_dirty():
+    if args.commit or args.diff:
+        if not args.force and tree_is_dirty():
             sys.exit("The current Git tree is dirty (see 'git status').  "
                      "Running this script may\ndelete important data since it "
                      "calls 'git reset --hard' for some performance\nreasons. "
@@ -100,138 +101,148 @@
                      "'--force' if you\nwant to ignore this warning and "
                      "continue.")
 
-    if opts.commit:
-        opts.find = False
+    if args.commit:
+        args.find = False
 
-    if opts.ignore:
+    if args.ignore:
         try:
-            re.match(opts.ignore, "this/is/just/a/test.c")
+            re.match(args.ignore, "this/is/just/a/test.c")
         except:
             sys.exit("Please specify a valid Python regex.")
 
-    return opts
+    return args
 
 
 def main():
     """Main function of this module."""
-    opts = parse_options()
+    args = parse_options()
 
-    if opts.sim and not opts.commit and not opts.diff:
-        sims = find_sims(opts.sim, opts.ignore)
+    global COLOR
+    COLOR = args.color and sys.stdout.isatty()
+
+    if args.sim and not args.commit and not args.diff:
+        sims = find_sims(args.sim, args.ignore)
         if sims:
-            print "%s: %s" % (yel("Similar symbols"), ', '.join(sims))
+            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
         else:
-            print "%s: no similar symbols found" % yel("Similar symbols")
+            print("%s: no similar symbols found" % yel("Similar symbols"))
         sys.exit(0)
 
     # dictionary of (un)defined symbols
     defined = {}
     undefined = {}
 
-    if opts.commit or opts.diff:
+    if args.commit or args.diff:
         head = get_head()
 
         # get commit range
         commit_a = None
         commit_b = None
-        if opts.commit:
-            commit_a = opts.commit + "~"
-            commit_b = opts.commit
-        elif opts.diff:
-            split = opts.diff.split("..")
+        if args.commit:
+            commit_a = args.commit + "~"
+            commit_b = args.commit
+        elif args.diff:
+            split = args.diff.split("..")
             commit_a = split[0]
             commit_b = split[1]
             undefined_a = {}
             undefined_b = {}
 
         # get undefined items before the commit
-        execute("git reset --hard %s" % commit_a)
-        undefined_a, _ = check_symbols(opts.ignore)
+        reset(commit_a)
+        undefined_a, _ = check_symbols(args.ignore)
 
         # get undefined items for the commit
-        execute("git reset --hard %s" % commit_b)
-        undefined_b, defined = check_symbols(opts.ignore)
+        reset(commit_b)
+        undefined_b, defined = check_symbols(args.ignore)
 
         # report cases that are present for the commit but not before
-        for feature in sorted(undefined_b):
-            # feature has not been undefined before
-            if not feature in undefined_a:
-                files = sorted(undefined_b.get(feature))
-                undefined[feature] = files
-            # check if there are new files that reference the undefined feature
+        for symbol in sorted(undefined_b):
+            # symbol has not been undefined before
+            if symbol not in undefined_a:
+                files = sorted(undefined_b.get(symbol))
+                undefined[symbol] = files
+            # check if there are new files that reference the undefined symbol
             else:
-                files = sorted(undefined_b.get(feature) -
-                               undefined_a.get(feature))
+                files = sorted(undefined_b.get(symbol) -
+                               undefined_a.get(symbol))
                 if files:
-                    undefined[feature] = files
+                    undefined[symbol] = files
 
         # reset to head
-        execute("git reset --hard %s" % head)
+        reset(head)
 
     # default to check the entire tree
     else:
-        undefined, defined = check_symbols(opts.ignore)
+        undefined, defined = check_symbols(args.ignore)
 
     # now print the output
-    for feature in sorted(undefined):
-        print red(feature)
+    for symbol in sorted(undefined):
+        print(red(symbol))
 
-        files = sorted(undefined.get(feature))
-        print "%s: %s" % (yel("Referencing files"), ", ".join(files))
+        files = sorted(undefined.get(symbol))
+        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))
 
-        sims = find_sims(feature, opts.ignore, defined)
+        sims = find_sims(symbol, args.ignore, defined)
         sims_out = yel("Similar symbols")
         if sims:
-            print "%s: %s" % (sims_out, ', '.join(sims))
+            print("%s: %s" % (sims_out, ', '.join(sims)))
         else:
-            print "%s: %s" % (sims_out, "no similar symbols found")
+            print("%s: %s" % (sims_out, "no similar symbols found"))
 
-        if opts.find:
-            print "%s:" % yel("Commits changing symbol")
-            commits = find_commits(feature, opts.diff)
+        if args.find:
+            print("%s:" % yel("Commits changing symbol"))
+            commits = find_commits(symbol, args.diff)
             if commits:
                 for commit in commits:
                     commit = commit.split(" ", 1)
-                    print "\t- %s (\"%s\")" % (yel(commit[0]), commit[1])
+                    print("\t- %s (\"%s\")" % (yel(commit[0]), commit[1]))
             else:
-                print "\t- no commit found"
-        print  #  new line
+                print("\t- no commit found")
+        print()  # new line
+
+
+def reset(commit):
+    """Reset current git tree to %commit."""
+    execute(["git", "reset", "--hard", commit])
 
 
 def yel(string):
     """
     Color %string yellow.
     """
-    return "\033[33m%s\033[0m" % string
+    return "\033[33m%s\033[0m" % string if COLOR else string
 
 
 def red(string):
     """
     Color %string red.
     """
-    return "\033[31m%s\033[0m" % string
+    return "\033[31m%s\033[0m" % string if COLOR else string
 
 
 def execute(cmd):
     """Execute %cmd and return stdout.  Exit in case of error."""
-    pop = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True)
-    (stdout, _) = pop.communicate()  # wait until finished
-    if pop.returncode != 0:
-        sys.exit(stdout)
+    try:
+        stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+        stdout = stdout.decode(errors='replace')
+    except subprocess.CalledProcessError as fail:
+        exit(fail)
     return stdout
 
 
 def find_commits(symbol, diff):
     """Find commits changing %symbol in the given range of %diff."""
-    commits = execute("git log --pretty=oneline --abbrev-commit -G %s %s"
-                      % (symbol, diff))
+    commits = execute(["git", "log", "--pretty=oneline",
+                       "--abbrev-commit", "-G",
+                       symbol, diff])
     return [x for x in commits.split("\n") if x]
 
 
 def tree_is_dirty():
     """Return true if the current working tree is dirty (i.e., if any file has
     been added, deleted, modified, renamed or copied but not committed)."""
-    stdout = execute("git status --porcelain")
+    stdout = execute(["git", "status", "--porcelain"])
     for line in stdout:
         if re.findall(r"[URMADC]{1}", line[:2]):
             return True
@@ -240,13 +251,13 @@
 
 def get_head():
     """Return commit hash of current HEAD."""
-    stdout = execute("git rev-parse HEAD")
+    stdout = execute(["git", "rev-parse", "HEAD"])
     return stdout.strip('\n')
 
 
 def partition(lst, size):
     """Partition list @lst into eveni-sized lists of size @size."""
-    return [lst[i::size] for i in xrange(size)]
+    return [lst[i::size] for i in range(size)]
 
 
 def init_worker():
@@ -254,7 +265,7 @@
     signal.signal(signal.SIGINT, signal.SIG_IGN)
 
 
-def find_sims(symbol, ignore, defined = []):
+def find_sims(symbol, ignore, defined=[]):
     """Return a list of max. ten Kconfig symbols that are string-similar to
     @symbol."""
     if defined:
@@ -279,7 +290,7 @@
 def get_files():
     """Return a list of all files in the current git directory."""
     # use 'git ls-files' to get the worklist
-    stdout = execute("git ls-files")
+    stdout = execute(["git", "ls-files"])
     if len(stdout) > 0 and stdout[-1] == "\n":
         stdout = stdout[:-1]
 
@@ -311,8 +322,8 @@
     check_symbols() in order to properly terminate running worker processes."""
     source_files = []
     kconfig_files = []
-    defined_features = []
-    referenced_features = dict()  # {file: [features]}
+    defined_symbols = []
+    referenced_symbols = dict()  # {file: [symbols]}
 
     for gitfile in get_files():
         if REGEX_FILE_KCONFIG.match(gitfile):
@@ -326,76 +337,75 @@
     # parse source files
     arglist = partition(source_files, cpu_count())
     for res in pool.map(parse_source_files, arglist):
-        referenced_features.update(res)
-
+        referenced_symbols.update(res)
 
     # parse kconfig files
     arglist = []
     for part in partition(kconfig_files, cpu_count()):
         arglist.append((part, ignore))
     for res in pool.map(parse_kconfig_files, arglist):
-        defined_features.extend(res[0])
-        referenced_features.update(res[1])
-    defined_features = set(defined_features)
+        defined_symbols.extend(res[0])
+        referenced_symbols.update(res[1])
+    defined_symbols = set(defined_symbols)
 
-    # inverse mapping of referenced_features to dict(feature: [files])
+    # inverse mapping of referenced_symbols to dict(symbol: [files])
     inv_map = dict()
-    for _file, features in referenced_features.iteritems():
-        for feature in features:
-            inv_map[feature] = inv_map.get(feature, set())
-            inv_map[feature].add(_file)
-    referenced_features = inv_map
+    for _file, symbols in referenced_symbols.items():
+        for symbol in symbols:
+            inv_map[symbol] = inv_map.get(symbol, set())
+            inv_map[symbol].add(_file)
+    referenced_symbols = inv_map
 
-    undefined = {}  # {feature: [files]}
-    for feature in sorted(referenced_features):
+    undefined = {}  # {symbol: [files]}
+    for symbol in sorted(referenced_symbols):
         # filter some false positives
-        if feature == "FOO" or feature == "BAR" or \
-                feature == "FOO_BAR" or feature == "XXX":
+        if symbol == "FOO" or symbol == "BAR" or \
+                symbol == "FOO_BAR" or symbol == "XXX":
             continue
-        if feature not in defined_features:
-            if feature.endswith("_MODULE"):
+        if symbol not in defined_symbols:
+            if symbol.endswith("_MODULE"):
                 # avoid false positives for kernel modules
-                if feature[:-len("_MODULE")] in defined_features:
+                if symbol[:-len("_MODULE")] in defined_symbols:
                     continue
-            undefined[feature] = referenced_features.get(feature)
-    return undefined, defined_features
+            undefined[symbol] = referenced_symbols.get(symbol)
+    return undefined, defined_symbols
 
 
 def parse_source_files(source_files):
     """Parse each source file in @source_files and return dictionary with source
     files as keys and lists of references Kconfig symbols as values."""
-    referenced_features = dict()
+    referenced_symbols = dict()
     for sfile in source_files:
-        referenced_features[sfile] = parse_source_file(sfile)
-    return referenced_features
+        referenced_symbols[sfile] = parse_source_file(sfile)
+    return referenced_symbols
 
 
 def parse_source_file(sfile):
-    """Parse @sfile and return a list of referenced Kconfig features."""
+    """Parse @sfile and return a list of referenced Kconfig symbols."""
     lines = []
     references = []
 
     if not os.path.exists(sfile):
         return references
 
-    with open(sfile, "r") as stream:
+    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
         lines = stream.readlines()
 
     for line in lines:
-        if not "CONFIG_" in line:
+        if "CONFIG_" not in line:
             continue
-        features = REGEX_SOURCE_FEATURE.findall(line)
-        for feature in features:
-            if not REGEX_FILTER_FEATURES.search(feature):
+        symbols = REGEX_SOURCE_SYMBOL.findall(line)
+        for symbol in symbols:
+            if not REGEX_FILTER_SYMBOLS.search(symbol):
                 continue
-            references.append(feature)
+            references.append(symbol)
 
     return references
 
 
-def get_features_in_line(line):
-    """Return mentioned Kconfig features in @line."""
-    return REGEX_FEATURE.findall(line)
+def get_symbols_in_line(line):
+    """Return mentioned Kconfig symbols in @line."""
+    return REGEX_SYMBOL.findall(line)
 
 
 def parse_kconfig_files(args):
@@ -404,21 +414,21 @@
     pattern."""
     kconfig_files = args[0]
     ignore = args[1]
-    defined_features = []
-    referenced_features = dict()
+    defined_symbols = []
+    referenced_symbols = dict()
 
     for kfile in kconfig_files:
         defined, references = parse_kconfig_file(kfile)
-        defined_features.extend(defined)
+        defined_symbols.extend(defined)
         if ignore and re.match(ignore, kfile):
             # do not collect references for files that match the ignore pattern
             continue
-        referenced_features[kfile] = references
-    return (defined_features, referenced_features)
+        referenced_symbols[kfile] = references
+    return (defined_symbols, referenced_symbols)
 
 
 def parse_kconfig_file(kfile):
-    """Parse @kfile and update feature definitions and references."""
+    """Parse @kfile and update symbol definitions and references."""
     lines = []
     defined = []
     references = []
@@ -427,7 +437,7 @@
     if not os.path.exists(kfile):
         return defined, references
 
-    with open(kfile, "r") as stream:
+    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
         lines = stream.readlines()
 
     for i in range(len(lines)):
@@ -436,8 +446,8 @@
         line = line.split("#")[0]  # ignore comments
 
         if REGEX_KCONFIG_DEF.match(line):
-            feature_def = REGEX_KCONFIG_DEF.findall(line)
-            defined.append(feature_def[0])
+            symbol_def = REGEX_KCONFIG_DEF.findall(line)
+            defined.append(symbol_def[0])
             skip = False
         elif REGEX_KCONFIG_HELP.match(line):
             skip = True
@@ -446,18 +456,18 @@
             pass
         elif REGEX_KCONFIG_STMT.match(line):
             line = REGEX_QUOTES.sub("", line)
-            features = get_features_in_line(line)
+            symbols = get_symbols_in_line(line)
             # multi-line statements
             while line.endswith("\\"):
                 i += 1
                 line = lines[i]
                 line = line.strip('\n')
-                features.extend(get_features_in_line(line))
-            for feature in set(features):
-                if REGEX_NUMERIC.match(feature):
+                symbols.extend(get_symbols_in_line(line))
+            for symbol in set(symbols):
+                if REGEX_NUMERIC.match(symbol):
                     # ignore numeric values
                     continue
-                references.append(feature)
+                references.append(symbol)
 
     return defined, references
 
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4de3cc4..206a6b3 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3570,15 +3570,6 @@
 			}
 		}
 
-# check for uses of DEFINE_PCI_DEVICE_TABLE
-		if ($line =~ /\bDEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=/) {
-			if (WARN("DEFINE_PCI_DEVICE_TABLE",
-				 "Prefer struct pci_device_id over deprecated DEFINE_PCI_DEVICE_TABLE\n" . $herecurr) &&
-			    $fix) {
-				$fixed[$fixlinenr] =~ s/\b(?:static\s+|)DEFINE_PCI_DEVICE_TABLE\s*\(\s*(\w+)\s*\)\s*=\s*/static const struct pci_device_id $1\[\] = /;
-			}
-		}
-
 # check for new typedefs, only function parameters and sparse annotations
 # make sense.
 		if ($line =~ /\btypedef\s/ &&
diff --git a/scripts/faddr2line b/scripts/faddr2line
new file mode 100755
index 0000000..450b332
--- /dev/null
+++ b/scripts/faddr2line
@@ -0,0 +1,177 @@
+#!/bin/bash
+#
+# Translate stack dump function offsets.
+#
+# addr2line doesn't work with KASLR addresses.  This works similarly to
+# addr2line, but instead takes the 'func+0x123' format as input:
+#
+#   $ ./scripts/faddr2line ~/k/vmlinux meminfo_proc_show+0x5/0x568
+#   meminfo_proc_show+0x5/0x568:
+#   meminfo_proc_show at fs/proc/meminfo.c:27
+#
+# If the address is part of an inlined function, the full inline call chain is
+# printed:
+#
+#   $ ./scripts/faddr2line ~/k/vmlinux native_write_msr+0x6/0x27
+#   native_write_msr+0x6/0x27:
+#   arch_static_branch at arch/x86/include/asm/msr.h:121
+#    (inlined by) static_key_false at include/linux/jump_label.h:125
+#    (inlined by) native_write_msr at arch/x86/include/asm/msr.h:125
+#
+# The function size after the '/' in the input is optional, but recommended.
+# It's used to help disambiguate any duplicate symbol names, which can occur
+# rarely.  If the size is omitted for a duplicate symbol then it's possible for
+# multiple code sites to be printed:
+#
+#   $ ./scripts/faddr2line ~/k/vmlinux raw_ioctl+0x5
+#   raw_ioctl+0x5/0x20:
+#   raw_ioctl at drivers/char/raw.c:122
+#
+#   raw_ioctl+0x5/0xb1:
+#   raw_ioctl at net/ipv4/raw.c:876
+#
+# Multiple addresses can be specified on a single command line:
+#
+#   $ ./scripts/faddr2line ~/k/vmlinux type_show+0x10/45 free_reserved_area+0x90
+#   type_show+0x10/0x2d:
+#   type_show at drivers/video/backlight/backlight.c:213
+#
+#   free_reserved_area+0x90/0x123:
+#   free_reserved_area at mm/page_alloc.c:6429 (discriminator 2)
+
+
+set -o errexit
+set -o nounset
+
+command -v awk >/dev/null 2>&1 || die "awk isn't installed"
+command -v readelf >/dev/null 2>&1 || die "readelf isn't installed"
+command -v addr2line >/dev/null 2>&1 || die "addr2line isn't installed"
+
+usage() {
+	echo "usage: faddr2line <object file> <func+offset> <func+offset>..." >&2
+	exit 1
+}
+
+warn() {
+	echo "$1" >&2
+}
+
+die() {
+	echo "ERROR: $1" >&2
+	exit 1
+}
+
+# Try to figure out the source directory prefix so we can remove it from the
+# addr2line output.  HACK ALERT: This assumes that start_kernel() is in
+# kernel/init.c!  This only works for vmlinux.  Otherwise it falls back to
+# printing the absolute path.
+find_dir_prefix() {
+	local objfile=$1
+
+	local start_kernel_addr=$(readelf -sW $objfile | awk '$8 == "start_kernel" {printf "0x%s", $2}')
+	[[ -z $start_kernel_addr ]] && return
+
+	local file_line=$(addr2line -e $objfile $start_kernel_addr)
+	[[ -z $file_line ]] && return
+
+	local prefix=${file_line%init/main.c:*}
+	if [[ -z $prefix ]] || [[ $prefix = $file_line ]]; then
+		return
+	fi
+
+	DIR_PREFIX=$prefix
+	return 0
+}
+
+__faddr2line() {
+	local objfile=$1
+	local func_addr=$2
+	local dir_prefix=$3
+	local print_warnings=$4
+
+	local func=${func_addr%+*}
+	local offset=${func_addr#*+}
+	offset=${offset%/*}
+	local size=
+	[[ $func_addr =~ "/" ]] && size=${func_addr#*/}
+
+	if [[ -z $func ]] || [[ -z $offset ]] || [[ $func = $func_addr ]]; then
+		warn "bad func+offset $func_addr"
+		DONE=1
+		return
+	fi
+
+	# Go through each of the object's symbols which match the func name.
+	# In rare cases there might be duplicates.
+	while read symbol; do
+		local fields=($symbol)
+		local sym_base=0x${fields[1]}
+		local sym_size=${fields[2]}
+		local sym_type=${fields[3]}
+
+		# calculate the address
+		local addr=$(($sym_base + $offset))
+		if [[ -z $addr ]] || [[ $addr = 0 ]]; then
+			warn "bad address: $sym_base + $offset"
+			DONE=1
+			return
+		fi
+		local hexaddr=0x$(printf %x $addr)
+
+		# weed out non-function symbols
+		if [[ $sym_type != "FUNC" ]]; then
+			[[ $print_warnings = 1 ]] &&
+				echo "skipping $func address at $hexaddr due to non-function symbol"
+			continue
+		fi
+
+		# if the user provided a size, make sure it matches the symbol's size
+		if [[ -n $size ]] && [[ $size -ne $sym_size ]]; then
+			[[ $print_warnings = 1 ]] &&
+				echo "skipping $func address at $hexaddr due to size mismatch ($size != $sym_size)"
+			continue;
+		fi
+
+		# make sure the provided offset is within the symbol's range
+		if [[ $offset -gt $sym_size ]]; then
+			[[ $print_warnings = 1 ]] &&
+				echo "skipping $func address at $hexaddr due to size mismatch ($offset > $sym_size)"
+			continue
+		fi
+
+		# separate multiple entries with a blank line
+		[[ $FIRST = 0 ]] && echo
+		FIRST=0
+
+		local hexsize=0x$(printf %x $sym_size)
+		echo "$func+$offset/$hexsize:"
+		addr2line -fpie $objfile $hexaddr | sed "s; $dir_prefix\(\./\)*; ;"
+		DONE=1
+
+	done < <(readelf -sW $objfile | awk -v f=$func '$8 == f {print}')
+}
+
+[[ $# -lt 2 ]] && usage
+
+objfile=$1
+[[ ! -f $objfile ]] && die "can't find objfile $objfile"
+shift
+
+DIR_PREFIX=supercalifragilisticexpialidocious
+find_dir_prefix $objfile
+
+FIRST=1
+while [[ $# -gt 0 ]]; do
+	func_addr=$1
+	shift
+
+	# print any matches found
+	DONE=0
+	__faddr2line $objfile $func_addr $DIR_PREFIX 0
+
+	# if no match was found, print warnings
+	if [[ $DONE = 0 ]]; then
+		__faddr2line $objfile $func_addr $DIR_PREFIX 1
+		warn "no match for $func_addr"
+	fi
+done
diff --git a/scripts/gcc-plugin.sh b/scripts/gcc-plugin.sh
index fb92075..b65224b 100755
--- a/scripts/gcc-plugin.sh
+++ b/scripts/gcc-plugin.sh
@@ -1,5 +1,12 @@
 #!/bin/sh
 srctree=$(dirname "$0")
+
+SHOW_ERROR=
+if [ "$1" = "--show-error" ] ; then
+	SHOW_ERROR=1
+	shift || true
+fi
+
 gccplugins_dir=$($3 -print-file-name=plugin)
 plugincc=$($1 -E -x c++ - -o /dev/null -I"${srctree}"/gcc-plugins -I"${gccplugins_dir}"/include 2>&1 <<EOF
 #include "gcc-common.h"
@@ -13,6 +20,9 @@
 
 if [ $? -ne 0 ]
 then
+	if [ -n "$SHOW_ERROR" ] ; then
+		echo "${plugincc}" >&2
+	fi
 	exit 1
 fi
 
@@ -48,4 +58,8 @@
 	echo "$2"
 	exit 0
 fi
+
+if [ -n "$SHOW_ERROR" ] ; then
+	echo "${plugincc}" >&2
+fi
 exit 1
diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile
index 88c8ec4..8b29dc1 100644
--- a/scripts/gcc-plugins/Makefile
+++ b/scripts/gcc-plugins/Makefile
@@ -12,16 +12,18 @@
   export HOST_EXTRACXXFLAGS
 endif
 
-export GCCPLUGINS_DIR HOSTLIBS
-
 ifneq ($(CFLAGS_KCOV), $(SANCOV_PLUGIN))
   GCC_PLUGIN := $(filter-out $(SANCOV_PLUGIN), $(GCC_PLUGIN))
 endif
 
-$(HOSTLIBS)-y := $(GCC_PLUGIN)
+export HOSTLIBS
+
+$(HOSTLIBS)-y := $(foreach p,$(GCC_PLUGIN),$(if $(findstring /,$(p)),,$(p)))
 always := $($(HOSTLIBS)-y)
 
-cyc_complexity_plugin-objs := cyc_complexity_plugin.o
-sancov_plugin-objs := sancov_plugin.o
+$(foreach p,$($(HOSTLIBS)-y:%.so=%),$(eval $(p)-objs := $(p).o))
+
+subdir-y := $(GCC_PLUGIN_SUBDIR)
+subdir-  += $(GCC_PLUGIN_SUBDIR)
 
 clean-files += *.so
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index 122fcda..aed4511 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -432,7 +432,7 @@
 	    die "$P: file '${file}' not found\n";
 	}
     }
-    if ($from_filename || vcs_file_exists($file)) {
+    if ($from_filename || ($file ne "&STDIN" && vcs_file_exists($file))) {
 	$file =~ s/^\Q${cur_path}\E//;	#strip any absolute path
 	$file =~ s/^\Q${lk_path}\E//;	#or the path to the lk tree
 	push(@files, $file);
@@ -2136,9 +2136,11 @@
 
     my $cmd = $VCS_cmds{"file_exists_cmd"};
     $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd
-
+    $cmd .= " 2>&1";
     $exists = &{$VCS_cmds{"execute_cmd"}}($cmd);
 
+    return 0 if ($? != 0);
+
     return $exists;
 }
 
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 4f2e904..93721f3 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -212,6 +212,7 @@
 my $type_constant = '\%([-_\w]+)';
 my $type_func = '(\w+)\(\)';
 my $type_param = '\@(\w+)';
+my $type_fp_param = '\@(\w+)\(\)';  # Special RST handling for func ptr params
 my $type_struct = '\&((struct\s*)*[_\w]+)';
 my $type_struct_xml = '\\&amp;((struct\s*)*[_\w]+)';
 my $type_env = '(\$\w+)';
@@ -292,6 +293,7 @@
                        # Note: need to escape () to avoid func matching later
                        [$type_member_func, "\\:c\\:type\\:`\$1\$2\\\\(\\\\) <\$1>`"],
                        [$type_member, "\\:c\\:type\\:`\$1\$2 <\$1>`"],
+		       [$type_fp_param, "**\$1\\\\(\\\\)**"],
                        [$type_func, "\\:c\\:func\\:`\$1()`"],
                        [$type_struct_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"],
                        [$type_enum_full, "\\:c\\:type\\:`\$1 \$2 <\$2>`"],
@@ -412,7 +414,7 @@
 my $doc_decl = $doc_com . '(\w+)';
 # @params and a strictly limited set of supported section names
 my $doc_sect = $doc_com . 
-    '\s*(\@\w+|description|context|returns?|notes?|examples?)\s*:(.*)';
+    '\s*(\@[.\w]+|\@\.\.\.|description|context|returns?|notes?|examples?)\s*:(.*)';
 my $doc_content = $doc_com_body . '(.*)';
 my $doc_block = $doc_com . 'DOC:\s*(.*)?';
 my $doc_inline_start = '^\s*/\*\*\s*$';
@@ -1831,13 +1833,22 @@
     my %args = %{$_[0]};
     my ($parameter, $section);
     my $oldprefix = $lineprefix;
-    my $start;
+    my $start = "";
 
-    print ".. c:function:: ";
-    if ($args{'functiontype'} ne "") {
-	$start = $args{'functiontype'} . " " . $args{'function'} . " (";
+    if ($args{'typedef'}) {
+	print ".. c:type:: ". $args{'function'} . "\n\n";
+	print_lineno($declaration_start_line);
+	print "   **Typedef**: ";
+	$lineprefix = "";
+	output_highlight_rst($args{'purpose'});
+	$start = "\n\n**Syntax**\n\n  ``";
     } else {
-	$start = $args{'function'} . " (";
+	print ".. c:function:: ";
+    }
+    if ($args{'functiontype'} ne "") {
+	$start .= $args{'functiontype'} . " " . $args{'function'} . " (";
+    } else {
+	$start .= $args{'function'} . " (";
     }
     print $start;
 
@@ -1849,9 +1860,6 @@
 	$count++;
 	$type = $args{'parametertypes'}{$parameter};
 
-	# RST doesn't like address_space tags at function prototypes
-	$type =~ s/__(user|kernel|iomem|percpu|pmem|rcu)\s*//;
-
 	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
 	    # pointer-to-function
 	    print $1 . $parameter . ") (" . $2;
@@ -1859,11 +1867,15 @@
 	    print $type . " " . $parameter;
 	}
     }
-    print ")\n\n";
-    print_lineno($declaration_start_line);
-    $lineprefix = "   ";
-    output_highlight_rst($args{'purpose'});
-    print "\n";
+    if ($args{'typedef'}) {
+	print ");``\n\n";
+    } else {
+	print ")\n\n";
+	print_lineno($declaration_start_line);
+	$lineprefix = "   ";
+	output_highlight_rst($args{'purpose'});
+	print "\n";
+    }
 
     print "**Parameters**\n\n";
     $lineprefix = "  ";
@@ -2003,7 +2015,7 @@
 	($args{'parameterdescs'}{$parameter_name} ne $undescribed) || next;
 	$type = $args{'parametertypes'}{$parameter};
         print_lineno($parameterdesc_start_lines{$parameter_name});
-	print "``$type $parameter``\n";
+	print "``" . $parameter . "``\n";
 	output_highlight_rst($args{'parameterdescs'}{$parameter_name});
 	print "\n";
     }
@@ -2193,7 +2205,9 @@
     $x =~ s@/\*.*?\*/@@gos;	# strip comments.
 
     # Parse function prototypes
-    if ($x =~ /typedef\s+(\w+)\s*\(\*\s*(\w\S+)\s*\)\s*\((.*)\);/) {
+    if ($x =~ /typedef\s+(\w+)\s*\(\*\s*(\w\S+)\s*\)\s*\((.*)\);/ ||
+	$x =~ /typedef\s+(\w+)\s*(\w\S+)\s*\s*\((.*)\);/) {
+
 	# Function typedefs
 	$return_type = $1;
 	$declaration_name = $2;
@@ -2204,6 +2218,7 @@
 	output_declaration($declaration_name,
 			   'function',
 			   {'function' => $declaration_name,
+			    'typedef' => 1,
 			    'module' => $modulename,
 			    'functiontype' => $return_type,
 			    'parameterlist' => \@parameterlist,
@@ -2338,6 +2353,7 @@
 
 	if ($type eq "" && $param =~ /\.\.\.$/)
 	{
+	    $param = "...";
 	    if (!defined $parameterdescs{$param} || $parameterdescs{$param} eq "") {
 		$parameterdescs{$param} = "variable arguments";
 	    }
diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index e1c09e2..8ea9fd2 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -332,7 +332,9 @@
 	(cd $objtree; find tools/objtool -type f -executable) >> "$objtree/debian/hdrobjfiles"
 fi
 (cd $objtree; find arch/$SRCARCH/include Module.symvers include scripts -type f) >> "$objtree/debian/hdrobjfiles"
-(cd $objtree; find scripts/gcc-plugins -name \*.so -o -name gcc-common.h) >> "$objtree/debian/hdrobjfiles"
+if grep -q '^CONFIG_GCC_PLUGINS=y' $KCONFIG_CONFIG ; then
+	(cd $objtree; find scripts/gcc-plugins -name \*.so -o -name gcc-common.h) >> "$objtree/debian/hdrobjfiles"
+fi
 destdir=$kernel_headers_dir/usr/src/linux-headers-$version
 mkdir -p "$destdir"
 (cd $srctree; tar -c -f - -T -) < "$objtree/debian/hdrsrcfiles" | (cd $destdir; tar -xf -)
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index 42396a7..a68f031 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -363,6 +363,7 @@
 		strcmp(".sched.text",    txtname) == 0 ||
 		strcmp(".spinlock.text", txtname) == 0 ||
 		strcmp(".irqentry.text", txtname) == 0 ||
+		strcmp(".softirqentry.text", txtname) == 0 ||
 		strcmp(".kprobes.text", txtname) == 0 ||
 		strcmp(".text.unlikely", txtname) == 0;
 }
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 96e2486..2d48011 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -134,6 +134,7 @@
      ".sched.text" => 1,
      ".spinlock.text" => 1,
      ".irqentry.text" => 1,
+     ".softirqentry.text" => 1,
      ".kprobes.text" => 1,
      ".text.unlikely" => 1,
 );
diff --git a/scripts/tags.sh b/scripts/tags.sh
index ed7eef2..b3775a96 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -206,7 +206,6 @@
 	'/\<DEFINE_PER_CPU_SHARED_ALIGNED([^,]*, *\([[:alnum:]_]*\)/\1/v/'
 	'/\<DECLARE_WAIT_QUEUE_HEAD(\([[:alnum:]_]*\)/\1/v/'
 	'/\<DECLARE_\(TASKLET\|WORK\|DELAYED_WORK\)(\([[:alnum:]_]*\)/\2/v/'
-	'/\<DEFINE_PCI_DEVICE_TABLE(\([[:alnum:]_]*\)/\1/v/'
 	'/\(^\s\)OFFSET(\([[:alnum:]_]*\)/\2/v/'
 	'/\(^\s\)DEFINE(\([[:alnum:]_]*\)/\2/v/'
 	'/\<DEFINE_HASHTABLE(\([[:alnum:]_]*\)/\1/v/'
diff --git a/scripts/ver_linux b/scripts/ver_linux
index 0d8bd29..430b201 100755
--- a/scripts/ver_linux
+++ b/scripts/ver_linux
@@ -1,193 +1,89 @@
-#!/bin/sh
+#!/bin/awk -f
 # Before running this script please ensure that your PATH is
 # typical as you use for compilation/installation. I use
 # /bin /sbin /usr/bin /usr/sbin /usr/local/bin, but it may
 # differ on your system.
-#
-echo 'If some fields are empty or look unusual you may have an old version.'
-echo 'Compare to the current minimal requirements in Documentation/Changes.'
-echo ' '
 
-uname -a
-echo ' '
+BEGIN {
+	usage = "If some fields are empty or look unusual you may have an old version.\n"
+	usage = usage "Compare to the current minimal requirements in Documentation/Changes.\n"
+	print usage
 
-gcc -dumpversion 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("GNU C\t\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
+	system("uname -a")
+	printf("\n")
 
-make --version 2>&1 |
-awk '/GNU Make/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("GNU Make\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
+	printversion("GNU C", version("gcc -dumpversion 2>&1"))
+	printversion("GNU Make", version("make --version 2>&1"))
+	printversion("Binutils", version("ld -v 2>&1"))
+	printversion("Util-linux", version("mount --version 2>&1"))
+	printversion("Mount", version("mount --version 2>&1"))
+	printversion("Module-init-tools", version("depmod -V  2>&1"))
+	printversion("E2fsprogs", version("tune2fs 2>&1"))
+	printversion("Jfsutils", version("fsck.jfs -V 2>&1"))
+	printversion("Reiserfsprogs", version("reiserfsck -V 2>&1"))
+	printversion("Reiser4fsprogs", version("fsck.reiser4 -V 2>&1"))
+	printversion("Xfsprogs", version("xfs_db -V 2>&1"))
+	printversion("Pcmciautils", version("pccardctl -V 2>&1"))
+	printversion("Pcmcia-cs", version("cardmgr -V 2>&1"))
+	printversion("Quota-tools", version("quota -V 2>&1"))
+	printversion("PPP", version("pppd --version 2>&1"))
+	printversion("Isdn4k-utils", version("isdnctrl 2>&1"))
+	printversion("Nfs-utils", version("showmount --version 2>&1"))
 
-ld -v 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Binutils\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-mount --version 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	$0 = substr($0,RSTART,RLENGTH)
-	printf("Util-linux\t\t%s\nMount\t\t\t%s\n",$0,$0)
-}'
-
-depmod -V  2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Module-init-tools\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-tune2fs 2>&1 |
-awk '/^tune2fs/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("E2fsprogs\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-fsck.jfs -V 2>&1 |
-awk '/version/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Jfsutils\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-reiserfsck -V 2>&1 |
-awk '/^reiserfsck/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Reiserfsprogs\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-fsck.reiser4 -V 2>&1 | grep ^fsck.reiser4 | awk \
-'NR==1{print "reiser4progs          ", $2}'
-
-xfs_db -V 2>&1 |
-awk '/version/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Xfsprogs\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-pccardctl -V 2>&1 |
-awk '/pcmciautils/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Pcmciautils\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-cardmgr -V 2>&1| grep version | awk \
-'NR==1{print "pcmcia-cs             ", $3}'
-
-quota -V 2>&1 |
-awk '/version/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Quota-tools\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-pppd --version 2>&1 |
-awk '/version/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("PPP\t\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-isdnctrl 2>&1 | grep version | awk \
-'NR==1{print "isdn4k-utils          ", $NF}'
-
-showmount --version 2>&1 | grep nfs-utils | awk \
-'NR==1{print "nfs-utils             ", $NF}'
-
-test -r /proc/self/maps &&
-sed '
-	/.*libc-\(.*\)\.so$/!d
-	s//Linux C Library\t\t\1/
-	q
-' /proc/self/maps
-
-ldd --version 2>&1 |
-awk '/^ldd/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Dynamic linker (ldd)\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-libcpp=`ldconfig -p 2>/dev/null |
-	awk '/(libg|stdc)[+]+\.so/ {
-	print $NF
-	exit
+	if (system("test -r /proc/self/maps") == 0) {
+		while (getline <"/proc/self/maps" > 0) {
+			n = split($0, procmaps, "/")
+			if (/libc.*so$/ && match(procmaps[n], /[0-9]+([.]?[0-9]+)+/)) {
+				ver = substr(procmaps[n], RSTART, RLENGTH)
+				printversion("Linux C Library", ver)
+				break
+			}
+		}
 	}
-'`
-test -r "$libcpp" &&
-ls -l $libcpp |
-sed '
-	s!.*so\.!!
-	s!^!Linux C++ Library\t!
-'
-ps --version 2>&1 |
-awk '/version/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Procps\t\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
 
-ifconfig --version 2>&1 |
-awk '/tools/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Net-tools\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
+	printversion("Dynamic linker (ldd)", version("ldd --version 2>&1"))
 
-loadkeys -V 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	$0 = substr($0,RSTART,RLENGTH)
-	printf("Kbd\t\t\t%s\nConsole-tools\t\t%s\n",$0,$0)
-}'
+	while ("ldconfig -p 2>/dev/null" | getline > 0) {
+		if (/(libg|stdc)[+]+\.so/) {
+			libcpp = $NF
+			break
+		}
+	}
+	if (system("test -r " libcpp) == 0)
+		printversion("Linux C++ Library", version("readlink " libcpp))
 
-oprofiled --version 2>&1 | awk \
-'(NR==1 && ($2 == "oprofile")) {print "oprofile              ", $3}'
+	printversion("Procps", version("ps --version 2>&1"))
+	printversion("Net-tools", version("ifconfig --version 2>&1"))
+	printversion("Kbd", version("loadkeys -V 2>&1"))
+	printversion("Console-tools", version("loadkeys -V 2>&1"))
+	printversion("Oprofile", version("oprofiled --version 2>&1"))
+	printversion("Sh-utils", version("expr --v 2>&1"))
+	printversion("Udev", version("udevadm --version 2>&1"))
+	printversion("Wireless-tools", version("iwconfig --version 2>&1"))
 
-expr --v 2>&1 |
-awk '/^expr/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Sh-utils\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-udevadm --version 2>&1 |
-awk '/[0-9]+([.]?[0-9]+)+/ && !/not found$/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Udev\t\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-iwconfig --version 2>&1 |
-awk '/version/{
-	match($0, /[0-9]+([.]?[0-9]+)+/)
-	printf("Wireless-tools\t\t%s\n",
-	substr($0,RSTART,RLENGTH))
-}'
-
-test -e /proc/modules &&
-sort /proc/modules |
-sed '
-	s/ .*//
-	H
-${
-	g
-	s/^\n/Modules Loaded\t\t/
-	y/\n/ /
-	q
+	if (system("test -r /proc/modules") == 0) {
+		while ("sort /proc/modules" | getline > 0) {
+			mods = mods sep $1
+			sep = " "
+		}
+		printversion("Modules Loaded", mods)
+	}
 }
-	d
-'
+
+function version(cmd,    ver) {
+	while (cmd | getline > 0) {
+		if (!/ver_linux/ && match($0, /[0-9]+([.]?[0-9]+)+/)) {
+			ver = substr($0, RSTART, RLENGTH)
+			break
+		}
+	}
+	close(cmd)
+	return ver
+}
+
+function printversion(name, value,  ofmt) {
+	if (value != "") {
+		ofmt = "%-20s\t%s\n"
+		printf(ofmt, name, value)
+	}
+}
diff --git a/security/Kconfig b/security/Kconfig
index df28f2b..118f454 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -136,6 +136,7 @@
 config HARDENED_USERCOPY
 	bool "Harden memory copies between kernel and userspace"
 	depends on HAVE_ARCH_HARDENED_USERCOPY
+	depends on HAVE_HARDENED_USERCOPY_ALLOCATOR
 	select BUG
 	help
 	  This option checks for obviously wrong memory regions when
@@ -146,6 +147,17 @@
 	  or are part of the kernel text. This kills entire classes
 	  of heap overflow exploits and similar kernel memory exposures.
 
+config HARDENED_USERCOPY_PAGESPAN
+	bool "Refuse to copy allocations that span multiple pages"
+	depends on HARDENED_USERCOPY
+	depends on EXPERT
+	help
+	  When a multi-page allocation is done without __GFP_COMP,
+	  hardened usercopy will reject attempts to copy it. There are,
+	  however, several cases of this in the kernel that have not all
+	  been removed. This config is intended to be used only while
+	  trying to find such users.
+
 source security/selinux/Kconfig
 source security/smack/Kconfig
 source security/tomoyo/Kconfig
diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c
index 5adbfc3..17a0610 100644
--- a/security/keys/encrypted-keys/encrypted.c
+++ b/security/keys/encrypted-keys/encrypted.c
@@ -29,6 +29,7 @@
 #include <linux/rcupdate.h>
 #include <linux/scatterlist.h>
 #include <linux/ctype.h>
+#include <crypto/aes.h>
 #include <crypto/hash.h>
 #include <crypto/sha.h>
 #include <crypto/skcipher.h>
@@ -478,6 +479,7 @@
 	struct crypto_skcipher *tfm;
 	struct skcipher_request *req;
 	unsigned int encrypted_datalen;
+	u8 iv[AES_BLOCK_SIZE];
 	unsigned int padlen;
 	char pad[16];
 	int ret;
@@ -500,8 +502,8 @@
 	sg_init_table(sg_out, 1);
 	sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen);
 
-	skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen,
-				   epayload->iv);
+	memcpy(iv, epayload->iv, sizeof(iv));
+	skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv);
 	ret = crypto_skcipher_encrypt(req);
 	tfm = crypto_skcipher_reqtfm(req);
 	skcipher_request_free(req);
@@ -581,6 +583,7 @@
 	struct crypto_skcipher *tfm;
 	struct skcipher_request *req;
 	unsigned int encrypted_datalen;
+	u8 iv[AES_BLOCK_SIZE];
 	char pad[16];
 	int ret;
 
@@ -599,8 +602,8 @@
 		   epayload->decrypted_datalen);
 	sg_set_buf(&sg_out[1], pad, sizeof pad);
 
-	skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen,
-				   epayload->iv);
+	memcpy(iv, epayload->iv, sizeof(iv));
+	skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv);
 	ret = crypto_skcipher_decrypt(req);
 	tfm = crypto_skcipher_reqtfm(req);
 	skcipher_request_free(req);
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index cccbf30..37f04da 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -99,7 +99,7 @@
 	}
 	return ret;
 }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * ipv6_skb_to_auditdata : fill auditdata from skb
  * @skb : the skb
@@ -220,7 +220,7 @@
 	 */
 	BUILD_BUG_ON(sizeof(a->u) > sizeof(void *)*2);
 
-	audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
+	audit_log_format(ab, " pid=%d comm=", task_tgid_nr(current));
 	audit_log_untrustedstring(ab, memcpy(comm, current->comm, sizeof(comm)));
 
 	switch (a->type) {
@@ -245,6 +245,19 @@
 		}
 		break;
 	}
+	case LSM_AUDIT_DATA_FILE: {
+		struct inode *inode;
+
+		audit_log_d_path(ab, " path=", &a->u.file->f_path);
+
+		inode = file_inode(a->u.file);
+		if (inode) {
+			audit_log_format(ab, " dev=");
+			audit_log_untrustedstring(ab, inode->i_sb->s_id);
+			audit_log_format(ab, " ino=%lu", inode->i_ino);
+		}
+		break;
+	}
 	case LSM_AUDIT_DATA_IOCTL_OP: {
 		struct inode *inode;
 
@@ -257,7 +270,7 @@
 			audit_log_format(ab, " ino=%lu", inode->i_ino);
 		}
 
-		audit_log_format(ab, " ioctlcmd=%hx", a->u.op->cmd);
+		audit_log_format(ab, " ioctlcmd=0x%hx", a->u.op->cmd);
 		break;
 	}
 	case LSM_AUDIT_DATA_DENTRY: {
@@ -294,7 +307,7 @@
 	case LSM_AUDIT_DATA_TASK: {
 		struct task_struct *tsk = a->u.tsk;
 		if (tsk) {
-			pid_t pid = task_pid_nr(tsk);
+			pid_t pid = task_tgid_nr(tsk);
 			if (pid) {
 				char comm[sizeof(tsk->comm)];
 				audit_log_format(ab, " opid=%d ocomm=", pid);
diff --git a/security/security.c b/security/security.c
index 4838e7f..f825304 100644
--- a/security/security.c
+++ b/security/security.c
@@ -364,6 +364,15 @@
 }
 EXPORT_SYMBOL(security_dentry_init_security);
 
+int security_dentry_create_files_as(struct dentry *dentry, int mode,
+				    struct qstr *name,
+				    const struct cred *old, struct cred *new)
+{
+	return call_int_hook(dentry_create_files_as, 0, dentry, mode,
+				name, old, new);
+}
+EXPORT_SYMBOL(security_dentry_create_files_as);
+
 int security_inode_init_security(struct inode *inode, struct inode *dir,
 				 const struct qstr *qstr,
 				 const initxattrs initxattrs, void *fs_data)
@@ -748,6 +757,18 @@
 	call_void_hook(inode_getsecid, inode, secid);
 }
 
+int security_inode_copy_up(struct dentry *src, struct cred **new)
+{
+	return call_int_hook(inode_copy_up, 0, src, new);
+}
+EXPORT_SYMBOL(security_inode_copy_up);
+
+int security_inode_copy_up_xattr(const char *name)
+{
+	return call_int_hook(inode_copy_up_xattr, -EOPNOTSUPP, name);
+}
+EXPORT_SYMBOL(security_inode_copy_up_xattr);
+
 int security_file_permission(struct file *file, int mask)
 {
 	int ret;
@@ -1623,6 +1644,8 @@
 		LIST_HEAD_INIT(security_hook_heads.sb_parse_opts_str),
 	.dentry_init_security =
 		LIST_HEAD_INIT(security_hook_heads.dentry_init_security),
+	.dentry_create_files_as =
+		LIST_HEAD_INIT(security_hook_heads.dentry_create_files_as),
 #ifdef CONFIG_SECURITY_PATH
 	.path_unlink =	LIST_HEAD_INIT(security_hook_heads.path_unlink),
 	.path_mkdir =	LIST_HEAD_INIT(security_hook_heads.path_mkdir),
@@ -1684,6 +1707,10 @@
 		LIST_HEAD_INIT(security_hook_heads.inode_listsecurity),
 	.inode_getsecid =
 		LIST_HEAD_INIT(security_hook_heads.inode_getsecid),
+	.inode_copy_up =
+		LIST_HEAD_INIT(security_hook_heads.inode_copy_up),
+	.inode_copy_up_xattr =
+		LIST_HEAD_INIT(security_hook_heads.inode_copy_up_xattr),
 	.file_permission =
 		LIST_HEAD_INIT(security_hook_heads.file_permission),
 	.file_alloc_security =
diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig
index 8691e92..ea7e3ef 100644
--- a/security/selinux/Kconfig
+++ b/security/selinux/Kconfig
@@ -93,41 +93,3 @@
 	  via /selinux/checkreqprot if authorized by policy.
 
 	  If you are unsure how to answer this question, answer 0.
-
-config SECURITY_SELINUX_POLICYDB_VERSION_MAX
-	bool "NSA SELinux maximum supported policy format version"
-	depends on SECURITY_SELINUX
-	default n
-	help
-	  This option enables the maximum policy format version supported
-	  by SELinux to be set to a particular value.  This value is reported
-	  to userspace via /selinux/policyvers and used at policy load time.
-	  It can be adjusted downward to support legacy userland (init) that
-	  does not correctly handle kernels that support newer policy versions.
-
-	  Examples:
-	  For the Fedora Core 3 or 4 Linux distributions, enable this option
-	  and set the value via the next option. For Fedora Core 5 and later,
-	  do not enable this option.
-
-	  If you are unsure how to answer this question, answer N.
-
-config SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
-	int "NSA SELinux maximum supported policy format version value"
-	depends on SECURITY_SELINUX_POLICYDB_VERSION_MAX
-	range 15 23
-	default 19
-	help
-	  This option sets the value for the maximum policy format version
-	  supported by SELinux.
-
-	  Examples:
-	  For Fedora Core 3, use 18.
-	  For Fedora Core 4, use 19.
-
-	  If you are unsure how to answer this question, look for the
-	  policy format version supported by your policy toolchain, by
-	  running 'checkpolicy -V'. Or look at what policy you have
-	  installed under /etc/selinux/$SELINUXTYPE/policy, where
-	  SELINUXTYPE is defined in your /etc/selinux/config.
-
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 13185a6..2205ea2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1761,8 +1761,8 @@
 {
 	struct common_audit_data ad;
 
-	ad.type = LSM_AUDIT_DATA_PATH;
-	ad.u.path = file->f_path;
+	ad.type = LSM_AUDIT_DATA_FILE;
+	ad.u.file = file;
 	return inode_has_perm(cred, file_inode(file), av, &ad);
 }
 
@@ -1784,8 +1784,8 @@
 	u32 sid = cred_sid(cred);
 	int rc;
 
-	ad.type = LSM_AUDIT_DATA_PATH;
-	ad.u.path = file->f_path;
+	ad.type = LSM_AUDIT_DATA_FILE;
+	ad.u.file = file;
 
 	if (sid != fsec->sid) {
 		rc = avc_has_perm(sid, fsec->sid,
@@ -1808,13 +1808,13 @@
 /*
  * Determine the label for an inode that might be unioned.
  */
-static int selinux_determine_inode_label(struct inode *dir,
-					 const struct qstr *name,
-					 u16 tclass,
-					 u32 *_new_isid)
+static int
+selinux_determine_inode_label(const struct task_security_struct *tsec,
+				 struct inode *dir,
+				 const struct qstr *name, u16 tclass,
+				 u32 *_new_isid)
 {
 	const struct superblock_security_struct *sbsec = dir->i_sb->s_security;
-	const struct task_security_struct *tsec = current_security();
 
 	if ((sbsec->flags & SE_SBINITIALIZED) &&
 	    (sbsec->behavior == SECURITY_FS_USE_MNTPOINT)) {
@@ -1857,8 +1857,8 @@
 	if (rc)
 		return rc;
 
-	rc = selinux_determine_inode_label(dir, &dentry->d_name, tclass,
-					   &newsid);
+	rc = selinux_determine_inode_label(current_security(), dir,
+					   &dentry->d_name, tclass, &newsid);
 	if (rc)
 		return rc;
 
@@ -2365,8 +2365,8 @@
 			new_tsec->sid = old_tsec->sid;
 	}
 
-	ad.type = LSM_AUDIT_DATA_PATH;
-	ad.u.path = bprm->file->f_path;
+	ad.type = LSM_AUDIT_DATA_FILE;
+	ad.u.file = bprm->file;
 
 	if (new_tsec->sid == old_tsec->sid) {
 		rc = avc_has_perm(old_tsec->sid, isec->sid,
@@ -2838,7 +2838,8 @@
 	u32 newsid;
 	int rc;
 
-	rc = selinux_determine_inode_label(d_inode(dentry->d_parent), name,
+	rc = selinux_determine_inode_label(current_security(),
+					   d_inode(dentry->d_parent), name,
 					   inode_mode_to_security_class(mode),
 					   &newsid);
 	if (rc)
@@ -2847,6 +2848,27 @@
 	return security_sid_to_context(newsid, (char **)ctx, ctxlen);
 }
 
+static int selinux_dentry_create_files_as(struct dentry *dentry, int mode,
+					  struct qstr *name,
+					  const struct cred *old,
+					  struct cred *new)
+{
+	u32 newsid;
+	int rc;
+	struct task_security_struct *tsec;
+
+	rc = selinux_determine_inode_label(old->security,
+					   d_inode(dentry->d_parent), name,
+					   inode_mode_to_security_class(mode),
+					   &newsid);
+	if (rc)
+		return rc;
+
+	tsec = new->security;
+	tsec->create_sid = newsid;
+	return 0;
+}
+
 static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
 				       const struct qstr *qstr,
 				       const char **name,
@@ -2863,7 +2885,7 @@
 	sid = tsec->sid;
 	newsid = tsec->create_sid;
 
-	rc = selinux_determine_inode_label(
+	rc = selinux_determine_inode_label(current_security(),
 		dir, qstr,
 		inode_mode_to_security_class(inode->i_mode),
 		&newsid);
@@ -3293,6 +3315,41 @@
 	*secid = isec->sid;
 }
 
+static int selinux_inode_copy_up(struct dentry *src, struct cred **new)
+{
+	u32 sid;
+	struct task_security_struct *tsec;
+	struct cred *new_creds = *new;
+
+	if (new_creds == NULL) {
+		new_creds = prepare_creds();
+		if (!new_creds)
+			return -ENOMEM;
+	}
+
+	tsec = new_creds->security;
+	/* Get label from overlay inode and set it in create_sid */
+	selinux_inode_getsecid(d_inode(src), &sid);
+	tsec->create_sid = sid;
+	*new = new_creds;
+	return 0;
+}
+
+static int selinux_inode_copy_up_xattr(const char *name)
+{
+	/* The copy_up hook above sets the initial context on an inode, but we
+	 * don't then want to overwrite it by blindly copying all the lower
+	 * xattrs up.  Instead, we have to filter out SELinux-related xattrs.
+	 */
+	if (strcmp(name, XATTR_NAME_SELINUX) == 0)
+		return 1; /* Discard */
+	/*
+	 * Any other attribute apart from SELINUX is not claimed, supported
+	 * by selinux.
+	 */
+	return -EOPNOTSUPP;
+}
+
 /* file security operations */
 
 static int selinux_revalidate_file_permission(struct file *file, int mask)
@@ -3776,8 +3833,8 @@
 
 	/* finit_module */
 
-	ad.type = LSM_AUDIT_DATA_PATH;
-	ad.u.path = file->f_path;
+	ad.type = LSM_AUDIT_DATA_FILE;
+	ad.u.file = file;
 
 	fsec = file->f_security;
 	if (sid != fsec->sid) {
@@ -3984,7 +4041,7 @@
 	return ret;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 /* Returns error only if unable to parse addresses */
 static int selinux_parse_skb_ipv6(struct sk_buff *skb,
@@ -4075,7 +4132,7 @@
 				       &ad->u.net->v4info.daddr);
 		goto okay;
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case PF_INET6:
 		ret = selinux_parse_skb_ipv6(skb, ad, proto);
 		if (ret)
@@ -5029,7 +5086,7 @@
 	return selinux_ip_forward(skb, state->in, PF_INET);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static unsigned int selinux_ipv6_forward(void *priv,
 					 struct sk_buff *skb,
 					 const struct nf_hook_state *state)
@@ -5087,7 +5144,7 @@
 	return selinux_ip_output(skb, PF_INET);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static unsigned int selinux_ipv6_output(void *priv,
 					struct sk_buff *skb,
 					const struct nf_hook_state *state)
@@ -5273,7 +5330,7 @@
 	return selinux_ip_postroute(skb, state->out, PF_INET);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static unsigned int selinux_ipv6_postroute(void *priv,
 					   struct sk_buff *skb,
 					   const struct nf_hook_state *state)
@@ -6062,6 +6119,7 @@
 	LSM_HOOK_INIT(sb_parse_opts_str, selinux_parse_opts_str),
 
 	LSM_HOOK_INIT(dentry_init_security, selinux_dentry_init_security),
+	LSM_HOOK_INIT(dentry_create_files_as, selinux_dentry_create_files_as),
 
 	LSM_HOOK_INIT(inode_alloc_security, selinux_inode_alloc_security),
 	LSM_HOOK_INIT(inode_free_security, selinux_inode_free_security),
@@ -6088,6 +6146,8 @@
 	LSM_HOOK_INIT(inode_setsecurity, selinux_inode_setsecurity),
 	LSM_HOOK_INIT(inode_listsecurity, selinux_inode_listsecurity),
 	LSM_HOOK_INIT(inode_getsecid, selinux_inode_getsecid),
+	LSM_HOOK_INIT(inode_copy_up, selinux_inode_copy_up),
+	LSM_HOOK_INIT(inode_copy_up_xattr, selinux_inode_copy_up_xattr),
 
 	LSM_HOOK_INIT(file_permission, selinux_file_permission),
 	LSM_HOOK_INIT(file_alloc_security, selinux_file_alloc_security),
@@ -6317,7 +6377,7 @@
 		.hooknum =	NF_INET_LOCAL_OUT,
 		.priority =	NF_IP_PRI_SELINUX_FIRST,
 	},
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	{
 		.hook =		selinux_ipv6_postroute,
 		.pf =		NFPROTO_IPV6,
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 38feb55..308a286 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -39,11 +39,7 @@
 
 /* Range of policy versions we understand*/
 #define POLICYDB_VERSION_MIN   POLICYDB_VERSION_BASE
-#ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX
-#define POLICYDB_VERSION_MAX	CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE
-#else
 #define POLICYDB_VERSION_MAX	POLICYDB_VERSION_XPERMS_IOCTL
-#endif
 
 /* Mask for just the mount related flags */
 #define SE_MNTMASK	0x0f
diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c
index 456e1a9..34afead 100644
--- a/security/selinux/ss/conditional.c
+++ b/security/selinux/ss/conditional.c
@@ -242,6 +242,8 @@
 		goto err;
 
 	len = le32_to_cpu(buf[2]);
+	if (((len == 0) || (len == (u32)-1)))
+		goto err;
 
 	rc = -ENOMEM;
 	key = kmalloc(len + 1, GFP_KERNEL);
diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c
index 894b6cd..7d10e5d 100644
--- a/security/selinux/ss/ebitmap.c
+++ b/security/selinux/ss/ebitmap.c
@@ -374,6 +374,9 @@
 		goto ok;
 	}
 
+	if (e->highbit && !count)
+		goto bad;
+
 	for (i = 0; i < count; i++) {
 		rc = next_entry(&startbit, fp, sizeof(u32));
 		if (rc < 0) {
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 992a315..ace6838 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -541,21 +541,21 @@
 
 	rc = -ENOMEM;
 	p->class_val_to_struct =
-		kmalloc(p->p_classes.nprim * sizeof(*(p->class_val_to_struct)),
+		kzalloc(p->p_classes.nprim * sizeof(*(p->class_val_to_struct)),
 			GFP_KERNEL);
 	if (!p->class_val_to_struct)
 		goto out;
 
 	rc = -ENOMEM;
 	p->role_val_to_struct =
-		kmalloc(p->p_roles.nprim * sizeof(*(p->role_val_to_struct)),
+		kzalloc(p->p_roles.nprim * sizeof(*(p->role_val_to_struct)),
 			GFP_KERNEL);
 	if (!p->role_val_to_struct)
 		goto out;
 
 	rc = -ENOMEM;
 	p->user_val_to_struct =
-		kmalloc(p->p_users.nprim * sizeof(*(p->user_val_to_struct)),
+		kzalloc(p->p_users.nprim * sizeof(*(p->user_val_to_struct)),
 			GFP_KERNEL);
 	if (!p->user_val_to_struct)
 		goto out;
@@ -964,7 +964,7 @@
 		 * Role must be authorized for the type.
 		 */
 		role = p->role_val_to_struct[c->role - 1];
-		if (!ebitmap_get_bit(&role->types, c->type - 1))
+		if (!role || !ebitmap_get_bit(&role->types, c->type - 1))
 			/* role may not be associated with type */
 			return 0;
 
@@ -1094,6 +1094,9 @@
 	int rc;
 	char *str;
 
+	if ((len == 0) || (len == (u32)-1))
+		return -EINVAL;
+
 	str = kmalloc(len + 1, flags);
 	if (!str)
 		return -ENOMEM;
@@ -2414,6 +2417,7 @@
 		} else
 			tr->tclass = p->process_class;
 
+		rc = -EINVAL;
 		if (!policydb_role_isvalid(p, tr->role) ||
 		    !policydb_type_isvalid(p, tr->type) ||
 		    !policydb_class_isvalid(p, tr->tclass) ||
diff --git a/security/smack/Kconfig b/security/smack/Kconfig
index 271adae..923b120 100644
--- a/security/smack/Kconfig
+++ b/security/smack/Kconfig
@@ -40,3 +40,15 @@
 	  This enables security marking of network packets using
 	  Smack labels.
 	  If you are unsure how to answer this question, answer N.
+
+config SECURITY_SMACK_APPEND_SIGNALS
+	bool "Treat delivering signals as an append operation"
+	depends on SECURITY_SMACK
+	default n
+	help
+	  Sending a signal has been treated as a write operation to the
+	  receiving process. If this option is selected, the delivery
+	  will be an append operation instead. This makes it possible
+	  to differentiate between delivering a network packet and
+	  delivering a signal in the Smack rules.
+	  If you are unsure how to answer this question, answer N.
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 26e58f1..51fd301 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -256,6 +256,16 @@
 #define MAY_LOCK	0x00002000	/* Locks should be writes, but ... */
 #define MAY_BRINGUP	0x00004000	/* Report use of this rule */
 
+/*
+ * The policy for delivering signals is configurable.
+ * It is usually "write", but can be "append".
+ */
+#ifdef CONFIG_SECURITY_SMACK_APPEND_SIGNALS
+#define MAY_DELIVER	MAY_APPEND	/* Signal delivery requires append */
+#else
+#define MAY_DELIVER	MAY_WRITE	/* Signal delivery requires write */
+#endif
+
 #define SMACK_BRINGUP_ALLOW		1	/* Allow bringup mode */
 #define SMACK_UNCONFINED_SUBJECT	2	/* Allow unconfined label */
 #define SMACK_UNCONFINED_OBJECT		3	/* Allow unconfined label */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 87a9741..caec225 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1857,14 +1857,14 @@
 
 	/* we don't log here as rc can be overriden */
 	skp = file->f_security;
-	rc = smk_access(skp, tkp, MAY_WRITE, NULL);
-	rc = smk_bu_note("sigiotask", skp, tkp, MAY_WRITE, rc);
+	rc = smk_access(skp, tkp, MAY_DELIVER, NULL);
+	rc = smk_bu_note("sigiotask", skp, tkp, MAY_DELIVER, rc);
 	if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE))
 		rc = 0;
 
 	smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK);
 	smk_ad_setfield_u_tsk(&ad, tsk);
-	smack_log(skp->smk_known, tkp->smk_known, MAY_WRITE, rc, &ad);
+	smack_log(skp->smk_known, tkp->smk_known, MAY_DELIVER, rc, &ad);
 	return rc;
 }
 
@@ -2265,8 +2265,8 @@
 	 * can write the receiver.
 	 */
 	if (secid == 0) {
-		rc = smk_curacc(tkp, MAY_WRITE, &ad);
-		rc = smk_bu_task(p, MAY_WRITE, rc);
+		rc = smk_curacc(tkp, MAY_DELIVER, &ad);
+		rc = smk_bu_task(p, MAY_DELIVER, rc);
 		return rc;
 	}
 	/*
@@ -2275,8 +2275,8 @@
 	 * we can't take privilege into account.
 	 */
 	skp = smack_from_secid(secid);
-	rc = smk_access(skp, tkp, MAY_WRITE, &ad);
-	rc = smk_bu_note("USB signal", skp, tkp, MAY_WRITE, rc);
+	rc = smk_access(skp, tkp, MAY_DELIVER, &ad);
+	rc = smk_bu_note("USB signal", skp, tkp, MAY_DELIVER, rc);
 	return rc;
 }
 
diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c
index aa6bf1b..205b785 100644
--- a/security/smack/smack_netfilter.c
+++ b/security/smack/smack_netfilter.c
@@ -20,7 +20,7 @@
 #include <net/inet_sock.h>
 #include "smack.h"
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 static unsigned int smack_ipv6_output(void *priv,
 					struct sk_buff *skb,
@@ -64,7 +64,7 @@
 		.hooknum =	NF_INET_LOCAL_OUT,
 		.priority =	NF_IP_PRI_SELINUX_FIRST,
 	},
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	{
 		.hook =		smack_ipv6_output,
 		.pf =		NFPROTO_IPV6,
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index e249a66..6492fe9 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -2523,14 +2523,9 @@
 	if (count == 0 || count > SMK_LONGLABEL)
 		return -EINVAL;
 
-	data = kzalloc(count, GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-
-	if (copy_from_user(data, buf, count) != 0) {
-		rc = -EFAULT;
-		goto out_data;
-	}
+	data = memdup_user(buf, count);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
 
 	cp = smk_parse_smack(data, count);
 	if (IS_ERR(cp)) {
diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 795437b..b450a27 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -1633,11 +1633,13 @@
 		return -EBUSY;
 	}
 	list_add_tail(&rmidi->list, &snd_rawmidi_devices);
+	mutex_unlock(&register_mutex);
 	err = snd_register_device(SNDRV_DEVICE_TYPE_RAWMIDI,
 				  rmidi->card, rmidi->device,
 				  &snd_rawmidi_f_ops, rmidi, &rmidi->dev);
 	if (err < 0) {
 		rmidi_err(rmidi, "unable to register\n");
+		mutex_lock(&register_mutex);
 		list_del(&rmidi->list);
 		mutex_unlock(&register_mutex);
 		return err;
@@ -1645,6 +1647,7 @@
 	if (rmidi->ops && rmidi->ops->dev_register &&
 	    (err = rmidi->ops->dev_register(rmidi)) < 0) {
 		snd_unregister_device(&rmidi->dev);
+		mutex_lock(&register_mutex);
 		list_del(&rmidi->list);
 		mutex_unlock(&register_mutex);
 		return err;
@@ -1677,7 +1680,6 @@
 		}
 	}
 #endif /* CONFIG_SND_OSSEMUL */
-	mutex_unlock(&register_mutex);
 	sprintf(name, "midi%d", rmidi->device);
 	entry = snd_info_create_card_entry(rmidi->card, name, rmidi->card->proc_root);
 	if (entry) {
diff --git a/sound/core/timer.c b/sound/core/timer.c
index 9a6157e..fc144f4 100644
--- a/sound/core/timer.c
+++ b/sound/core/timer.c
@@ -35,6 +35,9 @@
 #include <sound/initval.h>
 #include <linux/kmod.h>
 
+/* internal flags */
+#define SNDRV_TIMER_IFLG_PAUSED		0x00010000
+
 #if IS_ENABLED(CONFIG_SND_HRTIMER)
 #define DEFAULT_TIMER_LIMIT 4
 #else
@@ -294,8 +297,21 @@
 		get_device(&timer->card->card_dev);
 	timeri->slave_class = tid->dev_sclass;
 	timeri->slave_id = slave_id;
-	if (list_empty(&timer->open_list_head) && timer->hw.open)
-		timer->hw.open(timer);
+
+	if (list_empty(&timer->open_list_head) && timer->hw.open) {
+		int err = timer->hw.open(timer);
+		if (err) {
+			kfree(timeri->owner);
+			kfree(timeri);
+
+			if (timer->card)
+				put_device(&timer->card->card_dev);
+			module_put(timer->module);
+			mutex_unlock(&register_mutex);
+			return err;
+		}
+	}
+
 	list_add_tail(&timeri->open_list, &timer->open_list_head);
 	snd_timer_check_master(timeri);
 	mutex_unlock(&register_mutex);
@@ -526,6 +542,10 @@
 		}
 	}
 	timeri->flags &= ~(SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START);
+	if (stop)
+		timeri->flags &= ~SNDRV_TIMER_IFLG_PAUSED;
+	else
+		timeri->flags |= SNDRV_TIMER_IFLG_PAUSED;
 	snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP :
 			  SNDRV_TIMER_EVENT_CONTINUE);
  unlock:
@@ -587,6 +607,10 @@
  */
 int snd_timer_continue(struct snd_timer_instance *timeri)
 {
+	/* timer can continue only after pause */
+	if (!(timeri->flags & SNDRV_TIMER_IFLG_PAUSED))
+		return -EINVAL;
+
 	if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE)
 		return snd_timer_start_slave(timeri, false);
 	else
@@ -813,6 +837,7 @@
 	timer->tmr_subdevice = tid->subdevice;
 	if (id)
 		strlcpy(timer->id, id, sizeof(timer->id));
+	timer->sticks = 1;
 	INIT_LIST_HEAD(&timer->device_list);
 	INIT_LIST_HEAD(&timer->open_list_head);
 	INIT_LIST_HEAD(&timer->active_list_head);
@@ -1817,6 +1842,9 @@
 	tu = file->private_data;
 	if (!tu->timeri)
 		return -EBADFD;
+	/* start timer instead of continue if it's not used before */
+	if (!(tu->timeri->flags & SNDRV_TIMER_IFLG_PAUSED))
+		return snd_timer_user_start(file);
 	tu->timeri->lost = 0;
 	return (err = snd_timer_continue(tu->timeri)) < 0 ? err : 0;
 }
@@ -1958,6 +1986,7 @@
 		tu->qused--;
 		spin_unlock_irq(&tu->qlock);
 
+		mutex_lock(&tu->ioctl_lock);
 		if (tu->tread) {
 			if (copy_to_user(buffer, &tu->tqueue[qhead],
 					 sizeof(struct snd_timer_tread)))
@@ -1967,6 +1996,7 @@
 					 sizeof(struct snd_timer_read)))
 				err = -EFAULT;
 		}
+		mutex_unlock(&tu->ioctl_lock);
 
 		spin_lock_irq(&tu->qlock);
 		if (err < 0)
diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h
index 03ed352..d73c12b 100644
--- a/sound/firewire/fireworks/fireworks.h
+++ b/sound/firewire/fireworks/fireworks.h
@@ -108,7 +108,6 @@
 	u8 *resp_buf;
 	u8 *pull_ptr;
 	u8 *push_ptr;
-	unsigned int resp_queues;
 };
 
 int snd_efw_transaction_cmd(struct fw_unit *unit,
diff --git a/sound/firewire/fireworks/fireworks_hwdep.c b/sound/firewire/fireworks/fireworks_hwdep.c
index 33df865..2e1d9a2 100644
--- a/sound/firewire/fireworks/fireworks_hwdep.c
+++ b/sound/firewire/fireworks/fireworks_hwdep.c
@@ -25,6 +25,7 @@
 {
 	unsigned int length, till_end, type;
 	struct snd_efw_transaction *t;
+	u8 *pull_ptr;
 	long count = 0;
 
 	if (remained < sizeof(type) + sizeof(struct snd_efw_transaction))
@@ -38,8 +39,17 @@
 	buf += sizeof(type);
 
 	/* write into buffer as many responses as possible */
-	while (efw->resp_queues > 0) {
-		t = (struct snd_efw_transaction *)(efw->pull_ptr);
+	spin_lock_irq(&efw->lock);
+
+	/*
+	 * When another task reaches here during this task's access to user
+	 * space, it picks up current position in buffer and can read the same
+	 * series of responses.
+	 */
+	pull_ptr = efw->pull_ptr;
+
+	while (efw->push_ptr != pull_ptr) {
+		t = (struct snd_efw_transaction *)(pull_ptr);
 		length = be32_to_cpu(t->length) * sizeof(__be32);
 
 		/* confirm enough space for this response */
@@ -49,26 +59,39 @@
 		/* copy from ring buffer to user buffer */
 		while (length > 0) {
 			till_end = snd_efw_resp_buf_size -
-				(unsigned int)(efw->pull_ptr - efw->resp_buf);
+				(unsigned int)(pull_ptr - efw->resp_buf);
 			till_end = min_t(unsigned int, length, till_end);
 
-			if (copy_to_user(buf, efw->pull_ptr, till_end))
+			spin_unlock_irq(&efw->lock);
+
+			if (copy_to_user(buf, pull_ptr, till_end))
 				return -EFAULT;
 
-			efw->pull_ptr += till_end;
-			if (efw->pull_ptr >= efw->resp_buf +
-					     snd_efw_resp_buf_size)
-				efw->pull_ptr -= snd_efw_resp_buf_size;
+			spin_lock_irq(&efw->lock);
+
+			pull_ptr += till_end;
+			if (pull_ptr >= efw->resp_buf + snd_efw_resp_buf_size)
+				pull_ptr -= snd_efw_resp_buf_size;
 
 			length -= till_end;
 			buf += till_end;
 			count += till_end;
 			remained -= till_end;
 		}
-
-		efw->resp_queues--;
 	}
 
+	/*
+	 * All of tasks can read from the buffer nearly simultaneously, but the
+	 * last position for each task is different depending on the length of
+	 * given buffer. Here, for simplicity, a position of buffer is set by
+	 * the latest task. It's better for a listening application to allow one
+	 * thread to read from the buffer. Unless, each task can read different
+	 * sequence of responses depending on variation of buffer length.
+	 */
+	efw->pull_ptr = pull_ptr;
+
+	spin_unlock_irq(&efw->lock);
+
 	return count;
 }
 
@@ -76,14 +99,17 @@
 hwdep_read_locked(struct snd_efw *efw, char __user *buf, long count,
 		  loff_t *offset)
 {
-	union snd_firewire_event event;
+	union snd_firewire_event event = {
+		.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS,
+	};
 
-	memset(&event, 0, sizeof(event));
+	spin_lock_irq(&efw->lock);
 
-	event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS;
 	event.lock_status.status = (efw->dev_lock_count > 0);
 	efw->dev_lock_changed = false;
 
+	spin_unlock_irq(&efw->lock);
+
 	count = min_t(long, count, sizeof(event.lock_status));
 
 	if (copy_to_user(buf, &event, count))
@@ -98,10 +124,15 @@
 {
 	struct snd_efw *efw = hwdep->private_data;
 	DEFINE_WAIT(wait);
+	bool dev_lock_changed;
+	bool queued;
 
 	spin_lock_irq(&efw->lock);
 
-	while ((!efw->dev_lock_changed) && (efw->resp_queues == 0)) {
+	dev_lock_changed = efw->dev_lock_changed;
+	queued = efw->push_ptr != efw->pull_ptr;
+
+	while (!dev_lock_changed && !queued) {
 		prepare_to_wait(&efw->hwdep_wait, &wait, TASK_INTERRUPTIBLE);
 		spin_unlock_irq(&efw->lock);
 		schedule();
@@ -109,15 +140,17 @@
 		if (signal_pending(current))
 			return -ERESTARTSYS;
 		spin_lock_irq(&efw->lock);
+		dev_lock_changed = efw->dev_lock_changed;
+		queued = efw->push_ptr != efw->pull_ptr;
 	}
 
-	if (efw->dev_lock_changed)
-		count = hwdep_read_locked(efw, buf, count, offset);
-	else if (efw->resp_queues > 0)
-		count = hwdep_read_resp_buf(efw, buf, count, offset);
-
 	spin_unlock_irq(&efw->lock);
 
+	if (dev_lock_changed)
+		count = hwdep_read_locked(efw, buf, count, offset);
+	else if (queued)
+		count = hwdep_read_resp_buf(efw, buf, count, offset);
+
 	return count;
 }
 
@@ -160,7 +193,7 @@
 	poll_wait(file, &efw->hwdep_wait, wait);
 
 	spin_lock_irq(&efw->lock);
-	if (efw->dev_lock_changed || (efw->resp_queues > 0))
+	if (efw->dev_lock_changed || efw->pull_ptr != efw->push_ptr)
 		events = POLLIN | POLLRDNORM;
 	else
 		events = 0;
diff --git a/sound/firewire/fireworks/fireworks_proc.c b/sound/firewire/fireworks/fireworks_proc.c
index 0639dcb..beb0a0f 100644
--- a/sound/firewire/fireworks/fireworks_proc.c
+++ b/sound/firewire/fireworks/fireworks_proc.c
@@ -188,8 +188,8 @@
 	else
 		consumed = (unsigned int)(efw->push_ptr - efw->pull_ptr);
 
-	snd_iprintf(buffer, "%d %d/%d\n",
-		    efw->resp_queues, consumed, snd_efw_resp_buf_size);
+	snd_iprintf(buffer, "%d/%d\n",
+		    consumed, snd_efw_resp_buf_size);
 }
 
 static void
diff --git a/sound/firewire/fireworks/fireworks_transaction.c b/sound/firewire/fireworks/fireworks_transaction.c
index f550808..36a08ba 100644
--- a/sound/firewire/fireworks/fireworks_transaction.c
+++ b/sound/firewire/fireworks/fireworks_transaction.c
@@ -121,11 +121,11 @@
 	size_t capacity, till_end;
 	struct snd_efw_transaction *t;
 
-	spin_lock_irq(&efw->lock);
-
 	t = (struct snd_efw_transaction *)data;
 	length = min_t(size_t, be32_to_cpu(t->length) * sizeof(u32), length);
 
+	spin_lock_irq(&efw->lock);
+
 	if (efw->push_ptr < efw->pull_ptr)
 		capacity = (unsigned int)(efw->pull_ptr - efw->push_ptr);
 	else
@@ -155,7 +155,6 @@
 	}
 
 	/* for hwdep */
-	efw->resp_queues++;
 	wake_up(&efw->hwdep_wait);
 
 	*rcode = RCODE_COMPLETE;
diff --git a/sound/firewire/tascam/tascam-hwdep.c b/sound/firewire/tascam/tascam-hwdep.c
index 131267c..106406c 100644
--- a/sound/firewire/tascam/tascam-hwdep.c
+++ b/sound/firewire/tascam/tascam-hwdep.c
@@ -16,31 +16,14 @@
 
 #include "tascam.h"
 
-static long hwdep_read_locked(struct snd_tscm *tscm, char __user *buf,
-			      long count)
-{
-	union snd_firewire_event event;
-
-	memset(&event, 0, sizeof(event));
-
-	event.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS;
-	event.lock_status.status = (tscm->dev_lock_count > 0);
-	tscm->dev_lock_changed = false;
-
-	count = min_t(long, count, sizeof(event.lock_status));
-
-	if (copy_to_user(buf, &event, count))
-		return -EFAULT;
-
-	return count;
-}
-
 static long hwdep_read(struct snd_hwdep *hwdep, char __user *buf, long count,
 		       loff_t *offset)
 {
 	struct snd_tscm *tscm = hwdep->private_data;
 	DEFINE_WAIT(wait);
-	union snd_firewire_event event;
+	union snd_firewire_event event = {
+		.lock_status.type = SNDRV_FIREWIRE_EVENT_LOCK_STATUS,
+	};
 
 	spin_lock_irq(&tscm->lock);
 
@@ -54,10 +37,16 @@
 		spin_lock_irq(&tscm->lock);
 	}
 
-	memset(&event, 0, sizeof(event));
-	count = hwdep_read_locked(tscm, buf, count);
+	event.lock_status.status = (tscm->dev_lock_count > 0);
+	tscm->dev_lock_changed = false;
+
 	spin_unlock_irq(&tscm->lock);
 
+	count = min_t(long, count, sizeof(event.lock_status));
+
+	if (copy_to_user(buf, &event, count))
+		return -EFAULT;
+
 	return count;
 }
 
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 89dacf9..160c7f7 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -906,20 +906,23 @@
 	struct snd_card *card = dev_get_drvdata(dev);
 	struct azx *chip;
 	struct hda_intel *hda;
+	struct hdac_bus *bus;
 
 	if (!card)
 		return 0;
 
 	chip = card->private_data;
 	hda = container_of(chip, struct hda_intel, chip);
+	bus = azx_bus(chip);
 	if (chip->disabled || hda->init_failed || !chip->running)
 		return 0;
 
-	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL
-		&& hda->need_i915_power) {
-		snd_hdac_display_power(azx_bus(chip), true);
-		snd_hdac_i915_set_bclk(azx_bus(chip));
+	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
+		snd_hdac_display_power(bus, true);
+		if (hda->need_i915_power)
+			snd_hdac_i915_set_bclk(bus);
 	}
+
 	if (chip->msi)
 		if (pci_enable_msi(pci) < 0)
 			chip->msi = 0;
@@ -929,6 +932,11 @@
 
 	hda_intel_init_chip(chip, true);
 
+	/* power down again for link-controlled chips */
+	if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+	    !hda->need_i915_power)
+		snd_hdac_display_power(bus, false);
+
 	snd_power_change_state(card, SNDRV_CTL_POWER_D0);
 
 	trace_azx_resume(chip);
@@ -1008,6 +1016,7 @@
 
 	chip = card->private_data;
 	hda = container_of(chip, struct hda_intel, chip);
+	bus = azx_bus(chip);
 	if (chip->disabled || hda->init_failed)
 		return 0;
 
@@ -1015,15 +1024,9 @@
 		return 0;
 
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL) {
-		bus = azx_bus(chip);
-		if (hda->need_i915_power) {
-			snd_hdac_display_power(bus, true);
+		snd_hdac_display_power(bus, true);
+		if (hda->need_i915_power)
 			snd_hdac_i915_set_bclk(bus);
-		} else {
-			/* toggle codec wakeup bit for STATESTS read */
-			snd_hdac_set_codec_wakeup(bus, true);
-			snd_hdac_set_codec_wakeup(bus, false);
-		}
 	}
 
 	/* Read STATESTS before controller reset */
@@ -1043,6 +1046,11 @@
 	azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) &
 			~STATESTS_INT_MASK);
 
+	/* power down again for link-controlled chips */
+	if ((chip->driver_caps & AZX_DCAPS_I915_POWERWELL) &&
+	    !hda->need_i915_power)
+		snd_hdac_display_power(bus, false);
+
 	trace_azx_runtime_resume(chip);
 	return 0;
 }
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 574b1b4..575cefd 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4828,7 +4828,7 @@
 	ALC293_FIXUP_DELL1_MIC_NO_PRESENCE,
 	ALC292_FIXUP_TPT440_DOCK,
 	ALC292_FIXUP_TPT440,
-	ALC283_FIXUP_BXBT2807_MIC,
+	ALC283_FIXUP_HEADSET_MIC,
 	ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED,
 	ALC282_FIXUP_ASPIRE_V5_PINS,
 	ALC280_FIXUP_HP_GPIO4,
@@ -4855,6 +4855,7 @@
 	ALC221_FIXUP_HP_FRONT_MIC,
 	ALC292_FIXUP_TPT460,
 	ALC298_FIXUP_SPK_VOLUME,
+	ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -5321,7 +5322,7 @@
 		.chained = true,
 		.chain_id = ALC292_FIXUP_TPT440_DOCK,
 	},
-	[ALC283_FIXUP_BXBT2807_MIC] = {
+	[ALC283_FIXUP_HEADSET_MIC] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
 			{ 0x19, 0x04a110f0 },
@@ -5516,6 +5517,15 @@
 		.chained = true,
 		.chain_id = ALC298_FIXUP_DELL1_MIC_NO_PRESENCE,
 	},
+	[ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x1b, 0x90170151 },
+			{ }
+		},
+		.chained = true,
+		.chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
+	},
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -5560,6 +5570,7 @@
 	SND_PCI_QUIRK(0x1028, 0x06df, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
 	SND_PCI_QUIRK(0x1028, 0x06e0, "Dell", ALC293_FIXUP_DISABLE_AAMIX_MULTIJACK),
 	SND_PCI_QUIRK(0x1028, 0x0704, "Dell XPS 13 9350", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
+	SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),
 	SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE),
 	SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE),
 	SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME),
@@ -5651,7 +5662,8 @@
 	SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN),
 	SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
 	SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
-	SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC),
+	SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
+	SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE),
@@ -5894,6 +5906,10 @@
 		{0x12, 0x90a60170},
 		{0x14, 0x90170120},
 		{0x21, 0x02211030}),
+	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell Inspiron 5468", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+		{0x12, 0x90a60180},
+		{0x14, 0x90170120},
+		{0x21, 0x02211030}),
 	SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
 		ALC256_STANDARD_PINS),
 	SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
index 54c09ac..16e459a 100644
--- a/sound/soc/atmel/atmel_ssc_dai.c
+++ b/sound/soc/atmel/atmel_ssc_dai.c
@@ -299,8 +299,9 @@
 	clk_enable(ssc_p->ssc->clk);
 	ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk);
 
-	/* Reset the SSC to keep it at a clean status */
-	ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST));
+	/* Reset the SSC unless initialized to keep it in a clean state */
+	if (!ssc_p->initialized)
+		ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST));
 
 	if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
 		dir = 0;
diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c
index e5527bc..bcf1834 100644
--- a/sound/soc/codecs/da7213.c
+++ b/sound/soc/codecs/da7213.c
@@ -1247,8 +1247,8 @@
 		return -EINVAL;
 	}
 
-	/* By default only 32 BCLK per WCLK is supported */
-	dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_32;
+	/* By default only 64 BCLK per WCLK is supported */
+	dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_64;
 
 	snd_soc_write(codec, DA7213_DAI_CLK_MODE, dai_clk_mode);
 	snd_soc_update_bits(codec, DA7213_DAI_CTRL, DA7213_DAI_FORMAT_MASK,
diff --git a/sound/soc/codecs/max98371.c b/sound/soc/codecs/max98371.c
index cf0a39b..02352ed 100644
--- a/sound/soc/codecs/max98371.c
+++ b/sound/soc/codecs/max98371.c
@@ -412,6 +412,7 @@
 
 static const struct i2c_device_id max98371_i2c_id[] = {
 	{ "max98371", 0 },
+	{ }
 };
 
 MODULE_DEVICE_TABLE(i2c, max98371_i2c_id);
diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c
index 5c9707a..2e59a85 100644
--- a/sound/soc/codecs/nau8825.c
+++ b/sound/soc/codecs/nau8825.c
@@ -212,31 +212,6 @@
 	0xfa2f, 0xfaea, 0xfba5, 0xfc60, 0xfd1a, 0xfdd4, 0xfe8e, 0xff47
 };
 
-static struct snd_soc_dai *nau8825_get_codec_dai(struct nau8825 *nau8825)
-{
-	struct snd_soc_codec *codec = snd_soc_dapm_to_codec(nau8825->dapm);
-	struct snd_soc_component *component = &codec->component;
-	struct snd_soc_dai *codec_dai, *_dai;
-
-	list_for_each_entry_safe(codec_dai, _dai, &component->dai_list, list) {
-		if (!strncmp(codec_dai->name, NUVOTON_CODEC_DAI,
-			strlen(NUVOTON_CODEC_DAI)))
-			return codec_dai;
-	}
-	return NULL;
-}
-
-static bool nau8825_dai_is_active(struct nau8825 *nau8825)
-{
-	struct snd_soc_dai *codec_dai = nau8825_get_codec_dai(nau8825);
-
-	if (codec_dai) {
-		if (codec_dai->playback_active || codec_dai->capture_active)
-			return true;
-	}
-	return false;
-}
-
 /**
  * nau8825_sema_acquire - acquire the semaphore of nau88l25
  * @nau8825:  component to register the codec private data with
@@ -250,19 +225,26 @@
  * Acquires the semaphore without jiffies. If no more tasks are allowed
  * to acquire the semaphore, calling this function will put the task to
  * sleep until the semaphore is released.
- * It returns if the semaphore was acquired.
+ * If the semaphore is not released within the specified number of jiffies,
+ * this function returns -ETIME.
+ * If the sleep is interrupted by a signal, this function will return -EINTR.
+ * It returns 0 if the semaphore was acquired successfully.
  */
-static void nau8825_sema_acquire(struct nau8825 *nau8825, long timeout)
+static int nau8825_sema_acquire(struct nau8825 *nau8825, long timeout)
 {
 	int ret;
 
-	if (timeout)
+	if (timeout) {
 		ret = down_timeout(&nau8825->xtalk_sem, timeout);
-	else
+		if (ret < 0)
+			dev_warn(nau8825->dev, "Acquire semaphone timeout\n");
+	} else {
 		ret = down_interruptible(&nau8825->xtalk_sem);
+		if (ret < 0)
+			dev_warn(nau8825->dev, "Acquire semaphone fail\n");
+	}
 
-	if (ret < 0)
-		dev_warn(nau8825->dev, "Acquire semaphone fail\n");
+	return ret;
 }
 
 /**
@@ -1205,6 +1187,8 @@
 	struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec);
 	unsigned int val_len = 0;
 
+	nau8825_sema_acquire(nau8825, 2 * HZ);
+
 	switch (params_width(params)) {
 	case 16:
 		val_len |= NAU8825_I2S_DL_16;
@@ -1225,6 +1209,9 @@
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL1,
 		NAU8825_I2S_DL_MASK, val_len);
 
+	/* Release the semaphone. */
+	nau8825_sema_release(nau8825);
+
 	return 0;
 }
 
@@ -1234,6 +1221,8 @@
 	struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec);
 	unsigned int ctrl1_val = 0, ctrl2_val = 0;
 
+	nau8825_sema_acquire(nau8825, 2 * HZ);
+
 	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
 	case SND_SOC_DAIFMT_CBM_CFM:
 		ctrl2_val |= NAU8825_I2S_MS_MASTER;
@@ -1282,6 +1271,9 @@
 	regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2,
 		NAU8825_I2S_MS_MASK, ctrl2_val);
 
+	/* Release the semaphone. */
+	nau8825_sema_release(nau8825);
+
 	return 0;
 }
 
@@ -1611,8 +1603,11 @@
 					 * cess and restore changes if process
 					 * is ongoing when ejection.
 					 */
+					int ret;
 					nau8825->xtalk_protect = true;
-					nau8825_sema_acquire(nau8825, 0);
+					ret = nau8825_sema_acquire(nau8825, 0);
+					if (ret < 0)
+						nau8825->xtalk_protect = false;
 				}
 				/* Startup cross talk detection process */
 				nau8825->xtalk_state = NAU8825_XTALK_PREPARE;
@@ -2238,23 +2233,14 @@
 static int __maybe_unused nau8825_resume(struct snd_soc_codec *codec)
 {
 	struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec);
+	int ret;
 
 	regcache_cache_only(nau8825->regmap, false);
 	regcache_sync(nau8825->regmap);
-	if (nau8825_is_jack_inserted(nau8825->regmap)) {
-		/* If the jack is inserted, we need to check whether the play-
-		 * back is active before suspend. If active, the driver has to
-		 * raise the protection for cross talk function to avoid the
-		 * playback recovers before cross talk process finish. Other-
-		 * wise, the playback will be interfered by cross talk func-
-		 * tion. It is better to apply hardware related parameters
-		 * before starting playback or record.
-		 */
-		if (nau8825_dai_is_active(nau8825)) {
-			nau8825->xtalk_protect = true;
-			nau8825_sema_acquire(nau8825, 0);
-		}
-	}
+	nau8825->xtalk_protect = true;
+	ret = nau8825_sema_acquire(nau8825, 0);
+	if (ret < 0)
+		nau8825->xtalk_protect = false;
 	enable_irq(nau8825->irq);
 
 	return 0;
diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c
index a67ea10..f266439 100644
--- a/sound/soc/codecs/wm2000.c
+++ b/sound/soc/codecs/wm2000.c
@@ -581,7 +581,7 @@
 	if (anc_transitions[i].dest == ANC_OFF)
 		clk_disable_unprepare(wm2000->mclk);
 
-	return ret;
+	return 0;
 }
 
 static int wm2000_anc_set_mode(struct wm2000_priv *wm2000)
diff --git a/sound/soc/generic/Makefile b/sound/soc/generic/Makefile
index 45602ca..2d53c8d 100644
--- a/sound/soc/generic/Makefile
+++ b/sound/soc/generic/Makefile
@@ -1,5 +1,5 @@
-obj-$(CONFIG_SND_SIMPLE_CARD_UTILS) := simple-card-utils.o
-
+snd-soc-simple-card-utils-objs	:= simple-card-utils.o
 snd-soc-simple-card-objs	:= simple-card.o
 
-obj-$(CONFIG_SND_SIMPLE_CARD)	+= snd-soc-simple-card.o
+obj-$(CONFIG_SND_SIMPLE_CARD_UTILS)	+= snd-soc-simple-card-utils.o
+obj-$(CONFIG_SND_SIMPLE_CARD)		+= snd-soc-simple-card.o
diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c
index d89a9a1..9599de6 100644
--- a/sound/soc/generic/simple-card-utils.c
+++ b/sound/soc/generic/simple-card-utils.c
@@ -7,6 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#include <linux/module.h>
 #include <linux/of.h>
 #include <sound/simple_card_utils.h>
 
@@ -95,3 +96,8 @@
 	return 0;
 }
 EXPORT_SYMBOL_GPL(asoc_simple_card_parse_card_name);
+
+/* Module information */
+MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>");
+MODULE_DESCRIPTION("ALSA SoC Simple Card Utils");
+MODULE_LICENSE("GPL v2");
diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c
index 25fcb79..ddcb52a 100644
--- a/sound/soc/intel/skylake/skl-sst-utils.c
+++ b/sound/soc/intel/skylake/skl-sst-utils.c
@@ -123,6 +123,11 @@
 
 	uuid_mod = (uuid_le *)uuid;
 
+	if (list_empty(&ctx->uuid_list)) {
+		dev_err(ctx->dev, "Module list is empty\n");
+		return -EINVAL;
+	}
+
 	list_for_each_entry(module, &ctx->uuid_list, list) {
 		if (uuid_le_cmp(*uuid_mod, module->uuid) == 0) {
 			dfw_config->module_id = module->id;
diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c
index cd59536..e3e7641 100644
--- a/sound/soc/intel/skylake/skl.c
+++ b/sound/soc/intel/skylake/skl.c
@@ -672,8 +672,10 @@
 
 	skl->nhlt = skl_nhlt_init(bus->dev);
 
-	if (skl->nhlt == NULL)
+	if (skl->nhlt == NULL) {
+		err = -ENODEV;
 		goto out_free;
+	}
 
 	skl_nhlt_update_topology_bin(skl);
 
diff --git a/sound/soc/omap/omap-abe-twl6040.c b/sound/soc/omap/omap-abe-twl6040.c
index 0843a68..f61b3b5 100644
--- a/sound/soc/omap/omap-abe-twl6040.c
+++ b/sound/soc/omap/omap-abe-twl6040.c
@@ -38,10 +38,10 @@
 struct abe_twl6040 {
 	int	jack_detection;	/* board can detect jack events */
 	int	mclk_freq;	/* MCLK frequency speed for twl6040 */
-
-	struct platform_device *dmic_codec_dev;
 };
 
+struct platform_device *dmic_codec_dev;
+
 static int omap_abe_hw_params(struct snd_pcm_substream *substream,
 	struct snd_pcm_hw_params *params)
 {
@@ -258,8 +258,6 @@
 	if (priv == NULL)
 		return -ENOMEM;
 
-	priv->dmic_codec_dev = ERR_PTR(-EINVAL);
-
 	if (snd_soc_of_parse_card_name(card, "ti,model")) {
 		dev_err(&pdev->dev, "Card name is not provided\n");
 		return -ENODEV;
@@ -284,13 +282,6 @@
 		num_links = 2;
 		abe_twl6040_dai_links[1].cpu_of_node = dai_node;
 		abe_twl6040_dai_links[1].platform_of_node = dai_node;
-
-		priv->dmic_codec_dev = platform_device_register_simple(
-						"dmic-codec", -1, NULL, 0);
-		if (IS_ERR(priv->dmic_codec_dev)) {
-			dev_err(&pdev->dev, "Can't instantiate dmic-codec\n");
-			return PTR_ERR(priv->dmic_codec_dev);
-		}
 	} else {
 		num_links = 1;
 	}
@@ -299,16 +290,14 @@
 	of_property_read_u32(node, "ti,mclk-freq", &priv->mclk_freq);
 	if (!priv->mclk_freq) {
 		dev_err(&pdev->dev, "MCLK frequency not provided\n");
-		ret = -EINVAL;
-		goto err_unregister;
+		return -EINVAL;
 	}
 
 	card->fully_routed = 1;
 
 	if (!priv->mclk_freq) {
 		dev_err(&pdev->dev, "MCLK frequency missing\n");
-		ret = -ENODEV;
-		goto err_unregister;
+		return -ENODEV;
 	}
 
 	card->dai_link = abe_twl6040_dai_links;
@@ -317,17 +306,9 @@
 	snd_soc_card_set_drvdata(card, priv);
 
 	ret = snd_soc_register_card(card);
-	if (ret) {
+	if (ret)
 		dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n",
 			ret);
-		goto err_unregister;
-	}
-
-	return 0;
-
-err_unregister:
-	if (!IS_ERR(priv->dmic_codec_dev))
-		platform_device_unregister(priv->dmic_codec_dev);
 
 	return ret;
 }
@@ -335,13 +316,9 @@
 static int omap_abe_remove(struct platform_device *pdev)
 {
 	struct snd_soc_card *card = platform_get_drvdata(pdev);
-	struct abe_twl6040 *priv = snd_soc_card_get_drvdata(card);
 
 	snd_soc_unregister_card(card);
 
-	if (!IS_ERR(priv->dmic_codec_dev))
-		platform_device_unregister(priv->dmic_codec_dev);
-
 	return 0;
 }
 
@@ -361,7 +338,33 @@
 	.remove = omap_abe_remove,
 };
 
-module_platform_driver(omap_abe_driver);
+static int __init omap_abe_init(void)
+{
+	int ret;
+
+	dmic_codec_dev = platform_device_register_simple("dmic-codec", -1, NULL,
+							 0);
+	if (IS_ERR(dmic_codec_dev)) {
+		pr_err("%s: dmic-codec device registration failed\n", __func__);
+		return PTR_ERR(dmic_codec_dev);
+	}
+
+	ret = platform_driver_register(&omap_abe_driver);
+	if (ret) {
+		pr_err("%s: platform driver registration failed\n", __func__);
+		platform_device_unregister(dmic_codec_dev);
+	}
+
+	return ret;
+}
+module_init(omap_abe_init);
+
+static void __exit omap_abe_exit(void)
+{
+	platform_driver_unregister(&omap_abe_driver);
+	platform_device_unregister(dmic_codec_dev);
+}
+module_exit(omap_abe_exit);
 
 MODULE_AUTHOR("Misael Lopez Cruz <misael.lopez@ti.com>");
 MODULE_DESCRIPTION("ALSA SoC for OMAP boards with ABE and twl6040 codec");
diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c
index e7cdc51..64609c7 100644
--- a/sound/soc/omap/omap-mcpdm.c
+++ b/sound/soc/omap/omap-mcpdm.c
@@ -31,7 +31,6 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/irq.h>
-#include <linux/clk.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 #include <linux/of_device.h>
@@ -55,7 +54,6 @@
 	unsigned long phys_base;
 	void __iomem *io_base;
 	int irq;
-	struct clk *pdmclk;
 
 	struct mutex mutex;
 
@@ -390,15 +388,14 @@
 	struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
 	int ret;
 
-	clk_prepare_enable(mcpdm->pdmclk);
 	pm_runtime_enable(mcpdm->dev);
 
 	/* Disable lines while request is ongoing */
 	pm_runtime_get_sync(mcpdm->dev);
 	omap_mcpdm_write(mcpdm, MCPDM_REG_CTRL, 0x00);
 
-	ret = devm_request_irq(mcpdm->dev, mcpdm->irq, omap_mcpdm_irq_handler,
-				0, "McPDM", (void *)mcpdm);
+	ret = request_irq(mcpdm->irq, omap_mcpdm_irq_handler, 0, "McPDM",
+			  (void *)mcpdm);
 
 	pm_runtime_put_sync(mcpdm->dev);
 
@@ -423,9 +420,9 @@
 {
 	struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
 
+	free_irq(mcpdm->irq, (void *)mcpdm);
 	pm_runtime_disable(mcpdm->dev);
 
-	clk_disable_unprepare(mcpdm->pdmclk);
 	return 0;
 }
 
@@ -445,8 +442,6 @@
 		mcpdm->pm_active_count++;
 	}
 
-	clk_disable_unprepare(mcpdm->pdmclk);
-
 	return 0;
 }
 
@@ -454,8 +449,6 @@
 {
 	struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai);
 
-	clk_prepare_enable(mcpdm->pdmclk);
-
 	if (mcpdm->pm_active_count) {
 		while (mcpdm->pm_active_count--)
 			pm_runtime_get_sync(mcpdm->dev);
@@ -549,15 +542,6 @@
 
 	mcpdm->dev = &pdev->dev;
 
-	mcpdm->pdmclk = devm_clk_get(&pdev->dev, "pdmclk");
-	if (IS_ERR(mcpdm->pdmclk)) {
-		if (PTR_ERR(mcpdm->pdmclk) == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
-		dev_warn(&pdev->dev, "Error getting pdmclk (%ld)!\n",
-			 PTR_ERR(mcpdm->pdmclk));
-		mcpdm->pdmclk = NULL;
-	}
-
 	ret =  devm_snd_soc_register_component(&pdev->dev,
 					       &omap_mcpdm_component,
 					       &omap_mcpdm_dai, 1);
diff --git a/sound/soc/samsung/s3c24xx_uda134x.c b/sound/soc/samsung/s3c24xx_uda134x.c
index 50849e1..92e88bc 100644
--- a/sound/soc/samsung/s3c24xx_uda134x.c
+++ b/sound/soc/samsung/s3c24xx_uda134x.c
@@ -58,10 +58,12 @@
 
 static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream)
 {
-	int ret = 0;
+	struct snd_soc_pcm_runtime *rtd = substream->private_data;
+	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
 #ifdef ENFORCE_RATES
 	struct snd_pcm_runtime *runtime = substream->runtime;
 #endif
+	int ret = 0;
 
 	mutex_lock(&clk_lock);
 	pr_debug("%s %d\n", __func__, clk_users);
@@ -71,8 +73,7 @@
 			printk(KERN_ERR "%s cannot get xtal\n", __func__);
 			ret = PTR_ERR(xtal);
 		} else {
-			pclk = clk_get(&s3c24xx_uda134x_snd_device->dev,
-				       "pclk");
+			pclk = clk_get(cpu_dai->dev, "iis");
 			if (IS_ERR(pclk)) {
 				printk(KERN_ERR "%s cannot get pclk\n",
 				       __func__);
diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c
index e39f916..969a516 100644
--- a/sound/soc/sh/rcar/src.c
+++ b/sound/soc/sh/rcar/src.c
@@ -226,8 +226,12 @@
 	ifscr = 0;
 	fsrate = 0;
 	if (fin != fout) {
+		u64 n;
+
 		ifscr = 1;
-		fsrate = 0x0400000 / fout * fin;
+		n = (u64)0x0400000 * fin;
+		do_div(n, fout);
+		fsrate = n;
 	}
 
 	/*
diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
index d2df46c..bf7b52f 100644
--- a/sound/soc/soc-compress.c
+++ b/sound/soc/soc-compress.c
@@ -121,7 +121,7 @@
 
 		dpcm_be_disconnect(fe, stream);
 		fe->dpcm[stream].runtime = NULL;
-		goto fe_err;
+		goto path_err;
 	}
 
 	dpcm_clear_pending_state(fe, stream);
@@ -136,6 +136,8 @@
 
 	return 0;
 
+path_err:
+	dpcm_path_put(&list);
 fe_err:
 	if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown)
 		fe->dai_link->compr_ops->shutdown(cstream);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 16369ca..4afa8db 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1056,7 +1056,7 @@
 	if (!rtd->platform) {
 		dev_err(card->dev, "ASoC: platform %s not registered\n",
 			dai_link->platform_name);
-		return -EPROBE_DEFER;
+		goto _err_defer;
 	}
 
 	soc_add_pcm_runtime(card, rtd);
@@ -2083,14 +2083,13 @@
 	/* remove auxiliary devices */
 	soc_remove_aux_devices(card);
 
+	snd_soc_dapm_free(&card->dapm);
 	soc_cleanup_card_debugfs(card);
 
 	/* remove the card */
 	if (card->remove)
 		card->remove(card);
 
-	snd_soc_dapm_free(&card->dapm);
-
 	snd_card_free(card->snd_card);
 	return 0;
 
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index 8698c26..d908ff8 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3493,6 +3493,7 @@
 	const struct snd_soc_pcm_stream *config = w->params + w->params_select;
 	struct snd_pcm_substream substream;
 	struct snd_pcm_hw_params *params = NULL;
+	struct snd_pcm_runtime *runtime = NULL;
 	u64 fmt;
 	int ret;
 
@@ -3541,6 +3542,14 @@
 
 	memset(&substream, 0, sizeof(substream));
 
+	/* Allocate a dummy snd_pcm_runtime for startup() and other ops() */
+	runtime = kzalloc(sizeof(*runtime), GFP_KERNEL);
+	if (!runtime) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	substream.runtime = runtime;
+
 	switch (event) {
 	case SND_SOC_DAPM_PRE_PMU:
 		substream.stream = SNDRV_PCM_STREAM_CAPTURE;
@@ -3606,6 +3615,7 @@
 	}
 
 out:
+	kfree(runtime);
 	kfree(params);
 	return ret;
 }
diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c
index 204cc07..41aa335 100644
--- a/sound/usb/line6/pcm.c
+++ b/sound/usb/line6/pcm.c
@@ -55,7 +55,6 @@
 		err = line6_pcm_acquire(line6pcm, LINE6_STREAM_IMPULSE);
 		if (err < 0) {
 			line6pcm->impulse_volume = 0;
-			line6_pcm_release(line6pcm, LINE6_STREAM_IMPULSE);
 			return err;
 		}
 	} else {
@@ -211,7 +210,9 @@
 	spin_lock_irqsave(&pstr->lock, flags);
 	clear_bit(type, &pstr->running);
 	if (!pstr->running) {
+		spin_unlock_irqrestore(&pstr->lock, flags);
 		line6_unlink_audio_urbs(line6pcm, pstr);
+		spin_lock_irqsave(&pstr->lock, flags);
 		if (direction == SNDRV_PCM_STREAM_CAPTURE) {
 			line6pcm->prev_fbuf = NULL;
 			line6pcm->prev_fsize = 0;
diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c
index daf81d16..45dd348 100644
--- a/sound/usb/line6/pod.c
+++ b/sound/usb/line6/pod.c
@@ -244,8 +244,8 @@
 static ssize_t serial_number_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
-	struct usb_interface *interface = to_usb_interface(dev);
-	struct usb_line6_pod *pod = usb_get_intfdata(interface);
+	struct snd_card *card = dev_to_snd_card(dev);
+	struct usb_line6_pod *pod = card->private_data;
 
 	return sprintf(buf, "%u\n", pod->serial_number);
 }
@@ -256,8 +256,8 @@
 static ssize_t firmware_version_show(struct device *dev,
 				     struct device_attribute *attr, char *buf)
 {
-	struct usb_interface *interface = to_usb_interface(dev);
-	struct usb_line6_pod *pod = usb_get_intfdata(interface);
+	struct snd_card *card = dev_to_snd_card(dev);
+	struct usb_line6_pod *pod = card->private_data;
 
 	return sprintf(buf, "%d.%02d\n", pod->firmware_version / 100,
 		       pod->firmware_version % 100);
@@ -269,8 +269,8 @@
 static ssize_t device_id_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
-	struct usb_interface *interface = to_usb_interface(dev);
-	struct usb_line6_pod *pod = usb_get_intfdata(interface);
+	struct snd_card *card = dev_to_snd_card(dev);
+	struct usb_line6_pod *pod = card->private_data;
 
 	return sprintf(buf, "%d\n", pod->device_id);
 }
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 6adde45..152292e 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1128,6 +1128,7 @@
 {
 	/* devices which do not support reading the sample rate. */
 	switch (chip->usb_id) {
+	case USB_ID(0x041E, 0x4080): /* Creative Live Cam VF0610 */
 	case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
 	case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
 	case USB_ID(0x045E, 0x076E): /* MS Lifecam HD-5001 */
@@ -1138,7 +1139,9 @@
 	case USB_ID(0x047F, 0xAA05): /* Plantronics DA45 */
 	case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
 	case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */
+	case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */
 	case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */
+	case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */
 	case USB_ID(0x1de7, 0x0013): /* Phoenix Audio MT202exe */
 	case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */
 	case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */
diff --git a/tools/arch/alpha/include/uapi/asm/mman.h b/tools/arch/alpha/include/uapi/asm/mman.h
new file mode 100644
index 0000000..e38b64c
--- /dev/null
+++ b/tools/arch/alpha/include/uapi/asm/mman.h
@@ -0,0 +1,47 @@
+#ifndef TOOLS_ARCH_ALPHA_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ALPHA_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP	17
+#define MADV_DOFORK	11
+#define MADV_DONTDUMP   16
+#define MADV_DONTFORK	10
+#define MADV_DONTNEED	6
+#define MADV_FREE	8
+#define MADV_HUGEPAGE	14
+#define MADV_MERGEABLE   12
+#define MADV_NOHUGEPAGE	15
+#define MADV_NORMAL	0
+#define MADV_RANDOM	1
+#define MADV_REMOVE	9
+#define MADV_SEQUENTIAL	2
+#define MADV_UNMERGEABLE 13
+#define MADV_WILLNEED	3
+#define MAP_ANONYMOUS	0x10
+#define MAP_DENYWRITE	0x02000
+#define MAP_EXECUTABLE	0x04000
+#define MAP_FILE	0
+#define MAP_FIXED	0x100
+#define MAP_GROWSDOWN	0x01000
+#define MAP_HUGETLB	0x100000
+#define MAP_LOCKED	0x08000
+#define MAP_NONBLOCK	0x40000
+#define MAP_NORESERVE	0x10000
+#define MAP_POPULATE	0x20000
+#define MAP_PRIVATE	0x02
+#define MAP_SHARED	0x01
+#define MAP_STACK	0x80000
+#define PROT_EXEC	0x4
+#define PROT_GROWSDOWN	0x01000000
+#define PROT_GROWSUP	0x02000000
+#define PROT_NONE	0x0
+#define PROT_READ	0x1
+#define PROT_SEM	0x8
+#define PROT_WRITE	0x2
+/* MADV_HWPOISON is undefined on alpha, fix it for perf */
+#define MADV_HWPOISON	100
+/* MADV_SOFT_OFFLINE is undefined on alpha, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on alpha, fix it for perf */
+#define MAP_32BIT	0
+/* MAP_UNINITIALIZED is undefined on alpha, fix it for perf */
+#define MAP_UNINITIALIZED	0
+#endif
diff --git a/tools/arch/arc/include/uapi/asm/mman.h b/tools/arch/arc/include/uapi/asm/mman.h
new file mode 100644
index 0000000..aa3acd2
--- /dev/null
+++ b/tools/arch/arc/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on arc, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/arm/include/uapi/asm/mman.h b/tools/arch/arm/include/uapi/asm/mman.h
new file mode 100644
index 0000000..478f699
--- /dev/null
+++ b/tools/arch/arm/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on arm, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index f209ea1..3051f86 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -87,9 +87,11 @@
 /* Supported VGICv3 address types  */
 #define KVM_VGIC_V3_ADDR_TYPE_DIST	2
 #define KVM_VGIC_V3_ADDR_TYPE_REDIST	3
+#define KVM_VGIC_ITS_ADDR_TYPE		4
 
 #define KVM_VGIC_V3_DIST_SIZE		SZ_64K
 #define KVM_VGIC_V3_REDIST_SIZE		(2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE		(2 * SZ_64K)
 
 #define KVM_ARM_VCPU_POWER_OFF		0 /* CPU is started in OFF state */
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
diff --git a/tools/arch/arm64/include/uapi/asm/mman.h b/tools/arch/arm64/include/uapi/asm/mman.h
new file mode 100644
index 0000000..70fd311
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on arm64, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/frv/include/uapi/asm/mman.h b/tools/arch/frv/include/uapi/asm/mman.h
new file mode 100644
index 0000000..5be78ac
--- /dev/null
+++ b/tools/arch/frv/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_FRV_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_FRV_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on frv, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/h8300/include/uapi/asm/mman.h b/tools/arch/h8300/include/uapi/asm/mman.h
new file mode 100644
index 0000000..9d9ac54
--- /dev/null
+++ b/tools/arch/h8300/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on h8300, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/hexagon/include/uapi/asm/mman.h b/tools/arch/hexagon/include/uapi/asm/mman.h
new file mode 100644
index 0000000..102f3fa
--- /dev/null
+++ b/tools/arch/hexagon/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on hexagon, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/ia64/include/uapi/asm/mman.h b/tools/arch/ia64/include/uapi/asm/mman.h
new file mode 100644
index 0000000..1d6e5ac
--- /dev/null
+++ b/tools/arch/ia64/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on ia64, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/m32r/include/uapi/asm/mman.h b/tools/arch/m32r/include/uapi/asm/mman.h
new file mode 100644
index 0000000..1c29635
--- /dev/null
+++ b/tools/arch/m32r/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_M32R_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_M32R_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on m32r, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/microblaze/include/uapi/asm/mman.h b/tools/arch/microblaze/include/uapi/asm/mman.h
new file mode 100644
index 0000000..005cd50
--- /dev/null
+++ b/tools/arch/microblaze/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on microblaze, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/mips/include/uapi/asm/mman.h b/tools/arch/mips/include/uapi/asm/mman.h
new file mode 100644
index 0000000..c020529
--- /dev/null
+++ b/tools/arch/mips/include/uapi/asm/mman.h
@@ -0,0 +1,46 @@
+#ifndef TOOLS_ARCH_MIPS_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_MIPS_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP	17
+#define MADV_DOFORK	11
+#define MADV_DONTDUMP	16
+#define MADV_DONTFORK	10
+#define MADV_DONTNEED	4
+#define MADV_FREE	8
+#define MADV_HUGEPAGE	14
+#define MADV_HWPOISON	 100
+#define MADV_MERGEABLE	 12
+#define MADV_NOHUGEPAGE 15
+#define MADV_NORMAL	0
+#define MADV_RANDOM	1
+#define MADV_REMOVE	9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 13
+#define MADV_WILLNEED	3
+#define MAP_ANONYMOUS	0x0800
+#define MAP_DENYWRITE	0x2000
+#define MAP_EXECUTABLE	0x4000
+#define MAP_FILE	0
+#define MAP_FIXED	0x010
+#define MAP_GROWSDOWN	0x1000
+#define MAP_HUGETLB	0x80000
+#define MAP_LOCKED	0x8000
+#define MAP_NONBLOCK	0x20000
+#define MAP_NORESERVE	0x0400
+#define MAP_POPULATE	0x10000
+#define MAP_PRIVATE	0x002
+#define MAP_SHARED	0x001
+#define MAP_STACK	0x40000
+#define PROT_EXEC	0x04
+#define PROT_GROWSDOWN	0x01000000
+#define PROT_GROWSUP	0x02000000
+#define PROT_NONE	0x00
+#define PROT_READ	0x01
+#define PROT_SEM	0x10
+#define PROT_WRITE	0x02
+/* MADV_SOFT_OFFLINE is undefined on mips, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on mips, fix it for perf */
+#define MAP_32BIT	0
+/* MAP_UNINITIALIZED is undefined on mips, fix it for perf */
+#define MAP_UNINITIALIZED	0
+#endif
diff --git a/tools/arch/mn10300/include/uapi/asm/mman.h b/tools/arch/mn10300/include/uapi/asm/mman.h
new file mode 100644
index 0000000..c1ea36d
--- /dev/null
+++ b/tools/arch/mn10300/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_MN10300_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_MN10300_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on mn10300, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h
new file mode 100644
index 0000000..03d8d5b
--- /dev/null
+++ b/tools/arch/parisc/include/uapi/asm/mman.h
@@ -0,0 +1,47 @@
+#ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP	70
+#define MADV_DOFORK	11
+#define MADV_DONTDUMP   69
+#define MADV_DONTFORK	10
+#define MADV_DONTNEED   4
+#define MADV_FREE	8
+#define MADV_HUGEPAGE	67
+#define MADV_MERGEABLE   65
+#define MADV_NOHUGEPAGE	68
+#define MADV_NORMAL     0
+#define MADV_RANDOM     1
+#define MADV_REMOVE	9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 66
+#define MADV_WILLNEED   3
+#define MAP_ANONYMOUS	0x10
+#define MAP_DENYWRITE	0x0800
+#define MAP_EXECUTABLE	0x1000
+#define MAP_FILE	0
+#define MAP_FIXED	0x04
+#define MAP_GROWSDOWN	0x8000
+#define MAP_HUGETLB	0x80000
+#define MAP_LOCKED	0x2000
+#define MAP_NONBLOCK	0x20000
+#define MAP_NORESERVE	0x4000
+#define MAP_POPULATE	0x10000
+#define MAP_PRIVATE	0x02
+#define MAP_SHARED	0x01
+#define MAP_STACK	0x40000
+#define PROT_EXEC	0x4
+#define PROT_GROWSDOWN	0x01000000
+#define PROT_GROWSUP	0x02000000
+#define PROT_NONE	0x0
+#define PROT_READ	0x1
+#define PROT_SEM	0x8
+#define PROT_WRITE	0x2
+/* MADV_HWPOISON is undefined on parisc, fix it for perf */
+#define MADV_HWPOISON	100
+/* MADV_SOFT_OFFLINE is undefined on parisc, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on parisc, fix it for perf */
+#define MAP_32BIT	0
+/* MAP_UNINITIALIZED is undefined on parisc, fix it for perf */
+#define MAP_UNINITIALIZED	0
+#endif
diff --git a/tools/arch/powerpc/include/uapi/asm/mman.h b/tools/arch/powerpc/include/uapi/asm/mman.h
new file mode 100644
index 0000000..761db43
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/mman.h
@@ -0,0 +1,15 @@
+#ifndef TOOLS_ARCH_POWERPC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_POWERPC_UAPI_ASM_MMAN_FIX_H
+#define MAP_DENYWRITE	0x0800
+#define MAP_EXECUTABLE	0x1000
+#define MAP_GROWSDOWN	0x0100
+#define MAP_HUGETLB	0x40000
+#define MAP_LOCKED	0x80
+#define MAP_NONBLOCK	0x10000
+#define MAP_NORESERVE   0x40
+#define MAP_POPULATE	0x8000
+#define MAP_STACK	0x20000
+#include <uapi/asm-generic/mman-common.h>
+/* MAP_32BIT is undefined on powerpc, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 3b8e99e..a2ffec4 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -93,6 +93,47 @@
 	__u64 fac_list[256];
 };
 
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT	2
+#define KVM_S390_VM_CPU_MACHINE_FEAT	3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS	1024
+#define KVM_S390_VM_CPU_FEAT_ESOP	0
+#define KVM_S390_VM_CPU_FEAT_SIEF2	1
+#define KVM_S390_VM_CPU_FEAT_64BSCAO	2
+#define KVM_S390_VM_CPU_FEAT_SIIF	3
+#define KVM_S390_VM_CPU_FEAT_GPERE	4
+#define KVM_S390_VM_CPU_FEAT_GSLS	5
+#define KVM_S390_VM_CPU_FEAT_IB		6
+#define KVM_S390_VM_CPU_FEAT_CEI	7
+#define KVM_S390_VM_CPU_FEAT_IBS	8
+#define KVM_S390_VM_CPU_FEAT_SKEY	9
+#define KVM_S390_VM_CPU_FEAT_CMMA	10
+#define KVM_S390_VM_CPU_FEAT_PFMFI	11
+#define KVM_S390_VM_CPU_FEAT_SIGPIF	12
+struct kvm_s390_vm_cpu_feat {
+	__u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC	4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC		5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+	__u8 plo[32];		/* always */
+	__u8 ptff[16];		/* with TOD-clock steering */
+	__u8 kmac[16];		/* with MSA */
+	__u8 kmc[16];		/* with MSA */
+	__u8 km[16];		/* with MSA */
+	__u8 kimd[16];		/* with MSA */
+	__u8 klmd[16];		/* with MSA */
+	__u8 pckmo[16];		/* with MSA3 */
+	__u8 kmctr[16];		/* with MSA4 */
+	__u8 kmf[16];		/* with MSA4 */
+	__u8 kmo[16];		/* with MSA4 */
+	__u8 pcc[16];		/* with MSA4 */
+	__u8 ppno[16];		/* with MSA5 */
+	__u8 reserved[1824];
+};
+
 /* kvm attributes for crypto */
 #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW	0
 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW	1
diff --git a/tools/arch/s390/include/uapi/asm/mman.h b/tools/arch/s390/include/uapi/asm/mman.h
new file mode 100644
index 0000000..b03dea9
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on s390, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h
index 8fb5d4a..3ac6343 100644
--- a/tools/arch/s390/include/uapi/asm/sie.h
+++ b/tools/arch/s390/include/uapi/asm/sie.h
@@ -140,6 +140,7 @@
 	exit_code_ipa0(0xB2, 0x4c, "TAR"),	\
 	exit_code_ipa0(0xB2, 0x50, "CSP"),	\
 	exit_code_ipa0(0xB2, 0x54, "MVPG"),	\
+	exit_code_ipa0(0xB2, 0x56, "STHYI"),	\
 	exit_code_ipa0(0xB2, 0x58, "BSG"),	\
 	exit_code_ipa0(0xB2, 0x5a, "BSA"),	\
 	exit_code_ipa0(0xB2, 0x5f, "CHSC"),	\
diff --git a/tools/arch/score/include/uapi/asm/mman.h b/tools/arch/score/include/uapi/asm/mman.h
new file mode 100644
index 0000000..2f8fb89
--- /dev/null
+++ b/tools/arch/score/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_SCORE_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_SCORE_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on score, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/sh/include/uapi/asm/mman.h b/tools/arch/sh/include/uapi/asm/mman.h
new file mode 100644
index 0000000..26504f6
--- /dev/null
+++ b/tools/arch/sh/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+#ifndef TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on sh, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/sparc/include/uapi/asm/mman.h b/tools/arch/sparc/include/uapi/asm/mman.h
new file mode 100644
index 0000000..8640525
--- /dev/null
+++ b/tools/arch/sparc/include/uapi/asm/mman.h
@@ -0,0 +1,15 @@
+#ifndef TOOLS_ARCH_SPARC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_SPARC_UAPI_ASM_MMAN_FIX_H
+#define MAP_DENYWRITE	0x0800
+#define MAP_EXECUTABLE	0x1000
+#define MAP_GROWSDOWN	0x0200
+#define MAP_HUGETLB	0x40000
+#define MAP_LOCKED      0x100
+#define MAP_NONBLOCK	0x10000
+#define MAP_NORESERVE   0x40
+#define MAP_POPULATE	0x8000
+#define MAP_STACK	0x20000
+#include <uapi/asm-generic/mman-common.h>
+/* MAP_32BIT is undefined on sparc, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/tile/include/uapi/asm/mman.h b/tools/arch/tile/include/uapi/asm/mman.h
new file mode 100644
index 0000000..7116c4b
--- /dev/null
+++ b/tools/arch/tile/include/uapi/asm/mman.h
@@ -0,0 +1,15 @@
+#ifndef TOOLS_ARCH_TILE_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_TILE_UAPI_ASM_MMAN_FIX_H
+#define MAP_DENYWRITE	0x0800
+#define MAP_EXECUTABLE	0x1000
+#define MAP_GROWSDOWN	0x0100
+#define MAP_HUGETLB	0x4000
+#define MAP_LOCKED	0x0200
+#define MAP_NONBLOCK	0x0080
+#define MAP_NORESERVE	0x0400
+#define MAP_POPULATE	0x0040
+#define MAP_STACK	MAP_GROWSDOWN
+#include <uapi/asm-generic/mman-common.h>
+/* MAP_32BIT is undefined on tile, fix it for perf */
+#define MAP_32BIT	0
+#endif
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 4a41348..92a8308 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -225,7 +225,6 @@
 #define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
 #define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
@@ -301,10 +300,6 @@
 #define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
 #define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
-#define X86_BUG_NULL_SEG	X86_BUG(9) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE	X86_BUG(10) /* SWAPGS without input dep on GS */
-
-
 #ifdef CONFIG_X86_32
 /*
  * 64-bit kernels don't use X86_BUG_ESPFIX.  Make the define conditional
@@ -312,5 +307,7 @@
  */
 #define X86_BUG_ESPFIX		X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
 #endif
-
+#define X86_BUG_NULL_SEG	X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE	X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR		X86_BUG(12) /* IPI required to wake up remote CPU */
 #endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 911e935..85599ad 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,5 +56,7 @@
 #define DISABLED_MASK14	0
 #define DISABLED_MASK15	0
 #define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)
+#define DISABLED_MASK17	0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index 4916144..fac9a5c 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -99,5 +99,7 @@
 #define REQUIRED_MASK14	0
 #define REQUIRED_MASK15	0
 #define REQUIRED_MASK16	0
+#define REQUIRED_MASK17	0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 18)
 
 #endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/arch/x86/include/uapi/asm/mman.h b/tools/arch/x86/include/uapi/asm/mman.h
new file mode 100644
index 0000000..b73c1af
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/mman.h
@@ -0,0 +1,5 @@
+#ifndef TOOLS_ARCH_X86_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_X86_UAPI_ASM_MMAN_FIX_H
+#define MAP_32BIT	0x40
+#include <uapi/asm-generic/mman.h>
+#endif
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index 5b15d94..37fee27 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -78,7 +78,6 @@
 #define EXIT_REASON_PML_FULL            62
 #define EXIT_REASON_XSAVES              63
 #define EXIT_REASON_XRSTORS             64
-#define EXIT_REASON_PCOMMIT             65
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -127,8 +126,7 @@
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
 	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS,               "XRSTORS" }, \
-	{ EXIT_REASON_PCOMMIT,               "PCOMMIT" }
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #define VMX_ABORT_SAVE_GUEST_MSR_FAIL        1
 #define VMX_ABORT_LOAD_HOST_MSR_FAIL         4
diff --git a/tools/arch/xtensa/include/uapi/asm/mman.h b/tools/arch/xtensa/include/uapi/asm/mman.h
new file mode 100644
index 0000000..4453195
--- /dev/null
+++ b/tools/arch/xtensa/include/uapi/asm/mman.h
@@ -0,0 +1,47 @@
+#ifndef TOOLS_ARCH_XTENSA_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_XTENSA_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP	17
+#define MADV_DOFORK	11
+#define MADV_DONTDUMP   16
+#define MADV_DONTFORK	10
+#define MADV_DONTNEED	4
+#define MADV_FREE	8
+#define MADV_HUGEPAGE	14
+#define MADV_MERGEABLE   12
+#define MADV_NOHUGEPAGE	15
+#define MADV_NORMAL	0
+#define MADV_RANDOM	1
+#define MADV_REMOVE	9
+#define MADV_SEQUENTIAL	2
+#define MADV_UNMERGEABLE 13
+#define MADV_WILLNEED	3
+#define MAP_ANONYMOUS	0x0800
+#define MAP_DENYWRITE	0x2000
+#define MAP_EXECUTABLE	0x4000
+#define MAP_FILE	0
+#define MAP_FIXED	0x010
+#define MAP_GROWSDOWN	0x1000
+#define MAP_HUGETLB	0x80000
+#define MAP_LOCKED	0x8000
+#define MAP_NONBLOCK	0x20000
+#define MAP_NORESERVE	0x0400
+#define MAP_POPULATE	0x10000
+#define MAP_PRIVATE	0x002
+#define MAP_SHARED	0x001
+#define MAP_STACK	0x40000
+#define PROT_EXEC	0x4
+#define PROT_GROWSDOWN	0x01000000
+#define PROT_GROWSUP	0x02000000
+#define PROT_NONE	0x0
+#define PROT_READ	0x1
+#define PROT_SEM	0x10
+#define PROT_WRITE	0x2
+/* MADV_HWPOISON is undefined on xtensa, fix it for perf */
+#define MADV_HWPOISON	100
+/* MADV_SOFT_OFFLINE is undefined on xtensa, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on xtensa, fix it for perf */
+#define MAP_32BIT	0
+/* MAP_UNINITIALIZED is undefined on xtensa, fix it for perf */
+#define MAP_UNINITIALIZED	0
+#endif
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 448ed96..1c14c25 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -1,5 +1,5 @@
 /*
- * gpio-hammer - example swiss army knife to shake GPIO lines on a system
+ * gpio-event-mon - monitor GPIO line events from userspace
  *
  * Copyright (C) 2016 Linus Walleij
  *
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 0d9f48e..bc7adb8 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -1433,7 +1433,7 @@
 	openlog("KVP", 0, LOG_USER);
 	syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
 
-	kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR);
+	kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
 
 	if (kvp_fd < 0) {
 		syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 5d51d6f..e082980 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -250,6 +250,9 @@
 				syslog(LOG_ERR, "/etc/fstab and /proc/mounts");
 			}
 			break;
+		case VSS_OP_HOT_BACKUP:
+			syslog(LOG_INFO, "VSS: op=CHECK HOT BACKUP\n");
+			break;
 		default:
 			syslog(LOG_ERR, "Illegal op:%d\n", op);
 		}
diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c
index 0e8a1f7..f39c0e9 100644
--- a/tools/iio/iio_generic_buffer.c
+++ b/tools/iio/iio_generic_buffer.c
@@ -348,7 +348,7 @@
 	int notrigger = 0;
 	char *dummy;
 
-	struct iio_channel_info *channels;
+	struct iio_channel_info *channels = NULL;
 
 	register_cleanup();
 
@@ -456,7 +456,7 @@
 
 	if (notrigger) {
 		printf("trigger-less mode selected\n");
-	} if (trig_num >= 0) {
+	} else if (trig_num >= 0) {
 		char *trig_dev_name;
 		ret = asprintf(&trig_dev_name, "%strigger%d", iio_dir, trig_num);
 		if (ret < 0) {
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
new file mode 100644
index 0000000..7d41026
--- /dev/null
+++ b/tools/include/linux/coresight-pmu.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LINUX_CORESIGHT_PMU_H
+#define _LINUX_CORESIGHT_PMU_H
+
+#define CORESIGHT_ETM_PMU_NAME "cs_etm"
+#define CORESIGHT_ETM_PMU_SEED  0x10
+
+/* ETMv3.5/PTM's ETMCR config bit */
+#define ETM_OPT_CYCACC  12
+#define ETM_OPT_TS      28
+
+static inline int coresight_get_trace_id(int cpu)
+{
+	/*
+	 * A trace ID of value 0 is invalid, so let's start at some
+	 * random value that fits in 7 bits and go from there.  Since
+	 * the common convention is to have data trace IDs be I(N) + 1,
+	 * set instruction trace IDs as a function of the CPU number.
+	 */
+	return (CORESIGHT_ETM_PMU_SEED + (cpu * 2));
+}
+
+#endif
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index b968794..f436d24 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -8,7 +8,11 @@
 
 int strtobool(const char *s, bool *res);
 
-#ifdef __GLIBC__
+/*
+ * glibc based builds needs the extern while uClibc doesn't.
+ * However uClibc headers also define __GLIBC__ hence the hack below
+ */
+#if defined(__GLIBC__) && !defined(__UCLIBC__)
 extern size_t strlcpy(char *dest, const char *src, size_t size);
 #endif
 
diff --git a/tools/include/linux/time64.h b/tools/include/linux/time64.h
new file mode 100644
index 0000000..df92654
--- /dev/null
+++ b/tools/include/linux/time64.h
@@ -0,0 +1,12 @@
+#ifndef _TOOLS_LINUX_TIME64_H
+#define _TOOLS_LINUX_TIME64_H
+
+#define MSEC_PER_SEC	1000L
+#define USEC_PER_MSEC	1000L
+#define NSEC_PER_USEC	1000L
+#define NSEC_PER_MSEC	1000000L
+#define USEC_PER_SEC	1000000L
+#define NSEC_PER_SEC	1000000000L
+#define FSEC_PER_SEC	1000000000000000LL
+
+#endif /* _LINUX_TIME64_H */
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
new file mode 100644
index 0000000..5827438
--- /dev/null
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -0,0 +1,75 @@
+#ifndef __ASM_GENERIC_MMAN_COMMON_H
+#define __ASM_GENERIC_MMAN_COMMON_H
+
+/*
+ Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
+ Based on: asm-xxx/mman.h
+*/
+
+#define PROT_READ	0x1		/* page can be read */
+#define PROT_WRITE	0x2		/* page can be written */
+#define PROT_EXEC	0x4		/* page can be executed */
+#define PROT_SEM	0x8		/* page may be used for atomic ops */
+#define PROT_NONE	0x0		/* page can not be accessed */
+#define PROT_GROWSDOWN	0x01000000	/* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP	0x02000000	/* mprotect flag: extend change to end of growsup vma */
+
+#define MAP_SHARED	0x01		/* Share changes */
+#define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_TYPE	0x0f		/* Mask for type of mapping */
+#define MAP_FIXED	0x10		/* Interpret addr exactly */
+#define MAP_ANONYMOUS	0x20		/* don't use a file */
+#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
+# define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be uninitialized */
+#else
+# define MAP_UNINITIALIZED 0x0		/* Don't support this flag */
+#endif
+
+/*
+ * Flags for mlock
+ */
+#define MLOCK_ONFAULT	0x01		/* Lock pages in range after they are faulted in, do not prefault */
+
+#define MS_ASYNC	1		/* sync memory asynchronously */
+#define MS_INVALIDATE	2		/* invalidate the caches */
+#define MS_SYNC		4		/* synchronous memory sync */
+
+#define MADV_NORMAL	0		/* no further special treatment */
+#define MADV_RANDOM	1		/* expect random page references */
+#define MADV_SEQUENTIAL	2		/* expect sequential page references */
+#define MADV_WILLNEED	3		/* will need these pages */
+#define MADV_DONTNEED	4		/* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_FREE	8		/* free pages only if memory pressure */
+#define MADV_REMOVE	9		/* remove these pages & resources */
+#define MADV_DONTFORK	10		/* don't inherit across fork */
+#define MADV_DOFORK	11		/* do inherit across fork */
+#define MADV_HWPOISON	100		/* poison a page for testing */
+#define MADV_SOFT_OFFLINE 101		/* soft offline page for testing */
+
+#define MADV_MERGEABLE   12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+
+#define MADV_HUGEPAGE	14		/* Worth backing with hugepages */
+#define MADV_NOHUGEPAGE	15		/* Not worth backing with hugepages */
+
+#define MADV_DONTDUMP   16		/* Explicity exclude from the core dump,
+					   overrides the coredump filter bits */
+#define MADV_DODUMP	17		/* Clear the MADV_DONTDUMP flag */
+
+/* compatibility flags */
+#define MAP_FILE	0
+
+/*
+ * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
+ * This gives us 6 bits, which is enough until someone invents 128 bit address
+ * spaces.
+ *
+ * Assume these are all power of twos.
+ * When 0 use the default page size.
+ */
+#define MAP_HUGE_SHIFT	26
+#define MAP_HUGE_MASK	0x3f
+
+#endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
new file mode 100644
index 0000000..10fa785
--- /dev/null
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -0,0 +1,22 @@
+#ifndef __ASM_GENERIC_MMAN_H
+#define __ASM_GENERIC_MMAN_H
+
+#include <uapi/asm-generic/mman-common.h>
+
+#define MAP_GROWSDOWN	0x0100		/* stack-like segment */
+#define MAP_DENYWRITE	0x0800		/* ETXTBSY */
+#define MAP_EXECUTABLE	0x1000		/* mark it as an executable */
+#define MAP_LOCKED	0x2000		/* pages are locked */
+#define MAP_NORESERVE	0x4000		/* don't check for reservations */
+#define MAP_POPULATE	0x8000		/* populate (prefault) pagetables */
+#define MAP_NONBLOCK	0x10000		/* do not block on IO */
+#define MAP_STACK	0x20000		/* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB	0x40000		/* create a huge page mapping */
+
+/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
+
+#define MCL_CURRENT	1		/* lock all current mappings */
+#define MCL_FUTURE	2		/* lock all future mappings */
+#define MCL_ONFAULT	4		/* lock all pages that are faulted in */
+
+#endif /* __ASM_GENERIC_MMAN_H */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 406459b..da218fe 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -84,6 +84,7 @@
 	BPF_MAP_TYPE_PERCPU_HASH,
 	BPF_MAP_TYPE_PERCPU_ARRAY,
 	BPF_MAP_TYPE_STACK_TRACE,
+	BPF_MAP_TYPE_CGROUP_ARRAY,
 };
 
 enum bpf_prog_type {
@@ -93,6 +94,7 @@
 	BPF_PROG_TYPE_SCHED_CLS,
 	BPF_PROG_TYPE_SCHED_ACT,
 	BPF_PROG_TYPE_TRACEPOINT,
+	BPF_PROG_TYPE_XDP,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
@@ -313,6 +315,66 @@
 	 */
 	BPF_FUNC_skb_get_tunnel_opt,
 	BPF_FUNC_skb_set_tunnel_opt,
+
+	/**
+	 * bpf_skb_change_proto(skb, proto, flags)
+	 * Change protocol of the skb. Currently supported is
+	 * v4 -> v6, v6 -> v4 transitions. The helper will also
+	 * resize the skb. eBPF program is expected to fill the
+	 * new headers via skb_store_bytes and lX_csum_replace.
+	 * @skb: pointer to skb
+	 * @proto: new skb->protocol type
+	 * @flags: reserved
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_proto,
+
+	/**
+	 * bpf_skb_change_type(skb, type)
+	 * Change packet type of skb.
+	 * @skb: pointer to skb
+	 * @type: new skb->pkt_type type
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_skb_change_type,
+
+	/**
+	 * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb
+	 * @skb: pointer to skb
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 skb failed the cgroup2 descendant test
+	 *   == 1 skb succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_skb_in_cgroup,
+
+	/**
+	 * bpf_get_hash_recalc(skb)
+	 * Retrieve and possibly recalculate skb->hash.
+	 * @skb: pointer to skb
+	 * Return: hash
+	 */
+	BPF_FUNC_get_hash_recalc,
+
+	/**
+	 * u64 bpf_get_current_task(void)
+	 * Returns current task_struct
+	 * Return: current
+	 */
+	BPF_FUNC_get_current_task,
+
+	/**
+	 * bpf_probe_write_user(void *dst, void *src, int len)
+	 * safely attempt to write to a location
+	 * @dst: destination address in userspace
+	 * @src: source address on stack
+	 * @len: number of bytes to copy
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_probe_write_user,
+
 	__BPF_FUNC_MAX_ID,
 };
 
@@ -347,9 +409,11 @@
 #define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
 #define BPF_F_DONT_FRAGMENT		(1ULL << 2)
 
-/* BPF_FUNC_perf_event_output flags. */
+/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
 #define BPF_F_INDEX_MASK		0xffffffffULL
 #define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
 
 /* user accessible mirror of in-kernel sk_buff.
  * new fields can only be added to the end of this structure
@@ -386,4 +450,24 @@
 	__u32 tunnel_label;
 };
 
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will result
+ * in packet drop.
+ */
+enum xdp_action {
+	XDP_ABORTED = 0,
+	XDP_DROP,
+	XDP_PASS,
+	XDP_TX,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+	__u32 data;
+	__u32 data_end;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
new file mode 100644
index 0000000..81d8edf
--- /dev/null
+++ b/tools/include/uapi/linux/mman.h
@@ -0,0 +1,13 @@
+#ifndef _UAPI_LINUX_MMAN_H
+#define _UAPI_LINUX_MMAN_H
+
+#include <uapi/asm/mman.h>
+
+#define MREMAP_MAYMOVE	1
+#define MREMAP_FIXED	2
+
+#define OVERCOMMIT_GUESS		0
+#define OVERCOMMIT_ALWAYS		1
+#define OVERCOMMIT_NEVER		2
+
+#endif /* _UAPI_LINUX_MMAN_H */
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
index d9836c5..11c8d9b 100644
--- a/tools/lguest/lguest.c
+++ b/tools/lguest/lguest.c
@@ -3266,6 +3266,9 @@
 		}
 	}
 
+	/* If we exit via err(), this kills all the threads, restores tty. */
+	atexit(cleanup_devices);
+
 	/* We always have a console device, and it's always device 1. */
 	setup_console();
 
@@ -3369,9 +3372,6 @@
 	/* Ensure that we terminate if a device-servicing child dies. */
 	signal(SIGCHLD, kill_launcher);
 
-	/* If we exit via err(), this kills all the threads, restores tty. */
-	atexit(cleanup_devices);
-
 	/* If requested, chroot to a directory */
 	if (chroot_path) {
 		if (chroot(chroot_path) != 0)
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index ba7094b..f99f49e4 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -34,6 +34,10 @@
 #define TRACEFS_MAGIC          0x74726163
 #endif
 
+#ifndef HUGETLBFS_MAGIC
+#define HUGETLBFS_MAGIC        0x958458f6
+#endif
+
 static const char * const sysfs__fs_known_mountpoints[] = {
 	"/sys",
 	0,
@@ -67,6 +71,10 @@
 	0,
 };
 
+static const char * const hugetlbfs__known_mountpoints[] = {
+	0,
+};
+
 struct fs {
 	const char		*name;
 	const char * const	*mounts;
@@ -80,6 +88,7 @@
 	FS__PROCFS  = 1,
 	FS__DEBUGFS = 2,
 	FS__TRACEFS = 3,
+	FS__HUGETLBFS = 4,
 };
 
 #ifndef TRACEFS_MAGIC
@@ -107,6 +116,11 @@
 		.mounts	= tracefs__known_mountpoints,
 		.magic	= TRACEFS_MAGIC,
 	},
+	[FS__HUGETLBFS] = {
+		.name	= "hugetlbfs",
+		.mounts = hugetlbfs__known_mountpoints,
+		.magic	= HUGETLBFS_MAGIC,
+	},
 };
 
 static bool fs__read_mounts(struct fs *fs)
@@ -265,6 +279,7 @@
 FS(procfs,  FS__PROCFS);
 FS(debugfs, FS__DEBUGFS);
 FS(tracefs, FS__TRACEFS);
+FS(hugetlbfs, FS__HUGETLBFS);
 
 int filename__read_int(const char *filename, int *value)
 {
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 16c9c2e..a63269f 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -21,6 +21,7 @@
 FS(procfs)
 FS(debugfs)
 FS(tracefs)
+FS(hugetlbfs)
 
 #undef FS
 
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index bd09d0e..143b6cd 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -175,6 +175,7 @@
 		"__stack_chk_fail",
 		"panic",
 		"do_exit",
+		"do_task_dead",
 		"__module_put_and_exit",
 		"complete_and_exit",
 		"kvm_spurious_fault",
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 15949e2..cb081ac5 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -110,6 +110,14 @@
 		order = caller
 		sort-key = function
 
+	[report]
+		# Defaults
+		sort-order = comm,dso,symbol
+		percent-limit = 0
+		queue-size = 0
+		children = true
+		group = true
+
 Variables
 ~~~~~~~~~
 
@@ -382,6 +390,10 @@
 		histogram entry. Default is 0 which means no limitation.
 
 report.*::
+	report.sort_order::
+		Allows changing the default sort order from "comm,dso,symbol" to
+		some other default, for instance "sym,dso" may be more fitting for
+		kernel developers.
 	report.percent-limit::
 		This one is mostly the same as call-graph.threshold but works for
 		histogram entries. Entries having an overhead lower than this
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 736da44..e6c9902 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -21,6 +21,8 @@
 'perf probe' [options] --vars='PROBEPOINT'
 or
 'perf probe' [options] --funcs
+or
+'perf probe' [options] --definition='PROBE' [...]
 
 DESCRIPTION
 -----------
@@ -34,6 +36,8 @@
 -k::
 --vmlinux=PATH::
 	Specify vmlinux path which has debuginfo (Dwarf binary).
+	Only when using this with --definition, you can give an offline
+	vmlinux file.
 
 -m::
 --module=MODNAME|PATH::
@@ -96,6 +100,11 @@
 	can also list functions in a user space executable / shared library.
 	This also can accept a FILTER rule argument.
 
+-D::
+--definition=::
+	Show trace-event definition converted from given probe-event instead
+	of write it into tracing/[k,u]probe_events.
+
 --filter=FILTER::
 	(Only for --vars and --funcs) Set filter. FILTER is a combination of glob
 	pattern, see FILTER PATTERN for detail.
@@ -176,10 +185,17 @@
 
 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
 '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
-'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
-
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo (*). Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail)
 On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
 
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. Prefix 's' and 'u' means those types are signed and unsigned respectively, and 'x' means that is shown in hexadecimal format. Traced arguments are shown in decimal (sNN/uNN) or hex (xNN). You can also use 's' or 'u' to specify only signedness and leave its size auto-detected by perf probe. Moreover, you can use 'x' to explicitly specify to be shown in hexadecimal (the size is also auto-detected).
+String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+Bitfield is another special type, which takes 3 parameters, bit-width, bit-offset, and container-size (usually 32). The syntax is;
+
+ b<bit-width>@<bit-offset>/<container-size>
+
 LINE SYNTAX
 -----------
 Line range is described by following syntax.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 379a2be..9233519 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -35,15 +35,15 @@
 
 	- a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
 	  'param1', 'param2', etc are defined as formats for the PMU in
-	  /sys/bus/event_sources/devices/<pmu>/format/*.
+	  /sys/bus/event_source/devices/<pmu>/format/*.
 
 	- a symbolically formed event like 'pmu/config=M,config1=N,config3=K/'
 
           where M, N, K are numbers (in decimal, hex, octal format). Acceptable
           values for each of 'config', 'config1' and 'config2' are defined by
-          corresponding entries in /sys/bus/event_sources/devices/<pmu>/format/*
+          corresponding entries in /sys/bus/event_source/devices/<pmu>/format/*
           param1 and param2 are defined as formats for the PMU in:
-          /sys/bus/event_sources/devices/<pmu>/format/*
+          /sys/bus/event_source/devices/<pmu>/format/*
 
 	  There are also some params which are not defined in .../<pmu>/format/*.
 	  These params can be used to overload default config values per event.
@@ -60,6 +60,18 @@
 	  Note: If user explicitly sets options which conflict with the params,
 	  the value set by the params will be overridden.
 
+	  Also not defined in .../<pmu>/format/* are PMU driver specific
+	  configuration parameters.  Any configuration parameter preceded by
+	  the letter '@' is not interpreted in user space and sent down directly
+	  to the PMU driver.  For example:
+
+	  perf record -e some_event/@cfg1,@cfg2=config/ ...
+
+	  will see 'cfg1' and 'cfg2=config' pushed to the PMU driver associated
+	  with the event for further processing.  There is no restriction on
+	  what the configuration parameters are, as long as their semantic is
+	  understood and supported by the PMU driver.
+
         - a hardware breakpoint event in the form of '\mem:addr[/len][:access]'
           where addr is the address in memory you want to break in.
           Access is the memory access type (read, write, execute) it can
@@ -77,9 +89,62 @@
 
 --filter=<filter>::
         Event filter. This option should follow a event selector (-e) which
-	selects tracepoint event(s). Multiple '--filter' options are combined
+	selects either tracepoint event(s) or a hardware trace PMU
+	(e.g. Intel PT or CoreSight).
+
+	- tracepoint filters
+
+	In the case of tracepoints, multiple '--filter' options are combined
 	using '&&'.
 
+	- address filters
+
+	A hardware trace PMU advertises its ability to accept a number of
+	address filters	by specifying a non-zero value in
+	/sys/bus/event_source/devices/<pmu>/nr_addr_filters.
+
+	Address filters have the format:
+
+	filter|start|stop|tracestop <start> [/ <size>] [@<file name>]
+
+	Where:
+	- 'filter': defines a region that will be traced.
+	- 'start': defines an address at which tracing will begin.
+	- 'stop': defines an address at which tracing will stop.
+	- 'tracestop': defines a region in which tracing will stop.
+
+	<file name> is the name of the object file, <start> is the offset to the
+	code to trace in that file, and <size> is the size of the region to
+	trace. 'start' and 'stop' filters need not specify a <size>.
+
+	If no object file is specified then the kernel is assumed, in which case
+	the start address must be a current kernel memory address.
+
+	<start> can also be specified by providing the name of a symbol. If the
+	symbol name is not unique, it can be disambiguated by inserting #n where
+	'n' selects the n'th symbol in address order. Alternately #0, #g or #G
+	select only a global symbol. <size> can also be specified by providing
+	the name of a symbol, in which case the size is calculated to the end
+	of that symbol. For 'filter' and 'tracestop' filters, if <size> is
+	omitted and <start> is a symbol, then the size is calculated to the end
+	of that symbol.
+
+	If <size> is omitted and <start> is '*', then the start and size will
+	be calculated from the first and last symbols, i.e. to trace the whole
+	file.
+
+	If symbol names (or '*') are provided, they must be surrounded by white
+	space.
+
+	The filter passed to the kernel is not necessarily the same as entered.
+	To see the filter that is passed, use the -v option.
+
+	The kernel may not be able to configure a trace region if it is not
+	within a single mapping.  MMAP events (or /proc/<pid>/maps) can be
+	examined to determine if that is a possibility.
+
+	Multiple filters can be separated with space or comma.
+
 --exclude-perf::
 	Don't record events issued by perf itself. This option should follow
 	a event selector (-e) which selects tracepoint event(s). It adds a
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 1f6c705..053bbbd 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,8 +116,8 @@
 --fields::
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
-	srcline, period, iregs, brstack, brstacksym, flags.
-        Field list can be prepended with the type, trace, sw or hw,
+        srcline, period, iregs, brstack, brstacksym, flags, bpf-output,
+        callindent. Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
 
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index fdc99fe..b664b18 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -437,6 +437,10 @@
 quipper
 
 The quipper C++ parser is available at
-https://chromium.googlesource.com/chromiumos/platform/chromiumos-wide-profiling/
+https://chromium.googlesource.com/chromiumos/platform2
+
+It is under the chromiumos-wide-profiling/ subdirectory. This library can
+convert a perf data file to a protobuf and vice versa.
+
 Unfortunately this parser tends to be many versions behind and may not be able
 to parse data files generated by recent perf.
diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example
index 1d8d5bc..2b477c1 100644
--- a/tools/perf/Documentation/perfconfig.example
+++ b/tools/perf/Documentation/perfconfig.example
@@ -27,3 +27,12 @@
 	use_offset = true
 	jump_arrows = true
 	show_nr_jumps = false
+
+[report]
+
+	# Defaults
+	sort-order = comm,dso,symbol
+	percent-limit = 0
+	queue-size = 0
+	children = true
+	group = true
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index ad2534d..0bda2cc 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -60,14 +60,18 @@
 tools/include/linux/atomic.h
 tools/include/linux/bitops.h
 tools/include/linux/compiler.h
+tools/include/linux/coresight-pmu.h
 tools/include/linux/filter.h
 tools/include/linux/hash.h
 tools/include/linux/kernel.h
 tools/include/linux/list.h
 tools/include/linux/log2.h
+tools/include/uapi/asm-generic/mman-common.h
+tools/include/uapi/asm-generic/mman.h
 tools/include/uapi/linux/bpf.h
 tools/include/uapi/linux/bpf_common.h
 tools/include/uapi/linux/hw_breakpoint.h
+tools/include/uapi/linux/mman.h
 tools/include/uapi/linux/perf_event.h
 tools/include/linux/poison.h
 tools/include/linux/rbtree.h
@@ -77,4 +81,6 @@
 tools/include/linux/types.h
 tools/include/linux/err.h
 tools/include/linux/bitmap.h
+tools/include/linux/time64.h
+tools/arch/*/include/uapi/asm/mman.h
 tools/arch/*/include/uapi/asm/perf_regs.h
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 24803c5..72edf83 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -746,10 +746,13 @@
 endif
 
 ifndef NO_AUXTRACE
-  ifeq ($(feature-get_cpuid), 0)
-    msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
-    NO_AUXTRACE := 1
-  else
+  ifeq ($(ARCH),x86)
+    ifeq ($(feature-get_cpuid), 0)
+      msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
+      NO_AUXTRACE := 1
+    endif
+  endif
+  ifndef NO_AUXTRACE
     $(call detected,CONFIG_AUXTRACE)
     CFLAGS += -DHAVE_AUXTRACE_SUPPORT
   endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 2d90875..d710db1 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -165,7 +165,7 @@
 # non-config cases
 config := 1
 
-NON_CONFIG_TARGETS := clean TAGS tags cscope help
+NON_CONFIG_TARGETS := clean TAGS tags cscope help install-doc
 
 ifdef MAKECMDGOALS
 ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
@@ -429,6 +429,18 @@
 	@(test -f ../../include/asm-generic/bitops/fls64.h && ( \
         (diff -B ../include/asm-generic/bitops/fls64.h ../../include/asm-generic/bitops/fls64.h >/dev/null) \
         || echo "Warning: tools/include/asm-generic/bitops/fls64.h differs from kernel" >&2 )) || true
+	@(test -f ../../include/linux/coresight-pmu.h && ( \
+	(diff -B ../include/linux/coresight-pmu.h ../../include/linux/coresight-pmu.h >/dev/null) \
+	|| echo "Warning: tools/include/linux/coresight-pmu.h differs from kernel" >&2 )) || true
+	@(test -f ../../include/uapi/asm-generic/mman-common.h && ( \
+	(diff -B ../include/uapi/asm-generic/mman-common.h ../../include/uapi/asm-generic/mman-common.h >/dev/null) \
+	|| echo "Warning: tools/include/uapi/asm-generic/mman-common.h differs from kernel" >&2 )) || true
+	@(test -f ../../include/uapi/asm-generic/mman.h && ( \
+	(diff -B -I "^#include <\(uapi/\)*asm-generic/mman-common.h>$$" ../include/uapi/asm-generic/mman.h ../../include/uapi/asm-generic/mman.h >/dev/null) \
+	|| echo "Warning: tools/include/uapi/asm-generic/mman.h differs from kernel" >&2 )) || true
+	@(test -f ../../include/uapi/linux/mman.h && ( \
+	(diff -B -I "^#include <\(uapi/\)*asm/mman.h>$$" ../include/uapi/linux/mman.h ../../include/uapi/linux/mman.h >/dev/null) \
+	|| echo "Warning: tools/include/uapi/linux/mman.h differs from kernel" >&2 )) || true
 	$(Q)$(MAKE) $(build)=perf
 
 $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
diff --git a/tools/perf/arch/arm/include/dwarf-regs-table.h b/tools/perf/arch/arm/include/dwarf-regs-table.h
new file mode 100644
index 0000000..f298d03
--- /dev/null
+++ b/tools/perf/arch/arm/include/dwarf-regs-table.h
@@ -0,0 +1,9 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const arm_regstr_tbl[] = {
+	"%r0", "%r1", "%r2", "%r3", "%r4",
+	"%r5", "%r6", "%r7", "%r8", "%r9", "%r10",
+	"%fp", "%ip", "%sp", "%lr", "%pc",
+};
+#endif
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
index f98da17..e64c5f2 100644
--- a/tools/perf/arch/arm/util/Build
+++ b/tools/perf/arch/arm/util/Build
@@ -2,3 +2,5 @@
 
 libperf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind.o
 libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
new file mode 100644
index 0000000..8edf2cb
--- /dev/null
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdbool.h>
+#include <linux/coresight-pmu.h>
+
+#include "../../util/auxtrace.h"
+#include "../../util/evlist.h"
+#include "../../util/pmu.h"
+#include "cs-etm.h"
+
+struct auxtrace_record
+*auxtrace_record__init(struct perf_evlist *evlist, int *err)
+{
+	struct perf_pmu	*cs_etm_pmu;
+	struct perf_evsel *evsel;
+	bool found_etm = false;
+
+	cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+
+	if (evlist) {
+		evlist__for_each_entry(evlist, evsel) {
+			if (cs_etm_pmu &&
+			    evsel->attr.type == cs_etm_pmu->type)
+				found_etm = true;
+		}
+	}
+
+	if (found_etm)
+		return cs_etm_record_init(err);
+
+	/*
+	 * Clear 'err' even if we haven't found a cs_etm event - that way perf
+	 * record can still be used even if tracers aren't present.  The NULL
+	 * return value will take care of telling the infrastructure HW tracing
+	 * isn't available.
+	 */
+	*err = 0;
+	return NULL;
+}
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
new file mode 100644
index 0000000..47d584d
--- /dev/null
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -0,0 +1,617 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <api/fs/fs.h>
+#include <linux/bitops.h>
+#include <linux/coresight-pmu.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+
+#include "cs-etm.h"
+#include "../../perf.h"
+#include "../../util/auxtrace.h"
+#include "../../util/cpumap.h"
+#include "../../util/evlist.h"
+#include "../../util/evsel.h"
+#include "../../util/pmu.h"
+#include "../../util/thread_map.h"
+#include "../../util/cs-etm.h"
+
+#include <stdlib.h>
+
+#define ENABLE_SINK_MAX	128
+#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"
+
+struct cs_etm_recording {
+	struct auxtrace_record	itr;
+	struct perf_pmu		*cs_etm_pmu;
+	struct perf_evlist	*evlist;
+	bool			snapshot_mode;
+	size_t			snapshot_size;
+};
+
+static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
+
+static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
+					 struct record_opts *opts,
+					 const char *str)
+{
+	struct cs_etm_recording *ptr =
+				container_of(itr, struct cs_etm_recording, itr);
+	unsigned long long snapshot_size = 0;
+	char *endptr;
+
+	if (str) {
+		snapshot_size = strtoull(str, &endptr, 0);
+		if (*endptr || snapshot_size > SIZE_MAX)
+			return -1;
+	}
+
+	opts->auxtrace_snapshot_mode = true;
+	opts->auxtrace_snapshot_size = snapshot_size;
+	ptr->snapshot_size = snapshot_size;
+
+	return 0;
+}
+
+static int cs_etm_recording_options(struct auxtrace_record *itr,
+				    struct perf_evlist *evlist,
+				    struct record_opts *opts)
+{
+	struct cs_etm_recording *ptr =
+				container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+	struct perf_evsel *evsel, *cs_etm_evsel = NULL;
+	const struct cpu_map *cpus = evlist->cpus;
+	bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0);
+
+	ptr->evlist = evlist;
+	ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == cs_etm_pmu->type) {
+			if (cs_etm_evsel) {
+				pr_err("There may be only one %s event\n",
+				       CORESIGHT_ETM_PMU_NAME);
+				return -EINVAL;
+			}
+			evsel->attr.freq = 0;
+			evsel->attr.sample_period = 1;
+			cs_etm_evsel = evsel;
+			opts->full_auxtrace = true;
+		}
+	}
+
+	/* no need to continue if at least one event of interest was found */
+	if (!cs_etm_evsel)
+		return 0;
+
+	if (opts->use_clockid) {
+		pr_err("Cannot use clockid (-k option) with %s\n",
+		       CORESIGHT_ETM_PMU_NAME);
+		return -EINVAL;
+	}
+
+	/* we are in snapshot mode */
+	if (opts->auxtrace_snapshot_mode) {
+		/*
+		 * No size were given to '-S' or '-m,', so go with
+		 * the default
+		 */
+		if (!opts->auxtrace_snapshot_size &&
+		    !opts->auxtrace_mmap_pages) {
+			if (privileged) {
+				opts->auxtrace_mmap_pages = MiB(4) / page_size;
+			} else {
+				opts->auxtrace_mmap_pages =
+							KiB(128) / page_size;
+				if (opts->mmap_pages == UINT_MAX)
+					opts->mmap_pages = KiB(256) / page_size;
+			}
+		} else if (!opts->auxtrace_mmap_pages && !privileged &&
+						opts->mmap_pages == UINT_MAX) {
+			opts->mmap_pages = KiB(256) / page_size;
+		}
+
+		/*
+		 * '-m,xyz' was specified but no snapshot size, so make the
+		 * snapshot size as big as the auxtrace mmap area.
+		 */
+		if (!opts->auxtrace_snapshot_size) {
+			opts->auxtrace_snapshot_size =
+				opts->auxtrace_mmap_pages * (size_t)page_size;
+		}
+
+		/*
+		 * -Sxyz was specified but no auxtrace mmap area, so make the
+		 * auxtrace mmap area big enough to fit the requested snapshot
+		 * size.
+		 */
+		if (!opts->auxtrace_mmap_pages) {
+			size_t sz = opts->auxtrace_snapshot_size;
+
+			sz = round_up(sz, page_size) / page_size;
+			opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+		}
+
+		/* Snapshost size can't be bigger than the auxtrace area */
+		if (opts->auxtrace_snapshot_size >
+				opts->auxtrace_mmap_pages * (size_t)page_size) {
+			pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+			       opts->auxtrace_snapshot_size,
+			       opts->auxtrace_mmap_pages * (size_t)page_size);
+			return -EINVAL;
+		}
+
+		/* Something went wrong somewhere - this shouldn't happen */
+		if (!opts->auxtrace_snapshot_size ||
+		    !opts->auxtrace_mmap_pages) {
+			pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+			return -EINVAL;
+		}
+	}
+
+	/* We are in full trace mode but '-m,xyz' wasn't specified */
+	if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+		if (privileged) {
+			opts->auxtrace_mmap_pages = MiB(4) / page_size;
+		} else {
+			opts->auxtrace_mmap_pages = KiB(128) / page_size;
+			if (opts->mmap_pages == UINT_MAX)
+				opts->mmap_pages = KiB(256) / page_size;
+		}
+
+	}
+
+	/* Validate auxtrace_mmap_pages provided by user */
+	if (opts->auxtrace_mmap_pages) {
+		unsigned int max_page = (KiB(128) / page_size);
+		size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+
+		if (!privileged &&
+		    opts->auxtrace_mmap_pages > max_page) {
+			opts->auxtrace_mmap_pages = max_page;
+			pr_err("auxtrace too big, truncating to %d\n",
+			       max_page);
+		}
+
+		if (!is_power_of_2(sz)) {
+			pr_err("Invalid mmap size for %s: must be a power of 2\n",
+			       CORESIGHT_ETM_PMU_NAME);
+			return -EINVAL;
+		}
+	}
+
+	if (opts->auxtrace_snapshot_mode)
+		pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
+			  opts->auxtrace_snapshot_size);
+
+	if (cs_etm_evsel) {
+		/*
+		 * To obtain the auxtrace buffer file descriptor, the auxtrace
+		 * event must come first.
+		 */
+		perf_evlist__to_front(evlist, cs_etm_evsel);
+		/*
+		 * In the case of per-cpu mmaps, we need the CPU on the
+		 * AUX event.
+		 */
+		if (!cpu_map__empty(cpus))
+			perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
+	}
+
+	/* Add dummy event to keep tracking */
+	if (opts->full_auxtrace) {
+		struct perf_evsel *tracking_evsel;
+		int err;
+
+		err = parse_events(evlist, "dummy:u", NULL);
+		if (err)
+			return err;
+
+		tracking_evsel = perf_evlist__last(evlist);
+		perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+		tracking_evsel->attr.freq = 0;
+		tracking_evsel->attr.sample_period = 1;
+
+		/* In per-cpu case, always need the time of mmap events etc */
+		if (!cpu_map__empty(cpus))
+			perf_evsel__set_sample_bit(tracking_evsel, TIME);
+	}
+
+	return 0;
+}
+
+static u64 cs_etm_get_config(struct auxtrace_record *itr)
+{
+	u64 config = 0;
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+	struct perf_evlist *evlist = ptr->evlist;
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == cs_etm_pmu->type) {
+			/*
+			 * Variable perf_event_attr::config is assigned to
+			 * ETMv3/PTM.  The bit fields have been made to match
+			 * the ETMv3.5 ETRMCR register specification.  See the
+			 * PMU_FORMAT_ATTR() declarations in
+			 * drivers/hwtracing/coresight/coresight-perf.c for
+			 * details.
+			 */
+			config = evsel->attr.config;
+			break;
+		}
+	}
+
+	return config;
+}
+
+static size_t
+cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+		      struct perf_evlist *evlist __maybe_unused)
+{
+	int i;
+	int etmv3 = 0, etmv4 = 0;
+	const struct cpu_map *cpus = evlist->cpus;
+
+	/* cpu map is not empty, we have specific CPUs to work with */
+	if (!cpu_map__empty(cpus)) {
+		for (i = 0; i < cpu_map__nr(cpus); i++) {
+			if (cs_etm_is_etmv4(itr, cpus->map[i]))
+				etmv4++;
+			else
+				etmv3++;
+		}
+	} else {
+		/* get configuration for all CPUs in the system */
+		for (i = 0; i < cpu__max_cpu(); i++) {
+			if (cs_etm_is_etmv4(itr, i))
+				etmv4++;
+			else
+				etmv3++;
+		}
+	}
+
+	return (CS_ETM_HEADER_SIZE +
+	       (etmv4 * CS_ETMV4_PRIV_SIZE) +
+	       (etmv3 * CS_ETMV3_PRIV_SIZE));
+}
+
+static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
+	[CS_ETM_ETMCCER]	= "mgmt/etmccer",
+	[CS_ETM_ETMIDR]		= "mgmt/etmidr",
+};
+
+static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
+	[CS_ETMV4_TRCIDR0]		= "trcidr/trcidr0",
+	[CS_ETMV4_TRCIDR1]		= "trcidr/trcidr1",
+	[CS_ETMV4_TRCIDR2]		= "trcidr/trcidr2",
+	[CS_ETMV4_TRCIDR8]		= "trcidr/trcidr8",
+	[CS_ETMV4_TRCAUTHSTATUS]	= "mgmt/trcauthstatus",
+};
+
+static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu)
+{
+	bool ret = false;
+	char path[PATH_MAX];
+	int scan;
+	unsigned int val;
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+	/* Take any of the RO files for ETMv4 and see if it present */
+	snprintf(path, PATH_MAX, "cpu%d/%s",
+		 cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+	scan = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
+
+	/* The file was read successfully, we have a winner */
+	if (scan == 1)
+		ret = true;
+
+	return ret;
+}
+
+static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path)
+{
+	char pmu_path[PATH_MAX];
+	int scan;
+	unsigned int val = 0;
+
+	/* Get RO metadata from sysfs */
+	snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path);
+
+	scan = perf_pmu__scan_file(pmu, pmu_path, "%x", &val);
+	if (scan != 1)
+		pr_err("%s: error reading: %s\n", __func__, pmu_path);
+
+	return val;
+}
+
+static void cs_etm_get_metadata(int cpu, u32 *offset,
+				struct auxtrace_record *itr,
+				struct auxtrace_info_event *info)
+{
+	u32 increment;
+	u64 magic;
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+	/* first see what kind of tracer this cpu is affined to */
+	if (cs_etm_is_etmv4(itr, cpu)) {
+		magic = __perf_cs_etmv4_magic;
+		/* Get trace configuration register */
+		info->priv[*offset + CS_ETMV4_TRCCONFIGR] =
+						cs_etm_get_config(itr);
+		/* Get traceID from the framework */
+		info->priv[*offset + CS_ETMV4_TRCTRACEIDR] =
+						coresight_get_trace_id(cpu);
+		/* Get read-only information from sysFS */
+		info->priv[*offset + CS_ETMV4_TRCIDR0] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+		info->priv[*offset + CS_ETMV4_TRCIDR1] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro[CS_ETMV4_TRCIDR1]);
+		info->priv[*offset + CS_ETMV4_TRCIDR2] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
+		info->priv[*offset + CS_ETMV4_TRCIDR8] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro[CS_ETMV4_TRCIDR8]);
+		info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv4_ro
+				      [CS_ETMV4_TRCAUTHSTATUS]);
+
+		/* How much space was used */
+		increment = CS_ETMV4_PRIV_MAX;
+	} else {
+		magic = __perf_cs_etmv3_magic;
+		/* Get configuration register */
+		info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
+		/* Get traceID from the framework */
+		info->priv[*offset + CS_ETM_ETMTRACEIDR] =
+						coresight_get_trace_id(cpu);
+		/* Get read-only information from sysFS */
+		info->priv[*offset + CS_ETM_ETMCCER] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv3_ro[CS_ETM_ETMCCER]);
+		info->priv[*offset + CS_ETM_ETMIDR] =
+			cs_etm_get_ro(cs_etm_pmu, cpu,
+				      metadata_etmv3_ro[CS_ETM_ETMIDR]);
+
+		/* How much space was used */
+		increment = CS_ETM_PRIV_MAX;
+	}
+
+	/* Build generic header portion */
+	info->priv[*offset + CS_ETM_MAGIC] = magic;
+	info->priv[*offset + CS_ETM_CPU] = cpu;
+	/* Where the next CPU entry should start from */
+	*offset += increment;
+}
+
+static int cs_etm_info_fill(struct auxtrace_record *itr,
+			    struct perf_session *session,
+			    struct auxtrace_info_event *info,
+			    size_t priv_size)
+{
+	int i;
+	u32 offset;
+	u64 nr_cpu, type;
+	const struct cpu_map *cpus = session->evlist->cpus;
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+	if (priv_size != cs_etm_info_priv_size(itr, session->evlist))
+		return -EINVAL;
+
+	if (!session->evlist->nr_mmaps)
+		return -EINVAL;
+
+	/* If the cpu_map is empty all CPUs are involved */
+	nr_cpu = cpu_map__empty(cpus) ? cpu__max_cpu() : cpu_map__nr(cpus);
+	/* Get PMU type as dynamically assigned by the core */
+	type = cs_etm_pmu->type;
+
+	/* First fill out the session header */
+	info->type = PERF_AUXTRACE_CS_ETM;
+	info->priv[CS_HEADER_VERSION_0] = 0;
+	info->priv[CS_PMU_TYPE_CPUS] = type << 32;
+	info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu;
+	info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode;
+
+	offset = CS_ETM_SNAPSHOT + 1;
+
+	/* cpu map is not empty, we have specific CPUs to work with */
+	if (!cpu_map__empty(cpus)) {
+		for (i = 0; i < cpu_map__nr(cpus) && offset < priv_size; i++)
+			cs_etm_get_metadata(cpus->map[i], &offset, itr, info);
+	} else {
+		/* get configuration for all CPUs in the system */
+		for (i = 0; i < cpu__max_cpu(); i++)
+			cs_etm_get_metadata(i, &offset, itr, info);
+	}
+
+	return 0;
+}
+
+static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
+				int idx, struct auxtrace_mmap *mm,
+				unsigned char *data __maybe_unused,
+				u64 *head, u64 *old)
+{
+	pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
+		  __func__, idx, (size_t)*old, (size_t)*head, mm->len);
+
+	*old = *head;
+	*head += mm->len;
+
+	return 0;
+}
+
+static int cs_etm_snapshot_start(struct auxtrace_record *itr)
+{
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(ptr->evlist, evsel) {
+		if (evsel->attr.type == ptr->cs_etm_pmu->type)
+			return perf_evsel__disable(evsel);
+	}
+	return -EINVAL;
+}
+
+static int cs_etm_snapshot_finish(struct auxtrace_record *itr)
+{
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(ptr->evlist, evsel) {
+		if (evsel->attr.type == ptr->cs_etm_pmu->type)
+			return perf_evsel__enable(evsel);
+	}
+	return -EINVAL;
+}
+
+static u64 cs_etm_reference(struct auxtrace_record *itr __maybe_unused)
+{
+	return (((u64) rand() <<  0) & 0x00000000FFFFFFFFull) |
+		(((u64) rand() << 32) & 0xFFFFFFFF00000000ull);
+}
+
+static void cs_etm_recording_free(struct auxtrace_record *itr)
+{
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	free(ptr);
+}
+
+static int cs_etm_read_finish(struct auxtrace_record *itr, int idx)
+{
+	struct cs_etm_recording *ptr =
+			container_of(itr, struct cs_etm_recording, itr);
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(ptr->evlist, evsel) {
+		if (evsel->attr.type == ptr->cs_etm_pmu->type)
+			return perf_evlist__enable_event_idx(ptr->evlist,
+							     evsel, idx);
+	}
+
+	return -EINVAL;
+}
+
+struct auxtrace_record *cs_etm_record_init(int *err)
+{
+	struct perf_pmu *cs_etm_pmu;
+	struct cs_etm_recording *ptr;
+
+	cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+
+	if (!cs_etm_pmu) {
+		*err = -EINVAL;
+		goto out;
+	}
+
+	ptr = zalloc(sizeof(struct cs_etm_recording));
+	if (!ptr) {
+		*err = -ENOMEM;
+		goto out;
+	}
+
+	ptr->cs_etm_pmu			= cs_etm_pmu;
+	ptr->itr.parse_snapshot_options	= cs_etm_parse_snapshot_options;
+	ptr->itr.recording_options	= cs_etm_recording_options;
+	ptr->itr.info_priv_size		= cs_etm_info_priv_size;
+	ptr->itr.info_fill		= cs_etm_info_fill;
+	ptr->itr.find_snapshot		= cs_etm_find_snapshot;
+	ptr->itr.snapshot_start		= cs_etm_snapshot_start;
+	ptr->itr.snapshot_finish	= cs_etm_snapshot_finish;
+	ptr->itr.reference		= cs_etm_reference;
+	ptr->itr.free			= cs_etm_recording_free;
+	ptr->itr.read_finish		= cs_etm_read_finish;
+
+	*err = 0;
+	return &ptr->itr;
+out:
+	return NULL;
+}
+
+static FILE *cs_device__open_file(const char *name)
+{
+	struct stat st;
+	char path[PATH_MAX];
+	const char *sysfs;
+
+	sysfs = sysfs__mountpoint();
+	if (!sysfs)
+		return NULL;
+
+	snprintf(path, PATH_MAX,
+		 "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name);
+
+	printf("path: %s\n", path);
+
+	if (stat(path, &st) < 0)
+		return NULL;
+
+	return fopen(path, "w");
+
+}
+
+static __attribute__((format(printf, 2, 3)))
+int cs_device__print_file(const char *name, const char *fmt, ...)
+{
+	va_list args;
+	FILE *file;
+	int ret = -EINVAL;
+
+	va_start(args, fmt);
+	file = cs_device__open_file(name);
+	if (file) {
+		ret = vfprintf(file, fmt, args);
+		fclose(file);
+	}
+	va_end(args);
+	return ret;
+}
+
+int cs_etm_set_drv_config(struct perf_evsel_config_term *term)
+{
+	int ret;
+	char enable_sink[ENABLE_SINK_MAX];
+
+	snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s",
+		 term->val.drv_cfg, "enable_sink");
+
+	ret = cs_device__print_file(enable_sink, "%d", 1);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h
new file mode 100644
index 0000000..5256741
--- /dev/null
+++ b/tools/perf/arch/arm/util/cs-etm.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef INCLUDE__PERF_CS_ETM_H__
+#define INCLUDE__PERF_CS_ETM_H__
+
+#include "../../util/evsel.h"
+
+struct auxtrace_record *cs_etm_record_init(int *err);
+int cs_etm_set_drv_config(struct perf_evsel_config_term *term);
+
+#endif
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
new file mode 100644
index 0000000..98d6739
--- /dev/null
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <linux/coresight-pmu.h>
+#include <linux/perf_event.h>
+
+#include "cs-etm.h"
+#include "../../util/pmu.h"
+
+struct perf_event_attr
+*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+#ifdef HAVE_AUXTRACE_SUPPORT
+	if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
+		/* add ETM default config here */
+		pmu->selectable = true;
+		pmu->set_drv_config = cs_etm_set_drv_config;
+	}
+#endif
+	return NULL;
+}
diff --git a/tools/perf/arch/arm64/include/dwarf-regs-table.h b/tools/perf/arch/arm64/include/dwarf-regs-table.h
new file mode 100644
index 0000000..2675936
--- /dev/null
+++ b/tools/perf/arch/arm64/include/dwarf-regs-table.h
@@ -0,0 +1,13 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const aarch64_regstr_tbl[] = {
+	"%r0", "%r1", "%r2", "%r3", "%r4",
+	"%r5", "%r6", "%r7", "%r8", "%r9",
+	"%r10", "%r11", "%r12", "%r13", "%r14",
+	"%r15", "%r16", "%r17", "%r18", "%r19",
+	"%r20", "%r21", "%r22", "%r23", "%r24",
+	"%r25", "%r26", "%r27", "%r28", "%r29",
+	"%lr", "%sp",
+};
+#endif
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 02f41db..cef6fb3 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,2 +1,6 @@
 libperf-$(CONFIG_DWARF)     += dwarf-regs.o
 libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+
+libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
+			      ../../arm/util/auxtrace.o \
+			      ../../arm/util/cs-etm.o
diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build
index 54afe4a..db52fa2 100644
--- a/tools/perf/arch/powerpc/Build
+++ b/tools/perf/arch/powerpc/Build
@@ -1 +1,2 @@
 libperf-y += util/
+libperf-y += tests/
diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h
new file mode 100644
index 0000000..84d8ded
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/arch-tests.h
@@ -0,0 +1,13 @@
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+			     struct thread *thread);
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/tools/perf/arch/powerpc/include/dwarf-regs-table.h b/tools/perf/arch/powerpc/include/dwarf-regs-table.h
new file mode 100644
index 0000000..db4730f
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/dwarf-regs-table.h
@@ -0,0 +1,27 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+/*
+ * Reference:
+ * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html
+ * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf
+ */
+#define REG_DWARFNUM_NAME(reg, idx)	[idx] = "%" #reg
+
+static const char * const powerpc_regstr_tbl[] = {
+	"%gpr0", "%gpr1", "%gpr2", "%gpr3", "%gpr4",
+	"%gpr5", "%gpr6", "%gpr7", "%gpr8", "%gpr9",
+	"%gpr10", "%gpr11", "%gpr12", "%gpr13", "%gpr14",
+	"%gpr15", "%gpr16", "%gpr17", "%gpr18", "%gpr19",
+	"%gpr20", "%gpr21", "%gpr22", "%gpr23", "%gpr24",
+	"%gpr25", "%gpr26", "%gpr27", "%gpr28", "%gpr29",
+	"%gpr30", "%gpr31",
+	REG_DWARFNUM_NAME(msr,   66),
+	REG_DWARFNUM_NAME(ctr,   109),
+	REG_DWARFNUM_NAME(link,  108),
+	REG_DWARFNUM_NAME(xer,   101),
+	REG_DWARFNUM_NAME(dar,   119),
+	REG_DWARFNUM_NAME(dsisr, 118),
+};
+
+#endif
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index 75de0e9..c12f4e8 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -5,6 +5,8 @@
 #include <linux/types.h>
 #include <asm/perf_regs.h>
 
+void perf_regs_load(u64 *regs);
+
 #define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
 #define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
 #ifdef __powerpc64__
diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build
new file mode 100644
index 0000000..d827ef3
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/Build
@@ -0,0 +1,4 @@
+libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
+libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/tools/perf/arch/powerpc/tests/arch-tests.c b/tools/perf/arch/powerpc/tests/arch-tests.c
new file mode 100644
index 0000000..e24f462
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/arch-tests.c
@@ -0,0 +1,15 @@
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+	{
+		.desc = "Test dwarf unwind",
+		.func = test__dwarf_unwind,
+	},
+#endif
+	{
+		.func = NULL,
+	},
+};
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
new file mode 100644
index 0000000..0bac313
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -0,0 +1,62 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+			 struct thread *thread, u64 *regs)
+{
+	struct stack_dump *stack = &sample->user_stack;
+	struct map *map;
+	unsigned long sp;
+	u64 stack_size, *buf;
+
+	buf = malloc(STACK_SIZE);
+	if (!buf) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
+
+	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	if (!map) {
+		pr_debug("failed to get stack map\n");
+		free(buf);
+		return -1;
+	}
+
+	stack_size = map->end - sp;
+	stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+	memcpy(buf, (void *) sp, stack_size);
+	stack->data = (char *) buf;
+	stack->size = stack_size;
+	return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+			     struct thread *thread)
+{
+	struct regs_dump *regs = &sample->user_regs;
+	u64 *buf;
+
+	buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+	if (!buf) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	perf_regs_load(buf);
+	regs->abi  = PERF_SAMPLE_REGS_ABI;
+	regs->regs = buf;
+	regs->mask = PERF_REGS_MASK;
+
+	return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/powerpc/tests/regs_load.S b/tools/perf/arch/powerpc/tests/regs_load.S
new file mode 100644
index 0000000..d76c9a3
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/regs_load.S
@@ -0,0 +1,94 @@
+#include <linux/linkage.h>
+
+/* Offset is based on macros from arch/powerpc/include/uapi/asm/ptrace.h. */
+#define R0	 0
+#define R1	 1 * 8
+#define R2	 2 * 8
+#define R3	 3 * 8
+#define R4	 4 * 8
+#define R5	 5 * 8
+#define R6	 6 * 8
+#define R7	 7 * 8
+#define R8	 8 * 8
+#define R9	 9 * 8
+#define R10	10 * 8
+#define R11	11 * 8
+#define R12	12 * 8
+#define R13	13 * 8
+#define R14	14 * 8
+#define R15	15 * 8
+#define R16	16 * 8
+#define R17	17 * 8
+#define R18	18 * 8
+#define R19	19 * 8
+#define R20	20 * 8
+#define R21	21 * 8
+#define R22	22 * 8
+#define R23	23 * 8
+#define R24	24 * 8
+#define R25	25 * 8
+#define R26	26 * 8
+#define R27	27 * 8
+#define R28	28 * 8
+#define R29	29 * 8
+#define R30	30 * 8
+#define R31	31 * 8
+#define NIP	32 * 8
+#define CTR	35 * 8
+#define LINK	36 * 8
+#define XER	37 * 8
+
+.globl perf_regs_load
+perf_regs_load:
+	std 0, R0(3)
+	std 1, R1(3)
+	std 2, R2(3)
+	std 3, R3(3)
+	std 4, R4(3)
+	std 5, R5(3)
+	std 6, R6(3)
+	std 7, R7(3)
+	std 8, R8(3)
+	std 9, R9(3)
+	std 10, R10(3)
+	std 11, R11(3)
+	std 12, R12(3)
+	std 13, R13(3)
+	std 14, R14(3)
+	std 15, R15(3)
+	std 16, R16(3)
+	std 17, R17(3)
+	std 18, R18(3)
+	std 19, R19(3)
+	std 20, R20(3)
+	std 21, R21(3)
+	std 22, R22(3)
+	std 23, R23(3)
+	std 24, R24(3)
+	std 25, R25(3)
+	std 26, R26(3)
+	std 27, R27(3)
+	std 28, R28(3)
+	std 29, R29(3)
+	std 30, R30(3)
+	std 31, R31(3)
+
+	/* store NIP */
+	mflr 4
+	std 4, NIP(3)
+
+	/* Store LR */
+	std 4, LINK(3)
+
+	/* Store XER */
+	mfxer 4
+	std 4, XER(3)
+
+	/* Store CTR */
+	mfctr 4
+	std 4, CTR(3)
+
+	/* Restore original value of r4 */
+	ld 4, R4(3)
+
+	blr
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index c6d0f91..ed9d5d1 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -54,10 +54,6 @@
 #endif
 
 #if defined(_CALL_ELF) && _CALL_ELF == 2
-bool arch__prefers_symtab(void)
-{
-	return true;
-}
 
 #ifdef HAVE_LIBELF_SUPPORT
 void arch__sym_update(struct symbol *s, GElf_Sym *sym)
@@ -100,4 +96,29 @@
 			tev->point.offset += lep_offset;
 	}
 }
+
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+					   int ntevs)
+{
+	struct probe_trace_event *tev;
+	struct map *map;
+	struct symbol *sym = NULL;
+	struct rb_node *tmp;
+	int i = 0;
+
+	map = get_target_map(pev->target, pev->uprobes);
+	if (!map || map__load(map) < 0)
+		return;
+
+	for (i = 0; i < ntevs; i++) {
+		tev = &pev->tevs[i];
+		map__for_each_symbol(map, sym, tmp) {
+			if (map->unmap_ip(map, sym->start) == tev->point.address)
+				arch__fix_tev_from_maps(pev, tev, map, sym);
+		}
+	}
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
 #endif
diff --git a/tools/perf/arch/s390/include/dwarf-regs-table.h b/tools/perf/arch/s390/include/dwarf-regs-table.h
new file mode 100644
index 0000000..9da74a9
--- /dev/null
+++ b/tools/perf/arch/s390/include/dwarf-regs-table.h
@@ -0,0 +1,8 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const s390_regstr_tbl[] = {
+	"%r0", "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
+	"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+};
+#endif
diff --git a/tools/perf/arch/sh/include/dwarf-regs-table.h b/tools/perf/arch/sh/include/dwarf-regs-table.h
new file mode 100644
index 0000000..3a2deaf
--- /dev/null
+++ b/tools/perf/arch/sh/include/dwarf-regs-table.h
@@ -0,0 +1,25 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+const char * const sh_regstr_tbl[] = {
+	"r0",
+	"r1",
+	"r2",
+	"r3",
+	"r4",
+	"r5",
+	"r6",
+	"r7",
+	"r8",
+	"r9",
+	"r10",
+	"r11",
+	"r12",
+	"r13",
+	"r14",
+	"r15",
+	"pc",
+	"pr",
+};
+
+#endif
diff --git a/tools/perf/arch/sparc/include/dwarf-regs-table.h b/tools/perf/arch/sparc/include/dwarf-regs-table.h
new file mode 100644
index 0000000..12c0761
--- /dev/null
+++ b/tools/perf/arch/sparc/include/dwarf-regs-table.h
@@ -0,0 +1,18 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const sparc_regstr_tbl[] = {
+	"%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
+	"%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7",
+	"%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
+	"%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7",
+	"%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+	"%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+	"%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+	"%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+	"%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39",
+	"%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",
+	"%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",
+	"%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",
+};
+#endif
diff --git a/tools/perf/arch/x86/include/dwarf-regs-table.h b/tools/perf/arch/x86/include/dwarf-regs-table.h
new file mode 100644
index 0000000..9b5e5cb
--- /dev/null
+++ b/tools/perf/arch/x86/include/dwarf-regs-table.h
@@ -0,0 +1,14 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const x86_32_regstr_tbl[] = {
+	"%ax", "%cx", "%dx", "%bx", "$stack",/* Stack address instead of %sp */
+	"%bp", "%si", "%di",
+};
+
+static const char * const x86_64_regstr_tbl[] = {
+	"%ax", "%dx", "%cx", "%bx", "%si", "%di",
+	"%bp", "%sp", "%r8", "%r9", "%r10", "%r11",
+	"%r12", "%r13", "%r14", "%r15",
+};
+#endif
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index fb51457..90fa228 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -62,6 +62,7 @@
 	size_t				snapshot_ref_buf_size;
 	int				snapshot_ref_cnt;
 	struct intel_pt_snapshot_ref	*snapshot_refs;
+	size_t				priv_size;
 };
 
 static int intel_pt_parse_terms_with_default(struct list_head *formats,
@@ -273,11 +274,37 @@
 	return attr;
 }
 
-static size_t
-intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused,
-			struct perf_evlist *evlist __maybe_unused)
+static const char *intel_pt_find_filter(struct perf_evlist *evlist,
+					struct perf_pmu *intel_pt_pmu)
 {
-	return INTEL_PT_AUXTRACE_PRIV_SIZE;
+	struct perf_evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel->attr.type == intel_pt_pmu->type)
+			return evsel->filter;
+	}
+
+	return NULL;
+}
+
+static size_t intel_pt_filter_bytes(const char *filter)
+{
+	size_t len = filter ? strlen(filter) : 0;
+
+	return len ? roundup(len + 1, 8) : 0;
+}
+
+static size_t
+intel_pt_info_priv_size(struct auxtrace_record *itr, struct perf_evlist *evlist)
+{
+	struct intel_pt_recording *ptr =
+			container_of(itr, struct intel_pt_recording, itr);
+	const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);
+
+	ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
+			 intel_pt_filter_bytes(filter);
+
+	return ptr->priv_size;
 }
 
 static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
@@ -302,9 +329,13 @@
 	bool cap_user_time_zero = false, per_cpu_mmaps;
 	u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
 	u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
+	unsigned long max_non_turbo_ratio;
+	size_t filter_str_len;
+	const char *filter;
+	u64 *info;
 	int err;
 
-	if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE)
+	if (priv_size != ptr->priv_size)
 		return -EINVAL;
 
 	intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
@@ -317,6 +348,13 @@
 
 	intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
 
+	if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
+				"%lu", &max_non_turbo_ratio) != 1)
+		max_non_turbo_ratio = 0;
+
+	filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
+	filter_str_len = filter ? strlen(filter) : 0;
+
 	if (!session->evlist->nr_mmaps)
 		return -EINVAL;
 
@@ -351,6 +389,17 @@
 	auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
 	auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
 	auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
+	auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
+	auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;
+
+	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+
+	if (filter_str_len) {
+		size_t len = intel_pt_filter_bytes(filter);
+
+		strncpy((char *)info, filter, len);
+		info += len >> 3;
+	}
 
 	return 0;
 }
@@ -501,7 +550,7 @@
 	struct intel_pt_recording *ptr =
 			container_of(itr, struct intel_pt_recording, itr);
 	struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
-	bool have_timing_info;
+	bool have_timing_info, need_immediate = false;
 	struct perf_evsel *evsel, *intel_pt_evsel = NULL;
 	const struct cpu_map *cpus = evlist->cpus;
 	bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
@@ -655,6 +704,7 @@
 				ptr->have_sched_switch = 3;
 			} else {
 				opts->record_switch_events = true;
+				need_immediate = true;
 				if (cpu_wide)
 					ptr->have_sched_switch = 3;
 				else
@@ -700,6 +750,9 @@
 		tracking_evsel->attr.freq = 0;
 		tracking_evsel->attr.sample_period = 1;
 
+		if (need_immediate)
+			tracking_evsel->immediate = true;
+
 		/* In per-cpu case, always need the time of mmap events etc */
 		if (!cpu_map__empty(cpus)) {
 			perf_evsel__set_sample_bit(tracking_evsel, TIME);
diff --git a/tools/perf/arch/xtensa/include/dwarf-regs-table.h b/tools/perf/arch/xtensa/include/dwarf-regs-table.h
new file mode 100644
index 0000000..aa0444a
--- /dev/null
+++ b/tools/perf/arch/xtensa/include/dwarf-regs-table.h
@@ -0,0 +1,8 @@
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const xtensa_regstr_tbl[] = {
+	"a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+	"a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+#endif
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
index f96e22e..2b9705a 100644
--- a/tools/perf/bench/futex-requeue.c
+++ b/tools/perf/bench/futex-requeue.c
@@ -16,6 +16,7 @@
 #include <subcmd/parse-options.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/time64.h>
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
@@ -62,7 +63,7 @@
 	printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
 	       requeued_avg,
 	       nthreads,
-	       requeuetime_avg/1e3,
+	       requeuetime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
 }
 
@@ -184,7 +185,7 @@
 
 		if (!silent) {
 			printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
-			       j + 1, nrequeued, nthreads, runtime.tv_usec/1e3);
+			       j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
 		}
 
 		/* everybody should be blocked on futex2, wake'em up */
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
index 4a2ecd7..2c8fa67 100644
--- a/tools/perf/bench/futex-wake-parallel.c
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -15,6 +15,7 @@
 #include <subcmd/parse-options.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/time64.h>
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
@@ -156,7 +157,7 @@
 
 	printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
 	       "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
-	       nblocked_threads, waketime_avg/1e3,
+	       nblocked_threads, waketime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(waketime_stddev, waketime_avg));
 }
 
@@ -172,7 +173,7 @@
 	printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
 	       wakeup_avg,
 	       nblocked_threads,
-	       waketime_avg/1e3,
+	       waketime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(waketime_stddev, waketime_avg));
 }
 
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 87d8f4f..e246b1b 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -16,6 +16,7 @@
 #include <subcmd/parse-options.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
+#include <linux/time64.h>
 #include <errno.h>
 #include "bench.h"
 #include "futex.h"
@@ -81,7 +82,7 @@
 	printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
 	       wakeup_avg,
 	       nthreads,
-	       waketime_avg/1e3,
+	       waketime_avg / USEC_PER_MSEC,
 	       rel_stddev_stats(waketime_stddev, waketime_avg));
 }
 
@@ -182,7 +183,7 @@
 
 		if (!silent) {
 			printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
-			       j + 1, nwoken, nthreads, runtime.tv_usec/1e3);
+			       j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
 		}
 
 		for (i = 0; i < nthreads; i++) {
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 2b54d0f..c684910 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -21,6 +21,7 @@
 #include <string.h>
 #include <sys/time.h>
 #include <errno.h>
+#include <linux/time64.h>
 
 #define K 1024
 
@@ -89,7 +90,7 @@
 
 static double timeval2double(struct timeval *ts)
 {
-	return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000;
+	return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
 }
 
 #define print_bps(x) do {						\
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index f7f5300..8efe904 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -30,6 +30,7 @@
 #include <sys/wait.h>
 #include <sys/prctl.h>
 #include <sys/types.h>
+#include <linux/time64.h>
 
 #include <numa.h>
 #include <numaif.h>
@@ -1004,7 +1005,7 @@
 	if (strong && process_groups == g->p.nr_proc) {
 		if (!*convergence) {
 			*convergence = runtime_ns_max;
-			tprintf(" (%6.1fs converged)\n", *convergence/1e9);
+			tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC);
 			if (g->p.measure_convergence) {
 				g->all_converged = true;
 				g->stop_work = true;
@@ -1012,7 +1013,7 @@
 		}
 	} else {
 		if (*convergence) {
-			tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
+			tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC);
 			*convergence = 0;
 		}
 		tprintf("\n");
@@ -1022,7 +1023,7 @@
 static void show_summary(double runtime_ns_max, int l, double *convergence)
 {
 	tprintf("\r #  %5.1f%%  [%.1f mins]",
-		(double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0);
+		(double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0);
 
 	calc_convergence(runtime_ns_max, convergence);
 
@@ -1179,8 +1180,8 @@
 
 		if (details >= 3) {
 			timersub(&stop, &start, &diff);
-			runtime_ns_max = diff.tv_sec * 1000000000;
-			runtime_ns_max += diff.tv_usec * 1000;
+			runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+			runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
 
 			if (details >= 0) {
 				printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
@@ -1192,23 +1193,23 @@
 			continue;
 
 		timersub(&stop, &start0, &diff);
-		runtime_ns_max = diff.tv_sec * 1000000000ULL;
-		runtime_ns_max += diff.tv_usec * 1000ULL;
+		runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+		runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
 
 		show_summary(runtime_ns_max, l, &convergence);
 	}
 
 	gettimeofday(&stop, NULL);
 	timersub(&stop, &start0, &diff);
-	td->runtime_ns = diff.tv_sec * 1000000000ULL;
-	td->runtime_ns += diff.tv_usec * 1000ULL;
-	td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9;
+	td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
+	td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
+	td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9;
 
 	getrusage(RUSAGE_THREAD, &rusage);
-	td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL;
-	td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL;
-	td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL;
-	td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL;
+	td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
+	td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC;
+	td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC;
+	td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC;
 
 	free_data(thread_data, g->p.bytes_thread);
 
@@ -1469,7 +1470,7 @@
 	}
 	/* Wait for all the threads to start up: */
 	while (g->nr_tasks_started != g->p.nr_tasks)
-		usleep(1000);
+		usleep(USEC_PER_MSEC);
 
 	BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
 
@@ -1488,9 +1489,9 @@
 
 		timersub(&stop, &start, &diff);
 
-		startup_sec = diff.tv_sec * 1000000000.0;
-		startup_sec += diff.tv_usec * 1000.0;
-		startup_sec /= 1e9;
+		startup_sec = diff.tv_sec * NSEC_PER_SEC;
+		startup_sec += diff.tv_usec * NSEC_PER_USEC;
+		startup_sec /= NSEC_PER_SEC;
 
 		tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
 		tprintf(" #\n");
@@ -1529,14 +1530,14 @@
 	tprintf("\n ###\n");
 	tprintf("\n");
 
-	runtime_sec_max = diff.tv_sec * 1000000000.0;
-	runtime_sec_max += diff.tv_usec * 1000.0;
-	runtime_sec_max /= 1e9;
+	runtime_sec_max = diff.tv_sec * NSEC_PER_SEC;
+	runtime_sec_max += diff.tv_usec * NSEC_PER_USEC;
+	runtime_sec_max /= NSEC_PER_SEC;
 
-	runtime_sec_min = runtime_ns_min/1e9;
+	runtime_sec_min = runtime_ns_min / NSEC_PER_SEC;
 
 	bytes = g->bytes_done;
-	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;
+	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC;
 
 	if (g->p.measure_convergence) {
 		print_res(name, runtime_sec_max,
@@ -1562,7 +1563,7 @@
 	print_res(name, bytes / 1e9,
 		"GB,", "data-total",		"GB data processed, total");
 
-	print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
+	print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks),
 		"nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
 
 	print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
@@ -1581,9 +1582,9 @@
 				snprintf(tname, 32, "process%d:thread%d", p, t);
 				print_res(tname, td->speed_gbs,
 					"GB/sec",	"thread-speed", "GB/sec/thread speed");
-				print_res(tname, td->system_time_ns / 1e9,
+				print_res(tname, td->system_time_ns / NSEC_PER_SEC,
 					"secs",	"thread-system-time", "system CPU time/thread");
-				print_res(tname, td->user_time_ns / 1e9,
+				print_res(tname, td->user_time_ns / NSEC_PER_SEC,
 					"secs",	"thread-user-time", "user CPU time/thread");
 			}
 		}
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index bfaf950..6a111e7 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -29,6 +29,7 @@
 #include <poll.h>
 #include <limits.h>
 #include <err.h>
+#include <linux/time64.h>
 
 #define DATASIZE 100
 
@@ -312,11 +313,11 @@
 		       thread_mode ? "threads" : "processes");
 		printf(" %14s: %lu.%03lu [sec]\n", "Total time",
 		       diff.tv_sec,
-		       (unsigned long) (diff.tv_usec/1000));
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
 		break;
 	case BENCH_FORMAT_SIMPLE:
 		printf("%lu.%03lu\n", diff.tv_sec,
-		       (unsigned long) (diff.tv_usec/1000));
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
 		break;
 	default:
 		/* reaching here is something disaster */
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 1dc2d13..2243f01 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -25,6 +25,7 @@
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/syscall.h>
+#include <linux/time64.h>
 
 #include <pthread.h>
 
@@ -153,24 +154,24 @@
 		printf("# Executed %d pipe operations between two %s\n\n",
 			loops, threaded ? "threads" : "processes");
 
-		result_usec = diff.tv_sec * 1000000;
+		result_usec = diff.tv_sec * USEC_PER_SEC;
 		result_usec += diff.tv_usec;
 
 		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
 		       diff.tv_sec,
-		       (unsigned long) (diff.tv_usec/1000));
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
 
 		printf(" %14lf usecs/op\n",
 		       (double)result_usec / (double)loops);
 		printf(" %14d ops/sec\n",
 		       (int)((double)loops /
-			     ((double)result_usec / (double)1000000)));
+			     ((double)result_usec / (double)USEC_PER_SEC)));
 		break;
 
 	case BENCH_FORMAT_SIMPLE:
 		printf("%lu.%03lu\n",
 		       diff.tv_sec,
-		       (unsigned long) (diff.tv_usec / 1000));
+		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
 		break;
 
 	default:
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 9c1034d..ebb6283 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -30,6 +30,7 @@
 #include "util/tool.h"
 #include "util/data.h"
 #include "arch/common.h"
+#include "util/block-range.h"
 
 #include <dlfcn.h>
 #include <linux/bitmap.h>
@@ -46,6 +47,103 @@
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 };
 
+/*
+ * Given one basic block:
+ *
+ *	from	to		branch_i
+ *	* ----> *
+ *		|
+ *		| block
+ *		v
+ *		* ----> *
+ *		from	to	branch_i+1
+ *
+ * where the horizontal are the branches and the vertical is the executed
+ * block of instructions.
+ *
+ * We count, for each 'instruction', the number of blocks that covered it as
+ * well as count the ratio each branch is taken.
+ *
+ * We can do this without knowing the actual instruction stream by keeping
+ * track of the address ranges. We break down ranges such that there is no
+ * overlap and iterate from the start until the end.
+ *
+ * @acme: once we parse the objdump output _before_ processing the samples,
+ * we can easily fold the branch.cycles IPC bits in.
+ */
+static void process_basic_block(struct addr_map_symbol *start,
+				struct addr_map_symbol *end,
+				struct branch_flags *flags)
+{
+	struct symbol *sym = start->sym;
+	struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
+	struct block_range_iter iter;
+	struct block_range *entry;
+
+	/*
+	 * Sanity; NULL isn't executable and the CPU cannot execute backwards
+	 */
+	if (!start->addr || start->addr > end->addr)
+		return;
+
+	iter = block_range__create(start->addr, end->addr);
+	if (!block_range_iter__valid(&iter))
+		return;
+
+	/*
+	 * First block in range is a branch target.
+	 */
+	entry = block_range_iter(&iter);
+	assert(entry->is_target);
+	entry->entry++;
+
+	do {
+		entry = block_range_iter(&iter);
+
+		entry->coverage++;
+		entry->sym = sym;
+
+		if (notes)
+			notes->max_coverage = max(notes->max_coverage, entry->coverage);
+
+	} while (block_range_iter__next(&iter));
+
+	/*
+	 * Last block in rage is a branch.
+	 */
+	entry = block_range_iter(&iter);
+	assert(entry->is_branch);
+	entry->taken++;
+	if (flags->predicted)
+		entry->pred++;
+}
+
+static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
+				 struct perf_sample *sample)
+{
+	struct addr_map_symbol *prev = NULL;
+	struct branch_info *bi;
+	int i;
+
+	if (!bs || !bs->nr)
+		return;
+
+	bi = sample__resolve_bstack(sample, al);
+	if (!bi)
+		return;
+
+	for (i = bs->nr - 1; i >= 0; i--) {
+		/*
+		 * XXX filter against symbol
+		 */
+		if (prev)
+			process_basic_block(prev, &bi[i].from, &bi[i].flags);
+		prev = &bi[i].to;
+	}
+
+	free(bi);
+}
+
 static int perf_evsel__add_sample(struct perf_evsel *evsel,
 				  struct perf_sample *sample,
 				  struct addr_location *al,
@@ -72,6 +170,12 @@
 		return 0;
 	}
 
+	/*
+	 * XXX filtered samples can still have branch entires pointing into our
+	 * symbol and are missed.
+	 */
+	process_branch_stack(sample->branch_stack, al, sample);
+
 	sample->period = 1;
 	sample->weight = 1;
 
@@ -204,8 +308,6 @@
 	struct perf_evsel *pos;
 	u64 total_nr_samples;
 
-	machines__set_symbol_filter(&session->machines, symbol__annotate_init);
-
 	if (ann->cpu_list) {
 		ret = perf_session__cpu_bitmap(session, ann->cpu_list,
 					       ann->cpu_bitmap);
@@ -367,7 +469,10 @@
 	if (annotate.session == NULL)
 		return -1;
 
-	symbol_conf.priv_size = sizeof(struct annotation);
+	ret = symbol__annotation_init();
+	if (ret < 0)
+		goto out_delete;
+
 	symbol_conf.try_vmlinux_path = true;
 
 	ret = symbol__init(&annotate.session->header.env);
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 21ee753..9ff0db4 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1033,7 +1033,9 @@
 }
 
 static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
-		       struct hists *hists __maybe_unused)
+		       struct hists *hists __maybe_unused,
+		       int line __maybe_unused,
+		       int *span __maybe_unused)
 {
 	struct diff_hpp_fmt *dfmt =
 		container_of(fmt, struct diff_hpp_fmt, fmt);
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 73c1c4c..b9bc7e3 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -429,7 +429,7 @@
 	if (al.map != NULL) {
 		if (!al.map->dso->hit) {
 			al.map->dso->hit = 1;
-			if (map__load(al.map, NULL) >= 0) {
+			if (map__load(al.map) >= 0) {
 				dso__inject_build_id(al.map->dso, tool, machine);
 				/*
 				 * If this fails, too bad, let the other side
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index fdde1bd..d426dcb 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -330,7 +330,7 @@
 	}
 
 	kernel_map = machine__kernel_map(machine);
-	if (map__load(kernel_map, NULL) < 0) {
+	if (map__load(kernel_map) < 0) {
 		pr_err("cannot load kernel map\n");
 		return -ENOENT;
 	}
@@ -979,7 +979,7 @@
 		if (is_caller) {
 			addr = data->call_site;
 			if (!raw_ip)
-				sym = machine__find_kernel_function(machine, addr, &map, NULL);
+				sym = machine__find_kernel_function(machine, addr, &map);
 		} else
 			addr = data->ptr;
 
@@ -1043,8 +1043,7 @@
 		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
-		sym = machine__find_kernel_function(machine, data->callsite,
-						    &map, NULL);
+		sym = machine__find_kernel_function(machine, data->callsite, &map);
 		if (sym && sym->name)
 			caller = sym->name;
 		else
@@ -1086,8 +1085,7 @@
 		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
-		sym = machine__find_kernel_function(machine, data->callsite,
-						    &map, NULL);
+		sym = machine__find_kernel_function(machine, data->callsite, &map);
 		if (sym && sym->name)
 			caller = sym->name;
 		else
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 5e2127e..08fa88f 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -24,6 +24,7 @@
 #include <sys/timerfd.h>
 #endif
 
+#include <linux/time64.h>
 #include <termios.h>
 #include <semaphore.h>
 #include <pthread.h>
@@ -362,7 +363,7 @@
 		if (!skip_event(decode)) {
 			pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
 				 sample->time, sample->pid, vcpu_record->vcpu_id,
-				 decode, time_diff/1000);
+				 decode, time_diff / NSEC_PER_USEC);
 		}
 	}
 
@@ -608,15 +609,15 @@
 		pr_info("%10llu ", (unsigned long long)ecount);
 		pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
 		pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
-		pr_info("%9.2fus ", (double)min / 1e3);
-		pr_info("%9.2fus ", (double)max / 1e3);
-		pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3,
+		pr_info("%9.2fus ", (double)min / NSEC_PER_USEC);
+		pr_info("%9.2fus ", (double)max / NSEC_PER_USEC);
+		pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount / NSEC_PER_USEC,
 			kvm_event_rel_stddev(vcpu, event));
 		pr_info("\n");
 	}
 
 	pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
-		kvm->total_count, kvm->total_time / 1e3);
+		kvm->total_count, kvm->total_time / (double)NSEC_PER_USEC);
 
 	if (kvm->lost_events)
 		pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index d608a2c..d1ce29b 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -88,6 +88,9 @@
 	if (mem->operation & MEM_OPERATION_LOAD)
 		perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
 
+	if (mem->operation & MEM_OPERATION_STORE)
+		perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+
 	if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
 		rec_argv[i++] = "-W";
 
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index ee5b421..f87996b 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -326,6 +326,11 @@
 	if (ret < 0)
 		goto out_cleanup;
 
+	if (params.command == 'D') {	/* it shows definition */
+		ret = show_probe_trace_events(pevs, npevs);
+		goto out_cleanup;
+	}
+
 	ret = apply_perf_probe_events(pevs, npevs);
 	if (ret < 0)
 		goto out_cleanup;
@@ -454,6 +459,14 @@
 	return ret;
 }
 
+#ifdef HAVE_DWARF_SUPPORT
+#define PROBEDEF_STR	\
+	"[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT [[NAME=]ARG ...]"
+#else
+#define PROBEDEF_STR	"[EVENT=]FUNC[+OFF|%return] [[NAME=]ARG ...]"
+#endif
+
+
 static int
 __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 {
@@ -479,13 +492,7 @@
 			     opt_set_filter_with_command, DEFAULT_LIST_FILTER),
 	OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
 		     opt_set_filter_with_command),
-	OPT_CALLBACK('a', "add", NULL,
-#ifdef HAVE_DWARF_SUPPORT
-		"[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
-		" [[NAME=]ARG ...]",
-#else
-		"[EVENT=]FUNC[+OFF|%return] [[NAME=]ARG ...]",
-#endif
+	OPT_CALLBACK('a', "add", NULL, PROBEDEF_STR,
 		"probe point definition, where\n"
 		"\t\tGROUP:\tGroup name (optional)\n"
 		"\t\tEVENT:\tEvent name\n"
@@ -503,6 +510,9 @@
 		"\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n",
 #endif
 		opt_add_probe_event),
+	OPT_CALLBACK('D', "definition", NULL, PROBEDEF_STR,
+		"Show trace event definition of given traceevent for k/uprobe_events.",
+		opt_add_probe_event),
 	OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
 		    " with existing name"),
 	OPT_CALLBACK('L', "line", NULL,
@@ -548,6 +558,7 @@
 
 	set_option_flag(options, 'a', "add", PARSE_OPT_EXCLUSIVE);
 	set_option_flag(options, 'd', "del", PARSE_OPT_EXCLUSIVE);
+	set_option_flag(options, 'D', "definition", PARSE_OPT_EXCLUSIVE);
 	set_option_flag(options, 'l', "list", PARSE_OPT_EXCLUSIVE);
 #ifdef HAVE_DWARF_SUPPORT
 	set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
@@ -600,6 +611,14 @@
 	 */
 	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
 
+	/*
+	 * Except for --list, --del and --add, other command doesn't depend
+	 * nor change running kernel. So if user gives offline vmlinux,
+	 * ignore its buildid.
+	 */
+	if (!strchr("lda", params.command) && symbol_conf.vmlinux_name)
+		symbol_conf.ignore_vmlinux_buildid = true;
+
 	switch (params.command) {
 	case 'l':
 		if (params.uprobes) {
@@ -643,7 +662,9 @@
 			return ret;
 		}
 		break;
+	case 'D':
 	case 'a':
+
 		/* Ensure the last given target is used */
 		if (params.target && !params.target_used) {
 			pr_err("  Error: -x/-m must follow the probe definitions.\n");
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6355902..67d2a90 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -22,6 +22,7 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/debug.h"
+#include "util/drv_configs.h"
 #include "util/session.h"
 #include "util/tool.h"
 #include "util/symbol.h"
@@ -42,7 +43,7 @@
 #include <sched.h>
 #include <sys/mman.h>
 #include <asm/bug.h>
-
+#include <linux/time64.h>
 
 struct record {
 	struct perf_tool	tool;
@@ -96,7 +97,7 @@
 	*start = head;
 	while (true) {
 		if (evt_head - head >= (unsigned int)size) {
-			pr_debug("Finshed reading backward ring buffer: rewind\n");
+			pr_debug("Finished reading backward ring buffer: rewind\n");
 			if (evt_head - head > (unsigned int)size)
 				evt_head -= pheader->size;
 			*end = evt_head;
@@ -106,7 +107,7 @@
 		pheader = (struct perf_event_header *)(buf + (evt_head & mask));
 
 		if (pheader->size == 0) {
-			pr_debug("Finshed reading backward ring buffer: get start\n");
+			pr_debug("Finished reading backward ring buffer: get start\n");
 			*end = evt_head;
 			return 0;
 		}
@@ -383,6 +384,7 @@
 	struct perf_evlist *evlist = rec->evlist;
 	struct perf_session *session = rec->session;
 	struct record_opts *opts = &rec->opts;
+	struct perf_evsel_config_term *err_term;
 	int rc = 0;
 
 	perf_evlist__config(evlist, opts, &callchain_param);
@@ -412,6 +414,14 @@
 		goto out;
 	}
 
+	if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
+		error("failed to set config \"%s\" on event %s with %d (%s)\n",
+		      err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+		      str_error_r(errno, msg, sizeof(msg)));
+		rc = -1;
+		goto out;
+	}
+
 	rc = record__mmap(rec);
 	if (rc)
 		goto out;
@@ -954,7 +964,7 @@
 	}
 
 	if (opts->initial_delay) {
-		usleep(opts->initial_delay * 1000);
+		usleep(opts->initial_delay * USEC_PER_MSEC);
 		perf_evlist__enable(rec->evlist);
 	}
 
@@ -1563,29 +1573,39 @@
 	if (!rec->itr) {
 		rec->itr = auxtrace_record__init(rec->evlist, &err);
 		if (err)
-			return err;
+			goto out;
 	}
 
 	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
 					      rec->opts.auxtrace_snapshot_opts);
 	if (err)
-		return err;
+		goto out;
+
+	/*
+	 * Allow aliases to facilitate the lookup of symbols for address
+	 * filters. Refer to auxtrace_parse_filters().
+	 */
+	symbol_conf.allow_aliases = true;
+
+	symbol__init(NULL);
+
+	err = auxtrace_parse_filters(rec->evlist);
+	if (err)
+		goto out;
 
 	if (dry_run)
-		return 0;
+		goto out;
 
 	err = bpf__setup_stdout(rec->evlist);
 	if (err) {
 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
 		pr_err("ERROR: Setup BPF stdout failed: %s\n",
 			 errbuf);
-		return err;
+		goto out;
 	}
 
 	err = -ENOMEM;
 
-	symbol__init(NULL);
-
 	if (symbol_conf.kptr_restrict)
 		pr_warning(
 "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
@@ -1633,7 +1653,7 @@
 	if (rec->evlist->nr_entries == 0 &&
 	    perf_evlist__add_default(rec->evlist) < 0) {
 		pr_err("Not enough memory for event selector list\n");
-		goto out_symbol_exit;
+		goto out;
 	}
 
 	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
@@ -1653,7 +1673,7 @@
 		ui__error("%s", errbuf);
 
 		err = -saved_errno;
-		goto out_symbol_exit;
+		goto out;
 	}
 
 	err = -ENOMEM;
@@ -1662,7 +1682,7 @@
 
 	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
 	if (err)
-		goto out_symbol_exit;
+		goto out;
 
 	/*
 	 * We take all buildids when the file contains
@@ -1674,11 +1694,11 @@
 
 	if (record_opts__config(&rec->opts)) {
 		err = -EINVAL;
-		goto out_symbol_exit;
+		goto out;
 	}
 
 	err = __cmd_record(&record, argc, argv);
-out_symbol_exit:
+out:
 	perf_evlist__delete(rec->evlist);
 	symbol__exit();
 	auxtrace_record__free(rec->itr);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 949e5a1..6e88460 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -89,6 +89,10 @@
 		rep->queue_size = perf_config_u64(var, value);
 		return 0;
 	}
+	if (!strcmp(var, "report.sort_order")) {
+		default_sort_order = strdup(value);
+		return 0;
+	}
 
 	return 0;
 }
@@ -931,7 +935,6 @@
 
 	if (symbol_conf.report_hierarchy) {
 		/* disable incompatible options */
-		symbol_conf.event_group = false;
 		symbol_conf.cumulate_callchain = false;
 
 		if (field_order) {
@@ -980,9 +983,9 @@
 	 * implementation.
 	 */
 	if (ui__has_annotation()) {
-		symbol_conf.priv_size = sizeof(struct annotation);
-		machines__set_symbol_filter(&session->machines,
-					    symbol__annotate_init);
+		ret = symbol__annotation_init();
+		if (ret < 0)
+			goto error;
 		/*
  		 * For searching by name on the "Browse map details".
  		 * providing it only in verbose mode not to bloat too
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 0dfe8df..f5503ca 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -26,6 +26,7 @@
 #include <pthread.h>
 #include <math.h>
 #include <api/fs/fs.h>
+#include <linux/time64.h>
 
 #define PR_SET_NAME		15               /* Set process name */
 #define MAX_CPUS		4096
@@ -199,7 +200,7 @@
 
 	clock_gettime(CLOCK_MONOTONIC, &ts);
 
-	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
 }
 
 static void burn_nsecs(struct perf_sched *sched, u64 nsecs)
@@ -223,7 +224,7 @@
 
 static void calibrate_run_measurement_overhead(struct perf_sched *sched)
 {
-	u64 T0, T1, delta, min_delta = 1000000000ULL;
+	u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
 	int i;
 
 	for (i = 0; i < 10; i++) {
@@ -240,7 +241,7 @@
 
 static void calibrate_sleep_measurement_overhead(struct perf_sched *sched)
 {
-	u64 T0, T1, delta, min_delta = 1000000000ULL;
+	u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
 	int i;
 
 	for (i = 0; i < 10; i++) {
@@ -452,8 +453,8 @@
 	err = getrusage(RUSAGE_SELF, &ru);
 	BUG_ON(err);
 
-	sum =  ru.ru_utime.tv_sec*1e9 + ru.ru_utime.tv_usec*1e3;
-	sum += ru.ru_stime.tv_sec*1e9 + ru.ru_stime.tv_usec*1e3;
+	sum =  ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC;
+	sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC;
 
 	return sum;
 }
@@ -667,12 +668,12 @@
 		sched->run_avg = delta;
 	sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat;
 
-	printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0);
+	printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC);
 
-	printf("ravg: %0.2f, ", (double)sched->run_avg / 1e6);
+	printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC);
 
 	printf("cpu: %0.2f / %0.2f",
-		(double)sched->cpu_usage / 1e6, (double)sched->runavg_cpu_usage / 1e6);
+		(double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC);
 
 #if 0
 	/*
@@ -680,8 +681,8 @@
 	 * accurate than the sched->sum_exec_runtime based statistics:
 	 */
 	printf(" [%0.2f / %0.2f]",
-		(double)sched->parent_cpu_usage/1e6,
-		(double)sched->runavg_parent_cpu_usage/1e6);
+		(double)sched->parent_cpu_usage / NSEC_PER_MSEC,
+		(double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC);
 #endif
 
 	printf("\n");
@@ -696,13 +697,13 @@
 	u64 T0, T1;
 
 	T0 = get_nsecs();
-	burn_nsecs(sched, 1e6);
+	burn_nsecs(sched, NSEC_PER_MSEC);
 	T1 = get_nsecs();
 
 	printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);
 
 	T0 = get_nsecs();
-	sleep_nsecs(1e6);
+	sleep_nsecs(NSEC_PER_MSEC);
 	T1 = get_nsecs();
 
 	printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
@@ -1213,10 +1214,10 @@
 	avg = work_list->total_lat / work_list->nb_atoms;
 
 	printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n",
-	      (double)work_list->total_runtime / 1e6,
-		 work_list->nb_atoms, (double)avg / 1e6,
-		 (double)work_list->max_lat / 1e6,
-		 (double)work_list->max_lat_at / 1e9);
+	      (double)work_list->total_runtime / NSEC_PER_MSEC,
+		 work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
+		 (double)work_list->max_lat / NSEC_PER_MSEC,
+		 (double)work_list->max_lat_at / NSEC_PER_SEC);
 }
 
 static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
@@ -1491,7 +1492,7 @@
 	if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
 		goto out;
 
-	color_fprintf(stdout, color, "  %12.6f secs ", (double)timestamp/1e9);
+	color_fprintf(stdout, color, "  %12.6f secs ", (double)timestamp / NSEC_PER_SEC);
 	if (new_shortname) {
 		const char *pid_color = color;
 
@@ -1753,7 +1754,7 @@
 
 	printf(" -----------------------------------------------------------------------------------------------------------------\n");
 	printf("  TOTAL:                |%11.3f ms |%9" PRIu64 " |\n",
-		(double)sched->all_runtime / 1e6, sched->all_count);
+		(double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count);
 
 	printf(" ---------------------------------------------------\n");
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 971ff91..7228d14 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -24,6 +24,7 @@
 #include "util/thread-stack.h"
 #include <linux/bitmap.h>
 #include <linux/stringify.h>
+#include <linux/time64.h>
 #include "asm/bug.h"
 #include "util/mem-events.h"
 
@@ -371,14 +372,16 @@
 
 	if (!no_callchain) {
 		bool use_callchain = false;
+		bool not_pipe = false;
 
 		evlist__for_each_entry(session->evlist, evsel) {
+			not_pipe = true;
 			if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
 				use_callchain = true;
 				break;
 			}
 		}
-		if (!use_callchain)
+		if (not_pipe && !use_callchain)
 			symbol_conf.use_callchain = false;
 	}
 
@@ -462,9 +465,9 @@
 
 	if (PRINT_FIELD(TIME)) {
 		nsecs = sample->time;
-		secs = nsecs / NSECS_PER_SEC;
-		nsecs -= secs * NSECS_PER_SEC;
-		usecs = nsecs / NSECS_PER_USEC;
+		secs = nsecs / NSEC_PER_SEC;
+		nsecs -= secs * NSEC_PER_SEC;
+		usecs = nsecs / NSEC_PER_USEC;
 		if (nanosecs)
 			printf("%5lu.%09llu: ", secs, nsecs);
 		else
@@ -519,11 +522,11 @@
 
 		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
 		if (alf.map)
-			alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
+			alf.sym = map__find_symbol(alf.map, alf.addr);
 
 		thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
 		if (alt.map)
-			alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
+			alt.sym = map__find_symbol(alt.map, alt.addr);
 
 		symbol__fprintf_symname_offs(alf.sym, &alf, stdout);
 		putchar('/');
@@ -1690,8 +1693,13 @@
 	snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
 
 	scripts_dir = opendir(scripts_path);
-	if (!scripts_dir)
-		return -1;
+	if (!scripts_dir) {
+		fprintf(stdout,
+			"open(%s) failed.\n"
+			"Check \"PERF_EXEC_PATH\" env to set scripts dir.\n",
+			scripts_path);
+		exit(-1);
+	}
 
 	for_each_lang(scripts_path, scripts_dir, lang_dirent) {
 		snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
@@ -2116,7 +2124,7 @@
 		     "Valid types: hw,sw,trace,raw. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-		     "callindent", parse_output_fields),
+		     "bpf-output,callindent", parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0c16d20..688dea7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -52,6 +52,7 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/debug.h"
+#include "util/drv_configs.h"
 #include "util/color.h"
 #include "util/stat.h"
 #include "util/header.h"
@@ -65,6 +66,7 @@
 #include "util/group.h"
 #include "asm/bug.h"
 
+#include <linux/time64.h>
 #include <api/fs/fs.h>
 #include <stdlib.h>
 #include <sys/prctl.h>
@@ -172,7 +174,7 @@
 {
 	r->tv_sec = a->tv_sec - b->tv_sec;
 	if (a->tv_nsec < b->tv_nsec) {
-		r->tv_nsec = a->tv_nsec + 1000000000L - b->tv_nsec;
+		r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
 		r->tv_sec--;
 	} else {
 		r->tv_nsec = a->tv_nsec - b->tv_nsec ;
@@ -331,7 +333,7 @@
 	return 0;
 }
 
-static void read_counters(bool close_counters)
+static void read_counters(void)
 {
 	struct perf_evsel *counter;
 
@@ -341,11 +343,6 @@
 
 		if (perf_stat_process_counter(&stat_config, counter))
 			pr_warning("failed to process counter %s\n", counter->name);
-
-		if (close_counters) {
-			perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter),
-					     thread_map__nr(evsel_list->threads));
-		}
 	}
 }
 
@@ -353,13 +350,13 @@
 {
 	struct timespec ts, rs;
 
-	read_counters(false);
+	read_counters();
 
 	clock_gettime(CLOCK_MONOTONIC, &ts);
 	diff_timespec(&rs, &ts, &ref_time);
 
 	if (STAT_RECORD) {
-		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL))
+		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
 			pr_err("failed to write stat round event\n");
 	}
 
@@ -369,7 +366,7 @@
 static void enable_counters(void)
 {
 	if (initial_delay)
-		usleep(initial_delay * 1000);
+		usleep(initial_delay * USEC_PER_MSEC);
 
 	/*
 	 * We need to enable counters only if:
@@ -380,6 +377,17 @@
 		perf_evlist__enable(evsel_list);
 }
 
+static void disable_counters(void)
+{
+	/*
+	 * If we don't have tracee (attaching to task or cpu), counters may
+	 * still be running. To get accurate group ratios, we must stop groups
+	 * from counting before reading their constituent counters.
+	 */
+	if (!target__none(&target))
+		perf_evlist__disable(evsel_list);
+}
+
 static volatile int workload_exec_errno;
 
 /*
@@ -533,10 +541,11 @@
 	int status = 0;
 	const bool forks = (argc > 0);
 	bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false;
+	struct perf_evsel_config_term *err_term;
 
 	if (interval) {
-		ts.tv_sec  = interval / 1000;
-		ts.tv_nsec = (interval % 1000) * 1000000;
+		ts.tv_sec  = interval / USEC_PER_MSEC;
+		ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
 	} else {
 		ts.tv_sec  = 1;
 		ts.tv_nsec = 0;
@@ -604,6 +613,13 @@
 		return -1;
 	}
 
+	if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
+		error("failed to set config \"%s\" on event %s with %d (%s)\n",
+		      err_term->val.drv_cfg, perf_evsel__name(counter), errno,
+		      str_error_r(errno, msg, sizeof(msg)));
+		return -1;
+	}
+
 	if (STAT_RECORD) {
 		int err, fd = perf_data_file__fd(&perf_stat.file);
 
@@ -657,11 +673,20 @@
 		}
 	}
 
+	disable_counters();
+
 	t1 = rdclock();
 
 	update_stats(&walltime_nsecs_stats, t1 - t0);
 
-	read_counters(true);
+	/*
+	 * Closing a group leader splits the group, and as we only disable
+	 * group leaders, results in remaining events becoming enabled. To
+	 * avoid arbitrary skew, we must read all counters before closing any
+	 * group leaders.
+	 */
+	read_counters();
+	perf_evlist__close(evsel_list);
 
 	return WEXITSTATUS(status);
 }
@@ -956,7 +981,7 @@
 static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	FILE *output = stat_config.output;
-	double msecs = avg / 1e6;
+	double msecs = avg / NSEC_PER_MSEC;
 	const char *fmt_v, *fmt_n;
 	char name[25];
 
@@ -1445,7 +1470,7 @@
 	if (!null_run)
 		fprintf(output, "\n");
 	fprintf(output, " %17.9f seconds time elapsed",
-			avg_stats(&walltime_nsecs_stats)/1e9);
+			avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC);
 	if (run_count > 1) {
 		fprintf(output, "                                        ");
 		print_noise_pct(stddev_stats(&walltime_nsecs_stats),
@@ -2160,8 +2185,8 @@
 		update_stats(&walltime_nsecs_stats, stat_round->time);
 
 	if (stat_config.interval && stat_round->time) {
-		tsh.tv_sec  = stat_round->time / NSECS_PER_SEC;
-		tsh.tv_nsec = stat_round->time % NSECS_PER_SEC;
+		tsh.tv_sec  = stat_round->time / NSEC_PER_SEC;
+		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
 		ts = &tsh;
 	}
 
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 733a554..e7eaa29 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -24,6 +24,7 @@
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include <linux/rbtree.h>
+#include <linux/time64.h>
 #include "util/symbol.h"
 #include "util/callchain.h"
 #include "util/strlist.h"
@@ -1288,9 +1289,9 @@
 			if (c->comm) {
 				char comm[256];
 				if (c->total_time > 5000000000) /* 5 seconds */
-					sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / 1000000000.0);
+					sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / (double)NSEC_PER_SEC);
 				else
-					sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / 1000000.0);
+					sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / (double)NSEC_PER_MSEC);
 
 				svg_text(Y, c->start_time, comm);
 			}
@@ -1637,7 +1638,7 @@
 	write_svg_file(tchart, output_name);
 
 	pr_info("Written %2.1f seconds of trace to %s.\n",
-		(tchart->last_time - tchart->first_time) / 1000000000.0, output_name);
+		(tchart->last_time - tchart->first_time) / (double)NSEC_PER_SEC, output_name);
 out_delete:
 	perf_session__delete(session);
 	return ret;
@@ -1901,10 +1902,10 @@
 	if (sscanf(arg, "%" PRIu64 "%cs", value, &unit) > 0) {
 		switch (unit) {
 		case 'm':
-			*value *= 1000000;
+			*value *= NSEC_PER_MSEC;
 			break;
 		case 'u':
-			*value *= 1000;
+			*value *= NSEC_PER_USEC;
 			break;
 		case 'n':
 			break;
@@ -1928,7 +1929,7 @@
 			.ordered_events	 = true,
 		},
 		.proc_num = 15,
-		.min_time = 1000000,
+		.min_time = NSEC_PER_MSEC,
 		.merge_dist = 1000,
 	};
 	const char *output_name = "output.svg";
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 418ed94..fe3af95 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -24,6 +24,7 @@
 #include "util/annotate.h"
 #include "util/config.h"
 #include "util/color.h"
+#include "util/drv_configs.h"
 #include "util/evlist.h"
 #include "util/evsel.h"
 #include "util/machine.h"
@@ -68,6 +69,7 @@
 #include <sys/mman.h>
 
 #include <linux/stringify.h>
+#include <linux/time64.h>
 #include <linux/types.h>
 
 static volatile int done;
@@ -624,7 +626,7 @@
 	display_setup_sig();
 	pthread__unblock_sigwinch();
 repeat:
-	delay_msecs = top->delay_secs * 1000;
+	delay_msecs = top->delay_secs * MSEC_PER_SEC;
 	set_term_quiet_input(&save);
 	/* trash return*/
 	getc(stdin);
@@ -656,34 +658,6 @@
 	return NULL;
 }
 
-static int symbol_filter(struct map *map, struct symbol *sym)
-{
-	const char *name = sym->name;
-
-	if (!__map__is_kernel(map))
-		return 0;
-	/*
-	 * ppc64 uses function descriptors and appends a '.' to the
-	 * start of every instruction address. Remove it.
-	 */
-	if (name[0] == '.')
-		name++;
-
-	if (!strcmp(name, "_text") ||
-	    !strcmp(name, "_etext") ||
-	    !strcmp(name, "_sinittext") ||
-	    !strncmp("init_module", name, 11) ||
-	    !strncmp("cleanup_module", name, 14) ||
-	    strstr(name, "_text_start") ||
-	    strstr(name, "_text_end"))
-		return 1;
-
-	if (symbol__is_idle(sym))
-		sym->ignore = true;
-
-	return 0;
-}
-
 static int hist_iter__top_callback(struct hist_entry_iter *iter,
 				   struct addr_location *al, bool single,
 				   void *arg)
@@ -782,7 +756,7 @@
 		}
 	}
 
-	if (al.sym == NULL || !al.sym->ignore) {
+	if (al.sym == NULL || !al.sym->idle) {
 		struct hists *hists = evsel__hists(evsel);
 		struct hist_entry_iter iter = {
 			.evsel		= evsel,
@@ -940,6 +914,10 @@
 
 static int __cmd_top(struct perf_top *top)
 {
+	char msg[512];
+	struct perf_evsel *pos;
+	struct perf_evsel_config_term *err_term;
+	struct perf_evlist *evlist = top->evlist;
 	struct record_opts *opts = &top->record_opts;
 	pthread_t thread;
 	int ret;
@@ -948,8 +926,6 @@
 	if (top->session == NULL)
 		return -1;
 
-	machines__set_symbol_filter(&top->session->machines, symbol_filter);
-
 	if (!objdump_path) {
 		ret = perf_env__lookup_objdump(&top->session->header.env);
 		if (ret)
@@ -976,6 +952,14 @@
 	if (ret)
 		goto out_delete;
 
+	ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term);
+	if (ret) {
+		error("failed to set config \"%s\" on event %s with %d (%s)\n",
+			err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+			str_error_r(errno, msg, sizeof(msg)));
+		goto out_delete;
+	}
+
 	top->session->evlist = top->evlist;
 	perf_session__set_id_hdr_size(top->session);
 
@@ -1323,7 +1307,9 @@
 	if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
 		callchain_param.order = ORDER_CALLER;
 
-	symbol_conf.priv_size = sizeof(struct annotation);
+	status = symbol__annotation_init();
+	if (status < 0)
+		goto out_delete_evlist;
 
 	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
 	if (symbol__init(NULL) < 0)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b8c6766..c298bd3 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -45,6 +45,7 @@
 #include <linux/audit.h>
 #include <linux/random.h>
 #include <linux/stringify.h>
+#include <linux/time64.h>
 
 #ifndef O_CLOEXEC
 # define O_CLOEXEC		02000000
@@ -741,6 +742,8 @@
 	  .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
 	{ .name	    = "rt_tgsigqueueinfo", .errmsg = true,
 	  .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
+	{ .name	    = "sched_getattr",	      .errmsg = true, },
+	{ .name	    = "sched_setattr",	      .errmsg = true, },
 	{ .name	    = "sched_setscheduler",   .errmsg = true,
 	  .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
 	{ .name	    = "seccomp", .errmsg = true,
@@ -2140,6 +2143,7 @@
 static int trace__set_ev_qualifier_filter(struct trace *trace)
 {
 	int err = -1;
+	struct perf_evsel *sys_exit;
 	char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
 						trace->ev_qualifier_ids.nr,
 						trace->ev_qualifier_ids.entries);
@@ -2147,8 +2151,11 @@
 	if (filter == NULL)
 		goto out_enomem;
 
-	if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
-		err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
+	if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
+					  filter)) {
+		sys_exit = trace->syscalls.events.sys_exit;
+		err = perf_evsel__append_tp_filter(sys_exit, filter);
+	}
 
 	free(filter);
 out:
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 7ed72a47..e4b717e 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -20,7 +20,6 @@
 #endif
 
 #ifdef __powerpc__
-#include "../../arch/powerpc/include/uapi/asm/unistd.h"
 #define CPUINFO_PROC	{"cpu"}
 #endif
 
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index cb0f135..9a0236a 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -14,13 +14,6 @@
 #define HAVE_ATTR_TEST
 #include "perf-sys.h"
 
-#ifndef NSEC_PER_SEC
-# define NSEC_PER_SEC			1000000000ULL
-#endif
-#ifndef NSEC_PER_USEC
-# define NSEC_PER_USEC			1000ULL
-#endif
-
 static inline unsigned long long rdclock(void)
 {
 	struct timespec ts;
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index dc51bc5..8a4ce49 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -71,7 +71,7 @@
 	$(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
 	$(Q)echo ';' >> $@
 
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64 powerpc))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
 
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
index 615780c..e6d1816 100644
--- a/tools/perf/tests/backward-ring-buffer.c
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -97,7 +97,7 @@
 
 	evlist = perf_evlist__new();
 	if (!evlist) {
-		pr_debug("No ehough memory to create evlist\n");
+		pr_debug("No enough memory to create evlist\n");
 		return TEST_FAIL;
 	}
 
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index fc54064..2673e86 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -125,7 +125,7 @@
 	/* Instead of perf_evlist__new_default, don't add default events */
 	evlist = perf_evlist__new();
 	if (!evlist) {
-		pr_debug("No ehough memory to create evlist\n");
+		pr_debug("No enough memory to create evlist\n");
 		return TEST_FAIL;
 	}
 
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 2af156a8..ff5bc63 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -263,7 +263,7 @@
 	 * Converting addresses for use by objdump requires more information.
 	 * map__load() does that.  See map__rip_2objdump() for details.
 	 */
-	if (map__load(al.map, NULL))
+	if (map__load(al.map))
 		return -1;
 
 	/* objdump struggles with kcore - try each map only once */
@@ -511,7 +511,7 @@
 
 	/* Load kernel map */
 	map = machine__kernel_map(machine);
-	ret = map__load(map, NULL);
+	ret = map__load(map);
 	if (ret < 0) {
 		pr_debug("map__load failed\n");
 		goto out_err;
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 8f6eb85..1046491 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -11,7 +11,7 @@
 #include "thread.h"
 #include "callchain.h"
 
-#if defined (__x86_64__) || defined (__i386__)
+#if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__)
 #include "arch-tests.h"
 #endif
 
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index e63abab..a508233 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -8,14 +8,6 @@
 #include "debug.h"
 #include "machine.h"
 
-static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused,
-					   struct symbol *sym)
-{
-	bool *visited = symbol__priv(sym);
-	*visited = true;
-	return 0;
-}
-
 #define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
 
 int test__vmlinux_matches_kallsyms(int subtest __maybe_unused)
@@ -28,6 +20,7 @@
 	enum map_type type = MAP__FUNCTION;
 	struct maps *maps = &vmlinux.kmaps.maps[type];
 	u64 mem_start, mem_end;
+	bool header_printed;
 
 	/*
 	 * Step 1:
@@ -61,7 +54,7 @@
 	 * be compacted against the list of modules found in the "vmlinux"
 	 * code and with the one got from /proc/modules from the "kallsyms" code.
 	 */
-	if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true, NULL) <= 0) {
+	if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) {
 		pr_debug("dso__load_kallsyms ");
 		goto out;
 	}
@@ -99,8 +92,7 @@
 	 * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
 	 * to fixup the symbols.
 	 */
-	if (machine__load_vmlinux_path(&vmlinux, type,
-				       vmlinux_matches_kallsyms_filter) <= 0) {
+	if (machine__load_vmlinux_path(&vmlinux, type) <= 0) {
 		pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
 		err = TEST_SKIP;
 		goto out;
@@ -126,7 +118,7 @@
 		mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
 
 		first_pair = machine__find_kernel_symbol(&kallsyms, type,
-							 mem_start, NULL, NULL);
+							 mem_start, NULL);
 		pair = first_pair;
 
 		if (pair && UM(pair->start) == mem_start) {
@@ -143,7 +135,7 @@
 				 */
 				s64 skew = mem_end - UM(pair->end);
 				if (llabs(skew) >= page_size)
-					pr_debug("%#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
+					pr_debug("WARN: %#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
 						 mem_start, sym->name, mem_end,
 						 UM(pair->end));
 
@@ -154,22 +146,23 @@
 				 * kallsyms.
 				 */
 				continue;
-
 			} else {
-				pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL);
+				pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL);
 				if (pair) {
 					if (UM(pair->start) == mem_start)
 						goto next_pair;
 
-					pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+					pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n",
 						 mem_start, sym->name, pair->name);
 				} else {
-					pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n",
+					pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n",
 						 mem_start, sym->name, first_pair->name);
 				}
+
+				continue;
 			}
 		} else
-			pr_debug("%#" PRIx64 ": %s not on kallsyms\n",
+			pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n",
 				 mem_start, sym->name);
 
 		err = -1;
@@ -178,7 +171,7 @@
 	if (!verbose)
 		goto out;
 
-	pr_info("Maps only in vmlinux:\n");
+	header_printed = false;
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		struct map *
@@ -192,13 +185,18 @@
 						(map->dso->kernel ?
 							map->dso->short_name :
 							map->dso->name));
-		if (pair)
+		if (pair) {
 			pair->priv = 1;
-		else
+		} else {
+			if (!header_printed) {
+				pr_info("WARN: Maps only in vmlinux:\n");
+				header_printed = true;
+			}
 			map__fprintf(map, stderr);
+		}
 	}
 
-	pr_info("Maps in vmlinux with a different name in kallsyms:\n");
+	header_printed = false;
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		struct map *pair;
@@ -211,24 +209,33 @@
 			continue;
 
 		if (pair->start == mem_start) {
-			pair->priv = 1;
-			pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
+			if (!header_printed) {
+				pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n");
+				header_printed = true;
+			}
+
+			pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
 				map->start, map->end, map->pgoff, map->dso->name);
 			if (mem_end != pair->end)
-				pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64,
+				pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64,
 					pair->start, pair->end, pair->pgoff);
 			pr_info(" %s\n", pair->dso->name);
 			pair->priv = 1;
 		}
 	}
 
-	pr_info("Maps only in kallsyms:\n");
+	header_printed = false;
 
 	maps = &kallsyms.kmaps.maps[type];
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
-		if (!map->priv)
+		if (!map->priv) {
+			if (!header_printed) {
+				pr_info("WARN: Maps only in kallsyms:\n");
+				header_printed = true;
+			}
 			map__fprintf(map, stderr);
+		}
 	}
 out:
 	machine__exit(&kallsyms);
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index d0a3a8e..fd710ab 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -1,8 +1,4 @@
-#include <sys/mman.h>
-
-#ifndef PROT_SEM
-#define PROT_SEM 0x8
-#endif
+#include <uapi/linux/mman.h>
 
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 					       struct syscall_arg *arg)
@@ -33,31 +29,6 @@
 
 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
 
-#ifndef MAP_FIXED
-#define MAP_FIXED		     0x10
-#endif
-
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS		     0x20
-#endif
-
-#ifndef MAP_32BIT
-#define MAP_32BIT		     0x40
-#endif
-
-#ifndef MAP_STACK
-#define MAP_STACK		  0x20000
-#endif
-
-#ifndef MAP_HUGETLB
-#define MAP_HUGETLB		  0x40000
-#endif
-
-#ifndef MAP_UNINITIALIZED
-#define MAP_UNINITIALIZED	0x4000000
-#endif
-
-
 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 						struct syscall_arg *arg)
 {
@@ -95,13 +66,6 @@
 
 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
 
-#ifndef MREMAP_MAYMOVE
-#define MREMAP_MAYMOVE 1
-#endif
-#ifndef MREMAP_FIXED
-#define MREMAP_FIXED 2
-#endif
-
 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
 						  struct syscall_arg *arg)
 {
@@ -125,39 +89,6 @@
 
 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
 
-#ifndef MADV_HWPOISON
-#define MADV_HWPOISON		100
-#endif
-
-#ifndef MADV_SOFT_OFFLINE
-#define MADV_SOFT_OFFLINE	101
-#endif
-
-#ifndef MADV_MERGEABLE
-#define MADV_MERGEABLE		 12
-#endif
-
-#ifndef MADV_UNMERGEABLE
-#define MADV_UNMERGEABLE	 13
-#endif
-
-#ifndef MADV_HUGEPAGE
-#define MADV_HUGEPAGE		 14
-#endif
-
-#ifndef MADV_NOHUGEPAGE
-#define MADV_NOHUGEPAGE		 15
-#endif
-
-#ifndef MADV_DONTDUMP
-#define MADV_DONTDUMP		 16
-#endif
-
-#ifndef MADV_DODUMP
-#define MADV_DODUMP		 17
-#endif
-
-
 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
 						      struct syscall_arg *arg)
 {
@@ -170,6 +101,7 @@
 	P_MADV_BHV(SEQUENTIAL);
 	P_MADV_BHV(WILLNEED);
 	P_MADV_BHV(DONTNEED);
+	P_MADV_BHV(FREE);
 	P_MADV_BHV(REMOVE);
 	P_MADV_BHV(DONTFORK);
 	P_MADV_BHV(DOFORK);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 2e2d100..4c18271 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -495,7 +495,7 @@
 	if (!ins__is_call(dl->ins))
 		return false;
 
-	if (map_groups__find_ams(&target, NULL) ||
+	if (map_groups__find_ams(&target) ||
 	    map__rip_2objdump(target.map, target.map->map_ip(target.map,
 							     target.addr)) !=
 	    dl->ops.target.addr) {
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 13d4143..fb8e42c 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -69,8 +69,11 @@
 static void hist_browser__update_rows(struct hist_browser *hb)
 {
 	struct ui_browser *browser = &hb->b;
-	u16 header_offset = hb->show_headers ? 1 : 0, index_row;
+	struct hists *hists = hb->hists;
+	struct perf_hpp_list *hpp_list = hists->hpp_list;
+	u16 header_offset, index_row;
 
+	header_offset = hb->show_headers ? hpp_list->nr_header_lines : 0;
 	browser->rows = browser->height - header_offset;
 	/*
 	 * Verify if we were at the last line and that line isn't
@@ -99,8 +102,11 @@
 
 static void hist_browser__gotorc(struct hist_browser *browser, int row, int column)
 {
-	u16 header_offset = browser->show_headers ? 1 : 0;
+	struct hists *hists = browser->hists;
+	struct perf_hpp_list *hpp_list = hists->hpp_list;
+	u16 header_offset;
 
+	header_offset = browser->show_headers ? hpp_list->nr_header_lines : 0;
 	ui_browser__gotorc(&browser->b, row + header_offset, column);
 }
 
@@ -1074,7 +1080,7 @@
 	bool current_entry;
 };
 
-static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
 {
 	struct hpp_arg *arg = hpp->ptr;
 	int ret, len;
@@ -1091,7 +1097,6 @@
 	ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent);
 	ui_browser__printf(arg->b, "%s", hpp->buf);
 
-	advance_hpp(hpp, ret);
 	return ret;
 }
 
@@ -1496,7 +1501,9 @@
 	return hpp->size <= 0;
 }
 
-static int hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, size_t size)
+static int
+hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf,
+				 size_t size, int line)
 {
 	struct hists *hists = browser->hists;
 	struct perf_hpp dummy_hpp = {
@@ -1506,6 +1513,7 @@
 	struct perf_hpp_fmt *fmt;
 	size_t ret = 0;
 	int column = 0;
+	int span = 0;
 
 	if (symbol_conf.use_callchain) {
 		ret = scnprintf(buf, size, "  ");
@@ -1517,10 +1525,13 @@
 		if (perf_hpp__should_skip(fmt, hists)  || column++ < browser->b.horiz_scroll)
 			continue;
 
-		ret = fmt->header(fmt, &dummy_hpp, hists);
+		ret = fmt->header(fmt, &dummy_hpp, hists, line, &span);
 		if (advance_hpp_check(&dummy_hpp, ret))
 			break;
 
+		if (span)
+			continue;
+
 		ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "  ");
 		if (advance_hpp_check(&dummy_hpp, ret))
 			break;
@@ -1554,7 +1565,7 @@
 		if (column++ < browser->b.horiz_scroll)
 			continue;
 
-		ret = fmt->header(fmt, &dummy_hpp, hists);
+		ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL);
 		if (advance_hpp_check(&dummy_hpp, ret))
 			break;
 
@@ -1591,7 +1602,7 @@
 			}
 			first_col = false;
 
-			ret = fmt->header(fmt, &dummy_hpp, hists);
+			ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL);
 			dummy_hpp.buf[ret] = '\0';
 
 			start = trim(dummy_hpp.buf);
@@ -1622,14 +1633,21 @@
 
 static void hists_browser__headers(struct hist_browser *browser)
 {
-	char headers[1024];
+	struct hists *hists = browser->hists;
+	struct perf_hpp_list *hpp_list = hists->hpp_list;
 
-	hists_browser__scnprintf_headers(browser, headers,
-					 sizeof(headers));
+	int line;
 
-	ui_browser__gotorc(&browser->b, 0, 0);
-	ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
-	ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
+	for (line = 0; line < hpp_list->nr_header_lines; line++) {
+		char headers[1024];
+
+		hists_browser__scnprintf_headers(browser, headers,
+						 sizeof(headers), line);
+
+		ui_browser__gotorc(&browser->b, line, 0);
+		ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
+		ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
+	}
 }
 
 static void hist_browser__show_headers(struct hist_browser *browser)
@@ -1656,10 +1674,13 @@
 	u16 header_offset = 0;
 	struct rb_node *nd;
 	struct hist_browser *hb = container_of(browser, struct hist_browser, b);
+	struct hists *hists = hb->hists;
 
 	if (hb->show_headers) {
+		struct perf_hpp_list *hpp_list = hists->hpp_list;
+
 		hist_browser__show_headers(hb);
-		header_offset = 1;
+		header_offset = hpp_list->nr_header_lines;
 	}
 
 	ui_browser__hists_init_top(browser);
@@ -2054,10 +2075,10 @@
 	browser->b.use_navkeypressed	= true;
 	browser->show_headers		= symbol_conf.show_hist_headers;
 
-	hists__for_each_format(hists, fmt) {
-		perf_hpp__reset_width(fmt, hists);
+	hists__for_each_format(hists, fmt)
 		++browser->b.columns;
-	}
+
+	hists__reset_column_width(hists);
 }
 
 struct hist_browser *hist_browser__new(struct hists *hists)
@@ -2418,8 +2439,6 @@
 		browser->hists->dso_filter = NULL;
 		ui_helpline__pop();
 	} else {
-		if (map == NULL)
-			return 0;
 		ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s DSO\"",
 				   __map__is_kernel(map) ? "the Kernel" : map->dso->short_name);
 		browser->hists->dso_filter = map->dso;
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index 8091277..98a3466 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -52,9 +52,9 @@
 
 	if (target[0] == '0' && tolower(target[1]) == 'x') {
 		u64 addr = strtoull(target, NULL, 16);
-		sym = map__find_symbol(browser->map, addr, NULL);
+		sym = map__find_symbol(browser->map, addr);
 	} else
-		sym = map__find_symbol_by_name(browser->map, target, NULL);
+		sym = map__find_symbol_by_name(browser->map, target);
 
 	if (sym != NULL) {
 		u32 *idx = symbol__browser_index(sym);
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index c5f3677..a4f02de 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -549,7 +549,7 @@
 				strcat(buf, "+");
 			first_col = false;
 
-			fmt->header(fmt, &hpp, hists);
+			fmt->header(fmt, &hpp, hists, 0, NULL);
 			strcat(buf, ltrim(rtrim(hpp.buf)));
 		}
 	}
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 4274969..3738839 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -230,13 +230,14 @@
 }
 
 static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
-			  struct hists *hists)
+			  struct hists *hists, int line __maybe_unused,
+			  int *span __maybe_unused)
 {
 	int len = hpp__width_fn(fmt, hpp, hists);
 	return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
 }
 
-static int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
 {
 	va_list args;
 	ssize_t ssize = hpp->size;
@@ -441,6 +442,7 @@
 struct perf_hpp_list perf_hpp_list = {
 	.fields	= LIST_HEAD_INIT(perf_hpp_list.fields),
 	.sorts	= LIST_HEAD_INIT(perf_hpp_list.sorts),
+	.nr_header_lines = 1,
 };
 
 #undef HPP__COLOR_PRINT_FNS
@@ -697,6 +699,21 @@
 	}
 }
 
+void hists__reset_column_width(struct hists *hists)
+{
+	struct perf_hpp_fmt *fmt;
+	struct perf_hpp_list_node *node;
+
+	hists__for_each_format(hists, fmt)
+		perf_hpp__reset_width(fmt, hists);
+
+	/* hierarchy entries have their own hpp list */
+	list_for_each_entry(node, &hists->hpp_formats, list) {
+		perf_hpp_list__for_each_format(&node->hpp, fmt)
+			perf_hpp__reset_width(fmt, hists);
+	}
+}
+
 void perf_hpp__set_user_width(const char *width_list_str)
 {
 	struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index f04a631..89d8441 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -373,7 +373,8 @@
 	return 0;
 }
 
-static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+			   struct perf_hpp_list *hpp_list)
 {
 	const char *sep = symbol_conf.field_sep;
 	struct perf_hpp_fmt *fmt;
@@ -384,7 +385,7 @@
 	if (symbol_conf.exclude_other && !he->parent)
 		return 0;
 
-	hists__for_each_format(he->hists, fmt) {
+	perf_hpp_list__for_each_format(hpp_list, fmt) {
 		if (perf_hpp__should_skip(fmt, he->hists))
 			continue;
 
@@ -410,6 +411,11 @@
 	return hpp->buf - start;
 }
 
+static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+{
+	return __hist_entry__snprintf(he, hpp, he->hists->hpp_list);
+}
+
 static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
 					 struct perf_hpp *hpp,
 					 struct hists *hists,
@@ -528,8 +534,8 @@
 	return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line);
 }
 
-static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp,
-				  const char *sep, FILE *fp)
+static int hists__fprintf_hierarchy_headers(struct hists *hists,
+					    struct perf_hpp *hpp, FILE *fp)
 {
 	bool first_node, first_col;
 	int indent;
@@ -538,6 +544,7 @@
 	unsigned header_width = 0;
 	struct perf_hpp_fmt *fmt;
 	struct perf_hpp_list_node *fmt_node;
+	const char *sep = symbol_conf.field_sep;
 
 	indent = hists->nr_hpp_node;
 
@@ -549,7 +556,7 @@
 				    struct perf_hpp_list_node, list);
 
 	perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
-		fmt->header(fmt, hpp, hists);
+		fmt->header(fmt, hpp, hists, 0, NULL);
 		fprintf(fp, "%s%s", hpp->buf, sep ?: "  ");
 	}
 
@@ -569,7 +576,7 @@
 				header_width += fprintf(fp, "+");
 			first_col = false;
 
-			fmt->header(fmt, hpp, hists);
+			fmt->header(fmt, hpp, hists, 0, NULL);
 
 			header_width += fprintf(fp, "%s", trim(hpp->buf));
 		}
@@ -623,20 +630,28 @@
 	return 2;
 }
 
-static int
-hists__fprintf_hierarchy_headers(struct hists *hists,
-				 struct perf_hpp *hpp,
-				 FILE *fp)
+static void fprintf_line(struct hists *hists, struct perf_hpp *hpp,
+			 int line, FILE *fp)
 {
-	struct perf_hpp_list_node *fmt_node;
 	struct perf_hpp_fmt *fmt;
+	const char *sep = symbol_conf.field_sep;
+	bool first = true;
+	int span = 0;
 
-	list_for_each_entry(fmt_node, &hists->hpp_formats, list) {
-		perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
-			perf_hpp__reset_width(fmt, hists);
+	hists__for_each_format(hists, fmt) {
+		if (perf_hpp__should_skip(fmt, hists))
+			continue;
+
+		if (!first && !span)
+			fprintf(fp, "%s", sep ?: "  ");
+		else
+			first = false;
+
+		fmt->header(fmt, hpp, hists, line, &span);
+
+		if (!span)
+			fprintf(fp, "%s", hpp->buf);
 	}
-
-	return print_hierarchy_header(hists, hpp, symbol_conf.field_sep, fp);
 }
 
 static int
@@ -644,28 +659,23 @@
 				struct perf_hpp *hpp,
 				FILE *fp)
 {
+	struct perf_hpp_list *hpp_list = hists->hpp_list;
 	struct perf_hpp_fmt *fmt;
 	unsigned int width;
 	const char *sep = symbol_conf.field_sep;
 	bool first = true;
+	int line;
 
-	hists__for_each_format(hists, fmt) {
-		if (perf_hpp__should_skip(fmt, hists))
-			continue;
-
-		if (!first)
-			fprintf(fp, "%s", sep ?: "  ");
-		else
-			first = false;
-
-		fmt->header(fmt, hpp, hists);
-		fprintf(fp, "%s", hpp->buf);
+	for (line = 0; line < hpp_list->nr_header_lines; line++) {
+		/* first # is displayed one level up */
+		if (line)
+			fprintf(fp, "# ");
+		fprintf_line(hists, hpp, line, fp);
+		fprintf(fp, "\n");
 	}
 
-	fprintf(fp, "\n");
-
 	if (sep)
-		return 1;
+		return hpp_list->nr_header_lines;
 
 	first = true;
 
@@ -689,12 +699,12 @@
 
 	fprintf(fp, "\n");
 	fprintf(fp, "#\n");
-	return 3;
+	return hpp_list->nr_header_lines + 2;
 }
 
-static int hists__fprintf_headers(struct hists *hists, FILE *fp)
+int hists__fprintf_headers(struct hists *hists, FILE *fp)
 {
-	char bf[96];
+	char bf[1024];
 	struct perf_hpp dummy_hpp = {
 		.buf	= bf,
 		.size	= sizeof(bf),
@@ -713,7 +723,6 @@
 		      int max_cols, float min_pcnt, FILE *fp,
 		      bool use_callchain)
 {
-	struct perf_hpp_fmt *fmt;
 	struct rb_node *nd;
 	size_t ret = 0;
 	const char *sep = symbol_conf.field_sep;
@@ -724,8 +733,7 @@
 
 	init_rem_hits();
 
-	hists__for_each_format(hists, fmt)
-		perf_hpp__reset_width(fmt, hists);
+	hists__reset_column_width(hists);
 
 	if (symbol_conf.col_width_list_str)
 		perf_hpp__set_user_width(symbol_conf.col_width_list_str);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 91c5f6e..eb60e61 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,5 +1,6 @@
 libperf-y += alias.o
 libperf-y += annotate.o
+libperf-y += block-range.o
 libperf-y += build-id.o
 libperf-y += config.o
 libperf-y += ctype.o
@@ -85,6 +86,7 @@
 libperf-y += help-unknown-cmd.o
 libperf-y += mem-events.o
 libperf-y += vsprintf.o
+libperf-y += drv_configs.o
 
 libperf-$(CONFIG_LIBBPF) += bpf-loader.o
 libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
@@ -98,6 +100,7 @@
 
 libperf-$(CONFIG_DWARF) += probe-finder.o
 libperf-$(CONFIG_DWARF) += dwarf-aux.o
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
 
 libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
 libperf-$(CONFIG_LOCAL_LIBUNWIND)    += unwind-libunwind-local.o
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 4024d30..aeb5a44 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -17,6 +17,7 @@
 #include "debug.h"
 #include "annotate.h"
 #include "evsel.h"
+#include "block-range.h"
 #include <regex.h>
 #include <pthread.h>
 #include <linux/bitops.h>
@@ -53,7 +54,7 @@
 	return ins__raw_scnprintf(ins, bf, size, ops);
 }
 
-static int call__parse(struct ins_operands *ops)
+static int call__parse(struct ins_operands *ops, struct map *map)
 {
 	char *endptr, *tok, *name;
 
@@ -81,16 +82,16 @@
 	return ops->target.name == NULL ? -1 : 0;
 
 indirect_call:
-	tok = strchr(endptr, '(');
-	if (tok != NULL) {
-		ops->target.addr = 0;
+	tok = strchr(endptr, '*');
+	if (tok == NULL) {
+		struct symbol *sym = map__find_symbol(map, map->map_ip(map, ops->target.addr));
+		if (sym != NULL)
+			ops->target.name = strdup(sym->name);
+		else
+			ops->target.addr = 0;
 		return 0;
 	}
 
-	tok = strchr(endptr, '*');
-	if (tok == NULL)
-		return -1;
-
 	ops->target.addr = strtoull(tok + 1, NULL, 16);
 	return 0;
 }
@@ -117,7 +118,7 @@
 	return ins->ops == &call_ops;
 }
 
-static int jump__parse(struct ins_operands *ops)
+static int jump__parse(struct ins_operands *ops, struct map *map __maybe_unused)
 {
 	const char *s = strchr(ops->raw, '+');
 
@@ -172,7 +173,7 @@
 	return 0;
 }
 
-static int lock__parse(struct ins_operands *ops)
+static int lock__parse(struct ins_operands *ops, struct map *map)
 {
 	char *name;
 
@@ -193,7 +194,7 @@
 		return 0;
 
 	if (ops->locked.ins->ops->parse &&
-	    ops->locked.ins->ops->parse(ops->locked.ops) < 0)
+	    ops->locked.ins->ops->parse(ops->locked.ops, map) < 0)
 		goto out_free_ops;
 
 	return 0;
@@ -236,7 +237,7 @@
 	.scnprintf = lock__scnprintf,
 };
 
-static int mov__parse(struct ins_operands *ops)
+static int mov__parse(struct ins_operands *ops, struct map *map __maybe_unused)
 {
 	char *s = strchr(ops->raw, ','), *target, *comment, prev;
 
@@ -303,7 +304,7 @@
 	.scnprintf = mov__scnprintf,
 };
 
-static int dec__parse(struct ins_operands *ops)
+static int dec__parse(struct ins_operands *ops, struct map *map __maybe_unused)
 {
 	char *target, *comment, *s, prev;
 
@@ -491,13 +492,6 @@
 	return bsearch(name, instructions, nmemb, sizeof(struct ins), ins__key_cmp);
 }
 
-int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym)
-{
-	struct annotation *notes = symbol__annotation(sym);
-	pthread_mutex_init(&notes->lock, NULL);
-	return 0;
-}
-
 int symbol__alloc_hist(struct symbol *sym)
 {
 	struct annotation *notes = symbol__annotation(sym);
@@ -715,7 +709,7 @@
 	return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip);
 }
 
-static void disasm_line__init_ins(struct disasm_line *dl)
+static void disasm_line__init_ins(struct disasm_line *dl, struct map *map)
 {
 	dl->ins = ins__find(dl->name);
 
@@ -725,7 +719,7 @@
 	if (!dl->ins->ops)
 		return;
 
-	if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops) < 0)
+	if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops, map) < 0)
 		dl->ins = NULL;
 }
 
@@ -767,7 +761,8 @@
 }
 
 static struct disasm_line *disasm_line__new(s64 offset, char *line,
-					size_t privsize, int line_nr)
+					    size_t privsize, int line_nr,
+					    struct map *map)
 {
 	struct disasm_line *dl = zalloc(sizeof(*dl) + privsize);
 
@@ -782,7 +777,7 @@
 			if (disasm_line__parse(dl->line, &dl->name, &dl->ops.raw) < 0)
 				goto out_free_line;
 
-			disasm_line__init_ins(dl);
+			disasm_line__init_ins(dl, map);
 		}
 	}
 
@@ -866,6 +861,89 @@
 	return percent;
 }
 
+static const char *annotate__address_color(struct block_range *br)
+{
+	double cov = block_range__coverage(br);
+
+	if (cov >= 0) {
+		/* mark red for >75% coverage */
+		if (cov > 0.75)
+			return PERF_COLOR_RED;
+
+		/* mark dull for <1% coverage */
+		if (cov < 0.01)
+			return PERF_COLOR_NORMAL;
+	}
+
+	return PERF_COLOR_MAGENTA;
+}
+
+static const char *annotate__asm_color(struct block_range *br)
+{
+	double cov = block_range__coverage(br);
+
+	if (cov >= 0) {
+		/* mark dull for <1% coverage */
+		if (cov < 0.01)
+			return PERF_COLOR_NORMAL;
+	}
+
+	return PERF_COLOR_BLUE;
+}
+
+static void annotate__branch_printf(struct block_range *br, u64 addr)
+{
+	bool emit_comment = true;
+
+	if (!br)
+		return;
+
+#if 1
+	if (br->is_target && br->start == addr) {
+		struct block_range *branch = br;
+		double p;
+
+		/*
+		 * Find matching branch to our target.
+		 */
+		while (!branch->is_branch)
+			branch = block_range__next(branch);
+
+		p = 100 *(double)br->entry / branch->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage joined at this target in relation
+			 * to the next branch.
+			 */
+			printf(" +%.2f%%", p);
+		}
+	}
+#endif
+	if (br->is_branch && br->end == addr) {
+		double p = 100*(double)br->taken / br->coverage;
+
+		if (p > 0.1) {
+			if (emit_comment) {
+				emit_comment = false;
+				printf("\t#");
+			}
+
+			/*
+			 * The percentage of coverage leaving at this branch, and
+			 * its prediction ratio.
+			 */
+			printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->pred  / br->taken);
+		}
+	}
+}
+
+
 static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start,
 		      struct perf_evsel *evsel, u64 len, int min_pcnt, int printed,
 		      int max_lines, struct disasm_line *queue)
@@ -885,6 +963,7 @@
 		s64 offset = dl->offset;
 		const u64 addr = start + offset;
 		struct disasm_line *next;
+		struct block_range *br;
 
 		next = disasm__get_next_ip_line(&notes->src->source, dl);
 
@@ -954,8 +1033,12 @@
 		}
 
 		printf(" :	");
-		color_fprintf(stdout, PERF_COLOR_MAGENTA, "  %" PRIx64 ":", addr);
-		color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", dl->line);
+
+		br = block_range__find(addr);
+		color_fprintf(stdout, annotate__address_color(br), "  %" PRIx64 ":", addr);
+		color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line);
+		annotate__branch_printf(br, addr);
+		printf("\n");
 
 		if (ppercents != &percent)
 			free(ppercents);
@@ -1066,7 +1149,7 @@
 			parsed_line = tmp2 + 1;
 	}
 
-	dl = disasm_line__new(offset, parsed_line, privsize, *line_nr);
+	dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, map);
 	free(line);
 	(*line_nr)++;
 
@@ -1084,7 +1167,7 @@
 			.addr = dl->ops.target.addr,
 		};
 
-		if (!map_groups__find_ams(&target, NULL) &&
+		if (!map_groups__find_ams(&target) &&
 		    target.sym->start == target.al_addr)
 			dl->ops.target.name = strdup(target.sym->name);
 	}
@@ -1162,14 +1245,46 @@
 	return 0;
 }
 
+static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
+{
+	char linkname[PATH_MAX];
+	char *build_id_filename;
+
+	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+	    !dso__is_kcore(dso))
+		return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
+
+	build_id_filename = dso__build_id_filename(dso, NULL, 0);
+	if (build_id_filename) {
+		__symbol__join_symfs(filename, filename_size, build_id_filename);
+		free(build_id_filename);
+	} else {
+		if (dso->has_build_id)
+			return ENOMEM;
+		goto fallback;
+	}
+
+	if (dso__is_kcore(dso) ||
+	    readlink(filename, linkname, sizeof(linkname)) < 0 ||
+	    strstr(linkname, DSO__NAME_KALLSYMS) ||
+	    access(filename, R_OK)) {
+fallback:
+		/*
+		 * If we don't have build-ids or the build-id file isn't in the
+		 * cache, or is just a kallsyms file, well, lets hope that this
+		 * DSO is the same as when 'perf record' ran.
+		 */
+		__symbol__join_symfs(filename, filename_size, dso->long_name);
+	}
+
+	return 0;
+}
+
 int symbol__disassemble(struct symbol *sym, struct map *map, size_t privsize)
 {
 	struct dso *dso = map->dso;
-	char *filename = dso__build_id_filename(dso, NULL, 0);
-	bool free_filename = true;
 	char command[PATH_MAX * 2];
 	FILE *file;
-	int err = 0;
 	char symfs_filename[PATH_MAX];
 	struct kcore_extract kce;
 	bool delete_extract = false;
@@ -1177,38 +1292,13 @@
 	int lineno = 0;
 	int nline;
 	pid_t pid;
+	int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
 
-	if (filename)
-		symbol__join_symfs(symfs_filename, filename);
-
-	if (filename == NULL) {
-		if (dso->has_build_id)
-			return ENOMEM;
-		goto fallback;
-	} else if (dso__is_kcore(dso) ||
-		   readlink(symfs_filename, command, sizeof(command)) < 0 ||
-		   strstr(command, DSO__NAME_KALLSYMS) ||
-		   access(symfs_filename, R_OK)) {
-		free(filename);
-fallback:
-		/*
-		 * If we don't have build-ids or the build-id file isn't in the
-		 * cache, or is just a kallsyms file, well, lets hope that this
-		 * DSO is the same as when 'perf record' ran.
-		 */
-		filename = (char *)dso->long_name;
-		symbol__join_symfs(symfs_filename, filename);
-		free_filename = false;
-	}
-
-	if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
-	    !dso__is_kcore(dso)) {
-		err = SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
-		goto out_free_filename;
-	}
+	if (err)
+		return err;
 
 	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
-		 filename, sym->name, map->unmap_ip(map, sym->start),
+		 symfs_filename, sym->name, map->unmap_ip(map, sym->start),
 		 map->unmap_ip(map, sym->end));
 
 	pr_debug("annotating [%p] %30s : [%p] %30s\n",
@@ -1223,11 +1313,6 @@
 			delete_extract = true;
 			strlcpy(symfs_filename, kce.extract_filename,
 				sizeof(symfs_filename));
-			if (free_filename) {
-				free(filename);
-				free_filename = false;
-			}
-			filename = symfs_filename;
 		}
 	} else if (dso__needs_decompress(dso)) {
 		char tmp[PATH_MAX];
@@ -1236,14 +1321,14 @@
 		bool ret;
 
 		if (kmod_path__parse_ext(&m, symfs_filename))
-			goto out_free_filename;
+			goto out;
 
 		snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX");
 
 		fd = mkstemp(tmp);
 		if (fd < 0) {
 			free(m.ext);
-			goto out_free_filename;
+			goto out;
 		}
 
 		ret = decompress_to_file(m.ext, symfs_filename, fd);
@@ -1255,7 +1340,7 @@
 		close(fd);
 
 		if (!ret)
-			goto out_free_filename;
+			goto out;
 
 		strcpy(symfs_filename, tmp);
 	}
@@ -1271,7 +1356,7 @@
 		 map__rip_2objdump(map, sym->end),
 		 symbol_conf.annotate_asm_raw ? "" : "--no-show-raw",
 		 symbol_conf.annotate_src ? "-S" : "",
-		 symfs_filename, filename);
+		 symfs_filename, symfs_filename);
 
 	pr_debug("Executing: %s\n", command);
 
@@ -1333,11 +1418,10 @@
 
 	if (dso__needs_decompress(dso))
 		unlink(symfs_filename);
-out_free_filename:
+
 	if (delete_extract)
 		kcore_extract__delete(&kce);
-	if (free_filename)
-		free(filename);
+out:
 	return err;
 
 out_close_stdout:
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index f67ccb0..5bbcec1 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -36,7 +36,7 @@
 
 struct ins_ops {
 	void (*free)(struct ins_operands *ops);
-	int (*parse)(struct ins_operands *ops);
+	int (*parse)(struct ins_operands *ops, struct map *map);
 	int (*scnprintf)(struct ins *ins, char *bf, size_t size,
 			 struct ins_operands *ops);
 };
@@ -130,6 +130,7 @@
 
 struct annotation {
 	pthread_mutex_t		lock;
+	u64			max_coverage;
 	struct annotated_source *src;
 };
 
@@ -177,7 +178,6 @@
 int symbol__strerror_disassemble(struct symbol *sym, struct map *map,
 				 int errnum, char *buf, size_t buflen);
 
-int symbol__annotate_init(struct map *map, struct symbol *sym);
 int symbol__annotate_printf(struct symbol *sym, struct map *map,
 			    struct perf_evsel *evsel, bool full_paths,
 			    int min_pcnt, int max_lines, int context);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c916901..c5a6e0b1 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -16,6 +16,10 @@
 #include <sys/types.h>
 #include <sys/mman.h>
 #include <stdbool.h>
+#include <ctype.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
 
 #include <linux/kernel.h>
 #include <linux/perf_event.h>
@@ -35,9 +39,14 @@
 #include "../perf.h"
 #include "util.h"
 #include "evlist.h"
+#include "dso.h"
+#include "map.h"
+#include "pmu.h"
+#include "evsel.h"
 #include "cpumap.h"
 #include "thread_map.h"
 #include "asm/bug.h"
+#include "symbol/kallsyms.h"
 #include "auxtrace.h"
 
 #include <linux/hash.h>
@@ -892,6 +901,7 @@
 		return intel_pt_process_auxtrace_info(event, session);
 	case PERF_AUXTRACE_INTEL_BTS:
 		return intel_bts_process_auxtrace_info(event, session);
+	case PERF_AUXTRACE_CS_ETM:
 	case PERF_AUXTRACE_UNKNOWN:
 	default:
 		return -EINVAL;
@@ -1398,3 +1408,731 @@
 
 	return NULL;
 }
+
+static void addr_filter__free_str(struct addr_filter *filt)
+{
+	free(filt->str);
+	filt->action   = NULL;
+	filt->sym_from = NULL;
+	filt->sym_to   = NULL;
+	filt->filename = NULL;
+	filt->str      = NULL;
+}
+
+static struct addr_filter *addr_filter__new(void)
+{
+	struct addr_filter *filt = zalloc(sizeof(*filt));
+
+	if (filt)
+		INIT_LIST_HEAD(&filt->list);
+
+	return filt;
+}
+
+static void addr_filter__free(struct addr_filter *filt)
+{
+	if (filt)
+		addr_filter__free_str(filt);
+	free(filt);
+}
+
+static void addr_filters__add(struct addr_filters *filts,
+			      struct addr_filter *filt)
+{
+	list_add_tail(&filt->list, &filts->head);
+	filts->cnt += 1;
+}
+
+static void addr_filters__del(struct addr_filters *filts,
+			      struct addr_filter *filt)
+{
+	list_del_init(&filt->list);
+	filts->cnt -= 1;
+}
+
+void addr_filters__init(struct addr_filters *filts)
+{
+	INIT_LIST_HEAD(&filts->head);
+	filts->cnt = 0;
+}
+
+void addr_filters__exit(struct addr_filters *filts)
+{
+	struct addr_filter *filt, *n;
+
+	list_for_each_entry_safe(filt, n, &filts->head, list) {
+		addr_filters__del(filts, filt);
+		addr_filter__free(filt);
+	}
+}
+
+static int parse_num_or_str(char **inp, u64 *num, const char **str,
+			    const char *str_delim)
+{
+	*inp += strspn(*inp, " ");
+
+	if (isdigit(**inp)) {
+		char *endptr;
+
+		if (!num)
+			return -EINVAL;
+		errno = 0;
+		*num = strtoull(*inp, &endptr, 0);
+		if (errno)
+			return -errno;
+		if (endptr == *inp)
+			return -EINVAL;
+		*inp = endptr;
+	} else {
+		size_t n;
+
+		if (!str)
+			return -EINVAL;
+		*inp += strspn(*inp, " ");
+		*str = *inp;
+		n = strcspn(*inp, str_delim);
+		if (!n)
+			return -EINVAL;
+		*inp += n;
+		if (**inp) {
+			**inp = '\0';
+			*inp += 1;
+		}
+	}
+	return 0;
+}
+
+static int parse_action(struct addr_filter *filt)
+{
+	if (!strcmp(filt->action, "filter")) {
+		filt->start = true;
+		filt->range = true;
+	} else if (!strcmp(filt->action, "start")) {
+		filt->start = true;
+	} else if (!strcmp(filt->action, "stop")) {
+		filt->start = false;
+	} else if (!strcmp(filt->action, "tracestop")) {
+		filt->start = false;
+		filt->range = true;
+		filt->action += 5; /* Change 'tracestop' to 'stop' */
+	} else {
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int parse_sym_idx(char **inp, int *idx)
+{
+	*idx = -1;
+
+	*inp += strspn(*inp, " ");
+
+	if (**inp != '#')
+		return 0;
+
+	*inp += 1;
+
+	if (**inp == 'g' || **inp == 'G') {
+		*inp += 1;
+		*idx = 0;
+	} else {
+		unsigned long num;
+		char *endptr;
+
+		errno = 0;
+		num = strtoul(*inp, &endptr, 0);
+		if (errno)
+			return -errno;
+		if (endptr == *inp || num > INT_MAX)
+			return -EINVAL;
+		*inp = endptr;
+		*idx = num;
+	}
+
+	return 0;
+}
+
+static int parse_addr_size(char **inp, u64 *num, const char **str, int *idx)
+{
+	int err = parse_num_or_str(inp, num, str, " ");
+
+	if (!err && *str)
+		err = parse_sym_idx(inp, idx);
+
+	return err;
+}
+
+static int parse_one_filter(struct addr_filter *filt, const char **filter_inp)
+{
+	char *fstr;
+	int err;
+
+	filt->str = fstr = strdup(*filter_inp);
+	if (!fstr)
+		return -ENOMEM;
+
+	err = parse_num_or_str(&fstr, NULL, &filt->action, " ");
+	if (err)
+		goto out_err;
+
+	err = parse_action(filt);
+	if (err)
+		goto out_err;
+
+	err = parse_addr_size(&fstr, &filt->addr, &filt->sym_from,
+			      &filt->sym_from_idx);
+	if (err)
+		goto out_err;
+
+	fstr += strspn(fstr, " ");
+
+	if (*fstr == '/') {
+		fstr += 1;
+		err = parse_addr_size(&fstr, &filt->size, &filt->sym_to,
+				      &filt->sym_to_idx);
+		if (err)
+			goto out_err;
+		filt->range = true;
+	}
+
+	fstr += strspn(fstr, " ");
+
+	if (*fstr == '@') {
+		fstr += 1;
+		err = parse_num_or_str(&fstr, NULL, &filt->filename, " ,");
+		if (err)
+			goto out_err;
+	}
+
+	fstr += strspn(fstr, " ,");
+
+	*filter_inp += fstr - filt->str;
+
+	return 0;
+
+out_err:
+	addr_filter__free_str(filt);
+
+	return err;
+}
+
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+				    const char *filter)
+{
+	struct addr_filter *filt;
+	const char *fstr = filter;
+	int err;
+
+	while (*fstr) {
+		filt = addr_filter__new();
+		err = parse_one_filter(filt, &fstr);
+		if (err) {
+			addr_filter__free(filt);
+			addr_filters__exit(filts);
+			return err;
+		}
+		addr_filters__add(filts, filt);
+	}
+
+	return 0;
+}
+
+struct sym_args {
+	const char	*name;
+	u64		start;
+	u64		size;
+	int		idx;
+	int		cnt;
+	bool		started;
+	bool		global;
+	bool		selected;
+	bool		duplicate;
+	bool		near;
+};
+
+static bool kern_sym_match(struct sym_args *args, const char *name, char type)
+{
+	/* A function with the same name, and global or the n'th found or any */
+	return symbol_type__is_a(type, MAP__FUNCTION) &&
+	       !strcmp(name, args->name) &&
+	       ((args->global && isupper(type)) ||
+		(args->selected && ++(args->cnt) == args->idx) ||
+		(!args->global && !args->selected));
+}
+
+static int find_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (args->started) {
+		if (!args->size)
+			args->size = start - args->start;
+		if (args->selected) {
+			if (args->size)
+				return 1;
+		} else if (kern_sym_match(args, name, type)) {
+			args->duplicate = true;
+			return 1;
+		}
+	} else if (kern_sym_match(args, name, type)) {
+		args->started = true;
+		args->start = start;
+	}
+
+	return 0;
+}
+
+static int print_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (kern_sym_match(args, name, type)) {
+		pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+		       ++args->cnt, start, type, name);
+		args->near = true;
+	} else if (args->near) {
+		args->near = false;
+		pr_err("\t\twhich is near\t\t%s\n", name);
+	}
+
+	return 0;
+}
+
+static int sym_not_found_error(const char *sym_name, int idx)
+{
+	if (idx > 0) {
+		pr_err("N'th occurrence (N=%d) of symbol '%s' not found.\n",
+		       idx, sym_name);
+	} else if (!idx) {
+		pr_err("Global symbol '%s' not found.\n", sym_name);
+	} else {
+		pr_err("Symbol '%s' not found.\n", sym_name);
+	}
+	pr_err("Note that symbols must be functions.\n");
+
+	return -EINVAL;
+}
+
+static int find_kern_sym(const char *sym_name, u64 *start, u64 *size, int idx)
+{
+	struct sym_args args = {
+		.name = sym_name,
+		.idx = idx,
+		.global = !idx,
+		.selected = idx > 0,
+	};
+	int err;
+
+	*start = 0;
+	*size = 0;
+
+	err = kallsyms__parse("/proc/kallsyms", &args, find_kern_sym_cb);
+	if (err < 0) {
+		pr_err("Failed to parse /proc/kallsyms\n");
+		return err;
+	}
+
+	if (args.duplicate) {
+		pr_err("Multiple kernel symbols with name '%s'\n", sym_name);
+		args.cnt = 0;
+		kallsyms__parse("/proc/kallsyms", &args, print_kern_sym_cb);
+		pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+		       sym_name);
+		pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+		return -EINVAL;
+	}
+
+	if (!args.started) {
+		pr_err("Kernel symbol lookup: ");
+		return sym_not_found_error(sym_name, idx);
+	}
+
+	*start = args.start;
+	*size = args.size;
+
+	return 0;
+}
+
+static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
+			       char type, u64 start)
+{
+	struct sym_args *args = arg;
+
+	if (!symbol_type__is_a(type, MAP__FUNCTION))
+		return 0;
+
+	if (!args->started) {
+		args->started = true;
+		args->start = start;
+	}
+	/* Don't know exactly where the kernel ends, so we add a page */
+	args->size = round_up(start, page_size) + page_size - args->start;
+
+	return 0;
+}
+
+static int addr_filter__entire_kernel(struct addr_filter *filt)
+{
+	struct sym_args args = { .started = false };
+	int err;
+
+	err = kallsyms__parse("/proc/kallsyms", &args, find_entire_kern_cb);
+	if (err < 0 || !args.started) {
+		pr_err("Failed to parse /proc/kallsyms\n");
+		return err;
+	}
+
+	filt->addr = args.start;
+	filt->size = args.size;
+
+	return 0;
+}
+
+static int check_end_after_start(struct addr_filter *filt, u64 start, u64 size)
+{
+	if (start + size >= filt->addr)
+		return 0;
+
+	if (filt->sym_from) {
+		pr_err("Symbol '%s' (0x%"PRIx64") comes before '%s' (0x%"PRIx64")\n",
+		       filt->sym_to, start, filt->sym_from, filt->addr);
+	} else {
+		pr_err("Symbol '%s' (0x%"PRIx64") comes before address 0x%"PRIx64")\n",
+		       filt->sym_to, start, filt->addr);
+	}
+
+	return -EINVAL;
+}
+
+static int addr_filter__resolve_kernel_syms(struct addr_filter *filt)
+{
+	bool no_size = false;
+	u64 start, size;
+	int err;
+
+	if (symbol_conf.kptr_restrict) {
+		pr_err("Kernel addresses are restricted. Unable to resolve kernel symbols.\n");
+		return -EINVAL;
+	}
+
+	if (filt->sym_from && !strcmp(filt->sym_from, "*"))
+		return addr_filter__entire_kernel(filt);
+
+	if (filt->sym_from) {
+		err = find_kern_sym(filt->sym_from, &start, &size,
+				    filt->sym_from_idx);
+		if (err)
+			return err;
+		filt->addr = start;
+		if (filt->range && !filt->size && !filt->sym_to) {
+			filt->size = size;
+			no_size = !!size;
+		}
+	}
+
+	if (filt->sym_to) {
+		err = find_kern_sym(filt->sym_to, &start, &size,
+				    filt->sym_to_idx);
+		if (err)
+			return err;
+
+		err = check_end_after_start(filt, start, size);
+		if (err)
+			return err;
+		filt->size = start + size - filt->addr;
+		no_size = !!size;
+	}
+
+	/* The very last symbol in kallsyms does not imply a particular size */
+	if (no_size) {
+		pr_err("Cannot determine size of symbol '%s'\n",
+		       filt->sym_to ? filt->sym_to : filt->sym_from);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct dso *load_dso(const char *name)
+{
+	struct map *map;
+	struct dso *dso;
+
+	map = dso__new_map(name);
+	if (!map)
+		return NULL;
+
+	map__load(map);
+
+	dso = dso__get(map->dso);
+
+	map__put(map);
+
+	return dso;
+}
+
+static bool dso_sym_match(struct symbol *sym, const char *name, int *cnt,
+			  int idx)
+{
+	/* Same name, and global or the n'th found or any */
+	return !arch__compare_symbol_names(name, sym->name) &&
+	       ((!idx && sym->binding == STB_GLOBAL) ||
+		(idx > 0 && ++*cnt == idx) ||
+		idx < 0);
+}
+
+static void print_duplicate_syms(struct dso *dso, const char *sym_name)
+{
+	struct symbol *sym;
+	bool near = false;
+	int cnt = 0;
+
+	pr_err("Multiple symbols with name '%s'\n", sym_name);
+
+	sym = dso__first_symbol(dso, MAP__FUNCTION);
+	while (sym) {
+		if (dso_sym_match(sym, sym_name, &cnt, -1)) {
+			pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+			       ++cnt, sym->start,
+			       sym->binding == STB_GLOBAL ? 'g' :
+			       sym->binding == STB_LOCAL  ? 'l' : 'w',
+			       sym->name);
+			near = true;
+		} else if (near) {
+			near = false;
+			pr_err("\t\twhich is near\t\t%s\n", sym->name);
+		}
+		sym = dso__next_symbol(sym);
+	}
+
+	pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+	       sym_name);
+	pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+}
+
+static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
+			u64 *size, int idx)
+{
+	struct symbol *sym;
+	int cnt = 0;
+
+	*start = 0;
+	*size = 0;
+
+	sym = dso__first_symbol(dso, MAP__FUNCTION);
+	while (sym) {
+		if (*start) {
+			if (!*size)
+				*size = sym->start - *start;
+			if (idx > 0) {
+				if (*size)
+					return 1;
+			} else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+				print_duplicate_syms(dso, sym_name);
+				return -EINVAL;
+			}
+		} else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+			*start = sym->start;
+			*size = sym->end - sym->start;
+		}
+		sym = dso__next_symbol(sym);
+	}
+
+	if (!*start)
+		return sym_not_found_error(sym_name, idx);
+
+	return 0;
+}
+
+static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
+{
+	struct symbol *first_sym = dso__first_symbol(dso, MAP__FUNCTION);
+	struct symbol *last_sym = dso__last_symbol(dso, MAP__FUNCTION);
+
+	if (!first_sym || !last_sym) {
+		pr_err("Failed to determine filter for %s\nNo symbols found.\n",
+		       filt->filename);
+		return -EINVAL;
+	}
+
+	filt->addr = first_sym->start;
+	filt->size = last_sym->end - first_sym->start;
+
+	return 0;
+}
+
+static int addr_filter__resolve_syms(struct addr_filter *filt)
+{
+	u64 start, size;
+	struct dso *dso;
+	int err = 0;
+
+	if (!filt->sym_from && !filt->sym_to)
+		return 0;
+
+	if (!filt->filename)
+		return addr_filter__resolve_kernel_syms(filt);
+
+	dso = load_dso(filt->filename);
+	if (!dso) {
+		pr_err("Failed to load symbols from: %s\n", filt->filename);
+		return -EINVAL;
+	}
+
+	if (filt->sym_from && !strcmp(filt->sym_from, "*")) {
+		err = addr_filter__entire_dso(filt, dso);
+		goto put_dso;
+	}
+
+	if (filt->sym_from) {
+		err = find_dso_sym(dso, filt->sym_from, &start, &size,
+				   filt->sym_from_idx);
+		if (err)
+			goto put_dso;
+		filt->addr = start;
+		if (filt->range && !filt->size && !filt->sym_to)
+			filt->size = size;
+	}
+
+	if (filt->sym_to) {
+		err = find_dso_sym(dso, filt->sym_to, &start, &size,
+				   filt->sym_to_idx);
+		if (err)
+			goto put_dso;
+
+		err = check_end_after_start(filt, start, size);
+		if (err)
+			return err;
+
+		filt->size = start + size - filt->addr;
+	}
+
+put_dso:
+	dso__put(dso);
+
+	return err;
+}
+
+static char *addr_filter__to_str(struct addr_filter *filt)
+{
+	char filename_buf[PATH_MAX];
+	const char *at = "";
+	const char *fn = "";
+	char *filter;
+	int err;
+
+	if (filt->filename) {
+		at = "@";
+		fn = realpath(filt->filename, filename_buf);
+		if (!fn)
+			return NULL;
+	}
+
+	if (filt->range) {
+		err = asprintf(&filter, "%s 0x%"PRIx64"/0x%"PRIx64"%s%s",
+			       filt->action, filt->addr, filt->size, at, fn);
+	} else {
+		err = asprintf(&filter, "%s 0x%"PRIx64"%s%s",
+			       filt->action, filt->addr, at, fn);
+	}
+
+	return err < 0 ? NULL : filter;
+}
+
+static int parse_addr_filter(struct perf_evsel *evsel, const char *filter,
+			     int max_nr)
+{
+	struct addr_filters filts;
+	struct addr_filter *filt;
+	int err;
+
+	addr_filters__init(&filts);
+
+	err = addr_filters__parse_bare_filter(&filts, filter);
+	if (err)
+		goto out_exit;
+
+	if (filts.cnt > max_nr) {
+		pr_err("Error: number of address filters (%d) exceeds maximum (%d)\n",
+		       filts.cnt, max_nr);
+		err = -EINVAL;
+		goto out_exit;
+	}
+
+	list_for_each_entry(filt, &filts.head, list) {
+		char *new_filter;
+
+		err = addr_filter__resolve_syms(filt);
+		if (err)
+			goto out_exit;
+
+		new_filter = addr_filter__to_str(filt);
+		if (!new_filter) {
+			err = -ENOMEM;
+			goto out_exit;
+		}
+
+		if (perf_evsel__append_addr_filter(evsel, new_filter)) {
+			err = -ENOMEM;
+			goto out_exit;
+		}
+	}
+
+out_exit:
+	addr_filters__exit(&filts);
+
+	if (err) {
+		pr_err("Failed to parse address filter: '%s'\n", filter);
+		pr_err("Filter format is: filter|start|stop|tracestop <start symbol or address> [/ <end symbol or size>] [@<file name>]\n");
+		pr_err("Where multiple filters are separated by space or comma.\n");
+	}
+
+	return err;
+}
+
+static struct perf_pmu *perf_evsel__find_pmu(struct perf_evsel *evsel)
+{
+	struct perf_pmu *pmu = NULL;
+
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		if (pmu->type == evsel->attr.type)
+			break;
+	}
+
+	return pmu;
+}
+
+static int perf_evsel__nr_addr_filter(struct perf_evsel *evsel)
+{
+	struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+	int nr_addr_filters = 0;
+
+	if (!pmu)
+		return 0;
+
+	perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters);
+
+	return nr_addr_filters;
+}
+
+int auxtrace_parse_filters(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+	char *filter;
+	int err, max_nr;
+
+	evlist__for_each_entry(evlist, evsel) {
+		filter = evsel->filter;
+		max_nr = perf_evsel__nr_addr_filter(evsel);
+		if (!filter || !max_nr)
+			continue;
+		evsel->filter = NULL;
+		err = parse_addr_filter(evsel, filter, max_nr);
+		free(filter);
+		if (err)
+			return err;
+		pr_debug("Address filter: %s\n", evsel->filter);
+	}
+
+	return 0;
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index ac5f0d7..26fb1ee 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -41,6 +41,7 @@
 	PERF_AUXTRACE_UNKNOWN,
 	PERF_AUXTRACE_INTEL_PT,
 	PERF_AUXTRACE_INTEL_BTS,
+	PERF_AUXTRACE_CS_ETM,
 };
 
 enum itrace_period_type {
@@ -317,6 +318,48 @@
 	unsigned int alignment;
 };
 
+/**
+ * struct addr_filter - address filter.
+ * @list: list node
+ * @range: true if it is a range filter
+ * @start: true if action is 'filter' or 'start'
+ * @action: 'filter', 'start' or 'stop' ('tracestop' is accepted but converted
+ *          to 'stop')
+ * @sym_from: symbol name for the filter address
+ * @sym_to: symbol name that determines the filter size
+ * @sym_from_idx: selects n'th from symbols with the same name (0 means global
+ *                and less than 0 means symbol must be unique)
+ * @sym_to_idx: same as @sym_from_idx but for @sym_to
+ * @addr: filter address
+ * @size: filter region size (for range filters)
+ * @filename: DSO file name or NULL for the kernel
+ * @str: allocated string that contains the other string members
+ */
+struct addr_filter {
+	struct list_head	list;
+	bool			range;
+	bool			start;
+	const char		*action;
+	const char		*sym_from;
+	const char		*sym_to;
+	int			sym_from_idx;
+	int			sym_to_idx;
+	u64			addr;
+	u64			size;
+	const char		*filename;
+	char			*str;
+};
+
+/**
+ * struct addr_filters - list of address filters.
+ * @head: list of address filters
+ * @cnt: number of address filters
+ */
+struct addr_filters {
+	struct list_head	head;
+	int			cnt;
+};
+
 #ifdef HAVE_AUXTRACE_SUPPORT
 
 /*
@@ -481,6 +524,12 @@
 				      union perf_event *event);
 void events_stats__auxtrace_error_warn(const struct events_stats *stats);
 
+void addr_filters__init(struct addr_filters *filts);
+void addr_filters__exit(struct addr_filters *filts);
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+				    const char *filter);
+int auxtrace_parse_filters(struct perf_evlist *evlist);
+
 static inline int auxtrace__process_event(struct perf_session *session,
 					  union perf_event *event,
 					  struct perf_sample *sample,
@@ -639,6 +688,12 @@
 {
 }
 
+static inline
+int auxtrace_parse_filters(struct perf_evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
 int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
 			struct auxtrace_mmap_params *mp,
 			void *userpg, int fd);
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
new file mode 100644
index 0000000..7b3e1d7
--- /dev/null
+++ b/tools/perf/util/block-range.c
@@ -0,0 +1,328 @@
+#include "block-range.h"
+#include "annotate.h"
+
+struct {
+	struct rb_root root;
+	u64 blocks;
+} block_ranges;
+
+static void block_range__debug(void)
+{
+	/*
+	 * XXX still paranoid for now; see if we can make this depend on
+	 * DEBUG=1 builds.
+	 */
+#if 1
+	struct rb_node *rb;
+	u64 old = 0; /* NULL isn't executable */
+
+	for (rb = rb_first(&block_ranges.root); rb; rb = rb_next(rb)) {
+		struct block_range *entry = rb_entry(rb, struct block_range, node);
+
+		assert(old < entry->start);
+		assert(entry->start <= entry->end); /* single instruction block; jump to a jump */
+
+		old = entry->end;
+	}
+#endif
+}
+
+struct block_range *block_range__find(u64 addr)
+{
+	struct rb_node **p = &block_ranges.root.rb_node;
+	struct rb_node *parent = NULL;
+	struct block_range *entry;
+
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct block_range, node);
+
+		if (addr < entry->start)
+			p = &parent->rb_left;
+		else if (addr > entry->end)
+			p = &parent->rb_right;
+		else
+			return entry;
+	}
+
+	return NULL;
+}
+
+static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node)
+{
+	struct rb_node **p = &node->rb_left;
+	while (*p) {
+		node = *p;
+		p = &node->rb_right;
+	}
+	rb_link_node(left, node, p);
+}
+
+static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node)
+{
+	struct rb_node **p = &node->rb_right;
+	while (*p) {
+		node = *p;
+		p = &node->rb_left;
+	}
+	rb_link_node(right, node, p);
+}
+
+/**
+ * block_range__create
+ * @start: branch target starting this basic block
+ * @end:   branch ending this basic block
+ *
+ * Create all the required block ranges to precisely span the given range.
+ */
+struct block_range_iter block_range__create(u64 start, u64 end)
+{
+	struct rb_node **p = &block_ranges.root.rb_node;
+	struct rb_node *n, *parent = NULL;
+	struct block_range *next, *entry = NULL;
+	struct block_range_iter iter = { NULL, NULL };
+
+	while (*p != NULL) {
+		parent = *p;
+		entry = rb_entry(parent, struct block_range, node);
+
+		if (start < entry->start)
+			p = &parent->rb_left;
+		else if (start > entry->end)
+			p = &parent->rb_right;
+		else
+			break;
+	}
+
+	/*
+	 * Didn't find anything.. there's a hole at @start, however @end might
+	 * be inside/behind the next range.
+	 */
+	if (!*p) {
+		if (!entry) /* tree empty */
+			goto do_whole;
+
+		/*
+		 * If the last node is before, advance one to find the next.
+		 */
+		n = parent;
+		if (entry->end < start) {
+			n = rb_next(n);
+			if (!n)
+				goto do_whole;
+		}
+		next = rb_entry(n, struct block_range, node);
+
+		if (next->start <= end) { /* add head: [start...][n->start...] */
+			struct block_range *head = malloc(sizeof(struct block_range));
+			if (!head)
+				return iter;
+
+			*head = (struct block_range){
+				.start		= start,
+				.end		= next->start - 1,
+				.is_target	= 1,
+				.is_branch	= 0,
+			};
+
+			rb_link_left_of_node(&head->node, &next->node);
+			rb_insert_color(&head->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.start = head;
+			goto do_tail;
+		}
+
+do_whole:
+		/*
+		 * The whole [start..end] range is non-overlapping.
+		 */
+		entry = malloc(sizeof(struct block_range));
+		if (!entry)
+			return iter;
+
+		*entry = (struct block_range){
+			.start		= start,
+			.end		= end,
+			.is_target	= 1,
+			.is_branch	= 1,
+		};
+
+		rb_link_node(&entry->node, parent, p);
+		rb_insert_color(&entry->node, &block_ranges.root);
+		block_range__debug();
+
+		iter.start = entry;
+		iter.end   = entry;
+		goto done;
+	}
+
+	/*
+	 * We found a range that overlapped with ours, split if needed.
+	 */
+	if (entry->start < start) { /* split: [e->start...][start...] */
+		struct block_range *head = malloc(sizeof(struct block_range));
+		if (!head)
+			return iter;
+
+		*head = (struct block_range){
+			.start		= entry->start,
+			.end		= start - 1,
+			.is_target	= entry->is_target,
+			.is_branch	= 0,
+
+			.coverage	= entry->coverage,
+			.entry		= entry->entry,
+		};
+
+		entry->start		= start;
+		entry->is_target	= 1;
+		entry->entry		= 0;
+
+		rb_link_left_of_node(&head->node, &entry->node);
+		rb_insert_color(&head->node, &block_ranges.root);
+		block_range__debug();
+
+	} else if (entry->start == start)
+		entry->is_target = 1;
+
+	iter.start = entry;
+
+do_tail:
+	/*
+	 * At this point we've got: @iter.start = [@start...] but @end can still be
+	 * inside or beyond it.
+	 */
+	entry = iter.start;
+	for (;;) {
+		/*
+		 * If @end is inside @entry, split.
+		 */
+		if (end < entry->end) { /* split: [...end][...e->end] */
+			struct block_range *tail = malloc(sizeof(struct block_range));
+			if (!tail)
+				return iter;
+
+			*tail = (struct block_range){
+				.start		= end + 1,
+				.end		= entry->end,
+				.is_target	= 0,
+				.is_branch	= entry->is_branch,
+
+				.coverage	= entry->coverage,
+				.taken		= entry->taken,
+				.pred		= entry->pred,
+			};
+
+			entry->end		= end;
+			entry->is_branch	= 1;
+			entry->taken		= 0;
+			entry->pred		= 0;
+
+			rb_link_right_of_node(&tail->node, &entry->node);
+			rb_insert_color(&tail->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.end = entry;
+			goto done;
+		}
+
+		/*
+		 * If @end matches @entry, done
+		 */
+		if (end == entry->end) {
+			entry->is_branch = 1;
+			iter.end = entry;
+			goto done;
+		}
+
+		next = block_range__next(entry);
+		if (!next)
+			goto add_tail;
+
+		/*
+		 * If @end is in beyond @entry but not inside @next, add tail.
+		 */
+		if (end < next->start) { /* add tail: [...e->end][...end] */
+			struct block_range *tail;
+add_tail:
+			tail = malloc(sizeof(struct block_range));
+			if (!tail)
+				return iter;
+
+			*tail = (struct block_range){
+				.start		= entry->end + 1,
+				.end		= end,
+				.is_target	= 0,
+				.is_branch	= 1,
+			};
+
+			rb_link_right_of_node(&tail->node, &entry->node);
+			rb_insert_color(&tail->node, &block_ranges.root);
+			block_range__debug();
+
+			iter.end = tail;
+			goto done;
+		}
+
+		/*
+		 * If there is a hole between @entry and @next, fill it.
+		 */
+		if (entry->end + 1 != next->start) {
+			struct block_range *hole = malloc(sizeof(struct block_range));
+			if (!hole)
+				return iter;
+
+			*hole = (struct block_range){
+				.start		= entry->end + 1,
+				.end		= next->start - 1,
+				.is_target	= 0,
+				.is_branch	= 0,
+			};
+
+			rb_link_left_of_node(&hole->node, &next->node);
+			rb_insert_color(&hole->node, &block_ranges.root);
+			block_range__debug();
+		}
+
+		entry = next;
+	}
+
+done:
+	assert(iter.start->start == start && iter.start->is_target);
+	assert(iter.end->end == end && iter.end->is_branch);
+
+	block_ranges.blocks++;
+
+	return iter;
+}
+
+
+/*
+ * Compute coverage as:
+ *
+ *    br->coverage / br->sym->max_coverage
+ *
+ * This ensures each symbol has a 100% spot, to reflect that each symbol has a
+ * most covered section.
+ *
+ * Returns [0-1] for coverage and -1 if we had no data what so ever or the
+ * symbol does not exist.
+ */
+double block_range__coverage(struct block_range *br)
+{
+	struct symbol *sym;
+
+	if (!br) {
+		if (block_ranges.blocks)
+			return 0;
+
+		return -1;
+	}
+
+	sym = br->sym;
+	if (!sym)
+		return -1;
+
+	return (double)br->coverage / symbol__annotation(sym)->max_coverage;
+}
diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h
new file mode 100644
index 0000000..a8c8413
--- /dev/null
+++ b/tools/perf/util/block-range.h
@@ -0,0 +1,71 @@
+#ifndef __PERF_BLOCK_RANGE_H
+#define __PERF_BLOCK_RANGE_H
+
+#include "symbol.h"
+
+/*
+ * struct block_range - non-overlapping parts of basic blocks
+ * @node:	treenode
+ * @start:	inclusive start of range
+ * @end:	inclusive end of range
+ * @is_target:	@start is a jump target
+ * @is_branch:	@end is a branch instruction
+ * @coverage:	number of blocks that cover this range
+ * @taken:	number of times the branch is taken (requires @is_branch)
+ * @pred:	number of times the taken branch was predicted
+ */
+struct block_range {
+	struct rb_node node;
+
+	struct symbol *sym;
+
+	u64 start;
+	u64 end;
+
+	int is_target, is_branch;
+
+	u64 coverage;
+	u64 entry;
+	u64 taken;
+	u64 pred;
+};
+
+static inline struct block_range *block_range__next(struct block_range *br)
+{
+	struct rb_node *n = rb_next(&br->node);
+	if (!n)
+		return NULL;
+	return rb_entry(n, struct block_range, node);
+}
+
+struct block_range_iter {
+	struct block_range *start;
+	struct block_range *end;
+};
+
+static inline struct block_range *block_range_iter(struct block_range_iter *iter)
+{
+	return iter->start;
+}
+
+static inline bool block_range_iter__next(struct block_range_iter *iter)
+{
+	if (iter->start == iter->end)
+		return false;
+
+	iter->start = block_range__next(iter->start);
+	return true;
+}
+
+static inline bool block_range_iter__valid(struct block_range_iter *iter)
+{
+	if (!iter->start || !iter->end)
+		return false;
+	return true;
+}
+
+extern struct block_range *block_range__find(u64 addr);
+extern struct block_range_iter block_range__create(u64 start, u64 end);
+extern double block_range__coverage(struct block_range *br);
+
+#endif /* __PERF_BLOCK_RANGE_H */
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 1f12e4e..2b2c9b8 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -531,7 +531,7 @@
 
 	ptevs = malloc(array_sz);
 	if (!ptevs) {
-		pr_debug("No ehough memory: alloc ptevs failed\n");
+		pr_debug("No enough memory: alloc ptevs failed\n");
 		return -ENOMEM;
 	}
 
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 5651f3c..e528c40 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -620,7 +620,7 @@
 
 	ret = probe_cache__scan_sdt(cache, realname);
 	if (ret >= 0) {
-		pr_debug("Found %d SDTs in %s\n", ret, realname);
+		pr_debug4("Found %d SDTs in %s\n", ret, realname);
 		if (probe_cache__commit(cache) < 0)
 			ret = -1;
 	}
@@ -691,7 +691,7 @@
 
 	/* Update SDT cache : error is just warned */
 	if (build_id_cache__add_sdt_cache(sbuild_id, realname) < 0)
-		pr_debug("Failed to update/scan SDT cache for %s\n", realname);
+		pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
 
 out_free:
 	if (!is_kallsyms)
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
new file mode 100644
index 0000000..3cc6bc3
--- /dev/null
+++ b/tools/perf/util/cs-etm.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
+#define INCLUDE__UTIL_PERF_CS_ETM_H__
+
+/* Versionning header in case things need tro change in the future.  That way
+ * decoding of old snapshot is still possible.
+ */
+enum {
+	/* Starting with 0x0 */
+	CS_HEADER_VERSION_0,
+	/* PMU->type (32 bit), total # of CPUs (32 bit) */
+	CS_PMU_TYPE_CPUS,
+	CS_ETM_SNAPSHOT,
+	CS_HEADER_VERSION_0_MAX,
+};
+
+/* Beginning of header common to both ETMv3 and V4 */
+enum {
+	CS_ETM_MAGIC,
+	CS_ETM_CPU,
+};
+
+/* ETMv3/PTM metadata */
+enum {
+	/* Dynamic, configurable parameters */
+	CS_ETM_ETMCR = CS_ETM_CPU + 1,
+	CS_ETM_ETMTRACEIDR,
+	/* RO, taken from sysFS */
+	CS_ETM_ETMCCER,
+	CS_ETM_ETMIDR,
+	CS_ETM_PRIV_MAX,
+};
+
+/* ETMv4 metadata */
+enum {
+	/* Dynamic, configurable parameters */
+	CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1,
+	CS_ETMV4_TRCTRACEIDR,
+	/* RO, taken from sysFS */
+	CS_ETMV4_TRCIDR0,
+	CS_ETMV4_TRCIDR1,
+	CS_ETMV4_TRCIDR2,
+	CS_ETMV4_TRCIDR8,
+	CS_ETMV4_TRCAUTHSTATUS,
+	CS_ETMV4_PRIV_MAX,
+};
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+
+#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
+
+static const u64 __perf_cs_etmv3_magic   = 0x3030303030303030ULL;
+static const u64 __perf_cs_etmv4_magic   = 0x4040404040404040ULL;
+#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
+#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
+
+#endif
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 4f979bb..7123f4d 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -437,7 +437,7 @@
 	int ret;
 
 	if (nr_elements * sizeof(u32) != raw_size)
-		pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n",
+		pr_warning("Incorrect raw_size (%u) in bpf output event, skip %zu bytes\n",
 			   raw_size, nr_elements * sizeof(u32) - raw_size);
 
 	len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 8c4212a..c1838b6 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -6,6 +6,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include <api/debug.h>
+#include <linux/time64.h>
 
 #include "cache.h"
 #include "color.h"
@@ -14,9 +15,6 @@
 #include "util.h"
 #include "target.h"
 
-#define NSECS_PER_SEC  1000000000ULL
-#define NSECS_PER_USEC 1000ULL
-
 int verbose;
 bool dump_trace = false, quiet = false;
 int debug_ordered_events;
@@ -54,9 +52,9 @@
 	int ret = 0;
 	u64 secs, usecs, nsecs = t;
 
-	secs   = nsecs / NSECS_PER_SEC;
-	nsecs -= secs  * NSECS_PER_SEC;
-	usecs  = nsecs / NSECS_PER_USEC;
+	secs   = nsecs / NSEC_PER_SEC;
+	nsecs -= secs  * NSEC_PER_SEC;
+	usecs  = nsecs / NSEC_PER_USEC;
 
 	ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ",
 		      secs, usecs);
diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c
new file mode 100644
index 0000000..1647f28
--- /dev/null
+++ b/tools/perf/util/drv_configs.c
@@ -0,0 +1,77 @@
+/*
+ * drv_configs.h: Interface to apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include "drv_configs.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "pmu.h"
+
+static int
+perf_evsel__apply_drv_configs(struct perf_evsel *evsel,
+			      struct perf_evsel_config_term **err_term)
+{
+	bool found = false;
+	int err = 0;
+	struct perf_evsel_config_term *term;
+	struct perf_pmu *pmu = NULL;
+
+	while ((pmu = perf_pmu__scan(pmu)) != NULL)
+		if (pmu->type == evsel->attr.type) {
+			found = true;
+			break;
+		}
+
+	list_for_each_entry(term, &evsel->config_terms, list) {
+		if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
+			continue;
+
+		/*
+		 * We have a configuration term, report an error if we
+		 * can't find the PMU or if the PMU driver doesn't support
+		 * cmd line driver configuration.
+		 */
+		if (!found || !pmu->set_drv_config) {
+			err = -EINVAL;
+			*err_term = term;
+			break;
+		}
+
+		err = pmu->set_drv_config(term);
+		if (err) {
+			*err_term = term;
+			break;
+		}
+	}
+
+	return err;
+}
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+				   struct perf_evsel **err_evsel,
+				   struct perf_evsel_config_term **err_term)
+{
+	struct perf_evsel *evsel;
+	int err = 0;
+
+	evlist__for_each_entry(evlist, evsel) {
+		err = perf_evsel__apply_drv_configs(evsel, err_term);
+		if (err) {
+			*err_evsel = evsel;
+			break;
+		}
+	}
+
+	return err;
+}
diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h
new file mode 100644
index 0000000..32bc9ba
--- /dev/null
+++ b/tools/perf/util/drv_configs.h
@@ -0,0 +1,26 @@
+/*
+ * drv_configs.h: Interface to apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_DRV_CONFIGS_H
+#define __PERF_DRV_CONFIGS_H
+
+#include "drv_configs.h"
+#include "evlist.h"
+#include "evsel.h"
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+				   struct perf_evsel **err_evsel,
+				   struct perf_evsel_config_term **term);
+#endif
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 774f6ec..d2c6cdd 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -363,6 +363,9 @@
 		return -EINVAL;
 	}
 
+	if (!is_regular_file(name))
+		return -EINVAL;
+
 	fd = do_open(name);
 	free(name);
 	return fd;
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index a347b19..41e068e 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -130,6 +130,22 @@
 }
 
 /**
+ * die_get_linkage_name - Get the linkage name of the object
+ * @dw_die: A DIE of the object
+ *
+ * Get the linkage name attiribute of given @dw_die.
+ * For C++ binary, the linkage name will be the mangled symbol.
+ */
+const char *die_get_linkage_name(Dwarf_Die *dw_die)
+{
+	Dwarf_Attribute attr;
+
+	if (dwarf_attr_integrate(dw_die, DW_AT_linkage_name, &attr) == NULL)
+		return NULL;
+	return dwarf_formstring(&attr);
+}
+
+/**
  * die_compare_name - Compare diename and tname
  * @dw_die: a DIE
  * @tname: a string of target name
@@ -145,18 +161,26 @@
 }
 
 /**
- * die_match_name - Match diename and glob
+ * die_match_name - Match diename/linkage name and glob
  * @dw_die: a DIE
  * @glob: a string of target glob pattern
  *
  * Glob matching the name of @dw_die and @glob. Return false if matching fail.
+ * This also match linkage name.
  */
 bool die_match_name(Dwarf_Die *dw_die, const char *glob)
 {
 	const char *name;
 
 	name = dwarf_diename(dw_die);
-	return name ? strglobmatch(name, glob) : false;
+	if (name && strglobmatch(name, glob))
+		return true;
+	/* fall back to check linkage name */
+	name = die_get_linkage_name(dw_die);
+	if (name && strglobmatch(name, glob))
+		return true;
+
+	return false;
 }
 
 /**
@@ -1085,3 +1109,182 @@
 	return -ENOTSUP;
 }
 #endif
+
+/*
+ * die_has_loclist - Check if DW_AT_location of @vr_die is a location list
+ * @vr_die: a variable DIE
+ */
+static bool die_has_loclist(Dwarf_Die *vr_die)
+{
+	Dwarf_Attribute loc;
+	int tag = dwarf_tag(vr_die);
+
+	if (tag != DW_TAG_formal_parameter &&
+	    tag != DW_TAG_variable)
+		return false;
+
+	return (dwarf_attr_integrate(vr_die, DW_AT_location, &loc) &&
+		dwarf_whatform(&loc) == DW_FORM_sec_offset);
+}
+
+/*
+ * die_is_optimized_target - Check if target program is compiled with
+ * optimization
+ * @cu_die: a CU DIE
+ *
+ * For any object in given CU whose DW_AT_location is a location list,
+ * target program is compiled with optimization. This is applicable to
+ * clang as well.
+ */
+bool die_is_optimized_target(Dwarf_Die *cu_die)
+{
+	Dwarf_Die tmp_die;
+
+	if (die_has_loclist(cu_die))
+		return true;
+
+	if (!dwarf_child(cu_die, &tmp_die) &&
+	    die_is_optimized_target(&tmp_die))
+		return true;
+
+	if (!dwarf_siblingof(cu_die, &tmp_die) &&
+	    die_is_optimized_target(&tmp_die))
+		return true;
+
+	return false;
+}
+
+/*
+ * die_search_idx - Search index of given line address
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @addr: address we are looking for
+ * @idx: index to be set by this function (return value)
+ *
+ * Search for @addr by looping over every lines of CU. If address
+ * matches, set index of that line in @idx. Note that single source
+ * line can have multiple line records. i.e. single source line can
+ * have multiple index.
+ */
+static bool die_search_idx(Dwarf_Lines *lines, unsigned long nr_lines,
+			   Dwarf_Addr addr, unsigned long *idx)
+{
+	unsigned long i;
+	Dwarf_Addr tmp;
+
+	for (i = 0; i < nr_lines; i++) {
+		if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &tmp))
+			return false;
+
+		if (tmp == addr) {
+			*idx = i;
+			return true;
+		}
+	}
+	return false;
+}
+
+/*
+ * die_get_postprologue_addr - Search next address after function prologue
+ * @entrypc_idx: entrypc index
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @hignpc: high PC address of function
+ * @postprologue_addr: Next address after function prologue (return value)
+ *
+ * Look for prologue-end marker. If there is no explicit marker, return
+ * address of next line record or next source line.
+ */
+static bool die_get_postprologue_addr(unsigned long entrypc_idx,
+				      Dwarf_Lines *lines,
+				      unsigned long nr_lines,
+				      Dwarf_Addr highpc,
+				      Dwarf_Addr *postprologue_addr)
+{
+	unsigned long i;
+	int entrypc_lno, lno;
+	Dwarf_Line *line;
+	Dwarf_Addr addr;
+	bool p_end;
+
+	/* entrypc_lno is actual source line number */
+	line = dwarf_onesrcline(lines, entrypc_idx);
+	if (dwarf_lineno(line, &entrypc_lno))
+		return false;
+
+	for (i = entrypc_idx; i < nr_lines; i++) {
+		line = dwarf_onesrcline(lines, i);
+
+		if (dwarf_lineaddr(line, &addr) ||
+		    dwarf_lineno(line, &lno)    ||
+		    dwarf_lineprologueend(line, &p_end))
+			return false;
+
+		/* highpc is exclusive. [entrypc,highpc) */
+		if (addr >= highpc)
+			break;
+
+		/* clang supports prologue-end marker */
+		if (p_end)
+			break;
+
+		/* Actual next line in source */
+		if (lno != entrypc_lno)
+			break;
+
+		/*
+		 * Single source line can have multiple line records.
+		 * For Example,
+		 *     void foo() { printf("hello\n"); }
+		 * contains two line records. One points to declaration and
+		 * other points to printf() line. Variable 'lno' won't get
+		 * incremented in this case but 'i' will.
+		 */
+		if (i != entrypc_idx)
+			break;
+	}
+
+	dwarf_lineaddr(line, postprologue_addr);
+	if (*postprologue_addr >= highpc)
+		dwarf_lineaddr(dwarf_onesrcline(lines, i - 1),
+			       postprologue_addr);
+
+	return true;
+}
+
+/*
+ * die_skip_prologue - Use next address after prologue as probe location
+ * @sp_die: a subprogram DIE
+ * @cu_die: a CU DIE
+ * @entrypc: entrypc of the function
+ *
+ * Function prologue prepares stack and registers before executing function
+ * logic. When target program is compiled without optimization, function
+ * parameter information is only valid after prologue. When we probe entrypc
+ * of the function, and try to record function parameter, it contains
+ * garbage value.
+ */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+		       Dwarf_Addr *entrypc)
+{
+	size_t nr_lines = 0;
+	unsigned long entrypc_idx = 0;
+	Dwarf_Lines *lines = NULL;
+	Dwarf_Addr postprologue_addr;
+	Dwarf_Addr highpc;
+
+	if (dwarf_highpc(sp_die, &highpc))
+		return;
+
+	if (dwarf_getsrclines(cu_die, &lines, &nr_lines))
+		return;
+
+	if (!die_search_idx(lines, nr_lines, *entrypc, &entrypc_idx))
+		return;
+
+	if (!die_get_postprologue_addr(entrypc_idx, lines, nr_lines,
+				       highpc, &postprologue_addr))
+		return;
+
+	*entrypc = postprologue_addr;
+}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index dc0ce1a..8ac53bf 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -38,6 +38,9 @@
 int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
 			 int (*callback)(Dwarf_Die *, void *), void *data);
 
+/* Get DW_AT_linkage_name (should be NULL for C binary) */
+const char *die_get_linkage_name(Dwarf_Die *dw_die);
+
 /* Ensure that this DIE is a subprogram and definition (not declaration) */
 bool die_is_func_def(Dwarf_Die *dw_die);
 
@@ -125,4 +128,12 @@
 /* Get the name and type of given variable DIE, stored as "type\tname" */
 int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
 int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Check if target program is compiled with optimization */
+bool die_is_optimized_target(Dwarf_Die *cu_die);
+
+/* Use next address after prologue as probe location */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+		       Dwarf_Addr *entrypc);
+
 #endif
diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
new file mode 100644
index 0000000..62bc4a8
--- /dev/null
+++ b/tools/perf/util/dwarf-regs.c
@@ -0,0 +1,59 @@
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ *
+ * Written by: Masami Hiramatsu <mhiramat@kernel.org>
+ */
+
+#include <util.h>
+#include <debug.h>
+#include <dwarf-regs.h>
+#include <elf.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64	183  /* ARM 64 bit */
+#endif
+
+/* Define const char * {arch}_register_tbl[] */
+#define DEFINE_DWARF_REGSTR_TABLE
+#include "../arch/x86/include/dwarf-regs-table.h"
+#include "../arch/arm/include/dwarf-regs-table.h"
+#include "../arch/arm64/include/dwarf-regs-table.h"
+#include "../arch/sh/include/dwarf-regs-table.h"
+#include "../arch/powerpc/include/dwarf-regs-table.h"
+#include "../arch/s390/include/dwarf-regs-table.h"
+#include "../arch/sparc/include/dwarf-regs-table.h"
+#include "../arch/xtensa/include/dwarf-regs-table.h"
+
+#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
+{
+	switch (machine) {
+	case EM_NONE:	/* Generic arch - use host arch */
+		return get_arch_regstr(n);
+	case EM_386:
+		return __get_dwarf_regstr(x86_32_regstr_tbl, n);
+	case EM_X86_64:
+		return __get_dwarf_regstr(x86_64_regstr_tbl, n);
+	case EM_ARM:
+		return __get_dwarf_regstr(arm_regstr_tbl, n);
+	case EM_AARCH64:
+		return __get_dwarf_regstr(aarch64_regstr_tbl, n);
+	case EM_SH:
+		return __get_dwarf_regstr(sh_regstr_tbl, n);
+	case EM_S390:
+		return __get_dwarf_regstr(s390_regstr_tbl, n);
+	case EM_PPC:
+	case EM_PPC64:
+		return __get_dwarf_regstr(powerpc_regstr_tbl, n);
+	case EM_SPARC:
+	case EM_SPARCV9:
+		return __get_dwarf_regstr(sparc_regstr_tbl, n);
+	case EM_XTENSA:
+		return __get_dwarf_regstr(xtensa_regstr_tbl, n);
+	default:
+		pr_err("ELF MACHINE %x is not supported.\n", machine);
+	}
+	return NULL;
+}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index e20438b..8ab0d7d 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -1,5 +1,6 @@
 #include <linux/types.h>
-#include <sys/mman.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <api/fs/fs.h>
 #include "event.h"
 #include "debug.h"
 #include "hist.h"
@@ -248,6 +249,8 @@
 	bool truncation = false;
 	unsigned long long timeout = proc_map_timeout * 1000000ULL;
 	int rc = 0;
+	const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
+	int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
 
 	if (machine__is_default_guest(machine))
 		return 0;
@@ -343,6 +346,12 @@
 		if (!strcmp(execname, ""))
 			strcpy(execname, anonstr);
 
+		if (hugetlbfs_mnt_len &&
+		    !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
+			strcpy(execname, anonstr);
+			event->mmap2.flags |= MAP_HUGETLB;
+		}
+
 		size = strlen(execname) + 1;
 		memcpy(event->mmap2.filename, execname, size);
 		size = PERF_ALIGN(size, sizeof(u64));
@@ -1286,7 +1295,7 @@
 		 * must be done prior to using kernel maps.
 		 */
 		if (load_map)
-			map__load(al->map, machine->symbol_filter);
+			map__load(al->map);
 		al->addr = al->map->map_ip(al->map, al->addr);
 	}
 }
@@ -1297,8 +1306,7 @@
 {
 	thread__find_addr_map(thread, cpumode, type, addr, al);
 	if (al->map != NULL)
-		al->sym = map__find_symbol(al->map, al->addr,
-					   thread->mg->machine->symbol_filter);
+		al->sym = map__find_symbol(al->map, al->addr);
 	else
 		al->sym = NULL;
 }
@@ -1359,8 +1367,7 @@
 			al->filtered |= (1 << HIST_FILTER__DSO);
 		}
 
-		al->sym = map__find_symbol(al->map, al->addr,
-					   machine->symbol_filter);
+		al->sym = map__find_symbol(al->map, al->addr);
 	}
 
 	if (symbol_conf.sym_list &&
@@ -1416,5 +1423,5 @@
 	al->sym = NULL;
 
 	if (al->map)
-		al->sym = map__find_symbol(al->map, al->addr, NULL);
+		al->sym = map__find_symbol(al->map, al->addr);
 }
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 097b3ed..ea34c5a 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1032,16 +1032,18 @@
 }
 
 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
-				       struct mmap_params *mp, int cpu,
+				       struct mmap_params *mp, int cpu_idx,
 				       int thread, int *_output, int *_output_backward)
 {
 	struct perf_evsel *evsel;
 	int revent;
+	int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);
 
 	evlist__for_each_entry(evlist, evsel) {
 		struct perf_mmap *maps = evlist->mmap;
 		int *output = _output;
 		int fd;
+		int cpu;
 
 		if (evsel->attr.write_backward) {
 			output = _output_backward;
@@ -1060,6 +1062,10 @@
 		if (evsel->system_wide && thread)
 			continue;
 
+		cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
+		if (cpu == -1)
+			continue;
+
 		fd = FD(evsel, cpu, thread);
 
 		if (*output == -1) {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d9b80ef..380e84c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -507,17 +507,17 @@
 	u8 op, result, type = (config >>  0) & 0xff;
 	const char *err = "unknown-ext-hardware-cache-type";
 
-	if (type > PERF_COUNT_HW_CACHE_MAX)
+	if (type >= PERF_COUNT_HW_CACHE_MAX)
 		goto out_err;
 
 	op = (config >>  8) & 0xff;
 	err = "unknown-ext-hardware-cache-op";
-	if (op > PERF_COUNT_HW_CACHE_OP_MAX)
+	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
 		goto out_err;
 
 	result = (config >> 16) & 0xff;
 	err = "unknown-ext-hardware-cache-result";
-	if (result > PERF_COUNT_HW_CACHE_RESULT_MAX)
+	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		goto out_err;
 
 	err = "invalid-cache";
@@ -1045,15 +1045,15 @@
 	return -1;
 }
 
-int perf_evsel__append_filter(struct perf_evsel *evsel,
-			      const char *op, const char *filter)
+static int perf_evsel__append_filter(struct perf_evsel *evsel,
+				     const char *fmt, const char *filter)
 {
 	char *new_filter;
 
 	if (evsel->filter == NULL)
 		return perf_evsel__set_filter(evsel, filter);
 
-	if (asprintf(&new_filter,"(%s) %s (%s)", evsel->filter, op, filter) > 0) {
+	if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
 		free(evsel->filter);
 		evsel->filter = new_filter;
 		return 0;
@@ -1062,6 +1062,16 @@
 	return -1;
 }
 
+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter)
+{
+	return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
+}
+
+int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
+{
+	return perf_evsel__append_filter(evsel, "%s,%s", filter);
+}
+
 int perf_evsel__enable(struct perf_evsel *evsel)
 {
 	int nthreads = thread_map__nr(evsel->threads);
@@ -1728,7 +1738,6 @@
 	data->cpu = data->pid = data->tid = -1;
 	data->stream_id = data->id = data->time = -1ULL;
 	data->period = evsel->attr.sample_period;
-	data->weight = 0;
 	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
 	if (event->header.type != PERF_RECORD_SAMPLE) {
@@ -1935,7 +1944,6 @@
 		}
 	}
 
-	data->weight = 0;
 	if (type & PERF_SAMPLE_WEIGHT) {
 		OVERFLOW_CHECK_u64(array);
 		data->weight = *array;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4d44129..b1503b0 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -46,6 +46,7 @@
 	PERF_EVSEL__CONFIG_TERM_INHERIT,
 	PERF_EVSEL__CONFIG_TERM_MAX_STACK,
 	PERF_EVSEL__CONFIG_TERM_OVERWRITE,
+	PERF_EVSEL__CONFIG_TERM_DRV_CFG,
 	PERF_EVSEL__CONFIG_TERM_MAX,
 };
 
@@ -57,6 +58,7 @@
 		u64	freq;
 		bool	time;
 		char	*callgraph;
+		char	*drv_cfg;
 		u64	stack_user;
 		int	max_stack;
 		bool	inherit;
@@ -233,8 +235,9 @@
 			       bool use_sample_identifier);
 
 int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter);
-int perf_evsel__append_filter(struct perf_evsel *evsel,
-			      const char *op, const char *filter);
+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__append_addr_filter(struct perf_evsel *evsel,
+				   const char *filter);
 int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads,
 			     const char *filter);
 int perf_evsel__enable(struct perf_evsel *evsel);
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 3674e77..662a0a6 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -122,9 +122,6 @@
 			if (!node)
 				break;
 
-			if (node->sym && node->sym->ignore)
-				goto next;
-
 			printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
 
 			if (print_ip)
@@ -158,7 +155,7 @@
 
 			if (!print_oneline)
 				printed += fprintf(fp, "\n");
-next:
+
 			callchain_cursor_advance(cursor);
 		}
 	}
@@ -181,7 +178,7 @@
 	if (cursor != NULL) {
 		printed += sample__fprintf_callchain(sample, left_alignment,
 						     print_opts, cursor, fp);
-	} else if (!(al->sym && al->sym->ignore)) {
+	} else {
 		printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
 
 		if (print_ip)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 8f0db40..85dd0db 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -828,8 +828,7 @@
  * default get_cpuid(): nothing gets recorded
  * actual implementation must be in arch/$(ARCH)/util/header.c
  */
-int __attribute__ ((weak)) get_cpuid(char *buffer __maybe_unused,
-				     size_t sz __maybe_unused)
+int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
 {
 	return -1;
 }
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index de15dbc..b02992e 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -177,8 +177,10 @@
 	hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
 	hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
 
-	if (h->srcline)
-		hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline));
+	if (h->srcline) {
+		len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header));
+		hists__new_col_len(hists, HISTC_SRCLINE, len);
+	}
 
 	if (h->srcfile)
 		hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile));
@@ -417,6 +419,8 @@
 	}
 	INIT_LIST_HEAD(&he->pairs.node);
 	thread__get(he->thread);
+	he->hroot_in  = RB_ROOT;
+	he->hroot_out = RB_ROOT;
 
 	if (!symbol_conf.report_hierarchy)
 		he->leaf = true;
@@ -2149,6 +2153,50 @@
 	return he;
 }
 
+static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
+						    struct rb_root *root,
+						    struct hist_entry *pair)
+{
+	struct rb_node **p;
+	struct rb_node *parent = NULL;
+	struct hist_entry *he;
+	struct perf_hpp_fmt *fmt;
+
+	p = &root->rb_node;
+	while (*p != NULL) {
+		int64_t cmp = 0;
+
+		parent = *p;
+		he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+			cmp = fmt->collapse(fmt, he, pair);
+			if (cmp)
+				break;
+		}
+		if (!cmp)
+			goto out;
+
+		if (cmp < 0)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+
+	he = hist_entry__new(pair, true);
+	if (he) {
+		rb_link_node(&he->rb_node_in, parent, p);
+		rb_insert_color(&he->rb_node_in, root);
+
+		he->dummy = true;
+		he->hists = hists;
+		memset(&he->stat, 0, sizeof(he->stat));
+		hists__inc_stats(hists, he);
+	}
+out:
+	return he;
+}
+
 static struct hist_entry *hists__find_entry(struct hists *hists,
 					    struct hist_entry *he)
 {
@@ -2174,6 +2222,51 @@
 	return NULL;
 }
 
+static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root,
+						      struct hist_entry *he)
+{
+	struct rb_node *n = root->rb_node;
+
+	while (n) {
+		struct hist_entry *iter;
+		struct perf_hpp_fmt *fmt;
+		int64_t cmp = 0;
+
+		iter = rb_entry(n, struct hist_entry, rb_node_in);
+		perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+			cmp = fmt->collapse(fmt, iter, he);
+			if (cmp)
+				break;
+		}
+
+		if (cmp < 0)
+			n = n->rb_left;
+		else if (cmp > 0)
+			n = n->rb_right;
+		else
+			return iter;
+	}
+
+	return NULL;
+}
+
+static void hists__match_hierarchy(struct rb_root *leader_root,
+				   struct rb_root *other_root)
+{
+	struct rb_node *nd;
+	struct hist_entry *pos, *pair;
+
+	for (nd = rb_first(leader_root); nd; nd = rb_next(nd)) {
+		pos  = rb_entry(nd, struct hist_entry, rb_node_in);
+		pair = hists__find_hierarchy_entry(other_root, pos);
+
+		if (pair) {
+			hist_entry__add_pair(pair, pos);
+			hists__match_hierarchy(&pos->hroot_in, &pair->hroot_in);
+		}
+	}
+}
+
 /*
  * Look for pairs to link to the leader buckets (hist_entries):
  */
@@ -2183,6 +2276,12 @@
 	struct rb_node *nd;
 	struct hist_entry *pos, *pair;
 
+	if (symbol_conf.report_hierarchy) {
+		/* hierarchy report always collapses entries */
+		return hists__match_hierarchy(&leader->entries_collapsed,
+					      &other->entries_collapsed);
+	}
+
 	if (hists__has(leader, need_collapse))
 		root = &leader->entries_collapsed;
 	else
@@ -2197,6 +2296,50 @@
 	}
 }
 
+static int hists__link_hierarchy(struct hists *leader_hists,
+				 struct hist_entry *parent,
+				 struct rb_root *leader_root,
+				 struct rb_root *other_root)
+{
+	struct rb_node *nd;
+	struct hist_entry *pos, *leader;
+
+	for (nd = rb_first(other_root); nd; nd = rb_next(nd)) {
+		pos = rb_entry(nd, struct hist_entry, rb_node_in);
+
+		if (hist_entry__has_pairs(pos)) {
+			bool found = false;
+
+			list_for_each_entry(leader, &pos->pairs.head, pairs.node) {
+				if (leader->hists == leader_hists) {
+					found = true;
+					break;
+				}
+			}
+			if (!found)
+				return -1;
+		} else {
+			leader = add_dummy_hierarchy_entry(leader_hists,
+							   leader_root, pos);
+			if (leader == NULL)
+				return -1;
+
+			/* do not point parent in the pos */
+			leader->parent_he = parent;
+
+			hist_entry__add_pair(pos, leader);
+		}
+
+		if (!pos->leaf) {
+			if (hists__link_hierarchy(leader_hists, leader,
+						  &leader->hroot_in,
+						  &pos->hroot_in) < 0)
+				return -1;
+		}
+	}
+	return 0;
+}
+
 /*
  * Look for entries in the other hists that are not present in the leader, if
  * we find them, just add a dummy entry on the leader hists, with period=0,
@@ -2208,6 +2351,13 @@
 	struct rb_node *nd;
 	struct hist_entry *pos, *pair;
 
+	if (symbol_conf.report_hierarchy) {
+		/* hierarchy report always collapses entries */
+		return hists__link_hierarchy(leader, NULL,
+					     &leader->entries_collapsed,
+					     &other->entries_collapsed);
+	}
+
 	if (hists__has(other, need_collapse))
 		root = &other->entries_collapsed;
 	else
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 0a1edf1..9928fed 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -230,7 +230,7 @@
 struct perf_hpp_fmt {
 	const char *name;
 	int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
-		      struct hists *hists);
+		      struct hists *hists, int line, int *span);
 	int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
 		     struct hists *hists);
 	int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
@@ -259,6 +259,7 @@
 	struct list_head fields;
 	struct list_head sorts;
 
+	int nr_header_lines;
 	int need_collapse;
 	int parent;
 	int sym;
@@ -367,6 +368,7 @@
 void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
 void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
 void perf_hpp__set_user_width(const char *width_list_str);
+void hists__reset_column_width(struct hists *hists);
 
 typedef u64 (*hpp_field_fn)(struct hist_entry *he);
 typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
@@ -483,5 +485,10 @@
 #define HIERARCHY_INDENT  3
 
 bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+			   struct perf_hpp_list *hpp_list);
+int hists__fprintf_headers(struct hists *hists, FILE *fp);
 
 #endif	/* __PERF_HIST_H */
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
index 07c644e..43bfd8d 100644
--- a/tools/perf/util/include/dwarf-regs.h
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -3,6 +3,12 @@
 
 #ifdef HAVE_DWARF_SUPPORT
 const char *get_arch_regstr(unsigned int n);
+/*
+ * get_dwarf_regstr - Returns ftrace register string from DWARF regnum
+ * n: DWARF register number
+ * machine: ELF machine signature (EM_*)
+ */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine);
 #endif
 
 #ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 749e6f2..f545ec1 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -346,7 +346,7 @@
 		goto out_put;
 
 	/* Load maps to ensure dso->is_64_bit has been updated */
-	map__load(al.map, machine->symbol_filter);
+	map__load(al.map);
 
 	x86_64 = al.map->dso->is_64_bit;
 
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 9c8f15d..7591a0c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -80,6 +80,7 @@
 	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
 			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
 			 uint64_t max_insn_cnt, void *data);
+	bool (*pgd_ip)(uint64_t ip, void *data);
 	void *data;
 	struct intel_pt_state state;
 	const unsigned char *buf;
@@ -123,8 +124,6 @@
 	bool have_calc_cyc_to_tsc;
 	int exec_mode;
 	unsigned int insn_bytes;
-	uint64_t sign_bit;
-	uint64_t sign_bits;
 	uint64_t period;
 	enum intel_pt_period_type period_type;
 	uint64_t tot_insn_cnt;
@@ -188,12 +187,10 @@
 
 	decoder->get_trace          = params->get_trace;
 	decoder->walk_insn          = params->walk_insn;
+	decoder->pgd_ip             = params->pgd_ip;
 	decoder->data               = params->data;
 	decoder->return_compression = params->return_compression;
 
-	decoder->sign_bit           = (uint64_t)1 << 47;
-	decoder->sign_bits          = ~(((uint64_t)1 << 48) - 1);
-
 	decoder->period             = params->period;
 	decoder->period_type        = params->period_type;
 
@@ -362,21 +359,30 @@
 	return 0;
 }
 
-static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder,
-				 const struct intel_pt_pkt *packet,
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
 				 uint64_t last_ip)
 {
 	uint64_t ip;
 
 	switch (packet->count) {
-	case 2:
+	case 1:
 		ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
 		     packet->payload;
 		break;
-	case 4:
+	case 2:
 		ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
 		     packet->payload;
 		break;
+	case 3:
+		ip = packet->payload;
+		/* Sign-extend 6-byte ip */
+		if (ip & (uint64_t)0x800000000000ULL)
+			ip |= (uint64_t)0xffff000000000000ULL;
+		break;
+	case 4:
+		ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+		     packet->payload;
+		break;
 	case 6:
 		ip = packet->payload;
 		break;
@@ -384,16 +390,12 @@
 		return 0;
 	}
 
-	if (ip & decoder->sign_bit)
-		return ip | decoder->sign_bits;
-
 	return ip;
 }
 
 static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
 {
-	decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet,
-					    decoder->last_ip);
+	decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
 }
 
 static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@@ -1008,6 +1010,19 @@
 	int err;
 
 	err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+	if (err == INTEL_PT_RETURN &&
+	    decoder->pgd_ip &&
+	    decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+	    (decoder->state.type & INTEL_PT_BRANCH) &&
+	    decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
+		/* Unconditional branch leaving filter region */
+		decoder->no_progress = 0;
+		decoder->pge = false;
+		decoder->continuous_period = false;
+		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+		decoder->state.to_ip = 0;
+		return 0;
+	}
 	if (err == INTEL_PT_RETURN)
 		return 0;
 	if (err)
@@ -1036,6 +1051,21 @@
 	}
 
 	if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+		uint64_t to_ip = decoder->ip + intel_pt_insn.length +
+				 intel_pt_insn.rel;
+
+		if (decoder->pgd_ip &&
+		    decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+		    decoder->pgd_ip(to_ip, decoder->data)) {
+			/* Conditional branch leaving filter region */
+			decoder->pge = false;
+			decoder->continuous_period = false;
+			decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+			decoder->ip = to_ip;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return 0;
+		}
 		intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
 				decoder->ip);
 		decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
@@ -1657,6 +1687,12 @@
 	}
 }
 
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+	return decoder->last_ip || decoder->packet.count == 0 ||
+	       decoder->packet.count == 3 || decoder->packet.count == 6;
+}
+
 /* Walk PSB+ packets to get in sync. */
 static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 {
@@ -1677,8 +1713,7 @@
 
 		case INTEL_PT_FUP:
 			decoder->pge = true;
-			if (decoder->last_ip || decoder->packet.count == 6 ||
-			    decoder->packet.count == 0) {
+			if (intel_pt_have_ip(decoder)) {
 				uint64_t current_ip = decoder->ip;
 
 				intel_pt_set_ip(decoder);
@@ -1767,8 +1802,7 @@
 		case INTEL_PT_TIP_PGE:
 		case INTEL_PT_TIP:
 			decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
-			if (decoder->last_ip || decoder->packet.count == 6 ||
-			    decoder->packet.count == 0)
+			if (intel_pt_have_ip(decoder))
 				intel_pt_set_ip(decoder);
 			if (decoder->ip)
 				return 0;
@@ -1776,9 +1810,7 @@
 
 		case INTEL_PT_FUP:
 			if (decoder->overflow) {
-				if (decoder->last_ip ||
-				    decoder->packet.count == 6 ||
-				    decoder->packet.count == 0)
+				if (intel_pt_have_ip(decoder))
 					intel_pt_set_ip(decoder);
 				if (decoder->ip)
 					return 0;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 02c38fe..8939998 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -83,6 +83,7 @@
 	int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
 			 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
 			 uint64_t max_insn_cnt, void *data);
+	bool (*pgd_ip)(uint64_t ip, void *data);
 	void *data;
 	bool return_compression;
 	uint64_t period;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index b1257c8..4f7b320 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -292,36 +292,46 @@
 			   const unsigned char *buf, size_t len,
 			   struct intel_pt_pkt *packet)
 {
-	switch (byte >> 5) {
+	int ip_len;
+
+	packet->count = byte >> 5;
+
+	switch (packet->count) {
 	case 0:
-		packet->count = 0;
+		ip_len = 0;
 		break;
 	case 1:
 		if (len < 3)
 			return INTEL_PT_NEED_MORE_BYTES;
-		packet->count = 2;
+		ip_len = 2;
 		packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
 		break;
 	case 2:
 		if (len < 5)
 			return INTEL_PT_NEED_MORE_BYTES;
-		packet->count = 4;
+		ip_len = 4;
 		packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
 		break;
 	case 3:
-	case 6:
+	case 4:
 		if (len < 7)
 			return INTEL_PT_NEED_MORE_BYTES;
-		packet->count = 6;
+		ip_len = 6;
 		memcpy_le64(&packet->payload, buf + 1, 6);
 		break;
+	case 6:
+		if (len < 9)
+			return INTEL_PT_NEED_MORE_BYTES;
+		ip_len = 8;
+		packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1));
+		break;
 	default:
 		return INTEL_PT_BAD_PACKET;
 	}
 
 	packet->type = type;
 
-	return packet->count + 1;
+	return ip_len + 1;
 }
 
 static int intel_pt_get_mode(const unsigned char *buf, size_t len,
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 551ff6f..dc041d4 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -103,6 +103,9 @@
 	unsigned max_non_turbo_ratio;
 
 	unsigned long num_events;
+
+	char *filter;
+	struct addr_filters filts;
 };
 
 enum switch_state {
@@ -241,7 +244,7 @@
 	}
 
 	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
-
+next:
 	buffer = auxtrace_buffer__next(queue, buffer);
 	if (!buffer) {
 		if (old_buffer)
@@ -264,9 +267,6 @@
 	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
 		return -ENOMEM;
 
-	if (old_buffer)
-		auxtrace_buffer__drop_data(old_buffer);
-
 	if (buffer->use_data) {
 		b->len = buffer->use_size;
 		b->buf = buffer->use_data;
@@ -276,6 +276,16 @@
 	}
 	b->ref_timestamp = buffer->reference;
 
+	/*
+	 * If in snapshot mode and the buffer has no usable data, get next
+	 * buffer and again check overlap against old_buffer.
+	 */
+	if (ptq->pt->snapshot_mode && !b->len)
+		goto next;
+
+	if (old_buffer)
+		auxtrace_buffer__drop_data(old_buffer);
+
 	if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
 						      !buffer->consecutive)) {
 		b->consecutive = false;
@@ -477,7 +487,7 @@
 		start_ip = *ip;
 
 		/* Load maps to ensure dso->is_64_bit has been updated */
-		map__load(al.map, machine->symbol_filter);
+		map__load(al.map);
 
 		x86_64 = al.map->dso->is_64_bit;
 
@@ -541,6 +551,76 @@
 	return 0;
 }
 
+static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
+				  uint64_t offset, const char *filename)
+{
+	struct addr_filter *filt;
+	bool have_filter   = false;
+	bool hit_tracestop = false;
+	bool hit_filter    = false;
+
+	list_for_each_entry(filt, &pt->filts.head, list) {
+		if (filt->start)
+			have_filter = true;
+
+		if ((filename && !filt->filename) ||
+		    (!filename && filt->filename) ||
+		    (filename && strcmp(filename, filt->filename)))
+			continue;
+
+		if (!(offset >= filt->addr && offset < filt->addr + filt->size))
+			continue;
+
+		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
+			     ip, offset, filename ? filename : "[kernel]",
+			     filt->start ? "filter" : "stop",
+			     filt->addr, filt->size);
+
+		if (filt->start)
+			hit_filter = true;
+		else
+			hit_tracestop = true;
+	}
+
+	if (!hit_tracestop && !hit_filter)
+		intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
+			     ip, offset, filename ? filename : "[kernel]");
+
+	return hit_tracestop || (have_filter && !hit_filter);
+}
+
+static int __intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+	struct intel_pt_queue *ptq = data;
+	struct thread *thread;
+	struct addr_location al;
+	u8 cpumode;
+	u64 offset;
+
+	if (ip >= ptq->pt->kernel_start)
+		return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+
+	cpumode = PERF_RECORD_MISC_USER;
+
+	thread = ptq->thread;
+	if (!thread)
+		return -EINVAL;
+
+	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
+	if (!al.map || !al.map->dso)
+		return -EINVAL;
+
+	offset = al.map->map_ip(al.map, ip);
+
+	return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
+				     al.map->dso->long_name);
+}
+
+static bool intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+	return __intel_pt_pgd_ip(ip, data) > 0;
+}
+
 static bool intel_pt_get_config(struct intel_pt *pt,
 				struct perf_event_attr *attr, u64 *config)
 {
@@ -717,6 +797,9 @@
 	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
 	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
 
+	if (pt->filts.cnt > 0)
+		params.pgd_ip = intel_pt_pgd_ip;
+
 	if (pt->synth_opts.instructions) {
 		if (pt->synth_opts.period) {
 			switch (pt->synth_opts.period_type) {
@@ -1294,7 +1377,7 @@
 	if (!map)
 		return 0;
 
-	if (map__load(map, machine->symbol_filter))
+	if (map__load(map))
 		return 0;
 
 	start = dso__first_symbol(map->dso, MAP__FUNCTION);
@@ -1767,6 +1850,8 @@
 	intel_pt_free_events(session);
 	session->auxtrace = NULL;
 	thread__put(pt->unknown_thread);
+	addr_filters__exit(&pt->filts);
+	zfree(&pt->filter);
 	free(pt);
 }
 
@@ -2016,6 +2101,8 @@
 	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
 	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
 	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
+	[INTEL_PT_MAX_NONTURBO_RATIO]	= "  Max non-turbo ratio %"PRIu64"\n",
+	[INTEL_PT_FILTER_STR_LEN]	= "  Filter string len.  %"PRIu64"\n",
 };
 
 static void intel_pt_print_info(u64 *arr, int start, int finish)
@@ -2029,12 +2116,28 @@
 		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
 }
 
+static void intel_pt_print_info_str(const char *name, const char *str)
+{
+	if (!dump_trace)
+		return;
+
+	fprintf(stdout, "  %-20s%s\n", name, str ? str : "");
+}
+
+static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
+{
+	return auxtrace_info->header.size >=
+		sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
+}
+
 int intel_pt_process_auxtrace_info(union perf_event *event,
 				   struct perf_session *session)
 {
 	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
 	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
 	struct intel_pt *pt;
+	void *info_end;
+	u64 *info;
 	int err;
 
 	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
@@ -2045,6 +2148,8 @@
 	if (!pt)
 		return -ENOMEM;
 
+	addr_filters__init(&pt->filts);
+
 	perf_config(intel_pt_perf_config, pt);
 
 	err = auxtrace_queues__init(&pt->queues);
@@ -2069,8 +2174,7 @@
 	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
 			    INTEL_PT_PER_CPU_MMAPS);
 
-	if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
-					(sizeof(u64) * INTEL_PT_CYC_BIT)) {
+	if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
 		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
 		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
 		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
@@ -2080,6 +2184,54 @@
 				    INTEL_PT_CYC_BIT);
 	}
 
+	if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
+		pt->max_non_turbo_ratio =
+			auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
+		intel_pt_print_info(&auxtrace_info->priv[0],
+				    INTEL_PT_MAX_NONTURBO_RATIO,
+				    INTEL_PT_MAX_NONTURBO_RATIO);
+	}
+
+	info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+	info_end = (void *)info + auxtrace_info->header.size;
+
+	if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
+		size_t len;
+
+		len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
+		intel_pt_print_info(&auxtrace_info->priv[0],
+				    INTEL_PT_FILTER_STR_LEN,
+				    INTEL_PT_FILTER_STR_LEN);
+		if (len) {
+			const char *filter = (const char *)info;
+
+			len = roundup(len + 1, 8);
+			info += len >> 3;
+			if ((void *)info > info_end) {
+				pr_err("%s: bad filter string length\n", __func__);
+				err = -EINVAL;
+				goto err_free_queues;
+			}
+			pt->filter = memdup(filter, len);
+			if (!pt->filter) {
+				err = -ENOMEM;
+				goto err_free_queues;
+			}
+			if (session->header.needs_swap)
+				mem_bswap_64(pt->filter, len);
+			if (pt->filter[len - 1]) {
+				pr_err("%s: filter string not null terminated\n", __func__);
+				err = -EINVAL;
+				goto err_free_queues;
+			}
+			err = addr_filters__parse_bare_filter(&pt->filts,
+							      filter);
+			if (err)
+				goto err_free_queues;
+		}
+		intel_pt_print_info_str("Filter string", pt->filter);
+	}
+
 	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
 	pt->have_tsc = intel_pt_have_tsc(pt);
 	pt->sampling_mode = false;
@@ -2121,11 +2273,13 @@
 		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
 		if (!pt->switch_evsel) {
 			pr_err("%s: missing sched_switch event\n", __func__);
+			err = -EINVAL;
 			goto err_delete_thread;
 		}
 	} else if (pt->have_sched_switch == 2 &&
 		   !intel_pt_find_switch(session->evlist)) {
 		pr_err("%s: missing context_switch attribute flag\n", __func__);
+		err = -EINVAL;
 		goto err_delete_thread;
 	}
 
@@ -2149,7 +2303,9 @@
 	if (pt->tc.time_mult) {
 		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
 
-		pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
+		if (!pt->max_non_turbo_ratio)
+			pt->max_non_turbo_ratio =
+					(tsc_freq + 50000000) / 100000000;
 		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
 		intel_pt_log("Maximum non-turbo ratio %u\n",
 			     pt->max_non_turbo_ratio);
@@ -2193,6 +2349,8 @@
 	auxtrace_queues__free(&pt->queues);
 	session->auxtrace = NULL;
 err_free:
+	addr_filters__exit(&pt->filts);
+	zfree(&pt->filter);
 	free(pt);
 	return err;
 }
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
index 0065949..e13b14e 100644
--- a/tools/perf/util/intel-pt.h
+++ b/tools/perf/util/intel-pt.h
@@ -34,11 +34,11 @@
 	INTEL_PT_TSC_CTC_N,
 	INTEL_PT_TSC_CTC_D,
 	INTEL_PT_CYC_BIT,
+	INTEL_PT_MAX_NONTURBO_RATIO,
+	INTEL_PT_FILTER_STR_LEN,
 	INTEL_PT_AUXTRACE_PRIV_MAX,
 };
 
-#define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64))
-
 struct auxtrace_record;
 struct perf_tool;
 union perf_event;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 9f3305f..95f0884 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -1,3 +1,4 @@
+#include <sys/sysmacros.h>
 #include <sys/types.h>
 #include <stdio.h>
 #include <stdlib.h>
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
index 95a1acb..9ddea5c 100644
--- a/tools/perf/util/lzma.c
+++ b/tools/perf/util/lzma.c
@@ -29,6 +29,7 @@
 	lzma_action action = LZMA_RUN;
 	lzma_stream strm   = LZMA_STREAM_INIT;
 	lzma_ret ret;
+	int err = -1;
 
 	u8 buf_in[BUFSIZE];
 	u8 buf_out[BUFSIZE];
@@ -45,7 +46,7 @@
 	if (ret != LZMA_OK) {
 		pr_err("lzma: lzma_stream_decoder failed %s (%d)\n",
 			lzma_strerror(ret), ret);
-		return -1;
+		goto err_fclose;
 	}
 
 	strm.next_in   = NULL;
@@ -60,7 +61,7 @@
 
 			if (ferror(infile)) {
 				pr_err("lzma: read error: %s\n", strerror(errno));
-				return -1;
+				goto err_fclose;
 			}
 
 			if (feof(infile))
@@ -74,7 +75,7 @@
 
 			if (writen(output_fd, buf_out, write_size) != write_size) {
 				pr_err("lzma: write error: %s\n", strerror(errno));
-				return -1;
+				goto err_fclose;
 			}
 
 			strm.next_out  = buf_out;
@@ -83,13 +84,15 @@
 
 		if (ret != LZMA_OK) {
 			if (ret == LZMA_STREAM_END)
-				return 0;
+				break;
 
 			pr_err("lzma: failed %s\n", lzma_strerror(ret));
-			return -1;
+			goto err_fclose;
 		}
 	}
 
+	err = 0;
+err_fclose:
 	fclose(infile);
-	return 0;
+	return err;
 }
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index cb6388d..18e4519 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -41,7 +41,6 @@
 
 	machine->pid = pid;
 
-	machine->symbol_filter = NULL;
 	machine->id_hdr_size = 0;
 	machine->kptr_restrict_warned = false;
 	machine->comm_exec = false;
@@ -148,7 +147,6 @@
 {
 	machine__init(&machines->host, "", HOST_KERNEL_ID);
 	machines->guests = RB_ROOT;
-	machines->symbol_filter = NULL;
 }
 
 void machines__exit(struct machines *machines)
@@ -172,8 +170,6 @@
 		return NULL;
 	}
 
-	machine->symbol_filter = machines->symbol_filter;
-
 	while (*p != NULL) {
 		parent = *p;
 		pos = rb_entry(parent, struct machine, rb_node);
@@ -189,21 +185,6 @@
 	return machine;
 }
 
-void machines__set_symbol_filter(struct machines *machines,
-				 symbol_filter_t symbol_filter)
-{
-	struct rb_node *nd;
-
-	machines->symbol_filter = symbol_filter;
-	machines->host.symbol_filter = symbol_filter;
-
-	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
-		struct machine *machine = rb_entry(nd, struct machine, rb_node);
-
-		machine->symbol_filter = symbol_filter;
-	}
-}
-
 void machines__set_comm_exec(struct machines *machines, bool comm_exec)
 {
 	struct rb_node *nd;
@@ -916,10 +897,10 @@
 }
 
 int __machine__load_kallsyms(struct machine *machine, const char *filename,
-			     enum map_type type, bool no_kcore, symbol_filter_t filter)
+			     enum map_type type, bool no_kcore)
 {
 	struct map *map = machine__kernel_map(machine);
-	int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore, filter);
+	int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore);
 
 	if (ret > 0) {
 		dso__set_loaded(map->dso, type);
@@ -935,16 +916,15 @@
 }
 
 int machine__load_kallsyms(struct machine *machine, const char *filename,
-			   enum map_type type, symbol_filter_t filter)
+			   enum map_type type)
 {
-	return __machine__load_kallsyms(machine, filename, type, false, filter);
+	return __machine__load_kallsyms(machine, filename, type, false);
 }
 
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
-			       symbol_filter_t filter)
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
 {
 	struct map *map = machine__kernel_map(machine);
-	int ret = dso__load_vmlinux_path(map->dso, map, filter);
+	int ret = dso__load_vmlinux_path(map->dso, map);
 
 	if (ret > 0)
 		dso__set_loaded(map->dso, type);
@@ -1313,7 +1293,7 @@
 			/*
 			 * preload dso of guest kernel and modules
 			 */
-			dso__load(kernel, machine__kernel_map(machine), NULL);
+			dso__load(kernel, machine__kernel_map(machine));
 		}
 	}
 	return 0;
@@ -2115,7 +2095,7 @@
 	 */
 	machine->kernel_start = 1ULL << 63;
 	if (map) {
-		err = map__load(map, machine->symbol_filter);
+		err = map__load(map);
 		if (map->start)
 			machine->kernel_start = map->start;
 	}
@@ -2131,7 +2111,7 @@
 {
 	struct machine *machine = vmachine;
 	struct map *map;
-	struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map,  NULL);
+	struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map);
 
 	if (sym == NULL)
 		return NULL;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 20739f7..354de6e 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -41,7 +41,6 @@
 	struct map_groups kmaps;
 	struct map	  *vmlinux_maps[MAP__NR_TYPES];
 	u64		  kernel_start;
-	symbol_filter_t	  symbol_filter;
 	pid_t		  *current_tid;
 	union { /* Tool specific area */
 		void	  *priv;
@@ -110,7 +109,6 @@
 struct machines {
 	struct machine host;
 	struct rb_root guests;
-	symbol_filter_t symbol_filter;
 };
 
 void machines__init(struct machines *machines);
@@ -128,8 +126,6 @@
 void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
 char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
 
-void machines__set_symbol_filter(struct machines *machines,
-				 symbol_filter_t symbol_filter);
 void machines__set_comm_exec(struct machines *machines, bool comm_exec);
 
 struct machine *machine__new_host(void);
@@ -178,40 +174,33 @@
 static inline
 struct symbol *machine__find_kernel_symbol(struct machine *machine,
 					   enum map_type type, u64 addr,
-					   struct map **mapp,
-					   symbol_filter_t filter)
+					   struct map **mapp)
 {
-	return map_groups__find_symbol(&machine->kmaps, type, addr,
-				       mapp, filter);
+	return map_groups__find_symbol(&machine->kmaps, type, addr, mapp);
 }
 
 static inline
 struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
 						   enum map_type type, const char *name,
-						   struct map **mapp,
-						   symbol_filter_t filter)
+						   struct map **mapp)
 {
-	return map_groups__find_symbol_by_name(&machine->kmaps, type, name,
-					       mapp, filter);
+	return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp);
 }
 
 static inline
 struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr,
-					     struct map **mapp,
-					     symbol_filter_t filter)
+					     struct map **mapp)
 {
 	return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr,
-					   mapp, filter);
+					   mapp);
 }
 
 static inline
 struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
 						     const char *name,
-						     struct map **mapp,
-						     symbol_filter_t filter)
+						     struct map **mapp)
 {
-	return map_groups__find_function_by_name(&machine->kmaps, name, mapp,
-						 filter);
+	return map_groups__find_function_by_name(&machine->kmaps, name, mapp);
 }
 
 struct map *machine__findnew_module_map(struct machine *machine, u64 start,
@@ -219,11 +208,10 @@
 int arch__fix_module_text_start(u64 *start, const char *name);
 
 int __machine__load_kallsyms(struct machine *machine, const char *filename,
-			     enum map_type type, bool no_kcore, symbol_filter_t filter);
+			     enum map_type type, bool no_kcore);
 int machine__load_kallsyms(struct machine *machine, const char *filename,
-			   enum map_type type, symbol_filter_t filter);
-int machine__load_vmlinux_path(struct machine *machine, enum map_type type,
-			       symbol_filter_t filter);
+			   enum map_type type);
+int machine__load_vmlinux_path(struct machine *machine, enum map_type type);
 
 size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
 				     bool (skip)(struct dso *dso, int parm), int parm);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 728129a..c662fef 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -6,6 +6,7 @@
 #include <string.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
 #include "map.h"
 #include "thread.h"
 #include "strlist.h"
@@ -24,9 +25,10 @@
 	[MAP__VARIABLE] = "Variables",
 };
 
-static inline int is_anon_memory(const char *filename)
+static inline int is_anon_memory(const char *filename, u32 flags)
 {
-	return !strcmp(filename, "//anon") ||
+	return flags & MAP_HUGETLB ||
+	       !strcmp(filename, "//anon") ||
 	       !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
 	       !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
 }
@@ -155,7 +157,7 @@
 		int anon, no_dso, vdso, android;
 
 		android = is_android_lib(filename);
-		anon = is_anon_memory(filename);
+		anon = is_anon_memory(filename, flags);
 		vdso = is_vdso_map(filename);
 		no_dso = is_no_dso_memory(filename);
 
@@ -279,7 +281,7 @@
 
 #define DSO__DELETED "(deleted)"
 
-int map__load(struct map *map, symbol_filter_t filter)
+int map__load(struct map *map)
 {
 	const char *name = map->dso->long_name;
 	int nr;
@@ -287,7 +289,7 @@
 	if (dso__loaded(map->dso, map->type))
 		return 0;
 
-	nr = dso__load(map->dso, map, filter);
+	nr = dso__load(map->dso, map);
 	if (nr < 0) {
 		if (map->dso->has_build_id) {
 			char sbuild_id[SBUILD_ID_SIZE];
@@ -312,9 +314,6 @@
 			pr_warning("%.*s was updated (is prelink enabled?). "
 				"Restart the long running apps that use it!\n",
 				   (int)real_len, name);
-		} else if (filter) {
-			pr_warning("no symbols passed the given filter.\n");
-			return -2;	/* Empty but maybe by the filter */
 		} else {
 			pr_warning("no symbols found in %s, maybe install "
 				   "a debug package?\n", name);
@@ -331,19 +330,17 @@
 	return strcmp(namea, nameb);
 }
 
-struct symbol *map__find_symbol(struct map *map, u64 addr,
-				symbol_filter_t filter)
+struct symbol *map__find_symbol(struct map *map, u64 addr)
 {
-	if (map__load(map, filter) < 0)
+	if (map__load(map) < 0)
 		return NULL;
 
 	return dso__find_symbol(map->dso, map->type, addr);
 }
 
-struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
-					symbol_filter_t filter)
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
 {
-	if (map__load(map, filter) < 0)
+	if (map__load(map) < 0)
 		return NULL;
 
 	if (!dso__sorted_by_name(map->dso, map->type))
@@ -556,23 +553,22 @@
 
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
 				       enum map_type type, u64 addr,
-				       struct map **mapp,
-				       symbol_filter_t filter)
+				       struct map **mapp)
 {
 	struct map *map = map_groups__find(mg, type, addr);
 
 	/* Ensure map is loaded before using map->map_ip */
-	if (map != NULL && map__load(map, filter) >= 0) {
+	if (map != NULL && map__load(map) >= 0) {
 		if (mapp != NULL)
 			*mapp = map;
-		return map__find_symbol(map, map->map_ip(map, addr), filter);
+		return map__find_symbol(map, map->map_ip(map, addr));
 	}
 
 	return NULL;
 }
 
 struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
-					 struct map **mapp, symbol_filter_t filter)
+					 struct map **mapp)
 {
 	struct symbol *sym;
 	struct rb_node *nd;
@@ -582,7 +578,7 @@
 	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node);
 
-		sym = map__find_symbol_by_name(pos, name, filter);
+		sym = map__find_symbol_by_name(pos, name);
 
 		if (sym == NULL)
 			continue;
@@ -600,15 +596,14 @@
 struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
 					       enum map_type type,
 					       const char *name,
-					       struct map **mapp,
-					       symbol_filter_t filter)
+					       struct map **mapp)
 {
-	struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp, filter);
+	struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp);
 
 	return sym;
 }
 
-int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
+int map_groups__find_ams(struct addr_map_symbol *ams)
 {
 	if (ams->addr < ams->map->start || ams->addr >= ams->map->end) {
 		if (ams->map->groups == NULL)
@@ -620,7 +615,7 @@
 	}
 
 	ams->al_addr = ams->map->map_ip(ams->map, ams->addr);
-	ams->sym = map__find_symbol(ams->map, ams->al_addr, filter);
+	ams->sym = map__find_symbol(ams->map, ams->al_addr);
 
 	return ams->sym ? 0 : -1;
 }
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index d83396c..abdacf8 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -127,17 +127,14 @@
  * @map: the 'struct map *' in which symbols itereated
  * @sym_name: the symbol name
  * @pos: the 'struct symbol *' to use as a loop cursor
- * @filter: to use when loading the DSO
  */
-#define __map__for_each_symbol_by_name(map, sym_name, pos, filter)	\
-	for (pos = map__find_symbol_by_name(map, sym_name, filter);	\
+#define __map__for_each_symbol_by_name(map, sym_name, pos)	\
+	for (pos = map__find_symbol_by_name(map, sym_name);	\
 	     pos && arch__compare_symbol_names(pos->name, sym_name) == 0;	\
 	     pos = symbol__next_by_name(pos))
 
 #define map__for_each_symbol_by_name(map, sym_name, pos)		\
-	__map__for_each_symbol_by_name(map, sym_name, (pos), NULL)
-
-typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
+	__map__for_each_symbol_by_name(map, sym_name, (pos))
 
 int arch__compare_symbol_names(const char *namea, const char *nameb);
 void map__init(struct map *map, enum map_type type,
@@ -173,11 +170,9 @@
 int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
 			 FILE *fp);
 
-int map__load(struct map *map, symbol_filter_t filter);
-struct symbol *map__find_symbol(struct map *map,
-				u64 addr, symbol_filter_t filter);
-struct symbol *map__find_symbol_by_name(struct map *map, const char *name,
-					symbol_filter_t filter);
+int map__load(struct map *map);
+struct symbol *map__find_symbol(struct map *map, u64 addr);
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name);
 void map__fixup_start(struct map *map);
 void map__fixup_end(struct map *map);
 
@@ -191,7 +186,7 @@
 struct map *maps__first(struct maps *maps);
 struct map *map__next(struct map *map);
 struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
-                                         struct map **mapp, symbol_filter_t filter);
+                                         struct map **mapp);
 void map_groups__init(struct map_groups *mg, struct machine *machine);
 void map_groups__exit(struct map_groups *mg);
 int map_groups__clone(struct thread *thread,
@@ -231,25 +226,22 @@
 
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
 				       enum map_type type, u64 addr,
-				       struct map **mapp,
-				       symbol_filter_t filter);
+				       struct map **mapp);
 
 struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
 					       enum map_type type,
 					       const char *name,
-					       struct map **mapp,
-					       symbol_filter_t filter);
+					       struct map **mapp);
 
 struct addr_map_symbol;
 
-int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter);
+int map_groups__find_ams(struct addr_map_symbol *ams);
 
 static inline
 struct symbol *map_groups__find_function_by_name(struct map_groups *mg,
-						 const char *name, struct map **mapp,
-						 symbol_filter_t filter)
+						 const char *name, struct map **mapp)
 {
-	return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp, filter);
+	return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp);
 }
 
 int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6c913c3..33546c3 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -904,6 +904,7 @@
 	[PARSE_EVENTS__TERM_TYPE_MAX_STACK]		= "max-stack",
 	[PARSE_EVENTS__TERM_TYPE_OVERWRITE]		= "overwrite",
 	[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]		= "no-overwrite",
+	[PARSE_EVENTS__TERM_TYPE_DRV_CFG]		= "driver-config",
 };
 
 static bool config_term_shrinked;
@@ -1034,7 +1035,8 @@
 			   struct parse_events_term *term,
 			   struct parse_events_error *err)
 {
-	if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER)
+	if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||
+	    term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG)
 		/*
 		 * Always succeed for sysfs terms, as we dont know
 		 * at this point what type they need to have.
@@ -1134,6 +1136,9 @@
 		case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
 			ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1);
 			break;
+		case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+			ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str);
+			break;
 		default:
 			break;
 		}
@@ -1755,20 +1760,49 @@
 static int set_filter(struct perf_evsel *evsel, const void *arg)
 {
 	const char *str = arg;
+	bool found = false;
+	int nr_addr_filters = 0;
+	struct perf_pmu *pmu = NULL;
 
-	if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) {
-		fprintf(stderr,
-			"--filter option should follow a -e tracepoint option\n");
-		return -1;
+	if (evsel == NULL)
+		goto err;
+
+	if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+		if (perf_evsel__append_tp_filter(evsel, str) < 0) {
+			fprintf(stderr,
+				"not enough memory to hold filter string\n");
+			return -1;
+		}
+
+		return 0;
 	}
 
-	if (perf_evsel__append_filter(evsel, "&&", str) < 0) {
+	while ((pmu = perf_pmu__scan(pmu)) != NULL)
+		if (pmu->type == evsel->attr.type) {
+			found = true;
+			break;
+		}
+
+	if (found)
+		perf_pmu__scan_file(pmu, "nr_addr_filters",
+				    "%d", &nr_addr_filters);
+
+	if (!nr_addr_filters)
+		goto err;
+
+	if (perf_evsel__append_addr_filter(evsel, str) < 0) {
 		fprintf(stderr,
 			"not enough memory to hold filter string\n");
 		return -1;
 	}
 
 	return 0;
+
+err:
+	fprintf(stderr,
+		"--filter option should follow a -e tracepoint or HW tracer option\n");
+
+	return -1;
 }
 
 int parse_filter(const struct option *opt, const char *str,
@@ -1793,7 +1827,7 @@
 
 	snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
 
-	if (perf_evsel__append_filter(evsel, "&&", new_filter) < 0) {
+	if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) {
 		fprintf(stderr,
 			"not enough memory to hold filter string\n");
 		return -1;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index d1edbf8..8d09a97 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -71,6 +71,7 @@
 	PARSE_EVENTS__TERM_TYPE_MAX_STACK,
 	PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_OVERWRITE,
+	PARSE_EVENTS__TERM_TYPE_DRV_CFG,
 	__PARSE_EVENTS__TERM_TYPE_NR,
 };
 
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 7a25194..9f43fda 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -53,6 +53,26 @@
 	return token;
 }
 
+/*
+ * This function is called when the parser gets two kind of input:
+ *
+ * 	@cfg1 or @cfg2=config
+ *
+ * The leading '@' is stripped off before 'cfg1' and 'cfg2=config' are given to
+ * bison.  In the latter case it is necessary to keep the string intact so that
+ * the PMU kernel driver can determine what configurable is associated to
+ * 'config'.
+ */
+static int drv_str(yyscan_t scanner, int token)
+{
+	YYSTYPE *yylval = parse_events_get_lval(scanner);
+	char *text = parse_events_get_text(scanner);
+
+	/* Strip off the '@' */
+	yylval->str = strdup(text + 1);
+	return token;
+}
+
 #define REWIND(__alloc)				\
 do {								\
 	YYSTYPE *__yylval = parse_events_get_lval(yyscanner);	\
@@ -124,6 +144,7 @@
 num_raw_hex	[a-fA-F0-9]+
 name		[a-zA-Z_*?][a-zA-Z0-9_*?.]*
 name_minus	[a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
+drv_cfg_term	[a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
 /* If you add a modifier you need to update check_modifier() */
 modifier_event	[ukhpPGHSDI]+
 modifier_bp	[rwx]{1,3}
@@ -209,6 +230,7 @@
 {name_minus}		{ return str(yyscanner, PE_NAME); }
 \[all\]			{ return PE_ARRAY_ALL; }
 "["			{ BEGIN(array); return '['; }
+@{drv_cfg_term}		{ return drv_str(yyscanner, PE_DRV_CFG_TERM); }
 }
 
 <mem>{
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 5be4a5f..879115f 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -49,6 +49,7 @@
 %token PE_ERROR
 %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
 %token PE_ARRAY_ALL PE_ARRAY_RANGE
+%token PE_DRV_CFG_TERM
 %type <num> PE_VALUE
 %type <num> PE_VALUE_SYM_HW
 %type <num> PE_VALUE_SYM_SW
@@ -63,6 +64,7 @@
 %type <str> PE_MODIFIER_BP
 %type <str> PE_EVENT_NAME
 %type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%type <str> PE_DRV_CFG_TERM
 %type <num> value_sym
 %type <head> event_config
 %type <head> opt_event_config
@@ -599,6 +601,15 @@
 	term->array = $2;
 	$$ = term;
 }
+|
+PE_DRV_CFG_TERM
+{
+	struct parse_events_term *term;
+
+	ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+					$1, $1, &@1, NULL));
+	$$ = term;
+}
 
 array:
 '[' array_terms ']'
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index ddb0261..2babcdf 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -445,14 +445,23 @@
 	FILE *file;
 	struct cpu_map *cpus;
 	const char *sysfs = sysfs__mountpoint();
+	const char *templates[] = {
+		 "%s/bus/event_source/devices/%s/cpumask",
+		 "%s/bus/event_source/devices/%s/cpus",
+		 NULL
+	};
+	const char **template;
 
 	if (!sysfs)
 		return NULL;
 
-	snprintf(path, PATH_MAX,
-		 "%s/bus/event_source/devices/%s/cpumask", sysfs, name);
+	for (template = templates; *template; template++) {
+		snprintf(path, PATH_MAX, *template, sysfs, name);
+		if (stat(path, &st) == 0)
+			break;
+	}
 
-	if (stat(path, &st) < 0)
+	if (!*template)
 		return NULL;
 
 	file = fopen(path, "r");
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 5d7e844..743422a 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -4,6 +4,7 @@
 #include <linux/bitmap.h>
 #include <linux/perf_event.h>
 #include <stdbool.h>
+#include "evsel.h"
 #include "parse-events.h"
 
 enum {
@@ -25,6 +26,7 @@
 	struct list_head format;  /* HEAD struct perf_pmu_format -> list */
 	struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
 	struct list_head list;    /* ELEM */
+	int (*set_drv_config)	(struct perf_evsel_config_term *term);
 };
 
 struct perf_pmu_info {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 953dc1a..fcfbef0 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -110,13 +110,12 @@
 static struct symbol *__find_kernel_function_by_name(const char *name,
 						     struct map **mapp)
 {
-	return machine__find_kernel_function_by_name(host_machine, name, mapp,
-						     NULL);
+	return machine__find_kernel_function_by_name(host_machine, name, mapp);
 }
 
 static struct symbol *__find_kernel_function(u64 addr, struct map **mapp)
 {
-	return machine__find_kernel_function(host_machine, addr, mapp, NULL);
+	return machine__find_kernel_function(host_machine, addr, mapp);
 }
 
 static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
@@ -125,7 +124,7 @@
 	struct kmap *kmap;
 	struct map *map = machine__kernel_map(host_machine);
 
-	if (map__load(map, NULL) < 0)
+	if (map__load(map) < 0)
 		return NULL;
 
 	kmap = map__kmap(map);
@@ -170,15 +169,17 @@
 		module = "kernel";
 
 	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+		/* short_name is "[module]" */
 		if (strncmp(pos->dso->short_name + 1, module,
-			    pos->dso->short_name_len - 2) == 0) {
+			    pos->dso->short_name_len - 2) == 0 &&
+		    module[pos->dso->short_name_len - 2] == '\0') {
 			return pos;
 		}
 	}
 	return NULL;
 }
 
-static struct map *get_target_map(const char *target, bool user)
+struct map *get_target_map(const char *target, bool user)
 {
 	/* Init maps of given executable or kernel */
 	if (user)
@@ -212,9 +213,13 @@
 		goto out;
 	}
 
-	ptr2 = strpbrk(ptr1, "-._");
-	if (ptr2)
-		*ptr2 = '\0';
+	for (ptr2 = ptr1; ptr2 != '\0'; ptr2++) {
+		if (!isalnum(*ptr2) && *ptr2 != '_') {
+			*ptr2 = '\0';
+			break;
+		}
+	}
+
 	ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1);
 	if (ret < 0)
 		goto out;
@@ -349,9 +354,9 @@
 	vmlinux_name = symbol_conf.vmlinux_name;
 	dso->load_errno = 0;
 	if (vmlinux_name)
-		ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL);
+		ret = dso__load_vmlinux(dso, map, vmlinux_name, false);
 	else
-		ret = dso__load_vmlinux_path(dso, map, NULL);
+		ret = dso__load_vmlinux_path(dso, map);
 found:
 	*pdso = dso;
 	return ret;
@@ -385,7 +390,7 @@
 		if (uprobes)
 			address = sym->start;
 		else
-			address = map->unmap_ip(map, sym->start);
+			address = map->unmap_ip(map, sym->start) - map->reloc;
 		break;
 	}
 	if (!address) {
@@ -664,21 +669,17 @@
 	return ret;
 }
 
-/* Post processing the probe events */
-static int post_process_probe_trace_events(struct probe_trace_event *tevs,
-					   int ntevs, const char *module,
-					   bool uprobe)
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+				       int ntevs)
 {
 	struct ref_reloc_sym *reloc_sym;
 	char *tmp;
 	int i, skipped = 0;
 
-	if (uprobe)
-		return add_exec_to_probe_trace_events(tevs, ntevs, module);
-
-	/* Note that currently ref_reloc_sym based probe is not for drivers */
-	if (module)
-		return add_module_to_probe_trace_events(tevs, ntevs, module);
+	/* Skip post process if the target is an offline kernel */
+	if (symbol_conf.ignore_vmlinux_buildid)
+		return 0;
 
 	reloc_sym = kernel_get_ref_reloc_sym();
 	if (!reloc_sym) {
@@ -711,6 +712,34 @@
 	return skipped;
 }
 
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+				      int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+					   struct probe_trace_event *tevs,
+					   int ntevs, const char *module,
+					   bool uprobe)
+{
+	int ret;
+
+	if (uprobe)
+		ret = add_exec_to_probe_trace_events(tevs, ntevs, module);
+	else if (module)
+		/* Currently ref_reloc_sym based probe is not for drivers */
+		ret = add_module_to_probe_trace_events(tevs, ntevs, module);
+	else
+		ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+	if (ret >= 0)
+		arch__post_process_probe_trace_events(pev, ntevs);
+
+	return ret;
+}
+
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 					  struct probe_trace_event **tevs)
@@ -749,7 +778,7 @@
 
 	if (ntevs > 0) {	/* Succeeded to find trace events */
 		pr_debug("Found %d probe_trace_events.\n", ntevs);
-		ret = post_process_probe_trace_events(*tevs, ntevs,
+		ret = post_process_probe_trace_events(pev, *tevs, ntevs,
 						pev->target, pev->uprobes);
 		if (ret < 0 || ret == ntevs) {
 			clear_probe_trace_events(*tevs, ntevs);
@@ -1592,19 +1621,27 @@
 	return ret;
 }
 
+/* Returns true if *any* ARG is either C variable, $params or $vars. */
+bool perf_probe_with_var(struct perf_probe_event *pev)
+{
+	int i = 0;
+
+	for (i = 0; i < pev->nargs; i++)
+		if (is_c_varname(pev->args[i].var)              ||
+		    !strcmp(pev->args[i].var, PROBE_ARG_PARAMS) ||
+		    !strcmp(pev->args[i].var, PROBE_ARG_VARS))
+			return true;
+	return false;
+}
+
 /* Return true if this perf_probe_event requires debuginfo */
 bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
 {
-	int i;
-
 	if (pev->point.file || pev->point.line || pev->point.lazy_line)
 		return true;
 
-	for (i = 0; i < pev->nargs; i++)
-		if (is_c_varname(pev->args[i].var) ||
-		    !strcmp(pev->args[i].var, "$params") ||
-		    !strcmp(pev->args[i].var, "$vars"))
-			return true;
+	if (perf_probe_with_var(pev))
+		return true;
 
 	return false;
 }
@@ -1965,7 +2002,7 @@
 		map = dso__new_map(tp->module);
 		if (!map)
 			goto out;
-		sym = map__find_symbol(map, addr, NULL);
+		sym = map__find_symbol(map, addr);
 	} else {
 		if (tp->symbol && !addr) {
 			if (kernel_get_symbol_address_by_name(tp->symbol,
@@ -2670,7 +2707,7 @@
 	struct symbol *sym;
 	struct rb_node *tmp;
 
-	if (map__load(map, NULL) < 0)
+	if (map__load(map) < 0)
 		return 0;
 
 	map__for_each_symbol(map, sym, tmp) {
@@ -2936,8 +2973,6 @@
 	return err;
 }
 
-bool __weak arch__prefers_symtab(void) { return false; }
-
 /* Concatinate two arrays */
 static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
 {
@@ -3158,12 +3193,6 @@
 	if (ret > 0 || pev->sdt)	/* SDT can be found only in the cache */
 		return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
 
-	if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
-		ret = find_probe_trace_events_from_map(pev, tevs);
-		if (ret > 0)
-			return ret; /* Found in symbol table */
-	}
-
 	/* Convert perf_probe_event with debuginfo */
 	ret = try_to_find_probe_trace_events(pev, tevs);
 	if (ret != 0)
@@ -3193,6 +3222,52 @@
 	return 0;
 }
 
+static int show_probe_trace_event(struct probe_trace_event *tev)
+{
+	char *buf = synthesize_probe_trace_command(tev);
+
+	if (!buf) {
+		pr_debug("Failed to synthesize probe trace event.\n");
+		return -EINVAL;
+	}
+
+	/* Showing definition always go stdout */
+	printf("%s\n", buf);
+	free(buf);
+
+	return 0;
+}
+
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs)
+{
+	struct strlist *namelist = strlist__new(NULL, NULL);
+	struct probe_trace_event *tev;
+	struct perf_probe_event *pev;
+	int i, j, ret = 0;
+
+	if (!namelist)
+		return -ENOMEM;
+
+	for (j = 0; j < npevs && !ret; j++) {
+		pev = &pevs[j];
+		for (i = 0; i < pev->ntevs && !ret; i++) {
+			tev = &pev->tevs[i];
+			/* Skip if the symbol is out of .text or blacklisted */
+			if (!tev->point.symbol && !pev->uprobes)
+				continue;
+
+			/* Set new name for tev (and update namelist) */
+			ret = probe_trace_event__set_name(tev, pev,
+							  namelist, true);
+			if (!ret)
+				ret = show_probe_trace_event(tev);
+		}
+	}
+	strlist__delete(namelist);
+
+	return ret;
+}
+
 int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs)
 {
 	int i, ret = 0;
@@ -3275,24 +3350,10 @@
 	return ret;
 }
 
-/* TODO: don't use a global variable for filter ... */
-static struct strfilter *available_func_filter;
-
-/*
- * If a symbol corresponds to a function with global binding and
- * matches filter return 0. For all others return 1.
- */
-static int filter_available_functions(struct map *map __maybe_unused,
-				      struct symbol *sym)
-{
-	if (strfilter__compare(available_func_filter, sym->name))
-		return 0;
-	return 1;
-}
-
 int show_available_funcs(const char *target, struct strfilter *_filter,
 					bool user)
 {
+        struct rb_node *nd;
 	struct map *map;
 	int ret;
 
@@ -3310,9 +3371,7 @@
 		return -EINVAL;
 	}
 
-	/* Load symbols with given filter */
-	available_func_filter = _filter;
-	ret = map__load(map, filter_available_functions);
+	ret = map__load(map);
 	if (ret) {
 		if (ret == -2) {
 			char *str = strfilter__string(_filter);
@@ -3329,7 +3388,14 @@
 
 	/* Show all (filtered) symbols */
 	setup_pager();
-	dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
+
+        for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) {
+		struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+
+		if (strfilter__compare(_filter, pos->sym.name))
+			printf("%s\n", pos->sym.name);
+        }
+
 end:
 	if (user) {
 		map__put(map);
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e18ea9f..8091d15 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -128,6 +128,8 @@
 int perf_probe_event__copy(struct perf_probe_event *dst,
 			   struct perf_probe_event *src);
 
+bool perf_probe_with_var(struct perf_probe_event *pev);
+
 /* Check the perf_probe_event needs debuginfo */
 bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
 
@@ -147,6 +149,7 @@
 int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
 int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
 int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs);
 void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
 int del_perf_probe_events(struct strfilter *filter);
 
@@ -158,7 +161,6 @@
 int show_available_vars(struct perf_probe_event *pevs, int npevs,
 			struct strfilter *filter);
 int show_available_funcs(const char *module, struct strfilter *filter, bool user);
-bool arch__prefers_symtab(void);
 void arch__fix_tev_from_maps(struct perf_probe_event *pev,
 			     struct probe_trace_event *tev, struct map *map,
 			     struct symbol *sym);
@@ -173,4 +175,9 @@
 int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
 			    struct perf_probe_arg *pvar);
 
+struct map *get_target_map(const char *target, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+					   int ntevs);
+
 #endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 9aed9c3..436b647 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -73,11 +73,10 @@
 static int open_probe_events(const char *trace_file, bool readwrite)
 {
 	char buf[PATH_MAX];
-	const char *tracing_dir = "";
 	int ret;
 
-	ret = e_snprintf(buf, PATH_MAX, "%s/%s%s",
-			 tracing_path, tracing_dir, trace_file);
+	ret = e_snprintf(buf, PATH_MAX, "%s/%s",
+			 tracing_path, trace_file);
 	if (ret >= 0) {
 		pr_debug("Opening %s write=%d\n", buf, readwrite);
 		if (readwrite && !probe_event_dry_run)
@@ -133,7 +132,7 @@
 /* Get raw string list of current kprobe_events  or uprobe_events */
 struct strlist *probe_file__get_rawlist(int fd)
 {
-	int ret, idx;
+	int ret, idx, fddup;
 	FILE *fp;
 	char buf[MAX_CMDLEN];
 	char *p;
@@ -143,8 +142,17 @@
 		return NULL;
 
 	sl = strlist__new(NULL, NULL);
+	if (sl == NULL)
+		return NULL;
 
-	fp = fdopen(dup(fd), "r");
+	fddup = dup(fd);
+	if (fddup < 0)
+		goto out_free_sl;
+
+	fp = fdopen(fddup, "r");
+	if (!fp)
+		goto out_close_fddup;
+
 	while (!feof(fp)) {
 		p = fgets(buf, MAX_CMDLEN, fp);
 		if (!p)
@@ -156,13 +164,21 @@
 		ret = strlist__add(sl, buf);
 		if (ret < 0) {
 			pr_debug("strlist__add failed (%d)\n", ret);
-			strlist__delete(sl);
-			return NULL;
+			goto out_close_fp;
 		}
 	}
 	fclose(fp);
 
 	return sl;
+
+out_close_fp:
+	fclose(fp);
+	goto out_free_sl;
+out_close_fddup:
+	close(fddup);
+out_free_sl:
+	strlist__delete(sl);
+	return NULL;
 }
 
 static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
@@ -447,12 +463,17 @@
 {
 	struct probe_cache_entry *entry = NULL;
 	char buf[MAX_CMDLEN], *p;
-	int ret = 0;
+	int ret = 0, fddup;
 	FILE *fp;
 
-	fp = fdopen(dup(pcache->fd), "r");
-	if (!fp)
+	fddup = dup(pcache->fd);
+	if (fddup < 0)
+		return -errno;
+	fp = fdopen(fddup, "r");
+	if (!fp) {
+		close(fddup);
 		return -EINVAL;
+	}
 
 	while (!feof(fp)) {
 		if (!fgets(buf, MAX_CMDLEN, fp))
@@ -678,7 +699,7 @@
 	INIT_LIST_HEAD(&sdtlist);
 	ret = get_sdt_note_list(&sdtlist, pathname);
 	if (ret < 0) {
-		pr_debug("Failed to get sdt note: %d\n", ret);
+		pr_debug4("Failed to get sdt note: %d\n", ret);
 		return ret;
 	}
 	list_for_each_entry(note, &sdtlist, note_list) {
@@ -855,3 +876,60 @@
 
 	return 0;
 }
+
+static struct {
+	const char *pattern;
+	bool	avail;
+	bool	checked;
+} probe_type_table[] = {
+#define DEFINE_TYPE(idx, pat, def_avail)	\
+	[idx] = {.pattern = pat, .avail = (def_avail)}
+	DEFINE_TYPE(PROBE_TYPE_U, "* u8/16/32/64,*", true),
+	DEFINE_TYPE(PROBE_TYPE_S, "* s8/16/32/64,*", true),
+	DEFINE_TYPE(PROBE_TYPE_X, "* x8/16/32/64,*", false),
+	DEFINE_TYPE(PROBE_TYPE_STRING, "* string,*", true),
+	DEFINE_TYPE(PROBE_TYPE_BITFIELD,
+		    "* b<bit-width>@<bit-offset>/<container-size>", true),
+};
+
+bool probe_type_is_available(enum probe_type type)
+{
+	FILE *fp;
+	char *buf = NULL;
+	size_t len = 0;
+	bool target_line = false;
+	bool ret = probe_type_table[type].avail;
+
+	if (type >= PROBE_TYPE_END)
+		return false;
+	/* We don't have to check the type which supported by default */
+	if (ret || probe_type_table[type].checked)
+		return ret;
+
+	if (asprintf(&buf, "%s/README", tracing_path) < 0)
+		return ret;
+
+	fp = fopen(buf, "r");
+	if (!fp)
+		goto end;
+
+	zfree(&buf);
+	while (getline(&buf, &len, fp) > 0 && !ret) {
+		if (!target_line) {
+			target_line = !!strstr(buf, " type: ");
+			if (!target_line)
+				continue;
+		} else if (strstr(buf, "\t          ") != buf)
+			break;
+		ret = strglobmatch(buf, probe_type_table[type].pattern);
+	}
+	/* Cache the result */
+	probe_type_table[type].checked = true;
+	probe_type_table[type].avail = ret;
+
+	fclose(fp);
+end:
+	free(buf);
+
+	return ret;
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index 9577b5c..eba44c3 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -19,6 +19,15 @@
 	struct list_head entries;
 };
 
+enum probe_type {
+	PROBE_TYPE_U = 0,
+	PROBE_TYPE_S,
+	PROBE_TYPE_X,
+	PROBE_TYPE_STRING,
+	PROBE_TYPE_BITFIELD,
+	PROBE_TYPE_END,
+};
+
 #define PF_FL_UPROBE	1
 #define PF_FL_RW	2
 #define for_each_probe_cache_entry(entry, pcache) \
@@ -54,6 +63,7 @@
 struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
 					const char *group, const char *event);
 int probe_cache__show_all_caches(struct strfilter *filter);
+bool probe_type_is_available(enum probe_type type);
 #else	/* ! HAVE_LIBELF_SUPPORT */
 static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused)
 {
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index f2d9ff0..df4debe 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -39,6 +39,7 @@
 #include "util.h"
 #include "symbol.h"
 #include "probe-finder.h"
+#include "probe-file.h"
 
 /* Kprobe tracer basic type is up to u64 */
 #define MAX_BASIC_TYPE_BITS	64
@@ -170,6 +171,7 @@
  */
 static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
 				     Dwarf_Op *fb_ops, Dwarf_Die *sp_die,
+				     unsigned int machine,
 				     struct probe_trace_arg *tvar)
 {
 	Dwarf_Attribute attr;
@@ -265,7 +267,7 @@
 	if (!tvar)
 		return ret2;
 
-	regs = get_arch_regstr(regn);
+	regs = get_dwarf_regstr(regn, machine);
 	if (!regs) {
 		/* This should be a bug in DWARF or this tool */
 		pr_warning("Mapping for the register number %u "
@@ -297,10 +299,13 @@
 	char sbuf[STRERR_BUFSIZE];
 	int bsize, boffs, total;
 	int ret;
+	char prefix;
 
 	/* TODO: check all types */
-	if (cast && strcmp(cast, "string") != 0) {
+	if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "x") != 0 &&
+	    strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
 		/* Non string type is OK */
+		/* and respect signedness/hexadecimal cast */
 		tvar->type = strdup(cast);
 		return (tvar->type == NULL) ? -ENOMEM : 0;
 	}
@@ -361,6 +366,17 @@
 		return (tvar->type == NULL) ? -ENOMEM : 0;
 	}
 
+	if (cast && (strcmp(cast, "u") == 0))
+		prefix = 'u';
+	else if (cast && (strcmp(cast, "s") == 0))
+		prefix = 's';
+	else if (cast && (strcmp(cast, "x") == 0) &&
+		 probe_type_is_available(PROBE_TYPE_X))
+		prefix = 'x';
+	else
+		prefix = die_is_signed_type(&type) ? 's' :
+			 probe_type_is_available(PROBE_TYPE_X) ? 'x' : 'u';
+
 	ret = dwarf_bytesize(&type);
 	if (ret <= 0)
 		/* No size ... try to use default type */
@@ -373,8 +389,7 @@
 			dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
 		ret = MAX_BASIC_TYPE_BITS;
 	}
-	ret = snprintf(buf, 16, "%c%d",
-		       die_is_signed_type(&type) ? 's' : 'u', ret);
+	ret = snprintf(buf, 16, "%c%d", prefix, ret);
 
 formatted:
 	if (ret < 0 || ret >= 16) {
@@ -529,7 +544,7 @@
 		 dwarf_diename(vr_die));
 
 	ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
-					&pf->sp_die, pf->tvar);
+					&pf->sp_die, pf->machine, pf->tvar);
 	if (ret == -ENOENT || ret == -EINVAL) {
 		pr_err("Failed to find the location of the '%s' variable at this address.\n"
 		       " Perhaps it has been optimized out.\n"
@@ -892,6 +907,38 @@
 	return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
 }
 
+static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+	struct perf_probe_point *pp = &pf->pev->point;
+
+	/* Not uprobe? */
+	if (!pf->pev->uprobes)
+		return;
+
+	/* Compiled with optimization? */
+	if (die_is_optimized_target(&pf->cu_die))
+		return;
+
+	/* Don't know entrypc? */
+	if (!pf->addr)
+		return;
+
+	/* Only FUNC and FUNC@SRC are eligible. */
+	if (!pp->function || pp->line || pp->retprobe || pp->lazy_line ||
+	    pp->offset || pp->abs_address)
+		return;
+
+	/* Not interested in func parameter? */
+	if (!perf_probe_with_var(pf->pev))
+		return;
+
+	pr_info("Target program is compiled without optimization. Skipping prologue.\n"
+		"Probe on address 0x%" PRIx64 " to force probing at the function entry.\n\n",
+		pf->addr);
+
+	die_skip_prologue(sp_die, &pf->cu_die, &pf->addr);
+}
+
 static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
 {
 	struct probe_finder *pf = data;
@@ -908,6 +955,11 @@
 				   dwarf_diename(in_die));
 			return -ENOENT;
 		}
+		if (addr == 0) {
+			pr_debug("%s has no valid entry address. skipped.\n",
+				 dwarf_diename(in_die));
+			return -ENOENT;
+		}
 		pf->addr = addr;
 		pf->addr += pp->offset;
 		pr_debug("found inline addr: 0x%jx\n",
@@ -941,7 +993,8 @@
 	if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
 		return DWARF_CB_OK;
 
-	pr_debug("Matched function: %s\n", dwarf_diename(sp_die));
+	pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
+		 (unsigned long)dwarf_dieoffset(sp_die));
 	pf->fname = dwarf_decl_file(sp_die);
 	if (pp->line) { /* Function relative line */
 		dwarf_decl_line(sp_die, &pf->lno);
@@ -950,10 +1003,16 @@
 	} else if (die_is_func_instance(sp_die)) {
 		/* Instances always have the entry address */
 		dwarf_entrypc(sp_die, &pf->addr);
+		/* But in some case the entry address is 0 */
+		if (pf->addr == 0) {
+			pr_debug("%s has no entry PC. Skipped\n",
+				 dwarf_diename(sp_die));
+			param->retval = 0;
 		/* Real function */
-		if (pp->lazy_line)
+		} else if (pp->lazy_line)
 			param->retval = find_probe_point_lazy(sp_die, pf);
 		else {
+			skip_prologue(sp_die, pf);
 			pf->addr += pp->offset;
 			/* TODO: Check the address in this function */
 			param->retval = call_probe_finder(sp_die, pf);
@@ -963,7 +1022,7 @@
 		param->retval = die_walk_instances(sp_die,
 					probe_point_inline_cb, (void *)pf);
 		/* This could be a non-existed inline definition */
-		if (param->retval == -ENOENT && strisglob(pp->function))
+		if (param->retval == -ENOENT)
 			param->retval = 0;
 	}
 
@@ -1092,11 +1151,8 @@
 				  struct probe_finder *pf)
 {
 	int ret = 0;
-
-#if _ELFUTILS_PREREQ(0, 142)
 	Elf *elf;
 	GElf_Ehdr ehdr;
-	GElf_Shdr shdr;
 
 	if (pf->cfi_eh || pf->cfi_dbg)
 		return debuginfo__find_probe_location(dbg, pf);
@@ -1109,11 +1165,18 @@
 	if (gelf_getehdr(elf, &ehdr) == NULL)
 		return -EINVAL;
 
-	if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
-	    shdr.sh_type == SHT_PROGBITS)
-		pf->cfi_eh = dwarf_getcfi_elf(elf);
+	pf->machine = ehdr.e_machine;
 
-	pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+#if _ELFUTILS_PREREQ(0, 142)
+	do {
+		GElf_Shdr shdr;
+
+		if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+		    shdr.sh_type == SHT_PROGBITS)
+			pf->cfi_eh = dwarf_getcfi_elf(elf);
+
+		pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+	} while (0);
 #endif
 
 	ret = debuginfo__find_probe_location(dbg, pf);
@@ -1141,7 +1204,7 @@
 	    (tag == DW_TAG_variable && vf->vars)) {
 		if (convert_variable_location(die_mem, vf->pf->addr,
 					      vf->pf->fb_ops, &pf->sp_die,
-					      NULL) == 0) {
+					      pf->machine, NULL) == 0) {
 			vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem);
 			if (vf->args[vf->nargs].var == NULL) {
 				vf->ret = -ENOMEM;
@@ -1304,7 +1367,7 @@
 	    tag == DW_TAG_variable) {
 		ret = convert_variable_location(die_mem, af->pf.addr,
 						af->pf.fb_ops, &af->pf.sp_die,
-						NULL);
+						af->pf.machine, NULL);
 		if (ret == 0 || ret == -ERANGE) {
 			int ret2;
 			bool externs = !af->child;
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 51137fc..f1d8558 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -80,6 +80,7 @@
 	Dwarf_CFI		*cfi_dbg;
 #endif
 	Dwarf_Op		*fb_ops;	/* Frame base attribute */
+	unsigned int		machine;	/* Target machine arch */
 	struct perf_probe_arg	*pvar;		/* Current target variable */
 	struct probe_trace_arg	*tvar;		/* Current result variable */
 };
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 5d1eb1c..e55a132 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -25,6 +25,7 @@
 #include <ctype.h>
 #include <errno.h>
 #include <linux/bitmap.h>
+#include <linux/time64.h>
 
 #include "../util.h"
 #include <EXTERN.h>
@@ -359,8 +360,8 @@
 	if (!test_and_set_bit(event->id, events_defined))
 		define_event_symbols(event, handler, event->print_fmt.args);
 
-	s = nsecs / NSECS_PER_SEC;
-	ns = nsecs - s * NSECS_PER_SEC;
+	s = nsecs / NSEC_PER_SEC;
+	ns = nsecs - s * NSEC_PER_SEC;
 
 	scripting_context->event_data = data;
 	scripting_context->pevent = evsel->tp_format->pevent;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index e0203b9..089438d 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -27,6 +27,7 @@
 #include <stdbool.h>
 #include <errno.h>
 #include <linux/bitmap.h>
+#include <linux/time64.h>
 
 #include "../../perf.h"
 #include "../debug.h"
@@ -426,8 +427,8 @@
 		if (!dict)
 			Py_FatalError("couldn't create Python dict");
 	}
-	s = nsecs / NSECS_PER_SEC;
-	ns = nsecs - s * NSECS_PER_SEC;
+	s = nsecs / NSEC_PER_SEC;
+	ns = nsecs - s * NSEC_PER_SEC;
 
 	scripting_context->event_data = data;
 	scripting_context->pevent = evsel->tp_format->pevent;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 947d21f..452e15a 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -11,7 +11,7 @@
 regex_t		parent_regex;
 const char	default_parent_pattern[] = "^sys_|^do_page_fault";
 const char	*parent_pattern = default_parent_pattern;
-const char	default_sort_order[] = "comm,dso,symbol";
+const char	*default_sort_order = "comm,dso,symbol";
 const char	default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
 const char	default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
 const char	default_top_sort_order[] = "dso,symbol";
@@ -588,7 +588,11 @@
 	} else {
 		pevent_event_info(&seq, evsel->tp_format, &rec);
 	}
-	return seq.buffer;
+	/*
+	 * Trim the buffer, it starts at 4KB and we're not going to
+	 * add anything more to this buffer.
+	 */
+	return realloc(seq.buffer, seq.len + 1);
 }
 
 static int64_t
@@ -863,7 +867,7 @@
 };
 
 /* --sort daddr_sym */
-static int64_t
+int64_t
 sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	uint64_t l = 0, r = 0;
@@ -892,7 +896,7 @@
 					 width);
 }
 
-static int64_t
+int64_t
 sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	uint64_t l = 0, r = 0;
@@ -1058,7 +1062,7 @@
 	return repsep_snprintf(bf, size, "%-*s", width, out);
 }
 
-static int64_t
+int64_t
 sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	u64 l, r;
@@ -1488,7 +1492,8 @@
 }
 
 static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
-			      struct hists *hists)
+			      struct hists *hists, int line __maybe_unused,
+			      int *span __maybe_unused)
 {
 	struct hpp_sort_entry *hse;
 	size_t len = fmt->user_len;
@@ -1793,7 +1798,9 @@
 }
 
 static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
-			      struct hists *hists __maybe_unused)
+			      struct hists *hists __maybe_unused,
+			      int line __maybe_unused,
+			      int *span __maybe_unused)
 {
 	struct hpp_dynamic_entry *hde;
 	size_t len = fmt->user_len;
@@ -2301,9 +2308,9 @@
 	return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
 }
 
-static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
-			       struct perf_evlist *evlist,
-			       int level)
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			struct perf_evlist *evlist,
+			int level)
 {
 	unsigned int i;
 
@@ -2678,7 +2685,7 @@
 	}
 }
 
-static int output_field_add(struct perf_hpp_list *list, char *tok)
+int output_field_add(struct perf_hpp_list *list, char *tok)
 {
 	unsigned int i;
 
@@ -2741,7 +2748,7 @@
 	return ret;
 }
 
-static void reset_dimensions(void)
+void reset_dimensions(void)
 {
 	unsigned int i;
 
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7ca37ea..099c975 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -28,7 +28,7 @@
 extern const char *field_order;
 extern const char default_parent_pattern[];
 extern const char *parent_pattern;
-extern const char default_sort_order[];
+extern const char *default_sort_order;
 extern regex_t ignore_callees_regex;
 extern int have_ignore_callees;
 extern enum sort_mode sort__mode;
@@ -40,6 +40,7 @@
 extern struct sort_entry sort_dso_to;
 extern struct sort_entry sort_sym_from;
 extern struct sort_entry sort_sym_to;
+extern struct sort_entry sort_srcline;
 extern enum sort_type sort__first_dimension;
 extern const char default_mem_sort_order[];
 
@@ -268,4 +269,15 @@
 bool is_strict_order(const char *order);
 
 int hpp_dimension__add_output(unsigned col);
+void reset_dimensions(void);
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+			struct perf_evlist *evlist,
+			int level);
+int output_field_add(struct perf_hpp_list *list, char *tok);
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
 #endif	/* __PERF_SORT_H */
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index eec6c11..1cbada2 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -18,6 +18,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <linux/bitmap.h>
+#include <linux/time64.h>
 
 #include "perf.h"
 #include "svghelper.h"
@@ -274,14 +275,14 @@
 
 	text[0] = 0;
 
-	if (duration < 1000) /* less than 1 usec */
+	if (duration < NSEC_PER_USEC) /* less than 1 usec */
 		return text;
 
-	if (duration < 1000 * 1000) { /* less than 1 msec */
-		sprintf(text, "%.1f us", duration / 1000.0);
+	if (duration < NSEC_PER_MSEC) { /* less than 1 msec */
+		sprintf(text, "%.1f us", duration / (double)NSEC_PER_USEC);
 		return text;
 	}
-	sprintf(text, "%.1f ms", duration / 1000.0 / 1000);
+	sprintf(text, "%.1f ms", duration / (double)NSEC_PER_MSEC);
 
 	return text;
 }
@@ -297,7 +298,7 @@
 
 	style = "waiting";
 
-	if (end-start > 10 * 1000000) /* 10 msec */
+	if (end-start > 10 * NSEC_PER_MSEC) /* 10 msec */
 		style = "WAITING";
 
 	text = time_to_string(end-start);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a34321e..99400b0 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -206,6 +206,37 @@
 	return NULL;
 }
 
+static bool want_demangle(bool is_kernel_sym)
+{
+	return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+}
+
+static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+	int demangle_flags = verbose ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS;
+	char *demangled = NULL;
+
+	/*
+	 * We need to figure out if the object was created from C++ sources
+	 * DWARF DW_compile_unit has this, but we don't always have access
+	 * to it...
+	 */
+	if (!want_demangle(dso->kernel || kmodule))
+	    return demangled;
+
+	demangled = bfd_demangle(NULL, elf_name, demangle_flags);
+	if (demangled == NULL)
+		demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
+	else if (rust_is_mangled(demangled))
+		/*
+		    * Input to Rust demangling is the BFD-demangled
+		    * name which it Rust-demangles in place.
+		    */
+		rust_demangle_sym(demangled);
+
+	return demangled;
+}
+
 #define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
 	for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
 	     idx < nr_entries; \
@@ -223,8 +254,7 @@
  * And always look at the original dso, not at debuginfo packages, that
  * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
  */
-int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map,
-				symbol_filter_t filter)
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map)
 {
 	uint32_t nr_rel_entries, idx;
 	GElf_Sym sym;
@@ -301,45 +331,53 @@
 
 		elf_section__for_each_rela(reldata, pos, pos_mem, idx,
 					   nr_rel_entries) {
+			const char *elf_name = NULL;
+			char *demangled = NULL;
 			symidx = GELF_R_SYM(pos->r_info);
 			plt_offset += shdr_plt.sh_entsize;
 			gelf_getsym(syms, symidx, &sym);
+
+			elf_name = elf_sym__name(&sym, symstrs);
+			demangled = demangle_sym(dso, 0, elf_name);
+			if (demangled != NULL)
+				elf_name = demangled;
 			snprintf(sympltname, sizeof(sympltname),
-				 "%s@plt", elf_sym__name(&sym, symstrs));
+				 "%s@plt", elf_name);
+			free(demangled);
 
 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
 					STB_GLOBAL, sympltname);
 			if (!f)
 				goto out_elf_end;
 
-			if (filter && filter(map, f))
-				symbol__delete(f);
-			else {
-				symbols__insert(&dso->symbols[map->type], f);
-				++nr;
-			}
+			symbols__insert(&dso->symbols[map->type], f);
+			++nr;
 		}
 	} else if (shdr_rel_plt.sh_type == SHT_REL) {
 		GElf_Rel pos_mem, *pos;
 		elf_section__for_each_rel(reldata, pos, pos_mem, idx,
 					  nr_rel_entries) {
+			const char *elf_name = NULL;
+			char *demangled = NULL;
 			symidx = GELF_R_SYM(pos->r_info);
 			plt_offset += shdr_plt.sh_entsize;
 			gelf_getsym(syms, symidx, &sym);
+
+			elf_name = elf_sym__name(&sym, symstrs);
+			demangled = demangle_sym(dso, 0, elf_name);
+			if (demangled != NULL)
+				elf_name = demangled;
 			snprintf(sympltname, sizeof(sympltname),
-				 "%s@plt", elf_sym__name(&sym, symstrs));
+				 "%s@plt", elf_name);
+			free(demangled);
 
 			f = symbol__new(plt_offset, shdr_plt.sh_entsize,
 					STB_GLOBAL, sympltname);
 			if (!f)
 				goto out_elf_end;
 
-			if (filter && filter(map, f))
-				symbol__delete(f);
-			else {
-				symbols__insert(&dso->symbols[map->type], f);
-				++nr;
-			}
+			symbols__insert(&dso->symbols[map->type], f);
+			++nr;
 		}
 	}
 
@@ -685,7 +723,7 @@
 	}
 
 	/* Always reject images with a mismatched build-id: */
-	if (dso->has_build_id) {
+	if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) {
 		u8 build_id[BUILD_ID_SIZE];
 
 		if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) {
@@ -775,17 +813,11 @@
 	return 0;
 }
 
-static bool want_demangle(bool is_kernel_sym)
-{
-	return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
-}
-
 void __weak arch__sym_update(struct symbol *s __maybe_unused,
 		GElf_Sym *sym __maybe_unused) { }
 
-int dso__load_sym(struct dso *dso, struct map *map,
-		  struct symsrc *syms_ss, struct symsrc *runtime_ss,
-		  symbol_filter_t filter, int kmodule)
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+		  struct symsrc *runtime_ss, int kmodule)
 {
 	struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL;
 	struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL;
@@ -837,7 +869,8 @@
 	sec = syms_ss->symtab;
 	shdr = syms_ss->symshdr;
 
-	if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
+	if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
+				".text", NULL))
 		dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
 
 	if (runtime_ss->opdsec)
@@ -1069,29 +1102,10 @@
 			sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 		}
 new_symbol:
-		/*
-		 * We need to figure out if the object was created from C++ sources
-		 * DWARF DW_compile_unit has this, but we don't always have access
-		 * to it...
-		 */
-		if (want_demangle(dso->kernel || kmodule)) {
-			int demangle_flags = DMGL_NO_OPTS;
-			if (verbose)
-				demangle_flags = DMGL_PARAMS | DMGL_ANSI;
+		demangled = demangle_sym(dso, kmodule, elf_name);
+		if (demangled != NULL)
+			elf_name = demangled;
 
-			demangled = bfd_demangle(NULL, elf_name, demangle_flags);
-			if (demangled == NULL)
-				demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
-			else if (rust_is_mangled(demangled))
-				/*
-				 * Input to Rust demangling is the BFD-demangled
-				 * name which it Rust-demangles in place.
-				 */
-				rust_demangle_sym(demangled);
-
-			if (demangled != NULL)
-				elf_name = demangled;
-		}
 		f = symbol__new(sym.st_value, sym.st_size,
 				GELF_ST_BIND(sym.st_info), elf_name);
 		free(demangled);
@@ -1100,21 +1114,16 @@
 
 		arch__sym_update(f, &sym);
 
-		if (filter && filter(curr_map, f))
-			symbol__delete(f);
-		else {
-			symbols__insert(&curr_dso->symbols[curr_map->type], f);
-			nr++;
-		}
+		__symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel);
+		nr++;
 	}
 
 	/*
 	 * For misannotated, zeroed, ASM function sizes.
 	 */
 	if (nr > 0) {
-		if (!symbol_conf.allow_aliases)
-			symbols__fixup_duplicate(&dso->symbols[map->type]);
 		symbols__fixup_end(&dso->symbols[map->type]);
+		symbols__fixup_duplicate(&dso->symbols[map->type]);
 		if (kmap) {
 			/*
 			 * We need to fixup this here too because we create new
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index 4890633..11cdde9 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -287,8 +287,7 @@
 
 int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
 				struct symsrc *ss __maybe_unused,
-				struct map *map __maybe_unused,
-				symbol_filter_t filter __maybe_unused)
+				struct map *map __maybe_unused)
 {
 	return 0;
 }
@@ -334,7 +333,6 @@
 int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
 		  struct symsrc *ss,
 		  struct symsrc *runtime_ss __maybe_unused,
-		  symbol_filter_t filter __maybe_unused,
 		  int kmodule __maybe_unused)
 {
 	unsigned char build_id[BUILD_ID_SIZE];
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 37e8d20..aecff69 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -9,6 +9,7 @@
 #include <fcntl.h>
 #include <unistd.h>
 #include <inttypes.h>
+#include "annotate.h"
 #include "build-id.h"
 #include "util.h"
 #include "debug.h"
@@ -23,10 +24,10 @@
 #include <symbol/kallsyms.h>
 #include <sys/utsname.h>
 
-static int dso__load_kernel_sym(struct dso *dso, struct map *map,
-				symbol_filter_t filter);
-static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
-			symbol_filter_t filter);
+static int dso__load_kernel_sym(struct dso *dso, struct map *map);
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
+static bool symbol__is_idle(const char *name);
+
 int vmlinux_path__nr_entries;
 char **vmlinux_path;
 
@@ -152,6 +153,9 @@
 	struct rb_node *nd;
 	struct symbol *curr, *next;
 
+	if (symbol_conf.allow_aliases)
+		return;
+
 	nd = rb_first(symbols);
 
 	while (nd) {
@@ -235,8 +239,13 @@
 	if (sym == NULL)
 		return NULL;
 
-	if (symbol_conf.priv_size)
+	if (symbol_conf.priv_size) {
+		if (symbol_conf.init_annotation) {
+			struct annotation *notes = (void *)sym;
+			pthread_mutex_init(&notes->lock, NULL);
+		}
 		sym = ((void *)sym) + symbol_conf.priv_size;
+	}
 
 	sym->start   = start;
 	sym->end     = len ? start + len : start;
@@ -268,13 +277,24 @@
 	}
 }
 
-void symbols__insert(struct rb_root *symbols, struct symbol *sym)
+void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
 {
 	struct rb_node **p = &symbols->rb_node;
 	struct rb_node *parent = NULL;
 	const u64 ip = sym->start;
 	struct symbol *s;
 
+	if (kernel) {
+		const char *name = sym->name;
+		/*
+		 * ppc64 uses function descriptors and appends a '.' to the
+		 * start of every instruction address. Remove it.
+		 */
+		if (name[0] == '.')
+			name++;
+		sym->idle = symbol__is_idle(name);
+	}
+
 	while (*p != NULL) {
 		parent = *p;
 		s = rb_entry(parent, struct symbol, rb_node);
@@ -287,6 +307,11 @@
 	rb_insert_color(&sym->rb_node, symbols);
 }
 
+void symbols__insert(struct rb_root *symbols, struct symbol *sym)
+{
+	__symbols__insert(symbols, sym, false);
+}
+
 static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
 {
 	struct rb_node *n;
@@ -320,6 +345,16 @@
 	return NULL;
 }
 
+static struct symbol *symbols__last(struct rb_root *symbols)
+{
+	struct rb_node *n = rb_last(symbols);
+
+	if (n)
+		return rb_entry(n, struct symbol, rb_node);
+
+	return NULL;
+}
+
 static struct symbol *symbols__next(struct symbol *sym)
 {
 	struct rb_node *n = rb_next(&sym->rb_node);
@@ -415,7 +450,7 @@
 
 void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym)
 {
-	symbols__insert(&dso->symbols[type], sym);
+	__symbols__insert(&dso->symbols[type], sym, dso->kernel);
 
 	/* update the symbol cache if necessary */
 	if (dso->last_find_result[type].addr >= sym->start &&
@@ -441,6 +476,11 @@
 	return symbols__first(&dso->symbols[type]);
 }
 
+struct symbol *dso__last_symbol(struct dso *dso, enum map_type type)
+{
+	return symbols__last(&dso->symbols[type]);
+}
+
 struct symbol *dso__next_symbol(struct symbol *sym)
 {
 	return symbols__next(sym);
@@ -537,7 +577,7 @@
  * These are symbols in the kernel image, so make sure that
  * sym is from a kernel DSO.
  */
-bool symbol__is_idle(struct symbol *sym)
+static bool symbol__is_idle(const char *name)
 {
 	const char * const idle_symbols[] = {
 		"cpu_idle",
@@ -554,14 +594,10 @@
 		"pseries_dedicated_idle_sleep",
 		NULL
 	};
-
 	int i;
 
-	if (!sym)
-		return false;
-
 	for (i = 0; idle_symbols[i]; i++) {
-		if (!strcmp(idle_symbols[i], sym->name))
+		if (!strcmp(idle_symbols[i], name))
 			return true;
 	}
 
@@ -590,7 +626,7 @@
 	 * We will pass the symbols to the filter later, in
 	 * map__split_kallsyms, when we have split the maps per module
 	 */
-	symbols__insert(root, sym);
+	__symbols__insert(root, sym, !strchr(name, '['));
 
 	return 0;
 }
@@ -607,8 +643,7 @@
 	return kallsyms__parse(filename, &args, map__process_kallsym_symbol);
 }
 
-static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
-					 symbol_filter_t filter)
+static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map)
 {
 	struct map_groups *kmaps = map__kmaps(map);
 	struct map *curr_map;
@@ -637,7 +672,7 @@
 
 		curr_map = map_groups__find(kmaps, map->type, pos->start);
 
-		if (!curr_map || (filter && filter(curr_map, pos))) {
+		if (!curr_map) {
 			symbol__delete(pos);
 			continue;
 		}
@@ -660,8 +695,7 @@
  * kernel range is broken in several maps, named [kernel].N, as we don't have
  * the original ELF section names vmlinux have.
  */
-static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta,
-			       symbol_filter_t filter)
+static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta)
 {
 	struct map_groups *kmaps = map__kmaps(map);
 	struct machine *machine;
@@ -738,7 +772,7 @@
 
 			if (count == 0) {
 				curr_map = map;
-				goto filter_symbol;
+				goto add_symbol;
 			}
 
 			if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
@@ -770,18 +804,18 @@
 			pos->start -= delta;
 			pos->end -= delta;
 		}
-filter_symbol:
-		if (filter && filter(curr_map, pos)) {
-discard_symbol:		rb_erase(&pos->rb_node, root);
-			symbol__delete(pos);
-		} else {
-			if (curr_map != map) {
-				rb_erase(&pos->rb_node, root);
-				symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
-				++moved;
-			} else
-				++count;
-		}
+add_symbol:
+		if (curr_map != map) {
+			rb_erase(&pos->rb_node, root);
+			symbols__insert(&curr_map->dso->symbols[curr_map->type], pos);
+			++moved;
+		} else
+			++count;
+
+		continue;
+discard_symbol:
+		rb_erase(&pos->rb_node, root);
+		symbol__delete(pos);
 	}
 
 	if (curr_map != map &&
@@ -1221,7 +1255,7 @@
 }
 
 int __dso__load_kallsyms(struct dso *dso, const char *filename,
-			 struct map *map, bool no_kcore, symbol_filter_t filter)
+			 struct map *map, bool no_kcore)
 {
 	u64 delta = 0;
 
@@ -1234,8 +1268,8 @@
 	if (kallsyms__delta(map, filename, &delta))
 		return -1;
 
-	symbols__fixup_duplicate(&dso->symbols[map->type]);
 	symbols__fixup_end(&dso->symbols[map->type]);
+	symbols__fixup_duplicate(&dso->symbols[map->type]);
 
 	if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
 		dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
@@ -1243,19 +1277,18 @@
 		dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
 
 	if (!no_kcore && !dso__load_kcore(dso, map, filename))
-		return dso__split_kallsyms_for_kcore(dso, map, filter);
+		return dso__split_kallsyms_for_kcore(dso, map);
 	else
-		return dso__split_kallsyms(dso, map, delta, filter);
+		return dso__split_kallsyms(dso, map, delta);
 }
 
 int dso__load_kallsyms(struct dso *dso, const char *filename,
-		       struct map *map, symbol_filter_t filter)
+		       struct map *map)
 {
-	return __dso__load_kallsyms(dso, filename, map, false, filter);
+	return __dso__load_kallsyms(dso, filename, map, false);
 }
 
-static int dso__load_perf_map(struct dso *dso, struct map *map,
-			      symbol_filter_t filter)
+static int dso__load_perf_map(struct dso *dso, struct map *map)
 {
 	char *line = NULL;
 	size_t n;
@@ -1297,12 +1330,8 @@
 		if (sym == NULL)
 			goto out_delete_line;
 
-		if (filter && filter(map, sym))
-			symbol__delete(sym);
-		else {
-			symbols__insert(&dso->symbols[map->type], sym);
-			nr_syms++;
-		}
+		symbols__insert(&dso->symbols[map->type], sym);
+		nr_syms++;
 	}
 
 	free(line);
@@ -1358,7 +1387,7 @@
 	}
 }
 
-int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
+int dso__load(struct dso *dso, struct map *map)
 {
 	char *name;
 	int ret = -1;
@@ -1381,9 +1410,9 @@
 
 	if (dso->kernel) {
 		if (dso->kernel == DSO_TYPE_KERNEL)
-			ret = dso__load_kernel_sym(dso, map, filter);
+			ret = dso__load_kernel_sym(dso, map);
 		else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
-			ret = dso__load_guest_kernel_sym(dso, map, filter);
+			ret = dso__load_guest_kernel_sym(dso, map);
 
 		goto out;
 	}
@@ -1407,7 +1436,7 @@
 			goto out;
 		}
 
-		ret = dso__load_perf_map(dso, map, filter);
+		ret = dso__load_perf_map(dso, map);
 		dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
 					     DSO_BINARY_TYPE__NOT_FOUND;
 		goto out;
@@ -1498,14 +1527,14 @@
 			kmod = true;
 
 	if (syms_ss)
-		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod);
+		ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod);
 	else
 		ret = -1;
 
 	if (ret > 0) {
 		int nr_plt;
 
-		nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map, filter);
+		nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map);
 		if (nr_plt > 0)
 			ret += nr_plt;
 	}
@@ -1544,8 +1573,7 @@
 }
 
 int dso__load_vmlinux(struct dso *dso, struct map *map,
-		      const char *vmlinux, bool vmlinux_allocated,
-		      symbol_filter_t filter)
+		      const char *vmlinux, bool vmlinux_allocated)
 {
 	int err = -1;
 	struct symsrc ss;
@@ -1565,7 +1593,7 @@
 	if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type))
 		return -1;
 
-	err = dso__load_sym(dso, map, &ss, &ss, filter, 0);
+	err = dso__load_sym(dso, map, &ss, &ss, 0);
 	symsrc__destroy(&ss);
 
 	if (err > 0) {
@@ -1581,8 +1609,7 @@
 	return err;
 }
 
-int dso__load_vmlinux_path(struct dso *dso, struct map *map,
-			   symbol_filter_t filter)
+int dso__load_vmlinux_path(struct dso *dso, struct map *map)
 {
 	int i, err = 0;
 	char *filename = NULL;
@@ -1591,7 +1618,7 @@
 		 vmlinux_path__nr_entries + 1);
 
 	for (i = 0; i < vmlinux_path__nr_entries; ++i) {
-		err = dso__load_vmlinux(dso, map, vmlinux_path[i], false, filter);
+		err = dso__load_vmlinux(dso, map, vmlinux_path[i], false);
 		if (err > 0)
 			goto out;
 	}
@@ -1599,7 +1626,7 @@
 	if (!symbol_conf.ignore_vmlinux_buildid)
 		filename = dso__build_id_filename(dso, NULL, 0);
 	if (filename != NULL) {
-		err = dso__load_vmlinux(dso, map, filename, true, filter);
+		err = dso__load_vmlinux(dso, map, filename, true);
 		if (err > 0)
 			goto out;
 		free(filename);
@@ -1713,8 +1740,7 @@
 	return strdup(path);
 }
 
-static int dso__load_kernel_sym(struct dso *dso, struct map *map,
-				symbol_filter_t filter)
+static int dso__load_kernel_sym(struct dso *dso, struct map *map)
 {
 	int err;
 	const char *kallsyms_filename = NULL;
@@ -1740,12 +1766,11 @@
 	}
 
 	if (!symbol_conf.ignore_vmlinux && symbol_conf.vmlinux_name != NULL) {
-		return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name,
-					 false, filter);
+		return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false);
 	}
 
 	if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) {
-		err = dso__load_vmlinux_path(dso, map, filter);
+		err = dso__load_vmlinux_path(dso, map);
 		if (err > 0)
 			return err;
 	}
@@ -1761,7 +1786,7 @@
 	kallsyms_filename = kallsyms_allocated_filename;
 
 do_kallsyms:
-	err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
+	err = dso__load_kallsyms(dso, kallsyms_filename, map);
 	if (err > 0)
 		pr_debug("Using %s for symbols\n", kallsyms_filename);
 	free(kallsyms_allocated_filename);
@@ -1776,8 +1801,7 @@
 	return err;
 }
 
-static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map,
-				      symbol_filter_t filter)
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map)
 {
 	int err;
 	const char *kallsyms_filename = NULL;
@@ -1799,7 +1823,7 @@
 		if (symbol_conf.default_guest_vmlinux_name != NULL) {
 			err = dso__load_vmlinux(dso, map,
 						symbol_conf.default_guest_vmlinux_name,
-						false, filter);
+						false);
 			return err;
 		}
 
@@ -1811,7 +1835,7 @@
 		kallsyms_filename = path;
 	}
 
-	err = dso__load_kallsyms(dso, kallsyms_filename, map, filter);
+	err = dso__load_kallsyms(dso, kallsyms_filename, map);
 	if (err > 0)
 		pr_debug("Using %s for symbols\n", kallsyms_filename);
 	if (err > 0 && !dso__is_kcore(dso)) {
@@ -1948,6 +1972,23 @@
 	return value;
 }
 
+int symbol__annotation_init(void)
+{
+	if (symbol_conf.initialized) {
+		pr_err("Annotation needs to be init before symbol__init()\n");
+		return -1;
+	}
+
+	if (symbol_conf.init_annotation) {
+		pr_warning("Annotation being initialized multiple times\n");
+		return 0;
+	}
+
+	symbol_conf.priv_size += sizeof(struct annotation);
+	symbol_conf.init_annotation = true;
+	return 0;
+}
+
 int symbol__init(struct perf_env *env)
 {
 	const char *symfs;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 699f7cb..d964844 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -57,7 +57,7 @@
 	u64		end;
 	u16		namelen;
 	u8		binding;
-	bool		ignore;
+	u8		idle:1;
 	u8		arch_sym;
 	char		name[0];
 };
@@ -88,6 +88,7 @@
 	unsigned short	priv_size;
 	unsigned short	nr_events;
 	bool		try_vmlinux_path,
+			init_annotation,
 			force,
 			ignore_vmlinux,
 			ignore_vmlinux_buildid,
@@ -240,16 +241,13 @@
 bool symsrc__has_symtab(struct symsrc *ss);
 bool symsrc__possibly_runtime(struct symsrc *ss);
 
-int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter);
+int dso__load(struct dso *dso, struct map *map);
 int dso__load_vmlinux(struct dso *dso, struct map *map,
-		      const char *vmlinux, bool vmlinux_allocated,
-		      symbol_filter_t filter);
-int dso__load_vmlinux_path(struct dso *dso, struct map *map,
-			   symbol_filter_t filter);
+		      const char *vmlinux, bool vmlinux_allocated);
+int dso__load_vmlinux_path(struct dso *dso, struct map *map);
 int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
-			 bool no_kcore, symbol_filter_t filter);
-int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
-		       symbol_filter_t filter);
+			 bool no_kcore);
+int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
 
 void dso__insert_symbol(struct dso *dso, enum map_type type,
 			struct symbol *sym);
@@ -261,6 +259,7 @@
 struct symbol *symbol__next_by_name(struct symbol *sym);
 
 struct symbol *dso__first_symbol(struct dso *dso, enum map_type type);
+struct symbol *dso__last_symbol(struct dso *dso, enum map_type type);
 struct symbol *dso__next_symbol(struct symbol *sym);
 
 enum dso_type dso__type_fd(int fd);
@@ -277,6 +276,8 @@
 int symbol__init(struct perf_env *env);
 void symbol__exit(void);
 void symbol__elf_init(void);
+int symbol__annotation_init(void);
+
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name);
 size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
 				      const struct addr_location *al,
@@ -291,16 +292,15 @@
 bool symbol_type__is_a(char symbol_type, enum map_type map_type);
 bool symbol__restricted_filename(const char *filename,
 				 const char *restricted_filename);
-bool symbol__is_idle(struct symbol *sym);
 int symbol__config_symfs(const struct option *opt __maybe_unused,
 			 const char *dir, int unset __maybe_unused);
 
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
-		  struct symsrc *runtime_ss, symbol_filter_t filter,
-		  int kmodule);
+		  struct symsrc *runtime_ss, int kmodule);
 int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss,
-				struct map *map, symbol_filter_t filter);
+				struct map *map);
 
+void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel);
 void symbols__insert(struct rb_root *symbols, struct symbol *sym);
 void symbols__fixup_duplicate(struct rb_root *symbols);
 void symbols__fixup_end(struct rb_root *symbols);
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index cf5e250..783a53f 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -66,7 +66,7 @@
 	if (__report_module(&al, ip, ui))
 		return -1;
 
-	e->ip  = ip;
+	e->ip  = al.addr;
 	e->map = al.map;
 	e->sym = al.sym;
 
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 97c0f8f..20c2e57 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -542,7 +542,7 @@
 	thread__find_addr_location(thread, PERF_RECORD_MISC_USER,
 				   MAP__FUNCTION, ip, &al);
 
-	e.ip = ip;
+	e.ip = al.addr;
 	e.map = al.map;
 	e.sym = al.sym;
 
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index cee559d..85c5680 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -15,6 +15,7 @@
 #include <byteswap.h>
 #include <linux/kernel.h>
 #include <linux/log2.h>
+#include <linux/time64.h>
 #include <unistd.h>
 #include "callchain.h"
 #include "strlist.h"
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index e5f5547..43899e0 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -179,10 +179,6 @@
 #undef tolower
 #undef toupper
 
-#ifndef NSEC_PER_MSEC
-#define NSEC_PER_MSEC	1000000L
-#endif
-
 int parse_nsec_time(const char *str, u64 *ptime);
 
 extern unsigned char sane_ctype[256];
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index e73a79f..bc82596 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -44,7 +44,6 @@
 #include <acpi/acpi.h>
 #include "accommon.h"
 #include "acapps.h"
-#include <stdio.h>
 
 #define _COMPONENT          ACPI_TOOLS
 ACPI_MODULE_NAME("cmfsize")
@@ -69,24 +68,24 @@
 
 	/* Save the current file pointer, seek to EOF to obtain file size */
 
-	current_offset = acpi_os_get_file_offset(file);
+	current_offset = ftell(file);
 	if (current_offset < 0) {
 		goto offset_error;
 	}
 
-	status = acpi_os_set_file_offset(file, 0, ACPI_FILE_END);
+	status = fseek(file, 0, SEEK_END);
 	if (ACPI_FAILURE(status)) {
 		goto seek_error;
 	}
 
-	file_size = acpi_os_get_file_offset(file);
+	file_size = ftell(file);
 	if (file_size < 0) {
 		goto offset_error;
 	}
 
 	/* Restore original file pointer */
 
-	status = acpi_os_set_file_offset(file, current_offset, ACPI_FILE_BEGIN);
+	status = fseek(file, current_offset, SEEK_SET);
 	if (ACPI_FAILURE(status)) {
 		goto seek_error;
 	}
@@ -94,10 +93,10 @@
 	return ((u32)file_size);
 
 offset_error:
-	acpi_log_error("Could not get file offset");
+	fprintf(stderr, "Could not get file offset\n");
 	return (ACPI_UINT32_MAX);
 
 seek_error:
-	acpi_log_error("Could not set file offset");
+	fprintf(stderr, "Could not set file offset\n");
 	return (ACPI_UINT32_MAX);
 }
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
index 0bd343f..3919970 100644
--- a/tools/power/acpi/common/getopt.c
+++ b/tools/power/acpi/common/getopt.c
@@ -57,7 +57,7 @@
 #include "acapps.h"
 
 #define ACPI_OPTION_ERROR(msg, badchar) \
-	if (acpi_gbl_opterr) {acpi_log_error ("%s%c\n", msg, badchar);}
+	if (acpi_gbl_opterr) {fprintf (stderr, "%s%c\n", msg, badchar);}
 
 int acpi_gbl_opterr = 1;
 int acpi_gbl_optind = 1;
@@ -94,7 +94,7 @@
 		acpi_gbl_optarg =
 		    &argv[acpi_gbl_optind++][(int)(current_char_ptr + 1)];
 	} else if (++acpi_gbl_optind >= argc) {
-		ACPI_OPTION_ERROR("Option requires an argument: -", 'v');
+		ACPI_OPTION_ERROR("\nOption requires an argument", 0);
 
 		current_char_ptr = 1;
 		return (-1);
diff --git a/tools/power/acpi/os_specific/service_layers/oslibcfs.c b/tools/power/acpi/os_specific/service_layers/oslibcfs.c
deleted file mode 100644
index 11f4aba..0000000
--- a/tools/power/acpi/os_specific/service_layers/oslibcfs.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/******************************************************************************
- *
- * Module Name: oslibcfs - C library OSL for file I/O
- *
- *****************************************************************************/
-
-/*
- * Copyright (C) 2000 - 2016, Intel Corp.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions, and the following disclaimer,
- *    without modification.
- * 2. Redistributions in binary form must reproduce at minimum a disclaimer
- *    substantially similar to the "NO WARRANTY" disclaimer below
- *    ("Disclaimer") and any redistribution must be conditioned upon
- *    including a substantially similar Disclaimer requirement for further
- *    binary redistribution.
- * 3. Neither the names of the above-listed copyright holders nor the names
- *    of any contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * Alternatively, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") version 2 as published by the Free
- * Software Foundation.
- *
- * NO WARRANTY
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGES.
- */
-
-#include <acpi/acpi.h>
-#include <stdio.h>
-#include <stdarg.h>
-
-#define _COMPONENT          ACPI_OS_SERVICES
-ACPI_MODULE_NAME("oslibcfs")
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_os_open_file
- *
- * PARAMETERS:  path                - File path
- *              modes               - File operation type
- *
- * RETURN:      File descriptor.
- *
- * DESCRIPTION: Open a file for reading (ACPI_FILE_READING) or/and writing
- *              (ACPI_FILE_WRITING).
- *
- ******************************************************************************/
-ACPI_FILE acpi_os_open_file(const char *path, u8 modes)
-{
-	ACPI_FILE file;
-	u32 i = 0;
-	char modes_str[4];
-
-	if (modes & ACPI_FILE_READING) {
-		modes_str[i++] = 'r';
-	}
-	if (modes & ACPI_FILE_WRITING) {
-		modes_str[i++] = 'w';
-	}
-
-	if (modes & ACPI_FILE_BINARY) {
-		modes_str[i++] = 'b';
-	}
-
-	modes_str[i++] = '\0';
-
-	file = fopen(path, modes_str);
-	if (!file) {
-		perror("Could not open file");
-	}
-
-	return (file);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_os_close_file
- *
- * PARAMETERS:  file                - An open file descriptor
- *
- * RETURN:      None.
- *
- * DESCRIPTION: Close a file opened via acpi_os_open_file.
- *
- ******************************************************************************/
-
-void acpi_os_close_file(ACPI_FILE file)
-{
-
-	fclose(file);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_os_read_file
- *
- * PARAMETERS:  file                - An open file descriptor
- *              buffer              - Data buffer
- *              size                - Data block size
- *              count               - Number of data blocks
- *
- * RETURN:      Number of bytes actually read.
- *
- * DESCRIPTION: Read from a file.
- *
- ******************************************************************************/
-
-int
-acpi_os_read_file(ACPI_FILE file, void *buffer, acpi_size size, acpi_size count)
-{
-	int length;
-
-	length = fread(buffer, size, count, file);
-	if (length < 0) {
-		perror("Error reading file");
-	}
-
-	return (length);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_os_write_file
- *
- * PARAMETERS:  file                - An open file descriptor
- *              buffer              - Data buffer
- *              size                - Data block size
- *              count               - Number of data blocks
- *
- * RETURN:      Number of bytes actually written.
- *
- * DESCRIPTION: Write to a file.
- *
- ******************************************************************************/
-
-int
-acpi_os_write_file(ACPI_FILE file,
-		   void *buffer, acpi_size size, acpi_size count)
-{
-	int length;
-
-	length = fwrite(buffer, size, count, file);
-	if (length < 0) {
-		perror("Error writing file");
-	}
-
-	return (length);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_os_get_file_offset
- *
- * PARAMETERS:  file                - An open file descriptor
- *
- * RETURN:      Current file pointer position.
- *
- * DESCRIPTION: Get current file offset.
- *
- ******************************************************************************/
-
-long acpi_os_get_file_offset(ACPI_FILE file)
-{
-	long offset;
-
-	offset = ftell(file);
-	return (offset);
-}
-
-/*******************************************************************************
- *
- * FUNCTION:    acpi_os_set_file_offset
- *
- * PARAMETERS:  file                - An open file descriptor
- *              offset              - New file offset
- *              from                - From begin/end of file
- *
- * RETURN:      Status
- *
- * DESCRIPTION: Set current file offset.
- *
- ******************************************************************************/
-
-acpi_status acpi_os_set_file_offset(ACPI_FILE file, long offset, u8 from)
-{
-	int ret = 0;
-
-	if (from == ACPI_FILE_BEGIN) {
-		ret = fseek(file, offset, SEEK_SET);
-	}
-
-	if (from == ACPI_FILE_END) {
-		ret = fseek(file, offset, SEEK_END);
-	}
-
-	if (ret < 0) {
-		return (AE_ERROR);
-	} else {
-		return (AE_OK);
-	}
-}
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
index 88aa66e..8d8003c 100644
--- a/tools/power/acpi/os_specific/service_layers/osunixxf.c
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -63,10 +63,7 @@
 #define _COMPONENT          ACPI_OS_SERVICES
 ACPI_MODULE_NAME("osunixxf")
 
-u8 acpi_gbl_debug_timeout = FALSE;
-
 /* Upcalls to acpi_exec */
-
 void
 ae_table_override(struct acpi_table_header *existing_table,
 		  struct acpi_table_header **new_table);
diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile
index 2942cdc..04b5db7 100644
--- a/tools/power/acpi/tools/acpidump/Makefile
+++ b/tools/power/acpi/tools/acpidump/Makefile
@@ -36,12 +36,13 @@
 	utdebug.o\
 	utexcep.o\
 	utglobal.o\
+	uthex.o\
 	utmath.o\
 	utnonansi.o\
 	utprint.o\
 	utstring.o\
+	utstrtoul64.o\
 	utxferror.o\
-	oslibcfs.o\
 	oslinuxtbl.o\
 	cmfsize.o\
 	getopt.o
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
index 025c232..00423fc 100644
--- a/tools/power/acpi/tools/acpidump/acpidump.h
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -55,11 +55,7 @@
 #include <acpi/acpi.h>
 #include "accommon.h"
 #include "actables.h"
-
-#include <stdio.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/stat.h>
+#include "acapps.h"
 
 /* Globals */
 
@@ -72,12 +68,6 @@
 EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL);
 EXTERN u64 INIT_GLOBAL(gbl_rsdp_base, 0);
 
-/* Globals required for use with ACPICA modules */
-
-#ifdef _DECLARE_GLOBALS
-u8 acpi_gbl_integer_byte_width = 8;
-#endif
-
 /* Action table used to defer requested options */
 
 struct ap_dump_action {
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
index fb8f1d9..9031be1 100644
--- a/tools/power/acpi/tools/acpidump/apdump.c
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -69,16 +69,17 @@
 		/* Make sure signature is all ASCII and a valid ACPI name */
 
 		if (!acpi_ut_valid_nameseg(table->signature)) {
-			acpi_log_error("Table signature (0x%8.8X) is invalid\n",
-				       *(u32 *)table->signature);
+			fprintf(stderr,
+				"Table signature (0x%8.8X) is invalid\n",
+				*(u32 *)table->signature);
 			return (FALSE);
 		}
 
 		/* Check for minimum table length */
 
 		if (table->length < sizeof(struct acpi_table_header)) {
-			acpi_log_error("Table length (0x%8.8X) is invalid\n",
-				       table->length);
+			fprintf(stderr, "Table length (0x%8.8X) is invalid\n",
+				table->length);
 			return (FALSE);
 		}
 	}
@@ -115,8 +116,8 @@
 	}
 
 	if (ACPI_FAILURE(status)) {
-		acpi_log_error("%4.4s: Warning: wrong checksum in table\n",
-			       table->signature);
+		fprintf(stderr, "%4.4s: Warning: wrong checksum in table\n",
+			table->signature);
 	}
 
 	return (AE_OK);
@@ -195,13 +196,13 @@
 	 * Note: simplest to just always emit a 64-bit address. acpi_xtract
 	 * utility can handle this.
 	 */
-	acpi_ut_file_printf(gbl_output_file, "%4.4s @ 0x%8.8X%8.8X\n",
-			    table->signature, ACPI_FORMAT_UINT64(address));
+	fprintf(gbl_output_file, "%4.4s @ 0x%8.8X%8.8X\n",
+		table->signature, ACPI_FORMAT_UINT64(address));
 
 	acpi_ut_dump_buffer_to_file(gbl_output_file,
 				    ACPI_CAST_PTR(u8, table), table_length,
 				    DB_BYTE_DISPLAY, 0);
-	acpi_ut_file_printf(gbl_output_file, "\n");
+	fprintf(gbl_output_file, "\n");
 	return (0);
 }
 
@@ -239,14 +240,14 @@
 			if (status == AE_LIMIT) {
 				return (0);
 			} else if (i == 0) {
-				acpi_log_error
-				    ("Could not get ACPI tables, %s\n",
-				     acpi_format_exception(status));
+				fprintf(stderr,
+					"Could not get ACPI tables, %s\n",
+					acpi_format_exception(status));
 				return (-1);
 			} else {
-				acpi_log_error
-				    ("Could not get ACPI table at index %u, %s\n",
-				     i, acpi_format_exception(status));
+				fprintf(stderr,
+					"Could not get ACPI table at index %u, %s\n",
+					i, acpi_format_exception(status));
 				continue;
 			}
 		}
@@ -286,20 +287,20 @@
 
 	/* Convert argument to an integer physical address */
 
-	status = acpi_ut_strtoul64(ascii_address, ACPI_ANY_BASE,
-				   ACPI_MAX64_BYTE_WIDTH, &long_address);
+	status = acpi_ut_strtoul64(ascii_address, ACPI_STRTOUL_64BIT,
+				   &long_address);
 	if (ACPI_FAILURE(status)) {
-		acpi_log_error("%s: Could not convert to a physical address\n",
-			       ascii_address);
+		fprintf(stderr, "%s: Could not convert to a physical address\n",
+			ascii_address);
 		return (-1);
 	}
 
 	address = (acpi_physical_address)long_address;
 	status = acpi_os_get_table_by_address(address, &table);
 	if (ACPI_FAILURE(status)) {
-		acpi_log_error("Could not get table at 0x%8.8X%8.8X, %s\n",
-			       ACPI_FORMAT_UINT64(address),
-			       acpi_format_exception(status));
+		fprintf(stderr, "Could not get table at 0x%8.8X%8.8X, %s\n",
+			ACPI_FORMAT_UINT64(address),
+			acpi_format_exception(status));
 		return (-1);
 	}
 
@@ -331,9 +332,9 @@
 	int table_status;
 
 	if (strlen(signature) != ACPI_NAME_SIZE) {
-		acpi_log_error
-		    ("Invalid table signature [%s]: must be exactly 4 characters\n",
-		     signature);
+		fprintf(stderr,
+			"Invalid table signature [%s]: must be exactly 4 characters\n",
+			signature);
 		return (-1);
 	}
 
@@ -363,9 +364,9 @@
 				return (0);
 			}
 
-			acpi_log_error
-			    ("Could not get ACPI table with signature [%s], %s\n",
-			     local_signature, acpi_format_exception(status));
+			fprintf(stderr,
+				"Could not get ACPI table with signature [%s], %s\n",
+				local_signature, acpi_format_exception(status));
 			return (-1);
 		}
 
@@ -408,24 +409,24 @@
 	}
 
 	if (!acpi_ut_valid_nameseg(table->signature)) {
-		acpi_log_error
-		    ("No valid ACPI signature was found in input file %s\n",
-		     pathname);
+		fprintf(stderr,
+			"No valid ACPI signature was found in input file %s\n",
+			pathname);
 	}
 
 	/* File must be at least as long as the table length */
 
 	if (table->length > file_size) {
-		acpi_log_error
-		    ("Table length (0x%X) is too large for input file (0x%X) %s\n",
-		     table->length, file_size, pathname);
+		fprintf(stderr,
+			"Table length (0x%X) is too large for input file (0x%X) %s\n",
+			table->length, file_size, pathname);
 		goto exit;
 	}
 
 	if (gbl_verbose_mode) {
-		acpi_log_error
-		    ("Input file:  %s contains table [%4.4s], 0x%X (%u) bytes\n",
-		     pathname, table->signature, file_size, file_size);
+		fprintf(stderr,
+			"Input file:  %s contains table [%4.4s], 0x%X (%u) bytes\n",
+			pathname, table->signature, file_size, file_size);
 	}
 
 	table_status = ap_dump_table_buffer(table, 0, 0);
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index 5fcd970..dd5b861 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -42,7 +42,6 @@
  */
 
 #include "acpidump.h"
-#include "acapps.h"
 
 /* Local prototypes */
 
@@ -66,7 +65,8 @@
 	struct stat stat_info;
 
 	if (!stat(pathname, &stat_info)) {
-		acpi_log_error("Target path already exists, overwrite? [y|n] ");
+		fprintf(stderr,
+			"Target path already exists, overwrite? [y|n] ");
 
 		if (getchar() != 'y') {
 			return (-1);
@@ -102,9 +102,9 @@
 
 	/* Point stdout to the file */
 
-	file = acpi_os_open_file(pathname, ACPI_FILE_WRITING);
+	file = fopen(pathname, "w");
 	if (!file) {
-		acpi_log_error("Could not open output file: %s\n", pathname);
+		fprintf(stderr, "Could not open output file: %s\n", pathname);
 		return (-1);
 	}
 
@@ -134,7 +134,7 @@
 	char filename[ACPI_NAME_SIZE + 16];
 	char instance_str[16];
 	ACPI_FILE file;
-	size_t actual;
+	acpi_size actual;
 	u32 table_length;
 
 	/* Obtain table length */
@@ -158,37 +158,36 @@
 	/* Handle multiple SSDts - create different filenames for each */
 
 	if (instance > 0) {
-		acpi_ut_snprintf(instance_str, sizeof(instance_str), "%u",
-				 instance);
+		snprintf(instance_str, sizeof(instance_str), "%u", instance);
 		strcat(filename, instance_str);
 	}
 
 	strcat(filename, FILE_SUFFIX_BINARY_TABLE);
 
 	if (gbl_verbose_mode) {
-		acpi_log_error
-		    ("Writing [%4.4s] to binary file: %s 0x%X (%u) bytes\n",
-		     table->signature, filename, table->length, table->length);
+		fprintf(stderr,
+			"Writing [%4.4s] to binary file: %s 0x%X (%u) bytes\n",
+			table->signature, filename, table->length,
+			table->length);
 	}
 
 	/* Open the file and dump the entire table in binary mode */
 
-	file = acpi_os_open_file(filename,
-				 ACPI_FILE_WRITING | ACPI_FILE_BINARY);
+	file = fopen(filename, "wb");
 	if (!file) {
-		acpi_log_error("Could not open output file: %s\n", filename);
+		fprintf(stderr, "Could not open output file: %s\n", filename);
 		return (-1);
 	}
 
-	actual = acpi_os_write_file(file, table, 1, table_length);
+	actual = fwrite(table, 1, table_length, file);
 	if (actual != table_length) {
-		acpi_log_error("Error writing binary output file: %s\n",
-			       filename);
-		acpi_os_close_file(file);
+		fprintf(stderr, "Error writing binary output file: %s\n",
+			filename);
+		fclose(file);
 		return (-1);
 	}
 
-	acpi_os_close_file(file);
+	fclose(file);
 	return (0);
 }
 
@@ -211,14 +210,13 @@
 	struct acpi_table_header *buffer = NULL;
 	ACPI_FILE file;
 	u32 file_size;
-	size_t actual;
+	acpi_size actual;
 
 	/* Must use binary mode */
 
-	file =
-	    acpi_os_open_file(pathname, ACPI_FILE_READING | ACPI_FILE_BINARY);
+	file = fopen(pathname, "rb");
 	if (!file) {
-		acpi_log_error("Could not open input file: %s\n", pathname);
+		fprintf(stderr, "Could not open input file: %s\n", pathname);
 		return (NULL);
 	}
 
@@ -226,7 +224,8 @@
 
 	file_size = cm_get_file_size(file);
 	if (file_size == ACPI_UINT32_MAX) {
-		acpi_log_error("Could not get input file size: %s\n", pathname);
+		fprintf(stderr,
+			"Could not get input file size: %s\n", pathname);
 		goto cleanup;
 	}
 
@@ -234,16 +233,17 @@
 
 	buffer = ACPI_ALLOCATE_ZEROED(file_size);
 	if (!buffer) {
-		acpi_log_error("Could not allocate file buffer of size: %u\n",
-			       file_size);
+		fprintf(stderr,
+			"Could not allocate file buffer of size: %u\n",
+			file_size);
 		goto cleanup;
 	}
 
 	/* Read the entire file */
 
-	actual = acpi_os_read_file(file, buffer, 1, file_size);
+	actual = fread(buffer, 1, file_size, file);
 	if (actual != file_size) {
-		acpi_log_error("Could not read input file: %s\n", pathname);
+		fprintf(stderr, "Could not read input file: %s\n", pathname);
 		ACPI_FREE(buffer);
 		buffer = NULL;
 		goto cleanup;
@@ -252,6 +252,6 @@
 	*out_file_size = file_size;
 
 cleanup:
-	acpi_os_close_file(file);
+	fclose(file);
 	return (buffer);
 }
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index 7692e6b..7ff46be 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -43,7 +43,6 @@
 
 #define _DECLARE_GLOBALS
 #include "acpidump.h"
-#include "acapps.h"
 
 /*
  * acpidump - A portable utility for obtaining system ACPI tables and dumping
@@ -140,8 +139,8 @@
 
 	current_action++;
 	if (current_action > AP_MAX_ACTIONS) {
-		acpi_log_error("Too many table options (max %u)\n",
-			       AP_MAX_ACTIONS);
+		fprintf(stderr, "Too many table options (max %u)\n",
+			AP_MAX_ACTIONS);
 		return (-1);
 	}
 
@@ -186,9 +185,9 @@
 			} else if (!strcmp(acpi_gbl_optarg, "off")) {
 				gbl_dump_customized_tables = FALSE;
 			} else {
-				acpi_log_error
-				    ("%s: Cannot handle this switch, please use on|off\n",
-				     acpi_gbl_optarg);
+				fprintf(stderr,
+					"%s: Cannot handle this switch, please use on|off\n",
+					acpi_gbl_optarg);
 				return (-1);
 			}
 			continue;
@@ -209,13 +208,13 @@
 		case 'r':	/* Dump tables from specified RSDP */
 
 			status =
-			    acpi_ut_strtoul64(acpi_gbl_optarg, ACPI_ANY_BASE,
-					      ACPI_MAX64_BYTE_WIDTH,
+			    acpi_ut_strtoul64(acpi_gbl_optarg,
+					      ACPI_STRTOUL_64BIT,
 					      &gbl_rsdp_base);
 			if (ACPI_FAILURE(status)) {
-				acpi_log_error
-				    ("%s: Could not convert to a physical address\n",
-				     acpi_gbl_optarg);
+				fprintf(stderr,
+					"%s: Could not convert to a physical address\n",
+					acpi_gbl_optarg);
 				return (-1);
 			}
 			continue;
@@ -242,7 +241,7 @@
 		case 'z':	/* Verbose mode */
 
 			gbl_verbose_mode = TRUE;
-			acpi_log_error(ACPI_COMMON_SIGNON(AP_UTILITY_NAME));
+			fprintf(stderr, ACPI_COMMON_SIGNON(AP_UTILITY_NAME));
 			continue;
 
 			/*
@@ -315,6 +314,7 @@
 	ACPI_DEBUG_INITIALIZE();	/* For debug version only */
 	acpi_os_initialize();
 	gbl_output_file = ACPI_FILE_OUT;
+	acpi_gbl_integer_byte_width = 8;
 
 	/* Process command line options */
 
@@ -353,8 +353,9 @@
 
 		default:
 
-			acpi_log_error("Internal error, invalid action: 0x%X\n",
-				       action->to_be_done);
+			fprintf(stderr,
+				"Internal error, invalid action: 0x%X\n",
+				action->to_be_done);
 			return (-1);
 		}
 
@@ -369,12 +370,12 @@
 			/* Summary for the output file */
 
 			file_size = cm_get_file_size(gbl_output_file);
-			acpi_log_error
-			    ("Output file %s contains 0x%X (%u) bytes\n\n",
-			     gbl_output_filename, file_size, file_size);
+			fprintf(stderr,
+				"Output file %s contains 0x%X (%u) bytes\n\n",
+				gbl_output_filename, file_size, file_size);
 		}
 
-		acpi_os_close_file(gbl_output_file);
+		fclose(gbl_output_file);
 	}
 
 	return (status);
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
index cd0db62..3815b18 100644
--- a/tools/spi/Makefile
+++ b/tools/spi/Makefile
@@ -1,3 +1,5 @@
+CC = $(CROSS_COMPILE)gcc
+
 all: spidev_test spidev_fdx
 
 clean:
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index 8a73d81..f046b77 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -19,6 +19,7 @@
 #include <getopt.h>
 #include <fcntl.h>
 #include <sys/ioctl.h>
+#include <linux/ioctl.h>
 #include <sys/stat.h>
 #include <linux/types.h>
 #include <linux/spi/spidev.h>
@@ -284,7 +285,7 @@
 
 static void transfer_escaped_string(int fd, char *str)
 {
-	size_t size = strlen(str + 1);
+	size_t size = strlen(str);
 	uint8_t *tx;
 	uint8_t *rx;
 
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 5404efa..f64c57b 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -13,6 +13,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
 #include <linux/libnvdimm.h>
 #include <linux/vmalloc.h>
 #include <linux/device.h>
@@ -602,7 +603,8 @@
 			return -ENOMEM;
 		sprintf(t->label[i], "label%d", i);
 
-		t->flush[i] = test_alloc(t, sizeof(u64) * NUM_HINTS,
+		t->flush[i] = test_alloc(t, max(PAGE_SIZE,
+					sizeof(u64) * NUM_HINTS),
 				&t->flush_dma[i]);
 		if (!t->flush[i])
 			return -ENOMEM;
@@ -1474,6 +1476,7 @@
 	if (nfit_test->setup != nfit_test0_setup)
 		return 0;
 
+	flush_work(&acpi_desc->work);
 	nfit_test->setup_hotplug = 1;
 	nfit_test->setup(nfit_test);
 
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 3b53046..9d0919ed 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,5 +1,5 @@
 
-CFLAGS += -I. -g -Wall -D_LGPL_SOURCE
+CFLAGS += -I. -g -O2 -Wall -D_LGPL_SOURCE
 LDFLAGS += -lpthread -lurcu
 TARGETS = main
 OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \
diff --git a/tools/testing/radix-tree/linux/cpu.h b/tools/testing/radix-tree/linux/cpu.h
index 60a4045..7cf4121 100644
--- a/tools/testing/radix-tree/linux/cpu.h
+++ b/tools/testing/radix-tree/linux/cpu.h
@@ -7,19 +7,8 @@
 #define CPU_DOWN_PREPARE	0x0005 /* CPU (unsigned)v going down */
 #define CPU_DOWN_FAILED		0x0006 /* CPU (unsigned)v NOT going down */
 #define CPU_DEAD		0x0007 /* CPU (unsigned)v dead */
-#define CPU_DYING		0x0008 /* CPU (unsigned)v not running any task,
-					* not handling interrupts, soon dead.
-					* Called on the dying cpu, interrupts
-					* are already disabled. Must not
-					* sleep, must not fail */
 #define CPU_POST_DEAD		0x0009 /* CPU (unsigned)v dead, cpu_hotplug
 					* lock is dropped */
-#define CPU_STARTING		0x000A /* CPU (unsigned)v soon running.
-					* Called on the new cpu, just before
-					* enabling interrupts. Must not sleep,
-					* must not fail */
-#define CPU_DYING_IDLE		0x000B /* CPU (unsigned)v dying, reached
-					* idle loop. */
 #define CPU_BROKEN		0x000C /* CPU (unsigned)v did not die properly,
 					* perhaps due to preemption. */
 #define CPU_TASKS_FROZEN	0x0010
@@ -30,5 +19,3 @@
 #define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
 #define CPU_DOWN_FAILED_FROZEN  (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
 #define CPU_DEAD_FROZEN		(CPU_DEAD | CPU_TASKS_FROZEN)
-#define CPU_DYING_FROZEN	(CPU_DYING | CPU_TASKS_FROZEN)
-#define CPU_STARTING_FROZEN	(CPU_STARTING | CPU_TASKS_FROZEN)
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 39d9b95..05d7bc4 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -124,6 +124,8 @@
 	unsigned long i;
 	unsigned long min = index & ~((1UL << order) - 1);
 	unsigned long max = min + (1UL << order);
+	void **slot;
+	struct item *item2 = item_create(min);
 	RADIX_TREE(tree, GFP_KERNEL);
 
 	printf("Multiorder index %ld, order %d\n", index, order);
@@ -139,13 +141,19 @@
 		item_check_absent(&tree, i);
 	for (i = max; i < 2*max; i++)
 		item_check_absent(&tree, i);
+	for (i = min; i < max; i++)
+		assert(radix_tree_insert(&tree, i, item2) == -EEXIST);
+
+	slot = radix_tree_lookup_slot(&tree, index);
+	free(*slot);
+	radix_tree_replace_slot(slot, item2);
 	for (i = min; i < max; i++) {
-		static void *entry = (void *)
-					(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY);
-		assert(radix_tree_insert(&tree, i, entry) == -EEXIST);
+		struct item *item = item_lookup(&tree, i);
+		assert(item != 0);
+		assert(item->index == min);
 	}
 
-	assert(item_delete(&tree, index) != 0);
+	assert(item_delete(&tree, min) != 0);
 
 	for (i = 0; i < 2*max; i++)
 		item_check_absent(&tree, i);
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 3c40c9d..1cc6d64 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -8,7 +8,7 @@
 
 GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
 
-CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
+CFLAGS := -std=gnu99 -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
 
 export CFLAGS
 
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 4214567..b037ce9c 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -147,7 +147,7 @@
 	if (args.nr != getpid() ||
 	    args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 ||
 	    args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
-		printf("[FAIL]\tgetpid() failed to preseve regs\n");
+		printf("[FAIL]\tgetpid() failed to preserve regs\n");
 		nerrs++;
 	} else {
 		printf("[OK]\tgetpid() preserves regs\n");
@@ -162,7 +162,7 @@
 	if (args.nr != 0 ||
 	    args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 ||
 	    args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
-		printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preseve regs\n");
+		printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preserve regs\n");
 		nerrs++;
 	} else {
 		printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n");
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 8a577e7..246145b 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -106,7 +106,7 @@
      ".type int3, @function\n\t"
      ".align 4096\n\t"
      "int3:\n\t"
-     "mov %ss,%eax\n\t"
+     "mov %ss,%ecx\n\t"
      "int3\n\t"
      ".size int3, . - int3\n\t"
      ".align 4096, 0xcc\n\t"
@@ -306,7 +306,7 @@
 #ifdef __x86_64__
 # define REG_IP REG_RIP
 # define REG_SP REG_RSP
-# define REG_AX REG_RAX
+# define REG_CX REG_RCX
 
 struct selectors {
 	unsigned short cs, gs, fs, ss;
@@ -326,7 +326,7 @@
 #else
 # define REG_IP REG_EIP
 # define REG_SP REG_ESP
-# define REG_AX REG_EAX
+# define REG_CX REG_ECX
 
 static greg_t *ssptr(ucontext_t *ctx)
 {
@@ -457,10 +457,10 @@
 	ctx->uc_mcontext.gregs[REG_IP] =
 		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
 	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
-	ctx->uc_mcontext.gregs[REG_AX] = 0;
+	ctx->uc_mcontext.gregs[REG_CX] = 0;
 
 	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
-	requested_regs[REG_AX] = *ssptr(ctx);	/* The asm code does this. */
+	requested_regs[REG_CX] = *ssptr(ctx);	/* The asm code does this. */
 
 	return;
 }
@@ -482,7 +482,7 @@
 	unsigned short ss;
 	asm ("mov %%ss,%0" : "=r" (ss));
 
-	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
+	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
 	if (asm_ss != sig_ss && sig == SIGTRAP) {
 		/* Sanity check failure. */
 		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
@@ -654,8 +654,8 @@
 #endif
 
 		/* Sanity check on the kernel */
-		if (i == REG_AX && requested_regs[i] != resulting_regs[i]) {
-			printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
+		if (i == REG_CX && requested_regs[i] != resulting_regs[i]) {
+			printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
 			       (unsigned long long)requested_regs[i],
 			       (unsigned long long)resulting_regs[i]);
 			nerrs++;
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
index 4f93af8..18601f6 100644
--- a/tools/virtio/linux/dma-mapping.h
+++ b/tools/virtio/linux/dma-mapping.h
@@ -14,4 +14,20 @@
 	DMA_NONE = 3,
 };
 
+#define dma_alloc_coherent(d, s, hp, f) ({ \
+	void *__dma_alloc_coherent_p = kmalloc((s), (f)); \
+	*(hp) = (unsigned long)__dma_alloc_coherent_p; \
+	__dma_alloc_coherent_p; \
+})
+
+#define dma_free_coherent(d, s, p, h) kfree(p)
+
+#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o))
+
+#define dma_map_single(d, p, s, dir) (virt_to_phys(p))
+#define dma_mapping_error(...) (0)
+
+#define dma_unmap_single(...) do { } while (0)
+#define dma_unmap_page(...) do { } while (0)
+
 #endif
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
index 0338499..d9554fc 100644
--- a/tools/virtio/linux/kernel.h
+++ b/tools/virtio/linux/kernel.h
@@ -20,7 +20,9 @@
 
 #define PAGE_SIZE getpagesize()
 #define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK)
 
+typedef unsigned long long phys_addr_t;
 typedef unsigned long long dma_addr_t;
 typedef size_t __kernel_size_t;
 typedef unsigned int __wsum;
@@ -57,6 +59,11 @@
 	return p;
 }
 
+static inline void *alloc_pages_exact(size_t s, gfp_t gfp)
+{
+	return kmalloc(s, gfp);
+}
+
 static inline void kfree(void *p)
 {
 	if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
@@ -64,6 +71,11 @@
 	free(p);
 }
 
+static inline void free_pages_exact(void *p, size_t s)
+{
+	kfree(p);
+}
+
 static inline void *krealloc(void *p, size_t s, gfp_t gfp)
 {
 	return realloc(p, s);
@@ -105,6 +117,8 @@
 #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
 
+#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n"))
+
 #define min(x, y) ({				\
 	typeof(x) _min1 = (x);			\
 	typeof(y) _min2 = (y);			\
diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h
index 81baeac..7e1c119 100644
--- a/tools/virtio/linux/slab.h
+++ b/tools/virtio/linux/slab.h
@@ -1,2 +1,6 @@
 #ifndef LINUX_SLAB_H
+#define GFP_KERNEL 0
+#define GFP_ATOMIC 0
+#define __GFP_NOWARN 0
+#define __GFP_ZERO 0
 #endif
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index ee125e7..9377c8b 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -3,8 +3,12 @@
 #include <linux/scatterlist.h>
 #include <linux/kernel.h>
 
+struct device {
+	void *parent;
+};
+
 struct virtio_device {
-	void *dev;
+	struct device dev;
 	u64 features;
 };
 
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index 57a6964..9ba1181 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -40,6 +40,19 @@
 #define virtio_has_feature(dev, feature) \
 	(__virtio_test_bit((dev), feature))
 
+/**
+ * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * @vdev: the device
+ */
+static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+{
+	/*
+	 * Note the reverse polarity of the quirk feature (compared to most
+	 * other features), this is for compatibility with legacy systems.
+	 */
+	return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+}
+
 static inline bool virtio_is_little_endian(struct virtio_device *vdev)
 {
 	return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index 68e4f9f..bd2ad1d 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -13,6 +13,7 @@
 #define cache_line_size() SMP_CACHE_BYTES
 #define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES)))
 #define unlikely(x)    (__builtin_expect(!!(x), 0))
+#define likely(x)    (__builtin_expect(!!(x), 1))
 #define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
 typedef pthread_spinlock_t  spinlock_t;
 
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 4fde8c7..77e6ccf 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -33,6 +33,7 @@
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
 static unsigned int host_vtimer_irq;
+static u32 host_vtimer_irq_flags;
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
@@ -365,7 +366,7 @@
 
 static void kvm_timer_init_interrupt(void *info)
 {
-	enable_percpu_irq(host_vtimer_irq, 0);
+	enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
 }
 
 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
@@ -432,6 +433,14 @@
 	}
 	host_vtimer_irq = info->virtual_irq;
 
+	host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq);
+	if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH &&
+	    host_vtimer_irq_flags != IRQF_TRIGGER_LOW) {
+		kvm_err("Invalid trigger for IRQ%d, assuming level low\n",
+			host_vtimer_irq);
+		host_vtimer_irq_flags = IRQF_TRIGGER_LOW;
+	}
+
 	err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
 				 "kvm guest timer", kvm_get_running_vcpus());
 	if (err) {
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index fb4b0a7..83777c1 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -73,12 +73,8 @@
 	int i, vcpu_lock_idx = -1, ret;
 	struct kvm_vcpu *vcpu;
 
-	mutex_lock(&kvm->lock);
-
-	if (irqchip_in_kernel(kvm)) {
-		ret = -EEXIST;
-		goto out;
-	}
+	if (irqchip_in_kernel(kvm))
+		return -EEXIST;
 
 	/*
 	 * This function is also called by the KVM_CREATE_IRQCHIP handler,
@@ -87,10 +83,8 @@
 	 * the proper checks already.
 	 */
 	if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
-		!kvm_vgic_global_state.can_emulate_gicv2) {
-		ret = -ENODEV;
-		goto out;
-	}
+		!kvm_vgic_global_state.can_emulate_gicv2)
+		return -ENODEV;
 
 	/*
 	 * Any time a vcpu is run, vcpu_load is called which tries to grab the
@@ -138,9 +132,6 @@
 		vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
 		mutex_unlock(&vcpu->mutex);
 	}
-
-out:
-	mutex_unlock(&kvm->lock);
 	return ret;
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 07411cf..4660a7d 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -51,7 +51,7 @@
 
 	irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL);
 	if (!irq)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	INIT_LIST_HEAD(&irq->lpi_list);
 	INIT_LIST_HEAD(&irq->ap_list);
@@ -441,39 +441,63 @@
  * Find the target VCPU and the LPI number for a given devid/eventid pair
  * and make this IRQ pending, possibly injecting it.
  * Must be called with the its_lock mutex held.
+ * Returns 0 on success, a positive error value for any ITS mapping
+ * related errors and negative error values for generic errors.
  */
-static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
-				 u32 devid, u32 eventid)
+static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
+				u32 devid, u32 eventid)
 {
+	struct kvm_vcpu *vcpu;
 	struct its_itte *itte;
 
 	if (!its->enabled)
-		return;
+		return -EBUSY;
 
 	itte = find_itte(its, devid, eventid);
-	/* Triggering an unmapped IRQ gets silently dropped. */
-	if (itte && its_is_collection_mapped(itte->collection)) {
-		struct kvm_vcpu *vcpu;
+	if (!itte || !its_is_collection_mapped(itte->collection))
+		return E_ITS_INT_UNMAPPED_INTERRUPT;
 
-		vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
-		if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) {
-			spin_lock(&itte->irq->irq_lock);
-			itte->irq->pending = true;
-			vgic_queue_irq_unlock(kvm, itte->irq);
-		}
-	}
+	vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr);
+	if (!vcpu)
+		return E_ITS_INT_UNMAPPED_INTERRUPT;
+
+	if (!vcpu->arch.vgic_cpu.lpis_enabled)
+		return -EBUSY;
+
+	spin_lock(&itte->irq->irq_lock);
+	itte->irq->pending = true;
+	vgic_queue_irq_unlock(kvm, itte->irq);
+
+	return 0;
+}
+
+static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
+{
+	struct vgic_io_device *iodev;
+
+	if (dev->ops != &kvm_io_gic_ops)
+		return NULL;
+
+	iodev = container_of(dev, struct vgic_io_device, dev);
+
+	if (iodev->iodev_type != IODEV_ITS)
+		return NULL;
+
+	return iodev;
 }
 
 /*
  * Queries the KVM IO bus framework to get the ITS pointer from the given
  * doorbell address.
  * We then call vgic_its_trigger_msi() with the decoded data.
+ * According to the KVM_SIGNAL_MSI API description returns 1 on success.
  */
 int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
 	u64 address;
 	struct kvm_io_device *kvm_io_dev;
 	struct vgic_io_device *iodev;
+	int ret;
 
 	if (!vgic_has_its(kvm))
 		return -ENODEV;
@@ -485,15 +509,28 @@
 
 	kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
 	if (!kvm_io_dev)
-		return -ENODEV;
+		return -EINVAL;
 
-	iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
+	iodev = vgic_get_its_iodev(kvm_io_dev);
+	if (!iodev)
+		return -EINVAL;
 
 	mutex_lock(&iodev->its->its_lock);
-	vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
+	ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
 	mutex_unlock(&iodev->its->its_lock);
 
-	return 0;
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * KVM_SIGNAL_MSI demands a return value > 0 for success and 0
+	 * if the guest has blocked the MSI. So we map any LPI mapping
+	 * related error to that.
+	 */
+	if (ret)
+		return 0;
+	else
+		return 1;
 }
 
 /* Requires the its_lock to be held. */
@@ -502,7 +539,8 @@
 	list_del(&itte->itte_list);
 
 	/* This put matches the get in vgic_add_lpi. */
-	vgic_put_irq(kvm, itte->irq);
+	if (itte->irq)
+		vgic_put_irq(kvm, itte->irq);
 
 	kfree(itte);
 }
@@ -697,6 +735,7 @@
 	struct its_device *device;
 	struct its_collection *collection, *new_coll = NULL;
 	int lpi_nr;
+	struct vgic_irq *irq;
 
 	device = find_its_device(its, device_id);
 	if (!device)
@@ -710,6 +749,10 @@
 	    lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser))
 		return E_ITS_MAPTI_PHYSICALID_OOR;
 
+	/* If there is an existing mapping, behavior is UNPREDICTABLE. */
+	if (find_itte(its, device_id, event_id))
+		return 0;
+
 	collection = find_collection(its, coll_id);
 	if (!collection) {
 		int ret = vgic_its_alloc_collection(its, &collection, coll_id);
@@ -718,22 +761,28 @@
 		new_coll = collection;
 	}
 
-	itte = find_itte(its, device_id, event_id);
+	itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
 	if (!itte) {
-		itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL);
-		if (!itte) {
-			if (new_coll)
-				vgic_its_free_collection(its, coll_id);
-			return -ENOMEM;
-		}
-
-		itte->event_id	= event_id;
-		list_add_tail(&itte->itte_list, &device->itt_head);
+		if (new_coll)
+			vgic_its_free_collection(its, coll_id);
+		return -ENOMEM;
 	}
 
+	itte->event_id	= event_id;
+	list_add_tail(&itte->itte_list, &device->itt_head);
+
 	itte->collection = collection;
 	itte->lpi = lpi_nr;
-	itte->irq = vgic_add_lpi(kvm, lpi_nr);
+
+	irq = vgic_add_lpi(kvm, lpi_nr);
+	if (IS_ERR(irq)) {
+		if (new_coll)
+			vgic_its_free_collection(its, coll_id);
+		its_free_itte(kvm, itte);
+		return PTR_ERR(irq);
+	}
+	itte->irq = irq;
+
 	update_affinity_itte(kvm, itte);
 
 	/*
@@ -981,9 +1030,7 @@
 	u32 msi_data = its_cmd_get_id(its_cmd);
 	u64 msi_devid = its_cmd_get_deviceid(its_cmd);
 
-	vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
-
-	return 0;
+	return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data);
 }
 
 /*
@@ -1288,13 +1335,13 @@
 		its_sync_lpi_pending_table(vcpu);
 }
 
-static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its)
+static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its)
 {
 	struct vgic_io_device *iodev = &its->iodev;
 	int ret;
 
-	if (its->initialized)
-		return 0;
+	if (!its->initialized)
+		return -EBUSY;
 
 	if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base))
 		return -ENXIO;
@@ -1311,9 +1358,6 @@
 				      KVM_VGIC_V3_ITS_SIZE, &iodev->dev);
 	mutex_unlock(&kvm->slots_lock);
 
-	if (!ret)
-		its->initialized = true;
-
 	return ret;
 }
 
@@ -1435,9 +1479,6 @@
 		if (type != KVM_VGIC_ITS_ADDR_TYPE)
 			return -ENODEV;
 
-		if (its->initialized)
-			return -EBUSY;
-
 		if (copy_from_user(&addr, uaddr, sizeof(addr)))
 			return -EFAULT;
 
@@ -1453,7 +1494,9 @@
 	case KVM_DEV_ARM_VGIC_GRP_CTRL:
 		switch (attr->attr) {
 		case KVM_DEV_ARM_VGIC_CTRL_INIT:
-			return vgic_its_init_its(dev->kvm, its);
+			its->initialized = true;
+
+			return 0;
 		}
 		break;
 	}
@@ -1498,3 +1541,30 @@
 	return kvm_register_device_ops(&kvm_arm_vgic_its_ops,
 				       KVM_DEV_TYPE_ARM_VGIC_ITS);
 }
+
+/*
+ * Registers all ITSes with the kvm_io_bus framework.
+ * To follow the existing VGIC initialization sequence, this has to be
+ * done as late as possible, just before the first VCPU runs.
+ */
+int vgic_register_its_iodevs(struct kvm *kvm)
+{
+	struct kvm_device *dev;
+	int ret = 0;
+
+	list_for_each_entry(dev, &kvm->devices, vm_node) {
+		if (dev->ops != &kvm_arm_vgic_its_ops)
+			continue;
+
+		ret = vgic_register_its_iodev(kvm, dev->private);
+		if (ret)
+			return ret;
+		/*
+		 * We don't need to care about tearing down previously
+		 * registered ITSes, as the kvm_io_bus framework removes
+		 * them for us if the VM gets destroyed.
+		 */
+	}
+
+	return ret;
+}
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index ff668e0..90d8181 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -306,16 +306,19 @@
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	u64 propbaser = dist->propbaser;
+	u64 old_propbaser, propbaser;
 
 	/* Storing a value with LPIs already enabled is undefined */
 	if (vgic_cpu->lpis_enabled)
 		return;
 
-	propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
-	propbaser = vgic_sanitise_propbaser(propbaser);
-
-	dist->propbaser = propbaser;
+	do {
+		old_propbaser = dist->propbaser;
+		propbaser = old_propbaser;
+		propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
+		propbaser = vgic_sanitise_propbaser(propbaser);
+	} while (cmpxchg64(&dist->propbaser, old_propbaser,
+			   propbaser) != old_propbaser);
 }
 
 static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu,
@@ -331,16 +334,19 @@
 				     unsigned long val)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	u64 pendbaser = vgic_cpu->pendbaser;
+	u64 old_pendbaser, pendbaser;
 
 	/* Storing a value with LPIs already enabled is undefined */
 	if (vgic_cpu->lpis_enabled)
 		return;
 
-	pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
-	pendbaser = vgic_sanitise_pendbaser(pendbaser);
-
-	vgic_cpu->pendbaser = pendbaser;
+	do {
+		old_pendbaser = vgic_cpu->pendbaser;
+		pendbaser = old_pendbaser;
+		pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
+		pendbaser = vgic_sanitise_pendbaser(pendbaser);
+	} while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser,
+			   pendbaser) != old_pendbaser);
 }
 
 /*
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 0506543..9f0dae3 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -289,6 +289,14 @@
 		goto out;
 	}
 
+	if (vgic_has_its(kvm)) {
+		ret = vgic_register_its_iodevs(kvm);
+		if (ret) {
+			kvm_err("Unable to register VGIC ITS MMIO regions\n");
+			goto out;
+		}
+	}
+
 	dist->ready = true;
 
 out:
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index e7aeac7..e83b7fe 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -117,17 +117,17 @@
 
 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
 {
-	struct vgic_dist *dist;
+	struct vgic_dist *dist = &kvm->arch.vgic;
 
 	if (irq->intid < VGIC_MIN_LPI)
 		return;
 
-	if (!kref_put(&irq->refcount, vgic_irq_release))
-		return;
-
-	dist = &kvm->arch.vgic;
-
 	spin_lock(&dist->lpi_list_lock);
+	if (!kref_put(&irq->refcount, vgic_irq_release)) {
+		spin_unlock(&dist->lpi_list_lock);
+		return;
+	};
+
 	list_del(&irq->lpi_list);
 	dist->lpi_list_count--;
 	spin_unlock(&dist->lpi_list_lock);
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 1d8e21d..6c4625c 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -84,6 +84,7 @@
 int vgic_v3_probe(const struct gic_kvm_info *info);
 int vgic_v3_map_resources(struct kvm *kvm);
 int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+int vgic_register_its_iodevs(struct kvm *kvm);
 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);
 void vgic_enable_lpis(struct kvm_vcpu *vcpu);
@@ -140,6 +141,11 @@
 	return -ENODEV;
 }
 
+static inline int vgic_register_its_iodevs(struct kvm *kvm)
+{
+	return -ENODEV;
+}
+
 static inline bool vgic_has_its(struct kvm *kvm)
 {
 	return false;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index cc081cc..1950782 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -696,6 +696,11 @@
 {
 	struct kvm_device *dev, *tmp;
 
+	/*
+	 * We do not need to take the kvm->lock here, because nobody else
+	 * has a reference to the struct kvm at this point and therefore
+	 * cannot access the devices list anyhow.
+	 */
 	list_for_each_entry_safe(dev, tmp, &kvm->devices, vm_node) {
 		list_del(&dev->vm_node);
 		dev->ops->destroy(dev);
@@ -2832,19 +2837,28 @@
 	dev->ops = ops;
 	dev->kvm = kvm;
 
+	mutex_lock(&kvm->lock);
 	ret = ops->create(dev, cd->type);
 	if (ret < 0) {
+		mutex_unlock(&kvm->lock);
 		kfree(dev);
 		return ret;
 	}
+	list_add(&dev->vm_node, &kvm->devices);
+	mutex_unlock(&kvm->lock);
+
+	if (ops->init)
+		ops->init(dev);
 
 	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR | O_CLOEXEC);
 	if (ret < 0) {
 		ops->destroy(dev);
+		mutex_lock(&kvm->lock);
+		list_del(&dev->vm_node);
+		mutex_unlock(&kvm->lock);
 		return ret;
 	}
 
-	list_add(&dev->vm_node, &kvm->devices);
 	kvm_get_kvm(kvm);
 	cd->fd = ret;
 	return 0;